From 6d2010ae8f7a6078e10b361c6962983bab233e0f Mon Sep 17 00:00:00 2001
From: Apple <opensource@apple.com>
Date: Wed, 13 Jul 2011 23:06:20 +0000
Subject: [PATCH] xnu-1699.22.73.tar.gz

---
 EXTERNAL_HEADERS/Availability.h               |  156 +
 EXTERNAL_HEADERS/AvailabilityInternal.h       |  393 +
 EXTERNAL_HEADERS/AvailabilityMacros.h         |  820 ++
 EXTERNAL_HEADERS/Makefile                     |    7 +-
 EXTERNAL_HEADERS/architecture/Makefile        |    4 +-
 EXTERNAL_HEADERS/architecture/ppc/Makefile    |   33 -
 EXTERNAL_HEADERS/architecture/ppc/asm_help.h  |  456 -
 .../architecture/ppc/basic_regs.h             |  306 -
 EXTERNAL_HEADERS/architecture/ppc/fp_regs.h   |  153 -
 .../architecture/ppc/macro_help.h             |   64 -
 .../architecture/ppc/pseudo_inst.h            |  420 -
 EXTERNAL_HEADERS/architecture/ppc/reg_help.h  |  230 -
 EXTERNAL_HEADERS/mach-o/arm/reloc.h           |   42 -
 EXTERNAL_HEADERS/mach-o/loader.h              |    6 +
 EXTERNAL_HEADERS/mach-o/ppc/reloc.h           |   65 -
 EXTERNAL_HEADERS/stdarg.h                     |  164 +-
 Makefile                                      |   36 +-
 README                                        |   71 +-
 {osfmk/profiling/ppc => SETUP}/Makefile       |   17 +-
 SETUP/config/Makefile                         |   42 +
 SETUP/config/config.h                         |  293 +
 .../doconf/doconf.csh => SETUP/config/doconf  |   19 +-
 SETUP/config/externs.c                        |   82 +
 SETUP/config/lexer.l                          |  214 +
 SETUP/config/main.c                           |  296 +
 SETUP/config/mkglue.c                         |  331 +
 SETUP/config/mkheaders.c                      |  276 +
 SETUP/config/mkioconf.c                       | 2086 ++++
 SETUP/config/mkmakefile.c                     | 1182 +++
 SETUP/config/mkswapconf.c                     |  247 +
 SETUP/config/openp.c                          |   93 +
 SETUP/config/parser.y                         | 1278 +++
 SETUP/config/searchp.c                        |   90 +
 SETUP/kextsymboltool/Makefile                 |   31 +
 SETUP/kextsymboltool/kextsymboltool.c         |  912 ++
 .../newvers/newvers.csh => SETUP/newvers      |    0
 SETUP/seed_objroot                            |  133 -
 SETUP/setsegname/Makefile                     |   31 +
 SETUP/setsegname/setsegname.c                 |  237 +
 bsd/Makefile                                  |   17 +-
 bsd/bsm/Makefile                              |    4 -
 bsd/bsm/audit.h                               |    3 +
 bsd/bsm/audit_kevents.h                       |   12 +-
 bsd/conf/MASTER                               |   32 +-
 bsd/conf/MASTER.i386                          |   13 +-
 bsd/conf/MASTER.ppc                           |   99 -
 bsd/conf/MASTER.x86_64                        |   13 +-
 bsd/conf/Makefile                             |  100 +-
 bsd/conf/Makefile.i386                        |   40 +-
 bsd/conf/Makefile.ppc                         |   53 -
 bsd/conf/Makefile.template                    |   79 +-
 bsd/conf/Makefile.x86_64                      |   40 +-
 bsd/conf/files                                |   36 +-
 bsd/conf/files.i386                           |   18 +-
 bsd/conf/files.ppc                            |   34 -
 bsd/conf/files.x86_64                         |   16 +-
 bsd/conf/param.c                              |    3 +-
 bsd/conf/tools/Makefile                       |   32 -
 bsd/conf/tools/doconf/Makefile                |   49 -
 bsd/crypto/Makefile                           |    6 +-
 bsd/crypto/aes/Assert.c                       |   34 +
 bsd/crypto/aes/Makefile                       |    6 +-
 bsd/crypto/aes/aes.h                          |   16 +-
 bsd/crypto/aes/gen/Makefile                   |    4 -
 bsd/crypto/aes/gen/aesopt.h                   |    4 +-
 bsd/crypto/aes/i386/AES.s                     |  143 +
 bsd/crypto/aes/i386/Context.h                 |    9 +
 bsd/crypto/aes/i386/Data.mk                   |   30 +
 bsd/crypto/aes/i386/Data.s                    | 5196 ++++++++++
 bsd/crypto/aes/i386/EncryptDecrypt.s          |  607 ++
 bsd/crypto/aes/i386/ExpandKeyForDecryption.s  | 1214 +++
 bsd/crypto/aes/i386/ExpandKeyForEncryption.s  |  801 ++
 bsd/crypto/aes/i386/MakeData.c                |  516 +
 bsd/crypto/aes/i386/Makefile                  |   16 +-
 bsd/crypto/aes/i386/ReadMe.txt                |   22 +
 bsd/crypto/aes/i386/aes_crypt_hw.s            |  472 +
 bsd/crypto/aes/i386/aes_key_hw.s              |  405 +
 bsd/crypto/aes/i386/aes_modes.c               |  471 -
 bsd/crypto/aes/i386/aes_modes_asm.s           |  420 +
 bsd/crypto/aes/i386/aes_modes_hw.s            | 1669 ++++
 bsd/crypto/aes/i386/aes_x86_v2.s              | 1298 ---
 bsd/crypto/aes/i386/aesopt.h                  |  719 --
 bsd/crypto/aes/i386/aesxts.c                  |  392 +
 bsd/crypto/aes/i386/aesxts.h                  |  103 +
 bsd/crypto/aes/i386/aesxts_asm.s              | 1305 +++
 bsd/crypto/aes/i386/edefs.h                   |  130 -
 bsd/crypto/aes/ppc/Makefile                   |   36 -
 bsd/crypto/aes/ppc/aescrypt.c                 |  411 -
 bsd/crypto/aes/ppc/aeskey.c                   |  455 -
 bsd/crypto/aes/ppc/aesopt.h                   |  753 --
 bsd/crypto/aes/ppc/aestab.c                   |  384 -
 bsd/crypto/aes/ppc/aestab.h                   |  175 -
 bsd/crypto/aes/test/ReadMe.txt                |   97 +
 bsd/crypto/aes/test/makegenx86.sh             |    8 +
 bsd/crypto/aes/test/makeoptx86.sh             |   10 +
 bsd/crypto/aes/test/tstaes.c                  |  131 +
 bsd/crypto/blowfish/Makefile                  |    4 -
 bsd/crypto/cast128/Makefile                   |    4 -
 bsd/crypto/des/Makefile                       |    4 -
 bsd/crypto/doc/KernelCrypto.plist             |   76 +
 bsd/crypto/doc/KernelCrypto.txt               |  149 +
 bsd/crypto/rc4/Makefile                       |    4 -
 bsd/crypto/sha2/Makefile                      |    4 -
 bsd/crypto/sha2/intel/sha256.s                |  617 ++
 bsd/crypto/sha2/intel/sha256nossse3.s         |  649 ++
 bsd/crypto/sha2/sha2.c                        |   38 +-
 bsd/dev/Makefile                              |    4 -
 bsd/dev/chud/chud_bsd_callback.c              |   48 +-
 bsd/dev/dtrace/dtrace.c                       | 1098 +-
 bsd/dev/dtrace/dtrace_glue.c                  |   55 +-
 bsd/dev/dtrace/dtrace_subr.c                  |    7 +-
 bsd/dev/dtrace/fasttrap.c                     |   24 +-
 bsd/dev/dtrace/fbt.c                          |  186 +-
 bsd/dev/dtrace/lockstat.c                     |   14 +-
 bsd/dev/dtrace/profile_prvd.c                 |   55 +-
 bsd/dev/dtrace/sdt.c                          |  240 +-
 bsd/dev/dtrace/sdt_subr.c                     |   72 +-
 bsd/dev/dtrace/systrace.c                     |   62 +-
 bsd/dev/i386/conf.c                           |    1 +
 bsd/dev/i386/dtrace_isa.c                     |   37 +-
 bsd/dev/i386/fasttrap_isa.c                   |    5 +-
 bsd/dev/i386/fbt_x86.c                        | 1729 ++--
 bsd/dev/i386/mem.c                            |   17 +-
 bsd/dev/i386/munge.s                          |  139 +-
 bsd/dev/i386/sdt_x86.c                        |  112 +
 bsd/dev/i386/sysctl.c                         |  167 +-
 bsd/dev/i386/systemcalls.c                    |   98 +-
 bsd/dev/i386/unix_signal.c                    |    6 +-
 bsd/dev/memdev.c                              |   12 +-
 bsd/dev/ppc/conf.c                            |  354 -
 bsd/dev/ppc/cons.c                            |  139 -
 bsd/dev/ppc/dtrace_isa.c                      |  589 --
 bsd/dev/ppc/dtrace_subr_ppc.c                 |  193 -
 bsd/dev/ppc/fasttrap_isa.c                    |  734 --
 bsd/dev/ppc/fbt_ppc.c                         |  694 --
 bsd/dev/ppc/ffs.c                             |   59 -
 bsd/dev/ppc/ffs.s                             |   70 -
 bsd/dev/ppc/kern_machdep.c                    |  263 -
 bsd/dev/ppc/km.c                              |  392 -
 bsd/dev/ppc/mem.c                             |  241 -
 bsd/dev/ppc/munge.s                           |  477 -
 bsd/dev/ppc/ppc_init.c                        |  276 -
 bsd/dev/ppc/sdt_ppc.c                         |   71 -
 bsd/dev/ppc/stubs.c                           |  102 -
 bsd/dev/ppc/systemcalls.c                     |  435 -
 bsd/dev/ppc/unix_signal.c                     |  953 --
 bsd/dev/ppc/xsumas.s                          |  401 -
 bsd/dev/random/Makefile                       |    4 -
 bsd/dev/unix_startup.c                        |   72 +-
 bsd/dev/vn/Makefile                           |    5 -
 bsd/dev/vn/vn.c                               |   14 +-
 bsd/dev/x86_64/munge.s                        |  130 +
 bsd/hfs/Makefile                              |    4 -
 bsd/hfs/hfs.h                                 |  131 +-
 bsd/hfs/hfs_attrlist.c                        |   46 +-
 bsd/hfs/hfs_btreeio.c                         |   17 +-
 bsd/hfs/hfs_catalog.c                         |  338 +-
 bsd/hfs/hfs_catalog.h                         |   23 +-
 bsd/hfs/hfs_chash.c                           |   57 +-
 bsd/hfs/hfs_cnode.c                           | 1154 ++-
 bsd/hfs/hfs_cnode.h                           |   59 +-
 bsd/hfs/hfs_cprotect.c                        |  908 ++
 bsd/hfs/hfs_dbg.h                             |    2 +-
 bsd/hfs/hfs_encodings.c                       |    6 +-
 bsd/hfs/hfs_endian.c                          |   15 +-
 bsd/hfs/hfs_format.h                          |   25 +-
 bsd/hfs/hfs_fsctl.h                           |    9 +-
 bsd/hfs/hfs_hotfiles.c                        |   31 +-
 bsd/hfs/hfs_kdebug.h                          |   54 +
 bsd/hfs/hfs_link.c                            |   88 +-
 bsd/hfs/hfs_lookup.c                          |  155 +-
 bsd/hfs/hfs_mount.h                           |    1 +
 bsd/hfs/hfs_notification.c                    |   16 +
 bsd/hfs/hfs_readwrite.c                       |  811 +-
 bsd/hfs/hfs_search.c                          |   48 +-
 bsd/hfs/hfs_vfsops.c                          | 3015 ++++--
 bsd/hfs/hfs_vfsutils.c                        |  583 +-
 bsd/hfs/hfs_vnops.c                           | 1418 ++-
 bsd/hfs/hfs_xattr.c                           | 1038 +-
 bsd/hfs/hfscommon/BTree/BTree.c               |    8 +-
 bsd/hfs/hfscommon/BTree/BTreeAllocate.c       |   29 +-
 bsd/hfs/hfscommon/BTree/BTreeScanner.c        |    2 +-
 bsd/hfs/hfscommon/Catalog/FileIDsServices.c   |  286 +-
 bsd/hfs/hfscommon/Misc/FileExtentMapping.c    |  118 +-
 bsd/hfs/hfscommon/Misc/HybridAllocator.c      |  533 +
 bsd/hfs/hfscommon/Misc/VolumeAllocation.c     | 3141 +++++-
 bsd/hfs/hfscommon/headers/FileMgrInternal.h   |   36 +-
 bsd/hfs/hfscommon/headers/HybridAllocator.h   |  101 +
 bsd/hfs/hfscommon/headers/RedBlackTree.h      |  969 ++
 bsd/i386/param.h                              |   14 +-
 bsd/kern/Makefile                             |   26 +
 bsd/kern/bsd_init.c                           |  246 +-
 bsd/kern/bsd_stubs.c                          |   96 +
 bsd/kern/decmpfs.c                            |   16 +-
 bsd/kern/imageboot.c                          |  268 +-
 bsd/kern/kdebug.c                             | 1234 ++-
 bsd/kern/kern_acct.c                          |    8 +-
 bsd/kern/kern_aio.c                           |    2 +-
 bsd/kern/kern_authorization.c                 |   63 +-
 bsd/kern/kern_clock.c                         |    2 +-
 bsd/kern/kern_control.c                       |  157 +-
 bsd/kern/kern_core.c                          |   42 +-
 bsd/kern/kern_credential.c                    | 1171 ++-
 bsd/kern/kern_descrip.c                       |  572 +-
 bsd/kern/kern_event.c                         |  237 +-
 bsd/kern/kern_exec.c                          | 1183 ++-
 bsd/kern/kern_exit.c                          |  108 +-
 bsd/kern/kern_fork.c                          |   45 +-
 bsd/kern/kern_lockf.c                         |  153 +-
 bsd/kern/kern_malloc.c                        |  226 +-
 bsd/kern/kern_memorystatus.c                  |  839 +-
 bsd/kern/kern_mib.c                           |  273 +-
 bsd/kern/kern_mman.c                          |  108 +-
 bsd/kern/kern_newsysctl.c                     |  580 +-
 bsd/kern/kern_panicinfo.c                     |   49 +-
 bsd/kern/kern_priv.c                          |  119 +
 bsd/kern/kern_proc.c                          |  137 +-
 bsd/kern/kern_prot.c                          |  153 +-
 bsd/kern/kern_resource.c                      |  281 +-
 bsd/kern/kern_shutdown.c                      |   49 +-
 bsd/kern/kern_sig.c                           |  156 +-
 bsd/kern/kern_symfile.c                       |  227 +-
 bsd/kern/kern_synch.c                         |   35 +-
 bsd/kern/kern_sysctl.c                        | 1457 ++-
 bsd/kern/kern_time.c                          |    3 +-
 bsd/kern/kern_xxx.c                           |    2 +-
 bsd/kern/kpi_mbuf.c                           |  100 +-
 bsd/kern/kpi_socket.c                         |  250 +-
 bsd/kern/kpi_socketfilter.c                   | 1210 ++-
 bsd/kern/mach_loader.c                        |  552 +-
 bsd/kern/mach_loader.h                        |    3 +
 bsd/kern/mach_process.c                       |   14 +-
 bsd/kern/makesyscalls.sh                      |   42 +-
 bsd/kern/mcache.c                             |   48 +-
 bsd/kern/netboot.c                            |  171 +-
 bsd/kern/policy_check.c                       |  511 +
 bsd/kern/posix_sem.c                          |   44 +-
 bsd/kern/posix_shm.c                          |  194 +-
 bsd/kern/proc_info.c                          |  481 +-
 bsd/kern/process_policy.c                     |  460 +
 bsd/kern/pthread_support.c                    | 3311 ++++---
 bsd/kern/pthread_synch.c                      |  300 +-
 bsd/kern/subr_log.c                           |  234 +-
 bsd/kern/subr_prof.c                          |   58 +-
 bsd/kern/sys_generic.c                        |  349 +-
 bsd/kern/sys_pipe.c                           |   53 +-
 bsd/kern/sys_socket.c                         |   28 +-
 bsd/kern/syscalls.master                      |  128 +-
 bsd/kern/sysv_ipc.c                           |   91 +-
 bsd/kern/sysv_msg.c                           |    6 +-
 bsd/kern/sysv_sem.c                           |   34 +-
 bsd/kern/sysv_shm.c                           |   18 +-
 bsd/kern/trace.codes                          | 2149 ++++
 bsd/kern/tty.c                                |   32 +-
 bsd/kern/tty_ptmx.c                           |  340 +-
 bsd/kern/tty_subr.c                           |    4 +-
 bsd/kern/tty_tty.c                            |   14 +-
 bsd/kern/ubc_subr.c                           |  276 +-
 bsd/kern/uipc_domain.c                        |   99 +-
 bsd/kern/uipc_mbuf.c                          | 1958 ++--
 bsd/kern/uipc_mbuf2.c                         |  244 +-
 bsd/kern/uipc_socket.c                        |  701 +-
 bsd/kern/uipc_socket2.c                       |  237 +-
 bsd/kern/uipc_syscalls.c                      |  261 +-
 bsd/kern/uipc_usrreq.c                        |  189 +-
 bsd/kern/vm_pressure.c                        |  236 +
 bsd/{ppc/reg.h => kern/vm_pressure.h}         |   22 +-
 bsd/libkern/libkern.h                         |   11 +-
 bsd/machine/_limits.h                         |    4 +-
 bsd/machine/_param.h                          |    4 +-
 bsd/machine/_structs.h                        |    4 +-
 bsd/machine/_types.h                          |    4 +-
 bsd/machine/dis_tables.h                      |    4 +-
 bsd/machine/disklabel.h                       |    4 +-
 bsd/machine/endian.h                          |    4 +-
 bsd/machine/exec.h                            |    6 +-
 bsd/machine/fasttrap_isa.h                    |    4 +-
 bsd/machine/limits.h                          |    4 +-
 bsd/machine/param.h                           |    4 +-
 bsd/machine/profile.h                         |    4 +-
 bsd/machine/psl.h                             |    4 +-
 bsd/machine/ptrace.h                          |    4 +-
 bsd/machine/reboot.h                          |    4 +-
 bsd/machine/reg.h                             |    4 +-
 bsd/machine/setjmp.h                          |    4 +-
 bsd/machine/signal.h                          |    4 +-
 bsd/machine/types.h                           |    4 +-
 bsd/machine/ucontext.h                        |    4 +-
 bsd/machine/vmparam.h                         |    4 +-
 bsd/man/man2/Makefile                         |   14 +-
 bsd/man/man2/auditon.2                        |   16 +
 bsd/man/man2/dup.2                            |   16 +-
 bsd/man/man2/exchangedata.2                   |    3 +-
 bsd/man/man2/fcntl.2                          |  111 +-
 bsd/man/man2/getattrlist.2                    |   85 +-
 bsd/man/man2/getaudit.2                       |   15 +-
 bsd/man/man2/getauid.2                        |    4 +-
 bsd/man/man2/getdirentries.2                  |   20 +-
 bsd/man/man2/getdirentriesattr.2              |   23 +-
 bsd/man/man2/getdtablesize.2                  |   63 +
 bsd/man/man2/getfsstat.2                      |  113 +-
 bsd/man/man2/getgroups.2                      |   32 +-
 bsd/man/man2/gettimeofday.2                   |    1 -
 bsd/man/man2/kqueue.2                         |   42 +-
 bsd/man/man2/madvise.2                        |    2 +-
 bsd/man/man2/mmap.2                           |    9 +
 bsd/man/man2/open.2                           |   18 +-
 bsd/man/man2/pathconf.2                       |   10 +
 bsd/man/man2/pipe.2                           |    9 +-
 bsd/man/man2/posix_spawn.2                    |   13 +-
 bsd/man/man2/quotactl.2                       |    1 -
 bsd/man/man2/sem_close.2                      |   60 +
 bsd/man/man2/sem_open.2                       |  169 +
 bsd/man/man2/sem_post.2                       |   65 +
 bsd/man/man2/sem_unlink.2                     |   74 +
 bsd/man/man2/sem_wait.2                       |   88 +
 bsd/man/man2/sendfile.2                       |   14 +-
 bsd/man/man2/setaudit.2                       |   38 +-
 bsd/man/man2/setgroups.2                      |    6 +-
 bsd/man/man2/setregid.2                       |   92 +
 bsd/man/man2/setreuid.2                       |   90 +
 bsd/man/man2/setxattr.2                       |    7 +
 bsd/man/man2/shm_open.2                       |  179 +
 bsd/man/man2/shm_unlink.2                     |   87 +
 bsd/man/man2/stat.2                           |  152 +-
 bsd/man/man2/statfs.2                         |   27 +-
 bsd/man/man2/undelete.2                       |  108 +
 .../man3/posix_spawn_file_actions_addclose.3  |   50 +-
 bsd/man/man3/posix_spawnattr_setflags.3       |   12 +-
 bsd/man/man4/auditpipe.4                      |   16 +-
 bsd/man/man4/gif.4                            |    4 +-
 bsd/man/man4/icmp6.4                          |    2 +-
 bsd/man/man4/netintro.4                       |    3 +-
 bsd/man/man4/random.4                         |    2 +-
 bsd/man/man5/Makefile                         |    2 -
 bsd/man/man5/dir.5                            |   16 +-
 bsd/man/man5/fs.5                             |  343 -
 bsd/man/man5/inode.5                          |    1 -
 bsd/miscfs/Makefile                           |    4 -
 bsd/miscfs/devfs/Makefile                     |    4 -
 bsd/miscfs/devfs/devfs_tree.c                 |    2 +
 bsd/miscfs/devfs/devfs_vfsops.c               |   11 +-
 bsd/miscfs/devfs/devfs_vnops.c                |  178 +-
 bsd/miscfs/devfs/devfsdefs.h                  |   44 +-
 bsd/miscfs/fifofs/Makefile                    |    4 -
 bsd/miscfs/nullfs/null.h                      |  118 -
 bsd/miscfs/nullfs/null_subr.c                 |  304 -
 bsd/miscfs/nullfs/null_vfsops.c               |  382 -
 bsd/miscfs/nullfs/null_vnops.c                |  570 --
 bsd/miscfs/specfs/Makefile                    |    4 -
 bsd/miscfs/specfs/spec_vnops.c                |  628 +-
 bsd/miscfs/specfs/specdev.h                   |    1 +
 bsd/miscfs/union/Makefile                     |    4 -
 bsd/miscfs/union/union.h                      |  151 -
 bsd/miscfs/union/union_subr.c                 | 1604 ---
 bsd/miscfs/union/union_vfsops.c               |  563 --
 bsd/miscfs/union/union_vnops.c                | 1726 ----
 bsd/net/Makefile                              |   13 +-
 bsd/net/bpf.c                                 |  447 +-
 bsd/net/bpf.h                                 |   12 +-
 bsd/net/bpf_filter.c                          |  137 +-
 bsd/net/bpfdesc.h                             |   16 +-
 bsd/net/bridgestp.c                           | 2425 +++++
 bsd/net/bridgestp.h                           |  441 +
 bsd/net/dlil.c                                | 4276 ++++----
 bsd/net/dlil.h                                |  121 +-
 bsd/net/ether_if_module.c                     |   45 +-
 bsd/net/ether_inet6_pr_module.c               |  174 +-
 bsd/net/ether_inet_pr_module.c                |  512 +-
 bsd/net/ethernet.h                            |   28 +-
 bsd/net/if.c                                  | 1851 ++--
 bsd/net/if.h                                  |   51 +-
 bsd/net/if_atm.h                              |  136 -
 bsd/net/if_bond.c                             |  101 +-
 bsd/net/if_bridge.c                           | 5138 ++++++++++
 bsd/net/if_bridgevar.h                        |  499 +
 bsd/net/if_disc.c                             |  240 -
 bsd/net/if_dummy.c                            |  290 -
 bsd/net/if_ethersubr.c                        |  229 -
 bsd/net/if_fddisubr.c                         |  637 --
 bsd/net/if_gif.c                              |  134 +-
 bsd/net/if_gif.h                              |    1 +
 bsd/net/if_llreach.c                          |  565 ++
 bsd/net/if_llreach.h                          |  150 +
 bsd/net/if_loop.c                             |   22 +-
 bsd/net/if_media.h                            |    2 +-
 bsd/net/if_mib.c                              |   64 +-
 bsd/net/if_mib.h                              |    2 +-
 bsd/net/if_pflog.c                            |    5 +-
 bsd/net/if_stf.c                              |  119 +-
 bsd/net/if_types.h                            |    5 +-
 bsd/net/if_utun.c                             |   20 +-
 bsd/net/if_var.h                              |  747 +-
 bsd/net/if_vlan.c                             |  641 +-
 bsd/net/kext_net.h                            |   63 +-
 bsd/net/kpi_interface.c                       | 1243 +--
 bsd/net/kpi_interface.h                       |  112 +-
 bsd/net/kpi_protocol.c                        |   20 +-
 bsd/net/multicast_list.c                      |    3 +-
 bsd/net/ndrv.c                                |    4 +
 bsd/net/ndrv.h                                |    2 +-
 bsd/net/net_osdep.h                           |   14 +-
 bsd/net/net_str_id.c                          |   12 +-
 bsd/net/netsrc.c                              |  253 +
 .../ppc/cframe.h => bsd/net/netsrc.h          |   62 +-
 bsd/net/ntstat.c                              | 1954 ++++
 bsd/net/ntstat.h                              |  348 +
 bsd/net/pf.c                                  |  126 +-
 bsd/net/pf_if.c                               |   36 +-
 bsd/net/pf_ioctl.c                            |  445 +-
 bsd/net/pf_osfp.c                             |   11 +-
 bsd/net/pf_table.c                            |   13 +-
 bsd/net/pfkeyv2.h                             |    1 +
 bsd/net/pfvar.h                               |   75 +-
 bsd/net/ppp_deflate.c                         |    4 +-
 bsd/net/route.c                               |  974 +-
 bsd/net/route.h                               |   76 +-
 bsd/net/rtsock.c                              |  965 +-
 bsd/net/rtsock_mip.c                          |   76 -
 bsd/netat/Makefile                            |    4 -
 bsd/netat/asp_proto.c                         |    2 +-
 bsd/netat/at.c                                |   78 +-
 bsd/netat/at_var.h                            |    4 +-
 bsd/netat/ddp.c                               |   13 +-
 bsd/netat/ddp_lap.c                           |   10 +-
 bsd/netat/sys_glue.c                          |    4 +-
 bsd/netinet/Makefile                          |    9 +-
 bsd/netinet/icmp6.h                           |  195 +-
 bsd/netinet/if_atm.c                          |  303 -
 bsd/netinet/if_atm.h                          |   77 -
 bsd/netinet/if_fddi.h                         |  114 -
 bsd/netinet/igmp.c                            | 3949 +++++++-
 bsd/netinet/igmp.h                            |  104 +-
 bsd/netinet/igmp_var.h                        |  239 +-
 bsd/netinet/in.c                              |  767 +-
 bsd/netinet/in.h                              |  204 +-
 bsd/netinet/in_arp.c                          |  621 +-
 bsd/netinet/in_arp.h                          |    5 +-
 bsd/netinet/in_cksum.c                        |   78 +-
 bsd/netinet/in_dhcp.c                         |   22 +-
 bsd/netinet/in_gif.c                          |   14 +-
 bsd/netinet/in_mcast.c                        | 3641 +++++++
 bsd/netinet/in_pcb.c                          |  436 +-
 bsd/netinet/in_pcb.h                          |  147 +-
 bsd/netinet/in_pcblist.c                      |  383 +
 bsd/netinet/in_proto.c                        |    2 +-
 bsd/netinet/in_rmx.c                          |   86 +-
 bsd/netinet/in_tclass.c                       |  850 ++
 bsd/netinet/in_var.h                          |  285 +-
 bsd/netinet/ip6.h                             |  103 +-
 bsd/netinet/ip_divert.c                       |   48 +-
 bsd/netinet/ip_dummynet.c                     |  129 +-
 bsd/netinet/ip_dummynet.h                     |    2 +-
 bsd/netinet/ip_encap.c                        |   14 +-
 bsd/netinet/ip_encap.h                        |    2 +-
 bsd/netinet/ip_flow.c                         |  380 -
 bsd/netinet/ip_flow.h                         |   87 -
 bsd/netinet/ip_fw.h                           |    2 +
 bsd/netinet/ip_fw2.c                          |   49 +-
 bsd/netinet/ip_fw2.h                          |    2 +
 bsd/netinet/ip_fw2_compat.c                   |    4 +-
 bsd/netinet/ip_icmp.c                         |   56 +-
 bsd/netinet/ip_id.c                           |    1 +
 bsd/netinet/ip_input.c                        |  376 +-
 bsd/netinet/ip_mroute.c                       |   40 +-
 bsd/netinet/ip_mroute.h                       |    4 +-
 bsd/netinet/ip_output.c                       | 1092 +-
 bsd/netinet/ip_var.h                          |   60 +-
 bsd/netinet/kpi_ipfilter.c                    |  169 +-
 bsd/netinet/kpi_ipfilter.h                    |    9 +-
 bsd/netinet/raw_ip.c                          |  179 +-
 bsd/netinet/tcp.h                             |   88 +-
 bsd/netinet/tcp_cc.h                          |  124 +
 bsd/netinet/tcp_debug.c                       |    2 +-
 bsd/netinet/tcp_input.c                       | 1305 ++-
 bsd/netinet/tcp_ledbat.c                      |  434 +
 bsd/netinet/tcp_newreno.c                     |  344 +
 bsd/netinet/tcp_output.c                      |  318 +-
 bsd/netinet/tcp_sack.c                        |   20 +-
 bsd/netinet/tcp_seq.h                         |    2 +
 bsd/netinet/tcp_subr.c                        |  440 +-
 bsd/netinet/tcp_timer.c                       |  953 +-
 bsd/netinet/tcp_timer.h                       |  117 +-
 bsd/netinet/tcp_usrreq.c                      |  278 +-
 bsd/netinet/tcp_var.h                         |  269 +-
 bsd/netinet/udp_usrreq.c                      |  395 +-
 bsd/netinet6/Makefile                         |    8 +-
 bsd/netinet6/ah.h                             |    2 +-
 bsd/netinet6/ah6.h                            |    2 +-
 bsd/netinet6/ah_core.c                        |   82 +-
 bsd/netinet6/ah_input.c                       |   57 +-
 bsd/netinet6/dest6.c                          |   10 +-
 bsd/netinet6/esp6.h                           |    2 +-
 bsd/netinet6/esp_core.c                       |    2 +-
 bsd/netinet6/esp_input.c                      |  146 +-
 bsd/netinet6/frag6.c                          |   75 +-
 bsd/netinet6/icmp6.c                          |  758 +-
 bsd/netinet6/in6.c                            | 1609 +--
 bsd/netinet6/in6.h                            |  279 +-
 bsd/netinet6/in6_cksum.c                      |    2 +-
 bsd/netinet6/in6_gif.c                        |   23 +-
 bsd/netinet6/in6_gif.h                        |    2 +-
 bsd/netinet6/in6_ifattach.c                   |  448 +-
 bsd/netinet6/in6_ifattach.h                   |   40 +-
 bsd/netinet6/in6_mcast.c                      | 3490 +++++++
 bsd/netinet6/in6_pcb.c                        |  362 +-
 bsd/netinet6/in6_pcb.h                        |   12 +-
 bsd/netinet6/in6_prefix.c                     |  241 +-
 bsd/netinet6/in6_prefix.h                     |    1 -
 bsd/netinet6/in6_proto.c                      |  117 +-
 bsd/netinet6/in6_rmx.c                        |  124 +-
 bsd/netinet6/in6_src.c                        | 1471 ++-
 bsd/netinet6/in6_var.h                        |  391 +-
 bsd/netinet6/ip6_forward.c                    |  132 +-
 bsd/netinet6/ip6_fw.c                         |   46 +-
 bsd/netinet6/ip6_fw.h                         |    2 +
 bsd/netinet6/ip6_id.c                         |  304 +
 bsd/netinet6/ip6_input.c                      |  863 +-
 bsd/netinet6/ip6_mroute.c                     |  141 +-
 bsd/netinet6/ip6_mroute.h                     |   16 +-
 bsd/netinet6/ip6_output.c                     | 2978 +++---
 bsd/netinet6/ip6_var.h                        |  192 +-
 bsd/netinet6/ip6protosw.h                     |   10 +-
 bsd/netinet6/ipcomp6.h                        |    2 +-
 bsd/netinet6/ipcomp_input.c                   |   11 +-
 bsd/netinet6/ipsec.c                          |  156 +-
 bsd/netinet6/mld6.c                           | 3577 ++++++-
 bsd/netinet6/mld6.h                           |  139 +
 bsd/netinet6/mld6_var.h                       |  215 +-
 bsd/netinet6/nd6.c                            |  878 +-
 bsd/netinet6/nd6.h                            |  243 +-
 bsd/netinet6/nd6_nbr.c                        | 1082 +-
 bsd/netinet6/nd6_rtr.c                        | 3033 ++++--
 bsd/netinet6/raw_ip6.c                        |  230 +-
 bsd/netinet6/route6.c                         |   62 +-
 bsd/netinet6/scope6.c                         |  257 +-
 bsd/netinet6/scope6_var.h                     |   46 +
 bsd/netinet6/tcp6_var.h                       |   32 +-
 bsd/netinet6/udp6_output.c                    |  110 +-
 bsd/netinet6/udp6_usrreq.c                    |  177 +-
 bsd/netinet6/udp6_var.h                       |    2 +-
 bsd/netkey/Makefile                           |    4 -
 bsd/netkey/key.c                              |  726 +-
 bsd/nfs/Makefile                              |    4 -
 bsd/nfs/krpc.h                                |   22 +-
 bsd/nfs/nfs.h                                 |  485 +-
 bsd/nfs/nfs4_subs.c                           | 1990 +++-
 bsd/nfs/nfs4_vnops.c                          | 6126 ++++++++----
 bsd/nfs/nfs_bio.c                             |  260 +-
 bsd/nfs/nfs_boot.c                            |    8 +-
 bsd/nfs/nfs_gss.c                             |  338 +-
 bsd/nfs/nfs_gss.h                             |   17 +-
 bsd/nfs/nfs_lock.c                            |  791 +-
 bsd/nfs/nfs_lock.h                            |   23 +-
 bsd/nfs/nfs_node.c                            |  275 +-
 bsd/nfs/nfs_serv.c                            |  211 +-
 bsd/nfs/nfs_socket.c                          | 2481 ++++-
 bsd/nfs/nfs_srvcache.c                        |   31 +-
 bsd/nfs/nfs_subs.c                            |  687 +-
 bsd/nfs/nfs_syscalls.c                        |  163 +-
 bsd/nfs/nfs_vfsops.c                          | 3856 ++++++--
 bsd/nfs/nfs_vnops.c                           | 1388 ++-
 bsd/nfs/nfsm_subs.h                           |   68 +-
 bsd/nfs/nfsmount.h                            |  229 +-
 bsd/nfs/nfsnode.h                             |  136 +-
 bsd/nfs/nfsproto.h                            |   28 +-
 bsd/nfs/nfsrvcache.h                          |   11 +-
 bsd/nfs/rpcv2.h                               |    5 +-
 bsd/nfs/xdr_subs.h                            |  417 +-
 bsd/ppc/Makefile                              |   33 -
 bsd/ppc/_limits.h                             |   27 -
 bsd/ppc/_param.h                              |   46 -
 bsd/ppc/_structs.h                            |  217 -
 bsd/ppc/_types.h                              |  120 -
 bsd/ppc/decodePPC.h                           |  919 --
 bsd/ppc/endian.h                              |  124 -
 bsd/ppc/exec.h                                |  108 -
 bsd/ppc/fasttrap_isa.h                        |  106 -
 bsd/ppc/limits.h                              |  107 -
 bsd/ppc/param.h                               |  141 -
 bsd/ppc/profile.h                             |   58 -
 bsd/ppc/reboot.h                              |   55 -
 bsd/ppc/setjmp.h                              |  121 -
 bsd/ppc/signal.h                              |   83 -
 bsd/ppc/types.h                               |  172 -
 bsd/ppc/ucontext.h                            |   73 -
 bsd/ppc/vmparam.h                             |   66 -
 bsd/security/Makefile                         |    4 -
 bsd/security/audit/Makefile                   |    4 -
 bsd/security/audit/audit.c                    |   83 +-
 bsd/security/audit/audit.h                    |   19 +-
 bsd/security/audit/audit_arg.c                |    8 +-
 bsd/security/audit/audit_bsd.c                |  169 +-
 bsd/security/audit/audit_bsd.h                |   74 +-
 bsd/security/audit/audit_bsm.c                |   22 +-
 bsd/security/audit/audit_ioctl.h              |   25 +
 bsd/security/audit/audit_private.h            |    5 +-
 bsd/security/audit/audit_session.c            | 1949 ++--
 bsd/security/audit/audit_syscalls.c           |  214 +-
 bsd/security/audit/audit_worker.c             |   33 +-
 bsd/sys/Makefile                              |   49 +-
 bsd/sys/attr.h                                |   31 +-
 bsd/sys/buf.h                                 |   62 +-
 bsd/sys/buf_internal.h                        |   36 +-
 bsd/sys/cdefs.h                               |  122 +-
 bsd/sys/codesign.h                            |    3 +
 bsd/sys/conf.h                                |   32 +-
 .../sys/content_protection.h                  |   32 +-
 bsd/sys/cprotect.h                            |  117 +-
 bsd/sys/decmpfs.h                             |    2 +-
 bsd/sys/disk.h                                |   17 +-
 bsd/sys/dtrace.h                              |   70 +-
 bsd/sys/dtrace_glue.h                         |   96 +-
 bsd/sys/dtrace_impl.h                         |   39 +
 bsd/sys/errno.h                               |   56 +-
 bsd/sys/event.h                               |   47 +-
 bsd/sys/fasttrap_impl.h                       |    5 +-
 bsd/sys/fbt.h                                 |   15 +-
 bsd/sys/fcntl.h                               |   63 +-
 bsd/sys/file.h                                |    2 +
 bsd/sys/file_internal.h                       |   10 +-
 bsd/sys/filedesc.h                            |    6 +-
 osfmk/ppc/machine_cpu.h => bsd/sys/fileport.h |   39 +-
 bsd/sys/fsctl.h                               |  176 +-
 bsd/sys/fsevents.h                            |    1 +
 bsd/sys/fslog.h                               |    8 +
 bsd/sys/imageboot.h                           |   11 +-
 bsd/sys/imgact.h                              |   30 +-
 bsd/{ppc/ptrace.h => sys/imgsrc.h}            |   46 +-
 bsd/sys/kauth.h                               |   31 +-
 bsd/sys/kdebug.h                              |  168 +-
 bsd/sys/kern_control.h                        |    6 +
 bsd/sys/kern_memorystatus.h                   |   42 +-
 bsd/sys/kpi_mbuf.h                            |   61 +-
 bsd/sys/kpi_socket.h                          |   32 +-
 bsd/sys/make_posix_availability.sh            |   71 +
 bsd/sys/make_symbol_aliasing.sh               |   86 +
 bsd/sys/malloc.h                              |   20 +-
 bsd/sys/mbuf.h                                |  623 +-
 bsd/sys/mcache.h                              |   51 +-
 bsd/sys/mman.h                                |    1 +
 bsd/sys/mount.h                               |   89 +-
 bsd/sys/mount_internal.h                      |   53 +-
 bsd/sys/msgbuf.h                              |   22 +-
 bsd/sys/namei.h                               |   56 +-
 bsd/{dev/ppc/memmove.c => sys/netboot.h}      |   38 +-
 bsd/sys/priv.h                                |   95 +
 bsd/sys/proc.h                                |   36 +-
 bsd/sys/proc_info.h                           |   90 +-
 bsd/sys/proc_internal.h                       |   42 +-
 bsd/sys/process_policy.h                      |  177 +
 bsd/sys/protosw.h                             |   16 +-
 bsd/sys/pthread_internal.h                    |   41 +-
 bsd/sys/queue.h                               |   68 +-
 bsd/sys/reboot.h                              |    2 +-
 bsd/sys/resource.h                            |   13 +-
 bsd/sys/sdt_impl.h                            |    5 +-
 bsd/sys/signal.h                              |    6 -
 bsd/sys/socket.h                              |   91 +-
 bsd/sys/socketvar.h                           |  144 +-
 bsd/sys/sockio.h                              |   17 +-
 bsd/sys/spawn.h                               |    9 +
 bsd/sys/spawn_internal.h                      |    5 +-
 bsd/sys/stat.h                                |   13 +-
 bsd/sys/sys_domain.h                          |    3 +-
 bsd/sys/sysctl.h                              |  123 +-
 bsd/sys/sysent.h                              |    5 +-
 bsd/sys/syslog.h                              |  164 +-
 bsd/sys/systm.h                               |   11 +-
 bsd/sys/time.h                                |    4 +-
 bsd/sys/tree.h                                |  693 +-
 bsd/sys/tty.h                                 |    2 +
 bsd/sys/ubc.h                                 |    1 +
 bsd/sys/ubc_internal.h                        |   12 +
 bsd/sys/ucontext.h                            |    6 -
 bsd/sys/ucred.h                               |   12 +-
 bsd/sys/un.h                                  |    5 +
 bsd/sys/unistd.h                              |    3 +-
 bsd/sys/unpcb.h                               |    5 +-
 bsd/sys/user.h                                |   30 +-
 bsd/sys/vfs_context.h                         |    2 +
 bsd/sys/vnode.h                               |  312 +-
 bsd/sys/vnode_if.h                            |  211 +-
 bsd/sys/vnode_internal.h                      |   99 +-
 bsd/sys/xattr.h                               |   16 +-
 bsd/uuid/Makefile                             |    4 -
 bsd/vfs/Makefile                              |    4 -
 bsd/vfs/kpi_vfs.c                             |  942 +-
 bsd/vfs/vfs_attrlist.c                        |  211 +-
 bsd/vfs/vfs_bio.c                             |  787 +-
 bsd/vfs/vfs_cache.c                           |  103 +-
 bsd/vfs/vfs_cluster.c                         |  538 +-
 bsd/vfs/vfs_conf.c                            |   26 +-
 bsd/vfs/vfs_fsevents.c                        |   28 +-
 bsd/vfs/vfs_fslog.c                           |   82 +-
 bsd/vfs/vfs_init.c                            |   18 +-
 bsd/vfs/vfs_journal.c                         | 4873 +++++----
 bsd/vfs/vfs_journal.h                         |   51 +-
 bsd/vfs/vfs_lookup.c                          | 1533 +--
 bsd/vfs/vfs_subr.c                            | 2005 +++-
 bsd/vfs/vfs_syscalls.c                        | 2813 ++++--
 bsd/vfs/vfs_utfconv.c                         |   17 +-
 bsd/vfs/vfs_vnops.c                           |  518 +-
 bsd/vfs/vfs_xattr.c                           |   87 +-
 bsd/vfs/vnode_if.c                            |   95 +
 bsd/vm/Makefile                               |    4 -
 bsd/vm/dp_backing_file.c                      |   62 +-
 bsd/vm/vm_unix.c                              |  481 +-
 bsd/vm/vnode_pager.c                          |  120 +-
 config/BSDKernel.exports                      |   16 +
 config/BSDKernel.ppc.exports                  |   37 -
 config/Dummy.exports                          |    1 +
 config/IOKit.exports                          |   10 +-
 config/IOKit.i386.exports                     |    3 -
 config/IOKit.ppc.exports                      |  383 -
 config/IOKit.x86_64.exports                   |    3 -
 config/Libkern.exports                        |    3 +-
 config/Libkern.i386.exports                   |    3 +
 config/Libkern.ppc.exports                    |   29 -
 config/Libkern.x86_64.exports                 |    4 +-
 config/MACFramework.exports                   |    2 +
 config/MACFramework.ppc.exports               |    9 -
 config/Mach.ppc.exports                       |    1 -
 config/Makefile                               |  153 +-
 config/MasterVersion                          |    2 +-
 config/Private.exports                        |   42 +-
 config/Private.i386.exports                   |   23 +-
 config/Private.ppc.exports                    |    2 -
 config/Private.x86_64.exports                 |   22 +
 config/System6.0.exports                      |    8 +-
 config/System6.0.i386.exports                 |    4 +-
 config/System6.0.ppc.exports                  |  256 -
 config/Unsupported.exports                    |   10 +-
 config/Unsupported.i386.exports               |    4 +-
 config/Unsupported.ppc.exports                |  118 -
 config/Unsupported.x86_64.exports             |    1 +
 config/version.c                              |    2 +
 .../IOKit/AppleKeyStoreInterface.h            |   50 +-
 iokit/IOKit/IOBufferMemoryDescriptor.h        |   13 +-
 iokit/IOKit/IOCatalogue.h                     |   22 +-
 iokit/IOKit/IOCommandGate.h                   |    6 +-
 iokit/IOKit/IODMACommand.h                    |    2 +-
 iokit/IOKit/IODataQueueShared.h               |    2 +-
 iokit/IOKit/IOEventSource.h                   |   33 +-
 iokit/IOKit/IOHibernatePrivate.h              |   58 +-
 iokit/IOKit/IOInterruptEventSource.h          |   11 +
 iokit/IOKit/IOKitDebug.h                      |   29 +-
 iokit/IOKit/IOKitKeys.h                       |    1 +
 iokit/IOKit/IOKitKeysPrivate.h                |   22 +-
 iokit/IOKit/IOKitServer.h                     |    9 +-
 iokit/IOKit/IOLib.h                           |   32 +-
 iokit/IOKit/IOMemoryCursor.h                  |   80 -
 iokit/IOKit/IOMemoryDescriptor.h              |   26 +-
 iokit/IOKit/IOMessage.h                       |  176 +-
 iokit/IOKit/IONVRAM.h                         |   12 +-
 iokit/IOKit/IOPlatformExpert.h                |    9 +-
 iokit/IOKit/IOService.h                       |   96 +-
 iokit/IOKit/IOServicePM.h                     |    9 +
 iokit/IOKit/IOSharedLock.h                    |   79 +-
 iokit/IOKit/IOStatistics.h                    |  220 +
 iokit/IOKit/IOStatisticsPrivate.h             |  359 +
 iokit/IOKit/IOTimeStamp.h                     |    6 +-
 iokit/IOKit/IOTimerEventSource.h              |    6 +-
 iokit/IOKit/IOTypes.h                         |    5 +
 iokit/IOKit/IOUserClient.h                    |   16 +-
 iokit/IOKit/IOWorkLoop.h                      |   39 +-
 iokit/IOKit/Makefile                          |   29 +-
 iokit/IOKit/i386/IOSharedLockImp.h            |  113 -
 iokit/IOKit/machine/Makefile                  |    2 -
 iokit/IOKit/nvram/Makefile                    |    2 -
 iokit/IOKit/platform/Makefile                 |    2 -
 iokit/IOKit/power/Makefile                    |    2 -
 iokit/IOKit/ppc/IODBDMA.h                     |  367 -
 iokit/IOKit/ppc/IOSharedLockImp.h             |  199 -
 iokit/IOKit/ppc/Makefile                      |   32 -
 iokit/IOKit/pwr_mgt/IOPM.h                    |  105 +-
 iokit/IOKit/pwr_mgt/IOPMDeprecated.h          |  177 -
 iokit/IOKit/pwr_mgt/IOPMPrivate.h             |  436 +-
 iokit/IOKit/pwr_mgt/IOPowerConnection.h       |   22 +-
 iokit/IOKit/pwr_mgt/Makefile                  |    4 +-
 iokit/IOKit/pwr_mgt/RootDomain.h              |  348 +-
 iokit/IOKit/rtc/Makefile                      |    2 -
 iokit/IOKit/system_management/Makefile        |    2 -
 iokit/Kernel/IOBufferMemoryDescriptor.cpp     |   10 -
 iokit/Kernel/IOCPU.cpp                        |   13 +-
 iokit/Kernel/IOCatalogue.cpp                  |  248 +-
 iokit/Kernel/IOCommandGate.cpp                |   40 +-
 iokit/Kernel/IOCommandQueue.cpp               |   27 +-
 iokit/Kernel/IODMACommand.cpp                 |   26 +-
 iokit/Kernel/IODMAController.cpp              |    4 +-
 iokit/Kernel/IODeviceTreeSupport.cpp          |   54 +-
 iokit/Kernel/IOEventSource.cpp                |  109 +-
 iokit/Kernel/IOFilterInterruptEventSource.cpp |   45 +-
 iokit/Kernel/IOHibernateIO.cpp                |  663 +-
 iokit/Kernel/IOHibernateInternal.h            |   13 +-
 iokit/Kernel/IOHibernateRestoreKernel.c       |  751 +-
 iokit/Kernel/IOInterruptController.cpp        |   66 +-
 iokit/Kernel/IOInterruptEventSource.cpp       |   90 +-
 iokit/Kernel/IOKitDebug.cpp                   |   14 +-
 iokit/Kernel/IOKitKernelInternal.h            |   27 +
 iokit/Kernel/IOLib.cpp                        |   80 +-
 iokit/Kernel/IOMemoryCursor.cpp               |   63 -
 iokit/Kernel/IOMemoryDescriptor.cpp           |  117 +-
 iokit/Kernel/IONVRAM.cpp                      |   93 +-
 iokit/Kernel/IOPMPowerSource.cpp              |   20 +-
 iokit/Kernel/IOPMrootDomain.cpp               | 6646 ++++++++-----
 iokit/Kernel/IOPlatformExpert.cpp             |  127 +-
 iokit/Kernel/IORegistryEntry.cpp              |    7 +-
 iokit/Kernel/IOService.cpp                    |  296 +-
 iokit/Kernel/IOServicePM.cpp                  | 4078 +++++---
 iokit/Kernel/IOServicePMPrivate.h             |  493 +-
 iokit/Kernel/IOServicePrivate.h               |    4 +
 iokit/Kernel/IOStartIOKit.cpp                 |   23 +-
 iokit/Kernel/IOStatistics.cpp                 | 1279 +++
 iokit/Kernel/IOTimerEventSource.cpp           |   49 +-
 iokit/Kernel/IOUserClient.cpp                 |  287 +-
 iokit/Kernel/IOWorkLoop.cpp                   |  356 +-
 iokit/Kernel/RootDomainUserClient.cpp         |  303 +-
 iokit/Kernel/RootDomainUserClient.h           |   27 +-
 iokit/Kernel/i386/IOKeyStoreHelper.cpp        |  104 +
 iokit/Kernel/i386/IOSharedLock.s              |   59 +-
 iokit/Kernel/ppc/IOAsmSupport.s               |  120 -
 iokit/Kernel/ppc/IODBDMA.cpp                  |  161 -
 iokit/Kernel/x86_64/IOSharedLock.s            |   55 +-
 iokit/KernelConfigTables.cpp                  |   26 +-
 iokit/Makefile                                |    5 +-
 iokit/bsddev/DINetBootHook.cpp                |  132 +-
 iokit/bsddev/IOKitBSDInit.cpp                 |    8 +-
 iokit/conf/MASTER                             |   13 +-
 iokit/conf/MASTER.i386                        |    5 +-
 iokit/conf/MASTER.ppc                         |   18 -
 iokit/conf/MASTER.x86_64                      |    5 +-
 iokit/conf/Makefile                           |   19 +-
 iokit/conf/Makefile.i386                      |   14 +-
 iokit/conf/Makefile.ppc                       |   27 -
 iokit/conf/Makefile.template                  |   22 +-
 iokit/conf/Makefile.x86_64                    |   14 +-
 iokit/conf/files                              |    4 +-
 iokit/conf/files.i386                         |    5 +-
 iokit/conf/files.ppc                          |   20 -
 iokit/conf/files.x86_64                       |    5 +-
 iokit/conf/tools/Makefile                     |   32 -
 iokit/conf/tools/doconf/Makefile              |   47 -
 iokit/conf/tools/doconf/doconf.csh            |  321 -
 kgmacros                                      | 4263 ++++++--
 libkern/Makefile                              |   17 +-
 libkern/OSKextLib.cpp                         |   67 +-
 libkern/OSKextVersion.c                       |    1 +
 libkern/c++/OSKext.cpp                        | 2820 ++++--
 libkern/c++/OSMetaClass.cpp                   |    1 +
 libkern/c++/OSObject.cpp                      |   26 -
 libkern/c++/OSObjectAsm.s                     |   75 -
 libkern/c++/OSOrderedSet.cpp                  |    4 +-
 libkern/c++/OSRuntime.cpp                     |   43 +-
 libkern/c++/OSSet.cpp                         |   31 +-
 libkern/c++/OSSymbol.cpp                      |   11 +-
 .../TestSerialization/test1/test1_main.cpp    |    0
 libkern/conf/MASTER                           |    7 +
 libkern/conf/MASTER.i386                      |    5 +-
 libkern/conf/MASTER.ppc                       |   19 -
 libkern/conf/MASTER.x86_64                    |    5 +-
 libkern/conf/Makefile                         |   19 +-
 libkern/conf/Makefile.i386                    |    6 +
 libkern/conf/Makefile.ppc                     |    7 -
 libkern/conf/Makefile.template                |   17 +-
 libkern/conf/Makefile.x86_64                  |    6 +
 libkern/conf/files                            |    3 +-
 libkern/conf/files.i386                       |    7 +
 libkern/conf/files.ppc                        |    6 -
 libkern/conf/files.x86_64                     |    7 +
 libkern/conf/tools/Makefile                   |   32 -
 libkern/conf/tools/doconf/Makefile            |   47 -
 libkern/conf/tools/doconf/doconf.csh          |  321 -
 libkern/crypto/intel/sha1edp.h                |   51 +
 libkern/crypto/intel/sha1edp.s                | 1481 +++
 libkern/crypto/sha1.c                         |   55 +-
 libkern/gen/OSAtomicOperations.c              |   11 +-
 libkern/gen/OSDebug.cpp                       |   43 +-
 libkern/kernel_mach_header.c                  |   27 +
 libkern/kmod/Makefile.kmod                    |   18 +-
 libkern/kmod/cplus_start.c                    |    7 +-
 libkern/kmod/cplus_stop.c                     |    7 +-
 libkern/kxld/Makefile                         |   94 +-
 {iokit/Kernel => libkern/kxld}/WKdmCompress.c |    6 +-
 .../Kernel => libkern/kxld}/WKdmDecompress.c  |    0
 libkern/kxld/i386/WKdmCompress.s              |  597 ++
 libkern/kxld/i386/WKdmDecompress.s            |  675 ++
 libkern/kxld/kxld.c                           |  456 +-
 libkern/kxld/kxld_array.c                     |    3 +
 libkern/kxld/kxld_copyright.c                 |   34 +-
 libkern/kxld/kxld_demangle.c                  |   28 +
 libkern/kxld/kxld_demangle.h                  |   28 +
 libkern/kxld/kxld_kext.c                      | 3260 ++----
 libkern/kxld/kxld_kext.h                      |   85 +-
 libkern/kxld/kxld_object.c                    | 2185 ++++
 libkern/kxld/kxld_object.h                    |  159 +
 libkern/kxld/kxld_reloc.c                     |  298 +-
 libkern/kxld/kxld_reloc.h                     |   54 +-
 libkern/kxld/kxld_sect.c                      |   25 +-
 libkern/kxld/kxld_sect.h                      |    5 +-
 libkern/kxld/kxld_seg.c                       |   44 +-
 libkern/kxld/kxld_seg.h                       |   10 +-
 libkern/kxld/kxld_state.c                     | 1072 --
 libkern/kxld/kxld_state.h                     |  155 -
 libkern/kxld/kxld_stubs.c                     |   25 +-
 libkern/kxld/kxld_sym.c                       |  222 +-
 libkern/kxld/kxld_sym.h                       |   19 +-
 libkern/kxld/kxld_symtab.c                    |  212 +-
 libkern/kxld/kxld_symtab.h                    |   33 +-
 libkern/kxld/kxld_util.c                      |   28 +-
 libkern/kxld/kxld_util.h                      |   12 +-
 libkern/kxld/kxld_vtable.c                    |  531 +-
 libkern/kxld/kxld_vtable.h                    |   41 +-
 libkern/kxld/tests/kextcopyright.c            |   29 +
 libkern/kxld/tests/kxld_array_test.c          |  160 +
 libkern/kxld/tests/kxld_dict_test.c           |   44 +-
 .../psl.h => libkern/kxld/tests/kxld_test.c   |   26 +-
 .../kxld/tests/kxld_test.h                    |   13 +-
 libkern/kxld/tests/loadtest.py                |   28 +
 libkern/libkern/Makefile                      |   13 +-
 libkern/libkern/OSAtomic.h                    |  113 +-
 libkern/libkern/OSAtomic.h.save               |  305 -
 libkern/libkern/OSByteOrder.h                 |    4 +-
 libkern/libkern/OSCrossEndian.h               |   21 -
 libkern/libkern/OSDebug.h                     |    3 +
 libkern/libkern/OSKextLib.h                   |   40 +-
 libkern/libkern/OSKextLibPrivate.h            |  122 +-
 {iokit/Kernel => libkern/libkern}/WKdm.h      |    6 +-
 libkern/libkern/_OSByteOrder.h                |    2 +-
 libkern/libkern/c++/Makefile                  |    4 -
 libkern/libkern/c++/OSKext.h                  |   95 +-
 libkern/libkern/c++/OSMetaClass.h             |    9 +-
 libkern/libkern/c++/OSObject.h                |   21 +-
 libkern/libkern/c++/OSOrderedSet.h            |    4 +-
 libkern/libkern/c++/OSSet.h                   |   36 +-
 libkern/libkern/crypto/Makefile               |    4 -
 libkern/libkern/crypto/sha1.h                 |    2 +
 libkern/libkern/kernel_mach_header.h          |    6 +-
 libkern/libkern/kext_request_keys.h           |   42 +-
 libkern/libkern/kxld.h                        |   45 +-
 libkern/libkern/kxld_types.h                  |   26 +-
 libkern/libkern/machine/Makefile              |    4 -
 libkern/libkern/mkext.h                       |    1 +
 libkern/libkern/ppc/Makefile                  |   31 -
 libkern/libkern/ppc/OSByteOrder.h             |  206 -
 libkern/libkern/prelink.h                     |    2 +-
 libkern/libkern/tree.h                        |  802 ++
 libkern/libkern/version.h.template            |   42 +-
 libkern/ppc/OSAtomic.s                        |  104 -
 libkern/ppc/bcmp.s                            |   92 -
 libkern/ppc/memcmp.s                          |  106 -
 libkern/ppc/strlen.s                          |  118 -
 libkern/uuid/Makefile                         |    4 -
 libkern/uuid/uuid.c                           |   21 +-
 libkern/x86_64/OSAtomic.s                     |   16 +-
 libkern/zlib/adler32.c                        |   22 +-
 libkern/zlib/arm/adler32vec.s                 |  428 -
 libkern/zlib/arm/inffastS.s                   |  565 --
 libkern/zlib/inffast.c                        |    9 +-
 libkern/zlib/intel/adler32vec.s               | 1050 ++
 libkern/zlib/intel/inffastS.s                 | 1179 +++
 libsa/Makefile                                |    4 +-
 libsa/bootstrap.cpp                           |  113 +-
 libsa/conf/MASTER                             |    1 -
 libsa/conf/MASTER.i386                        |    1 -
 libsa/conf/MASTER.ppc                         |   18 -
 libsa/conf/MASTER.x86_64                      |    1 -
 libsa/conf/Makefile                           |   19 +-
 libsa/conf/Makefile.i386                      |    1 +
 libsa/conf/Makefile.ppc                       |    7 -
 libsa/conf/Makefile.template                  |   17 +-
 libsa/conf/Makefile.x86_64                    |    1 +
 libsa/conf/files.ppc                          |    1 -
 libsa/conf/tools/Makefile                     |   32 -
 libsa/conf/tools/doconf/Makefile              |   47 -
 libsa/conf/tools/doconf/doconf.csh            |  321 -
 libsa/lastkernelconstructor.c                 |    4 +-
 libsa/libsa/Makefile                          |    2 -
 libsyscall/BSDmakefile                        |  141 -
 libsyscall/GNUmakefile                        |    8 -
 libsyscall/Libsyscall.xcconfig                |   31 +
 .../Libsyscall.xcodeproj/project.pbxproj      | 1029 ++
 libsyscall/Makefile                           |   65 -
 libsyscall/Makefile.inc                       |   52 -
 libsyscall/Makefile.xbs                       |  130 -
 libsyscall/Platforms/MacOSX/i386/syscall.map  |   93 +
 .../Platforms/MacOSX/x86_64/syscall.map       |   54 +
 libsyscall/Platforms/syscall.map              |   16 +
 libsyscall/create-syscalls.pl                 |  266 -
 libsyscall/custom/SYS.h                       |   47 +-
 libsyscall/custom/__fork.s                    |  176 +-
 libsyscall/custom/__getpid.s                  |   40 +-
 libsyscall/custom/__gettimeofday.s            |   16 +-
 libsyscall/custom/__lseek.s                   |    6 +-
 libsyscall/custom/__pipe.s                    |   16 +-
 libsyscall/custom/__psynch_cvbroad.s          |    4 +-
 libsyscall/custom/__psynch_cvwait.s           |    4 +-
 libsyscall/custom/__ptrace.s                  |   14 +-
 libsyscall/custom/__sigaltstack.s             |    6 +-
 libsyscall/custom/__sigreturn.s               |    6 +-
 libsyscall/custom/__syscall.s                 |    8 +-
 libsyscall/custom/__thread_selfid.s           |    4 +-
 libsyscall/custom/__vfork.s                   |   51 +-
 libsyscall/custom/custom.s                    |   39 +-
 .../custom/errno.c                            |    3 +-
 libsyscall/include/Makefile.inc               |    1 -
 libsyscall/include/processor_facilities.h     |   36 -
 libsyscall/mach/Makefile.inc                  |   74 -
 .../mach/abort.h                              |   15 +-
 libsyscall/mach/bootstrap_ports.c             |   72 -
 libsyscall/mach/brk.2                         |  150 -
 libsyscall/mach/clock_sleep.c                 |   13 +-
 .../mach/dylib_link.c                         |    4 +-
 libsyscall/mach/err_iokit.sub                 |   16 +-
 libsyscall/mach/err_ipc.sub                   |    6 +-
 libsyscall/mach/err_kern.sub                  |    4 +-
 libsyscall/mach/err_libkern.sub               |    6 +-
 libsyscall/mach/err_mach_ipc.sub              |    6 +-
 libsyscall/mach/err_server.sub                |   32 +-
 libsyscall/mach/error_codes.c                 |    4 +-
 libsyscall/mach/errorlib.h                    |   15 +-
 libsyscall/mach/exc_catcher.c                 |   36 +-
 libsyscall/mach/exc_catcher.h                 |   64 +
 libsyscall/mach/exc_catcher_state.c           |   35 +-
 libsyscall/mach/exc_catcher_state_identity.c  |   35 +-
 libsyscall/mach/fprintf_stderr.c              |   22 +-
 libsyscall/mach/headers/Makefile.inc          |   10 -
 libsyscall/mach/i386/Makefile.inc             |    3 -
 libsyscall/mach/{headers => mach}/errorlib.h  |   12 +-
 libsyscall/mach/{headers => mach}/mach.h      |    0
 .../mach/{headers => mach}/mach_error.h       |    0
 libsyscall/mach/{headers => mach}/mach_init.h |    7 +-
 .../mach/{headers => mach}/mach_interface.h   |    0
 libsyscall/mach/{headers => mach}/port_obj.h  |    0
 libsyscall/mach/{headers => mach}/sync.h      |    0
 libsyscall/mach/{headers => mach}/task.h      |    4 -
 .../mach/{headers => mach}/thread_act.h       |    4 -
 libsyscall/mach/{headers => mach}/vm_task.h   |    0
 libsyscall/mach/mach_error.c                  |   16 +-
 libsyscall/mach/mach_error_string.c           |    1 -
 libsyscall/mach/mach_init.c                   |  197 +-
 libsyscall/mach/mach_init_libSystem.c         |   58 -
 libsyscall/mach/mach_init_ports.c             |  140 -
 .../mach/mach_legacy.c                        |   29 +-
 libsyscall/mach/mach_msg.c                    |   68 +-
 libsyscall/mach/mig_allocate.c                |    2 +-
 libsyscall/mach/mig_deallocate.c              |    2 +-
 libsyscall/mach/mig_reply_port.c              |   95 +
 .../rpc.h => libsyscall/mach/mig_reply_port.h |   13 +-
 libsyscall/mach/mig_strncpy.c                 |   23 +-
 libsyscall/mach/ms_thread_switch.c            |   10 +-
 libsyscall/mach/panic.c                       |   19 +-
 libsyscall/mach/port_obj.c                    |    2 +-
 libsyscall/mach/ppc/Makefile.inc              |    3 -
 libsyscall/mach/ppc64/Makefile.inc            |    4 -
 libsyscall/mach/sbrk.c                        |   78 -
 libsyscall/mach/servers/Makefile.inc          |   16 -
 libsyscall/mach/slot_name.c                   |   20 -
 libsyscall/mach/string.c                      |  120 +
 .../mach/string.h                             |   41 +-
 libsyscall/mach/x86_64/Makefile.inc           |    3 -
 libsyscall/wrappers/__get_cpu_capabilities.s  |   49 +
 .../wrappers/_errno.h                         |    8 +-
 .../wrappers/_libc_funcptr.c                  |   78 +-
 .../wrappers/_libkernel_init.c                |   36 +-
 .../wrappers/_libkernel_init.h                |   38 +-
 libsyscall/wrappers/cancelable/fcntl-base.c   |   65 +
 .../cancelable/fcntl-cancel.c}                |   15 +-
 libsyscall/wrappers/cancelable/fcntl.c        |   34 +
 .../wrappers/cancelable/select-cancel.c       |   26 +
 libsyscall/wrappers/cancelable/select.c       |   27 +
 .../wrappers/cancelable/sigsuspend-cancel.c   |   26 +
 .../cancelable/sigsuspend.c}                  |   13 +-
 .../wrappers/init_cpu_capabilities.c          |   33 +-
 libsyscall/wrappers/ioctl.c                   |   47 +
 libsyscall/wrappers/kill.c                    |   43 +
 libsyscall/wrappers/legacy/accept.c           |   56 +
 libsyscall/wrappers/legacy/bind.c             |   56 +
 libsyscall/wrappers/legacy/connect.c          |   56 +
 libsyscall/wrappers/legacy/getattrlist.c      |   65 +
 libsyscall/wrappers/legacy/getpeername.c      |   56 +
 libsyscall/wrappers/legacy/getsockname.c      |   56 +
 libsyscall/wrappers/legacy/kill.c             |   30 +
 libsyscall/wrappers/legacy/lchown.c           |   55 +
 .../wrappers/legacy/listen.c                  |   47 +-
 libsyscall/wrappers/legacy/mprotect.c         |   69 +
 libsyscall/wrappers/legacy/msync.c            |   53 +
 libsyscall/wrappers/legacy/munmap.c           |   65 +
 libsyscall/wrappers/legacy/open.c             |   54 +
 libsyscall/wrappers/legacy/recvfrom.c         |   55 +
 libsyscall/wrappers/legacy/recvmsg.c          |   55 +
 libsyscall/wrappers/legacy/select-pre1050.c   |   32 +
 libsyscall/wrappers/legacy/select.c           |   31 +
 libsyscall/wrappers/legacy/sendmsg.c          |   56 +
 libsyscall/wrappers/legacy/sendto.c           |   56 +
 libsyscall/wrappers/legacy/setattrlist.c      |   65 +
 libsyscall/wrappers/legacy/sigsuspend.c       |   31 +
 libsyscall/wrappers/legacy/socketpair.c       |   57 +
 libsyscall/wrappers/memcpy.c                  |  143 +
 libsyscall/wrappers/remove-counter.c          |   49 +
 libsyscall/wrappers/rename.c                  |   33 +
 libsyscall/wrappers/rmdir.c                   |   33 +
 libsyscall/wrappers/select-base.c             |   82 +
 libsyscall/wrappers/sigsuspend-base.c         |   41 +
 libsyscall/wrappers/unix03/chmod.c            |   62 +
 libsyscall/wrappers/unix03/fchmod.c           |   62 +
 libsyscall/wrappers/unix03/getrlimit.c        |   46 +
 libsyscall/wrappers/unix03/mmap.c             |   62 +
 libsyscall/wrappers/unix03/setrlimit.c        |   46 +
 libsyscall/wrappers/unlink.c                  |   33 +
 libsyscall/xcodescripts/compat-symlinks.sh    |   32 +
 libsyscall/xcodescripts/compile-syscalls.pl   |  130 +
 libsyscall/xcodescripts/create-syscalls.pl    |  403 +
 libsyscall/xcodescripts/mach_install_mig.sh   |   97 +
 makedefs/MakeInc.cmd                          |  127 +-
 makedefs/MakeInc.def                          |  202 +-
 makedefs/MakeInc.dir                          |  147 +-
 makedefs/MakeInc.rule                         |  131 +-
 osfmk/Makefile                                |   16 +-
 osfmk/UserNotification/Makefile               |    4 -
 osfmk/UserNotification/UNDRequest.defs        |    5 +-
 osfmk/chud/chud_cpu.c                         |   12 -
 osfmk/chud/chud_thread.c                      |   47 +-
 osfmk/chud/chud_xnu.h                         |   23 +-
 osfmk/chud/chud_xnu_glue.h                    |    4 +-
 osfmk/chud/chud_xnu_private.h                 |    4 +-
 osfmk/chud/i386/chud_osfmk_callback_i386.c    |   90 +-
 osfmk/chud/i386/chud_thread_i386.c            |    7 -
 osfmk/chud/ppc/chud_cpu_asm.h                 |   38 -
 osfmk/chud/ppc/chud_cpu_asm.s                 |  593 --
 osfmk/chud/ppc/chud_cpu_ppc.c                 | 1182 ---
 osfmk/chud/ppc/chud_osfmk_callback_ppc.c      |  549 -
 osfmk/chud/ppc/chud_spr.h                     |  273 -
 osfmk/chud/ppc/chud_thread_ppc.c              |  586 --
 osfmk/chud/ppc/chud_xnu_private.h             |   59 -
 osfmk/conf/MASTER                             |   29 +-
 osfmk/conf/MASTER.i386                        |    6 +-
 osfmk/conf/MASTER.ppc                         |   67 -
 osfmk/conf/MASTER.x86_64                      |   12 +-
 osfmk/conf/Makefile                           |   17 +-
 osfmk/conf/Makefile.i386                      |   15 +-
 osfmk/conf/Makefile.ppc                       |   76 -
 osfmk/conf/Makefile.template                  |   24 +-
 osfmk/conf/Makefile.x86_64                    |   27 +-
 osfmk/conf/files                              |   15 +-
 osfmk/conf/files.i386                         |   44 +-
 osfmk/conf/files.ppc                          |  120 -
 osfmk/conf/files.x86_64                       |   41 +-
 osfmk/conf/tools/Makefile                     |   32 -
 osfmk/conf/tools/doconf/Makefile              |   47 -
 osfmk/conf/tools/doconf/doconf.csh            |  321 -
 osfmk/console/i386/serial_console.c           |   10 +-
 osfmk/console/ppc/serial_console.c            |  329 -
 osfmk/console/ppc/video_scroll.s              |  141 -
 osfmk/console/serial_general.c                |    1 -
 osfmk/console/serial_protos.h                 |   11 +-
 osfmk/console/video_console.c                 |   10 +-
 osfmk/ddb/db_command.c                        |   70 -
 osfmk/ddb/db_print.c                          |    2 +-
 osfmk/ddb/db_sym.c                            |    4 +-
 osfmk/ddb/db_trap.c                           |    4 -
 osfmk/ddb/db_variables.c                      |    4 +-
 osfmk/ddb/db_variables.h                      |    2 +-
 osfmk/ddb/makedis.c                           |    5 +-
 osfmk/default_pager/default_pager.c           |    1 +
 osfmk/default_pager/default_pager_internal.h  |   13 +-
 osfmk/default_pager/dp_backing_store.c        |  448 +-
 osfmk/default_pager/dp_memory_object.c        |   28 +
 osfmk/device/device.defs                      |    8 +-
 osfmk/device/iokit_rpc.c                      |   49 +-
 osfmk/device/subrs.c                          |    8 +-
 osfmk/gssd/Makefile                           |    4 -
 osfmk/gssd/gssd_mach.defs                     |  106 +-
 osfmk/gssd/gssd_mach_types.h                  |   45 +-
 osfmk/i386/AT386/model_dep.c                  |  242 +-
 osfmk/i386/Diagnostics.h                      |    4 +-
 osfmk/i386/Makefile                           |    8 +-
 osfmk/i386/acpi.c                             |   23 +-
 osfmk/i386/asm.h                              |  100 +
 osfmk/i386/bsd_i386.c                         |  283 +-
 osfmk/i386/bsd_i386_native.c                  |  283 +
 osfmk/i386/bzero.s                            |    2 +-
 osfmk/i386/commpage/atomic.s                  |  396 -
 osfmk/i386/commpage/bcopy_scalar.s            |  136 -
 osfmk/i386/commpage/bcopy_sse2.s              |  473 -
 osfmk/i386/commpage/bcopy_sse3x.s             |  823 --
 osfmk/i386/commpage/bcopy_sse3x_64.s          |  820 --
 osfmk/i386/commpage/bcopy_sse42.s             |  311 -
 osfmk/i386/commpage/bcopy_sse42_64.s          |  301 -
 osfmk/i386/commpage/bzero_scalar.s            |  115 -
 osfmk/i386/commpage/bzero_sse2.s              |  162 -
 osfmk/i386/commpage/bzero_sse2_64.s           |  161 -
 osfmk/i386/commpage/bzero_sse42.s             |  151 -
 osfmk/i386/commpage/bzero_sse42_64.s          |  148 -
 osfmk/i386/commpage/cacheflush.s              |   79 -
 osfmk/i386/commpage/commpage.c                |  149 +-
 osfmk/i386/commpage/commpage.h                |    1 +
 osfmk/i386/commpage/commpage_asm.s            |   78 -
 osfmk/i386/commpage/commpage_gettimeofday.s   |  122 -
 .../commpage/commpage_mach_absolute_time.s    |  173 -
 osfmk/i386/commpage/commpage_sigs.c           |  189 -
 osfmk/i386/commpage/cpu_number.s              |   77 -
 osfmk/i386/commpage/fifo_queues.s             |   74 -
 osfmk/i386/commpage/longcopy_sse3x.s          |  221 -
 osfmk/i386/commpage/longcopy_sse3x_64.s       |  210 -
 osfmk/i386/commpage/memset_pattern_sse2.s     |  183 -
 osfmk/i386/commpage/memset_pattern_sse2_64.s  |  184 -
 osfmk/i386/commpage/pthreads.s                |  111 -
 osfmk/i386/commpage/spinlocks.s               |  189 -
 osfmk/i386/copyio.c                           |  621 ++
 osfmk/i386/cpu.c                              |   16 +-
 osfmk/i386/cpu_capabilities.h                 |  131 +-
 osfmk/i386/cpu_data.h                         |   90 +-
 osfmk/i386/cpuid.c                            |   57 +-
 osfmk/i386/cpuid.h                            |    2 +-
 osfmk/i386/cswitch.s                          |   12 +-
 osfmk/i386/db_interface.c                     |    2 +
 osfmk/i386/db_machdep.h                       |    4 +-
 osfmk/i386/db_trace.c                         |   18 +-
 osfmk/i386/endian.h                           |    8 +-
 osfmk/i386/etimer.c                           |  126 +-
 osfmk/i386/fpu.c                              |   37 +-
 osfmk/i386/fpu.h                              |   10 +-
 osfmk/i386/gdt.c                              |   14 +-
 osfmk/i386/genassym.c                         |  107 +-
 osfmk/i386/hibernate_i386.c                   |   18 +-
 osfmk/i386/hibernate_restore.c                |   96 +-
 osfmk/i386/hw_lock_types.h                    |    2 +-
 osfmk/i386/i386_init.c                        |  114 +-
 osfmk/i386/i386_lock.s                        |  705 +-
 osfmk/i386/i386_vm_init.c                     |  271 +-
 osfmk/i386/idle_pt.c                          |   16 +-
 osfmk/i386/idt.s                              |  545 +-
 osfmk/i386/idt64.s                            |  717 +-
 osfmk/i386/ipl.h                              |  112 -
 osfmk/i386/lapic.c                            |  866 +-
 osfmk/i386/lapic.h                            |    2 +
 osfmk/i386/lapic_native.c                     |  919 ++
 osfmk/i386/ldt.c                              |   12 +-
 osfmk/i386/locks.h                            |   82 +-
 osfmk/i386/locks_i386.c                       |  219 +-
 osfmk/i386/locore.s                           | 1241 +--
 osfmk/i386/loose_ends.c                       |  644 +-
 osfmk/i386/machine_check.c                    |   53 +-
 osfmk/i386/machine_check.h                    |    9 +-
 osfmk/i386/machine_cpu.h                      |    1 +
 osfmk/i386/machine_routines.c                 |   79 +-
 osfmk/i386/machine_routines.h                 |   31 +-
 osfmk/i386/machine_routines_asm.s             |   93 +-
 osfmk/i386/misc_protos.h                      |    5 +
 osfmk/i386/mp.c                               |  628 +-
 osfmk/i386/mp.h                               |   51 +-
 osfmk/i386/mp_desc.c                          |  103 +-
 osfmk/i386/mp_desc.h                          |   65 +-
 osfmk/i386/mp_events.h                        |    2 +-
 osfmk/i386/mp_native.c                        |  126 +
 osfmk/i386/mtrr.c                             |    5 +-
 osfmk/i386/pal_hibernate.h                    |   45 +
 .../{ppc/cpu_number.h => i386/pal_lock_asm.h} |   19 +-
 osfmk/i386/pal_native.h                       |  102 +
 osfmk/i386/pal_routines.c                     |  349 +
 osfmk/i386/pal_routines.h                     |  184 +
 osfmk/i386/pal_routines_asm.s                 |  192 +
 osfmk/{ppc/mp.h => i386/pal_rtclock_asm.h}    |   15 +-
 osfmk/i386/pcb.c                              |  665 +-
 osfmk/i386/pcb_native.c                       |  652 ++
 osfmk/i386/pmCPU.c                            |  131 +-
 osfmk/i386/pmCPU.h                            |   19 +-
 osfmk/i386/pmap.c                             |  811 +-
 osfmk/i386/pmap.h                             |  119 +-
 osfmk/i386/pmap_common.c                      |  505 +
 osfmk/i386/pmap_internal.h                    |  515 +-
 osfmk/i386/pmap_pcid.h                        |   99 +
 osfmk/i386/pmap_x86_common.c                  |  438 +-
 osfmk/i386/proc_reg.h                         |  122 +-
 osfmk/i386/rtclock.c                          |  231 +-
 osfmk/i386/rtclock_asm.h                      |  290 +
 .../i386/{rtclock.h => rtclock_asm_native.h}  |   64 +-
 osfmk/i386/rtclock_native.c                   |  202 +
 .../{ppc/rtclock.h => i386/rtclock_protos.h}  |   47 +-
 osfmk/i386/seg.h                              |    4 +-
 osfmk/i386/serial_io.h                        |    4 +-
 osfmk/i386/simple_lock.h                      |    9 +-
 osfmk/i386/start.s                            |   27 +-
 osfmk/i386/startup64.c                        |    4 -
 osfmk/i386/thread.h                           |  166 +-
 osfmk/i386/trap.c                             |  448 +-
 osfmk/i386/trap.h                             |   11 +-
 osfmk/i386/trap_native.c                      |  295 +
 osfmk/i386/tsc.c                              |    1 -
 osfmk/i386/tsc.h                              |    4 +-
 osfmk/i386/ucode.c                            |  201 +
 osfmk/i386/ucode.h                            |   30 +
 osfmk/i386/user_ldt.c                         |    4 +-
 osfmk/i386/vmx/vmx_asm.h                      |    4 +-
 osfmk/i386/vmx/vmx_cpu.c                      |   13 +-
 osfmk/ipc/ipc_entry.c                         |   11 +-
 osfmk/ipc/ipc_entry.h                         |    2 +
 osfmk/ipc/ipc_init.c                          |   21 +-
 osfmk/ipc/ipc_kmsg.c                          |  378 +-
 osfmk/ipc/ipc_kmsg.h                          |   21 +-
 osfmk/ipc/ipc_labelh.c                        |    4 +
 osfmk/ipc/ipc_mqueue.c                        |   37 +-
 osfmk/ipc/ipc_mqueue.h                        |    2 +-
 osfmk/ipc/ipc_notify.c                        |   18 +
 osfmk/ipc/ipc_notify.h                        |    5 +
 osfmk/ipc/ipc_object.c                        |   46 +-
 osfmk/ipc/ipc_object.h                        |    5 +
 osfmk/ipc/ipc_port.c                          |  316 +-
 osfmk/ipc/ipc_port.h                          |   63 +-
 osfmk/ipc/ipc_pset.c                          |    4 +-
 osfmk/ipc/ipc_right.c                         |  357 +-
 osfmk/ipc/ipc_right.h                         |   15 +-
 osfmk/ipc/ipc_space.c                         |    5 +
 osfmk/ipc/ipc_table.c                         |   14 +-
 osfmk/ipc/ipc_table.h                         |    6 +-
 osfmk/ipc/ipc_types.h                         |    1 +
 osfmk/ipc/mach_debug.c                        |   32 +-
 osfmk/ipc/mach_msg.c                          |   32 +-
 osfmk/ipc/mach_port.c                         |   87 +-
 osfmk/kdp/kdp.c                               |  156 +-
 osfmk/kdp/kdp_core.h                          |   15 +-
 osfmk/kdp/kdp_dyld.h                          |    2 +-
 osfmk/kdp/kdp_en_debugger.h                   |    1 +
 osfmk/kdp/kdp_private.h                       |    1 +
 osfmk/kdp/kdp_udp.c                           |  498 +-
 osfmk/kdp/ml/i386/kdp_vm.c                    |  102 +-
 osfmk/kdp/ml/i386/kdp_x86_common.c            |   10 +-
 osfmk/kdp/ml/ppc/kdp_asm.s                    |   95 -
 osfmk/kdp/ml/ppc/kdp_machdep.c                |  827 --
 osfmk/kdp/ml/ppc/kdp_misc.s                   |   71 -
 osfmk/kdp/ml/ppc/kdp_vm.c                     |  570 --
 osfmk/kdp/ml/x86_64/kdp_machdep.c             |    5 +
 osfmk/kdp/ml/x86_64/kdp_vm.c                  |   37 +-
 osfmk/kern/Makefile                           |    1 +
 osfmk/kern/ast.c                              |   11 +-
 osfmk/kern/audit_sessionport.c                |  139 +-
 osfmk/kern/audit_sessionport.h                |    8 +-
 osfmk/kern/bsd_kern.c                         |   54 +-
 osfmk/kern/call_entry.h                       |  121 +-
 osfmk/kern/clock.c                            |  104 +-
 osfmk/kern/clock_oldops.c                     |    2 +-
 osfmk/kern/debug.c                            |  103 +-
 osfmk/kern/debug.h                            |   43 +-
 osfmk/kern/etimer.h                           |   13 +-
 osfmk/kern/exception.c                        |    1 -
 osfmk/kern/extmod_statistics.c                |  136 +
 .../PPCcalls.c => kern/extmod_statistics.h}   |   33 +-
 osfmk/kern/hibernate.c                        |   11 +
 osfmk/kern/host.c                             |  121 +-
 osfmk/kern/host.h                             |    4 +-
 osfmk/kern/host_notify.c                      |   12 +-
 osfmk/kern/host_statistics.h                  |    7 -
 osfmk/kern/ipc_kobject.c                      |    2 -
 osfmk/kern/ipc_mig.c                          |   46 +-
 osfmk/kern/ipc_misc.c                         |   99 +-
 osfmk/kern/ipc_misc.h                         |    6 +-
 osfmk/kern/kalloc.c                           |  142 +-
 osfmk/kern/kalloc.h                           |    6 +-
 osfmk/kern/kern_types.h                       |    9 +
 osfmk/kern/kext_alloc.c                       |    7 +-
 osfmk/kern/kmod.c                             |    8 +-
 osfmk/kern/locks.c                            |   34 +-
 osfmk/kern/locks.h                            |   11 +-
 osfmk/kern/mach_param.h                       |    2 +-
 osfmk/kern/machine.c                          |   23 +-
 osfmk/kern/misc_protos.h                      |    5 +-
 osfmk/kern/mk_sp.c                            |   26 +-
 osfmk/kern/pms.h                              |   11 -
 osfmk/kern/printf.c                           |   16 +-
 osfmk/kern/priority.c                         |  181 +-
 osfmk/kern/processor.c                        |   41 +-
 osfmk/kern/processor.h                        |   32 +-
 osfmk/kern/processor_data.h                   |   42 +
 osfmk/kern/queue.c                            |   65 +-
 osfmk/kern/queue.h                            |   52 +-
 osfmk/kern/sched.h                            |  126 +-
 osfmk/kern/sched_average.c                    |   38 +-
 osfmk/kern/sched_fixedpriority.c              |  727 ++
 osfmk/kern/sched_grrr.c                       |  956 ++
 osfmk/kern/sched_prim.c                       | 1623 ++-
 osfmk/kern/sched_prim.h                       |  368 +-
 osfmk/kern/sched_proto.c                      |  597 ++
 osfmk/kern/stack.c                            |  111 +-
 osfmk/kern/startup.c                          |  113 +-
 osfmk/kern/startup.h                          |    1 +
 osfmk/kern/sync_lock.c                        |   10 +-
 osfmk/kern/sync_sema.c                        |    4 +-
 osfmk/kern/syscall_subr.c                     |   44 +-
 osfmk/kern/syscall_sw.c                       |    4 +
 osfmk/kern/syscall_sw.h                       |   29 +-
 osfmk/kern/task.c                             |  281 +-
 osfmk/kern/task.h                             |  211 +-
 osfmk/kern/task_policy.c                      | 1154 ++-
 osfmk/kern/thread.c                           |  195 +-
 osfmk/kern/thread.h                           |   93 +-
 osfmk/kern/thread_act.c                       |   78 +-
 osfmk/kern/thread_call.c                      |  187 +-
 osfmk/kern/thread_policy.c                    |  178 +-
 osfmk/kern/timer_call.c                       |  552 +-
 osfmk/kern/timer_call.h                       |   31 +-
 osfmk/kern/timer_queue.h                      |   26 +-
 osfmk/kern/wait_queue.c                       |  105 +-
 osfmk/kern/wait_queue.h                       |   19 +-
 osfmk/kern/zalloc.c                           | 1332 ++-
 osfmk/kern/zalloc.h                           |   76 +-
 osfmk/kextd/Makefile                          |    4 -
 osfmk/libsa/machine/types.h                   |    4 +-
 osfmk/libsa/ppc/types.h                       |   71 -
 osfmk/libsa/types.h                           |    1 -
 osfmk/lockd/Makefile                          |    4 -
 osfmk/mach/Makefile                           |   38 +-
 osfmk/mach/branch_predicates.h                |   35 +
 osfmk/mach/clock_types.h                      |    1 +
 osfmk/mach/host_info.h                        |   45 +
 osfmk/mach/i386/_structs.h                    |   28 +-
 osfmk/mach/i386/_types.h                      |  221 -
 osfmk/mach/i386/sdt_isa.h                     |    8 +-
 osfmk/mach/i386/thread_status.h               |  123 +-
 osfmk/mach/i386/vm_param.h                    |   25 +-
 osfmk/mach/mach_host.defs                     |   14 +
 osfmk/mach/mach_port.defs                     |   13 +-
 osfmk/mach/mach_traps.h                       |   51 +-
 osfmk/mach/mach_types.defs                    |    5 +-
 osfmk/mach/mach_types.h                       |    1 +
 osfmk/mach/mach_vm.defs                       |    4 +
 osfmk/mach/machine.h                          |    2 +
 osfmk/mach/machine/asm.h                      |    4 +-
 osfmk/mach/machine/boolean.h                  |    4 +-
 osfmk/mach/machine/exception.h                |    4 +-
 osfmk/mach/machine/kern_return.h              |    4 +-
 osfmk/mach/machine/machine_types.defs         |    4 +-
 osfmk/mach/machine/ndr_def.h                  |    4 +-
 osfmk/mach/machine/processor_info.h           |    4 +-
 osfmk/mach/machine/rpc.h                      |    4 +-
 osfmk/mach/machine/sdt.h                      |   64 +
 osfmk/mach/machine/sdt_isa.h                  |    4 +-
 osfmk/mach/machine/syscall_sw.h               |    4 +-
 osfmk/mach/machine/thread_state.h             |    4 +-
 osfmk/mach/machine/thread_status.h            |    4 +-
 osfmk/mach/machine/vm_param.h                 |    4 +-
 osfmk/mach/machine/vm_types.h                 |    4 +-
 osfmk/mach/memory_object.defs                 |    4 +
 osfmk/mach/memory_object_types.h              |   17 +-
 osfmk/mach/message.h                          |    4 +-
 osfmk/mach/notify.defs                        |   12 +-
 osfmk/mach/notify.h                           |   11 +-
 osfmk/mach/port.h                             |    4 +-
 osfmk/mach/ppc/Makefile                       |   35 -
 osfmk/mach/ppc/_structs.h                     |  392 -
 osfmk/mach/ppc/_types.h                       |  234 -
 osfmk/mach/ppc/boolean.h                      |   74 -
 osfmk/mach/ppc/exception.h                    |  119 -
 osfmk/mach/ppc/kern_return.h                  |   74 -
 osfmk/mach/ppc/machine_types.defs             |  126 -
 osfmk/mach/ppc/ndr_def.h                      |   43 -
 osfmk/mach/ppc/processor_info.h               |  176 -
 osfmk/mach/ppc/sdt_isa.h                      |  427 -
 osfmk/mach/ppc/syscall_sw.h                   |   79 -
 osfmk/mach/ppc/thread_status.h                |  150 -
 osfmk/mach/ppc/vm_param.h                     |  110 -
 osfmk/mach/ppc/vm_types.h                     |  157 -
 osfmk/mach/processor.defs                     |    2 +-
 osfmk/mach/security.defs                      |    2 +-
 osfmk/mach/shared_region.h                    |   18 +-
 osfmk/mach/syscall_sw.h                       |    4 +-
 osfmk/mach/task.defs                          |   28 +-
 osfmk/mach/task_info.h                        |   30 +-
 osfmk/mach/task_policy.h                      |    1 +
 osfmk/mach/thread_act.defs                    |   14 +-
 osfmk/mach/thread_policy.h                    |   16 +
 osfmk/mach/vm_prot.h                          |    9 +-
 osfmk/mach/vm_region.h                        |    2 +
 osfmk/mach/vm_statistics.h                    |   64 +-
 osfmk/mach_debug/mach_debug_types.defs        |   17 +-
 osfmk/mach_debug/zone_info.h                  |   47 +-
 osfmk/machine/Makefile                        |    2 +
 osfmk/machine/asm.h                           |    4 +-
 osfmk/machine/ast.h                           |    4 +-
 osfmk/machine/ast_types.h                     |    4 +-
 osfmk/machine/commpage.h                      |    4 +-
 osfmk/machine/cpu_affinity.h                  |    4 +-
 osfmk/machine/cpu_capabilities.h              |    8 +-
 osfmk/machine/cpu_data.h                      |    4 +-
 osfmk/machine/cpu_number.h                    |    4 +-
 osfmk/machine/db_machdep.h                    |    4 +-
 osfmk/machine/endian.h                        |    4 +-
 osfmk/machine/io_map_entries.h                |    4 +-
 osfmk/machine/lock.h                          |    4 +-
 osfmk/machine/locks.h                         |    4 +-
 osfmk/machine/machine_cpu.h                   |    4 +-
 osfmk/machine/machine_routines.h              |    4 +-
 osfmk/machine/machine_rpc.h                   |    4 +-
 osfmk/machine/machlimits.h                    |    4 +-
 osfmk/machine/machparam.h                     |    4 +-
 .../machine/pal_hibernate.h                   |   18 +-
 .../machine/pal_routines.h                    |   11 +-
 osfmk/machine/pmap.h                          |    4 +-
 osfmk/machine/sched_param.h                   |    4 +-
 osfmk/machine/setjmp.h                        |    4 +-
 osfmk/machine/simple_lock.h                   |    4 +-
 osfmk/machine/task.h                          |    4 +-
 osfmk/machine/thread.h                        |    4 +-
 osfmk/machine/timer.h                         |    4 +-
 osfmk/machine/trap.h                          |    4 +-
 osfmk/machine/vm_tuning.h                     |    4 +-
 osfmk/machine/xpr.h                           |    4 +-
 osfmk/pmc/pmc.c                               |   33 +-
 osfmk/pmc/pmc.h                               |   10 +
 osfmk/ppc/AltiAssist.s                        |   91 -
 osfmk/ppc/Diagnostics.c                       |  571 --
 osfmk/ppc/Diagnostics.h                       |  124 -
 osfmk/ppc/Emulate.s                           | 1445 ---
 osfmk/ppc/Emulate64.s                         |  957 --
 osfmk/ppc/Firmware.h                          |  166 -
 osfmk/ppc/Firmware.s                          | 2517 -----
 osfmk/ppc/FirmwareC.c                         |  338 -
 osfmk/ppc/FirmwareCalls.h                     |   81 -
 osfmk/ppc/Makefile                            |   36 -
 osfmk/ppc/PPCcalls.h                          |   84 -
 osfmk/ppc/Performance.s                       |  124 -
 osfmk/ppc/PseudoKernel.c                      |  450 -
 osfmk/ppc/PseudoKernel.h                      |   99 -
 osfmk/ppc/_setjmp.s                           |  194 -
 osfmk/ppc/aligned_data.s                      |  209 -
 osfmk/ppc/asm.h                               |  781 --
 osfmk/ppc/ast.h                               |   43 -
 osfmk/ppc/ast_types.h                         |   41 -
 osfmk/ppc/atomic_switch.h                     |  130 -
 osfmk/ppc/atomic_switch.s                     |  238 -
 osfmk/ppc/bat_init.c                          |  301 -
 osfmk/ppc/bcopy.s                             |  981 --
 osfmk/ppc/bcopytest.c                         |  621 --
 osfmk/ppc/bits.s                              |  111 -
 osfmk/ppc/boot.h                              |   28 -
 osfmk/ppc/bzero.s                             |  331 -
 osfmk/ppc/cache.s                             |  389 -
 osfmk/ppc/commpage/atomic.s                   |  280 -
 osfmk/ppc/commpage/bcopy_64.s                 |  306 -
 osfmk/ppc/commpage/bcopy_970.s                |  626 --
 osfmk/ppc/commpage/bcopy_g3.s                 |  275 -
 osfmk/ppc/commpage/bcopy_g4.s                 |  622 --
 osfmk/ppc/commpage/bigcopy_970.s              |  331 -
 osfmk/ppc/commpage/bzero_128.s                |  173 -
 osfmk/ppc/commpage/bzero_32.s                 |  129 -
 osfmk/ppc/commpage/cacheflush.s               |  110 -
 osfmk/ppc/commpage/commpage.c                 |  679 --
 osfmk/ppc/commpage/commpage.h                 |   92 -
 osfmk/ppc/commpage/commpage_asm.s             |  272 -
 osfmk/ppc/commpage/gettimeofday.s             |  255 -
 osfmk/ppc/commpage/mach_absolute_time.s       |   80 -
 osfmk/ppc/commpage/memset_64.s                |   96 -
 osfmk/ppc/commpage/memset_g3.s                |  132 -
 osfmk/ppc/commpage/memset_g4.s                |  131 -
 osfmk/ppc/commpage/memset_g5.s                |  168 -
 osfmk/ppc/commpage/pthread.s                  |  121 -
 osfmk/ppc/commpage/spinlocks.s                |  247 -
 osfmk/ppc/conf.c                              |   87 -
 osfmk/ppc/console_feed.c                      |  266 -
 osfmk/ppc/console_feed_entries.h              |   48 -
 osfmk/ppc/cpu.c                               | 1184 ---
 osfmk/ppc/cpu_capabilities.h                  |  254 -
 osfmk/ppc/cpu_data.h                          |   63 -
 osfmk/ppc/cpu_internal.h                      |   89 -
 osfmk/ppc/cswtch.s                            | 2486 -----
 osfmk/ppc/db_asm.s                            |  107 -
 osfmk/ppc/db_disasm.c                         |  232 -
 osfmk/ppc/db_interface.c                      |  592 --
 osfmk/ppc/db_low_trace.c                      | 1106 ---
 osfmk/ppc/db_low_trace.h                      |   62 -
 osfmk/ppc/db_machdep.h                        |  186 -
 osfmk/ppc/db_trace.c                          | 1122 ---
 osfmk/ppc/endian.h                            |   93 -
 osfmk/ppc/etimer.c                            |  195 -
 osfmk/ppc/exception.h                         |  693 --
 osfmk/ppc/fpu_protos.h                        |   41 -
 osfmk/ppc/genassym.c                          | 1438 ---
 osfmk/ppc/hexfont.h                           |  301 -
 osfmk/ppc/hibernate_ppc.c                     |  213 -
 osfmk/ppc/hibernate_restore.s                 |  192 -
 osfmk/ppc/hw_exception.s                      | 1832 ----
 osfmk/ppc/hw_lock.s                           | 2187 ----
 osfmk/ppc/hw_lock_types.h                     |   74 -
 osfmk/ppc/hw_perfmon.c                        |  959 --
 osfmk/ppc/hw_perfmon.h                        |  122 -
 osfmk/ppc/hw_perfmon_mmcr.h                   |  186 -
 osfmk/ppc/hw_vm.s                             | 8794 -----------------
 osfmk/ppc/instrumentation.h                   |   61 -
 osfmk/ppc/interrupt.c                         |  187 -
 osfmk/ppc/io_map.c                            |  131 -
 osfmk/ppc/io_map_entries.h                    |   45 -
 osfmk/ppc/lock.h                              |   86 -
 osfmk/ppc/locks.h                             |  220 -
 osfmk/ppc/locks_ppc.c                         | 2360 -----
 osfmk/ppc/low_trace.h                         |   92 -
 osfmk/ppc/lowglobals.h                        |  102 -
 osfmk/ppc/lowmem_vectors.s                    | 4010 --------
 osfmk/ppc/machine_routines.c                  |  847 --
 osfmk/ppc/machine_routines.h                  |  338 -
 osfmk/ppc/machine_routines_asm.s              | 2345 -----
 osfmk/ppc/machine_task.c                      |   85 -
 osfmk/ppc/machlimits.h                        |   92 -
 osfmk/ppc/machparam.h                         |   86 -
 osfmk/ppc/mappings.c                          | 1805 ----
 osfmk/ppc/mappings.h                          |  499 -
 osfmk/ppc/mcount.s                            |   81 -
 osfmk/ppc/mem.h                               |   68 -
 osfmk/ppc/misc.c                              |  120 -
 osfmk/ppc/misc_asm.s                          |  287 -
 osfmk/ppc/misc_protos.h                       |  138 -
 osfmk/ppc/model_dep.c                         | 1045 --
 osfmk/ppc/movc.s                              | 1303 ---
 osfmk/ppc/new_screen.h                        |   48 -
 osfmk/ppc/pcb.c                               |  672 --
 osfmk/ppc/pmap.c                              | 2121 ----
 osfmk/ppc/pmap.h                              |  338 -
 osfmk/ppc/pms.c                               |  743 --
 osfmk/ppc/pmsCPU.c                            |  313 -
 osfmk/ppc/ppc_disasm.i                        |  234 -
 osfmk/ppc/ppc_init.c                          |  302 -
 osfmk/ppc/ppc_vm_init.c                       |  427 -
 osfmk/ppc/proc_reg.h                          |  403 -
 osfmk/ppc/rtclock.c                           |  306 -
 osfmk/ppc/savearea.c                          |  327 -
 osfmk/ppc/savearea.h                          |  393 -
 osfmk/ppc/savearea_asm.s                      | 1621 ---
 osfmk/ppc/scc_8530.h                          |  428 -
 osfmk/ppc/sched_param.h                       |   70 -
 osfmk/ppc/screen_switch.h                     |  141 -
 osfmk/ppc/serial_defs.h                       |   83 -
 osfmk/ppc/serial_io.c                         |  659 --
 osfmk/ppc/serial_io.h                         |  150 -
 osfmk/ppc/setjmp.h                            |   57 -
 osfmk/ppc/simple_lock.h                       |  178 -
 osfmk/ppc/skiplists.s                         | 1297 ---
 osfmk/ppc/spec_reg.h                          |   47 -
 osfmk/ppc/start.s                             | 1283 ---
 osfmk/ppc/status.c                            | 1820 ----
 osfmk/ppc/task.h                              |   63 -
 osfmk/ppc/thread.h                            |  212 -
 osfmk/ppc/trap.c                              | 1012 --
 osfmk/ppc/trap.h                              |  105 -
 osfmk/ppc/vm_tuning.h                         |   35 -
 osfmk/ppc/vmachmon.c                          | 2024 ----
 osfmk/ppc/vmachmon.h                          |  498 -
 osfmk/ppc/vmachmon_asm.s                      | 2368 -----
 osfmk/profiling/Makefile                      |    6 -
 osfmk/profiling/machine/profile-md.h          |    4 +-
 osfmk/profiling/ppc/profile-md.h              |  144 -
 osfmk/vm/bsd_vm.c                             |   58 +-
 osfmk/vm/default_freezer.c                    |  616 ++
 osfmk/vm/default_freezer.h                    |  160 +
 osfmk/vm/device_vm.c                          |    3 +-
 osfmk/vm/memory_object.c                      |  613 +-
 osfmk/vm/memory_object.h                      |   11 +
 osfmk/vm/pmap.h                               |   24 +-
 osfmk/vm/vm_apple_protect.c                   |   12 +-
 osfmk/vm/vm_debug.c                           |    8 +-
 osfmk/vm/vm_fault.c                           |  404 +-
 osfmk/vm/vm_fault.h                           |    3 +-
 osfmk/vm/vm_init.c                            |    3 +
 osfmk/vm/vm_kern.c                            |   52 +-
 osfmk/vm/vm_map.c                             | 1433 ++-
 osfmk/vm/vm_map.h                             |   68 +-
 osfmk/vm/vm_map_store.c                       |  176 +
 osfmk/vm/vm_map_store.h                       |  135 +
 osfmk/vm/vm_map_store_ll.c                    |  246 +
 .../machdep.c => osfmk/vm/vm_map_store_ll.h   |   52 +-
 osfmk/vm/vm_map_store_rb.c                    |  166 +
 osfmk/vm/vm_map_store_rb.h                    |   46 +
 osfmk/vm/vm_object.c                          | 1134 ++-
 osfmk/vm/vm_object.h                          |   90 +-
 osfmk/vm/vm_page.h                            |  142 +-
 osfmk/vm/vm_pageout.c                         | 1700 ++--
 osfmk/vm/vm_pageout.h                         |   61 +-
 osfmk/vm/vm_protos.h                          |   48 +-
 osfmk/vm/vm_purgeable_internal.h              |    6 -
 osfmk/vm/vm_resident.c                        |  635 +-
 osfmk/vm/vm_shared_region.c                   |  452 +-
 osfmk/vm/vm_shared_region.h                   |   54 +-
 osfmk/vm/vm_swapfile_pager.c                  |    3 +-
 osfmk/vm/vm_user.c                            |  148 +-
 osfmk/x86_64/bzero.s                          |    2 +-
 osfmk/x86_64/copyio.c                         |  351 +
 osfmk/x86_64/cswitch.s                        |   13 +-
 osfmk/x86_64/idt64.s                          |  487 +-
 osfmk/x86_64/idt_table.h                      |   93 +-
 osfmk/x86_64/locore.s                         |   26 +-
 osfmk/x86_64/loose_ends.c                     |  369 +-
 osfmk/x86_64/machine_routines_asm.s           |   41 +-
 osfmk/x86_64/pal_routines_asm.s               |  194 +
 osfmk/x86_64/pmap.c                           |  948 +-
 osfmk/x86_64/pmap_pcid.c                      |  310 +
 osfmk/x86_64/start.s                          |   74 +-
 pexpert/Makefile                              |   20 +-
 pexpert/conf/MASTER                           |    1 -
 pexpert/conf/MASTER.i386                      |    1 -
 pexpert/conf/MASTER.ppc                       |   18 -
 pexpert/conf/MASTER.x86_64                    |    1 -
 pexpert/conf/Makefile                         |   19 +-
 pexpert/conf/Makefile.ppc                     |    8 -
 pexpert/conf/Makefile.template                |   10 +-
 pexpert/conf/files.ppc                        |    7 -
 pexpert/conf/tools/Makefile                   |   32 -
 pexpert/conf/tools/doconf/Makefile            |   47 -
 pexpert/conf/tools/doconf/doconf.csh          |  321 -
 pexpert/gen/bootargs.c                        |  107 +-
 pexpert/i386/pe_init.c                        |    4 +-
 pexpert/i386/pe_kprintf.c                     |   15 +-
 pexpert/i386/pe_serial.c                      |    1 -
 pexpert/pexpert/Makefile                      |   11 -
 pexpert/pexpert/i386/boot.h                   |   31 +-
 pexpert/pexpert/i386/efi.h                    |   24 +-
 pexpert/pexpert/machine/boot.h                |    4 +-
 pexpert/pexpert/machine/protos.h              |    4 +-
 pexpert/pexpert/pexpert.h                     |   11 +
 pexpert/pexpert/ppc/Makefile                  |   27 -
 pexpert/pexpert/ppc/boot.h                    |   92 -
 pexpert/pexpert/ppc/interrupts.h              |   36 -
 pexpert/pexpert/ppc/powermac.h                |   60 -
 pexpert/pexpert/ppc/protos.h                  |  160 -
 pexpert/pexpert/protos.h                      |    4 -
 pexpert/ppc/pe_clock_speed.c                  |  183 -
 pexpert/ppc/pe_clock_speed_asm.s              |  116 -
 pexpert/ppc/pe_identify_machine.c             |  194 -
 pexpert/ppc/pe_init.c                         |  269 -
 pexpert/ppc/pe_kprintf.c                      |  154 -
 security/Makefile                             |    7 +-
 security/conf/MASTER                          |    2 +-
 security/conf/MASTER.i386                     |    2 +-
 security/conf/MASTER.ppc                      |   31 -
 security/conf/MASTER.x86_64                   |    2 +-
 security/conf/Makefile                        |   19 +-
 security/conf/Makefile.i386                   |   11 -
 security/conf/Makefile.ppc                    |   18 -
 security/conf/Makefile.template               |   11 +-
 security/conf/Makefile.x86_64                 |   11 -
 security/conf/files                           |    1 +
 security/conf/files.i386                      |    1 -
 security/conf/files.ppc                       |    1 -
 security/conf/tools/Makefile                  |   32 -
 security/conf/tools/doconf/Makefile           |   49 -
 security/conf/tools/doconf/doconf.csh         |  321 -
 security/conf/tools/newvers/Makefile          |   47 -
 security/mac.h                                |   18 +
 security/mac_alloc.h                          |    1 +
 security/mac_audit.c                          |   11 -
 security/mac_base.c                           |  147 +-
 security/mac_framework.h                      |   27 +-
 security/mac_internal.h                       |   38 +
 security/mac_iokit.c                          |   27 +
 security/mac_label.c                          |    1 +
 security/mac_net.c                            |    4 +-
 security/mac_policy.h                         |  253 +-
 security/mac_posix_shm.c                      |    2 +-
 security/mac_priv.c                           |  106 +
 security/mac_process.c                        |   48 +-
 security/mac_stub.c                           |   20 +
 security/mac_system.c                         |   13 +
 security/mac_vfs.c                            |   48 +-
 tools/lockstat/Makefile                       |    2 +-
 tools/lockstat/lockstat.c                     |    6 +-
 tools/symbolify.py                            |   82 +
 tools/tests/MPMMTest/KQMPMMtest.c             |   23 +
 tools/tests/MPMMTest/Makefile                 |    2 +-
 tools/tests/affinity/Makefile                 |    4 +-
 tools/tests/execperf/Makefile                 |   79 +
 tools/tests/execperf/exit-asm.S               |   42 +
 tools/tests/execperf/exit.c                   |   12 +
 tools/tests/execperf/printexecinfo.c          |   68 +
 tools/tests/execperf/run.c                    |   89 +
 tools/tests/execperf/test.sh                  |   30 +
 tools/tests/jitter/Makefile                   |   16 +
 tools/tests/jitter/cpu_number.s               |   33 +
 tools/tests/jitter/timer_jitter.c             |  480 +
 tools/tests/kqueue_tests/Makefile             |    8 +-
 ..._readwrite_tests.c => kqueue_file_tests.c} |  380 +-
 tools/tests/libMicro/AppleReadMe              |  107 +-
 tools/tests/libMicro/Makefile                 |   31 +-
 tools/tests/libMicro/Makefile.Darwin          |   20 +-
 tools/tests/libMicro/Makefile.com.Darwin      |    0
 tools/tests/libMicro/README                   |   11 +
 tools/tests/libMicro/apple/Makefile.Darwin    |   27 +-
 .../tests/libMicro/apple/Makefile.benchmarks  |   17 +-
 .../tests/libMicro/apple/Makefile.com.Darwin  |    3 +
 tools/tests/libMicro/apple/getaddrinfo_host.c |  244 +
 tools/tests/libMicro/apple/getaddrinfo_port.c |  157 +
 tools/tests/libMicro/apple/getgrent.c         |  163 +
 tools/tests/libMicro/apple/getgrgid.c         |  228 +
 tools/tests/libMicro/apple/getgrnam.c         |  231 +
 tools/tests/libMicro/apple/getpwent.c         |  163 +
 tools/tests/libMicro/apple/getpwnam.c         |  262 +
 tools/tests/libMicro/apple/getpwuid.c         |  256 +
 tools/tests/libMicro/apple/lmbench_bw_mem.c   |   29 +-
 .../libMicro/apple/mbr_check_membership.c     |  254 +
 .../apple/mbr_check_service_membership.c      |  281 +
 .../apple/od_query_create_with_node.c         |  381 +
 tools/tests/libMicro/bench.sh                 |   56 +-
 tools/tests/libMicro/benchDS.sh               |  324 +
 tools/tests/libMicro/coreos_bench.sh          |  837 ++
 tools/tests/libMicro/exp.c                    |   19 +
 tools/tests/libMicro/libmicro.h               |    2 +
 tools/tests/libMicro/log.c                    |   19 +
 tools/tests/libMicro/longjmp.c                |    7 +-
 tools/tests/libMicro/od_account_create.sh     |  129 +
 tools/tests/libMicro/od_account_delete.sh     |   98 +
 tools/tests/libMicro/siglongjmp.c             |    4 +
 tools/tests/superpages/testsp.c               |  210 +-
 .../testkext.xcodeproj/project.pbxproj        |  105 +
 .../tests/testkext/testthreadcall-Info.plist  |   47 +
 tools/tests/testkext/testthreadcall.cpp       |   65 +
 tools/tests/testkext/testthreadcall.h         |   18 +
 tools/tests/testkext/testvmx.cpp              |    3 -
 tools/tests/testkext/testvmx.h                |    3 -
 .../tests/xnu_quick_test/32bit_inode_tests.c  |    1 -
 tools/tests/xnu_quick_test/README             |   13 +-
 .../xnu_quick_test/atomic_fifo_queue_test.c   |   33 +
 tools/tests/xnu_quick_test/commpage_tests.c   |  361 +
 tools/tests/xnu_quick_test/helpers/arch.c     |    6 -
 .../tests/xnu_quick_test/helpers/data_exec.c  |   19 +-
 tools/tests/xnu_quick_test/helpers/launch.c   |   37 -
 tools/tests/xnu_quick_test/kqueue_tests.c     |   76 +-
 tools/tests/xnu_quick_test/machvm_tests.c     |  146 +-
 tools/tests/xnu_quick_test/main.c             |   64 +-
 tools/tests/xnu_quick_test/makefile           |   32 +-
 tools/tests/xnu_quick_test/memory_tests.c     |  157 +-
 tools/tests/xnu_quick_test/misc.c             |   11 +-
 tools/tests/xnu_quick_test/sched_tests.c      |  231 +
 tools/tests/xnu_quick_test/socket_tests.c     |  123 +-
 tools/tests/xnu_quick_test/tests.c            |  254 +-
 tools/tests/xnu_quick_test/tests.h            |   18 +-
 tools/tests/xnu_quick_test/xattr_tests.c      |   70 +-
 tools/tests/zero-to-n/Makefile                |    5 +
 tools/tests/zero-to-n/zero-to-n.c             |  579 ++
 1834 files changed, 222690 insertions(+), 195265 deletions(-)
 create mode 100644 EXTERNAL_HEADERS/Availability.h
 create mode 100644 EXTERNAL_HEADERS/AvailabilityInternal.h
 create mode 100644 EXTERNAL_HEADERS/AvailabilityMacros.h
 delete mode 100644 EXTERNAL_HEADERS/architecture/ppc/Makefile
 delete mode 100644 EXTERNAL_HEADERS/architecture/ppc/asm_help.h
 delete mode 100644 EXTERNAL_HEADERS/architecture/ppc/basic_regs.h
 delete mode 100644 EXTERNAL_HEADERS/architecture/ppc/fp_regs.h
 delete mode 100644 EXTERNAL_HEADERS/architecture/ppc/macro_help.h
 delete mode 100644 EXTERNAL_HEADERS/architecture/ppc/pseudo_inst.h
 delete mode 100644 EXTERNAL_HEADERS/architecture/ppc/reg_help.h
 delete mode 100644 EXTERNAL_HEADERS/mach-o/arm/reloc.h
 delete mode 100644 EXTERNAL_HEADERS/mach-o/ppc/reloc.h
 rename {osfmk/profiling/ppc => SETUP}/Makefile (66%)
 create mode 100644 SETUP/config/Makefile
 create mode 100644 SETUP/config/config.h
 rename bsd/conf/tools/doconf/doconf.csh => SETUP/config/doconf (94%)
 create mode 100644 SETUP/config/externs.c
 create mode 100644 SETUP/config/lexer.l
 create mode 100644 SETUP/config/main.c
 create mode 100644 SETUP/config/mkglue.c
 create mode 100644 SETUP/config/mkheaders.c
 create mode 100644 SETUP/config/mkioconf.c
 create mode 100644 SETUP/config/mkmakefile.c
 create mode 100644 SETUP/config/mkswapconf.c
 create mode 100644 SETUP/config/openp.c
 create mode 100644 SETUP/config/parser.y
 create mode 100644 SETUP/config/searchp.c
 create mode 100644 SETUP/kextsymboltool/Makefile
 create mode 100644 SETUP/kextsymboltool/kextsymboltool.c
 rename security/conf/tools/newvers/newvers.csh => SETUP/newvers (100%)
 mode change 100644 => 100755
 delete mode 100755 SETUP/seed_objroot
 create mode 100644 SETUP/setsegname/Makefile
 create mode 100644 SETUP/setsegname/setsegname.c
 delete mode 100644 bsd/conf/MASTER.ppc
 delete mode 100644 bsd/conf/Makefile.ppc
 delete mode 100644 bsd/conf/files.ppc
 delete mode 100644 bsd/conf/tools/Makefile
 delete mode 100644 bsd/conf/tools/doconf/Makefile
 create mode 100644 bsd/crypto/aes/Assert.c
 mode change 100644 => 100755 bsd/crypto/aes/aes.h
 create mode 100644 bsd/crypto/aes/i386/AES.s
 create mode 100644 bsd/crypto/aes/i386/Context.h
 create mode 100644 bsd/crypto/aes/i386/Data.mk
 create mode 100644 bsd/crypto/aes/i386/Data.s
 create mode 100644 bsd/crypto/aes/i386/EncryptDecrypt.s
 create mode 100644 bsd/crypto/aes/i386/ExpandKeyForDecryption.s
 create mode 100644 bsd/crypto/aes/i386/ExpandKeyForEncryption.s
 create mode 100644 bsd/crypto/aes/i386/MakeData.c
 create mode 100644 bsd/crypto/aes/i386/ReadMe.txt
 create mode 100644 bsd/crypto/aes/i386/aes_crypt_hw.s
 create mode 100644 bsd/crypto/aes/i386/aes_key_hw.s
 delete mode 100644 bsd/crypto/aes/i386/aes_modes.c
 create mode 100644 bsd/crypto/aes/i386/aes_modes_asm.s
 create mode 100644 bsd/crypto/aes/i386/aes_modes_hw.s
 delete mode 100644 bsd/crypto/aes/i386/aes_x86_v2.s
 delete mode 100644 bsd/crypto/aes/i386/aesopt.h
 create mode 100644 bsd/crypto/aes/i386/aesxts.c
 create mode 100644 bsd/crypto/aes/i386/aesxts.h
 create mode 100644 bsd/crypto/aes/i386/aesxts_asm.s
 delete mode 100644 bsd/crypto/aes/i386/edefs.h
 delete mode 100644 bsd/crypto/aes/ppc/Makefile
 delete mode 100644 bsd/crypto/aes/ppc/aescrypt.c
 delete mode 100644 bsd/crypto/aes/ppc/aeskey.c
 delete mode 100644 bsd/crypto/aes/ppc/aesopt.h
 delete mode 100644 bsd/crypto/aes/ppc/aestab.c
 delete mode 100644 bsd/crypto/aes/ppc/aestab.h
 create mode 100644 bsd/crypto/aes/test/ReadMe.txt
 create mode 100755 bsd/crypto/aes/test/makegenx86.sh
 create mode 100755 bsd/crypto/aes/test/makeoptx86.sh
 create mode 100644 bsd/crypto/aes/test/tstaes.c
 create mode 100644 bsd/crypto/doc/KernelCrypto.plist
 create mode 100644 bsd/crypto/doc/KernelCrypto.txt
 create mode 100644 bsd/crypto/sha2/intel/sha256.s
 create mode 100644 bsd/crypto/sha2/intel/sha256nossse3.s
 delete mode 100644 bsd/dev/ppc/conf.c
 delete mode 100644 bsd/dev/ppc/cons.c
 delete mode 100644 bsd/dev/ppc/dtrace_isa.c
 delete mode 100644 bsd/dev/ppc/dtrace_subr_ppc.c
 delete mode 100644 bsd/dev/ppc/fasttrap_isa.c
 delete mode 100644 bsd/dev/ppc/fbt_ppc.c
 delete mode 100644 bsd/dev/ppc/ffs.c
 delete mode 100644 bsd/dev/ppc/ffs.s
 delete mode 100644 bsd/dev/ppc/kern_machdep.c
 delete mode 100644 bsd/dev/ppc/km.c
 delete mode 100644 bsd/dev/ppc/mem.c
 delete mode 100644 bsd/dev/ppc/munge.s
 delete mode 100644 bsd/dev/ppc/ppc_init.c
 delete mode 100644 bsd/dev/ppc/sdt_ppc.c
 delete mode 100644 bsd/dev/ppc/stubs.c
 delete mode 100644 bsd/dev/ppc/systemcalls.c
 delete mode 100644 bsd/dev/ppc/unix_signal.c
 delete mode 100644 bsd/dev/ppc/xsumas.s
 create mode 100644 bsd/hfs/hfs_cprotect.c
 create mode 100644 bsd/hfs/hfs_kdebug.h
 create mode 100644 bsd/hfs/hfscommon/Misc/HybridAllocator.c
 create mode 100644 bsd/hfs/hfscommon/headers/HybridAllocator.h
 create mode 100644 bsd/hfs/hfscommon/headers/RedBlackTree.h
 create mode 100644 bsd/kern/Makefile
 create mode 100644 bsd/kern/kern_priv.c
 create mode 100644 bsd/kern/policy_check.c
 create mode 100644 bsd/kern/process_policy.c
 create mode 100644 bsd/kern/trace.codes
 create mode 100644 bsd/kern/vm_pressure.c
 rename bsd/{ppc/reg.h => kern/vm_pressure.h} (80%)
 create mode 100644 bsd/man/man2/getdtablesize.2
 create mode 100644 bsd/man/man2/sem_close.2
 create mode 100644 bsd/man/man2/sem_open.2
 create mode 100644 bsd/man/man2/sem_post.2
 create mode 100644 bsd/man/man2/sem_unlink.2
 create mode 100644 bsd/man/man2/sem_wait.2
 create mode 100644 bsd/man/man2/setregid.2
 create mode 100644 bsd/man/man2/setreuid.2
 create mode 100644 bsd/man/man2/shm_open.2
 create mode 100644 bsd/man/man2/shm_unlink.2
 create mode 100644 bsd/man/man2/undelete.2
 delete mode 100644 bsd/man/man5/fs.5
 delete mode 100644 bsd/man/man5/inode.5
 delete mode 100644 bsd/miscfs/nullfs/null.h
 delete mode 100644 bsd/miscfs/nullfs/null_subr.c
 delete mode 100644 bsd/miscfs/nullfs/null_vfsops.c
 delete mode 100644 bsd/miscfs/nullfs/null_vnops.c
 delete mode 100644 bsd/miscfs/union/union_subr.c
 delete mode 100644 bsd/miscfs/union/union_vfsops.c
 delete mode 100644 bsd/miscfs/union/union_vnops.c
 create mode 100644 bsd/net/bridgestp.c
 create mode 100644 bsd/net/bridgestp.h
 delete mode 100644 bsd/net/if_atm.h
 create mode 100644 bsd/net/if_bridge.c
 create mode 100644 bsd/net/if_bridgevar.h
 delete mode 100644 bsd/net/if_disc.c
 delete mode 100644 bsd/net/if_dummy.c
 delete mode 100644 bsd/net/if_ethersubr.c
 delete mode 100644 bsd/net/if_fddisubr.c
 create mode 100644 bsd/net/if_llreach.c
 create mode 100644 bsd/net/if_llreach.h
 create mode 100644 bsd/net/netsrc.c
 rename EXTERNAL_HEADERS/architecture/ppc/cframe.h => bsd/net/netsrc.h (58%)
 create mode 100644 bsd/net/ntstat.c
 create mode 100644 bsd/net/ntstat.h
 delete mode 100644 bsd/net/rtsock_mip.c
 delete mode 100644 bsd/netinet/if_atm.c
 delete mode 100644 bsd/netinet/if_atm.h
 delete mode 100644 bsd/netinet/if_fddi.h
 create mode 100644 bsd/netinet/in_mcast.c
 create mode 100644 bsd/netinet/in_pcblist.c
 create mode 100644 bsd/netinet/in_tclass.c
 delete mode 100644 bsd/netinet/ip_flow.c
 delete mode 100644 bsd/netinet/ip_flow.h
 create mode 100644 bsd/netinet/tcp_cc.h
 create mode 100644 bsd/netinet/tcp_ledbat.c
 create mode 100644 bsd/netinet/tcp_newreno.c
 create mode 100644 bsd/netinet6/in6_mcast.c
 create mode 100644 bsd/netinet6/ip6_id.c
 create mode 100644 bsd/netinet6/mld6.h
 delete mode 100644 bsd/ppc/Makefile
 delete mode 100644 bsd/ppc/_limits.h
 delete mode 100644 bsd/ppc/_param.h
 delete mode 100644 bsd/ppc/_structs.h
 delete mode 100644 bsd/ppc/_types.h
 delete mode 100644 bsd/ppc/decodePPC.h
 delete mode 100644 bsd/ppc/endian.h
 delete mode 100644 bsd/ppc/exec.h
 delete mode 100644 bsd/ppc/fasttrap_isa.h
 delete mode 100644 bsd/ppc/limits.h
 delete mode 100644 bsd/ppc/param.h
 delete mode 100644 bsd/ppc/profile.h
 delete mode 100644 bsd/ppc/reboot.h
 delete mode 100644 bsd/ppc/setjmp.h
 delete mode 100644 bsd/ppc/signal.h
 delete mode 100644 bsd/ppc/types.h
 delete mode 100644 bsd/ppc/ucontext.h
 delete mode 100644 bsd/ppc/vmparam.h
 rename osfmk/ppc/Performance.h => bsd/sys/content_protection.h (71%)
 rename osfmk/ppc/machine_cpu.h => bsd/sys/fileport.h (68%)
 rename bsd/{ppc/ptrace.h => sys/imgsrc.h} (74%)
 create mode 100755 bsd/sys/make_posix_availability.sh
 create mode 100755 bsd/sys/make_symbol_aliasing.sh
 rename bsd/{dev/ppc/memmove.c => sys/netboot.h} (72%)
 create mode 100644 bsd/sys/priv.h
 create mode 100644 bsd/sys/process_policy.h
 delete mode 100644 config/BSDKernel.ppc.exports
 create mode 100644 config/Dummy.exports
 delete mode 100644 config/IOKit.ppc.exports
 delete mode 100644 config/Libkern.ppc.exports
 delete mode 100644 config/MACFramework.ppc.exports
 delete mode 100644 config/Mach.ppc.exports
 delete mode 100644 config/Private.ppc.exports
 delete mode 100644 config/System6.0.ppc.exports
 delete mode 100644 config/Unsupported.ppc.exports
 rename osfmk/ppc/cpu_affinity.h => iokit/IOKit/AppleKeyStoreInterface.h (62%)
 create mode 100644 iokit/IOKit/IOStatistics.h
 create mode 100644 iokit/IOKit/IOStatisticsPrivate.h
 delete mode 100644 iokit/IOKit/i386/IOSharedLockImp.h
 delete mode 100644 iokit/IOKit/ppc/IODBDMA.h
 delete mode 100644 iokit/IOKit/ppc/IOSharedLockImp.h
 delete mode 100644 iokit/IOKit/ppc/Makefile
 delete mode 100644 iokit/IOKit/pwr_mgt/IOPMDeprecated.h
 create mode 100644 iokit/Kernel/IOStatistics.cpp
 create mode 100644 iokit/Kernel/i386/IOKeyStoreHelper.cpp
 delete mode 100644 iokit/Kernel/ppc/IOAsmSupport.s
 delete mode 100644 iokit/Kernel/ppc/IODBDMA.cpp
 delete mode 100644 iokit/conf/MASTER.ppc
 delete mode 100644 iokit/conf/Makefile.ppc
 delete mode 100644 iokit/conf/files.ppc
 delete mode 100644 iokit/conf/tools/Makefile
 delete mode 100644 iokit/conf/tools/doconf/Makefile
 delete mode 100755 iokit/conf/tools/doconf/doconf.csh
 delete mode 100644 libkern/c++/OSObjectAsm.s
 mode change 100644 => 100755 libkern/c++/Tests/TestSerialization/test1/test1_main.cpp
 delete mode 100644 libkern/conf/MASTER.ppc
 delete mode 100644 libkern/conf/Makefile.ppc
 delete mode 100644 libkern/conf/files.ppc
 delete mode 100644 libkern/conf/tools/Makefile
 delete mode 100644 libkern/conf/tools/doconf/Makefile
 delete mode 100755 libkern/conf/tools/doconf/doconf.csh
 create mode 100644 libkern/crypto/intel/sha1edp.h
 create mode 100644 libkern/crypto/intel/sha1edp.s
 rename {iokit/Kernel => libkern/kxld}/WKdmCompress.c (97%)
 rename {iokit/Kernel => libkern/kxld}/WKdmDecompress.c (100%)
 create mode 100644 libkern/kxld/i386/WKdmCompress.s
 create mode 100644 libkern/kxld/i386/WKdmDecompress.s
 create mode 100644 libkern/kxld/kxld_object.c
 create mode 100644 libkern/kxld/kxld_object.h
 delete mode 100644 libkern/kxld/kxld_state.c
 delete mode 100644 libkern/kxld/kxld_state.h
 create mode 100644 libkern/kxld/tests/kxld_array_test.c
 rename bsd/ppc/psl.h => libkern/kxld/tests/kxld_test.c (76%)
 rename osfmk/ppc/machine_rpc.h => libkern/kxld/tests/kxld_test.h (86%)
 delete mode 100644 libkern/libkern/OSAtomic.h.save
 rename {iokit/Kernel => libkern/libkern}/WKdm.h (97%)
 delete mode 100644 libkern/libkern/ppc/Makefile
 delete mode 100644 libkern/libkern/ppc/OSByteOrder.h
 create mode 100644 libkern/libkern/tree.h
 delete mode 100644 libkern/ppc/OSAtomic.s
 delete mode 100644 libkern/ppc/bcmp.s
 delete mode 100644 libkern/ppc/memcmp.s
 delete mode 100644 libkern/ppc/strlen.s
 delete mode 100644 libkern/zlib/arm/adler32vec.s
 delete mode 100644 libkern/zlib/arm/inffastS.s
 create mode 100644 libkern/zlib/intel/adler32vec.s
 create mode 100644 libkern/zlib/intel/inffastS.s
 delete mode 100644 libsa/conf/MASTER.ppc
 delete mode 100644 libsa/conf/Makefile.ppc
 delete mode 100644 libsa/conf/files.ppc
 delete mode 100644 libsa/conf/tools/Makefile
 delete mode 100644 libsa/conf/tools/doconf/Makefile
 delete mode 100755 libsa/conf/tools/doconf/doconf.csh
 delete mode 100644 libsyscall/BSDmakefile
 delete mode 100644 libsyscall/GNUmakefile
 create mode 100644 libsyscall/Libsyscall.xcconfig
 create mode 100644 libsyscall/Libsyscall.xcodeproj/project.pbxproj
 delete mode 100644 libsyscall/Makefile
 delete mode 100644 libsyscall/Makefile.inc
 delete mode 100644 libsyscall/Makefile.xbs
 create mode 100644 libsyscall/Platforms/MacOSX/i386/syscall.map
 create mode 100644 libsyscall/Platforms/MacOSX/x86_64/syscall.map
 create mode 100644 libsyscall/Platforms/syscall.map
 delete mode 100755 libsyscall/create-syscalls.pl
 rename osfmk/chud/ppc/chud_xnu_glue.h => libsyscall/custom/errno.c (95%)
 delete mode 100644 libsyscall/include/Makefile.inc
 delete mode 100644 libsyscall/include/processor_facilities.h
 delete mode 100644 libsyscall/mach/Makefile.inc
 rename osfmk/ppc/hardclock_entries.h => libsyscall/mach/abort.h (83%)
 delete mode 100644 libsyscall/mach/bootstrap_ports.c
 delete mode 100644 libsyscall/mach/brk.2
 rename iokit/Kernel/ppc/IOSharedLock.s => libsyscall/mach/dylib_link.c (90%)
 create mode 100644 libsyscall/mach/exc_catcher.h
 delete mode 100644 libsyscall/mach/headers/Makefile.inc
 delete mode 100644 libsyscall/mach/i386/Makefile.inc
 rename libsyscall/mach/{headers => mach}/errorlib.h (94%)
 rename libsyscall/mach/{headers => mach}/mach.h (100%)
 rename libsyscall/mach/{headers => mach}/mach_error.h (100%)
 rename libsyscall/mach/{headers => mach}/mach_init.h (95%)
 rename libsyscall/mach/{headers => mach}/mach_interface.h (100%)
 rename libsyscall/mach/{headers => mach}/port_obj.h (100%)
 rename libsyscall/mach/{headers => mach}/sync.h (100%)
 rename libsyscall/mach/{headers => mach}/task.h (93%)
 rename libsyscall/mach/{headers => mach}/thread_act.h (92%)
 rename libsyscall/mach/{headers => mach}/vm_task.h (100%)
 delete mode 100644 libsyscall/mach/mach_init_libSystem.c
 delete mode 100644 libsyscall/mach/mach_init_ports.c
 rename pexpert/ppc/pe_bootargs.c => libsyscall/mach/mach_legacy.c (80%)
 create mode 100644 libsyscall/mach/mig_reply_port.c
 rename osfmk/mach/ppc/rpc.h => libsyscall/mach/mig_reply_port.h (85%)
 delete mode 100644 libsyscall/mach/ppc/Makefile.inc
 delete mode 100644 libsyscall/mach/ppc64/Makefile.inc
 delete mode 100644 libsyscall/mach/sbrk.c
 delete mode 100644 libsyscall/mach/servers/Makefile.inc
 create mode 100644 libsyscall/mach/string.c
 rename osfmk/mach/ppc/thread_state.h => libsyscall/mach/string.h (59%)
 delete mode 100644 libsyscall/mach/x86_64/Makefile.inc
 create mode 100644 libsyscall/wrappers/__get_cpu_capabilities.s
 rename osfmk/x86_64/genassym.c => libsyscall/wrappers/_errno.h (88%)
 rename osfmk/ppc/testjump.c => libsyscall/wrappers/_libc_funcptr.c (55%)
 rename bsd/hfs/cprotect.c => libsyscall/wrappers/_libkernel_init.c (67%)
 rename bsd/ppc/disklabel.h => libsyscall/wrappers/_libkernel_init.h (65%)
 create mode 100644 libsyscall/wrappers/cancelable/fcntl-base.c
 rename libsyscall/{mach/x86_64/mach_absolute_time.S => wrappers/cancelable/fcntl-cancel.c} (81%)
 create mode 100644 libsyscall/wrappers/cancelable/fcntl.c
 create mode 100644 libsyscall/wrappers/cancelable/select-cancel.c
 create mode 100644 libsyscall/wrappers/cancelable/select.c
 create mode 100644 libsyscall/wrappers/cancelable/sigsuspend-cancel.c
 rename libsyscall/{mach/i386/mach_absolute_time.S => wrappers/cancelable/sigsuspend.c} (81%)
 rename bsd/dev/ppc/sysctl.c => libsyscall/wrappers/init_cpu_capabilities.c (61%)
 create mode 100644 libsyscall/wrappers/ioctl.c
 create mode 100644 libsyscall/wrappers/kill.c
 create mode 100644 libsyscall/wrappers/legacy/accept.c
 create mode 100644 libsyscall/wrappers/legacy/bind.c
 create mode 100644 libsyscall/wrappers/legacy/connect.c
 create mode 100644 libsyscall/wrappers/legacy/getattrlist.c
 create mode 100644 libsyscall/wrappers/legacy/getpeername.c
 create mode 100644 libsyscall/wrappers/legacy/getsockname.c
 create mode 100644 libsyscall/wrappers/legacy/kill.c
 create mode 100644 libsyscall/wrappers/legacy/lchown.c
 rename osfmk/ppc/xpr.h => libsyscall/wrappers/legacy/listen.c (50%)
 create mode 100644 libsyscall/wrappers/legacy/mprotect.c
 create mode 100644 libsyscall/wrappers/legacy/msync.c
 create mode 100644 libsyscall/wrappers/legacy/munmap.c
 create mode 100644 libsyscall/wrappers/legacy/open.c
 create mode 100644 libsyscall/wrappers/legacy/recvfrom.c
 create mode 100644 libsyscall/wrappers/legacy/recvmsg.c
 create mode 100644 libsyscall/wrappers/legacy/select-pre1050.c
 create mode 100644 libsyscall/wrappers/legacy/select.c
 create mode 100644 libsyscall/wrappers/legacy/sendmsg.c
 create mode 100644 libsyscall/wrappers/legacy/sendto.c
 create mode 100644 libsyscall/wrappers/legacy/setattrlist.c
 create mode 100644 libsyscall/wrappers/legacy/sigsuspend.c
 create mode 100644 libsyscall/wrappers/legacy/socketpair.c
 create mode 100644 libsyscall/wrappers/memcpy.c
 create mode 100644 libsyscall/wrappers/remove-counter.c
 create mode 100644 libsyscall/wrappers/rename.c
 create mode 100644 libsyscall/wrappers/rmdir.c
 create mode 100644 libsyscall/wrappers/select-base.c
 create mode 100644 libsyscall/wrappers/sigsuspend-base.c
 create mode 100644 libsyscall/wrappers/unix03/chmod.c
 create mode 100644 libsyscall/wrappers/unix03/fchmod.c
 create mode 100644 libsyscall/wrappers/unix03/getrlimit.c
 create mode 100644 libsyscall/wrappers/unix03/mmap.c
 create mode 100644 libsyscall/wrappers/unix03/setrlimit.c
 create mode 100644 libsyscall/wrappers/unlink.c
 create mode 100755 libsyscall/xcodescripts/compat-symlinks.sh
 create mode 100755 libsyscall/xcodescripts/compile-syscalls.pl
 create mode 100755 libsyscall/xcodescripts/create-syscalls.pl
 create mode 100755 libsyscall/xcodescripts/mach_install_mig.sh
 delete mode 100644 osfmk/chud/ppc/chud_cpu_asm.h
 delete mode 100644 osfmk/chud/ppc/chud_cpu_asm.s
 delete mode 100644 osfmk/chud/ppc/chud_cpu_ppc.c
 delete mode 100644 osfmk/chud/ppc/chud_osfmk_callback_ppc.c
 delete mode 100644 osfmk/chud/ppc/chud_spr.h
 delete mode 100644 osfmk/chud/ppc/chud_thread_ppc.c
 delete mode 100644 osfmk/chud/ppc/chud_xnu_private.h
 delete mode 100644 osfmk/conf/MASTER.ppc
 delete mode 100644 osfmk/conf/Makefile.ppc
 delete mode 100644 osfmk/conf/files.ppc
 delete mode 100644 osfmk/conf/tools/Makefile
 delete mode 100644 osfmk/conf/tools/doconf/Makefile
 delete mode 100755 osfmk/conf/tools/doconf/doconf.csh
 delete mode 100644 osfmk/console/ppc/serial_console.c
 delete mode 100644 osfmk/console/ppc/video_scroll.s
 create mode 100644 osfmk/i386/bsd_i386_native.c
 delete mode 100644 osfmk/i386/commpage/atomic.s
 delete mode 100644 osfmk/i386/commpage/bcopy_scalar.s
 delete mode 100644 osfmk/i386/commpage/bcopy_sse2.s
 delete mode 100644 osfmk/i386/commpage/bcopy_sse3x.s
 delete mode 100644 osfmk/i386/commpage/bcopy_sse3x_64.s
 delete mode 100644 osfmk/i386/commpage/bcopy_sse42.s
 delete mode 100644 osfmk/i386/commpage/bcopy_sse42_64.s
 delete mode 100644 osfmk/i386/commpage/bzero_scalar.s
 delete mode 100644 osfmk/i386/commpage/bzero_sse2.s
 delete mode 100644 osfmk/i386/commpage/bzero_sse2_64.s
 delete mode 100644 osfmk/i386/commpage/bzero_sse42.s
 delete mode 100644 osfmk/i386/commpage/bzero_sse42_64.s
 delete mode 100644 osfmk/i386/commpage/cacheflush.s
 delete mode 100644 osfmk/i386/commpage/commpage_gettimeofday.s
 delete mode 100644 osfmk/i386/commpage/commpage_mach_absolute_time.s
 delete mode 100644 osfmk/i386/commpage/commpage_sigs.c
 delete mode 100644 osfmk/i386/commpage/cpu_number.s
 delete mode 100644 osfmk/i386/commpage/longcopy_sse3x.s
 delete mode 100644 osfmk/i386/commpage/longcopy_sse3x_64.s
 delete mode 100644 osfmk/i386/commpage/memset_pattern_sse2.s
 delete mode 100644 osfmk/i386/commpage/memset_pattern_sse2_64.s
 delete mode 100644 osfmk/i386/commpage/spinlocks.s
 create mode 100644 osfmk/i386/copyio.c
 delete mode 100644 osfmk/i386/ipl.h
 create mode 100644 osfmk/i386/lapic_native.c
 create mode 100644 osfmk/i386/mp_native.c
 create mode 100644 osfmk/i386/pal_hibernate.h
 rename osfmk/{ppc/cpu_number.h => i386/pal_lock_asm.h} (82%)
 create mode 100644 osfmk/i386/pal_native.h
 create mode 100644 osfmk/i386/pal_routines.c
 create mode 100644 osfmk/i386/pal_routines.h
 create mode 100644 osfmk/i386/pal_routines_asm.s
 rename osfmk/{ppc/mp.h => i386/pal_rtclock_asm.h} (86%)
 create mode 100644 osfmk/i386/pcb_native.c
 create mode 100644 osfmk/i386/pmap_common.c
 create mode 100644 osfmk/i386/pmap_pcid.h
 create mode 100644 osfmk/i386/rtclock_asm.h
 rename osfmk/i386/{rtclock.h => rtclock_asm_native.h} (67%)
 create mode 100644 osfmk/i386/rtclock_native.c
 rename osfmk/{ppc/rtclock.h => i386/rtclock_protos.h} (64%)
 create mode 100644 osfmk/i386/trap_native.c
 create mode 100644 osfmk/i386/ucode.c
 create mode 100644 osfmk/i386/ucode.h
 delete mode 100644 osfmk/kdp/ml/ppc/kdp_asm.s
 delete mode 100644 osfmk/kdp/ml/ppc/kdp_machdep.c
 delete mode 100644 osfmk/kdp/ml/ppc/kdp_misc.s
 delete mode 100644 osfmk/kdp/ml/ppc/kdp_vm.c
 create mode 100644 osfmk/kern/extmod_statistics.c
 rename osfmk/{ppc/PPCcalls.c => kern/extmod_statistics.h} (71%)
 create mode 100644 osfmk/kern/sched_fixedpriority.c
 create mode 100644 osfmk/kern/sched_grrr.c
 create mode 100644 osfmk/kern/sched_proto.c
 delete mode 100644 osfmk/libsa/ppc/types.h
 create mode 100644 osfmk/mach/branch_predicates.h
 delete mode 100644 osfmk/mach/i386/_types.h
 delete mode 100644 osfmk/mach/ppc/Makefile
 delete mode 100644 osfmk/mach/ppc/_structs.h
 delete mode 100644 osfmk/mach/ppc/_types.h
 delete mode 100644 osfmk/mach/ppc/boolean.h
 delete mode 100644 osfmk/mach/ppc/exception.h
 delete mode 100644 osfmk/mach/ppc/kern_return.h
 delete mode 100644 osfmk/mach/ppc/machine_types.defs
 delete mode 100644 osfmk/mach/ppc/ndr_def.h
 delete mode 100644 osfmk/mach/ppc/processor_info.h
 delete mode 100644 osfmk/mach/ppc/sdt_isa.h
 delete mode 100644 osfmk/mach/ppc/syscall_sw.h
 delete mode 100644 osfmk/mach/ppc/thread_status.h
 delete mode 100644 osfmk/mach/ppc/vm_param.h
 delete mode 100644 osfmk/mach/ppc/vm_types.h
 rename libsyscall/mach/ppc/mach_absolute_time.s => osfmk/machine/pal_hibernate.h (81%)
 rename iokit/IOKit/machine/IOSharedLockImp.h => osfmk/machine/pal_routines.h (85%)
 delete mode 100644 osfmk/ppc/AltiAssist.s
 delete mode 100644 osfmk/ppc/Diagnostics.c
 delete mode 100644 osfmk/ppc/Diagnostics.h
 delete mode 100644 osfmk/ppc/Emulate.s
 delete mode 100644 osfmk/ppc/Emulate64.s
 delete mode 100644 osfmk/ppc/Firmware.h
 delete mode 100644 osfmk/ppc/Firmware.s
 delete mode 100644 osfmk/ppc/FirmwareC.c
 delete mode 100644 osfmk/ppc/FirmwareCalls.h
 delete mode 100644 osfmk/ppc/Makefile
 delete mode 100644 osfmk/ppc/PPCcalls.h
 delete mode 100644 osfmk/ppc/Performance.s
 delete mode 100644 osfmk/ppc/PseudoKernel.c
 delete mode 100644 osfmk/ppc/PseudoKernel.h
 delete mode 100644 osfmk/ppc/_setjmp.s
 delete mode 100644 osfmk/ppc/aligned_data.s
 delete mode 100644 osfmk/ppc/asm.h
 delete mode 100644 osfmk/ppc/ast.h
 delete mode 100644 osfmk/ppc/ast_types.h
 delete mode 100644 osfmk/ppc/atomic_switch.h
 delete mode 100644 osfmk/ppc/atomic_switch.s
 delete mode 100644 osfmk/ppc/bat_init.c
 delete mode 100644 osfmk/ppc/bcopy.s
 delete mode 100644 osfmk/ppc/bcopytest.c
 delete mode 100644 osfmk/ppc/bits.s
 delete mode 100644 osfmk/ppc/boot.h
 delete mode 100644 osfmk/ppc/bzero.s
 delete mode 100644 osfmk/ppc/cache.s
 delete mode 100644 osfmk/ppc/commpage/atomic.s
 delete mode 100644 osfmk/ppc/commpage/bcopy_64.s
 delete mode 100644 osfmk/ppc/commpage/bcopy_970.s
 delete mode 100644 osfmk/ppc/commpage/bcopy_g3.s
 delete mode 100644 osfmk/ppc/commpage/bcopy_g4.s
 delete mode 100644 osfmk/ppc/commpage/bigcopy_970.s
 delete mode 100644 osfmk/ppc/commpage/bzero_128.s
 delete mode 100644 osfmk/ppc/commpage/bzero_32.s
 delete mode 100644 osfmk/ppc/commpage/cacheflush.s
 delete mode 100644 osfmk/ppc/commpage/commpage.c
 delete mode 100644 osfmk/ppc/commpage/commpage.h
 delete mode 100644 osfmk/ppc/commpage/commpage_asm.s
 delete mode 100644 osfmk/ppc/commpage/gettimeofday.s
 delete mode 100644 osfmk/ppc/commpage/mach_absolute_time.s
 delete mode 100644 osfmk/ppc/commpage/memset_64.s
 delete mode 100644 osfmk/ppc/commpage/memset_g3.s
 delete mode 100644 osfmk/ppc/commpage/memset_g4.s
 delete mode 100644 osfmk/ppc/commpage/memset_g5.s
 delete mode 100644 osfmk/ppc/commpage/pthread.s
 delete mode 100644 osfmk/ppc/commpage/spinlocks.s
 delete mode 100644 osfmk/ppc/conf.c
 delete mode 100644 osfmk/ppc/console_feed.c
 delete mode 100644 osfmk/ppc/console_feed_entries.h
 delete mode 100644 osfmk/ppc/cpu.c
 delete mode 100644 osfmk/ppc/cpu_capabilities.h
 delete mode 100644 osfmk/ppc/cpu_data.h
 delete mode 100644 osfmk/ppc/cpu_internal.h
 delete mode 100644 osfmk/ppc/cswtch.s
 delete mode 100644 osfmk/ppc/db_asm.s
 delete mode 100644 osfmk/ppc/db_disasm.c
 delete mode 100644 osfmk/ppc/db_interface.c
 delete mode 100644 osfmk/ppc/db_low_trace.c
 delete mode 100644 osfmk/ppc/db_low_trace.h
 delete mode 100644 osfmk/ppc/db_machdep.h
 delete mode 100644 osfmk/ppc/db_trace.c
 delete mode 100644 osfmk/ppc/endian.h
 delete mode 100644 osfmk/ppc/etimer.c
 delete mode 100644 osfmk/ppc/exception.h
 delete mode 100644 osfmk/ppc/fpu_protos.h
 delete mode 100644 osfmk/ppc/genassym.c
 delete mode 100644 osfmk/ppc/hexfont.h
 delete mode 100644 osfmk/ppc/hibernate_ppc.c
 delete mode 100644 osfmk/ppc/hibernate_restore.s
 delete mode 100644 osfmk/ppc/hw_exception.s
 delete mode 100644 osfmk/ppc/hw_lock.s
 delete mode 100644 osfmk/ppc/hw_lock_types.h
 delete mode 100644 osfmk/ppc/hw_perfmon.c
 delete mode 100644 osfmk/ppc/hw_perfmon.h
 delete mode 100644 osfmk/ppc/hw_perfmon_mmcr.h
 delete mode 100644 osfmk/ppc/hw_vm.s
 delete mode 100644 osfmk/ppc/instrumentation.h
 delete mode 100644 osfmk/ppc/interrupt.c
 delete mode 100644 osfmk/ppc/io_map.c
 delete mode 100644 osfmk/ppc/io_map_entries.h
 delete mode 100644 osfmk/ppc/lock.h
 delete mode 100644 osfmk/ppc/locks.h
 delete mode 100644 osfmk/ppc/locks_ppc.c
 delete mode 100644 osfmk/ppc/low_trace.h
 delete mode 100644 osfmk/ppc/lowglobals.h
 delete mode 100644 osfmk/ppc/lowmem_vectors.s
 delete mode 100644 osfmk/ppc/machine_routines.c
 delete mode 100644 osfmk/ppc/machine_routines.h
 delete mode 100644 osfmk/ppc/machine_routines_asm.s
 delete mode 100644 osfmk/ppc/machine_task.c
 delete mode 100644 osfmk/ppc/machlimits.h
 delete mode 100644 osfmk/ppc/machparam.h
 delete mode 100644 osfmk/ppc/mappings.c
 delete mode 100644 osfmk/ppc/mappings.h
 delete mode 100644 osfmk/ppc/mcount.s
 delete mode 100644 osfmk/ppc/mem.h
 delete mode 100644 osfmk/ppc/misc.c
 delete mode 100644 osfmk/ppc/misc_asm.s
 delete mode 100644 osfmk/ppc/misc_protos.h
 delete mode 100644 osfmk/ppc/model_dep.c
 delete mode 100644 osfmk/ppc/movc.s
 delete mode 100644 osfmk/ppc/new_screen.h
 delete mode 100644 osfmk/ppc/pcb.c
 delete mode 100644 osfmk/ppc/pmap.c
 delete mode 100644 osfmk/ppc/pmap.h
 delete mode 100644 osfmk/ppc/pms.c
 delete mode 100644 osfmk/ppc/pmsCPU.c
 delete mode 100644 osfmk/ppc/ppc_disasm.i
 delete mode 100644 osfmk/ppc/ppc_init.c
 delete mode 100644 osfmk/ppc/ppc_vm_init.c
 delete mode 100644 osfmk/ppc/proc_reg.h
 delete mode 100644 osfmk/ppc/rtclock.c
 delete mode 100644 osfmk/ppc/savearea.c
 delete mode 100644 osfmk/ppc/savearea.h
 delete mode 100644 osfmk/ppc/savearea_asm.s
 delete mode 100644 osfmk/ppc/scc_8530.h
 delete mode 100644 osfmk/ppc/sched_param.h
 delete mode 100644 osfmk/ppc/screen_switch.h
 delete mode 100644 osfmk/ppc/serial_defs.h
 delete mode 100644 osfmk/ppc/serial_io.c
 delete mode 100644 osfmk/ppc/serial_io.h
 delete mode 100644 osfmk/ppc/setjmp.h
 delete mode 100644 osfmk/ppc/simple_lock.h
 delete mode 100644 osfmk/ppc/skiplists.s
 delete mode 100644 osfmk/ppc/spec_reg.h
 delete mode 100644 osfmk/ppc/start.s
 delete mode 100644 osfmk/ppc/status.c
 delete mode 100644 osfmk/ppc/task.h
 delete mode 100644 osfmk/ppc/thread.h
 delete mode 100644 osfmk/ppc/trap.c
 delete mode 100644 osfmk/ppc/trap.h
 delete mode 100644 osfmk/ppc/vm_tuning.h
 delete mode 100644 osfmk/ppc/vmachmon.c
 delete mode 100644 osfmk/ppc/vmachmon.h
 delete mode 100644 osfmk/ppc/vmachmon_asm.s
 delete mode 100644 osfmk/profiling/ppc/profile-md.h
 create mode 100644 osfmk/vm/default_freezer.c
 create mode 100644 osfmk/vm/default_freezer.h
 create mode 100644 osfmk/vm/vm_map_store.c
 create mode 100644 osfmk/vm/vm_map_store.h
 create mode 100644 osfmk/vm/vm_map_store_ll.c
 rename bsd/dev/ppc/machdep.c => osfmk/vm/vm_map_store_ll.h (61%)
 create mode 100644 osfmk/vm/vm_map_store_rb.c
 create mode 100644 osfmk/vm/vm_map_store_rb.h
 create mode 100644 osfmk/x86_64/copyio.c
 create mode 100644 osfmk/x86_64/pal_routines_asm.s
 create mode 100644 osfmk/x86_64/pmap_pcid.c
 delete mode 100644 pexpert/conf/MASTER.ppc
 delete mode 100644 pexpert/conf/Makefile.ppc
 delete mode 100644 pexpert/conf/files.ppc
 delete mode 100644 pexpert/conf/tools/Makefile
 delete mode 100644 pexpert/conf/tools/doconf/Makefile
 delete mode 100755 pexpert/conf/tools/doconf/doconf.csh
 delete mode 100644 pexpert/pexpert/ppc/Makefile
 delete mode 100644 pexpert/pexpert/ppc/boot.h
 delete mode 100644 pexpert/pexpert/ppc/interrupts.h
 delete mode 100644 pexpert/pexpert/ppc/powermac.h
 delete mode 100644 pexpert/pexpert/ppc/protos.h
 delete mode 100644 pexpert/ppc/pe_clock_speed.c
 delete mode 100644 pexpert/ppc/pe_clock_speed_asm.s
 delete mode 100644 pexpert/ppc/pe_identify_machine.c
 delete mode 100644 pexpert/ppc/pe_init.c
 delete mode 100644 pexpert/ppc/pe_kprintf.c
 delete mode 100644 security/conf/MASTER.ppc
 delete mode 100644 security/conf/Makefile.ppc
 delete mode 100644 security/conf/files.ppc
 delete mode 100644 security/conf/tools/Makefile
 delete mode 100644 security/conf/tools/doconf/Makefile
 delete mode 100644 security/conf/tools/doconf/doconf.csh
 delete mode 100644 security/conf/tools/newvers/Makefile
 create mode 100644 security/mac_priv.c
 create mode 100755 tools/symbolify.py
 create mode 100644 tools/tests/execperf/Makefile
 create mode 100644 tools/tests/execperf/exit-asm.S
 create mode 100644 tools/tests/execperf/exit.c
 create mode 100644 tools/tests/execperf/printexecinfo.c
 create mode 100644 tools/tests/execperf/run.c
 create mode 100755 tools/tests/execperf/test.sh
 create mode 100644 tools/tests/jitter/Makefile
 create mode 100644 tools/tests/jitter/cpu_number.s
 create mode 100644 tools/tests/jitter/timer_jitter.c
 mode change 100644 => 100755 tools/tests/kqueue_tests/Makefile
 rename tools/tests/kqueue_tests/{kqueue_readwrite_tests.c => kqueue_file_tests.c} (98%)
 mode change 100644 => 100755 tools/tests/libMicro/Makefile.com.Darwin
 create mode 100644 tools/tests/libMicro/apple/getaddrinfo_host.c
 create mode 100644 tools/tests/libMicro/apple/getaddrinfo_port.c
 create mode 100644 tools/tests/libMicro/apple/getgrent.c
 create mode 100644 tools/tests/libMicro/apple/getgrgid.c
 create mode 100644 tools/tests/libMicro/apple/getgrnam.c
 create mode 100644 tools/tests/libMicro/apple/getpwent.c
 create mode 100644 tools/tests/libMicro/apple/getpwnam.c
 create mode 100644 tools/tests/libMicro/apple/getpwuid.c
 create mode 100644 tools/tests/libMicro/apple/mbr_check_membership.c
 create mode 100644 tools/tests/libMicro/apple/mbr_check_service_membership.c
 create mode 100644 tools/tests/libMicro/apple/od_query_create_with_node.c
 create mode 100644 tools/tests/libMicro/benchDS.sh
 create mode 100644 tools/tests/libMicro/coreos_bench.sh
 create mode 100644 tools/tests/libMicro/od_account_create.sh
 create mode 100644 tools/tests/libMicro/od_account_delete.sh
 create mode 100644 tools/tests/testkext/testthreadcall-Info.plist
 create mode 100644 tools/tests/testkext/testthreadcall.cpp
 create mode 100644 tools/tests/testkext/testthreadcall.h
 create mode 100644 tools/tests/xnu_quick_test/atomic_fifo_queue_test.c
 create mode 100644 tools/tests/xnu_quick_test/commpage_tests.c
 create mode 100644 tools/tests/xnu_quick_test/sched_tests.c
 create mode 100644 tools/tests/zero-to-n/Makefile
 create mode 100644 tools/tests/zero-to-n/zero-to-n.c

diff --git a/EXTERNAL_HEADERS/Availability.h b/EXTERNAL_HEADERS/Availability.h
new file mode 100644
index 000000000..e811335c1
--- /dev/null
+++ b/EXTERNAL_HEADERS/Availability.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2007-2010 by Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+ 
+#ifndef __AVAILABILITY__
+#define __AVAILABILITY__
+ /*     
+    These macros are for use in OS header files. They enable function prototypes
+    and Objective-C methods to be tagged with the OS version in which they
+    were first available; and, if applicable, the OS version in which they 
+    became deprecated.  
+     
+    The desktop Mac OS X and the iPhone OS each have their own version numbers.
+    The __OSX_AVAILABLE_STARTING() macro allows you to specify both the desktop
+    and phone OS version numbers.  For instance:
+        __OSX_AVAILABLE_STARTING(__MAC_10_2,__IPHONE_2_0)
+    means the function/method was first available on Mac OS X 10.2 on the desktop
+    and first available in iPhone OS 2.0 on the iPhone.
+    
+    If a function is available on one platform but not the other, a _NA (not
+    applicable) parameter is used.  For instance:
+            __OSX_AVAILABLE_STARTING(__MAC_10_3,__IPHONE_NA)
+    means that the function/method was first available on Mac OS X 10.3, and is
+    currently not implemented on the iPhone.
+
+    At some point, a function/method may be deprecated.  That means Apple
+    recommends applications stop using the function, either because there is a 
+    better replacement or the functionality is being phased out.  Deprecated
+    functions/methods can be tagged with a __OSX_AVAILABLE_BUT_DEPRECATED()
+    macro which specifies the OS version where the function became available
+    as well as the OS version in which it became deprecated.  For instance:
+        __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0,__MAC_10_5,__IPHONE_NA,__IPHONE_NA)
+    means that the function/method was introduced in Mac OS X 10.0, then
+    became deprecated beginning in Mac OS X 10.5.  On the iPhone the function 
+    has never been available.  
+    
+    For these macros to function properly, a program must specify the OS version range 
+    it is targeting.  The min OS version is specified as an option to the compiler:
+    -mmacosx-version-min=10.x when building for Mac OS X, and -miphoneos-version-min=1.x.x
+    when building for the iPhone.  The upper bound for the OS version is rarely needed,
+    but it can be set on the command line via -D__MAC_OS_X_VERSION_MAX_ALLOWED=10xx for
+    Mac OS X and -D__IPHONE_OS_VERSION_MAX_ALLOWED=2xxxx for iPhone.  
+    
+    Examples:
+
+        A function available in Mac OS X 10.5 and later, but not on the phone:
+        
+            extern void mymacfunc() __OSX_AVAILABLE_STARTING(__MAC_10_5,__IPHONE_NA);
+
+
+        An Objective-C method in Mac OS X 10.5 and later, but not on the phone:
+        
+            @interface MyClass : NSObject
+            -(void) mymacmethod __OSX_AVAILABLE_STARTING(__MAC_10_5,__IPHONE_NA);
+            @end
+
+        
+        An enum available on the phone, but not available on Mac OS X:
+        
+            #if __IPHONE_OS_VERSION_MIN_REQUIRED
+                enum { myEnum = 1 };
+            #endif
+           Note: this works when targeting the Mac OS X platform because 
+           __IPHONE_OS_VERSION_MIN_REQUIRED is undefined there, and undefined
+           macros evaluate to zero in #if expressions. 
+        
+
+        An enum with values added in different iPhoneOS versions:
+
+            enum {
+                myX  = 1,   // Usable on iPhoneOS 2.1 and later
+                myY  = 2,   // Usable on iPhoneOS 3.0 and later
+                myZ  = 3,   // Usable on iPhoneOS 3.0 and later
+                ...
+            };
+            Note: you do not want to use #if with enumeration values
+            when a client needs to see all values at compile time
+            and use runtime logic to only use the viable values.
+
+    It is also possible to use the *_VERSION_MIN_REQUIRED macros in source code to
+    make one source base that can be compiled to target a range of OS versions.  It
+    is best not to use the __MAC_* and __IPHONE_* macros for comparisons, but rather
+    their numeric values.  That is because your code might be compiled with an older
+    SDK that does not define a later OS version macro, and in the C preprocessor
+    undefined names evaluate to zero in expressions, which could cause the #if
+    expression to evaluate in an unexpected way.
+    
+        #ifdef __MAC_OS_X_VERSION_MIN_REQUIRED
+            // code only compiled when targeting Mac OS X and not iPhone
+            // note use of 1050 instead of __MAC_10_5
+            #if __MAC_OS_X_VERSION_MIN_REQUIRED < 1050
+                // code in here might run on pre-Leopard OS
+            #else
+                // code here can assume Leopard or later
+            #endif
+        #endif
+
+
+*/
+
+#define __MAC_10_0      1000
+#define __MAC_10_1      1010
+#define __MAC_10_2      1020
+#define __MAC_10_3      1030
+#define __MAC_10_4      1040
+#define __MAC_10_5      1050
+#define __MAC_10_6      1060
+#define __MAC_10_7      1070
+#define __MAC_NA        9999   /* not available */
+
+#define __IPHONE_2_0     20000
+#define __IPHONE_2_1     20100
+#define __IPHONE_2_2     20200
+#define __IPHONE_3_0     30000
+#define __IPHONE_3_1     30100
+#define __IPHONE_3_2     30200
+#define __IPHONE_NA      99999  /* not available */
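+
+/* As an illustration of the encoding above: __MAC_10_5 == 1050 stands for
+   Mac OS X 10.5, and __IPHONE_3_1 == 30100 stands for iPhone OS 3.1.  A
+   version name not defined here (e.g. one from a newer SDK) evaluates to
+   zero in #if expressions, which is why the comparison advice above favors
+   the numeric literals over the macro names. */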
+
+#include <AvailabilityInternal.h>
+
+
+#ifdef __IPHONE_OS_VERSION_MIN_REQUIRED
+    #define __OSX_AVAILABLE_STARTING(_mac, _iphone) __AVAILABILITY_INTERNAL##_iphone
+    #define __OSX_AVAILABLE_BUT_DEPRECATED(_macIntro, _macDep, _iphoneIntro, _iphoneDep) \
+                                                    __AVAILABILITY_INTERNAL##_iphoneIntro##_DEP##_iphoneDep
+
+#elif defined(__MAC_OS_X_VERSION_MIN_REQUIRED)
+    #define __OSX_AVAILABLE_STARTING(_mac, _iphone) __AVAILABILITY_INTERNAL##_mac
+    #define __OSX_AVAILABLE_BUT_DEPRECATED(_macIntro, _macDep, _iphoneIntro, _iphoneDep) \
+                                                    __AVAILABILITY_INTERNAL##_macIntro##_DEP##_macDep
+
+#else
+    #define __OSX_AVAILABLE_STARTING(_mac, _iphone)
+    #define __OSX_AVAILABLE_BUT_DEPRECATED(_macIntro, _macDep, _iphoneIntro, _iphoneDep) 
+#endif
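+
+/* Worked example (a sketch, not part of the definitions above): when
+   building with -mmacosx-version-min=10.4, the declaration
+
+       extern void mymacfunc() __OSX_AVAILABLE_STARTING(__MAC_10_5,__IPHONE_NA);
+
+   token-pastes to __AVAILABILITY_INTERNAL__MAC_10_5, which
+   <AvailabilityInternal.h> resolves to the weak-import attribute because
+   the min required version (10.4) is below 10.5 and the max allowed
+   version is at or above it. */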
+
+
+#endif /* __AVAILABILITY__ */
diff --git a/EXTERNAL_HEADERS/AvailabilityInternal.h b/EXTERNAL_HEADERS/AvailabilityInternal.h
new file mode 100644
index 000000000..a4524708e
--- /dev/null
+++ b/EXTERNAL_HEADERS/AvailabilityInternal.h
@@ -0,0 +1,393 @@
+/*
+ * Copyright (c) 2007-2010 by Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/*
+    File:       AvailabilityInternal.h
+ 
+    Contains:   implementation details of __OSX_AVAILABLE_* macros from <Availability.h>
+
+*/
+#ifndef __AVAILABILITY_INTERNAL__
+#define __AVAILABILITY_INTERNAL__
+
+
+
+#ifndef __IPHONE_OS_VERSION_MIN_REQUIRED
+    #ifdef __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__
+        /* compiler sets __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ when -miphoneos-version-min is used */
+        #define __IPHONE_OS_VERSION_MIN_REQUIRED __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__
+    #endif
+#endif
+
+#ifdef __IPHONE_OS_VERSION_MIN_REQUIRED
+    /* don't use visibility attribute for iPhoneOS */
+    #define __AVAILABILITY_INTERNAL_DEPRECATED         __attribute__((deprecated))
+    #define __AVAILABILITY_INTERNAL_UNAVAILABLE        __attribute__((unavailable))
+    #define __AVAILABILITY_INTERNAL_WEAK_IMPORT        __attribute__((weak_import))
+    #define __AVAILABILITY_INTERNAL_REGULAR            
+#else
+    #define __AVAILABILITY_INTERNAL_DEPRECATED         __attribute__((deprecated,visibility("default")))
+    #define __AVAILABILITY_INTERNAL_UNAVAILABLE        __attribute__((unavailable,visibility("default")))
+    #define __AVAILABILITY_INTERNAL_WEAK_IMPORT        __attribute__((weak_import,visibility("default")))
+    #define __AVAILABILITY_INTERNAL_REGULAR            __attribute__((visibility("default")))
+#endif
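+
+/* Illustration (a sketch; the function names are hypothetical): every
+   versioned availability tag ultimately resolves to one of the four macros
+   above.  For example, on Mac OS X:
+
+       extern void old_func(void) __AVAILABILITY_INTERNAL_DEPRECATED;
+       // each call to old_func() now draws a compiler warning
+
+       extern void new_func(void) __AVAILABILITY_INTERNAL_WEAK_IMPORT;
+       // the symbol may be unresolved at launch on older systems, so
+       // callers can test it:  if (new_func != NULL) new_func();
+*/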
+
+#ifdef __IPHONE_OS_VERSION_MIN_REQUIRED
+    /* make sure a default max version is set */
+    #ifndef __IPHONE_OS_VERSION_MAX_ALLOWED
+        #define __IPHONE_OS_VERSION_MAX_ALLOWED     __IPHONE_3_2
+    #endif
+    /* make sure a valid min is set */
+    #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_0
+        #undef __IPHONE_OS_VERSION_MIN_REQUIRED
+        #define __IPHONE_OS_VERSION_MIN_REQUIRED    __IPHONE_2_0 
+    #endif
+
+    /* set up internal macros (up to 2.0) */
+    #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_2_0
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0                __AVAILABILITY_INTERNAL_UNAVAILABLE
+    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_0
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+    #else
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0                __AVAILABILITY_INTERNAL_REGULAR
+    #endif
+    #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_2_0
+    #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_0    __AVAILABILITY_INTERNAL_DEPRECATED
+    /* set up internal macros (up to 2.1) */
+    #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_2_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1                __AVAILABILITY_INTERNAL_UNAVAILABLE
+    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+    #else
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1                __AVAILABILITY_INTERNAL_REGULAR
+    #endif
+    #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_2_1
+    #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_1    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+    #else
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_1    __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_1    __AVAILABILITY_INTERNAL_DEPRECATED
+    #endif
+    /* set up internal macros (up to 2.2) */
+    #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_2_2
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2                __AVAILABILITY_INTERNAL_UNAVAILABLE
+    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+    #else
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2                __AVAILABILITY_INTERNAL_REGULAR
+    #endif
+    #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_2_2
+    #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
+    #else
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2    __AVAILABILITY_INTERNAL_DEPRECATED
+    #endif
+    /* set up internal macros (up to 3.0) */
+    #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_3_0
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0                __AVAILABILITY_INTERNAL_UNAVAILABLE
+    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+    #else
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0                __AVAILABILITY_INTERNAL_REGULAR
+    #endif
+    #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_3_0
+    #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_2_1
+    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_2_2
+    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL__IPHONE_3_0
+    #else
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0    __AVAILABILITY_INTERNAL_DEPRECATED
+    #endif
+    /* set up internal macros (up to 3.1) */
+    #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_3_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_1                __AVAILABILITY_INTERNAL_UNAVAILABLE
+    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_1                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+    #else
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_1                __AVAILABILITY_INTERNAL_REGULAR
+    #endif
+    #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_3_1
+    #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_1
+    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_2_2
+    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_3_0
+    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL__IPHONE_3_1
+    #else
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1    __AVAILABILITY_INTERNAL_DEPRECATED
+    #endif
+    /* set up internal macros (up to 3.2) */
+    #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_3_2
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_2                __AVAILABILITY_INTERNAL_UNAVAILABLE
+    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_2                __AVAILABILITY_INTERNAL_WEAK_IMPORT
+    #else
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_2                __AVAILABILITY_INTERNAL_REGULAR
+    #endif
+    #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_NA     __AVAILABILITY_INTERNAL__IPHONE_3_2
+    #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_1
+    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_2_2
+    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_0
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_0
+    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_1
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_1
+    #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_REGULAR
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL__IPHONE_3_2
+    #else
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2    __AVAILABILITY_INTERNAL_DEPRECATED
+    #endif
+    /* set up internal macros (n/a) */
+    #define __AVAILABILITY_INTERNAL__IPHONE_NA                     __AVAILABILITY_INTERNAL_UNAVAILABLE
+    #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA      __AVAILABILITY_INTERNAL_UNAVAILABLE
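+
+    /* Worked example (a sketch): with -miphoneos-version-min=2.1 and the
+       default max of __IPHONE_3_2, the ladder above resolves to:
+           __AVAILABILITY_INTERNAL__IPHONE_2_0  ->  _REGULAR      (2.0 <= min)
+           __AVAILABILITY_INTERNAL__IPHONE_3_0  ->  _WEAK_IMPORT  (min < 3.0 <= max)
+           __AVAILABILITY_INTERNAL__IPHONE_NA   ->  _UNAVAILABLE  (never available)
+    */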
+
+#elif defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__)
+    /* compiler for Mac OS X sets __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ */
+    #define __MAC_OS_X_VERSION_MIN_REQUIRED __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__
+    /* make sure a default max version is set */
+    #ifndef __MAC_OS_X_VERSION_MAX_ALLOWED
+        #define __MAC_OS_X_VERSION_MAX_ALLOWED __MAC_10_7
+    #endif
+    /* set up internal macros */
+    #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_7
+        #define __AVAILABILITY_INTERNAL__MAC_10_7        __AVAILABILITY_INTERNAL_UNAVAILABLE
+    #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_7
+        #define __AVAILABILITY_INTERNAL__MAC_10_7        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+    #else
+        #define __AVAILABILITY_INTERNAL__MAC_10_7        __AVAILABILITY_INTERNAL_REGULAR
+    #endif
+    #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_6
+        #define __AVAILABILITY_INTERNAL__MAC_10_6        __AVAILABILITY_INTERNAL_UNAVAILABLE
+    #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_6
+        #define __AVAILABILITY_INTERNAL__MAC_10_6        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+    #else
+        #define __AVAILABILITY_INTERNAL__MAC_10_6        __AVAILABILITY_INTERNAL_REGULAR
+    #endif
+    #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_5
+        #define __AVAILABILITY_INTERNAL__MAC_10_5        __AVAILABILITY_INTERNAL_UNAVAILABLE
+    #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_5
+        #define __AVAILABILITY_INTERNAL__MAC_10_5        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+    #else
+        #define __AVAILABILITY_INTERNAL__MAC_10_5        __AVAILABILITY_INTERNAL_REGULAR
+    #endif
+    #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_4
+        #define __AVAILABILITY_INTERNAL__MAC_10_4        __AVAILABILITY_INTERNAL_UNAVAILABLE
+    #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_4
+        #define __AVAILABILITY_INTERNAL__MAC_10_4        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+    #else
+        #define __AVAILABILITY_INTERNAL__MAC_10_4        __AVAILABILITY_INTERNAL_REGULAR
+    #endif
+    #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_3
+        #define __AVAILABILITY_INTERNAL__MAC_10_3        __AVAILABILITY_INTERNAL_UNAVAILABLE
+    #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_3
+        #define __AVAILABILITY_INTERNAL__MAC_10_3        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+    #else
+        #define __AVAILABILITY_INTERNAL__MAC_10_3        __AVAILABILITY_INTERNAL_REGULAR
+    #endif
+    #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_2
+        #define __AVAILABILITY_INTERNAL__MAC_10_2        __AVAILABILITY_INTERNAL_UNAVAILABLE
+    #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_2
+        #define __AVAILABILITY_INTERNAL__MAC_10_2        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+    #else
+        #define __AVAILABILITY_INTERNAL__MAC_10_2        __AVAILABILITY_INTERNAL_REGULAR
+    #endif
+    #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_1
+        #define __AVAILABILITY_INTERNAL__MAC_10_1        __AVAILABILITY_INTERNAL_UNAVAILABLE
+    #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_1
+        #define __AVAILABILITY_INTERNAL__MAC_10_1        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+    #else
+        #define __AVAILABILITY_INTERNAL__MAC_10_1        __AVAILABILITY_INTERNAL_REGULAR
+    #endif
+    #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_0
+        #define __AVAILABILITY_INTERNAL__MAC_10_0        __AVAILABILITY_INTERNAL_UNAVAILABLE
+    #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_0
+        #define __AVAILABILITY_INTERNAL__MAC_10_0        __AVAILABILITY_INTERNAL_WEAK_IMPORT
+    #else
+        #define __AVAILABILITY_INTERNAL__MAC_10_0        __AVAILABILITY_INTERNAL_REGULAR
+    #endif
+    #define __AVAILABILITY_INTERNAL__MAC_NA             __AVAILABILITY_INTERNAL_UNAVAILABLE
+    #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_1
+        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_1        __AVAILABILITY_INTERNAL_DEPRECATED
+    #else
+        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_1        __AVAILABILITY_INTERNAL__MAC_10_0
+    #endif
+    #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_2
+        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_2        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_2        __AVAILABILITY_INTERNAL_DEPRECATED
+    #else
+        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_2        __AVAILABILITY_INTERNAL__MAC_10_0
+        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_2        __AVAILABILITY_INTERNAL__MAC_10_1
+    #endif
+    #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_3
+        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_3        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_3        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_3        __AVAILABILITY_INTERNAL_DEPRECATED
+    #else
+        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_3        __AVAILABILITY_INTERNAL__MAC_10_0
+        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_3        __AVAILABILITY_INTERNAL__MAC_10_1
+        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_3        __AVAILABILITY_INTERNAL__MAC_10_2
+    #endif
+    #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_4
+        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_4        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_4        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_4        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_4        __AVAILABILITY_INTERNAL_DEPRECATED
+    #else
+        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_4        __AVAILABILITY_INTERNAL__MAC_10_0
+        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_4        __AVAILABILITY_INTERNAL__MAC_10_1
+        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_4        __AVAILABILITY_INTERNAL__MAC_10_2
+        #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_4        __AVAILABILITY_INTERNAL__MAC_10_3
+    #endif
+    #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_5
+        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_5        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_5        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_5        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_5        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_5        __AVAILABILITY_INTERNAL_DEPRECATED
+    #else
+        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_5        __AVAILABILITY_INTERNAL__MAC_10_0
+        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_5        __AVAILABILITY_INTERNAL__MAC_10_1
+        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_5        __AVAILABILITY_INTERNAL__MAC_10_2
+        #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_5        __AVAILABILITY_INTERNAL__MAC_10_3
+        #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_5        __AVAILABILITY_INTERNAL__MAC_10_4
+    #endif
+    #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_6
+        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_6        __AVAILABILITY_INTERNAL_DEPRECATED
+    #else
+        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_0
+        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_1
+        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_2
+        #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_3
+        #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_4
+        #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_6        __AVAILABILITY_INTERNAL__MAC_10_5
+    #endif
+    #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_7
+        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
+        #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_7        __AVAILABILITY_INTERNAL_DEPRECATED
+    #else
+        #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_0
+        #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_1
+        #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_2
+        #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_3
+        #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_4
+        #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_5
+        #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_7        __AVAILABILITY_INTERNAL__MAC_10_6
+    #endif
+    #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_0
+    #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_1
+    #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_2
+    #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_3
+    #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_4
+    #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_5
+    #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_6
+    #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_NA             __AVAILABILITY_INTERNAL__MAC_10_7
+    #define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA               __AVAILABILITY_INTERNAL_UNAVAILABLE
+#endif
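+
+/*
+ * For example, with a deployment target of 10.6 a declaration such as
+ * (f is hypothetical)
+ *
+ *     extern void f(void) __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_5;
+ *
+ * picks up __AVAILABILITY_INTERNAL_DEPRECATED, so uses of f() draw a
+ * deprecation warning; with a 10.4 target the same macro resolves to
+ * __AVAILABILITY_INTERNAL__MAC_10_0 and compiles without complaint.
+ */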
+
+#endif /* __AVAILABILITY_INTERNAL__ */
diff --git a/EXTERNAL_HEADERS/AvailabilityMacros.h b/EXTERNAL_HEADERS/AvailabilityMacros.h
new file mode 100644
index 000000000..02981bd13
--- /dev/null
+++ b/EXTERNAL_HEADERS/AvailabilityMacros.h
@@ -0,0 +1,820 @@
+/*
+ * Copyright (c) 2001-2010 by Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/*
+     File:       AvailabilityMacros.h
+ 
+     More Info:  See TechNote 2064
+
+     Contains:   Autoconfiguration of AVAILABLE_ macros for Mac OS X
+
+                 This header enables a developer to specify build-time
+                 constraints on which Mac OS X versions the resulting
+                 application will run on.  There are two bounds a developer
+                 can specify:
+                 
+                      MAC_OS_X_VERSION_MIN_REQUIRED
+                      MAC_OS_X_VERSION_MAX_ALLOWED
+                      
+                The lower bound controls which calls to OS functions will
+                be weak-imported (allowed to be unresolved at launch time).
+                The upper bound controls which OS functionality, if used,
+                will result in a compiler error because that functionality is
+                not available on any OS in the specified range.
+                
+                For example, suppose an application is compiled with:
+                
+                      MAC_OS_X_VERSION_MIN_REQUIRED = MAC_OS_X_VERSION_10_2
+                      MAC_OS_X_VERSION_MAX_ALLOWED  = MAC_OS_X_VERSION_10_3
+                     
+                and an OS header contains:
+                
+                     extern void funcA(void) AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER;
+                     extern void funcB(void) AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2;
+                     extern void funcC(void) AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3;
+                     extern void funcD(void) AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER;
+                     extern void funcE(void) AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER;
+                     extern void funcF(void) AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER;
+                     extern void funcG(void) AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER;
+                     
+                     typedef long TypeA DEPRECATED_IN_MAC_OS_X_VERSION_10_0_AND_LATER;
+                     typedef long TypeB DEPRECATED_IN_MAC_OS_X_VERSION_10_1_AND_LATER;
+                     typedef long TypeC DEPRECATED_IN_MAC_OS_X_VERSION_10_2_AND_LATER;
+                     typedef long TypeD DEPRECATED_IN_MAC_OS_X_VERSION_10_3_AND_LATER;
+                     typedef long TypeE DEPRECATED_IN_MAC_OS_X_VERSION_10_4_AND_LATER;
+
+                Any application code which uses these declarations will get the following:
+                
+                                compile         link          run 
+                                -------         ------        -------
+                     funcA:     normal          normal        normal
+                     funcB:     warning         normal        normal
+                     funcC:     normal          normal        normal
+                     funcD:     normal          normal        normal
+                     funcE:     normal          normal        normal
+                     funcF:     normal          weak          on 10.3 normal, on 10.2 (&funcF == NULL)
+                     funcG:     error           error         n/a
+                     TypeA:     warning
+                     TypeB:     warning
+                     TypeC:     warning
+                     TypeD:     normal
+                     TypeE:     normal
+                  
+  
+*/
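+
+/*
+ * Continuing the example above: because funcF is weak-imported when
+ * targeting 10.2, an application can test the symbol at runtime before
+ * calling it.  A minimal sketch (funcF is the hypothetical routine from
+ * the table; use_fallback stands in for whatever older path the
+ * application provides):
+ *
+ *     if (&funcF != NULL)
+ *         funcF();          // resolved: running on 10.3 or later
+ *     else
+ *         use_fallback();   // unresolved on 10.2: take the older path
+ */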
+#ifndef __AVAILABILITYMACROS__
+#define __AVAILABILITYMACROS__
+
+
+/*
+ * Set up standard Mac OS X versions
+ */
+#define MAC_OS_X_VERSION_10_0 1000
+#define MAC_OS_X_VERSION_10_1 1010
+#define MAC_OS_X_VERSION_10_2 1020
+#define MAC_OS_X_VERSION_10_3 1030
+#define MAC_OS_X_VERSION_10_4 1040
+#define MAC_OS_X_VERSION_10_5 1050
+#define MAC_OS_X_VERSION_10_6 1060
+#define MAC_OS_X_VERSION_10_7 1070
+
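+/*
+ * For example, application code may compare the deployment target against
+ * these constants to pick a code path at compile time.  A minimal sketch
+ * (MyNewCall and MyOldCall are hypothetical):
+ *
+ *     #if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
+ *         MyNewCall();      // 10.5 or later is guaranteed at runtime
+ *     #else
+ *         MyOldCall();      // binary may run on pre-10.5 systems
+ *     #endif
+ */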
+
+/* 
+ * If min OS not specified, assume 10.1 for 32-bit ppc and 10.4 for ppc64, i386, and x86_64
+ * Note: the gcc driver may set __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ based on the MACOSX_DEPLOYMENT_TARGET environment variable
+ */
+#ifndef MAC_OS_X_VERSION_MIN_REQUIRED
+    #ifdef __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__
+        #if (__i386__ || __x86_64__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < MAC_OS_X_VERSION_10_4)
+            #warning Building for Intel with Mac OS X Deployment Target < 10.4 is invalid.
+        #elif __ppc64__ && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < MAC_OS_X_VERSION_10_4)
+            #warning Building for ppc64 with Mac OS X Deployment Target < 10.4 is invalid.
+        #endif
+        #define MAC_OS_X_VERSION_MIN_REQUIRED __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__
+    #else
+        #if __ppc64__ || __i386__ || __x86_64__
+            #define MAC_OS_X_VERSION_MIN_REQUIRED MAC_OS_X_VERSION_10_4
+        #else
+            #define MAC_OS_X_VERSION_MIN_REQUIRED MAC_OS_X_VERSION_10_1
+        #endif
+    #endif
+#endif
+
+/*
+ * If max OS not specified, assume the larger of (10.7, min)
+ */
+#ifndef MAC_OS_X_VERSION_MAX_ALLOWED
+    #if MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_7
+        #define MAC_OS_X_VERSION_MAX_ALLOWED MAC_OS_X_VERSION_MIN_REQUIRED
+    #else
+        #define MAC_OS_X_VERSION_MAX_ALLOWED MAC_OS_X_VERSION_10_7
+    #endif
+#endif
+
+/*
+ * Error on bad values
+ */
+#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_MIN_REQUIRED
+    #error MAC_OS_X_VERSION_MAX_ALLOWED must be >= MAC_OS_X_VERSION_MIN_REQUIRED
+#endif
+#if MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_0
+    #error MAC_OS_X_VERSION_MIN_REQUIRED must be >= MAC_OS_X_VERSION_10_0
+#endif
+
+/*
+ * only certain compilers support __attribute__((weak_import))
+ */
+#if defined(__GNUC__) && ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))) && (MAC_OS_X_VERSION_MIN_REQUIRED >= 1020)
+    #define WEAK_IMPORT_ATTRIBUTE __attribute__((weak_import))
+#elif defined(__MWERKS__) && (__MWERKS__ >= 0x3205) && (MAC_OS_X_VERSION_MIN_REQUIRED >= 1020) && !defined(__INTEL__)
+    #define WEAK_IMPORT_ATTRIBUTE __attribute__((weak_import))
+#else
+    #define WEAK_IMPORT_ATTRIBUTE
+#endif
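+
+/*
+ * For example, on a supporting compiler with a 10.2 or later deployment
+ * target, a declaration such as (some_call is hypothetical)
+ *
+ *     extern int some_call(int) WEAK_IMPORT_ATTRIBUTE;
+ *
+ * compiles as if written
+ *
+ *     extern int some_call(int) __attribute__((weak_import));
+ *
+ * allowing the program to launch even on systems where some_call is
+ * missing from the OS libraries.
+ */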
+
+/*
+ * only certain compilers support __attribute__((deprecated))
+ */
+#if defined(__GNUC__) && ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1)))
+    #define DEPRECATED_ATTRIBUTE __attribute__((deprecated))
+#else
+    #define DEPRECATED_ATTRIBUTE
+#endif
+
+/*
+ * only certain compilers support __attribute__((unavailable))
+ */
+#if defined(__GNUC__) && ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1)))
+    #define UNAVAILABLE_ATTRIBUTE __attribute__((unavailable))
+#else
+    #define UNAVAILABLE_ATTRIBUTE
+#endif
+
+
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER
+ * 
+ * Used on functions introduced in Mac OS X 10.0 
+ */
+#define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED
+ * 
+ * Used on functions introduced in Mac OS X 10.0, 
+ * and deprecated in Mac OS X 10.0
+ */
+#define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED    DEPRECATED_ATTRIBUTE
+
+/*
+ * DEPRECATED_IN_MAC_OS_X_VERSION_10_0_AND_LATER
+ * 
+ * Used on types deprecated in Mac OS X 10.0 
+ */
+#define DEPRECATED_IN_MAC_OS_X_VERSION_10_0_AND_LATER     DEPRECATED_ATTRIBUTE
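+
+/*
+ * For example, a declaration such as (OldType is hypothetical)
+ *
+ *     typedef long OldType DEPRECATED_IN_MAC_OS_X_VERSION_10_0_AND_LATER;
+ *
+ * makes each later use of OldType draw a deprecation warning
+ * (gcc's -Wdeprecated-declarations) on compilers that support
+ * __attribute__((deprecated)); elsewhere the macro is empty and the
+ * typedef compiles silently.
+ */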
+
+
+
+
+
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER
+ * 
+ * Used on declarations introduced in Mac OS X 10.1 
+ */
+#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_1
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER     UNAVAILABLE_ATTRIBUTE
+#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_1
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER     WEAK_IMPORT_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER
+#endif
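+
+/*
+ * Worked case for the three-way choice above (funcD is the hypothetical
+ * 10.1 routine from the table at the top of this file): with
+ * MAC_OS_X_VERSION_MIN_REQUIRED = 10.0 and MAX_ALLOWED = 10.7, the second
+ * branch is taken, so
+ *
+ *     extern void funcD(void) AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER;
+ *
+ * becomes a weak import; raising the minimum to 10.1 or later makes the
+ * macro empty; capping the maximum below 10.1 turns any use of funcD into
+ * a compile-time error.
+ */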
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED
+ * 
+ * Used on declarations introduced in Mac OS X 10.1, 
+ * and deprecated in Mac OS X 10.1
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_1
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED    AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_1
+ * 
+ * Used on declarations introduced in Mac OS X 10.0, 
+ * but later deprecated in Mac OS X 10.1
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_1
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_1    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_1    AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER
+#endif
+
+/*
+ * DEPRECATED_IN_MAC_OS_X_VERSION_10_1_AND_LATER
+ * 
+ * Used on types deprecated in Mac OS X 10.1 
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_1
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_1_AND_LATER    DEPRECATED_ATTRIBUTE
+#else
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_1_AND_LATER
+#endif
+
+
+
+
+
+
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER
+ * 
+ * Used on declarations introduced in Mac OS X 10.2 
+ */
+#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER     UNAVAILABLE_ATTRIBUTE
+#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER     WEAK_IMPORT_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED
+ * 
+ * Used on declarations introduced in Mac OS X 10.2, 
+ * and deprecated in Mac OS X 10.2
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED    AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2
+ * 
+ * Used on declarations introduced in Mac OS X 10.0, 
+ * but later deprecated in Mac OS X 10.2
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2    AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2
+ * 
+ * Used on declarations introduced in Mac OS X 10.1, 
+ * but later deprecated in Mac OS X 10.2
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2    AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER
+#endif
+
+/*
+ * DEPRECATED_IN_MAC_OS_X_VERSION_10_2_AND_LATER
+ * 
+ * Used on types deprecated in Mac OS X 10.2 
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_2_AND_LATER    DEPRECATED_ATTRIBUTE
+#else
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_2_AND_LATER
+#endif
+
+
+
+
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER
+ * 
+ * Used on declarations introduced in Mac OS X 10.3 
+ */
+#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER     UNAVAILABLE_ATTRIBUTE
+#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER     WEAK_IMPORT_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED
+ * 
+ * Used on declarations introduced in Mac OS X 10.3, 
+ * and deprecated in Mac OS X 10.3
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED    AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3
+ * 
+ * Used on declarations introduced in Mac OS X 10.0, 
+ * but later deprecated in Mac OS X 10.3
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3    AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3
+ * 
+ * Used on declarations introduced in Mac OS X 10.1, 
+ * but later deprecated in Mac OS X 10.3
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3    AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3
+ * 
+ * Used on declarations introduced in Mac OS X 10.2, 
+ * but later deprecated in Mac OS X 10.3
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3    AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER
+#endif
+
+/*
+ * DEPRECATED_IN_MAC_OS_X_VERSION_10_3_AND_LATER
+ * 
+ * Used on types deprecated in Mac OS X 10.3 
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_3_AND_LATER    DEPRECATED_ATTRIBUTE
+#else
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_3_AND_LATER
+#endif
+
+
+
+
+
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER
+ * 
+ * Used on declarations introduced in Mac OS X 10.4 
+ */
+#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_4
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER     UNAVAILABLE_ATTRIBUTE
+#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_4
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER     WEAK_IMPORT_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED
+ * 
+ * Used on declarations introduced in Mac OS X 10.4, 
+ * and deprecated in Mac OS X 10.4
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED    AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4
+ * 
+ * Used on declarations introduced in Mac OS X 10.0, 
+ * but later deprecated in Mac OS X 10.4
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4    AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4
+ * 
+ * Used on declarations introduced in Mac OS X 10.1, 
+ * but later deprecated in Mac OS X 10.4
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4    AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4
+ * 
+ * Used on declarations introduced in Mac OS X 10.2, 
+ * but later deprecated in Mac OS X 10.4
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4    AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4
+ * 
+ * Used on declarations introduced in Mac OS X 10.3, 
+ * but later deprecated in Mac OS X 10.4
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4    AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER
+#endif
+
+/*
+ * DEPRECATED_IN_MAC_OS_X_VERSION_10_4_AND_LATER
+ * 
+ * Used on types deprecated in Mac OS X 10.4 
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_4_AND_LATER    DEPRECATED_ATTRIBUTE
+#else
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_4_AND_LATER
+#endif
+
+
+
+
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER
+ * 
+ * Used on declarations introduced in Mac OS X 10.5 
+ */
+#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_5
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER     UNAVAILABLE_ATTRIBUTE
+#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_5
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER     WEAK_IMPORT_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED
+ * 
+ * Used on declarations introduced in Mac OS X 10.5, 
+ * and deprecated in Mac OS X 10.5
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED    AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5
+ * 
+ * Used on declarations introduced in Mac OS X 10.0, 
+ * but later deprecated in Mac OS X 10.5
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5    AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5
+ * 
+ * Used on declarations introduced in Mac OS X 10.1, 
+ * but later deprecated in Mac OS X 10.5
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5    AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5
+ * 
+ * Used on declarations introduced in Mac OS X 10.2, 
+ * but later deprecated in Mac OS X 10.5
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5    AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5
+ * 
+ * Used on declarations introduced in Mac OS X 10.3, 
+ * but later deprecated in Mac OS X 10.5
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5    AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5
+ * 
+ * Used on declarations introduced in Mac OS X 10.4, 
+ * but later deprecated in Mac OS X 10.5
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5    AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER
+#endif
+
+/*
+ * DEPRECATED_IN_MAC_OS_X_VERSION_10_5_AND_LATER
+ * 
+ * Used on types deprecated in Mac OS X 10.5 
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_5_AND_LATER    DEPRECATED_ATTRIBUTE
+#else
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_5_AND_LATER
+#endif
+
+
+
+
+
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+ * 
+ * Used on declarations introduced in Mac OS X 10.6 
+ */
+#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER     UNAVAILABLE_ATTRIBUTE
+#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_6
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER     WEAK_IMPORT_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED
+ * 
+ * Used on declarations introduced in Mac OS X 10.6, 
+ * and deprecated in Mac OS X 10.6
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED    AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6
+ * 
+ * Used on declarations introduced in Mac OS X 10.0, 
+ * but later deprecated in Mac OS X 10.6
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6    AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6
+ * 
+ * Used on declarations introduced in Mac OS X 10.1, 
+ * but later deprecated in Mac OS X 10.6
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6    AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6
+ * 
+ * Used on declarations introduced in Mac OS X 10.2, 
+ * but later deprecated in Mac OS X 10.6
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6    AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6
+ * 
+ * Used on declarations introduced in Mac OS X 10.3, 
+ * but later deprecated in Mac OS X 10.6
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6    AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6
+ * 
+ * Used on declarations introduced in Mac OS X 10.4, 
+ * but later deprecated in Mac OS X 10.6
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6    AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6
+ * 
+ * Used on declarations introduced in Mac OS X 10.5, 
+ * but later deprecated in Mac OS X 10.6
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6    AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER
+#endif
+
+/*
+ * DEPRECATED_IN_MAC_OS_X_VERSION_10_6_AND_LATER
+ * 
+ * Used on types deprecated in Mac OS X 10.6 
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_6_AND_LATER    DEPRECATED_ATTRIBUTE
+#else
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_6_AND_LATER
+#endif
+
+
+
+
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+ * 
+ * Used on declarations introduced in Mac OS X 10.7 
+ */
+#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_7
+    #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER     UNAVAILABLE_ATTRIBUTE
+#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_7
+    #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER     WEAK_IMPORT_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED
+ * 
+ * Used on declarations introduced in Mac OS X 10.7, 
+ * and deprecated in Mac OS X 10.7
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
+    #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED    AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
+ * 
+ * Used on declarations introduced in Mac OS X 10.0, 
+ * but later deprecated in Mac OS X 10.7
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7    AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
+ * 
+ * Used on declarations introduced in Mac OS X 10.1, 
+ * but later deprecated in Mac OS X 10.7
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7    AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
+ * 
+ * Used on declarations introduced in Mac OS X 10.2, 
+ * but later deprecated in Mac OS X 10.7
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7    AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
+ * 
+ * Used on declarations introduced in Mac OS X 10.3, 
+ * but later deprecated in Mac OS X 10.7
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7    AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
+ * 
+ * Used on declarations introduced in Mac OS X 10.4, 
+ * but later deprecated in Mac OS X 10.7
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7    AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
+ * 
+ * Used on declarations introduced in Mac OS X 10.5, 
+ * but later deprecated in Mac OS X 10.7
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7    AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER
+#endif
+
+/*
+ * AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
+ * 
+ * Used on declarations introduced in Mac OS X 10.6, 
+ * but later deprecated in Mac OS X 10.7
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7    DEPRECATED_ATTRIBUTE
+#else
+    #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7    AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+#endif
+
+/*
+ * DEPRECATED_IN_MAC_OS_X_VERSION_10_7_AND_LATER
+ * 
+ * Used on types deprecated in Mac OS X 10.7 
+ */
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_7_AND_LATER    DEPRECATED_ATTRIBUTE
+#else
+    #define DEPRECATED_IN_MAC_OS_X_VERSION_10_7_AND_LATER
+#endif
+
+#endif  /* __AVAILABILITYMACROS__ */
+
+
diff --git a/EXTERNAL_HEADERS/Makefile b/EXTERNAL_HEADERS/Makefile
index 9f8e3535b..46ee40f90 100644
--- a/EXTERNAL_HEADERS/Makefile
+++ b/EXTERNAL_HEADERS/Makefile
@@ -11,9 +11,6 @@ INSTINC_SUBDIRS =	\
 	architecture	\
 	mach-o
 
-INSTINC_SUBDIRS_PPC = 	\
-	architecture
-
 INSTINC_SUBDIRS_I386 =	\
 	architecture
 
@@ -23,8 +20,12 @@ INSTINC_SUBDIRS_X86_64 =	\
 INSTINC_SUBDIRS_ARM =	\
 	architecture
 
+
 EXPORT_FILES = \
 	AppleSecureBootEpoch.h \
+	Availability.h	\
+	AvailabilityInternal.h	\
+	AvailabilityMacros.h	\
 	ar.h		\
 	stdarg.h	\
 	stdbool.h	\
diff --git a/EXTERNAL_HEADERS/architecture/Makefile b/EXTERNAL_HEADERS/architecture/Makefile
index 8c929ba14..a322a080f 100644
--- a/EXTERNAL_HEADERS/architecture/Makefile
+++ b/EXTERNAL_HEADERS/architecture/Makefile
@@ -9,9 +9,6 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS =
 
-INSTINC_SUBDIRS_PPC = 	\
-	ppc
-
 INSTINC_SUBDIRS_I386 =	\
 	i386
 
@@ -21,6 +18,7 @@ INSTINC_SUBDIRS_X86_64 =	\
 INSTINC_SUBDIRS_ARM =	\
 	arm
 
+
 EXPORT_FILES = 
 
 INSTALL_MI_LIST = 
diff --git a/EXTERNAL_HEADERS/architecture/ppc/Makefile b/EXTERNAL_HEADERS/architecture/ppc/Makefile
deleted file mode 100644
index 374f3bd9a..000000000
--- a/EXTERNAL_HEADERS/architecture/ppc/Makefile
+++ /dev/null
@@ -1,33 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-INSTINC_SUBDIRS_PPC =
-
-EXPORT_FILES = 		\
-	asm_help.h	\
-	basic_regs.h	\
-	cframe.h	\
-	fp_regs.h	\
-	macro_help.h	\
-	pseudo_inst.h	\
-	reg_help.h
-
-
-INSTALL_MD_LIST = 
-
-INSTALL_MD_DIR = 
-
-EXPORT_MD_LIST = ${EXPORT_FILES}
-
-EXPORT_MD_DIR = architecture/ppc
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/EXTERNAL_HEADERS/architecture/ppc/asm_help.h b/EXTERNAL_HEADERS/architecture/ppc/asm_help.h
deleted file mode 100644
index 0ff2171c4..000000000
--- a/EXTERNAL_HEADERS/architecture/ppc/asm_help.h
+++ /dev/null
@@ -1,456 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1996 NeXT Software, Inc.  All rights reserved.
- *
- *	File:	architecture/ppc/asm_help.h
- *	Author:	Mike DeMoney, NeXT Software, Inc.
- *
- *	This header file defines macros useful when writing assembly code
- *	for the PowerPC processors.
- *	r12 is used as the tmp register / PICIFY base.
- *
- * HISTORY
- * 20-May-97  Umesh Vaishampayan (umeshv@apple.com)
- *	Implemented Dynamic / PIC macros.
- *
- * 28-Dec-96  Umesh Vaishampayan (umeshv@NeXT.com)
- *	added ".align" directive to various macros to avoid alignment 
- *  	faults. Moved Register Usage #defines to reg_help.h as that's
- *	where they should have been in the first place.
- *	Added Dynamic / PIC macroes for routines which refernce external
- *	symbols. Not implemented fully as yet.
- *
- * 05-Nov-92  Mike DeMoney (mike@next.com)
- *	Created.
- */
-
-#ifndef	_ARCH_PPC_ASM_HELP_H_
-#define	_ARCH_PPC_ASM_HELP_H_
-
-#include	<architecture/ppc/reg_help.h>
-
-#ifdef	__ASSEMBLER__
-/*
- * ppc stack frames look like this after procedure prolog has
- * been executed:
- *
- * Higher address:
- *			.........
- *		+-------------------------------+
- * 		| caller's LR			|
- *		+-------------------------------+
- * 		| caller's CR			|
- *		+-------------------------------+
- * Caller's SP->| caller's caller's sp		|  ^^ Caller's Frame ^^
- *		+===============================+  vv Called Rtn Frame vv
- *		|	Save Area for		| FPF 31
- *			..........
- *		| 	Caller's FPF's		| FPF n
- *		+-------------------------------+
- *		|	Save Area for		| GRF 31
- *			..........
- *		| 	Caller's GRF's		| GRF n
- *		+-------------------------------+
- *		|	alignment pad		|
- *			............
- *		|	(if necessary)		|
- *		+-------------------------------+
- *		|	Local			|
- *			........
- *		| 	Variables		|
- *		+-------------------------------+
- * SP + X ->	| aN for FUTURE call		|
- *		+-------------------------------+
- *			..........
- *		+-------------------------------+
- * SP + 28 ->	| a1 for FUTURE call		|
- *		+-------------------------------+
- * SP + 24 ->	| a0 for FUTURE call		|
- *		+-------------------------------+
- * SP + 20 ->	| caller's TOC			|
- *		+-------------------------------+
- * SP + 16 ->	| reserved			|
- *		+-------------------------------+
- * SP + 12 ->	| reserved			|
- *		+-------------------------------+
- * SP + 8 ->	| LR callee-save for FUTURE call|
- *		+-------------------------------+
- * SP + 4 ->	| CR callee-save for FUTURE call|
- *		+-------------------------------+
- * SP ->	| caller's sp			|
- *		+===============================+
- * Lower address:
- *
- * NOTE: All state with the exception of LR and CR are saved in the
- * called routines frame.  LR and CR are saved in the CALLER'S FRAME.
- *
- * ALSO NOTE: Args to the called routine are found in the caller's frame.
- */
-
-/*
- * ARG(n) -- stack offset to n'th argument
- *
- * NOTE CAREFULLY!  These macros start numbering arguments at 1 (NOT 0)
- * The first argument is ARG(1).
- *
- * ALSO NOTE:  This stack offset is only valid if using routine
- * DOES NOT alter SP.
- *
- */
-#define	ARG(n)		((((n) - 1) * 4) + 24)
-
-/*
- * Macros for building stack frame according to C calling conventions.
- * lr, cr, and sp are saved.
- *
- * NOTE WELL: localvarsize is in bytes, maxargsout is a count of words,
- * grfsaved and fpfsaved is a count of registers.  BE SURE TO COUNT
- * BOTH FP (r31) AND sN REGISTERS IN THE COUNT OF GRF REGISTERS SAVED!
- * This will be TWO more than the N of the highest sN register you
- * save: s2 implies you are saving s2, s1, s0, and fp => grfsaved
- * should be 4!
- *
- * FURTHER NOTE: These macros do NOT SAVE GRF or FPF registers.  User
- * must do that.  GRF sN regs should be saved via
- *	stmw	sN,SAVED_GRF_S(N)(sp)
- * where N is the highest numbered s* register to be saved.  E.g. if
- * s0, s1, and s2 are to be saved use:
- *	stmw	s2,SAVED_GRF_S(2)(sp)
- * Note that this also saves fp.
- * An individual saved grf can be loaded via:
- *	lwz	s2,SAVED_GRF_S(2)(sp)
- * Analogous stuff works for fpf's.
- *
- * NOTE: these simple routines will be replaced with more complicated
- * ones once we know what the linker and gdb will require as for as 
- * register use masks and frame declarations.
- *
- * Warning: ROUND_TO_STACK is only to be used in assembly language;
- * for C usage, use ROUND_FRAME() in reg_help.h.
- */
-#define	ROUND_TO_STACK(len)				\
-	(((len) + STACK_INCR - 1) / STACK_INCR * STACK_INCR)
-
-#define	BUILD_FRAME(localvarsize, maxargsout, grfsaved, fpfsaved)	\
-	.set	__argoutsize, ROUND_TO_STACK((maxargsout) * 4)		@\
-	.if	__argoutsize < 32					@\
-	  .set	__argoutsize,32						@\
-	.endif								@\
-	.set	__framesize, ROUND_TO_STACK(				\
-			24 + __argoutsize + (localvarsize)		\
-			+ 4*(grfsaved) + 8*(fpfsaved))			@\
-	.set	__grfbase,(__framesize - 4*(grfsaved) - 8*(fpfsaved))	@\
-	.set	__fpfbase,(__framesize - 8*(fpfsaved))			@\
-	mflr	r0							@\
-	mfcr	r12							@\
-	stw	r0,8(sp)						@\
-	stw	r12,4(sp)						@\
-	stwu	r1,-__framesize(r1)
-
-/*
- * Macros for referencing data in stack frame.
- *
- * NOTE WELL: ARG's and VAR's start at 1, NOT 0. Why ??? (FIXME)
- */
-#define	LOCAL_VAR(n)	(((n)-1)*4 + __argoutsize + 24)
-#define	SAVED_GRF_S(n)	(__grfbase + ((grfsaved) - (n) - 2) * 4)
-#define	SAVED_FRF_FS(n)	(__fpfbase + ((fpfsaved) - (n) - 1) * 4)
-#define	ARG_IN(n)	(ARG(n) + __framesize)
-#define	ARG_OUT(n)	(ARG(n) + 0)
-#define	SAVED_FP	(__grfbase + ((grfsaved) - 1) * 4)
-#define	SAVED_LR	(__framesize + 8)
-#define	SAVED_CR	(__framesize + 4)
-
-/*
- * Macros for unwinding stack frame.
- * NOTE: GRF's and FPF's are NOT RESTORED.  User must do this before
- * using this macro.
- */
-#define	RETURN						\
-	.if	__framesize				@\
-	  lwz32	r0,r1,SAVED_LR				@\
-	  lwz32	r12,r1,SAVED_CR				@\
-	  addic	sp,r1,__framesize			@\
-	  mtlr	r0					@\
-	  mtcrf	0xff,r12				@\
-	  blr						@\
-	.else						@\
-	  blr						@\
-	.endif
-
-
-/*
- * Macros for declaring procedures
- *
- * Use of these macros allows ctags to have a predictable way
- * to find various types of declarations.  They also simplify
- * inserting appropriate symbol table information.
- *
- * NOTE: these simple stubs will be replaced with more
- * complicated versions once we know what the linker and gdb
- * will require as far as register use masks and frame declarations.
- * These macros may also be ifdef'ed in the future to contain profiling
- * code.
- *
- * FIXME: Document what makes a leaf a LEAF and a handler a HANDLER.
- * (E.g. leaf's have return pc in lr, NESTED's have rpc in offset off
- * sp, handlers have rpc in exception frame which is found via exception
- * link, etc etc.)
- */
-
-/*
- * TEXT -- declare start of text segment
- */
-#define	TEXT						\
-	.text						@\
-	.align 2
-
-/*
- * LEAF -- declare global leaf procedure
- * NOTE: Control SHOULD NOT FLOW into a LEAF!  A LEAF should only
- * be jumped to.  (A leaf may do an align.)  Use a LABEL() if you
- * need control to flow into the label.
- */
-#define	LEAF(name)					\
-	.align 2					@\
-	.globl	name					@\
-name:							@\
-	.set	__framesize,0
-
-/*
- * X_LEAF -- declare alternate global label for leaf
- */
-#define	X_LEAF(name, value)				\
-	.globl	name					@\
-	.set	name,value
-
-/*
- * P_LEAF -- declare private leaf procedure
- */
-#define	P_LEAF(name)					\
-	.align 2					@\
-name:							@\
-	.set	__framesize,0
-
-/*
- * LABEL -- declare a global code label
- * MUST be used (rather than LEAF, NESTED, etc) if control
- * "flows into" the label.
- */
-#define	LABEL(name)					\
-	.align 2					@\
-	.globl	name					@\
-name:
-
-/*
- * NESTED -- declare procedure that invokes other procedures
- */
-#define	NESTED(name, localvarsize, maxargsout, grfsaved, fpfsaved)\
-	.align 2				@\
-	.globl	name				@\
-name:						@\
-	BUILD_FRAME(localvarsize, maxargsout, grfsaved, fpfsaved)
-
-/*
- * X_NESTED -- declare alternate global label for nested proc
- */
-#define	X_NESTED(name, value)			\
-	.globl	name				@\
-	.set	name,value
-
-/*
- * P_NESTED -- declare private nested procedure
- */
-#define	P_NESTED(name, localvarsize, maxargsout, grfsaved, fpfsaved)\
-	.align 2					@\
-name:							@\
-	BUILD_FRAME(locavarsize, maxargsout, grfsaved, fpfsaved)
-
-/*
- * HANDLER -- declare procedure with exception frame rather than
- * standard C frame
- */
-#define	HANDLER(name)					\
-	.align 2					@\
-	.globl	name					@\
-name:
-
-/*
- * X_HANDLER -- declare alternate name for exception handler
- * (Should appear immediately before a HANDLER declaration or
- * another X_HANDLER declaration)
- */
-#define	X_HANDLER(name)					\
-	.align 2					@\
-	.globl	name					@\
-name:
-
-/*
- * P_HANDLER -- declare private handler
- */
-#define	P_HANDLER(name)					\
-	.align 2				@\
-name:
-
-/*
- * END -- mark end of procedure
- * FIXME: Unimplemented for now.
- */
-#define	END(name)
-
-/*
- * BL -- call procedure (relative)
- */
-#define	BL(name)					\
-	bl	name
-
-/*
- * Storage definition macros
- * The main purpose of these is to allow an easy handle for ctags
- */
-
-/*
- * IMPORT -- import symbol
- */
-#define	IMPORT(name)					\
-	.reference	name
-
-/*
- * ABS -- declare global absolute symbol
- */
-#define	ABS(name, value)				\
-	.globl	name					@\
-	.set	name,value
-
-/*
- * P_ABS -- declare private absolute symbol
- */
-#define	P_ABS(name, value)				\
-	.set	name,value
-
-/*
- * EXPORT -- declare global label for data
- */
-#define	EXPORT(name)					\
-	.align 2					@\
-	.globl	name					@\
-name:
-
-/*
- * BSS -- declare global zero'ed storage
- */
-#define	BSS(name,size)					\
-	.comm	name,size
-
-
-/*
- * P_BSS -- declare private zero'ed storage
- */
-#define	P_BSS(name,size)				\
-	.lcomm	name,size
-
-/*
- * dynamic/PIC macros for routines which reference external symbols
- */
-#if defined(__DYNAMIC__)
-#define PICIFY_REG r12
-
-/* Assume that the lr is saved before calling any of these macros */
-/* using PICIFY() */
-
-#define PICIFY(var)				\
-	mflr	r0				@\
-	bl	1f				@\
-1:	mflr	PICIFY_REG			@\
-	mtlr	r0				@\
-	addis	PICIFY_REG, PICIFY_REG, ha16(L ## var ## $non_lazy_ptr - 1b) @\
-	lwz	PICIFY_REG, lo16(L ## var ## $non_lazy_ptr - 1b)(PICIFY_REG)
-
-#define CALL_EXTERN_AGAIN(var)			\
-	PICIFY(var)				@\
-	mtctr	PICIFY_REG			@\
-	mflr	r0				@\
-	stw	r0,8(r1)			@\
-	stwu	r1,-56(r1)			@\
-	bctrl					@\
-	addic	r1,r1,56     			@\
-	lwz	r0,8(r1)			@\
-	mtlr	r0
-
-#define NON_LAZY_STUB(var)			\
-	.non_lazy_symbol_pointer		@\
-	.align 2				@\
-L ## var ## $non_lazy_ptr:			@\
-	.indirect_symbol var			@\
-	.long 0					@\
-	.text					@\
-	.align 2
-
-#define	BRANCH_EXTERN(var)			\
-	PICIFY(var)				@\
-	mtctr	PICIFY_REG			@\
-	bctr					@\
-	NON_LAZY_STUB(var)
-
-#define CALL_EXTERN(var)			\
-	CALL_EXTERN_AGAIN(var)			@\
-	NON_LAZY_STUB(var)
-
-#define REG_TO_EXTERN(reg, var)			\
-	PICIFY(var)				@\
-	stw reg, 0(PICIFY_REG)			@\
-	NON_LAZY_STUB(var)
-
-#define EXTERN_TO_REG(reg, var)			\
-	PICIFY(var)				@\
-	lwz	reg, 0(PICIFY_REG)		@\
-	NON_LAZY_STUB(var)
-
-#else /* ! __DYNAMIC__ */
-#define TMP_REG r12
-#define BRANCH_EXTERN(var)			\
-	b	var
-
-#define CALL_EXTERN(var)			\
-	bl	var
-
-#define CALL_EXTERN_AGAIN(var)			\
-	CALL_EXTERN(var)
-
-#define REG_TO_EXTERN(reg, var)			\
-	lis	TMP_REG, ha16(var)		@\
-	stw	reg, lo16(var)(TMP_REG)
-
-#define EXTERN_TO_REG(reg, var)			\
-	lis	reg, ha16(var)			@\
-	lwz	reg, lo16(var)(reg)
-
-#endif	/* __DYNAMIC__ */
-
-#endif	/* __ASSEMBLER__ */
-#endif	/* _ARCH_PPC_ASM_HELP_H_ */
diff --git a/EXTERNAL_HEADERS/architecture/ppc/basic_regs.h b/EXTERNAL_HEADERS/architecture/ppc/basic_regs.h
deleted file mode 100644
index b9dbdf699..000000000
--- a/EXTERNAL_HEADERS/architecture/ppc/basic_regs.h
+++ /dev/null
@@ -1,306 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1996 NeXT Software, Inc.  All rights reserved.
- *
- *	File:	architecture/ppc/basic_regs.h
- *	Author:	Doug Mitchell, NeXT Software, Inc.
- *
- *	Basic ppc registers.
- *
- * HISTORY
- * 22-May-97  Umesh Vaishampayan  (umeshv@apple.com)
- 	Updated to match MPCFPE32B/AD 1/97 REV. 1 
- * 29-Dec-96  Umesh Vaishampayan  (umeshv@NeXT.com)
- *	Ported from m98k.
- * 05-Nov-92  Doug Mitchell at NeXT
- *	Created.
- */
-
-#ifndef _ARCH_PPC_BASIC_REGS_H_
-#define _ARCH_PPC_BASIC_REGS_H_
-
-#include <architecture/ppc/reg_help.h>
-#include <architecture/ppc/macro_help.h>
-
-#if !defined(__ASSEMBLER__)
-
-/*
- * Number of General Purpose registers.
- */
-#define PPC_NGP_REGS	32
-
-/*
- * Common half-word used in Machine State Register and in 
- * various exception frames. Defined as a macro because the compiler
- * will align a struct to a word boundary when used inside another struct.
- */
-#define MSR_BITS							   \
-	unsigned	ee:BIT_WIDTH(15),	/* external intr enable    */ \
-			pr:BIT_WIDTH(14),	/* problem state	   */ \
-			fp:BIT_WIDTH(13),	/* floating point avail	   */ \
-			me:BIT_WIDTH(12),	/* machine check enable	   */ \
-			fe0:BIT_WIDTH(11),	/* fp exception mode 0	   */ \
-			se:BIT_WIDTH(10),	/* single step enable	   */ \
-			be:BIT_WIDTH(9),	/* branch trace enable	   */ \
-			fe1:BIT_WIDTH(8),	/* fp exception mode 0	   */ \
-			rsvd1:BIT_WIDTH(7),	/* reserved		   */ \
-			ip:BIT_WIDTH(6),	/* interrupt prefix	   */ \
-			ir:BIT_WIDTH(5),	/* instruction relocate	   */ \
-			dr:BIT_WIDTH(4),	/* data relocate	   */ \
-			rsvd2:BITS_WIDTH(3,2),	/* reserved		   */ \
-			ri:BIT_WIDTH(1),	/* recoverable exception   */ \
-			le:BIT_WIDTH(0)		/* Little-endian mode	   */
-
-/*
- * Machine state register.
- * Read and written via get_msr() and set_msr() inlines, below.
- */
-typedef struct {
-	unsigned	rsvd3:BITS_WIDTH(31,19),	// reserved
-			pow:BIT_WIDTH(18),	// Power management enable
-			rsvd0: BIT_WIDTH(17),	// reserved
-			ile: BIT_WIDTH(16);	// exception little endian
-
-			MSR_BITS;			// see above
-} msr_t;
-
-/*
- * Data Storage Interrupt Status Register (DSISR)
- */
-typedef struct {
-	unsigned	dse:BIT_WIDTH(31);	// direct-store error
-	unsigned	tnf:BIT_WIDTH(30);	// translation not found
-	unsigned	:BITS_WIDTH(29,28);
-	unsigned	pe:BIT_WIDTH(27);	// protection error
-	unsigned	dsr:BIT_WIDTH(26);	// lwarx/stwcx to direct-store
-	unsigned	rw:BIT_WIDTH(25);	// 1 => store, 0 => load
-	unsigned	:BITS_WIDTH(24,23);
-	unsigned	dab:BIT_WIDTH(22);	// data address bkpt (601)
-	unsigned	ssf:BIT_WIDTH(21);	// seg table search failed
-	unsigned	:BITS_WIDTH(20,0);
-} dsisr_t;
-
-/*
- * Instruction Storage Interrupt Status Register (really SRR1)
- */
-typedef struct {
-	unsigned	:BIT_WIDTH(31);
-	unsigned	tnf:BIT_WIDTH(30);	// translation not found
-	unsigned	:BIT_WIDTH(29);
-	unsigned	dse:BIT_WIDTH(28);	// direct-store fetch error
-	unsigned	pe:BIT_WIDTH(27);	// protection error
-	unsigned	:BITS_WIDTH(26,22);
-	unsigned	ssf:BIT_WIDTH(21);	// seg table search failed
-	unsigned	:BITS_WIDTH(20,16);
-	MSR_BITS;
-} isisr_t;
-
-/*
- * Alignment Interrupt Status Register (really DSISR)
- * NOTE: bit numbers in field *names* are in IBM'ese (0 is MSB).
- * FIXME: Yuck!!! Double Yuck!!!
- */
-typedef struct {
-	unsigned	:BITS_WIDTH(31,20);
-	unsigned	ds3031:BITS_WIDTH(19,18);// bits 30:31 if DS form
-	unsigned	:BIT_WIDTH(17);
-	unsigned	x2930:BITS_WIDTH(16,15); // bits 29:30 if X form
-	unsigned	x25:BIT_WIDTH(14);	 // bit 25 if X form or
-						 // bit 5 if D or DS form
-	unsigned	x2124:BITS_WIDTH(13,10); // bits 21:24 if X form or
-						 // bits 1:4 if D or DS form
-	unsigned	all615:BITS_WIDTH(9,0);	 // bits 6:15 of instr
-	MSR_BITS;
-} aisr_t;
-
-/*
- * Program Interrupt Status Register (really SRR1)
- */
-typedef struct {
-	unsigned	:BITS_WIDTH(31,21);
-	unsigned	fpee:BIT_WIDTH(20);	// floating pt enable exception
-	unsigned	ill:BIT_WIDTH(19);	// illegal instruction
-	unsigned	priv:BIT_WIDTH(18);	// privileged instruction
-	unsigned	trap:BIT_WIDTH(17);	// trap program interrupt
-	unsigned	subseq:BIT_WIDTH(16);	// 1 => SRR0 points to
-						// subsequent instruction
-	MSR_BITS;
-} pisr_t;
-
-/*
- * Condition register. May not be useful in C, let's see...
- */
-typedef struct {
-	unsigned	lt:BIT_WIDTH(31),	// negative
-			gt:BIT_WIDTH(30),	// positive
-			eq:BIT_WIDTH(29),	// equal to zero
-			so:BIT_WIDTH(28),	// summary overflow
-			fx:BIT_WIDTH(27),	// floating point exception
-			fex:BIT_WIDTH(26),	// fp enabled exception
-			vx:BIT_WIDTH(25),	// fp invalid operation
-						//    exception
-			ox:BIT_WIDTH(24),	// fp overflow exception
-			rsvd:BITS_WIDTH(23,0);	// reserved
-} cr_t;
-
-/*
- * Abstract values representing fe0:fe1.
- * See get_fp_exc_mode(), below.
- */
-typedef enum {
-	FEM_IGNORE_EXCEP,	// ignore exceptions
-	FEM_IMPR_NONREC,	// imprecise nonrecoverable
-	FEM_IMPR_RECOV,		// imprecise recoverable
-	FEM_PRECISE
-} fp_exc_mode_t;
-
-
-/*
- * Special purpose registers.
- */
- 
-/*
- * Processor version register (special purpose register pvr).
- */
-typedef struct {
-	unsigned	version:BITS_WIDTH(31,16),	
-			revision:BITS_WIDTH(15,0);
-} pvr_t;
-
-/*
- * Fixed point exception register (special purpose register xer)
- */
-typedef struct {
-	unsigned	so:BIT_WIDTH(31),	// summary overflow
-			ov:BIT_WIDTH(30),	// overflow
-			ca:BIT_WIDTH(29),	// carry
-			rsvd1:BITS_WIDTH(28,7), // reserved
-			byte_count:BITS_WIDTH(6,0);	
-} xer_t;
-
-/*
- * Inlines and macros to manipulate the above registers.
- */
- 
-/*
- * Get/set machine state register.
- */
-static __inline__ msr_t
-get_msr()
-{
-	msr_t	__msr_tmp;	
-	__asm__ volatile ("mfmsr %0  /* mfmsr */" : "=r" (__msr_tmp));
-	return __msr_tmp;
-}
-
-static __inline__ void
-set_msr(msr_t msr)
-{
-	__asm__ volatile ("mtmsr %0 /* mtmsr */ " : : "r" (msr));	
-}
-
-/* 
- * Determine current fp_exc_mode_t given prog_mode.
- */
-static __inline__ fp_exc_mode_t
-get_fp_exc_mode(pmr_t pmr)
-{
-	if(pmr.fe0)
-		return pmr.fe1 ? FEM_PRECISE : FEM_IMPR_RECOV;
-	else
-		return pmr.fe1 ? FEM_IMPR_NONREC : FEM_IGNORE_EXCEP;
-}
-
-/*
- * Software definitions for special purpose registers.
- * The same register is used as per_cpu data pointer and
- * vector base register. This requires that the vector
- * table be the first item in the per_cpu table.
- */
-#define SR_EXCEPTION_TMP_LR	sprg0
-#define SR_EXCEPTION_TMP_CR	sprg1
-#define SR_EXCEPTION_TMP_AT	sprg2
-#define SR_PER_CPU_DATA		sprg3
-#define SR_VBR			sprg3
-
-/*
- * Get/set special purpose registers.
- *
- * GET_SPR - get SPR by name.
- *
- * Example usage:
- *
- *   {
- *	xer_t	some_xer;
- *
- *	some_xer = GET_SPR(xer_t, xer);
- *	...
- *   }
- *
- * This is a strange one: it relies on the GNU C "statement expression"
- * extension. The brace-enclosed statements inside the parentheses form a
- * single expression whose value is that of the last expression listed
- * ("__spr_tmp;").
- *
- */
- 
-#define GET_SPR(type, spr)					\
-({								\
-	unsigned	__spr_tmp;				\
-	__asm__ volatile ("mfspr %0, " STRINGIFY(spr) : "=r" (__spr_tmp));				\
-	*(type *)&__spr_tmp;					\
-})
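The idiom GET_SPR relies on is worth seeing in isolation; here is a
minimal standalone sketch of the same statement-expression extension
(the macro name and body are illustrative, not part of this header):

    #include <stdio.h>

    /* The ({ ... }) block is an expression; its value is the value
     * of the last expression inside the braces. */
    #define SQUARE_AND_LOG(x)			\
    ({						\
    	int __tmp = (x) * (x);			\
    	printf("squaring %d\n", (x));		\
    	__tmp;					\
    })

    int main(void)
    {
    	int n = SQUARE_AND_LOG(3);	/* n == 9 */
    	printf("%d\n", n);
    	return 0;
    }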
-
-/* 
- * Example usage of SET_SPR:
- *
- *   {
- *	xer_t some_xer;
- *
- *	...set up some_xer...
- *	SET_SPR(xer, some_xer);
- *   }
- */
-#define	SET_SPR(spr, val)					\
-MACRO_BEGIN							\
-	__typeof__ (val) __spr_tmp = (val);			\
-	__asm__ volatile ("mtspr "STRINGIFY(spr) ", %0" : : "r" (__spr_tmp));					\
-MACRO_END
-
-/*
- * Fully synchronize instruction stream.
- */
-static __inline__ void
-ppc_sync()
-{
-	__asm__ volatile ("sync         /* sync */" : : );
-}
-
-#endif /* ! __ASSEMBLER__ */
-
-#endif /* _ARCH_PPC_BASIC_REGS_H_ */
-
diff --git a/EXTERNAL_HEADERS/architecture/ppc/fp_regs.h b/EXTERNAL_HEADERS/architecture/ppc/fp_regs.h
deleted file mode 100644
index ab48b8821..000000000
--- a/EXTERNAL_HEADERS/architecture/ppc/fp_regs.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1996 NeXT Software, Inc.  All rights reserved.
- *
- *	File:	architecture/ppc/fp_regs.h
- *	Author:	Doug Mitchell, NeXT Software, Inc.
- *
- *	ppc floating point registers.
- *
- * HISTORY
- * 29-Dec-96  Umesh Vaishampayan  (umeshv@NeXT.com)
- *	Ported from m98k.
- * 05-Nov-92  Doug Mitchell at NeXT
- *	Created.
- */
-
-#ifndef _ARCH_PPC_FP_REGS_H_
-#define _ARCH_PPC_FP_REGS_H_
-
-#include <architecture/ppc/reg_help.h>
-
-#if !defined(__ASSEMBLER__)
-/*
- * Floating point status and control register.
- *
- * This struct is aligned to an 8-byte boundary because 64-bit
- * load/store instructions (lfd/stfd) are used to access it. The
- * FPSCR can only be read/written through other FP registers.
- */
-typedef struct {
-        unsigned        unused[1] __attribute__(( aligned(8) ));
-	unsigned	fx:BIT_WIDTH(31),	// exception summary
-			fex:BIT_WIDTH(30),	// enabled exception summary
-			vx:BIT_WIDTH(29),	// invalid op exception
-						//    summary
-			ox:BIT_WIDTH(28),	// overflow exception
-			ux:BIT_WIDTH(27),	// underflow exception
-			zx:BIT_WIDTH(26),	// divide by zero exception
-			xx:BIT_WIDTH(25),	// inexact exception
-			vx_snan:BIT_WIDTH(24),	// not a number exception
-			vx_isi:BIT_WIDTH(23),	// exception
-			vx_idi:BIT_WIDTH(22),	// exception
-			vx_zdz:BIT_WIDTH(21),	// exception
-			vx_imz:BIT_WIDTH(20),	// exception
-			vx_xvc:BIT_WIDTH(19),	// exception
-			fr:BIT_WIDTH(18),	// fraction rounded
-			fi:BIT_WIDTH(17),	// fraction inexact
-			class:BIT_WIDTH(16),	// class descriptor
-			fl:BIT_WIDTH(15),	// negative
-			fg:BIT_WIDTH(14),	// positive
-			fe:BIT_WIDTH(13),	// equal or zero
-			fu:BIT_WIDTH(12),	// not a number
-			rsvd1:BIT_WIDTH(11),	// reserved
-			vx_soft:BIT_WIDTH(10),	// software request exception
-			rsvd2:BIT_WIDTH(9),	// reserved
-			vx_cvi:BIT_WIDTH(8),	// invalid integer convert
-						//    exception
-			ve:BIT_WIDTH(7),	// invalid op exception enable
-			oe:BIT_WIDTH(6),	// overflow exception enable
-			ue:BIT_WIDTH(5),	// underflow exception enable
-			ze:BIT_WIDTH(4),	// divide by zero exception
-						//    enable
-			xe:BIT_WIDTH(3),	// inexact exception enable
-			ni:BIT_WIDTH(2),	// non-IEEE exception enable
-			rn:BITS_WIDTH(1,0);	// rounding control
-} ppc_fp_scr_t;
-
-/*
- * Values for fp_scr_t.rn (rounding control).
- */
-typedef enum {
-	RN_NEAREST = 0,
-	RN_TOWARD_ZERO = 1,
-	RN_TOWARD_PLUS = 2,
-	RN_TOWARD_MINUS = 3
-} ppc_fp_rn_t;
-
-/*
- * ppc_fpf_t -- data types that MAY be in the floating point register file.
- * The set of data types actually supported is implementation dependent.
- */
-typedef union {
-        float           f;              // 32 bit IEEE single
-        double          d;              // 64 bit IEEE double
-     
-        /* 
-	 * Ensure the compiler aligns the struct appropriately
-	 */
-        unsigned        x[2] __attribute__(( aligned(8) ));
-} ppc_fpf_t;
-
-/*
- * Number of FP registers.
- */
-#define PPC_NFP_REGS	32
-
-/*
- * Read/write FPSCR.
- * FIXME - these don't work, you need to go thru a fp register.
- */
-typedef union {
-	double 		__dbl;
-	ppc_fp_scr_t 	__scr;
-} __fp_un_t;
-
-static __inline__ ppc_fp_scr_t
-get_fp_scr()
-{
-	__fp_un_t 	__fp_un;
-	
-	__asm__ volatile ("mffs. %0           /* mffs */"	\
-	  	: "=f" (__fp_un.__dbl));
-	return (__fp_un.__scr);		
-}
-
-static __inline__ void
-set_fp_scr(ppc_fp_scr_t fp_scr)
-{
-	__fp_un_t 	__fp_un;
-
-	__fp_un.__scr = fp_scr;
-	__asm__ volatile ("mtfsf 0xff, %0;    /* mtfsf */ "	\
-	  : : "f" (__fp_un.__dbl));	
-}
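A usage sketch for the accessor pair above, built only from this
header's own types and values; note the FIXME above, so the underlying
inlines may need repair before this behaves as intended:

    ppc_fp_scr_t scr = get_fp_scr();
    scr.rn = RN_TOWARD_ZERO;	/* truncate rather than round-to-nearest */
    scr.ze = 1;			/* enable divide-by-zero exceptions */
    set_fp_scr(scr);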
-
-#endif /* ! __ASSEMBLER__ */
-
-#endif /* _ARCH_PPC_FP_REGS_H_ */
diff --git a/EXTERNAL_HEADERS/architecture/ppc/macro_help.h b/EXTERNAL_HEADERS/architecture/ppc/macro_help.h
deleted file mode 100644
index a149f8eb0..000000000
--- a/EXTERNAL_HEADERS/architecture/ppc/macro_help.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1996 NeXT Software, Inc.
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1989 Carnegie-Mellon University
- * Copyright (c) 1988 Carnegie-Mellon University
- * All rights reserved.  The CMU software License Agreement specifies
- * the terms and conditions for use and redistribution.
- *
- *	File:	architecture/ppc/macro_help.h
- *
- *	Provide help in making lint-free macro routines
- *
- * HISTORY
- *
- *	29-Dec-96  Umesh Vaishampayan  (umeshv@NeXT.com)
- *		Created from m98k version. 
- */
-
-#ifndef	_ARCH_PPC_MACRO_HELP_H_
-#define	_ARCH_PPC_MACRO_HELP_H_
-
-#ifndef	MACRO_BEGIN
-# define		MACRO_BEGIN	do {
-#endif	/* MACRO_BEGIN */
-
-#ifndef	MACRO_END
-# define		MACRO_END	} while (0)
-#endif	/* MACRO_END */
-
-#ifndef	MACRO_RETURN
-# define		MACRO_RETURN	if (1) return
-#endif	/* MACRO_RETURN */
-
-#endif	/* _ARCH_PPC_MACRO_HELP_H_ */
-
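The do { ... } while (0) wrappers exist so that a multi-statement macro
behaves as a single statement. An illustrative sketch, assuming the two
wrappers above are in scope (SWAP and order are hypothetical):

    #define SWAP(a, b)				\
    MACRO_BEGIN					\
    	__typeof__(a) __tmp = (a);		\
    	(a) = (b);				\
    	(b) = __tmp;				\
    MACRO_END

    /* Without the wrapper, a two-statement macro body would break
     * here: only its first statement would bind to the if. */
    static void order(int x, int y)
    {
    	if (x > y)
    		SWAP(x, y);	/* parses as one statement */
    	else
    		y = x;
    }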
diff --git a/EXTERNAL_HEADERS/architecture/ppc/pseudo_inst.h b/EXTERNAL_HEADERS/architecture/ppc/pseudo_inst.h
deleted file mode 100644
index da4071e6b..000000000
--- a/EXTERNAL_HEADERS/architecture/ppc/pseudo_inst.h
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1996 NeXT Software, Inc.  All rights reserved.
- *
- *	File:	architecture/ppc/pseudo_inst.h
- *	Author:	Mike DeMoney
- *
- *	This header file defines assembler pseudo-instruction macros
- *	for the ppc.
- *
- *	NOTE: This is obviously only useful to include in assembly
- *	code source.
- *
- *	ALSO NOTE: These macros don't attempt to be 64-bit compatible
- *
- * HISTORY
- * 29-Dec-96  Umesh Vaishampayan  (umeshv@NeXT.com)
- *	Ported from m98k.
- * 05-Nov-92  Mike DeMoney (mike@next.com)
- *	Created.
- */
-
-#ifndef	_ARCH_PPC_PSEUDO_INST_H_
-#define	_ARCH_PPC_PSEUDO_INST_H_
-
-#include <architecture/ppc/reg_help.h>
-#include <architecture/ppc/asm_help.h>
-
-#ifdef	__ASSEMBLER__
-
-/*
- * Pseudo instruction definitions
- */
-
-/*
- * Macro package initialization
- */
-	.set	__no_at,0		/* allow at by default */
-
-/*
- * .at_off -- disable use of at by macros
- * .at_on -- enable use of at by macros
- */
-.macro	.at_off
-	.set	__no_at,1
-.endmacro
-
-.macro	.at_on
-	.set	__no_at,0
-.endmacro
-
-/*
- * li32	rD,IMMED
- *
- * Load 32-bit immediate into rD
- * FIXME: Need a way to undefine built-in macro for this.
- */
-.macro	li32				// li32	rD,immed
-.if	$n != 2
-	.abort	"invalid operands of li32"
-.endif
-.abs	__is_abs,$1
-.if	!__is_abs
-	addis	$0,0,hi16($1)
-	ori	$0,$0,lo16($1)
-.elseif	$1 == 0
-	addi	$0,0,0
-.elseif	($1 & 0xffff) == 0
-	addis	$0,0,hi16($1)
-.elseif	($1 & 0xffff8000) == 0
-	addi	$0,0,$1
-.elseif ($1 & 0xffff8000) == 0xffff8000
-	addi	$0,0,$1
-.else
-	addis	$0,0,hi16($1)
-	ori	$0,$0,lo16($1)
-.endif
-.endmacro
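li32 splits a 32-bit immediate into the halves consumed by addis/ori.
A C sketch of that split, plus the carry-adjusted ha16 used by the
memory-reference macros below (assumes 32-bit unsigned arithmetic; the
helper names are illustrative):

    #include <assert.h>

    static unsigned hi16(unsigned x) { return x >> 16; }
    static unsigned lo16(unsigned x) { return x & 0xffff; }

    /* ha16 pre-compensates for the sign-extension of a 16-bit
     * displacement: if bit 15 of the low half is set, carry 1 up. */
    static unsigned ha16(unsigned x) { return (x + 0x8000) >> 16; }

    int main(void)
    {
    	unsigned imm = 0x1234abcd;

    	/* addis rD,0,hi16 ; ori rD,rD,lo16 reassembles the constant */
    	assert(((hi16(imm) << 16) | lo16(imm)) == imm);

    	/* lis at,ha16 ; op rD,lo16(at): the displacement is
    	 * sign-extended, so the ha16 carry restores the value */
    	assert((ha16(imm) << 16) + (unsigned)(int)(short)lo16(imm) == imm);
    	return 0;
    }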
- 
-
-/*
- * andi32. rD,rS1,IMMED
- *
- * Perform "andi." with (possibly) 32-bit immediate
- */
-.macro	andi32.				// andi32. rD,rS1,IMMED
-.if	$n != 3
-	.abort	"invalid operands of andi."
-.endif
-	.set	__used_at,0
-.abs	__is_abs,$2
-.if	!__is_abs
-	.set	__used_at,1
-	li32	at,$2
-	and.	$0,$1,at
-.elseif	($2 & 0xffff0000) == 0
-	andi.	$0,$1,$2
-.elseif	($2 & 0xffff) == 0
-	andis.	$0,$1,hi16($2)
-.else
-	.set	__used_at,1
-	li32	at,$2
-	and.	$0,$1,at
-.endif
-.if	__no_at & __used_at
-	.abort	"Macro uses at while .no_at in effect"
-.endif
-.endmacro
- 
-/*
- * ori32	rD,rS1,IMMED
- *
- * Perform "ori" with (possibly) 32-bit immediate
- */
-.macro	ori32				// ori32	rD,rS1,IMMED
-.if	$n != 3
-	.abort	"invalid operands of ori"
-.endif
-.abs	__is_abs,$2
-.if	!__is_abs
-	oris	$0,$1,hi16($2)
-	ori	$0,$1,lo16($2)
-.elseif	($2 & 0xffff0000) == 0
-	ori	$0,$1,$2
-.elseif	($2 & 0xffff) == 0
-	oris	$0,$1,hi16($2)
-.else
-	oris	$0,$1,hi16($2)
-	ori	$0,$1,lo16($2)
-.endif
-.endmacro
- 
-/*
- * xori32	rD,rS1,IMMED
- *
- * Perform "xor" with (possibly) 32-bit immediate
- */
-.macro	xori32				// xori32	rD,rS1,IMMED
-.if	$n != 3
-	.abort	"invalid operands of xori"
-.endif
-.abs	__is_abs,$2
-.if	!__is_abs
-	xoris	$0,$1,hi16($2)
-	xori	$0,$1,lo16($2)
-.elseif	($2 & 0xffff0000) == 0
-	xori	$0,$1,$2
-.elseif	($2 & 0xffff) == 0
-	xoris	$0,$1,hi16($2)
-.else
-	xoris	$0,$1,hi16($2)
-	xori	$0,$1,lo16($2)
-.endif
-.endmacro
-
-
-/*
- * MEMREF_INST -- macros for memory-referencing instructions
- * "capable" of dealing with 32-bit offsets.
- *
- * NOTE: Because the assembler doesn't have any mechanism for easily
- * parsing the d(rS) syntax of register-displacement form instructions,
- * these instructions do NOT mirror the normal memory reference
- * instructions.  The following "transformation" is used:
- *	lbz	rD,d(rS)
- * becomes:
- *	lbz32	rD,rS,d
- * I.e.: "32" is appended to the instruction name and the base register
- * and displacement become the 2nd and 3rd comma-separated operands.
- *
- * The forms:
- *	lbz32	rD,d
- * and:
- *	lbz32	rD,rS
- * are also recognized and the missing operand is assumed 0.
- *
- * ALSO NOTE: r0 or zt should never be used as rS in these instructions.
- * Use "0" as rS in this case.
- */
-#define	MEMREF_INST(op)						\
-.macro	op ## 32						@\
-.set	__used_at,0						@\
-.if	$n == 3							@\
- .greg	__is_greg,$1						@\
- .abs	__is_abs,$2						@\
- .if	__is_abs						@\
-  .if	($2 & 0xffff8000) == 0					@\
-	op	$0,$2($1)					@\
-  .elseif ($2 & 0xffff8000) == 0xffff8000			@\
-	op	$0,$2($1)					@\
-  .else								@\
-   .if	!__is_greg						@\
-	.set	__used_at,1					@\
-	lis	at,ha16($2)					@\
-	op	$0,lo16($2)(at)					@\
-   .else							@\
-  	.set	__used_at,1					@\
-	lis	at,ha16($2)					@\
-	add	at,at,$1					@\
-	op	$0,lo16($2)(at)					@\
-   .endif							@\
-  .endif							@\
- .else								@\
-  .if	!__is_greg						@\
-	.set	__used_at,1					@\
-	lis	at,ha16($2)					@\
-	op	$0,lo16($2)(at)					@\
-  .else								@\
-  	.set	__used_at,1					@\
-	lis	at,ha16($2)					@\
-	add	at,at,$1					@\
-	op	$0,lo16($2)(at)					@\
-  .endif							@\
- .endif								@\
-.elseif	$n == 2							@\
- .greg	__is_greg,$1						@\
- .if	!__is_greg						@\
-  .abs	__is_abs,$1						@\
-  .if	__is_abs						@\
-   .if	($1 & 0xffff8000) == 0					@\
-	op	$0,$1(0)					@\
-   .elseif ($1 & 0xffff8000) == 0xffff8000			@\
-	op	$0,$1(0)					@\
-   .else							@\
-	.set	__used_at,1					@\
-	lis	at,ha16($1)					@\
-	op	$0,lo16($1)(at)					@\
-   .endif							@\
-  .else								@\
-	.set	__used_at,1					@\
-	lis	at,ha16($1)					@\
-	op	$0,lo16($1)(at)					@\
-  .endif							@\
- .else								@\
-	op	$0,0($1)					@\
- .endif								@\
-.else								@\
-	.abort "Invalid operands of " #op "32"			@\
-.endif								@\
-.if	__no_at &  __used_at					@\
-	.abort	"Macro uses at while .no_at in effect"		@\
-.endif								@\
-.endmacro
-
-MEMREF_INST(lbz)
-MEMREF_INST(lhz)
-MEMREF_INST(lha)
-MEMREF_INST(lwz)
-MEMREF_INST(lwa)
-MEMREF_INST(ld)
-
-MEMREF_INST(stb)
-MEMREF_INST(sth)
-MEMREF_INST(stw)
-MEMREF_INST(std)
-
-MEMREF_INST(lmw)
-MEMREF_INST(lmd)
-MEMREF_INST(stmw)
-MEMREF_INST(stmd)
-
-/*
- * ARITH_INST -- define 32-bit immediate forms of arithmetic
- * instructions
- *
- *	E.g.	addi32	rD,rS,IMMED
- */
-#define	ARITH_INST(op, op3, sf)					\
-.macro	op ## 32 ## sf						@\
-.if	$n != 3							@\
-	.abort	"invalid operands to " #op "32"			@\
-.endif								@\
-.abs	__is_abs,$2						@\
-.if	__is_abs						@\
- .if	($2 & 0xffff8000) == 0					@\
-	op##sf	$0,$1,$2					@\
- .elseif	($2 & 0xffff8000) == 0xffff8000			@\
-	op##sf	$0,$1,$2					@\
- .elseif	__no_at						@\
-	.abort	"Macro uses at while .no_at in effect"		@\
- .else								@\
-	li32	at,$2						@\
-	op3##sf	$0,$1,at					@\
- .endif								@\
-.elseif	__no_at							@\
-	.abort	"Macro uses at while .no_at in effect"		@\
-.else								@\
-	li32	at,$2						@\
-	op3##sf	$0,$1,at					@\
-.endif								@\
-.endmacro
-
-ARITH_INST(addi, add, )
-ARITH_INST(subi, sub, )
-ARITH_INST(addic, addc, )
-ARITH_INST(subic, subc, )
-ARITH_INST(addic, addc, .)
-ARITH_INST(subic, subc, .)
-ARITH_INST(mulli, mull, )
-
-/*
- * CMPEX_INST -- define 32-bit immediate forms of extended compare
- * instructions
- *
- *	E.g.	cmpwi32	cr3,rS,IMMED
- *		cmpwi32	rS,IMMED
- */
-#define	CMPEX_INST(op, op3)					\
-.macro	op ## 32						@\
-.if	$n == 3							@\
- .abs	__is_abs,$2						@\
- .if	__is_abs						@\
-  .if	($2 & 0xffff8000) == 0					@\
-	op	$0,$1,$2					@\
-  .elseif	($2 & 0xffff8000) == 0xffff8000			@\
-	op	$0,$1,$2					@\
-  .elseif	__no_at						@\
-	.abort	"Macro uses at while .no_at in effect"		@\
-  .else								@\
-	li32	at,$2						@\
-	op3	$0,$1,at					@\
-  .endif							@\
- .elseif	__no_at						@\
-	.abort	"Macro uses at while .no_at in effect"		@\
- .else								@\
-	li32	at,$2						@\
-	op3	$0,$1,at					@\
- .endif								@\
-.elseif	$n == 2							@\
- .abs	__is_abs,$1						@\
- .if	__is_abs						@\
-  .if	($1 & 0xffff8000) == 0					@\
-	op	$0,$1						@\
-  .elseif	($1 & 0xffff8000) == 0xffff8000			@\
-	op	$0,$1						@\
-  .elseif	__no_at						@\
-	.abort	"Macro uses at while .no_at in effect"		@\
-  .else								@\
-	li32	at,$1						@\
-	op3	$0,at						@\
-  .endif							@\
- .elseif	__no_at						@\
-	.abort	"Macro uses at while .no_at in effect"		@\
- .else								@\
-	li32	at,$1						@\
-	op3	$0,at						@\
- .endif								@\
-.else								@\
-	.abort	"invalid operands to " #op "32"			@\
-.endif								@\
-.endmacro
-
-CMPEX_INST(cmpdi, cmpd)
-CMPEX_INST(cmpwi, cmpw)
-CMPEX_INST(cmpldi, cmpld)
-CMPEX_INST(cmplwi, cmplw)
-
-/*
- * CMP_INST -- define 32-bit immediate forms of standard compare
- * instructions
- *
- *	E.g.	cmpi32	cr3,0,rS,IMMED
- */
-#define	CMP_INST(op, op3)					\
-.macro	op ## 32						@\
-.if	$n == 4							@\
- .abs	__is_abs,$3						@\
- .if	__is_abs						@\
-  .if	($3 & 0xffff8000) == 0					@\
-	op	$0,$1,$2,$3					@\
-  .elseif	($3 & 0xffff8000) == 0xffff8000			@\
-	op	$0,$1,$2,$3					@\
-  .elseif	__no_at						@\
-	.abort	"Macro uses at while .no_at in effect"		@\
-  .else								@\
-	li32	at,$3						@\
-	op3	$0,$1,$2,at					@\
-  .endif							@\
- .elseif	__no_at						@\
-	.abort	"Macro uses at while .no_at in effect"		@\
- .else								@\
-	li32	at,$3						@\
-	op3	$0,$1,$2,at					@\
- .endif								@\
-.else								@\
-	.abort	"invalid operands to " #op "32"			@\
-.endif								@\
-.endmacro
-
-CMP_INST(cmpi, cmp)
-CMP_INST(cmpli, cmpl)
-
-#endif	/* __ASSEMBLER__ */
-
-#endif	/* _ARCH_PPC_PSEUDO_INST_H_ */
diff --git a/EXTERNAL_HEADERS/architecture/ppc/reg_help.h b/EXTERNAL_HEADERS/architecture/ppc/reg_help.h
deleted file mode 100644
index 6a0e2842e..000000000
--- a/EXTERNAL_HEADERS/architecture/ppc/reg_help.h
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1996 NeXT Software, Inc.  All rights reserved.
- *
- *	File:	architecture/ppc/reg_help.h
- *	Author:	Doug Mitchell, NeXT Computer, Inc.
- *
- *	m98k-specific macros and inlines for defining machine registers.
- *
- * HISTORY
- * 05-Nov-92  Doug Mitchell at NeXT
- *	Created.
- *
- * 29-Dec-96  Umesh Vaishampayan  (umeshv@NeXT.com)
- *	Ported from m98k. Removed dependency on nrw directory.
- *	Merged code from architecture/nrw/reg_help.h.
- *	Moved Register Usage #defines from asm_help.h in here.
- */
-
-#ifndef _ARCH_PPC_REG_HELP_H_
-#define _ARCH_PPC_REG_HELP_H_
-
-#if defined(__ASSEMBLER__)
-/*
- * GRF Register Usage Aliases
- */
-#define	zt		r0		// architecturally 0 for mem refs only!
-					// real reg other inst, caller-saved
-#define	sp		r1		// stack pointer, callee-saved
-#define	toc		r2		// tbl of contents, callee-saved
-#define	a0		r3		// arg 0, return value 0, caller saved
-#define	a1		r4		// arg 1, return value 1, caller saved
-#define	a2		r5		// ....
-#define	a3		r6
-#define	a4		r7
-#define	a5		r8
-#define	a6		r9
-#define	a7		r10		// arg 7, return value 7, caller saved
-#define	ep		r11		// environment ptr, caller saved
-#define	at		r12		// assembler temp, caller saved
-#define	s17		r13		// callee-saved 17
-#define	s16		r14
-#define	s15		r15
-#define	s14		r16
-#define	s13		r17
-#define	s12		r18
-#define	s11		r19
-#define	s10		r20
-#define	s9		r21
-#define	s8		r22
-#define	s7		r23
-#define	s6		r24
-#define	s5		r25
-#define	s4		r26
-#define	s3		r27
-#define	s2		r28
-#define	s1		r29		// ....
-#define	s0		r30		// callee-saved 0
-#define	fp		r31		// frame-pointer, callee-saved
-
-/*
- * Conversion of GRF aliases to register numbers
- */
-#define	GRF_ZT		0		// architecturally 0 for mem refs only!
-					// real reg other inst, caller-saved
-#define	GRF_SP		1		// stack pointer, callee-saved
-#define	GRF_TOC		2		// tbl of contents, callee-saved
-#define	GRF_A0		3		// arg 0, return value 0, caller saved
-#define	GRF_A1		4		// arg 1, return value 1, caller saved
-#define	GRF_A2		5		// ....
-#define	GRF_A3		6
-#define	GRF_A4		7
-#define	GRF_A5		8
-#define	GRF_A6		9
-#define	GRF_A7		10		// arg 7, return value 7, caller saved
-#define	GRF_EP		11		// environment ptr, caller saved
-#define	GRF_AT		12		// assembler temp, caller saved
-#define	GRF_S17		13		// callee-saved 17
-#define	GRF_S16		14
-#define	GRF_S15		15
-#define	GRF_S14		16
-#define	GRF_S13		17
-#define	GRF_S12		18
-#define	GRF_S11		19
-#define	GRF_S10		20
-#define	GRF_S9		21
-#define	GRF_S8		22
-#define	GRF_S7		23
-#define	GRF_S6		24
-#define	GRF_S5		25
-#define	GRF_S4		26
-#define	GRF_S3		27
-#define	GRF_S2		28
-#define	GRF_S1		29		// ....
-#define	GRF_S0		30		// callee-saved 0
-#define	GRF_FP		31		// frame pointer, callee-saved
-
-/*
- * FPF Register names
- */
-#define	ft0		f0		// scratch reg, caller-saved
-#define	fa0		f1		// fp arg 0, return 0, caller-saved
-#define	fa1		f2		// fp arg 1, caller-saved
-#define	fa2		f3		// fp arg 2, caller-saved
-#define	fa3		f4
-#define	fa4		f5
-#define	fa5		f6
-#define	fa6		f7
-#define	fa7		f8
-#define	fa8		f9
-#define	fa9		f10
-#define	fa10		f11
-#define	fa11		f12
-#define	fa12		f13		// fp arg 12, caller-saved
-#define	fs17		f14		// callee-saved 17
-#define	fs16		f15
-#define	fs15		f16
-#define	fs14		f17
-#define	fs13		f18
-#define	fs12		f19
-#define	fs11		f20
-#define	fs10		f21
-#define	fs9		f22
-#define	fs8		f23
-#define	fs7		f24
-#define	fs6		f25
-#define	fs5		f26
-#define	fs4		f27
-#define	fs3		f28
-#define	fs2		f29
-#define	fs1		f30
-#define	fs0		f31		// callee-saved 0
-
-/*
- * Conversion of FPF aliases to register numbers
- */
-#define	FPF_FT0		0		// scratch reg, caller-saved
-#define	FPF_FA0		1		// fp arg 0, return 0, caller-saved
-#define	FPF_FA1		2		// fp arg 1, caller-saved
-#define	FPF_FA2		3		// fp arg 2, caller-saved
-#define	FPF_FA3		4
-#define	FPF_FA4		5
-#define	FPF_FA5		6
-#define	FPF_FA6		7
-#define	FPF_FA7		8
-#define	FPF_FA8		9
-#define	FPF_FA9		10
-#define	FPF_FA10	11
-#define	FPF_FA11	12
-#define	FPF_FA12	13		// fp arg 12, caller-saved
-#define	FPF_FS17	14		// callee-saved 17
-#define	FPF_FS16	15
-#define	FPF_FS15	16
-#define	FPF_FS14	17
-#define	FPF_FS13	18
-#define	FPF_FS12	19
-#define	FPF_FS11	20
-#define	FPF_FS10	21
-#define	FPF_FS9		22
-#define	FPF_FS8		23
-#define	FPF_FS7		24
-#define	FPF_FS6		25
-#define	FPF_FS5		26
-#define	FPF_FS4		27
-#define	FPF_FS3		28
-#define	FPF_FS2		29
-#define	FPF_FS1		30
-#define	FPF_FS0		31		// callee-saved 0
-
-#endif	/* __ASSEMBLER__ */
-
-
-/* Bitfield definition aid */
-#define	BITS_WIDTH(msb, lsb)	((msb)-(lsb)+1)
-#define	BIT_WIDTH(pos)		(1)	/* mostly to record the position */
-
-/* Mask creation */
-#define	MKMASK(width, offset)	(((unsigned)-1)>>(32-(width))<<(offset))
-#define	BITSMASK(msb, lsb)	MKMASK(BITS_WIDTH(msb, lsb), lsb & 0x1f)
-#define	BITMASK(pos)		MKMASK(BIT_WIDTH(pos), pos & 0x1f)
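A few worked expansions of the mask helpers, as a sketch assuming the
three defines above are in scope and 32-bit unsigned arithmetic:

    #include <assert.h>

    int main(void)
    {
    	assert(MKMASK(4, 8)     == 0x00000f00U);  /* 4-bit field at bit 8 */
    	assert(BITSMASK(31, 16) == 0xffff0000U);  /* bits 31..16 */
    	assert(BITMASK(29)      == 0x20000000U);  /* single bit 29 */
    	/* NB: MKMASK(0, n) would shift by 32, which C leaves undefined */
    	return 0;
    }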
-
-/* Register addresses */
-#if	__ASSEMBLER__
-# define	REG_ADDR(type, addr)	(addr)
-#else	/* ! __ASSEMBLER__ */
-# define	REG_ADDR(type, addr)	(*(volatile type *)(addr))
-#endif	/* __ASSEMBLER__ */
-
-/* Cast a register to be an unsigned */
-/* CAUTION:	a non-naturally-aligned foo can result in alignment traps;
- *		use at your own risk.
- */
-#define	CONTENTS(foo)	(*(unsigned *) &(foo))
-
-/* STRINGIFY -- perform all possible substitutions, then stringify */
-#define	__STR(x)	#x		/* just a helper macro */
-#define	STRINGIFY(x)	__STR(x)
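The two-level definition matters because # stringifies its argument
before macro expansion; a small illustration (FOO and bar are
hypothetical names):

    #define	FOO	bar
    __STR(FOO)		/* yields "FOO" -- no expansion before # */
    STRINGIFY(FOO)	/* yields "bar" -- FOO expands first      */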
-
-/* 
- * Stack pointer must always be a multiple of 16
- */
-#define	STACK_INCR	16
-#define	ROUND_FRAME(x)	((((unsigned)(x)) + STACK_INCR - 1) & ~(STACK_INCR-1))
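A few worked values for the rounding helper above, each the next
multiple of STACK_INCR at or above the input (sketch assumes the two
defines above are in scope):

    #include <assert.h>

    int main(void)
    {
    	assert(ROUND_FRAME(0)    == 0);
    	assert(ROUND_FRAME(1)    == 16);
    	assert(ROUND_FRAME(16)   == 16);	/* already aligned */
    	assert(ROUND_FRAME(0x17) == 0x20);
    	return 0;
    }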
-
-#endif /* _ARCH_PPC_REG_HELP_H_ */
diff --git a/EXTERNAL_HEADERS/mach-o/arm/reloc.h b/EXTERNAL_HEADERS/mach-o/arm/reloc.h
deleted file mode 100644
index e2da8b80c..000000000
--- a/EXTERNAL_HEADERS/mach-o/arm/reloc.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_LICENSE_HEADER_END@
- */
-/*
- * Relocation types used in the arm implementation.  Relocation entries for
- * things other than instructions use the same generic relocation as described
- * in <mach-o/reloc.h> and their r_type is ARM_RELOC_VANILLA, one of the
- * *_SECTDIFF or the *_PB_LA_PTR types.  The rest of the relocation types are
- * for instructions.  Since they are for instructions the r_address field
- * indicates the 32 bit instruction that the relocation is to be performed on.
- */
-enum reloc_type_arm
-{
-    ARM_RELOC_VANILLA,	/* generic relocation as described above */
-    ARM_RELOC_PAIR,	/* the second relocation entry of a pair */
-    ARM_RELOC_SECTDIFF,	/* a PAIR follows with subtract symbol value */
-    ARM_RELOC_LOCAL_SECTDIFF, /* like ARM_RELOC_SECTDIFF, but the symbol
-				 referenced was local.  */
-    ARM_RELOC_PB_LA_PTR,/* prebound lazy pointer */
-    ARM_RELOC_BR24,	/* 24 bit branch displacement (to a word address) */
-    ARM_THUMB_RELOC_BR22, /* 22 bit branch displacement (to a half-word
-			     address) */
-};
diff --git a/EXTERNAL_HEADERS/mach-o/loader.h b/EXTERNAL_HEADERS/mach-o/loader.h
index b00ac7a67..9fecf2b4a 100644
--- a/EXTERNAL_HEADERS/mach-o/loader.h
+++ b/EXTERNAL_HEADERS/mach-o/loader.h
@@ -197,6 +197,12 @@ struct mach_header_64 {
 					   load the main executable at a
 					   random address.  Only used in
 					   MH_EXECUTE filetypes. */
+#define MH_NO_HEAP_EXECUTION 0x1000000	/* When this bit is set, the OS will
+					   run the main executable with
+					   a non-executable heap even on
+					   platforms (e.g. i386) that don't
+					   require it. Only used in MH_EXECUTE
+					   filetypes. */
 
 /*
  * The load commands directly follow the mach_header.  The total size of all
diff --git a/EXTERNAL_HEADERS/mach-o/ppc/reloc.h b/EXTERNAL_HEADERS/mach-o/ppc/reloc.h
deleted file mode 100644
index 7b564cc0a..000000000
--- a/EXTERNAL_HEADERS/mach-o/ppc/reloc.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_LICENSE_HEADER_END@
- */
-/*
- * Relocation types used in the ppc implementation.  Relocation entries for
- * things other than instructions use the same generic relocation as described
- * above and their r_type is RELOC_VANILLA.  The rest of the relocation types
- * are for instructions.  Since they are for instructions the r_address field
- * indicates the 32 bit instruction that the relocation is to be performed on.
- * The fields r_pcrel and r_length are ignored for non-RELOC_VANILLA r_types
- * except for PPC_RELOC_BR14.
- *
- * For PPC_RELOC_BR14 if the r_length is the unused value 3, then the branch was
- * statically predicted setting or clearing the Y-bit based on the sign of the
- * displacement or the opcode.  If this is the case the static linker must flip
- * the value of the Y-bit if the sign of the displacement changes for non-branch
- * always conditions.
- */
-enum reloc_type_ppc
-{
-    PPC_RELOC_VANILLA,	/* generic relocation as described above */
-    PPC_RELOC_PAIR,	/* the second relocation entry of a pair */
-    PPC_RELOC_BR14,	/* 14 bit branch displacement (to a word address) */
-    PPC_RELOC_BR24,	/* 24 bit branch displacement (to a word address) */
-    PPC_RELOC_HI16,	/* a PAIR follows with the low half */
-    PPC_RELOC_LO16,	/* a PAIR follows with the high half */
-    PPC_RELOC_HA16,	/* Same as the RELOC_HI16 except the low 16 bits and the
-			 * high 16 bits are added together with the low 16 bits
-			 * sign extended first.  This means if bit 15 of the low
-			 * 16 bits is set the high 16 bits stored in the
-			 * instruction will be adjusted.
-			 */
-    PPC_RELOC_LO14,	/* Same as the LO16 except that the low 2 bits are not
-			 * stored in the instruction and are always zero.  This
-			 * is used in double word load/store instructions.
-			 */
-    PPC_RELOC_SECTDIFF,	/* a PAIR follows with subtract symbol value */
-    PPC_RELOC_PB_LA_PTR,/* prebound lazy pointer */
-    PPC_RELOC_HI16_SECTDIFF, /* section difference forms of above.  a PAIR */
-    PPC_RELOC_LO16_SECTDIFF, /* follows these with subtract symbol value */
-    PPC_RELOC_HA16_SECTDIFF,
-    PPC_RELOC_JBSR,
-    PPC_RELOC_LO14_SECTDIFF,
-    PPC_RELOC_LOCAL_SECTDIFF  /* like PPC_RELOC_SECTDIFF, but the symbol
-				 referenced was local.  */
-};
diff --git a/EXTERNAL_HEADERS/stdarg.h b/EXTERNAL_HEADERS/stdarg.h
index f178505e8..bbbaff93e 100644
--- a/EXTERNAL_HEADERS/stdarg.h
+++ b/EXTERNAL_HEADERS/stdarg.h
@@ -1,133 +1,47 @@
-/* Copyright (C) 1989, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-/* As a special exception, if you include this header file into source
-   files compiled by GCC, this header file does not by itself cause
-   the resulting executable to be covered by the GNU General Public
-   License.  This exception does not however invalidate any other
-   reasons why the executable file might be covered by the GNU General
-   Public License.  */
-
-/*
- * ISO C Standard:  7.15  Variable arguments  <stdarg.h>
+/*===---- stdarg.h - Variable argument handling ----------------------------===
+ *
+ * Copyright (c) 2008 Eli Friedman
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
  */
 
-#ifndef _STDARG_H
-#ifndef _ANSI_STDARG_H_
-#ifndef __need___va_list
-#define _STDARG_H
-#define _ANSI_STDARG_H_
-#endif /* not __need___va_list */
-#undef __need___va_list
-
-/* Define __gnuc_va_list.  */
+#ifndef __STDARG_H
+#define __STDARG_H
 
-#ifndef __GNUC_VA_LIST
-#define __GNUC_VA_LIST
-typedef __builtin_va_list __gnuc_va_list;
-#endif
+typedef __builtin_va_list va_list;
+#define va_start(ap, param) __builtin_va_start(ap, param)
+#define va_end(ap)          __builtin_va_end(ap)
+#define va_arg(ap, type)    __builtin_va_arg(ap, type)
 
-/* Define the standard macros for the user,
-   if this invocation was from the user program.  */
-#ifdef _STDARG_H
-
-#define va_start(v,l)	__builtin_va_start(v,l)
-#define va_end(v)	__builtin_va_end(v)
-#define va_arg(v,l)	__builtin_va_arg(v,l)
-#if !defined(__STRICT_ANSI__) || __STDC_VERSION__ + 0 >= 199900L
-#define va_copy(d,s)	__builtin_va_copy(d,s)
-#endif
-#define __va_copy(d,s)	__builtin_va_copy(d,s)
-
-/* Define va_list, if desired, from __gnuc_va_list. */
-/* We deliberately do not define va_list when called from
-   stdio.h, because ANSI C says that stdio.h is not supposed to define
-   va_list.  stdio.h needs to have access to that data type, 
-   but must not use that name.  It should use the name __gnuc_va_list,
-   which is safe because it is reserved for the implementation.  */
-
-#ifdef _HIDDEN_VA_LIST  /* On OSF1, this means varargs.h is "half-loaded".  */
-#undef _VA_LIST
-#endif
-
-#ifdef _BSD_VA_LIST
-#undef _BSD_VA_LIST
-#endif
+/* GCC always defines __va_copy, but only defines va_copy when compiling in
+ * C99 mode or when -ansi is not specified, since it was not part of C90.
+ */
+#define __va_copy(d,s) __builtin_va_copy(d,s)
 
-#if defined(__svr4__) || (defined(_SCO_DS) && !defined(__VA_LIST))
-/* SVR4.2 uses _VA_LIST for an internal alias for va_list,
-   so we must avoid testing it and setting it here.
-   SVR4 uses _VA_LIST as a flag in stdarg.h, but we should
-   have no conflict with that.  */
-#ifndef _VA_LIST_
-#define _VA_LIST_
-#ifdef __i860__
-#ifndef _VA_LIST
-#define _VA_LIST va_list
-#endif
-#endif /* __i860__ */
-typedef __gnuc_va_list va_list;
-#ifdef _SCO_DS
-#define __VA_LIST
+#if __STDC_VERSION__ >= 199900L || !defined(__STRICT_ANSI__)
+#define va_copy(dest, src)  __builtin_va_copy(dest, src)
 #endif
-#endif /* _VA_LIST_ */
-#else /* not __svr4__ || _SCO_DS */
 
-/* The macro _VA_LIST_ is the same thing used by this file in Ultrix.
-   But on BSD NET2 we must not test or define or undef it.
-   (Note that the comments in NET 2's ansi.h
-   are incorrect for _VA_LIST_--see stdio.h!)  */
-#if !defined (_VA_LIST_) || defined (__BSD_NET2__) || defined (____386BSD____) || defined (__bsdi__) || defined (__sequent__) || defined (__FreeBSD__) || defined(WINNT)
-/* The macro _VA_LIST_DEFINED is used in Windows NT 3.5  */
-#ifndef _VA_LIST_DEFINED
-/* The macro _VA_LIST is used in SCO Unix 3.2.  */
-#ifndef _VA_LIST
-/* The macro _VA_LIST_T_H is used in the Bull dpx2  */
-#ifndef _VA_LIST_T_H
-/* The macro __va_list__ is used by BeOS.  */
-#ifndef __va_list__
-typedef __gnuc_va_list va_list;
-#endif /* not __va_list__ */
-#endif /* not _VA_LIST_T_H */
-#endif /* not _VA_LIST */
-#endif /* not _VA_LIST_DEFINED */
-#if !(defined (__BSD_NET2__) || defined (____386BSD____) || defined (__bsdi__) || defined (__sequent__) || defined (__FreeBSD__))
-#define _VA_LIST_
-#endif
-#ifndef _VA_LIST
-#define _VA_LIST
-#endif
-#ifndef _VA_LIST_DEFINED
-#define _VA_LIST_DEFINED
-#endif
-#ifndef _VA_LIST_T_H
-#define _VA_LIST_T_H
-#endif
-#ifndef __va_list__
-#define __va_list__
-#endif
-
-#endif /* not _VA_LIST_, except on certain systems */
-
-#endif /* not __svr4__ */
-
-#endif /* _STDARG_H */
+/* Hack required to make standard headers work, at least on Ubuntu */
+#define __GNUC_VA_LIST 1
+typedef __builtin_va_list __gnuc_va_list;
 
-#endif /* not _ANSI_STDARG_H_ */
-#endif /* not _STDARG_H */
+#endif /* __STDARG_H */
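A minimal, self-contained sketch of the standard varargs protocol the
macros above implement:

    #include <stdarg.h>
    #include <stdio.h>

    /* Sum `count` int arguments using va_start/va_arg/va_end. */
    static int sum(int count, ...)
    {
    	va_list ap;
    	int total = 0;

    	va_start(ap, count);
    	while (count-- > 0)
    		total += va_arg(ap, int);
    	va_end(ap);
    	return total;
    }

    int main(void)
    {
    	printf("%d\n", sum(3, 1, 2, 3));	/* prints 6 */
    	return 0;
    }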
diff --git a/Makefile b/Makefile
index 57c8a4c88..acd493419 100644
--- a/Makefile
+++ b/Makefile
@@ -32,40 +32,24 @@ ALL_SUBDIRS = \
 	libsa \
 	security
 
-CONFIG_SUBDIRS_PPC = config
-
 CONFIG_SUBDIRS_I386 = config
-
 CONFIG_SUBDIRS_X86_64 = config
-
 CONFIG_SUBDIRS_ARM = config
 
 INSTINC_SUBDIRS = $(ALL_SUBDIRS) EXTERNAL_HEADERS
-
-INSTINC_SUBDIRS_PPC = $(INSTINC_SUBDIRS) EXTERNAL_HEADERS
-
-INSTINC_SUBDIRS_I386 = $(INSTINC_SUBDIRS) EXTERNAL_HEADERS
-
-INSTINC_SUBDIRS_X86_64 = $(INSTINC_SUBDIRS) EXTERNAL_HEADERS
-
-INSTINC_SUBDIRS_ARM = $(INSTINC_SUBDIRS) EXTERNAL_HEADERS
+INSTINC_SUBDIRS_I386 = $(INSTINC_SUBDIRS)
+INSTINC_SUBDIRS_X86_64 = $(INSTINC_SUBDIRS)
+INSTINC_SUBDIRS_ARM = $(INSTINC_SUBDIRS)
 
 EXPINC_SUBDIRS = $(ALL_SUBDIRS)
-
-EXPINC_SUBDIRS_PPC =  $(EXPINC_SUBDIRS)
-
 EXPINC_SUBDIRS_I386 = $(EXPINC_SUBDIRS)
-
 EXPINC_SUBDIRS_X86_64 = $(EXPINC_SUBDIRS)
-
 EXPINC_SUBDIRS_ARM = $(EXPINC_SUBDIRS)
 
-COMP_SUBDIRS_PPC = $(ALL_SUBDIRS)
+SETUP_SUBDIRS = SETUP
 
 COMP_SUBDIRS_I386 = $(ALL_SUBDIRS)
-
 COMP_SUBDIRS_X86_64 = $(ALL_SUBDIRS)
-
 COMP_SUBDIRS_ARM = $(ALL_SUBDIRS)
 
 INST_SUBDIRS =	\
@@ -77,14 +61,18 @@ INST_SUBDIRS =	\
 	config	\
 	security
 
-INSTALL_FILE_LIST= \
-	mach_kernel
+INSTALL_KERNEL_FILE = mach_kernel
+
+INSTALL_KERNEL_DIR = /
 
-INSTALL_FILE_DIR= \
-	/
 
 INSTMAN_SUBDIRS = \
 	bsd
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
+
+# This target is defined to compile and run xnu_quick_test under testbots
+testbots:
+	/usr/bin/make MORECFLAGS="-D RUN_UNDER_TESTBOTS=1" testbots -C ./tools/tests/xnu_quick_test/
+
diff --git a/README b/README
index 2040c2cee..b9e102527 100644
--- a/README
+++ b/README
@@ -15,32 +15,17 @@ A. How to build XNU:
 
   By default, architecture defaults to the build machine 
   architecture, and the kernel configuration is set to build for DEVELOPMENT.
-  The machine configuration defaults to S5L8900X for arm and default for i386 and ppc.
   
   This will also create a bootable image, mach_kernel,  and a kernel binary 
   with symbols, mach_kernel.sys.
-	
-  Examples:
-	/* make a debug kernel for H1 arm board */
-	make TARGET_CONFIGS="debug arm s5l8900x" SDKROOT=/path/to/SDK
-	
-    $(OBJROOT)/DEBUG_ARM_S5L8900X/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
-    $(OBJROOT)/DEBUG_ARM_S5L8900X/mach_kernel: bootable image
 
-	/* make debug and development kernels for H1 arm board */
-	make TARGET_CONFIGS="debug arm s5l8900x  development arm s5l8900x" SDKROOT=/path/to/SDK
-	
-    $(OBJROOT)/DEBUG_ARM_S5L8900X/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
-    $(OBJROOT)/DEBUG_ARM_S5L8900X/mach_kernel: bootable image
-    $(OBJROOT)/DEVELOPMENT_ARM_S5L8900X/osfmk/DEVELOPMENT/osfmk.o: pre-linked object for osfmk component
-    $(OBJROOT)/DEVELOPMENT_ARM_S5L8900X/mach_kernel: bootable image
 
-	/* this is all you need to do to build H1 arm with DEVELOPMENT kernel configuration  */
-	make TARGET_CONFIGS="default arm default" SDKROOT=/path/to/SDK
+	/* this is all you need to do to build with RELEASE kernel configuration  */
+	make TARGET_CONFIGS="release x86_64 default" SDKROOT=/path/to/SDK
 	
 	or the following is equivalent (omitted SDKROOT will use /)
 	
-	make ARCH_CONFIGS=ARM
+	make ARCH_CONFIGS=X86_64
 
 2) Building a Component
 
@@ -64,7 +49,7 @@ A. How to build XNU:
   and KERNEL_CONFIGS).
   
   Example:
-    $(OBJROOT)/RELEASE_PPC/osfmk/RELEASE/osfmk.o: pre-linked object for osfmk component
+    $(OBJROOT)/RELEASE_X86_64/osfmk/RELEASE/osfmk.filelist: list of objects in osfmk component
 
   From the component top directory:
 
@@ -81,36 +66,36 @@ A. How to build XNU:
   Define kernel configuration to DEBUG in your environment or when running a 
   make command.  Then, apply procedures 4, 5
 
-  $ make TARGET_CONFIGS="DEBUG PPC DEFAULT" all
+  $ make TARGET_CONFIGS="DEBUG X86_64 DEFAULT" all
 
   or
 
-  $ make KERNEL_CONFIGS=DEBUG all
+  $ make KERNEL_CONFIGS=DEBUG ARCH_CONFIGS=X86_64 all
 
   or
 
-  $ export TARGET_CONFIGS="DEBUG ARM MX31ADS"
+  $ export TARGET_CONFIGS="DEBUG X86_64 DEFAULT"
   $ export SDKROOT=/path/to/SDK
   $ make all
 
   Example:
-    $(OBJROOT)/DEBUG_PPC/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
-    $(OBJROOT)/DEBUG_PPC/mach_kernel: bootable image
+    $(OBJROOT)/DEBUG_X86_64/osfmk/DEBUG/osfmk.filelist: list of objects in osfmk component
+    $(OBJROOT)/DEBUG_X86_64/mach_kernel: bootable image
 
 4) Building fat
 
   Define architectures in your environment or when running a make command.
   Apply procedures 3, 4, 5
 
-  $ make TARGET_CONFIGS="RELEASE PPC default RELEASE I386 default" exporthdrs all
+  $ make TARGET_CONFIGS="RELEASE I386 DEFAULT RELEASE X86_64 DEFAULT" exporthdrs all
 
   or
 
-  $ make ARCH_CONFIGS="PPC I386" exporthdrs all
+  $ make ARCH_CONFIGS="I386 X86_64" exporthdrs all
 
   or
 
-  $ export ARCH_CONFIGS="PPC I386"
+  $ export ARCH_CONFIGS="I386 X86_64"
   $ make exporthdrs all
 
 5) Verbose make 
@@ -127,16 +112,28 @@ A. How to build XNU:
 
   From the top directory, run:
 
-    $ ~rc/bin/buildit . -arch ppc -arch i386 -noinstallsrc -nosum
-	
-	or for multiple arm builds
-	
-    $ ~rc/bin/buildit . -noinstallsrc -nosum -- TARGET_CONFIGS="release arm MX31ADS release arm LN2410SBC"
-	
-	or for default arm build (kernel config DEVELOPMENT and machine config MX31ADS)
-	
-    $ ~rc/bin/buildit . -arch arm -noinstallsrc -nosum -- TARGET_CONFIGS="release arm MX31ADS release arm LN2410SBC"
+    $ ~rc/bin/buildit . -arch i386 -arch x86_64 -arch armv7 -arch ppc -noinstallsrc -nosum
+
 	
+  xnu supports a number of XBS build aliases, which allow B&I to build
+  the same source submission multiple times in different ways, to
+  produce different results. Each build alias supports the standard
+  "clean", "install", "installsrc", "installhdrs" targets, but
+  conditionalize their behavior on the RC_ProjectName make variable
+  which is passed as the -project argument to ~rc/bin/buildit, which
+  can be one of:
+
+  -project xnu          # the default, builds /mach_kernel, kernel-space
+                        # headers, user-space headers, man pages,
+                        # symbol-set kexts
+
+  -project xnu_debug    # a DEBUG kernel in /AppleInternal with dSYM
+
+  -project libkxld      # user-space version of kernel linker
+
+  -project Libsyscall   # automatically generate BSD syscall stubs
+
+
 
 8) Creating tags and cscope
 
@@ -157,6 +154,8 @@ A. How to build XNU:
 
    $ make -w              # trace recursive make invocations. Useful in combination with VERBOSE=YES
 
+   $ make BUILD_LTO=1	  # build with LLVM Link Time Optimization (experimental)
+
 =============================================
 B. How to install a new header file from XNU
 
diff --git a/osfmk/profiling/ppc/Makefile b/SETUP/Makefile
similarity index 66%
rename from osfmk/profiling/ppc/Makefile
rename to SETUP/Makefile
index ebea6420f..7a0e5c5b4 100644
--- a/osfmk/profiling/ppc/Makefile
+++ b/SETUP/Makefile
@@ -7,19 +7,10 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
-
-DATAFILES = \
-	profile-md.h
-
-INSTALL_MD_LIST = ${DATAFILES}
-
-INSTALL_MD_DIR = profile/ppc
-
-EXPORT_MD_LIST = ${DATAFILES}
-
-EXPORT_MD_DIR = profile/ppc
+SETUP_SUBDIRS = 	\
+	config		\
+	kextsymboltool	\
+	setsegname
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
-
-
diff --git a/SETUP/config/Makefile b/SETUP/config/Makefile
new file mode 100644
index 000000000..8889afef3
--- /dev/null
+++ b/SETUP/config/Makefile
@@ -0,0 +1,42 @@
+export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
+export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
+export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
+export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
+
+
+include $(MakeInc_cmd)
+include $(MakeInc_def)
+
+OBJS = externs.o main.o mkglue.o mkheaders.o mkioconf.o mkmakefile.o	\
+	mkswapconf.o openp.o searchp.o lexer.yy.o parser.o
+
+CFLAGS = -isysroot $(HOST_SDKROOT) -g -O0 -I$(SOURCE) -I.
+
+WARNFLAGS = -Wall
+
+LDFLAGS = -isysroot $(HOST_SDKROOT)
+
+config: $(OBJS)
+	$(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^
+	@echo HOST_LD $@
+	$(_v)$(HOST_CODESIGN) -s - $@
+	@echo HOST_CODESIGN $@
+
+.c.o:
+	$(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $<
+	@echo HOST_CC $@
+
+parser.c: parser.y
+	$(_v)$(HOST_BISON) -y -d -d -o $@ $<
+	@echo HOST_BISON $@
+
+lexer.yy.c: lexer.l
+	$(_v)$(HOST_FLEX) --header-file=lexer.yy.h -o $@ $<
+	@echo HOST_FLEX $@
+
+main.o mkglue.o mkheaders.o mkioconf.o mkmakefile.o lexer.yy.c: parser.c
+
+do_build_setup: config
+
+include $(MakeInc_rule)
+include $(MakeInc_dir)
diff --git a/SETUP/config/config.h b/SETUP/config/config.h
new file mode 100644
index 000000000..54219e1db
--- /dev/null
+++ b/SETUP/config/config.h
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 1999-2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * "Portions Copyright (c) 1999 Apple Computer, Inc.  All Rights
+ * Reserved.  This file contains Original Code and/or Modifications of
+ * Original Code as defined in and that are subject to the Apple Public
+ * Source License Version 1.0 (the 'License').  You may not use this file
+ * except in compliance with the License.  Please obtain a copy of the
+ * License at http://www.apple.com/publicsource and read it before using
+ * this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License."
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/* 
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * Copyright (c) 1988 Carnegie-Mellon University
+ * Copyright (c) 1987 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * Copyright (c) 1980 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley.  The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ *	@(#)config.h	5.8 (Berkeley) 6/18/88
+ */
+
+/*
+ * Config.
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <stdlib.h>
+#include <string.h>
+
+struct file_list {
+	struct	file_list *f_next;	
+	char	*f_fn;			/* the name */
+	u_char	f_type;			/* see below */
+	u_char	f_flags;		/* see below */
+	short	f_special;		/* requires special make rule */
+	char	*f_needs;
+	char	*f_extra;		/* stuff to add to make line */
+	/*
+	 * Random values:
+	 *	swap space parameters for swap areas
+	 *	root device, etc. for system specifications
+	 */
+	union {
+		struct {		/* when swap specification */
+			dev_t	fuw_swapdev;
+			int	fuw_swapsize;
+		} fuw;
+		struct {		/* when system specification */
+			dev_t	fus_rootdev;
+			dev_t	fus_argdev;
+			dev_t	fus_dumpdev;
+		} fus;
+	} fun;
+#define	f_swapdev	fun.fuw.fuw_swapdev
+#define	f_swapsize	fun.fuw.fuw_swapsize
+#define	f_rootdev	fun.fus.fus_rootdev
+#define	f_argdev	fun.fus.fus_argdev
+#define	f_dumpdev	fun.fus.fus_dumpdev
+};
+
+/*
+ * Types.
+ */
+#define DRIVER		1
+#define NORMAL		2
+#define	INVISIBLE	3
+#define	PROFILING	4
+#define	SYSTEMSPEC	5
+#define	SWAPSPEC	6
+
+/*
+ * Attributes (flags).
+ */
+#define	CONFIGDEP	0x01	/* obsolete? */
+#define	OPTIONSDEF	0x02	/* options definition entry */
+#define ORDERED		0x04	/* don't list in OBJ's, keep "files" order */
+#define SEDIT		0x08	/* run sed filter (SQT) */
+
+/*
+ * Maximum number of fields for variable device fields (SQT).
+ */
+#define	NFIELDS		10
+
+struct	idlst {
+	char	*id;
+	struct	idlst *id_next;
+	int	id_vec;		/* Sun interrupt vector number */
+};
+
+struct device {
+	int	d_type;			/* CONTROLLER, DEVICE, bus adaptor */
+	struct	device *d_conn;		/* what it is connected to */
+	const char	*d_name;	/* name of device (e.g. rk11) */
+	struct	idlst *d_vec;		/* interrupt vectors */
+	int	d_pri;			/* interrupt priority */
+	int	d_addr;			/* address of csr */
+	int	d_unit;			/* unit number */
+	int	d_drive;		/* drive number */
+	int	d_slave;		/* slave number */
+#define QUES	-1	/* -1 means '?' */
+#define	UNKNOWN -2	/* -2 means not set yet */
+	int	d_dk;			/* if init 1 set to number for iostat */
+	int	d_flags;		/* flags for device init */
+	struct	device *d_next;		/* Next one in list */
+        u_short d_mach;                 /* Sun - machine type (0 = all)*/
+        u_short d_bus;                  /* Sun - bus type (0 = unknown) */
+	u_long	d_fields[NFIELDS];	/* fields values (SQT) */
+	int	d_bin;			/* interrupt bin (SQT) */
+	int	d_addrmod;		/* address modifier (MIPS) */
+	char	*d_init;		/* pseudo device init routine name */
+};
+#define TO_NEXUS	(struct device *)-1
+#define TO_SLOT		(struct device *)-1
+
+struct config {
+	char	*c_dev;
+	char	*s_sysname;
+};
+
+/*
+ * Config has a global notion of which machine type is
+ * being used.  It uses the name of the machine in choosing
+ * files and directories.  Thus if the name of the machine is ``vax'',
+ * it will build from ``Makefile.vax'' and use ``../vax/inline''
+ * in the makerules, etc.
+ */
+extern int	machine;
+extern const char	*machinename;
+#define	MACHINE_VAX	1
+#define	MACHINE_SUN	2
+#define	MACHINE_ROMP	3
+#define	MACHINE_SUN2	4
+#define	MACHINE_SUN3	5
+#define	MACHINE_MMAX	6
+#define	MACHINE_SQT	7
+#define MACHINE_SUN4	8
+#define	MACHINE_I386	9
+#define	MACHINE_IX	10
+#define MACHINE_MIPSY	11
+#define	MACHINE_MIPS	12
+#define	MACHINE_I860	13
+#define	MACHINE_M68K	14
+#define	MACHINE_M88K	15
+#define	MACHINE_M98K	16
+#define MACHINE_HPPA	17
+#define MACHINE_SPARC	18
+#define MACHINE_PPC	19
+#define MACHINE_ARM	20
+#define MACHINE_X86_64	21
+
+/*
+ * For each machine, a set of CPU's may be specified as supported.
+ * These and the options (below) are put in the C flags in the makefile.
+ */
+struct cputype {
+	char	*cpu_name;
+	struct	cputype *cpu_next;
+};
+
+extern struct cputype  *cputype;
+
+/*
+ * In order to configure and build outside the kernel source tree,
+ * we may wish to specify where the source tree lives.
+ */
+extern const char *source_directory;
+extern const char *object_directory;
+extern char *config_directory;
+
+FILE *fopenp(const char *fpath, char *file, char *complete, const char *ftype);
+const char *get_VPATH(void);
+#define VPATH	get_VPATH()
+
+/*
+ * A set of options may also be specified which are like CPU types,
+ * but which may also specify values for the options.
+ * A separate set of options may be defined for make-style options.
+ */
+struct opt {
+	char	*op_name;
+	char	*op_value;
+	struct	opt *op_next;
+};
+
+extern struct opt *opt, *mkopt, *opt_tail, *mkopt_tail;
+
+extern char	*ident;
+const char	*get_word(FILE *fp);
+char	*ns(const char *str);
+char	*qu(int num);
+char	*path(const char *file);
+
+extern int	do_trace;
+
+#if	MACHINE_VAX
+extern int	seen_mba, seen_uba;
+#endif
+
+extern int	seen_vme, seen_mbii;
+
+extern struct	device *dtab;
+dev_t	nametodev(char *name, int defunit, char defpartition);
+char	*devtoname(dev_t dev);
+
+extern char	errbuf[80];
+extern int	yyline;
+
+extern struct	file_list *ftab, *conf_list, **confp;
+extern char	*build_directory;
+
+extern int	profiling;
+
+extern int	maxusers;
+
+#define eq(a,b)	(!strcmp(a,b))
+
+#ifdef	mips
+#define DEV_MASK 0xf
+#define	DEV_SHIFT  4
+#else	/* mips */
+#define DEV_MASK 0x7
+#define	DEV_SHIFT  3
+#endif	/* mips */
+
+/* External function references */
+char *get_rest(FILE *fp);
+
+int yyparse(void);
+void yyerror(const char *s);
+
+void vax_ioconf(void);
+void sun_ioconf(void);
+void romp_ioconf(void);
+void mmax_ioconf(void);
+void sqt_ioconf(void);
+void i386_ioconf(void);
+void mips_ioconf(void);
+void m68k_ioconf(void);
+void m88k_ioconf(void);
+void m98k_ioconf(void);
+void hppa_ioconf(void);
+void sparc_ioconf(void);
+void ppc_ioconf(void);
+void arm_ioconf(void);
+void x86_64_ioconf(void);
+
+void swapconf(void);
+
+void ubglue(void);
+void mbglue(void);
+
+void makefile(void);
+void headers(void);
+int opteq(const char *cp, const char *dp);
+
+void init_dev(struct device *dp);
+void newdev(struct device *dp);
+void dev_param(struct device *dp, const char *str, long num);
+
+int searchp(const char *spath, char *file, char *fullname, int (*func)(char *));
diff --git a/bsd/conf/tools/doconf/doconf.csh b/SETUP/config/doconf
similarity index 94%
rename from bsd/conf/tools/doconf/doconf.csh
rename to SETUP/config/doconf
index 6fedb4786..2d4e952e9 100755
--- a/bsd/conf/tools/doconf/doconf.csh
+++ b/SETUP/config/doconf
@@ -69,17 +69,14 @@ set prog=$0
 set prog=$prog:t
 set nonomatch
 set OBJDIR=../BUILD
-if ("`/usr/bin/uname`" == "Rhapsody" ) then
-set CONFIG_DIR=/usr/local/bin
-else
-set CONFIG_DIR=/usr/bin
-endif
+set CONFIG_DIR=$OBJROOT/SETUP/config
 
 unset domake
 unset doconfig
 unset beverbose
 unset MACHINE
 unset profile
+unset SOC_CONFIG
 
 while ($#argv >= 1)
     if ("$argv[1]" =~ -*) then
@@ -100,6 +97,14 @@ while ($#argv >= 1)
 	    set MACHINE="$argv[2]"
 	    shift
 	    breaksw
+	case "-soc":
+	    if ($#argv < 2) then
+		echo "${prog}: missing argument to ${argv[1]}"
+		exit 1
+	    endif
+	    set SOC_CONFIG="$argv[2]"
+	    shift
+	    breaksw
 	case "-d":
 	    if ($#argv < 2) then
 		echo "${prog}: missing argument to ${argv[1]}"
@@ -168,11 +173,15 @@ set FEATURES_H=(cs_*.h mach_*.h net_*.h\
 set MASTER_DIR=../conf
 set MASTER =   ${MASTER_DIR}/MASTER
 set MASTER_CPU=${MASTER}.${cpu}
+set MASTER_CPU_PER_SOC=${MASTER}.${cpu}.${SOC_CONFIG}
+if (-f $MASTER_CPU_PER_SOC) set MASTER_CPU = ${MASTER_CPU_PER_SOC}
 
 set MASTER_LOCAL = ${MASTER}.local
 set MASTER_CPU_LOCAL = ${MASTER_CPU}.local
+set MASTER_CPU_PER_SOC_LOCAL = ${MASTER_CPU_PER_SOC}.local
 if (! -f $MASTER_LOCAL) set MASTER_LOCAL = ""
 if (! -f $MASTER_CPU_LOCAL) set MASTER_CPU_LOCAL = ""
+if (-f $MASTER_CPU_PER_SOC_LOCAL) set MASTER_CPU_LOCAL = ${MASTER_CPU_PER_SOC_LOCAL}
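+# A per-SoC MASTER file (MASTER.${cpu}.${SOC_CONFIG}) and its .local
+# counterpart take precedence over the plain per-cpu files when present.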
 
 if (! -d $OBJDIR) then
     if ($?beverbose) then
diff --git a/SETUP/config/externs.c b/SETUP/config/externs.c
new file mode 100644
index 000000000..d1bdd8942
--- /dev/null
+++ b/SETUP/config/externs.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * "Portions Copyright (c) 1999 Apple Computer, Inc.  All Rights
+ * Reserved.  This file contains Original Code and/or Modifications of
+ * Original Code as defined in and that are subject to the Apple Public
+ * Source License Version 1.0 (the 'License').  You may not use this file
+ * except in compliance with the License.  Please obtain a copy of the
+ * License at http://www.apple.com/publicsource and read it before using
+ * this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License."
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/* Copyright (c) Apple Computer, Inc. All rights reserved. */
+
+#include <config.h>
+
+
+/*
+ * Config has a global notion of which machine type is
+ * being used.  It uses the name of the machine in choosing
+ * files and directories.  Thus if the name of the machine is ``vax'',
+ * it will build from ``Makefile.vax'' and use ``../vax/inline''
+ * in the makerules, etc.
+ */
+int	machine;
+const char	*machinename;
+
+/*
+ * For each machine, a set of CPU's may be specified as supported.
+ * These and the options (below) are put in the C flags in the makefile.
+ */
+
+struct cputype  *cputype;
+
+/*
+ * In order to configure and build outside the kernel source tree,
+ * we may wish to specify where the source tree lives.
+ */
+const char *source_directory;
+const char *object_directory;
+char *config_directory;
+
+/*
+ * A set of options may also be specified which are like CPU types,
+ * but which may also specify values for the options.
+ * A separate set of options may be defined for make-style options.
+ */
+struct opt *opt, *mkopt, *opt_tail, *mkopt_tail;
+
+char	*ident;
+
+int	do_trace;
+
+#if	MACHINE_VAX
+int	seen_mba, seen_uba;
+#endif
+
+int	seen_vme, seen_mbii;
+
+struct	device *dtab;
+
+char	errbuf[80];
+int	yyline;
+
+struct	file_list *ftab, *conf_list, **confp;
+char	*build_directory;
+
+int	profiling = 0;
+
+int	maxusers;
+
diff --git a/SETUP/config/lexer.l b/SETUP/config/lexer.l
new file mode 100644
index 000000000..c5502b4ba
--- /dev/null
+++ b/SETUP/config/lexer.l
@@ -0,0 +1,214 @@
+%{
+/* 
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * Copyright (c) 1988 Carnegie-Mellon University
+ * Copyright (c) 1987 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * Copyright (c) 1980 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley.  The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ *	@(#)config.l	5.5 (Berkeley) 6/18/88
+ */
+
+#include <ctype.h>
+#include <stdio.h>	/* printf, sscanf */
+#include <stdlib.h>	/* atoi, atof */
+#include <string.h>	/* strlen, strcmp */
+#include "parser.h"
+#include "config.h"
+
+int	kw_lookup(char *word);
+int	octal(char *str);
+int	hex(char *str);
+int	yylex(void);
+
+#define tprintf if (do_trace) printf
+
+/*
+ * Key word table
+ */
+
+struct kt {
+	const char *kt_name;
+	int kt_val;
+} key_words[] = {
+	{ "and",	AND },
+	{ "args",	ARGS },
+	{ "at",		AT },
+	{ "builddir",	BUILDDIR },
+	{ "config",	CONFIG },
+	{ "configdir",	CONFIGDIR },
+	{ "controller",	CONTROLLER },
+	{ "cpu",	CPU },
+	{ "csr",	CSR },
+	{ "device",	DEVICE },
+	{ "disk",	DISK },
+	{ "drive",	DRIVE },
+	{ "dumps",	DUMPS },
+	{ "flags",	FLAGS },
+	{ "hz",		HZ },
+	{ "ident",	IDENT },
+	{ "init",	INIT },
+	{ "machine",	MACHINE },
+	{ "major",	MAJOR },
+	{ "makeoptions", MAKEOPTIONS },
+	{ "makevariables", MAKEOPTIONS },
+	{ "master",	MASTER },
+	{ "maxusers",	MAXUSERS },
+	{ "mba",	MBA },
+	{ "minor",	MINOR },
+	{ "nexus",	NEXUS },
+	{ "objectdir",	OBJECTDIR },
+	{ "on",		ON },
+	{ "options",	OPTIONS },
+	{ "priority",	PRIORITY },
+	{ "profile",	PROFILE },
+	{ "pseudo-device",PSEUDO_DEVICE },
+	{ "root",	ROOT },
+	{ "size",	SIZE },
+	{ "slave",	SLAVE },
+	{ "sourcedir",	SOURCEDIR },
+	{ "swap",	SWAP },
+	{ "tape",	DEVICE },
+	{ "trace",	TRACE },
+	{ "uba",	UBA },
+	{ "vector",	VECTOR },
+	{ "lun",	LUN },			/* MMAX only */
+	{ "slot",	SLOT },			/* MMAX only */
+	{ "tape",	TAPE },			/* MMAX only */
+	{ "bin",	BIN },			/* SQT ONLY */
+	{ "am",		ADDRMOD },		/* MIPS */
+	{ "mbii",	MBII },			/* MIPS */
+ 	{ "vme",	VME },			/* MIPS */
+	{ 0, 0 },
+};
+%}
+
+%option nounput
+
+WORD	([A-Za-z_][-A-Za-z_]*|[A-Z][-A-Za-z_0-9]*)
+WORD1	([A-Za-z_][-A-Za-z_0-9]*)
+%%
+{WORD} |
+{WORD1}		{
+			int i;
+
+			if ((i = kw_lookup(yytext)) == -1)
+			{
+				yylval.str = yytext;
+				tprintf("id(%s) ", yytext);
+				return ID;
+			}
+			tprintf("(%s) ", yytext);
+			return i;
+		}
+\"[^"]+\"	{
+			yytext[strlen(yytext)-1] = '\0';
+			yylval.str = yytext + 1;
+			return ID;
+		}
+0[0-7]*		{
+			yylval.val = octal(yytext);
+			tprintf("#O:%o ", yylval.val);
+			return NUMBER;
+		}
+0x[0-9a-fA-F]+	{
+			yylval.val = hex(yytext);
+			tprintf("#X:%x ", yylval.val);
+			return NUMBER;
+		}
+[1-9][0-9]*	{
+			yylval.val = atoi(yytext);
+			tprintf("#D:%d ", yylval.val);
+			return NUMBER;
+		}
+[0-9]"."[0-9]*	{
+			yylval.val = (int) (60 * atof(yytext) + 0.5);
+			return FPNUMBER;
+		}
+"-"		{
+			return MINUS;
+		}
+"?"		{
+			yylval.val = -1;
+			tprintf("? ");
+			return NUMBER;
+		}
+\n/[ \t]	{
+			yyline++;
+			tprintf("\n... ");
+		}
+\n		{
+			yyline++;
+			tprintf("\n");
+			return SEMICOLON;
+		}
+#.*		{	/* Ignored (comment) */;	}
+[ \t]*		{	/* Ignored (white space) */;	}
+";"		{	return SEMICOLON;		}
+","		{	return COMMA;			}
+"="		{	return EQUALS;			}
+"@"		{	return AT;			}
+.		{	return yytext[0];		}
+
+
+%%
+/*
+ * kw_lookup
+ *	Look up a string in the keyword table.  Returns a -1 if the
+ *	string is not a keyword otherwise it returns the keyword number
+ */
+
+int
+kw_lookup(char *word)
+{
+	register struct kt *kp;
+
+	for (kp = key_words; kp->kt_name != 0; kp++)
+		if (eq(word, kp->kt_name))
+			return kp->kt_val;
+	return -1;
+}
+
+/*
+ * Number conversion routines
+ */
+
+int
+octal(char *str)
+{
+	int num;
+
+	(void) sscanf(str, "%o", &num);
+	return num;
+}
+
+int
+hex(char *str)
+{
+	int num;
+
+	(void) sscanf(str+2, "%x", &num);
+	return num;
+}
+
+int
+yywrap(void)
+{
+	return 1;
+}
diff --git a/SETUP/config/main.c b/SETUP/config/main.c
new file mode 100644
index 000000000..024b17be8
--- /dev/null
+++ b/SETUP/config/main.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 1999-2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * "Portions Copyright (c) 1999 Apple Computer, Inc.  All Rights
+ * Reserved.  This file contains Original Code and/or Modifications of
+ * Original Code as defined in and that are subject to the Apple Public
+ * Source License Version 1.0 (the 'License').  You may not use this file
+ * except in compliance with the License.  Please obtain a copy of the
+ * License at http://www.apple.com/publicsource and read it before using
+ * this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License."
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/* 
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * Copyright (c) 1988 Carnegie-Mellon University
+ * Copyright (c) 1987 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * Copyright (c) 1980 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley.  The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef lint
+char copyright[] =
+"@(#) Copyright (c) 1980 Regents of the University of California.\n\
+ All rights reserved.\n";
+#endif /* not lint */
+
+#ifndef lint
+static char sccsid[] __attribute__((used)) = "@(#)main.c	5.9 (Berkeley) 6/18/88";
+#endif /* not lint */
+
+#include <stdio.h>
+#include <stdlib.h>	/* malloc, exit */
+#include <string.h>	/* strlen */
+#include <ctype.h>
+#include "parser.h"
+#include "config.h"
+
+/*
+ * Config builds a set of files for building a UNIX
+ * system given a description of the desired system.
+ */
+int
+main(int argc, char *argv[])
+{
+
+	source_directory = "..";	/* default */
+	object_directory = "..";
+	config_directory = (char *) 0;
+	while ((argc > 1) && (argv[1][0] == '-')) {
+		char		*c;
+
+		argv++; argc--;
+		for (c = &argv[0][1]; *c ; c++) {
+			switch (*c) {
+				case 'b':
+					build_directory = argv[1];
+					goto check_arg;
+
+				case 'd':
+					source_directory = argv[1];
+					goto check_arg;
+
+				case 'o':
+					object_directory = argv[1];
+					goto check_arg;
+
+				case 'c':
+					config_directory = argv[1];
+
+				 check_arg:
+				 	if (argv[1] == (char *) 0)
+						goto usage_error;
+					argv++; argc--;
+					break;
+
+				case 'p':
+					profiling++;
+					break;
+				default:
+					goto usage_error;
+			}
+		}
+	}
+	if (config_directory == (char *) 0) {
+		config_directory =
+			malloc((unsigned) strlen(source_directory) + 6);
+		(void) sprintf(config_directory, "%s/conf", source_directory);
+	}
+	if (argc != 2) {
+		usage_error: ;
+		fprintf(stderr, "usage: config [ -bcdo dir ] [ -p ] sysname\n");
+		exit(1);
+	}
+	if (!build_directory)
+		build_directory = argv[1];
+	if (freopen(argv[1], "r", stdin) == NULL) {
+		perror(argv[1]);
+		exit(2);
+	}
+	dtab = NULL;
+	confp = &conf_list;
+	opt = 0;
+	if (yyparse())
+		exit(3);
+	switch (machine) {
+
+	case MACHINE_VAX:
+		vax_ioconf();		/* Print ioconf.c */
+		ubglue();		/* Create ubglue.s */
+		break;
+
+	case MACHINE_SUN:
+		sun_ioconf();
+		break;
+
+	case MACHINE_SUN2:
+	case MACHINE_SUN3:
+	case MACHINE_SUN4:
+		sun_ioconf();           /* Print ioconf.c */
+		mbglue();               /* Create mbglue.s */
+		break;
+
+	case MACHINE_ROMP:
+		romp_ioconf();
+		break;
+
+	case MACHINE_MMAX:
+		mmax_ioconf();
+		break;
+
+	case MACHINE_SQT:
+		sqt_ioconf();
+		break;
+
+	case MACHINE_I386:
+	case MACHINE_IX:
+		i386_ioconf();
+		break;
+
+	case MACHINE_MIPSY:
+	case MACHINE_MIPS:
+		mips_ioconf();
+		break;
+
+	case MACHINE_I860:
+		/* i860_ioconf(); */
+		break;
+
+	case MACHINE_M68K:
+		m68k_ioconf();
+  		break;
+
+	case MACHINE_M88K:
+		m88k_ioconf();
+  		break;
+
+	case MACHINE_M98K:
+		m98k_ioconf();
+  		break;
+
+	case MACHINE_HPPA:
+		hppa_ioconf();
+		break;
+
+	case MACHINE_SPARC:
+		sparc_ioconf();
+		break;
+
+	case MACHINE_PPC:
+		ppc_ioconf();
+		break;
+
+	case MACHINE_ARM:
+		arm_ioconf();
+		break;
+
+	case MACHINE_X86_64:
+		x86_64_ioconf();
+		break;
+
+	default:
+		printf("Specify machine type, e.g. ``machine vax''\n");
+		exit(1);
+	}
+
+	makefile();			/* build Makefile */
+	headers();			/* make a lot of .h files */
+	swapconf();			/* swap config files */
+
+	return 0;
+}
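+
+/*
+ * Invocation sketch (directory names are illustrative): the build would
+ * run something like
+ *
+ *	config -c ../conf -d ../src -o ../obj MASTER_FILE
+ *
+ * where MASTER_FILE names both the description file reopened as stdin
+ * above and, absent -b, the default build_directory.
+ */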
+
+/*
+ * get_word
+ *	returns EOF on end of file
+ *	NULL on end of line
+ *	pointer to the word otherwise
+ */
+const char *
+get_word(FILE *fp)
+{
+	static char line[80];
+	register int ch;
+	register char *cp;
+
+	while ((ch = getc(fp)) != EOF)
+		if (ch != ' ' && ch != '\t')
+			break;
+	if (ch == EOF)
+		return ((char *)EOF);
+	if (ch == '\n')
+		return (NULL);
+	if (ch == '|')
+		return( "|");
+	cp = line;
+	*cp++ = ch;
+	while ((ch = getc(fp)) != EOF) {
+		if (isspace(ch))
+			break;
+		*cp++ = ch;
+	}
+	*cp = 0;
+	if (ch == EOF)
+		return ((char *)EOF);
+	(void) ungetc(ch, fp);
+	return (line);
+}
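+
+/*
+ * Usage sketch (assumption, not original code): callers must distinguish
+ * all three return values, e.g.
+ *
+ *	const char *w;
+ *	while ((w = get_word(fp)) != (char *)EOF)
+ *		if (w != NULL)
+ *			printf("word: %s\n", w);
+ */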
+
+/*
+ * get_rest
+ *	returns EOF on end of file
+ *	NULL on end of line
+ *	pointer to the word otherwise
+ */
+char *
+get_rest(FILE *fp)
+{
+	static char line[80];
+	register int ch;
+	register char *cp;
+
+	cp = line;
+	while ((ch = getc(fp)) != EOF) {
+		if (ch == '\n')
+			break;
+		*cp++ = ch;
+	}
+	*cp = 0;
+	if (ch == EOF)
+		return ((char *)EOF);
+	return (line);
+}
+
+/*
+ * prepend the path to a filename
+ */
+char *
+path(const char *file)
+{
+	register char *cp;
+
+	cp = malloc((unsigned)(strlen(build_directory)+
+			       strlen(file)+
+			       strlen(object_directory)+
+			       3));
+	(void) sprintf(cp, "%s/%s/%s", object_directory, build_directory, file);
+	return (cp);
+}
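+
+/*
+ * Example (directory names illustrative): with object_directory ".." and
+ * build_directory "RELEASE", path("ioconf.c") returns
+ * "../RELEASE/ioconf.c".  The buffer is malloc'd and never freed, which
+ * is harmless in this short-lived tool.
+ */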
diff --git a/SETUP/config/mkglue.c b/SETUP/config/mkglue.c
new file mode 100644
index 000000000..9d4b5ac6f
--- /dev/null
+++ b/SETUP/config/mkglue.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * "Portions Copyright (c) 1999 Apple Computer, Inc.  All Rights
+ * Reserved.  This file contains Original Code and/or Modifications of
+ * Original Code as defined in and that are subject to the Apple Public
+ * Source License Version 1.0 (the 'License').  You may not use this file
+ * except in compliance with the License.  Please obtain a copy of the
+ * License at http://www.apple.com/publicsource and read it before using
+ * this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License."
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/* 
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * Copyright (c) 1988 Carnegie-Mellon University
+ * Copyright (c) 1987 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * Copyright (c) 1980 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley.  The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+#ifndef lint
+static char sccsid[] __attribute__((used)) = "@(#)mkglue.c	5.6 (Berkeley) 6/18/88";
+#endif /* not lint */
+
+/*
+ * Make the bus adaptor interrupt glue files.
+ */
+#include <stdio.h>
+#include <stdlib.h>	/* exit */
+#include <string.h>
+#include "config.h"
+#include "parser.h"
+#include <ctype.h>
+
+void dump_mb_handler(FILE *fp, struct idlst *vec, int number);
+void dump_ubavec(FILE *fp, char *vector, int number);
+void dump_std(FILE *fp, FILE *gp);
+void dump_intname(FILE *fp, char *vector, int number);
+void dump_ctrs(FILE *fp);
+void glue(FILE *fp, void (*dump_handler)(FILE *, struct idlst *, int));
+
+/*
+ * Create the UNIBUS interrupt vector glue file.
+ */
+void
+ubglue(void)
+{
+	register FILE *fp, *gp;
+	register struct device *dp, *mp;
+
+	fp = fopen(path("ubglue.s"), "w");
+	if (fp == 0) {
+		perror(path("ubglue.s"));
+		exit(1);
+	}
+	gp = fopen(path("ubvec.s"), "w");
+	if (gp == 0) {
+		perror(path("ubvec.s"));
+		exit(1);
+	}
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		mp = dp->d_conn;
+		if (mp != 0 && mp != (struct device *)-1 &&
+		    !eq(mp->d_name, "mba")) {
+			struct idlst *id, *id2;
+
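+			/*
+			 * Emit each distinct vector name once: the inner
+			 * scan stops at the first node with the same id,
+			 * so only a name's first occurrence is dumped.
+			 */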
+			for (id = dp->d_vec; id; id = id->id_next) {
+				for (id2 = dp->d_vec; id2; id2 = id2->id_next) {
+					if (id2 == id) {
+						dump_ubavec(fp, id->id,
+						    dp->d_unit);
+						break;
+					}
+					if (!strcmp(id->id, id2->id))
+						break;
+				}
+			}
+		}
+	}
+	dump_std(fp, gp);
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		mp = dp->d_conn;
+		if (mp != 0 && mp != (struct device *)-1 &&
+		    !eq(mp->d_name, "mba")) {
+			struct idlst *id, *id2;
+
+			for (id = dp->d_vec; id; id = id->id_next) {
+				for (id2 = dp->d_vec; id2; id2 = id2->id_next) {
+					if (id2 == id) {
+						dump_intname(fp, id->id,
+							dp->d_unit);
+						break;
+					}
+					if (!strcmp(id->id, id2->id))
+						break;
+				}
+			}
+		}
+	}
+	dump_ctrs(fp);
+	(void) fclose(fp);
+	(void) fclose(gp);
+}
+
+static int cntcnt = 0;		/* number of interrupt counters allocated */
+
+/*
+ * Print a UNIBUS interrupt vector.
+ */
+void
+dump_ubavec(FILE *fp, char *vector, int number)
+{
+	char nbuf[80];
+	register char *v = nbuf;
+
+	switch (machine) {
+
+	case MACHINE_VAX:
+		(void) sprintf(v, "%s%d", vector, number);
+		fprintf(fp, "\t.globl\t_X%s\n\t.align\t2\n_X%s:\n",
+		    v, v);
+		fprintf(fp,"\tTIM_PUSHR(0)\n");
+		fprintf(fp, "\tincl\t_fltintrcnt+(4*%d)\n", cntcnt++);
+		if (strncmp(vector, "dzx", 3) == 0)
+			fprintf(fp, "\tmovl\t$%d,r0\n\tjmp\tdzdma\n\n", number);
+		else {
+			if (strncmp(vector, "uur", 3) == 0) {
+				fprintf(fp, "#ifdef UUDMA\n");
+				fprintf(fp, "\tmovl\t$%d,r0\n\tjsb\tuudma\n",
+					    number);
+				fprintf(fp, "#endif\n");
+			}
+			fprintf(fp, "\tpushl\t$%d\n", number);
+			fprintf(fp, "\tcalls\t$1,_%s\n",vector);
+			fprintf(fp, "\tCOUNT(V_INTR)\n");
+			fprintf(fp, "\tTSREI_POPR\n");
+		}
+		break;
+
+	case MACHINE_MIPSY:
+	case MACHINE_MIPS:
+		/*
+		 * Actually, we should never get here!
+		 * Main does not even call ubglue.
+		 */
+		if (strncmp(vector, "dzx", 3) == 0)
+			fprintf(fp, "\tDZINTR(%s,%d)\n", vector, number);
+		else
+			fprintf(fp, "\tDEVINTR(%s,%d)\n", vector, number);
+		break;
+	}
+
+}
+
+static	const char *vaxinames[] = {
+	"clock", "cnr", "cnx", "tur", "tux",
+	"mba0", "mba1", "mba2", "mba3",
+	"uba0", "uba1", "uba2", "uba3"
+};
+static	struct stdintrs {
+	const char	**si_names;	/* list of standard interrupt names */
+	int	si_n;		/* number of such names */
+} stdintrs[] = {
+	{ vaxinames, sizeof (vaxinames) / sizeof (vaxinames[0]) },
+};
+/*
+ * Start the interrupt name table with the names
+ * of the standard vectors not directly associated
+ * with a bus.  Also, dump the defines needed to
+ * reference the associated counters into a separate
+ * file which is prepended to locore.s.
+ */
+void
+dump_std(FILE *fp, FILE *gp)
+{
+	register struct stdintrs *si = &stdintrs[machine-1];
+	register const char **cpp;
+	register int i;
+
+	fprintf(fp, "\n\t.globl\t_intrnames\n");
+	fprintf(fp, "\n\t.globl\t_eintrnames\n");
+	fprintf(fp, "\t.data\n");
+	fprintf(fp, "_intrnames:\n");
+	cpp = si->si_names;
+	for (i = 0; i < si->si_n; i++) {
+		const char *cp;
+		char *tp;
+		char buf[80];
+
+		cp = *cpp;
+		if (cp[0] == 'i' && cp[1] == 'n' && cp[2] == 't') {
+			cp += 3;
+			if (*cp == 'r')
+				cp++;
+		}
+		for (tp = buf; *cp; cp++)
+			if (islower(*cp))
+				*tp++ = toupper(*cp);
+			else
+				*tp++ = *cp;
+		*tp = '\0';
+		fprintf(gp, "#define\tI_%s\t%lu\n", buf, i*sizeof (long));
+		fprintf(fp, "\t.asciz\t\"%s\"\n", *cpp);
+		cpp++;
+	}
+}
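+
+/*
+ * Example (illustrative): from the vax name table above, "clock" yields
+ * "#define I_CLOCK 0" and "uba0" (index 9) yields "#define I_UBA0 36",
+ * assuming 4-byte longs on the historical target.
+ */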
+
+void
+dump_intname(FILE *fp, char *vector, int number)
+{
+	register char *cp = vector;
+
+	fprintf(fp, "\t.asciz\t\"");
+	/*
+	 * Skip any "int" or "intr" in the name.
+	 */
+	while (*cp)
+		if (cp[0] == 'i' && cp[1] == 'n' &&  cp[2] == 't') {
+			cp += 3;
+			if (*cp == 'r')
+				cp++;
+		} else {
+			putc(*cp, fp);
+			cp++;
+		}
+	fprintf(fp, "%d\"\n", number);
+}
+
+/*
+ * Reserve space for the interrupt counters.
+ */
+void
+dump_ctrs(FILE *fp)
+{
+	struct stdintrs *si = &stdintrs[machine-1];
+
+	fprintf(fp, "_eintrnames:\n");
+	fprintf(fp, "\n\t.globl\t_intrcnt\n");
+	fprintf(fp, "\n\t.globl\t_eintrcnt\n");
+	fprintf(fp, "\t.align 2\n");
+	fprintf(fp, "_intrcnt:\n");
+	fprintf(fp, "\t.space\t4 * %d\n", si->si_n);
+	fprintf(fp, "_fltintrcnt:\n");
+	fprintf(fp, "\t.space\t4 * %d\n", cntcnt);
+	fprintf(fp, "_eintrcnt:\n\n");
+	fprintf(fp, "\t.text\n");
+}
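+
+/*
+ * Illustrative output (condensed; reconstructed from the fprintf calls
+ * above), for si_n == 13 and cntcnt == 2:
+ *
+ *	_eintrnames:
+ *		.globl	_intrcnt
+ *		.globl	_eintrcnt
+ *		.align 2
+ *	_intrcnt:
+ *		.space	4 * 13
+ *	_fltintrcnt:
+ *		.space	4 * 2
+ *	_eintrcnt:
+ *		.text
+ */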
+
+/*
+ * Routines for making Sun mb interrupt file mbglue.s
+ */
+
+/*
+ * print an interrupt handler for mainbus
+ */
+void
+dump_mb_handler(FILE *fp, struct idlst *vec, int number)
+{
+	fprintf(fp, "\tVECINTR(_X%s%d, _%s, _V%s%d)\n",
+		vec->id, number, vec->id, vec->id, number);
+}
+
+void
+mbglue(void)
+{
+	register FILE *fp;
+	const char *name = "mbglue.s";
+
+	fp = fopen(path(name), "w");
+	if (fp == 0) {
+		perror(path(name));
+		exit(1);
+	}
+	fprintf(fp, "#include <machine/asm_linkage.h>\n\n");
+	glue(fp, dump_mb_handler);
+	(void) fclose(fp);
+}
+
+void
+glue(FILE *fp, void (*dump_handler)(FILE *, struct idlst *, int))
+{
+	register struct device *dp, *mp;
+
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		mp = dp->d_conn;
+		if (mp != 0 && mp != (struct device *)-1 &&
+		    !eq(mp->d_name, "mba")) {
+			struct idlst *vd, *vd2;
+
+			for (vd = dp->d_vec; vd; vd = vd->id_next) {
+				for (vd2 = dp->d_vec; vd2; vd2 = vd2->id_next) {
+					if (vd2 == vd) {
+						(void)(*dump_handler)
+							(fp, vd, dp->d_unit);
+						break;
+					}
+					if (!strcmp(vd->id, vd2->id))
+						break;
+				}
+			}
+		}
+	}
+}
diff --git a/SETUP/config/mkheaders.c b/SETUP/config/mkheaders.c
new file mode 100644
index 000000000..a0e3fdc38
--- /dev/null
+++ b/SETUP/config/mkheaders.c
@@ -0,0 +1,276 @@
+/*
+ * Copyright (c) 1999-2006 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * "Portions Copyright (c) 1999 Apple Computer, Inc.  All Rights
+ * Reserved.  This file contains Original Code and/or Modifications of
+ * Original Code as defined in and that are subject to the Apple Public
+ * Source License Version 1.0 (the 'License').  You may not use this file
+ * except in compliance with the License.  Please obtain a copy of the
+ * License at http://www.apple.com/publicsource and read it before using
+ * this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License."
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/* 
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * Copyright (c) 1988 Carnegie-Mellon University
+ * Copyright (c) 1987 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * Copyright (c) 1980 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley.  The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef lint
+static char sccsid[] __attribute__((used)) = "@(#)mkheaders.c	5.5 (Berkeley) 6/18/88";
+#endif /* not lint */
+
+/*
+ * Make all the .h files for the optional entries
+ */
+
+#include <stdio.h>
+#include <stdlib.h>	/* malloc, free, exit */
+#include <string.h>	/* strcmp */
+#include <unistd.h>	/* unlink */
+#include <sys/param.h>	/* MAXPATHLEN */
+#include <ctype.h>
+#include "config.h"
+#include "parser.h"
+
+static void	do_count(const char *dev, const char *hname, int search);
+static void	do_header(const char *dev, const char *hname, int count);
+static int	file_needed(const char *name);
+static char 	*toheader(const char *dev);
+static char	*tomacro(const char *dev);
+
+void
+headers(void)
+{
+	struct file_list *fl;
+
+	for (fl = ftab; fl != 0; fl = fl->f_next)
+		if (fl->f_needs != 0)
+			do_count(fl->f_needs, fl->f_needs, 1);
+}
+
+/*
+ * count all the devices of a certain type and recurse to count
+ * whatever the device is connected to
+ */
+void
+do_count(const char *dev, const char *hname, int search)
+{
+	struct device *dp, *mp;
+	int count;
+
+	for (count = 0,dp = dtab; dp != 0; dp = dp->d_next)
+		if (dp->d_unit != -1 && eq(dp->d_name, dev)) {
+			/*
+			 * Avoid making .h files for bus types on sun machines
+			 */
+			if ((machine == MACHINE_SUN2 ||
+			     machine == MACHINE_SUN3 ||
+			     machine == MACHINE_SUN4)
+			    && dp->d_conn == TO_NEXUS){
+				return;
+			}
+			if (dp->d_type == PSEUDO_DEVICE) {
+				count =
+				    dp->d_slave != UNKNOWN ? dp->d_slave : 1;
+				if (dp->d_flags)
+					dev = NULL;
+				break;
+			}
+			if (machine != MACHINE_SUN2 && machine != MACHINE_SUN3
+			    && machine != MACHINE_SUN4)
+				/* avoid ie0,ie0,ie1 setting NIE to 3 */
+				count++;
+			/*
+			 * Allow holes in unit numbering,
+			 * assumption is unit numbering starts
+			 * at zero.
+			 */
+			if (dp->d_unit + 1 > count)
+				count = dp->d_unit + 1;
+			if (search) {
+				mp = dp->d_conn;
+				if (mp != 0 && mp != TO_NEXUS &&
+				    mp->d_conn != TO_NEXUS) {
+					/*
+					 * Check for the case of the
+					 * controller that the device
+					 * is attached to is in a separate
+					 * file (e.g. "sd" and "sc").
+					 * In this case, do NOT define
+					 * the number of controllers
+					 * in the hname .h file.
+					 */
+					if (!file_needed(mp->d_name))
+					    do_count(mp->d_name, hname, 0);
+					search = 0;
+				}
+			}
+		}
+	do_header(dev, hname, count);
+}
+
+/*
+ * Scan the file list to see if name is needed to bring in a file.
+ */
+static int
+file_needed(const char *name)
+{
+	struct file_list *fl;
+
+	for (fl = ftab; fl != 0; fl = fl->f_next) {
+		if (fl->f_needs && strcmp(fl->f_needs, name) == 0)
+			return (1);
+	}
+	return (0);
+}
+
+static void
+do_header(const char *dev, const char *hname, int count)
+{
+	char *file, *name;
+	const char *inw;
+	char *inwcopy;
+	struct file_list *fl = NULL;	/* may exit for(;;) uninitted */
+	struct file_list *fl_head, *fl_prev;
+	FILE *inf, *outf;
+	int inc, oldcount;
+
+	file = toheader(hname);
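+	/* with a NULL dev, step past the leading 'N' of the macro name */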
+	name = tomacro(dev?dev:hname) + (dev == NULL);
+	inf = fopen(file, "r");
+	oldcount = -1;
+	if (inf == 0) {
+		(void) unlink(file);
+		outf = fopen(file, "w");
+		if (outf == 0) {
+			perror(file);
+			exit(1);
+		}
+		fprintf(outf, "#define %s %d\n", name, count);
+		(void) fclose(outf);
+		file = path("meta_features.h");
+		outf = fopen(file, "a");
+		if (outf == 0) {
+			perror(file);
+			exit(1);
+		}
+		fprintf(outf, "#include <%s.h>\n", hname);
+		(void) fclose(outf);
+		return;
+	}
+	fl_head = 0;
+	for (;;) {
+		const char *cp;
+		if ((inw = get_word(inf)) == 0 || inw == (char *)EOF)
+			break;
+		if ((inw = get_word(inf)) == 0 || inw == (char *)EOF)
+			break;
+		inwcopy = ns(inw);
+		cp = get_word(inf);
+		if (cp == 0 || cp == (char *)EOF)
+			break;
+		inc = atoi(cp);
+		if (eq(inwcopy, name)) {
+			oldcount = inc;
+			inc = count;
+		}
+		cp = get_word(inf);
+		if (cp == (char *)EOF)
+			break;
+		fl = (struct file_list *) malloc(sizeof *fl);
+		fl->f_fn = inwcopy;
+		fl->f_type = inc;
+		fl->f_next = fl_head;
+		fl_head = fl;
+	}
+	(void) fclose(inf);
+	if (count == oldcount) {
+		while (fl != 0) {
+			fl_prev = fl;
+			fl = fl->f_next;
+			free((char *)fl_prev);
+		}
+		return;
+	}
+	if (oldcount == -1) {
+		fl = (struct file_list *) malloc(sizeof *fl);
+		fl->f_fn = name;
+		fl->f_type = count;
+		fl->f_next = fl_head;
+		fl_head = fl;
+	}
+	unlink(file);
+	outf = fopen(file, "w");
+	if (outf == 0) {
+		perror(file);
+		exit(1);
+	}
+	for (fl = fl_head; fl != 0; fl = fl->f_next) {
+		fprintf(outf, "#define %s %d\n",
+		    fl->f_fn, count ? fl->f_type : 0);
+		free((char *)fl);
+	}
+	(void) fclose(outf);
+}
+
+/*
+ * convert a dev name to a .h file name
+ */
+static char *
+toheader(const char *dev)
+{
+	static char hbuf[MAXPATHLEN];
+	(void) snprintf(hbuf, sizeof hbuf, "%s.h", path(dev));
+	hbuf[MAXPATHLEN-1] = '\0';
+	return (hbuf);
+}
+
+/*
+ * convert a dev name to a macro name
+ */
+static char *
+tomacro(const char *dev)
+{
+	static char mbuf[FILENAME_MAX];
+	char *cp;
+
+	cp = mbuf;
+	*cp++ = 'N';
+	while (*dev)
+		if (!islower(*dev))
+			*cp++ = *dev++;
+		else
+			*cp++ = toupper(*dev++);
+	*cp++ = 0;
+	return (mbuf);
+}
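+
+/*
+ * Example (device name illustrative): tomacro("sd") returns "NSD", so a
+ * count of 4 for device "sd" is emitted as "#define NSD 4" in sd.h.
+ */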
diff --git a/SETUP/config/mkioconf.c b/SETUP/config/mkioconf.c
new file mode 100644
index 000000000..90b6c2f97
--- /dev/null
+++ b/SETUP/config/mkioconf.c
@@ -0,0 +1,2086 @@
+/*
+ * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * "Portions Copyright (c) 1999 Apple Computer, Inc.  All Rights
+ * Reserved.  This file contains Original Code and/or Modifications of
+ * Original Code as defined in and that are subject to the Apple Public
+ * Source License Version 1.0 (the 'License').  You may not use this file
+ * except in compliance with the License.  Please obtain a copy of the
+ * License at http://www.apple.com/publicsource and read it before using
+ * this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License."
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/* 
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * Copyright (c) 1988 Carnegie-Mellon University
+ * Copyright (c) 1987 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+ 
+/*
+ * Copyright (c) 1980 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley.  The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>	/* exit */
+#include <unistd.h>	/* for unlink */
+#include "parser.h"
+#include "config.h"
+
+/*
+ * build the ioconf.c file
+ */
+char	*intv(struct device *dev);
+char	*intv2(struct device *dev);
+void	i386_pseudo_inits(FILE *fp);	/* XXX function in wrong block */
+void	check_vector(struct idlst *vec);
+void	nrw_ioconf(void);
+void	m88k_pseudo_inits(FILE *fp);
+void	m98k_pseudo_inits(FILE *fp);
+char	*m88k_dn(char *name);
+char	*m98k_dn(char *name);
+char	*concat3(char *buf, const char *p1, const char *p2, const char *p3);
+
+#if MACHINE_VAX
+
+void
+vax_ioconf(void)
+{
+	register struct device *dp, *mp, *np;
+	register int uba_n, slave;
+	FILE *fp;
+
+	fp = fopen(path("ioconf.c"), "w");
+	if (fp == 0) {
+		perror(path("ioconf.c"));
+		exit(1);
+	}
+/*MACH_KERNEL*/
+	fprintf(fp, "#ifndef  MACH_KERNEL\n");
+/*MACH_KERNEL*/
+	fprintf(fp, "#include <machine/pte.h>\n");
+	fprintf(fp, "#include <sys/param.h>\n");
+	fprintf(fp, "#include <sys/buf.h>\n");
+	fprintf(fp, "#include <sys/map.h>\n");
+	fprintf(fp, "#include <sys/vm.h>\n");
+/*MACH_KERNEL*/
+	fprintf(fp, "#endif   MACH_KERNEL\n");
+/*MACH_KERNEL*/
+	fprintf(fp, "\n");
+	fprintf(fp, "#include <vaxmba/mbavar.h>\n");
+	fprintf(fp, "#include <vaxuba/ubavar.h>\n\n");
+	fprintf(fp, "\n");
+	fprintf(fp, "#define C (caddr_t)\n\n");
+	/*
+	 * First print the mba initialization structures
+	 */
+	if (seen_mba) {
+		for (dp = dtab; dp != 0; dp = dp->d_next) {
+			mp = dp->d_conn;
+			if (mp == 0 || mp == TO_NEXUS ||
+			    !eq(mp->d_name, "mba"))
+				continue;
+			fprintf(fp, "extern struct mba_driver %sdriver;\n",
+			    dp->d_name);
+		}
+		fprintf(fp, "\nstruct mba_device mbdinit[] = {\n");
+		fprintf(fp, "\t/* Device,  Unit, Mba, Drive, Dk */\n");
+		for (dp = dtab; dp != 0; dp = dp->d_next) {
+			mp = dp->d_conn;
+			if (dp->d_unit == QUES || mp == 0 ||
+			    mp == TO_NEXUS || !eq(mp->d_name, "mba"))
+				continue;
+			if (dp->d_addr) {
+				printf("can't specify csr address on mba for %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_vec != 0) {
+				printf("can't specify vector for %s%d on mba\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_drive == UNKNOWN) {
+				printf("drive not specified for %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_slave != UNKNOWN) {
+				printf("can't specify slave number for %s%d\n", 
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			fprintf(fp, "\t{ &%sdriver, %d,   %s,",
+				dp->d_name, dp->d_unit, qu(mp->d_unit));
+			fprintf(fp, "  %s,  %d },\n",
+				qu(dp->d_drive), dp->d_dk);
+		}
+		fprintf(fp, "\t0\n};\n\n");
+		/*
+		 * Print the mbsinit structure
+		 * Driver Controller Unit Slave
+		 */
+		fprintf(fp, "struct mba_slave mbsinit [] = {\n");
+		fprintf(fp, "\t/* Driver,  Ctlr, Unit, Slave */\n");
+		for (dp = dtab; dp != 0; dp = dp->d_next) {
+			/*
+			 * All slaves are connected to something which
+			 * is connected to the massbus.
+			 */
+			if ((mp = dp->d_conn) == 0 || mp == TO_NEXUS)
+				continue;
+			np = mp->d_conn;
+			if (np == 0 || np == TO_NEXUS ||
+			    !eq(np->d_name, "mba"))
+				continue;
+			fprintf(fp, "\t{ &%sdriver, %s",
+			    mp->d_name, qu(mp->d_unit));
+			fprintf(fp, ",  %2d,    %s },\n",
+			    dp->d_unit, qu(dp->d_slave));
+		}
+		fprintf(fp, "\t0\n};\n\n");
+	}
+	/*
+	 * Now generate interrupt vectors for the unibus
+	 */
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_vec != 0) {
+			struct idlst *ip;
+			mp = dp->d_conn;
+			if (mp == 0 || mp == TO_NEXUS ||
+			    !eq(mp->d_name, "uba"))
+				continue;
+			fprintf(fp,
+			    "extern struct uba_driver %sdriver;\n",
+			    dp->d_name);
+			fprintf(fp, "extern ");
+			ip = dp->d_vec;
+			for (;;) {
+				fprintf(fp, "X%s%d()", ip->id, dp->d_unit);
+				ip = ip->id_next;
+				if (ip == 0)
+					break;
+				fprintf(fp, ", ");
+			}
+			fprintf(fp, ";\n");
+			fprintf(fp, "int\t (*%sint%d[])() = { ", dp->d_name,
+			    dp->d_unit);
+			ip = dp->d_vec;
+			for (;;) {
+				fprintf(fp, "X%s%d", ip->id, dp->d_unit);
+				ip = ip->id_next;
+				if (ip == 0)
+					break;
+				fprintf(fp, ", ");
+			}
+			fprintf(fp, ", 0 } ;\n");
+		}
+	}
+	fprintf(fp, "\nstruct uba_ctlr ubminit[] = {\n");
+	fprintf(fp, "/*\t driver,\tctlr,\tubanum,\talive,\tintr,\taddr */\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		mp = dp->d_conn;
+		if (dp->d_type != CONTROLLER || mp == TO_NEXUS || mp == 0 ||
+		    !eq(mp->d_name, "uba"))
+			continue;
+		if (dp->d_vec == 0) {
+			printf("must specify vector for %s%d\n",
+			    dp->d_name, dp->d_unit);
+			continue;
+		}
+		if (dp->d_addr == 0) {
+			printf("must specify csr address for %s%d\n",
+			    dp->d_name, dp->d_unit);
+			continue;
+		}
+		if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) {
+			printf("drives need their own entries; dont ");
+			printf("specify drive or slave for %s%d\n",
+			    dp->d_name, dp->d_unit);
+			continue;
+		}
+		if (dp->d_flags) {
+			printf("controllers (e.g. %s%d) ",
+			    dp->d_name, dp->d_unit);
+			printf("don't have flags, only devices do\n");
+			continue;
+		}
+		fprintf(fp,
+		    "\t{ &%sdriver,\t%d,\t%s,\t0,\t%sint%d, C 0%o },\n",
+		    dp->d_name, dp->d_unit, qu(mp->d_unit),
+		    dp->d_name, dp->d_unit, dp->d_addr);
+	}
+	fprintf(fp, "\t0\n};\n");
+/* unibus devices */
+	fprintf(fp, "\nstruct uba_device ubdinit[] = {\n");
+	fprintf(fp,
+"\t/* driver,  unit, ctlr,  ubanum, slave,   intr,    addr,    dk, flags*/\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		mp = dp->d_conn;
+		if (dp->d_unit == QUES || dp->d_type != DEVICE || mp == 0 ||
+		    mp == TO_NEXUS || mp->d_type == MASTER ||
+		    eq(mp->d_name, "mba"))
+			continue;
+		np = mp->d_conn;
+		if (np != 0 && np != TO_NEXUS && eq(np->d_name, "mba"))
+			continue;
+		np = 0;
+		if (eq(mp->d_name, "uba")) {
+			if (dp->d_vec == 0) {
+				printf("must specify vector for device %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_addr == 0) {
+				printf("must specify csr for device %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) {
+				printf("drives/slaves can be specified ");
+				printf("only for controllers, ");
+				printf("not for device %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			uba_n = mp->d_unit;
+			slave = QUES;
+		} else {
+			if ((np = mp->d_conn) == 0) {
+				printf("%s%d isn't connected to anything ",
+				    mp->d_name, mp->d_unit);
+				printf(", so %s%d is unattached\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			uba_n = np->d_unit;
+			if (dp->d_drive == UNKNOWN) {
+				printf("must specify ``drive number'' ");
+				printf("for %s%d\n", dp->d_name, dp->d_unit);
+				continue;
+			}
+			/* NOTE THAT ON THE UNIBUS ``drive'' IS STORED IN */
+			/* ``SLAVE'' AND WE DON'T WANT A SLAVE SPECIFIED */
+			if (dp->d_slave != UNKNOWN) {
+				printf("slave numbers should be given only ");
+				printf("for massbus tapes, not for %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_vec != 0) {
+				printf("interrupt vectors should not be ");
+				printf("given for drive %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_addr != 0) {
+				printf("csr addresses should be given only ");
+				printf("on controllers, not on %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			slave = dp->d_drive;
+		}
+		fprintf(fp, "\t{ &%sdriver,  %2d,   %s,",
+		    eq(mp->d_name, "uba") ? dp->d_name : mp->d_name, dp->d_unit,
+		    eq(mp->d_name, "uba") ? " -1" : qu(mp->d_unit));
+		fprintf(fp, "  %s,    %2d,   %s, C 0%-6o,  %d,  0x%x },\n",
+		    qu(uba_n), slave, intv(dp), dp->d_addr, dp->d_dk,
+		    dp->d_flags);
+	}
+	fprintf(fp, "\t0\n};\n");
+	(void) fclose(fp);
+}
+#endif
+
+#if MACHINE_SUN
+#define SP_OBIO	0x0004	/* on board i/o (for sun/autoconf.h) */
+
+#define	VEC_LO	64
+#define	VEC_HI	255
+
+void pseudo_inits(FILE *fp);
+
+void
+check_vector(struct idlst *vec)
+{
+
+	if (vec->id_vec == 0)
+		fprintf(stderr, "vector number for %s not given\n", vec->id);
+	else if (vec->id_vec < VEC_LO || vec->id_vec > VEC_HI)
+		fprintf(stderr,
+			"vector number %d for %s is not between %d and %d\n",
+			vec->id_vec, vec->id, VEC_LO, VEC_HI);
+}
+
+void
+sun_ioconf(void)
+{
+	register struct device *dp, *mp;
+	register int slave;
+	register struct idlst *vp;
+	FILE *fp;
+
+	fp = fopen(path("ioconf.c"), "w");
+	if (fp == 0) {
+		perror(path("ioconf.c"));
+		exit(1);
+	}
+/*MACH_KERNEL*/
+	fprintf(fp, "#ifndef  MACH_KERNEL\n");
+/*MACH_KERNEL*/
+	fprintf(fp, "#include <sys/param.h>\n");
+	fprintf(fp, "#include <sys/buf.h>\n");
+	fprintf(fp, "#include <sys/map.h>\n");
+	fprintf(fp, "#include <sys/vm.h>\n");
+/*MACH_KERNEL*/
+	fprintf(fp, "#endif   MACH_KERNEL\n");
+/*MACH_KERNEL*/
+	fprintf(fp, "\n");
+	fprintf(fp, "#include <sundev/mbvar.h>\n");
+	fprintf(fp, "\n");
+	fprintf(fp, "#define C (caddr_t)\n\n");
+	fprintf(fp, "\n");
+
+	/*
+	 * Now generate interrupt vectors for the Mainbus
+	 */
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		mp = dp->d_conn;
+		if (mp == TO_NEXUS || mp == 0 || mp->d_conn != TO_NEXUS)
+			continue;
+		fprintf(fp, "extern struct mb_driver %sdriver;\n",
+			    dp->d_name);
+		if (dp->d_vec != 0) {
+			if (dp->d_pri == 0)
+				fprintf(stderr,
+				    "no priority specified for %s%d\n",
+				    dp->d_name, dp->d_unit);
+			fprintf(fp, "extern ");
+			for (vp = dp->d_vec;;) {
+				if (machine == MACHINE_SUN4)
+					fprintf(fp, "%s()", vp->id);
+				else
+					fprintf(fp, "X%s%d()",
+						vp->id, dp->d_unit);
+				vp = vp->id_next;
+				if (vp == 0)
+					break;
+				fprintf(fp, ", ");
+			}
+			fprintf(fp, ";\n");
+
+			for (vp = dp->d_vec; vp; vp = vp->id_next) {
+				fprintf(fp, "int V%s%d = %d;\n",
+				    vp->id, dp->d_unit, dp->d_unit);
+			}
+
+			fprintf(fp, "struct vec %s[] = { ", intv(dp));
+			for (vp = dp->d_vec; vp != 0; vp = vp->id_next) {
+				if (machine == MACHINE_SUN4)
+					fprintf(fp, "{ %s, %d, &V%s%d }, ",
+						vp->id, vp->id_vec,
+						vp->id, dp->d_unit);
+				else
+				fprintf(fp, "{ X%s%d, %d, &V%s%d }, ",
+					vp->id, dp->d_unit, vp->id_vec,
+					vp->id, dp->d_unit);
+				check_vector(vp);
+			}
+			fprintf(fp, "0 };\n");
+		}
+	}
+
+	/*
+	 * Now spew forth the mb_ctlr structures
+	 */
+	fprintf(fp, "\nstruct mb_ctlr mbcinit[] = {\n");
+	fprintf(fp,
+"/* driver,\tctlr,\talive,\taddress,\tintpri,\t intr,\tspace */\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		mp = dp->d_conn;
+		if (dp->d_type != CONTROLLER || mp == TO_NEXUS || mp == 0 ||
+		    mp->d_conn != TO_NEXUS)
+			continue;
+		if (dp->d_addr == UNKNOWN) {
+			printf("must specify csr address for %s%d\n",
+			    dp->d_name, dp->d_unit);
+			continue;
+		}
+		if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) {
+			printf("drives need their own entries; ");
+			printf("don't specify drive or slave for %s%d\n",
+			    dp->d_name, dp->d_unit);
+			continue;
+		}
+		if (dp->d_flags) {
+			printf("controllers (e.g. %s%d) don't have flags, ",
+			    dp->d_name, dp->d_unit);
+			printf("only devices do\n");
+			continue;
+		}
+		if (machine == MACHINE_SUN4)
+		fprintf(fp,
+		"{ &%sdriver,\t%d,\t0,\tC 0x%08x,\t%d,\t%s, 0x%x },\n",
+		    dp->d_name, dp->d_unit, dp->d_addr,
+		    (dp->d_bus==SP_OBIO) ? (dp->d_pri << 1) : (dp->d_pri<<1)-1,
+		    intv(dp), ((dp->d_mach << 16) | dp->d_bus));
+		else
+			fprintf(fp,
+		"{ &%sdriver,\t%d,\t0,\tC 0x%08x,\t%d,\t%s, 0x%x },\n",
+		    dp->d_name, dp->d_unit, dp->d_addr,
+		    dp->d_pri, intv(dp), ((dp->d_mach << 16) | dp->d_bus));
+	}
+	fprintf(fp, "\t0\n};\n");
+
+	/*
+	 * Now we go for the mb_device stuff
+	 */
+	fprintf(fp, "\nstruct mb_device mbdinit[] = {\n");
+	fprintf(fp,
+"/* driver,\tunit, ctlr, slave, address,      pri, dk, flags, intr, space */\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		mp = dp->d_conn;
+		if (dp->d_unit == QUES || dp->d_type != DEVICE || mp == 0 ||
+		    mp == TO_NEXUS || mp->d_type == MASTER)
+			continue;
+		if (mp->d_conn == TO_NEXUS) {
+			if (dp->d_addr == UNKNOWN) {
+				printf("must specify csr for device %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) {
+				printf("drives/slaves can be specified only ");
+				printf("for controllers, not for device %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			slave = QUES;
+		} else {
+			if (mp->d_conn == 0) {
+				printf("%s%d isn't connected to anything, ",
+				    mp->d_name, mp->d_unit);
+				printf("so %s%d is unattached\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_drive == UNKNOWN) {
+				printf("must specify ``drive number'' for %s%d\n",
+				   dp->d_name, dp->d_unit);
+				continue;
+			}
+			/* NOTE THAT ON THE UNIBUS ``drive'' IS STORED IN */
+			/* ``SLAVE'' AND WE DON'T WANT A SLAVE SPECIFIED */
+			if (dp->d_slave != UNKNOWN) {
+				printf("slave numbers should be given only ");
+				printf("for massbus tapes, not for %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_pri != 0) {
+				printf("interrupt priority should not be ");
+				printf("given for drive %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_addr != UNKNOWN) {
+				printf("csr addresses should be given only");
+				printf(" on controllers, not on %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			slave = dp->d_drive;
+		}
+		if (machine == MACHINE_SUN4)
+		fprintf(fp,
+"{ &%sdriver,\t%d,  %s,   %2d,     C 0x%08x, %d,   %d, 0x%x, %s, 0x%x },\n",
+		    mp->d_conn == TO_NEXUS? dp->d_name : mp->d_name, dp->d_unit,
+		    mp->d_conn == TO_NEXUS? " -1" : qu(mp->d_unit),
+		    slave,
+		    dp->d_addr == UNKNOWN? 0 : dp->d_addr,
+		    dp->d_pri * 2, dp->d_dk, dp->d_flags, intv(dp),
+		    ((dp->d_mach << 16) | dp->d_bus));
+		else
+			fprintf(fp,
+"{ &%sdriver,\t%d,  %s,   %2d,     C 0x%08x, %d,   %d, 0x%x, %s, 0x%x },\n",
+		    mp->d_conn == TO_NEXUS? dp->d_name : mp->d_name, dp->d_unit,
+		    mp->d_conn == TO_NEXUS? " -1" : qu(mp->d_unit),
+		    slave,
+		    dp->d_addr == UNKNOWN? 0 : dp->d_addr,
+		    dp->d_pri, dp->d_dk, dp->d_flags, intv(dp),
+		    ((dp->d_mach << 16) | dp->d_bus));
+	}
+	fprintf(fp, "\t0\n};\n");
+	pseudo_inits(fp);
+	(void) fclose(fp);
+}
+
+void
+pseudo_inits(FILE *fp)
+{
+#ifdef	notdef
+	register struct device *dp;
+	int count;
+
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		fprintf(fp, "extern int %s(int);\n", dp->d_init);
+	}
+#endif	/* notdef */
+	fprintf(fp, "struct pseudo_init {\n");
+	fprintf(fp, "\tint\tps_count;\n\tint\t(*ps_func)();\n");
+	fprintf(fp, "} pseudo_inits[] = {\n");
+#ifdef	notdef
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		count = dp->d_slave;
+		if (count <= 0)
+			count = 1;
+		fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init);
+	}
+#endif	/* notdef */
+	fprintf(fp, "\t{0,\t0},\n};\n");
+}
+#endif
+
+#if MACHINE_ROMP
+void
+romp_ioconf(void)
+{
+	register struct device *dp, *mp;
+	register int slave;
+	FILE *fp;
+
+	fp = fopen(path("ioconf.c"), "w");
+	if (fp == 0) {
+		perror(path("ioconf.c"));
+		exit(1);
+	}
+/*MACH_KERNEL*/
+	fprintf(fp, "#ifndef  MACH_KERNEL\n");
+/*MACH_KERNEL*/
+	fprintf(fp, "#include <sys/param.h>\n");
+	fprintf(fp, "#include <sys/buf.h>\n");
+	fprintf(fp, "#include <sys/map.h>\n");
+	fprintf(fp, "#include <sys/vm.h>\n");
+/*MACH_KERNEL*/
+	fprintf(fp, "#endif   MACH_KERNEL\n");
+/*MACH_KERNEL*/
+	fprintf(fp, "\n");
+	fprintf(fp, "#include <caio/ioccvar.h>\n");
+	fprintf(fp, "\n");
+	fprintf(fp, "#define C (caddr_t)\n\n");
+	fprintf(fp, "\n");
+
+	fprintf (fp, "struct     iocc_hd iocc_hd[] = {{C 0xF0000000,}};\n");
+	/*
+	 * Now generate interrupt vectors for the  Winnerbus
+	 */
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_pri != 0) {
+			mp = dp->d_conn;
+			if (mp == 0 || mp == TO_NEXUS ||
+			    !eq(mp->d_name, "iocc"))
+				continue;
+			fprintf(fp, "extern struct iocc_driver %sdriver;\n",
+			    dp->d_name);
+		}
+	}
+	/*
+	 * Now spew forth the iocc_cinfo structure
+	 */
+	fprintf(fp, "\nstruct iocc_ctlr iocccinit[] = {\n");
+	fprintf(fp, "/*\t driver,\tctlr,\talive,\taddr,\tintpri */\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		mp = dp->d_conn;
+		if (dp->d_type != CONTROLLER)
+			continue;
+		if (mp == TO_NEXUS || mp == 0 || !eq(mp->d_name, "iocc"))
+			continue;
+		if (dp->d_unit == QUES && eq(dp->d_name,"hdc"))
+			continue;
+		if (dp->d_unit == QUES && eq(dp->d_name,"fdc"))
+			continue;
+		if (dp->d_pri == 0) {
+			printf("must specify priority for %s%d\n",
+			    dp->d_name, dp->d_unit);
+			continue;
+		}
+		if (dp->d_addr == 0) {
+			printf("must specify csr address for %s%d\n",
+			    dp->d_name, dp->d_unit);
+			continue;
+		}
+		if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) {
+			printf("drives need their own entries; ");
+			printf("dont specify drive or slave for %s%d\n",
+			    dp->d_name, dp->d_unit);
+			continue;
+		}
+		if (dp->d_flags) {
+			printf("controllers (e.g. %s%d) don't have flags, ",
+			    dp->d_name, dp->d_unit);
+			printf("only devices do\n");
+			continue;
+		}
+		fprintf(fp, "\t{ &%sdriver,\t%d,\t0,\tC 0x%x,\t%d },\n",
+		    dp->d_name, dp->d_unit, dp->d_addr, dp->d_pri);
+	}
+	fprintf(fp, "\t0\n};\n");
+	/*
+	 * Now we go for the iocc_device stuff
+	 */
+	fprintf(fp, "\nstruct iocc_device ioccdinit[] = {\n");
+	fprintf(fp,
+"\t/* driver,  unit, ctlr,  slave,   addr,    pri,    dk, flags*/\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		mp = dp->d_conn;
+		if (dp->d_unit == QUES || dp->d_type != DEVICE || mp == 0 ||
+		    mp == TO_NEXUS || mp->d_type == MASTER ||
+		    eq(mp->d_name, "iocca"))
+			continue;
+		if (eq(mp->d_name, "iocc")) {
+			if (dp->d_pri == 0) {
+				printf("must specify vector for device %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_addr == 0) {
+				printf("must specify csr for device %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) {
+				printf("drives/slaves can be specified only ");
+				printf("for controllers, not for device %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			slave = QUES;
+		} else {
+			if (mp->d_conn == 0) {
+				printf("%s%d isn't connected to anything, ",
+				    mp->d_name, mp->d_unit);
+				printf("so %s%d is unattached\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_drive == UNKNOWN) {
+				printf("must specify ``drive number'' for %s%d\n",
+				   dp->d_name, dp->d_unit);
+				continue;
+			}
+			/* NOTE THAT ON THE UNIBUS ``drive'' IS STORED IN */
+			/* ``SLAVE'' AND WE DON'T WANT A SLAVE SPECIFIED */
+			if (dp->d_slave != UNKNOWN) {
+				printf("slave numbers should be given only ");
+				printf("for massbus tapes, not for %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_pri != 0) {
+				printf("interrupt priority should not be ");
+				printf("given for drive %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_addr != 0) {
+				printf("csr addresses should be given only");
+				printf("on controllers, not on %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			slave = dp->d_drive;
+		}
+		fprintf(fp,
+"\t{ &%sdriver,  %2d,   %s,    %2d,   C 0x%x, %d,  %d,  0x%x },\n",
+		    eq(mp->d_name, "iocc") ? dp->d_name : mp->d_name, dp->d_unit,
+		    eq(mp->d_name, "iocc") ? " -1" : qu(mp->d_unit),
+		    slave, dp->d_addr, dp->d_pri, dp->d_dk, dp->d_flags);
+	}
+	fprintf(fp, "\t0\n};\n");
+	(void) fclose(fp);
+}
+
+#endif	/* MACHINE_ROMP */
+
+#if	MACHINE_MMAX
+void
+mmax_ioconf(void)
+{
+	register struct device *dp, *dp1, *mp;
+	FILE *fp;
+	int	unit;
+
+	fp = fopen(path("ioconf.c"), "w");
+	if (fp == 0) {
+		perror(path("ioconf.c"));
+		exit(1);
+	}
+	fprintf(fp, "#include <mmaxio/io.h>\n\n");
+
+	/*
+	 *	Multimax code is a little messy because we have to
+	 * 	scan the entire list for each device to generate the
+	 * 	structures correctly.  We cheat and use the d->d_pri
+	 *	field to avoid doing anything twice.  -1000 is an obvious
+	 *	bogus value for this field.
+	 */
+
+	for (dp1 = dtab; dp1 != 0; dp1 = dp1->d_next) {
+	    /* 
+	     *	If pri is not -1000, then haven't seen device yet.
+	     */
+	    if (dp1->d_pri != -1000) switch (dp1->d_type) {
+
+	    case CONTROLLER:
+		fprintf(fp,"struct devaddr %s_devaddr[] = {\n",
+			dp1->d_name);
+		/*
+		 *	Now scan entire list and get all of them.  Use
+		 *	unit to make sure unit numbers are right.
+		 */
+		unit = 0;
+		for (dp = dp1; dp != 0; dp = dp->d_next) {
+			if (!strcmp(dp->d_name, dp1->d_name)) {
+				mp = dp->d_conn;
+				if (mp != TO_SLOT) {
+		printf("%s%d: controller must be connected to slot.\n",
+						dp->d_name, dp->d_unit);
+					exit(1);
+				}
+				if (dp->d_vec != 0) {
+		printf("%s%d: cannot configure multimax interrupt vectors.\n",
+						dp->d_name, dp->d_unit);
+				}
+				if (dp->d_pri != 0) {
+		printf("%s%d: interrupt priority is nonsense on multimax.\n",
+						dp->d_name, dp->d_unit);
+				}
+				if ((dp->d_drive != UNKNOWN) ||
+					(dp->d_slave !=UNKNOWN)) {
+		printf("%s%d: don't specify drive or slave for controller.\n",
+						dp->d_name, dp->d_unit);
+				}
+				/*
+				 *	Fix unit number if bogus
+				 */
+				if(dp->d_unit != unit) {
+	printf("Warning: %s%d configured as %s%d -- fix config file.\n",
+		dp->d_name,dp->d_unit,dp->d_name,unit);
+					dp->d_unit = unit;
+				}
+				unit++;
+				fprintf(fp,"\t{ %d, 0, 0},\n",dp->d_addr);
+				dp->d_pri = -1000; /* done this one */
+			}
+		}
+		fprintf(fp,"} ;\n\n");
+		break;
+
+	    case DEVICE:
+		fprintf(fp,"struct subdevaddr %s_subdevaddr[] = {\n",
+			dp1->d_name);
+		/*
+		 *	Now scan entire list and get all of them.  Use
+		 *	unit to make sure unit numbers are right.
+		 */
+		unit = 0;
+		for (dp = dp1; dp != 0; dp = dp->d_next) {
+			if (!strcmp(dp->d_name, dp1->d_name)) {
+				mp = dp->d_conn;
+				if ( (mp == 0) || (mp == TO_SLOT) ||
+					(mp->d_type != CONTROLLER)) {
+				printf("%s%d: device has no controller.\n",
+						dp->d_name, dp->d_unit);
+					exit(1);
+				}
+				if (dp->d_vec != 0) {
+		printf("%s%d: cannot configure multimax interrupt vectors.\n",
+						dp->d_name, dp->d_unit);
+				}
+				if (dp->d_pri != 0) {
+		printf("%s%d: interrupt priority is nonsense on multimax.\n",
+						dp->d_name, dp->d_unit);
+				}
+				if ((dp->d_drive != UNKNOWN) ||
+					(dp->d_slave != UNKNOWN)) {
+		printf("%s%d: use 'unit' instead of 'drive' or 'slave'.\n",
+						dp->d_name, dp->d_unit);
+				}
+				/*
+				 *	Fix unit number if bogus
+				 */
+				if (dp->d_unit != unit) {
+	printf("Warning: %s%d configured as %s%d -- fix config file.\n",
+				dp->d_name, dp->d_unit, dp->d_name, unit);
+					dp->d_unit = unit;
+				}
+				unit++;
+				if ((dp->d_addr == 0) || (dp->d_addr == QUES)) {
+			printf("%s%d: must specify logical unit number.\n",
+					dp->d_name, dp->d_unit);
+					exit(1);
+				}
+				fprintf(fp,"\t{ %d, %d, 0},\n",mp->d_unit,
+					dp->d_addr);
+				dp->d_pri = -1000; /* don't do this again */
+			}
+		}
+		fprintf(fp,"} ;\n\n");
+		break;
+
+	    case PSEUDO_DEVICE:
+		/*
+		 *	Doesn't exist as far as ioconf.c is concerned.
+		 */
+		break;
+
+	    default:
+		printf("Bogus device type for %s\n", dp1->d_name);
+		exit(1);
+		break;
+	    }
+	}
+	
+	(void) fclose(fp);
+}
+
+#endif	/* MACHINE_MMAX */
+
+#if	MACHINE_SQT
+
+/*
+ * Define prototype device spec lines.
+ *
+ * For now, have static set of controller prototypes.  This should be
+ * upgraded to using (eg) controllers.balance (ala Sequent /etc/config)
+ * to support custom boards without need to edit this file.
+ */
+
+/*
+ *  flags for indicating presence of upper and lower bound values
+ */
+
+#define	P_LB	1
+#define	P_UB	2
+
+struct p_entry {
+	const char 	*p_name;		/* name of field */
+	long	p_def;				/* default value */
+	long 	p_lb;				/* lower bound for field */
+	long	p_ub;				/* upper bound of field */ 
+	char	p_flags;			/* bound valid flags */
+};
+
+struct proto {
+	const char	*p_name;		/* name of controller type */
+	struct  p_entry	p_fields[NFIELDS];	/* ordered list of fields */
+	int	p_seen;				/* any seen? */
+};
+
+/*
+ * MULTIBUS Adapter:
+ *	type mbad  index csr flags maps[0,256] bin[0,7] intr[0,7]
+ */
+
+static	struct	proto	mbad_proto = {
+	"mbad",
+       {{ "index",	0,	0,	0,	0 },
+	{ "csr",	0,	0,	0,	0 },
+	{ "flags",	0,	0,	0,	0 },
+	{ "maps",	0,	0,	256,	P_LB|P_UB },
+	{ "bin",	0,	0,	7,	P_LB|P_UB },
+	{ "intr",	0,	0,	7,	P_LB|P_UB },},
+	0
+};
+
+/*
+ * SCSI/Ether Controller:
+ *	type sec   flags bin[0,7] req doneq index target[0,7]=-1 unit
+ */
+
+static	struct	proto	sec_proto = {
+	"sec",
+       {{ "flags",	0,	0,	0,	0 },
+	{ "bin",	0,	0,	7,	P_LB|P_UB } ,
+	{ "req",	0,	0,	0,	0 },
+	{ "doneq",	0,	0,	0,	0 },
+	{ "index",	0,	0,	0,	0 },
+	{ "target",	-1,	0,	7,	P_LB|P_UB },
+	{ "unit",	0,	0,	0,	0 },},
+	0
+};
+
+/*
+ * "Zeke" (FAST) Disk Controller (Dual-Channel Disk Controller):
+ *	type zdc index[0,31] drive[-1,7] drive_type[-1,1]
+ *
+ * Legal values for drive_type:
+ *	M2333K = 0	(swallow)
+ *	M2351A = 1	(eagle)
+ *	wildcard = -1	(run-time determined)
+ */
+
+static	struct	proto	zdc_proto = {
+	"zdc",
+       {{ "index",	0,	0,	31,	P_LB|P_UB },
+	{ "drive",	0,	-1,	7,	P_LB|P_UB },
+	{ "drive_type",	0,	-1,	1,	P_LB|P_UB },},
+	0
+};
+
+static	struct	proto	*ptab[] = {
+	&mbad_proto,
+	&sec_proto,
+	&zdc_proto,
+	(struct proto *) 0
+};
+
+/*
+ * locate a prototype structure in the queue of such structures.
+ * return NULL if not found.
+ */
+
+static struct proto *
+find_proto(const char *str)
+{
+	register struct proto *ptp;
+	register int	ptbx;
+
+	for (ptbx = 0; (ptp = ptab[ptbx]) != NULL; ptbx++) {
+		if (eq(str, ptp->p_name))
+			return(ptp);
+	}
+	return(NULL);
+}
+
+void
+dev_param(struct device *dp, const char *str, long num)
+{
+	register struct p_entry *entry;
+	register struct proto *ptp;
+
+	ptp = find_proto(dp->d_conn->d_name);
+	if (ptp == NULL) {
+		fprintf(stderr,"dev %s cont %s", dp->d_name, dp->d_conn->d_name);
+		yyerror("invalid controller");
+		return;
+	}
+
+	for (entry = ptp->p_fields; entry->p_name != NULL; entry++) {
+		if (eq(entry->p_name, str)) {
+			if ((entry->p_flags & P_LB) && (num < entry->p_lb)) {
+				yyerror("parameter below range");
+				return;
+			}
+			if ((entry->p_flags & P_UB) && (num > entry->p_ub)) {
+				yyerror("parameter above range");
+				return;
+			}
+			dp->d_fields[entry-ptp->p_fields] = num;
+			return;
+		}
+	}
+
+	yyerror("invalid parameter");
+}
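+
+/*
+ * For example, with the sec_proto table above a "bin 9" parameter on
+ * a sec controller draws "parameter above range" (bin is bounded to
+ * [0,7]), and an unrecognized field name draws "invalid parameter".
+ */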
+
+void
+sqt_ioconf(void)
+{
+	register struct device *dp, *mp;
+	register int count;
+	const char *namep;
+	register struct proto *ptp;
+	register struct p_entry *entry;
+	FILE	*fp;
+	int	bin_table[8];
+	int	ptbx;
+	int	found;
+
+	for (count = 0; count < 8; count++)
+		bin_table[count] = 0;
+	fp = fopen(path("ioconf.c"), "w");
+	if (fp == NULL) {
+		perror(path("ioconf.c"));
+		exit(1);
+	}
+/*MACH_KERNEL*/
+	fprintf(fp, "#ifndef  MACH_KERNEL\n");
+/*MACH_KERNEL*/
+	fprintf(fp, "#include <sys/param.h>\n");
+	fprintf(fp, "#include <sys/systm.h>\n");
+/*MACH_KERNEL*/
+	fprintf(fp, "#endif   /* MACH_KERNEL */\n");
+/*MACH_KERNEL*/
+	fprintf(fp, "\n");
+	fprintf(fp, "#include <machine/ioconf.h>\n");
+
+	fprintf(fp, "\nu_long\tMBAd_IOwindow =\t\t3*256*1024;\t/* top 1/4 Meg */\n\n");
+
+	for (ptbx = 0; (ptp = ptab[ptbx]) != NULL; ptbx++) {
+
+		fprintf(fp, "/*\n");
+		fprintf(fp, " * %s device configuration.\n", ptp->p_name);
+		fprintf(fp, " */\n\n");
+		fprintf(fp, "\n");
+		fprintf(fp, "#include <sqt%s/ioconf.h>\n", ptp->p_name);
+		fprintf(fp, "\n");
+
+		/*
+		 * Generate dev structures for this controller
+		 */
+		for (dp = dtab, namep = NULL; dp != 0; dp = dp->d_next) {
+			mp = dp->d_conn;
+			if (mp == 0 || mp == TO_NEXUS ||
+			   !eq(mp->d_name, ptp->p_name) ||
+			   (namep != NULL && eq(dp->d_name, namep)) )
+				continue;
+			fprintf(fp, "extern\tstruct\t%s_driver\t%s_driver;\n",
+			    ptp->p_name, namep = dp->d_name);
+			ptp->p_seen = 1;
+		}
+
+		found = 0;
+		for (dp = dtab, namep = NULL; dp != 0; dp = dp->d_next) {
+			mp = dp->d_conn;
+			if (mp == 0 || mp == TO_NEXUS ||
+			   !eq(mp->d_name, ptp->p_name))
+				continue;
+			if (namep == NULL || !eq(namep, dp->d_name)) {
+				count = 0;
+				if (namep != NULL) 
+					fprintf(fp, "};\n");
+				found = 1;
+				fprintf(fp, "\nstruct\t%s_dev %s_%s[] = {\n",
+						ptp->p_name,
+						ptp->p_name,
+						namep = dp->d_name);
+				fprintf(fp, "/*");
+				entry = ptp->p_fields;
+				for (; entry->p_name != NULL; entry++)
+					fprintf(fp, "\t%s",entry->p_name);
+				fprintf(fp, " */\n");
+			}
+			if (dp->d_bin != UNKNOWN)
+				bin_table[dp->d_bin]++;
+			fprintf(fp, "{");
+			for (entry = ptp->p_fields; entry->p_name != NULL; entry++) {
+				if (eq(entry->p_name,"index"))
+					fprintf(fp, "\t%d,", mp->d_unit);
+				else
+					fprintf(fp, "\t%lu,",
+						dp->d_fields[entry-ptp->p_fields]);
+			}
+			fprintf(fp, "\t},\t/* %s%d */\n", dp->d_name, count++);
+		}
+		if (found)
+			fprintf(fp, "};\n\n");
+
+		/*
+		 * Generate conf array
+		 */
+		fprintf(fp, "/*\n");
+		fprintf(fp, " * %s_conf array collects all %s devices\n", 
+			ptp->p_name, ptp->p_name);
+		fprintf(fp, " */\n\n");
+		fprintf(fp, "struct\t%s_conf %s_conf[] = {\n", 
+			ptp->p_name, ptp->p_name);
+		fprintf(fp, "/*\tDriver\t\t#Entries\tDevices\t\t*/\n");
+		for (dp = dtab, namep = NULL; dp != 0; dp = dp->d_next) {
+			mp = dp->d_conn;
+			if (mp == 0 || mp == TO_NEXUS ||
+			   !eq(mp->d_name, ptp->p_name))
+				continue;
+			if (namep == NULL || !eq(namep, dp->d_name)) {
+				if (namep != NULL)
+					fprintf(fp, 
+			"{\t&%s_driver,\t%d,\t\t%s_%s,\t},\t/* %s */\n",
+			namep, count, ptp->p_name, namep, namep);
+				count = 0;
+				namep = dp->d_name;
+			}
+			++count;
+		}
+		if (namep != NULL) {
+			fprintf(fp, 
+			  "{\t&%s_driver,\t%d,\t\t%s_%s,\t},\t/* %s */\n",
+			  namep, count, ptp->p_name, namep, namep);
+		}
+		fprintf(fp, "\t{ 0 },\n");
+		fprintf(fp, "};\n\n");
+
+	}
+
+	/*
+	 * Pseudo's
+	 */
+
+	fprintf(fp, "/*\n");
+	fprintf(fp, " * Pseudo-device configuration\n");
+	fprintf(fp, " */\n\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type == PSEUDO_DEVICE) {
+			fprintf(fp, "extern\tint\t%sboot();\n", dp->d_name);
+		}
+	}
+	fprintf(fp, "\nstruct\tpseudo_dev pseudo_dev[] = {\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type == PSEUDO_DEVICE) {
+			fprintf(fp, "\t{ \"%s\",\t%d,\t%sboot,\t},\n",
+				dp->d_name, 
+				dp->d_slave == UNKNOWN ? 32 : dp->d_slave, 
+				dp->d_name);
+		}
+	}
+	fprintf(fp, "\t{ 0 },\n");
+	fprintf(fp, "};\n\n");
+
+	/*
+	 * Bin interrupt table and misc
+	 */
+
+	fprintf(fp, "/*\n");
+	fprintf(fp, " * Interrupt table\n");
+	fprintf(fp, " */\n\n");
+	fprintf(fp, "int\tbin_intr[8] = {\n");
+	fprintf(fp, "\t\t0,\t\t\t\t/* bin 0, always zero */\n");
+	for (count=1; count < 8; count++) {
+		fprintf(fp, "\t\t%d,\t\t\t\t/* bin %d */\n", 
+			bin_table[count], count);
+	}
+	fprintf(fp, "};\n");
+
+	/*
+	 * b8k_cntlrs[]
+	 */
+
+	fprintf(fp, "/*\n");
+	fprintf(fp, " * b8k_cntlrs array collects all controller entries\n");
+	fprintf(fp, " */\n\n");
+	for (ptbx = 0; (ptp = ptab[ptbx]) != NULL; ptbx++) {
+		if (ptp->p_seen)
+			fprintf(fp, "extern int  conf_%s(),\tprobe_%s_devices(),\t%s_map();\n",
+				ptp->p_name, ptp->p_name, ptp->p_name);
+	}
+	fprintf(fp, "\n\nstruct\tcntlrs b8k_cntlrs[] = {\n");
+	fprintf(fp, "/*\tconf\t\tprobe_devs\t\tmap\t*/\n");
+
+	for (ptbx = 0; (ptp = ptab[ptbx]) != NULL; ptbx++) {
+		if (ptp->p_seen)
+			fprintf(fp, "{\tconf_%s,\tprobe_%s_devices,\t%s_map\t}, \n",
+				ptp->p_name, ptp->p_name, ptp->p_name);
+	}
+	fprintf(fp, "{\t0,\t},\n");
+	fprintf(fp, "};\n");
+
+	(void) fclose(fp);
+}
+
+#endif	/* MACHINE_SQT */
+#if	MACHINE_I386
+void
+i386_ioconf(void)
+{
+	FILE *fp;
+
+	unlink(path("ioconf.c"));
+	fp = fopen(path("ioconf.c"), "w");
+	if (fp == 0) {
+		perror(path("ioconf.c"));
+		exit(1);
+	}
+	fprintf(fp, "#include <dev/busvar.h>\n");
+	fprintf(fp, "\n");
+	fprintf(fp, "#define C (void *)\n");
+	fprintf(fp, "\n");
+
+	i386_pseudo_inits (fp);
+	(void) fclose(fp);
+}
+#endif	/* MACHINE_I386 */
+
+#if MACHINE_MIPSY || MACHINE_MIPS
+
+void declare(const char *cp);
+int is_declared(const char *cp);
+
+void
+mips_ioconf(void)
+{
+	register struct device *dp, *mp, *np;
+	register int slave;
+	FILE *fp;
+	char buf1[64], buf2[64];
+
+	unlink(path("ioconf.c"));
+	fp = fopen(path("ioconf.c"), "w");
+	if (fp == 0) {
+		perror(path("ioconf.c"));
+		exit(1);
+	}
+/*MACH_KERNEL*/
+	fprintf(fp, "#ifndef  MACH_KERNEL\n");
+/*MACH_KERNEL*/
+	fprintf(fp, "#include <sys/param.h>\n");
+	fprintf(fp, "#include <sys/buf.h>\n");
+	fprintf(fp, "#include <sys/map.h>\n");
+	fprintf(fp, "#include <sys/vm.h>\n");
+/*MACH_KERNEL*/
+	fprintf(fp, "#endif   /* MACH_KERNEL */\n");
+/*MACH_KERNEL*/
+	fprintf(fp, "\n");
+	if (seen_mbii && seen_vme) {
+		printf("can't have both vme and mbii devices\n");
+		exit(1);
+	}
+	if (seen_mbii)
+		fprintf(fp, "#include <mipsmbii/mbiivar.h>\n");
+	if (seen_vme)
+		fprintf(fp, "#include <mipsvme/vmevar.h>\n");
+	fprintf(fp, "\n");
+	fprintf(fp, "#define C	(caddr_t)\n");
+	fprintf(fp, "#define NULL	0\n\n");
+	if (!seen_mbii)
+		goto checkvme;
+	/*
+	 * MBII stuff should go here
+	 */
+
+checkvme:
+	if (!seen_vme)
+		goto closefile;
+	/*
+	 * Now generate interrupt vectors for the vme bus
+	 */
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_vec != 0) {
+			struct idlst *ip;
+			mp = dp->d_conn;
+			if (mp == 0 || mp == TO_NEXUS || !eq(mp->d_name, "vme"))
+				continue;
+			if (is_declared(dp->d_name))
+				continue;
+			declare(dp->d_name);
+			fprintf(fp, "extern struct vme_driver %sdriver;\n",
+			    dp->d_name);
+			fprintf(fp, "extern ");
+			ip = dp->d_vec;
+			for (;;) {
+				fprintf(fp, "%s()", ip->id);
+				ip = ip->id_next;
+				if (ip == 0)
+					break;
+				fprintf(fp, ", ");
+			}
+			fprintf(fp, ";\n");
+			fprintf(fp, "int (*_%sint%d[])() = { ", dp->d_name,
+			    dp->d_unit);
+			ip = dp->d_vec;
+			for (;;) {
+				fprintf(fp, "%s", ip->id);
+				ip = ip->id_next;
+				if (ip == 0)
+					break;
+				fprintf(fp, ", ");
+			}
+			fprintf(fp, ", 0 } ;\n\n");
+		}
+	}
+	fprintf(fp, "\nstruct vme_ctlr vmminit[] = {\n");
+	fprintf(fp,
+"  /*          driver  ctlr alive        intr          addr    am */\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		mp = dp->d_conn;
+		if (dp->d_type != CONTROLLER || mp == TO_NEXUS || mp == 0 ||
+		    !eq(mp->d_name, "vme"))
+			continue;
+		if (dp->d_vec == 0) {
+			printf("must specify vector for %s%d\n",
+			    dp->d_name, dp->d_unit);
+			continue;
+		}
+		if (dp->d_addr == 0) {
+			printf("must specify csr address for %s%d\n",
+			    dp->d_name, dp->d_unit);
+			continue;
+		}
+		if (dp->d_addrmod == 0) {
+			printf("must specify address modifier for %s%d\n",
+			    dp->d_name, dp->d_unit);
+			continue;
+		}
+		if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) {
+			printf("drives need their own entries; don't ");
+			printf("specify drive or slave for %s%d\n",
+			    dp->d_name, dp->d_unit);
+			continue;
+		}
+		if (dp->d_flags) {
+			printf("controllers (e.g. %s%d) ",
+			    dp->d_name, dp->d_unit);
+			printf("don't have flags, only devices do\n");
+			continue;
+		}
+		fprintf(fp,
+"  {   %14s, %3d,    0, %11s, C 0x%08x, 0x%02x },\n",
+		     concat3(buf1, "&", dp->d_name, "driver"),
+		     dp->d_unit,
+		     concat3(buf2, "_", dp->d_name, "int"),
+		     dp->d_addr,
+		     dp->d_addrmod);
+	}
+	fprintf(fp, "  {             NULL }\n};\n");
+	/*
+	 * vme devices
+	 */
+	fprintf(fp, "\nstruct vme_device vmdinit[] = {\n");
+	fprintf(fp,
+"/*       driver  unit ctlr slave      intr          addr    am dk       flags */\n"
+	);
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		mp = dp->d_conn;
+		if (dp->d_unit == QUES || dp->d_type != DEVICE || mp == 0 ||
+		    mp == TO_NEXUS || mp->d_type == MASTER)
+			continue;
+		for (np = mp; np && np != TO_NEXUS; np = np->d_conn)
+			if (eq(np->d_name, "vme"))
+				break;
+		if (np == 0 || np == TO_NEXUS)	/* chain never reaches the vme bus */
+			continue;
+		np = 0;
+		if (eq(mp->d_name, "vme")) {
+			if (dp->d_vec == 0) {
+				printf("must specify vector for device %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_addr == 0) {
+				printf("must specify csr for device %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_addrmod == 0) {
+				printf(
+			"must specify address modifier for device %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) {
+				printf("drives/slaves can be specified ");
+				printf("only for controllers, ");
+				printf("not for device %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			slave = QUES;
+		} else {
+			if ((np = mp->d_conn) == 0) {
+				printf("%s%d isn't connected to anything ",
+				    mp->d_name, mp->d_unit);
+				printf(", so %s%d is unattached\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_drive == UNKNOWN) {
+				printf("must specify ``drive number'' ");
+				printf("for %s%d\n", dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_slave != UNKNOWN) {
+				printf("slave numbers should be given only ");
+				printf("for massbus tapes, not for %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_vec != 0) {
+				printf("interrupt vectors should not be ");
+				printf("given for drive %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_addr != 0) {
+				printf("csr addresses should be given only ");
+				printf("on controllers, not on %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_addrmod != 0) {
+				printf("address modifiers should be given only ");
+				printf("on controllers, not on %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			slave = dp->d_drive;
+		}
+		fprintf(fp,
+"{%14s, %3d, %3s, %4d,%10s, C 0x%08x, 0x%02x, %1d, 0x%08x },\n",
+		    concat3(buf1, "&",
+		        eq(mp->d_name, "vme") ? dp->d_name : mp->d_name,
+			"driver"),
+		    dp->d_unit,
+		    eq(mp->d_name, "vme") ? "-1" : qu(mp->d_unit),
+		    slave,
+		    intv2(dp),
+		    dp->d_addr,
+		    dp->d_addrmod,
+		    dp->d_dk,
+		    dp->d_flags);
+	}
+	fprintf(fp, "{          NULL }\n};\n");
+closefile:
+	(void) fclose(fp);
+}
+
+char *
+intv2(struct device *dev)
+{
+	static char buf[20];
+
+	if (dev->d_vec == 0) {
+		strcpy(buf, "NULL");
+	} else {
+		(void) sprintf(buf, "_%sint", dev->d_name);
+	}
+	return (buf);
+}
+
+char *
+concat3(char *buf, const char *p1, const char *p2, const char *p3)
+{
+	(void) sprintf(buf, "%s%s%s", p1, p2, p3);
+	return (buf);
+}
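+
+/*
+ * e.g. concat3(buf, "&", "xp", "driver") leaves "&xpdriver" in buf
+ * ("xp" being just an illustrative device name); the caller must
+ * supply a buffer big enough for all three parts.
+ */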
+
+#define	MAXDEVS	100
+#define	DEVLEN	10
+char decl_devices[MAXDEVS][DEVLEN];
+
+void
+declare(const char *cp)
+{
+	register int i;
+
+	for (i = 0; i < MAXDEVS; i++)
+		if (decl_devices[i][0] == 0) {
+			strncpy(decl_devices[i], cp, DEVLEN);
+			return;
+		}
+	printf("device table full, fix mkioconf.c\n");
+	exit(1);
+}
+
+int
+is_declared(const char *cp)
+{
+	register int i;
+
+	for (i = 0; i < MAXDEVS; i++) {
+		if (decl_devices[i][0] == 0)
+			return(0);
+		if (strncmp(decl_devices[i], cp, DEVLEN) == 0)
+			return(1);
+	}
+	return(0);
+}
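+
+/*
+ * declare()/is_declared() implement a small fixed-size set of names
+ * already emitted (MAXDEVS slots of DEVLEN bytes).  Note that strncpy()
+ * leaves a name of exactly DEVLEN characters unterminated, which is
+ * why is_declared() compares with strncmp() over DEVLEN, not strcmp().
+ */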
+#endif /* MACHINE_MIPSY || MACHINE_MIPS */
+
+#if	MACHINE_M68K
+char	*m68k_dn(const char *name);
+void	m68k_pseudo_inits(FILE *fp);
+
+void
+m68k_ioconf(void)
+{
+	register struct device *dp, *mp;
+	register int slave;
+	FILE *fp;
+
+	unlink(path("ioconf.c"));
+	fp = fopen(path("ioconf.c"), "w");
+	if (fp == 0) {
+		perror(path("ioconf.c"));
+		exit(1);
+	}
+	fprintf(fp, "#include <dev/m68k/busvar.h>\n");
+	fprintf(fp, "\n");
+	fprintf(fp, "#define C (void *)\n");
+	fprintf(fp, "\n");
+
+	/*
+	 * Now generate interrupt vectors for the bus
+	 */
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		mp = dp->d_conn;
+		if (mp == TO_NEXUS || mp == 0 || mp->d_conn != TO_NEXUS)
+			continue;
+		fprintf(fp, "extern struct bus_driver %sdriver;\n",
+			    dp->d_name);
+	}
+
+	/*
+	 * Now spew forth the bus_ctrl structures
+	 */
+	fprintf(fp, "\nstruct bus_ctrl bus_cinit[] = {\n");
+	fprintf(fp,
+"  /* driver        ctrl   ipl         address */\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		mp = dp->d_conn;
+		if (dp->d_type != CONTROLLER || mp == TO_NEXUS || mp == 0 ||
+		    mp->d_conn != TO_NEXUS || dp->d_unit == QUES)
+			continue;
+		if (dp->d_addr == UNKNOWN) {
+			printf("must specify csr address for %s%d\n",
+			    dp->d_name, dp->d_unit);
+			continue;
+		}
+		if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) {
+			printf("drives need their own entries; ");
+			printf("don't specify drive or slave for %s%d\n",
+			    dp->d_name, dp->d_unit);
+			continue;
+		}
+		if (dp->d_flags) {
+			printf("controllers (e.g. %s%d) don't have flags, ",
+			    dp->d_name, dp->d_unit);
+			printf("only devices do\n");
+			continue;
+		}
+		fprintf(fp,
+"  {  %-12s, %5d, %4d,   C 0x%08x },\n",
+		    m68k_dn(dp->d_name), dp->d_unit, dp->d_pri, dp->d_addr);
+	}
+	fprintf(fp, "  0\n};\n");
+
+	/*
+	 * Now we go for the bus_device stuff
+	 */
+	fprintf(fp, "\nstruct bus_device bus_dinit[] = {\n");
+	fprintf(fp,
+"  /* driver      unit ctrl slave ipl  dk       flags       address  name */\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		mp = dp->d_conn;
+		if (dp->d_unit == QUES || dp->d_type != DEVICE || mp == 0 ||
+		    mp == TO_NEXUS || mp->d_type == MASTER)
+			continue;
+		if (mp->d_conn == TO_NEXUS) {
+			if (dp->d_addr == UNKNOWN) {
+				printf("must specify csr for device %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_drive != UNKNOWN || dp->d_slave != UNKNOWN) {
+				printf("drives/slaves can be specified only ");
+				printf("for controllers, not for device %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			slave = UNKNOWN;
+		} else {
+			if (mp->d_conn == 0) {
+				printf("%s%d isn't connected to anything, ",
+				    mp->d_name, mp->d_unit);
+				printf("so %s%d is unattached\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_drive == UNKNOWN) {
+				printf("must specify ``drive number'' for %s%d\n",
+				   dp->d_name, dp->d_unit);
+				continue;
+			}
+			/* NOTE THAT ON THE UNIBUS ``drive'' IS STORED IN */
+			/* ``SLAVE'' AND WE DON'T WANT A SLAVE SPECIFIED */
+			if (dp->d_slave != UNKNOWN) {
+				printf("slave numbers should be given only ");
+				printf("for massbus tapes, not for %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_pri != 0) {
+				printf("interrupt priority should not be ");
+				printf("given for drive %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			if (dp->d_addr != 0) {
+				printf("csr addresses should be given only");
+				printf(" on controllers, not on %s%d\n",
+				    dp->d_name, dp->d_unit);
+				continue;
+			}
+			slave = dp->d_drive;
+		}
+		fprintf(fp,
+"  {  %-12s, %3d, %s,  %s,%3d,%3d, %#10x, C 0x%08x, \"%s\" },\n",
+		    m68k_dn(mp->d_conn == TO_NEXUS? dp->d_name : mp->d_name),
+		    dp->d_unit,
+		    mp->d_conn == TO_NEXUS? " -1" : qu(mp->d_unit),
+		    qu(slave),
+		    dp->d_pri, -dp->d_dk, dp->d_flags,
+		    dp->d_addr == UNKNOWN? 0 : dp->d_addr,
+		    dp->d_name);
+	}
+	fprintf(fp, "  0\n};\n");
+	m68k_pseudo_inits (fp);
+	(void) fclose(fp);
+}
+
+void
+m68k_pseudo_inits(FILE *fp)
+{
+	register struct device *dp;
+	int count;
+
+	fprintf(fp, "\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		fprintf(fp, "extern int %s(int);\n", dp->d_init);
+	}
+	fprintf(fp, "\nstruct pseudo_init pseudo_inits[] = {\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		count = dp->d_slave;
+		if (count <= 0)
+			count = 1;
+		fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init);
+	}
+	fprintf(fp, "\t{0,\t0},\n};\n");
+}
+
+void
+i386_pseudo_inits(FILE *fp)
+{
+	register struct device *dp;
+	int count;
+
+	fprintf(fp, "\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		fprintf(fp, "extern int %s(int);\n", dp->d_init);
+	}
+	fprintf(fp, "\nstruct pseudo_init pseudo_inits[] = {\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		count = dp->d_slave;
+		if (count <= 0)
+			count = 1;
+		fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init);
+	}
+	fprintf(fp, "\t{0,\t0},\n};\n");
+}
+
+char *
+m68k_dn(const char *name)
+{
+	sprintf(errbuf, "&%sdriver", name);
+	return ns(errbuf);
+}
+#endif	/* MACHINE_M68K */
+
+#if	MACHINE_M88K || MACHINE_M98K
+char	*nrw_dn(char *name);
+void	nrw_pseudo_inits(FILE *fp);
+
+void
+nrw_ioconf(void)
+{
+	FILE *fp;
+
+	unlink(path("ioconf.c"));
+	fp = fopen(path("ioconf.c"), "w");
+	if (fp == 0) {
+		perror(path("ioconf.c"));
+		exit(1);
+	}
+	fprintf(fp, "#include <dev/nrw/busvar.h>\n");
+	fprintf(fp, "\n");
+	nrw_pseudo_inits (fp);
+	(void) fclose(fp);
+}
+
+void
+nrw_pseudo_inits(FILE *fp)
+{
+	register struct device *dp;
+	int count;
+
+	fprintf(fp, "\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		fprintf(fp, "extern int %s(int);\n", dp->d_init);
+	}
+	fprintf(fp, "\nstruct pseudo_init pseudo_inits[] = {\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		count = dp->d_slave;
+		if (count <= 0)
+			count = 1;
+		fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init);
+	}
+	fprintf(fp, "\t{0,\t0},\n};\n");
+}
+
+char *
+nrw_dn(char *name)
+{
+	sprintf(errbuf, "&%sdriver,", name);
+	return(errbuf);
+}
+
+void
+m88k_ioconf(void)
+{
+	nrw_ioconf();
+}
+
+void
+m98k_ioconf(void)
+{
+	nrw_ioconf();
+}
+
+void
+m88k_pseudo_inits(FILE *fp)
+{
+	nrw_pseudo_inits(fp);
+}
+
+void
+m98k_pseudo_inits(FILE *fp)
+{
+	nrw_pseudo_inits(fp);
+}
+
+char *
+m88k_dn(char *name)
+{
+	return(nrw_dn(name));
+}
+
+char *
+m98k_dn(char *name)
+{
+	return(nrw_dn(name));
+}
+
+
+#endif	/* MACHINE_M88K || MACHINE_M98K */
+
+#ifdef MACHINE_HPPA
+char	*hppa_dn(char *name);
+void	hppa_pseudo_inits(FILE *fp);
+
+void
+hppa_ioconf(void)
+{
+	FILE *fp;
+
+	unlink(path("ioconf.c"));
+	fp = fopen(path("ioconf.c"), "w");
+	if (fp == 0) {
+		perror(path("ioconf.c"));
+		exit(1);
+	}
+	fprintf(fp, "#include <dev/hppa/busvar.h>\n");
+	fprintf(fp, "\n");
+	hppa_pseudo_inits (fp);
+	(void) fclose(fp);
+}
+
+void
+hppa_pseudo_inits(FILE *fp)
+{
+	register struct device *dp;
+	int count;
+
+	fprintf(fp, "\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		fprintf(fp, "extern int %s(int);\n", dp->d_init);
+	}
+	fprintf(fp, "\nstruct pseudo_init pseudo_inits[] = {\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		count = dp->d_slave;
+		if (count <= 0)
+			count = 1;
+		fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init);
+	}
+	fprintf(fp, "\t{0,\t0},\n};\n");
+}
+
+char *
+hppa_dn(char *name)
+{
+	sprintf(errbuf, "&%sdriver,", name);
+
+	return (errbuf);
+}
+
+#endif /* MACHINE_HPPA */
+
+#ifdef MACHINE_SPARC
+char	*sparc_dn(char *name);
+void	sparc_pseudo_inits(FILE *fp);
+
+void
+sparc_ioconf(void)
+{
+	FILE *fp;
+
+	unlink(path("ioconf.c"));
+	fp = fopen(path("ioconf.c"), "w");
+	if (fp == 0) {
+		perror(path("ioconf.c"));
+		exit(1);
+	}
+	fprintf(fp, "#include <dev/busvar.h>\n");
+	fprintf(fp, "\n");
+	sparc_pseudo_inits (fp);
+	(void) fclose(fp);
+}
+
+void
+sparc_pseudo_inits(FILE *fp)
+{
+	register struct device *dp;
+	int count;
+
+	fprintf(fp, "\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		fprintf(fp, "extern int %s(int);\n", dp->d_init);
+	}
+	fprintf(fp, "\nstruct pseudo_init pseudo_inits[] = {\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		count = dp->d_slave;
+		if (count <= 0)
+			count = 1;
+		fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init);
+	}
+	fprintf(fp, "\t{0,\t0},\n};\n");
+}
+
+char *
+sparc_dn(char *name)
+{
+	sprintf(errbuf, "&%sdriver,", name);
+	return (errbuf);
+}
+
+#endif /* MACHINE_SPARC */
+
+#ifdef MACHINE_PPC
+char	*ppc_dn(char *name);
+void	ppc_pseudo_inits(FILE *fp);
+
+void
+ppc_ioconf(void)
+{
+	FILE *fp;
+
+	unlink(path("ioconf.c"));
+	fp = fopen(path("ioconf.c"), "w");
+	if (fp == 0) {
+		perror(path("ioconf.c"));
+		exit(1);
+	}
+	fprintf(fp, "#include <dev/busvar.h>\n");
+	fprintf(fp, "\n");
+	ppc_pseudo_inits (fp);
+	(void) fclose(fp);
+}
+
+void
+ppc_pseudo_inits(FILE *fp)
+{
+	register struct device *dp;
+	int count;
+
+	fprintf(fp, "\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		fprintf(fp, "extern int %s(int);\n", dp->d_init);
+	}
+	fprintf(fp, "\nstruct pseudo_init pseudo_inits[] = {\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		count = dp->d_slave;
+		if (count <= 0)
+			count = 1;
+		fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init);
+	}
+	fprintf(fp, "\t{0,\t0},\n};\n");
+}
+
+char *
+ppc_dn(char *name)
+{
+	sprintf(errbuf, "&%sdriver,", name);
+	return (errbuf);
+}
+
+#endif /* MACHINE_PPC */
+
+#ifdef MACHINE_ARM
+void	arm_pseudo_inits(FILE *fp);
+
+void
+arm_ioconf(void)
+{
+	FILE *fp;
+
+	unlink(path("ioconf.c"));
+	fp = fopen(path("ioconf.c"), "w");
+	if (fp == 0) {
+		perror(path("ioconf.c"));
+		exit(1);
+	}
+	fprintf(fp, "#include <dev/busvar.h>\n");
+	fprintf(fp, "\n");
+	arm_pseudo_inits (fp);
+	(void) fclose(fp);
+}
+
+void
+arm_pseudo_inits(FILE *fp)
+{
+	register struct device *dp;
+	int count;
+
+	fprintf(fp, "\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		fprintf(fp, "extern int %s(int);\n", dp->d_init);
+	}
+	fprintf(fp, "\nstruct pseudo_init pseudo_inits[] = {\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		count = dp->d_slave;
+		if (count <= 0)
+			count = 1;
+		fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init);
+	}
+	fprintf(fp, "\t{0,\t0},\n};\n");
+}
+
+#endif /* MACHINE_ARM */
+
+#ifdef MACHINE_X86_64
+void	x86_64_pseudo_inits(FILE *fp);
+
+void
+x86_64_ioconf(void)
+{
+	FILE *fp;
+
+	unlink(path("ioconf.c"));
+	fp = fopen(path("ioconf.c"), "w");
+	if (fp == 0) {
+		perror(path("ioconf.c"));
+		exit(1);
+	}
+	fprintf(fp, "#include <dev/busvar.h>\n");
+	fprintf(fp, "\n");
+	x86_64_pseudo_inits (fp);
+	(void) fclose(fp);
+}
+
+void
+x86_64_pseudo_inits(FILE *fp)
+{
+	register struct device *dp;
+	int count;
+
+	fprintf(fp, "\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		fprintf(fp, "extern int %s(int);\n", dp->d_init);
+	}
+	fprintf(fp, "\nstruct pseudo_init pseudo_inits[] = {\n");
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (dp->d_type != PSEUDO_DEVICE || dp->d_init == 0)
+			continue;
+		count = dp->d_slave;
+		if (count <= 0)
+			count = 1;
+		fprintf(fp, "\t{%d,\t%s},\n", count, dp->d_init);
+	}
+	fprintf(fp, "\t{0,\t0},\n};\n");
+}
+
+#endif /* MACHINE_X86_64 */
+
+char *
+intv(struct device *dev)
+{
+	static char buf[20];
+
+	if (dev->d_vec == 0) {
+		strcpy(buf, "     0");
+	} else {
+		(void) sprintf(buf, "%sint%d", dev->d_name, dev->d_unit);
+	}
+	return ns(buf);
+}
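+
+/*
+ * e.g. a device "xp" unit 0 with an interrupt vector list yields
+ * "xpint0", naming that device's generated vector array ("xp" is
+ * illustrative); with no vector the padded "     0" keeps the
+ * generated table columns aligned.
+ */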
+
+char *
+qu(int num)
+{
+
+	if (num == QUES) {
+		strcpy(errbuf, "'?'");
+	} else if (num == UNKNOWN) {
+		strcpy(errbuf, " -1");
+	} else {
+		(void) sprintf(errbuf, "%3d", num);
+	}
+	return ns(errbuf);
+}
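+
+/*
+ * e.g. qu(QUES) returns "'?'", qu(UNKNOWN) returns " -1", and qu(5)
+ * returns "  5" (three columns, right-justified); ns() hands back a
+ * fresh copy because errbuf is reused on every call.
+ */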
diff --git a/SETUP/config/mkmakefile.c b/SETUP/config/mkmakefile.c
new file mode 100644
index 000000000..6ac9aa099
--- /dev/null
+++ b/SETUP/config/mkmakefile.c
@@ -0,0 +1,1182 @@
+/*
+ * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * "Portions Copyright (c) 1999 Apple Computer, Inc.  All Rights
+ * Reserved.  This file contains Original Code and/or Modifications of
+ * Original Code as defined in and that are subject to the Apple Public
+ * Source License Version 1.0 (the 'License').  You may not use this file
+ * except in compliance with the License.  Please obtain a copy of the
+ * License at http://www.apple.com/publicsource and read it before using
+ * this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License."
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/* 
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * Copyright (c) 1988 Carnegie-Mellon University
+ * Copyright (c) 1987 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * Copyright (c) 1980 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley.  The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef lint
+static char sccsid[] __attribute__((used)) = "@(#)mkmakefile.c	5.21 (Berkeley) 6/18/88";
+#endif /* not lint */
+
+/*
+ * Build the makefile for the system, from
+ * the information in the files files and the
+ * additional files for the machine being compiled to.
+ */
+
+#include <stdio.h>
+#include <unistd.h>	/* for unlink */
+#include <ctype.h>
+#include "parser.h"
+#include "config.h"
+
+void	read_files(void);
+void	do_objs(FILE *fp, const char *msg, int ext);
+void	do_ordered(FILE *fp);
+void	do_files(FILE *fp, const char *msg, char ext);
+void	do_machdep(FILE *ofp);
+void	do_build(const char *name, void (*format)(FILE *));
+void	do_rules(FILE *f);
+void	do_load(FILE *f);
+struct file_list *do_systemspec(FILE *f, struct file_list *fl, int first);
+void	do_swapspec(FILE *f, const char *name, char *sysname);
+void	copy_dependencies(FILE *makin, FILE *makout);
+
+void	build_cputypes(FILE *fp);
+void	build_confdep(FILE *fp);
+
+struct file_list *fl_lookup(char *file);
+struct file_list *fltail_lookup(char *file);
+struct file_list *new_fent(void);
+
+void	put_source_file_name(FILE *fp, struct file_list *tp);
+
+
+#define DO_SWAPFILE	0
+
+#define next_word(fp, wd) \
+	{ register const char *word = get_word(fp); \
+	  if (word == (char *)EOF) \
+		return; \
+	  else \
+		wd = word; \
+	}
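+
+/*
+ * N.B. next_word() expands to a block containing a bare "return", so
+ * it is only safe inside functions returning void; read_files() below
+ * relies on this to bail out at end-of-file.
+ */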
+
+static	struct file_list *fcur;
+const char *tail(const char *fn);
+char *allCaps(char *str);
+
+/*
+ * Lookup a file, by name.
+ */
+struct file_list *
+fl_lookup(char *file)
+{
+	register struct file_list *fp;
+
+	for (fp = ftab ; fp != 0; fp = fp->f_next) {
+		if (eq(fp->f_fn, file))
+			return (fp);
+	}
+	return (0);
+}
+
+/*
+ * Lookup a file, by final component name.
+ */
+struct file_list *
+fltail_lookup(char *file)
+{
+	register struct file_list *fp;
+
+	for (fp = ftab ; fp != 0; fp = fp->f_next) {
+		if (eq(tail(fp->f_fn), tail(file)))
+			return (fp);
+	}
+	return (0);
+}
+
+/*
+ * Make a new file list entry
+ */
+struct file_list *
+new_fent(void)
+{
+	register struct file_list *fp;
+
+	fp = (struct file_list *) malloc(sizeof *fp);
+	fp->f_needs = 0;
+	fp->f_next = 0;
+	fp->f_flags = 0;
+	fp->f_type = 0;
+	fp->f_extra = (char *) 0;
+	if (fcur == 0)
+		fcur = ftab = fp;
+	else
+		fcur->f_next = fp;
+	fcur = fp;
+	return (fp);
+}
+
+char	*COPTS;
+static	struct users {
+	int	u_default;
+	int	u_min;
+	int	u_max;
+} users[] = {
+	{ 24, 2, 1024 },		/* MACHINE_VAX */
+	{  8, 2, 32 },			/* MACHINE_SUN */
+	{ 16, 4, 32 },			/* MACHINE_ROMP */
+	{  8, 2, 32 },			/* MACHINE_SUN2 */
+	{  8, 2, 32 },			/* MACHINE_SUN3 */
+	{ 24, 8, 1024},			/* MACHINE_MMAX */
+	{ 32, 8, 1024},			/* MACHINE_SQT */
+	{  8, 2, 32 },			/* MACHINE_SUN4 */
+	{  2, 2, 1024 },		/* MACHINE_I386 */
+	{ 32, 8, 1024 },		/* MACHINE_IX */
+	{ 32, 8, 1024 },		/* MACHINE_MIPSY */
+	{ 32, 8, 1024 },		/* MACHINE_MIPS*/
+	{ 32, 8, 1024 },		/* MACHINE_I860*/
+	{  8, 2, 32 },			/* MACHINE_M68K */
+	{  8, 2, 32 },			/* MACHINE_M88K */
+	{  8, 2, 32 },			/* MACHINE_M98K */
+	{  8, 2, 32 },			/* MACHINE_HPPA */
+	{  8, 2, 32 },			/* MACHINE_SPARC */
+	{  8, 2, 32 },			/* MACHINE_PPC */
+	{  8, 2, 32 },			/* MACHINE_ARM */
+	{  8, 2, 32 },			/* MACHINE_X86_64 */
+};
+#define NUSERS	(sizeof (users) / sizeof (users[0]))
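+
+/*
+ * The table is indexed by machine type: makefile() below selects
+ * users[machine-1], so the first row corresponds to MACHINE_VAX == 1.
+ */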
+
+const char *
+get_VPATH(void)
+{
+    static char *vpath = NULL;
+
+    if ((vpath == NULL) &&
+	((vpath = getenv("VPATH")) != NULL) &&
+	(*vpath != ':')) {
+	register char *buf = malloc((unsigned)(strlen(vpath) + 2));
+
+	vpath = strcat(strcpy(buf, ":"), vpath);
+    }
+
+    return vpath ? vpath : "";
+}
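+
+/*
+ * e.g. with VPATH=conf:machine this returns ":conf:machine"; the
+ * empty leading component presumably makes fopenp() look in the
+ * current directory first.
+ */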
+
+
+/*
+ * Build the makefile from the skeleton
+ */
+void
+makefile(void)
+{
+	FILE *ifp, *ofp;
+	FILE *dfp;
+	char pname[BUFSIZ];
+	char line[BUFSIZ];
+	struct opt *op;
+	struct users *up;
+
+	read_files();
+	(void) sprintf(line, "%s/Makefile.template", config_directory);
+	ifp = fopenp(VPATH, line, pname, "r");
+	if (ifp == 0) {
+		perror(line);
+		exit(1);
+	}
+	dfp = fopen(path("Makefile"), "r");
+	rename(path("Makefile"), path("Makefile.old"));
+	unlink(path("Makefile.old"));
+	unlink(path("M.d"));
+	if ((ofp = fopen(path("M.d"), "w")) == NULL) {
+		perror(path("M.d"));
+		/* We'll let this error go */
+	}
+	else
+	 	fclose(ofp);
+	ofp = fopen(path("Makefile"), "w");
+	if (ofp == 0) {
+		perror(path("Makefile"));
+		exit(1);
+	}
+	fprintf(ofp, "SOURCE_DIR=%s\n", source_directory);
+
+	if (machine == MACHINE_SUN || machine == MACHINE_SUN2 
+	    || machine == MACHINE_SUN3 || machine == MACHINE_SUN4)
+		fprintf(ofp, "IDENT=-D%s -D%s", machinename, allCaps(ident));
+	else
+		fprintf(ofp, "IDENT=-D%s", allCaps(ident));
+	if (profiling)
+		fprintf(ofp, " -DGPROF");
+	if (cputype == 0) {
+		printf("cpu type must be specified\n");
+		exit(1);
+	}
+	do_build("cputypes.h", build_cputypes);
+
+	for (op = opt; op; op = op->op_next)
+		if (op->op_value)
+			fprintf(ofp, " -D%s=\"%s\"", op->op_name, op->op_value);
+		else
+			fprintf(ofp, " -D%s", op->op_name);
+	fprintf(ofp, "\n");
+	if ((unsigned)machine > NUSERS) {
+		printf("maxusers config info isn't present, using vax\n");
+		up = &users[MACHINE_VAX-1];
+	} else
+		up = &users[machine-1];
+	if (maxusers < up->u_min) {
+		maxusers = up->u_min;
+	} else if (maxusers > up->u_max)
+		printf("warning: maxusers > %d (%d)\n", up->u_max, maxusers);
+	if (maxusers) {
+		do_build("confdep.h", build_confdep);
+	}
+	for (op = mkopt; op; op = op->op_next)
+		if (op->op_value)
+			fprintf(ofp, "%s=%s\n", op->op_name, op->op_value);
+		else
+			fprintf(ofp, "%s\n", op->op_name);
+
+	while (fgets(line, BUFSIZ, ifp) != 0) {
+		if (*line == '%')
+			goto percent;
+		if (profiling && strncmp(line, "COPTS=", 6) == 0) {
+			register char *cp;
+			if (machine != MACHINE_MMAX)
+			    fprintf(ofp,
+				"GPROF.EX=$(SOURCE_DIR)/machdep/%s/gmon.ex\n", machinename);
+			cp = index(line, '\n');
+			if (cp)
+				*cp = 0;
+			cp = line + 6;
+			while (*cp && (*cp == ' ' || *cp == '\t'))
+				cp++;
+			COPTS = malloc((unsigned)(strlen(cp) + 1));
+			if (COPTS == 0) {
+				printf("config: out of memory\n");
+				exit(1);
+			}
+			strcpy(COPTS, cp);
+			if (machine == MACHINE_MIPSY || machine == MACHINE_MIPS) {
+				fprintf(ofp, "%s ${CCPROFOPT}\n", line);
+				fprintf(ofp, "PCOPTS=%s\n", cp);
+			} else if (machine == MACHINE_MMAX)
+				fprintf(ofp, "%s -p\n",line);
+			else
+				fprintf(ofp, "%s -pg\n", line);
+			continue;
+		}
+		fprintf(ofp, "%s", line);
+		continue;
+	percent:
+		if (eq(line, "%OBJS\n")) {
+			do_objs(ofp, "OBJS=", -1);
+		} else if (eq(line, "%CFILES\n")) {
+			do_files(ofp, "CFILES=", 'c');
+			do_objs(ofp, "COBJS=", 'c');
+		} else if (eq(line, "%MFILES\n")) {
+			do_files(ofp, "MFILES=", 'm');
+			do_objs(ofp, "MOBJS=", 'm');
+		} else if (eq(line, "%SFILES\n")) {
+			do_files(ofp, "SFILES=", 's');
+			do_objs(ofp, "SOBJS=", 's');
+		} else if (eq(line, "%BFILES\n"))
+			do_files(ofp, "BFILES=", 'b');
+		else if (eq(line, "%MACHDEP\n")) {
+			/*
+			 * Move do_machdep() after the mkopt stuff.
+			 */
+			for (op = mkopt; op; op = op->op_next)
+				fprintf(ofp, "%s=%s\n", op->op_name, op->op_value);
+			do_machdep(ofp);
+		} else if (eq(line, "%ORDERED\n"))
+			do_ordered(ofp);
+		else if (eq(line, "%RULES\n"))
+			do_rules(ofp);
+		else if (eq(line, "%LOAD\n"))
+			do_load(ofp);
+		else
+			fprintf(stderr,
+			    "Unknown %% construct in generic makefile: %s",
+			    line);
+	}
+	if (dfp != NULL)
+	{
+		copy_dependencies(dfp, ofp);
+		(void) fclose(dfp);
+	}
+	(void) fclose(ifp);
+	(void) fclose(ofp);
+}
+
+/*
+ * Read in the information about files used in making the system.
+ * Store it in the ftab linked list.
+ */
+void
+read_files(void)
+{
+	FILE *fp;
+	register struct file_list *tp, *pf;
+	register struct device *dp;
+	register struct opt *op;
+	const char *wd;
+	char *this, *needs;
+	const char *devorprof;
+	int options;
+	int not_option;
+	int ordered;
+	int sedit;				/* SQT */
+	char pname[BUFSIZ];
+	char fname[1024];
+	char *rest = (char *) 0;
+	struct cputype *cp;
+	int nreqs, first = 1, isdup;
+
+	ftab = 0;
+	(void) sprintf(fname, "%s/files", config_directory);
+openit:
+	fp = fopenp(VPATH, fname, pname, "r");
+	if (fp == 0) {
+		perror(fname);
+		exit(1);
+	}
+next:
+	options = 0;
+	rest = (char *) 0;
+	/*
+	 * filename	[ standard | optional ]
+	 *	[ dev* | profiling-routine ] [ device-driver]
+	 */
+	/*
+	 * MACHINE_SQT ONLY:
+	 *
+	 * filename	[ standard | optional ] 
+	 *	[ ordered | sedit ]
+	 *	[ dev* | profiling-routine ] [ device-driver]
+	 */
+	wd = get_word(fp);
+	if (wd == (char *)EOF) {
+		(void) fclose(fp);
+		if (first == 1) {
+			(void) sprintf(fname, "%s/files.%s", config_directory, machinename);
+			first++;
+			goto openit;
+		}
+		if (first == 2) {
+			(void) sprintf(fname, "files.%s", allCaps(ident));
+			first++;
+			fp = fopenp(VPATH, fname, pname, "r");
+			if (fp != 0)
+				goto next;
+		}
+		return;
+	}
+	if (wd == 0)
+		goto next;
+	/*
+	 *  Allow comment lines beginning with a '#' character.
+	 */
+	if (*wd == '#')
+	{
+		while ((wd=get_word(fp)) && wd != (char *)EOF)
+			;
+		goto next;
+	}
+
+	this = ns(wd);
+	next_word(fp, wd);
+	if (wd == 0) {
+		printf("%s: No type for %s.\n",
+		    fname, this);
+		exit(1);
+	}
+	if ((pf = fl_lookup(this)) && (pf->f_type != INVISIBLE || pf->f_flags))
+		isdup = 1;
+	else
+		isdup = 0;
+	tp = 0;
+	if (first == 3 && (tp = fltail_lookup(this)) != 0)
+		printf("%s: Local file %s overrides %s.\n",
+		    fname, this, tp->f_fn);
+	nreqs = 0;
+	devorprof = "";
+	ordered = 0;
+	sedit = 1;				/* SQT: assume sedit for now */
+	needs = 0;
+	if (eq(wd, "standard"))
+		goto checkdev;
+	if (!eq(wd, "optional")) {
+		printf("%s: %s must be optional or standard\n", fname, this);
+		exit(1);
+	}
+	if (strncmp(this, "OPTIONS/", 8) == 0)
+		options++;
+	not_option = 0;
+nextopt:
+	next_word(fp, wd);
+	if (wd == 0)
+		goto doneopt;
+	if (eq(wd, "ordered")) {
+		ordered++;
+		goto nextopt;
+	}
+	if (machine == MACHINE_SQT && eq(wd, "sedit")) {
+		sedit++;
+		goto nextopt;
+	}
+	if (eq(wd, "not")) {
+		not_option = !not_option;
+		goto nextopt;
+	}
+	devorprof = wd;
+	if (eq(wd, "device-driver") || eq(wd, "profiling-routine")) {
+		next_word(fp, wd);
+		goto save;
+	}
+	nreqs++;
+	if (needs == 0 && nreqs == 1)
+		needs = ns(wd);
+	if (isdup)
+		goto invis;
+	if (options)
+	{
+		struct opt *lop = 0;
+		struct device tdev;
+
+		/*
+		 *  Allocate a pseudo-device entry which we will insert into
+		 *  the device list below.  The flags field is set non-zero to
+		 *  indicate an internal entry rather than one generated from
+		 *  the configuration file.  The slave field is set to define
+		 *  the corresponding symbol as 0 should we fail to find the
+		 *  option in the option list.
+		 */
+		init_dev(&tdev);
+		tdev.d_name = ns(wd);
+		tdev.d_type = PSEUDO_DEVICE;
+		tdev.d_flags++;
+		tdev.d_slave = 0;
+
+		for (op=opt; op; lop=op, op=op->op_next)
+		{
+			char *od = allCaps(ns(wd));
+
+			/*
+			 *  Found an option which matches the current device
+			 *  dependency identifier.  Set the slave field to
+			 *  define the option in the header file.
+			 */
+			if (strcmp(op->op_name, od) == 0)
+			{
+				tdev.d_slave = 1;
+				if (lop == 0)
+					opt = op->op_next;
+				else
+					lop->op_next = op->op_next;
+				free(op);
+				op = 0;
+			 }
+			free(od);
+			if (op == 0)
+				break;
+		}
+		newdev(&tdev);
+	}
+ 	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if (eq(dp->d_name, wd) && (dp->d_type != PSEUDO_DEVICE || dp->d_slave)) {
+			if (not_option)
+				goto invis;	/* don't want file if option present */
+			else
+				goto nextopt;
+		}
+	}
+	if (not_option)
+		goto nextopt;		/* want file if option missing */
+
+	for (op = opt; op != 0; op = op->op_next)
+		if (op->op_value == 0 && opteq(op->op_name, wd)) {
+			if (nreqs == 1) {
+				free(needs);
+				needs = 0;
+			}
+			goto nextopt;
+		}
+
+	for (cp = cputype; cp; cp = cp->cpu_next)
+		if (opteq(cp->cpu_name, wd)) {
+			if (nreqs == 1) {
+				free(needs);
+				needs = 0;
+			}
+			goto nextopt;
+		}
+
+invis:
+	while ((wd = get_word(fp)) != 0)
+		;
+	if (tp == 0)
+		tp = new_fent();
+	tp->f_fn = this;
+	tp->f_type = INVISIBLE;
+	tp->f_needs = needs;
+	tp->f_flags = isdup;
+	goto next;
+
+doneopt:
+	if (nreqs == 0) {
+		printf("%s: what is %s optional on?\n",
+		    fname, this);
+		exit(1);
+	}
+
+checkdev:
+	if (wd) {
+		if (*wd == '|')
+			goto getrest;
+		next_word(fp, wd);
+		if (wd) {
+			if (eq(wd, "ordered")) {
+				ordered++;
+				goto checkdev;
+			}
+			if (machine == MACHINE_SQT && eq(wd, "sedit")) {
+				sedit++;
+				goto checkdev;
+			}
+			devorprof = wd;
+			next_word(fp, wd);
+		}
+	}
+
+save:
+getrest:
+	if (wd) {
+		if (*wd == '|') {
+			rest = ns(get_rest(fp));
+		} else {
+			printf("%s: syntax error describing %s\n",
+			       fname, this);
+			exit(1);
+		}
+	}
+	if (eq(devorprof, "profiling-routine") && profiling == 0)
+		goto next;
+	if (tp == 0)
+		tp = new_fent();
+	tp->f_fn = this;
+	tp->f_extra = rest;
+	if (options)
+		tp->f_type = INVISIBLE;
+	else
+	if (eq(devorprof, "device-driver"))
+		tp->f_type = DRIVER;
+	else if (eq(devorprof, "profiling-routine"))
+		tp->f_type = PROFILING;
+	else
+		tp->f_type = NORMAL;
+	tp->f_flags = 0;
+	if (ordered)
+		tp->f_flags |= ORDERED;
+	if (sedit)				/* SQT */
+		tp->f_flags |= SEDIT;
+	tp->f_needs = needs;
+	if (pf && pf->f_type == INVISIBLE)
+		pf->f_flags = 1;		/* mark as duplicate */
+	goto next;
+}
+
+int
+opteq(const char *cp, const char *dp)
+{
+	char c, d;
+
+	for (; ; cp++, dp++) {
+		if (*cp != *dp) {
+			c = isupper(*cp) ? tolower(*cp) : *cp;
+			d = isupper(*dp) ? tolower(*dp) : *dp;
+			if (c != d)
+				return (0);
+		}
+		if (*cp == 0)
+			return (1);
+	}
+}
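+
+/*
+ * opteq() is a case-insensitive string equality test, e.g.
+ * opteq("QUOTA", "quota") is 1 while opteq("quota", "quotas") is 0
+ * ("quota" being just an illustrative option name).
+ */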
+
+void
+put_source_file_name(FILE *fp, struct file_list *tp)
+{
+	if ((tp->f_fn[0] == '.') && (tp->f_fn[1] == '/'))
+		fprintf(fp, "%s ", tp->f_fn);
+	 else
+		fprintf(fp, "$(SOURCE_DIR)/%s ", tp->f_fn);
+}
+
+void
+do_objs(FILE *fp, const char *msg, int ext)
+{
+	register struct file_list *tp;
+	register int lpos, len;
+	char *cp;
+	char och;
+	const char *sp;
+#if	DO_SWAPFILE
+	register struct file_list *fl;
+	char swapname[32];
+#endif	/* DO_SWAPFILE */
+
+	fprintf(fp, "%s", msg);
+	lpos = strlen(msg);
+	for (tp = ftab; tp != 0; tp = tp->f_next) {
+		if (tp->f_type == INVISIBLE)
+			continue;
+
+		/*
+		 *	Check for '.o' file in list
+		 */
+		cp = tp->f_fn + (len = strlen(tp->f_fn)) - 1;
+		if ((ext == -1 && tp->f_flags & ORDERED) ||		/* not in objs */
+		    (ext != -1 && *cp != ext))
+			continue;
+		else if (*cp == 'o') {
+			if (len + lpos > 72) {
+				lpos = 8;
+				fprintf(fp, "\\\n\t");
+			}
+			put_source_file_name(fp, tp);
+			fprintf(fp, " ");
+			lpos += len + 1;
+			continue;
+		}
+		sp = tail(tp->f_fn);
+#if	DO_SWAPFILE
+		for (fl = conf_list; fl; fl = fl->f_next) {
+			if (fl->f_type != SWAPSPEC)
+				continue;
+			(void) sprintf(swapname, "swap%s.c", fl->f_fn);
+			if (eq(sp, swapname))
+				goto cont;
+		}
+#endif	/* DO_SWAPFILE */
+		cp = (char *)sp + (len = strlen(sp)) - 1;
+		och = *cp;
+		*cp = 'o';
+		if (len + lpos > 72) {
+			lpos = 8;
+			fprintf(fp, "\\\n\t");
+		}
+		fprintf(fp, "%s ", sp);
+		lpos += len + 1;
+		*cp = och;
+#if	DO_SWAPFILE
+cont:
+		;
+#endif	/* DO_SWAPFILE */
+	}
+	if (lpos != 8)
+		putc('\n', fp);
+}
+
+/* not presently used and probably broken; use ORDERED instead */
+void
+do_ordered(FILE *fp)
+{
+	register struct file_list *tp;
+	register int lpos, len;
+	char *cp;
+	char och;
+	const char *sp;
+
+	fprintf(fp, "ORDERED=");
+	lpos = 10;
+	for (tp = ftab; tp != 0; tp = tp->f_next) {
+		if ((tp->f_flags & ORDERED) != ORDERED)
+			continue;
+		sp = tail(tp->f_fn);
+		cp = (char *)sp + (len = strlen(sp)) - 1;
+		och = *cp;
+		*cp = 'o';
+		if (len + lpos > 72) {
+			lpos = 8;
+			fprintf(fp, "\\\n\t");
+		}
+		fprintf(fp, "%s ", sp);
+		lpos += len + 1;
+		*cp = och;
+	}
+	if (lpos != 8)
+		putc('\n', fp);
+}
+
+void
+do_files(FILE *fp, const char *msg, char ext)
+{
+	register struct file_list *tp;
+	register int lpos, len=0; /* dvw: init to 0 */
+
+	fprintf(fp, "%s", msg);
+	lpos = 8;
+	for (tp = ftab; tp != 0; tp = tp->f_next) {
+		if (tp->f_type == INVISIBLE)
+			continue;
+		if (tp->f_fn[strlen(tp->f_fn)-1] != ext)
+			continue;
+		/*
+		 * Always generate a newline.
+		 * Our Makefiles aren't readable anyway.
+		 */
+
+		lpos = 8;
+		fprintf(fp, "\\\n\t");
+		put_source_file_name(fp, tp);
+		lpos += len + 1;
+	}
+	if (lpos != 8)
+		putc('\n', fp);
+}
+
+/*
+ *  Include machine dependent makefile in output
+ */
+
+void
+do_machdep(FILE *ofp)
+{
+	FILE *ifp;
+	char pname[BUFSIZ];
+	char line[BUFSIZ];
+
+	(void) sprintf(line, "%s/Makefile.%s", config_directory, machinename);
+	ifp = fopenp(VPATH, line, pname, "r");
+	if (ifp == 0) {
+		perror(line);
+		exit(1);
+	}
+	while (fgets(line, BUFSIZ, ifp) != 0) {
+		if (profiling && (strncmp(line, "LIBS=", 5) == 0)) 
+			fprintf(ofp,"LIBS=${LIBS_P}\n");
+		else
+			fputs(line, ofp);
+	}
+	fclose(ifp);
+}
+
+
+/*
+ *  Format configuration dependent parameter file.
+ */
+
+void
+build_confdep(FILE *fp)
+{
+	fprintf(fp, "#define MAXUSERS %d\n", maxusers);
+}
+
+/*
+ *  Format cpu types file.
+ */
+
+void
+build_cputypes(FILE *fp)
+{
+	struct cputype *cp;
+
+	for (cp = cputype; cp; cp = cp->cpu_next)
+		fprintf(fp, "#define\t%s\t1\n", cp->cpu_name);
+}
+
+
+
+/*
+ *  Build a define parameter file.  Create it first in a temporary location and
+ *  determine if this new contents differs from the old before actually
+ *  replacing the original (so as not to introduce avoidable extraneous
+ *  compilations).
+ */
+
+void
+do_build(const char *name, void (*format)(FILE *))
+{
+	static char temp[]="#config.tmp";
+	FILE *tfp, *ofp;
+	int c;
+
+	unlink(path(temp));
+	tfp = fopen(path(temp), "w+");
+	if (tfp == 0) {
+		perror(path(temp));
+		exit(1);
+	}
+	unlink(path(temp));
+	(*format)(tfp);
+	ofp = fopen(path(name), "r");
+	if (ofp != 0)
+	{
+		fseek(tfp, 0, 0);
+		while ((c = fgetc(tfp)) != EOF)
+			if (fgetc(ofp) != c)
+				goto copy;
+		if (fgetc(ofp) == EOF)
+			goto same;
+		
+	}
+copy:
+	if (ofp)
+		fclose(ofp);
+	unlink(path(name));
+	ofp = fopen(path(name), "w");
+	if (ofp == 0) {
+		perror(path(name));
+		exit(1);
+	}
+	fseek(tfp, 0, 0);
+	while ((c = fgetc(tfp)) != EOF)
+		fputc(c, ofp);
+same:
+	fclose(ofp);
+	fclose(tfp);
+}
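+
+/*
+ * e.g. do_build("cputypes.h", build_cputypes) leaves cputypes.h with
+ * its old modification time when the regenerated contents match, so
+ * make will not rebuild everything that includes it.
+ */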
+
+const char *
+tail(const char *fn)
+{
+	register const char *cp;
+
+	cp = rindex(fn, '/');
+	if (cp == 0)
+		return (fn);
+	return (cp+1);
+}
+
+/*
+ * Create the makerules for each file
+ * which is part of the system.
+ * Devices are processed with the special c2 option -i
+ * which avoids any problem areas with i/o addressing
+ * (e.g. for the VAX); assembler files are processed by as.
+ */
+void
+do_rules(FILE *f)
+{
+	char *cp;
+	char *np, och;
+	const char *tp;
+	register struct file_list *ftp;
+	const char *extras = ""; /* dvw: init to "" */
+	char *source_dir;
+	char och_upper;
+	const char *nl = "";
+
+	for (ftp = ftab; ftp != 0; ftp = ftp->f_next) {
+		if (ftp->f_type == INVISIBLE)
+			continue;
+		cp = (np = ftp->f_fn) + strlen(ftp->f_fn) - 1;
+		och = *cp;
+		/*
+		 *	Don't compile '.o' files
+		 */
+		if (och == 'o')
+			continue;
+		/*
+		 *	Determine where sources should come from
+		 */
+		if ((np[0] == '.') && (np[1] == '/')) {
+			source_dir = "";
+			np += 2;
+		} else
+			source_dir = "$(SOURCE_DIR)/";
+		*cp = '\0';
+		tp = tail(np);	/* dvw: init tp before 'if' */
+		if (och == 'o') {
+			fprintf(f, "%so: %so\n\t${O_RULE_1A}%s%.*s${O_RULE_1B}\n\n",
+					tp, np, source_dir, (int)(tp-np), np);
+			continue;
+		}
+		fprintf(f, "%so: %s%s%c\n", tp, source_dir, np, och);
+		if (och == 's') {
+			switch (machine) {
+			case MACHINE_MIPSY:
+			case MACHINE_MIPS:
+				switch (ftp->f_type) {
+				case NORMAL:
+				case DRIVER:
+					fprintf(f, "\t@${RM} %so\n", tp);
+					fprintf(f, "\t${CC} ${CCASFLAGS}%s %s%s%ss\n\n",
+						(ftp->f_extra?ftp->f_extra:""), extras, source_dir, np);
+					break;
+	
+				case PROFILING:
+					if (!profiling)
+						continue;
+					fprintf(f, "\t@${RM} %so\n", tp);
+					fprintf(f, "\t${CC} ${CCPASFLAGS}%s %s%s%ss\n\n",
+						(ftp->f_extra?ftp->f_extra:""), extras, source_dir, np);
+					break;
+	
+				default:
+					printf("Don't know rules for %s.s\n", np);
+					break;
+				}
+				break;
+			default:
+			fprintf(f, "\t${S_RULE_1A}%s%.*s${S_RULE_1B}%s\n",
+					source_dir, (int)(tp-np), np, nl);
+			fprintf(f, "\t${S_RULE_2}%s\n", nl);
+			fprintf(f, "\t${S_RULE_3}\n\n");
+			}
+			continue;
+		}
+		if (och == 'b') {
+			fprintf(f, "\t${B_RULE_1A}%s%.*s${B_RULE_1B}\n\n", 
+				source_dir, (int)(tp-np), np);
+			continue;
+		}
+		extras = "";
+		switch (ftp->f_type) {
+	
+		case NORMAL:
+			switch (machine) {
+	
+			case MACHINE_MIPSY:
+			case MACHINE_MIPS:
+				fprintf(f, "\t@${RM} %so\n", tp);
+				fprintf(f, "\t${CC} ${CCNFLAGS}%s %s%s%sc\n\n",
+					(ftp->f_extra?ftp->f_extra:""), extras, source_dir, np);
+				continue;
+	#if	0
+			case MACHINE_SQT:
+				if (ftp->f_flags & SEDIT) {
+					fprintf(f, "\t${CC} -SO ${COPTS} %s%s%sc | \\\n", extras, source_dir, np);
+					fprintf(f, "\t${SEDCMD} | ${C2} | ${AS} ${CAFLAGS} -o %so\n\n", tp);
+				} else {
+					fprintf(f, "\t${CC} -c -O ${COPTS} %s%s%sc\n\n",
+						source_dir, extras, np);
+				}
+				break;
+	#endif	/* 0 */
+			default:
+				goto common;
+			}
+			break;
+	
+		case DRIVER:
+			switch (machine) {
+	
+			case MACHINE_MIPSY:
+			case MACHINE_MIPS:
+				fprintf(f, "\t@${RM} %so\n", tp);
+				fprintf(f, "\t${CC} ${CCDFLAGS}%s %s%s%sc\n\n",
+					(ftp->f_extra?ftp->f_extra:""), extras, source_dir, np);
+				continue;
+			default:
+				extras = "_D";
+				goto common;
+			}
+			break;
+	
+		case PROFILING:
+			if (!profiling)
+				continue;
+			if (COPTS == 0) {
+				fprintf(stderr,
+					"config: COPTS undefined in generic makefile");
+				COPTS = "";
+			}
+			switch (machine) {
+				case MACHINE_MIPSY:
+				case MACHINE_MIPS:
+					fprintf(f, "\t@${RM} %so\n", tp);
+					fprintf(f, "\t${CC} ${CCPFLAGS}%s %s../%sc\n\n",
+						(ftp->f_extra?ftp->f_extra:""), extras, np);
+					continue;
+				case MACHINE_VAX:
+				case MACHINE_ROMP:
+				case MACHINE_SQT:
+				case MACHINE_MMAX:
+				case MACHINE_SUN3:
+				case MACHINE_SUN4:
+				case MACHINE_I386:
+				case MACHINE_I860:
+				case MACHINE_HPPA:
+				case MACHINE_SPARC:
+				case MACHINE_PPC:
+				case MACHINE_ARM:
+				case MACHINE_X86_64:
+					extras = "_P";
+					goto common;
+				default:
+				fprintf(stderr,
+					"config: don't know how to profile kernel on this cpu\n");
+				break;
+			}
+	
+		common:
+			och_upper = och + 'A' - 'a';
+			fprintf(f, "\t${%c_RULE_1A%s}", och_upper, extras);
+			if (ftp->f_extra)
+				fprintf(f, "%s", ftp->f_extra);
+			fprintf(f, "%s%.*s${%c_RULE_1B%s}%s\n",
+					source_dir, (int)(tp-np), np, och_upper, extras, nl);
+			fprintf(f, "\t${%c_RULE_2%s}%s\n", och_upper, extras, nl);
+			fprintf(f, "\t${%c_RULE_3%s}%s\n", och_upper, extras, nl);
+			fprintf(f, "\t${%c_RULE_4%s}\n\n", och_upper, extras);
+			break;
+	
+		default:
+			printf("Don't know rules for %s\n", np);
+			break;
+		}
+		*cp = och;
+	}
+}
+
+/*
+ * Create the load strings
+ */
+void
+do_load(FILE *f)
+{
+	register struct file_list *fl;
+	int first = 1;
+
+	fl = conf_list;
+	while (fl) {
+		if (fl->f_type != SYSTEMSPEC) {
+			fl = fl->f_next;
+			continue;
+		}
+		fl = do_systemspec(f, fl, first);
+		if (first)
+			first = 0;
+	}
+	fprintf(f, "LOAD =");
+	for (fl = conf_list; fl != 0; fl = fl->f_next)
+		if (fl->f_type == SYSTEMSPEC)
+			fprintf(f, " %s", fl->f_needs);
+#ifdef	multimax
+	fprintf(f, "\n\nall .ORDER: includelinks ${LOAD}\n");
+#else	/* multimax */
+	fprintf(f, "\n\nall: includelinks ${LOAD}\n");
+#endif	/* multimax */
+	fprintf(f, "\n");
+}
+
+struct file_list *
+do_systemspec(FILE *f, struct file_list *fl, __unused int first)
+{
+	/*
+	 * Variable for kernel name.
+	 */
+	fprintf(f, "KERNEL_NAME=%s\n", fl->f_needs);
+
+	fprintf(f, "%s .ORDER: %s.sys ${SYSDEPS}\n",
+		fl->f_needs, fl->f_needs);
+	fprintf(f, "\t${SYS_RULE_1}\n");
+	fprintf(f, "\t${SYS_RULE_2}\n");
+	fprintf(f, "\t${SYS_RULE_3}\n");
+	fprintf(f, "\t${SYS_RULE_4}\n\n");
+	do_swapspec(f, fl->f_fn, fl->f_needs);
+	for (fl = fl->f_next; fl != NULL && fl->f_type == SWAPSPEC; fl = fl->f_next)
+		continue;
+	return (fl);
+}
+
+void
+do_swapspec(__unused FILE *f, __unused const char *name, __unused char *sysname)
+{
+
+#if	DO_SWAPFILE
+	char *gdir = eq(name, "generic")?"$(MACHINEDIR)/":"";
+
+	fprintf(f, "%s.sys:${P} ${PRELDDEPS} ${LDOBJS} ${LDDEPS}\n\n", sysname);
+	fprintf(f, "%s.swap: swap%s.o\n", sysname, name);
+	fprintf(f, "\t@rm -f $@\n");
+	fprintf(f, "\t@cp swap%s.o $@\n\n", name);
+	fprintf(f, "swap%s.o: %sswap%s.c ${SWAPDEPS}\n", name, gdir, name);
+	if (machine == MACHINE_MIPSY || machine == MACHINE_MIPS) {
+		fprintf(f, "\t@${RM} swap%s.o\n", name);
+		fprintf(f, "\t${CC} ${CCNFLAGS} %sswap%s.c\n\n", gdir, name);
+	} else {
+		fprintf(f, "\t${C_RULE_1A}%s${C_RULE_1B}\n", gdir);
+		fprintf(f, "\t${C_RULE_2}\n");
+		fprintf(f, "\t${C_RULE_3}\n");
+		fprintf(f, "\t${C_RULE_4}\n\n");
+	}
+#endif	/* DO_SWAPFILE */
+}
+
+char *
+allCaps(char *str)
+{
+	register char *cp = str;
+
+	while (*str) {
+		if (islower(*str))
+			*str = toupper(*str);
+		str++;
+	}
+	return (cp);
+}
+
+#define OLDSALUTATION "# DO NOT DELETE THIS LINE"
+
+#define LINESIZE 1024
+static char makbuf[LINESIZE];		/* one line buffer for makefile */
+
+void
+copy_dependencies(FILE *makin, FILE *makout)
+{
+	register int oldlen = (sizeof OLDSALUTATION - 1);
+
+	while (fgets(makbuf, LINESIZE, makin) != NULL) {
+		if (! strncmp(makbuf, OLDSALUTATION, oldlen))
+			break;
+	}
+	while (fgets(makbuf, LINESIZE, makin) != NULL) {
+		if (oldlen != 0)
+		{
+			if (makbuf[0] == '\n')
+				continue;
+			else
+				oldlen = 0;
+		}
+		fputs(makbuf, makout);
+	}
+}
diff --git a/SETUP/config/mkswapconf.c b/SETUP/config/mkswapconf.c
new file mode 100644
index 000000000..fdd14d722
--- /dev/null
+++ b/SETUP/config/mkswapconf.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * "Portions Copyright (c) 1999 Apple Computer, Inc.  All Rights
+ * Reserved.  This file contains Original Code and/or Modifications of
+ * Original Code as defined in and that are subject to the Apple Public
+ * Source License Version 1.0 (the 'License').  You may not use this file
+ * except in compliance with the License.  Please obtain a copy of the
+ * License at http://www.apple.com/publicsource and read it before using
+ * this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License."
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/* 
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+ 
+/*
+ * Copyright (c) 1980 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley.  The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef lint
+static char sccsid[] __attribute__((used)) = "@(#)mkswapconf.c	5.6 (Berkeley) 6/18/88";
+#endif /* not lint */
+
+/*
+ * Build a swap configuration file.
+ */
+#include "config.h"
+
+#include <stdio.h>
+#include <unistd.h>	/* for unlink */
+#include <ctype.h>
+
+struct file_list *do_swap(struct file_list *fl);
+void initdevtable(void);
+
+void
+swapconf(void)
+{
+	register struct file_list *fl;
+
+	fl = conf_list;
+	while (fl) {
+		if (fl->f_type != SYSTEMSPEC) {
+			fl = fl->f_next;
+			continue;
+		}
+		fl = do_swap(fl);
+	}
+}
+
+struct file_list *
+do_swap(struct file_list *fl)
+{
+	FILE *fp;
+	char  swapname[80];
+	register struct file_list *swap;
+	dev_t dev;
+
+	if (eq(fl->f_fn, "generic")) {
+		fl = fl->f_next;
+		return (fl->f_next);
+	}
+	if (machine == MACHINE_MMAX) {
+		printf("Error: Multimax must specify swap generic only.\n");
+		exit(1);
+	}
+	(void) sprintf(swapname, "swap%s.c", fl->f_fn);
+	fp = fopen(path(swapname), "w");
+	if (fp == 0) {
+		perror(path(swapname));
+		exit(1);
+	}
+	fprintf(fp, "#include <sys/param.h>\n");
+	fprintf(fp, "#include <sys/conf.h>\n");
+	fprintf(fp, "\n");
+	/*
+	 * If there aren't any swap devices
+	 * specified, just return, the error
+	 * has already been noted.
+	 */
+	swap = fl->f_next;
+	if (swap == 0 || swap->f_type != SWAPSPEC) {
+		(void) unlink(path(swapname));
+		fclose(fp);
+		return (swap);
+	}
+	fprintf(fp, "dev_t\trootdev = makedev(%d, %d);\n",
+		major(fl->f_rootdev), minor(fl->f_rootdev));
+	fprintf(fp, "dev_t\targdev  = makedev(%d, %d);\n",
+		major(fl->f_argdev), minor(fl->f_argdev));
+	fprintf(fp, "dev_t\tdumpdev = makedev(%d, %d);\n",
+		major(fl->f_dumpdev), minor(fl->f_dumpdev));
+	fprintf(fp, "\n");
+	fprintf(fp, "struct\tswdevt swdevt[] = {\n");
+	do {
+		dev = swap->f_swapdev;
+		fprintf(fp, "\t{ makedev(%d, %d),\t0,\t%d },\t/* %s */\n",
+		    major(dev), minor(dev), swap->f_swapsize, swap->f_fn);
+		swap = swap->f_next;
+	} while (swap && swap->f_type == SWAPSPEC);
+	fprintf(fp, "\t{ 0, 0, 0 }\n");
+	fprintf(fp, "};\n");
+	if (machine == MACHINE_MIPSY || machine == MACHINE_MIPS) {
+		fprintf(fp, "\nsetconf()\n");
+		fprintf(fp, "{\n");
+		fprintf(fp, "\t/* resolve reference for non-generic kernels */\n");
+		fprintf(fp, "}\n");
+	}
+	fclose(fp);
+	return (swap);
+}
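+
+/*
+ * Editor's sketch of the generated file (hypothetical system name and
+ * device numbers): for a spec "mach" with root at major 14 minor 0 and
+ * one swap partition at minor 1, the emitted swapmach.c comes out
+ * roughly as
+ *
+ *	#include <sys/param.h>
+ *	#include <sys/conf.h>
+ *
+ *	dev_t	rootdev = makedev(14, 0);
+ *	dev_t	argdev  = makedev(14, 1);
+ *	dev_t	dumpdev = makedev(14, 1);
+ *
+ *	struct	swdevt swdevt[] = {
+ *		{ makedev(14, 1),	0,	0 },
+ *		{ 0, 0, 0 }
+ *	};
+ */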
+
+static	int devtablenotread = 1;
+static	struct devdescription {
+	char	*dev_name;
+	int	dev_major;
+	struct	devdescription *dev_next;
+} *devtable;
+
+/*
+ * Given a device name specification figure out:
+ *	major device number
+ *	partition
+ *	device name
+ *	unit number
+ * This is a hack, but the system still thinks in
+ * terms of major/minor instead of string names.
+ */
+dev_t
+nametodev(char *name, int defunit, char defpartition)
+{
+	char *cp, partition;
+	int unit;
+	register struct devdescription *dp;
+
+	cp = name;
+	if (cp == 0) {
+		fprintf(stderr, "config: internal error, nametodev\n");
+		exit(1);
+	}
+	while (*cp && !isdigit(*cp))
+		cp++;
+	unit = *cp ? atoi(cp) : defunit;
+	if (unit < 0 || unit > 31) {
+		fprintf(stderr,
+"config: %s: invalid device specification, unit out of range\n", name);
+		unit = defunit;			/* carry on more checking */
+	}
+	if (*cp) {
+		*cp++ = '\0';
+		while (*cp && isdigit(*cp))
+			cp++;
+	}
+	partition = *cp ? *cp : defpartition;
+	if (partition < 'a' || partition > 'h') {
+		fprintf(stderr,
+"config: %c: invalid device specification, bad partition\n", *cp);
+		partition = defpartition;	/* carry on */
+	}
+	if (devtablenotread)
+		initdevtable();
+	for (dp = devtable; dp != 0; dp = dp->dev_next)
+		if (eq(name, dp->dev_name))
+			break;
+	if (dp == 0) {
+		fprintf(stderr, "config: %s: unknown device\n", name);
+		return (NODEV);
+	}
+	return (makedev(dp->dev_major, (unit << DEV_SHIFT) + (partition - 'a')));
+}
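+
+/*
+ * Editor's illustration (hypothetical device table): with an entry
+ * mapping "sd" to major 14, nametodev("sd0b", 0, 'a') parses device
+ * "sd", unit 0, partition 'b', and returns
+ * makedev(14, (0 << DEV_SHIFT) + ('b' - 'a')).
+ */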
+
+char *
+devtoname(dev_t dev)
+{
+	char buf[80]; 
+	register struct devdescription *dp;
+
+	if (devtablenotread)
+		initdevtable();
+	for (dp = devtable; dp != 0; dp = dp->dev_next)
+		if (major(dev) == dp->dev_major)
+			break;
+	if (dp == 0)
+		dp = devtable;
+	(void) sprintf(buf, "%s%d%c", dp->dev_name,
+		minor(dev) >> DEV_SHIFT, (minor(dev) & DEV_MASK) + 'a');
+	return (ns(buf));
+}
+
+void
+initdevtable(void)
+{
+	char buf[BUFSIZ];
+	char line[BUFSIZ];
+	int maj;
+	register struct devdescription **dp = &devtable;
+	FILE *fp;
+
+	(void) sprintf(buf, "%s/devices.%s", config_directory, machinename);
+	fp = fopenp(VPATH, buf, line, "r");
+	if (fp == NULL) {
+		fprintf(stderr, "config: can't open %s\n", buf);
+		exit(1);
+	}
+	while (fgets(line, BUFSIZ, fp) != 0) {
+		if (*line == '#' || *line == '\n')
+			continue;
+		if (sscanf(line, "%s\t%d\n", buf, &maj) != 2)
+			break;
+		*dp = (struct devdescription *)malloc(sizeof (**dp));
+		(*dp)->dev_name = ns(buf);
+		(*dp)->dev_major = maj;
+		dp = &(*dp)->dev_next;
+	}
+	*dp = 0;
+	fclose(fp);
+	devtablenotread = 0;
+}
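+
+/*
+ * Editor's note on the table format read above: devices.<machinename>
+ * is plain text, one "name<tab>major" pair per line; '#' lines and
+ * blank lines are ignored.  Majors here are hypothetical:
+ *
+ *	# name	major
+ *	sd	14
+ *	hd	8
+ */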
diff --git a/SETUP/config/openp.c b/SETUP/config/openp.c
new file mode 100644
index 000000000..c05cd9daf
--- /dev/null
+++ b/SETUP/config/openp.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * "Portions Copyright (c) 1999 Apple Computer, Inc.  All Rights
+ * Reserved.  This file contains Original Code and/or Modifications of
+ * Original Code as defined in and that are subject to the Apple Public
+ * Source License Version 1.0 (the 'License').  You may not use this file
+ * except in compliance with the License.  Please obtain a copy of the
+ * License at http://www.apple.com/publicsource and read it before using
+ * this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License."
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/*  openp, fopenp  --  search pathlist and open file
+ *
+ *  Usage:
+ *	i = openp (path,file,complete,flags,mode)
+ *	f = fopenp (path,file,complete,type)
+ *	int i,flags,mode;
+ *	FILE *f;
+ *	char *path,*file,*complete,*type;
+ *
+ *  Openp searches for "file" in the pathlist "path";
+ *  when the file is found and can be opened by open()
+ *  with the specified "flags" and "mode", then the full filename
+ *  is copied into "complete" and openp returns the file
+ *  descriptor.  If no such file is found, openp returns -1.
+ *  Fopenp performs the same function, using fopen() instead
+ *  of open() and type instead of flags/mode; it returns 0 if no
+ *  file is found.
+ *
+ *  HISTORY
+ * 30-Apr-85  Steven Shafer (sas) at Carnegie-Mellon University
+ *	Adapted for 4.2 BSD UNIX.  Added new parameter to openp.c;
+ *	changed names of flags, mode, and type parameters to reflect
+ *	current manual entries for open and fopen.
+ *
+ * 20-Nov-79  Steven Shafer (sas) at Carnegie-Mellon University
+ *	Created for VAX.
+ *
+ */
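+
+/*
+ * Editor's note: initdevtable() in mkswapconf.c is a representative
+ * caller within this patch --
+ *
+ *	fp = fopenp(VPATH, buf, line, "r");
+ *	if (fp == NULL)
+ *		... file not found on any VPATH component ...
+ */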
+
+#include <stdio.h>
+#include <fcntl.h>	/* open */
+#include "config.h"
+
+
+int openp(const char *fpath, char *file, char *complete, int flags, int mode);
+
+static int flgs,mod,value;
+static const char *ftyp;
+static FILE *fvalue;
+
+static int
+func(char *fnam)
+{
+	value = open (fnam,flgs,mod);
+	return (value < 0);
+}
+
+static int
+ffunc(char *fnam)
+{
+	fvalue = fopen (fnam,ftyp);
+	return (fvalue == 0);
+}
+
+int
+openp(const char *fpath, char *file, char *complete, int flags, int mode)
+{
+	flgs = flags;
+	mod = mode;
+	if (searchp(fpath,file,complete,func) < 0)  return (-1);
+	return (value);
+}
+
+FILE *
+fopenp(const char *fpath, char *file, char *complete, const char *ftype)
+{
+	ftyp = ftype;
+	if (searchp(fpath,file,complete,ffunc) < 0)  return (0);
+	return (fvalue);
+}
diff --git a/SETUP/config/parser.y b/SETUP/config/parser.y
new file mode 100644
index 000000000..4f77b93e4
--- /dev/null
+++ b/SETUP/config/parser.y
@@ -0,0 +1,1278 @@
+/* 
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * Copyright (c) 1988 Carnegie-Mellon University
+ * Copyright (c) 1987 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * Copyright (c) 1988 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley.  The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ *	@(#)config.y	5.8 (Berkeley) 6/18/88
+ */
+
+%union {
+	char	*str;
+	int	val;
+	struct	file_list *file;
+	struct	idlst *lst;
+}
+
+%token	ADDRMOD
+%token	AND
+%token	ANY
+%token	ARGS
+%token	AT
+%token	BIN
+%token	BUILDDIR
+%token	COMMA
+%token	CONFIG
+%token	CONFIGDIR
+%token	CONTROLLER
+%token	CPU
+%token	CSR
+%token	DEVICE
+%token	DISK
+%token	DRIVE
+%token	DST
+%token	DUMPS
+%token	EQUALS
+%token	FLAGS
+%token	HZ
+%token	IDENT
+%token	INIT
+%token	MACHINE
+%token	MAJOR
+%token	MASTER
+%token	MAXUSERS
+%token	MAXDSIZ
+%token	MBA
+%token	MBII
+%token	MINOR
+%token	MINUS
+%token	NEXUS
+%token	OBJECTDIR
+%token	ON
+%token	OPTIONS
+%token	MAKEOPTIONS
+%token	PRIORITY
+%token	PROFILE
+%token	PSEUDO_DEVICE
+%token	ROOT
+%token	SEMICOLON
+%token	SIZE
+%token	SLAVE
+%token	SOURCEDIR
+%token	SWAP
+%token	TIMEZONE
+%token	TRACE
+%token	UBA
+%token	VECTOR
+%token	VME
+%token  VME16D16
+%token  VME24D16
+%token  VME32D16
+%token  VME16D32
+%token  VME24D32
+%token  VME32D32
+
+/* following 3 are unique to CMU */
+%token	LUN
+%token	SLOT
+%token	TAPE
+
+%token	<str>	ID
+%token	<val>	NUMBER
+%token	<val>	FPNUMBER
+
+%type	<str>	Save_id
+%type	<str>	Opt_value
+%type	<str>	Dev
+%type	<lst>	Id_list
+%type	<val>	optional_size
+%type	<str>	device_name
+%type	<val>	major_minor
+%type	<val>	arg_device_spec
+%type	<val>	root_device_spec
+%type	<val>	dump_device_spec
+%type	<file>	swap_device_spec
+%type	<val>	Value
+
+%{
+
+#include "config.h"
+#include <ctype.h>
+#include <stdio.h>
+
+struct	device cur;
+struct	device *curp = 0;
+char	*temp_id;
+char	*val_id;
+/* char	*malloc(); */
+
+int yylex(void);
+
+int finddev(dev_t dev);
+int alreadychecked(dev_t dev, dev_t list[], dev_t *last);
+void deverror(const char *systemname, const char *devtype);
+void mkconf(char *sysname);
+struct file_list *newswap(void);
+void mkswap(struct file_list *syslist, struct file_list *fl, int size);
+struct device *huhcon(const char *dev);
+void check_nexus(struct device *dev, int num);
+void check_slot(struct device *dev, int num);
+void checksystemspec(struct file_list *fl);
+void verifysystemspecs(void);
+dev_t *verifyswap(struct file_list *fl, dev_t checked[], dev_t *pchecked);
+struct device *dconnect(const char *dev, int num);
+
+%}
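+
+/*
+ * Editor's illustration, not part of the original grammar: a minimal
+ * fragment this parser accepts, assuming the companion lexer maps
+ * keywords to the tokens above and line ends to SEMICOLON (names such
+ * as MYKERNEL and sd0 are hypothetical):
+ *
+ *	machine		i386
+ *	cpu		i386
+ *	ident		MYKERNEL
+ *	maxusers	32
+ *	config		mach_kernel root on sd0a swap on sd0b
+ *	pseudo-device	loop
+ */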
+%%
+Configuration:
+	Many_specs
+		{ verifysystemspecs(); }
+		;
+
+Many_specs:
+	Many_specs Spec
+		|
+	/* lambda */
+		;
+
+Spec:
+	Device_spec SEMICOLON
+	      { newdev(&cur); } |
+	Config_spec SEMICOLON
+		|
+	TRACE SEMICOLON
+	      { do_trace = !do_trace; } |
+	SEMICOLON
+		|
+	error SEMICOLON
+		;
+
+Config_spec:
+	MACHINE Save_id
+	    {
+		if (!strcmp($2, "vax")) {
+			machine = MACHINE_VAX;
+			machinename = "vax";
+		} else if (!strcmp($2, "sun")) {
+			/* default to Sun 3 */
+			machine = MACHINE_SUN3;
+			machinename = "sun3";
+		} else if (!strcmp($2, "sun2")) {
+			machine = MACHINE_SUN2;
+			machinename = "sun2";
+		} else if (!strcmp($2, "sun3")) {
+			machine = MACHINE_SUN3;
+			machinename = "sun3";
+		} else if (!strcmp($2, "sun4")) {
+			machine = MACHINE_SUN4;
+			machinename = "sun4";
+		} else if (!strcmp($2, "romp")) {
+			machine = MACHINE_ROMP;
+			machinename = "romp";
+		} else if (!strcmp($2, "ca")) {
+			machine = MACHINE_ROMP;
+			machinename = "ca";
+		} else if (!strcmp($2, "mmax")) {
+			machine = MACHINE_MMAX;
+			machinename = "mmax";
+		} else if (!strcmp($2, "sqt")) {
+			machine = MACHINE_SQT;
+			machinename = "sqt";
+		} else if (!strcmp($2, "i")) {
+			machine = MACHINE_I386;
+			machinename = "i386";
+		} else if (!strcmp($2, "i386")) {
+			machine = MACHINE_I386;
+			machinename = "i386";
+		} else if (!strcmp($2, "ix")) {
+			machine = MACHINE_IX;
+			machinename = "ix";
+		} else if (!strcmp($2, "mipsy")) {
+			machine = MACHINE_MIPSY;
+			machinename = "mipsy";
+		} else if (!strcmp($2, "mips")) {
+			machine = MACHINE_MIPS;
+			machinename = "mips";
+		} else if (!strcmp($2, "i860")) {
+			machine = MACHINE_I860;
+			machinename = "i860";
+		} else if (!strcmp($2, "m68k")) {
+			machine = MACHINE_M68K;
+			machinename = "m68k";
+		} else if (!strcmp($2, "m88k")) {
+			machine = MACHINE_M88K;
+			machinename = "m88k";
+		} else if (!strcmp($2, "m98k")) {
+			machine = MACHINE_M98K;
+			machinename = "m98k";
+		} else if (!strcmp($2, "hppa")) {
+			machine = MACHINE_HPPA;
+			machinename = "hppa";
+		} else if (!strcmp($2, "sparc")) {
+			machine = MACHINE_SPARC;
+			machinename = "sparc";
+		} else if (!strcmp($2, "ppc")) {
+			machine = MACHINE_PPC;
+			machinename = "ppc";
+		} else if (!strcmp($2, "arm")) {
+			machine = MACHINE_ARM;
+			machinename = "arm";
+		} else if (!strcmp($2, "x86_64")) {
+			machine = MACHINE_X86_64;
+			machinename = "x86_64";
+		} else
+			yyerror("Unknown machine type");
+	      } |
+	CPU Save_id
+	      {
+		struct cputype *cp =
+		    (struct cputype *)malloc(sizeof (struct cputype));
+		cp->cpu_name = ns($2);
+		cp->cpu_next = cputype;
+		cputype = cp;
+		free(temp_id);
+	      } |
+	OPTIONS Opt_list
+		|
+	MAKEOPTIONS Mkopt_list
+		|
+	IDENT ID
+	      { ident = ns($2); }
+		|
+	System_spec
+		|
+	MAXUSERS NUMBER
+	      { maxusers = $2; }
+		|
+	BUILDDIR Save_id
+		{ build_directory = ns($2); }
+		|
+	CONFIGDIR Save_id
+		{ config_directory = ns($2); }
+		|
+	OBJECTDIR Save_id
+		{ object_directory = ns($2); }
+		|
+	SOURCEDIR Save_id
+		{ source_directory = ns($2); }
+		|
+	PROFILE
+		{ profiling++; }
+		;
+
+System_spec:
+	  System_id
+		{ checksystemspec(*confp); }
+	| System_id System_parameter_list
+		{ checksystemspec(*confp); }
+	;
+
+System_id:
+	  CONFIG Save_id
+		{ mkconf($2); }
+	;
+
+System_parameter_list:
+	  System_parameter_list System_parameter
+	| System_parameter
+	;
+
+System_parameter:
+	  swap_spec
+	| root_spec
+	| dump_spec
+	| arg_spec
+	;
+
+swap_spec:
+	  SWAP optional_on swap_device_list
+	;
+
+swap_device_list:
+	  swap_device_list AND swap_device
+	| swap_device
+	;
+
+swap_device:
+	  swap_device_spec optional_size
+	      { mkswap(*confp, $1, $2); }
+	;
+
+swap_device_spec:
+	  device_name
+		{
+			struct file_list *fl = newswap();
+
+			if (eq($1, "generic"))
+				fl->f_fn = $1;
+			else {
+				fl->f_swapdev = nametodev($1, 0, 'b');
+				fl->f_fn = devtoname(fl->f_swapdev);
+			}
+			$$ = fl;
+		}
+	| major_minor
+		{
+			struct file_list *fl = newswap();
+
+			fl->f_swapdev = $1;
+			fl->f_fn = devtoname($1);
+			$$ = fl;
+		}
+	;
+
+root_spec:
+	  ROOT optional_on root_device_spec
+		{
+			struct file_list *fl = *confp;
+
+			if (fl && fl->f_rootdev != NODEV)
+				yyerror("extraneous root device specification");
+			else
+				fl->f_rootdev = $3;
+		}
+	;
+
+root_device_spec:
+	  device_name
+		{ $$ = nametodev($1, 0, 'a'); }
+	| major_minor
+	;
+
+dump_spec:
+	  DUMPS optional_on dump_device_spec
+		{
+			struct file_list *fl = *confp;
+
+			if (fl && fl->f_dumpdev != NODEV)
+				yyerror("extraneous dump device specification");
+			else
+				fl->f_dumpdev = $3;
+		}
+
+	;
+
+dump_device_spec:
+	  device_name
+		{ $$ = nametodev($1, 0, 'b'); }
+	| major_minor
+	;
+
+arg_spec:
+	  ARGS optional_on arg_device_spec
+		{
+			struct file_list *fl = *confp;
+
+			if (fl && fl->f_argdev != NODEV)
+				yyerror("extraneous arg device specification");
+			else
+				fl->f_argdev = $3;
+		}
+	;
+
+arg_device_spec:
+	  device_name
+		{ $$ = nametodev($1, 0, 'b'); }
+	| major_minor
+	;
+
+major_minor:
+	  MAJOR NUMBER MINOR NUMBER
+		{ $$ = makedev($2, $4); }
+	;
+
+optional_on:
+	  ON
+	| /* empty */
+	;
+
+optional_size:
+	  SIZE NUMBER
+	      { $$ = $2; }
+	| /* empty */
+	      { $$ = 0; }
+	;
+
+device_name:
+	  Save_id
+		{ $$ = $1; }
+	| Save_id NUMBER
+		{
+			char buf[80];
+
+			(void) sprintf(buf, "%s%d", $1, $2);
+			$$ = ns(buf); free($1);
+		}
+	| Save_id NUMBER ID
+		{
+			char buf[80];
+
+			(void) sprintf(buf, "%s%d%s", $1, $2, $3);
+			$$ = ns(buf); free($1);
+		}
+	;
+
+Opt_list:
+	Opt_list COMMA Option
+		|
+	Option
+		;
+
+Option:
+	Save_id
+	      {
+		struct opt *op = (struct opt *)malloc(sizeof (struct opt));
+		op->op_name = ns($1);
+		op->op_next = (struct opt *) 0;
+		op->op_value = 0;
+		if (opt == (struct opt *) 0)
+			opt = op;
+		else
+			opt_tail->op_next = op;
+		opt_tail = op;
+		free(temp_id);
+	      } |
+	Save_id EQUALS Opt_value
+	      {
+		struct opt *op = (struct opt *)malloc(sizeof (struct opt));
+		op->op_name = ns($1);
+		op->op_next = (struct opt *) 0;
+		op->op_value = ns($3);
+		if (opt == (struct opt *) 0)
+			opt = op;
+		else
+			opt_tail->op_next = op;
+		opt_tail = op;
+		free(temp_id);
+		if (val_id)
+			free(val_id);
+	      } ;
+
+Opt_value:
+	ID
+	      { $$ = val_id = ns($1); } |
+	NUMBER
+	      { char nb[16];
+	          (void) sprintf(nb, "%u", $1);
+	      	  $$ = val_id = ns(nb);
+	      } |
+	/* lambda from MIPS -- WHY */
+	      { $$ = val_id = ns(""); }
+	      ;
+
+Save_id:
+	ID
+	      { $$ = temp_id = ns($1); }
+	;
+
+Mkopt_list:
+	Mkopt_list COMMA Mkoption
+		|
+	Mkoption
+		;
+
+Mkoption:
+	Save_id
+	      {
+		struct opt *op = (struct opt *)malloc(sizeof (struct opt));
+		op->op_name = ns($1);
+		op->op_next =  (struct opt *) 0;
+		op->op_value = 0;
+		if (mkopt == (struct opt *) 0)
+			mkopt = op;
+		else
+			mkopt_tail->op_next = op;
+		mkopt_tail = op;
+		free(temp_id);
+	      } |
+	Save_id EQUALS Opt_value
+	      {
+		struct opt *op = (struct opt *)malloc(sizeof (struct opt));
+		op->op_name = ns($1);
+		op->op_next =  (struct opt *) 0;
+		op->op_value = ns($3);
+		if (mkopt == (struct opt *) 0)
+			mkopt = op;
+		else
+			mkopt_tail->op_next = op;
+		mkopt_tail = op;
+		free(temp_id);
+		if (val_id)
+			free(val_id);
+	      } ;
+
+Dev:
+	UBA
+	      { $$ = ns("uba"); } |
+	MBA
+	      { $$ = ns("mba"); } |
+	VME16D16
+	      {
+		if (machine != MACHINE_SUN2 && machine != MACHINE_SUN3
+		    && machine != MACHINE_SUN4)
+			yyerror("wrong machine type for vme16d16");
+		$$ = ns("vme16d16");
+		} |
+	VME24D16
+	      {
+		if (machine != MACHINE_SUN2 && machine != MACHINE_SUN3
+		    && machine != MACHINE_SUN4)
+			yyerror("wrong machine type for vme24d16");
+		$$ = ns("vme24d16");
+		} |
+	VME32D16
+	      {
+		if (machine != MACHINE_SUN3 && machine != MACHINE_SUN4)
+			yyerror("wrong machine type for vme32d16");
+		$$ = ns("vme32d16");
+		} |
+	VME16D32
+	      {
+		if (machine != MACHINE_SUN3 && machine != MACHINE_SUN4)
+			yyerror("wrong machine type for vme16d32");
+		$$ = ns("vme16d32");
+		} |
+	VME24D32
+	      {
+		if (machine != MACHINE_SUN3 && machine != MACHINE_SUN4)
+			yyerror("wrong machine type for vme24d32");
+		$$ = ns("vme24d32");
+		} |
+	VME32D32
+	      {
+		if (machine != MACHINE_SUN3 && machine != MACHINE_SUN4)
+			yyerror("wrong machine type for vme32d32");
+		$$ = ns("vme32d32");
+		} |
+	VME
+	      {
+		if (machine != MACHINE_MIPSY && machine != MACHINE_MIPS)
+			yyerror("wrong machine type for vme");
+		$$ = ns("vme");
+		} |
+	MBII
+	      {
+		if (machine != MACHINE_MIPSY && machine != MACHINE_MIPS)
+			yyerror("wrong machine type for mbii");
+		$$ = ns("mbii");
+		} |
+	ID
+	      { $$ = ns($1); }
+	;
+
+Device_spec:
+	DEVICE Dev_name Dev_info Int_spec
+	      { cur.d_type = DEVICE; } |
+	MASTER Dev_name Dev_info Int_spec
+	      { cur.d_type = MASTER; } |
+	DISK Dev_name Dev_info Int_spec
+	      { cur.d_dk = 1; cur.d_type = DEVICE; } |
+/* TAPE rule is unique to CMU */
+	TAPE Dev_name Dev_info Int_spec
+	      { cur.d_type = DEVICE; } |
+	CONTROLLER Dev_name Dev_info Int_spec
+	      { cur.d_type = CONTROLLER; } |
+	PSEUDO_DEVICE Init_dev Dev
+	      {
+		cur.d_name = $3;
+		cur.d_type = PSEUDO_DEVICE;
+		} |
+	PSEUDO_DEVICE Init_dev Dev NUMBER
+	      {
+		cur.d_name = $3;
+		cur.d_type = PSEUDO_DEVICE;
+		cur.d_slave = $4;
+		} |
+	PSEUDO_DEVICE Init_dev Dev INIT ID
+	      {
+		cur.d_name = $3;
+		cur.d_type = PSEUDO_DEVICE;
+		cur.d_init = ns($5);
+		} |
+	PSEUDO_DEVICE Init_dev Dev NUMBER INIT ID
+	      {
+		cur.d_name = $3;
+		cur.d_type = PSEUDO_DEVICE;
+		cur.d_slave = $4;
+		cur.d_init = ns($6);
+		};
+
+Dev_name:
+	Init_dev Dev NUMBER
+	      {
+		cur.d_name = $2;
+		if (eq($2, "mba"))
+			seen_mba = 1;
+		else if (eq($2, "uba"))
+			seen_uba = 1;
+		else if (eq($2, "mbii"))
+			seen_mbii = 1;
+		else if (eq($2, "vme"))
+			seen_vme = 1;
+		cur.d_unit = $3;
+		};
+
+Init_dev:
+	/* lambda */
+	      { init_dev(&cur); };
+
+Dev_info:
+	Con_info Info_list
+		|
+	/* lambda */
+		;
+
+Con_info:
+	AT Dev NUMBER
+	      {
+		if (eq(cur.d_name, "mba") || eq(cur.d_name, "uba")
+		    || eq(cur.d_name, "mbii") || eq(cur.d_name, "vme")) {
+			(void) sprintf(errbuf,
+			    "%s must be connected to a nexus", cur.d_name);
+			yyerror(errbuf);
+		}
+		cur.d_conn = dconnect($2, $3);
+		if (machine == MACHINE_SQT)
+			dev_param(&cur, "index", cur.d_unit);
+		} |
+/* AT SLOT NUMBER rule is unique to CMU */
+	AT SLOT NUMBER
+	      { 
+		check_slot(&cur, $3);
+		cur.d_addr = $3;
+		cur.d_conn = TO_SLOT; 
+		 } |
+	AT NEXUS NUMBER
+	      { check_nexus(&cur, $3); cur.d_conn = TO_NEXUS; };
+
+Info_list:
+	Info_list Info
+		|
+	/* lambda */
+		;
+
+Info:
+	CSR NUMBER
+	      {
+		cur.d_addr = $2;
+		if (machine == MACHINE_SQT) {
+			dev_param(&cur, "csr", $2);
+		}
+		} |
+	DRIVE NUMBER
+	      {
+			cur.d_drive = $2;
+			if (machine == MACHINE_SQT) {
+				dev_param(&cur, "drive", $2);
+			}
+		} |
+	SLAVE NUMBER
+	      {
+		if (cur.d_conn != 0 && cur.d_conn != TO_NEXUS &&
+		    cur.d_conn->d_type == MASTER)
+			cur.d_slave = $2;
+		else
+			yyerror("can't specify slave--not to master");
+		} |
+/* MIPS */
+	ADDRMOD NUMBER
+	      { cur.d_addrmod = $2; } |
+/* LUN NUMBER rule is unique to CMU */
+	LUN NUMBER
+	      {
+		if ((cur.d_conn != 0) && (cur.d_conn != TO_SLOT) &&
+		    (cur.d_conn->d_type == CONTROLLER)) {
+			cur.d_addr = $2;
+		} else {
+			yyerror("device requires controller card");
+		}
+		} |
+	FLAGS NUMBER
+	      {
+		cur.d_flags = $2;
+		if (machine == MACHINE_SQT) {
+			dev_param(&cur, "flags", $2);
+		}
+	      } |
+	BIN NUMBER
+	      { 
+		 if (machine != MACHINE_SQT)
+			yyerror("bin specification only valid on Sequent Balance");
+		 if ($2 < 1 || $2 > 7)  
+			yyerror("bogus bin number");
+		 else {
+			cur.d_bin = $2;
+			dev_param(&cur, "bin", $2);
+		}
+	       } |
+	Dev Value
+	      {
+		if (machine != MACHINE_SQT)
+			yyerror("bad device spec");
+		dev_param(&cur, $1, $2);
+		};
+
+Value:
+	NUMBER
+	      |
+	MINUS NUMBER
+	      { $$ = -($2); }
+	;
+
+Int_spec:
+        Vec_spec
+	      { cur.d_pri = 0; } |
+	PRIORITY NUMBER
+	      { cur.d_pri = $2; } |
+        PRIORITY NUMBER Vec_spec
+	      { cur.d_pri = $2; } |
+        Vec_spec PRIORITY NUMBER
+	      { cur.d_pri = $3; } |
+	/* lambda */
+		;
+
+Vec_spec:
+        VECTOR Id_list
+	      { cur.d_vec = $2; };
+
+
+Id_list:
+	Save_id
+	      {
+		struct idlst *a = (struct idlst *)malloc(sizeof(struct idlst));
+		a->id = $1; a->id_next = 0; $$ = a;
+		a->id_vec = 0;
+		} |
+	Save_id Id_list
+		{
+		struct idlst *a = (struct idlst *)malloc(sizeof(struct idlst));
+	        a->id = $1; a->id_next = $2; $$ = a;
+		a->id_vec = 0;
+		} |
+        Save_id NUMBER
+	      {
+		struct idlst *a = (struct idlst *)malloc(sizeof(struct idlst));
+		a->id_next = 0; a->id = $1; $$ = a;
+		a->id_vec = $2;
+		} |
+        Save_id NUMBER Id_list
+	      {
+		struct idlst *a = (struct idlst *)malloc(sizeof(struct idlst));
+		a->id_next = $3; a->id = $1; $$ = a;
+		a->id_vec = $2;
+		};
+
+%%
+
+void
+yyerror(const char *s)
+{
+	fprintf(stderr, "config: line %d: %s\n", yyline, s);
+}
+
+/*
+ * return the passed string in a new space
+ */
+char *
+ns(const char *str)
+{
+	register char *cp;
+
+	cp = malloc((unsigned)(strlen(str)+1));
+	(void) strcpy(cp, str);
+	return (cp);
+}
+
+/*
+ * add a device to the list of devices
+ */
+void
+newdev(struct device *dp)
+{
+	register struct device *np;
+
+	np = (struct device *) malloc(sizeof *np);
+	*np = *dp;
+	if (curp == 0)
+		dtab = np;
+	else
+		curp->d_next = np;
+	curp = np;
+	curp->d_next = 0;
+}
+
+/*
+ * note that a configuration should be made
+ */
+void
+mkconf(char *sysname)
+{
+	register struct file_list *fl, **flp;
+
+	fl = (struct file_list *) malloc(sizeof *fl);
+	fl->f_type = SYSTEMSPEC;
+	fl->f_needs = sysname;
+	fl->f_rootdev = NODEV;
+	fl->f_argdev = NODEV;
+	fl->f_dumpdev = NODEV;
+	fl->f_fn = 0;
+	fl->f_next = 0;
+	for (flp = confp; *flp; flp = &(*flp)->f_next)
+		;
+	*flp = fl;
+	confp = flp;
+}
+
+struct file_list *
+newswap(void)
+{
+	struct file_list *fl = (struct file_list *)malloc(sizeof (*fl));
+
+	fl->f_type = SWAPSPEC;
+	fl->f_next = 0;
+	fl->f_swapdev = NODEV;
+	fl->f_swapsize = 0;
+	fl->f_needs = 0;
+	fl->f_fn = 0;
+	return (fl);
+}
+
+/*
+ * Add a swap device to the system's configuration
+ */
+void
+mkswap(struct file_list *syslist, struct file_list *fl, int size)
+{
+	register struct file_list **flp;
+
+	if (syslist == 0 || syslist->f_type != SYSTEMSPEC) {
+		yyerror("\"swap\" spec precedes \"config\" specification");
+		return;
+	}
+	if (size < 0) {
+		yyerror("illegal swap partition size");
+		return;
+	}
+	/*
+	 * Append swap description to the end of the list.
+	 */
+	flp = &syslist->f_next;
+	for (; *flp && (*flp)->f_type == SWAPSPEC; flp = &(*flp)->f_next)
+		;
+	fl->f_next = *flp;
+	*flp = fl;
+	fl->f_swapsize = size;
+	/*
+	 * If first swap device for this system,
+	 * set up f_fn field to insure swap
+	 * files are created with unique names.
+	 */
+	if (syslist->f_fn)
+		return;
+	if (eq(fl->f_fn, "generic"))
+		syslist->f_fn = ns(fl->f_fn);
+	else
+		syslist->f_fn = ns(syslist->f_needs);
+}
+
+/*
+ * find the pointer to connect to the given device and number.
+ * returns 0 if no such device and prints an error message
+ */
+struct device *
+dconnect(const char *dev, int num)
+{
+	register struct device *dp;
+
+	if (num == QUES)
+		return (huhcon(dev));
+	for (dp = dtab; dp != 0; dp = dp->d_next) {
+		if ((num != dp->d_unit) || !eq(dev, dp->d_name))
+			continue;
+		if (dp->d_type != CONTROLLER && dp->d_type != MASTER) {
+			(void) sprintf(errbuf,
+			    "%s connected to non-controller", dev);
+			yyerror(errbuf);
+			return (0);
+		}
+		return (dp);
+	}
+	(void) sprintf(errbuf, "%s %d not defined", dev, num);
+	yyerror(errbuf);
+	return (0);
+}
+
+/*
+ * connect to an unspecific thing
+ */
+struct device *
+huhcon(const char *dev)
+{
+	register struct device *dp, *dcp;
+	struct device rdev;	/* only used if dp is NULL */
+	int oldtype;
+
+	memset(&rdev, 0, sizeof rdev);
+
+	/*
+	 * First make certain that there are some of these to wildcard on
+	 */
+	for (dp = dtab; dp != 0; dp = dp->d_next)
+		if (eq(dp->d_name, dev))
+			break;
+	if (dp == 0) {
+		(void) sprintf(errbuf, "no %s's to wildcard", dev);
+		yyerror(errbuf);
+		return (0);
+	}
+	oldtype = dp->d_type;
+	dcp = dp->d_conn;
+	/*
+	 * Now see if there is already a wildcard entry for this device
+	 * (e.g. Search for a "uba ?")
+	 */
+	for (; dp != 0; dp = dp->d_next)
+		if (eq(dev, dp->d_name) && dp->d_unit == -1)
+			break;
+	/*
+	 * If there isn't, make one because everything needs to be connected
+	 * to something.
+	 */
+	if (dp == 0) {
+		dp = &rdev;
+		init_dev(dp);
+		dp->d_unit = QUES;
+		dp->d_name = ns(dev);
+		dp->d_type = oldtype;
+		newdev(dp);
+		dp = curp;
+		/*
+		 * Connect it to the same thing that other similar things are
+		 * connected to, but make sure it is a wildcard unit
+		 * (e.g. up connected to sc ?, here we make connect sc? to a
+		 * uba?).  If other things like this are on the NEXUS or
+		 * if they aren't connected to anything, then make the same
+		 * connection, else call ourself to connect to another
+		 * unspecific device.
+		 */
+		if (dcp == TO_NEXUS || dcp == 0)
+			dp->d_conn = dcp;
+		else
+			dp->d_conn = dconnect(dcp->d_name, QUES);
+	}
+	return (dp);
+}
+
+void
+init_dev(struct device *dp)
+{
+
+	dp->d_name = "OHNO!!!";
+	dp->d_type = DEVICE;
+	dp->d_conn = 0;
+	dp->d_vec = 0;
+	dp->d_addr = dp->d_pri = dp->d_flags = dp->d_dk = 0;
+	dp->d_slave = dp->d_drive = dp->d_unit = UNKNOWN;
+	if (machine == MACHINE_SUN2 || machine == MACHINE_SUN3
+	    || machine == MACHINE_SUN4){
+		dp->d_addr = UNKNOWN;
+		dp->d_mach = dp->d_bus = 0;
+	}
+	if (machine == MACHINE_MIPSY || machine == MACHINE_MIPS){
+		dp->d_addrmod = 0;
+	}
+	dp->d_init = 0;
+}
+
+/*
+ * make certain that this is a reasonable type of thing to connect to a nexus
+ */
+void
+check_nexus(struct device *dev, int num)
+{
+
+	switch (machine) {
+
+	case MACHINE_VAX:
+		if (!eq(dev->d_name, "uba") && !eq(dev->d_name, "mba"))
+			yyerror("only uba's and mba's should be connected to the nexus");
+		if (num != QUES)
+			yyerror("can't give specific nexus numbers");
+		break;
+
+	case MACHINE_SUN:
+		if (!eq(dev->d_name, "mb"))
+			yyerror("only mb's should be connected to the nexus");
+		break;
+
+	case MACHINE_ROMP:
+		if (!eq(dev->d_name, "iocc"))
+			yyerror("only iocc's should be connected to the nexus");
+		break;
+        case MACHINE_SUN2:
+		if (!eq(dev->d_name, "virtual") &&
+		    !eq(dev->d_name, "obmem") &&
+		    !eq(dev->d_name, "obio") &&
+		    !eq(dev->d_name, "mbmem") &&
+		    !eq(dev->d_name, "mbio") &&
+		    !eq(dev->d_name, "vme16d16") &&
+		    !eq(dev->d_name, "vme24d16")) {
+			(void)sprintf(errbuf,
+			    "unknown bus type `%s' for nexus connection on %s",
+			    dev->d_name, machinename);
+			yyerror(errbuf);
+		}
+		break;
+
+	case MACHINE_MMAX:
+		yyerror("don't grok 'nexus' on mmax -- try 'slot'.");
+		break;
+        case MACHINE_SUN3:
+        case MACHINE_SUN4:
+		if (!eq(dev->d_name, "virtual") &&
+		    !eq(dev->d_name, "obmem") &&
+		    !eq(dev->d_name, "obio") &&
+		    !eq(dev->d_name, "mbmem") &&
+		    !eq(dev->d_name, "mbio") &&
+		    !eq(dev->d_name, "vme16d16") &&
+		    !eq(dev->d_name, "vme24d16") &&
+                    !eq(dev->d_name, "vme32d16") &&
+		    !eq(dev->d_name, "vme16d32") &&
+		    !eq(dev->d_name, "vme24d32") &&
+		    !eq(dev->d_name, "vme32d32")) {
+			(void)sprintf(errbuf,
+			    "unknown bus type `%s' for nexus connection on %s",
+			    dev->d_name, machinename);
+			yyerror(errbuf);
+		}
+		break;
+	case MACHINE_MIPSY:
+	case MACHINE_MIPS:
+		if (!eq(dev->d_name, "vme") && !eq(dev->d_name, "mbii"))
+			yyerror("only vme's and mbii's should be connected to the nexus");
+		if (num != QUES)
+			yyerror("can't give specific nexus numbers");
+		break;
+	}
+}
+
+/*
+ * make certain that this is a reasonable type of thing to connect to a slot
+ */
+
+void
+check_slot(struct device *dev, int num)
+{
+
+	switch (machine) {
+
+	case MACHINE_MMAX:
+		if (!eq(dev->d_name, "emc"))
+			yyerror("only emc's plug into backplane slots.");
+		if (num == QUES)
+			yyerror("specific slot numbers must be given");
+		break;
+
+	case MACHINE_SQT:
+		if (!eq(dev->d_name, "mbad") &&
+		    !eq(dev->d_name, "zdc") &&
+		    !eq(dev->d_name, "sec")) {
+			(void)sprintf(errbuf,
+			    "unknown bus type `%s' for slot on %s",
+			    dev->d_name, machinename);
+			yyerror(errbuf);
+		}
+		break;
+
+	default:
+		yyerror("don't grok 'slot' for this machine -- try 'nexus'.");
+		break;
+	}
+}
+
+/*
+ * Check system specification and apply defaulting
+ * rules on root, argument, dump, and swap devices.
+ */
+void
+checksystemspec(struct file_list *fl)
+{
+	char buf[BUFSIZ];
+	register struct file_list *swap;
+	int generic;
+
+	if (fl == 0 || fl->f_type != SYSTEMSPEC) {
+		yyerror("internal error, bad system specification");
+		exit(1);
+	}
+	swap = fl->f_next;
+	generic = swap && swap->f_type == SWAPSPEC && eq(swap->f_fn, "generic");
+	if (fl->f_rootdev == NODEV && !generic) {
+		yyerror("no root device specified");
+		exit(1);
+	}
+	/*
+	 * Default swap area to be in 'b' partition of root's
+	 * device.  If root specified to be other than on 'a'
+	 * partition, give warning, something probably amiss.
+	 */
+	if (swap == 0 || swap->f_type != SWAPSPEC) {
+		dev_t dev;
+
+		swap = newswap();
+		dev = fl->f_rootdev;
+		if (minor(dev) & DEV_MASK) {
+			(void) sprintf(buf,
+"Warning, swap defaulted to 'b' partition with root on '%c' partition",
+				(minor(dev) & DEV_MASK) + 'a');
+			yyerror(buf);
+		}
+		swap->f_swapdev =
+		   makedev(major(dev), (minor(dev) &~ DEV_MASK) | ('b' - 'a'));
+		swap->f_fn = devtoname(swap->f_swapdev);
+		mkswap(fl, swap, 0);
+	}
+	/*
+	 * Make sure a generic swap isn't specified, along with
+	 * other stuff (user must really be confused).
+	 */
+	if (generic) {
+		if (fl->f_rootdev != NODEV)
+			yyerror("root device specified with generic swap");
+		if (fl->f_argdev != NODEV)
+			yyerror("arg device specified with generic swap");
+		if (fl->f_dumpdev != NODEV)
+			yyerror("dump device specified with generic swap");
+		return;
+	}
+	/*
+	 * Default argument device and check for oddball arrangements.
+	 */
+	if (fl->f_argdev == NODEV)
+		fl->f_argdev = swap->f_swapdev;
+	if (fl->f_argdev != swap->f_swapdev)
+		yyerror("Warning, arg device different than primary swap");
+	/*
+	 * Default dump device and warn if place is not a
+	 * swap area or the argument device partition.
+	 */
+	if (fl->f_dumpdev == NODEV)
+		fl->f_dumpdev = swap->f_swapdev;
+	if (fl->f_dumpdev != swap->f_swapdev && fl->f_dumpdev != fl->f_argdev) {
+		struct file_list *p = swap->f_next;
+
+		for (; p && p->f_type == SWAPSPEC; p = p->f_next)
+			if (fl->f_dumpdev == p->f_swapdev)
+				return;
+		(void) sprintf(buf, "Warning, orphaned dump device, %s",
+			"do you know what you're doing");
+		yyerror(buf);
+	}
+}
+
+/*
+ * Verify all devices specified in the system specification
+ * are present in the device specifications.
+ */
+void
+verifysystemspecs(void)
+{
+	register struct file_list *fl;
+	dev_t checked[50];
+	register dev_t *pchecked = checked;
+
+	for (fl = conf_list; fl; fl = fl->f_next) {
+		if (fl->f_type != SYSTEMSPEC)
+			continue;
+		if (!finddev(fl->f_rootdev))
+			deverror(fl->f_needs, "root");
+		*pchecked++ = fl->f_rootdev;
+		pchecked = verifyswap(fl->f_next, checked, pchecked);
+#define	samedev(dev1, dev2) \
+	((minor(dev1) &~ DEV_MASK) == (minor(dev2) &~ DEV_MASK))
+		if (!alreadychecked(fl->f_dumpdev, checked, pchecked)) {
+			if (!finddev(fl->f_dumpdev))
+				deverror(fl->f_needs, "dump");
+			*pchecked++ = fl->f_dumpdev;
+		}
+		if (!alreadychecked(fl->f_argdev, checked, pchecked)) {
+			if (!finddev(fl->f_argdev))
+				deverror(fl->f_needs, "arg");
+			*pchecked++ = fl->f_argdev;
+		}
+	}
+}
+
+/*
+ * Do as above, but for swap devices.
+ */
+dev_t *
+verifyswap(struct file_list *fl, dev_t checked[], dev_t *pchecked)
+{
+
+	for (;fl && fl->f_type == SWAPSPEC; fl = fl->f_next) {
+		if (eq(fl->f_fn, "generic"))
+			continue;
+		if (alreadychecked(fl->f_swapdev, checked, pchecked))
+			continue;
+		if (!finddev(fl->f_swapdev))
+			fprintf(stderr,
+			   "config: swap device %s not configured", fl->f_fn);
+		*pchecked++ = fl->f_swapdev;
+	}
+	return (pchecked);
+}
+
+/*
+ * Has a device already been checked
+ * for its existence in the configuration?
+ */
+int
+alreadychecked(dev_t dev, dev_t list[], dev_t *last)
+{
+	register dev_t *p;
+
+	for (p = list; p < last; p++)
+		if (samedev(*p, dev))
+			return (1);
+	return (0);
+}
+
+void
+deverror(const char *systemname, const char *devtype)
+{
+
+	fprintf(stderr, "config: %s: %s device not configured\n",
+		systemname, devtype);
+}
+
+/*
+ * Look for the device in the list of
+ * configured hardware devices.  Must
+ * take into account stuff wildcarded.
+ */
+/*ARGSUSED*/
+int
+finddev(__unused dev_t dev)
+{
+
+	/* punt on this right now */
+	return (1);
+}
diff --git a/SETUP/config/searchp.c b/SETUP/config/searchp.c
new file mode 100644
index 000000000..b79ca6a44
--- /dev/null
+++ b/SETUP/config/searchp.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * "Portions Copyright (c) 1999 Apple Computer, Inc.  All Rights
+ * Reserved.  This file contains Original Code and/or Modifications of
+ * Original Code as defined in and that are subject to the Apple Public
+ * Source License Version 1.0 (the 'License').  You may not use this file
+ * except in compliance with the License.  Please obtain a copy of the
+ * License at http://www.apple.com/publicsource and read it before using
+ * this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License."
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/*  searchp  --  search through pathlist for file
+ *
+ *  Usage:  p = searchp (path,file,fullname,func);
+ *	char *p, *path, *file, *fullname;
+ *	int (*func)();
+ *
+ *  Searchp will parse "path", a list of pathnames separated
+ *  by colons, prepending each pathname to "file".  The resulting
+ *  filename will be passed to "func", a function provided by the
+ *  user.  This function must return zero if the search is
+ *  successful (i.e. ended), and non-zero if the search must
+ *  continue.  If the function returns zero (success), then
+ *  searching stops, the full filename is placed into "fullname",
+ *  and searchp returns 0.  If the pathnames are all unsuccessfully
+ *  examined, then searchp returns -1.
+ *  If "file" begins with a slash, it is assumed to be an
+ *  absolute pathname and the "path" list is not used.  Note
+ *  that this rule is used by Bell's cc also; whereas Bell's
+ *  sh uses the rule that any filename which CONTAINS a slash
+ *  is assumed to be absolute.  The execlp and execvp procedures
+ *  also use this latter rule.  In my opinion, this is bogosity.
+ *
+ *  HISTORY
+ * 01-Apr-86  Rudy Nedved (ern) at Carnegie-Mellon University
+ *	4.1BSD system ignores trailing slashes. 4.2BSD does not. 
+ *	Therefore don't add a separating slash if there is a null
+ *	filename.
+ *
+ * 23-Oct-82  Steven Shafer (sas) at Carnegie-Mellon University
+ *	Fixed two bugs: (1) calling function as "func" instead of
+ *	"(*func)", (2) omitting trailing null name implied by trailing
+ *	colon in path.  Latter bug fixed by introducing "lastchar" and
+ *	changing final loop test to look for "*lastchar" instead of
+ *	"*nextpath".
+ *
+ * 20-Nov-79  Steven Shafer (sas) at Carnegie-Mellon University
+ *	Created for VAX.  If you're thinking of using this, you probably
+ *	should look at openp() and fopenp() (or the "want..." routines)
+ *	instead.
+ *
+ */
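+
+/*
+ * Editor's illustration (hypothetical arguments): calling
+ * searchp("/etc:/usr/etc", "disktab", fullname, func) invokes func on
+ * "/etc/disktab" and then, if that returns non-zero, on
+ * "/usr/etc/disktab"; the first zero return stops the search with the
+ * winning name left in fullname.
+ */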
+#include "config.h"
+
+int
+searchp(const char *spath, char *file, char *fullname, int (*func)(char *))
+{
+	const char *nextpath, *nextchar, *lastchar;
+	char *fname;
+	int failure;
+
+	nextpath = ((*file == '/') ? "" : spath);
+	do {
+		fname = fullname;
+		nextchar = nextpath;
+		while (*nextchar && (*nextchar != ':'))
+			*fname++ = *nextchar++;
+		if (nextchar != nextpath && *file) *fname++ = '/';
+		lastchar = nextchar;
+		nextpath = ((*nextchar) ? nextchar + 1 : nextchar);
+		nextchar = file;	/* append file */
+		while (*nextchar)  *fname++ = *nextchar++;
+		*fname = '\0';
+		failure = (*func) (fullname);
+	} while (failure && (*lastchar));
+	return (failure ? -1 : 0);
+}
diff --git a/SETUP/kextsymboltool/Makefile b/SETUP/kextsymboltool/Makefile
new file mode 100644
index 000000000..137f253d2
--- /dev/null
+++ b/SETUP/kextsymboltool/Makefile
@@ -0,0 +1,31 @@
+export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
+export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
+export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
+export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
+
+
+include $(MakeInc_cmd)
+include $(MakeInc_def)
+
+OBJS = kextsymboltool.o
+
+CFLAGS = -isysroot $(HOST_SDKROOT) -g -O0 -I$(SOURCE) -I.
+
+WARNFLAGS = -Wall
+
+LDFLAGS = -isysroot $(HOST_SDKROOT) -lstdc++
+
+kextsymboltool: $(OBJS)
+	$(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^
+	@echo HOST_LD $@
+	$(_v)$(HOST_CODESIGN) -s - $@
+	@echo HOST_CODESIGN $@
+
+.c.o:
+	$(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $<
+	@echo HOST_CC $@
+
+do_build_setup: kextsymboltool
+
+include $(MakeInc_rule)
+include $(MakeInc_dir)
diff --git a/SETUP/kextsymboltool/kextsymboltool.c b/SETUP/kextsymboltool/kextsymboltool.c
new file mode 100644
index 000000000..ee46713e4
--- /dev/null
+++ b/SETUP/kextsymboltool/kextsymboltool.c
@@ -0,0 +1,912 @@
+/*
+ * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#include <libc.h>
+#include <errno.h>
+#include <ctype.h>
+
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <sys/mman.h>
+
+#include <mach-o/arch.h>
+#include <mach-o/fat.h>
+#include <mach-o/loader.h>
+#include <mach-o/nlist.h>
+#include <mach-o/swap.h>
+
+#include <uuid/uuid.h>
+
+#include <IOKit/IOTypes.h>
+
+#pragma mark Typedefs, Enums, Constants
+/*********************************************************************
+* Typedefs, Enums, Constants
+*********************************************************************/
+typedef enum {
+    kErrorNone = 0,
+    kError,
+    kErrorFileAccess,
+    kErrorDiskFull,
+    kErrorDuplicate
+} ToolError;
+
+#pragma mark Function Protos
+/*********************************************************************
+* Function Protos
+*********************************************************************/
+__private_extern__ ToolError
+readFile(const char *path, vm_offset_t * objAddr, vm_size_t * objSize);
+
+__private_extern__ ToolError
+writeFile(int fd, const void * data, size_t length);
+
+extern char* __cxa_demangle (const char* mangled_name,
+				   char* buf,
+				   size_t* n,
+				   int* status);
+
+#pragma mark Functions
+/*********************************************************************
+*********************************************************************/
+__private_extern__ ToolError
+writeFile(int fd, const void * data, size_t length)
+{
+    ToolError err;
+
+    if (length != (size_t)write(fd, data, length))
+        err = kErrorDiskFull;
+    else
+        err = kErrorNone;
+
+    if (kErrorNone != err)
+        perror("couldn't write output");
+
+    return( err );
+}
+
+/*********************************************************************
+*********************************************************************/
+__private_extern__ ToolError
+readFile(const char *path, vm_offset_t * objAddr, vm_size_t * objSize)
+{
+    ToolError err = kErrorFileAccess;
+    int fd;
+    struct stat stat_buf;
+
+    *objAddr = 0;
+    *objSize = 0;
+
+    do
+    {
+        if((fd = open(path, O_RDONLY)) == -1)
+	    continue;
+
+	if(fstat(fd, &stat_buf) == -1)
+	    continue;
+
+        if (0 == (stat_buf.st_mode & S_IFREG)) 
+            continue;
+
+       /* Don't try to map an empty file, it fails now due to conformance
+        * stuff (PR 4611502).
+        */
+        if (0 == stat_buf.st_size) {
+            err = kErrorNone;
+            continue;
+        }
+
+	*objSize = stat_buf.st_size;
+
+        *objAddr = (vm_offset_t)mmap(NULL /* address */, *objSize,
+            PROT_READ|PROT_WRITE, MAP_FILE|MAP_PRIVATE /* flags */,
+            fd, 0 /* offset */);
+
+	if ((void *)*objAddr == MAP_FAILED) {
+            *objAddr = 0;
+            *objSize = 0;
+	    continue;
+	}
+
+	err = kErrorNone;
+
+    } while( false );
+
+    if (-1 != fd)
+    {
+        close(fd);
+    }
+    if (kErrorNone != err)
+    {
+        fprintf(stderr, "couldn't read %s: %s\n", path, strerror(errno));
+    }
+
+    return( err );
+}
+
+
+enum { kExported = 0x00000001, kObsolete = 0x00000002 };
+
+struct symbol {
+    char * name;
+    unsigned int name_len;
+    char * indirect;
+    unsigned int indirect_len;
+    unsigned int flags;
+    struct symbol * list;
+    unsigned int list_count;
+};
+
+static bool issymchar( char c )
+{
+    return ((c > ' ') && (c <= '~') && (c != ':') && (c != '#'));
+}
+
+static bool iswhitespace( char c )
+{
+    return ((c == ' ') || (c == '\t'));
+}
+
+/*
+ * Function for qsort for comparing symbol list names.
+ */
+static int
+qsort_cmp(const void * _left, const void * _right)
+{
+    struct symbol * left  = (struct symbol *) _left;
+    struct symbol * right = (struct symbol *) _right;
+
+    return (strcmp(left->name, right->name));
+}
+
+/*
+ * Function for bsearch for finding a symbol name.
+ */
+
+static int
+bsearch_cmp( const void * _key, const void * _cmp)
+{
+    char * key = (char *)_key;
+    struct symbol * cmp = (struct symbol *) _cmp;
+
+    return(strcmp(key, cmp->name));
+}
+
+struct bsearch_key
+{
+    char * name;
+    unsigned int name_len;
+};
+
+static int
+bsearch_cmp_prefix( const void * _key, const void * _cmp)
+{
+    struct bsearch_key * key = (struct bsearch_key *)_key;
+    struct symbol *      cmp = (struct symbol *) _cmp;
+
+    return(strncmp(key->name, cmp->name, key->name_len));
+}
+
+static uint32_t
+count_symbols(char * file, vm_size_t file_size)
+{
+    uint32_t nsyms = 0;
+    char *   scan;
+    char *   eol;
+    char *   next;
+
+    for (scan = file; true; scan = next) {
+
+        eol = memchr(scan, '\n', file_size - (scan - file));
+        if (eol == NULL) {
+            break;
+        }
+        next = eol + 1;
+
+       /* Skip empty lines.
+        */
+        if (eol == scan) {
+            continue;
+        }
+
+       /* Skip comment lines.
+        */
+        if (scan[0] == '#') {
+            continue;
+        }
+
+       /* Scan past any non-symbol characters at the beginning of the line. */
+        while ((scan < eol) && !issymchar(*scan)) {
+            scan++;
+        }
+
+       /* No symbol on line? Move along.
+        */
+        if (scan == eol) {
+            continue;
+        }
+
+       /* Skip symbols starting with '.'.
+        */
+        if (scan[0] == '.') {
+            continue;
+        }
+        nsyms++;
+    }
+    
+    return nsyms;
+}
+
+static uint32_t
+store_symbols(char * file, vm_size_t file_size, struct symbol * symbols, uint32_t idx, uint32_t max_symbols)
+{
+    char *   scan;
+    char *   line;
+    char *   eol;
+    char *   next;
+
+    uint32_t strtabsize;
+
+    strtabsize = 0;
+
+    for (scan = file, line = file; true; scan = next, line = next) {
+
+        char *       name = NULL;
+        char *       name_term = NULL;
+        unsigned int name_len = 0;
+        char *       indirect = NULL;
+        char *       indirect_term = NULL;
+        unsigned int indirect_len = 0;
+        char *       option = NULL;
+        char *       option_term = NULL;
+        unsigned int option_len = 0;
+        char         optionstr[256];
+        boolean_t    obsolete = 0;
+
+        eol = memchr(scan, '\n', file_size - (scan - file));
+        if (eol == NULL) {
+            break;
+        }
+        next = eol + 1;
+
+       /* Skip empty lines.
+        */
+        if (eol == scan) {
+            continue;
+        }
+
+        *eol = '\0';
+
+       /* Skip comment lines.
+        */
+        if (scan[0] == '#') {
+            continue;
+        }
+
+       /* Scan past any non-symbol characters at the beginning of the line. */
+        while ((scan < eol) && !issymchar(*scan)) {
+            scan++;
+        }
+
+       /* No symbol on line? Move along.
+        */
+        if (scan == eol) {
+            continue;
+        }
+
+       /* Skip symbols starting with '.'.
+        */
+        if (scan[0] == '.') {
+            continue;
+        }
+
+        name = scan;
+
+       /* Find the end of the symbol.
+        */
+        while ((*scan != '\0') && issymchar(*scan)) {
+            scan++;
+        }
+
+       /* Note char past end of symbol.
+        */
+        name_term = scan;
+
+       /* Stored length must include the terminating nul char.
+        */
+        name_len = name_term - name + 1;
+
+       /* Now look for an indirect.
+        */
+        if (*scan != '\0') {
+            while ((*scan != '\0') && iswhitespace(*scan)) {
+                scan++;
+            }
+            if (*scan == ':') {
+                scan++;
+                while ((*scan != '\0') && iswhitespace(*scan)) {
+                    scan++;
+                }
+                if (issymchar(*scan)) {
+                    indirect = scan;
+
+                   /* Find the end of the symbol.
+                    */
+                    while ((*scan != '\0') && issymchar(*scan)) {
+                        scan++;
+                    }
+
+                   /* Note char past end of symbol.
+                    */
+                    indirect_term = scan;
+
+                   /* Stored length must include the terminating nul char.
+                    */
+                    indirect_len = indirect_term - indirect + 1;
+
+                } else if (*scan == '\0') {
+		    fprintf(stderr, "bad format in symbol line: %s\n", line);
+		    exit(1);
+		}
+            } else if (*scan != '\0' && *scan != '-') {
+                fprintf(stderr, "bad format in symbol line: %s\n", line);
+                exit(1);
+            }
+        }
+
+        /* Look for options.
+         */
+        if (*scan != '\0') {
+            while ((*scan != '\0') && iswhitespace(*scan)) {
+                scan++;
+            }
+
+            if (*scan == '-') {
+                scan++;
+
+                if (isalpha(*scan)) {
+                    option = scan;
+
+                   /* Find the end of the option.
+                    */
+                    while ((*scan != '\0') && isalpha(*scan)) {
+                        scan++;
+                    }
+
+                   /* Note char past end of option.
+                    */
+                    option_term = scan;
+                    option_len = option_term - option;
+
+                    if (option_len >= sizeof(optionstr)) {
+                        fprintf(stderr, "option too long in symbol line: %s\n", line);
+                        exit(1);
+                    }
+                    memcpy(optionstr, option, option_len);
+                    optionstr[option_len] = '\0';
+
+                    /* Find the option.
+                     */
+                    if (!strncmp(optionstr, "obsolete", option_len)) {
+                        obsolete = TRUE;
+                    }
+
+                } else if (*scan == '\0') {
+		    fprintf(stderr, "bad format in symbol line: %s\n", line);
+		    exit(1);
+		}
+
+            }
+
+        }
+
+        if(idx >= max_symbols) {
+            fprintf(stderr, "symbol[%d/%d] overflow: %s\n", idx, max_symbols, line);
+            exit(1);
+        }
+
+        *name_term = '\0';
+        if (indirect_term) {
+            *indirect_term = '\0';
+        }
+        
+        symbols[idx].name = name;
+        symbols[idx].name_len = name_len;
+        symbols[idx].indirect = indirect;
+        symbols[idx].indirect_len = indirect_len;
+        symbols[idx].flags = (obsolete) ? kObsolete : 0;
+
+        strtabsize += symbols[idx].name_len + symbols[idx].indirect_len;
+        idx++;
+    }
+
+    return strtabsize;
+}
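+
+/*
+ * Editor's sketch of the list format accepted above (symbol names are
+ * hypothetical): one symbol per line, optionally followed by ':' and an
+ * indirect name, and/or a "-option" flag; '#' begins a comment, and
+ * names starting with '.' are skipped.
+ *
+ *	# plain export
+ *	_OSMalloc
+ *	# exported name resolved to an indirect implementation
+ *	_old_entry: _new_entry
+ *	# marked obsolete
+ *	_legacy_entry	-obsolete
+ */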
+
+/*********************************************************************
+*********************************************************************/
+int main(int argc, char * argv[])
+{
+    ToolError	err;
+    int			i, fd;
+    const char *	output_name = NULL;
+    uint32_t		zero = 0, num_files = 0;
+    uint32_t		filenum;
+    uint32_t		strx, strtabsize, strtabpad;
+    struct symbol *	import_symbols;
+    struct symbol *	export_symbols;
+    uint32_t		num_import_syms, num_export_syms;
+    uint32_t		result_count, num_removed_syms;
+    uint32_t		import_idx, export_idx;
+    const NXArchInfo *	host_arch;
+    const NXArchInfo *	target_arch;
+    boolean_t		require_imports = true;
+    boolean_t		diff = false;
+
+    struct file {
+        vm_offset_t  mapped;
+        vm_size_t    mapped_size;
+	uint32_t     nsyms;
+	boolean_t    import;
+	const char * path;
+    };
+    struct file files[64];
+
+    host_arch = NXGetLocalArchInfo();
+    target_arch = host_arch;
+
+    for (i = 1; i < argc; i += 2)
+    {
+	boolean_t import;
+
+        if (!strcmp("-sect", argv[i]))
+        {
+	    require_imports = false;
+	    i--;
+	    continue;
+        }
+        if (!strcmp("-diff", argv[i]))
+        {
+	    require_imports = false;
+	    diff = true;
+	    i--;
+	    continue;
+        }
+
+	if (i == (argc - 1))
+	{
+	    fprintf(stderr, "bad arguments: %s\n", argv[i]);
+	    exit(1);
+	}
+
+        if (!strcmp("-arch", argv[i]))
+        {
+            target_arch = NXGetArchInfoFromName(argv[i + 1]);
+	    if (!target_arch)
+	    {
+		fprintf(stderr, "unknown architecture name: %s\n", argv[i+1]);
+		exit(1);
+	    }
+            continue;
+        }
+        if (!strcmp("-output", argv[i]))
+        {
+	    output_name = argv[i+1];
+            continue;
+        }
+
+        if (!strcmp("-import", argv[i]))
+	    import = true;
+	else if (!strcmp("-export", argv[i]))
+	    import = false;
+	else
+	{
+	    fprintf(stderr, "unknown option: %s\n", argv[i]);
+	    exit(1);
+	}
+
+        err = readFile(argv[i+1], &files[num_files].mapped, &files[num_files].mapped_size);
+        if (kErrorNone != err)
+            exit(1);
+
+        if (files[num_files].mapped && files[num_files].mapped_size)
+	{
+	    files[num_files].import = import;
+	    files[num_files].path   = argv[i+1];
+            num_files++;
+	}
+    }
+
+    if (!output_name)
+    {
+	fprintf(stderr, "no output file\n");
+	exit(1);
+    }
+
+    num_import_syms = 0;
+    num_export_syms = 0;
+    for (filenum = 0; filenum < num_files; filenum++)
+    {
+        files[filenum].nsyms = count_symbols((char *) files[filenum].mapped, files[filenum].mapped_size);
+	if (files[filenum].import)
+	    num_import_syms += files[filenum].nsyms;
+	else
+	    num_export_syms += files[filenum].nsyms;
+    }
+    if (!num_export_syms)
+    {
+	fprintf(stderr, "no export names\n");
+	exit(1);
+    }
+
+    import_symbols = calloc(num_import_syms, sizeof(struct symbol));
+    export_symbols = calloc(num_export_syms, sizeof(struct symbol));
+
+    import_idx = 0;
+    export_idx = 0;
+
+    for (filenum = 0; filenum < num_files; filenum++)
+    {
+	if (files[filenum].import)
+	{
+	    store_symbols((char *) files[filenum].mapped, files[filenum].mapped_size,
+					import_symbols, import_idx, num_import_syms);
+	    import_idx += files[filenum].nsyms;
+	}
+	else
+	{
+	    store_symbols((char *) files[filenum].mapped, files[filenum].mapped_size,
+					export_symbols, export_idx, num_export_syms);
+	    export_idx += files[filenum].nsyms;
+	}
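+	/* Note: this warning is intentionally disabled with "false &&";
+	 * input files that contain no names are tolerated.
+	 */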
+	if (false && !files[filenum].nsyms)
+	{
+	    fprintf(stderr, "warning: file %s contains no names\n", files[filenum].path);
+	}
+    }
+
+    qsort(import_symbols, num_import_syms, sizeof(struct symbol), &qsort_cmp);
+    qsort(export_symbols, num_export_syms, sizeof(struct symbol), &qsort_cmp);
+
+    result_count = 0;
+    num_removed_syms = 0;
+    strtabsize = 4;
+    if (num_import_syms)
+    {
+	for (export_idx = 0; export_idx < num_export_syms; export_idx++)
+	{
+	    struct symbol * result;
+	    char * name;
+	    size_t len;
+	    boolean_t wild;
+
+	    name = export_symbols[export_idx].indirect;
+	    len  = export_symbols[export_idx].indirect_len;
+	    if (!name)
+	    {
+		name = export_symbols[export_idx].name;
+		len  = export_symbols[export_idx].name_len;
+	    }
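+	    /* len includes the trailing nul, so name[len - 2] is the last
+	     * visible character; the embedded "len -= 2" also leaves len as
+	     * the bare prefix length used for the range search below.
+	     */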
+	    wild = ((len > 2) && ('*' == name[len-=2]));
+	    if (wild)
+	    {
+		struct bsearch_key key;
+		key.name = name;
+		key.name_len = len;
+		result = bsearch(&key, import_symbols, 
+				    num_import_syms, sizeof(struct symbol), &bsearch_cmp_prefix);
+
+		if (result)
+		{
+		    struct symbol * first;
+		    struct symbol * last;
+
+		    strtabsize += (result->name_len + result->indirect_len);
+
+		    first = result;
+		    while (--first >= &import_symbols[0])
+		    {
+			if (bsearch_cmp_prefix(&key, first))
+			    break;
+			strtabsize += (first->name_len + first->indirect_len);
+		    }
+		    first++;
+
+		    last = result;
+		    while (++last < (&import_symbols[0] + num_import_syms))
+		    {
+			if (bsearch_cmp_prefix(&key, last))
+			    break;
+			strtabsize += (last->name_len + last->indirect_len);
+		    }
+		    result_count += last - first;
+		    result = first;
+		    export_symbols[export_idx].list = first;
+		    export_symbols[export_idx].list_count = last - first;
+		    export_symbols[export_idx].flags |= kExported;
+		}
+	    }
+	    else
+		result = bsearch(name, import_symbols, 
+				    num_import_syms, sizeof(struct symbol), &bsearch_cmp);
+
+	    if (!result && require_imports)
+	    {
+		int status;
+		char * demangled_result = 
+			__cxa_demangle(export_symbols[export_idx].name + 1, NULL, NULL, &status);
+		fprintf(stderr, "exported name not in import list: %s\n",
+					demangled_result ? demangled_result : export_symbols[export_idx].name);
+//		fprintf(stderr, "                                : %s\n", export_symbols[export_idx].name);
+		if (demangled_result) {
+			free(demangled_result);
+		}
+		num_removed_syms++;
+	    }
+	    if (diff)
+	    {
+		if (!result)
+		    result = &export_symbols[export_idx];
+		else
+		    result = NULL;
+	    }
+	    if (result && !wild)
+	    {
+		export_symbols[export_idx].flags |= kExported;
+		strtabsize += (export_symbols[export_idx].name_len + export_symbols[export_idx].indirect_len);
+		result_count++;
+		export_symbols[export_idx].list = &export_symbols[export_idx];
+		export_symbols[export_idx].list_count = 1;
+	    }
+	}
+    }
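+    /* Mach-O string tables are conventionally kept 4-byte aligned; the
+     * padding bytes are written out as zeros after the last name.
+     */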
+    strtabpad = (strtabsize + 3) & ~3;
+
+    if (require_imports && num_removed_syms)
+    {
+	err = kError;
+	goto finish;
+    }
+
+    fd = open(output_name, O_WRONLY|O_CREAT|O_TRUNC, 0755);
+    if (-1 == fd)
+    {
+	perror("couldn't write output");
+	err = kErrorFileAccess;
+	goto finish;
+    }
+
+    struct symtab_command symcmd;
+    struct uuid_command uuidcmd;
+
+    symcmd.cmd		= LC_SYMTAB;
+    symcmd.cmdsize	= sizeof(symcmd);
+    symcmd.symoff	= sizeof(symcmd) + sizeof(uuidcmd);
+    symcmd.nsyms	= result_count;
+    symcmd.strsize	= strtabpad;
+
+    uuidcmd.cmd         = LC_UUID;
+    uuidcmd.cmdsize     = sizeof(uuidcmd);
+    uuid_generate(uuidcmd.uuid);
+
+    if (CPU_ARCH_ABI64 & target_arch->cputype)
+    {
+	struct mach_header_64 hdr;
+	hdr.magic	= MH_MAGIC_64;
+	hdr.cputype	= target_arch->cputype;
+	hdr.cpusubtype	= target_arch->cpusubtype;
+	hdr.filetype	= MH_KEXT_BUNDLE;
+	hdr.ncmds	= 2;
+	hdr.sizeofcmds	= sizeof(symcmd) + sizeof(uuidcmd);
+	hdr.flags	= MH_INCRLINK;
+	hdr.reserved	= 0;	/* don't write uninitialized stack bytes */
+
+	symcmd.symoff	+= sizeof(hdr);
+	symcmd.stroff	= result_count * sizeof(struct nlist_64) 
+				+ symcmd.symoff;
+
+	if (target_arch->byteorder != host_arch->byteorder)
+	    swap_mach_header_64(&hdr, target_arch->byteorder);
+	err = writeFile(fd, &hdr, sizeof(hdr));
+    }
+    else
+    {
+	struct mach_header    hdr;
+	hdr.magic	= MH_MAGIC;
+	hdr.cputype	= target_arch->cputype;
+	hdr.cpusubtype	= target_arch->cpusubtype;
+	hdr.filetype	= (target_arch->cputype == CPU_TYPE_I386) ? MH_OBJECT : MH_KEXT_BUNDLE;
+	hdr.ncmds	= 2;
+	hdr.sizeofcmds	= sizeof(symcmd) + sizeof(uuidcmd);
+	hdr.flags	= MH_INCRLINK;
+
+	symcmd.symoff	+= sizeof(hdr);
+	symcmd.stroff	= result_count * sizeof(struct nlist) 
+				+ symcmd.symoff;
+
+	if (target_arch->byteorder != host_arch->byteorder)
+	    swap_mach_header(&hdr, target_arch->byteorder);
+	err = writeFile(fd, &hdr, sizeof(hdr));
+    }
+
+    if (kErrorNone != err)
+	goto finish;
+
+    if (target_arch->byteorder != host_arch->byteorder) {
+        swap_symtab_command(&symcmd, target_arch->byteorder);
+        swap_uuid_command(&uuidcmd, target_arch->byteorder);
+    }
+    err = writeFile(fd, &symcmd, sizeof(symcmd));
+    if (kErrorNone != err)
+	goto finish;
+    err = writeFile(fd, &uuidcmd, sizeof(uuidcmd));
+    if (kErrorNone != err)
+        goto finish;
+
+    strx = 4;
+    for (export_idx = 0; export_idx < num_export_syms; export_idx++)
+    {
+	if (!export_symbols[export_idx].name)
+	    continue;
+	if (!(kExported & export_symbols[export_idx].flags))
+	    continue;
+
+	if (export_idx
+	  && export_symbols[export_idx - 1].name
+	  && !strcmp(export_symbols[export_idx - 1].name, export_symbols[export_idx].name))
+	{
+	    fprintf(stderr, "duplicate export: %s\n", export_symbols[export_idx - 1].name);
+	    err = kErrorDuplicate;
+	    goto finish;
+	}
+
+	for (import_idx = 0; import_idx < export_symbols[export_idx].list_count; import_idx++)
+	{
+
+	    if (export_symbols[export_idx].list != &export_symbols[export_idx])
+	    {
+		printf("wild: %s, %s\n", export_symbols[export_idx].name, 
+			export_symbols[export_idx].list[import_idx].name);
+	    }
+	    if (CPU_ARCH_ABI64 & target_arch->cputype)
+	    {
+		struct nlist_64 nl;
+
+		nl.n_sect  = 0;
+                nl.n_desc  = 0;
+		nl.n_un.n_strx = strx;
+		strx += export_symbols[export_idx].list[import_idx].name_len;
+
+                if (export_symbols[export_idx].flags & kObsolete) {
+                    nl.n_desc |= N_DESC_DISCARDED;
+                }
+
+		if (export_symbols[export_idx].list[import_idx].indirect)
+		{
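+		    /* For N_INDR, n_value carries the string-table offset of
+		     * the indirect (aliased) name, which is written directly
+		     * after this symbol's own name.
+		     */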
+		    nl.n_type  = N_INDR | N_EXT;
+		    nl.n_value = strx;
+		    strx += export_symbols[export_idx].list[import_idx].indirect_len;
+		}
+		else
+		{
+		    nl.n_type  = N_UNDF | N_EXT;
+		    nl.n_value = 0;
+		}
+
+		if (target_arch->byteorder != host_arch->byteorder)
+		    swap_nlist_64(&nl, 1, target_arch->byteorder);
+
+		err = writeFile(fd, &nl, sizeof(nl));
+	    }
+	    else
+	    {
+		struct nlist nl;
+
+		nl.n_sect  = 0;
+		nl.n_desc  = 0;
+		nl.n_un.n_strx = strx;
+		strx += export_symbols[export_idx].list[import_idx].name_len;
+
+                if (export_symbols[export_idx].flags & kObsolete) {
+                    nl.n_desc |= N_DESC_DISCARDED;
+                }
+
+		if (export_symbols[export_idx].list[import_idx].indirect)
+		{
+		    nl.n_type  = N_INDR | N_EXT;
+		    nl.n_value = strx;
+		    strx += export_symbols[export_idx].list[import_idx].indirect_len;
+		}
+		else
+		{
+		    nl.n_type  = N_UNDF | N_EXT;
+		    nl.n_value = 0;
+		}
+
+		if (target_arch->byteorder != host_arch->byteorder)
+		    swap_nlist(&nl, 1, target_arch->byteorder);
+
+		err = writeFile(fd, &nl, sizeof(nl));
+	    }
+	}
+
+	if (kErrorNone != err)
+	    goto finish;
+    }
+
+    strx = sizeof(uint32_t);
+    err = writeFile(fd, &zero, strx);
+    if (kErrorNone != err)
+	goto finish;
+
+    for (export_idx = 0; export_idx < num_export_syms; export_idx++)
+    {
+	if (!export_symbols[export_idx].name)
+	    continue;
+
+	for (import_idx = 0; import_idx < export_symbols[export_idx].list_count; import_idx++)
+	{
+	    err = writeFile(fd, export_symbols[export_idx].list[import_idx].name, 
+			export_symbols[export_idx].list[import_idx].name_len);
+	    if (kErrorNone != err)
+		goto finish;
+	    if (export_symbols[export_idx].list[import_idx].indirect)
+	    {
+		err = writeFile(fd, export_symbols[export_idx].list[import_idx].indirect, 
+			    export_symbols[export_idx].list[import_idx].indirect_len);
+		if (kErrorNone != err)
+		    goto finish;
+	    }
+	}
+    }
+
+    err = writeFile(fd, &zero, strtabpad - strtabsize);
+    if (kErrorNone != err)
+	goto finish;
+
+    close(fd);
+
+finish:
+    for (filenum = 0; filenum < num_files; filenum++) {
+        // unmap file
+        if (files[filenum].mapped_size)
+        {
+            munmap((caddr_t)files[filenum].mapped, files[filenum].mapped_size);
+            files[filenum].mapped     = 0;
+            files[filenum].mapped_size = 0;
+        }
+
+    }
+
+    if (kErrorNone != err)
+    {
+	if (output_name)
+	    unlink(output_name);
+        exit(1);
+    }
+    else
+        exit(0);
+    return(0);
+}
+
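+/* Illustrative invocation (options as parsed in main() above; the file
+ * names are hypothetical):
+ *
+ *     kextsymboltool -arch x86_64 \
+ *         -import allsymbols -export mysymbols.exports \
+ *         -output symbolset.o
+ *
+ * -sect drops the requirement that every export appear in an import list;
+ * -diff instead emits the export names that are missing from the imports.
+ */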
diff --git a/security/conf/tools/newvers/newvers.csh b/SETUP/newvers
old mode 100644
new mode 100755
similarity index 100%
rename from security/conf/tools/newvers/newvers.csh
rename to SETUP/newvers
diff --git a/SETUP/seed_objroot b/SETUP/seed_objroot
deleted file mode 100755
index 6773e70e4..000000000
--- a/SETUP/seed_objroot
+++ /dev/null
@@ -1,133 +0,0 @@
-#!/bin/sh
-
-if [ ! $OBJROOT ]
-then
-  echo "OBJROOT not defined"
-  exit 1
-fi
-
-if [ ! $PREBUILT_OBJROOT ]
-then
-  PREBUILT_OBJROOT=/Prebuilt/$1/xnu/BUILD/obj
-fi
-
-if [ ! -e $PREBUILT_OBJROOT ]
-then
-  echo "$PREBUILT_OBJROOT doesn't exist"
-  exit 1
-else
-if [ $# = 2 -a ! -e $PREBUILT_OBJROOT/$2 ]
-then
-  echo "$PREBUILT_OBJROOT/$2 doesn't exist"
-  exit 1
-fi
-if [ -e $PREBUILT_OBJROOT/BUILDING_SEED_OBJROOT ]
-then
-  echo "Building $PREBUILT_OBJROOT, try later"
-  exit 1
-fi
-fi
-
-cd $PREBUILT_OBJROOT
-
-if [ $# = 1 ]
-then
-
-if [ ! -e $OBJROOT ]
-then
-mkdir -p $OBJROOT
-echo "Copying $PREBUILT_OBJROOT in $OBJROOT"
-pax -rw . $OBJROOT
-else
-echo "Remove $OBJROOT before calling seed_objroot"
-exit 1
-fi
-
-else
-
-if [ ! -e $OBJROOT/$2 ]
-then
-mkdir -p $OBJROOT/$2
-echo "Copying $PREBUILT_OBJROOT/$2 in $OBJROOT/$2"
-pax -rw $2 $OBJROOT
-RELEASE_OBJ=`echo $2 | sed 's/DEBUG/RELEASE/'`
-if [ $1 != $RELEASE_OBJ -a ! -e $OBJROOT/$RELEASE_OBJ ]
-then
-mkdir -p $OBJROOT/$RELEASE_OBJ
-echo "Copying $PREBUILT_OBJROOT/$RELEASE_OBJ in $OBJROOT/$RELEASE_OBJ"
-pax -rw $RELEASE_OBJ $OBJROOT
-fi
-
-else
-echo "remove $OBJROOT/$2 before calling seed_objroot"
-exit 1
-fi
-
-fi
-
-if [ ! -e $OBJROOT/EXPORT_HDRS ]
-then
-echo "Copying $PREBUILT_OBJROOT/EXPORT_HDRS in $OBJROOT/EXPORT_HDRS"
-mkdir -p $OBJROOT/EXPORT_HDRS
-pax -rw EXPORT_HDRS $OBJROOT
-fi
-
-cd $OBJROOT
-if [ -e RELEASE_PPC/osfmk/RELEASE/config.RELEASE_PPC ]
-then
-PREV_OBJROOT=`grep objectdir RELEASE_PPC/osfmk/RELEASE/config.RELEASE_PPC | cut -f 2 -d\" |
- sed 's|/RELEASE_PPC/osfmk/RELEASE||'`
-fi
-if [ -z $PREV_OBJROOT -a -e DEBUG_PPC/osfmk/DEBUG/config.DEBUG_PPC ]
-then
-  PREV_OBJROOT=`grep objectdir DEBUG_PPC/osfmk/DEBUG/config.DEBUG_PPC | cut -f 2 -d\" |
-   sed 's|/DEBUG_PPC/osfmk/DEBUG||'`
-fi
-if [ -z $PREV_OBJROOT -a -e RELEASE_I386/osfmk/RELEASE/config.RELEASE_I386 ]
-then
-  PREV_OBJROOT=`grep objectdir RELEASE_I386/osfmk/RELEASE/config.RELEASE_I386 | cut -f 2 -d\" |
-   sed 's|/RELEASE_I386/osfmk/RELEASE||'`
-fi
-if [ -z $PREV_OBJROOT -a -e DEBUG_I386/osfmk/DEBUG/config.DEBUG_I386 ]
-then
-  PREV_OBJROOT=`grep objectdir DEBUG_I386/osfmk/DEBUG/config.DEBUG_I386 | cut -f 2 -d\" |
-   sed 's|/DEBUG_I386/osfmk/DEBUG||'`
-fi
-if [ -z $PREV_OBJROOT ]
-then
-  echo "PREV_OBJROOT not found"
-  exit 1
-fi
-
-if [ -e RELEASE_PPC/osfmk/RELEASE/config.RELEASE_PPC ]
-then
-PREV_SRCROOT=`grep sourcedir RELEASE_PPC/osfmk/RELEASE/config.RELEASE_PPC | cut -f 2 -d\"` 
-fi
-if [ -z $PREV_SRCROOT -a -e DEBUG_PPC/osfmk/DEBUG/config.DEBUG_PPC ]
-then
-  PREV_SRCROOT=`grep sourcedir DEBUG_PPC/osfmk/DEBUG/config.DEBUG_PPC | cut -f 2 -d\"` 
-fi
-if [ -z $PREV_SRCROOT -a -e RELEASE_I386/osfmk/RELEASE/config.RELEASE_I386 ]
-then
-PREV_SRCROOT=`grep sourcedir RELEASE_I386/osfmk/RELEASE/config.RELEASE_I386 | cut -f 2 -d\"` 
-fi
-if [ -z $PREV_SRCROOT -a -e DEBUG_I386/osfmk/DEBUG/config.DEBUG_I386 ]
-then
-  PREV_SRCROOT=`grep sourcedir DEBUG_I386/osfmk/DEBUG/config.DEBUG_I386 | cut -f 2 -d\"` 
-fi
-if [ -z $PREV_SRCROOT ]
-then
-  echo "PREV_SRCROOT not found"
-  exit 1
-fi
-
-echo "s|$PREV_OBJROOT|$OBJROOT|" > prebuild.sed
-echo "s|$PREV_SRCROOT|$SRCROOT|" >>prebuild.sed
-
-for i in `find . -name Makedep -print`
-do
-sed -f prebuild.sed $i > $i.tmp
-rm $i
-mv $i.tmp $i
-done
-rm -f `find $OBJROOT -name Makefile -print` prebuild.sed
diff --git a/SETUP/setsegname/Makefile b/SETUP/setsegname/Makefile
new file mode 100644
index 000000000..70e5e2641
--- /dev/null
+++ b/SETUP/setsegname/Makefile
@@ -0,0 +1,31 @@
+export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
+export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
+export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
+export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
+
+
+include $(MakeInc_cmd)
+include $(MakeInc_def)
+
+OBJS = setsegname.o
+
+CFLAGS = -isysroot $(HOST_SDKROOT) -g -O0 -I$(SOURCE) -I.
+
+WARNFLAGS = -Wall
+
+LDFLAGS = -isysroot $(HOST_SDKROOT)
+
+setsegname: $(OBJS)
+	$(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^
+	@echo HOST_LD $@
+	$(_v)$(HOST_CODESIGN) -s - $@
+	@echo HOST_CODESIGN $@
+
+.c.o:
+	$(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $<
+	@echo HOST_CC $@
+
+do_build_setup: setsegname
+
+include $(MakeInc_rule)
+include $(MakeInc_dir)
diff --git a/SETUP/setsegname/setsegname.c b/SETUP/setsegname/setsegname.c
new file mode 100644
index 000000000..9afd6bc5d
--- /dev/null
+++ b/SETUP/setsegname/setsegname.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2007 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#include <libc.h>
+#include <errno.h>
+
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <sys/mman.h>
+
+#include <mach-o/swap.h>
+
+#include <IOKit/IOTypes.h>
+
+/*********************************************************************
+*********************************************************************/
+static int
+writeFile(int fd, const void * data, size_t length)
+{
+    int error = 0;
+
+    if (length != (size_t)write(fd, data, length)) {
+        error = -1;
+    }
+
+    if (error != 0) {
+        perror("couldn't write output");
+    }
+
+    return error;
+}
+
+/*********************************************************************
+*********************************************************************/
+static int
+readFile(const char *path, vm_offset_t * objAddr, vm_size_t * objSize)
+{
+    int error = -1;
+    int fd;
+    struct stat stat_buf;
+
+    *objAddr = 0;
+    *objSize = 0;
+
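+    /* do { ... } while (false): each "continue" below jumps to the (false)
+     * loop test, acting as a structured bail-out to the cleanup code.
+     */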
+    do {
+        if ((fd = open(path, O_RDONLY)) == -1) {
+            continue;
+        }
+
+        if (fstat(fd, &stat_buf) == -1) {
+            continue;
+        }
+
+        if (0 == (stat_buf.st_mode & S_IFREG)) {
+            continue;
+        }
+
+        if (0 == stat_buf.st_size) {
+            error = 0;
+            continue;
+        }
+
+        *objSize = stat_buf.st_size;
+
+        *objAddr = (vm_offset_t)mmap(NULL /* address */, *objSize,
+            PROT_READ|PROT_WRITE, MAP_FILE|MAP_PRIVATE /* flags */,
+            fd, 0 /* offset */);
+
+        if ((void *)*objAddr == MAP_FAILED) {
+                *objAddr = 0;
+                *objSize = 0;
+            continue;
+        }
+
+        error = 0;
+
+    } while (false);
+
+    if (-1 != fd) {
+        close(fd);
+    }
+    if (error) {
+        fprintf(stderr, "couldn't read %s: %s\n", path, strerror(errno));
+    }
+
+    return error;
+}
+
+/*********************************************************************
+*********************************************************************/
+int main(int argc, char * argv[])
+{
+    int                     error;
+    const char            * output_name = NULL;
+    const char            * newseg_name = NULL;
+    struct mach_header    * hdr;
+    struct mach_header_64 * hdr64;
+    struct load_command   * cmds;
+    boolean_t		        swap = false;
+    uint32_t		        ncmds, cmdtype;
+    uint32_t		        len;
+    vm_offset_t		        input;
+    vm_size_t		        input_size;
+    uint32_t		        nsects = 0;
+    uint32_t                * flags = NULL;
+    uint32_t		        attr;
+    typedef char            segname_t[16];
+    segname_t             * names = NULL;
+
+    if ((argc != 5) || strcmp("-o", argv[3])) {
+        fprintf(stderr, "Usage: %s NEWSEGNAME input -o output\n", argv[0]);
+        exit(1);
+    }
+
+    output_name = argv[4];
+    newseg_name = argv[1];
+
+    error = readFile(argv[2], &input, &input_size);
+    if (error) {
+        exit(1);
+    }
+
+    hdr = (typeof(hdr)) input;
+    switch (hdr->magic) {
+        case MH_CIGAM:
+            swap = true;
+            // fall thru
+        case MH_MAGIC:
+            ncmds = hdr->ncmds;
+            cmds  = (typeof(cmds)) (hdr+1);
+            break;
+
+        case MH_CIGAM_64:
+            swap = true;
+            // fall thru
+        case MH_MAGIC_64:
+            hdr64 = (typeof(hdr64)) hdr;
+            ncmds = hdr64->ncmds;
+            cmds  = (typeof(cmds)) (hdr64+1);
+            break;
+
+        default:
+            fprintf(stderr, "not macho input file\n");
+            exit(1);
+            break;
+    }
+
+    if (swap) {
+        ncmds = OSSwapInt32(ncmds);
+    }
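+    /* Load commands follow the Mach-O header contiguously; each begins with
+     * a cmd/cmdsize pair, and cmdsize (byte-swapped when needed) advances to
+     * the next command at the bottom of this loop.
+     */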
+    while (ncmds--) {
+        cmdtype = cmds->cmd;
+        if (swap) {
+            cmdtype = OSSwapInt32(cmdtype);
+        }
+        nsects = 0;
+        len    = 0;
+        if (LC_SEGMENT == cmdtype) {
+            struct segment_command * segcmd;
+            struct section         * sects;
+
+            segcmd = (typeof(segcmd)) cmds;
+            nsects = segcmd->nsects;
+            sects  = (typeof(sects))(segcmd + 1);
+            names  = &sects->segname;
+            flags  = &sects->flags;
+            len    = sizeof(*sects);
+        } else if (LC_SEGMENT_64 == cmdtype) {
+            struct segment_command_64 * segcmd;
+            struct section_64         * sects;
+
+            segcmd = (typeof(segcmd)) cmds;
+            nsects = segcmd->nsects;
+            sects  = (typeof(sects))(segcmd + 1);
+            names  = &sects->segname;
+            flags  = &sects->flags;
+            len    = sizeof(*sects);
+        }
+
+        if (swap)
+            nsects = OSSwapInt32(nsects);
+        while (nsects--) {
+            attr = *flags;
+            if (swap) {
+                attr = OSSwapInt32(attr);
+            }
+
+            if (!(S_ATTR_DEBUG & attr)) {
+                strncpy((char *)names, newseg_name, sizeof(*names));
+            }
+
+            names = (typeof(names))(((uintptr_t) names) + len);
+            flags = (typeof(flags))(((uintptr_t) flags) + len);
+        }
+
+        len = cmds->cmdsize;
+        if (swap) {
+            len = OSSwapInt32(len);
+        }
+        cmds = (typeof(cmds))(((uintptr_t) cmds) + len);
+    }
+
+    int fd = open(output_name, O_WRONLY|O_CREAT|O_TRUNC, 0755);
+    if (-1 == fd) {
+        error = -1;
+    } else {
+        error = writeFile(fd, (const void *) input, input_size);
+        close(fd);
+    }
+
+    if (error) {
+        fprintf(stderr, "couldn't write output: %s\n", strerror(errno));
+        exit(1);
+    }
+
+    exit(0);
+    return 0;
+}
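+
+/* Example (mirrors the Usage string above; the names are hypothetical):
+ *
+ *     setsegname __KLD kld_object.o -o kld_renamed.o
+ *
+ * Every section that is not marked S_ATTR_DEBUG has its segment name
+ * overwritten with NEWSEGNAME in the copy written to the output file.
+ */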
diff --git a/bsd/Makefile b/bsd/Makefile
index d4df2fc62..8beb22975 100644
--- a/bsd/Makefile
+++ b/bsd/Makefile
@@ -26,14 +26,13 @@ INSTINC_SUBDIRS = \
 	uuid \
 	vfs
 
-INSTINC_SUBDIRS_PPC = \
-	ppc 
-
 INSTINC_SUBDIRS_I386 = \
-	i386 
+	i386 \
+	crypto
 
 INSTINC_SUBDIRS_X86_64 = \
-	i386 
+	i386 \
+	crypto
 
 INSTINC_SUBDIRS_ARM = \
 	arm 
@@ -58,9 +57,6 @@ EXPINC_SUBDIRS = \
 	vfs \
 	vm
 
-EXPINC_SUBDIRS_PPC = \
-	ppc 
-
 EXPINC_SUBDIRS_I386 = \
 	i386 
 
@@ -70,16 +66,17 @@ EXPINC_SUBDIRS_X86_64 = \
 EXPINC_SUBDIRS_ARM = \
 	arm 
 
-SETUP_SUBDIRS = 	\
-	conf
+SETUP_SUBDIRS =
 
 COMP_SUBDIRS = 	\
 	conf
 
 INST_SUBDIRS =	\
+	kern
 
 INSTMAN_SUBDIRS = \
 	man
 
+
 include $(MakeInc_rule)
 include $(MakeInc_dir)
diff --git a/bsd/bsm/Makefile b/bsd/bsm/Makefile
index 0bb6f4dcf..f660aafb5 100644
--- a/bsd/bsm/Makefile
+++ b/bsd/bsm/Makefile
@@ -9,16 +9,12 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 INSTINC_SUBDIRS_X86_64 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS_X86_64 = \
diff --git a/bsd/bsm/audit.h b/bsd/bsm/audit.h
index bb4a9497b..a24cc88d7 100644
--- a/bsd/bsm/audit.h
+++ b/bsd/bsm/audit.h
@@ -125,6 +125,8 @@
 #define	A_SETQCTRL	36
 #define	A_GETCOND	37
 #define	A_SETCOND	38
+#define	A_GETSFLAGS	39
+#define	A_SETSFLAGS	40
 
 /*
  * Audit policy controls.
@@ -318,6 +320,7 @@ int	setaudit_addr(const struct auditinfo_addr *, int);
 #include <mach/port.h>
 mach_port_name_t audit_session_self(void);
 au_asid_t	 audit_session_join(mach_port_name_t port);
+int		 audit_session_port(au_asid_t asid, mach_port_name_t *portname);
 #endif /* __APPLE_API_PRIVATE */
 
 #endif /* defined(_KERNEL) || defined(KERNEL) */
diff --git a/bsd/bsm/audit_kevents.h b/bsd/bsm/audit_kevents.h
index 268c456c3..25e3eb829 100644
--- a/bsd/bsm/audit_kevents.h
+++ b/bsd/bsm/audit_kevents.h
@@ -36,7 +36,7 @@
  * The reserved event numbers for kernel events are 1...2047 and 43001..44900.
  */
 #define	AUE_IS_A_KEVENT(e)	(((e) > 0 && (e) < 2048) || 	\
-    				 ((e) > 43000 && (e) < 45000))
+    				 ((e) > 43000 && (e) < 44901))
 
 /*
  * Values marked as AUE_NULL are not required to be audited as per CAPP.
@@ -596,6 +596,16 @@
 #define	AUE_PWRITE		43193	/* Darwin/FreeBSD. */
 #define	AUE_FSCTL		43194	/* Darwin. */
 #define	AUE_FFSCTL		43195	/* Darwin. */
+#define	AUE_LPATHCONF		43196	/* FreeBSD. */
+#define	AUE_PDFORK		43197	/* FreeBSD. */
+#define	AUE_PDKILL		43198	/* FreeBSD. */
+#define	AUE_PDGETPID		43199	/* FreeBSD. */
+#define	AUE_PDWAIT		43200	/* FreeBSD. */
+
+#define	AUE_SESSION_START	44901	/* Darwin. */
+#define	AUE_SESSION_UPDATE	44902	/* Darwin. */
+#define	AUE_SESSION_END		44903	/* Darwin. */
+#define	AUE_SESSION_CLOSE	44904	/* Darwin. */
 
 /*
  * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the
diff --git a/bsd/conf/MASTER b/bsd/conf/MASTER
index 93872ad07..bb57c6dae 100644
--- a/bsd/conf/MASTER
+++ b/bsd/conf/MASTER
@@ -101,6 +101,7 @@ options		MACH_IPC_COMPAT	# Enable old IPC interface	# <ipc_compat>
 options		MACH_IPC_DEBUG	# Enable IPC debugging calls	# <ipc_debug>
 options		MACH_IPC_TEST	# Testing code/printfs		# <ipc_test>
 options		MACH_LDEBUG	# Sanity-check simple locking	# <test>
+options		CONFIG_ZLEAKS	# Live zone leak debug sysctls	# <zleaks>
 options 	MACH_NP		# Mach IPC support		# <np>
 options		MACH_NBC	# No buffer cache		# <nbc>
 options		MACH_NET	# Fast network access		# <mach_net>
@@ -123,15 +124,12 @@ options		LLC		# 802.2 support			# <llc>
 options		LOOP		# loopback support		# <loop>
 options		MROUTING	# multicast routing		# <mrouting>
 options		ROUTING		# routing			# <routing>
-options		NETMIBS		#				# <netmibs>
 options		VLAN		#				# <vlan>
 options		BOND		#				# <bond>
 options		PF		# Packet Filter			# <pf>
 options		PF_PKTHDR	# PF tag inside mbuf pkthdr	# <pf_pkthdr>
-options		PKT_PRIORITY	# Packet priority support	# <pkt_priority>
 options		PFLOG		# PF log interface		# <pflog>
 options		IPDIVERT        # Divert sockets (for NAT)      # <ipdivert>
-options		IPFLOW		# IP fast forwarding		# <ipflow>
 options		IPFIREWALL      # IP Firewalling (used by NAT)  # <ipfirewall>
 options		IPFIREWALL_FORWARD      #Transparent proxy      # <ipfirewall>
 options		IPFIREWALL_DEFAULT_TO_ACCEPT    # allow everything by default   # <ipfirewall>
@@ -144,7 +142,6 @@ options		RANDOM_IP_ID	# random (not sequential) ip ids	# <randomipid>
 options		TCP_DROP_SYNFIN	# Drop TCP packets with SYN+FIN set	# <tcpdrop_synfin>
 options		ICMP_BANDLIM	# ICMP bandwidth limiting sysctl
 options		IFNET_INPUT_SANITY_CHK	# allow dlil/ifnet input sanity check # <ifnet_input_chk>
-options		IFNET_ROUTE_REFCNT # count route references to ifnet	# <ifnet_route_refcnt>
 options		SYSV_SEM	# SVID semaphores			# <sysv_sem>
 options		SYSV_MSG	# SVID messages				# <sysv_msg>
 options		SYSV_SHM	# SVID shared mem			# <sysv_shm>
@@ -169,22 +166,18 @@ options		NETWORKING	# networking layer			# <inet, inet6, netat>
 options		CONFIG_FSE	# file system events		# <config_fse>
 options		CONFIG_IMAGEBOOT	# local image boot	# <config_imageboot>
 options		CONFIG_SOWUPCALL	# SB_UPCALL on sowwakeup	# <config_sowupcall>
-options		CONFIG_MBUF_NOEXPAND	# limit mbuf expansion	# <config_mbuf_noexpand>
 options		CONFIG_MBUF_JUMBO	# jumbo cluster pool	# <config_mbuf_jumbo>
-options		CONFIG_MBUF_TAGS_MALLOC # use malloc for tags	# <config_mbuf_tags_malloc>
 options		CONFIG_FORCE_OUT_IFP	# Enable IP_FORCE_OUT_IFP # <config_force_out_ifp>
 options		CONFIG_IFEF_NOWINDOWSCALE # Scale TCP window per driver # <config_ifef_nowindowscale>
 
 options		CONFIG_WORKQUEUE	# <config_workqueue>
 
-
 #
 #	4.4 filesystems 
 #
 options		FFS			# Fast Filesystem Support	# <ffs>
 options		HFS			# HFS/HFS+ support		# <hfs>
 options		FIFO		# fifo support			# <fifo>
-options		UNION		# union_fs support		# <union>
 options		FDESC		# fdesc_fs support		# <fdesc>
 options		DEVFS		# devfs support			# <devfs>
 options		JOURNALING	# journaling support	# <journaling>
@@ -199,6 +192,7 @@ options		REV_ENDIAN_FS	# Reverse Endian FS		# <revfs>
 options		NAMEDSTREAMS	# named stream vnop support	# <namedstreams>
 options		CONFIG_VOLFS	# volfs path support (legacy)	# <config_volfs>
 options		CONFIG_IMGSRC_ACCESS # source of imageboot dmg	# <config_imgsrc_access>
+options		CONFIG_TRIGGERS	# trigger vnodes		# <config_triggers>
 
 #
 # NFS support
@@ -249,6 +243,7 @@ options			randomipid		# <inet,randomipid>
 
 options		ZLIB		# inflate/deflate support	# <zlib>
 
+options		IF_BRIDGE			# <if_bridge>
 
 makeoptions	LIBDRIVER = "libDriver_kern.o"			# <libdriver>
 makeoptions	LIBOBJC   = "libkobjc.o"			# <kernobjc>
@@ -292,7 +287,6 @@ options   CONFIG_KN_HASHSIZE=20		# <bsmall>
 options   CONFIG_VNODES=263168		# <large,xlarge>
 options   CONFIG_VNODES=263168		# <medium>
 options   CONFIG_VNODES=10240		# <small>
-options   CONFIG_VNODES=1024		# <xsmall>
 options   CONFIG_VNODES=750			# <bsmall>
 
 options   CONFIG_VNODE_FREE_MIN=500		# <large,xlarge>
@@ -396,6 +390,12 @@ options   CONFIG_MFCTBLSIZ=256			# <medium,large,xlarge>
 options   CONFIG_MFCTBLSIZ=128			# <small,xsmall>
 options   CONFIG_MFCTBLSIZ=16			# <bsmall>
 
+#
+# configurable kernel message buffer size
+#
+options   CONFIG_MSG_BSIZE=4096			# <bsmall,small,xsmall>
+options   CONFIG_MSG_BSIZE=16384		# <medium,large,xlarge>
+
 #
 #  configurable kernel - use these options to strip strings from panic
 #  and printf calls.
@@ -406,6 +406,11 @@ options   CONFIG_NO_PANIC_STRINGS		# <no_panic_str>
 options   CONFIG_NO_PRINTF_STRINGS		# <no_printf_str>
 options   CONFIG_NO_KPRINTF_STRINGS		# <no_kprintf_str>
 
+#
+# use finer-grained lock groups for the proc subsystem
+#
+options   CONFIG_FINE_LOCK_GROUPS               # <medium,large,xlarge>
+
 #
 # configurable kernel - general switch to say we are building for an
 # embedded device
@@ -433,6 +438,14 @@ options		CONFIG_CODE_DECRYPTION	# <config_embedded>
 
 options		CONFIG_PROTECT	# <config_protect>
 
+#
+# freeze - support app hibernation, used on embedded
+# CONFIG_FREEZE_SUSPENDED_MIN is the minimum number of suspended
+# processes to be left unhibernated
+#
+options		CONFIG_FREEZE					# <freeze>
+
+options		CHECK_CS_VALIDATION_BITMAP			# <config_cs_validation_bitmap>
 
 #
 #  Ethernet (ARP)
@@ -463,6 +476,7 @@ pseudo-device  vndevice	   16       init    vndevice_init   # <xlarge>
 pseudo-device  vndevice		8       init    vndevice_init   # <large>
 pseudo-device  vndevice		4       init    vndevice_init   # <medium>
 pseudo-device  vndevice		3       init    vndevice_init   # <small>
+pseudo-device  vndevice		2       init    vndevice_init   # <xsmall>
 pseudo-device  vndevice		2       init    vndevice_init   # <bsmall>
 
 #
diff --git a/bsd/conf/MASTER.i386 b/bsd/conf/MASTER.i386
index 1e6641911..594f0fb51 100644
--- a/bsd/conf/MASTER.i386
+++ b/bsd/conf/MASTER.i386
@@ -44,21 +44,20 @@
 #  
 #  Standard Apple Research Configurations:
 #  -------- ----- -------- ---------------
-#  BASE =        [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ]
-#  FILESYS =	 [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs config_hfs_trim hfs_compression config_imgsrc_access ]
-#  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ipflow pkt_priority if_bridge ]
+#  BASE =        [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch zleaks ]
+#  FILESYS =	 [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo config_volfs config_hfs_trim hfs_compression config_hfs_alloc_rbtree config_imgsrc_access config_triggers ]
+#  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo if_bridge pf pflog pf_pkthdr ]
 #  NFS =         [ nfsclient nfsserver ]
 #  VPN =         [ ipsec ]
 #  RELEASE =     [ BASE NETWORKING NFS VPN FILESYS libdriver ]
 #  PROFILE =     [ RELEASE profile ]
-#  DEBUG =       [ BASE NETWORKING NFS VPN FILESYS libdriver_g debug xpr_debug mach_assert pf pflog ]
-#
+#  DEBUG =       [ BASE NETWORKING NFS VPN FILESYS libdriver_g debug xpr_debug mach_assert ]
 #
 #  EMBEDDED_BASE =	[ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ]
 #  EMBEDDED_FILESYS =	[ devfs hfs journaling fdesc fifo ]
-#  EMBEDDED_NET =	[ inet compat_oldsock tcpdrop_synfin bpfilter config_mbuf_noexpand ]
+#  EMBEDDED_NET =	[ inet compat_oldsock tcpdrop_synfin bpfilter ]
 #  EMBEDDED =		[ EMBEDDED_BASE EMBEDDED_NET VPN EMBEDDED_FILESYS libdriver no_printf_str no_kprintf_str no_kdebug ]
-#  DEVELOPMENT =	[ EMBEDDED_BASE EMBEDDED_NET NFS VPN EMBEDDED_FILESYS libdriver netmibs development mach_assert config_dtrace ]
+#  DEVELOPMENT =	[ EMBEDDED_BASE EMBEDDED_NET NFS VPN EMBEDDED_FILESYS libdriver development mach_assert config_dtrace ]
 #
 ######################################################################
 #
diff --git a/bsd/conf/MASTER.ppc b/bsd/conf/MASTER.ppc
deleted file mode 100644
index d99b6e4f5..000000000
--- a/bsd/conf/MASTER.ppc
+++ /dev/null
@@ -1,99 +0,0 @@
-#
-# Mach Operating System
-# Copyright (c) 1986 Carnegie-Mellon University
-# All rights reserved.  The CMU software License Agreement
-# specifies the terms and conditions for use and redistribution.
-#  
-######################################################################
-#
-#  Master Apple configuration file (see the master machine independent
-#  configuration file for a description of the file format).
-#
-######################################################################
-#
-#  Apple (PSEUDO-)DEVICES (select any combination)
-#	ex   = Excelan EXOS 202 Ethernet interface
-#	ip   = Interphase V/SMD 3200 disk controller
-#	od   = Canon OMD-1 Optical Disk
-#	rd   = RAM disk
-#	sd   = SCSI disk
-#	sg   = Generic SCSI Device
-#	st   = SCSI tape
-#	fd   = Floppy Disk 
-#	en   = Integrated Ethernet controller
-#	dsp  = DSP560001 digital signal processor
-#	iplmeas = ipl time measurement
-#	nextp = NeXT Laser Printer
-#	sound = sound I/O
-#	vol   = removable volume support device
-#	venip = virtual Ethernet/IP network interface
-#	zs    = Serial device
-#
-#  MULTIPROCESSOR SUPPORT (select exactly one)
-#	multi = support 4 processors
-#	uni   = supports single processor
-#
-#  SPECIAL CHARACTERISTICS (select any combination)
-#	gdb        = GNU kernel debugger
-#	posix_kern = POSIX support
-#
-#  CPU TYPE (select exactly one)
-#	NeXT   = FIXME
-#
-######################################################################
-#  
-#  Standard Apple Research Configurations:
-#  -------- ----- -------- ---------------
-#
-#  BASE =        [ ppc mach medium config_dtrace vol pst gdb noprofiling simple_clock kernstack sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue ]
-#  FILESYS =	 [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs config_hfs_trim hfs_compression ]
-#  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk ipflow pkt_priority ]
-#  NFS =         [ nfsclient nfsserver ]
-#  VPN =         [ ipsec ]
-#  RELEASE =     [ BASE NETWORKING NFS VPN FILESYS libdriver ]
-#  DEVELOPMENT = [ RELEASE ]
-#  PROFILE =     [ RELEASE profile ]
-#  DEBUG =       [ BASE NETWORKING NFS VPN FILESYS libdriver_g debug xpr_debug mach_assert pf pflog ]
-#
-######################################################################
-#
-machine		"ppc"						# <ppc>
-cpu		"ppc"						# <ppc>
-
-options		GDB		# GNU kernel debugger		# <gdb>
-options		DEBUG		# general debugging code	# <debug>
-options		SHOW_SPACE	# print size of structures	# <debug>
-options		EVENTMETER	# event meter support		# <debug>
-options		FP_EMUL		# floating point emulation	# <fp>
-options		UXPR		# user-level XPR package	# <uxpr>
-config		mach_kernel	swap generic			# <mach>
-
-#
-# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and 
-# security/conf MASTER files.
-#
-options		CONFIG_MACF			# Mandatory Access Control Framework
-options		CONFIG_MACF_SOCKET_SUBSET	# MAC socket subest (no labels)
-#options	CONFIG_MACF_SOCKET		# MAC socket labels
-#options	CONFIG_MACF_NET			# mbuf
-#options	CONFIG_MACF_DEBUG
-#options	CONFIG_MACF_MACH
-options		CONFIG_AUDIT			# Kernel auditing         
-
-options		EVENT						# <event>
-
-#
-#  Ipl measurement system
-#
-pseudo-device	iplmeas						# <iplmeas>
-
-#
-#  NFS measurement system
-#
-pseudo-device	nfsmeas						# <nfsmeas>
-
-#
-#  Removable Volume support
-#
-pseudo-device	vol						# <vol>
-
diff --git a/bsd/conf/MASTER.x86_64 b/bsd/conf/MASTER.x86_64
index 1050897d2..4bf42910b 100644
--- a/bsd/conf/MASTER.x86_64
+++ b/bsd/conf/MASTER.x86_64
@@ -44,21 +44,20 @@
 #  
 #  Standard Apple Research Configurations:
 #  -------- ----- -------- ---------------
-#  BASE =        [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ]
-#  FILESYS =	 [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs config_hfs_trim hfs_compression config_imgsrc_access ]
-#  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ipflow pkt_priority if_bridge ]
+#  BASE =        [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch zleaks ]
+#  FILESYS =	 [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo config_volfs config_hfs_trim hfs_compression config_hfs_alloc_rbtree config_imgsrc_access config_triggers ]
+#  NETWORKING =  [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo if_bridge pf pflog pf_pkthdr ]
 #  NFS =         [ nfsclient nfsserver ]
 #  VPN =         [ ipsec ]
 #  RELEASE =     [ BASE NETWORKING NFS VPN FILESYS libdriver ]
 #  PROFILE =     [ RELEASE profile ]
-#  DEBUG =       [ BASE NETWORKING NFS VPN FILESYS libdriver_g debug xpr_debug mach_assert pf pflog ]
-#
+#  DEBUG =       [ BASE NETWORKING NFS VPN FILESYS libdriver_g debug xpr_debug mach_assert ]
 #
 #  EMBEDDED_BASE =	[ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ]
 #  EMBEDDED_FILESYS =	[ devfs hfs journaling fdesc fifo ]
-#  EMBEDDED_NET =	[ inet compat_oldsock tcpdrop_synfin bpfilter config_mbuf_noexpand ]
+#  EMBEDDED_NET =	[ inet compat_oldsock tcpdrop_synfin bpfilter ]
 #  EMBEDDED =		[ EMBEDDED_BASE EMBEDDED_NET VPN EMBEDDED_FILESYS libdriver no_printf_str no_kprintf_str no_kdebug ]
-#  DEVELOPMENT =	[ EMBEDDED_BASE EMBEDDED_NET NFS VPN EMBEDDED_FILESYS libdriver netmibs development mach_assert ]
+#  DEVELOPMENT =	[ EMBEDDED_BASE EMBEDDED_NET NFS VPN EMBEDDED_FILESYS libdriver development mach_assert ]
 #
 ######################################################################
 #
diff --git a/bsd/conf/Makefile b/bsd/conf/Makefile
index a79644e77..afaf3eb89 100644
--- a/bsd/conf/Makefile
+++ b/bsd/conf/Makefile
@@ -3,92 +3,10 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
 export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
 export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 
-export dp_backing_file.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export ubc_subr.o_CFLAGS_ADD=-Wno-discard-qual -Wshorten-64-to-32
-export vnode_pager.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export vm_unix.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-
-export if_mib.o_CFLAGS_ADD=-Wno-unused-parameter
-export adsp_Write.o_CFLAGS_ADD=-Wno-sign-compare
-export adsp_Packet.o_CFLAGS_ADD=-Wno-sign-compare
-export adsp_Control.o_CFLAGS_ADD=-Wno-sign-compare
-export adsp_RxAttn.o_CFLAGS_ADD=-Wno-sign-compare
-export adsp_attention.o_CFLAGS_ADD=-Wno-sign-compare
-export asp_proto.o_CFLAGS_ADD=-Wno-sign-compare
-export drv_dep.o_CFLAGS_ADD=-Wno-sign-compare
-export ddp_rtmp.o_CFLAGS_ADD=-Wno-sign-compare
-export ddp_lap.o_CFLAGS_ADD=-Wno-sign-compare
-export radix.o_CFLAGS_ADD=-Wno-sign-compare
-export route.o_CFLAGS_ADD=-Wno-sign-compare
-export rtsock.o_CFLAGS_ADD=-Wno-sign-compare
-export dhcp_options.o_CFLAGS_ADD=-Wno-sign-compare
-export igmp.o_CFLAGS_ADD=-Wno-sign-compare
-export in_cksum.o_CFLAGS_ADD=-Wno-sign-compare
-export ip_divert.o_CFLAGS_ADD=-Wno-sign-compare
-export ip_dummynet.o_CFLAGS_ADD=-Wno-sign-compare
-export ip_flow.o_CFLAGS_ADD=-Wno-sign-compare
-export ip_fw2.o_CFLAGS_ADD=-Wno-sign-compare
-export ip_fw2_compat.o_CFLAGS_ADD=-Wno-sign-compare
-export ip_icmp.o_CFLAGS_ADD=-Wno-sign-compare
-export ip_input.o_CFLAGS_ADD=-Wno-sign-compare
-export ip_mroute.o_CFLAGS_ADD=-Wno-sign-compare
-export ip_output.o_CFLAGS_ADD=-Wno-sign-compare
-export raw_ip.o_CFLAGS_ADD=-Wno-sign-compare
-export tcp_input.o_CFLAGS_ADD=-Wno-sign-compare
-export tcp_output.o_CFLAGS_ADD=-Wno-sign-compare
-export tcp_subr.o_CFLAGS_ADD=-Wno-sign-compare
-export tcp_usrreq.o_CFLAGS_ADD=-Wno-sign-compare
-export tcp_timer.o_CFLAGS_ADD=-Wno-sign-compare
-export udp_usrreq.o_CFLAGS_ADD=-Wno-sign-compare
-export ah_input.o_CFLAGS_ADD=-Wno-sign-compare
-export ah_core.o_CFLAGS_ADD=-Wno-sign-compare
-export ah_output.o_CFLAGS_ADD=-Wno-sign-compare
-export esp_core.o_CFLAGS_ADD=-Wno-sign-compare
-export esp_input.o_CFLAGS_ADD=-Wno-sign-compare
-export esp_output.o_CFLAGS_ADD=-Wno-sign-compare
-export esp_rijndael.o_CFLAGS_ADD=-Wno-sign-compare
-export ipsec.o_CFLAGS_ADD=-Wno-sign-compare
-export dest6.o_CFLAGS_ADD=-Wno-sign-compare
-export frag6.o_CFLAGS_ADD=-Wno-sign-compare
-export icmp6.o_CFLAGS_ADD=-Wno-sign-compare
-export in6.o_CFLAGS_ADD=-Wno-sign-compare
-export in6_src.o_CFLAGS_ADD=-Wno-sign-compare
-export in6_cksum.o_CFLAGS_ADD=-Wno-sign-compare
-export ip6_fw.o_CFLAGS_ADD=-Wno-sign-compare
-export ip6_forward.o_CFLAGS_ADD=-Wno-sign-compare
-export in6_ifattach.o_CFLAGS_ADD=-Wno-sign-compare
-export ip6_input.o_CFLAGS_ADD=-Wno-sign-compare
-export ip6_mroute.o_CFLAGS_ADD=-Wno-sign-compare
-export ip6_output.o_CFLAGS_ADD=-Wno-sign-compare
-export ipcomp_input.o_CFLAGS_ADD=-Wno-sign-compare
-export ipcomp_output.o_CFLAGS_ADD=-Wno-sign-compare
-export in6_proto.o_CFLAGS_ADD=-Wno-sign-compare
-export mld6.o_CFLAGS_ADD=-Wno-sign-compare
-export nd6.o_CFLAGS_ADD=-Wno-sign-compare
-export nd6_nbr.o_CFLAGS_ADD=-Wno-sign-compare
-export nd6_rtr.o_CFLAGS_ADD=-Wno-sign-compare
-export raw_ip6.o_CFLAGS_ADD=-Wno-sign-compare
-export route6.o_CFLAGS_ADD=-Wno-sign-compare
-export scope6.o_CFLAGS_ADD=-Wno-sign-compare
-export udp6_usrreq.o_CFLAGS_ADD=-Wno-sign-compare
-export key.o_CFLAGS_ADD=-Wno-sign-compare
-export keysock.o_CFLAGS_ADD=-Wno-sign-compare
-export atp_write.o_CFLAGS_ADD=-Wno-sign-compare
-export keydb.o_CFLAGS_ADD=-Wno-sign-compare
-export des_setkey.o_CFLAGS_ADD=-Wno-sign-compare
-export sys_socket.o_CFLAGS_ADD=-Wno-sign-compare
-export sys_glue.o_CFLAGS_ADD=-Wno-sign-compare
-export uipc_domain.o_CFLAGS_ADD=-Wno-sign-compare
-export uipc_mbuf.o_CFLAGS_ADD=-Wno-sign-compare
-export uipc_mbuf2.o_CFLAGS_ADD=-Wno-sign-compare
-export uipc_socket.o_CFLAGS_ADD=-Wno-sign-compare
-export uipc_socket2.o_CFLAGS_ADD=-Wno-sign-compare
-
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
-SETUP_SUBDIRS = \
-	tools
+SETUP_SUBDIRS = 
 
 COMP_SUBDIRS = 
 
@@ -104,30 +22,24 @@ else
 export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)
 endif
 
-$(COMPOBJROOT)/doconf:
-	@make build_setup 
+MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(ARCH_CONFIG_LC).$(MACHINE_CONFIG_LC)
 
 $(COMPOBJROOT)/$(BSD_KERNEL_CONFIG)/Makefile :  $(SOURCE)/MASTER  \
 	$(SOURCE)/MASTER.$(ARCH_CONFIG_LC) \
 	$(SOURCE)/Makefile.template  \
 	$(SOURCE)/Makefile.$(ARCH_CONFIG_LC)  \
 	$(SOURCE)/files \
-	$(SOURCE)/files.$(ARCH_CONFIG_LC) \
-	$(COMPOBJROOT)/doconf
+	$(SOURCE)/files.$(ARCH_CONFIG_LC)
 	$(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \
 	$(MKDIR) $${doconf_target}; \
 	cd $${doconf_target}; \
 	rm -f $(notdir $?); \
 	cp  $? $${doconf_target}; \
-	$(COMPOBJROOT)/doconf -c -cpu $(ARCH_CONFIG_LC) -d  $(TARGET)/$(BSD_KERNEL_CONFIG) $(BSD_KERNEL_CONFIG); \
+	if [ -f $(MASTER_CPU_PER_SOC) ]; then cp $(MASTER_CPU_PER_SOC) $${doconf_target}; fi; \
+	$(SRCROOT)/SETUP/config/doconf -c -cpu $(ARCH_CONFIG_LC) -soc $(MACHINE_CONFIG_LC) -d  $(TARGET)/$(BSD_KERNEL_CONFIG) $(BSD_KERNEL_CONFIG); \
 	);
 
-.ORDER: $(COMPOBJROOT)/$(BSD_KERNEL_CONFIG)/Makefile 
-
-do_setup_conf: $(COMPOBJROOT)/doconf \
-		$(COMPOBJROOT)/$(BSD_KERNEL_CONFIG)/Makefile 
-
-do_all: do_setup_conf
+do_all: $(COMPOBJROOT)/$(BSD_KERNEL_CONFIG)/Makefile
 	$(_v)next_source=$(subst conf/,,$(SOURCE));			\
 	${MAKE} -C $(COMPOBJROOT)/$(BSD_KERNEL_CONFIG)	\
 		MAKEFILES=$(TARGET)/$(BSD_KERNEL_CONFIG)/Makefile	\
diff --git a/bsd/conf/Makefile.i386 b/bsd/conf/Makefile.i386
index 0b5f62979..a46354589 100644
--- a/bsd/conf/Makefile.i386
+++ b/bsd/conf/Makefile.i386
@@ -2,46 +2,12 @@
 #BEGIN  Machine dependent Makefile fragment for i386
 ######################################################################
  
-# files to build with certain warnings turned off
+# Files to build with certain warnings turned off
 dis_tables.o_CFLAGS_ADD += -Wno-cast-qual
 fbt_x86.o_CFLAGS_ADD += -Wno-cast-qual
 
-
-# Enable -Werror for i386 builds
-CFLAGS+=$(WERROR)
-CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD))
-
-# Objects that don't compile cleanly:
-OBJS_NO_WERROR =		\
-	fifo_vnops.o	\
-	aescrypt.o		\
-	aeskey.o		\
-	des_setkey.o		\
-	sha2.o			\
-	if_ethersubr.o		\
-	if_media.o		\
-	kext_net.o		\
-	dhcp_options.o		\
-	in_bootp.o		\
-	krpc_subr.o		\
-	ux_exception.o		\
-	unix_startup.o 		\
-	randomdev.o		\
-	vnode_pager.o	\
-	dp_backing_file.o \
-	vm_unix.o		\
-	mem.o			\
-	km.o			\
-	init_sysent.o		\
-	drv_dep.o		\
-	sdt_x86.o       \
-	dtrace_isa.o		\
-	aes_modes.o
-
-
-OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS))
-
-$(OBJS_WERROR):		WERROR=-Werror
+# Build sha2.o with the assembly SHA-256 implementation
+sha2.o_CFLAGS_ADD += -DSHA256_USE_ASSEMBLY=1
 
 ######################################################################
 #END    Machine dependent Makefile fragment for i386
diff --git a/bsd/conf/Makefile.ppc b/bsd/conf/Makefile.ppc
deleted file mode 100644
index 2dd4e88b3..000000000
--- a/bsd/conf/Makefile.ppc
+++ /dev/null
@@ -1,53 +0,0 @@
-######################################################################
-#BEGIN  Machine dependent Makefile fragment for ppc
-######################################################################
- 
-# files to build with certain warnings turned off
-dis_tables.o_CFLAGS_ADD += -Wno-cast-qual
-fbt_ppc.o_CFLAGS_ADD += -Wno-cast-qual -Wno-pointer-to-int-cast
-
-
-# Enable -Werror for ppc builds
-CFLAGS+=$(WERROR)
-CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD))
-
-# Objects that don't compile cleanly:
-OBJS_NO_WERROR =		\
-	fifo_vnops.o	\
-	aescrypt.o		\
-	aeskey.o		\
-	des_setkey.o		\
-	sha2.o			\
-	shadow.o		\
-	if_ethersubr.o		\
-	if_media.o		\
-	kext_net.o		\
-	dhcp_options.o		\
-	in_bootp.o		\
-	krpc_subr.o		\
-	ux_exception.o		\
-	sysctl.o		\
-	unix_startup.o 		\
-	randomdev.o		\
-	devtimer.o		\
-	vnode_pager.o	\
-	dp_backing_file.o \
-	vm_unix.o		\
-	mem.o			\
-	km.o			\
-	at.o			\
-	drv_dep.o		\
-	fbt_ppc.o		\
-	sdt_ppc.o		\
-	dtrace_isa.o		\
-	dtrace_subr_ppc.o
-
-
-OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS))
-
-$(OBJS_WERROR):		WERROR=-Werror
-
-######################################################################
-#END    Machine dependent Makefile fragment for ppc
-######################################################################
-
diff --git a/bsd/conf/Makefile.template b/bsd/conf/Makefile.template
index fdee45a3e..8691ce705 100644
--- a/bsd/conf/Makefile.template
+++ b/bsd/conf/Makefile.template
@@ -43,10 +43,81 @@ include $(MakeInc_def)
 #
 # XXX: CFLAGS
 #
-CFLAGS+= -imacros meta_features.h -DARCH_PRIVATE -DKERNEL -DDRIVER_PRIVATE \
+CFLAGS+= -include meta_features.h -DARCH_PRIVATE -DDRIVER_PRIVATE \
 	-D_KERNEL_BUILD -DKERNEL_BUILD -DMACH_KERNEL -DBSD_BUILD \
 	-DBSD_KERNEL_PRIVATE -DLP64KERN=1 -DLP64_DEBUG=0  -I. $(CFLAGS_INLINE_CONFIG)
 
+dp_backing_file.o_CFLAGS_ADD	+= -Wshorten-64-to-32
+ubc_subr.o_CFLAGS_ADD			+= -Wshorten-64-to-32
+vnode_pager.o_CFLAGS_ADD		+= -Wshorten-64-to-32
+vm_unix.o_CFLAGS_ADD			+= -Wshorten-64-to-32
+
+# Objects that don't want -Wsign-compare
+OBJS_NO_SIGN_COMPARE =		\
+		radix.o	\
+		route.o	\
+		rtsock.o	\
+		dhcp_options.o	\
+		igmp.o	\
+		in_cksum.o	\
+		ip_divert.o	\
+		ip_dummynet.o	\
+		ip_flow.o	\
+		ip_fw2.o	\
+		ip_fw2_compat.o	\
+		ip_icmp.o	\
+		ip_input.o	\
+		ip_mroute.o	\
+		ip_output.o	\
+		raw_ip.o	\
+		tcp_input.o	\
+		tcp_output.o	\
+		tcp_subr.o	\
+		tcp_usrreq.o	\
+		tcp_timer.o	\
+		udp_usrreq.o	\
+		ah_input.o	\
+		ah_core.o	\
+		ah_output.o	\
+		esp_core.o	\
+		esp_input.o	\
+		esp_output.o	\
+		esp_rijndael.o	\
+		ipsec.o	\
+		dest6.o	\
+		frag6.o	\
+		icmp6.o	\
+		in6.o	\
+		in6_src.o	\
+		in6_cksum.o	\
+		ip6_fw.o	\
+		ip6_forward.o	\
+		in6_ifattach.o	\
+		ip6_input.o	\
+		ip6_mroute.o	\
+		ip6_output.o	\
+		ipcomp_input.o	\
+		ipcomp_output.o	\
+		in6_proto.o	\
+		mld6.o	\
+		nd6.o	\
+		nd6_nbr.o	\
+		nd6_rtr.o	\
+		raw_ip6.o	\
+		route6.o	\
+		scope6.o	\
+		udp6_usrreq.o	\
+		key.o	\
+		keysock.o	\
+		keydb.o	\
+		des_setkey.o	\
+		uipc_mbuf.o	\
+		uipc_mbuf2.o	\
+		uipc_socket.o	\
+		uipc_socket2.o
+
+$(foreach file,$(OBJS_NO_SIGN_COMPARE),$(eval $(call add_perfile_cflags,$(file),-Wno-sign-compare)))
+
 #
 # Directories for mig generated files
 #
@@ -98,11 +169,11 @@ ${OBJS}: ${OBJSDEPS}
 
 LDOBJS = $(OBJS)
 
-$(COMPONENT).o: $(LDOBJS)
+$(COMPONENT).filelist: $(LDOBJS)
 	@echo LDFILELIST $(COMPONENT)
 	$(_v)( for obj in ${LDOBJS}; do	\
 		 echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \
-	done; ) > $(COMPONENT).o
+	done; ) > $(COMPONENT).filelist
 
 MAKESYSCALLS = $(SRCROOT)/bsd/kern/makesyscalls.sh
 
@@ -121,7 +192,7 @@ audit_kevents.c: $(SRCROOT)/bsd/kern/syscalls.master $(MAKESYSCALLS)
 do_depend: do_all
 	$(_v)${MD} -u Makedep -f -d `ls *.d`;
 
-do_all: $(COMPONENT).o
+do_all: $(COMPONENT).filelist
 
 do_build_all: do_depend
 
diff --git a/bsd/conf/Makefile.x86_64 b/bsd/conf/Makefile.x86_64
index 83b41e2dd..29811299a 100644
--- a/bsd/conf/Makefile.x86_64
+++ b/bsd/conf/Makefile.x86_64
@@ -2,46 +2,12 @@
 #BEGIN  Machine dependent Makefile fragment for x86_64
 ######################################################################
  
-# files to build with certain warnings turned off
+# Files to build with certain warnings turned off
 dis_tables.o_CFLAGS_ADD += -Wno-cast-qual
 fbt_x86.o_CFLAGS_ADD += -Wno-cast-qual
 
-
-# Enable -Werror for x86_64 builds
-CFLAGS+=$(WERROR)
-CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD))
-
-# Objects that don't compile cleanly:
-OBJS_NO_WERROR =		\
-	fifo_vnops.o	\
-	aescrypt.o		\
-	aeskey.o		\
-	des_setkey.o		\
-	sha2.o			\
-	if_ethersubr.o		\
-	if_media.o		\
-	kext_net.o		\
-	dhcp_options.o		\
-	in_bootp.o		\
-	krpc_subr.o		\
-	ux_exception.o		\
-	unix_startup.o 		\
-	randomdev.o		\
-	vnode_pager.o	\
-	dp_backing_file.o \
-	vm_unix.o		\
-	mem.o			\
-	km.o			\
-	init_sysent.o		\
-	drv_dep.o		\
-	sdt_x86.o       \
-	dtrace_isa.o		\
-	aes_modes.o
-
-
-OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS))
-
-$(OBJS_WERROR):		WERROR=-Werror
+# Build sha2.o with the assembly SHA-256 implementation
+sha2.o_CFLAGS_ADD += -DSHA256_USE_ASSEMBLY=1
 
 ######################################################################
 #END    Machine dependent Makefile fragment for x86_64
diff --git a/bsd/conf/files b/bsd/conf/files
index 92ea7269a..b3a7b10c4 100644
--- a/bsd/conf/files
+++ b/bsd/conf/files
@@ -107,6 +107,7 @@ OPTIONS/ipfw2				optional ipfw2
 OPTIONS/ipfirewall			optional ipfirewall
 OPTIONS/ipv6firewall		optional ipv6firewall
 OPTIONS/tcpdebug			optional tcpdebug
+OPTIONS/if_bridge			optional if_bridge
 OPTIONS/faith				optional faith
 OPTIONS/gif					optional gif
 OPTIONS/netat				optional netat
@@ -124,8 +125,6 @@ OPTIONS/hfs					optional hfs
 OPTIONS/mfs					optional mfs
 OPTIONS/fdesc				optional fdesc
 OPTIONS/fifo				optional fifo
-OPTIONS/nullfs				optional nullfs
-OPTIONS/union				optional union
 OPTIONS/devfs				optional devfs
 OPTIONS/crypto				optional crypto
 OPTIONS/allcrypto			optional allcrypto
@@ -183,13 +182,7 @@ bsd/vfs/vfs_fsevents.c			standard
 bsd/miscfs/deadfs/dead_vnops.c		standard
 bsd/miscfs/devfs/devfs_fdesc_support.c	optional fdesc
 bsd/miscfs/fifofs/fifo_vnops.c		optional fifo sockets
-bsd/miscfs/nullfs/null_subr.c		optional nullfs
-bsd/miscfs/nullfs/null_vfsops.c		optional nullfs
-bsd/miscfs/nullfs/null_vnops.c		optional nullfs
 bsd/miscfs/specfs/spec_vnops.c		standard
-bsd/miscfs/union/union_subr.c		optional union
-bsd/miscfs/union/union_vfsops.c		optional union
-bsd/miscfs/union/union_vnops.c		optional union
 
 bsd/miscfs/devfs/devfs_tree.c		optional devfs
 bsd/miscfs/devfs/devfs_vnops.c		optional devfs
@@ -199,10 +192,10 @@ bsd/kern/decmpfs.c			standard
 
 bsd/net/bpf.c				optional bpfilter
 bsd/net/bpf_filter.c			optional bpfilter
+bsd/net/if_bridge.c			optional if_bridge
+bsd/net/bridgestp.c			optional if_bridge
 bsd/net/bsd_comp.c			optional ppp_bsdcomp
 bsd/net/if.c				optional networking
-bsd/net/if_atmsubr.c			optional atm
-bsd/net/if_disc.c			optional disc
 bsd/net/init.c				optional sockets
 bsd/net/dlil.c				optional networking
 bsd/net/ether_if_module.c		optional ether
@@ -210,7 +203,7 @@ bsd/net/ether_at_pr_module.c		optional ether netat
 bsd/net/ether_inet_pr_module.c		optional ether inet
 bsd/net/ether_inet6_pr_module.c		optional ether inet6
 bsd/net/if_loop.c			optional loop
-bsd/net/if_mib.c			optional netmibs
+bsd/net/if_mib.c			optional networking
 bsd/net/if_sl.c				optional sl
 bsd/net/if_tun.c			optional tun
 bsd/net/if_vlan.c			optional vlan
@@ -224,9 +217,10 @@ bsd/net/raw_cb.c			optional networking
 bsd/net/raw_usrreq.c			optional networking
 bsd/net/route.c				optional networking
 bsd/net/rtsock.c			optional networking
+bsd/net/netsrc.c			optional networking
+bsd/net/ntstat.c			optional networking
 bsd/net/slcompress.c			optional ppp
 bsd/net/slcompress.c			optional sl
-bsd/net/if_dummy.c         		optional dummy
 bsd/net/if_gif.c          		optional gif
 bsd/net/if_stf.c          		optional stf
 bsd/net/net_osdep.c			optional sockets
@@ -243,19 +237,21 @@ bsd/net/pf_norm.c			optional pf
 bsd/net/pf_osfp.c			optional pf
 bsd/net/pf_ruleset.c			optional pf
 bsd/net/pf_table.c			optional pf
+bsd/net/if_llreach.c          		optional networking
 
-bsd/netinet/if_atm.c			optional atm
 bsd/netinet/igmp.c			optional inet
 bsd/netinet/in.c			optional inet
 bsd/netinet/in_dhcp.c			optional inet
 bsd/netinet/dhcp_options.c		optional inet
 bsd/netinet/in_arp.c			optional inet
+bsd/netinet/in_mcast.c			optional inet
 bsd/netinet/in_pcb.c			optional inet
+bsd/netinet/in_pcblist.c		optional inet
 bsd/netinet/in_proto.c			optional inet
 bsd/netinet/in_rmx.c			optional inet
+bsd/netinet/in_tclass.c			optional inet
 bsd/netinet/ip_divert.c			optional ipdivert
 bsd/netinet/ip_dummynet.c  		optional dummynet
-bsd/netinet/ip_flow.c			optional inet
 bsd/netinet/ip_fw2.c			optional ipfw2
 bsd/netinet/ip_fw2_compat.c		optional ipfw2
 bsd/netinet/ip_icmp.c			optional inet
@@ -271,6 +267,8 @@ bsd/netinet/tcp_sack.c			optional inet
 bsd/netinet/tcp_subr.c			optional inet
 bsd/netinet/tcp_timer.c			optional inet
 bsd/netinet/tcp_usrreq.c		optional inet
+bsd/netinet/tcp_newreno.c		optional inet
+bsd/netinet/tcp_ledbat.c		optional inet
 bsd/netinet/udp_usrreq.c		optional inet
 bsd/netinet/in_gif.c      		optional gif inet
 bsd/netinet/ip_ecn.c          		optional inet
@@ -300,8 +298,8 @@ bsd/netinet6/in6_src.c      		optional inet6
 bsd/netinet6/ipcomp_core.c  		optional ipsec
 bsd/netinet6/ipcomp_input.c 		optional ipsec
 bsd/netinet6/ipcomp_output.c      	optional ipsec
+bsd/netinet6/in6_mcast.c      		optional inet6
 bsd/netinet6/in6_pcb.c      		optional inet6
-bsd/netinet6/in6_prefix.c   		optional inet6
 bsd/netinet6/in6_proto.c    		optional inet6
 bsd/netinet6/in6_rmx.c      		optional inet6
 bsd/netinet6/mld6.c         		optional inet6
@@ -313,6 +311,7 @@ bsd/netinet6/route6.c       		optional inet6
 bsd/netinet6/scope6.c       		optional inet6
 bsd/netinet6/udp6_output.c  		optional inet6
 bsd/netinet6/udp6_usrreq.c  		optional inet6
+bsd/netinet6/ip6_id.c				optional inet6
 
 bsd/netkey/key.c          		optional ipsec
 bsd/netkey/key_debug.c    		optional ipsec
@@ -443,7 +442,7 @@ bsd/hfs/hfs_vfsutils.c				optional hfs
 bsd/hfs/hfs_vnops.c				optional hfs
 bsd/hfs/hfs_xattr.c				optional hfs
 bsd/hfs/MacOSStubs.c				optional hfs
-bsd/hfs/cprotect.c				optional hfs
+bsd/hfs/hfs_cprotect.c				optional hfs
 bsd/hfs/rangelist.c				optional hfs
 bsd/hfs/hfscommon/BTree/BTree.c			optional hfs
 bsd/hfs/hfscommon/BTree/BTreeAllocate.c		optional hfs
@@ -457,6 +456,7 @@ bsd/hfs/hfscommon/Catalog/FileIDsServices.c	optional hfs
 bsd/hfs/hfscommon/Misc/BTreeWrapper.c		optional hfs
 bsd/hfs/hfscommon/Misc/FileExtentMapping.c	optional hfs
 bsd/hfs/hfscommon/Misc/VolumeAllocation.c	optional hfs
+bsd/hfs/hfscommon/Misc/HybridAllocator.c	optional hfs
 bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c	optional hfs
 
 bsd/security/audit/audit.c			optional config_audit	
@@ -498,6 +498,7 @@ bsd/kern/kern_malloc.c			standard
 bsd/kern/kern_mman.c			standard
 bsd/kern/kern_panicinfo.c		optional panic_info
 bsd/kern/kern_physio.c			standard
+bsd/kern/kern_priv.c			standard
 bsd/kern/kern_proc.c			standard
 bsd/kern/kern_prot.c			standard
 bsd/kern/kern_resource.c		standard
@@ -556,6 +557,8 @@ bsd/kern/kpi_socketfilter.c		optional sockets
 bsd/kern/pthread_support.c		optional psynch
 bsd/kern/pthread_synch.c		standard
 bsd/kern/proc_info.c			standard
+bsd/kern/process_policy.c		standard
+bsd/kern/vm_pressure.c			standard
 bsd/kern/socket_info.c			optional sockets
 
 bsd/vm/vnode_pager.c			standard
@@ -585,3 +588,4 @@ bsd/dev/dtrace/profile_prvd.c		optional config_dtrace
 bsd/dev/dtrace/fasttrap.c		optional config_dtrace
 
 bsd/kern/imageboot.c                  optional config_imageboot
+
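
Each "optional" line above is compiled only when the named option is
configured, and each OPTIONS/ entry directs the config tool to emit a small
header recording whether the option is on.  A hedged sketch of the resulting
guard pattern in a source file; the header name if_bridge.h and the IF_BRIDGE
macro follow the generated-header convention and are assumed here, not quoted
from the generator:

    #include "if_bridge.h"  /* generated by SETUP/config (assumed name) */

    #if IF_BRIDGE
    /* Bridge support is configured in; the real code would follow. */
    int bridge_configured(void) { return 1; }
    #else
    int bridge_configured(void) { return 0; }
    #endif
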
diff --git a/bsd/conf/files.i386 b/bsd/conf/files.i386
index 424cc3e3e..331f7202d 100644
--- a/bsd/conf/files.i386
+++ b/bsd/conf/files.i386
@@ -14,10 +14,19 @@ bsd/dev/i386/systemcalls.c	standard
 bsd/dev/i386/sysctl.c           standard
 bsd/dev/i386/unix_signal.c	standard
 bsd/dev/i386/munge.s		standard
-bsd/crypto/aes/i386/aes_x86_v2.s		optional crypto
-bsd/crypto/aes/i386/aes_modes.c			optional crypto
 
+bsd/crypto/aes/i386/AES.s		optional crypto
+bsd/crypto/aes/i386/aes_modes_asm.s	optional crypto
+bsd/crypto/aes/i386/aes_modes_hw.s	optional crypto
+bsd/crypto/aes/i386/aes_key_hw.s	optional crypto
+bsd/crypto/aes/i386/aes_crypt_hw.s	optional crypto
+bsd/crypto/aes/i386/aesxts_asm.s	optional crypto
+bsd/crypto/aes/i386/aesxts.c	optional crypto
 
+bsd/crypto/sha2/intel/sha256.s	optional crypto
+bsd/crypto/sha2/intel/sha256nossse3.s	optional crypto
+
+# Lightly ifdef'd to support K64 DTrace
 bsd/dev/i386/dtrace_isa.c	optional config_dtrace
 bsd/dev/i386/dtrace_subr_x86.c	optional config_dtrace
 bsd/dev/i386/fbt_x86.c		optional config_dtrace
@@ -26,6 +35,11 @@ bsd/dev/i386/fasttrap_isa.c	optional config_dtrace
 bsd/dev/i386/instr_size.c	optional config_dtrace
 bsd/dev/i386/dis_tables.c	optional config_dtrace
 
+# Support for identifying MACF callouts with locks held
+bsd/kern/policy_check.c			optional config_macf
+
 bsd/kern/bsd_stubs.c		standard
 bsd/netinet/in_cksum.c		optional inet
 
+
+
diff --git a/bsd/conf/files.ppc b/bsd/conf/files.ppc
deleted file mode 100644
index 57e8870a7..000000000
--- a/bsd/conf/files.ppc
+++ /dev/null
@@ -1,34 +0,0 @@
-OPTIONS/show_space		optional show_space
-OPTIONS/gdb			optional gdb
-OPTIONS/iplmeas			optional iplmeas
-
-bsd/netinet/in_cksum.c		optional inet
-
-bsd/dev/ppc/conf.c		standard
-bsd/dev/ppc/cons.c		standard
-bsd/dev/ppc/mem.c		standard
-bsd/dev/ppc/unix_signal.c	standard
-bsd/dev/ppc/ffs.s		standard
-bsd/dev/ppc/memmove.c		standard
-bsd/dev/ppc/machdep.c		standard
-bsd/dev/ppc/kern_machdep.c	standard
-bsd/dev/ppc/stubs.c		standard
-bsd/dev/ppc/systemcalls.c	standard
-bsd/dev/ppc/km.c		standard
-bsd/dev/ppc/xsumas.s		standard	
-bsd/dev/ppc/sysctl.c		standard	
-bsd/dev/ppc/munge.s		standard
-bsd/crypto/aes/ppc/aescrypt.c		optional crypto
-bsd/crypto/aes/ppc/aeskey.c			optional crypto
-bsd/crypto/aes/ppc/aestab.c			optional crypto
-
-
-bsd/dev/ppc/dtrace_isa.c	optional config_dtrace
-bsd/dev/ppc/dtrace_subr_ppc.c	optional config_dtrace
-bsd/dev/ppc/fbt_ppc.c		optional config_dtrace
-bsd/dev/ppc/sdt_ppc.c		optional config_dtrace
-bsd/dev/ppc/fasttrap_isa.c	optional config_dtrace
-
-bsd/kern/bsd_stubs.c			standard
-
-
diff --git a/bsd/conf/files.x86_64 b/bsd/conf/files.x86_64
index 322174554..fcb3be604 100644
--- a/bsd/conf/files.x86_64
+++ b/bsd/conf/files.x86_64
@@ -15,9 +15,16 @@ bsd/dev/i386/sysctl.c           standard
 bsd/dev/i386/unix_signal.c	standard
 bsd/dev/x86_64/munge.s		standard
 
-bsd/crypto/aes/gen/aescrypt.c				optional crypto
-bsd/crypto/aes/gen/aeskey.c				optional crypto
-bsd/crypto/aes/gen/aestab.c				optional crypto
+bsd/crypto/aes/i386/AES.s		optional crypto
+bsd/crypto/aes/i386/aes_modes_asm.s	optional crypto
+bsd/crypto/aes/i386/aes_modes_hw.s	optional crypto
+bsd/crypto/aes/i386/aes_key_hw.s	optional crypto
+bsd/crypto/aes/i386/aes_crypt_hw.s	optional crypto
+bsd/crypto/aes/i386/aesxts_asm.s	optional crypto
+bsd/crypto/aes/i386/aesxts.c	optional crypto
+
+bsd/crypto/sha2/intel/sha256.s		optional crypto
+bsd/crypto/sha2/intel/sha256nossse3.s	optional crypto
 
 # Lightly ifdef'd to support K64 DTrace
 bsd/dev/i386/dtrace_isa.c	optional config_dtrace
@@ -28,6 +35,9 @@ bsd/dev/i386/fasttrap_isa.c	optional config_dtrace
 bsd/dev/i386/instr_size.c	optional config_dtrace
 bsd/dev/i386/dis_tables.c	optional config_dtrace
 
+# Support for identifying MACF callouts with locks held
+bsd/kern/policy_check.c			optional config_macf
+
 bsd/kern/bsd_stubs.c		standard
 bsd/netinet/in_cksum.c		optional inet
 
diff --git a/bsd/conf/param.c b/bsd/conf/param.c
index 9aafb343c..95c01ffb5 100644
--- a/bsd/conf/param.c
+++ b/bsd/conf/param.c
@@ -91,7 +91,8 @@ int	maxprocperuid = NPROC/2;
 int nprocs = 0; /* XXX */
 
 //#define	NTEXT (80 + NPROC / 8)			/* actually the object cache */
-int    desiredvnodes = CONFIG_VNODES;
+int desiredvnodes = 0;				/* desiredvnodes is set explicitly in unix_startup.c */
+uint32_t kern_maxvnodes = 0;		/* global, to be read from the device tree */
 
 #define MAXFILES (OPEN_MAX + 2048)
 int	maxfiles = MAXFILES;
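
Since desiredvnodes now starts at 0, the effective limit is chosen at boot,
with kern_maxvnodes carrying any device-tree override.  A minimal sketch of
that flow, assuming the pexpert PE_get_default() interface; the property name
"kern.maxvnodes" is illustrative, and CONFIG_VNODES stands in as the fallback
the removed initializer used:

    #include <pexpert/pexpert.h>

    extern int desiredvnodes;
    extern uint32_t kern_maxvnodes;

    static void choose_desiredvnodes(void)
    {
        if (PE_get_default("kern.maxvnodes", &kern_maxvnodes,
                           sizeof(kern_maxvnodes)))
            desiredvnodes = kern_maxvnodes;   /* device-tree override */
        else
            desiredvnodes = CONFIG_VNODES;    /* compiled-in default */
    }
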
diff --git a/bsd/conf/tools/Makefile b/bsd/conf/tools/Makefile
deleted file mode 100644
index 4f9ccd553..000000000
--- a/bsd/conf/tools/Makefile
+++ /dev/null
@@ -1,32 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-SETUP_SUBDIRS = doconf
-
-COMP_SUBDIRS = doconf
-
-INST_SUBDIRS = \
-
-
-setup_build_all:
-	@echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-do_build_all:
-	@echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-setup_build_install:
-	@echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-do_build_install:
-	@echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/bsd/conf/tools/doconf/Makefile b/bsd/conf/tools/doconf/Makefile
deleted file mode 100644
index 7794a4ceb..000000000
--- a/bsd/conf/tools/doconf/Makefile
+++ /dev/null
@@ -1,49 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-COMP_SUBDIRS = \
-
-INST_SUBDIRS = \
-
-
-#
-# Who and where
-#
-BINDIR= 
-ifneq ($(MACHINE_CONFIG), DEFAULT)
-DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)/)
-else
-DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)/)
-endif
-PROGRAM= $(DSTDIR)doconf
-
-# 
-# How to install it
-#
-IFLAGS= -c -m 555
-
-$(PROGRAM): $(DSTDIR)% : $(SOURCE)%.csh
-	@-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS 
-	@sed -e "s/#PROGRAM.*/#`vers_string $(notdir $(PROGRAM))`/" \
-		< $< >$(notdir $(PROGRAM)).VERS;
-	@install $(IFLAGS) $(notdir $(PROGRAM)).VERS $(PROGRAM);
-	@-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS;
-
-do_build_setup: $(PROGRAM)
-
-do_build_all:
-
-setup_build_install:
-
-do_build_install:
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/bsd/crypto/Makefile b/bsd/crypto/Makefile
index 0af469f52..ab0c4b986 100644
--- a/bsd/crypto/Makefile
+++ b/bsd/crypto/Makefile
@@ -16,18 +16,16 @@ INSTINC_SUBDIRS = \
 	sha2
 
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
+	aes
 
 INSTINC_SUBDIRS_X86_64 = \
+	aes
 
 INSTINC_SUBDIRS_ARM = \
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS_X86_64 = \
diff --git a/bsd/crypto/aes/Assert.c b/bsd/crypto/aes/Assert.c
new file mode 100644
index 000000000..5ba9c4472
--- /dev/null
+++ b/bsd/crypto/aes/Assert.c
@@ -0,0 +1,34 @@
+/*	This module exists solely to check compile-time assertions.  It should be
+	compiled when building the project, and building should be terminated if
+	errors are encountered.  However, any object it produces need not be
+	included in the build.
+*/
+
+
+#include <stddef.h>
+
+#include "crypto/aes.h"
+#include "Context.h"
+
+/*	Declare CheckAssertion so that if any of the declarations below differ
+	from it, the compiler will report an error.
+*/
+extern char CheckAssertion[1];
+
+/*	Ensure that ContextKey is the offset of the ks member of the AES context
+	structures.
+*/
+extern char CheckAssertion[ContextKey == offsetof(aes_encrypt_ctx, ks)];
+extern char CheckAssertion[ContextKey == offsetof(aes_decrypt_ctx, ks)];
+	/*	If these assertions fail, change the definition of ContextKey in
+		Context.h to match the offset of the ks field.
+	*/
+
+/*	Ensure that ContextKeyLength is the offset of the inf member of the AES
+	context structures.
+*/
+extern char CheckAssertion[ContextKeyLength == offsetof(aes_encrypt_ctx, inf)];
+extern char CheckAssertion[ContextKeyLength == offsetof(aes_decrypt_ctx, inf)];
+	/*	If these assertions fail, change the definition of ContextKeyLength in
+		Context.h to match the offset of the inf field.
+	*/
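
The CheckAssertion declarations work because C forbids redeclaring an array
with a different size: a true condition yields size 1, matching the first
declaration, while a false condition yields the invalid size 0 and stops the
compile.  The same idiom as a reusable macro, as a sketch (STATIC_ASSERT is an
illustrative name):

    /* Declares CheckAssertion[1] when cond holds and CheckAssertion[0]
       when it does not, which conflicts and fails to compile. */
    #define STATIC_ASSERT(cond) extern char CheckAssertion[(cond) ? 1 : 0]

    STATIC_ASSERT(sizeof(int) == 4);   /* compiles only if int is 4 bytes */
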
diff --git a/bsd/crypto/aes/Makefile b/bsd/crypto/aes/Makefile
index 026261c65..6b96dbd34 100644
--- a/bsd/crypto/aes/Makefile
+++ b/bsd/crypto/aes/Makefile
@@ -9,18 +9,16 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
+	i386
 
 INSTINC_SUBDIRS_X86_64 = \
+	i386
 
 INSTINC_SUBDIRS_ARM = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS_X86_64 = \
diff --git a/bsd/crypto/aes/aes.h b/bsd/crypto/aes/aes.h
old mode 100644
new mode 100755
index eaba0a692..49c845da6
--- a/bsd/crypto/aes/aes.h
+++ b/bsd/crypto/aes/aes.h
@@ -80,14 +80,22 @@ typedef	unsigned int    aes_32t;
 #endif
 
 
+#if 0 // defined (__i386__) || defined (__x86_64__)
+
+/*
+	It appears that no other code for i386/x86_64 uses the following
+	definitions any more.  They are disabled here so that the C code in the
+	gen/ directory can be compiled for test/development purposes.
+	Note: this does not change anything in the i386/x86_64 kernel.
+	(The source code in i386/, mostly assembly, does not reference this
+	header file.)
+
+	cclee	10-20-2010
+*/
 
 /* the character array 'inf' in the following structures is used    */
 /* to hold AES context information. This AES code uses cx->inf.b[0] */
 /* to hold the number of rounds multiplied by 16. The other three   */
 /* elements can be used by code that implements additional modes    */
 
-#if defined (__i386__)
-
 #if defined( AES_ERR_CHK )
 #define aes_rval     int_ret
 #else
@@ -166,7 +174,7 @@ aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
 aes_rval aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1]);
 #endif
 
-#if defined (__i386__)
+#if defined (__i386__) || defined (__x86_64__)
 aes_rval aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]);
 #endif
 
@@ -193,7 +201,7 @@ aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
 aes_rval aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]);
 #endif
 
-#if defined (__i386__)
+#if defined (__i386__) || defined (__x86_64__)
 aes_rval aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]);
 #endif
 
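
With aes_encrypt() and aes_decrypt() now declared for x86_64 as well as i386,
callers follow the usual expand-then-cipher pattern.  A short usage sketch
built only from the declarations in this header; the key and block buffers
are illustrative:

    #include <crypto/aes.h>

    static void encrypt_one_block(const unsigned char key[32],
                                  const unsigned char in[16],
                                  unsigned char out[16])
    {
        aes_encrypt_ctx ctx[1];

        aes_encrypt_key256(key, ctx);   /* expand the 256-bit key */
        aes_encrypt(in, out, ctx);      /* encrypt one 16-byte block */
    }
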
diff --git a/bsd/crypto/aes/gen/Makefile b/bsd/crypto/aes/gen/Makefile
index 7ea225c10..d32c71c39 100644
--- a/bsd/crypto/aes/gen/Makefile
+++ b/bsd/crypto/aes/gen/Makefile
@@ -9,14 +9,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 PRIVATE_DATAFILES = \
diff --git a/bsd/crypto/aes/gen/aesopt.h b/bsd/crypto/aes/gen/aesopt.h
index 2b78eb920..fc28e4a48 100644
--- a/bsd/crypto/aes/gen/aesopt.h
+++ b/bsd/crypto/aes/gen/aesopt.h
@@ -585,12 +585,12 @@
 
 #elif (ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER)
 
-#define word_in(x,c)    (*((aes_32t*)(x)+(c)))
+#define word_in(x,c)    (*((const aes_32t*)(x)+(c)))
 #define word_out(x,c,v) (*((aes_32t*)(x)+(c)) = (v))
 
 #else
 
-#define word_in(x,c)    aes_sw32(*((aes_32t*)(x)+(c)))
+#define word_in(x,c)    aes_sw32(*((const aes_32t*)(x)+(c)))
 #define word_out(x,c,v) (*((aes_32t*)(x)+(c)) = aes_sw32(v))
 
 #endif
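
The const qualifiers added to word_in() let it read from const input buffers
without tripping -Wcast-qual.  A standalone sketch of the two load paths,
where swap32() stands in for aes_sw32():

    #include <stdint.h>

    static inline uint32_t swap32(uint32_t v)   /* stand-in for aes_sw32 */
    {
        return (v >> 24) | ((v >> 8) & 0x0000ff00u) |
               ((v << 8) & 0x00ff0000u) | (v << 24);
    }

    /* Byte orders match: load the word directly. */
    static inline uint32_t word_in_native(const unsigned char *x, int c)
    {
        return *((const uint32_t *)x + c);
    }

    /* Byte orders differ: byte-swap on load. */
    static inline uint32_t word_in_swapped(const unsigned char *x, int c)
    {
        return swap32(*((const uint32_t *)x + c));
    }
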
diff --git a/bsd/crypto/aes/i386/AES.s b/bsd/crypto/aes/i386/AES.s
new file mode 100644
index 000000000..9bf440a68
--- /dev/null
+++ b/bsd/crypto/aes/i386/AES.s
@@ -0,0 +1,143 @@
+/*	AES.s -- Core AES routines for Intel processors.
+
+	Written by Eric Postpischil, January 30, 2008.
+*/
+
+
+/*	We build these AES routines as a single module because the routines refer
+	to labels in Data.s and it is easier and faster to refer to them as local
+	labels.  In my implementations of AES for CommonCrypto, both i386 and
+	x86_64 use position-independent code.  For this in-kernel implementation,
+	i386 has been converted to absolute addressing, but x86_64 still uses PIC.
+
+	A local label can be referred to with position-independent assembler
+	expressions such as "label-base(register)", where <base> is a local label
+	whose address has been loaded into <register>.  (On i386, this is typically
+	done with the idiom of a call to the next instruction and a pop of that
+	return address into a register.)  Without local labels, the references must
+	be done using spaces for addresses of "lazy symbols" that are filled in by
+	the dynamic loader and loaded by the code that wants the address.
+
+	So the various routines in other files are assembled here via #include
+	directives.
+*/
+#include "Data.s"
+
+
+#define	TableSize	(256*4)
+	/*	Each of the arrays defined in Data.s except for the round constants
+		in _AESRcon is composed of four tables of 256 entries of four bytes
+		each.  TableSize is the number of bytes in one of those four tables.
+	*/
+
+
+// Include constants describing the AES context structures.
+#include "Context.h"
+
+
+/*	Define a macro to select a value based on architecture.  This reduces
+	some of the architecture conditionalization later in the source.
+*/
+#if defined __i386__
+	#define	Arch(i386, x86_64)	i386
+#elif defined __x86_64__
+	#define	Arch(i386, x86_64)	x86_64
+#endif
+
+
+// Define an instruction for moving pointers.
+#define	movp	Arch(movd, movd)
+	// Latter argument should be "movq", but the assembler uses "movd".
+
+
+/*	Rename the general registers.  This makes it easier to keep track of them
+	and provides names for the "whole register" that are uniform between i386
+	and x86_64.
+*/
+#if defined __i386__
+	#define	r0	%eax	// Available for any use.
+	#define	r1	%ecx	// Available for any use, some special purposes (loop).
+	#define	r2	%edx	// Available for any use.
+	#define	r3	%ebx	// Must be preserved by called routine.
+	#define	r4	%esp	// Stack pointer.
+	#define	r5	%ebp	// Frame pointer, must preserve, no bare indirect.
+	#define	r6	%esi	// Must be preserved by called routine.
+	#define	r7	%edi	// Must be preserved by called routine.
+#elif defined __x86_64__
+	#define	r0	%rax	// Available for any use.
+	#define	r1	%rcx	// Available for any use.
+	#define	r2	%rdx	// Available for any use.
+	#define	r3	%rbx	// Must be preserved by called routine.
+	#define	r4	%rsp	// Stack pointer.
+	#define	r5	%rbp	// Frame pointer.  Must be preserved by called routine.
+	#define	r6	%rsi	// Available for any use.
+	#define	r7	%rdi	// Available for any use.
+	#define	r8	%r8		// Available for any use.
+	#define	r9	%r9		// Available for any use.
+	#define	r10	%r10	// Available for any use.
+	#define	r11	%r11	// Available for any use.
+	#define	r12	%r12	// Must be preserved by called routine.
+	#define	r13	%r13	// Must be preserved by called routine.
+	#define	r14	%r14	// Must be preserved by called routine.
+	#define	r15	%r15	// Must be preserved by called routine.
+#else
+	#error "Unknown architecture."
+#endif
+
+// Define names for parts of registers.
+
+#define	r0d		%eax	// Low 32 bits of r0.
+#define	r1d		%ecx	// Low 32 bits of r1.
+#define	r2d		%edx	// Low 32 bits of r2.
+#define	r3d		%ebx	// Low 32 bits of r3.
+#define	r5d		%ebp	// Low 32 bits of r5.
+#define	r6d		%esi	// Low 32 bits of r6.
+#define	r7d		%edi	// Low 32 bits of r7.
+#define	r8d		%r8d	// Low 32 bits of r8.
+#define	r9d		%r9d	// Low 32 bits of r9.
+#define	r11d	%r11d	// Low 32 bits of r11.
+
+#define	r0l		%al		// Low byte of r0.
+#define	r1l		%cl		// Low byte of r1.
+#define	r2l		%dl		// Low byte of r2.
+#define	r3l		%bl		// Low byte of r3.
+#define	r5l		%bpl	// Low byte of r5.
+
+#define	r0h		%ah		// Second lowest byte of r0.
+#define	r1h		%ch		// Second lowest byte of r1.
+#define	r2h		%dh		// Second lowest byte of r2.
+#define	r3h		%bh		// Second lowest byte of r3.
+
+
+	.text
+
+
+// Define encryption routine, _AESEncryptWithExpandedKey
+#define	Select	0
+#include "EncryptDecrypt.s"
+#undef	Select
+
+
+// Define decryption routine, _AESDecryptWithExpandedKey
+#define	Select	1
+#include "EncryptDecrypt.s"
+#undef	Select
+
+// Define a second encryption routine variant (Select 2).
+#define	Select	2
+#include "EncryptDecrypt.s"
+#undef	Select
+
+
+// Define a second decryption routine variant (Select 3).
+#define	Select	3
+#include "EncryptDecrypt.s"
+#undef	Select
+
+
+// Define key expansion routine for encryption, _AESExpandKeyForEncryption.
+#include "ExpandKeyForEncryption.s"
+
+
+// Define key expansion for decryption routine, _AESExpandKeyForDecryption.
+#include "ExpandKeyForDecryption.s"
diff --git a/bsd/crypto/aes/i386/Context.h b/bsd/crypto/aes/i386/Context.h
new file mode 100644
index 000000000..f53cb9514
--- /dev/null
+++ b/bsd/crypto/aes/i386/Context.h
@@ -0,0 +1,9 @@
+// Define byte offset of key within context structure.
+#define	ContextKey			0
+
+/*	Define byte offset of key length within context structure.  The number
+	stored there is the number of bytes from the start of the first round key
+	to the start of the last round key.  That is 16 less than the number of
+	bytes in the entire key.
+*/
+#define	ContextKeyLength	240
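
These constants describe a context whose round-key array sits at offset 0 and
whose info word follows 240 bytes later (15 round keys of 16 bytes each, the
AES-256 schedule).  A layout sketch consistent with both values; the struct is
illustrative, the real types being aes_encrypt_ctx/aes_decrypt_ctx from
crypto/aes.h:

    #include <stddef.h>
    #include <stdint.h>

    typedef struct {
        uint32_t ks[60];   /* round keys: 60 words = 240 bytes, offset 0 */
        uint32_t inf;      /* key-length info, offset 240 */
    } ctx_sketch;

    /* Mirrors the checks in bsd/crypto/aes/Assert.c. */
    extern char layout_ok_ks[offsetof(ctx_sketch, ks) == 0 ? 1 : -1];
    extern char layout_ok_inf[offsetof(ctx_sketch, inf) == 240 ? 1 : -1];
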
diff --git a/bsd/crypto/aes/i386/Data.mk b/bsd/crypto/aes/i386/Data.mk
new file mode 100644
index 000000000..4b55d630f
--- /dev/null
+++ b/bsd/crypto/aes/i386/Data.mk
@@ -0,0 +1,30 @@
+default:
+	@echo "This makefile builds Data.s, which contains constant data for the"
+	@echo "AES implementation.  This file does not normally need to be rebuilt,"
+	@echo "so it is checked into the source code repository.  It should be"
+	@echo "changed only when the implementation changes and needs data in a"
+	@echo "different format.  (This file can also build a C version, Data.c,"
+	@echo "but that is not currently in use.)"
+	@echo ""
+	@echo "To rebuild the file(s), execute \"make -f Data.mk all\"."
+
+.PHONY:	all clean
+Targets = Data.s
+all:	$(Targets)
+
+CFLAGS += -O3 -std=c99 -Wmost -Werror
+
+.INTERMEDIATE:	MakeData
+MakeData:	MakeData.c
+
+# Do not leave bad output files if the build fails.
+.DELETE_ON_ERROR:	$(Targets)
+
+Data.c:	MakeData
+	./$< >$@ C
+
+Data.s:	MakeData
+	./$< >$@ Intel
+
+clean:
+	-rm $(Targets)
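
MakeData emits the same tables as either C or assembly, selected by a
command-line argument, and the makefile above redirects its stdout into the
target file.  A minimal sketch of that generator style; the real program
computes full 256-entry AES tables, so the single value here is illustrative:

    #include <stdio.h>
    #include <string.h>

    int main(int argc, char **argv)
    {
        int intel = (argc > 1 && strcmp(argv[1], "Intel") == 0);
        unsigned long value = 0x0b0d090eUL;   /* one sample table entry */

        if (intel)
            printf("\t.long\t0x%08lx\n", value);   /* assembly form */
        else
            printf("0x%08lx,\n", value);           /* C initializer form */
        return 0;
    }
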
diff --git a/bsd/crypto/aes/i386/Data.s b/bsd/crypto/aes/i386/Data.s
new file mode 100644
index 000000000..d330462d0
--- /dev/null
+++ b/bsd/crypto/aes/i386/Data.s
@@ -0,0 +1,5196 @@
+// This file was generated by MakeData.c.
+
+
+	.const
+
+
+// Round constants.
+	.globl	_AESRcon
+	.private_extern	_AESRcon
+_AESRcon:
+	.byte	0	// Not used, included for indexing simplicity.
+	.byte	0x01
+	.byte	0x02
+	.byte	0x04
+	.byte	0x08
+	.byte	0x10
+	.byte	0x20
+	.byte	0x40
+	.byte	0x80
+	.byte	0x1b
+	.byte	0x36
+
+
+// Tables for InvMixColumn.
+	.globl	_AESInvMixColumnTable
+	.private_extern	_AESInvMixColumnTable
+	.align	2
+_AESInvMixColumnTable:
+	// Table 0.
+	.long	0x00000000
+	.long	0x0b0d090e
+	.long	0x161a121c
+	.long	0x1d171b12
+	.long	0x2c342438
+	.long	0x27392d36
+	.long	0x3a2e3624
+	.long	0x31233f2a
+	.long	0x58684870
+	.long	0x5365417e
+	.long	0x4e725a6c
+	.long	0x457f5362
+	.long	0x745c6c48
+	.long	0x7f516546
+	.long	0x62467e54
+	.long	0x694b775a
+	.long	0xb0d090e0
+	.long	0xbbdd99ee
+	.long	0xa6ca82fc
+	.long	0xadc78bf2
+	.long	0x9ce4b4d8
+	.long	0x97e9bdd6
+	.long	0x8afea6c4
+	.long	0x81f3afca
+	.long	0xe8b8d890
+	.long	0xe3b5d19e
+	.long	0xfea2ca8c
+	.long	0xf5afc382
+	.long	0xc48cfca8
+	.long	0xcf81f5a6
+	.long	0xd296eeb4
+	.long	0xd99be7ba
+	.long	0x7bbb3bdb
+	.long	0x70b632d5
+	.long	0x6da129c7
+	.long	0x66ac20c9
+	.long	0x578f1fe3
+	.long	0x5c8216ed
+	.long	0x41950dff
+	.long	0x4a9804f1
+	.long	0x23d373ab
+	.long	0x28de7aa5
+	.long	0x35c961b7
+	.long	0x3ec468b9
+	.long	0x0fe75793
+	.long	0x04ea5e9d
+	.long	0x19fd458f
+	.long	0x12f04c81
+	.long	0xcb6bab3b
+	.long	0xc066a235
+	.long	0xdd71b927
+	.long	0xd67cb029
+	.long	0xe75f8f03
+	.long	0xec52860d
+	.long	0xf1459d1f
+	.long	0xfa489411
+	.long	0x9303e34b
+	.long	0x980eea45
+	.long	0x8519f157
+	.long	0x8e14f859
+	.long	0xbf37c773
+	.long	0xb43ace7d
+	.long	0xa92dd56f
+	.long	0xa220dc61
+	.long	0xf66d76ad
+	.long	0xfd607fa3
+	.long	0xe07764b1
+	.long	0xeb7a6dbf
+	.long	0xda595295
+	.long	0xd1545b9b
+	.long	0xcc434089
+	.long	0xc74e4987
+	.long	0xae053edd
+	.long	0xa50837d3
+	.long	0xb81f2cc1
+	.long	0xb31225cf
+	.long	0x82311ae5
+	.long	0x893c13eb
+	.long	0x942b08f9
+	.long	0x9f2601f7
+	.long	0x46bde64d
+	.long	0x4db0ef43
+	.long	0x50a7f451
+	.long	0x5baafd5f
+	.long	0x6a89c275
+	.long	0x6184cb7b
+	.long	0x7c93d069
+	.long	0x779ed967
+	.long	0x1ed5ae3d
+	.long	0x15d8a733
+	.long	0x08cfbc21
+	.long	0x03c2b52f
+	.long	0x32e18a05
+	.long	0x39ec830b
+	.long	0x24fb9819
+	.long	0x2ff69117
+	.long	0x8dd64d76
+	.long	0x86db4478
+	.long	0x9bcc5f6a
+	.long	0x90c15664
+	.long	0xa1e2694e
+	.long	0xaaef6040
+	.long	0xb7f87b52
+	.long	0xbcf5725c
+	.long	0xd5be0506
+	.long	0xdeb30c08
+	.long	0xc3a4171a
+	.long	0xc8a91e14
+	.long	0xf98a213e
+	.long	0xf2872830
+	.long	0xef903322
+	.long	0xe49d3a2c
+	.long	0x3d06dd96
+	.long	0x360bd498
+	.long	0x2b1ccf8a
+	.long	0x2011c684
+	.long	0x1132f9ae
+	.long	0x1a3ff0a0
+	.long	0x0728ebb2
+	.long	0x0c25e2bc
+	.long	0x656e95e6
+	.long	0x6e639ce8
+	.long	0x737487fa
+	.long	0x78798ef4
+	.long	0x495ab1de
+	.long	0x4257b8d0
+	.long	0x5f40a3c2
+	.long	0x544daacc
+	.long	0xf7daec41
+	.long	0xfcd7e54f
+	.long	0xe1c0fe5d
+	.long	0xeacdf753
+	.long	0xdbeec879
+	.long	0xd0e3c177
+	.long	0xcdf4da65
+	.long	0xc6f9d36b
+	.long	0xafb2a431
+	.long	0xa4bfad3f
+	.long	0xb9a8b62d
+	.long	0xb2a5bf23
+	.long	0x83868009
+	.long	0x888b8907
+	.long	0x959c9215
+	.long	0x9e919b1b
+	.long	0x470a7ca1
+	.long	0x4c0775af
+	.long	0x51106ebd
+	.long	0x5a1d67b3
+	.long	0x6b3e5899
+	.long	0x60335197
+	.long	0x7d244a85
+	.long	0x7629438b
+	.long	0x1f6234d1
+	.long	0x146f3ddf
+	.long	0x097826cd
+	.long	0x02752fc3
+	.long	0x335610e9
+	.long	0x385b19e7
+	.long	0x254c02f5
+	.long	0x2e410bfb
+	.long	0x8c61d79a
+	.long	0x876cde94
+	.long	0x9a7bc586
+	.long	0x9176cc88
+	.long	0xa055f3a2
+	.long	0xab58faac
+	.long	0xb64fe1be
+	.long	0xbd42e8b0
+	.long	0xd4099fea
+	.long	0xdf0496e4
+	.long	0xc2138df6
+	.long	0xc91e84f8
+	.long	0xf83dbbd2
+	.long	0xf330b2dc
+	.long	0xee27a9ce
+	.long	0xe52aa0c0
+	.long	0x3cb1477a
+	.long	0x37bc4e74
+	.long	0x2aab5566
+	.long	0x21a65c68
+	.long	0x10856342
+	.long	0x1b886a4c
+	.long	0x069f715e
+	.long	0x0d927850
+	.long	0x64d90f0a
+	.long	0x6fd40604
+	.long	0x72c31d16
+	.long	0x79ce1418
+	.long	0x48ed2b32
+	.long	0x43e0223c
+	.long	0x5ef7392e
+	.long	0x55fa3020
+	.long	0x01b79aec
+	.long	0x0aba93e2
+	.long	0x17ad88f0
+	.long	0x1ca081fe
+	.long	0x2d83bed4
+	.long	0x268eb7da
+	.long	0x3b99acc8
+	.long	0x3094a5c6
+	.long	0x59dfd29c
+	.long	0x52d2db92
+	.long	0x4fc5c080
+	.long	0x44c8c98e
+	.long	0x75ebf6a4
+	.long	0x7ee6ffaa
+	.long	0x63f1e4b8
+	.long	0x68fcedb6
+	.long	0xb1670a0c
+	.long	0xba6a0302
+	.long	0xa77d1810
+	.long	0xac70111e
+	.long	0x9d532e34
+	.long	0x965e273a
+	.long	0x8b493c28
+	.long	0x80443526
+	.long	0xe90f427c
+	.long	0xe2024b72
+	.long	0xff155060
+	.long	0xf418596e
+	.long	0xc53b6644
+	.long	0xce366f4a
+	.long	0xd3217458
+	.long	0xd82c7d56
+	.long	0x7a0ca137
+	.long	0x7101a839
+	.long	0x6c16b32b
+	.long	0x671bba25
+	.long	0x5638850f
+	.long	0x5d358c01
+	.long	0x40229713
+	.long	0x4b2f9e1d
+	.long	0x2264e947
+	.long	0x2969e049
+	.long	0x347efb5b
+	.long	0x3f73f255
+	.long	0x0e50cd7f
+	.long	0x055dc471
+	.long	0x184adf63
+	.long	0x1347d66d
+	.long	0xcadc31d7
+	.long	0xc1d138d9
+	.long	0xdcc623cb
+	.long	0xd7cb2ac5
+	.long	0xe6e815ef
+	.long	0xede51ce1
+	.long	0xf0f207f3
+	.long	0xfbff0efd
+	.long	0x92b479a7
+	.long	0x99b970a9
+	.long	0x84ae6bbb
+	.long	0x8fa362b5
+	.long	0xbe805d9f
+	.long	0xb58d5491
+	.long	0xa89a4f83
+	.long	0xa397468d
+	// Table 1.
+	.long	0x00000000
+	.long	0x0d090e0b
+	.long	0x1a121c16
+	.long	0x171b121d
+	.long	0x3424382c
+	.long	0x392d3627
+	.long	0x2e36243a
+	.long	0x233f2a31
+	.long	0x68487058
+	.long	0x65417e53
+	.long	0x725a6c4e
+	.long	0x7f536245
+	.long	0x5c6c4874
+	.long	0x5165467f
+	.long	0x467e5462
+	.long	0x4b775a69
+	.long	0xd090e0b0
+	.long	0xdd99eebb
+	.long	0xca82fca6
+	.long	0xc78bf2ad
+	.long	0xe4b4d89c
+	.long	0xe9bdd697
+	.long	0xfea6c48a
+	.long	0xf3afca81
+	.long	0xb8d890e8
+	.long	0xb5d19ee3
+	.long	0xa2ca8cfe
+	.long	0xafc382f5
+	.long	0x8cfca8c4
+	.long	0x81f5a6cf
+	.long	0x96eeb4d2
+	.long	0x9be7bad9
+	.long	0xbb3bdb7b
+	.long	0xb632d570
+	.long	0xa129c76d
+	.long	0xac20c966
+	.long	0x8f1fe357
+	.long	0x8216ed5c
+	.long	0x950dff41
+	.long	0x9804f14a
+	.long	0xd373ab23
+	.long	0xde7aa528
+	.long	0xc961b735
+	.long	0xc468b93e
+	.long	0xe757930f
+	.long	0xea5e9d04
+	.long	0xfd458f19
+	.long	0xf04c8112
+	.long	0x6bab3bcb
+	.long	0x66a235c0
+	.long	0x71b927dd
+	.long	0x7cb029d6
+	.long	0x5f8f03e7
+	.long	0x52860dec
+	.long	0x459d1ff1
+	.long	0x489411fa
+	.long	0x03e34b93
+	.long	0x0eea4598
+	.long	0x19f15785
+	.long	0x14f8598e
+	.long	0x37c773bf
+	.long	0x3ace7db4
+	.long	0x2dd56fa9
+	.long	0x20dc61a2
+	.long	0x6d76adf6
+	.long	0x607fa3fd
+	.long	0x7764b1e0
+	.long	0x7a6dbfeb
+	.long	0x595295da
+	.long	0x545b9bd1
+	.long	0x434089cc
+	.long	0x4e4987c7
+	.long	0x053eddae
+	.long	0x0837d3a5
+	.long	0x1f2cc1b8
+	.long	0x1225cfb3
+	.long	0x311ae582
+	.long	0x3c13eb89
+	.long	0x2b08f994
+	.long	0x2601f79f
+	.long	0xbde64d46
+	.long	0xb0ef434d
+	.long	0xa7f45150
+	.long	0xaafd5f5b
+	.long	0x89c2756a
+	.long	0x84cb7b61
+	.long	0x93d0697c
+	.long	0x9ed96777
+	.long	0xd5ae3d1e
+	.long	0xd8a73315
+	.long	0xcfbc2108
+	.long	0xc2b52f03
+	.long	0xe18a0532
+	.long	0xec830b39
+	.long	0xfb981924
+	.long	0xf691172f
+	.long	0xd64d768d
+	.long	0xdb447886
+	.long	0xcc5f6a9b
+	.long	0xc1566490
+	.long	0xe2694ea1
+	.long	0xef6040aa
+	.long	0xf87b52b7
+	.long	0xf5725cbc
+	.long	0xbe0506d5
+	.long	0xb30c08de
+	.long	0xa4171ac3
+	.long	0xa91e14c8
+	.long	0x8a213ef9
+	.long	0x872830f2
+	.long	0x903322ef
+	.long	0x9d3a2ce4
+	.long	0x06dd963d
+	.long	0x0bd49836
+	.long	0x1ccf8a2b
+	.long	0x11c68420
+	.long	0x32f9ae11
+	.long	0x3ff0a01a
+	.long	0x28ebb207
+	.long	0x25e2bc0c
+	.long	0x6e95e665
+	.long	0x639ce86e
+	.long	0x7487fa73
+	.long	0x798ef478
+	.long	0x5ab1de49
+	.long	0x57b8d042
+	.long	0x40a3c25f
+	.long	0x4daacc54
+	.long	0xdaec41f7
+	.long	0xd7e54ffc
+	.long	0xc0fe5de1
+	.long	0xcdf753ea
+	.long	0xeec879db
+	.long	0xe3c177d0
+	.long	0xf4da65cd
+	.long	0xf9d36bc6
+	.long	0xb2a431af
+	.long	0xbfad3fa4
+	.long	0xa8b62db9
+	.long	0xa5bf23b2
+	.long	0x86800983
+	.long	0x8b890788
+	.long	0x9c921595
+	.long	0x919b1b9e
+	.long	0x0a7ca147
+	.long	0x0775af4c
+	.long	0x106ebd51
+	.long	0x1d67b35a
+	.long	0x3e58996b
+	.long	0x33519760
+	.long	0x244a857d
+	.long	0x29438b76
+	.long	0x6234d11f
+	.long	0x6f3ddf14
+	.long	0x7826cd09
+	.long	0x752fc302
+	.long	0x5610e933
+	.long	0x5b19e738
+	.long	0x4c02f525
+	.long	0x410bfb2e
+	.long	0x61d79a8c
+	.long	0x6cde9487
+	.long	0x7bc5869a
+	.long	0x76cc8891
+	.long	0x55f3a2a0
+	.long	0x58faacab
+	.long	0x4fe1beb6
+	.long	0x42e8b0bd
+	.long	0x099fead4
+	.long	0x0496e4df
+	.long	0x138df6c2
+	.long	0x1e84f8c9
+	.long	0x3dbbd2f8
+	.long	0x30b2dcf3
+	.long	0x27a9ceee
+	.long	0x2aa0c0e5
+	.long	0xb1477a3c
+	.long	0xbc4e7437
+	.long	0xab55662a
+	.long	0xa65c6821
+	.long	0x85634210
+	.long	0x886a4c1b
+	.long	0x9f715e06
+	.long	0x9278500d
+	.long	0xd90f0a64
+	.long	0xd406046f
+	.long	0xc31d1672
+	.long	0xce141879
+	.long	0xed2b3248
+	.long	0xe0223c43
+	.long	0xf7392e5e
+	.long	0xfa302055
+	.long	0xb79aec01
+	.long	0xba93e20a
+	.long	0xad88f017
+	.long	0xa081fe1c
+	.long	0x83bed42d
+	.long	0x8eb7da26
+	.long	0x99acc83b
+	.long	0x94a5c630
+	.long	0xdfd29c59
+	.long	0xd2db9252
+	.long	0xc5c0804f
+	.long	0xc8c98e44
+	.long	0xebf6a475
+	.long	0xe6ffaa7e
+	.long	0xf1e4b863
+	.long	0xfcedb668
+	.long	0x670a0cb1
+	.long	0x6a0302ba
+	.long	0x7d1810a7
+	.long	0x70111eac
+	.long	0x532e349d
+	.long	0x5e273a96
+	.long	0x493c288b
+	.long	0x44352680
+	.long	0x0f427ce9
+	.long	0x024b72e2
+	.long	0x155060ff
+	.long	0x18596ef4
+	.long	0x3b6644c5
+	.long	0x366f4ace
+	.long	0x217458d3
+	.long	0x2c7d56d8
+	.long	0x0ca1377a
+	.long	0x01a83971
+	.long	0x16b32b6c
+	.long	0x1bba2567
+	.long	0x38850f56
+	.long	0x358c015d
+	.long	0x22971340
+	.long	0x2f9e1d4b
+	.long	0x64e94722
+	.long	0x69e04929
+	.long	0x7efb5b34
+	.long	0x73f2553f
+	.long	0x50cd7f0e
+	.long	0x5dc47105
+	.long	0x4adf6318
+	.long	0x47d66d13
+	.long	0xdc31d7ca
+	.long	0xd138d9c1
+	.long	0xc623cbdc
+	.long	0xcb2ac5d7
+	.long	0xe815efe6
+	.long	0xe51ce1ed
+	.long	0xf207f3f0
+	.long	0xff0efdfb
+	.long	0xb479a792
+	.long	0xb970a999
+	.long	0xae6bbb84
+	.long	0xa362b58f
+	.long	0x805d9fbe
+	.long	0x8d5491b5
+	.long	0x9a4f83a8
+	.long	0x97468da3
+	// Table 2.
+	.long	0x00000000
+	.long	0x090e0b0d
+	.long	0x121c161a
+	.long	0x1b121d17
+	.long	0x24382c34
+	.long	0x2d362739
+	.long	0x36243a2e
+	.long	0x3f2a3123
+	.long	0x48705868
+	.long	0x417e5365
+	.long	0x5a6c4e72
+	.long	0x5362457f
+	.long	0x6c48745c
+	.long	0x65467f51
+	.long	0x7e546246
+	.long	0x775a694b
+	.long	0x90e0b0d0
+	.long	0x99eebbdd
+	.long	0x82fca6ca
+	.long	0x8bf2adc7
+	.long	0xb4d89ce4
+	.long	0xbdd697e9
+	.long	0xa6c48afe
+	.long	0xafca81f3
+	.long	0xd890e8b8
+	.long	0xd19ee3b5
+	.long	0xca8cfea2
+	.long	0xc382f5af
+	.long	0xfca8c48c
+	.long	0xf5a6cf81
+	.long	0xeeb4d296
+	.long	0xe7bad99b
+	.long	0x3bdb7bbb
+	.long	0x32d570b6
+	.long	0x29c76da1
+	.long	0x20c966ac
+	.long	0x1fe3578f
+	.long	0x16ed5c82
+	.long	0x0dff4195
+	.long	0x04f14a98
+	.long	0x73ab23d3
+	.long	0x7aa528de
+	.long	0x61b735c9
+	.long	0x68b93ec4
+	.long	0x57930fe7
+	.long	0x5e9d04ea
+	.long	0x458f19fd
+	.long	0x4c8112f0
+	.long	0xab3bcb6b
+	.long	0xa235c066
+	.long	0xb927dd71
+	.long	0xb029d67c
+	.long	0x8f03e75f
+	.long	0x860dec52
+	.long	0x9d1ff145
+	.long	0x9411fa48
+	.long	0xe34b9303
+	.long	0xea45980e
+	.long	0xf1578519
+	.long	0xf8598e14
+	.long	0xc773bf37
+	.long	0xce7db43a
+	.long	0xd56fa92d
+	.long	0xdc61a220
+	.long	0x76adf66d
+	.long	0x7fa3fd60
+	.long	0x64b1e077
+	.long	0x6dbfeb7a
+	.long	0x5295da59
+	.long	0x5b9bd154
+	.long	0x4089cc43
+	.long	0x4987c74e
+	.long	0x3eddae05
+	.long	0x37d3a508
+	.long	0x2cc1b81f
+	.long	0x25cfb312
+	.long	0x1ae58231
+	.long	0x13eb893c
+	.long	0x08f9942b
+	.long	0x01f79f26
+	.long	0xe64d46bd
+	.long	0xef434db0
+	.long	0xf45150a7
+	.long	0xfd5f5baa
+	.long	0xc2756a89
+	.long	0xcb7b6184
+	.long	0xd0697c93
+	.long	0xd967779e
+	.long	0xae3d1ed5
+	.long	0xa73315d8
+	.long	0xbc2108cf
+	.long	0xb52f03c2
+	.long	0x8a0532e1
+	.long	0x830b39ec
+	.long	0x981924fb
+	.long	0x91172ff6
+	.long	0x4d768dd6
+	.long	0x447886db
+	.long	0x5f6a9bcc
+	.long	0x566490c1
+	.long	0x694ea1e2
+	.long	0x6040aaef
+	.long	0x7b52b7f8
+	.long	0x725cbcf5
+	.long	0x0506d5be
+	.long	0x0c08deb3
+	.long	0x171ac3a4
+	.long	0x1e14c8a9
+	.long	0x213ef98a
+	.long	0x2830f287
+	.long	0x3322ef90
+	.long	0x3a2ce49d
+	.long	0xdd963d06
+	.long	0xd498360b
+	.long	0xcf8a2b1c
+	.long	0xc6842011
+	.long	0xf9ae1132
+	.long	0xf0a01a3f
+	.long	0xebb20728
+	.long	0xe2bc0c25
+	.long	0x95e6656e
+	.long	0x9ce86e63
+	.long	0x87fa7374
+	.long	0x8ef47879
+	.long	0xb1de495a
+	.long	0xb8d04257
+	.long	0xa3c25f40
+	.long	0xaacc544d
+	.long	0xec41f7da
+	.long	0xe54ffcd7
+	.long	0xfe5de1c0
+	.long	0xf753eacd
+	.long	0xc879dbee
+	.long	0xc177d0e3
+	.long	0xda65cdf4
+	.long	0xd36bc6f9
+	.long	0xa431afb2
+	.long	0xad3fa4bf
+	.long	0xb62db9a8
+	.long	0xbf23b2a5
+	.long	0x80098386
+	.long	0x8907888b
+	.long	0x9215959c
+	.long	0x9b1b9e91
+	.long	0x7ca1470a
+	.long	0x75af4c07
+	.long	0x6ebd5110
+	.long	0x67b35a1d
+	.long	0x58996b3e
+	.long	0x51976033
+	.long	0x4a857d24
+	.long	0x438b7629
+	.long	0x34d11f62
+	.long	0x3ddf146f
+	.long	0x26cd0978
+	.long	0x2fc30275
+	.long	0x10e93356
+	.long	0x19e7385b
+	.long	0x02f5254c
+	.long	0x0bfb2e41
+	.long	0xd79a8c61
+	.long	0xde94876c
+	.long	0xc5869a7b
+	.long	0xcc889176
+	.long	0xf3a2a055
+	.long	0xfaacab58
+	.long	0xe1beb64f
+	.long	0xe8b0bd42
+	.long	0x9fead409
+	.long	0x96e4df04
+	.long	0x8df6c213
+	.long	0x84f8c91e
+	.long	0xbbd2f83d
+	.long	0xb2dcf330
+	.long	0xa9ceee27
+	.long	0xa0c0e52a
+	.long	0x477a3cb1
+	.long	0x4e7437bc
+	.long	0x55662aab
+	.long	0x5c6821a6
+	.long	0x63421085
+	.long	0x6a4c1b88
+	.long	0x715e069f
+	.long	0x78500d92
+	.long	0x0f0a64d9
+	.long	0x06046fd4
+	.long	0x1d1672c3
+	.long	0x141879ce
+	.long	0x2b3248ed
+	.long	0x223c43e0
+	.long	0x392e5ef7
+	.long	0x302055fa
+	.long	0x9aec01b7
+	.long	0x93e20aba
+	.long	0x88f017ad
+	.long	0x81fe1ca0
+	.long	0xbed42d83
+	.long	0xb7da268e
+	.long	0xacc83b99
+	.long	0xa5c63094
+	.long	0xd29c59df
+	.long	0xdb9252d2
+	.long	0xc0804fc5
+	.long	0xc98e44c8
+	.long	0xf6a475eb
+	.long	0xffaa7ee6
+	.long	0xe4b863f1
+	.long	0xedb668fc
+	.long	0x0a0cb167
+	.long	0x0302ba6a
+	.long	0x1810a77d
+	.long	0x111eac70
+	.long	0x2e349d53
+	.long	0x273a965e
+	.long	0x3c288b49
+	.long	0x35268044
+	.long	0x427ce90f
+	.long	0x4b72e202
+	.long	0x5060ff15
+	.long	0x596ef418
+	.long	0x6644c53b
+	.long	0x6f4ace36
+	.long	0x7458d321
+	.long	0x7d56d82c
+	.long	0xa1377a0c
+	.long	0xa8397101
+	.long	0xb32b6c16
+	.long	0xba25671b
+	.long	0x850f5638
+	.long	0x8c015d35
+	.long	0x97134022
+	.long	0x9e1d4b2f
+	.long	0xe9472264
+	.long	0xe0492969
+	.long	0xfb5b347e
+	.long	0xf2553f73
+	.long	0xcd7f0e50
+	.long	0xc471055d
+	.long	0xdf63184a
+	.long	0xd66d1347
+	.long	0x31d7cadc
+	.long	0x38d9c1d1
+	.long	0x23cbdcc6
+	.long	0x2ac5d7cb
+	.long	0x15efe6e8
+	.long	0x1ce1ede5
+	.long	0x07f3f0f2
+	.long	0x0efdfbff
+	.long	0x79a792b4
+	.long	0x70a999b9
+	.long	0x6bbb84ae
+	.long	0x62b58fa3
+	.long	0x5d9fbe80
+	.long	0x5491b58d
+	.long	0x4f83a89a
+	.long	0x468da397
+	// Table 3.
+	.long	0x00000000
+	.long	0x0e0b0d09
+	.long	0x1c161a12
+	.long	0x121d171b
+	.long	0x382c3424
+	.long	0x3627392d
+	.long	0x243a2e36
+	.long	0x2a31233f
+	.long	0x70586848
+	.long	0x7e536541
+	.long	0x6c4e725a
+	.long	0x62457f53
+	.long	0x48745c6c
+	.long	0x467f5165
+	.long	0x5462467e
+	.long	0x5a694b77
+	.long	0xe0b0d090
+	.long	0xeebbdd99
+	.long	0xfca6ca82
+	.long	0xf2adc78b
+	.long	0xd89ce4b4
+	.long	0xd697e9bd
+	.long	0xc48afea6
+	.long	0xca81f3af
+	.long	0x90e8b8d8
+	.long	0x9ee3b5d1
+	.long	0x8cfea2ca
+	.long	0x82f5afc3
+	.long	0xa8c48cfc
+	.long	0xa6cf81f5
+	.long	0xb4d296ee
+	.long	0xbad99be7
+	.long	0xdb7bbb3b
+	.long	0xd570b632
+	.long	0xc76da129
+	.long	0xc966ac20
+	.long	0xe3578f1f
+	.long	0xed5c8216
+	.long	0xff41950d
+	.long	0xf14a9804
+	.long	0xab23d373
+	.long	0xa528de7a
+	.long	0xb735c961
+	.long	0xb93ec468
+	.long	0x930fe757
+	.long	0x9d04ea5e
+	.long	0x8f19fd45
+	.long	0x8112f04c
+	.long	0x3bcb6bab
+	.long	0x35c066a2
+	.long	0x27dd71b9
+	.long	0x29d67cb0
+	.long	0x03e75f8f
+	.long	0x0dec5286
+	.long	0x1ff1459d
+	.long	0x11fa4894
+	.long	0x4b9303e3
+	.long	0x45980eea
+	.long	0x578519f1
+	.long	0x598e14f8
+	.long	0x73bf37c7
+	.long	0x7db43ace
+	.long	0x6fa92dd5
+	.long	0x61a220dc
+	.long	0xadf66d76
+	.long	0xa3fd607f
+	.long	0xb1e07764
+	.long	0xbfeb7a6d
+	.long	0x95da5952
+	.long	0x9bd1545b
+	.long	0x89cc4340
+	.long	0x87c74e49
+	.long	0xddae053e
+	.long	0xd3a50837
+	.long	0xc1b81f2c
+	.long	0xcfb31225
+	.long	0xe582311a
+	.long	0xeb893c13
+	.long	0xf9942b08
+	.long	0xf79f2601
+	.long	0x4d46bde6
+	.long	0x434db0ef
+	.long	0x5150a7f4
+	.long	0x5f5baafd
+	.long	0x756a89c2
+	.long	0x7b6184cb
+	.long	0x697c93d0
+	.long	0x67779ed9
+	.long	0x3d1ed5ae
+	.long	0x3315d8a7
+	.long	0x2108cfbc
+	.long	0x2f03c2b5
+	.long	0x0532e18a
+	.long	0x0b39ec83
+	.long	0x1924fb98
+	.long	0x172ff691
+	.long	0x768dd64d
+	.long	0x7886db44
+	.long	0x6a9bcc5f
+	.long	0x6490c156
+	.long	0x4ea1e269
+	.long	0x40aaef60
+	.long	0x52b7f87b
+	.long	0x5cbcf572
+	.long	0x06d5be05
+	.long	0x08deb30c
+	.long	0x1ac3a417
+	.long	0x14c8a91e
+	.long	0x3ef98a21
+	.long	0x30f28728
+	.long	0x22ef9033
+	.long	0x2ce49d3a
+	.long	0x963d06dd
+	.long	0x98360bd4
+	.long	0x8a2b1ccf
+	.long	0x842011c6
+	.long	0xae1132f9
+	.long	0xa01a3ff0
+	.long	0xb20728eb
+	.long	0xbc0c25e2
+	.long	0xe6656e95
+	.long	0xe86e639c
+	.long	0xfa737487
+	.long	0xf478798e
+	.long	0xde495ab1
+	.long	0xd04257b8
+	.long	0xc25f40a3
+	.long	0xcc544daa
+	.long	0x41f7daec
+	.long	0x4ffcd7e5
+	.long	0x5de1c0fe
+	.long	0x53eacdf7
+	.long	0x79dbeec8
+	.long	0x77d0e3c1
+	.long	0x65cdf4da
+	.long	0x6bc6f9d3
+	.long	0x31afb2a4
+	.long	0x3fa4bfad
+	.long	0x2db9a8b6
+	.long	0x23b2a5bf
+	.long	0x09838680
+	.long	0x07888b89
+	.long	0x15959c92
+	.long	0x1b9e919b
+	.long	0xa1470a7c
+	.long	0xaf4c0775
+	.long	0xbd51106e
+	.long	0xb35a1d67
+	.long	0x996b3e58
+	.long	0x97603351
+	.long	0x857d244a
+	.long	0x8b762943
+	.long	0xd11f6234
+	.long	0xdf146f3d
+	.long	0xcd097826
+	.long	0xc302752f
+	.long	0xe9335610
+	.long	0xe7385b19
+	.long	0xf5254c02
+	.long	0xfb2e410b
+	.long	0x9a8c61d7
+	.long	0x94876cde
+	.long	0x869a7bc5
+	.long	0x889176cc
+	.long	0xa2a055f3
+	.long	0xacab58fa
+	.long	0xbeb64fe1
+	.long	0xb0bd42e8
+	.long	0xead4099f
+	.long	0xe4df0496
+	.long	0xf6c2138d
+	.long	0xf8c91e84
+	.long	0xd2f83dbb
+	.long	0xdcf330b2
+	.long	0xceee27a9
+	.long	0xc0e52aa0
+	.long	0x7a3cb147
+	.long	0x7437bc4e
+	.long	0x662aab55
+	.long	0x6821a65c
+	.long	0x42108563
+	.long	0x4c1b886a
+	.long	0x5e069f71
+	.long	0x500d9278
+	.long	0x0a64d90f
+	.long	0x046fd406
+	.long	0x1672c31d
+	.long	0x1879ce14
+	.long	0x3248ed2b
+	.long	0x3c43e022
+	.long	0x2e5ef739
+	.long	0x2055fa30
+	.long	0xec01b79a
+	.long	0xe20aba93
+	.long	0xf017ad88
+	.long	0xfe1ca081
+	.long	0xd42d83be
+	.long	0xda268eb7
+	.long	0xc83b99ac
+	.long	0xc63094a5
+	.long	0x9c59dfd2
+	.long	0x9252d2db
+	.long	0x804fc5c0
+	.long	0x8e44c8c9
+	.long	0xa475ebf6
+	.long	0xaa7ee6ff
+	.long	0xb863f1e4
+	.long	0xb668fced
+	.long	0x0cb1670a
+	.long	0x02ba6a03
+	.long	0x10a77d18
+	.long	0x1eac7011
+	.long	0x349d532e
+	.long	0x3a965e27
+	.long	0x288b493c
+	.long	0x26804435
+	.long	0x7ce90f42
+	.long	0x72e2024b
+	.long	0x60ff1550
+	.long	0x6ef41859
+	.long	0x44c53b66
+	.long	0x4ace366f
+	.long	0x58d32174
+	.long	0x56d82c7d
+	.long	0x377a0ca1
+	.long	0x397101a8
+	.long	0x2b6c16b3
+	.long	0x25671bba
+	.long	0x0f563885
+	.long	0x015d358c
+	.long	0x13402297
+	.long	0x1d4b2f9e
+	.long	0x472264e9
+	.long	0x492969e0
+	.long	0x5b347efb
+	.long	0x553f73f2
+	.long	0x7f0e50cd
+	.long	0x71055dc4
+	.long	0x63184adf
+	.long	0x6d1347d6
+	.long	0xd7cadc31
+	.long	0xd9c1d138
+	.long	0xcbdcc623
+	.long	0xc5d7cb2a
+	.long	0xefe6e815
+	.long	0xe1ede51c
+	.long	0xf3f0f207
+	.long	0xfdfbff0e
+	.long	0xa792b479
+	.long	0xa999b970
+	.long	0xbb84ae6b
+	.long	0xb58fa362
+	.long	0x9fbe805d
+	.long	0x91b58d54
+	.long	0x83a89a4f
+	.long	0x8da39746
+
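+// Note: each entry of the tables above packs the four GF(2^8) products
+// {0e, 09, 0d, 0b} times the index byte b, in the byte rotations used
+// by InvMixColumn; e.g. entry 1 of Table 0 is 0x0b0d090e.  The
+// encryption tables below likewise fold SubBytes into MixColumns,
+// packing {02, 01, 01, 03} times S[b] (entry 0 is 0xa56363c6, since
+// S[0] = 0x63).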
+
+// Tables for main encryption iterations.
+	.globl	_AESEncryptTable
+	.private_extern	_AESEncryptTable
+	.align	2
+_AESEncryptTable:
+	// Table 0.
+	.long	0xa56363c6
+	.long	0x847c7cf8
+	.long	0x997777ee
+	.long	0x8d7b7bf6
+	.long	0x0df2f2ff
+	.long	0xbd6b6bd6
+	.long	0xb16f6fde
+	.long	0x54c5c591
+	.long	0x50303060
+	.long	0x03010102
+	.long	0xa96767ce
+	.long	0x7d2b2b56
+	.long	0x19fefee7
+	.long	0x62d7d7b5
+	.long	0xe6abab4d
+	.long	0x9a7676ec
+	.long	0x45caca8f
+	.long	0x9d82821f
+	.long	0x40c9c989
+	.long	0x877d7dfa
+	.long	0x15fafaef
+	.long	0xeb5959b2
+	.long	0xc947478e
+	.long	0x0bf0f0fb
+	.long	0xecadad41
+	.long	0x67d4d4b3
+	.long	0xfda2a25f
+	.long	0xeaafaf45
+	.long	0xbf9c9c23
+	.long	0xf7a4a453
+	.long	0x967272e4
+	.long	0x5bc0c09b
+	.long	0xc2b7b775
+	.long	0x1cfdfde1
+	.long	0xae93933d
+	.long	0x6a26264c
+	.long	0x5a36366c
+	.long	0x413f3f7e
+	.long	0x02f7f7f5
+	.long	0x4fcccc83
+	.long	0x5c343468
+	.long	0xf4a5a551
+	.long	0x34e5e5d1
+	.long	0x08f1f1f9
+	.long	0x937171e2
+	.long	0x73d8d8ab
+	.long	0x53313162
+	.long	0x3f15152a
+	.long	0x0c040408
+	.long	0x52c7c795
+	.long	0x65232346
+	.long	0x5ec3c39d
+	.long	0x28181830
+	.long	0xa1969637
+	.long	0x0f05050a
+	.long	0xb59a9a2f
+	.long	0x0907070e
+	.long	0x36121224
+	.long	0x9b80801b
+	.long	0x3de2e2df
+	.long	0x26ebebcd
+	.long	0x6927274e
+	.long	0xcdb2b27f
+	.long	0x9f7575ea
+	.long	0x1b090912
+	.long	0x9e83831d
+	.long	0x742c2c58
+	.long	0x2e1a1a34
+	.long	0x2d1b1b36
+	.long	0xb26e6edc
+	.long	0xee5a5ab4
+	.long	0xfba0a05b
+	.long	0xf65252a4
+	.long	0x4d3b3b76
+	.long	0x61d6d6b7
+	.long	0xceb3b37d
+	.long	0x7b292952
+	.long	0x3ee3e3dd
+	.long	0x712f2f5e
+	.long	0x97848413
+	.long	0xf55353a6
+	.long	0x68d1d1b9
+	.long	0x00000000
+	.long	0x2cededc1
+	.long	0x60202040
+	.long	0x1ffcfce3
+	.long	0xc8b1b179
+	.long	0xed5b5bb6
+	.long	0xbe6a6ad4
+	.long	0x46cbcb8d
+	.long	0xd9bebe67
+	.long	0x4b393972
+	.long	0xde4a4a94
+	.long	0xd44c4c98
+	.long	0xe85858b0
+	.long	0x4acfcf85
+	.long	0x6bd0d0bb
+	.long	0x2aefefc5
+	.long	0xe5aaaa4f
+	.long	0x16fbfbed
+	.long	0xc5434386
+	.long	0xd74d4d9a
+	.long	0x55333366
+	.long	0x94858511
+	.long	0xcf45458a
+	.long	0x10f9f9e9
+	.long	0x06020204
+	.long	0x817f7ffe
+	.long	0xf05050a0
+	.long	0x443c3c78
+	.long	0xba9f9f25
+	.long	0xe3a8a84b
+	.long	0xf35151a2
+	.long	0xfea3a35d
+	.long	0xc0404080
+	.long	0x8a8f8f05
+	.long	0xad92923f
+	.long	0xbc9d9d21
+	.long	0x48383870
+	.long	0x04f5f5f1
+	.long	0xdfbcbc63
+	.long	0xc1b6b677
+	.long	0x75dadaaf
+	.long	0x63212142
+	.long	0x30101020
+	.long	0x1affffe5
+	.long	0x0ef3f3fd
+	.long	0x6dd2d2bf
+	.long	0x4ccdcd81
+	.long	0x140c0c18
+	.long	0x35131326
+	.long	0x2fececc3
+	.long	0xe15f5fbe
+	.long	0xa2979735
+	.long	0xcc444488
+	.long	0x3917172e
+	.long	0x57c4c493
+	.long	0xf2a7a755
+	.long	0x827e7efc
+	.long	0x473d3d7a
+	.long	0xac6464c8
+	.long	0xe75d5dba
+	.long	0x2b191932
+	.long	0x957373e6
+	.long	0xa06060c0
+	.long	0x98818119
+	.long	0xd14f4f9e
+	.long	0x7fdcdca3
+	.long	0x66222244
+	.long	0x7e2a2a54
+	.long	0xab90903b
+	.long	0x8388880b
+	.long	0xca46468c
+	.long	0x29eeeec7
+	.long	0xd3b8b86b
+	.long	0x3c141428
+	.long	0x79dedea7
+	.long	0xe25e5ebc
+	.long	0x1d0b0b16
+	.long	0x76dbdbad
+	.long	0x3be0e0db
+	.long	0x56323264
+	.long	0x4e3a3a74
+	.long	0x1e0a0a14
+	.long	0xdb494992
+	.long	0x0a06060c
+	.long	0x6c242448
+	.long	0xe45c5cb8
+	.long	0x5dc2c29f
+	.long	0x6ed3d3bd
+	.long	0xefacac43
+	.long	0xa66262c4
+	.long	0xa8919139
+	.long	0xa4959531
+	.long	0x37e4e4d3
+	.long	0x8b7979f2
+	.long	0x32e7e7d5
+	.long	0x43c8c88b
+	.long	0x5937376e
+	.long	0xb76d6dda
+	.long	0x8c8d8d01
+	.long	0x64d5d5b1
+	.long	0xd24e4e9c
+	.long	0xe0a9a949
+	.long	0xb46c6cd8
+	.long	0xfa5656ac
+	.long	0x07f4f4f3
+	.long	0x25eaeacf
+	.long	0xaf6565ca
+	.long	0x8e7a7af4
+	.long	0xe9aeae47
+	.long	0x18080810
+	.long	0xd5baba6f
+	.long	0x887878f0
+	.long	0x6f25254a
+	.long	0x722e2e5c
+	.long	0x241c1c38
+	.long	0xf1a6a657
+	.long	0xc7b4b473
+	.long	0x51c6c697
+	.long	0x23e8e8cb
+	.long	0x7cdddda1
+	.long	0x9c7474e8
+	.long	0x211f1f3e
+	.long	0xdd4b4b96
+	.long	0xdcbdbd61
+	.long	0x868b8b0d
+	.long	0x858a8a0f
+	.long	0x907070e0
+	.long	0x423e3e7c
+	.long	0xc4b5b571
+	.long	0xaa6666cc
+	.long	0xd8484890
+	.long	0x05030306
+	.long	0x01f6f6f7
+	.long	0x120e0e1c
+	.long	0xa36161c2
+	.long	0x5f35356a
+	.long	0xf95757ae
+	.long	0xd0b9b969
+	.long	0x91868617
+	.long	0x58c1c199
+	.long	0x271d1d3a
+	.long	0xb99e9e27
+	.long	0x38e1e1d9
+	.long	0x13f8f8eb
+	.long	0xb398982b
+	.long	0x33111122
+	.long	0xbb6969d2
+	.long	0x70d9d9a9
+	.long	0x898e8e07
+	.long	0xa7949433
+	.long	0xb69b9b2d
+	.long	0x221e1e3c
+	.long	0x92878715
+	.long	0x20e9e9c9
+	.long	0x49cece87
+	.long	0xff5555aa
+	.long	0x78282850
+	.long	0x7adfdfa5
+	.long	0x8f8c8c03
+	.long	0xf8a1a159
+	.long	0x80898909
+	.long	0x170d0d1a
+	.long	0xdabfbf65
+	.long	0x31e6e6d7
+	.long	0xc6424284
+	.long	0xb86868d0
+	.long	0xc3414182
+	.long	0xb0999929
+	.long	0x772d2d5a
+	.long	0x110f0f1e
+	.long	0xcbb0b07b
+	.long	0xfc5454a8
+	.long	0xd6bbbb6d
+	.long	0x3a16162c
+	// Table 1.
+	.long	0x6363c6a5
+	.long	0x7c7cf884
+	.long	0x7777ee99
+	.long	0x7b7bf68d
+	.long	0xf2f2ff0d
+	.long	0x6b6bd6bd
+	.long	0x6f6fdeb1
+	.long	0xc5c59154
+	.long	0x30306050
+	.long	0x01010203
+	.long	0x6767cea9
+	.long	0x2b2b567d
+	.long	0xfefee719
+	.long	0xd7d7b562
+	.long	0xabab4de6
+	.long	0x7676ec9a
+	.long	0xcaca8f45
+	.long	0x82821f9d
+	.long	0xc9c98940
+	.long	0x7d7dfa87
+	.long	0xfafaef15
+	.long	0x5959b2eb
+	.long	0x47478ec9
+	.long	0xf0f0fb0b
+	.long	0xadad41ec
+	.long	0xd4d4b367
+	.long	0xa2a25ffd
+	.long	0xafaf45ea
+	.long	0x9c9c23bf
+	.long	0xa4a453f7
+	.long	0x7272e496
+	.long	0xc0c09b5b
+	.long	0xb7b775c2
+	.long	0xfdfde11c
+	.long	0x93933dae
+	.long	0x26264c6a
+	.long	0x36366c5a
+	.long	0x3f3f7e41
+	.long	0xf7f7f502
+	.long	0xcccc834f
+	.long	0x3434685c
+	.long	0xa5a551f4
+	.long	0xe5e5d134
+	.long	0xf1f1f908
+	.long	0x7171e293
+	.long	0xd8d8ab73
+	.long	0x31316253
+	.long	0x15152a3f
+	.long	0x0404080c
+	.long	0xc7c79552
+	.long	0x23234665
+	.long	0xc3c39d5e
+	.long	0x18183028
+	.long	0x969637a1
+	.long	0x05050a0f
+	.long	0x9a9a2fb5
+	.long	0x07070e09
+	.long	0x12122436
+	.long	0x80801b9b
+	.long	0xe2e2df3d
+	.long	0xebebcd26
+	.long	0x27274e69
+	.long	0xb2b27fcd
+	.long	0x7575ea9f
+	.long	0x0909121b
+	.long	0x83831d9e
+	.long	0x2c2c5874
+	.long	0x1a1a342e
+	.long	0x1b1b362d
+	.long	0x6e6edcb2
+	.long	0x5a5ab4ee
+	.long	0xa0a05bfb
+	.long	0x5252a4f6
+	.long	0x3b3b764d
+	.long	0xd6d6b761
+	.long	0xb3b37dce
+	.long	0x2929527b
+	.long	0xe3e3dd3e
+	.long	0x2f2f5e71
+	.long	0x84841397
+	.long	0x5353a6f5
+	.long	0xd1d1b968
+	.long	0x00000000
+	.long	0xededc12c
+	.long	0x20204060
+	.long	0xfcfce31f
+	.long	0xb1b179c8
+	.long	0x5b5bb6ed
+	.long	0x6a6ad4be
+	.long	0xcbcb8d46
+	.long	0xbebe67d9
+	.long	0x3939724b
+	.long	0x4a4a94de
+	.long	0x4c4c98d4
+	.long	0x5858b0e8
+	.long	0xcfcf854a
+	.long	0xd0d0bb6b
+	.long	0xefefc52a
+	.long	0xaaaa4fe5
+	.long	0xfbfbed16
+	.long	0x434386c5
+	.long	0x4d4d9ad7
+	.long	0x33336655
+	.long	0x85851194
+	.long	0x45458acf
+	.long	0xf9f9e910
+	.long	0x02020406
+	.long	0x7f7ffe81
+	.long	0x5050a0f0
+	.long	0x3c3c7844
+	.long	0x9f9f25ba
+	.long	0xa8a84be3
+	.long	0x5151a2f3
+	.long	0xa3a35dfe
+	.long	0x404080c0
+	.long	0x8f8f058a
+	.long	0x92923fad
+	.long	0x9d9d21bc
+	.long	0x38387048
+	.long	0xf5f5f104
+	.long	0xbcbc63df
+	.long	0xb6b677c1
+	.long	0xdadaaf75
+	.long	0x21214263
+	.long	0x10102030
+	.long	0xffffe51a
+	.long	0xf3f3fd0e
+	.long	0xd2d2bf6d
+	.long	0xcdcd814c
+	.long	0x0c0c1814
+	.long	0x13132635
+	.long	0xececc32f
+	.long	0x5f5fbee1
+	.long	0x979735a2
+	.long	0x444488cc
+	.long	0x17172e39
+	.long	0xc4c49357
+	.long	0xa7a755f2
+	.long	0x7e7efc82
+	.long	0x3d3d7a47
+	.long	0x6464c8ac
+	.long	0x5d5dbae7
+	.long	0x1919322b
+	.long	0x7373e695
+	.long	0x6060c0a0
+	.long	0x81811998
+	.long	0x4f4f9ed1
+	.long	0xdcdca37f
+	.long	0x22224466
+	.long	0x2a2a547e
+	.long	0x90903bab
+	.long	0x88880b83
+	.long	0x46468cca
+	.long	0xeeeec729
+	.long	0xb8b86bd3
+	.long	0x1414283c
+	.long	0xdedea779
+	.long	0x5e5ebce2
+	.long	0x0b0b161d
+	.long	0xdbdbad76
+	.long	0xe0e0db3b
+	.long	0x32326456
+	.long	0x3a3a744e
+	.long	0x0a0a141e
+	.long	0x494992db
+	.long	0x06060c0a
+	.long	0x2424486c
+	.long	0x5c5cb8e4
+	.long	0xc2c29f5d
+	.long	0xd3d3bd6e
+	.long	0xacac43ef
+	.long	0x6262c4a6
+	.long	0x919139a8
+	.long	0x959531a4
+	.long	0xe4e4d337
+	.long	0x7979f28b
+	.long	0xe7e7d532
+	.long	0xc8c88b43
+	.long	0x37376e59
+	.long	0x6d6ddab7
+	.long	0x8d8d018c
+	.long	0xd5d5b164
+	.long	0x4e4e9cd2
+	.long	0xa9a949e0
+	.long	0x6c6cd8b4
+	.long	0x5656acfa
+	.long	0xf4f4f307
+	.long	0xeaeacf25
+	.long	0x6565caaf
+	.long	0x7a7af48e
+	.long	0xaeae47e9
+	.long	0x08081018
+	.long	0xbaba6fd5
+	.long	0x7878f088
+	.long	0x25254a6f
+	.long	0x2e2e5c72
+	.long	0x1c1c3824
+	.long	0xa6a657f1
+	.long	0xb4b473c7
+	.long	0xc6c69751
+	.long	0xe8e8cb23
+	.long	0xdddda17c
+	.long	0x7474e89c
+	.long	0x1f1f3e21
+	.long	0x4b4b96dd
+	.long	0xbdbd61dc
+	.long	0x8b8b0d86
+	.long	0x8a8a0f85
+	.long	0x7070e090
+	.long	0x3e3e7c42
+	.long	0xb5b571c4
+	.long	0x6666ccaa
+	.long	0x484890d8
+	.long	0x03030605
+	.long	0xf6f6f701
+	.long	0x0e0e1c12
+	.long	0x6161c2a3
+	.long	0x35356a5f
+	.long	0x5757aef9
+	.long	0xb9b969d0
+	.long	0x86861791
+	.long	0xc1c19958
+	.long	0x1d1d3a27
+	.long	0x9e9e27b9
+	.long	0xe1e1d938
+	.long	0xf8f8eb13
+	.long	0x98982bb3
+	.long	0x11112233
+	.long	0x6969d2bb
+	.long	0xd9d9a970
+	.long	0x8e8e0789
+	.long	0x949433a7
+	.long	0x9b9b2db6
+	.long	0x1e1e3c22
+	.long	0x87871592
+	.long	0xe9e9c920
+	.long	0xcece8749
+	.long	0x5555aaff
+	.long	0x28285078
+	.long	0xdfdfa57a
+	.long	0x8c8c038f
+	.long	0xa1a159f8
+	.long	0x89890980
+	.long	0x0d0d1a17
+	.long	0xbfbf65da
+	.long	0xe6e6d731
+	.long	0x424284c6
+	.long	0x6868d0b8
+	.long	0x414182c3
+	.long	0x999929b0
+	.long	0x2d2d5a77
+	.long	0x0f0f1e11
+	.long	0xb0b07bcb
+	.long	0x5454a8fc
+	.long	0xbbbb6dd6
+	.long	0x16162c3a
+	// Table 2.
+	.long	0x63c6a563
+	.long	0x7cf8847c
+	.long	0x77ee9977
+	.long	0x7bf68d7b
+	.long	0xf2ff0df2
+	.long	0x6bd6bd6b
+	.long	0x6fdeb16f
+	.long	0xc59154c5
+	.long	0x30605030
+	.long	0x01020301
+	.long	0x67cea967
+	.long	0x2b567d2b
+	.long	0xfee719fe
+	.long	0xd7b562d7
+	.long	0xab4de6ab
+	.long	0x76ec9a76
+	.long	0xca8f45ca
+	.long	0x821f9d82
+	.long	0xc98940c9
+	.long	0x7dfa877d
+	.long	0xfaef15fa
+	.long	0x59b2eb59
+	.long	0x478ec947
+	.long	0xf0fb0bf0
+	.long	0xad41ecad
+	.long	0xd4b367d4
+	.long	0xa25ffda2
+	.long	0xaf45eaaf
+	.long	0x9c23bf9c
+	.long	0xa453f7a4
+	.long	0x72e49672
+	.long	0xc09b5bc0
+	.long	0xb775c2b7
+	.long	0xfde11cfd
+	.long	0x933dae93
+	.long	0x264c6a26
+	.long	0x366c5a36
+	.long	0x3f7e413f
+	.long	0xf7f502f7
+	.long	0xcc834fcc
+	.long	0x34685c34
+	.long	0xa551f4a5
+	.long	0xe5d134e5
+	.long	0xf1f908f1
+	.long	0x71e29371
+	.long	0xd8ab73d8
+	.long	0x31625331
+	.long	0x152a3f15
+	.long	0x04080c04
+	.long	0xc79552c7
+	.long	0x23466523
+	.long	0xc39d5ec3
+	.long	0x18302818
+	.long	0x9637a196
+	.long	0x050a0f05
+	.long	0x9a2fb59a
+	.long	0x070e0907
+	.long	0x12243612
+	.long	0x801b9b80
+	.long	0xe2df3de2
+	.long	0xebcd26eb
+	.long	0x274e6927
+	.long	0xb27fcdb2
+	.long	0x75ea9f75
+	.long	0x09121b09
+	.long	0x831d9e83
+	.long	0x2c58742c
+	.long	0x1a342e1a
+	.long	0x1b362d1b
+	.long	0x6edcb26e
+	.long	0x5ab4ee5a
+	.long	0xa05bfba0
+	.long	0x52a4f652
+	.long	0x3b764d3b
+	.long	0xd6b761d6
+	.long	0xb37dceb3
+	.long	0x29527b29
+	.long	0xe3dd3ee3
+	.long	0x2f5e712f
+	.long	0x84139784
+	.long	0x53a6f553
+	.long	0xd1b968d1
+	.long	0x00000000
+	.long	0xedc12ced
+	.long	0x20406020
+	.long	0xfce31ffc
+	.long	0xb179c8b1
+	.long	0x5bb6ed5b
+	.long	0x6ad4be6a
+	.long	0xcb8d46cb
+	.long	0xbe67d9be
+	.long	0x39724b39
+	.long	0x4a94de4a
+	.long	0x4c98d44c
+	.long	0x58b0e858
+	.long	0xcf854acf
+	.long	0xd0bb6bd0
+	.long	0xefc52aef
+	.long	0xaa4fe5aa
+	.long	0xfbed16fb
+	.long	0x4386c543
+	.long	0x4d9ad74d
+	.long	0x33665533
+	.long	0x85119485
+	.long	0x458acf45
+	.long	0xf9e910f9
+	.long	0x02040602
+	.long	0x7ffe817f
+	.long	0x50a0f050
+	.long	0x3c78443c
+	.long	0x9f25ba9f
+	.long	0xa84be3a8
+	.long	0x51a2f351
+	.long	0xa35dfea3
+	.long	0x4080c040
+	.long	0x8f058a8f
+	.long	0x923fad92
+	.long	0x9d21bc9d
+	.long	0x38704838
+	.long	0xf5f104f5
+	.long	0xbc63dfbc
+	.long	0xb677c1b6
+	.long	0xdaaf75da
+	.long	0x21426321
+	.long	0x10203010
+	.long	0xffe51aff
+	.long	0xf3fd0ef3
+	.long	0xd2bf6dd2
+	.long	0xcd814ccd
+	.long	0x0c18140c
+	.long	0x13263513
+	.long	0xecc32fec
+	.long	0x5fbee15f
+	.long	0x9735a297
+	.long	0x4488cc44
+	.long	0x172e3917
+	.long	0xc49357c4
+	.long	0xa755f2a7
+	.long	0x7efc827e
+	.long	0x3d7a473d
+	.long	0x64c8ac64
+	.long	0x5dbae75d
+	.long	0x19322b19
+	.long	0x73e69573
+	.long	0x60c0a060
+	.long	0x81199881
+	.long	0x4f9ed14f
+	.long	0xdca37fdc
+	.long	0x22446622
+	.long	0x2a547e2a
+	.long	0x903bab90
+	.long	0x880b8388
+	.long	0x468cca46
+	.long	0xeec729ee
+	.long	0xb86bd3b8
+	.long	0x14283c14
+	.long	0xdea779de
+	.long	0x5ebce25e
+	.long	0x0b161d0b
+	.long	0xdbad76db
+	.long	0xe0db3be0
+	.long	0x32645632
+	.long	0x3a744e3a
+	.long	0x0a141e0a
+	.long	0x4992db49
+	.long	0x060c0a06
+	.long	0x24486c24
+	.long	0x5cb8e45c
+	.long	0xc29f5dc2
+	.long	0xd3bd6ed3
+	.long	0xac43efac
+	.long	0x62c4a662
+	.long	0x9139a891
+	.long	0x9531a495
+	.long	0xe4d337e4
+	.long	0x79f28b79
+	.long	0xe7d532e7
+	.long	0xc88b43c8
+	.long	0x376e5937
+	.long	0x6ddab76d
+	.long	0x8d018c8d
+	.long	0xd5b164d5
+	.long	0x4e9cd24e
+	.long	0xa949e0a9
+	.long	0x6cd8b46c
+	.long	0x56acfa56
+	.long	0xf4f307f4
+	.long	0xeacf25ea
+	.long	0x65caaf65
+	.long	0x7af48e7a
+	.long	0xae47e9ae
+	.long	0x08101808
+	.long	0xba6fd5ba
+	.long	0x78f08878
+	.long	0x254a6f25
+	.long	0x2e5c722e
+	.long	0x1c38241c
+	.long	0xa657f1a6
+	.long	0xb473c7b4
+	.long	0xc69751c6
+	.long	0xe8cb23e8
+	.long	0xdda17cdd
+	.long	0x74e89c74
+	.long	0x1f3e211f
+	.long	0x4b96dd4b
+	.long	0xbd61dcbd
+	.long	0x8b0d868b
+	.long	0x8a0f858a
+	.long	0x70e09070
+	.long	0x3e7c423e
+	.long	0xb571c4b5
+	.long	0x66ccaa66
+	.long	0x4890d848
+	.long	0x03060503
+	.long	0xf6f701f6
+	.long	0x0e1c120e
+	.long	0x61c2a361
+	.long	0x356a5f35
+	.long	0x57aef957
+	.long	0xb969d0b9
+	.long	0x86179186
+	.long	0xc19958c1
+	.long	0x1d3a271d
+	.long	0x9e27b99e
+	.long	0xe1d938e1
+	.long	0xf8eb13f8
+	.long	0x982bb398
+	.long	0x11223311
+	.long	0x69d2bb69
+	.long	0xd9a970d9
+	.long	0x8e07898e
+	.long	0x9433a794
+	.long	0x9b2db69b
+	.long	0x1e3c221e
+	.long	0x87159287
+	.long	0xe9c920e9
+	.long	0xce8749ce
+	.long	0x55aaff55
+	.long	0x28507828
+	.long	0xdfa57adf
+	.long	0x8c038f8c
+	.long	0xa159f8a1
+	.long	0x89098089
+	.long	0x0d1a170d
+	.long	0xbf65dabf
+	.long	0xe6d731e6
+	.long	0x4284c642
+	.long	0x68d0b868
+	.long	0x4182c341
+	.long	0x9929b099
+	.long	0x2d5a772d
+	.long	0x0f1e110f
+	.long	0xb07bcbb0
+	.long	0x54a8fc54
+	.long	0xbb6dd6bb
+	.long	0x162c3a16
+	// Table 3.
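+	// Each word below is the corresponding Table 2 word rotated left by
+	// 8 bits (for example, Table 2's 0x478ec947 appears here as
+	// 0x8ec94747), continuing the rotation pattern of the earlier tables.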
+	.long	0xc6a56363
+	.long	0xf8847c7c
+	.long	0xee997777
+	.long	0xf68d7b7b
+	.long	0xff0df2f2
+	.long	0xd6bd6b6b
+	.long	0xdeb16f6f
+	.long	0x9154c5c5
+	.long	0x60503030
+	.long	0x02030101
+	.long	0xcea96767
+	.long	0x567d2b2b
+	.long	0xe719fefe
+	.long	0xb562d7d7
+	.long	0x4de6abab
+	.long	0xec9a7676
+	.long	0x8f45caca
+	.long	0x1f9d8282
+	.long	0x8940c9c9
+	.long	0xfa877d7d
+	.long	0xef15fafa
+	.long	0xb2eb5959
+	.long	0x8ec94747
+	.long	0xfb0bf0f0
+	.long	0x41ecadad
+	.long	0xb367d4d4
+	.long	0x5ffda2a2
+	.long	0x45eaafaf
+	.long	0x23bf9c9c
+	.long	0x53f7a4a4
+	.long	0xe4967272
+	.long	0x9b5bc0c0
+	.long	0x75c2b7b7
+	.long	0xe11cfdfd
+	.long	0x3dae9393
+	.long	0x4c6a2626
+	.long	0x6c5a3636
+	.long	0x7e413f3f
+	.long	0xf502f7f7
+	.long	0x834fcccc
+	.long	0x685c3434
+	.long	0x51f4a5a5
+	.long	0xd134e5e5
+	.long	0xf908f1f1
+	.long	0xe2937171
+	.long	0xab73d8d8
+	.long	0x62533131
+	.long	0x2a3f1515
+	.long	0x080c0404
+	.long	0x9552c7c7
+	.long	0x46652323
+	.long	0x9d5ec3c3
+	.long	0x30281818
+	.long	0x37a19696
+	.long	0x0a0f0505
+	.long	0x2fb59a9a
+	.long	0x0e090707
+	.long	0x24361212
+	.long	0x1b9b8080
+	.long	0xdf3de2e2
+	.long	0xcd26ebeb
+	.long	0x4e692727
+	.long	0x7fcdb2b2
+	.long	0xea9f7575
+	.long	0x121b0909
+	.long	0x1d9e8383
+	.long	0x58742c2c
+	.long	0x342e1a1a
+	.long	0x362d1b1b
+	.long	0xdcb26e6e
+	.long	0xb4ee5a5a
+	.long	0x5bfba0a0
+	.long	0xa4f65252
+	.long	0x764d3b3b
+	.long	0xb761d6d6
+	.long	0x7dceb3b3
+	.long	0x527b2929
+	.long	0xdd3ee3e3
+	.long	0x5e712f2f
+	.long	0x13978484
+	.long	0xa6f55353
+	.long	0xb968d1d1
+	.long	0x00000000
+	.long	0xc12ceded
+	.long	0x40602020
+	.long	0xe31ffcfc
+	.long	0x79c8b1b1
+	.long	0xb6ed5b5b
+	.long	0xd4be6a6a
+	.long	0x8d46cbcb
+	.long	0x67d9bebe
+	.long	0x724b3939
+	.long	0x94de4a4a
+	.long	0x98d44c4c
+	.long	0xb0e85858
+	.long	0x854acfcf
+	.long	0xbb6bd0d0
+	.long	0xc52aefef
+	.long	0x4fe5aaaa
+	.long	0xed16fbfb
+	.long	0x86c54343
+	.long	0x9ad74d4d
+	.long	0x66553333
+	.long	0x11948585
+	.long	0x8acf4545
+	.long	0xe910f9f9
+	.long	0x04060202
+	.long	0xfe817f7f
+	.long	0xa0f05050
+	.long	0x78443c3c
+	.long	0x25ba9f9f
+	.long	0x4be3a8a8
+	.long	0xa2f35151
+	.long	0x5dfea3a3
+	.long	0x80c04040
+	.long	0x058a8f8f
+	.long	0x3fad9292
+	.long	0x21bc9d9d
+	.long	0x70483838
+	.long	0xf104f5f5
+	.long	0x63dfbcbc
+	.long	0x77c1b6b6
+	.long	0xaf75dada
+	.long	0x42632121
+	.long	0x20301010
+	.long	0xe51affff
+	.long	0xfd0ef3f3
+	.long	0xbf6dd2d2
+	.long	0x814ccdcd
+	.long	0x18140c0c
+	.long	0x26351313
+	.long	0xc32fecec
+	.long	0xbee15f5f
+	.long	0x35a29797
+	.long	0x88cc4444
+	.long	0x2e391717
+	.long	0x9357c4c4
+	.long	0x55f2a7a7
+	.long	0xfc827e7e
+	.long	0x7a473d3d
+	.long	0xc8ac6464
+	.long	0xbae75d5d
+	.long	0x322b1919
+	.long	0xe6957373
+	.long	0xc0a06060
+	.long	0x19988181
+	.long	0x9ed14f4f
+	.long	0xa37fdcdc
+	.long	0x44662222
+	.long	0x547e2a2a
+	.long	0x3bab9090
+	.long	0x0b838888
+	.long	0x8cca4646
+	.long	0xc729eeee
+	.long	0x6bd3b8b8
+	.long	0x283c1414
+	.long	0xa779dede
+	.long	0xbce25e5e
+	.long	0x161d0b0b
+	.long	0xad76dbdb
+	.long	0xdb3be0e0
+	.long	0x64563232
+	.long	0x744e3a3a
+	.long	0x141e0a0a
+	.long	0x92db4949
+	.long	0x0c0a0606
+	.long	0x486c2424
+	.long	0xb8e45c5c
+	.long	0x9f5dc2c2
+	.long	0xbd6ed3d3
+	.long	0x43efacac
+	.long	0xc4a66262
+	.long	0x39a89191
+	.long	0x31a49595
+	.long	0xd337e4e4
+	.long	0xf28b7979
+	.long	0xd532e7e7
+	.long	0x8b43c8c8
+	.long	0x6e593737
+	.long	0xdab76d6d
+	.long	0x018c8d8d
+	.long	0xb164d5d5
+	.long	0x9cd24e4e
+	.long	0x49e0a9a9
+	.long	0xd8b46c6c
+	.long	0xacfa5656
+	.long	0xf307f4f4
+	.long	0xcf25eaea
+	.long	0xcaaf6565
+	.long	0xf48e7a7a
+	.long	0x47e9aeae
+	.long	0x10180808
+	.long	0x6fd5baba
+	.long	0xf0887878
+	.long	0x4a6f2525
+	.long	0x5c722e2e
+	.long	0x38241c1c
+	.long	0x57f1a6a6
+	.long	0x73c7b4b4
+	.long	0x9751c6c6
+	.long	0xcb23e8e8
+	.long	0xa17cdddd
+	.long	0xe89c7474
+	.long	0x3e211f1f
+	.long	0x96dd4b4b
+	.long	0x61dcbdbd
+	.long	0x0d868b8b
+	.long	0x0f858a8a
+	.long	0xe0907070
+	.long	0x7c423e3e
+	.long	0x71c4b5b5
+	.long	0xccaa6666
+	.long	0x90d84848
+	.long	0x06050303
+	.long	0xf701f6f6
+	.long	0x1c120e0e
+	.long	0xc2a36161
+	.long	0x6a5f3535
+	.long	0xaef95757
+	.long	0x69d0b9b9
+	.long	0x17918686
+	.long	0x9958c1c1
+	.long	0x3a271d1d
+	.long	0x27b99e9e
+	.long	0xd938e1e1
+	.long	0xeb13f8f8
+	.long	0x2bb39898
+	.long	0x22331111
+	.long	0xd2bb6969
+	.long	0xa970d9d9
+	.long	0x07898e8e
+	.long	0x33a79494
+	.long	0x2db69b9b
+	.long	0x3c221e1e
+	.long	0x15928787
+	.long	0xc920e9e9
+	.long	0x8749cece
+	.long	0xaaff5555
+	.long	0x50782828
+	.long	0xa57adfdf
+	.long	0x038f8c8c
+	.long	0x59f8a1a1
+	.long	0x09808989
+	.long	0x1a170d0d
+	.long	0x65dabfbf
+	.long	0xd731e6e6
+	.long	0x84c64242
+	.long	0xd0b86868
+	.long	0x82c34141
+	.long	0x29b09999
+	.long	0x5a772d2d
+	.long	0x1e110f0f
+	.long	0x7bcbb0b0
+	.long	0xa8fc5454
+	.long	0x6dd6bbbb
+	.long	0x2c3a1616
+
+
+// Tables for main decryption iterations.
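+// Layout note: the four 256-entry tables that follow are rotations of
+// one another. Table t repeats Table 0 with every word rotated left by
+// 8*t bits (entry 0: 0x50a7f451, 0xa7f45150, 0xf45150a7, 0x5150a7f4),
+// so each byte of the state indexes its own table with an aligned load
+// and the results combine with plain XORs, no rotates needed.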
+	.globl	_AESDecryptTable
+	.private_extern	_AESDecryptTable
+	.align	2
+_AESDecryptTable:
+	// Table 0.
+	.long	0x50a7f451
+	.long	0x5365417e
+	.long	0xc3a4171a
+	.long	0x965e273a
+	.long	0xcb6bab3b
+	.long	0xf1459d1f
+	.long	0xab58faac
+	.long	0x9303e34b
+	.long	0x55fa3020
+	.long	0xf66d76ad
+	.long	0x9176cc88
+	.long	0x254c02f5
+	.long	0xfcd7e54f
+	.long	0xd7cb2ac5
+	.long	0x80443526
+	.long	0x8fa362b5
+	.long	0x495ab1de
+	.long	0x671bba25
+	.long	0x980eea45
+	.long	0xe1c0fe5d
+	.long	0x02752fc3
+	.long	0x12f04c81
+	.long	0xa397468d
+	.long	0xc6f9d36b
+	.long	0xe75f8f03
+	.long	0x959c9215
+	.long	0xeb7a6dbf
+	.long	0xda595295
+	.long	0x2d83bed4
+	.long	0xd3217458
+	.long	0x2969e049
+	.long	0x44c8c98e
+	.long	0x6a89c275
+	.long	0x78798ef4
+	.long	0x6b3e5899
+	.long	0xdd71b927
+	.long	0xb64fe1be
+	.long	0x17ad88f0
+	.long	0x66ac20c9
+	.long	0xb43ace7d
+	.long	0x184adf63
+	.long	0x82311ae5
+	.long	0x60335197
+	.long	0x457f5362
+	.long	0xe07764b1
+	.long	0x84ae6bbb
+	.long	0x1ca081fe
+	.long	0x942b08f9
+	.long	0x58684870
+	.long	0x19fd458f
+	.long	0x876cde94
+	.long	0xb7f87b52
+	.long	0x23d373ab
+	.long	0xe2024b72
+	.long	0x578f1fe3
+	.long	0x2aab5566
+	.long	0x0728ebb2
+	.long	0x03c2b52f
+	.long	0x9a7bc586
+	.long	0xa50837d3
+	.long	0xf2872830
+	.long	0xb2a5bf23
+	.long	0xba6a0302
+	.long	0x5c8216ed
+	.long	0x2b1ccf8a
+	.long	0x92b479a7
+	.long	0xf0f207f3
+	.long	0xa1e2694e
+	.long	0xcdf4da65
+	.long	0xd5be0506
+	.long	0x1f6234d1
+	.long	0x8afea6c4
+	.long	0x9d532e34
+	.long	0xa055f3a2
+	.long	0x32e18a05
+	.long	0x75ebf6a4
+	.long	0x39ec830b
+	.long	0xaaef6040
+	.long	0x069f715e
+	.long	0x51106ebd
+	.long	0xf98a213e
+	.long	0x3d06dd96
+	.long	0xae053edd
+	.long	0x46bde64d
+	.long	0xb58d5491
+	.long	0x055dc471
+	.long	0x6fd40604
+	.long	0xff155060
+	.long	0x24fb9819
+	.long	0x97e9bdd6
+	.long	0xcc434089
+	.long	0x779ed967
+	.long	0xbd42e8b0
+	.long	0x888b8907
+	.long	0x385b19e7
+	.long	0xdbeec879
+	.long	0x470a7ca1
+	.long	0xe90f427c
+	.long	0xc91e84f8
+	.long	0x00000000
+	.long	0x83868009
+	.long	0x48ed2b32
+	.long	0xac70111e
+	.long	0x4e725a6c
+	.long	0xfbff0efd
+	.long	0x5638850f
+	.long	0x1ed5ae3d
+	.long	0x27392d36
+	.long	0x64d90f0a
+	.long	0x21a65c68
+	.long	0xd1545b9b
+	.long	0x3a2e3624
+	.long	0xb1670a0c
+	.long	0x0fe75793
+	.long	0xd296eeb4
+	.long	0x9e919b1b
+	.long	0x4fc5c080
+	.long	0xa220dc61
+	.long	0x694b775a
+	.long	0x161a121c
+	.long	0x0aba93e2
+	.long	0xe52aa0c0
+	.long	0x43e0223c
+	.long	0x1d171b12
+	.long	0x0b0d090e
+	.long	0xadc78bf2
+	.long	0xb9a8b62d
+	.long	0xc8a91e14
+	.long	0x8519f157
+	.long	0x4c0775af
+	.long	0xbbdd99ee
+	.long	0xfd607fa3
+	.long	0x9f2601f7
+	.long	0xbcf5725c
+	.long	0xc53b6644
+	.long	0x347efb5b
+	.long	0x7629438b
+	.long	0xdcc623cb
+	.long	0x68fcedb6
+	.long	0x63f1e4b8
+	.long	0xcadc31d7
+	.long	0x10856342
+	.long	0x40229713
+	.long	0x2011c684
+	.long	0x7d244a85
+	.long	0xf83dbbd2
+	.long	0x1132f9ae
+	.long	0x6da129c7
+	.long	0x4b2f9e1d
+	.long	0xf330b2dc
+	.long	0xec52860d
+	.long	0xd0e3c177
+	.long	0x6c16b32b
+	.long	0x99b970a9
+	.long	0xfa489411
+	.long	0x2264e947
+	.long	0xc48cfca8
+	.long	0x1a3ff0a0
+	.long	0xd82c7d56
+	.long	0xef903322
+	.long	0xc74e4987
+	.long	0xc1d138d9
+	.long	0xfea2ca8c
+	.long	0x360bd498
+	.long	0xcf81f5a6
+	.long	0x28de7aa5
+	.long	0x268eb7da
+	.long	0xa4bfad3f
+	.long	0xe49d3a2c
+	.long	0x0d927850
+	.long	0x9bcc5f6a
+	.long	0x62467e54
+	.long	0xc2138df6
+	.long	0xe8b8d890
+	.long	0x5ef7392e
+	.long	0xf5afc382
+	.long	0xbe805d9f
+	.long	0x7c93d069
+	.long	0xa92dd56f
+	.long	0xb31225cf
+	.long	0x3b99acc8
+	.long	0xa77d1810
+	.long	0x6e639ce8
+	.long	0x7bbb3bdb
+	.long	0x097826cd
+	.long	0xf418596e
+	.long	0x01b79aec
+	.long	0xa89a4f83
+	.long	0x656e95e6
+	.long	0x7ee6ffaa
+	.long	0x08cfbc21
+	.long	0xe6e815ef
+	.long	0xd99be7ba
+	.long	0xce366f4a
+	.long	0xd4099fea
+	.long	0xd67cb029
+	.long	0xafb2a431
+	.long	0x31233f2a
+	.long	0x3094a5c6
+	.long	0xc066a235
+	.long	0x37bc4e74
+	.long	0xa6ca82fc
+	.long	0xb0d090e0
+	.long	0x15d8a733
+	.long	0x4a9804f1
+	.long	0xf7daec41
+	.long	0x0e50cd7f
+	.long	0x2ff69117
+	.long	0x8dd64d76
+	.long	0x4db0ef43
+	.long	0x544daacc
+	.long	0xdf0496e4
+	.long	0xe3b5d19e
+	.long	0x1b886a4c
+	.long	0xb81f2cc1
+	.long	0x7f516546
+	.long	0x04ea5e9d
+	.long	0x5d358c01
+	.long	0x737487fa
+	.long	0x2e410bfb
+	.long	0x5a1d67b3
+	.long	0x52d2db92
+	.long	0x335610e9
+	.long	0x1347d66d
+	.long	0x8c61d79a
+	.long	0x7a0ca137
+	.long	0x8e14f859
+	.long	0x893c13eb
+	.long	0xee27a9ce
+	.long	0x35c961b7
+	.long	0xede51ce1
+	.long	0x3cb1477a
+	.long	0x59dfd29c
+	.long	0x3f73f255
+	.long	0x79ce1418
+	.long	0xbf37c773
+	.long	0xeacdf753
+	.long	0x5baafd5f
+	.long	0x146f3ddf
+	.long	0x86db4478
+	.long	0x81f3afca
+	.long	0x3ec468b9
+	.long	0x2c342438
+	.long	0x5f40a3c2
+	.long	0x72c31d16
+	.long	0x0c25e2bc
+	.long	0x8b493c28
+	.long	0x41950dff
+	.long	0x7101a839
+	.long	0xdeb30c08
+	.long	0x9ce4b4d8
+	.long	0x90c15664
+	.long	0x6184cb7b
+	.long	0x70b632d5
+	.long	0x745c6c48
+	.long	0x4257b8d0
+	// Table 1.
+	.long	0xa7f45150
+	.long	0x65417e53
+	.long	0xa4171ac3
+	.long	0x5e273a96
+	.long	0x6bab3bcb
+	.long	0x459d1ff1
+	.long	0x58faacab
+	.long	0x03e34b93
+	.long	0xfa302055
+	.long	0x6d76adf6
+	.long	0x76cc8891
+	.long	0x4c02f525
+	.long	0xd7e54ffc
+	.long	0xcb2ac5d7
+	.long	0x44352680
+	.long	0xa362b58f
+	.long	0x5ab1de49
+	.long	0x1bba2567
+	.long	0x0eea4598
+	.long	0xc0fe5de1
+	.long	0x752fc302
+	.long	0xf04c8112
+	.long	0x97468da3
+	.long	0xf9d36bc6
+	.long	0x5f8f03e7
+	.long	0x9c921595
+	.long	0x7a6dbfeb
+	.long	0x595295da
+	.long	0x83bed42d
+	.long	0x217458d3
+	.long	0x69e04929
+	.long	0xc8c98e44
+	.long	0x89c2756a
+	.long	0x798ef478
+	.long	0x3e58996b
+	.long	0x71b927dd
+	.long	0x4fe1beb6
+	.long	0xad88f017
+	.long	0xac20c966
+	.long	0x3ace7db4
+	.long	0x4adf6318
+	.long	0x311ae582
+	.long	0x33519760
+	.long	0x7f536245
+	.long	0x7764b1e0
+	.long	0xae6bbb84
+	.long	0xa081fe1c
+	.long	0x2b08f994
+	.long	0x68487058
+	.long	0xfd458f19
+	.long	0x6cde9487
+	.long	0xf87b52b7
+	.long	0xd373ab23
+	.long	0x024b72e2
+	.long	0x8f1fe357
+	.long	0xab55662a
+	.long	0x28ebb207
+	.long	0xc2b52f03
+	.long	0x7bc5869a
+	.long	0x0837d3a5
+	.long	0x872830f2
+	.long	0xa5bf23b2
+	.long	0x6a0302ba
+	.long	0x8216ed5c
+	.long	0x1ccf8a2b
+	.long	0xb479a792
+	.long	0xf207f3f0
+	.long	0xe2694ea1
+	.long	0xf4da65cd
+	.long	0xbe0506d5
+	.long	0x6234d11f
+	.long	0xfea6c48a
+	.long	0x532e349d
+	.long	0x55f3a2a0
+	.long	0xe18a0532
+	.long	0xebf6a475
+	.long	0xec830b39
+	.long	0xef6040aa
+	.long	0x9f715e06
+	.long	0x106ebd51
+	.long	0x8a213ef9
+	.long	0x06dd963d
+	.long	0x053eddae
+	.long	0xbde64d46
+	.long	0x8d5491b5
+	.long	0x5dc47105
+	.long	0xd406046f
+	.long	0x155060ff
+	.long	0xfb981924
+	.long	0xe9bdd697
+	.long	0x434089cc
+	.long	0x9ed96777
+	.long	0x42e8b0bd
+	.long	0x8b890788
+	.long	0x5b19e738
+	.long	0xeec879db
+	.long	0x0a7ca147
+	.long	0x0f427ce9
+	.long	0x1e84f8c9
+	.long	0x00000000
+	.long	0x86800983
+	.long	0xed2b3248
+	.long	0x70111eac
+	.long	0x725a6c4e
+	.long	0xff0efdfb
+	.long	0x38850f56
+	.long	0xd5ae3d1e
+	.long	0x392d3627
+	.long	0xd90f0a64
+	.long	0xa65c6821
+	.long	0x545b9bd1
+	.long	0x2e36243a
+	.long	0x670a0cb1
+	.long	0xe757930f
+	.long	0x96eeb4d2
+	.long	0x919b1b9e
+	.long	0xc5c0804f
+	.long	0x20dc61a2
+	.long	0x4b775a69
+	.long	0x1a121c16
+	.long	0xba93e20a
+	.long	0x2aa0c0e5
+	.long	0xe0223c43
+	.long	0x171b121d
+	.long	0x0d090e0b
+	.long	0xc78bf2ad
+	.long	0xa8b62db9
+	.long	0xa91e14c8
+	.long	0x19f15785
+	.long	0x0775af4c
+	.long	0xdd99eebb
+	.long	0x607fa3fd
+	.long	0x2601f79f
+	.long	0xf5725cbc
+	.long	0x3b6644c5
+	.long	0x7efb5b34
+	.long	0x29438b76
+	.long	0xc623cbdc
+	.long	0xfcedb668
+	.long	0xf1e4b863
+	.long	0xdc31d7ca
+	.long	0x85634210
+	.long	0x22971340
+	.long	0x11c68420
+	.long	0x244a857d
+	.long	0x3dbbd2f8
+	.long	0x32f9ae11
+	.long	0xa129c76d
+	.long	0x2f9e1d4b
+	.long	0x30b2dcf3
+	.long	0x52860dec
+	.long	0xe3c177d0
+	.long	0x16b32b6c
+	.long	0xb970a999
+	.long	0x489411fa
+	.long	0x64e94722
+	.long	0x8cfca8c4
+	.long	0x3ff0a01a
+	.long	0x2c7d56d8
+	.long	0x903322ef
+	.long	0x4e4987c7
+	.long	0xd138d9c1
+	.long	0xa2ca8cfe
+	.long	0x0bd49836
+	.long	0x81f5a6cf
+	.long	0xde7aa528
+	.long	0x8eb7da26
+	.long	0xbfad3fa4
+	.long	0x9d3a2ce4
+	.long	0x9278500d
+	.long	0xcc5f6a9b
+	.long	0x467e5462
+	.long	0x138df6c2
+	.long	0xb8d890e8
+	.long	0xf7392e5e
+	.long	0xafc382f5
+	.long	0x805d9fbe
+	.long	0x93d0697c
+	.long	0x2dd56fa9
+	.long	0x1225cfb3
+	.long	0x99acc83b
+	.long	0x7d1810a7
+	.long	0x639ce86e
+	.long	0xbb3bdb7b
+	.long	0x7826cd09
+	.long	0x18596ef4
+	.long	0xb79aec01
+	.long	0x9a4f83a8
+	.long	0x6e95e665
+	.long	0xe6ffaa7e
+	.long	0xcfbc2108
+	.long	0xe815efe6
+	.long	0x9be7bad9
+	.long	0x366f4ace
+	.long	0x099fead4
+	.long	0x7cb029d6
+	.long	0xb2a431af
+	.long	0x233f2a31
+	.long	0x94a5c630
+	.long	0x66a235c0
+	.long	0xbc4e7437
+	.long	0xca82fca6
+	.long	0xd090e0b0
+	.long	0xd8a73315
+	.long	0x9804f14a
+	.long	0xdaec41f7
+	.long	0x50cd7f0e
+	.long	0xf691172f
+	.long	0xd64d768d
+	.long	0xb0ef434d
+	.long	0x4daacc54
+	.long	0x0496e4df
+	.long	0xb5d19ee3
+	.long	0x886a4c1b
+	.long	0x1f2cc1b8
+	.long	0x5165467f
+	.long	0xea5e9d04
+	.long	0x358c015d
+	.long	0x7487fa73
+	.long	0x410bfb2e
+	.long	0x1d67b35a
+	.long	0xd2db9252
+	.long	0x5610e933
+	.long	0x47d66d13
+	.long	0x61d79a8c
+	.long	0x0ca1377a
+	.long	0x14f8598e
+	.long	0x3c13eb89
+	.long	0x27a9ceee
+	.long	0xc961b735
+	.long	0xe51ce1ed
+	.long	0xb1477a3c
+	.long	0xdfd29c59
+	.long	0x73f2553f
+	.long	0xce141879
+	.long	0x37c773bf
+	.long	0xcdf753ea
+	.long	0xaafd5f5b
+	.long	0x6f3ddf14
+	.long	0xdb447886
+	.long	0xf3afca81
+	.long	0xc468b93e
+	.long	0x3424382c
+	.long	0x40a3c25f
+	.long	0xc31d1672
+	.long	0x25e2bc0c
+	.long	0x493c288b
+	.long	0x950dff41
+	.long	0x01a83971
+	.long	0xb30c08de
+	.long	0xe4b4d89c
+	.long	0xc1566490
+	.long	0x84cb7b61
+	.long	0xb632d570
+	.long	0x5c6c4874
+	.long	0x57b8d042
+	// Table 2.
+	.long	0xf45150a7
+	.long	0x417e5365
+	.long	0x171ac3a4
+	.long	0x273a965e
+	.long	0xab3bcb6b
+	.long	0x9d1ff145
+	.long	0xfaacab58
+	.long	0xe34b9303
+	.long	0x302055fa
+	.long	0x76adf66d
+	.long	0xcc889176
+	.long	0x02f5254c
+	.long	0xe54ffcd7
+	.long	0x2ac5d7cb
+	.long	0x35268044
+	.long	0x62b58fa3
+	.long	0xb1de495a
+	.long	0xba25671b
+	.long	0xea45980e
+	.long	0xfe5de1c0
+	.long	0x2fc30275
+	.long	0x4c8112f0
+	.long	0x468da397
+	.long	0xd36bc6f9
+	.long	0x8f03e75f
+	.long	0x9215959c
+	.long	0x6dbfeb7a
+	.long	0x5295da59
+	.long	0xbed42d83
+	.long	0x7458d321
+	.long	0xe0492969
+	.long	0xc98e44c8
+	.long	0xc2756a89
+	.long	0x8ef47879
+	.long	0x58996b3e
+	.long	0xb927dd71
+	.long	0xe1beb64f
+	.long	0x88f017ad
+	.long	0x20c966ac
+	.long	0xce7db43a
+	.long	0xdf63184a
+	.long	0x1ae58231
+	.long	0x51976033
+	.long	0x5362457f
+	.long	0x64b1e077
+	.long	0x6bbb84ae
+	.long	0x81fe1ca0
+	.long	0x08f9942b
+	.long	0x48705868
+	.long	0x458f19fd
+	.long	0xde94876c
+	.long	0x7b52b7f8
+	.long	0x73ab23d3
+	.long	0x4b72e202
+	.long	0x1fe3578f
+	.long	0x55662aab
+	.long	0xebb20728
+	.long	0xb52f03c2
+	.long	0xc5869a7b
+	.long	0x37d3a508
+	.long	0x2830f287
+	.long	0xbf23b2a5
+	.long	0x0302ba6a
+	.long	0x16ed5c82
+	.long	0xcf8a2b1c
+	.long	0x79a792b4
+	.long	0x07f3f0f2
+	.long	0x694ea1e2
+	.long	0xda65cdf4
+	.long	0x0506d5be
+	.long	0x34d11f62
+	.long	0xa6c48afe
+	.long	0x2e349d53
+	.long	0xf3a2a055
+	.long	0x8a0532e1
+	.long	0xf6a475eb
+	.long	0x830b39ec
+	.long	0x6040aaef
+	.long	0x715e069f
+	.long	0x6ebd5110
+	.long	0x213ef98a
+	.long	0xdd963d06
+	.long	0x3eddae05
+	.long	0xe64d46bd
+	.long	0x5491b58d
+	.long	0xc471055d
+	.long	0x06046fd4
+	.long	0x5060ff15
+	.long	0x981924fb
+	.long	0xbdd697e9
+	.long	0x4089cc43
+	.long	0xd967779e
+	.long	0xe8b0bd42
+	.long	0x8907888b
+	.long	0x19e7385b
+	.long	0xc879dbee
+	.long	0x7ca1470a
+	.long	0x427ce90f
+	.long	0x84f8c91e
+	.long	0x00000000
+	.long	0x80098386
+	.long	0x2b3248ed
+	.long	0x111eac70
+	.long	0x5a6c4e72
+	.long	0x0efdfbff
+	.long	0x850f5638
+	.long	0xae3d1ed5
+	.long	0x2d362739
+	.long	0x0f0a64d9
+	.long	0x5c6821a6
+	.long	0x5b9bd154
+	.long	0x36243a2e
+	.long	0x0a0cb167
+	.long	0x57930fe7
+	.long	0xeeb4d296
+	.long	0x9b1b9e91
+	.long	0xc0804fc5
+	.long	0xdc61a220
+	.long	0x775a694b
+	.long	0x121c161a
+	.long	0x93e20aba
+	.long	0xa0c0e52a
+	.long	0x223c43e0
+	.long	0x1b121d17
+	.long	0x090e0b0d
+	.long	0x8bf2adc7
+	.long	0xb62db9a8
+	.long	0x1e14c8a9
+	.long	0xf1578519
+	.long	0x75af4c07
+	.long	0x99eebbdd
+	.long	0x7fa3fd60
+	.long	0x01f79f26
+	.long	0x725cbcf5
+	.long	0x6644c53b
+	.long	0xfb5b347e
+	.long	0x438b7629
+	.long	0x23cbdcc6
+	.long	0xedb668fc
+	.long	0xe4b863f1
+	.long	0x31d7cadc
+	.long	0x63421085
+	.long	0x97134022
+	.long	0xc6842011
+	.long	0x4a857d24
+	.long	0xbbd2f83d
+	.long	0xf9ae1132
+	.long	0x29c76da1
+	.long	0x9e1d4b2f
+	.long	0xb2dcf330
+	.long	0x860dec52
+	.long	0xc177d0e3
+	.long	0xb32b6c16
+	.long	0x70a999b9
+	.long	0x9411fa48
+	.long	0xe9472264
+	.long	0xfca8c48c
+	.long	0xf0a01a3f
+	.long	0x7d56d82c
+	.long	0x3322ef90
+	.long	0x4987c74e
+	.long	0x38d9c1d1
+	.long	0xca8cfea2
+	.long	0xd498360b
+	.long	0xf5a6cf81
+	.long	0x7aa528de
+	.long	0xb7da268e
+	.long	0xad3fa4bf
+	.long	0x3a2ce49d
+	.long	0x78500d92
+	.long	0x5f6a9bcc
+	.long	0x7e546246
+	.long	0x8df6c213
+	.long	0xd890e8b8
+	.long	0x392e5ef7
+	.long	0xc382f5af
+	.long	0x5d9fbe80
+	.long	0xd0697c93
+	.long	0xd56fa92d
+	.long	0x25cfb312
+	.long	0xacc83b99
+	.long	0x1810a77d
+	.long	0x9ce86e63
+	.long	0x3bdb7bbb
+	.long	0x26cd0978
+	.long	0x596ef418
+	.long	0x9aec01b7
+	.long	0x4f83a89a
+	.long	0x95e6656e
+	.long	0xffaa7ee6
+	.long	0xbc2108cf
+	.long	0x15efe6e8
+	.long	0xe7bad99b
+	.long	0x6f4ace36
+	.long	0x9fead409
+	.long	0xb029d67c
+	.long	0xa431afb2
+	.long	0x3f2a3123
+	.long	0xa5c63094
+	.long	0xa235c066
+	.long	0x4e7437bc
+	.long	0x82fca6ca
+	.long	0x90e0b0d0
+	.long	0xa73315d8
+	.long	0x04f14a98
+	.long	0xec41f7da
+	.long	0xcd7f0e50
+	.long	0x91172ff6
+	.long	0x4d768dd6
+	.long	0xef434db0
+	.long	0xaacc544d
+	.long	0x96e4df04
+	.long	0xd19ee3b5
+	.long	0x6a4c1b88
+	.long	0x2cc1b81f
+	.long	0x65467f51
+	.long	0x5e9d04ea
+	.long	0x8c015d35
+	.long	0x87fa7374
+	.long	0x0bfb2e41
+	.long	0x67b35a1d
+	.long	0xdb9252d2
+	.long	0x10e93356
+	.long	0xd66d1347
+	.long	0xd79a8c61
+	.long	0xa1377a0c
+	.long	0xf8598e14
+	.long	0x13eb893c
+	.long	0xa9ceee27
+	.long	0x61b735c9
+	.long	0x1ce1ede5
+	.long	0x477a3cb1
+	.long	0xd29c59df
+	.long	0xf2553f73
+	.long	0x141879ce
+	.long	0xc773bf37
+	.long	0xf753eacd
+	.long	0xfd5f5baa
+	.long	0x3ddf146f
+	.long	0x447886db
+	.long	0xafca81f3
+	.long	0x68b93ec4
+	.long	0x24382c34
+	.long	0xa3c25f40
+	.long	0x1d1672c3
+	.long	0xe2bc0c25
+	.long	0x3c288b49
+	.long	0x0dff4195
+	.long	0xa8397101
+	.long	0x0c08deb3
+	.long	0xb4d89ce4
+	.long	0x566490c1
+	.long	0xcb7b6184
+	.long	0x32d570b6
+	.long	0x6c48745c
+	.long	0xb8d04257
+	// Table 3.
+	.long	0x5150a7f4
+	.long	0x7e536541
+	.long	0x1ac3a417
+	.long	0x3a965e27
+	.long	0x3bcb6bab
+	.long	0x1ff1459d
+	.long	0xacab58fa
+	.long	0x4b9303e3
+	.long	0x2055fa30
+	.long	0xadf66d76
+	.long	0x889176cc
+	.long	0xf5254c02
+	.long	0x4ffcd7e5
+	.long	0xc5d7cb2a
+	.long	0x26804435
+	.long	0xb58fa362
+	.long	0xde495ab1
+	.long	0x25671bba
+	.long	0x45980eea
+	.long	0x5de1c0fe
+	.long	0xc302752f
+	.long	0x8112f04c
+	.long	0x8da39746
+	.long	0x6bc6f9d3
+	.long	0x03e75f8f
+	.long	0x15959c92
+	.long	0xbfeb7a6d
+	.long	0x95da5952
+	.long	0xd42d83be
+	.long	0x58d32174
+	.long	0x492969e0
+	.long	0x8e44c8c9
+	.long	0x756a89c2
+	.long	0xf478798e
+	.long	0x996b3e58
+	.long	0x27dd71b9
+	.long	0xbeb64fe1
+	.long	0xf017ad88
+	.long	0xc966ac20
+	.long	0x7db43ace
+	.long	0x63184adf
+	.long	0xe582311a
+	.long	0x97603351
+	.long	0x62457f53
+	.long	0xb1e07764
+	.long	0xbb84ae6b
+	.long	0xfe1ca081
+	.long	0xf9942b08
+	.long	0x70586848
+	.long	0x8f19fd45
+	.long	0x94876cde
+	.long	0x52b7f87b
+	.long	0xab23d373
+	.long	0x72e2024b
+	.long	0xe3578f1f
+	.long	0x662aab55
+	.long	0xb20728eb
+	.long	0x2f03c2b5
+	.long	0x869a7bc5
+	.long	0xd3a50837
+	.long	0x30f28728
+	.long	0x23b2a5bf
+	.long	0x02ba6a03
+	.long	0xed5c8216
+	.long	0x8a2b1ccf
+	.long	0xa792b479
+	.long	0xf3f0f207
+	.long	0x4ea1e269
+	.long	0x65cdf4da
+	.long	0x06d5be05
+	.long	0xd11f6234
+	.long	0xc48afea6
+	.long	0x349d532e
+	.long	0xa2a055f3
+	.long	0x0532e18a
+	.long	0xa475ebf6
+	.long	0x0b39ec83
+	.long	0x40aaef60
+	.long	0x5e069f71
+	.long	0xbd51106e
+	.long	0x3ef98a21
+	.long	0x963d06dd
+	.long	0xddae053e
+	.long	0x4d46bde6
+	.long	0x91b58d54
+	.long	0x71055dc4
+	.long	0x046fd406
+	.long	0x60ff1550
+	.long	0x1924fb98
+	.long	0xd697e9bd
+	.long	0x89cc4340
+	.long	0x67779ed9
+	.long	0xb0bd42e8
+	.long	0x07888b89
+	.long	0xe7385b19
+	.long	0x79dbeec8
+	.long	0xa1470a7c
+	.long	0x7ce90f42
+	.long	0xf8c91e84
+	.long	0x00000000
+	.long	0x09838680
+	.long	0x3248ed2b
+	.long	0x1eac7011
+	.long	0x6c4e725a
+	.long	0xfdfbff0e
+	.long	0x0f563885
+	.long	0x3d1ed5ae
+	.long	0x3627392d
+	.long	0x0a64d90f
+	.long	0x6821a65c
+	.long	0x9bd1545b
+	.long	0x243a2e36
+	.long	0x0cb1670a
+	.long	0x930fe757
+	.long	0xb4d296ee
+	.long	0x1b9e919b
+	.long	0x804fc5c0
+	.long	0x61a220dc
+	.long	0x5a694b77
+	.long	0x1c161a12
+	.long	0xe20aba93
+	.long	0xc0e52aa0
+	.long	0x3c43e022
+	.long	0x121d171b
+	.long	0x0e0b0d09
+	.long	0xf2adc78b
+	.long	0x2db9a8b6
+	.long	0x14c8a91e
+	.long	0x578519f1
+	.long	0xaf4c0775
+	.long	0xeebbdd99
+	.long	0xa3fd607f
+	.long	0xf79f2601
+	.long	0x5cbcf572
+	.long	0x44c53b66
+	.long	0x5b347efb
+	.long	0x8b762943
+	.long	0xcbdcc623
+	.long	0xb668fced
+	.long	0xb863f1e4
+	.long	0xd7cadc31
+	.long	0x42108563
+	.long	0x13402297
+	.long	0x842011c6
+	.long	0x857d244a
+	.long	0xd2f83dbb
+	.long	0xae1132f9
+	.long	0xc76da129
+	.long	0x1d4b2f9e
+	.long	0xdcf330b2
+	.long	0x0dec5286
+	.long	0x77d0e3c1
+	.long	0x2b6c16b3
+	.long	0xa999b970
+	.long	0x11fa4894
+	.long	0x472264e9
+	.long	0xa8c48cfc
+	.long	0xa01a3ff0
+	.long	0x56d82c7d
+	.long	0x22ef9033
+	.long	0x87c74e49
+	.long	0xd9c1d138
+	.long	0x8cfea2ca
+	.long	0x98360bd4
+	.long	0xa6cf81f5
+	.long	0xa528de7a
+	.long	0xda268eb7
+	.long	0x3fa4bfad
+	.long	0x2ce49d3a
+	.long	0x500d9278
+	.long	0x6a9bcc5f
+	.long	0x5462467e
+	.long	0xf6c2138d
+	.long	0x90e8b8d8
+	.long	0x2e5ef739
+	.long	0x82f5afc3
+	.long	0x9fbe805d
+	.long	0x697c93d0
+	.long	0x6fa92dd5
+	.long	0xcfb31225
+	.long	0xc83b99ac
+	.long	0x10a77d18
+	.long	0xe86e639c
+	.long	0xdb7bbb3b
+	.long	0xcd097826
+	.long	0x6ef41859
+	.long	0xec01b79a
+	.long	0x83a89a4f
+	.long	0xe6656e95
+	.long	0xaa7ee6ff
+	.long	0x2108cfbc
+	.long	0xefe6e815
+	.long	0xbad99be7
+	.long	0x4ace366f
+	.long	0xead4099f
+	.long	0x29d67cb0
+	.long	0x31afb2a4
+	.long	0x2a31233f
+	.long	0xc63094a5
+	.long	0x35c066a2
+	.long	0x7437bc4e
+	.long	0xfca6ca82
+	.long	0xe0b0d090
+	.long	0x3315d8a7
+	.long	0xf14a9804
+	.long	0x41f7daec
+	.long	0x7f0e50cd
+	.long	0x172ff691
+	.long	0x768dd64d
+	.long	0x434db0ef
+	.long	0xcc544daa
+	.long	0xe4df0496
+	.long	0x9ee3b5d1
+	.long	0x4c1b886a
+	.long	0xc1b81f2c
+	.long	0x467f5165
+	.long	0x9d04ea5e
+	.long	0x015d358c
+	.long	0xfa737487
+	.long	0xfb2e410b
+	.long	0xb35a1d67
+	.long	0x9252d2db
+	.long	0xe9335610
+	.long	0x6d1347d6
+	.long	0x9a8c61d7
+	.long	0x377a0ca1
+	.long	0x598e14f8
+	.long	0xeb893c13
+	.long	0xceee27a9
+	.long	0xb735c961
+	.long	0xe1ede51c
+	.long	0x7a3cb147
+	.long	0x9c59dfd2
+	.long	0x553f73f2
+	.long	0x1879ce14
+	.long	0x73bf37c7
+	.long	0x53eacdf7
+	.long	0x5f5baafd
+	.long	0xdf146f3d
+	.long	0x7886db44
+	.long	0xca81f3af
+	.long	0xb93ec468
+	.long	0x382c3424
+	.long	0xc25f40a3
+	.long	0x1672c31d
+	.long	0xbc0c25e2
+	.long	0x288b493c
+	.long	0xff41950d
+	.long	0x397101a8
+	.long	0x08deb30c
+	.long	0xd89ce4b4
+	.long	0x6490c156
+	.long	0x7b6184cb
+	.long	0xd570b632
+	.long	0x48745c6c
+	.long	0xd04257b8
+
+
+// Tables of SubBytes results embedded in words.
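+// As the entries below show, Table t places the S-box value in byte t
+// of the word (Table 0: 0x000000ss up through Table 3: 0xss000000), so
+// an output word can be assembled as T0[b0] | T1[b1] | T2[b2] | T3[b3];
+// this form suits the last round, which applies SubBytes without
+// MixColumns.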
+	.globl	_AESSubBytesWordTable
+	.private_extern	_AESSubBytesWordTable
+	.align	2
+_AESSubBytesWordTable:
+	// Table 0.
+	.long	0x00000063
+	.long	0x0000007c
+	.long	0x00000077
+	.long	0x0000007b
+	.long	0x000000f2
+	.long	0x0000006b
+	.long	0x0000006f
+	.long	0x000000c5
+	.long	0x00000030
+	.long	0x00000001
+	.long	0x00000067
+	.long	0x0000002b
+	.long	0x000000fe
+	.long	0x000000d7
+	.long	0x000000ab
+	.long	0x00000076
+	.long	0x000000ca
+	.long	0x00000082
+	.long	0x000000c9
+	.long	0x0000007d
+	.long	0x000000fa
+	.long	0x00000059
+	.long	0x00000047
+	.long	0x000000f0
+	.long	0x000000ad
+	.long	0x000000d4
+	.long	0x000000a2
+	.long	0x000000af
+	.long	0x0000009c
+	.long	0x000000a4
+	.long	0x00000072
+	.long	0x000000c0
+	.long	0x000000b7
+	.long	0x000000fd
+	.long	0x00000093
+	.long	0x00000026
+	.long	0x00000036
+	.long	0x0000003f
+	.long	0x000000f7
+	.long	0x000000cc
+	.long	0x00000034
+	.long	0x000000a5
+	.long	0x000000e5
+	.long	0x000000f1
+	.long	0x00000071
+	.long	0x000000d8
+	.long	0x00000031
+	.long	0x00000015
+	.long	0x00000004
+	.long	0x000000c7
+	.long	0x00000023
+	.long	0x000000c3
+	.long	0x00000018
+	.long	0x00000096
+	.long	0x00000005
+	.long	0x0000009a
+	.long	0x00000007
+	.long	0x00000012
+	.long	0x00000080
+	.long	0x000000e2
+	.long	0x000000eb
+	.long	0x00000027
+	.long	0x000000b2
+	.long	0x00000075
+	.long	0x00000009
+	.long	0x00000083
+	.long	0x0000002c
+	.long	0x0000001a
+	.long	0x0000001b
+	.long	0x0000006e
+	.long	0x0000005a
+	.long	0x000000a0
+	.long	0x00000052
+	.long	0x0000003b
+	.long	0x000000d6
+	.long	0x000000b3
+	.long	0x00000029
+	.long	0x000000e3
+	.long	0x0000002f
+	.long	0x00000084
+	.long	0x00000053
+	.long	0x000000d1
+	.long	0x00000000
+	.long	0x000000ed
+	.long	0x00000020
+	.long	0x000000fc
+	.long	0x000000b1
+	.long	0x0000005b
+	.long	0x0000006a
+	.long	0x000000cb
+	.long	0x000000be
+	.long	0x00000039
+	.long	0x0000004a
+	.long	0x0000004c
+	.long	0x00000058
+	.long	0x000000cf
+	.long	0x000000d0
+	.long	0x000000ef
+	.long	0x000000aa
+	.long	0x000000fb
+	.long	0x00000043
+	.long	0x0000004d
+	.long	0x00000033
+	.long	0x00000085
+	.long	0x00000045
+	.long	0x000000f9
+	.long	0x00000002
+	.long	0x0000007f
+	.long	0x00000050
+	.long	0x0000003c
+	.long	0x0000009f
+	.long	0x000000a8
+	.long	0x00000051
+	.long	0x000000a3
+	.long	0x00000040
+	.long	0x0000008f
+	.long	0x00000092
+	.long	0x0000009d
+	.long	0x00000038
+	.long	0x000000f5
+	.long	0x000000bc
+	.long	0x000000b6
+	.long	0x000000da
+	.long	0x00000021
+	.long	0x00000010
+	.long	0x000000ff
+	.long	0x000000f3
+	.long	0x000000d2
+	.long	0x000000cd
+	.long	0x0000000c
+	.long	0x00000013
+	.long	0x000000ec
+	.long	0x0000005f
+	.long	0x00000097
+	.long	0x00000044
+	.long	0x00000017
+	.long	0x000000c4
+	.long	0x000000a7
+	.long	0x0000007e
+	.long	0x0000003d
+	.long	0x00000064
+	.long	0x0000005d
+	.long	0x00000019
+	.long	0x00000073
+	.long	0x00000060
+	.long	0x00000081
+	.long	0x0000004f
+	.long	0x000000dc
+	.long	0x00000022
+	.long	0x0000002a
+	.long	0x00000090
+	.long	0x00000088
+	.long	0x00000046
+	.long	0x000000ee
+	.long	0x000000b8
+	.long	0x00000014
+	.long	0x000000de
+	.long	0x0000005e
+	.long	0x0000000b
+	.long	0x000000db
+	.long	0x000000e0
+	.long	0x00000032
+	.long	0x0000003a
+	.long	0x0000000a
+	.long	0x00000049
+	.long	0x00000006
+	.long	0x00000024
+	.long	0x0000005c
+	.long	0x000000c2
+	.long	0x000000d3
+	.long	0x000000ac
+	.long	0x00000062
+	.long	0x00000091
+	.long	0x00000095
+	.long	0x000000e4
+	.long	0x00000079
+	.long	0x000000e7
+	.long	0x000000c8
+	.long	0x00000037
+	.long	0x0000006d
+	.long	0x0000008d
+	.long	0x000000d5
+	.long	0x0000004e
+	.long	0x000000a9
+	.long	0x0000006c
+	.long	0x00000056
+	.long	0x000000f4
+	.long	0x000000ea
+	.long	0x00000065
+	.long	0x0000007a
+	.long	0x000000ae
+	.long	0x00000008
+	.long	0x000000ba
+	.long	0x00000078
+	.long	0x00000025
+	.long	0x0000002e
+	.long	0x0000001c
+	.long	0x000000a6
+	.long	0x000000b4
+	.long	0x000000c6
+	.long	0x000000e8
+	.long	0x000000dd
+	.long	0x00000074
+	.long	0x0000001f
+	.long	0x0000004b
+	.long	0x000000bd
+	.long	0x0000008b
+	.long	0x0000008a
+	.long	0x00000070
+	.long	0x0000003e
+	.long	0x000000b5
+	.long	0x00000066
+	.long	0x00000048
+	.long	0x00000003
+	.long	0x000000f6
+	.long	0x0000000e
+	.long	0x00000061
+	.long	0x00000035
+	.long	0x00000057
+	.long	0x000000b9
+	.long	0x00000086
+	.long	0x000000c1
+	.long	0x0000001d
+	.long	0x0000009e
+	.long	0x000000e1
+	.long	0x000000f8
+	.long	0x00000098
+	.long	0x00000011
+	.long	0x00000069
+	.long	0x000000d9
+	.long	0x0000008e
+	.long	0x00000094
+	.long	0x0000009b
+	.long	0x0000001e
+	.long	0x00000087
+	.long	0x000000e9
+	.long	0x000000ce
+	.long	0x00000055
+	.long	0x00000028
+	.long	0x000000df
+	.long	0x0000008c
+	.long	0x000000a1
+	.long	0x00000089
+	.long	0x0000000d
+	.long	0x000000bf
+	.long	0x000000e6
+	.long	0x00000042
+	.long	0x00000068
+	.long	0x00000041
+	.long	0x00000099
+	.long	0x0000002d
+	.long	0x0000000f
+	.long	0x000000b0
+	.long	0x00000054
+	.long	0x000000bb
+	.long	0x00000016
+	// Table 1.
+	.long	0x00006300
+	.long	0x00007c00
+	.long	0x00007700
+	.long	0x00007b00
+	.long	0x0000f200
+	.long	0x00006b00
+	.long	0x00006f00
+	.long	0x0000c500
+	.long	0x00003000
+	.long	0x00000100
+	.long	0x00006700
+	.long	0x00002b00
+	.long	0x0000fe00
+	.long	0x0000d700
+	.long	0x0000ab00
+	.long	0x00007600
+	.long	0x0000ca00
+	.long	0x00008200
+	.long	0x0000c900
+	.long	0x00007d00
+	.long	0x0000fa00
+	.long	0x00005900
+	.long	0x00004700
+	.long	0x0000f000
+	.long	0x0000ad00
+	.long	0x0000d400
+	.long	0x0000a200
+	.long	0x0000af00
+	.long	0x00009c00
+	.long	0x0000a400
+	.long	0x00007200
+	.long	0x0000c000
+	.long	0x0000b700
+	.long	0x0000fd00
+	.long	0x00009300
+	.long	0x00002600
+	.long	0x00003600
+	.long	0x00003f00
+	.long	0x0000f700
+	.long	0x0000cc00
+	.long	0x00003400
+	.long	0x0000a500
+	.long	0x0000e500
+	.long	0x0000f100
+	.long	0x00007100
+	.long	0x0000d800
+	.long	0x00003100
+	.long	0x00001500
+	.long	0x00000400
+	.long	0x0000c700
+	.long	0x00002300
+	.long	0x0000c300
+	.long	0x00001800
+	.long	0x00009600
+	.long	0x00000500
+	.long	0x00009a00
+	.long	0x00000700
+	.long	0x00001200
+	.long	0x00008000
+	.long	0x0000e200
+	.long	0x0000eb00
+	.long	0x00002700
+	.long	0x0000b200
+	.long	0x00007500
+	.long	0x00000900
+	.long	0x00008300
+	.long	0x00002c00
+	.long	0x00001a00
+	.long	0x00001b00
+	.long	0x00006e00
+	.long	0x00005a00
+	.long	0x0000a000
+	.long	0x00005200
+	.long	0x00003b00
+	.long	0x0000d600
+	.long	0x0000b300
+	.long	0x00002900
+	.long	0x0000e300
+	.long	0x00002f00
+	.long	0x00008400
+	.long	0x00005300
+	.long	0x0000d100
+	.long	0x00000000
+	.long	0x0000ed00
+	.long	0x00002000
+	.long	0x0000fc00
+	.long	0x0000b100
+	.long	0x00005b00
+	.long	0x00006a00
+	.long	0x0000cb00
+	.long	0x0000be00
+	.long	0x00003900
+	.long	0x00004a00
+	.long	0x00004c00
+	.long	0x00005800
+	.long	0x0000cf00
+	.long	0x0000d000
+	.long	0x0000ef00
+	.long	0x0000aa00
+	.long	0x0000fb00
+	.long	0x00004300
+	.long	0x00004d00
+	.long	0x00003300
+	.long	0x00008500
+	.long	0x00004500
+	.long	0x0000f900
+	.long	0x00000200
+	.long	0x00007f00
+	.long	0x00005000
+	.long	0x00003c00
+	.long	0x00009f00
+	.long	0x0000a800
+	.long	0x00005100
+	.long	0x0000a300
+	.long	0x00004000
+	.long	0x00008f00
+	.long	0x00009200
+	.long	0x00009d00
+	.long	0x00003800
+	.long	0x0000f500
+	.long	0x0000bc00
+	.long	0x0000b600
+	.long	0x0000da00
+	.long	0x00002100
+	.long	0x00001000
+	.long	0x0000ff00
+	.long	0x0000f300
+	.long	0x0000d200
+	.long	0x0000cd00
+	.long	0x00000c00
+	.long	0x00001300
+	.long	0x0000ec00
+	.long	0x00005f00
+	.long	0x00009700
+	.long	0x00004400
+	.long	0x00001700
+	.long	0x0000c400
+	.long	0x0000a700
+	.long	0x00007e00
+	.long	0x00003d00
+	.long	0x00006400
+	.long	0x00005d00
+	.long	0x00001900
+	.long	0x00007300
+	.long	0x00006000
+	.long	0x00008100
+	.long	0x00004f00
+	.long	0x0000dc00
+	.long	0x00002200
+	.long	0x00002a00
+	.long	0x00009000
+	.long	0x00008800
+	.long	0x00004600
+	.long	0x0000ee00
+	.long	0x0000b800
+	.long	0x00001400
+	.long	0x0000de00
+	.long	0x00005e00
+	.long	0x00000b00
+	.long	0x0000db00
+	.long	0x0000e000
+	.long	0x00003200
+	.long	0x00003a00
+	.long	0x00000a00
+	.long	0x00004900
+	.long	0x00000600
+	.long	0x00002400
+	.long	0x00005c00
+	.long	0x0000c200
+	.long	0x0000d300
+	.long	0x0000ac00
+	.long	0x00006200
+	.long	0x00009100
+	.long	0x00009500
+	.long	0x0000e400
+	.long	0x00007900
+	.long	0x0000e700
+	.long	0x0000c800
+	.long	0x00003700
+	.long	0x00006d00
+	.long	0x00008d00
+	.long	0x0000d500
+	.long	0x00004e00
+	.long	0x0000a900
+	.long	0x00006c00
+	.long	0x00005600
+	.long	0x0000f400
+	.long	0x0000ea00
+	.long	0x00006500
+	.long	0x00007a00
+	.long	0x0000ae00
+	.long	0x00000800
+	.long	0x0000ba00
+	.long	0x00007800
+	.long	0x00002500
+	.long	0x00002e00
+	.long	0x00001c00
+	.long	0x0000a600
+	.long	0x0000b400
+	.long	0x0000c600
+	.long	0x0000e800
+	.long	0x0000dd00
+	.long	0x00007400
+	.long	0x00001f00
+	.long	0x00004b00
+	.long	0x0000bd00
+	.long	0x00008b00
+	.long	0x00008a00
+	.long	0x00007000
+	.long	0x00003e00
+	.long	0x0000b500
+	.long	0x00006600
+	.long	0x00004800
+	.long	0x00000300
+	.long	0x0000f600
+	.long	0x00000e00
+	.long	0x00006100
+	.long	0x00003500
+	.long	0x00005700
+	.long	0x0000b900
+	.long	0x00008600
+	.long	0x0000c100
+	.long	0x00001d00
+	.long	0x00009e00
+	.long	0x0000e100
+	.long	0x0000f800
+	.long	0x00009800
+	.long	0x00001100
+	.long	0x00006900
+	.long	0x0000d900
+	.long	0x00008e00
+	.long	0x00009400
+	.long	0x00009b00
+	.long	0x00001e00
+	.long	0x00008700
+	.long	0x0000e900
+	.long	0x0000ce00
+	.long	0x00005500
+	.long	0x00002800
+	.long	0x0000df00
+	.long	0x00008c00
+	.long	0x0000a100
+	.long	0x00008900
+	.long	0x00000d00
+	.long	0x0000bf00
+	.long	0x0000e600
+	.long	0x00004200
+	.long	0x00006800
+	.long	0x00004100
+	.long	0x00009900
+	.long	0x00002d00
+	.long	0x00000f00
+	.long	0x0000b000
+	.long	0x00005400
+	.long	0x0000bb00
+	.long	0x00001600
+	// Table 2.
+	.long	0x00630000
+	.long	0x007c0000
+	.long	0x00770000
+	.long	0x007b0000
+	.long	0x00f20000
+	.long	0x006b0000
+	.long	0x006f0000
+	.long	0x00c50000
+	.long	0x00300000
+	.long	0x00010000
+	.long	0x00670000
+	.long	0x002b0000
+	.long	0x00fe0000
+	.long	0x00d70000
+	.long	0x00ab0000
+	.long	0x00760000
+	.long	0x00ca0000
+	.long	0x00820000
+	.long	0x00c90000
+	.long	0x007d0000
+	.long	0x00fa0000
+	.long	0x00590000
+	.long	0x00470000
+	.long	0x00f00000
+	.long	0x00ad0000
+	.long	0x00d40000
+	.long	0x00a20000
+	.long	0x00af0000
+	.long	0x009c0000
+	.long	0x00a40000
+	.long	0x00720000
+	.long	0x00c00000
+	.long	0x00b70000
+	.long	0x00fd0000
+	.long	0x00930000
+	.long	0x00260000
+	.long	0x00360000
+	.long	0x003f0000
+	.long	0x00f70000
+	.long	0x00cc0000
+	.long	0x00340000
+	.long	0x00a50000
+	.long	0x00e50000
+	.long	0x00f10000
+	.long	0x00710000
+	.long	0x00d80000
+	.long	0x00310000
+	.long	0x00150000
+	.long	0x00040000
+	.long	0x00c70000
+	.long	0x00230000
+	.long	0x00c30000
+	.long	0x00180000
+	.long	0x00960000
+	.long	0x00050000
+	.long	0x009a0000
+	.long	0x00070000
+	.long	0x00120000
+	.long	0x00800000
+	.long	0x00e20000
+	.long	0x00eb0000
+	.long	0x00270000
+	.long	0x00b20000
+	.long	0x00750000
+	.long	0x00090000
+	.long	0x00830000
+	.long	0x002c0000
+	.long	0x001a0000
+	.long	0x001b0000
+	.long	0x006e0000
+	.long	0x005a0000
+	.long	0x00a00000
+	.long	0x00520000
+	.long	0x003b0000
+	.long	0x00d60000
+	.long	0x00b30000
+	.long	0x00290000
+	.long	0x00e30000
+	.long	0x002f0000
+	.long	0x00840000
+	.long	0x00530000
+	.long	0x00d10000
+	.long	0x00000000
+	.long	0x00ed0000
+	.long	0x00200000
+	.long	0x00fc0000
+	.long	0x00b10000
+	.long	0x005b0000
+	.long	0x006a0000
+	.long	0x00cb0000
+	.long	0x00be0000
+	.long	0x00390000
+	.long	0x004a0000
+	.long	0x004c0000
+	.long	0x00580000
+	.long	0x00cf0000
+	.long	0x00d00000
+	.long	0x00ef0000
+	.long	0x00aa0000
+	.long	0x00fb0000
+	.long	0x00430000
+	.long	0x004d0000
+	.long	0x00330000
+	.long	0x00850000
+	.long	0x00450000
+	.long	0x00f90000
+	.long	0x00020000
+	.long	0x007f0000
+	.long	0x00500000
+	.long	0x003c0000
+	.long	0x009f0000
+	.long	0x00a80000
+	.long	0x00510000
+	.long	0x00a30000
+	.long	0x00400000
+	.long	0x008f0000
+	.long	0x00920000
+	.long	0x009d0000
+	.long	0x00380000
+	.long	0x00f50000
+	.long	0x00bc0000
+	.long	0x00b60000
+	.long	0x00da0000
+	.long	0x00210000
+	.long	0x00100000
+	.long	0x00ff0000
+	.long	0x00f30000
+	.long	0x00d20000
+	.long	0x00cd0000
+	.long	0x000c0000
+	.long	0x00130000
+	.long	0x00ec0000
+	.long	0x005f0000
+	.long	0x00970000
+	.long	0x00440000
+	.long	0x00170000
+	.long	0x00c40000
+	.long	0x00a70000
+	.long	0x007e0000
+	.long	0x003d0000
+	.long	0x00640000
+	.long	0x005d0000
+	.long	0x00190000
+	.long	0x00730000
+	.long	0x00600000
+	.long	0x00810000
+	.long	0x004f0000
+	.long	0x00dc0000
+	.long	0x00220000
+	.long	0x002a0000
+	.long	0x00900000
+	.long	0x00880000
+	.long	0x00460000
+	.long	0x00ee0000
+	.long	0x00b80000
+	.long	0x00140000
+	.long	0x00de0000
+	.long	0x005e0000
+	.long	0x000b0000
+	.long	0x00db0000
+	.long	0x00e00000
+	.long	0x00320000
+	.long	0x003a0000
+	.long	0x000a0000
+	.long	0x00490000
+	.long	0x00060000
+	.long	0x00240000
+	.long	0x005c0000
+	.long	0x00c20000
+	.long	0x00d30000
+	.long	0x00ac0000
+	.long	0x00620000
+	.long	0x00910000
+	.long	0x00950000
+	.long	0x00e40000
+	.long	0x00790000
+	.long	0x00e70000
+	.long	0x00c80000
+	.long	0x00370000
+	.long	0x006d0000
+	.long	0x008d0000
+	.long	0x00d50000
+	.long	0x004e0000
+	.long	0x00a90000
+	.long	0x006c0000
+	.long	0x00560000
+	.long	0x00f40000
+	.long	0x00ea0000
+	.long	0x00650000
+	.long	0x007a0000
+	.long	0x00ae0000
+	.long	0x00080000
+	.long	0x00ba0000
+	.long	0x00780000
+	.long	0x00250000
+	.long	0x002e0000
+	.long	0x001c0000
+	.long	0x00a60000
+	.long	0x00b40000
+	.long	0x00c60000
+	.long	0x00e80000
+	.long	0x00dd0000
+	.long	0x00740000
+	.long	0x001f0000
+	.long	0x004b0000
+	.long	0x00bd0000
+	.long	0x008b0000
+	.long	0x008a0000
+	.long	0x00700000
+	.long	0x003e0000
+	.long	0x00b50000
+	.long	0x00660000
+	.long	0x00480000
+	.long	0x00030000
+	.long	0x00f60000
+	.long	0x000e0000
+	.long	0x00610000
+	.long	0x00350000
+	.long	0x00570000
+	.long	0x00b90000
+	.long	0x00860000
+	.long	0x00c10000
+	.long	0x001d0000
+	.long	0x009e0000
+	.long	0x00e10000
+	.long	0x00f80000
+	.long	0x00980000
+	.long	0x00110000
+	.long	0x00690000
+	.long	0x00d90000
+	.long	0x008e0000
+	.long	0x00940000
+	.long	0x009b0000
+	.long	0x001e0000
+	.long	0x00870000
+	.long	0x00e90000
+	.long	0x00ce0000
+	.long	0x00550000
+	.long	0x00280000
+	.long	0x00df0000
+	.long	0x008c0000
+	.long	0x00a10000
+	.long	0x00890000
+	.long	0x000d0000
+	.long	0x00bf0000
+	.long	0x00e60000
+	.long	0x00420000
+	.long	0x00680000
+	.long	0x00410000
+	.long	0x00990000
+	.long	0x002d0000
+	.long	0x000f0000
+	.long	0x00b00000
+	.long	0x00540000
+	.long	0x00bb0000
+	.long	0x00160000
+	// Table 3.
+	.long	0x63000000
+	.long	0x7c000000
+	.long	0x77000000
+	.long	0x7b000000
+	.long	0xf2000000
+	.long	0x6b000000
+	.long	0x6f000000
+	.long	0xc5000000
+	.long	0x30000000
+	.long	0x01000000
+	.long	0x67000000
+	.long	0x2b000000
+	.long	0xfe000000
+	.long	0xd7000000
+	.long	0xab000000
+	.long	0x76000000
+	.long	0xca000000
+	.long	0x82000000
+	.long	0xc9000000
+	.long	0x7d000000
+	.long	0xfa000000
+	.long	0x59000000
+	.long	0x47000000
+	.long	0xf0000000
+	.long	0xad000000
+	.long	0xd4000000
+	.long	0xa2000000
+	.long	0xaf000000
+	.long	0x9c000000
+	.long	0xa4000000
+	.long	0x72000000
+	.long	0xc0000000
+	.long	0xb7000000
+	.long	0xfd000000
+	.long	0x93000000
+	.long	0x26000000
+	.long	0x36000000
+	.long	0x3f000000
+	.long	0xf7000000
+	.long	0xcc000000
+	.long	0x34000000
+	.long	0xa5000000
+	.long	0xe5000000
+	.long	0xf1000000
+	.long	0x71000000
+	.long	0xd8000000
+	.long	0x31000000
+	.long	0x15000000
+	.long	0x04000000
+	.long	0xc7000000
+	.long	0x23000000
+	.long	0xc3000000
+	.long	0x18000000
+	.long	0x96000000
+	.long	0x05000000
+	.long	0x9a000000
+	.long	0x07000000
+	.long	0x12000000
+	.long	0x80000000
+	.long	0xe2000000
+	.long	0xeb000000
+	.long	0x27000000
+	.long	0xb2000000
+	.long	0x75000000
+	.long	0x09000000
+	.long	0x83000000
+	.long	0x2c000000
+	.long	0x1a000000
+	.long	0x1b000000
+	.long	0x6e000000
+	.long	0x5a000000
+	.long	0xa0000000
+	.long	0x52000000
+	.long	0x3b000000
+	.long	0xd6000000
+	.long	0xb3000000
+	.long	0x29000000
+	.long	0xe3000000
+	.long	0x2f000000
+	.long	0x84000000
+	.long	0x53000000
+	.long	0xd1000000
+	.long	0x00000000
+	.long	0xed000000
+	.long	0x20000000
+	.long	0xfc000000
+	.long	0xb1000000
+	.long	0x5b000000
+	.long	0x6a000000
+	.long	0xcb000000
+	.long	0xbe000000
+	.long	0x39000000
+	.long	0x4a000000
+	.long	0x4c000000
+	.long	0x58000000
+	.long	0xcf000000
+	.long	0xd0000000
+	.long	0xef000000
+	.long	0xaa000000
+	.long	0xfb000000
+	.long	0x43000000
+	.long	0x4d000000
+	.long	0x33000000
+	.long	0x85000000
+	.long	0x45000000
+	.long	0xf9000000
+	.long	0x02000000
+	.long	0x7f000000
+	.long	0x50000000
+	.long	0x3c000000
+	.long	0x9f000000
+	.long	0xa8000000
+	.long	0x51000000
+	.long	0xa3000000
+	.long	0x40000000
+	.long	0x8f000000
+	.long	0x92000000
+	.long	0x9d000000
+	.long	0x38000000
+	.long	0xf5000000
+	.long	0xbc000000
+	.long	0xb6000000
+	.long	0xda000000
+	.long	0x21000000
+	.long	0x10000000
+	.long	0xff000000
+	.long	0xf3000000
+	.long	0xd2000000
+	.long	0xcd000000
+	.long	0x0c000000
+	.long	0x13000000
+	.long	0xec000000
+	.long	0x5f000000
+	.long	0x97000000
+	.long	0x44000000
+	.long	0x17000000
+	.long	0xc4000000
+	.long	0xa7000000
+	.long	0x7e000000
+	.long	0x3d000000
+	.long	0x64000000
+	.long	0x5d000000
+	.long	0x19000000
+	.long	0x73000000
+	.long	0x60000000
+	.long	0x81000000
+	.long	0x4f000000
+	.long	0xdc000000
+	.long	0x22000000
+	.long	0x2a000000
+	.long	0x90000000
+	.long	0x88000000
+	.long	0x46000000
+	.long	0xee000000
+	.long	0xb8000000
+	.long	0x14000000
+	.long	0xde000000
+	.long	0x5e000000
+	.long	0x0b000000
+	.long	0xdb000000
+	.long	0xe0000000
+	.long	0x32000000
+	.long	0x3a000000
+	.long	0x0a000000
+	.long	0x49000000
+	.long	0x06000000
+	.long	0x24000000
+	.long	0x5c000000
+	.long	0xc2000000
+	.long	0xd3000000
+	.long	0xac000000
+	.long	0x62000000
+	.long	0x91000000
+	.long	0x95000000
+	.long	0xe4000000
+	.long	0x79000000
+	.long	0xe7000000
+	.long	0xc8000000
+	.long	0x37000000
+	.long	0x6d000000
+	.long	0x8d000000
+	.long	0xd5000000
+	.long	0x4e000000
+	.long	0xa9000000
+	.long	0x6c000000
+	.long	0x56000000
+	.long	0xf4000000
+	.long	0xea000000
+	.long	0x65000000
+	.long	0x7a000000
+	.long	0xae000000
+	.long	0x08000000
+	.long	0xba000000
+	.long	0x78000000
+	.long	0x25000000
+	.long	0x2e000000
+	.long	0x1c000000
+	.long	0xa6000000
+	.long	0xb4000000
+	.long	0xc6000000
+	.long	0xe8000000
+	.long	0xdd000000
+	.long	0x74000000
+	.long	0x1f000000
+	.long	0x4b000000
+	.long	0xbd000000
+	.long	0x8b000000
+	.long	0x8a000000
+	.long	0x70000000
+	.long	0x3e000000
+	.long	0xb5000000
+	.long	0x66000000
+	.long	0x48000000
+	.long	0x03000000
+	.long	0xf6000000
+	.long	0x0e000000
+	.long	0x61000000
+	.long	0x35000000
+	.long	0x57000000
+	.long	0xb9000000
+	.long	0x86000000
+	.long	0xc1000000
+	.long	0x1d000000
+	.long	0x9e000000
+	.long	0xe1000000
+	.long	0xf8000000
+	.long	0x98000000
+	.long	0x11000000
+	.long	0x69000000
+	.long	0xd9000000
+	.long	0x8e000000
+	.long	0x94000000
+	.long	0x9b000000
+	.long	0x1e000000
+	.long	0x87000000
+	.long	0xe9000000
+	.long	0xce000000
+	.long	0x55000000
+	.long	0x28000000
+	.long	0xdf000000
+	.long	0x8c000000
+	.long	0xa1000000
+	.long	0x89000000
+	.long	0x0d000000
+	.long	0xbf000000
+	.long	0xe6000000
+	.long	0x42000000
+	.long	0x68000000
+	.long	0x41000000
+	.long	0x99000000
+	.long	0x2d000000
+	.long	0x0f000000
+	.long	0xb0000000
+	.long	0x54000000
+	.long	0xbb000000
+	.long	0x16000000
+
+
+// Tables of InvSubBytes results embedded in words.
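+// Same layout as _AESSubBytesWordTable above, but built from the
+// inverse S-box (0x52, 0x09, 0x6a, ...): Table t places the value in
+// byte t of the word, matching the last decryption round, which omits
+// InvMixColumns.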
+	.globl	_AESInvSubBytesWordTable
+	.private_extern	_AESInvSubBytesWordTable
+	.align	2
+_AESInvSubBytesWordTable:
+	// Table 0.
+	.long	0x00000052
+	.long	0x00000009
+	.long	0x0000006a
+	.long	0x000000d5
+	.long	0x00000030
+	.long	0x00000036
+	.long	0x000000a5
+	.long	0x00000038
+	.long	0x000000bf
+	.long	0x00000040
+	.long	0x000000a3
+	.long	0x0000009e
+	.long	0x00000081
+	.long	0x000000f3
+	.long	0x000000d7
+	.long	0x000000fb
+	.long	0x0000007c
+	.long	0x000000e3
+	.long	0x00000039
+	.long	0x00000082
+	.long	0x0000009b
+	.long	0x0000002f
+	.long	0x000000ff
+	.long	0x00000087
+	.long	0x00000034
+	.long	0x0000008e
+	.long	0x00000043
+	.long	0x00000044
+	.long	0x000000c4
+	.long	0x000000de
+	.long	0x000000e9
+	.long	0x000000cb
+	.long	0x00000054
+	.long	0x0000007b
+	.long	0x00000094
+	.long	0x00000032
+	.long	0x000000a6
+	.long	0x000000c2
+	.long	0x00000023
+	.long	0x0000003d
+	.long	0x000000ee
+	.long	0x0000004c
+	.long	0x00000095
+	.long	0x0000000b
+	.long	0x00000042
+	.long	0x000000fa
+	.long	0x000000c3
+	.long	0x0000004e
+	.long	0x00000008
+	.long	0x0000002e
+	.long	0x000000a1
+	.long	0x00000066
+	.long	0x00000028
+	.long	0x000000d9
+	.long	0x00000024
+	.long	0x000000b2
+	.long	0x00000076
+	.long	0x0000005b
+	.long	0x000000a2
+	.long	0x00000049
+	.long	0x0000006d
+	.long	0x0000008b
+	.long	0x000000d1
+	.long	0x00000025
+	.long	0x00000072
+	.long	0x000000f8
+	.long	0x000000f6
+	.long	0x00000064
+	.long	0x00000086
+	.long	0x00000068
+	.long	0x00000098
+	.long	0x00000016
+	.long	0x000000d4
+	.long	0x000000a4
+	.long	0x0000005c
+	.long	0x000000cc
+	.long	0x0000005d
+	.long	0x00000065
+	.long	0x000000b6
+	.long	0x00000092
+	.long	0x0000006c
+	.long	0x00000070
+	.long	0x00000048
+	.long	0x00000050
+	.long	0x000000fd
+	.long	0x000000ed
+	.long	0x000000b9
+	.long	0x000000da
+	.long	0x0000005e
+	.long	0x00000015
+	.long	0x00000046
+	.long	0x00000057
+	.long	0x000000a7
+	.long	0x0000008d
+	.long	0x0000009d
+	.long	0x00000084
+	.long	0x00000090
+	.long	0x000000d8
+	.long	0x000000ab
+	.long	0x00000000
+	.long	0x0000008c
+	.long	0x000000bc
+	.long	0x000000d3
+	.long	0x0000000a
+	.long	0x000000f7
+	.long	0x000000e4
+	.long	0x00000058
+	.long	0x00000005
+	.long	0x000000b8
+	.long	0x000000b3
+	.long	0x00000045
+	.long	0x00000006
+	.long	0x000000d0
+	.long	0x0000002c
+	.long	0x0000001e
+	.long	0x0000008f
+	.long	0x000000ca
+	.long	0x0000003f
+	.long	0x0000000f
+	.long	0x00000002
+	.long	0x000000c1
+	.long	0x000000af
+	.long	0x000000bd
+	.long	0x00000003
+	.long	0x00000001
+	.long	0x00000013
+	.long	0x0000008a
+	.long	0x0000006b
+	.long	0x0000003a
+	.long	0x00000091
+	.long	0x00000011
+	.long	0x00000041
+	.long	0x0000004f
+	.long	0x00000067
+	.long	0x000000dc
+	.long	0x000000ea
+	.long	0x00000097
+	.long	0x000000f2
+	.long	0x000000cf
+	.long	0x000000ce
+	.long	0x000000f0
+	.long	0x000000b4
+	.long	0x000000e6
+	.long	0x00000073
+	.long	0x00000096
+	.long	0x000000ac
+	.long	0x00000074
+	.long	0x00000022
+	.long	0x000000e7
+	.long	0x000000ad
+	.long	0x00000035
+	.long	0x00000085
+	.long	0x000000e2
+	.long	0x000000f9
+	.long	0x00000037
+	.long	0x000000e8
+	.long	0x0000001c
+	.long	0x00000075
+	.long	0x000000df
+	.long	0x0000006e
+	.long	0x00000047
+	.long	0x000000f1
+	.long	0x0000001a
+	.long	0x00000071
+	.long	0x0000001d
+	.long	0x00000029
+	.long	0x000000c5
+	.long	0x00000089
+	.long	0x0000006f
+	.long	0x000000b7
+	.long	0x00000062
+	.long	0x0000000e
+	.long	0x000000aa
+	.long	0x00000018
+	.long	0x000000be
+	.long	0x0000001b
+	.long	0x000000fc
+	.long	0x00000056
+	.long	0x0000003e
+	.long	0x0000004b
+	.long	0x000000c6
+	.long	0x000000d2
+	.long	0x00000079
+	.long	0x00000020
+	.long	0x0000009a
+	.long	0x000000db
+	.long	0x000000c0
+	.long	0x000000fe
+	.long	0x00000078
+	.long	0x000000cd
+	.long	0x0000005a
+	.long	0x000000f4
+	.long	0x0000001f
+	.long	0x000000dd
+	.long	0x000000a8
+	.long	0x00000033
+	.long	0x00000088
+	.long	0x00000007
+	.long	0x000000c7
+	.long	0x00000031
+	.long	0x000000b1
+	.long	0x00000012
+	.long	0x00000010
+	.long	0x00000059
+	.long	0x00000027
+	.long	0x00000080
+	.long	0x000000ec
+	.long	0x0000005f
+	.long	0x00000060
+	.long	0x00000051
+	.long	0x0000007f
+	.long	0x000000a9
+	.long	0x00000019
+	.long	0x000000b5
+	.long	0x0000004a
+	.long	0x0000000d
+	.long	0x0000002d
+	.long	0x000000e5
+	.long	0x0000007a
+	.long	0x0000009f
+	.long	0x00000093
+	.long	0x000000c9
+	.long	0x0000009c
+	.long	0x000000ef
+	.long	0x000000a0
+	.long	0x000000e0
+	.long	0x0000003b
+	.long	0x0000004d
+	.long	0x000000ae
+	.long	0x0000002a
+	.long	0x000000f5
+	.long	0x000000b0
+	.long	0x000000c8
+	.long	0x000000eb
+	.long	0x000000bb
+	.long	0x0000003c
+	.long	0x00000083
+	.long	0x00000053
+	.long	0x00000099
+	.long	0x00000061
+	.long	0x00000017
+	.long	0x0000002b
+	.long	0x00000004
+	.long	0x0000007e
+	.long	0x000000ba
+	.long	0x00000077
+	.long	0x000000d6
+	.long	0x00000026
+	.long	0x000000e1
+	.long	0x00000069
+	.long	0x00000014
+	.long	0x00000063
+	.long	0x00000055
+	.long	0x00000021
+	.long	0x0000000c
+	.long	0x0000007d
+	// Table 1.
+	.long	0x00005200
+	.long	0x00000900
+	.long	0x00006a00
+	.long	0x0000d500
+	.long	0x00003000
+	.long	0x00003600
+	.long	0x0000a500
+	.long	0x00003800
+	.long	0x0000bf00
+	.long	0x00004000
+	.long	0x0000a300
+	.long	0x00009e00
+	.long	0x00008100
+	.long	0x0000f300
+	.long	0x0000d700
+	.long	0x0000fb00
+	.long	0x00007c00
+	.long	0x0000e300
+	.long	0x00003900
+	.long	0x00008200
+	.long	0x00009b00
+	.long	0x00002f00
+	.long	0x0000ff00
+	.long	0x00008700
+	.long	0x00003400
+	.long	0x00008e00
+	.long	0x00004300
+	.long	0x00004400
+	.long	0x0000c400
+	.long	0x0000de00
+	.long	0x0000e900
+	.long	0x0000cb00
+	.long	0x00005400
+	.long	0x00007b00
+	.long	0x00009400
+	.long	0x00003200
+	.long	0x0000a600
+	.long	0x0000c200
+	.long	0x00002300
+	.long	0x00003d00
+	.long	0x0000ee00
+	.long	0x00004c00
+	.long	0x00009500
+	.long	0x00000b00
+	.long	0x00004200
+	.long	0x0000fa00
+	.long	0x0000c300
+	.long	0x00004e00
+	.long	0x00000800
+	.long	0x00002e00
+	.long	0x0000a100
+	.long	0x00006600
+	.long	0x00002800
+	.long	0x0000d900
+	.long	0x00002400
+	.long	0x0000b200
+	.long	0x00007600
+	.long	0x00005b00
+	.long	0x0000a200
+	.long	0x00004900
+	.long	0x00006d00
+	.long	0x00008b00
+	.long	0x0000d100
+	.long	0x00002500
+	.long	0x00007200
+	.long	0x0000f800
+	.long	0x0000f600
+	.long	0x00006400
+	.long	0x00008600
+	.long	0x00006800
+	.long	0x00009800
+	.long	0x00001600
+	.long	0x0000d400
+	.long	0x0000a400
+	.long	0x00005c00
+	.long	0x0000cc00
+	.long	0x00005d00
+	.long	0x00006500
+	.long	0x0000b600
+	.long	0x00009200
+	.long	0x00006c00
+	.long	0x00007000
+	.long	0x00004800
+	.long	0x00005000
+	.long	0x0000fd00
+	.long	0x0000ed00
+	.long	0x0000b900
+	.long	0x0000da00
+	.long	0x00005e00
+	.long	0x00001500
+	.long	0x00004600
+	.long	0x00005700
+	.long	0x0000a700
+	.long	0x00008d00
+	.long	0x00009d00
+	.long	0x00008400
+	.long	0x00009000
+	.long	0x0000d800
+	.long	0x0000ab00
+	.long	0x00000000
+	.long	0x00008c00
+	.long	0x0000bc00
+	.long	0x0000d300
+	.long	0x00000a00
+	.long	0x0000f700
+	.long	0x0000e400
+	.long	0x00005800
+	.long	0x00000500
+	.long	0x0000b800
+	.long	0x0000b300
+	.long	0x00004500
+	.long	0x00000600
+	.long	0x0000d000
+	.long	0x00002c00
+	.long	0x00001e00
+	.long	0x00008f00
+	.long	0x0000ca00
+	.long	0x00003f00
+	.long	0x00000f00
+	.long	0x00000200
+	.long	0x0000c100
+	.long	0x0000af00
+	.long	0x0000bd00
+	.long	0x00000300
+	.long	0x00000100
+	.long	0x00001300
+	.long	0x00008a00
+	.long	0x00006b00
+	.long	0x00003a00
+	.long	0x00009100
+	.long	0x00001100
+	.long	0x00004100
+	.long	0x00004f00
+	.long	0x00006700
+	.long	0x0000dc00
+	.long	0x0000ea00
+	.long	0x00009700
+	.long	0x0000f200
+	.long	0x0000cf00
+	.long	0x0000ce00
+	.long	0x0000f000
+	.long	0x0000b400
+	.long	0x0000e600
+	.long	0x00007300
+	.long	0x00009600
+	.long	0x0000ac00
+	.long	0x00007400
+	.long	0x00002200
+	.long	0x0000e700
+	.long	0x0000ad00
+	.long	0x00003500
+	.long	0x00008500
+	.long	0x0000e200
+	.long	0x0000f900
+	.long	0x00003700
+	.long	0x0000e800
+	.long	0x00001c00
+	.long	0x00007500
+	.long	0x0000df00
+	.long	0x00006e00
+	.long	0x00004700
+	.long	0x0000f100
+	.long	0x00001a00
+	.long	0x00007100
+	.long	0x00001d00
+	.long	0x00002900
+	.long	0x0000c500
+	.long	0x00008900
+	.long	0x00006f00
+	.long	0x0000b700
+	.long	0x00006200
+	.long	0x00000e00
+	.long	0x0000aa00
+	.long	0x00001800
+	.long	0x0000be00
+	.long	0x00001b00
+	.long	0x0000fc00
+	.long	0x00005600
+	.long	0x00003e00
+	.long	0x00004b00
+	.long	0x0000c600
+	.long	0x0000d200
+	.long	0x00007900
+	.long	0x00002000
+	.long	0x00009a00
+	.long	0x0000db00
+	.long	0x0000c000
+	.long	0x0000fe00
+	.long	0x00007800
+	.long	0x0000cd00
+	.long	0x00005a00
+	.long	0x0000f400
+	.long	0x00001f00
+	.long	0x0000dd00
+	.long	0x0000a800
+	.long	0x00003300
+	.long	0x00008800
+	.long	0x00000700
+	.long	0x0000c700
+	.long	0x00003100
+	.long	0x0000b100
+	.long	0x00001200
+	.long	0x00001000
+	.long	0x00005900
+	.long	0x00002700
+	.long	0x00008000
+	.long	0x0000ec00
+	.long	0x00005f00
+	.long	0x00006000
+	.long	0x00005100
+	.long	0x00007f00
+	.long	0x0000a900
+	.long	0x00001900
+	.long	0x0000b500
+	.long	0x00004a00
+	.long	0x00000d00
+	.long	0x00002d00
+	.long	0x0000e500
+	.long	0x00007a00
+	.long	0x00009f00
+	.long	0x00009300
+	.long	0x0000c900
+	.long	0x00009c00
+	.long	0x0000ef00
+	.long	0x0000a000
+	.long	0x0000e000
+	.long	0x00003b00
+	.long	0x00004d00
+	.long	0x0000ae00
+	.long	0x00002a00
+	.long	0x0000f500
+	.long	0x0000b000
+	.long	0x0000c800
+	.long	0x0000eb00
+	.long	0x0000bb00
+	.long	0x00003c00
+	.long	0x00008300
+	.long	0x00005300
+	.long	0x00009900
+	.long	0x00006100
+	.long	0x00001700
+	.long	0x00002b00
+	.long	0x00000400
+	.long	0x00007e00
+	.long	0x0000ba00
+	.long	0x00007700
+	.long	0x0000d600
+	.long	0x00002600
+	.long	0x0000e100
+	.long	0x00006900
+	.long	0x00001400
+	.long	0x00006300
+	.long	0x00005500
+	.long	0x00002100
+	.long	0x00000c00
+	.long	0x00007d00
+	// Table 2.
+	.long	0x00520000
+	.long	0x00090000
+	.long	0x006a0000
+	.long	0x00d50000
+	.long	0x00300000
+	.long	0x00360000
+	.long	0x00a50000
+	.long	0x00380000
+	.long	0x00bf0000
+	.long	0x00400000
+	.long	0x00a30000
+	.long	0x009e0000
+	.long	0x00810000
+	.long	0x00f30000
+	.long	0x00d70000
+	.long	0x00fb0000
+	.long	0x007c0000
+	.long	0x00e30000
+	.long	0x00390000
+	.long	0x00820000
+	.long	0x009b0000
+	.long	0x002f0000
+	.long	0x00ff0000
+	.long	0x00870000
+	.long	0x00340000
+	.long	0x008e0000
+	.long	0x00430000
+	.long	0x00440000
+	.long	0x00c40000
+	.long	0x00de0000
+	.long	0x00e90000
+	.long	0x00cb0000
+	.long	0x00540000
+	.long	0x007b0000
+	.long	0x00940000
+	.long	0x00320000
+	.long	0x00a60000
+	.long	0x00c20000
+	.long	0x00230000
+	.long	0x003d0000
+	.long	0x00ee0000
+	.long	0x004c0000
+	.long	0x00950000
+	.long	0x000b0000
+	.long	0x00420000
+	.long	0x00fa0000
+	.long	0x00c30000
+	.long	0x004e0000
+	.long	0x00080000
+	.long	0x002e0000
+	.long	0x00a10000
+	.long	0x00660000
+	.long	0x00280000
+	.long	0x00d90000
+	.long	0x00240000
+	.long	0x00b20000
+	.long	0x00760000
+	.long	0x005b0000
+	.long	0x00a20000
+	.long	0x00490000
+	.long	0x006d0000
+	.long	0x008b0000
+	.long	0x00d10000
+	.long	0x00250000
+	.long	0x00720000
+	.long	0x00f80000
+	.long	0x00f60000
+	.long	0x00640000
+	.long	0x00860000
+	.long	0x00680000
+	.long	0x00980000
+	.long	0x00160000
+	.long	0x00d40000
+	.long	0x00a40000
+	.long	0x005c0000
+	.long	0x00cc0000
+	.long	0x005d0000
+	.long	0x00650000
+	.long	0x00b60000
+	.long	0x00920000
+	.long	0x006c0000
+	.long	0x00700000
+	.long	0x00480000
+	.long	0x00500000
+	.long	0x00fd0000
+	.long	0x00ed0000
+	.long	0x00b90000
+	.long	0x00da0000
+	.long	0x005e0000
+	.long	0x00150000
+	.long	0x00460000
+	.long	0x00570000
+	.long	0x00a70000
+	.long	0x008d0000
+	.long	0x009d0000
+	.long	0x00840000
+	.long	0x00900000
+	.long	0x00d80000
+	.long	0x00ab0000
+	.long	0x00000000
+	.long	0x008c0000
+	.long	0x00bc0000
+	.long	0x00d30000
+	.long	0x000a0000
+	.long	0x00f70000
+	.long	0x00e40000
+	.long	0x00580000
+	.long	0x00050000
+	.long	0x00b80000
+	.long	0x00b30000
+	.long	0x00450000
+	.long	0x00060000
+	.long	0x00d00000
+	.long	0x002c0000
+	.long	0x001e0000
+	.long	0x008f0000
+	.long	0x00ca0000
+	.long	0x003f0000
+	.long	0x000f0000
+	.long	0x00020000
+	.long	0x00c10000
+	.long	0x00af0000
+	.long	0x00bd0000
+	.long	0x00030000
+	.long	0x00010000
+	.long	0x00130000
+	.long	0x008a0000
+	.long	0x006b0000
+	.long	0x003a0000
+	.long	0x00910000
+	.long	0x00110000
+	.long	0x00410000
+	.long	0x004f0000
+	.long	0x00670000
+	.long	0x00dc0000
+	.long	0x00ea0000
+	.long	0x00970000
+	.long	0x00f20000
+	.long	0x00cf0000
+	.long	0x00ce0000
+	.long	0x00f00000
+	.long	0x00b40000
+	.long	0x00e60000
+	.long	0x00730000
+	.long	0x00960000
+	.long	0x00ac0000
+	.long	0x00740000
+	.long	0x00220000
+	.long	0x00e70000
+	.long	0x00ad0000
+	.long	0x00350000
+	.long	0x00850000
+	.long	0x00e20000
+	.long	0x00f90000
+	.long	0x00370000
+	.long	0x00e80000
+	.long	0x001c0000
+	.long	0x00750000
+	.long	0x00df0000
+	.long	0x006e0000
+	.long	0x00470000
+	.long	0x00f10000
+	.long	0x001a0000
+	.long	0x00710000
+	.long	0x001d0000
+	.long	0x00290000
+	.long	0x00c50000
+	.long	0x00890000
+	.long	0x006f0000
+	.long	0x00b70000
+	.long	0x00620000
+	.long	0x000e0000
+	.long	0x00aa0000
+	.long	0x00180000
+	.long	0x00be0000
+	.long	0x001b0000
+	.long	0x00fc0000
+	.long	0x00560000
+	.long	0x003e0000
+	.long	0x004b0000
+	.long	0x00c60000
+	.long	0x00d20000
+	.long	0x00790000
+	.long	0x00200000
+	.long	0x009a0000
+	.long	0x00db0000
+	.long	0x00c00000
+	.long	0x00fe0000
+	.long	0x00780000
+	.long	0x00cd0000
+	.long	0x005a0000
+	.long	0x00f40000
+	.long	0x001f0000
+	.long	0x00dd0000
+	.long	0x00a80000
+	.long	0x00330000
+	.long	0x00880000
+	.long	0x00070000
+	.long	0x00c70000
+	.long	0x00310000
+	.long	0x00b10000
+	.long	0x00120000
+	.long	0x00100000
+	.long	0x00590000
+	.long	0x00270000
+	.long	0x00800000
+	.long	0x00ec0000
+	.long	0x005f0000
+	.long	0x00600000
+	.long	0x00510000
+	.long	0x007f0000
+	.long	0x00a90000
+	.long	0x00190000
+	.long	0x00b50000
+	.long	0x004a0000
+	.long	0x000d0000
+	.long	0x002d0000
+	.long	0x00e50000
+	.long	0x007a0000
+	.long	0x009f0000
+	.long	0x00930000
+	.long	0x00c90000
+	.long	0x009c0000
+	.long	0x00ef0000
+	.long	0x00a00000
+	.long	0x00e00000
+	.long	0x003b0000
+	.long	0x004d0000
+	.long	0x00ae0000
+	.long	0x002a0000
+	.long	0x00f50000
+	.long	0x00b00000
+	.long	0x00c80000
+	.long	0x00eb0000
+	.long	0x00bb0000
+	.long	0x003c0000
+	.long	0x00830000
+	.long	0x00530000
+	.long	0x00990000
+	.long	0x00610000
+	.long	0x00170000
+	.long	0x002b0000
+	.long	0x00040000
+	.long	0x007e0000
+	.long	0x00ba0000
+	.long	0x00770000
+	.long	0x00d60000
+	.long	0x00260000
+	.long	0x00e10000
+	.long	0x00690000
+	.long	0x00140000
+	.long	0x00630000
+	.long	0x00550000
+	.long	0x00210000
+	.long	0x000c0000
+	.long	0x007d0000
+	// Table 3.
+	.long	0x52000000
+	.long	0x09000000
+	.long	0x6a000000
+	.long	0xd5000000
+	.long	0x30000000
+	.long	0x36000000
+	.long	0xa5000000
+	.long	0x38000000
+	.long	0xbf000000
+	.long	0x40000000
+	.long	0xa3000000
+	.long	0x9e000000
+	.long	0x81000000
+	.long	0xf3000000
+	.long	0xd7000000
+	.long	0xfb000000
+	.long	0x7c000000
+	.long	0xe3000000
+	.long	0x39000000
+	.long	0x82000000
+	.long	0x9b000000
+	.long	0x2f000000
+	.long	0xff000000
+	.long	0x87000000
+	.long	0x34000000
+	.long	0x8e000000
+	.long	0x43000000
+	.long	0x44000000
+	.long	0xc4000000
+	.long	0xde000000
+	.long	0xe9000000
+	.long	0xcb000000
+	.long	0x54000000
+	.long	0x7b000000
+	.long	0x94000000
+	.long	0x32000000
+	.long	0xa6000000
+	.long	0xc2000000
+	.long	0x23000000
+	.long	0x3d000000
+	.long	0xee000000
+	.long	0x4c000000
+	.long	0x95000000
+	.long	0x0b000000
+	.long	0x42000000
+	.long	0xfa000000
+	.long	0xc3000000
+	.long	0x4e000000
+	.long	0x08000000
+	.long	0x2e000000
+	.long	0xa1000000
+	.long	0x66000000
+	.long	0x28000000
+	.long	0xd9000000
+	.long	0x24000000
+	.long	0xb2000000
+	.long	0x76000000
+	.long	0x5b000000
+	.long	0xa2000000
+	.long	0x49000000
+	.long	0x6d000000
+	.long	0x8b000000
+	.long	0xd1000000
+	.long	0x25000000
+	.long	0x72000000
+	.long	0xf8000000
+	.long	0xf6000000
+	.long	0x64000000
+	.long	0x86000000
+	.long	0x68000000
+	.long	0x98000000
+	.long	0x16000000
+	.long	0xd4000000
+	.long	0xa4000000
+	.long	0x5c000000
+	.long	0xcc000000
+	.long	0x5d000000
+	.long	0x65000000
+	.long	0xb6000000
+	.long	0x92000000
+	.long	0x6c000000
+	.long	0x70000000
+	.long	0x48000000
+	.long	0x50000000
+	.long	0xfd000000
+	.long	0xed000000
+	.long	0xb9000000
+	.long	0xda000000
+	.long	0x5e000000
+	.long	0x15000000
+	.long	0x46000000
+	.long	0x57000000
+	.long	0xa7000000
+	.long	0x8d000000
+	.long	0x9d000000
+	.long	0x84000000
+	.long	0x90000000
+	.long	0xd8000000
+	.long	0xab000000
+	.long	0x00000000
+	.long	0x8c000000
+	.long	0xbc000000
+	.long	0xd3000000
+	.long	0x0a000000
+	.long	0xf7000000
+	.long	0xe4000000
+	.long	0x58000000
+	.long	0x05000000
+	.long	0xb8000000
+	.long	0xb3000000
+	.long	0x45000000
+	.long	0x06000000
+	.long	0xd0000000
+	.long	0x2c000000
+	.long	0x1e000000
+	.long	0x8f000000
+	.long	0xca000000
+	.long	0x3f000000
+	.long	0x0f000000
+	.long	0x02000000
+	.long	0xc1000000
+	.long	0xaf000000
+	.long	0xbd000000
+	.long	0x03000000
+	.long	0x01000000
+	.long	0x13000000
+	.long	0x8a000000
+	.long	0x6b000000
+	.long	0x3a000000
+	.long	0x91000000
+	.long	0x11000000
+	.long	0x41000000
+	.long	0x4f000000
+	.long	0x67000000
+	.long	0xdc000000
+	.long	0xea000000
+	.long	0x97000000
+	.long	0xf2000000
+	.long	0xcf000000
+	.long	0xce000000
+	.long	0xf0000000
+	.long	0xb4000000
+	.long	0xe6000000
+	.long	0x73000000
+	.long	0x96000000
+	.long	0xac000000
+	.long	0x74000000
+	.long	0x22000000
+	.long	0xe7000000
+	.long	0xad000000
+	.long	0x35000000
+	.long	0x85000000
+	.long	0xe2000000
+	.long	0xf9000000
+	.long	0x37000000
+	.long	0xe8000000
+	.long	0x1c000000
+	.long	0x75000000
+	.long	0xdf000000
+	.long	0x6e000000
+	.long	0x47000000
+	.long	0xf1000000
+	.long	0x1a000000
+	.long	0x71000000
+	.long	0x1d000000
+	.long	0x29000000
+	.long	0xc5000000
+	.long	0x89000000
+	.long	0x6f000000
+	.long	0xb7000000
+	.long	0x62000000
+	.long	0x0e000000
+	.long	0xaa000000
+	.long	0x18000000
+	.long	0xbe000000
+	.long	0x1b000000
+	.long	0xfc000000
+	.long	0x56000000
+	.long	0x3e000000
+	.long	0x4b000000
+	.long	0xc6000000
+	.long	0xd2000000
+	.long	0x79000000
+	.long	0x20000000
+	.long	0x9a000000
+	.long	0xdb000000
+	.long	0xc0000000
+	.long	0xfe000000
+	.long	0x78000000
+	.long	0xcd000000
+	.long	0x5a000000
+	.long	0xf4000000
+	.long	0x1f000000
+	.long	0xdd000000
+	.long	0xa8000000
+	.long	0x33000000
+	.long	0x88000000
+	.long	0x07000000
+	.long	0xc7000000
+	.long	0x31000000
+	.long	0xb1000000
+	.long	0x12000000
+	.long	0x10000000
+	.long	0x59000000
+	.long	0x27000000
+	.long	0x80000000
+	.long	0xec000000
+	.long	0x5f000000
+	.long	0x60000000
+	.long	0x51000000
+	.long	0x7f000000
+	.long	0xa9000000
+	.long	0x19000000
+	.long	0xb5000000
+	.long	0x4a000000
+	.long	0x0d000000
+	.long	0x2d000000
+	.long	0xe5000000
+	.long	0x7a000000
+	.long	0x9f000000
+	.long	0x93000000
+	.long	0xc9000000
+	.long	0x9c000000
+	.long	0xef000000
+	.long	0xa0000000
+	.long	0xe0000000
+	.long	0x3b000000
+	.long	0x4d000000
+	.long	0xae000000
+	.long	0x2a000000
+	.long	0xf5000000
+	.long	0xb0000000
+	.long	0xc8000000
+	.long	0xeb000000
+	.long	0xbb000000
+	.long	0x3c000000
+	.long	0x83000000
+	.long	0x53000000
+	.long	0x99000000
+	.long	0x61000000
+	.long	0x17000000
+	.long	0x2b000000
+	.long	0x04000000
+	.long	0x7e000000
+	.long	0xba000000
+	.long	0x77000000
+	.long	0xd6000000
+	.long	0x26000000
+	.long	0xe1000000
+	.long	0x69000000
+	.long	0x14000000
+	.long	0x63000000
+	.long	0x55000000
+	.long	0x21000000
+	.long	0x0c000000
+	.long	0x7d000000
diff --git a/bsd/crypto/aes/i386/EncryptDecrypt.s b/bsd/crypto/aes/i386/EncryptDecrypt.s
new file mode 100644
index 000000000..6a6147a11
--- /dev/null
+++ b/bsd/crypto/aes/i386/EncryptDecrypt.s
@@ -0,0 +1,607 @@
+/*	This file defines _aes_encrypt or _aes_decrypt, according to the value of
+	the Select preprocessor symbol.  This file is designed to be included in
+	another assembly file using the preprocessor #include directive, to benefit
+	from some assembly-time calculations.
+
+	These two routines are nearly identical.  They differ only in the tables
+	they use, the direction they iterate through the key, and the permutation
+	performed on part of the state.
+
+	Written by Eric Postpischil, January 2008.
+*/
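+
+/*	For example, a wrapper file (hypothetical here; the actual wrappers live
+	elsewhere in the tree) could generate _aes_encrypt with:
+
+		#define	Select	0
+		#include "EncryptDecrypt.s"
+*/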
+
+/* add AES HW detection and HW-specific program branch cclee 3-12-10 */
+#ifdef KERNEL
+#include <i386/cpu_capabilities.h>
+#else
+#include <System/i386/cpu_capabilities.h>
+#endif
+
+#if Select == 0
+	#define	Name		_aes_encrypt				// Routine name.
+	#define	MTable		_AESEncryptTable			// Main table.
+	#define	FTable		_AESSubBytesWordTable		// Final table.
+	#define	P0			S0							// State permutation.
+	#define	P1			S1
+	#define	P2			S2
+	#define	P3			S3
+	#define	Increment	+16							// ExpandedKey increment.
+#elif Select == 1
+	#define	Name		_aes_decrypt				// Routine name.
+	#define	MTable		_AESDecryptTable			// Main table.
+	#define	FTable		_AESInvSubBytesWordTable	// Final table.
+	#define	P0			S2							// State permutation.
+	#define	P1			S3
+	#define	P2			S0
+	#define	P3			S1
+	#define	Increment	-16							// ExpandedKey increment.
+#elif Select == 2
+	#define	Name		_aes_encrypt_xmm_no_save	// Routine name.
+	#define	MTable		_AESEncryptTable			// Main table.
+	#define	FTable		_AESSubBytesWordTable		// Final table.
+	#define	P0			S0							// State permutation.
+	#define	P1			S1
+	#define	P2			S2
+	#define	P3			S3
+	#define	Increment	+16							// ExpandedKey increment.
+#elif Select == 3
+	#define	Name		_aes_decrypt_xmm_no_save	// Routine name.
+	#define	MTable		_AESDecryptTable			// Main table.
+	#define	FTable		_AESInvSubBytesWordTable	// Final table.
+	#define	P0			S2							// State permutation.
+	#define	P1			S3
+	#define	P2			S0
+	#define	P3			S1
+	#define	Increment	-16							// ExpandedKey increment.
+#endif	// Select
+
+
+/*	Routine:
+
+		_aes_encrypt (if Select is 0), _aes_decrypt (if Select is 1),
+		_aes_encrypt_xmm_no_save (if Select is 2), or
+		_aes_decrypt_xmm_no_save (if Select is 3).
+
+	Function:
+
+		Perform the AES cipher or its inverse as defined in Federal Information
+		Processing Standards Publication 197 (FIPS-197), November 26, 2001.
+
+		The inverse cipher here is the "Equivalent Inverse Cipher" in FIPS-197.
+
+	Input:
+
+		Constant data:
+
+			The following names must be locally defined so the assembler
+			can calculate certain offsets.
+				
+			For encryption:
+
+				static const Word _AESEncryptTable[4][256].
+
+					_AESEncryptTable[i] contains the tables T[i] defined in AES
+					Proposal: Rijndael, version 2, 03/09/99, by Joan Daemen and
+					Vincent Rijmen, section 5.2.1, page 18.  These tables
+					combine the SubBytes and MixColumns operations.
+
+				static const Word _AESSubBytesWordTable[4][256].
+
+					_AESSubBytesWordTable[i][j] = SubBytes(j) << 8*i, where
+					SubBytes is defined in FIPS-197.  _AESSubBytesWordTable
+					differs from _AESEncryptTable in that it does not include
+					the MixColumn operation.  It is used in performing the last
+					round, which differs from the previous rounds in that it
+					does not include the MixColumn operation.
+
+			For decryption:
+
+				static const Word _AESDecryptTable[4][256].
+
+					The analog of _AESEncryptTable for decryption.
+
+				static const Word _AESInvSubBytesWordTable[4][256].
+
+					_AESInvSubBytesWordTable[i][j] = InvSubBytes(j) << 8*i,
+					where InvSubBytes is defined in FIPS-197.
+					_AESInvSubBytesWordTable differs from _AESDecryptTable in
+					that it does not include the InvMixColumn operation.  It is
+					used in performing the last round, which differs from the
+					previous rounds in that it does not include the
+					InvMixColumn operation.
+
+		Arguments:
+
+			const Byte *InputText.
+
+				Address of input, 16 bytes.  Best if four-byte aligned.
+
+			Byte *OutputText.
+
+				Address of output, 16 bytes.  Best if four-byte aligned.
+
+			aes_encrypt_ctx *Context or aes_decrypt_ctx *Context
+
+				aes_encrypt_ctx and aes_decrypt_ctx are identical except the
+				former is used for encryption and the latter for decryption.
+
+				Each is a structure containing the expanded key beginning at
+				offset ContextKey and a four-byte "key length" beginning at
+				offset ContextKeyLength.  The "key length" is the number of
+				bytes from the start of the first round key to the start of the
+				last round key.  That is 16 less than the number of bytes in
+				the entire key.
+
+	Output:
+
+		Encrypted or decrypted data is written to *OutputText.
+
+	Return:
+
+		aes_rval	// -1 if "key length" is invalid.  0 otherwise.
+*/
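+
+/*	A hypothetical C-level view of these entry points (parameter names are
+	from the description above; the declarations are not part of this file):
+
+		aes_rval aes_encrypt(const Byte *InputText, Byte *OutputText,
+			aes_encrypt_ctx *Context);
+		aes_rval aes_decrypt(const Byte *InputText, Byte *OutputText,
+			aes_decrypt_ctx *Context);
+*/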
+
+	.text
+	.globl Name
+Name:
+
+    // detect AES HW, cclee 3-13-10
+#if Select < 2												// only for aes_encrypt/aes_decrypt
+#if defined __x86_64__
+    movq    __cpu_capabilities@GOTPCREL(%rip), %rax			// %rax -> __cpu_capabilities
+    mov     (%rax), %eax									// %eax = __cpu_capabilities
+#else
+#if defined	KERNEL
+    leal    __cpu_capabilities, %eax						// %eax -> __cpu_capabilities
+    mov     (%eax), %eax									// %eax = __cpu_capabilities
+#else
+	mov    _COMM_PAGE_CPU_CAPABILITIES, %eax
+#endif
+#endif
+    test    $(kHasAES), %eax								// __cpu_capabilities & kHasAES
+#if Select == 0
+    jne     _aes_encrypt_hw									// if AES HW detected, branch to HW specific code
+#else
+    jne     _aes_decrypt_hw									// if AES HW detected, branch to HW specific code
+#endif
+#endif		// Select
+
+	// Push new stack frame.
+	push	r5
+
+	/*	Save registers and set SaveSize to the number of bytes pushed onto the
+		stack so far, including the caller's return address.
+	*/
+	push	r3
+	#if defined __i386__
+		push	r6
+		push	r7
+		#define	SaveSize	(5*4)
+	#else
+		#define	SaveSize	(3*8)
+	#endif
+
+	/*	Number of bytes used for local variables:
+
+			4 (i386) or 0 (x86_64) bytes for ExpandedKeyEnd.
+
+			5 (i386) or 3 (x86_64) 16-byte spaces to save XMM registers.
+	*/
+	#define	LocalsSize	(Arch(4, 0) + Arch(5, 3)*16)
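+	/*	(Arch(i386_value, x86_64_value) is presumably a macro, defined by the
+		including file, that selects the value for the target architecture.)
+	*/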
+
+	#if 0 < LocalsSize
+		// Padding to position stack pointer at a multiple of 16 bytes.
+		#define	Padding	(15 & -(SaveSize + LocalsSize))
+		sub		$Padding + LocalsSize, r4	// Allocate space on stack.
+	#else
+		#define	Padding	0
+	#endif
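+	/*	For example, on x86_64: SaveSize + LocalsSize = 3*8 + 3*16 = 72, and
+		Padding = 15 & -72 = 8, so the 80 bytes in total leave the stack
+		pointer at a multiple of 16, as it was before the call.
+	*/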
+
+#ifdef KERNEL
+#if	Select < 2
+	// Save XMM registers.
+	movaps	%xmm0, 0*16(r4)
+	movaps	%xmm1, 1*16(r4)
+	movaps	%xmm2, 2*16(r4)
+#if defined __i386__
+	movaps	%xmm3, 3*16(r4)
+	movaps	%xmm4, 4*16(r4)
+#endif
+#endif	// Select
+#endif	// KERNEL
+
+#if defined __i386__
+
+	// Number of bytes from caller's stack pointer to ours.
+	#define	StackFrame	(SaveSize + Padding + LocalsSize)
+
+	// Define location of argument i (presuming 4-byte arguments).
+	#define	Argument(i)	StackFrame+4*(i)(%esp)
+
+	#define	ArgInputText	Argument(0)
+	#define	ArgOutputText	Argument(1)
+	#define	ArgContext		Argument(2)
+
+#elif defined __x86_64__
+
+	// Arguments.
+	#define	InputText		r7	// Used early then overwritten for other use.
+	#define	OutputText		r6	// Needed near end of routine.
+	#define	ArgContext		r2
+		/*	The argument passed in r2 overlaps registers we need for other
+		 	work, so it must be moved early in the routine.
+		*/
+
+#endif
+
+#define	BaseP		Arch(r6, r9)	// Base pointer for addressing global data.
+#define	ExpandedKey	Arch(t0, r10)	// Address of expanded key.
+
+/*	The Work registers defined below are used to hold parts of the AES state
+	while we dissect or assemble it.  They must be assigned to the A, B, C, and
+	D registers so that we can access the bytes in %al, %ah, and so on.
+*/
+#define	Work0d	r0d
+#define	Work0l	r0l
+#define	Work0h	r0h
+#define	Work1d	r3d
+#define	Work1l	r3l
+#define	Work1h	r3h
+#define	Work2d	r1d
+#define	Work2l	r1l
+#define	Work2h	r1h
+#define	Work3d	r2d
+#define	Work3l	r2l
+#define	Work3h	r2h
+
+#define	t0		r5
+#define	t0d		r5d		// Low 32 bits of t0.
+#define	t0l		r5l		// Low byte of t0.
+
+#define	t1		r7
+
+/*	S0, S1, S2, and S3 are where we assemble the new AES state when computing
+	a regular round.  S1, S2, and S3 are assigned to the Work registers, but
+	S0 needs to go somewhere else because Work0 holds part of the old state.
+*/
+#define	S0		Arch(t1, r8d)
+#define	S1		Work1d
+#define	S2		Work2d
+#define	S3		Work3d
+
+/*	These XMM registers are used as holding space, because it is faster to
+	spill to these registers than to the stack.  (On x86_64, we do not need
+	to spill, because there are additional general registers available.
+	However, using more general registers requires saving them to the stack
+	and restoring them.  I timed it, and no time was saved.)
+*/
+#define	vS1		%xmm0
+#define	vS2		%xmm1
+#define	vS3		%xmm2
+#if defined __i386__
+	#define	vExpandedKey	%xmm3
+	#define	vIncrement		%xmm4
+#endif
+
+	// Get address of expanded key.
+	mov	ArgContext, ExpandedKey
+	#if 0 != ContextKey
+		add		$ContextKey, ExpandedKey
+	#endif
+
+/*	Store the sentinel value of ExpandedKey on the stack on i386, or in a
+	register on x86_64.
+*/
+#define	ExpandedKeyEnd	Arch(5*16(r4), r11)
+
+	// Get and check "key length" (10, 12, or 14 rounds of 16 bytes each).
+	movzx	ContextKeyLength(ExpandedKey), r0
+	cmp		$160, r0	// 10*16, for a 128-bit key.
+	je		2f
+	cmp		$192, r0	// 12*16, for a 192-bit key.
+	je		2f
+	cmp		$224, r0	// 14*16, for a 256-bit key.
+	je		2f
+	mov		$-1, r0		// Return error.
+	jmp		9f
+2:
+
+	#if (Select == 0 || Select == 2)
+		// For encryption, prepare to iterate forward through expanded key.
+		add		ExpandedKey, r0
+		mov		r0, ExpandedKeyEnd
+	#else
+		// For decryption, prepare to iterate backward through expanded key.
+		mov		ExpandedKey, ExpandedKeyEnd
+		add		r0, ExpandedKey
+	#endif
+
+	// Initialize State from input text.
+	#if defined __i386__
+		mov		ArgInputText, BaseP
+		#define	InputText	BaseP
+	#endif
+	mov		0*4(InputText), Work0d
+	mov		1*4(InputText), S1
+	mov		2*4(InputText), S2
+	mov		3*4(InputText), S3
+#undef	InputText	// Register is reused after this for other purposes.
+
+	// Add round key and save results.
+	xor		0*4(ExpandedKey), Work0d	// S0 is in dissection register.
+	xor		1*4(ExpandedKey), S1
+	movd	S1, vS1						// Save S1 to S3 in vector registers.
+	xor		2*4(ExpandedKey), S2
+	movd	S2, vS2
+	xor		3*4(ExpandedKey), S3
+	movd	S3, vS3
+
+	add		$Increment, ExpandedKey		 // Advance to next round key.
+
+	#if defined __i386__
+		// Save expanded key address and increment in vector registers.
+		mov		$Increment, t1
+		movp	ExpandedKey, vExpandedKey
+		movp	t1, vIncrement
+	#endif
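+	/*	(movp is assumed to be a macro, defined elsewhere, for a mov between
+		a general register and an XMM register; on i386 it would be movd.)
+	*/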
+
+	// Set up relative addressing.
+	#if defined __i386__
+
+		// Get address of 0 in BaseP.
+			call	0f				// Push program counter onto stack.
+		0:
+			pop		BaseP			// Get program counter.
+
+		// Define macros to help address data.
+#define	LookupM(table, index)	MTable-0b+(table)*TableSize(BaseP, index, 4)
+#define LookupF(table, index)	FTable-0b+(table)*TableSize(BaseP, index, 4)
+
+	#elif defined __x86_64__
+
+		lea	MTable(%rip), BaseP
+
+		// Define macros to help address data.
+		#define	LookupM(table, index)	(table)*TableSize(BaseP, index, 4)
+		#define	LookupF(table, index)	(table)*TableSize(BaseP, index, 4)
+
+/*	With these definitions of LookupM and LookupF, BaseP must be loaded with
+	the address of the table at the point where it is used.  So we need an
+	instruction to change BaseP after we are done with MTable and before we
+	start using FTable.  I would prefer to use something like:
+
+		.set	FMinusM, FTable - MTable
+		#define LookupF(table, index)	\
+			FMinusM+(table)*TableSize(BaseP, index, 4)
+
+	Then BaseP would not need to change.  However, this fails due to an
+	assembler/linker bug, <rdar://problem/5683882>.
+*/
+
+	#endif
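+	/*	For example, assuming TableSize is the 1024-byte size of one
+		256-entry word table, LookupM(2, t0) on x86_64 expands to the
+		operand 2*1024(BaseP, t0, 4), that is, MTable + 2*1024 + 4*t0.
+	*/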
+
+	// Get round key.
+	mov		0*4(ExpandedKey), S0
+	mov		1*4(ExpandedKey), S1
+	mov		2*4(ExpandedKey), S2
+	mov		3*4(ExpandedKey), S3
+
+1:
+	/*	Word 0 of the current state must be in Work0 now, and the next round
+		key must be in S0 to S3.
+	*/
+
+	// Process previous S0.
+	movzx	Work0l, t0
+	xor		LookupM(0, t0), S0
+	movzx	Work0h, t0d
+	xor		LookupM(1, t0), P3
+	shr		$16, Work0d
+	movzx	Work0l, t0d
+	xor		LookupM(2, t0), S2
+	movzx	Work0h, t0d
+	xor		LookupM(3, t0), P1
+
+	// Process previous S1.
+	movd	vS1, Work0d
+	movzx	Work0l, t0d
+	xor		LookupM(0, t0), S1
+	movzx	Work0h, t0d
+	xor		LookupM(1, t0), P0
+	shr		$16, Work0d
+	movzx	Work0l, t0d
+	xor		LookupM(2, t0), S3
+	movzx	Work0h, t0d
+	xor		LookupM(3, t0), P2
+
+	// Process previous S2.
+	movd	vS2, Work0d
+	movzx	Work0l, t0d
+	xor		LookupM(0, t0), S2
+	movzx	Work0h, t0d
+	xor		LookupM(1, t0), P1
+	shr		$16, Work0d
+	movzx	Work0l, t0d
+	xor		LookupM(2, t0), S0
+	movzx	Work0h, t0d
+	xor		LookupM(3, t0), P3
+
+	// Process previous S3.
+	movd	vS3, Work0d
+	movzx	Work0l, t0d
+	xor		LookupM(0, t0), S3
+	movzx	Work0h, t0d
+	xor		LookupM(1, t0), P2
+	shr		$16, Work0d
+	movzx	Work0l, t0d
+	xor		LookupM(2, t0), S1
+	movzx	Work0h, t0d
+	xor		LookupM(3, t0), P0
+
+	#if defined __i386__
+		paddd	vIncrement, vExpandedKey
+		movp	vExpandedKey, ExpandedKey
+	#else
+		add		$Increment, ExpandedKey
+	#endif
+
+	// Save state for next iteration and load next round key.
+	mov		S0, Work0d
+	mov		0*4(ExpandedKey), S0
+	movd	S1, vS1
+	mov		1*4(ExpandedKey), S1
+	movd	S2, vS2
+	mov		2*4(ExpandedKey), S2
+	movd	S3, vS3
+	mov		3*4(ExpandedKey), S3
+
+	cmp		ExpandedKeyEnd, ExpandedKey
+	jne		1b
+
+	/*	Word 0 of the current state must be in Work0 now, and the next round
+		key must be in S0 to S3.
+	*/
+
+	// Work around assembler bug.  See comments above about Radar 5683882.
+	#if defined __x86_64__
+		lea	FTable(%rip), BaseP
+	#endif
+
+	// Process previous S0.
+	movzx	Work0l, t0
+	xor		LookupF(0, t0), S0
+	movzx	Work0h, t0d
+	xor		LookupF(1, t0), P3
+	shr		$16, Work0d
+	movzx	Work0l, t0d
+	xor		LookupF(2, t0), S2
+	movzx	Work0h, t0d
+	xor		LookupF(3, t0), P1
+
+	// Process previous S1.
+	movd	vS1, Work0d
+	movzx	Work0l, t0d
+	xor		LookupF(0, t0), S1
+	movzx	Work0h, t0d
+	xor		LookupF(1, t0), P0
+	shr		$16, Work0d
+	movzx	Work0l, t0d
+	xor		LookupF(2, t0), S3
+	movzx	Work0h, t0d
+	xor		LookupF(3, t0), P2
+
+	// Process previous S2.
+	movd	vS2, Work0d
+	movzx	Work0l, t0d
+	xor		LookupF(0, t0), S2
+	movzx	Work0h, t0d
+	xor		LookupF(1, t0), P1
+	shr		$16, Work0d
+	movzx	Work0l, t0d
+	xor		LookupF(2, t0), S0
+	movzx	Work0h, t0d
+	xor		LookupF(3, t0), P3
+
+	// Process previous S3.
+	movd	vS3, Work0d
+	movzx	Work0l, t0d
+	xor		LookupF(0, t0), S3
+	movzx	Work0h, t0d
+	xor		LookupF(1, t0), P2
+	shr		$16, Work0d
+	movzx	Work0l, t0d
+	xor		LookupF(2, t0), S1
+	movzx	Work0h, t0d
+	xor		LookupF(3, t0), P0
+
+	#if defined __i386__	// Architecture.
+		// Get OutputText address.
+		#define	OutputText	BaseP
+		mov		ArgOutputText, OutputText
+	#endif	// Architecture.
+
+	// Write output.
+	mov		S0, 0*4(OutputText)
+	mov		S1, 1*4(OutputText)
+	mov		S2, 2*4(OutputText)
+	mov		S3, 3*4(OutputText)
+
+	xor		r0, r0		// Return success.
+
+9:
+	// Pop stack and restore registers.
+#ifdef	KERNEL
+#if	Select < 2
+#if defined __i386__
+	movaps	4*16(r4), %xmm4
+	movaps	3*16(r4), %xmm3
+#endif
+	movaps	2*16(r4), %xmm2
+	movaps	1*16(r4), %xmm1
+	movaps	0*16(r4), %xmm0
+#endif	// Select
+#endif	// KERNEL
+	#if 0 < LocalsSize
+		add		$Padding + LocalsSize, r4
+	#endif
+	#if defined __i386__
+		pop		r7
+		pop		r6
+	#elif defined __x86_64__
+	#endif
+	pop		r3
+	pop		r5
+
+	ret
+
+
+#undef	ArgExpandedKey
+#undef	ArgInputText
+#undef	ArgNr
+#undef	ArgOutputText
+#undef	Argument
+#undef	BaseP
+#undef	ExpandedKey
+#undef	ExpandedKeyEnd
+#undef	FTable
+#undef	InputText
+#undef	LocalsSize
+#undef	LookupM
+#undef	LookupF
+#undef	MTable
+#undef	OutputText
+#undef	Padding
+#undef	SaveSize
+#undef	S0
+#undef	S1
+#undef	S2
+#undef	S3
+#undef	StackFrame
+#undef	Work0d
+#undef	Work0h
+#undef	Work0l
+#undef	Work1d
+#undef	Work1h
+#undef	Work1l
+#undef	Work2d
+#undef	Work2h
+#undef	Work2l
+#undef	Work3d
+#undef	Work3h
+#undef	Work3l
+#undef	t0
+#undef	t0d
+#undef	t0l
+#undef	t1
+#undef	vExpandedKey
+#undef	vS1
+#undef	vS2
+#undef	vS3
+
+#undef	Name
+#undef	MTable
+#undef	FTable
+#undef	P0
+#undef	P1
+#undef	P2
+#undef	P3
+#undef	Increment
diff --git a/bsd/crypto/aes/i386/ExpandKeyForDecryption.s b/bsd/crypto/aes/i386/ExpandKeyForDecryption.s
new file mode 100644
index 000000000..457508a9a
--- /dev/null
+++ b/bsd/crypto/aes/i386/ExpandKeyForDecryption.s
@@ -0,0 +1,1214 @@
+/*	This file defines _aes_decrypt_key, _aes_decrypt_key128,
+	_aes_decrypt_key192, and _aes_decrypt_key256.  It is designed to be
+	included in another assembly file with the preprocessor #include directive,
+	to benefit from some assembly-time calculations.
+
+	Written by Eric Postpischil, January 2008.
+
+	The comments here do not say much about the algorithm; the code just
+	follows the FIPS-197 specification.  I recommend reading the specification
+	before working with this code or examining the C code in the parent
+	directory that illustrates key expansion.
+
+	One complication is that this routine both expands the key and applies
+	InvMixColumn to most of the words in the expanded key.  This modifies the
+	key for use with the Equivalent Inverse Cipher.
+
+	During key expansion, there are sequences of four or six words that are
+	produced like this:
+
+		E[i+0] = E[i+0-Nk] ^ f(E[i-1]), where f is some function.
+		E[i+1] = E[i+1-Nk] ^ E[i+0].
+		E[i+2] = E[i+2-Nk] ^ E[i+1].
+		E[i+3] = E[i+3-Nk] ^ E[i+2].
+
+	When Nk is four or eight, the sequence stops there.  When it is six, it
+	goes on for two more words.  Let I be the InvMixColumn function.  For the
+	Equivalent Inverse Cipher, we want to store I(E[i+0]), I(E[i+1]),
+	I(E[i+2]), I(E[i+3]) (and two more when Nk is six).  However, we do not
+	need to calculate I four times.  In AES' finite field, I is a linear
+	combination of the four bytes of its input.  The ^ operation on the bits
+	that represent field elements is an addition in the Galois field.  So
+	I(a ^ b) = I(a) ^ I(b).  Then we have:
+
+		I(E[i+0]) = I(E[i+0-Nk] ^ f(E[i-1])) = I(E[i+0-Nk]) ^ I(f(E[i-1])).
+		I(E[i+1]) = I(E[i+1-Nk]) ^ I(E[i+0]).
+		I(E[i+2]) = I(E[i+2-Nk]) ^ I(E[i+1]).
+		I(E[i+3]) = I(E[i+3-Nk]) ^ I(E[i+2]).
+
+	To compute this, we compute I(f(E[i-1])) and XOR it with the previously
+	stored I(E[i+0-Nk]) to get I(E[i+0]).  Then we XOR that with the
+	previously stored I(E[i+1-Nk]) to get I(E[i+1]), and so on.
+
+	Note that to compute I(f(E[i-1])), we need to have E[i-1].  So we have to
+	compute the pre-InvMixColumn words of the expanded key; it is not
+	sufficient to have the post-InvMixColumn words.
+*/
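+
+/*	A minimal C sketch of that chaining (illustrative only: D[k] stands for
+	the stored word I(E[k]), L is the sequence length of four or six words,
+	and InvMixColumn is any one-word implementation):
+
+		uint32_t d = InvMixColumn(f_of_previous_word);	// I(f(E[i-1])).
+		for (int j = 0; j < L; ++j) {
+			d ^= D[i + j - Nk];		// Yields I(E[i+j]) by linearity.
+			D[i + j] = d;			// One InvMixColumn covers the run.
+		}
+*/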
+
+
+/*	Routine:
+
+		_aes_decrypt_key.
+
+		_aes_decrypt_key128, _aes_decrypt_key192, and _aes_decrypt_key256.
+
+	Function:
+
+		Expand the user's cipher key into the key schedule, as defined in
+		Federal Information Processing Standards Publication 197 (FIPS-197),
+		November 26, 2001.
+
+		For decryption, the key is modified as shown in Figure 15 in FIPS-197,
+		to support the Equivalent Inverse Cipher.
+
+	Input:
+
+		Constant data:
+
+			The following names must be locally defined so the assembler
+			can calculate certain offsets.
+
+			static const Word _AESSubBytesWordTable[4][256].
+
+				_AESSubBytesWordTable[i][j] = SubBytes(j) << 8*i, where
+				SubBytes is defined in FIPS-197.  _AESSubBytesWordTable
+				differs from _AESEncryptTable in that it does not include
+				the MixColumn operation.  It is used in performing the last
+				round, which differs from the previous rounds in that it
+				does not include the MixColumn operation.
+
+			static const Word _AESInvMixColumnTable[4][256].
+
+				_AESInvMixColumnTable[i][j] contains the contribution of byte
+				j to element i of the InvMixColumn operation.
+
+				The four bytes of the word _AESInvMixColumnTable[0][j] are:
+
+					{0xe}*{j}, {0x9}*{j}, {0xd}*{j}, {0xb}*{j},
+
+				listed in increasing address order, where multiplication is
+				performed in the Galois field.  {j} designates the element of
+				the Galois field represented by j.  _AESInvMixColumnTable[i][j]
+				has the same bytes, rotated right in the order shown above.
+
+			static const Byte _AESRcon[].
+
+				Round constants, beginning with AESRcon[1] for the first round
+				(AESRcon[0] is padding.)
+	
+		Arguments:
+
+			const uint8_t *Key
+
+				Address of user's cipher key.
+
+			int Length
+
+				Number of bytes (16, 24, or 32) or bits (128, 192, or 256) in
+				user's cipher key.
+
+				This argument is used with _aes_decrypt_key.  It is not
+				present for the other routines.  In those routines, Context
+				is the second argument.
+
+			aes_decrypt_ctx *Context
+
+				Structure to contain the expanded key beginning at offset
+				ContextKey and a four-byte "key length" beginning at offset
+				ContextKeyLength.  The "key length" is the number of bytes from
+				the start of the first round key to the startof the last rond
+				key.  That is 16 less than the number of bytes in the entire
+				key.
+
+	Output:
+
+		The expanded key and the "key length" are written to *Context.
+
+	Return:
+
+		aes_rval	// -1 if "key length" is invalid.  0 otherwise.
+*/
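+
+/*	A hypothetical call (the declarations are assumed, not taken from this
+	patch):
+
+		aes_decrypt_ctx ctx;
+		aes_rval rv = aes_decrypt_key(UserKey, 32, &ctx);	// 32-byte key.
+*/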
+/* add AES HW detection and program branch if AES HW is detected cclee 3-12-10 */
+
+#ifdef KERNEL
+#include <i386/cpu_capabilities.h>
+#else
+#include <System/i386/cpu_capabilities.h>
+#endif
+
+#define	dr		r0d				// Dissection register.
+#define	drl		r0l				// Low 8 bits of dissection register.
+#define	drh		r0h				// Second-lowest 8 bits of dissection register.
+
+#define	t0		r1
+#define	t0d		r1d				// Low 32 bits of t0.
+
+#define	STable	r2				// Address of SubBytes table.  Overlaps Nk.
+#define	ITable	r3				// Address of InvMixColumn table.
+#define	offset	Arch(r5, r11)	// Address offset and loop sentinel.
+
+#define	R		r7				// Address of round constant.
+#define	K		r7				// User key pointer.
+	// R and K overlap.
+
+#define	E		r6				// Expanded key pointer.
+
+#define	ve0		%xmm0
+#define	ve1		%xmm1
+#define	ve2		%xmm2
+#define	ve3		%xmm3
+#define	ve4		%xmm4
+#define	ve5		%xmm5
+#define	vt1		%xmm6
+#define	vt0		%xmm7
+
+#define	LookupS(table, index)	(table)*TableSize(STable, index, 4)
+#define	LookupI(table, index)	(table)*TableSize(ITable, index, 4)
+
+
+/*	InvMixColumn puts InvMixColumn(dr) into vt0.  This is a non-standard
+	subroutine.  It does not conform to the ABI.  It is an integral part of
+	_ExpandKeyForDecryption and shares register use with it.
+*/
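+/*	Concretely, with Ti = _AESInvMixColumnTable[i] and b0 through b3 the
+	bytes of dr from low to high, the lookups below compute
+
+		InvMixColumn(dr) = T0[b0] ^ T1[b1] ^ T2[b2] ^ T3[b3].
+*/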
+InvMixColumn:
+	movzx	drl, t0
+	movd	LookupI(0, t0), vt0		// Look up byte 0 in table 0.
+	movzx	drh, t0d
+	movd	LookupI(1, t0), vt1		// Look up byte 1 in table 1.
+	pxor	vt1, vt0
+	shr		$16, dr
+	movzx	drl, t0d
+	movd	LookupI(2, t0), vt1		// Look up byte 2 in table 2.
+	pxor	vt1, vt0
+	movzx	drh, t0d
+	movd	LookupI(3, t0), vt1		// Look up byte 3 in table 3.
+	pxor	vt1, vt0
+	ret
+
+
+	// SubWordRotWord adds (XORs) SubWord(RotWord(dr)) to vt0.
+	.macro	SubWordRotWord
+		movzx	drl, t0
+		movd	LookupS(3, t0), vt1		// Look up byte 0 in table 3.
+		pxor	vt1, vt0
+		movzx	drh, t0d
+		movd	LookupS(0, t0), vt1		// Look up byte 1 in table 0.
+		pxor	vt1, vt0
+		shr		$$16, dr
+		movzx	drl, t0d
+		movd	LookupS(1, t0), vt1		// Look up byte 2 in table 1.
+		pxor	vt1, vt0
+		movzx	drh, t0d
+		movd	LookupS(2, t0), vt1		// Look up byte 3 in table 2.
+		pxor	vt1, vt0
+	.endmacro
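+
+	/*	(In these .macro definitions, "$$" is the assembler's escape for a
+		literal "$", since a single "$" introduces a macro argument.)
+	*/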
+
+
+	// SubWord puts SubWord(dr) into vt0.
+	.macro	SubWord
+		movzx	drl, t0
+		movd	LookupS(0, t0), vt0		// Look up byte 0 in table 0.
+		movzx	drh, t0d
+		movd	LookupS(1, t0), vt1		// Look up byte 1 in table 1.
+		pxor	vt1, vt0
+		shr		$$16, dr
+		movzx	drl, t0d
+		movd	LookupS(2, t0), vt1		// Look up byte 2 in table 2.
+		pxor	vt1, vt0
+		movzx	drh, t0d
+		movd	LookupS(3, t0), vt1		// Look up byte 3 in table 3.
+		pxor	vt1, vt0
+	.endmacro
+
+	.text
+	.globl _aes_decrypt_key
+//	.private_extern	_aes_decrypt_key
+_aes_decrypt_key:
+
+	// detect AES HW, cclee 3-13-10
+#if defined __x86_64__
+    movq    __cpu_capabilities@GOTPCREL(%rip), %rax				// %rax -> __cpu_capabilities
+    mov     (%rax), %eax										// %eax  = __cpu_capabilities
+#else
+#if defined	KERNEL
+    leal    __cpu_capabilities, %eax							// %eax -> __cpu_capabilities
+    mov     (%eax), %eax										// %eax  = __cpu_capabilities
+#else
+	mov    _COMM_PAGE_CPU_CAPABILITIES, %eax
+#endif
+
+#endif
+    test    $(kHasAES), %eax									// __cpu_capabilities & kHasAES
+    jne     _aes_decrypt_key_hw									// if AES HW detected, branch to _aes_decrypt_key_hw
+	/*	Save registers and set SaveSize to the number of bytes pushed onto the
+		stack so far, including the caller's return address.
+	*/
+	push	r3
+	#if defined __i386__
+		push	r5
+		push	r6
+		push	r7
+		#define	SaveSize	(5*4)
+	#else
+		#define	SaveSize	(2*8)
+	#endif
+
+	/*	Number of bytes used for local variables:
+
+			8 16-byte spaces to save XMM registers.
+
+			8 four-byte spaces for work.
+	*/
+	#define	LocalsSize	(8*16 + 8*4)
+
+	// Define stack offset to storage space for local data.
+	#define	Local	(8*16)
+
+	#if 0 < LocalsSize
+		// Padding to position stack pointer at a multiple of 16 bytes.
+		#define	Padding	(15 & -(SaveSize + LocalsSize))
+		sub		$Padding + LocalsSize, r4	// Allocate space on stack.
+	#else
+		#define	Padding	0
+	#endif
+
+	/*	StackFrame is the number of bytes in our stack frame, from caller's
+		stack pointer to ours (so it includes the return address).
+	*/
+	#define	StackFrame	(SaveSize + Padding + LocalsSize)
+
+	// Save xmm registers.
+	movaps	%xmm0, 0*16(r4)
+	movaps	%xmm1, 1*16(r4)
+	movaps	%xmm2, 2*16(r4)
+	movaps	%xmm3, 3*16(r4)
+	movaps	%xmm4, 4*16(r4)
+	movaps	%xmm5, 5*16(r4)
+	movaps	%xmm6, 6*16(r4)
+	movaps	%xmm7, 7*16(r4)
+
+#if defined __i386__
+
+	// Define location of argument i.
+	#define	Argument(i)	StackFrame+4*(i)(r4)
+
+	#define	Nk		t0d
+
+	// Load arguments.
+	mov		Argument(2), E
+	mov		Argument(1), Nk
+	mov		Argument(0), K
+
+#elif defined __x86_64__
+
+	#define	Nk		r9d			// Number of words in key.
+	mov		r6d, Nk				// Move Nk argument out of way.
+	mov		r2, E				// Move E argument to common register.
+
+#endif
+
+	// Dispatch on key length, given in bytes (16, 24, 32) or bits (128,
+	// 192, 256).  When the jge branch is taken, the je below reuses the
+	// flags from this first comparison.
+	cmp		$128, Nk
+	jge		2f
+	shl		$3, Nk				// Convert from bytes to bits.
+	cmp		$128, Nk
+2:
+	je		DKeyHas4Words
+	cmp		$192, Nk
+	je		DKeyHas6Words
+	cmp		$256, Nk
+	je		DKeyHas8Words
+	mov		$-1, r0				// Return error.
+	jmp		9f
+
+
+	.globl _aes_decrypt_key128
+//	.private_extern	_aes_decrypt_key128
+_aes_decrypt_key128:
+
+	/*	Save registers and set SaveSize to the number of bytes pushed onto the
+		stack so far, including the caller's return address.
+	*/
+	push	r3
+	#if defined __i386__
+		push	r5
+		push	r6
+		push	r7
+		#define	SaveSize	(5*4)
+	#else
+		#define	SaveSize	(2*8)
+	#endif
+
+	/*	Number of bytes used for local variables:
+
+			8 16-byte spaces to save XMM registers.
+
+			8 four-byte spaces for work.
+	*/
+	#define	LocalsSize	(8*16 + 8*4)
+
+	// Define stack offset to storage space for local data.
+	#define	Local	(8*16)
+
+	#if 0 < LocalsSize
+		// Padding to position stack pointer at a multiple of 16 bytes.
+		#define	Padding	(15 & -(SaveSize + LocalsSize))
+		sub		$Padding + LocalsSize, r4	// Allocate space on stack.
+	#else
+		#define	Padding	0
+	#endif
+
+	/*	StackFrame is the number of bytes in our stack frame, from caller's
+		stack pointer to ours (so it includes the return address).
+	*/
+	#define	StackFrame	(SaveSize + Padding + LocalsSize)
+
+	// Save xmm registers.
+	movaps	%xmm0, 0*16(r4)
+	movaps	%xmm1, 1*16(r4)
+	movaps	%xmm2, 2*16(r4)
+	movaps	%xmm3, 3*16(r4)
+	movaps	%xmm4, 4*16(r4)
+	movaps	%xmm5, 5*16(r4)
+	movaps	%xmm6, 6*16(r4)
+	movaps	%xmm7, 7*16(r4)
+
+#if defined __i386__
+
+	// Load arguments.
+	#define	Argument(i)	StackFrame+4*(i)(r4)
+	mov		Argument(1), E
+	mov		Argument(0), K
+
+#endif
+
+// Merge point for _aes_decrypt_key and _aes_decrypt_key128.
+DKeyHas4Words:
+
+	// First words of expanded key are copied from user key.
+	movd	0*4(K), ve0
+	movd	1*4(K), ve1
+	movd	2*4(K), ve2
+	movd	3*4(K), ve3
+
+	movl	$10*16, ContextKeyLength(E)	// Set "key length."
+
+	#if 0 != ContextKey
+		add		$ContextKey, E
+	#endif
+
+	// K cannot be used after we write to R, since they use the same register.
+
+	#if defined __i386__
+
+		lea		_AESRcon, R
+		lea		_AESInvMixColumnTable, ITable
+		lea		_AESSubBytesWordTable, STable
+
+	#elif defined __x86_64__
+
+		lea		_AESRcon(%rip), R
+		lea		_AESInvMixColumnTable(%rip), ITable
+		lea		_AESSubBytesWordTable(%rip), STable
+
+	#endif
+
+	/*	With a four-word key, there are ten rounds (eleven 16-byte key blocks),
+		nine of which have InvMixColumn applied.
+	*/
+	mov		$-9*4*4, offset
+	sub		offset, E
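+	/*	E is advanced by the same amount, so E + offset still addresses the
+		key schedule while offset counts up toward zero, doubling as the
+		loop sentinel.
+	*/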
+
+	// Store initial words of expanded key, which are copies of user's key.
+	movd	ve0, 0*4(E, offset)
+	movd	ve1, 1*4(E, offset)
+	movd	ve2, 2*4(E, offset)
+	movd	ve3, 3*4(E, offset)
+
+/*	Here is the first iteration of the key expansion.  It is separate from the
+	main loop below because we need to apply InvMixColumn to each of the
+	outputs, in ve0 through ve3.  In the main loop, the technique described at
+	the top of this file is used to compute the proper outputs while using
+	InvMixColumn only once.
+*/
+	add		$1, R					// Advance pointer.
+	movd	ve3, dr					// Put previous word into work register.
+	movzx	(R), t0d				// Get round constant.
+	movd	t0d, vt0
+
+	SubWordRotWord
+	pxor	vt0, ve0
+
+	// Chain to successive words.
+	pxor	ve0, ve1
+	pxor	ve1, ve2
+	pxor	ve2, ve3
+
+	add		$4*4, offset
+
+	/*	Apply InvMixColumn to each word.  The transformed values are stored in
+		the expanded key.  The original values are retained in registers for
+		further computation.
+	*/
+	movd	ve0, dr
+	call	InvMixColumn
+	movd	vt0, 0*4(E, offset)
+
+	movd	ve1, dr
+	call	InvMixColumn
+	movd	vt0, 1*4(E, offset)
+
+	movd	ve2, dr
+	call	InvMixColumn
+	movd	vt0, 2*4(E, offset)
+
+	movd	ve3, dr
+	call	InvMixColumn
+	movd	vt0, 3*4(E, offset)
+
+//	Here is the main loop.
+1:
+	add		$1, R					// Advance pointer.
+	movd	ve3, dr					// Put previous word into work register.
+	movzx	(R), t0d				// Get round constant.
+	movd	t0d, vt0
+
+	SubWordRotWord
+	pxor	vt0, ve0
+
+	// Chain to successive words.
+	pxor	ve0, ve1
+	pxor	ve1, ve2
+	pxor	ve2, ve3
+		/*	Dr. Brian Gladman uses a technique with a single XOR here instead
+			of the previous four.  There is some periodic behavior in the key
+			expansion, and Gladman maintains E[4*i+3] for the latest four
+			values of i.  XORing the value in vt0 with one of these yields its
+			replacement.  However, using this technique requires additional
+			instructions before the loop (to initialize the values) and after
+			it (to extract the final values to be stored) and either some way
+			to rotate or index four values in the loop or a four-fold unrolling
+			of the loop to provide the indexing.  Experiment suggests the
+			former is not worthwhile.  Unrolling the loop might give a small
+			gain, at the cost of increased use of instruction cache, increased
+			instruction loads the first time the routine is executed, and
+			increased code complexity, so I decided against it.
+		*/
+
+	// Apply InvMixColumn to the difference.
+	movd	vt0, dr
+	call	InvMixColumn
+
+	add		$4*4, offset
+
+	// Chain the transformed difference to previously transformed outputs.
+	movd	(0-4)*4(E, offset), vt1
+	pxor	vt1, vt0
+	movd	vt0, 0*4(E, offset)
+
+	movd	(1-4)*4(E, offset), vt1
+	pxor	vt1, vt0
+	movd	vt0, 1*4(E, offset)
+
+	movd	(2-4)*4(E, offset), vt1
+	pxor	vt1, vt0
+	movd	vt0, 2*4(E, offset)
+
+	movd	(3-4)*4(E, offset), vt1
+	pxor	vt1, vt0
+	movd	vt0, 3*4(E, offset)
+
+	jl		1b
+
+// Here is the final iteration, which does not perform InvMixColumn.
+
+	movd	ve3, dr					// Put previous word into work register.
+	movzx	1(R), t0d				// Get round constant.
+	movd	t0d, vt0
+
+	SubWordRotWord
+	pxor	vt0, ve0
+
+	// Chain to successive words.
+	movd	ve0, 4*4(E, offset)
+	pxor	ve0, ve1
+	movd	ve1, 5*4(E, offset)
+	pxor	ve1, ve2
+	movd	ve2, 6*4(E, offset)
+	pxor	ve2, ve3
+	movd	ve3, 7*4(E, offset)
+
+	xor		r0, r0				// Return success.
+
+9:
+	// Pop stack and restore registers.
+	movaps	7*16(r4), %xmm7
+	movaps	6*16(r4), %xmm6
+	movaps	5*16(r4), %xmm5
+	movaps	4*16(r4), %xmm4
+	movaps	3*16(r4), %xmm3
+	movaps	2*16(r4), %xmm2
+	movaps	1*16(r4), %xmm1
+	movaps	0*16(r4), %xmm0
+	#if 0 < LocalsSize
+		add		$Padding + LocalsSize, r4
+	#endif
+	#if defined __i386__
+		pop		r7
+		pop		r6
+		pop		r5
+	#endif
+	pop		r3
+
+	ret
+
+
+	.globl _aes_decrypt_key192
+//	.private_extern	_aes_decrypt_key192
+_aes_decrypt_key192:
+
+	/*	Save registers and set SaveSize to the number of bytes pushed onto the
+		stack so far, including the caller's return address.
+	*/
+	push	r3
+	#if defined __i386__
+		push	r5
+		push	r6
+		push	r7
+		#define	SaveSize	(5*4)
+	#else
+		#define	SaveSize	(2*8)
+	#endif
+
+	/*	Number of bytes used for local variables:
+
+			8 16-byte spaces to save XMM registers.
+
+			8 four-byte spaces for work.
+	*/
+	#define	LocalsSize	(8*16 + 8*4)
+
+	// Define stack offset to storage space for local data.
+	#define	Local	(8*16)
+
+	#if 0 < LocalsSize
+		// Padding to position stack pointer at a multiple of 16 bytes.
+		#define	Padding	(15 & -(SaveSize + LocalsSize))
+		sub		$Padding + LocalsSize, r4	// Allocate space on stack.
+	#else
+		#define	Padding	0
+	#endif
+
+	/*	StackFrame is the number of bytes in our stack frame, from caller's
+		stack pointer to ours (so it includes the return address).
+	*/
+	#define	StackFrame	(SaveSize + Padding + LocalsSize)
+
+	// Save xmm registers.
+	movaps	%xmm0, 0*16(r4)
+	movaps	%xmm1, 1*16(r4)
+	movaps	%xmm2, 2*16(r4)
+	movaps	%xmm3, 3*16(r4)
+	movaps	%xmm4, 4*16(r4)
+	movaps	%xmm5, 5*16(r4)
+	movaps	%xmm6, 6*16(r4)
+	movaps	%xmm7, 7*16(r4)
+
+#if defined __i386__
+
+	// Load arguments.
+	#define	Argument(i)	StackFrame+4*(i)(r4)
+	mov		Argument(1), E
+	mov		Argument(0), K
+
+#endif
+
+// Merge point for _aes_decrypt_key and _aes_decrypt_key192.
+DKeyHas6Words:
+
+	// First words of expanded key are copied from user key.
+	movd	0*4(K), ve0
+	movd	1*4(K), ve1
+	movd	2*4(K), ve2
+	movd	3*4(K), ve3
+
+	movl	$12*16, ContextKeyLength(E)	// Set "key length."
+
+	#if 0 != ContextKey
+		add		$ContextKey, E
+	#endif
+
+	movd	4*4(K), ve4
+	movd	5*4(K), ve5
+
+	// K cannot be used after we write to R, since they use the same register.
+
+	#if defined __i386__
+
+		lea		_AESRcon, R
+		lea		_AESInvMixColumnTable, ITable
+		lea		_AESSubBytesWordTable, STable
+
+	#elif defined __x86_64__
+
+		lea		_AESRcon(%rip), R
+		lea		_AESInvMixColumnTable(%rip), ITable
+		lea		_AESSubBytesWordTable(%rip), STable
+
+	#endif
+
+	/*	With a six-word key, there are twelve rounds (thirteen 16-byte key
+		blocks), eleven of which have InvMixColumn applied.  The key expansion
+		proceeds in iterations of six four-byte words, so the termination
+		condition is a bit complicated:  offset starts at the negative of ten
+		blocks of four four-byte words (-10*4*4 bytes), is advanced by 6*4
+		bytes per iteration, and the loop branch does another iteration
+		whenever offset is still less than or equal to zero.  Code after the
+		end of the loop computes the final key block, which does not have
+		InvMixColumn applied.
+	*/
+	mov		$-10*4*4, offset
+	sub		offset, E
+
+	// Store initial words of expanded key, which are copies of user's key.
+	movd	ve0, 0*4(E, offset)
+	movd	ve1, 1*4(E, offset)
+	movd	ve2, 2*4(E, offset)
+	movd	ve3, 3*4(E, offset)
+
+	/*	The first four words are stored untransformed.  After that, words in
+		the expanded key are transformed by InvMixColumn.
+	*/
+	movd	ve4, dr
+	call	InvMixColumn
+	movd	vt0, 4*4(E, offset)
+
+	movd	ve5, dr
+	call	InvMixColumn
+	movd	vt0, 5*4(E, offset)
+
+/*	Here is the first iteration of the key expansion.  It is separate from the
+	main loop below because we need to apply InvMixColumn to each of the
+	outputs, in ve0 through ve5.  In the main loop, the technique described at
+	the top of this file is used to compute the proper outputs while using
+	InvMixColumn only once.
+*/
+	add		$1, R					// Advance pointer.
+	movd	ve5, dr					// Put previous word into work register.
+	movzx	(R), t0d				// Get round constant.
+	movd	t0d, vt0
+
+	SubWordRotWord
+	pxor	vt0, ve0
+
+	// Chain to successive words.
+	pxor	ve0, ve1
+	pxor	ve1, ve2
+	pxor	ve2, ve3
+	pxor	ve3, ve4
+	pxor	ve4, ve5
+
+	add		$6*4, offset
+
+	/*	Apply InvMixColumn to each word.  The transformed values are stored in
+		the expanded key.  The original values are retained in registers for
+		further computation.
+	*/
+	movd	ve0, dr
+	call	InvMixColumn
+	movd	vt0, 0*4(E, offset)
+
+	movd	ve1, dr
+	call	InvMixColumn
+	movd	vt0, 1*4(E, offset)
+
+	movd	ve2, dr
+	call	InvMixColumn
+	movd	vt0, 2*4(E, offset)
+
+	movd	ve3, dr
+	call	InvMixColumn
+	movd	vt0, 3*4(E, offset)
+
+	movd	(4-6)*4(E, offset), vt1
+	pxor	vt1, vt0
+	movd	vt0, 4*4(E, offset)
+
+	movd	(5-6)*4(E, offset), vt1
+	pxor	vt1, vt0
+	movd	vt0, 5*4(E, offset)
+
+//	Here is the main loop.
+1:
+	add		$1, R					// Advance pointer.
+	movd	ve5, dr					// Put previous word into work register.
+	movzx	(R), t0d				// Get round constant.
+	movd	t0d, vt0
+
+	SubWordRotWord
+	pxor	vt0, ve0
+
+	// Chain to successive words.
+	pxor	ve0, ve1
+	pxor	ve1, ve2
+	pxor	ve2, ve3
+	pxor	ve3, ve4
+	pxor	ve4, ve5
+
+	// Apply InvMixColumn to the difference.
+	movd	vt0, dr
+	call	InvMixColumn
+
+	add		$6*4, offset
+
+	// Chain the transformed difference to previously transformed outputs.
+	movd	(0-6)*4(E, offset), vt1
+	pxor	vt1, vt0
+	movd	vt0, 0*4(E, offset)
+
+	movd	(1-6)*4(E, offset), vt1
+	pxor	vt1, vt0
+	movd	vt0, 1*4(E, offset)
+
+	movd	(2-6)*4(E, offset), vt1
+	pxor	vt1, vt0
+	movd	vt0, 2*4(E, offset)
+
+	movd	(3-6)*4(E, offset), vt1
+	pxor	vt1, vt0
+	movd	vt0, 3*4(E, offset)
+
+	movd	(4-6)*4(E, offset), vt1
+	pxor	vt1, vt0
+	movd	vt0, 4*4(E, offset)
+
+	movd	(5-6)*4(E, offset), vt1
+	pxor	vt1, vt0
+	movd	vt0, 5*4(E, offset)
+
+	jle		1b
+
+// Here is the final iteration, which does not perform InvMixColumn.
+
+	movd	ve5, dr					// Put previous word into work register.
+	movzx	1(R), t0d				// Get round constant.
+	movd	t0d, vt0
+
+	SubWordRotWord
+	pxor	vt0, ve0
+
+	// Chain to successive words.
+	movd	ve0, 6*4(E, offset)
+	pxor	ve0, ve1
+	movd	ve1, 7*4(E, offset)
+	pxor	ve1, ve2
+	movd	ve2, 8*4(E, offset)
+	pxor	ve2, ve3
+	movd	ve3, 9*4(E, offset)
+
+	xor		r0, r0				// Return success.
+
+	// Pop stack and restore registers.
+	movaps	7*16(r4), %xmm7
+	movaps	6*16(r4), %xmm6
+	movaps	5*16(r4), %xmm5
+	movaps	4*16(r4), %xmm4
+	movaps	3*16(r4), %xmm3
+	movaps	2*16(r4), %xmm2
+	movaps	1*16(r4), %xmm1
+	movaps	0*16(r4), %xmm0
+	#if 0 < LocalsSize
+		add		$Padding + LocalsSize, r4
+	#endif
+	#if defined __i386__
+		pop		r7
+		pop		r6
+		pop		r5
+	#endif
+	pop		r3
+
+	ret
+
+
+	.globl _aes_decrypt_key256
+//	.private_extern	_aes_decrypt_key256
+_aes_decrypt_key256:
+
+	/*	Save registers and set SaveSize to the number of bytes pushed onto the
+		stack so far, including the caller's return address.
+	*/
+	push	r3
+	#if defined __i386__
+		push	r5
+		push	r6
+		push	r7
+		#define	SaveSize	(5*4)
+	#else
+		#define	SaveSize	(2*8)
+	#endif
+
+	/*	Number of bytes used for local variables:
+
+			8 16-byte spaces to save XMM registers.
+
+			8 four-byte spaces for work.
+	*/
+	#define	LocalsSize	(8*16 + 8*4)
+
+	// Define stack offset to storage space for local data.
+	#define	Local	(8*16)
+
+	#if 0 < LocalsSize
+		// Padding to position stack pointer at a multiple of 16 bytes.
+		#define	Padding	(15 & -(SaveSize + LocalsSize))
+		sub		$Padding + LocalsSize, r4	// Allocate space on stack.
+	#else
+		#define	Padding	0
+	#endif
+
+	/*	StackFrame is the number of bytes in our stack frame, from caller's
+		stack pointer to ours (so it includes the return address).
+	*/
+	#define	StackFrame	(SaveSize + Padding + LocalsSize)
+
+	// Save xmm registers.
+	movaps	%xmm0, 0*16(r4)
+	movaps	%xmm1, 1*16(r4)
+	movaps	%xmm2, 2*16(r4)
+	movaps	%xmm3, 3*16(r4)
+	movaps	%xmm4, 4*16(r4)
+	movaps	%xmm5, 5*16(r4)
+	movaps	%xmm6, 6*16(r4)
+	movaps	%xmm7, 7*16(r4)
+
+#if defined __i386__
+
+	// Load arguments.
+	#define	Argument(i)	StackFrame+4*(i)(r4)
+	mov		Argument(1), E
+	mov		Argument(0), K
+
+#endif
+
+// Merge point for _aes_decrypt_key and _aes_decrypt_key256.
+DKeyHas8Words:
+
+	// First words of expanded key are copied from user key.
+	movd	0*4(K), ve0
+	movd	1*4(K), ve1
+	movd	2*4(K), ve2
+	movd	3*4(K), ve3
+
+	movl	$14*16, ContextKeyLength(E)	// Set "key length."
+
+	#if 0 != ContextKey
+		add		$ContextKey, E
+	#endif
+
+	// Store initial words of expanded key, which are copies of user's key.
+	movd	ve0, 0*4(E)
+	movd	ve1, 1*4(E)
+	movd	ve2, 2*4(E)
+	movd	ve3, 3*4(E)
+	movd	4*4(K), ve0
+	movd	5*4(K), ve1
+	movd	6*4(K), ve2
+	movd	7*4(K), ve3
+
+	// K cannot be used after we write to R, since they use the same register.
+
+	#if defined __i386__
+
+		lea		_AESRcon, R
+		lea		_AESInvMixColumnTable, ITable
+		lea		_AESSubBytesWordTable, STable
+
+	#elif defined __x86_64__
+
+		lea		_AESRcon(%rip), R
+		lea		_AESInvMixColumnTable(%rip), ITable
+		lea		_AESSubBytesWordTable(%rip), STable
+
+	#endif
+
+	/*	With an eight-word key, there are fourteen rounds (fifteen 16-byte key
+		blocks), thirteen of which have InvMixColumn applied.
+	*/
+	mov		$-12*4*4, offset
+	sub		offset, E
+
+	// Save untransformed values in stack area.
+	movd	ve0, 4*4+Local(r4)
+	movd	ve1, 5*4+Local(r4)
+	movd	ve2, 6*4+Local(r4)
+	movd	ve3, 7*4+Local(r4)
+
+	/*	Apply InvMixColumn to words 4 through 7.  The transformed values are
+		stored in the expanded key.  The original values are saved in the stack
+		area for further computation.
+	*/
+	movd	ve0, dr
+	call	InvMixColumn
+	movd	vt0, 4*4(E, offset)
+
+	movd	ve1, dr
+	call	InvMixColumn
+	movd	vt0, 5*4(E, offset)
+
+	movd	ve2, dr
+	call	InvMixColumn
+	movd	vt0, 6*4(E, offset)
+
+	movd	ve3, dr
+	call	InvMixColumn
+	movd	vt0, 7*4(E, offset)
+
+/*	Here is the first iteration of the key expansion.  It is separate from the
+	main loop below because we need to apply InvMixColumn to each of the
+	outputs, in ve0 through ve3.  In the main loop, the technique described at
+	the top of this file is used to compute the proper outputs while using
+	InvMixColumn only once.
+*/
+	add		$1, R					// Advance pointer.
+	movd	ve3, dr					// Put previous word into work register.
+	movzx	(R), t0d				// Get round constant.
+	movd	t0d, vt0
+
+	SubWordRotWord
+
+	add		$8*4, offset
+
+	movd	(0-8)*4(E, offset), ve0		// Get old word.
+	pxor	vt0, ve0
+	movd	ve0, 0*4+Local(r4)			// Save on stack.
+	movd	ve0, dr
+	call	InvMixColumn
+	movd	vt0, 0*4(E, offset)			// Write to expanded key.
+
+	/*	 Chain to successive words and apply InvMixColumn to each word.  The
+		 transformed values are stored in the expanded key.  The original
+		 values are retained in local data for further computation.
+	*/
+	movd	(1-8)*4(E, offset), ve1		// Get old word.
+	pxor	ve0, ve1					// Chain.
+	movd	ve1, 1*4+Local(r4)			// Save on stack.
+	movd	ve1, dr
+	call	InvMixColumn
+	movd	vt0, 1*4(E, offset)			// Write to expanded key.
+
+	movd	(2-8)*4(E, offset), ve2		// Get old word.
+	pxor	ve1, ve2					// Chain.
+	movd	ve2, 2*4+Local(r4)			// Save on stack.
+	movd	ve2, dr
+	call	InvMixColumn
+	movd	vt0, 2*4(E, offset)			// Write to expanded key.
+
+	movd	(3-8)*4(E, offset), ve3		// Get old word.
+	pxor	ve2, ve3					// Chain.
+	movd	ve3, 3*4+Local(r4)			// Save on stack.
+	movd	ve3, dr
+	call	InvMixColumn
+	movd	vt0, 3*4(E, offset)			// Write to expanded key.
+
+	movd	ve3, dr						// Put previous word into work register.
+	SubWord
+
+	movd	4*4+Local(r4), ve0			// Get old word.
+	pxor	vt0, ve0					// Chain.
+	movd	ve0, 4*4+Local(r4)			// Save on stack.
+
+	movd	5*4+Local(r4), ve1			// Get old word.
+	pxor	ve0, ve1					// Chain.
+	movd	ve1, 5*4+Local(r4)			// Save on stack.
+
+	movd	6*4+Local(r4), ve2			// Get old word.
+	pxor	ve1, ve2					// Chain.
+	movd	ve2, 6*4+Local(r4)			// Save on stack.
+
+	movd	7*4+Local(r4), ve3			// Get old word.
+	pxor	ve2, ve3					// Chain.
+	movd	ve3, 7*4+Local(r4)			// Save on stack.
+
+	movd	vt0, dr						// Move change to work register.
+	call	InvMixColumn
+
+	movd	(4-8)*4(E, offset), vt1		// Get old word.
+	pxor	vt1, vt0					// Chain.
+	movd	vt0, 4*4(E, offset)			// Write new word to expanded key.
+
+	movd	(5-8)*4(E, offset), vt1		// Get old word.
+	pxor	vt1, vt0					// Chain.
+	movd	vt0, 5*4(E, offset)			// Write new word to expanded key.
+
+	movd	(6-8)*4(E, offset), vt1		// Get old word.
+	pxor	vt1, vt0					// Chain.
+	movd	vt0, 6*4(E, offset)			// Write new word to expanded key.
+
+	movd	(7-8)*4(E, offset), vt1		// Get old word.
+	pxor	vt1, vt0					// Chain.
+	movd	vt0, 7*4(E, offset)			// Write new word to expanded key.
+
+//	Here is the main loop.
+1:
+	add		$1, R						// Advance pointer.
+	movd	ve3, dr						// Put previous word into work register.
+	movzx	(R), t0d					// Get round constant.
+	movd	t0d, vt0
+
+	SubWordRotWord
+
+	movd	0*4+Local(r4), ve0			// Get old word.
+	pxor	vt0, ve0
+	movd	ve0, 0*4+Local(r4)			// Save on stack.
+
+	// Chain to successive words.
+	movd	1*4+Local(r4), ve1			// Get old word.
+	pxor	ve0, ve1					// Chain.
+	movd	ve1, 1*4+Local(r4)			// Save on stack.
+
+	movd	2*4+Local(r4), ve2			// Get old word.
+	pxor	ve1, ve2					// Chain.
+	movd	ve2, 2*4+Local(r4)			// Save on stack.
+
+	movd	3*4+Local(r4), ve3			// Get old word.
+	pxor	ve2, ve3					// Chain.
+	movd	ve3, 3*4+Local(r4)			// Save on stack.
+
+	movd	vt0, dr						// Move change to work register.
+	call	InvMixColumn
+
+	movd	0*4(E, offset), vt1			// Get old word.
+	pxor	vt1, vt0					// Chain.
+	movd	vt0, (0+8)*4(E, offset)		// Write new word to expanded key.
+
+	movd	1*4(E, offset), vt1			// Get old word.
+	pxor	vt1, vt0					// Chain.
+	movd	vt0, (1+8)*4(E, offset)		// Write new word to expanded key.
+
+	movd	2*4(E, offset), vt1			// Get old word.
+	pxor	vt1, vt0					// Chain.
+	movd	vt0, (2+8)*4(E, offset)		// Write new word to expanded key.
+
+	movd	3*4(E, offset), vt1			// Get old word.
+	pxor	vt1, vt0					// Chain.
+	movd	vt0, (3+8)*4(E, offset)		// Write new word to expanded key.
+
+	movd	ve3, dr						// Put previous word into work register.
+	SubWord
+
+	movd	4*4+Local(r4), ve0			// Get old word.
+	pxor	vt0, ve0					// Chain.
+	movd	ve0, 4*4+Local(r4)			// Save on stack.
+
+	movd	5*4+Local(r4), ve1			// Get old word.
+	pxor	ve0, ve1					// Chain.
+	movd	ve1, 5*4+Local(r4)			// Save on stack.
+
+	movd	6*4+Local(r4), ve2			// Get old word.
+	pxor	ve1, ve2					// Chain.
+	movd	ve2, 6*4+Local(r4)			// Save on stack.
+
+	movd	7*4+Local(r4), ve3			// Get old word.
+	pxor	ve2, ve3					// Chain.
+	movd	ve3, 7*4+Local(r4)			// Save on stack.
+
+	movd	vt0, dr						// Move change to work register.
+	call	InvMixColumn
+
+	movd	4*4(E, offset), vt1			// Get old word.
+	pxor	vt1, vt0					// Chain.
+	movd	vt0, (4+8)*4(E, offset)		// Write new word to expanded key.
+
+	movd	5*4(E, offset), vt1			// Get old word.
+	pxor	vt1, vt0					// Chain.
+	movd	vt0, (5+8)*4(E, offset)		// Write new word to expanded key.
+
+	movd	6*4(E, offset), vt1			// Get old word.
+	pxor	vt1, vt0					// Chain.
+	movd	vt0, (6+8)*4(E, offset)		// Write new word to expanded key.
+
+	movd	7*4(E, offset), vt1			// Get old word.
+	pxor	vt1, vt0					// Chain.
+	movd	vt0, (7+8)*4(E, offset)		// Write new word to expanded key.
+
+	add		$8*4, offset
+
+	jl		1b
+
+	movd	ve3, dr						// Put previous word into work register.
+	movzx	1(R), t0d					// Get round constant.
+	movd	t0d, vt0
+
+	SubWordRotWord
+
+	movd	0*4+Local(r4), ve0			// Get old word.
+	pxor	vt0, ve0					// Chain.
+	movd	ve0, (0+8)*4(E, offset)
+
+	// Chain to successive words.
+	movd	1*4+Local(r4), ve1			// Get old word.
+	pxor	ve0, ve1					// Chain.
+	movd	ve1, (1+8)*4(E, offset)
+
+	movd	2*4+Local(r4), ve2			// Get old word.
+	pxor	ve1, ve2					// Chain.
+	movd	ve2, (2+8)*4(E, offset)
+
+	movd	3*4+Local(r4), ve3			// Get old word.
+	pxor	ve2, ve3					// Chain.
+	movd	ve3, (3+8)*4(E, offset)
+
+	xor		r0, r0				// Return success.
+
+	// Pop stack and restore registers.
+	movaps	7*16(r4), %xmm7
+	movaps	6*16(r4), %xmm6
+	movaps	5*16(r4), %xmm5
+	movaps	4*16(r4), %xmm4
+	movaps	3*16(r4), %xmm3
+	movaps	2*16(r4), %xmm2
+	movaps	1*16(r4), %xmm1
+	movaps	0*16(r4), %xmm0
+	#if 0 < LocalsSize
+		add		$Padding + LocalsSize, r4
+	#endif
+	#if defined __i386__
+		pop		r7
+		pop		r6
+		pop		r5
+	#endif
+	pop		r3
+
+	ret
+
+
+#undef	Address
+#undef	Argument
+#undef	E
+#undef	ITable
+#undef	K
+#undef	Local
+#undef	LocalsSize
+#undef	LookupI
+#undef	LookupS
+#undef	Nk
+#undef	Padding
+#undef	R
+#undef	SaveSize
+#undef	STable
+#undef	StackFrame
+#undef	dr
+#undef	drh
+#undef	drl
+#undef	offset
+#undef	t0
+#undef	t0d
+#undef	ve0
+#undef	ve1
+#undef	ve2
+#undef	ve3
+#undef	ve4
+#undef	ve5
+#undef	vt0
+#undef	vt1
diff --git a/bsd/crypto/aes/i386/ExpandKeyForEncryption.s b/bsd/crypto/aes/i386/ExpandKeyForEncryption.s
new file mode 100644
index 000000000..1ce3c9553
--- /dev/null
+++ b/bsd/crypto/aes/i386/ExpandKeyForEncryption.s
@@ -0,0 +1,801 @@
+/*	This file defines _aes_encrypt_key, _aes_encrypt_key128,
+	_aes_encrypt_key192, and _aes_encrypt_key256.  It is designed to be
+	included in another assembly file with the preprocessor #include directive,
+	to benefit from some assembly-time calculations.
+
+	Written by Eric Postpischil, January 2008.
+
+	The comments here do not say much about the algorithm; the code just
+	follows the FIPS-197 specification.  I recommend reading the specification
+	before working with this code or examining the C code in the parent
+	directory that illustrates key expansion.
+*/
+
+
+/*	Routines:
+
+		_aes_encrypt_key.
+
+		_aes_encrypt_key128, _aes_encrypt_key192, and _aes_encrypt_key256.
+
+	Function:
+
+		Expand the user's cipher key into the key schedule, as defined in
+		Federal Information Processing Standards Publication 197 (FIPS-197),
+		November 26, 2001.
+
+	Input:
+
+		Constant data:
+
+			The following names must be locally defined so the assembler
+			can calculate certain offsets.
+
+			static const Word _AESSubBytesWordTable[4][256].
+
+				_AESSubBytesWordTable[i][j] = SubBytes(j) << 8*i, where
+				SubBytes is defined in FIPS-197.  _AESSubBytesWordTable
+				differs from _AESEncryptTable in that it does not include
+				the MixColumn operation.  It is used in performing the last
+				round, which differs from the previous rounds in that it
+				does not include the MixColumn operation.
+
+			static const Byte _AESRcon[].
+
+				Round constants, beginning with AESRcon[1] for the first round
+				(AESRcon[0] is padding).
+	
+		Arguments:
+
+			const uint8_t *Key
+
+				Address of user's cipher key.
+
+			int Length
+
+				Number of bytes (16, 24, or 32) or bits (128, 192, or 256) in
+				user's cipher key.
+
+				This argument is used with _aes_encrypt_key.  It is not
+				present for the other routines.  In those routines, Context
+				is the second argument.
+
+			aes_encrypt_ctx *Context
+
+				Structure to contain the expanded key beginning at offset
+				ContextKey and a four-byte "key length" beginning at offset
+				ContextKeyLength.  The "key length" is the number of bytes from
+				the start of the first round key to the start of the last round
+				key.  That is 16 less than the number of bytes in the entire
+				key.
+
+	Output:
+
+		The expanded key and the "key length" are written to *Context.
+
+	Return:
+
+		aes_rval	// -1 if "key length" is invalid.  0 otherwise.
+*/
+
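+/*	For reference, here is a minimal C sketch of the FIPS-197 key expansion
+	that these routines implement (SubWord, RotWord, Rcon, Nk, and Nr are the
+	specification's names; W is the expanded key as an array of 32-bit words):
+
+		for (int i = Nk; i < 4*(Nr+1); ++i)
+		{
+			uint32_t temp = W[i-1];
+			if (i % Nk == 0)
+				temp = SubWord(RotWord(temp)) ^ Rcon[i/Nk];
+			else if (6 < Nk && i % Nk == 4)
+				temp = SubWord(temp);
+			W[i] = W[i-Nk] ^ temp;
+		}
+*/
+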
+/* add AES HW detection and program branch if AES HW is detected cclee 3-12-10 */
+#ifdef KERNEL
+#include <i386/cpu_capabilities.h>
+#else
+#include <System/i386/cpu_capabilities.h>
+#endif
+
+	.text
+	.globl _aes_encrypt_key
+//	.private_extern	_aes_encrypt_key
+_aes_encrypt_key:
+
+    // detect AES HW, cclee-3-13-10
+#if defined __x86_64__
+    movq    __cpu_capabilities@GOTPCREL(%rip), %rax				// %rax -> __cpu_capabilities
+    mov     (%rax), %eax										// %eax  = __cpu_capabilities
+#else
+#if defined KERNEL
+    leal    __cpu_capabilities, %eax							// %eax -> __cpu_capabilities
+    mov     (%eax), %eax										// %eax  = __cpu_capabilities
+#else
+	mov    _COMM_PAGE_CPU_CAPABILITIES, %eax
+#endif
+#endif
+    test    $(kHasAES), %eax									// __cpu_capabilities & kHasAES
+    jne     _aes_encrypt_key_hw									// if AES HW detected, branch to _aes_encrypt_key_hw
+
+#define	dr		r0d				// Dissection register.
+#define	drl		r0l				// Low 8 bits of dissection register.
+#define	drh		r0h				// Second-lowest 8 bits of dissection register.
+
+#define	t0		r1
+#define	t0d		r1d				// Low 32 bits of t0.
+
+#define	offset	Arch(r5, r11)	// Address offset and loop sentinel.
+
+#define	R		r7				// Address of round constant.
+#define	K		r7				// User key pointer.
+	// R and K overlap.
+
+#define	E		r6				// Expanded key pointer.
+
+#define	ve0		%xmm0
+#define	ve1		%xmm1
+#define	ve2		%xmm2
+#define	ve3		%xmm3
+#define	vt3		%xmm4
+#define	vt2		%xmm5
+#define	vt1		%xmm6
+#define	vt0		%xmm7
+
+#if defined __i386__
+	#define	LookupS(table, index)	\
+		_AESSubBytesWordTable+(table)*TableSize(, index, 4)
+#elif defined __x86_64__
+	#define	LookupS(table, index)	(table)*TableSize(STable, index, 4)
+#endif
+
+	/*	Save registers and set SaveSize to the number of bytes pushed onto the
+		stack so far, including the caller's return address.
+	*/
+	push	r3
+	#if defined __i386__
+		push	r5
+		push	r6
+		push	r7
+		#define	SaveSize	(5*4)
+	#else
+		#define	SaveSize	(2*8)
+	#endif
+
+	/*	Number of bytes used for local variables:
+
+			8 16-byte spaces to save XMM registers.
+	*/
+	#define	LocalsSize	(8*16)
+
+	#if 0 < LocalsSize
+		// Padding to position stack pointer at a multiple of 16 bytes.
+		#define	Padding	(15 & -(SaveSize + LocalsSize))
+		sub		$Padding + LocalsSize, r4	// Allocate space on stack.
+	#else
+		#define	Padding	0
+	#endif
+
+	/*	StackFrame is the number of bytes in our stack frame, from caller's
+		stack pointer to ours (so it includes the return address).
+	*/
+	#define	StackFrame	(SaveSize + Padding + LocalsSize)
+
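+	/*	For example, on i386 SaveSize + LocalsSize = 20 + 128 = 148, so
+		Padding = 15 & -148 = 12 and StackFrame = 160, a multiple of 16.  On
+		x86_64 the sum is 16 + 128 = 144, already a multiple of 16, so
+		Padding = 0 and StackFrame = 144.
+	*/
+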
+	// Save xmm registers.
+	movaps	%xmm0, 0*16(r4)
+	movaps	%xmm1, 1*16(r4)
+	movaps	%xmm2, 2*16(r4)
+	movaps	%xmm3, 3*16(r4)
+	movaps	%xmm4, 4*16(r4)
+	movaps	%xmm5, 5*16(r4)
+	movaps	%xmm6, 6*16(r4)
+	movaps	%xmm7, 7*16(r4)
+
+#if defined __i386__
+
+	// Define location of argument i.
+	#define	Argument(i)	StackFrame+4*(i)(r4)
+
+	#define	Nk		t0d
+
+	// Load arguments.
+	mov		Argument(2), E
+	mov		Argument(1), Nk
+	mov		Argument(0), K
+
+#elif defined __x86_64__
+
+	#define	Nk		r9d			// Number of words in key.
+	mov		r6d, Nk				// Move Nk argument out of way.
+	mov		r2, E				// Move E argument to common register.
+
+#endif
+
+	// Dispatch on key length.
+	cmp		$128, Nk
+	jge		2f
+	shl		$3, Nk				// Convert from bytes to bits.
+	cmp		$128, Nk
+2:
+	je		EKeyHas4Words
+	cmp		$192, Nk
+	je		EKeyHas6Words
+	cmp		$256, Nk
+	je		EKeyHas8Words
+	mov		$-1, r0				// Return error.
+	jmp		9f
+
+// Stop using Nk.
+#undef	Nk
+
+	.globl _aes_encrypt_key128
+//	.private_extern	_aes_encrypt_key128
+_aes_encrypt_key128:
+
+	/*	Save registers and set SaveSize to the number of bytes pushed onto the
+		stack so far, including the caller's return address.
+	*/
+	push	r3
+	#if defined __i386__
+		push	r5
+		push	r6
+		push	r7
+		#define	SaveSize	(5*4)
+	#else
+		#define	SaveSize	(2*8)
+	#endif
+
+	/*	Number of bytes used for local variables:
+
+			8 16-byte spaces to save XMM registers.
+	*/
+	#define	LocalsSize	(8*16)
+
+	#if 0 < LocalsSize
+		// Padding to position stack pointer at a multiple of 16 bytes.
+		#define	Padding	(15 & -(SaveSize + LocalsSize))
+		sub		$Padding + LocalsSize, r4	// Allocate space on stack.
+	#else
+		#define	Padding	0
+	#endif
+
+	/*	StackFrame is the number of bytes in our stack frame, from caller's
+		stack pointer to ours (so it includes the return address).
+	*/
+	#define	StackFrame	(SaveSize + Padding + LocalsSize)
+
+	// Save xmm registers.
+	movaps	%xmm0, 0*16(r4)
+	movaps	%xmm1, 1*16(r4)
+	movaps	%xmm2, 2*16(r4)
+	movaps	%xmm3, 3*16(r4)
+	movaps	%xmm4, 4*16(r4)
+	movaps	%xmm5, 5*16(r4)
+	movaps	%xmm6, 6*16(r4)
+	movaps	%xmm7, 7*16(r4)
+
+	#if defined __i386__
+
+		// Load arguments.
+		#define	Argument(i)	StackFrame+4*(i)(r4)
+		mov		Argument(1), E
+		mov		Argument(0), K
+
+	#endif
+
+// Merge point for _aes_encrypt_key and _aes_encrypt_key128.
+EKeyHas4Words:
+
+#define	e0	r2d
+#define	e1	r3d
+#define	e2	Arch(r5d, r11d)
+#define	e3	r7d
+
+	// First words of expanded key are copied from user key.
+	mov		0*4(K), e0
+	mov		1*4(K), e1
+	mov		2*4(K), e2
+	mov		3*4(K), e3
+
+	movl	$10*16, ContextKeyLength(E)	// Set "key length."
+
+	#if 0 != ContextKey
+		add		$ContextKey, E
+	#endif
+
+	// K cannot be used after we write to R, since they use the same register.
+
+	// Cache round constants in output buffer.  The last is a sentinel.
+	movb	$0x01,  1*16(E)
+	movb	$0x02,  2*16(E)
+	movb	$0x04,  3*16(E)
+	movb	$0x08,  4*16(E)
+	movb	$0x10,  5*16(E)
+	movb	$0x20,  6*16(E)
+	movb	$0x40,  7*16(E)
+	movb	$0x80,  8*16(E)
+	movb	$0x1b,  9*16(E)
+	movb	$0x36, 10*16(E)
+
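+	/*	Each constant above is the previous one doubled in GF(2^8):  doubling
+		0x80 gives 0x100, which reduces modulo the field polynomial 0x11b to
+		0x1b, and doubling 0x1b gives 0x36.
+	*/
+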
+	#if defined __x86_64__
+
+		#define	STable	r8
+		lea		_AESSubBytesWordTable(%rip), STable
+
+	#endif
+
+	// Store initial words of expanded key, which are copies of user's key.
+	mov		e0, 0*4(E)
+	mov		e1, 1*4(E)
+	mov		e2, 2*4(E)
+	mov		e3, 3*4(E)
+
+1:
+	mov		e3, dr				// Put previous word into dissection register.
+
+	// Perform SubWord(RotWord(dr)).
+	movzx	drl, t0
+	xor		LookupS(3, t0), e0		// Look up byte 0 in table 3.
+	movzx	drh, t0d
+	xor		LookupS(0, t0), e0		// Look up byte 1 in table 0.
+	shr		$16, dr
+	movzx	drl, t0d
+	xor		LookupS(1, t0), e0		// Look up byte 2 in table 1.
+	movzx	drh, t0d
+	xor		LookupS(2, t0), e0		// Look up byte 3 in table 2.
+
+	add		$4*4, E
+
+	movzx	(E), t0d				// Get cached round constant.
+	xor		t0d, e0					// XOR with word from four words back.
+
+	// Chain to successive words.
+	mov		e0, 0*4(E)
+	xor		e0, e1
+	mov		e1, 1*4(E)
+	xor		e1, e2
+	mov		e2, 2*4(E)
+	xor		e2, e3
+	mov		e3, 3*4(E)
+
+	cmp		$0x36, t0d				// Was this the last round constant?
+
+	jne		1b
+
+	xor		r0, r0		// Return success.
+
+9:
+	// Pop stack and restore registers.
+	movaps	7*16(r4), %xmm7
+	movaps	6*16(r4), %xmm6
+	movaps	5*16(r4), %xmm5
+	movaps	4*16(r4), %xmm4
+	movaps	3*16(r4), %xmm3
+	movaps	2*16(r4), %xmm2
+	movaps	1*16(r4), %xmm1
+	movaps	0*16(r4), %xmm0
+	#if 0 < LocalsSize
+		add		$Padding + LocalsSize, r4
+	#endif
+	#if defined __i386__
+		pop		r7
+		pop		r6
+		pop		r5
+	#endif
+	pop		r3
+
+	ret
+
+
+// Reset definitions for next case.
+#undef	e0
+#undef	e1
+#undef	e2
+#undef	e3
+
+#undef	vt3
+#undef	vt2
+#define	ve4	%xmm4
+#define	ve5	%xmm5
+
+
+	.globl _aes_encrypt_key192
+//	.private_extern	_aes_encrypt_key192
+_aes_encrypt_key192:
+
+	/*	Save registers and set SaveSize to the number of bytes pushed onto the
+		stack so far, including the caller's return address.
+	*/
+	push	r3
+	#if defined __i386__
+		push	r5
+		push	r6
+		push	r7
+		#define	SaveSize	(5*4)
+	#else
+		#define	SaveSize	(2*8)
+	#endif
+
+	/*	Number of bytes used for local variables:
+
+			8 16-byte spaces to save XMM registers.
+	*/
+	#define	LocalsSize	(8*16)
+
+	#if 0 < LocalsSize
+		// Padding to position stack pointer at a multiple of 16 bytes.
+		#define	Padding	(15 & -(SaveSize + LocalsSize))
+		sub		$Padding + LocalsSize, r4	// Allocate space on stack.
+	#else
+		#define	Padding	0
+	#endif
+
+	/*	StackFrame is the number of bytes in our stack frame, from caller's
+		stack pointer to ours (so it includes the return address).
+	*/
+	#define	StackFrame	(SaveSize + Padding + LocalsSize)
+
+	// Save xmm registers.
+	movaps	%xmm0, 0*16(r4)
+	movaps	%xmm1, 1*16(r4)
+	movaps	%xmm2, 2*16(r4)
+	movaps	%xmm3, 3*16(r4)
+	movaps	%xmm4, 4*16(r4)
+	movaps	%xmm5, 5*16(r4)
+	movaps	%xmm6, 6*16(r4)
+	movaps	%xmm7, 7*16(r4)
+
+	#if defined __i386__
+
+		// Load arguments.
+		#define	Argument(i)	StackFrame+4*(i)(r4)
+		mov		Argument(1), E
+		mov		Argument(0), K
+
+	#endif
+
+// Merge point for _aes_encrypt_key and _aes_encrypt_key192.
+EKeyHas6Words:
+
+	// First words of expanded key are copied from user key.
+	movd	0*4(K), ve0
+	movd	1*4(K), ve1
+	movd	2*4(K), ve2
+	movd	3*4(K), ve3
+
+	movl	$12*16, ContextKeyLength(E)	// Set "key length."
+
+	#if 0 != ContextKey
+		add		$ContextKey, E
+	#endif
+
+	movd	4*4(K), ve4
+	movd	5*4(K), ve5
+
+	// K cannot be used after we write to R, since they use the same register.
+
+	#if defined __i386__
+
+		lea		_AESRcon, R
+
+	#elif defined __x86_64__
+
+		lea		_AESRcon(%rip), R
+		lea		_AESSubBytesWordTable(%rip), STable
+
+	#endif
+
+	/*	With a six-word key, there are twelve rounds (thirteen 16-byte key
+		blocks).
+	*/
+	mov		$-12*4*4, offset
+	sub		offset, E
+
+	// Store initial words of expanded key, which are copies of user's key.
+	movd	ve0, 0*4(E, offset)
+	movd	ve1, 1*4(E, offset)
+	movd	ve2, 2*4(E, offset)
+	movd	ve3, 3*4(E, offset)
+	movd	ve4, 4*4(E, offset)
+	movd	ve5, 5*4(E, offset)
+
+/*	Jump into loop body.  The key expansion processes six four-byte words per
+	iteration.  52 are needed in the key.  So only four are needed in the last
+	iteration.
+*/
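+/*	(Of the 52 = 4*(12+1) words, six come from the user's key, seven full
+	iterations produce 42 more, and the final iteration produces the last
+	four.)
+*/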
+	jmp		2f		
+1:
+	// Continue chaining to successive words.
+	pxor	ve3, ve4
+	movd	ve4, 4*4(E, offset)
+	pxor	ve4, ve5
+	movd	ve5, 5*4(E, offset)
+2:
+	add		$1, R				// Advance pointer.
+	movd	ve5, dr				// Put previous word into dissection register.
+	movzx	(R), t0				// Get round constant.
+	movd	t0d, vt1
+	pxor	vt1, ve0			// XOR with word from six words back.
+
+	// Perform SubWord(RotWord(dr)).
+	movzx	drl, t0d
+	movd	LookupS(3, t0), vt0		// Look up byte 0 in table 3.
+	movzx	drh, t0d
+	movd	LookupS(0, t0), vt1		// Look up byte 1 in table 0.
+	shr		$16, dr
+	movzx	drl, t0d
+	pxor	vt1, vt0
+	pxor	vt0, ve0
+	movd	LookupS(1, t0), vt0		// Look up byte 2 in table 1.
+	movzx	drh, t0d
+	movd	LookupS(2, t0), vt1		// Look up byte 3 in table 2.
+	pxor	vt1, vt0
+	pxor	vt0, ve0
+
+	add		$6*4, offset
+
+	// Chain to successive words.
+	movd	ve0, 0*4(E, offset)
+	pxor	ve0, ve1
+	movd	ve1, 1*4(E, offset)
+	pxor	ve1, ve2
+	movd	ve2, 2*4(E, offset)
+	pxor	ve2, ve3
+	movd	ve3, 3*4(E, offset)
+
+	jne		1b
+
+	xor		r0, r0		// Return success.
+
+	// Pop stack and restore registers.
+	movaps	7*16(r4), %xmm7
+	movaps	6*16(r4), %xmm6
+	movaps	5*16(r4), %xmm5
+	movaps	4*16(r4), %xmm4
+	movaps	3*16(r4), %xmm3
+	movaps	2*16(r4), %xmm2
+	movaps	1*16(r4), %xmm1
+	movaps	0*16(r4), %xmm0
+	#if 0 < LocalsSize
+		add		$Padding + LocalsSize, r4
+	#endif
+	#if defined __i386__
+		pop		r7
+		pop		r6
+		pop		r5
+	#endif
+	pop		r3
+
+	ret
+
+
+// Reset definitions for next case.
+#undef	ve4
+#undef	ve5
+#define	vt3	%xmm4
+#define	vt2	%xmm5
+
+
+	.globl _aes_encrypt_key256
+//	.private_extern	_aes_encrypt_key256
+_aes_encrypt_key256:
+
+	/*	Save registers and set SaveSize to the number of bytes pushed onto the
+		stack so far, including the caller's return address.
+	*/
+	push	r3
+	#if defined __i386__
+		push	r5
+		push	r6
+		push	r7
+		#define	SaveSize	(5*4)
+	#else
+		#define	SaveSize	(2*8)
+	#endif
+
+	/*	Number of bytes used for local variables:
+
+			8 16-byte spaces to save XMM registers.
+	*/
+	#define	LocalsSize	(8*16)
+
+	#if 0 < LocalsSize
+		// Padding to position stack pointer at a multiple of 16 bytes.
+		#define	Padding	(15 & -(SaveSize + LocalsSize))
+		sub		$Padding + LocalsSize, r4	// Allocate space on stack.
+	#else
+		#define	Padding	0
+	#endif
+
+	/*	StackFrame is the number of bytes in our stack frame, from caller's
+		stack pointer to ours (so it includes the return address).
+	*/
+	#define	StackFrame	(SaveSize + Padding + LocalsSize)
+
+	// Save xmm registers.
+	movaps	%xmm0, 0*16(r4)
+	movaps	%xmm1, 1*16(r4)
+	movaps	%xmm2, 2*16(r4)
+	movaps	%xmm3, 3*16(r4)
+	movaps	%xmm4, 4*16(r4)
+	movaps	%xmm5, 5*16(r4)
+	movaps	%xmm6, 6*16(r4)
+	movaps	%xmm7, 7*16(r4)
+
+	#if defined __i386__
+
+		// Load arguments.
+		#define	Argument(i)	StackFrame+4*(i)(r4)
+		mov		Argument(1), E
+		mov		Argument(0), K
+
+	#endif
+
+// Merge point for _aes_encrypt_key and _aes_encrypt_key256.
+EKeyHas8Words:
+
+	// First words of expanded key are copied from user key.
+	movd	0*4(K), ve0
+	movd	1*4(K), ve1
+	movd	2*4(K), ve2
+	movd	3*4(K), ve3
+
+	movl	$14*16, ContextKeyLength(E)	// Set "key length."
+
+	#if 0 != ContextKey
+		add		$ContextKey, E
+	#endif
+
+	// Store initial words of expanded key, which are copies of user's key.
+	movd	ve0, 0*4(E)
+	movd	ve1, 1*4(E)
+	movd	ve2, 2*4(E)
+	movd	ve3, 3*4(E)
+	movd	4*4(K), ve0
+	movd	5*4(K), ve1
+	movd	6*4(K), ve2
+	movd	7*4(K), ve3
+
+	// K cannot be used after we write to R, since they use the same register.
+
+	#if defined __i386__
+
+		lea		_AESRcon, R
+
+	#elif defined __x86_64__
+
+		lea		_AESRcon(%rip), R
+		lea		_AESSubBytesWordTable(%rip), STable
+
+	#endif
+
+	/*	With an eight-word key, there are fourteen rounds (fifteen 16-byte key
+	 	blocks).
+	*/
+	mov		$-14*4*4, offset
+	sub		offset, E
+
+	// Store initial words of expanded key, which are copies of user's key.
+	movd	ve0, 4*4(E, offset)
+	movd	ve1, 5*4(E, offset)
+	movd	ve2, 6*4(E, offset)
+	movd	ve3, 7*4(E, offset)
+
+/*	Jump into loop body.  The key expansion processes eight four-byte words per
+	iteration.  60 are needed in the key.  So only four are needed in the last
+	iteration.
+*/
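+/*	(Of the 60 = 4*(14+1) words, eight come from the user's key, six full
+	iterations produce 48 more, and the final iteration produces the last
+	four.)
+*/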
+	jmp		2f		
+1:
+	movd	ve3, dr				// Put previous word into dissection register.
+
+	/*	Get word from eight words back (it is four words back from where E
+	 	currently points, and we use it to prepare the value to be stored
+		four words beyond where E currently points).
+	*/
+	movd	-4*4(E, offset), ve0
+
+	// Perform SubWord(dr).
+	movzx	drl, t0
+	movd	LookupS(0, t0), vt0		// Look up byte 0 in table 0.
+	movzx	drh, t0d
+	movd	LookupS(1, t0), vt1		// Look up byte 1 in table 1.
+	shr		$16, dr
+	movzx	drl, t0d
+	movd	LookupS(2, t0), vt2		// Look up byte 2 in table 2.
+	movzx	drh, t0d
+	movd	LookupS(3, t0), vt3		// Look up byte 3 in table 3.
+	pxor	vt1, vt0
+	pxor	vt3, vt2
+	pxor	vt0, ve0
+	pxor	vt2, ve0
+
+	movd	-3*4(E, offset), ve1	// Get words from eight words back.
+	movd	-2*4(E, offset), ve2
+	movd	-1*4(E, offset), ve3
+
+	// Chain to successive words.
+	movd	ve0, 4*4(E, offset)
+	pxor	ve0, ve1
+	movd	ve1, 5*4(E, offset)
+	pxor	ve1, ve2
+	movd	ve2, 6*4(E, offset)
+	pxor	ve2, ve3
+	movd	ve3, 7*4(E, offset)
+
+2:
+	add		$1, R				// Advance pointer.
+	movd	ve3, dr				// Put previous word into dissection register.
+	movzx	(R), t0d			// Get round constant.
+	movd	t0d, vt1
+	movd	0*4(E, offset), ve0	// Get word from eight words back.
+	pxor	vt1, ve0
+
+	// Perform SubWord(RotWord(dr)).
+	movzx	drl, t0
+	movd	LookupS(3, t0), vt0		// Look up byte 0 in table 3.
+	movzx	drh, t0d
+	movd	LookupS(0, t0), vt1		// Look up byte 1 in table 0.
+	shr		$16, dr
+	movzx	drl, t0d
+	movd	LookupS(1, t0), vt2		// Look up byte 2 in table 1.
+	movzx	drh, t0d
+	movd	LookupS(2, t0), vt3		// Look up byte 3 in table 2.
+	pxor	vt1, vt0
+	pxor	vt3, vt2
+	pxor	vt0, ve0
+	pxor	vt2, ve0
+
+	movd	1*4(E, offset), ve1
+	movd	2*4(E, offset), ve2
+	movd	3*4(E, offset), ve3
+
+	add		$8*4, offset
+
+	// Chain to successive words.
+	movd	ve0, 0*4(E, offset)
+	pxor	ve0, ve1
+	movd	ve1, 1*4(E, offset)
+	pxor	ve1, ve2
+	movd	ve2, 2*4(E, offset)
+	pxor	ve2, ve3
+	movd	ve3, 3*4(E, offset)
+
+	jne		1b
+
+	xor		r0, r0		// Return success.
+
+	// Pop stack and restore registers.
+	movaps	7*16(r4), %xmm7
+	movaps	6*16(r4), %xmm6
+	movaps	5*16(r4), %xmm5
+	movaps	4*16(r4), %xmm4
+	movaps	3*16(r4), %xmm3
+	movaps	2*16(r4), %xmm2
+	movaps	1*16(r4), %xmm1
+	movaps	0*16(r4), %xmm0
+	#if 0 < LocalsSize
+		add		$Padding + LocalsSize, r4
+	#endif
+	#if defined __i386__
+		pop		r7
+		pop		r6
+		pop		r5
+	#endif
+	pop		r3
+
+	ret
+
+
+#undef	Address
+#undef	Argument
+#undef	E
+#undef	K
+#undef	LocalsSize
+#undef	LookupS
+#undef	Padding
+#undef	R
+#undef	SaveSize
+#undef	STable
+#undef	StackFrame
+#undef	dr
+#undef	drh
+#undef	drl
+#undef	offset
+#undef	t0
+#undef	t0d
+#undef	ve0
+#undef	ve1
+#undef	ve2
+#undef	ve3
+#undef	vt0
+#undef	vt1
+#undef	vt2
+#undef	vt3
diff --git a/bsd/crypto/aes/i386/MakeData.c b/bsd/crypto/aes/i386/MakeData.c
new file mode 100644
index 000000000..262dc5996
--- /dev/null
+++ b/bsd/crypto/aes/i386/MakeData.c
@@ -0,0 +1,516 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define	MaxRcon	11
+
+typedef uint8_t Byte;
+typedef uint32_t Word;
+
+
+/*	In comments below, {n} designates the Galois field element represented by
+	the byte n.  See notes about Galois field multiplication in ReadMe.txt.
+
+	So 3+5 is addition of ordinary integers, and 3+5 == 8, while {3}+{5} is
+	addition in the field, and {3} + {5} = {3 XOR 5} = {6}.
+*/
+
+
+// Define constants for languages.
+typedef enum { C, IntelAssembly } Language;
+
+
+/*	LogBase3[i] will contain the base-three logarithm of i in the 256-element
+	Galois field defined by AES.  That is, {3}**LogBase3[i] == {i}.
+*/
+static Byte LogBase3[256];
+
+/*	AntilogBase3[i] will contain {3}**i in the 256-element Galois field defined
+	by AES.  It contains extra elements so that the antilog of a+b can be found
+	by looking up a+b directly, without having to reduce modulo the period, for
+	0 <= a, b < 255.
+
+	(254 is the greatest value we encounter.  Each a or b we use is the
+	base-three logarithm of some element.  As a primitive root, the powers of
+	three cycle through all non-zero elements of the field, of which there are
+	255, so the exponents cover 0 to 254 before the powers repeat.)
+*/
+static Byte AntilogBase3[254+254+1];
+
+
+static void InitializeLogTables(void)
+{
+	// log({1}) is zero, so start {p} (power) at {1} and l (logarithm) at 0.
+	Byte p = 1;
+	int l = 0;
+	do
+	{
+		// Record table entries.
+		LogBase3[p] = l;
+		AntilogBase3[l] = p;
+
+		/*	Observe that {2}*{p} is {p << 1 ^ (p & 0x80 ? 0x1b : 0)}, per notes
+			in ReadMe.txt.  We produce {3}*{p}:
+
+				{3}*{p}
+					= {1}*{p} + {2}*{p}
+					= {1}*{p} + {p << 1 ^ (p & 0x80 ? 0x1b : 0)}
+					= {p ^ p << 1 ^ (p & 0x80 ? 0x1b : 0)}.
+		*/
+		p ^= p << 1 ^ (p & 0x80 ? 0x1b : 0);
+		++l;
+
+	} while (p != 1);	// Stop when we have gone around completely.
+
+	/*	The antilogarithms are periodic with a period of 255, and we want to
+		look up elements as high as 254+254 (the largest that a sum of two
+		logarithms could be), so we replicate the table beyond the first
+		period.
+	*/
+	for (l = 255; l < 254+254; ++l)
+		AntilogBase3[l] = AntilogBase3[l-255];
+}
+
+
+/*	MultiplyByte(Byte b, Byte c) returns {b}*{c}.  It requires tables that must
+	be initialized before this routine is used.
+*/
+static Byte MultiplyByte(Byte b, Byte c)
+{
+	// Calculate product by adding logarithms, but avoid logarithms of zero.
+	return b == 0 || c == 0 ? 0 : AntilogBase3[LogBase3[b] + LogBase3[c]];
+}
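+
+/*	For example, MultiplyByte(0x57, 0x83) returns 0xc1, matching the
+	{57}*{83} = {c1} worked example in FIPS-197 section 4.2.
+*/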
+
+
+// Return {0} if {b} is {0} and the multiplicative inverse of {b} otherwise.
+static Byte InverseByte(Byte b)
+{
+	return b == 0 ? 0 : AntilogBase3[255 - LogBase3[b]];
+}
+
+
+// Perform AES' SubBytes operation on a single byte.
+static Byte SubByte(Byte b)
+{
+	unsigned int r = InverseByte(b);
+
+	// Duplicate r as a proxy for a rotate operation.
+	r = r | r<<8;
+
+	// Apply the standard's affine transformation.
+	return r ^ r>>4 ^ r>>5 ^ r>>6 ^ r>>7 ^ 0x63;
+}
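+
+/*	For example, SubByte(0x00) returns 0x63, since the inverse of {0} is
+	taken to be {0} and only the affine constant remains, and SubByte(0x53)
+	returns 0xed, matching the S-box in FIPS-197.
+*/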
+
+
+// Define and populate tables for the SubBytes and InvSubBytes operations.
+static Byte SubBytesTable[256];
+static Byte InvSubBytesTable[256];
+
+
+static void InitializeSubBytesTable(void)
+{
+	for (int i = 0; i < 256; ++i)
+		SubBytesTable[i] = SubByte((Byte) i);
+}
+
+
+static void InitializeInvSubBytesTable(void)
+{
+	for (int i = 0; i < 256; ++i)
+		InvSubBytesTable[SubByte((Byte) i)] = i;
+}
+
+
+/*	Print tables for SubBytes function providing the output byte embedded in
+	various places in a word, so that the table entries can be used with
+	fewer byte manipulations.
+*/
+static void PrintSubBytesWordTable(Language language)
+{
+	switch (language)
+	{
+		case C:
+			printf("\n\n"
+				"// SubBytes embedded in words tables.\n"
+				"const Word AESSubBytesWordTable[4][256] =\n"
+				"{\n");
+			for (int j = 0; j < 4; ++j)
+			{
+				printf("\t{\n");
+				for (int i = 0; i < 256; ++i)
+					printf("\t\t0x%08x,\n", SubBytesTable[i] << j*8);
+				printf("\t},\n");
+			}
+			printf("};\n");
+			break;
+
+		case IntelAssembly:
+			printf("\n\n"
+				"// SubBytes embedded in words tables.\n"
+				"\t.globl\t_AESSubBytesWordTable\n"
+				"\t.private_extern\t_AESSubBytesWordTable\n"
+				"\t.align\t2\n"
+				"_AESSubBytesWordTable:\n");
+			for (int j = 0; j < 4; ++j)
+			{
+				printf("\t// Table %d.\n", j);
+				for (int i = 0; i < 256; ++i)
+					printf("\t.long\t0x%08x\n", SubBytesTable[i] << j*8);
+			}
+			break;
+	}
+}
+
+
+/*	Print tables for InvSubBytes function providing the output byte embedded in
+	various places in a word, so that the table entries can be used with
+	fewer byte manipulations.
+*/
+static void PrintInvSubBytesWordTable(Language language)
+{
+	switch (language)
+	{
+		case C:
+			printf("\n\n"
+				"// InvSubBytes embedded in words tables.\n"
+				"const Word AESInvSubBytesWordTable[4][256] =\n"
+				"{\n");
+			for (int j = 0; j < 4; ++j)
+			{
+				printf("\t{\n");
+				for (int i = 0; i < 256; ++i)
+					printf("\t\t0x%08x,\n", InvSubBytesTable[i] << j*8);
+				printf("\t},\n");
+			}
+			printf("};\n");
+			break;
+
+		case IntelAssembly:
+			printf("\n\n"
+				"// InvSubBytes embedded in words tables.\n"
+				"\t.globl\t_AESInvSubBytesWordTable\n"
+				"\t.private_extern\t_AESInvSubBytesWordTable\n"
+				"\t.align\t2\n"
+				"_AESInvSubBytesWordTable:\n");
+			for (int j = 0; j < 4; ++j)
+			{
+				printf("\t// Table %d.\n", j);
+				for (int i = 0; i < 256; ++i)
+					printf("\t.long\t0x%08x\n", InvSubBytesTable[i] << j*8);
+			}
+			break;
+	}
+}
+
+
+// Print the round constants.
+static void PrintRcon(Language language)
+{
+	union { Byte c[4]; Word w; } t = { { 1, 0, 0, 0 } };
+
+	switch (language)
+	{
+		case C:
+			printf("\n\n"
+				"// Round constants.\n"
+				"const Byte AESRcon[] =\n"
+				"{\n"
+				"\t0,\t// Not used, included for indexing simplicity.\n");
+			for (int i = 1; i < MaxRcon; ++i)
+			{
+				printf("\t0x%02x,\n", t.w);
+				t.c[0] = MultiplyByte(0x2, t.c[0]);
+			}
+			printf("};\n");
+			break;
+
+		case IntelAssembly:
+			printf("\n\n"
+				"// Round constants.\n"
+				"\t.globl\t_AESRcon\n"
+				"\t.private_extern\t_AESRcon\n"
+				"_AESRcon:\n"
+				"\t.byte\t0\t// Not used, included for indexing simplicity.\n");
+			for (int i = 1; i < MaxRcon; ++i)
+			{
+				printf("\t.byte\t0x%02x\n", t.w);
+				t.c[0] = MultiplyByte(0x2, t.c[0]);
+			}
+			break;
+	}
+}
+
+
+// Print tables for the InvMixColumn operation.
+static void PrintInvMixColumnTable(Language language)
+{
+	Word T[4][256];
+
+	for (int i = 0; i < 256; ++i)
+	{
+		union { Byte b[4]; Word w; } c;
+
+		Byte s9 = MultiplyByte(0x9, i);
+		Byte sb = MultiplyByte(0xb, i);
+		Byte sd = MultiplyByte(0xd, i);
+		Byte se = MultiplyByte(0xe, i);
+
+		c.b[0] = se;
+		c.b[1] = s9;
+		c.b[2] = sd;
+		c.b[3] = sb;
+		T[0][i] = c.w;
+
+		c.b[0] = sb;
+		c.b[1] = se;
+		c.b[2] = s9;
+		c.b[3] = sd;
+		T[1][i] = c.w;
+
+		c.b[0] = sd;
+		c.b[1] = sb;
+		c.b[2] = se;
+		c.b[3] = s9;
+		T[2][i] = c.w;
+
+		c.b[0] = s9;
+		c.b[1] = sd;
+		c.b[2] = sb;
+		c.b[3] = se;
+		T[3][i] = c.w;
+	}
+
+	switch (language)
+	{
+		case C:
+			printf("\n\n"
+				"// Tables for InvMixColumn.\n"
+				"const Word AESInvMixColumnTable[4][256] =\n"
+				"{\n");
+			for (int i = 0; i < 4; ++i)
+			{
+				printf("\t{\n");
+				for (int j = 0; j < 256; ++j)
+					printf("\t\t0x%08x,\n", T[i][j]);
+				printf("\t},\n");
+			}
+			printf("};\n");
+			break;
+
+		case IntelAssembly:
+			printf("\n\n"
+				"// Tables for InvMixColumn.\n"
+				"\t.globl\t_AESInvMixColumnTable\n"
+				"\t.private_extern\t_AESInvMixColumnTable\n"
+				"\t.align\t2\n"
+				"_AESInvMixColumnTable:\n");
+			for (int i = 0; i < 4; ++i)
+			{
+				printf("\t// Table %d.\n", i);
+				for (int j = 0; j < 256; ++j)
+					printf("\t.long\t0x%08x\n", T[i][j]);
+			}
+			break;
+	}
+}
+
+
+/*	Print the tables defined in AES Proposal: Rijndael, amended, 9/04/2003,
+	section 5.2.1.  These combine the MixColumn and SubBytes operations.
+*/
+static void PrintEncryptTable(Language language)
+{
+	Word T[4][256];
+
+	for (int i = 0; i < 256; ++i)
+	{
+		union { Byte b[4]; Word w; } c;
+
+		Byte s1 = SubBytesTable[i];
+		Byte s2 = MultiplyByte(0x2, s1);
+		Byte s3 = s1 ^ s2;
+
+		c.b[0] = s2;
+		c.b[1] = s1;
+		c.b[2] = s1;
+		c.b[3] = s3;
+		T[0][i] = c.w;
+
+		c.b[0] = s3;
+		c.b[1] = s2;
+		//c.b[2] = s1;
+		c.b[3] = s1;
+		T[1][i] = c.w;
+
+		c.b[0] = s1;
+		c.b[1] = s3;
+		c.b[2] = s2;
+		//c.b[3] = s1;
+		T[2][i] = c.w;
+
+		//c.b[0] = s1;
+		c.b[1] = s1;
+		c.b[2] = s3;
+		c.b[3] = s2;
+		T[3][i] = c.w;
+	}
+
+	switch (language)
+	{
+		case C:
+			printf("\n\n"
+				"// Tables for main encryption iterations.\n"
+				"const Word AESEncryptTable[4][256] =\n"
+				"{\n");
+			for (int i = 0; i < 4; ++i)
+			{
+				printf("\t{\n");
+				for (int j = 0; j < 256; ++j)
+					printf("\t\t0x%08x,\n", T[i][j]);
+				printf("\t},\n");
+			}
+			printf("};\n");
+			break;
+
+		case IntelAssembly:
+			printf("\n\n"
+				"// Tables for main encryption iterations.\n"
+				"\t.globl\t_AESEncryptTable\n"
+				"\t.private_extern\t_AESEncryptTable\n"
+				"\t.align\t2\n"
+				"_AESEncryptTable:\n");
+			for (int i = 0; i < 4; ++i)
+			{
+				printf("\t// Table %d.\n", i);
+				for (int j = 0; j < 256; ++j)
+					printf("\t.long\t0x%08x\n", T[i][j]);
+			}
+			break;
+	}
+}
+
+
+/*	Print the inverse tables.  These correspond to the tables above, but for
+	decryption.  These combine the InvSubBytes and InvMixColumn operations.
+*/
+static void PrintDecryptTable(Language language)
+{
+	Word T[4][256];
+
+	for (int i = 0; i < 256; ++i)
+	{
+		union { Byte b[4]; Word w; } c;
+
+		Byte si = InvSubBytesTable[i];
+
+		Byte s9 = MultiplyByte(0x9, si);
+		Byte sb = MultiplyByte(0xb, si);
+		Byte sd = MultiplyByte(0xd, si);
+		Byte se = MultiplyByte(0xe, si);
+
+		c.b[0] = se;
+		c.b[1] = s9;
+		c.b[2] = sd;
+		c.b[3] = sb;
+		T[0][i] = c.w;
+
+		c.b[0] = sb;
+		c.b[1] = se;
+		c.b[2] = s9;
+		c.b[3] = sd;
+		T[1][i] = c.w;
+
+		c.b[0] = sd;
+		c.b[1] = sb;
+		c.b[2] = se;
+		c.b[3] = s9;
+		T[2][i] = c.w;
+
+		c.b[0] = s9;
+		c.b[1] = sd;
+		c.b[2] = sb;
+		c.b[3] = se;
+		T[3][i] = c.w;
+	}
+
+	switch (language)
+	{
+		case C:
+			printf("\n\n"
+				"// Tables for main decryption iterations.\n"
+				"const Word AESDecryptTable[4][256] =\n"
+				"{\n");
+			for (int i = 0; i < 4; ++i)
+			{
+				printf("\t{\n");
+				for (int j = 0; j < 256; ++j)
+					printf("\t\t0x%08x,\n", T[i][j]);
+				printf("\t},\n");
+			}
+			printf("};\n");
+			break;
+
+		case IntelAssembly:
+			printf("\n\n"
+				"// Tables for main decryption iterations.\n"
+				"\t.globl\t_AESDecryptTable\n"
+				"\t.private_extern\t_AESDecryptTable\n"
+				"\t.align\t2\n"
+				"_AESDecryptTable:\n");
+			for (int i = 0; i < 4; ++i)
+			{
+				printf("\t// Table %d.\n", i);
+				for (int j = 0; j < 256; ++j)
+					printf("\t.long\t0x%08x\n", T[i][j]);
+			}
+			break;
+	}
+}
+
+
+static void Usage(const char *ProgramName)
+{
+	fprintf(stderr,
+		"%s:  This program must have exactly one argument, \"C\" to generate\n"
+		"C or \"Intel\" to generate GCC i386/x86_64 assembly.\n", ProgramName);
+	exit(EXIT_FAILURE);
+}
+
+
+int main(int argc, char *argv[])
+{
+	if (argc != 2)
+		Usage(argv[0]);
+
+	Language language;
+
+	// Figure out which language to generate, C or Intel assembly.
+	if (0 == strcmp(argv[1], "C"))
+		language = C;
+	else if (0 == strcmp(argv[1], "Intel"))
+		language = IntelAssembly;
+	else
+		Usage(argv[0]);
+
+	printf("// This file was generated by " __FILE__ ".\n");
+
+	if (language == C)
+		printf("\n\n#include \"AES.h\"\n");
+
+	if (language == IntelAssembly)
+		printf("\n\n\t.const\n");
+
+	InitializeLogTables();
+	InitializeSubBytesTable();
+	InitializeInvSubBytesTable();
+
+	PrintRcon(language);
+	PrintInvMixColumnTable(language);
+	PrintEncryptTable(language);
+	PrintDecryptTable(language);
+	PrintSubBytesWordTable(language);
+	PrintInvSubBytesWordTable(language);
+
+	return 0;
+}
diff --git a/bsd/crypto/aes/i386/Makefile b/bsd/crypto/aes/i386/Makefile
index f116db347..851f7b2ac 100644
--- a/bsd/crypto/aes/i386/Makefile
+++ b/bsd/crypto/aes/i386/Makefile
@@ -7,28 +7,24 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
-INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
+INSTINC_SUBDIRS = \
 
 INSTINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
-PRIVATE_DATAFILES = \
-	aesopt.h edefs.h
-
 INSTALL_MI_DIR = crypto
 
 EXPORT_MI_DIR = ${INSTALL_MI_DIR}
 
-INSTALL_KF_MI_LIST =
+PRIVATE_DATAFILES = \
+	aesxts.h
 
-INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES}
+# /System/Library/Frameworks/Kernel.framework/PrivateHeaders
+INSTALL_KF_MD_LCL_LIST = ${PRIVATE_DATAFILES}
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
diff --git a/bsd/crypto/aes/i386/ReadMe.txt b/bsd/crypto/aes/i386/ReadMe.txt
new file mode 100644
index 000000000..7ac833117
--- /dev/null
+++ b/bsd/crypto/aes/i386/ReadMe.txt
@@ -0,0 +1,22 @@
+This directory contains a hybrid AES implementation.  The core AES routines
+(the actual encryption, decryption, and key expansion) are in:
+
+	AES.s
+	Data.mk
+	Data.s
+	EncryptDecrypt.s
+	ExpandKeyForDecryption.s
+	ExpandKeyForEncryption.s
+	MakeData.c
+
+Although the above files do not explicitly include aes.h, they conform to
+certain things defined in it, notably the aes_rval type and the layout of the
+aes_encrypt_ctx and aes_decrypt_ctx structures.  These must be kept
+compatible; the definitions of ContextKey and ContextKeyLength in AES.s must
+match the offsets of the key ("ks") and key_length ("inf") members of
+aes_encrypt_ctx and aes_decrypt_ctx.  (For some reason, aes_inf is a union that
+is written as a 32-bit integer and read as an 8-bit integer.  I do not know
+why but have reproduced that behavior in the new implementation.)
+
+aes_modes.c extends the API, most notably by implementing CBC mode using the
+basic AES block encryption.  It uses aesopt.h and edefs.h.
diff --git a/bsd/crypto/aes/i386/aes_crypt_hw.s b/bsd/crypto/aes/i386/aes_crypt_hw.s
new file mode 100644
index 000000000..2edc3e2fd
--- /dev/null
+++ b/bsd/crypto/aes/i386/aes_crypt_hw.s
@@ -0,0 +1,472 @@
+/* 	This file defines _aes_encrypt_hw and _aes_decrypt_hw --- Intel Westmere HW AES-based implementations
+	of _aes_encrypt and _aes_decrypt.
+
+	These 2 functions SHOULD BE entered ONLY after the AES HW is verified to be available.
+	They SHOULD NOT be called without AES HW detection; doing so might cause xnu to crash.
+
+	The AES HW is detected 1st thing in
+		_aes_encrypt (EncryptDecrypt.s)
+		_aes_decrypt (EncryptDecrypt.s)
+	and, if AES HW is detected, those routines branch without link (i.e., jump) to the functions here.
+
+	The implementation here follows the examples in the Intel White Paper
+	"Intel Advanced Encryption Standard (AES) Instruction Set", Rev. 2.01.
+
+	Note: Rev. 03 Final (2010 01 26) is available; it contains some code changes relative to Rev. 2.01.
+
+	cclee 3-13-10
+*/
+
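+/*	For reference, a hedged C sketch of the aligned AES-128 path below, using
+	compiler intrinsics from <wmmintrin.h> (not used by this file); "rk" here
+	stands for the 11 expanded round keys:
+
+		__m128i b = _mm_loadu_si128((const __m128i *)in);
+		b = _mm_xor_si128(b, rk[0]);			// initial AddRoundKey
+		for (int r = 1; r < 10; ++r)
+			b = _mm_aesenc_si128(b, rk[r]);		// 9 full rounds
+		b = _mm_aesenclast_si128(b, rk[10]);	// last round omits MixColumns
+		_mm_storeu_si128((__m128i *)out, b);
+*/
+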
+    .text
+    .align 4,0x90
+.globl	_aes_encrypt_hw
+_aes_encrypt_hw:
+
+#if	defined	__i386__	
+	movl	4(%esp), %eax	// in
+	movl	12(%esp), %edx	// ctx
+	movl	8(%esp), %ecx	// out
+
+	#define	LOCAL_SIZE	(12+16+16)		// 16-byte align (-4 for return address) + 16 (xmm0) + 16 (xmm1)
+	#define	in		%eax
+	#define	ctx		%edx
+	#define	out		%ecx
+	#define	r13		%esp
+
+#else		// x86_64
+
+	#define	LOCAL_SIZE	(8+16+16)		// 16-byte align (-8 for return address) + 16 (xmm0) + 16 (xmm1)
+	#define	in			%rdi
+	#define	ctx			%rdx
+	#define	out			%rsi
+	#define	r13			%rsp
+
+#endif		// i386 or x86_64
+
+#ifdef KERNEL
+	sub		$LOCAL_SIZE, r13
+	movaps	%xmm0, (r13)
+#endif
+	movups	(in), %xmm0
+
+	// key length identification
+	movl	240(ctx), %eax			// key length
+	cmp		$160, %eax
+	je		L_AES_128
+	cmp		$192, %eax
+	je		L_AES_192
+	cmp		$224, %eax
+	je		L_AES_256
+	mov		$-1, %eax					// return ERROR
+#ifdef KERNEL
+	movaps	(r13), %xmm0	
+	add		$LOCAL_SIZE, r13
+#endif
+	ret
+
+L_AES_128:
+	testb	$15, %dl					// check whether expanded key is 16-byte aligned
+	jne		0f							// if not 16-byte aligned, aesenc xmm, m128 won't work	
+	pxor	(ctx), %xmm0
+	aesenc	16(ctx), %xmm0
+	aesenc	32(ctx), %xmm0
+	aesenc	48(ctx), %xmm0
+	aesenc	64(ctx), %xmm0
+	aesenc	80(ctx), %xmm0
+	aesenc	96(ctx), %xmm0
+	aesenc	112(ctx), %xmm0
+	aesenc	128(ctx), %xmm0
+	aesenc	144(ctx), %xmm0
+	aesenclast	160(ctx), %xmm0
+	xorl	%eax, %eax
+	movups	%xmm0, (out)
+#ifdef KERNEL
+	movaps	(r13), %xmm0	
+	add		$LOCAL_SIZE, r13
+#endif
+	ret
+0:										// special case expanded key is not 16-byte aligned	
+#ifdef	KERNEL
+	movaps	%xmm1, 16(r13)				// save xmm1 into stack
+#endif
+	movups	(ctx), %xmm1
+	pxor	%xmm1, %xmm0
+	movups	16(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	32(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	48(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	64(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	80(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	96(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	112(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	128(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	144(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	160(ctx), %xmm1
+	aesenclast	%xmm1, %xmm0
+	xorl	%eax, %eax
+	movups	%xmm0, (out)
+#ifdef KERNEL
+	movaps	(r13), %xmm0	
+	movaps	16(r13), %xmm1
+	add		$LOCAL_SIZE, r13
+#endif
+	ret
+
+L_AES_192:
+	testb	$15, %dl					// check whether expanded key is 16-byte aligned
+	jne		0f							// if not 16-byte aligned, aesenc xmm, m128 won't work	
+	pxor	(ctx), %xmm0
+	aesenc	16(ctx), %xmm0
+	aesenc	32(ctx), %xmm0
+	aesenc	48(ctx), %xmm0
+	aesenc	64(ctx), %xmm0
+	aesenc	80(ctx), %xmm0
+	aesenc	96(ctx), %xmm0
+	aesenc	112(ctx), %xmm0
+	aesenc	128(ctx), %xmm0
+	aesenc	144(ctx), %xmm0
+	aesenc	160(ctx), %xmm0
+	aesenc	176(ctx), %xmm0
+	aesenclast	192(ctx), %xmm0
+	xorl	%eax, %eax
+	movups	%xmm0, (out)
+#ifdef KERNEL
+	movaps	(r13), %xmm0	
+	add		$LOCAL_SIZE, r13
+#endif
+	ret
+0:										// special case expanded key is not 16-byte aligned	
+#ifdef	KERNEL
+	movaps	%xmm1, 16(r13)				// save xmm1 into stack
+#endif
+	movups	(ctx), %xmm1
+	pxor	%xmm1, %xmm0
+	movups	16(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	32(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	48(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	64(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	80(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	96(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	112(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	128(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	144(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	160(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	176(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	192(ctx), %xmm1
+	aesenclast	%xmm1, %xmm0
+	xorl	%eax, %eax
+	movups	%xmm0, (out)
+#ifdef KERNEL
+	movaps	(r13), %xmm0	
+	movaps	16(r13), %xmm1
+	add		$LOCAL_SIZE, r13
+#endif
+	ret
+
+L_AES_256:
+	testb	$15, %dl					// check whether expanded key is 16-byte aligned
+	jne		0f							// if not 16-byte aligned, aesenc xmm, m128 won't work	
+	pxor	(ctx), %xmm0
+	aesenc	16(ctx), %xmm0
+	aesenc	32(ctx), %xmm0
+	aesenc	48(ctx), %xmm0
+	aesenc	64(ctx), %xmm0
+	aesenc	80(ctx), %xmm0
+	aesenc	96(ctx), %xmm0
+	aesenc	112(ctx), %xmm0
+	aesenc	128(ctx), %xmm0
+	aesenc	144(ctx), %xmm0
+	aesenc	160(ctx), %xmm0
+	aesenc	176(ctx), %xmm0
+	aesenc	192(ctx), %xmm0
+	aesenc	208(ctx), %xmm0
+	aesenclast	224(ctx), %xmm0
+	xorl	%eax, %eax
+	movups	%xmm0, (out)
+#ifdef KERNEL
+	movaps	(r13), %xmm0	
+	add		$LOCAL_SIZE, r13
+#endif
+	ret
+0:										// special case expanded key is not 16-byte aligned	
+#ifdef	KERNEL
+	movaps	%xmm1, 16(r13)				// save xmm1 into stack
+#endif
+	movups	(ctx), %xmm1
+	pxor	%xmm1, %xmm0
+	movups	16(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	32(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	48(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	64(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	80(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	96(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	112(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	128(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	144(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	160(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	176(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	192(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	208(ctx), %xmm1
+	aesenc	%xmm1, %xmm0
+	movups	224(ctx), %xmm1
+	aesenclast	%xmm1, %xmm0
+	xorl	%eax, %eax
+	movups	%xmm0, (out)
+#ifdef KERNEL
+	movaps	(r13), %xmm0	
+	movaps	16(r13), %xmm1
+	add		$LOCAL_SIZE, r13
+#endif
+	ret
+
+
+    .text
+    .align 4,0x90
+.globl	_aes_decrypt_hw
+_aes_decrypt_hw:
+
+#if	defined	__i386__	
+	movl	4(%esp), %eax	// in
+	movl	12(%esp), %edx	// ctx
+	movl	8(%esp), %ecx	// out
+
+#endif
+
+#ifdef KERNEL
+	sub		$LOCAL_SIZE, r13
+	movaps	%xmm0, (r13)
+#endif
+	movups	(in), %xmm0
+
+	// key length identification
+	movl	240(ctx), %eax			// key length
+	cmp		$160, %eax
+	je		0f						// AES-128
+	cmp		$192, %eax
+	je		1f						// AES-192
+	cmp		$224, %eax
+	je		2f						// AES-256
+	mov		$-1, %eax				// return ERROR
+#ifdef KERNEL
+	movaps	(r13), %xmm0	
+	add		$LOCAL_SIZE, r13
+#endif
+	ret
+
+0:									// AES-128
+	testb	$15, %dl					// check whether expanded key is 16-byte aligned
+	jne		9f							// if not 16-byte aligned, aesenc xmm, m128 won't work	
+	pxor	160(ctx), %xmm0
+	aesdec	144(ctx), %xmm0
+	aesdec	128(ctx), %xmm0
+	aesdec	112(ctx), %xmm0
+	aesdec	96(ctx), %xmm0
+	aesdec	80(ctx), %xmm0
+	aesdec	64(ctx), %xmm0
+	aesdec	48(ctx), %xmm0
+	aesdec	32(ctx), %xmm0
+	aesdec	16(ctx), %xmm0
+	aesdeclast	(ctx), %xmm0
+	xorl	%eax, %eax
+	movups	%xmm0, (out)
+#ifdef KERNEL
+	movaps	(r13), %xmm0	
+	add		$LOCAL_SIZE, r13
+#endif
+	ret
+9:										// AES-128 Decrypt : special case expanded key is not 16-byte aligned 
+#ifdef	KERNEL
+	movaps	%xmm1, 16(r13)				// save xmm1 into stack
+#endif
+	movups	160(ctx), %xmm1
+	pxor	%xmm1, %xmm0
+	movups	144(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	128(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	112(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	96(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	80(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	64(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	48(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	32(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	16(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	(ctx), %xmm1
+	aesdeclast	%xmm1, %xmm0
+	xorl	%eax, %eax
+	movups	%xmm0, (out)
+#ifdef KERNEL
+	movaps	(r13), %xmm0	
+	movaps	16(r13), %xmm1	
+	add		$LOCAL_SIZE, r13
+#endif
+	ret
+
+1:								// AES-192
+	testb	$15, %dl					// check whether expanded key is 16-byte aligned
+	jne		9f							// if not 16-byte aligned, aesenc xmm, m128 won't work	
+	pxor	192(ctx), %xmm0
+	aesdec	176(ctx), %xmm0
+	aesdec	160(ctx), %xmm0
+	aesdec	144(ctx), %xmm0
+	aesdec	128(ctx), %xmm0
+	aesdec	112(ctx), %xmm0
+	aesdec	96(ctx), %xmm0
+	aesdec	80(ctx), %xmm0
+	aesdec	64(ctx), %xmm0
+	aesdec	48(ctx), %xmm0
+	aesdec	32(ctx), %xmm0
+	aesdec	16(ctx), %xmm0
+	aesdeclast	(ctx), %xmm0
+	xorl	%eax, %eax
+	movups	%xmm0, (out)
+#ifdef KERNEL
+	movaps	(r13), %xmm0	
+	add		$LOCAL_SIZE, r13
+#endif
+	ret
+9:										// AES-192 Decrypt : special case expanded key is not 16-byte aligned 
+#ifdef	KERNEL
+	movaps	%xmm1, 16(r13)				// save xmm1 into stack
+#endif
+	movups	192(ctx), %xmm1
+	pxor	%xmm1, %xmm0
+	movups	176(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	160(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	144(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	128(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	112(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	96(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	80(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	64(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	48(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	32(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	16(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	(ctx), %xmm1
+	aesdeclast	%xmm1, %xmm0
+	xorl	%eax, %eax
+	movups	%xmm0, (out)
+#ifdef KERNEL
+	movaps	(r13), %xmm0	
+	movaps	16(r13), %xmm1	
+	add		$LOCAL_SIZE, r13
+#endif
+	ret
+
+2:							// AES-256
+	testb	$15, %dl					// check whether expanded key is 16-byte aligned
+	jne		9f							// if not 16-byte aligned, aesenc xmm, m128 won't work	
+	pxor	224(ctx), %xmm0
+	aesdec	208(ctx), %xmm0
+	aesdec	192(ctx), %xmm0
+	aesdec	176(ctx), %xmm0
+	aesdec	160(ctx), %xmm0
+	aesdec	144(ctx), %xmm0
+	aesdec	128(ctx), %xmm0
+	aesdec	112(ctx), %xmm0
+	aesdec	96(ctx), %xmm0
+	aesdec	80(ctx), %xmm0
+	aesdec	64(ctx), %xmm0
+	aesdec	48(ctx), %xmm0
+	aesdec	32(ctx), %xmm0
+	aesdec	16(ctx), %xmm0
+	aesdeclast	(ctx), %xmm0
+	xorl	%eax, %eax
+	movups	%xmm0, (out)
+#ifdef KERNEL
+	movaps	(r13), %xmm0	
+	add		$LOCAL_SIZE, r13
+#endif
+	ret
+9:										// AES-256 Decrypt : special case expanded key is not 16-byte aligned 
+#ifdef	KERNEL
+	movaps	%xmm1, 16(r13)				// save xmm1 into stack
+#endif
+	movups	224(ctx), %xmm1
+	pxor	%xmm1, %xmm0
+	movups	208(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	192(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	176(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	160(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	144(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	128(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	112(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	96(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	80(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	64(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	48(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	32(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	16(ctx), %xmm1
+	aesdec	%xmm1, %xmm0
+	movups	(ctx), %xmm1
+	aesdeclast	%xmm1, %xmm0
+	xorl	%eax, %eax
+	movups	%xmm0, (out)
+#ifdef KERNEL
+	movaps	(r13), %xmm0	
+	movaps	16(r13), %xmm1	
+	add		$LOCAL_SIZE, r13
+#endif
+	ret
+
diff --git a/bsd/crypto/aes/i386/aes_key_hw.s b/bsd/crypto/aes/i386/aes_key_hw.s
new file mode 100644
index 000000000..434fa553c
--- /dev/null
+++ b/bsd/crypto/aes/i386/aes_key_hw.s
@@ -0,0 +1,405 @@
+/* 	This file defines _aes_encrypt_key_hw and _aes_decrypt_key_hw --- Intel Westmere HW AES-based implementations
+	of _aes_encrypt_key and _aes_decrypt_key.
+
+	These 2 functions SHOULD BE entered ONLY after the AES HW is verified to be available.
+	They SHOULD NOT be called without AES HW detection; doing so might cause xnu to crash.
+
+	The AES HW is detected 1st thing in
+		_aes_encrypt_key (ExpandKeyForEncryption.s)
+		_aes_decrypt_key (ExpandKeyForDecryption.s)
+	and, if AES HW is detected, those routines branch without link (i.e., jump) to the functions here.
+
+	The implementation here follows the examples in the Intel White Paper
+	"Intel Advanced Encryption Standard (AES) Instruction Set", Rev. 2.01.
+
+	Note: Rev. 03 Final (2010 01 26) is available; it contains some code changes relative to Rev. 2.01.
+
+	cclee 3-13-10
+*/
+
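+/*	For reference, a hedged C sketch (intrinsics from <wmmintrin.h>, not used
+	by this file) of one L_key_expansion_128 step below, where "prev" is the
+	previous round key and "gen" is the aeskeygenassist result for it:
+
+		static __m128i expand128(__m128i prev, __m128i gen)
+		{
+			gen  = _mm_shuffle_epi32(gen, 0xff);	// broadcast SubWord/RotWord lane
+			prev = _mm_xor_si128(prev, _mm_slli_si128(prev, 4));
+			prev = _mm_xor_si128(prev, _mm_slli_si128(prev, 4));
+			prev = _mm_xor_si128(prev, _mm_slli_si128(prev, 4));
+			return _mm_xor_si128(prev, gen);		// next round key
+		}
+*/
+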
+	.text	
+	.align	4,0x90
+
+	// hw_aes_encrypt_key(key, klen, hwectx);
+	// klen = 16, 24, or 32, or (128/192/256)
+
+	.globl	_aes_encrypt_key_hw
+_aes_encrypt_key_hw:
+
+#ifdef	__i386__
+	push	%ebp
+	mov		%esp, %ebp
+	push	%ebx
+	push	%edi	
+	mov		8(%ebp), %eax		// pointer to key
+	mov		12(%ebp), %ebx		// klen
+	mov		16(%ebp), %edi		// ctx
+	#define	pkey	%eax
+	#define	klen	%ebx
+	#define	ctx		%edi
+	#define	sp		%esp
+	#define	cx		%ecx
+#else
+	#define	pkey	%rdi
+	#define	klen	%rsi
+	#define	ctx		%rdx
+	#define	sp		%rsp
+	#define	cx		%rcx
+	push	%rbp
+	mov		%rsp, %rbp
+#endif
+
+#ifdef	KERNEL
+	// for xmm registers save and restore
+	sub		$(16*4), sp
+#endif
+
+	cmp		$32, klen
+	jg		0f					// klen>32
+	shl		$3, klen			// convert 16/24/32 to 128/192/256
+0:
+
+	cmp		$128, klen			// AES-128 ?
+	je		L_AES_128_Encrypt_Key
+	cmp		$192, klen			// AES-192 ?
+	je		L_AES_192_Encrypt_Key
+	cmp		$256, klen			// AES-256 ?
+	je		L_AES_256_Encrypt_Key
+	mov		$1, %eax			// return error for wrong klen 
+L_Encrypt_Key_2_return:
+#ifdef	KERNEL
+	add		$(16*4), sp
+#endif
+#ifdef	__i386__
+	pop		%edi
+	pop		%ebx
+#endif
+	leave
+	ret
+
+L_AES_128_Encrypt_Key:
+#ifdef	KERNEL
+	// save xmm registers
+	movaps	%xmm1, (sp)
+	movaps	%xmm2, 16(sp)
+	movaps	%xmm3, 32(sp)
+#endif	// KERNEL
+
+	movl	$160, 240(ctx)		// write expanded key length to ctx
+	xor		cx, cx
+
+	movups	(pkey), %xmm1
+	movups	%xmm1, (ctx)
+	aeskeygenassist	$1, %xmm1, %xmm2
+	call	L_key_expansion_128
+	aeskeygenassist	$2, %xmm1, %xmm2
+	call	L_key_expansion_128
+	aeskeygenassist	$4, %xmm1, %xmm2
+	call	L_key_expansion_128
+	aeskeygenassist	$8, %xmm1, %xmm2
+	call	L_key_expansion_128
+	aeskeygenassist	$0x10, %xmm1, %xmm2
+	call	L_key_expansion_128
+	aeskeygenassist	$0x20, %xmm1, %xmm2
+	call	L_key_expansion_128
+	aeskeygenassist	$0x40, %xmm1, %xmm2
+	call	L_key_expansion_128
+	aeskeygenassist	$0x80, %xmm1, %xmm2
+	call	L_key_expansion_128
+	aeskeygenassist	$0x1b, %xmm1, %xmm2
+	call	L_key_expansion_128
+	aeskeygenassist	$0x36, %xmm1, %xmm2
+	call	L_key_expansion_128
+
+#ifdef	KERNEL
+	// restore xmm registers
+	movaps	(sp), %xmm1
+	movaps	16(sp), %xmm2
+	movaps	32(sp), %xmm3
+#endif	// KERNEL
+	xor		%eax, %eax			// return 0 for success
+	jmp		L_Encrypt_Key_2_return
+
+	.align	4, 0x90
+L_key_expansion_128:
+	pshufd	$0xff, %xmm2, %xmm2
+	movaps	%xmm1, %xmm3
+	pslldq	$4, %xmm3
+	pxor	%xmm3, %xmm1
+	movaps	%xmm1, %xmm3
+	pslldq	$4, %xmm3
+	pxor	%xmm3, %xmm1
+	movaps	%xmm1, %xmm3
+	pslldq	$4, %xmm3
+	pxor	%xmm3, %xmm1
+	pxor	%xmm2, %xmm1
+	add		$16, cx
+	movups	%xmm1, (ctx, cx)
+	ret
+
+L_AES_192_Encrypt_Key:
+#ifdef	KERNEL
+	// save xmm registers
+	movaps	%xmm1, (sp)
+	movaps	%xmm2, 16(sp)
+	movaps	%xmm3, 32(sp)
+	movaps	%xmm4, 48(sp)
+#endif	// KERNEL
+	movl	$192, 240(ctx)		// write expanded key length to ctx
+
+	movups	(pkey), %xmm1
+	movq	16(pkey), %xmm3
+
+	movups	%xmm1, (ctx)
+	movq	%xmm3, 16(ctx)
+
+	lea		24(ctx), cx
+
+	aeskeygenassist	$1, %xmm3, %xmm2
+	call	L_key_expansion_192
+	aeskeygenassist	$2, %xmm3, %xmm2
+	call	L_key_expansion_192
+	aeskeygenassist	$4, %xmm3, %xmm2
+	call	L_key_expansion_192
+	aeskeygenassist	$8, %xmm3, %xmm2
+	call	L_key_expansion_192
+	aeskeygenassist	$0x10, %xmm3, %xmm2
+	call	L_key_expansion_192
+	aeskeygenassist	$0x20, %xmm3, %xmm2
+	call	L_key_expansion_192
+	aeskeygenassist	$0x40, %xmm3, %xmm2
+	call	L_key_expansion_192
+	aeskeygenassist	$0x80, %xmm3, %xmm2
+	call	L_key_expansion_192
+
+#ifdef	KERNEL
+	// restore xmm registers
+	movaps	(sp), %xmm1
+	movaps	16(sp), %xmm2
+	movaps	32(sp), %xmm3
+	movaps	48(sp), %xmm4
+#endif	// KERNEL
+	xor		%eax, %eax			// return 0 for success
+	jmp		L_Encrypt_Key_2_return
+
+	.align	4, 0x90
+L_key_expansion_192:
+	pshufd	$0x55, %xmm2, %xmm2
+
+	movaps	%xmm1, %xmm4
+	pslldq	$4, %xmm4
+
+	pxor	%xmm4, %xmm1
+	pslldq	$4, %xmm4
+
+	pxor	%xmm4, %xmm1
+	pslldq	$4, %xmm4
+
+	pxor	%xmm4, %xmm1
+	pxor	%xmm2, %xmm1
+
+	pshufd	$0xff, %xmm1, %xmm2
+
+	movaps	%xmm3, %xmm4
+	pslldq	$4, %xmm4
+
+	pxor	%xmm4, %xmm3
+	pxor	%xmm2, %xmm3
+
+	movups	%xmm1, (cx)
+	movq	%xmm3, 16(cx)
+
+	add		$24, cx
+	ret
+
+L_AES_256_Encrypt_Key:
+#ifdef	KERNEL
+	// save xmm registers
+	movaps	%xmm1, (sp)
+	movaps	%xmm2, 16(sp)
+	movaps	%xmm3, 32(sp)
+	movaps	%xmm4, 48(sp)
+#endif	// KERNEL
+	movl	$224, 240(ctx)		// write expanded key length to ctx
+
+	movups	(pkey), %xmm1
+	movups	16(pkey), %xmm3
+	movups	%xmm1, (ctx)
+	movups	%xmm3, 16(ctx)
+
+	lea		32(ctx), cx
+
+	aeskeygenassist	$1, %xmm3, %xmm2
+	call	L_key_expansion_256
+	aeskeygenassist	$2, %xmm3, %xmm2
+	call	L_key_expansion_256
+	aeskeygenassist	$4, %xmm3, %xmm2
+	call	L_key_expansion_256
+	aeskeygenassist	$8, %xmm3, %xmm2
+	call	L_key_expansion_256
+	aeskeygenassist	$0x10, %xmm3, %xmm2
+	call	L_key_expansion_256
+	aeskeygenassist	$0x20, %xmm3, %xmm2
+	call	L_key_expansion_256
+	aeskeygenassist	$0x40, %xmm3, %xmm2
+	call	L_key_expansion_256_final
+
+#ifdef	KERNEL
+	// restore xmm registers
+	movaps	(sp), %xmm1
+	movaps	16(sp), %xmm2
+	movaps	32(sp), %xmm3
+	movaps	48(sp), %xmm4
+#endif	// KERNEL
+	xor		%eax, %eax			// return 0 for success
+	jmp		L_Encrypt_Key_2_return
+
+	.align	4, 0x90
+L_key_expansion_256:
+
+	pshufd	$0xff, %xmm2, %xmm2
+
+	movaps	%xmm1, %xmm4
+	pslldq	$4, %xmm4
+
+	pxor	%xmm4, %xmm1
+	pslldq	$4, %xmm4
+
+	pxor	%xmm4, %xmm1
+	pslldq	$4, %xmm4
+
+	pxor	%xmm4, %xmm1
+	pxor	%xmm2, %xmm1
+
+	movups	%xmm1, (cx)
+
+	aeskeygenassist	$0, %xmm1, %xmm4
+
+	pshufd	$0xaa, %xmm4, %xmm2
+
+	movaps	%xmm3, %xmm4
+	pslldq	$4, %xmm4
+
+	pxor	%xmm4, %xmm3
+	pslldq	$4, %xmm4
+
+	pxor	%xmm4, %xmm3
+	pslldq	$4, %xmm4
+
+	pxor	%xmm4, %xmm3
+	pxor	%xmm2, %xmm3
+
+	movups	%xmm3, 16(cx)
+
+	add		$32, cx
+	ret
+
+	.align	4, 0x90
+L_key_expansion_256_final:
+
+	pshufd	$0xff, %xmm2, %xmm2
+
+	movaps	%xmm1, %xmm4
+	pslldq	$4, %xmm4
+
+	pxor	%xmm4, %xmm1
+	pslldq	$4, %xmm4
+
+	pxor	%xmm4, %xmm1
+	pslldq	$4, %xmm4
+
+	pxor	%xmm4, %xmm1
+	pxor	%xmm2, %xmm1
+
+	movups	%xmm1, (cx)
+	ret 
+
+// _aes_decrypt_key_hw is implemented as
+// 	1. call _aes_encrypt_key_hw
+// 	2. use aesimc to convert the expanded round keys (except the 1st and last round keys)
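+//
+//	For reference, a hedged C sketch of step 2 (intrinsic from <wmmintrin.h>),
+//	assuming "rk" holds the Nr+1 expanded encryption round keys:
+//
+//		for (int r = 1; r < Nr; ++r)		// skip the first and last round keys
+//			rk[r] = _mm_aesimc_si128(rk[r]);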
+
+	.text	
+	.align	4, 0x90
+	.globl	_aes_decrypt_key_hw
+_aes_decrypt_key_hw:
+
+#ifdef	__i386__
+
+	push	%ebp
+	mov		%esp, %ebp
+	sub		$(8+16), %esp
+
+	// copy input arguments for calling aes_decrypt_key_hw
+
+	mov		8(%ebp), %eax
+	mov		%eax, (%esp)
+	mov		12(%ebp), %eax
+	mov		%eax, 4(%esp)
+	mov		16(%ebp), %eax
+	mov		%eax, 8(%esp)
+
+#else
+
+	push	%rbp
+	mov		%rsp, %rbp
+	sub		$16, %rsp
+
+	// calling arguments %rdi/%rsi/%rdx will be used for encrypt_key 
+	// %rdx (ctx) will return unchanged
+	// %rsi (klen) will be shifted left by 3 (bytes to bits) if <= 32
+
+#endif
+	call	_aes_encrypt_key_hw
+	cmp		$0, %eax
+	je		L_decrypt_inv
+L_decrypt_almost_done:
+#ifdef	__i386__
+	add		$(8+16), %esp
+#else
+	add		$16, %rsp
+#endif
+	leave
+	ret
+
+L_decrypt_inv:
+#ifdef	KERNEL
+	movaps	%xmm0, (sp)
+#endif
+
+#ifdef	__i386__	
+	#undef	klen
+	#undef	ctx	
+	mov		12(%ebp), %eax		// klen
+	mov		16(%ebp), %edx		// ctx
+	#define	klen	%eax
+	#define	ctx		%edx
+	cmp		$32, klen
+	jg		0f					// klen>32
+	shl		$3, klen			// convert 16/24/32 to 128/192/256
+0:
+#endif
+
+	mov		$9, cx				// default is AES-128
+	cmp		$128, klen
+	je		L_Decrypt_Key
+	add		$2, cx
+	cmp		$192, klen
+	je		L_Decrypt_Key
+	add		$2, cx 
+
+L_Decrypt_Key:
+	add		$16, ctx
+	movups	(ctx), %xmm0
+	aesimc	%xmm0, %xmm0
+	movups	%xmm0, (ctx)
+	sub		$1, cx
+	jg		L_Decrypt_Key
+
+#ifdef	KERNEL
+	movaps	(sp), %xmm0
+#endif
+#ifdef	__i386__
+	xor		%eax, %eax
+#endif
+	jmp		L_decrypt_almost_done
+
diff --git a/bsd/crypto/aes/i386/aes_modes.c b/bsd/crypto/aes/i386/aes_modes.c
deleted file mode 100644
index fd8b1401b..000000000
--- a/bsd/crypto/aes/i386/aes_modes.c
+++ /dev/null
@@ -1,471 +0,0 @@
-/*
- ---------------------------------------------------------------------------
- Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
-
- LICENSE TERMS
-
- The free distribution and use of this software in both source and binary
- form is allowed (with or without changes) provided that:
-
-   1. distributions of this source code include the above copyright
-      notice, this list of conditions and the following disclaimer;
-
-   2. distributions in binary form include the above copyright
-      notice, this list of conditions and the following disclaimer
-      in the documentation and/or other associated materials;
-
-   3. the copyright holder's name is not used to endorse products
-      built using this software without specific written permission.
-
- ALTERNATIVELY, provided that this notice is retained in full, this product
- may be distributed under the terms of the GNU General Public License (GPL),
- in which case the provisions of the GPL apply INSTEAD OF those given above.
-
- DISCLAIMER
-
- This software is provided 'as is' with no explicit or implied warranties
- in respect of its properties, including, but not limited to, correctness
- and/or fitness for purpose.
- ---------------------------------------------------------------------------
- Issue 31/01/2006
-
- These subroutines implement multiple block AES modes for ECB, CBC, CFB,
- OFB and CTR encryption,  The code provides support for the VIA Advanced 
- Cryptography Engine (ACE).
-
- NOTE: In the following subroutines, the AES contexts (ctx) must be
- 16 byte aligned if VIA ACE is being used
-*/
-
-//#include <memory.h>
-#include <kern/assert.h>
-
-#include "aesopt.h"
-
-#if defined( AES_MODES )
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-#if defined( _MSC_VER ) && ( _MSC_VER > 800 )
-#pragma intrinsic(memcpy)
-#define in_line __inline
-#else
-#define in_line
-#endif
-
-#define BFR_BLOCKS      8
-
-/* These values are used to detect long word alignment in order to */
-/* speed up some buffer operations. This facility may not work on  */
-/* some machines so this define can be commented out if necessary  */
-
-#define FAST_BUFFER_OPERATIONS
-#pragma warning( disable : 4311 4312 )
-
-#define lp08(x)         ((uint_8t*)(x))
-#define lp32(x)         ((uint_32t*)(x))
-#define addr_mod_04(x)	((unsigned long)(x) & 3)
-#define addr_mod_16(x)  ((unsigned long)(x) & 15)
-
-#if defined( USE_VIA_ACE_IF_PRESENT )
-
-#include "via_ace.h"
-
-#pragma pack(16)
-
-aligned_array(unsigned long,    enc_gen_table, 12, 16) =    NEH_ENC_GEN_DATA;
-aligned_array(unsigned long,   enc_load_table, 12, 16) =   NEH_ENC_LOAD_DATA;
-aligned_array(unsigned long, enc_hybrid_table, 12, 16) = NEH_ENC_HYBRID_DATA;
-aligned_array(unsigned long,    dec_gen_table, 12, 16) =    NEH_DEC_GEN_DATA;
-aligned_array(unsigned long,   dec_load_table, 12, 16) =   NEH_DEC_LOAD_DATA;
-aligned_array(unsigned long, dec_hybrid_table, 12, 16) = NEH_DEC_HYBRID_DATA;
-
-/* NOTE: These control word macros must only be used after  */
-/* a key has been set up because they depend on key size    */
-
-#if NEH_KEY_TYPE == NEH_LOAD
-#define kd_adr(c)   ((uint_8t*)(c)->ks)
-#elif NEH_KEY_TYPE == NEH_GENERATE
-#define kd_adr(c)   ((uint_8t*)(c)->ks + (c)->inf.b[0])
-#else
-#define kd_adr(c)   ((uint_8t*)(c)->ks + ((c)->inf.b[0] == 160 ? 160 : 0))
-#endif
-
-#else
-
-#define aligned_array(type, name, no, stride) type name[no]
-#define aligned_auto(type, name, no, stride)  type name[no]
-
-#endif
-
-#if defined( _MSC_VER ) && _MSC_VER > 1200
-
-#define via_cwd(cwd, ty, dir, len) unsigned long* cwd = (dir##_##ty##_table + ((len - 128) >> 4)) 
-
-#else
-
-#define via_cwd(cwd, ty, dir, len)				\
-    aligned_auto(unsigned long, cwd, 4, 16);	\
-    cwd[1] = cwd[2] = cwd[3] = 0;				\
-    cwd[0] = neh_##dir##_##ty##_key(len)
-
-#endif
-
-/* implemented in case of wrong call for fixed tables */
-void gen_tabs(void)
-{
-}
-
-aes_rval aes_mode_reset(aes_encrypt_ctx ctx[1])
-{
-    ctx->inf.b[2] = 0; 
-    return 0;
-}
-
-aes_rval aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, const aes_encrypt_ctx ctx[1])
-{   int nb = len >> 4;
-
-    if(len & (AES_BLOCK_SIZE - 1))
-        return 1;
-
-#if defined( USE_VIA_ACE_IF_PRESENT )
-
-    if(ctx->inf.b[1] == 0xff)
-    {   uint_8t *ksp = (uint_8t*)(ctx->ks);
-        via_cwd(cwd, hybrid, enc, 2* ctx->inf.b[0] - 192);	
-
-        if(addr_mod_16(ctx))
-            return 1;
-
-        if(!addr_mod_16(ibuf) && !addr_mod_16(obuf))
-        {
-            via_ecb_op5(ksp,cwd,ibuf,obuf,nb);
-        }
-        else
-        {   aligned_auto(uint_8t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
-            uint_8t *ip, *op;
-
-            while(nb)
-            {
-                int m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb);
-
-                ip = (addr_mod_16(ibuf) ? buf : (uint_8t*)ibuf);
-                op = (addr_mod_16(obuf) ? buf : obuf);
-
-                if(ip != ibuf)
-                    memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
-
-                via_ecb_op5(ksp,cwd,ip,op,m);
-
-                if(op != obuf)
-                    memcpy(obuf, buf, m * AES_BLOCK_SIZE);
-
-                ibuf += m * AES_BLOCK_SIZE;
-                obuf += m * AES_BLOCK_SIZE;
-                nb -= m;
-            }
-        }
-
-        return 0;
-    }
-
-#endif
-
-#if !defined( ASSUME_VIA_ACE_PRESENT )
-    while(nb--)
-    {
-        aes_encrypt(ibuf, obuf, ctx);
-        ibuf += AES_BLOCK_SIZE;
-        obuf += AES_BLOCK_SIZE;
-    }
-#endif
-    return 0;
-}
-
-aes_rval aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, const aes_decrypt_ctx ctx[1])
-{   int nb = len >> 4;
-
-    if(len & (AES_BLOCK_SIZE - 1))
-        return 1;
-
-#if defined( USE_VIA_ACE_IF_PRESENT )
-
-    if(ctx->inf.b[1] == 0xff)
-    {   uint_8t *ksp = kd_adr(ctx);
-        via_cwd(cwd, hybrid, dec, 2* ctx->inf.b[0] - 192);	
-
-        if(addr_mod_16(ctx))
-            return 1;
-
-        if(!addr_mod_16(ibuf) && !addr_mod_16(obuf))
-        {
-            via_ecb_op5(ksp,cwd,ibuf,obuf,nb);
-        }
-        else
-        {   aligned_auto(uint_8t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
-            uint_8t *ip, *op;
-
-            while(nb)
-            {
-                int m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb);
-
-                ip = (addr_mod_16(ibuf) ? buf : (uint_8t*)ibuf);
-                op = (addr_mod_16(obuf) ? buf : obuf);
-
-                if(ip != ibuf)
-                    memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
-
-                via_ecb_op5(ksp,cwd,ip,op,m);
-
-                if(op != obuf)
-                    memcpy(obuf, buf, m * AES_BLOCK_SIZE);
-
-                ibuf += m * AES_BLOCK_SIZE;
-                obuf += m * AES_BLOCK_SIZE;
-                nb -= m;
-            }
-        }
-
-        return 0;
-    }
-
-#endif
-
-#if !defined( ASSUME_VIA_ACE_PRESENT )
-    while(nb--)
-    {
-        aes_decrypt(ibuf, obuf, ctx);
-        ibuf += AES_BLOCK_SIZE;
-        obuf += AES_BLOCK_SIZE;
-    }
-#endif
-    return 0;
-}
-
-aes_rval aes_cbc_encrypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, unsigned char *iv, const aes_encrypt_ctx ctx[1])
-{   int nb = len >> 4;
-
-    if(len & (AES_BLOCK_SIZE - 1))
-        return 1;
-
-#if defined( USE_VIA_ACE_IF_PRESENT )
-
-    if(ctx->inf.b[1] == 0xff)
-    {   uint_8t *ksp = (uint_8t*)(ctx->ks), *ivp = iv;
-        aligned_auto(uint_8t, liv, AES_BLOCK_SIZE, 16);
-        via_cwd(cwd, hybrid, enc, 2* ctx->inf.b[0] - 192);	
-
-        if(addr_mod_16(ctx))
-            return 1;
-
-        if(addr_mod_16(iv))   /* ensure an aligned iv */
-        {
-            ivp = liv;
-            memcpy(liv, iv, AES_BLOCK_SIZE);
-        }
-
-        if(!addr_mod_16(ibuf) && !addr_mod_16(obuf) && !addr_mod_16(iv))
-        {
-            via_cbc_op7(ksp,cwd,ibuf,obuf,nb,ivp,ivp);
-        }
-        else
-        {   aligned_auto(uint_8t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
-            uint_8t *ip, *op;
-
-            while(nb)
-            {
-                int m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb);
-
-                ip = (addr_mod_16(ibuf) ? buf : (uint_8t*)ibuf);
-                op = (addr_mod_16(obuf) ? buf : obuf);
-
-                if(ip != ibuf)
-                    memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
-
-                via_cbc_op7(ksp,cwd,ip,op,m,ivp,ivp);
-
-                if(op != obuf)
-                    memcpy(obuf, buf, m * AES_BLOCK_SIZE);
-
-                ibuf += m * AES_BLOCK_SIZE;
-                obuf += m * AES_BLOCK_SIZE;
-                nb -= m;
-            }
-        }
-
-        if(iv != ivp)
-            memcpy(iv, ivp, AES_BLOCK_SIZE);
-
-        return 0;
-    }
-
-#endif
-
-#if !defined( ASSUME_VIA_ACE_PRESENT )
-# ifdef FAST_BUFFER_OPERATIONS
-    if(!addr_mod_04(ibuf) && !addr_mod_04(iv))
-        while(nb--)
-        {
-            lp32(iv)[0] ^= lp32(ibuf)[0];
-            lp32(iv)[1] ^= lp32(ibuf)[1];
-            lp32(iv)[2] ^= lp32(ibuf)[2];
-            lp32(iv)[3] ^= lp32(ibuf)[3];
-            aes_encrypt(iv, iv, ctx);
-            memcpy(obuf, iv, AES_BLOCK_SIZE);
-            ibuf += AES_BLOCK_SIZE;
-            obuf += AES_BLOCK_SIZE;
-        }
-    else
-# endif
-        while(nb--)
-        {
-            iv[ 0] ^= ibuf[ 0]; iv[ 1] ^= ibuf[ 1];
-            iv[ 2] ^= ibuf[ 2]; iv[ 3] ^= ibuf[ 3];
-            iv[ 4] ^= ibuf[ 4]; iv[ 5] ^= ibuf[ 5];
-            iv[ 6] ^= ibuf[ 6]; iv[ 7] ^= ibuf[ 7];
-            iv[ 8] ^= ibuf[ 8]; iv[ 9] ^= ibuf[ 9];
-            iv[10] ^= ibuf[10]; iv[11] ^= ibuf[11];
-            iv[12] ^= ibuf[12]; iv[13] ^= ibuf[13];
-            iv[14] ^= ibuf[14]; iv[15] ^= ibuf[15];
-            aes_encrypt(iv, iv, ctx);
-            memcpy(obuf, iv, AES_BLOCK_SIZE);
-            ibuf += AES_BLOCK_SIZE;
-            obuf += AES_BLOCK_SIZE;
-        }
-#endif
-    return 0;
-}
-
-aes_rval aes_encrypt_cbc(const unsigned char *in_blk, const unsigned char *in_iv, unsigned int num_blk,
-					 unsigned char *out_blk, const aes_encrypt_ctx cx[1])
-{
-		unsigned char tmp_iv[16];
-		int i;
-		
-		for (i = 0; i < 16; i++)
-			tmp_iv[i] = *(in_iv + i);
-		
-		return aes_cbc_encrypt(in_blk, out_blk, num_blk<<4, tmp_iv, cx);
-
-}
-
-aes_rval aes_cbc_decrypt(const unsigned char *ibuf, unsigned char *obuf,
-                    int len, unsigned char *iv, const aes_decrypt_ctx ctx[1])
-{   unsigned char tmp[AES_BLOCK_SIZE];
-    int nb = len >> 4;
-
-    if(len & (AES_BLOCK_SIZE - 1))
-        return 1;
-
-#if defined( USE_VIA_ACE_IF_PRESENT )
-
-    if(ctx->inf.b[1] == 0xff)
-    {   uint_8t *ksp = kd_adr(ctx), *ivp = iv;
-        aligned_auto(uint_8t, liv, AES_BLOCK_SIZE, 16);
-        via_cwd(cwd, hybrid, dec, 2* ctx->inf.b[0] - 192);	
-
-        if(addr_mod_16(ctx))
-            return 1;
-
-        if(addr_mod_16(iv))   /* ensure an aligned iv */
-        {
-            ivp = liv;
-            memcpy(liv, iv, AES_BLOCK_SIZE);
-        }
-
-        if(!addr_mod_16(ibuf) && !addr_mod_16(obuf) && !addr_mod_16(iv))
-        {
-            via_cbc_op6(ksp,cwd,ibuf,obuf,nb,ivp);
-        }
-        else
-        {   aligned_auto(uint_8t, buf, BFR_BLOCKS * AES_BLOCK_SIZE, 16);
-            uint_8t *ip, *op;
-
-            while(nb)
-            {
-                int m = (nb > BFR_BLOCKS ? BFR_BLOCKS : nb);
-
-                ip = (addr_mod_16(ibuf) ? buf : (uint_8t*)ibuf);
-                op = (addr_mod_16(obuf) ? buf : obuf);
-
-                if(ip != ibuf)
-                    memcpy(buf, ibuf, m * AES_BLOCK_SIZE);
-
-                via_cbc_op6(ksp,cwd,ip,op,m,ivp);
-
-                if(op != obuf)
-                    memcpy(obuf, buf, m * AES_BLOCK_SIZE);
-
-                ibuf += m * AES_BLOCK_SIZE;
-                obuf += m * AES_BLOCK_SIZE;
-                nb -= m;
-            }
-        }
-
-        if(iv != ivp)
-            memcpy(iv, ivp, AES_BLOCK_SIZE);
-
-        return 0;
-    }
-#endif
-
-#if !defined( ASSUME_VIA_ACE_PRESENT )
-# ifdef FAST_BUFFER_OPERATIONS
-    if(!addr_mod_04(obuf) && !addr_mod_04(iv))
-        while(nb--)
-        {
-            memcpy(tmp, ibuf, AES_BLOCK_SIZE);
-            aes_decrypt(ibuf, obuf, ctx);
-            lp32(obuf)[0] ^= lp32(iv)[0];
-            lp32(obuf)[1] ^= lp32(iv)[1];
-            lp32(obuf)[2] ^= lp32(iv)[2];
-            lp32(obuf)[3] ^= lp32(iv)[3];
-            memcpy(iv, tmp, AES_BLOCK_SIZE);
-            ibuf += AES_BLOCK_SIZE;
-            obuf += AES_BLOCK_SIZE;
-        }
-    else
-# endif
-        while(nb--)
-        {
-            memcpy(tmp, ibuf, AES_BLOCK_SIZE);
-            aes_decrypt(ibuf, obuf, ctx);
-            obuf[ 0] ^= iv[ 0]; obuf[ 1] ^= iv[ 1];
-            obuf[ 2] ^= iv[ 2]; obuf[ 3] ^= iv[ 3];
-            obuf[ 4] ^= iv[ 4]; obuf[ 5] ^= iv[ 5];
-            obuf[ 6] ^= iv[ 6]; obuf[ 7] ^= iv[ 7];
-            obuf[ 8] ^= iv[ 8]; obuf[ 9] ^= iv[ 9];
-            obuf[10] ^= iv[10]; obuf[11] ^= iv[11];
-            obuf[12] ^= iv[12]; obuf[13] ^= iv[13];
-            obuf[14] ^= iv[14]; obuf[15] ^= iv[15];
-            memcpy(iv, tmp, AES_BLOCK_SIZE);
-            ibuf += AES_BLOCK_SIZE;
-            obuf += AES_BLOCK_SIZE;
-        }
-#endif
-    return 0;
-}
-
-aes_rval aes_decrypt_cbc(const unsigned char *in_blk, const unsigned char *in_iv, unsigned int num_blk,
-					 unsigned char *out_blk, const aes_decrypt_ctx cx[1])
-{
-		unsigned char tmp_iv[16];
-		int i;
-		
-		for (i = 0; i < 16; i++)
-			tmp_iv[i] = *(in_iv + i);
-		
-		return aes_cbc_decrypt(in_blk, out_blk, num_blk<<4, tmp_iv, cx);
-
-}
-
-
-#if defined(__cplusplus)
-}
-#endif
-#endif
diff --git a/bsd/crypto/aes/i386/aes_modes_asm.s b/bsd/crypto/aes/i386/aes_modes_asm.s
new file mode 100644
index 000000000..3b0f29aa1
--- /dev/null
+++ b/bsd/crypto/aes/i386/aes_modes_asm.s
@@ -0,0 +1,420 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
+
+ LICENSE TERMS
+
+ The free distribution and use of this software in both source and binary
+ form is allowed (with or without changes) provided that:
+
+   1. distributions of this source code include the above copyright
+      notice, this list of conditions and the following disclaimer;
+
+   2. distributions in binary form include the above copyright
+      notice, this list of conditions and the following disclaimer
+      in the documentation and/or other associated materials;
+
+   3. the copyright holder's name is not used to endorse products
+      built using this software without specific written permission.
+
+ ALTERNATIVELY, provided that this notice is retained in full, this product
+ may be distributed under the terms of the GNU General Public License (GPL),
+ in which case the provisions of the GPL apply INSTEAD OF those given above.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue 31/01/2006
+
+ These subroutines implement multiple block AES modes for ECB, CBC, CFB,
+ OFB and CTR encryption. The code provides support for the VIA Advanced 
+ Cryptography Engine (ACE).
+
+ NOTE: In the following subroutines, the AES contexts (ctx) must be
+ 16 byte aligned if VIA ACE is being used
+*/
+
+/* modified 3/5/10 cclee */
+/* Clean up those related to VIA ACE and hand optimize aes_cbc_encrypt and aes_cbc_decrypt */
+/* move the xmm registers save/restore originally inside the callee functions into these 2 caller functions */
+
+/* add code comments/description and HW AES detection and execution branch cclee 3-13-10 */
+
+#ifdef KERNEL
+#include <i386/cpu_capabilities.h> 	// to use __cpu_capabilities&kHasAES to detect Intel Westmere AES HW
+#else
+#include <System/i386/cpu_capabilities.h> 	// to use __cpu_capabilities&kHasAES to detect Intel Westmere AES HW
+#endif
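+
+/*
+	Illustration only (not assembled): the detection performed at the top of
+	_aes_encrypt_cbc and _aes_decrypt_cbc below corresponds roughly to this C
+	sketch. The C-level spelling _cpu_capabilities is an assumption here; the
+	asm reads the __cpu_capabilities word (or the commpage copy in user mode).
+
+	extern int _cpu_capabilities;				// assumed name
+
+	if (_cpu_capabilities & kHasAES)
+		return aes_encrypt_cbc_hw(...);			// AES-NI path, aes_modes_hw.s
+	// otherwise fall through to the software implementation below
+*/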
+
+#if 0
+
+// TODO:
+// aes_ecb_encrypt and aes_ecb_decrypt are not present in gen/aescrypt.c;
+// the implementations would be added here if needed.
+// For now they are compiled from aes_modes.c.
+
+aes_rval aes_ecb_encrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, const aes_encrypt_ctx ctx[1])
+{   int nb = len >> 4;
+
+    if(len & (AES_BLOCK_SIZE - 1)) return 1;
+    while(nb--) {
+        aes_encrypt(ibuf, obuf, ctx);
+        ibuf += AES_BLOCK_SIZE;
+        obuf += AES_BLOCK_SIZE;
+    }
+    return 0;
+}
+
+aes_rval aes_ecb_decrypt(const unsigned char *ibuf, unsigned char *obuf,
+                    int len, const aes_decrypt_ctx ctx[1])
+{   int nb = len >> 4;
+
+    if(len & (AES_BLOCK_SIZE - 1)) return 1;
+    while(nb--) {
+        aes_decrypt(ibuf, obuf, ctx);
+        ibuf += AES_BLOCK_SIZE;
+        obuf += AES_BLOCK_SIZE;
+    }
+    return 0;
+}
+#endif
+
+#if 0
+aes_rval aes_encrypt_cbc(const unsigned char *ibuf, const unsigned char *in_iv, unsigned int num_blk,
+					 unsigned char *obuf, const aes_encrypt_ctx ctx[1])
+{
+		unsigned char iv[16];
+		int i;
+		
+		for (i = 0; i < 16; i++) iv[i] = *(in_iv + i);
+
+		while (num_blk--) {
+			for (i = 0; i < 16; i++) iv[i] ^= ibuf[i];	// 128-bit iv ^= *ibuf
+            aes_encrypt(iv, iv, ctx);
+            memcpy(obuf, iv, AES_BLOCK_SIZE);
+            ibuf += AES_BLOCK_SIZE;
+            obuf += AES_BLOCK_SIZE;
+			
+		}		
+
+		return 0;
+}
+#endif
+
+	.text
+	.align	4,0x90
+	.globl	_aes_encrypt_cbc
+_aes_encrypt_cbc:
+
+	// detect AES HW
+	// if AES HW detected, branch to AES-HW-specific function _aes_encrypt_cbc_hw (aes_modes_hw.s)
+	// o.w., fall through to the original AES-SW function
+
+#if defined	__x86_64__
+	movq	__cpu_capabilities@GOTPCREL(%rip), %rax			// %rax -> __cpu_capability
+	mov		(%rax), %eax									// %eax = __cpu_capabilities
+#else
+#ifdef KERNEL
+	leal	__cpu_capabilities, %eax						// %eax -> __cpu_capabilities
+	mov		(%eax), %eax									// %eax = __cpu_capabilities
+#else
+	mov    _COMM_PAGE_CPU_CAPABILITIES, %eax
+#endif
+#endif
+	test	$(kHasAES), %eax								// kHasAES & __cpu_capabilities
+	jne		_aes_encrypt_cbc_hw								// if AES HW detected, branch to HW-specific code
+
+	// save registers and allocate stack memory for xmm registers and calling arguments (i386 only)
+#if	defined	__i386__
+	push	%ebp
+	mov		%esp, %ebp
+	push	%ebx					// to be used as ibuf
+	push	%edi					// to be used as obuf
+	sub		$(16+16+7*16), %esp		// 12 (calling arguments) + 4 (%esi) + 16 (iv) + 7*16 (xmm)
+	mov		%esi, 12(%esp)			// save %esi in the unused 4 bytes; %esi will be used as num_blk
+
+	#define	sp	%esp
+#else	// __x86_64__
+	push	%rbp
+	mov		%rsp, %rbp
+	push	%rbx
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+	sub		$(8+16+5*16+16), %rsp	// 8 (align) + 16 (dummy iv) + 5*16 (xmm) + 16 (for i386-x86_64 consistency)	
+
+	#define	sp	%rsp
+#endif
+
+	// save xmm registers for kernel use
+	// xmm6-xmm7 will be used locally
+	// xmm0-xmm2 (x86_64) or xmm0-xmm4 (i386) will be used inside _aes_encrypt_xmm_no_save (non-restored)
+	// there is a hole not used for xmm, which is 48(sp). 
+	// it has been used to store iv (16-bytes) in i386 code
+	// for consistency between i386 and x86_64, this hole is dummied in x86_64 code
+	// also the 1st 16 bytes (sp) is dummied in x86_64 code
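+	//
+	// resulting offsets from sp, for reference:
+	//   i386  : 0-11 callee args, 12 saved %esi, 16 xmm7, 32 xmm6,
+	//           48 iv[16], 64 xmm0, 80 xmm1, 96 xmm2, 112 xmm3, 128 xmm4
+	//   x86_64: 16 xmm7, 32 xmm6, 48 iv[16] (addressed via %r12), 64 xmm0, 80 xmm1, 96 xmm2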
+
+#ifdef	KERNEL
+	movaps	%xmm7, 16(sp)
+	movaps	%xmm6, 32(sp)
+	movaps	%xmm0, 64(sp)
+	movaps	%xmm1, 80(sp)
+	movaps	%xmm2, 96(sp)
+#if defined	__i386__
+	movaps	%xmm3, 112(sp)
+	movaps	%xmm4, 128(sp)
+#endif
+#endif
+
+	// set up registers from calling arguments
+
+#if defined	__i386__
+
+	mov		12(%ebp), %eax			// in_iv
+	mov		24(%ebp), %edx			// ctx
+	movups	(%eax), %xmm7			// in_iv	
+	lea		48(%esp), %eax			// &iv[0]
+	mov		%eax, (%esp)			// 1st iv for aes_encrypt
+	mov		%eax, 4(%esp)			// 2nd iv for aes_encrypt
+	mov		%edx, 8(%esp)			// ctx for aes_encrypt
+	mov		8(%ebp), %ebx			// ibuf
+	mov		16(%ebp), %esi			// num_blk
+	mov		20(%ebp), %edi			// obuf
+
+	#define	ibuf	%ebx
+	#define	obuf	%edi
+	#define num_blk	%esi	
+
+#else	//	__x86_64__, calling arguments order : rdi/rsi/rdx/rcx/r8
+
+	mov		%rdi, %rbx				// ibuf
+	lea		48(sp), %r12			// &iv
+	movups	(%rsi), %xmm7			// in_iv
+	mov		%rdx, %r13				// num_blk
+	mov		%rcx, %r14				// obuf
+	mov		%r8, %r15				// ctx	
+
+	#define	ibuf	%rbx
+	#define	iv		%r12
+	#define	num_blk	%r13d
+	#define	obuf	%r14	
+	#define	ctx		%r15
+
+#endif
+
+	cmp		$1, num_blk				// num_blk vs 1
+	jl		9f						// if num_blk < 1, branch to bypass the main loop
+0:
+	movups	(ibuf), %xmm6			// ibuf
+#if defined	__i386__
+	lea		48(sp), %eax			// &iv[0]
+	pxor	%xmm6, %xmm7			// iv ^= ibuf
+	movups	%xmm7, (%eax)			// save iv
+#else
+	pxor	%xmm6, %xmm7			// iv ^= ibuf
+	movups	%xmm7, (iv)				// save iv
+	mov		iv, %rdi				// 1st calling argument for aes_encrypt
+	mov		iv, %rsi				// 2nd calling argument for aes_encrypt
+	mov		ctx, %rdx				// 3rd calling argument for aes_encrypt
+#endif
+	call	_aes_encrypt_xmm_no_save	// aes_encrypt(iv, iv, ctx)
+#if defined __i386__
+	leal	48(%esp), %eax			// &iv[0]
+	movups	(%eax), %xmm7			// read iv
+#else
+	movups	(iv), %xmm7				// read iv
+#endif
+	movups	%xmm7, (obuf)			// memcpy(obuf, iv, AES_BLOCK_SIZE);
+	add		$16, ibuf				// ibuf += AES_BLOCK_SIZE; 
+	add		$16, obuf				// obuf += AES_BLOCK_SIZE;	
+	sub		$1, num_blk				// num_blk --
+	jg		0b						// if num_blk > 0, repeat the loop
+9:	
+
+L_crypt_cbc_done:
+
+	// restore xmm registers due to kernel use
+#ifdef	KERNEL
+	movaps	16(sp), %xmm7
+	movaps	32(sp), %xmm6
+	movaps	64(sp), %xmm0
+	movaps	80(sp), %xmm1
+	movaps	96(sp), %xmm2
+#if defined	__i386__
+	movaps	112(sp), %xmm3
+	movaps	128(sp), %xmm4
+#endif
+#endif
+
+	xor		%eax, %eax				// to return 0 for SUCCESS
+
+#if	defined	__i386__
+	mov		12(%esp), %esi			// restore %esi
+	add		$(16+16+7*16), %esp		// 12 (calling arguments) + 4 (%esi) + 16 (iv) + 7*16 (xmm)
+	pop		%edi
+	pop		%ebx
+#else
+	add		$(8+16+5*16+16), %rsp	// 8 (align) + 16 (dummy iv) + 5*16 (xmm) + 16 (for i386-x86_64 consistency)	
+	pop		%r15
+	pop		%r14
+	pop		%r13
+	pop		%r12
+	pop		%rbx
+#endif
+	leave
+	ret
+
+#if 0
+aes_rval aes_decrypt_cbc(const unsigned char *ibuf, const unsigned char *in_iv, unsigned int num_blk,
+					 unsigned char *obuf, const aes_decrypt_ctx cx[1])
+{
+		unsigned char iv[16], tmp[16];
+		int i;
+		
+		for (i = 0; i < 16; i++) iv[i] = *(in_iv + i);
+
+		while (num_blk--) {
+
+            memcpy(tmp, ibuf, AES_BLOCK_SIZE);
+            aes_decrypt(ibuf, obuf, cx);
+			for (i = 0; i < 16; i++) obuf[i] ^= iv[i];	// 128-bit *obuf ^= *iv
+            memcpy(iv, tmp, AES_BLOCK_SIZE);
+            ibuf += AES_BLOCK_SIZE;
+            obuf += AES_BLOCK_SIZE;
+		}
+
+		return 0;
+}
+#endif
+
+	.text
+	.align	4,0x90
+	.globl	_aes_decrypt_cbc
+_aes_decrypt_cbc:
+
+	// detect AES HW
+	// if AES HW detected, branch to AES-HW-specific function _aes_decrypt_cbc_hw (aes_modes_hw.s)
+	// o.w., fall through to the original AES-SW function
+
+#if defined	__x86_64__
+	movq	__cpu_capabilities@GOTPCREL(%rip), %rax			// %rax -> __cpu_capability
+	mov		(%rax), %eax									// %eax = __cpu_capabilities
+#else
+#ifdef KERNEL
+	leal	__cpu_capabilities, %eax						// %eax -> __cpu_capabilities
+	mov		(%eax), %eax									// %eax = __cpu_capabilities
+#else
+	mov    _COMM_PAGE_CPU_CAPABILITIES, %eax
+#endif
+#endif
+	test	$(kHasAES), %eax								// kHasAES & __cpu_capabilities
+	jne		_aes_decrypt_cbc_hw
+
+	// save registers and allocate stack memory for xmm registers and calling arguments (i386 only)
+#if	defined	__i386__
+	push	%ebp
+	mov		%esp, %ebp
+	push	%ebx					// to be used as ibuf
+	push	%edi					// to be used as obuf
+	sub		$(16+16+7*16), %esp		// 12 (calling arguments) + 4 (%esi) + 16 (iv) + 7*16 (xmm)
+	mov		%esi, 12(%esp)			// save %esi in the unused 4 bytes; %esi will be used as num_blk
+
+	#define	sp	%esp
+#else	// __x86_64__
+	push	%rbp
+	mov		%rsp, %rbp
+	push	%rbx
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+	sub		$(8+16+5*16+16), %rsp	// 8 (align) + 16 (dummy iv) + 5*16 (xmm) + 16 (for i386-x86_64 consistency)	
+
+	#define	sp	%rsp
+#endif
+
+	// save xmm registers for kernel use
+	// xmm6-xmm7 will be used locally
+	// xmm0-xmm2 (x86_64) or xmm0-xmm4 (i386) will be used inside _aes_decrypt_xmm_no_save (non-restored)
+	// there is a hole not used for xmm, which is 48(sp). 
+	// it has been used to store iv (16-bytes) in i386 code
+	// for consistency between i386 and x86_64, this hole is dummied in x86_64 code
+	// also the 1st 16 bytes (sp) is dummied in x86_64 code
+
+#ifdef	KERNEL
+	movaps	%xmm7, 16(sp)
+	movaps	%xmm6, 32(sp)
+	movaps	%xmm0, 64(sp)
+	movaps	%xmm1, 80(sp)
+	movaps	%xmm2, 96(sp)
+#if defined	__i386__
+	movaps	%xmm3, 112(sp)
+	movaps	%xmm4, 128(sp)
+#endif
+#endif
+
+	// set up registers from calling arguments
+
+#if defined	__i386__
+	mov		12(%ebp), %eax			// in_iv
+	mov		24(%ebp), %edx			// ctx
+	movups	(%eax), %xmm7			// in_iv	
+	mov		%edx, 8(%esp)			// ctx for aes_encrypt
+	mov		8(%ebp), %ebx			// ibuf
+	mov		16(%ebp), %esi			// num_blk
+	mov		20(%ebp), %edi			// obuf
+
+	#define	ibuf	%ebx
+	#define	obuf	%edi
+	#define num_blk	%esi	
+#else	//	__x86_64__, rdi/rsi/rdx/rcx/r8
+	mov		%rdi, %rbx				// ibuf
+	movups	(%rsi), %xmm7			// in_iv
+	mov		%rdx, %r13				// num_blk
+	mov		%rcx, %r14				// obuf 
+	mov		%r8, %r15				// ctx	
+
+	#define	ibuf	%rbx
+	#define	num_blk	%r13d
+	#define	obuf	%r14	
+	#define	ctx		%r15
+
+#endif
+	// memcpy(tmp, ibuf, AES_BLOCK_SIZE);
+	// aes_decrypt(ibuf, obuf, ctx);
+	// obuf ^= iv;
+	// memcpy(iv, tmp, AES_BLOCK_SIZE);
+	// ibuf += AES_BLOCK_SIZE;
+	// obuf += AES_BLOCK_SIZE;
+
+	cmp		$1, num_blk					// num_blk vs 1
+	jl		L_crypt_cbc_done			// if num_blk < 1, bypass the main loop, jump to finishing code
+0:
+	movups	(ibuf), %xmm6				// tmp
+#if defined	__i386__
+	mov		ibuf, (sp)					// ibuf
+	mov		obuf, 4(sp)					// obuf
+#else
+	mov		ibuf, %rdi					// ibuf 
+	mov		obuf, %rsi					// obuf
+	mov		ctx, %rdx					// ctx
+#endif
+	call	_aes_decrypt_xmm_no_save	// aes_decrypt(ibuf, obuf, ctx)
+	movups	(obuf), %xmm0				// obuf
+	pxor	%xmm7, %xmm0				// obuf ^= iv;
+	movaps	%xmm6, %xmm7				// memcpy(iv, tmp, AES_BLOCK_SIZE);
+	movups	%xmm0, (obuf)				// update obuf
+	add		$16, ibuf					// ibuf += AES_BLOCK_SIZE; 
+	add		$16, obuf					// obuf += AES_BLOCK_SIZE;	
+	sub		$1, num_blk					// num_blk --
+	jg		0b							// if num_blk > 0, repeat the loop
+9:	
+
+	// we are done here; the finishing code is identical to that in aes_encrypt_cbc, so just jump there
+	jmp		L_crypt_cbc_done
+
diff --git a/bsd/crypto/aes/i386/aes_modes_hw.s b/bsd/crypto/aes/i386/aes_modes_hw.s
new file mode 100644
index 000000000..401fd3dd9
--- /dev/null
+++ b/bsd/crypto/aes/i386/aes_modes_hw.s
@@ -0,0 +1,1669 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
+
+ LICENSE TERMS
+
+ The free distribution and use of this software in both source and binary
+ form is allowed (with or without changes) provided that:
+
+   1. distributions of this source code include the above copyright
+      notice, this list of conditions and the following disclaimer;
+
+   2. distributions in binary form include the above copyright
+      notice, this list of conditions and the following disclaimer
+      in the documentation and/or other associated materials;
+
+   3. the copyright holder's name is not used to endorse products
+      built using this software without specific written permission.
+
+ ALTERNATIVELY, provided that this notice is retained in full, this product
+ may be distributed under the terms of the GNU General Public License (GPL),
+ in which case the provisions of the GPL apply INSTEAD OF those given above.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue 31/01/2006
+
+ These subroutines implement multiple block AES modes for ECB, CBC, CFB,
+ OFB and CTR encryption. The code provides support for the VIA Advanced 
+ Cryptography Engine (ACE).
+
+ NOTE: In the following subroutines, the AES contexts (ctx) must be
+ 16 byte aligned if VIA ACE is being used
+*/
+
+/* modified 3/5/10 cclee */
+/* Clean up those related to VIA ACE and hand optimize aes_cbc_encrypt and aes_cbc_decrypt */
+/* move the xmm registers save/restore originally inside the callee functions into these 2 caller functions */
+
+/* HW-AES specific implementation cclee 3-12-10 */
+/* In aes_encrypt_cbc and aes_decrypt_cbc, __cpu_capabilities is polled, 
+	and if kHasAES is detected, branch to the hw-specific functions here */
+
+
+/* 	
+	This file defines _aes_encrypt_cbc_hw and _aes_decrypt_cbc_hw --- Intel Westmere HW AES-based implementations
+	of _aes_encrypt_cbc and _aes_decrypt_cbc. 
+
+	These 2 functions SHOULD be entered ONLY after the AES HW is verified to be available. 
+	They SHOULD NOT be called without AES HW detection; doing so might cause xnu to crash.
+
+	The AES HW is detected 1st thing in 
+		_aes_encrypt_cbc (aes_modes_asm.s) 
+		_aes_decrypt_cbc (aes_modes_asm.s)
+	and, if AES HW is detected, they branch without link (i.e., jump) to the functions here.
+
+	The implementation here follows the examples in an Intel White Paper
+	"Intel Advanced Encryption Standard (AES) Instruction Set" Rev.2 01
+
+	Note: Rev. 03 Final 2010 01 26 is available; it looks like some code changed relative to Rev. 2 01.
+
+	cclee 3-13-10
+*/
+
+/* 
+	The function _aes_decrypt_cbc_hw previously decrypted serially, block by block.
+	In our group meeting, Eric/Ali suggested that I take a look at combining multiple blocks
+	in a loop and interleaving multiple aesdec instructions to absorb/hide stalls and improve the decrypt throughput.
+
+	The idea was actually described in the Intel AES Instruction Set White Paper (Rev. 2.0, pages 53-55).
+
+	This modification interleaves the aesdec/aesdeclast instructions for 4 blocks in cbc mode.
+	On a K18 (2.4GHz core-i5/2.66GHz core-i7), the x86_64 decrypt throughput (in xnu-iokit) has been improved
+	from 1180/1332 to 1667/1858 MBytes/sec. This is approximately a 1.40x speedup in decryption.
+	The encrypt throughput is not changed.  
+
+	I also enhanced the assembly code comments.
+
+	cclee-4-30-10 (Do you know 4-30 is National Honesty Day in the US? No need to know. I've been honest all the time.)
+
+*/
+
+/* ---------------------------------------------------------------------------------------------------------------- 
+
+	aes_encrypt_cbc function (see aes_modes.c or aes_modes_asm.s) :
+
+	For simplicity, I am assuming all variables are 128-bit data types.
+
+	aes_rval aes_encrypt_cbc(const __m128 *ibuf, __m128 *iv, int num_blk, __m128 *obuf, const aes_encrypt_ctx *ctx)
+	{
+		while(num_blk--) {
+			*iv ^= *ibuf++;
+			aes_encrypt(iv, iv, ctx);
+			*obuf++ = *iv;
+		}
+		return 0;
+	}
+
+	The following is an implementation of this function using Intel AESNI.
+	This function _aes_encrypt_cbc_hw SHOULD NOT be called directly. 
+	Developers should still call _aes_encrypt_cbc (in aes_modes_asm.s), which polls cpu_capabilities and branches
+	to this aesni-based function should it detect that aesni is available.
+	Blindly calling this function will SURELY cause a CRASH on systems with no aesni support. 
+
+	Note that each block starts with *iv, which is the output of the previous block. Therefore, the cbc blocks
+	are serially chained. This prevents us from arranging several blocks for encryption in parallel.
+
+   ----------------------------------------------------------------------------------------------------------------*/
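+
+/*
+	Illustrative sketch only (never compiled here): the aes-128 encrypt loop below,
+	expressed with AES-NI intrinsics. The array rk[0..10] (expanded round keys) is
+	an assumed name for the sketch; the asm loads those keys from (ctx)..160(ctx).
+
+	#include <wmmintrin.h>
+	__m128i iv = _mm_loadu_si128((const __m128i *)in_iv);
+	while (num_blk--) {
+		iv = _mm_xor_si128(iv, _mm_loadu_si128((const __m128i *)ibuf));	// *iv ^= *ibuf
+		iv = _mm_xor_si128(iv, rk[0]);					// round 0 (whitening)
+		for (int r = 1; r < 10; r++)
+			iv = _mm_aesenc_si128(iv, rk[r]);			// rounds 1..9
+		iv = _mm_aesenclast_si128(iv, rk[10]);				// final round
+		_mm_storeu_si128((__m128i *)obuf, iv);				// *obuf++ = *iv
+		ibuf += 16; obuf += 16;
+	}
+*/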
+
+	.text
+	.align	4,0x90
+	.globl	_aes_encrypt_cbc_hw
+_aes_encrypt_cbc_hw:
+
+	// push/save registers for local use
+#if	defined	__i386__
+
+	push	%ebp
+	movl	%esp, %ebp
+	push	%ebx
+	push	%edi
+
+	#define	sp	%esp
+
+#else	// __x86_64__
+
+	push	%rbp
+	mov		%rsp, %rbp
+	push	%rbx
+	push	%r13
+	push	%r14
+	push	%r15
+
+	#define	sp	%rsp
+
+#endif
+
+	// if this is kernel code, need to save used xmm registers
+#ifdef	KERNEL
+
+#if defined __i386__
+	sub		$(8*16), %esp			// for possible xmm0-xmm7 save/restore
+#else
+	sub		$(16*16), %rsp		// xmm0-xmm15 save/restore	
+#endif
+
+	movaps	%xmm0, (sp)
+	movaps	%xmm1, 16(sp)
+	movaps	%xmm2, 32(sp)
+	movaps	%xmm3, 48(sp)
+	movaps	%xmm4, 64(sp)
+	movaps	%xmm5, 80(sp)
+	movaps	%xmm6, 96(sp)
+	movaps	%xmm7, 112(sp)
+#if defined	__x86_64__
+	movaps	%xmm8, 16*8(sp)
+	movaps	%xmm9, 16*9(sp)
+	movaps	%xmm10, 16*10(sp)
+	movaps	%xmm11, 16*11(sp)
+	movaps	%xmm12, 16*12(sp)
+	movaps	%xmm13, 16*13(sp)
+	movaps	%xmm14, 16*14(sp)
+	movaps	%xmm15, 16*15(sp)
+#endif	// __x86_64__
+
+#endif	// KERNEL
+
+	#define	iv	%xmm0
+
+#ifdef	__i386__
+
+	mov		12(%ebp), %eax			// in_iv
+	mov		24(%ebp), %edx			// ctx
+	movups	(%eax), iv				// iv = in_iv	
+	mov		8(%ebp), %ebx			// ibuf
+	mov		16(%ebp), %ecx			// num_blk
+	mov		20(%ebp), %edi			// obuf
+
+	#define	ibuf	%ebx
+	#define	obuf	%edi
+	#define num_blk	%ecx	
+	#define	ctx		%edx
+
+#else
+
+	mov		%rdi, %rbx				// ibuf
+	movups	(%rsi), iv				// iv = in_iv
+	mov		%rdx, %r13				// num_blk
+	mov		%rcx, %r14				// obuf
+	mov		%r8, %r15				// ctx	
+
+	#define	ibuf	%rbx
+	#define	num_blk	%r13d
+	#define	obuf	%r14	
+	#define	ctx		%r15
+
+#endif
+
+	mov		240(ctx), %eax			// aes length
+	cmp		$160, %eax				// aes-128 encrypt ?
+	je		L_encrypt_128
+	cmp		$192, %eax				// aes-192 encrypt ?
+	je		L_encrypt_192
+	cmp		$224, %eax				// aes-256 encrypt ?
+	je		L_encrypt_256
+	mov		$-1, %eax				// return error
+	jmp		L_error	
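+
+	// note: 240(ctx) appears to hold 16*Nr for the expanded key schedule
+	// (160/192/224 for the 10/12/14 rounds of aes-128/192/256), which is
+	// what the three comparisons above dispatch on.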
+
+	//
+	// aes-128 encrypt_cbc operation, up to L_HW_cbc_done
+	//
+
+L_encrypt_128:
+
+	cmp		$1, num_blk				// check the number of blocks
+	jl		L_HW_cbc_done			// should it be less than 1, nothing to do
+
+	movups	(ctx), %xmm2			// key0
+	movups	16(ctx), %xmm3			// key1
+	movups	32(ctx), %xmm4			// key2
+	movups	48(ctx), %xmm5			// key3
+	movups	64(ctx), %xmm6			// key4
+	movups	80(ctx), %xmm7			// key5
+#if defined	__x86_64__
+	movups	96(ctx), %xmm8			// key6
+	movups	112(ctx), %xmm9			// key7
+	movups	128(ctx), %xmm10		// key8
+	movups	144(ctx), %xmm11		// key9
+	movups	160(ctx), %xmm12		// keyA
+#endif
+
+	// while (num_blk--) {
+	//			*iv ^= *ibuf++;
+	//			aes_encrypt(iv, iv, ctx);
+	//			*obuf++ = *iv;
+	// }
+0:
+	movups	(ibuf), %xmm1				// *ibuf
+	pxor    %xmm2, iv					// 1st instruction inside aes_encrypt
+	pxor	%xmm1, iv					// *iv ^= *ibuf
+
+	// finishing up the rest of aes_encrypt
+    aesenc  %xmm3, iv
+    aesenc  %xmm4, iv
+    aesenc  %xmm5, iv
+    aesenc  %xmm6, iv
+    aesenc  %xmm7, iv
+#if defined	__x86_64__
+    aesenc  %xmm8, iv
+    aesenc  %xmm9, iv
+    aesenc  %xmm10, iv
+    aesenc  %xmm11, iv
+    aesenclast  %xmm12, iv
+#else
+	movups	96(ctx), %xmm1				// key6
+    aesenc  %xmm1, iv
+	movups	112(ctx), %xmm1				// key7
+    aesenc  %xmm1, iv
+	movups	128(ctx), %xmm1				// key8
+    aesenc  %xmm1, iv
+	movups	144(ctx), %xmm1				// key9
+    aesenc  %xmm1, iv
+	movups	160(ctx), %xmm1				// keyA
+    aesenclast  %xmm1, iv
+#endif
+
+	movups	iv, (obuf)					// *obuf = *iv;
+	add		$16, obuf					// obuf++;
+	add		$16, ibuf					// ibuf++;
+	sub		$1, num_blk					// num_blk --
+	jg		0b							// if num_blk > 0, repeat the loop
+
+	// the following will be branched to from all other cases (encrypt/decrypt 128/192/256)
+
+L_HW_cbc_done:
+
+	xor		%eax, %eax				// to return CRYPT_OK
+
+L_error:
+
+	// if kernel, restore xmm registers
+#ifdef	KERNEL 
+	movaps	0(sp), %xmm0
+	movaps	16(sp), %xmm1
+	movaps	32(sp), %xmm2
+	movaps	48(sp), %xmm3
+	movaps	64(sp), %xmm4
+	movaps	80(sp), %xmm5
+	movaps	96(sp), %xmm6
+	movaps	112(sp), %xmm7
+#if defined	__x86_64__
+	movaps	16*8(sp), %xmm8
+	movaps	16*9(sp), %xmm9
+	movaps	16*10(sp), %xmm10
+	movaps	16*11(sp), %xmm11
+	movaps	16*12(sp), %xmm12
+	movaps	16*13(sp), %xmm13
+	movaps	16*14(sp), %xmm14
+	movaps	16*15(sp), %xmm15
+#endif	// __x86_64__
+#endif	// KERNEL
+
+	// release used stack memory, restore used callee-saved registers, and return 
+#if	defined	__i386__
+#ifdef	KERNEL
+	add		$(8*16), %esp
+#endif
+	pop		%edi
+	pop		%ebx
+#else
+#ifdef	KERNEL
+	add		$(16*16), %rsp	
+#endif
+	pop		%r15
+	pop		%r14
+	pop		%r13
+	pop		%rbx
+#endif
+	leave
+	ret
+
+	//
+	// aes-192 encrypt_cbc operation, after completion, branch to L_HW_cbc_done
+	//
+
+L_encrypt_192:
+
+	cmp		$1, num_blk				// check the number of blocks
+	jl		L_HW_cbc_done			// should it be less than 1, nothing to do
+
+	movups	(ctx), %xmm2			// key0
+	movups	16(ctx), %xmm3			// key1
+	movups	32(ctx), %xmm4			// key2
+	movups	48(ctx), %xmm5			// key3
+	movups	64(ctx), %xmm6			// key4
+	movups	80(ctx), %xmm7			// key5
+#if defined	__x86_64__
+	movups	96(ctx), %xmm8			// key6
+	movups	112(ctx), %xmm9			// key7
+	movups	128(ctx), %xmm10		// key8
+	movups	144(ctx), %xmm11		// key9
+	movups	160(ctx), %xmm12		// keyA
+	movups	176(ctx), %xmm13		// keyB
+	movups	192(ctx), %xmm14		// keyC
+#endif
+	
+	// while (num_blk--) {
+	//			*iv ^= *ibuf++;
+	//			aes_encrypt(iv, iv, ctx);
+	//			*obuf++ = *iv;
+	// }
+0:
+	movups	(ibuf), %xmm1			// *ibuf
+	pxor	%xmm1, iv				// *iv ^= ibuf
+
+	// aes_encrypt(iv, iv, ctx);
+
+	pxor    %xmm2, iv
+    aesenc  %xmm3, iv
+    aesenc  %xmm4, iv
+    aesenc  %xmm5, iv
+    aesenc  %xmm6, iv
+    aesenc  %xmm7, iv
+#if defined	__x86_64__
+    aesenc  %xmm8, iv
+    aesenc  %xmm9, iv
+    aesenc  %xmm10, iv
+    aesenc  %xmm11, iv
+    aesenc  %xmm12, iv
+    aesenc  %xmm13, iv
+    aesenclast  %xmm14, iv
+#else
+	movups	96(ctx), %xmm1
+    aesenc  %xmm1, iv
+	movups	112(ctx), %xmm1
+    aesenc  %xmm1, iv
+	movups	128(ctx), %xmm1
+    aesenc  %xmm1, iv
+	movups	144(ctx), %xmm1
+    aesenc  %xmm1, iv
+	movups	160(ctx), %xmm1
+    aesenc  %xmm1, iv
+	movups	176(ctx), %xmm1
+    aesenc  %xmm1, iv
+	movups	192(ctx), %xmm1
+    aesenclast  %xmm1, iv
+#endif
+
+	movups	iv, (obuf)				// *obuf = *iv;
+	add		$16, ibuf				// ibuf++
+	add		$16, obuf				// obuf++
+
+	sub		$1, num_blk				// num_blk --
+	jg		0b						// if num_blk > 0, repeat the loop
+
+	jmp		L_HW_cbc_done			// share with the common exit code
+
+	//
+	// aes-256 encrypt_cbc operation, after completion, branch to L_HW_cbc_done
+	//
+
+L_encrypt_256:
+
+	cmp		$1, num_blk				// check the number of blocks
+	jl		L_HW_cbc_done			// should it be less than 1, nothing to do
+
+	movups	(ctx), %xmm2			// key0
+	movups	16(ctx), %xmm3			// key1
+	movups	32(ctx), %xmm4			// key2
+	movups	48(ctx), %xmm5			// key3
+	movups	64(ctx), %xmm6			// key4
+	movups	80(ctx), %xmm7			// key5
+#if defined	__x86_64__
+	movups	96(ctx), %xmm8			// key6
+	movups	112(ctx), %xmm9			// key7
+	movups	128(ctx), %xmm10		// key8
+	movups	144(ctx), %xmm11		// key9
+	movups	160(ctx), %xmm12		// keyA
+	movups	176(ctx), %xmm13		// keyB
+	movups	192(ctx), %xmm14		// keyC
+	movups	208(ctx), %xmm15		// keyD
+	// movups	224(ctx), %xmm1		// keyE
+#endif
+
+	// while (num_blk--) {
+	//			*iv ^= *ibuf++;
+	//			aes_encrypt(iv, iv, ctx);
+	//			*obuf++ = *iv;
+	// }
+0:
+	movups	(ibuf), %xmm1			// *ibuf
+	pxor	%xmm1, iv				// *iv ^= ibuf
+	
+	// aes_encrypt(iv, iv, ctx);
+	pxor    %xmm2, iv
+    aesenc  %xmm3, iv
+    aesenc  %xmm4, iv
+    aesenc  %xmm5, iv
+    aesenc  %xmm6, iv
+    aesenc  %xmm7, iv
+#if defined	__x86_64__
+	movups	224(ctx), %xmm1			// keyE
+    aesenc  %xmm8, iv
+    aesenc  %xmm9, iv
+    aesenc  %xmm10, iv
+    aesenc  %xmm11, iv
+    aesenc  %xmm12, iv
+    aesenc  %xmm13, iv
+    aesenc  %xmm14, iv
+    aesenc  %xmm15, iv
+    aesenclast  %xmm1, iv
+#else
+	movups	96(ctx), %xmm1			// key6
+    aesenc  %xmm1, iv
+	movups	112(ctx), %xmm1			// key7
+    aesenc  %xmm1, iv
+	movups	128(ctx), %xmm1			// key8
+    aesenc  %xmm1, iv
+	movups	144(ctx), %xmm1			// key9
+    aesenc  %xmm1, iv
+	movups	160(ctx), %xmm1			// keyA
+    aesenc  %xmm1, iv
+	movups	176(ctx), %xmm1			// keyB
+    aesenc  %xmm1, iv
+	movups	192(ctx), %xmm1			// keyC
+    aesenc  %xmm1, iv
+	movups	208(ctx), %xmm1			// keyD
+    aesenc  %xmm1, iv
+	movups	224(ctx), %xmm1			// keyE
+    aesenclast  %xmm1, iv
+#endif
+
+	movups	iv, (obuf)				// *obuf = *iv;
+	add		$16, ibuf				// ibuf++
+	add		$16, obuf				// obuf++
+
+	sub		$1, num_blk				// num_blk --
+	jg		0b						// if num_blk > 0, repeat the loop
+
+	jmp		L_HW_cbc_done			// share with the common exit code
+
+
+
+	//
+	// --------- END of aes_encrypt_cbc_hw  -------------------
+	//
+
+
+/* ---------------------------------------------------------------------------------------------------------------- 
+
+	aes_decrypt_cbc function (see aes_modes.c or aes_modes_asm.s) :
+
+	For simplicity, I am assuming all variables are 128-bit data types.
+
+	aes_rval aes_decrypt_cbc(const __m128 *ibuf, __m128 *iv, int num_blk, __m128 *obuf, const aes_decrypt_ctx *ctx)
+	{
+		while(num_blk--) {
+			aes_decrypt(ibuf, obuf, ctx);
+			*obuf++ ^= *iv;
+			*iv = *ibuf++;
+		}
+		return 0;
+	}
+
+	The following is an implementation of this function using Intel AESNI.
+	This function _aes_decrypt_cbc_hw SHOULD NOT be called directly. 
+	Developers should still call _aes_decrypt_cbc (in aes_modes_asm.s), which polls cpu_capabilities and branches
+	to this aesni-based function should it detect that aesni is available.
+	Blindly calling this function will SURELY cause a CRASH on systems with no aesni support. 
+
+	Note that the decryption operation carries no serial dependency across blocks.
+	This gives the opportunity to arrange aes_decrypt operations in parallel to speed up the code.
+	This is equivalent to what has been described in the Intel AES Instruction Set White Paper (Rev. 2.0, pages 53-55).
+	The following assembly code exploits this idea to achieve a ~1.4x speedup in aes_decrypt_cbc.
+
+	Example C code for packing 4 blocks in an iteration is shown as follows:
+
+		while ((num_blk-=4)>=0) {
+
+			// the following 4 functions can be interleaved to exploit parallelism
+			aes_decrypt(ibuf, obuf, ctx);
+			aes_decrypt(ibuf+1, obuf+1, ctx);
+			aes_decrypt(ibuf+2, obuf+2, ctx);
+			aes_decrypt(ibuf+3, obuf+3, ctx);
+
+			obuf[0] ^= *iv; obuf[1] ^= ibuf[0]; obuf[2] ^= ibuf[1]; obuf[3] ^= ibuf[2];
+			*iv = ibuf[3];		ibuf += 4; 	obuf += 4;
+		}
+		num_blk+=4;
+
+   ----------------------------------------------------------------------------------------------------------------*/
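+
+/*
+	Illustrative sketch only (never compiled here) of the 4-block interleave using
+	AES-NI intrinsics; drk[0..10] (round keys in decrypt order) is an assumed name,
+	matching the keys the asm loads from 160(ctx) down to (ctx) for aes-128.
+
+	__m128i b0 = _mm_xor_si128(_mm_loadu_si128((const __m128i *)ibuf + 0), drk[0]);
+	__m128i b1 = _mm_xor_si128(_mm_loadu_si128((const __m128i *)ibuf + 1), drk[0]);
+	__m128i b2 = _mm_xor_si128(_mm_loadu_si128((const __m128i *)ibuf + 2), drk[0]);
+	__m128i b3 = _mm_xor_si128(_mm_loadu_si128((const __m128i *)ibuf + 3), drk[0]);
+	for (int r = 1; r < 10; r++) {		// interleaved rounds hide aesdec latency
+		b0 = _mm_aesdec_si128(b0, drk[r]);
+		b1 = _mm_aesdec_si128(b1, drk[r]);
+		b2 = _mm_aesdec_si128(b2, drk[r]);
+		b3 = _mm_aesdec_si128(b3, drk[r]);
+	}
+	b0 = _mm_aesdeclast_si128(b0, drk[10]);	// likewise b1..b3; then
+	// b0 ^= *iv, b1 ^= ibuf[0], b2 ^= ibuf[1], b3 ^= ibuf[2], and *iv = ibuf[3]
+*/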
+
+	.text
+	.align	4,0x90
+	.globl	_aes_decrypt_cbc_hw
+_aes_decrypt_cbc_hw:
+
+	// push/save registers for local use
+#if	defined	__i386__
+
+	push	%ebp
+	movl	%esp, %ebp
+	push	%ebx					// ibuf
+	push	%edi					// obuf
+
+	#define	sp	%esp
+
+#else	// __x86_64__
+
+	push	%rbp
+	mov		%rsp, %rbp
+	push	%rbx
+	push	%r13
+	push	%r14
+	push	%r15
+
+	#define	sp	%rsp
+
+#endif
+
+
+	// if kernel, allocate stack space to save xmm registers
+#ifdef	KERNEL
+#if defined __i386__
+	sub		$(8*16), %esp
+#else
+	sub		$(16*16), %rsp
+#endif
+	movaps	%xmm0, (sp)
+	movaps	%xmm1, 16(sp)
+	movaps	%xmm2, 32(sp)
+	movaps	%xmm3, 48(sp)
+	movaps	%xmm4, 64(sp)
+	movaps	%xmm5, 80(sp)
+	movaps	%xmm6, 96(sp)
+	movaps	%xmm7, 112(sp)
+#if defined	__x86_64__
+	movaps	%xmm8, 16*8(sp)
+	movaps	%xmm9, 16*9(sp)
+	movaps	%xmm10, 16*10(sp)
+	movaps	%xmm11, 16*11(sp)
+	movaps	%xmm12, 16*12(sp)
+	movaps	%xmm13, 16*13(sp)
+	movaps	%xmm14, 16*14(sp)
+	movaps	%xmm15, 16*15(sp)
+#endif	// __x86_64__
+#endif
+
+	#undef	iv
+	#define	iv	%xmm0
+
+#if defined	__i386__
+	mov		12(%ebp), %eax			// in_iv
+	mov		24(%ebp), %edx			// ctx
+	movups	(%eax), iv				// iv = in_iv	
+	mov		8(%ebp), %ebx			// ibuf
+	mov		16(%ebp), %ecx			// num_blk
+	mov		20(%ebp), %edi			// obuf
+
+	#define	ibuf	%ebx
+	#define	obuf	%edi
+	#define num_blk	%ecx	
+	#define	ctx		%edx
+
+#else	//	__x86_64__, rdi/rsi/rdx/rcx/r8
+
+	mov		%rdi, %rbx				// ibuf
+	movups	(%rsi), iv				// iv = in_iv
+	mov		%rdx, %r13				// num_blk
+	mov		%rcx, %r14				// obuf
+	mov		%r8, %r15				// ctx	
+
+	#define	ibuf	%rbx
+	#define	num_blk	%r13d
+	#define	obuf	%r14	
+	#define	ctx		%r15
+
+#endif
+
+	mov		240(ctx), %eax			// aes length
+	cmp		$160, %eax				// aes-128 decrypt
+	je		L_decrypt_128
+	cmp		$192, %eax				// aes-192 decrypt
+	je		L_decrypt_192
+	cmp		$224, %eax				// aes-256 decrypt
+	je		L_decrypt_256
+
+	mov		$-1, %eax				// wrong aes length, to return -1
+	jmp		L_error					// early exit due to wrong aes length
+
+
+	//
+	// aes-128 decrypt_cbc operation, after completion, branch to L_HW_cbc_done
+	//
+
+L_decrypt_128:
+
+	cmp		$1, num_blk
+	jl		L_HW_cbc_done			// if num_blk < 1, early return
+
+	// aes-128 decrypt expanded keys
+	movups	160(ctx), %xmm3
+	movups	144(ctx), %xmm4
+	movups	128(ctx), %xmm5
+	movups	112(ctx), %xmm6
+	movups	96(ctx), %xmm7
+#if defined	__x86_64__
+	movups	80(ctx), %xmm8
+	movups	64(ctx), %xmm9
+	movups	48(ctx), %xmm10
+	movups	32(ctx), %xmm11
+	movups	16(ctx), %xmm12
+	movups	0(ctx), %xmm13
+#endif
+
+	// performs 4-block decryption per iteration to exploit decryption parallelism
+
+	//		while ((num_blk-=4)>=0) {
+	//			aes_decrypt(ibuf, obuf, ctx);
+	//			aes_decrypt(ibuf+1, obuf+1, ctx);
+	//			aes_decrypt(ibuf+2, obuf+2, ctx);
+	//			aes_decrypt(ibuf+3, obuf+3, ctx);
+	//			obuf[0] ^= *iv; obuf[1] ^= ibuf[0]; obuf[2] ^= ibuf[1]; obuf[3] ^= ibuf[2];
+	//			*iv = ibuf[3]; ibuf += 4; obuf += 4;
+	//		}
+
+	sub		$4, num_blk					// pre decrement num_blk by 4
+	jl		9f							// if num_blk < 4, skip the per-4-blocks processing code
+
+0:
+
+
+#if defined	__x86_64__
+
+	movups	(ibuf), %xmm1				// tmp = 1st ibuf
+	movups	16(ibuf), %xmm2				// tmp = 2nd ibuf
+	movups	32(ibuf), %xmm14			// tmp = 3rd ibuf
+	movups	48(ibuf), %xmm15			// tmp = 4th ibuf
+
+	// for x86_64, the expanded keys are already stored in xmm3-xmm13
+
+	// aes-128 decrypt round 0 per 4 blocks
+	pxor    %xmm3, %xmm1
+	pxor    %xmm3, %xmm2
+	pxor    %xmm3, %xmm14
+	pxor    %xmm3, %xmm15
+
+	// aes-128 decrypt round 1 per 4 blocks
+    aesdec  %xmm4, %xmm1
+    aesdec  %xmm4, %xmm2
+    aesdec  %xmm4, %xmm14
+    aesdec  %xmm4, %xmm15
+
+	// aes-128 decrypt round 2 per 4 blocks
+    aesdec  %xmm5, %xmm1
+    aesdec  %xmm5, %xmm2
+    aesdec  %xmm5, %xmm14
+    aesdec  %xmm5, %xmm15
+
+	// aes-128 decrypt round 3 per 4 blocks
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm14
+    aesdec  %xmm6, %xmm15
+
+	// aes-128 decrypt round 4 per 4 blocks
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+    aesdec  %xmm7, %xmm14
+    aesdec  %xmm7, %xmm15
+
+	// aes-128 decrypt round 5 per 4 blocks
+    aesdec  %xmm8, %xmm1
+    aesdec  %xmm8, %xmm2
+    aesdec  %xmm8, %xmm14
+    aesdec  %xmm8, %xmm15
+
+	// aes-128 decrypt round 6 per 4 blocks
+    aesdec  %xmm9, %xmm1
+    aesdec  %xmm9, %xmm2
+    aesdec  %xmm9, %xmm14
+    aesdec  %xmm9, %xmm15
+
+	// aes-128 decrypt round 7 per 4 blocks
+    aesdec  %xmm10, %xmm1
+    aesdec  %xmm10, %xmm2
+    aesdec  %xmm10, %xmm14
+    aesdec  %xmm10, %xmm15
+
+	// aes-128 decrypt round 8 per 4 blocks
+    aesdec  %xmm11, %xmm1
+    aesdec  %xmm11, %xmm2
+    aesdec  %xmm11, %xmm14
+    aesdec  %xmm11, %xmm15
+
+	// aes-128 decrypt round 9 per 4 blocks
+    aesdec  %xmm12, %xmm1
+    aesdec  %xmm12, %xmm2
+    aesdec  %xmm12, %xmm14
+    aesdec  %xmm12, %xmm15
+
+	// aes-128 decrypt round 10 (last) per 4 blocks
+    aesdeclast  %xmm13, %xmm1
+    aesdeclast  %xmm13, %xmm2
+    aesdeclast  %xmm13, %xmm14
+    aesdeclast  %xmm13, %xmm15
+
+	pxor	iv, %xmm1				// obuf[0] ^= *iv; 
+	movups	(ibuf), iv				// ibuf[0]
+	pxor	iv, %xmm2				// obuf[1] ^= ibuf[0]; 
+	movups	16(ibuf), iv			// ibuf[1]
+	pxor	iv, %xmm14				// obuf[2] ^= ibuf[1]; 
+	movups	32(ibuf), iv			// ibuf[2] 
+	pxor	iv, %xmm15				// obuf[3] ^= ibuf[2]; 
+	movups	48(ibuf), iv			// *iv = ibuf[3]
+
+	movups	%xmm1, (obuf)			// write 1st obuf
+	movups	%xmm2, 16(obuf)			// write 2nd obuf
+	movups	%xmm14, 32(obuf)		// write 3rd obuf
+	movups	%xmm15, 48(obuf)		// write 4th obuf
+
+
+#else
+
+	// aes_decrypt_cbc per 4 blocks using aes-128 for i386
+	// xmm1/xmm2/xmm4/xmm5 used for obuf per block
+	// xmm3 = key0
+	// xmm0 = iv
+	// xmm6/xmm7 dynamically load with other expanded keys
+
+	movups	(ibuf), %xmm1			// tmp = 1st ibuf
+	movups	16(ibuf), %xmm2			// tmp = 2nd ibuf
+	movups	32(ibuf), %xmm4			// tmp = 3rd ibuf
+	movups	48(ibuf), %xmm5			// tmp = 4th ibuf
+
+	// aes_decrypt
+	// for i386, sequentially load expanded keys into xmm6/xmm7
+
+	movups	144(ctx), %xmm6			// key1
+
+	// aes-128 decrypt round 0 per 4 blocks
+	pxor    %xmm3, %xmm1
+	pxor    %xmm3, %xmm2
+	pxor    %xmm3, %xmm4
+	pxor    %xmm3, %xmm5
+
+	movups	128(ctx), %xmm7			// key2
+
+	// aes-128 decrypt round 1 per 4 blocks
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+	movups	112(ctx), %xmm6			// key3
+
+	// aes-128 decrypt round 2 per 4 blocks
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+    aesdec  %xmm7, %xmm4
+    aesdec  %xmm7, %xmm5
+
+	movups	96(ctx), %xmm7			// key4
+
+	// aes-128 decrypt round 3 per 4 blocks
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+	movups	80(ctx), %xmm6			// key5
+
+	// aes-128 decrypt round 4 per 4 blocks
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+    aesdec  %xmm7, %xmm4
+    aesdec  %xmm7, %xmm5
+
+	movups	64(ctx), %xmm7			// key6
+
+	// aes-128 decrypt round 5 per 4 blocks
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+	movups	48(ctx), %xmm6			// key7
+
+	// aes-128 decrypt round 6 per 4 blocks
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+    aesdec  %xmm7, %xmm4
+    aesdec  %xmm7, %xmm5
+
+	movups	32(ctx), %xmm7			// key8
+
+	// aes-128 decrypt round 7 per 4 blocks
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+	movups	16(ctx), %xmm6			// key9
+
+	// aes-128 decrypt round 8 per 4 blocks
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+    aesdec  %xmm7, %xmm4
+    aesdec  %xmm7, %xmm5
+
+	movups	0(ctx), %xmm7			// keyA
+
+	// aes-128 decrypt round 9 per 4 blocks
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+	// aes-128 decrypt round 10 (last) per 4 blocks
+    aesdeclast  %xmm7, %xmm1
+    aesdeclast  %xmm7, %xmm2
+    aesdeclast  %xmm7, %xmm4
+    aesdeclast  %xmm7, %xmm5
+
+	pxor	iv, %xmm1				// 1st obuf ^= iv; 
+	movups	(ibuf), iv				// 1st memcpy(iv, tmp, AES_BLOCK_SIZE);
+	pxor	iv, %xmm2				// 2nd obuf ^= iv; 
+	movups	16(ibuf), iv			// 2nd memcpy(iv, tmp, AES_BLOCK_SIZE);
+	pxor	iv, %xmm4				// 3rd obuf ^= iv; 
+	movups	32(ibuf), iv			// 3rd memcpy(iv, tmp, AES_BLOCK_SIZE);
+	pxor	iv, %xmm5				// 4th obuf ^= iv; 
+	movups	48(ibuf), iv			// 4th memcpy(iv, tmp, AES_BLOCK_SIZE);
+
+	movups	%xmm1, (obuf)			// write 1st obuf
+	movups	%xmm2, 16(obuf)			// write 2nd obuf
+	movups	%xmm4, 32(obuf)			// write 3rd obuf
+	movups	%xmm5, 48(obuf)			// write 4th obuf
+#endif
+
+	add		$64, ibuf				// ibuf += 4; 
+	add		$64, obuf				// obuf += 4;	
+
+	sub		$4, num_blk				// num_blk -= 4
+	jge		0b						// if num_blk > 0, repeat the loop
+
+9:	add		$4, num_blk				// post increment num_blk by 4
+	je		L_HW_cbc_done			// if num_blk == 0, no further processing needed
+
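+	// 0-3 blocks remain at this point: a pair is handled first (bit 1 of num_blk),
+	// then a final single block (bit 0), reusing the expanded keys loaded above.
+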
+#if defined	__i386__
+	// reload, as these may be needed as expanded keys for the remaining blocks
+	movups	144(ctx), %xmm4
+	movups	128(ctx), %xmm5
+	movups	112(ctx), %xmm6
+	movups	96(ctx), %xmm7
+#endif
+
+	test	$2, num_blk				// check whether num_blk has 2 blocks
+	je		9f						// if num_blk & 2 == 0, skip the per-pair processing code
+
+	// do the remaining 2 blocks together
+
+	movups	(ibuf), %xmm1				// tmp = 1st ibuf
+	movups	16(ibuf), %xmm2				// tmp = 2nd ibuf
+
+	// aes_decrypt
+	pxor    %xmm3, %xmm1
+	pxor    %xmm3, %xmm2
+    aesdec  %xmm4, %xmm1
+    aesdec  %xmm4, %xmm2
+    aesdec  %xmm5, %xmm1
+    aesdec  %xmm5, %xmm2
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+#if defined	__x86_64__
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+    aesdec  %xmm8, %xmm1
+    aesdec  %xmm8, %xmm2
+    aesdec  %xmm9, %xmm1
+    aesdec  %xmm9, %xmm2
+    aesdec  %xmm10, %xmm1
+    aesdec  %xmm10, %xmm2
+    aesdec  %xmm11, %xmm1
+    aesdec  %xmm11, %xmm2
+    aesdec  %xmm12, %xmm1
+    aesdec  %xmm12, %xmm2
+    aesdeclast  %xmm13, %xmm1
+    aesdeclast  %xmm13, %xmm2
+#else
+	movups	80(ctx), %xmm6
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+	movups	64(ctx), %xmm7
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+	movups	48(ctx), %xmm6
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+	movups	32(ctx), %xmm7
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+	movups	16(ctx), %xmm6
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+	movups	0(ctx), %xmm7
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdeclast  %xmm7, %xmm1
+    aesdeclast  %xmm7, %xmm2
+	movups	112(ctx), %xmm6
+	movups	96(ctx), %xmm7
+#endif
+
+	pxor	iv, %xmm1				// obuf[0] ^= *iv; 
+	movups	(ibuf), iv				// ibuf[0]
+	pxor	iv, %xmm2				// obuf[1] ^= ibuf[0]
+	movups	16(ibuf), iv			// *iv = ibuf[1]
+
+	movups	%xmm1, (obuf)			// write obuf[0]
+	movups	%xmm2, 16(obuf)			// write obuf[1]
+
+	add		$32, ibuf				// ibuf += 2
+	add		$32, obuf				// obuf += 2
+
+9:
+	test	$1, num_blk				// check whether num_blk has residual 1 block
+	je		L_HW_cbc_done			// if num_blk == 0, no need for residual processing code
+	
+	movups	(ibuf), %xmm2				// tmp = ibuf
+	// aes_decrypt
+	pxor    %xmm3, %xmm2
+    aesdec  %xmm4, %xmm2
+    aesdec  %xmm5, %xmm2
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm7, %xmm2
+#if defined	__x86_64__
+    aesdec  %xmm8, %xmm2
+    aesdec  %xmm9, %xmm2
+    aesdec  %xmm10, %xmm2
+    aesdec  %xmm11, %xmm2
+    aesdec  %xmm12, %xmm2
+    aesdeclast  %xmm13, %xmm2
+#else
+	movups	80(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	64(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	48(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	32(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	16(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	(ctx), %xmm1
+    aesdeclast  %xmm1, %xmm2
+#endif
+
+	pxor	iv, %xmm2			// *obuf ^= *iv; 
+	movups	(ibuf), iv			// *iv = *ibuf;
+	movups	%xmm2, (obuf)		// write *obuf
+
+	jmp		L_HW_cbc_done
+
+	//
+	// aes-192 decrypt_cbc operation, after completion, branch to L_HW_cbc_done
+	//
+
+L_decrypt_192:
+
+	cmp		$1, num_blk
+	jl		L_HW_cbc_done			// if num_blk < 1, early return
+
+	// aes-192 decrypt expanded keys
+	movups	192(ctx), %xmm3
+	movups	176(ctx), %xmm4
+	movups	160(ctx), %xmm5
+	movups	144(ctx), %xmm6
+	movups	128(ctx), %xmm7
+#if defined	__x86_64__
+	movups	112(ctx), %xmm8
+	movups	96(ctx), %xmm9
+	movups	80(ctx), %xmm10
+	movups	64(ctx), %xmm11
+	movups	48(ctx), %xmm12
+	movups	32(ctx), %xmm13
+	movups	16(ctx), %xmm14
+	movups	(ctx), %xmm15
+#endif
+
+	// performs 4-block decryption per iteration to exploit decryption parallelism
+
+	//		while ((num_blk-=4)>=0) {
+	//			aes_decrypt(ibuf, obuf, ctx);
+	//			aes_decrypt(ibuf+1, obuf+1, ctx);
+	//			aes_decrypt(ibuf+2, obuf+2, ctx);
+	//			aes_decrypt(ibuf+3, obuf+3, ctx);
+	//			obuf[0] ^= *iv; obuf[1] ^= ibuf[0]; obuf[2] ^= ibuf[1]; obuf[3] ^= ibuf[2];
+	//			*iv = ibuf[3]; ibuf += 4; obuf += 4;
+	//		}
+
+	sub		$4, num_blk					// pre decrement num_blk by 4
+	jl		9f							// if num_blk < 4, skip the per-4-blocks processing code
+0:
+
+#if defined	__x86_64__
+
+	movups	(ibuf), %xmm1				// tmp = 1st ibuf
+	movups	16(ibuf), %xmm2				// tmp = 2nd ibuf
+	movups	32(ibuf), %xmm14			// tmp = 3rd ibuf
+	movups	48(ibuf), %xmm15			// tmp = 4th ibuf
+
+	// aes_decrypt, for x86_64, the expanded keys are already stored in xmm3-xmm13
+	// use %xmm12/%xmm13 as dynamic keys in the middle; restored afterwards
+
+	// round 0 for 4 blocks
+	pxor    %xmm3, %xmm1
+	pxor    %xmm3, %xmm2
+	pxor    %xmm3, %xmm14
+	pxor    %xmm3, %xmm15
+
+	// round 1 for 4 blocks
+    aesdec  %xmm4, %xmm1
+    aesdec  %xmm4, %xmm2
+    aesdec  %xmm4, %xmm14
+    aesdec  %xmm4, %xmm15
+
+	// round 2 for 4 blocks
+    aesdec  %xmm5, %xmm1
+    aesdec  %xmm5, %xmm2
+    aesdec  %xmm5, %xmm14
+    aesdec  %xmm5, %xmm15
+
+	// round 3 for 4 blocks
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm14
+    aesdec  %xmm6, %xmm15
+
+	// round 4 for 4 blocks
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+    aesdec  %xmm7, %xmm14
+    aesdec  %xmm7, %xmm15
+
+	// round 5 for 4 blocks
+    aesdec  %xmm8, %xmm1
+    aesdec  %xmm8, %xmm2
+    aesdec  %xmm8, %xmm14
+    aesdec  %xmm8, %xmm15
+
+	// round 6 for 4 blocks
+    aesdec  %xmm9, %xmm1
+    aesdec  %xmm9, %xmm2
+    aesdec  %xmm9, %xmm14
+    aesdec  %xmm9, %xmm15
+
+	// round 7 for 4 blocks
+    aesdec  %xmm10, %xmm1
+    aesdec  %xmm10, %xmm2
+    aesdec  %xmm10, %xmm14
+    aesdec  %xmm10, %xmm15
+
+	// round 8 for 4 blocks
+    aesdec  %xmm11, %xmm1
+    aesdec  %xmm11, %xmm2
+    aesdec  %xmm11, %xmm14
+    aesdec  %xmm11, %xmm15
+
+	// round 9 for 4 blocks
+    aesdec  %xmm12, %xmm1
+    aesdec  %xmm12, %xmm2
+    aesdec  %xmm12, %xmm14
+    aesdec  %xmm12, %xmm15
+
+	movups	16(ctx), %xmm12
+
+	// round A for 4 blocks
+    aesdec  %xmm13, %xmm1
+    aesdec  %xmm13, %xmm2
+    aesdec  %xmm13, %xmm14
+    aesdec  %xmm13, %xmm15
+
+	movups	(ctx), %xmm13
+
+	// round B for 4 blocks
+    aesdec  %xmm12, %xmm1
+    aesdec  %xmm12, %xmm2
+    aesdec  %xmm12, %xmm14
+    aesdec  %xmm12, %xmm15
+
+	movups	48(ctx), %xmm12		// restore %xmm12 to its original key
+
+	// round C (last) for 4 blocks
+    aesdeclast  %xmm13, %xmm1
+    aesdeclast  %xmm13, %xmm2
+    aesdeclast  %xmm13, %xmm14
+    aesdeclast  %xmm13, %xmm15
+
+	movups	32(ctx), %xmm13		// restore %xmm13 to its original key
+
+	pxor	iv, %xmm1				// obuf[0] ^= *iv; 
+	movups	(ibuf), iv				// ibuf[0]
+	pxor	iv, %xmm2				// obuf[1] ^= ibuf[0] 
+	movups	16(ibuf), iv			// ibuf[1]
+	pxor	iv, %xmm14				// obuf[2] ^= ibuf[1] 
+	movups	32(ibuf), iv			// ibuf[2] 
+	pxor	iv, %xmm15				// obuf[3] ^= ibuf[2] 
+	movups	48(ibuf), iv			// *iv = ibuf[3] 
+
+	movups	%xmm1, (obuf)			// write 1st obuf
+	movups	%xmm2, 16(obuf)			// write 2nd obuf
+	movups	%xmm14, 32(obuf)		// write 3rd obuf
+	movups	%xmm15, 48(obuf)		// write 4th obuf
+
+	add		$64, ibuf				// ibuf += 4; 
+	add		$64, obuf				// obuf += 4;	
+
+	sub		$4, num_blk				// num_blk -= 4
+	jge		0b						// if num_blk > 0, repeat the loop
+
+9:	add		$4, num_blk				// post increment num_blk by 4
+	je		L_HW_cbc_done			// if num_blk == 0, prepare to return 
+
+	movups	16(ctx), %xmm14			// restore %xmm14 to its key
+	movups	(ctx), %xmm15			// restore %xmm15 to its key
+
+#else
+
+	movups	(ibuf), %xmm1			// tmp = 1st ibuf
+	movups	16(ibuf), %xmm2			// tmp = 2nd ibuf
+	movups	32(ibuf), %xmm4			// tmp = 3rd ibuf
+	movups	48(ibuf), %xmm5			// tmp = 4th ibuf
+
+	// aes_decrypt
+	// for i386, sequentially load expanded keys into xmm6/xmm7
+	movups	176(ctx), %xmm6
+	pxor    %xmm3, %xmm1
+	pxor    %xmm3, %xmm2
+	pxor    %xmm3, %xmm4
+	pxor    %xmm3, %xmm5
+
+	movups	160(ctx), %xmm7
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+	movups	144(ctx), %xmm6
+	aesdec    %xmm7, %xmm1
+	aesdec    %xmm7, %xmm2
+	aesdec    %xmm7, %xmm4
+	aesdec    %xmm7, %xmm5
+
+	movups	128(ctx), %xmm7
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+	movups	112(ctx), %xmm6
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+    aesdec  %xmm7, %xmm4
+    aesdec  %xmm7, %xmm5
+
+	movups	96(ctx), %xmm7
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+	movups	80(ctx), %xmm6
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+    aesdec  %xmm7, %xmm4
+    aesdec  %xmm7, %xmm5
+
+	movups	64(ctx), %xmm7
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+	movups	48(ctx), %xmm6
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+    aesdec  %xmm7, %xmm4
+    aesdec  %xmm7, %xmm5
+
+	movups	32(ctx), %xmm7
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+	movups	16(ctx), %xmm6
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+    aesdec  %xmm7, %xmm4
+    aesdec  %xmm7, %xmm5
+
+	movups	0(ctx), %xmm7
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+    aesdeclast  %xmm7, %xmm1
+    aesdeclast  %xmm7, %xmm2
+    aesdeclast  %xmm7, %xmm4
+    aesdeclast  %xmm7, %xmm5
+
+	pxor	iv, %xmm1				// 1st obuf ^= iv; 
+	movups	(ibuf), iv				// 1st memcpy(iv, tmp, AES_BLOCK_SIZE);
+	pxor	iv, %xmm2				// 2nd obuf ^= iv; 
+	movups	16(ibuf), iv			// 2nd memcpy(iv, tmp, AES_BLOCK_SIZE);
+	pxor	iv, %xmm4				// 3rd obuf ^= iv; 
+	movups	32(ibuf), iv			// 3rd memcpy(iv, tmp, AES_BLOCK_SIZE);
+	pxor	iv, %xmm5				// 4th obuf ^= iv; 
+	movups	48(ibuf), iv			// 4th memcpy(iv, tmp, AES_BLOCK_SIZE);
+	movups	%xmm1, (obuf)			// write 1st obuf
+	movups	%xmm2, 16(obuf)			// write 2nd obuf
+	movups	%xmm4, 32(obuf)			// write 3rd obuf
+	movups	%xmm5, 48(obuf)			// write 4th obuf
+
+	add		$64, ibuf				// ibuf += AES_BLOCK_SIZE * 4; 
+	add		$64, obuf				// obuf += AES_BLOCK_SIZE * 4;	
+
+	sub		$4, num_blk				// num_blk -= 4
+	jge		0b						// if num_blk >= 0, repeat the loop
+
+
+9:	add		$4, num_blk				// post increment num_blk by 4
+	je		L_HW_cbc_done			// if num_blk == 0, no further processing is needed
+
+	movups	176(ctx), %xmm4
+	movups	160(ctx), %xmm5
+	movups	144(ctx), %xmm6
+	movups	128(ctx), %xmm7
+
+#endif
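+	/*
+	 * For reference, a rough C sketch of what the 4-block loop above
+	 * computes (illustrative only; ibuf/obuf treated as arrays of
+	 * 16-byte blocks and aes_decrypt() as a whole-block primitive):
+	 *
+	 *	for ( ; num_blk >= 4; num_blk -= 4, ibuf += 4, obuf += 4) {
+	 *		obuf[0] = aes_decrypt(ibuf[0], ctx) ^ iv;
+	 *		obuf[1] = aes_decrypt(ibuf[1], ctx) ^ ibuf[0];
+	 *		obuf[2] = aes_decrypt(ibuf[2], ctx) ^ ibuf[1];
+	 *		obuf[3] = aes_decrypt(ibuf[3], ctx) ^ ibuf[2];
+	 *		iv = ibuf[3];
+	 *	}
+	 *
+	 * Any remaining blocks (num_blk % 4) fall through to the
+	 * per-block loop below.
+	 */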
+
+	// per-block aes_decrypt_cbc loop
+
+0:
+	movups	(ibuf), %xmm2				// tmp = ibuf
+
+	// aes_decrypt
+	pxor    %xmm3, %xmm2
+    aesdec  %xmm4, %xmm2
+    aesdec  %xmm5, %xmm2
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm7, %xmm2
+#if defined	__x86_64__
+    aesdec  %xmm8, %xmm2
+    aesdec  %xmm9, %xmm2
+    aesdec  %xmm10, %xmm2
+    aesdec  %xmm11, %xmm2
+    aesdec  %xmm12, %xmm2
+    aesdec  %xmm13, %xmm2
+    aesdec  %xmm14, %xmm2
+    aesdeclast  %xmm15, %xmm2
+#else
+	movups	112(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	96(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	80(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	64(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	48(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	32(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	16(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	(ctx), %xmm1
+    aesdeclast  %xmm1, %xmm2
+#endif
+
+	pxor	iv, %xmm2			// obuf ^= iv; 
+	movups	(ibuf), iv			// memcpy(iv, tmp, AES_BLOCK_SIZE);
+
+	movups	%xmm2, (obuf)		// write obuf
+
+	add		$16, ibuf				// ibuf += AES_BLOCK_SIZE; 
+	add		$16, obuf				// obuf += AES_BLOCK_SIZE;	
+	sub		$1, num_blk				// num_blk --
+	jg		0b						// if num_blk > 0, repeat the loop
+
+	jmp		L_HW_cbc_done
+
+	//
+	// aes-256 decrypt_cbc operation, after completion, branch to L_HW_cbc_done
+	//
+
+L_decrypt_256:
+
+	cmp		$1, num_blk
+	jl		L_HW_cbc_done	
+
+	movups	224(ctx), %xmm3
+	movups	208(ctx), %xmm4
+	movups	192(ctx), %xmm5
+	movups	176(ctx), %xmm6
+	movups	160(ctx), %xmm7
+#if defined	__x86_64__
+	movups	144(ctx), %xmm8
+	movups	128(ctx), %xmm9
+	movups	112(ctx), %xmm10
+	movups	96(ctx), %xmm11
+	movups	80(ctx), %xmm12
+	movups	64(ctx), %xmm13
+	movups	48(ctx), %xmm14
+	movups	32(ctx), %xmm15
+//	movups	16(ctx), %xmm14
+//	movups	(ctx), %xmm15
+#endif
+
+#if defined	__x86_64__
+
+	sub		$4, num_blk					// pre decrement num_blk by 4
+	jl		9f							// if num_blk < 4, skip the per-4-blocks processing code
+0:
+	movups	(ibuf), %xmm1				// tmp = 1st ibuf
+	movups	16(ibuf), %xmm2				// tmp = 2nd ibuf
+	movups	32(ibuf), %xmm14			// tmp = 3rd ibuf
+	movups	48(ibuf), %xmm15			// tmp = 4th ibuf
+
+	// aes_decrypt, for x86_64, the expanded keys are already stored in xmm3-xmm13
+	pxor    %xmm3, %xmm1
+	pxor    %xmm3, %xmm2
+	pxor    %xmm3, %xmm14
+	pxor    %xmm3, %xmm15
+
+    aesdec  %xmm4, %xmm1
+    aesdec  %xmm4, %xmm2
+    aesdec  %xmm4, %xmm14
+    aesdec  %xmm4, %xmm15
+
+    aesdec  %xmm5, %xmm1
+    aesdec  %xmm5, %xmm2
+    aesdec  %xmm5, %xmm14
+    aesdec  %xmm5, %xmm15
+
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm14
+    aesdec  %xmm6, %xmm15
+
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+    aesdec  %xmm7, %xmm14
+    aesdec  %xmm7, %xmm15
+
+    aesdec  %xmm8, %xmm1
+    aesdec  %xmm8, %xmm2
+    aesdec  %xmm8, %xmm14
+    aesdec  %xmm8, %xmm15
+
+    aesdec  %xmm9, %xmm1
+    aesdec  %xmm9, %xmm2
+    aesdec  %xmm9, %xmm14
+    aesdec  %xmm9, %xmm15
+
+    aesdec  %xmm10, %xmm1
+    aesdec  %xmm10, %xmm2
+    aesdec  %xmm10, %xmm14
+    aesdec  %xmm10, %xmm15
+
+    aesdec  %xmm11, %xmm1
+    aesdec  %xmm11, %xmm2
+    aesdec  %xmm11, %xmm14
+    aesdec  %xmm11, %xmm15
+
+    aesdec  %xmm12, %xmm1
+    aesdec  %xmm12, %xmm2
+    aesdec  %xmm12, %xmm14
+    aesdec  %xmm12, %xmm15
+	movups	48(ctx), %xmm12
+
+    aesdec  %xmm13, %xmm1
+    aesdec  %xmm13, %xmm2
+    aesdec  %xmm13, %xmm14
+    aesdec  %xmm13, %xmm15
+	movups	32(ctx), %xmm13
+
+    aesdec  %xmm12, %xmm1
+    aesdec  %xmm12, %xmm2
+    aesdec  %xmm12, %xmm14
+    aesdec  %xmm12, %xmm15
+	movups	16(ctx), %xmm12
+
+    aesdec  %xmm13, %xmm1
+    aesdec  %xmm13, %xmm2
+    aesdec  %xmm13, %xmm14
+    aesdec  %xmm13, %xmm15
+	movups	(ctx), %xmm13
+
+    aesdec  %xmm12, %xmm1
+    aesdec  %xmm12, %xmm2
+    aesdec  %xmm12, %xmm14
+    aesdec  %xmm12, %xmm15
+	movups	80(ctx), %xmm12
+
+    aesdeclast  %xmm13, %xmm1
+    aesdeclast  %xmm13, %xmm2
+    aesdeclast  %xmm13, %xmm14
+    aesdeclast  %xmm13, %xmm15
+	movups	64(ctx), %xmm13
+
+	pxor	iv, %xmm1				// obuf ^= iv; 
+	movups	(ibuf), iv				// memcpy(iv, tmp, AES_BLOCK_SIZE);
+	pxor	iv, %xmm2				// obuf ^= iv; 
+	movups	16(ibuf), iv			// memcpy(iv, tmp, AES_BLOCK_SIZE);
+	pxor	iv, %xmm14				// obuf ^= iv; 
+	movups	32(ibuf), iv			// memcpy(iv, tmp, AES_BLOCK_SIZE);
+	pxor	iv, %xmm15				// obuf ^= iv; 
+	movups	48(ibuf), iv			// memcpy(iv, tmp, AES_BLOCK_SIZE);
+
+	movups	%xmm1, (obuf)			// write 1st obuf
+	movups	%xmm2, 16(obuf)			// write 2nd obuf
+	movups	%xmm14, 32(obuf)		// write 3rd obuf
+	movups	%xmm15, 48(obuf)		// write 4th obuf
+
+	add		$64, ibuf				// ibuf += AES_BLOCK_SIZE*4; 
+	add		$64, obuf				// obuf += AES_BLOCK_SIZE*4;	
+
+	sub		$4, num_blk				// num_blk -= 4
+	jge		0b						// if num_blk >= 0, repeat the loop
+
+9:	add		$4, num_blk				// post increment num_blk by 4
+	je		L_HW_cbc_done			// if num_blk == 0, no further processing is needed
+
+	movups	48(ctx), %xmm14
+	movups	32(ctx), %xmm15
+
+#else
+
+	sub		$4, num_blk				// pre decrement num_blk by 4
+	jl		9f						// if num_blk < 4, skip the per-4-blocks processing code
+0:
+	movups	(ibuf), %xmm1			// tmp = 1st ibuf
+	movups	16(ibuf), %xmm2			// tmp = 2nd ibuf
+	movups	32(ibuf), %xmm4			// tmp = 3rd ibuf
+	movups	48(ibuf), %xmm5			// tmp = 4th ibuf
+
+	// aes_decrypt
+	// for i386, sequentially load expanded keys into xmm6/xmm7
+	movups	208(ctx), %xmm6
+	pxor    %xmm3, %xmm1
+	pxor    %xmm3, %xmm2
+	pxor    %xmm3, %xmm4
+	pxor    %xmm3, %xmm5
+
+	movups	192(ctx), %xmm7
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+	movups	176(ctx), %xmm6
+	aesdec  %xmm7, %xmm1
+	aesdec	%xmm7, %xmm2
+	aesdec	%xmm7, %xmm4
+	aesdec	%xmm7, %xmm5
+
+	movups	160(ctx), %xmm7
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+	movups	144(ctx), %xmm6
+	aesdec	%xmm7, %xmm1
+	aesdec	%xmm7, %xmm2
+	aesdec	%xmm7, %xmm4
+	aesdec	%xmm7, %xmm5
+
+	movups	128(ctx), %xmm7
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+	movups	112(ctx), %xmm6
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+    aesdec  %xmm7, %xmm4
+    aesdec  %xmm7, %xmm5
+
+	movups	96(ctx), %xmm7
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+	movups	80(ctx), %xmm6
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+    aesdec  %xmm7, %xmm4
+    aesdec  %xmm7, %xmm5
+
+	movups	64(ctx), %xmm7
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+	movups	48(ctx), %xmm6
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+    aesdec  %xmm7, %xmm4
+    aesdec  %xmm7, %xmm5
+
+	movups	32(ctx), %xmm7
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+	movups	16(ctx), %xmm6
+    aesdec  %xmm7, %xmm1
+    aesdec  %xmm7, %xmm2
+    aesdec  %xmm7, %xmm4
+    aesdec  %xmm7, %xmm5
+
+	movups	0(ctx), %xmm7
+    aesdec  %xmm6, %xmm1
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm6, %xmm4
+    aesdec  %xmm6, %xmm5
+
+    aesdeclast  %xmm7, %xmm1
+    aesdeclast  %xmm7, %xmm2
+    aesdeclast  %xmm7, %xmm4
+    aesdeclast  %xmm7, %xmm5
+
+	pxor	iv, %xmm1				// 1st obuf ^= iv; 
+	movups	(ibuf), iv				// 1st memcpy(iv, tmp, AES_BLOCK_SIZE);
+	pxor	iv, %xmm2				// 2nd obuf ^= iv; 
+	movups	16(ibuf), iv			// 2nd memcpy(iv, tmp, AES_BLOCK_SIZE);
+	pxor	iv, %xmm4				// 3rd obuf ^= iv; 
+	movups	32(ibuf), iv			// 3rd memcpy(iv, tmp, AES_BLOCK_SIZE);
+	pxor	iv, %xmm5				// 4th obuf ^= iv; 
+	movups	48(ibuf), iv			// 4th memcpy(iv, tmp, AES_BLOCK_SIZE);
+	movups	%xmm1, (obuf)			// write 1st obuf
+	movups	%xmm2, 16(obuf)			// write 2nd obuf
+	movups	%xmm4, 32(obuf)			// write 3rd obuf
+	movups	%xmm5, 48(obuf)			// write 4th obuf
+
+	add		$64, ibuf				// ibuf += AES_BLOCK_SIZE * 4; 
+	add		$64, obuf				// obuf += AES_BLOCK_SIZE * 4;	
+
+	sub		$4, num_blk				// num_blk -= 4
+	jge		0b						// if num_blk >= 0, repeat the loop
+
+
+9:	add		$4, num_blk				// post increment num_blk by 4
+	je		L_HW_cbc_done			// if num_blk == 0, no further processing is needed
+
+	movups	208(ctx), %xmm4
+	movups	192(ctx), %xmm5
+	movups	176(ctx), %xmm6
+	movups	160(ctx), %xmm7
+
+#endif
+
+0:
+	movups	(ibuf), %xmm2				// tmp = ibuf
+
+	// aes_decrypt
+	pxor	%xmm3, %xmm2
+    aesdec  %xmm4, %xmm2
+    aesdec  %xmm5, %xmm2
+    aesdec  %xmm6, %xmm2
+    aesdec  %xmm7, %xmm2
+#if defined	__x86_64__
+    aesdec  %xmm8, %xmm2
+    aesdec  %xmm9, %xmm2
+    aesdec  %xmm10, %xmm2
+    aesdec  %xmm11, %xmm2
+    aesdec  %xmm12, %xmm2
+    aesdec  %xmm13, %xmm2
+    aesdec  %xmm14, %xmm2
+    aesdec  %xmm15, %xmm2
+#else
+	movups	144(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	128(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	112(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	96(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	80(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	64(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	48(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	32(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+#endif
+	movups	16(ctx), %xmm1
+    aesdec  %xmm1, %xmm2
+	movups	(ctx), %xmm1
+    aesdeclast  %xmm1, %xmm2
+
+	pxor	iv, %xmm2			// obuf ^= iv; 
+	movups	(ibuf), iv			// memcpy(iv, tmp, AES_BLOCK_SIZE);
+
+	movups	%xmm2, (obuf)		// write obuf
+
+	add		$16, ibuf				// ibuf += AES_BLOCK_SIZE; 
+	add		$16, obuf				// obuf += AES_BLOCK_SIZE;	
+	sub		$1, num_blk				// num_blk --
+	jg		0b						// if num_blk > 0, repeat the loop
+
+	jmp		L_HW_cbc_done
+
+	//
+	// --------- END of aes_decrypt_cbc_hw  -------------------
+	//
diff --git a/bsd/crypto/aes/i386/aes_x86_v2.s b/bsd/crypto/aes/i386/aes_x86_v2.s
deleted file mode 100644
index 7ed98adb8..000000000
--- a/bsd/crypto/aes/i386/aes_x86_v2.s
+++ /dev/null
@@ -1,1298 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/*
- * ---------------------------------------------------------------------------
- * Copyright (c) 2002, Dr Brian Gladman, Worcester, UK.   All rights reserved.
- *
- * LICENSE TERMS
- *
- * The free distribution and use of this software in both source and binary
- * form is allowed (with or without changes) provided that:
- *
- *   1. distributions of this source code include the above copyright
- *      notice, this list of conditions and the following disclaimer;
- *
- *   2. distributions in binary form include the above copyright
- *      notice, this list of conditions and the following disclaimer
- *      in the documentation and/or other associated materials;
- *
- *   3. the copyright holder's name is not used to endorse products
- *      built using this software without specific written permission.
- *
- * ALTERNATIVELY, provided that this notice is retained in full, this product
- * may be distributed under the terms of the GNU General Public License (GPL),
- * in which case the provisions of the GPL apply INSTEAD OF those given above.
- *
- * DISCLAIMER
- *
- * This software is provided 'as is' with no explicit or implied warranties
- * in respect of its properties, including, but not limited to, correctness
- * and/or fitness for purpose.
- * ---------------------------------------------------------------------------
- * Issue 31/01/2006
- *
- * This code requires either ASM_X86_V2 or ASM_X86_V2C to be set in aesopt.h 
- * and the same define to be set here as well. If ASM_X86_V2C is set this file 
- * requires the C files aeskey.c and aestab.c for support.
- *
- * This is a full assembler implementation covering encryption, decryption and
- * key scheduling. It uses 2k bytes of tables but its encryption and decryption
- * performance is very close to that obtained using large tables.  Key schedule
- * expansion is slower for both encryption and decryption but this is likely to
- * be offset by the much smaller load that this version places on the processor
- * cache. I acknowledge the contribution made by Daniel Bernstein to aspects of
- * the design of the AES round function used here.
- *
- * This code provides the standard AES block size (128 bits, 16 bytes) and the
- * three standard AES key sizes (128, 192 and 256 bits). It has the same call
- * interface as my C implementation. The ebx, esi, edi and ebp registers are
- * preserved across calls but eax, ecx and edx and the arithmetic status flags
- * are not.
- */
-
-#include <mach/i386/asm.h>
-
-#define AES_128          /* define if AES with 128 bit keys is needed */
-#define AES_192          /* define if AES with 192 bit keys is needed */
-#define AES_256          /* define if AES with 256 bit keys is needed */
-#define AES_VAR          /* define if a variable key size is needed */
-#define ENCRYPTION       /* define if encryption is needed */
-#define DECRYPTION       /* define if decryption is needed */
-#define AES_REV_DKS      /* define if key decryption schedule is reversed */
-
-#ifndef ASM_X86_V2C
-#define ENCRYPTION_KEY_SCHEDULE /* define if enc. key expansion is needed */
-#define DECRYPTION_KEY_SCHEDULE /* define if dec. key expansion is needed */
-#endif
-
-/*
- * The encryption key schedule has the following memory layout, where N is the
- * number of rounds (10, 12 or 14):
- *
- * lo: | input key (round 0)  |  ; each round is four 32-bit words
- *     | encryption round 1   |
- *     | encryption round 2   |
- *     ....
- *     | encryption round N-1 |
- * hi: | encryption round N   |
- *
- * The decryption key schedule is normally set up so that it has the same
- * layout as above by actually reversing the order of the encryption key
- * schedule in memory (this happens when AES_REV_DKS is set):
- *
- * lo: | decryption round 0   | =              | encryption round N   |
- *     | decryption round 1   | = INV_MIX_COL[ | encryption round N-1 | ]
- *     | decryption round 2   | = INV_MIX_COL[ | encryption round N-2 | ]
- *     ....                       ....
- *     | decryption round N-1 | = INV_MIX_COL[ | encryption round 1   | ]
- * hi: | decryption round N   | =              | input key (round 0)  |
- *
- * with all rounds except the first and last modified using inv_mix_column().
- * But if AES_REV_DKS is NOT set the order of keys is left as it is for
- * encryption so that it has to be accessed in reverse when used for
- * decryption (although the inverse mix column modifications are done)
- *
- * lo: | decryption round 0   | =              | input key (round 0)  |
- *     | decryption round 1   | = INV_MIX_COL[ | encryption round 1   | ]
- *     | decryption round 2   | = INV_MIX_COL[ | encryption round 2   | ]
- *     ....                       ....
- *     | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
- * hi: | decryption round N   | =              | encryption round N   |
- *
- * This layout is faster when the assembler key scheduling provided here
- * is used.
- */
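-
-/*
- * A rough C sketch of the AES_REV_DKS relationship described above
- * (illustrative only; ek/dk are the round keys viewed as 4-word groups):
- *
- *   for (r = 0; r <= N; r++)
- *       for (n = 0; n < 4; n++)
- *           dk[r][n] = (r == 0 || r == N) ? ek[N - r][n]
- *                                         : inv_mix_col(ek[N - r][n]);
- */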
-
-/* End of user defines */
-
-#ifdef AES_VAR
-#ifndef AES_128
-#define AES_128 
-#endif
-#ifndef AES_192
-#define AES_192 
-#endif
-#ifndef AES_256
-#define AES_256 
-#endif
-#endif
-
-#ifdef AES_VAR
-#define KS_LENGTH 60
-#else
-#ifdef AES_256
-#define KS_LENGTH 60
-#else
-#ifdef AES_192
-#define KS_LENGTH 52
-#else 
-#define KS_LENGTH 44
-#endif
-#endif
-#endif
-
-/*
- * These macros implement stack based local variables
- */
-#define	save(r1)			\
-    movl    %r1, (%esp);
-
-#define	restore(r1)			\
-    movl    (%esp), %r1;
-
-#define	do_call(f, n)			\
-    call    EXT(f);			\
-    addl    $(n), %esp;
-
-/*
- * finite field multiplies by {02}, {04} and {08}
- */
-#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
-#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
-#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
-
-/*
- * finite field multiplies required in table generation
- */
-#define	f3(x) (f2(x) ^ x)
-#define	f9(x) (f8(x) ^ x)
-#define	fb(x) (f8(x) ^ f2(x) ^ x)
-#define	fd(x) (f8(x) ^ f4(x) ^ x)
-#define	fe(x) (f8(x) ^ f4(x) ^ f2(x))
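-
-/*
- * For byte-valued x these macros agree with repeated multiplication by
- * {02} in GF(2^8); a rough C equivalent (illustrative only):
- *
- *   static unsigned xtime(unsigned x)	// multiply by {02} mod 0x11b
- *   {
- *       return (x << 1) ^ (((x >> 7) & 1) * 0x11b);
- *   }
- *
- * so f2(x) == xtime(x), f4(x) == xtime(xtime(x)), f8(x) == xtime(f4(x)),
- * and the composites follow, e.g. f3(x) == f2(x) ^ x.
- */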
-
-#define	etab_0(x) enc_tab+4(,x,8)
-#define	etab_1(x) enc_tab+3(,x,8)
-#define	etab_2(x) enc_tab+2(,x,8)
-#define	etab_3(x) enc_tab+1(,x,8)
-
-#define	etab_b(x) etab_3(x)
-
-#define	btab_0(x) enc_tab+6(,x,8)
-#define	btab_1(x) enc_tab+5(,x,8)
-#define	btab_2(x) enc_tab+4(,x,8)
-#define	btab_3(x) enc_tab+3(,x,8)
-
-/*
- * ROUND FUNCTION.  Build column[2] on ESI and column[3] on EDI that have the
- * round keys pre-loaded. Build column[0] in EBP and column[1] in EBX.
- *
- * Input:
- *
- *   EAX     column[0]
- *   EBX     column[1]
- *   ECX     column[2]
- *   EDX     column[3]
- *   ESI     column key[round][2]
- *   EDI     column key[round][3]
- *   EBP     scratch
- *
- * Output:
- *
- *   EBP     column[0]   unkeyed
- *   EBX     column[1]   unkeyed
- *   ESI     column[2]   keyed
- *   EDI     column[3]   keyed
- *   EAX     scratch
- *   ECX     scratch
- *   EDX     scratch
- */
-#define	rnd_fun(m1, m2)			\
-    roll    $16, %ebx;			\
-					\
-    ## m1 ## _zo(esi, cl, 0, ebp);	\
-    m1(esi, dh, 1, ebp);		\
-    m1(esi, bh, 3, ebp);		\
-    ## m1 ## _zo(edi, dl, 0, ebp);	\
-    m1(edi, ah, 1, ebp);		\
-    m1(edi, bl, 2, ebp);		\
-    ## m2 ## _zo(ebp, al, 0, ebp);	\
-					\
-    shrl    $16, %ebx;			\
-    andl    $0xffff0000, %eax;		\
-    orl     %ebx, %eax;			\
-    shrl    $16, %edx;			\
-					\
-    m1(ebp, ah, 1, ebx);		\
-    m1(ebp, dh, 3, ebx);		\
-    m2(ebx, dl, 2, ebx);		\
-    m1(ebx, ch, 1, edx);		\
-    ## m1 ## _zo(ebx, al, 0, edx);	\
-					\
-    shrl    $16, %eax;			\
-    shrl    $16, %ecx;			\
-					\
-    m1(ebp, cl, 2, edx);		\
-    m1(edi, ch, 3, edx);		\
-    m1(esi, al, 2, edx);		\
-    m1(ebx, ah, 3, edx)
-
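-/*
- * In C terms, one keyed output column built by rnd_fun corresponds
- * roughly to the standard T-table round (names illustrative, column
- * indices mod 4):
- *
- *   col[j] = etab_0[byte0(s[j])] ^ etab_1[byte1(s[j+1])]
- *          ^ etab_2[byte2(s[j+2])] ^ etab_3[byte3(s[j+3])] ^ rk[j];
- *
- * i.e. four table lookups per column, with the round key xored in
- * (the esi/edi columns arrive with their round keys pre-loaded).
- */
-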
-/*
- * Basic MOV and XOR Operations for normal rounds
- */
-#define	nr_xor_zo	nr_xor
-#define	nr_xor(r1, r2, r3, r4)		\
-    movzbl  %r2, %r4;			\
-    xorl    etab_ ## r3(%r4), %r1;
-
-#define	nr_mov_zo	nr_mov
-#define	nr_mov(r1, r2, r3, r4)		\
-    movzbl  %r2, %r4;			\
-    movl    etab_ ## r3(%r4), %r1;
-
-/*
- * Basic MOV and XOR Operations for last round
- */
-
-#if 1
-
-#define	lr_xor_zo(r1, r2, r3, r4)	\
-    movzbl  %r2, %r4;			\
-    movzbl  etab_b(%r4), %r4;		\
-    xor     %r4, %r1;
-
-#define	lr_xor(r1, r2, r3, r4)		\
-    movzbl  %r2, %r4;			\
-    movzbl  etab_b(%r4), %r4;		\
-    shll    $(8*r3), %r4;		\
-    xor     %r4, %r1;
-
-#define	lr_mov_zo(r1, r2, r3, r4)	\
-    movzbl  %r2, %r4;			\
-    movzbl  etab_b(%r4), %r1;
-
-#define	lr_mov(r1, r2, r3, r4)		\
-    movzbl  %r2, %r4;			\
-    movzbl  etab_b(%r4), %r1;		\
-    shll    $(8*r3), %r1;
-
-#else        /* less effective but worth leaving as an option */
-
-#define	lr_xor_zo	lr_xor
-#define	lr_xor(r1, r2, r3, r4)			\
-    movzbl  %r2, %r4;				\
-    mov     btab_ ## r3(%r4), %r4;		\
-    andl    $(0x000000ff << 8 * r3), %r4;	\
-    xor     %r4, %r1;
-
-#define	lr_mov_zo	lr_mov
-#define	lr_mov(r1, r2, r3, r4)			\
-    movzbl  %r2, %r4;				\
-    mov     btab_ ## r3(%r4), %r1;		\
-    andl    $(0x000000ff << 8 * r3), %r1;
-
-#endif
-
-/*
- * Apply S-Box to the 4 bytes in a 32-bit word and rotate left 3 byte positions
- *
- *   r1 : output is xored into this register
- *   r2 : input: a => eax, b => ebx, c => ecx, d => edx
- *   r3 : scratch register
- */
-
-#define	l3s_col(r1, r2, r3)			\
-    lr_xor_zo(r1, ## r2 ## h, 0, r3);		\
-    lr_xor(r1, ## r2 ## l, 3, r3);		\
-    shrl    $16, %e ## r2 ## x;			\
-    lr_xor(r1, ## r2 ## h, 2, r3);		\
-    lr_xor(r1, ## r2 ## l, 1, r3);
-
-/*
- * offsets to parameters
- */
-#define	in_blk		4	/* input byte array address parameter */
-#define	out_blk		8	/* output byte array address parameter */
-#define	ctx		12	/* AES context structure */
-#define	stk_spc		20	/* stack space */
-
-#ifdef  ENCRYPTION
-
-#define ENCRYPTION_TABLE 
-
-#define	enc_round			\
-    addl    $16, %ebp;			\
-    save(ebp);				\
-    movl    8(%ebp), %esi;		\
-    movl    12(%ebp), %edi;		\
-					\
-    rnd_fun(nr_xor, nr_mov);		\
-					\
-    movl    %ebp, %eax;			\
-    movl    %esi, %ecx;			\
-    movl    %edi, %edx;			\
-    restore(ebp);			\
-    xorl    (%ebp), %eax;		\
-    xorl    4(%ebp), %ebx;
-
-#define enc_last_round			\
-    addl    $16, %ebp;			\
-    save(ebp);				\
-    movl    8(%ebp), %esi;		\
-    movl    12(%ebp), %edi;		\
-					\
-    rnd_fun(lr_xor, lr_mov);		\
-					\
-    movl    %ebp, %eax;			\
-    restore(ebp);			\
-    xorl    (%ebp), %eax;		\
-    xorl    4(%ebp), %ebx;
-
-    .section __TEXT, __text
-
-/*
- * AES Encryption Subroutine
- */
-Entry(aes_encrypt)
-
-    subl    $stk_spc, %esp
-    movl    %ebp, 16(%esp)
-    movl    %ebx, 12(%esp)
-    movl    %esi, 8(%esp)
-    movl    %edi, 4(%esp)
-
-    movl    in_blk+stk_spc(%esp), %esi	/* input pointer */
-    movl    (%esi), %eax
-    movl    4(%esi), %ebx
-    movl    8(%esi), %ecx
-    movl    12(%esi), %edx
-
-    movl    ctx+stk_spc(%esp), %ebp	/* key pointer */
-    movzbl  4*KS_LENGTH(%ebp), %edi
-    xorl    (%ebp), %eax
-    xorl    4(%ebp), %ebx
-    xorl    8(%ebp), %ecx
-    xorl    12(%ebp), %edx
-
-    /*
-     * determine the number of rounds
-     */
-    cmpl    $10*16, %edi
-    je     aes_encrypt.3
-    cmpl    $12*16, %edi
-    je     aes_encrypt.2
-    cmpl    $14*16, %edi
-    je      aes_encrypt.1
-    movl    $-1, %eax
-    jmp     aes_encrypt.5
-
-aes_encrypt.1:
-    enc_round
-    enc_round
-aes_encrypt.2:
-    enc_round
-    enc_round
-aes_encrypt.3:
-    enc_round
-    enc_round
-    enc_round
-    enc_round
-    enc_round
-    enc_round
-    enc_round
-    enc_round
-    enc_round
-    enc_last_round
-
-    movl    out_blk+stk_spc(%esp), %edx
-    movl    %eax, (%edx)
-    movl    %ebx, 4(%edx)
-    movl    %esi, 8(%edx)
-    movl    %edi, 12(%edx)
-    xorl    %eax, %eax
-
-aes_encrypt.5:
-    movl    16(%esp), %ebp
-    movl    12(%esp), %ebx
-    movl    8(%esp), %esi
-    movl    4(%esp), %edi
-    addl    $stk_spc, %esp
-    ret
-
-#endif
-
-/*
- * For r2 == 16, or r2 == 24 && r1 == 7, or r2 == 32 && r1 == 6
- */
-#define	f_key(r1, r2, rc_val)		\
-    l3s_col(esi, a, ebx);		\
-    xorl    $rc_val, %esi;		\
-					\
-    movl    %esi, r1*r2(%ebp);		\
-    xorl    %esi, %edi;			\
-    movl    %edi, r1*r2+4(%ebp);	\
-    xorl    %edi, %ecx;			\
-    movl    %ecx, r1*r2+8(%ebp);	\
-    xorl    %ecx, %edx;			\
-    movl    %edx, r1*r2+12(%ebp);	\
-    movl    %edx, %eax;
-
-/*
- * For r2 == 24 && r1 == 0 to 6
- */
-#define	f_key_24(r1, r2, rc_val)	\
-    f_key(r1, r2, rc_val);		\
-					\
-    xorl    r1*r2+16-r2(%ebp), %eax;	\
-    movl    %eax, r1*r2+16(%ebp);	\
-    xorl    r1*r2+20-r2(%ebp), %eax;	\
-    movl    %eax, r1*r2+20(%ebp);
-
-/*
- * For r2 == 32 && r1 == 0 to 5
- */
-#define	f_key_32(r1, r2, rc_val)	\
-    f_key(r1, r2, rc_val);		\
-					\
-    roll    $8, %eax;			\
-    pushl   %edx;			\
-    movl    r1*r2+16-r2(%ebp), %edx;	\
-    l3s_col(edx, a, ebx);		\
-    movl    %edx, %eax;			\
-    popl    %edx;			\
-    movl    %eax, r1*r2+16(%ebp);	\
-    xorl    r1*r2+20-r2(%ebp), %eax;	\
-    movl    %eax, r1*r2+20(%ebp);	\
-    xorl    r1*r2+24-r2(%ebp), %eax;	\
-    movl    %eax, r1*r2+24(%ebp);	\
-    xorl    r1*r2+28-r2(%ebp), %eax;	\
-    movl    %eax, r1*r2+28(%ebp);
-
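-/*
- * In C terms one f_key step for AES-128 is roughly the textbook
- * expansion (w[] = expanded key as 32-bit words; names illustrative):
- *
- *   w[i]   = w[i-4] ^ sub_word(rot_word(w[i-1])) ^ rc_val;
- *   w[i+1] = w[i-3] ^ w[i];
- *   w[i+2] = w[i-2] ^ w[i+1];
- *   w[i+3] = w[i-1] ^ w[i+2];
- *
- * with l3s_col supplying the combined rotate/substitute step from the
- * encryption table's S-box bytes.
- */
-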
-#ifdef ENCRYPTION_KEY_SCHEDULE
-
-#ifdef  AES_128
-
-#ifndef ENCRYPTION_TABLE
-#define ENCRYPTION_TABLE 
-#endif
-
-Entry(aes_encrypt_key128)
-
-    pushl   %ebp
-    pushl   %ebx
-    pushl   %esi
-    pushl   %edi
-
-    movl    24(%esp), %ebp
-    movl    $10*16, 4*KS_LENGTH(%ebp)
-    movl    20(%esp), %ebx
-
-    movl    (%ebx), %esi
-    movl    %esi, (%ebp)
-    movl    4(%ebx), %edi
-    movl    %edi, 4(%ebp)
-    movl    8(%ebx), %ecx
-    movl    %ecx, 8(%ebp)
-    movl    12(%ebx), %edx
-    movl    %edx, 12(%ebp)
-    addl    $16, %ebp
-    movl    %edx, %eax
-
-    f_key(0, 16, 1)
-    f_key(1, 16, 2)
-    f_key(2, 16, 4)
-    f_key(3, 16, 8)
-    f_key(4, 16, 16)
-    f_key(5, 16, 32)
-    f_key(6, 16, 64)
-    f_key(7, 16, 128)
-    f_key(8, 16, 27)
-    f_key(9, 16, 54)
-
-    popl    %edi
-    popl    %esi
-    popl    %ebx
-    popl    %ebp
-    xorl    %eax, %eax
-    ret
-
-#endif
-
-#ifdef  AES_192
-
-#ifndef ENCRYPTION_TABLE
-#define ENCRYPTION_TABLE 
-#endif
-
-Entry(aes_encrypt_key192)
-
-    pushl   %ebp
-    pushl   %ebx
-    pushl   %esi
-    pushl   %edi
-
-    movl    24(%esp), %ebp
-    movl    $12*16, 4*KS_LENGTH(%ebp)
-    movl    20(%esp), %ebx
-
-    movl    (%ebx), %esi
-    movl    %esi, (%ebp)
-    movl    4(%ebx), %edi
-    movl    %edi, 4(%ebp)
-    movl    8(%ebx), %ecx
-    movl    %ecx, 8(%ebp)
-    movl    12(%ebx), %edx
-    movl    %edx, 12(%ebp)
-    movl    16(%ebx), %eax
-    movl    %eax, 16(%ebp)
-    movl    20(%ebx), %eax
-    movl    %eax, 20(%ebp)
-    addl    $24, %ebp
-
-    f_key_24(0, 24, 1)
-    f_key_24(1, 24, 2)
-    f_key_24(2, 24, 4)
-    f_key_24(3, 24, 8)
-    f_key_24(4, 24, 16)
-    f_key_24(5, 24, 32)
-    f_key_24(6, 24, 64)
-    f_key(7, 24, 128)
-
-    popl    %edi
-    popl    %esi
-    popl    %ebx
-    popl    %ebp
-    xorl    %eax, %eax
-    ret
-
-#endif
-
-#ifdef  AES_256
-
-#ifndef ENCRYPTION_TABLE
-#define ENCRYPTION_TABLE 
-#endif
-
-Entry(aes_encrypt_key256)
-
-    pushl   %ebp
-    pushl   %ebx
-    pushl   %esi
-    pushl   %edi
-
-    movl    24(%esp), %ebp
-    movl    $14*16, 4*KS_LENGTH(%ebp)
-    movl    20(%esp), %ebx
-
-    movl    (%ebx), %esi
-    movl    %esi, (%ebp)
-    movl    4(%ebx), %edi
-    movl    %edi, 4(%ebp)
-    movl    8(%ebx), %ecx
-    movl    %ecx, 8(%ebp)
-    movl    12(%ebx), %edx
-    movl    %edx, 12(%ebp)
-    movl    16(%ebx), %eax
-    movl    %eax, 16(%ebp)
-    movl    20(%ebx), %eax
-    movl    %eax, 20(%ebp)
-    movl    24(%ebx), %eax
-    movl    %eax, 24(%ebp)
-    movl    28(%ebx), %eax
-    movl    %eax, 28(%ebp)
-    addl    $32, %ebp
-
-    f_key_32(0, 32, 1)
-    f_key_32(1, 32, 2)
-    f_key_32(2, 32, 4)
-    f_key_32(3, 32, 8)
-    f_key_32(4, 32, 16)
-    f_key_32(5, 32, 32)
-    f_key(6, 32, 64)
-
-    popl    %edi
-    popl    %esi
-    popl    %ebx
-    popl    %ebp
-    xorl    %eax, %eax
-    ret
-
-#endif
-
-#ifdef  AES_VAR
-
-#ifndef ENCRYPTION_TABLE
-#define ENCRYPTION_TABLE 
-#endif
-
-Entry(aes_encrypt_key)
-
-    movl    4(%esp), %ecx
-    movl    8(%esp), %eax
-    movl    12(%esp), %edx
-    pushl   %edx
-    pushl   %ecx
-
-    cmpl    $16, %eax
-    je      aes_encrypt_key.1
-    cmpl    $128, %eax
-    je      aes_encrypt_key.1
-
-    cmpl    $24, %eax
-    je      aes_encrypt_key.2
-    cmpl    $192, %eax
-    je      aes_encrypt_key.2
-
-    cmpl    $32, %eax
-    je      aes_encrypt_key.3
-    cmpl    $256, %eax
-    je      aes_encrypt_key.3
-    movl    $-1, %eax
-    addl    $8, %esp
-    ret
-
-aes_encrypt_key.1:
-    do_call(aes_encrypt_key128, 8)
-    ret
-aes_encrypt_key.2:
-    do_call(aes_encrypt_key192, 8)
-    ret
-aes_encrypt_key.3:
-    do_call(aes_encrypt_key256, 8)
-    ret
-
-#endif
-
-#endif
-
-#ifdef ENCRYPTION_TABLE
-
-# S-box data - 256 entries
-
-    .section __DATA, __data
-    .align ALIGN
-
-#define u8(x) 0, x, x, f3(x), f2(x), x, x, f3(x)
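-
-/*
- * Each u8() entry is 8 bytes, so an unaligned 32-bit load at offset k
- * (the etab_k macros defined earlier) picks up the four MixColumns
- * multiples of x = sbox[i] in rotated order; e.g. (little-endian view):
- *
- *   etab_0 lookup = f2(x) | x << 8 | x << 16 | f3(x) << 24
- *
- * which is the (2,1,1,3) column of the standard T-table.
- */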
-
-enc_tab: 
-   .byte u8(0x63),u8(0x7c),u8(0x77),u8(0x7b),u8(0xf2),u8(0x6b),u8(0x6f),u8(0xc5)
-   .byte u8(0x30),u8(0x01),u8(0x67),u8(0x2b),u8(0xfe),u8(0xd7),u8(0xab),u8(0x76)
-   .byte u8(0xca),u8(0x82),u8(0xc9),u8(0x7d),u8(0xfa),u8(0x59),u8(0x47),u8(0xf0)
-   .byte u8(0xad),u8(0xd4),u8(0xa2),u8(0xaf),u8(0x9c),u8(0xa4),u8(0x72),u8(0xc0)
-   .byte u8(0xb7),u8(0xfd),u8(0x93),u8(0x26),u8(0x36),u8(0x3f),u8(0xf7),u8(0xcc)
-   .byte u8(0x34),u8(0xa5),u8(0xe5),u8(0xf1),u8(0x71),u8(0xd8),u8(0x31),u8(0x15)
-   .byte u8(0x04),u8(0xc7),u8(0x23),u8(0xc3),u8(0x18),u8(0x96),u8(0x05),u8(0x9a)
-   .byte u8(0x07),u8(0x12),u8(0x80),u8(0xe2),u8(0xeb),u8(0x27),u8(0xb2),u8(0x75)
-   .byte u8(0x09),u8(0x83),u8(0x2c),u8(0x1a),u8(0x1b),u8(0x6e),u8(0x5a),u8(0xa0)
-   .byte u8(0x52),u8(0x3b),u8(0xd6),u8(0xb3),u8(0x29),u8(0xe3),u8(0x2f),u8(0x84)
-   .byte u8(0x53),u8(0xd1),u8(0x00),u8(0xed),u8(0x20),u8(0xfc),u8(0xb1),u8(0x5b)
-   .byte u8(0x6a),u8(0xcb),u8(0xbe),u8(0x39),u8(0x4a),u8(0x4c),u8(0x58),u8(0xcf)
-   .byte u8(0xd0),u8(0xef),u8(0xaa),u8(0xfb),u8(0x43),u8(0x4d),u8(0x33),u8(0x85)
-   .byte u8(0x45),u8(0xf9),u8(0x02),u8(0x7f),u8(0x50),u8(0x3c),u8(0x9f),u8(0xa8)
-   .byte u8(0x51),u8(0xa3),u8(0x40),u8(0x8f),u8(0x92),u8(0x9d),u8(0x38),u8(0xf5)
-   .byte u8(0xbc),u8(0xb6),u8(0xda),u8(0x21),u8(0x10),u8(0xff),u8(0xf3),u8(0xd2)
-   .byte u8(0xcd),u8(0x0c),u8(0x13),u8(0xec),u8(0x5f),u8(0x97),u8(0x44),u8(0x17)
-   .byte u8(0xc4),u8(0xa7),u8(0x7e),u8(0x3d),u8(0x64),u8(0x5d),u8(0x19),u8(0x73)
-   .byte u8(0x60),u8(0x81),u8(0x4f),u8(0xdc),u8(0x22),u8(0x2a),u8(0x90),u8(0x88)
-   .byte u8(0x46),u8(0xee),u8(0xb8),u8(0x14),u8(0xde),u8(0x5e),u8(0x0b),u8(0xdb)
-   .byte u8(0xe0),u8(0x32),u8(0x3a),u8(0x0a),u8(0x49),u8(0x06),u8(0x24),u8(0x5c)
-   .byte u8(0xc2),u8(0xd3),u8(0xac),u8(0x62),u8(0x91),u8(0x95),u8(0xe4),u8(0x79)
-   .byte u8(0xe7),u8(0xc8),u8(0x37),u8(0x6d),u8(0x8d),u8(0xd5),u8(0x4e),u8(0xa9)
-   .byte u8(0x6c),u8(0x56),u8(0xf4),u8(0xea),u8(0x65),u8(0x7a),u8(0xae),u8(0x08)
-   .byte u8(0xba),u8(0x78),u8(0x25),u8(0x2e),u8(0x1c),u8(0xa6),u8(0xb4),u8(0xc6)
-   .byte u8(0xe8),u8(0xdd),u8(0x74),u8(0x1f),u8(0x4b),u8(0xbd),u8(0x8b),u8(0x8a)
-   .byte u8(0x70),u8(0x3e),u8(0xb5),u8(0x66),u8(0x48),u8(0x03),u8(0xf6),u8(0x0e)
-   .byte u8(0x61),u8(0x35),u8(0x57),u8(0xb9),u8(0x86),u8(0xc1),u8(0x1d),u8(0x9e)
-   .byte u8(0xe1),u8(0xf8),u8(0x98),u8(0x11),u8(0x69),u8(0xd9),u8(0x8e),u8(0x94)
-   .byte u8(0x9b),u8(0x1e),u8(0x87),u8(0xe9),u8(0xce),u8(0x55),u8(0x28),u8(0xdf)
-   .byte u8(0x8c),u8(0xa1),u8(0x89),u8(0x0d),u8(0xbf),u8(0xe6),u8(0x42),u8(0x68)
-   .byte u8(0x41),u8(0x99),u8(0x2d),u8(0x0f),u8(0xb0),u8(0x54),u8(0xbb),u8(0x16)
-
-#endif
-
-#ifdef  DECRYPTION
-
-#define DECRYPTION_TABLE 
-
-#define dtab_0(x) dec_tab(,x,8)
-#define dtab_1(x) dec_tab+3(,x,8)
-#define dtab_2(x) dec_tab+2(,x,8)
-#define dtab_3(x) dec_tab+1(,x,8)
-#define dtab_x(x) dec_tab+7(,x,8)
-
-#define	irn_fun(m1, m2)			\
-    roll    $16, %eax;			\
-					\
-    ## m1 ## _zo(esi, cl, 0, ebp);	\
-    m1(esi, bh, 1, ebp);		\
-    m1(esi, al, 2, ebp);		\
-    ## m1 ## _zo(edi, dl, 0, ebp);	\
-    m1(edi, ch, 1, ebp);		\
-    m1(edi, ah, 3, ebp);		\
-    ## m2 ## _zo(ebp, bl, 0, ebp);	\
-					\
-    shrl    $16, %eax;			\
-    andl    $0xffff0000, %ebx;		\
-    orl     %eax, %ebx;			\
-    shrl    $16, %ecx;			\
-					\
-    m1(ebp, bh, 1, eax);		\
-    m1(ebp, ch, 3, eax);		\
-    m2(eax, cl, 2, ecx);		\
-    ## m1 ## _zo(eax, bl, 0, ecx);	\
-    m1(eax, dh, 1, ecx);		\
-					\
-    shrl    $16, %ebx;			\
-    shrl    $16, %edx;			\
-					\
-    m1(esi, dh, 3, ecx);		\
-    m1(ebp, dl, 2, ecx);		\
-    m1(eax, bh, 3, ecx);		\
-    m1(edi, bl, 2, ecx);
-
-/*
- * Basic MOV and XOR Operations for normal rounds
- */
-#define	ni_xor_zo	ni_xor
-#define	ni_xor(r1, r2, r3, r4)		\
-    movzbl  %r2, %r4;			\
-    xorl    dtab_ ## r3 ## (%r4), %r1;
-
-#define	ni_mov_zo	ni_mov
-#define	ni_mov(r1, r2, r3, r4)		\
-    movzbl  %r2, %r4;			\
-    movl    dtab_ ## r3 ## (%r4), %r1;
-
-/*
- * Basic MOV and XOR Operations for last round
- */
-
-#define	li_xor_zo(r1, r2, r3, r4)	\
-    movzbl %r2, %r4;			\
-    movzbl dtab_x(%r4), %r4;		\
-    xor    %r4, %r1;
-
-#define	li_xor(r1, r2, r3, r4)		\
-    movzbl %r2, %r4;			\
-    movzbl dtab_x(%r4), %r4;		\
-    shll   $(8*r3), %r4;		\
-    xor    %r4, %r1;
-
-#define	li_mov_zo(r1, r2, r3, r4)	\
-    movzbl %r2, %r4;			\
-    movzbl dtab_x(%r4), %r1;
-
-#define	li_mov(r1, r2, r3, r4)		\
-    movzbl %r2, %r4;			\
-    movzbl dtab_x(%r4), %r1;		\
-    shl    $(8*r3), %r1;
-
-#ifdef AES_REV_DKS
-
-#define	dec_round			\
-    addl    $16, %ebp;			\
-    save(ebp);				\
-    movl    8(%ebp), %esi;		\
-    movl    12(%ebp), %edi;		\
-					\
-    irn_fun(ni_xor, ni_mov);		\
-					\
-    movl    %ebp, %ebx;			\
-    movl    %esi, %ecx;			\
-    movl    %edi, %edx;			\
-    restore(ebp);			\
-    xorl    (%ebp), %eax;		\
-    xorl    4(%ebp), %ebx;
-
-#define	dec_last_round			\
-    addl    $16, %ebp;			\
-    save(ebp);				\
-    movl    8(%ebp), %esi;		\
-    movl    12(%ebp), %edi;		\
-					\
-    irn_fun(li_xor, li_mov);		\
-					\
-    movl    %ebp, %ebx;			\
-    restore(ebp);			\
-    xorl    (%ebp), %eax;		\
-    xorl    4(%ebp), %ebx;
-
-#else
-
-#define	dec_round			\
-    subl    $16, %ebp;			\
-    save(ebp);				\
-    movl    8(%ebp), %esi;		\
-    movl    12(%ebp), %edi;		\
-					\
-    irn_fun(ni_xor, ni_mov);		\
-					\
-    movl    %ebp, %ebx;			\
-    movl    %esi, %ecx;			\
-    movl    %edi, %edx;			\
-    restore(ebp);			\
-    xorl    (%ebp), %eax;		\
-    xorl    4(%ebp), %ebx;
-
-#define	dec_last_round			\
-    subl    $16, %ebp;			\
-    save(ebp);				\
-    movl    8(%ebp), %esi;		\
-    movl    12(%ebp), %edi;		\
-					\
-    irn_fun(li_xor, li_mov);		\
-					\
-    movl    %ebp, %ebx;			\
-    restore(ebp);			\
-    xorl    (%ebp), %eax;		\
-    xorl    4(%ebp), %ebx;
-
-#endif /* AES_REV_DKS */
-
-    .section __TEXT, __text
-
-/*
- * AES Decryption Subroutine
- */
-Entry(aes_decrypt)
-
-    subl    $stk_spc, %esp
-    movl    %ebp, 16(%esp)
-    movl    %ebx, 12(%esp)
-    movl    %esi, 8(%esp)
-    movl    %edi, 4(%esp)
-
-    /*
-     * input four columns and xor in first round key
-     */
-    movl    in_blk+stk_spc(%esp), %esi	/* input pointer */
-    movl    (%esi), %eax
-    movl    4(%esi), %ebx
-    movl    8(%esi), %ecx
-    movl    12(%esi), %edx
-    leal    16(%esi), %esi
-
-    movl    ctx+stk_spc(%esp), %ebp	/* key pointer */
-    movzbl  4*KS_LENGTH(%ebp), %edi
-#ifndef  AES_REV_DKS		/* if decryption key schedule is not reversed */
-    leal    (%ebp,%edi), %ebp	/* we have to access it from the top down */
-#endif
-    xorl    (%ebp), %eax	/* key schedule */
-    xorl    4(%ebp), %ebx
-    xorl    8(%ebp), %ecx
-    xorl    12(%ebp), %edx
-
-    /*
-     * determine the number of rounds
-     */
-    cmpl    $10*16, %edi
-    je     aes_decrypt.3
-    cmpl    $12*16, %edi
-    je     aes_decrypt.2
-    cmpl    $14*16, %edi
-    je      aes_decrypt.1
-    movl    $-1, %eax
-    jmp     aes_decrypt.5
-
-aes_decrypt.1:
-    dec_round
-    dec_round
-aes_decrypt.2:
-    dec_round
-    dec_round
-aes_decrypt.3:
-    dec_round
-    dec_round
-    dec_round
-    dec_round
-    dec_round
-    dec_round
-    dec_round
-    dec_round
-    dec_round
-    dec_last_round
-
-    /*
-     * move final values to the output array.
-     */
-    movl    out_blk+stk_spc(%esp), %ebp
-    movl    %eax, (%ebp)
-    movl    %ebx, 4(%ebp)
-    movl    %esi, 8(%ebp)
-    movl    %edi, 12(%ebp)
-    xorl    %eax, %eax
-
-aes_decrypt.5:
-    movl    16(%esp), %ebp
-    movl    12(%esp), %ebx
-    movl    8(%esp), %esi
-    movl    4(%esp), %edi
-    addl    $stk_spc, %esp
-    ret
-
-#endif
-
-#define	inv_mix_col			\
-    movzbl  %dl, %ebx;			\
-    movzbl  etab_b(%ebx), %ebx;		\
-    movl    dtab_0(%ebx), %eax;		\
-    movzbl  %dh, %ebx;			\
-    shrl    $16, %edx;			\
-    movzbl  etab_b(%ebx), %ebx;		\
-    xorl    dtab_1(%ebx), %eax;		\
-    movzbl  %dl, %ebx;			\
-    movzbl  etab_b(%ebx), %ebx;		\
-    xorl    dtab_2(%ebx), %eax;		\
-    movzbl  %dh, %ebx;			\
-    movzbl  etab_b(%ebx), %ebx;		\
-    xorl    dtab_3(%ebx), %eax;
-
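-/*
- * Roughly, inv_mix_col computes InvMixColumns() of the word in %edx:
- * each byte is first sent through the forward S-box (etab_b) so that
- * the inverse S-box folded into the dec_tab entries cancels out,
- * leaving only the column transform.  As a C sketch (illustrative):
- *
- *   eax = dtab_0[sbox[b0]] ^ dtab_1[sbox[b1]]
- *       ^ dtab_2[sbox[b2]] ^ dtab_3[sbox[b3]];
- */
-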
-#ifdef DECRYPTION_KEY_SCHEDULE
-
-#ifdef AES_128
-
-#ifndef DECRYPTION_TABLE
-#define DECRYPTION_TABLE 
-#endif
-
-Entry(aes_decrypt_key128)
-
-    pushl   %ebp
-    pushl   %ebx
-    pushl   %esi
-    pushl   %edi
-    movl    24(%esp), %eax	/* context */
-    movl    20(%esp), %edx	/* key */
-    pushl   %eax
-    pushl   %edx
-    do_call(aes_encrypt_key128, 8)
-    movl    $10*16, %eax
-    movl    24(%esp), %esi	/* pointer to first round key */
-    leal    (%esi,%eax), %edi	/* pointer to last round key */
-    addl    $32, %esi
-				/* the inverse mix column transformation */
-    movl    -16(%esi), %edx	/* needs to be applied to all round keys */
-    inv_mix_col
-    movl    %eax, -16(%esi)	/* except the first and the last, so start */
-    movl    -12(%esi), %edx	/* by transforming the four sub-keys in the */
-    inv_mix_col
-    movl    %eax, -12(%esi)	/* second round key.  transformations for */
-    movl    -8(%esi), %edx	/* subsequent rounds can then be made more */
-    inv_mix_col
-    movl    %eax, -8(%esi)	/* efficient: for n = 1..3 we have, in the */
-    movl    -4(%esi), %edx	/* encryption round key ek[r]: */
-    inv_mix_col
-    movl    %eax, -4(%esi)	/* ek[r][n] = ek[r][n-1] ^ ek[r-1][n] */
-
-aes_decrypt_key128.0:		/* hence the corresponding */
-    movl    (%esi), %edx	/* subkeys in the decryption round key dk[r] */
-    inv_mix_col
-    movl    %eax, (%esi)	/* obey, as inv_mix_col is linear over */
-    xorl    -12(%esi), %eax	/* GF(256): */
-    movl    %eax, 4(%esi)	/* dk[r][n] = dk[r][n-1] ^ dk[r-1][n] */
-    xorl    -8(%esi), %eax
-    movl    %eax, 8(%esi)	/* So we only need one inverse mix column */
-    xorl    -4(%esi), %eax	/* operation (n = 0) for each four word cycle */
-    movl    %eax, 12(%esi)	/* in the expanded key. */
-    addl    $16, %esi
-    cmpl    %esi, %edi
-    jg      aes_decrypt_key128.0
-    jmp     dec_end
-
-#endif
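-
-/*
- * Sketch of the speed-up used above (C-like, names illustrative): one
- * table-based transform per four-word cycle is enough, because the
- * remaining words follow from the xor relation noted in the comments:
- *
- *   dk[r][0] = inv_mix_col(ek_word);
- *   dk[r][1] = dk[r][0] ^ dk[r-1][1];
- *   dk[r][2] = dk[r][1] ^ dk[r-1][2];
- *   dk[r][3] = dk[r][2] ^ dk[r-1][3];
- */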
-
-#ifdef AES_192
-
-#ifndef DECRYPTION_TABLE
-#define DECRYPTION_TABLE 
-#endif
-
-Entry(aes_decrypt_key192)
-
-    pushl   %ebp
-    pushl   %ebx
-    pushl   %esi
-    pushl   %edi
-    movl    24(%esp), %eax	/* context */
-    movl    20(%esp), %edx	/* key */
-    pushl   %eax
-    pushl   %edx
-    do_call(aes_encrypt_key192, 8)
-    movl    $12*16, %eax
-    movl    24(%esp), %esi	/* first round key */
-    leal    (%esi,%eax), %edi	/* last round key */
-    addl    $48, %esi		/* the first 6 words are the key, of */
-				/* which the top 2 words are part of */
-    movl    -32(%esi), %edx	/* the second round key, and hence we need */
-    inv_mix_col
-    movl    %eax, -32(%esi)	/* to transform a further six values before */
-    movl    -28(%esi), %edx	/* switching to the more efficient technique */
-    inv_mix_col
-    movl    %eax, -28(%esi)	/* based on */
-				/* dk[r][n] = dk[r][n-1] ^ dk[r-1][n] */
-    movl    -24(%esi), %edx
-    inv_mix_col
-    movl    %eax, -24(%esi)	/* (the cycle is now 6 words long) */
-    movl    -20(%esi), %edx
-    inv_mix_col
-    movl    %eax, -20(%esi)
-    movl    -16(%esi), %edx
-    inv_mix_col
-    movl    %eax, -16(%esi)
-    movl    -12(%esi), %edx
-    inv_mix_col
-    movl    %eax, -12(%esi)
-    movl    -8(%esi), %edx
-    inv_mix_col
-    movl    %eax, -8(%esi)
-    movl    -4(%esi), %edx
-    inv_mix_col
-    movl    %eax, -4(%esi)
-
-aes_decrypt_key192.0:
-    movl    (%esi), %edx	/* expanded key is 13 * 4 = 52 32-bit words, */
-    inv_mix_col
-    movl    %eax, (%esi)	/* 44 of which need inv_mix_col; we have done 8 */
-    xorl    -20(%esi), %eax	/* of these so 36 are left - hence we need */
-    movl    %eax, 4(%esi)	/* exactly 6 loops of six here */
-    xorl    -16(%esi), %eax
-    movl    %eax, 8(%esi)
-    xorl    -12(%esi), %eax
-    movl    %eax, 12(%esi)
-    xorl    -8(%esi), %eax
-    movl    %eax, 16(%esi)
-    xorl    -4(%esi), %eax
-    movl    %eax, 20(%esi)
-    addl    $24, %esi
-    cmpl    %esi, %edi
-    jg      aes_decrypt_key192.0
-    jmp     dec_end
-
-#endif
-
-#ifdef AES_256
-
-#ifndef DECRYPTION_TABLE
-#define DECRYPTION_TABLE 
-#endif
-
-Entry(aes_decrypt_key256)
-
-    pushl   %ebp
-    pushl   %ebx
-    pushl   %esi
-    pushl   %edi
-    movl    24(%esp), %eax
-    movl    20(%esp), %edx
-    pushl   %eax
-    pushl   %edx
-    do_call(aes_encrypt_key256, 8)
-    movl    $14*16, %eax
-    movl    24(%esp), %esi
-    leal    (%esi,%eax), %edi
-    addl    $64, %esi
-
-    movl    -48(%esi), %edx	/* the primary key is 8 words, of which */
-    inv_mix_col
-    movl    %eax, -48(%esi)	/* the top 4 form the second round key */
-    movl    -44(%esi), %edx
-    inv_mix_col
-    movl    %eax, -44(%esi)
-    movl    -40(%esi), %edx
-    inv_mix_col
-    movl    %eax, -40(%esi)
-    movl    -36(%esi), %edx
-    inv_mix_col
-    movl    %eax, -36(%esi)
-
-    movl    -32(%esi), %edx	/* the encryption key expansion cycle is */
-    inv_mix_col
-    movl    %eax, -32(%esi)	/* now 8 words long, so start by doing one complete block */
-    movl    -28(%esi), %edx
-    inv_mix_col
-    movl    %eax, -28(%esi)
-    movl    -24(%esi), %edx
-    inv_mix_col
-    movl    %eax, -24(%esi)
-    movl    -20(%esi), %edx
-    inv_mix_col
-    movl    %eax, -20(%esi)
-    movl    -16(%esi), %edx
-    inv_mix_col
-    movl    %eax, -16(%esi)
-    movl    -12(%esi), %edx
-    inv_mix_col
-    movl    %eax, -12(%esi)
-    movl    -8(%esi), %edx
-    inv_mix_col
-    movl    %eax, -8(%esi)
-    movl    -4(%esi), %edx
-    inv_mix_col
-    movl    %eax, -4(%esi)
-
-aes_decrypt_key256.0:
-    movl    (%esi), %edx	/* we can now speed up the remaining */
-    inv_mix_col
-    movl    %eax, (%esi)	/* keys using the technique outlined */
-    xorl    -28(%esi), %eax	/* earlier, though there is one extra */
-    movl    %eax, 4(%esi)	/* inverse mix column operation as the 256 bit */
-    xorl    -24(%esi), %eax	/* key has an extra non-linear step */
-    movl    %eax, 8(%esi)	/* for the midway element. */
-    xorl    -20(%esi), %eax
-    movl    %eax, 12(%esi)	/* the expanded key is 15 * 4 = 60 */
-    movl    16(%esi), %edx	/* 32-bit words of which 52 need to */
-    inv_mix_col
-    movl    %eax, 16(%esi)	/* be transformed; we have already done */
-    xorl    -12(%esi), %eax	/* 12 so 40 are left - which means */
-    movl    %eax, 20(%esi)	/* that we need exactly 5 loops of 8 here */
-    xorl    -8(%esi), %eax
-    movl    %eax, 24(%esi)
-    xorl    -4(%esi), %eax
-    movl    %eax, 28(%esi)
-    addl    $32, %esi
-    cmpl    %esi, %edi
-    jg      aes_decrypt_key256.0
-
-#endif
-
-dec_end: 
-
-#ifdef AES_REV_DKS
-
-    movl    24(%esp), %esi	/* this reverses the order of the */
-dec_end.1:
-    movl    (%esi), %eax	/* round keys if required */
-    movl    4(%esi), %ebx
-    movl    (%edi), %ebp
-    movl    4(%edi), %edx
-    movl    %ebp, (%esi)
-    movl    %edx, 4(%esi)
-    movl    %eax, (%edi)
-    movl    %ebx, 4(%edi)
-
-    movl    8(%esi), %eax
-    movl    12(%esi), %ebx
-    movl    8(%edi), %ebp
-    movl    12(%edi), %edx
-    movl    %ebp, 8(%esi)
-    movl    %edx, 12(%esi)
-    movl    %eax, 8(%edi)
-    movl    %ebx, 12(%edi)
-
-    addl    $16, %esi
-    subl    $16, %edi
-    cmpl    %esi, %edi
-    jg      dec_end.1
-
-#endif
-
-    popl    %edi
-    popl    %esi
-    popl    %ebx
-    popl    %ebp
-    xorl    %eax, %eax
-    ret
-
-#ifdef AES_VAR
-
-Entry(aes_decrypt_key)
-
-    movl    4(%esp), %ecx
-    movl    8(%esp), %eax
-    movl    12(%esp), %edx
-    pushl   %edx
-    pushl   %ecx
-
-    cmpl    $16, %eax
-    je      aes_decrypt_key.1
-    cmpl    $128, %eax
-    je      aes_decrypt_key.1
-
-    cmpl    $24, %eax
-    je      aes_decrypt_key.2
-    cmpl    $192, %eax
-    je      aes_decrypt_key.2
-
-    cmpl    $32, %eax
-    je      aes_decrypt_key.3
-    cmpl    $256, %eax
-    je      aes_decrypt_key.3
-    movl    $-1, %eax
-    addl    $8, %esp
-    ret
-
-aes_decrypt_key.1:
-    do_call(aes_decrypt_key128, 8)
-    ret
-aes_decrypt_key.2:
-    do_call(aes_decrypt_key192, 8)
-    ret
-aes_decrypt_key.3:
-    do_call(aes_decrypt_key256, 8)
-    ret
-
-#endif
-
-#endif
-
-#ifdef DECRYPTION_TABLE
-
-/*
- * Inverse S-box data - 256 entries
- */
-
-    .section __DATA, __data
-    .align ALIGN
-
-#define v8(x) fe(x), f9(x), fd(x), fb(x), fe(x), f9(x), fd(x), x
-
-dec_tab: 
-   .byte v8(0x52),v8(0x09),v8(0x6a),v8(0xd5),v8(0x30),v8(0x36),v8(0xa5),v8(0x38)
-   .byte v8(0xbf),v8(0x40),v8(0xa3),v8(0x9e),v8(0x81),v8(0xf3),v8(0xd7),v8(0xfb)
-   .byte v8(0x7c),v8(0xe3),v8(0x39),v8(0x82),v8(0x9b),v8(0x2f),v8(0xff),v8(0x87)
-   .byte v8(0x34),v8(0x8e),v8(0x43),v8(0x44),v8(0xc4),v8(0xde),v8(0xe9),v8(0xcb)
-   .byte v8(0x54),v8(0x7b),v8(0x94),v8(0x32),v8(0xa6),v8(0xc2),v8(0x23),v8(0x3d)
-   .byte v8(0xee),v8(0x4c),v8(0x95),v8(0x0b),v8(0x42),v8(0xfa),v8(0xc3),v8(0x4e)
-   .byte v8(0x08),v8(0x2e),v8(0xa1),v8(0x66),v8(0x28),v8(0xd9),v8(0x24),v8(0xb2)
-   .byte v8(0x76),v8(0x5b),v8(0xa2),v8(0x49),v8(0x6d),v8(0x8b),v8(0xd1),v8(0x25)
-   .byte v8(0x72),v8(0xf8),v8(0xf6),v8(0x64),v8(0x86),v8(0x68),v8(0x98),v8(0x16)
-   .byte v8(0xd4),v8(0xa4),v8(0x5c),v8(0xcc),v8(0x5d),v8(0x65),v8(0xb6),v8(0x92)
-   .byte v8(0x6c),v8(0x70),v8(0x48),v8(0x50),v8(0xfd),v8(0xed),v8(0xb9),v8(0xda)
-   .byte v8(0x5e),v8(0x15),v8(0x46),v8(0x57),v8(0xa7),v8(0x8d),v8(0x9d),v8(0x84)
-   .byte v8(0x90),v8(0xd8),v8(0xab),v8(0x00),v8(0x8c),v8(0xbc),v8(0xd3),v8(0x0a)
-   .byte v8(0xf7),v8(0xe4),v8(0x58),v8(0x05),v8(0xb8),v8(0xb3),v8(0x45),v8(0x06)
-   .byte v8(0xd0),v8(0x2c),v8(0x1e),v8(0x8f),v8(0xca),v8(0x3f),v8(0x0f),v8(0x02)
-   .byte v8(0xc1),v8(0xaf),v8(0xbd),v8(0x03),v8(0x01),v8(0x13),v8(0x8a),v8(0x6b)
-   .byte v8(0x3a),v8(0x91),v8(0x11),v8(0x41),v8(0x4f),v8(0x67),v8(0xdc),v8(0xea)
-   .byte v8(0x97),v8(0xf2),v8(0xcf),v8(0xce),v8(0xf0),v8(0xb4),v8(0xe6),v8(0x73)
-   .byte v8(0x96),v8(0xac),v8(0x74),v8(0x22),v8(0xe7),v8(0xad),v8(0x35),v8(0x85)
-   .byte v8(0xe2),v8(0xf9),v8(0x37),v8(0xe8),v8(0x1c),v8(0x75),v8(0xdf),v8(0x6e)
-   .byte v8(0x47),v8(0xf1),v8(0x1a),v8(0x71),v8(0x1d),v8(0x29),v8(0xc5),v8(0x89)
-   .byte v8(0x6f),v8(0xb7),v8(0x62),v8(0x0e),v8(0xaa),v8(0x18),v8(0xbe),v8(0x1b)
-   .byte v8(0xfc),v8(0x56),v8(0x3e),v8(0x4b),v8(0xc6),v8(0xd2),v8(0x79),v8(0x20)
-   .byte v8(0x9a),v8(0xdb),v8(0xc0),v8(0xfe),v8(0x78),v8(0xcd),v8(0x5a),v8(0xf4)
-   .byte v8(0x1f),v8(0xdd),v8(0xa8),v8(0x33),v8(0x88),v8(0x07),v8(0xc7),v8(0x31)
-   .byte v8(0xb1),v8(0x12),v8(0x10),v8(0x59),v8(0x27),v8(0x80),v8(0xec),v8(0x5f)
-   .byte v8(0x60),v8(0x51),v8(0x7f),v8(0xa9),v8(0x19),v8(0xb5),v8(0x4a),v8(0x0d)
-   .byte v8(0x2d),v8(0xe5),v8(0x7a),v8(0x9f),v8(0x93),v8(0xc9),v8(0x9c),v8(0xef)
-   .byte v8(0xa0),v8(0xe0),v8(0x3b),v8(0x4d),v8(0xae),v8(0x2a),v8(0xf5),v8(0xb0)
-   .byte v8(0xc8),v8(0xeb),v8(0xbb),v8(0x3c),v8(0x83),v8(0x53),v8(0x99),v8(0x61)
-   .byte v8(0x17),v8(0x2b),v8(0x04),v8(0x7e),v8(0xba),v8(0x77),v8(0xd6),v8(0x26)
-   .byte v8(0xe1),v8(0x69),v8(0x14),v8(0x63),v8(0x55),v8(0x21),v8(0x0c),v8(0x7d)
-
-#endif
diff --git a/bsd/crypto/aes/i386/aesopt.h b/bsd/crypto/aes/i386/aesopt.h
deleted file mode 100644
index 025eb5fcf..000000000
--- a/bsd/crypto/aes/i386/aesopt.h
+++ /dev/null
@@ -1,719 +0,0 @@
-/*
- ---------------------------------------------------------------------------
- Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
-
- LICENSE TERMS
-
- The free distribution and use of this software in both source and binary
- form is allowed (with or without changes) provided that:
-
-   1. distributions of this source code include the above copyright
-      notice, this list of conditions and the following disclaimer;
-
-   2. distributions in binary form include the above copyright
-      notice, this list of conditions and the following disclaimer
-      in the documentation and/or other associated materials;
-
-   3. the copyright holder's name is not used to endorse products
-      built using this software without specific written permission.
-
- ALTERNATIVELY, provided that this notice is retained in full, this product
- may be distributed under the terms of the GNU General Public License (GPL),
- in which case the provisions of the GPL apply INSTEAD OF those given above.
-
- DISCLAIMER
-
- This software is provided 'as is' with no explicit or implied warranties
- in respect of its properties, including, but not limited to, correctness
- and/or fitness for purpose.
- ---------------------------------------------------------------------------
- Issue 31/01/2006
-
- This file contains the compilation options for AES (Rijndael) and code
- that is common across encryption, key scheduling and table generation.
-
- OPERATION
-
- These source code files implement the AES algorithm Rijndael designed by
- Joan Daemen and Vincent Rijmen. This version is designed for the standard
- block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
- and 32 bytes).
-
- This version is designed for flexibility and speed using operations on
- 32-bit words rather than operations on bytes.  It can be compiled with
- either big or little endian internal byte order but is faster when the
- native byte order for the processor is used.
-
- THE CIPHER INTERFACE
-
- The cipher interface is implemented as an array of bytes in which lower
- AES bit sequence indexes map to higher numeric significance within bytes.
-
-  uint_8t                 (an unsigned  8-bit type)
-  uint_32t                (an unsigned 32-bit type)
-  struct aes_encrypt_ctx  (structure for the cipher encryption context)
-  struct aes_decrypt_ctx  (structure for the cipher decryption context)
-  aes_rval                the function return type
-
-  C subroutine calls:
-
-  aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);
-  aes_rval aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]);
-  aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
-  aes_rval aes_encrypt(const unsigned char *in, unsigned char *out,
-                                                  const aes_encrypt_ctx cx[1]);
-
-  aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);
-  aes_rval aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]);
-  aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
-  aes_rval aes_decrypt(const unsigned char *in, unsigned char *out,
-                                                  const aes_decrypt_ctx cx[1]);
-
- IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that
- you call gen_tabs() before AES is used so that the tables are initialised.
-
- C++ aes class subroutines:
-
-     Class AESencrypt  for encryption
-
-          Constructors:
-          AESencrypt(void)
-          AESencrypt(const unsigned char *key) - 128 bit key
-      Members:
-          aes_rval key128(const unsigned char *key)
-          aes_rval key192(const unsigned char *key)
-          aes_rval key256(const unsigned char *key)
-          aes_rval encrypt(const unsigned char *in, unsigned char *out) const
-
-      Class AESdecrypt  for decryption
-      Constructors:
-          AESdecrypt(void)
-          AESdecrypt(const unsigned char *key) - 128 bit key
-      Members:
-          aes_rval key128(const unsigned char *key)
-          aes_rval key192(const unsigned char *key)
-          aes_rval key256(const unsigned char *key)
-          aes_rval decrypt(const unsigned char *in, unsigned char *out) const
-*/
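-
-/*
- Example use of the one-shot C interface above (illustrative only):
-
-     aes_encrypt_ctx cx[1];
-     unsigned char key[16], in[16], out[16];
-
-     aes_encrypt_key128(key, cx);
-     aes_encrypt(in, out, cx);
-
- With dynamic tables, gen_tabs() must have been called once beforehand,
- as noted above.
-*/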
-
-#if !defined( _AESOPT_H )
-#define _AESOPT_H
-
-#if defined( __cplusplus )
-#include "aescpp.h"
-#else
-#include "crypto/aes.h"
-#endif
-
-/*  PLATFORM SPECIFIC INCLUDES */
-
-#include "edefs.h"
-
-/*  CONFIGURATION - THE USE OF DEFINES
-
-    Later in this section there are a number of defines that control the
-    operation of the code.  In each section, the purpose of each define is
-    explained so that the relevant form can be included or excluded by
-    setting either 1's or 0's respectively on the branches of the related
-    #if clauses.  The following local defines should not be changed.
-*/
-
-#define ENCRYPTION_IN_C     1
-#define DECRYPTION_IN_C     2
-#define ENC_KEYING_IN_C     4
-#define DEC_KEYING_IN_C     8
-
-#define NO_TABLES           0
-#define ONE_TABLE           1
-#define FOUR_TABLES         4
-#define NONE                0
-#define PARTIAL             1
-#define FULL                2
-
-/*  --- START OF USER CONFIGURED OPTIONS --- */
-
-/*  1. BYTE ORDER WITHIN 32 BIT WORDS
-
-    The fundamental data processing units in Rijndael are 8-bit bytes. The
-    input, output and key input are all enumerated arrays of bytes in which
-    bytes are numbered starting at zero and increasing to one less than the
-    number of bytes in the array in question. This enumeration is only used
-    for naming bytes and does not imply any adjacency or order relationship
-    from one byte to another. When these inputs and outputs are considered
-    as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
-    byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
-    In this implementation bits are numbered from 0 to 7 starting at the
-    numerically least significant end of each byte (bit n represents 2^n).
-
-    However, Rijndael can be implemented more efficiently using 32-bit
-    words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
-    into word[n]. While in principle these bytes can be assembled into words
-    in any positions, this implementation only supports the two formats in
-    which bytes in adjacent positions within words also have adjacent byte
-    numbers. This order is called big-endian if the lowest numbered bytes
-    in words have the highest numeric significance and little-endian if the
-    opposite applies.
-
-    This code can work in either order irrespective of the order used by the
-    machine on which it runs. Normally the internal byte order will be set
-    to the order of the processor on which the code is to be run but this
-    define can be used to reverse this in special situations
-
-    WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set.
-    This define will hence be redefined later (in section 4) if necessary
-*/
-
-#if 1 
-#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
-#elif 0
-#define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN
-#elif 0
-#define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN
-#else
-#error The algorithm byte order is not defined
-#endif
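-
-/*  For example, with the little-endian algorithm order, bytes 4*n..4*n+3
-    pack into word[n] roughly as (C sketch, illustrative):
-
-        word[n] = b[4*n] | (b[4*n+1] << 8) | (b[4*n+2] << 16)
-                | ((uint_32t)b[4*n+3] << 24);
-
-    the big-endian setting reverses the byte significance.
-*/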
-
-/*  2. VIA ACE SUPPORT
-
-    Define this option if support for the VIA ACE is required. This uses 
-    inline assembler instructions and is only implemented for the Microsoft, 
-    Intel and GCC compilers.  If VIA ACE is known to be present, then defining
-    ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption 
-    code.  If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if
-    it is detected (both present and enabled) but the normal AES code will 
-    also be present. 
-    
-    When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte 
-    aligned; other input/output buffers do not need to be 16 byte aligned 
-    but there are very large performance gains if this can be arranged.  
-    VIA ACE also requires the decryption key schedule to be in reverse 
-    order (which the following defines ensure).
-*/
-
-#if 0 && !defined( _WIN64 ) && !defined( USE_VIA_ACE_IF_PRESENT )
-#define USE_VIA_ACE_IF_PRESENT
-#endif
-
-#if 0 && !defined( _WIN64 ) && !defined( ASSUME_VIA_ACE_PRESENT )
-#define ASSUME_VIA_ACE_PRESENT
-#endif
-
-/*  3. ASSEMBLER SUPPORT
-
-    This define (which can be on the command line) enables the use of the
-    assembler code routines for encryption, decryption and key scheduling
-    as follows:
-
-    ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for
-                encryption and decryption but with key scheduling in C
-    ASM_X86_V2  uses assembler (aes_x86_v2.asm) with compressed tables for
-                encryption, decryption and key scheduling
-    ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for
-                encryption and decryption but with key scheduling in C
-    ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for
-                encryption and decryption but with key scheduling in C
-
-    Change one 'if 0' below to 'if 1' to select the version or define 
-    as a compilation option.
-*/
-
-#if defined ( ASM_X86_V1C ) || defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )
-#  if defined( _M_IX86 )
-#    if 0 && !defined( ASM_X86_V1C )
-#      define ASM_X86_V1C
-#    elif 0 && !defined( ASM_X86_V2  )
-#      define ASM_X86_V2
-#    elif 0 && !defined( ASM_X86_V2C )
-#      define ASM_X86_V2C
-#    endif
-#  else
-#    error Assembler code is only available for x86 and AMD64 systems
-#  endif
-#elif defined( ASM_AMD64_C )
-#  if defined( _M_X64 )
-#    if 0 && !defined( ASM_AMD64_C )
-#      define ASM_AMD64_C
-#    endif
-#  else
-#    error Assembler code is only available for x86 and AMD64 systems
-#  endif
-#endif
-
-/*  4. FAST INPUT/OUTPUT OPERATIONS.
-
-    On some machines it is possible to improve speed by transferring the
-    bytes in the input and output arrays to and from the internal 32-bit
-    variables by addressing these arrays as if they are arrays of 32-bit
-    words.  On some machines this will always be possible but there may
-    be a large performance penalty if the byte arrays are not aligned on
-    the normal word boundaries. On other machines this technique will
-    lead to memory access errors when such 32-bit word accesses are not
-    properly aligned. The option SAFE_IO avoids such problems but will
-    often be slower on those machines that support misaligned access
-    (especially so if care is taken to align the input and output byte
-    arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
-    assumed that access to byte arrays as if they are arrays of 32-bit
-    words will not cause problems when such accesses are misaligned.
-*/
-#if 1 && !defined( _MSC_VER )
-#define SAFE_IO
-#endif
-
-/*  5. LOOP UNROLLING
-
-    The code for encryption and decryption cycles through a number of rounds
-    that can be implemented either in a loop or by expanding the code into a
-    long sequence of instructions, the latter producing a larger program but
-    one that will often be much faster. This expansion is called loop
-    unrolling. There are also potential speed advantages in expanding two
-    iterations in a loop with half the number of iterations, which is called
-    partial loop unrolling.  The following options allow partial or full loop
-    unrolling to be set independently for encryption and decryption.
-*/
-#if 1
-#define ENC_UNROLL  FULL
-#elif 0
-#define ENC_UNROLL  PARTIAL
-#else
-#define ENC_UNROLL  NONE
-#endif
-
-#if 1
-#define DEC_UNROLL  FULL
-#elif 0
-#define DEC_UNROLL  PARTIAL
-#else
-#define DEC_UNROLL  NONE
-#endif
-
-/*  6. FAST FINITE FIELD OPERATIONS
-
-    If this section is included, tables are used to provide faster finite
-    field arithmetic (this has no effect if FIXED_TABLES is defined).
-*/
-#if 1
-#define FF_TABLES
-#endif
-
-/*  7. INTERNAL STATE VARIABLE FORMAT
-
-    The internal state of Rijndael is stored in a number of local 32-bit
-    word variables which can be defined either as an array or as individually
-    named variables. Include this section if you want to store these local
-    variables in arrays. Otherwise individual local variables will be used.
-*/
-#if 1
-#define ARRAYS
-#endif
-
-/*  8. FIXED OR DYNAMIC TABLES
-
-    When this section is included the tables used by the code are compiled
-    statically into the binary file.  Otherwise the subroutine gen_tabs()
-    must be called to compute them before the code is first used.
-*/
-#if 0 && !(defined( _MSC_VER ) && ( _MSC_VER <= 800 )) 
-#define FIXED_TABLES
-#endif
-
-/*  9. TABLE ALIGNMENT
-
-    On some systems speed will be improved by aligning the AES large lookup
-    tables on particular boundaries. This define should be set to a power of
-    two giving the desired alignment. It can be left undefined if alignment
-    is not needed.  This option is specific to the Microsoft VC++ compiler -
-    it seems to sometimes cause trouble for the VC++ version 6 compiler.
-*/
-
-#if 1 && defined( _MSC_VER ) && ( _MSC_VER >= 1300 )
-#define TABLE_ALIGN 32
-#endif
-
-/*  10. TABLE OPTIONS
-    
-    This cipher proceeds through a number of cycles known as 'rounds', each
-    implemented by a round function that can optionally be speeded up using
-    tables.  The basic tables are each 256 32-bit words, with either one or
-    four tables being required for each round function depending on how much
-    speed is required. The encryption and decryption round functions are
-    different, and the last encryption and decryption round functions are
-    different again, making four different round functions in all.
-
-    This means that:
-      1. Normal encryption and decryption rounds can each use either 0, 1
-         or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
-      2. The last encryption and decryption rounds can also use either 0, 1
-         or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
-
-    Include or exclude the appropriate definitions below to set the number
-    of tables used by this implementation.
-*/
-
-#if 1   /* set tables for the normal encryption round */
-#define ENC_ROUND   FOUR_TABLES
-#elif 0
-#define ENC_ROUND   ONE_TABLE
-#else
-#define ENC_ROUND   NO_TABLES
-#endif
-
-#if 1   /* set tables for the last encryption round */
-#define LAST_ENC_ROUND  FOUR_TABLES
-#elif 0
-#define LAST_ENC_ROUND  ONE_TABLE
-#else
-#define LAST_ENC_ROUND  NO_TABLES
-#endif
-
-#if 1   /* set tables for the normal decryption round */
-#define DEC_ROUND   FOUR_TABLES
-#elif 0
-#define DEC_ROUND   ONE_TABLE
-#else
-#define DEC_ROUND   NO_TABLES
-#endif
-
-#if 1   /* set tables for the last decryption round */
-#define LAST_DEC_ROUND  FOUR_TABLES
-#elif 0
-#define LAST_DEC_ROUND  ONE_TABLE
-#else
-#define LAST_DEC_ROUND  NO_TABLES
-#endif
-
-/*  The decryption key schedule can be speeded up with tables in the same
-    way that the round functions can.  Include or exclude the following
-    defines to set this requirement.
-*/
-#if 1
-#define KEY_SCHED   FOUR_TABLES
-#elif 0
-#define KEY_SCHED   ONE_TABLE
-#else
-#define KEY_SCHED   NO_TABLES
-#endif
-
-/*  ---- END OF USER CONFIGURED OPTIONS ---- */
-
-/* VIA ACE support is only available for VC++ and GCC */
-
-#if !defined( _MSC_VER ) && !defined( __GNUC__ )
-#  if defined( ASSUME_VIA_ACE_PRESENT )
-#    undef ASSUME_VIA_ACE_PRESENT
-#  endif
-#  if defined( USE_VIA_ACE_IF_PRESENT )
-#    undef USE_VIA_ACE_IF_PRESENT
-#  endif
-#endif
-
-#if defined( ASSUME_VIA_ACE_PRESENT ) && !defined( USE_VIA_ACE_IF_PRESENT )
-#define USE_VIA_ACE_IF_PRESENT
-#endif
-
-#if defined( USE_VIA_ACE_IF_PRESENT ) && !defined ( AES_REV_DKS )
-#define AES_REV_DKS
-#endif
-
-/* Assembler support requires the use of platform byte order */
-
-#if ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) ) && (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER)
-#undef  ALGORITHM_BYTE_ORDER
-#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
-#endif
-
-/* In this implementation the columns of the state array are each held in
-   32-bit words. The state array can be held in various ways: in an array
-   of words, in a number of individual word variables or in a number of
-   processor registers. The following define maps a variable name x and
-   a column number c to the way the state array variable is to be held.
-   The first define below maps the state into an array x[c] whereas the
-   second form maps the state into a number of individual variables x0,
-   x1, etc.  Another form could map individual state columns to machine
-   register names.
-*/
-
-#if defined( ARRAYS )
-#define s(x,c) x[c]
-#else
-#define s(x,c) x##c
-#endif
-
-/*  This implementation provides subroutines for encryption, decryption
-    and for setting the three key lengths (separately) for encryption
-    and decryption. Since not all functions are needed, masks are set 
-    up here to determine which will be implemented in C
-*/
-
-#if !defined( AES_ENCRYPT )
-#  define EFUNCS_IN_C   0
-#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
-    || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
-#  define EFUNCS_IN_C   ENC_KEYING_IN_C
-#elif !defined( ASM_X86_V2 )
-#  define EFUNCS_IN_C   ( ENCRYPTION_IN_C | ENC_KEYING_IN_C )
-#else
-#  define EFUNCS_IN_C   0
-#endif
-
-#if !defined( AES_DECRYPT )
-#  define DFUNCS_IN_C   0
-#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
-    || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )	
-#  define DFUNCS_IN_C   DEC_KEYING_IN_C
-#elif !defined( ASM_X86_V2 )
-#  define DFUNCS_IN_C   ( DECRYPTION_IN_C | DEC_KEYING_IN_C )
-#else
-#  define DFUNCS_IN_C   0
-#endif
-
-#define FUNCS_IN_C  ( EFUNCS_IN_C | DFUNCS_IN_C )
-
-/* END OF CONFIGURATION OPTIONS */
-
-#define RC_LENGTH   (5 * (AES_BLOCK_SIZE / 4 - 2))
-
-/* Disable or report errors on some combinations of options */
-
-#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
-#undef  LAST_ENC_ROUND
-#define LAST_ENC_ROUND  NO_TABLES
-#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
-#undef  LAST_ENC_ROUND
-#define LAST_ENC_ROUND  ONE_TABLE
-#endif
-
-#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
-#undef  ENC_UNROLL
-#define ENC_UNROLL  NONE
-#endif
-
-#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
-#undef  LAST_DEC_ROUND
-#define LAST_DEC_ROUND  NO_TABLES
-#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
-#undef  LAST_DEC_ROUND
-#define LAST_DEC_ROUND  ONE_TABLE
-#endif
-
-#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
-#undef  DEC_UNROLL
-#define DEC_UNROLL  NONE
-#endif
-
-#if defined( bswap32 )
-#define aes_sw32    bswap32
-#elif defined( bswap_32 )
-#define aes_sw32    bswap_32
-#else
-#define brot(x,n)   (((uint_32t)(x) <<  n) | ((uint_32t)(x) >> (32 - n)))
-#define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00))
-#endif
-
-/*  upr(x,n):  rotates bytes within words by n positions, moving bytes to
-               higher index positions with wrap around into low positions
-    ups(x,n):  moves bytes by n positions to higher index positions in
-               words but without wrap around
-    bval(x,n): extracts a byte from a word
-
-    WARNING:   The definitions given here are intended only for use with
-               unsigned variables and with shift counts that are compile
-               time constants
-*/
-
-#if ( ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN )
-#define upr(x,n)        (((uint_32t)(x) << (8 * (n))) | ((uint_32t)(x) >> (32 - 8 * (n))))
-#define ups(x,n)        ((uint_32t) (x) << (8 * (n)))
-#define bval(x,n)       ((uint_8t)((x) >> (8 * (n))))
-#define bytes2word(b0, b1, b2, b3)  \
-        (((uint_32t)(b3) << 24) | ((uint_32t)(b2) << 16) | ((uint_32t)(b1) << 8) | (b0))
-#endif
-
-#if ( ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN )
-#define upr(x,n)        (((uint_32t)(x) >> (8 * (n))) | ((uint_32t)(x) << (32 - 8 * (n))))
-#define ups(x,n)        ((uint_32t) (x) >> (8 * (n)))
-#define bval(x,n)       ((uint_8t)((x) >> (24 - 8 * (n))))
-#define bytes2word(b0, b1, b2, b3)  \
-        (((uint_32t)(b0) << 24) | ((uint_32t)(b1) << 16) | ((uint_32t)(b2) << 8) | (b3))
-#endif
-
-#if defined( SAFE_IO )
-
-#define word_in(x,c)    bytes2word(((const uint_8t*)(x)+4*c)[0], ((const uint_8t*)(x)+4*c)[1], \
-                                   ((const uint_8t*)(x)+4*c)[2], ((const uint_8t*)(x)+4*c)[3])
-#define word_out(x,c,v) { ((uint_8t*)(x)+4*c)[0] = bval(v,0); ((uint_8t*)(x)+4*c)[1] = bval(v,1); \
-                          ((uint_8t*)(x)+4*c)[2] = bval(v,2); ((uint_8t*)(x)+4*c)[3] = bval(v,3); }
-
-#elif ( ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER )
-
-#define word_in(x,c)    (*((uint_32t*)(x)+(c)))
-#define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = (v))
-
-#else
-
-#define word_in(x,c)    aes_sw32(*((uint_32t*)(x)+(c)))
-#define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = aes_sw32(v))
-
-#endif
-
-/* the finite field modular polynomial and elements */
-
-#define WPOLY   0x011b
-#define BPOLY     0x1b
-
-/* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
-
-#define m1  0x80808080
-#define m2  0x7f7f7f7f
-#define gf_mulx(x)  ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))
-
-/* The following defines provide alternative definitions of gf_mulx that might
-   give improved performance if a fast 32-bit multiply is not available. Note
-   that a temporary variable u needs to be defined where gf_mulx is used.
-
-#define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
-#define m4  (0x01010101 * BPOLY)
-#define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)
-*/
-
-/* Work out which tables are needed for the different options   */
-
-#if defined( ASM_X86_V1C )
-#if defined( ENC_ROUND )
-#undef  ENC_ROUND
-#endif
-#define ENC_ROUND   FOUR_TABLES
-#if defined( LAST_ENC_ROUND )
-#undef  LAST_ENC_ROUND
-#endif
-#define LAST_ENC_ROUND  FOUR_TABLES
-#if defined( DEC_ROUND )
-#undef  DEC_ROUND
-#endif
-#define DEC_ROUND   FOUR_TABLES
-#if defined( LAST_DEC_ROUND )
-#undef  LAST_DEC_ROUND
-#endif
-#define LAST_DEC_ROUND  FOUR_TABLES
-#if defined( KEY_SCHED )
-#undef  KEY_SCHED
-#define KEY_SCHED   FOUR_TABLES
-#endif
-#endif
-
-#if ( FUNCS_IN_C & ENCRYPTION_IN_C ) || defined( ASM_X86_V1C )
-#if ENC_ROUND == ONE_TABLE
-#define FT1_SET
-#elif ENC_ROUND == FOUR_TABLES
-#define FT4_SET
-#else
-#define SBX_SET
-#endif
-#if LAST_ENC_ROUND == ONE_TABLE
-#define FL1_SET
-#elif LAST_ENC_ROUND == FOUR_TABLES
-#define FL4_SET
-#elif !defined( SBX_SET )
-#define SBX_SET
-#endif
-#endif
-
-#if ( FUNCS_IN_C & DECRYPTION_IN_C ) || defined( ASM_X86_V1C )
-#if DEC_ROUND == ONE_TABLE
-#define IT1_SET
-#elif DEC_ROUND == FOUR_TABLES
-#define IT4_SET
-#else
-#define ISB_SET
-#endif
-#if LAST_DEC_ROUND == ONE_TABLE
-#define IL1_SET
-#elif LAST_DEC_ROUND == FOUR_TABLES
-#define IL4_SET
-#elif !defined(ISB_SET)
-#define ISB_SET
-#endif
-#endif
-
-#if (FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C)
-#if KEY_SCHED == ONE_TABLE
-#define LS1_SET
-#elif KEY_SCHED == FOUR_TABLES
-#define LS4_SET
-#elif !defined( SBX_SET )
-#define SBX_SET
-#endif
-#endif
-
-#if (FUNCS_IN_C & DEC_KEYING_IN_C)
-#if KEY_SCHED == ONE_TABLE
-#define IM1_SET
-#elif KEY_SCHED == FOUR_TABLES
-#define IM4_SET
-#elif !defined( SBX_SET )
-#define SBX_SET
-#endif
-#endif
-
-/* generic definitions of Rijndael macros that use tables    */
-
-#define no_table(x,box,vf,rf,c) bytes2word( \
-    box[bval(vf(x,0,c),rf(0,c))], \
-    box[bval(vf(x,1,c),rf(1,c))], \
-    box[bval(vf(x,2,c),rf(2,c))], \
-    box[bval(vf(x,3,c),rf(3,c))])
-
-#define one_table(x,op,tab,vf,rf,c) \
- (     tab[bval(vf(x,0,c),rf(0,c))] \
-  ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
-  ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
-  ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
-
-#define four_tables(x,tab,vf,rf,c) \
- (  tab[0][bval(vf(x,0,c),rf(0,c))] \
-  ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
-  ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
-  ^ tab[3][bval(vf(x,3,c),rf(3,c))])
-
-#define vf1(x,r,c)  (x)
-#define rf1(r,c)    (r)
-#define rf2(r,c)    ((8+r-c)&3)
-
-/* perform forward and inverse column mix operation on four bytes in long word x in */
-/* parallel. NOTE: x must be a simple variable, NOT an expression in these macros.  */
-
-#if defined( FM4_SET )    /* not currently used */
-#define fwd_mcol(x)       four_tables(x,t_use(f,m),vf1,rf1,0)
-#elif defined( FM1_SET )  /* not currently used */
-#define fwd_mcol(x)       one_table(x,upr,t_use(f,m),vf1,rf1,0)
-#else
-#define dec_fmvars        uint_32t g2
-#define fwd_mcol(x)       (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1))
-#endif
-
-#if defined( IM4_SET )
-#define inv_mcol(x)       four_tables(x,t_use(i,m),vf1,rf1,0)
-#elif defined( IM1_SET )
-#define inv_mcol(x)       one_table(x,upr,t_use(i,m),vf1,rf1,0)
-#else
-#define dec_imvars        uint_32t g2, g4, g9
-#define inv_mcol(x)       (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \
-                          (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1))
-#endif
-
-#if defined( FL4_SET )
-#define ls_box(x,c)       four_tables(x,t_use(f,l),vf1,rf2,c)
-#elif   defined( LS4_SET )
-#define ls_box(x,c)       four_tables(x,t_use(l,s),vf1,rf2,c)
-#elif defined( FL1_SET )
-#define ls_box(x,c)       one_table(x,upr,t_use(f,l),vf1,rf2,c)
-#elif defined( LS1_SET )
-#define ls_box(x,c)       one_table(x,upr,t_use(l,s),vf1,rf2,c)
-#else
-#define ls_box(x,c)     no_table(x,t_use(s,box),vf1,rf2,c)
-#endif
-
-#if defined( ASM_X86_V1C ) && defined( AES_DECRYPT ) && !defined( ISB_SET )
-#define ISB_SET
-#endif
-
-#endif
diff --git a/bsd/crypto/aes/i386/aesxts.c b/bsd/crypto/aes/i386/aesxts.c
new file mode 100644
index 000000000..c0eaaa609
--- /dev/null
+++ b/bsd/crypto/aes/i386/aesxts.c
@@ -0,0 +1,392 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All Rights Reserved.
+ * 
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include "aesxts.h"
+#include <sys/types.h>
+#include <string.h>
+#include <libkern/libkern.h>
+
+int 
+aes_encrypt_key(const uint8_t *key, int key_len, aesedp_encrypt_ctx cx[1]);
+
+int 
+aes_decrypt_key(const uint8_t *key, int key_len, aesedp_decrypt_ctx cx[1]);
+
+int
+aes_encrypt(const uint8_t *Plaintext, uint8_t *Ciphertext, aesedp_encrypt_ctx *ctx);
+
+int
+aes_decrypt(const uint8_t *Ciphertext, uint8_t *Plaintext, aesedp_decrypt_ctx *ctx);
+
+
+/* error codes [will be expanded in future releases] */
+enum {
+   CRYPT_OK=0,               /* Result OK */
+   CRYPT_ERROR=1,            /* Generic Error */
+   CRYPT_INVALID_KEYSIZE=3,  /* Invalid key size given */
+   CRYPT_INVALID_ARG=16,     /* Generic invalid argument */
+};
+
+static int 
+aesedp_keysize(int *keysize)
+{
+	switch (*keysize) {
+		case 16:
+		case 24:
+		case 32:
+			return CRYPT_OK;
+		default:
+			return CRYPT_INVALID_KEYSIZE;
+	}
+}
+
+static int 
+aesedp_setup(const uint8_t *key, int keylen, int num_rounds __unused, aesedp_ctx *skey)
+{
+	aesedp_ctx *ctx = (aesedp_ctx *) skey;
+	int retval;
+
+	if((retval = aesedp_keysize(&keylen)) != CRYPT_OK) return retval;
+	if((retval = aes_encrypt_key(key, keylen, &ctx->encrypt)) != CRYPT_OK) return CRYPT_ERROR;
+	if((retval = aes_decrypt_key(key, keylen, &ctx->decrypt)) != CRYPT_OK) return CRYPT_ERROR;
+	return CRYPT_OK;
+}
+
+#ifdef ZZZNEVER
+static int 
+aesedp_ecb_encrypt(const uint8_t *pt, uint8_t *ct, aesedp_ctx *skey)
+{
+	aesedp_ctx *ctx = (aesedp_ctx *) skey;
+	return aes_encrypt(pt, ct, &ctx->encrypt);
+}
+
+
+
+static int 
+aesedp_ecb_decrypt(const uint8_t *ct, uint8_t *pt, aesedp_ctx *skey)
+{
+	return aes_decrypt(ct, pt, &skey->decrypt);
+}
+#endif
+
+
+static void 
+aesedp_done(aesedp_ctx *skey __unused)
+{
+}
+
+/** Start XTS mode
+   @param cipher      The index of the cipher to use
+   @param key1        The encrypt key
+   @param key2        The tweak encrypt key
+   @param keylen      The length of the keys (each) in octets
+   @param num_rounds  The number of rounds for the cipher (0 == default)
+   @param xts         [out] XTS structure
+   Returns CRYPT_OK upon success.
+*/
+
+uint32_t
+xts_start(uint32_t cipher, // ignored - we're doing this for xts-aes only
+						const uint8_t *IV __unused, // ignored
+						const uint8_t *key1, int keylen,
+						const uint8_t *key2, int tweaklen __unused, // both keys are the same size for xts
+						uint32_t num_rounds, // ignored
+						uint32_t options __unused,    // ignored
+						symmetric_xts *xts)
+{
+   uint32_t err;
+
+   /* check inputs */
+   if((key1 == NULL) || (key2 == NULL) || (xts == NULL)) return CRYPT_INVALID_ARG;
+
+   /* schedule the two ciphers */
+   if ((err = aesedp_setup(key1, keylen, num_rounds, &xts->key1)) != 0) {
+      return err;
+   }
+   if ((err = aesedp_setup(key2, keylen, num_rounds, &xts->key2)) != 0) {
+      return err;
+   }
+   xts->cipher = cipher;
+
+   return err;
+}
+
+
+
+
+/** multiply by x 
+  @param I      The value to multiply by x (LFSR shift)
+*/
+#if defined __x86_64__ || defined __i386__
+extern void xts_mult_x(uint8_t *I);
+#else
+static void xts_mult_x(uint8_t *I)
+{
+  uint32_t x;
+  uint8_t t, tt;
+
+  for (x = t = 0; x < 16; x++) {
+     tt   = I[x] >> 7;
+     I[x] = ((I[x] << 1) | t) & 0xFF;
+     t    = tt;
+  }
+  if (tt) {
+     I[0] ^= 0x87;
+  } 
+}
+#endif
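+
+/*
+   Worked example (illustrative): the tweak is treated as a 128-bit
+   little-endian value, I[0] being the least significant byte.  Doubling a
+   tweak whose only set bit is the top bit (I[15] = 0x80, all other bytes 0)
+   shifts that bit out, so the loop zeroes every byte and the final carry
+   folds the field polynomial back in: I[0] ^= 0x87, i.e. x^128 reduces to
+   x^7 + x^2 + x + 1 in GF(2^128).
+*/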
+
+#if defined __x86_64__ || defined __i386__
+extern int tweak_crypt(const uint8_t *P, uint8_t *C, uint8_t *T, aesedp_encrypt_ctx *ctx);
+extern int tweak_crypt_group(const uint8_t *P, uint8_t *C, uint8_t *T, aesedp_encrypt_ctx *ctx, uint32_t lim);
+#else
+static int tweak_crypt(const uint8_t *P, uint8_t *C, uint8_t *T, aesedp_encrypt_ctx *ctx)
+{
+   uint32_t x;
+   uint32_t err;
+
+   /* tweak encrypt block i */
+   for (x = 0; x < 16; x += sizeof(uint64_t)) {
+      *((uint64_t*)&C[x]) = *((uint64_t*)&P[x]) ^ *((uint64_t*)&T[x]);
+   }
+     
+   if ((err = aes_encrypt(C, C, ctx)) != CRYPT_OK) {
+      return err;
+   }
+
+   for (x = 0; x < 16; x += sizeof(uint64_t)) {
+      *((uint64_t*)&C[x]) ^= *((uint64_t*)&T[x]);
+   }
+
+   /* LFSR the tweak */
+   xts_mult_x(T);
+
+   return CRYPT_OK;
+}   
+#endif
+
+/** XTS Encryption
+  @param pt     [in]  Plaintext
+  @param ptlen  Length of plaintext (and ciphertext)
+  @param ct     [out] Ciphertext
+  @param tweak  [in] The 128-bit encryption tweak (e.g. sector number)
+  @param xts    The XTS structure
+  Returns CRYPT_OK upon success
+*/
+int xts_encrypt(
+   const uint8_t *pt, unsigned long ptlen,
+         uint8_t *ct,
+   const uint8_t *tweak,
+         symmetric_xts *xts)
+{
+   aesedp_encrypt_ctx *encrypt_ctx = &xts->key1.encrypt;
+   uint8_t PP[16], CC[16], T[16];
+   uint32_t i, m, mo, lim;
+   uint32_t err;
+
+   /* check inputs */
+   if((pt == NULL) || (ct == NULL) || (tweak == NULL) || (xts == NULL)) return CRYPT_INVALID_ARG;
+
+   /* get number of blocks */
+   m  = ptlen >> 4;
+   mo = ptlen & 15;
+
+   /* must have at least one full block */
+   if (m == 0) {
+      return CRYPT_INVALID_ARG;
+   }
+
+   /* encrypt the tweak */
+   if ((err = aes_encrypt(tweak, T, &xts->key2.encrypt)) != 0) {
+      return err;
+   }
+
+   /* for i = 0 to m-2 do */
+   if (mo == 0) {
+      lim = m;
+   } else {
+      lim = m - 1;
+   }
+
+#if defined __x86_64__ || defined __i386__
+   if (lim > 0) {
+      err = tweak_crypt_group(pt, ct, T, encrypt_ctx, lim);
+      ct += (lim << 4);
+      pt += (lim << 4);
+   }
+#else
+   for (i = 0; i < lim; i++) {
+      err = tweak_crypt(pt, ct, T, encrypt_ctx);
+      ct += 16;
+      pt += 16;
+   }
+#endif
+   
+   /* if ptlen is not a multiple of 16, handle the final partial block with ciphertext stealing */
+   if (mo > 0) {
+      /* CC = tweak encrypt block m-1 */
+      if ((err = tweak_crypt(pt, CC, T, encrypt_ctx)) != 0) {
+         return err;
+      }
+
+      /* Cm = first ptlen % 16 bytes of CC */
+      for (i = 0; i < mo; i++) {
+          PP[i] = pt[16+i];
+          ct[16+i] = CC[i];
+      }
+
+      for (; i < 16; i++) {
+          PP[i] = CC[i];
+      }
+
+      /* Cm-1 = Tweak encrypt PP */
+      if ((err = tweak_crypt(PP, ct, T, encrypt_ctx)) != 0) {
+         return err;
+      }
+   }
+
+   return err;
+}
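+
+/*
+   Ciphertext-stealing example (illustrative): for ptlen = 20 we get m = 1,
+   mo = 4 and lim = 0, so the block loop above is skipped and the tail code
+   runs: CC = tweak-encrypt of pt[0..15]; ct[16..19] = CC[0..3]; PP is built
+   from pt[16..19] padded with CC[4..15]; and the tweak-encryption of PP
+   becomes ct[0..15].  The output is exactly ptlen = 20 bytes.
+*/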
+
+#if defined __x86_64__ || defined __i386__
+extern int tweak_uncrypt(const uint8_t *C, uint8_t *P, uint8_t *T, aesedp_decrypt_ctx *ctx);
+extern int tweak_uncrypt_group(const uint8_t *C, uint8_t *P, uint8_t *T, aesedp_decrypt_ctx *ctx, uint32_t lim);
+#else
+static int tweak_uncrypt(const uint8_t *C, uint8_t *P, uint8_t *T, aesedp_decrypt_ctx *ctx)
+{
+   uint32_t x;
+   uint32_t err;
+
+   /* tweak encrypt block i */
+   for (x = 0; x < 16; x += sizeof(uint64_t)) {
+      *((uint64_t*)&P[x]) = *((uint64_t*)&C[x]) ^ *((uint64_t*)&T[x]);
+   }
+     
+   err = aes_decrypt(P, P, ctx);  
+
+   for (x = 0; x < 16; x += sizeof(uint64_t)) {
+      *((uint64_t*)&P[x]) ^=  *((uint64_t*)&T[x]);
+   }
+
+   /* LFSR the tweak */
+   xts_mult_x(T);
+
+   return err;
+}   
+#endif
+
+/** XTS Decryption
+  @param ct     [in] Ciphertext
+  @param ptlen  Length of plaintext (and ciphertext)
+  @param pt     [out]  Plaintext
+  @param tweak  [in] The 128-bit encryption tweak (e.g. sector number)
+  @param xts    The XTS structure
+  Returns CRYPT_OK upon success
+*/
+
+int xts_decrypt(
+   const uint8_t *ct, unsigned long ptlen,
+         uint8_t *pt,
+   const uint8_t *tweak,
+         symmetric_xts *xts)
+{
+   aesedp_decrypt_ctx *decrypt_ctx = &xts->key1.decrypt;
+   uint8_t PP[16], CC[16], T[16];
+   uint32_t i, m, mo, lim;
+   uint32_t err;
+
+   /* check inputs */
+   if((pt == NULL) || (ct == NULL) || (tweak == NULL) || (xts == NULL)) return CRYPT_INVALID_ARG;
+
+   /* get number of blocks */
+   m  = ptlen >> 4;
+   mo = ptlen & 15;
+
+   /* must have at least one full block */
+   if (m == 0) {
+      return CRYPT_INVALID_ARG;
+   }
+
+   /* encrypt the tweak - yes, encrypt, even when decrypting */
+   if ((err = aes_encrypt(tweak, T, &xts->key2.encrypt)) != 0) {
+      return err;
+   }
+
+   /* for i = 0 to m-2 do */
+   if (mo == 0) {
+      lim = m;
+   } else {
+      lim = m - 1;
+   }
+
+#if defined __x86_64__ || defined __i386__
+   if (lim > 0) {
+      err = tweak_uncrypt_group(ct, pt, T, decrypt_ctx, lim);
+      ct += (lim << 4);
+      pt += (lim << 4);
+   }
+#else
+   for (i = 0; i < lim; i++) {
+      err = tweak_uncrypt(ct, pt, T, decrypt_ctx);
+      ct += 16;
+      pt += 16;
+   }
+#endif
+   
+   /* if ptlen is not a multiple of 16, undo the ciphertext stealing on the final partial block */
+   if (mo > 0) {
+      memcpy(CC, T, 16);
+      xts_mult_x(CC);
+
+      /* PP = tweak decrypt block m-1 */
+      if ((err = tweak_uncrypt(ct, PP, CC, decrypt_ctx)) != CRYPT_OK) {
+        return err;
+      }
+
+      /* Pm = first ptlen % 16 bytes of PP */
+      for (i = 0; i < mo; i++) {
+          CC[i]    = ct[16+i];
+          pt[16+i] = PP[i];
+      }
+      for (; i < 16; i++) {
+          CC[i] = PP[i];
+      }
+
+      /* Pm-1 = Tweak uncrypt CC */
+      if ((err = tweak_uncrypt(CC, pt, T, decrypt_ctx)) != CRYPT_OK) {
+        return err;
+      }
+   }
+
+   return CRYPT_OK;
+}
+
+
+
+void xts_done(symmetric_xts *xts)
+{
+   if(xts == NULL) return;
+   aesedp_done(&xts->key1);
+   aesedp_done(&xts->key2);
+}
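+
+/*
+   Typical usage (an illustrative sketch only; error handling is elided, and
+   the key/buffer names are hypothetical):
+
+	symmetric_xts xts;
+	uint8_t data_key[32], tweak_key[32];	// two independent AES-256 keys
+	uint8_t sector_iv[16];			// e.g. the sector number
+	uint8_t buf[512];			// one disk sector
+
+	if (xts_start(0, NULL, data_key, 32, tweak_key, 32, 0, 0, &xts) == CRYPT_OK) {
+		xts_encrypt(buf, sizeof(buf), buf, sector_iv, &xts);	// encrypt in place
+		xts_decrypt(buf, sizeof(buf), buf, sector_iv, &xts);	// and decrypt back
+		xts_done(&xts);
+	}
+*/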
+
diff --git a/bsd/crypto/aes/i386/aesxts.h b/bsd/crypto/aes/i386/aesxts.h
new file mode 100644
index 000000000..fe7618066
--- /dev/null
+++ b/bsd/crypto/aes/i386/aesxts.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All Rights Reserved.
+ * 
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/*
+ *  aesxts.h
+ *
+ *
+ */
+
+#include "stdint.h"
+
+
+#ifndef _AESXTS_H
+#define _AESXTS_H
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+/*
+ * The context for XTS-AES
+ */
+
+
+#define KS_LENGTH       60
+
+typedef struct {   
+	uint32_t ks[KS_LENGTH];
+	uint32_t rn;
+} aesedp_encrypt_ctx;
+
+typedef struct {   
+	uint32_t ks[KS_LENGTH];
+	uint32_t rn;
+} aesedp_decrypt_ctx;
+
+typedef struct {   
+	aesedp_decrypt_ctx decrypt;
+	aesedp_encrypt_ctx encrypt;
+} aesedp_ctx;
+
+// xts mode context
+
+typedef struct {
+   aesedp_ctx				key1, key2;
+   uint32_t						cipher; // ignored - present only to fit the library interface; in this case we use AES only
+} symmetric_xts;
+
+
+/*
+ * These are the interfaces required for XTS-AES support
+ */
+ 
+uint32_t
+xts_start(uint32_t cipher, // ignored - we're doing this for xts-aes only
+						const uint8_t *IV, // ignored
+						const uint8_t *key1, int keylen,
+						const uint8_t *key2, int tweaklen, // both keys are the same size for xts
+						uint32_t num_rounds, // ignored
+						uint32_t options,    // ignored
+						symmetric_xts *xts);
+
+int xts_encrypt(
+   const uint8_t *pt, unsigned long ptlen,
+         uint8_t *ct,
+   const uint8_t *tweak, // this can be considered the sector IV for this use
+         symmetric_xts *xts);
+		 
+int xts_decrypt(
+   const uint8_t *ct, unsigned long ptlen,
+         uint8_t *pt,
+   const uint8_t *tweak, // this can be considered the sector IV for this use
+         symmetric_xts *xts);
+
+
+void xts_done(symmetric_xts *xts);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _AESXTS_H */
\ No newline at end of file
diff --git a/bsd/crypto/aes/i386/aesxts_asm.s b/bsd/crypto/aes/i386/aesxts_asm.s
new file mode 100644
index 000000000..ec6b924b7
--- /dev/null
+++ b/bsd/crypto/aes/i386/aesxts_asm.s
@@ -0,0 +1,1305 @@
+/*
+	This file "aesxts.s" provides x86_64 / i386 optimization of the following functions
+
+	0. xts_mult_x_on_xmm7 : a code macro that is used throughout all other functions
+	1. void xts_mult_x(uint8_t *I);
+	2. int tweak_crypt(const uint8_t *P, uint8_t *C, uint8_t *T, aesedp_encrypt_ctx *ctx);
+	3. int tweak_crypt_group(const uint8_t *P, uint8_t *C, uint8_t *T, aesedp_encrypt_ctx *ctx, uint32_t lim); 
+	4. int tweak_uncrypt(const uint8_t *C, uint8_t *P, uint8_t *T, aesedp_decrypt_ctx *ctx);
+	5. int tweak_uncrypt_group(const uint8_t *C, uint8_t *P, uint8_t *T, aesedp_decrypt_ctx *ctx, uint32_t lim); 
+
+	This file should be compiled together with xtsClearC.c.
+
+	Functions 1, 2 and 4 replace the C functions in xtsClearC.c on x86_64/i386 architectures.
+	Functions 3 and 5 are only given here (no C code is available); they are called in xts_encrypt/xts_decrypt (xtsClearC.c)
+	  - we can possibly add C code for functions 3 and 5 for future porting to other architectures
+
+	cclee 4-29-10
+
+*/
+
+#ifdef KERNEL
+#include <i386/cpu_capabilities.h>
+#else
+#include <System/i386/cpu_capabilities.h>
+#endif
+#define	CRYPT_OK	0		// cannot include "crypt.h", in which CRYPT_OK is defined in an enum
+
+/*
+	The following macro is used throughout the functions in this file.
+	It is the core function within the function xts_mult_x defined in (xtsClearC.c)
+
+	upon entry, %xmm7 = the input tweak (128-bit), 
+	on return, %xmm7 = the updated tweak (128-bit)
+	the macro uses %xmm1/%xmm2/%ecx in the computation
+	the operation can be described as follows :
+	0. let x = %xmm7; 					// 128-bit little-endian input 
+	1. x = rotate_left(x,1);			// rotate left by 1 bit
+	2. if (x&1) x ^= 0x0000...0086;		// if least significant bit = 1, least significant byte ^= 0x86;
+	3. return x; 
+
+	It's a pity that SSE does not support shifting of the whole 128-bit xmm registers.
+	The workaround is
+		1. use two parallel quadword (8-byte) shifts, one for the two low 63-bit parts, one for the 2 leading bits
+		2. manipulate the shifted quadwords to form the 128-bit shifted result.
+	(A C sketch of this operation is given right after the macro below.)
+
+	Input : %xmm7
+	Output : %xmm7
+	Used : %xmm1/%xmm2/%ecx
+
+	The macro is good for both x86_64 and i386. 
+
+*/	
+
+	.macro		xts_mult_x_on_xmm7			// input : x = %xmm7, MS = most significant, LS = least significant
+	movaps		%xmm7, %xmm1				// %xmm1 = a copy of x 
+	movaps		%xmm7, %xmm2				// %xmm2 = a copy of x
+	psllq		$$1, %xmm7					// 1-bit left shift of 2 quad words (x1<<1, x0<<1), zero-filled 
+	psrlq		$$63, %xmm1					// 2 leading bits, each in the least significant bit of a quad word 
+	psrad		$$31, %xmm2					// the MS 32-bit will be either 0 or -1, depending on the MS bit of x
+	pshufd		$$0xc6, %xmm1, %xmm1		// switch the positions of the 2 leading bits
+	pshufd		$$0x03, %xmm2, %xmm2		// the LS 32-bit will be either 0 or -1, depending on the MS bit of x
+	por			%xmm1, %xmm7				// we finally have %xmm7 = rotate_left(x,1);
+	movl		$$0x86, %ecx				// a potential byte to xor the bottom byte
+	movd		%ecx, %xmm1					// copy it to %xmm1, the other is 0
+	pand		%xmm2, %xmm1				// %xmm1 = 0 or 0x86, depending on the MS bit of x
+	pxor		%xmm1, %xmm7				// rotate_left(x,1) ^= 0 or 0x86 depending on the MS bit of x
+	.endm
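+
+/*
+	For reference, a C sketch of the macro above (illustrative only),
+	treating the tweak as two 64-bit little-endian halves lo/hi:
+
+	carry = hi >> 63;			// the bit that rotates out of the top
+	hi    = (hi << 1) | (lo >> 63);		// 128-bit left shift, with the
+	lo    = (lo << 1) | carry;		// top bit rotated back into bit 0
+	if (carry) lo ^= 0x86;			// together with the rotated-in 1,
+						// the net effect is low byte ^= 0x87
+*/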
+
+
+/* 
+	function : void xts_mult_x(uint8_t *I);
+
+	1. load (__m128*) (I) into xmm7
+	2. macro xts_mult_x_on_xmm7 (i/o @ xmm7, used xmm1/xmm2/ecx) 
+	3. save output (%xmm7) to memory pointed by I
+
+	input : 16-byte memory pointed by I
+	output : same 16-byte memory pointed by I
+
+	if kernel code, xmm1/xmm2/xmm7 saved and restored
+	other used registers : eax/ecx
+
+ */	
+	.text
+	.align  4,0x90
+	.globl	_xts_mult_x
+_xts_mult_x:
+
+#if defined __x86_64__
+	#define	I 	%rdi						// 1st argument at %rdi for x86_64
+	#define	sp	%rsp
+#else
+	mov		4(%esp), %eax					// 1st argument at stack, offset 4 for ret_addr for i386
+	#define	I	%eax
+	#define	sp	%esp
+#endif	
+
+	// if KERNEL code, allocate memory and save xmm1/xmm2/xmm7
+#ifdef	KERNEL
+#if defined __x86_64__
+	sub		$0x38, sp						// 8-bytes alignment + 3 * 16 bytes	
+#else
+	sub		$0x3c, sp						// 12-bytes alignment + 3 * 16 bytes 
+#endif
+	movaps	%xmm1, (sp)
+	movaps	%xmm2, 16(sp)
+	movaps	%xmm7, 32(sp)
+#endif
+
+	// load, compute, and save
+	movups	(I), %xmm7						// load input tweak 128-bit into %xmm7
+	xts_mult_x_on_xmm7						// the macro (also used else where) will update %xmm7 as the output
+	movups	%xmm7, (I)						// save the xts_mult_x output 
+
+	// if KERNEL code, restore xmm1/xmm2/xmm7 and deallocate stack memory
+#ifdef	KERNEL
+	movaps	(sp), %xmm1
+	movaps	16(sp), %xmm2
+	movaps	32(sp), %xmm7
+#if defined __x86_64__
+	add		$0x38, sp						// 8-bytes alignment + 3 * 16 bytes	
+#else
+	add		$0x3c, sp						// 12-bytes alignment + 3 * 16 bytes	
+#endif
+#endif
+
+	ret										// return
+
+	#undef	I
+	#undef	sp
+
+/* 
+	The following is x86_64/i386 assembly implementation of 
+
+	int tweak_crypt(const uint8_t *P, uint8_t *C, uint8_t *T, aesedp_encrypt_ctx *ctx);
+
+	Its C code implementation is given in xtsClearC.c
+
+	all pointers P/C/T point to a block of 16 bytes. In the following description, P/C/T represent 128-bit data.
+
+	The operation of tweak_crypt
+
+	1. C = P ^ T
+	2. err = aes_encrypt(C, C, ctx); if (err != CRYPT_OK) return err;
+	3. C = C ^ T
+	4. xts_mult_x(T)
+	5. return CRYPT_OK;
+
+	The following is the assembly implementation flow
+
+	1. save used xmm registers (xmm1/xmm7) if kernel code 
+	2. load xmm1 = P, xmm7 = T
+	3. xmm1 = C = P ^ T
+	4. write xmm1 to C
+	5. call aes_encrypt(C,C,ctx); note that it will use aesni if available; also the xmm registers return intact
+	6. load xmm1 = C
+	7. xmm1 = C = C^T = xmm1 ^ xmm7
+	8. write xmm1 to C
+	9. update T (in xmm7) via xts_mult_x macro
+	a. restore xmm registers (xmm1/xmm7) if kernel code
+	b. return CRYPT_OK (in eax) 
+
+	Note: used xmm registers : xmm1/xmm2/xmm7, xmm2 in xts_mult_x macro
+
+*/
+
+	.text
+	.align  4,0x90
+	.globl	_tweak_crypt
+_tweak_crypt:
+#if defined	__i386__
+
+	// push into stack for local use
+	push	%ebp
+	mov		%esp, %ebp
+	push	%ebx
+	push	%edi
+	push	%esi
+
+	// allocate stack memory for local use
+	sub		$12+16*4, %esp				// 12 (alignment) + 3*16 (xmm save/restore) + 16 (aes_crypt calling arguments)
+
+	// load with called arguments
+	mov		8(%ebp), %eax				// P, we need this only briefly, so eax is fine
+	mov		12(%ebp), %edi				// C
+	mov		16(%ebp), %ebx				// T
+	mov		20(%ebp), %esi				// ctx
+
+	#define	P	%eax
+	#define	C	%edi
+	#define	T	%ebx
+	#define	ctx	%esi
+	#define	sp	%esp
+
+#else
+	// x86_64 calling argument order : rdi/rsi/rdx/rcx/r8
+
+	// push into stack for local use
+	push	%rbp
+	mov		%rsp, %rbp
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+
+	// allocate stack memory for local use; if kernel code, need to save/restore xmm registers
+#ifdef KERNEL
+	sub		$4*16, %rsp					// only need 3*16; add 16 extra to make save/restore xmm common to i386
+#endif
+
+	// load with called arguments, release rdi/rsi/rdx/rcx/r8, as need to call aes_encrypt
+	mov		%rsi, %r13
+	mov		%rdx, %r14
+	mov		%rcx, %r15
+
+	#define	P 	%rdi
+	#define	C	%r13
+	#define	T	%r14
+	#define	ctx	%r15
+	#define	sp	%rsp
+
+#endif
+
+	// if kernel, save used xmm registers
+#ifdef	KERNEL
+	movaps	%xmm1, 16(sp)
+	movaps	%xmm2, 32(sp)
+	movaps	%xmm7, 48(sp)
+#endif
+
+	movups	(P), %xmm1					// P
+	movups	(T), %xmm7					// T
+
+	// set up calling arguments for aes_encrypt
+#if defined	__i386__
+	mov		C, (%esp)					// C
+	mov		C, 4(%esp)					// C
+	mov		ctx, 8(%esp)				// ctx
+#else
+	mov		C, %rdi						// C
+	mov		C, %rsi						// C
+	mov		ctx, %rdx					// ctx
+#endif
+
+	pxor	%xmm7, %xmm1				// C = P ^ T	
+	movups	%xmm1, (C)					// save C into memory
+
+	call	_aes_encrypt				// err = aes_encrypt(C,C,ctx);
+
+	cmp		$CRYPT_OK, %eax				// check err == CRYPT_OK
+	jne		9f							// if err != CRYPT_OK, exit
+
+	movups	(C), %xmm1					// load xmm1 = C
+	pxor	%xmm7, %xmm1				// C ^= T
+	movups	%xmm1, (C)					// write C with xmm1, xmm1 is freed now, will be changed in the following macro
+
+	xts_mult_x_on_xmm7					// update T (on xmm7)
+
+	movups	%xmm7, (T)					// write xmm7 to T
+9:
+
+	// restore used xmm registers if this is for kernel
+#ifdef	KERNEL
+	movaps	16(sp), %xmm1
+	movaps	32(sp), %xmm2
+	movaps	48(sp), %xmm7
+#endif
+
+	// free stack memory and restore callee registers
+#if defined	__i386__
+	add		$12+16*4, %esp				// 12 (alignment) + 3*16 (xmm save/restore) + 16 (aes_crypt calling arguments)
+	pop		%esi
+	pop		%edi
+	pop		%ebx
+#else
+#ifdef	KERNEL
+	add		$4*16, %rsp					// only need 3*16; add 16 extra to make save/restore xmm common to i386
+#endif
+	pop		%r15
+	pop		%r14
+	pop		%r13
+	pop		%r12
+#endif
+
+	// return, eax/rax already has the return val
+	leave
+	ret
+
+	#undef	P
+	#undef	C
+	#undef	T
+	#undef	ctx
+	#undef	sp
+
+/* 
+	The following is x86_64/i386 assembly implementation of 
+
+	int tweak_crypt_group(const uint8_t *P, uint8_t *C, uint8_t *T, aesedp_encrypt_ctx *ctx, uint32_t lim);
+
+	TODO : Its C code implementation is YET to be provided in xtsClearC.c (for the benefit of porting to other
+	ISAs); a hedged C sketch is given right after this comment. This function is a grouped version of the above
+	tweak_crypt(), so the xmm register save/restore only needs to happen once for all grouped blocks.
+
+	The implementation here probes __cpu_capabilities to detect whether aesni (or the hw-aes instructions) is available.
+	If aesni is available, the code branches to optimized code that uses aesni.
+
+	The optimized aesni code operates as follows:
+
+	while (more than 4 consecutive blocks available) {
+
+		do xts_mult_x macro 4 times and write the 4 tweaks on stack (16-byte aligned)
+	
+		perform 4 C = P ^ T;	// T is on 16-byte aligned stack
+
+		perform 4 aes_encrypt (all aes_encrypt instructions interleaved to achieve better throughput)
+
+		perform 4 C = C ^ T		// T is on 16-byte aligned stack
+
+	}
+
+	The code then falls through to the scalar code, which sequentially performs what tweak_crypt does
+
+	1. C = P ^ T
+	2. err = aes_encryp(C, C, ctx); if (err != CRYPT_OK) return err;
+	3. C = C ^ T
+	4. xts_mult_x(T)
+
+	Note: used xmm registers : 
+			xmm0-xmm5, xmm7 if aesni is available
+			xmm0-xmm4, xmm7 if aesni is not available.
+
+*/
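+
+/*
+	A possible portable C version of tweak_crypt_group (an illustrative
+	sketch only - per the TODO above no C code exists yet; the semantics
+	are taken from the scalar loop in xts_encrypt):
+
+	int tweak_crypt_group(const uint8_t *P, uint8_t *C, uint8_t *T,
+				aesedp_encrypt_ctx *ctx, uint32_t lim)
+	{
+		uint32_t i, err;
+		for (i = 0; i < lim; i++) {		// lim consecutive 16-byte blocks
+			if ((err = tweak_crypt(P, C, T, ctx)) != CRYPT_OK)
+				return err;
+			P += 16;
+			C += 16;
+		}
+		return CRYPT_OK;
+	}
+*/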
+
+    .text
+	.align  4,0x90
+	.globl	_tweak_crypt_group
+_tweak_crypt_group:
+
+#if defined	__i386__
+
+	// push callee-saved registers for local use
+	push	%ebp
+	mov		%esp, %ebp
+	push	%ebx
+	push	%edi
+	push	%esi
+
+	// allocate stack memory for local use and/or xmm register save for kernel code
+	sub		$(12+8*16+16*4), %esp		// 12 (alignment) + 8*16 (xmm) + 4*16 (pre-computed tweaks) aesni
+										// 12 (alignment) + 8*16 (xmm) + 4*16 (only 12 used for aes_encrypt) no aesni 
+	// transfer calling arguments
+	mov		20(%ebp), %eax				// ctx
+	mov		12(%ebp), %edi				// C
+	mov		16(%ebp), %ebx				// T
+	mov		8(%ebp), %esi				// P
+	mov		%eax, 8(%esp)				// ctx as the 3rd parameter to aes_encrypt
+
+	#define	P	%esi
+	#define	C	%edi
+	#define	T	%ebx
+	#define	lim	24(%ebp)
+	#define	sp	%esp
+
+#else
+
+	// push callee-saved registers for local use
+	push	%rbp
+	mov		%rsp, %rbp
+	push	%rbx
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+
+	// allocate stack memory for local use and/or xmm register save for kernel code
+	sub		$(8+8*16+16*5), %rsp		// 8 (alignment) + 8*16 (xmm) + 4*16 (pre-computed tweaks) + 16 (common to i386)
+
+	// rdi/rsi/rdx/rcx/r8
+	// transfer calling arguments
+	mov		%rdi, %r12	
+	mov		%rsi, %r13
+	mov		%rdx, %r14
+	mov		%rcx, %r15
+	mov		%r8,  %rbx
+
+	#define	P 	%r12
+	#define	C	%r13
+	#define	T	%r14
+	#define	ctx	%r15
+	#define	lim	%ebx
+	#define	sp	%rsp
+#endif
+
+#ifdef	KERNEL
+	movaps	%xmm0, 0x50(sp)
+	movaps	%xmm1, 0x60(sp)
+	movaps	%xmm2, 0x70(sp)
+	movaps	%xmm3, 0x80(sp)
+	movaps	%xmm4, 0x90(sp)
+	movaps	%xmm7, 0xa0(sp)
+#endif
+
+	// probe __cpu_capabilities to detect aesni
+#if defined __x86_64__
+    movq    __cpu_capabilities@GOTPCREL(%rip), %rax         // %rax -> __cpu_capabilities
+    mov     (%rax), %eax                                    // %eax = __cpu_capabilities
+#else		// i386
+#if defined KERNEL
+    leal    __cpu_capabilities, %eax                        // %eax -> __cpu_capabilities
+    mov     (%eax), %eax                                    // %eax = __cpu_capabilities
+#else
+    movl    _COMM_PAGE_CPU_CAPABILITIES, %eax
+#endif
+#endif
+	test    $(kHasAES), %eax
+	je		L_crypt_group_sw								// if aesni not available, jump to sw-based implementation
+
+	// aesni-based implementation
+
+	sub		$4, lim											// pre-decrement lim by 4
+	jl		9f												// if lim < 4, skip the following code
+
+	movups	(T), %xmm7										// xmm7 is the tweak before encrypting every 4 blocks	
+#ifdef	KERNEL
+	movaps	%xmm5, 0xb0(sp)									// hw-aes-based uses extra xmm5
+#endif
+
+0:
+	// derive 4 tweaks using xts_mult_x macro, and save on aligned stack space
+	// xmm7 will be the tweak for next 4-blocks iteration
+
+	#define	tweak1	16(sp)
+	#define	tweak2	32(sp)
+	#define	tweak3	48(sp)
+	#define	tweak4	64(sp)
+
+	movaps	%xmm7, tweak1									// save 1st tweak on stack
+	xts_mult_x_on_xmm7										// compute 2nd tweak
+	movaps	%xmm7, tweak2									// save 2nd tweak on stack
+	xts_mult_x_on_xmm7										// compute 3rd tweak
+	movaps	%xmm7, tweak3									// save 3rd tweak on stack
+	xts_mult_x_on_xmm7										// compute 4th tweak
+	movaps	%xmm7, tweak4									// save 4th tweak on stack
+	xts_mult_x_on_xmm7										// compute 1st tweak for next iteration
+
+	// read 4 Ps
+	movups	(P), %xmm0
+	movups	16(P), %xmm1
+	movups	32(P), %xmm2
+	movups	48(P), %xmm3
+
+	// 4 C = P ^ T
+	pxor	tweak1, %xmm0
+	pxor	tweak2, %xmm1
+	pxor	tweak3, %xmm2
+	pxor	tweak4, %xmm3
+
+	// 4 interleaved aes_encrypt
+
+#if defined	__i386__
+	mov		8(sp), %ecx	// ctx
+	#undef	ctx
+	#define	ctx	%ecx
+#endif
+
+	mov		240(ctx), %eax					// aes length 
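+											// (240(ctx) is the rn field that follows ks[60] in the aesedp
+											// context; per the checks below it holds 16*rounds, i.e.
+											// 160/192/224 for AES-128/192/256)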
+
+	cmp		$160, %eax						// AES-128 ?
+	je		160f
+	cmp		$192, %eax						// AES-192 ?
+	je		192f
+	cmp		$224, %eax						// AES-256 ?
+	je		224f
+	mov		$-1, %eax						// error : non-supported aes length
+#ifdef	KERNEL
+	movaps	0xb0(sp), %xmm5					// hw-aes-based uses extra xmm5
+#endif
+	jmp		L_error_crypt
+
+	// definitions, macros, and constructs for 4 blocks hw-aes-encrypt
+
+	// the following key definitions will also be used in tweak_uncrypt_group 
+	#define	key0			0(ctx)
+	#define	key1			16(ctx)
+	#define	key2			32(ctx)
+	#define	key3			48(ctx)
+	#define	key4			64(ctx)
+	#define	key5			80(ctx)
+	#define	key6			96(ctx)
+	#define	key7			112(ctx)
+	#define	key8			128(ctx)
+	#define	key9			144(ctx)
+	#define	keyA			160(ctx)
+	#define	keyB			176(ctx)
+	#define	keyC			192(ctx)
+	#define	keyD			208(ctx)
+	#define	keyE			224(ctx)
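+
+	// the round keys sit at consecutive 16-byte offsets in the expanded
+	// schedule: AES-128 uses key0..keyA, AES-192 adds keyB/keyC, and
+	// AES-256 adds keyD/keyE, matching the aes_128/aes_192/aes_256 macros below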
+
+	#define	aes		aesenc
+	#define	aeslast	aesenclast
+
+	// all aes encrypt operations start with the following sequence
+	.macro	aes_common_part
+	movups	key0, %xmm4
+	movups	key1, %xmm5
+	pxor	%xmm4, %xmm0
+	pxor	%xmm4, %xmm1
+	pxor	%xmm4, %xmm2
+	pxor	%xmm4, %xmm3
+	movups	key2, %xmm4
+	aes		%xmm5, %xmm0
+	aes		%xmm5, %xmm1
+	aes		%xmm5, %xmm2
+	aes		%xmm5, %xmm3
+	movups	key3, %xmm5
+	aes		%xmm4, %xmm0
+	aes		%xmm4, %xmm1
+	aes		%xmm4, %xmm2
+	aes		%xmm4, %xmm3
+	movups	key4, %xmm4
+	aes		%xmm5, %xmm0
+	aes		%xmm5, %xmm1
+	aes		%xmm5, %xmm2
+	aes		%xmm5, %xmm3
+	movups	key5, %xmm5
+	aes		%xmm4, %xmm0
+	aes		%xmm4, %xmm1
+	aes		%xmm4, %xmm2
+	aes		%xmm4, %xmm3
+	movups	key6, %xmm4
+	aes		%xmm5, %xmm0
+	aes		%xmm5, %xmm1
+	aes		%xmm5, %xmm2
+	aes		%xmm5, %xmm3
+	movups	key7, %xmm5
+	aes		%xmm4, %xmm0
+	aes		%xmm4, %xmm1
+	aes		%xmm4, %xmm2
+	aes		%xmm4, %xmm3
+	movups	key8, %xmm4
+	aes		%xmm5, %xmm0
+	aes		%xmm5, %xmm1
+	aes		%xmm5, %xmm2
+	aes		%xmm5, %xmm3
+	movups	key9, %xmm5
+	aes		%xmm4, %xmm0
+	aes		%xmm4, %xmm1
+	aes		%xmm4, %xmm2
+	aes		%xmm4, %xmm3
+	movups	keyA, %xmm4
+	aes		%xmm5, %xmm0
+	aes		%xmm5, %xmm1
+	aes		%xmm5, %xmm2
+	aes		%xmm5, %xmm3
+	.endm
+
+	// all aes encrypt operations end with the following 4 instructions
+	.macro	aes_last
+	aeslast	%xmm4, %xmm0
+	aeslast	%xmm4, %xmm1
+	aeslast	%xmm4, %xmm2
+	aeslast	%xmm4, %xmm3
+	.endm
+
+	.macro	aes_128
+	aes_common_part			// encrypt common part
+	aes_last				// encrypt ending part
+	.endm
+	
+	.macro	aes_192
+	aes_common_part			// encrypt common part
+
+	// 10 extra instructions in between common and ending
+	movups	keyB, %xmm5
+	aes		%xmm4, %xmm0
+	aes		%xmm4, %xmm1
+	aes		%xmm4, %xmm2
+	aes		%xmm4, %xmm3
+	movups	keyC, %xmm4
+	aes		%xmm5, %xmm0
+	aes		%xmm5, %xmm1
+	aes		%xmm5, %xmm2
+	aes		%xmm5, %xmm3
+
+	aes_last				// encrypt ending part
+	.endm
+
+	.macro	aes_256
+	aes_common_part			// encrypt common part
+
+	// 20 extra instructions in between common and ending
+	movups	keyB, %xmm5
+	aes		%xmm4, %xmm0
+	aes		%xmm4, %xmm1
+	aes		%xmm4, %xmm2
+	aes		%xmm4, %xmm3
+	movups	keyC, %xmm4
+	aes		%xmm5, %xmm0
+	aes		%xmm5, %xmm1
+	aes		%xmm5, %xmm2
+	aes		%xmm5, %xmm3
+	movups	keyD, %xmm5
+	aes		%xmm4, %xmm0
+	aes		%xmm4, %xmm1
+	aes		%xmm4, %xmm2
+	aes		%xmm4, %xmm3
+	movups	keyE, %xmm4
+	aes		%xmm5, %xmm0
+	aes		%xmm5, %xmm1
+	aes		%xmm5, %xmm2
+	aes		%xmm5, %xmm3
+
+	aes_last				// encrypt ending part
+	.endm
+
+160:	// AES-128 encrypt
+	aes_128
+	jmp		8f
+
+192:	// AES-192 encrypt
+	aes_192
+	jmp		8f
+
+224:	// AES-256 encrypt	
+	aes_256
+
+8:	
+
+	// 4 C = C ^ T
+	pxor	tweak1, %xmm0
+	pxor	tweak2, %xmm1
+	pxor	tweak3, %xmm2
+	pxor	tweak4, %xmm3
+
+	// write 4 Cs
+	movups	%xmm0, (C)
+	movups	%xmm1, 16(C)
+	movups	%xmm2, 32(C)
+	movups	%xmm3, 48(C)
+
+	add	$64, P
+	add	$64, C
+
+	sub		$4, lim
+	jge		0b
+
+#ifdef	KERNEL
+	movaps	0xb0(sp), %xmm5				// hw-aes-based uses extra xmm5
+#endif
+	movups	%xmm7, (T)
+
+9:
+	xor		%eax, %eax					// to return CRYPT_OK
+	add		$4, lim						// post-increment lim by 4
+	je		9f							// if lim==0, branch to prepare to return	
+
+L_crypt_group_sw:
+
+	movups	(T), %xmm7					// T, xmm7 will be used as T (128-bit) throughout the loop
+
+	sub		$1, lim						// pre-decrement lim by 1	
+	jl		1f							// if lim < 1, branch to prepare to return
+0:
+	movups	(P), %xmm0					// P
+
+	// prepare for calling aes_encrypt
+#if defined	__i386__
+	mov		C, (%esp)					// C
+	mov		C, 4(%esp)					// C
+										// ctx was prepared previously in preamble
+#else
+	mov		C, %rdi						// C
+	mov		C, %rsi						// C
+	mov		ctx, %rdx					// ctx
+#endif
+
+	pxor	%xmm7, %xmm0				// C = P ^ T	
+	movups	%xmm0, (C)					// save C into memory
+
+	call	_aes_encrypt_xmm_no_save	// err = aes_encrypt(C,C,ctx);
+
+	cmp		$CRYPT_OK, %eax				// err == CRYPT_OK ? 
+	jne		9f							// if err != CRYPT_OK, branch to exit with error
+
+	movups	(C), %xmm0					// load xmm0 with C
+	pxor	%xmm7, %xmm0				// C ^= T
+	movups	%xmm0, (C)					// save output C
+
+	xts_mult_x_on_xmm7
+
+	add		$16, C						// next C
+	add		$16, P						// next P
+	sub		$1, lim						// lim--
+	jge		0b							// if (lim>0) repeat the scalar loop
+
+1:	movups	%xmm7, (T)					// save final tweak 
+L_error_crypt:
+9:
+	// if kernel, restore used xmm registers
+#ifdef	KERNEL
+	movaps	0x50(sp), %xmm0
+	movaps	0x60(sp), %xmm1
+	movaps	0x70(sp), %xmm2
+	movaps	0x80(sp), %xmm3
+	movaps	0x90(sp), %xmm4
+	movaps	0xa0(sp), %xmm7
+#endif
+
+#if defined	__i386__
+	add		$(12+16*8+16*4), %esp
+	pop		%esi
+	pop		%edi
+	pop		%ebx
+#else
+	add		$(8+16*8+16*5), %rsp
+	pop		%r15
+	pop		%r14
+	pop		%r13
+	pop		%r12
+	pop		%rbx
+#endif
+	leave
+	ret
+
+	#undef	P
+	#undef	C
+	#undef	T
+	#undef	ctx
+	#undef	sp
+
+/* 
+	The following is x86_64/i386 assembly implementation of 
+
+	int tweak_uncrypt(const uint8_t *C, uint8_t *P, uint8_t *T, aesedp_decrypt_ctx *ctx);
+
+	Its C code implementation is given in xtsClearC.c
+
+	all pointers C/P/T point to a block of 16 bytes. In the following description, C/P/T represent 128-bit data.
+
+	The operation of tweak_uncrypt
+
+	1. P = C ^ T
+	2. err = aes_decrypt(P, P, ctx); if (err != CRYPT_OK) return err;
+	3. P = P ^ T
+	4. xts_mult_x(T)
+	5. return CRYPT_OK;
+
+	The following is the assembly implementation flow
+
+	1. save used xmm registers (xmm1/xmm7) if kernel code 
+	2. load xmm1 = C, xmm7 = T
+	3. xmm1 = P = C ^ T
+	4. write xmm1 to P
+	5. call aes_decrypt(P,P,ctx); note that it will use aesni if available; also the xmm registers return intact
+	6. load xmm1 = P
+	7. xmm1 = P = P^T = xmm1 ^ xmm7
+	8. write xmm1 to P
+	9. update T (in xmm7) via xts_mult_x macro
+	a. restore xmm registers (xmm1/xmm7) if kernel code
+	b. return CRYPT_OK (in eax) 
+
+	Note: used xmm registers : xmm1/xmm2/xmm7, xmm2 in xts_mult_x macro
+
+*/
+
+	.text
+	.align  4,0x90
+	.globl	_tweak_uncrypt
+_tweak_uncrypt:
+#if defined	__i386__
+
+	// push into stack for local use
+	push	%ebp
+	mov		%esp, %ebp
+	push	%ebx
+	push	%edi
+	push	%esi
+
+	// allocate stack memory for local use
+	sub		$12+16*4, %esp				// 12 (alignment) + 3*16 (xmm save/restore) + 16 (aes_crypt calling arguments)
+
+	// load with called arguments
+	mov		8(%ebp), %eax				// C, we need this only briefly, so eax is fine
+	mov		12(%ebp), %edi				// P
+	mov		16(%ebp), %ebx				// T
+	mov		20(%ebp), %esi				// ctx
+
+	#define	C	%eax
+	#define	P	%edi
+	#define	T	%ebx
+	#define	ctx	%esi
+	#define	sp	%esp
+
+#else
+	// x86_64 calling argument order : rdi/rsi/rdx/rcx/r8
+
+	// push into stack for local use
+	push	%rbp
+	mov		%rsp, %rbp
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+
+	// allocate stack memory for local use; if kernel code, need to save/restore xmm registers
+#ifdef KERNEL
+	sub		$4*16, %rsp					// only need 3*16; add 16 extra to make save/restore xmm common to i386
+#endif
+
+	// load with called arguments, release rdi/rsi/rdx/rcx/r8, as need to call aes_decrypt
+	mov		%rsi, %r13
+	mov		%rdx, %r14
+	mov		%rcx, %r15
+
+	#define	C 	%rdi
+	#define	P	%r13
+	#define	T	%r14
+	#define	ctx	%r15
+	#define	sp	%rsp
+
+#endif
+
+	// if kernel, save used xmm registers
+#ifdef	KERNEL
+	movaps	%xmm1, 16(sp)
+	movaps	%xmm2, 32(sp)
+	movaps	%xmm7, 48(sp)
+#endif
+
+	movups	(C), %xmm1					// C
+	movups	(T), %xmm7					// T
+
+	// set up calling arguments for aes_decrypt
+#if defined	__i386__
+	mov		P, (%esp)					// P
+	mov		P, 4(%esp)					// P
+	mov		ctx, 8(%esp)				// ctx
+#else
+	mov		P, %rdi						// P
+	mov		P, %rsi						// P
+	mov		ctx, %rdx					// ctx
+#endif
+
+	pxor	%xmm7, %xmm1				// P = C ^ T	
+	movups	%xmm1, (P)					// save P into memory
+
+	call	_aes_decrypt				// err = aes_decrypt(P,P,ctx);
+
+	cmp		$CRYPT_OK, %eax				// check err == CRYPT_OK
+	jne		9f							// if err != CRYPT_OK, exit
+
+	movups	(P), %xmm1					// load xmm1 = P
+	pxor	%xmm7, %xmm1				// P ^= T
+	movups	%xmm1, (P)					// write P with xmm1, xmm1 is freed now, will be changed in the following macro
+
+	xts_mult_x_on_xmm7					// update T (on xmm7)
+
+	movups	%xmm7, (T)					// write xmm7 to T
+9:
+
+	// restore used xmm registers if this is for kernel
+#ifdef	KERNEL
+	movaps	16(sp), %xmm1
+	movaps	32(sp), %xmm2
+	movaps	48(sp), %xmm7
+#endif
+
+	// free stack memory and restore callee registers
+#if defined	__i386__
+	add		$12+16*4, %esp				// 12 (alignment) + 3*16 (xmm save/restore) + 16 (aes_crypt calling arguments)
+	pop		%esi
+	pop		%edi
+	pop		%ebx
+#else
+#ifdef	KERNEL
+	add		$4*16, %rsp					// only need 3*16; add 16 extra to make save/restore xmm common to i386
+#endif
+	pop		%r15
+	pop		%r14
+	pop		%r13
+	pop		%r12
+#endif
+
+	// return, eax/rax already has the return val
+	leave
+	ret
+
+	#undef	P
+	#undef	C
+	#undef	T
+	#undef	ctx
+	#undef	sp
+
+/* 
+	The following is x86_64/i386 assembly implementation of 
+
+	int tweak_uncrypt_group(const uint8_t *C, uint8_t *P, uint8_t *T, aesedp_decrypt_ctx *ctx, uint32_t lim);
+
+	TODO : Its C code implementation is YET to be provided in xtsClearC.c (for the benefit of porting to other
+	ISAs); a hedged C sketch is given right after this comment. This function is a grouped version of the above
+	tweak_uncrypt(), so the xmm register save/restore only needs to happen once for all grouped blocks.
+
+	The implementation here probes __cpu_capabilities to detect whether aesni (or the hw-aes instructions) is available.
+	If aesni is available, the code branches to optimized code that uses aesni.
+
+	The optimized aesni code operates as follows:
+
+	while (more than 4 consecutive blocks available) {
+
+		do xts_mult_x macro 4 times and write the 4 tweaks on stack (16-byte aligned)
+	
+		perform 4 P = C ^ T;	// T is on 16-byte aligned stack
+
+		perform 4 aes_decrypt (all aes_decrypt instructions interleaved to achieve better throughput)
+
+		perform 4 P = P ^ T		// T is on 16-byte aligned stack
+
+	}
+
+	The code then falls through to the scalar code, which sequentially performs what tweak_uncrypt does
+
+	1. P = C ^ T
+	2. err = aes_decrypt(P, P, ctx); if (err != CRYPT_OK) return err;
+	3. P = P ^ T
+	4. xts_mult_x(T)
+
+	Note: used xmm registers : 
+			xmm0-xmm5, xmm7 if aesni is available
+			xmm0-xmm4, xmm7 if aesni is not available.
+
+*/
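+
+/*
+	A possible portable C version of tweak_uncrypt_group (an illustrative
+	sketch only - per the TODO above no C code exists yet; the semantics
+	are taken from the scalar loop in xts_decrypt):
+
+	int tweak_uncrypt_group(const uint8_t *C, uint8_t *P, uint8_t *T,
+				aesedp_decrypt_ctx *ctx, uint32_t lim)
+	{
+		uint32_t i, err;
+		for (i = 0; i < lim; i++) {		// lim consecutive 16-byte blocks
+			if ((err = tweak_uncrypt(C, P, T, ctx)) != CRYPT_OK)
+				return err;
+			C += 16;
+			P += 16;
+		}
+		return CRYPT_OK;
+	}
+*/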
+
+    .text
+	.align  4,0x90
+	.globl	_tweak_uncrypt_group
+_tweak_uncrypt_group:
+
+#if defined	__i386__
+
+	// push callee-saved registers for local use
+	push	%ebp
+	mov		%esp, %ebp
+	push	%ebx
+	push	%edi
+	push	%esi
+
+	// allocate stack memory for local use and/or xmm register save for kernel code
+	sub		$(12+8*16+16*4), %esp		// 12 (alignment) + 8*16 (xmm) + 4*16 (pre-computed tweaks) aesni
+										// 12 (alignment) + 8*16 (xmm) + 4*16 (only 12 used for aes_decrypt) no aesni 
+	// transfer calling arguments
+	mov		20(%ebp), %eax				// ctx
+	mov		12(%ebp), %edi				// P
+	mov		16(%ebp), %ebx				// T
+	mov		8(%ebp), %esi				// C
+	mov		%eax, 8(%esp)				// ctx as the 3rd parameter to aes_decrypt
+
+	#define	C	%esi
+	#define	P	%edi
+	#define	T	%ebx
+	#define	lim	24(%ebp)
+	#define	sp	%esp
+
+#else
+
+	// push callee-saved registers for local use
+	push	%rbp
+	mov		%rsp, %rbp
+	push	%rbx
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+
+	// allocate stack memory for local use and/or xmm register save for kernel code
+	sub		$(8+8*16+16*5), %rsp		// 8 (alignment) + 8*16 (xmm) + 4*16 (pre-computed tweaks) + 16 (common to i386)
+
+	// rdi/rsi/rdx/rcx/r8
+	// transfer calling arguments
+	mov		%rdi, %r12	
+	mov		%rsi, %r13
+	mov		%rdx, %r14
+	mov		%rcx, %r15
+	mov		%r8,  %rbx
+
+	#define	C 	%r12
+	#define	P	%r13
+	#define	T	%r14
+	#define	ctx	%r15
+	#define	lim	%ebx
+	#define	sp	%rsp
+#endif
+
+#ifdef	KERNEL
+	movaps	%xmm0, 0x50(sp)
+	movaps	%xmm1, 0x60(sp)
+	movaps	%xmm2, 0x70(sp)
+	movaps	%xmm3, 0x80(sp)
+	movaps	%xmm4, 0x90(sp)
+	movaps	%xmm7, 0xa0(sp)
+#endif
+
+	// probe __cpu_capabilities to detect aesni
+#if defined __x86_64__
+    movq    __cpu_capabilities@GOTPCREL(%rip), %rax         // %rax -> __cpu_capabilities
+    mov     (%rax), %eax                                    // %eax = __cpu_capabilities
+#else		// i386
+#if defined KERNEL
+    leal    __cpu_capabilities, %eax                        // %eax -> __cpu_capabilities
+    mov     (%eax), %eax                                    // %eax = __cpu_capabilities
+#else
+    movl    _COMM_PAGE_CPU_CAPABILITIES, %eax
+#endif
+#endif
+	test    $(kHasAES), %eax
+	je		L_uncrypt_group_sw								// if aesni not available, jump to sw-based implementation
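__cpu_capabilities and kHasAES are xnu-specific (the commpage export probed above). For reference only, roughly the same probe can be written portably with CPUID: leaf 1 reports AESNI in ECX bit 25. A sketch assuming a GCC/Clang-style compiler on x86:

```c
#include <stdint.h>

/* User-space analogue of the kHasAES probe above: CPUID.01H:ECX.AESNI[bit 25].
   Illustrative only; the code in this patch reads __cpu_capabilities instead. */
static int has_aesni(void)
{
    uint32_t eax, ebx, ecx, edx;
    __asm__ volatile("cpuid"
                     : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                     : "a"(1), "c"(0));
    return (ecx >> 25) & 1;
}
```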
+
+	// aesni-based implementation
+
+	sub		$4, lim											// pre-decrement lim by 4
+	jl		9f												// if lim < 4, skip the following code
+
+	movups	(T), %xmm7										// xmm7 is the tweak before decrypting every 4 blocks	
+#ifdef	KERNEL
+	movaps	%xmm5, 0xb0(sp)									// hw-aes-based uses extra xmm5
+#endif
+
+0:
+	// derive 4 tweaks using the xts_mult_x macro, and save them on aligned stack space
+	// xmm7 will be the tweak for the next 4-block iteration
+
+	#define	tweak1	16(sp)
+	#define	tweak2	32(sp)
+	#define	tweak3	48(sp)
+	#define	tweak4	64(sp)
+
+	movaps	%xmm7, tweak1									// save 1st tweak on stack
+	xts_mult_x_on_xmm7										// compute 2nd tweak
+	movaps	%xmm7, tweak2									// save 2nd tweak on stack
+	xts_mult_x_on_xmm7										// compute 3rd tweak
+	movaps	%xmm7, tweak3									// save 3rd tweak on stack
+	xts_mult_x_on_xmm7										// compute 4th tweak
+	movaps	%xmm7, tweak4									// save 4th tweak on stack
+	xts_mult_x_on_xmm7										// compute 1st tweak for next iteration
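xts_mult_x_on_xmm7 multiplies the 128-bit tweak by x in GF(2^128) under the XTS polynomial x^128 + x^7 + x^2 + x + 1: shift the little-endian tweak left by one bit and, if a bit falls off the top, fold it back in as 0x87. A byte-wise C sketch (one plausible shape for the helper the TODO xtsClearC.c would provide, not the shipped code):

```c
#include <stdint.h>

/* Multiply the tweak by x in GF(2^128), XTS convention (byte 0 holds the
   least significant bits). Sketch only; the asm does this inside xmm7. */
static void xts_mult_x(uint8_t T[16])
{
    unsigned carry = 0, i;
    for (i = 0; i < 16; i++) {
        unsigned b = T[i];
        T[i]  = (uint8_t)((b << 1) | carry);  /* shift left across the 16 bytes  */
        carry = b >> 7;                       /* top bit carries into next byte  */
    }
    if (carry)
        T[0] ^= 0x87;                         /* reduce modulo x^128+x^7+x^2+x+1 */
}
```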
+
+	// read 4 Cs
+	movups	(C), %xmm0
+	movups	16(C), %xmm1
+	movups	32(C), %xmm2
+	movups	48(C), %xmm3
+
+	// 4 P = C ^ T
+	pxor	tweak1, %xmm0
+	pxor	tweak2, %xmm1
+	pxor	tweak3, %xmm2
+	pxor	tweak4, %xmm3
+
+	// 4 interleaved aes_decrypt
+
+#if defined	__i386__
+	mov		8(sp), %ecx	// ctx
+	#undef	ctx
+	#define	ctx	%ecx
+#endif
+
+	mov		240(ctx), %eax					// aes length (16 * number of rounds)
+
+	cmp		$160, %eax						// AES-128 (10 rounds) ?
+	je		160f
+	cmp		$192, %eax						// AES-192 (12 rounds) ?
+	je		192f
+	cmp		$224, %eax						// AES-256 (14 rounds) ?
+	je		224f
+	mov		$-1, %eax						// error : unsupported aes length
+#ifdef	KERNEL
+	movaps	0xb0(sp), %xmm5					// hw-aes-based uses extra xmm5
+#endif
+	jmp		L_error_uncrypt
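The 240(ctx) comparisons above amount to a dispatch on the round count: the field holds 16 bytes of round-key material per round, so 160/192/224 select the 10/12/14-round variants. In C the same dispatch might look like the sketch below; the struct layout and field name are assumptions, since the asm addresses the field only by its offset.

```c
#include <stdint.h>

/* Hypothetical context shape: 240 bytes of key schedule followed by a
   length word at offset 240, matching the asm's 240(ctx) access. */
typedef struct { uint8_t ks[240]; uint32_t aes_length; } aesedp_ctx_sketch;

static int rounds_for(const aesedp_ctx_sketch *ctx)
{
    switch (ctx->aes_length) {   /* 16 * number of rounds */
    case 160: return 10;         /* AES-128 */
    case 192: return 12;         /* AES-192 */
    case 224: return 14;         /* AES-256 */
    default:  return -1;         /* unsupported -> error path */
    }
}
```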
+
+	// definitions, macros to construct hw-aes-decrypt
+	// will reuse previously defined key0 = (ctx), key1 = 16(ctx), ....
+	#undef	aes
+	#undef	aeslast
+	#define aes	aesdec
+	#define	aeslast aesdeclast
+
+	.macro	aes_decrypt_common
+	movups	key8, %xmm4
+	aes		%xmm5, %xmm0
+	aes		%xmm5, %xmm1
+	aes		%xmm5, %xmm2
+	aes		%xmm5, %xmm3
+	movups	key7, %xmm5
+	aes		%xmm4, %xmm0
+	aes		%xmm4, %xmm1
+	aes		%xmm4, %xmm2
+	aes		%xmm4, %xmm3
+	movups	key6, %xmm4
+	aes		%xmm5, %xmm0
+	aes		%xmm5, %xmm1
+	aes		%xmm5, %xmm2
+	aes		%xmm5, %xmm3
+	movups	key5, %xmm5
+	aes		%xmm4, %xmm0
+	aes		%xmm4, %xmm1
+	aes		%xmm4, %xmm2
+	aes		%xmm4, %xmm3
+	movups	key4, %xmm4
+	aes		%xmm5, %xmm0
+	aes		%xmm5, %xmm1
+	aes		%xmm5, %xmm2
+	aes		%xmm5, %xmm3
+	movups	key3, %xmm5
+	aes		%xmm4, %xmm0
+	aes		%xmm4, %xmm1
+	aes		%xmm4, %xmm2
+	aes		%xmm4, %xmm3
+	movups	key2, %xmm4
+	aes		%xmm5, %xmm0
+	aes		%xmm5, %xmm1
+	aes		%xmm5, %xmm2
+	aes		%xmm5, %xmm3
+	movups	key1, %xmm5
+	aes		%xmm4, %xmm0
+	aes		%xmm4, %xmm1
+	aes		%xmm4, %xmm2
+	aes		%xmm4, %xmm3
+	movups	key0, %xmm4
+	aes		%xmm5, %xmm0
+	aes		%xmm5, %xmm1
+	aes		%xmm5, %xmm2
+	aes		%xmm5, %xmm3
+	aeslast	%xmm4, %xmm0
+	aeslast	%xmm4, %xmm1
+	aeslast	%xmm4, %xmm2
+	aeslast	%xmm4, %xmm3
+	.endm
+
+	.macro	aes_dec_128
+	movups	keyA, %xmm4
+	movups	key9, %xmm5
+	pxor	%xmm4, %xmm0
+	pxor	%xmm4, %xmm1
+	pxor	%xmm4, %xmm2
+	pxor	%xmm4, %xmm3
+	aes_decrypt_common
+	.endm
+
+	.macro	aes_dec_192
+	movups	keyC, %xmm4
+	movups	keyB, %xmm5
+	pxor	%xmm4, %xmm0
+	pxor	%xmm4, %xmm1
+	pxor	%xmm4, %xmm2
+	pxor	%xmm4, %xmm3
+	movups	keyA, %xmm4
+	aes		%xmm5, %xmm0
+	aes		%xmm5, %xmm1
+	aes		%xmm5, %xmm2
+	aes		%xmm5, %xmm3
+	movups	key9, %xmm5
+	aes		%xmm4, %xmm0
+	aes		%xmm4, %xmm1
+	aes		%xmm4, %xmm2
+	aes		%xmm4, %xmm3
+	aes_decrypt_common
+	.endm
+
+	.macro	aes_dec_256
+	movups	keyE, %xmm4
+	movups	keyD, %xmm5
+	pxor	%xmm4, %xmm0
+	pxor	%xmm4, %xmm1
+	pxor	%xmm4, %xmm2
+	pxor	%xmm4, %xmm3
+	movups	keyC, %xmm4
+	aes		%xmm5, %xmm0
+	aes		%xmm5, %xmm1
+	aes		%xmm5, %xmm2
+	aes		%xmm5, %xmm3
+	movups	keyB, %xmm5
+	aes		%xmm4, %xmm0
+	aes		%xmm4, %xmm1
+	aes		%xmm4, %xmm2
+	aes		%xmm4, %xmm3
+	movups	keyA, %xmm4
+	aes		%xmm5, %xmm0
+	aes		%xmm5, %xmm1
+	aes		%xmm5, %xmm2
+	aes		%xmm5, %xmm3
+	movups	key9, %xmm5
+	aes		%xmm4, %xmm0
+	aes		%xmm4, %xmm1
+	aes		%xmm4, %xmm2
+	aes		%xmm4, %xmm3
+	aes_decrypt_common
+	.endm
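The macros above keep four blocks in flight and ping-pong the round keys between xmm4 and xmm5, so the four aesdec instructions of each round are independent and can overlap in the pipeline. The same schedule expressed with AES-NI intrinsics, as a sketch of the AES-128 case: rk[] is assumed to be a decryption-order key schedule whose middle keys have already been passed through InvMixColumns (AESIMC), which is what aesdec expects.

```c
#include <wmmintrin.h>   /* AES-NI intrinsics; build with -maes */

/* 4-way interleaved AES-128 decryption mirroring aes_dec_128 +
   aes_decrypt_common. rk[0] is the last encryption round key, rk[1..9]
   are AESIMC-transformed middle keys, rk[10] is the first round key. */
static void aes128_decrypt4_sketch(__m128i b[4], const __m128i rk[11])
{
    int r, i;
    for (i = 0; i < 4; i++)
        b[i] = _mm_xor_si128(b[i], rk[0]);           /* initial AddRoundKey        */
    for (r = 1; r < 10; r++)
        for (i = 0; i < 4; i++)
            b[i] = _mm_aesdec_si128(b[i], rk[r]);    /* nine full inverse rounds   */
    for (i = 0; i < 4; i++)
        b[i] = _mm_aesdeclast_si128(b[i], rk[10]);   /* last round, no InvMixColumns */
}
```

With the loops unrolled, a compiler typically emits the same back-to-back aesdec pattern the hand-written macros produce.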
+
+160:	// AES-128 decrypt
+	aes_dec_128
+	jmp		8f
+
+192:	// AES-192 decrypt
+	aes_dec_192
+	jmp		8f
+
+224:	// AES-256 decrypt	
+	aes_dec_256
+
+8:	
+
+	// 4 P = P ^ T
+	pxor	tweak1, %xmm0
+	pxor	tweak2, %xmm1
+	pxor	tweak3, %xmm2
+	pxor	tweak4, %xmm3
+
+	// write 4 Ps
+	movups	%xmm0, (P)
+	movups	%xmm1, 16(P)
+	movups	%xmm2, 32(P)
+	movups	%xmm3, 48(P)
+
+	add	$64, C
+	add	$64, P
+
+	sub		$4, lim
+	jge		0b
+
+#ifdef	KERNEL
+	movaps	0xb0(sp), %xmm5				// hw-aes-based uses extra xmm5
+#endif
+	movups	%xmm7, (T)
+
+9:
+	xor		%eax, %eax					// to return CRYPT_OK
+	add		$4, lim						// post-increment lim by 4
+	je		9f							// if lim==0, branch to prepare to return	
+
+L_uncrypt_group_sw:
+
+	movups	(T), %xmm7					// T, xmm7 will be used as T (128-bit) throughout the loop
+
+	sub		$1, lim						// pre-decrement lim by 1	
+	jl		1f							// if lim < 1, branch to prepare to return
+0:
+	movups	(C), %xmm0					// C
+
+	// prepare for calling aes_decrypt
+#if defined	__i386__
+	mov		P, (%esp)					// P as the 1st argument (in)
+	mov		P, 4(%esp)					// P as the 2nd argument (out)
+										// ctx was prepared previously in the preamble
+#else
+	mov		P, %rdi						// P as the 1st argument (in)
+	mov		P, %rsi						// P as the 2nd argument (out)
+	mov		ctx, %rdx					// ctx as the 3rd argument
+#endif
+
+	pxor	%xmm7, %xmm0				// P = C ^ T	
+	movups	%xmm0, (P)					// save P into memory
+
+	call	_aes_decrypt_xmm_no_save	// err = aes_decrypt(P,P,ctx);
+
+	cmp		$CRYPT_OK, %eax				// err == CRYPT_OK ? 
+	jne		9f							// if err != CRYPT_OK, branch to exit with error
+
+	movups	(P), %xmm0					// load xmm0 with P
+	pxor	%xmm7, %xmm0				// P ^= T
+	movups	%xmm0, (P)					// save output P
+
+	xts_mult_x_on_xmm7
+
+	add		$16, C						// next C
+	add		$16, P						// next P
+	sub		$1, lim						// lim--
+	jge		0b							// if (lim >= 0) repeat the scalar loop
+
+1:	movups	%xmm7, (T)					// save final tweak 
+L_error_uncrypt:
+9:
+	// if kernel, restore used xmm registers
+#ifdef	KERNEL
+	movaps	0x50(sp), %xmm0
+	movaps	0x60(sp), %xmm1
+	movaps	0x70(sp), %xmm2
+	movaps	0x80(sp), %xmm3
+	movaps	0x90(sp), %xmm4
+	movaps	0xa0(sp), %xmm7
+#endif
+
+#if defined	__i386__
+	add		$(12+16*8+16*4), %esp
+	pop		%esi
+	pop		%edi
+	pop		%ebx
+#else
+	add		$(8+16*8+16*5), %rsp
+	pop		%r15
+	pop		%r14
+	pop		%r13
+	pop		%r12
+	pop		%rbx
+#endif
+	leave
+	ret
diff --git a/bsd/crypto/aes/i386/edefs.h b/bsd/crypto/aes/i386/edefs.h
deleted file mode 100644
index d25bef89c..000000000
--- a/bsd/crypto/aes/i386/edefs.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- ---------------------------------------------------------------------------
- Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
-
- LICENSE TERMS
-
- The free distribution and use of this software in both source and binary
- form is allowed (with or without changes) provided that:
-
-   1. distributions of this source code include the above copyright
-      notice, this list of conditions and the following disclaimer;
-
-   2. distributions in binary form include the above copyright
-      notice, this list of conditions and the following disclaimer
-      in the documentation and/or other associated materials;
-
-   3. the copyright holder's name is not used to endorse products
-      built using this software without specific written permission.
-
- ALTERNATIVELY, provided that this notice is retained in full, this product
- may be distributed under the terms of the GNU General Public License (GPL),
- in which case the provisions of the GPL apply INSTEAD OF those given above.
-
- DISCLAIMER
-
- This software is provided 'as is' with no explicit or implied warranties
- in respect of its properties, including, but not limited to, correctness
- and/or fitness for purpose.
- ---------------------------------------------------------------------------
- Issue 31/01/2006
-*/
-
-#ifndef EDEFS_H
-#define EDEFS_H
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-#define IS_LITTLE_ENDIAN   1234 /* byte 0 is least significant (i386) */
-#define IS_BIG_ENDIAN      4321 /* byte 0 is most significant (mc68k) */
-
-#if defined(__GNUC__) || defined(__GNU_LIBRARY__)
-#  if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__)
-#    include <sys/endian.h>
-#  elif defined( BSD ) && ( BSD >= 199103 ) || defined( __DJGPP__ ) || defined( __CYGWIN32__ ) 
-#      include <machine/endian.h>
-#  elif defined(__APPLE__)
-#    if defined(__BIG_ENDIAN__) && !defined( BIG_ENDIAN )
-#      define BIG_ENDIAN
-#    elif defined(__LITTLE_ENDIAN__) && !defined( LITTLE_ENDIAN )
-#      define LITTLE_ENDIAN
-#    endif
-#  elif !defined( __MINGW32__ )
-#    include <endian.h>
-#    if !defined(__BEOS__)
-#      include <byteswap.h>
-#    endif
-#  endif
-#endif
-
-#if !defined(PLATFORM_BYTE_ORDER)
-#  if defined(LITTLE_ENDIAN) || defined(BIG_ENDIAN)
-#    if    defined(LITTLE_ENDIAN) && !defined(BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#    elif !defined(LITTLE_ENDIAN) &&  defined(BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#    elif defined(BYTE_ORDER) && (BYTE_ORDER == LITTLE_ENDIAN)
-#      define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#    elif defined(BYTE_ORDER) && (BYTE_ORDER == BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#    endif
-#  elif defined(_LITTLE_ENDIAN) || defined(_BIG_ENDIAN)
-#    if    defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#    elif !defined(_LITTLE_ENDIAN) &&  defined(_BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#    elif defined(_BYTE_ORDER) && (_BYTE_ORDER == _LITTLE_ENDIAN)
-#      define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#    elif defined(_BYTE_ORDER) && (_BYTE_ORDER == _BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#   endif
-#  elif defined(__LITTLE_ENDIAN__) || defined(__BIG_ENDIAN__)
-#    if    defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)
-#      define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#    elif !defined(__LITTLE_ENDIAN__) &&  defined(__BIG_ENDIAN__)
-#      define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#    elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __LITTLE_ENDIAN__)
-#      define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#    elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __BIG_ENDIAN__)
-#      define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#    endif
-#  endif
-#endif
-
-/*  if the platform is still unknown, try to find its byte order    */
-/*  from commonly used machine defines                              */
-
-#if !defined(PLATFORM_BYTE_ORDER)
-
-#if   defined( __alpha__ ) || defined( __alpha ) || defined( i386 )       || \
-      defined( __i386__ )  || defined( _M_I86 )  || defined( _M_IX86 )    || \
-      defined( __OS2__ )   || defined( sun386 )  || defined( __TURBOC__ ) || \
-      defined( vax )       || defined( vms )     || defined( VMS )        || \
-      defined( __VMS )     || defined( _M_X64 )
-#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-
-#elif defined( AMIGA )    || defined( applec )  || defined( __AS400__ )  || \
-      defined( _CRAY )    || defined( __hppa )  || defined( __hp9000 )   || \
-      defined( ibm370 )   || defined( mc68000 ) || defined( m68k )       || \
-      defined( __MRC__ )  || defined( __MVS__ ) || defined( __MWERKS__ ) || \
-      defined( sparc )    || defined( __sparc)  || defined( SYMANTEC_C ) || \
-      defined( __TANDEM ) || defined( THINK_C ) || defined( __VMCMS__ )  || \
-	  defined( __VOS__ )
-#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-
-#elif 0     /* **** EDIT HERE IF NECESSARY **** */
-#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#elif 0     /* **** EDIT HERE IF NECESSARY **** */
-#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#else
-#  error Please edit edefs.h (lines 117 or 119) to set the platform byte order
-#endif
-
-#endif
-
-#if defined(__cplusplus)
-}
-#endif
-#endif
diff --git a/bsd/crypto/aes/ppc/Makefile b/bsd/crypto/aes/ppc/Makefile
deleted file mode 100644
index 99755ad2e..000000000
--- a/bsd/crypto/aes/ppc/Makefile
+++ /dev/null
@@ -1,36 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-INSTINC_SUBDIRS = \
-
-INSTINC_SUBDIRS_PPC = \
-
-INSTINC_SUBDIRS_I386 = \
-
-EXPINC_SUBDIRS = \
-
-EXPINC_SUBDIRS_PPC = \
-
-EXPINC_SUBDIRS_I386 = \
-
-PRIVATE_DATAFILES = \
-	aestab.h aesopt.h
-
-INSTALL_MI_DIR = crypto
-
-EXPORT_MI_DIR = ${INSTALL_MI_DIR}
-
-INSTALL_KF_MI_LIST =
-
-INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES}
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/bsd/crypto/aes/ppc/aescrypt.c b/bsd/crypto/aes/ppc/aescrypt.c
deleted file mode 100644
index 31d4c81af..000000000
--- a/bsd/crypto/aes/ppc/aescrypt.c
+++ /dev/null
@@ -1,411 +0,0 @@
-/*
- ---------------------------------------------------------------------------
- Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
-
- LICENSE TERMS
-
- The free distribution and use of this software in both source and binary
- form is allowed (with or without changes) provided that:
-
-   1. distributions of this source code include the above copyright
-      notice, this list of conditions and the following disclaimer;
-
-   2. distributions in binary form include the above copyright
-      notice, this list of conditions and the following disclaimer
-      in the documentation and/or other associated materials;
-
-   3. the copyright holder's name is not used to endorse products
-      built using this software without specific written permission.
-
- ALTERNATIVELY, provided that this notice is retained in full, this product
- may be distributed under the terms of the GNU General Public License (GPL),
- in which case the provisions of the GPL apply INSTEAD OF those given above.
-
- DISCLAIMER
-
- This software is provided 'as is' with no explicit or implied warranties
- in respect of its properties, including, but not limited to, correctness
- and/or fitness for purpose.
- ---------------------------------------------------------------------------
- Issue 28/01/2004
-
- This file contains the code for implementing encryption and decryption
- for AES (Rijndael) for block and key sizes of 16, 24 and 32 bytes. It
- can optionally be replaced by code written in assembler using NASM. For
- further details see the file aesopt.h
-*/
-
-#include "aesopt.h"
-#include "aestab.h"
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-#define ki(y,x,k,c) (s(y,c) = s(x, c) ^ (k)[c])
-#define xo(y,x,c) (s(y,c) ^= s(x, c))
-#define si(y,x,c)   (s(y,c) = word_in(x, c))
-#define so(y,x,c)   word_out(y, c, s(x,c))
-
-#if defined(ARRAYS)
-#define locals(y,x)     x[4],y[4]
-#else
-#define locals(y,x)     x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
-#endif
-
-#define dtables(tab)     const aes_32t *tab##0, *tab##1, *tab##2, *tab##3
-#define itables(tab)     tab##0 = tab[0]; tab##1 = tab[1]; tab##2 = tab[2]; tab##3 = tab[3]
-
-#define l_copy(y, x)    s(y,0) = s(x,0); s(y,1) = s(x,1); \
-                        s(y,2) = s(x,2); s(y,3) = s(x,3);
-
-#define key_in(y,x,k)   ki(y,x,k,0); ki(y,x,k,1); ki(y,x,k,2); ki(y,x,k,3)
-#define cbc(y,x)        xo(y,x,0); xo(y,x,1); xo(y,x,2); xo(y,x,3)
-#define state_in(y,x)   si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3)
-#define state_out(y,x)  so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
-#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
-
-#if defined(ENCRYPTION) && !defined(AES_ASM)
-
-/* Visual C++ .Net v7.1 provides the fastest encryption code when using
-   Pentium optimiation with small code but this is poor for decryption
-   so we need to control this with the following VC++ pragmas
-*/
-
-#if defined(_MSC_VER)
-#pragma optimize( "s", on )
-#endif
-
-/* Given the column (c) of the output state variable, the following
-   macros give the input state variables which are needed in its
-   computation for each row (r) of the state. All the alternative
-   macros give the same end values but expand into different ways
-   of calculating these values.  In particular the complex macro
-   used for dynamically variable block sizes is designed to expand
-   to a compile time constant whenever possible but will expand to
-   conditional clauses on some branches (I am grateful to Frank
-   Yellin for this construction)
-*/
-
-#define fwd_var(x,r,c)\
- ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
- : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
- : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
- :          ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
-
-#if defined(FT4_SET)
-#undef  dec_fmvars
-#  if defined(ENC_ROUND_CACHE_TABLES)
-#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fn,fwd_var,rf1,c))
-#  else
-#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_fn,fwd_var,rf1,c))
-#  endif
-#elif defined(FT1_SET)
-#undef  dec_fmvars
-#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_fn,fwd_var,rf1,c))
-#else
-#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_sbox,fwd_var,rf1,c)))
-#endif
-
-#if defined(FL4_SET)
-#  if defined(LAST_ENC_ROUND_CACHE_TABLES)
-#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fl,fwd_var,rf1,c))
-#  else
-#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_fl,fwd_var,rf1,c))
-#  endif
-#elif defined(FL1_SET)
-#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_fl,fwd_var,rf1,c))
-#else
-#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_sbox,fwd_var,rf1,c))
-#endif
-
-aes_rval aes_encrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk,
-					 unsigned char *out, const aes_encrypt_ctx cx[1])
-{   aes_32t         locals(b0, b1);
-    const aes_32t   *kp;
-    const aes_32t   *kptr = cx->ks;
-#if defined(ENC_ROUND_CACHE_TABLES)
-	dtables(t_fn);
-#endif
-#if defined(LAST_ENC_ROUND_CACHE_TABLES)
-	dtables(t_fl);
-#endif
-
-#if defined( dec_fmvars )
-    dec_fmvars; /* declare variables for fwd_mcol() if needed */
-#endif
-
-#if defined( AES_ERR_CHK )
-    if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 )
-        return aes_error;
-#endif
-
-	// Load IV into b0.
-	state_in(b0, in_iv);
-
-	for (;num_blk; in += AES_BLOCK_SIZE, out += AES_BLOCK_SIZE, --num_blk)
-	{
-		kp = kptr;
-#if 0
-		// Read the plaintext into b1
-		state_in(b1, in);
-		// Do the CBC with b0 which is either the iv or the ciphertext of the previous block.
-		cbc(b1, b0);
-
-		// Xor b1 with the key schedule to get things started.
-		key_in(b0, b1, kp);
-#else
-		// Since xor is associative we mess with the ordering here to get the loads started early
-		key_in(b1, b0, kp);  // Xor b0(IV) with the key schedule and assign to b1
-		state_in(b0, in);    // Load block into b0
-		cbc(b0, b1);         // Xor b0 with b1 and store in b0
-#endif
-
-#if defined(ENC_ROUND_CACHE_TABLES)
-		itables(t_fn);
-#endif
-
-#if (ENC_UNROLL == FULL)
-
-		switch(cx->rn)
-		{
-		case 14:
-			round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
-			round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
-			kp += 2 * N_COLS;
-		case 12:
-			round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
-			round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
-			kp += 2 * N_COLS;
-		case 10:
-		default:
-			round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
-			round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
-			round(fwd_rnd,  b1, b0, kp + 3 * N_COLS);
-			round(fwd_rnd,  b0, b1, kp + 4 * N_COLS);
-			round(fwd_rnd,  b1, b0, kp + 5 * N_COLS);
-			round(fwd_rnd,  b0, b1, kp + 6 * N_COLS);
-			round(fwd_rnd,  b1, b0, kp + 7 * N_COLS);
-			round(fwd_rnd,  b0, b1, kp + 8 * N_COLS);
-			round(fwd_rnd,  b1, b0, kp + 9 * N_COLS);
-#if defined(LAST_ENC_ROUND_CACHE_TABLES)
-			itables(t_fl);
-#endif
-			round(fwd_lrnd, b0, b1, kp +10 * N_COLS);
-		}
-
-#else
-
-		{   aes_32t    rnd;
-#if (ENC_UNROLL == PARTIAL)
-			for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd)
-			{
-				kp += N_COLS;
-				round(fwd_rnd, b1, b0, kp);
-				kp += N_COLS;
-				round(fwd_rnd, b0, b1, kp);
-			}
-			kp += N_COLS;
-			round(fwd_rnd,  b1, b0, kp);
-#else
-			for(rnd = 0; rnd < cx->rn - 1; ++rnd)
-			{
-				kp += N_COLS;
-				round(fwd_rnd, b1, b0, kp);
-				l_copy(b0, b1);
-			}
-#endif
-#if defined(LAST_ENC_ROUND_CACHE_TABLES)
-			itables(t_fl);
-#endif
-			kp += N_COLS;
-			round(fwd_lrnd, b0, b1, kp);
-		}
-#endif
-	
-		state_out(out, b0);
-	}
-
-#if defined( AES_ERR_CHK )
-    return aes_good;
-#endif
-}
-
-#endif
-
-#if defined(DECRYPTION) && !defined(AES_ASM)
-
-/* Visual C++ .Net v7.1 provides the fastest encryption code when using
-   Pentium optimiation with small code but this is poor for decryption
-   so we need to control this with the following VC++ pragmas
-*/
-
-#if defined(_MSC_VER)
-#pragma optimize( "t", on )
-#endif
-
-/* Given the column (c) of the output state variable, the following
-   macros give the input state variables which are needed in its
-   computation for each row (r) of the state. All the alternative
-   macros give the same end values but expand into different ways
-   of calculating these values.  In particular the complex macro
-   used for dynamically variable block sizes is designed to expand
-   to a compile time constant whenever possible but will expand to
-   conditional clauses on some branches (I am grateful to Frank
-   Yellin for this construction)
-*/
-
-#define inv_var(x,r,c)\
- ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
- : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\
- : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
- :          ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0)))
-
-#if defined(IT4_SET)
-#undef  dec_imvars
-#  if defined(DEC_ROUND_CACHE_TABLES)
-#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_cached_tables(x,t_in,inv_var,rf1,c))
-#  else
-#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_in,inv_var,rf1,c))
-#  endif
-#elif defined(IT1_SET)
-#undef  dec_imvars
-#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_in,inv_var,rf1,c))
-#else
-#define inv_rnd(y,x,k,c)    (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c)))
-#endif
-
-#if defined(IL4_SET)
-#  if defined(LAST_DEC_ROUND_CACHE_TABLES)
-#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_cached_tables(x,t_il,inv_var,rf1,c))
-#  else
-#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_il,inv_var,rf1,c))
-#  endif
-#elif defined(IL1_SET)
-#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_il,inv_var,rf1,c))
-#else
-#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c))
-#endif
-
-aes_rval aes_decrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk,
-					 unsigned char *out, const aes_decrypt_ctx cx[1])
-{   aes_32t        locals(b0, b1);
-    const aes_32t *kptr = cx->ks + cx->rn * N_COLS;
-	const aes_32t *kp;
-#if defined(DEC_ROUND_CACHE_TABLES)
-	dtables(t_in);
-#endif
-#if defined(LAST_DEC_ROUND_CACHE_TABLES)
-	dtables(t_il);
-#endif
-
-#if defined( dec_imvars )
-    dec_imvars; /* declare variables for inv_mcol() if needed */
-#endif
-	
-#if defined( AES_ERR_CHK )
-    if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 )
-        return aes_error;
-#endif
-
-#if defined(DEC_ROUND_CACHE_TABLES)
-	itables(t_in);
-#endif	
-	
-	in += AES_BLOCK_SIZE * (num_blk - 1);
-	out += AES_BLOCK_SIZE * (num_blk - 1);
-	// Load the last block's ciphertext into b1
-	state_in(b1, in);
-
-	for (;num_blk; out -= AES_BLOCK_SIZE, --num_blk)
-	{
-		kp = kptr;
-		// Do the xor part of state_in, where b1 is the previous block's ciphertext.
-		key_in(b0, b1, kp);
-
-#if (DEC_UNROLL == FULL)
-	
-		switch(cx->rn)
-		{
-		case 14:
-			round(inv_rnd,  b1, b0, kp -  1 * N_COLS);
-			round(inv_rnd,  b0, b1, kp -  2 * N_COLS);
-			kp -= 2 * N_COLS;
-		case 12:
-			round(inv_rnd,  b1, b0, kp -  1 * N_COLS);
-			round(inv_rnd,  b0, b1, kp -  2 * N_COLS);
-			kp -= 2 * N_COLS;
-		case 10:
-		default:
-			round(inv_rnd,  b1, b0, kp -  1 * N_COLS);
-			round(inv_rnd,  b0, b1, kp -  2 * N_COLS);
-			round(inv_rnd,  b1, b0, kp -  3 * N_COLS);
-			round(inv_rnd,  b0, b1, kp -  4 * N_COLS);
-			round(inv_rnd,  b1, b0, kp -  5 * N_COLS);
-			round(inv_rnd,  b0, b1, kp -  6 * N_COLS);
-			round(inv_rnd,  b1, b0, kp -  7 * N_COLS);
-			round(inv_rnd,  b0, b1, kp -  8 * N_COLS);
-			round(inv_rnd,  b1, b0, kp -  9 * N_COLS);
-#if defined(LAST_DEC_ROUND_CACHE_TABLES)
-			itables(t_il);
-#endif	
-			round(inv_lrnd, b0, b1, kp - 10 * N_COLS);
-		}
-
-#else
-	
-		{   aes_32t    rnd;
-#if (DEC_UNROLL == PARTIAL)
-			for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd)
-			{
-				kp -= N_COLS;
-				round(inv_rnd, b1, b0, kp);
-				kp -= N_COLS;
-				round(inv_rnd, b0, b1, kp);
-			}
-			kp -= N_COLS;
-			round(inv_rnd, b1, b0, kp);
-#else
-			for(rnd = 0; rnd < cx->rn - 1; ++rnd)
-			{
-				kp -= N_COLS;
-				round(inv_rnd, b1, b0, kp);
-				l_copy(b0, b1);
-			}
-#endif
-#if defined(LAST_DEC_ROUND_CACHE_TABLES)
-			itables(t_il);
-#endif	
-			kp -= N_COLS;
-			round(inv_lrnd, b0, b1, kp);
-		}
-#endif
-
-		if (num_blk == 1)
-		{
-			// We are doing the first block so we need the IV rather than the previous
-			// block for CBC (there is no previous block)
-			state_in(b1, in_iv);
-		}
-		else
-		{
-			in -= AES_BLOCK_SIZE;
-			state_in(b1, in);
-		}
-
-		// Do the CBC with b1 which is either the IV or the ciphertext of the previous block.
-		cbc(b0, b1);
-
-		state_out(out, b0);
-	}
-#if defined( AES_ERR_CHK )
-    return aes_good;
-#endif
-}
-
-#endif
-
-#if defined(__cplusplus)
-}
-#endif
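One detail of the aes_decrypt_cbc just removed is worth noting: it walks the buffer from the last block backward, so in-place decryption (out == in) never destroys a ciphertext block that a later step still needs. The recurrence is P[i] = D(C[i]) ^ C[i-1] with C[-1] = IV; a hedged sketch, assuming a one-block decrypt primitive:

```c
#include <stdint.h>
#define AES_BLOCK_SIZE 16

extern void aes_decrypt_block(const uint8_t *in, uint8_t *out, const void *ctx); /* assumed primitive */

/* CBC decryption, last block first, safe for out == in (in-place). */
static void cbc_decrypt_sketch(const uint8_t *in, const uint8_t *iv,
                               unsigned num_blk, uint8_t *out, const void *ctx)
{
    while (num_blk--) {
        const uint8_t *c    = in  + num_blk * AES_BLOCK_SIZE;
        const uint8_t *prev = num_blk ? c - AES_BLOCK_SIZE : iv;  /* C[-1] = IV */
        uint8_t       *p    = out + num_blk * AES_BLOCK_SIZE;
        int i;
        aes_decrypt_block(c, p, ctx);          /* p = D(C[i])      */
        for (i = 0; i < AES_BLOCK_SIZE; i++)
            p[i] ^= prev[i];                   /* p ^= C[i-1] / IV */
    }
}
```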
diff --git a/bsd/crypto/aes/ppc/aeskey.c b/bsd/crypto/aes/ppc/aeskey.c
deleted file mode 100644
index 5e0a6453c..000000000
--- a/bsd/crypto/aes/ppc/aeskey.c
+++ /dev/null
@@ -1,455 +0,0 @@
-/*
- ---------------------------------------------------------------------------
- Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
-
- LICENSE TERMS
-
- The free distribution and use of this software in both source and binary
- form is allowed (with or without changes) provided that:
-
-   1. distributions of this source code include the above copyright
-      notice, this list of conditions and the following disclaimer;
-
-   2. distributions in binary form include the above copyright
-      notice, this list of conditions and the following disclaimer
-      in the documentation and/or other associated materials;
-
-   3. the copyright holder's name is not used to endorse products
-      built using this software without specific written permission.
-
- ALTERNATIVELY, provided that this notice is retained in full, this product
- may be distributed under the terms of the GNU General Public License (GPL),
- in which case the provisions of the GPL apply INSTEAD OF those given above.
-
- DISCLAIMER
-
- This software is provided 'as is' with no explicit or implied warranties
- in respect of its properties, including, but not limited to, correctness
- and/or fitness for purpose.
- ---------------------------------------------------------------------------
- Issue Date: 26/08/2003
-
- This file contains the code for implementing the key schedule for AES
- (Rijndael) for block and key sizes of 16, 24, and 32 bytes. See aesopt.h
- for further details including optimisation.
-*/
-
-#include "aesopt.h"
-#include "aestab.h"
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-/* Initialise the key schedule from the user supplied key. The key
-   length can be specified in bytes, with legal values of 16, 24
-   and 32, or in bits, with legal values of 128, 192 and 256. These
-   values correspond with Nk values of 4, 6 and 8 respectively.
-
-   The following macros implement a single cycle in the key
-   schedule generation process. The number of cycles needed
-   for each cx->n_col and nk value is:
-
-    nk =             4  5  6  7  8
-    ------------------------------
-    cx->n_col = 4   10  9  8  7  7
-    cx->n_col = 5   14 11 10  9  9
-    cx->n_col = 6   19 15 12 11 11
-    cx->n_col = 7   21 19 16 13 14
-    cx->n_col = 8   29 23 19 17 14
-*/
-
-#define ke4(k,i) \
-{   k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[4*(i)+5] = ss[1] ^= ss[0]; \
-    k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \
-}
-#define kel4(k,i) \
-{   k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[4*(i)+5] = ss[1] ^= ss[0]; \
-    k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \
-}
-
-#define ke6(k,i) \
-{   k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[6*(i)+ 7] = ss[1] ^= ss[0]; \
-    k[6*(i)+ 8] = ss[2] ^= ss[1]; k[6*(i)+ 9] = ss[3] ^= ss[2]; \
-    k[6*(i)+10] = ss[4] ^= ss[3]; k[6*(i)+11] = ss[5] ^= ss[4]; \
-}
-#define kel6(k,i) \
-{   k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[6*(i)+ 7] = ss[1] ^= ss[0]; \
-    k[6*(i)+ 8] = ss[2] ^= ss[1]; k[6*(i)+ 9] = ss[3] ^= ss[2]; \
-}
-
-#define ke8(k,i) \
-{   k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[8*(i)+ 9] = ss[1] ^= ss[0]; \
-    k[8*(i)+10] = ss[2] ^= ss[1]; k[8*(i)+11] = ss[3] ^= ss[2]; \
-    k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); k[8*(i)+13] = ss[5] ^= ss[4]; \
-    k[8*(i)+14] = ss[6] ^= ss[5]; k[8*(i)+15] = ss[7] ^= ss[6]; \
-}
-#define kel8(k,i) \
-{   k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[8*(i)+ 9] = ss[1] ^= ss[0]; \
-    k[8*(i)+10] = ss[2] ^= ss[1]; k[8*(i)+11] = ss[3] ^= ss[2]; \
-}
-
-#if defined(ENCRYPTION_KEY_SCHEDULE)
-
-#if defined(AES_128) || defined(AES_VAR)
-
-aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1])
-{   aes_32t    ss[4];
-
-    cx->ks[0] = ss[0] = word_in(key, 0);
-    cx->ks[1] = ss[1] = word_in(key, 1);
-    cx->ks[2] = ss[2] = word_in(key, 2);
-    cx->ks[3] = ss[3] = word_in(key, 3);
-
-#if ENC_UNROLL == NONE
-    {   aes_32t i;
-
-        for(i = 0; i < ((11 * N_COLS - 5) / 4); ++i)
-            ke4(cx->ks, i);
-    }
-#else
-    ke4(cx->ks, 0);  ke4(cx->ks, 1);
-    ke4(cx->ks, 2);  ke4(cx->ks, 3);
-    ke4(cx->ks, 4);  ke4(cx->ks, 5);
-    ke4(cx->ks, 6);  ke4(cx->ks, 7);
-    ke4(cx->ks, 8);
-#endif
-    kel4(cx->ks, 9);
-    cx->rn = 10;
-#if defined( AES_ERR_CHK )
-    return aes_good;
-#endif
-}
-
-#endif
-
-#if defined(AES_192) || defined(AES_VAR)
-
-aes_rval aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1])
-{   aes_32t    ss[6];
-
-    cx->ks[0] = ss[0] = word_in(key, 0);
-    cx->ks[1] = ss[1] = word_in(key, 1);
-    cx->ks[2] = ss[2] = word_in(key, 2);
-    cx->ks[3] = ss[3] = word_in(key, 3);
-    cx->ks[4] = ss[4] = word_in(key, 4);
-    cx->ks[5] = ss[5] = word_in(key, 5);
-
-#if ENC_UNROLL == NONE
-    {   aes_32t i;
-
-        for(i = 0; i < (13 * N_COLS - 7) / 6; ++i)
-            ke6(cx->ks, i);
-    }
-#else
-    ke6(cx->ks, 0);  ke6(cx->ks, 1);
-    ke6(cx->ks, 2);  ke6(cx->ks, 3);
-    ke6(cx->ks, 4);  ke6(cx->ks, 5);
-    ke6(cx->ks, 6);
-#endif
-    kel6(cx->ks, 7);
-    cx->rn = 12;
-#if defined( AES_ERR_CHK )
-    return aes_good;
-#endif
-}
-
-#endif
-
-#if defined(AES_256) || defined(AES_VAR)
-
-aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1])
-{   aes_32t    ss[8];
-
-    cx->ks[0] = ss[0] = word_in(key, 0);
-    cx->ks[1] = ss[1] = word_in(key, 1);
-    cx->ks[2] = ss[2] = word_in(key, 2);
-    cx->ks[3] = ss[3] = word_in(key, 3);
-    cx->ks[4] = ss[4] = word_in(key, 4);
-    cx->ks[5] = ss[5] = word_in(key, 5);
-    cx->ks[6] = ss[6] = word_in(key, 6);
-    cx->ks[7] = ss[7] = word_in(key, 7);
-
-#if ENC_UNROLL == NONE
-    {   aes_32t i;
-
-        for(i = 0; i < (15 * N_COLS - 9) / 8; ++i)
-            ke8(cx->ks,  i);
-    }
-#else
-    ke8(cx->ks, 0); ke8(cx->ks, 1);
-    ke8(cx->ks, 2); ke8(cx->ks, 3);
-    ke8(cx->ks, 4); ke8(cx->ks, 5);
-#endif
-    kel8(cx->ks, 6);
-    cx->rn = 14;
-#if defined( AES_ERR_CHK )
-    return aes_good;
-#endif
-}
-
-#endif
-
-#if defined(AES_VAR)
-
-aes_rval aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1])
-{
-    switch(key_len)
-    {
-#if defined( AES_ERR_CHK )
-    case 16: case 128: return aes_encrypt_key128(key, cx);
-    case 24: case 192: return aes_encrypt_key192(key, cx);
-    case 32: case 256: return aes_encrypt_key256(key, cx);
-    default: return aes_error;
-#else
-    case 16: case 128: aes_encrypt_key128(key, cx); return;
-    case 24: case 192: aes_encrypt_key192(key, cx); return;
-    case 32: case 256: aes_encrypt_key256(key, cx); return;
-#endif
-    }
-}
-
-#endif
-
-#endif
-
-#if defined(DECRYPTION_KEY_SCHEDULE)
-
-#if DEC_ROUND == NO_TABLES
-#define ff(x)   (x)
-#else
-#define ff(x)   inv_mcol(x)
-#if defined( dec_imvars )
-#define d_vars  dec_imvars
-#endif
-#endif
-
-#if 1
-#define kdf4(k,i) \
-{   ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; ss[1] = ss[1] ^ ss[3]; ss[2] = ss[2] ^ ss[3]; ss[3] = ss[3]; \
-    ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; \
-    ss[4] ^= k[4*(i)];   k[4*(i)+4] = ff(ss[4]); ss[4] ^= k[4*(i)+1]; k[4*(i)+5] = ff(ss[4]); \
-    ss[4] ^= k[4*(i)+2]; k[4*(i)+6] = ff(ss[4]); ss[4] ^= k[4*(i)+3]; k[4*(i)+7] = ff(ss[4]); \
-}
-#define kd4(k,i) \
-{   ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; ss[4] = ff(ss[4]); \
-    k[4*(i)+4] = ss[4] ^= k[4*(i)]; k[4*(i)+5] = ss[4] ^= k[4*(i)+1]; \
-    k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \
-}
-#define kdl4(k,i) \
-{   ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; \
-    k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; k[4*(i)+5] = ss[1] ^ ss[3]; \
-    k[4*(i)+6] = ss[0]; k[4*(i)+7] = ss[1]; \
-}
-#else
-#define kdf4(k,i) \
-{   ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[4*(i)+ 4] = ff(ss[0]); ss[1] ^= ss[0]; k[4*(i)+ 5] = ff(ss[1]); \
-    ss[2] ^= ss[1]; k[4*(i)+ 6] = ff(ss[2]); ss[3] ^= ss[2]; k[4*(i)+ 7] = ff(ss[3]); \
-}
-#define kd4(k,i) \
-{   ss[4] = ls_box(ss[3],3) ^ t_use(r,c)[i]; \
-    ss[0] ^= ss[4]; ss[4] = ff(ss[4]); k[4*(i)+ 4] = ss[4] ^= k[4*(i)]; \
-    ss[1] ^= ss[0]; k[4*(i)+ 5] = ss[4] ^= k[4*(i)+ 1]; \
-    ss[2] ^= ss[1]; k[4*(i)+ 6] = ss[4] ^= k[4*(i)+ 2]; \
-    ss[3] ^= ss[2]; k[4*(i)+ 7] = ss[4] ^= k[4*(i)+ 3]; \
-}
-#define kdl4(k,i) \
-{   ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[4*(i)+ 4] = ss[0]; ss[1] ^= ss[0]; k[4*(i)+ 5] = ss[1]; \
-    ss[2] ^= ss[1]; k[4*(i)+ 6] = ss[2]; ss[3] ^= ss[2]; k[4*(i)+ 7] = ss[3]; \
-}
-#endif
-
-#define kdf6(k,i) \
-{   ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[6*(i)+ 6] = ff(ss[0]); ss[1] ^= ss[0]; k[6*(i)+ 7] = ff(ss[1]); \
-    ss[2] ^= ss[1]; k[6*(i)+ 8] = ff(ss[2]); ss[3] ^= ss[2]; k[6*(i)+ 9] = ff(ss[3]); \
-    ss[4] ^= ss[3]; k[6*(i)+10] = ff(ss[4]); ss[5] ^= ss[4]; k[6*(i)+11] = ff(ss[5]); \
-}
-#define kd6(k,i) \
-{   ss[6] = ls_box(ss[5],3) ^ t_use(r,c)[i]; \
-    ss[0] ^= ss[6]; ss[6] = ff(ss[6]); k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \
-    ss[1] ^= ss[0]; k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \
-    ss[2] ^= ss[1]; k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \
-    ss[3] ^= ss[2]; k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \
-    ss[4] ^= ss[3]; k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \
-    ss[5] ^= ss[4]; k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \
-}
-#define kdl6(k,i) \
-{   ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[6*(i)+ 6] = ss[0]; ss[1] ^= ss[0]; k[6*(i)+ 7] = ss[1]; \
-    ss[2] ^= ss[1]; k[6*(i)+ 8] = ss[2]; ss[3] ^= ss[2]; k[6*(i)+ 9] = ss[3]; \
-}
-
-#define kdf8(k,i) \
-{   ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[8*(i)+ 8] = ff(ss[0]); ss[1] ^= ss[0]; k[8*(i)+ 9] = ff(ss[1]); \
-    ss[2] ^= ss[1]; k[8*(i)+10] = ff(ss[2]); ss[3] ^= ss[2]; k[8*(i)+11] = ff(ss[3]); \
-    ss[4] ^= ls_box(ss[3],0); k[8*(i)+12] = ff(ss[4]); ss[5] ^= ss[4]; k[8*(i)+13] = ff(ss[5]); \
-    ss[6] ^= ss[5]; k[8*(i)+14] = ff(ss[6]); ss[7] ^= ss[6]; k[8*(i)+15] = ff(ss[7]); \
-}
-#define kd8(k,i) \
-{   aes_32t g = ls_box(ss[7],3) ^ t_use(r,c)[i]; \
-    ss[0] ^= g; g = ff(g); k[8*(i)+ 8] = g ^= k[8*(i)]; \
-    ss[1] ^= ss[0]; k[8*(i)+ 9] = g ^= k[8*(i)+ 1]; \
-    ss[2] ^= ss[1]; k[8*(i)+10] = g ^= k[8*(i)+ 2]; \
-    ss[3] ^= ss[2]; k[8*(i)+11] = g ^= k[8*(i)+ 3]; \
-    g = ls_box(ss[3],0); \
-    ss[4] ^= g; g = ff(g); k[8*(i)+12] = g ^= k[8*(i)+ 4]; \
-    ss[5] ^= ss[4]; k[8*(i)+13] = g ^= k[8*(i)+ 5]; \
-    ss[6] ^= ss[5]; k[8*(i)+14] = g ^= k[8*(i)+ 6]; \
-    ss[7] ^= ss[6]; k[8*(i)+15] = g ^= k[8*(i)+ 7]; \
-}
-#define kdl8(k,i) \
-{   ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[8*(i)+ 8] = ss[0]; ss[1] ^= ss[0]; k[8*(i)+ 9] = ss[1]; \
-    ss[2] ^= ss[1]; k[8*(i)+10] = ss[2]; ss[3] ^= ss[2]; k[8*(i)+11] = ss[3]; \
-}
-
-#if defined(AES_128) || defined(AES_VAR)
-
-aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1])
-{   aes_32t    ss[5];
-#if defined( d_vars )
-        d_vars;
-#endif
-    cx->ks[0] = ss[0] = word_in(key, 0);
-    cx->ks[1] = ss[1] = word_in(key, 1);
-    cx->ks[2] = ss[2] = word_in(key, 2);
-    cx->ks[3] = ss[3] = word_in(key, 3);
-
-#if DEC_UNROLL == NONE
-    {   aes_32t i;
-
-        for(i = 0; i < (11 * N_COLS - 5) / 4; ++i)
-            ke4(cx->ks, i);
-        kel4(cx->ks, 9);
-#if !(DEC_ROUND == NO_TABLES)
-        for(i = N_COLS; i < 10 * N_COLS; ++i)
-            cx->ks[i] = inv_mcol(cx->ks[i]);
-#endif
-    }
-#else
-    kdf4(cx->ks, 0);  kd4(cx->ks, 1);
-     kd4(cx->ks, 2);  kd4(cx->ks, 3);
-     kd4(cx->ks, 4);  kd4(cx->ks, 5);
-     kd4(cx->ks, 6);  kd4(cx->ks, 7);
-     kd4(cx->ks, 8); kdl4(cx->ks, 9);
-#endif
-    cx->rn = 10;
-#if defined( AES_ERR_CHK )
-    return aes_good;
-#endif
-}
-
-#endif
-
-#if defined(AES_192) || defined(AES_VAR)
-
-aes_rval aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1])
-{   aes_32t    ss[7];
-#if defined( d_vars )
-        d_vars;
-#endif
-    cx->ks[0] = ss[0] = word_in(key, 0);
-    cx->ks[1] = ss[1] = word_in(key, 1);
-    cx->ks[2] = ss[2] = word_in(key, 2);
-    cx->ks[3] = ss[3] = word_in(key, 3);
-
-#if DEC_UNROLL == NONE
-    cx->ks[4] = ss[4] = word_in(key, 4);
-    cx->ks[5] = ss[5] = word_in(key, 5);
-    {   aes_32t i;
-
-        for(i = 0; i < (13 * N_COLS - 7) / 6; ++i)
-            ke6(cx->ks, i);
-        kel6(cx->ks, 7);
-#if !(DEC_ROUND == NO_TABLES)
-        for(i = N_COLS; i < 12 * N_COLS; ++i)
-            cx->ks[i] = inv_mcol(cx->ks[i]);
-#endif
-    }
-#else
-    cx->ks[4] = ff(ss[4] = word_in(key, 4));
-    cx->ks[5] = ff(ss[5] = word_in(key, 5));
-    kdf6(cx->ks, 0); kd6(cx->ks, 1);
-    kd6(cx->ks, 2);  kd6(cx->ks, 3);
-    kd6(cx->ks, 4);  kd6(cx->ks, 5);
-    kd6(cx->ks, 6); kdl6(cx->ks, 7);
-#endif
-    cx->rn = 12;
-#if defined( AES_ERR_CHK )
-    return aes_good;
-#endif
-}
-
-#endif
-
-#if defined(AES_256) || defined(AES_VAR)
-
-aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1])
-{   aes_32t    ss[8];
-#if defined( d_vars )
-        d_vars;
-#endif
-    cx->ks[0] = ss[0] = word_in(key, 0);
-    cx->ks[1] = ss[1] = word_in(key, 1);
-    cx->ks[2] = ss[2] = word_in(key, 2);
-    cx->ks[3] = ss[3] = word_in(key, 3);
-
-#if DEC_UNROLL == NONE
-    cx->ks[4] = ss[4] = word_in(key, 4);
-    cx->ks[5] = ss[5] = word_in(key, 5);
-    cx->ks[6] = ss[6] = word_in(key, 6);
-    cx->ks[7] = ss[7] = word_in(key, 7);
-    {   aes_32t i;
-
-        for(i = 0; i < (15 * N_COLS - 9) / 8; ++i)
-            ke8(cx->ks,  i);
-        kel8(cx->ks,  i);
-#if !(DEC_ROUND == NO_TABLES)
-        for(i = N_COLS; i < 14 * N_COLS; ++i)
-            cx->ks[i] = inv_mcol(cx->ks[i]);
-
-#endif
-    }
-#else
-    cx->ks[4] = ff(ss[4] = word_in(key, 4));
-    cx->ks[5] = ff(ss[5] = word_in(key, 5));
-    cx->ks[6] = ff(ss[6] = word_in(key, 6));
-    cx->ks[7] = ff(ss[7] = word_in(key, 7));
-    kdf8(cx->ks, 0); kd8(cx->ks, 1);
-    kd8(cx->ks, 2);  kd8(cx->ks, 3);
-    kd8(cx->ks, 4);  kd8(cx->ks, 5);
-    kdl8(cx->ks, 6);
-#endif
-    cx->rn = 14;
-#if defined( AES_ERR_CHK )
-    return aes_good;
-#endif
-}
-
-#endif
-
-#if defined(AES_VAR)
-
-aes_rval aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1])
-{
-    switch(key_len)
-    {
-#if defined( AES_ERR_CHK )
-    case 16: case 128: return aes_decrypt_key128(key, cx);
-    case 24: case 192: return aes_decrypt_key192(key, cx);
-    case 32: case 256: return aes_decrypt_key256(key, cx);
-    default: return aes_error;
-#else
-    case 16: case 128: aes_decrypt_key128(key, cx); return;
-    case 24: case 192: aes_decrypt_key192(key, cx); return;
-    case 32: case 256: aes_decrypt_key256(key, cx); return;
-#endif
-    }
-}
-
-#endif
-
-#endif
-
-#if defined(__cplusplus)
-}
-#endif
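The ff()/inv_mcol() step in the key-schedule code deleted above builds the "equivalent inverse cipher" schedule: every round key except the first and last is run through InvMixColumns so that decryption can reuse the encryption dataflow. Hardware AES exposes the identical transform as AESIMC; a sketch of the AES-128 case with intrinsics, where enc[0..10] is an assumed expanded encryption schedule:

```c
#include <wmmintrin.h>   /* AES-NI intrinsics; build with -maes */

/* Build a decryption-order schedule for use with AESDEC/AESDECLAST:
   reverse the encryption round keys and apply AESIMC to the middle nine,
   the hardware analogue of inv_mcol() in the deleted aeskey.c. */
static void aes128_dec_schedule_sketch(const __m128i enc[11], __m128i dec[11])
{
    int r;
    dec[0] = enc[10];                             /* last enc key first, untouched  */
    for (r = 1; r < 10; r++)
        dec[r] = _mm_aesimc_si128(enc[10 - r]);   /* InvMixColumns the middle keys  */
    dec[10] = enc[0];                             /* original first key, untouched  */
}
```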
diff --git a/bsd/crypto/aes/ppc/aesopt.h b/bsd/crypto/aes/ppc/aesopt.h
deleted file mode 100644
index 2b78eb920..000000000
--- a/bsd/crypto/aes/ppc/aesopt.h
+++ /dev/null
@@ -1,753 +0,0 @@
-/*
- ---------------------------------------------------------------------------
- Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
-
- LICENSE TERMS
-
- The free distribution and use of this software in both source and binary
- form is allowed (with or without changes) provided that:
-
-   1. distributions of this source code include the above copyright
-      notice, this list of conditions and the following disclaimer;
-
-   2. distributions in binary form include the above copyright
-      notice, this list of conditions and the following disclaimer
-      in the documentation and/or other associated materials;
-
-   3. the copyright holder's name is not used to endorse products
-      built using this software without specific written permission.
-
- ALTERNATIVELY, provided that this notice is retained in full, this product
- may be distributed under the terms of the GNU General Public License (GPL),
- in which case the provisions of the GPL apply INSTEAD OF those given above.
-
- DISCLAIMER
-
- This software is provided 'as is' with no explicit or implied warranties
- in respect of its properties, including, but not limited to, correctness
- and/or fitness for purpose.
- ---------------------------------------------------------------------------
- Issue 28/01/2004
-
- My thanks go to Dag Arne Osvik for devising the schemes used here for key
- length derivation from the form of the key schedule
-
- This file contains the compilation options for AES (Rijndael) and code
- that is common across encryption, key scheduling and table generation.
-
- OPERATION
-
- These source code files implement the AES algorithm Rijndael designed by
- Joan Daemen and Vincent Rijmen. This version is designed for the standard
- block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
- and 32 bytes).
-
- This version is designed for flexibility and speed using operations on
- 32-bit words rather than operations on bytes.  It can be compiled with
- either big or little endian internal byte order but is faster when the
- native byte order for the processor is used.
-
- THE CIPHER INTERFACE
-
- The cipher interface is implemented as an array of bytes in which lower
- AES bit sequence indexes map to higher numeric significance within bytes.
-
-  aes_08t                 (an unsigned  8-bit type)
-  aes_32t                 (an unsigned 32-bit type)
-  struct aes_encrypt_ctx  (structure for the cipher encryption context)
-  struct aes_decrypt_ctx  (structure for the cipher decryption context)
-  aes_rval                the function return type
-
-  C subroutine calls:
-
-  aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);
-  aes_rval aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]);
-  aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
-  aes_rval aes_encrypt(const unsigned char *in, unsigned char *out,
-                                                  const aes_encrypt_ctx cx[1]);
-
-  aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);
-  aes_rval aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]);
-  aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
-  aes_rval aes_decrypt(const unsigned char *in, unsigned char *out,
-                                                  const aes_decrypt_ctx cx[1]);
-
- IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that
- you call genTabs() before AES is used so that the tables are initialised.
-
- C++ aes class subroutines:
-
-     Class AESencrypt  for encryption
-
-      Construtors:
-          AESencrypt(void)
-          AESencrypt(const unsigned char *key) - 128 bit key
-      Members:
-          aes_rval key128(const unsigned char *key)
-          aes_rval key192(const unsigned char *key)
-          aes_rval key256(const unsigned char *key)
-          aes_rval encrypt(const unsigned char *in, unsigned char *out) const
-
-      Class AESdecrypt  for encryption
-      Construtors:
-          AESdecrypt(void)
-          AESdecrypt(const unsigned char *key) - 128 bit key
-      Members:
-          aes_rval key128(const unsigned char *key)
-          aes_rval key192(const unsigned char *key)
-          aes_rval key256(const unsigned char *key)
-          aes_rval decrypt(const unsigned char *in, unsigned char *out) const
-
-    COMPILATION
-
-    The files used to provide AES (Rijndael) are
-
-    a. aes.h for the definitions needed for use in C.
-    b. aescpp.h for the definitions needed for use in C++.
-    c. aesopt.h for setting compilation options (also includes common code).
-    d. aescrypt.c for encryption and decrytpion, or
-    e. aeskey.c for key scheduling.
-    f. aestab.c for table loading or generation.
-    g. aescrypt.asm for encryption and decryption using assembler code.
-    h. aescrypt.mmx.asm for encryption and decryption using MMX assembler.
-
-    To compile AES (Rijndael) for use in C code use aes.h and set the
-    defines here for the facilities you need (key lengths, encryption
-    and/or decryption). Do not define AES_DLL or AES_CPP.  Set the options
-    for optimisations and table sizes here.
-
-    To compile AES (Rijndael) for use in in C++ code use aescpp.h but do
-    not define AES_DLL
-
-    To compile AES (Rijndael) in C as a Dynamic Link Library DLL) use
-    aes.h and include the AES_DLL define.
-
-    CONFIGURATION OPTIONS (here and in aes.h)
-
-    a. set AES_DLL in aes.h if AES (Rijndael) is to be compiled as a DLL
-    b. You may need to set PLATFORM_BYTE_ORDER to define the byte order.
-    c. If you want the code to run in a specific internal byte order, then
-       ALGORITHM_BYTE_ORDER must be set accordingly.
-    d. set other configuration options decribed below.
-*/
-
-#if !defined( _AESOPT_H )
-#define _AESOPT_H
-
-#include <crypto/aes/aes.h>
-
-/*  CONFIGURATION - USE OF DEFINES
-
-    Later in this section there are a number of defines that control the
-    operation of the code.  In each section, the purpose of each define is
-    explained so that the relevant form can be included or excluded by
-    setting either 1's or 0's respectively on the branches of the related
-    #if clauses.
-
-    PLATFORM SPECIFIC INCLUDES AND BYTE ORDER IN 32-BIT WORDS
-
-    To obtain the highest speed on processors with 32-bit words, this code
-    needs to determine the byte order of the target machine. The following
-    block of code is an attempt to capture the most obvious ways in which
-    various environemnts define byte order. It may well fail, in which case
-    the definitions will need to be set by editing at the points marked
-    **** EDIT HERE IF NECESSARY **** below.  My thanks go to Peter Gutmann
-    for his assistance with this endian detection nightmare.
-*/
-
-#define BRG_LITTLE_ENDIAN   1234 /* byte 0 is least significant (i386) */
-#define BRG_BIG_ENDIAN      4321 /* byte 0 is most significant (mc68k) */
-
-#if defined(__GNUC__) || defined(__GNU_LIBRARY__)
-#  if defined(__FreeBSD__) || defined(__OpenBSD__)
-#    include <sys/endian.h>
-#  elif defined( BSD ) && BSD >= 199103
-#      include <machine/endian.h>
-#  elif defined(__APPLE__)
-#    if defined(__BIG_ENDIAN__) && !defined( BIG_ENDIAN )
-#      define BIG_ENDIAN
-#    elif defined(__LITTLE_ENDIAN__) && !defined( LITTLE_ENDIAN )
-#      define LITTLE_ENDIAN
-#    endif
-#  else
-#    include <endian.h>
-#    if defined(__BEOS__)
-#      include <byteswap.h>
-#    endif
-#  endif
-#endif
-
-#if !defined(PLATFORM_BYTE_ORDER)
-#  if defined(LITTLE_ENDIAN) || defined(BIG_ENDIAN)
-#    if    defined(LITTLE_ENDIAN) && !defined(BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
-#    elif !defined(LITTLE_ENDIAN) &&  defined(BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
-#    elif defined(BYTE_ORDER) && (BYTE_ORDER == LITTLE_ENDIAN)
-#      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
-#    elif defined(BYTE_ORDER) && (BYTE_ORDER == BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
-#    endif
-#  elif defined(_LITTLE_ENDIAN) || defined(_BIG_ENDIAN)
-#    if    defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
-#    elif !defined(_LITTLE_ENDIAN) &&  defined(_BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
-#    elif defined(_BYTE_ORDER) && (_BYTE_ORDER == _LITTLE_ENDIAN)
-#      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
-#    elif defined(_BYTE_ORDER) && (_BYTE_ORDER == _BIG_ENDIAN)
-#      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
-#   endif
-#  elif defined(__LITTLE_ENDIAN__) || defined(__BIG_ENDIAN__)
-#    if    defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)
-#      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
-#    elif !defined(__LITTLE_ENDIAN__) &&  defined(__BIG_ENDIAN__)
-#      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
-#    elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __LITTLE_ENDIAN__)
-#      define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
-#    elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __BIG_ENDIAN__)
-#      define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
-#    endif
-#  endif
-#endif
-
-/*  if the platform is still unknown, try to find its byte order    */
-/*  from commonly used machine defines                              */
-
-#if !defined(PLATFORM_BYTE_ORDER)
-
-#if   defined( __alpha__ ) || defined( __alpha ) || defined( i386 )       || \
-      defined( __i386__ )  || defined( _M_I86 )  || defined( _M_IX86 )    || \
-      defined( __OS2__ )   || defined( sun386 )  || defined( __TURBOC__ ) || \
-      defined( vax )       || defined( vms )     || defined( VMS )        || \
-      defined( __VMS )
-#  define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
-
-#elif defined( AMIGA )    || defined( applec )  || defined( __AS400__ )  || \
-      defined( _CRAY )    || defined( __hppa )  || defined( __hp9000 )   || \
-      defined( ibm370 )   || defined( mc68000 ) || defined( m68k )       || \
-      defined( __MRC__ )  || defined( __MVS__ ) || defined( __MWERKS__ ) || \
-      defined( sparc )    || defined( __sparc)  || defined( SYMANTEC_C ) || \
-      defined( __TANDEM ) || defined( THINK_C ) || defined( __VMCMS__ )
-#  define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
-
-#elif 0     /* **** EDIT HERE IF NECESSARY **** */
-#  define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN
-#elif 0     /* **** EDIT HERE IF NECESSARY **** */
-#  define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN
-#else
-#  error Please edit aesopt.h (line 234 or 236) to set the platform byte order
-#endif
-
-#endif
-
-/*  SOME LOCAL DEFINITIONS  */
-
-#define NO_TABLES              0
-#define ONE_TABLE              1
-#define FOUR_TABLES            4
-#define NONE                   0
-#define PARTIAL                1
-#define FULL                   2
-
-#if defined(bswap32)
-#define aes_sw32    bswap32
-#elif defined(bswap_32)
-#define aes_sw32    bswap_32
-#else
-#define brot(x,n)   (((aes_32t)(x) <<  n) | ((aes_32t)(x) >> (32 - n)))
-#define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00))
-#endif
-
-/*  1. FUNCTIONS REQUIRED
-
-    This implementation provides subroutines for encryption, decryption
-    and for setting the three key lengths (separately) for encryption
-    and decryption. When the assembler code is not being used the following
-    definition blocks allow the selection of the routines that are to be
-    included in the compilation.
-*/
-#if defined( AES_ENCRYPT )
-#define ENCRYPTION
-#define ENCRYPTION_KEY_SCHEDULE
-#endif
-
-#if defined( AES_DECRYPT )
-#define DECRYPTION
-#define DECRYPTION_KEY_SCHEDULE
-#endif
-
-/*  2. ASSEMBLER SUPPORT
-
-    This define (which can be on the command line) enables the use of the
-    assembler code routines for encryption and decryption with the C code
-    only providing key scheduling
-*/
-#if 0 && !defined(AES_ASM)
-#define AES_ASM
-#endif
-
-/*  3. BYTE ORDER WITHIN 32 BIT WORDS
-
-    The fundamental data processing units in Rijndael are 8-bit bytes. The
-    input, output and key input are all enumerated arrays of bytes in which
-    bytes are numbered starting at zero and increasing to one less than the
-    number of bytes in the array in question. This enumeration is only used
-    for naming bytes and does not imply any adjacency or order relationship
-    from one byte to another. When these inputs and outputs are considered
-    as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
-    byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
-    In this implementation bits are numbered from 0 to 7 starting at the
-    numerically least significant end of each byte (bit n represents 2^n).
-
-    However, Rijndael can be implemented more efficiently using 32-bit
-    words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
-    into word[n]. While in principle these bytes can be assembled into words
-    in any positions, this implementation only supports the two formats in
-    which bytes in adjacent positions within words also have adjacent byte
-    numbers. This order is called big-endian if the lowest numbered bytes
-    in words have the highest numeric significance and little-endian if the
-    opposite applies.
-
-    This code can work in either order irrespective of the order used by the
-    machine on which it runs. Normally the internal byte order will be set
-    to the order of the processor on which the code is to be run but this
-    define can be used to reverse this in special situations
-
-    NOTE: Assembler code versions rely on PLATFORM_BYTE_ORDER being set
-*/
-#if 1 || defined(AES_ASM)
-#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
-#elif 0
-#define ALGORITHM_BYTE_ORDER BRG_LITTLE_ENDIAN
-#elif 0
-#define ALGORITHM_BYTE_ORDER BRG_BIG_ENDIAN
-#else
-#error The algorithm byte order is not defined
-#endif
-
-/*  4. FAST INPUT/OUTPUT OPERATIONS.
-
-    On some machines it is possible to improve speed by transferring the
-    bytes in the input and output arrays to and from the internal 32-bit
-    variables by addressing these arrays as if they are arrays of 32-bit
-    words.  On some machines this will always be possible but there may
-    be a large performance penalty if the byte arrays are not aligned on
-    the normal word boundaries. On other machines this technique will
-    lead to memory access errors when such 32-bit word accesses are not
-    properly aligned. The option SAFE_IO avoids such problems but will
-    often be slower on those machines that support misaligned access
-    (especially so if care is taken to align the input  and output byte
-    arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
-    assumed that access to byte arrays as if they are arrays of 32-bit
-    words will not cause problems when such accesses are misaligned.
-*/
-#if 0 && !defined(_MSC_VER)
-#define SAFE_IO
-#endif
-
-/*  5. LOOP UNROLLING
-
-    The code for encryption and decryption cycles through a number of rounds
-    that can be implemented either in a loop or by expanding the code into a
-    long sequence of instructions, the latter approach, known as loop
-    unrolling, producing a larger program but one that will often be much
-    faster. There are also potential speed advantages in expanding two
-    iterations in a loop with half the number of iterations, which is called
-    partial loop unrolling.  The following options allow partial or full loop
-    unrolling to be set independently for encryption and decryption.
-*/
-#if 1
-#define ENC_UNROLL  FULL
-#elif 0
-#define ENC_UNROLL  PARTIAL
-#else
-#define ENC_UNROLL  NONE
-#endif
-
-#if 1
-#define DEC_UNROLL  FULL
-#elif 0
-#define DEC_UNROLL  PARTIAL
-#else
-#define DEC_UNROLL  NONE
-#endif
-
-/*  6. FAST FINITE FIELD OPERATIONS
-
-    If this section is included, tables are used to provide faster finite
-    field arithmetic (this has no effect if FIXED_TABLES is defined).
-*/
-#if 1
-#define FF_TABLES
-#endif
-
-/*  7. INTERNAL STATE VARIABLE FORMAT
-
-    The internal state of Rijndael is stored in a number of local 32-bit
-    word variables which can be defined either as an array or as individually
-    named variables. Include this section if you want to store these local
-    variables in arrays. Otherwise individual local variables will be used.
-*/
-#if 0
-#define ARRAYS
-#endif
-
-/* In this implementation the columns of the state array are each held in
-   32-bit words. The state array can be held in various ways: in an array
-   of words, in a number of individual word variables or in a number of
-   processor registers. The following define maps a variable name x and
-   a column number c to the way the state array variable is to be held.
-   The first define below maps the state into an array x[c] whereas the
-   second form maps the state into a number of individual variables x0,
-   x1, etc.  Another form could map individual state columns to machine
-   register names.
-*/
-
-#if defined(ARRAYS)
-#define s(x,c) x[c]
-#else
-#define s(x,c) x##c
-#endif
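-
-/* for illustration: s(x,2) expands to x[2] when ARRAYS is defined and to
-   the individual variable x2 otherwise */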
-
-/*  8. FIXED OR DYNAMIC TABLES
-
-    When this section is included the tables used by the code are compiled
-    statically into the binary file.  Otherwise the subroutine gen_tabs()
-    must be called to compute them before the code is first used.
-*/
-#if 1
-#define FIXED_TABLES
-#endif
-
-/*  9. TABLE ALIGNMENT
-
-    On some systems speed will be improved by aligning the AES large lookup
-    tables on particular boundaries. This define should be set to a power of
-    two giving the desired alignment. It can be left undefined if alignment
-    is not needed.  This option is specific to the Microsoft VC++ compiler -
-    it seems to sometimes cause trouble for the VC++ version 6 compiler.
-*/
-
-#if 0 && defined(_MSC_VER) && (_MSC_VER >= 1300)
-#define TABLE_ALIGN 64
-#endif
-
-/*  10. INTERNAL TABLE CONFIGURATION
-
-    This cipher proceeds by repeating a transformation for a number of cycles
-    known as 'rounds', each implemented by a round function which can
-    optionally be speeded up using tables.  The basic tables are each 256
-    32-bit words, with either one or four tables being required for each
-    round function depending on how much speed is required. The encryption
-    and decryption round functions are different, and the last encryption
-    and decryption round functions are different again, making four
-    different round functions in all.
-
-    This means that:
-      1. Normal encryption and decryption rounds can each use either 0, 1
-         or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
-      2. The last encryption and decryption rounds can also use either 0, 1
-         or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
-
-    Include or exclude the appropriate definitions below to set the number
-    of tables used by this implementation.
-*/
-
-#if 1   /* set tables for the normal encryption round */
-#define ENC_ROUND   FOUR_TABLES
-#elif 0
-#define ENC_ROUND   ONE_TABLE
-#else
-#define ENC_ROUND   NO_TABLES
-#endif
-
-#if 1   /* set tables for the last encryption round */
-#define LAST_ENC_ROUND  FOUR_TABLES
-#elif 0
-#define LAST_ENC_ROUND  ONE_TABLE
-#else
-#define LAST_ENC_ROUND  NO_TABLES
-#endif
-
-#if 1   /* set tables for the normal decryption round */
-#define DEC_ROUND   FOUR_TABLES
-#elif 0
-#define DEC_ROUND   ONE_TABLE
-#else
-#define DEC_ROUND   NO_TABLES
-#endif
-
-#if 1   /* set tables for the last decryption round */
-#define LAST_DEC_ROUND  FOUR_TABLES
-#elif 0
-#define LAST_DEC_ROUND  ONE_TABLE
-#else
-#define LAST_DEC_ROUND  NO_TABLES
-#endif
-
-/*  The decryption key schedule can be speeded up with tables in the same
-    way that the round functions can.  Include or exclude the following
-    defines to set this requirement.
-*/
-#if 1
-#define KEY_SCHED   FOUR_TABLES
-#elif 0
-#define KEY_SCHED   ONE_TABLE
-#else
-#define KEY_SCHED   NO_TABLES
-#endif
-
-/*  11. TABLE POINTER CACHING
-
-    Normally tables are referenced directly. Enable this option if you wish
-    to cache pointers to the tables in the encrypt/decrypt code.  Note that
-    this only works if FOUR_TABLES is selected for the round you enable it on.
-*/
-#if 1
-#define ENC_ROUND_CACHE_TABLES
-#endif
-#if 1
-#define LAST_ENC_ROUND_CACHE_TABLES
-#endif
-#if 1
-#define DEC_ROUND_CACHE_TABLES
-#endif
-#if 1
-#define LAST_DEC_ROUND_CACHE_TABLES
-#endif
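-
-/* A sketch of the effect (names illustrative, not from the original source):
-   with caching enabled the round code copies the four sub-table pointers
-   into locals once per call, e.g.
-
-       const aes_32t *t0 = t_use(f,n)[0], *t1 = t_use(f,n)[1],
-                     *t2 = t_use(f,n)[2], *t3 = t_use(f,n)[3];
-
-   so they can stay in registers across rounds, which is why the option
-   requires FOUR_TABLES for the round concerned.
-*/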
-
-
-/* END OF CONFIGURATION OPTIONS */
-
-#define RC_LENGTH   (5 * (AES_BLOCK_SIZE / 4 - 2))
-
-/* Disable or report errors on some combinations of options */
-
-#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
-#undef  LAST_ENC_ROUND
-#define LAST_ENC_ROUND  NO_TABLES
-#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
-#undef  LAST_ENC_ROUND
-#define LAST_ENC_ROUND  ONE_TABLE
-#endif
-
-#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
-#undef  ENC_UNROLL
-#define ENC_UNROLL  NONE
-#endif
-
-#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
-#undef  LAST_DEC_ROUND
-#define LAST_DEC_ROUND  NO_TABLES
-#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
-#undef  LAST_DEC_ROUND
-#define LAST_DEC_ROUND  ONE_TABLE
-#endif
-
-#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
-#undef  DEC_UNROLL
-#define DEC_UNROLL  NONE
-#endif
-
-/*  upr(x,n):  rotates bytes within words by n positions, moving bytes to
-               higher index positions with wrap around into low positions
-    ups(x,n):  moves bytes by n positions to higher index positions in
-               words but without wrap around
-    bval(x,n): extracts a byte from a word
-
-    NOTE:      The definitions given here are intended only for use with
-               unsigned variables and with shift counts that are compile
-               time constants
-*/
-
-#if (ALGORITHM_BYTE_ORDER == BRG_LITTLE_ENDIAN)
-#define upr(x,n)        (((aes_32t)(x) << (8 * (n))) | ((aes_32t)(x) >> (32 - 8 * (n))))
-#define ups(x,n)        ((aes_32t) (x) << (8 * (n)))
-#define bval(x,n)       ((aes_08t)((x) >> (8 * (n))))
-#define bytes2word(b0, b1, b2, b3)  \
-        (((aes_32t)(b3) << 24) | ((aes_32t)(b2) << 16) | ((aes_32t)(b1) << 8) | (b0))
-#endif
-
-#if (ALGORITHM_BYTE_ORDER == BRG_BIG_ENDIAN)
-#define upr(x,n)        (((aes_32t)(x) >> (8 * (n))) | ((aes_32t)(x) << (32 - 8 * (n))))
-#define ups(x,n)        ((aes_32t) (x) >> (8 * (n)))
-#define bval(x,n)       ((aes_08t)((x) >> (24 - 8 * (n))))
-#define bytes2word(b0, b1, b2, b3)  \
-        (((aes_32t)(b0) << 24) | ((aes_32t)(b1) << 16) | ((aes_32t)(b2) << 8) | (b3))
-#endif
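-
-/* Worked illustration (sketch, not from the original source): with either
-   byte order the bval/bytes2word pair is a round trip; on a little-endian
-   build, for example, bytes2word(0x01,0x02,0x03,0x04) == 0x04030201.
-   The helper name below is illustrative.
-*/
-#if 0   /* illustration only */
-static int byte_order_round_trip(aes_32t w)
-{
-    return w == bytes2word(bval(w,0), bval(w,1), bval(w,2), bval(w,3));
-}
-#endif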
-
-#if defined(SAFE_IO)
-
-#define word_in(x,c)    bytes2word(((aes_08t*)(x)+4*c)[0], ((aes_08t*)(x)+4*c)[1], \
-                                   ((aes_08t*)(x)+4*c)[2], ((aes_08t*)(x)+4*c)[3])
-#define word_out(x,c,v) { ((aes_08t*)(x)+4*c)[0] = bval(v,0); ((aes_08t*)(x)+4*c)[1] = bval(v,1); \
-                          ((aes_08t*)(x)+4*c)[2] = bval(v,2); ((aes_08t*)(x)+4*c)[3] = bval(v,3); }
-
-#elif (ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER)
-
-#define word_in(x,c)    (*((aes_32t*)(x)+(c)))
-#define word_out(x,c,v) (*((aes_32t*)(x)+(c)) = (v))
-
-#else
-
-#define word_in(x,c)    aes_sw32(*((aes_32t*)(x)+(c)))
-#define word_out(x,c,v) (*((aes_32t*)(x)+(c)) = aes_sw32(v))
-
-#endif
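-
-/* note: with SAFE_IO the words are assembled a byte at a time and so are
-   safe for unaligned buffers; the two direct 32-bit forms above rely on the
-   platform tolerating the access, with aes_sw32 byte-swapping whenever the
-   algorithm and platform byte orders differ */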
-
-/* the finite field modular polynomial and elements */
-
-#define WPOLY   0x011b
-#define BPOLY     0x1b
-
-/* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
-
-#define m1  0x80808080
-#define m2  0x7f7f7f7f
-#define gf_mulx(x)  ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))
-
-/* The following defines provide alternative definitions of gf_mulx that might
-   give improved performance if a fast 32-bit multiply is not available. Note
-   that a temporary variable u needs to be defined where gf_mulx is used.
-
-#define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
-#define m4  (0x01010101 * BPOLY)
-#define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)
-*/
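-
-/* Illustration (sketch, not from the original source): gf_mulx on a packed
-   word matches the byte-wise xtime operation - multiplication by {02}
-   modulo the field polynomial - applied to each of the four bytes
-   independently.  The helper name xtime is illustrative.
-*/
-#if 0   /* illustration only */
-static aes_08t xtime(aes_08t b)
-{
-    return (aes_08t)((b << 1) ^ ((b & 0x80) ? BPOLY : 0));
-}
-/* for any word w and n = 0..3:  bval(gf_mulx(w), n) == xtime(bval(w, n)) */
-#endif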
-
-/* Work out which tables are needed for the different options   */
-
-#if defined( AES_ASM )
-#if defined( ENC_ROUND )
-#undef  ENC_ROUND
-#endif
-#define ENC_ROUND   FOUR_TABLES
-#if defined( LAST_ENC_ROUND )
-#undef  LAST_ENC_ROUND
-#endif
-#define LAST_ENC_ROUND  FOUR_TABLES
-#if defined( DEC_ROUND )
-#undef  DEC_ROUND
-#endif
-#define DEC_ROUND   FOUR_TABLES
-#if defined( LAST_DEC_ROUND )
-#undef  LAST_DEC_ROUND
-#endif
-#define LAST_DEC_ROUND  FOUR_TABLES
-#if defined( KEY_SCHED )
-#undef  KEY_SCHED
-#define KEY_SCHED   FOUR_TABLES
-#endif
-#endif
-
-#if defined(ENCRYPTION) || defined(AES_ASM)
-#if ENC_ROUND == ONE_TABLE
-#define FT1_SET
-#elif ENC_ROUND == FOUR_TABLES
-#define FT4_SET
-#else
-#define SBX_SET
-#endif
-#if LAST_ENC_ROUND == ONE_TABLE
-#define FL1_SET
-#elif LAST_ENC_ROUND == FOUR_TABLES
-#define FL4_SET
-#elif !defined(SBX_SET)
-#define SBX_SET
-#endif
-#endif
-
-#if defined(DECRYPTION) || defined(AES_ASM)
-#if DEC_ROUND == ONE_TABLE
-#define IT1_SET
-#elif DEC_ROUND == FOUR_TABLES
-#define IT4_SET
-#else
-#define ISB_SET
-#endif
-#if LAST_DEC_ROUND == ONE_TABLE
-#define IL1_SET
-#elif LAST_DEC_ROUND == FOUR_TABLES
-#define IL4_SET
-#elif !defined(ISB_SET)
-#define ISB_SET
-#endif
-#endif
-
-#if defined(ENCRYPTION_KEY_SCHEDULE) || defined(DECRYPTION_KEY_SCHEDULE)
-#if KEY_SCHED == ONE_TABLE
-#define LS1_SET
-#define IM1_SET
-#elif KEY_SCHED == FOUR_TABLES
-#define LS4_SET
-#define IM4_SET
-#elif !defined(SBX_SET)
-#define SBX_SET
-#endif
-#endif
-
-/* generic definitions of Rijndael macros that use tables    */
-
-#define no_table(x,box,vf,rf,c) bytes2word( \
-    box[bval(vf(x,0,c),rf(0,c))], \
-    box[bval(vf(x,1,c),rf(1,c))], \
-    box[bval(vf(x,2,c),rf(2,c))], \
-    box[bval(vf(x,3,c),rf(3,c))])
-
-#define one_table(x,op,tab,vf,rf,c) \
- (     tab[bval(vf(x,0,c),rf(0,c))] \
-  ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
-  ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
-  ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
-
-#define four_tables(x,tab,vf,rf,c) \
- (  tab[0][bval(vf(x,0,c),rf(0,c))] \
-  ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
-  ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
-  ^ tab[3][bval(vf(x,3,c),rf(3,c))])
-
-#define four_cached_tables(x,tab,vf,rf,c) \
-(  tab##0[bval(vf(x,0,c),rf(0,c))] \
-   ^ tab##1[bval(vf(x,1,c),rf(1,c))] \
-   ^ tab##2[bval(vf(x,2,c),rf(2,c))] \
-   ^ tab##3[bval(vf(x,3,c),rf(3,c))])
-
-#define vf1(x,r,c)  (x)
-#define rf1(r,c)    (r)
-#define rf2(r,c)    ((8+r-c)&3)
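-
-/* worked expansion for illustration: one_table(x,upr,tab,vf1,rf1,0) becomes
-
-       tab[bval(x,0)] ^ upr(tab[bval(x,1)],1)
-     ^ upr(tab[bval(x,2)],2) ^ upr(tab[bval(x,3)],3)
-
-   i.e. a single 1 Kbyte table serves all four byte positions via rotations,
-   trading the upr work for the 3 Kbytes that four_tables would need */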
-
-/* perform forward and inverse column mix operation on four bytes in long word x in */
-/* parallel. NOTE: x must be a simple variable, NOT an expression in these macros.  */
-
-#if defined(FM4_SET)    /* not currently used */
-#define fwd_mcol(x)     four_tables(x,t_use(f,m),vf1,rf1,0)
-#elif defined(FM1_SET)  /* not currently used */
-#define fwd_mcol(x)     one_table(x,upr,t_use(f,m),vf1,rf1,0)
-#else
-#define dec_fmvars      aes_32t g2
-#define fwd_mcol(x)     (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1))
-#endif
-
-#if defined(IM4_SET)
-#define inv_mcol(x)     four_tables(x,t_use(i,m),vf1,rf1,0)
-#elif defined(IM1_SET)
-#define inv_mcol(x)     one_table(x,upr,t_use(i,m),vf1,rf1,0)
-#else
-#define dec_imvars      aes_32t g2, g4, g9
-#define inv_mcol(x)     (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \
-                        (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1))
-#endif
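-
-/* note: in the table-free inv_mcol above g2 = {02}.x, g9 = {09}.x and g4
-   ends up as {0d}.x, so the expression combines the bytes of x with the
-   InvMixColumns coefficients {0e}, {0b}, {0d} and {09} */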
-
-#if defined(FL4_SET)
-#define ls_box(x,c)     four_tables(x,t_use(f,l),vf1,rf2,c)
-#elif   defined(LS4_SET)
-#define ls_box(x,c)     four_tables(x,t_use(l,s),vf1,rf2,c)
-#elif defined(FL1_SET)
-#define ls_box(x,c)     one_table(x,upr,t_use(f,l),vf1,rf2,c)
-#elif defined(LS1_SET)
-#define ls_box(x,c)     one_table(x,upr,t_use(l,s),vf1,rf2,c)
-#else
-#define ls_box(x,c)     no_table(x,t_use(s,box),vf1,rf2,c)
-#endif
-
-#endif
diff --git a/bsd/crypto/aes/ppc/aestab.c b/bsd/crypto/aes/ppc/aestab.c
deleted file mode 100644
index dfd2ee969..000000000
--- a/bsd/crypto/aes/ppc/aestab.c
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- ---------------------------------------------------------------------------
- Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
-
- LICENSE TERMS
-
- The free distribution and use of this software in both source and binary
- form is allowed (with or without changes) provided that:
-
-   1. distributions of this source code include the above copyright
-      notice, this list of conditions and the following disclaimer;
-
-   2. distributions in binary form include the above copyright
-      notice, this list of conditions and the following disclaimer
-      in the documentation and/or other associated materials;
-
-   3. the copyright holder's name is not used to endorse products
-      built using this software without specific written permission.
-
- ALTERNATIVELY, provided that this notice is retained in full, this product
- may be distributed under the terms of the GNU General Public License (GPL),
- in which case the provisions of the GPL apply INSTEAD OF those given above.
-
- DISCLAIMER
-
- This software is provided 'as is' with no explicit or implied warranties
- in respect of its properties, including, but not limited to, correctness
- and/or fitness for purpose.
- ---------------------------------------------------------------------------
- Issue 28/01/2004
-
-*/
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-#define DO_TABLES
-
-#include "aesopt.h"
-
-#if defined(FIXED_TABLES)
-
-#define sb_data(w) {\
-    w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
-    w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
-    w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
-    w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
-    w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
-    w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
-    w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
-    w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
-    w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
-    w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
-    w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
-    w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
-    w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
-    w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
-    w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
-    w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
-    w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
-    w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
-    w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
-    w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
-    w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
-    w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
-    w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
-    w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
-    w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
-    w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
-    w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
-    w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
-    w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
-    w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
-    w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
-    w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
-
-#define isb_data(w) {\
-    w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\
-    w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\
-    w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\
-    w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\
-    w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\
-    w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\
-    w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\
-    w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\
-    w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\
-    w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\
-    w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\
-    w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\
-    w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\
-    w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\
-    w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\
-    w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\
-    w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\
-    w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\
-    w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\
-    w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\
-    w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\
-    w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\
-    w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\
-    w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\
-    w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\
-    w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\
-    w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\
-    w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\
-    w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\
-    w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\
-    w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\
-    w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) }
-
-#define mm_data(w) {\
-    w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\
-    w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\
-    w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\
-    w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\
-    w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\
-    w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\
-    w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\
-    w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\
-    w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\
-    w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\
-    w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\
-    w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\
-    w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\
-    w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\
-    w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\
-    w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\
-    w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\
-    w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\
-    w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\
-    w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\
-    w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\
-    w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\
-    w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\
-    w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\
-    w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\
-    w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\
-    w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\
-    w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\
-    w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\
-    w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\
-    w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\
-    w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) }
-
-#define rc_data(w) {\
-    w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\
-    w(0x1b), w(0x36) }
-
-#define h0(x)   (x)
-
-#define w0(p)   bytes2word(p, 0, 0, 0)
-#define w1(p)   bytes2word(0, p, 0, 0)
-#define w2(p)   bytes2word(0, 0, p, 0)
-#define w3(p)   bytes2word(0, 0, 0, p)
-
-#define u0(p)   bytes2word(f2(p), p, p, f3(p))
-#define u1(p)   bytes2word(f3(p), f2(p), p, p)
-#define u2(p)   bytes2word(p, f3(p), f2(p), p)
-#define u3(p)   bytes2word(p, p, f3(p), f2(p))
-
-#define v0(p)   bytes2word(fe(p), f9(p), fd(p), fb(p))
-#define v1(p)   bytes2word(fb(p), fe(p), f9(p), fd(p))
-#define v2(p)   bytes2word(fd(p), fb(p), fe(p), f9(p))
-#define v3(p)   bytes2word(f9(p), fd(p), fb(p), fe(p))
-
-#endif
-
-#if defined(FIXED_TABLES) || !defined(FF_TABLES)
-
-#define f2(x)   ((x<<1) ^ (((x>>7) & 1) * WPOLY))
-#define f4(x)   ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY))
-#define f8(x)   ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) \
-                        ^ (((x>>5) & 4) * WPOLY))
-#define f3(x)   (f2(x) ^ x)
-#define f9(x)   (f8(x) ^ x)
-#define fb(x)   (f8(x) ^ f2(x) ^ x)
-#define fd(x)   (f8(x) ^ f4(x) ^ x)
-#define fe(x)   (f8(x) ^ f4(x) ^ f2(x))
-
-#else
-
-#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
-#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
-#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
-#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
-#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
-#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
-#define fi(x) ((x) ? pow[ 255 - log[x]] : 0)
-
-#endif
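-
-/* note: the offsets in the table forms above are discrete logarithms, base
-   0x03, of the multipliers - e.g. log({02}) = 0x19 - and the pow table is
-   built double length (pow[i + 255] == pow[i], see gen_tabs below) so the
-   index sums never need reducing modulo 255 */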
-
-#include "aestab.h"
-
-#if defined(FIXED_TABLES)
-
-/* a no-op, provided so that a call made when FIXED_TABLES is defined is harmless */
-
-void gen_tabs(void)
-{
-}
-
-#else   /* dynamic table generation */
-
-#if !defined(FF_TABLES)
-
-/*  Generate the tables for the dynamic table option
-
-    It will generally be sensible to use tables to compute finite
-    field multiplies and inverses but where memory is scarce this
-    code might sometimes be better. But it only has effect during
-    initialisation so it's pretty unimportant in overall terms.
-*/
-
-/*  return 2 ^ (n - 1) where n is the bit number of the highest bit
-    set in x with x in the range 1 < x < 0x00000200.   This form is
-    used so that locals within fi can be bytes rather than words
-*/
-
-static aes_08t hibit(const aes_32t x)
-{   aes_08t r = (aes_08t)((x >> 1) | (x >> 2));
-
-    r |= (r >> 2);
-    r |= (r >> 4);
-    return (r + 1) >> 1;
-}
-
-/* return the inverse of the finite field element x */
-
-static aes_08t fi(const aes_08t x)
-{   aes_08t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;
-
-    if(x < 2) return x;
-
-    for(;;)
-    {
-        if(!n1) return v1;
-
-        while(n2 >= n1)
-        {
-            n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2);
-        }
-
-        if(!n2) return v2;
-
-        while(n1 >= n2)
-        {
-            n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1);
-        }
-    }
-}
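-
-/* Verification sketch (illustration only, not from the original source):
-   fi is a binary extended Euclidean algorithm over GF(2^8); its output can
-   be checked by multiplying back in the field.  The helper name gf_mul is
-   illustrative.
-*/
-#if 0
-static aes_08t gf_mul(aes_08t a, aes_08t b)
-{   aes_08t r = 0;
-
-    while(b)
-    {
-        if(b & 1) r ^= a;
-        a = (aes_08t)((a << 1) ^ ((a & 0x80) ? BPOLY : 0));
-        b >>= 1;
-    }
-    return r;
-}
-/* for all x in 1..255:  gf_mul(x, fi(x)) == 1 */
-#endif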
-
-#endif
-
-/* The forward and inverse affine transformations used in the S-box */
-
-#define fwd_affine(x) \
-    (w = (aes_32t)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(aes_08t)(w^(w>>8)))
-
-#define inv_affine(x) \
-    (w = (aes_32t)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(aes_08t)(w^(w>>8)))
-
-static int init = 0;
-
-void gen_tabs(void)
-{   aes_32t  i, w;
-
-#if defined(FF_TABLES)
-
-    aes_08t  pow[512], log[256];
-
-    if(init) return;
-    /*  log and power tables for GF(2^8) finite field with
-        WPOLY as modular polynomial - the simplest primitive
-        root is 0x03, used here to generate the tables
-    */
-
-    i = 0; w = 1;
-    do
-    {
-        pow[i] = (aes_08t)w;
-        pow[i + 255] = (aes_08t)w;
-        log[w] = (aes_08t)i++;
-        w ^=  (w << 1) ^ (w & 0x80 ? WPOLY : 0);
-    }
-    while (w != 1);
-
-#else
-    if(init) return;
-#endif
-
-    for(i = 0, w = 1; i < RC_LENGTH; ++i)
-    {
-        t_set(r,c)[i] = bytes2word(w, 0, 0, 0);
-        w = f2(w);
-    }
-
-    for(i = 0; i < 256; ++i)
-    {   aes_08t    b;
-
-        b = fwd_affine(fi((aes_08t)i));
-        w = bytes2word(f2(b), b, b, f3(b));
-
-#if defined( SBX_SET )
-        t_set(s,box)[i] = b;
-#endif
-
-#if defined( FT1_SET )                 /* tables for a normal encryption round */
-        t_set(f,n)[i] = w;
-#endif
-#if defined( FT4_SET )
-        t_set(f,n)[0][i] = w;
-        t_set(f,n)[1][i] = upr(w,1);
-        t_set(f,n)[2][i] = upr(w,2);
-        t_set(f,n)[3][i] = upr(w,3);
-#endif
-        w = bytes2word(b, 0, 0, 0);
-
-#if defined( FL1_SET )                 /* tables for last encryption round (may also   */
-        t_set(f,l)[i] = w;        /* be used in the key schedule)                 */
-#endif
-#if defined( FL4_SET )
-        t_set(f,l)[0][i] = w;
-        t_set(f,l)[1][i] = upr(w,1);
-        t_set(f,l)[2][i] = upr(w,2);
-        t_set(f,l)[3][i] = upr(w,3);
-#endif
-
-#if defined( LS1_SET )                 /* table for key schedule if t_set(f,l) above is    */
-        t_set(l,s)[i] = w;      /* not of the required form                     */
-#endif
-#if defined( LS4_SET )
-        t_set(l,s)[0][i] = w;
-        t_set(l,s)[1][i] = upr(w,1);
-        t_set(l,s)[2][i] = upr(w,2);
-        t_set(l,s)[3][i] = upr(w,3);
-#endif
-
-        b = fi(inv_affine((aes_08t)i));
-        w = bytes2word(fe(b), f9(b), fd(b), fb(b));
-
-#if defined( IM1_SET )                 /* tables for the inverse mix column operation  */
-        t_set(i,m)[b] = w;
-#endif
-#if defined( IM4_SET )
-        t_set(i,m)[0][b] = w;
-        t_set(i,m)[1][b] = upr(w,1);
-        t_set(i,m)[2][b] = upr(w,2);
-        t_set(i,m)[3][b] = upr(w,3);
-#endif
-
-#if defined( ISB_SET )
-        t_set(i,box)[i] = b;
-#endif
-#if defined( IT1_SET )                 /* tables for a normal decryption round */
-        t_set(i,n)[i] = w;
-#endif
-#if defined( IT4_SET )
-        t_set(i,n)[0][i] = w;
-        t_set(i,n)[1][i] = upr(w,1);
-        t_set(i,n)[2][i] = upr(w,2);
-        t_set(i,n)[3][i] = upr(w,3);
-#endif
-        w = bytes2word(b, 0, 0, 0);
-#if defined( IL1_SET )                 /* tables for last decryption round */
-        t_set(i,l)[i] = w;
-#endif
-#if defined( IL4_SET )
-        t_set(i,l)[0][i] = w;
-        t_set(i,l)[1][i] = upr(w,1);
-        t_set(i,l)[2][i] = upr(w,2);
-        t_set(i,l)[3][i] = upr(w,3);
-#endif
-    }
-    init = 1;
-}
-
-#endif
-
-#if defined(__cplusplus)
-}
-#endif
-
diff --git a/bsd/crypto/aes/ppc/aestab.h b/bsd/crypto/aes/ppc/aestab.h
deleted file mode 100644
index 004ef9e74..000000000
--- a/bsd/crypto/aes/ppc/aestab.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- ---------------------------------------------------------------------------
- Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
-
- LICENSE TERMS
-
- The free distribution and use of this software in both source and binary
- form is allowed (with or without changes) provided that:
-
-   1. distributions of this source code include the above copyright
-      notice, this list of conditions and the following disclaimer;
-
-   2. distributions in binary form include the above copyright
-      notice, this list of conditions and the following disclaimer
-      in the documentation and/or other associated materials;
-
-   3. the copyright holder's name is not used to endorse products
-      built using this software without specific written permission.
-
- ALTERNATIVELY, provided that this notice is retained in full, this product
- may be distributed under the terms of the GNU General Public License (GPL),
- in which case the provisions of the GPL apply INSTEAD OF those given above.
-
- DISCLAIMER
-
- This software is provided 'as is' with no explicit or implied warranties
- in respect of its properties, including, but not limited to, correctness
- and/or fitness for purpose.
- ---------------------------------------------------------------------------
- Issue 28/01/2004
-
- This file contains the code for declaring the tables needed to implement
- AES. The file aesopt.h is assumed to be included before this header file.
- If there are no global variables, the definitions here can be used to put
- the AES tables in a structure so that a pointer can then be added to the
- AES context to pass them to the AES routines that need them.   If this
- facility is used, the calling program has to ensure that this pointer is
- managed appropriately.  In particular, the value of the t_dec(in,it) item
- in the table structure must be set to zero in order to ensure that the
- tables are initialised. In practice the three code sequences in aeskey.c
- that control the calls to gen_tabs() and the gen_tabs() routine itself will
- have to be changed for a specific implementation. If global variables are
- available it will generally be preferable to use them with the precomputed
- FIXED_TABLES option that uses static global tables.
-
- The following defines can be used to control the way the tables
- are defined, initialised and used in embedded environments that
- require special features for these purposes
-
-    the 't_dec' construction is used to declare fixed table arrays
-    the 't_set' construction is used to set fixed table values
-    the 't_use' construction is used to access fixed table values
-
-    256 byte tables:
-
-        t_xxx(s,box)    => forward S box
-        t_xxx(i,box)    => inverse S box
-
-    256 32-bit word OR 4 x 256 32-bit word tables:
-
-        t_xxx(f,n)      => forward normal round
-        t_xxx(f,l)      => forward last round
-        t_xxx(i,n)      => inverse normal round
-        t_xxx(i,l)      => inverse last round
-        t_xxx(l,s)      => key schedule table
-        t_xxx(i,m)      => key schedule table
-
-    Other variables and tables:
-
-        t_xxx(r,c)      => the rcon table
-*/
-
-#if !defined( _AESTAB_H )
-#define _AESTAB_H
-
-#define t_dec(m,n) t_##m##n
-#define t_set(m,n) t_##m##n
-#define t_use(m,n) t_##m##n
-
-#if defined(FIXED_TABLES)
-#define Const const
-#else
-#define Const
-#endif
-
-#if defined(DO_TABLES)
-#define Extern
-#else
-#define Extern extern
-#endif
-
-#if defined(_MSC_VER) && defined(TABLE_ALIGN)
-#define Align __declspec(align(TABLE_ALIGN))
-#else
-#define Align
-#endif
-
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-#if defined(DO_TABLES) && defined(FIXED_TABLES)
-#define d_1(t,n,b,e)       Align Const t n[256]    =   b(e)
-#define d_4(t,n,b,e,f,g,h) Align Const t n[4][256] = { b(e), b(f), b(g), b(h) }
-Extern Align Const aes_32t t_dec(r,c)[RC_LENGTH] = rc_data(w0);
-#else
-#define d_1(t,n,b,e)       Extern Align Const t n[256]
-#define d_4(t,n,b,e,f,g,h) Extern Align Const t n[4][256]
-Extern Align Const aes_32t t_dec(r,c)[RC_LENGTH];
-#endif
-
-#if defined( SBX_SET )
-    d_1(aes_08t, t_dec(s,box), sb_data, h0);
-#endif
-#if defined( ISB_SET )
-    d_1(aes_08t, t_dec(i,box), isb_data, h0);
-#endif
-
-#if defined( FT1_SET )
-    d_1(aes_32t, t_dec(f,n), sb_data, u0);
-#endif
-#if defined( FT4_SET )
-    d_4(aes_32t, t_dec(f,n), sb_data, u0, u1, u2, u3);
-#endif
-
-#if defined( FL1_SET )
-    d_1(aes_32t, t_dec(f,l), sb_data, w0);
-#endif
-#if defined( FL4_SET )
-    d_4(aes_32t, t_dec(f,l), sb_data, w0, w1, w2, w3);
-#endif
-
-#if defined( IT1_SET )
-    d_1(aes_32t, t_dec(i,n), isb_data, v0);
-#endif
-#if defined( IT4_SET )
-    d_4(aes_32t, t_dec(i,n), isb_data, v0, v1, v2, v3);
-#endif
-
-#if defined( IL1_SET )
-    d_1(aes_32t, t_dec(i,l), isb_data, w0);
-#endif
-#if defined( IL4_SET )
-    d_4(aes_32t, t_dec(i,l), isb_data, w0, w1, w2, w3);
-#endif
-
-#if defined( LS1_SET )
-#if defined( FL1_SET )
-#undef  LS1_SET
-#else
-    d_1(aes_32t, t_dec(l,s), sb_data, w0);
-#endif
-#endif
-
-#if defined( LS4_SET )
-#if defined( FL4_SET )
-#undef  LS4_SET
-#else
-    d_4(aes_32t, t_dec(l,s), sb_data, w0, w1, w2, w3);
-#endif
-#endif
-
-#if defined( IM1_SET )
-    d_1(aes_32t, t_dec(i,m), mm_data, v0);
-#endif
-#if defined( IM4_SET )
-    d_4(aes_32t, t_dec(i,m), mm_data, v0, v1, v2, v3);
-#endif
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif
diff --git a/bsd/crypto/aes/test/ReadMe.txt b/bsd/crypto/aes/test/ReadMe.txt
new file mode 100644
index 000000000..1329e84be
--- /dev/null
+++ b/bsd/crypto/aes/test/ReadMe.txt
@@ -0,0 +1,97 @@
+This directory contains a test program and shell scripts
+
+	tstaes.c
+	makegenarm.sh
+	makegenx86.sh
+	makeoptx86.sh
+
+that can be used to build test executables. These executables are used to validate the implementation
+and to benchmark the performance of the aes functions in the kernel. This directory also serves
+as a development environment for porting the aes functions to new architectures.
+
+In xnu-1699.20.6 (the release this work builds on), the generic aes source code sits in bsd/crypto/aes/gen. The x86_64
+and i386 architectural optimizations are given in bsd/crypto/aes/i386.
+
+After making some code corrections (in aes.h and most of the assembly code in i386), you can now build a test
+executable that is functionally equivalent to the aes code in the kernel.
+
+To generate a test executable for the aes in the x86_64/i386 kernel,
+
+	$ makeoptx86.sh
+
+This will build a test executable tstaesoptx86 (x86_64/i386). The executable automatically detects the
+CPU clock rate. You specify the number of iterations and the number of 16-byte blocks for the simulation.
+The executable generates random test data, calls aes_encrypt_cbc to encrypt the plain data
+into cipher data, and then calls aes_decrypt_cbc to decrypt the cipher into decrypted data. Afterwards, it compares
+the decrypted data against the plain data. Should there be a mismatch, the program reports the error and exits.
+Otherwise, it measures the times the system spends in the 2 functions under test and prints out
+the performance profiling data.
+
+On K5,
+
+$ tstaesoptx86 1000 2560
+device max CPU clock rate = 2659.00 MHz
+40960 bytes per cbc call
+ aes_encrypt_cbc : time elapsed =   220.24 usecs,  177.37 MBytes/sec,    14.30 cycles/byte
+  best iteration : time elapsed =   218.30 usecs,  178.94 MBytes/sec,    14.17 cycles/byte
+ worst iteration : time elapsed =   286.14 usecs,  136.51 MBytes/sec,    18.58 cycles/byte
+
+ aes_decrypt_cbc : time elapsed =   199.85 usecs,  195.46 MBytes/sec,    12.97 cycles/byte
+  best iteration : time elapsed =   198.17 usecs,  197.12 MBytes/sec,    12.86 cycles/byte
+ worst iteration : time elapsed =   228.12 usecs,  171.23 MBytes/sec,    14.81 cycles/byte
+
+On K5B (with aesni),
+
+$ tstaesoptx86 1000 256    
+device max CPU clock rate = 2400.00 MHz
+4096 bytes per cbc call
+ aes_encrypt_cbc : time elapsed =     6.69 usecs,  583.67 MBytes/sec,     3.92 cycles/byte
+  best iteration : time elapsed =     6.38 usecs,  612.46 MBytes/sec,     3.74 cycles/byte
+ worst iteration : time elapsed =     9.72 usecs,  401.96 MBytes/sec,     5.69 cycles/byte
+
+ aes_decrypt_cbc : time elapsed =     2.05 usecs, 1902.65 MBytes/sec,     1.20 cycles/byte
+  best iteration : time elapsed =     1.96 usecs, 1997.06 MBytes/sec,     1.15 cycles/byte
+ worst iteration : time elapsed =     4.60 usecs,  849.00 MBytes/sec,     2.70 cycles/byte
+
+You can also build a test executable using the generic source code for the i386/x86_64 architecture.
+
+	$ makegenx86.sh
+
+When run on K5,
+
+$ tstaesgenx86 1000 2560   
+device max CPU clock rate = 2659.00 MHz
+40960 bytes per cbc call
+ aes_encrypt_cbc : time elapsed =   278.05 usecs,  140.49 MBytes/sec,    18.05 cycles/byte
+  best iteration : time elapsed =   274.63 usecs,  142.24 MBytes/sec,    17.83 cycles/byte
+ worst iteration : time elapsed =   309.70 usecs,  126.13 MBytes/sec,    20.10 cycles/byte
+
+ aes_decrypt_cbc : time elapsed =   265.43 usecs,  147.17 MBytes/sec,    17.23 cycles/byte
+  best iteration : time elapsed =   262.20 usecs,  148.98 MBytes/sec,    17.02 cycles/byte
+ worst iteration : time elapsed =   296.19 usecs,  131.88 MBytes/sec,    19.23 cycles/byte
+
+We can see the AES implementation in the x86_64 kernel has been improved from 17.83/17.02
+down to 14.17/12.86 cycles/byte (best iterations) for aes_encrypt_cbc and aes_decrypt_cbc, respectively.
+
+
+ --------- iOS ---------
+
+Similarly, you can build a test executable for the aes in the armv7 kernel (which uses the generic source code)
+
+	$ makegenarm.sh
+
+Note that you need the iOS SDK installed. You can then copy this executable to an iOS device for simulation.
+
+On N88,
+
+iPhone:~ root# ./tstaesgenarm 1000 2560
+device max CPU clock rate = 600.00 MHz
+40960 bytes per cbc call
+ aes_encrypt_cbc : time elapsed =  2890.18 usecs,   13.52 MBytes/sec,    42.34 cycles/byte
+  best iteration : time elapsed =  2692.00 usecs,   14.51 MBytes/sec,    39.43 cycles/byte
+ worst iteration : time elapsed = 18248.33 usecs,    2.14 MBytes/sec,   267.31 cycles/byte
+
+ aes_decrypt_cbc : time elapsed =  3078.20 usecs,   12.69 MBytes/sec,    45.09 cycles/byte
+  best iteration : time elapsed =  2873.33 usecs,   13.59 MBytes/sec,    42.09 cycles/byte
+ worst iteration : time elapsed =  9664.79 usecs,    4.04 MBytes/sec,   141.57 cycles/byte
+
diff --git a/bsd/crypto/aes/test/makegenx86.sh b/bsd/crypto/aes/test/makegenx86.sh
new file mode 100755
index 000000000..ea4de6f63
--- /dev/null
+++ b/bsd/crypto/aes/test/makegenx86.sh
@@ -0,0 +1,8 @@
+#!/bin/ksh
+
+cc -Os -c -arch i386 -arch x86_64 -I ../../../ ../gen/aescrypt.c -o aescrypt.o
+cc -Os -c -arch i386 -arch x86_64 -I ../../../ ../gen/aeskey.c -o aeskey.o
+cc -Os -c -arch i386 -arch x86_64 -I ../../../ ../gen/aestab.c -o aestab.o
+
+cc -arch i386 -arch x86_64 -Os tstaes.c aescrypt.o aeskey.o aestab.o -o tstaesgenx86
+rm -fr aescrypt.o aeskey.o aestab.o
diff --git a/bsd/crypto/aes/test/makeoptx86.sh b/bsd/crypto/aes/test/makeoptx86.sh
new file mode 100755
index 000000000..3732e037f
--- /dev/null
+++ b/bsd/crypto/aes/test/makeoptx86.sh
@@ -0,0 +1,10 @@
+#!/bin/ksh
+
+cc -c -Os -arch i386 -arch x86_64 ../i386/AES.s -o AES.o
+cc -c -Os -arch i386 -arch x86_64 ../i386/aes_crypt_hw.s -o aes_crypt_hw.o
+cc -c -Os -arch i386 -arch x86_64 ../i386/aes_key_hw.s -o aes_key_hw.o
+cc -c -Os -arch i386 -arch x86_64 ../i386/aes_modes_asm.s -o aes_modes_asm.o
+cc -c -Os -arch i386 -arch x86_64 ../i386/aes_modes_hw.s -o aes_modes_hw.o
+
+cc -Os -arch i386 -arch x86_64 tstaes.c AES.o aes_crypt_hw.o aes_key_hw.o aes_modes_asm.o aes_modes_hw.o -o tstaesoptx86
+rm -fr AES.o aes_crypt_hw.o aes_key_hw.o aes_modes_asm.o aes_modes_hw.o
diff --git a/bsd/crypto/aes/test/tstaes.c b/bsd/crypto/aes/test/tstaes.c
new file mode 100644
index 000000000..cbe364ed7
--- /dev/null
+++ b/bsd/crypto/aes/test/tstaes.c
@@ -0,0 +1,131 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "../aes.h"
+#include <mach/mach_time.h>
+#include <sys/sysctl.h>
+
+
+aes_encrypt_ctx	encrypt_ctx;
+aes_decrypt_ctx	decrypt_ctx;
+
+size_t getFreq()
+{
+    int mib[2];
+    size_t cpufreq = 0, len;
+    mib[0] = CTL_HW;
+    mib[1] = HW_CPU_FREQ;
+    len = sizeof(cpufreq);
+
+    sysctl(mib, 2, &cpufreq, &len, NULL, 0);
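+    /* hw.cpufrequency may be unavailable on some devices; cpufreq then
+       stays 0 and the caller falls back to a default clock rate */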
+
+    return  cpufreq;
+}
+
+
+uint32_t	cpu_freq;
+
+int main(int argc, char **argv)
+{
+
+	char	*plain;
+	char	*cipher;
+	char	*decrypt;
+
+	uint32_t	ITERATIONS;
+	uint32_t	NUM_BLOCKS;
+	uint32_t	data_size;
+
+	char 	key[32];
+	char 	iv[16];
+	int		i, iterations;
+	uint64_t    t0, t1, t2, sum=0, max_time=0, min_time=-1, sum1=0, max_time1=0, min_time1=-1;
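+	/* min_time and min_time1 start at -1, which wraps to the largest
+	   uint64_t value, so the first iteration always sets the initial minimum */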
+    float       time, time_max, time_min, time1, time_max1, time_min1;
+
+	cpu_freq = getFreq();
+
+	if (cpu_freq == 0) {
+		fprintf(stderr, "this appears to be an N90 device, where cpu_freq can not be detected. set to 800MHz.\n");
+		cpu_freq = 800000000;
+	} else {
+		fprintf(stderr, "device max CPU clock rate = %.2f MHz\n", cpu_freq/1.e6);
+	}
+
+    mach_timebase_info_data_t info;
+    kern_return_t err = mach_timebase_info( &info );
+    if (err != KERN_SUCCESS) {
+        fprintf(stderr, "mach_timebase_info failed.\n");
+        exit(1);
+    }
+
+	if (argc!=3) {
+		fprintf(stderr, "usage : %s iterations num_16bytes_block\n", argv[0]);
+		exit(1);
+	}
+	ITERATIONS = atoi(argv[1]);
+	NUM_BLOCKS = atoi(argv[2]);
+	data_size = 16*NUM_BLOCKS;
+
+	plain = malloc(data_size);
+	cipher = malloc(data_size);
+	decrypt = malloc(data_size);
+
+	if ((plain==NULL) || (cipher==NULL) || (decrypt==NULL)) {
+		fprintf(stderr,"malloc error.\n");
+		exit(1);
+	}
+
+	for (i=0;i<data_size;i++) plain[i] = random();
+	for (i=0;i<32;i++) key[i] = random();
+	for (i=0;i<16;i++) iv[i] = random();
+
+	aes_encrypt_key128(key, &encrypt_ctx);
+	aes_decrypt_key128(key, &decrypt_ctx);
+
+	for (iterations=0;iterations<ITERATIONS;iterations++) {
+		t0 = mach_absolute_time();
+
+		// encrypt
+		aes_encrypt_cbc(plain, iv, NUM_BLOCKS, cipher, &encrypt_ctx);
+
+		t1 = mach_absolute_time();
+
+		// decrypt
+		aes_decrypt_cbc(cipher, iv, NUM_BLOCKS, decrypt, &decrypt_ctx);
+
+		t2 = mach_absolute_time();
+
+		for (i=0;i<(16*NUM_BLOCKS);i++) {
+			if (plain[i]!=decrypt[i]) {
+				fprintf(stderr,"error : decrypt != plain. i = %d\n", i);
+				exit(1);
+			}
+		}
+		sum += (t1-t0);
+		sum1 += (t2-t1);
+		t2-=t1;
+		t1-=t0;
+		if (t1>max_time) max_time = t1;
+		if (t1<min_time) min_time = t1;
+		if (t2>max_time1) max_time1 = t2;
+		if (t2<min_time1) min_time1 = t2;
+	}
+
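+	/* mach_absolute_time ticks * numer/denom yields nanoseconds, hence the
+	   1e-9 factor in the conversions to seconds below */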
+	time = sum * 1e-9* ((double) info.numer)/((double) info.denom);
+	time_max = max_time * 1e-9* ((double) info.numer)/((double) info.denom);
+    time_min = min_time * 1e-9* ((double) info.numer)/((double) info.denom);
+
+	time1 = sum1 * 1e-9* ((double) info.numer)/((double) info.denom);
+	time_max1 = max_time1 * 1e-9* ((double) info.numer)/((double) info.denom);
+    time_min1 = min_time1 * 1e-9* ((double) info.numer)/((double) info.denom);
+
+	printf("%d bytes per cbc call\n", data_size);
+	printf(" aes_encrypt_cbc : time elapsed = %8.2f usecs, %7.2f MBytes/sec, %8.2f cycles/byte\n", 1.e6*time/ITERATIONS,data_size*ITERATIONS/1024./1024./time, time*1.*cpu_freq/ITERATIONS/data_size);
+	printf("  best iteration : time elapsed = %8.2f usecs, %7.2f MBytes/sec, %8.2f cycles/byte\n", 1.e6*time_min,data_size/1024./1024./time_min, time_min*1.*cpu_freq/data_size);
+    printf(" worst iteration : time elapsed = %8.2f usecs, %7.2f MBytes/sec, %8.2f cycles/byte\n", 1.e6*time_max,data_size/1024./1024./time_max, time_max*1.*cpu_freq/data_size);
+
+	printf("\n");
+
+	printf(" aes_decrypt_cbc : time elapsed = %8.2f usecs, %7.2f MBytes/sec, %8.2f cycles/byte\n", 1.e6*time1/ITERATIONS,data_size*ITERATIONS/1024./1024./time1, time1*1.*cpu_freq/ITERATIONS/data_size);
+	printf("  best iteration : time elapsed = %8.2f usecs, %7.2f MBytes/sec, %8.2f cycles/byte\n", 1.e6*time_min1,data_size/1024./1024./time_min1, time_min1*1.*cpu_freq/data_size);
+    printf(" worst iteration : time elapsed = %8.2f usecs, %7.2f MBytes/sec, %8.2f cycles/byte\n", 1.e6*time_max1,data_size/1024./1024./time_max1, time_max1*1.*cpu_freq/data_size);
+
+	free(plain);
+	free(cipher);
+	free(decrypt);
+
+	return 0;
+}
diff --git a/bsd/crypto/blowfish/Makefile b/bsd/crypto/blowfish/Makefile
index 6b3066a93..26126163a 100644
--- a/bsd/crypto/blowfish/Makefile
+++ b/bsd/crypto/blowfish/Makefile
@@ -9,16 +9,12 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 INSTINC_SUBDIRS_X86_64 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS_X86_64 = \
diff --git a/bsd/crypto/cast128/Makefile b/bsd/crypto/cast128/Makefile
index 6eb76064a..100921729 100644
--- a/bsd/crypto/cast128/Makefile
+++ b/bsd/crypto/cast128/Makefile
@@ -9,16 +9,12 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 INSTINC_SUBDIRS_X86_64 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS_X86_64 = \
diff --git a/bsd/crypto/des/Makefile b/bsd/crypto/des/Makefile
index df4545d55..2eee6301a 100644
--- a/bsd/crypto/des/Makefile
+++ b/bsd/crypto/des/Makefile
@@ -9,8 +9,6 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 INSTINC_SUBDIRS_X86_64 = \
@@ -19,8 +17,6 @@ INSTINC_SUBDIRS_ARM = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS_X86_64 = \
diff --git a/bsd/crypto/doc/KernelCrypto.plist b/bsd/crypto/doc/KernelCrypto.plist
new file mode 100644
index 000000000..9c9cfd1c9
--- /dev/null
+++ b/bsd/crypto/doc/KernelCrypto.plist
@@ -0,0 +1,76 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<array>
+	<dict>
+		<key>OpenSourceLicense</key>
+		<string>Other</string>
+		<key>OpenSourceLicenseFile</key>
+		<string>KernelCrypto.txt</string>
+		<key>OpenSourceModifications</key>
+		<string>Extensive customization for OS X</string>
+		<key>OpenSourceProject</key>
+		<string>openssl</string>
+		<key>OpenSourceURL</key>
+		<string>http://www.openssl.org/source/openssl-0.9.6.tar.gz</string>
+		<key>OpenSourceVersion</key>
+		<string>openssl-0.9.6</string>
+		<key>OpenSourceWebsiteURL</key>
+		<string>http://www.openssl.org/</string>
+	</dict>
+	<dict>
+		<key>OpenSourceImportDate</key>
+		<string>2004-04-07</string>
+		<key>OpenSourceLicense</key>
+		<string>Other</string>
+		<key>OpenSourceLicenseFile</key>
+		<string>KernelCrypto.txt</string>
+		<key>OpenSourceModifications</key>
+		<string>Customization for OS X</string>
+		<key>OpenSourceProject</key>
+		<string>Gladman AES</string>
+		<key>OpenSourceURL</key>
+		<string>http://fp.gladman.plus.com/AES/aesfull.zip</string>
+		<key>OpenSourceVersion</key>
+		<string>aes-src-26-08-05</string>
+		<key>OpenSourceWebsiteURL</key>
+		<string>http://fp.gladman.plus.com/AES/index.htm</string>
+	</dict>
+	<dict>
+		<key>OpenSourceImportDate</key>
+		<string>2005-09-02</string>
+		<key>OpenSourceLicense</key>
+		<string>Other</string>
+		<key>OpenSourceLicenseFile</key>
+		<string>KernelCrypto.txt</string>
+		<key>OpenSourceModifications</key>
+		<string>Customization for OS X</string>
+		<key>OpenSourceProject</key>
+		<string>Gladman SHA2</string>
+		<key>OpenSourceURL</key>
+		<string>http://fp.gladman.plus.com/cryptography_technology/sha/sha-26-08-05.zip</string>
+		<key>OpenSourceVersion</key>
+		<string>sha-26-08-05</string>
+		<key>OpenSourceWebsiteURL</key>
+		<string>http://fp.gladman.plus.com/cryptography_technology/sha/index.htm</string>
+	</dict>
+	<dict>
+		<key>OpenSourceImportDate</key>
+		<string>2010-04-14</string>
+		<key>OpenSourceLicense</key>
+		<string>Other</string>
+		<key>OpenSourceLicenseFile</key>
+		<string>KernelCrypto.txt</string>
+		<key>OpenSourceModifications</key>
+		<string>Customization for OS X</string>
+		<key>OpenSourceProject</key>
+		<string>Gladman XTS-AES</string>
+		<key>OpenSourceURL</key>
+		<string>http://gladman.plushost.co.uk/oldsite/AES/xts-vs2008-17-07-09.zip</string>
+		<key>OpenSourceVersion</key>
+		<string>xts-vs2008-17-07-09</string>
+		<key>OpenSourceWebsiteURL</key>
+		<string>http://gladman.plushost.co.uk/oldsite/AES/index.php</string>
+	</dict>
+</array>
+</plist>
diff --git a/bsd/crypto/doc/KernelCrypto.txt b/bsd/crypto/doc/KernelCrypto.txt
new file mode 100644
index 000000000..611542795
--- /dev/null
+++ b/bsd/crypto/doc/KernelCrypto.txt
@@ -0,0 +1,149 @@
+ Original SSLeay License
+ -----------------------
+
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ * 
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to.  The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ * 
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *    "This product includes cryptographic software written by
+ *     Eric Young (eay@cryptsoft.com)"
+ *    The word 'cryptographic' can be left out if the rouines from the library
+ *    being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from 
+ *    the apps directory (application code) you must include an acknowledgement:
+ *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ * 
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * 
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed.  i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+ ---------------------------------------------------------------------------
+ License for Dr. Brian Gladman's SHA2 implementation
+ ---------------------------------------------------------------------------
+
+ Copyright (c) 2002, Dr Brian Gladman, Worcester, UK.   All rights reserved.
+
+ LICENSE TERMS
+
+ The free distribution and use of this software in both source and binary
+ form is allowed (with or without changes) provided that:
+
+   1. distributions of this source code include the above copyright
+      notice, this list of conditions and the following disclaimer;
+
+   2. distributions in binary form include the above copyright
+      notice, this list of conditions and the following disclaimer
+      in the documentation and/or other associated materials;
+
+   3. the copyright holder's name is not used to endorse products
+      built using this software without specific written permission.
+
+ ALTERNATIVELY, provided that this notice is retained in full, this product
+ may be distributed under the terms of the GNU General Public License (GPL),
+ in which case the provisions of the GPL apply INSTEAD OF those given above.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+
+ ---------------------------------------------------------------------------
+ License for Dr. Brian Gladman's AES implementation
+ ---------------------------------------------------------------------------
+ Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
+
+ LICENSE TERMS
+
+ The free distribution and use of this software in both source and binary
+ form is allowed (with or without changes) provided that:
+
+   1. distributions of this source code include the above copyright
+      notice, this list of conditions and the following disclaimer;
+
+   2. distributions in binary form include the above copyright
+      notice, this list of conditions and the following disclaimer
+      in the documentation and/or other associated materials;
+
+   3. the copyright holder's name is not used to endorse products
+      built using this software without specific written permission.
+
+ ALTERNATIVELY, provided that this notice is retained in full, this product
+ may be distributed under the terms of the GNU General Public License (GPL),
+ in which case the provisions of the GPL apply INSTEAD OF those given above.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+
+
+---------------------------------------------------------------------------
+ License for Dr. Brian Gladman's XTS implementation
+ ---------------------------------------------------------------------------
+
+Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
+
+LICENSE TERMS
+
+The free distribution and use of this software is allowed (with or without
+changes) provided that:
+
+1. source code distributions include the above copyright notice, this
+   list of conditions and the following disclaimer;
+
+2. binary distributions include the above copyright notice, this list
+   of conditions and the following disclaimer in their documentation;
+
+3. the name of the copyright holder is not used to endorse products
+   built using this software without specific written permission.
+
+DISCLAIMER
+
+This software is provided 'as is' with no explicit or implied warranties
+in respect of its properties, including, but not limited to, correctness
+and/or fitness for purpose.
+
+
diff --git a/bsd/crypto/rc4/Makefile b/bsd/crypto/rc4/Makefile
index 4de505de8..9aad66e3a 100644
--- a/bsd/crypto/rc4/Makefile
+++ b/bsd/crypto/rc4/Makefile
@@ -9,8 +9,6 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 INSTINC_SUBDIRS_X86_64 = \
@@ -19,8 +17,6 @@ INSTINC_SUBDIRS_ARM = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS_X86_64 = \
diff --git a/bsd/crypto/sha2/Makefile b/bsd/crypto/sha2/Makefile
index 8e85f612c..4cc93fb76 100644
--- a/bsd/crypto/sha2/Makefile
+++ b/bsd/crypto/sha2/Makefile
@@ -9,8 +9,6 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 INSTINC_SUBDIRS_X86_64 = \
@@ -19,8 +17,6 @@ INSTINC_SUBDIRS_ARM = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS_X86_64 = \
diff --git a/bsd/crypto/sha2/intel/sha256.s b/bsd/crypto/sha2/intel/sha256.s
new file mode 100644
index 000000000..59353ff4b
--- /dev/null
+++ b/bsd/crypto/sha2/intel/sha256.s
@@ -0,0 +1,617 @@
+/*
+	This file provides a hand-tuned x86_64/i386 implementation of the following function
+
+	void SHA256_Transform(SHA256_ctx *ctx, char *data, unsigned int num_blocks);
+
+	which is a C function in sha2.c (from xnu).
+
+	The code first probes cpu_capabilities to detect whether SSSE3 is supported. If not, it branches to
+	SHA256_Transform_nossse3 (in the separate source file sha256nossse3.s), which was cloned from this file
+	with all SSSE3 instructions replaced by SSE3-or-below instructions.
+
+	sha256 algorithm per block description:
+
+		1. W(0:15) = big-endian (per 4 bytes) loading of input data (64 bytes)
+		2. load 8 digests a-h from ctx->state
+		3. for r = 0:15
+				T1 = h + Sigma1(e) + Ch(e,f,g) + K[r] + W[r];
+				d += T1;
+				h = T1 + Sigma0(a) + Maj(a,b,c)
+				permute a,b,c,d,e,f,g,h into h,a,b,c,d,e,f,g
+		4. for r = 16:63
+				W[r] = W[r-16] + sigma1(W[r-2]) + W[r-7] + sigma0(W[r-15]);
+				T1 = h + Sigma1(e) + Ch(e,f,g) + K[r] + W[r];
+				d += T1;
+				h = T1 + Sigma0(a) + Maj(a,b,c)
+				permute a,b,c,d,e,f,g,h into h,a,b,c,d,e,f,g
+				
+	In the assembly implementation:	
+		- a circular window of message schedule W(r:r+15) is updated and stored in xmm0-xmm3
+		- its corresponding W+K(r:r+15) is updated and stored in a stack space circular buffer
+		- the 8 digests (a-h) are stored in GPRs or memory (all in GPRs for x86_64, some in m32 for i386)
+
+	the implementation per block looks like
+
+	----------------------------------------------------------------------------
+
+	load W(0:15) (big-endian per 4 bytes) into xmm0:xmm3 
+	pre_calculate and store W+K(0:15) in stack
+
+	load digests a-h from ctx->state;
+
+	for (r=0;r<48;r+=4) {
+		digests a-h update and permute round r:r+3
+		update W([r:r+3]%16) and WK([r:r+3]%16) for the next 4th iteration 
+	}
+
+	for (r=48;r<64;r+=4) {
+		digests a-h update and permute round r:r+3
+	}
+
+	ctx->states += digests a-h;
+
+	----------------------------------------------------------------------------
+
+	our implementation (which allows multiple blocks per call) pipelines the loading of W/WK for a future block
+	into the last 16 rounds of its previous block:
+
+	----------------------------------------------------------------------------
+
+	load W(0:15) (big-endian per 4 bytes) into xmm0:xmm3 
+	pre_calculate and store W+K(0:15) in stack
+
+L_loop:
+
+	load digests a-h from ctx->state;
+
+	for (r=0;r<48;r+=4) {
+		digests a-h update and permute round r:r+3
+		update W([r:r+3]%16) and WK([r:r+3]%16) for the next 4th iteration 
+	}
+
+	num_block--;
+	if (num_block==0)	jmp L_last_block;
+
+	for (r=48;r<64;r+=4) {
+		digests a-h update and permute round r:r+3
+		load W([r:r+3]%16) (big-endian per 4 bytes) into xmm0:xmm3 
+		pre_calculate and store W+K([r:r+3]%16) in stack
+	}
+
+	ctx->states += digests a-h;
+
+	jmp	L_loop;
+
+L_last_block:
+
+	for (r=48;r<64;r+=4) {
+		digests a-h update and permute round r:r+3
+	}
+
+	ctx->states += digests a-h;
+
+	------------------------------------------------------------------------
+
+	Apple CoreOS vector & numerics
+	cclee 8-3-10
+*/
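+
+/*
+	For reference, a minimal C sketch of the scalar round that the code below vectorizes -- a sketch
+	only, not the shipped implementation (the kernel's C version lives in sha2.c); Sigma0, Sigma1,
+	Ch, Maj and K256 are the standard SHA-256 functions and constant table referenced above:
+
+		uint32_t T1 = h + Sigma1(e) + Ch(e, f, g) + K256[r] + W[r];
+		uint32_t T2 = Sigma0(a) + Maj(a, b, c);
+		h = g; g = f; f = e; e = d + T1;
+		d = c; c = b; b = a; a = T1 + T2;
+*/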
+
+#if defined	KERNEL
+#include <i386/cpu_capabilities.h>
+#else
+#include <System/i386/cpu_capabilities.h>
+#endif
+
+	// associate variables with registers or memory
+
+#if defined	(__x86_64__)
+	#define	sp			%rsp
+	#define	ctx			%rdi
+	#define data		%rsi
+	#define	num_blocks	%rdx
+
+	#define	a			%r8d
+	#define	b			%r9d
+	#define	c			%r10d
+	#define	d			%r11d
+	#define	e			%r12d
+	#define	f			%r13d
+	#define	g			%r14d
+	#define	h			%r15d
+
+	#define	K			%rbx
+	#define stack_size	(8+16*8+16+64)	// 8 (align) + xmm0:xmm7 + L_aligned_bswap + WK(0:15)
+
+	#define	L_aligned_bswap	64(sp)		// bswap : big-endian loading of 4-byte words
+	#define	xmm_save	80(sp)			// starting address for xmm save/restore
+#else
+	#define	sp 	%esp
+	#define stack_size	(12+16*8+16+16+64)	// 12 (align) + xmm0:xmm7 + 16 (c,f,h,K) + L_aligned_bswap + WK(0:15)
+	#define	ctx_addr	20+stack_size(sp)	// ret_addr + 4 registers = 20, 1st caller argument
+	#define	data_addr	24+stack_size(sp)	// 2nd caller argument
+	#define	num_blocks	28+stack_size(sp)	// 3rd caller argument
+
+	#define	a	%ebx
+	#define	b	%edx
+	#define	c	64(sp)
+	#define	d	%ebp
+	#define	e	%esi
+	#define	f	68(sp)
+	#define	g	%edi
+	#define	h	72(sp)
+
+	#define	K	76(sp)					// pointer to K256[] table
+	#define	L_aligned_bswap	80(sp)		// bswap : big-endian loading of 4-byte words
+	#define	xmm_save	96(sp)			// starting address for xmm save/restore
+#endif
+
+	// 2 local variables
+	#define	t	%eax
+	#define	s	%ecx
+
+	// a window (16 words) of message schedule
+	#define	W0	%xmm0
+	#define	W1	%xmm1
+	#define	W2	%xmm2
+	#define	W3	%xmm3
+
+	// circular buffer for WK[(r:r+15)%16]
+	#define WK(x)   (x&15)*4(sp)
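+	// (each WK entry is one 32-bit W+K word: a 16-word / 64-byte window at the bottom of
+	// the stack frame, indexed by round number modulo 16)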
+
+// #define Ch(x,y,z)   (((x) & (y)) ^ ((~(x)) & (z)))
+
+	.macro Ch
+	mov		$0, t		// x
+	mov		$0, s		// x
+	not		t			// ~x
+	and		$1, s		// x & y
+	and		$2, t		// ~x & z
+	xor		s, t		// t = ((x) & (y)) ^ ((~(x)) & (z));
+	.endm
+
+// #define Maj(x,y,z)  (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+
+	.macro	Maj
+	mov		$0, t		// x
+	mov		$1, s		// y
+	and		s, t		// x&y
+	and		$2, s		// y&z
+	xor		s, t		// (x&y) ^ (y&z)
+	mov		$2, s		// z
+	and		$0, s		// (x&z)
+	xor		s, t		// t = (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) 
+	.endm
+
+/* Shift-right (used in SHA-256, SHA-384, and SHA-512): */
+// #define R(b,x)      ((x) >> (b))
+/* 32-bit Rotate-right (used in SHA-256): */
+// #define S32(b,x)    (((x) >> (b)) | ((x) << (32 - (b))))
+
+// #define sigma0_256(x)   (S32(7,  (x)) ^ S32(18, (x)) ^ R(3 ,   (x)))
+
+	// performs sigma0_256 on 4 words in an xmm register
+	// use xmm6/xmm7 as intermediate registers
+	.macro	sigma0
+	movdqa	$0, %xmm6
+	movdqa	$0, %xmm7
+	psrld	$$3, $0			// SHR3(x)
+	psrld	$$7, %xmm6		// part of ROTR7
+	pslld	$$14, %xmm7		// part of ROTR18
+	pxor	%xmm6, $0
+	pxor	%xmm7, $0
+	psrld	$$11, %xmm6		// part of ROTR18
+	pslld	$$11, %xmm7		// part of ROTR7
+	pxor	%xmm6, $0
+	pxor	%xmm7, $0
+	.endm
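+	// note: ROTR7(x) = (x>>7)|(x<<25) and ROTR18(x) = (x>>18)|(x<<14), so the intermediate
+	// registers are reused: psrld 7 then psrld 11 gives x>>18, and pslld 14 then pslld 11 gives x<<25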
+
+// #define sigma1_256(x)   (S32(17, (x)) ^ S32(19, (x)) ^ R(10,   (x)))
+
+	// performs sigma1_256 on 4 words in an xmm register
+	// use xmm6/xmm7 as intermediate registers
+	.macro	sigma1
+	movdqa	$0, %xmm6
+	movdqa	$0, %xmm7
+	psrld	$$10, $0		// SHR10(x)
+	psrld	$$17, %xmm6		// part of ROTR17
+	pxor	%xmm6, $0
+	pslld	$$13, %xmm7		// part of ROTR19
+	pxor	%xmm7, $0
+	psrld	$$2, %xmm6		// part of ROTR19
+	pxor	%xmm6, $0
+	pslld	$$2, %xmm7		// part of ROTR17
+	pxor	%xmm7, $0
+	.endm
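+	// note: likewise ROTR17(x) = (x>>17)|(x<<15) and ROTR19(x) = (x>>19)|(x<<13); the extra
+	// psrld 2 / pslld 2 steps turn the ROTR17 partials into the ROTR19 partials and vice versa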
+
+// #define Sigma0_256(x)   (S32(2,  (x)) ^ S32(13, (x)) ^ S32(22, (x)))
+
+	.macro	Sigma0
+	mov		$0, t			// x
+	mov		$0, s			// x
+	ror		$$2, t			// S32(2,  (x))
+	ror		$$13, s			// S32(13,  (x))
+	xor		s, t			// S32(2,  (x)) ^ S32(13, (x))
+	ror		$$9, s			// S32(22,  (x))
+	xor		s, t			// t = (S32(2,  (x)) ^ S32(13, (x)) ^ S32(22, (x)))
+	.endm
+
+// #define Sigma1_256(x)   (S32(6,  (x)) ^ S32(11, (x)) ^ S32(25, (x)))
+
+	.macro	Sigma1
+	mov		$0, s			// x
+	ror		$$6, s			// S32(6,  (x))
+	mov		s, t			// S32(6,  (x))
+	ror		$$5, s			// S32(11, (x))
+	xor		s, t			// S32(6,  (x)) ^ S32(11, (x))
+	ror		$$14, s			// S32(25, (x))
+	xor		s, t			// t = (S32(6,  (x)) ^ S32(11, (x)) ^ S32(25, (x)))
+	.endm
+
+	// per round digests update
+	.macro	round
+	Sigma1	$4				// t = Sigma1(e)
+	add		t, $7			// use h to store h+Sigma1(e)
+	Ch		$4, $5, $6		// t = Ch (e, f, g);
+	add		$7, t			// t = h+Sigma1(e)+Ch(e,f,g);
+	add		WK($8), t		// t = T1 = h + Sigma1(e) + Ch(e,f,g) + K[r] + W[r]
+	add		t, $3			// d += T1;
+	mov		t, $7			// h = T1
+	Sigma0	$0				// t = Sigma0(a);
+	add		t, $7			// h = T1 + Sigma0(a);
+	Maj		$0, $1, $2		// t = Maj(a,b,c)
+	add		t, $7			// h = T1 + Sigma0(a) + Maj(a,b,c);			
+	.endm
+
+	// per 4 rounds digests update and permutation
+	// permutation is absorbed by rotating the roles of digests a-h
+	.macro	rounds
+	round	$0, $1, $2, $3, $4, $5, $6, $7, 0+$8
+	round	$7, $0, $1, $2, $3, $4, $5, $6, 1+$8
+	round	$6, $7, $0, $1, $2, $3, $4, $5, 2+$8
+	round	$5, $6, $7, $0, $1, $2, $3, $4, 3+$8
+	.endm
+
+	// update the message schedule W and W+K (4 rounds) 16 rounds ahead in the future 
+	.macro	message_schedule
+
+	// 4 32-bit K256 words in xmm5
+#if defined	(__x86_64__)
+	movdqu	(K), %xmm5
+#else
+	mov		K, t
+	movdqu	(t), %xmm5 
+#endif	
+	add		$$16, K				// K points to next K256 word for next iteration
+	movdqa	$1, %xmm4 			// W7:W4
+	palignr	$$4, $0, %xmm4		// W4:W1
+	sigma0	%xmm4				// sigma0(W4:W1)
+	movdqa	$3, %xmm6 			// W15:W12
+	paddd	%xmm4, $0			// $0 = W3:W0 + sigma0(W4:W1) 
+	palignr	$$4, $2, %xmm6		// W12:W9
+	paddd	%xmm6, $0			// $0 = W12:W9 + sigma0(W4:W1) + W3:W0	
+	movdqa	$3, %xmm4			// W15:W12
+	psrldq	$$8, %xmm4			// 0,0,W15,W14	
+	sigma1	%xmm4				// sigma1(0,0,W15,W14)
+	paddd	%xmm4, $0			// sigma1(0,0,W15,W14) + W12:W9 + sigma0(W4:W1) + W3:W0
+	movdqa	$0, %xmm4			// W19-sigma1(W17), W18-sigma1(W16), W17, W16
+	pslldq	$$8, %xmm4			// W17, W16, 0, 0
+	sigma1	%xmm4				// sigma1(W17,W16,0,0)
+	paddd	%xmm4, $0			// W19:W16
+	paddd	$0, %xmm5			// WK
+	movdqa	%xmm5, WK($4)
+	.endm
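+	// note: the macro above computes W[r+16:r+19] per the recurrence
+	//     W[r] = W[r-16] + sigma0(W[r-15]) + W[r-7] + sigma1(W[r-2])
+	// sigma1 is applied in two steps because W[r+18]/W[r+19] depend on the just-computed W[r+16]/W[r+17]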
+
+	// this macro is used in the last 16 rounds of the current block
+	// it reads the next message block (16 4-byte words, over 4 invocations), loads 4 words into W[r:r+3],
+	// computes WK[r:r+3], and saves them to the stack to prepare for the next block
+
+	.macro	update_W_WK
+#if defined (__x86_64__)
+	movdqu	$0*16(data), $1		// read 4 4-byte words
+	pshufb	L_aligned_bswap, $1	// big-endian of each 4-byte word, W[r:r+3]
+	movdqu	$0*16(K), %xmm4		// K[r:r+3]
+#else
+	mov		data_addr, t
+	movdqu	$0*16(t), $1		// read 4 4-byte words
+	pshufb	L_aligned_bswap, $1	// big-endian of each 4-byte word, W[r:r+3]
+	mov		K, t
+	movdqu	$0*16(t), %xmm4		// K[r:r+3]
+#endif
+	paddd	$1, %xmm4			// WK[r:r+3]
+	movdqa	%xmm4, WK($0*4)		// save WK[r:r+3] into stack circular buffer
+	.endm
+
+	.text
+
+#if defined (__x86_64__) || defined (__i386__)
+
+	.globl	_SHA256_Transform
+
+_SHA256_Transform:
+
+
+	// detect SSSE3 and dispatch appropriate code branch
+	#if defined __x86_64__
+        movq    __cpu_capabilities@GOTPCREL(%rip), %rax         // %rax -> __cpu_capabilities
+        mov     (%rax), %eax                                    // %eax = __cpu_capabilities
+    #else       // i386
+        #if defined KERNEL
+            leal    __cpu_capabilities, %eax                    // %eax -> __cpu_capabilities
+            mov     (%eax), %eax                                // %eax = __cpu_capabilities
+        #else
+            mov    _COMM_PAGE_CPU_CAPABILITIES, %eax
+        #endif
+    #endif
+    test    $(kHasSupplementalSSE3), %eax
+    je      _SHA256_Transform_nossse3                              // branch to no-ssse3 code
+
+	// push callee-saved registers
+#if defined	(__x86_64__)
+	push	%rbp
+	push	%rbx
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+#else
+    push    %ebp
+	push    %ebx
+    push    %esi
+    push    %edi
+#endif
+
+	// allocate stack space
+	sub		$stack_size, sp
+
+	// if kernel code, save used xmm registers
+#if	KERNEL
+	movdqa	%xmm0, 0*16+xmm_save
+	movdqa	%xmm1, 1*16+xmm_save
+	movdqa	%xmm2, 2*16+xmm_save
+	movdqa	%xmm3, 3*16+xmm_save
+	movdqa	%xmm4, 4*16+xmm_save
+	movdqa	%xmm5, 5*16+xmm_save
+	movdqa	%xmm6, 6*16+xmm_save
+	movdqa	%xmm7, 7*16+xmm_save
+#endif
+
+	// set up bswap parameters in the aligned stack space and pointer to table K256[]
+#if defined (__x86_64__)
+	lea		_K256(%rip), K
+	lea		L_bswap(%rip), %rax
+	movdqa	(%rax), %xmm0
+#else
+	lea		_K256, t
+	mov		t, K
+	lea		L_bswap, %eax
+	movdqa	(%eax), %xmm0
+#endif
+	movdqa	%xmm0, L_aligned_bswap	
+
+	// load W[0:15] into xmm0-xmm3
+#if defined (__x86_64__)
+	movdqu	0*16(data), W0
+	movdqu	1*16(data), W1
+	movdqu	2*16(data), W2
+	movdqu	3*16(data), W3
+	add		$64, data
+#else
+	mov		data_addr, t
+	movdqu	0*16(t), W0
+	movdqu	1*16(t), W1
+	movdqu	2*16(t), W2
+	movdqu	3*16(t), W3
+	add		$64, data_addr
+#endif
+	pshufb	L_aligned_bswap, W0
+	pshufb	L_aligned_bswap, W1
+	pshufb	L_aligned_bswap, W2
+	pshufb	L_aligned_bswap, W3
+
+	// compute WK[0:15] and save in stack
+#if defined (__x86_64__)
+	movdqu	0*16(K), %xmm4	
+	movdqu	1*16(K), %xmm5
+	movdqu	2*16(K), %xmm6	
+	movdqu	3*16(K), %xmm7
+#else
+	mov		K, t
+	movdqu	0*16(t), %xmm4	
+	movdqu	1*16(t), %xmm5
+	movdqu	2*16(t), %xmm6	
+	movdqu	3*16(t), %xmm7
+#endif
+	add		$64, K
+	paddd	%xmm0, %xmm4
+	paddd	%xmm1, %xmm5
+	paddd	%xmm2, %xmm6
+	paddd	%xmm3, %xmm7
+	movdqa	%xmm4, WK(0)
+	movdqa	%xmm5, WK(4)
+	movdqa	%xmm6, WK(8)
+	movdqa	%xmm7, WK(12)
+
+L_loop:
+
+	// digests a-h = ctx->states;
+#if defined (__x86_64__)
+	mov		0*4(ctx), a
+	mov		1*4(ctx), b
+	mov		2*4(ctx), c
+	mov		3*4(ctx), d
+	mov		4*4(ctx), e
+	mov		5*4(ctx), f
+	mov		6*4(ctx), g
+	mov		7*4(ctx), h
+#else
+	mov		ctx_addr, t
+	mov 	0*4(t), a
+	mov 	1*4(t), b
+	mov 	2*4(t), s
+	mov		s, c
+	mov 	3*4(t), d
+	mov 	4*4(t), e
+	mov 	5*4(t), s
+	mov		s, f
+	mov 	6*4(t), g
+	mov 	7*4(t), s
+	mov		s, h
+#endif
+
+	// rounds 0:47 interleaved with W/WK update for rounds 16:63
+	rounds	a, b, c, d, e, f, g, h, 0
+	message_schedule W0,W1,W2,W3,16
+	rounds	e, f, g, h, a, b, c, d, 4 
+	message_schedule W1,W2,W3,W0,20
+	rounds	a, b, c, d, e, f, g, h, 8
+	message_schedule W2,W3,W0,W1,24
+	rounds	e, f, g, h, a, b, c, d, 12 
+	message_schedule W3,W0,W1,W2,28
+	rounds	a, b, c, d, e, f, g, h, 16
+	message_schedule W0,W1,W2,W3,32
+	rounds	e, f, g, h, a, b, c, d, 20 
+	message_schedule W1,W2,W3,W0,36
+	rounds	a, b, c, d, e, f, g, h, 24
+	message_schedule W2,W3,W0,W1,40
+	rounds	e, f, g, h, a, b, c, d, 28 
+	message_schedule W3,W0,W1,W2,44
+	rounds	a, b, c, d, e, f, g, h, 32
+	message_schedule W0,W1,W2,W3,48
+	rounds	e, f, g, h, a, b, c, d, 36 
+	message_schedule W1,W2,W3,W0,52
+	rounds	a, b, c, d, e, f, g, h, 40
+	message_schedule W2,W3,W0,W1,56
+	rounds	e, f, g, h, a, b, c, d, 44 
+	message_schedule W3,W0,W1,W2,60
+
+	// revert K to the beginning of K256[]
+#if defined __x86_64__
+	sub		$256, K
+#else
+	subl	$256, K
+#endif
+
+	sub		$1, num_blocks				// num_blocks--
+	je		L_final_block				// if final block, wrap up final rounds
+
+	// rounds 48:63 interleaved with W/WK initialization for next block rounds 0:15 
+	rounds	a, b, c, d, e, f, g, h, 48
+	update_W_WK	0, W0
+	rounds	e, f, g, h, a, b, c, d, 52 
+	update_W_WK	1, W1
+	rounds	a, b, c, d, e, f, g, h, 56
+	update_W_WK	2, W2
+	rounds	e, f, g, h, a, b, c, d, 60 
+	update_W_WK	3, W3
+
+	add		$64, K
+#if defined (__x86_64__)
+	add		$64, data
+#else
+	add		$64, data_addr
+#endif
+
+	// ctx->states += digests a-h
+#if	defined (__x86_64__)
+	add		a, 0*4(ctx)
+	add		b, 1*4(ctx)
+	add		c, 2*4(ctx)
+	add		d, 3*4(ctx)
+	add		e, 4*4(ctx)
+	add		f, 5*4(ctx)
+	add		g, 6*4(ctx)
+	add		h, 7*4(ctx)
+#else
+	mov		ctx_addr, t
+	add		a, 0*4(t)
+	add		b, 1*4(t)
+	mov		c, s
+	add		s, 2*4(t)
+	add		d, 3*4(t)
+	add		e, 4*4(t)
+	mov		f, s
+	add		s, 5*4(t)
+	add		g, 6*4(t)
+	mov		h, s
+	add		s, 7*4(t)
+#endif
+
+	jmp		L_loop				// branch for next block
+
+	// wrap up digest update round 48:63 for final block
+L_final_block:
+	rounds	a, b, c, d, e, f, g, h, 48
+	rounds	e, f, g, h, a, b, c, d, 52 
+	rounds	a, b, c, d, e, f, g, h, 56
+	rounds	e, f, g, h, a, b, c, d, 60 
+
+	// ctx->states += digests a-h
+#if	defined (__x86_64__)
+	add		a, 0*4(ctx)
+	add		b, 1*4(ctx)
+	add		c, 2*4(ctx)
+	add		d, 3*4(ctx)
+	add		e, 4*4(ctx)
+	add		f, 5*4(ctx)
+	add		g, 6*4(ctx)
+	add		h, 7*4(ctx)
+#else
+	mov		ctx_addr, t
+	add		a, 0*4(t)
+	add		b, 1*4(t)
+	mov		c, s
+	add		s, 2*4(t)
+	add		d, 3*4(t)
+	add		e, 4*4(t)
+	mov		f, s
+	add		s, 5*4(t)
+	add		g, 6*4(t)
+	mov		h, s
+	add		s, 7*4(t)
+#endif
+
+	// if kernel, restore xmm0-xmm7
+#if	KERNEL
+	movdqa	0*16+xmm_save, %xmm0
+	movdqa	1*16+xmm_save, %xmm1
+	movdqa	2*16+xmm_save, %xmm2
+	movdqa	3*16+xmm_save, %xmm3
+	movdqa	4*16+xmm_save, %xmm4
+	movdqa	5*16+xmm_save, %xmm5
+	movdqa	6*16+xmm_save, %xmm6
+	movdqa	7*16+xmm_save, %xmm7
+#endif
+
+	// free allocated stack memory
+	add		$stack_size, sp
+
+	// restore callee-saved registers
+#if defined (__x86_64__)
+	pop		%r15
+	pop		%r14
+	pop		%r13
+	pop		%r12
+	pop		%rbx
+	pop		%rbp
+#else
+    pop		%edi
+    pop		%esi
+	pop		%ebx
+    pop		%ebp
+#endif
+
+	// return
+	ret
+
+
+	.const
+	.align	4, 0x90
+
+L_bswap:
+    .long   0x00010203
+    .long   0x04050607
+    .long   0x08090a0b
+    .long   0x0c0d0e0f
+
+#endif		// x86_64/i386
+
diff --git a/bsd/crypto/sha2/intel/sha256nossse3.s b/bsd/crypto/sha2/intel/sha256nossse3.s
new file mode 100644
index 000000000..b4dd0a035
--- /dev/null
+++ b/bsd/crypto/sha2/intel/sha256nossse3.s
@@ -0,0 +1,649 @@
+/*
+	This file provides a hand-tuned x86_64/i386 implementation of the following function
+
+	void SHA256_Transform(SHA256_ctx *ctx, char *data, unsigned int num_blocks);
+
+	which is a C function in sha2.c (from xnu).
+
+	The code SHA256_Transform_nossse3 is a clone of SHA256_Transform
+	with all SSSE3 instructions replaced by SSE3-or-below instructions.
+
+	For performance reasons, this function should not be called directly. This file works together
+	with the one that implements SHA256_Transform, where cpu_capabilities is probed to detect SSSE3.
+	If SSSE3 is not supported, execution branches to this no-SSSE3 variant.
+
+	sha256 algorithm per block description:
+
+		1. W(0:15) = big-endian (per 4 bytes) loading of input data (64 bytes)
+		2. load 8 digests a-h from ctx->state
+		3. for r = 0:15
+				T1 = h + Sigma1(e) + Ch(e,f,g) + K[r] + W[r];
+				d += T1;
+				h = T1 + Sigma0(a) + Maj(a,b,c)
+				permute a,b,c,d,e,f,g,h into h,a,b,c,d,e,f,g
+		4. for r = 16:63
+				W[r] = W[r-16] + sigma1(W[r-2]) + W[r-7] + sigma0(W[r-15]);
+				T1 = h + Sigma1(e) + Ch(e,f,g) + K[r] + W[r];
+				d += T1;
+				h = T1 + Sigma0(a) + Maj(a,b,c)
+				permute a,b,c,d,e,f,g,h into h,a,b,c,d,e,f,g
+				
+	In the assembly implementation:	
+		- a circular window of message schedule W(r:r+15) is updated and stored in xmm0-xmm3
+		- its corresponding W+K(r:r+15) is updated and stored in a stack space circular buffer
+		- the 8 digests (a-h) are stored in GPRs or memory (all in GPRs for x86_64, some in m32 for i386)
+
+	the implementation per block looks like
+
+	----------------------------------------------------------------------------
+
+	load W(0:15) (big-endian per 4 bytes) into xmm0:xmm3 
+	pre_calculate and store W+K(0:15) in stack
+
+	load digests a-h from ctx->state;
+
+	for (r=0;r<48;r+=4) {
+		digests a-h update and permute round r:r+3
+		update W([r:r+3]%16) and WK([r:r+3]%16) for the next 4th iteration 
+	}
+
+	for (r=48;r<64;r+=4) {
+		digests a-h update and permute round r:r+3
+	}
+
+	ctx->states += digests a-h;
+
+	----------------------------------------------------------------------------
+
+	our implementation (which allows multiple blocks per call) pipelines the loading of W/WK for a future block
+	into the last 16 rounds of its previous block:
+
+	----------------------------------------------------------------------------
+
+	load W(0:15) (big-endian per 4 bytes) into xmm0:xmm3 
+	pre_calculate and store W+K(0:15) in stack
+
+L_loop:
+
+	load digests a-h from ctx->state;
+
+	for (r=0;r<48;r+=4) {
+		digests a-h update and permute round r:r+3
+		update W([r:r+3]%16) and WK([r:r+3]%16) for the next 4th iteration 
+	}
+
+	num_block--;
+	if (num_block==0)	jmp L_last_block;
+
+	for (r=48;r<64;r+=4) {
+		digests a-h update and permute round r:r+3
+		load W([r:r+3]%16) (big-endian per 4 bytes) into xmm0:xmm3 
+		pre_calculate and store W+K([r:r+3]%16) in stack
+	}
+
+	ctx->states += digests a-h;
+
+	jmp	L_loop;
+
+L_last_block:
+
+	for (r=48;r<64;r+=4) {
+		digests a-h update and permute round r:r+3
+	}
+
+	ctx->states += digests a-h;
+
+	------------------------------------------------------------------------
+
+	Apple CoreOS vector & numerics
+	cclee 8-3-10
+*/
+
+#if defined	KERNEL
+#include <i386/cpu_capabilities.h>
+#else
+#include <System/i386/cpu_capabilities.h>
+#endif
+
+	// associate variables with registers or memory
+
+#if defined	(__x86_64__)
+	#define	sp			%rsp
+	#define	ctx			%rdi
+	#define data		%rsi
+	#define	num_blocks	%rdx
+
+	#define	a			%r8d
+	#define	b			%r9d
+	#define	c			%r10d
+	#define	d			%r11d
+	#define	e			%r12d
+	#define	f			%r13d
+	#define	g			%r14d
+	#define	h			%r15d
+
+	#define	K			%rbx
+	#define stack_size	(8+16*8+16+64)	// 8 (align) + xmm0:xmm7 + L_aligned_bswap + WK(0:15)
+
+	#define	xmm_save	80(sp)			// starting address for xmm save/restore
+#else
+	#define	sp 	%esp
+	#define stack_size	(12+16*8+16+16+64)	// 12 (align) + xmm0:xmm7 + 16 (c,f,h,K) + L_aligned_bswap + WK(0:15)
+	#define	ctx_addr	20+stack_size(sp)	// ret_addr + 4 registers = 20, 1st caller argument
+	#define	data_addr	24+stack_size(sp)	// 2nd caller argument
+	#define	num_blocks	28+stack_size(sp)	// 3rd caller argument
+
+	#define	a	%ebx
+	#define	b	%edx
+	#define	c	64(sp)
+	#define	d	%ebp
+	#define	e	%esi
+	#define	f	68(sp)
+	#define	g	%edi
+	#define	h	72(sp)
+
+	#define	K	76(sp)					// pointer to K256[] table
+	#define	xmm_save	96(sp)			// starting address for xmm save/restore
+#endif
+
+	// 2 local variables
+	#define	t	%eax
+	#define	s	%ecx
+
+	// a window (16 words) of message schedule
+	#define	W0	%xmm0
+	#define	W1	%xmm1
+	#define	W2	%xmm2
+	#define	W3	%xmm3
+
+	// circular buffer for WK[(r:r+15)%16]
+	#define WK(x)   (x&15)*4(sp)
+
+// #define Ch(x,y,z)   (((x) & (y)) ^ ((~(x)) & (z)))
+
+	.macro Ch
+	mov		$0, t		// x
+	mov		$0, s		// x
+	not		t			// ~x
+	and		$1, s		// x & y
+	and		$2, t		// ~x & z
+	xor		s, t		// t = ((x) & (y)) ^ ((~(x)) & (z));
+	.endm
+
+// #define Maj(x,y,z)  (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+
+	.macro	Maj
+	mov		$0, t		// x
+	mov		$1, s		// y
+	and		s, t		// x&y
+	and		$2, s		// y&z
+	xor		s, t		// (x&y) ^ (y&z)
+	mov		$2, s		// z
+	and		$0, s		// (x&z)
+	xor		s, t		// t = (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) 
+	.endm
+
+/* Shift-right (used in SHA-256, SHA-384, and SHA-512): */
+// #define R(b,x)      ((x) >> (b))
+/* 32-bit Rotate-right (used in SHA-256): */
+// #define S32(b,x)    (((x) >> (b)) | ((x) << (32 - (b))))
+
+// #define sigma0_256(x)   (S32(7,  (x)) ^ S32(18, (x)) ^ R(3 ,   (x)))
+
+	// performs sigma0_256 on 4 words in an xmm register
+	// use xmm6/xmm7 as intermediate registers
+	.macro	sigma0
+	movdqa	$0, %xmm6
+	movdqa	$0, %xmm7
+	psrld	$$3, $0			// SHR3(x)
+	psrld	$$7, %xmm6		// part of ROTR7
+	pslld	$$14, %xmm7		// part of ROTR18
+	pxor	%xmm6, $0
+	pxor	%xmm7, $0
+	psrld	$$11, %xmm6		// part of ROTR18
+	pslld	$$11, %xmm7		// part of ROTR7
+	pxor	%xmm6, $0
+	pxor	%xmm7, $0
+	.endm
+
+// #define sigma1_256(x)   (S32(17, (x)) ^ S32(19, (x)) ^ R(10,   (x)))
+
+	// performs sigma1_256 on 4 words in an xmm register
+	// use xmm6/xmm7 as intermediate registers
+	.macro	sigma1
+	movdqa	$0, %xmm6
+	movdqa	$0, %xmm7
+	psrld	$$10, $0		// SHR10(x)
+	psrld	$$17, %xmm6		// part of ROTR17
+	pxor	%xmm6, $0
+	pslld	$$13, %xmm7		// part of ROTR19
+	pxor	%xmm7, $0
+	psrld	$$2, %xmm6		// part of ROTR19
+	pxor	%xmm6, $0
+	pslld	$$2, %xmm7		// part of ROTR17
+	pxor	%xmm7, $0
+	.endm
+
+// #define Sigma0_256(x)   (S32(2,  (x)) ^ S32(13, (x)) ^ S32(22, (x)))
+
+	.macro	Sigma0
+	mov		$0, t			// x
+	mov		$0, s			// x
+	ror		$$2, t			// S32(2,  (x))
+	ror		$$13, s			// S32(13,  (x))
+	xor		s, t			// S32(2,  (x)) ^ S32(13, (x))
+	ror		$$9, s			// S32(22,  (x))
+	xor		s, t			// t = (S32(2,  (x)) ^ S32(13, (x)) ^ S32(22, (x)))
+	.endm
+
+// #define Sigma1_256(x)   (S32(6,  (x)) ^ S32(11, (x)) ^ S32(25, (x)))
+
+	.macro	Sigma1
+	mov		$0, s			// x
+	ror		$$6, s			// S32(6,  (x))
+	mov		s, t			// S32(6,  (x))
+	ror		$$5, s			// S32(11, (x))
+	xor		s, t			// S32(6,  (x)) ^ S32(11, (x))
+	ror		$$14, s			// S32(25, (x))
+	xor		s, t			// t = (S32(6,  (x)) ^ S32(11, (x)) ^ S32(25, (x)))
+	.endm
+
+	// per round digests update
+	.macro	round
+	Sigma1	$4				// t = Sigma1(e)
+	add		t, $7			// use h to store h+Sigma1(e)
+	Ch		$4, $5, $6		// t = Ch (e, f, g);
+	add		$7, t			// t = h+Sigma1(e)+Ch(e,f,g);
+	add		WK($8), t		// t = T1 = h + Sigma1(e) + Ch(e,f,g) + K[r] + W[r]
+	add		t, $3			// d += T1;
+	mov		t, $7			// h = T1
+	Sigma0	$0				// t = Sigma0(a);
+	add		t, $7			// h = T1 + Sigma0(a);
+	Maj		$0, $1, $2		// t = Maj(a,b,c)
+	add		t, $7			// h = T1 + Sigma0(a) + Maj(a,b,c);			
+	.endm
+
+	// per 4 rounds digests update and permutation
+	// permutation is absorbed by rotating the roles of digests a-h
+	.macro	rounds
+	round	$0, $1, $2, $3, $4, $5, $6, $7, 0+$8
+	round	$7, $0, $1, $2, $3, $4, $5, $6, 1+$8
+	round	$6, $7, $0, $1, $2, $3, $4, $5, 2+$8
+	round	$5, $6, $7, $0, $1, $2, $3, $4, 3+$8
+	.endm
+
+	// update the message schedule W and W+K (4 rounds) 16 rounds ahead in the future 
+	.macro	message_schedule
+
+	// 4 32-bit K256 words in xmm5
+#if defined	(__x86_64__)
+	movdqu	(K), %xmm5
+#else
+	mov		K, t
+	movdqu	(t), %xmm5 
+#endif	
+	add		$$16, K				// K points to next K256 word for next iteration
+	movdqa	$1, %xmm4 			// W7:W4
+#if 0
+	palignr	$$4, $0, %xmm4		// W4:W1
+#else	// no-ssse3 implementation of palignr
+	movdqa  $0, %xmm7
+    pslldq  $$12, %xmm4
+    psrldq  $$4, %xmm7
+    por     %xmm7, %xmm4
+#endif
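+	// note: palignr $4, src, dst computes dst = (dst:src) >> 4 bytes; the replacement forms
+	// (dst << 12 bytes) | (src >> 4 bytes), which yields the same 4-word window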
+	sigma0	%xmm4				// sigma0(W4:W1)
+	movdqa	$3, %xmm6 			// W15:W12
+	paddd	%xmm4, $0			// $0 = W3:W0 + sigma0(W4:W1) 
+#if 0
+	palignr	$$4, $2, %xmm6		// W12:W9
+#else	// no-ssse3 implementation of palignr
+	movdqa  $2, %xmm7
+    pslldq  $$12, %xmm6
+    psrldq  $$4, %xmm7
+    por     %xmm7, %xmm6
+#endif
+	paddd	%xmm6, $0			// $0 = W12:W9 + sigma0(W4:W1) + W3:W0	
+	movdqa	$3, %xmm4			// W15:W12
+	psrldq	$$8, %xmm4			// 0,0,W15,W14	
+	sigma1	%xmm4				// sigma1(0,0,W15,W14)
+	paddd	%xmm4, $0			// sigma1(0,0,W15,W14) + W12:W9 + sigma0(W4:W1) + W3:W0
+	movdqa	$0, %xmm4			// W19-sigma1(W17), W18-sigma1(W16), W17, W16
+	pslldq	$$8, %xmm4			// W17, W16, 0, 0
+	sigma1	%xmm4				// sigma1(W17,W16,0,0)
+	paddd	%xmm4, $0			// W19:W16
+	paddd	$0, %xmm5			// WK
+	movdqa	%xmm5, WK($4)
+	.endm
+
+	// this macro is used in the last 16 rounds of the current block
+	// it reads the next message block (16 4-byte words, over 4 invocations), loads 4 words into W[r:r+3],
+	// computes WK[r:r+3], and saves them to the stack to prepare for the next block
+
+	.macro	update_W_WK
+#if defined (__x86_64__)
+#if 0
+	movdqu	$0*16(data), $1		// read 4 4-byte words
+	pshufb	L_aligned_bswap, $1	// big-endian of each 4-byte word, W[r:r+3]
+#else	// no-ssse3 implementation
+	mov     0+$0*16(data), s
+    bswap   s
+    mov     s, 0+WK($0*4)
+    mov     4+$0*16(data), s
+    bswap   s
+    mov     s, 4+WK($0*4)
+    mov     8+$0*16(data), s
+    bswap   s
+    mov     s, 8+WK($0*4)
+    mov     12+$0*16(data), s
+    bswap   s
+    mov     s, 12+WK($0*4)
+    movdqa  WK($0*4), $1
+#endif
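+	// note: WK($0*4) is used above as 16-byte-aligned scratch for the byte-swapped words;
+	// it is overwritten with the real W+K values at the end of this macro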
+	movdqu	$0*16(K), %xmm4		// K[r:r+3]
+#else
+	mov		data_addr, t
+#if 0
+	movdqu	$0*16(t), $1		// read 4 4-byte words
+	pshufb	L_aligned_bswap, $1	// big-endian of each 4-byte word, W[r:r+3]
+#else	// no-ssse3 implementation
+	mov     0+$0*16(t), s
+    bswap   s
+    mov     s, 0+WK($0*4)
+    mov     4+$0*16(t), s
+    bswap   s
+    mov     s, 4+WK($0*4)
+    mov     8+$0*16(t), s
+    bswap   s
+    mov     s, 8+WK($0*4)
+    mov     12+$0*16(t), s
+    bswap   s
+    mov     s, 12+WK($0*4)
+    movdqa  WK($0*4), $1
+#endif
+	mov		K, t
+	movdqu	$0*16(t), %xmm4		// K[r:r+3]
+#endif
+	paddd	$1, %xmm4			// WK[r:r+3]
+	movdqa	%xmm4, WK($0*4)		// save WK[r:r+3] into stack circular buffer
+	.endm
+
+	.text
+
+#if defined (__x86_64__) || defined (__i386__)
+
+	.globl	_SHA256_Transform_nossse3
+
+_SHA256_Transform_nossse3:
+
+	// push callee-saved registers
+#if defined	(__x86_64__)
+	push	%rbp
+	push	%rbx
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+#else
+    push    %ebp
+	push    %ebx
+    push    %esi
+    push    %edi
+#endif
+
+	// allocate stack space
+	sub		$stack_size, sp
+
+	// if kernel code, save used xmm registers
+#if	KERNEL
+	movdqa	%xmm0, 0*16+xmm_save
+	movdqa	%xmm1, 1*16+xmm_save
+	movdqa	%xmm2, 2*16+xmm_save
+	movdqa	%xmm3, 3*16+xmm_save
+	movdqa	%xmm4, 4*16+xmm_save
+	movdqa	%xmm5, 5*16+xmm_save
+	movdqa	%xmm6, 6*16+xmm_save
+	movdqa	%xmm7, 7*16+xmm_save
+#endif
+
+	// set up pointer to table K256[]
+#if defined (__x86_64__)
+	lea		_K256(%rip), K
+#else
+	lea		_K256, t
+	mov		t, K
+#endif
+
+	// load W[0:15] into xmm0-xmm3
+    .macro  mybswap
+    movl    0+$0*16($1), a
+    movl    4+$0*16($1), b
+    movl    8+$0*16($1), e
+    movl    12+$0*16($1), d
+    bswap   a
+    bswap   b
+    bswap   e
+    bswap   d
+    movl    a, $0*16(sp)
+    movl    b, 4+$0*16(sp)
+    movl    e, 8+$0*16(sp)
+    movl    d, 12+$0*16(sp)
+    .endm
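+    // note: mybswap clobbers a, b, e and d as temporaries; this is safe only because the
+    // digest variables are not loaded from ctx->state until L_loop below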
+
+#if defined (__x86_64__)
+    mybswap 0, data
+    mybswap 1, data
+    mybswap 2, data
+    mybswap 3, data
+    add     $64, data
+#else
+    mov     data_addr, t
+    mybswap 0, t
+    mybswap 1, t
+    mybswap 2, t
+    mybswap 3, t
+    add     $64, data_addr
+#endif
+    movdqa  0*16(sp), W0
+    movdqa  1*16(sp), W1
+    movdqa  2*16(sp), W2
+    movdqa  3*16(sp), W3
+
+	// compute WK[0:15] and save in stack
+#if defined (__x86_64__)
+	movdqu	0*16(K), %xmm4	
+	movdqu	1*16(K), %xmm5
+	movdqu	2*16(K), %xmm6	
+	movdqu	3*16(K), %xmm7
+#else
+	mov		K, t
+	movdqu	0*16(t), %xmm4	
+	movdqu	1*16(t), %xmm5
+	movdqu	2*16(t), %xmm6	
+	movdqu	3*16(t), %xmm7
+#endif
+	add		$64, K
+	paddd	%xmm0, %xmm4
+	paddd	%xmm1, %xmm5
+	paddd	%xmm2, %xmm6
+	paddd	%xmm3, %xmm7
+	movdqa	%xmm4, WK(0)
+	movdqa	%xmm5, WK(4)
+	movdqa	%xmm6, WK(8)
+	movdqa	%xmm7, WK(12)
+
+L_loop:
+
+	// digests a-h = ctx->states;
+#if defined (__x86_64__)
+	mov		0*4(ctx), a
+	mov		1*4(ctx), b
+	mov		2*4(ctx), c
+	mov		3*4(ctx), d
+	mov		4*4(ctx), e
+	mov		5*4(ctx), f
+	mov		6*4(ctx), g
+	mov		7*4(ctx), h
+#else
+	mov		ctx_addr, t
+	mov 	0*4(t), a
+	mov 	1*4(t), b
+	mov 	2*4(t), s
+	mov		s, c
+	mov 	3*4(t), d
+	mov 	4*4(t), e
+	mov 	5*4(t), s
+	mov		s, f
+	mov 	6*4(t), g
+	mov 	7*4(t), s
+	mov		s, h
+#endif
+
+	// rounds 0:47 interleaved with W/WK update for rounds 16:63
+	rounds	a, b, c, d, e, f, g, h, 0
+	message_schedule W0,W1,W2,W3,16
+	rounds	e, f, g, h, a, b, c, d, 4 
+	message_schedule W1,W2,W3,W0,20
+	rounds	a, b, c, d, e, f, g, h, 8
+	message_schedule W2,W3,W0,W1,24
+	rounds	e, f, g, h, a, b, c, d, 12 
+	message_schedule W3,W0,W1,W2,28
+	rounds	a, b, c, d, e, f, g, h, 16
+	message_schedule W0,W1,W2,W3,32
+	rounds	e, f, g, h, a, b, c, d, 20 
+	message_schedule W1,W2,W3,W0,36
+	rounds	a, b, c, d, e, f, g, h, 24
+	message_schedule W2,W3,W0,W1,40
+	rounds	e, f, g, h, a, b, c, d, 28 
+	message_schedule W3,W0,W1,W2,44
+	rounds	a, b, c, d, e, f, g, h, 32
+	message_schedule W0,W1,W2,W3,48
+	rounds	e, f, g, h, a, b, c, d, 36 
+	message_schedule W1,W2,W3,W0,52
+	rounds	a, b, c, d, e, f, g, h, 40
+	message_schedule W2,W3,W0,W1,56
+	rounds	e, f, g, h, a, b, c, d, 44 
+	message_schedule W3,W0,W1,W2,60
+
+	// revert K to the beginning of K256[]
+#if defined __x86_64__
+	sub		$256, K
+#else
+	subl	$256, K
+#endif
+
+	sub		$1, num_blocks				// num_blocks--
+	je		L_final_block				// if final block, wrap up final rounds
+
+	// rounds 48:63 interleaved with W/WK initialization for next block rounds 0:15 
+	rounds	a, b, c, d, e, f, g, h, 48
+	update_W_WK	0, W0
+	rounds	e, f, g, h, a, b, c, d, 52 
+	update_W_WK	1, W1
+	rounds	a, b, c, d, e, f, g, h, 56
+	update_W_WK	2, W2
+	rounds	e, f, g, h, a, b, c, d, 60 
+	update_W_WK	3, W3
+
+	add		$64, K
+#if defined (__x86_64__)
+	add		$64, data
+#else
+	add		$64, data_addr
+#endif
+
+	// ctx->states += digests a-h
+#if	defined (__x86_64__)
+	add		a, 0*4(ctx)
+	add		b, 1*4(ctx)
+	add		c, 2*4(ctx)
+	add		d, 3*4(ctx)
+	add		e, 4*4(ctx)
+	add		f, 5*4(ctx)
+	add		g, 6*4(ctx)
+	add		h, 7*4(ctx)
+#else
+	mov		ctx_addr, t
+	add		a, 0*4(t)
+	add		b, 1*4(t)
+	mov		c, s
+	add		s, 2*4(t)
+	add		d, 3*4(t)
+	add		e, 4*4(t)
+	mov		f, s
+	add		s, 5*4(t)
+	add		g, 6*4(t)
+	mov		h, s
+	add		s, 7*4(t)
+#endif
+
+	jmp		L_loop				// branch for next block
+
+	// wrap up digest update round 48:63 for final block
+L_final_block:
+	rounds	a, b, c, d, e, f, g, h, 48
+	rounds	e, f, g, h, a, b, c, d, 52 
+	rounds	a, b, c, d, e, f, g, h, 56
+	rounds	e, f, g, h, a, b, c, d, 60 
+
+	// ctx->states += digests a-h
+#if	defined (__x86_64__)
+	add		a, 0*4(ctx)
+	add		b, 1*4(ctx)
+	add		c, 2*4(ctx)
+	add		d, 3*4(ctx)
+	add		e, 4*4(ctx)
+	add		f, 5*4(ctx)
+	add		g, 6*4(ctx)
+	add		h, 7*4(ctx)
+#else
+	mov		ctx_addr, t
+	add		a, 0*4(t)
+	add		b, 1*4(t)
+	mov		c, s
+	add		s, 2*4(t)
+	add		d, 3*4(t)
+	add		e, 4*4(t)
+	mov		f, s
+	add		s, 5*4(t)
+	add		g, 6*4(t)
+	mov		h, s
+	add		s, 7*4(t)
+#endif
+
+	// if kernel, restore xmm0-xmm7
+#if	KERNEL
+	movdqa	0*16+xmm_save, %xmm0
+	movdqa	1*16+xmm_save, %xmm1
+	movdqa	2*16+xmm_save, %xmm2
+	movdqa	3*16+xmm_save, %xmm3
+	movdqa	4*16+xmm_save, %xmm4
+	movdqa	5*16+xmm_save, %xmm5
+	movdqa	6*16+xmm_save, %xmm6
+	movdqa	7*16+xmm_save, %xmm7
+#endif
+
+	// free allocated stack memory
+	add		$stack_size, sp
+
+	// restore callee-saved registers
+#if defined (__x86_64__)
+	pop		%r15
+	pop		%r14
+	pop		%r13
+	pop		%r12
+	pop		%rbx
+	pop		%rbp
+#else
+    pop		%edi
+    pop		%esi
+	pop		%ebx
+    pop		%ebp
+#endif
+
+	// return
+	ret
+
+
+#endif		// x86_64/i386
+
diff --git a/bsd/crypto/sha2/sha2.c b/bsd/crypto/sha2/sha2.c
index c306068dc..603d32834 100644
--- a/bsd/crypto/sha2/sha2.c
+++ b/bsd/crypto/sha2/sha2.c
@@ -63,7 +63,7 @@
  *
  */
 
-#ifndef assert(x)
+#ifndef assert
 #define assert(x) do {} while(0)
 #endif
 
@@ -202,13 +202,21 @@ typedef u_int64_t sha2_word64;	/* Exactly 8 bytes */
  * only.
  */
 void SHA512_Last(SHA512_CTX*);
+#if defined	(SHA256_USE_ASSEMBLY) && (defined(__x86_64__)||defined(__i386__))
+void SHA256_Transform(SHA256_CTX*, const sha2_word32*, unsigned int num_blocks);
+#else
 void SHA256_Transform(SHA256_CTX*, const sha2_word32*);
+#endif
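+/* the assembly variant consumes num_blocks 64-byte blocks per call; the C variant processes exactly one block */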
 void SHA512_Transform(SHA512_CTX*, const sha2_word64*);
 
 
 /*** SHA-XYZ INITIAL HASH VALUES AND CONSTANTS ************************/
 /* Hash constant words K for SHA-256: */
+#if defined	(SHA256_USE_ASSEMBLY) && (defined(__x86_64__)||defined(__i386__))
+const sha2_word32 K256[64] = {		// assembly code will need to read this table
+#else
 static const sha2_word32 K256[64] = {
+#endif
 	0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL,
 	0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL,
 	0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL,
@@ -324,6 +332,8 @@ void SHA256_Init(SHA256_CTX* context) {
 	context->bitcount = 0;
 }
 
+#if !(defined (SHA256_USE_ASSEMBLY) && (defined(__x86_64__)||defined(__i386__)))
+
 #ifdef SHA2_UNROLL_TRANSFORM
 
 /* Unrolled SHA-256 round macros: */
@@ -499,6 +509,8 @@ void SHA256_Transform(SHA256_CTX* context, const sha2_word32* data) {
 
 #endif /* SHA2_UNROLL_TRANSFORM */
 
+#endif	// defined (SHA256_USE_ASSEMBLY) && (defined(__x86_64__)||defined(__i386__))
+
 void SHA256_Update(SHA256_CTX* context, const sha2_byte *data, size_t len) {
 	unsigned int	freespace, usedspace;
 
@@ -521,7 +533,11 @@ void SHA256_Update(SHA256_CTX* context, const sha2_byte *data, size_t len) {
 			context->bitcount += freespace << 3;
 			len -= freespace;
 			data += freespace;
+#if defined (SHA256_USE_ASSEMBLY) && (defined(__x86_64__)||defined(__i386__))
+			SHA256_Transform(context, (sha2_word32*)context->buffer, 1);
+#else
 			SHA256_Transform(context, (sha2_word32*)context->buffer);
+#endif
 		} else {
 			/* The buffer is not yet full */
 			bcopy(data, &context->buffer[usedspace], len);
@@ -531,6 +547,17 @@ void SHA256_Update(SHA256_CTX* context, const sha2_byte *data, size_t len) {
 			return;
 		}
 	}
+#if defined (SHA256_USE_ASSEMBLY) && (defined(__x86_64__)||defined(__i386__))
+	{
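+		/* hand as many whole 64-byte blocks as possible to the multi-block assembly transform in a single call */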
+		unsigned int	kk = len/SHA256_BLOCK_LENGTH;
+		if (kk>0) {
+			SHA256_Transform(context, (const sha2_word32*)data, kk);
+			context->bitcount += (SHA256_BLOCK_LENGTH << 3)*kk;
+			len -= SHA256_BLOCK_LENGTH*kk;
+			data += SHA256_BLOCK_LENGTH*kk;
+		}
+	}	
+#else
 	while (len >= SHA256_BLOCK_LENGTH) {
 		/* Process as many complete blocks as we can */
 		SHA256_Transform(context, (const sha2_word32*)data);
@@ -538,6 +565,7 @@ void SHA256_Update(SHA256_CTX* context, const sha2_byte *data, size_t len) {
 		len -= SHA256_BLOCK_LENGTH;
 		data += SHA256_BLOCK_LENGTH;
 	}
+#endif
 	if (len > 0) {
 		/* There's left-overs, so save 'em */
 		bcopy(data, context->buffer, len);
@@ -573,7 +601,11 @@ void SHA256_Final(sha2_byte digest[], SHA256_CTX* context) {
 					bzero(&context->buffer[usedspace], SHA256_BLOCK_LENGTH - usedspace);
 				}
 				/* Do second-to-last transform: */
+#if defined (SHA256_USE_ASSEMBLY) && (defined(__x86_64__)||defined(__i386__))
+				SHA256_Transform(context, (sha2_word32*)context->buffer, 1);
+#else
 				SHA256_Transform(context, (sha2_word32*)context->buffer);
+#endif
 
 				/* And set-up for the last transform: */
 				bzero(context->buffer, SHA256_SHORT_BLOCK_LENGTH);
@@ -589,7 +621,11 @@ void SHA256_Final(sha2_byte digest[], SHA256_CTX* context) {
 		*(sha2_word64*)&context->buffer[SHA256_SHORT_BLOCK_LENGTH] = context->bitcount;
 
 		/* Final transform: */
+#if defined (SHA256_USE_ASSEMBLY) && (defined(__x86_64__)||defined(__i386__))
+		SHA256_Transform(context, (sha2_word32*)context->buffer, 1);
+#else
 		SHA256_Transform(context, (sha2_word32*)context->buffer);
+#endif
 
 #if BYTE_ORDER == LITTLE_ENDIAN
 		{
diff --git a/bsd/dev/Makefile b/bsd/dev/Makefile
index b2f00140a..01f00592f 100644
--- a/bsd/dev/Makefile
+++ b/bsd/dev/Makefile
@@ -9,16 +9,12 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 INSTINC_SUBDIRS_X86_64 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS_X86_64 = \
diff --git a/bsd/dev/chud/chud_bsd_callback.c b/bsd/dev/chud/chud_bsd_callback.c
index 6fad80050..a28bebf46 100644
--- a/bsd/dev/chud/chud_bsd_callback.c
+++ b/bsd/dev/chud/chud_bsd_callback.c
@@ -36,15 +36,10 @@
 #include <sys/systm.h> /* struct sysent */
 #include <sys/sysproto.h>
 #include <sys/kdebug.h>	/* KDEBUG_ENABLE_CHUD */
+#include <sys/kauth.h> /* kauth_cred_get */
 #include <libkern/OSAtomic.h>
-
-#ifdef __ppc__
-#include <ppc/savearea.h>
-
-#define FM_ARG0				0x38ULL	// offset from r1 to first argument
-#define SPILLED_WORD_COUNT	7		// number of 32-bit words spilled to the stack
-
-extern struct savearea * find_user_regs( thread_t act);
+#if CONFIG_MACF
+#include <security/mac_framework.h> /* mac_system_check_chud */
 #endif
 
 #pragma mark **** kern debug ****
@@ -87,8 +82,6 @@ chudxnu_kdebug_callback_enter(chudxnu_kdebug_callback_func_t func)
 		(void * volatile *)&kdebug_callback_fn)) {
 		
 		kdbg_control_chud(TRUE, (void *)chudxnu_private_kdebug_callback);
-		OSBitOrAtomic((UInt32)KDEBUG_ENABLE_CHUD, (volatile UInt32 *)&kdebug_enable);
-		
 		return KERN_SUCCESS;
 	}
 	return KERN_FAILURE;
@@ -97,7 +90,6 @@ chudxnu_kdebug_callback_enter(chudxnu_kdebug_callback_func_t func)
 __private_extern__ kern_return_t
 chudxnu_kdebug_callback_cancel(void)
 {
-	OSBitAndAtomic((UInt32)~(KDEBUG_ENABLE_CHUD), (volatile UInt32 *)&kdebug_enable);
 	kdbg_control_chud(FALSE, NULL);
 
 	chudxnu_kdebug_callback_func_t old = kdebug_callback_fn;
@@ -175,40 +167,18 @@ static kern_return_t chud_null_syscall(uint64_t code __unused,
 int
 chud(__unused proc_t p, struct chud_args *uap, int32_t *retval)
 {
+#if CONFIG_MACF
+	int error = mac_system_check_chud(kauth_cred_get());
+	if (error)
+		return error;
+#endif
+
     chudxnu_syscall_callback_func_t fn = syscall_callback_fn;
     
 	if(!fn) {
 		return EINVAL;
 	}
 
-#ifdef __ppc__
-	// ppc32 user land spills 2.5 64-bit args (5 x 32-bit) to the stack
-	// here we have to copy them out.  r1 is the stack pointer in this world.
-	// the offset is calculated according to the PPC32 ABI
-	// Important: this only happens for 32-bit user threads
-
-	if(!IS_64BIT_PROCESS(p)) {
-		struct savearea *regs = find_user_regs(current_thread());
-		if(!regs) {
-			return EINVAL;
-		}
-
-		// %r1 is the stack pointer on ppc32
-		uint32_t stackPointer = regs->save_r1;
-
-		// calculate number of bytes spilled to the stack
-		uint32_t spilledSize = sizeof(struct chud_args) - (sizeof(uint32_t) * SPILLED_WORD_COUNT);
-
-		// obtain offset to arguments spilled onto user-thread stack
-		user_addr_t incomingAddr = (user_addr_t)stackPointer + FM_ARG0;
-
-		// destination is halfway through arg3
-		uint8_t *dstAddr = (uint8_t*)(&(uap->arg3)) + sizeof(uint32_t);
-		
-		copyin(incomingAddr, dstAddr, spilledSize);
-	}
-#endif
-	
 	*retval = fn(uap->code, uap->arg1, uap->arg2, uap->arg3, uap->arg4, uap->arg5);
 		
 	return 0;
diff --git a/bsd/dev/dtrace/dtrace.c b/bsd/dev/dtrace/dtrace.c
index 081f70dc3..745a0fa01 100644
--- a/bsd/dev/dtrace/dtrace.c
+++ b/bsd/dev/dtrace/dtrace.c
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -100,6 +100,7 @@
 #include <sys/systm.h>
 #include <sys/dtrace_impl.h>
 #include <sys/param.h>
+#include <sys/proc_internal.h>
 #include <sys/ioctl.h>
 #include <sys/fcntl.h>
 #include <miscfs/devfs/devfs.h>
@@ -112,13 +113,17 @@
 #include <sys/user.h>
 #include <mach/exception_types.h>
 #include <sys/signalvar.h>
+#include <mach/task.h>
 #include <kern/zalloc.h>
 #include <kern/ast.h>
 #include <netinet/in.h>
 
 #if defined(__APPLE__)
+#include <kern/cpu_data.h>
 extern uint32_t pmap_find_phys(void *, uint64_t);
 extern boolean_t pmap_valid_page(uint32_t);
+extern void OSKextRegisterKextsWithDTrace(void);
+extern kmod_info_t g_kernel_kmod_info;
 #endif /* __APPLE__ */
 
 
@@ -140,6 +145,7 @@ extern void dtrace_postinit(void);
 
 extern kern_return_t chudxnu_dtrace_callback
 	(uint64_t selector, uint64_t *args, uint32_t count);
+
 #endif /* __APPLE__ */
 
 /*
@@ -170,7 +176,7 @@ size_t		dtrace_global_maxsize = (16 * 1024);
 size_t		dtrace_actions_max = (16 * 1024);
 size_t		dtrace_retain_max = 1024;
 dtrace_optval_t	dtrace_helper_actions_max = 32;
-dtrace_optval_t	dtrace_helper_providers_max = 32;
+dtrace_optval_t	dtrace_helper_providers_max = 64;
 dtrace_optval_t	dtrace_dstate_defsize = (1 * 1024 * 1024);
 size_t		dtrace_strsize_default = 256;
 dtrace_optval_t	dtrace_cleanrate_default = 9900990;		/* 101 hz */
@@ -238,6 +244,12 @@ static dtrace_genid_t   dtrace_retained_gen;    /* current retained enab gen */
 static dtrace_dynvar_t	dtrace_dynhash_sink;	/* end of dynamic hash chains */
 #if defined(__APPLE__)
 static int		dtrace_dof_mode;	/* See dtrace_impl.h for a description of Darwin's dof modes. */
+
+			/*
+			 * This doesn't quite fit as an internal variable, as it must be accessed in
+			 * fbt_provide and sdt_provide. It's clearly not a dtrace tunable variable either...
+			 */
+int			dtrace_kernel_symbol_mode;	/* See dtrace_impl.h for a description of Darwin's kernel symbol modes. */
 #endif
 
 #if defined(__APPLE__)
@@ -249,6 +261,8 @@ static int		dtrace_dof_mode;	/* See dtrace_impl.h for a description of Darwin's
  */
 
 struct zone *dtrace_probe_t_zone;
+
+static int dtrace_module_unloaded(struct kmod_info *kmod);
 #endif /* __APPLE__ */
 
 /*
@@ -328,10 +342,16 @@ static void
 dtrace_nullop(void)
 {}
 
+static int
+dtrace_enable_nullop(void)
+{
+    return (0);
+}
+
 static dtrace_pops_t	dtrace_provider_ops = {
 	(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
 	(void (*)(void *, struct modctl *))dtrace_nullop,
-	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
+	(int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,
 	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
 	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
 	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
@@ -429,8 +449,8 @@ static lck_mtx_t dtrace_errlock;
 	(where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
 	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
 }
-#else 
-#if (defined(__x86_64__) || defined(__ppc64__))
+#else
+#if defined(__x86_64__)
 /* FIXME: two function calls!! */
 #define	DTRACE_TLS_THRKEY(where) { \
 	uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \
@@ -542,12 +562,11 @@ dtrace_load##bits(uintptr_t addr)					\
 	return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0);		\
 }
 #else /* __APPLE__ */
-#define RECOVER_LABEL(bits) __asm__ volatile("_dtraceLoadRecover" #bits ":" );
+#define RECOVER_LABEL(bits) dtraceLoadRecover##bits:
 
 #if (defined(__i386__) || defined (__x86_64__))
 #define	DTRACE_LOADFUNC(bits)						\
 /*CSTYLED*/								\
-extern vm_offset_t dtraceLoadRecover##bits;				\
 uint##bits##_t dtrace_load##bits(uintptr_t addr);			\
 									\
 uint##bits##_t								\
@@ -578,7 +597,7 @@ dtrace_load##bits(uintptr_t addr)					\
 	}								\
 									\
 	{								\
-	volatile vm_offset_t recover = (vm_offset_t)&dtraceLoadRecover##bits;		\
+	volatile vm_offset_t recover = (vm_offset_t)&&dtraceLoadRecover##bits;		\
 	*flags |= CPU_DTRACE_NOFAULT;					\
 	recover = dtrace_set_thread_recover(current_thread(), recover);	\
 	/*CSTYLED*/							\
@@ -598,7 +617,6 @@ dtrace_load##bits(uintptr_t addr)					\
 #else /* all other architectures */
 #define	DTRACE_LOADFUNC(bits)						\
 /*CSTYLED*/								\
-extern vm_offset_t dtraceLoadRecover##bits;				\
 uint##bits##_t dtrace_load##bits(uintptr_t addr);			\
 									\
 uint##bits##_t								\
@@ -629,7 +647,7 @@ dtrace_load##bits(uintptr_t addr)					\
 	}								\
 									\
 	{								\
-	volatile vm_offset_t recover = (vm_offset_t)&dtraceLoadRecover##bits;		\
+	volatile vm_offset_t recover = (vm_offset_t)&&dtraceLoadRecover##bits;		\
 	*flags |= CPU_DTRACE_NOFAULT;					\
 	recover = dtrace_set_thread_recover(current_thread(), recover);	\
 	/*CSTYLED*/	\
@@ -654,6 +672,7 @@ dtrace_load##bits(uintptr_t addr)					\
 #define	DTRACE_DYNHASH_SINK	1
 #define	DTRACE_DYNHASH_VALID	2
 
+#define DTRACE_MATCH_FAIL       -1
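+/* DTRACE_MATCH_FAIL: a matched-callback failed, e.g. a provider could not enable a probe (see dtrace_ecb_enable) */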
 #define	DTRACE_MATCH_NEXT	0
 #define	DTRACE_MATCH_DONE	1
 #define	DTRACE_ANCHORED(probe)	((probe)->dtpr_func[0] != '\0')
@@ -1291,12 +1310,12 @@ dtrace_priv_proc_common_user(dtrace_state_t *state)
 #else
 	if ((cr = dtrace_CRED()) != NULL &&
 #endif /* __APPLE__ */
-	    s_cr->cr_uid == cr->cr_uid &&
-	    s_cr->cr_uid == cr->cr_ruid &&
-	    s_cr->cr_uid == cr->cr_suid &&
-	    s_cr->cr_gid == cr->cr_gid &&
-	    s_cr->cr_gid == cr->cr_rgid &&
-	    s_cr->cr_gid == cr->cr_sgid)
+	    posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_uid &&
+	    posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_ruid &&
+	    posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_suid &&
+	    posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_gid &&
+	    posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_rgid &&
+	    posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_sgid)
 		return (1);
 
 	return (0);
@@ -4946,15 +4965,20 @@ next:
 #if !defined(__APPLE__)
 			ipaddr_t ip4;
 #else
-			in_addr_t ip4;
+			uint32_t ip4;
 #endif /* __APPLE__ */
 			uint8_t *ptr8, val;
 
 			/*
 			 * Safely load the IPv4 address.
 			 */
+#if !defined(__APPLE__)			
 			ip4 = dtrace_load32(tupregs[argi].dttk_value);
-
+#else
+			dtrace_bcopy(
+			    (void *)(uintptr_t)tupregs[argi].dttk_value,
+			    (void *)(uintptr_t)&ip4, sizeof (ip4));
+#endif /* __APPLE__ */			
 			/*
 			 * Check an IPv4 string will fit in scratch.
 			 */
@@ -6180,7 +6204,7 @@ dtrace_action_raise(uint64_t sig)
 
 	if (uthread && uthread->t_dtrace_sig == 0) {
 		uthread->t_dtrace_sig = sig;
-		astbsd_on();
+		act_set_astbsd(current_thread());
 	}
 #endif /* __APPLE__ */
 }
@@ -6198,21 +6222,55 @@ dtrace_action_stop(void)
 		aston(curthread);
 	}
 #else
-	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
-
-	if (uthread && uthread->t_dtrace_stop == 0) {
+	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
+	if (uthread) {
+		/*
+		 * The currently running process will suspend itself (task_suspend)
+		 * when it next leaves the kernel.
+		 */
 		uthread->t_dtrace_stop = 1;
-		astbsd_on();
+		act_set_astbsd(current_thread());
 	}
+
 #endif /* __APPLE__ */
 }
 
+#if defined(__APPLE__)
+static void
+dtrace_action_pidresume(uint64_t pid)
+{
+	if (dtrace_destructive_disallow)
+		return;
+
+	if (kauth_cred_issuser(kauth_cred_get()) == 0) {
+		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);		
+		return;
+	}
+
+	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
+
+	/*
+	 * When the currently running process leaves the kernel, it attempts to
+	 * task_resume the process (denoted by pid), if that pid appears to have
+	 * been stopped by dtrace_action_stop().
+	 * The currently running process has a pidresume() queue depth of 1 --
+	 * subsequent invocations of the pidresume() action are ignored.
+	 */	
+
+	if (pid != 0 && uthread && uthread->t_dtrace_resumepid == 0) {
+		uthread->t_dtrace_resumepid = pid;
+		act_set_astbsd(current_thread());
+	}
+}
+#endif /* __APPLE__ */
+
+
 static void
 dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
 {
 	hrtime_t now;
 	volatile uint16_t *flags;
-	cpu_t *cpu = CPU;
+	dtrace_cpu_t *cpu = CPU;
 
 	if (dtrace_destructive_disallow)
 		return;
@@ -6601,17 +6659,21 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
 
 				ASSERT(s_cr != NULL);
 
+			/*
+			 * XXX this is hackish, but so is setting a variable
+			 * XXX in a McCarthy OR...
+			 */
 #if !defined(__APPLE__)
 				if ((cr = CRED()) == NULL ||
 #else
 				if ((cr = dtrace_CRED()) == NULL ||
 #endif /* __APPLE__ */
-				    s_cr->cr_uid != cr->cr_uid ||
-				    s_cr->cr_uid != cr->cr_ruid ||
-				    s_cr->cr_uid != cr->cr_suid ||
-				    s_cr->cr_gid != cr->cr_gid ||
-				    s_cr->cr_gid != cr->cr_rgid ||
-				    s_cr->cr_gid != cr->cr_sgid ||
+				    posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_uid ||
+				    posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_ruid ||
+				    posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_suid ||
+				    posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_gid ||
+				    posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_rgid ||
+				    posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_sgid ||
 #if !defined(__APPLE__)
 				    (proc = ttoproc(curthread)) == NULL ||
 				    (proc->p_flag & SNOCD))
@@ -6868,6 +6930,13 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
 					dtrace_action_raise(val);
 				continue;
 
+#if defined(__APPLE__)				
+			case DTRACEACT_PIDRESUME:
+				if (dtrace_priv_proc_destructive(state))
+					dtrace_action_pidresume(val);
+				continue;
+#endif /* __APPLE__ */				
+
 			case DTRACEACT_COMMIT:
 				ASSERT(!committed);
 
@@ -7126,12 +7195,13 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
    on some function in the transitive closure of the call to dtrace_probe(). Solaris has some
    strong guarantees that this won't happen, the Darwin implementation is not so mature as to
    make those guarantees. */
+
 void
 dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
     uint64_t arg2, uint64_t arg3, uint64_t arg4)
 {
 	thread_t thread = current_thread();
-
+	disable_preemption();
 	if (id == dtrace_probeid_error) {
 		__dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
 		dtrace_getipl(); /* Defeat tail-call optimization of __dtrace_probe() */
@@ -7143,6 +7213,7 @@ dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
 #if DEBUG
 	else __dtrace_probe(dtrace_probeid_error, 0, id, 1, -1, DTRACEFLT_UNKNOWN);
 #endif
+	enable_preemption();
 }
 #endif /* __APPLE__ */
 
@@ -7733,7 +7804,7 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
 {
 	dtrace_probe_t template, *probe;
 	dtrace_hash_t *hash = NULL;
-	int len, best = INT_MAX, nmatched = 0;
+	int len, rc, best = INT_MAX, nmatched = 0;
 	dtrace_id_t i;
 
 	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
@@ -7745,7 +7816,8 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
 	if (pkp->dtpk_id != DTRACE_IDNONE) {
 		if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
 		    dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
-			(void) (*matched)(probe, arg);
+			if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL)
+				return (DTRACE_MATCH_FAIL);
 			nmatched++;
 		}
 		return (nmatched);
@@ -7802,8 +7874,11 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
 
 			nmatched++;
 
-			if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
-				break;
+			if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
+				if (rc == DTRACE_MATCH_FAIL)
+					return (DTRACE_MATCH_FAIL);
+				break;
+			}
 		}
 
 		return (nmatched);
@@ -7822,8 +7897,11 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
 
 		nmatched++;
 
-		if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
-			break;
+		if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
+			if (rc == DTRACE_MATCH_FAIL)
+				return (DTRACE_MATCH_FAIL);
+			break;
+		}
 	}
 
 	return (nmatched);
@@ -8051,7 +8129,7 @@ dtrace_unregister(dtrace_provider_id_t id)
 	dtrace_probe_t *probe, *first = NULL;
 
 	if (old->dtpv_pops.dtps_enable ==
-	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) {
+	    (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) {
 		/*
 		 * If DTrace itself is the provider, we're called with locks
 		 * already held.
@@ -8201,7 +8279,7 @@ dtrace_invalidate(dtrace_provider_id_t id)
 	dtrace_provider_t *pvp = (dtrace_provider_t *)id;
 
 	ASSERT(pvp->dtpv_pops.dtps_enable !=
-	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);
+	    (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
 
 	lck_mtx_lock(&dtrace_provider_lock);
 	lck_mtx_lock(&dtrace_lock);
@@ -8242,7 +8320,7 @@ dtrace_condense(dtrace_provider_id_t id)
 	 * Make sure this isn't the dtrace provider itself.
 	 */
 	ASSERT(prov->dtpv_pops.dtps_enable !=
-	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);
+	  (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
 
 	lck_mtx_lock(&dtrace_provider_lock);
 	lck_mtx_lock(&dtrace_lock);
@@ -8508,7 +8586,6 @@ dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
 {
 	struct modctl *ctl;
 	int all = 0;
-#pragma unused(ctl) /* __APPLE__ */
 
 	lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
 
@@ -8516,22 +8593,22 @@ dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
 		all = 1;
 		prv = dtrace_provider;
 	}
-
+		 
 	do {
 		/*
 		 * First, call the blanket provide operation.
 		 */
 		prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);
-
-#if !defined(__APPLE__)
+		
 		/*
 		 * Now call the per-module provide operation.  We will grab
 		 * mod_lock to prevent the list from being modified.  Note
 		 * that this also prevents the mod_busy bits from changing.
 		 * (mod_busy can only be changed with mod_lock held.)
 		 */
-		mutex_enter(&mod_lock);
-
+		lck_mtx_lock(&mod_lock);
+		
+#if !defined(__APPLE__)
 		ctl = &modules;
 		do {
 			if (ctl->mod_busy || ctl->mod_mp == NULL)
@@ -8540,29 +8617,15 @@ dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
 			prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
 
 		} while ((ctl = ctl->mod_next) != &modules);
-
-		mutex_exit(&mod_lock);
 #else
-#if 0 /* FIXME: Workaround for PR_4643546 */
-		/* NOTE: kmod_lock has been removed. */
-		simple_lock(&kmod_lock);
-		
-		kmod_info_t *ktl = kmod;
-		while (ktl) {
-			prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ktl);
-			ktl = ktl->next;
+		ctl = dtrace_modctl_list;
+		while (ctl) {
+			prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
+			ctl = ctl->mod_next;
 		}
-		
-		simple_unlock(&kmod_lock);
-#else
-		/*
-		 * Don't bother to iterate over the kmod list. At present only fbt
-		 * offers a provide_module in its dtpv_pops, and then it ignores the
-		 * module anyway.
-		 */
-		prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, NULL);
 #endif
-#endif /* __APPLE__ */
+		
+		lck_mtx_unlock(&mod_lock);
 	} while (all && (prv = prv->dtpv_next) != NULL);
 }
 
@@ -9295,7 +9358,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
 			break;
 
 		default:
-			err += efunc(dp->dtdo_len - 1, "bad return size");
+			err += efunc(dp->dtdo_len - 1, "bad return size\n");
 		}
 	}
 
@@ -10356,7 +10419,7 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
 	return (ecb);
 }
 
-static void
+static int
 dtrace_ecb_enable(dtrace_ecb_t *ecb)
 {
 	dtrace_probe_t *probe = ecb->dte_probe;
@@ -10369,7 +10432,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb)
 		/*
 		 * This is the NULL probe -- there's nothing to do.
 		 */
-		return;
+		return (0);
 	}
 
 	if (probe->dtpr_ecb == NULL) {
@@ -10383,8 +10446,8 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb)
 		if (ecb->dte_predicate != NULL)
 			probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;
 
-		prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
-		    probe->dtpr_id, probe->dtpr_arg);
+		return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
+		    probe->dtpr_id, probe->dtpr_arg));
 	} else {
 		/*
 		 * This probe is already active.  Swing the last pointer to
@@ -10397,6 +10460,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb)
 		probe->dtpr_predcache = 0;
 
 		dtrace_sync();
+		return(0);
 	}
 }
 
@@ -10860,6 +10924,9 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
 		case DTRACEACT_CHILL:
 		case DTRACEACT_DISCARD:
 		case DTRACEACT_RAISE:
+#if defined(__APPLE__)
+		case DTRACEACT_PIDRESUME:
+#endif /* __APPLE__ */
 			if (dp == NULL)
 				return (EINVAL);
 			break;
@@ -11196,7 +11263,9 @@ dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
 	if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
 		return (DTRACE_MATCH_DONE);
 
-	dtrace_ecb_enable(ecb);
+	if (dtrace_ecb_enable(ecb) < 0)
+		return (DTRACE_MATCH_FAIL);
+
 	return (DTRACE_MATCH_NEXT);
 }
 
@@ -11313,7 +11382,7 @@ static int
 dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
     processorid_t cpu)
 {
-	cpu_t *cp;
+	dtrace_cpu_t *cp;
 	dtrace_buffer_t *buf;
 
 	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
@@ -12052,7 +12121,7 @@ static int
 dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
 {
 	int i = 0;
-	int matched = 0;
+	int total_matched = 0, matched = 0;
 
 	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
 	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
@@ -12063,7 +12132,14 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
 		enab->dten_current = ep;
 		enab->dten_error = 0;
 
-		matched += dtrace_probe_enable(&ep->dted_probe, enab);
+		/*
+		 * If a provider failed to enable a probe then get out and
+		 * let the consumer know we failed.
+		 */
+		if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0)
+			return (EBUSY);
+
+		total_matched += matched;
 
 		if (enab->dten_error != 0) {
 			/*
@@ -12091,7 +12167,7 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
 
 	enab->dten_probegen = dtrace_probegen;
 	if (nmatched != NULL)
-		*nmatched = matched;
+		*nmatched = total_matched;
 
 	return (0);
 }
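/*
 * A sketch of the failure path these hunks introduce (names from this patch;
 * dtrace_probe_enable's negative return is implied rather than shown here):
 *
 *   dtps_enable() returns < 0
 *     -> dtrace_ecb_enable() returns that value
 *       -> dtrace_ecb_create_enable() returns DTRACE_MATCH_FAIL
 *         -> dtrace_probe_enable() yields a negative match count
 *           -> dtrace_enabling_match() returns EBUSY to the consumer
 */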
@@ -12351,16 +12427,22 @@ dtrace_dof_copyin(user_addr_t uarg, int *errp)
 #if !defined(__APPLE__)
 	dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP);
 
-	if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0) {
+	if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 ||
+	    dof->dofh_loadsz != hdr.dofh_loadsz) {
+		kmem_free(dof, hdr.dofh_loadsz);
+		*errp = EFAULT;
+		return (NULL);
+	}
 #else
 	dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP);
 
-	if (copyin(uarg, dof, hdr.dofh_loadsz) != 0) {
+	if (copyin(uarg, dof, hdr.dofh_loadsz) != 0 ||
+	    dof->dofh_loadsz != hdr.dofh_loadsz) {
+		dt_kmem_free_aligned(dof, hdr.dofh_loadsz);
+		*errp = EFAULT;
+		return (NULL);
+	}
 #endif
-		dt_kmem_free_aligned(dof, hdr.dofh_loadsz);
-		*errp = EFAULT;
-		return (NULL);
-	}
 
 	return (dof);
 }
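/*
 * A sketch of the double-fetch guard added above: the DOF header is read once
 * to size the allocation, so the size embedded in the second, full copyin
 * must be re-validated -- userspace could rewrite it between the two reads.
 * (The fasttrap ioctl hunk later in this patch applies the same pattern to
 * ftps_noffs.) Helper name and shape are assumptions for illustration only.
 */
static dof_hdr_t *
dtrace_dof_copyin_sized(user_addr_t uarg, size_t claimed_loadsz, int *errp)
{
	dof_hdr_t *dof = dt_kmem_alloc_aligned(claimed_loadsz, 8, KM_SLEEP);

	if (copyin(uarg, dof, claimed_loadsz) != 0 ||
	    dof->dofh_loadsz != claimed_loadsz) {
		/* Copy failed, or the header changed under us: reject. */
		dt_kmem_free_aligned(dof, claimed_loadsz);
		*errp = EFAULT;
		return (NULL);
	}
	return (dof);
}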
@@ -16079,30 +16161,257 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to)
 /*
  * DTrace Hook Functions
  */
+
+#if defined(__APPLE__)
+/*
+ * Routines to manipulate the modctl list within dtrace
+ */
+
+modctl_t *dtrace_modctl_list;
+
+static void
+dtrace_modctl_add(struct modctl * newctl)
+{
+	struct modctl *nextp, *prevp;
+
+	ASSERT(newctl != NULL);
+	lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);
+
+	// Insert the new module at the front of the list.
+	
+	newctl->mod_next = dtrace_modctl_list;
+	dtrace_modctl_list = newctl;
+
+	/*
+	 * If a module exists with the same name, then that module
+	 * must have been unloaded with enabled probes. We will move
+	 * the unloaded module to the new module's stale chain and
+	 * then stop traversing the list.
+	 */
+
+	prevp = newctl;
+	nextp = newctl->mod_next;
+    
+	while (nextp != NULL) {
+		if (nextp->mod_loaded) {
+			/* This is a loaded module. Keep traversing. */
+			prevp = nextp;
+			nextp = nextp->mod_next;
+			continue;
+		}
+		else {
+			/* Found an unloaded module */
+			if (strncmp (newctl->mod_modname, nextp->mod_modname, KMOD_MAX_NAME)) {
+				/* Names don't match. Keep traversing. */
+				prevp = nextp;
+				nextp = nextp->mod_next;
+				continue;
+			}
+			else {
+				/* We found a stale entry, move it. We're done. */
+				prevp->mod_next = nextp->mod_next;
+				newctl->mod_stale = nextp;
+				nextp->mod_next = NULL;
+				break;
+			}
+		}
+	}
+}
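/*
 * A sketch of the list shape dtrace_modctl_add() produces (mod_ids assumed,
 * for illustration): if "com.example.driver" unloads with probes still
 * enabled and then reloads, the fresh entry heads the list and the dead one
 * is parked on its mod_stale chain:
 *
 *   dtrace_modctl_list --> [ com.example.driver, mod_id 42, mod_loaded = 1 ]
 *                                          |
 *                                      mod_stale
 *                                          v
 *                          [ com.example.driver, mod_id 17, mod_loaded = 0 ]
 */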
+
+static modctl_t *
+dtrace_modctl_lookup(struct kmod_info * kmod)
+{
+    lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);
+
+    struct modctl * ctl;
+
+    for (ctl = dtrace_modctl_list; ctl; ctl=ctl->mod_next) {
+	if (ctl->mod_id == kmod->id)
+	    return(ctl);
+    }
+    return (NULL);
+}
+
+/*
+ * This routine is called from dtrace_module_unloaded().
+ * It removes a modctl structure and its stale chain
+ * from the kext shadow list.
+ */
+static void
+dtrace_modctl_remove(struct modctl * ctl)
+{
+	ASSERT(ctl != NULL);
+	lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);
+	modctl_t *prevp, *nextp, *curp;
+
+	// Remove stale chain first
+	for (curp=ctl->mod_stale; curp != NULL; curp=nextp) {
+		nextp = curp->mod_stale;
+		/* There should NEVER be user symbols allocated at this point */
+		ASSERT(curp->mod_user_symbols == NULL);	
+		kmem_free(curp, sizeof(modctl_t));
+	}
+
+	prevp = NULL;
+	curp = dtrace_modctl_list;
+	
+	while (curp != ctl) {
+		prevp = curp;
+		curp = curp->mod_next;
+	}
+
+	if (prevp != NULL) {
+		prevp->mod_next = ctl->mod_next;
+	}
+	else {
+		dtrace_modctl_list = ctl->mod_next;
+	}
+
+	/* There should NEVER be user symbols allocated at this point */
+	ASSERT(ctl->mod_user_symbols == NULL);
+
+	kmem_free (ctl, sizeof(modctl_t));
+}
+	
+#endif /* __APPLE__ */
+
+/*
+ * APPLE NOTE: The kext loader will call dtrace_module_loaded
+ * when the kext is loaded in memory, but before calling the
+ * kext's start routine.
+ *
+ * Return 0 on success
+ * Return -1 on failure
+ */
+	
+#if !defined (__APPLE__)
 static void
 dtrace_module_loaded(struct modctl *ctl)
+#else
+static int
+dtrace_module_loaded(struct kmod_info *kmod)
+#endif /* __APPLE__ */
 {
 	dtrace_provider_t *prv;
 
-	lck_mtx_lock(&dtrace_provider_lock);
-	lck_mtx_lock(&mod_lock);
-
 #if !defined(__APPLE__)
+	mutex_enter(&dtrace_provider_lock);
+	mutex_enter(&mod_lock);
+	
 	ASSERT(ctl->mod_busy);
 #else
-	/* FIXME: awaits kmod awareness PR_4648477. */
-#endif /* __APPLE__ */
+		
+	/*
+	 * If kernel symbols have been disabled, return immediately.
+	 * DTRACE_KERNEL_SYMBOLS_NEVER is a permanent mode; it is safe to
+	 * test it without holding locks.
+	 */
+	if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER)
+		return 0;
+	
+	struct modctl *ctl = NULL;
+	if (!kmod || kmod->address == 0 || kmod->size == 0)
+		return(-1);
+		
+	lck_mtx_lock(&dtrace_provider_lock);
+	lck_mtx_lock(&mod_lock);	
+	
+	/*
+	 * Have we seen this kext before?
+	 */
 
+	ctl = dtrace_modctl_lookup(kmod);
+
+	if (ctl != NULL) {
+		/* bail... we already have this kext in the modctl list */
+		lck_mtx_unlock(&mod_lock);
+		lck_mtx_unlock(&dtrace_provider_lock);
+		if (dtrace_err_verbose)
+			cmn_err(CE_WARN, "dtrace load module already exists '%s %u' is failing against '%s %u'", kmod->name, (uint_t)kmod->id, ctl->mod_modname, ctl->mod_id);
+		return(-1);
+	}
+	else {
+		ctl = kmem_alloc(sizeof(struct modctl), KM_SLEEP);
+		if (ctl == NULL) {
+			if (dtrace_err_verbose)
+				cmn_err(CE_WARN, "dtrace module load '%s %u' is failing ", kmod->name, (uint_t)kmod->id);
+			lck_mtx_unlock(&mod_lock);
+			lck_mtx_unlock(&dtrace_provider_lock);
+			return (-1);
+		}
+		ctl->mod_next = NULL;
+		ctl->mod_stale = NULL;
+		strlcpy (ctl->mod_modname, kmod->name, sizeof(ctl->mod_modname));
+		ctl->mod_loadcnt = kmod->id;
+		ctl->mod_nenabled = 0;
+		ctl->mod_address  = kmod->address;
+		ctl->mod_size = kmod->size;
+		ctl->mod_id = kmod->id;
+		ctl->mod_loaded = 1;
+		ctl->mod_flags = 0;
+		ctl->mod_user_symbols = NULL;
+		
+		/*
+		 * Find the UUID for this module, if it has one
+		 */
+		kernel_mach_header_t* header = (kernel_mach_header_t *)ctl->mod_address;
+		struct load_command* load_cmd = (struct load_command *)&header[1];
+		uint32_t i;
+		for (i = 0; i < header->ncmds; i++) {
+			if (load_cmd->cmd == LC_UUID) {
+				struct uuid_command* uuid_cmd = (struct uuid_command *)load_cmd;
+				memcpy(ctl->mod_uuid, uuid_cmd->uuid, sizeof(uuid_cmd->uuid));
+				ctl->mod_flags |= MODCTL_HAS_UUID;
+				break;
+			}
+			load_cmd = (struct load_command *)((caddr_t)load_cmd + load_cmd->cmdsize);
+		}
+		
+		if (ctl->mod_address == g_kernel_kmod_info.address) {
+			ctl->mod_flags |= MODCTL_IS_MACH_KERNEL;
+		}
+	}
+	dtrace_modctl_add(ctl);
+	
+	/*
+	 * We must hold dtrace_lock to safely test the non-permanent
+	 * dtrace_kernel_symbol_mode values.
+	 */
+	lck_mtx_lock(&dtrace_lock);
+	
+	/*
+	 * If the module has a valid UUID and we expect its symbols from
+	 * userspace, defer instrumentation until those symbols arrive. Without
+	 * a UUID we cannot find symbols for it from userspace, so go ahead and
+	 * instrument it now.
+	 */
+	if (MOD_HAS_UUID(ctl) && (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE)) {
+		lck_mtx_unlock(&dtrace_lock);
+		lck_mtx_unlock(&mod_lock);
+		lck_mtx_unlock(&dtrace_provider_lock);
+		return 0;
+	}
+	
+	ctl->mod_flags |= MODCTL_HAS_KERNEL_SYMBOLS;
+	
+	lck_mtx_unlock(&dtrace_lock);
+#endif /* __APPLE__ */
+	
 	/*
 	 * We're going to call each provider's per-module provide operation
 	 * specifying only this module.
 	 */
 	for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
-		prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
-
+		prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);	
+	
+#if defined(__APPLE__)
+	/*
+	 * The contract with the kext loader is that once this function has completed,
+	 * it may delete kernel symbols at will. We must set this while still holding
+	 * the mod_lock.
+	 */
+	ctl->mod_flags &= ~MODCTL_HAS_KERNEL_SYMBOLS;
+#endif
+	
 	lck_mtx_unlock(&mod_lock);
 	lck_mtx_unlock(&dtrace_provider_lock);
-
+	
 	/*
 	 * If we have any retained enablings, we need to match against them.
 	 * Enabling probes requires that cpu_lock be held, and we cannot hold
@@ -16112,17 +16421,22 @@ dtrace_module_loaded(struct modctl *ctl)
 	 * our task queue to do the match for us.
 	 */
 	lck_mtx_lock(&dtrace_lock);
-
+	
 	if (dtrace_retained == NULL) {
 		lck_mtx_unlock(&dtrace_lock);
+#if !defined(__APPLE__)
 		return;
+#else
+		return 0;
+#endif
 	}
-
+	
+#if !defined(__APPLE__)
 	(void) taskq_dispatch(dtrace_taskq,
-	    (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP);
-
-	lck_mtx_unlock(&dtrace_lock);
-
+			      (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP);
+	
+	mutex_exit(&dtrace_lock);
+	
 	/*
 	 * And now, for a little heuristic sleaze:  in general, we want to
 	 * match modules as soon as they load.  However, we cannot guarantee
@@ -16134,8 +16448,23 @@ dtrace_module_loaded(struct modctl *ctl)
 	 * just loaded may not be immediately instrumentable.
 	 */
 	delay(1);
+#else
+	/* APPLE NOTE!
+	 *
+	 * The cpu_lock mentioned above is only held by dtrace code; Apple's
+	 * xnu never actually holds it for any other reason. The comment above
+	 * therefore does not apply here: we can invoke dtrace_enabling_matchall
+	 * directly, without jumping through all the hoops, and we can avoid
+	 * the delay call as well.
+	 */
+	lck_mtx_unlock(&dtrace_lock);
+	
+	dtrace_enabling_matchall();
+	
+	return 0;
+#endif /* __APPLE__ */
 }
-
+	
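/*
 * A userspace-flavored sketch of the LC_UUID walk performed by
 * dtrace_module_loaded() above, written against <mach-o/loader.h> for a
 * 64-bit image. The function name is an assumption for illustration; the
 * kernel code operates on kernel_mach_header_t instead.
 */
#include <mach-o/loader.h>
#include <stdint.h>
#include <string.h>

static int
find_image_uuid(const struct mach_header_64 *mh, uint8_t out_uuid[16])
{
	const struct load_command *lc = (const struct load_command *)(mh + 1);
	uint32_t i;

	for (i = 0; i < mh->ncmds; i++) {
		if (lc->cmd == LC_UUID) {
			const struct uuid_command *uc =
			    (const struct uuid_command *)lc;
			memcpy(out_uuid, uc->uuid, sizeof(uc->uuid));
			return (0);
		}
		lc = (const struct load_command *)
		    ((const char *)lc + lc->cmdsize);
	}
	return (-1);	/* no LC_UUID load command present */
}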
+#if !defined(__APPLE__)
 static void
 dtrace_module_unloaded(struct modctl *ctl)
 {
@@ -16144,27 +16473,27 @@ dtrace_module_unloaded(struct modctl *ctl)
 
 	template.dtpr_mod = ctl->mod_modname;
 
-	lck_mtx_lock(&dtrace_provider_lock);
-	lck_mtx_lock(&mod_lock);
-	lck_mtx_lock(&dtrace_lock);
+	mutex_enter(&dtrace_provider_lock);
+	mutex_enter(&mod_lock);
+	mutex_enter(&dtrace_lock);
 
 	if (dtrace_bymod == NULL) {
 		/*
 		 * The DTrace module is loaded (obviously) but not attached;
 		 * we don't have any work to do.
 		 */
-		lck_mtx_unlock(&dtrace_provider_lock);
-		lck_mtx_unlock(&mod_lock);
-		lck_mtx_unlock(&dtrace_lock);
+		mutex_exit(&dtrace_provider_lock);
+		mutex_exit(&mod_lock);
+		mutex_exit(&dtrace_lock);
 		return;
 	}
 
 	for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
 	    probe != NULL; probe = probe->dtpr_nextmod) {
 		if (probe->dtpr_ecb != NULL) {
-			lck_mtx_unlock(&dtrace_provider_lock);
-			lck_mtx_unlock(&mod_lock);
-			lck_mtx_unlock(&dtrace_lock);
+			mutex_exit(&dtrace_provider_lock);
+			mutex_exit(&mod_lock);
+			mutex_exit(&dtrace_lock);
 
 			/*
 			 * This shouldn't _actually_ be possible -- we're
@@ -16222,25 +16551,185 @@ dtrace_module_unloaded(struct modctl *ctl)
 		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
 		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
 		vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
-#if !defined(__APPLE__)
 		kmem_free(probe, sizeof (dtrace_probe_t));
-#else
-		zfree(dtrace_probe_t_zone, probe);
-#endif /* __APPLE__ */
 	}
 
-	lck_mtx_unlock(&dtrace_lock);
-	lck_mtx_unlock(&mod_lock);
-	lck_mtx_unlock(&dtrace_provider_lock);
+	mutex_exit(&dtrace_lock);
+	mutex_exit(&mod_lock);
+	mutex_exit(&dtrace_provider_lock);
 }
+#else  /* __APPLE__ */
 
-void
-dtrace_suspend(void)
+/*
+ * Return 0 on success
+ * Return -1 on failure
+ */
+static int
+dtrace_module_unloaded(struct kmod_info *kmod)
 {
-	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
-}
+	dtrace_probe_t template, *probe, *first, *next;
+	dtrace_provider_t *prov;
+	struct modctl *ctl = NULL;
+	struct modctl *syncctl = NULL;
+	struct modctl *nextsyncctl = NULL;
+	int syncmode = 0;
+	
+	lck_mtx_lock(&dtrace_provider_lock);
+	lck_mtx_lock(&mod_lock);
+	lck_mtx_lock(&dtrace_lock);
 
-void
+	if (kmod == NULL) {
+	    syncmode = 1;
+	}
+	else {
+	    ctl = dtrace_modctl_lookup(kmod);
+	    if (ctl == NULL)
+	    {
+		lck_mtx_unlock(&dtrace_lock);
+		lck_mtx_unlock(&mod_lock);
+		lck_mtx_unlock(&dtrace_provider_lock);
+		return (-1);
+	    }
+	    ctl->mod_loaded = 0;
+	    ctl->mod_address = 0;
+	    ctl->mod_size = 0;
+	}
+	
+	if (dtrace_bymod == NULL) {
+		/*
+		 * The DTrace module is loaded (obviously) but not attached;
+		 * we don't have any work to do.
+		 */
+		if (ctl != NULL)
+			(void)dtrace_modctl_remove(ctl);
+		lck_mtx_unlock(&dtrace_provider_lock);
+		lck_mtx_unlock(&mod_lock);
+		lck_mtx_unlock(&dtrace_lock);
+		return (0);
+	}
+
+	/* Syncmode set means we target and traverse entire modctl list. */
+	if (syncmode)
+	    nextsyncctl = dtrace_modctl_list;
+
+syncloop:
+	if (syncmode)
+	{
+	    /* find a stale modctl struct */
+	    for (syncctl = nextsyncctl; syncctl != NULL; syncctl=syncctl->mod_next) {
+		if (syncctl->mod_address == 0)
+		    break;
+	    }
+	    if (syncctl==NULL)
+	    {
+		/* We have no more work to do */
+		lck_mtx_unlock(&dtrace_provider_lock);
+		lck_mtx_unlock(&mod_lock);
+		lck_mtx_unlock(&dtrace_lock);
+		return(0);
+	    }
+	    else {
+		/* keep track of next syncctl in case this one is removed */
+		nextsyncctl = syncctl->mod_next;
+		ctl = syncctl;
+	    }
+	}
+
+	template.dtpr_mod = ctl->mod_modname;
+	
+	for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
+	    probe != NULL; probe = probe->dtpr_nextmod) {
+	        if (probe->dtpr_ecb != NULL) {
+			/*
+			 * This shouldn't _actually_ be possible -- we're
+			 * unloading a module that has an enabled probe in it.
+			 * (It's normally up to the provider to make sure that
+			 * this can't happen.)  However, because dtps_enable()
+			 * doesn't have a failure mode, there can be an
+			 * enable/unload race.  Upshot:  we don't want to
+			 * assert, but we're not going to disable the
+			 * probe, either.
+			 */
+
+
+		        if (syncmode) {
+			    /* We're syncing, let's look at next in list */
+			    goto syncloop;
+			}
+
+			lck_mtx_unlock(&dtrace_provider_lock);
+			lck_mtx_unlock(&mod_lock);
+			lck_mtx_unlock(&dtrace_lock);
+		    
+			if (dtrace_err_verbose) {
+				cmn_err(CE_WARN, "unloaded module '%s' had "
+				    "enabled probes", ctl->mod_modname);
+			}
+			return(-1);
+		}
+	}
+
+	probe = first;
+
+	for (first = NULL; probe != NULL; probe = next) {
+		ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);
+
+		dtrace_probes[probe->dtpr_id - 1] = NULL;
+
+		next = probe->dtpr_nextmod;
+		dtrace_hash_remove(dtrace_bymod, probe);
+		dtrace_hash_remove(dtrace_byfunc, probe);
+		dtrace_hash_remove(dtrace_byname, probe);
+
+		if (first == NULL) {
+			first = probe;
+			probe->dtpr_nextmod = NULL;
+		} else {
+			probe->dtpr_nextmod = first;
+			first = probe;
+		}
+	}
+
+	/*
+	 * We've removed all of the module's probes from the hash chains and
+	 * from the probe array.  Now issue a dtrace_sync() to be sure that
+	 * everyone has cleared out from any probe array processing.
+	 */
+	dtrace_sync();
+
+	for (probe = first; probe != NULL; probe = first) {
+		first = probe->dtpr_nextmod;
+		prov = probe->dtpr_provider;
+		prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
+		    probe->dtpr_arg);
+		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
+		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
+		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
+		vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
+
+		zfree(dtrace_probe_t_zone, probe);
+	}
+
+	dtrace_modctl_remove(ctl);
+	
+	if (syncmode)
+	    goto syncloop;
+
+	lck_mtx_unlock(&dtrace_lock);
+	lck_mtx_unlock(&mod_lock);
+	lck_mtx_unlock(&dtrace_provider_lock);
+
+	return(0);
+}
+#endif /* __APPLE__ */
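/*
 * Usage note (a sketch; behavior as implemented above): passing a NULL
 * kmod_info runs the "syncmode" sweep, which walks every stale modctl
 * (mod_address == 0) and reaps those whose probes are no longer enabled.
 * dtrace_close() below makes exactly this call to collect orphaned kext
 * probes once the last consumer goes away.
 */
(void) dtrace_module_unloaded(NULL);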
+
+void
+dtrace_suspend(void)
+{
+	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
+}
+
+void
 dtrace_resume(void)
 {
 	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
@@ -16463,13 +16952,6 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
 	    dtrace_provider, NULL, NULL, "END", 0, NULL);
 	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
 	    dtrace_provider, NULL, NULL, "ERROR", 1, NULL);
-#elif defined(__ppc__) || defined(__ppc64__)
-	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
-	    dtrace_provider, NULL, NULL, "BEGIN", 2, NULL);
-	dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
-	    dtrace_provider, NULL, NULL, "END", 1, NULL);
-	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
-	    dtrace_provider, NULL, NULL, "ERROR", 4, NULL);
 #elif (defined(__i386__) || defined (__x86_64__))
 	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
 	    dtrace_provider, NULL, NULL, "BEGIN", 1, NULL);
@@ -16505,6 +16987,15 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
 	if (dtrace_anon.dta_enabling != NULL) {
 		ASSERT(dtrace_retained == dtrace_anon.dta_enabling);
 
+#if defined(__APPLE__)
+		/*
+		 * If there is anonymous dof, we should switch symbol modes.
+		 */
+		if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) {
+			dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL;
+		}
+#endif
+		
 		dtrace_enabling_provide(NULL);
 		state = dtrace_anon.dta_state;
 
@@ -16612,7 +17103,7 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
 	lck_mtx_unlock(&cpu_lock);
 
 	if (state == NULL) {
-		if (--dtrace_opens == 0)
+		if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)		    
 			(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
 		lck_mtx_unlock(&dtrace_lock);
 		return (EAGAIN);
@@ -16624,7 +17115,7 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
 	lck_mtx_unlock(&cpu_lock);
 
 	if (rv != 0 || state == NULL) {
-		if (--dtrace_opens == 0)
+		if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
 			(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
 		lck_mtx_unlock(&dtrace_lock);
 		/* propagate EAGAIN or ERESTART */
@@ -16656,6 +17147,27 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
 	}
 
 	lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
+
+	/*
+	 * Update kernel symbol state.
+	 *
+	 * We must own the provider and dtrace locks. 
+	 *
+	 * NOTE! It may appear there is a race by setting this value so late
+	 * after dtrace_probe_provide. However, any kext loaded after the
+	 * call to probe provide and before we set LAZY_OFF will be marked as
+	 * eligible for symbols from userspace. The same dtrace that is currently
+	 * calling dtrace_open() (this call!) will get a list of kexts needing
+	 * symbols and fill them in, thus closing the race window.
+	 *
+	 * We want to set this value only after it is certain it will succeed, as
+	 * this significantly reduces the complexity of error exits.
+	 */
+	lck_mtx_lock(&dtrace_lock);
+	if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) {
+		dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL;
+	}
+	lck_mtx_unlock(&dtrace_lock);
 #endif /* __APPLE__ */
 
 	return (0);
@@ -16691,31 +17203,52 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
 
 	dtrace_state_destroy(state);
 	ASSERT(dtrace_opens > 0);
-	if (--dtrace_opens == 0)
-		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
 
+	/*
+	 * Only relinquish control of the kernel debugger interface when there
+	 * are no consumers and no anonymous enablings.
+	 */
+	if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
+		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
+	
 	lck_mtx_unlock(&dtrace_lock);
 	lck_mtx_unlock(&cpu_lock);
 
 #if defined(__APPLE__)
-
 	/*
 	 * Lock ordering requires the dof mode lock be taken before
 	 * the dtrace_lock.
 	 */
 	lck_rw_lock_exclusive(&dtrace_dof_mode_lock);
 	lck_mtx_lock(&dtrace_lock);
+	
+	if (dtrace_opens == 0) {
+		/*
+		 * If we are currently lazy-off, and this is the last close, transition to
+		 * lazy state.
+		 */
+		if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF) {
+			dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
+		}
 
-	/*
-	 * If we are currently lazy-off, and this is the last close, transition to
-	 * lazy state.
-	 */
-	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF && dtrace_opens == 0) {
-		dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
+		/*
+		 * If we are the last dtrace client, switch back to lazy (from userspace) symbols
+		 */
+		if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_KERNEL) {
+			dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
+		}
 	}
-
+	
 	lck_mtx_unlock(&dtrace_lock);
 	lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
+	
+	/*
+	 * Kext probes may be retained past the end of the kext's lifespan. The
+	 * probes are kept until the last reference to them has been removed.
+	 * Since closing an active dtrace context is likely to drop that last reference,
+	 * let's take a shot at cleaning out the orphaned probes now.
+	 */
+	dtrace_module_unloaded(NULL);
 #endif /* __APPLE__ */
 
 	return (0);
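/*
 * A summary sketch of the dtrace_kernel_symbol_mode transitions implied by
 * dtrace_attach(), dtrace_open(), and dtrace_close() above (mode names from
 * this patch; see dtrace_impl.h for the full description):
 *
 *   FROM_USERSPACE --(dtrace_open, or anonymous DOF at attach)--> FROM_KERNEL
 *   FROM_KERNEL ------(last dtrace_close)----------------------> FROM_USERSPACE
 *   NEVER and ALWAYS_FROM_KERNEL are permanent boot-time modes; they never
 *   transition, which is why they may be tested without holding dtrace_lock.
 */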
@@ -18437,8 +18970,254 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv
 		return (0);
 	}
 
-	default:
-		break;
+	case DTRACEIOC_MODUUIDSLIST: {
+		size_t module_uuids_list_size;
+		dtrace_module_uuids_list_t* uuids_list;
+		uint64_t dtmul_count;
+		
+		/*
+		 * Fail if the kernel symbol mode makes this operation illegal.
+		 * Both NEVER & ALWAYS_FROM_KERNEL are permanent states, it is legal to check
+		 * for them without holding the dtrace_lock.
+		 */		
+		if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER ||
+		    dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) {
+			cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_MODUUIDSLIST", dtrace_kernel_symbol_mode);
+			return (EPERM);
+		}
+			
+		/*
+		 * Read the number of module UUIDs being passed in.
+		 */
+		if (copyin(arg + offsetof(dtrace_module_uuids_list_t, dtmul_count),
+			   &dtmul_count,
+			   sizeof(dtmul_count))) {
+			cmn_err(CE_WARN, "failed to copyin dtmul_count");
+			return (EFAULT);
+		}
+		
+		/*
+		 * Range check the count. More than 2k kexts is probably an error.
+		 */
+		if (dtmul_count > 2048) {
+			cmn_err(CE_WARN, "dtmul_count is not valid");
+			return (EINVAL);
+		}
+
+		/*
+		 * For all queries, we return EINVAL when the user specified
+		 * count does not match the actual number of modules we find
+		 * available.
+		 *
+		 * If the user specified count is zero, then this serves as a
+		 * simple query to count the available modules in need of symbols.
+		 */
+		
+		rval = 0;
+
+		if (dtmul_count == 0)
+		{
+			lck_mtx_lock(&mod_lock);
+			struct modctl* ctl = dtrace_modctl_list;
+			while (ctl) {
+				ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
+				if (!MOD_SYMBOLS_DONE(ctl)) {
+					dtmul_count++;
+					rval = EINVAL;
+				}
+				ctl = ctl->mod_next;
+			}
+			lck_mtx_unlock(&mod_lock);
+			
+			if (copyout(&dtmul_count, arg, sizeof (dtmul_count)) != 0)
+				return (EFAULT);
+			else
+				return (rval);
+		}
+		
+		/*
+		 * If we reach this point, then we have a request for full list data.
+		 * Allocate a correctly sized structure and copyin the data.
+		 */
+		module_uuids_list_size = DTRACE_MODULE_UUIDS_LIST_SIZE(dtmul_count);
+		if ((uuids_list = kmem_alloc(module_uuids_list_size, KM_SLEEP)) == NULL) 
+			return (ENOMEM);
+		
+		/* NOTE! We can no longer exit this method via return */
+		if (copyin(arg, uuids_list, module_uuids_list_size) != 0) {
+			cmn_err(CE_WARN, "failed copyin of dtrace_module_uuids_list_t");
+			rval = EFAULT;
+			goto moduuidslist_cleanup;
+		}
+		
+		/*
+		 * Check that the count didn't change between the first copyin and the second.
+		 */
+		if (uuids_list->dtmul_count != dtmul_count) {
+			rval = EINVAL;
+			goto moduuidslist_cleanup;
+		}
+		
+		/*
+		 * Build the list of UUID's that need symbols
+		 */
+		lck_mtx_lock(&mod_lock);
+		
+		dtmul_count = 0;
+		
+		struct modctl* ctl = dtrace_modctl_list;
+		while (ctl) {
+			/*
+			 * We assume that userspace symbols will be "better" than kernel-level
+			 * symbols, as userspace can search for dSYMs and symbol-rich binaries.
+			 * Even if kernel symbols are available, request user symbols if the
+			 * module might use them.
+			 */
+			ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
+			if (!MOD_SYMBOLS_DONE(ctl)) {
+				UUID* uuid = &uuids_list->dtmul_uuid[dtmul_count];
+				if (dtmul_count++ < uuids_list->dtmul_count) {
+					memcpy(uuid, ctl->mod_uuid, sizeof(UUID));
+				}
+			}
+			ctl = ctl->mod_next;
+		}
+		
+		lck_mtx_unlock(&mod_lock);
+		
+		if (uuids_list->dtmul_count < dtmul_count)
+			rval = EINVAL;
+		
+		uuids_list->dtmul_count = dtmul_count;
+		
+		/*
+		 * Copyout the symbols list (or at least the count!)
+		 */
+		if (copyout(uuids_list, arg, module_uuids_list_size) != 0) {
+			cmn_err(CE_WARN, "failed copyout of dtrace_symbolsdesc_list_t");
+			rval = EFAULT;
+		}
+		
+	moduuidslist_cleanup:
+		/*
+		 * If we had to allocate struct memory, free it.
+		 */
+		if (uuids_list != NULL) {
+			kmem_free(uuids_list, module_uuids_list_size);
+		}
+		
+		return rval;
+	}
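/*
 * A userspace usage sketch for the two-phase query above. Assumptions for
 * illustration: fd is an open /dev/dtrace descriptor, the structures come
 * from the DTrace headers, dtmul_count leads the structure (as the phase-one
 * copyout implies), and error handling and includes are trimmed.
 */
dtrace_module_uuids_list_t query = { .dtmul_count = 0 };

/* Phase one: the kernel writes back the real count (EINVAL flags mismatch). */
(void) ioctl(fd, DTRACEIOC_MODUUIDSLIST, &query);

size_t size = DTRACE_MODULE_UUIDS_LIST_SIZE(query.dtmul_count);
dtrace_module_uuids_list_t *list = calloc(1, size);
list->dtmul_count = query.dtmul_count;

/* Phase two: a correctly sized buffer receives the UUIDs. */
if (ioctl(fd, DTRACEIOC_MODUUIDSLIST, list) == 0) {
	/* list->dtmul_uuid[0 .. dtmul_count - 1] name the kexts
	 * still waiting for symbols from userspace. */
}
free(list);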
+
+	case DTRACEIOC_PROVMODSYMS: {
+		size_t module_symbols_size;
+		dtrace_module_symbols_t* module_symbols;
+		uint64_t dtmodsyms_count;
+				
+		/*
+		 * Fail if the kernel symbol mode makes this operation illegal.
+		 * Both NEVER & ALWAYS_FROM_KERNEL are permanent states, it is legal to check
+		 * for them without holding the dtrace_lock.
+		 */
+		if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER ||
+		    dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) {
+			cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_PROVMODSYMS", dtrace_kernel_symbol_mode);
+			return (EPERM);
+		}
+		
+		/*
+		 * Read the number of module symbols structs being passed in.
+		 */
+		if (copyin(arg + offsetof(dtrace_module_symbols_t, dtmodsyms_count),
+			   &dtmodsyms_count,
+			   sizeof(dtmodsyms_count))) {
+			cmn_err(CE_WARN, "failed to copyin dtmodsyms_count");
+			return (EFAULT);
+		}
+		
+		/*
+		 * Range check the count. How much data can we pass around?
+		 * FIX ME!
+		 */
+		if (dtmodsyms_count == 0 || (dtmodsyms_count > 100 * 1024)) {
+			cmn_err(CE_WARN, "dtmodsyms_count is not valid");
+			return (EINVAL);
+		}
+			
+		/*
+		 * Allocate a correctly sized structure and copyin the data.
+		 */
+		module_symbols_size = DTRACE_MODULE_SYMBOLS_SIZE(dtmodsyms_count);
+		if ((module_symbols = kmem_alloc(module_symbols_size, KM_SLEEP)) == NULL) 
+			return (ENOMEM);
+			
+		rval = 0;
+
+		/* NOTE! We can no longer exit this method via return */
+		if (copyin(arg, module_symbols, module_symbols_size) != 0) {
+			cmn_err(CE_WARN, "failed copyin of dtrace_module_symbols_t, symbol count %llu", module_symbols->dtmodsyms_count);
+			rval = EFAULT;
+			goto module_symbols_cleanup;
+		}
+			
+		/*
+		 * Check that the count didn't change between the first copyin and the second.
+		 */
+		if (module_symbols->dtmodsyms_count != dtmodsyms_count) {
+			rval = EINVAL;
+			goto module_symbols_cleanup;
+		}
+			
+		/*
+		 * Find the modctl to add symbols to.
+		 */
+		lck_mtx_lock(&dtrace_provider_lock);
+		lck_mtx_lock(&mod_lock);
+		
+		struct modctl* ctl = dtrace_modctl_list;
+		while (ctl) {
+			ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
+			if (MOD_HAS_UUID(ctl) && !MOD_SYMBOLS_DONE(ctl)) {
+				if (memcmp(module_symbols->dtmodsyms_uuid, ctl->mod_uuid, sizeof(UUID)) == 0) {
+					/* BINGO! */
+					ctl->mod_user_symbols = module_symbols;
+					break;
+				}
+			}
+			ctl = ctl->mod_next;
+		}
+
+		if (ctl) {
+			dtrace_provider_t *prv;
+
+			/*
+			 * We're going to call each provider's per-module provide operation
+			 * specifying only this module.
+			 */
+			for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
+				prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);	
+						
+			/*
+			 * We gave every provider a chance to provide with the user syms;
+			 * go ahead and clear them now.
+			 */
+			ctl->mod_user_symbols = NULL; /* MUST reset this to clear HAS_USERSPACE_SYMBOLS */
+		}
+		
+		lck_mtx_unlock(&mod_lock);
+		lck_mtx_unlock(&dtrace_provider_lock);
+
+	module_symbols_cleanup:
+		/*
+		 * If we had to allocate struct memory, free it.
+		 */
+		if (module_symbols != NULL) {
+			kmem_free(module_symbols, module_symbols_size);
+		}
+		
+		return rval;
+	}
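/*
 * A companion sketch for DTRACEIOC_PROVMODSYMS, continuing the previous
 * example (nsyms, uuid, and fd assumed; the layout of the symbol entries that
 * follow the header is not shown in this patch, so it is left as a comment):
 * after resolving symbols for one UUID, userspace pushes them down one module
 * at a time.
 */
size_t symsize = DTRACE_MODULE_SYMBOLS_SIZE(nsyms);
dtrace_module_symbols_t *syms = calloc(1, symsize);

memcpy(syms->dtmodsyms_uuid, uuid, sizeof(UUID));
syms->dtmodsyms_count = nsyms;
/* ... fill in the nsyms symbol entries that follow the header ... */

(void) ioctl(fd, DTRACEIOC_PROVMODSYMS, syms);
free(syms);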
+
+	default:
+		break;
 	}
 
 	return (ENOTTY);
@@ -18912,12 +19691,14 @@ dtrace_init( void )
 		lck_mtx_init(&cpu_lock, dtrace_lck_grp, dtrace_lck_attr);
 		lck_mtx_init(&mod_lock, dtrace_lck_grp, dtrace_lck_attr);
 
+		dtrace_modctl_list = NULL;
+
 		cpu_core = (cpu_core_t *)kmem_zalloc( ncpu * sizeof(cpu_core_t), KM_SLEEP );
 		for (i = 0; i < ncpu; ++i) {
 			lck_mtx_init(&cpu_core[i].cpuc_pid_lock, dtrace_lck_grp, dtrace_lck_attr);
 		}
 
-		cpu_list = (cpu_t *)kmem_zalloc( ncpu * sizeof(cpu_t), KM_SLEEP );
+		cpu_list = (dtrace_cpu_t *)kmem_zalloc( ncpu * sizeof(dtrace_cpu_t), KM_SLEEP );
 		for (i = 0; i < ncpu; ++i) {
 			cpu_list[i].cpu_id = (processorid_t)i;
 			cpu_list[i].cpu_next = &(cpu_list[(i+1) % ncpu]);
@@ -18965,6 +19746,14 @@ dtrace_init( void )
 				break;
 		}
 
+		/*
+		 * See dtrace_impl.h for a description of kernel symbol modes.
+		 * The default is to wait for symbols from userspace (lazy symbols).
+		 */
+		if (!PE_parse_boot_argn("dtrace_kernel_symbol_mode", &dtrace_kernel_symbol_mode, sizeof (dtrace_kernel_symbol_mode))) {
+			dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
+		}
+				
 		gDTraceInited = 1;
 
 	} else
@@ -18974,12 +19763,29 @@ dtrace_init( void )
 void
 dtrace_postinit(void)
 {
-		/*
-		 * Called from bsd_init after all provider's *_init() routines have been
-		 * run. That way, anonymous DOF enabled under dtrace_attach() is safe
-		 * to go.
-		 */
-		dtrace_attach( (dev_info_t *)(uintptr_t)makedev(gMajDevNo, 0), 0 ); /* Punning a dev_t to a dev_info_t* */
+	/*
+	 * Called from bsd_init after all provider's *_init() routines have been
+	 * run. That way, anonymous DOF enabled under dtrace_attach() is safe
+	 * to go.
+	 */
+	dtrace_attach( (dev_info_t *)(uintptr_t)makedev(gMajDevNo, 0), 0 ); /* Punning a dev_t to a dev_info_t* */
+	
+	/*
+	 * Add the mach_kernel to the module list for lazy processing
+	 */
+	struct kmod_info fake_kernel_kmod;
+	memset(&fake_kernel_kmod, 0, sizeof(fake_kernel_kmod));
+	
+	strlcpy(fake_kernel_kmod.name, "mach_kernel", sizeof(fake_kernel_kmod.name));
+	fake_kernel_kmod.id = 1;
+	fake_kernel_kmod.address = g_kernel_kmod_info.address;
+	fake_kernel_kmod.size = g_kernel_kmod_info.size;
+
+	if (dtrace_module_loaded(&fake_kernel_kmod) != 0) {
+		printf("dtrace_postinit: Could not register mach_kernel modctl\n");
+	}
+	
+	(void)OSKextRegisterKextsWithDTrace();
 }
 #undef DTRACE_MAJOR
 
diff --git a/bsd/dev/dtrace/dtrace_glue.c b/bsd/dev/dtrace/dtrace_glue.c
index 6d4586e2c..a046e3eac 100644
--- a/bsd/dev/dtrace/dtrace_glue.c
+++ b/bsd/dev/dtrace/dtrace_glue.c
@@ -227,7 +227,7 @@ done:
 lck_mtx_t cpu_lock;
 lck_mtx_t mod_lock;
 
-cpu_t *cpu_list;
+dtrace_cpu_t *cpu_list;
 cpu_core_t *cpu_core; /* XXX TLB lockdown? */
 
 /*
@@ -267,41 +267,38 @@ PRIV_POLICY_ONLY(void *cr, int priv, int boolean)
 	return kauth_cred_issuser(cr); /* XXX TODO: HAS_PRIVILEGE(cr, priv); */
 }
 
+/* XXX Get around const poisoning using structure assigns */
 gid_t
-crgetgid(const cred_t *cr) { return cr->cr_groups[0]; }
+crgetgid(const cred_t *cr) { cred_t copy_cr = *cr; return kauth_cred_getgid(&copy_cr); }
 
 uid_t
-crgetuid(const cred_t *cr) { return cr->cr_uid; }
+crgetuid(const cred_t *cr) { cred_t copy_cr = *cr; return kauth_cred_getuid(&copy_cr); }
 
 /*
  * "cyclic"
  */
 
 /* osfmk/kern/timer_call.h */
-typedef void            *call_entry_param_t;
-typedef void            (*call_entry_func_t)(
-								call_entry_param_t      param0,
-								call_entry_param_t      param1);
-
-typedef struct call_entry {
-	queue_chain_t       q_link;
-	call_entry_func_t   func;
-	call_entry_param_t  param0;
-	call_entry_param_t  param1;
-	uint64_t            deadline;
-	enum {
-		IDLE,
-		PENDING,
-		DELAYED }         state;
-} call_entry_data_t;
-
-
-typedef struct call_entry   *timer_call_t;
 typedef void                *timer_call_param_t;
 typedef void                (*timer_call_func_t)(
 	timer_call_param_t      param0,
 	timer_call_param_t      param1);
 
+typedef struct timer_call {
+	queue_chain_t       q_link;
+	queue_t             queue;
+	timer_call_func_t   func;
+	timer_call_param_t  param0;
+	timer_call_param_t  param1;
+	decl_simple_lock_data(,lock);
+	uint64_t            deadline;
+	uint64_t            soft_deadline;
+	uint32_t            flags;
+	boolean_t	    async_dequeue;
+} timer_call_data_t;
+
+typedef struct timer_call   *timer_call_t;
+
 extern void
 timer_call_setup(
 	timer_call_t            call,
@@ -312,7 +309,13 @@ extern boolean_t
 timer_call_enter1(
 	timer_call_t            call,
 	timer_call_param_t      param1,
-	uint64_t                deadline);
+	uint64_t                deadline,
+	uint32_t		flags);
+
+#ifndef TIMER_CALL_CRITICAL
+#define TIMER_CALL_CRITICAL 0x1
+#define TIMER_CALL_LOCAL    0x2
+#endif /* TIMER_CALL_CRITICAL */
 
 extern boolean_t
 timer_call_cancel(
@@ -322,7 +325,7 @@ typedef struct wrap_timer_call {
 	cyc_handler_t hdlr;
 	cyc_time_t when;
 	uint64_t deadline;
-	struct call_entry call;
+	struct timer_call call;
 } wrap_timer_call_t;
 
 #define WAKEUP_REAPER 0x7FFFFFFFFFFFFFFFLL
@@ -337,7 +340,7 @@ _timer_call_apply_cyclic( void *ignore, void *vTChdl )
 	(*(wrapTC->hdlr.cyh_func))( wrapTC->hdlr.cyh_arg );
 
 	clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, mach_absolute_time(), &(wrapTC->deadline) );
-	timer_call_enter1( &(wrapTC->call), (void *)wrapTC, wrapTC->deadline );
+	timer_call_enter1( &(wrapTC->call), (void *)wrapTC, wrapTC->deadline, TIMER_CALL_CRITICAL | TIMER_CALL_LOCAL );
 
 	/* Did timer_call_remove_cyclic request a wakeup call when this timer call was re-armed? */
 	if (wrapTC->when.cyt_interval == WAKEUP_REAPER)
@@ -359,7 +362,7 @@ timer_call_add_cyclic(wrap_timer_call_t *wrapTC, cyc_handler_t *handler, cyc_tim
 	wrapTC->deadline = now;
 
 	clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, now, &(wrapTC->deadline) );
-	timer_call_enter1( &(wrapTC->call), (void *)wrapTC, wrapTC->deadline );
+	timer_call_enter1( &(wrapTC->call), (void *)wrapTC, wrapTC->deadline, TIMER_CALL_CRITICAL | TIMER_CALL_LOCAL );
 
 	return (cyclic_id_t)wrapTC;
 }
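/*
 * A usage sketch for the timer_call-backed cyclic emulation above (handler,
 * interval, and storage are assumptions for illustration; cyc_handler_t and
 * cyc_time_t come from the dtrace glue headers):
 */
static void
example_tick(void *arg)
{
	(void)arg;
	/* invoked every cyt_interval nanoseconds */
}

static void
example_arm_cyclic(void)
{
	static wrap_timer_call_t wrapTC;
	cyc_handler_t hdlr = { .cyh_func = example_tick, .cyh_arg = NULL };
	cyc_time_t when = { .cyt_when = 0, .cyt_interval = 10 * 1000 * 1000 };

	/* 10ms period; the wrapper re-arms itself from
	 * _timer_call_apply_cyclic() after each firing. */
	(void) timer_call_add_cyclic(&wrapTC, &hdlr, &when);
}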
diff --git a/bsd/dev/dtrace/dtrace_subr.c b/bsd/dev/dtrace/dtrace_subr.c
index 3d8e65309..c3a69c48f 100644
--- a/bsd/dev/dtrace/dtrace_subr.c
+++ b/bsd/dev/dtrace/dtrace_subr.c
@@ -49,11 +49,14 @@ int (*dtrace_fasttrap_probe_ptr)(struct regs *);
  * They're assigned in dtrace.c but Darwin never calls them.
  */
 void (*dtrace_cpu_init)(processorid_t);
+#if !defined(__APPLE__)
 void (*dtrace_modload)(struct modctl *);
 void (*dtrace_modunload)(struct modctl *);
-#if defined(__APPLE__)
+#else
+int (*dtrace_modload)(struct kmod_info *);
+int (*dtrace_modunload)(struct kmod_info *);
 void (*dtrace_helpers_cleanup)(proc_t *);
-#endif
+#endif  /*__APPLE__*/
 void (*dtrace_helpers_fork)(proc_t *, proc_t *);
 void (*dtrace_cpustart_init)(void);
 void (*dtrace_cpustart_fini)(void);
diff --git a/bsd/dev/dtrace/fasttrap.c b/bsd/dev/dtrace/fasttrap.c
index 814778290..f75e9df72 100644
--- a/bsd/dev/dtrace/fasttrap.c
+++ b/bsd/dev/dtrace/fasttrap.c
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -961,7 +961,7 @@ fasttrap_disable_callbacks(void)
 	ASSERT(fasttrap_pid_count > 0);
 	fasttrap_pid_count--;
 	if (fasttrap_pid_count == 0) {
-		cpu_t *cur, *cpu = CPU;
+		dtrace_cpu_t *cur, *cpu = CPU;
 
 		/*
 		 * APPLE NOTE: This loop seems broken, it touches every CPU
@@ -987,7 +987,7 @@ fasttrap_disable_callbacks(void)
 }
 
 /*ARGSUSED*/
-static void
+static int
 fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg)
 {
 #pragma unused(arg, id)
@@ -1016,7 +1016,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg)
 	 * provider can't go away while we're in this code path.
 	 */
 	if (probe->ftp_prov->ftp_retired)
-		return;
+		return (0);
 
 	/*
 	 * If we can't find the process, it may be that we're in the context of
@@ -1030,11 +1030,11 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg)
 		 * does not return processes with SIDL set, but we always return
 		 * the child process.
 		 */
-		return;
+		return (0);
 #else
 
 		if ((curproc->p_flag & SFORKING) == 0)
-			return;
+			return (0);
 
 		lck_mtx_lock(&pidlock);
 		p = prfind(probe->ftp_pid);
@@ -1109,7 +1109,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg)
 			 * drop our reference on the trap table entry.
 			 */
 			fasttrap_disable_callbacks();
-			return;
+			return (0);
 		}
 	}
 
@@ -1117,6 +1117,7 @@ fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg)
 	sprunlock(p);
 
 	probe->ftp_enabled = 1;
+	return (0);
 }
 
 /*ARGSUSED*/
@@ -2155,9 +2156,6 @@ fasttrap_meta_create_probe(void *arg, void *parg,
 		 * Both 32 & 64 bit want to go back one byte, to point at the first NOP
 		 */
 		tp->ftt_pc = dhpb->dthpb_base + (int64_t)dhpb->dthpb_offs[i] - 1;
-#elif defined(__ppc__)
-		/* All PPC probes are zero offset. */
-		tp->ftt_pc = dhpb->dthpb_base + (int64_t)dhpb->dthpb_offs[i];
 #else
 #error "Architecture not supported"
 #endif
@@ -2199,9 +2197,6 @@ fasttrap_meta_create_probe(void *arg, void *parg,
 		 * Both 32 & 64 bit want to go forward two bytes, to point at a single byte nop.
 		 */
 		tp->ftt_pc = dhpb->dthpb_base + (int64_t)dhpb->dthpb_enoffs[j] + 2;
-#elif defined(__ppc__)
-		/* All PPC is-enabled probes are zero offset. */
-		tp->ftt_pc = dhpb->dthpb_base + (int64_t)dhpb->dthpb_enoffs[j];
 #else
 #error "Architecture not supported"
 #endif
@@ -2294,7 +2289,8 @@ fasttrap_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *
 
 		probe = kmem_alloc(size, KM_SLEEP);
 
-		if (copyin(arg, probe, size) != 0) {
+		if (copyin(arg, probe, size) != 0 ||
+		    probe->ftps_noffs != noffs) {
 			kmem_free(probe, size);
 			return (EFAULT);
 		}
diff --git a/bsd/dev/dtrace/fbt.c b/bsd/dev/dtrace/fbt.c
index 94e15da00..5a6570ed1 100644
--- a/bsd/dev/dtrace/fbt.c
+++ b/bsd/dev/dtrace/fbt.c
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -52,16 +52,10 @@
 
 /* #include <machine/trap.h> */
 struct savearea_t; /* Used anonymously */
-typedef kern_return_t (*perfCallback)(int, struct savearea_t *, int, int);
+typedef kern_return_t (*perfCallback)(int, struct savearea_t *, uintptr_t *, int);
 
-#if defined (__ppc__) || defined (__ppc64__)
-extern perfCallback tempDTraceTrapHook, tempDTraceIntHook;
-extern kern_return_t fbt_perfCallback(int, struct savearea_t *, int, int);
-extern kern_return_t fbt_perfIntCallback(int, struct savearea_t *, int, int);
-#else
 extern perfCallback tempDTraceTrapHook;
-extern kern_return_t fbt_perfCallback(int, struct savearea_t *, int, int);
-#endif
+extern kern_return_t fbt_perfCallback(int, struct savearea_t *, uintptr_t *);
 
 #define	FBT_ADDR2NDX(addr)	((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
 #define	FBT_PROBETAB_SIZE	0x8000		/* 32k entries -- 128K total */
@@ -111,25 +105,42 @@ fbt_destroy(void *arg, dtrace_id_t id, void *parg)
 }
 
 /*ARGSUSED*/
-static void
+int
 fbt_enable(void *arg, dtrace_id_t id, void *parg)
 {
 #pragma unused(arg,id)
 	fbt_probe_t *fbt = parg;
-	struct modctl *ctl = fbt->fbtp_ctl;
+	struct modctl *ctl = NULL;
+
+    for (; fbt != NULL; fbt = fbt->fbtp_next) {
 
-#if defined (__ppc__) || defined (__ppc64__)
-	dtrace_casptr(&tempDTraceIntHook, NULL, fbt_perfIntCallback);
-	if (tempDTraceIntHook != (perfCallback)fbt_perfIntCallback) {
+	ctl = fbt->fbtp_ctl;
+	
+	if (!ctl->mod_loaded) {
 		if (fbt_verbose) {
-			cmn_err(CE_NOTE, "fbt_enable is failing for probe %s "
-			    "in module %s: tempDTraceIntHook already occupied.",
+			cmn_err(CE_NOTE, "fbt is failing for probe %s "
+			    "(module %s unloaded)",
 			    fbt->fbtp_name, ctl->mod_modname);
 		}
-		return;
+
+		continue;
 	}
-#endif
-	
+
+	/*
+	 * Now check that our modctl has the expected load count.  If it
+	 * doesn't, this module must have been unloaded and reloaded -- and
+	 * we're not going to touch it.
+	 */
+	if (ctl->mod_loadcnt != fbt->fbtp_loadcnt) {
+		if (fbt_verbose) {
+			cmn_err(CE_NOTE, "fbt is failing for probe %s "
+			    "(module %s reloaded)",
+			    fbt->fbtp_name, ctl->mod_modname);
+		}
+
+		continue;
+	}	
+
 	dtrace_casptr(&tempDTraceTrapHook, NULL, fbt_perfCallback);
 	if (tempDTraceTrapHook != (perfCallback)fbt_perfCallback) {
 		if (fbt_verbose) {
@@ -137,14 +148,21 @@ fbt_enable(void *arg, dtrace_id_t id, void *parg)
 			    "in module %s: tempDTraceTrapHook already occupied.",
 			    fbt->fbtp_name, ctl->mod_modname);
 		}
-		return;
+		continue;
 	}
 
-	for (; fbt != NULL; fbt = fbt->fbtp_next)
+	if (fbt->fbtp_currentval != fbt->fbtp_patchval) {
 		(void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_patchval, (vm_offset_t)fbt->fbtp_patchpoint, 
 								sizeof(fbt->fbtp_patchval));
-		
-	dtrace_membar_consumer();
+		fbt->fbtp_currentval = fbt->fbtp_patchval;
+		ctl->mod_nenabled++;
+	}
+
+    }
+
+    dtrace_membar_consumer();
+
+    return (0);
 }
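/*
 * A sketch of the invariant the fbtp_currentval bookkeeping above maintains
 * (helper name assumed, for illustration): patching becomes idempotent, so
 * repeated enables/disables write the instruction at most once and keep the
 * modctl's mod_nenabled count accurate.
 */
static void
fbt_patch_once(fbt_probe_t *fbt, int enable)
{
	if (enable) {
		if (fbt->fbtp_currentval == fbt->fbtp_patchval)
			return;	/* already patched */
		(void)ml_nofault_copy((vm_offset_t)&fbt->fbtp_patchval,
		    (vm_offset_t)fbt->fbtp_patchpoint,
		    sizeof(fbt->fbtp_patchval));
		fbt->fbtp_currentval = fbt->fbtp_patchval;
	} else {
		if (fbt->fbtp_currentval == fbt->fbtp_savedval)
			return;	/* already restored */
		(void)ml_nofault_copy((vm_offset_t)&fbt->fbtp_savedval,
		    (vm_offset_t)fbt->fbtp_patchpoint,
		    sizeof(fbt->fbtp_savedval));
		fbt->fbtp_currentval = fbt->fbtp_savedval;
	}
}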
 
 /*ARGSUSED*/
@@ -153,11 +171,22 @@ fbt_disable(void *arg, dtrace_id_t id, void *parg)
 {
 #pragma unused(arg,id)
 	fbt_probe_t *fbt = parg;
+	struct modctl *ctl = NULL;
+
+	for (; fbt != NULL; fbt = fbt->fbtp_next) {
+	    ctl = fbt->fbtp_ctl;
+	    
+	    if (!ctl->mod_loaded || (ctl->mod_loadcnt != fbt->fbtp_loadcnt))
+		continue;
 
-	for (; fbt != NULL; fbt = fbt->fbtp_next)
+	    if (fbt->fbtp_currentval != fbt->fbtp_savedval) {
 		(void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_savedval, (vm_offset_t)fbt->fbtp_patchpoint, 
 								sizeof(fbt->fbtp_savedval));
-		
+		fbt->fbtp_currentval = fbt->fbtp_savedval;
+		ASSERT(ctl->mod_nenabled > 0);
+		ctl->mod_nenabled--;
+	    }
+	}
 	dtrace_membar_consumer();
 }
 
@@ -167,11 +196,20 @@ fbt_suspend(void *arg, dtrace_id_t id, void *parg)
 {
 #pragma unused(arg,id)
 	fbt_probe_t *fbt = parg;
+	struct modctl *ctl = NULL;
 
-	for (; fbt != NULL; fbt = fbt->fbtp_next)
-		(void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_savedval, (vm_offset_t)fbt->fbtp_patchpoint, 
+	for (; fbt != NULL; fbt = fbt->fbtp_next) {
+	    ctl = fbt->fbtp_ctl;
+
+	    ASSERT(ctl->mod_nenabled > 0);
+	    if (!ctl->mod_loaded || (ctl->mod_loadcnt != fbt->fbtp_loadcnt))
+		continue;
+
+	    (void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_savedval, (vm_offset_t)fbt->fbtp_patchpoint, 
 								sizeof(fbt->fbtp_savedval));
-		
+	    fbt->fbtp_currentval = fbt->fbtp_savedval;
+	}
+	
 	dtrace_membar_consumer();
 }
 
@@ -181,34 +219,30 @@ fbt_resume(void *arg, dtrace_id_t id, void *parg)
 {
 #pragma unused(arg,id)
 	fbt_probe_t *fbt = parg;
-	struct modctl *ctl = fbt->fbtp_ctl;
+	struct modctl *ctl = NULL;
 
-#if defined (__ppc__) || defined (__ppc64__)
-	dtrace_casptr(&tempDTraceIntHook, NULL, fbt_perfIntCallback);
-	if (tempDTraceIntHook != (perfCallback)fbt_perfIntCallback) {
-		if (fbt_verbose) {
-			cmn_err(CE_NOTE, "fbt_enable is failing for probe %s "
-			    "in module %s: tempDTraceIntHook already occupied.",
-			    fbt->fbtp_name, ctl->mod_modname);
-		}
-		return;
-	}
-#endif
+	for (; fbt != NULL; fbt = fbt->fbtp_next) {
+	    ctl = fbt->fbtp_ctl;
+
+	    ASSERT(ctl->mod_nenabled > 0);
+	    if (!ctl->mod_loaded || (ctl->mod_loadcnt != fbt->fbtp_loadcnt))
+		continue;
 	
-	dtrace_casptr(&tempDTraceTrapHook, NULL, fbt_perfCallback);
-	if (tempDTraceTrapHook != (perfCallback)fbt_perfCallback) {
+	    dtrace_casptr(&tempDTraceTrapHook, NULL, fbt_perfCallback);
+	    if (tempDTraceTrapHook != (perfCallback)fbt_perfCallback) {
 		if (fbt_verbose) {
 			cmn_err(CE_NOTE, "fbt_resume is failing for probe %s "
 			    "in module %s: tempDTraceTrapHook already occupied.",
 			    fbt->fbtp_name, ctl->mod_modname);
 		}
 		return;
-	}
+	    }
 	
-	for (; fbt != NULL; fbt = fbt->fbtp_next)
-		(void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_patchval, (vm_offset_t)fbt->fbtp_patchpoint, 
+	    (void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_patchval, (vm_offset_t)fbt->fbtp_patchpoint, 
 								sizeof(fbt->fbtp_patchval));
-		
+  	    fbt->fbtp_currentval = fbt->fbtp_patchval;
+	}
+	
 	dtrace_membar_consumer();
 }
 
@@ -422,8 +456,8 @@ static struct cdevsw fbt_cdevsw =
 	0					/* type */
 };
 
-static int gDisableFBT = 0;
-struct modctl g_fbt_kernctl;
+int gIgnoreFBTBlacklist = 0;
+static int gFBTInited = 0;
 #undef kmem_alloc /* from its binding to dt_kmem_alloc glue */
 #undef kmem_free /* from its binding to dt_kmem_free glue */
 #include <vm/vm_kern.h>
@@ -431,66 +465,22 @@ struct modctl g_fbt_kernctl;
 void
 fbt_init( void )
 {
-
-	PE_parse_boot_argn("DisableFBT", &gDisableFBT, sizeof (gDisableFBT));
-
-	if (0 == gDisableFBT)
+	if (0 == gFBTInited)
 	{
 		int majdevno = cdevsw_add(FBT_MAJOR, &fbt_cdevsw);
-		unsigned long size = 0, header_size, round_size;
-	   	kern_return_t ret;
-		void *p, *q;
 		
 		if (majdevno < 0) {
 			printf("fbt_init: failed to allocate a major number!\n");
 			return;
 		}
-
-		/*
-		 * Capture the kernel's mach_header in its entirety and the contents of
-		 * its LINKEDIT segment (and only that segment). This is sufficient to
-		 * build all the fbt probes lazily the first time a client looks to
-		 * the fbt provider. Remeber these on the global struct modctl g_fbt_kernctl.
-		 */
-		header_size = sizeof(kernel_mach_header_t) + _mh_execute_header.sizeofcmds;
-		p = getsegdatafromheader(&_mh_execute_header, SEG_LINKEDIT, &size);
-
-        round_size = round_page(header_size + size);
-		/* "q" will accomodate copied kernel_mach_header_t, its load commands, and LINKEIT segment. */
-		ret = kmem_alloc_pageable(kernel_map, (vm_offset_t *)&q, round_size);
-
-		if (p && (ret == KERN_SUCCESS)) {
-			kernel_segment_command_t *sgp;
-
-			bcopy( (void *)&_mh_execute_header, q, header_size);
-			bcopy( p, (char *)q + header_size, size);
-
-			sgp = getsegbynamefromheader(q, SEG_LINKEDIT);
-
-			if (sgp) {
-				sgp->vmaddr = (uintptr_t)((char *)q + header_size);
-				g_fbt_kernctl.address = (vm_address_t)q;
-				g_fbt_kernctl.size = header_size + size;
-			} else {
-				kmem_free(kernel_map, (vm_offset_t)q, round_size);
-				g_fbt_kernctl.address = (vm_address_t)NULL;
-				g_fbt_kernctl.size = 0;
-			}
-		} else {
-			if (ret == KERN_SUCCESS)
-				kmem_free(kernel_map, (vm_offset_t)q, round_size);
-			g_fbt_kernctl.address = (vm_address_t)NULL;
-			g_fbt_kernctl.size = 0;
-		}
-
-		strncpy((char *)&(g_fbt_kernctl.mod_modname), "mach_kernel", KMOD_MAX_NAME);
-		((char *)&(g_fbt_kernctl.mod_modname))[KMOD_MAX_NAME -1] = '\0';
+		
+		PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist));
 
 		fbt_attach( (dev_info_t	*)(uintptr_t)majdevno, DDI_ATTACH );
-
-		gDisableFBT = 1; /* Ensure this initialization occurs just one time. */
+		
+		gFBTInited = 1; /* Ensure this initialization occurs just one time. */
 	}
 	else
-		printf("fbt_init: DisableFBT non-zero, no FBT probes will be provided.\n");
+		panic("fbt_init: called twice!\n");
 }
 #undef FBT_MAJOR
diff --git a/bsd/dev/dtrace/lockstat.c b/bsd/dev/dtrace/lockstat.c
index 0f9d6d4ff..a9f003e65 100644
--- a/bsd/dev/dtrace/lockstat.c
+++ b/bsd/dev/dtrace/lockstat.c
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -59,10 +59,6 @@
 #define	NOP	0x90
 #define	RET	0xc3
 #define LOCKSTAT_AFRAMES 1
-#elif	__ppc__
-#define	NOP	0x60000000
-#define RET	0x4e800020	/* blr */
-#define LOCKSTAT_AFRAMES 2
 #else
 #error "not ported to this architecture"
 #endif
@@ -188,11 +184,6 @@ void lockstat_hot_patch(boolean_t active)
 		instr = (active ? NOP : RET );
 		(void) ml_nofault_copy( (vm_offset_t)&instr, *(assembly_probes[i]), 
 								sizeof(instr));
-#endif
-#ifdef __ppc__
-		uint32_t instr;
-		instr = (active ? NOP : RET );
-		(void) ml_nofault_copy( (vm_offset_t)&instr, *(assembly_probes[i]), sizeof(instr));
 #endif
 	}
 }
@@ -206,7 +197,7 @@ static dev_info_t	*lockstat_devi;	/* saved in xxattach() for xxinfo() */
 static dtrace_provider_id_t lockstat_id;
 
 /*ARGSUSED*/
-static void
+static int
 lockstat_enable(void *arg, dtrace_id_t id, void *parg)
 {
 #pragma unused(arg) /* __APPLE__ */
@@ -220,6 +211,7 @@ lockstat_enable(void *arg, dtrace_id_t id, void *parg)
 
 	lockstat_hot_patch(TRUE);
 	membar_producer();
+	return(0);
 
 }
 
diff --git a/bsd/dev/dtrace/profile_prvd.c b/bsd/dev/dtrace/profile_prvd.c
index a74254c5c..69f3aadd5 100644
--- a/bsd/dev/dtrace/profile_prvd.c
+++ b/bsd/dev/dtrace/profile_prvd.c
@@ -49,6 +49,7 @@
 #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
 #include <kern/cpu_data.h>
 #include <kern/thread.h>
+#include <kern/assert.h>
 #include <mach/thread_status.h>
 
 #include <sys/param.h>
@@ -65,9 +66,9 @@
 
 #include <sys/dtrace_glue.h>
 
-#if defined(__ppc__) || defined(__ppc64__)
-extern struct savearea *find_kern_regs(thread_t);
-#elif defined(__i386__) || defined(__x86_64__)
+#include <machine/pal_routines.h>
+
+#if defined(__i386__) || defined(__x86_64__)
 extern x86_saved_state_t *find_kern_regs(thread_t);
 #else
 #error Unknown architecture
@@ -127,9 +128,7 @@ static dtrace_provider_id_t profile_id;
 
 #else /* is Mac OS X */
 
-#if defined(__ppc__) || defined(__ppc64__)
-#define PROF_ARTIFICIAL_FRAMES 8
-#elif defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__)
 #define PROF_ARTIFICIAL_FRAMES  9
 #else
 #error Unknown architecture
@@ -185,7 +184,6 @@ static int profile_ticks[] = {
 static uint32_t profile_max;		/* maximum number of profile probes */
 static uint32_t profile_total;	/* current number of profile probes */
 
-
 static void
 profile_fire(void *arg)
 {
@@ -200,22 +198,7 @@ profile_fire(void *arg)
 	dtrace_probe(prof->prof_id, CPU->cpu_profile_pc,
 	    CPU->cpu_profile_upc, late, 0, 0);
 #else
-#if defined(__ppc__) || defined(__ppc64__)
-	{
-	struct savearea *sv = find_kern_regs(current_thread());
-
-	if (sv) {
-		if (USERMODE(sv->save_srr1)) {
-			dtrace_probe(prof->prof_id, 0x0, sv->save_srr0, late, 0, 0);
-		} else {
-			dtrace_probe(prof->prof_id, sv->save_srr0, 0x0, late, 0, 0);
-		}
-	} else {
-		dtrace_probe(prof->prof_id, 0xcafebabe,
-	    	0x0, late, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
-	}
-	}
-#elif defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__)
 	x86_saved_state_t *kern_regs = find_kern_regs(current_thread());
 
 	if (NULL != kern_regs) {
@@ -228,6 +211,7 @@ profile_fire(void *arg)
 #error Unknown arch
 #endif
 	} else {
+		pal_register_cache_state(current_thread(), VALID);
 		/* Possibly a user interrupt */
 		x86_saved_state_t   *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
 
@@ -260,22 +244,7 @@ profile_tick(void *arg)
 	dtrace_probe(prof->prof_id, CPU->cpu_profile_pc,
 	    CPU->cpu_profile_upc, 0, 0, 0);
 #else
-#if defined(__ppc__) || defined(__ppc64__)
-	{
-	struct savearea *sv = find_kern_regs(current_thread());
-
-	if (sv) {
-		if (USERMODE(sv->save_srr1)) {
-			dtrace_probe(prof->prof_id, 0x0, sv->save_srr0, 0, 0, 0);
-		} else {
-			dtrace_probe(prof->prof_id, sv->save_srr0, 0x0, 0, 0, 0);
-		}
-	} else {
-		dtrace_probe(prof->prof_id, 0xcafebabe,
-	    	0x0, 0, 0, 0); /* XXX_BOGUS also see profile_usermode() below. */
-	}
-	}
-#elif defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__)
 	x86_saved_state_t *kern_regs = find_kern_regs(current_thread());
 
 	if (NULL != kern_regs) {
@@ -288,6 +257,7 @@ profile_tick(void *arg)
 #error Unknown arch
 #endif
 	} else {
+		pal_register_cache_state(current_thread(), VALID);
 		/* Possibly a user interrupt */
 		x86_saved_state_t   *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
 
@@ -550,7 +520,7 @@ profile_destroy(void *arg, dtrace_id_t id, void *parg)
 
 /*ARGSUSED*/
 static void
-profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
+profile_online(void *arg, dtrace_cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
 {
 #pragma unused(cpu) /* __APPLE__ */
 	profile_probe_t *prof = arg;
@@ -580,7 +550,7 @@ profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
 
 /*ARGSUSED*/
 static void
-profile_offline(void *arg, cpu_t *cpu, void *oarg)
+profile_offline(void *arg, dtrace_cpu_t *cpu, void *oarg)
 {
 	profile_probe_percpu_t *pcpu = oarg;
 
@@ -593,7 +563,7 @@ profile_offline(void *arg, cpu_t *cpu, void *oarg)
 }
 
 /*ARGSUSED*/
-static void
+static int
 profile_enable(void *arg, dtrace_id_t id, void *parg)
 {
 #pragma unused(arg,id) /* __APPLE__ */
@@ -636,6 +606,7 @@ profile_enable(void *arg, dtrace_id_t id, void *parg)
 		prof->prof_cyclic = (cyclic_id_t)cyclic_add_omni(&omni); /* cast puns cyclic_id_list_t with cyclic_id_t */
 	}
 #endif /* __APPLE__ */
+	return(0);
 }
 
 /*ARGSUSED*/
diff --git a/bsd/dev/dtrace/sdt.c b/bsd/dev/dtrace/sdt.c
index 725ab5585..bca167f01 100644
--- a/bsd/dev/dtrace/sdt.c
+++ b/bsd/dev/dtrace/sdt.c
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -46,18 +46,12 @@
 #include <sys/dtrace_glue.h>
 
 #include <sys/sdt_impl.h>
+extern int dtrace_kernel_symbol_mode;
 
 struct savearea_t; /* Used anonymously */
-typedef kern_return_t (*perfCallback)(int, struct savearea_t *, int, int);
+typedef kern_return_t (*perfCallback)(int, struct savearea_t *, uintptr_t *, int);
 
-#if defined (__ppc__) || defined (__ppc64__)
-extern perfCallback tempDTraceTrapHook, tempDTraceIntHook;
-extern kern_return_t fbt_perfCallback(int, struct savearea_t *, int, int);
-extern kern_return_t fbt_perfIntCallback(int, struct savearea_t *, int, int);
-
-#define	SDT_PATCHVAL	0x7c810808
-#define SDT_AFRAMES     6
-#elif defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__)
 extern perfCallback tempDTraceTrapHook;
 extern kern_return_t fbt_perfCallback(int, struct savearea_t *, int, int);
 
@@ -86,7 +80,7 @@ static void
 __sdt_provide_module(void *arg, struct modctl *ctl)
 {
 #pragma unused(arg)
-	struct module *mp = (struct module *)ctl->address;
+	struct module *mp = (struct module *)ctl->mod_address;
 	char *modname = ctl->mod_modname;
 	sdt_probedesc_t *sdpd;
 	sdt_probe_t *sdp, *old;
@@ -220,14 +214,13 @@ sdt_destroy(void *arg, dtrace_id_t id, void *parg)
 }
 
 /*ARGSUSED*/
-static void
+static int
 sdt_enable(void *arg, dtrace_id_t id, void *parg)
 {
 #pragma unused(arg,id)
 	sdt_probe_t *sdp = parg;
 	struct modctl *ctl = sdp->sdp_ctl;
 
-#if !defined(__APPLE__)
 	ctl->mod_nenabled++;
 
 	/*
@@ -256,20 +249,7 @@ sdt_enable(void *arg, dtrace_id_t id, void *parg)
 		}
 		goto err;
 	}
-#endif /* __APPLE__ */
 
-#if defined (__ppc__) || defined (__ppc64__)
-	dtrace_casptr(&tempDTraceIntHook, NULL, fbt_perfIntCallback);
-	if (tempDTraceIntHook != (perfCallback)fbt_perfIntCallback) {
-		if (sdt_verbose) {
-			cmn_err(CE_NOTE, "sdt_enable is failing for probe %s "
-			    "in module %s: tempDTraceIntHook already occupied.",
-			    sdp->sdp_name, ctl->mod_modname);
-		}
-		return;
-	}
-#endif
-	
 	dtrace_casptr(&tempDTraceTrapHook, NULL, fbt_perfCallback);
 	if (tempDTraceTrapHook != (perfCallback)fbt_perfCallback) {
 		if (sdt_verbose) {
@@ -277,7 +257,7 @@ sdt_enable(void *arg, dtrace_id_t id, void *parg)
 			    "in module %s: tempDTraceTrapHook already occupied.",
 			    sdp->sdp_name, ctl->mod_modname);
 		}
-		return;
+		return (0);
 	}
 
 	while (sdp != NULL) {
@@ -285,10 +265,9 @@ sdt_enable(void *arg, dtrace_id_t id, void *parg)
 		                       (vm_size_t)sizeof(sdp->sdp_patchval));
 		sdp = sdp->sdp_next;
 	}
-#if !defined(__APPLE__)
+
 err:
-#endif /* __APPLE__ */
-	;
+	return (0);
 }
 
 /*ARGSUSED*/
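
The surviving enable path above installs fbt_perfCallback into tempDTraceTrapHook with dtrace_casptr and returns early if another callback already occupies the hook. A rough user-space analog of that install-exactly-once pattern, using C11 atomics in place of dtrace_casptr (which this sketch does not attempt to reproduce):

#include <stdatomic.h>
#include <stdio.h>

typedef int (*hook_t)(int);

static _Atomic(hook_t) trap_hook = NULL;

static int my_callback(int trapno) { return trapno; }
static int other_callback(int trapno) { return -trapno; }

/* Returns 0 if cb now owns the slot, -1 if someone else got there first. */
static int
install_hook(hook_t cb)
{
	hook_t expected = NULL;
	if (atomic_compare_exchange_strong(&trap_hook, &expected, cb))
		return 0;
	return (expected == cb) ? 0 : -1;	/* already ours, or occupied */
}

int
main(void)
{
	printf("first install: %d\n", install_hook(my_callback));	/* 0 */
	printf("second install: %d\n", install_hook(other_callback));	/* -1 */
	return 0;
}
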
@@ -297,14 +276,12 @@ sdt_disable(void *arg, dtrace_id_t id, void *parg)
 {
 #pragma unused(arg,id)
 	sdt_probe_t *sdp = parg;
-#if !defined(__APPLE__)
 	struct modctl *ctl = sdp->sdp_ctl;
 
 	ctl->mod_nenabled--;
 
 	if (!ctl->mod_loaded || ctl->mod_loadcnt != sdp->sdp_loadcnt)
 		goto err;
-#endif /* __APPLE__ */
 
 	while (sdp != NULL) {
 		(void)ml_nofault_copy( (vm_offset_t)&sdp->sdp_savedval, (vm_offset_t)sdp->sdp_patchpoint, 
@@ -312,19 +289,10 @@ sdt_disable(void *arg, dtrace_id_t id, void *parg)
 		sdp = sdp->sdp_next;
 	}
 
-#if !defined(__APPLE__)
 err:
-#endif /* __APPLE__ */	
 	;
 }
 
-static uint64_t
-sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
-{
-#pragma unused(arg,id,parg)	/* __APPLE__ */
-	return dtrace_getarg(argno, aframes);
-}
-
 static dtrace_pops_t sdt_pops = {
 	NULL,
 	sdt_provide_module,
@@ -561,107 +529,116 @@ void sdt_init( void )
 		}
 
 		if (KERNEL_MAGIC != _mh_execute_header.magic) {
-        	g_sdt_kernctl.address = (vm_address_t)NULL;
-        	g_sdt_kernctl.size = 0;
+			g_sdt_kernctl.mod_address = (vm_address_t)NULL;
+			g_sdt_kernctl.mod_size = 0;
 		} else {
-		kernel_mach_header_t        *mh;
-    		struct load_command         *cmd;
-    		kernel_segment_command_t    *orig_ts = NULL, *orig_le = NULL;
-    		struct symtab_command       *orig_st = NULL;
-    		kernel_nlist_t		    *sym = NULL;
-    		char                        *strings;
-    		unsigned int 		    i;
-
-		g_sdt_mach_module.sdt_nprobes = 0;
-		g_sdt_mach_module.sdt_probes = NULL;
-
-        	g_sdt_kernctl.address = (vm_address_t)&g_sdt_mach_module;
-        	g_sdt_kernctl.size = 0;
-		strncpy((char *)&(g_sdt_kernctl.mod_modname), "mach_kernel", KMOD_MAX_NAME);
-
-		mh = &_mh_execute_header;
-    		cmd = (struct load_command*) &mh[1];
-    		for (i = 0; i < mh->ncmds; i++) {
-        		if (cmd->cmd == LC_SEGMENT_KERNEL) {
-            		kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd;
-
-            		if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT))
-                		orig_ts = orig_sg;
-            		else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT))
-                		orig_le = orig_sg;
-            		else if (LIT_STRNEQL(orig_sg->segname, ""))
-                		orig_ts = orig_sg; /* kexts have a single unnamed segment */
-        		}
-        		else if (cmd->cmd == LC_SYMTAB)
-            		orig_st = (struct symtab_command *) cmd;
-	
-        		cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
-    		}
-	
-    		if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
-        		return;
-
-		sym = (kernel_nlist_t *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff);
-		strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff);
-
-    		for (i = 0; i < orig_st->nsyms; i++) {
-        		uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
-        		char *name = strings + sym[i].n_un.n_strx;
+			kernel_mach_header_t        *mh;
+			struct load_command         *cmd;
+			kernel_segment_command_t    *orig_ts = NULL, *orig_le = NULL;
+			struct symtab_command       *orig_st = NULL;
+			kernel_nlist_t		    *sym = NULL;
+			char                        *strings;
+			unsigned int 		    i;
+			
+			g_sdt_mach_module.sdt_nprobes = 0;
+			g_sdt_mach_module.sdt_probes = NULL;
+			
+			g_sdt_kernctl.mod_address = (vm_address_t)&g_sdt_mach_module;
+			g_sdt_kernctl.mod_size = 0;
+			strncpy((char *)&(g_sdt_kernctl.mod_modname), "mach_kernel", KMOD_MAX_NAME);
+			
+			g_sdt_kernctl.mod_next = NULL;
+			g_sdt_kernctl.mod_stale = NULL;
+			g_sdt_kernctl.mod_id = 0;
+			g_sdt_kernctl.mod_loadcnt = 1;
+			g_sdt_kernctl.mod_loaded = 1;
+			g_sdt_kernctl.mod_flags = 0;
+			g_sdt_kernctl.mod_nenabled = 0;
+			
+			mh = &_mh_execute_header;
+			cmd = (struct load_command*) &mh[1];
+			for (i = 0; i < mh->ncmds; i++) {
+				if (cmd->cmd == LC_SEGMENT_KERNEL) {
+					kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd;
+					
+					if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT))
+						orig_ts = orig_sg;
+					else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT))
+						orig_le = orig_sg;
+					else if (LIT_STRNEQL(orig_sg->segname, ""))
+						orig_ts = orig_sg; /* kexts have a single unnamed segment */
+				}
+				else if (cmd->cmd == LC_SYMTAB)
+					orig_st = (struct symtab_command *) cmd;
+				
+				cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
+			}
+			
+			if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
+				return;
+			
+			sym = (kernel_nlist_t *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff);
+			strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff);
+			
+			for (i = 0; i < orig_st->nsyms; i++) {
+				uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
+				char *name = strings + sym[i].n_un.n_strx;
 				const char *prev_name;
 				unsigned long best;
 				unsigned int j;
-
-        		/* Check that the symbol is a global and that it has a name. */
-        		if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
-            		continue;
-
-        		if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */
-            		continue;
-
-        		/* Lop off omnipresent leading underscore. */
-        		if (*name == '_')
-            		name += 1;
-
-				if (strstr(name, DTRACE_PROBE_PREFIX)) {
+				
+				/* Check that the symbol is a global and that it has a name. */
+				if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
+					continue;
+				
+				if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */
+					continue;
+				
+				/* Lop off omnipresent leading underscore. */
+				if (*name == '_')
+					name += 1;
+				
+				if (strncmp(name, DTRACE_PROBE_PREFIX, sizeof(DTRACE_PROBE_PREFIX) - 1) == 0) {
 					sdt_probedesc_t *sdpd = kmem_alloc(sizeof(sdt_probedesc_t), KM_SLEEP);
 					int len = strlen(name) + 1;
-
+					
 					sdpd->sdpd_name = kmem_alloc(len, KM_SLEEP);
 					strncpy(sdpd->sdpd_name, name, len); /* NUL termination is ensured. */
-
+					
 					prev_name = "<unknown>";
 					best = 0;
 					
-					/* Avoid shadow build warnings */
+					/*
+					 * Find the symbol immediately preceding the sdt probe site just
+					 * discovered; that symbol names the function containing the sdt probe.
+					 */
 					for (j = 0; j < orig_st->nsyms; j++) {
 						uint8_t jn_type = sym[j].n_type & (N_TYPE | N_EXT);
 						char *jname = strings + sym[j].n_un.n_strx;
-
+						
 						if (((N_SECT | N_EXT) != jn_type && (N_ABS | N_EXT) != jn_type))
 							continue;
-
+						
 						if (0 == sym[j].n_un.n_strx) /* iff a null, "", name. */
 							continue;
-
+						
 						if (*jname == '_')
 							jname += 1;
-						if (strstr(jname, DTRACE_PROBE_PREFIX))
-							continue;
-
+						
 						if (*(unsigned long *)sym[i].n_value <= (unsigned long)sym[j].n_value)
 							continue;
-
+						
 						if ((unsigned long)sym[j].n_value > best) {
 							best = (unsigned long)sym[j].n_value;
 							prev_name = jname;
 						}
 					}
-
+					
 					sdpd->sdpd_func = kmem_alloc((len = strlen(prev_name) + 1), KM_SLEEP);
 					strncpy(sdpd->sdpd_func, prev_name, len); /* NUL termination is ensured. */
-
+					
 					sdpd->sdpd_offset = *(unsigned long *)sym[i].n_value;
-
+					
 					sdpd->sdpd_next = g_sdt_mach_module.sdt_probes;
 					g_sdt_mach_module.sdt_probes = sdpd;
 				} else {
@@ -669,9 +646,9 @@ void sdt_init( void )
 				}
 			}
 		}
-
+		
 		sdt_attach( (dev_info_t	*)(uintptr_t)majdevno, DDI_ATTACH );
-
+		
 		gSDTInited = 1;
 	} else
 		panic("sdt_init: called twice!\n");
@@ -683,19 +660,32 @@ void sdt_init( void )
 void
 sdt_provide_module(void *arg, struct modctl *ctl)
 {
-#pragma unused(ctl)
 #pragma unused(arg)
-    __sdt_provide_module(arg, &g_sdt_kernctl);
-
-	sdt_probedesc_t *sdpd = g_sdt_mach_module.sdt_probes;
-	while (sdpd) {
-		sdt_probedesc_t *this_sdpd = sdpd;
-		kmem_free((void *)sdpd->sdpd_name, strlen(sdpd->sdpd_name) + 1);
-		kmem_free((void *)sdpd->sdpd_func, strlen(sdpd->sdpd_func) + 1);
-		sdpd = sdpd->sdpd_next;
-		kmem_free((void *)this_sdpd, sizeof(sdt_probedesc_t));
+	ASSERT(ctl != NULL);
+	ASSERT(dtrace_kernel_symbol_mode != DTRACE_KERNEL_SYMBOLS_NEVER);
+	lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);
+	
+	if (MOD_SDT_DONE(ctl))
+		return;
+		
+	if (MOD_IS_MACH_KERNEL(ctl)) {
+		__sdt_provide_module(arg, &g_sdt_kernctl);
+		
+		sdt_probedesc_t *sdpd = g_sdt_mach_module.sdt_probes;
+		while (sdpd) {
+			sdt_probedesc_t *this_sdpd = sdpd;
+			kmem_free((void *)sdpd->sdpd_name, strlen(sdpd->sdpd_name) + 1);
+			kmem_free((void *)sdpd->sdpd_func, strlen(sdpd->sdpd_func) + 1);
+			sdpd = sdpd->sdpd_next;
+			kmem_free((void *)this_sdpd, sizeof(sdt_probedesc_t));
+		}
+		g_sdt_mach_module.sdt_probes = NULL;
+	} else {
+		/* FIXME -- sdt in kext not yet supported */
 	}
-	g_sdt_mach_module.sdt_probes = NULL;
+	
+	/* Need to mark this module as completed */
+	ctl->mod_flags |= MODCTL_SDT_PROBES_PROVIDED;
 }
 
 #endif /* __APPLE__ */
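
sdt_provide_module above now short-circuits when MOD_SDT_DONE(ctl) is set and records completion by or-ing MODCTL_SDT_PROBES_PROVIDED into ctl->mod_flags. The real macro and flag definitions live elsewhere in the patch; a plausible self-contained sketch of the bookkeeping, with assumed flag values and an assumed macro body:

#include <stdio.h>
#include <stdint.h>

/* Assumed flag layout -- illustrative only, not the kernel's values. */
#define MODCTL_SDT_PROBES_PROVIDED	0x01
#define MODCTL_SDT_INVALID		0x02

struct mock_modctl {
	uint32_t mod_flags;
};

/* "Done" once probes were provided, or the module was judged invalid. */
#define MOD_SDT_DONE(ctl) \
	((ctl)->mod_flags & (MODCTL_SDT_PROBES_PROVIDED | MODCTL_SDT_INVALID))

static void
provide_module(struct mock_modctl *ctl)
{
	if (MOD_SDT_DONE(ctl))
		return;			/* already handled on an earlier pass */
	printf("providing sdt probes\n");
	ctl->mod_flags |= MODCTL_SDT_PROBES_PROVIDED;
}

int
main(void)
{
	struct mock_modctl ctl = { 0 };
	provide_module(&ctl);	/* provides */
	provide_module(&ctl);	/* no-op on the second call */
	return 0;
}
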
diff --git a/bsd/dev/dtrace/sdt_subr.c b/bsd/dev/dtrace/sdt_subr.c
index 90ea1331a..891207713 100644
--- a/bsd/dev/dtrace/sdt_subr.c
+++ b/bsd/dev/dtrace/sdt_subr.c
@@ -92,6 +92,7 @@ sdt_provider_t sdt_providers[] = {
 	{ "proc", "__proc____", &stab_attr, 0 },
 	{ "io", "__io____", &stab_attr, 0 },
 	{ "ip", "__ip____", &stab_attr, 0 },
+	{ "tcp", "__tcp____", &stab_attr, 0 },
 	{ "mib", "__mib____", &stab_attr, 0 },
 	{ "fsinfo", "__fsinfo____", &fsinfo_attr, 0 },
 	{ "nfsv3", "__nfsv3____", &stab_attr, 0 },
@@ -808,21 +809,66 @@ sdt_argdesc_t sdt_args[] = {
 	    "nfsv4cbinfo_t *" },
 	{ "nfsv4", "cb-recall-done", 2, 2, "CB_RECALL4res *", NULL },
 
-	{ "ip", "send", 0, 0, "mblk_t *", "pktinfo_t *" },
-	{ "ip", "send", 1, 1, "conn_t *", "csinfo_t *" },
+	{ "ip", "send", 0, 0, "struct mbuf *", "pktinfo_t *" },
+	{ "ip", "send", 1, 1, "struct inpcb *", "csinfo_t *" },
 	{ "ip", "send", 2, 2, "void_ip_t *", "ipinfo_t *" },
-	{ "ip", "send", 3, 3, "__dtrace_ipsr_ill_t *", "ifinfo_t *" },
-	{ "ip", "send", 4, 4, "ipha_t *", "ipv4info_t *" },
-	{ "ip", "send", 5, 5, "ip6_t *", "ipv6info_t *" },
-	{ "ip", "send", 6, 6, "int", NULL }, /* used by __dtrace_ipsr_ill_t */
-	{ "ip", "receive", 0, 0, "mblk_t *", "pktinfo_t *" },
-	{ "ip", "receive", 1, 1, "conn_t *", "csinfo_t *" },
+	{ "ip", "send", 3, 3, "struct ifnet *", "ifinfo_t *" },
+	{ "ip", "send", 4, 4, "struct ip *", "ipv4info_t *" },
+	{ "ip", "send", 5, 5, "struct ip6_hdr *", "ipv6info_t *" },
+	{ "ip", "receive", 0, 0, "struct mbuf *", "pktinfo_t *" },
+	{ "ip", "receive", 1, 1, "struct inpcb *", "csinfo_t *" },
 	{ "ip", "receive", 2, 2, "void_ip_t *", "ipinfo_t *" },
-	{ "ip", "receive", 3, 3, "__dtrace_ipsr_ill_t *", "ifinfo_t *" },
-	{ "ip", "receive", 4, 4, "ipha_t *", "ipv4info_t *" },
-	{ "ip", "receive", 5, 5, "ip6_t *", "ipv6info_t *" },
-	{ "ip", "receive", 6, 6, "int", NULL }, /* used by __dtrace_ipsr_ill_t */
-
+	{ "ip", "receive", 3, 3, "struct ifnet *", "ifinfo_t *" },
+	{ "ip", "receive", 4, 4, "struct ip *", "ipv4info_t *" },
+	{ "ip", "receive", 5, 5, "struct ip6_hdr *", "ipv6info_t *" },
+
+	{ "tcp", "connect-established", 0, 0, "struct mbuf *", "pktinfo_t *" },
+	{ "tcp", "connect-established", 1, 1, "struct inpcb *", "csinfo_t *" },
+	{ "tcp", "connect-established", 2, 2, "void_ip_t *", "ipinfo_t *" },
+	{ "tcp", "connect-established", 3, 3, "struct tcpcb *", "tcpsinfo_t *" },
+	{ "tcp", "connect-established", 4, 4, "struct tcphdr *", "tcpinfo_t *" },
+	{ "tcp", "connect-refused", 0, 0, "struct mbuf *", "pktinfo_t *" },
+	{ "tcp", "connect-refused", 1, 1, "struct inpcb *", "csinfo_t *" },
+	{ "tcp", "connect-refused", 2, 2, "void_ip_t *", "ipinfo_t *" },
+	{ "tcp", "connect-refused", 3, 3, "struct tcpcb *", "tcpsinfo_t *" },
+	{ "tcp", "connect-refused", 4, 4, "struct tcphdr *", "tcpinfo_t *" },
+	{ "tcp", "connect-request", 0, 0, "struct mbuf *", "pktinfo_t *" },
+	{ "tcp", "connect-request", 1, 1, "struct inpcb *", "csinfo_t *" },
+	{ "tcp", "connect-request", 2, 2, "void_ip_t *", "ipinfo_t *" },
+	{ "tcp", "connect-request", 3, 3, "struct tcpcb *", "tcpsinfo_t *" },
+	{ "tcp", "connect-request", 4, 4, "struct tcphdr *", "tcpinfo_t *" },
+	{ "tcp", "accept-established", 0, 0, "struct mbuf *", "pktinfo_t *" },
+	{ "tcp", "accept-established", 1, 1, "struct inpcb *", "csinfo_t *" },
+	{ "tcp", "accept-established", 2, 2, "void_ip_t *", "ipinfo_t *" },
+	{ "tcp", "accept-established", 3, 3, "struct tcpcb *", "tcpsinfo_t *" },
+	{ "tcp", "accept-established", 4, 4, "struct tcphdr *", "tcpinfo_t *" },
+	{ "tcp", "accept-refused", 0, 0, "struct mbuf *", "pktinfo_t *" },
+	{ "tcp", "accept-refused", 1, 1, "struct inpcb *", "csinfo_t *" },
+	{ "tcp", "accept-refused", 2, 2, "void_ip_t *", "ipinfo_t *" },
+	{ "tcp", "accept-refused", 3, 3, "struct tcpcb *", "tcpsinfo_t *" },
+	{ "tcp", "accept-refused", 4, 4, "struct tcphdr *", "tcpinfo_t *" },
+	{ "tcp", "state-change", 0, 0, "void", "void" },
+	{ "tcp", "state-change", 1, 1, "struct inpcb *", "csinfo_t *" },
+	{ "tcp", "state-change", 2, 2, "struct tcpcb *", "tcpsinfo_t *" },
+	{ "tcp", "state-change", 3, 3, "int32_t", "tcpnsinfo_t *" },
+	{ "tcp", "send", 0, 0, "struct mbuf *", "pktinfo_t *" },
+	{ "tcp", "send", 1, 1, "struct inpcb *", "csinfo_t *" },
+	{ "tcp", "send", 2, 2, "void_ip_t *", "ipinfo_t *" },
+	{ "tcp", "send", 3, 3, "struct tcpcb *", "tcpsinfo_t *" },
+	{ "tcp", "send", 4, 4, "struct tcphdr *", "tcpinfo_t *" },
+	{ "tcp", "receive", 0, 0, "struct mbuf *", "pktinfo_t *" },
+	{ "tcp", "receive", 1, 1, "struct inpcb *", "csinfo_t *" },
+	{ "tcp", "receive", 2, 2, "void_ip_t *", "ipinfo_t *" },
+	{ "tcp", "receive", 3, 3, "struct tcpcb *", "tcpsinfo_t *" },
+	{ "tcp", "receive", 4, 4, "struct tcphdr *", "tcpinfo_t *" },
+	{ "tcp", "cc", 0, 0, "struct mbuf *", "pktinfo_t *"},
+	{ "tcp", "cc", 1, 1, "struct inpcb *", "csinfo_t *"},
+	{ "tcp", "cc", 2, 2, "struct tcpcb *", "tcpsinfo_t *"},
+	{ "tcp", "cc", 3, 3, "struct tcphdr *", "tcpinfo_t *"},
+	{ "tcp", "cc", 4, 4, "int32_t", "tcpccevent_t *"},
+	{ "tcp", "iaj", 0, 0, "struct tcpcb *", "tcpsinfo_t *"},
+	{ "tcp", "iaj", 1, 1, "uint32_t", NULL},
+	{ "tcp", "iaj", 2, 2, "uint32_t", NULL},
 	{ "sysevent", "post", 0, 0, "evch_bind_t *", "syseventchaninfo_t *" },
 	{ "sysevent", "post", 1, 1, "sysevent_impl_t *", "syseventinfo_t *" },
 
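
Each sdt_args row above maps one probe argument to its native kernel type and, where one exists, a translated type; arg4 of tcp:::send, for instance, is a struct tcphdr * surfaced to consumers as tcpinfo_t *. A simplified sketch of querying such a table (a stand-in for the real sdt_argdesc_t machinery):

#include <stdio.h>
#include <string.h>

typedef struct {
	const char *provider, *probe;
	int ndx, mapping;
	const char *native, *xlate;
} argdesc_t;

static const argdesc_t args[] = {
	{ "tcp", "send", 0, 0, "struct mbuf *",   "pktinfo_t *" },
	{ "tcp", "send", 4, 4, "struct tcphdr *", "tcpinfo_t *" },
};

static const argdesc_t *
lookup(const char *prov, const char *probe, int ndx)
{
	for (size_t i = 0; i < sizeof(args) / sizeof(args[0]); i++)
		if (strcmp(args[i].provider, prov) == 0 &&
		    strcmp(args[i].probe, probe) == 0 && args[i].ndx == ndx)
			return &args[i];
	return NULL;
}

int
main(void)
{
	const argdesc_t *a = lookup("tcp", "send", 4);
	if (a != NULL)
		printf("%s -> %s\n", a->native, a->xlate ? a->xlate : "(none)");
	return 0;
}
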
diff --git a/bsd/dev/dtrace/systrace.c b/bsd/dev/dtrace/systrace.c
index 74ab8a105..271b2a0e1 100644
--- a/bsd/dev/dtrace/systrace.c
+++ b/bsd/dev/dtrace/systrace.c
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -73,10 +73,9 @@ typedef x86_saved_state_t savearea_t;
 #include <sys/conf.h>
 #include <sys/user.h>
 
-#if defined (__ppc__) || defined (__ppc64__)
-#define	SYSTRACE_ARTIFICIAL_FRAMES	3
-#define MACHTRACE_ARTIFICIAL_FRAMES 4
-#elif defined(__i386__) || defined (__x86_64__)
+#include <machine/pal_routines.h>
+
+#if defined(__i386__) || defined (__x86_64__)
 #define	SYSTRACE_ARTIFICIAL_FRAMES	2
 #define MACHTRACE_ARTIFICIAL_FRAMES 3
 #else
@@ -107,7 +106,6 @@ systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
 #pragma unused(id,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7)
 }
 
-
 int32_t
 dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
 {
@@ -122,24 +120,10 @@ dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
 #endif
 	syscall_arg_t *ip = (syscall_arg_t *)uap;
 
-#if defined (__ppc__) || defined (__ppc64__)
-	{
-		savearea_t *regs = (savearea_t *)find_user_regs(current_thread());
-
-		flavor = (((unsigned int)regs->save_r0) == 0)? 1: 0;
-
-		if (flavor)
-			code = regs->save_r3;
-		else
-			code = regs->save_r0;
-
-		/*
-		 * FIXME: unix_syscall screens for "unsafe calls" and instead calls nosys(), *not* sysent[code] !
-		 */
-	}
-#elif defined(__i386__) || defined (__x86_64__)
+#if defined(__i386__) || defined (__x86_64__)
 #pragma unused(flavor)
 	{
+		pal_register_cache_state(current_thread(), VALID);
 		x86_saved_state_t   *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
 
 		if (is_saved_state64(tagged_regs)) {
@@ -482,7 +466,7 @@ systrace_destroy(void *arg, dtrace_id_t id, void *parg)
 }
 
 /*ARGSUSED*/
-static void
+static int
 systrace_enable(void *arg, dtrace_id_t id, void *parg)
 {
 #pragma unused(arg) /* __APPLE__ */
@@ -505,7 +489,7 @@ systrace_enable(void *arg, dtrace_id_t id, void *parg)
 
 	if (enabled) {
 		ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
-		return;
+		return(0);
 	}
 
 	(void) casptr(&sysent[sysnum].sy_callc,
@@ -516,6 +500,7 @@ systrace_enable(void *arg, dtrace_id_t id, void *parg)
 	    (void *)systrace_sysent32[sysnum].stsy_underlying,
 	    (void *)dtrace_systrace_syscall32);
 #endif
+	return (0);
 }
 
 /*ARGSUSED*/
@@ -740,17 +725,13 @@ typedef void    mach_munge_t(const void *, void *);
 typedef struct {
         int                     mach_trap_arg_count;
         int                     (*mach_trap_function)(void);
-#if defined(__i386__)
-        boolean_t               mach_trap_stack;
-#else
+#if 0 /* no active architectures use mungers for mach traps */
         mach_munge_t            *mach_trap_arg_munge32; /* system call arguments for 32-bit */
         mach_munge_t            *mach_trap_arg_munge64; /* system call arguments for 64-bit */
 #endif
-#if     !MACH_ASSERT
-        int                     mach_trap_unused;
-#else
+#if     MACH_ASSERT
         const char*             mach_trap_name;
-#endif /* !MACH_ASSERT */
+#endif /* MACH_ASSERT */
 } mach_trap_t;
 
 extern mach_trap_t              mach_trap_table[];
@@ -803,20 +784,10 @@ dtrace_machtrace_syscall(struct mach_call_args *args)
 	syscall_arg_t *ip = (syscall_arg_t *)args;
 	mach_call_t mach_call;
 
-#if defined (__ppc__) || defined (__ppc64__)
-	{
-		savearea_t *regs = (savearea_t *)find_user_regs(current_thread());
-
-		flavor = (((unsigned int)regs->save_r0) == 0)? 1: 0;
-
-		if (flavor)
-			code = -regs->save_r3;
-		else
-			code = -regs->save_r0;
-	}
-#elif defined(__i386__) || defined (__x86_64__)
+#if defined(__i386__) || defined (__x86_64__)
 #pragma unused(flavor)
 	{
+		pal_register_cache_state(current_thread(), VALID);
 		x86_saved_state_t   *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
 
 		if (is_saved_state64(tagged_regs)) {
@@ -937,7 +908,7 @@ machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
 }
 
 /*ARGSUSED*/
-static void
+static int
 machtrace_enable(void *arg, dtrace_id_t id, void *parg)
 {
 #pragma unused(arg) /* __APPLE__ */
@@ -954,12 +925,13 @@ machtrace_enable(void *arg, dtrace_id_t id, void *parg)
 
 	if (enabled) {
 	    ASSERT(sysent[sysnum].sy_callc == (void *)dtrace_machtrace_syscall);
-		return;
+	    return(0);
 	}
 
 	(void) casptr(&mach_trap_table[sysnum].mach_trap_function,
 		      (void *)machtrace_sysent[sysnum].stsy_underlying,
 		      (void *)dtrace_machtrace_syscall);
+	return(0);
 }
 
 /*ARGSUSED*/
diff --git a/bsd/dev/i386/conf.c b/bsd/dev/i386/conf.c
index 964f945bf..b7de69df7 100644
--- a/bsd/dev/i386/conf.c
+++ b/bsd/dev/i386/conf.c
@@ -288,6 +288,7 @@ struct cdevsw	cdevsw[] =
 };
 int	nchrdev = sizeof (cdevsw) / sizeof (cdevsw[0]);
 
+uint64_t cdevsw_flags[sizeof (cdevsw) / sizeof (cdevsw[0])];
 
 #include	<sys/vnode.h> /* for VCHR and VBLK */
 /*
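
The dtrace_isa.c hunks below retire the mp_broadcast() shim in favor of mp_cpus_call(), dispatching either to every CPU (DTRACE_CPUALL maps to CPUMASK_ALL) or to a single CPU's mask via cpu_to_cpumask(). A toy single-process sketch of that one-or-all dispatch shape; the mask type and helpers here are stand-ins, not the kernel's:

#include <stdio.h>
#include <stdint.h>

#define NCPUS		4
#define CPUALL		(-1)			/* stand-in for DTRACE_CPUALL */
#define MASK_ALL	((1u << NCPUS) - 1)

static uint32_t cpu_to_mask(int cpu) { return 1u << cpu; }

/* Toy stand-in for mp_cpus_call: runs fn "on" each CPU in the mask. */
static void
cpus_call(uint32_t mask, void (*fn)(void *), void *arg)
{
	for (int cpu = 0; cpu < NCPUS; cpu++)
		if (mask & (1u << cpu))
			fn(arg);
}

static void work(void *arg) { (*(int *)arg)++; }

static void
xcall(int cpu, void (*fn)(void *), void *arg)
{
	cpus_call(cpu == CPUALL ? MASK_ALL : cpu_to_mask(cpu), fn, arg);
}

int
main(void)
{
	int hits = 0;
	xcall(CPUALL, work, &hits);	/* all four "CPUs" */
	xcall(2, work, &hits);		/* just one */
	printf("hits = %d\n", hits);	/* 5 */
	return 0;
}
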
diff --git a/bsd/dev/i386/dtrace_isa.c b/bsd/dev/i386/dtrace_isa.c
index 65749f9df..88e789fce 100644
--- a/bsd/dev/i386/dtrace_isa.c
+++ b/bsd/dev/i386/dtrace_isa.c
@@ -48,6 +48,8 @@ typedef x86_saved_state_t savearea_t;
 #include <kern/sched_prim.h>
 #include <miscfs/devfs/devfs.h>
 #include <mach/vm_param.h>
+#include <machine/pal_routines.h>
+#include <i386/mp.h>
 
 /*
  * APPLE NOTE:  The regmap is used to decode which 64bit uregs[] register
@@ -126,11 +128,6 @@ dtrace_getipl(void)
 /*
  * MP coordination
  */
-
-extern void mp_broadcast(
-       void (*action_func)(void *),
-       void *arg);
-
 typedef struct xcArg {
 	processorid_t cpu;
 	dtrace_xcall_t f;
@@ -147,6 +144,7 @@ xcRemote( void *foo )
 	}
 }
 
+
 /*
  * dtrace_xcall() is not called from probe context.
  */
@@ -159,13 +157,17 @@ dtrace_xcall(processorid_t cpu, dtrace_xcall_t f, void *arg)
 	xcArg.f = f;
 	xcArg.arg = arg;
 
-	mp_broadcast( xcRemote, (void *)&xcArg);
+	if (cpu == DTRACE_CPUALL) {
+		mp_cpus_call(CPUMASK_ALL, SYNC, xcRemote, (void*)&xcArg);
+	} else {
+		mp_cpus_call(cpu_to_cpumask((cpu_t)cpu), SYNC, xcRemote, (void*)&xcArg);
+	}
 }
 
 /*
  * Runtime and ABI
  */
-
 uint64_t
 dtrace_getreg(struct regs *savearea, uint_t reg)
 {
@@ -420,6 +422,7 @@ dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
 	if (thread == NULL)
 		goto zero;
 
+	pal_register_cache_state(thread, VALID);
 	regs = (x86_saved_state_t *)find_user_regs(thread);
 	if (regs == NULL)
 		goto zero;
@@ -483,6 +486,7 @@ dtrace_getustackdepth(void)
 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
 		return (-1);
 
+	pal_register_cache_state(thread, VALID);
 	regs = (x86_saved_state_t *)find_user_regs(thread);
 	if (regs == NULL)
 		return 0;
@@ -746,7 +750,9 @@ dtrace_getarg(int arg, int aframes)
 		fp = fp->backchain;
 		pc = fp->retaddr;
 
-		if (pc  == (uintptr_t)dtrace_invop_callsite) {
+		if (dtrace_invop_callsite_pre != NULL
+			&& pc  >  (uintptr_t)dtrace_invop_callsite_pre
+			&& pc  <= (uintptr_t)dtrace_invop_callsite_post) {
 #if defined(__i386__)
 			/*
 			 * If we pass through the invalid op handler, we will
@@ -783,8 +789,10 @@ dtrace_getarg(int arg, int aframes)
 			if (arg <= inreg) {
 				stack = (uintptr_t *)&saved_state->rdi;
 			} else {
-				stack = (uintptr_t *)(saved_state->isf.rsp);
-				arg -= inreg;
+				fp = (struct frame *)(saved_state->isf.rsp);
+				stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */
+				arg -= inreg + 1;
 			}
 #else
 #error Unknown arch
@@ -794,7 +802,11 @@ dtrace_getarg(int arg, int aframes)
 	}
 
 	/*
-	 * Arrive here when provider has called dtrace_probe directly.
+	 * We know that we did not come through a trap to get into
+	 * dtrace_probe() --  We arrive here when the provider has
+	 * called dtrace_probe() directly.
+	 * The probe ID is the first argument to dtrace_probe().
+	 * We must advance beyond that to get the argX.
 	 */
 	arg++; /* Advance past probeID */
 
@@ -815,7 +827,8 @@ dtrace_getarg(int arg, int aframes)
 
 load:
 	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
-	val = *(((uint64_t *)stack) + arg); /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
+	/* dtrace_probe arguments arg0 ... arg4 are 64bits wide */
+	val = (uint64_t)(*(((uintptr_t *)stack) + arg));
 	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
 
 	return (val);
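
The final argument load above now reads a native-width uintptr_t slot and widens it to uint64_t instead of indexing the stack as an array of uint64_t; on an ILP32 kernel the two strides differ (4 versus 8 bytes), so the old form walked twice as far per argument. A quick user-space demonstration of the distinction:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	/* Pretend this is a stack of native-width argument slots. */
	uintptr_t stack[] = { 111, 222, 333, 444 };
	int arg = 2;

	/* Correct: index by native slot size, then widen the value. */
	uint64_t val = (uint64_t)(*(((uintptr_t *)stack) + arg));
	printf("arg%d = %llu\n", arg, (unsigned long long)val);	/* 333 */

	/* On an ILP32 build, indexing as uint64_t* would stride 8 bytes
	 * per slot and read the wrong memory; on LP64 the two forms
	 * happen to coincide. */
	printf("slot stride: %zu vs %zu bytes\n",
	    sizeof(uintptr_t), sizeof(uint64_t));
	return 0;
}
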
diff --git a/bsd/dev/i386/fasttrap_isa.c b/bsd/dev/i386/fasttrap_isa.c
index be620b517..e3bfb9402 100644
--- a/bsd/dev/i386/fasttrap_isa.c
+++ b/bsd/dev/i386/fasttrap_isa.c
@@ -45,6 +45,8 @@ extern dtrace_id_t dtrace_probeid_error;
 #include <sys/dtrace_ptss.h>
 #include <kern/debug.h>
 
+#include <machine/pal_routines.h>
+
 /* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */
 #define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */
 
@@ -2207,11 +2209,11 @@ fasttrap_return_probe(x86_saved_state_t *regs)
 	return (0);
 }
 
-
 uint64_t
 fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
     int aframes)
 {
+	pal_register_cache_state(current_thread(), VALID);
 #pragma unused(arg, id, parg, aframes)
 	return (fasttrap_anarg((x86_saved_state_t *)find_user_regs(current_thread()), 1, argno));
 }
@@ -2220,6 +2222,7 @@ uint64_t
 fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
     int aframes)
 {
+	pal_register_cache_state(current_thread(), VALID);
 #pragma unused(arg, id, parg, aframes)
 	return (fasttrap_anarg((x86_saved_state_t *)find_user_regs(current_thread()), 0, argno));
 }
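
The fbt_x86.c changes below consolidate the probe blacklist into is_module_valid() and is_symbol_valid(), which consult the sorted critical_blacklist and probe_ctx_closure tables through bsearch. A self-contained sketch of that sorted-table lookup, abbreviated to a few entries drawn from those tables:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Must be kept sorted for bsearch. */
static const char *blacklist[] = {
	"cpu_type",
	"cpuid_cpu_display",
	"handle_pending_TLB_flushes",
	"hw_compare_and_store",
};
#define BLACKLIST_COUNT	(sizeof(blacklist) / sizeof(blacklist[0]))

static int
cmp(const void *key, const void *elem)
{
	return strcmp((const char *)key, *(const char **)elem);
}

static int
is_blacklisted(const char *name)
{
	return bsearch(name, blacklist, BLACKLIST_COUNT,
	    sizeof(blacklist[0]), cmp) != NULL;
}

int
main(void)
{
	printf("%d %d\n", is_blacklisted("cpu_type"),	/* 1 */
	    is_blacklisted("vm_fault"));		/* 0 */
	return 0;
}
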
diff --git a/bsd/dev/i386/fbt_x86.c b/bsd/dev/i386/fbt_x86.c
index 19d461ac2..baec24f83 100644
--- a/bsd/dev/i386/fbt_x86.c
+++ b/bsd/dev/i386/fbt_x86.c
@@ -39,6 +39,7 @@
 #include <mach-o/loader.h> 
 #include <mach-o/nlist.h>
 #include <libkern/kernel_mach_header.h>
+#include <libkern/OSAtomic.h>
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -101,7 +102,9 @@ extern dtrace_provider_id_t	fbt_id;
 extern fbt_probe_t		**fbt_probetab;
 extern int			fbt_probetab_mask;
 
-kern_return_t fbt_perfCallback(int, x86_saved_state_t *, __unused int, __unused int);
+extern int			gIgnoreFBTBlacklist; /* From fbt_init */
+
+kern_return_t fbt_perfCallback(int, x86_saved_state_t *, uintptr_t *, __unused int);
 
 /*
  * Critical routines that must not be probed. PR_5221096, PR_5379018.
@@ -144,6 +147,7 @@ static const char * critical_blacklist[] =
 	"cpu_topology_start_cpu", 	
 	"cpu_type", 
 	"cpuid_cpu_display",
+	"cpuid_extfeatures",
 	"handle_pending_TLB_flushes",
 	"hw_compare_and_store",
 	"machine_idle_cstate",
@@ -171,8 +175,8 @@ static const char * probe_ctx_closure[] =
 	"IS_64BIT_PROCESS",
 	"OSCompareAndSwap",
 	"absolutetime_to_microtime",
+	"act_set_astbsd",
 	"ast_pending",
-	"astbsd_on",
 	"clock_get_calendar_nanotime_nowait",
 	"copyin",
 	"copyin_user",
@@ -257,6 +261,238 @@ static const void * bsearch(
 	return (NULL);
 }
 
+/*
+ * Module validation
+ */ 
+static int
+is_module_valid(struct modctl* ctl)
+{
+	ASSERT(!MOD_FBT_PROBES_PROVIDED(ctl));
+	ASSERT(!MOD_FBT_INVALID(ctl));
+	
+	if (0 == ctl->mod_address || 0 == ctl->mod_size) {
+		return FALSE;
+	}
+	
+	if (0 == ctl->mod_loaded) {
+		return FALSE;
+	}
+	
+	if (strstr(ctl->mod_modname, "CHUD") != NULL)
+		return FALSE;
+	
+	/*
+	 * If the user sets this, trust they know what they are doing.
+	 */
+	if (gIgnoreFBTBlacklist)   /* per boot-arg set in fbt_init() */
+		return TRUE;
+
+	/*
+	 * These drivers control low-level functions that, when traced,
+	 * cause problems, especially in the sleep/wake paths.
+	 * If somebody really wants to drill in on one of these kexts, then
+	 * they can override blacklisting using the boot-arg above.
+	 */
+
+	if (strstr(ctl->mod_modname, "AppleACPIEC") != NULL)
+		return FALSE;
+
+	if (strstr(ctl->mod_modname, "AppleACPIPlatform") != NULL)
+		return FALSE;	
+
+	if (strstr(ctl->mod_modname, "AppleRTC") != NULL)
+		return FALSE;
+
+	if (strstr(ctl->mod_modname, "IOACPIFamily") != NULL)
+		return FALSE;
+
+	if (strstr(ctl->mod_modname, "AppleIntelCPUPowerManagement") != NULL)
+		return FALSE;
+	
+	if (strstr(ctl->mod_modname, "AppleProfile") != NULL)
+		return FALSE;
+
+	if (strstr(ctl->mod_modname, "AppleIntelProfile") != NULL)
+		return FALSE;
+
+	return TRUE;
+}
+
+/*
+ * FBT probe name validation
+ */
+static int
+is_symbol_valid(const char* name)
+{
+	/*
+	 * If the user set this, trust they know what they are doing.
+	 */
+	if (gIgnoreFBTBlacklist)
+		return TRUE;
+		
+	if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) {
+		/*
+		 * Anything beginning with "dtrace_" may be called
+		 * from probe context unless it explicitly indicates
+		 * that it won't be called from probe context by
+		 * using the prefix "dtrace_safe_".
+		 */
+		return FALSE;
+	}
+	
+	if (LIT_STRNSTART(name, "fasttrap_") ||
+	    LIT_STRNSTART(name, "fuword") ||
+	    LIT_STRNSTART(name, "suword") ||
+	    LIT_STRNEQL(name, "sprlock") ||
+	    LIT_STRNEQL(name, "sprunlock") ||
+	    LIT_STRNEQL(name, "uread") ||
+	    LIT_STRNEQL(name, "uwrite")) {
+		return FALSE; /* Fasttrap inner-workings. */
+	}
+	
+	if (LIT_STRNSTART(name, "dsmos_")) 
+		return FALSE; /* Don't Steal Mac OS X! */
+	
+	if (LIT_STRNSTART(name, "_dtrace"))
+		return FALSE; /* Shims in dtrace.c */
+	
+	if (LIT_STRNSTART(name, "chud"))
+		return FALSE; /* Professional courtesy. */
+	
+	if (LIT_STRNSTART(name, "hibernate_"))
+		return FALSE; /* Let sleeping dogs lie. */
+
+	if (LIT_STRNEQL(name, "_ZNK6OSData14getBytesNoCopyEv"))
+		return FALSE;  /* Data::getBytesNoCopy, IOHibernateSystemWake path */
+	
+	if (LIT_STRNEQL(name, "_ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */
+	    LIT_STRNEQL(name, "_ZN9IOService26temperatureCriticalForZoneEPS_")) { /* IOService::temperatureCriticalForZone */
+		return FALSE; /* Per the fire code */
+	}
+	
+	/*
+	 * Place no probes (illegal instructions) in the exception handling path!
+	 */
+	if (LIT_STRNEQL(name, "t_invop") ||
+	    LIT_STRNEQL(name, "enter_lohandler") ||
+	    LIT_STRNEQL(name, "lo_alltraps") ||
+	    LIT_STRNEQL(name, "kernel_trap") ||
+	    LIT_STRNEQL(name, "interrupt") ||		  
+	    LIT_STRNEQL(name, "i386_astintr")) {
+		return FALSE;
+	}
+	
+	if (LIT_STRNEQL(name, "current_thread") ||
+	    LIT_STRNEQL(name, "ast_pending") ||
+	    LIT_STRNEQL(name, "fbt_perfCallback") ||
+	    LIT_STRNEQL(name, "machine_thread_get_kern_state") ||
+	    LIT_STRNEQL(name, "get_threadtask") ||
+	    LIT_STRNEQL(name, "ml_set_interrupts_enabled") ||
+	    LIT_STRNEQL(name, "dtrace_invop") ||
+	    LIT_STRNEQL(name, "fbt_invop") ||
+	    LIT_STRNEQL(name, "sdt_invop") ||
+	    LIT_STRNEQL(name, "max_valid_stack_address")) {
+		return FALSE;
+	}
+	
+	/*
+	 * Voodoo.
+	 */
+	if (LIT_STRNSTART(name, "machine_stack_") ||
+	    LIT_STRNSTART(name, "mapping_") ||
+	    LIT_STRNEQL(name, "tmrCvt") ||
+	    
+	    LIT_STRNSTART(name, "tsc_") ||
+	    
+	    LIT_STRNSTART(name, "pmCPU") ||
+	    LIT_STRNEQL(name, "pmKextRegister") ||
+	    LIT_STRNEQL(name, "pmMarkAllCPUsOff") ||
+	    LIT_STRNEQL(name, "pmSafeMode") ||
+	    LIT_STRNEQL(name, "pmTimerSave") ||
+	    LIT_STRNEQL(name, "pmTimerRestore") ||
+	    LIT_STRNEQL(name, "pmUnRegister") ||
+	    LIT_STRNSTART(name, "pms") ||
+	    LIT_STRNEQL(name, "power_management_init") ||
+	    LIT_STRNSTART(name, "usimple_") ||
+	    LIT_STRNSTART(name, "lck_spin_lock") ||
+	    LIT_STRNSTART(name, "lck_spin_unlock") ||		  
+	    
+	    LIT_STRNSTART(name, "rtc_") ||
+	    LIT_STRNSTART(name, "_rtc_") ||
+	    LIT_STRNSTART(name, "rtclock_") ||
+	    LIT_STRNSTART(name, "clock_") ||
+	    LIT_STRNSTART(name, "absolutetime_to_") ||
+	    LIT_STRNEQL(name, "setPop") ||
+	    LIT_STRNEQL(name, "nanoseconds_to_absolutetime") ||
+	    LIT_STRNEQL(name, "nanotime_to_absolutetime") ||
+	    
+	    LIT_STRNSTART(name, "etimer_") ||
+	    
+	    LIT_STRNSTART(name, "commpage_") ||
+	    LIT_STRNSTART(name, "pmap_") ||
+	    LIT_STRNSTART(name, "ml_") ||
+	    LIT_STRNSTART(name, "PE_") ||
+	    LIT_STRNEQL(name, "kprintf") ||
+	    LIT_STRNSTART(name, "lapic_") ||
+	    LIT_STRNSTART(name, "act_machine") ||
+	    LIT_STRNSTART(name, "acpi_")  ||
+	    LIT_STRNSTART(name, "pal_")){
+		return FALSE;
+	}
+
+	/*
+	 * Avoid machine_ routines. PR_5346750.
+	 */
+	if (LIT_STRNSTART(name, "machine_"))
+		return FALSE;
+	
+	if (LIT_STRNEQL(name, "handle_pending_TLB_flushes"))
+		return FALSE;
+	
+	/*
+	 * Place no probes on critical routines. PR_5221096
+	 */
+	if (bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL)
+		return FALSE;
+	
+	/*
+	 * Place no probes that could be hit in probe context.
+	 */
+	if (bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL) {
+		return FALSE;
+	}
+	
+	/*
+	 * Place no probes that could be hit on the way to the debugger.
+	 */
+	if (LIT_STRNSTART(name, "kdp_") ||
+	    LIT_STRNSTART(name, "kdb_") ||
+	    LIT_STRNSTART(name, "kdbg_") ||
+	    LIT_STRNSTART(name, "kdebug_") ||
+	    LIT_STRNSTART(name, "kernel_debug") ||
+	    LIT_STRNEQL(name, "Debugger") ||
+	    LIT_STRNEQL(name, "Call_DebuggerC") ||
+	    LIT_STRNEQL(name, "lock_debugger") ||
+	    LIT_STRNEQL(name, "unlock_debugger") ||
+	    LIT_STRNEQL(name, "SysChoked"))  {
+		return FALSE;
+	}
+
+	/*
+	 * Place no probes that could be hit on the way to a panic.
+	 */
+	if (NULL != strstr(name, "panic_") ||
+	    LIT_STRNEQL(name, "panic") ||
+	    LIT_STRNEQL(name, "preemption_underflow_panic")) {
+		return FALSE;
+	}
+	
+	return TRUE;
+}
+
 #if defined(__i386__)
 int
 fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval)
@@ -313,8 +549,8 @@ kern_return_t
 fbt_perfCallback(
                 int         		trapno,
                 x86_saved_state_t 	*tagged_regs,
-                __unused int        unused1,
-                __unused int        unused2)
+                uintptr_t           *lo_spp,
+                __unused int        unused)
 {
 	kern_return_t retval = KERN_FAILURE;
 	x86_saved_state32_t *saved_state = saved_state32(tagged_regs);
@@ -322,7 +558,8 @@ fbt_perfCallback(
 
 	if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) {
 		boolean_t oldlevel, cpu_64bit;
-		uint32_t esp_probe, *ebp, edi, fp, *pDst, delta = 0;
+		uint32_t esp_probe, fp, *pDst, delta = 0;
+		uintptr_t old_sp;
 		int emul;
 
 		cpu_64bit = ml_is64bit();
@@ -335,10 +572,26 @@ fbt_perfCallback(
 			esp_probe = (uint32_t)&(regs[1]); /* Nasty, infer the location above the save area */
 		}
 
+		__asm__ volatile(
+			"Ldtrace_invop_callsite_pre_label:\n"
+			".data\n"
+			".private_extern _dtrace_invop_callsite_pre\n"
+			"_dtrace_invop_callsite_pre:\n"
+			"  .long Ldtrace_invop_callsite_pre_label\n"
+			".text\n"
+				 );
+		
 		emul = dtrace_invop( saved_state->eip, (uintptr_t *)esp_probe, saved_state->eax );
-		__asm__ volatile(".globl _dtrace_invop_callsite");
-		__asm__ volatile("_dtrace_invop_callsite:");
 
+		__asm__ volatile(
+			"Ldtrace_invop_callsite_post_label:\n"
+			".data\n"
+			".private_extern _dtrace_invop_callsite_post\n"
+			"_dtrace_invop_callsite_post:\n"
+			"  .long Ldtrace_invop_callsite_post_label\n"
+			".text\n"
+				 );
+		
 		switch (emul) {
 		case DTRACE_INVOP_NOP:
 			saved_state->eip += DTRACE_INVOP_NOP_SKIP;	/* Skip over the patched NOP (planted by sdt.) */
@@ -379,27 +632,18 @@ fbt_perfCallback(
 			
 			if (cpu_64bit)
 				saved_state->uesp += (delta << 2);
-
-/* XXX Fragile in the extreme. Obtain the value of %edi that our caller pushed
- * (on behalf of its caller -- trap_from_kernel()). Ultimately,
- * trap_from_kernel's stack pointer is restored from this slot.
- * This is sensitive to the manner in which the compiler preserves %edi,
- * and trap_from_kernel()'s internals.
- */
-			ebp = (uint32_t *)__builtin_frame_address(0);
-			ebp = (uint32_t *)*ebp;
-			edi = *(ebp - 1);
+/* Obtain the stack pointer recorded by the trampolines */
+			old_sp = *lo_spp;
 /* Shift contents of stack */
 			for (pDst = (uint32_t *)fp;
-			     pDst > (((uint32_t *)edi));
+			     pDst > (((uint32_t *)old_sp));
 				 pDst--)
 				*pDst = pDst[-delta];
 
 /* Track the stack lift in "saved_state". */
 			saved_state = (x86_saved_state32_t *) (((uintptr_t)saved_state) + (delta << 2));
-
-/* Now adjust the value of %edi in our caller (kernel_trap)'s frame */
-			*(ebp - 1) = edi + (delta << 2);
+/* Adjust the stack pointer utilized by the trampolines */
+			*lo_spp = old_sp + (delta << 2);
 
 			retval = KERN_SUCCESS;
 			break;
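
The shift loop above copies each 32-bit word from delta slots below, walking from the probe frame down to the trampoline-recorded stack pointer, producing the stack lift that is then tracked in saved_state and in *lo_spp. The same copy pattern, isolated on a plain array:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	/* Indices 0..9 stand in for stack words; "lift" the words between
	 * lo and hi upward by delta slots, exactly as the loop above does. */
	uint32_t words[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
	uint32_t *lo = &words[2];	/* stand-in for old_sp */
	uint32_t *hi = &words[8];	/* stand-in for fp */
	uint32_t delta = 2;

	for (uint32_t *pDst = hi; pDst > lo; pDst--)
		*pDst = pDst[-delta];

	for (int i = 0; i < 10; i++)
		printf("%u ", words[i]);
	printf("\n");	/* 0 1 2 1 2 3 4 5 6 9 */
	return 0;
}
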
@@ -418,47 +662,299 @@ fbt_perfCallback(
 
 /*ARGSUSED*/
 static void
-__fbt_provide_module(void *arg, struct modctl *ctl)
+__provide_probe_32(struct modctl *ctl, uintptr_t instrLow, uintptr_t instrHigh, char *modname, char* symbolName, machine_inst_t* symbolStart)
 {
-#pragma unused(arg)
-	kernel_mach_header_t		*mh;
-	struct load_command         *cmd;
-	kernel_segment_command_t	*orig_ts = NULL, *orig_le = NULL;
-	struct symtab_command       *orig_st = NULL;
-	struct nlist                *sym = NULL;
-	char						*strings;
-	uintptr_t					instrLow, instrHigh;
-	char						*modname;
-	unsigned int				i, j;
+	unsigned int	j;
+	unsigned int	doenable = 0;
+	dtrace_id_t	thisid;
 
-	int gIgnoreFBTBlacklist = 0;
-	PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist));
-
-	mh = (kernel_mach_header_t *)(ctl->address);
-	modname = ctl->mod_modname;
+	fbt_probe_t *newfbt, *retfbt, *entryfbt;
+	machine_inst_t *instr, *limit, theInstr, i1, i2;
+	int size;
 
-	if (0 == ctl->address || 0 == ctl->size) /* Has the linker been jettisoned? */
+	for (j = 0, instr = symbolStart, theInstr = 0;
+	     (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2)); 
+	     j++) {
+		theInstr = instr[0];
+		if (theInstr == FBT_PUSHL_EBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
+			break;
+		
+		if ((size = dtrace_instr_size(instr)) <= 0)
+			break;
+		
+		instr += size;
+	}
+	
+	if (theInstr != FBT_PUSHL_EBP)
 		return;
-
+	
+	i1 = instr[1];
+	i2 = instr[2];
+	
+	limit = (machine_inst_t *)instrHigh;
+	
+	if ((i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) ||
+	    (i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1)) {
+		instr += 1; /* Advance to the movl %esp,%ebp */
+		theInstr = i1;
+	} else {
+		/*
+		 * Sometimes, the compiler will schedule an intervening instruction
+		 * in the function prologue. Example:
+		 *
+		 * _mach_vm_read:
+		 * 000006d8        pushl   %ebp
+		 * 000006d9        movl    $0x00000004,%edx
+		 * 000006de        movl    %esp,%ebp
+		 * 
+		 * Try the next instruction, to see if it is a movl %esp,%ebp
+		 */
+		
+		instr += 1; /* Advance past the pushl %ebp */
+		if ((size = dtrace_instr_size(instr)) <= 0)
+			return;
+		
+		instr += size;
+		
+		if ((instr + 1) >= limit)
+			return;
+		
+		i1 = instr[0];
+		i2 = instr[1];
+		
+		if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
+		    !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
+			return;
+		
+		/* instr already points at the movl %esp,%ebp */
+		theInstr = i1;
+	}
+	
+	thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_ENTRY);
+	newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
+	strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
+	
+	if (thisid != 0) {
+		/*
+		 * The dtrace_probe previously existed, so we have to hook
+		 * the newfbt entry onto the end of the existing fbt's chain.
+		 * If we find an fbt entry that was previously patched to
+		 * fire, (as indicated by the current patched value), then
+		 * we want to enable this newfbt on the spot.
+		 */
+		entryfbt = dtrace_probe_arg (fbt_id, thisid);
+		ASSERT (entryfbt != NULL);
+		for(; entryfbt != NULL; entryfbt = entryfbt->fbtp_next) {
+			if (entryfbt->fbtp_currentval == entryfbt->fbtp_patchval)
+				doenable++;
+			
+			if (entryfbt->fbtp_next == NULL) {
+				entryfbt->fbtp_next = newfbt;
+				newfbt->fbtp_id = entryfbt->fbtp_id;
+				break;
+			}
+		}		    
+	}
+	else {
+		/*
+		 * The dtrace_probe did not previously exist, so we
+		 * create it and hook in the newfbt.  Since the probe is
+		 * new, we obviously do not need to enable it on the spot.
+		 */
+		newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname, symbolName, FBT_ENTRY, FBT_AFRAMES_ENTRY, newfbt);
+		doenable = 0;
+	}
+	
+	
+	newfbt->fbtp_patchpoint = instr;
+	newfbt->fbtp_ctl = ctl;
+	newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
+	newfbt->fbtp_rval = DTRACE_INVOP_MOVL_ESP_EBP;
+	newfbt->fbtp_savedval = theInstr;
+	newfbt->fbtp_patchval = FBT_PATCHVAL;
+	newfbt->fbtp_currentval = 0;
+	newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
+	fbt_probetab[FBT_ADDR2NDX(instr)] = newfbt;
+	
+	if (doenable)
+		fbt_enable(NULL, newfbt->fbtp_id, newfbt);
+	
 	/*
-	 * Employees of dtrace and their families are ineligible.  Void
-	 * where prohibited.
+	 * The fbt entry chain is in place, one entry point per symbol.
+	 * The fbt return chain can have multiple return points per symbol.
+	 * Here we find the end of the fbt return chain.
 	 */
-
-	if (LIT_STRNEQL(modname, "com.apple.driver.dtrace"))
+	
+	doenable = 0;
+	
+	thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_RETURN);
+	if (thisid != 0) {
+		/* The dtrace_probe previously existed, so we have to
+		 * find the end of the existing fbt chain.  If we find
+		 * an fbt return that was previously patched to fire
+		 * (as indicated by the current patched value), then
+		 * we want to enable any new fbts on the spot.
+		 */
+		retfbt = dtrace_probe_arg (fbt_id, thisid);
+		ASSERT(retfbt != NULL);
+		for (; retfbt != NULL; retfbt = retfbt->fbtp_next) {
+			if (retfbt->fbtp_currentval == retfbt->fbtp_patchval)
+				doenable++;
+			if (retfbt->fbtp_next == NULL)
+				break;
+		}
+	}
+	else {
+		doenable = 0;
+		retfbt = NULL;
+	}
+	
+again:
+	if (instr >= limit)
 		return;
-
-	if (strstr(modname, "CHUD") != NULL)
+	
+	/*
+	 * If this disassembly fails, then we've likely walked off into
+	 * a jump table or some other unsuitable area.  Bail out of the
+	 * disassembly now.
+	 */
+	if ((size = dtrace_instr_size(instr)) <= 0)
+		return;
+	
+	/*
+	 * We (desperately) want to avoid erroneously instrumenting a
+	 * jump table, especially given that our markers are pretty
+	 * short:  two bytes on x86, and just one byte on amd64.  To
+	 * determine if we're looking at a true instruction sequence
+	 * or an inline jump table that happens to contain the same
+	 * byte sequences, we resort to some heuristic sleaze:  we
+	 * treat this instruction as being contained within a pointer,
+	 * and see if that pointer points to within the body of the
+	 * function.  If it does, we refuse to instrument it.
+	 */
+	for (j = 0; j < sizeof (uintptr_t); j++) {
+		uintptr_t check = (uintptr_t)instr - j;
+		uint8_t *ptr;
+		
+		if (check < (uintptr_t)symbolStart)
+			break;
+		
+		if (check + sizeof (uintptr_t) > (uintptr_t)limit)
+			continue;
+		
+		ptr = *(uint8_t **)check;
+		
+		if (ptr >= (uint8_t *)symbolStart && ptr < limit) {
+			instr += size;
+			goto again;
+		}
+	}
+	
+	/*
+	 * OK, it's an instruction.
+	 */
+	theInstr = instr[0];
+	
+	/* Walked onto the start of the next routine? If so, bail out of this function. */
+	if (theInstr == FBT_PUSHL_EBP)
+		return;
+	
+	if (!(size == 1 && (theInstr == FBT_POPL_EBP || theInstr == FBT_LEAVE))) {
+		instr += size;
+		goto again;
+	}
+	
+	/*
+	 * Found the popl %ebp; or leave.
+	 */
+	machine_inst_t *patch_instr = instr;
+	
+	/*
+	 * Scan forward for a "ret", or "jmp".
+	 */
+	instr += size;
+	if (instr >= limit)
+		return;
+	
+	size = dtrace_instr_size(instr);
+	if (size <= 0) /* Failed instruction decode? */
+		return;
+	
+	theInstr = instr[0];
+	
+	if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
+	    !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
+	    !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
+	    !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
+	    !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
 		return;
+	
+	/*
+	 * popl %ebp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
+	 */
+	newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
+	strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
+	
+	if (retfbt == NULL) {
+		newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
+						      symbolName, FBT_RETURN, FBT_AFRAMES_RETURN, newfbt);
+	} else {
+		retfbt->fbtp_next = newfbt;
+		newfbt->fbtp_id = retfbt->fbtp_id;
+	}
+	
+	retfbt = newfbt;
+	newfbt->fbtp_patchpoint = patch_instr;
+	newfbt->fbtp_ctl = ctl;
+	newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
+	
+	if (*patch_instr == FBT_POPL_EBP) {
+		newfbt->fbtp_rval = DTRACE_INVOP_POPL_EBP;
+	} else {
+		ASSERT(*patch_instr == FBT_LEAVE);
+		newfbt->fbtp_rval = DTRACE_INVOP_LEAVE;
+	}
+	newfbt->fbtp_roffset =
+	(uintptr_t)(patch_instr - (uint8_t *)symbolStart);
+	
+	newfbt->fbtp_savedval = *patch_instr;
+	newfbt->fbtp_patchval = FBT_PATCHVAL;
+	newfbt->fbtp_currentval = 0;
+	newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
+	fbt_probetab[FBT_ADDR2NDX(patch_instr)] = newfbt;
+	
+	if (doenable)
+		fbt_enable(NULL, newfbt->fbtp_id, newfbt);
+	
+	instr += size;
+	goto again;
+}
 
+static void
+__kernel_syms_provide_module(void *arg, struct modctl *ctl)
+{
+#pragma unused(arg)
+	kernel_mach_header_t		*mh;
+	struct load_command		*cmd;
+	kernel_segment_command_t	*orig_ts = NULL, *orig_le = NULL;
+	struct symtab_command		*orig_st = NULL;
+	struct nlist			*sym = NULL;
+	char				*strings;
+	uintptr_t			instrLow, instrHigh;
+	char				*modname;
+	unsigned int			i;
+	
+	mh = (kernel_mach_header_t *)(ctl->mod_address);
+	modname = ctl->mod_modname;
+	
 	if (mh->magic != MH_MAGIC)
 		return;
-
+	
 	cmd = (struct load_command *) &mh[1];
 	for (i = 0; i < mh->ncmds; i++) {
 		if (cmd->cmd == LC_SEGMENT_KERNEL) {
 			kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd;
-
+			
 			if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT))
 				orig_ts = orig_sg;
 			else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT))
@@ -468,370 +964,75 @@ __fbt_provide_module(void *arg, struct modctl *ctl)
 		}
 		else if (cmd->cmd == LC_SYMTAB)
 			orig_st = (struct symtab_command *) cmd;
-
+		
 		cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize);
 	}
-
+	
 	if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
 		return;
-
+	
 	sym = (struct nlist *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff);
 	strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff);
-
+	
 	/* Find extent of the TEXT section */
 	instrLow = (uintptr_t)orig_ts->vmaddr;
 	instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize);
-
+		
 	for (i = 0; i < orig_st->nsyms; i++) {
-		fbt_probe_t *fbt, *retfbt;
-		machine_inst_t *instr, *limit, theInstr, i1, i2;
 		uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
 		char *name = strings + sym[i].n_un.n_strx;
-		int size;
-
+		
 		/* Check that the symbol is a global and that it has a name. */
 		if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
 			continue;
-
+		
 		if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */
 			continue;
 		
 		/* Lop off omnipresent leading underscore. */			
 		if (*name == '_')
 			name += 1;
-
-		if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) {
-			/*
-			 * Anything beginning with "dtrace_" may be called
-			 * from probe context unless it explitly indicates
-			 * that it won't be called from probe context by
-			 * using the prefix "dtrace_safe_".
-			 */
-			continue;
-		}
-
-		if (LIT_STRNSTART(name, "dsmos_")) 
-			continue; /* Don't Steal Mac OS X! */
-
-        if (LIT_STRNSTART(name, "_dtrace"))
-			continue; /* Shims in dtrace.c */
-
-		if (LIT_STRNSTART(name, "chud"))
-			continue; /* Professional courtesy. */
-		
-		if (LIT_STRNSTART(name, "hibernate_"))
-			continue; /* Let sleeping dogs lie. */
 		
-		if (LIT_STRNEQL(name, "_ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */
-			LIT_STRNEQL(name, "_ZN9IOService26temperatureCriticalForZoneEPS_")) /* IOService::temperatureCriticalForZone */
-			continue; /* Per the fire code */
-
 		/*
-		 * Place no probes (illegal instructions) in the exception handling path!
+		 * We're only blacklisting functions in the kernel for now.
 		 */
-		if (LIT_STRNEQL(name, "t_invop") ||
-			LIT_STRNEQL(name, "enter_lohandler") ||
-			LIT_STRNEQL(name, "lo_alltraps") ||
-			LIT_STRNEQL(name, "kernel_trap") ||
-			LIT_STRNEQL(name, "interrupt") ||		  
-			LIT_STRNEQL(name, "i386_astintr"))
-			continue;
-
-		if (LIT_STRNEQL(name, "current_thread") ||
-			LIT_STRNEQL(name, "ast_pending") ||
-			LIT_STRNEQL(name, "fbt_perfCallback") ||
-			LIT_STRNEQL(name, "machine_thread_get_kern_state") ||
-			LIT_STRNEQL(name, "get_threadtask") ||
-			LIT_STRNEQL(name, "ml_set_interrupts_enabled") ||
-			LIT_STRNEQL(name, "dtrace_invop") ||
-			LIT_STRNEQL(name, "fbt_invop") ||
-			LIT_STRNEQL(name, "sdt_invop") ||
-			LIT_STRNEQL(name, "max_valid_stack_address"))
+		if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name))
 			continue;
+		
+		__provide_probe_32(ctl, instrLow, instrHigh, modname, name, (machine_inst_t*)sym[i].n_value);
+	}
+}
 
-		/*
-		 * Voodoo.
-		 */
-		if (LIT_STRNSTART(name, "machine_stack_") ||
-			LIT_STRNSTART(name, "mapping_") ||
-			LIT_STRNEQL(name, "tmrCvt") ||
-
-			LIT_STRNSTART(name, "tsc_") ||
-
-			LIT_STRNSTART(name, "pmCPU") ||
-			LIT_STRNEQL(name, "pmKextRegister") ||
-			LIT_STRNEQL(name, "pmMarkAllCPUsOff") ||
-			LIT_STRNEQL(name, "pmSafeMode") ||
-			LIT_STRNEQL(name, "pmTimerSave") ||
-			LIT_STRNEQL(name, "pmTimerRestore") ||
-			LIT_STRNEQL(name, "pmUnRegister") ||
-			LIT_STRNSTART(name, "pms") ||
-			LIT_STRNEQL(name, "power_management_init") ||
-			LIT_STRNSTART(name, "usimple_") ||
-			LIT_STRNEQL(name, "lck_spin_lock") ||
-			LIT_STRNEQL(name, "lck_spin_unlock") ||		  
-
-			LIT_STRNSTART(name, "rtc_") ||
-			LIT_STRNSTART(name, "_rtc_") ||
-			LIT_STRNSTART(name, "rtclock_") ||
-			LIT_STRNSTART(name, "clock_") ||
-			LIT_STRNSTART(name, "absolutetime_to_") ||
-			LIT_STRNEQL(name, "setPop") ||
-			LIT_STRNEQL(name, "nanoseconds_to_absolutetime") ||
-			LIT_STRNEQL(name, "nanotime_to_absolutetime") ||
-
-			LIT_STRNSTART(name, "etimer_") ||
-
-			LIT_STRNSTART(name, "commpage_") ||
-			LIT_STRNSTART(name, "pmap_") ||
-			LIT_STRNSTART(name, "ml_") ||
-			LIT_STRNSTART(name, "PE_") ||
-		        LIT_STRNEQL(name, "kprintf") ||
-			LIT_STRNSTART(name, "lapic_") ||
-			LIT_STRNSTART(name, "acpi_"))
-			continue;
+static void
+__user_syms_provide_module(void *arg, struct modctl *ctl)
+{
+#pragma unused(arg)
+	char				*modname;
+	unsigned int			i;
+	
+	modname = ctl->mod_modname;
+	
+	dtrace_module_symbols_t* module_symbols = ctl->mod_user_symbols;
+	if (module_symbols) {
+		for (i=0; i<module_symbols->dtmodsyms_count; i++) {
+			dtrace_symbol_t* symbol = &module_symbols->dtmodsyms_symbols[i];
+			char* name = symbol->dtsym_name;
+			
+			/* Lop off omnipresent leading underscore. */			
+			if (*name == '_')
+				name += 1;
 
-        /*
-         * Avoid machine_ routines. PR_5346750.
-         */
-        if (LIT_STRNSTART(name, "machine_"))
-            continue;
+			/*
+			 * We're only blacklisting functions in the kernel for now.
+			 */
+			if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name))
+				continue;
 
-		if (LIT_STRNEQL(name, "handle_pending_TLB_flushes"))
-			continue;
+			__provide_probe_32(ctl, (uintptr_t)symbol->dtsym_addr, (uintptr_t)(symbol->dtsym_addr + symbol->dtsym_size), modname, name, (machine_inst_t*)(uintptr_t)symbol->dtsym_addr);
+		}
+	}
+}
 
-        /*
-         * Place no probes on critical routines. PR_5221096
-         */
-        if (!gIgnoreFBTBlacklist &&
-            bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL)
-                continue;
-
-        /*
-		 * Place no probes that could be hit in probe context.
-		 */
-		if (!gIgnoreFBTBlacklist && 
-			bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL)
-			continue;
-
-		/*
-		 * Place no probes that could be hit on the way to the debugger.
-		 */
-		if (LIT_STRNSTART(name, "kdp_") ||
-			LIT_STRNSTART(name, "kdb_") ||
-			LIT_STRNSTART(name, "kdbg_") ||
-			LIT_STRNSTART(name, "kdebug_") ||
-			LIT_STRNEQL(name, "kernel_debug") ||
-			LIT_STRNEQL(name, "Debugger") ||
-			LIT_STRNEQL(name, "Call_DebuggerC") ||
-			LIT_STRNEQL(name, "lock_debugger") ||
-			LIT_STRNEQL(name, "unlock_debugger") ||
-			LIT_STRNEQL(name, "SysChoked")) 
-			continue;
-
-		/*
-		 * Place no probes that could be hit on the way to a panic.
-		 */
-		if (NULL != strstr(name, "panic_") ||
-			LIT_STRNEQL(name, "panic") ||
-			LIT_STRNEQL(name, "handleMck") ||
-			LIT_STRNEQL(name, "unresolved_kernel_trap"))
-			continue;
-		
-		if (dtrace_probe_lookup(fbt_id, modname, name, NULL) != 0)
-			continue;
-
-		for (j = 0, instr = (machine_inst_t *)sym[i].n_value, theInstr = 0;
-			 (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2)); 
-			 j++) {
-			theInstr = instr[0];
-			if (theInstr == FBT_PUSHL_EBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
-				break;
-
-			if ((size = dtrace_instr_size(instr)) <= 0)
-				break;
- 
-			instr += size;
-		}
-
-		if (theInstr != FBT_PUSHL_EBP)
-			continue;
-
-		i1 = instr[1];
-		i2 = instr[2];
-
-		limit = (machine_inst_t *)instrHigh;
-
-		if ((i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) ||
-			(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1)) {
-				instr += 1; /* Advance to the movl %esp,%ebp */
-				theInstr = i1;
-		} else {
-			/*
-			 * Sometimes, the compiler will schedule an intervening instruction
-			 * in the function prologue. Example:
-			 *
-			 * _mach_vm_read:
-			 * 000006d8        pushl   %ebp
-			 * 000006d9        movl    $0x00000004,%edx
-			 * 000006de        movl    %esp,%ebp
-			 * 
-			 * Try the next instruction, to see if it is a movl %esp,%ebp
-			 */
-
-			instr += 1; /* Advance past the pushl %ebp */
-			if ((size = dtrace_instr_size(instr)) <= 0)
-				continue;
- 
-			instr += size;
-
-			if ((instr + 1) >= limit)
-				continue;
-
-			i1 = instr[0];
-			i2 = instr[1];
-
-			if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
-				!(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
-				continue;
-
-			/* instr already points at the movl %esp,%ebp */
-			theInstr = i1;
-		}
-
-		fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
-		strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );
-		fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_ENTRY, FBT_AFRAMES_ENTRY, fbt);
-		fbt->fbtp_patchpoint = instr;
-		fbt->fbtp_ctl = ctl;
-		fbt->fbtp_loadcnt = ctl->mod_loadcnt;
-		fbt->fbtp_rval = DTRACE_INVOP_MOVL_ESP_EBP;
-		fbt->fbtp_savedval = theInstr;
-		fbt->fbtp_patchval = FBT_PATCHVAL;
-
-		fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
-		fbt->fbtp_symndx = i;
-		fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
-
-		retfbt = NULL;
-again:
-		if (instr >= limit)
-			continue;
-
-		/*
-		 * If this disassembly fails, then we've likely walked off into
-		 * a jump table or some other unsuitable area.  Bail out of the
-		 * disassembly now.
-		 */
-		if ((size = dtrace_instr_size(instr)) <= 0)
-			continue;
-
-		/*
-		 * We (desperately) want to avoid erroneously instrumenting a
-		 * jump table, especially given that our markers are pretty
-		 * short:  two bytes on x86, and just one byte on amd64.  To
-		 * determine if we're looking at a true instruction sequence
-		 * or an inline jump table that happens to contain the same
-		 * byte sequences, we resort to some heuristic sleeze:  we
-		 * treat this instruction as being contained within a pointer,
-		 * and see if that pointer points to within the body of the
-		 * function.  If it does, we refuse to instrument it.
-		 */
-		for (j = 0; j < sizeof (uintptr_t); j++) {
-			uintptr_t check = (uintptr_t)instr - j;
-			uint8_t *ptr;
-
-			if (check < sym[i].n_value)
-				break;
-
-			if (check + sizeof (uintptr_t) > (uintptr_t)limit)
-				continue;
-
-			ptr = *(uint8_t **)check;
-
-			if (ptr >= (uint8_t *)sym[i].n_value && ptr < limit) {
-				instr += size;
-				goto again;
-			}
-		}
-
-		/*
-		 * OK, it's an instruction.
-		 */
-		theInstr = instr[0];
-
-		/* Walked onto the start of the next routine? If so, bail out of this function. */
-		if (theInstr == FBT_PUSHL_EBP)
-			continue;
-
-		if (!(size == 1 && (theInstr == FBT_POPL_EBP || theInstr == FBT_LEAVE))) {
-			instr += size;
-			goto again;
-		}
-
-		/*
-		 * Found the popl %ebp; or leave.
-		 */
-		machine_inst_t *patch_instr = instr;
-
-		/*
-		 * Scan forward for a "ret", or "jmp".
-		 */
-		instr += size;
-		if (instr >= limit)
-			continue;
-
-		size = dtrace_instr_size(instr);
-		if (size <= 0) /* Failed instruction decode? */
-			continue;
-
-		theInstr = instr[0];
-
-		if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
-			!(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
-			!(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
-			!(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
-			!(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
-			continue;
-
-		/*
-		 * popl %ebp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
-		 */
-		fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
-		strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );
-
-		if (retfbt == NULL) {
-			fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
-			    name, FBT_RETURN, FBT_AFRAMES_RETURN, fbt);
-		} else {
-			retfbt->fbtp_next = fbt;
-			fbt->fbtp_id = retfbt->fbtp_id;
-		}
-
-		retfbt = fbt;
-		fbt->fbtp_patchpoint = patch_instr;
-		fbt->fbtp_ctl = ctl;
-		fbt->fbtp_loadcnt = ctl->mod_loadcnt;
-
-		if (*patch_instr == FBT_POPL_EBP) {
-			fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP;
-		} else {
-			ASSERT(*patch_instr == FBT_LEAVE);
-			fbt->fbtp_rval = DTRACE_INVOP_LEAVE;
-		}
-		fbt->fbtp_roffset =
-		    (uintptr_t)(patch_instr - (uint8_t *)sym[i].n_value);
-
-		fbt->fbtp_savedval = *patch_instr;
-		fbt->fbtp_patchval = FBT_PATCHVAL;
-		fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
-		fbt->fbtp_symndx = i;
-		fbt_probetab[FBT_ADDR2NDX(patch_instr)] = fbt;
-
-		instr += size;
-		goto again;
-	}
-}
 #elif defined(__x86_64__)
 int
 fbt_invop(uintptr_t addr, uintptr_t *state, uintptr_t rval)
@@ -870,7 +1071,7 @@ kern_return_t
 fbt_perfCallback(
                 int         		trapno,
                 x86_saved_state_t 	*tagged_regs,
-                __unused int        unused1,
+		uintptr_t		*lo_spp,
                 __unused int        unused2)
 {
 	kern_return_t retval = KERN_FAILURE;
@@ -878,18 +1079,36 @@ fbt_perfCallback(
 
 	if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) {
 		boolean_t oldlevel;
-		uint64_t rsp_probe, *rbp, r12, fp, delta = 0;
+		uint64_t rsp_probe, fp, delta = 0;
+		uintptr_t old_sp;
 		uint32_t *pDst;
 		int emul;
 
+
 		oldlevel = ml_set_interrupts_enabled(FALSE);
 
 		/* Calculate where the stack pointer was when the probe instruction "fired." */
 		rsp_probe = saved_state->isf.rsp; /* Easy, x86_64 establishes this value in idt64.s */
 
+		__asm__ volatile(
+			"Ldtrace_invop_callsite_pre_label:\n"
+			".data\n"
+			".private_extern _dtrace_invop_callsite_pre\n"
+			"_dtrace_invop_callsite_pre:\n"
+			"  .quad Ldtrace_invop_callsite_pre_label\n"
+			".text\n"
+				 );
+
 		emul = dtrace_invop( saved_state->isf.rip, (uintptr_t *)saved_state, saved_state->rax );
-		__asm__ volatile(".globl _dtrace_invop_callsite");
-		__asm__ volatile("_dtrace_invop_callsite:");
+
+		__asm__ volatile(
+			"Ldtrace_invop_callsite_post_label:\n"
+			".data\n"
+			".private_extern _dtrace_invop_callsite_post\n"
+			"_dtrace_invop_callsite_post:\n"
+			"  .quad Ldtrace_invop_callsite_post_label\n"
+			".text\n"
+				 );		
 
 		switch (emul) {
 		case DTRACE_INVOP_NOP:
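
The pre/post markers above bracket the dtrace_invop() call so the stack walker
can recognize a return address that lies inside this callsite. Each asm block
drops a local label into .text and publishes its address through a .quad in
.data. A minimal standalone sketch of the same technique (the symbol and label
names here are illustrative, not the driver's):

	extern void *example_callsite_marker;	/* filled in by the asm below */

	static void
	bracketed_call(void)
	{
		__asm__ volatile(
			"Lexample_marker_label:\n"
			".data\n"
			".private_extern _example_callsite_marker\n"
			"_example_callsite_marker:\n"
			"  .quad Lexample_marker_label\n"	/* address of the label */
			".text\n");
	}

A saved PC can then be compared against such markers, exactly as sdt_getarg()
does with dtrace_invop_callsite_pre/_post later in this patch.
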
@@ -929,25 +1148,18 @@ fbt_perfCallback(
  */
 			delta += 2;
 			saved_state->isf.rsp += (delta << 2);
-
-/* XXX Fragile in the extreme. 
- * This is sensitive to trap_from_kernel()'s internals.
- */
-			rbp = (uint64_t *)__builtin_frame_address(0);
-			rbp = (uint64_t *)*rbp;
-			r12 = *(rbp - 4);
-
+/* Obtain the stack pointer recorded by the trampolines */
+			old_sp = *lo_spp;
 /* Shift contents of stack */
 			for (pDst = (uint32_t *)fp;
-			     pDst > (((uint32_t *)r12));
+			     pDst > (((uint32_t *)old_sp));
 				 pDst--)
 				*pDst = pDst[-delta];
 
 /* Track the stack lift in "saved_state". */
 			saved_state = (x86_saved_state64_t *) (((uintptr_t)saved_state) + (delta << 2));
-
-/* Now adjust the value of %r12 in our caller (kernel_trap)'s frame */
-			*(rbp - 4) = r12 + (delta << 2);
+/* Adjust the stack pointer utilized by the trampolines */
+			*lo_spp = old_sp + (delta << 2);
 
 			retval = KERN_SUCCESS;
 			break;
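
For reference, the lift above works in 32-bit words: delta counts words, so
(delta << 2) is the byte distance, and every word between the trampolines'
recorded stack pointer and the current frame is copied up from pDst[-delta].
A minimal sketch of that move, assuming the same bounds as the loop above:

	#include <stdint.h>

	/* Lift the region (old_sp, fp] upward by delta 32-bit words. Walk
	 * from the top down: each source word is read before a later,
	 * lower iteration overwrites it. */
	static void
	shift_stack_words(uint32_t *fp, uint32_t *old_sp, long delta)
	{
		uint32_t *pDst;

		for (pDst = fp; pDst > old_sp; pDst--)
			*pDst = pDst[-delta];
	}
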
@@ -966,47 +1178,301 @@ fbt_perfCallback(
 
 /*ARGSUSED*/
 static void
-__fbt_provide_module(void *arg, struct modctl *ctl)
+__provide_probe_64(struct modctl *ctl, uintptr_t instrLow, uintptr_t instrHigh, char *modname, char* symbolName, machine_inst_t* symbolStart)
 {
-#pragma unused(arg)
-	kernel_mach_header_t		*mh;
-	struct load_command         *cmd;
-	kernel_segment_command_t	*orig_ts = NULL, *orig_le = NULL;
-	struct symtab_command       *orig_st = NULL;
-	struct nlist_64             *sym = NULL;
-	char						*strings;
-	uintptr_t					instrLow, instrHigh;
-	char						*modname;
-	unsigned int				i, j;
-
-	int gIgnoreFBTBlacklist = 0;
-	PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist));
-
-	mh = (kernel_mach_header_t *)(ctl->address);
-	modname = ctl->mod_modname;
+	unsigned int			j;
+	unsigned int			doenable = 0;
+	dtrace_id_t			thisid;
 
-	if (0 == ctl->address || 0 == ctl->size) /* Has the linker been jettisoned? */
+	fbt_probe_t *newfbt, *retfbt, *entryfbt;
+	machine_inst_t *instr, *limit, theInstr, i1, i2, i3;
+	int size;
+		
+	for (j = 0, instr = symbolStart, theInstr = 0;
+	     (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2)); 
+	     j++) {
+		theInstr = instr[0];
+		if (theInstr == FBT_PUSH_RBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
+			break;
+		
+		if ((size = dtrace_instr_size(instr)) <= 0)
+			break;
+		
+		instr += size;
+	}
+	
+	if (theInstr != FBT_PUSH_RBP)
 		return;
-
+	
+	i1 = instr[1];
+	i2 = instr[2];
+	i3 = instr[3];
+	
+	limit = (machine_inst_t *)instrHigh;
+	
+	if (i1 == FBT_REX_RSP_RBP && i2 == FBT_MOV_RSP_RBP0 && i3 == FBT_MOV_RSP_RBP1) {
+		instr += 1; /* Advance to the mov %rsp,%rbp */
+		theInstr = i1;
+	} else {
+		return;
+	}
+#if 0
+	else {
+		/*
+		 * Sometimes, the compiler will schedule an intervening instruction
+		 * in the function prologue. Example:
+		 *
+		 * _mach_vm_read:
+		 * 000006d8        pushl   %ebp
+		 * 000006d9        movl    $0x00000004,%edx
+		 * 000006de        movl    %esp,%ebp
+		 * 
+		 * Try the next instruction, to see if it is a movl %esp,%ebp
+		 */
+		
+		instr += 1; /* Advance past the pushl %ebp */
+		if ((size = dtrace_instr_size(instr)) <= 0)
+			return;
+		
+		instr += size;
+		
+		if ((instr + 1) >= limit)
+			return;
+		
+		i1 = instr[0];
+		i2 = instr[1];
+		
+		if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
+		    !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
+			return;
+		
+		/* instr already points at the movl %esp,%ebp */
+		theInstr = i1;
+	}
+#endif
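
For reference, the matcher above accepts only the canonical x86_64 frame
setup. The byte encodings behind the FBT_* constants are the standard ones:
push %rbp is 0x55, and mov %rsp,%rbp is 0x48 0x89 0xe5 (REX.W prefix, opcode,
ModRM). A self-contained sketch of the same check (constant values stated
from the instruction encoding, not read out of fbt.c):

	#include <stdbool.h>
	#include <stdint.h>

	static bool
	is_frame_prologue(const uint8_t *p)
	{
		return p[0] == 0x55 &&		/* push %rbp        */
		       p[1] == 0x48 &&		/* REX.W            */
		       p[2] == 0x89 &&		/* mov r/m64,r64    */
		       p[3] == 0xe5;		/* ModRM: %rsp,%rbp */
	}
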
+	thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_ENTRY);
+	newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
+	strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
+	
+	if (thisid != 0) {
+		/*
+		 * The dtrace_probe previously existed, so we have to hook
+		 * the newfbt entry onto the end of the existing fbt's chain.
+		 * If we find an fbt entry that was previously patched to
+		 * fire, (as indicated by the current patched value), then
+		 * we want to enable this newfbt on the spot.
+		 */
+		entryfbt = dtrace_probe_arg (fbt_id, thisid);
+		ASSERT (entryfbt != NULL);
+		for(; entryfbt != NULL; entryfbt = entryfbt->fbtp_next) {
+			if (entryfbt->fbtp_currentval == entryfbt->fbtp_patchval)
+				doenable++;
+			
+			if (entryfbt->fbtp_next == NULL) {
+				entryfbt->fbtp_next = newfbt;
+				newfbt->fbtp_id = entryfbt->fbtp_id;
+				break;
+			}
+		}		    
+	}
+	else {
+		/*
+		 * The dtrace_probe did not previously exist, so we
+		 * create it and hook in the newfbt.  Since the probe is
+		 * new, we obviously do not need to enable it on the spot.
+		 */
+		newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname, symbolName, FBT_ENTRY, FBT_AFRAMES_ENTRY, newfbt);
+		doenable = 0;
+	}
+	
+	newfbt->fbtp_patchpoint = instr;
+	newfbt->fbtp_ctl = ctl;
+	newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
+	newfbt->fbtp_rval = DTRACE_INVOP_MOV_RSP_RBP;
+	newfbt->fbtp_savedval = theInstr;
+	newfbt->fbtp_patchval = FBT_PATCHVAL;
+	newfbt->fbtp_currentval = 0;
+	newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
+	fbt_probetab[FBT_ADDR2NDX(instr)] = newfbt;
+	
+	if (doenable)
+		fbt_enable(NULL, newfbt->fbtp_id, newfbt);
+	
 	/*
-	 * Employees of dtrace and their families are ineligible.  Void
-	 * where prohibited.
+	 * The fbt entry chain is in place, one entry point per symbol.
+	 * The fbt return chain can have multiple return points per symbol.
+	 * Here we find the end of the fbt return chain.
 	 */
-
-	if (LIT_STRNEQL(modname, "com.apple.driver.dtrace"))
+	
+	doenable = 0;
+	
+	thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_RETURN);
+	if (thisid != 0) {
+		/* The dtrace_probe previously existed, so we have to
+		 * find the end of the existing fbt chain.  If we find
+		 * an fbt return that was previously patched to fire,
+		 * (as indicated by the current patched value), then
+		 * we want to enable any new fbts on the spot.
+		 */
+		retfbt = dtrace_probe_arg (fbt_id, thisid);
+		ASSERT(retfbt != NULL);
+		for (; retfbt != NULL; retfbt = retfbt->fbtp_next) {
+			if (retfbt->fbtp_currentval == retfbt->fbtp_patchval)
+				doenable++;
+			if (retfbt->fbtp_next == NULL)
+				break;
+		}
+	}
+	else {
+		doenable = 0;
+		retfbt = NULL;
+	}
+	
+again:
+	if (instr >= limit)
 		return;
-
-	if (strstr(modname, "CHUD") != NULL)
+	
+	/*
+	 * If this disassembly fails, then we've likely walked off into
+	 * a jump table or some other unsuitable area.  Bail out of the
+	 * disassembly now.
+	 */
+	if ((size = dtrace_instr_size(instr)) <= 0)
+		return;
+	
+	/*
+	 * We (desperately) want to avoid erroneously instrumenting a
+	 * jump table, especially given that our markers are pretty
+	 * short:  two bytes on x86, and just one byte on amd64.  To
+	 * determine if we're looking at a true instruction sequence
+	 * or an inline jump table that happens to contain the same
+	 * byte sequences, we resort to some heuristic sleaze:  we
+	 * treat this instruction as being contained within a pointer,
+	 * and see if that pointer points to within the body of the
+	 * function.  If it does, we refuse to instrument it.
+	 */
+	for (j = 0; j < sizeof (uintptr_t); j++) {
+		uintptr_t check = (uintptr_t)instr - j;
+		uint8_t *ptr;
+		
+		if (check < (uintptr_t)symbolStart)
+			break;
+		
+		if (check + sizeof (uintptr_t) > (uintptr_t)limit)
+			continue;
+		
+		ptr = *(uint8_t **)check;
+		
+		if (ptr >= (uint8_t *)symbolStart && ptr < limit) {
+			instr += size;
+			goto again;
+		}
+	}
+	
+	/*
+	 * OK, it's an instruction.
+	 */
+	theInstr = instr[0];
+	
+	/* Walked onto the start of the next routine? If so, bail out of this function. */
+	if (theInstr == FBT_PUSH_RBP)
 		return;
+	
+	if (!(size == 1 && (theInstr == FBT_POP_RBP || theInstr == FBT_LEAVE))) {
+		instr += size;
+		goto again;
+	}
+	
+	/*
+	 * Found the pop %rbp; or leave.
+	 */
+	machine_inst_t *patch_instr = instr;
+	
+	/*
+	 * Scan forward for a "ret", or "jmp".
+	 */
+	instr += size;
+	if (instr >= limit)
+		return;
+	
+	size = dtrace_instr_size(instr);
+	if (size <= 0) /* Failed instruction decode? */
+		return;
+	
+	theInstr = instr[0];
+	
+	if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
+	    !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
+	    !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
+	    !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
+	    !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
+		return;
+	
+	/*
+	 * pop %rbp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
+	 */
+	newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
+	strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
+	
+	if (retfbt == NULL) {
+		newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
+						      symbolName, FBT_RETURN, FBT_AFRAMES_RETURN, newfbt);
+	} else {
+		retfbt->fbtp_next = newfbt;
+		newfbt->fbtp_id = retfbt->fbtp_id;
+	}
+	
+	retfbt = newfbt;
+	newfbt->fbtp_patchpoint = patch_instr;
+	newfbt->fbtp_ctl = ctl;
+	newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
+	
+	if (*patch_instr == FBT_POP_RBP) {
+		newfbt->fbtp_rval = DTRACE_INVOP_POP_RBP;
+	} else {
+		ASSERT(*patch_instr == FBT_LEAVE);
+		newfbt->fbtp_rval = DTRACE_INVOP_LEAVE;
+	}
+	newfbt->fbtp_roffset =
+	(uintptr_t)(patch_instr - (uint8_t *)symbolStart);
+	
+	newfbt->fbtp_savedval = *patch_instr;
+	newfbt->fbtp_patchval = FBT_PATCHVAL;
+	newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
+	fbt_probetab[FBT_ADDR2NDX(patch_instr)] = newfbt;
+	
+	if (doenable)
+		fbt_enable(NULL, newfbt->fbtp_id, newfbt);
+	
+	instr += size;
+	goto again;
+}
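
Once a probe record is hashed into fbt_probetab, enabling it is a one-byte
patch: fbtp_patchval (an illegal opcode) replaces fbtp_savedval at the
patchpoint, the resulting invalid-opcode trap lands in fbt_invop(), and the
saved fbtp_rval tells the trap handler which instruction to emulate. A rough
sketch of that cycle using the field names above (the real fbt_enable and
fbt_disable also deal with module load counts and memory protections, which
are omitted here):

	static void
	patch_probe(fbt_probe_t *fbt, int enable)
	{
		machine_inst_t v = enable ? fbt->fbtp_patchval
					  : fbt->fbtp_savedval;

		*fbt->fbtp_patchpoint = v;	/* single-byte store */
		fbt->fbtp_currentval = v;	/* what doenable tests above */
	}
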
 
+static void
+__kernel_syms_provide_module(void *arg, struct modctl *ctl)
+{
+#pragma unused(arg)
+	kernel_mach_header_t		*mh;
+	struct load_command		*cmd;
+	kernel_segment_command_t	*orig_ts = NULL, *orig_le = NULL;
+	struct symtab_command		*orig_st = NULL;
+	struct nlist_64			*sym = NULL;
+	char				*strings;
+	uintptr_t			instrLow, instrHigh;
+	char				*modname;
+	unsigned int			i;
+	
+	mh = (kernel_mach_header_t *)(ctl->mod_address);
+	modname = ctl->mod_modname;
+	
 	if (mh->magic != MH_MAGIC_64)
 		return;
-
+	
 	cmd = (struct load_command *) &mh[1];
 	for (i = 0; i < mh->ncmds; i++) {
 		if (cmd->cmd == LC_SEGMENT_KERNEL) {
 			kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd;
-
+			
 			if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT))
 				orig_ts = orig_sg;
 			else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT))
@@ -1016,402 +1482,105 @@ __fbt_provide_module(void *arg, struct modctl *ctl)
 		}
 		else if (cmd->cmd == LC_SYMTAB)
 			orig_st = (struct symtab_command *) cmd;
-
+		
 		cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize);
 	}
-
+	
 	if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
 		return;
-
+	
 	sym = (struct nlist_64 *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff);
 	strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff);
-
+	
 	/* Find extent of the TEXT section */
 	instrLow = (uintptr_t)orig_ts->vmaddr;
 	instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize);
-
+	
 	for (i = 0; i < orig_st->nsyms; i++) {
-		fbt_probe_t *fbt, *retfbt;
-		machine_inst_t *instr, *limit, theInstr, i1, i2, i3;
 		uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
 		char *name = strings + sym[i].n_un.n_strx;
-		int size;
-
+		
 		/* Check that the symbol is a global and that it has a name. */
 		if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
 			continue;
-
+		
 		if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */
 			continue;
 		
 		/* Lop off omnipresent leading underscore. */			
 		if (*name == '_')
 			name += 1;
-
-		if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) {
-			/*
-			 * Anything beginning with "dtrace_" may be called
-			 * from probe context unless it explitly indicates
-			 * that it won't be called from probe context by
-			 * using the prefix "dtrace_safe_".
-			 */
-			continue;
-		}
-
-		if (LIT_STRNSTART(name, "fasttrap_") ||
-		    LIT_STRNSTART(name, "fuword") ||
-		    LIT_STRNSTART(name, "suword") ||
-			LIT_STRNEQL(name, "sprlock") ||
-			LIT_STRNEQL(name, "sprunlock") ||
-			LIT_STRNEQL(name, "uread") ||
-			LIT_STRNEQL(name, "uwrite"))
-			continue; /* Fasttrap inner-workings. */
-
-		if (LIT_STRNSTART(name, "dsmos_")) 
-			continue; /* Don't Steal Mac OS X! */
-
-        if (LIT_STRNSTART(name, "_dtrace"))
-			continue; /* Shims in dtrace.c */
-
-		if (LIT_STRNSTART(name, "chud"))
-			continue; /* Professional courtesy. */
-		
-		if (LIT_STRNSTART(name, "hibernate_"))
-			continue; /* Let sleeping dogs lie. */
 		
-		if (LIT_STRNEQL(name, "ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */
-			LIT_STRNEQL(name, "ZN9IOService26temperatureCriticalForZoneEPS_")) /* IOService::temperatureCriticalForZone */
-			continue; /* Per the fire code */
-
-		/*
-		 * Place no probes (illegal instructions) in the exception handling path!
-		 */
-		if (LIT_STRNEQL(name, "t_invop") ||
-			LIT_STRNEQL(name, "enter_lohandler") ||
-			LIT_STRNEQL(name, "lo_alltraps") ||
-			LIT_STRNEQL(name, "kernel_trap") ||
-			LIT_STRNEQL(name, "interrupt") ||		  
-			LIT_STRNEQL(name, "i386_astintr"))
-			continue;
-
-		if (LIT_STRNEQL(name, "current_thread") ||
-			LIT_STRNEQL(name, "ast_pending") ||
-			LIT_STRNEQL(name, "fbt_perfCallback") ||
-			LIT_STRNEQL(name, "machine_thread_get_kern_state") ||
-			LIT_STRNEQL(name, "get_threadtask") ||
-			LIT_STRNEQL(name, "ml_set_interrupts_enabled") ||
-			LIT_STRNEQL(name, "dtrace_invop") ||
-			LIT_STRNEQL(name, "fbt_invop") ||
-			LIT_STRNEQL(name, "sdt_invop") ||
-			LIT_STRNEQL(name, "max_valid_stack_address"))
-			continue;
-
-		/*
-		 * Voodoo.
-		 */
-		if (LIT_STRNSTART(name, "machine_stack_") ||
-			LIT_STRNSTART(name, "mapping_") ||
-			LIT_STRNEQL(name, "tmrCvt") ||
-
-			LIT_STRNSTART(name, "tsc_") ||
-
-			LIT_STRNSTART(name, "pmCPU") ||
-			LIT_STRNEQL(name, "pmKextRegister") ||
-			LIT_STRNEQL(name, "pmMarkAllCPUsOff") || 
-			LIT_STRNEQL(name, "pmSafeMode") ||
-			LIT_STRNEQL(name, "pmTimerSave") ||
-			LIT_STRNEQL(name, "pmTimerRestore") ||		  
-			LIT_STRNEQL(name, "pmUnRegister") ||
-			LIT_STRNSTART(name, "pms") ||
-			LIT_STRNEQL(name, "power_management_init") ||
-			LIT_STRNSTART(name, "usimple_") ||
-			LIT_STRNSTART(name, "lck_spin_lock") ||
-			LIT_STRNSTART(name, "lck_spin_unlock") ||
-
-			LIT_STRNSTART(name, "rtc_") ||
-			LIT_STRNSTART(name, "_rtc_") ||
-			LIT_STRNSTART(name, "rtclock_") ||
-			LIT_STRNSTART(name, "clock_") ||
-			LIT_STRNSTART(name, "absolutetime_to_") ||
-			LIT_STRNEQL(name, "setPop") ||
-			LIT_STRNEQL(name, "nanoseconds_to_absolutetime") ||
-			LIT_STRNEQL(name, "nanotime_to_absolutetime") ||
-
-			LIT_STRNSTART(name, "etimer_") ||
-
-			LIT_STRNSTART(name, "commpage_") ||
-			LIT_STRNSTART(name, "pmap_") ||
-			LIT_STRNSTART(name, "ml_") ||
-			LIT_STRNSTART(name, "PE_") ||
-		        LIT_STRNEQL(name, "kprintf") ||
-			LIT_STRNSTART(name, "lapic_") ||
-			LIT_STRNSTART(name, "acpi_"))
-			continue;
-
-        /*
-         * Avoid machine_ routines. PR_5346750.
-         */
-        if (LIT_STRNSTART(name, "machine_"))
-            continue;
-
-		if (LIT_STRNEQL(name, "handle_pending_TLB_flushes"))
-			continue;
-
-        /*
-         * Place no probes on critical routines. PR_5221096
-         */
-        if (!gIgnoreFBTBlacklist &&
-            bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL)
-                continue;
-
-        /*
-		 * Place no probes that could be hit in probe context.
-		 */
-		if (!gIgnoreFBTBlacklist && 
-			bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL)
-			continue;
-
-		/*
-		 * Place no probes that could be hit on the way to the debugger.
-		 */
-		if (LIT_STRNSTART(name, "kdp_") ||
-			LIT_STRNSTART(name, "kdb_") ||
-			LIT_STRNSTART(name, "kdbg_") ||
-			LIT_STRNSTART(name, "kdebug_") ||
-			LIT_STRNEQL(name, "kernel_debug") ||
-			LIT_STRNEQL(name, "Debugger") ||
-			LIT_STRNEQL(name, "Call_DebuggerC") ||
-			LIT_STRNEQL(name, "lock_debugger") ||
-			LIT_STRNEQL(name, "unlock_debugger") ||
-			LIT_STRNEQL(name, "SysChoked")) 
-			continue;
-
 		/*
-		 * Place no probes that could be hit on the way to a panic.
+		 * We're only blacklisting functions in the kernel for now.
 		 */
-		if (NULL != strstr(name, "panic_") ||
-			LIT_STRNEQL(name, "panic") ||
-			LIT_STRNEQL(name, "handleMck") ||
-			LIT_STRNEQL(name, "unresolved_kernel_trap"))
+		if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name))
 			continue;
 		
-		if (dtrace_probe_lookup(fbt_id, modname, name, NULL) != 0)
-			continue;
-
-		for (j = 0, instr = (machine_inst_t *)sym[i].n_value, theInstr = 0;
-			 (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2)); 
-			 j++) {
-			theInstr = instr[0];
-			if (theInstr == FBT_PUSH_RBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
-				break;
-
-			if ((size = dtrace_instr_size(instr)) <= 0)
-				break;
- 
-			instr += size;
-		}
-
-		if (theInstr != FBT_PUSH_RBP)
-			continue;
-
-		i1 = instr[1];
-		i2 = instr[2];
-		i3 = instr[3];
-
-		limit = (machine_inst_t *)instrHigh;
+		__provide_probe_64(ctl, instrLow, instrHigh, modname, name, (machine_inst_t*)sym[i].n_value);
+	}
+}
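
The load-command walk above is the standard Mach-O pattern: step through
ncmds commands, each cmdsize bytes long, collecting __TEXT, __LINKEDIT and
LC_SYMTAB. A userspace-flavored sketch of the same walk (using the public
<mach-o/loader.h> types rather than the kernel_* aliases):

	#include <mach-o/loader.h>
	#include <stdint.h>

	static const struct symtab_command *
	find_symtab(const struct mach_header_64 *mh)
	{
		const struct load_command *cmd =
		    (const struct load_command *)&mh[1];
		uint32_t i;

		for (i = 0; i < mh->ncmds; i++) {
			if (cmd->cmd == LC_SYMTAB)
				return (const struct symtab_command *)cmd;
			cmd = (const struct load_command *)
			    ((uintptr_t)cmd + cmd->cmdsize);
		}
		return NULL;	/* no symbol table */
	}
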
 
-		if (i1 == FBT_REX_RSP_RBP && i2 == FBT_MOV_RSP_RBP0 && i3 == FBT_MOV_RSP_RBP1) {
-				instr += 1; /* Advance to the mov %rsp,%rbp */
-				theInstr = i1;
-		} else {
-			continue;
-		}
-#if 0
-		else {
+static void
+__user_syms_provide_module(void *arg, struct modctl *ctl)
+{
+#pragma unused(arg)
+	char				*modname;
+	unsigned int			i;
+	
+	modname = ctl->mod_modname;
+	
+	dtrace_module_symbols_t* module_symbols = ctl->mod_user_symbols;
+	if (module_symbols) {
+		for (i=0; i<module_symbols->dtmodsyms_count; i++) {
+			dtrace_symbol_t* symbol = &module_symbols->dtmodsyms_symbols[i];
+			char* name = symbol->dtsym_name;
+			
+			/* Lop off omnipresent leading underscore. */			
+			if (*name == '_')
+				name += 1;
+			
 			/*
-			 * Sometimes, the compiler will schedule an intervening instruction
-			 * in the function prologue. Example:
-			 *
-			 * _mach_vm_read:
-			 * 000006d8        pushl   %ebp
-			 * 000006d9        movl    $0x00000004,%edx
-			 * 000006de        movl    %esp,%ebp
-			 * 
-			 * Try the next instruction, to see if it is a movl %esp,%ebp
+			 * We're only blacklisting functions in the kernel for now.
 			 */
-
-			instr += 1; /* Advance past the pushl %ebp */
-			if ((size = dtrace_instr_size(instr)) <= 0)
-				continue;
- 
-			instr += size;
-
-			if ((instr + 1) >= limit)
-				continue;
-
-			i1 = instr[0];
-			i2 = instr[1];
-
-			if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
-				!(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
+			if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name))
 				continue;
-
-			/* instr already points at the movl %esp,%ebp */
-			theInstr = i1;
-		}
-#endif
-
-		fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
-		strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );
-		fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_ENTRY, FBT_AFRAMES_ENTRY, fbt);
-		fbt->fbtp_patchpoint = instr;
-		fbt->fbtp_ctl = ctl;
-		fbt->fbtp_loadcnt = ctl->mod_loadcnt;
-		fbt->fbtp_rval = DTRACE_INVOP_MOV_RSP_RBP;
-		fbt->fbtp_savedval = theInstr;
-		fbt->fbtp_patchval = FBT_PATCHVAL;
-
-		fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
-		fbt->fbtp_symndx = i;
-		fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
-
-		retfbt = NULL;
-again:
-		if (instr >= limit)
-			continue;
-
-		/*
-		 * If this disassembly fails, then we've likely walked off into
-		 * a jump table or some other unsuitable area.  Bail out of the
-		 * disassembly now.
-		 */
-		if ((size = dtrace_instr_size(instr)) <= 0)
-			continue;
-
-		/*
-		 * We (desperately) want to avoid erroneously instrumenting a
-		 * jump table, especially given that our markers are pretty
-		 * short:  two bytes on x86, and just one byte on amd64.  To
-		 * determine if we're looking at a true instruction sequence
-		 * or an inline jump table that happens to contain the same
-		 * byte sequences, we resort to some heuristic sleeze:  we
-		 * treat this instruction as being contained within a pointer,
-		 * and see if that pointer points to within the body of the
-		 * function.  If it does, we refuse to instrument it.
-		 */
-		for (j = 0; j < sizeof (uintptr_t); j++) {
-			uintptr_t check = (uintptr_t)instr - j;
-			uint8_t *ptr;
-
-			if (check < sym[i].n_value)
-				break;
-
-			if (check + sizeof (uintptr_t) > (uintptr_t)limit)
-				continue;
-
-			ptr = *(uint8_t **)check;
-
-			if (ptr >= (uint8_t *)sym[i].n_value && ptr < limit) {
-				instr += size;
-				goto again;
-			}
-		}
-
-		/*
-		 * OK, it's an instruction.
-		 */
-		theInstr = instr[0];
-
-		/* Walked onto the start of the next routine? If so, bail out of this function. */
-		if (theInstr == FBT_PUSH_RBP)
-			continue;
-
-		if (!(size == 1 && (theInstr == FBT_POP_RBP || theInstr == FBT_LEAVE))) {
-			instr += size;
-			goto again;
-		}
-
-		/*
-		 * Found the pop %rbp; or leave.
-		 */
-		machine_inst_t *patch_instr = instr;
-
-		/*
-		 * Scan forward for a "ret", or "jmp".
-		 */
-		instr += size;
-		if (instr >= limit)
-			continue;
-
-		size = dtrace_instr_size(instr);
-		if (size <= 0) /* Failed instruction decode? */
-			continue;
-
-		theInstr = instr[0];
-
-		if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
-			!(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
-			!(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
-			!(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
-			!(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
-			continue;
-
-		/*
-		 * pop %rbp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
-		 */
-		fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
-		strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );
-
-		if (retfbt == NULL) {
-			fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
-			    name, FBT_RETURN, FBT_AFRAMES_RETURN, fbt);
-		} else {
-			retfbt->fbtp_next = fbt;
-			fbt->fbtp_id = retfbt->fbtp_id;
-		}
-
-		retfbt = fbt;
-		fbt->fbtp_patchpoint = patch_instr;
-		fbt->fbtp_ctl = ctl;
-		fbt->fbtp_loadcnt = ctl->mod_loadcnt;
-
-		if (*patch_instr == FBT_POP_RBP) {
-			fbt->fbtp_rval = DTRACE_INVOP_POP_RBP;
-		} else {
-			ASSERT(*patch_instr == FBT_LEAVE);
-			fbt->fbtp_rval = DTRACE_INVOP_LEAVE;
+			
+			__provide_probe_64(ctl, (uintptr_t)symbol->dtsym_addr, (uintptr_t)(symbol->dtsym_addr + symbol->dtsym_size), modname, name, (machine_inst_t*)(uintptr_t)symbol->dtsym_addr);
 		}
-		fbt->fbtp_roffset =
-		    (uintptr_t)(patch_instr - (uint8_t *)sym[i].n_value);
-
-		fbt->fbtp_savedval = *patch_instr;
-		fbt->fbtp_patchval = FBT_PATCHVAL;
-		fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
-		fbt->fbtp_symndx = i;
-		fbt_probetab[FBT_ADDR2NDX(patch_instr)] = fbt;
-
-		instr += size;
-		goto again;
 	}
 }
 #else
 #error Unknown arch
 #endif
 
-extern struct modctl g_fbt_kernctl;
-#undef kmem_alloc /* from its binding to dt_kmem_alloc glue */
-#undef kmem_free /* from its binding to dt_kmem_free glue */
-#include <vm/vm_kern.h>
+extern int dtrace_kernel_symbol_mode;
 
 /*ARGSUSED*/
 void
 fbt_provide_module(void *arg, struct modctl *ctl)
 {
-#pragma unused(ctl)
-	__fbt_provide_module(arg, &g_fbt_kernctl);
+	ASSERT(ctl != NULL);
+	ASSERT(dtrace_kernel_symbol_mode != DTRACE_KERNEL_SYMBOLS_NEVER);
+	lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);
 
-	if ( (vm_offset_t)g_fbt_kernctl.address != (vm_offset_t )NULL )
-	    kmem_free(kernel_map, (vm_offset_t)g_fbt_kernctl.address, round_page(g_fbt_kernctl.size));
-	g_fbt_kernctl.address = 0;
-	g_fbt_kernctl.size = 0;
+	if (MOD_FBT_DONE(ctl))
+		return;
+	
+	if (!is_module_valid(ctl)) {
+		ctl->mod_flags |= MODCTL_FBT_INVALID;
+		return;
+	}
+	
+	if (MOD_HAS_KERNEL_SYMBOLS(ctl)) {
+		__kernel_syms_provide_module(arg, ctl);
+		ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED;
+		return;
+	}
+	
+	if (MOD_HAS_USERSPACE_SYMBOLS(ctl)) {
+		__user_syms_provide_module(arg, ctl);
+		ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED;
+		return;
+	}	
 }
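
The rewritten entry point is idempotent per module: mod_flags acts as a small
state machine, so a module is scanned at most once and a rejected module is
never revisited. The MODCTL_* bits and MOD_* predicates used above live in
the dtrace modctl definitions; illustratively (the bit values here are
hypothetical, the shipped header is authoritative):

	#define MODCTL_FBT_PROBES_PROVIDED	0x04	/* hypothetical values */
	#define MODCTL_FBT_INVALID		0x08

	#define MOD_FBT_DONE(ctl) \
		(((ctl)->mod_flags & \
		  (MODCTL_FBT_PROBES_PROVIDED | MODCTL_FBT_INVALID)) != 0)
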
diff --git a/bsd/dev/i386/mem.c b/bsd/dev/i386/mem.c
index e598cdf67..4b4589295 100644
--- a/bsd/dev/i386/mem.c
+++ b/bsd/dev/i386/mem.c
@@ -88,7 +88,9 @@
 
 extern addr64_t  kvtophys(vm_offset_t va); 
 extern boolean_t kernacc(off_t, size_t );
+#if !defined(SECURE_KERNEL)
 extern int setup_kmem;
+#endif
 
 static caddr_t devzerobuf;
 
@@ -117,8 +119,12 @@ mmioctl(dev_t dev, u_long cmd, __unused caddr_t data,
 {
 	int minnum = minor(dev);
 
-	if ((setup_kmem == 0) && ((minnum == 0) || (minnum == 1)))
-		return(EINVAL);
+	if ((minnum == 0) || (minnum == 1)) {
+#if !defined(SECURE_KERNEL)
+		if (setup_kmem == 0)
+			return(EINVAL);
+#endif
+	}
 
 	switch (cmd) {
 	case FIONBIO:
@@ -149,8 +155,12 @@ mmrw(dev_t dev, struct uio *uio, enum uio_rw rw)
 
 		/* minor device 0 is physical memory */
 		case 0:
+#if defined(SECURE_KERNEL)
+			return(ENODEV);
+#else
 			if (setup_kmem == 0)
 				return(ENODEV);
+#endif
 
 			v = trunc_page(uio->uio_offset);
 			if (uio->uio_offset >= (off_t)mem_size)
@@ -169,8 +179,12 @@ mmrw(dev_t dev, struct uio *uio, enum uio_rw rw)
 
 		/* minor device 1 is kernel memory */
 		case 1:
+#if defined(SECURE_KERNEL)
+			return(ENODEV);
+#else
 			if (setup_kmem == 0)
 				return(ENODEV);
+#endif
 			/* Do some sanity checking */
 			if (((vm_address_t)uio->uio_offset >= VM_MAX_KERNEL_ADDRESS) ||
 				((vm_address_t)uio->uio_offset <= VM_MIN_KERNEL_AND_KEXT_ADDRESS))
diff --git a/bsd/dev/i386/munge.s b/bsd/dev/i386/munge.s
index d174c06e3..9df397097 100644
--- a/bsd/dev/i386/munge.s
+++ b/bsd/dev/i386/munge.s
@@ -140,16 +140,92 @@ Entry(munge_wl)			/* Costs an extra w move to do this */
 ENTRY(munge_wlw)
 	movl	8(%esp),%ecx	// get &uu_args
 	xorl	%edx,%edx
-	movl	12(%ecx),%eax
+Lwlw:
+	movl	12(%ecx),%eax	//l
 	movl	%eax,16(%ecx)
 	movl	%edx,20(%ecx)
-	movl	8(%ecx),%eax
+Lwl:
+	movl	8(%ecx),%eax	//l
 	movl	%eax,12(%ecx)
 	movl	4(%ecx),%eax
 	movl	%eax,8(%ecx)
-	movl	%edx,4(%ecx)
+	movl	%edx,4(%ecx)	//w
 	ret
 
+ENTRY(munge_wlwwwll)
+	movl	8(%esp),%ecx	// get &uu_args
+	xorl	%edx,%edx
+Lwlwwwll:
+	movl	36(%ecx),%eax
+	movl	%eax,52(%ecx)
+	movl	32(%ecx),%eax
+	movl	%eax,48(%ecx)
+	movl	28(%ecx),%eax
+	movl	%eax,44(%ecx)
+	movl	24(%ecx),%eax
+	movl	%eax,40(%ecx)
+	movl	20(%ecx),%eax
+	movl	%eax,32(%ecx)
+	movl	%edx,36(%ecx)
+Lwlww:
+	movl	16(%ecx),%eax
+	movl	%eax,24(%ecx)
+	movl	%edx,28(%ecx)
+	jmp 	Lwlw
+
+ENTRY(munge_wlwwwllw)
+	movl	8(%esp),%ecx	// get &uu_args
+	xorl	%edx,%edx
+	movl	40(%ecx),%eax
+	movl	%eax,56(%ecx)
+	movl	%edx,60(%ecx)
+	jmp	Lwlwwwll
+
+ENTRY(munge_wlwwlwlw)
+	movl	8(%esp),%ecx	// get &uu_args
+	xorl	%edx,%edx
+	movl	40(%ecx),%eax
+	movl	%eax,56(%ecx)
+	movl	%edx,60(%ecx)
+	movl	36(%ecx),%eax
+	movl	%eax,52(%ecx)
+	movl	32(%ecx),%eax
+	movl	%eax,48(%ecx)
+	movl	28(%ecx),%eax
+	movl	%eax,40(%ecx)
+	movl	%edx,44(%ecx)
+	movl	24(%ecx),%eax
+	movl	%eax,36(%ecx)
+	movl	20(%ecx),%eax
+	movl	%eax,32(%ecx)
+	jmp 	Lwlww
+
+ENTRY(munge_wllwwll)
+	movl	8(%esp),%ecx	// get &uu_args
+	xorl	%edx,%edx
+
+	movl	40(%ecx),%eax	// l
+	movl	%eax,52(%ecx)
+	movl	36(%ecx),%eax
+	movl	%eax,48(%ecx)
+	movl	32(%ecx),%eax	// l
+	movl	%eax,44(%ecx)
+	movl	28(%ecx),%eax
+	movl	%eax,40(%ecx)
+
+	movl	24(%ecx),%eax	//w
+	movl	%eax,32(%ecx)
+	movl	%edx,36(%ecx)
+	movl	20(%ecx),%eax	//w
+	movl	%eax,24(%ecx)
+	movl	%edx,28(%ecx)
+
+	movl	16(%ecx),%eax	//l
+	movl	%eax,20(%ecx)
+	movl	12(%ecx),%eax
+	movl	%eax,16(%ecx)
+	jmp	Lwl
+
 Entry(munge_wwwlw)
 	movl	8(%esp),%ecx	// get &uu_args
 	xorl	%edx,%edx
@@ -195,6 +271,63 @@ ENTRY(munge_wwwwwl)
 	movl	%eax,44(%ecx)
 	jmp	Lw5
 
+ENTRY(munge_wwwwwlww)
+	movl	8(%esp),%ecx	// get &uu_args
+	xorl	%edx,%edx
+	movl	32(%ecx),%eax
+	movl	%eax,56(%ecx)
+	movl	%edx,60(%ecx)
+	movl	28(%ecx),%eax
+	movl	%eax,48(%ecx)
+	movl	%edx,52(%ecx)
+	movl	20(%ecx),%eax
+	movl	%eax,40(%ecx)
+	movl	24(%ecx),%eax
+	movl	%eax,44(%ecx)
+	jmp	Lw5
+
+ENTRY(munge_wwwwwllw)
+	movl	8(%esp),%ecx	// get &uu_args
+	xorl	%edx,%edx
+	movl	36(%ecx),%eax
+	movl	%eax,56(%ecx)
+	movl	%edx,60(%ecx)
+	movl	28(%ecx),%eax
+	movl	%eax,48(%ecx)
+	movl	32(%ecx),%eax
+	movl	%eax,52(%ecx)
+	movl	20(%ecx),%eax
+	movl	%eax,40(%ecx)
+	movl	24(%ecx),%eax
+	movl	%eax,44(%ecx)
+	jmp	Lw5
+
+ENTRY(munge_wwwwwlll)
+	movl	8(%esp),%ecx	// get &uu_args
+	xorl	%edx,%edx
+	movl	36(%ecx),%eax
+	movl	%eax,56(%ecx)
+	movl	40(%ecx),%eax
+	movl	%eax,60(%ecx)
+	movl	28(%ecx),%eax
+	movl	%eax,48(%ecx)
+	movl	32(%ecx),%eax
+	movl	%eax,52(%ecx)
+	movl	20(%ecx),%eax
+	movl	%eax,40(%ecx)
+	movl	24(%ecx),%eax
+	movl	%eax,44(%ecx)
+	jmp	Lw5
+
+ENTRY(munge_wwwwwwl)
+	movl	8(%esp),%ecx	// get &uu_args
+	xorl	%edx,%edx
+	movl	24(%ecx),%eax
+	movl	%eax,48(%ecx)
+	movl	28(%ecx),%eax
+	movl	%eax,52(%ecx)
+	jmp 	Lw6
+	
 ENTRY(munge_wwwwwwlw)
 	movl	8(%esp),%ecx	// get &uu_args
 	xorl	%edx,%edx
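
A note on the munge_* family for readers of the new entries above: each
routine widens a packed array of 32-bit user arguments into 64-bit uu_args
slots in place, and the suffix spells the argument pattern (w = 32-bit word,
zero-extended; l = 64-bit long, carried as a low/high word pair). The
routines work from the highest slot downward so no source word is clobbered
before it is read, and they chain into shared tails (Lwlw, Lwl, Lw5, ...)
for common suffixes. As a sketch, munge_wlw from this file rendered in C:

	#include <stdint.h>

	/* In:  args[0]=w0, args[1..2]=l1 (lo,hi), args[3]=w2.
	 * Out: three little-endian 64-bit slots in args[0..5]. */
	static void
	munge_wlw_c(uint32_t *args)
	{
		args[4] = args[3];	/* w2: low word moves up */
		args[5] = 0;		/* w2: zero-extend       */
		args[3] = args[2];	/* l1: high word         */
		args[2] = args[1];	/* l1: low word          */
		args[1] = 0;		/* w0: zero-extend       */
		/* args[0] (w0) is already in place */
	}
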
diff --git a/bsd/dev/i386/sdt_x86.c b/bsd/dev/i386/sdt_x86.c
index c354b303e..680ed779b 100644
--- a/bsd/dev/i386/sdt_x86.c
+++ b/bsd/dev/i386/sdt_x86.c
@@ -107,3 +107,115 @@ sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax)
 #endif
 
 
+struct frame {
+    struct frame *backchain;
+    uintptr_t retaddr;
+};
+
+/*ARGSUSED*/
+uint64_t
+sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
+{
+#pragma unused(arg, id, parg)    
+	uint64_t val;
+	struct frame *fp = (struct frame *)__builtin_frame_address(0);
+	uintptr_t *stack;
+	uintptr_t pc;
+	int i;
+
+#if defined(__x86_64__)
+    /*
+     * A total of 6 arguments are passed via registers; any argument with
+     * index of 5 or lower is therefore in a register.
+     */
+    int inreg = 5;
+#endif
+
+	for (i = 1; i <= aframes; i++) {
+		fp = fp->backchain;
+		pc = fp->retaddr;
+
+		if (dtrace_invop_callsite_pre != NULL
+			&& pc  >  (uintptr_t)dtrace_invop_callsite_pre
+			&& pc  <= (uintptr_t)dtrace_invop_callsite_post) {
+#if defined(__i386__)
+			/*
+			 * If we pass through the invalid op handler, we will
+			 * use the pointer that it passed to the stack as the
+			 * second argument to dtrace_invop() as the pointer to
+			 * the frame we're hunting for.
+			 */
+
+			stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */
+			fp = (struct frame *)stack[1]; /* Grab *second* argument */
+			stack = (uintptr_t *)&fp[0]; /* Find marshalled arguments */
+#elif defined(__x86_64__)
+			/*
+			 * In the case of x86_64, we will use the pointer to the
+			 * save area structure that was pushed when we took the
+			 * trap.  To get this structure, we must increment
+			 * beyond the frame structure. If the
+			 * argument that we're seeking is passed on the stack,
+			 * we'll pull the true stack pointer out of the saved
+			 * registers and decrement our argument by the number
+			 * of arguments passed in registers; if the argument
+			 * we're seeking is passed in registers, we can just
+			 * load it directly.
+			 */
+
+			/* fp points to frame of dtrace_invop() activation. */
+			fp = fp->backchain; /* to fbt_perfcallback() activation. */
+			fp = fp->backchain; /* to kernel_trap() activation. */
+			fp = fp->backchain; /* to trap_from_kernel() activation. */
+			
+			x86_saved_state_t   *tagged_regs = (x86_saved_state_t *)&fp[1];
+			x86_saved_state64_t *saved_state = saved_state64(tagged_regs);
+
+			if (argno <= inreg) {
+				stack = (uintptr_t *)&saved_state->rdi;
+			} else {
+				fp = (struct frame *)(saved_state->isf.rsp);
+				stack = (uintptr_t *)&fp[0]; /* Find marshalled
+								arguments */
+				argno -= (inreg + 1);
+			}
+#else
+#error Unknown arch
+#endif
+			goto load;
+		}
+	}
+
+	/*
+	 * We know that we did not come through a trap to get into
+	 * dtrace_probe() --  We arrive here when the provider has
+	 * called dtrace_probe() directly.
+	 * The probe ID is the first argument to dtrace_probe().
+	 * We must advance beyond that to get the argX.
+	 */
+	argno++; /* Advance past probeID */
+
+#if defined(__x86_64__)
+	if (argno <= inreg) {
+		/*
+		 * This shouldn't happen.  If the argument is passed in a
+		 * register then it should have been, well, passed in a
+		 * register...
+		 */
+		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
+		return (0);
+	}
+
+	argno -= (inreg + 1);
+#endif
+	stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */
+
+load:
+	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
+	/* dtrace_probe arguments arg0 ... arg4 are 64bits wide */
+	val = (uint64_t)(*(((uintptr_t *)stack) + argno));
+	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
+
+	return (val);
+}
+    
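
The inreg arithmetic above follows from the x86_64 calling convention: six
argument registers, saved contiguously starting at saved_state->rdi, with
argument seven and up spilled to the interrupted stack. Reduced to a sketch
(assuming, as the code above does, that the register block in
x86_saved_state64_t is contiguous):

	static uint64_t
	getarg64_sketch(x86_saved_state64_t *ss, int argno)
	{
		const int inreg = 5;	/* args 0..5 live in registers */

		if (argno <= inreg)
			return ((uint64_t *)&ss->rdi)[argno];
		/* spilled arguments: rebase past the six register slots */
		return ((uint64_t *)ss->isf.rsp)[argno - (inreg + 1)];
	}
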
diff --git a/bsd/dev/i386/sysctl.c b/bsd/dev/i386/sysctl.c
index c255529a1..ba3bfc1ee 100644
--- a/bsd/dev/i386/sysctl.c
+++ b/bsd/dev/i386/sysctl.c
@@ -33,6 +33,9 @@
 #include <i386/cpuid.h>
 #include <i386/tsc.h>
 #include <i386/machine_routines.h>
+#include <i386/ucode.h>
+#include <kern/clock.h>
+#include <libkern/libkern.h>
 
 static int
 _i386_cpu_info SYSCTL_HANDLER_ARGS
@@ -201,6 +204,42 @@ cpu_flex_ratio_max SYSCTL_HANDLER_ARGS
 	return SYSCTL_OUT(req, &flex_ratio_max, sizeof(flex_ratio_max));
 }
 
+static int
+cpu_ucode_update SYSCTL_HANDLER_ARGS
+{
+	__unused struct sysctl_oid *unused_oidp = oidp;
+	__unused void *unused_arg1 = arg1;
+	__unused int unused_arg2 = arg2;
+	uint64_t addr;
+	int error;
+	
+	error = SYSCTL_IN(req, &addr, sizeof(addr));
+	if (error)
+		return error;
+
+	int ret = ucode_interface(addr);
+	return ret;
+}
+
+extern uint64_t panic_restart_timeout;
+static int
+panic_set_restart_timeout(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+	int new_value = 0, old_value = 0, changed = 0, error;
+	uint64_t nstime;
+
+	if (panic_restart_timeout) {
+		absolutetime_to_nanoseconds(panic_restart_timeout, &nstime);
+		old_value = nstime / NSEC_PER_SEC;
+	}
+
+	error = sysctl_io_number(req, old_value, sizeof(int), &new_value, &changed);
+	if (error == 0 && changed) {
+		nanoseconds_to_absolutetime(((uint64_t)new_value) * NSEC_PER_SEC, &panic_restart_timeout);
+	}
+	return error;
+}
+
 /*
  * Populates the {CPU, vector, latency} triple for the maximum observed primary
  * interrupt latency
@@ -226,107 +265,113 @@ misc_interrupt_latency_max(__unused struct sysctl_oid *oidp, __unused void *arg1
 SYSCTL_NODE(_machdep, OID_AUTO, cpu, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
 	"CPU info");
 
-SYSCTL_PROC(_machdep_cpu, OID_AUTO, max_basic, CTLTYPE_INT | CTLFLAG_RD, 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, max_basic, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_max_basic),sizeof(uint32_t),
 	    i386_cpu_info, "IU", "Max Basic Information value");
 
-SYSCTL_PROC(_machdep_cpu, OID_AUTO, max_ext, CTLTYPE_INT | CTLFLAG_RD, 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, max_ext, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_max_ext), sizeof(uint32_t),
 	    i386_cpu_info, "IU", "Max Extended Function Information value");
 
-SYSCTL_PROC(_machdep_cpu, OID_AUTO, vendor, CTLTYPE_STRING | CTLFLAG_RD, 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, vendor, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_vendor), 0,
 	    i386_cpu_info, "A", "CPU vendor");
 
-SYSCTL_PROC(_machdep_cpu, OID_AUTO, brand_string, CTLTYPE_STRING | CTLFLAG_RD, 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, brand_string, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_brand_string), 0,
 	    i386_cpu_info, "A", "CPU brand string");
 
-SYSCTL_PROC(_machdep_cpu, OID_AUTO, family, CTLTYPE_INT | CTLFLAG_RD, 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, family, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_family), sizeof(uint8_t),
 	    i386_cpu_info, "I", "CPU family");
 
-SYSCTL_PROC(_machdep_cpu, OID_AUTO, model, CTLTYPE_INT | CTLFLAG_RD, 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, model, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_model), sizeof(uint8_t),
 	    i386_cpu_info, "I", "CPU model");
 
-SYSCTL_PROC(_machdep_cpu, OID_AUTO, extmodel, CTLTYPE_INT | CTLFLAG_RD, 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, extmodel, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_extmodel), sizeof(uint8_t),
 	    i386_cpu_info, "I", "CPU extended model");
 
-SYSCTL_PROC(_machdep_cpu, OID_AUTO, extfamily, CTLTYPE_INT | CTLFLAG_RD, 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, extfamily, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_extfamily), sizeof(uint8_t),
 	    i386_cpu_info, "I", "CPU extended family");
 
-SYSCTL_PROC(_machdep_cpu, OID_AUTO, stepping, CTLTYPE_INT | CTLFLAG_RD, 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, stepping, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_stepping), sizeof(uint8_t),
 	    i386_cpu_info, "I", "CPU stepping");
 
-SYSCTL_PROC(_machdep_cpu, OID_AUTO, feature_bits, CTLTYPE_QUAD | CTLFLAG_RD, 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, feature_bits, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_features), sizeof(uint64_t),
 	    i386_cpu_info, "IU", "CPU features");
 
-SYSCTL_PROC(_machdep_cpu, OID_AUTO, extfeature_bits, CTLTYPE_QUAD | CTLFLAG_RD, 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, extfeature_bits, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_extfeatures), sizeof(uint64_t),
 	    i386_cpu_info, "IU", "CPU extended features");
 
-SYSCTL_PROC(_machdep_cpu, OID_AUTO, signature, CTLTYPE_INT | CTLFLAG_RD, 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, signature, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_signature), sizeof(uint32_t),
 	    i386_cpu_info, "I", "CPU signature");
 
-SYSCTL_PROC(_machdep_cpu, OID_AUTO, brand, CTLTYPE_INT | CTLFLAG_RD, 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, brand, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_brand), sizeof(uint8_t),
 	    i386_cpu_info, "I", "CPU brand");
 
-SYSCTL_PROC(_machdep_cpu, OID_AUTO, features, CTLTYPE_STRING | CTLFLAG_RD, 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, features, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    0, 0,
 	    cpu_features, "A", "CPU feature names");
 
-SYSCTL_PROC(_machdep_cpu, OID_AUTO, extfeatures, CTLTYPE_STRING | CTLFLAG_RD, 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, extfeatures, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    0, 0,
 	    cpu_extfeatures, "A", "CPU extended feature names");
 
 SYSCTL_PROC(_machdep_cpu, OID_AUTO, logical_per_package,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    0, 0,
 	    cpu_logical_per_package, "I", "CPU logical cpus per package");
 
 SYSCTL_PROC(_machdep_cpu, OID_AUTO, cores_per_package,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_cores_per_package),
 	    sizeof(uint32_t),
 	    i386_cpu_info, "I", "CPU cores per package");
 
 SYSCTL_PROC(_machdep_cpu, OID_AUTO, microcode_version,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_microcode_version),
 	    sizeof(uint32_t),
 	    i386_cpu_info, "I", "Microcode version number");
 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, processor_flag,
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
+	    (void *)offsetof(i386_cpu_info_t, cpuid_processor_flag),
+	    sizeof(uint32_t),
+	    i386_cpu_info, "I", "CPU processor flag");
+
 
 SYSCTL_NODE(_machdep_cpu, OID_AUTO, mwait, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
 	"mwait");
 
 SYSCTL_PROC(_machdep_cpu_mwait, OID_AUTO, linesize_min,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(cpuid_mwait_leaf_t, linesize_min),
 	    sizeof(uint32_t),
 	    cpu_mwait, "I", "Monitor/mwait minimum line size");
 
 SYSCTL_PROC(_machdep_cpu_mwait, OID_AUTO, linesize_max,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(cpuid_mwait_leaf_t, linesize_max),
 	    sizeof(uint32_t),
 	    cpu_mwait, "I", "Monitor/mwait maximum line size");
 
 SYSCTL_PROC(_machdep_cpu_mwait, OID_AUTO, extensions,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(cpuid_mwait_leaf_t, extensions),
 	    sizeof(uint32_t),
 	    cpu_mwait, "I", "Monitor/mwait extensions");
 
 SYSCTL_PROC(_machdep_cpu_mwait, OID_AUTO, sub_Cstates,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(cpuid_mwait_leaf_t, sub_Cstates),
 	    sizeof(uint32_t),
 	    cpu_mwait, "I", "Monitor/mwait sub C-states");
@@ -336,31 +381,31 @@ SYSCTL_NODE(_machdep_cpu, OID_AUTO, thermal, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
 	"thermal");
 
 SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, sensor,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(cpuid_thermal_leaf_t, sensor),
 	    sizeof(boolean_t),
 	    cpu_thermal, "I", "Thermal sensor present");
 
 SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, dynamic_acceleration,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(cpuid_thermal_leaf_t, dynamic_acceleration),
 	    sizeof(boolean_t),
 	    cpu_thermal, "I", "Dynamic Acceleration Technology (Turbo Mode)");
 
 SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, invariant_APIC_timer,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(cpuid_thermal_leaf_t, invariant_APIC_timer),
 	    sizeof(boolean_t),
 	    cpu_thermal, "I", "Invariant APIC Timer");
 
 SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, thresholds,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(cpuid_thermal_leaf_t, thresholds),
 	    sizeof(uint32_t),
 	    cpu_thermal, "I", "Number of interrupt thresholds");
 
 SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, ACNT_MCNT,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(cpuid_thermal_leaf_t, ACNT_MCNT),
 	    sizeof(boolean_t),
 	    cpu_thermal, "I", "ACNT_MCNT capability");
@@ -410,43 +455,43 @@ SYSCTL_NODE(_machdep_cpu, OID_AUTO, arch_perf, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
 	"arch_perf");
 
 SYSCTL_PROC(_machdep_cpu_arch_perf, OID_AUTO, version,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(cpuid_arch_perf_leaf_t, version),
 	    sizeof(uint8_t),
 	    cpu_arch_perf, "I", "Architectural Performance Version Number");
 
 SYSCTL_PROC(_machdep_cpu_arch_perf, OID_AUTO, number,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(cpuid_arch_perf_leaf_t, number),
 	    sizeof(uint8_t),
 	    cpu_arch_perf, "I", "Number of counters per logical cpu");
 
 SYSCTL_PROC(_machdep_cpu_arch_perf, OID_AUTO, width,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(cpuid_arch_perf_leaf_t, width),
 	    sizeof(uint8_t),
 	    cpu_arch_perf, "I", "Bit width of counters");
 
 SYSCTL_PROC(_machdep_cpu_arch_perf, OID_AUTO, events_number,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(cpuid_arch_perf_leaf_t, events_number),
 	    sizeof(uint8_t),
 	    cpu_arch_perf, "I", "Number of monitoring events");
 
 SYSCTL_PROC(_machdep_cpu_arch_perf, OID_AUTO, events,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(cpuid_arch_perf_leaf_t, events),
 	    sizeof(uint32_t),
 	    cpu_arch_perf, "I", "Bit vector of events");
 
 SYSCTL_PROC(_machdep_cpu_arch_perf, OID_AUTO, fixed_number,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(cpuid_arch_perf_leaf_t, fixed_number),
 	    sizeof(uint8_t),
 	    cpu_arch_perf, "I", "Number of fixed-function counters");
 
 SYSCTL_PROC(_machdep_cpu_arch_perf, OID_AUTO, fixed_width,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(cpuid_arch_perf_leaf_t, fixed_width),
 	    sizeof(uint8_t),
 	    cpu_arch_perf, "I", "Bit-width of fixed-function counters");
@@ -456,19 +501,19 @@ SYSCTL_NODE(_machdep_cpu, OID_AUTO, cache, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
 	"cache");
 
 SYSCTL_PROC(_machdep_cpu_cache, OID_AUTO, linesize,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_cache_linesize),
 	    sizeof(uint32_t),
 	    i386_cpu_info, "I", "Cacheline size");
 
 SYSCTL_PROC(_machdep_cpu_cache, OID_AUTO, L2_associativity,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_cache_L2_associativity),
 	    sizeof(uint32_t),
 	    i386_cpu_info, "I", "L2 cache associativity");
 
 SYSCTL_PROC(_machdep_cpu_cache, OID_AUTO, size,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_cache_size),
 	    sizeof(uint32_t),
 	    i386_cpu_info, "I", "Cache size (in Kbytes)");
@@ -482,7 +527,7 @@ SYSCTL_NODE(_machdep_cpu_tlb, OID_AUTO, data, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
 	"data");
 
 SYSCTL_PROC(_machdep_cpu_tlb_inst, OID_AUTO, small,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t,
 			     cpuid_tlb[TLB_INST][TLB_SMALL][0]),
 	    sizeof(uint32_t),
@@ -490,7 +535,7 @@ SYSCTL_PROC(_machdep_cpu_tlb_inst, OID_AUTO, small,
 	    "Number of small page instruction TLBs");
 
 SYSCTL_PROC(_machdep_cpu_tlb_data, OID_AUTO, small,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t,
 			     cpuid_tlb[TLB_DATA][TLB_SMALL][0]),
 	    sizeof(uint32_t),
@@ -498,7 +543,7 @@ SYSCTL_PROC(_machdep_cpu_tlb_data, OID_AUTO, small,
 	    "Number of small page data TLBs (1st level)");
 
 SYSCTL_PROC(_machdep_cpu_tlb_data, OID_AUTO, small_level1,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t,
 			     cpuid_tlb[TLB_DATA][TLB_SMALL][1]),
 	    sizeof(uint32_t),
@@ -506,7 +551,7 @@ SYSCTL_PROC(_machdep_cpu_tlb_data, OID_AUTO, small_level1,
 	    "Number of small page data TLBs (2nd level)");
 
 SYSCTL_PROC(_machdep_cpu_tlb_inst, OID_AUTO, large,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t,
 			     cpuid_tlb[TLB_INST][TLB_LARGE][0]),
 	    sizeof(uint32_t),
@@ -514,7 +559,7 @@ SYSCTL_PROC(_machdep_cpu_tlb_inst, OID_AUTO, large,
 	    "Number of large page instruction TLBs");
 
 SYSCTL_PROC(_machdep_cpu_tlb_data, OID_AUTO, large,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t,
 			     cpuid_tlb[TLB_DATA][TLB_LARGE][0]),
 	    sizeof(uint32_t),
@@ -522,7 +567,7 @@ SYSCTL_PROC(_machdep_cpu_tlb_data, OID_AUTO, large,
 	    "Number of large page data TLBs (1st level)");
 
 SYSCTL_PROC(_machdep_cpu_tlb_data, OID_AUTO, large_level1,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t,
 			     cpuid_tlb[TLB_DATA][TLB_LARGE][1]),
 	    sizeof(uint32_t),
@@ -530,7 +575,7 @@ SYSCTL_PROC(_machdep_cpu_tlb_data, OID_AUTO, large_level1,
 	    "Number of large page data TLBs (2nd level)");
 
 SYSCTL_PROC(_machdep_cpu_tlb, OID_AUTO, shared,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_stlb),
 	    sizeof(uint32_t),
 	    i386_cpu_info_nonzero, "I",
@@ -541,26 +586,26 @@ SYSCTL_NODE(_machdep_cpu, OID_AUTO, address_bits, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
 	"address_bits");
 
 SYSCTL_PROC(_machdep_cpu_address_bits, OID_AUTO, physical,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_address_bits_physical),
 	    sizeof(uint32_t),
 	    i386_cpu_info, "I", "Number of physical address bits");
 
 SYSCTL_PROC(_machdep_cpu_address_bits, OID_AUTO, virtual,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, cpuid_address_bits_virtual),
 	    sizeof(uint32_t),
 	    i386_cpu_info, "I", "Number of virtual address bits");
 
 
 SYSCTL_PROC(_machdep_cpu, OID_AUTO, core_count,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, core_count),
 	    sizeof(uint32_t),
 	    i386_cpu_info, "I", "Number of enabled cores per package");
 
 SYSCTL_PROC(_machdep_cpu, OID_AUTO, thread_count,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    (void *)offsetof(i386_cpu_info_t, thread_count),
 	    sizeof(uint32_t),
 	    i386_cpu_info, "I", "Number of enabled threads per package");
@@ -569,34 +614,40 @@ SYSCTL_NODE(_machdep_cpu, OID_AUTO, flex_ratio, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
 	"Flex ratio");
 
 SYSCTL_PROC(_machdep_cpu_flex_ratio, OID_AUTO, desired,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    0, 0,
 	    cpu_flex_ratio_desired, "I", "Flex ratio desired (0 disabled)");
 
 SYSCTL_PROC(_machdep_cpu_flex_ratio, OID_AUTO, min,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    0, 0,
 	    cpu_flex_ratio_min, "I", "Flex ratio min (efficiency)");
 
 SYSCTL_PROC(_machdep_cpu_flex_ratio, OID_AUTO, max,
-	    CTLTYPE_INT | CTLFLAG_RD, 
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 
 	    0, 0,
 	    cpu_flex_ratio_max, "I", "Flex ratio max (non-turbo)");
 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, ucupdate, 
+			CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED, 0, 0,
+            cpu_ucode_update, "S", "Microcode update interface");
+
 uint64_t pmap_pv_hashlist_walks;
 uint64_t pmap_pv_hashlist_cnts;
 uint32_t pmap_pv_hashlist_max;
 uint32_t pmap_kernel_text_ps = PAGE_SIZE;
+extern uint32_t pv_hashed_kern_low_water_mark;
 
 /*extern struct sysctl_oid_list sysctl__machdep_pmap_children;*/
 
 SYSCTL_NODE(_machdep, OID_AUTO, pmap, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
 	"PMAP info");
 
-SYSCTL_QUAD    (_machdep_pmap, OID_AUTO, hashwalks, CTLFLAG_RD | CTLFLAG_KERN, &pmap_pv_hashlist_walks, "");
-SYSCTL_QUAD    (_machdep_pmap, OID_AUTO, hashcnts, CTLFLAG_RD | CTLFLAG_KERN, &pmap_pv_hashlist_cnts, "");
-SYSCTL_INT     (_machdep_pmap, OID_AUTO, hashmax, CTLFLAG_RD | CTLFLAG_KERN, &pmap_pv_hashlist_max, 0, "");
-SYSCTL_INT     (_machdep_pmap, OID_AUTO, kernel_text_ps, CTLFLAG_RD | CTLFLAG_KERN, &pmap_kernel_text_ps, 0, "");
+SYSCTL_QUAD    (_machdep_pmap, OID_AUTO, hashwalks, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &pmap_pv_hashlist_walks, "");
+SYSCTL_QUAD    (_machdep_pmap, OID_AUTO, hashcnts, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &pmap_pv_hashlist_cnts, "");
+SYSCTL_INT     (_machdep_pmap, OID_AUTO, hashmax, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &pmap_pv_hashlist_max, 0, "");
+SYSCTL_INT     (_machdep_pmap, OID_AUTO, kernel_text_ps, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &pmap_kernel_text_ps, 0, "");
+SYSCTL_INT     (_machdep_pmap, OID_AUTO, kern_pv_reserve, CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, &pv_hashed_kern_low_water_mark, 0, "");
 
 SYSCTL_NODE(_machdep, OID_AUTO, memmap, CTLFLAG_RD|CTLFLAG_LOCKED, NULL, "physical memory map");
 
@@ -621,9 +672,15 @@ SYSCTL_QUAD(_machdep_memmap, OID_AUTO, Other, CTLFLAG_RD|CTLFLAG_LOCKED, &firmwa
 SYSCTL_NODE(_machdep, OID_AUTO, tsc, CTLFLAG_RD|CTLFLAG_LOCKED, NULL, "Timestamp counter parameters");
 
 SYSCTL_QUAD(_machdep_tsc, OID_AUTO, frequency, CTLFLAG_RD|CTLFLAG_LOCKED, &tscFreq, "");
+
 SYSCTL_NODE(_machdep, OID_AUTO, misc, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
 	"Miscellaneous x86 kernel parameters");
 
+SYSCTL_PROC(_machdep_misc, OID_AUTO, panic_restart_timeout,
+	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 
+	    0, 0,
+	    panic_set_restart_timeout, "I", "Panic restart timeout in seconds");
+
 SYSCTL_PROC(_machdep_misc, OID_AUTO, interrupt_latency_max, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED, 
 	    0, 0,
 	    misc_interrupt_latency_max, "A", "Maximum Interrupt latency");
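
The new machdep.misc.panic_restart_timeout knob round-trips through the
handler above: reads convert the stored absolutetime back to seconds, and
writes convert seconds to absolutetime. A hedged userspace example of
setting it (a root caller is assumed; error handling is minimal):

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <stdio.h>

	int
	main(void)
	{
		int secs = 30;	/* reboot 30s after a panic */

		if (sysctlbyname("machdep.misc.panic_restart_timeout",
		    NULL, NULL, &secs, sizeof(secs)) != 0)
			perror("sysctlbyname");
		return 0;
	}
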
diff --git a/bsd/dev/i386/systemcalls.c b/bsd/dev/i386/systemcalls.c
index 660d0d1aa..7a849ca31 100644
--- a/bsd/dev/i386/systemcalls.c
+++ b/bsd/dev/i386/systemcalls.c
@@ -34,6 +34,7 @@
 #include <kern/debug.h>
 #include <mach/machine/thread_status.h>
 #include <mach/thread_act.h>
+#include <mach/branch_predicates.h>
 
 #include <sys/kernel.h>
 #include <sys/vm.h>
@@ -54,6 +55,8 @@
 #include <i386/machine_routines.h>
 #include <mach/i386/syscall_sw.h>
 
+#include <machine/pal_routines.h>
+
 #if CONFIG_DTRACE
 extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
 extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
@@ -69,6 +72,15 @@ extern boolean_t x86_sysenter_arg_store_isvalid(thread_t thread);
 /* dynamically generated at build time based on syscalls.master */
 extern const char *syscallnames[];
 
+/*
+ * This needs to be a single switch so that it's "all on" or "all off",
+ * rather than being turned on for some code paths and not others, as this
+ * has a tendency to introduce "blame the next guy" bugs.
+ */
+#if DEBUG
+#define	FUNNEL_DEBUG	1	/* Check for funnel held on exit */
+#endif
+
 /*
  * Function:	unix_syscall
  *
@@ -90,6 +102,7 @@ unix_syscall(x86_saved_state_t *state)
 	struct uthread		*uthread;
 	x86_saved_state32_t	*regs;
 	boolean_t		args_in_uthread;
+	boolean_t		is_vfork;
 
 	assert(is_saved_state32(state));
 	regs = saved_state32(state);
@@ -100,15 +113,15 @@ unix_syscall(x86_saved_state_t *state)
 	thread = current_thread();
 	uthread = get_bsdthread_info(thread);
 
-
 	/* Get the appropriate proc; may be different from task's for vfork() */
-	if (!(uthread->uu_flag & UT_VFORK))
-		p = (struct proc *)get_bsdtask_info(current_task());
-	else 
+	is_vfork = uthread->uu_flag & UT_VFORK;
+	if (__improbable(is_vfork != 0))
 		p = current_proc();
+	else 
+		p = (struct proc *)get_bsdtask_info(current_task());
 
 	/* Verify that we are not being called from a task without a proc */
-	if (p == NULL) {
+	if (__improbable(p == NULL)) {
 		regs->eax = EPERM;
 		regs->efl |= EFL_CF;
 		task_terminate_internal(current_task());
@@ -126,7 +139,7 @@ unix_syscall(x86_saved_state_t *state)
 
 	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
 
-	if (callp == sysent) {
+	if (__improbable(callp == sysent)) {
 		code = fuword(params);
 		params += sizeof(int);
 		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
@@ -151,7 +164,7 @@ unix_syscall(x86_saved_state_t *state)
 			}
 		}
 
-		if (code != 180) {
+		if (__probable(code != 180)) {
 	        	int *ip = (int *)vt;
 
 			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
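A note on the recurring code != 180 guard, here and at each DBG_FUNC_END site below: syscall 180 is kdebug_trace itself, so emitting a kdebug event for it would have the tracing facility trace its own invocations. Assuming the generated SYS_ constants from syscalls.master were visible here, an equivalent self-documenting spelling would be:

	if (__probable(code != SYS_kdebug_trace))	/* 180: don't trace the tracer */
		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
				      error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);
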
@@ -191,9 +204,6 @@ unix_syscall(x86_saved_state_t *state)
 	AUDIT_SYSCALL_ENTER(code, p, uthread);
 	error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
         AUDIT_SYSCALL_EXIT(code, p, uthread, error);
-#if CONFIG_MACF
-	mac_thread_userret(code, error, thread);
-#endif
 
 #ifdef JOE_DEBUG
         if (uthread->uu_iocount)
@@ -203,7 +213,7 @@ unix_syscall(x86_saved_state_t *state)
 	uthread->t_dtrace_errno = error;
 #endif /* CONFIG_DTRACE */
 
-	if (error == ERESTART) {
+	if (__improbable(error == ERESTART)) {
 		/*
 		 * Move the user's pc back to repeat the syscall:
 		 * 5 bytes for a sysenter, or 2 for an int 8x.
@@ -211,14 +221,10 @@ unix_syscall(x86_saved_state_t *state)
 		 * - see debug trap handler in idt.s/idt64.s
 		 */
 
-		if (regs->cs == SYSENTER_CS || regs->cs == SYSENTER_TF_CS) {
-			regs->eip -= 5;
-		}
-		else
-			regs->eip -= 2;
+		pal_syscall_restart(thread, state);
 	}
 	else if (error != EJUSTRETURN) {
-		if (error) {
+		if (__improbable(error)) {
 		    regs->eax = error;
 		    regs->efl |= EFL_CF;	/* carry bit */
 		} else { /* (not error) */
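pal_syscall_restart() now centralizes the PC rewind that each ERESTART site used to open-code. Its definition lives in the new PAL (platform abstraction) layer and is outside this hunk; a plausible native sketch, inferred only from the inline code this patch removes (5 bytes for a sysenter sequence, 2 bytes for an int 0x8x or the 64-bit syscall instruction), would be:

	void
	pal_syscall_restart(__unused thread_t thread, x86_saved_state_t *state)
	{
		if (is_saved_state32(state)) {
			x86_saved_state32_t *regs = saved_state32(state);

			/* sysenter entry sequence is 5 bytes; int 0x8x is 2 */
			if (regs->cs == SYSENTER_CS || regs->cs == SYSENTER_TF_CS)
				regs->eip -= 5;
			else
				regs->eip -= 2;
		} else {
			/* 64-bit: all system calls use the 2-byte syscall instruction */
			saved_state64(state)->isf.rip -= 2;
		}
	}
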
@@ -232,13 +238,14 @@ unix_syscall(x86_saved_state_t *state)
 		error, regs->eax, regs->edx);
 
 	uthread->uu_flag &= ~UT_NOTCANCELPT;
-#if DEBUG
+#if FUNNEL_DEBUG
 	/*
 	 * if we're holding the funnel, panic
 	 */
 	syscall_exit_funnelcheck();
-#endif /* DEBUG */
-	if (uthread->uu_lowpri_window) {
+#endif /* FUNNEL_DEBUG */
+
+	if (__improbable(uthread->uu_lowpri_window)) {
 	        /*
 		 * task is marked as a low priority I/O type
 		 * and the I/O we issued while in this system call
@@ -248,10 +255,13 @@ unix_syscall(x86_saved_state_t *state)
 		 */
 		throttle_lowpri_io(TRUE);
 	}
-	if (code != 180)
+	if (__probable(code != 180))
 	        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
 				      error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);
 
+	if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
+		pal_execve_return(thread);
+	}
 
 	thread_exception_return();
 	/* NOTREACHED */
@@ -273,21 +283,21 @@ unix_syscall64(x86_saved_state_t *state)
 
 	assert(is_saved_state64(state));
 	regs = saved_state64(state);
-
+#if	DEBUG
 	if (regs->rax == 0x2000800)
 		thread_exception_return();
-
+#endif
 	thread = current_thread();
 	uthread = get_bsdthread_info(thread);
 
 	/* Get the appropriate proc; may be different from task's for vfork() */
-	if (!(uthread->uu_flag & UT_VFORK))
+	if (__probable(!(uthread->uu_flag & UT_VFORK)))
 		p = (struct proc *)get_bsdtask_info(current_task());
 	else 
 		p = current_proc();
 
 	/* Verify that we are not being called from a task without a proc */
-	if (p == NULL) {
+	if (__improbable(p == NULL)) {
 		regs->rax = EPERM;
 		regs->isf.rflags |= EFL_CF;
 		task_terminate_internal(current_task());
@@ -303,7 +313,7 @@ unix_syscall64(x86_saved_state_t *state)
 	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
 	uargp = (void *)(&regs->rdi);
 
-	if (callp == sysent) {
+	if (__improbable(callp == sysent)) {
 	        /*
 		 * indirect system call... system call number
 		 * passed as 'arg0'
@@ -323,7 +333,7 @@ unix_syscall64(x86_saved_state_t *state)
 		}
 		assert(callp->sy_narg <= 8);
 
-		if (callp->sy_narg > args_in_regs) {
+		if (__improbable(callp->sy_narg > args_in_regs)) {
 			int copyin_count;
 
 			copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t);
@@ -339,7 +349,7 @@ unix_syscall64(x86_saved_state_t *state)
 		/*
 		 * XXX Turn 64 bit unsafe calls into nosys()
 		 */
-		if (callp->sy_flags & UNSAFE_64BIT) {
+		if (__improbable(callp->sy_flags & UNSAFE_64BIT)) {
 			callp = &sysent[63];
 			goto unsafe;
 		}
@@ -360,25 +370,34 @@ unsafe:
 	
 	uthread->uu_flag |= UT_NOTCANCELPT;
 
+#ifdef JOE_DEBUG
+        uthread->uu_iocount = 0;
+        uthread->uu_vpindex = 0;
+#endif
 
 	AUDIT_SYSCALL_ENTER(code, p, uthread);
 	error = (*(callp->sy_call))((void *) p, uargp, &(uthread->uu_rval[0]));
         AUDIT_SYSCALL_EXIT(code, p, uthread, error);
 
+#ifdef JOE_DEBUG
+        if (uthread->uu_iocount)
+               printf("system call returned with uu_iocount != 0\n");
+#endif
+
 #if CONFIG_DTRACE
 	uthread->t_dtrace_errno = error;
 #endif /* CONFIG_DTRACE */
 	
-	if (error == ERESTART) {
+	if (__improbable(error == ERESTART)) {
 		/*
 		 * all system calls come through via the syscall instruction
 		 * in 64-bit mode... it's 2 bytes in length
 		 * move the user's pc back to repeat the syscall:
 		 */
-	        regs->isf.rip -= 2;
+		pal_syscall_restart( thread, state );
 	}
 	else if (error != EJUSTRETURN) {
-		if (error) {
+		if (__improbable(error)) {
 			regs->rax = error;
 			regs->isf.rflags |= EFL_CF;	/* carry bit */
 		} else { /* (not error) */
@@ -416,12 +435,14 @@ unsafe:
 	
 	uthread->uu_flag &= ~UT_NOTCANCELPT;
 
+#if FUNNEL_DEBUG	
 	/*
 	 * if we're holding the funnel, panic
 	 */
 	syscall_exit_funnelcheck();
+#endif /* FUNNEL_DEBUG */
 
-	if (uthread->uu_lowpri_window) {
+	if (__improbable(uthread->uu_lowpri_window)) {
 	        /*
 		 * task is marked as a low priority I/O type
 		 * and the I/O we issued while in this system call
@@ -431,7 +452,7 @@ unsafe:
 		 */
 		throttle_lowpri_io(TRUE);
 	}
-	if (code != 180)
+	if (__probable(code != 180))
 	        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
 				      error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);
 
@@ -453,6 +474,7 @@ unix_syscall_return(int error)
 	thread = current_thread();
 	uthread = get_bsdthread_info(thread);
 
+	pal_register_cache_state(thread, DIRTY);
 
 	p = current_proc();
 
@@ -480,11 +502,9 @@ unix_syscall_return(int error)
 
 		if (error == ERESTART) {
 			/*
-			 * all system calls come through via the syscall instruction
-			 * in 64 bit mode... its 2 bytes in length
-			 * move the user's pc back to repeat the syscall:
+			 * repeat the syscall
 			 */
-			regs->isf.rip -= 2;
+			pal_syscall_restart( thread, find_user_regs(thread) );
 		}
 		else if (error != EJUSTRETURN) {
 			if (error) {
@@ -542,7 +562,7 @@ unix_syscall_return(int error)
 			code = fuword(params);
 		}
 		if (error == ERESTART) {
-			regs->eip -= ((regs->cs & 0xffff) == SYSENTER_CS) ? 5 : 2;
+			pal_syscall_restart( thread, find_user_regs(thread) );
 		}
 		else if (error != EJUSTRETURN) {
 			if (error) {
@@ -561,10 +581,12 @@ unix_syscall_return(int error)
 
 	uthread->uu_flag &= ~UT_NOTCANCELPT;
 
+#if FUNNEL_DEBUG	
 	/*
 	 * if we're holding the funnel, panic
 	 */
 	syscall_exit_funnelcheck();
+#endif /* FUNNEL_DEBUG */
 
 	if (uthread->uu_lowpri_window) {
 	        /*
diff --git a/bsd/dev/i386/unix_signal.c b/bsd/dev/i386/unix_signal.c
index 06ed4172c..4292d6515 100644
--- a/bsd/dev/i386/unix_signal.c
+++ b/bsd/dev/i386/unix_signal.c
@@ -54,12 +54,12 @@
 #include <i386/machine_routines.h>
 #include <i386/seg.h>
 
-#include <sys/kdebug.h>
+#include <machine/pal_routines.h>
 
+#include <sys/kdebug.h>
 #include <sys/sdt.h>
 
 
-
 /* Forward: */
 extern boolean_t machine_exception(int, mach_exception_code_t, 
 		mach_exception_subcode_t, int *, mach_exception_subcode_t *);
@@ -610,6 +610,8 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint
 	        goto bad;
 	ml_fp_setvalid(FALSE);
 
+	/* Tell the PAL layer about the signal */
+	pal_set_signal_delivery( thread );
 
 	proc_lock(p);
 
diff --git a/bsd/dev/memdev.c b/bsd/dev/memdev.c
index fe07f5e53..c425c7e08 100644
--- a/bsd/dev/memdev.c
+++ b/bsd/dev/memdev.c
@@ -175,7 +175,7 @@ int mdevBMajor = -1;
 int mdevCMajor = -1;
 
 static int mdevioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p, int is_char);
-dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys);
+dev_t mdevadd(int devid, uint64_t base, unsigned int size, int phys);
 dev_t mdevlookup(int devid);
 void mdevremoveall(void);
 
@@ -543,7 +543,7 @@ char *cvtnum(char *pos, char *end, unsigned int *num) {		/* Convert to a number
 
 #endif /* CONFIG_MEMDEV_INSECURE */
 
-dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys) {
+dev_t mdevadd(int devid, uint64_t base, unsigned int size, int phys) {
 	
 	int i;
 	
@@ -556,7 +556,7 @@ dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys) {
 				continue;								/* Skip check */
 			}
 			if(!(((base + size -1 ) < mdev[i].mdBase) || ((mdev[i].mdBase + mdev[i].mdSize - 1) < base))) {	/* Is there any overlap? */
-				panic("mdevadd: attempt to add overlapping memory device at %08lX-%08lX\n", (long) mdev[i].mdBase, (long) mdev[i].mdBase + mdev[i].mdSize - 1);
+				panic("mdevadd: attempt to add overlapping memory device at %016llX-%016llX\n", mdev[i].mdBase, mdev[i].mdBase + mdev[i].mdSize - 1);
 			}
 		}
 		if(devid < 0) {									/* Do we have free slots? */
@@ -567,7 +567,7 @@ dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys) {
 		if(devid >= 16) {								/* Giving us something bogus? */
 			panic("mdevadd: attempt to explicitly add a bogus memory device: %08X\n", devid);
 		}
-		if(mdev[devid].mdFlags &mdInited) {				/* Already there? */
+		if(mdev[devid].mdFlags & mdInited) {			/* Already there? */
 			panic("mdevadd: attempt to explicitly add a previously defined memory device: %08X\n", devid);
 		}
 	}
@@ -611,8 +611,8 @@ dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys) {
 	mdev[devid].mdSecsize = DEV_BSIZE;					/* Set starting block size */
 	if(phys) mdev[devid].mdFlags |= mdPhys;				/* Show that we are in physical memory */
 	mdev[devid].mdFlags |= mdInited;					/* Show we are all set up */
-	printf("Added memory device md%x/rmd%x (%08X/%08X) at %08X for %08X\n", 
-		devid, devid, mdev[devid].mdBDev, mdev[devid].mdCDev, base << 12, size << 12);
+	printf("Added memory device md%x/rmd%x (%08X/%08X) at %016llX for %016llX\n", 
+		   devid, devid, mdev[devid].mdBDev, mdev[devid].mdCDev, base << 12, (uint64_t)size << 12);
 	return mdev[devid].mdBDev;
 }
 
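For reference, the collision test mdevadd() applies to each initialized slot above is the standard closed-interval check: two inclusive ranges overlap unless one ends before the other begins. Restated as a small illustrative helper (not part of the patch):

	static int
	md_ranges_overlap(uint64_t base0, uint64_t size0, uint64_t base1, uint64_t size1)
	{
		/* [base, base + size - 1] ranges are disjoint iff one ends before the other starts */
		return !((base0 + size0 - 1 < base1) || (base1 + size1 - 1 < base0));
	}

With base widened from ppnum_t to uint64_t, both this check and the reworked panic/printf formats stop truncating device ranges on 64-bit configurations.
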
diff --git a/bsd/dev/ppc/conf.c b/bsd/dev/ppc/conf.c
deleted file mode 100644
index acc9a8545..000000000
--- a/bsd/dev/ppc/conf.c
+++ /dev/null
@@ -1,354 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1997 by Apple Computer, Inc., all rights reserved
- * Copyright (c) 1993 NeXT Computer, Inc.
- *
- * UNIX Device switch tables.
- *
- * HISTORY
- *
- * 30 July 1997 Umesh Vaishampayan (umeshv@apple.com)
- * 	enabled file descriptor pseudo-device.
- * 18 June 1993 ? at NeXT
- *	Cleaned up a lot of stuff in this file.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/ioctl.h>
-#include <sys/tty.h>
-#include <sys/conf.h>
-#include <machine/cons.h>
-
-
-struct bdevsw	bdevsw[] =
-{
-	/*
-	 *	For block devices, every other block of 8 slots is 
-	 *	reserved to NeXT.  The other slots are available for
-	 *	the user.  This way we can both add new entries without
-	 *	running into each other.  Be sure to fill in NeXT's
-	 *	8 reserved slots when you jump over us -- we'll do the
-	 *	same for you.
-	 */
-
-	/* 0 - 7 are reserved to NeXT */
-
-	NO_BDEVICE,							/* 0*/
-	NO_BDEVICE,							/* 1*/
-	NO_BDEVICE,							/* 2*/
-	NO_BDEVICE,							/* 3*/
-	NO_BDEVICE,							/* 4*/
-	NO_BDEVICE,							/* 5*/
-	NO_BDEVICE,							/* 6*/
-	NO_BDEVICE,							/* 7*/
-
-	/* 8 - 15 are reserved to the user */
-	NO_BDEVICE,							/* 8*/
-	NO_BDEVICE,							/* 9*/
-	NO_BDEVICE,							/*10*/
-	NO_BDEVICE,							/*11*/
-	NO_BDEVICE,							/*12*/
-	NO_BDEVICE,							/*13*/
-	NO_BDEVICE,							/*14*/
-	NO_BDEVICE,							/*15*/
-
-	/* 16 - 23 are reserved to NeXT */
-	NO_BDEVICE,							/*16*/
-	NO_BDEVICE,							/*17*/
-	NO_BDEVICE,							/*18*/
-	NO_BDEVICE,							/*19*/
-	NO_BDEVICE,							/*20*/
-	NO_BDEVICE,							/*21*/
-	NO_BDEVICE,							/*22*/
-	NO_BDEVICE,							/*23*/
-};
-
-int	nblkdev = sizeof (bdevsw) / sizeof (bdevsw[0]);
-
-extern struct tty *km_tty[];
-
-dev_t chrtoblk(dev_t dev);
-int chrtoblk_set(int cdev, int bdev);
-int iskmemdev(dev_t dev);
-
-
-/* XXX No support for linker sets, so must declare here */
-int cttyopen(dev_t dev, int flag, int mode, struct proc *p);
-int cttyread(dev_t dev, struct uio *uio, int flag);
-int cttywrite(dev_t dev, struct uio *uio, int flag);
-int cttyioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p);
-int cttyselect(dev_t dev, int flag, void* wql, struct proc *p);
-
-/* XXX bsd/dev/ppc/mem.c */
-int mmread(dev_t dev, struct uio *uio, int flag);
-int mmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p);
-int mmwrite(dev_t dev, struct uio *uio, int flag);
-
-#define	mmselect	(select_fcn_t *)seltrue
-
-#if 1
-#ifdef NPTY
-#undef NPTY
-#endif /* NPTY */
-#define NPTY 32
-#else /* 1 */
-#include <pty.h>
-#endif /* 1 */
-#if NPTY > 0
-extern struct tty *pt_tty[];
-extern d_open_t		ptsopen;
-extern d_close_t	ptsclose;
-extern d_read_t		ptsread;
-extern d_write_t	ptswrite;
-extern d_stop_t		ptsstop;
-extern d_open_t		ptcopen;
-extern d_close_t	ptcclose;
-extern d_read_t		ptcread;
-extern d_write_t	ptcwrite;
-extern d_select_t	ptcselect;
-extern d_ioctl_t	ptyioctl;
-#else
-#define ptsopen		eno_opcl
-#define ptsclose	eno_opcl
-#define ptsread		eno_rdwrt
-#define ptswrite	eno_rdwrt
-#define	ptsstop		nulldev
-
-#define ptcopen		eno_opcl
-#define ptcclose	eno_opcl
-#define ptcread		eno_rdwrt
-#define ptcwrite	eno_rdwrt
-#define	ptcselect	eno_select
-#define ptyioctl	eno_ioctl
-#endif
-
-extern d_open_t         logopen;
-extern d_close_t        logclose;
-extern d_read_t         logread;
-extern d_ioctl_t        logioctl;
-extern d_select_t       logselect;
-
-struct cdevsw	cdevsw[] =
-{
-	/*
-	 *	For character devices, every other block of 16 slots is
-	 *	reserved to NeXT.  The other slots are available for
-	 *	the user.  This way we can both add new entries without
-	 *	running into each other.  Be sure to fill in NeXT's
-	 *	16 reserved slots when you jump over us -- we'll do the
-	 *	same for you.
-	 */
-
-	/* 0 - 15 are reserved to NeXT */
-
-    {
-	consopen,	consclose,	consread,	conswrite,	/* 0*/
-	consioctl,	((stop_fcn_t *)&nulldev),
-					((reset_fcn_t *)&nulldev),
-							0,	consselect,
-	eno_mmap,	eno_strat,	eno_getc,	eno_putc, D_TTY
-   },
-    NO_CDEVICE,								/* 1*/
-    {
-	cttyopen,	((open_close_fcn_t *)&nulldev),
-					cttyread,	cttywrite,	/* 2*/
-	cttyioctl,	((stop_fcn_t *)&nulldev),
-					((reset_fcn_t *)&nulldev),
-							0,	cttyselect,
-	eno_mmap,	eno_strat,	eno_getc,	eno_putc,	D_TTY
-    },
-    {
-	((open_close_fcn_t *)&nulldev),
-			((open_close_fcn_t *)&nulldev),
-					mmread,		mmwrite,	/* 3*/
-	mmioctl,	((stop_fcn_t *)&nulldev),
-					((reset_fcn_t *)&nulldev),
-							0,	mmselect,
-	eno_mmap,		eno_strat,	eno_getc,	eno_putc,	D_DISK
-    },
-    {
-	ptsopen,	ptsclose,	ptsread,	ptswrite,	/* 4*/
-	ptyioctl,	ptsstop,	((reset_fcn_t *)&nulldev),
-							pt_tty,		ttselect,
-	eno_mmap,	eno_strat,	eno_getc,	eno_putc,	D_TTY
-    },
-    {
-	ptcopen,	ptcclose,	ptcread,	ptcwrite,	/* 5*/
-	ptyioctl,	((stop_fcn_t *)&nulldev),
-					((reset_fcn_t *)&nulldev),
-							0,		ptcselect,
-	eno_mmap,	eno_strat,	eno_getc,	eno_putc,	D_TTY
-    },
-    {
-	logopen,	logclose,	logread,	eno_rdwrt,	/* 6*/
-	logioctl,	eno_stop,	((reset_fcn_t *)&nulldev),
-							0,		logselect,
-	eno_mmap,	eno_strat,	eno_getc,	eno_putc,	0
-    },
-    NO_CDEVICE,								/* 7*/
-    NO_CDEVICE,								/* 8*/
-    NO_CDEVICE,								/* 9*/
-    NO_CDEVICE,								/*10*/
-    NO_CDEVICE,								/*11*/
-    {
-	kmopen,		kmclose,	kmread,		kmwrite,	/*12*/
-	kmioctl,	((stop_fcn_t *)&nulldev),
-					((reset_fcn_t *)&nulldev),
-							km_tty,		ttselect,
-	eno_mmap,	eno_strat,	eno_getc,	eno_putc,	0
-    },
-    NO_CDEVICE,								/*13*/
-    NO_CDEVICE,								/*14*/
-    NO_CDEVICE,								/*15*/
-
-	/* 16 - 31 are reserved to the user */
-    NO_CDEVICE,								/*16*/
-    NO_CDEVICE,								/*17*/
-    NO_CDEVICE,								/*18*/
-    NO_CDEVICE,								/*19*/
-    NO_CDEVICE,								/*20*/
-    NO_CDEVICE,								/*21*/
-    NO_CDEVICE,								/*22*/
-    NO_CDEVICE,								/*23*/
-    NO_CDEVICE,								/*24*/
-    NO_CDEVICE,								/*25*/
-    NO_CDEVICE,								/*26*/
-    NO_CDEVICE,								/*27*/
-    NO_CDEVICE,								/*28*/
-    NO_CDEVICE,								/*29*/
-    NO_CDEVICE,								/*30*/
-    NO_CDEVICE,								/*31*/
-
-	/* 32 - 47 are reserved to NeXT */
-    NO_CDEVICE,								/*32*/
-    NO_CDEVICE,								/*33*/
-    NO_CDEVICE,								/*34*/
-    NO_CDEVICE,								/*35*/
-    NO_CDEVICE,								/*36*/
-	/* 37 used to be for nvram */
-    NO_CDEVICE,								/*37*/
-    NO_CDEVICE,								/*38*/
-    NO_CDEVICE,								/*39*/
-    NO_CDEVICE,								/*40*/
-	/* 41 used to be for fd */
-    NO_CDEVICE,								/*41*/
-    NO_CDEVICE,								/*42*/
-};
-int	nchrdev = sizeof (cdevsw) / sizeof (cdevsw[0]);
-
-
-#include	<sys/vnode.h> /* for VCHR and VBLK */
-/*
- * return true if a disk
- */
-int
-isdisk(dev_t dev, int type)
-{
-	dev_t	maj = major(dev);
-
-	switch (type) {
-	case VCHR:
-		maj = chrtoblk(maj);
-		if (maj == NODEV) {
-			break;
-		}
-		/* FALL THROUGH */
-	case VBLK:
-		if (bdevsw[maj].d_type == D_DISK) {
-			return (1);
-		}
-		break;
-	}
-	return(0);
-}
-
-static int chrtoblktab[] = {
-	/* CHR*/	/* BLK*/	/* CHR*/	/* BLK*/
-	/*  0 */	NODEV,		/*  1 */	NODEV,
-	/*  2 */	NODEV,		/*  3 */	NODEV,
-	/*  4 */	NODEV,		/*  5 */	NODEV,
-	/*  6 */	NODEV,		/*  7 */	NODEV,
-	/*  8 */	NODEV,		/*  9 */	NODEV,
-	/* 10 */	NODEV,		/* 11 */	NODEV,
-	/* 12 */	NODEV,		/* 13 */	NODEV,
-	/* 14 */	6,		/* 15 */	NODEV,
-	/* 16 */	NODEV,		/* 17 */	NODEV,
-	/* 18 */	NODEV,		/* 19 */	NODEV,
-	/* 20 */	NODEV,		/* 21 */	NODEV,
-	/* 22 */	NODEV,		/* 23 */	NODEV,
-	/* 24 */	NODEV,		/* 25 */	NODEV,
-	/* 26 */	NODEV,		/* 27 */	NODEV,
-	/* 28 */	NODEV,		/* 29 */	NODEV,
-	/* 30 */	NODEV,		/* 31 */	NODEV,
-	/* 32 */	NODEV,		/* 33 */	NODEV,
-	/* 34 */	NODEV,		/* 35 */	NODEV,
-	/* 36 */	NODEV,		/* 37 */	NODEV,
-	/* 38 */	NODEV,		/* 39 */	NODEV,
-	/* 40 */	NODEV,		/* 41 */	1,
-	/* 42 */	NODEV,		/* 43 */	NODEV,
-	/* 44 */	NODEV,
-};
-
-/*
- * convert chr dev to blk dev
- */
-dev_t
-chrtoblk(dev_t dev)
-{
-	int blkmaj;
-
-	if (major(dev) >= nchrdev)
-		return(NODEV);
-	blkmaj = chrtoblktab[major(dev)];
-	if (blkmaj == NODEV)
-		return(NODEV);
-	return(makedev(blkmaj, minor(dev)));
-}
-
-int
-chrtoblk_set(int cdev, int bdev)
-{
-	if (cdev >= nchrdev)
-		return (NODEV);
-	if (bdev != NODEV && bdev >= nblkdev)
-		return (NODEV);
-	chrtoblktab[cdev] = bdev;
-	return 0;
-}
-
-/*
- * Returns true if dev is /dev/mem or /dev/kmem.
- */
-int
-iskmemdev(dev_t dev)
-{
-
-	return (major(dev) == 3 && minor(dev) < 2);
-}
diff --git a/bsd/dev/ppc/cons.c b/bsd/dev/ppc/cons.c
deleted file mode 100644
index 207ee03ae..000000000
--- a/bsd/dev/ppc/cons.c
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* 
- * Copyright (c) 1987, 1988 NeXT, Inc.
- *
- * HISTORY
- *  7-Jan-93  Mac Gillon (mgillon) at NeXT
- *	Integrated POSIX support
- *
- * 12-Aug-87  John Seamons (jks) at NeXT
- *	Ported to NeXT.
- */ 
-
-/*
- * Indirect driver for console.
- */
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/conf.h>
-#include <sys/ioctl.h>
-#include <sys/tty.h>
-#include <sys/proc.h>
-#include <sys/uio.h>
-#include <machine/cons.h>
-
-struct tty	*constty;	/* current console device */
-
-/*
- * The km driver supplies the default console device for the system
- * (usually a raw frame buffer driver, but potentially a serial driver).
- */
-extern struct tty *km_tty[1];
-
-static dev_t
-cndev(void)
-{
-        if (constty)
-                return constty->t_dev;
-        else
-                return km_tty[0]->t_dev;
-}
-
-/*ARGSUSED*/
-int
-consopen(__unused dev_t dev, int flag, int devtype, struct proc *pp)
-{
-	dev = cndev();
-	return ((*cdevsw[major(dev)].d_open)(dev, flag, devtype, pp));
-}
-
-
-/*ARGSUSED*/
-int
-consclose(__unused dev_t dev, int flag, int mode, struct proc *pp)
-{
-	dev = cndev();
-	return ((*cdevsw[major(dev)].d_close)(dev, flag, mode, pp));
-}
-
-
-/*ARGSUSED*/
-int
-consread(__unused dev_t dev, struct uio *uio, int ioflag)
-{
-	dev = cndev();
-	return ((*cdevsw[major(dev)].d_read)(dev, uio, ioflag));
-}
-
-
-/*ARGSUSED*/
-int
-conswrite(__unused dev_t dev, struct uio *uio, int ioflag)
-{
-	dev = cndev();
-	return ((*cdevsw[major(dev)].d_write)(dev, uio, ioflag));
-}
-
-
-/*ARGSUSED*/
-int
-consioctl(__unused dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
-{
-	dev = cndev();
-#if 0
-	/*
-	 * Superuser can always use this to wrest control of console
-	 * output from the "virtual" console.
-	 *
-	 * XXX Unfortunately, this code doesn't do what the author thought
-	 * XXX it did; use of the console device, a TIOCCONS would always
-	 * XXX disassociate the console from a virtual terminal and send
-	 * XXX it back to the fake tty.
-	 */
-	if ((unsigned) cmd == TIOCCONS && constty) {
-		int error = proc_suser(p);
-		if (!error) {
-			constty = NULL;
-		}
-		return(error);
-	}
-#endif	/* 0 */
-
-	return ((*cdevsw[major(dev)].d_ioctl)(dev, cmd, addr, flag, p));
-}
-
-
-/*ARGSUSED*/
-/* called with funnel held */
-int
-consselect(__unused dev_t dev, int flag, void *wql, struct proc *p)
-{
-	dev = cndev();
-	return ((*cdevsw[major(dev)].d_select)(dev, flag, wql, p));
-}
diff --git a/bsd/dev/ppc/dtrace_isa.c b/bsd/dev/ppc/dtrace_isa.c
deleted file mode 100644
index 21b49bdc4..000000000
--- a/bsd/dev/ppc/dtrace_isa.c
+++ /dev/null
@@ -1,589 +0,0 @@
-/*
- * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
-#include <kern/thread.h>
-#include <mach/thread_status.h>
-#include <stdarg.h>
-#include <string.h>
-#include <sys/malloc.h>
-#include <sys/time.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/proc_internal.h>
-#include <sys/kauth.h>
-#include <sys/dtrace.h>
-#include <sys/dtrace_impl.h>
-#include <libkern/OSAtomic.h>
-#include <kern/thread_call.h>
-#include <kern/task.h>
-#include <kern/sched_prim.h>
-#include <miscfs/devfs/devfs.h>
-#include <mach/vm_param.h>
-#include <machine/cpu_capabilities.h>
-
-extern dtrace_id_t      dtrace_probeid_error;   /* special ERROR probe */
-
-void
-dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which,
-    int fltoffs, int fault, uint64_t illval)
-{
-	/*
-	 * dtrace_getarg() is a lost cause on PPC. For the case of the error probe firing, let's
-	 * stash away "illval" here, and special-case retrieving it in DIF_VARIABLE_ARG.
-	 */
-	state->dts_arg_error_illval = illval;
-	dtrace_probe( dtrace_probeid_error, (uint64_t)(uintptr_t)state, epid, which, fltoffs, fault );
-}
-
-/*
- * Atomicity and synchronization
- */
-void
-dtrace_membar_producer(void)
-{
-	__asm__ volatile("sync");
-}
-
-void
-dtrace_membar_consumer(void)
-{
-	__asm__ volatile("isync");
-}
-
-/*
- * Interrupt manipulation
- * XXX dtrace_getipl() can be called from probe context.
- */
-int
-dtrace_getipl(void)
-{
-	return (ml_at_interrupt_context() ? 1: 0);
-}
-
-/*
- * MP coordination
- */
-typedef void (*broadcastFunc) (uint32_t);
-
-int32_t cpu_broadcast(uint32_t *, broadcastFunc, uint32_t); /* osfmk/ppc/machine_cpu.h */
-
-typedef struct xcArg {
-	processorid_t cpu;
-	dtrace_xcall_t f;
-	void *arg;
-	uint32_t waitVar;
-} xcArg_t;
-
-static void
-xcRemote( uint32_t foo )
-{
-	xcArg_t *pArg = (xcArg_t *)foo;
-	
-	if ( pArg->cpu == CPU->cpu_id || pArg->cpu == DTRACE_CPUALL ) {
-		(pArg->f)(pArg->arg);
-	}
-	
-    if(!hw_atomic_sub(&(pArg->waitVar), 1)) {      /* Drop the wait count */
-        thread_wakeup((event_t)&(pArg->waitVar));  /* If we were the last, wake up the signaller */
-    }
-}
-
-/*
- * dtrace_xcall() is not called from probe context.
- */
-void
-dtrace_xcall(processorid_t cpu, dtrace_xcall_t f, void *arg)
-{
-	xcArg_t xcArg;
-	
-	/* Talking to ourselves, are we? */
-	if ( cpu == CPU->cpu_id ) {
-		(*f)(arg);
-		return;
-	}
-	
-	if ( cpu == DTRACE_CPUALL ) {
-		(*f)(arg);
-	}
-	
-	xcArg.cpu = cpu;
-	xcArg.f = f;
-	xcArg.arg = arg;
-    xcArg.waitVar = 0;
-
-	(void)cpu_broadcast(&(xcArg.waitVar), xcRemote, (uint32_t)&xcArg);
-}
-
-/*
- * Runtime and ABI
- */
-uint64_t
-dtrace_getreg(struct regs *savearea, uint_t reg)
-{
-	ppc_saved_state_t *regs = (ppc_saved_state_t *)savearea;
-    uint64_t mask = (_cpu_capabilities & k64Bit) ? 0xffffffffffffffffULL : 0x00000000ffffffffULL;
-	
-	/* See osfmk/ppc/savearea.h */
-	if (reg > 68) { /* beyond mmcr2 */
-		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
-		return (0);
-	}
-	
-	switch (reg) {
-		/* First 38 registers are saved to 64 bits r0-r31, srr0, srr1, xer, lr, ctr, dar. */
-		default:
-			return (((uint64_t *)(&(regs->save_r0)))[reg]) & mask;
-
-		/* Handle the 32-bit registers */
-		case 38: case 39: case 40: case 41: /* cr, dsisr, exception, vrsave */
-		case 42: case 43: case 44: case 45: /* vscr[4] */
-		case 46: case 47: case 48: case 49:     /* fpscrpad, fpscr, save_1d8[2] */
-		case 50: case 51: case 52: case 53: /* save_1E0[8] */
-		case 54: case 55: case 56: case 57: 
-		case 58: case 59: case 60: case 61: /* save_pmc[8] */
-		case 62: case 63: case 64: case 65: 
-			return (uint64_t)(((unsigned int *)(&(regs->save_cr)))[reg - 38]);
-			
-		case 66:
-			return regs->save_mmcr0 & mask;
-		case 67:
-			return regs->save_mmcr1 & mask;
-		case 68:
-			return regs->save_mmcr2 & mask;
-	}
-}
-
-#define RETURN_OFFSET 8
-#define RETURN_OFFSET64 16
-#define REGPC save_srr0
-#define REGSP save_r1
-
-/*
- * XXX dtrace_getustack_common() can be called from probe context.
- */
-static int
-dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, user_addr_t pc,
-    user_addr_t sp)
-{
-#if 0
-	volatile uint16_t *flags =
-	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
-
-	uintptr_t oldcontext = lwp->lwp_oldcontext; /* XXX signal stack crawl*/
-	size_t s1, s2;
-#endif
-	int ret = 0;
-	boolean_t is64Bit = proc_is64bit(current_proc());
-
-	ASSERT(pcstack == NULL || pcstack_limit > 0);
-	
-#if 0 /* XXX signal stack crawl*/
-	if (p->p_model == DATAMODEL_NATIVE) {
-		s1 = sizeof (struct frame) + 2 * sizeof (long);
-		s2 = s1 + sizeof (siginfo_t);
-	} else {
-		s1 = sizeof (struct frame32) + 3 * sizeof (int);
-		s2 = s1 + sizeof (siginfo32_t);
-	}
-#endif
-
-	while (pc != 0) {
-		ret++;
-		if (pcstack != NULL) {
-			*pcstack++ = (uint64_t)pc;
-			pcstack_limit--;
-			if (pcstack_limit <= 0)
-				break;
-		}
-
-		if (sp == 0)
-			break;
-
-#if 0 /* XXX signal stack crawl*/
-		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
-			if (p->p_model == DATAMODEL_NATIVE) {
-				ucontext_t *ucp = (ucontext_t *)oldcontext;
-				greg_t *gregs = ucp->uc_mcontext.gregs;
-
-				sp = dtrace_fulword(&gregs[REG_FP]);
-				pc = dtrace_fulword(&gregs[REG_PC]);
-
-				oldcontext = dtrace_fulword(&ucp->uc_link);
-			} else {
-				ucontext32_t *ucp = (ucontext32_t *)oldcontext;
-				greg32_t *gregs = ucp->uc_mcontext.gregs;
-
-				sp = dtrace_fuword32(&gregs[EBP]);
-				pc = dtrace_fuword32(&gregs[EIP]);
-
-				oldcontext = dtrace_fuword32(&ucp->uc_link);
-			}
-		} 
-		else
-#endif
-		{
-			if (is64Bit) {
-				pc = dtrace_fuword64((sp + RETURN_OFFSET64));
-				sp = dtrace_fuword64(sp);
-			} else {
-				pc = dtrace_fuword32((sp + RETURN_OFFSET));
-				sp = dtrace_fuword32(sp);
-			}
-		}
-	}
-
-	return (ret);
-}
-
-void
-dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
-{
-	thread_t thread = current_thread();
-	ppc_saved_state_t *regs;
-	user_addr_t pc, sp;
-	volatile uint16_t *flags =
-	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
-	int n;
-	boolean_t is64Bit = proc_is64bit(current_proc());
-
-	if (*flags & CPU_DTRACE_FAULT)
-		return;
-
-	if (pcstack_limit <= 0)
-		return;
-
-	/*
-	 * If there's no user context we still need to zero the stack.
-	 */
-	if (thread == NULL)
-		goto zero;
-
-	regs = (ppc_saved_state_t *)find_user_regs(thread);
-	if (regs == NULL)
-		goto zero;
-		
-	*pcstack++ = (uint64_t)proc_selfpid();
-	pcstack_limit--;
-
-	if (pcstack_limit <= 0)
-		return;
-
-	pc = regs->REGPC;
-	sp = regs->REGSP;
-
-	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
-		*pcstack++ = (uint64_t)pc;
-		pcstack_limit--;
-		if (pcstack_limit <= 0)
-			return;
-
-		pc = regs->save_lr;
-	}
-	
-	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_USTACK_FP)) {
-		/*
-		 * If the ustack fp flag is set, the stack frame from sp to
-		 * fp contains no valid call information. Start with the fp.
-		 */
-		if (is64Bit)
-			sp = dtrace_fuword64(sp);
-		else
-			sp = (user_addr_t)dtrace_fuword32(sp);
-	}
-
-	n = dtrace_getustack_common(pcstack, pcstack_limit, pc, sp);
-	ASSERT(n >= 0);
-	ASSERT(n <= pcstack_limit);
-
-	pcstack += n;
-	pcstack_limit -= n;
-
-zero:
-	while (pcstack_limit-- > 0)
-		*pcstack++ = 0;
-}
-
-int
-dtrace_getustackdepth(void)
-{
-	thread_t thread = current_thread();
-	ppc_saved_state_t *regs;
-	user_addr_t pc, sp;
-	int n = 0;
-	boolean_t is64Bit = proc_is64bit(current_proc());
-
-	if (thread == NULL)
-		return 0;
-
-	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
-		return (-1);
-
-	regs = (ppc_saved_state_t *)find_user_regs(thread);
-	if (regs == NULL)
-		return 0;
-
-	pc = regs->REGPC;
-	sp = regs->REGSP;
-
-	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
-		n++;
-		pc = regs->save_lr;
-	}
-	
-	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_USTACK_FP)) {
-		/*
-		 * If the ustack fp flag is set, the stack frame from sp to
-		 * fp contains no valid call information. Start with the fp.
-		 */
-		if (is64Bit)
-			sp = dtrace_fuword64(sp);
-		else
-			sp = (user_addr_t)dtrace_fuword32(sp);
-	}
-
-	n += dtrace_getustack_common(NULL, 0, pc, sp);
-
-	return (n);
-}
-
-void
-dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
-{
-	thread_t thread = current_thread();
-	ppc_saved_state_t *regs;
-	user_addr_t pc, sp;
-	volatile uint16_t *flags =
-	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
-#if 0
-	uintptr_t oldcontext;
-	size_t s1, s2;
-#endif
-	boolean_t is64Bit = proc_is64bit(current_proc());
-
-	if (*flags & CPU_DTRACE_FAULT)
-		return;
-
-	if (pcstack_limit <= 0)
-		return;
-
-	/*
-	 * If there's no user context we still need to zero the stack.
-	 */
-	if (thread == NULL)
-		goto zero;
-
-	regs = (ppc_saved_state_t *)find_user_regs(thread);
-	if (regs == NULL)
-		goto zero;
-		
-	*pcstack++ = (uint64_t)proc_selfpid();
-	pcstack_limit--;
-
-	if (pcstack_limit <= 0)
-		return;
-
-	pc = regs->REGPC;
-	sp = regs->REGSP;
-	
-#if 0 /* XXX signal stack crawl*/
-	oldcontext = lwp->lwp_oldcontext;
-
-	if (p->p_model == DATAMODEL_NATIVE) {
-		s1 = sizeof (struct frame) + 2 * sizeof (long);
-		s2 = s1 + sizeof (siginfo_t);
-	} else {
-		s1 = sizeof (struct frame32) + 3 * sizeof (int);
-		s2 = s1 + sizeof (siginfo32_t);
-	}
-#endif
-
-	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
-		*pcstack++ = (uint64_t)pc;
-		*fpstack++ = 0;
-		pcstack_limit--;
-		if (pcstack_limit <= 0)
-			return;
-
-		/*
-		 * XXX This is wrong, but we do not yet support stack helpers.
-		 */
-		if (is64Bit)
-			pc = dtrace_fuword64(sp);
-		else
-			pc = dtrace_fuword32(sp);
-	}
-
-	while (pc != 0) {
-		*pcstack++ = (uint64_t)pc;
-		*fpstack++ = sp;
-		pcstack_limit--;
-		if (pcstack_limit <= 0)
-			break;
-
-		if (sp == 0)
-			break;
-
-#if 0 /* XXX signal stack crawl*/
-		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
-			if (p->p_model == DATAMODEL_NATIVE) {
-				ucontext_t *ucp = (ucontext_t *)oldcontext;
-				greg_t *gregs = ucp->uc_mcontext.gregs;
-
-				sp = dtrace_fulword(&gregs[REG_FP]);
-				pc = dtrace_fulword(&gregs[REG_PC]);
-
-				oldcontext = dtrace_fulword(&ucp->uc_link);
-			} else {
-				ucontext_t *ucp = (ucontext_t *)oldcontext;
-				greg_t *gregs = ucp->uc_mcontext.gregs;
-
-				sp = dtrace_fuword32(&gregs[EBP]);
-				pc = dtrace_fuword32(&gregs[EIP]);
-
-				oldcontext = dtrace_fuword32(&ucp->uc_link);
-			}
-		} 
-		else
-#endif
-		{
-			if (is64Bit) {
-				pc = dtrace_fuword64((sp + RETURN_OFFSET64));
-				sp = dtrace_fuword64(sp);
-			} else {
-				pc = dtrace_fuword32((sp + RETURN_OFFSET));
-				sp = dtrace_fuword32(sp);
-			}
-		}
-	}
-
-zero:
-	while (pcstack_limit-- > 0)
-		*pcstack++ = 0;
-}
-
-void
-dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
-    uint32_t *intrpc)
-{
-	struct frame *fp = (struct frame *)__builtin_frame_address(0);
-	struct frame *nextfp, *minfp, *stacktop;
-	int depth = 0;
-	int last = 0;
-	uintptr_t pc;
-	uintptr_t caller = CPU->cpu_dtrace_caller;
-	int on_intr;
-
-	if ((on_intr = CPU_ON_INTR(CPU)) != 0)
-		stacktop = (struct frame *)dtrace_get_cpu_int_stack_top();
-	else
-		stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size);
-
-	minfp = fp;
-
-	aframes++;
-
-	if (intrpc != NULL && depth < pcstack_limit)
-		pcstack[depth++] = (pc_t)intrpc;
-
-	while (depth < pcstack_limit) {
-		nextfp = *(struct frame **)fp;
-		pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET);
-
-		if (nextfp <= minfp || nextfp >= stacktop) {
-			if (on_intr) {
-				/*
-				 * Hop from interrupt stack to thread stack.
-				 */
-				vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread());
-
-				minfp = (struct frame *)kstack_base;
-				stacktop = (struct frame *)(kstack_base + kernel_stack_size);
-
-				on_intr = 0;
-				continue;
-			}
-			/*
-			 * This is the last frame we can process; indicate
-			 * that we should return after processing this frame.
-			 */
-			last = 1;
-		}
-
-		if (aframes > 0) {
-			if (--aframes == 0 && caller != 0) {
-				/*
-				 * We've just run out of artificial frames,
-				 * and we have a valid caller -- fill it in
-				 * now.
-				 */
-				ASSERT(depth < pcstack_limit);
-				pcstack[depth++] = (pc_t)caller;
-				caller = 0;
-			}
-		} else {
-			if (depth < pcstack_limit)
-				pcstack[depth++] = (pc_t)pc;
-		}
-
-		if (last) {
-			while (depth < pcstack_limit)
-				pcstack[depth++] = 0;
-			return;
-		}
-
-		fp = nextfp;
-		minfp = fp;
-	}
-}
-
-uint64_t
-dtrace_getarg(int arg, int aframes)
-{
-#pragma unused(arg,aframes)
-	return 0xfeedfacedeafbeadLL; /* XXX Only called for arg >= 5 */
-}
-
-/*
- * Load/Store Safety
- */
-
-void
-dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
-{
-	/*
-	 * "base" is the smallest toxic address in the range, "limit" is the first
-	 * VALID address greater than "base".
-	 */
-	func(0x0, VM_MIN_KERNEL_ADDRESS);
-	if (VM_MAX_KERNEL_ADDRESS < ~(uintptr_t)0)
-			func(VM_MAX_KERNEL_ADDRESS + 1, ~(uintptr_t)0);
-}
-
-extern void *mapping_phys_lookup(ppnum_t, unsigned int *);
-
diff --git a/bsd/dev/ppc/dtrace_subr_ppc.c b/bsd/dev/ppc/dtrace_subr_ppc.c
deleted file mode 100644
index 5040a9183..000000000
--- a/bsd/dev/ppc/dtrace_subr_ppc.c
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (c) 2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-/*
- * #pragma ident	"@(#)dtrace_subr.c	1.12	05/06/08 SMI"
- */
-
-#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
-#include <sys/dtrace.h>
-#include <sys/dtrace_glue.h>
-#include <sys/dtrace_impl.h>
-#include <sys/fasttrap.h>
-#include <sys/vm.h>
-#include <sys/user.h>
-#include <sys/kauth.h>
-#include <kern/debug.h>
-
-int (*dtrace_pid_probe_ptr)(ppc_saved_state_t *);
-int (*dtrace_return_probe_ptr)(ppc_saved_state_t *);
-kern_return_t dtrace_user_probe(ppc_saved_state_t *sv);
-
-kern_return_t
-dtrace_user_probe(ppc_saved_state_t *sv)
-{
-
-	lck_rw_t *rwp;
-	struct proc *p = current_proc();
-
-	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
-	/*
-	 * DTrace accesses t_cred in probe context.	 t_cred
-	 * must always be either NULL, or point to a valid,
-	 * allocated cred structure.
-	 */
-	kauth_cred_uthread_update(uthread, p);
-
-	if (sv->save_exception == T_DTRACE_RET) {
-
-/*
- *		T_DTRACE_RET is generated by the kernel when an emulation sequence 
- *		ends.  Unlike the x86 implementation, this can not be caused by
- *		a user state trap instruction.  It is a system error if it occurs
- *		when not stepping and is, therefore, a panickable offence.
- */
-
-		if(uthread->t_dtrace_step == 0) {	/* Are we supposed to be tracing? */
-			panic("dtrace_user_probe: T_DTRACE_RET when not stepping\n");
-		}
-
-		if (uthread->t_dtrace_ast) {
-			printf("dtrace_user_probe() should be calling aston()\n");
-			// aston(uthread);
-			// uthread->t_sig_check = 1;
-		}
-
-		/*
-		 * Clear all user tracing flags.
-		 */
-		uthread->t_dtrace_ft = 0;
-
-		/*
-		 * We need to wait until after we've called the
-		 * dtrace_return_probe_ptr function pointer to step the pc.
-		 */
-		rwp = &CPU->cpu_ft_lock;
-		lck_rw_lock_shared(rwp);
-
-		if (dtrace_return_probe_ptr != NULL) (void)(*dtrace_return_probe_ptr)(sv);
-		lck_rw_unlock_shared(rwp);
-
-		sv->save_srr0 = sv->save_srr0 + 4;	/* Step to next instruction */
-		if(!(sv->save_srr1 & 0x8000000000000000ULL)) sv->save_srr0 &= 0x00000000FFFFFFFF;	/* Trim if in 32-bit mode */
-
-		return KERN_SUCCESS;
-		
-	} else {
-
-/*
- *	We have taken our normal trap to get here.  Make sure we expect it
- */
-		uint32_t instr;
-		rwp = &CPU->cpu_ft_lock;
-
-		/*
-		 * The DTrace fasttrap provider uses a trap, "twi 31,r31,0xDDDD".
-		 * We will only be here if dtrace (or someone pretending to be us)
-		 * sets the trap.
-		 * We let DTrace take the first crack at handling
-		 * this trap; if it's not a probe that DTrace knows about,
-		 * we call into the trap() routine to handle it like a
-		 * breakpoint placed by a conventional debugger.
-		 */
-
-		/*
-		 * APPLE NOTE: I believe the purpose of the reader/writers lock
-		 * is thus: There are times which dtrace needs to prevent calling
-		 * dtrace_pid_probe_ptr(). Sun's original impl grabbed a plain
-		 * mutex here. However, that serialized all probe calls, and
-		 * destroyed MP behavior. So now they use a RW lock, with probes
-		 * as readers, and the top level synchronization as a writer.
-		 */
-		lck_rw_lock_shared(rwp);
-		if (dtrace_pid_probe_ptr != NULL && 
-			(*dtrace_pid_probe_ptr)(sv) == 0) {
-			lck_rw_unlock_shared(rwp);
-			return KERN_SUCCESS;
-		}
-		lck_rw_unlock_shared(rwp);
-
-		/*
-		 * If the instruction that caused the breakpoint trap doesn't
-		 * look like our trap anymore, it may be that this tracepoint
-		 * was removed just after the user thread executed it. In
-		 * that case, return to user land to retry the instuction.
-		 *
-		 * Note that the PC is correct because we do not advance it until after emulation.
-		 */
-		if (fuword32(sv->save_srr0, &instr) == 0 && instr != FASTTRAP_INSTR) {
-			return KERN_SUCCESS;
-		}
-
-	}
-
-/*
- *	If we get here, we go back to throw an exception
- */
-
-	return KERN_FAILURE;
-}
-
-void
-dtrace_safe_synchronous_signal(void)
-{
-// This is commented out of the x86 code and is never called.
-}
-
-int
-dtrace_safe_defer_signal(void)
-{
-// This is commented out of the x86 code and is never called.
-	return 0;
-}
diff --git a/bsd/dev/ppc/fasttrap_isa.c b/bsd/dev/ppc/fasttrap_isa.c
deleted file mode 100644
index 10e2edd08..000000000
--- a/bsd/dev/ppc/fasttrap_isa.c
+++ /dev/null
@@ -1,734 +0,0 @@
-/*
- * Copyright (c) 2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-/*
- * #pragma ident	"@(#)fasttrap_isa.c	1.27	08/04/09 SMI"
- */
-
-#ifdef KERNEL
-#ifndef _KERNEL
-#define _KERNEL /* Solaris vs. Darwin */
-#endif
-#endif
-
-#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
-#include <sys/fasttrap_isa.h>
-#include <sys/fasttrap_impl.h>
-#include <sys/dtrace.h>
-#include <sys/dtrace_impl.h>
-#include <sys/dtrace_ptss.h>
-#include <kern/debug.h>
-#include <ppc/decodePPC.h>
-#include <kern/task.h>
-#include <mach/vm_param.h>
-#include <mach/mach_vm.h>
-#include <mach/task.h>
-#include <vm/pmap.h>
-#include <vm/vm_map.h> /* All the bits we care about are guarded by MACH_KERNEL_PRIVATE :-( */
-extern dtrace_id_t dtrace_probeid_error;
-
-/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */
-#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */
-
-static int32_t branchtaken(int32_t bo, int32_t bi, ppc_saved_state_t *sv);
-static int32_t dtrace_decode_ppc(uint32_t inst);
-int patchInst(task_t task, addr64_t vaddr, uint32_t inst);
-kern_return_t dtrace_user_probe(ppc_saved_state_t *sv);
-
-/*
- * Lossless User-Land Tracing on PPC
- * ---------------------------------
- *
- * PPC uses a different technique to emulate user-land instruction replaces by a probe
- * trap than x86.
- *
- * Like x86, it will emulate all forms of branch instructions.  We will not attempt
- * to emulate any instruction that we know will cause an interruption or exception
- * (system call, trap, privileged instruction, instruction that uses a privileged
- * register).
- *
- * NOTE: I am thinking that we should punish tight loopers, e.g., branch-to-dot.
- * Depending upon clock resolution and how fast we can process these guys, it is
- * possible that its quantum will never decrease.  Maybe we could just manually
- * end the guy's quantum and let the next guy go...
- *
- * When fasttrap_tracepoint_init is called, we fetch the instruction and decode it.
- * If we don't recognize it or find it is a "banned" instruction, we return -1,
- * telling our caller to forget it.  Otherwise we save the instruction image and
- * enough of the decode to quickly handle it at probe time.  We cram it into
- * the fasttrap_machtp_t structure.
- *
- * When the probe hits, we verify that the PC is still a probe point and if not,
- * we bail.  Otherwise we have a bit more to do.
- *
- * If DTFTP_ENTRY is set, we have an entry probe and need to call dtrace_probe.
- *
- * If DTFTP_IS_ENABLED is set, all we need to do is to return a 1.
- *
- * If ftp_argmap is NULL, we call dtrace_probe
- *
- * Otherwise, we figure out what the arguments are and pass them to dtrace_probe
- *
- * Next, we need to set up to emulate the probed instruction and here is where we are
- * the most different than the x86 code.
- *
- * Like x86, we first check to see if the instruction is any form of branch.  If so, 
- * we emulate it completely within the kernel and are done.
- *
- * If it is anything else, we build a code stream within the kernel to execute the
- * instruction.  Note that this is very different from x86 which builds the code in
- * userland.
- *
- * The generated stream needs to be executed within the kernel's code space but with
- * the user address space and registers.  Because PPC allows different translation modes
- * for instruction fetch and data fetch, this is not too difficult.
- *
- * There are two kinds of streams needed: execute and continue, and execute and return,
- * which are used for entry/offset and exit probes respectively.
- *
- * The probe code will copy the instruction image into the current user savearea (which
- * also contains the complete user state register context).  A flag that requests either
- * execute/continue or execute/return is also set in the savearea.
- *
- * We now exit the dtrace code and the marked context makes its way back to the point
- * where it will be dispatched on the processor.
- *
- * The exception return code will start to restore the user context, including registers
- * and address space.  However, before dispatching the user, it will notice that the
- * emulate flags are set.  At this point the code will build a code stream 
- * in an area in the per_proc that consists of
- * the original instruction followed by a trap instruction.  It will set the new MSR (in
- * SRR1) to have address translation enable for data, translation disabled for instruction
- * fetches, interruptions disabled, and supervisor state.
- *
- * The new PC and MSR are loaded via a RFID and the generated stream is executed. If a
- * synchronous fault occurs, it is either handled (PTE miss, FPU or vector unavailable),
- * emulated (alignment or denorm), or passed on to the user.
- *
- * Assuming the emulated instruction completes, the trap will execute.  When that happens, 
- * low-level trap handler will check its flags.  If the trap corresponds to an
- * execute/continue stream, the trap handler will adjust the PC and complete the
- * transition into user space. 
- *
- * If the trap corresponds to an execute/return stream, the handler will generate 
- * a T_DTRACE_RET exception and let the trap handler pass it along to dtrace_user_probe.
- *
- */
-
-
-static uint64_t
-fasttrap_anarg(ppc_saved_state_t *sv, int function_entry, int argno)
-{
-#pragma unused(function_entry)
-	uint32_t farg;
- 	uint64_t value;
- 	
- 	/* The first 8 arguments (argno 0-7) are in registers */
- 	if (argno < 8) {
- 		value = (&sv->save_r3)[argno];
- 	} else {
- 		if (sv->save_srr1 & 0x8000000000000000ULL) {
- 			/* 64-bit */
- 			/* Grab argument >= 8 from stack */
- 			fasttrap_fuword64_noerr(sv->save_r1 + 48 + ((argno)* sizeof(uint64_t)), &value);
- 		} else {
- 			/* 32-bit */
-			/* Grab argument >= 8 from stack */
- 			fasttrap_fuword32_noerr(sv->save_r1 + 24 + ((argno) * sizeof(uint32_t)), &farg);
-			value = (uint64_t)farg;
- 		}
- 	}
- 	
- 	return (value);
-}
-
-/*ARGSUSED*/
-int
-fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, user_addr_t pc,
-    fasttrap_probe_type_t type)
-{
-#pragma unused(type)
-
-	uint32_t instr, testr1, testr2, testr3;
-	user_addr_t targpc;
-	int32_t target, optype;
-
-	/*
-	 * Read the instruction at the given address out of the process's
-	 * address space. We don't have to worry about a debugger
-	 * changing this instruction before we overwrite it with our trap
-	 * instruction since P_PR_LOCK is set. Since instructions can span
-	 * pages, we potentially read the instruction in two parts. If the
-	 * second part fails, we just zero out that part of the instruction.
-	 */
-	/*
-	 * APPLE NOTE: Of course, we do not have a P_PR_LOCK, so this is racey...
-	 */
-
-	if (uread(p, &instr, 4, pc) != 0) return (-1);	/* Grab instruction, return suddenly if read fails... */
-		
-	optype = dtrace_decode_ppc(instr);		/* See if we have an instruction we can probe */
-	
-	tp->ftt_instr = instr;					/* Save the instruction image */
-	testr1 = tp->ftt_bo = (uint8_t)((instr >> (31 - 10)) & 0x1F);	/* Extract branch options */
-	testr2 = tp->ftt_bi = (uint8_t)((instr >> (31 - 15)) & 0x1F);	/* Extract condition register bit */
-	testr3 = (instr >> (31 - 20)) & 0x1F;	/* Get that last register */
-	tp->ftt_flgs = (uint8_t)(instr & 3);	/* Set the absolute address and link flags */
-
-	switch(optype) {						/* Do instruction specific decode */
-		
-		case diCMN:							/* Common instruction */
-			tp->ftt_type = ftmtCommon;		/* Mark as common instruction */
-			break;
-			
-		case diINV: 						/* Invalid */
-		case diTRP:							/* Trap */
-		case diSC:							/* System Call */
-		case diRFI: 						/* Return from interrupt */
-		case diPRV:							/* Privileged instruction */
-			return (-1);					/* We will not emulate these... */
-			break;
-		
-		case diB:							/* Branch */
-			tp->ftt_type = ftmtB;			/* Mark as branch instruction */
-			target = instr & 0x03FFFFFC;	/* Extract address or offset */
-			if(target & 0x02000000) target |= 0xFC000000;	/* Sign extend */
-			tp->ftt_trgt = target;			/* Trim back down and save */
-			
-			targpc = (user_addr_t)((int64_t)target);	/* Generate a target address, hopefully we sign extend... */
-			if(!(tp->ftt_flgs & ftmtAbs)) {	/* Are we dealing with an offset here? */
-				targpc = targpc + pc;		/* Apply offset to get target address */
-			}
-			
-			if(targpc == pc) return -1;		/* Branching to self is a sin and is forbidden... */
-			break;
-			
-		case diBC:							/* Branch conditional */
-			tp->ftt_type = ftmtBC;			/* Mark as branch conditional */
-			target = instr & 0x0000FFFC;	/* Extract address or offset */
-			if(target & 0x00008000) target |= 0xFFFF0000;	/* Sign extend */
-			tp->ftt_trgt = target;			/* Trim back down and save */
-			
-			targpc = (user_addr_t)((int64_t)target);	/* Generate a target address, hopefully we sign extend... */
-			if(!(tp->ftt_flgs & ftmtAbs)) {		/* Are we dealing with an offset here? */
-				targpc = targpc + pc;		/* Apply offset to get target address */
-			}
-			
-			if(targpc == pc) return -1;		/* Branching to self is a sin and is forbidden... */
-			break;
-			
-		case diBLR:							/* Branch conditional to link register */
-			tp->ftt_type = ftmtBLR;			/* Mark as branch conditional to link register */
-			break;
-			
-		case diBCTR:						/* Branch conditional to count register */
-			tp->ftt_type = ftmtBCTR;		/* Mark as branch conditional to count register */
-			break;
-			
-		case diOR:							/* OR */
-			if((instr >> 26) == 24) {		/* Is this the ORI nop? */
-				if((testr1 == testr2) && ((instr & 0x0000FFFF) == 0)) tp->ftt_type = ftmtNOP;	/* Remember if this is a NOP instruction */
-				else tp->ftt_type = ftmtCommon;	/* Otherwise it is a common ORI instruction */
-			}
-			else if((testr1 == testr2) && (testr1 == testr3)) tp->ftt_type = ftmtNOP;	/* If all three registers are the same, this is a NOP */
-			else tp->ftt_type = ftmtCommon;	/* Otherwise it is a common OR instruction */
-
-			break;
-			
-		default:
-			panic("fasttrap_tracepoint_init: invalid branch decode, inst = %08X, optype = %d\n", instr, optype);
-			break;
-			
-	}
-
-	return (0);
-}
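-
-/*
- * Illustrative sketch, not part of the original source: the sign extension
- * performed for the diB case above, isolated.  The I-form LI field occupies
- * bits 6-29 of the instruction, so bit 0x02000000 is its sign bit.
- */
-static int32_t
-example_extend_li(uint32_t instr)
-{
-	int32_t target = instr & 0x03FFFFFC;	/* LI field; the low two bits are the AA/LK flags */
-
-	if (target & 0x02000000)		/* Negative displacement? */
-		target |= 0xFC000000;		/* Propagate the sign through the upper bits */
-
-	return target;				/* e.g. 0x4BFFFFF0 ("b .-16") yields -16 */
-}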
-
-int
-fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
-{
-	return patchInst(p->task, tp->ftt_pc, FASTTRAP_INSTR);	/* Patch the instruction and flush it */
-}
-
-extern void dbgTrace(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t);
-
-int
-fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
-{
-	uint32_t instr;
-
-	/*
-	 * Distinguish between read or write failures and a changed
-	 * instruction.
-	 */
-	if (uread(p, &instr, 4, tp->ftt_pc) != 0) return (0);	/* Get the instruction, but exit if not mapped */
-
-//	dbgTrace(0x99999999, (uint32_t)tp->ftt_pc, tp->ftt_instr, instr, 0);	/* (TRACE/DEBUG) */
-
-	if (instr != FASTTRAP_INSTR) return (0);	/* Did someone change it? If so, just leave */
-
-	return patchInst(p->task, tp->ftt_pc, tp->ftt_instr);	/* Patch the old instruction back in and flush it */
-}
-
-static void
-fasttrap_return_common(ppc_saved_state_t *sv, user_addr_t pc, pid_t pid, user_addr_t new_pc)
-{
-
-	fasttrap_tracepoint_t *tp;
-	fasttrap_bucket_t *bucket;
-	fasttrap_id_t *id;
-	lck_mtx_t *pid_mtx;
-
-	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
-	lck_mtx_lock(pid_mtx);
-	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
-
-	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
-		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
-		    tp->ftt_proc->ftpc_acount != 0)
-			break;
-	}
-
-	/*
-	 * Don't sweat it if we can't find the tracepoint again. Unlike
-	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
-	 * is not essential to the correct execution of the process.
-	 */
-	if (tp == NULL) {
-		lck_mtx_unlock(pid_mtx);
-		return;
-	}
-
-	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
-		/*
-		 * If there's a branch that could act as a return site, we
-		 * need to trace it, and check here if the program counter is
-		 * external to the function.
-		 */
-		if((new_pc - id->fti_probe->ftp_faddr) < id->fti_probe->ftp_fsize)	/* Is target within the function? */
-			continue;							/* Yeah, skip this one... */
-
-		DTRACE_CPUFLAG_SET(CPU_DTRACE_USTACK_FP);
-		if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) {
-			dtrace_probe(dtrace_probeid_error, 0 /* state */, 
-				     id->fti_probe->ftp_id, 1 /* ndx */, -1 /* offset */, 
-				     DTRACEFLT_UPRIV);
-		} else {
-			dtrace_probe(id->fti_probe->ftp_id,
-				pc - id->fti_probe->ftp_faddr,
-				sv->save_r3, sv->save_r4, 0, 0);
-		}
-		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_USTACK_FP);
-	}
-
-	lck_mtx_unlock(pid_mtx);
-}
-
-static void
-fasttrap_usdt_args(fasttrap_probe_t *probe, ppc_saved_state_t *sv, int argc,
-    uint64_t *argv)
-{
-	int i, x, cap = MIN(argc, probe->ftp_nargs);
-	uint32_t farg;
-
-	for (i = 0; i < cap; i++) {
-		x = probe->ftp_argmap[i];
-
-		if (x <= 8) {							/* Is this argument in a register? */
-			argv[i] = (&sv->save_r0)[x];
-		} else {
-			if(sv->save_srr1 & 0x8000000000000000ULL) {	/* Are we running in 64-bit? */
-				fasttrap_fuword64_noerr(sv->save_r1 + 48 + (x * sizeof(uint64_t)), &argv[i]);	/* Grab argument > 8 from stack */
-			}
-			else {
-				fasttrap_fuword32_noerr(sv->save_r1 + 24 + (x * sizeof(uint32_t)), &farg);	/* Grab argument > 8 from stack */
-				argv[i] = (uint64_t)farg;		/* Convert to 64-bit */
-			}
-		}
-	}
-
-	for (; i < argc; i++) {
-		argv[i] = 0;
-	}
-}
-
-int
-fasttrap_pid_probe(ppc_saved_state_t *sv)
-{
-	proc_t *p = current_proc();
-	fasttrap_bucket_t *bucket;
-	lck_mtx_t *pid_mtx;
-	fasttrap_tracepoint_t *tp, tp_local;
-	pid_t pid;
-	dtrace_icookie_t cookie;
-	uint_t is_enabled = 0;
-	user_addr_t new_pc = 0;
-	user_addr_t pc;
-	user_addr_t addrmask;
-
-	pc = sv->save_srr0;							/* Remember the PC for later */
-	if(sv->save_srr1 & 0x8000000000000000ULL) addrmask = 0xFFFFFFFFFFFFFFFFULL;	/* Set 64-bit addressing if enabled */
-	else addrmask = 0x00000000FFFFFFFFULL;		/* Otherwise set 32-bit */
-
-	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
-
-	/*
-	 * Clear all user tracing flags.
-	 */
-	uthread->t_dtrace_ft = 0;
-
-	/*
-	 * Treat a child created by a call to vfork(2) as if it were its
-	 * parent. We know that there's only one thread of control in such a
-	 * process: this one.
-	 */
-	/*
-	 * APPLE NOTE: Terry says: "You need to hold the process locks (currently: kernel funnel) for this traversal"
-	 * FIXME: How do we assert this?
-	 */
-	while (p->p_lflag & P_LINVFORK) p = p->p_pptr;	/* Search the end */
-
-	pid = p->p_pid;
-	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
-	lck_mtx_lock(pid_mtx);
-	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, sv->save_srr0)];	/* Get the bucket that corresponds to our PC */
-
-	/*
-	 * Lookup the tracepoint that the process just hit.
-	 */
-	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
-		if (pid == tp->ftt_pid && (sv->save_srr0 == tp->ftt_pc) &&
-		    tp->ftt_proc->ftpc_acount != 0)
-			break;
-	}
-
-	/*
-	 * If we couldn't find a matching tracepoint, either a tracepoint has
-	 * been inserted without using the pid<pid> ioctl interface (see
-	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
-	 */
-	if (tp == NULL) {
-		lck_mtx_unlock(pid_mtx);
-		return (-1);
-	}
-
-	if (tp->ftt_ids != NULL) {
-		fasttrap_id_t *id;
-		
-		for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
-			fasttrap_probe_t *probe = id->fti_probe;
-			
-			if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) {
-				dtrace_probe(dtrace_probeid_error, 0 /* state */, 
-				     id->fti_probe->ftp_id, 1 /* ndx */, -1 /* offset */, 
-				     DTRACEFLT_UPRIV);
-			} else if (id->fti_ptype == DTFTP_ENTRY) {
-				/*
-				 * We note that this was an entry
-				 * probe to help ustack() find the
-				 * first caller.
-				 */
-				cookie = dtrace_interrupt_disable();
-				DTRACE_CPUFLAG_SET(CPU_DTRACE_USTACK_FP | CPU_DTRACE_ENTRY);
-				dtrace_probe(probe->ftp_id, sv->save_r3, sv->save_r4,	/* Call the main probe routine with the first 5 args */
-					sv->save_r5, sv->save_r6, sv->save_r7);
-				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_USTACK_FP | CPU_DTRACE_ENTRY);
-				dtrace_interrupt_enable(cookie);
-				
-			} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
-				/*
-				 * Note that in this case, we don't
-				 * call dtrace_probe() since it's only
-				 * an artificial probe meant to change
-				 * the flow of control so that it
-				 * encounters the true probe.
-				 */
-				is_enabled = 1;
-				
-			} else if (probe->ftp_argmap == NULL) {
-				DTRACE_CPUFLAG_SET(CPU_DTRACE_USTACK_FP);
-				dtrace_probe(probe->ftp_id, sv->save_r3, sv->save_r4,	/* Call the main probe routine with the first 5 args */
-					     sv->save_r5, sv->save_r6, sv->save_r7);
-				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_USTACK_FP);
-					     
-			} else {
-				uint64_t t[5];
-				
-				fasttrap_usdt_args(probe, sv, 5, t);	/* Grab 5 arguments */
-				
-				DTRACE_CPUFLAG_SET(CPU_DTRACE_USTACK_FP);
-				dtrace_probe(probe->ftp_id, t[0], t[1],
-					     t[2], t[3], t[4]);
-				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_USTACK_FP);
-			}
-
-			/* APPLE NOTE: Oneshot probes get one and only one chance... */
-			if (probe->ftp_prov->ftp_provider_type == DTFTP_PROVIDER_ONESHOT) {
-				fasttrap_tracepoint_remove(p, tp);
-			}
-		}
-	}
-
-	/*
-	 * We're about to do a bunch of work so we cache a local copy of
-	 * the tracepoint to emulate the instruction, and then find the
-	 * tracepoint again later if we need to light up any return probes.
-	 */
-	tp_local = *tp;
-	lck_mtx_unlock(pid_mtx);
-	tp = &tp_local;
-
-	/*
-	 * If there's an is-enabled probe connected to this tracepoint it
-	 * means that there was a 'xor r3,r3,r3'
-	 * instruction that was placed there by DTrace when the binary was
-	 * linked. As this probe is, in fact, enabled, we need to stuff 1
-	 * into R3. Accordingly, we can bypass all the instruction
-	 * emulation logic since we know the inevitable result. It's possible
-	 * that a user could construct a scenario where the 'is-enabled'
-	 * probe was on some other instruction, but that would be a rather
-	 * exotic way to shoot oneself in the foot.
-	 */
-	if (is_enabled) {
-		sv->save_r3 = 1;				/* Set condition to true */
-		new_pc = (sv->save_srr0 + 4) & addrmask;		/* Just fall through to the next instruction */
-		goto done;
-	}
-
-	/*
-	 * We emulate certain types of instructions to ensure correctness
-	 * (in the case of position dependent instructions) or optimize
-	 * common cases. The rest we execute in the kernel, but with
-	 * most of the user's context active.
-	 */
-	switch (tp->ftt_type) {
-	
-		case ftmtNOP:					/* NOP  */
-			new_pc = (sv->save_srr0 + 4) & addrmask;	/* Just fall through to the next instruction */
-			break;
-
-		case ftmtB:						/* Plain unconditional branch */
-			new_pc = (user_addr_t)((int64_t)tp->ftt_trgt);	/* Assume target is absolute address for the moment */
-			if(!(tp->ftt_flgs & ftmtAbs)) new_pc = (new_pc + sv->save_srr0) & addrmask;	/* We don't have absolute address, use as offset from instruction address */
-
-			if(tp->ftt_flgs & ftmtLink) sv->save_lr = (sv->save_srr0 + 4) & addrmask;	/* Set the LR to the next instruction if needed */
-			break;
-		
-		case ftmtBC:					/* Conditional PC relative or absolute branch */
-			new_pc = (user_addr_t)((int64_t)tp->ftt_trgt);	/* Assume target is absolute address for the moment */
-			if(!(tp->ftt_flgs & ftmtAbs)) new_pc = new_pc + sv->save_srr0;	/* We don't have absolute address, use as offset from instruction address */
-
-			if(tp->ftt_flgs & ftmtLink) sv->save_lr = (sv->save_srr0 + 4) & addrmask;	/* Set the LR to the next instruction if needed */
-			if(!branchtaken(tp->ftt_bo, tp->ftt_bi, sv)) new_pc = (sv->save_srr0 + 4) & addrmask;	/* If branch was not taken, set PC to next address */
-			break;
-		
-		case ftmtBLR:					/* Conditional branch to LR */
-			new_pc = sv->save_lr;		/* Branch target comes from the LR */
-
-			if(tp->ftt_flgs & ftmtLink) sv->save_lr = (sv->save_srr0 + 4) & addrmask;	/* Set the LR to the next instruction if needed */			
-			if(!branchtaken(tp->ftt_bo, tp->ftt_bi, sv)) new_pc = (sv->save_srr0 + 4) & addrmask;	/* If branch was not taken, set PC to next address */
-			break;
-		
-		case ftmtBCTR:					/* Conditional branch to CTR */
-			new_pc = sv->save_ctr;		/* Branch target comes from the CTR */
-
-			if(tp->ftt_flgs & ftmtLink) sv->save_lr = (sv->save_srr0 + 4) & addrmask;	/* Set the LR to the next instruction if needed */			
-			if(!branchtaken(tp->ftt_bo, tp->ftt_bi, sv)) new_pc = (sv->save_srr0 + 4) & addrmask;	/* If branch was not taken, set PC to next address */
-			break;
-		
-		case ftmtCommon:				/* Common, non-in-kernel emulated instruction */
-			sv->save_instr[0] = 1;		/* We only have one instruction to inject */
-			sv->save_instr[1] = tp->ftt_instr;	/* Set the instruction */
-			sv->save_hdr.save_flags = sv->save_hdr.save_flags | SAVinject;	/* Tell low-level exception return to inject the instruction */
-			uthread->t_dtrace_step = 1;	/* Let it be known that a trace return is imminent */
-			return 0;					/* Go and don't come back until you are done... */
-			
-		default:
-			panic("fasttrap_pid_probe: invalid ftt_type = %08X\n", tp->ftt_type);	/* Huh, what happened? */
-			break;
-	}
-		
-
-done:
-	
-	/*
-	 * If there were no return probes when we first found the tracepoint,
-	 * we should feel no obligation to honor any return probes that were
-	 * subsequently enabled -- they'll just have to wait until the next
-	 * time around.
-	 */
-	sv->save_srr0 = new_pc;				/* Set the new PC */
-	if (tp->ftt_retids != NULL) fasttrap_return_common(sv, pc, pid, new_pc);
-
-	return (0);
-}
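-
-/*
- * Illustrative sketch, not part of the original source: the addrmask
- * selection used by fasttrap_pid_probe() above, isolated.  MSR[SF] (the
- * high bit of SRR1) selects 64-bit mode; in 32-bit mode the next-PC
- * computation wraps at 4GB.
- */
-static user_addr_t
-example_next_pc(user_addr_t pc, uint64_t srr1)
-{
-	user_addr_t addrmask = (srr1 & 0x8000000000000000ULL) ?
-	    0xFFFFFFFFFFFFFFFFULL : 0x00000000FFFFFFFFULL;
-
-	return ((pc + 4) & addrmask);	/* e.g. pc 0xFFFFFFFC wraps to 0 in 32-bit mode */
-}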
-
-
-int
-fasttrap_return_probe(ppc_saved_state_t *sv)
-{
-
-	user_addr_t pc, npc;
-	
-	proc_t *p = current_proc();
-
-
-	/*
-	 * Treat a child created by a call to vfork(2) as if it were its
-	 * parent. We know that there's only one thread of control in such a
-	 * process: this one.
-	 */
-	/*
-	 * APPLE NOTE: Terry says: "You need to hold the process locks (currently: kernel funnel) for this traversal"
-	 * How do we assert this?
-	 */
-	while (p->p_lflag & P_LINVFORK) {
-		p = p->p_pptr;
-	}
-
-	pc = sv->save_srr0;		/* Get the PC of the probed instruction */
-	npc = pc + 4;			/* Get next PC */	
-	if(!(sv->save_srr1 & 0x8000000000000000ULL)) npc &= 0x00000000FFFFFFFF;	/* Wrap new PC if running 32-bit */
-	fasttrap_return_common(sv, pc, p->p_pid, npc);
-
-	return (0);
-}
-
-uint64_t
-fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
-    int aframes)
-{
-#pragma unused(arg, id, parg, aframes)
-	return (fasttrap_anarg((ppc_saved_state_t *)find_user_regs(current_thread()), 1, argno));
-}
-
-uint64_t
-fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
-    int aframes)
-{
-#pragma unused(arg, id, parg, aframes)
-	return (fasttrap_anarg((ppc_saved_state_t *)find_user_regs(current_thread()), 0, argno));
-}
-
-
-static int32_t branchtaken(int32_t bo, int32_t bi, ppc_saved_state_t *sv) {
-	int32_t bcond, czero, crmatch;
-	uint64_t ctr;
-	
-	if((bo & 0x14) == 0x14) return 1;	/* If this is a branch always, exit with true... */
-	
-	czero = 0;							/* Assume that we have not just decremented the CTR to 0 */
-	
-	if(!(bo & 4)) {						/* Skip the next bit if we do NOT muck with the CTR */
-		ctr = sv->save_ctr = sv->save_ctr - 1;	/* Decrement the CTR */
-		if(!(sv->save_srr1 & 0x8000000000000000ULL)) ctr &= 0x00000000FFFFFFFF;	/* Only look at the bottom 32 bits if 32-bit mode */
-		czero = (ctr == 0);				/* Remember if we just hit zero */
-	}
-	
-	bcond = (bo >> 3);					/* If 1, branch if CR flag is 1.  If 0, branch if 0 */
-	crmatch = bo >> 4;					/* If bo[0] is set, do not check CR flag */
-	crmatch = crmatch | (((sv->save_cr >> (31 - bi)) ^ bcond) ^ 1);	/* Low bit is now set if CR flag matches or CR is not checked. Other bits are trash. */
-
-//	dbgTrace(0x77777777, bo, bi, sv->save_cr, ((czero | crmatch) & 1));	/* (TRACE/DEBUG) */
-
-	return ((czero | crmatch) & 1);		/* Return 1 if branch taken, 0 if not... */	
-}
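-
-/*
- * Illustrative sketch, not part of the original source: the same test
- * written out the way the PowerPC ISA describes conditional branches,
- * for comparison with the condensed computation above.
- */
-static int32_t
-example_branch_taken(uint32_t bo, uint32_t bi, uint32_t cr, uint64_t *ctr)
-{
-	int32_t ctr_ok = 1, cond_ok = 1;
-
-	if (!(bo & 0x04)) {			/* BO[2] clear: decrement the CTR */
-		*ctr = *ctr - 1;
-		ctr_ok = (bo & 0x02) ? (*ctr == 0) : (*ctr != 0);	/* BO[3] selects the CTR sense */
-	}
-
-	if (!(bo & 0x10))			/* BO[0] clear: the CR bit is tested */
-		cond_ok = (((cr >> (31 - bi)) & 1) == ((bo >> 3) & 1));	/* BO[1] selects the CR sense */
-
-	return (ctr_ok && cond_ok);
-}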
-
-static int32_t dtrace_decode_ppc(uint32_t inst) {
-
-	int32_t curdcd, lastmask, newmask, spr, bit, bito, word;
-	uint16_t xop = 0;
-	dcdtab *dcd;
-	
-	curdcd = inst >> 26;				/* Isolate major op code to start decode */
-	lastmask = 99;						/* Always force a new xop at the start */
-	
-	while(1) {							/* Loop until we find instruction or fail */
-		dcd = &insts[curdcd];			/* Point to the current decode table entry */
-		if(dcd->dcdFlgs & dcdJump) {	/* Should we jump to a new spot in the decode table? */
-			curdcd = dcd->dcdMatch;		/* Jump */
-			continue;
-		}
-		
-		newmask = dcd->dcdFlgs & dcdMask;	/* Isolate the mask index */
-		if(lastmask != newmask) {		/* Are we changing masks? */
-			if(!newmask) break;			/* If the mask is 0, we match everything and succeed... (note: lastmask can never be 0) */
-			xop = inst & masktab[newmask];	/* Clear all extra bits to make match */
-			lastmask = newmask;			/* Remember */
-		}
-		
-		if(xop == dcd->dcdMatch) break;	/* We found our guy! */
-		
-		if(!(dcd->dcdFlgs & dcdStep)) {	/* No stepping, we failed */
-			dcd = &dcdfail;				/* Point to a failure entry */
-			break;						/* Leave... */
-		}
-		
-		curdcd = curdcd + 1;			/* Step to the next decode entry */
-	}
-
-	if(dcd->dcdType != diSPR) return (int32_t)(dcd->dcdType);	/* Return what we found */
-	
-	spr = (inst >> (31 - 20)) & 0x3FF;	/* Get the source */
-	spr = ((spr << 5) & 0x3E0) | ((spr >> 5) & 0x1F);	/* Flip to right order */
-	
-	word = spr >> 5;					/* Get word index into table */
-	bito = spr & 0x1F;					/* Get bit offset into entry */
-	bit = 0x80000000 >> bito;			/* Position bit for a test */
-	
-	if(!(sprtbl[word] & bit)) return (diINV);	/* Bogus SPR so whole instruction is invalid... */
-	
-	if(spr & 0x10) return (diPRV);		/* This is a privileged SPR so instruction is privileged... */
-	return (diCMN);						/* Just a common SPR so instruction is the same... */
-}
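-
-/*
- * Illustrative sketch, not part of the original source: mfspr/mtspr encode
- * the 10-bit SPR number with its 5-bit halves swapped, which is why the
- * decode above flips them back before consulting sprtbl.
- */
-static int32_t
-example_spr_number(uint32_t inst)
-{
-	int32_t spr = (inst >> 11) & 0x3FF;	/* Raw spr field, the same bits as (31 - 20) above */
-
-	/* Swap the halves back into numeric order */
-	return (((spr << 5) & 0x3E0) | ((spr >> 5) & 0x1F));	/* e.g. a raw field of 0x100 is SPR 8, the LR */
-}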
diff --git a/bsd/dev/ppc/fbt_ppc.c b/bsd/dev/ppc/fbt_ppc.c
deleted file mode 100644
index 0a505d23e..000000000
--- a/bsd/dev/ppc/fbt_ppc.c
+++ /dev/null
@@ -1,694 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-/* #pragma ident	"@(#)fbt.c	1.15	05/09/19 SMI" */
-
-#ifdef KERNEL
-#ifndef _KERNEL
-#define _KERNEL /* Solaris vs. Darwin */
-#endif
-#endif
-
-#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
-#include <kern/cpu_data.h>
-#include <kern/thread.h>
-#include <mach/thread_status.h>
-
-#include <mach-o/loader.h> 
-#include <mach-o/nlist.h>
-
-extern struct mach_header _mh_execute_header; /* the kernel's mach header */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/errno.h>
-#include <sys/stat.h>
-#include <sys/ioctl.h>
-#include <sys/conf.h>
-#include <sys/fcntl.h>
-#include <miscfs/devfs/devfs.h>
-
-#include <sys/dtrace.h>
-#include <sys/dtrace_impl.h>
-#include <sys/fbt.h>
-
-#include <sys/dtrace_glue.h>
-#include <machine/cpu_capabilities.h>
-
-#define DTRACE_INVOP_NOP_SKIP 4
-
-#define DTRACE_INVOP_MFLR_R0 11
-#define DTRACE_INVOP_MFLR_R0_SKIP 4
-
-#define FBT_MFLR_R0		0x7c0802a6
-
-#define FBT_MTLR_R0		0x7c0803a6
-#define FBT_BLR			0x4e800020
-#define FBT_BCTR		0x4e800420
-
-#define FBT_LI_MASK 0x03fffffc
-#define FBT_JUMP	0x48000000
-#define IS_JUMP(instr) (((instr) & ~FBT_LI_MASK) == FBT_JUMP) /* Relative, No LR update -- AA == 0b, LK == 0b */
-#define FBT_LI_EXTD64(instr) \
-	(((instr) & 0x02000000) ? \
-	 	(((uint64_t)((instr) & FBT_LI_MASK)) | 0xfffffffffc000000ULL) : \
-	 	 ((uint64_t)((instr) & FBT_LI_MASK)))
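-
-/*
- * Illustrative worked example, not part of the original source: for
- * instr = 0x4BFFFFF0 ("b .-16"), IS_JUMP() holds (instr & ~FBT_LI_MASK
- * is 0x48000000), the LI field is 0x03FFFFF0, its sign bit 0x02000000
- * is set, and FBT_LI_EXTD64(instr) is 0xFFFFFFFFFFFFFFF0ULL, i.e. a
- * displacement of -16.
- */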
-
-#define FBT_PATCHVAL	0x7c810808
-#define FBT_AFRAMES_ENTRY		6
-#define FBT_AFRAMES_RETURN		6
-
-#define	FBT_ENTRY	"entry"
-#define	FBT_RETURN	"return"
-#define	FBT_ADDR2NDX(addr)	((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
-
-extern dtrace_provider_id_t	fbt_id;
-extern fbt_probe_t		**fbt_probetab;
-extern int			fbt_probetab_mask;
-
-kern_return_t fbt_perfCallback(int, ppc_saved_state_t *, int, int);
-kern_return_t fbt_perfIntCallback(int, ppc_saved_state_t *, int, int);
-
-/*
- * Critical routines that must not be probed. PR_5221096, PR_5379018.
- */
-
-static const char * critical_blacklist[] =
-{
-	"bcopy_phys",
-	"bcopy_physvir_32",
-	"cpu_control",
-	"cpu_exit_wait",
-	"cpu_info",
-	"cpu_info_count",
-	"cpu_init",
-	"cpu_machine_init",
-	"cpu_per_proc_alloc",
-	"cpu_per_proc_free",
-	"cpu_signal_handler",
-	"cpu_sleep",
-	"cpu_start",
-	"cpu_subtype",
-	"cpu_threadtype",
-	"cpu_to_processor",
-	"cpu_type",
-	"mapSkipListVerifyC",
-	"ml_nofault_copy",
-	"register_cpu_setup_func",
-	"unregister_cpu_setup_func"
-};
-#define CRITICAL_BLACKLIST_COUNT (sizeof(critical_blacklist)/sizeof(critical_blacklist[0]))
-
-/*
- * The transitive closure of entry points that can be reached from probe context.
- * (Apart from routines whose names begin with dtrace_).
- */
-static const char * probe_ctx_closure[] =
-{
-	"Debugger",
-	"MapUserMemoryWindow",
-	"OSCompareAndSwap",
-	"absolutetime_to_microtime",
-	"bcopy",
-	"clock_get_calendar_nanotime_nowait",
-	"copyin",
-	"copyinstr",
-	"copyout",
-	"copyoutstr",
-	"cpu_number",
-	"current_proc",
-	"current_processor",
-	"current_task",
-	"current_thread",
-	"debug_enter",
-	"find_user_regs",
-	"getPerProc",
-	"get_bsdtask_info",
-	"get_bsdthread_info",
-	"get_threadtask",
-	"hw_atomic_and",
-	"hw_compare_and_store",
-	"hw_find_map",
-	"kauth_cred_get",
-	"kauth_getgid",
-	"kauth_getuid",
-	"mach_absolute_time",
-	"mapping_drop_busy",
-	"mapping_find",
-	"mapping_phys_lookup",
-	"max_valid_stack_address",
-	"ml_at_interrupt_context",
-	"ml_phys_write_byte_64",
-	"ml_phys_write_half_64",
-	"ml_phys_write_word_64",
-	"ml_set_interrupts_enabled",
-	"panic",
-	"pmap_find_phys",
-	"prf",
-	"proc_is64bit",
-	"proc_selfname",
-	"proc_selfpid",
-	"proc_selfppid",
-	"psignal_lock",
-	"sdt_getargdesc",
-	"splhigh",
-	"splx",
-	"strlcpy",
-	"systrace_stub",
-	"timer_grab"
-};
-#define PROBE_CTX_CLOSURE_COUNT (sizeof(probe_ctx_closure)/sizeof(probe_ctx_closure[0]))
-
-static int _cmp(const void *a, const void *b)
-{
-	return strncmp((const char *)a, *(const char **)b, strlen((const char *)a) + 1);
-}
-
-static const void * bsearch(
-	register const void *key,
-	const void *base0,
-	size_t nmemb,
-	register size_t size,
-	register int (*compar)(const void *, const void *)) {
-
-	register const char *base = base0;
-	register size_t lim;
-	register int cmp;
-	register const void *p;
-
-	for (lim = nmemb; lim != 0; lim >>= 1) {
-		p = base + (lim >> 1) * size;
-		cmp = (*compar)(key, p);
-		if (cmp == 0)
-			return p;
-		if (cmp > 0) {	/* key > p: move right */
-			base = (const char *)p + size;
-			lim--;
-		}		/* else move left */
-	}
-	return (NULL);
-}
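-
-/*
- * Illustrative usage sketch, not part of the original source: how the
- * local bsearch()/_cmp pair is meant to be driven.  The element size is
- * that of a char pointer, since both tables are arrays of string pointers.
- */
-static int
-example_is_blacklisted(const char *name)
-{
-	return (bsearch(name, critical_blacklist, CRITICAL_BLACKLIST_COUNT,
-	    sizeof(critical_blacklist[0]), _cmp) != NULL);	/* e.g. "cpu_init" => 1 */
-}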
-
-int
-fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval)
-{
-	fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
-	uint64_t mask = (_cpu_capabilities & k64Bit) ? 0xffffffffffffffffULL : 0x00000000ffffffffULL;
-
-	for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
-		if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
-			
-			if (fbt->fbtp_roffset == 0) {
-				ppc_saved_state_t *regs = (ppc_saved_state_t *)stack;
-
-				CPU->cpu_dtrace_caller = regs->save_lr;
-				
-				dtrace_probe(fbt->fbtp_id, regs->save_r3 & mask, regs->save_r4 & mask,
-					regs->save_r5 & mask, regs->save_r6 & mask, regs->save_r7 & mask);
-					
-				CPU->cpu_dtrace_caller = (uintptr_t)NULL;
-			} else {
-			
-				dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0);
-
-				if (fbt->fbtp_rval == DTRACE_INVOP_TAILJUMP) {
-					ppc_saved_state_t *regs = (ppc_saved_state_t *)stack;
-
-					regs->save_srr0 = (uint64_t)fbt->fbtp_patchpoint + FBT_LI_EXTD64(fbt->fbtp_savedval);
-					regs->save_srr0 &= mask;
-				}
-				
-				CPU->cpu_dtrace_caller = (uintptr_t)NULL;
-			}
-
-			return (fbt->fbtp_rval);
-		}
-	}
-
-	return (0);
-}
-
-#include <ppc/proc_reg.h> /* For USER_MODE */
-#define IS_USER_TRAP(regs) USER_MODE((regs)->save_srr1)
-#define T_VECTOR_SIZE   4               /* function pointer size */
-#define T_PROGRAM       (0x07 * T_VECTOR_SIZE)
-#define FBT_EXCEPTION_CODE T_PROGRAM
-
-kern_return_t
-fbt_perfCallback(
-                int         trapno,
-                ppc_saved_state_t *regs,
-                int         unused1,
-                int         unused2)
-{
-#pragma unused (unused1)
-#pragma unused (unused2)
-	kern_return_t retval = KERN_FAILURE;
-	
-	if (!IS_USER_TRAP(regs) && FBT_EXCEPTION_CODE == trapno) {
-		boolean_t oldlevel;
-		
-		oldlevel = ml_set_interrupts_enabled(FALSE);
-						
-		switch (dtrace_invop( regs->save_srr0, (uintptr_t *)regs, regs->save_r3 )) {
-		case DTRACE_INVOP_NOP:
-			regs->save_srr0 += DTRACE_INVOP_NOP_SKIP;	/* Skip over the bytes of the patched NOP */
-			retval = KERN_SUCCESS;
-			break;
-
-		case DTRACE_INVOP_MFLR_R0:
-			regs->save_r0 = regs->save_lr;					/* Emulate patched mflr r0 */
-			regs->save_srr0 += DTRACE_INVOP_MFLR_R0_SKIP;	/* Skip over the bytes of the patched mflr r0 */
-			retval = KERN_SUCCESS;
-			break;
-
-		case DTRACE_INVOP_RET:
-			regs->save_srr0 = regs->save_lr;				/* Emulate patched blr by resuming execution at the LR */
-			retval = KERN_SUCCESS;
-			break;
-			
-		case DTRACE_INVOP_BCTR:
-			regs->save_srr0 = regs->save_ctr;				/* Emulate patched bctr by resuming execution at the CTR */
-			retval = KERN_SUCCESS;
-			break;
-			
-		case DTRACE_INVOP_TAILJUMP:
-			retval = KERN_SUCCESS;
-			break;
-			
-		default:
-			retval = KERN_FAILURE;
-			break;
-		}
-		ml_set_interrupts_enabled(oldlevel);
-	}
-	
-	return retval;
-}
-
-kern_return_t
-fbt_perfIntCallback(
-                int         trapno,
-                ppc_saved_state_t *regs,
-                int         unused1,
-                int         unused2)
-{
-	kern_return_t retval = KERN_FAILURE;
-	
-	if (KERN_SUCCESS == (retval = fbt_perfCallback(trapno, regs, unused1, unused2)))
-		enable_preemption();
-	
-	return retval;
-}
-
-/*ARGSUSED*/
-static void
-__fbt_provide_module(void *arg, struct modctl *ctl)
-{
-#pragma unused(arg)
-	struct mach_header			*mh;
-	struct load_command         *cmd;
-    struct segment_command      *orig_ts = NULL, *orig_le = NULL;
-    struct symtab_command       *orig_st = NULL;
-	struct nlist                *sym = NULL;
-	char						*strings;
-	uintptr_t					instrLow, instrHigh;
-	char						*modname;
-	unsigned int i;
-
-	int gIgnoreFBTBlacklist = 0;
-	PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist));
-
-	mh = (struct mach_header *)(ctl->address);
-	modname = ctl->mod_modname;
-	
-	if (0 == ctl->address || 0 == ctl->size) /* Has the linker been jettisoned? */
-		return;
-		
-	/*
-	 * Employees of dtrace and their families are ineligible.  Void
-	 * where prohibited.
-	 */
-
-	if (LIT_STRNEQL(modname, "com.apple.driver.dtrace"))
-		return;
-
-	if (strstr(modname, "CHUD") != NULL)
-		return;
-		
-	if (mh->magic != MH_MAGIC)
-		return;
-		
-	cmd = (struct load_command *) &mh[1];
-	for (i = 0; i < mh->ncmds; i++) {
-        if (cmd->cmd == LC_SEGMENT) {
-            struct segment_command *orig_sg = (struct segment_command *) cmd;
- 
-            if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT))
-                orig_ts = orig_sg;
-            else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT))
-                orig_le = orig_sg;
-            else if (LIT_STRNEQL(orig_sg->segname, ""))
-                orig_ts = orig_sg; /* kexts have a single unnamed segment */
-        }
-        else if (cmd->cmd == LC_SYMTAB)
-            orig_st = (struct symtab_command *) cmd;
-
-        cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize);
-    }
-
-	if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
-		return;
-
-	sym = (struct nlist *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff);
-	strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff);
-	
-	/* Find extent of the TEXT section */
-	instrLow = (uintptr_t)orig_ts->vmaddr;
-	instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize);
-
-	for (i = 0; i < orig_st->nsyms; i++) {
-		fbt_probe_t *fbt, *retfbt;
-		machine_inst_t *instr, *limit, theInstr;
-        uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
-		char *name = strings + sym[i].n_un.n_strx;
-		int j;
-
-		/* Check that the symbol is a global and that it has a name. */
-        if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
-            continue;
-			
-		if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */
-			continue;
- 
-		/* Lop off omnipresent leading underscore. */			
-		if (*name == '_')
-			name += 1;
-		
-		if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) {
-			/*
-			 * Anything beginning with "dtrace_" may be called
-			 * from probe context unless it explicitly indicates
-			 * that it won't be called from probe context by
-			 * using the prefix "dtrace_safe_".
-			 */
-			continue;
-		}
-		
-		if (LIT_STRNSTART(name, "fasttrap_") ||
-		    LIT_STRNSTART(name, "fuword") ||
-		    LIT_STRNSTART(name, "suword") ||
-			LIT_STRNEQL(name, "sprlock") ||
-			LIT_STRNEQL(name, "sprunlock") ||
-			LIT_STRNEQL(name, "uread") ||
-			LIT_STRNEQL(name, "uwrite"))
-			continue; /* Fasttrap inner-workings. */
-
-        if (LIT_STRNSTART(name, "dsmos_")) 
-            continue; /* Don't Steal Mac OS X! */
-
-        if (LIT_STRNSTART(name, "_dtrace")) 
-			continue; /* Shims in dtrace.c */
-		
-		if (LIT_STRNSTART(name, "chud"))
-			continue; /* Professional courtesy. */
-
-        if (LIT_STRNSTART(name, "hibernate_"))
-            continue; /* Let sleeping dogs lie. */
-        
-        if (LIT_STRNEQL(name, "_ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */
-            LIT_STRNEQL(name, "_ZN9IOService26temperatureCriticalForZoneEPS_")) /* IOService::temperatureCriticalForZone */
-            continue; /* Per the fire code */
-
-		/*
-		 * Place no probes (illegal instructions) in the exception handling path!
-		 */
-		if (LIT_STRNEQL(name, "L_handler700") ||
-			LIT_STRNEQL(name, "save_get_phys_64") ||
-			LIT_STRNEQL(name, "save_get_phys_32") ||
-			LIT_STRNEQL(name, "EmulExit") ||
-			LIT_STRNEQL(name, "Emulate") ||
-			LIT_STRNEQL(name, "Emulate64") ||
-			LIT_STRNEQL(name, "switchSegs") ||
-			LIT_STRNEQL(name, "save_ret_phys"))
-			continue;
-
-		if (LIT_STRNEQL(name, "thandler") ||
-			LIT_STRNEQL(name, "versave") ||
-			LIT_STRNEQL(name, "timer_event") ||
-			LIT_STRNEQL(name, "hw_atomic_or") ||
-			LIT_STRNEQL(name, "trap"))
-			continue;
-
-		if (LIT_STRNEQL(name, "fbt_perfCallback") ||
-			LIT_STRNEQL(name, "fbt_perfIntCallback") ||
-			LIT_STRNEQL(name, "ml_set_interrupts_enabled") ||
-			LIT_STRNEQL(name, "dtrace_invop") ||
-			LIT_STRNEQL(name, "fbt_invop") ||
-			LIT_STRNEQL(name, "sdt_invop") ||
-			LIT_STRNEQL(name, "max_valid_stack_address"))
-			continue;
-
-		/*
-		 * Probes encountered while we're on the interrupt stack are routed along
-		 * the interrupt handling path. No probes allowed there either!
-		 */
-		if (LIT_STRNEQL(name, "ihandler") ||
-			LIT_STRNEQL(name, "interrupt") ||
-			LIT_STRNEQL(name, "disable_preemption"))
-			continue;
-
-		/*
-		 * Avoid weird stack voodoo in and under machine_stack_handoff et al
-		 */
-        if (LIT_STRNSTART(name, "machine_stack") ||
-            LIT_STRNEQL(name, "getPerProc") ||     /* Called in machine_stack_handoff with weird stack state */
-            LIT_STRNEQL(name, "fpu_save") ||     /* Called in machine_stack_handoff with weird stack state */
-            LIT_STRNEQL(name, "vec_save") ||     /* Called in machine_stack_handoff with weird stack state */
-            LIT_STRNEQL(name, "pmap_switch"))     /* Called in machine_stack_handoff with weird stack state */
-				continue;
-
-		/*
-		 * Avoid machine_ routines. PR_5346750.
-		 */
-		if (LIT_STRNSTART(name, "machine_"))
-			continue;
-
-		/*
-		 * Avoid low level pmap and virtual machine monitor PowerPC routines. See PR_5379018.
-		 */
-
-		if (LIT_STRNSTART(name, "hw_") ||
-			LIT_STRNSTART(name, "mapping_") ||
-			LIT_STRNSTART(name, "commpage_") ||
-			LIT_STRNSTART(name, "pmap_") ||
-			LIT_STRNSTART(name, "vmm_"))
-				continue;
-		/*
-		 * Place no probes on critical routines. PR_5221096
-		 */
-		if (!gIgnoreFBTBlacklist && 
-			bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL)
-				continue;
-
-		/*
-		 * Place no probes that could be hit in probe context.
-		 */
-		if (!gIgnoreFBTBlacklist && 
-			bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL)
-				continue;
-
-		/*
-		 * Place no probes that could be hit on the way to the debugger.
-		 */
-		if (LIT_STRNSTART(name, "kdp_") ||
-			LIT_STRNSTART(name, "kdb_") ||
-			LIT_STRNSTART(name, "kdbg_") ||
-			LIT_STRNSTART(name, "kdebug_") ||
-			LIT_STRNEQL(name, "kernel_debug") ||
-			LIT_STRNEQL(name, "Debugger") ||
-			LIT_STRNEQL(name, "Call_DebuggerC") ||
-			LIT_STRNEQL(name, "lock_debugger") ||
-			LIT_STRNEQL(name, "unlock_debugger") ||
-			LIT_STRNEQL(name, "SysChoked")) 
-			continue;
-		
-		/*
-		 * Place no probes that could be hit on the way to a panic.
-		 */
-		if (NULL != strstr(name, "panic_") ||
-			LIT_STRNEQL(name, "panic") ||
-			LIT_STRNEQL(name, "handleMck") ||
-			LIT_STRNEQL(name, "unresolved_kernel_trap"))
-			continue;
-		
-		if (dtrace_probe_lookup(fbt_id, modname, name, NULL) != 0)
-			continue;
-			
-		/*
-		 * Scan forward for mflr r0.
-		 */
-		for (j = 0, instr = (machine_inst_t *)sym[i].n_value, theInstr = 0;
-			 (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)instr);
-			 j++, instr++) 
-		{
-			theInstr = *instr;
-			if (theInstr == FBT_MFLR_R0) /* Place the entry probe here. */
-				break;
-			if (theInstr == FBT_MTLR_R0) /* We've gone too far, bail. */
-				break;
-			if (theInstr == FBT_BLR) /* We've gone too far, bail. */
-				break;
-		}
-			
-		if (theInstr != FBT_MFLR_R0)
-			continue;
-			
-		limit = (machine_inst_t *)instrHigh;
-
-		fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
-		strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );
-		fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_ENTRY, FBT_AFRAMES_ENTRY, fbt);
-		fbt->fbtp_patchpoint = instr;
-		fbt->fbtp_ctl = ctl;
-		fbt->fbtp_loadcnt = ctl->mod_loadcnt;
-		fbt->fbtp_rval = DTRACE_INVOP_MFLR_R0;
-		fbt->fbtp_savedval = theInstr;
-		fbt->fbtp_patchval = FBT_PATCHVAL;
-
-		fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
-		fbt->fbtp_symndx = i;
-		fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
-
-		instr++; /* Move on down the line */
-		retfbt = NULL;
-again:
-		if (instr >= limit)
-			continue;
-
-		/*
-		 * We (desperately) want to avoid erroneously instrumenting a
-		 * jump table. To determine if we're looking at a true instruction
-		 * or an inline jump table that happens to contain the same
- * byte sequences, we resort to some heuristic sleaze:  we
-		 * treat this instruction as being contained within a pointer,
-		 * and see if that pointer points to within the body of the
-		 * function.  If it does, we refuse to instrument it.
-		 */
-		{
-			machine_inst_t *ptr = *(machine_inst_t **)instr;
-
-			if (ptr >= (machine_inst_t *)sym[i].n_value && ptr < limit) {
-				instr++;
-				goto again;
-			}
-		}
-
-		/*
-		 * OK, it's an instruction.
-		 */
-		theInstr = *instr;
-
-		/* Walked onto the start of the next routine? If so, bail out from this function. */
-		if (theInstr == FBT_MFLR_R0)
-			continue;
-
-		if (theInstr != FBT_MTLR_R0) {
-			instr++;
-			goto again;
-		}
-
-		/*
-		 * Found mtlr r0;
-		 * Scan forward for a blr, bctr, or a jump (relative, no LR change).
-		 */
-		instr++;
-		for (j = 0; (j < 12) && (instr < limit); j++, instr++) {
-			theInstr = *instr;
-			if (theInstr == FBT_BLR || theInstr == FBT_BCTR || IS_JUMP(theInstr) || 
-				theInstr == FBT_MFLR_R0 || theInstr == FBT_MTLR_R0)
-				break;
-		}
-
-		if (!(theInstr == FBT_BLR || theInstr == FBT_BCTR || IS_JUMP(theInstr)))
-			goto again;
-
-		/*
-		 * We have a winner: "mtlr r0; ... ; {blr, bctr, j}" !
-		 */
-		fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
-		strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );
-
-		if (retfbt == NULL) {
-			fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
-			    name, FBT_RETURN, FBT_AFRAMES_RETURN, fbt);
-		} else {
-			retfbt->fbtp_next = fbt;
-			fbt->fbtp_id = retfbt->fbtp_id;
-		}
-
-		retfbt = fbt;
-		fbt->fbtp_patchpoint = instr;
-		fbt->fbtp_ctl = ctl;
-		fbt->fbtp_loadcnt = ctl->mod_loadcnt;
-
-		if (theInstr == FBT_BLR)
-			fbt->fbtp_rval = DTRACE_INVOP_RET;
-		else if (theInstr == FBT_BCTR)
-			fbt->fbtp_rval = DTRACE_INVOP_BCTR;
-		else
-			fbt->fbtp_rval = DTRACE_INVOP_TAILJUMP;
-
-		fbt->fbtp_roffset =
-		    (uintptr_t)((uint8_t *)instr - (uint8_t *)sym[i].n_value);
-
-		fbt->fbtp_savedval = *instr;
-		fbt->fbtp_patchval = FBT_PATCHVAL;
-		fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
-		fbt->fbtp_symndx = i;
-		fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
-		instr++;
-		goto again;
-	}
-}
-
-extern struct modctl g_fbt_kernctl;
-#undef kmem_alloc /* from its binding to dt_kmem_alloc glue */
-#undef kmem_free /* from its binding to dt_kmem_free glue */
-#include <vm/vm_kern.h>
-
-/*ARGSUSED*/
-void
-fbt_provide_module(void *arg, struct modctl *ctl)
-{
-#pragma unused(ctl)
-	__fbt_provide_module(arg, &g_fbt_kernctl);
-
-	if ( (vm_offset_t)g_fbt_kernctl.address != (vm_offset_t )NULL )
-	    kmem_free(kernel_map, (vm_offset_t)g_fbt_kernctl.address, round_page(g_fbt_kernctl.size));
-	g_fbt_kernctl.address = 0;
-	g_fbt_kernctl.size = 0;
-}
diff --git a/bsd/dev/ppc/ffs.c b/bsd/dev/ppc/ffs.c
deleted file mode 100644
index c3f06a74f..000000000
--- a/bsd/dev/ppc/ffs.c
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1991 NeXT Computer, Inc.  All rights reserved.
- *
- *      File:   machdep/i386/libc/ffs.c
- *      Author: Bruce Martin, NeXT Computer, Inc.
- *
- *      This file contains machine dependent code for the ffs function
- *      on NeXT i386-based products.  Currently tuned for the i486.
- *
- * HISTORY
- * 27-Sep-92  Bruce Martin (Bruce_Martin@NeXT.COM)
- *	Created: stolen from Mike's code.
- */
-
-unsigned
-ffs(unsigned mask)
-{
-	unsigned bitpos;
-
-	if (mask == 0)
-		return 0;
-
-	bitpos = 1;
-	while ((mask & 0xff) == 0) {
-		bitpos += 8;
-		mask >>= 8;
-	}
-	while ((mask & 1) == 0) {
-		bitpos += 1;
-		mask >>= 1;
-	}
-	return bitpos;
-}
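-
-/*
- * Illustrative worked examples, not part of the original source; ffs()
- * numbers bits from 1 at the least-significant end:
- *
- *	ffs(0x00000001) == 1
- *	ffs(0x00000018) == 4	(lowest set bit of 0b11000)
- *	ffs(0x80000000) == 32
- *	ffs(0)          == 0
- */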
diff --git a/bsd/dev/ppc/ffs.s b/bsd/dev/ppc/ffs.s
deleted file mode 100644
index 290053a82..000000000
--- a/bsd/dev/ppc/ffs.s
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1992, 1997-1998 Apple Computer, Inc.  All rights reserved.
- *
- *      File:  machdep/ppc/libc/ffs.s
- *
- *	int ffs(int value)
- *
- *	DESCRIPTION
- *		The ffs() function finds the first bit set in value and returns the
- *		index of that bit.  Bits are numbered starting from 1, starting at
- *		the right-most bit. A return value of 0 means that the argument was
- *		the right-most bit. A return value of 0 means that the argument was zero.
- * HISTORY
- *	14-Aug-1998	Umesh Vaishampayan	(umeshv@apple.com)
- *		Optimized!
- *
- *  10-Mar-1998 Matt Watson (mwatson@apple.com)
- *      Correctified
- *
- *  19-Jan-1998 Matt Watson (mwatson@apple.com)
- *      Simplified
- *
- *  24-Jan-1997 Umesh Vaishampayan (umeshv@NeXT.com)
- *      Ported to PPC. 
- */
-
-.text
-.align 4
-.globl _ffs
-_ffs:					/* Cycles */
-	neg		r0,r3		/* 0 */
-	and		r3,r0,r3	/* 1 */
-	li		r4, 32		/* 1 */
-	cntlzw	r3,r3		/* 2 */
-	subf	r3,r3,r4	/* 3 */
-	blr
-
-	.globl	_abs
-_abs:
-	srawi r0,r3,31
-	xor r3,r0,r3
-	subf r3,r0,r3
-	blr
-
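-/*
- * Illustrative sketch, not part of the original source: the branch-free
- * computation _ffs performs above, restated in C.  Assumes a
- * __builtin_clz-style count-leading-zeros, standing in for cntlzw:
- *
- *	unsigned example_ffs(unsigned x)
- *	{
- *		if (x == 0)
- *			return 0;
- *		x &= -x;			// neg/and: isolate the lowest set bit
- *		return 32 - __builtin_clz(x);	// cntlzw/subf: 1-based index
- *	}
- */
-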
diff --git a/bsd/dev/ppc/kern_machdep.c b/bsd/dev/ppc/kern_machdep.c
deleted file mode 100644
index 1f45bd131..000000000
--- a/bsd/dev/ppc/kern_machdep.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- *	Copyright (C) 1990, 1993  NeXT, Inc.
- *	Copyright (C) 1997  Apple Computer, Inc.
- *
- *	File:	next/kern_machdep.c
- *	Author:	John Seamons
- *
- *	Machine-specific kernel routines.
- */
-
-#include	<sys/types.h>
-#include	<sys/param.h>
-#include	<mach/machine.h>
-#include	<mach/boolean.h>
-#include	<mach/vm_param.h>
-#include	<kern/cpu_number.h>
-#include	<machine/exec.h>
-
-boolean_t kernacc(off_t, size_t );
-
-
-/*
- * Routine: grade_binary()
- *
- * Function:
- *	Return a relative preference for exectypes and execsubtypes in fat
- *	executable files.  The higher the grade, the higher the preference.
- *	A grade of 0 means not acceptable.
- *
- * Note:	We really don't care about the real cpu_type() here,
- *		because machines can only have one type.
- */
-int
-grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype)
-{
-	int		cpusubtype = cpu_subtype();
-
-	/*
-	 * This code should match cpusubtype_findbestarch() in best_arch.c
-	 * in the cctools project.  As of 2/16/98 this is what has been
-	 * agreed upon for the PowerPC subtypes.  If an exact match is not
-	 * found the subtype will be picked from the following order:
-	 *		970(but only on 970), 7450, 7400, 750, ALL
-	 * Note the 601 is NOT in the list above.  It is only picked via
-	 * an exact match. For details see Radar 2213821.
-	 */
-
-	switch (cpusubtype) {
-	case CPU_SUBTYPE_POWERPC_970:
-		switch(exectype) {
-		case CPU_TYPE_POWERPC64:	/* CPU_IS64BIT | CPU_POWERPC */
-			switch(execsubtype) {
-			/*
-			 * Prefer 64 bit architecture specific binaries; note
-			 * that this value does not mean the same thing here
-			 * as it does below.
-			 */
-			case CPU_SUBTYPE_POWERPC_970:
-				return 8;
-			/* Prefer generic binaries */
-			case CPU_SUBTYPE_POWERPC_ALL:
-				return 7;
-			default:
-				return 0;
-			}
-			/* NOTREACHED */
-
-		case CPU_TYPE_POWERPC:
-			switch(execsubtype) {
-			/*
-			 * Prefer 32 bit binaries with 64 bit leaf functions;
-			 * this is actually bogus use of the subtype to encode
-			 * CPU feature bits.
-			 */
-			case CPU_SUBTYPE_POWERPC_970:
-				return 6;
-			case CPU_SUBTYPE_POWERPC_7450:
-				return 4;
-			case CPU_SUBTYPE_POWERPC_7400:
-				return 3;
-			case CPU_SUBTYPE_POWERPC_750:
-				return 2;
-			case CPU_SUBTYPE_POWERPC_ALL:
-				return 1;
-			default:
-				return 0;
-			}
-			/* NOTREACHED */
-
-		default:
-			return 0;
-		}
-		/* NOTREACHED */
-
-	case CPU_SUBTYPE_POWERPC_7450:
-		switch(exectype) {
-		case CPU_TYPE_POWERPC64:	/* CPU_IS64BIT | CPU_POWERPC */
-			return 0;
-
-		case CPU_TYPE_POWERPC:
-			switch(execsubtype) {
-			case CPU_SUBTYPE_POWERPC_7450:
-				return 6;
-			case CPU_SUBTYPE_POWERPC_7400:
-				return 4;
-			case CPU_SUBTYPE_POWERPC_750:
-				return 3;
-			case CPU_SUBTYPE_POWERPC_ALL:
-				return 1;
-			default:
-				return 0;
-			}
-			/* NOTREACHED */
-
-		default:
-			return 0;
-		}
-		/* NOTREACHED */
-
-	case CPU_SUBTYPE_POWERPC_7400:
-		switch(exectype) {
-		case CPU_TYPE_POWERPC64:	/* CPU_IS64BIT | CPU_POWERPC */
-			return 0;
-
-		case CPU_TYPE_POWERPC:
-			switch(execsubtype) {
-			case CPU_SUBTYPE_POWERPC_7400:
-				return 6;
-			case CPU_SUBTYPE_POWERPC_7450:
-				return 4;
-			case CPU_SUBTYPE_POWERPC_750:
-				return 3;
-			case CPU_SUBTYPE_POWERPC_ALL:
-				return 1;
-			default:
-				return 0;
-			}
-			/* NOTREACHED */
-
-		default:
-			return 0;
-		}
-		/* NOTREACHED */
-
-	case CPU_SUBTYPE_POWERPC_750:
-		switch(exectype) {
-		case CPU_TYPE_POWERPC64:	/* CPU_IS64BIT | CPU_POWERPC */
-			return 0;
-
-		case CPU_TYPE_POWERPC:
-			switch(execsubtype) {
-			case CPU_SUBTYPE_POWERPC_750:
-				return 6;
-#ifndef ADDRESS_RADAR_2678019
-			/*
-			 * Currently implemented because dropping this would
-			 * turn the executable subtype into a "has Altivec"
-			 * flag, which we do not want to permit.  It could
-			 * also break working third party applications
-			 * already in use in the field.
-			 */
-			case CPU_SUBTYPE_POWERPC_7400:
-				return 4;
-			case CPU_SUBTYPE_POWERPC_7450:
-				return 3;
-#endif	/* ADDRESS_RADAR_2678019 */
-			case CPU_SUBTYPE_POWERPC_ALL:
-				return 1;
-			default:
-				return 0;
-			}
-			/* NOTREACHED */
-
-		default:
-			return 0;
-		}
-		/* NOTREACHED */
-
-	default:
-		switch(exectype) {
-		case CPU_TYPE_POWERPC64:	/* CPU_IS64BIT | CPU_POWERPC */
-			return 0;
-
-		case CPU_TYPE_POWERPC:
-			/* Special case for PPC601 */
-			if (cpusubtype == execsubtype)
-				return 6;
-			/*
-			 * If we get here it is because it is a cpusubtype we
-			 * don't support or a new cpusubtype that was added
-			 * since this code was written.  Both will be
-			 * considered unacceptable.
-			 */
-			return 0;
-			/* NOTREACHED */
-
-		default:
-			return 0;
-		}
-		/* NOTREACHED */
-	}
-	/* NOTREACHED */
-}
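-
-/*
- * Illustrative usage sketch, not part of the original source: on a 7450
- * machine (cpu_subtype() == CPU_SUBTYPE_POWERPC_7450) the grades above
- * rank a fat binary's slices as
- *
- *	grade_binary(CPU_TYPE_POWERPC,   CPU_SUBTYPE_POWERPC_7450) == 6
- *	grade_binary(CPU_TYPE_POWERPC,   CPU_SUBTYPE_POWERPC_7400) == 4
- *	grade_binary(CPU_TYPE_POWERPC,   CPU_SUBTYPE_POWERPC_ALL)  == 1
- *	grade_binary(CPU_TYPE_POWERPC64, CPU_SUBTYPE_POWERPC_970)  == 0
- *
- * so exec picks the 7450-specific slice when one is present.
- */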
-
-extern vm_map_offset_t kvtophys64(vm_map_offset_t);
-
-boolean_t
-kernacc(
-    off_t 	start,
-    size_t	len
-)
-{
-	off_t base;
-	off_t end;
-    
-	base = trunc_page_64(start);
-	end = start + len;
-	
-	while (base < end) {
-		if(kvtophys64((vm_map_offset_t)base) == (vm_map_offset_t)0)
-			return(FALSE);
-		base += page_size;
-	}   
-
-	return (TRUE);
-}
-
-void
-md_prepare_for_shutdown(int paniced, int howto, char * command);
-
-void
-md_prepare_for_shutdown(__unused int paniced, __unused int howto,
-			__unused char * command)
-{
-	return;
-}
diff --git a/bsd/dev/ppc/km.c b/bsd/dev/ppc/km.c
deleted file mode 100644
index e82d6be27..000000000
--- a/bsd/dev/ppc/km.c
+++ /dev/null
@@ -1,392 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* 	Copyright (c) 1992 NeXT Computer, Inc.  All rights reserved. 
- *
- * km.m - kernel keyboard/monitor module, procedural interface.
- *
- * HISTORY
- */
-
-#include <sys/kernel.h>
-#include <sys/tty.h>
-
-#include <machine/cons.h>
-#include <sys/conf.h>
-#include <sys/systm.h>
-#include <sys/uio.h>
-#include <sys/fcntl.h>		/* for kmopen */
-#include <sys/errno.h>		
-#include <sys/proc.h>		/* for kmopen */
-#include <sys/msgbuf.h>
-#include <sys/time.h>
-#include <dev/kmreg_com.h>
-#include <pexpert/pexpert.h>
-
-/*
- * 'Global' variables, shared only by this file and conf.c.
- */
-struct tty *km_tty[1] = { 0 };
-
-/*
- * this works early on, after initialize_screen() but before autoconf (and thus
- * before we have a kmDevice).
- */
-int disableConsoleOutput;
-
-static int initialized = 0;
-
-extern void kminit(void);
-
-// used by or implemented in the osfmk project
-extern void cnputcusr(char);		// From osfmk
-extern int  cngetc(void);		// From osfmk
-extern void cons_cinput(char ch);	// Used by osfmk
-
-static int kmoutput(struct tty *tp);
-static void kmtimeout(void *tp);
-static void kmstart(struct tty *tp);
-
-extern void KeyboardOpen(void);
-
-void
-kminit(void)
-{
-	km_tty[0] = ttymalloc();
-   	km_tty[0]->t_dev = makedev(12, 0);
-	initialized = 1;
-}
-
-/*
- * cdevsw interface to km driver.
- */
-int 
-kmopen(dev_t dev, int flag, __unused int devtype, proc_t pp)
-{
-	int unit;
-	struct tty *tp;
-	struct winsize *wp;
-	int ret;
-	
-	unit = minor(dev);
-	if(unit >= 1)
-		return (ENXIO);
-
-	tp = km_tty[unit];
-
-	tty_lock(tp);
-
-	tp->t_oproc = kmstart;
-	tp->t_param = NULL;
-	tp->t_dev = dev;
-	
-	if ( !(tp->t_state & TS_ISOPEN) ) {
-		tp->t_iflag = TTYDEF_IFLAG;
-		tp->t_oflag = TTYDEF_OFLAG;
-		tp->t_cflag = (CREAD | CS8 | CLOCAL);
-		tp->t_lflag = TTYDEF_LFLAG;
-		tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED;
-		termioschars(&tp->t_termios);
-		ttsetwater(tp);
-	} else if ((tp->t_state & TS_XCLUDE) && proc_suser(pp)) {
-		ret = EBUSY;
-		goto out;
-	}
-
-	tp->t_state |= TS_CARR_ON; /* lie and say carrier exists and is on. */
-
-	ret = ((*linesw[tp->t_line].l_open)(dev, tp));
-	{
-		PE_Video video;
-		wp = &tp->t_winsize;
-		/*
-		 * Magic numbers.  These are CHARWIDTH and CHARHEIGHT
-		 * from osfmk/ppc/POWERMAC/video_console.c
-		 */
-		wp->ws_xpixel = 8;
-		wp->ws_ypixel = 16;
-
-		tty_unlock(tp);		/* XXX race window */
-
-		if (flag & O_POPUP)
-			PE_initialize_console(0, kPETextScreen);
-
-		bzero(&video, sizeof(video));
-		PE_current_console(&video);
-
-		tty_lock(tp);
-
-		if( video.v_width != 0 && video.v_height != 0 ) {
-			wp->ws_col = video.v_width / wp->ws_xpixel;
-			wp->ws_row = video.v_height / wp->ws_ypixel;
-		} else {
-			wp->ws_col = 100;
-			wp->ws_row = 36;
-		}
-	}
-
-out:
-	tty_unlock(tp);
-
-	return ret;
-}
-
-int 
-kmclose(dev_t dev, int flag, __unused int mode, __unused proc_t p)
-{
-	int ret;
-	struct tty *tp = km_tty[minor(dev)];
-
-	tty_lock(tp);
-	ret = (*linesw[tp->t_line].l_close)(tp, flag);
-	ttyclose(tp);
-	tty_unlock(tp);
-
-	return (ret);
-}
-
-int 
-kmread(dev_t dev, struct uio *uio, int ioflag)
-{
-	int ret;
-	struct tty *tp = km_tty[minor(dev)];
-
-	tty_lock(tp);
-	ret = (*linesw[tp->t_line].l_read)(tp, uio, ioflag);
-	tty_unlock(tp);
-
-	return (ret);
-}
-
-int 
-kmwrite(dev_t dev, struct uio *uio, int ioflag)
-{
-	int ret;
-	struct tty *tp = km_tty[minor(dev)];
-
-	tty_lock(tp);
-	ret = (*linesw[tp->t_line].l_write)(tp, uio, ioflag);
-	tty_unlock(tp);
-
-	return (ret);
-}
-
-int 
-kmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p)
-{
-	int error = 0;
-	struct tty *tp = km_tty[minor(dev)];
-	struct winsize *wp;
-
-	tty_lock(tp);
-	
-	switch (cmd) {
-	    case KMIOCSIZE:
-		wp = (struct winsize *)data;
-		*wp = tp->t_winsize;
-		break;
-		
-	    case TIOCSWINSZ:
-		/* Prevent changing of console size --
-		 * this ensures that login doesn't revert to the
-		 * termcap-defined size
-		 */
-		error = EINVAL;
-		break;
-
-	    /* Bodge in the CLOCAL flag as the km device is always local */
-	    case TIOCSETA_32:
-	    case TIOCSETAW_32:
-	    case TIOCSETAF_32:
-		{
-			struct termios32 *t = (struct termios32 *)data;
-			t->c_cflag |= CLOCAL;
-			/* No Break */
-		}
-		goto fallthrough;
-	    case TIOCSETA_64:
-	    case TIOCSETAW_64:
-	    case TIOCSETAF_64:
-		{
-			struct user_termios *t = (struct user_termios *)data;
-			t->c_cflag |= CLOCAL;
-			/* No Break */
-		}
-fallthrough:
-	    default:		
-		error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p);
-		if (ENOTTY != error)
-			break;
-		error = ttioctl_locked(tp, cmd, data, flag, p);
-		break;
-	}
-
-	tty_unlock(tp);
-
-	return (error);
-}
-
-/*
- * kmputc
- *
- * Output a character to the serial console driver via cnputcusr(),
- * which is exported by that driver.
- *
- * Locks:       Assumes tp in the calling tty driver code is locked on
- *              entry, remains locked on exit
- *
- * Notes:       Called from kmoutput(); giving the locking output
- *              assumptions here, this routine should be static (and
- *              inlined, given there is only one call site).
- */
-int 
-kmputc(__unused dev_t dev, char c)
-{
-	if(!disableConsoleOutput && initialized) {
-		/* OCRNL */
-		if(c == '\n')
-			cnputcusr('\r');
-		cnputcusr(c);
-	}
-
-	return (0);
-}
-
-
-/*
- * Callouts from linesw.
- */
- 
-#define KM_LOWAT_DELAY	((ns_time_t)1000)
-
-/*
- * t_oproc for this driver; called from within the line discipline
- *
- * Locks:	Assumes tp is locked on entry, remains locked on exit
- */
-static void 
-kmstart(struct tty *tp)
-{
-	if (tp->t_state & (TS_TIMEOUT | TS_BUSY | TS_TTSTOP))
-		goto out;
-	if (tp->t_outq.c_cc == 0)
-		goto out;
-	tp->t_state |= TS_BUSY;
-	kmoutput(tp);
-	return;
-
-out:
-	(*linesw[tp->t_line].l_start)(tp);
-	return;
-}
-
-/*
- * One-shot output retry timeout from kmoutput(); re-calls kmoutput() at
- * intervals until the output queue for the tty is empty, at which point
- * the timeout is not rescheduled by kmoutput()
- *
- * This function must take the tty_lock() around the kmoutput() call; it
- * ignores the return value.
- */
-static void
-kmtimeout(void *arg)
-{
-	struct tty	*tp = (struct tty *)arg;
-
-	tty_lock(tp);
-	(void)kmoutput(tp);
-	tty_unlock(tp);
-}
-
-/*
- * kmoutput
- *
- * Locks:	Assumes tp is locked on entry, remains locked on exit
- *
- * Notes:	Called from kmstart() and kmtimeout(); kmtimeout() is a
- *		timer initiated by this routine to deal with pending
- *		output not yet flushed (output is flushed at a maximum
- *		of sizeof(buf) characters at a time before dropping into
- *		the timeout code).
- */
-static int 
-kmoutput(struct tty *tp)
-{
-	char 	buf[80];	/* buffer; limits output per call */
-	char 	*cp;
-	int 	cc = -1;
-
-
-	/* While there is data available to be output... */
-	while (tp->t_outq.c_cc > 0) {
-		cc = ndqb(&tp->t_outq, 0);
-		if (cc == 0)
-			break;
-		/*
-		 * attempt to output as many characters as are available,
-		 * up to the available transfer buffer size.
-		 */
-		cc = min(cc, sizeof buf);
-		/* copy the output queue contents to the buffer */
-		(void) q_to_b(&tp->t_outq, (unsigned char *)buf, cc);
-		for (cp = buf; cp < &buf[cc]; cp++) {
-			/* output the buffer one character at a time */
-			kmputc(tp->t_dev, *cp & 0x7f);
-		}
-	}
-        if (tp->t_outq.c_cc > 0) {
-		timeout((timeout_fcn_t)kmtimeout, tp, hz);
-	}
-	tp->t_state &= ~TS_BUSY;
-	(*linesw[tp->t_line].l_start)(tp);
-
-	return 0;
-}
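
kmoutput() caps each pass at sizeof(buf) bytes and re-arms a one-shot timeout whenever data is left over, rather than looping in place. A self-contained sketch of that drain-and-rearm pattern; the queue and timer helpers are hypothetical stand-ins for the clist routines and timeout():

    #include <stddef.h>
    #include <string.h>

    struct outq { const char *data; size_t len; };   /* stand-in for struct clist */

    static void emit(char c) { (void)c; }            /* stand-in for kmputc()     */
    static void arm_retry(void (*fn)(struct outq *),
                          struct outq *q) { (void)fn; (void)q; } /* ~ timeout(..., hz) */

    static void drain(struct outq *q)
    {
        char buf[80];                                /* same per-pass cap as above   */
        size_t i, cc = q->len < sizeof buf ? q->len : sizeof buf;

        memcpy(buf, q->data, cc);                    /* stand-in for q_to_b()        */
        for (i = 0; i < cc; i++)
            emit(buf[i] & 0x7f);                     /* 7-bit strip, as in kmoutput  */
        q->data += cc;
        q->len  -= cc;

        if (q->len > 0)
            arm_retry(drain, q);                     /* reschedule, like kmtimeout() */
    }
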
-
-/*
- * cons_cinput
- *
- * The polled-mode serial console driver calls this routine to feed a
- * character from the serial driver into the tty line discipline's
- * input-processing receive interrupt routine, l_rint().
- *
- * Locks:       Assumes that the tty_lock() is NOT held on the tp, so a
- *              serial driver should NOT call this function as a result
- *              of being called from a function which already holds the
- *              lock; ECHOE will be handled at the line discipline, if
- *              output echo processing is going to occur.
- */
-void
-cons_cinput(char ch)
-{
-	struct tty *tp = km_tty[0];	/* XXX */
-
-	tty_lock(tp);
-	(*linesw[tp->t_line].l_rint) (ch, tp);
-	tty_unlock(tp);
-}
diff --git a/bsd/dev/ppc/mem.c b/bsd/dev/ppc/mem.c
deleted file mode 100644
index fc2d39efb..000000000
--- a/bsd/dev/ppc/mem.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*-
- * Copyright (c) 1988 University of Utah.
- * Copyright (c) 1982, 1986, 1990, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * the Systems Programming Group of the University of Utah Computer
- * Science Department, and code derived from software contributed to
- * Berkeley by William Jolitz.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: Utah $Hdr: mem.c 1.13 89/10/08$
- *	@(#)mem.c	8.1 (Berkeley) 6/11/93
- */
-
-#include <mach_load.h>
-
-/*
- * Memory special file
- */
-
-#include <sys/param.h>
-#include <sys/dir.h>
-#include <sys/proc.h>
-#include <sys/systm.h>
-#include <sys/conf.h>
-#include <sys/vm.h>
-#include <sys/uio_internal.h>
-#include <sys/malloc.h>
-
-#include <vm/pmap.h>
-#include <vm/vm_map.h>
-#include <vm/vm_kern.h>
-#include <mach/vm_param.h>
-
-#include <ppc/Diagnostics.h>
-#include <ppc/mappings.h>
-
-static caddr_t devzerobuf;
-
-extern boolean_t kernacc(off_t, size_t );
-extern int setup_kmem;
-
-int mmread(dev_t dev, struct uio *uio, int flag);
-int mmrw(dev_t dev, struct uio *uio, enum uio_rw rw);
-int mmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p);
-int mmwrite(dev_t dev, struct uio *uio, int flag);
-
-int
-mmread(dev_t dev, struct uio *uio, __unused int flag)
-{
-
-	return (mmrw(dev, uio, UIO_READ));
-}
-
-int
-mmwrite(dev_t dev, struct uio *uio, __unused int flag)
-{
-
-	return (mmrw(dev, uio, UIO_WRITE));
-}
-
-int
-mmioctl(dev_t dev, u_long cmd, __unused caddr_t data, 
-		__unused int flag, __unused struct proc *p)
-{
-	int minnum = minor(dev);
-
-	if ((setup_kmem == 0) && ((minnum == 0) || (minnum == 1)))
-		return(EINVAL);
-
-	switch (cmd) {
-	case FIONBIO:
-	case FIOASYNC:
-		/* OK to do nothing: we always return immediately */
-		break;
-	default:
-		return ENODEV;
-	}
-
-	return (0);
-}
-
-int
-mmrw(dev, uio, rw)
-	dev_t dev;
-	struct uio *uio;
-	enum uio_rw rw;
-{
-	register int o;
-#if LP64KERN
-	register uint64_t c;
-#else
-	register uint c;
-#endif
-	addr64_t vll;
-	int error = 0;
-	vm_offset_t	where;
-
-	while (uio_resid(uio) > 0 && error == 0) {
-		uio_update(uio, 0);
-
-		switch (minor(dev)) {
-
-/* minor device 0 is physical memory */
-		case 0:
-			if (setup_kmem == 0)
-				return(ENODEV);
-			vll = trunc_page_64(uio->uio_offset);
-			if(((vll >> 31) == 1) || vll >= ((dgWork.dgFlags & enaDiagDM) ? mem_actual : max_mem))
-				goto fault;
-
-			if(dgWork.dgFlags & enaDiagDM) {			/* Can we really get all memory? */
-				if (kmem_alloc_pageable(kernel_map, &where, PAGE_SIZE) != KERN_SUCCESS) {
-					goto fault;
-				}
-				else {
-					addr64_t collad;
-					
-					collad = mapping_make(kernel_pmap, (addr64_t)where, (ppnum_t)(vll >> 12), 0, 1, VM_PROT_READ);	/* Map it in for the moment */
-					if(collad) {						/* See if it failed (shouldn't happen)  */
-						kmem_free(kernel_map, where, PAGE_SIZE);	/* Toss the page */
-						goto fault;						/* Kill the transfer */
-					}
-				}
-			}
-			else {
-				if (kmem_alloc(kernel_map, &where, 4096) 
-					!= KERN_SUCCESS) {
-					goto fault;
-				}
-			}
-			o = uio->uio_offset - vll;
-			c = min(PAGE_SIZE - o, uio_curriovlen(uio));
-			error = uiomove((caddr_t)(where + o), c, uio);
-
-			if(dgWork.dgFlags & enaDiagDM) (void)mapping_remove(kernel_pmap, (addr64_t)where);	/* Unmap it */
-			kmem_free(kernel_map, where, PAGE_SIZE);
-			continue;
-
-		/* minor device 1 is kernel memory */
-		case 1:
-			if (setup_kmem == 0)
-				return(ENODEV);
-			/* Do some sanity checking */
-			if (((addr64_t)uio->uio_offset > vm_last_addr) ||
-				((addr64_t)uio->uio_offset < VM_MIN_KERNEL_ADDRESS))
-				goto fault;
-			c = uio_curriovlen(uio);
-			if (!kernacc(uio->uio_offset, c))
-				goto fault;
-			error = uiomove64(uio->uio_offset, c, uio);
-			continue;
-
-		/* minor device 2 is EOF/RATHOLE */
-		case 2:
-			if (rw == UIO_READ)
-				return (0);
-			c = uio_curriovlen(uio);
-			break;
-		/* minor device 3 is ZERO/RATHOLE */
-		case 3:
-			if(devzerobuf == NULL) {
-				MALLOC(devzerobuf, caddr_t,PAGE_SIZE, M_TEMP, M_WAITOK);
-				bzero(devzerobuf, PAGE_SIZE);
-			}
-			if(uio->uio_rw == UIO_WRITE) {
-				c = uio_curriovlen(uio);
-				break;
-			}
-			c = min(uio_curriovlen(uio), PAGE_SIZE);
-			error = uiomove(devzerobuf, c, uio);
-			continue;
-		default:
-			goto fault;
-			break;
-		}
-			
-		if (error)
-			break;
-		uio_update(uio, c);
-	}
-	return (error);
-fault:
-	return (EFAULT);
-}
-
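
The minor-3 (/dev/zero-style) branch above allocates a single zeroed page lazily and then serves every read from it, at most a page per pass. A userspace analogue of that path, assuming only libc and a fixed PAGE_SIZE:

    #include <stdlib.h>
    #include <string.h>

    #define PAGE_SIZE 4096

    static char *devzerobuf;                    /* lazily allocated, as in mmrw() */

    static size_t zero_read(char *dst, size_t len)
    {
        size_t done = 0;
        if (devzerobuf == NULL) {
            devzerobuf = malloc(PAGE_SIZE);     /* kernel uses MALLOC(..., M_WAITOK) */
            if (devzerobuf == NULL)
                return 0;
            memset(devzerobuf, 0, PAGE_SIZE);
        }
        while (done < len) {
            size_t c = len - done;
            if (c > PAGE_SIZE)
                c = PAGE_SIZE;                  /* one page per pass, like the uiomove loop */
            memcpy(dst + done, devzerobuf, c);
            done += c;
        }
        return done;
    }
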
diff --git a/bsd/dev/ppc/munge.s b/bsd/dev/ppc/munge.s
deleted file mode 100644
index 9e33bc326..000000000
--- a/bsd/dev/ppc/munge.s
+++ /dev/null
@@ -1,477 +0,0 @@
-/*
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/*
- *  Syscall argument mungers.
- *
- *  Passed a pointer to the user's register array in the savearea, we copy args into
- *  the uu_arg[] array, padding etc. as appropriate.  The issue is that parameters
- *  passed in registers from a 32-bit address space do not map directly into the uu_args.
- *  For example, a 32-bit long-long comes in two registers, but we need to combine
- *  them into one 64-bit long-long in the uu_args.
- *
- *  There are several functions in this file.  Each takes two parameters:
- *
- *      void    munge_XXXX( const void *regs, void *uu_args);
- *
- *  The name of the function encodes the number and type of the parameters, as follows:
- *
- *      w = a 32-bit value such as an int or a 32-bit ptr, that does not require
- *          sign extension.  These are handled by skipping a word in the input,
- *          zeroing a word of output, and copying a word from input to output.
- *
- *      s = a 32-bit value such as a long, which must be sign-extended to a 64-bit
- *          long-long in the uu_args.  These are handled by skipping a word of
- *          input, loading a word of input and sign-extending it to a doubleword,
- *          and storing two words of output.
- *
- *      l = a 64-bit long-long, passed in two registers.  These are handled by skipping
- *          a word of input, copying a word, skipping another word of input, and
- *          copying another word.
- *
- *      d = a 32-bit int or a 64-bit ptr or long, passed in via a 64-bit GPR 
- *          from a 64-bit process.  We copy two words from input to output.
- *
- *  For example, "munge_wls" takes a word, a long-long, and a word.  This takes
- *  four registers: the first word is in one, the long-long takes two, and the
- *  final word is in the fourth.  We store six words: a 0, the low words of the
- *  first three registers, and the two words resulting from sign-extending the
- *  low word of the fourth register.
- *
- *  As you can see, we save a lot of code by collapsing mungers that are prefixes
- *  of each other, into the more general routine.  This ends up copying a few extra
- *  bytes of parameters, but big deal.  The old kernel copied all eight words for
- *  every system call.
- *
- *  These routines assume explicit pad words in the uu_arg structures, that fill out
- *  int parameters to 64 bits.  Having pad words makes munging args for 64-bit
- *  processes the equivalent of a simple bcopy(), though it does introduce an
- *  endian dependency.
- */
-
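
The encoding above maps mechanically to C. As a sketch (not part of this file), a hypothetical munge_wls — a word, a long-long, and a sign-extended word — would behave as follows, given the big-endian savearea layout in which each 32-bit register value sits in the low word of an 8-byte slot (the N*8+4(r3) addressing used throughout this file):

    #include <stdint.h>

    /* Hypothetical C rendering of a "wls" munger, per the naming rules above. */
    void munge_wls(const void *regs, void *uu_args)
    {
        const uint32_t *r = (const uint32_t *)regs;    /* word view of 8-byte slots */
        uint64_t *a = (uint64_t *)uu_args;

        a[0] = r[0*2 + 1];                                 /* w: zero-extend       */
        a[1] = ((uint64_t)r[1*2 + 1] << 32) | r[2*2 + 1];  /* l: two regs, hi|lo   */
        a[2] = (uint64_t)(int64_t)(int32_t)r[3*2 + 1];     /* s: sign-extend       */
    }
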
-        .align  5
-        .globl  _munge_dddddddd        // that is 8 'd's
-_munge_dddddddd:
-        .globl  _munge_ddddddd
-_munge_ddddddd:
-        .globl  _munge_dddddd
-_munge_dddddd:
-        .globl  _munge_ddddd
-_munge_ddddd:
-        ld     r5,0*8+0(r3)
-        ld     r6,1*8+0(r3)
-        ld     r7,2*8+0(r3)
-        ld     r8,3*8+0(r3)
-        ld     r9,4*8+0(r3)
-        ld     r10,5*8+0(r3)
-        ld     r11,6*8+0(r3)
-        ld     r12,7*8+0(r3)
-        
-        std     r5,0*8+0(r4)
-        std     r6,1*8+0(r4)
-        std     r7,2*8+0(r4)
-        std     r8,3*8+0(r4)
-        std     r9,4*8+0(r4)
-        std     r10,5*8+0(r4)
-        std     r11,6*8+0(r4)
-        std     r12,7*8+0(r4)
-        
-        blr
-
-
-        .align  5
-        .globl  _munge_dddd
-_munge_dddd:
-        .globl  _munge_ddd
-_munge_ddd:
-        .globl  _munge_dd
-_munge_dd:
-        .globl  _munge_d
-_munge_d:
-        ld     r5,0*8+0(r3)
-        ld     r6,1*8+0(r3)
-        ld     r7,2*8+0(r3)
-        ld     r8,3*8+0(r3)
-        
-        std     r5,0*8+0(r4)
-        std     r6,1*8+0(r4)
-        std     r7,2*8+0(r4)
-        std     r8,3*8+0(r4)
-        
-        blr
-
-
-        .align  5
-        .globl  _munge_wwwwwwww        // that is 8 'w's
-_munge_wwwwwwww:
-        .globl  _munge_wwwwwww
-_munge_wwwwwww:
-        .globl  _munge_wwwwww
-_munge_wwwwww:
-        .globl  _munge_wwwww
-_munge_wwwww:
-        li      r0,0
-        lwz     r5,0*8+4(r3)
-        lwz     r6,1*8+4(r3)
-        lwz     r7,2*8+4(r3)
-        lwz     r8,3*8+4(r3)
-        lwz     r9,4*8+4(r3)
-        lwz     r10,5*8+4(r3)
-        lwz     r11,6*8+4(r3)
-        lwz     r12,7*8+4(r3)
-        
-        stw     r0,0*8+0(r4)
-        stw     r5,0*8+4(r4)
-        stw     r0,1*8+0(r4)
-        stw     r6,1*8+4(r4)
-        stw     r0,2*8+0(r4)
-        stw     r7,2*8+4(r4)
-        stw     r0,3*8+0(r4)
-        stw     r8,3*8+4(r4)
-        stw     r0,4*8+0(r4)
-        stw     r9,4*8+4(r4)
-        stw     r0,5*8+0(r4)
-        stw     r10,5*8+4(r4)
-        stw     r0,6*8+0(r4)
-        stw     r11,6*8+4(r4)
-        stw     r0,7*8+0(r4)
-        stw     r12,7*8+4(r4)
-        
-        blr
-
-
-        .align  5
-        .globl  _munge_wwww
-_munge_wwww:
-        .globl  _munge_www
-_munge_www:
-        .globl  _munge_ww
-_munge_ww:
-        .globl  _munge_w
-_munge_w:
-        li      r0,0
-        lwz     r5,0*8+4(r3)
-        lwz     r6,1*8+4(r3)
-        lwz     r7,2*8+4(r3)
-        lwz     r8,3*8+4(r3)
-        
-        stw     r0,0*8+0(r4)
-        stw     r5,0*8+4(r4)
-        stw     r0,1*8+0(r4)
-        stw     r6,1*8+4(r4)
-        stw     r0,2*8+0(r4)
-        stw     r7,2*8+4(r4)
-        stw     r0,3*8+0(r4)
-        stw     r8,3*8+4(r4)
-        
-        blr
-
-        .align  5
-        .globl  _munge_l
-_munge_l:
-        li      r0,0
-        lwz     r5,0*8+4(r3)
-        lwz     r6,1*8+4(r3)
-
-        stw     r5,0*8+0(r4)
-        stw     r6,0*8+4(r4)
-        
-        blr
-        
-        .align  5
-        .globl  _munge_wlw
-_munge_wlw:
-        .globl  _munge_wl
-_munge_wl:
-        li      r0,0
-        lwz     r5,0*8+4(r3)
-        lwz     r6,1*8+4(r3)
-        lwz     r7,2*8+4(r3)
-        lwz     r8,3*8+4(r3)
-
-        stw     r0,0*8+0(r4)
-        stw     r5,0*8+4(r4)
-        stw     r6,1*8+0(r4)
-        stw     r7,1*8+4(r4)
-        stw     r0,2*8+0(r4)
-        stw     r8,2*8+4(r4)
-        
-        blr
-
-
-        .align  5
-        .globl  _munge_wwwl
-_munge_wwwl:
-        li      r0,0
-        lwz     r5,0*8+4(r3)
-        lwz     r6,1*8+4(r3)
-        lwz     r7,2*8+4(r3)
-        lwz     r8,3*8+4(r3)
-        lwz     r9,4*8+4(r3)
-        
-        stw     r0,0*8+0(r4)
-        stw     r5,0*8+4(r4)
-        stw     r0,1*8+0(r4)
-        stw     r6,1*8+4(r4)
-        stw     r0,2*8+0(r4)
-        stw     r7,2*8+4(r4)
-        stw     r8,3*8+0(r4)
-        stw     r9,3*8+4(r4)
-        
-        blr
-
-
-        .align  5
-        .globl  _munge_wwwlww
-_munge_wwwlww:
-        li      r0,0
-        lwz     r5,0*8+4(r3)
-        lwz     r6,1*8+4(r3)
-        lwz     r7,2*8+4(r3)
-        lwz     r8,3*8+4(r3)
-        lwz     r9,4*8+4(r3)
-        lwz     r10,5*8+4(r3)
-        lwz     r11,6*8+4(r3)
-        
-        stw     r0,0*8+0(r4)
-        stw     r5,0*8+4(r4)
-        stw     r0,1*8+0(r4)
-        stw     r6,1*8+4(r4)
-        stw     r0,2*8+0(r4)
-        stw     r7,2*8+4(r4)
-        stw     r8,3*8+0(r4)
-        stw     r9,3*8+4(r4)
-        stw     r0,4*8+0(r4)
-        stw     r10,4*8+4(r4)
-        stw     r0,5*8+0(r4)
-        stw     r11,5*8+4(r4)
-        
-        blr
-
-
-        .align  5
-        .globl  _munge_wwlwww
-_munge_wwlwww:
-        li      r0,0
-        lwz     r5,0*8+4(r3)	// Wwlwww
-        lwz     r6,1*8+4(r3)	// wWlwww
-        lwz     r7,2*8+4(r3)	// wwLwww (hi)
-        lwz     r8,3*8+4(r3)	// wwLwww (lo)
-        lwz     r9,4*8+4(r3)	// wwlWww
-        lwz     r10,5*8+4(r3)	// wwlwWw
-        lwz     r11,6*8+4(r3)	// wwlwwW
-        
-        stw     r0,0*8+0(r4)	// 0wlwww
-        stw     r5,0*8+4(r4)	// Wwlwww
-        stw     r0,1*8+0(r4)	// w0lwww
-        stw     r6,1*8+4(r4)	// wWlwww
-        stw     r7,2*8+0(r4)	// wwLwww (hi)
-        stw     r8,2*8+4(r4)	// wwLwww (lo)
-        stw     r0,3*8+0(r4)	// wwl0ww 
-        stw     r9,3*8+4(r4)	// wwlWww
-        stw     r0, 4*8+0(r4)	// wwlw0w
-        stw     r10,4*8+4(r4)	// wwlwWw
-        stw     r0, 5*8+0(r4)	// wwlww0
-        stw     r11,5*8+4(r4)	// wwlwwW
-        
-        blr
-
-        .align  5
-        .globl  _munge_wwwwlw	// 4 'w's, an l, and a w
-_munge_wwwwlw:
-        li      r0,0
-        lwz     r5,0*8+4(r3)
-        lwz     r6,1*8+4(r3)
-        lwz     r7,2*8+4(r3)
-        lwz     r8,3*8+4(r3)
-        lwz     r9,4*8+4(r3)
-        lwz     r10,5*8+4(r3)
-        lwz     r11,6*8+4(r3)
-        
-        stw     r0,0*8+0(r4)
-        stw     r5,0*8+4(r4)
-        stw     r0,1*8+0(r4)
-        stw     r6,1*8+4(r4)
-        stw     r0,2*8+0(r4)
-        stw     r7,2*8+4(r4)
-        stw     r0,3*8+0(r4)
-        stw     r8,3*8+4(r4)
-        stw     r9,4*8+0(r4)
-        stw     r10,4*8+4(r4)
-        stw     r0,5*8+0(r4)
-        stw     r11,5*8+4(r4)
-        
-        blr
-
-
-        .align  5
-        .globl  _munge_wwwwl	// 4 'w's and an l
-_munge_wwwwl:
-        li      r0,0
-        lwz     r5,0*8+4(r3)
-        lwz     r6,1*8+4(r3)
-        lwz     r7,2*8+4(r3)
-        lwz     r8,3*8+4(r3)
-        lwz     r9,4*8+4(r3)
-        lwz     r10,5*8+4(r3)
-        
-        stw     r0,0*8+0(r4)
-        stw     r5,0*8+4(r4)
-        stw     r0,1*8+0(r4)
-        stw     r6,1*8+4(r4)
-        stw     r0,2*8+0(r4)
-        stw     r7,2*8+4(r4)
-        stw     r0,3*8+0(r4)
-        stw     r8,3*8+4(r4)
-        stw     r9,4*8+0(r4)
-        stw     r10,4*8+4(r4)
-        
-        blr
-
-
-        .align  5
-        .globl  _munge_wwwwwl      // 5 'w's and an l
-_munge_wwwwwl:
-        li      r0,0
-        lwz     r5,0*8+4(r3)
-        lwz     r6,1*8+4(r3)
-        lwz     r7,2*8+4(r3)
-        lwz     r8,3*8+4(r3)
-        lwz     r9,4*8+4(r3)
-        lwz     r10,5*8+4(r3)
-        lwz     r11,6*8+4(r3)
-        
-        stw     r0,0*8+0(r4)
-        stw     r5,0*8+4(r4)
-        stw     r0,1*8+0(r4)
-        stw     r6,1*8+4(r4)
-        stw     r0,2*8+0(r4)
-        stw     r7,2*8+4(r4)
-        stw     r0,3*8+0(r4)
-        stw     r8,3*8+4(r4)
-        stw     r0,4*8+0(r4)
-        stw     r9,4*8+4(r4)
-        stw     r10,5*8+0(r4)
-        stw     r11,5*8+4(r4)
-        
-        blr
-        
-        
-        .align  5
-        .globl  _munge_wsw
-_munge_wsw:
-        li      r0,0
-        lwz     r5,0*8+4(r3)
-        lwz     r6,1*8+4(r3)
-        lwz     r7,2*8+4(r3)
-
-        stw     r0,0*8+0(r4)
-        srawi   r2,r6,31
-        stw     r5,0*8+4(r4)
-        stw     r2,1*8+0(r4)
-        stw     r6,1*8+4(r4)
-        stw     r0,2*8+0(r4)
-        stw     r7,2*8+4(r4)
-
-        blr
-        
-        
-        .align  5
-        .globl  _munge_wws
-_munge_wws:
-        li      r0,0
-        lwz     r5,0*8+4(r3)
-        lwz     r6,1*8+4(r3)
-        lwz     r7,2*8+4(r3)
-        
-        stw     r0,0*8+0(r4)
-        stw     r5,0*8+4(r4)
-        stw     r0,1*8+0(r4)
-        srawi   r2,r7,31
-        stw     r6,1*8+4(r4)
-        stw     r2,2*8+0(r4)
-        stw     r7,2*8+4(r4)
-
-        blr
-
-
-        .align  5
-        .globl  _munge_wwwsw
-_munge_wwwsw:
-        li      r0,0
-        lwz     r5,0*8+4(r3)
-        lwz     r6,1*8+4(r3)
-        lwz     r7,2*8+4(r3)
-        lwz     r8,3*8+4(r3)
-        lwz     r9,4*8+4(r3)
-                
-        stw     r0,0*8+0(r4)
-        stw     r5,0*8+4(r4)
-        stw     r0,1*8+0(r4)
-        stw     r6,1*8+4(r4)
-        srawi   r2,r8,31
-        stw     r0,2*8+0(r4)
-        stw     r7,2*8+4(r4)
-        stw     r2,3*8+0(r4)
-        stw     r8,3*8+4(r4)
-        stw     r0,4*8+0(r4)
-        stw     r9,4*8+4(r4)
-
-        blr
-
-		.align 5
-		.globl	_munge_llllll
-_munge_llllll:
-		li		r0,0
-		lwz		r5,0*8+4(r3)	// l1
-		lwz		r6,1*8+4(r3)
-		lwz		r7,2*8+4(r3)	// l2
-		lwz		r8,3*8+4(r3)
-		lwz		r9,4*8+4(r3)	// l3
-		lwz		r10,5*8+4(r3)
-		lwz		r11,6*8+4(r3)	// l4
-
-		stw		r5,0*8+0(r4)
-		stw		r6,0*8+4(r4)
-		stw		r7,1*8+0(r4)
-		stw		r8,1*8+4(r4)
-		stw		r9,2*8+0(r4)
-		stw		r10,2*8+4(r4)
-		stw		r11,3*8+0(r4)
-
-		// the rest spill to the stack (r1)
-		// we'll zero fill for now
-		// and make the syscall handler
-		// do the copyin from the user stack
-		stw		r0,3*8+4(r4)
-		stw		r0,4*8+0(r4)
-		stw		r0,4*8+4(r4)
-		stw		r0,5*8+0(r4)
-		stw		r0,5*8+4(r4)
-
-		blr
diff --git a/bsd/dev/ppc/ppc_init.c b/bsd/dev/ppc/ppc_init.c
deleted file mode 100644
index 545cfe5ae..000000000
--- a/bsd/dev/ppc/ppc_init.c
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-
-#include <mach/vm_types.h>
-#include <mach/vm_param.h>
-#include <mach/thread_status.h>
-#include <kern/misc_protos.h>
-#include <kern/assert.h>
-#include <kern/cpu_number.h>
-
-#include <ppc/proc_reg.h>
-#include <ppc/boot.h>
-#include <ppc/misc_protos.h>
-#include <ppc/pmap.h>
-#include <ppc/pmap_internals.h>
-#include <ppc/mem.h>
-#include <ppc/exception.h>
-#include <ppc/gdb_defs.h>
-#include <ppc/POWERMAC/video_board.h>
-#include <ppc/POWERMAC/video_pdm.h>
-
-#ifdef	__MACHO__
-#include <libkern/kernel_mach_header.h>
-#endif
-
-/* External references */
-
-extern unsigned int intstack[];	/* declared in start.s */
-extern unsigned int intstack_top_ss;	/* declared in start.s */
-#if	MACH_KGDB
-extern unsigned int gdbstackptr;	/* declared in start.s */
-extern unsigned int gdbstack_top_ss;	/* declared in start.s */
-#endif	/* MACH_KGDB */
-
-/* Stuff declared in kern/bootstrap.c which we may need to initialise */
-
-extern vm_offset_t     boot_start;
-extern vm_size_t       boot_size;
-extern vm_offset_t     boot_region_desc;
-extern vm_size_t       boot_region_count;
-extern int             boot_thread_state_flavor;
-extern thread_state_t  boot_thread_state;
-extern unsigned int    boot_thread_state_count;
-
-/* Trap handling function prototypes */
-
-extern void thandler(void);	/* trap handler */
-extern void ihandler(void);	/* interrupt handler */
-extern void shandler(void);	/* syscall handler */
-extern void gdbhandler(void);	/* debugger handler */
-extern void fpu_switch(void);	/* fp handler */
-extern void atomic_switch_trap(void);	/* fast path atomic thread switch */
-
-/* definitions */
-
-struct ppc_thread_state boot_task_thread_state;
-
-
-
-
-
-#if 1 /* TODO NMGS - vm_map_steal_memory shouldn't use these - remove */
-vm_offset_t avail_start;
-vm_offset_t avail_end;
-#endif 
-unsigned int avail_remaining = 0;
-vm_offset_t first_avail;
-
-/*
- * Mach-O Support 
- */
-
-
-#ifdef __MACHO__
-void *sectTEXTB;
-unsigned long sectSizeTEXT;
-void *sectDATAB;
-unsigned long sectSizeDATA;
-void *sectOBJCB;
-unsigned long sectSizeOBJC;
-void *sectLINKB;
-unsigned long sectSizeLINK;
-
-vm_offset_t end, etext, edata;
-#define	ETEXT	etext
-#endif
-
-
-
-void ppc_vm_init(unsigned int memory_size, boot_args *args)
-{
-	unsigned int htabmask;
-	unsigned int i;
-	vm_offset_t  addr;
-	int boot_task_end_offset;
-
-	printf("mem_size = %d M\n",memory_size / (1024 * 1024));
-
-#ifdef __MACHO__
-	/* Now retrieve addresses for end, edata, and etext 
-	 * from MACH-O headers.
-	 */
-
-
-	etext = (vm_offset_t) sectTEXTB + sectSizeTEXT;
-	edata = (vm_offset_t) sectDATAB + sectSizeDATA;
-	end = getlastaddr();
-#endif
-
-	/* Stitch valid memory regions together - they may be contiguous
-	 * even though they're not already glued together
-	 */
-
-	/* Go through the list of memory regions passed in via the args
-	 * and copy valid entries into the pmap_mem_regions table, adding
-	 * further calculated entries.
-	 */
-	
-	
-	/* Initialise the pmap system, using space above `first_avail'*/
-
-#ifndef	__MACHO__
-	free_regions[free_regions_count].start =
-	  	round_page((unsigned int)&_ExceptionVectorsEnd -
-			   (unsigned int)&_ExceptionVectorsStart);
-#else
-	/* On MACH-O generated kernels, the Exception Vectors
-	 * are already mapped and loaded at 0 -- no relocation
-	 * or freeing of memory is needed
-	 */
-
-	free_regions[free_regions_count].start = round_page((unsigned int)&_ExceptionVectorsEnd) + 4096;
-#endif
-
-	/* If we are on a PDM machine memory at 1M might be used
-	 * for video. TODO NMGS call video driver to do this
-	 * somehow
-	 */
-
-
-	/* For PowerMac, first_avail is set to above the bootstrap task.
-         * TODO NMGS - different screen modes - might free mem?
-         */
-
-	first_avail = round_page(args->first_avail);
-
-
-	/* map in the exception vectors */
-	/*
-	 * map the kernel text, data and bss. Don't forget other regions too
-	 */
-	for (i = 0; i < args->kern_info.region_count; i++) {
-#if	MACH_KDB
-		if (args->kern_info.regions[i].prot == VM_PROT_NONE &&
-		    i == args->kern_info.region_count - 1) {
-			/* assume that's the kernel symbol table */
-			kern_sym_start = args->kern_info.regions[i].addr;
-			kern_sym_size = args->kern_info.regions[i].size;
-			printf("kernel symbol table at 0x%x size 0x%x\n",
-			       kern_sym_start, kern_sym_size);
-			args->kern_info.regions[i].prot |=
-				(VM_PROT_WRITE|VM_PROT_READ);
-		}
-#endif	/* MACH_KDB */
-
-#ifdef __MACHO__
-		/* Skip the VECTORS segment */
-		if (args->kern_info.regions[i].addr == 0)
-			continue;
-#endif
-
-	boot_region_count = args->task_info.region_count;
-	boot_size = 0;
-	boot_task_end_offset = 0;
-	/* Map bootstrap task pages 1-1 so that user_bootstrap can find it */
-	for (i = 0; i < boot_region_count; i++) {
-		if (args->task_info.regions[i].mapped) {
-			/* kernel requires everything page aligned */
-#if DEBUG
-			printf("mapping virt 0x%08x to phys 0x%08x end 0x%x, prot=0x%b\n",
-				 ppc_trunc_page(args->task_info.base_addr + 
-					args->task_info.regions[i].offset),
-				 ppc_trunc_page(args->task_info.base_addr + 
-					args->task_info.regions[i].offset),
-				 ppc_round_page(args->task_info.base_addr + 
-					args->task_info.regions[i].offset +
-					args->task_info.regions[i].size),
-				 args->task_info.regions[i].prot,
-				 "\x10\1READ\2WRITE\3EXEC");
-#endif /* DEBUG */
-
-			(void)pmap_map(
-				  ppc_trunc_page(args->task_info.base_addr + 
-				      args->task_info.regions[i].offset),
-			          ppc_trunc_page(args->task_info.base_addr + 
-				      args->task_info.regions[i].offset),
-			          ppc_round_page(args->task_info.base_addr +
-				      args->task_info.regions[i].offset +
-				      args->task_info.regions[i].size),
-			          args->task_info.regions[i].prot);
-
-			/* Count the size of mapped space */
-			boot_size += args->task_info.regions[i].size;
-
-			/* There may be an overlapping physical page
-			 * mapped to two different virtual addresses
-			 */
-			if (boot_task_end_offset >
-			    args->task_info.regions[i].offset) {
-				boot_size -= boot_task_end_offset - 
-					args->task_info.regions[i].offset;
-#if DEBUG
-				printf("WARNING - bootstrap overlaps regions\n");
-#endif /* DEBUG */
-			}
-
-			boot_task_end_offset =
-				args->task_info.regions[i].offset +
-				args->task_info.regions[i].size;
-		}
-	}
-
-	if (boot_region_count) {
-
-		/* Add a new region to the bootstrap task for its stack */
-		args->task_info.regions[boot_region_count].addr =
-			BOOT_STACK_BASE;
-		args->task_info.regions[boot_region_count].size =
-			BOOT_STACK_SIZE;
-		args->task_info.regions[boot_region_count].mapped = FALSE;
-		boot_region_count++;
-		
-		boot_start        = args->task_info.base_addr;
-		boot_region_desc  = (vm_offset_t) args->task_info.regions;
-		/* TODO NMGS need to put param info onto top of boot stack */
-		boot_task_thread_state.r1   = BOOT_STACK_PTR-0x100;
-		boot_task_thread_state.srr0 = args->task_info.entry;
-		boot_task_thread_state.srr1 =
-			MSR_MARK_SYSCALL(MSR_EXPORT_MASK_SET);
-		
-		boot_thread_state_flavor = PPC_THREAD_STATE;
-		boot_thread_state_count  = PPC_THREAD_STATE_COUNT;
-		boot_thread_state        =
-			(thread_state_t)&boot_task_thread_state;
-	}
-
-
-
-}
-
diff --git a/bsd/dev/ppc/sdt_ppc.c b/bsd/dev/ppc/sdt_ppc.c
deleted file mode 100644
index bcd10c967..000000000
--- a/bsd/dev/ppc/sdt_ppc.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-/* #pragma ident	"@(#)sdt.c	1.6	06/03/24 SMI" */
-
-#ifdef KERNEL
-#ifndef _KERNEL
-#define _KERNEL /* Solaris vs. Darwin */
-#endif
-#endif
-
-#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
-#include <kern/cpu_data.h>
-#include <kern/thread.h>
-#include <mach/thread_status.h>
-
-#include <sys/dtrace.h>
-#include <sys/dtrace_impl.h>
-
-#include <sys/dtrace_glue.h>
-
-#include <sys/sdt_impl.h>
-#include <machine/cpu_capabilities.h>
-
-extern sdt_probe_t      **sdt_probetab;
-
-/*ARGSUSED*/
-int
-sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax)
-{
-    uint64_t mask = (_cpu_capabilities & k64Bit) ? 0xffffffffffffffffULL : 0x00000000ffffffffULL;
-
-#pragma unused(eax)
-	sdt_probe_t *sdt = sdt_probetab[SDT_ADDR2NDX(addr)];
-
-	for (; sdt != NULL; sdt = sdt->sdp_hashnext) {
-		if ((uintptr_t)sdt->sdp_patchpoint == addr) {
-			ppc_saved_state_t *regs = (ppc_saved_state_t *)stack;
-			
-            dtrace_probe(sdt->sdp_id, regs->save_r3 & mask, regs->save_r4 & mask,
-                regs->save_r5 & mask, regs->save_r6 & mask, regs->save_r7 & mask);
-				
-			return (DTRACE_INVOP_NOP);
-		}
-	}
-
-	return (0);
-}
-
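
The mask computed at the top of sdt_invop() guards against stale high words: on CPUs without k64Bit, only the low 32 bits of each saved GPR are meaningful, so probe arguments are truncated before being handed to dtrace_probe(). A small illustration of the truncation (the register value is made up):

    #include <assert.h>
    #include <stdint.h>

    void sdt_mask_example(void)
    {
        uint64_t mask64 = 0xffffffffffffffffULL;   /* k64Bit capable           */
        uint64_t mask32 = 0x00000000ffffffffULL;   /* 32-bit only              */
        uint64_t save_r3 = 0xdeadbeef00000007ULL;  /* stale garbage in hi word */

        assert((save_r3 & mask32) == 0x7);         /* truncated probe argument */
        assert((save_r3 & mask64) == save_r3);     /* passed through unchanged */
    }
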
diff --git a/bsd/dev/ppc/stubs.c b/bsd/dev/ppc/stubs.c
deleted file mode 100644
index 55e2f0170..000000000
--- a/bsd/dev/ppc/stubs.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1997 by Apple Computer, Inc., all rights reserved
- * Copyright (c) 1993 NeXT Computer, Inc.
- *
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/ioctl.h>
-#include <sys/tty.h>
-#include <sys/conf.h>
-#include <sys/proc.h>
-#include <sys/user.h>
-#include <kern/thread.h>
-#include <kern/task.h>
-#include <vm/vm_map.h>
-
-extern void	*get_bsduthreadarg(thread_t);
-extern int	*get_bsduthreadrval(thread_t);
-
-/* 
- * copy a null terminated string from one point to another in 
- * the kernel address space.
- *   - no access checks are performed.
- *   - if the end of string isn't found before
- *     maxlen bytes are copied,  return ENAMETOOLONG,
- *     indicating an incomplete copy.
- *   - otherwise, return 0, indicating success.
- * the number of bytes copied is always returned in lencopied.
- */
-/* from ppc/fault_copy.c -Titan1T4 VERSION  */
-int
-copystr(const void *vfrom, void *vto, size_t maxlen, size_t *lencopied)
-{
-    register unsigned l;
-	const char *from;
-	char *to;
-
-	from = vfrom;
-	to = vto;
-    for (l = 0; l < maxlen; l++)
-        if ((*to++ = *from++) == '\0') {
-            if (lencopied)
-                *lencopied = l + 1;
-            return 0;
-        }
-    if (lencopied)
-	*lencopied = maxlen;
-    return ENAMETOOLONG;
-}
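
Per the contract described above, a successful copy reports the terminating NUL in *lencopied, while an overlong source yields ENAMETOOLONG with *lencopied == maxlen. A small userspace harness against that contract:

    #include <assert.h>
    #include <errno.h>
    #include <stddef.h>

    int copystr(const void *vfrom, void *vto, size_t maxlen, size_t *lencopied);

    void copystr_example(void)
    {
        char dst[8];
        size_t n;

        assert(copystr("tty", dst, sizeof dst, &n) == 0 && n == 4);  /* 3 chars + NUL */
        assert(copystr("toolongname", dst, sizeof dst, &n) == ENAMETOOLONG && n == 8);
    }
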
-
-int copywithin(src, dst, count)
-void  * src, *dst;
-size_t count;
-{
-	bcopy(src,dst,count);
-	return 0;
-}
-
-void *
-get_bsduthreadarg(thread_t th)
-{
-struct uthread *ut;
-	ut = get_bsdthread_info(th);
-	return((void *)(ut->uu_arg));
-}
-
-int *
-get_bsduthreadrval(thread_t th)
-{
-struct uthread *ut;
-	ut = get_bsdthread_info(th);
-	return(&ut->uu_rval[0]);
-}
-
diff --git a/bsd/dev/ppc/systemcalls.c b/bsd/dev/ppc/systemcalls.c
deleted file mode 100644
index a8fd2dcfd..000000000
--- a/bsd/dev/ppc/systemcalls.c
+++ /dev/null
@@ -1,435 +0,0 @@
-/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * NOTICE: This file was modified by McAfee Research in 2004 to introduce
- * support for mandatory and extensible security protections.  This notice
- * is included in support of clause 2.2 (b) of the Apple Public License,
- * Version 2.0.
- */
-
-#include <mach/mach_traps.h>
-
-#include <kern/task.h>
-#include <kern/thread.h>
-#include <kern/assert.h>
-#include <kern/clock.h>
-#include <kern/locks.h>
-#include <kern/sched_prim.h>
-#include <mach/machine/thread_status.h>
-#include <mach/thread_act.h>
-#include <ppc/savearea.h>
-
-#include <sys/kernel.h>
-#include <sys/vm.h>
-#include <sys/proc_internal.h>
-#include <sys/syscall.h>
-#include <sys/systm.h>
-#include <sys/user.h>
-#include <sys/errno.h>
-#include <sys/kdebug.h>
-#include <sys/sysent.h>
-#include <sys/sysproto.h>
-#include <sys/kauth.h>
-
-#include <security/audit/audit.h>
-
-#if CONFIG_DTRACE
-extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
-extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
-#endif
-
-extern void
-unix_syscall(struct savearea *regs);
-
-extern struct savearea * 
-find_user_regs(
-	thread_t act);
-
-extern lck_spin_t * tz_slock;
-
-/*
- * Function:	unix_syscall
- *
- * Inputs:	regs	- pointer to Process Control Block
- *
- * Outputs:	none
- */
-void
-unix_syscall(struct savearea	*regs)
-{
-	thread_t			thread_act;
-	struct uthread		*uthread;
-	struct proc			*proc;
-	struct sysent		*callp;
-	int					error;
-	unsigned int		code;
-	boolean_t			flavor;
-
-	flavor = (((unsigned int)regs->save_r0) == 0)? 1: 0;
-
-	if (flavor)
-		code = regs->save_r3;
-	else
-		code = regs->save_r0;
-
-	if (kdebug_enable && (code != 180)) {	/* 180 == kdebug_trace */
-		if (flavor)
-			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
-				regs->save_r4, regs->save_r5, regs->save_r6, regs->save_r7, 0);
-		else
-			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
-				regs->save_r3, regs->save_r4, regs->save_r5, regs->save_r6, 0);
-	}
-	thread_act = current_thread();
-	uthread = get_bsdthread_info(thread_act);
-
-	if (!(uthread->uu_flag & UT_VFORK))
-		proc = (struct proc *)get_bsdtask_info(current_task());
-	else
-		proc = current_proc();
-
-	/* Make sure there is a process associated with this task */
-	if (proc == NULL) {
-		regs->save_r3 = (long long)EPERM;
-		/* set the "pc" to execute cerror routine */
-		regs->save_srr0 -= 4;
-		task_terminate_internal(current_task());
-		thread_exception_return();
-		/* NOTREACHED */
-	}
-
-	/*
-	 * Delayed binding of thread credential to process credential, if we
-	 * are not running with an explicitly set thread credential.
-	 */
-	kauth_cred_uthread_update(uthread, proc);
-
-	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];	/* 63 == nosys */
-
-	if (callp->sy_narg != 0) {
-		void 		*regsp;
-		sy_munge_t 	*mungerp;
-		
-		if (IS_64BIT_PROCESS(proc)) {
-			/* XXX Turn 64 bit unsafe calls into nosys() */
-			if (callp->sy_flags & UNSAFE_64BIT) {
-				callp = &sysent[63];
-				goto unsafe;
-			}
-			mungerp = callp->sy_arg_munge64;
-		}
-		else {
-			mungerp = callp->sy_arg_munge32;
-		}
-		if ( !flavor) {
-			regsp = (void *) &regs->save_r3;
-		} else {
-			/* indirect system call consumes an argument so only 7 are supported */
-			if (callp->sy_narg > 7) {
-				callp = &sysent[63];
-				goto unsafe;
-			}
-			regsp = (void *) &regs->save_r4;
-		}
-		/* call syscall argument munger to copy in arguments (see xnu/bsd/dev/ppc/munge.s) */
-		(*mungerp)(regsp, (void *) &uthread->uu_arg[0]);
-	}
-
-unsafe:
-	
-	uthread->uu_flag |= UT_NOTCANCELPT;
-
-	uthread->uu_rval[0] = 0;
-
-	/*
-	 * r4 is volatile; if we set it to regs->save_r4 here the child
-	 * will have the parent's r4 after execve
-	 */
-	uthread->uu_rval[1] = 0;
-
-	error = 0;
-
-	/*
-	 * PPC runtime calls cerror after every unix system call, so
-	 * assume no error and adjust the "pc" to skip this call.
-	 * It will be set back to the cerror call if an error is detected.
-	 */
-	regs->save_srr0 += 4;
-
-#ifdef JOE_DEBUG
-	uthread->uu_iocount = 0;
-	uthread->uu_vpindex = 0;
-#endif
-	AUDIT_SYSCALL_ENTER(code, proc, uthread);
-	error = (*(callp->sy_call))(proc, (void *)uthread->uu_arg, &(uthread->uu_rval[0]));
-	AUDIT_SYSCALL_EXIT(code, proc, uthread, error);
-#if CONFIG_MACF
-	mac_thread_userret(code, error, thread_act);
-#endif
-
-
-#ifdef JOE_DEBUG
-	if (uthread->uu_iocount)
-	        printf("system call returned with uu_iocount != 0\n");
-#endif
-#if CONFIG_DTRACE
-	uthread->t_dtrace_errno = error;
-#endif /* CONFIG_DTRACE */
-
-	regs = find_user_regs(thread_act);
-
-	if (error == ERESTART) {
-		regs->save_srr0 -= 8;
-	} else if (error != EJUSTRETURN) {
-		if (error) {
-			regs->save_r3 = (long long)error;
-			/* set the "pc" to execute cerror routine */
-			regs->save_srr0 -= 4;
-		} else { /* (not error) */
-			switch (callp->sy_return_type) {
-			case _SYSCALL_RET_INT_T:
-				regs->save_r3 = uthread->uu_rval[0];
-				regs->save_r4 = uthread->uu_rval[1];
-				break;
-			case _SYSCALL_RET_UINT_T:
-				regs->save_r3 = ((u_int)uthread->uu_rval[0]);
-				regs->save_r4 = ((u_int)uthread->uu_rval[1]);
-				break;
-			case _SYSCALL_RET_OFF_T:
-			case _SYSCALL_RET_UINT64_T:
-				/* return 64 bits split across two registers for 32 bit */
-				/* process and in one register for 64 bit process */
-				if (IS_64BIT_PROCESS(proc)) {
-					u_int64_t 	*retp = (u_int64_t *)&uthread->uu_rval[0];
-					regs->save_r3 = *retp;
-					regs->save_r4 = 0;
-				}
-				else {
-					regs->save_r3 = uthread->uu_rval[0];
-					regs->save_r4 = uthread->uu_rval[1];
-				}
-				break;
-			case _SYSCALL_RET_ADDR_T:
-			case _SYSCALL_RET_SIZE_T:
-			case _SYSCALL_RET_SSIZE_T:
-				/* the variable length return types (user_addr_t, user_ssize_t, 
-				 * and user_size_t) are always the largest possible size in the 
-				 * kernel (we use uu_rval[0] and [1] as one 64 bit value).
-				 */
-				{
-					user_addr_t *retp = (user_addr_t *)&uthread->uu_rval[0];
-					regs->save_r3 = *retp;
-					regs->save_r4 = 0;
-				}
-				break;
-			case _SYSCALL_RET_NONE:
-				break;
-			default:
-				panic("unix_syscall: unknown return type");
-				break;
-			}
-		} 
-	}
-	/* else  (error == EJUSTRETURN) { nothing } */
-
-
-	uthread->uu_flag &= ~UT_NOTCANCELPT;
-
-	/* panic if funnel is held */
-	syscall_exit_funnelcheck();
-
-	if (uthread->uu_lowpri_window) {
-	        /*
-		 * task is marked as a low priority I/O type
-		 * and the I/O we issued while in this system call
-		 * collided with normal I/O operations... we'll
-		 * delay in order to mitigate the impact of this
-		 * task on the normal operation of the system
-		 */
-		throttle_lowpri_io(TRUE);
-	}
-	if (kdebug_enable && (code != 180)) {
-
-	        if (callp->sy_return_type == _SYSCALL_RET_SSIZE_T)
-		        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
-					      error, uthread->uu_rval[1], 0, proc->p_pid, 0);
-		else
-		        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
-					      error, uthread->uu_rval[0], uthread->uu_rval[1], proc->p_pid, 0);
-	}
-
-	thread_exception_return();
-	/* NOTREACHED */
-}
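
The save_srr0 arithmetic above encodes the PPC cerror convention: user code follows every sc instruction with a branch to cerror, the kernel pre-increments the pc by 4 assuming success, backs up 4 on error so the branch executes, and backs up 8 on ERESTART so the sc itself re-runs. A sketch of the resulting resume-pc arithmetic (the errno constants are defined locally here purely for illustration):

    #include <stdint.h>

    #define ERESTART    (-1)    /* illustrative; the kernel's values live in errno.h */
    #define EJUSTRETURN (-2)

    /*
     * Resume pc under the cerror convention, given srr0 as saved at trap
     * time (pointing at the instruction after the sc: the cerror branch).
     */
    static uint32_t resume_pc(uint32_t srr0_at_trap, int error)
    {
        uint32_t pc = srr0_at_trap + 4;     /* assume success: skip the branch   */

        if (error == ERESTART)
            pc -= 8;                        /* back to the sc: re-issue the call */
        else if (error != 0 && error != EJUSTRETURN)
            pc -= 4;                        /* back onto the cerror branch       */
        return pc;                          /* success/EJUSTRETURN: leave as-is  */
    }
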
-
-void
-unix_syscall_return(int error)
-{
-	thread_t					thread_act;
-	struct uthread				*uthread;
-	struct proc					*proc;
-	struct savearea				*regs;
-	unsigned int				code;
-	struct sysent				*callp;
-
-	thread_act = current_thread();
-	proc = current_proc();
-	uthread = get_bsdthread_info(thread_act);
-
-	regs = find_user_regs(thread_act);
-
-	if (regs->save_r0 != 0)
-		code = regs->save_r0;
-	else
-		code = regs->save_r3;
-
-	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];	/* 63 == nosys */
-
-#if CONFIG_DTRACE
-        if (callp->sy_call == dtrace_systrace_syscall)
-                dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
-#endif /* CONFIG_DTRACE */
-	AUDIT_SYSCALL_EXIT(code, proc, uthread, error);
-
-	/*
-	 * Adjust the saved pc and set up the return value(s)
-	 */
-	if (error == ERESTART) {
-		regs->save_srr0 -= 8;
-	} else if (error != EJUSTRETURN) {
-		if (error) {
-			regs->save_r3 = (long long)error;
-			/* set the "pc" to execute cerror routine */
-			regs->save_srr0 -= 4;
-		} else { /* (not error) */
-			switch (callp->sy_return_type) {
-			case _SYSCALL_RET_INT_T:
-				regs->save_r3 = uthread->uu_rval[0];
-				regs->save_r4 = uthread->uu_rval[1];
-				break;
-			case _SYSCALL_RET_UINT_T:
-				regs->save_r3 = ((u_int)uthread->uu_rval[0]);
-				regs->save_r4 = ((u_int)uthread->uu_rval[1]);
-				break;
-			case _SYSCALL_RET_OFF_T:
-			case _SYSCALL_RET_UINT64_T:
-				/* return 64 bits split across two registers for 32 bit */
-				/* process and in one register for 64 bit process */
-				if (IS_64BIT_PROCESS(proc)) {
-					u_int64_t 	*retp = (u_int64_t *)&uthread->uu_rval[0];
-					regs->save_r3 = *retp;
-				}
-				else {
-					regs->save_r3 = uthread->uu_rval[0];
-					regs->save_r4 = uthread->uu_rval[1];
-				}
-				break;
-			case _SYSCALL_RET_ADDR_T:
-			case _SYSCALL_RET_SIZE_T:
-			case _SYSCALL_RET_SSIZE_T:
-				/* the variable length return types (user_addr_t, user_ssize_t, 
-				 * and user_size_t) are always the largest possible size in the 
-				 * kernel (we use uu_rval[0] and [1] as one 64 bit value).
-				 */
-				{
-					u_int64_t 	*retp = (u_int64_t *)&uthread->uu_rval[0];
-					regs->save_r3 = *retp;
-				}
-				break;
-			case _SYSCALL_RET_NONE:
-				break;
-			default:
-				panic("unix_syscall: unknown return type");
-				break;
-			}
-		} 
-	}
-	/* else  (error == EJUSTRETURN) { nothing } */
-
-
-	uthread->uu_flag &= ~UT_NOTCANCELPT;
-
-	/* panic if funnel is held */
-	syscall_exit_funnelcheck();
-
-	if (uthread->uu_lowpri_window) {
-	        /*
-		 * task is marked as a low priority I/O type
-		 * and the I/O we issued while in this system call
-		 * collided with normal I/O operations... we'll
-		 * delay in order to mitigate the impact of this
-		 * task on the normal operation of the system
-		 */
-		throttle_lowpri_io(TRUE);
-	}
-	if (kdebug_enable && (code != 180)) {
-	        if (callp->sy_return_type == _SYSCALL_RET_SSIZE_T)
-		        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
-					      error, uthread->uu_rval[1], 0, proc->p_pid, 0);
-		else
-		        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
-					      error, uthread->uu_rval[0], uthread->uu_rval[1], proc->p_pid, 0);
-	}
-
-	thread_exception_return();
-	/* NOTREACHED */
-}
-
-void
-munge_lwww(
-	const void	*in32,
-	void		*out64)
-{
-	const uint32_t	*arg32;
-	uint64_t	*arg64;
-
-	arg32 = (const uint32_t *) in32;
-	arg64 = (uint64_t *) out64;
-
-	arg64[3] = arg32[9];	/* lwwW */
-	arg64[2] = arg32[7];	/* lwWw */
-	arg64[1] = arg32[5]; 	/* lWww */
-	arg64[0] = ((uint64_t) arg32[1]) << 32;	/* Lwww (hi) */
-	arg64[0] |= (uint64_t) arg32[3];	/* Lwww (lo) */
-}
-
-void
-munge_lw(
-	const void	*in32,
-	void		*out64)
-{
-	const uint32_t	*arg32;
-	uint64_t	*arg64;
-
-	arg32 = (const uint32_t *) in32;
-	arg64 = (uint64_t *) out64;
-
-	arg64[1] = arg32[5]; 	/* lW */
-	arg64[0] = ((uint64_t) arg32[1]) << 32;	/* Lw (hi) */
-	arg64[0] |= (uint64_t) arg32[3];	/* Lw (lo) */
-}
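
These C mungers read the same register image as the assembly ones: 8-byte savearea slots whose low words carry the 32-bit payloads, hence the odd indices arg32[1], arg32[3], arg32[5]. A hypothetical harness for munge_lw(), building a fake image and checking the reassembled values:

    #include <assert.h>
    #include <stdint.h>

    void munge_lw(const void *in32, void *out64);

    void munge_lw_example(void)
    {
        uint32_t regs[6] = { 0, 0x00000001,   /* slot 0 low word: hi half of the l */
                             0, 0x00000002,   /* slot 1 low word: lo half of the l */
                             0, 0x00000003 }; /* slot 2 low word: the w            */
        uint64_t args[2];

        munge_lw(regs, args);
        assert(args[0] == 0x0000000100000002ULL);   /* l: hi<<32 | lo   */
        assert(args[1] == 0x0000000000000003ULL);   /* w: zero-extended */
    }
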
diff --git a/bsd/dev/ppc/unix_signal.c b/bsd/dev/ppc/unix_signal.c
deleted file mode 100644
index 4ca48b0b7..000000000
--- a/bsd/dev/ppc/unix_signal.c
+++ /dev/null
@@ -1,953 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* 
- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
- */
-
-#include <mach/mach_types.h>
-#include <mach/exception_types.h>
-
-#include <sys/param.h>
-#include <sys/proc_internal.h>
-#include <sys/user.h>
-#include <sys/ucontext.h>
-#include <sys/sysproto.h>
-#include <sys/systm.h>
-#include <sys/ux_exception.h>
-
-#include <ppc/signal.h>
-#include <sys/signalvar.h>
-#include <sys/kdebug.h>
-#include <sys/wait.h>
-#include <kern/thread.h>
-#include <mach/ppc/thread_status.h>
-#include <ppc/proc_reg.h>
-
-#include <sys/sdt.h>
-
-// #include <machine/thread.h> XXX include path messed up for some reason...
-
-/* XXX functions not in Mach headers */
-extern kern_return_t thread_getstatus(register thread_t act, int flavor,
-			thread_state_t tstate, mach_msg_type_number_t *count);
-extern unsigned int get_msr_exportmask(void);
-extern kern_return_t thread_setstatus(thread_t thread, int flavor,
-			thread_state_t tstate, mach_msg_type_number_t count);
-extern void ppc_checkthreadstate(void *, int);
-extern struct savearea_vec *find_user_vec_curr(void);
-extern int thread_enable_fpe(thread_t act, int onoff);
-
-
-
-#define	C_32_REDZONE_LEN	224
-#define	C_32_STK_ALIGN		16
-#define C_32_PARAMSAVE_LEN	64
-#define	C_32_LINKAGE_LEN	48
-
-#define	C_64_REDZONE_LEN	320
-#define	C_64_STK_ALIGN		32
-#define	C_64_PARAMSAVE_LEN	64
-#define	C_64_LINKAGE_LEN	48
-
-#define TRUNC_DOWN32(a,b,c)	((((uint32_t)a)-(b)) & ((uint32_t)(-(c))))
-#define TRUNC_DOWN64(a,b,c)	((((uint64_t)a)-(b)) & ((uint64_t)(-(c))))
-
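
TRUNC_DOWN32/64 above carve out the ABI red zone and round the stack pointer down to the required alignment. A worked check using the 32-bit constants (224-byte red zone, 16-byte alignment): (0xbffff00a - 0xe0) & ~0xf == 0xbfffef20.

    #include <assert.h>
    #include <stdint.h>

    /* Same definition as above, repeated so this check stands alone. */
    #define TRUNC_DOWN32(a,b,c)	((((uint32_t)a)-(b)) & ((uint32_t)(-(c))))

    void trunc_down_example(void)
    {
        uint32_t sp = 0xbffff00a;
        /* C_32_REDZONE_LEN = 224, C_32_STK_ALIGN = 16, as defined above */
        assert(TRUNC_DOWN32(sp, 224, 16) == 0xbfffef20);
    }
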
-/*
- * The stack layout possibilities (info style); this needs to match the signal trampoline code
- *
- * Traditional:			1
- * Traditional64:		20
- * Traditional64with vec:	25
- * 32bit context		30
- * 32bit context with vector	35
- * 64bit context		40
- * 64bit context with vector	45
- * Dual context			50
- * Dual context with vector	55
- *
- */
- 
-#define UC_TRAD			1
-#define UC_TRAD_VEC		6
-#define UC_TRAD64		20
-#define UC_TRAD64_VEC		25
-#define UC_FLAVOR		30
-#define UC_FLAVOR_VEC		35
-#define UC_FLAVOR64		40
-#define UC_FLAVOR64_VEC		45
-#define UC_DUAL			50
-#define UC_DUAL_VEC		55
-#define	UC_SET_ALT_STACK	0x40000000
-#define UC_RESET_ALT_STACK	0x80000000
-
- /* The following are valid mcontext sizes */
-#define UC_FLAVOR_SIZE ((PPC_THREAD_STATE_COUNT + PPC_EXCEPTION_STATE_COUNT + PPC_FLOAT_STATE_COUNT) * sizeof(int))
-
-#define UC_FLAVOR_VEC_SIZE ((PPC_THREAD_STATE_COUNT + PPC_EXCEPTION_STATE_COUNT + PPC_FLOAT_STATE_COUNT + PPC_VECTOR_STATE_COUNT) * sizeof(int))
-
-#define UC_FLAVOR64_SIZE ((PPC_THREAD_STATE64_COUNT + PPC_EXCEPTION_STATE64_COUNT + PPC_FLOAT_STATE_COUNT) * sizeof(int))
-
-#define UC_FLAVOR64_VEC_SIZE ((PPC_THREAD_STATE64_COUNT + PPC_EXCEPTION_STATE64_COUNT + PPC_FLOAT_STATE_COUNT + PPC_VECTOR_STATE_COUNT) * sizeof(int))
-
-
-/*
- * NOTE: Source and target may *NOT* overlap!
- */
-static void
-ucontext_32to64(struct ucontext64 *in, struct user_ucontext64 *out)
-{
-	out->uc_onstack		= in->uc_onstack;
-	out->uc_sigmask		= in->uc_sigmask;
-
-	/* internal "structure assign" */
-	out->uc_stack.ss_sp	= CAST_USER_ADDR_T(in->uc_stack.ss_sp);
-	out->uc_stack.ss_size	= in->uc_stack.ss_size;
-	out->uc_stack.ss_flags	= in->uc_stack.ss_flags;
-
-	out->uc_link		= CAST_USER_ADDR_T(in->uc_link);
-	out->uc_mcsize		= in->uc_mcsize;
-	out->uc_mcontext64	= CAST_USER_ADDR_T(in->uc_mcontext64);
-}
-
-/*
- * This conversion is safe, since if we are converting for a 32 bit process,
- * then its values of uc_stack.ss_size and uc_mcsize will never exceed 4G.
- *
- * NOTE: Source and target may *NOT* overlap!
- */
-static void
-ucontext_64to32(struct user_ucontext64 *in, struct ucontext64 *out)
-{
-	out->uc_onstack		= in->uc_onstack;
-	out->uc_sigmask		= in->uc_sigmask;
-
-	/* internal "structure assign" */
-	out->uc_stack.ss_sp	= CAST_DOWN(void *,in->uc_stack.ss_sp);
-	out->uc_stack.ss_size	= in->uc_stack.ss_size;	/* range reduction */
-	out->uc_stack.ss_flags	= in->uc_stack.ss_flags;
-
-	out->uc_link		= CAST_DOWN(void *,in->uc_link);
-	out->uc_mcsize		= in->uc_mcsize;	/* range reduction */
-	out->uc_mcontext64	= CAST_DOWN(void *,in->uc_mcontext64);
-}
-
-/*
- * NOTE: Source and target may *NOT* overlap!
- */
-static void
-siginfo_user_to_user32(user_siginfo_t *in, user32_siginfo_t *out)
-{
-	out->si_signo	= in->si_signo;
-	out->si_errno	= in->si_errno;
-	out->si_code	= in->si_code;
-	out->si_pid	= in->si_pid;
-	out->si_uid	= in->si_uid;
-	out->si_status	= in->si_status;
-	out->si_addr	= CAST_DOWN_EXPLICIT(user32_addr_t,in->si_addr);
-	/* following cast works for sival_int because of padding */
-	out->si_value.sival_ptr	= CAST_DOWN_EXPLICIT(user32_addr_t,in->si_value.sival_ptr);
-	out->si_band	= in->si_band;			/* range reduction */
-	out->__pad[0]	= in->pad[0];			/* mcontext.ss.r1 */
-}
-
-static void
-siginfo_user_to_user64(user_siginfo_t *in, user64_siginfo_t *out)
-{
-	out->si_signo	= in->si_signo;
-	out->si_errno	= in->si_errno;
-	out->si_code	= in->si_code;
-	out->si_pid	= in->si_pid;
-	out->si_uid	= in->si_uid;
-	out->si_status	= in->si_status;
-	out->si_addr	= in->si_addr;
-	out->si_value.sival_ptr	= in->si_value.sival_ptr;
-	out->si_band	= in->si_band;			/* range reduction */
-	out->__pad[0]	= in->pad[0];			/* mcontext.ss.r1 */
-}
-
-
-/*
- * Arrange for this process to run a signal handler
- */
-
-void
-sendsig(struct proc *p, user_addr_t catcher, int sig, int mask, __unused uint32_t code)
-{
-	kern_return_t kretn;
-	struct mcontext mctx;
-	user_addr_t p_mctx = USER_ADDR_NULL;		/* mcontext dest. */
-	struct mcontext64 mctx64;
-	user_addr_t p_mctx64 = USER_ADDR_NULL;		/* mcontext dest. */
-	struct user_ucontext64 uctx;
-	user_addr_t p_uctx;		/* user stack addr to copy the ucontext to */
-	user_siginfo_t sinfo;
-	user_addr_t p_sinfo;		/* user stack addr to copy the siginfo to */
-	struct sigacts *ps = p->p_sigacts;
-	int oonstack;
-	user_addr_t sp;
-	mach_msg_type_number_t state_count;
-	thread_t th_act;
-	struct uthread *ut;
-	int infostyle = UC_TRAD;
-	int dualcontext =0;
-	user_addr_t trampact;
-	int vec_used = 0;
-	int stack_size = 0;
-	void * tstate;
-	int flavor;
-	int ctx32 = 1;
-
-	th_act = current_thread();
-	ut = get_bsdthread_info(th_act);
-
-	/*
-	 * XXX We conditionalize type passed here based on SA_SIGINFO, but
-	 * XXX we always send up all the information, regardless; perhaps
-	 * XXX this should not be conditionalized?  Defer making this change
-	 * XXX now, due to possible tools impact.
-	 */
-	if (p->p_sigacts->ps_siginfo & sigmask(sig)) {
-		/*
-		 * If SA_SIGINFO is set, then we must provide the user
-		 * process both a siginfo_t and a context argument.  We call
-		 * this "FLAVORED", as opposed to "TRADITIONAL", which doesn't
-		 * expect a context.  "DUAL" is a type of "FLAVORED".
-		 */
-		if (is_64signalregset()) {
-			/*
-			 * If this is a 64 bit CPU, we must include a 64 bit
-			 * context in the data we pass to user space; we may
-			 * or may not also include a 32 bit context at the
-			 * same time, for non-leaf functions.
-			 *
-			 * The user may also explicitly choose to not receive
-			 * a 32 bit context, at their option; we only allow
-			 * this to happen on 64 bit processors, for obvious
-			 * reasons.
-			 */
-			if (IS_64BIT_PROCESS(p) ||
-			    (p->p_sigacts->ps_64regset & sigmask(sig))) {
-				 /*
-				  * For a 64 bit process, there is no 32 bit
-				  * context.
-				  */
-				ctx32 = 0;
-				infostyle = UC_FLAVOR64;
-			} else {
-				/*
-				 * For a 32 bit process on a 64 bit CPU, we
-				 * may have 64 bit leaf functions, so we need
-				 * both contexts.
-				 */
-				dualcontext = 1;
-				infostyle = UC_DUAL;
-			}
-		} else {
-			/*
-			 * If this is a 32 bit CPU, then we only have a 32 bit
-			 * context to contend with.
-			 */
-			infostyle = UC_FLAVOR;
-		}
-	} else {
-		/*
-		 * If SA_SIGINFO is not set, then we have a traditional style
-		 * call which does not need additional context passed.  The
-		 * default is 32 bit traditional.
-		 *
-		 * XXX The second check is redundant on PPC32; keep it anyway.
-		 */
-		if (is_64signalregset() || IS_64BIT_PROCESS(p)) {
-			/*
-			 * However, if this is a 64 bit CPU, we need to change
-			 * this to 64 bit traditional, and drop the 32 bit
-			 * context.
-			 */
-			ctx32 = 0;
-			infostyle = UC_TRAD64;
-		}
-	}
-
-	proc_unlock(p);
-
-	/* I need this for SIGINFO anyway */
-	flavor = PPC_THREAD_STATE;
-	tstate = (void *)&mctx.ss;
-	state_count = PPC_THREAD_STATE_COUNT;
-	if (thread_getstatus(th_act, flavor, (thread_state_t)tstate, &state_count)  != KERN_SUCCESS)
-		goto bad;
-
-	if ((ctx32 == 0) || dualcontext) {
-		flavor = PPC_THREAD_STATE64;
-		tstate = (void *)&mctx64.ss;
-		state_count = PPC_THREAD_STATE64_COUNT;
-		if (thread_getstatus(th_act, flavor, (thread_state_t)tstate, &state_count) != KERN_SUCCESS)
-			goto bad;
-	}
-
-	if ((ctx32 == 1) || dualcontext) {
-		flavor = PPC_EXCEPTION_STATE;
-		tstate = (void *)&mctx.es;
-		state_count = PPC_EXCEPTION_STATE_COUNT;
-		if (thread_getstatus(th_act, flavor, (thread_state_t)tstate, &state_count) != KERN_SUCCESS)
-			goto bad;
-	}
-
-	if ((ctx32 == 0) || dualcontext) {
-		flavor = PPC_EXCEPTION_STATE64;
-		tstate = (void *)&mctx64.es;
-		state_count = PPC_EXCEPTION_STATE64_COUNT;
-		if (thread_getstatus(th_act, flavor, (thread_state_t)tstate, &state_count) != KERN_SUCCESS)
-			goto bad;
-	}
-
-	if ((ctx32 == 1) || dualcontext) {
-		flavor = PPC_FLOAT_STATE;
-		tstate = (void *)&mctx.fs;
-		state_count = PPC_FLOAT_STATE_COUNT;
-		if (thread_getstatus(th_act, flavor, (thread_state_t)tstate, &state_count) != KERN_SUCCESS)
-			goto bad;
-	}
-
-	if ((ctx32 == 0) || dualcontext) {
-		flavor = PPC_FLOAT_STATE;
-		tstate = (void *)&mctx64.fs;
-		state_count = PPC_FLOAT_STATE_COUNT;
-		if (thread_getstatus(th_act, flavor, (thread_state_t)tstate, &state_count) != KERN_SUCCESS)
-			goto bad;
-	}
-
-	if (find_user_vec_curr()) {
-		vec_used = 1;
-
-		if ((ctx32 == 1) || dualcontext) {
-			flavor = PPC_VECTOR_STATE;
-			tstate = (void *)&mctx.vs;
-			state_count = PPC_VECTOR_STATE_COUNT;
-			if (thread_getstatus(th_act, flavor, (thread_state_t)tstate, &state_count) != KERN_SUCCESS)
-				goto bad;
-			infostyle += 5;
-		}
-
-		if ((ctx32 == 0) || dualcontext) {
-			flavor = PPC_VECTOR_STATE;
-			tstate = (void *)&mctx64.vs;
-			state_count = PPC_VECTOR_STATE_COUNT;
-			if (thread_getstatus(th_act, flavor, (thread_state_t)tstate, &state_count) != KERN_SUCCESS)
-				goto bad;
-			infostyle += 5;
-		}
-	}
-
-	trampact = ps->ps_trampact[sig];
-	oonstack = ut->uu_sigstk.ss_flags & SA_ONSTACK;
-
-	/* figure out where our new stack lives */
-	if ((ut->uu_flag & UT_ALTSTACK) && !oonstack &&
-		(ps->ps_sigonstack & sigmask(sig))) {
-		sp = ut->uu_sigstk.ss_sp;
-		sp += ut->uu_sigstk.ss_size;
-		stack_size = ut->uu_sigstk.ss_size;
-		ut->uu_sigstk.ss_flags |= SA_ONSTACK;
-	}
-	else {
-		if (ctx32 == 0)
-			sp = mctx64.ss.r1;
-		else
-			sp = CAST_USER_ADDR_T(mctx.ss.r1);
-	}
-
-	
-	/* put siginfo on top */
-        
-	/* preserve RED ZONE area */
-	if (IS_64BIT_PROCESS(p))
-		sp = TRUNC_DOWN64(sp, C_64_REDZONE_LEN, C_64_STK_ALIGN);
-	else
-		sp = TRUNC_DOWN32(sp, C_32_REDZONE_LEN, C_32_STK_ALIGN);
-
-	/* next are the saved registers */
-	if ((ctx32 == 0) || dualcontext) {
-		sp -= sizeof(struct mcontext64);
-		p_mctx64 = sp;
-	}
-	if ((ctx32 == 1) || dualcontext) {
-		sp -= sizeof(struct mcontext);
-		p_mctx = sp;
-	}
-        
-	if (IS_64BIT_PROCESS(p)) {
-		/* context goes first on stack */
-		sp -= sizeof(struct user_ucontext64);
-		p_uctx = sp;
-
-		/* this is where siginfo goes on stack */
-		sp -= sizeof(user64_siginfo_t);
-		p_sinfo = sp;
-		
-		sp = TRUNC_DOWN64(sp, C_64_PARAMSAVE_LEN+C_64_LINKAGE_LEN, C_64_STK_ALIGN);
-	} else {
-		/*
-		 * struct ucontext and struct ucontext64 are identical in
-		 * size and content; the only difference is the internal
-		 * pointer type for the last element, which makes no
-		 * difference for the copyout().
-		 */
-
-		/* context goes first on stack */
-		sp -= sizeof(struct ucontext64);
-		p_uctx = sp;
-
-		/* this is where siginfo goes on stack */
-		sp -= sizeof(user32_siginfo_t);
-		p_sinfo = sp;
-
-		sp = TRUNC_DOWN32(sp, C_32_PARAMSAVE_LEN+C_32_LINKAGE_LEN, C_32_STK_ALIGN);
-	}
-
-	uctx.uc_onstack = oonstack;
-	uctx.uc_sigmask = mask;
-	uctx.uc_stack.ss_sp = sp;
-	uctx.uc_stack.ss_size = stack_size;
-	if (oonstack)
-		uctx.uc_stack.ss_flags |= SS_ONSTACK;
-		
-	uctx.uc_link = 0;
-	if (ctx32 == 0)
-		uctx.uc_mcsize = (size_t)((PPC_EXCEPTION_STATE64_COUNT + PPC_THREAD_STATE64_COUNT + PPC_FLOAT_STATE_COUNT) * sizeof(int));
-	else
-		uctx.uc_mcsize = (size_t)((PPC_EXCEPTION_STATE_COUNT + PPC_THREAD_STATE_COUNT + PPC_FLOAT_STATE_COUNT) * sizeof(int));
-	
-	if (vec_used) 
-		uctx.uc_mcsize += (size_t)(PPC_VECTOR_STATE_COUNT * sizeof(int));
-        
-	if (ctx32 == 0)
-		uctx.uc_mcontext64 = p_mctx64;
-	else
-		uctx.uc_mcontext64 = p_mctx;
-
-	/* setup siginfo */
-	bzero((caddr_t)&sinfo, sizeof(sinfo));
-	sinfo.si_signo = sig;
-	if (ctx32 == 0) {
-		sinfo.si_addr = mctx64.ss.srr0;
-		sinfo.pad[0] = mctx64.ss.r1;
-	} else {
-		sinfo.si_addr = CAST_USER_ADDR_T(mctx.ss.srr0);
-		sinfo.pad[0] = CAST_USER_ADDR_T(mctx.ss.r1);
-	}
-
-	switch (sig) {
-		case SIGILL:
-			/*
-			 * If it's 64 bit and not a dual context, mctx will
-			 * contain uninitialized data, so we have to use
-			 * mctx64 here.
-			 */
-			if(ctx32 == 0) {
-				if (mctx64.ss.srr1 & (1 << (31 - SRR1_PRG_ILL_INS_BIT)))
-					sinfo.si_code = ILL_ILLOPC;
-				else if (mctx64.ss.srr1 & (1 << (31 - SRR1_PRG_PRV_INS_BIT)))
-					sinfo.si_code = ILL_PRVOPC;
-				else if (mctx64.ss.srr1 & (1 << (31 - SRR1_PRG_TRAP_BIT)))
-					sinfo.si_code = ILL_ILLTRP;
-				else
-					sinfo.si_code = ILL_NOOP;
-			} else {
-				if (mctx.ss.srr1 & (1 << (31 - SRR1_PRG_ILL_INS_BIT)))
-					sinfo.si_code = ILL_ILLOPC;
-				else if (mctx.ss.srr1 & (1 << (31 - SRR1_PRG_PRV_INS_BIT)))
-					sinfo.si_code = ILL_PRVOPC;
-				else if (mctx.ss.srr1 & (1 << (31 - SRR1_PRG_TRAP_BIT)))
-					sinfo.si_code = ILL_ILLTRP;
-				else
-					sinfo.si_code = ILL_NOOP;
-			}
-			break;
-		case SIGFPE:
-#define FPSCR_VX	2
-#define FPSCR_OX	3
-#define FPSCR_UX	4
-#define FPSCR_ZX	5
-#define FPSCR_XX	6
-			/*
-			 * If it's 64 bit and not a dual context, mctx will
-			 * contain uninitialized data, so we have to use
-			 * mctx64 here.
-			 */
-			if(ctx32 == 0) {
-				if (mctx64.fs.fpscr & (1 << (31 - FPSCR_VX)))
-					sinfo.si_code = FPE_FLTINV;
-				else if (mctx64.fs.fpscr & (1 << (31 - FPSCR_OX)))
-					sinfo.si_code = FPE_FLTOVF;
-				else if (mctx64.fs.fpscr & (1 << (31 - FPSCR_UX)))
-					sinfo.si_code = FPE_FLTUND;
-				else if (mctx64.fs.fpscr & (1 << (31 - FPSCR_ZX)))
-					sinfo.si_code = FPE_FLTDIV;
-				else if (mctx64.fs.fpscr & (1 << (31 - FPSCR_XX)))
-					sinfo.si_code = FPE_FLTRES;
-				else
-					sinfo.si_code = FPE_NOOP;
-			} else {
-				if (mctx.fs.fpscr & (1 << (31 - FPSCR_VX)))
-					sinfo.si_code = FPE_FLTINV;
-				else if (mctx.fs.fpscr & (1 << (31 - FPSCR_OX)))
-					sinfo.si_code = FPE_FLTOVF;
-				else if (mctx.fs.fpscr & (1 << (31 - FPSCR_UX)))
-					sinfo.si_code = FPE_FLTUND;
-				else if (mctx.fs.fpscr & (1 << (31 - FPSCR_ZX)))
-					sinfo.si_code = FPE_FLTDIV;
-				else if (mctx.fs.fpscr & (1 << (31 - FPSCR_XX)))
-					sinfo.si_code = FPE_FLTRES;
-				else
-					sinfo.si_code = FPE_NOOP;
-			}
-			break;
-
-		case SIGBUS:
-			if (ctx32 == 0) {
-				sinfo.si_addr = mctx64.es.dar;
-			} else {
-				sinfo.si_addr = CAST_USER_ADDR_T(mctx.es.dar);
-			}
-			/* on ppc we generate SIGBUS only for EXC_PPC_UNALIGNED */
-			sinfo.si_code = BUS_ADRALN;
-			break;
-
-		case SIGSEGV:
-			/*
-			 * If it's 64 bit and not a dual context, mctx will
-			 * contain uninitialized data, so we have to use
-			 * mctx64 here.
-			 */
-			if (ctx32 == 0) {
-				sinfo.si_addr = mctx64.es.dar;
-				/* First check in srr1 and then in dsisr */
-				if (mctx64.ss.srr1 & (1 << (31 - DSISR_PROT_BIT)))
-					sinfo.si_code = SEGV_ACCERR;
-				else if (mctx64.es.dsisr & (1 << (31 - DSISR_PROT_BIT)))
-					sinfo.si_code = SEGV_ACCERR;
-				else
-					sinfo.si_code = SEGV_MAPERR;
-			} else {
-				sinfo.si_addr = CAST_USER_ADDR_T(mctx.es.dar);
-				/* First check in srr1 and then in dsisr */
-				if (mctx.ss.srr1 & (1 << (31 - DSISR_PROT_BIT)))
-					sinfo.si_code = SEGV_ACCERR;
-				else if (mctx.es.dsisr & (1 << (31 - DSISR_PROT_BIT)))
-					sinfo.si_code = SEGV_ACCERR;
-				else
-					sinfo.si_code = SEGV_MAPERR;
-			}
-			break;
-		default:
-		{
-			int status_and_exitcode;
-
-			/*
-			 * All other signals need to fill out a minimum set of
-			 * information for the siginfo structure passed into
-			 * the signal handler, if SA_SIGINFO was specified.
-			 *
-			 * p->si_status actually contains both the status and
-			 * the exit code; we save it off in its own variable
-			 * for later breakdown.
-			 */
-			proc_lock(p);
-			sinfo.si_pid = p->si_pid;
-			p->si_pid = 0;
-			status_and_exitcode = p->si_status;
-			p->si_status = 0;
-			sinfo.si_uid = p->si_uid;
-			p->si_uid = 0;
-			sinfo.si_code = p->si_code;
-			p->si_code = 0;
-			proc_unlock(p);
-			if (sinfo.si_code == CLD_EXITED) {
-				if (WIFEXITED(status_and_exitcode)) 
-					sinfo.si_code = CLD_EXITED;
-				else if (WIFSIGNALED(status_and_exitcode)) {
-					if (WCOREDUMP(status_and_exitcode)) {
-						sinfo.si_code = CLD_DUMPED;
-						status_and_exitcode = W_EXITCODE(status_and_exitcode,status_and_exitcode);
-					} else {
-						sinfo.si_code = CLD_KILLED;
-						status_and_exitcode = W_EXITCODE(status_and_exitcode,status_and_exitcode);
-					}
-				}
-			}
-			/*
-			 * The recorded status contains the exit code and the
-			 * signal information, but the information to be passed
-			 * in the siginfo to the handler is supposed to only
-			 * contain the status, so we have to shift it out.
-			 */
-			sinfo.si_status = WEXITSTATUS(status_and_exitcode);
-			break;
-		}
-	}
-
-
-	/* copy info out to user space */
-	if (IS_64BIT_PROCESS(p)) {
-		user64_siginfo_t sinfo64;
-
-		siginfo_user_to_user64(&sinfo,&sinfo64);
-
-#if CONFIG_DTRACE		
-        bzero((caddr_t)&(ut->t_dtrace_siginfo), sizeof(ut->t_dtrace_siginfo));
-
-        ut->t_dtrace_siginfo.si_signo = sinfo.si_signo;
-        ut->t_dtrace_siginfo.si_code = sinfo.si_code;
-        ut->t_dtrace_siginfo.si_pid = sinfo.si_pid;
-        ut->t_dtrace_siginfo.si_uid = sinfo.si_uid;
-        ut->t_dtrace_siginfo.si_status = sinfo.si_status;
-		/* XXX truncates faulting address to void * on K32  */
-        ut->t_dtrace_siginfo.si_addr = CAST_DOWN(void *, sinfo.si_addr);
-
-
-        /* Fire DTrace proc:::fault probe when signal is generated by hardware. */
-        switch (sig) {
-        case SIGILL: case SIGBUS: case SIGSEGV: case SIGFPE: case SIGTRAP:
-            DTRACE_PROC2(fault, int, (int)(ut->uu_code), siginfo_t *, &(ut->t_dtrace_siginfo));
-            break;
-        default:
-            break;
-        }
-
-		/* XXX truncates catcher address to uintptr_t */
-		DTRACE_PROC3(signal__handle, int, sig, siginfo_t *, &(ut->t_dtrace_siginfo),
-			void (*)(void), CAST_DOWN(sig_t, catcher));
-#endif /* CONFIG_DTRACE */
-
-		if (copyout(&uctx, p_uctx, sizeof(struct user_ucontext64)))
-			goto bad;
-		if (copyout(&sinfo64, p_sinfo, sizeof(sinfo64)))
-			goto bad;
-	} else {
-		struct ucontext64 uctx32;
-		user32_siginfo_t sinfo32;
-
-		ucontext_64to32(&uctx, &uctx32);
-		siginfo_user_to_user32(&sinfo,&sinfo32);
-
-#if CONFIG_DTRACE
-        bzero((caddr_t)&(ut->t_dtrace_siginfo), sizeof(ut->t_dtrace_siginfo));
-
-        ut->t_dtrace_siginfo.si_signo = sinfo.si_signo;
-        ut->t_dtrace_siginfo.si_code = sinfo.si_code;
-        ut->t_dtrace_siginfo.si_pid = sinfo.si_pid;
-        ut->t_dtrace_siginfo.si_uid = sinfo.si_uid;
-        ut->t_dtrace_siginfo.si_status = sinfo.si_status;
-        ut->t_dtrace_siginfo.si_addr = CAST_DOWN(void *, sinfo.si_addr);
-
-
-        /* Fire DTrace proc:::fault probe when signal is generated by hardware. */
-        switch (sig) {
-        case SIGILL: case SIGBUS: case SIGSEGV: case SIGFPE: case SIGTRAP:
-            DTRACE_PROC2(fault, int, (int)(ut->uu_code), siginfo_t *, &(ut->t_dtrace_siginfo));
-            break;
-        default:
-            break;
-        }
-
-		DTRACE_PROC3(signal__handle, int, sig, siginfo_t *, &(ut->t_dtrace_siginfo),
-			void (*)(void), CAST_DOWN(sig_t, catcher));
-#endif /* CONFIG_DTRACE */
-
-		if (copyout(&uctx32, p_uctx, sizeof(struct ucontext64)))
-			goto bad;
-
-		if (copyout(&sinfo32, p_sinfo, sizeof(sinfo32)))
-			goto bad;
-	}
-	if ((ctx32 == 0) || dualcontext) {
-		/*
-		 * NOTE: The size of the mcontext does not vary between 64-bit
-		 * and 32-bit programs using 64-bit registers.
-		 */
-		if (copyout(&mctx64, p_mctx64, (vec_used ? UC_FLAVOR64_VEC_SIZE : UC_FLAVOR64_SIZE)))
-			goto bad;
-	}
-	if ((ctx32 == 1) || dualcontext) {
-		if (copyout(&mctx, p_mctx, uctx.uc_mcsize))
-			goto bad;
-	}
-
-
-	/* Place our arguments in arg registers: runtime model (rtm) dependent */
-	if(IS_64BIT_PROCESS(p)) {
-		mctx64.ss.r3 = catcher;
-		mctx64.ss.r4 = CAST_USER_ADDR_T(infostyle);
-		mctx64.ss.r5 = CAST_USER_ADDR_T(sig);
-		mctx64.ss.r6 = p_sinfo;
-		mctx64.ss.r7 = p_uctx;
-
-		mctx64.ss.srr0 = trampact;
-		/* MSR_EXPORT_MASK_SET */
-		mctx64.ss.srr1 = CAST_USER_ADDR_T(get_msr_exportmask());
-		mctx64.ss.r1 = sp;
-		state_count = PPC_THREAD_STATE64_COUNT;
-		if ((kretn = thread_setstatus(th_act, PPC_THREAD_STATE64, (void *)&mctx64.ss, state_count))  != KERN_SUCCESS) {
-			panic("sendsig: thread_setstatus failed, ret = %08X\n", kretn);
-		}	
-	} else {
-		mctx.ss.r3 = CAST_DOWN(uint32_t,catcher);
-		mctx.ss.r4 = (uint32_t)infostyle;
-		mctx.ss.r5 = (uint32_t)sig;
-		mctx.ss.r6 = CAST_DOWN(uint32_t,p_sinfo);
-		mctx.ss.r7 = CAST_DOWN(uint32_t,p_uctx);
-
-		mctx.ss.srr0 = CAST_DOWN(uint32_t,trampact);
-		/* MSR_EXPORT_MASK_SET */
-		mctx.ss.srr1 = get_msr_exportmask();
-		mctx.ss.r1 = CAST_DOWN(uint32_t,sp);
-		state_count = PPC_THREAD_STATE_COUNT;
-		if ((kretn = thread_setstatus(th_act, PPC_THREAD_STATE, (void *)&mctx.ss, state_count))  != KERN_SUCCESS) {
-			panic("sendsig: thread_setstatus failed, ret = %08X\n", kretn);
-		}	
-	}
-
-	proc_lock(p);
-	return;
-
-bad:
-	proc_lock(p);
-	SIGACTION(p, SIGILL) = SIG_DFL;
-	sig = sigmask(SIGILL);
-	p->p_sigignore &= ~sig;
-	p->p_sigcatch &= ~sig;
-	ut->uu_sigmask &= ~sig;
-	/* sendsig is called with signal lock held */
-	proc_unlock(p);
-	psignal_locked(p, SIGILL);
-	proc_lock(p);
-	return;
-}
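
A note on the `1 << (31 - n)` pattern used throughout the SIGILL/SIGFPE/SIGSEGV cases above: PowerPC manuals number bits from the most significant end (bit 0 is the MSB of a 32-bit register), while C shift counts run from the least significant end, so an architecturally numbered bit such as FPSCR_VX or SRR1_PRG_TRAP_BIT must be flipped with 31 - n before it can be used as a mask. A sketch of the idiom (the macro name is ours; the file writes the shift out longhand):

/* Hypothetical convenience macro -- not defined in xnu. */
#define PPC_BIT32(n)	(1u << (31 - (n)))

/* With it, the FPSCR "invalid operation" test above would read:
 *	if (mctx.fs.fpscr & PPC_BIT32(FPSCR_VX))
 *		sinfo.si_code = FPE_FLTINV;
 */
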
-
-/*
- * System call to cleanup state after a signal
- * has been taken.  Reset signal mask and
- * stack state from context left by sendsig (above).
- * Return to previous pc and psl as specified by
- * context left by sendsig. Check carefully to
- * make sure that the user has not modified the
- * psl to gain improper privileges or to cause
- * a machine fault.
- */
-
-/* ARGSUSED */
-int
-sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval)
-{
-	struct user_ucontext64 uctx;
-
-	char mactx[sizeof(struct mcontext64)];
-	struct mcontext *p_mctx;
-	struct mcontext64 *p_64mctx;
-	int error;
-	thread_t th_act;
-	struct sigacts *ps = p->p_sigacts;
-	sigset_t mask;	
-	user_addr_t action;
-	uint32_t state_count;
-	unsigned int state_flavor;
-	struct uthread * ut;
-	int vec_used = 0;
-	void *tsptr, *fptr, *vptr;
-	int infostyle = uap->infostyle;
-
-	th_act = current_thread();
-
-	ut = (struct uthread *)get_bsdthread_info(th_act);
-
-	/*
-	 * If we are being asked to change the altstack flag on the thread,
-	 * we just set or reset it and return (uap->uctx is not used).
-	 */
-	if (infostyle == UC_SET_ALT_STACK) {
-		ut->uu_sigstk.ss_flags |= SA_ONSTACK;
-		return (0);
-	} else if ((unsigned int)infostyle == UC_RESET_ALT_STACK) {
-		ut->uu_sigstk.ss_flags &= ~SA_ONSTACK;
-		return (0);
-	}
-
-	if (IS_64BIT_PROCESS(p)) {
-		error = copyin(uap->uctx, &uctx, sizeof(struct user_ucontext64));
-		if (error)
-			return(error);
-	} else {
-		struct ucontext64 uctx32;
-
-		/*
-		 * struct ucontext and struct ucontext64 are identical in
-		 * size and content; the only difference is the internal
-		 * pointer type for the last element, which makes no
-		 * difference for the copyin().
-		 */
-		error = copyin(uap->uctx, &uctx32, sizeof(struct ucontext));
-		if (error)
-			return(error);
-		ucontext_32to64(&uctx32, &uctx);
-	}
-        
-
-	/* validate the machine context size */
-	switch (uctx.uc_mcsize) {
-		case UC_FLAVOR64_VEC_SIZE:
-		case UC_FLAVOR64_SIZE:
-		case UC_FLAVOR_VEC_SIZE:
-		case UC_FLAVOR_SIZE:
-			break;
-		default:
-			return(EINVAL);
-	}
-
-	/*
-	 * The 64 bit process mcontext is identical to the mcontext64, so
-	 * there is no conversion necessary.
-	 */
-	error = copyin(uctx.uc_mcontext64, mactx, uctx.uc_mcsize);
-	if (error)
-		return(error);
-	
-	if (uctx.uc_onstack & 01)
-		ut->uu_sigstk.ss_flags |= SA_ONSTACK;
-	else
-		ut->uu_sigstk.ss_flags &= ~SA_ONSTACK;
-
-	ut->uu_sigmask = uctx.uc_sigmask & ~sigcantmask;
-	if (ut->uu_siglist & ~ut->uu_sigmask)
-		signal_setast(current_thread());	
-
-	vec_used = 0;
-	switch (infostyle) {
-		case UC_FLAVOR64_VEC:
-		case UC_TRAD64_VEC:
-			vec_used = 1;
-			/* FALLTHROUGH */
-		case UC_TRAD64:
-		case UC_FLAVOR64: {
-			p_64mctx = (struct mcontext64 *)mactx;
-			tsptr = (void *)&p_64mctx->ss;
-			fptr = (void *)&p_64mctx->fs;
-			vptr = (void *)&p_64mctx->vs;
-			state_flavor = PPC_THREAD_STATE64;
-			state_count = PPC_THREAD_STATE64_COUNT;
-		}
-		break;
-		case UC_FLAVOR_VEC:
-		case UC_TRAD_VEC:
-			vec_used = 1;
-			/* FALLTHROUGH */
-		case UC_FLAVOR:
-		case UC_TRAD:
-		default: {
-			p_mctx = (struct mcontext *)mactx;
-			tsptr = (void *)&p_mctx->ss;
-			fptr = (void *)&p_mctx->fs;
-			vptr = (void *)&p_mctx->vs;
-			state_flavor = PPC_THREAD_STATE;
-			state_count = PPC_THREAD_STATE_COUNT;
-		}
-		break;
-	} /* switch (infostyle) */
-
-	/* validate the thread state, set/reset appropriate mode bits in srr1 */
-	(void)ppc_checkthreadstate(tsptr, state_flavor);
-
-	if (thread_setstatus(th_act, state_flavor, tsptr, state_count)  != KERN_SUCCESS) {
-		return(EINVAL);
-	}	
-
-	state_count = PPC_FLOAT_STATE_COUNT;
-	if (thread_setstatus(th_act, PPC_FLOAT_STATE, fptr, state_count)  != KERN_SUCCESS) {
-		return(EINVAL);
-	}	
-
-	mask = sigmask(SIGFPE);
-	if (((ut->uu_sigmask & mask) == 0) && (p->p_sigcatch & mask) && ((p->p_sigignore & mask) == 0)) {
-		action = ps->ps_sigact[SIGFPE];
-		if((action != SIG_DFL) && (action != SIG_IGN)) {
-			thread_enable_fpe(th_act, 1);
-		}
-	}
-
-	if (vec_used) {
-		state_count = PPC_VECTOR_STATE_COUNT;
-		if (thread_setstatus(th_act, PPC_VECTOR_STATE, vptr, state_count)  != KERN_SUCCESS) {
-			return(EINVAL);
-		}	
-	}
-	return (EJUSTRETURN);
-}
-
-/*
- * machine_exception() performs MD translation
- * of a mach exception to a unix signal and code.
- */
-
-boolean_t
-machine_exception(
-		int				exception,
-		mach_exception_code_t		code,
-		__unused mach_exception_subcode_t subcode,
-		int				*unix_signal,
-		mach_exception_code_t		*unix_code)
-{
-    switch(exception) {
-
-    case EXC_BAD_INSTRUCTION:
-	*unix_signal = SIGILL;
-	*unix_code = code;
-	break;
-
-    case EXC_ARITHMETIC:
-	*unix_signal = SIGFPE;
-	*unix_code = code;
-	break;
-
-    case EXC_SOFTWARE:
-	if (code == EXC_PPC_TRAP) {
-		*unix_signal = SIGTRAP;
-		*unix_code = code;
-		break;
-	} else
-		return(FALSE);
-
-    default:
-	return(FALSE);
-    }
-   
-    return(TRUE);
-}
-
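
For orientation, a sketch of how the translation might be consumed (the caller code here is hypothetical and elided from this patch; only the mapping itself is from the source):

int sig = 0;
mach_exception_code_t ucode = 0;

/* Hypothetical fragment: EXC_ARITHMETIC translates to SIGFPE, ucode == code. */
if (machine_exception(EXC_ARITHMETIC, code, 0, &sig, &ucode)) {
	/* deliver 'sig' (here SIGFPE) to the process, carrying 'ucode' */
} else {
	/* fall back to the machine-independent translation */
}
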
diff --git a/bsd/dev/ppc/xsumas.s b/bsd/dev/ppc/xsumas.s
deleted file mode 100644
index 6ac06e947..000000000
--- a/bsd/dev/ppc/xsumas.s
+++ /dev/null
@@ -1,401 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#define kShort  11
-#define cr1_gt  5       // bit 1 of cr1
-
-/*
- * short xsum_assym( short *p, int len, short xsum, boolean odd);
- *
- *  r3 - Pointer to data
- *  r4 - Length of data
- *  r5 - Accumulated sum value
- *  r6 -"Starting on odd address" flag (relative to byte 0 of the checksumed data)
- *
- * Note: If the "odd" flag is set, the address in r3 will be even.  Nonetheless, we
- *       correctly handle the case where the flag is set and the address is odd.
- *
- * This is the internet (IP, TCP) checksum algorithm, which is the 1s-complement sum
- * of the data, treated as an array of 16-bit integers.  1s-complement sums are done
- * via "add with carry" operations on a 2s-complement machine like PPC.  Note that
- * the adds can be done in parallel on 32-bit (or 64-bit) registers, as long as the
- * final sum is folded down to 16 bits.  On 32-bit machines we use "adde", which is
- * perfect except that it serializes the adds on the carry bit.  On 64-bit machines
- * we avoid this serialization by adding 32-bit words into 64-bit sums, then folding
- * all 64-bits into a 16-bit sum at the end.  We cannot use "adde" on 64-bit sums,
- * because the kernel runs in 32-bit mode even on 64-bit machines (so the carry bit
- * is set on the low 32-bits of the sum.)
- *
- * Using Altivec is tempting, but the performance impact of the greatly increased
- * number of exceptions and register save/restore traffic probably make it impractical
- * for now.
- */        
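
As a cross-check on the comment above, a back-of-the-envelope C model of the routine's contract (a sketch, not the kernel code: it ignores the alignment and 64-bit fast paths, sums 16-bit big-endian lanes directly, and models the "odd" flag as the final byte swap performed at Lswapped below):

#include <stdint.h>

static uint16_t
xsum_ref(const uint8_t *p, int len, uint16_t xsum, int odd)
{
	uint32_t sum = 0;

	while (len > 1) {			/* sum 16-bit big-endian lanes */
		sum += (uint32_t)((p[0] << 8) | p[1]);
		p += 2;
		len -= 2;
	}
	if (len)				/* trailing odd byte: high lane */
		sum += (uint32_t)p[0] << 8;
	while (sum >> 16)			/* fold carries back into 16 bits */
		sum = (sum & 0xFFFF) + (sum >> 16);
	if (odd)				/* started on odd address: swap lanes */
		sum = ((sum & 0xFF) << 8) | (sum >> 8);
	sum += xsum;				/* add accumulated sum parameter */
	while (sum >> 16)
		sum = (sum & 0xFFFF) + (sum >> 16);
	return (uint16_t)sum;
}
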
-        .globl  _xsum_assym
-        .globl  _xsum_nop_if_32bit
-        .text
-        .align  5
-_xsum_assym:
-        cmplwi  cr0,r4,kShort   ; too short to word align?
-        rlwinm  r2,r3,0,0x3     ; get byte offset in word
-        dcbt    0,r3            ; touch in 1st cache line
-        cmpwi   cr6,r2,0        ; is address word aligned?
-        ble     cr0,Lshort      ; skip if too short to bother aligning
-        
-        subfic  r0,r2,4         ; get #bytes in partial word
-        cmplwi  cr1,r6,0        ; set cr1_gt if "starting on odd address" flag is set
-        addic   r0,r0,0         ; turn off carry
-        beq     cr6,Laligned    ; skip if already word aligned (r2==0 if aligned)
-        
-;       Partial word at start: zero filled on left, it becomes initial checksum.
-        
-        rlwinm  r3,r3,0,0,29    ; word align address
-        mtcrf   0x01,r2         ; move byte offset to cr7
-        lwz     r6,0(r3)        ; get partial word
-        li      r7,-1           ; start of mask for partial fill
-        slwi    r8,r2,3         ; multiply byte offset by 8
-        sub     r4,r4,r0        ; adjust length for bytes in partial word
-        crxor   cr1_gt,31,cr1_gt; set flag if byte-lane swap will be necessary
-        srw     r7,r7,r8        ; get mask for bytes to keep in partial word
-        addi    r3,r3,4         ; point to next word of input
-        and     r2,r6,r7        ; zero fill on left
-        
-;       Address is now word aligned.  Prepare for inner loop over 32-byte chunks.
-;           r2 = initial checksum
-;           r3 = word aligned address
-;           r4 = length remaining
-;           r5 = accumulated sum parameter
-;        carry = off
-;       cr1_gt = "starting on odd address" flag
-
-Laligned:
-        srwi.   r0,r4,5         ; get count of 32-byte chunks
-        mtcrf   0x02,r4         ; move residual length to cr6 and cr7
-        mtcrf   0x01,r4
-        beq     cr0,Lleftovers  ; no chunks
-        
-        mtctr   r0              ; set up loop count
-        li      r4,32           ; offset to next chunk
-_xsum_nop_if_32bit:
-        b       L64BitPath      ; use the 64-bit path (patched to nop on 32-bit machine)
-        dcbt    r4,r3           ; touch in 2nd cache line
-        li      r0,96           ; get touch offset
-        b       LInnerLoop32    ; enter 32-bit loop
-        
-;       Inner loop for 32-bit machines.
-
-        .align  4
-LInnerLoop32:
-        lwz     r4,0(r3)
-        lwz     r6,4(r3)
-        lwz     r7,8(r3)
-        lwz     r8,12(r3)
-        adde    r2,r2,r4
-        lwz     r9,16(r3)
-        adde    r2,r2,r6
-        lwz     r10,20(r3)
-        adde    r2,r2,r7
-        lwz     r11,24(r3)
-        adde    r2,r2,r8
-        lwz     r12,28(r3)
-        adde    r2,r2,r9
-        dcbt    r3,r0
-        adde    r2,r2,r10
-        addi    r3,r3,32
-        adde    r2,r2,r11
-        adde    r2,r2,r12
-        bdnz+   LInnerLoop32
-
-;       Handle leftover bytes.
-;           r2 = checksum so far
-;           r3 = word aligned address
-;           r5 = accumulated sum parameter
-;        carry = live
-;       cr1_gt = "starting on odd address" flag
-;      cr6,cr7 = residual length
-
-Lleftovers:
-        bf      27,Lleftover8   ; test 0x10 bit of residual length
-        lwz     r4,0(r3)
-        lwz     r6,4(r3)
-        lwz     r7,8(r3)
-        lwz     r8,12(r3)
-        addi    r3,r3,16
-        adde    r2,r2,r4
-        adde    r2,r2,r6
-        adde    r2,r2,r7
-        adde    r2,r2,r8
-Lleftover8:
-        bf      28,Lleftover4
-        lwz     r4,0(r3)
-        lwz     r6,4(r3)
-        addi    r3,r3,8
-        adde    r2,r2,r4
-        adde    r2,r2,r6
-Lleftover4:
-        bf      29,Lleftover2
-        lwz     r4,0(r3)
-        addi    r3,r3,4
-        adde    r2,r2,r4
-Lleftover2:
-        bf      30,Lleftover1
-        lhz     r4,0(r3)
-        addi    r3,r3,2
-        adde    r2,r2,r4
-Lleftover1:
-        bf      31,Lwrapup
-        lbz     r4,0(r3)
-        slwi    r4,r4,8         ; shift last byte into proper lane
-        adde    r2,r2,r4
-
-;       All data bytes checksummed.  Wrap up.
-;           r2 = checksum so far (word parallel)
-;           r5 = accumulated sum parameter
-;        carry = live
-;       cr1_gt = "starting on odd address" flag
-
-Lwrapup:
-        addze   r2,r2           ; add in last carry
-        addze   r2,r2           ; in case the "addze" carries
-Lwrapupx:                       ; here from short-operand case, with xer(ca) undefined
-        srwi    r6,r2,16        ; top half of 32-bit checksum
-        rlwinm  r7,r2,0,0xFFFF  ; lower half
-        add     r2,r6,r7        ; add them together
-        srwi    r6,r2,16        ; then do it again, in case first carried
-        rlwinm  r7,r2,0,0xFFFF
-        add     r2,r6,r7
-        bf      cr1_gt,Lswapped ; test "starting on odd address" flag
-        
-;       The checksum began on an odd address, so swap bytes.
-
-        rlwinm  r6,r2,24,0x00FF ; move top byte to bottom
-        rlwinm  r7,r2,8,0xFF00  ; bottom to top
-        or      r2,r6,r7        ; rejoin
-        
-;       Finally, add in checksum passed in as a parameter.
-
-Lswapped:
-        add     r2,r2,r5        ; add passed-in checksum
-        srwi    r6,r2,16        ; top half of 32-bit checksum
-        rlwinm  r7,r2,0,0xFFFF  ; lower half
-        add     r2,r6,r7        ; add them together
-        srwi    r6,r2,16        ; then do it again, in case first carried
-        rlwinm  r7,r2,0,0xFFFF
-        add     r3,r6,r7        ; steer result into r3
-        blr
-
-;       Handle short operands.  Do a halfword at a time.
-;           r3 = address
-;           r4 = length (<= kShort)
-;           r5 = accumulated sum parameter
-;           r6 = "starting on odd byte" flag
-
-Lshort:
-        cmpwi   cr6,r4,2        ; at least two bytes?
-        andi.   r0,r4,1         ; odd length?
-        li      r2,0            ; initialize checksum
-        cmplwi  cr1,r6,0        ; set cr1_gt if "starting on odd address" flag is set
-        blt     cr6,Lshort2     ; fewer than two bytes, so skip
-Lshort1:
-        cmpwi   cr6,r4,4        ; two more bytes (after we decrement)?
-        lhz     r7,0(r3)
-        subi    r4,r4,2
-        addi    r3,r3,2
-        add     r2,r2,r7        ; note no need for "adde"
-        bge     cr6,Lshort1     ; loop for 2 more bytes
-Lshort2:
-        beq     Lwrapupx        ; no byte at end, so proceed to wrapup with carry undefined
-        lbz     r7,0(r3)
-        slwi    r7,r7,8         ; shift last byte into proper lane
-        add     r2,r2,r7
-        b       Lwrapupx
-        
-;       Handle 64-bit machine.  The major improvement over the 32-bit path is that we use
-;       four parallel 32-bit accumulators, which carry into the upper half naturally so we
-;       do not have to use "adde", which serializes on the carry bit.  Note that we cannot
-;       do 64-bit "adde"s, because we run in 32-bit mode so carry would not be set correctly.
-;           r2 = checksum so far (ie, the zero-filled partial first word)
-;           r3 = word aligned address
-;           r5 = accumulated sum parameter
-;          ctr = number of 32-byte chunks of input
-;        carry = unused in this code
-;       cr1_gt = "starting on odd address" flag
-;      cr6,cr7 = residual length
-
-L64BitPath:
-        stw     r13,-4(r1)      ; save a few nonvolatile regs in red zone so we can use them
-        stw     r14,-8(r1)
-        stw     r15,-12(r1)
-        stw     r16,-16(r1)
-        li      r0,128          ; to touch next line
-        li      r13,0           ; r13-r15 are the accumulators, so initialize them
-        dcbt    r3,r0           ; touch in next cache line, and keep loads away from the above stores
-        lwz     r4,0(r3)        ; start pipeline by loading first 32 bytes into r4, r6-r12
-        lwz     r6,4(r3)
-        lwz     r7,8(r3)
-        mr      r14,r2          ; just copy incoming partial word into one of the accumulators
-        li      r15,0
-        lwz     r8,12(r3)
-        lwz     r9,16(r3)
-        li      r16,0
-        li      r0,256          ; get touch offset
-        lwz     r10,20(r3)
-        lwz     r11,24(r3)
-        lwz     r12,28(r3)      ; load last word of previous chunk
-        addi    r3,r3,32        ; skip past the chunk
-        bdnz++  LInnerLoop64    ; enter loop if another chunk to go
-        
-        b       LAddLastChunk   ; only one chunk
-        
-;       Inner loop for 64-bit processors.  This loop is scheduled for the 970.
-;       It is pipelined (loads are one iteration ahead of adds), and unrolled.
-;       It should take 9-10 cycles per iteration, which consumes 64 bytes of input.
-
-        .align  5
-LInnerLoop64:                   ; 64 bytes/iteration
-        add     r13,r13,r4      ; cycle 1
-        add     r14,r14,r6
-        dcbt    r3,r0           ; touch in 2 lines ahead
-        lwz     r4,0(r3)
-        
-        add     r15,r15,r7      ; cycle 2, etc
-        lwz     r6,4(r3)
-        lwz     r7,8(r3)
-        add     r16,r16,r8
-        
-        lwz     r8,12(r3)
-        add     r13,r13,r9
-        add     r14,r14,r10
-        lwz     r9,16(r3)
-        
-        add     r15,r15,r11
-        lwz     r10,20(r3)
-        lwz     r11,24(r3)
-        add     r16,r16,r12
-        bdz--   LEarlyExit      ; early exit if no more chunks
-        
-        lwz     r12,28(r3)
-        add     r13,r13,r4
-        add     r14,r14,r6
-        lwz     r4,32(r3)
-        
-        add     r15,r15,r7
-        lwz     r6,36(r3)
-        lwz     r7,40(r3)
-        add     r16,r16,r8
-        
-        lwz     r8,44(r3)
-        add     r13,r13,r9
-        add     r14,r14,r10
-        lwz     r9,48(r3)
-        
-        add     r15,r15,r11
-        lwz     r10,52(r3)
-        lwz     r11,56(r3)
-        add     r16,r16,r12
-        
-        nop                     ; position last load in 2nd dispatch slot
-        lwz     r12,60(r3)
-        addi    r3,r3,64
-        bdnz++  LInnerLoop64
-        
-        b       LAddLastChunk
-
-;       Add in the last 32-byte chunk, and any leftover bytes.
-;           r3 = word aligned address of next byte of data
-;           r5 = accumulated sum parameter
-;      r13-r16 = the four accumulators
-;       cr1_gt = "starting on odd address" flag
-;      cr6,cr7 = residual length
-
-LEarlyExit:                     ; here from middle of inner loop
-        lwz     r12,28(r3)      ; load last word of last chunk
-        addi    r3,r3,32
-LAddLastChunk:                  ; last 32-byte chunk of input is in r4,r6-r12
-        add     r13,r13,r4      ; add in last chunk
-        add     r14,r14,r6      ; these are 64-bit adds
-        add     r15,r15,r7
-        add     r16,r16,r8
-        add     r13,r13,r9
-        add     r14,r14,r10
-        add     r15,r15,r11
-        add     r16,r16,r12
-
-;       Handle leftover bytes, if any.
-
-        bf      27,Lleft1       ; test 0x10 bit of residual length
-        lwz     r4,0(r3)
-        lwz     r6,4(r3)
-        lwz     r7,8(r3)
-        lwz     r8,12(r3)
-        addi    r3,r3,16
-        add     r13,r13,r4
-        add     r14,r14,r6
-        add     r15,r15,r7
-        add     r16,r16,r8
-Lleft1:
-        bf      28,Lleft2
-        lwz     r4,0(r3)
-        lwz     r6,4(r3)
-        addi    r3,r3,8
-        add     r13,r13,r4
-        add     r14,r14,r6
-Lleft2:
-        bf      29,Lleft3
-        lwz     r4,0(r3)
-        addi    r3,r3,4
-        add     r14,r14,r4
-Lleft3:
-        bf      30,Lleft4
-        lhz     r4,0(r3)
-        addi    r3,r3,2
-        add     r15,r15,r4
-Lleft4:
-        bf      31,Lleft5
-        lbz     r4,0(r3)
-        slwi    r4,r4,8         ; shift last byte into proper lane
-        add     r16,r16,r4
-
-;       All data bytes have been checksummed.  Now we must add together the four
-;       accumulators and restore the regs from the red zone.
-;           r3 = word aligned address of next byte of data
-;           r5 = accumulated sum parameter
-;      r13-r16 = the four accumulators
-;        carry = not used so far
-;       cr1_gt = "starting on odd address" flag
-
-Lleft5:
-        add     r8,r13,r14      ; add the four accumulators together
-        add     r9,r15,r16
-        lwz     r13,-4(r1)      ; start to restore nonvolatiles from red zone
-        lwz     r14,-8(r1)
-        add     r8,r8,r9        ; now r8 is 64-bit sum of the four accumulators
-        lwz     r15,-12(r1)
-        lwz     r16,-16(r1)
-        srdi    r7,r8,32        ; get upper half of 64-bit sum
-        addc    r2,r7,r8        ; finally, do a 32-bit add of the two halves of r8 (setting carry)
-        b       Lwrapup         ; merge r2, r5, and carry into a 16-bit checksum
diff --git a/bsd/dev/random/Makefile b/bsd/dev/random/Makefile
index 1190bc1ff..7a07200d9 100644
--- a/bsd/dev/random/Makefile
+++ b/bsd/dev/random/Makefile
@@ -9,16 +9,12 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 INSTINC_SUBDIRS_X86_64 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS_X86_64 = \
diff --git a/bsd/dev/unix_startup.c b/bsd/dev/unix_startup.c
index 4ec548794..f167a1752 100644
--- a/bsd/dev/unix_startup.c
+++ b/bsd/dev/unix_startup.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -42,6 +42,7 @@
 #include <sys/file_internal.h>
 #include <sys/proc_internal.h>
 #include <sys/clist.h>
+#include <sys/mcache.h>
 #include <sys/mbuf.h>
 #include <sys/systm.h>
 #include <sys/tty.h>
@@ -50,7 +51,9 @@
 #include <machine/cons.h>
 #include <pexpert/pexpert.h>
 #include <sys/socketvar.h>
 
+extern uint32_t kern_maxvnodes;
 extern vm_map_t mb_map;
 
 #if INET || INET6
@@ -62,7 +65,7 @@ void            bsd_bufferinit(void) __attribute__((section("__TEXT, initcode"))
 extern void     md_prepare_for_shutdown(int, int, char *);
 
 unsigned int	bsd_mbuf_cluster_reserve(boolean_t *);
-void bsd_srv_setup(int);
+void bsd_scale_setup(int);
 void bsd_exec_setup(int);
 
 /*
@@ -71,7 +74,7 @@ void bsd_exec_setup(int);
 
 #ifdef	NBUF
 int             max_nbuf_headers = NBUF;
-int             niobuf_headers = NBUF / 2;
+int             niobuf_headers = (NBUF / 2) + 2048;
 int 		nbuf_hashelements = NBUF;
 int 		nbuf_headers = NBUF;
 #else
@@ -81,11 +84,11 @@ int 		nbuf_hashelements = 0;
 int		nbuf_headers = 0;
 #endif
 
-SYSCTL_INT (_kern, OID_AUTO, nbuf, CTLFLAG_RD, &nbuf_headers, 0, "");
-SYSCTL_INT (_kern, OID_AUTO, maxnbuf, CTLFLAG_RW, &max_nbuf_headers, 0, "");
+SYSCTL_INT (_kern, OID_AUTO, nbuf, CTLFLAG_RD | CTLFLAG_LOCKED, &nbuf_headers, 0, "");
+SYSCTL_INT (_kern, OID_AUTO, maxnbuf, CTLFLAG_RW | CTLFLAG_LOCKED, &max_nbuf_headers, 0, "");
 
 __private_extern__ int customnbuf = 0;
-int             srv = 0;	/* Flag indicates a server boot when set */
+int             serverperfmode = 0;	/* Flag indicates a server boot when set */
 int             ncl = 0;
 static unsigned int mbuf_poolsz;
 
@@ -118,10 +121,12 @@ bsd_startupearly(void)
 	} else
 		nbuf_hashelements = max_nbuf_headers;
 
-	if (niobuf_headers == 0)
-		niobuf_headers = max_nbuf_headers;
-	if (niobuf_headers > 4096)
-		niobuf_headers = 4096;
+	if (niobuf_headers == 0) {
+		if (max_nbuf_headers < 4096)
+			niobuf_headers = max_nbuf_headers;
+		else
+			niobuf_headers = (max_nbuf_headers / 2) + 2048;
+	}
 	if (niobuf_headers < CONFIG_MIN_NIOBUF)
 		niobuf_headers = CONFIG_MIN_NIOBUF;
 
@@ -176,18 +181,23 @@ bsd_startupearly(void)
 #endif /* SOCKETS */
 
 	if (vnodes_sized == 0) {
-	/*
-	 * Size vnodes based on memory 
-	 * Number vnodes  is (memsize/64k) + 1024 
-	 * This is the calculation that is used by launchd in tiger
-	 * we are clipping the max based on 16G 
-	 * ie ((16*1024*1024*1024)/(64 *1024)) + 1024 = 263168;
-	 * CONFIG_VNODES is set to 263168 for "medium" configurations (the default)
-	 * but can be smaller or larger. 
-	 */
-	desiredvnodes  = (sane_size/65536) + 1024;
-	if (desiredvnodes > CONFIG_VNODES)
-		desiredvnodes = CONFIG_VNODES;
+		if (!PE_get_default("kern.maxvnodes", &desiredvnodes, sizeof(desiredvnodes))) {
+			/*
+			 * Size vnodes based on memory 
+			 * Number vnodes  is (memsize/64k) + 1024 
+			 * This is the calculation that is used by launchd in tiger
+			 * we are clipping the max based on 16G 
+			 * ie ((16*1024*1024*1024)/(64 *1024)) + 1024 = 263168;
+			 * CONFIG_VNODES is set to 263168 for "medium" configurations (the default)
+			 * but can be smaller or larger. 
+			 */
+			desiredvnodes  = (sane_size/65536) + 1024;
+#ifdef CONFIG_VNODES
+			if (desiredvnodes > CONFIG_VNODES)
+				desiredvnodes = CONFIG_VNODES;
+#endif
+		}
+		vnodes_sized = 1;
 	}
 }
 
@@ -252,7 +262,6 @@ bsd_mbuf_cluster_reserve(boolean_t *overridden)
 	 * to correctly compute the size of the low-memory VM pool.  It is
 	 * redundant but rather harmless.
 	 */
-	//(void) PE_parse_boot_argn("srv", &srv, sizeof (srv));
 	(void) PE_parse_boot_argn("ncl", &ncl, sizeof (ncl));
 	(void) PE_parse_boot_argn("mbuf_pool", &mbuf_pool, sizeof (mbuf_pool));
 
@@ -265,12 +274,12 @@ bsd_mbuf_cluster_reserve(boolean_t *overridden)
 
         if (sane_size > (64 * 1024 * 1024) || ncl != 0) {
 
-		if (ncl || srv)
+		if (ncl || serverperfmode)
 			was_overridden = TRUE;
 
 	        if ((nmbclusters = ncl) == 0) {
 			/* Auto-configure the mbuf pool size */
-			nmbclusters = mbuf_default_ncl(srv, sane_size);
+			nmbclusters = mbuf_default_ncl(serverperfmode, sane_size);
 		} else {
 			/* Make sure it's not odd in case ncl is manually set */
 			if (nmbclusters & 0x1)
@@ -280,6 +289,9 @@ bsd_mbuf_cluster_reserve(boolean_t *overridden)
 			if (nmbclusters > MAX_NCL)
 				nmbclusters = MAX_NCL;
 		}
+
+		/* Round it down to nearest multiple of 4KB clusters */
+		nmbclusters = P2ROUNDDOWN(nmbclusters, NCLPBG);
 	}
 	mbuf_poolsz = nmbclusters << MCLSHIFT;
 done:
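
P2ROUNDDOWN is not defined in this hunk (it presumably comes in via the <sys/mcache.h> include added above); assuming the usual power-of-two round-down idiom, it behaves like:

/* Assumed definition -- the standard power-of-two round-down idiom. */
#define P2ROUNDDOWN(x, align)	((x) & ~((align) - 1))

/* e.g. P2ROUNDDOWN(1000, 16) == 992 */
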
@@ -296,11 +308,15 @@ void IOSleep(int);
 
 
 void
-bsd_srv_setup(int scale)
+bsd_scale_setup(int scale)
 {
 #if defined(__LP64__)
-	/* if memory is more than 16G, then apply rules for processes */
-	if (scale >  0) {
+	if ((scale > 0) && (serverperfmode == 0)) {
+		maxproc *= scale;
+		maxprocperuid = (maxproc * 2) / 3;
+	}
+	/* Apply server scaling rules */
+	if ((scale > 0) && (serverperfmode != 0)) {
 		maxproc = 2500 * scale;
 		hard_maxproc = maxproc;
 		/* no fp usage */
diff --git a/bsd/dev/vn/Makefile b/bsd/dev/vn/Makefile
index 64ae209ac..b4e415a16 100644
--- a/bsd/dev/vn/Makefile
+++ b/bsd/dev/vn/Makefile
@@ -3,23 +3,18 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
 export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
 export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 
-CFLAGS+=$(WERROR)
 
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 INSTINC_SUBDIRS_X86_64 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS_X86_64 = \
diff --git a/bsd/dev/vn/vn.c b/bsd/dev/vn/vn.c
index bac913331..2a0001d48 100644
--- a/bsd/dev/vn/vn.c
+++ b/bsd/dev/vn/vn.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -1141,10 +1141,10 @@ vniocattach_file(struct vn_softc *vn,
 
 	flags = FREAD|FWRITE;
 	if (in_kernel) {
-		NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx);
+		NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx);
 	}
 	else {
-		NDINIT(&nd, LOOKUP, FOLLOW, 
+		NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, 
 			   (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), 
 			   vniop->vn_file, ctx);
 	}
@@ -1156,11 +1156,11 @@ vniocattach_file(struct vn_softc *vn,
 		}
 		flags &= ~FWRITE;
 		if (in_kernel) {
-			NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, 
+			NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, 
 			       vniop->vn_file, ctx);
 		}
 		else {
-			NDINIT(&nd, LOOKUP, FOLLOW, 
+			NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, 
 				   (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), 
 			       vniop->vn_file, ctx);
 		}
@@ -1221,10 +1221,10 @@ vniocattach_shadow(struct vn_softc *vn, struct vn_ioctl_64 *vniop,
 
 	flags = FREAD|FWRITE;
 	if (in_kernel) {
-		NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx);
+		NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx);
 	}
 	else {
-		NDINIT(&nd, LOOKUP, FOLLOW, 
+		NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, 
 			   (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), 
 			   vniop->vn_file, ctx);
 	}
diff --git a/bsd/dev/x86_64/munge.s b/bsd/dev/x86_64/munge.s
index ec5b6123b..cb2f6dfc0 100644
--- a/bsd/dev/x86_64/munge.s
+++ b/bsd/dev/x86_64/munge.s
@@ -132,16 +132,91 @@ Lw2:
 Entry(munge_wl)			/* Costs an extra w move to do this */
 ENTRY(munge_wlw)
 	xorl	%edx,%edx
+Lwlw:
 	movl	12(%rsi),%eax
 	movl	%eax,16(%rsi)
 	movl	%edx,20(%rsi)
+Lwl:
 	movl	8(%rsi),%eax
 	movl	%eax,12(%rsi)
 	movl	4(%rsi),%eax
 	movl	%eax,8(%rsi)
+
 	movl	%edx,4(%rsi)
 	ret
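
For readers new to these stubs: each letter in a munge_* name apparently describes one user argument, with 'w' a 32-bit word to be zero-extended into a 64-bit slot and 'l' a 64-bit long already occupying two words. The routines rewrite the packed 32-bit argument area in place, last argument first, so nothing is read after it has been overwritten; the xorl %edx,%edx at each entry supplies the zero for the high halves of the 'w' slots. A rough C model of munge_wlw above (a sketch, not the kernel code; copies stand in for the in-place aliasing):

#include <stdint.h>
#include <string.h>

/* Model of munge_wlw: expand packed 32-bit args (w, l, w) into three
 * 64-bit slots.  The buffer must be large enough for the expanded form.
 */
static void munge_wlw_model(void *args)
{
	uint32_t in[4];
	uint64_t out[3];

	memcpy(in, args, sizeof(in));
	out[0] = in[0];					/* w: zero-extend */
	out[1] = in[1] | ((uint64_t)in[2] << 32);	/* l: keep both words */
	out[2] = in[3];					/* w: zero-extend */
	memcpy(args, out, sizeof(out));
}
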
 
+ENTRY(munge_wlwwwll)
+	xorl	%edx,%edx
+Lwlwwwll:
+	movl	36(%rsi),%eax
+	movl	%eax,52(%rsi)
+	movl	32(%rsi),%eax
+	movl	%eax,48(%rsi)
+
+	movl	28(%rsi),%eax
+	movl	%eax,44(%rsi)
+	movl	24(%rsi),%eax
+	movl	%eax,40(%rsi)
+
+	movl	20(%rsi),%eax
+	movl	%eax,32(%rsi)
+	movl	%edx,36(%rsi)
+Lwlww:
+	movl	16(%rsi),%eax
+	movl	%eax,24(%rsi)
+	movl	%edx,28(%rsi)
+	jmp	Lwlw
+
+ENTRY(munge_wlwwwllw)
+	xorl	%edx,%edx
+	movl	40(%rsi),%eax
+	movl	%eax,56(%rsi)
+	movl	%edx,60(%rsi)
+	jmp     Lwlwwwll
+
+ENTRY(munge_wlwwlwlw)
+	xorl	%edx,%edx
+	movl	40(%rsi),%eax
+	movl	%eax,56(%rsi)
+	movl	%edx,60(%rsi)
+	movl	36(%rsi),%eax
+	movl	%eax,52(%rsi)
+	movl	32(%rsi),%eax
+	movl	%eax,48(%rsi)
+	movl	28(%rsi),%eax
+	movl	%eax,40(%rsi)
+	movl	%edx,44(%rsi)
+	movl	24(%rsi),%eax
+	movl	%eax,36(%rsi)
+	movl	20(%rsi),%eax
+	movl	%eax,32(%rsi)
+	jmp     Lwlww
+
+
+ENTRY(munge_wllwwll)
+	xorl	%edx,%edx
+
+	movl	40(%rsi),%eax	//l
+	movl	%eax,52(%rsi)
+	movl	36(%rsi),%eax
+	movl	%eax,48(%rsi)
+	movl	32(%rsi),%eax	//l
+	movl	%eax,44(%rsi)
+	movl	28(%rsi),%eax
+	movl	%eax,40(%rsi)
+	movl	24(%rsi),%eax	//w
+	movl	%eax,32(%rsi)
+	movl	%edx,36(%rsi)
+	movl	20(%rsi),%eax	//w
+	movl	%eax,24(%rsi)
+	movl	%edx,28(%rsi)
+	movl	16(%rsi),%eax	//l
+	movl	%eax,20(%rsi)
+	movl	12(%rsi),%eax
+	movl	%eax,16(%rsi)
+
+	jmp	Lwl
+
 Entry(munge_wwwlw)
 	xorl	%edx,%edx
 	movl	20(%rsi),%eax
@@ -183,6 +258,61 @@ ENTRY(munge_wwwwwl)
 	movl	%eax,44(%rsi)
 	jmp	Lw5
 
+
+ENTRY(munge_wwwwwlww)
+	xorl	%edx,%edx
+	movl	32(%rsi),%eax
+	movl	%eax,56(%rsi)
+	movl	%edx,60(%rsi)
+	movl	28(%rsi),%eax
+	movl	%eax,48(%rsi)
+	movl	%edx,52(%rsi)
+	movl	20(%rsi),%eax
+	movl	%eax,40(%rsi)
+	movl	24(%rsi),%eax
+	movl	%eax,44(%rsi)
+
+	jmp	Lw5
+
+ENTRY(munge_wwwwwllw)
+	xorl	%edx,%edx
+	movl	36(%rsi),%eax
+	movl	%eax,56(%rsi)
+	movl	%edx,60(%rsi)
+	movl	28(%rsi),%eax
+	movl	%eax,48(%rsi)
+	movl	32(%rsi),%eax
+	movl	%eax,52(%rsi)
+	movl	20(%rsi),%eax
+	movl	%eax,40(%rsi)
+	movl	24(%rsi),%eax
+	movl	%eax,44(%rsi)
+	jmp	Lw5
+
+ENTRY(munge_wwwwwlll)
+	xorl	%edx,%edx
+	movl	36(%rsi),%eax
+	movl	%eax,56(%rsi)
+	movl	40(%rsi),%eax
+	movl	%eax,60(%rsi)
+	movl	28(%rsi),%eax
+	movl	%eax,48(%rsi)
+	movl	32(%rsi),%eax
+	movl	%eax,52(%rsi)
+	movl	20(%rsi),%eax
+	movl	%eax,40(%rsi)
+	movl	24(%rsi),%eax
+	movl	%eax,44(%rsi)
+	jmp	Lw5
+
+ENTRY(munge_wwwwwwl)
+	xorl	%edx,%edx
+	movl	24(%rsi),%eax
+	movl	%eax,48(%rsi)
+	movl	28(%rsi),%eax
+	movl	%eax,52(%rsi)
+	jmp	Lw6
+
 ENTRY(munge_wwwwwwlw)
 	xorl	%edx,%edx
 	movl	32(%rsi),%eax
diff --git a/bsd/hfs/Makefile b/bsd/hfs/Makefile
index 814b9184d..27705308f 100644
--- a/bsd/hfs/Makefile
+++ b/bsd/hfs/Makefile
@@ -9,16 +9,12 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 INSTINC_SUBDIRS_X86_64 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS_X86_64 = \
diff --git a/bsd/hfs/hfs.h b/bsd/hfs/hfs.h
index 24807f7f3..114fcecc7 100644
--- a/bsd/hfs/hfs.h
+++ b/bsd/hfs/hfs.h
@@ -29,6 +29,14 @@
 #ifndef __HFS__
 #define __HFS__
 
+/* If set to 1, enables the code to allocate blocks from the start 
+ * of the disk instead of the nextAllocation for sparse devices like 
+ * sparse disk images or sparsebundle images.  The free extent cache 
+ * for such volumes is also maintained based on the start block instead
+ * of the number of contiguous allocation blocks.  These devices prefer
+ * allocation of blocks near the start of the disk to avoid increasing
+ * the image size, but this can also result in file fragmentation.
+ */
 #define HFS_SPARSE_DEV 1
 
 #if DEBUG
@@ -60,6 +68,10 @@
 #include <hfs/hfs_encodings.h>
 #include <hfs/hfs_hotfiles.h>
 
+#if CONFIG_HFS_ALLOC_RBTREE
+#include <hfs/hfscommon/headers/HybridAllocator.h>
+#endif
+
 /*
  *	Just reported via MIG interface.
  */
@@ -110,11 +122,11 @@ extern struct timezone gTimeZone;
  */
  
 #define HFS_ROOTVERYLOWDISKTRIGGERFRACTION 5
-#define HFS_ROOTVERYLOWDISKTRIGGERLEVEL ((u_int64_t)(125*1024*1024))
+#define HFS_ROOTVERYLOWDISKTRIGGERLEVEL ((u_int64_t)(512*1024*1024))
 #define HFS_ROOTLOWDISKTRIGGERFRACTION 10
-#define HFS_ROOTLOWDISKTRIGGERLEVEL ((u_int64_t)(250*1024*1024))
+#define HFS_ROOTLOWDISKTRIGGERLEVEL ((u_int64_t)(1024*1024*1024))
 #define HFS_ROOTLOWDISKSHUTOFFFRACTION 11
-#define HFS_ROOTLOWDISKSHUTOFFLEVEL ((u_int64_t)(375*1024*1024))
+#define HFS_ROOTLOWDISKSHUTOFFLEVEL ((u_int64_t)(1024*1024*1024 + 250*1024*1024))
 
 #define HFS_VERYLOWDISKTRIGGERFRACTION 1
 #define HFS_VERYLOWDISKTRIGGERLEVEL ((u_int64_t)(100*1024*1024))
@@ -178,8 +190,9 @@ typedef struct hfsmount {
 	int16_t				vcbFlags; /* Runtime flag to indicate if volume is dirty/clean */
 	u_int32_t 			vcbAtrb;
 	u_int32_t 			vcbJinfoBlock;
-	time_t        hfs_itime;   /* file system creation time */
-	time_t        hfs_btime;   /* file system last backup time */
+	u_int32_t 			localCreateDate;/* volume create time from volume header (For HFS+, value is in local time) */
+	time_t				hfs_itime;	/* file system creation time (creation date of the root folder) */
+	time_t				hfs_btime;	/* file system last backup time */
 	u_int32_t 			blockSize;	/* size of allocation blocks */
 	u_int32_t 			totalBlocks;	/* total allocation blocks */
 	u_int32_t			allocLimit;	/* Do not allocate this block or beyond */
@@ -204,11 +217,33 @@ typedef struct hfsmount {
 	/* cache of largest known free extents */
 	u_int32_t			vcbFreeExtCnt;
 	HFSPlusExtentDescriptor vcbFreeExt[kMaxFreeExtents];
+	lck_spin_t			vcbFreeExtLock;
+	
+#if CONFIG_HFS_ALLOC_RBTREE
+	/*
+	 * Access to these fields should only be done 
+	 * after acquiring the bitmap lock.  Note that the
+	 * "offset_block_end" field indicates the portion of 
+	 * the bitmap that is currently managed by the red-black tree.
+	 */
+	
+	/* Normal Allocation Tree */
+	extent_tree_offset_t offset_tree;
+	u_int32_t 			offset_free_extents;  /* number of free extents managed by tree */
+	u_int32_t			offset_block_end;
+#endif
 	
+	/* 
+	 * For setting persistent in-mount fields that relate
+	 * to the use of the extent trees.  See HFS Red-Black 
+	 * Tree Allocator Flags below.
+	 */
+	u_int32_t extent_tree_flags;
+
+
 	u_int32_t		reserveBlocks;		/* free block reserve */
 	u_int32_t		loanedBlocks;		/* blocks on loan for delayed allocations */
 	
-	u_int32_t 			localCreateDate;	/* creation times for HFS+ volumes are in local time */
 
 	/*
 	 * HFS+ Private system directories (two). Any access
@@ -232,8 +267,9 @@ typedef struct hfsmount {
 	u_int32_t            jnl_size;
 	u_int32_t            hfs_jnlfileid;
 	u_int32_t            hfs_jnlinfoblkid;
-	lck_rw_t	     hfs_global_lock;
+	lck_rw_t	     	hfs_global_lock;
 	u_int32_t            hfs_global_lock_nesting;
+	void*				hfs_global_lockowner;
 	
 	/* Notification variables: */
 	u_int32_t		hfs_notification_conditions;
@@ -266,7 +302,7 @@ typedef struct hfsmount {
 	int		hfc_maxfiles;   /* maximum files to track */
 	struct vnode *  hfc_filevp;
 
-#ifdef HFS_SPARSE_DEV
+#if HFS_SPARSE_DEV
 	/* Sparse device variables: */
 	struct vnode * hfs_backingfs_rootvp;
 	u_int32_t      hfs_last_backingstatfs;
@@ -281,8 +317,9 @@ typedef struct hfsmount {
 	lck_rw_t       hfs_insync;     /* protects sync/freeze interaction */
 
 	/* Resize variables: */
-	u_int32_t		hfs_resize_filesmoved;
-	u_int32_t		hfs_resize_totalfiles;
+	u_int32_t		hfs_resize_blocksmoved;
+	u_int32_t		hfs_resize_totalblocks;
+	u_int32_t		hfs_resize_progress;
 
 	/* Per mount cnode hash variables: */
 	lck_mtx_t      hfs_chash_mutex;	/* protects access to cnode hash table */
@@ -313,6 +350,7 @@ typedef struct hfsmount {
 	u_int64_t       hfs_max_pending_io;
 					
 	thread_call_t   hfs_syncer;	      // removeable devices get sync'ed by this guy
+
 } hfsmount_t;
 
 #define HFS_META_DELAY     (100)
@@ -321,7 +359,6 @@ typedef struct hfsmount {
 typedef hfsmount_t  ExtendedVCB;
 
 /* Aliases for legacy (Mac OS 9) field names */
-#define vcbCrDate          hfs_itime
 #define vcbLsMod           hfs_mtime
 #define vcbVolBkUp         hfs_btime
 #define extentsRefNum      hfs_extents_vp
@@ -362,6 +399,15 @@ static __inline__ Boolean IsVCBDirty(ExtendedVCB *vcb)
  */
 enum privdirtype {FILE_HARDLINKS, DIR_HARDLINKS};
 
+/* HFS Red-Black Tree Allocator Flags */
+#define HFS_ALLOC_RB_ENABLED		0x000001  	/* trees in use */
+#define HFS_ALLOC_RB_ERRORED		0x000002 	/* tree hit error; disabled for the mount */
+#define HFS_ALLOC_RB_MZACTIVE		0x000004 	/* metazone tree has finished building */
+#define HFS_ALLOC_RB_ACTIVE			0x000008	/* normal zone tree has finished building */
+
+/* HFS Red-Black Unmount Synch. Flags */
+#define HFS_ALLOC_TREEBUILD_INFLIGHT	0x000010
+#define HFS_ALLOC_TEARDOWN_INFLIGHT		0x000020
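
For illustration, a minimal sketch of the kind of check that a routine such as hfs_isrbtree_active() (declared later in this header) might perform against these flags; the helper below is hypothetical and not part of the patch:

	/* Hypothetical check: the normal-zone tree is usable only if it
	 * is enabled, fully built, and has not hit an error that
	 * disabled it for this mount. */
	static int
	rbtree_usable_sketch(struct hfsmount *hfsmp)
	{
		u_int32_t flags = hfsmp->extent_tree_flags;

		return ((flags & HFS_ALLOC_RB_ENABLED) &&
		        (flags & HFS_ALLOC_RB_ACTIVE) &&
		        !(flags & HFS_ALLOC_RB_ERRORED));
	}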
 
 /* HFS mount point flags */
 #define HFS_READ_ONLY             0x00001
@@ -380,6 +426,7 @@ enum privdirtype {FILE_HARDLINKS, DIR_HARDLINKS};
 #define HFS_CREATING_BTREE        0x02000
 /* When set, do not update nextAllocation in the mount structure */
 #define HFS_SKIP_UPDATE_NEXT_ALLOCATION 0x04000	
+/* When set, the file system supports extent-based extended attributes */
 #define HFS_XATTR_EXTENTS         0x08000	
 #define	HFS_FOLDERCOUNT           0x10000
 /* When set, the file system exists on a virtual device, like disk image */
@@ -391,7 +438,7 @@ enum privdirtype {FILE_HARDLINKS, DIR_HARDLINKS};
  */
 #define HFS_RDONLY_DOWNGRADE      0x80000
 #define HFS_DID_CONTIG_SCAN      0x100000
-#define HFS_UNMAP                0x200000
+#define HFS_SSD					 0x400000
 
 
 /* Macro to update next allocation block in the HFS mount structure.  If 
@@ -416,9 +463,6 @@ enum privdirtype {FILE_HARDLINKS, DIR_HARDLINKS};
 			lck_mtx_unlock(&(hfsmp)->hfs_mutex); \
 	}                                                    \
 
-#define hfs_global_exclusive_lock_acquire(hfsmp) lck_rw_lock_exclusive(&(hfsmp)->hfs_global_lock)
-#define hfs_global_exclusive_lock_release(hfsmp) lck_rw_unlock_exclusive(&(hfsmp)->hfs_global_lock)
-
 /* Macro for incrementing and decrementing the folder count in a cnode 
  * attribute only if the HFS_FOLDERCOUNT bit is set in the mount flags 
  * and kHFSHasFolderCount bit is set in the cnode flags.  Currently these 
@@ -517,12 +561,10 @@ enum { kHFSPlusMaxFileNameBytes = kHFSPlusMaxFileNameChars * 3 };
 /*
  * HFS specific fcntl()'s
  */
-#define HFS_BULKACCESS      (FCNTL_FS_SPECIFIC_BASE + 0x00001)
-#define HFS_GET_MOUNT_TIME  (FCNTL_FS_SPECIFIC_BASE + 0x00002)
-#define HFS_GET_LAST_MTIME  (FCNTL_FS_SPECIFIC_BASE + 0x00003)
 #define HFS_GET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00004)
 #define HFS_SET_BOOT_INFO   (FCNTL_FS_SPECIFIC_BASE + 0x00005)
-#define HFS_EXT_BULKACCESS  (FCNTL_FS_SPECIFIC_BASE + 0x00006)
+/* See HFSIOC_EXT_BULKACCESS and friends for HFS-specific fsctls */
+
 
 
 /*
@@ -537,7 +579,6 @@ enum { kHFSPlusMaxFileNameBytes = kHFSPlusMaxFileNameChars * 3 };
 	FUNCTION PROTOTYPES 
 ******************************************************************************/
 
-
 /*****************************************************************************
 	hfs_vnop_xxx functions from different files 
 ******************************************************************************/
@@ -545,6 +586,9 @@ int hfs_vnop_readdirattr(struct vnop_readdirattr_args *);  /* in hfs_attrlist.c
 
 int hfs_vnop_inactive(struct vnop_inactive_args *);        /* in hfs_cnode.c */
 int hfs_vnop_reclaim(struct vnop_reclaim_args *);          /* in hfs_cnode.c */
+int hfs_set_backingstore (struct vnode *vp, int val);				/* in hfs_cnode.c */
+int hfs_is_backingstore (struct vnode *vp, int *val);		/* in hfs_cnode.c */
+
 
 int hfs_vnop_link(struct vnop_link_args *);                /* in hfs_link.c */
 
@@ -633,6 +677,11 @@ extern int  hfs_relocate(struct  vnode *, u_int32_t, kauth_cred_t, struct  proc
 
 extern int hfs_truncate(struct vnode *, off_t, int, int, int, vfs_context_t);
 
+extern int hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, 
+								struct filefork *rsrcfork,  u_int32_t fileid);
+
+extern int hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp);
+
 extern int hfs_bmap(struct vnode *, daddr_t, struct vnode **, daddr64_t *, unsigned int *);
 
 extern int hfs_fsync(struct vnode *, int, int, struct proc *);
@@ -643,9 +692,12 @@ extern int hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid);
 
 extern int hfs_set_volxattr(struct hfsmount *hfsmp, unsigned int xattrtype, int state);
 
-extern void hfs_check_volxattr(struct hfsmount *hfsmp, unsigned int xattrtype);
+extern int hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks);
 
-extern int  hfs_isallocated(struct hfsmount *, u_int32_t, u_int32_t);
+extern int hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock, 
+		u_int32_t numBlocks, u_int32_t *alloc_count);
+
+extern int hfs_isrbtree_active (struct hfsmount *hfsmp);
 
 
 /*****************************************************************************
@@ -656,7 +708,7 @@ int hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context);
 /* used as a callback by the journaling code */
 extern void hfs_sync_metadata(void *arg);
 
-extern int hfs_vget(struct hfsmount *, cnid_t, struct vnode **, int);
+extern int hfs_vget(struct hfsmount *, cnid_t, struct vnode **, int, int);
 
 extern void hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding);
 
@@ -694,6 +746,15 @@ extern int overflow_extents(struct filefork *fp);
 extern int hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
 		struct proc *p, int invokesuperuserstatus);
 
+extern int check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg);
+extern int check_for_dataless_file(struct vnode *vp, uint64_t op_type);
+
+/*
+ * Journal lock function prototypes
+ */
+int hfs_lock_global (struct hfsmount *hfsmp, enum hfslocktype locktype);
+void hfs_unlock_global (struct hfsmount *hfsmp);
+
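
A hedged sketch of how the new global-lock pair above might bracket a journal flush (hfs_journal_flush's wait_for_IO argument is declared later in this header); the wrapper function is illustrative only and not part of the patch:

	/* Illustrative wrapper: take the global lock exclusively around
	 * a journal flush, then drop it. */
	static int
	flush_under_global_lock(struct hfsmount *hfsmp)
	{
		int error;

		error = hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
		if (error)
			return (error);
		error = hfs_journal_flush(hfsmp, TRUE);	/* wait for the I/O */
		hfs_unlock_global(hfsmp);
		return (error);
	}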
 
 /* HFS System file locking */
 #define SFL_CATALOG     0x0001
@@ -717,7 +778,7 @@ extern u_int32_t hfs_freeblks(struct hfsmount * hfsmp, int wantreserve);
 
 short MacToVFSError(OSErr err);
 
-void hfs_metadatazone_init(struct hfsmount *hfsmp);
+void hfs_metadatazone_init(struct hfsmount *hfsmp, int disable);
 
 /* HFS directory hint functions. */
 extern directoryhint_t * hfs_getdirhint(struct cnode *, int, int);
@@ -735,9 +796,11 @@ extern int  hfs_virtualmetafile(struct cnode *);
 
 extern int hfs_start_transaction(struct hfsmount *hfsmp);
 extern int hfs_end_transaction(struct hfsmount *hfsmp);
-extern int hfs_journal_flush(struct hfsmount *hfsmp);
+extern int hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO);
 extern void hfs_sync_ejectable(struct hfsmount *hfsmp);
 
+extern void hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents);
+
 /* Erase unused Catalog nodes due to <rdar://problem/6947811>. */
 extern int hfs_erase_unused_nodes(struct hfsmount *hfsmp);
 
@@ -758,7 +821,7 @@ extern int hfs_btsync(struct vnode *vp, int sync_transaction);
 extern void replace_desc(struct cnode *cp, struct cat_desc *cdp);
 
 extern int hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp,
-			struct vnode **rvpp, int can_drop_lock, int error_on_unlinked);
+			struct vnode **rvpp, int can_drop_lock, int error_on_unlink);
 
 extern int hfs_update(struct vnode *, int);
 
@@ -766,10 +829,24 @@ extern int hfs_update(struct vnode *, int);
 /*****************************************************************************
 	Functions from hfs_xattr.c
 ******************************************************************************/
+
+/* Maximum extended attribute size supported for all extended attributes except  
+ * resource fork and finder info.
+ */
+#define HFS_XATTR_MAXSIZE	(128 * 1024)
+
+/* Number of bits used to represent maximum extended attribute size */
+#define HFS_XATTR_SIZE_BITS	18
+
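Since 128 KiB is 2^17 bytes, a field that must represent sizes up to and including HFS_XATTR_MAXSIZE needs 18 bits. A minimal, hypothetical bound check using these constants (not part of the patch):

	/* Hypothetical pre-flight check: reject attribute payloads larger
	 * than HFS stores for ordinary attributes (the resource fork and
	 * Finder info are handled separately). */
	static int
	xattr_size_ok(size_t attrsize)
	{
		return (attrsize <= HFS_XATTR_MAXSIZE);
	}
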
 int  hfs_attrkeycompare(HFSPlusAttrKey *searchKey, HFSPlusAttrKey *trialKey);
 int  hfs_buildattrkey(u_int32_t fileID, const char *attrname, HFSPlusAttrKey *key);
 void hfs_xattr_init(struct hfsmount * hfsmp);
 int file_attribute_exist(struct hfsmount *hfsmp, uint32_t fileID);
+int init_attrdata_vnode(struct hfsmount *hfsmp);
+int hfs_getxattr_internal(struct cnode *, struct vnop_getxattr_args *,
+							struct hfsmount *, u_int32_t);
+int hfs_setxattr_internal(struct cnode *, caddr_t, size_t, 
+						  struct vnop_setxattr_args *, struct hfsmount *, u_int32_t);
 
 
 
@@ -779,7 +856,7 @@ int file_attribute_exist(struct hfsmount *hfsmp, uint32_t fileID);
 
 extern int  hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp,
                        struct componentname *cnp, int skip_reserve);
-extern int  hfs_lookuplink(struct hfsmount *hfsmp, cnid_t linkfileid,
+extern int  hfs_lookup_siblinglinks(struct hfsmount *hfsmp, cnid_t linkfileid,
                            cnid_t *prevlinkid,  cnid_t *nextlinkid);
 extern void  hfs_privatedir_init(struct hfsmount *, enum privdirtype);
 
diff --git a/bsd/hfs/hfs_attrlist.c b/bsd/hfs/hfs_attrlist.c
index dc24f99a7..3f1e6da64 100644
--- a/bsd/hfs/hfs_attrlist.c
+++ b/bsd/hfs/hfs_attrlist.c
@@ -41,6 +41,7 @@
 #include <sys/unistd.h>
 #include <sys/mount_internal.h>
 #include <sys/kauth.h>
+#include <sys/fsctl.h>
 
 #include <kern/locks.h>
 
@@ -80,7 +81,6 @@ static u_int32_t hfs_real_user_access(vnode_t vp, vfs_context_t ctx);
  * apply for the file system you are doing the readdirattr on. To make life 
  * simpler, this call will only return entries in its directory, hfs like.
  */
-__private_extern__
 int
 hfs_vnop_readdirattr(ap)
 	struct vnop_readdirattr_args /* {
@@ -138,6 +138,19 @@ hfs_vnop_readdirattr(ap)
 	    (alist->forkattr != 0)) {
 		return (EINVAL);
 	}
+
+	if (VTOC(dvp)->c_flags & UF_COMPRESSED) {
+		int compressed = hfs_file_is_compressed(VTOC(dvp), 0);  /* 0 == take the cnode lock */
+
+		if (!compressed) {
+			error = check_for_dataless_file(dvp, NAMESPACE_HANDLER_READ_OP);
+			if (error) {
+				return error;
+			}
+		}
+	}
+
+
 	/*
 	 * Take an exclusive directory lock since we manipulate the directory hints
 	 */
@@ -256,12 +269,12 @@ hfs_vnop_readdirattr(ap)
 			/*
 			 * Obtain vnode for our vnode_authorize() calls.
 			 */
-			if (hfs_vget(hfsmp, cattrp->ca_fileid, &vp, 0) != 0) {
+			if (hfs_vget(hfsmp, cattrp->ca_fileid, &vp, 0, 0) != 0) {
 				vp = NULL;
 			}
 		} else if (!(ap->a_options & FSOPT_NOINMEMUPDATE)) {
 			/* Get in-memory cnode data (if any). */
-			vp = hfs_chash_getvnode(hfsmp, cattrp->ca_fileid, 0, 0);
+			vp = hfs_chash_getvnode(hfsmp, cattrp->ca_fileid, 0, 0, 0);
 		}
 		if (vp != NULL) {
 			cp = VTOC(vp);
@@ -405,7 +418,7 @@ exit2:
 /*
  * Pack cnode attributes into an attribute block.
  */
- __private_extern__
+__private_extern__
 void
 hfs_packattrblk(struct attrblock *abp,
 		struct hfsmount *hfsmp,
@@ -654,7 +667,10 @@ packcommonattr(
 	    }
 	}
 	if (ATTR_CMN_FNDRINFO & attr) {
+		u_int8_t *finfo = NULL;
 		bcopy(&cap->ca_finderinfo, attrbufptr, sizeof(u_int8_t) * 32);
+		finfo = (u_int8_t*)attrbufptr;
+
 		/* Don't expose a symlink's private type/creator. */
 		if (S_ISLNK(cap->ca_mode)) {
 			struct FndrFileInfo *fip;
@@ -663,6 +679,18 @@ packcommonattr(
 			fip->fdType = 0;
 			fip->fdCreator = 0;
 		}
+
+		/* advance 16 bytes into the attrbuf */
+		finfo = finfo + 16;
+		if (S_ISREG(cap->ca_mode)) {
+			struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
+			extinfo->date_added = 0;
+		}
+		else if (S_ISDIR(cap->ca_mode)) {
+			struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
+			extinfo->date_added = 0;
+		}
+
 		attrbufptr = (char *)attrbufptr + sizeof(u_int8_t) * 32;
 	}
 	if (ATTR_CMN_OWNERID & attr) {
@@ -814,7 +842,10 @@ packfileattr(
 	off_t datasize = datafork->cf_size;
 	off_t totalsize = datasize + rsrcfork->cf_size;
 #if HFS_COMPRESSION
-	if ( cattrp->ca_flags & UF_COMPRESSED ) {
+	int handle_compressed;
+	handle_compressed = (cattrp->ca_flags & UF_COMPRESSED); // && hfs_file_is_compressed(VTOC(vp), 1);
+	
+	if (handle_compressed) {
 		if (attr & (ATTR_FILE_DATALENGTH|ATTR_FILE_TOTALSIZE)) {
 			if ( 0 == hfs_uncompressed_size_of_compressed_file(hfsmp, vp, cattrp->ca_fileid, &datasize, 1) ) { /* 1 == don't take the cnode lock */
 				/* total size of a compressed file is just the data size */
@@ -865,7 +896,7 @@ packfileattr(
 	 * passed by hfs_vnop_readdirattr() may be null. 
 	 */
 	
-	if ( cattrp->ca_flags & UF_COMPRESSED ) {
+	if ( handle_compressed ) {
 		if (attr & ATTR_FILE_DATAALLOCSIZE) {
 			*((off_t *)attrbufptr) = (off_t)rsrcfork->cf_blocks * (off_t)allocblksize;
 			attrbufptr = ((off_t *)attrbufptr) + 1;
@@ -902,7 +933,7 @@ packfileattr(
 /*
  * Calculate the total size of an attribute block.
  */
- __private_extern__
+__private_extern__
 int
 hfs_attrblksize(struct attrlist *attrlist)
 {
@@ -1015,7 +1046,6 @@ hfs_real_user_access(vnode_t vp, vfs_context_t ctx)
 }
 		
 
-__private_extern__
 u_int32_t
 DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid, mode_t obj_mode,
 		struct mount *mp, kauth_cred_t cred, __unused struct proc *p)
diff --git a/bsd/hfs/hfs_btreeio.c b/bsd/hfs/hfs_btreeio.c
index ddcb91277..7295cee54 100644
--- a/bsd/hfs/hfs_btreeio.c
+++ b/bsd/hfs/hfs_btreeio.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -53,8 +53,8 @@ extern int bdwrite_internal(struct buf *, int);
 static int ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount);
 static int btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp);
 
+void btree_swap_node(struct buf *bp, __unused void *arg);
 
-__private_extern__
 OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, __unused ItemCount minBlockCount)
 {
 	BTreeControlBlockPtr	bTreePtr;
@@ -71,7 +71,6 @@ OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, __unused ItemC
 }
 
 
-__private_extern__
 OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions options, BlockDescriptor *block)
 {
     OSStatus	 retval = E_NONE;
@@ -165,7 +164,6 @@ OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions opt
 }
 
 
-__private_extern__
 void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr)
 {
 	struct hfsmount	*hfsmp = VTOHFS(vp);
@@ -185,7 +183,7 @@ void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr)
 	blockPtr->isModified = 1;
 }
 
-static void
+void
 btree_swap_node(struct buf *bp, __unused void *arg)
 {
     //	struct hfsmount *hfsmp = (struct hfsmount *)arg;
@@ -218,7 +216,6 @@ btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp)
 }
 
 
-__private_extern__
 OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options)
 {
     struct hfsmount	*hfsmp = VTOHFS(vp);
@@ -331,7 +328,6 @@ exit:
 }
 
 
-__private_extern__
 OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF)
 {
 #pragma unused (maxEOF)
@@ -467,7 +463,7 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF)
 			trim = ((filePtr->fcbEOF - origSize) % btInfo.nodeSize);
 		}
 
-		ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0);
+		ret = TruncateFileC(vcb, filePtr, filePtr->fcbEOF - trim, 0, 0, FTOC(filePtr)->c_fileid, 0);
 		filePtr->fcbEOF = (u_int64_t)filePtr->ff_blocks * (u_int64_t)vcb->blockSize;
 
 		// XXXdbg - panic if the file didn't get trimmed back properly
@@ -611,6 +607,8 @@ hfs_create_attr_btree(struct hfsmount *hfsmp, u_int32_t nodesize, u_int32_t node
 	u_int16_t  offset;
 	int intrans = 0;
 	int result;
+	int newvnode_flags = 0;
+	
 again:
 	/*
 	 * Serialize creation using HFS_CREATING_BTREE flag.
@@ -654,7 +652,8 @@ again:
 	bzero(&cfork, sizeof(cfork));
 	cfork.cf_clump = nodesize * nodecnt;
 
-	result = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, &vp);
+	result = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, 
+							 &cfork, &vp, &newvnode_flags);
 	if (result) {
 		goto exit;
 	}
diff --git a/bsd/hfs/hfs_catalog.c b/bsd/hfs/hfs_catalog.c
index 0ae0e2600..0ab6f4585 100644
--- a/bsd/hfs/hfs_catalog.c
+++ b/bsd/hfs/hfs_catalog.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -90,10 +90,12 @@ u_char modetodirtype[16] = {
 #define MODE_TO_DT(mode)  (modetodirtype[((mode) & S_IFMT) >> 12])
 
 
-static int cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int allow_system_files, u_int32_t hint, int wantrsrc,
+#define HFS_LOOKUP_SYSFILE	0x1	/* If set, allow lookup of system files */
+#define HFS_LOOKUP_HARDLINK	0x2	/* If set, allow lookup of hard link records without resolving the hard links */
+static int cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int flags, u_int32_t hint, int wantrsrc,
                   struct cat_desc *descp, struct cat_attr *attrp, struct cat_fork *forkp, cnid_t *desc_cnid);
 
-static int cat_lookupmangled(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc,
+int cat_lookupmangled(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc,
                   struct cat_desc *outdescp, struct cat_attr *attrp, struct cat_fork *forkp);
 
 /* Internal catalog support routines */
@@ -133,8 +135,9 @@ static int buildthread(void *keyp, void *recp, int std_hfs, int directory);
 
 static int cat_makealias(struct hfsmount *hfsmp, u_int32_t inode_num, struct HFSPlusCatalogFile *crp);
 
+static int cat_update_internal(struct hfsmount *hfsmp, int update_hardlink, struct cat_desc *descp, struct cat_attr *attrp,
+	struct cat_fork *dataforkp, struct cat_fork *rsrcforkp);
 
-__private_extern__
 int
 cat_preflight(struct hfsmount *hfsmp, catops_t ops, cat_cookie_t *cookie, __unused proc_t p)
 {
@@ -152,7 +155,6 @@ cat_preflight(struct hfsmount *hfsmp, catops_t ops, cat_cookie_t *cookie, __unus
 	return MacToVFSError(result);
 }
 
-__private_extern__
 void
 cat_postflight(struct hfsmount *hfsmp, cat_cookie_t *cookie, __unused proc_t p)
 {
@@ -167,8 +169,7 @@ cat_postflight(struct hfsmount *hfsmp, cat_cookie_t *cookie, __unused proc_t p)
 		hfs_systemfile_unlock(hfsmp, lockflags);
 }
 
- 
-__private_extern__
+__private_extern__ 
 void
 cat_convertattr(
 	struct hfsmount *hfsmp,
@@ -297,7 +298,6 @@ cat_releasedesc(struct cat_desc *descp)
  * Note: The caller is responsible for releasing the output
  * catalog descriptor (when supplied outdescp is non-null).
  */
-__private_extern__
 int
 cat_lookup(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc,
              struct cat_desc *outdescp, struct cat_attr *attrp,
@@ -344,7 +344,6 @@ exit:
 	return (result);
 }
 
-__private_extern__
 int
 cat_insertfilethread(struct hfsmount *hfsmp, struct cat_desc *descp)
 {
@@ -409,7 +408,6 @@ exit:
  * catalog descriptor (when supplied outdescp is non-null).
 
  */
-__private_extern__
 int
 cat_findname(struct hfsmount *hfsmp, cnid_t cnid, struct cat_desc *outdescp)
 {
@@ -482,7 +480,6 @@ exit:
  * Note: The caller is responsible for releasing the output
  * catalog descriptor (when supplied outdescp is non-null).
  */
-__private_extern__
 int
 cat_idlookup(struct hfsmount *hfsmp, cnid_t cnid, int allow_system_files,
     struct cat_desc *outdescp, struct cat_attr *attrp, struct cat_fork *forkp)
@@ -543,7 +540,9 @@ cat_idlookup(struct hfsmount *hfsmp, cnid_t cnid, int allow_system_files,
 		goto exit;
 	}
 
-	result = cat_lookupbykey(hfsmp, keyp, allow_system_files, 0, 0, outdescp, attrp, forkp, NULL);
+	result = cat_lookupbykey(hfsmp, keyp, 
+			((allow_system_files != 0) ? HFS_LOOKUP_SYSFILE : 0), 
+			0, 0, outdescp, attrp, forkp, NULL);
 	/* No corresponding file/folder record found for a thread record,
 	 * mark the volume inconsistent.
 	 */
@@ -569,7 +568,7 @@ exit:
 /*
  * cat_lookupmangled - lookup a catalog node using a mangled name
  */
-static int
+int
 cat_lookupmangled(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc,
                   struct cat_desc *outdescp, struct cat_attr *attrp, struct cat_fork *forkp)
 {
@@ -625,7 +624,7 @@ falsematch:
  * cat_lookupbykey - lookup a catalog node using a cnode key
  */
 static int
-cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int allow_system_files, u_int32_t hint, int wantrsrc,
+cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int flags, u_int32_t hint, int wantrsrc,
                   struct cat_desc *descp, struct cat_attr *attrp, struct cat_fork *forkp, cnid_t *desc_cnid)
 {
 	struct BTreeIterator * iterator;
@@ -637,6 +636,7 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int allow_system_files
 	u_int32_t ilink = 0;
 	cnid_t cnid = 0;
 	u_int32_t encoding = 0;
+	cnid_t parentid = 0;
 
 	std_hfs = (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord);
 
@@ -652,16 +652,18 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int allow_system_files
 	if (result) 
 		goto exit;
 
-	/* Save the cnid and encoding now in case there's a hard link */
+	/* Save the cnid, parentid, and encoding now in case there's a hard link or inode */
 	cnid = getcnid(recp);
+	if (!std_hfs) {
+		parentid = keyp->hfsPlus.parentID;
+	}
 	encoding = getencoding(recp);
 	hint = iterator->hint.nodeNum;
 
 	/* Hide the journal files (if any) */
 	if ((hfsmp->jnl || ((HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) &&
 		((cnid == hfsmp->hfs_jnlfileid) || (cnid == hfsmp->hfs_jnlinfoblkid)) &&
-		 !allow_system_files) {
-
+		 !(flags & HFS_LOOKUP_SYSFILE)) {
 		result = ENOENT;
 		goto exit;
 	}
@@ -674,7 +676,7 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int allow_system_files
 	if (!std_hfs
 	    && (attrp || forkp) 
 	    && (recp->recordType == kHFSPlusFileRecord)
-	    && ((to_bsd_time(recp->hfsPlusFile.createDate) == (time_t)hfsmp->vcbCrDate) ||
+	    && ((to_bsd_time(recp->hfsPlusFile.createDate) == (time_t)hfsmp->hfs_itime) ||
 	        (to_bsd_time(recp->hfsPlusFile.createDate) == (time_t)hfsmp->hfs_metadata_createdate))) {
 		int isdirlink = 0;
 		int isfilelink = 0;
@@ -687,7 +689,7 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int allow_system_files
 			   (SWAP_BE32(recp->hfsPlusFile.userInfo.fdCreator) == kHFSAliasCreator)) {
 			isdirlink = 1;
 		}
-		if (isfilelink || isdirlink) {
+		if ((isfilelink || isdirlink) && !(flags & HFS_LOOKUP_HARDLINK)) {
 			ilink = recp->hfsPlusFile.hl_linkReference;
 			(void) cat_resolvelink(hfsmp, ilink, isdirlink, (struct HFSPlusCatalogFile *)recp);
 		}
@@ -701,8 +703,50 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int allow_system_files
 			getbsdattr(hfsmp, &cnoderec, attrp);
 		} else {
 			getbsdattr(hfsmp, (struct HFSPlusCatalogFile *)recp, attrp);
-			if (ilink)
+			if (ilink) {
+				/* Update the inode number for this hard link */
 				attrp->ca_linkref = ilink;
+			}
+
+			/* 
+			 * Set kHFSHasLinkChainBit for hard links, and reset it for all 
+			 * other items.  Also set linkCount to 1 for regular files.
+			 *
+			 * Due to some bug (rdar://8505977), some regular files can have 
+			 * kHFSHasLinkChainBit set and linkCount more than 1 even if they 
+			 * are not really hard links.  The runtime code should not consider 
+			 * these files as hard links.  Therefore we reset the kHFSHasLinkChainBit 
+			 * and linkCount for regular files before we vend them out.  This might 
+			 * also result in repairing the bad files on disk, if the corresponding 
+			 * file is modified and updated on disk.  
+			 */
+			if (ilink) {
+				/* This is a hard link and the link count bit was not set */
+				if (!(attrp->ca_recflags & kHFSHasLinkChainMask)) {
+					printf ("hfs: set hardlink bit on vol=%s cnid=%u inoid=%u\n", hfsmp->vcbVN, cnid, ilink);
+					attrp->ca_recflags |= kHFSHasLinkChainMask;
+				}
+			} else { 
+				/* Make sure that this non-hard-link (regular) record is not 
+				 * an inode record or a valid hard link that is not being 
+				 * resolved for volume resize purposes.  We do not want to 
+				 * reset the hard link bit or reset the link count on these records.
+				 */
+				if (!(flags & HFS_LOOKUP_HARDLINK) && 
+				    (parentid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) && 
+				    (parentid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid)) {
+					/* This is not a hard link or inode and the link count bit was set */
+					if (attrp->ca_recflags & kHFSHasLinkChainMask) {
+						printf ("hfs: clear hardlink bit on vol=%s cnid=%u\n", hfsmp->vcbVN, cnid);
+						attrp->ca_recflags &= ~kHFSHasLinkChainMask;
+					}
+					/* This is a regular file and the link count was more than 1 */
+					if (S_ISREG(attrp->ca_mode) && (attrp->ca_linkcount > 1)) {
+						printf ("hfs: set linkcount=1 on vol=%s cnid=%u old=%u\n", hfsmp->vcbVN, cnid, attrp->ca_linkcount);
+						attrp->ca_linkcount = 1;
+					}
+				}
+			}
 		}
 	}
 	if (forkp != NULL) {
@@ -765,6 +809,22 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int allow_system_files
 			if ((validblks < forkp->cf_blocks) && (forkp->cf_extents[7].blockCount == 0)) {
 				off_t psize;
 
+				/* 
+				 * This is technically a volume corruption. 
+				 * If the total number of blocks calculated by iterating + summing
+				 * the extents in the resident extent records is less than that 
+				 * which is reported in the catalog entry, we should force a fsck.  
+				 * Only modifying ca_blocks here is not guaranteed to make it out 
+				 * to disk; it is a runtime-only field. 
+				 * 
+				 * Note that we could have gotten into this state if we had invalid ranges 
+				 * that existed in borrowed blocks that somehow made it out to disk. 
+				 * The cnode's on disk block count should never be greater 
+				 * than that which is in its extent records.
+				 */
+
+				(void) hfs_mark_volume_inconsistent (hfsmp);
+
 				forkp->cf_blocks = validblks;
 				if (attrp != NULL) {
 					attrp->ca_blocks = validblks + recp->hfsPlusFile.resourceFork.totalBlocks;
@@ -813,7 +873,6 @@ exit:
  * The caller is responsible for releasing the output
  * catalog descriptor (when supplied outdescp is non-null).
  */
-__private_extern__
 int
 cat_create(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attrp,
 	struct cat_desc *out_descp)
@@ -988,7 +1047,6 @@ exit:
  * Note: The caller is responsible for releasing the output
  * catalog descriptor (when supplied out_cdp is non-null).
  */
-__private_extern__
 int 
 cat_rename (
 	struct hfsmount * hfsmp,
@@ -1287,7 +1345,6 @@ exit:
  *	2. BTDeleteRecord(thread);
  *	3. BTUpdateRecord(parent);
  */
-__private_extern__
 int
 cat_delete(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attrp)
 {
@@ -1378,12 +1435,13 @@ exit:
 
 
 /*
- * cnode_update - update the catalog node described by descp
- * using the data from attrp and forkp.
+ * cat_update_internal - update the catalog node described by descp
+ * using the data from attrp and forkp.  
+ * If update_hardlink is true, the hard link catalog record is updated
+ * and not the inode catalog record. 
  */
-__private_extern__
-int
-cat_update(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attrp,
+static int
+cat_update_internal(struct hfsmount *hfsmp, int update_hardlink, struct cat_desc *descp, struct cat_attr *attrp,
 	struct cat_fork *dataforkp, struct cat_fork *rsrcforkp)
 {
 	FCB * fcb;
@@ -1408,13 +1466,14 @@ cat_update(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attr
 	 * For open-deleted files we need to do a lookup by cnid
 	 * (using thread rec).
 	 *
-	 * For hard links, the target of the update is the inode
-	 * itself (not the link record) so a lookup by fileid
-	 * (i.e. thread rec) is needed.
+	 * For hard links, unless the caller requests otherwise, the 
+	 * target of the update is the inode itself (not the link record), 
+	 * so a lookup by fileid (i.e. thread rec) is needed.
 	 */
-	if ((descp->cd_cnid != attrp->ca_fileid) ||
-	    (descp->cd_namelen == 0) ||
-	    (attrp->ca_recflags & kHFSHasLinkChainMask)) {
+	if ((update_hardlink == false) && 
+	    ((descp->cd_cnid != attrp->ca_fileid) ||
+	     (descp->cd_namelen == 0) ||
+	     (attrp->ca_recflags & kHFSHasLinkChainMask))) {
 		result = getkey(hfsmp, attrp->ca_fileid, (CatalogKey *)&iterator->key);
 	} else {
 		result = buildkey(hfsmp, descp, (HFSPlusCatalogKey *)&iterator->key, 0);
@@ -1439,6 +1498,17 @@ exit:
 	return MacToVFSError(result);
 }
 
+/*
+ * cat_update - update the catalog node described by descp
+ * using the data from attrp and forkp. 
+ */
+int
+cat_update(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attrp,
+	struct cat_fork *dataforkp, struct cat_fork *rsrcforkp)
+{
+	return cat_update_internal(hfsmp, false, descp, attrp, dataforkp, rsrcforkp);
+}
+
 /*
  * catrec_update - Update the fields of a catalog record
  * This is called from within BTUpdateRecord.
@@ -1585,6 +1655,7 @@ catrec_update(const CatalogKey *ckp, CatalogRecord *crp, struct update_state *st
 	}
 	case kHFSPlusFileRecord: {
 		HFSPlusCatalogFile *file;
+		int is_dirlink; 
 		
 		file = (struct HFSPlusCatalogFile *)crp;
 		/* Do a quick sanity check */
@@ -1627,13 +1698,22 @@ catrec_update(const CatalogKey *ckp, CatalogRecord *crp, struct update_state *st
 		 * supplied values (which will be default), which has the
 		 * same effect as creating a new file while
 		 * MNT_UNKNOWNPERMISSIONS is set.
+		 *
+		 * Do not modify bsdInfo for directory hard link records.
+		 * They are set during creation and are not modifiable, so just 
+		 * leave them alone. 
 		 */
-		if ((file->bsdInfo.fileMode != 0) ||
-		    (attrp->ca_flags != 0) ||
-		    (attrp->ca_uid != hfsmp->hfs_uid) ||
-		    (attrp->ca_gid != hfsmp->hfs_gid) ||
-		    ((attrp->ca_mode & ALLPERMS) !=
-		     (hfsmp->hfs_file_mask & ACCESSPERMS))) {
+		is_dirlink = (file->flags & kHFSHasLinkChainMask) &&     
+			     (SWAP_BE32(file->userInfo.fdType) == kHFSAliasType) && 
+			     (SWAP_BE32(file->userInfo.fdCreator) == kHFSAliasCreator);
+
+		if (!is_dirlink && 
+		    ((file->bsdInfo.fileMode != 0) ||
+		     (attrp->ca_flags != 0) ||
+		     (attrp->ca_uid != hfsmp->hfs_uid) ||
+		     (attrp->ca_gid != hfsmp->hfs_gid) ||
+		     ((attrp->ca_mode & ALLPERMS) !=
+		      (hfsmp->hfs_file_mask & ACCESSPERMS)))) {
 			if ((file->bsdInfo.fileMode == 0) ||
 			    (((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) == 0) {
 				file->bsdInfo.ownerID = attrp->ca_uid;
@@ -1679,8 +1759,18 @@ catrec_update(const CatalogKey *ckp, CatalogRecord *crp, struct update_state *st
 		/* Push out special field if necessary */
 		if (S_ISBLK(attrp->ca_mode) || S_ISCHR(attrp->ca_mode)) {
 			file->bsdInfo.special.rawDevice = attrp->ca_rdev;
-		} else if (descp->cd_cnid != attrp->ca_fileid || attrp->ca_linkcount == 2) {
-			file->hl_linkCount = attrp->ca_linkcount;
+		} 
+		else {
+			/* 
+			 * Protect against the degenerate case where the descriptor contains the
+			 * raw inode ID in its CNID field.  If the HFSPlusCatalogFile record indicates
+			 * the linkcount was greater than its default value of 1, then it must have become
+			 * a hardlink.  In this case, update the linkcount from the cat_attr passed in.
+			 */
+			if ((descp->cd_cnid != attrp->ca_fileid) || (attrp->ca_linkcount > 1 ) ||
+					(file->hl_linkCount > 1)) {
+				file->hl_linkCount = attrp->ca_linkcount;
+			}
 		}
 		break;
 	}
@@ -1809,7 +1899,7 @@ cat_check_link_ancestry(struct hfsmount *hfsmp, cnid_t cnid, cnid_t pointed_at_c
 
 
 /*
- * updatelink_callback - update a link's chain
+ * update_siblinglinks_callback - update a link's chain
  */
 
 struct linkupdate_state {
@@ -1819,12 +1909,12 @@ struct linkupdate_state {
 };
 
 static int
-updatelink_callback(__unused const CatalogKey *ckp, CatalogRecord *crp, struct linkupdate_state *state)
+update_siblinglinks_callback(__unused const CatalogKey *ckp, CatalogRecord *crp, struct linkupdate_state *state)
 {
 	HFSPlusCatalogFile *file;
 
 	if (crp->recordType != kHFSPlusFileRecord) {
-		printf("hfs: updatelink_callback: unexpected rec type %d\n", crp->recordType);
+		printf("hfs: update_siblinglinks_callback: unexpected rec type %d\n", crp->recordType);
 		return (btNotFound);
 	}
 
@@ -1837,17 +1927,16 @@ updatelink_callback(__unused const CatalogKey *ckp, CatalogRecord *crp, struct l
 			file->hl_nextLinkID = state->nextlinkid;
 		}
 	} else {
-		printf("hfs: updatelink_callback: file %d isn't a chain\n", file->fileID);
+		printf("hfs: update_siblinglinks_callback: file %d isn't a chain\n", file->fileID);
 	}
 	return (0);
 }
 
 /*
- * cat_updatelink - update a link's chain
+ * cat_update_siblinglinks - update a link's chain
  */
-__private_extern__
 int
-cat_updatelink(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t prevlinkid, cnid_t nextlinkid)
+cat_update_siblinglinks(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t prevlinkid, cnid_t nextlinkid)
 {
 	FCB * fcb;
 	BTreeIterator * iterator;
@@ -1859,24 +1948,25 @@ cat_updatelink(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t prevlinkid, cni
 	state.prevlinkid = prevlinkid;
 	state.nextlinkid = nextlinkid;
 
-	/* Borrow the btcb iterator since we have an exclusive catalog lock. */	
-	iterator = &((BTreeControlBlockPtr)(fcb->ff_sysfileinfo))->iterator;
-	iterator->hint.nodeNum = 0;
+	/* Create an iterator for use by us temporarily */
+	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
+	bzero(iterator, sizeof(*iterator));
 
 	result = getkey(hfsmp, linkfileid, (CatalogKey *)&iterator->key);
 	if (result == 0) {
-		result = BTUpdateRecord(fcb, iterator, (IterateCallBackProcPtr)updatelink_callback, &state);
+		result = BTUpdateRecord(fcb, iterator, (IterateCallBackProcPtr)update_siblinglinks_callback, &state);
 		(void) BTFlushPath(fcb);
 	} else {
-		printf("hfs: cat_updatelink: couldn't resolve cnid %d\n", linkfileid);
+		printf("hfs: cat_update_siblinglinks: couldn't resolve cnid %d\n", linkfileid);
 	}
+
+	FREE (iterator, M_TEMP);
 	return MacToVFSError(result);
 }
 
 /*
  * cat_lookuplink - lookup a link by it's name
  */
-__private_extern__
 int
 cat_lookuplink(struct hfsmount *hfsmp, struct cat_desc *descp, cnid_t *linkfileid, cnid_t *prevlinkid,  cnid_t *nextlinkid)
 {
@@ -1888,9 +1978,9 @@ cat_lookuplink(struct hfsmount *hfsmp, struct cat_desc *descp, cnid_t *linkfilei
 
 	fcb = hfsmp->hfs_catalog_cp->c_datafork;
 
-	/* Borrow the btcb iterator since we have an exclusive catalog lock. */	
-	iterator = &((BTreeControlBlockPtr)(fcb->ff_sysfileinfo))->iterator;
-	iterator->hint.nodeNum = 0;
+	/* Create an iterator for use by us temporarily */
+	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
+	bzero(iterator, sizeof(*iterator));
 
 	if ((result = buildkey(hfsmp, descp, (HFSPlusCatalogKey *)&iterator->key, 0))) {
 		goto exit;
@@ -1914,16 +2004,16 @@ cat_lookuplink(struct hfsmount *hfsmp, struct cat_desc *descp, cnid_t *linkfilei
 		*nextlinkid = 0;
 	}
 exit:
+	FREE(iterator, M_TEMP);
 	return MacToVFSError(result);
 }
 
 
 /*
- * cat_lookuplink - lookup a link by its cnid
+ * cat_lookup_siblinglinks - lookup previous and next link ID for link using its cnid
  */
-__private_extern__
 int
-cat_lookuplinkbyid(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t *prevlinkid,  cnid_t *nextlinkid)
+cat_lookup_siblinglinks(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t *prevlinkid,  cnid_t *nextlinkid)
 {
 	FCB * fcb;
 	BTreeIterator * iterator;
@@ -1933,18 +2023,19 @@ cat_lookuplinkbyid(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t *prevlinkid
 
 	fcb = hfsmp->hfs_catalog_cp->c_datafork;
 
-	/* Borrow the btcb iterator since we have an exclusive catalog lock. */	
-	iterator = &((BTreeControlBlockPtr)(fcb->ff_sysfileinfo))->iterator;
-	iterator->hint.nodeNum = 0;
+	/* Create an iterator for use by us temporarily */
+	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
+	bzero(iterator, sizeof(*iterator));
+
 
 	if ((result = getkey(hfsmp, linkfileid, (CatalogKey *)&iterator->key))) {
-		printf("hfs: cat_lookuplinkbyid: getkey for %d failed %d\n", linkfileid, result);
+		printf("hfs: cat_lookup_siblinglinks: getkey for %d failed %d\n", linkfileid, result);
 		goto exit;
 	}
 	BDINIT(btdata, &file);
 
 	if ((result = BTSearchRecord(fcb, iterator, &btdata, NULL, NULL))) {
-		printf("hfs: cat_lookuplinkbyid: cannot find %d\n", linkfileid);
+		printf("hfs: cat_lookup_siblinglinks: cannot find %d\n", linkfileid);
 		goto exit;
 	}
 	/* The prev/next chain is only valid when kHFSHasLinkChainMask is set. */
@@ -1953,7 +2044,7 @@ cat_lookuplinkbyid(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t *prevlinkid
 
 		parent = ((HFSPlusCatalogKey *)&iterator->key)->parentID;
 
-		/* ADL inodes don't have a chain (its in an EA) */
+		/* directory inodes don't have a chain (it's in an EA) */
 		if (parent == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
 			result = ENOLINK;  /* signal to caller to get head of list */
 		} else if (parent == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) {
@@ -1968,6 +2059,7 @@ cat_lookuplinkbyid(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t *prevlinkid
 		*nextlinkid = 0;
 	}
 exit:
+	FREE(iterator, M_TEMP);		
 	return MacToVFSError(result);
 }
 
@@ -1983,7 +2075,6 @@ exit:
  *	 ca_flags
  *	 ca_finderinfo (type and creator)
  */
-__private_extern__
 int
 cat_createlink(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attrp,
                cnid_t nextlinkid, cnid_t *linkfileid)
@@ -2278,7 +2369,6 @@ exit:
 /*
  * cat_deletelink - delete a link from the catalog
  */
-__private_extern__
 int
 cat_deletelink(struct hfsmount *hfsmp, struct cat_desc *descp)
 {
@@ -2455,7 +2545,6 @@ getentriesattr_callback(const CatalogKey *key, const CatalogRecord *rec,
  *
  * Note: index is zero relative
  */
-__private_extern__
 int
 cat_getentriesattr(struct hfsmount *hfsmp, directoryhint_t *dirhint, struct cat_entrylist *ce_list)
 {
@@ -2690,7 +2779,7 @@ getdirentries_callback(const CatalogKey *ckp, const CatalogRecord *crp,
 		 * regardless, so it's slightly safer to let that logic mark the boolean,
 		 * especially since it's closer to the return of this function.
 		 */		 
-
+			
 		if (state->cbs_extended) {
 			/* The last record has not been returned yet, so we 
 			 * want to stop after packing the last item 
@@ -3043,7 +3132,6 @@ getdirentries_std_callback(const CatalogKey *ckp, const CatalogRecord *crp,
 /*
  * Pack a uio buffer with directory entries from the catalog
  */
-__private_extern__
 int
 cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint,
 				  uio_t uio, int extended, int * items, int * eofflag)
@@ -3087,7 +3175,7 @@ cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint
 	 * field to track whether or not we've returned EOF from the iterator function.
 	 */
 	state.cbs_eof = false;
-
+	
 	iterator = (BTreeIterator *) ((char *)state.cbs_linkinfo + (maxlinks * sizeof(linkinfo_t)));
 	key = (CatalogKey *)&iterator->key;
 	have_key = 0;
@@ -3215,12 +3303,13 @@ cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint
 	/* Note that state.cbs_index is still valid on errors */
 	*items = state.cbs_index - index;
 	index = state.cbs_index;
-	
+
 	/*
 	 * Also note that cbs_eof is set in all cases if we ever hit EOF
 	 * during the enumeration by the catalog callback.  Mark the directory's hint
 	 * descriptor as having hit EOF.
 	 */
+
 	if (state.cbs_eof) {
 		dirhint->dh_desc.cd_flags |= CD_EOF;
 		*eofflag = 1;
@@ -3335,7 +3424,6 @@ cat_findposition(const CatalogKey *ckp, const CatalogRecord *crp,
  * The name portion of the key is compared using a 16-bit binary comparison. 
  * This is called from the b-tree code.
  */
-__private_extern__
 int
 cat_binarykeycompare(HFSPlusCatalogKey *searchKey, HFSPlusCatalogKey *trialKey)
 {
@@ -3517,7 +3605,6 @@ buildkey(struct hfsmount *hfsmp, struct cat_desc *descp,
 /*
  * Resolve hard link reference to obtain the inode record.
  */
-__private_extern__
 int
 cat_resolvelink(struct hfsmount *hfsmp, u_int32_t linkref, int isdirlink, struct HFSPlusCatalogFile *recp)
 {
@@ -3657,7 +3744,6 @@ exit:
  * The key's parent id is the only part of the key expected to be used by the caller.
  * The name portion of the key may not always be valid (ie in the case of a hard link).
  */
-__private_extern__
 int
 cat_getkeyplusattr(struct hfsmount *hfsmp, cnid_t cnid, CatalogKey * key, struct cat_attr *attrp)
 {
@@ -3684,7 +3770,7 @@ cat_getkeyplusattr(struct hfsmount *hfsmp, cnid_t cnid, CatalogKey * key, struct
 		 * Pick up the first link in the chain and get a descriptor for it.
 		 * This allows blind bulk access checks to work for hardlinks.
 		 */
-		if ((cat_lookuplinkbyid(hfsmp, cnid, &prevlinkid,  &nextlinkid) == 0) &&
+		if ((cat_lookup_siblinglinks(hfsmp, cnid, &prevlinkid,  &nextlinkid) == 0) &&
 		    (nextlinkid != 0)) {
 			if (cat_findname(hfsmp, nextlinkid, &linkdesc) == 0) {
 				key->hfsPlus.parentID = linkdesc.cd_parentcnid;
@@ -4203,3 +4289,99 @@ isadir(const CatalogRecord *crp)
 		crp->recordType == kHFSPlusFolderRecord);
 }
 
+/*
+ * cat_lookup_dirlink - lookup a catalog record for a directory hard link 
+ * (not the inode) using the catalog record ID.  Note that this function does 
+ * NOT resolve the directory hard link to its directory inode; it returns 
+ * the link record itself.
+ *
+ * Note: The caller is responsible for releasing the output catalog 
+ * descriptor (when supplied outdescp is non-null).
+ */
+int
+cat_lookup_dirlink(struct hfsmount *hfsmp, cnid_t dirlink_id, 
+		u_int8_t forktype, struct cat_desc *outdescp, 
+		struct cat_attr *attrp, struct cat_fork *forkp)
+{
+	struct BTreeIterator *iterator = NULL;
+	FSBufferDescriptor btdata;
+	u_int16_t datasize;
+	CatalogKey *keyp;
+	CatalogRecord *recp = NULL;
+	int error;
+
+	/* No directory hard links on standard HFS */
+	if (hfsmp->vcbSigWord == kHFSSigWord) {
+		return ENOTSUP;
+	}
+
+	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
+	if (iterator == NULL) {
+		return ENOMEM;
+	}
+	bzero(iterator, sizeof(*iterator));
+	buildthreadkey(dirlink_id, 1, (CatalogKey *)&iterator->key);
+
+	MALLOC(recp, CatalogRecord *, sizeof(CatalogRecord), M_TEMP, M_WAITOK);
+	if (recp == NULL) {
+		error = ENOMEM;
+		goto out;
+	}
+	BDINIT(btdata, recp);
+
+	error = BTSearchRecord(VTOF(HFSTOVCB(hfsmp)->catalogRefNum), iterator,
+				&btdata, &datasize, iterator);
+	if (error) {
+		goto out;
+	}
+	/* Directory hard links are catalog file records */
+	if (recp->recordType != kHFSPlusFileThreadRecord) {
+		error = ENOENT;
+		goto out;
+	}
+
+	keyp = (CatalogKey *)&recp->hfsPlusThread.reserved;
+	keyp->hfsPlus.keyLength = kHFSPlusCatalogKeyMinimumLength +
+				  (keyp->hfsPlus.nodeName.length * 2);
+	if (forktype == kHFSResourceForkType) {
+		/* Lookup resource fork for directory hard link */
+		error = cat_lookupbykey(hfsmp, keyp, HFS_LOOKUP_HARDLINK, 0, true, outdescp, attrp, forkp, NULL);
+	} else {
+		/* Lookup data fork, if any, for directory hard link */
+		error = cat_lookupbykey(hfsmp, keyp, HFS_LOOKUP_HARDLINK, 0, false, outdescp, attrp, forkp, NULL);
+	}
+	if (error) {
+		printf ("hfs: cat_lookup_dirlink(): Error looking up file record for id=%u (error=%d)\n", dirlink_id, error);
+		hfs_mark_volume_inconsistent(hfsmp);
+		goto out;
+	}
+	/* Just for sanity, make sure that the IDs in the catalog record and thread record match */
+	if ((outdescp != NULL) && (dirlink_id != outdescp->cd_cnid)) {
+		printf ("hfs: cat_lookup_dirlink(): Requested cnid=%u != found_cnid=%u\n", dirlink_id, outdescp->cd_cnid);
+		hfs_mark_volume_inconsistent(hfsmp);
+		error = ENOENT;
+	}
+
+out:
+	if (recp) {
+		FREE(recp, M_TEMP);
+	}
+	FREE(iterator, M_TEMP);
+
+	return MacToVFSError(error);
+}
+
+/*
+ * cat_update_dirlink - update the catalog node for a directory hard link 
+ * described by descp using the data from attrp and forkp.
+ */
+int
+cat_update_dirlink(struct hfsmount *hfsmp, u_int8_t forktype, 
+		struct cat_desc *descp, struct cat_attr *attrp, struct cat_fork *forkp)
+{
+	if (forktype == kHFSResourceForkType) {
+		return cat_update_internal(hfsmp, true, descp, attrp, NULL, forkp);
+	} else {
+		return cat_update_internal(hfsmp, true, descp, attrp, forkp, NULL);
+	} 
+}
diff --git a/bsd/hfs/hfs_catalog.h b/bsd/hfs/hfs_catalog.h
index 6c1eaa130..e8574e17d 100644
--- a/bsd/hfs/hfs_catalog.h
+++ b/bsd/hfs/hfs_catalog.h
@@ -63,7 +63,7 @@ struct cat_desc {
 	const u_int8_t * cd_nameptr; /* pointer to cnode name */
 };
 
-/* cd_flags 
+/* cd_flags
  *
  * CD_EOF is used by hfs_vnop_readdir / cat_getdirentries to indicate EOF was
  * encountered during a directory enumeration.  When this flag is observed
@@ -258,6 +258,11 @@ union CatalogRecord {
 };
 typedef union CatalogRecord  CatalogRecord;
 
+/* Constants for HFS fork types */
+enum {
+	kHFSDataForkType = 0x0, 	/* data fork */
+	kHFSResourceForkType = 0xff	/* resource fork */
+};
 
 /*
  * Catalog Interface
@@ -404,7 +409,7 @@ enum {
 extern int cat_deletelink( struct hfsmount *hfsmp,
                            struct cat_desc *descp);
 
-extern int cat_updatelink( struct hfsmount *hfsmp,
+extern int cat_update_siblinglinks( struct hfsmount *hfsmp,
                            cnid_t linkfileid,
                            cnid_t prevlinkid,
                            cnid_t nextlinkid);
@@ -415,11 +420,23 @@ extern int cat_lookuplink( struct hfsmount *hfsmp,
                            cnid_t *prevlinkid,
                            cnid_t *nextlinkid);
 
-extern int cat_lookuplinkbyid( struct hfsmount *hfsmp,
+extern int cat_lookup_siblinglinks( struct hfsmount *hfsmp,
                                cnid_t linkfileid,
                                cnid_t *prevlinkid,
                                cnid_t *nextlinkid);
 
+extern int cat_lookup_dirlink(struct hfsmount *hfsmp, 
+			     cnid_t dirlink_id, 
+			     u_int8_t forktype, 
+			     struct cat_desc *outdescp, 
+			     struct cat_attr *attrp, 
+			     struct cat_fork *forkp);
+
+extern int cat_update_dirlink(struct hfsmount *hfsmp, 
+			      u_int8_t forktype, 
+			      struct cat_desc *descp, 
+			      struct cat_attr *attrp, 
+			      struct cat_fork *rsrcforkp);
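
A hedged example of driving the two new directory-hard-link entry points together; hfsmp and dirlink_id are placeholders and error handling is trimmed:

	/* Illustrative only: fetch the data fork of a directory hard
	 * link by its catalog node ID, then write the (possibly
	 * modified) attributes back to the link record. */
	struct cat_desc desc;
	struct cat_attr attr;
	struct cat_fork fork;
	int error;

	error = cat_lookup_dirlink(hfsmp, dirlink_id, kHFSDataForkType,
	                           &desc, &attr, &fork);
	if (error == 0) {
		error = cat_update_dirlink(hfsmp, kHFSDataForkType,
		                           &desc, &attr, &fork);
		/* The caller owns the output descriptor. */
		cat_releasedesc(&desc);
	}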
 
 #endif /* __APPLE_API_PRIVATE */
 #endif /* KERNEL */
diff --git a/bsd/hfs/hfs_chash.c b/bsd/hfs/hfs_chash.c
index b2db58e75..997d247ae 100644
--- a/bsd/hfs/hfs_chash.c
+++ b/bsd/hfs/hfs_chash.c
@@ -146,9 +146,8 @@ hfs_delete_chash(struct hfsmount *hfsmp)
  *
  * If it is in core, but locked, wait for it.
  */
-__private_extern__
 struct vnode *
-hfs_chash_getvnode(struct hfsmount *hfsmp, ino_t inum, int wantrsrc, int skiplock)
+hfs_chash_getvnode(struct hfsmount *hfsmp, ino_t inum, int wantrsrc, int skiplock, int allow_deleted)
 {
 	struct cnode *cp;
 	struct vnode *vp;
@@ -201,13 +200,15 @@ loop:
 		 * lock on the cnode which would allow the node to be
 		 * unlinked
 		 */
-		if (cp->c_flag & (C_NOEXISTS | C_DELETED)) {
-			if (!skiplock)
-		        	hfs_unlock(cp);
-			vnode_put(vp);
-
-			return (NULL);
-		}			
+		if (!allow_deleted) {
+			if (cp->c_flag & (C_NOEXISTS | C_DELETED)) {
+				if (!skiplock)
+						hfs_unlock(cp);
+				vnode_put(vp);
+	
+				return (NULL);
+			}
+		}
 		return (vp);
 	}
 exit:
@@ -218,8 +219,12 @@ exit:
 
 /*
  * Use the device, fileid pair to snoop an incore cnode.
+ *
+ * A cnode can exist in the chash even after it has been 
+ * deleted from the catalog, so this function returns 
+ * ENOENT if C_NOEXISTS is set in the cnode's flags.
+ * 
  */
-__private_extern__
 int
 hfs_chash_snoop(struct hfsmount *hfsmp, ino_t inum, int (*callout)(const struct cat_desc *,
                 const struct cat_attr *, void *), void * arg)
@@ -237,6 +242,10 @@ hfs_chash_snoop(struct hfsmount *hfsmp, ino_t inum, int (*callout)(const struct
 	for (cp = CNODEHASH(hfsmp, inum)->lh_first; cp; cp = cp->c_hash.le_next) {
 		if (cp->c_fileid != inum)
 			continue;
+	       /* Skip cnodes that have been removed from the catalog */
+		if (cp->c_flag & (C_NOEXISTS | C_DELETED)) {
+			break;
+		}
 		/* Skip cnodes being created or reclaimed. */
 		if (!ISSET(cp->c_hflag, H_ALLOC | H_TRANSIT | H_ATTACH)) {
 			result = callout(&cp->c_desc, &cp->c_attr, arg);
@@ -257,10 +266,16 @@ hfs_chash_snoop(struct hfsmount *hfsmp, ino_t inum, int (*callout)(const struct
  *
  * If the cnode is C_DELETED, then return NULL since that 
  * inum is no longer valid for lookups (open-unlinked file).
+ *
+ * If the cnode is C_DELETED but also marked C_RENAMED, then that means
+ * the cnode was renamed over and a new entry exists in its place.  The caller
+ * should re-drive the lookup to get the newer entry.  In that case, we'll still
+ * return NULL for the cnode, but also return GNV_CHASH_RENAMED in the output flags
+ * of this function to indicate to the caller that they should re-drive.
  */
-__private_extern__
 struct cnode *
-hfs_chash_getcnode(struct hfsmount *hfsmp, ino_t inum, struct vnode **vpp, int wantrsrc, int skiplock)
+hfs_chash_getcnode(struct hfsmount *hfsmp, ino_t inum, struct vnode **vpp, 
+				   int wantrsrc, int skiplock, int *out_flags, int *hflags)
 {
 	struct cnode	*cp;
 	struct cnode	*ncp = NULL;
@@ -295,6 +310,7 @@ loop_with_lock:
 			 * The desired vnode isn't there so tag the cnode.
 			 */
 			SET(cp->c_hflag, H_ATTACH);
+			*hflags |= H_ATTACH;
 
 			hfs_chash_unlock(hfsmp);
 		} else {
@@ -311,7 +327,7 @@ loop_with_lock:
 			 * this cnode and add it to the hash
 			 * just dump our allocation
 			 */
-		        FREE_ZONE(ncp, sizeof(struct cnode), M_HFSNODE);
+		    FREE_ZONE(ncp, sizeof(struct cnode), M_HFSNODE);
 			ncp = NULL;
 		}
 
@@ -330,13 +346,19 @@ loop_with_lock:
 		 * is no longer valid for lookups.
 		 */
 		if ((cp->c_flag & (C_NOEXISTS | C_DELETED)) && !wantrsrc) {
+			int renamed = 0;
+			if (cp->c_flag & C_RENAMED) {
+				renamed = 1;
+			}
 			if (!skiplock)
 				hfs_unlock(cp);
 			if (vp != NULLVP) {
 				vnode_put(vp);
 			} else {
 				hfs_chash_lock_spin(hfsmp);
-		        	CLR(cp->c_hflag, H_ATTACH);
+		        CLR(cp->c_hflag, H_ATTACH);
+				*hflags &= ~H_ATTACH;
+
 				if (ISSET(cp->c_hflag, H_WAITING)) {
 					CLR(cp->c_hflag, H_WAITING);
 					wakeup((caddr_t)cp);
@@ -345,6 +367,9 @@ loop_with_lock:
 			}
 			vp = NULL;
 			cp = NULL;
+			if (renamed) {
+				*out_flags = GNV_CHASH_RENAMED;
+			}
 		}
 		*vpp = vp;
 		return (cp);
@@ -358,8 +383,7 @@ loop_with_lock:
 
 	if (ncp == NULL) {
 		hfs_chash_unlock(hfsmp);
-
-	        MALLOC_ZONE(ncp, struct cnode *, sizeof(struct cnode), M_HFSNODE, M_WAITOK);
+	    MALLOC_ZONE(ncp, struct cnode *, sizeof(struct cnode), M_HFSNODE, M_WAITOK);
 		/*
 		 * since we dropped the chash lock, 
 		 * we need to go back and re-verify
@@ -372,6 +396,7 @@ loop_with_lock:
 
 	bzero(ncp, sizeof(struct cnode));
 	SET(ncp->c_hflag, H_ALLOC);
+	*hflags |= H_ALLOC;
 	ncp->c_fileid = inum;
 	TAILQ_INIT(&ncp->c_hintlist); /* make the list empty */
 	TAILQ_INIT(&ncp->c_originlist);
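
A hedged sketch of the re-drive contract described in the hfs_chash_getcnode comment above; the variables are placeholders and the loop shape is illustrative, not the actual hfs_getnewvnode logic:

	/* Illustrative caller pattern: if the hash lookup reports that
	 * the cnode was renamed over (GNV_CHASH_RENAMED), restart the
	 * lookup so the newer catalog entry is found. */
	struct cnode *cp;
	struct vnode *vp;
	int out_flags, hflags;

	do {
		out_flags = 0;
		hflags = 0;
		cp = hfs_chash_getcnode(hfsmp, inum, &vp, wantrsrc,
		                        skiplock, &out_flags, &hflags);
	} while ((cp == NULL) && (out_flags & GNV_CHASH_RENAMED));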
diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c
index c17c8d4dd..8703ecb9b 100644
--- a/bsd/hfs/hfs_cnode.c
+++ b/bsd/hfs/hfs_cnode.c
@@ -36,6 +36,7 @@
 #include <sys/ubc.h>
 #include <sys/quota.h>
 #include <sys/kdebug.h>
+#include <libkern/OSByteOrder.h>
 
 #include <kern/locks.h>
 
@@ -46,6 +47,7 @@
 #include <hfs/hfs_catalog.h>
 #include <hfs/hfs_cnode.h>
 #include <hfs/hfs_quota.h>
+#include <hfs/hfs_format.h>
 
 extern int prtactive;
 
@@ -53,88 +55,192 @@ extern lck_attr_t *  hfs_lock_attr;
 extern lck_grp_t *  hfs_mutex_group;
 extern lck_grp_t *  hfs_rwlock_group;
 
-static int  hfs_filedone(struct vnode *vp, vfs_context_t context);
-
 static void  hfs_reclaim_cnode(struct cnode *);
-
+static int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim);
 static int hfs_isordered(struct cnode *, struct cnode *);
 
-inline int hfs_checkdeleted (struct cnode *cp) {
-	return ((cp->c_flag & (C_DELETED | C_NOEXISTS)) ? ENOENT : 0);
+__inline__ int hfs_checkdeleted (struct cnode *cp) {
+	return ((cp->c_flag & (C_DELETED | C_NOEXISTS)) ? ENOENT : 0);	
 }
 
 
 /*
- * Last reference to an cnode.  If necessary, write or delete it.
+ * Function used by a special fcntl() that decorates a cnode/vnode to
+ * indicate that it is backing another filesystem, like a disk image.
+ *
+ * The argument 'val' indicates whether or not to set the bit in the cnode flags.
+ * 
+ * Returns non-zero on failure, 0 on success. 
  */
-__private_extern__
-int
-hfs_vnop_inactive(struct vnop_inactive_args *ap)
-{
-	struct vnode *vp = ap->a_vp;
-	struct cnode *cp;
-	struct hfsmount *hfsmp = VTOHFS(vp);
-	struct proc *p = vfs_context_proc(ap->a_context);
-	int error = 0;
-	int recycle = 0;
-	int forkcount = 0;
-	int truncated = 0;
-	int started_tr = 0;
-	int took_trunc_lock = 0;
-	cat_cookie_t cookie;
-	int cat_reserve = 0;
-	int lockflags;
-	enum vtype v_type;
-
-	v_type = vnode_vtype(vp);
+int hfs_set_backingstore (struct vnode *vp, int val) {
+	struct cnode *cp = NULL;
+	int err = 0;
+	
 	cp = VTOC(vp);
-
-	if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp) ||
-	    (hfsmp->hfs_freezing_proc == p)) {
-		return (0);
+	if (vnode_isdir(vp)) {
+		return EINVAL;
 	}
 
-	/*
-	 * Ignore nodes related to stale file handles.
-	 * We are peeking at the cnode flag without the lock, but if C_NOEXISTS
-	 * is set, that means the cnode doesn't have any backing store in the 
-	 * catalog anymore, and is otherwise safe to force a recycle
-	 */
+	/* lock the cnode */
+	err = hfs_lock (cp, HFS_EXCLUSIVE_LOCK);
+	if (err) {
+		return err;
+	}
 	
-	if (cp->c_flag & C_NOEXISTS) {
-		vnode_recycle(vp);
-		return (0);
+	if (val) {
+		cp->c_flag |= C_BACKINGSTORE;
+	}
+	else {
+		cp->c_flag &= ~C_BACKINGSTORE;
 	}
 
-	if ((v_type == VREG || v_type == VLNK)) {
-		hfs_lock_truncate(cp, TRUE);
-		took_trunc_lock = 1;
+	/* unlock everything */
+	hfs_unlock (cp);
+
+	return err;
+}
+
+/*
+ * Function used by a special fcntl() that checks whether a cnode/vnode
+ * indicates that it is backing another filesystem, like a disk image.
+ *
+ * The argument 'val' is an output argument for whether or not the bit is set.
+ * 
+ * Returns non-zero on failure, 0 on success. 
+ */
+
+int hfs_is_backingstore (struct vnode *vp, int *val) {
+	struct cnode *cp = NULL;
+	int err = 0;
+
+	if (!vnode_isreg(vp)) {
+		*val = 0;
+		return 0;
 	}
 
-	(void) hfs_lock(cp, HFS_FORCE_LOCK);
+	cp = VTOC(vp);
+
+	/* lock the cnode */
+	err = hfs_lock (cp, HFS_SHARED_LOCK);
+	if (err) {
+		return err;
+	}
+
+	if (cp->c_flag & C_BACKINGSTORE) {
+		*val = 1;
+	}	
+	else {
+		*val = 0;
+	}
+
+	/* unlock everything */
+	hfs_unlock (cp);
 
-	if (cp->c_datafork)
+	return err;
+}
+
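A hedged sketch of how a caller might pair the two helpers above; the fcntl plumbing is omitted and the variables are placeholders:

	/* Illustrative only: mark a vnode as backing a disk image,
	 * then read the bit back through the query helper. */
	int is_backing = 0;
	int error;

	error = hfs_set_backingstore(vp, 1);	/* sets C_BACKINGSTORE */
	if (error == 0) {
		error = hfs_is_backingstore(vp, &is_backing);
	}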
+
+/*
+ * hfs_cnode_teardown
+ *
+ * This is an internal function that is invoked from both hfs_vnop_inactive
+ * and hfs_vnop_reclaim.  As VNOP_INACTIVE is not necessarily called from vnodes
+ * being recycled and reclaimed, it is important that we do any post-processing
+ * necessary for the cnode in both places.  Important tasks include things such as
+ * releasing the blocks from an open-unlinked file when all references to it have dropped,
+ * and handling resource forks separately from data forks.
+ *
+ * Note that we take only the vnode as an argument here (rather than the cnode).
+ * Recall that each cnode supports two forks (rsrc/data), and we can always get the right
+ * cnode from either of the vnodes, but the reverse is not true -- we can't determine which
+ * vnode we need to reclaim if only the cnode is supplied. 
+ *
+ * This function is idempotent and safe to call from both hfs_vnop_inactive and hfs_vnop_reclaim
+ * if both are invoked one right after the other.  In the second call, most of this function's if()
+ * conditions will fail, since they apply generally to cnodes still marked with C_DELETED.  
+ * As a quick check to see if this function is necessary, determine if the cnode is already
+ * marked C_NOEXISTS.  If it is, then it is safe to skip this function.  The only tasks that 
+ * remain for cnodes marked in such a fashion are to tear down their fork references and 
+ * release all directory hints and hardlink origins.  However, both of those are done 
+ * in hfs_vnop_reclaim.  hfs_update, by definition, is not necessary if the cnode's catalog
+ * entry is no longer there.  
+ *
+ * 'reclaim' argument specifies whether or not we were called from hfs_vnop_reclaim.  If we are
+ * invoked from hfs_vnop_reclaim, we cannot call functions like cluster_push since the UBC info 
+ * is totally gone by that point.
+ *
+ * Assumes that both truncate and cnode locks for 'cp' are held.
+ */
+static 
+int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {
+	
+	int forkcount = 0;
+	enum vtype v_type;
+	struct cnode *cp;
+	int error = 0;
+	int started_tr = 0;
+	struct hfsmount *hfsmp = VTOHFS(vp);
+	struct proc *p = vfs_context_proc(ctx);
+	int truncated = 0;
+	cat_cookie_t cookie;
+	int cat_reserve = 0;
+	int lockflags;
+	int ea_error = 0;
+	
+	v_type = vnode_vtype(vp);
+	cp = VTOC(vp);
+	
+	if (cp->c_datafork) {
 		++forkcount;
-	if (cp->c_rsrcfork)
+	}
+	if (cp->c_rsrcfork) {
 		++forkcount;
-
+	}
+	
+	
 	/*
-	 * We should lock cnode before checking the flags in the 
-	 * condition below and should unlock the cnode before calling 
-	 * ubc_setsize() as cluster code can call other HFS vnops which
-	 * will try to acquire the same cnode lock and cause deadlock.
-	 * Only call ubc_setsize to 0 if we are the last fork.
-	 */
-	if ((v_type == VREG || v_type == VLNK) &&
+	 * Skip the call to ubc_setsize if we're being invoked on behalf of reclaim.
+	 * The dirty regions would have already been synced to disk, so informing UBC
+	 * that they can toss the pages doesn't help anyone at this point. 
+	 * 
+	 * Note that this is a performance problem if the vnode goes straight to reclaim
+	 * (and skips inactive), since there would be no way for anyone to notify the UBC
+	 * that all pages in this file are basically useless.
+	 */	
+	if (reclaim == 0) {
+		/*
+		 * Check whether we are tearing down a cnode with only one remaining fork.
+		 * If there are blocks in its filefork, then we need to unlock the cnode
+		 * before calling ubc_setsize.  The cluster layer may re-enter the filesystem
+		 * (e.g. VNOP_BLOCKMAP), and if we retain the cnode lock, we could panic
+		 * from taking the same lock twice (a double-lock).  
+		 */
+		
+		if ((v_type == VREG || v_type == VLNK) &&
 			(cp->c_flag & C_DELETED) &&
 			(VTOF(vp)->ff_blocks != 0) && (forkcount == 1)) {
-		hfs_unlock(cp); 
-		ubc_setsize(vp, 0);
-		(void) hfs_lock(cp, HFS_FORCE_LOCK);
+			hfs_unlock(cp); 
+			/* ubc_setsize just fails if we were to call this from VNOP_RECLAIM */
+			ubc_setsize(vp, 0);
+			(void) hfs_lock(cp, HFS_FORCE_LOCK);
+		}	
 	}
-
-	if (v_type == VREG && !ISSET(cp->c_flag, C_DELETED) && VTOF(vp)->ff_blocks) {
-		hfs_filedone(vp, ap->a_context);
+	
+	/* 
+	 * Push file data out for normal files that haven't been evicted from 
+	 * the namespace.  We only do this if this function was not called from reclaim,
+	 * because by that point the UBC information has been totally torn down.  
+	 * 
+	 * There should also be no way for a normal file that has NOT been deleted from 
+	 * the namespace to skip INACTIVE and go straight to RECLAIM.  That race only happens
+	 * when the file becomes open-unlinked. 
+	 */
+	if ((v_type == VREG) && 
+		(!ISSET(cp->c_flag, C_DELETED)) && 
+		(!ISSET(cp->c_flag, C_NOEXISTS)) &&
+		(VTOF(vp)->ff_blocks) &&
+		(reclaim == 0)) {
+		hfs_filedone(vp, ctx);
 	}
 	/* 
 	 * Remove any directory hints or cached origins
@@ -145,12 +251,7 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap)
 	if (cp->c_flag & C_HARDLINK) {
 		hfs_relorigins(cp);
 	}
-
-	/* Hurry the recycling process along if we're an open-unlinked file */
-	if((v_type == VREG || v_type == VLNK) && (cp->c_flag & C_DELETED)) {
-		recycle = 1;	
-	}
-
+	
 	/*
 	 * This check is slightly complicated.  We should only truncate data 
 	 * in very specific cases for open-unlinked files.  This is because
@@ -162,7 +263,7 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap)
 	 * If we're the last fork, then we have cleaning up to do.
 	 * 
 	 * A) last fork, and vp == c_vp
-	 *	Truncate away own fork dat. If rsrc fork is not in core, truncate it too.
+	 *	Truncate away own fork data. If rsrc fork is not in core, truncate it too.
 	 *
 	 * B) last fork, and vp == c_rsrc_vp
 	 *	Truncate ourselves, assume data fork has been cleaned due to C).
@@ -177,192 +278,320 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap)
 	 * D) not the last fork, vp == c_rsrc_vp
 	 *	Don't enter the block below, just clean up vnode and push it out of core.
 	 */
-
-	if ((v_type == VREG || v_type == VLNK) && (cp->c_flag & C_DELETED) &&
-			((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) {
+	
+	if ((v_type == VREG || v_type == VLNK) && 
+		(cp->c_flag & C_DELETED) &&
+		((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) {
+		
+		/* Truncate away our own fork data. (Case A, B, C above) */
 		if (VTOF(vp)->ff_blocks != 0) {
 			/*
 			 * Since we're already inside a transaction,
 			 * tell hfs_truncate to skip the ubc_setsize.
+			 *
+			 * This truncate call (and the one below) is fine from VNOP_RECLAIM's 
+			 * context because we're only removing blocks, not zero-filling new 
+			 * ones.  The C_DELETED check above makes things much simpler. 
 			 */
-			error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, 0, ap->a_context);
-			if (error)
+			error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, 0, ctx);
+			if (error) {
 				goto out;
+			}
 			truncated = 1;
 		}
-
+		
 		/* 
-		 * If c_blocks > 0 and we are the last fork (data fork), then
-		 * we can go and and truncate away the rsrc fork blocks if
-		 * they were not in core.
+		 * Truncate away the resource fork, if we represent the data fork and
+		 * it is the last fork.  That means, by definition, the rsrc fork is not in 
+		 * core.  So we bring it into core, and then truncate it away. 
+		 * 
+		 * This is invoked via case A above only.
 		 */
 		if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) {
 			struct vnode *rvp = NULLVP;
-
+			
+			/* 
+			 * It is safe for us to pass FALSE to the argument can_drop_lock
+			 * on this call to hfs_vgetrsrc.  We know that the resource fork does not 
+			 * exist in core, so we'll have to go to the catalog to retrieve its 
+			 * information.  That will attach the resource fork vnode to our cnode. 
+			 */
 			error = hfs_vgetrsrc(hfsmp, vp, &rvp, FALSE, FALSE);
-			if (error)
+			if (error) {
 				goto out;
+			}
 			/*
 			 * Defer the vnode_put and ubc_setsize on rvp until hfs_unlock().
+			 * 
+			 * By bringing the vnode into core above, we may force hfs_vnop_reclaim 
+			 * to only partially finish if that's what called us.  Bringing the 
+			 * resource fork into core results in a new rsrc vnode that will get 
+			 * immediately marked for termination below.  It will get recycled/reclaimed 
+			 * as soon as possible, but that could cause another round of inactive and reclaim. 
 			 */
 			cp->c_flag |= C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE;
-			error = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 1, 0, ap->a_context);
-			if (error)
+			error = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 1, 0, ctx);
+			if (error) {
 				goto out;
+			}
+			
+			/* 
+			 * Note that the following call to vnode_recycle is safe from within the
+			 * context of hfs_vnop_inactive or hfs_vnop_reclaim.  It is being invoked
+			 * on the RSRC fork vp (which is not our current vnode) As such, we hold 
+			 * an iocount on it and vnode_recycle will just add the MARKTERM bit at this
+			 * point.
+			 */
 			vnode_recycle(rvp);  /* all done with this vnode */
 		}
 	}
-
-	// If needed, get rid of any xattrs that this file (or directory) may have.
-	// Note that this must happen outside of any other transactions
-	// because it starts/ends its own transactions and grabs its
-	// own locks.  This is to prevent a file with a lot of attributes
-	// from creating a transaction that is too large (which panics).
-	//
-	if ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0 && 
-			(cp->c_flag & C_DELETED) && (forkcount <= 1)) {
-		hfs_removeallattr(hfsmp, cp->c_fileid);
-	}
-
+	
 	/*
-	 * Check for a postponed deletion.
-	 * (only delete cnode when the last fork goes inactive)
+	 * If we represent the last fork (or none, in the case of a dir), 
+	 * the cnode has become open-unlinked,
+	 * AND it has EAs, then we need to get rid of them.
+	 *
+	 * Note that this must happen outside of any other transactions
+	 * because it starts/ends its own transactions and grabs its
+	 * own locks.  This is to prevent a file with a lot of attributes
+	 * from creating a transaction that is too large (which panics).
 	 */
-	if ((cp->c_flag & C_DELETED) && (forkcount <= 1)) {			
-		/*
-		 * Mark cnode in transit so that no one can get this 
-		 * cnode from cnode hash.
-		 */
-	        // hfs_chash_mark_in_transit(hfsmp, cp);
-	        // XXXdbg - remove the cnode from the hash table since it's deleted
-	        //          otherwise someone could go to sleep on the cnode and not
-	        //          be woken up until this vnode gets recycled which could be
-	        //          a very long time...
-		hfs_chashremove(hfsmp, cp);
-
-		cp->c_flag |= C_NOEXISTS;   // XXXdbg
-		cp->c_rdev = 0;
-
-		if (started_tr == 0) {
-		    if (hfs_start_transaction(hfsmp) != 0) {
-			error = EINVAL;
-			goto out;
-		    }
-		    started_tr = 1;
-		}
+	if ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0 &&
+		(cp->c_flag & C_DELETED) && 
+		(forkcount <= 1)) {
 		
-		/*
-		 * Reserve some space in the Catalog file.
-		 */
-		if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) {
+		ea_error = hfs_removeallattr(hfsmp, cp->c_fileid);
+	}
+	
+	/*
+	 * If the cnode represented an open-unlinked file, then now
+	 * actually remove the cnode's catalog entry and release all blocks
+	 * it may have been using.  
+	 */
+	if ((cp->c_flag & C_DELETED) && (forkcount <= 1)) {
+		/*
+		 * Mark cnode in transit so that no one can get this 
+		 * cnode from cnode hash.
+		 */
+		// hfs_chash_mark_in_transit(hfsmp, cp);
+		// XXXdbg - remove the cnode from the hash table since it's deleted
+		//          otherwise someone could go to sleep on the cnode and not
+		//          be woken up until this vnode gets recycled which could be
+		//          a very long time...
+		hfs_chashremove(hfsmp, cp);
+		
+		cp->c_flag |= C_NOEXISTS;   // XXXdbg
+		cp->c_rdev = 0;
+		
+		if (started_tr == 0) {
+			if (hfs_start_transaction(hfsmp) != 0) {
+				error = EINVAL;
+				goto out;
+			}
+			started_tr = 1;
+		}
+		
+		/*
+		 * Reserve some space in the Catalog file.
+		 */
+		if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) {
+			goto out;
+		}
+		cat_reserve = 1;
+		
+		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
+		
+		if (cp->c_blocks > 0) {
+			printf("hfs_inactive: deleting non-empty%sfile %d, "
+			       "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ",
+			       (int)cp->c_fileid, (int)cp->c_blocks);
+		}
+		
+		//
+		// release the name pointer in the descriptor so that
+		// cat_delete() will use the file-id to do the deletion.
+		// in the case of hard links this is imperative (in the
+		// case of regular files the fileid and cnid are the
+		// same so it doesn't matter).
+		//
+		cat_releasedesc(&cp->c_desc);
+		
+		/*
+		 * The descriptor name may be zero,
+		 * in which case the fileid is used.
+		 */
+		error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr);
+		
+		if (error && truncated && (error != ENXIO))
+			printf("hfs_inactive: couldn't delete a truncated file!");
+		
+		/* Update HFS Private Data dir */
+		if (error == 0) {
+			hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
+			if (vnode_isdir(vp)) {
+				DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
+			}
+			(void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
+					 &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
+		}
+		
+		hfs_systemfile_unlock(hfsmp, lockflags);
+		
+		if (error) {
 			goto out;
 		}
-		cat_reserve = 1;
-
-		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
-
-		if (cp->c_blocks > 0) {
-			printf("hfs_inactive: deleting non-empty%sfile %d, "
-			       "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ",
-			       (int)cp->c_fileid, (int)cp->c_blocks);
-		}
-
-		//
-		// release the name pointer in the descriptor so that
-		// cat_delete() will use the file-id to do the deletion.
-		// in the case of hard links this is imperative (in the
-		// case of regular files the fileid and cnid are the
-		// same so it doesn't matter).
-		//
-		cat_releasedesc(&cp->c_desc);
 		
-		/*
-		 * The descriptor name may be zero,
-		 * in which case the fileid is used.
-		 */
-		error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr);
+#if QUOTA
+		if (hfsmp->hfs_flags & HFS_QUOTAS)
+			(void)hfs_chkiq(cp, -1, NOCRED, 0);
+#endif /* QUOTA */
 		
-		if (error && truncated && (error != ENXIO))
-			printf("hfs_inactive: couldn't delete a truncated file!");
-
-  		/* Update HFS Private Data dir */
-		if (error == 0) {
-			hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
-			if (vnode_isdir(vp)) {
-				DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
+		/* Already set C_NOEXISTS at the beginning of this block */
+		cp->c_flag &= ~C_DELETED;
+		cp->c_touch_chgtime = TRUE;
+		cp->c_touch_modtime = TRUE;
+		
+		if (error == 0)
+			hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0);
+	}
+	
+	/*
+	 * A file may have had delayed allocations, in which case hfs_update
+	 * would not have updated the catalog record (cat_update).  We need
+	 * to do that now, before we lose our fork data.  We also need to
+	 * force the update, or hfs_update will again skip the cat_update.
+	 *
+	 * If the file has C_NOEXISTS set, then we can skip the hfs_update call
+	 * because the catalog entry has already been removed.  There would be no point
+	 * in looking up the entry in the catalog to modify it when we already know it's gone.
+	 */
+	if ((!ISSET(cp->c_flag, C_NOEXISTS)) &&
+		((cp->c_flag & C_MODIFIED) || cp->c_touch_acctime || 
+		 cp->c_touch_chgtime || cp->c_touch_modtime)) {
+			
+			if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){
+				cp->c_flag |= C_FORCEUPDATE;
 			}
-			(void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
-				&hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
+			hfs_update(vp, 0);
 		}
+	
+out:
+	if (cat_reserve)
+		cat_postflight(hfsmp, &cookie, p);
+	
+	// XXXdbg - have to do this because a goto could have come here
+	if (started_tr) {
+		hfs_end_transaction(hfsmp);
+		started_tr = 0;
+	}
+	
+	return error;	
+}
 
-		hfs_systemfile_unlock(hfsmp, lockflags);
-
-		if (error)
-			goto out;
 
-#if QUOTA
-		if (hfsmp->hfs_flags & HFS_QUOTAS)
-			(void)hfs_chkiq(cp, -1, NOCRED, 0);
-#endif /* QUOTA */
-		
-		/* Already set C_NOEXISTS at the beginning of this block */
-		cp->c_flag &= ~C_DELETED;
-		cp->c_touch_chgtime = TRUE;
-		cp->c_touch_modtime = TRUE;
 
-		if (error == 0)
-			hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0);
-	}
+/*
+ * hfs_vnop_inactive
+ *
+ * The last usecount on the vnode has gone away, so we need to tear down
+ * any remaining data still residing in the cnode.  If necessary, write out
+ * remaining blocks or delete the cnode's entry in the catalog.
+ */
+int
+hfs_vnop_inactive(struct vnop_inactive_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct cnode *cp;
+	struct hfsmount *hfsmp = VTOHFS(vp);
+	struct proc *p = vfs_context_proc(ap->a_context);
+	int error = 0;
+	int took_trunc_lock = 0;
+	enum vtype v_type;
+	
+	v_type = vnode_vtype(vp);
+	cp = VTOC(vp);
 
+	if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp) ||
+	    (hfsmp->hfs_freezing_proc == p)) {
+		error = 0;
+		goto inactive_done;
+	}	
+	
 	/*
-	 * A file may have had delayed allocations, in which case hfs_update
-	 * would not have updated the catalog record (cat_update).  We need
-	 * to do that now, before we lose our fork data.  We also need to
-	 * force the update, or hfs_update will again skip the cat_update.
+	 * For safety, do NOT call vnode_recycle from inside this function.  This can cause 
+	 * problems in the following scenario:
+	 * 
+	 * vnode_create -> vnode_reclaim_internal -> vclean -> VNOP_INACTIVE
+	 * 
+	 * If we're being invoked as a result of a reclaim that was already in-flight, then we
+	 * cannot call vnode_recycle again.  Being in reclaim means that there are no usecounts or
+	 * iocounts by definition.  As a result, if we were to call vnode_recycle, it would immediately
+	 * try to re-enter reclaim again and panic.  
+	 *
+	 * Currently, there are three things that can cause us (VNOP_INACTIVE) to get called.
+	 * 1) last usecount goes away on the vnode (vnode_rele)
+	 * 2) last iocount goes away on a vnode that previously had usecounts but didn't have 
+	 * 		vnode_recycle called (vnode_put)
+	 * 3) vclean by way of reclaim
+	 *
+	 * In this function we would generally want to call vnode_recycle to speed things 
+	 * along to ensure that we don't leak blocks due to open-unlinked files.  However, by 
+	 * virtue of being in this function already, we can call hfs_cnode_teardown, which 
+	 * will release blocks held by open-unlinked files, and mark them C_NOEXISTS so that 
+	 * there's no entry in the catalog and no backing store anymore.  If that's the case, 
+	 * then we really don't care all that much when the vnode actually goes through reclaim.
+	 * Further, the HFS VNOPs that manipulated the namespace in order to create the open-
+	 * unlinked file in the first place should have already called vnode_recycle on the vnode
+	 * to guarantee that it would go through reclaim in a speedy way.
 	 */
-	if ((cp->c_flag & C_MODIFIED) ||
-	    cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) {
-	    if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){
-			cp->c_flag |= C_FORCEUPDATE;
-		}	
-		hfs_update(vp, 0);
+	
+	if (cp->c_flag & C_NOEXISTS) {
+		/* 
+		 * If the cnode has already had its cat entry removed, then 
+		 * just skip to the end. We don't need to do anything here.
+		 */
+		error = 0;
+		goto inactive_done;
 	}
-out:
-	if (cat_reserve)
-		cat_postflight(hfsmp, &cookie, p);
-
-	// XXXdbg - have to do this because a goto could have come here
-	if (started_tr) {
-	    hfs_end_transaction(hfsmp);
-	    started_tr = 0;
+	
+	if ((v_type == VREG || v_type == VLNK)) {
+		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
+		took_trunc_lock = 1;
 	}
+	
+	(void) hfs_lock(cp, HFS_FORCE_LOCK);
+	
 	/* 
-	 * This has been removed from the namespace and has no backing store
-	 * in the catalog, so we should force a reclaim as soon as possible.
-	 * Also, we want to check the flag while we still have the cnode lock.
+	 * Call hfs_cnode_teardown to push dirty blocks out to disk, release the blocks
+	 * held by open-unlinked files, and move the cnode from C_DELETED to C_NOEXISTS.
 	 */
-	if (cp->c_flag & C_NOEXISTS) 
-		recycle = 1;
+	error = hfs_cnode_teardown (vp, ap->a_context, 0);
 
+	/*
+	 * Drop the truncate lock before unlocking the cnode
+	 * (which can potentially perform a vnode_put and
+	 * recycle the vnode, which in turn might require the
+	 * truncate lock).
+	 */
+	if (took_trunc_lock) {
+		hfs_unlock_truncate(cp, 0);
+	}
+	
 	hfs_unlock(cp);
-
-	if (took_trunc_lock)
-	    hfs_unlock_truncate(cp, TRUE);
-
-	/*
-	 * If we are done with the vnode, reclaim it
-	 * so that it can be reused immediately.
-	 */
-	if (recycle)
-		vnode_recycle(vp);
-
-	return (error);
+	
+inactive_done: 
+	
+	return error;
 }
 
+
 /*
  * File clean-up (zero fill and shrink peof).
  */
-static int
+
+int
 hfs_filedone(struct vnode *vp, vfs_context_t context)
 {
 	struct cnode *cp;
@@ -371,6 +600,8 @@ hfs_filedone(struct vnode *vp, vfs_context_t context)
 	struct rl_entry *invalid_range;
 	off_t leof;
 	u_int32_t blks, blocksize;
+	int cluster_flags = IO_CLOSE;
+	int cluster_zero_flags = IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE;
 
 	cp = VTOC(vp);
 	fp = VTOF(vp);
@@ -380,8 +611,18 @@ hfs_filedone(struct vnode *vp, vfs_context_t context)
 	if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (fp->ff_blocks == 0))
 		return (0);
 
+	/* 
+	 * If we are being invoked from F_SWAPDATAEXTENTS, then we 
+	 * need to issue synchronous IO; unless we are sure that all 
+	 * of the data has been written to the disk, we won't know 
+	 * that all of the blocks have been allocated properly.
+	 */
+	if (cp->c_flag & C_SWAPINPROGRESS) {
+		cluster_flags |= IO_SYNC;
+	}
+
 	hfs_unlock(cp);
-	(void) cluster_push(vp, IO_CLOSE);
+	(void) cluster_push(vp, cluster_flags);
 	hfs_lock(cp, HFS_FORCE_LOCK);
 
 	/*
@@ -400,8 +641,7 @@ hfs_filedone(struct vnode *vp, vfs_context_t context)
 
 		hfs_unlock(cp);
 		(void) cluster_write(vp, (struct uio *) 0,
-				     leof, end + 1, start, (off_t)0,
-				     IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE);
+				     leof, end + 1, start, (off_t)0, cluster_zero_flags);
 		hfs_lock(cp, HFS_FORCE_LOCK);
 		cp->c_flag |= C_MODIFIED;
 	}
@@ -417,7 +657,7 @@ hfs_filedone(struct vnode *vp, vfs_context_t context)
 	if (blks < fp->ff_blocks)
 		(void) hfs_truncate(vp, leof, IO_NDELAY, 0, 0, context);
 	hfs_unlock(cp);
-	(void) cluster_push(vp, IO_CLOSE);
+	(void) cluster_push(vp, cluster_flags);
 	hfs_lock(cp, HFS_FORCE_LOCK);
 	
 	/*
@@ -435,7 +675,6 @@ hfs_filedone(struct vnode *vp, vfs_context_t context)
 /*
  * Reclaim a cnode so that it can be used for other purposes.
  */
-__private_extern__
 int
 hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
 {
@@ -444,23 +683,30 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
 	struct filefork *fp = NULL;
 	struct filefork *altfp = NULL;
 	struct hfsmount *hfsmp = VTOHFS(vp);
+	vfs_context_t ctx = ap->a_context;
 	int reclaim_cnode = 0;
-
-	(void) hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
+	int err = 0;
+	enum vtype v_type;
+	
+	v_type = vnode_vtype(vp);
 	cp = VTOC(vp);
 	
-	/*
-	 * A file may have had delayed allocations, in which case hfs_update
-	 * would not have updated the catalog record (cat_update).  We need
-	 * to do that now, before we lose our fork data.  We also need to
-	 * force the update, or hfs_update will again skip the cat_update.
+	/* 
+	 * We don't take the truncate lock since by the time reclaim comes along,
+	 * all dirty pages have been synced and nobody else should be competing
+	 * with this thread for the cnode.
 	 */
-	if ((cp->c_flag & C_MODIFIED) ||
-	    cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) {
-	    if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){
-			cp->c_flag |= C_FORCEUPDATE;
-		}
-		hfs_update(vp, 0);
+	(void) hfs_lock (cp, HFS_FORCE_LOCK);
+
+	/* 
+	 * Sync to disk any remaining data in the cnode/vnode.  This includes
+	 * a call to hfs_update if the cnode has outbound data.
+	 * 
+	 * If C_NOEXISTS is set on the cnode, then there's nothing teardown needs to do
+	 * because the catalog entry for this cnode is already gone.
+	 */
+	if (!ISSET(cp->c_flag, C_NOEXISTS)) {
+		err = hfs_cnode_teardown(vp, ctx, 1);
 	}
 
 	/*
@@ -525,7 +771,12 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
 	if (reclaim_cnode) {
 		hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_TRANSIT);
 		hfs_reclaim_cnode(cp);
-	} else /* cnode in use */ {
+	} 
+	else  {
+		/* 
+		 * cnode in use.  If it is a directory, it could have 
+		 * no live forks. Just release the lock.
+		 */
 		hfs_unlock(cp);
 	}
 
@@ -546,7 +797,6 @@ extern int (**hfs_fifoop_p)  (void *);
  *
  * The vnode is returned with an iocount and the cnode locked
  */
-__private_extern__
 int
 hfs_getnewvnode(
 	struct hfsmount *hfsmp,
@@ -556,7 +806,8 @@ hfs_getnewvnode(
 	int flags,
 	struct cat_attr *attrp,
 	struct cat_fork *forkp,
-	struct vnode **vpp)
+	struct vnode **vpp,
+	int *out_flags)
 {
 	struct mount *mp = HFSTOVFS(hfsmp);
 	struct vnode *vp = NULL;
@@ -568,12 +819,13 @@ hfs_getnewvnode(
 	int retval;
 	int issystemfile;
 	int wantrsrc;
+	int hflags = 0;
 	struct vnode_fsparam vfsp;
 	enum vtype vtype;
 #if QUOTA
 	int i;
 #endif /* QUOTA */
-
+	
 	hfs_standard = (hfsmp->hfs_flags & HFS_STANDARD);
 
 	if (attrp->ca_fileid == 0) {
@@ -591,6 +843,9 @@ hfs_getnewvnode(
 	issystemfile = (descp->cd_flags & CD_ISMETA) && (vtype == VREG);
 	wantrsrc = flags & GNV_WANTRSRC;
 
+	/* Zero out the out_flags */
+	*out_flags = 0;
+
 #ifdef HFS_CHECK_LOCK_ORDER
 	/*
 	 * The only case were its permissible to hold the parent cnode
@@ -607,7 +862,8 @@ hfs_getnewvnode(
 	/*
 	 * Get a cnode (new or existing)
 	 */
-	cp = hfs_chash_getcnode(hfsmp, attrp->ca_fileid, vpp, wantrsrc, (flags & GNV_SKIPLOCK));
+	cp = hfs_chash_getcnode(hfsmp, attrp->ca_fileid, vpp, wantrsrc, 
+							(flags & GNV_SKIPLOCK), out_flags, &hflags);
 
 	/*
 	 * If the id is no longer valid for lookups we'll get back a NULL cp.
@@ -615,20 +871,76 @@ hfs_getnewvnode(
 	if (cp == NULL) {
 		return (ENOENT);
 	}
-
+	
 	/* 
-	 * Hardlinks may need an updated catalog descriptor.  However, if
-	 * the cnode has already been marked as open-unlinked (C_DELETED), then don't
-	 * replace its descriptor. 
+	 * If we get a cnode/vnode pair out of hfs_chash_getcnode, then update the 
+	 * descriptor in the cnode as needed if the cnode represents a hardlink.  
+	 * We want the caller to get as up-to-date a copy of the descriptor
+	 * as possible. However, we only do anything here if there was a valid vnode.
+	 * If there isn't a vnode, then the cnode is brand new and needs to be initialized
+	 * as it doesn't have a descriptor or cat_attr yet.
+	 * 
+	 * If we are about to replace the descriptor with the user-supplied one, then validate
+	 * that the descriptor correctly acknowledges this item is a hardlink.  We could be
+	 * subject to a race where the calling thread invoked cat_lookup, got a valid lookup 
+	 * result but the file was not yet a hardlink. With sufficient delay between there
+	 * and here, we might accidentally copy in the raw inode ID into the descriptor in the
+	 * call below.  If the descriptor's CNID is the same as the fileID then it must
+	 * not yet have been a hardlink when the lookup occurred.
 	 */
+	
 	if (!(hfs_checkdeleted(cp))) {
 		if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) {
-			replace_desc(cp, descp);
+			/* If cnode is uninitialized, its c_attr will be zeroed out; CNIDs won't match. */
+			if ((descp->cd_cnid == cp->c_attr.ca_fileid)  &&
+					(attrp->ca_linkcount != cp->c_attr.ca_linkcount)){
+				if ((flags & GNV_SKIPLOCK) == 0) {
+					/* 
+					 * Then we took the lock. Drop it before calling
+					 * vnode_put, which may invoke hfs_vnop_inactive and need to take 
+					 * the cnode lock again.
+					 */
+					hfs_unlock(cp);
+				}
+				
+				/* 
+				 * Emit ERECYCLE and GNV_CAT_ATTRCHANGED to 
+				 * force a re-drive in the lookup routine.  
+				 * Drop the iocount on the vnode obtained from 
+				 * chash_getcnode if needed.
+				 */	
+				if (*vpp != NULL) {
+					vnode_put (*vpp);
+					*vpp = NULL;
+				}
+
+				/*
+				 * If we raced with VNOP_RECLAIM for this vnode, the hash code could
+				 * have observed it after the c_vp or c_rsrc_vp fields had been torn down;
+				 * the hash code peeks at those fields without holding the cnode lock because
+				 * it needs to be fast.  As a result, we may have set H_ATTACH in the chash
+				 * call above.  Since we're bailing out, unset whatever flags we just set, and
+				 * wake up all waiters for this cnode.
+				 */
+				if (hflags) {
+					hfs_chashwakeup(hfsmp, cp, hflags);
+				}
+
+				*out_flags = GNV_CAT_ATTRCHANGED;
+				return ERECYCLE;	
+			}
+			else {
+				/* Otherwise, CNID != fileid. Go ahead and copy in the new descriptor */
+				replace_desc(cp, descp);
+			}
 		}
 	}
+	
 	/* Check if we found a matching vnode */
-	if (*vpp != NULL)
+	if (*vpp != NULL) {
 		return (0);
+	}
 
 	/*
 	 * If this is a new cnode then initialize it.
@@ -640,12 +952,38 @@ hfs_getnewvnode(
 #endif
 
 		/* Make sure its still valid (ie exists on disk). */
-		if (!(flags & GNV_CREATE) &&
-		    !hfs_valid_cnode(hfsmp, dvp, (wantrsrc ? NULL : cnp), cp->c_fileid)) {
-			hfs_chash_abort(hfsmp, cp);
-			hfs_reclaim_cnode(cp);
-			*vpp = NULL;
-			return (ENOENT);
+		if (!(flags & GNV_CREATE)) {
+			int error = 0;
+			if (!hfs_valid_cnode (hfsmp, dvp, (wantrsrc ? NULL : cnp), cp->c_fileid, attrp, &error)) {
+				hfs_chash_abort(hfsmp, cp);
+				hfs_reclaim_cnode(cp);
+				*vpp = NULL;
+				/* 
+				 * If we hit this case, that means that the entry was there in the catalog when
+				 * we did a cat_lookup earlier.  Think hfs_lookup.  However, in between the time
+				 * that we checked the catalog and the time we went to get a vnode/cnode for it,
+				 * it had been removed from the namespace and the vnode totally reclaimed.  As a result,
+				 * it's not there in the catalog during the check in hfs_valid_cnode and we bubble out
+				 * an ENOENT.  To indicate to the caller that they should really double-check the
+				 * entry (it could have been renamed over and gotten a new fileid), we mark a bit
+				 * in the output flags.
+				 */
+				if (error == ENOENT) {
+					*out_flags = GNV_CAT_DELETED;
+					return ENOENT;	
+				}
+
+				/*
+				 * Also, we need to protect the cat_attr acquired during hfs_lookup and passed into
+				 * this function as an argument, because the catalog may have changed w.r.t. hardlink
+				 * link counts and the firstlink field.  If that validation check fails, then let 
+				 * lookup re-drive itself to get valid/consistent data with the same failure condition below.
+				 */
+				if (error == ERECYCLE) {
+					*out_flags = GNV_CAT_ATTRCHANGED;
+					return (ERECYCLE);
+				}
+			}
 		}
 		bcopy(attrp, &cp->c_attr, sizeof(struct cat_attr));
 		bcopy(descp, &cp->c_desc, sizeof(struct cat_desc));
@@ -695,6 +1033,8 @@ hfs_getnewvnode(
 				cp->c_dquot[i] = NODQUOT;
 		}
 #endif /* QUOTA */
+		/* Mark the output flag that we're vending a new cnode */
+		*out_flags |= GNV_NEW_CNODE;
 	}
 
 	if (vtype == VDIR) {
@@ -802,7 +1142,7 @@ hfs_getnewvnode(
 		vfsp.vnfs_filesize = 0;
 
 	vfsp.vnfs_flags = VNFS_ADDFSREF;
-	if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY))
+	if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY) || (flags & GNV_NOCACHE))
 		vfsp.vnfs_flags |= VNFS_NOCACHE;
 
 	/* Tag system files */
@@ -868,6 +1208,11 @@ hfs_getnewvnode(
 	if (!(flags & GNV_CREATE) && (vtype != VDIR) && !issystemfile) {
 		(void) hfs_removehotfile(vp);
 	}
+	
+#if CONFIG_PROTECT
+	if (!issystemfile && (*out_flags & GNV_NEW_CNODE))
+		cp_entry_init(cp, mp);
+#endif
 
 	*vpp = vp;
 	return (0);
@@ -900,7 +1245,16 @@ hfs_reclaim_cnode(struct cnode *cp)
 		cp->c_desc.cd_namelen = 0;
 		vfs_removename(nameptr);
 	}
-
+	
+	/*
+	 * We only call this function if we are in hfs_vnop_reclaim and 
+	 * attempting to reclaim a cnode with only one live fork.  Because the vnode
+	 * went through reclaim, any future attempts to use this item will have to
+	 * go through lookup again, which will need to create a new vnode.  Thus,
+	 * destroying the locks below (while they were still held during our parent 
+	 * function hfs_vnop_reclaim) is safe.
+	 */	
+	
 	lck_rw_destroy(&cp->c_rwlock, hfs_rwlock_group);
 	lck_rw_destroy(&cp->c_truncatelock, hfs_rwlock_group);
 #if HFS_COMPRESSION
@@ -909,14 +1263,27 @@ hfs_reclaim_cnode(struct cnode *cp)
 		FREE_ZONE(cp->c_decmp, sizeof(*(cp->c_decmp)), M_DECMPFS_CNODE);
 	}
 #endif
+#if CONFIG_PROTECT
+	cp_entry_destroy(cp);
+#endif
+	
 	bzero(cp, sizeof(struct cnode));
 	FREE_ZONE(cp, sizeof(struct cnode), M_HFSNODE);
 }
 
 
-__private_extern__
+/*
+ * hfs_valid_cnode
+ *
+ * This function is used to validate data that is stored in-core against what is contained
+ * in the catalog.  Common uses include validating that the parent-child relationship still exists
+ * for a specific directory entry (guaranteeing it has not been renamed into a different spot) at
+ * the point of the check.
+ */
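+/*
+ * Usage sketch (illustrative, mirroring the call in hfs_getnewvnode):
+ *
+ *	int error = 0;
+ *	if (!hfs_valid_cnode(hfsmp, dvp, cnp, cp->c_fileid, attrp, &error)) {
+ *		// error is ENOENT (entry is gone) or ERECYCLE (the cat_attr
+ *		// changed; the caller should re-drive the lookup)
+ *	}
+ */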
 int
-hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp, cnid_t cnid)
+hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp, 
+		cnid_t cnid, struct cat_attr *cattr, int *error)
 {
 	struct cat_attr attr;
 	struct cat_desc cndesc;
@@ -924,34 +1291,181 @@ hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname
 	int lockflags;
 
 	/* System files are always valid */
-	if (cnid < kHFSFirstUserCatalogNodeID)
+	if (cnid < kHFSFirstUserCatalogNodeID) {
+		*error = 0;
 		return (1);
+	}
 
 	/* XXX optimization:  check write count in dvp */
 
 	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
 
 	if (dvp && cnp) {
+		int lookup = 0;
+		struct cat_fork fork;
+
 		bzero(&cndesc, sizeof(cndesc));
 		cndesc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr;
 		cndesc.cd_namelen = cnp->cn_namelen;
 		cndesc.cd_parentcnid = VTOC(dvp)->c_fileid;
 		cndesc.cd_hint = VTOC(dvp)->c_childhint;
 
-		if ((cat_lookup(hfsmp, &cndesc, 0, NULL, &attr, NULL, NULL) == 0) &&
-		    (cnid == attr.ca_fileid)) {
+		/* 
+		 * We have to be careful when calling cat_lookup.  The result argument
+		 * 'attr' may get different results based on whether or not you ask
+		 * for the filefork to be supplied as output.  This is because cat_lookupbykey
+		 * will attempt to do basic validation/smoke tests against the resident
+		 * extents if there are no overflow extent records, but it needs someplace
+		 * in memory to store the on-disk fork structures.
+		 *
+		 * Since hfs_lookup calls cat_lookup with a filefork argument, we should
+		 * do the same here, to verify that block count differences are not
+		 * due to calling the function with different styles.  cat_lookupbykey
+		 * will request the volume be fsck'd if there is true on-disk corruption
+		 * where the number of blocks does not match the number generated by 
+		 * summing the number of blocks in the resident extents.
+		 */
+		
+		lookup = cat_lookup (hfsmp, &cndesc, 0, NULL, &attr, &fork, NULL);
+		if ((lookup == 0) && (cnid == attr.ca_fileid)) {
 			stillvalid = 1;
+			*error = 0;
+		}
+		else {
+			*error = ENOENT;
+		}
+	
+		/*
+		 * In hfs_getnewvnode, we may encounter a time-of-check vs. time-of-vnode creation 
+		 * race.  Specifically, if there is no vnode/cnode pair for the directory entry 
+		 * being looked up, we have to go to the catalog.  But since we don't hold any locks (aside
+		 * from the dvp in 'shared' mode) there is nothing to protect us against the catalog record
+		 * changing in between the time we do the cat_lookup there and the time we re-grab the 
+		 * catalog lock above to do another cat_lookup. 
+		 * 
+		 * However, we need to check more than just the CNID and parent-child name relationships above.  
+		 * Hardlinks can suffer the same race in the following scenario:  Suppose we do a 
+		 * cat_lookup, and find a leaf record and a raw inode for a hardlink.  Now, we have 
+		 * the cat_attr in hand (passed in above).  But in between then and now, the vnode was 
+		 * created by a competing hfs_getnewvnode call, and is manipulated and reclaimed before we get 
+		 * a chance to do anything.  This is possible if there are a lot of threads thrashing around
+		 * with the cnode hash.  In this case, if we don't check/validate the cat_attr in-hand, we will
+		 * blindly stuff it into the cnode, which will make the in-core data inconsistent with what is 
+		 * on disk.  So validate the cat_attr below, if required.  This race cannot happen if the cnode/vnode
+		 * already exists, as it does in the case of rename and delete. 
+		 */ 
+		if (stillvalid && cattr != NULL) {
+			if (cattr->ca_linkcount != attr.ca_linkcount) {
+				stillvalid = 0;
+				*error = ERECYCLE;
+				goto notvalid;
+			}
+			
+			if (cattr->ca_union1.cau_linkref != attr.ca_union1.cau_linkref) {
+				stillvalid = 0;
+				*error = ERECYCLE;
+				goto notvalid;
+			}
+
+			if (cattr->ca_union3.cau_firstlink != attr.ca_union3.cau_firstlink) {
+				stillvalid = 0;
+				*error = ERECYCLE;
+				goto notvalid;
+			}
+
+			if (cattr->ca_union2.cau_blocks != attr.ca_union2.cau_blocks) {
+				stillvalid = 0;
+				*error = ERECYCLE;
+				goto notvalid;
+			}
 		}
 	} else {
 		if (cat_idlookup(hfsmp, cnid, 0, NULL, NULL, NULL) == 0) {
 			stillvalid = 1;
+			*error = 0;
+		}
+		else {
+			*error = ENOENT;
 		}
 	}
+notvalid:
 	hfs_systemfile_unlock(hfsmp, lockflags);
 
 	return (stillvalid);
 }
 
+/*
+ * Per HI and Finder requirements, HFS should record the
+ * date/time that a particular directory entry was added 
+ * to the containing directory. 
+ * This is stored in the extended Finder Info for the 
+ * item in question.
+ *
+ * Note that this field is also set explicitly in the hfs_vnop_setxattr code.
+ * We must ignore user attempts to set this part of the finderinfo, and
+ * so we need to save a local copy of the date added, write in the user 
+ * finderinfo, then stuff the value back in.  
+ */
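+/*
+ * Layout sketch (stated as an assumption for illustration): the 32-byte
+ * FinderInfo blob holds 16 bytes of classic FndrFileInfo/FndrDirInfo
+ * followed by 16 bytes of extended info, which is why both routines below
+ * advance 16 bytes before overlaying a FndrExtended{File,Dir}Info:
+ *
+ *	finfo = (u_int8_t*)attrp->ca_finderinfo + 16;
+ */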
+void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) {
+	u_int8_t *finfo = NULL;
+	
+	/* overlay the FinderInfo to the correct pointer, and advance */
+	finfo = (u_int8_t*)attrp->ca_finderinfo;
+	finfo = finfo + 16;
+	
+	/* 
+	 * Make sure to write it out as big endian, since that's how
+	 * finder info is defined.  
+	 * 
+	 * NOTE: This is a Unix-epoch timestamp, not a HFS/Traditional Mac timestamp.
+	 */
+	if (S_ISREG(attrp->ca_mode)) {
+		struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
+		extinfo->date_added = OSSwapHostToBigInt32(dateadded);
+		attrp->ca_recflags |= kHFSHasDateAddedMask; 
+	}
+	else if (S_ISDIR(attrp->ca_mode)) {
+		struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
+		extinfo->date_added = OSSwapHostToBigInt32(dateadded);		
+		attrp->ca_recflags |= kHFSHasDateAddedMask; 
+	}
+    
+	/* If it is neither a directory nor a regular file, we set nothing and just return */
+	return;
+}
+
+u_int32_t hfs_get_dateadded (struct cnode *cp) {
+	u_int8_t *finfo = NULL;
+	u_int32_t dateadded = 0;
+	
+	if ((cp->c_attr.ca_recflags & kHFSHasDateAddedMask) == 0) {
+		/* Date added was never set.  Return 0. */
+		return dateadded;
+	}
+	
+    
+	/* overlay the FinderInfo to the correct pointer, and advance */
+	finfo = (u_int8_t*)cp->c_finderinfo;
+	finfo = finfo + 16;
+	
+	/* 
+	 * FinderInfo is written out in big endian... make sure to convert it to host
+	 * native before we use it.
+	 */
+	if (S_ISREG(cp->c_attr.ca_mode)) {
+		struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
+		dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
+	}
+	else if (S_ISDIR(cp->c_attr.ca_mode)) {
+		struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
+		dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
+	}
+    
+	return dateadded;
+}
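+/*
+ * Usage sketch (illustrative): the returned value is host-endian seconds
+ * since the Unix epoch, or 0 if no date-added was ever recorded:
+ *
+ *	u_int32_t added = hfs_get_dateadded(cp);
+ *	if (added != 0) {
+ *		// the item has a valid date-added timestamp
+ *	}
+ */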
+
 /*
  * Touch cnode times based on c_touch_xxx flags
  *
@@ -959,21 +1473,23 @@ hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname
  *
  * This will also update the volume modify time
  */
-__private_extern__
 void
 hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
 {
+	vfs_context_t ctx;
 	/* don't modify times if volume is read-only */
 	if (hfsmp->hfs_flags & HFS_READ_ONLY) {
 		cp->c_touch_acctime = FALSE;
 		cp->c_touch_chgtime = FALSE;
 		cp->c_touch_modtime = FALSE;
+		return;
 	}
 	else if (hfsmp->hfs_flags & HFS_STANDARD) {
 	/* HFS Standard doesn't support access times */
 		cp->c_touch_acctime = FALSE;
 	}
 
+	ctx = vfs_context_current();
 	/*
 	 * Skip access time updates if:
 	 *	. MNT_NOATIME is set
@@ -985,10 +1501,13 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
 		if ((vfs_flags(hfsmp->hfs_mp) & MNT_NOATIME) ||
 		    (hfsmp->hfs_freezing_proc != NULL) ||
 		    (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) ||
-		    (cp->c_vp && vnode_israge(cp->c_vp)))
+		    (cp->c_vp && ((vnode_israge(cp->c_vp) || (vfs_ctx_skipatime(ctx)))))) {
+				
 			cp->c_touch_acctime = FALSE;
+		}
 	}
-	if (cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) {
+	if (cp->c_touch_acctime || cp->c_touch_chgtime || 
+		cp->c_touch_modtime || (cp->c_flag & C_NEEDS_DATEADDED)) {
 		struct timeval tv;
 		int touchvol = 0;
 
@@ -1027,6 +1546,14 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
 			cp->c_flag |= C_MODIFIED;
 			touchvol = 1;
 		}
+		
+		if (cp->c_flag & C_NEEDS_DATEADDED) {
+			hfs_write_dateadded (&(cp->c_attr), tv.tv_sec);
+			cp->c_flag |= C_MODIFIED;
+			/* untwiddle the bit */
+			cp->c_flag &= ~C_NEEDS_DATEADDED;
+			touchvol = 1;
+		}
 
 		/* Touch the volume modtime if needed */
 		if (touchvol) {
@@ -1039,7 +1566,6 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
 /*
  * Lock a cnode.
  */
-__private_extern__
 int
 hfs_lock(struct cnode *cp, enum hfslocktype locktype)
 {
@@ -1122,7 +1648,6 @@ hfs_lock(struct cnode *cp, enum hfslocktype locktype)
 /*
  * Lock a pair of cnodes.
  */
-__private_extern__
 int
 hfs_lockpair(struct cnode *cp1, struct cnode *cp2, enum hfslocktype locktype)
 {
@@ -1182,7 +1707,6 @@ hfs_isordered(struct cnode *cp1, struct cnode *cp2)
  *   - only one lock taken per cnode (dup cnodes are skipped)
  *   - some of the cnode pointers may be null
  */
-__private_extern__
 int
 hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3,
              struct cnode *cp4, enum hfslocktype locktype, struct cnode **error_cnode)
@@ -1245,7 +1769,6 @@ hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3,
 /*
  * Unlock a cnode.
  */
-__private_extern__
 void
 hfs_unlock(struct cnode *cp)
 {
@@ -1299,7 +1822,6 @@ hfs_unlock(struct cnode *cp)
 /*
  * Unlock a pair of cnodes.
  */
-__private_extern__
 void
 hfs_unlockpair(struct cnode *cp1, struct cnode *cp2)
 {
@@ -1311,7 +1833,6 @@ hfs_unlockpair(struct cnode *cp1, struct cnode *cp2)
 /*
  * Unlock a group of cnodes.
  */
-__private_extern__
 void
 hfs_unlockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, struct cnode *cp4)
 {
@@ -1358,34 +1879,119 @@ skip2:
  *
  * The process doing a truncation must take the lock
  * exclusive. The read/write processes can take it
- * non-exclusive.
+ * shared.  The locktype argument is the same as supplied to
+ * hfs_lock.
  */
-__private_extern__
 void
-hfs_lock_truncate(struct cnode *cp, int exclusive)
+hfs_lock_truncate(struct cnode *cp, enum hfslocktype locktype)
 {
-#ifdef HFS_CHECK_LOCK_ORDER
-	if (cp->c_lockowner == current_thread())
-		panic("hfs_lock_truncate: cnode %p locked!", cp);
-#endif /* HFS_CHECK_LOCK_ORDER */
+	void * thread = current_thread();
 
-	if (exclusive)
-		lck_rw_lock_exclusive(&cp->c_truncatelock);
-	else
+	if (cp->c_truncatelockowner == thread) {
+		/* 
+		 * Only HFS_RECURSE_TRUNCLOCK is allowed to recurse.
+		 * 
+		 * This is needed on the hfs_vnop_pagein path where we need to ensure
+		 * the file does not change sizes while we are paging in.  However,
+		 * we may already hold the lock exclusive due to another 
+		 * VNOP from earlier in the call stack.  So if we already hold 
+		 * the truncate lock exclusive, allow it to proceed, but ONLY if 
+		 * it's in the recursive case.
+		 */
+		if (locktype != HFS_RECURSE_TRUNCLOCK) {
+			panic("hfs_lock_truncate: cnode %p locked!", cp);
+		}
+	}
+	/* HFS_RECURSE_TRUNCLOCK takes a shared lock if it is not already locked */
+	else if ((locktype == HFS_SHARED_LOCK) || (locktype == HFS_RECURSE_TRUNCLOCK)) {
 		lck_rw_lock_shared(&cp->c_truncatelock);
+		cp->c_truncatelockowner = HFS_SHARED_OWNER;
+	}
+	else { /* must be an HFS_EXCLUSIVE_LOCK */
+		lck_rw_lock_exclusive(&cp->c_truncatelock);
+		cp->c_truncatelockowner = thread;
+	}
 }
 
-__private_extern__
-void
-hfs_unlock_truncate(struct cnode *cp, int exclusive)
-{
-    if (exclusive) {
-	lck_rw_unlock_exclusive(&cp->c_truncatelock);
-    } else {
-	lck_rw_unlock_shared(&cp->c_truncatelock);
-    }
+
+/*
+ * Attempt to get the truncate lock.  If it cannot be acquired, fail (return false) rather than block.
+ * This function is needed in the degenerate hfs_vnop_pagein during force unmount
+ * case.  To prevent deadlocks while a VM copy object is moving pages, HFS vnop pagein will
+ * temporarily need to disable V2 semantics.  
+ */
+int hfs_try_trunclock (struct cnode *cp, enum hfslocktype locktype) {
+	void * thread = current_thread();
+	boolean_t didlock = false;
+
+	if (cp->c_truncatelockowner == thread) {
+		/* 
+		 * Only HFS_RECURSE_TRUNCLOCK is allowed to recurse.
+		 * 
+		 * This is needed on the hfs_vnop_pagein path where we need to ensure
+		 * the file does not change sizes while we are paging in.  However,
+		 * we may already hold the lock exclusive due to another 
+		 * VNOP from earlier in the call stack.  So if we already hold 
+		 * the truncate lock exclusive, allow it to proceed, but ONLY if 
+		 * it's in the recursive case.
+		 */
+		if (locktype != HFS_RECURSE_TRUNCLOCK) {
+			panic("hfs_try_trunclock: cnode %p locked!", cp);
+		}
+	}
+	/* HFS_RECURSE_TRUNCLOCK takes a shared lock if it is not already locked */
+	else if ((locktype == HFS_SHARED_LOCK) || (locktype == HFS_RECURSE_TRUNCLOCK)) {
+		didlock = lck_rw_try_lock(&cp->c_truncatelock, LCK_RW_TYPE_SHARED);
+		if (didlock) {
+			cp->c_truncatelockowner = HFS_SHARED_OWNER;
+		}
+	}
+	else { /* must be an HFS_EXCLUSIVE_LOCK */
+		didlock = lck_rw_try_lock (&cp->c_truncatelock, LCK_RW_TYPE_EXCLUSIVE);
+		if (didlock) {
+			cp->c_truncatelockowner = thread;
+		}
+	}
+	
+	return didlock;
 }
 
 
+/*
+ * Unlock the truncate lock, which protects against size changes.
+ * 
+ * The been_recursed argument is used when we may need to return
+ * from this function without actually unlocking the truncate lock.
+ */
+void
+hfs_unlock_truncate(struct cnode *cp, int been_recursed)
+{
+	void *thread = current_thread();	
 
+	/*
+	 * If been_recursed is nonzero AND the current lock owner of the
+	 * truncate lock is our current thread, then we must have recursively
+	 * taken the lock earlier on, and we return without unlocking.  Had the
+	 * lock not already been held, HFS_RECURSE_TRUNCLOCK would have taken a
+	 * shared lock, and we would fall through to the SHARED case below. 
+	 *
+	 * If been_recursed is zero (most of the time) then we check the 
+	 * lockowner field to infer whether the lock was taken exclusively or
+	 * shared in order to know what underlying lock routine to call. 
+	 */
+	if (been_recursed) {
+		if (cp->c_truncatelockowner == thread) {
+			return;	
+		}
+	}
 
+	/* HFS_LOCK_EXCLUSIVE */
+	if (thread == cp->c_truncatelockowner) {
+		cp->c_truncatelockowner = NULL;
+		lck_rw_unlock_exclusive(&cp->c_truncatelock);
+	}
+	/* HFS_LOCK_SHARED */
+	else {
+		lck_rw_unlock_shared(&cp->c_truncatelock);
+	}
+}
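+/*
+ * Pairing sketch (illustrative): a recursive acquisition is undone with
+ * been_recursed set, so an owner that already held the lock exclusive
+ * returns without touching the underlying rwlock:
+ *
+ *	hfs_lock_truncate(cp, HFS_RECURSE_TRUNCLOCK);
+ *	// ... pagein-style work ...
+ *	hfs_unlock_truncate(cp, 1);
+ */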
diff --git a/bsd/hfs/hfs_cnode.h b/bsd/hfs/hfs_cnode.h
index 9ffb9a8ca..73c2f664a 100644
--- a/bsd/hfs/hfs_cnode.h
+++ b/bsd/hfs/hfs_cnode.h
@@ -45,6 +45,10 @@
 #if HFS_COMPRESSION
 #include <sys/decmpfs.h>
 #endif
+#if CONFIG_PROTECT
+#include <sys/cprotect.h>
+#endif
+
 
 /*
  * The filefork is used to represent an HFS file fork (data or resource).
@@ -106,6 +110,7 @@ struct cnode {
 	lck_rw_t                c_rwlock;       /* cnode's lock */
 	void *                  c_lockowner;    /* cnode's lock owner (exclusive case only) */
 	lck_rw_t                c_truncatelock; /* protects file from truncation during read/write */
+	void *                  c_truncatelockowner;    /* truncate lock owner (exclusive case only) */
 	LIST_ENTRY(cnode)	c_hash;		/* cnode's hash chain */
 	u_int32_t		c_flag;		/* cnode's runtime flags */
 	u_int32_t		c_hflag;	/* cnode's flags for maintaining hash - protected by global hash lock */
@@ -132,6 +137,10 @@ struct cnode {
 #if HFS_COMPRESSION
 	decmpfs_cnode  *c_decmp;
 #endif /* HFS_COMPRESSION */
+#if CONFIG_PROTECT
+	cprotect_t		c_cpentry;	/* content protection data */
+#endif
+	
 };
 typedef struct cnode cnode_t;
 
@@ -183,13 +192,16 @@ typedef struct cnode cnode_t;
 #define C_FORCEUPDATE      0x00100  /* force the catalog entry update */
 #define C_HASXATTRS        0x00200  /* cnode has extended attributes */
 #define C_NEG_ENTRIES      0x00400  /* directory has negative name entries */
-#define C_WARNED_RSRC      0x00800  /* cnode lookup warning has been issued */ 
+#define C_SWAPINPROGRESS   0x00800	/* cnode's data is about to be swapped.  Issue synchronous cluster io */
 
 #define C_NEED_DATA_SETSIZE  0x01000  /* Do a ubc_setsize(0) on c_rsrc_vp after the unlock */
 #define C_NEED_RSRC_SETSIZE  0x02000  /* Do a ubc_setsize(0) on c_vp after the unlock */
 #define C_DIR_MODIFICATION   0x04000  /* Directory is being modified, wait for lookups */
 #define C_ALWAYS_ZEROFILL    0x08000  /* Always zero-fill the file on an fsync */
 
+#define C_RENAMED			0x10000	/* cnode was deleted as part of rename; C_DELETED should also be set */
+#define C_NEEDS_DATEADDED	0x20000 /* cnode needs date-added written to the finderinfo bit */
+#define C_BACKINGSTORE		0x40000 /* cnode is a backing store for an existing or currently-mounting filesystem */
 #define ZFTIMELIMIT	(5 * 60)
 
 /*
@@ -236,7 +248,7 @@ enum { kFinderInvisibleMask = 1 << 14 };
  * upon the VNOP in question.  Sometimes it is OK to use an open-unlinked file, for example, in,
  * reading.  But other times, such as on the source of a VNOP_RENAME, it should be disallowed.
  */
-int hfs_checkdeleted (struct cnode *cp);
+int hfs_checkdeleted(struct cnode *cp);
 
 /*
  * Test for a resource fork
@@ -271,16 +283,28 @@ struct hfsfid {
 /* Get new default vnode */
 extern int hfs_getnewvnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp,
                            struct cat_desc *descp, int flags, struct cat_attr *attrp,
-                           struct cat_fork *forkp, struct vnode **vpp);
+                           struct cat_fork *forkp, struct vnode **vpp, int *out_flags);
 
+/* Input flags for hfs_getnewvnode */
 
 #define GNV_WANTRSRC   0x01  /* Request the resource fork vnode. */
 #define GNV_SKIPLOCK   0x02  /* Skip taking the cnode lock (when getting resource fork). */
 #define GNV_CREATE     0x04  /* The vnode is for a newly created item. */
+#define GNV_NOCACHE	   0x08  /* Delay entering this item in the name cache */
 
+/* Output flags for hfs_getnewvnode */
+#define GNV_CHASH_RENAMED	0x01	/* The cnode was renamed in-flight */
+#define GNV_CAT_DELETED		0x02	/* The cnode was deleted from the catalog */
+#define GNV_NEW_CNODE		0x04	/* We are vending out a newly initialized cnode */
+#define GNV_CAT_ATTRCHANGED	0x08	/* Something in struct cat_attr changed in between cat_lookups */
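+/*
+ * Illustrative check in a caller (hypothetical; the lookup logic itself
+ * lives elsewhere):
+ *
+ *	int gnv_flags = 0;
+ *	retval = hfs_getnewvnode(hfsmp, dvp, cnp, &desc, 0, &attr, &fork,
+ *	                         &vp, &gnv_flags);
+ *	if (retval == ERECYCLE && (gnv_flags & GNV_CAT_ATTRCHANGED)) {
+ *		// re-drive the catalog lookup
+ *	}
+ */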
 
 /* Touch cnode times based on c_touch_xxx flags */
 extern void hfs_touchtimes(struct hfsmount *, struct cnode *);
+extern void hfs_write_dateadded (struct cat_attr *cattrp, u_int32_t dateadded);
+extern u_int32_t hfs_get_dateadded (struct cnode *cp); 
+
+/* Zero-fill file and push regions out to disk */
+extern int  hfs_filedone(struct vnode *vp, vfs_context_t context);
 
 /*
  * HFS cnode hash functions.
@@ -294,11 +318,14 @@ extern void  hfs_chash_rehash(struct hfsmount *hfsmp, struct cnode *cp1, struct
 extern void  hfs_chashwakeup(struct hfsmount *hfsmp, struct cnode *cp, int flags);
 extern void  hfs_chash_mark_in_transit(struct hfsmount *hfsmp, struct cnode *cp);
 
-extern struct vnode * hfs_chash_getvnode(struct hfsmount *hfsmp, ino_t inum, int wantrsrc, int skiplock);
-extern struct cnode * hfs_chash_getcnode(struct hfsmount *hfsmp, ino_t inum, struct vnode **vpp, int wantrsrc, int skiplock);
+extern struct vnode * hfs_chash_getvnode(struct hfsmount *hfsmp, ino_t inum, int wantrsrc,
+										 int skiplock, int allow_deleted);
+extern struct cnode * hfs_chash_getcnode(struct hfsmount *hfsmp, ino_t inum, struct vnode **vpp, 
+										 int wantrsrc, int skiplock, int *out_flags, int *hflags);
 extern int hfs_chash_snoop(struct hfsmount *, ino_t, int (*)(const struct cat_desc *,
                             const struct cat_attr *, void *), void *);
-extern int hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp, cnid_t cnid);
+extern int hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp, 
+							cnid_t cnid, struct cat_attr *cattr, int *error);
 				
 extern int hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid);
 
@@ -319,20 +346,22 @@ extern int hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid);
  *  5. hfs mount point (always last)
  *
  */
-enum hfslocktype  {HFS_SHARED_LOCK = 1, HFS_EXCLUSIVE_LOCK = 2, HFS_FORCE_LOCK = 3};
+enum hfslocktype  {HFS_SHARED_LOCK = 1, HFS_EXCLUSIVE_LOCK = 2, HFS_FORCE_LOCK = 3, HFS_RECURSE_TRUNCLOCK = 4};
 #define HFS_SHARED_OWNER  (void *)0xffffffff
 
-extern int hfs_lock(struct cnode *, enum hfslocktype);
-extern int hfs_lockpair(struct cnode *, struct cnode *, enum hfslocktype);
-extern int hfs_lockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *,
+int hfs_lock(struct cnode *, enum hfslocktype);
+int hfs_lockpair(struct cnode *, struct cnode *, enum hfslocktype);
+int hfs_lockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *,
                         enum hfslocktype, struct cnode **);
 
-extern void hfs_unlock(struct cnode *);
-extern void hfs_unlockpair(struct cnode *, struct cnode *);
-extern void hfs_unlockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *);
+void hfs_unlock(struct cnode *);
+void hfs_unlockpair(struct cnode *, struct cnode *);
+void hfs_unlockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *);
+
+void hfs_lock_truncate(struct cnode *, enum hfslocktype);
+void hfs_unlock_truncate(struct cnode *, int been_recursed);
 
-extern void hfs_lock_truncate(struct cnode *, int);
-extern void hfs_unlock_truncate(struct cnode *, int);
+int hfs_try_trunclock(struct cnode *, enum hfslocktype);
 
 #endif /* __APPLE_API_PRIVATE */
 #endif /* KERNEL */
diff --git a/bsd/hfs/hfs_cprotect.c b/bsd/hfs/hfs_cprotect.c
new file mode 100644
index 000000000..0345e4d9e
--- /dev/null
+++ b/bsd/hfs/hfs_cprotect.c
@@ -0,0 +1,908 @@
+/*
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <sys/cprotect.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/random.h>
+#include <sys/xattr.h>
+#include <sys/uio_internal.h>
+#include <sys/ubc_internal.h>
+#include <sys/vnode_if.h>
+#include <sys/vnode_internal.h>
+#include <libkern/OSByteOrder.h>
+
+#include "hfs.h"
+#include "hfs_cnode.h"
+
+#ifdef CONFIG_PROTECT
+static struct cp_wrap_func		g_cp_wrap_func = {NULL, NULL};
+static struct cp_global_state	g_cp_state = {0, 0};
+
+extern int (**hfs_vnodeop_p) (void *);
+
+/*
+ * CP private functions
+ */
+static int cp_is_valid_class(int);
+static int cp_getxattr(cnode_t *, struct cp_xattr *);
+static int cp_setxattr(cnode_t *, struct cp_xattr *, int);
+static struct cprotect *cp_entry_alloc(void);
+static int cp_make_keys (struct cprotect *);
+static int cp_restore_keys(struct cprotect *);
+static int cp_lock_vfs_callback(mount_t, void *);
+static int cp_lock_vnode_callback(vnode_t, void *);
+static int cp_vnode_is_eligible (vnode_t);
+static int cp_check_access (cnode_t *, int);
+static int cp_wrap(int, void *, void *);
+static int cp_unwrap(int, void *, void *);
+
+
+
+#if DEVELOPMENT || DEBUG
+#define CP_ASSERT(x)		\
+	if ((x) == 0) {			\
+		panic("CP: failed assertion in %s", __FUNCTION__); 	\
+	}
+#else
+#define CP_ASSERT(x)
+#endif
+
+int 
+cp_key_store_action(int action)
+{
+	g_cp_state.lock_state = action;
+	if (action == CP_LOCKED_STATE)
+		return vfs_iterate(0, cp_lock_vfs_callback, (void *)action);
+	else
+		return 0;
+}
+
+
+int 
+cp_register_wraps(cp_wrap_func_t key_store_func)
+{
+	g_cp_wrap_func.wrapper = key_store_func->wrapper;
+	g_cp_wrap_func.unwrapper = key_store_func->unwrapper;
+	
+	g_cp_state.wrap_functions_set = 1;
+	
+	return 0;
+}
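+/*
+ * Registration sketch (illustrative; 'my_wrap' and 'my_unwrap' are
+ * hypothetical key-store callbacks, not part of this change):
+ *
+ *	struct cp_wrap_func funcs = { my_wrap, my_unwrap };
+ *	cp_register_wraps(&funcs);
+ *
+ * Once registered, g_cp_state.wrap_functions_set allows per-file key
+ * wrapping/unwrapping to proceed.
+ */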
+
+/*
+ * Allocate and initialize a cprotect blob for a new cnode.
+ * Called from hfs_getnewvnode: cnode is locked exclusive.
+ * Read xattr data off the cnode. Then, if conditions permit,
+ * unwrap the file key and cache it in the cprotect blob.
+ */
+int 
+cp_entry_init(cnode_t *cnode, struct mount *mp)
+{
+	struct cprotect *entry;
+	struct cp_xattr xattr;
+	int error = 0;
+	
+	if (!cp_fs_protected (mp)) {
+		cnode->c_cpentry = NULL;
+		return 0;
+	}
+	
+	if (!S_ISREG(cnode->c_mode)) {
+		cnode->c_cpentry = NULL;
+		return 0;
+	}
+
+	if (!g_cp_state.wrap_functions_set) {
+		printf("hfs: cp_entry_init: wrap functions not yet set\n");
+		return ENXIO;
+	}
+	
+	CP_ASSERT (cnode->c_cpentry == NULL);
+	
+	entry = cp_entry_alloc();
+	if (!entry)
+		return ENOMEM;
+	
+	entry->cp_flags |= CP_KEY_FLUSHED;
+	cnode->c_cpentry = entry;
+	
+	error = cp_getxattr(cnode, &xattr);
+	if (error == ENOATTR) {
+		/* 
+		 * Can't tell if the file is new, or was previously created but never
+		 * written to or set-classed. In either case, it'll need a fresh 
+		 * per-file key.
+		 */
+		entry->cp_flags |= CP_NEEDS_KEYS;
+		error = 0;
+	} else {
+		if (xattr.xattr_major_version != CP_CURRENT_MAJOR_VERS) {
+			printf("hfs: cp_entry_init: bad xattr version\n");
+			error = EINVAL;
+			goto out;
+		}
+
+		/* set up entry with information from xattr */
+		entry->cp_pclass = xattr.persistent_class;
+		bcopy(&xattr.persistent_key, &entry->cp_persistent_key, CP_WRAPPEDKEYSIZE);
+	}
+
+out:
+	if (error) {
+		cp_entry_destroy (cnode);
+	}
+	return error;
+}
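+
+/*
+ * Lifecycle summary of the cp_flags used above and below: a fresh entry
+ * starts CP_KEY_FLUSHED (no usable key in memory), and CP_NEEDS_KEYS if no
+ * xattr exists yet.  cp_make_keys() clears CP_NEEDS_KEYS and sets
+ * CP_NO_XATTR until cp_setxattr() persists the wrapped key;
+ * cp_restore_keys() clears CP_KEY_FLUSHED once the key has been unwrapped
+ * again.
+ */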
+
+/*
+ * Set up initial key/class pair on cnode. The cnode is locked exclusive.
+ */
+int 
+cp_entry_create_keys(cnode_t *cnode)
+{
+	struct cprotect *entry = cnode->c_cpentry;
+
+	if (!entry) {
+		//unprotected file: continue
+		return 0;
+	}
+
+	CP_ASSERT((entry->cp_flags & CP_NEEDS_KEYS));
+
+	return cp_make_keys(entry);
+}
+
+/*
+ * Tear down and clear a cprotect blob for a closing file.
+ * Called at hfs_reclaim_cnode: cnode is locked exclusive. 
+ */
+void
+cp_entry_destroy(cnode_t *cnode)
+{
+	struct cprotect *entry = cnode->c_cpentry;
+	if (!entry) {
+		/* nothing to clean up */
+		return;
+	}
+	cnode->c_cpentry = NULL;
+	bzero(entry, sizeof(*entry));
+	FREE(entry, M_TEMP);
+}
+
+int
+cp_fs_protected(mount_t mnt)
+{
+	return (vfs_flags(mnt) & MNT_CPROTECT);
+}
+
+
+/*
+ * Return a pointer to underlying cnode if there is one for this vnode.
+ * Done without taking cnode lock, inspecting only vnode state.
+ */
+cnode_t *
+cp_get_protected_cnode(vnode_t vp)
+{
+	if (!cp_vnode_is_eligible(vp)) {
+		return NULL;
+	}
+	
+	if (!cp_fs_protected(VTOVFS(vp))) {
+		/* mount point doesn't support it */
+		return NULL;
+	}
+	
+	return (cnode_t *) vp->v_data;
+}
+
+
+/*
+ * Sets *class to persistent class associated with vnode,
+ * or returns error.
+ */
+int 
+cp_vnode_getclass(vnode_t vp, int *class)
+{
+	struct cp_xattr xattr;
+	int error = 0;
+	struct cnode *cnode;
+	
+	if (!cp_vnode_is_eligible (vp)) {
+		return EBADF;
+	}
+	
+	cnode = VTOC(vp);
+
+	if (hfs_lock(cnode, HFS_SHARED_LOCK)) {
+		return EINVAL;
+	}
+
+	if (cp_fs_protected(VTOVFS(vp))) {
+		/* pull the class from the live entry */
+		struct cprotect *entry = cnode->c_cpentry;
+		if (!entry) {
+			panic("Content Protection: uninitialized cnode %p", cnode);
+		}
+
+		if ((entry->cp_flags & CP_NEEDS_KEYS)) {
+			error = cp_make_keys(entry);
+		}
+		*class = entry->cp_pclass;
+
+	} else {
+		/* 
+		 * Mount point is not formatted for content protection. If a class
+		 * has been specified anyway, report it. Otherwise, report D.
+		 */
+		error = cp_getxattr(cnode, &xattr);
+		if (error == ENOATTR) {
+			*class = PROTECTION_CLASS_D;
+			error = 0;
+		} else if (error == 0) {
+			*class = xattr.persistent_class;
+		}
+	}
+	
+	hfs_unlock(cnode);
+	return error;
+}
+
+
+/*
+ * Sets the persistent class for this file.
+ * If the vnode cannot be protected (system file, non-regular file, non-hfs), returns EBADF.
+ * If the new class cannot be accessed in the current lock state, returns EPERM.
+ * Otherwise, records the class and re-wraps the key if the mount point is content-protected.
+ */
+int 
+cp_vnode_setclass(vnode_t vp, uint32_t newclass)
+{
+	struct cnode *cnode;
+	struct cp_xattr xattr;
+	struct cprotect *entry = NULL;
+	int error = 0;
+	
+	if (!cp_is_valid_class(newclass)) {
+		printf("hfs: CP: cp_vnode_setclass called with invalid class %d\n", newclass);
+		return EINVAL;
+	}
+
+	/* is this an interesting file? */
+	if (!cp_vnode_is_eligible(vp)) {
+		return EBADF;
+	}
+
+	cnode = VTOC(vp);
+
+	if (hfs_lock(cnode, HFS_EXCLUSIVE_LOCK)) {
+		return EINVAL;
+	}
+	
+	/* is the volume formatted for content protection? */
+	if (cp_fs_protected(VTOVFS(vp))) {
+		entry = cnode->c_cpentry;
+		if (entry == NULL) { 
+			error = EINVAL;
+			goto out;
+		}
+
+		if ((entry->cp_flags & CP_NEEDS_KEYS)) {
+			if ((error = cp_make_keys(entry)) != 0) {
+				goto out;
+			}
+		}
+
+		if (entry->cp_flags & CP_KEY_FLUSHED) {
+			error = cp_restore_keys(entry);
+			if (error)
+				goto out;
+		}
+
+		/* re-wrap per-file key with new class */
+		error = cp_wrap(newclass,
+						&entry->cp_cache_key[0], 
+						&entry->cp_persistent_key[0]);
+		if (error) {
+			/* we didn't have perms to set this class. leave file as-is and error out */
+			goto out;
+		}
+
+		entry->cp_pclass = newclass;
+
+		/* prepare to write the xattr out */
+		bcopy(&entry->cp_persistent_key, &xattr.persistent_key, CP_WRAPPEDKEYSIZE);
+	} else {
+		/* no live keys for this file. just remember intended class */
+		bzero(&xattr.persistent_key, CP_WRAPPEDKEYSIZE);
+	}
+
+	xattr.xattr_major_version = CP_CURRENT_MAJOR_VERS;
+	xattr.xattr_minor_version = CP_CURRENT_MINOR_VERS;
+	xattr.key_size = CP_WRAPPEDKEYSIZE;
+	xattr.flags = 0;
+	xattr.persistent_class = newclass;
+	error = cp_setxattr(cnode, &xattr, XATTR_REPLACE);
+	
+	if (error == ENOATTR) {
+		error = cp_setxattr (cnode, &xattr, XATTR_CREATE);
+	}
+
+out:
+	hfs_unlock(cnode);
+	return error;
+}
+
+/*
+ * Check permission for the given operation (read, write, page in) on this node.
+ * Additionally, if the node needs work, do it:
+ * - create a new key for the file if one hasn't been set before
+ * - write out the xattr if it hasn't already been saved
+ * - unwrap the key if needed
+ *
+ * Takes cnode lock, and upgrades to exclusive if modifying cprotect.
+ */
+int
+cp_handle_vnop(cnode_t *cnode, int vnop)
+{
+	struct cprotect *entry;
+	int error = 0;
+	struct cp_xattr xattr;
+
+	if ((error = hfs_lock(cnode, HFS_SHARED_LOCK)) != KERN_SUCCESS) {
+		return error;
+	}
+
+	entry = cnode->c_cpentry;
+	if (!entry)
+		goto out;
+
+	if ((error = cp_check_access(cnode, vnop)) != KERN_SUCCESS) {
+		goto out;
+	}
+
+	if (entry->cp_flags == 0) {
+		/* no more work to do */
+		goto out;
+	}
+
+	/* upgrade to exclusive lock */
+	if (lck_rw_lock_shared_to_exclusive(&cnode->c_rwlock) == FALSE) {
+		if ((error = hfs_lock(cnode, HFS_EXCLUSIVE_LOCK)) != KERN_SUCCESS) {
+			return error;
+		}
+	} else {
+		cnode->c_lockowner = current_thread();
+	}
+
+	/* generate new keys if none have ever been saved */
+	if ((entry->cp_flags & CP_NEEDS_KEYS)) {
+		if ((error = cp_make_keys(entry)) != 0) {
+			goto out;
+		}
+	}
+
+	/* unwrap keys if needed */
+	if (entry->cp_flags & CP_KEY_FLUSHED) {
+		error = cp_restore_keys(entry);
+		if (error)
+			goto out;
+	}
+
+	/* write out the xattr if it's new */
+	if (entry->cp_flags & CP_NO_XATTR) {
+		bcopy(&entry->cp_persistent_key[0], &xattr.persistent_key, CP_WRAPPEDKEYSIZE);
+		xattr.xattr_major_version = CP_CURRENT_MAJOR_VERS;
+		xattr.xattr_minor_version = CP_CURRENT_MINOR_VERS;
+		xattr.key_size = CP_WRAPPEDKEYSIZE;
+		xattr.persistent_class = entry->cp_pclass;
+		error = cp_setxattr(cnode, &xattr, XATTR_CREATE);
+	}
+
+out:
+	hfs_unlock(cnode);
+	return error;
+}
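+
+/*
+ * (For reference: elsewhere in this change, hfs_vnop_read calls this as
+ * cp_handle_vnop(cp, CP_READ_ACCESS), and hfs_vnop_write/hfs_vnop_ioctl
+ * call it with CP_WRITE_ACCESS.)
+ */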
+
+/*
+ * During hfs resize operations, we have slightly different constraints than during
+ * normal VNOPs that read/write data to files.  Specifically, we already have the cnode
+ * locked (so nobody else can modify it), and we are doing the IO with root privileges,
+ * since we are moving the data behind the user's back.  We still validate the lock state
+ * before touching keys, but we don't worry about non-existent keys: if the file exists
+ * on-disk with a valid payload, then it must have keys set up already by definition.
+ */
+int
+cp_handle_relocate(cnode_t *cp)
+{
+	struct cprotect *entry;
+	int error = -1;
+
+	/* cp is already locked */	
+	entry = cp->c_cpentry;
+	if (!entry)
+		goto out;
+
+	/* 
+	 * Still need to validate whether to permit access to the file or not 
+	 * based on lock status 
+	 */
+	if ((error = cp_check_access(cp, CP_READ_ACCESS | CP_WRITE_ACCESS)) != KERN_SUCCESS) {
+		goto out;
+	}	
+
+	if (entry->cp_flags == 0) {
+		/* no more work to do */
+		error = 0;
+		goto out;
+	}
+
+	/* it must have keys since it is an existing file with actual payload */
+
+	/* unwrap keys if needed */
+	if (entry->cp_flags & CP_KEY_FLUSHED) {
+		error = cp_restore_keys(entry);
+	}
+
+	/* don't need to write out the EA since the file is extant */
+out:	
+
+	/* return the cp still locked */
+	return error;
+}
+
+
+
+/*
+ * cp_getrootxattr:
+ * Reads the EA we set on the root folder (fileid 1) to determine the
+ * version of Content Protection that was used to write to this filesystem.
+ * Note that all multi-byte fields are written to disk little endian, so they must be
+ * converted to native endianness as needed.
+ */
+
+int cp_getrootxattr(struct hfsmount* hfsmp, struct cp_root_xattr *outxattr) {
+	uio_t   auio;
+	char    uio_buf[UIO_SIZEOF(1)];
+	size_t attrsize = sizeof(struct cp_root_xattr);
+	int error = 0;
+	struct vnop_getxattr_args args;
+
+	if (!outxattr) {
+		panic("cp_getrootxattr called with outxattr == NULL");
+	}
+
+	auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
+	uio_addiov(auio, CAST_USER_ADDR_T(outxattr), attrsize);
+
+	args.a_desc = NULL; // unused
+	args.a_vp = NULL; //unused since we're reading the EA from the root folder.
+	args.a_name = CONTENT_PROTECTION_XATTR_NAME;
+	args.a_uio = auio;
+	args.a_size = &attrsize;
+	args.a_options = XATTR_REPLACE;
+	args.a_context = NULL; // unused
+
+	error = hfs_getxattr_internal(NULL, &args, hfsmp, 1);
+
+	if (error != KERN_SUCCESS) {
+		goto out;
+	}
+
+	/* Now convert the multi-byte fields to native endianness */
+	outxattr->major_version = OSSwapLittleToHostInt16(outxattr->major_version);
+	outxattr->minor_version = OSSwapLittleToHostInt16(outxattr->minor_version);
+	outxattr->flags = OSSwapLittleToHostInt64(outxattr->flags);
+
+out:
+	uio_free(auio);
+	return error;
+}
+
+/*
+ * cp_setrootxattr:
+ * Writes the EA on the root folder (fileid 1) that records the version of
+ * Content Protection that was used to write to this filesystem.
+ * Note that all multi-byte fields are written to disk little endian, so they must be
+ * converted to little endian before writing.
+ *
+ * This is written to disk when the EA is detected to be missing, or when we need
+ * to make a modification to the on-disk version that can be done in-place.
+ */
+int
+cp_setrootxattr(struct hfsmount *hfsmp, struct cp_root_xattr *newxattr)
+{
+	int error = 0;
+	struct vnop_setxattr_args args;
+
+	args.a_desc = NULL;
+	args.a_vp = NULL;
+	args.a_name = CONTENT_PROTECTION_XATTR_NAME;
+	args.a_uio = NULL; //pass data ptr instead
+	args.a_options = 0; 
+	args.a_context = NULL; //no context needed, only done from mount.
+
+	/* Now convert the multi-byte fields to little endian before writing to disk. */
+	newxattr->major_version = OSSwapHostToLittleInt16(newxattr->major_version);
+	newxattr->minor_version = OSSwapHostToLittleInt16(newxattr->minor_version);
+	newxattr->flags = OSSwapHostToLittleInt64(newxattr->flags);
+
+	error = hfs_setxattr_internal(NULL, (caddr_t)newxattr, 
+			sizeof(struct cp_root_xattr), &args, hfsmp, 1);
+	return error;
+}
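+
+/*
+ * Note: unlike cp_getrootxattr(), this swaps the caller's buffer in place
+ * and never swaps it back, so *newxattr is left little-endian on return.
+ * cp_setxattr() below behaves the same way.
+ */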
+
+
+
+
+/********************
+ * Private Functions
+ *******************/
+
+static int
+cp_vnode_is_eligible(vnode_t vp)
+{
+	return ((vp->v_op == hfs_vnodeop_p) &&
+			(!vnode_issystem(vp)) &&
+			(vnode_isreg(vp)));
+}
+
+
+
+static int
+cp_is_valid_class(int class)
+{
+	return ((class >= PROTECTION_CLASS_A) &&
+			(class <= PROTECTION_CLASS_F));
+}
+
+
+static struct cprotect *
+cp_entry_alloc(void)
+{
+	struct cprotect *cp_entry;
+	
+	MALLOC(cp_entry, struct cprotect *, sizeof(struct cprotect), 
+		   M_TEMP, M_WAITOK);
+	if (cp_entry == NULL)
+		return (NULL);
+	
+	bzero(cp_entry, sizeof(*cp_entry));
+	return (cp_entry);
+}
+
+
+/*
+ * Reads xattr data off the cnode and into provided xattr.
+ * cnode lock held shared
+ */
+static int 
+cp_getxattr(cnode_t *cnode, struct cp_xattr *outxattr)
+{
+	uio_t	auio;
+	char	uio_buf[UIO_SIZEOF(1)];
+	size_t attrsize = sizeof(struct cp_xattr);
+	int error = 0;
+	struct vnop_getxattr_args args;
+		
+	auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
+	uio_addiov(auio, CAST_USER_ADDR_T(outxattr), attrsize);
+	
+	args.a_desc = NULL; // unused
+	args.a_vp = cnode->c_vp;
+	args.a_name = CONTENT_PROTECTION_XATTR_NAME;
+	args.a_uio = auio;
+	args.a_size = &attrsize;
+	args.a_options = XATTR_REPLACE;
+	args.a_context = vfs_context_current(); // unused
+	error = hfs_getxattr_internal(cnode, &args, VTOHFS(cnode->c_vp), 0);
+	if (error != KERN_SUCCESS) {
+		goto out;
+	}
+
+	/* Endian swap the multi-byte fields into host endianness from L.E. */
+	outxattr->xattr_major_version = OSSwapLittleToHostInt16(outxattr->xattr_major_version);
+	outxattr->xattr_minor_version = OSSwapLittleToHostInt16(outxattr->xattr_minor_version);
+	outxattr->key_size = OSSwapLittleToHostInt32(outxattr->key_size);
+	outxattr->flags = OSSwapLittleToHostInt32(outxattr->flags);
+	outxattr->persistent_class = OSSwapLittleToHostInt32(outxattr->persistent_class);
+
+out:
+	uio_free(auio);
+	return error;
+}
+
+/*
+ * Stores new xattr data on the cnode.
+ * cnode lock held exclusive
+ */
+static int
+cp_setxattr(cnode_t *cnode, struct cp_xattr *newxattr, int options)
+{
+	int error = 0;
+	struct vnop_setxattr_args args;
+	
+	args.a_desc = NULL;
+	args.a_vp = cnode->c_vp;
+	args.a_name = CONTENT_PROTECTION_XATTR_NAME;
+	args.a_uio = NULL; //pass data ptr instead
+	args.a_options = options; 
+	args.a_context = vfs_context_current();
+
+	/* Endian swap the multi-byte fields into L.E from host. */
+	newxattr->xattr_major_version = OSSwapHostToLittleInt16(newxattr->xattr_major_version);
+	newxattr->xattr_minor_version = OSSwapHostToLittleInt16(newxattr->xattr_minor_version);
+	newxattr->key_size = OSSwapHostToLittleInt32(newxattr->key_size);
+	newxattr->flags = OSSwapHostToLittleInt32(newxattr->flags);
+	newxattr->persistent_class = OSSwapHostToLittleInt32(newxattr->persistent_class);
+
+	error = hfs_setxattr_internal(cnode, (caddr_t)newxattr, 
+								  sizeof(struct cp_xattr), &args, VTOHFS(cnode->c_vp), 0);
+
+	if ((error == KERN_SUCCESS) && (cnode->c_cpentry)) {
+		cnode->c_cpentry->cp_flags &= ~CP_NO_XATTR;
+	}
+
+	return error;
+}
+
+
+/*
+ * Make a new random per-file key and wrap it.
+ */
+static int
+cp_make_keys(struct cprotect *entry)
+{
+	int error = 0;
+
+	if (g_cp_state.wrap_functions_set != 1) {
+		printf("hfs: CP: could not create keys: no wrappers set\n");
+		return ENXIO;
+	}
+
+	/* create new cp data: key and class */
+	read_random(&entry->cp_cache_key[0], CP_KEYSIZE);
+	entry->cp_pclass = PROTECTION_CLASS_D;
+
+	/* wrap the new key in the class key */
+	error = cp_wrap(PROTECTION_CLASS_D,
+					&entry->cp_cache_key[0], 
+					&entry->cp_persistent_key[0]);
+	
+	if (error) {
+		panic("could not wrap new key in class D\n");
+	}
+
+	/* ready for business */
+	entry->cp_flags &= ~CP_NEEDS_KEYS;
+	entry->cp_flags |= CP_NO_XATTR;
+
+	return error;
+}
+
+/*
+ * If permitted, restore entry's unwrapped key from the persistent key.
+ * If not, clear the key and set CP_KEY_FLUSHED.
+ * cnode lock held exclusive
+ */
+static int
+cp_restore_keys(struct cprotect *entry)
+{
+	int error = 0;
+
+ 	error = cp_unwrap(entry->cp_pclass,
+					  &entry->cp_persistent_key[0],
+					  &entry->cp_cache_key[0]);
+	
+	if (error) {
+		entry->cp_flags |= CP_KEY_FLUSHED;
+		bzero(entry->cp_cache_key, CP_KEYSIZE);
+		error = EPERM;
+	}
+	else {
+		entry->cp_flags &= ~CP_KEY_FLUSHED;
+	}
+	return error;
+}
+
+static int
+cp_lock_vfs_callback(mount_t mp, void *arg)
+{
+	if (!cp_fs_protected(mp)) {
+		/* not interested in this mount point */
+		return 0;
+	}
+	
+	return vnode_iterate(mp, 0, cp_lock_vnode_callback, arg);
+}
+
+
+/*
+ * Deny access to protected files if keys have been locked.
+ *
+ * cnode lock is taken shared.
+ */
+static int
+cp_check_access(cnode_t *cnode, int vnop)
+{
+	int error = 0;
+
+	if (g_cp_state.lock_state == CP_UNLOCKED_STATE) {
+		return KERN_SUCCESS;
+	}
+
+	if (!cnode->c_cpentry) {
+		/* unprotected node */
+		return KERN_SUCCESS;
+	}
+
+	/* Deny all access for class A files, and read access for class B */
+	switch (cnode->c_cpentry->cp_pclass) {
+		case PROTECTION_CLASS_A: {
+			error = EPERM;
+			break;
+		}
+		case PROTECTION_CLASS_B: {
+			if (vnop & CP_READ_ACCESS)
+				error = EPERM;
+			else
+				error = 0;
+			break;
+		}
+		default:
+			error = 0;
+			break;
+	}
+
+	return error;
+}
+
+
+
+/*
+ * Respond to a lock or unlock event.
+ * On lock: clear out keys from memory, then flush file contents.
+ * On unlock: nothing (function not called).
+ */
+static int
+cp_lock_vnode_callback(vnode_t vp, void *arg)
+{
+	cnode_t *cp = NULL;
+	struct cprotect *entry = NULL;
+	int error = 0;
+	int locked = 1;
+	int action = 0;
+
+	error = vnode_getwithref (vp);
+	if (error) {
+		return error;
+	}
+
+	cp = VTOC(vp);
+	hfs_lock(cp, HFS_FORCE_LOCK);
+	
+	entry = cp->c_cpentry;
+	if (!entry) {
+		/* unprotected vnode: not a regular file */
+		goto out;
+	}
+	
+	action = (int)((uintptr_t) arg);
+	switch (action) {
+		case CP_LOCKED_STATE: {
+			vfs_context_t ctx;
+			if (entry->cp_pclass != PROTECTION_CLASS_A) {
+				/* no change at lock for other classes */
+				goto out;
+			}
+			
+			/* Before doing anything else, zero-fill sparse ranges as needed */
+			ctx = vfs_context_current();
+			(void) hfs_filedone (vp, ctx);
+
+			/* first, sync back dirty pages */
+			hfs_unlock (cp);
+			ubc_msync (vp, 0, ubc_getsize(vp), NULL, UBC_PUSHALL | UBC_INVALIDATE | UBC_SYNC);
+			hfs_lock (cp, HFS_FORCE_LOCK);
+			
+			/* flush keys */
+			entry->cp_flags |= CP_KEY_FLUSHED;
+			bzero(&entry->cp_cache_key, CP_KEYSIZE);
+			/* some writes may have arrived in the meantime; dump those pages */
+			hfs_unlock(cp);
+			locked = 0;
+		
+			ubc_msync (vp, 0, ubc_getsize(vp), NULL, UBC_INVALIDATE | UBC_SYNC);	
+			break;
+		}
+		case CP_UNLOCKED_STATE: {
+			/* no-op */
+			break;
+		}
+		default:
+			panic("unknown lock action %d\n", action);
+	}
+	
+out:
+	if (locked)
+		hfs_unlock(cp);
+	vnode_put (vp);
+	return error;
+}
+
+static int
+cp_wrap(int class, void *inkey, void *outkey)
+{
+	int error = 0;
+	size_t keyln = CP_WRAPPEDKEYSIZE;
+	
+	if (class == PROTECTION_CLASS_F) {
+		bzero(outkey, CP_WRAPPEDKEYSIZE);
+		return 0;
+	}
+	
+	error = g_cp_wrap_func.wrapper(class,
+								   inkey,
+								   CP_KEYSIZE,
+								   outkey,
+								   &keyln);
+	
+	return error;
+}
+
+
+static int
+cp_unwrap(int class, void *inkey, void *outkey)
+{
+	int error = 0;
+	size_t keyln = CP_KEYSIZE;
+	
+	if (class == PROTECTION_CLASS_F) {
+		/* we didn't save a wrapped key, so nothing to unwrap */
+		return EPERM;
+	}
+	
+	error = g_cp_wrap_func.unwrapper(class,
+									 inkey,
+									 CP_WRAPPEDKEYSIZE,
+									 outkey,
+									 &keyln);
+	
+	return error;
+	
+}
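+
+/*
+ * Round-trip sketch: for classes A-E, cp_unwrap(class, wrapped, key) is
+ * expected to recover what cp_wrap(class, key, wrapped) produced, provided
+ * the registered key store can reach the class key in the current lock
+ * state.  Class F is special-cased in both directions: its wrapped key is
+ * never persisted, so once the cached key is gone there is nothing to
+ * unwrap and EPERM is returned.
+ */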
+
+
+#else
+
+int cp_key_store_action(int action __unused)
+{
+	return ENOTSUP;
+}
+
+
+int cp_register_wraps(cp_wrap_func_t key_store_func __unused)
+{
+	return ENOTSUP;
+}
+
+#endif /* CONFIG_PROTECT */
diff --git a/bsd/hfs/hfs_dbg.h b/bsd/hfs/hfs_dbg.h
index ef0423083..f39271fe4 100644
--- a/bsd/hfs/hfs_dbg.h
+++ b/bsd/hfs/hfs_dbg.h
@@ -94,7 +94,7 @@ extern int hfs_dbg_err;
     #if (HFS_DEBUG_STAGE == 4)
 		char		gDebugAssertStr[255];
 		#define DBG_ASSERT(a) { if (!(a)) { \
-				sprintf(gDebugAssertStr,"Oops - File "__FILE__", line %d: assertion '%s' failed.\n", __LINE__, #a); \
+				snprintf(gDebugAssertStr, sizeof (gDebugAssertStr), "Oops - File "__FILE__", line %d: assertion '%s' failed.\n", __LINE__, #a); \
                 Debugger(gDebugAssertStr); } }
 	#else
 #define DBG_ASSERT(a) { if (!(a)) { panic("File "__FILE__", line %d: assertion '%s' failed.\n", __LINE__, #a); } }
diff --git a/bsd/hfs/hfs_encodings.c b/bsd/hfs/hfs_encodings.c
index 4a67567b2..13c9781f8 100644
--- a/bsd/hfs/hfs_encodings.c
+++ b/bsd/hfs/hfs_encodings.c
@@ -239,8 +239,8 @@ hfs_to_utf8(ExtendedVCB *vcb, const Str31 hfs_str, ByteCount maxDstLen, ByteCoun
 	UniChar uniStr[MAX_HFS_UNICODE_CHARS];
 	ItemCount uniCount;
 	size_t utf8len;
-	u_int8_t pascal_length = 0;
 	hfs_to_unicode_func_t hfs_get_unicode = VCBTOHFS(vcb)->hfs_get_unicode;
+	u_int8_t pascal_length = 0;
 
 	/* 
 	 * Validate the length of the Pascal-style string before passing it
@@ -252,7 +252,7 @@ hfs_to_utf8(ExtendedVCB *vcb, const Str31 hfs_str, ByteCount maxDstLen, ByteCoun
 		error = EINVAL;
 		return error;
 	}	
-
+	
 	error = hfs_get_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
 	
 	if (uniCount == 0)
@@ -292,7 +292,7 @@ mac_roman_to_utf8(const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDst
 		/* invalid string; longer than 31 bytes */
 		error = EINVAL;
 		return error;
-	}	
+	}
 
 	error = mac_roman_to_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
 	
diff --git a/bsd/hfs/hfs_endian.c b/bsd/hfs/hfs_endian.c
index 6f840045d..367785b29 100644
--- a/bsd/hfs/hfs_endian.c
+++ b/bsd/hfs/hfs_endian.c
@@ -49,13 +49,14 @@
  * The direction parameter must be kSwapBTNodeBigToHost or kSwapBTNodeHostToBig.
  * The kSwapBTNodeHeaderRecordOnly "direction" is not valid for these routines.
  */
-static int hfs_swap_HFSPlusBTInternalNode (BlockDescriptor *src, HFSCatalogNodeID fileID, enum HFSBTSwapDirection direction);
-static int hfs_swap_HFSBTInternalNode (BlockDescriptor *src, HFSCatalogNodeID fileID, enum HFSBTSwapDirection direction);
+int hfs_swap_HFSPlusBTInternalNode (BlockDescriptor *src, HFSCatalogNodeID fileID, enum HFSBTSwapDirection direction);
+int hfs_swap_HFSBTInternalNode (BlockDescriptor *src, HFSCatalogNodeID fileID, enum HFSBTSwapDirection direction);
+void hfs_swap_HFSPlusForkData (HFSPlusForkData *src);
 
 /*
  * hfs_swap_HFSPlusForkData
  */
-static void
+void
 hfs_swap_HFSPlusForkData (
     HFSPlusForkData *src
 )
@@ -160,7 +161,7 @@ hfs_swap_BTNode (
 		/*
 		 * Check srcDesc->height.  Don't swap it because it's only one byte.
 		 */
-		if (srcDesc->height > btcb->treeDepth) {
+		if (srcDesc->height > kMaxTreeDepth) {
 			printf("hfs_swap_BTNode: invalid node height (%d)\n", srcDesc->height);
 			error = fsBTInvalidHeaderErr;
 			goto fail;
@@ -314,7 +315,7 @@ hfs_swap_BTNode (
 		/* 
 		 * Check srcDesc->height.  Don't swap it because it's only one byte.
 		 */
-		if (srcDesc->height > btcb->treeDepth) {
+		if (srcDesc->height > kMaxTreeDepth) {
 			panic("hfs_UNswap_BTNode: invalid node height (%d)\n", srcDesc->height);
 			error = fsBTInvalidHeaderErr;
 			goto fail;
@@ -389,7 +390,7 @@ fail:
     return (error);
 }
 
-static int
+int
 hfs_swap_HFSPlusBTInternalNode (
     BlockDescriptor *src,
     HFSCatalogNodeID fileID,
@@ -925,7 +926,7 @@ hfs_swap_HFSPlusBTInternalNode (
     return (0);
 }
 
-static int
+int
 hfs_swap_HFSBTInternalNode (
     BlockDescriptor *src,
     HFSCatalogNodeID fileID,
diff --git a/bsd/hfs/hfs_format.h b/bsd/hfs/hfs_format.h
index 151cadde7..ae1039a3e 100644
--- a/bsd/hfs/hfs_format.h
+++ b/bsd/hfs/hfs_format.h
@@ -232,6 +232,21 @@ struct FndrOpaqueInfo {
 } __attribute__((aligned(2), packed));
 typedef struct FndrOpaqueInfo FndrOpaqueInfo;
 
+struct FndrExtendedDirInfo {
+	u_int32_t point;
+	u_int32_t date_added;
+	u_int16_t extended_flags;
+	u_int16_t reserved3;
+	u_int32_t reserved4;
+} __attribute__((aligned(2), packed));
+
+struct FndrExtendedFileInfo {
+	u_int32_t reserved1;
+	u_int32_t date_added;
+	u_int16_t extended_flags;
+	u_int16_t reserved2;
+	u_int32_t reserved3;	
+} __attribute__((aligned(2), packed));
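+
+/* The date_added fields above record when the item was added to its parent
+ * folder; unlike the 1904-based dates elsewhere in HFS+, this is believed
+ * to be stored as seconds since the POSIX epoch. */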
 
 /* HFS Plus Fork data info - 80 bytes */
 struct HFSPlusForkData {
@@ -354,7 +369,11 @@ enum {
 	kHFSHasLinkChainMask	= 0x0020,
 
 	kHFSHasChildLinkBit	= 0x0006,	/* folder has a child that's a dir link */
-	kHFSHasChildLinkMask	= 0x0040
+	kHFSHasChildLinkMask	= 0x0040,
+
+	kHFSHasDateAddedBit = 0x0007,	/* File/Folder has the date-added stored in the finder info. */
+	kHFSHasDateAddedMask = 0x0080 
+
 };
 
 
@@ -577,7 +596,8 @@ enum {
 	 * Therefore, bits 16-31 can only be used on HFS Plus.
 	 */
 	kHFSUnusedNodeFixBit = 31,				/* Unused nodes in the Catalog B-tree have been zero-filled.  See Radar #6947811. */
-	
+	kHFSContentProtectionBit = 30,			/* Volume has per-file content protection */
+
 	kHFSVolumeHardwareLockMask	= 1 << kHFSVolumeHardwareLockBit,
 	kHFSVolumeUnmountedMask		= 1 << kHFSVolumeUnmountedBit,
 	kHFSVolumeSparedBlocksMask	= 1 << kHFSVolumeSparedBlocksBit,
@@ -588,6 +608,7 @@ enum {
 	kHFSVolumeInconsistentMask = 1 << kHFSVolumeInconsistentBit,
 	kHFSVolumeSoftwareLockMask	= 1 << kHFSVolumeSoftwareLockBit,
 	kHFSUnusedNodeFixMask = 1 << kHFSUnusedNodeFixBit,
+	kHFSContentProtectionMask = 1 << kHFSContentProtectionBit,
 	kHFSMDBAttributesMask		= 0x8380
 };
 
diff --git a/bsd/hfs/hfs_fsctl.h b/bsd/hfs/hfs_fsctl.h
index 7759e799a..7bebee3fb 100644
--- a/bsd/hfs/hfs_fsctl.h
+++ b/bsd/hfs/hfs_fsctl.h
@@ -81,8 +81,8 @@ struct hfs_journal_info {
 #define HFSIOC_BULKACCESS _IOW('h', 9, struct user32_access_t)
 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
 
-#define HFSIOC_SETACLSTATE  _IOW('h', 10, int32_t)
-#define HFS_SETACLSTATE  IOCBASECMD(HFSIOC_SETACLSTATE)
+/* Unsupported - Previously used to enable/disable ACLs */
+#define HFSIOC_UNSUPPORTED  _IOW('h', 10, int32_t)
 
 #define HFSIOC_PREV_LINK  _IOWR('h', 11, u_int32_t)
 #define HFS_PREV_LINK  IOCBASECMD(HFSIOC_PREV_LINK)
@@ -121,7 +121,10 @@ struct hfs_journal_info {
 #define HFSIOC_VOLUME_STATUS  _IOR('h', 24, u_int32_t)
 #define HFS_VOLUME_STATUS  IOCBASECMD(HFSIOC_VOLUME_STATUS)
 
-#endif /* __APPLE_API_UNSTABLE */
+/* Disable metadata zone for given volume */
+#define HFSIOC_DISABLE_METAZONE	_IO('h', 25)
+#define HFS_DISABLE_METAZONE	IOCBASECMD(HFSIOC_DISABLE_METAZONE)
 
+#endif /* __APPLE_API_UNSTABLE */
 
 #endif /* ! _HFS_FSCTL_H_ */
diff --git a/bsd/hfs/hfs_hotfiles.c b/bsd/hfs/hfs_hotfiles.c
index ce0fe4dcf..66e273b5d 100644
--- a/bsd/hfs/hfs_hotfiles.c
+++ b/bsd/hfs/hfs_hotfiles.c
@@ -428,7 +428,6 @@ out:
 /*
  * Suspend recording the hottest files on a file system.
  */
-__private_extern__
 int
 hfs_recording_suspend(struct hfsmount *hfsmp)
 {
@@ -511,7 +510,6 @@ out:
 /*
  *
  */
-__private_extern__
 int
 hfs_recording_init(struct hfsmount *hfsmp)
 {
@@ -559,12 +557,17 @@ hfs_recording_init(struct hfsmount *hfsmp)
 			hfsmp->hfc_stage = HFC_IDLE;
 		return (0);
 	}
+
+	if (hfs_start_transaction(hfsmp) != 0) {
+		return EINVAL;
+	}
+
 	error = hfc_btree_create(hfsmp, HFSTOVCB(hfsmp)->blockSize, HFC_DEFAULT_FILE_COUNT);
 	if (error) {
 #if HFC_VERBOSE
 		printf("hfs: Error %d creating hot file b-tree on %s \n", error, hfsmp->vcbVN);
 #endif
-		return (error);
+		goto out2;
 	}
 	/*
 	 * Open the Hot File B-tree file for writing.
@@ -576,7 +579,7 @@ hfs_recording_init(struct hfsmount *hfsmp)
 #if HFC_VERBOSE
 		printf("hfs: Error %d opening hot file b-tree on %s \n", error, hfsmp->vcbVN);
 #endif
-		return (error);
+		goto out2;
 	}
 	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
 	if (iterator == NULL) {
@@ -697,6 +700,7 @@ out0:
 out1:
 	(void) BTScanTerminate(&scanstate, &data, &data, &data);
 out2:	
+	hfs_end_transaction(hfsmp);
 	if (iterator)
 		FREE(iterator, M_TEMP);
 	if (hfsmp->hfc_filevp) {
@@ -712,7 +716,6 @@ out2:
 /*
  * Use sync to perform occasional background work.
  */
-__private_extern__
 int
 hfs_hotfilesync(struct hfsmount *hfsmp, vfs_context_t ctx)
 {
@@ -759,7 +762,6 @@ hfs_hotfilesync(struct hfsmount *hfsmp, vfs_context_t ctx)
  *
  * Note: the cnode is locked on entry.
  */
-__private_extern__
 int
 hfs_addhotfile(struct vnode *vp)
 {
@@ -847,7 +849,6 @@ hfs_addhotfile_internal(struct vnode *vp)
  *
  * Note: the cnode is locked on entry.
  */
-__private_extern__
 int
 hfs_removehotfile(struct vnode *vp)
 {
@@ -1128,7 +1129,7 @@ hotfiles_adopt(struct hfsmount *hfsmp)
 		/*
 		 * Acquire a vnode for this file.
 		 */
-		error = hfs_vget(hfsmp, listp->hfl_hotfile[i].hf_fileid, &vp, 0);
+		error = hfs_vget(hfsmp, listp->hfl_hotfile[i].hf_fileid, &vp, 0, 0);
 		if (error) {
 			if (error == ENOENT) {
 				error = 0;
@@ -1350,7 +1351,7 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx)
 		/*
 		 * Acquire the vnode for this file.
 		 */
-		error = hfs_vget(hfsmp, key->fileID, &vp, 0);
+		error = hfs_vget(hfsmp, key->fileID, &vp, 0, 0);
 		if (error) {
 			if (error == ENOENT) {
 				goto delete;  /* stale entry, go to next */
@@ -1684,6 +1685,7 @@ hfc_btree_open(struct hfsmount *hfsmp, struct vnode **vpp)
 	int  error;
 	int  retry = 0;
 	int lockflags;
+	int newvnode_flags = 0;
 
 	*vpp = NULL;
 	p = current_proc();
@@ -1705,7 +1707,8 @@ hfc_btree_open(struct hfsmount *hfsmp, struct vnode **vpp)
 	}
 again:
 	cdesc.cd_flags |= CD_ISMETA;
-	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cdesc, 0, &cattr, &cfork, &vp);
+	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cdesc, 0, &cattr, 
+							&cfork, &vp, &newvnode_flags);
 	if (error) {
 		printf("hfs: hfc_btree_open: hfs_getnewvnode error %d\n", error);
 		cat_releasedesc(&cdesc);
@@ -1757,7 +1760,7 @@ hfc_btree_close(struct hfsmount *hfsmp, struct vnode *vp)
 
 
 	if (hfsmp->jnl) {
-	    hfs_journal_flush(hfsmp);
+	    hfs_journal_flush(hfsmp, FALSE);
 	}
 
 	if (vnode_get(vp) == 0) {
@@ -1814,6 +1817,11 @@ hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int ent
 	VATTR_SET(&va, va_uid, 0);
 	VATTR_SET(&va, va_gid, 0);
 
+	if (hfs_start_transaction(hfsmp) != 0) {
+	    error = EINVAL;
+	    goto out;
+	} 
+
 	/* call ourselves directly, ignore the higher-level VFS file creation code */
 	error = VNOP_CREATE(dvp, &vp, &cname, &va, ctx);
 	if (error) {
@@ -1941,6 +1949,7 @@ hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int ent
 		kmem_free(kernel_map, (vm_offset_t)buffer, nodesize);
 	}
 out:
+	hfs_end_transaction(hfsmp);
 	if (dvp) {
 		vnode_put(dvp);
 	}
diff --git a/bsd/hfs/hfs_kdebug.h b/bsd/hfs/hfs_kdebug.h
new file mode 100644
index 000000000..5dd5d6a9c
--- /dev/null
+++ b/bsd/hfs/hfs_kdebug.h
@@ -0,0 +1,54 @@
+#include <sys/kdebug.h>
+
+/*
+ * KERNEL_DEBUG related definitions for HFS.
+ *
+ * NOTE: The Class DBG_FSYSTEM = 3, and Subclass DBG_HFS = 8, so these
+ * debug codes are of the form 0x0308nnnn.
+ */
+#define HFSDBG_CODE(code)	FSDBG_CODE(DBG_HFS, code)
+
+enum {
+	HFSDBG_UNMAP_FREE			= HFSDBG_CODE(0),	/* 0x03080000 */
+	HFSDBG_UNMAP_ALLOC			= HFSDBG_CODE(1),	/* 0x03080004 */
+	HFSDBG_UNMAP_CALLBACK		= HFSDBG_CODE(2),	/* 0x03080008 */
+	/* 0x0308000C is unused */
+	HFSDBG_BLOCK_ALLOCATE		= HFSDBG_CODE(4),	/* 0x03080010 */
+	HFSDBG_BLOCK_DEALLOCATE		= HFSDBG_CODE(5),	/* 0x03080014 */
+	HFSDBG_READ_BITMAP_BLOCK	= HFSDBG_CODE(6),	/* 0x03080018 */
+	HFSDBG_RELEASE_BITMAP_BLOCK	= HFSDBG_CODE(7),	/* 0x0308001C */
+	HFSDBG_ALLOC_CONTIG_BITMAP	= HFSDBG_CODE(8),	/* 0x03080020 */
+	HFSDBG_ALLOC_ANY_BITMAP		= HFSDBG_CODE(9),	/* 0x03080024 */
+	HFSDBG_ALLOC_KNOWN_BITMAP	= HFSDBG_CODE(10),	/* 0x03080028 */
+	HFSDBG_MARK_ALLOC_BITMAP	= HFSDBG_CODE(11),	/* 0x0308002C */
+	HFSDBG_MARK_FREE_BITMAP		= HFSDBG_CODE(12),	/* 0x03080030 */
+	HFSDBG_BLOCK_FIND_CONTIG	= HFSDBG_CODE(13),	/* 0x03080034 */
+	HFSDBG_IS_ALLOCATED			= HFSDBG_CODE(14),	/* 0x03080038 */
+	/* 0x0308003C is unused */
+	HFSDBG_RESET_EXTENT_CACHE	= HFSDBG_CODE(16),	/* 0x03080040 */
+	HFSDBG_REMOVE_EXTENT_CACHE	= HFSDBG_CODE(17),	/* 0x03080044 */
+	HFSDBG_ADD_EXTENT_CACHE		= HFSDBG_CODE(18),	/* 0x03080048 */
+};
+
+/*
+	Parameters logged by the above
+	EVENT CODE					DBG_FUNC_START arg1, arg2, arg3, arg4 ... DBG_FUNC_END arg1, arg2, arg3, arg4
+	---------------------------
+	HFSDBG_UNMAP_CALLBACK		0, extentCount, 0, 0 ... 0, 0, 0, 0
+	HFSDBG_UNMAP_FREE			startBlock, blockCount, 0, 0 ... err, 0, 0, 0
+	HFSDBG_UNMAP_ALLOC			startBlock, blockCount, 0, 0 ... err, 0, 0, 0
+	HFSDBG_REMOVE_EXTENT_CACHE	startBlock, blockCount, 0, 0 ... 0,   0, 0, 0
+	HFSDBG_ADD_EXTENT_CACHE		startBlock, blockCount, 0, 0 ... err, 0, 0, 0
+	HFSDBG_MARK_ALLOC_BITMAP	startBlock, blockCount, 0, 0 ... err, 0, 0, 0
+	HFSDBG_MARK_FREE_BITMAP		startBlock, blockCount, valid, 0 ... err, 0, 0, 0
+	HFSDBG_BLOCK_DEALLOCATE		startBlock, blockCount, flags, 0 ... err, 0, 0, 0
+	HFSDBG_IS_ALLOCATED			startBlock, blockCount, stop, 0 ... err, 0, actualBlockCount, 0
+	HFSDBG_BLOCK_ALLOCATE		startBlock, minBlocks, maxBlocks, flags ... err, actualStartBlock, actualBlockCount, 0
+	HFSDBG_ALLOC_CONTIG_BITMAP	startBlock, minBlocks, maxBlocks, useMeta ... err, actualStartBlock, actualBlockCount, 0
+	HFSDBG_ALLOC_ANY_BITMAP		startBlock, endBlock,  maxBlocks, useMeta ... err, actualStartBlock, actualBlockCount, 0
+	HFSDBG_ALLOC_KNOWN_BITMAP	0,          0,         maxBlocks, 0 ... err, actualStartBlock, actualBlockCount, 0
+	HFSDBG_BLOCK_FIND_CONTIG	startBlock, endBlock, minBlocks, maxBlocks ... err, actualStartBlock, actualBlockCount, 0
+	HFSDBG_READ_BITMAP_BLOCK	startBlock, 0,          0, 0 ... err, 0, 0, 0
+	HFSDBG_RELEASE_BITMAP_BLOCK	dirty, 0, 0, 0 ... 0, 0, 0, 0
+	HFSDBG_RESET_EXTENT_CACHE	0, 0, 0, 0 ... 0, 0, 0, 0
+*/
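+
+/*
+ * Usage sketch (illustrative; the real call sites live in the allocator
+ * code, not in this header): a routine brackets its work with a START/END
+ * pair using the argument layout documented above, e.g.
+ *
+ *	KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START,
+ *			startBlock, blockCount, 0, 0, 0);
+ *	... free the extent ...
+ *	KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END,
+ *			err, 0, 0, 0, 0);
+ */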
diff --git a/bsd/hfs/hfs_link.c b/bsd/hfs/hfs_link.c
index 878c9def0..d24a92011 100644
--- a/bsd/hfs/hfs_link.c
+++ b/bsd/hfs/hfs_link.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -35,6 +35,7 @@
 #include <sys/vnode.h>
 #include <vfs/vfs_support.h>
 #include <libkern/libkern.h>
+#include <sys/fsctl.h>
 
 #include "hfs.h"
 #include "hfs_catalog.h"
@@ -61,6 +62,8 @@ const char *hfs_private_names[] = {
 static int  setfirstlink(struct hfsmount * hfsmp, cnid_t fileid, cnid_t firstlink);
 static int  getfirstlink(struct hfsmount * hfsmp, cnid_t fileid, cnid_t *firstlink);
 
+int hfs_makelink(struct hfsmount *hfsmp, struct vnode *src_vp, struct cnode *cp, 
+		struct cnode *dcp, struct componentname *cnp);
 /*
  * Create a new catalog link record
  *
@@ -92,7 +95,7 @@ createindirectlink(struct hfsmount *hfsmp, u_int32_t linknum, struct cat_desc *d
 	
 	/* Links are matched to inodes by link ID and to volumes by create date */
 	attr.ca_linkref = linknum;
-	attr.ca_itime = hfsmp->hfs_itime;
+	attr.ca_itime = hfsmp->hfs_metadata_createdate;
 	attr.ca_mode = S_IFREG | S_IRUSR | S_IRGRP | S_IROTH;
 	attr.ca_recflags = kHFSHasLinkChainMask | kHFSThreadExistsMask;
 	attr.ca_flags = UF_IMMUTABLE;
@@ -121,13 +124,15 @@ createindirectlink(struct hfsmount *hfsmp, u_int32_t linknum, struct cat_desc *d
 
 /*
  * Make a link to the cnode cp in the directory dp
- * using the name in cnp.
+ * using the name in cnp.  src_vp is the vnode that 
+ * corresponds to 'cp' which was part of the arguments to
+ * hfs_vnop_link.
  *
  * The cnodes cp and dcp must be locked.
  */
-static int
-hfs_makelink(struct hfsmount *hfsmp, struct cnode *cp, struct cnode *dcp,
-		struct componentname *cnp)
+int
+hfs_makelink(struct hfsmount *hfsmp, struct vnode *src_vp, struct cnode *cp, 
+		struct cnode *dcp, struct componentname *cnp)
 {
 	vfs_context_t ctx = cnp->cn_context;
 	struct proc *p = vfs_context_proc(ctx);
@@ -291,7 +296,7 @@ hfs_makelink(struct hfsmount *hfsmp, struct cnode *cp, struct cnode *dcp,
 
 	    /* Update the original first link to point back to the new first link. */
 	    if (cp->c_attr.ca_recflags & kHFSHasLinkChainMask) {
-		(void) cat_updatelink(hfsmp, orig_firstlink, linkcnid, HFS_IGNORABLE_LINK);
+		(void) cat_update_siblinglinks(hfsmp, orig_firstlink, linkcnid, HFS_IGNORABLE_LINK);
 
 		/* Update the inode's first link value. */
 		if (type == DIR_HARDLINKS) {
@@ -327,17 +332,46 @@ hfs_makelink(struct hfsmount *hfsmp, struct cnode *cp, struct cnode *dcp,
 		    panic("hfs_makelink: cat_update of privdir failed! (%d)\n", retval);
 		}
 		cp->c_flag |= C_HARDLINK;
+
+		/*
+		 * Now we need to mark the vnodes as being hardlinks via the vnode_setmultipath call.
+		 * Note that we're calling vnode_get here, which should simply add an iocount if possible, without
+		 * doing much checking.  It's safe to call this because we are protected by the cnode lock, which
+		 * ensures that anyone trying to reclaim it will block until we release it.  vnode_get will usually 
+		 * give us an extra iocount, unless the vnode is about to be reclaimed (and has no iocounts).  
+		 * In that case, we'd error out, but we'd also not care if we added the VISHARDLINK bit to the vnode.  
+		 * 
+		 * As for the iocount we're about to add, we can't necessarily always call vnode_put here.  
+		 * If the one we add is the only iocount on the vnode, and there was
+		 * sufficient vnode pressure, it could go through VNOP_INACTIVE immediately, which would
+		 * require the cnode lock and cause us to double-lock panic.  We can only call vnode_put if we know
+		 * that the vnode we're operating on is the one with which we came into hfs_vnop_link, because
+		 * that means VFS took an iocount on it for us.  If it's *not* the one that we came into the call 
+		 * with, then mark it as NEED_VNODE_PUT to have hfs_unlock drop it for us.  hfs_vnop_link will 
+		 * unlock the cnode when it is finished.
+		 */
 		if ((vp = cp->c_vp) != NULLVP) {
-		    if (vnode_get(vp) == 0) {
-			vnode_setmultipath(vp);
-			vnode_put(vp);
-		    }
+			if (vnode_get(vp) == 0) {
+				vnode_setmultipath(vp);
+				if (vp == src_vp) {
+					/* we have an iocount on data fork vnode already. */
+					vnode_put(vp);
+				}
+				else {
+					cp->c_flag |= C_NEED_DVNODE_PUT;
+				}
+			}
 		}
 		if ((vp = cp->c_rsrc_vp) != NULLVP) {
-		    if (vnode_get(vp) == 0) {
-			vnode_setmultipath(vp);
-			vnode_put(vp);
-		    }
+			if (vnode_get(vp) == 0) {
+				vnode_setmultipath(vp);
+				if (vp == src_vp) {
+					vnode_put(vp);
+				}
+				else {
+					cp->c_flag |= C_NEED_RVNODE_PUT;
+				}
+			}
 		}
 		cp->c_touch_chgtime = TRUE;
 		cp->c_flag |= C_FORCEUPDATE;
@@ -364,7 +398,6 @@ out:
  *  IN struct componentname  *a_cnp;
  *  IN vfs_context_t  a_context;
  */
-__private_extern__
 int
 hfs_vnop_link(struct vnop_link_args *ap)
 {
@@ -408,7 +441,7 @@ hfs_vnop_link(struct vnop_link_args *ap)
 			return (EPERM);
 		}
 		/* Directory hardlinks also need the parent of the original directory. */
-		if ((error = hfs_vget(hfsmp, hfs_currentparent(VTOC(vp)), &fdvp, 1))) {
+		if ((error = hfs_vget(hfsmp, hfs_currentparent(VTOC(vp)), &fdvp, 1, 0))) {
 			return (error);
 		}
 	} else {
@@ -423,6 +456,10 @@ hfs_vnop_link(struct vnop_link_args *ap)
 		}
 		return (ENOSPC);
 	}
+
+	check_for_tracked_file(vp, VTOC(vp)->c_ctime, NAMESPACE_HANDLER_LINK_CREATE, NULL);
+
+
 	/* Lock the cnodes. */
 	if (fdvp) {
 		if ((error = hfs_lockfour(VTOC(tdvp), VTOC(vp), VTOC(fdvp), NULL, HFS_EXCLUSIVE_LOCK, NULL))) {
@@ -543,7 +580,7 @@ hfs_vnop_link(struct vnop_link_args *ap)
 
 	cp->c_linkcount++;
 	cp->c_touch_chgtime = TRUE;
-	error = hfs_makelink(hfsmp, cp, tdcp, cnp);
+	error = hfs_makelink(hfsmp, vp, cp, tdcp, cnp);
 	if (error) {
 		cp->c_linkcount--;
 		hfs_volupdate(hfsmp, VOL_UPDATE, 0);
@@ -634,7 +671,6 @@ out:
  *
  * Note: dvp and vp cnodes are already locked.
  */
-__private_extern__
 int
 hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp, struct componentname *cnp, int skip_reserve)
 {
@@ -806,11 +842,11 @@ hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp, struct c
 		}
 		/* Update previous link. */
 		if (prevlinkid) {
-			(void) cat_updatelink(hfsmp, prevlinkid, HFS_IGNORABLE_LINK, nextlinkid);
+			(void) cat_update_siblinglinks(hfsmp, prevlinkid, HFS_IGNORABLE_LINK, nextlinkid);
 		}
 		/* Update next link. */
 		if (nextlinkid) {
-			(void) cat_updatelink(hfsmp, nextlinkid, prevlinkid, HFS_IGNORABLE_LINK);
+			(void) cat_update_siblinglinks(hfsmp, nextlinkid, prevlinkid, HFS_IGNORABLE_LINK);
 		}
 	}
 
@@ -860,7 +896,6 @@ out:
  *
  * This call is assumed to be made during mount.
  */
-__private_extern__
 void
 hfs_privatedir_init(struct hfsmount * hfsmp, enum privdirtype type)
 {
@@ -909,7 +944,7 @@ hfs_privatedir_init(struct hfsmount * hfsmp, enum privdirtype type)
 	}
 
 	/* Grab the root directory so we can update it later. */
-	if (hfs_vget(hfsmp, kRootDirID, &dvp, 0) != 0) {
+	if (hfs_vget(hfsmp, kRootDirID, &dvp, 0, 0) != 0) {
 		goto exit;
 	}
 	dcp = VTOC(dvp);
@@ -965,7 +1000,7 @@ hfs_privatedir_init(struct hfsmount * hfsmp, enum privdirtype type)
 		goto exit;
 	}
 	if (type == FILE_HARDLINKS) {
-		hfsmp->hfs_metadata_createdate = hfsmp->hfs_itime;
+		hfsmp->hfs_metadata_createdate = priv_attrp->ca_itime;
 	}
 	hfs_volupdate(hfsmp, VOL_MKDIR, 1);
 exit:
@@ -985,9 +1020,8 @@ exit:
 /*
  * Lookup a hardlink link (from chain)
  */
-__private_extern__
 int
-hfs_lookuplink(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t *prevlinkid,  cnid_t *nextlinkid)
+hfs_lookup_siblinglinks(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t *prevlinkid,  cnid_t *nextlinkid)
 {
 	int lockflags;
 	int error;
@@ -997,7 +1031,7 @@ hfs_lookuplink(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t *prevlinkid,  c
 
 	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
 
-	error = cat_lookuplinkbyid(hfsmp, linkfileid, prevlinkid, nextlinkid);
+	error = cat_lookup_siblinglinks(hfsmp, linkfileid, prevlinkid, nextlinkid);
 	if (error == ENOLINK) {
 		hfs_systemfile_unlock(hfsmp, lockflags);
 		lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
diff --git a/bsd/hfs/hfs_lookup.c b/bsd/hfs/hfs_lookup.c
index c82e68cb4..13cb1aa48 100644
--- a/bsd/hfs/hfs_lookup.c
+++ b/bsd/hfs/hfs_lookup.c
@@ -164,8 +164,10 @@ hfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int
 	struct cat_attr attr;
 	struct cat_fork fork;
 	int lockflags;
+	int newvnode_flags;
 
   retry:
+	newvnode_flags = 0;
 	dcp = NULL;
 	hfsmp = VTOHFS(dvp);
 	*vpp = NULL;
@@ -227,8 +229,16 @@ hfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int
 			 * Note: We must drop the parent lock here before calling
 			 * hfs_getnewvnode (which takes the child lock).
 			 */
-		    	hfs_unlock(dcp);
-		    	dcp = NULL;
+			hfs_unlock(dcp);
+			dcp = NULL;
+			
+			/* Verify that the item just looked up isn't one of the hidden directories. */
+			if (desc.cd_cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
+				desc.cd_cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
+				retval = ENOENT;
+				goto exit;
+			}
+			
 			goto found;
 		}
 notfound:
@@ -301,37 +311,14 @@ found:
 		 * Directory hard links can have multiple parents so
 		 * find the appropriate parent for the current thread.
 		 */
-		if ((retval = hfs_vget(hfsmp, hfs_currentparent(VTOC(dvp)), &tvp, 0))) {
+		if ((retval = hfs_vget(hfsmp, hfs_currentparent(VTOC(dvp)), &tvp, 0, 0))) {
 			goto exit;
 		}
 		*cnode_locked = 1;
 		*vpp = tvp;
 	} else {
 		int type = (attr.ca_mode & S_IFMT);
-#if NAMEDRSRCFORK
-		int rsrc_warn = 0;
 
-		/*
-		 * Check if caller wants the resource fork but utilized
-		 * the legacy "file/rsrc" access path.
-		 *
-		 * This is deprecated behavior and support for it will not
-		 * be allowed beyond case insensitive HFS+ and even that
-		 * support will be removed in the next major OS release.
-		 */
-		if ((type == S_IFREG) &&
-		    ((flags & ISLASTCN) == 0) &&
-		    (cnp->cn_nameptr[cnp->cn_namelen] == '/') &&
-		    (bcmp(&cnp->cn_nameptr[cnp->cn_namelen+1], "rsrc", 5) == 0) &&
-		    ((hfsmp->hfs_flags & (HFS_STANDARD | HFS_CASE_SENSITIVE)) == 0)) {
-		
-			cnp->cn_consume = 5;
-			cnp->cn_flags |= CN_WANTSRSRCFORK | ISLASTCN | NOCACHE;
-			cnp->cn_flags &= ~MAKEENTRY;
-			flags |= ISLASTCN;
-			rsrc_warn = 1;
-		}
-#endif
 		if (!(flags & ISLASTCN) && (type != S_IFDIR) && (type != S_IFLNK)) {
 			retval = ENOTDIR;
 			goto exit;
@@ -344,22 +331,65 @@ found:
 		if (cnp->cn_namelen != desc.cd_namelen)
 			cnp->cn_flags &= ~MAKEENTRY;
 
-		retval = hfs_getnewvnode(hfsmp, dvp, cnp, &desc, 0, &attr, &fork, &tvp);
+		retval = hfs_getnewvnode(hfsmp, dvp, cnp, &desc, 0, &attr, &fork, &tvp, &newvnode_flags);
 
 		if (retval) {
 			/*
-			 * If this was a create operation lookup and another
-			 * process removed the object before we had a chance
-			 * to create the vnode, then just treat it as the not
-			 * found case above and return EJUSTRETURN.
-			 * We should do the same for the RENAME operation since we are
-			 * going to write it in regardless.
-			 */
+			 * If this was a create/rename operation lookup, then by this point
+			 * we expected to see the item returned from hfs_getnewvnode above.  
+			 * In the create case, it would probably eventually bubble out an EEXIST 
+			 * because the item existed when we were trying to create it.  In the 
+			 * rename case, it would let us know that we need to go ahead and 
+			 * delete it as part of the rename.  However, if we hit the condition below
+			 * then it means that we found the element during cat_lookup above, but 
+			 * it is now no longer there.  We simply behave as though we never found
+			 * the element at all and return EJUSTRETURN.
+			 */  
 			if ((retval == ENOENT) &&
-			    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)) &&
-			    (flags & ISLASTCN)) {
+					((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)) &&
+					(flags & ISLASTCN)) {
 				retval = EJUSTRETURN;
 			}
+			
+			/*
+			 * If this was a straight lookup operation, we may need to redrive the entire 
+			 * lookup starting from cat_lookup if the element was deleted as the result of 
+			 * a rename operation.  Since rename is supposed to guarantee atomicity, a
+			 * lookup cannot fail just because the underlying element was deleted by a
+			 * concurrent rename -- it must return either the element as it was before the
+			 * rename or the newer element.  If we are in this region, all we can do is
+			 * guarantee the latter case: the element has already been deleted, so we
+			 * simply re-try the lookup to ensure the caller gets the most recent element.
+			 */
+			if ((retval == ENOENT) && (cnp->cn_nameiop == LOOKUP) &&
+				(newvnode_flags & (GNV_CHASH_RENAMED | GNV_CAT_DELETED))) {
+				if (dcp) {
+					hfs_unlock (dcp);
+				}
+				/* get rid of any name buffers that may have lingered from the cat_lookup call */
+				cat_releasedesc (&desc);
+				goto retry;
+			}
+
+			/* Also, re-drive the lookup if the item we looked up was a hardlink, and the number 
+			 * or name of hardlinks has changed in the interim between the cat_lookup above, and
+			 * our call to hfs_getnewvnode.  hfs_getnewvnode will validate the cattr we passed it
+			 * against what is actually in the catalog after the cnode is created.  If there were
+			 * any issues, it will bubble out ERECYCLE, which we need to swallow and use as the
+			 * key to redrive as well.  We need to special case this below because in this case, 
+			 * it needs to occur regardless of the type of lookup we're doing here.  
+			 */
+			if ((retval == ERECYCLE) && (newvnode_flags & GNV_CAT_ATTRCHANGED)) {
+				if (dcp) {
+					hfs_unlock (dcp);
+				}
+				/* get rid of any name buffers that may have lingered from the cat_lookup call */
+				cat_releasedesc (&desc);
+				retval = 0;
+				goto retry;
+			}
+
+			/* skip to the error-handling code if we can't retry */
 			goto exit;
 		}
 
@@ -375,15 +405,6 @@ found:
 		}
 		*cnode_locked = 1;
 		*vpp = tvp;
-#if NAMEDRSRCFORK
-		if (rsrc_warn) {
-			if ((VTOC(tvp)->c_flag & C_WARNED_RSRC) == 0) {
-				VTOC(tvp)->c_flag |= C_WARNED_RSRC;
-				printf("hfs: %.200s: file access by '/rsrc' was deprecated in 10.4\n",
-				       cnp->cn_nameptr);
-			}
-		}
-#endif
 	}
 exit:
 	if (dcp) {
@@ -415,7 +436,6 @@ exit:
 
 #define	S_IXALL	0000111
 
-__private_extern__
 int
 hfs_vnop_lookup(struct vnop_lookup_args *ap)
 {
@@ -423,6 +443,7 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap)
 	struct vnode *vp;
 	struct cnode *cp;
 	struct cnode *dcp;
+	struct hfsmount *hfsmp;
 	int error;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
@@ -431,6 +452,8 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap)
 
 	*vpp = NULL;
 	dcp = VTOC(dvp);
+	
+	hfsmp = VTOHFS(dvp);
 
 	/*
 	 * Lookup an entry in the cache
@@ -455,14 +478,24 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap)
 	 */
 	error = 0;
 	vp = *vpp;
-
+	cp = VTOC(vp);
+	
+	/* We aren't allowed to vend out vp's via lookup to the hidden directory */
+	if (cp->c_cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
+		cp->c_cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
+		/* Drop the iocount from cache_lookup */
+		vnode_put (vp);
+		error = ENOENT;
+		goto exit;
+	}
+	
+	
 	/*
 	 * If this is a hard-link vnode then we need to update
 	 * the name (of the link), the parent ID, the cnid, the
 	 * text encoding and the catalog hint.  This enables
 	 * getattrlist calls to return the correct link info.
 	 */
-	cp = VTOC(vp);
 
 	if ((flags & ISLASTCN) && (cp->c_flag & C_HARDLINK)) {
 		hfs_lock(cp, HFS_FORCE_LOCK);
@@ -501,33 +534,7 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap)
 		}
 		hfs_unlock(cp);
 	}
-#if NAMEDRSRCFORK
-	/*
-	 * Check if caller wants the resource fork but utilized
-	 * the legacy "file/rsrc" access path.
-	 *
-	 * This is deprecated behavior and support for it will not
-	 * be allowed beyond case insensitive HFS+ and even that
-	 * support will be removed in the next major OS release.
-	 */
-	if ((dvp != vp) &&
-	    ((flags & ISLASTCN) == 0) &&
-	    vnode_isreg(vp) &&
-	    (cnp->cn_nameptr[cnp->cn_namelen] == '/') &&
-	    (bcmp(&cnp->cn_nameptr[cnp->cn_namelen+1], "rsrc", 5) == 0) &&
-	    ((VTOHFS(vp)->hfs_flags & (HFS_STANDARD | HFS_CASE_SENSITIVE)) == 0)) {		
-		cnp->cn_consume = 5;
-		cnp->cn_flags |= CN_WANTSRSRCFORK | ISLASTCN | NOCACHE;
-		cnp->cn_flags &= ~MAKEENTRY;
 
-		hfs_lock(cp, HFS_FORCE_LOCK);
-		if ((cp->c_flag & C_WARNED_RSRC) == 0) {
-			cp->c_flag |= C_WARNED_RSRC;
-			printf("hfs: %.200s: file access by '/rsrc' was deprecated in 10.4\n", cnp->cn_nameptr);
-		}
-		hfs_unlock(cp);
-	}
-#endif
 	return (error);
 	
 lookup:
diff --git a/bsd/hfs/hfs_mount.h b/bsd/hfs/hfs_mount.h
index 5782bd6f6..ca4f8703f 100644
--- a/bsd/hfs/hfs_mount.h
+++ b/bsd/hfs/hfs_mount.h
@@ -79,6 +79,7 @@ struct hfs_mount_args {
 #define HFS_GET_JOURNAL_INFO    0x6a6e6c69
 #define HFS_SET_PKG_EXTENSIONS  0x121031
 #define HFS_REPLAY_JOURNAL	0x6a6e6c72
+#define HFS_ENABLE_RESIZE_DEBUG 4	/* enable debug code for volume resizing */
 
 #endif /* __APPLE_API_UNSTABLE */
 
diff --git a/bsd/hfs/hfs_notification.c b/bsd/hfs/hfs_notification.c
index 517c8ecdc..227e744b2 100644
--- a/bsd/hfs/hfs_notification.c
+++ b/bsd/hfs/hfs_notification.c
@@ -32,7 +32,9 @@
 #include <sys/dirent.h>
 #include <sys/stat.h>
 #include <sys/mount.h>
+#include <sys/mount_internal.h>
 #include <sys/vnode.h>
+#include <sys/vnode_internal.h>
 #include <sys/malloc.h>
 #include <sys/ubc.h>
 #include <sys/quota.h>
@@ -71,10 +73,24 @@ void hfs_generate_volume_notifications(struct hfsmount *hfsmp)
 	}
 
 	if (state == 2 && !(hfsmp->hfs_notification_conditions & VQ_VERYLOWDISK)) {
+		/* Dump some logging to track down intermittent issues */
+		printf("HFS: Very Low Disk: freeblks: %d, dangerlimit: %d\n", freeblks, hfsmp->hfs_freespace_notify_dangerlimit);
+#if HFS_SPARSE_DEV
+		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+			if (hfsmp->hfs_backingfs_rootvp) {
+				struct mount *mp = vnode_mount (hfsmp->hfs_backingfs_rootvp);
+				/* If we're a sparse device, dump some info about the backing store... */
+				if (mp) {
+					printf("HFS: Very Low Disk: backingstore b_avail %lld, tag %d\n", mp->mnt_vfsstat.f_bavail, hfsmp->hfs_backingfs_rootvp->v_tag);
+				}
+			}
+		}
+#endif
 		hfsmp->hfs_notification_conditions |= (VQ_VERYLOWDISK|VQ_LOWDISK);
 		vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL);
 	} else if (state == 1) {
 		if (!(hfsmp->hfs_notification_conditions & VQ_LOWDISK)) {
+			printf("HFS: Low Disk: freeblks: %d, warninglimit: %d\n", freeblks, hfsmp->hfs_freespace_notify_warninglimit);
 			hfsmp->hfs_notification_conditions |= VQ_LOWDISK;
 			vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL);
 		} else if (hfsmp->hfs_notification_conditions & VQ_VERYLOWDISK) {
diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c
index 9fcd6a02d..27901f5de 100644
--- a/bsd/hfs/hfs_readwrite.c
+++ b/bsd/hfs/hfs_readwrite.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -90,8 +90,7 @@ static int  do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip,
 
 
 int flush_cache_on_write = 0;
-SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
-
+SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
 
 /*
  * Read data from a file.
@@ -109,6 +108,7 @@ hfs_vnop_read(struct vnop_read_args *ap)
 	off_t start_resid = uio_resid(uio);
 	off_t offset = uio_offset(uio);
 	int retval = 0;
+	int took_truncate_lock = 0;
 
 	/* Preflight checks */
 	if (!vnode_isreg(vp)) {
@@ -147,6 +147,14 @@ hfs_vnop_read(struct vnop_read_args *ap)
 			}
 			/* otherwise the file was converted back to a regular file while we were reading it */
 			retval = 0;
+		} else if ((VTOC(vp)->c_flags & UF_COMPRESSED)) {
+			int error;
+			
+			error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
+			if (error) {
+				return error;
+			}
+
 		}
 	}
 #endif /* HFS_COMPRESSION */
@@ -155,8 +163,15 @@ hfs_vnop_read(struct vnop_read_args *ap)
 	fp = VTOF(vp);
 	hfsmp = VTOHFS(vp);
 
+#if CONFIG_PROTECT
+	if ((retval = cp_handle_vnop (cp, CP_READ_ACCESS)) != 0) {
+		goto exit;
+	}
+#endif
+
 	/* Protect against a size change. */
-	hfs_lock_truncate(cp, 0);
+	hfs_lock_truncate(cp, HFS_SHARED_LOCK);
+	took_truncate_lock = 1;
 
 	filesize = fp->ff_size;
 	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
@@ -209,7 +224,10 @@ hfs_vnop_read(struct vnop_read_args *ap)
 			hfs_unlock(cp);
 	}
 exit:
-	hfs_unlock_truncate(cp, 0);
+	if (took_truncate_lock) {
+		hfs_unlock_truncate(cp, 0);
+	}
+
 	return (retval);
 }
 
@@ -238,7 +256,9 @@ hfs_vnop_write(struct vnop_write_args *ap)
 	int lockflags;
 	int cnode_locked = 0;
 	int partialwrite = 0;
-	int exclusive_lock = 0;
+	int do_snapshot = 1;
+	time_t orig_ctime = VTOC(vp)->c_ctime;
+	int took_truncate_lock = 0;
 
 #if HFS_COMPRESSION
 	if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
@@ -247,23 +267,34 @@ hfs_vnop_write(struct vnop_write_args *ap)
 			case FILE_IS_COMPRESSED:
 				return EACCES;
 			case FILE_IS_CONVERTING:
-				/* if FILE_IS_CONVERTING, we allow writes */
+				/* if FILE_IS_CONVERTING, we allow writes but do not
+				   bother with snapshots or else we will deadlock.
+				*/
+				do_snapshot = 0;
 				break;
 			default:
 				printf("invalid state %d for compressed file\n", state);
 				/* fall through */
 		}
+	} else if ((VTOC(vp)->c_flags & UF_COMPRESSED)) {
+		int error;
+		
+		error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
+		if (error != 0) {
+			return error;
+		}
 	}
+
+	if (do_snapshot) {
+		check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
+	}
+
 #endif
 
 	// LP64todo - fix this! uio_resid may be 64-bit value
 	resid = uio_resid(uio);
 	offset = uio_offset(uio);
 
-	if (ioflag & IO_APPEND) {
-	    exclusive_lock = 1;
-	}
-	
 	if (offset < 0)
 		return (EINVAL);
 	if (resid == 0)
@@ -275,8 +306,14 @@ hfs_vnop_write(struct vnop_write_args *ap)
 	fp = VTOF(vp);
 	hfsmp = VTOHFS(vp);
 
+#if CONFIG_PROTECT
+	if ((retval = cp_handle_vnop (cp, CP_WRITE_ACCESS)) != 0) {
+		goto exit;
+	}
+#endif
+
 	eflags = kEFDeferMask;	/* defer file block allocations */
-#ifdef HFS_SPARSE_DEV
+#if HFS_SPARSE_DEV
 	/* 
 	 * When the underlying device is sparse and space
 	 * is low (< 8MB), stop doing delayed allocations
@@ -291,8 +328,15 @@ hfs_vnop_write(struct vnop_write_args *ap)
 
 again:
 	/* Protect against a size change. */
-	hfs_lock_truncate(cp, exclusive_lock);
+	if (ioflag & IO_APPEND) {
+		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
+	}	
+	else {
+		hfs_lock_truncate(cp, HFS_SHARED_LOCK);
+	}
+	took_truncate_lock = 1;
 
+	/* Update UIO */
 	if (ioflag & IO_APPEND) {
 		uio_setoffset(uio, fp->ff_size);
 		offset = fp->ff_size;
@@ -313,13 +357,16 @@ again:
 	 * grab the truncate lock exclusive even if we're not allocating new blocks
 	 * because we could still be growing past the LEOF.
 	 */
-	if ((exclusive_lock == 0) && 
+	if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
 	    ((fp->ff_unallocblocks != 0) || (writelimit > origFileSize))) {
-	    	exclusive_lock = 1;
 		/* Lock upgrade failed and we lost our shared lock, try again */
 		if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
 			goto again;
 		} 
+		else {
+			/* Store the owner in the c_truncatelockowner field if we successfully upgrade */
+			cp->c_truncatelockowner = current_thread();  
+		}
 	}
 
 	if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
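
/*
 * Illustrative sketch, not part of the patch: the upgrade-or-retry idiom
 * used above, in user-space terms.  lck_rw_lock_shared_to_exclusive()
 * drops the shared lock when the upgrade fails, so the kernel code must
 * loop back to `again' and re-derive all state under a fresh lock.
 * POSIX rwlocks have no upgrade primitive at all, so the analogous
 * pattern is unlock, relock exclusive, then re-validate.  The predicate
 * below is a hypothetical stand-in for the ff_unallocblocks/writelimit
 * checks in the patch.
 */
#include <pthread.h>
#include <stdbool.h>

static pthread_rwlock_t truncate_lock = PTHREAD_RWLOCK_INITIALIZER;

/* stand-in for "growing past the LEOF or holding loaned blocks?" */
static bool needs_exclusive(void) { return true; }

static void locked_write_path(void)
{
	pthread_rwlock_rdlock(&truncate_lock);          /* shared: the common case */
	if (needs_exclusive()) {
		/* no upgrade primitive: drop shared, take exclusive, re-validate */
		pthread_rwlock_unlock(&truncate_lock);
		pthread_rwlock_wrlock(&truncate_lock);
		/* anything observed under the shared lock may now be stale --
		   the same reason the patch loops back to `again' when
		   lck_rw_lock_shared_to_exclusive() returns FALSE */
	}
	/* ... perform the size-changing work ... */
	pthread_rwlock_unlock(&truncate_lock);
}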
@@ -327,7 +374,7 @@ again:
 	}
 	cnode_locked = 1;
 	
-	if (!exclusive_lock) {
+	if (cp->c_truncatelockowner == HFS_SHARED_OWNER) {
 		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
 		             (int)offset, uio_resid(uio), (int)fp->ff_size,
 		             (int)filebytes, 0);
@@ -632,7 +679,10 @@ ioerr_exit:
 exit:
 	if (cnode_locked)
 		hfs_unlock(cp);
-	hfs_unlock_truncate(cp, exclusive_lock);
+	
+	if (took_truncate_lock) {
+		hfs_unlock_truncate(cp, 0);
+	}
 	return (retval);
 }
 
@@ -1004,7 +1054,7 @@ do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HF
 	    struct vnode *vp;
 
 	    /* get the vnode for this cnid */
-	    myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0);
+	    myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
 	    if ( myErr ) {
 		myResult = 0;
 		goto ExitThisRoutine;
@@ -1027,21 +1077,19 @@ do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HF
 	    }
 	} else {
 	    unsigned int flags;
-		   
-	    myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
-		cnattr.ca_mode, hfsmp->hfs_mp,
-		myp_ucred, theProcPtr);
+		int mode = cnattr.ca_mode & S_IFMT;
+		myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp, myp_ucred, theProcPtr);
 
-	    if (cnattr.ca_mode & S_IFDIR) {
-		flags = R_OK | X_OK;
-	    } else {
-		flags = R_OK;
-	    }
-	    if ( (myPerms & flags) != flags) {
-		myResult = 0;
-		myErr = EACCES;
-		goto ExitThisRoutine;   /* no access */
-	    }
+		if (mode == S_IFDIR) {
+			flags = R_OK | X_OK;
+		} else {
+			flags = R_OK;
+		}
+		if ( (myPerms & flags) != flags) {
+			myResult = 0;
+			myErr = EACCES;
+			goto ExitThisRoutine;   /* no access */
+		}
 
 	    /* up the hierarchy we go */
 	    thisNodeID = catkey.hfsPlus.parentID;
@@ -1284,7 +1332,7 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
 		struct vnode *cvp;
 		int myErr = 0;
 		/* get the vnode for this cnid */
-		myErr = hfs_vget(hfsmp, cnid, &cvp, 0);
+		myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
 		if ( myErr ) {
 		    access[i] = myErr;
 		    continue;
@@ -1432,6 +1480,15 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* {
 
 	is64bit = proc_is64bit(p);
 
+#if CONFIG_PROTECT
+	{
+		int error = 0;
+		if ((error = cp_handle_vnop(VTOC(vp), CP_WRITE_ACCESS)) != 0) {
+			return error;
+		}
+	}
+#endif /* CONFIG_PROTECT */
+
 	switch (ap->a_command) {
 
 	case HFS_GETPATH:
@@ -1491,7 +1548,7 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* {
 		if (linkfileid < kHFSFirstUserCatalogNodeID) {
 			return (EINVAL);
 		}
-		if ((error = hfs_lookuplink(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
+		if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
 			return (error);
 		}
 		if (ap->a_command == HFS_NEXT_LINK) {
@@ -1591,7 +1648,7 @@ fail_change_next_allocation:
 		return (error);
 	}
 
-#ifdef HFS_SPARSE_DEV
+#if HFS_SPARSE_DEV
 	case HFS_SETBACKINGSTOREINFO: {
 		struct vnode * bsfs_rootvp;
 		struct vnode * di_vp;
@@ -1641,7 +1698,17 @@ fail_change_next_allocation:
 		vnode_put(bsfs_rootvp);
 
 		hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
+
 		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
+		/* The free extent cache is managed differently for sparse devices.
+		 * There is a window between the time the volume is mounted and the
+		 * time the device is marked as sparse, during which the free extent
+		 * cache for this volume is initialized as for a normal volume
+		 * (sorted by block count).  Reset the cache so that it will be
+		 * rebuilt again for the sparse device (sorted by start block).
+		 */
+		ResetVCBFreeExtCache(hfsmp);
+
 		hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
 		hfsmp->hfs_sparsebandblks *= 4;
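
/*
 * Illustrative sketch, not part of the patch: why the free extent cache
 * must be reset when a volume is retroactively marked sparse.  Per the
 * comment above, a normal volume keeps the cache ordered by block count
 * (largest free extent first), while a sparse device keeps it ordered by
 * start block.  A minimal user-space sketch of the two orderings; the
 * extent struct is a stand-in, not the kernel definition.
 */
#include <stdint.h>
#include <stdlib.h>

struct free_extent { uint32_t startBlock; uint32_t blockCount; };

/* normal volume: largest free extents first */
static int cmp_by_count(const void *a, const void *b)
{
	const struct free_extent *x = a, *y = b;
	return (x->blockCount < y->blockCount) - (x->blockCount > y->blockCount);
}

/* sparse device: lowest start block first */
static int cmp_by_start(const void *a, const void *b)
{
	const struct free_extent *x = a, *y = b;
	return (x->startBlock > y->startBlock) - (x->startBlock < y->startBlock);
}

/* ResetVCBFreeExtCache() amounts to discarding entries collected under
 * one ordering so the cache can be repopulated under the other, e.g.
 * qsort(cache, n, sizeof(cache[0]), cmp_by_start). */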
 
@@ -1717,14 +1784,18 @@ fail_change_next_allocation:
 		// note: can't do this after taking the lock as it will
 		// deadlock against ourselves.
 		vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
-		hfs_global_exclusive_lock_acquire(hfsmp);
+		hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
 
 		// DO NOT call hfs_journal_flush() because that takes a
 		// shared lock on the global exclusive lock!
-		journal_flush(hfsmp->jnl);
+		journal_flush(hfsmp->jnl, TRUE);
 
 		// don't need to iterate on all vnodes, we just need to
 		// wait for writes to the system files and the device vnode
+		//
+		// Now that journal flush waits for all metadata blocks to 
+		// be written out, waiting for btree writes is probably no
+		// longer required.
 		if (HFSTOVCB(hfsmp)->extentsRefNum)
 		    vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
 		if (HFSTOVCB(hfsmp)->catalogRefNum)
@@ -1756,7 +1827,7 @@ fail_change_next_allocation:
 		//       code that "thaws" the fs in hfs_vnop_close()
 		//
 		hfsmp->hfs_freezing_proc = NULL;
-		hfs_global_exclusive_lock_release(hfsmp);
+		hfs_unlock_global (hfsmp);
 		lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
 
 		return (0);
@@ -1794,30 +1865,6 @@ fail_change_next_allocation:
 	    return do_bulk_access_check(hfsmp, vp, ap, size, context);
 	} 
 
-	case HFS_SETACLSTATE: {
-		int state;
-
-		if (ap->a_data == NULL) {
-			return (EINVAL);
-		}
-
-		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
-		state = *(int *)ap->a_data;
-
-		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
-			return (EROFS);
-		}
-		// super-user can enable or disable acl's on a volume.
-		// the volume owner can only enable acl's
-		if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
-			return (EPERM);
-		}
-		if (state == 0 || state == 1)
-			return hfs_set_volxattr(hfsmp, HFS_SETACLSTATE, state);
-		else
-			return (EINVAL);	
-	}
-
 	case HFS_SET_XATTREXTENTS_STATE: {
 		int state;
 
@@ -1833,6 +1880,9 @@ fail_change_next_allocation:
 
 		/* Super-user can enable or disable extent-based extended 
 		 * attribute support on a volume 
+		 * Note: Starting with Mac OS X 10.7, extent-based extended attributes
+		 * are enabled by default, so any change will only be transient
+		 * until the volume is remounted.
 		 */
 		if (!is_suser()) {
 			return (EPERM);
@@ -1891,7 +1941,7 @@ fail_change_next_allocation:
 		fp = VTOF(vp);
 
 		/* Protect against a size change. */
-		hfs_lock_truncate(VTOC(vp), TRUE);
+		hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK);
 
 #if HFS_COMPRESSION
 		if (compressed && (uncompressed_size == -1)) {
@@ -1910,7 +1960,7 @@ fail_change_next_allocation:
 			error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
 		}
 
-		hfs_unlock_truncate(VTOC(vp), TRUE);
+		hfs_unlock_truncate(VTOC(vp), 0);
 		return (error);
 	}
 
@@ -1934,21 +1984,22 @@ fail_change_next_allocation:
 		 * to a user_fbootstraptransfer_t else we get a pointer to a 
 		 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
 		 */
-		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+		if ((hfsmp->hfs_flags & HFS_READ_ONLY)
+			&& (ap->a_command == F_WRITEBOOTSTRAP)) {
 			return (EROFS);
 		}
 		if (is64bit) {
 			user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
 		}
 		else {
-	    	user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data;
+			user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data;
 			user_bootstrapp = &user_bootstrap;
 			user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
 			user_bootstrap.fbt_length = bootstrapp->fbt_length;
 			user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
 		}
 
-		if ((user_bootstrapp->fbt_offset < 0) || (user_bootstrapp->fbt_offset > 1024) ||
+		if ((user_bootstrapp->fbt_offset < 0) || (user_bootstrapp->fbt_offset > 1024) || 
 				(user_bootstrapp->fbt_length > 1024)) {
 			return EINVAL;
 		}
@@ -1956,7 +2007,7 @@ fail_change_next_allocation:
 		if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024) 
 			return EINVAL;
 	    
-	    devvp = VTOHFS(vp)->hfs_devvp;
+		devvp = VTOHFS(vp)->hfs_devvp;
 		auio = uio_create(1, user_bootstrapp->fbt_offset, 
 						  is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
 						  (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
@@ -2116,6 +2167,21 @@ fail_change_next_allocation:
 	    break;
 	}    
 
+	case HFS_DISABLE_METAZONE: {
+		/* Only root can disable metadata zone */
+		if (!is_suser()) {
+			return EACCES;
+		}
+		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+			return (EROFS);
+		}
+
+		/* Disable metadata zone now */
+		(void) hfs_metadatazone_init(hfsmp, true);
+		printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
+		break;
+	}
+	
 	default:
 		return (ENOTTY);
 	}
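
/*
 * Illustrative sketch, not part of the patch: how a privileged tool might
 * reach the new HFS_DISABLE_METAZONE case from user space.  HFS ioctls of
 * this kind are normally issued through fsctl(2); the header exporting the
 * command constant and its (lack of an) argument are assumptions here, not
 * something this patch shows.
 */
#include <stdio.h>
#include <sys/fsctl.h>      /* fsctl(2) */
#include <hfs/hfs_fsctl.h>  /* assumed home of HFS_DISABLE_METAZONE */

static int disable_metazone(const char *mountpoint)
{
	/* caller must be root: the kernel code above returns EACCES otherwise,
	   and EROFS on a read-only volume */
	if (fsctl(mountpoint, HFS_DISABLE_METAZONE, NULL, 0) == -1) {
		perror("fsctl(HFS_DISABLE_METAZONE)");
		return -1;
	}
	return 0;
}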
@@ -2541,8 +2607,32 @@ hfs_vnop_strategy(struct vnop_strategy_args *ap)
 {
 	buf_t	bp = ap->a_bp;
 	vnode_t	vp = buf_vnode(bp);
+	int error = 0;
+	
+#if CONFIG_PROTECT
+	cnode_t *cp = NULL; 
+	
+	if ((cp = cp_get_protected_cnode(vp)) != NULL) {
+		/*
+		 * Some paths to hfs_vnop_strategy will take the cnode lock, 
+		 * and some won't. But since content protection is only enabled
+		 * for files that (a) aren't system files and (b) are regular 
+		 * files, any valid cnode here will be unlocked.
+		 */
+		hfs_lock(cp, HFS_SHARED_LOCK);
+		buf_setcpaddr(bp, cp->c_cpentry);
+	}
+#endif /* CONFIG_PROTECT */
+	
+	error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
 
-	return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
+#if CONFIG_PROTECT
+	if (cp) {
+		hfs_unlock(cp);
+	}
+#endif
+	
+	return error;
 }
 
 static int 
@@ -2556,7 +2646,7 @@ hfs_minorupdate(struct vnode *vp) {
 	return 0;
 }
 
-static int
+int
 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
 {
 	register struct cnode *cp = VTOC(vp);
@@ -2801,8 +2891,8 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_c
 					lockflags |= SFL_EXTENTS;
 				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
 
-				retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
-						(FCB*)fp, length, false));
+				retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0, 
+													 FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
 
 				hfs_systemfile_unlock(hfsmp, lockflags);
 			}
@@ -2860,13 +2950,201 @@ Err_Exit:
 	return (retval);
 }
 
+/*
+ * Preparation which must be done prior to deleting the catalog record
+ * of a file or directory.  In order to keep the on-disk state as safe as possible,
+ * we remove the catalog entry before releasing the bitmap blocks and the 
+ * overflow extent records.  However, some work must be done prior to deleting
+ * the catalog record.
+ * 
+ * When calling this function, the cnode must exist both in memory and on-disk.
+ * If there are both resource fork and data fork vnodes, this function should
+ * be called on both.  
+ */
+
+int
+hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {
+	
+	struct filefork *fp = VTOF(vp);
+	struct cnode *cp = VTOC(vp);
+	int retval = 0;
+	
+	/* Cannot truncate an HFS directory! */
+	if (vnode_isdir(vp)) {
+		return (EISDIR);
+	}
+	
+	/* 
+	 * See the comment below in hfs_truncate for why we need to call 
+	 * setsize here.  Essentially we want to avoid pending IO if we 
+	 * already know that the blocks are going to be released here.
+	 * This function is only called when removing all storage for a file, so
+	 * we can take a shortcut and call ubc_setsize(0) immediately.
+	 */
+	ubc_setsize(vp, 0);
+	
+	/* This should only happen with a corrupt filesystem */
+	if ((off_t)fp->ff_size < 0)
+		return (EINVAL);
+	
+	/* 
+	 * We cannot just check if fp->ff_size == length (as an optimization)
+	 * since there may be extra physical blocks that also need truncation.
+	 */
+#if QUOTA
+	if ((retval = hfs_getinoquota(cp))) {
+		return(retval);
+	}
+#endif /* QUOTA */
+	
+	/* Wipe out any invalid ranges which have yet to be backed by disk */
+	rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);
+	
+	/* 
+	 * Account for any unmapped blocks. Since we're deleting the 
+	 * entire file, we don't have to worry about just shrinking
+	 * to a smaller number of borrowed blocks.
+	 */
+	if (fp->ff_unallocblocks > 0) {
+		u_int32_t loanedBlocks;
+		
+		HFS_MOUNT_LOCK(hfsmp, TRUE);
+		
+		loanedBlocks = fp->ff_unallocblocks;
+		cp->c_blocks -= loanedBlocks;
+		fp->ff_blocks -= loanedBlocks;
+		fp->ff_unallocblocks = 0;
+		
+		hfsmp->loanedBlocks -= loanedBlocks;
+		
+		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+	}
+	
+	return 0;
+}
+
+
+/*
+ * Special wrapper around calling TruncateFileC.  This function is usable
+ * even when the catalog record does not exist any longer, making it ideal
+ * for use when deleting a file.  The simplification here is that we know 
+ * that we are releasing all blocks.
+ *
+ * The caller is responsible for saving off a copy of the filefork(s)
+ * embedded within the cnode prior to calling this function.  The pointers
+ * supplied as arguments must be valid even if the cnode is no longer valid.
+ */
+
+int 
+hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, 
+					 struct filefork *rsrcfork, u_int32_t fileid) {
+	
+	off_t filebytes;
+	u_int32_t fileblocks;
+	int blksize = 0;
+	int error = 0;
+	int lockflags;
+	
+	blksize = hfsmp->blockSize;
+	
+	/* Data Fork */
+	if (datafork->ff_blocks > 0) {
+		fileblocks = datafork->ff_blocks;
+		filebytes = (off_t)fileblocks * (off_t)blksize;		
+		
+		/* We killed invalid ranges and loaned blocks before we removed the catalog entry */
+		
+		while (filebytes > 0) {
+			if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(datafork)) {
+				filebytes -= HFS_BIGFILE_SIZE;
+			} else {
+				filebytes = 0;
+			}
+			
+			/* Start a transaction, and wipe out as many blocks as we can in this iteration */
+			if (hfs_start_transaction(hfsmp) != 0) {
+				error = EINVAL;
+				break;
+			}
+			
+			if (datafork->ff_unallocblocks == 0) {
+				/* Protect extents b-tree and allocation bitmap */
+				lockflags = SFL_BITMAP;
+				if (overflow_extents(datafork))
+					lockflags |= SFL_EXTENTS;
+				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+				
+				error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false));
+				
+				hfs_systemfile_unlock(hfsmp, lockflags);
+			}
+			if (error == 0) {
+				datafork->ff_size = filebytes;
+			}
+			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+			
+			/* Finish the transaction and start over if necessary */
+			hfs_end_transaction(hfsmp);
+			
+			if (error) {
+				break;
+			}
+		}
+	}
+	
+	/* Resource fork */
+	if (error == 0 && (rsrcfork != NULL) && rsrcfork->ff_blocks > 0) {
+		fileblocks = rsrcfork->ff_blocks;
+		filebytes = (off_t)fileblocks * (off_t)blksize;
+		
+		/* We killed invalid ranges and loaned blocks before we removed the catalog entry */
+		
+		while (filebytes > 0) {
+			if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(rsrcfork)) {
+				filebytes -= HFS_BIGFILE_SIZE;
+			} else {
+				filebytes = 0;
+			}
+			
+			/* Start a transaction, and wipe out as many blocks as we can in this iteration */
+			if (hfs_start_transaction(hfsmp) != 0) {
+				error = EINVAL;
+				break;
+			}
+			
+			if (rsrcfork->ff_unallocblocks == 0) {
+				/* Protect extents b-tree and allocation bitmap */
+				lockflags = SFL_BITMAP;
+				if (overflow_extents(rsrcfork))
+					lockflags |= SFL_EXTENTS;
+				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+				
+				error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false));
+				
+				hfs_systemfile_unlock(hfsmp, lockflags);
+			}
+			if (error == 0) {
+				rsrcfork->ff_size = filebytes;
+			}
+			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
+			
+			/* Finish the transaction and start over if necessary */
+			hfs_end_transaction(hfsmp);			
+			
+			if (error) {
+				break;
+			}
+		}
+	}
+	
+	return error;
+}
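
/*
 * Illustrative sketch, not part of the patch: the intended calling order
 * for the two helpers above when deleting a file, per their comments.
 * The function and variable names here are hypothetical; error handling
 * and locking are elided.  The fork copies must be taken before the
 * catalog record (and with it the cnode) is destroyed, since
 * hfs_release_storage() runs after the cnode may no longer be valid.
 */
static int delete_file_storage(struct hfsmount *hfsmp, struct vnode *data_vp,
                               struct vnode *rsrc_vp, u_int32_t fileid)
{
	struct filefork datafork;             /* saved copies outlive the cnode */
	struct filefork rsrcfork, *rsrcp = NULL;

	/* 1. quiesce each fork while the cnode is still valid */
	hfs_prepare_release_storage(hfsmp, data_vp);
	if (rsrc_vp) {
		hfs_prepare_release_storage(hfsmp, rsrc_vp);
	}

	/* 2. snapshot the fork structures embedded in the cnode */
	datafork = *VTOF(data_vp);
	if (rsrc_vp) {
		rsrcfork = *VTOF(rsrc_vp);
		rsrcp = &rsrcfork;
	}

	/* 3. remove the catalog record first (safest on-disk ordering) ... */

	/* 4. ... then free the bitmap blocks and overflow extents */
	return hfs_release_storage(hfsmp, &datafork, rsrcp, fileid);
}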
 
 
 /*
  * Truncate a cnode to at most length size, freeing (or adding) the
  * disk blocks.
  */
-__private_extern__
 int
 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
              int skipupdate, vfs_context_t context)
@@ -2980,6 +3258,7 @@ hfs_vnop_allocate(struct vnop_allocate_args /* {
 	struct hfsmount *hfsmp;
 	kauth_cred_t cred = vfs_context_ucred(ap->a_context);
 	int lockflags;
+	time_t orig_ctime;
 
 	*(ap->a_bytesallocated) = 0;
 
@@ -2990,7 +3269,11 @@ hfs_vnop_allocate(struct vnop_allocate_args /* {
 	
 	cp = VTOC(vp);
 
-	hfs_lock_truncate(cp, TRUE);
+	orig_ctime = VTOC(vp)->c_ctime;
+
+	check_for_tracked_file(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);
+
+	hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
 
 	if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
 		goto Err_Exit;
@@ -3181,7 +3464,7 @@ Std_Exit:
 	if (retval == 0)
 		retval = retval2;
 Err_Exit:
-	hfs_unlock_truncate(cp, TRUE);
+	hfs_unlock_truncate(cp, 0);
 	hfs_unlock(cp);
 	return (retval);
 }
@@ -3204,74 +3487,298 @@ hfs_vnop_pagein(struct vnop_pagein_args *ap)
 	};
 */
 {
-	vnode_t vp = ap->a_vp;
-	int error;
+	vnode_t 	vp;
+	struct cnode	*cp;
+	struct filefork *fp;
+	int		error = 0;
+	upl_t 		upl;
+	upl_page_info_t	*pl;
+	off_t		f_offset;
+	int		offset;
+	int		isize; 
+	int		pg_index;
+	boolean_t	truncate_lock_held = FALSE;
+	boolean_t 	file_converted = FALSE;
+	kern_return_t	kret;
+	
+	vp = ap->a_vp;
+	cp = VTOC(vp);
+	fp = VTOF(vp);
+
+#if CONFIG_PROTECT
+	if ((error = cp_handle_vnop(cp, CP_READ_ACCESS | CP_WRITE_ACCESS)) != 0) {
+		return error;
+	}
+#endif /* CONFIG_PROTECT */
+
+	if (ap->a_pl != NULL) {
+		/*
+		 * this can only happen for swap files now that
+		 * we're asking for V2 paging behavior...
+		 * so we don't need to worry about decompression,
+		 * keeping track of blocks read, or taking the truncate lock
+		 */
+		error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
+				       ap->a_size, (off_t)fp->ff_size, ap->a_flags);
+		goto pagein_done;
+	}
+
+retry_pagein:
+	/*
+	 * take truncate lock (shared/recursive) to guard against 
+	 * zero-fill thru fsync interfering, but only for v2
+	 *
+	 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the 
+	 * lock shared and we are allowed to recurse 1 level if this thread already
+	 * owns the lock exclusively... this can legally occur
+	 * if we are doing a shrinking ftruncate against a file
+	 * that is mapped private, and the pages being truncated
+	 * do not currently exist in the cache... in that case
+	 * we will have to page-in the missing pages in order
+	 * to provide them to the private mapping... we must
+	 * also call hfs_unlock_truncate with a positive been_recursed
+	 * arg to indicate that if we have recursed, there is no need to drop
+	 * the lock.  Allowing this simple recursion is necessary
+	 * in order to avoid a certain deadlock... since the ftruncate
+	 * already holds the truncate lock exclusively, if we try
+	 * to acquire it shared to protect the pagein path, we will
+	 * hang this thread
+	 *
+	 * NOTE: The if () block below is a workaround in order to prevent a 
+	 * VM deadlock. See rdar://7853471.
+	 * 
+	 * If we are in a forced unmount, then launchd will still have the 
+	 * dyld_shared_cache file mapped as it is trying to reboot.  If we 
+	 * take the truncate lock here to service a page fault, then our 
+	 * thread could deadlock with the forced-unmount.  The forced unmount 
+	 * thread will try to reclaim the dyld_shared_cache vnode, but since it's 
+	 * marked C_DELETED, it will call ubc_setsize(0).  As a result, the unmount 
+	 * thread will think it needs to copy all of the data out of the file 
+	 * and into a VM copy object.  If we hold the cnode lock here, then that 
+	 * VM operation will not be able to proceed, because we'll set a busy page 
+	 * before attempting to grab the lock.  Note that this isn't as simple as "don't
+	 * call ubc_setsize" because doing that would just shift the problem to the
+	 * ubc_msync done before the vnode is reclaimed.
+	 *
+	 * So, if a forced unmount on this volume is in flight AND the cnode is 
+	 * marked C_DELETED, then just go ahead and do the page in without taking 
+	 * the lock (thus suspending pagein_v2 semantics temporarily).  Since it's on a file
+	 * that is not going to be available on the next mount, this seems like an
+	 * OK solution from a correctness point of view, even though it is hacky.
+	 */
+	if (vfs_isforce(vp->v_mount)) {
+		if (cp->c_flag & C_DELETED) {
+			/* If we don't get it, then just go ahead and operate without the lock */
+			truncate_lock_held = hfs_try_trunclock(cp, HFS_RECURSE_TRUNCLOCK);
+		}
+	}
+	else {
+		hfs_lock_truncate(cp, HFS_RECURSE_TRUNCLOCK);
+		truncate_lock_held = TRUE;
+	}
+
+	kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT); 
+
+	if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
+		error = EINVAL;
+		goto pagein_done;
+	}
+	isize = ap->a_size;
+
+	/* 
+	 * Scan from the back to find the last page in the UPL, so that we 
+	 * aren't looking at a UPL that may have already been freed by the
+	 * preceding aborts/completions.
+	 */ 
+	for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
+		if (upl_page_present(pl, --pg_index))
+			break;
+		if (pg_index == 0) {
+			/*
+			 * no absent pages were found in the range specified
+			 * just abort the UPL to get rid of it and then we're done
+			 */
+			ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
+			goto pagein_done;
+		}
+	}
+	/* 
+	 * initialize the offset variables before we touch the UPL.
+	 * f_offset is the position into the file, in bytes
+	 * offset is the position into the UPL, in bytes
+	 * pg_index is the pg# of the UPL we're operating on
+	 * isize is the offset into the UPL of the last page that is present. 
+	 */
+	isize = ((pg_index + 1) * PAGE_SIZE);	
+	pg_index = 0;
+	offset = 0;
+	f_offset = ap->a_f_offset;
+
+	while (isize) {
+		int  xsize;
+		int  num_of_pages;
+
+		if ( !upl_page_present(pl, pg_index)) {
+			/*
+			 * we asked for RET_ONLY_ABSENT, so it's possible
+			 * to get back empty slots in the UPL.
+			 * just skip over them
+			 */
+			f_offset += PAGE_SIZE;
+			offset   += PAGE_SIZE;
+			isize    -= PAGE_SIZE;
+			pg_index++;
+
+			continue;
+		}
+		/* 
+		 * We know that we have at least one absent page.
+		 * Now checking to see how many in a row we have
+		 */
+		num_of_pages = 1;
+		xsize = isize - PAGE_SIZE;
+
+		while (xsize) {
+			if ( !upl_page_present(pl, pg_index + num_of_pages))
+				break;
+			num_of_pages++;
+			xsize -= PAGE_SIZE;
+		}
+		xsize = num_of_pages * PAGE_SIZE;
 
 #if HFS_COMPRESSION
-	if (VNODE_IS_RSRC(vp)) {
-		/* allow pageins of the resource fork */
-	} else {
-		int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
-		if (compressed) {
-			error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
+		if (VNODE_IS_RSRC(vp)) {
+			/* allow pageins of the resource fork */
+		} else {
+			int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
+
 			if (compressed) {
-				if (error == 0) {
-					/* successful page-in, update the access time */
-					VTOC(vp)->c_touch_acctime = TRUE;
+				if (truncate_lock_held) {
+					/*
+					 * can't hold the truncate lock when calling into the decmpfs layer
+					 * since it calls back into this layer... even though we're only
+					 * holding the lock in shared mode, and the re-entrant path only
+					 * takes the lock shared, we can deadlock if some other thread
+					 * tries to grab the lock exclusively in between.
+					 */
+					hfs_unlock_truncate(cp, 1);
+					truncate_lock_held = FALSE;
+				}
+				ap->a_pl = upl;
+				ap->a_pl_offset = offset;
+				ap->a_f_offset = f_offset;
+				ap->a_size = xsize;
+
+				error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
+				/*
+				 * note that decmpfs_pagein_compressed can change the state of
+				 * 'compressed'... it will set it to 0 if the file is no longer
+				 * compressed once the compression lock is successfully taken
+				 * i.e. we would block on that lock while the file is being inflated
+				 */
+				if (compressed) {
+					if (error == 0) {
+						/* successful page-in, update the access time */
+						VTOC(vp)->c_touch_acctime = TRUE;
 					
-					/* compressed files are not hot file candidates */
-					if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
-						VTOF(vp)->ff_bytesread = 0;
+						/* compressed files are not hot file candidates */
+						if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
+							fp->ff_bytesread = 0;
+						}
+					} else if (error == EAGAIN) {
+						/*
+						 * EAGAIN indicates someone else already holds the compression lock...
+						 * to avoid deadlocking, we'll abort this range of pages with an
+						 * indication that the pagein needs to be redriven
+						 */
+			        		ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
 					}
+					goto pagein_next_range;
+				}
+				else {
+					/* 
+					 * Set file_converted only if the file became decompressed while we were
+					 * paging in.  If it were still compressed, we would re-start the loop using the goto
+					 * in the above block.  This avoids overloading truncate_lock_held as our retry_pagein
+					 * condition below, since we could have avoided taking the truncate lock to prevent
+					 * a deadlock in the force unmount case.
+					 */
+					file_converted = TRUE;
 				}
-				return error;
 			}
-			/* otherwise the file was converted back to a regular file while we were reading it */
+			if (file_converted == TRUE) {
+				/*
+				 * the file was converted back to a regular file after we first saw it as compressed.
+				 * We need to abort the UPL, retake the truncate lock, recreate the UPL, and start over;
+				 * reset a_size so that we consider what remains of the original request,
+				 * and null out a_pl and a_pl_offset.
+				 *
+				 * We should only be able to get into this block if decmpfs_pagein_compressed
+				 * successfully decompressed the range in question for this file.
+				 */
+				ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
+
+				ap->a_size = isize;
+				ap->a_pl = NULL;
+				ap->a_pl_offset = 0;
+
+				/* Reset file_converted back to false so that we don't infinite-loop. */
+				file_converted = FALSE;
+				goto retry_pagein;
+			}
 		}
-	}
 #endif
+		error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);
 
-	error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
-	                       ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
-	/*
-	 * Keep track of blocks read.
-	 */
-	if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
-		struct cnode *cp;
-		struct filefork *fp;
-		int bytesread;
-		int took_cnode_lock = 0;
+		/*
+		 * Keep track of blocks read.
+		 */
+		if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
+			int bytesread;
+			int took_cnode_lock = 0;
 		
-		cp = VTOC(vp);
-		fp = VTOF(vp);
+			if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
+				bytesread = fp->ff_size;
+			else
+				bytesread = xsize;
 
-		if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
-			bytesread = fp->ff_size;
-		else
-			bytesread = ap->a_size;
+			/* When ff_bytesread exceeds 32 bits, update it behind the cnode lock. */
+			if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
+				hfs_lock(cp, HFS_FORCE_LOCK);
+				took_cnode_lock = 1;
+			}
+			/*
+			 * If this file hasn't been seen since the start of
+			 * the current sampling period then start over.
+			 */
+			if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
+				struct timeval tv;
 
-		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
-		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
-			hfs_lock(cp, HFS_FORCE_LOCK);
-			took_cnode_lock = 1;
+				fp->ff_bytesread = bytesread;
+				microtime(&tv);
+				cp->c_atime = tv.tv_sec;
+			} else {
+				fp->ff_bytesread += bytesread;
+			}
+			cp->c_touch_acctime = TRUE;
+			if (took_cnode_lock)
+				hfs_unlock(cp);
 		}
-		/*
-		 * If this file hasn't been seen since the start of
-		 * the current sampling period then start over.
-		 */
-		if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
-			struct timeval tv;
+pagein_next_range:
+		f_offset += xsize;
+		offset   += xsize;
+		isize    -= xsize;
+		pg_index += num_of_pages;
 
-			fp->ff_bytesread = bytesread;
-			microtime(&tv);
-			cp->c_atime = tv.tv_sec;
-		} else {
-			fp->ff_bytesread += bytesread;
-		}
-		cp->c_touch_acctime = TRUE;
-		if (took_cnode_lock)
-			hfs_unlock(cp);
+		error = 0;
 	}
+
+pagein_done:
+	if (truncate_lock_held == TRUE) {
+		/* Note: 1 is passed to hfs_unlock_truncate as the been_recursed argument */
+		hfs_unlock_truncate(cp, 1);
+	}
+
 	return (error);
 }
 
@@ -3338,7 +3845,7 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap)
 		 * take truncate lock (shared) to guard against 
 		 * zero-fill thru fsync interfering, but only for v2 
 		 */
-		hfs_lock_truncate(cp, 0);
+		hfs_lock_truncate(cp, HFS_SHARED_LOCK);
 
 		if (a_flags & UPL_MSYNC) {
 			request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
@@ -3346,6 +3853,7 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap)
 		else {
 			request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
 		}
+		
 		kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags); 
 
 		if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
@@ -3649,7 +4157,6 @@ hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
  *
  * During step 3 page-ins to the file get suspended.
  */
-__private_extern__
 int
 hfs_relocate(struct  vnode *vp, u_int32_t  blockHint, kauth_cred_t cred,
 	struct  proc *p)
@@ -3685,6 +4192,22 @@ hfs_relocate(struct  vnode *vp, u_int32_t  blockHint, kauth_cred_t cred,
 	fp = VTOF(vp);
 	if (fp->ff_unallocblocks)
 		return (EINVAL);
+
+#if CONFIG_PROTECT
+	/* 
+	 * <rdar://problem/9118426>
+	 * Disable HFS file relocation on content-protected filesystems
+	 */
+	if (cp_fs_protected (hfsmp->hfs_mp)) {
+		return EINVAL;
+	}
+#endif
+
+	/* If it's an SSD, also disable HFS relocation */
+	if (hfsmp->hfs_flags & HFS_SSD) {
+		return EINVAL;
+	}
+
 	blksize = hfsmp->blockSize;
 	if (blockHint == 0)
 		blockHint = hfsmp->nextAllocation;
@@ -3707,15 +4230,15 @@ hfs_relocate(struct  vnode *vp, u_int32_t  blockHint, kauth_cred_t cred,
 
 	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
 		hfs_unlock(cp);
-		hfs_lock_truncate(cp, TRUE);
+		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
 		/* Force lock since callers expects lock to be held. */
 		if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
-			hfs_unlock_truncate(cp, TRUE);
+			hfs_unlock_truncate(cp, 0);
 			return (retval);
 		}
 		/* No need to continue if file was removed. */
 		if (cp->c_flag & C_NOEXISTS) {
-			hfs_unlock_truncate(cp, TRUE);
+			hfs_unlock_truncate(cp, 0);
 			return (ENOENT);
 		}
 		took_trunc_lock = 1;
@@ -3730,7 +4253,7 @@ hfs_relocate(struct  vnode *vp, u_int32_t  blockHint, kauth_cred_t cred,
 
 	if (hfs_start_transaction(hfsmp) != 0) {
 		if (took_trunc_lock)
-			hfs_unlock_truncate(cp, TRUE);
+			hfs_unlock_truncate(cp, 0);
 	    return (EINVAL);
 	}
 	started_tr = 1;
@@ -3850,7 +4373,7 @@ hfs_relocate(struct  vnode *vp, u_int32_t  blockHint, kauth_cred_t cred,
 		goto restore;
 out:
 	if (took_trunc_lock)
-		hfs_unlock_truncate(cp, TRUE);
+		hfs_unlock_truncate(cp, 0);
 
 	if (lockflags) {
 		hfs_systemfile_unlock(hfsmp, lockflags);
@@ -3876,7 +4399,7 @@ exit:
 restore:
 	if (fp->ff_blocks == headblks) {
 		if (took_trunc_lock)
-			hfs_unlock_truncate(cp, TRUE);
+			hfs_unlock_truncate(cp, 0);
 		goto exit;
 	}
 	/*
@@ -3889,13 +4412,14 @@ restore:
 		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
 	}
 
-	(void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
+	(void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp), 
+						 FTOC(fp)->c_fileid, false);
 
 	hfs_systemfile_unlock(hfsmp, lockflags);
 	lockflags = 0;
 
 	if (took_trunc_lock)
-		hfs_unlock_truncate(cp, TRUE);
+		hfs_unlock_truncate(cp, 0);
 	goto exit;
 }
 
@@ -3954,10 +4478,19 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
 	iosize = bufsize = MIN(copysize, 128 * 1024);
 	offset = 0;
 
+	hfs_unlock(VTOC(vp));
+
+#if CONFIG_PROTECT
+	if ((error = cp_handle_vnop(VTOC(vp), CP_WRITE_ACCESS)) != 0) {
+		hfs_lock(VTOC(vp), HFS_FORCE_LOCK);	
+		return (error);
+	}
+#endif /* CONFIG_PROTECT */
+
 	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
+		hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
 		return (ENOMEM);
-	}	
-	hfs_unlock(VTOC(vp));
+	}
 
 	auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
 
diff --git a/bsd/hfs/hfs_search.c b/bsd/hfs/hfs_search.c
index 6a8a8b74f..878c70dc5 100644
--- a/bsd/hfs/hfs_search.c
+++ b/bsd/hfs/hfs_search.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 1997-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -48,6 +48,7 @@
 #include <sys/utfconv.h>
 #include <sys/kauth.h>
 #include <sys/vnode_internal.h>
+#include <sys/mount_internal.h>
 
 #if CONFIG_MACF
 #include <security/mac_framework.h>
@@ -154,7 +155,6 @@ vnop_searchfs {
 };
 */
 
-__private_extern__
 int
 hfs_vnop_search(ap)
 	struct vnop_searchfs_args *ap; /*
@@ -186,7 +186,6 @@ hfs_vnop_search(ap)
 	struct proc *p = current_proc();
 	int err = E_NONE;
 	int isHFSPlus;
-	int timerExpired = false;
 	CatalogKey * myCurrentKeyPtr;
 	CatalogRecord * myCurrentDataPtr;
 	CatPosition * myCatPositionPtr;
@@ -195,6 +194,9 @@ hfs_vnop_search(ap)
 	user_size_t user_len = 0;
 	int32_t searchTime;
 	int lockflags;
+	struct uthread	*ut;
+	boolean_t timerExpired = FALSE;
+	boolean_t needThrottle = FALSE;
 
 	/* XXX Parameter check a_searchattrs? */
 
@@ -307,7 +309,7 @@ hfs_vnop_search(ap)
 		(void) hfs_fsync(vcb->catalogRefNum, MNT_WAIT, 0, p);
 		if (hfsmp->jnl) {
 		    hfs_systemfile_unlock(hfsmp, lockflags);
-		    hfs_journal_flush(hfsmp);
+		    hfs_journal_flush(hfsmp, FALSE);
 		    lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
 		}
 
@@ -336,6 +338,8 @@ hfs_vnop_search(ap)
 	if (err)
 		goto ExitThisRoutine;
 
+	if (throttle_get_io_policy(&ut) == IOPOL_THROTTLE)
+		needThrottle = TRUE;
 	/*
 	 * Check all the catalog btree records...
 	 *   return the attributes for matching items
@@ -373,18 +377,24 @@ hfs_vnop_search(ap)
 			if (*(ap->a_nummatches) >= ap->a_maxmatches)
 				break;
 		}
-
-		/*
-		 * Check our elapsed time and bail if we've hit the max.
-		 * The idea here is to throttle the amount of time we
-		 * spend in the kernel.
-		 */
-		microuptime(&myCurrentTime);
-		timersub(&myCurrentTime, &myBTScanState.startTime, &myElapsedTime);
-		/* Note: assumes kMaxMicroSecsInKernel is less than 1,000,000 */
-		if (myElapsedTime.tv_sec > 0
-		||  myElapsedTime.tv_usec >= searchTime) {
-			timerExpired = true;
+		if (timerExpired == FALSE) {
+			/*
+			 * Check our elapsed time and bail if we've hit the max.
+			 * The idea here is to throttle the amount of time we
+			 * spend in the kernel.
+			 */
+			microuptime(&myCurrentTime);
+			timersub(&myCurrentTime, &myBTScanState.startTime, &myElapsedTime);
+			/*
+			 * Note: assumes kMaxMicroSecsInKernel is less than 1,000,000
+			 */
+			if (myElapsedTime.tv_sec > 0
+			    ||  myElapsedTime.tv_usec >= searchTime) {
+				timerExpired = TRUE;
+			} else if (needThrottle == TRUE) {
+				if (throttle_io_will_be_throttled(ut->uu_lowpri_window, HFSTOVFS(hfsmp)))
+					timerExpired = TRUE;
+			}
 		}
 	}
 
@@ -436,12 +446,12 @@ ResolveHardlink(struct hfsmount *hfsmp, HFSPlusCatalogFile *recp)
 	filecreatedate = to_bsd_time(recp->createDate);
 
 	if ((type == kHardLinkFileType && creator == kHFSPlusCreator) &&
-	    (filecreatedate == (time_t)hfsmp->vcbCrDate ||
+	    (filecreatedate == (time_t)hfsmp->hfs_itime ||
 	     filecreatedate == (time_t)hfsmp->hfs_metadata_createdate)) {
 		isfilelink = 1;
 	} else if ((type == kHFSAliasType && creator == kHFSAliasCreator) &&
 	           (recp->flags & kHFSHasLinkChainMask) &&
-	           (filecreatedate == (time_t)hfsmp->vcbCrDate ||
+	           (filecreatedate == (time_t)hfsmp->hfs_itime ||
 	            filecreatedate == (time_t)hfsmp->hfs_metadata_createdate)) {
 		isdirlink = 1;
 	}
@@ -556,7 +566,7 @@ CheckAccess(ExtendedVCB *theVCBPtr, u_long searchBits, CatalogKey *theKeyPtr, st
 		cnode_t *	cp;
 		
 		/* now go get catalog data for this directory */
-		myErr = hfs_vget(hfsmp, myNodeID, &vp, 0);
+		myErr = hfs_vget(hfsmp, myNodeID, &vp, 0, 0);
 		if ( myErr ) {
 			goto ExitThisRoutine;	/* no access */
 		}
diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c
index 7a049916f..1a1ca2aa4 100644
--- a/bsd/hfs/hfs_vfsops.c
+++ b/bsd/hfs/hfs_vfsops.c
@@ -87,6 +87,7 @@
 #include <sys/utfconv.h>
 #include <sys/kdebug.h>
 #include <sys/fslog.h>
+#include <sys/ubc.h>
 
 #include <kern/locks.h>
 
@@ -109,6 +110,16 @@
 #include "hfscommon/headers/FileMgrInternal.h"
 #include "hfscommon/headers/BTreesInternal.h"
 
+#if CONFIG_PROTECT
+#include <sys/cprotect.h>
+#endif
+
+#if CONFIG_HFS_ALLOC_RBTREE
+#include "hfscommon/headers/HybridAllocator.h"
+#endif
+
+#define HFS_MOUNT_DEBUG 1
+
 #if	HFS_DIAGNOSTIC
 int hfs_dbg_all = 0;
 int hfs_dbg_err = 0;
@@ -121,6 +132,7 @@ lck_grp_attr_t *  hfs_group_attr;
 lck_attr_t *  hfs_lock_attr;
 lck_grp_t *  hfs_mutex_group;
 lck_grp_t *  hfs_rwlock_group;
+lck_grp_t *  hfs_spinlock_group;
 
 extern struct vnodeopv_desc hfs_vnodeop_opv_desc;
 extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc;
@@ -134,29 +146,30 @@ static int hfs_flushfiles(struct mount *, int, struct proc *);
 static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush);
 static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp);
 static int hfs_init(struct vfsconf *vfsp);
-static int hfs_mount(struct mount *mp, vnode_t  devvp, user_addr_t data, vfs_context_t context);
-static int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context);
-static int hfs_reload(struct mount *mp);
 static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context);
 static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context);
 static int hfs_start(struct mount *mp, int flags, vfs_context_t context);
-static int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context);
-static int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context);
-static int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, 
-                      user_addr_t newp, size_t newlen, vfs_context_t context);
-static int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
 static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context);
-
-static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimblks, vfs_context_t context);
-static int hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t fileID);
+static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec);
 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context);
+static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context);
 
+void hfs_initialize_allocator (struct hfsmount *hfsmp);
+int hfs_teardown_allocator (struct hfsmount *hfsmp);
+
+int hfs_mount(struct mount *mp, vnode_t  devvp, user_addr_t data, vfs_context_t context);
+int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context);
+int hfs_reload(struct mount *mp);
+int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context);
+int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context);
+int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, 
+                      user_addr_t newp, size_t newlen, vfs_context_t context);
+int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
 
 /*
  * Called by vfs_mountroot when mounting HFS Plus as root.
  */
 
-__private_extern__
 int
 hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
 {
@@ -165,8 +178,13 @@ hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
 	struct vfsstatfs *vfsp;
 	int error;
 
-	if ((error = hfs_mountfs(rvp, mp, NULL, 0, context)))
+	if ((error = hfs_mountfs(rvp, mp, NULL, 0, context))) {
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mountroot: hfs_mountfs returned %d, rvp (%p) name (%s) \n", 
+					error, rvp, (rvp->v_name ? rvp->v_name : "unknown device"));
+		}
 		return (error);
+	}
 
 	/* Init hfsmp */
 	hfsmp = VFSTOHFS(mp);
@@ -194,7 +212,7 @@ hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
  * mount system call
  */
 
-static int
+int
 hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context)
 {
 	struct proc *p = vfs_context_proc(context);
@@ -204,6 +222,9 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
 	u_int32_t cmdflags;
 
 	if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) {
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mount: copyin returned %d for fs\n", retval);
+		}
 		return (retval);
 	}
 	cmdflags = (u_int32_t)vfs_flags(mp) & MNT_CMDFLAGS;
@@ -212,10 +233,19 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
 
 		/* Reload incore data after an fsck. */
 		if (cmdflags & MNT_RELOAD) {
-			if (vfs_isrdonly(mp))
-				return hfs_reload(mp);
-			else
+			if (vfs_isrdonly(mp)) {
+				int error = hfs_reload(mp);
+				if (error && HFS_MOUNT_DEBUG) {
+					printf("hfs_mount: hfs_reload returned %d on %s \n", error, hfsmp->vcbVN);
+				}
+				return error;
+			}
+			else {
+				if (HFS_MOUNT_DEBUG) {
+					printf("hfs_mount: MNT_RELOAD not supported on rdwr filesystem %s\n", hfsmp->vcbVN);
+				}
 				return (EINVAL);
+			}
 		}
 
 		/* Change to a read-only file system. */
@@ -227,16 +257,19 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
 			 * is in progress and therefore block any further 
 			 * modifications to the file system.
 			 */
-			hfs_global_exclusive_lock_acquire(hfsmp);
+			hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
 			hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE;
 			hfsmp->hfs_downgrading_proc = current_thread();
-			hfs_global_exclusive_lock_release(hfsmp);
+			hfs_unlock_global (hfsmp);
 
 			/* use VFS_SYNC to push out System (btree) files */
 			retval = VFS_SYNC(mp, MNT_WAIT, context);
 			if (retval && ((cmdflags & MNT_FORCE) == 0)) {
 				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
 				hfsmp->hfs_downgrading_proc = NULL;
+				if (HFS_MOUNT_DEBUG) {
+					printf("hfs_mount: VFS_SYNC returned %d during b-tree sync of %s \n", retval, hfsmp->vcbVN);
+				}
 				goto out;
 			}
 		
@@ -247,6 +280,9 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
 			if ((retval = hfs_flushfiles(mp, flags, p))) {
 				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
 				hfsmp->hfs_downgrading_proc = NULL;
+				if (HFS_MOUNT_DEBUG) {
+					printf("hfs_mount: hfs_flushfiles returned %d on %s \n", retval, hfsmp->vcbVN);
+				}
 				goto out;
 			}
 
@@ -266,13 +302,16 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
 				}
 			}
 			if (retval) {
+				if (HFS_MOUNT_DEBUG) {
+					printf("hfs_mount: FSYNC on devvp returned %d for fs %s\n", retval, hfsmp->vcbVN);
+				}
 				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
 				hfsmp->hfs_downgrading_proc = NULL;
 				hfsmp->hfs_flags &= ~HFS_READ_ONLY;
 				goto out;
 			}
 			if (hfsmp->jnl) {
-			    hfs_global_exclusive_lock_acquire(hfsmp);
+				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
 
 			    journal_close(hfsmp->jnl);
 			    hfsmp->jnl = NULL;
@@ -281,14 +320,20 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
 			    //       access to the jvp because we may need
 			    //       it later if we go back to being read-write.
 
-			    hfs_global_exclusive_lock_release(hfsmp);
+				hfs_unlock_global (hfsmp);
 			}
 
+#if CONFIG_HFS_ALLOC_RBTREE
+			(void) hfs_teardown_allocator(hfsmp);
+#endif						
 			hfsmp->hfs_downgrading_proc = NULL;
 		}
 
 		/* Change to a writable file system. */
 		if (vfs_iswriteupgrade(mp)) {
+#if CONFIG_HFS_ALLOC_RBTREE
+				thread_t allocator_thread;
+#endif
 
 			/*
 			 * On inconsistent disks, do not allow read-write mount
@@ -296,6 +341,9 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
 			 */
 			if (!(vfs_flags(mp) & MNT_ROOTFS) &&
 					(hfsmp->vcbAtrb & kHFSVolumeInconsistentMask)) {
+				if (HFS_MOUNT_DEBUG) {
+					printf("hfs_mount: attempting to mount inconsistent non-root volume %s\n",  (hfsmp->vcbVN));
+				}
 				retval = EINVAL;
 				goto out;
 			}
@@ -310,39 +358,52 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
 
 			    if (hfsmp->hfs_flags & HFS_NEED_JNL_RESET) {
 					jflags = JOURNAL_RESET;
-			    } else {
+				} else {
 					jflags = 0;
-			    }
-			    
-			    hfs_global_exclusive_lock_acquire(hfsmp);
-
-			    hfsmp->jnl = journal_open(hfsmp->jvp,
-						      (hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
-						      hfsmp->jnl_size,
-						      hfsmp->hfs_devvp,
-						      hfsmp->hfs_logical_block_size,
-						      jflags,
-						      0,
-						      hfs_sync_metadata, hfsmp->hfs_mp);
-
-			    hfs_global_exclusive_lock_release(hfsmp);
-
-			    if (hfsmp->jnl == NULL) {
-				retval = EINVAL;
-				goto out;
-			    } else {
-				hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET;
-			    }
+				}
+
+				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
+
+				hfsmp->jnl = journal_open(hfsmp->jvp,
+						(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
+						hfsmp->jnl_size,
+						hfsmp->hfs_devvp,
+						hfsmp->hfs_logical_block_size,
+						jflags,
+						0,
+						hfs_sync_metadata, hfsmp->hfs_mp);
+				
+				/*
+				 * Set up the trim callback function so that we can add
+				 * recently freed extents to the free extent cache once
+				 * the transaction that freed them is written to the
+				 * journal on disk.
+				 */
+				if (hfsmp->jnl)
+					journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
+				
+				hfs_unlock_global (hfsmp);
+
+				if (hfsmp->jnl == NULL) {
+					if (HFS_MOUNT_DEBUG) {
+						printf("hfs_mount: journal_open == NULL; couldn't be opened on %s \n", (hfsmp->vcbVN));
+					}
+					retval = EINVAL;
+					goto out;
+				} else {
+					hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET;
+				}
 
 			}
 
 			/* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
 			retval = hfs_erase_unused_nodes(hfsmp);
-			if (retval != E_NONE)
+			if (retval != E_NONE) {
+				if (HFS_MOUNT_DEBUG) {
+					printf("hfs_mount: hfs_erase_unused_nodes returned %d for fs %s\n", retval, hfsmp->vcbVN);
+				}
 				goto out;
-			
-			/* Only clear HFS_READ_ONLY after a successful write */
-			hfsmp->hfs_flags &= ~HFS_READ_ONLY;
+			}
 
 			/* If this mount point was downgraded from read-write 
 			 * to read-only, clear that information as we are now 
@@ -355,8 +416,16 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
 			hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;
 
 			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
-			if (retval != E_NONE)
+			if (retval != E_NONE) {
+				if (HFS_MOUNT_DEBUG) {
+					printf("hfs_mount: hfs_flushvolumeheader returned %d for fs %s\n", retval, hfsmp->vcbVN);
+				}
 				goto out;
+			}
+		
+			/* Only clear HFS_READ_ONLY after a successful write */
+			hfsmp->hfs_flags &= ~HFS_READ_ONLY;
+
 
 			if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) {
 				/* Setup private/hidden directories for hardlinks. */
@@ -368,8 +437,8 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
 				/*
 				 * Allow hot file clustering if conditions allow.
 				 */
-				if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
-				    ((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0)) {
+				if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && 
+						((hfsmp->hfs_flags & HFS_SSD) == 0)) {
 					(void) hfs_recording_init(hfsmp);
 				}
 				/* Force ACLs on HFS+ file systems. */
@@ -377,10 +446,45 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
 					vfs_setextendedsecurity(HFSTOVFS(hfsmp));
 				}
 			}
+
+#if CONFIG_HFS_ALLOC_RBTREE
+			/* 
+			 * Like the normal mount case, we need to handle creation of the allocation red-black tree
+			 * if we're upgrading from read-only to read-write.  
+			 *
+			 * We spawn a thread to create the pair of red-black trees for this volume.
+			 * However, in so doing, we must be careful to ensure that if this thread is still
+			 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
+			 * we'll need to set a bit that indicates we're in progress building the trees here.  
+			 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
+			 * notifies the tree generation code that an unmount is waiting.  Also, before we spawn
+			 * the thread that will start scanning the RB tree, set the extent tree flags to indicate
+			 * that the allocator is enabled for use.
+			 *
+			 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only),
+			 * which has not previously encountered a bad error on the red-black tree code.  Also, don't
+			 * try to re-build a tree that already exists. 
+			 */
+			
+			if (hfsmp->extent_tree_flags == 0) {
+				hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
+				/* Initialize EOF counter so that the thread can assume it started at initial values */
+				hfsmp->offset_block_end = 0;
+				
+				InitTree(hfsmp);
+				
+				kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
+				thread_deallocate(allocator_thread);
+			}
+
+#endif
 		}
 
 		/* Update file system parameters. */
 		retval = hfs_changefs(mp, &args);
+		if (retval &&  HFS_MOUNT_DEBUG) {
+			printf("hfs_mount: hfs_changefs returned %d for %s\n", retval, hfsmp->vcbVN);
+		}
 
 	} else /* not an update request */ {
 
@@ -388,6 +492,44 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
 		vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS));
 
 		retval = hfs_mountfs(devvp, mp, &args, 0, context);
+		if (retval && HFS_MOUNT_DEBUG) {
+			printf("hfs_mount: hfs_mountfs returned %d\n", retval);
+		}
+#if CONFIG_PROTECT
+		/* 
+		 * If above mount call was successful, and this mount is content protection 
+		 * enabled, then verify the on-disk EA on the root to ensure that the filesystem 
+		 * is of a suitable vintage to allow the mount to proceed.  
+		 */
+		if ((retval == 0) && (cp_fs_protected (mp))) {
+			int err = 0;
+			struct cp_root_xattr xattr;
+			bzero (&xattr, sizeof(struct cp_root_xattr));
+			hfsmp = vfs_fsprivate(mp);
+
+			/* go get the EA to get the version information */
+			err = cp_getrootxattr (hfsmp, &xattr);
+			/* If there was no EA there, then write one out. */
+			if (err == ENOATTR) {
+				bzero(&xattr, sizeof(struct cp_root_xattr));
+				xattr.major_version = CP_CURRENT_MAJOR_VERS;
+				xattr.minor_version = CP_CURRENT_MINOR_VERS;
+				xattr.flags = 0;
+
+				err = cp_setrootxattr (hfsmp, &xattr);
+			}	
+			/* 
+			 * For any other error, including having an out of date CP version in the
+			 * EA, or for an error out of cp_setrootxattr, deny the mount 
+			 * and do not proceed further.
+			 */
+			if (err || xattr.major_version != CP_CURRENT_MAJOR_VERS)  {
+				/* Deny the mount and tear down. */
+				retval = EPERM;
+				(void) hfs_unmount (mp, MNT_FORCE, context);
+			}	
+		}				  
+#endif
 	}
 out:
 	if (retval == 0) {
@@ -629,7 +771,7 @@ hfs_reload_callback(struct vnode *vp, void *cargs)
 	/*
 	 * Re-read cnode data for all active vnodes (non-metadata files).
 	 */
-	if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp)) {
+	if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp) && (cp->c_fileid >= kHFSFirstUserCatalogNodeID)) {
 	        struct cat_fork *datafork;
 		struct cat_desc desc;
 
@@ -663,7 +805,7 @@ hfs_reload_callback(struct vnode *vp, void *cargs)
  *	re-load B-tree header data.
  *	re-read cnode data for all active vnodes.
  */
-static int
+int
 hfs_reload(struct mount *mountp)
 {
 	register struct vnode *devvp;
@@ -877,7 +1019,7 @@ hfs_syncer(void *arg0, void *unused)
 	    }
 
 	    if (hfsmp->jnl) {
-		    journal_flush(hfsmp->jnl);
+		    journal_flush(hfsmp->jnl, FALSE);
 	    } else {
 		    hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
 	    }
@@ -918,11 +1060,11 @@ hfs_syncer(void *arg0, void *unused)
 	    // now.  Else we defer the sync and reschedule it.
 	    //
 	    if (hfsmp->jnl) {
-		    lck_rw_lock_shared(&hfsmp->hfs_global_lock);
+			hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
 
-		    journal_flush(hfsmp->jnl);
+		    journal_flush(hfsmp->jnl, FALSE);
 
-		    lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
+			hfs_unlock_global (hfsmp);
 	    } else {
 		    hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
 	    }
@@ -957,10 +1099,119 @@ hfs_syncer(void *arg0, void *unused)
 
 extern int IOBSDIsMediaEjectable( const char *cdev_name );
 
+/*
+ * Initialization code for Red-Black Tree Allocator
+ * 
+ * This function will build the two red-black trees necessary for allocating space
+ * from the metadata zone as well as for normal allocations.  Currently, we use
+ * an advisory read to get most of the data into the buffer cache. 
+ * This function is intended to be run in a separate thread so as not to slow down mount.
+ * 
+ */
+
+void 
+hfs_initialize_allocator (struct hfsmount *hfsmp) {
+	
+#if CONFIG_HFS_ALLOC_RBTREE
+	u_int32_t err;
+	
+	/*
+	 * Take the allocation file lock.  Journal transactions will block until
+	 * we're done here. 
+	 */
+	int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+	
+	/*
+	 * GenerateTree assumes that the bitmap lock is held when you call the function.
+	 * It will drop and re-acquire the lock periodically as needed to let other allocations 
+	 * through.  It returns with the bitmap lock held. Since we only maintain one tree,
+	 * we don't need to specify a start block (always starts at 0).
+	 */
+	err = GenerateTree(hfsmp, hfsmp->totalBlocks, &flags, 1);
+	if (err) {
+		goto bailout;
+	}
+	/* Mark offset tree as built */
+	hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ACTIVE;
+	
+bailout:
+	/* 
+	 * GenerateTree may drop the bitmap lock during operation in order to give other
+	 * threads a chance to allocate blocks, but it will always return with the lock held, so
+	 * we don't need to re-grab the lock in order to update the TREEBUILD_INFLIGHT bit.
+	 */
+	hfsmp->extent_tree_flags &= ~HFS_ALLOC_TREEBUILD_INFLIGHT;
+	if (err != 0) {
+		/* Wakeup any waiters on the allocation bitmap lock */
+		wakeup((caddr_t)&hfsmp->extent_tree_flags);
+	}
+	
+	hfs_systemfile_unlock(hfsmp, flags);
+#else
+#pragma unused (hfsmp)
+#endif
+}
+
+
+/* 
+ * Teardown code for the Red-Black Tree allocator. 
+ * This function consolidates the code which serializes with respect
+ * to a thread that may be potentially still building the tree when we need to begin 
+ * tearing it down.   Since the red-black tree may not be live when we enter this function
+ * we return:
+ *		1 -> Tree was live.
+ *		0 -> Tree was not active at time of call.
+ */
+
+int 
+hfs_teardown_allocator (struct hfsmount *hfsmp) {
+	int rb_used = 0;
+	
+#if CONFIG_HFS_ALLOC_RBTREE
+	
+	int flags = 0;
+	
+	/* 
+	 * Check to see if the tree-generation is still on-going.
+	 * If it is, then block until it's done.
+	 */
+	
+	flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+	
+	
+	while (hfsmp->extent_tree_flags & HFS_ALLOC_TREEBUILD_INFLIGHT) {
+		hfsmp->extent_tree_flags |= HFS_ALLOC_TEARDOWN_INFLIGHT;
+		
+		lck_rw_sleep(&(VTOC(hfsmp->hfs_allocation_vp))->c_rwlock, LCK_SLEEP_EXCLUSIVE, 
+					 &hfsmp->extent_tree_flags, THREAD_UNINT);
+	}
+	
+	if (hfs_isrbtree_active (hfsmp)) {
+		rb_used = 1;
+	
+		/* Tear down the RB Trees while we have the bitmap locked */
+		DestroyTrees(hfsmp);
+
+	}
+
+	hfs_systemfile_unlock(hfsmp, flags);
+#else
+	#pragma unused (hfsmp)
+#endif
+	return rb_used;
+	
+}
+
+
+static int hfs_root_unmounted_cleanly = 0;
+
+SYSCTL_DECL(_vfs_generic);
+SYSCTL_INT(_vfs_generic, OID_AUTO, root_unmounted_cleanly, CTLFLAG_RD, &hfs_root_unmounted_cleanly, 0, "Root filesystem was unmounted cleanly");
+
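/*
 * A minimal userspace sketch of reading the new OID; assumes only the
 * vfs.generic.root_unmounted_cleanly name exported above.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int clean = 0;
	size_t len = sizeof(clean);

	/* 'clean' is 1 if the root filesystem was unmounted cleanly. */
	if (sysctlbyname("vfs.generic.root_unmounted_cleanly", &clean, &len, NULL, 0) == 0)
		printf("root_unmounted_cleanly = %d\n", clean);
	return 0;
}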
 /*
  * Common code for mount and mountroot
  */
-static int
+int
 hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
             int journal_replay_only, vfs_context_t context)
 {
@@ -985,7 +1236,10 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 	daddr64_t mdb_offset;
 	int isvirtual = 0;
 	int isroot = 0;
-	u_int32_t device_features = 0;
+	int isssd;
+#if CONFIG_HFS_ALLOC_RBTREE
+	thread_t allocator_thread;
+#endif
 	
 	if (args == NULL) {
 		/* only hfs_mountroot passes us NULL as the 'args' argument */
@@ -1007,6 +1261,9 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 
 	/* Get the logical block size (treated as physical block size everywhere) */
 	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) {
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mountfs: DKIOCGETBLOCKSIZE failed\n");
+		}
 		retval = ENXIO;
 		goto error_exit;
 	}
@@ -1020,6 +1277,9 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 	retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context);
 	if (retval) {
 		if ((retval != ENOTSUP) && (retval != ENOTTY)) {
+			if (HFS_MOUNT_DEBUG) {
+				printf("hfs_mountfs: DKIOCGETPHYSICALBLOCKSIZE failed\n");
+			}
 			retval = ENXIO;
 			goto error_exit;
 		}
@@ -1039,6 +1299,9 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 		u_int32_t size512 = 512;
 
 		if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) {
+			if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mountfs: DKIOCSETBLOCKSIZE failed\n");
+			}
 			retval = ENXIO;
 			goto error_exit;
 		}
@@ -1047,7 +1310,9 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
 		/* resetting block size may fail if getting block count did */
 		(void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context);
-
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mountfs: DKIOCGETBLOCKCOUNT failed\n");
+		}
 		retval = ENXIO;
 		goto error_exit;
 	}
@@ -1083,11 +1348,17 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 	/* Now switch to our preferred physical block size. */
 	if (log_blksize > 512) {
 		if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
+			if (HFS_MOUNT_DEBUG) { 
+				printf("hfs_mountfs: DKIOCSETBLOCKSIZE (2) failed\n");
+			}
 			retval = ENXIO;
 			goto error_exit;
 		}
 		/* Get the count of physical blocks. */
 		if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
+			if (HFS_MOUNT_DEBUG) { 
+				printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (2) failed\n");
+			}
 			retval = ENXIO;
 			goto error_exit;
 		}
@@ -1103,11 +1374,17 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 	if ((retval = (int)buf_meta_bread(devvp, 
 				HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)), 
 				phys_blksize, cred, &bp))) {
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mountfs: buf_meta_bread failed with %d\n", retval);
+		}
 		goto error_exit;
 	}
 	MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK);
 	if (mdbp == NULL) {
 		retval = ENOMEM;
+		if (HFS_MOUNT_DEBUG) { 
+			printf("hfs_mountfs: MALLOC failed\n");
+		}
 		goto error_exit;
 	}
 	bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
@@ -1116,25 +1393,27 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 
 	MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK);
 	if (hfsmp == NULL) {
+		if (HFS_MOUNT_DEBUG) { 
+			printf("hfs_mountfs: MALLOC (2) failed\n");
+		}
 		retval = ENOMEM;
 		goto error_exit;
 	}
 	bzero(hfsmp, sizeof(struct hfsmount));
 	
 	hfs_chashinit_finish(hfsmp);
-	
+
 	/*
-	 * See if the disk supports unmap (trim).
-	 *
-	 * NOTE: vfs_init_io_attributes has not been called yet, so we can't use the io_flags field
-	 * returned by vfs_ioattr.  We need to call VNOP_IOCTL ourselves.
+	 * See if the disk is a solid state device.  We need this to decide what to do about 
+	 * hotfiles.
 	 */
-	if (VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&device_features, 0, context) == 0) {
-		if (device_features & DK_FEATURE_UNMAP) {
-			hfsmp->hfs_flags |= HFS_UNMAP;
+	if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, context) == 0) {
+		if (isssd) {
+			hfsmp->hfs_flags |= HFS_SSD;
 		}
 	}
-	
+
+
 	/*
 	 *  Init the volume information structure
 	 */
@@ -1143,7 +1422,8 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 	lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr);
 	lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr);
 	lck_rw_init(&hfsmp->hfs_insync, hfs_rwlock_group, hfs_lock_attr);
-
+	lck_spin_init(&hfsmp->vcbFreeExtLock, hfs_spinlock_group, hfs_lock_attr);
+	
 	vfs_setfsprivate(mp, hfsmp);
 	hfsmp->hfs_mp = mp;			/* Make VFSTOHFS work */
 	hfsmp->hfs_raw_dev = vnode_specrdev(devvp);
@@ -1216,6 +1496,9 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 			retval = EROFS;
 			goto error_exit;
 		}
+
+	printf("hfs_mountfs: Mounting HFS Standard volumes was deprecated in Mac OS X 10.7\n");
+
 		/* Treat it as if it's read-only and not writeable */
 		hfsmp->hfs_flags |= HFS_READ_ONLY;
 		hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
@@ -1287,11 +1570,18 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 				log_blksize = 512;
 				if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE,
 				    (caddr_t)&log_blksize, FWRITE, context)) {
+
+					if (HFS_MOUNT_DEBUG) { 
+						printf("hfs_mountfs: DKIOCSETBLOCKSIZE (3) failed\n");
+					}				
 					retval = ENXIO;
 					goto error_exit;
 				}
 				if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT,
 				    (caddr_t)&log_blkcnt, 0, context)) {
+					if (HFS_MOUNT_DEBUG) { 
+						printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (3) failed\n");
+					}
 					retval = ENXIO;
 					goto error_exit;
 				}
@@ -1314,8 +1604,12 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 			mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
 			retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
 					phys_blksize, cred, &bp);
-			if (retval)
+			if (retval) {
+				if (HFS_MOUNT_DEBUG) { 
+					printf("hfs_mountfs: buf_meta_bread (2) failed with %d\n", retval);
+				}
 				goto error_exit;
+			}
 			bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512);
 			buf_brelse(bp);
 			bp = NULL;
@@ -1326,6 +1620,10 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 			vhp = (HFSPlusVolumeHeader*) mdbp;
 		}
 
+		if (isroot) {
+			hfs_root_unmounted_cleanly = (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) != 0;
+		}
+
 		/*
 		 * On inconsistent disks, do not allow read-write mount
 		 * unless it is the boot volume being mounted.  We also
@@ -1338,6 +1636,10 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 		   && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask)
 		   && !journal_replay_only
 		   && !(hfsmp->hfs_flags & HFS_READ_ONLY)) {
+			
+			if (HFS_MOUNT_DEBUG) { 
+				printf("hfs_mountfs: failed to mount non-root inconsistent disk\n");
+			}
 			retval = EINVAL;
 			goto error_exit;
 		}
@@ -1375,6 +1677,9 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 					// EROFS is a special error code that means the volume has an external
 					// journal which we couldn't find.  in that case we do not want to
 					// rewrite the volume header - we'll just refuse to mount the volume.
+					if (HFS_MOUNT_DEBUG) { 
+						printf("hfs_mountfs: hfs_early_journal_init indicated external jnl\n");
+					}
 					retval = EINVAL;
 					goto error_exit;
 				}
@@ -1383,7 +1688,11 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 				// to be "FSK!" which fsck_hfs will see and force the fsck instead
 				// of just bailing out because the volume is journaled.
 				if (!ronly) {
-				    HFSPlusVolumeHeader *jvhp;
+					HFSPlusVolumeHeader *jvhp;
+
+					if (HFS_MOUNT_DEBUG) {
+						printf("hfs_mountfs: hfs_early_journal_init failed, setting to FSK\n");
+					}
 
 				    hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
 				    
@@ -1418,6 +1727,9 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 				// in the hopes that fsck_hfs will be able to
 				// fix any damage that exists on the volume.
 				if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
+					if (HFS_MOUNT_DEBUG) { 
+						printf("hfs_mountfs: hfs_early_journal_init failed, erroring out \n");
+					}
 				    retval = EINVAL;
 				    goto error_exit;
 				}
@@ -1446,10 +1758,16 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 				"(%d) switching to 512\n", log_blksize);
 			log_blksize = 512;
 			if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
+				if (HFS_MOUNT_DEBUG) { 
+					printf("hfs_mountfs: DKIOCSETBLOCKSIZE (4) failed\n");
+				}
 				retval = ENXIO;
 				goto error_exit;
 			}
 			if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
+				if (HFS_MOUNT_DEBUG) { 
+					printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (4) failed\n");
+				}
 				retval = ENXIO;
 				goto error_exit;
 			}
@@ -1470,6 +1788,9 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 					// to be "FSK!" which fsck_hfs will see and force the fsck instead
 					// of just bailing out because the volume is journaled.
 					if (!ronly) {
+						if (HFS_MOUNT_DEBUG) { 
+							printf("hfs_mountfs: hfs_early_journal_init (2) resetting...\n");
+						}
 				    	HFSPlusVolumeHeader *jvhp;
 
 				    	hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
@@ -1504,6 +1825,9 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 					// in the hopes that fsck_hfs will be able to
 					// fix any damage that exists on the volume.
 					if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
+						if (HFS_MOUNT_DEBUG) { 
+							printf("hfs_mountfs: hfs_early_journal_init (2) failed\n");
+						}
 				    	retval = EINVAL;
 				    	goto error_exit;
 					}
@@ -1512,6 +1836,9 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 
 			/* Try again with a smaller block size... */
 			retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
+			if (retval && HFS_MOUNT_DEBUG) {
+				printf("hfs_MountHFSPlusVolume (late) returned %d\n", retval);
+			}
 		}
 		if (retval)
 			(void) hfs_relconverter(0);
@@ -1522,6 +1849,9 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 	hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime;
 
 	if ( retval ) {
+		if (HFS_MOUNT_DEBUG) { 
+			printf("hfs_mountfs: encountered failure %d\n", retval);
+		}
 		goto error_exit;
 	}
 
@@ -1538,7 +1868,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 		mp->mnt_vtable->vfc_vfsflags |= VFC_VFSDIRLINKS;
 	} else {
 		/* HFS standard doesn't support extended readdir! */
-		mp->mnt_vtable->vfc_vfsflags &= ~VFC_VFSREADDIR_EXTENDED;
+		mount_set_noreaddirext (mp);
 	}
 
 	if (args) {
@@ -1563,10 +1893,10 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 		/*
 		 * Set the free space warning levels for the root volume:
 		 *
-		 * Set the "danger" limit to 5% of the volume size or 125MB, whichever
-		 * is less.  Set the "warning" limit to 10% of the volume size or 250MB,
+		 * Set the "danger" limit to 5% of the volume size or 512MB, whichever
+		 * is less.  Set the "warning" limit to 10% of the volume size or 1GB,
 		 * whichever is less.  And last, set the "desired" freespace level to
-		 * to 11% of the volume size or 375MB, whichever is less.
+		 * 11% of the volume size or 1.25GB, whichever is less.
 		 */
 		hfsmp->hfs_freespace_notify_dangerlimit =
 			MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
@@ -1598,6 +1928,32 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
 			}
 		}
 	}
+	
+#if CONFIG_HFS_ALLOC_RBTREE
+	/* 
+	 * We spawn a thread to create the pair of red-black trees for this volume.
+	 * However, in so doing, we must be careful to ensure that if this thread is still
+	 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
+	 * we'll need to set a bit indicating that we're still building the trees here.
+	 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
+	 * notifies the tree generation code that an unmount is waiting.  Also mark the bit that
+	 * indicates the tree is live and operating.
+	 *
+	 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only).
+	 */
+	
+	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
+		hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
+		
+		/* Initialize EOF counter so that the thread can assume it started at initial values */
+		hfsmp->offset_block_end = 0;
+		InitTree(hfsmp);
+		
+		kernel_thread_start((thread_continue_t)hfs_initialize_allocator, hfsmp, &allocator_thread);
+		thread_deallocate(allocator_thread);
+	}
+	
+#endif
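/*
 * A sketch of the resulting flag handshake between mount, the builder
 * thread, and unmount (names as used in this patch; sleep/wakeup are keyed
 * on &hfsmp->extent_tree_flags):
 *
 *   mount:    extent_tree_flags |= TREEBUILD_INFLIGHT | RB_ENABLED;
 *             spawn hfs_initialize_allocator();
 *   builder:  GenerateTree(); on success set RB_ACTIVE;
 *             clear TREEBUILD_INFLIGHT; wakeup() on failure;
 *   unmount:  in hfs_teardown_allocator(), sleep while TREEBUILD_INFLIGHT
 *             is set, then DestroyTrees() if the tree went live.
 */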
 
 	/*
 	 * Start looking for free space to drop below this level and generate a
@@ -1628,7 +1984,7 @@ error_exit:
 			vnode_rele(hfsmp->hfs_devvp);
 		}
 		hfs_delete_chash(hfsmp);
-		
+
 		FREE(hfsmp, M_HFSMNT);
 		vfs_setfsprivate(mp, NULL);
 	}
@@ -1651,7 +2007,7 @@ hfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t
 /*
  * unmount system call
  */
-static int
+int
 hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
 {
 	struct proc *p = vfs_context_proc(context);
@@ -1660,6 +2016,7 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
 	int flags;
 	int force;
 	int started_tr = 0;
+	int rb_used = 0;
 
 	flags = 0;
 	force = 0;
@@ -1706,6 +2063,10 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
 			panic("hfs_unmount: pm_sync_incomplete underflow!\n");
 	}
 	
+#if CONFIG_HFS_ALLOC_RBTREE
+	rb_used = hfs_teardown_allocator(hfsmp);
+#endif
+	
 	/*
 	 * Flush out the b-trees, volume bitmap and Volume Header
 	 */
@@ -1768,22 +2129,31 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
 			HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask;
 		}
 
-		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
-			int i;
-			u_int32_t min_start = hfsmp->totalBlocks;
-
-			// set the nextAllocation pointer to the smallest free block number
-			// we've seen so on the next mount we won't rescan unnecessarily
-			for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
-				if (hfsmp->vcbFreeExt[i].startBlock < min_start) {
-					min_start = hfsmp->vcbFreeExt[i].startBlock;
+		
+		if (rb_used) {
+			/* If the rb-tree was live, just set nextAllocation to 0 */
+			hfsmp->nextAllocation = 0;
+		} 
+		else {
+			if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+				int i;
+				u_int32_t min_start = hfsmp->totalBlocks;
+				
+				// set the nextAllocation pointer to the smallest free block number
+				// we've seen so on the next mount we won't rescan unnecessarily
+				lck_spin_lock(&hfsmp->vcbFreeExtLock);
+				for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
+					if (hfsmp->vcbFreeExt[i].startBlock < min_start) {
+						min_start = hfsmp->vcbFreeExt[i].startBlock;
+					}
+				}
+				lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+				if (min_start < hfsmp->nextAllocation) {
+					hfsmp->nextAllocation = min_start;
 				}
-			}
-			if (min_start < hfsmp->nextAllocation) {
-				hfsmp->nextAllocation = min_start;
 			}
 		}
-
+		
 
 		retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
 		if (retval) {
@@ -1799,7 +2169,7 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
 	}
 
 	if (hfsmp->jnl) {
-		hfs_journal_flush(hfsmp);
+		hfs_journal_flush(hfsmp, FALSE);
 	}
 	
 	/*
@@ -1807,11 +2177,6 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
 	 */
 	(void) hfsUnmount(hfsmp, p);
 
-	/*
-	 * Last chance to dump unreferenced system files.
-	 */
-	(void) vflush(mp, NULLVP, FORCECLOSE);
-
 	if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
 		(void) hfs_relconverter(hfsmp->hfs_encoding);
 
@@ -1833,7 +2198,12 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
 	}
 	// XXXdbg
 
-#ifdef HFS_SPARSE_DEV
+	/*
+	 * Last chance to dump unreferenced system files.
+	 */
+	(void) vflush(mp, NULLVP, FORCECLOSE);
+
+#if HFS_SPARSE_DEV
 	/* Drop our reference on the backing fs (if any). */
 	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
 		struct vnode * tmpvp;
@@ -1845,6 +2215,7 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
 	}
 #endif /* HFS_SPARSE_DEV */
 	lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group);
+	lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group);
 	vnode_rele(hfsmp->hfs_devvp);
 
 	hfs_delete_chash(hfsmp);
@@ -1866,7 +2237,7 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
 static int
 hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context)
 {
-	return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1);
+	return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1, 0);
 }
 
 
@@ -1887,7 +2258,7 @@ hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t
 	int cmd, type, error;
 
 	if (uid == ~0U)
-		uid = vfs_context_ucred(context)->cr_ruid;
+		uid = kauth_cred_getuid(vfs_context_ucred(context));
 	cmd = cmds >> SUBCMDSHIFT;
 
 	switch (cmd) {
@@ -1895,7 +2266,7 @@ hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t
 	case Q_QUOTASTAT:
 		break;
 	case Q_GETQUOTA:
-		if (uid == vfs_context_ucred(context)->cr_ruid)
+		if (uid == kauth_cred_getuid(vfs_context_ucred(context)))
 			break;
 		/* fall through */
 	default:
@@ -1958,7 +2329,7 @@ hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t
 /*
  * Get file system statistics.
  */
-static int
+int
 hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_context_t context)
 {
 	ExtendedVCB *vcb = VFSTOVCB(mp);
@@ -2099,7 +2470,7 @@ hfs_sync_callback(struct vnode *vp, void *cargs)
  *
  * Note: we are always called with the filesystem marked `MPBUSY'.
  */
-static int
+int
 hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
 {
 	struct proc *p = vfs_context_proc(context);
@@ -2203,7 +2574,7 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
 	}
 
 	if (hfsmp->jnl) {
-	    hfs_journal_flush(hfsmp);
+	    hfs_journal_flush(hfsmp, FALSE);
 	}
 
 	{
@@ -2244,7 +2615,7 @@ hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp,
 	if (fhlen < (int)sizeof(struct hfsfid))
 		return (EINVAL);
 
-	result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0);
+	result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0, 0);
 	if (result) {
 		if (result == ENOENT)
 			result = ESTALE;
@@ -2319,6 +2690,7 @@ hfs_init(__unused struct vfsconf *vfsp)
 	hfs_group_attr   = lck_grp_attr_alloc_init();
 	hfs_mutex_group  = lck_grp_alloc_init("hfs-mutex", hfs_group_attr);
 	hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr);
+	hfs_spinlock_group = lck_grp_alloc_init("hfs-spinlock", hfs_group_attr);
 	
 #if HFS_COMPRESSION
     decmpfs_init();
@@ -2359,7 +2731,7 @@ hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp)
 /*
  * HFS filesystem related variables.
  */
-static int
+int
 hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, 
 			user_addr_t newp, size_t newlen, vfs_context_t context)
 {
@@ -2505,6 +2877,15 @@ encodinghint_exit:
 							 0,
 							 hfs_sync_metadata, hfsmp->hfs_mp);
 
+		/*
+		 * Set up the trim callback function so that we can add
+		 * recently freed extents to the free extent cache once
+		 * the transaction that freed them is written to the
+		 * journal on disk.
+		 */
+		if (jnl)
+			journal_trim_set_callback(jnl, hfs_trim_callback, hfsmp);
+
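/*
 * A sketch of the assumed callback shape (inferred from the registration
 * above, not from the journal header):
 *
 *     void hfs_trim_callback(void *arg, uint32_t extent_count,
 *                            const dk_extent_t *extents);
 *
 * 'arg' is the hfsmp registered above; the callback would walk 'extents'
 * and return each committed freed range to the free extent cache.
 */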
 		if (jnl == NULL) {
 			printf("hfs: FAILED to create the journal!\n");
 			if (jvp && jvp != hfsmp->hfs_devvp) {
@@ -2516,17 +2897,17 @@ encodinghint_exit:
 			return EINVAL;
 		} 
 
-		hfs_global_exclusive_lock_acquire(hfsmp);
-		
+		hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
+
 		/*
 		 * Flush all dirty metadata buffers.
 		 */
-		buf_flushdirtyblks(hfsmp->hfs_devvp, MNT_WAIT, 0, "hfs_sysctl");
-		buf_flushdirtyblks(hfsmp->hfs_extents_vp, MNT_WAIT, 0, "hfs_sysctl");
-		buf_flushdirtyblks(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, "hfs_sysctl");
-		buf_flushdirtyblks(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, "hfs_sysctl");
+		buf_flushdirtyblks(hfsmp->hfs_devvp, TRUE, 0, "hfs_sysctl");
+		buf_flushdirtyblks(hfsmp->hfs_extents_vp, TRUE, 0, "hfs_sysctl");
+		buf_flushdirtyblks(hfsmp->hfs_catalog_vp, TRUE, 0, "hfs_sysctl");
+		buf_flushdirtyblks(hfsmp->hfs_allocation_vp, TRUE, 0, "hfs_sysctl");
 		if (hfsmp->hfs_attribute_vp)
-			buf_flushdirtyblks(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, "hfs_sysctl");
+			buf_flushdirtyblks(hfsmp->hfs_attribute_vp, TRUE, 0, "hfs_sysctl");
 
 		HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1];
 		HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask;
@@ -2541,7 +2922,7 @@ encodinghint_exit:
 
 		vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
 
-		hfs_global_exclusive_lock_release(hfsmp);
+		hfs_unlock_global (hfsmp);
 		hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
 
 		{
@@ -2576,7 +2957,7 @@ encodinghint_exit:
 
 		printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp));
 
-		hfs_global_exclusive_lock_acquire(hfsmp);
+		hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
 
 		// Lights out for you buddy!
 		journal_close(hfsmp->jnl);
@@ -2595,7 +2976,8 @@ encodinghint_exit:
 		
 		HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeJournaledMask;
 		
-		hfs_global_exclusive_lock_release(hfsmp);
+		hfs_unlock_global (hfsmp);
+
 		hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
 
 		{
@@ -2676,6 +3058,10 @@ encodinghint_exit:
 		file_drop(device_fd);
 		vnode_put(devvp);
 		return error;
+	} else if (name[0] == HFS_ENABLE_RESIZE_DEBUG) {
+		hfs_resize_debug = 1;
+		printf ("hfs_sysctl: Enabled volume resize debugging.\n");
+		return 0;
 	}
 
 	return (ENOTSUP);
@@ -2696,7 +3082,7 @@ hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_con
 
 	hfsmp = VFSTOHFS(mp);
 
-	error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1);
+	error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1, 0);
 	if (error)
 		return (error);
 
@@ -2737,9 +3123,8 @@ hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_con
  *
  * If the object is a file then it will represent the data fork.
  */
-__private_extern__
 int
-hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock)
+hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock, int allow_deleted)
 {
 	struct vnode *vp = NULLVP;
 	struct cat_desc cndesc;
@@ -2761,7 +3146,7 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock)
 	/*
 	 * Check the hash first
 	 */
-	vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock);
+	vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock, allow_deleted);
 	if (vp) {
 		*vpp = vp;
 		return(0);
@@ -2841,7 +3226,7 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock)
 		 * Pick up the first link in the chain and get a descriptor for it.
 		 * This allows blind volfs paths to work for hardlinks.
 		 */
-		if ((hfs_lookuplink(hfsmp, linkref, &prevlinkid,  &nextlinkid) == 0) &&
+		if ((hfs_lookup_siblinglinks(hfsmp, linkref, &prevlinkid,  &nextlinkid) == 0) &&
 		    (nextlinkid != 0)) {
 			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
 			error = cat_findname(hfsmp, nextlinkid, &linkdesc);
@@ -2854,13 +3239,17 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock)
 	}
 
 	if (linkref) {
-		error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cnfork, &vp);
+		int newvnode_flags = 0;
+		
+		error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
+								&cnfork, &vp, &newvnode_flags);
 		if (error == 0) {
 			VTOC(vp)->c_flag |= C_HARDLINK;
 			vnode_setmultipath(vp);
 		}
 	} else {
 		struct componentname cn;
+		int newvnode_flags = 0;
 
 		/* Supply hfs_getnewvnode with a component name. */
 		MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
@@ -2874,7 +3263,8 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock)
 		cn.cn_consume = 0;
 		bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1);
 	
-		error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr, &cnfork, &vp);
+		error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr, 
+								&cnfork, &vp, &newvnode_flags);
 
 		if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) {
 			hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid);
@@ -2927,7 +3317,7 @@ hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p)
 		}
 
 		/* Obtain the root vnode so we can skip over it. */
-		skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0);
+		skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0, 0);
 	}
 #endif /* QUOTA */
 
@@ -3004,7 +3394,6 @@ hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding)
  *
  * On journal volumes this will cause a volume header flush
  */
-__private_extern__
 int
 hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot)
 {
@@ -3079,7 +3468,7 @@ hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
 
 	mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize));
     
-	mdb->drCrDate	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbCrDate)));
+	mdb->drCrDate	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->hfs_itime)));
 	mdb->drLsMod	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod)));
 	mdb->drAtrb	= SWAP_BE16 (vcb->vcbAtrb);
 	mdb->drNmFls	= SWAP_BE16 (vcb->vcbNmFls);
@@ -3156,7 +3545,6 @@ hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
  *  not flushed since the on-disk "H+" and "HX" signatures
  *  are always stored in-memory as "H+".
  */
-__private_extern__
 int
 hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
 {
@@ -3464,7 +3852,6 @@ err_exit:
 /*
  * Extend a file system.
  */
-__private_extern__
 int
 hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 {
@@ -3509,7 +3896,7 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 	 * ownership and check permissions.
 	 */
 	if (suser(cred, NULL)) {
-		error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0);
+		error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0, 0);
 
 		if (error)
 			return (error);
@@ -3562,7 +3949,11 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 
 	addblks = newblkcnt - vcb->totalBlocks;
 
-	printf("hfs_extendfs: growing %s by %d blocks\n", vcb->vcbVN, addblks);
+	if (hfs_resize_debug) {
+		printf ("hfs_extendfs: old: size=%qu, blkcnt=%u\n", oldsize, hfsmp->totalBlocks);
+		printf ("hfs_extendfs: new: size=%qu, blkcnt=%u, addblks=%u\n", newsize, (u_int32_t)newblkcnt, addblks);
+	}
+	printf("hfs_extendfs: will extend \"%s\" by %u blocks\n", vcb->vcbVN, addblks);
 
 	HFS_MOUNT_LOCK(hfsmp, TRUE);
 	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
@@ -3573,9 +3964,6 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
 	HFS_MOUNT_UNLOCK(hfsmp, TRUE);
 
-	/* Invalidate the current free extent cache */
-	invalidate_free_extent_cache(hfsmp);
-	
 	/*
 	 * Enclose changes inside a transaction.
 	 */
@@ -3604,6 +3992,17 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 	else
 		bitmapblks = 0;
 
+	/* 
+	 * The allocation bitmap can contain unused bits beyond the end of the
+	 * current volume's allocation blocks.  Usually they are supposed to be
+	 * zeroed out, but there can be cases where they are marked as used.
+	 * After extending the file system, those bits can represent valid
+	 * allocation blocks, so we mark all the bits from the end of the current
+	 * volume to the end of the allocation bitmap as "free".
+	 */
+	BlockMarkFreeUnused(vcb, vcb->totalBlocks, 
+			(fp->ff_blocks * vcb->blockSize * 8) - vcb->totalBlocks);
+
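/*
 * A worked instance of the computation above: with 4096-byte allocation
 * blocks and a 10-block bitmap file, the bitmap holds 10 * 4096 * 8 =
 * 327,680 bits.  If vcb->totalBlocks is 300,000, BlockMarkFreeUnused()
 * clears the trailing 27,680 bits so stale "used" bits cannot shadow
 * blocks gained by the extension.
 */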
 	if (bitmapblks > 0) {
 		daddr64_t blkno;
 		daddr_t blkcnt;
@@ -3623,8 +4022,8 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 		 * zone.
 		 */
 		error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0,
-				kEFAllMask | kEFNoClumpMask | kEFReserveMask | kEFMetadataMask,
-				&bytesAdded);
+				kEFAllMask | kEFNoClumpMask | kEFReserveMask 
+				| kEFMetadataMask | kEFContigMask, &bytesAdded);
 
 		if (error == 0) {
 			usedExtendFileC = true;
@@ -3736,7 +4135,8 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 		 * Restore to old state.
 		 */
 		if (usedExtendFileC) {
-			(void) TruncateFileC(vcb, fp, oldBitmapSize, false);
+			(void) TruncateFileC(vcb, fp, oldBitmapSize, 0, FORK_IS_RSRC(fp), 
+								 FTOC(fp)->c_fileid, false);
 		} else {
 			fp->ff_blocks -= bitmapblks;
 			fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
@@ -3752,10 +4152,15 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 		hfsmp->hfs_logical_block_count = prev_phys_block_count;
 		hfsmp->hfs_alt_id_sector = prev_alt_sector;
 		MarkVCBDirty(vcb);
-		if (vcb->blockSize == 512)
-			(void) BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2);
-		else
-			(void) BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1);
+		if (vcb->blockSize == 512) {
+			if (BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2)) {
+				hfs_mark_volume_inconsistent(hfsmp);
+			}
+		} else {
+			if (BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1)) {
+				hfs_mark_volume_inconsistent(hfsmp);
+			}
+		}
 		goto out;
 	}
 	/*
@@ -3779,7 +4184,7 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 	/* 
 	 * Update the metadata zone size based on current volume size
 	 */
-	hfs_metadatazone_init(hfsmp);
+	hfs_metadatazone_init(hfsmp, false);
 	 
 	/*
 	 * Adjust the size of hfsmp->hfs_attrdata_vp
@@ -3801,21 +4206,36 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 		}
 	}
 
+	/*
+	 * Update the R/B Tree if necessary.  Since we don't have to drop the
+	 * systemfile locks in the middle of these operations like we do in the
+	 * truncate case where we have to relocate files, we need only update
+	 * the red-black tree if actual changes were made to the bitmap.  Also,
+	 * we can't really scan the new portion of the bitmap before it has been
+	 * allocated.  The BlockMarkAllocated routines are smart enough to avoid
+	 * the r/b tree if the portion they are manipulating is not currently
+	 * controlled by the tree.
+	 *
+	 * We only update hfsmp->allocLimit if totalBlocks actually increased. 
+	 */
+	
+	if (error == 0) {
+		UpdateAllocLimit(hfsmp, hfsmp->totalBlocks);
+	}
+	
+	/* Log the successful extension */
+	printf("hfs_extendfs: extended \"%s\" to %u blocks (was %u blocks)\n",
+	       hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize));
+	
 out:
 	if (error && fp) {
 		/* Restore allocation fork. */
 		bcopy(&forkdata, &fp->ff_data, sizeof(forkdata));
 		VTOC(vp)->c_blocks = fp->ff_blocks;
-
+		
 	}
-	/*
-	   Regardless of whether or not the totalblocks actually increased,
-	   we should reset the allocLimit field. If it changed, it will
-	   get updated; if not, it will remain the same.
-	*/
+	
 	HFS_MOUNT_LOCK(hfsmp, TRUE);	
 	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
-	hfsmp->allocLimit = vcb->totalBlocks;
 	HFS_MOUNT_UNLOCK(hfsmp, TRUE);	
 	if (lockflags) {
 		hfs_systemfile_unlock(hfsmp, lockflags);
@@ -3824,7 +4244,7 @@ out:
 		hfs_end_transaction(hfsmp);
 	}
 
-	return (error);
+	return MacToVFSError(error);
 }
 
 #define HFS_MIN_SIZE  (32LL * 1024LL * 1024LL)
@@ -3832,7 +4252,6 @@ out:
 /*
  * Truncate a file system (while still mounted).
  */
-__private_extern__
 int
 hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 {
@@ -3843,17 +4262,19 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 	int lockflags = 0;
 	int transaction_begun = 0;
 	Boolean updateFreeBlocks = false;
-	int error;
+	Boolean disable_sparse = false;
+	int error = 0;
 
-	HFS_MOUNT_LOCK(hfsmp, TRUE);	
+	lck_mtx_lock(&hfsmp->hfs_mutex);
 	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
-		HFS_MOUNT_UNLOCK(hfsmp, TRUE);	
+		lck_mtx_unlock(&hfsmp->hfs_mutex);
 		return (EALREADY);
 	}
 	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
-	hfsmp->hfs_resize_filesmoved = 0;
-	hfsmp->hfs_resize_totalfiles = 0;
-	HFS_MOUNT_UNLOCK(hfsmp, TRUE);	
+	hfsmp->hfs_resize_blocksmoved = 0;
+	hfsmp->hfs_resize_totalblocks = 0;
+	hfsmp->hfs_resize_progress = 0;
+	lck_mtx_unlock(&hfsmp->hfs_mutex);
 
 	/*
 	 * - Journaled HFS Plus volumes only.
@@ -3882,25 +4303,66 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 		error = EINVAL;
 		goto out;
 	}
-	/* Make sure that the file system has enough free blocks reclaim */
-	if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
-		printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
-		error = ENOSPC;
-		goto out;
-	}
-	
-	/* Invalidate the current free extent cache */
-	invalidate_free_extent_cache(hfsmp);
-	
-	/* Start with a clean journal. */
-	hfs_journal_flush(hfsmp);
+
+	/* 
+	 * Make sure that the file system has enough free blocks to perform the reclaim.
+	 *
+	 * Before resize, the disk is divided into four zones - 
+	 * 	A. Allocated_Stationary - These are allocated blocks that exist 
+	 * 	   before the new end of disk.  These blocks will not be 
+	 * 	   relocated or modified during resize.
+	 * 	B. Free_Stationary - These are free blocks that exist before the
+	 * 	   new end of disk.  These blocks can be used for any new 
+	 * 	   allocations during resize, including allocation for relocating 
+	 * 	   data from the area of disk being reclaimed. 
+	 * 	C. Allocated_To-Reclaim - These are allocated blocks that exist
+	 *         beyond the new end of disk.  These blocks need to be reclaimed 
+	 *         during resize by allocating equal number of blocks in Free 
+	 *         Stationary zone and copying the data. 
+	 *      D. Free_To-Reclaim - These are free blocks that exist beyond the 
+	 *         new end of disk.  Nothing special needs to be done to reclaim
+	 *         them. 
+	 *
+	 * Total number of blocks on the disk before resize:
+	 * ------------------------------------------------
+	 * 	Total Blocks = Allocated_Stationary + Free_Stationary + 
+	 * 	               Allocated_To-Reclaim + Free_To-Reclaim
+	 *
+	 * Total number of blocks that need to be reclaimed:
+	 * ------------------------------------------------
+	 *	Blocks to Reclaim = Allocated_To-Reclaim + Free_To-Reclaim 
+	 *
+	 * Note that the check below also makes sure that we have enough space 
+	 * to relocate data from Allocated_To-Reclaim to Free_Stationary.   
+	 * Therefore we do not need to check total number of blocks to relocate 
+	 * later in the code.
+	 *
+	 * The condition below gets converted to: 
+	 *
+	 * Allocated To-Reclaim + Free To-Reclaim >= Free Stationary + Free To-Reclaim 
+	 *
+	 * which is equivalent to:
+	 *
+	 *              Allocated To-Reclaim >= Free Stationary
+	 */
+	if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
+		printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
+		error = ENOSPC;
+		goto out;
+	}
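/*
 * A worked instance of the check above: shrinking a 100-block volume to 60
 * blocks with 25 allocated and 15 free blocks beyond the new end gives
 * reclaimblks = 25 + 15 = 40.  With only 20 free stationary blocks,
 * hfs_freeblks() reports 20 + 15 = 35, so 40 >= 35 fails the resize with
 * ENOSPC -- equivalently, Allocated_To-Reclaim (25) >= Free_Stationary (20).
 */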
+	
+	/* Start with a clean journal. */
+	hfs_journal_flush(hfsmp, TRUE);
 	
 	if (hfs_start_transaction(hfsmp) != 0) {
 		error = EINVAL;
 		goto out;
 	}
 	transaction_begun = 1;
-
+	
+	/* Take the bitmap lock to update the alloc limit field */
+	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+	
 	/*
 	 * Prevent new allocations from using the part we're trying to truncate.
 	 *
@@ -3909,12 +4371,36 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 	 * interfere with allocating the new alternate volume header, and no files
 	 * in the allocation blocks beyond (i.e. the blocks we're trying to
 	 * truncate away.
+	 *
+	 * Also shrink the red-black tree if needed.
+	 */
+	if (hfsmp->blockSize == 512) {
+		error = UpdateAllocLimit (hfsmp, newblkcnt - 2);
+	}
+	else {
+		error = UpdateAllocLimit (hfsmp, newblkcnt - 1);
+	}
+
+	/* Sparse devices use first-fit allocation, which is not ideal
+	 * for volume resize; resize requires best-fit allocation.  If a
+	 * sparse device is being truncated, disable the sparse-device
+	 * property temporarily for the duration of the resize.  Also reset
+	 * the free extent cache so that it is rebuilt sorted by
+	 * totalBlocks instead of startBlock.
+	 *
+	 * Note that this will affect all allocations on the volume.  The
+	 * ideal fix would be to modify only resize-related allocations,
+	 * but that would add complexity, such as handling two free
+	 * extent caches sorted differently.  So we stick with this
+	 * solution for now.
 	 */
 	HFS_MOUNT_LOCK(hfsmp, TRUE);	
-	if (hfsmp->blockSize == 512) 
-		hfsmp->allocLimit = newblkcnt - 2;
-	else
-		hfsmp->allocLimit = newblkcnt - 1;
+	if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
+		ResetVCBFreeExtCache(hfsmp);
+		disable_sparse = true;
+	}
+	
 	/* 
 	 * Update the volume free block count to reflect the total number 
 	 * of free blocks that will exist after a successful resize.
@@ -3928,16 +4414,28 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 	 */
 	hfsmp->freeBlocks -= reclaimblks;
 	updateFreeBlocks = true;
-	HFS_MOUNT_UNLOCK(hfsmp, TRUE);	
-
+	HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+	
+	if (lockflags) {
+		hfs_systemfile_unlock(hfsmp, lockflags);
+		lockflags = 0;	
+	}
+	
 	/*
-	 * Update the metadata zone size, and, if required, disable it 
+	 * Update the metadata zone size to match the new volume size;
+	 * if the new size is too small, the metadata zone might be disabled.
 	 */
-	hfs_metadatazone_init(hfsmp);
+	hfs_metadatazone_init(hfsmp, false);
 
 	/*
-	 * Look for files that have blocks at or beyond the location of the
-	 * new alternate volume header
+	 * If some files have blocks at or beyond the location of the
+	 * new alternate volume header, recalculate free blocks and
+	 * reclaim blocks.  Otherwise just update the free block count.
+	 *
+	 * The current allocLimit is set to the location of the new alternate
+	 * volume header, and reclaimblks is the total number of blocks
+	 * that need to be reclaimed.  So the check below really ignores
+	 * the blocks allocated for the old alternate volume header.
 	 */
 	if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
 		/*
@@ -3967,23 +4465,14 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 			error = EAGAIN;  /* tell client to try again */
 			goto out;
 		}
-	}
-	
+	} 
+		
 	/*
 	 * Note: we take the attributes lock in case we have an attribute data vnode
 	 * which needs to change size.
 	 */
 	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
 
-	/*
-	 * Mark the old alternate volume header as free. 
-	 * We don't bother shrinking allocation bitmap file.
-	 */
-	if (hfsmp->blockSize == 512) 
-		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2);
-	else 
-		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1);
-
 	/*
 	 * Allocate last 1KB for alternate volume header.
 	 */
@@ -3993,6 +4482,15 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 		goto out;
 	}
 
+	/*
+	 * Mark the old alternate volume header as free. 
+	 * We don't bother shrinking allocation bitmap file.
+	 */
+	if (hfsmp->blockSize == 512) 
+		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2);
+	else 
+		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1);
+
 	/*
 	 * Invalidate the existing alternate volume header.
 	 *
@@ -4028,7 +4526,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 	error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
 	if (error)
 		panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error);
-	
+
 	/*
 	 * Adjust the size of hfsmp->hfs_attrdata_vp
 	 */
@@ -4050,17 +4548,36 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 	}
 	
 out:
-	lck_mtx_lock(&hfsmp->hfs_mutex);
-	if (error && (updateFreeBlocks == true)) 
+	/* 
+	 * Update the allocLimit to acknowledge the last one or two blocks now.
+	 * Add it to the tree as well if necessary.
+	 */
+	UpdateAllocLimit (hfsmp, hfsmp->totalBlocks);
+	
+	HFS_MOUNT_LOCK(hfsmp, TRUE);	
+	if (disable_sparse == true) {
+		/* Now that the resize is complete, mark the volume as a sparse
+		 * device again so that all further allocations are first-fit
+		 * instead of best-fit.  Reset the free extent cache so that
+		 * it is rebuilt.
+		 */
+		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
+		ResetVCBFreeExtCache(hfsmp);
+	}
+
+	if (error && (updateFreeBlocks == true)) {
 		hfsmp->freeBlocks += reclaimblks;
-	hfsmp->allocLimit = hfsmp->totalBlocks;
-	if (hfsmp->nextAllocation >= hfsmp->allocLimit)
+	}
+	
+	if (hfsmp->nextAllocation >= hfsmp->allocLimit) {
 		hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
+	}
 	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
-	HFS_MOUNT_UNLOCK(hfsmp, TRUE);	
+	HFS_MOUNT_UNLOCK(hfsmp, TRUE);
+	
 	/* On error, reset the metadata zone for original volume size */
 	if (error && (updateFreeBlocks == true)) {
-		hfs_metadatazone_init(hfsmp);
+		hfs_metadatazone_init(hfsmp, false);
 	}
 	
 	if (lockflags) {
@@ -4068,12 +4585,12 @@ out:
 	}
 	if (transaction_begun) {
 		hfs_end_transaction(hfsmp);
-		hfs_journal_flush(hfsmp);
+		hfs_journal_flush(hfsmp, FALSE);
 		/* Just to be sure, sync all data to the disk */
 		(void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
 	}
 
-	return (error);
+	return MacToVFSError(error);
 }
 
 
@@ -4135,6 +4652,9 @@ hfs_copy_extent(
 	u_int32_t ioSizeSectors;	/* Device sectors in this I/O */
 	daddr64_t srcSector, destSector;
 	u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size;
+#if CONFIG_PROTECT
+	int cpenabled = 0;
+#endif
 
 	/*
 	 * Sanity check that we have locked the vnode of the file we're copying.
@@ -4147,6 +4667,25 @@ hfs_copy_extent(
 	if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread())
 		panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);
 
+#if CONFIG_PROTECT
+	/* Prepare the CP blob and get it ready for use */
+	if (!vnode_issystem (vp) && vnode_isreg(vp) &&
+			cp_fs_protected (hfsmp->hfs_mp)) {
+		int cp_err = 0;
+		cp_err = cp_handle_relocate (cp);
+		if (cp_err) {
+			/* 
+			 * can't copy the file because we couldn't set up keys.
+			 * Can't copy the file because we couldn't set up
+			 * the keys; bail out.
+			return cp_err;
+		}
+		else {
+			cpenabled = 1;
+		}
+	}
+#endif
+
 	/*
 	 * Determine the I/O size to use
 	 *
@@ -4176,7 +4715,14 @@ hfs_copy_extent(
 		buf_setcount(bp, ioSize);
 		buf_setblkno(bp, srcSector);
 		buf_setlblkno(bp, srcSector);
-		
+
+		/* Attach the CP to the buffer */
+#if CONFIG_PROTECT
+		if (cpenabled) {
+			buf_setcpaddr (bp, cp->c_cpentry);
+		}
+#endif
+
 		/* Do the read */
 		err = VNOP_STRATEGY(bp);
 		if (!err)
@@ -4194,6 +4740,13 @@ hfs_copy_extent(
 		buf_setlblkno(bp, destSector);
 		if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl))
 			buf_markfua(bp);
+
+#if CONFIG_PROTECT
+		/* Attach the CP to the buffer */
+		if (cpenabled) {
+			buf_setcpaddr (bp, cp->c_cpentry);
+		}
+#endif
 			
 		/* Do the write */
 		vnode_startwrite(hfsmp->hfs_devvp);
@@ -4230,342 +4783,941 @@ hfs_copy_extent(
 }
 
 
-static int
-hfs_relocate_callback(__unused HFSPlusExtentKey *key, HFSPlusExtentRecord *record, HFSPlusExtentRecord *state)
-{
-	bcopy(state, record, sizeof(HFSPlusExtentRecord));
-	return 0;
-}
+/* Structure to store state of reclaiming extents from a 
+ * given file.  hfs_reclaim_file()/hfs_reclaim_xattr() 
+ * initializes the values in this structure which are then 
+ * used by code that reclaims and splits the extents.
+ */
+struct hfs_reclaim_extent_info {
+	struct vnode *vp;
+	u_int32_t fileID;
+	u_int8_t forkType;
+	u_int8_t is_dirlink;                 /* Extent belongs to directory hard link */
+	u_int8_t is_sysfile;                 /* Extent belongs to system file */
+	u_int8_t is_xattr;                   /* Extent belongs to extent-based xattr */
+	u_int8_t extent_index;
+	int lockflags;                       /* Locks that reclaim and split code should grab before modifying the extent record */
+	u_int32_t blocks_relocated;          /* Total blocks relocated for this file till now */
+	u_int32_t recStartBlock;             /* File allocation block number (FABN) for current extent record */
+	u_int32_t cur_blockCount;            /* Number of allocation blocks that have been checked for reclaim */
+	struct filefork *catalog_fp;         /* If non-NULL, extent is from catalog record */
+	union record {
+		HFSPlusExtentRecord overflow;/* Extent record from overflow extents btree */
+		HFSPlusAttrRecord xattr;     /* Attribute record for large EAs */
+	} record;
+	HFSPlusExtentDescriptor *extents;    /* Pointer to current extent record being processed.
+					      * For catalog extent record, points to the correct 
+					      * extent information in filefork.  For overflow extent 
+					      * record, or xattr record, points to extent record 
+					      * in the structure above
+					      */
+	struct cat_desc *dirlink_desc;	
+	struct cat_attr *dirlink_attr;
+	struct filefork *dirlink_fork;	      /* For directory hard links, fp actually points to this */
+	struct BTreeIterator *iterator;       /* Shared read/write iterator, hfs_reclaim_file/xattr() 
+                                               * use it for reading and hfs_reclaim_extent()/hfs_split_extent() 
+					       * use it for writing updated extent record 
+					       */ 
+	struct FSBufferDescriptor btdata;     /* Shared btdata for reading/writing extent record, same as iterator above */
+	u_int16_t recordlen;
+	int overflow_count;                   /* For debugging, counter for overflow extent record */
+	FCB *fcb;                             /* Pointer to the current btree being traversed */
+};
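/*
 * A sketch of how a caller might prime this structure when reclaiming the
 * data fork of an ordinary file (field names from the definition above;
 * the surrounding setup in the reclaim code is assumed, not shown):
 */
#if 0	/* illustrative only */
	struct hfs_reclaim_extent_info extent_info;

	bzero(&extent_info, sizeof(extent_info));
	extent_info.vp = vp;
	extent_info.fileID = VTOC(vp)->c_fileid;
	extent_info.forkType = 0;			/* 0 == data fork */
	extent_info.is_sysfile = vnode_issystem(vp);
	extent_info.catalog_fp = VTOF(vp);		/* catalog extents first */
	extent_info.extents = extent_info.catalog_fp->ff_extents;
#endif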
 
-/*
- * Reclaim space at the end of a volume, used by a given file.
+/* 
+ * Split the current extent into two extents, with the first extent
+ * containing the given number of allocation blocks.  Splitting an
+ * extent creates one new extent entry, which can result in shifting
+ * many entries through all the extent records of a file, and/or
+ * creating a new extent record in the overflow extent btree.
  *
- * This routine attempts to move any extent which contains allocation blocks
- * at or after "startblk."  A separate transaction is used to do the move.
- * The contents of any moved extents are read and written via the volume's
- * device vnode -- NOT via "vp."  During the move, moved blocks which are part
- * of a transaction have their physical block numbers invalidated so they will
- * eventually be written to their new locations.
+ * Example:
+ * The diagram below represents two consecutive extent records;
+ * for simplicity, let's call them record X and X+1 respectively.
+ * Interesting extent entries have been denoted by letters.  
+ * If the letter is unchanged before and after split, it means 
+ * that the extent entry was not modified during the split.  
+ * A '.' means that the entry remains unchanged after the split 
+ * and is not relevant for our example.  A '0' means that the 
+ * extent entry is empty.  
  *
- * Inputs:
- *    hfsmp       The volume being resized.
- *    startblk    Blocks >= this allocation block need to be moved.
- *    locks       Which locks need to be taken for the given system file.
- *    vp          The vnode for the system file.
+ * If there isn't sufficient contiguous free space to relocate 
+ * an extent (extent "C" below), we will have to break the one 
+ * extent into multiple smaller extents, and relocate each of 
+ * the smaller extents individually.  The way we do this is by 
+ * finding the largest contiguous free space that is currently 
+ * available (N allocation blocks), and then convert extent "C" 
+ * into two extents, C1 and C2, that occupy exactly the same 
+ * allocation blocks as extent C.  Extent C1 is the first 
+ * N allocation blocks of extent C, and extent C2 is the remainder 
+ * of extent C.  Then we can relocate extent C1 since we know 
+ * we have enough contiguous free space to relocate it in its 
+ * entirety.  We then repeat the process starting with extent C2. 
+ *
+ * In record X, only the entries following entry C are shifted, and 
+ * the original entry C is replaced with two entries C1 and C2 which
+ * are actually two extent entries for contiguous allocation blocks.
+ *
+ * Note that the entry E from record X is shifted into record X+1 as 
+ * the new first entry.  Since the first entry of record X+1 is updated, 
+ * the FABN will also get updated with the blockCount of entry E.  
+ * This also results in shifting of all extent entries in record X+1.  
+ * Note that the number of empty entries after the split has been 
+ * changed from 3 to 2. 
+ *
+ * Before:
+ *               record X                           record X+1
+ *  ---------------------===---------     ---------------------------------
+ *  | A | . | . | . | B | C | D | E |     | F | . | . | . | G | 0 | 0 | 0 |
+ *  ---------------------===---------     ---------------------------------    
  *
- *    The caller of this function, hfs_reclaimspace(), grabs cnode lock 
- *    for non-system files before calling this function.  
+ * After:
+ *  ---------------------=======-----     ---------------------------------
+ *  | A | . | . | . | B | C1| C2| D |     | E | F | . | . | . | G | 0 | 0 |
+ *  ---------------------=======-----     ---------------------------------    
  *
- * Outputs:
- *    blks_moved  Total number of allocation blocks moved by this routine.
+ *  C1.startBlock = C.startBlock          
+ *  C1.blockCount = N
+ *
+ *  C2.startBlock = C.startBlock + N
+ *  C2.blockCount = C.blockCount - N
+ *
+ *                                        FABN = old FABN - E.blockCount
+ *
+ * Inputs: 
+ *	extent_info - This is the structure that contains state about 
+ *	              the current file, extent, and extent record that 
+ *	              is being relocated.  This structure is shared 
+ *	              among code that traverses through all the extents 
+ *	              of the file, code that relocates extents, and 
+ *	              code that splits the extent. 
+ * Output:
+ * 	Zero on success, non-zero on failure.
  */
-static int
-hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk, int locks, u_int32_t *blks_moved, vfs_context_t context)
+static int 
+hfs_split_extent(struct hfs_reclaim_extent_info *extent_info, uint32_t newBlockCount)
 {
-	int error;
-	int lockflags;
+	int error = 0;
+	int index = extent_info->extent_index;
 	int i;
-	u_long datablks;
-	u_long end_block;
-	u_int32_t oldStartBlock;
-	u_int32_t newStartBlock;
-	u_int32_t oldBlockCount;
-	u_int32_t newBlockCount;
-	struct filefork *fp;
-	struct cnode *cp;
-	int is_sysfile;
-	int took_truncate_lock = 0;
-	struct BTreeIterator *iterator = NULL;
-	u_int8_t forktype;
-	u_int32_t fileID;
-	u_int32_t alloc_flags;
-		
-	/* If there is no vnode for this file, then there's nothing to do. */	
-	if (vp == NULL)
-		return 0;
+	HFSPlusExtentDescriptor shift_extent;
+	HFSPlusExtentDescriptor last_extent;
+	HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being manipulated */
+	HFSPlusExtentRecord *extents_rec = NULL;
+	HFSPlusExtentKey *extents_key = NULL;
+	HFSPlusAttrRecord *xattr_rec = NULL;
+	HFSPlusAttrKey *xattr_key = NULL;
+	struct BTreeIterator iterator;
+	struct FSBufferDescriptor btdata;
+	uint16_t reclen;
+	uint32_t read_recStartBlock;	/* Starting allocation block number to read old extent record */
+	uint32_t write_recStartBlock;	/* Starting allocation block number to insert newly updated extent record */
+	Boolean create_record = false;
+	Boolean is_xattr;
+       
+	is_xattr = extent_info->is_xattr;
+	extents = extent_info->extents;
 
-	cp = VTOC(vp);
-	fileID = cp->c_cnid;
-	is_sysfile = vnode_issystem(vp);
-	forktype = VNODE_IS_RSRC(vp) ? 0xFF : 0;
+	if (hfs_resize_debug) {
+		printf ("hfs_split_extent: Split record:%u recStartBlock=%u %u:(%u,%u) for %u blocks\n", extent_info->overflow_count, extent_info->recStartBlock, index, extents[index].startBlock, extents[index].blockCount, newBlockCount);
+	}
 
-	/* Flush all the buffer cache blocks and cluster pages associated with 
-	 * this vnode.  
-	 *
-	 * If the current vnode is a system vnode, all the buffer cache blocks 
-	 * associated with it should already be sync'ed to the disk as part of 
-	 * journal flush in hfs_truncatefs().  Normally there should not be 
-	 * buffer cache blocks for regular files, but for objects like symlinks,
-	 * we can have buffer cache blocks associated with the vnode.  Therefore
-	 * we call buf_flushdirtyblks() always.  Resource fork data for directory 
-	 * hard links are directly written using buffer cache for device vnode, 
-	 * which should also be sync'ed as part of journal flush in hfs_truncatefs().
-	 * 
-	 * Flushing cluster pages should be the normal case for regular files, 
-	 * and really should not do anything for system files.  But just to be 
-	 * sure that all blocks associated with this vnode is sync'ed to the 
-	 * disk, we call both buffer cache and cluster layer functions.  
+	/* Determine the starting allocation block number for the following
+	 * overflow extent record, if any, before the current record 
+	 * gets modified. 
 	 */
-	buf_flushdirtyblks(vp, MNT_NOWAIT, 0, "hfs_reclaim_file");
-	
-	if (!is_sysfile) {
-		/* The caller grabs cnode lock for non-system files only, therefore 
-		 * we unlock only non-system files before calling cluster layer.
-		 */
-		hfs_unlock(cp);
-		hfs_lock_truncate(cp, TRUE);
-		took_truncate_lock = 1;
+	read_recStartBlock = extent_info->recStartBlock;
+	for (i = 0; i < kHFSPlusExtentDensity; i++) {
+		if (extents[i].blockCount == 0) {
+			break;
+		}
+		read_recStartBlock += extents[i].blockCount;
 	}
-	(void) cluster_push(vp, 0);
-	if (!is_sysfile) {
-		error = hfs_lock(cp, HFS_FORCE_LOCK);
-		if (error) {
-			hfs_unlock_truncate(cp, TRUE);
-			return error;
+
+	/* Shift and split */
+	if (index == kHFSPlusExtentDensity-1) {
+		/* The new extent created after the split will go into the following overflow extent record */
+		shift_extent.startBlock = extents[index].startBlock + newBlockCount;
+		shift_extent.blockCount = extents[index].blockCount - newBlockCount;
+
+		/* Last extent in the record will be split, so nothing to shift */
+	} else {
+		/* Splitting an extent can result in at most one extent
+		 * entry being shifted into the following overflow extent
+		 * record.  So, store the last extent entry for later.
+		 */
+		shift_extent = extents[kHFSPlusExtentDensity-1];
+
+		/* Start shifting extent information from the end of the extent 
+		 * record to the index where we want to insert the new extent.
+		 * Note that kHFSPlusExtentDensity-1 is already saved above, and 
+		 * does not need to be shifted.  The extent entry that is being 
+		 * split does not get shifted.
+		 */
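+		/* Worked example (hypothetical): with a full record e0..e7 and
+		 * index == 2, e7 was saved in shift_extent above; e3..e6 move
+		 * to e4..e7 in the loop below, freeing slot e3 for the second
+		 * half of the split of e2.
+		 */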
+		for (i = kHFSPlusExtentDensity-2; i > index; i--) {
+			if (hfs_resize_debug) {
+				if (extents[i].blockCount) {
+					printf ("hfs_split_extent: Shift %u:(%u,%u) to %u:(%u,%u)\n", i, extents[i].startBlock, extents[i].blockCount, i+1, extents[i].startBlock, extents[i].blockCount);
+				}
+			}
+			extents[i+1] = extents[i];
 		}
+	}
 
-		/* If the file no longer exists, nothing left to do */
-		if (cp->c_flag & C_NOEXISTS) {
-			hfs_unlock_truncate(cp, TRUE);
-			return 0;
+	if (index == kHFSPlusExtentDensity-1) {
+		/* The second half of the extent being split will be the overflow 
+		 * entry that will go into following overflow extent record.  The
+		 * value has been stored in 'shift_extent' above, so there is 
+		 * nothing to be done here.
+		 */
+	} else {
+		/* Update the values in the second half of the extent being split 
+		 * before updating the first half of the split.  Note that the 
+		 * extent to split or first half of the split is at index 'index' 
+		 * and a new extent or second half of the split will be inserted at 
+		 * 'index+1' or into following overflow extent record. 
+		 */ 
+		extents[index+1].startBlock = extents[index].startBlock + newBlockCount;
+		extents[index+1].blockCount = extents[index].blockCount - newBlockCount;
+	}
+	/* Update the extent being split, only the block count will change */
+	extents[index].blockCount = newBlockCount;
+
+	if (hfs_resize_debug) {
+		printf ("hfs_split_extent: Split %u:(%u,%u) and ", index, extents[index].startBlock, extents[index].blockCount);
+		if (index != kHFSPlusExtentDensity-1) {
+			printf ("%u:(%u,%u)\n", index+1, extents[index+1].startBlock, extents[index+1].blockCount);
+		} else {
+			printf ("overflow:(%u,%u)\n", shift_extent.startBlock, shift_extent.blockCount);
 		}
 	}
 
-	/* Wait for any in-progress writes to this vnode to complete, so that we'll
-	 * be copying consistent bits.  (Otherwise, it's possible that an async
-	 * write will complete to the old extent after we read from it.  That
-	 * could lead to corruption.)
+	/* If the newly split extent is for a large EA or lies in an 
+	 * overflow extent record, update it directly in the btree using 
+	 * the iterator information from the shared extent_info structure.
+	 */
-	error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
-	if (error) {
-		printf("hfs_reclaim_file: Error %d from vnode_waitforwrites\n", error);
-		return error;
+	if (extent_info->catalog_fp == NULL) {
+		error = BTReplaceRecord(extent_info->fcb, extent_info->iterator, 
+				&(extent_info->btdata), extent_info->recordlen);
+		if (error) {
+			printf ("hfs_split_extent: fileID=%u BTReplaceRecord returned error=%d\n", extent_info->fileID, error);
+			goto out;
+		}
 	}
-
-	if (hfs_resize_debug) {
-		printf("hfs_reclaim_file: Start relocating %sfork for fileid=%u name=%.*s\n", (forktype ? "rsrc" : "data"), fileID, cp->c_desc.cd_namelen, cp->c_desc.cd_nameptr);
+		
+	/* No extent entry to be shifted into another extent overflow record */
+	if (shift_extent.blockCount == 0) {
+		if (hfs_resize_debug) {
+			printf ("hfs_split_extent: No extent entry to be shifted into overflow records\n");
+		}
+		error = 0;
+		goto out;
 	}
 
-	/* We always need the allocation bitmap and extents B-tree */
-	locks |= SFL_BITMAP | SFL_EXTENTS;
+	/* The overflow extent entry has to be shifted into an extent 
+	 * overflow record.  This means that we have to shift extent 
+	 * entries in all overflow records by one.  We iterate from the 
+	 * first overflow record to the last, shifting one extent entry 
+	 * from each record into the next.  We might have to create a 
+	 * new record for the last extent entry of the file. 
+	 */
 	
-	error = hfs_start_transaction(hfsmp);
-	if (error) {
-		printf("hfs_reclaim_file: hfs_start_transaction returned %d\n", error);
-		if (took_truncate_lock) {
-			hfs_unlock_truncate(cp, TRUE);
+	/* Initialize iterator to search the next record */
+	bzero(&iterator, sizeof(iterator));
+	if (is_xattr) {
+		/* Copy the key from the iterator that was used to update the modified attribute record. */
+		xattr_key = (HFSPlusAttrKey *)&(iterator.key);
+		bcopy((HFSPlusAttrKey *)&(extent_info->iterator->key), xattr_key, sizeof(HFSPlusAttrKey));
+		/* Note: xattr_key->startBlock will be initialized later in the iteration loop */
+
+		MALLOC(xattr_rec, HFSPlusAttrRecord *, 
+				sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK);
+		if (xattr_rec == NULL) {
+			error = ENOMEM;
+			goto out;
 		}
-		return error;
+		btdata.bufferAddress = xattr_rec;
+		btdata.itemSize = sizeof(HFSPlusAttrRecord);
+		btdata.itemCount = 1;
+		extents = xattr_rec->overflowExtents.extents;
+	} else {
+		extents_key = (HFSPlusExtentKey *) &(iterator.key);
+		extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
+		extents_key->forkType = extent_info->forkType;
+		extents_key->fileID = extent_info->fileID;
+		/* Note: extents_key->startBlock will be initialized later in the iteration loop */
+		
+		MALLOC(extents_rec, HFSPlusExtentRecord *, 
+				sizeof(HFSPlusExtentRecord), M_TEMP, M_WAITOK);
+		if (extents_rec == NULL) {
+			error = ENOMEM;
+			goto out;
+		}
+		btdata.bufferAddress = extents_rec;
+		btdata.itemSize = sizeof(HFSPlusExtentRecord);
+		btdata.itemCount = 1;
+		extents = extents_rec[0];
 	}
-	lockflags = hfs_systemfile_lock(hfsmp, locks, HFS_EXCLUSIVE_LOCK);
-	fp = VTOF(vp);
-	datablks = 0;
-	*blks_moved = 0;
 
-	/* Relocate non-overflow extents */
-	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
-		if (fp->ff_extents[i].blockCount == 0)
-			break;
-		oldStartBlock = fp->ff_extents[i].startBlock;
-		oldBlockCount = fp->ff_extents[i].blockCount;
-		datablks += oldBlockCount;
-		end_block = oldStartBlock + oldBlockCount;
-		/* Check if the file overlaps the target space */
-		if (end_block > startblk) {
-			alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS; 
-			if (is_sysfile) {
-				alloc_flags |= HFS_ALLOC_METAZONE;
+	/* An extent entry still needs to be shifted into the following 
+	 * overflow extent record.  This changes the starting allocation 
+	 * block number of that record, which is part of the extent 
+	 * record key.  Since the key is changing, the record cannot be 
+	 * updated in place; instead it has to be deleted and inserted 
+	 * again.
+	 */
+	while (shift_extent.blockCount) {
+		if (hfs_resize_debug) {
+			printf ("hfs_split_extent: Will shift (%u,%u) into record with startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, read_recStartBlock);
+		}
+
+		/* Search if there is any existing overflow extent record.
+		 * For this, the logical start block number in the key is 
+		 * the value calculated based on the logical start block 
+		 * number of the current extent record and the total number 
+		 * of blocks existing in the current extent record.  
+		 */
+		if (is_xattr) {
+			xattr_key->startBlock = read_recStartBlock;
+		} else {
+			extents_key->startBlock = read_recStartBlock;
+		}
+		error = BTSearchRecord(extent_info->fcb, &iterator, &btdata, &reclen, &iterator);
+		if (error) {
+			if (error != btNotFound) {
+				printf ("hfs_split_extent: fileID=%u startBlock=%u BTSearchRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
+				goto out;
 			}
-			error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount);
-			if (error) {
-				if (!is_sysfile && ((error == dskFulErr) || (error == ENOSPC))) {
-					/* Try allocating again using the metadata zone */
-					alloc_flags |= HFS_ALLOC_METAZONE;
-					error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount);
-				}
-				if (error) {
-					printf("hfs_reclaim_file: BlockAllocate(metazone) (error=%d) for fileID=%u %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount);
-					goto fail;
-				} else {
-					if (hfs_resize_debug) {
-						printf("hfs_reclaim_file: BlockAllocate(metazone) success for fileID=%u %u:(%u,%u)\n", fileID, i, newStartBlock, newBlockCount);
-					}
-				}
+			create_record = true;
+		}
+	
+		/* The extra extent entry from the previous record is being inserted
+		 * as the first entry in the current extent record.  This will change 
+		 * the file allocation block number (FABN) of the current extent 
+		 * record, which is the startBlock value from the extent record key.
+		 * Since one extra entry is being inserted in the record, the new 
+		 * FABN for the record will be less than the old FABN by the number 
+		 * of blocks 
+		 * in the new extent entry being inserted at the start.  We have to 
+		 * do this before we update read_recStartBlock to point at the 
+		 * startBlock of the following record.
+		 */
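+		/* Example (hypothetical values): if this record's key currently
+		 * has startBlock == 1000 and the entry being shifted in covers
+		 * 10 blocks, the record is re-keyed at write_recStartBlock ==
+		 * 1000 - 10 == 990, because those 10 blocks now logically
+		 * precede the record's original contents.
+		 */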
+		write_recStartBlock = read_recStartBlock - shift_extent.blockCount;
+		if (hfs_resize_debug) {
+			if (create_record) {
+				printf ("hfs_split_extent: No records found for startBlock=%u, will create new with startBlock=%u\n", read_recStartBlock, write_recStartBlock);
 			}
+		}
 
-			/* Copy data from old location to new location */
-			error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, newBlockCount, context);
-			if (error) {
-				printf("hfs_reclaim_file: hfs_copy_extent error=%d for fileID=%u %u:(%u,%u) to %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount);
-				if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS)) {
-					hfs_mark_volume_inconsistent(hfsmp);
-				}
-				goto fail;
+		/* Now update the read_recStartBlock to account for total number 
+		 * of blocks in this extent record.  It will now point to the 
+		 * starting allocation block number for the next extent record.
+		 */
+		for (i = 0; i < kHFSPlusExtentDensity; i++) {
+			if (extents[i].blockCount == 0) {
+				break;
 			}
-			fp->ff_extents[i].startBlock = newStartBlock;
-			cp->c_flag |= C_MODIFIED;
-			*blks_moved += newBlockCount;
+			read_recStartBlock += extents[i].blockCount;
+		}
 
-			/* Deallocate the old extent */
-			error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
-			if (error) {
-				printf("hfs_reclaim_file: BlockDeallocate returned %d\n", error);
-				hfs_mark_volume_inconsistent(hfsmp);
-				goto fail;
+		if (create_record == true) {
+			/* Initialize new record content with only one extent entry */
+			bzero(extents, sizeof(HFSPlusExtentRecord));
+			/* The new record will contain only one extent entry */
+			extents[0] = shift_extent;
+			/* There are no more overflow extents to be shifted */
+			shift_extent.startBlock = shift_extent.blockCount = 0;
+
+			if (is_xattr) {
+				xattr_rec->recordType = kHFSPlusAttrExtents; 
+				xattr_rec->overflowExtents.reserved = 0;
+				reclen = sizeof(HFSPlusAttrExtents);
+			} else {
+				reclen = sizeof(HFSPlusExtentRecord);
 			}
-
-			/* If this is a system file, sync the volume header on disk */
-			if (is_sysfile) {
-				error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
-				if (error) {
-					printf("hfs_reclaim_file: hfs_flushvolumeheader returned %d\n", error);
-					hfs_mark_volume_inconsistent(hfsmp);
-					goto fail;
-				}
+		} else {
+			/* The overflow extent entry from previous record will be 
+			 * the first entry in this extent record.  If the last 
+			 * extent entry in this record is valid, it will be shifted 
+			 * into the following extent record as its first entry.  So 
+			 * save the last entry before shifting entries in current 
+			 * record.
+			 */
+			last_extent = extents[kHFSPlusExtentDensity-1];
+			
+			/* Shift all entries by one index towards the end */
+			for (i = kHFSPlusExtentDensity-2; i >= 0; i--) {
+				extents[i+1] = extents[i];
 			}
 
+			/* Overflow extent entry saved from previous record 
+			 * is now the first entry in the current record.
+			 */
+			extents[0] = shift_extent;
+
 			if (hfs_resize_debug) {
-				printf ("hfs_reclaim_file: Relocated %u:(%u,%u) to %u:(%u,%u)\n", i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount);
+				printf ("hfs_split_extent: Shift overflow=(%u,%u) to record with updated startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, write_recStartBlock);
 			}
-		}
-	}
-
-	/* Relocate overflow extents (if any) */
-	if (i == kHFSPlusExtentDensity && fp->ff_blocks > datablks) {
-		struct FSBufferDescriptor btdata;
-		HFSPlusExtentRecord record;
-		HFSPlusExtentKey *key;
-		FCB *fcb;
-		int overflow_count = 0;
-
-		if (kmem_alloc(kernel_map, (vm_offset_t*) &iterator, sizeof(*iterator))) {
-			printf("hfs_reclaim_file: kmem_alloc failed!\n");
-			error = ENOMEM;
-			goto fail;
-		}
 
-		bzero(iterator, sizeof(*iterator));
-		key = (HFSPlusExtentKey *) &iterator->key;
-		key->keyLength = kHFSPlusExtentKeyMaximumLength;
-		key->forkType = forktype;
-		key->fileID = fileID;
-		key->startBlock = datablks;
-	
-		btdata.bufferAddress = &record;
-		btdata.itemSize = sizeof(record);
-		btdata.itemCount = 1;
-	
-		fcb = VTOF(hfsmp->hfs_extents_vp);
+			/* The last entry from current record will be the 
+			 * overflow entry which will be the first entry for 
+			 * the following extent record.
+			 */
+			shift_extent = last_extent;
 
-		error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
-		while (error == 0) {
-			/* Stop when we encounter a different file or fork. */
-			if ((key->fileID != fileID) || 
-			    (key->forkType != forktype)) {
-				break;
+			/* Since the key->startBlock is being changed for this record, 
+			 * it should be deleted and inserted with the new key.
+			 */
+			error = BTDeleteRecord(extent_info->fcb, &iterator);
+			if (error) {
+				printf ("hfs_split_extent: fileID=%u startBlock=%u BTDeleteRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
+				goto out;
 			}
-		
-			/* Just track the overflow extent record number for debugging... */
 			if (hfs_resize_debug) {
-				overflow_count++;
+				printf ("hfs_split_extent: Deleted record with startBlock=%u\n", (is_xattr ? xattr_key->startBlock : extents_key->startBlock));
 			}
+		}
 
-			/* 
-			 * Check if the file overlaps target space.
-			 */
-			for (i = 0; i < kHFSPlusExtentDensity; ++i) {
-				if (record[i].blockCount == 0) {
-					goto fail;
-				}
-				oldStartBlock = record[i].startBlock;
-				oldBlockCount = record[i].blockCount;
-				end_block = oldStartBlock + oldBlockCount;
-				if (end_block > startblk) {
-					alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS; 
-					if (is_sysfile) {
-						alloc_flags |= HFS_ALLOC_METAZONE;
-					}
-					error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount);
-					if (error) {
-						if (!is_sysfile && ((error == dskFulErr) || (error == ENOSPC))) {
-							/* Try allocating again using the metadata zone */
-							alloc_flags |= HFS_ALLOC_METAZONE; 
-							error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount);
-						} 
-						if (error) {
-							printf("hfs_reclaim_file: BlockAllocate(metazone) (error=%d) for fileID=%u %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount);
-							goto fail;
-						} else {
-							if (hfs_resize_debug) {
-								printf("hfs_reclaim_file: BlockAllocate(metazone) success for fileID=%u %u:(%u,%u)\n", fileID, i, newStartBlock, newBlockCount);
-							}
-						}
-					}
-					error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, newBlockCount, context);
-					if (error) {
-						printf("hfs_reclaim_file: hfs_copy_extent error=%d for fileID=%u (%u,%u) to (%u,%u)\n", error, fileID, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
-						if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS)) {
-							hfs_mark_volume_inconsistent(hfsmp);
-						}
-						goto fail;
-					}
-					record[i].startBlock = newStartBlock;
-					cp->c_flag |= C_MODIFIED;
-					*blks_moved += newBlockCount;
-
-					/*
-					 * NOTE: To support relocating overflow extents of the
-					 * allocation file, we must update the BTree record BEFORE
-					 * deallocating the old extent so that BlockDeallocate will
-					 * use the extent's new location to calculate physical block
-					 * numbers.  (This is for the case where the old extent's
-					 * bitmap bits actually reside in the extent being moved.)
-					 */
-					error = BTUpdateRecord(fcb, iterator, (IterateCallBackProcPtr) hfs_relocate_callback, &record);
-					if (error) {
-						printf("hfs_reclaim_file: BTUpdateRecord returned %d\n", error);
-						hfs_mark_volume_inconsistent(hfsmp);
-						goto fail;
-					}
-					error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
-					if (error) {
-						printf("hfs_reclaim_file: BlockDeallocate returned %d\n", error);
-						hfs_mark_volume_inconsistent(hfsmp);
-						goto fail;
-					}
-					if (hfs_resize_debug) {
-						printf ("hfs_reclaim_file: Relocated overflow#%d %u:(%u,%u) to %u:(%u,%u)\n", overflow_count, i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount);
-					}
-				}
-			}
-			/* Look for more records. */
-			error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
-			if (error == btNotFound) {
-				error = 0;
-				break;
+		/* Insert the newly created or modified extent record */
+		bzero(&iterator.hint, sizeof(iterator.hint));
+		if (is_xattr) {
+			xattr_key->startBlock = write_recStartBlock;
+		} else {
+			extents_key->startBlock = write_recStartBlock;
+		}
+		error = BTInsertRecord(extent_info->fcb, &iterator, &btdata, reclen);
+		if (error) {
+			printf ("hfs_split_extent: fileID=%u, startBlock=%u BTInsertRecord error=%d\n", extent_info->fileID, write_recStartBlock, error);
+			goto out;
+		}
+		if (hfs_resize_debug) {
+			printf ("hfs_split_extent: Inserted extent record with startBlock=%u\n", write_recStartBlock);
+		}
+	}
+	BTFlushPath(extent_info->fcb);
+out:
+	if (extents_rec) {
+		FREE (extents_rec, M_TEMP);
+	}
+	if (xattr_rec) {
+		FREE (xattr_rec, M_TEMP);
+	}
+	return error;
+}
+
+
+/* 
+ * Relocate an extent if it lies beyond the expected end of volume.
+ *
+ * This function is called for every extent of the file being relocated.  
+ * It allocates space for relocation, copies the data, deallocates 
+ * the old extent, and updates the corresponding on-disk extent.  If 
+ * the function does not find contiguous space to relocate an extent, 
+ * it splits the extent into smaller pieces so that it can be moved 
+ * out of the area of the disk being reclaimed.  As an optimization, 
+ * if an extent lies only partially in the area being reclaimed, it is 
+ * split so that we relocate just the part that overlaps with the area 
+ * being reclaimed. 
+ *
+ * Note that every extent is relocated in its own transaction so that 
+ * they do not overwhelm the journal.  This function handles the extent
+ * record that exists in the catalog record, extent record from overflow 
+ * extents btree, and extents for large EAs.
+ *
+ * Inputs: 
+ *	extent_info - This is the structure that contains state about 
+ *	              the current file, extent, and extent record that 
+ *	              is being relocated.  This structure is shared 
+ *	              among code that traverses through all the extents 
+ *	              of the file, code that relocates extents, and 
+ *	              code that splits the extent. 
+ */
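+/*
+ * In outline (a simplified sketch of the logic below, not additional
+ * behavior): skip the extent if it ends at or before allocLimit; split
+ * it if it merely straddles allocLimit; otherwise allocate new space
+ * (contiguous if possible, falling back to the metadata zone and then
+ * to the largest available run), copy the data, update the on-disk
+ * record, and only then free the old blocks -- all within a single
+ * transaction per extent.
+ */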
+static int
+hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_reclaim_extent_info *extent_info, vfs_context_t context)
+{
+	int error = 0;
+	int index;
+	struct cnode *cp;
+	u_int32_t oldStartBlock;
+	u_int32_t oldBlockCount;
+	u_int32_t newStartBlock;
+	u_int32_t newBlockCount;
+	u_int32_t alloc_flags;
+	int blocks_allocated = false;
+
+	index = extent_info->extent_index;
+	cp = VTOC(extent_info->vp);
+
+	oldStartBlock = extent_info->extents[index].startBlock;
+	oldBlockCount = extent_info->extents[index].blockCount;
+
+	if (0 && hfs_resize_debug) {
+		printf ("hfs_reclaim_extent: Examine record:%u recStartBlock=%u, %u:(%u,%u)\n", extent_info->overflow_count, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount);
+	}
+
+	/* Check if the current extent lies completely within allocLimit */
+	if ((oldStartBlock + oldBlockCount) <= allocLimit) {
+		extent_info->cur_blockCount += oldBlockCount;
+		return error;
+	} 
+
+	/* Every extent should be relocated in its own transaction
+	 * to make sure that we don't overflow the journal buffer.
+	 */
+	error = hfs_start_transaction(hfsmp);
+	if (error) {
+		return error;
+	}
+	extent_info->lockflags = hfs_systemfile_lock(hfsmp, extent_info->lockflags, HFS_EXCLUSIVE_LOCK);
+
+	/* Check if the extent lies partially in the area to reclaim, 
+	 * i.e. it starts before allocLimit and ends beyond allocLimit.  
+	 * We have already skipped extents that lie completely within 
+	 * allocLimit in the check above, so we only check for the 
+	 * startBlock.  If it lies partially, split it so that we 
+	 * only relocate part of the extent.
+	 */
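+	/* Example (hypothetical values): with allocLimit == 500 and an
+	 * extent (startBlock=490, blockCount=30), only blocks 500..519 must
+	 * move; the split keeps (490,10) in place and leaves (500,20) to be
+	 * relocated when the caller visits the newly created entry.
+	 */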
+	if (oldStartBlock < allocLimit) {
+		newBlockCount = allocLimit - oldStartBlock;
+		error = hfs_split_extent(extent_info, newBlockCount);
+		if (error == 0) {
+			/* After successful split, the current extent does not 
+			 * need relocation, so just return back.  
+			 */
+			goto out;
+		}
+		/* Ignore error and try relocating the entire extent instead */
+	}
+
+	alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS; 
+	if (extent_info->is_sysfile) {
+		alloc_flags |= HFS_ALLOC_METAZONE;
+	}
+
+	error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, 
+			&newStartBlock, &newBlockCount);
+	if ((extent_info->is_sysfile == false) && 
+	    ((error == dskFulErr) || (error == ENOSPC))) {
+		/* For non-system files, try reallocating space in metadata zone */
+		alloc_flags |= HFS_ALLOC_METAZONE;
+		error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, 
+				alloc_flags, &newStartBlock, &newBlockCount);
+	} 
+	if ((error == dskFulErr) || (error == ENOSPC)) {
+		/* We did not find desired contiguous space for this extent.  
+		 * So try to allocate the maximum contiguous space available.
+		 */
+		alloc_flags &= ~HFS_ALLOC_FORCECONTIG;
+
+		error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, 
+				alloc_flags, &newStartBlock, &newBlockCount);
+		if (error) {
+			printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
+			goto out;
+		}
+		blocks_allocated = true;
+
+		error = hfs_split_extent(extent_info, newBlockCount);
+		if (error) {
+			printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) split error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
+			goto out;
+		}
+		oldBlockCount = newBlockCount;
+	}
+	if (error) {
+		printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) contig BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
+		goto out;
+	}
+	blocks_allocated = true;
+
+	/* Copy data from old location to new location */
+	error = hfs_copy_extent(hfsmp, extent_info->vp, oldStartBlock, 
+			newStartBlock, newBlockCount, context);
+	if (error) {
+		printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u)=>(%u,%u) hfs_copy_extent error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount, error);
+		goto out;
+	}
+
+	/* Update the extent record with the new start block information */
+	extent_info->extents[index].startBlock = newStartBlock;
+
+	/* Sync the content back to the disk */
+	if (extent_info->catalog_fp) {
+		/* Update the extents in catalog record */
+		if (extent_info->is_dirlink) {
+			error = cat_update_dirlink(hfsmp, extent_info->forkType, 
+					extent_info->dirlink_desc, extent_info->dirlink_attr, 
+					&(extent_info->dirlink_fork->ff_data));
+		} else {
+			cp->c_flag |= C_MODIFIED;
+			/* If this is a system file, sync volume headers on disk */
+			if (extent_info->is_sysfile) {
+				error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
 			}
 		}
+	} else {
+		/* Replace record for extents overflow or extents-based xattrs */
+		error = BTReplaceRecord(extent_info->fcb, extent_info->iterator, 
+				&(extent_info->btdata), extent_info->recordlen);
 	}
-	
-fail:
-	if (iterator) {
-		kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
+	if (error) {
+		printf ("hfs_reclaim_extent: fileID=%u, update record error=%u\n", extent_info->fileID, error);
+		goto out;
+	}
+
+	/* Deallocate the old extent */
+	error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
+	if (error) {
+		printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockDeallocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
+		goto out;
 	}
+	extent_info->blocks_relocated += newBlockCount;
 
-	(void) hfs_systemfile_unlock(hfsmp, lockflags);
+	if (hfs_resize_debug) {
+		printf ("hfs_reclaim_extent: Relocated record:%u %u:(%u,%u) to (%u,%u)\n", extent_info->overflow_count, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
+	}
 
-	if ((*blks_moved != 0) && (is_sysfile == false)) {
-		(void) hfs_update(vp, MNT_WAIT);
+out:
+	if (error != 0) {
+		if (blocks_allocated == true) {
+			BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
+		}
+	} else {
+		/* On success, increment the total allocation blocks processed */
+		extent_info->cur_blockCount += newBlockCount;
 	}
 
-	(void) hfs_end_transaction(hfsmp);
+	hfs_systemfile_unlock(hfsmp, extent_info->lockflags);
 
-	if (took_truncate_lock) {
-		hfs_unlock_truncate(cp, TRUE);
+	/* For a non-system file, if an extent entry from catalog record 
+	 * was modified, sync the in-memory changes to the catalog record
+	 * on disk before ending the transaction.
+	 */
+	if ((error == 0) && 
+	    (extent_info->overflow_count < kHFSPlusExtentDensity) &&
+	    (extent_info->is_sysfile == false)) {
+		(void) hfs_update(extent_info->vp, MNT_WAIT);
+	}
+
+	hfs_end_transaction(hfsmp);
+
+	return error;
+}
+
+/* Report intermediate progress during volume resize */
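+/* (A message is printed only when progress has advanced by roughly 10
+ * percentage points since the last report, to throttle log output.)
+ */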
+static void 
+hfs_truncatefs_progress(struct hfsmount *hfsmp)
+{
+	u_int32_t cur_progress;
+
+	hfs_resize_progress(hfsmp, &cur_progress);
+	if (cur_progress > (hfsmp->hfs_resize_progress + 9)) {
+		printf("hfs_truncatefs: %d%% done...\n", cur_progress);
+		hfsmp->hfs_resize_progress = cur_progress;
+	}
+	return;
+}
+
+/*
+ * Reclaim space at the end of a volume for given file and forktype. 
+ *
+ * This routine attempts to move any extent which contains allocation blocks
+ * at or after "allocLimit."  A separate transaction is used for every extent 
+ * that needs to be moved.  If there is not contiguous space available for 
+ * moving an extent, it can be split into smaller extents.  The contents of 
+ * any moved extents are read and written via the volume's device vnode -- 
+ * NOT via "vp."  During the move, moved blocks which are part of a transaction 
+ * have their physical block numbers invalidated so they will eventually be 
+ * written to their new locations.
+ *
+ * This function is also called for directory hard links.  Directory hard links
+ * are regular files with no data fork and a resource fork that contains alias 
+ * information for backward compatibility with pre-Leopard systems.  However, 
+ * non-Mac OS X implementations can add or modify data fork or resource fork 
+ * information in directory hard links, so we check, and if required, relocate 
+ * both the data fork and the resource fork.  
+ *
+ * Inputs:
+ *    hfsmp       The volume being resized.
+ *    vp          The vnode for the system file.
+ *    fileID	  ID of the catalog record that needs to be relocated
+ *    forktype	  The type of fork that needs to be relocated,
+ *    			kHFSResourceForkType for resource fork,
+ *    			kHFSDataForkType for data fork
+ *    allocLimit  Allocation limit for the new volume size, 
+ *    		  do not use this block or beyond.  All extents 
+ *    		  that use this block or any blocks beyond this limit 
+ *    		  will be relocated.
+ *
+ * Side Effects:
+ * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation 
+ * blocks that were relocated. 
+ */
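+/*
+ * Sketch of the traversal below: the eight extents resident in the
+ * catalog record (or in the looked-up directory hard link fork) are
+ * processed first; if the file has more blocks than those extents
+ * cover, the overflow extent records are walked via BTSearchRecord/
+ * BTIterateRecord, keyed by fileID, fork type, and starting file
+ * allocation block.
+ */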
+static int
+hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, 
+		u_int8_t forktype, u_long allocLimit, vfs_context_t context)
+{
+	int error = 0;
+	struct hfs_reclaim_extent_info *extent_info;
+	int i;
+	int lockflags = 0;
+	struct cnode *cp;
+	struct filefork *fp;
+	int took_truncate_lock = false;
+	int release_desc = false;
+	HFSPlusExtentKey *key;
+		
+	/* If there is no vnode for this file, then there's nothing to do. */	
+	if (vp == NULL) {
+		return 0;
+	}
+
+	cp = VTOC(vp);
+
+	MALLOC(extent_info, struct hfs_reclaim_extent_info *, 
+	       sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
+	if (extent_info == NULL) {
+		return ENOMEM;
+	}
+	bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
+	extent_info->vp = vp;
+	extent_info->fileID = fileID;
+	extent_info->forkType = forktype;
+	extent_info->is_sysfile = vnode_issystem(vp);
+	if (vnode_isdir(vp) && (cp->c_flag & C_HARDLINK)) {
+		extent_info->is_dirlink = true;
+	}
+	/* We always need allocation bitmap and extent btree lock */
+	lockflags = SFL_BITMAP | SFL_EXTENTS;
+	if ((fileID == kHFSCatalogFileID) || (extent_info->is_dirlink == true)) {
+		lockflags |= SFL_CATALOG;
+	} else if (fileID == kHFSAttributesFileID) {
+		lockflags |= SFL_ATTRIBUTE;
+	} else if (fileID == kHFSStartupFileID) {
+		lockflags |= SFL_STARTUP;
+	}
+	extent_info->lockflags = lockflags;
+	extent_info->fcb = VTOF(hfsmp->hfs_extents_vp);
+
+	/* Flush data associated with the current file to disk. 
+	 *
+	 * If the current vnode is a directory hard link, no flushing of 
+	 * journal or vnode is required.  The current kernel does not 
+	 * modify data/resource fork of directory hard links, so nothing 
+	 * will be in the cache.  If a directory hard link is newly created, 
+	 * the resource fork data is written directly using devvp and 
+	 * the code that actually relocates data (hfs_copy_extent()) also
+	 * uses devvp for its I/O --- so they will see a consistent copy. 
+	 */
+	if (extent_info->is_sysfile) {
+		/* If the current vnode is a system vnode, flush the journal 
+		 * to make sure that all data is written to the disk.
+		 */
+		error = hfs_journal_flush(hfsmp, TRUE);
+		if (error) {
+			printf ("hfs_reclaim_file: journal_flush returned %d\n", error);
+			goto out;
+		}
+	} else if (extent_info->is_dirlink == false) {
+		/* Flush all blocks associated with this regular file vnode.  
+		 * Normally there should not be buffer cache blocks for regular 
+		 * files, but for objects like symlinks, we can have buffer cache 
+		 * blocks associated with the vnode.  Therefore we call
+		 * buf_flushdirtyblks() also.
+		 */
+		buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file");
+
+		hfs_unlock(cp);
+		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
+		took_truncate_lock = true;
+		(void) cluster_push(vp, 0);
+		error = hfs_lock(cp, HFS_FORCE_LOCK);
+		if (error) {
+			goto out;
+		}
+
+		/* If the file no longer exists, nothing left to do */
+		if (cp->c_flag & C_NOEXISTS) {
+			error = 0;
+			goto out;
+		}
+
+		/* Wait for any in-progress writes to this vnode to complete, so that we'll
+		 * be copying consistent bits.  (Otherwise, it's possible that an async
+		 * write will complete to the old extent after we read from it.  That
+		 * could lead to corruption.)
+		 */
+		error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
+		if (error) {
+			goto out;
+		}
+	}
+
+	if (hfs_resize_debug) {
+		printf("hfs_reclaim_file: === Start reclaiming %sfork for %sid=%u ===\n", (forktype ? "rsrc" : "data"), (extent_info->is_dirlink ? "dirlink" : "file"), fileID);
+	}
+
+	if (extent_info->is_dirlink) {
+		MALLOC(extent_info->dirlink_desc, struct cat_desc *, 
+				sizeof(struct cat_desc), M_TEMP, M_WAITOK);
+		MALLOC(extent_info->dirlink_attr, struct cat_attr *, 
+				sizeof(struct cat_attr), M_TEMP, M_WAITOK);
+		MALLOC(extent_info->dirlink_fork, struct filefork *, 
+				sizeof(struct filefork), M_TEMP, M_WAITOK);
+		if ((extent_info->dirlink_desc == NULL) || 
+		    (extent_info->dirlink_attr == NULL) || 
+		    (extent_info->dirlink_fork == NULL)) {
+			error = ENOMEM;
+			goto out;
+		}
+
+		/* Lookup catalog record for directory hard link and 
+		 * create a fake filefork for the value looked up from 
+		 * the disk. 
+		 */
+		fp = extent_info->dirlink_fork;
+		bzero(extent_info->dirlink_fork, sizeof(struct filefork));
+		extent_info->dirlink_fork->ff_cp = cp;
+		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+		error = cat_lookup_dirlink(hfsmp, fileID, forktype, 
+				extent_info->dirlink_desc, extent_info->dirlink_attr, 
+				&(extent_info->dirlink_fork->ff_data));	
+		hfs_systemfile_unlock(hfsmp, lockflags);
+		if (error) {
+			printf ("hfs_reclaim_file: cat_lookup_dirlink for fileID=%u returned error=%u\n", fileID, error);
+			goto out;
+		}
+		release_desc = true;
+	} else {
+		fp = VTOF(vp);
+	}
+
+	extent_info->catalog_fp = fp;
+	extent_info->recStartBlock = 0;
+	extent_info->extents = extent_info->catalog_fp->ff_extents;
+	/* Relocate extents from the catalog record */
+	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
+		if (fp->ff_extents[i].blockCount == 0) {
+			break;
+		}
+		extent_info->extent_index = i;
+		error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
+		if (error) {
+			printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, error);
+			goto out;
+		}
+	}
+		
+	/* If the number of allocation blocks processed for reclaiming 
+	 * is less than the total number of blocks for the file, continue 
+	 * working on the overflow extent records.
+	 */
+	if (fp->ff_blocks <= extent_info->cur_blockCount) {
+		if (0 && hfs_resize_debug) {
+			printf ("hfs_reclaim_file: Nothing more to relocate, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
+		}
+		goto out;
+	}
+
+	if (hfs_resize_debug) {
+		printf ("hfs_reclaim_file: Will check overflow records, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
+	}
+
+	MALLOC(extent_info->iterator, struct BTreeIterator *, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
+	if (extent_info->iterator == NULL) {
+		error = ENOMEM;
+		goto out;
 	}
+	bzero(extent_info->iterator, sizeof(struct BTreeIterator));
+	key = (HFSPlusExtentKey *) &(extent_info->iterator->key);
+	key->keyLength = kHFSPlusExtentKeyMaximumLength;
+	key->forkType = forktype;
+	key->fileID = fileID;
+	key->startBlock = extent_info->cur_blockCount;
+
+	extent_info->btdata.bufferAddress = extent_info->record.overflow;
+	extent_info->btdata.itemSize = sizeof(HFSPlusExtentRecord);
+	extent_info->btdata.itemCount = 1;
 
+	extent_info->catalog_fp = NULL;
+
+	/* Search the first overflow extent with expected startBlock as 'cur_blockCount' */
+	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+	error = BTSearchRecord(extent_info->fcb, extent_info->iterator, 
+			&(extent_info->btdata), &(extent_info->recordlen), 
+			extent_info->iterator);
+	hfs_systemfile_unlock(hfsmp, lockflags);
+	while (error == 0) {
+		extent_info->overflow_count++;
+		extent_info->recStartBlock = key->startBlock;
+		extent_info->extents = extent_info->record.overflow;
+		for (i = 0; i < kHFSPlusExtentDensity; i++) {
+			if (extent_info->record.overflow[i].blockCount == 0) {
+				goto out;
+			}
+			extent_info->extent_index = i;
+			error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
+			if (error) {
+				printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, extent_info->record.overflow[i].startBlock, extent_info->record.overflow[i].blockCount, error);
+				goto out;
+			}
+		}
+
+		/* Look for more overflow records */
+		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
+		error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord, 
+				extent_info->iterator, &(extent_info->btdata), 
+				&(extent_info->recordlen));
+		hfs_systemfile_unlock(hfsmp, lockflags);
+		if (error) {
+			break;
+		}
+		/* Stop when we encounter a different file or fork. */
+		if ((key->fileID != fileID) || (key->forkType != forktype)) {
+			break;
+		}
+	}
+	if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
+		error = 0;
+	}
+	
+out:
+	/* If any blocks were relocated, account them and report progress */
+	if (extent_info->blocks_relocated) {
+		hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
+		hfs_truncatefs_progress(hfsmp);
+		if (fileID < kHFSFirstUserCatalogNodeID) {
+			printf ("hfs_reclaim_file: Relocated %u blocks from fileID=%u on \"%s\"\n", 
+					extent_info->blocks_relocated, fileID, hfsmp->vcbVN); 
+		}
+	}
+	if (extent_info->iterator) {
+		FREE(extent_info->iterator, M_TEMP);
+	}
+	if (release_desc == true) {
+		cat_releasedesc(extent_info->dirlink_desc);
+	}
+	if (extent_info->dirlink_desc) {
+		FREE(extent_info->dirlink_desc, M_TEMP);
+	}
+	if (extent_info->dirlink_attr) {
+		FREE(extent_info->dirlink_attr, M_TEMP);
+	}
+	if (extent_info->dirlink_fork) {
+		FREE(extent_info->dirlink_fork, M_TEMP);
+	}
+	if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) {
+		(void) hfs_update(vp, MNT_WAIT);
+	}
+	if (took_truncate_lock) {
+		hfs_unlock_truncate(cp, 0);
+	}
+	if (extent_info) {
+		FREE(extent_info, M_TEMP);
+	}
 	if (hfs_resize_debug) {
-		printf("hfs_reclaim_file: Finished relocating %sfork for fileid=%u (error=%d)\n", (forktype ? "rsrc" : "data"), fileID, error);
+		printf("hfs_reclaim_file: === Finished relocating %sfork for fileid=%u (error=%d) ===\n", (forktype ? "rsrc" : "data"), fileID, error);
 	}
 
 	return error;
@@ -4604,6 +5756,9 @@ hfs_journal_relocate_callback(void *_args)
 		hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
 	if (error) {
 		printf("hfs_reclaim_journal_file: failed to read JIB (%d)\n", error);
+		if (bp) {
+        		buf_brelse(bp);
+		}
 		return error;
 	}
 	jibp = (JournalInfoBlock*) buf_dataptr(bp);
@@ -4629,9 +5784,10 @@ hfs_journal_relocate_callback(void *_args)
 
 
 static int
-hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context)
+hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
 {
 	int error;
+	int journal_err;
 	int lockflags;
 	u_int32_t oldStartBlock;
 	u_int32_t newStartBlock;
@@ -4642,6 +5798,11 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context)
 	struct cat_fork journal_fork;
 	struct hfs_journal_relocate_args callback_args;
 
+	if (hfsmp->jnl_start + (hfsmp->jnl_size / hfsmp->blockSize) <= allocLimit) {
+		/* The journal does not require relocation */
+		return 0;
+	}
+
 	error = hfs_start_transaction(hfsmp);
 	if (error) {
 		printf("hfs_reclaim_journal_file: hfs_start_transaction returned %d\n", error);
@@ -4708,13 +5869,24 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context)
 		printf("hfs_reclaim_journal_file: hfs_end_transaction returned %d\n", error);
 	}
 	
-	if (!error && hfs_resize_debug) {
-		printf ("hfs_reclaim_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
+	/* Account for the blocks relocated and print progress */
+	hfsmp->hfs_resize_blocksmoved += oldBlockCount;
+	hfs_truncatefs_progress(hfsmp);
+	if (!error) {
+		printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n", 
+				oldBlockCount, hfsmp->vcbVN);
+		if (hfs_resize_debug) {
+			printf ("hfs_reclaim_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
+		}
 	}
 	return error;
 
 free_fail:
-	(void) BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
+	journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS); 
+	if (journal_err) {
+		printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
+		hfs_mark_volume_inconsistent(hfsmp);
+	}
 fail:
 	hfs_systemfile_unlock(hfsmp, lockflags);
 	(void) hfs_end_transaction(hfsmp);
@@ -4731,9 +5903,10 @@ fail:
  * the field in the volume header and the catalog record.
  */
 static int
-hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context)
+hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
 {
 	int error;
+	int journal_err;
 	int lockflags;
 	u_int32_t oldBlock;
 	u_int32_t newBlock;
@@ -4742,6 +5915,11 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context)
 	struct cat_attr jib_attr;
 	struct cat_fork jib_fork;
 	buf_t old_bp, new_bp;
+
+	if (hfsmp->vcbJinfoBlock <= allocLimit) {
+		/* The journal info block does not require relocation */
+		return 0;
+	}
 	
 	error = hfs_start_transaction(hfsmp);
 	if (error) {
@@ -4773,6 +5951,9 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context)
 		hfsmp->blockSize, vfs_context_ucred(context), &old_bp);
 	if (error) {
 		printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error);
+		if (old_bp) {
+        		buf_brelse(old_bp);
+		}
 		goto free_fail;
 	}
 	new_bp = buf_getblk(hfsmp->hfs_devvp,
@@ -4820,101 +6001,537 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context)
 		printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error);
 		goto fail;
 	}
-	hfs_systemfile_unlock(hfsmp, lockflags);
-	error = hfs_end_transaction(hfsmp);
-	if (error) {
-		printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error);
+	hfs_systemfile_unlock(hfsmp, lockflags);
+	error = hfs_end_transaction(hfsmp);
+	if (error) {
+		printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error);
+	}
+	error = hfs_journal_flush(hfsmp, FALSE);
+	if (error) {
+		printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
+	}
+
+	/* Account for the block relocated and print progress */
+	hfsmp->hfs_resize_blocksmoved += 1;
+	hfs_truncatefs_progress(hfsmp);
+	if (!error) {
+		printf ("hfs_reclaim_journal_info: Relocated 1 block from journal info on \"%s\"\n", 
+				hfsmp->vcbVN);
+		if (hfs_resize_debug) {
+			printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount);
+		}
+	}
+	return error;
+
+free_fail:
+	journal_err = BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS); 
+	if (journal_err) {
+		printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
+		hfs_mark_volume_inconsistent(hfsmp);
+	}
+
+fail:
+	hfs_systemfile_unlock(hfsmp, lockflags);
+	(void) hfs_end_transaction(hfsmp);
+	if (hfs_resize_debug) {
+		printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error);
+	}
+	return error;
+}
+
+
+/*
+ * This function traverses through all extended attribute records for a given 
+ * fileID and calls the function that reclaims data blocks lying in the 
+ * area of the disk being reclaimed, which in turn is responsible for 
+ * allocating new space, copying the extent data, deallocating the old 
+ * space, and, if required, splitting the extent.
+ *
+ * Note: The caller has already acquired the cnode lock on the file.  Therefore
+ * we are assured that no other thread would be creating/deleting/modifying 
+ * extended attributes for this file.  
+ *
+ * Side Effects:
+ * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation 
+ * blocks that were relocated. 
+ *
+ * Returns: 
+ * 	0 on success, non-zero on failure.
+ */
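+/*
+ * Sketch of the records handled below: a kHFSPlusAttrForkData record
+ * carries the attribute's first eight extents in its fork data, and
+ * each kHFSPlusAttrExtents record carries eight more overflow extents;
+ * both are iterated here and fed to hfs_reclaim_extent().
+ */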
+static int 
+hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_int32_t allocLimit, vfs_context_t context) 
+{
+	int error = 0;
+	struct hfs_reclaim_extent_info *extent_info;
+	int i;
+	HFSPlusAttrKey *key;
+	int *lockflags;
+
+	if (hfs_resize_debug) {
+		printf("hfs_reclaim_xattr: === Start reclaiming xattr for id=%u ===\n", fileID);
+	}
+
+	MALLOC(extent_info, struct hfs_reclaim_extent_info *, 
+	       sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
+	if (extent_info == NULL) {
+		return ENOMEM;
+	}
+	bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
+	extent_info->vp = vp;
+	extent_info->fileID = fileID;
+	extent_info->is_xattr = true;
+	extent_info->is_sysfile = vnode_issystem(vp);
+	extent_info->fcb = VTOF(hfsmp->hfs_attribute_vp);
+	lockflags = &(extent_info->lockflags);
+	*lockflags = SFL_ATTRIBUTE | SFL_BITMAP;
+
+	/* Initialize iterator from the extent_info structure */
+	MALLOC(extent_info->iterator, struct BTreeIterator *, 
+	       sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
+	if (extent_info->iterator == NULL) {
+		error = ENOMEM;
+		goto out;
+	}
+	bzero(extent_info->iterator, sizeof(struct BTreeIterator));
+
+	/* Build attribute key */
+	key = (HFSPlusAttrKey *)&(extent_info->iterator->key);
+	error = hfs_buildattrkey(fileID, NULL, key);
+	if (error) {
+		goto out;
+	}
+
+	/* Initialize btdata from extent_info structure.  Note that the 
+	 * buffer pointer actually points to the xattr record from the 
+	 * extent_info structure itself.
+	 */
+	extent_info->btdata.bufferAddress = &(extent_info->record.xattr);
+	extent_info->btdata.itemSize = sizeof(HFSPlusAttrRecord);
+	extent_info->btdata.itemCount = 1;
+
+	/* 
+	 * Sync all extent-based attribute data to the disk.
+	 *
+	 * All extent-based attribute data I/O is performed via cluster 
+	 * I/O using a virtual file that spans across entire file system 
+	 * space.  
+	 */
+	hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK);
+	(void)cluster_push(hfsmp->hfs_attrdata_vp, 0);
+	error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr");
+	hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), 0);
+	if (error) {
+		goto out;
+	}
+
+	/* Search for extended attribute for current file.  This 
+	 * will place the iterator before the first matching record.
+	 */
+	*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
+	error = BTSearchRecord(extent_info->fcb, extent_info->iterator, 
+			&(extent_info->btdata), &(extent_info->recordlen), 
+			extent_info->iterator);
+	hfs_systemfile_unlock(hfsmp, *lockflags);
+	if (error) {
+		if (error != btNotFound) {
+			goto out;
+		}
+		/* btNotFound is expected here, so just mask it */
+		error = 0;
+	} 
+
+	while (1) {
+		/* Iterate to the next record */
+		*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
+		error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord, 
+				extent_info->iterator, &(extent_info->btdata), 
+				&(extent_info->recordlen));
+		hfs_systemfile_unlock(hfsmp, *lockflags);
+
+		/* Stop the iteration if we encounter end of btree or xattr with different fileID */
+		if (error || key->fileID != fileID) {
+			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
+				error = 0;				
+			}
+			break;
+		}
+
+		/* We only care about extent-based EAs */
+		if ((extent_info->record.xattr.recordType != kHFSPlusAttrForkData) && 
+		    (extent_info->record.xattr.recordType != kHFSPlusAttrExtents)) {
+			continue;
+		}
+
+		if (extent_info->record.xattr.recordType == kHFSPlusAttrForkData) {
+			extent_info->overflow_count = 0;
+			extent_info->extents = extent_info->record.xattr.forkData.theFork.extents;
+		} else if (extent_info->record.xattr.recordType == kHFSPlusAttrExtents) {
+			extent_info->overflow_count++;
+			extent_info->extents = extent_info->record.xattr.overflowExtents.extents;
+		}
+			
+		extent_info->recStartBlock = key->startBlock;
+		for (i = 0; i < kHFSPlusExtentDensity; i++) {
+			if (extent_info->extents[i].blockCount == 0) {
+				break;
+			} 
+			extent_info->extent_index = i;
+			error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
+			if (error) {
+				printf ("hfs_reclaim_xattr: fileID=%u hfs_reclaim_extent error=%d\n", fileID, error); 
+				goto out;
+			}
+		}
+	}
+
+out:
+	/* If any blocks were relocated, account them and report progress */
+	if (extent_info->blocks_relocated) {
+		hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
+		hfs_truncatefs_progress(hfsmp);
+	}
+	if (extent_info->iterator) {
+		FREE(extent_info->iterator, M_TEMP);
+	}
+	if (extent_info) {
+		FREE(extent_info, M_TEMP);
+	}
+	if (hfs_resize_debug) {
+		printf("hfs_reclaim_xattr: === Finished relocating xattr for fileid=%u (error=%d) ===\n", fileID, error);
+	}
+	return error;
+}
+
+/* 
+ * Reclaim any extent-based extended attributes allocation blocks from 
+ * the area of the disk that is being truncated.
+ *
+ * The function traverses the attribute btree to find out the fileIDs
+ * of the extended attributes that need to be relocated.  For every 
+ * file whose large EA requires relocation, it looks up the cnode and 
+ * calls hfs_reclaim_xattr() to do all the work for allocating 
+ * new space, copying data, deallocating old space, and if required, 
+ * splitting the extents.
+ *
+ * Inputs: 
+ * 	allocLimit    - starting block of the area being reclaimed
+ *
+ * Returns:
+ *   	returns 0 on success, non-zero on failure.
+ */
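+/*
+ * Sketch of the scan below: the attribute B-tree is iterated record by
+ * record under a shared lock that is dropped between iterations; a
+ * fileID whose attributes were just relocated is skipped via
+ * prev_fileid, and only kHFSPlusAttrForkData and kHFSPlusAttrExtents
+ * records can reference allocation blocks past allocLimit.
+ */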
+static int
+hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
+{
+	int error = 0;
+	FCB *fcb;
+	struct BTreeIterator *iterator = NULL;
+	struct FSBufferDescriptor btdata;
+	HFSPlusAttrKey *key;
+	HFSPlusAttrRecord rec;
+	int lockflags = 0;
+	cnid_t prev_fileid = 0;
+	struct vnode *vp;
+	int need_relocate;
+	int btree_operation;
+	u_int32_t files_moved = 0;
+	u_int32_t prev_blocksmoved;
+	int i;
+
+	fcb = VTOF(hfsmp->hfs_attribute_vp);
+	/* Store the value to print total blocks moved by this function in end */
+	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;
+
+	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
+		return ENOMEM;
+	}	
+	bzero(iterator, sizeof(*iterator));
+	key = (HFSPlusAttrKey *)&iterator->key;
+	btdata.bufferAddress = &rec;
+	btdata.itemSize = sizeof(rec);
+	btdata.itemCount = 1;
+
+	need_relocate = false;
+	btree_operation = kBTreeFirstRecord;
+	/* Traverse the attribute btree to find extent-based EAs to reclaim */
+	while (1) {
+		lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
+		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
+		hfs_systemfile_unlock(hfsmp, lockflags);
+		if (error) {
+			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
+				error = 0;				
+			}
+			break;
+		}
+		btree_operation = kBTreeNextRecord;
+
+		/* If the extents of current fileID were already relocated, skip it */
+		if (prev_fileid == key->fileID) {
+			continue;
+		}
+
+		/* Check if any of the extents in the current record need to be relocated */
+		need_relocate = false;
+		switch(rec.recordType) {
+			case kHFSPlusAttrForkData:
+				for (i = 0; i < kHFSPlusExtentDensity; i++) {
+					if (rec.forkData.theFork.extents[i].blockCount == 0) {
+						break;
+					}
+					if ((rec.forkData.theFork.extents[i].startBlock + 
+					     rec.forkData.theFork.extents[i].blockCount) > allocLimit) {
+						need_relocate = true;
+						break;
+					}
+				}
+				break;
+
+			case kHFSPlusAttrExtents:
+				for (i = 0; i < kHFSPlusExtentDensity; i++) {
+					if (rec.overflowExtents.extents[i].blockCount == 0) {
+						break;
+					}
+					if ((rec.overflowExtents.extents[i].startBlock + 
+					     rec.overflowExtents.extents[i].blockCount) > allocLimit) {
+						need_relocate = true;
+						break;
+					}
+				}
+				break;
+		};
+
+		/* Continue iterating to next attribute record */
+		if (need_relocate == false) {
+			continue;
+		}
+
+		/* Look up the vnode for corresponding file.  The cnode 
+		 * will be locked which will ensure that no one modifies 
+		 * the xattrs when we are relocating them.
+		 *
+		 * We want to allow open-unlinked files to be moved, 
+		 * so provide allow_deleted == 1 for hfs_vget().
+		 */
+		if (hfs_vget(hfsmp, key->fileID, &vp, 0, 1) != 0) {
+			continue;
+		}
+
+		error = hfs_reclaim_xattr(hfsmp, vp, key->fileID, allocLimit, context);
+		hfs_unlock(VTOC(vp));
+		vnode_put(vp);
+		if (error) {
+			printf ("hfs_reclaim_xattrspace: Error relocating xattrs for fileid=%u (error=%d)\n", key->fileID, error);
+			break;
+		}
+		prev_fileid = key->fileID;
+		files_moved++;
+	}
+
+	if (files_moved) {
+		printf("hfs_reclaim_xattrspace: Relocated %u xattr blocks from %u files on \"%s\"\n", 
+				(hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
+				files_moved, hfsmp->vcbVN);
 	}
-	error = hfs_journal_flush(hfsmp);
-	if (error) {
-		printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
+
+	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
+	return error;
+}
+
+/* 
+ * Reclaim blocks from regular files.
+ *
+ * This function iterates over all the records in the catalog btree looking 
+ * for files with extents that overlap into the space we're trying to 
+ * free up.  If a file extent requires relocation, it looks up the vnode 
+ * and calls a function to relocate the data.
+ *
+ * Returns:
+ * 	Zero on success, non-zero on failure. 
+ */
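+/*
+ * Sketch of the scan below: every kHFSPlusFileRecord in the catalog is
+ * checked with hfs_file_extent_overlaps(); for a record that overlaps
+ * allocLimit, the vnode is looked up (allowing open-unlinked files) and
+ * hfs_reclaim_file() is run on the data fork and, when present, the
+ * resource fork.
+ */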
+static int 
+hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context) 
+{
+	int error;
+	FCB *fcb;
+	struct BTreeIterator *iterator = NULL;
+	struct FSBufferDescriptor btdata;
+	int btree_operation;
+	int lockflags;
+	struct HFSPlusCatalogFile filerec;
+	struct vnode *vp;
+	struct vnode *rvp;
+	struct filefork *datafork;
+	u_int32_t files_moved = 0;
+	u_int32_t prev_blocksmoved;
+
+	fcb = VTOF(hfsmp->hfs_catalog_vp);
+	/* Store the value to print total blocks moved by this function at the end */
+	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;
+
+	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
+		return ENOMEM;
 	}
+	bzero(iterator, sizeof(*iterator));
+
+	btdata.bufferAddress = &filerec;
+	btdata.itemSize = sizeof(filerec);
+	btdata.itemCount = 1;
+
+	btree_operation = kBTreeFirstRecord;
+	while (1) {
+		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
+		hfs_systemfile_unlock(hfsmp, lockflags);
+		if (error) {
+			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
+				error = 0;				
+			}
+			break;
+		}
+		btree_operation = kBTreeNextRecord;
+
+		if (filerec.recordType != kHFSPlusFileRecord) {
+			continue;
+		}
+
+		/* Check if any of the extents require relocation */
+		if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) {
+			continue;
+		}
 
-	if (!error && hfs_resize_debug) {
-		printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount);
+		/* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */
+		if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) {
+			continue;
+		}
+
+		/* If data fork exists or item is a directory hard link, relocate blocks */
+		datafork = VTOF(vp);
+		if ((datafork && datafork->ff_blocks > 0) || vnode_isdir(vp)) {
+			error = hfs_reclaim_file(hfsmp, vp, filerec.fileID, 
+					kHFSDataForkType, allocLimit, context);
+			if (error)  {
+				printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
+				hfs_unlock(VTOC(vp));
+				vnode_put(vp);
+				break;
+			}
+		}
+
+		/* If resource fork exists or item is a directory hard link, relocate blocks */
+		if (((VTOC(vp)->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) || vnode_isdir(vp)) {
+			if (vnode_isdir(vp)) {
+				/* Resource fork vnode lookup is invalid for directory hard links, 
+				 * so we use the data fork vnode as a stand-in for the resource 
+				 * fork vnode.
+				 */
+				rvp = vp;
+			} else {
+				error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
+				if (error) {
+					printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", filerec.fileID, error);
+					hfs_unlock(VTOC(vp));
+					vnode_put(vp);
+					break;
+				}
+				VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
+			}
+
+			error = hfs_reclaim_file(hfsmp, rvp, filerec.fileID, 
+					kHFSResourceForkType, allocLimit, context);
+			if (error) {
+				printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
+				hfs_unlock(VTOC(vp));
+				vnode_put(vp);
+				break;
+			}
+		}
+
+		/* The file forks were relocated successfully; now drop the 
+		 * cnode lock and vnode reference, and continue iterating to 
+		 * the next catalog record.
+		 */
+		hfs_unlock(VTOC(vp));
+		vnode_put(vp);
+		files_moved++;
 	}
-	return error;
 
-free_fail:
-	(void) BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS);
-fail:
-	hfs_systemfile_unlock(hfsmp, lockflags);
-	(void) hfs_end_transaction(hfsmp);
-	if (hfs_resize_debug) {
-		printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error);
+	if (files_moved) {
+		printf("hfs_reclaim_filespace: Relocated %u blocks from %u files on \"%s\"\n", 
+				(hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
+				files_moved, hfsmp->vcbVN);
 	}
+
+	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
 	return error;
 }
 
-
 /*
  * Reclaim space at the end of a file system.
  *
  * Inputs - 
- * 	startblk 	- start block of the space being reclaimed
+ * 	allocLimit 	- start block of the space being reclaimed
  * 	reclaimblks 	- number of allocation blocks to reclaim
  */
 static int
-hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimblks, vfs_context_t context)
+hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context)
 {
-	struct vnode *vp = NULL;
-	FCB *fcb;
-	struct BTreeIterator * iterator = NULL;
-	struct FSBufferDescriptor btdata;
-	struct HFSPlusCatalogFile filerec;
-	u_int32_t  saved_next_allocation;
-	cnid_t * cnidbufp;
-	size_t cnidbufsize;
-	int filecnt = 0;
-	int maxfilecnt;
-	u_int32_t block;
-	int lockflags;
-	int i, j;
-	int error;
-	int lastprogress = 0;
-	u_int32_t blks_moved = 0;
-	u_int32_t total_blks_moved = 0;
-	Boolean need_relocate;
+	int error = 0;
+
+	/* 
+	 * Preflight the bitmap to find the total number of blocks that 
+	 * need relocation. 
+	 *
+	 * Note: Since allocLimit is set to the location of the new 
+	 * alternate volume header, the check below does not account for 
+	 * blocks allocated for the old alternate volume header.
+	 */
+	error = hfs_count_allocated(hfsmp, allocLimit, reclaimblks, &(hfsmp->hfs_resize_totalblocks));
+	if (error) {
+		printf ("hfs_reclaimspace: Unable to determine total blocks to reclaim error=%d\n", error);
+		return error;
+	}
+	if (hfs_resize_debug) {
+		printf ("hfs_reclaimspace: Total number of blocks to reclaim = %u\n", hfsmp->hfs_resize_totalblocks);
+	}
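+
+	/* Relocation proceeds in stages: the system files (bitmap, 
+	 * extents, catalog, attributes, startup), then the journal, then 
+	 * regular file extents, and finally extent-based extended 
+	 * attributes. 
+	 */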
 
 	/* Relocate extents of the Allocation file if they're in the way. */
-	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, startblk, SFL_BITMAP, &blks_moved, context);
+	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, kHFSAllocationFileID, 
+			kHFSDataForkType, allocLimit, context);
 	if (error) {
 		printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error);
 		return error;
 	}
-	total_blks_moved += blks_moved;
 
 	/* Relocate extents of the Extents B-tree if they're in the way. */
-	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, startblk, SFL_EXTENTS, &blks_moved, context);
+	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, kHFSExtentsFileID, 
+			kHFSDataForkType, allocLimit, context);
 	if (error) {
 		printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error);
 		return error;
 	}
-	total_blks_moved += blks_moved;
 
 	/* Relocate extents of the Catalog B-tree if they're in the way. */
-	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, startblk, SFL_CATALOG, &blks_moved, context);
+	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, kHFSCatalogFileID, 
+			kHFSDataForkType, allocLimit, context);
 	if (error) {
 		printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error);
 		return error;
 	}
-	total_blks_moved += blks_moved;
 
 	/* Relocate extents of the Attributes B-tree if they're in the way. */
-	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, startblk, SFL_ATTRIBUTE, &blks_moved, context);
+	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, kHFSAttributesFileID, 
+			kHFSDataForkType, allocLimit, context);
 	if (error) {
 		printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error);
 		return error;
 	}
-	total_blks_moved += blks_moved;
 
 	/* Relocate extents of the Startup File if there is one and they're in the way. */
-	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, startblk, SFL_STARTUP, &blks_moved, context);
+	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, kHFSStartupFileID, 
+			kHFSDataForkType, allocLimit, context);
 	if (error) {
 		printf("hfs_reclaimspace: reclaim startup file returned %d\n", error);
 		return error;
 	}
-	total_blks_moved += blks_moved;
 	
 	/*
 	 * We need to make sure the alternate volume header gets flushed if we moved
@@ -4922,249 +6539,98 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimbl
 	 * shrinking the size of the volume, or else the journal code will panic
 	 * with an invalid (too large) block number.
 	 *
-	 * Note that total_blks_moved will be set if ANY extent was moved, even
+	 * Note that hfs_resize_blocksmoved will be set if ANY extent was moved, even
 	 * if it was just an overflow extent.  In this case, the journal_flush isn't
 	 * strictly required, but shouldn't hurt.
 	 */
-	if (total_blks_moved) {
-		hfs_journal_flush(hfsmp);
+	if (hfsmp->hfs_resize_blocksmoved) {
+		hfs_journal_flush(hfsmp, FALSE);
 	}
 
-	if (hfsmp->jnl_start + (hfsmp->jnl_size / hfsmp->blockSize) > startblk) {
-		error = hfs_reclaim_journal_file(hfsmp, context);
-		if (error) {
-			printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error);
-			return error;
-		}
-	}
-	
-	if (hfsmp->vcbJinfoBlock >= startblk) {
-		error = hfs_reclaim_journal_info_block(hfsmp, context);
-		if (error) {
-			printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error);
-			return error;
-		}
+	/* Relocate journal file blocks if they're in the way. */
+	error = hfs_reclaim_journal_file(hfsmp, allocLimit, context);
+	if (error) {
+		printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error);
+		return error;
 	}
 	
-	/* For now move a maximum of 250,000 files. */
-	maxfilecnt = MIN(hfsmp->hfs_filecount, 250000);
-	maxfilecnt = MIN((u_int32_t)maxfilecnt, reclaimblks);
-	cnidbufsize = maxfilecnt * sizeof(cnid_t);
-	if (kmem_alloc(kernel_map, (vm_offset_t *)&cnidbufp, cnidbufsize)) {
-		return (ENOMEM);
-	}	
-	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
-		kmem_free(kernel_map, (vm_offset_t)cnidbufp, cnidbufsize);
-		return (ENOMEM);
-	}	
-
-	saved_next_allocation = hfsmp->nextAllocation;
-	/* Always try allocating new blocks after the metadata zone */
-	HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_start);
-
-	fcb = VTOF(hfsmp->hfs_catalog_vp);
-	bzero(iterator, sizeof(*iterator));
-
-	btdata.bufferAddress = &filerec;
-	btdata.itemSize = sizeof(filerec);
-	btdata.itemCount = 1;
-
-	/* Keep the Catalog and extents files locked during iteration. */
-	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_SHARED_LOCK);
-
-	error = BTIterateRecord(fcb, kBTreeFirstRecord, iterator, NULL, NULL);
+	/* Relocate the journal info block if it's in the way. */
+	error = hfs_reclaim_journal_info_block(hfsmp, allocLimit, context);
 	if (error) {
-		goto end_iteration;
-	}
-	/*
-	 * Iterate over all the catalog records looking for files
-	 * that overlap into the space we're trying to free up and 
-	 * the total number of blocks that will require relocation.
-	 */
-	for (filecnt = 0; filecnt < maxfilecnt; ) {
-		error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
-		if (error) {
-			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
-				error = 0;				
-			}
-			break;
-		}
-		if (filerec.recordType != kHFSPlusFileRecord) {
-			continue;
-		}
-
-		need_relocate = false;
-		/* Check if data fork overlaps the target space */
-		for (i = 0; i < kHFSPlusExtentDensity; ++i) {
-			if (filerec.dataFork.extents[i].blockCount == 0) {
-				break;
-			}
-			block = filerec.dataFork.extents[i].startBlock +
-				filerec.dataFork.extents[i].blockCount;
-			if (block >= startblk) {
-				if ((filerec.fileID == hfsmp->hfs_jnlfileid) ||
-				    (filerec.fileID == hfsmp->hfs_jnlinfoblkid)) {
-					printf("hfs_reclaimspace: cannot move active journal\n");
-					error = EPERM;
-					goto end_iteration;
-				}
-				need_relocate = true;
-				goto save_fileid;
-			}
-		}
-
-		/* Check if resource fork overlaps the target space */
-		for (j = 0; j < kHFSPlusExtentDensity; ++j) {
-			if (filerec.resourceFork.extents[j].blockCount == 0) {
-				break;
-			}
-			block = filerec.resourceFork.extents[j].startBlock +
-				filerec.resourceFork.extents[j].blockCount;
-			if (block >= startblk) {
-				need_relocate = true;
-				goto save_fileid;
-			}
-		}
-
-		/* Check if any forks' overflow extents overlap the target space */
-		if ((i == kHFSPlusExtentDensity) || (j == kHFSPlusExtentDensity)) {
-			if (hfs_overlapped_overflow_extents(hfsmp, startblk, filerec.fileID)) {
-				need_relocate = true;
-				goto save_fileid;
-			}
-		}
-
-save_fileid:
-		if (need_relocate == true) {
-			cnidbufp[filecnt++] = filerec.fileID;
-			if (hfs_resize_debug) {
-				printf ("hfs_reclaimspace: Will relocate extents for fileID=%u\n", filerec.fileID);
-			}
-		}
+		printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error);
+		return error;
 	}
 
-end_iteration:
-	/* If no regular file was found to be relocated and 
-	 * no system file was moved, we probably do not have 
-	 * enough space to relocate the system files, or 
-	 * something else went wrong.
-	 */
-	if ((filecnt == 0) && (total_blks_moved == 0)) {
-		printf("hfs_reclaimspace: no files moved\n");
-		error = ENOSPC;
+	/* Reclaim extents from catalog file records */
+	error = hfs_reclaim_filespace(hfsmp, allocLimit, context);
+	if (error) {
+		printf ("hfs_reclaimspace: hfs_reclaim_filespace returned error=%d\n", error);
+		return error;
 	}
-	/* All done with catalog. */
-	hfs_systemfile_unlock(hfsmp, lockflags);
-	if (error || filecnt == 0)
-		goto out;
-
-	hfsmp->hfs_resize_filesmoved = 0;
-	hfsmp->hfs_resize_totalfiles = filecnt;
-	
-	/* Now move any files that are in the way. */
-	for (i = 0; i < filecnt; ++i) {
-		struct vnode *rvp;
-		struct cnode *cp;
-		struct filefork *datafork;
-
-		if (hfs_vget(hfsmp, cnidbufp[i], &vp, 0) != 0)
-			continue;
-		
-		cp = VTOC(vp);
-		datafork = VTOF(vp);
-
-		/* Relocating directory hard links is not supported, so we punt (see radar 6217026). */
-		if ((cp->c_flag & C_HARDLINK) && vnode_isdir(vp)) {
-			printf("hfs_reclaimspace: Unable to relocate directory hard link id=%d\n", cp->c_cnid);
-			error = EINVAL;
-		       	goto out;
-		}
-
-		/* Relocate any overlapping data fork blocks. */
-		if (datafork && datafork->ff_blocks > 0) {
-			error = hfs_reclaim_file(hfsmp, vp, startblk, 0, &blks_moved, context);
-			if (error)  {
-				printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", cnidbufp[i], error);
-				break;
-			}
-			total_blks_moved += blks_moved;
-		}
-
-		/* Relocate any overlapping resource fork blocks. */
-		if ((cp->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) {
-			error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, TRUE);
-			if (error) {
-				printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", cnidbufp[i], error);
-				break;
-			}
-			error = hfs_reclaim_file(hfsmp, rvp, startblk, 0, &blks_moved, context);
-			VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
-			if (error) {
-				printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", cnidbufp[i], error);
-				break;
-			}
-			total_blks_moved += blks_moved;
-		}
-		hfs_unlock(cp);
-		vnode_put(vp);
-		vp = NULL;
-
-		++hfsmp->hfs_resize_filesmoved;
 
-		/* Report intermediate progress. */
-		if (filecnt > 100) {
-			int progress;
-
-			progress = (i * 100) / filecnt;
-			if (progress > (lastprogress + 9)) {
-				printf("hfs_reclaimspace: %d%% done...\n", progress);
-				lastprogress = progress;
-			}
-		}
-	}
-	if (vp) {
-		hfs_unlock(VTOC(vp));
-		vnode_put(vp);
-		vp = NULL;
-	}
-	if (hfsmp->hfs_resize_filesmoved != 0) {
-		printf("hfs_reclaimspace: relocated %u blocks from %d files on \"%s\"\n",
-			total_blks_moved, (int)hfsmp->hfs_resize_filesmoved, hfsmp->vcbVN);
+	/* Reclaim extents from extent-based extended attributes, if any */
+	error = hfs_reclaim_xattrspace(hfsmp, allocLimit, context);
+	if (error) {
+		printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error);
+		return error;
 	}
-out:
-	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
-	kmem_free(kernel_map, (vm_offset_t)cnidbufp, cnidbufsize);
 
-	/*
-	 * Restore the roving allocation pointer on errors.
-	 * (but only if we didn't move any files)
-	 */
-	if (error && hfsmp->hfs_resize_filesmoved == 0) {
-		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, saved_next_allocation);
-	}
-	return (error);
+	return error;
 }
 
 
 /*
- * Check if there are any overflow data or resource fork extents that overlap 
+ * Check if there are any extents (including overflow extents) that overlap 
  * into the disk space that is being reclaimed.  
  *
  * Output - 
- * 	1 - One of the overflow extents need to be relocated
- * 	0 - No overflow extents need to be relocated, or there was an error
+ * 	true  - One of the extents needs to be relocated
+ * 	false - No extents need to be relocated, or there was an error
  */
 static int
-hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t fileID)
+hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec)
 {
 	struct BTreeIterator * iterator = NULL;
 	struct FSBufferDescriptor btdata;
 	HFSPlusExtentRecord extrec;
 	HFSPlusExtentKey *extkeyptr;
 	FCB *fcb;
-	int overlapped = 0;
-	int i;
+	int overlapped = false;
+	int i, j;
 	int error;
+	int lockflags = 0;
+	u_int32_t endblock;
+
+	/* Check if data fork overlaps the target space */
+	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
+		if (filerec->dataFork.extents[i].blockCount == 0) {
+			break;
+		}
+		endblock = filerec->dataFork.extents[i].startBlock +
+			filerec->dataFork.extents[i].blockCount;
+		if (endblock > allocLimit) {
+			overlapped = true;
+			goto out;
+		}
+	}
+
+	/* Check if resource fork overlaps the target space */
+	for (j = 0; j < kHFSPlusExtentDensity; ++j) {
+		if (filerec->resourceFork.extents[j].blockCount == 0) {
+			break;
+		}
+		endblock = filerec->resourceFork.extents[j].startBlock +
+			filerec->resourceFork.extents[j].blockCount;
+		if (endblock > allocLimit) {
+			overlapped = true;
+			goto out;
+		}
+	}
+
+	/* Return if there can be no overflow extents for this file */
+	if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) {
+		goto out;
+	}
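+
+	/* Each fork stores at most kHFSPlusExtentDensity (8) extents in 
+	 * its catalog record; if either fork used all of its slots, the 
+	 * remaining extents live in the extents overflow B-tree and must 
+	 * be checked as well. 
+	 */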
 
 	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
 		return 0;
@@ -5173,7 +6639,7 @@ hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_in
 	extkeyptr = (HFSPlusExtentKey *)&iterator->key;
 	extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
 	extkeyptr->forkType = 0;
-	extkeyptr->fileID = fileID;
+	extkeyptr->fileID = filerec->fileID;
 	extkeyptr->startBlock = 0;
 
 	btdata.bufferAddress = &extrec;
@@ -5182,6 +6648,8 @@ hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_in
 	
 	fcb = VTOF(hfsmp->hfs_extents_vp);
 
+	lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
+
 	/* This will position the iterator just before the first overflow 
 	 * extent record for given fileID.  It will always return btNotFound, 
 	 * so we special case the error code.
@@ -5197,7 +6665,7 @@ hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_in
 	error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
 	while (error == 0) {
 		/* Stop when we encounter a different file. */
-		if (extkeyptr->fileID != fileID) {
+		if (extkeyptr->fileID != filerec->fileID) {
 			break;
 		}
 		/* Check if any of the forks exist in the target space. */
@@ -5205,8 +6673,9 @@ hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_in
 			if (extrec[i].blockCount == 0) {
 				break;
 			}
-			if ((extrec[i].startBlock + extrec[i].blockCount) >= startblk) {
-				overlapped = 1;
+			endblock = extrec[i].startBlock + extrec[i].blockCount;
+			if (endblock > allocLimit) {
+				overlapped = true;
 				goto out;
 			}
 		}
@@ -5215,7 +6684,12 @@ hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_in
 	}
 
 out:
-	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
+	if (lockflags) {
+		hfs_systemfile_unlock(hfsmp, lockflags);
+	}
+	if (iterator) {
+		kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
+	}
 	return overlapped;
 }
 
@@ -5231,10 +6705,11 @@ hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress)
 		return (ENXIO);
 	}
 
-	if (hfsmp->hfs_resize_totalfiles > 0)
-		*progress = (hfsmp->hfs_resize_filesmoved * 100) / hfsmp->hfs_resize_totalfiles;
-	else
+	if (hfsmp->hfs_resize_totalblocks > 0) {
+		*progress = (u_int32_t)((hfsmp->hfs_resize_blocksmoved * 100ULL) / hfsmp->hfs_resize_totalblocks);
+	} else {
 		*progress = 0;
+	}
 
 	return (0);
 }
@@ -5270,6 +6745,7 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t
 {
 #define HFS_ATTR_CMN_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST))
 #define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST))
+#define HFS_ATTR_CMN_VOL_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST | ATTR_CMN_ACCTIME))
 
 	ExtendedVCB *vcb = VFSTOVCB(mp);
 	struct hfsmount *hfsmp = VFSTOHFS(mp);
@@ -5396,20 +6872,20 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t
 	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
 		vol_attributes_attr_t *attrp = &fsap->f_attributes;
 
-        	attrp->validattr.commonattr = HFS_ATTR_CMN_VALIDMASK;
+        	attrp->validattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
         	attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
         	attrp->validattr.dirattr = ATTR_DIR_VALIDMASK;
         	attrp->validattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
         	attrp->validattr.forkattr = 0;
 
-        	attrp->nativeattr.commonattr = HFS_ATTR_CMN_VALIDMASK;
+        	attrp->nativeattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
         	attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
         	attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK;
         	attrp->nativeattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
         	attrp->nativeattr.forkattr = 0;
 		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
 	}	
-	fsap->f_create_time.tv_sec = hfsmp->vcbCrDate;
+	fsap->f_create_time.tv_sec = hfsmp->hfs_itime;
 	fsap->f_create_time.tv_nsec = 0;
 	VFSATTR_SET_SUPPORTED(fsap, f_create_time);
 	fsap->f_modify_time.tv_sec = hfsmp->vcbLsMod;
@@ -5470,6 +6946,10 @@ hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
 	cat_cookie_t cookie;
 	int lockflags;
 	int error = 0;
+	char converted_volname[256];
+	size_t volname_length = 0;
+	size_t conv_volname_length = 0;
+	
 
 	/*
 	 * Ignore attempts to rename a volume to a zero-length name.
@@ -5504,8 +6984,16 @@ hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
 				 */
 				if (!error) {
 					strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));
+					volname_length = strlen ((const char*)vcb->vcbVN);
+#define DKIOCCSSETLVNAME _IOW('d', 198, char[1024])
+					/* Send the volume name down to CoreStorage if necessary */	
+					error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
+					if (error == 0) {
+						(void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
+					}
+					error = 0;
 				}
-
+				
 				hfs_systemfile_unlock(hfsmp, lockflags);
 				cat_postflight(hfsmp, &cookie, p);
 			
@@ -5604,7 +7092,7 @@ static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
 	struct hfs_mount_args *args = NULL;
 
 	/* Replay allowed only on raw devices */
-	if (!vnode_ischr(devvp)) {
+	if (!vnode_ischr(devvp) && !vnode_isblk(devvp)) {
 		retval = EINVAL;
 		goto out;
 	}
@@ -5626,7 +7114,10 @@ static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
 	bzero(args, sizeof(struct hfs_mount_args));
 
 	retval = hfs_mountfs(devvp, mp, args, 1, context);
-	buf_flushdirtyblks(devvp, MNT_WAIT, 0, "hfs_journal_replay");
+	buf_flushdirtyblks(devvp, TRUE, 0, "hfs_journal_replay");
+	
+	/* FSYNC the devnode to be sure all data has been flushed */
+	retval = VNOP_FSYNC(devvp, MNT_WAIT, context);
 
 out:
 	if (mp) {
diff --git a/bsd/hfs/hfs_vfsutils.c b/bsd/hfs/hfs_vfsutils.c
index 97559487f..103232431 100644
--- a/bsd/hfs/hfs_vfsutils.c
+++ b/bsd/hfs/hfs_vfsutils.c
@@ -46,6 +46,7 @@
 #include <sys/utfconv.h>
 #include <sys/kauth.h>
 #include <sys/fcntl.h>
+#include <sys/fsctl.h>
 #include <sys/vnode_internal.h>
 #include <kern/clock.h>
 
@@ -68,6 +69,8 @@ static int  hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *v
 
 static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
 
+#define HFS_MOUNT_DEBUG 1
+
 
 //*******************************************************************************
 // Note: Finder information in the HFS/HFS+ metadata are considered opaque and
@@ -87,7 +90,6 @@ unsigned char hfs_attrname[] = "Attribute B-tree";
 unsigned char hfs_startupname[] = "Startup File";
 
 
-__private_extern__
 OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
 		__unused struct proc *p)
 {
@@ -97,6 +99,7 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
 	struct cat_desc cndesc;
 	struct cat_attr cnattr;
 	struct cat_fork fork;
+	int newvnode_flags = 0;
 
 	/* Block size must be a multiple of 512 */
 	if (SWAP_BE32(mdb->drAlBlkSiz) == 0 ||
@@ -115,7 +118,7 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
 	 *
 	 */
 	vcb->vcbSigWord		= SWAP_BE16 (mdb->drSigWord);
-	vcb->vcbCrDate		= to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
+	vcb->hfs_itime		= to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
 	vcb->localCreateDate	= SWAP_BE32 (mdb->drCrDate);
 	vcb->vcbLsMod		= to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
 	vcb->vcbAtrb		= SWAP_BE16 (mdb->drAtrb);
@@ -145,12 +148,13 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
 	 * volume encoding we use MacRoman as a fallback.
 	 */
 	if (error || (utf8chars == 0)) {
-		(void) mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
-		/* If we fail to encode to UTF8 from Mac Roman, the name is bad. Deny mount */
+		error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
+		/* If we fail to encode to UTF8 from Mac Roman, the name is bad.  Deny the mount */
 		if (error) {
 			goto MtVolErr;
 		}
 	}
+
 	hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
 	vcb->vcbVBMIOSize = kHFSBlockSize;
 
@@ -184,11 +188,19 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
 	cnattr.ca_blocks = fork.cf_blocks;
 
 	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
-	                        &hfsmp->hfs_extents_vp);
-	if (error) goto MtVolErr;
+	                        &hfsmp->hfs_extents_vp, &newvnode_flags);
+	if (error) {
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error);
+		}
+		goto MtVolErr;
+	}
 	error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
 	                                 (KeyCompareProcPtr)CompareExtentKeys));
 	if (error) {
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error);
+		}
 		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 		goto MtVolErr;
 	}
@@ -213,14 +225,20 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
 	cnattr.ca_blocks = fork.cf_blocks;
 
 	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
-	                        &hfsmp->hfs_catalog_vp);
+	                        &hfsmp->hfs_catalog_vp, &newvnode_flags);
 	if (error) {
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error);
+		}
 		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 		goto MtVolErr;
 	}
 	error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
 	                                 (KeyCompareProcPtr)CompareCatalogKeys));
 	if (error) {
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error);
+		}
 		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
 		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 		goto MtVolErr;
@@ -237,37 +255,41 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
 	cnattr.ca_blocks = 0;
 
 	error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
-	                         &hfsmp->hfs_allocation_vp);
+	                         &hfsmp->hfs_allocation_vp, &newvnode_flags);
 	if (error) {
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error);
+		}
 		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
 		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
 		goto MtVolErr;
 	}
 	hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
 
-    /* mark the volume dirty (clear clean unmount bit) */
+	/* mark the volume dirty (clear clean unmount bit) */
 	vcb->vcbAtrb &=	~kHFSVolumeUnmountedMask;
 
-    if (error == noErr)
-      {
+    if (error == noErr) {
 		error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, NULL, NULL, NULL);
-      }
-
-    if ( error == noErr )
-      {
-        if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )		//	if the disk is not write protected
-          {
-            MarkVCBDirty( vcb );								//	mark VCB dirty so it will be written
-          }
-      }
-
+		if (error && HFS_MOUNT_DEBUG) {
+			printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error);
+		}
+	}
+	
+    if (error == noErr) {
+		/* If the disk isn't write protected... */
+        if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) {
+            MarkVCBDirty (vcb); //	mark VCB dirty so it will be written
+		}
+	}
+	
 	/*
 	 * all done with system files so we can unlock now...
 	 */
 	hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
 	hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
 	hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
-
+	
 	if (error == noErr) {
 		/* If successful, then we can just return once we've unlocked the cnodes */
 		return error;
@@ -275,9 +297,7 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
 
     //--	Release any resources allocated so far before exiting with an error:
 MtVolErr:
-	ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
-	ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
-	ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
+	hfsUnmount(hfsmp, NULL);
 
     return (error);
 }
@@ -288,7 +308,6 @@ MtVolErr:
 //
 //*******************************************************************************
 
-__private_extern__
 OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 	off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
 {
@@ -301,8 +320,12 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 	struct BTreeInfoRec btinfo;
 	u_int16_t  signature;
 	u_int16_t  hfs_version;
+	int newvnode_flags = 0;
 	int  i;
 	OSErr retval;
+	char converted_volname[256];
+	size_t volname_length = 0;
+	size_t conv_volname_length = 0;
 
 	signature = SWAP_BE16(vhp->signature);
 	hfs_version = SWAP_BE16(vhp->version);
@@ -324,23 +347,38 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 		/* Removed printf for invalid HFS+ signature because it gives
 		 * false error for UFS root volume 
 		 */
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mounthfsplus: unknown Volume Signature\n");
+		}
 		return (EINVAL);
 	}
 
 	/* Block size must be at least 512 and a power of 2 */
 	blockSize = SWAP_BE32(vhp->blockSize);
-	if (blockSize < 512 || !powerof2(blockSize))
+	if (blockSize < 512 || !powerof2(blockSize)) {
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mounthfsplus: invalid blocksize (%d) \n", blockSize);
+		}
 		return (EINVAL);
+	}
    
 	/* don't mount a writable volume if its dirty, it must be cleaned by fsck_hfs */
 	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
-	    (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0)
+	    (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) {
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n");
+		}
 		return (EINVAL);
+	}
 
 	/* Make sure we can live with the physical block size. */
 	if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
 	    (embeddedOffset & (hfsmp->hfs_logical_block_size - 1)) ||
 	    (blockSize < hfsmp->hfs_logical_block_size)) {
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mounthfsplus: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n", 
+					blockSize, hfsmp->hfs_logical_block_size);
+		}
 		return (ENXIO);
 	}
 
@@ -445,9 +483,12 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 				SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
 	}
 	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
-	                         &hfsmp->hfs_extents_vp);
+	                         &hfsmp->hfs_extents_vp, &newvnode_flags);
 	if (retval)
 	{
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval);
+		}
 		goto ErrorExit;
 	}
 	hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
@@ -457,6 +498,9 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 	                                  (KeyCompareProcPtr) CompareExtentKeysPlus));
 	if (retval)
 	{
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval);
+		}
 		goto ErrorExit;
 	}
 	/*
@@ -478,8 +522,11 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 				SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
 	}
 	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
-	                         &hfsmp->hfs_catalog_vp);
+	                         &hfsmp->hfs_catalog_vp, &newvnode_flags);
 	if (retval) {
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval);
+		}
 		goto ErrorExit;
 	}
 	hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
@@ -488,6 +535,9 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 	retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
 	                                  (KeyCompareProcPtr) CompareExtendedCatalogKeys));
 	if (retval) {
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval);
+		}
 		goto ErrorExit;
 	}
 	if ((hfsmp->hfs_flags & HFS_X) &&
@@ -519,8 +569,11 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 				SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
 	}
 	retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
-	                         &hfsmp->hfs_allocation_vp);
+	                         &hfsmp->hfs_allocation_vp, &newvnode_flags);
 	if (retval) {
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval);
+		}
 		goto ErrorExit;
 	}
 	hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
@@ -546,8 +599,11 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 					SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
 		}
 		retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
-					 &hfsmp->hfs_attribute_vp);
+					 &hfsmp->hfs_attribute_vp, &newvnode_flags);
 		if (retval) {
+			if (HFS_MOUNT_DEBUG) {
+				printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval);
+			}
 			goto ErrorExit;
 		}
 		hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
@@ -555,6 +611,22 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 		retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
 						  (KeyCompareProcPtr) hfs_attrkeycompare));
 		if (retval) {
+			if (HFS_MOUNT_DEBUG) {
+				printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval);
+			}
+			goto ErrorExit;
+		}
+
+		/* Initialize the vnode for the virtual attribute data file that 
+		 * spans the entire file system space, used for performing I/O 
+		 * to the attribute B-tree.  We hold an iocount on the attrdata 
+		 * vnode for the entire duration of the mount (similar to the 
+		 * B-tree vnodes).
+		 */
+		retval = init_attrdata_vnode(hfsmp);
+		if (retval) {
+			if (HFS_MOUNT_DEBUG) {
+				printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval);
+			}
 			goto ErrorExit;
 		}
 	}
@@ -579,8 +651,11 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 					SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
 		}
 		retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
-					 &hfsmp->hfs_startup_vp);
+					 &hfsmp->hfs_startup_vp, &newvnode_flags);
 		if (retval) {
+			if (HFS_MOUNT_DEBUG) {
+				printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval);
+			}
 			goto ErrorExit;
 		}
 		hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
@@ -590,13 +665,29 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 	/* Pick up volume name and create date */
 	retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, &cndesc, &cnattr, NULL);
 	if (retval) {
+		if (HFS_MOUNT_DEBUG) {
+			printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval);
+		}
 		goto ErrorExit;
 	}
-	vcb->vcbCrDate = cnattr.ca_itime;
+	vcb->hfs_itime = cnattr.ca_itime;
 	vcb->volumeNameEncodingHint = cndesc.cd_encoding;
 	bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
+	volname_length = strlen ((const char*)vcb->vcbVN);
 	cat_releasedesc(&cndesc);
+	
+#define DKIOCCSSETLVNAME _IOW('d', 198, char[1024])
+
 
+	/* Send the volume name down to CoreStorage if necessary */	
+	retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
+	if (retval == 0) {
+		(void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
+	}	
+	
+	/* Reset retval to 0; we don't care about errors in volname conversion */
+	retval = 0;
+	
 	/* mark the volume dirty (clear clean unmount bit) */
 	vcb->vcbAtrb &=	~kHFSVolumeUnmountedMask;
 	if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
@@ -624,6 +715,9 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 				// EROFS is a special error code that means the volume has an external
 				// journal which we couldn't find.  in that case we do not want to
 				// rewrite the volume header - we'll just refuse to mount the volume.
+				if (HFS_MOUNT_DEBUG) {
+					printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval);
+				}
 				retval = EINVAL;
 				goto ErrorExit;
 			}
@@ -663,7 +757,10 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 					bp = NULL;
 			    }
 			}
-
+			
+			if (HFS_MOUNT_DEBUG) {
+				printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval);
+			}
 			retval = EINVAL;
 			goto ErrorExit;
 		} else if (hfsmp->jnl) {
@@ -697,7 +794,7 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 	/*
 	 * Establish a metadata allocation zone.
 	 */
-	hfs_metadatazone_init(hfsmp);
+	hfs_metadatazone_init(hfsmp, false);
 
 	/*
 	 * Make any metadata zone adjustments.
@@ -726,8 +823,13 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
 	{
 		retval = hfs_erase_unused_nodes(hfsmp);
-		if (retval)
+		if (retval) {
+			if (HFS_MOUNT_DEBUG) {
+				printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN);
+			}
+
 			goto ErrorExit;
+		}
 	}
 	
 	if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )	// if the disk is not write protected
@@ -739,30 +841,33 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 	 * Allow hot file clustering if conditions allow.
 	 */
 	if ((hfsmp->hfs_flags & HFS_METADATA_ZONE)  &&
-	    ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
-	    ((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0)) {
+	    ((hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_SSD)) == 0)) {
 		(void) hfs_recording_init(hfsmp);
 	}
 
 	/* Force ACLs on HFS+ file systems. */
 	vfs_setextendedsecurity(HFSTOVFS(hfsmp));
 
-	/* Check if volume supports writing of extent-based extended attributes */
-	hfs_check_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE);
+	/* Enable extent-based extended attributes by default */
+	hfsmp->hfs_flags |= HFS_XATTR_EXTENTS;
+
+	/* See if this volume should have per-file content protection enabled */
+	if (vcb->vcbAtrb & kHFSContentProtectionMask) {
+		vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT);
+	}
 
 	return (0);
 
 ErrorExit:
 	/*
-	 * A fatal error occurred and the volume cannot be mounted
-	 * release any resources that we aquired...
+	 * A fatal error occurred and the volume cannot be mounted, so 
+	 * release any resources that we acquired...
 	 */
-	if (hfsmp->hfs_attribute_vp)
-		ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
-	ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
-	ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
-	ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
-
+	hfsUnmount(hfsmp, NULL);
+		
+	if (HFS_MOUNT_DEBUG) {
+		printf("hfs_mounthfsplus: encountered errorr (%d)\n", retval);
+	}
 	return (retval);
 }
 
@@ -797,44 +902,47 @@ static void ReleaseMetaFileVNode(struct vnode *vp)
 *
 *************************************************************/
 
-__private_extern__
 int
 hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
 {
-	/* Get rid of our attribute data vnode (if any). */
+	/* Get rid of our attribute data vnode (if any).  This is done 
+	 * after the vflush() during unmount, so we don't need to worry 
+	 * about any locks.
+	 */
 	if (hfsmp->hfs_attrdata_vp) {
-		vnode_t advp = hfsmp->hfs_attrdata_vp;
-	
-		if (vnode_get(advp) == 0) {
-			vnode_rele_ext(advp, O_EVTONLY, 0);
-			vnode_put(advp);
-		}
+		ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp);
 		hfsmp->hfs_attrdata_vp = NULLVP;
 	}
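+
+	/* Clearing the cnode/vnode pointers below lets 
+	 * hfs_systemfile_lock() skip system files that have already been 
+	 * torn down. 
+	 */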
 
-	if (hfsmp->hfs_startup_vp)
+	if (hfsmp->hfs_startup_vp) {
 		ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
-
-	if (hfsmp->hfs_allocation_vp)
-		ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
-
-	if (hfsmp->hfs_attribute_vp)
+		hfsmp->hfs_startup_cp = NULL;
+		hfsmp->hfs_startup_vp = NULL;
+	}
+	
+	if (hfsmp->hfs_attribute_vp) {
 		ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
+		hfsmp->hfs_attribute_cp = NULL;
+		hfsmp->hfs_attribute_vp = NULL;
+	}
 
-	ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
-	ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
+	if (hfsmp->hfs_catalog_vp) {
+		ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
+		hfsmp->hfs_catalog_cp = NULL;
+		hfsmp->hfs_catalog_vp = NULL;
+	}
 
-	/*
-	 * Setting these pointers to NULL so that any references
-	 * past this point will fail, and tell us the point of failure.
-	 * Also, facilitates a check in hfs_update for a null catalog
-	 * vp
-	 */
-	hfsmp->hfs_allocation_vp = NULL;
-	hfsmp->hfs_attribute_vp = NULL;
-	hfsmp->hfs_catalog_vp = NULL;
-	hfsmp->hfs_extents_vp = NULL;
-	hfsmp->hfs_startup_vp = NULL;
+	if (hfsmp->hfs_extents_vp) {
+		ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
+		hfsmp->hfs_extents_cp = NULL;
+		hfsmp->hfs_extents_vp = NULL;
+	}
+
+	if (hfsmp->hfs_allocation_vp) {
+		ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
+		hfsmp->hfs_allocation_cp = NULL;
+		hfsmp->hfs_allocation_vp = NULL;
+	}
 
 	return (0);
 }
@@ -880,11 +988,56 @@ overflow_extents(struct filefork *fp)
 	return (fp->ff_blocks > blocks);
 }
 
+/*
+ * Lock the HFS global journal lock 
+ */
+int 
+hfs_lock_global (struct hfsmount *hfsmp, enum hfslocktype locktype) {
+
+	void *thread = current_thread();
+
+	if (hfsmp->hfs_global_lockowner == thread) {
+		panic ("hfs_lock_global: locking against myself!");
+	}
+
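+	/* Shared holders are recorded with the HFS_SHARED_OWNER sentinel 
+	 * since multiple threads may hold the shared lock concurrently. 
+	 */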
+	/* HFS_SHARED_LOCK */
+	if (locktype == HFS_SHARED_LOCK) {
+		lck_rw_lock_shared (&hfsmp->hfs_global_lock);
+		hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER;
+	}
+	/* HFS_EXCLUSIVE_LOCK */
+	else {
+		lck_rw_lock_exclusive (&hfsmp->hfs_global_lock);
+		hfsmp->hfs_global_lockowner = thread;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Unlock the HFS global journal lock
+ */
+void 
+hfs_unlock_global (struct hfsmount *hfsmp) {
+	
+	void *thread = current_thread();
+
+	/* HFS_EXCLUSIVE_LOCK */
+	if (hfsmp->hfs_global_lockowner == thread) {
+		hfsmp->hfs_global_lockowner = NULL;
+		lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock);
+	}
+	/* HFS_SHARED_LOCK */
+	else {
+		lck_rw_unlock_shared (&hfsmp->hfs_global_lock);
+	}
+}
+
 
 /*
  * Lock HFS system file(s).
  */
-__private_extern__
 int
 hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype)
 {
@@ -905,7 +1058,12 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype
 		}
 #endif /* HFS_CHECK_LOCK_ORDER */
 
-		(void) hfs_lock(hfsmp->hfs_catalog_cp, locktype);
+		if (hfsmp->hfs_catalog_cp) {
+			(void) hfs_lock(hfsmp->hfs_catalog_cp, locktype);
+		} else {
+			flags &= ~SFL_CATALOG;
+		}
+
 		/*
 		 * When the catalog file has overflow extents then
 		 * also acquire the extents b-tree lock if its not
@@ -949,7 +1107,12 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype
 		}
 #endif /* HFS_CHECK_LOCK_ORDER */
 
-		(void) hfs_lock(hfsmp->hfs_startup_cp, locktype);
+		if (hfsmp->hfs_startup_cp) {
+			(void) hfs_lock(hfsmp->hfs_startup_cp, locktype);
+		} else {
+			flags &= ~SFL_STARTUP;
+		}
+
 		/*
 		 * When the startup file has overflow extents then
 		 * also acquire the extents b-tree lock if its not
@@ -966,17 +1129,14 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype
 	 */
 	if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
 		/*
-		 * Since the only bitmap operations are clearing and
-		 * setting bits we always need exclusive access. And
-		 * when we have a journal, we can "hide" behind that
-		 * lock since we can only change the bitmap from
-		 * within a transaction.
+		 * If there's no bitmap cnode, ignore the bitmap lock.
 		 */
-		if (hfsmp->jnl || (hfsmp->hfs_allocation_cp == NULL)) {
+		if (hfsmp->hfs_allocation_cp == NULL) {
 			flags &= ~SFL_BITMAP;
 		} else {
 			(void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK);
-			/* The bitmap lock is also grabbed when only extent lock 
+			/* 
+			 * The bitmap lock is also grabbed when only extent lock 
 			 * was requested. Set the bitmap lock bit in the lock
 			 * flags which callers will use during unlock.
 			 */
@@ -988,7 +1148,11 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype
 		 * Since the extents btree lock is recursive we always
 		 * need exclusive access.
 		 */
-		(void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK);
+		if (hfsmp->hfs_extents_cp) {
+			(void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK);
+		} else {
+			flags &= ~SFL_EXTENTS;
+		}
 	}
 	return (flags);
 }
@@ -996,7 +1160,6 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype
 /*
  * unlock HFS system file(s).
  */
-__private_extern__
 void
 hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
 {
@@ -1023,7 +1186,7 @@ hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
 		}
 		hfs_unlock(hfsmp->hfs_attribute_cp);
 	}
-	if (flags & SFL_CATALOG) {
+	if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) {
 		if (hfsmp->jnl == NULL) {
 			BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
 			numOfLockedBuffs = count_lock_queue();
@@ -1035,10 +1198,10 @@ hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
 		}
 		hfs_unlock(hfsmp->hfs_catalog_cp);
 	}
-	if (flags & SFL_BITMAP) {
+	if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) {
 		hfs_unlock(hfsmp->hfs_allocation_cp);
 	}
-	if (flags & SFL_EXTENTS) {
+	if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) {
 		if (hfsmp->jnl == NULL) {
 			BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
 			numOfLockedBuffs = count_lock_queue();
@@ -1168,7 +1331,6 @@ u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
 }
 
 
-__private_extern__
 u_int32_t
 GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
 			struct cat_attr *fattr, struct cat_fork *forkinfo)
@@ -1208,7 +1370,6 @@ GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
  * If the volume was not cleanly unmounted then some of these may
  * have persisted and need to be removed.
  */
-__private_extern__
 void
 hfs_remove_orphans(struct hfsmount * hfsmp)
 {
@@ -1286,8 +1447,9 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
 		 */
 		if (bcmp(tempname, filename, namelen) == 0) {
    			struct filefork dfork;
-    			struct filefork rfork;
+    		struct filefork rfork;
   			struct cnode cnode;
+			int mode = 0;
 
 			bzero(&dfork, sizeof(dfork));
 			bzero(&rfork, sizeof(rfork));
@@ -1344,8 +1506,10 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
 						fsize = 0;
 					}
 
-					if (TruncateFileC(vcb, (FCB*)&dfork, fsize, false) != 0) {
-						printf("hfs: error truncting data fork!\n");
+					if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0, 
+									  cnode.c_attr.ca_fileid, false) != 0) {
+						printf("hfs: error truncating data fork!\n");
+
 						break;
 					}
 
@@ -1376,8 +1540,8 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
 				rfork.ff_cp = &cnode;
 				cnode.c_datafork = NULL;
 				cnode.c_rsrcfork = &rfork;
-				if (TruncateFileC(vcb, (FCB*)&rfork, 0, false) != 0) {
-					printf("hfs: error truncting rsrc fork!\n");
+				if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) {
+					printf("hfs: error truncating rsrc fork!\n");
 					break;
 				}
 			}
@@ -1391,7 +1555,9 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
 				break;
 			}
 			
-			if (cnode.c_attr.ca_mode & S_IFDIR) {
+			mode = cnode.c_attr.ca_mode & S_IFMT;
+
+			if (mode == S_IFDIR) {
 				orphaned_dirs++;
 			}
 			else {
@@ -1400,7 +1566,7 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
 
 			/* Update parent and volume counts */	
 			hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
-			if (cnode.c_attr.ca_mode & S_IFDIR) {
+			if (mode == S_IFDIR) {
 				DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
 			}
 
@@ -1416,7 +1582,7 @@ hfs_remove_orphans(struct hfsmount * hfsmp)
 			   Now that Catalog is unlocked, update the volume info, making
 			   sure to differentiate between files and directories
 			*/
-			if (cnode.c_attr.ca_mode & S_IFDIR) {
+			if (mode == S_IFDIR) {
 				hfs_volupdate(hfsmp, VOL_RMDIR, 0);
 			}
 			else{
@@ -1489,7 +1655,6 @@ u_int32_t logBlockSize;
 	return logBlockSize;	
 }
 
-__private_extern__
 u_int32_t
 hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
 {
@@ -1517,7 +1682,7 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
 	else
 		freeblks = 0;
 
-#ifdef HFS_SPARSE_DEV
+#if HFS_SPARSE_DEV
 	/* 
 	 * When the underlying device is sparse, check the
 	 * available space on the backing store volume.
@@ -1841,7 +2006,7 @@ journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
 	// desired uuid so let's try to open the device for writing and
 	// see if it works.  if it does, we'll use it.
 	
-	NDINIT(&nd, LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel());
+	NDINIT(&nd, LOOKUP, OP_LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel());
 	if ((error = namei(&nd))) {
 		printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
 		return 1;   // keep iterating
@@ -1888,7 +2053,6 @@ journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
 
 extern dev_t IOBSDGetMediaWithUUID(const char *uuid_cstring, char *bsd_name, int bsd_name_len, int timeout);
 extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg);
-extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);
 kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len);
 
 
@@ -1940,7 +2104,6 @@ open_journal_dev(const char *vol_device,
 }
 
 
-__private_extern__
 int
 hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 					   void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
@@ -2063,6 +2226,8 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 									arg_flags,
 									arg_tbufsz,
 									hfs_sync_metadata, hfsmp->hfs_mp);
+		if (hfsmp->jnl)
+			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
 
 		// no need to start a transaction here... if this were to fail
 		// we'd just re-init it on the next mount.
@@ -2084,6 +2249,8 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
 								  arg_flags,
 								  arg_tbufsz,
 								  hfs_sync_metadata, hfsmp->hfs_mp);
+		if (hfsmp->jnl)
+			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
 
 		if (write_jibp) {
 			buf_bwrite(jinfo_bp);
@@ -2323,6 +2490,8 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
 									arg_flags,
 									arg_tbufsz,
 									hfs_sync_metadata, hfsmp->hfs_mp);
+		if (hfsmp->jnl)
+			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
 
 		// no need to start a transaction here... if this were to fail
 		// we'd just re-init it on the next mount.
@@ -2352,6 +2521,8 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
 								  arg_flags,
 								  arg_tbufsz,
 								  hfs_sync_metadata, hfsmp->hfs_mp);
+		if (hfsmp->jnl)
+			journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
 	}
 			
 
@@ -2408,8 +2579,15 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
 #define HOTBAND_MINIMUM_SIZE  (10*1024*1024)
 #define HOTBAND_MAXIMUM_SIZE  (512*1024*1024)
 
+/* Initialize the metadata zone.
+ *
+ * If the size of the volume is less than the minimum size for a 
+ * metadata zone, the metadata zone is disabled.
+ *
+ * If disable is true, the metadata zone is disabled unconditionally.
+ */
 void
-hfs_metadatazone_init(struct hfsmount *hfsmp)
+hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
 {
 	ExtendedVCB  *vcb;
 	u_int64_t  fs_size;
@@ -2436,6 +2614,11 @@ hfs_metadatazone_init(struct hfsmount *hfsmp)
 		really_do_it = 0;
 	}
 
+	/* If caller wants to disable metadata zone, do it */
+	if (disable == true) {
+		really_do_it = 0;
+	}
+
 	/*
 	 * Start with space for the boot blocks and Volume Header.
 	 * 1536 = byte offset from start of volume to end of volume header:
@@ -2626,7 +2809,6 @@ hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
  * Determine if a file is a "virtual" metadata file.
  * This includes journal and quota files.
  */
-__private_extern__
 int
 hfs_virtualmetafile(struct cnode *cp)
 {
@@ -2698,7 +2880,6 @@ hfs_sync_ejectable(struct hfsmount *hfsmp)
 }
 
 
-__private_extern__
 int
 hfs_start_transaction(struct hfsmount *hfsmp)
 {
@@ -2723,11 +2904,11 @@ hfs_start_transaction(struct hfsmount *hfsmp)
 	}
 #endif /* HFS_CHECK_LOCK_ORDER */
 
-    if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
-	lck_rw_lock_shared(&hfsmp->hfs_global_lock);
-	OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
-	unlock_on_err = 1;
-    }
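+	/* A thread that already owns the journal holds the global lock 
+	 * from its outermost transaction, so only new entrants take the 
+	 * shared lock and bump the active-threads count here. 
+	 */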
+	if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
+		hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
+		OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
+		unlock_on_err = 1;
+	}
 
 	/* If a downgrade to read-only mount is in progress, no other
 	 * process than the downgrade process is allowed to modify 
@@ -2739,67 +2920,89 @@ hfs_start_transaction(struct hfsmount *hfsmp)
 		goto out;
 	}
 
-    if (hfsmp->jnl) {
-	ret = journal_start_transaction(hfsmp->jnl);
-	if (ret == 0) {
-	    OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
+	if (hfsmp->jnl) {
+		ret = journal_start_transaction(hfsmp->jnl);
+		if (ret == 0) {
+			OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
+		}
+	} else {
+		ret = 0;
 	}
-    } else {
-	ret = 0;
-    }
 
 out:
-    if (ret != 0 && unlock_on_err) {
-	lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
-	OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
-    }
+	if (ret != 0 && unlock_on_err) {
+		hfs_unlock_global (hfsmp);
+		OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
+	}
 
     return ret;
 }
 
-__private_extern__
 int
 hfs_end_transaction(struct hfsmount *hfsmp)
 {
     int need_unlock=0, ret;
 
-    if (    hfsmp->jnl == NULL
-	|| (   journal_owner(hfsmp->jnl) == current_thread()
+    if ((hfsmp->jnl == NULL) || ( journal_owner(hfsmp->jnl) == current_thread()
 	    && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) {
-
 	    need_unlock = 1;
     } 
 
-    if (hfsmp->jnl) {
-	ret = journal_end_transaction(hfsmp->jnl);
-    } else {
-	ret = 0;
-    }
+	if (hfsmp->jnl) {
+		ret = journal_end_transaction(hfsmp->jnl);
+	} else {
+		ret = 0;
+	}
 
-    if (need_unlock) {
-	OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
-	lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
-	hfs_sync_ejectable(hfsmp);
-    }
+	if (need_unlock) {
+		OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
+		hfs_unlock_global (hfsmp);
+		hfs_sync_ejectable(hfsmp);
+	}
 
     return ret;
 }
 
 
-__private_extern__
+/* 
+ * Flush the contents of the journal to the disk. 
+ *
+ *  Input: 
+ *  	wait_for_IO - 
+ *  	If TRUE, wait for the in-memory journal to be written to the 
+ *  	disk consistently, and also wait for all asynchronous 
+ *  	metadata blocks to be written to their corresponding 
+ *  	locations on the disk.  This means that the journal is 
+ *  	empty at this point and does not contain any transactions.  
+ *  	This is overkill in normal scenarios but is useful whenever 
+ *  	the metadata blocks are required to be consistent on disk 
+ *  	instead of just the journal being consistent, such as before 
+ *  	live verification and live volume resizing.  
+ *
+ *  	If FALSE, only wait for the in-memory journal to be written 
+ *  	to the disk consistently.  This means that the journal still 
+ *  	contains uncommitted transactions and the file system 
+ *  	metadata blocks in those journal transactions might be 
+ *  	written asynchronously to the disk, with no guarantee that 
+ *  	they reach the disk before returning to the caller.  Note 
+ *  	that this option is sufficient for file system data 
+ *  	integrity as it guarantees consistent journal content on 
+ *  	the disk.
+ */
 int
-hfs_journal_flush(struct hfsmount *hfsmp)
+hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO)
 {
 	int ret;
-	
+
 	/* Only peek at hfsmp->jnl while holding the global lock */
-	lck_rw_lock_shared(&hfsmp->hfs_global_lock);
+	hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
 	if (hfsmp->jnl) {
-		ret = journal_flush(hfsmp->jnl);
+		ret = journal_flush(hfsmp->jnl, wait_for_IO);
 	} else {
 		ret = 0;
 	}
-	lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
+	hfs_unlock_global (hfsmp);
 	
 	return ret;
 }
@@ -2824,7 +3027,6 @@ hfs_journal_flush(struct hfsmount *hfsmp)
  * unused nodes have been repaired.  A newer newfs_hfs will set this bit.
  * As will fsck_hfs when it repairs the unused nodes.
  */
-__private_extern__
 int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
 {
 	int result; 
@@ -2877,3 +3079,92 @@ int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
 done:
 	return result;
 }
+
+
+extern time_t snapshot_timestamp;
+
+int
+check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg)
+{
+	int tracked_error = 0, snapshot_error = 0;
+	
+	if (vp == NULL) {
+		return 0;
+	}
+	
+	if (VTOC(vp)->c_flags & UF_TRACKED) {
+		// the file has the tracked bit set, so send an event to the tracked-file handler
+		int error;
+		
+		// printf("hfs: tracked-file: encountered a file with the tracked bit set! (vp %p)\n", vp);
+		error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_TRACK_EVENT);
+		if (error) {
+			if (error == EAGAIN) {
+				printf("hfs: tracked-file: timed out waiting for namespace handler...\n");
+				
+			} else if (error == EINTR) {
+				// printf("hfs: tracked-file: got a signal while waiting for namespace handler...\n");
+				tracked_error = EINTR;
+			}
+		}
+	}
+
+	if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
+		// the change time is within this epoch
+		int error;
+		
+		error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
+		if (error == EDEADLK) {
+			snapshot_error = 0;
+		} else if (error) {
+			if (error == EAGAIN) {
+				printf("hfs: cow-snapshot: timed out waiting for namespace handler...\n");
+			} else if (error == EINTR) {
+				// printf("hfs: cow-snapshot: got a signal while waiting for namespace handler...\n");
+				snapshot_error = EINTR;
+			}
+		}
+	}
+	
+	if (tracked_error) return tracked_error;
+	if (snapshot_error) return snapshot_error;
+	
+	return 0;
+}
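A minimal sketch, not part of the patch, of how the vnops later in this diff drive this helper: c_ctime is captured before any locks are taken, so the snapshot-epoch test sees the pre-operation change time.

	time_t orig_ctime = VTOC(vp)->c_ctime;	/* capture before locking */
	check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_DELETE_OP, NULL);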
+
+int
+check_for_dataless_file(struct vnode *vp, uint64_t op_type)
+{
+	int error;
+
+	if (vp == NULL || (VTOC(vp)->c_flags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) {
+		// there's nothing to do, it's not dataless
+		return 0;
+	}
+			
+	// printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
+	error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
+	if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
+		error = 0;
+	} else if (error) {
+		if (error == EAGAIN) {
+			printf("hfs: dataless: timed out waiting for namespace handler...\n");
+			// XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
+			return 0;				
+		} else if (error == EINTR) {
+			// printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
+			return EINTR;
+		}
+	} else if (VTOC(vp)->c_flags & UF_COMPRESSED) {
+		//
+		// if we're here, the dataless bit is still set on the file 
+		// which means it didn't get handled.  we return an error
+		// but it's presently ignored by all callers of this function.
+		//
+		// XXXdbg - EDATANOTPRESENT is what we really need...
+		//
+		return EBADF;
+	}				
+
+	return error;
+}
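And a matching sketch for the dataless case, mirroring hfs_vnop_mmap below: fault the file's contents in before allowing the operation to proceed.

	int error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
	if (error != 0) {
		return error;	/* EINTR if the wait for the handler was signalled */
	}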
diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c
index 49973d29c..4c526f77b 100644
--- a/bsd/hfs/hfs_vnops.c
+++ b/bsd/hfs/hfs_vnops.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -45,6 +45,10 @@
 #include <sys/disk.h>
 #include <sys/kauth.h>
 #include <sys/uio_internal.h>
+#include <sys/fsctl.h>
+#include <sys/cprotect.h>
+
+#include <string.h>
 
 #include <miscfs/specfs/specdev.h>
 #include <miscfs/fifofs/fifo.h>
@@ -72,20 +76,23 @@
 /* Always F_FULLFSYNC? 1=yes,0=no (default due to "various" reasons is 'no') */
 int always_do_fullfsync = 0;
 SYSCTL_DECL(_vfs_generic);
-SYSCTL_INT (_vfs_generic, OID_AUTO, always_do_fullfsync, CTLFLAG_RW, &always_do_fullfsync, 0, "always F_FULLFSYNC when fsync is called");
+SYSCTL_INT (_vfs_generic, OID_AUTO, always_do_fullfsync, CTLFLAG_RW | CTLFLAG_LOCKED, &always_do_fullfsync, 0, "always F_FULLFSYNC when fsync is called");
 
-static int hfs_makenode(struct vnode *dvp, struct vnode **vpp,
+int hfs_makenode(struct vnode *dvp, struct vnode **vpp,
                         struct componentname *cnp, struct vnode_attr *vap,
                         vfs_context_t ctx);
+int hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, __unused struct proc *p);
+int hfs_metasync_all(struct hfsmount *hfsmp);
 
-static int hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, __unused struct proc *p);
-static int hfs_metasync_all(struct hfsmount *hfsmp);
+int hfs_removedir(struct vnode *, struct vnode *, struct componentname *,
+                         int, int);
+int hfs_removefile(struct vnode *, struct vnode *, struct componentname *,
+                          int, int, int, struct vnode *, int);
 
-static int hfs_removedir(struct vnode *, struct vnode *, struct componentname *,
-                         int);
+int hfs_movedata (struct vnode *, struct vnode*);
+static int hfs_move_fork (struct filefork *srcfork, struct cnode *src, 
+						  struct filefork *dstfork, struct cnode *dst);
 
-static int hfs_removefile(struct vnode *, struct vnode *, struct componentname *,
-                          int, int, int, struct vnode *);
 
 #if FIFO
 static int hfsfifo_read(struct vnop_read_args *);
@@ -95,26 +102,27 @@ static int hfsfifo_close(struct vnop_close_args *);
 extern int (**fifo_vnodeop_p)(void *);
 #endif /* FIFO */
 
-static int hfs_vnop_close(struct vnop_close_args*);
-static int hfs_vnop_create(struct vnop_create_args*);
-static int hfs_vnop_exchange(struct vnop_exchange_args*);
-static int hfs_vnop_fsync(struct vnop_fsync_args*);
-static int hfs_vnop_mkdir(struct vnop_mkdir_args*);
-static int hfs_vnop_mknod(struct vnop_mknod_args*);
-static int hfs_vnop_getattr(struct vnop_getattr_args*);
-static int hfs_vnop_open(struct vnop_open_args*);
-static int hfs_vnop_readdir(struct vnop_readdir_args*);
-static int hfs_vnop_remove(struct vnop_remove_args*);
-static int hfs_vnop_rename(struct vnop_rename_args*);
-static int hfs_vnop_rmdir(struct vnop_rmdir_args*);
-static int hfs_vnop_symlink(struct vnop_symlink_args*);
-static int hfs_vnop_setattr(struct vnop_setattr_args*);
-static int hfs_vnop_readlink(struct vnop_readlink_args *);
-static int hfs_vnop_pathconf(struct vnop_pathconf_args *);
-static int hfs_vnop_whiteout(struct vnop_whiteout_args *);
-static int hfsspec_read(struct vnop_read_args *);
-static int hfsspec_write(struct vnop_write_args *);
-static int hfsspec_close(struct vnop_close_args *);
+int hfs_vnop_close(struct vnop_close_args*);
+int hfs_vnop_create(struct vnop_create_args*);
+int hfs_vnop_exchange(struct vnop_exchange_args*);
+int hfs_vnop_fsync(struct vnop_fsync_args*);
+int hfs_vnop_mkdir(struct vnop_mkdir_args*);
+int hfs_vnop_mknod(struct vnop_mknod_args*);
+int hfs_vnop_getattr(struct vnop_getattr_args*);
+int hfs_vnop_open(struct vnop_open_args*);
+int hfs_vnop_readdir(struct vnop_readdir_args*);
+int hfs_vnop_remove(struct vnop_remove_args*);
+int hfs_vnop_rename(struct vnop_rename_args*);
+int hfs_vnop_rmdir(struct vnop_rmdir_args*);
+int hfs_vnop_symlink(struct vnop_symlink_args*);
+int hfs_vnop_setattr(struct vnop_setattr_args*);
+int hfs_vnop_readlink(struct vnop_readlink_args *);
+int hfs_vnop_pathconf(struct vnop_pathconf_args *);
+int hfs_vnop_whiteout(struct vnop_whiteout_args *);
+int hfs_vnop_mmap(struct vnop_mmap_args *ap);
+int hfsspec_read(struct vnop_read_args *);
+int hfsspec_write(struct vnop_write_args *);
+int hfsspec_close(struct vnop_close_args *);
 
 /* Options for hfs_removedir and hfs_removefile */
 #define HFSRM_SKIP_RESERVE  0x01
@@ -131,7 +139,7 @@ static int hfsspec_close(struct vnop_close_args *);
 /*
  * Create a regular file.
  */
-static int
+int
 hfs_vnop_create(struct vnop_create_args *ap)
 {
 	int error;
@@ -164,6 +172,7 @@ again:
 		/* Make sure it was a file. */
 		if ((error == 0) && !vnode_isreg(*args.a_vpp)) {
 			vnode_put(*args.a_vpp);
+			*args.a_vpp = NULLVP;
 			error = EEXIST;
 		}
 		args.a_cnp->cn_nameiop = CREATE;
@@ -174,7 +183,7 @@ again:
 /*
  * Make device special file.
  */
-static int
+int
 hfs_vnop_mknod(struct vnop_mknod_args *ap)
 {
 	struct vnode_attr *vap = ap->a_vap;
@@ -245,7 +254,7 @@ hfs_ref_data_vp(struct cnode *cp, struct vnode **data_vp, int skiplock)
 		return EINVAL;
 	}
 	
-	if (0 == hfs_vget(VTOHFS(cp->c_rsrc_vp), cp->c_cnid, data_vp, 1) &&
+	if (0 == hfs_vget(VTOHFS(cp->c_rsrc_vp), cp->c_cnid, data_vp, 1, 0) &&
 		0 != data_vp) {
 		vref = vnode_ref(*data_vp);
 		vnode_put(*data_vp);
@@ -334,6 +343,8 @@ hfs_file_is_compressed(struct cnode *cp, int skiplock)
  *	if the caller has passed a valid vnode (has a ref count > 0), then hfsmp and fid are not required.
  *	if the caller doesn't have a vnode, pass NULL in vp, and pass valid hfsmp and fid.
  *	files size is returned in size (required)
+ *	if the indicated file is a directory (or something that doesn't have a data fork), then this call
+ *	will return an error and the caller should fall back to treating the item as an uncompressed file
  */
 int
 hfs_uncompressed_size_of_compressed_file(struct hfsmount *hfsmp, struct vnode *vp, cnid_t fid, off_t *size, int skiplock)
@@ -349,7 +360,7 @@ hfs_uncompressed_size_of_compressed_file(struct hfsmount *hfsmp, struct vnode *v
 		if (!hfsmp || !fid) {							/* make sure we have the required parameters */
 			return EINVAL;
 		}
-		if (0 != hfs_vget(hfsmp, fid, &vp, skiplock)) {		/* vnode is null, use hfs_vget() to get it */
+		if (0 != hfs_vget(hfsmp, fid, &vp, skiplock, 0)) {		/* vnode is null, use hfs_vget() to get it */
 			vp = NULL;
 		} else {
 			putaway = 1;								/* note that hfs_vget() was used to acquire the vnode */
@@ -359,10 +370,27 @@ hfs_uncompressed_size_of_compressed_file(struct hfsmount *hfsmp, struct vnode *v
 	 * ensures the cached size is present in case decmpfs hasn't 
 	 * encountered this node yet.
 	 */
-	if ( ( NULL != vp ) && hfs_file_is_compressed(VTOC(vp), skiplock) ) {
-		*size = decmpfs_cnode_get_vnode_cached_size(VTOCMP(vp));	/* file info will be cached now, so get size */
-	} else {
-		ret = EINVAL;
+	if (vp) {
+		if (hfs_file_is_compressed(VTOC(vp), skiplock) ) {
+			*size = decmpfs_cnode_get_vnode_cached_size(VTOCMP(vp));	/* file info will be cached now, so get size */
+		} else {
+			if (VTOCMP(vp) && VTOCMP(vp)->cmp_type >= CMP_MAX) {
+				if (VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) {
+					// if we don't recognize this type, just use the real data fork size
+					if (VTOC(vp)->c_datafork) {
+						*size = VTOC(vp)->c_datafork->ff_size;
+						ret = 0;
+					} else {
+						ret = EINVAL;
+					}
+				} else {
+					*size = decmpfs_cnode_get_vnode_cached_size(VTOCMP(vp));	/* file info will be cached now, so get size */
+					ret = 0;
+				}
+			} else {
+				ret = EINVAL;
+			}
+		}
 	}
 	
 	if (putaway) {		/* did we use hfs_vget() to get this vnode? */
@@ -396,7 +424,7 @@ hfs_hides_xattr(vfs_context_t ctx, struct cnode *cp, const char *name, int skipl
 /*
  * Open a file/directory.
  */
-static int
+int
 hfs_vnop_open(struct vnop_open_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
@@ -516,7 +544,7 @@ hfs_vnop_open(struct vnop_open_args *ap)
 /*
  * Close a file/directory.
  */
-static int
+int
 hfs_vnop_close(ap)
 	struct vnop_close_args /* {
 		struct vnode *a_vp;
@@ -559,11 +587,11 @@ hfs_vnop_close(ap)
 			// release cnode lock; must acquire truncate lock BEFORE cnode lock
 			hfs_unlock(cp);
 
-			hfs_lock_truncate(cp, TRUE);
+			hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
 			tooktrunclock = 1;
 
 			if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) { 
-				hfs_unlock_truncate(cp, TRUE);
+				hfs_unlock_truncate(cp, 0);
 				// bail out if we can't re-acquire cnode lock
 				return 0;
 			}
@@ -585,8 +613,8 @@ hfs_vnop_close(ap)
 	// if we froze the fs and we're exiting, then "thaw" the fs 
 	if (hfsmp->hfs_freezing_proc == p && proc_exiting(p)) {
 	    hfsmp->hfs_freezing_proc = NULL;
-	    hfs_global_exclusive_lock_release(hfsmp);
-	    lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
+	    hfs_unlock_global (hfsmp);
+		lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
 	}
 
 	busy = vnode_isinuse(vp, 1);
@@ -601,7 +629,7 @@ hfs_vnop_close(ap)
 	}
 
 	if (tooktrunclock){
-		hfs_unlock_truncate(cp, TRUE);
+		hfs_unlock_truncate(cp, 0);
 	}
 	hfs_unlock(cp);
 
@@ -615,7 +643,7 @@ hfs_vnop_close(ap)
 /*
  * Get basic attributes.
  */
-static int
+int
 hfs_vnop_getattr(struct vnop_getattr_args *ap)
 {
 #define VNODE_ATTR_TIMES  \
@@ -648,10 +676,16 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
 			/* if it's a data fork, we need to know if it was compressed so we can report the uncompressed size */
 			compressed = hfs_file_is_compressed(cp, 0);
 		}
-		if (compressed && (VATTR_IS_ACTIVE(vap, va_data_size) || VATTR_IS_ACTIVE(vap, va_total_size))) {
-			if (0 != hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0)) {
-				/* failed to get the uncompressed size, we'll check for this later */
-				uncompressed_size = -1;
+		if ((VATTR_IS_ACTIVE(vap, va_data_size) || VATTR_IS_ACTIVE(vap, va_total_size))) {
+			// if it's compressed 
+			if (compressed || (!VNODE_IS_RSRC(vp) && cp->c_decmp && cp->c_decmp->cmp_type >= CMP_MAX)) {
+				if (0 != hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0)) {
+					/* failed to get the uncompressed size, we'll check for this later */
+					uncompressed_size = -1;
+				} else {
+					// fake that it's compressed
+					compressed = 1;
+				}
 			}
 		}
 	}
@@ -812,13 +846,17 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
 				
 				if (cp->c_blocks - VTOF(vp)->ff_blocks) {
 					/* We deal with rsrc fork vnode iocount at the end of the function */
-					error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, TRUE);
+					error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
 					if (error) {
-						/* 
-						 * hfs_vgetrsrc may have returned a vnode in rvp even though
-						 * we got an error, because we specified error_on_unlinked.
-						 * We need to drop the iocount after we release the cnode lock, so
-						 * it will be taken care of at the end of the function if it's needed.
+						/*
+						 * Note that we call hfs_vgetrsrc with error_on_unlinked
+						 * set to FALSE.  This is because we may be invoked via
+						 * fstat() on an open-unlinked file descriptor and we must 
+						 * continue to support access to the rsrc fork until it disappears.
+						 * The code at the end of this function will be
+						 * responsible for releasing the iocount generated by 
+						 * hfs_vgetrsrc.  This is because we can't drop the iocount
+						 * without unlocking the cnode first.
 						 */
 						goto out;
 					}
@@ -876,6 +914,17 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
 	vap->va_backup_time.tv_sec = cp->c_btime;
 	vap->va_backup_time.tv_nsec = 0;	
 
+	/* See if we need to emit the date added field to the user */
+	if (VATTR_IS_ACTIVE(vap, va_addedtime)) {
+		u_int32_t dateadded = hfs_get_dateadded (cp);
+		if (dateadded) {
+			vap->va_addedtime.tv_sec = dateadded;
+			vap->va_addedtime.tv_nsec = 0;
+			VATTR_SET_SUPPORTED (vap, va_addedtime);
+		}
+	}
+
 	/* XXX is this really a good 'optimal I/O size'? */
 	vap->va_iosize = hfsmp->hfs_logBlockSize;
 	vap->va_uid = cp->c_uid;
@@ -972,7 +1021,7 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
 			 * have an open-unlinked file.  Go to the next link in this case.
 			 */
 			if ((cp->c_desc.cd_namelen == 0) && (vap->va_linkid == cp->c_fileid)) {
-				if ((error = hfs_lookuplink(hfsmp, vap->va_linkid, &prevlinkid, &nextlinkid))){
+				if ((error = hfs_lookup_siblinglinks(hfsmp, vap->va_linkid, &prevlinkid, &nextlinkid))){
 					goto out;
 				}
 			}	
@@ -1029,7 +1078,7 @@ out:
 	return (error);
 }
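Illustrative sketch, not part of the patch: how a kernel client would consume the va_addedtime support added to hfs_vnop_getattr above. vp and ctx are assumed to be a valid vnode and vfs_context_t.

	struct vnode_attr va;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_addedtime);
	if ((vnode_getattr(vp, &va, ctx) == 0) &&
	    VATTR_IS_SUPPORTED(&va, va_addedtime)) {
		/* va.va_addedtime.tv_sec holds the date-added timestamp */
	}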
 
-static int
+int
 hfs_vnop_setattr(ap)
 	struct vnop_setattr_args /* {
 		struct vnode *a_vp;
@@ -1046,7 +1095,10 @@ hfs_vnop_setattr(ap)
 	int error = 0;
 	uid_t nuid;
 	gid_t ngid;
+	time_t orig_ctime;
 
+	orig_ctime = VTOC(vp)->c_ctime;
+	
 #if HFS_COMPRESSION
 	int decmpfs_reset_state = 0;
 	/*
@@ -1056,8 +1108,23 @@ hfs_vnop_setattr(ap)
 	error = decmpfs_update_attributes(vp, vap);
 	if (error)
 		return error;
+
+	//
+	// if this is not a size-changing setattr and it is not just
+	// an atime update, then check for a snapshot.
+	//
+	if (!VATTR_IS_ACTIVE(vap, va_data_size) && !(vap->va_active == VNODE_ATTR_va_access_time)) {
+		check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_METADATA_MOD, NULL);
+	}
 #endif
 
+
+#if CONFIG_PROTECT
+	if ((error = cp_handle_vnop(VTOC(vp), CP_WRITE_ACCESS)) != 0) {
+		return (error);
+	}
+#endif /* CONFIG_PROTECT */
+
 	hfsmp = VTOHFS(vp);
 
 	/* Don't allow modification of the journal file. */
@@ -1090,6 +1157,8 @@ hfs_vnop_setattr(ap)
 			}
 		}
 		
+		check_for_tracked_file(vp, orig_ctime, vap->va_data_size == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);
+
 		decmpfs_lock_compressed_data(dp, 1);
 		if (hfs_file_is_compressed(VTOC(vp), 1)) {
 			error = decmpfs_decompress_file(vp, dp, -1/*vap->va_data_size*/, 0, 1);
@@ -1101,13 +1170,13 @@ hfs_vnop_setattr(ap)
 #endif
 
 		/* Take truncate lock before taking cnode lock. */
-		hfs_lock_truncate(VTOC(vp), TRUE);
+		hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK);
 		
 		/* Perform the ubc_setsize before taking the cnode lock. */
 		ubc_setsize(vp, vap->va_data_size);
 
 		if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
-			hfs_unlock_truncate(VTOC(vp), TRUE);
+			hfs_unlock_truncate(VTOC(vp), 0);
 #if HFS_COMPRESSION
 			decmpfs_unlock_compressed_data(dp, 1);
 #endif
@@ -1117,7 +1186,7 @@ hfs_vnop_setattr(ap)
 
 		error = hfs_truncate(vp, vap->va_data_size, vap->va_vaflags & 0xffff, 1, 0, ap->a_context);
 
-		hfs_unlock_truncate(cp, TRUE);
+		hfs_unlock_truncate(cp, 0);
 #if HFS_COMPRESSION
 		decmpfs_unlock_compressed_data(dp, 1);
 #endif
@@ -1297,7 +1366,6 @@ out:
  * Change the mode on a file.
  * cnode must be locked before calling.
  */
-__private_extern__
 int
 hfs_chmod(struct vnode *vp, int mode, __unused kauth_cred_t cred, __unused struct proc *p)
 {
@@ -1328,7 +1396,6 @@ hfs_chmod(struct vnode *vp, int mode, __unused kauth_cred_t cred, __unused struc
 }
 
 
-__private_extern__
 int
 hfs_write_access(struct vnode *vp, kauth_cred_t cred, struct proc *p, Boolean considerFlags)
 {
@@ -1378,7 +1445,6 @@ hfs_write_access(struct vnode *vp, kauth_cred_t cred, struct proc *p, Boolean co
  * Perform chown operation on cnode cp;
  * code must be locked prior to call.
  */
-__private_extern__
 int
 #if !QUOTA
 hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, __unused kauth_cred_t cred,
@@ -1512,7 +1578,7 @@ good:
  * case the file is being tracked through its file ID. Typically
  * it's used after creating a new file during a safe-save.
  */
-static int
+int
 hfs_vnop_exchange(ap)
 	struct vnop_exchange_args /* {
 		struct vnode *a_fvp;
@@ -1538,6 +1604,7 @@ hfs_vnop_exchange(ap)
 	int lockflags;
 	int error = 0, started_tr = 0, got_cookie = 0;
 	cat_cookie_t cookie;
+	time_t orig_from_ctime, orig_to_ctime;
 
 	/* The files must be on the same volume. */
 	if (vnode_mount(from_vp) != vnode_mount(to_vp))
@@ -1546,6 +1613,9 @@ hfs_vnop_exchange(ap)
 	if (from_vp == to_vp)
 		return (EINVAL);
 
+	orig_from_ctime = VTOC(from_vp)->c_ctime;
+	orig_to_ctime = VTOC(to_vp)->c_ctime;
+
 #if HFS_COMPRESSION
 	if ( hfs_file_is_compressed(VTOC(from_vp), 0) ) {
 		if ( 0 != ( error = decmpfs_decompress_file(from_vp, VTOCMP(from_vp), -1, 0, 1) ) ) {
@@ -1560,6 +1630,50 @@ hfs_vnop_exchange(ap)
 	}
 #endif // HFS_COMPRESSION
 	
+	/* 
+	 * Normally, we want to notify the user handlers about the event,
+	 * except when a handler itself is driving the event.
+	 */
+	if ((ap->a_options & FSOPT_EXCHANGE_DATA_ONLY) == 0) {
+		check_for_tracked_file(from_vp, orig_from_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL);
+		check_for_tracked_file(to_vp, orig_to_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL);
+	}
+	else {
+		/* 
+		 * We're doing a data-swap.
+		 * Take the truncate lock/cnode lock, then verify there are no mmap references.
+		 * Issue an hfs_filedone to flush out all of the remaining state for this file.
+		 * Allow the rest of the codeflow to re-acquire the cnode locks in order.
+		 */
+		
+		hfs_lock_truncate (VTOC(from_vp), HFS_SHARED_LOCK);	
+			
+		if ((error = hfs_lock(VTOC(from_vp), HFS_EXCLUSIVE_LOCK))) {
+			hfs_unlock_truncate (VTOC(from_vp), 0);
+			return error;
+		}
+
+		/* Verify the source file is not in use by anyone besides us (including mmap refs) */
+		if (vnode_isinuse(from_vp, 1)) {
+			error = EBUSY;
+			hfs_unlock(VTOC(from_vp));
+			hfs_unlock_truncate (VTOC(from_vp), 0);
+			return error;
+		}
+
+		/* Flush out the data in the source file */
+		VTOC(from_vp)->c_flag |= C_SWAPINPROGRESS;
+		error = hfs_filedone (from_vp, ap->a_context);
+		VTOC(from_vp)->c_flag &= ~C_SWAPINPROGRESS;
+		hfs_unlock(VTOC(from_vp));
+		hfs_unlock_truncate(VTOC(from_vp), 0);
+
+		if (error) {
+			return error;
+		}
+	}
+
+	
 	if ((error = hfs_lockpair(VTOC(from_vp), VTOC(to_vp), HFS_EXCLUSIVE_LOCK)))
 		return (error);
 
@@ -1595,6 +1709,16 @@ hfs_vnop_exchange(ap)
 		}
 	}
 
+	/* 
+	 * Ok, now that all of the pre-flighting is done, call the underlying
+	 * function if needed.
+	 */
+	if (ap->a_options & FSOPT_EXCHANGE_DATA_ONLY) {
+		error = hfs_movedata(from_vp, to_vp);
+		goto exit;
+	}
+	
+	
 	if ((error = hfs_start_transaction(hfsmp)) != 0) {
 	    goto exit;
 	}
@@ -1729,11 +1853,338 @@ exit:
 	return (error);
 }
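A hedged userspace sketch of driving the data-only swap path added above. It assumes FSOPT_EXCHANGE_DATA_ONLY is exported to user code via <sys/attr.h>; in this patch the flag is only visible at the VNOP layer.

	#include <unistd.h>
	#include <sys/attr.h>

	/* Swap just the fork contents of two files; catalog metadata stays put. */
	int
	swap_contents(const char *tmp_path, const char *target_path)	/* hypothetical */
	{
		return exchangedata(tmp_path, target_path, FSOPT_EXCHANGE_DATA_ONLY);
	}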
 
+int
+hfs_vnop_mmap(struct vnop_mmap_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	int error;
+	
+	if (VNODE_IS_RSRC(vp)) {
+		/* allow pageins of the resource fork */
+	} else {
+		int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
+		time_t orig_ctime = VTOC(vp)->c_ctime;
+		
+		if (!compressed && (VTOC(vp)->c_flags & UF_COMPRESSED)) {
+			error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
+			if (error != 0) {
+				return error;
+			}
+		}
+
+		if (ap->a_fflags & PROT_WRITE) {
+			check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL);
+		}
+	}
+	
+	//
+	// NOTE: we return ENOTSUP because we want the cluster layer
+	//       to actually do all the real work.
+	//
+	return (ENOTSUP);
+}
+
+/*
+ * hfs_movedata
+ *
+ * This is a non-symmetric variant of exchangedata.  In this function,
+ * the contents of the fork in from_vp are moved to the fork
+ * specified by to_vp.  
+ * 
+ * The cnodes pointed to by 'from_vp' and 'to_vp' must be locked. 
+ *
+ * The vnode pointed to by 'to_vp' *must* be empty prior to invoking this function.
+ * We impose this restriction because we may not be able to fully delete the entire 
+ * file's contents in a single transaction, particularly if it has a lot of extents.
+ * In the normal file deletion codepath, the file is screened for two conditions:
+ * 1) bigger than 400MB, and 2) more than 8 extents.  If so, the file is relocated to 
+ * the hidden directory and the deletion is broken up into multiple truncates.  We can't
+ * do that here because both files need to exist in the namespace. The main reason this
+ * is imposed is that we may have to touch a whole lot of bitmap blocks if there are 
+ * many extents.
+ * 
+ * Any data written to 'from_vp' after this call completes is not guaranteed
+ * to be moved. 
+ * 
+ * Arguments:
+ * vnode from_vp: source file
+ * vnode to_vp: destination file; must be empty
+ * 
+ * Returns:
+ *	EFBIG - Destination file was not empty
+ *	0     - success
+ */
+int hfs_movedata (struct vnode *from_vp, struct vnode *to_vp) {
+
+	struct cnode *from_cp;
+	struct cnode *to_cp;
+	struct hfsmount *hfsmp = NULL;
+	int error = 0;
+	int started_tr = 0;
+	int lockflags = 0;
+	int overflow_blocks;
+	int rsrc = 0;
+
+
+	/* Get the HFS pointers */
+	from_cp = VTOC(from_vp);
+	to_cp = VTOC(to_vp);
+	hfsmp = VTOHFS(from_vp);
+
+	/* Verify that neither source/dest file is open-unlinked */
+	if (from_cp->c_flag & (C_DELETED | C_NOEXISTS)) {
+		error = EBUSY;
+		goto movedata_exit;
+	}
+
+	if (to_cp->c_flag & (C_DELETED | C_NOEXISTS)) {
+		error = EBUSY;
+		goto movedata_exit;
+	}
+
+	/* 
+	 * Verify the source file is not in use by anyone besides us.
+	 *
+	 * This function is typically invoked by a namespace handler 
+	 * process responding to a temporarily stalled system call.  
+	 * The FD that it is working off of is opened O_EVTONLY, so
+	 * it really has no active usecounts (the kusecount from O_EVTONLY
+	 * is subtracted from the total usecounts).
+	 * 
+	 * As a result, we shouldn't have any active usecounts against
+	 * this vnode when we go to check it below.
+	 */
+	if (vnode_isinuse(from_vp, 0)) {
+		error = EBUSY;
+		goto movedata_exit;
+	}
+
+	if (from_cp->c_rsrc_vp == from_vp) {
+		rsrc = 1;
+	}
+
+	/* 
+	 * We assume that the destination file is already empty. 
+	 * Verify that it is.
+	 */
+	if (rsrc) {
+		if (to_cp->c_rsrcfork->ff_size > 0) {
+			error = EFBIG;
+			goto movedata_exit;
+		}
+	}	
+	else {
+		if (to_cp->c_datafork->ff_size > 0) {
+			error = EFBIG;
+			goto movedata_exit;
+		}
+	}
+
+	/* If the source has the rsrc open, make sure the destination is also the rsrc */
+	if (rsrc) {
+		if (to_vp != to_cp->c_rsrc_vp) {
+			error = EINVAL;
+			goto movedata_exit;
+		}
+	}
+	else {
+		/* Verify that both forks are data forks */
+		if (to_vp != to_cp->c_vp) {
+			error = EINVAL;
+			goto movedata_exit;
+	   	}	   
+	}
+
+	/* 
+	 * See if the source file has overflow extents.  If it doesn't, we don't
+	 * need to call into MoveData, and the catalog will be enough.
+	 */
+	if (rsrc) {
+		overflow_blocks = overflow_extents(from_cp->c_rsrcfork);
+	}
+	else {
+		overflow_blocks = overflow_extents(from_cp->c_datafork);
+	}	
+
+	if ((error = hfs_start_transaction (hfsmp)) != 0) {
+		goto movedata_exit;
+	}
+	started_tr = 1;
+
+	/* Lock the system files: catalog, extents, attributes */
+	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
+
+	/* Copy over any catalog allocation data into the new spot. */
+	if (rsrc) {
+		if ((error = hfs_move_fork (from_cp->c_rsrcfork, from_cp, to_cp->c_rsrcfork, to_cp))){
+			hfs_systemfile_unlock(hfsmp, lockflags);
+			goto movedata_exit;
+		}
+	}
+	else {
+		if ((error = hfs_move_fork (from_cp->c_datafork, from_cp, to_cp->c_datafork, to_cp))) {
+			hfs_systemfile_unlock(hfsmp, lockflags);
+			goto movedata_exit;
+		}
+	}
+
+	/* 
+	 * Note that because all we're doing is moving the extents around, we can 
+	 * probably do this in a single transaction:  Each extent record (group of 8) 
+	 * is 64 bytes.  An extent overflow B-Tree node is typically 4k.  This means 
+	 * each node can hold roughly ~60 extent records == (480 extents).
+	 *
+	 * If a file was massively fragmented and had 20k extents, this means we'd 
+	 * roughly touch 20k/480 == 41 to 42 nodes, plus the index nodes, for each 
+	 * half of the operation (inserting or deleting).  So if we're manipulating 
+	 * 80-100 nodes, this is basically 320k of data to write to the journal in 
+	 * a bad case.  
+	 */
+	if (overflow_blocks != 0) {
+		if (rsrc) {
+			error = MoveData(hfsmp, from_cp->c_cnid, to_cp->c_cnid, 1);
+		}
+		else {
+			error = MoveData (hfsmp, from_cp->c_cnid, to_cp->c_cnid, 0);
+		}
+	}
+
+	if (error) {
+		/* Reverse the operation. Copy the fork data back into the source */
+		if (rsrc) {
+			hfs_move_fork (to_cp->c_rsrcfork, to_cp, from_cp->c_rsrcfork, from_cp);
+		}
+		else {
+			hfs_move_fork (to_cp->c_datafork, to_cp, from_cp->c_datafork, from_cp);
+		}
+	}
+	else {
+		struct cat_fork *src_data = NULL;
+		struct cat_fork *src_rsrc = NULL;
+		struct cat_fork *dst_data = NULL;
+		struct cat_fork *dst_rsrc = NULL;
+
+		/* Touch the times*/
+		to_cp->c_touch_acctime = TRUE;
+		to_cp->c_touch_chgtime = TRUE;
+		to_cp->c_touch_modtime = TRUE;
+
+		from_cp->c_touch_acctime = TRUE;
+		from_cp->c_touch_chgtime = TRUE;
+		from_cp->c_touch_modtime = TRUE;
+
+		hfs_touchtimes(hfsmp, to_cp);
+		hfs_touchtimes(hfsmp, from_cp);
+
+		if (from_cp->c_datafork) {
+			src_data = &from_cp->c_datafork->ff_data;
+		}
+		if (from_cp->c_rsrcfork) {
+			src_rsrc = &from_cp->c_rsrcfork->ff_data;
+		}
+
+		if (to_cp->c_datafork) {
+			dst_data = &to_cp->c_datafork->ff_data;
+		}
+		if (to_cp->c_rsrcfork) {
+			dst_rsrc = &to_cp->c_rsrcfork->ff_data;
+		}
+
+		/* Update the catalog nodes */
+		(void) cat_update(hfsmp, &from_cp->c_desc, &from_cp->c_attr, 
+				src_data, src_rsrc);
+
+		(void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, 
+				dst_data, dst_rsrc);
+
+	}
+	/* unlock the system files */
+	hfs_systemfile_unlock(hfsmp, lockflags);
+
+
+movedata_exit:
+	if (started_tr) {
+		hfs_end_transaction(hfsmp);
+	}
+
+	return error;
+
+}		
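A minimal sketch, not part of the patch, of the calling convention: both cnodes locked (here via hfs_lockpair, as hfs_vnop_exchange does above) and the destination fork already verified empty.

	int error;

	if ((error = hfs_lockpair(VTOC(from_vp), VTOC(to_vp), HFS_EXCLUSIVE_LOCK)) == 0) {
		error = hfs_movedata(from_vp, to_vp);	/* EFBIG if to_vp is not empty */
		hfs_unlockpair(VTOC(from_vp), VTOC(to_vp));
	}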
+
+/* 
+ * Copy all of the catalog and runtime data in srcfork to dstfork.
+ * 
+ * This allows us to maintain the invalid ranges across the movedata operation so 
+ * we don't need to force all of the pending IO right now.  In addition, we move 
+ * all of the non-overflow extents (those resident in the catalog record) into 
+ * the destination here.
+ */
+static int hfs_move_fork (struct filefork *srcfork, struct cnode *src_cp,
+		struct filefork *dstfork, struct cnode *dst_cp) {
+	struct rl_entry *invalid_range;
+	int size = sizeof(struct HFSPlusExtentDescriptor);
+	size = size * kHFSPlusExtentDensity;
+
+	/* If the dstfork has any invalid ranges, bail out */
+	invalid_range = TAILQ_FIRST(&dstfork->ff_invalidranges);
+	if (invalid_range != NULL) {
+		return EFBIG;
+	}
+
+	if (dstfork->ff_data.cf_size != 0 || dstfork->ff_data.cf_new_size != 0) {
+		return EFBIG;
+	}
+
+	/* First copy the invalid ranges */
+	while ((invalid_range = TAILQ_FIRST(&srcfork->ff_invalidranges))) {
+		off_t start = invalid_range->rl_start;
+		off_t end = invalid_range->rl_end;
+
+		/* Remove it from the srcfork and add it to dstfork */
+		rl_remove(start, end, &srcfork->ff_invalidranges);
+		rl_add(start, end, &dstfork->ff_invalidranges);
+	}
+
+	/* 
+	 * Ignore the ff_union.  We don't move symlinks or system files.  
+	 * Now copy the in-catalog extent information
+	 */
+	dstfork->ff_data.cf_size = srcfork->ff_data.cf_size;
+	dstfork->ff_data.cf_new_size = srcfork->ff_data.cf_new_size;
+	dstfork->ff_data.cf_vblocks = srcfork->ff_data.cf_vblocks;
+	dstfork->ff_data.cf_blocks = srcfork->ff_data.cf_blocks;
+
+	/* just memcpy the whole array of extents to the new location. */
+	memcpy (dstfork->ff_data.cf_extents, srcfork->ff_data.cf_extents, size);
+
+	/* Transfer the block counts in the cnode attribute data. */
+	src_cp->c_blocks -= srcfork->ff_data.cf_vblocks;
+	src_cp->c_blocks -= srcfork->ff_data.cf_blocks;
+
+	dst_cp->c_blocks += srcfork->ff_data.cf_vblocks;
+	dst_cp->c_blocks += srcfork->ff_data.cf_blocks;
+
+	/* Now delete the entries in the source fork */
+	srcfork->ff_data.cf_size = 0;
+	srcfork->ff_data.cf_new_size = 0;
+	srcfork->ff_data.cf_union.cfu_bytesread = 0;
+	srcfork->ff_data.cf_vblocks = 0;
+	srcfork->ff_data.cf_blocks = 0;
+	
+	/* Zero out the old extents */
+	bzero (srcfork->ff_data.cf_extents, size);
+	return 0;
+}
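For clarity, the postcondition hfs_move_fork establishes, written as assertions (illustrative only): the size, block counts, and invalid ranges all migrate, leaving the source fork empty.

	assert(srcfork->ff_data.cf_size == 0);
	assert(srcfork->ff_data.cf_blocks == 0);
	assert(srcfork->ff_data.cf_vblocks == 0);
	assert(TAILQ_EMPTY(&srcfork->ff_invalidranges));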
+
+
 
 /*
  *  cnode must be locked
  */
-__private_extern__
 int
 hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p)
 {
@@ -1747,7 +2198,6 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p)
 	int wait;		/* all other attributes (e.g. atime, etc.) */
 	int lockflag;
 	int took_trunc_lock = 0;
-	boolean_t trunc_lock_exclusive = FALSE;
 
 	/*
 	 * Applications which only care about data integrity rather than full
@@ -1777,14 +2227,13 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p)
 	    }
 	} else if (UBCINFOEXISTS(vp)) {
 		hfs_unlock(cp);
-		hfs_lock_truncate(cp, trunc_lock_exclusive);
+		hfs_lock_truncate(cp, HFS_SHARED_LOCK);
 		took_trunc_lock = 1;
 
 		if (fp->ff_unallocblocks != 0) {
-			hfs_unlock_truncate(cp, trunc_lock_exclusive);
+			hfs_unlock_truncate(cp, 0);
 
-			trunc_lock_exclusive = TRUE;
-			hfs_lock_truncate(cp, trunc_lock_exclusive);
+			hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
 		}
 		/* Don't hold cnode lock when calling into cluster layer. */
 		(void) cluster_push(vp, waitdata ? IO_SYNC : 0);
@@ -1811,13 +2260,12 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p)
 			goto datasync;
 		}
 		if (!TAILQ_EMPTY(&fp->ff_invalidranges)) {
-			if (!took_trunc_lock || trunc_lock_exclusive == FALSE) {
+			if (!took_trunc_lock || (cp->c_truncatelockowner == HFS_SHARED_OWNER)) {
 				hfs_unlock(cp);
-				if (took_trunc_lock)
-					hfs_unlock_truncate(cp, trunc_lock_exclusive);
-
-				trunc_lock_exclusive = TRUE;
-				hfs_lock_truncate(cp, trunc_lock_exclusive);
+				if (took_trunc_lock) {
+					hfs_unlock_truncate(cp, 0);
+				}
+				hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
 				hfs_lock(cp, HFS_FORCE_LOCK);
 				took_trunc_lock = 1;
 			}
@@ -1848,7 +2296,7 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p)
 	}
 datasync:
 	if (took_trunc_lock) {
-		hfs_unlock_truncate(cp, trunc_lock_exclusive);
+		hfs_unlock_truncate(cp, 0);
 		took_trunc_lock = 0;
 	}
 	/*
@@ -1899,13 +2347,23 @@ metasync:
 		 * changes get to stable storage.
 		 */
 		if (fullsync) {
-		    if (hfsmp->jnl) {
-			hfs_journal_flush(hfsmp);
-		    } else {
-			retval = hfs_metasync_all(hfsmp);
-		    	/* XXX need to pass context! */
-			VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
-		    }
+			if (hfsmp->jnl) {
+				hfs_journal_flush(hfsmp, FALSE);
+			
+				if (journal_uses_fua(hfsmp->jnl)) {
+					/*
+					 * The journal_flush did NOT issue a sync track cache 
+					 * command (FUA writes bypass the drive cache), and the 
+					 * fullsync indicates we are supposed to flush all cached 
+					 * data to the media, so issue the sync track cache 
+					 * command explicitly.
+					 */
+					VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
+				}
+			} else {
+				retval = hfs_metasync_all(hfsmp);
+				/* XXX need to pass context! */
+				VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
+			}
 		}
 	}
 
@@ -1914,7 +2372,7 @@ metasync:
 
 
 /* Sync an hfs catalog b-tree node */
-static int
+int
 hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, __unused struct proc *p)
 {
 	vnode_t	vp;
@@ -1960,7 +2418,7 @@ hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, __unused struct proc *p)
  * we rely on fsck_hfs to fix that up (which it can do without any loss
  * of data).
  */
-static int
+int
 hfs_metasync_all(struct hfsmount *hfsmp)
 {
 	int lockflags;
@@ -2002,7 +2460,6 @@ hfs_btsync_callback(struct buf *bp, __unused void *dummy)
 }
 
 
-__private_extern__
 int
 hfs_btsync(struct vnode *vp, int sync_transaction)
 {
@@ -2030,7 +2487,7 @@ hfs_btsync(struct vnode *vp, int sync_transaction)
 /*
  * Remove a directory.
  */
-static int
+int
 hfs_vnop_rmdir(ap)
 	struct vnop_rmdir_args /* {
 		struct vnode *a_dvp;
@@ -2044,6 +2501,9 @@ hfs_vnop_rmdir(ap)
 	struct cnode *dcp = VTOC(dvp);
 	struct cnode *cp = VTOC(vp);
 	int error;
+	time_t orig_ctime;
+
+	orig_ctime = VTOC(vp)->c_ctime;
 
 	if (!S_ISDIR(cp->c_mode)) {
 		return (ENOTDIR);
@@ -2051,6 +2511,10 @@ hfs_vnop_rmdir(ap)
 	if (dvp == vp) {
 		return (EINVAL);
 	}
+
+	check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_DELETE_OP, NULL);
+	cp = VTOC(vp);
+
 	if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) {
 		return (error);
 	}
@@ -2060,7 +2524,7 @@ hfs_vnop_rmdir(ap)
 		hfs_unlockpair (dcp, cp);
 		return ENOENT;
 	}
-	error = hfs_removedir(dvp, vp, ap->a_cnp, 0);
+	error = hfs_removedir(dvp, vp, ap->a_cnp, 0, 0);
 
 	hfs_unlockpair(dcp, cp);
 
@@ -2072,9 +2536,9 @@ hfs_vnop_rmdir(ap)
  *
  * Both dvp and vp cnodes are locked
  */
-static int
+int
 hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
-              int skip_reserve)
+              int skip_reserve, int only_unlink)
 {
 	struct cnode *cp;
 	struct cnode *dcp;
@@ -2096,24 +2560,77 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 	if (cp->c_entries != 0) {
 		return (ENOTEMPTY);
 	}
+	
+	/*
+	 * If the directory is open or in use (e.g. opendir() or current working
+	 * directory for some process), wait for inactive/reclaim to actually
+	 * remove the cnode from the catalog.  Both the inactive and reclaim 
+	 * codepaths are capable of removing open-unlinked directories from the 
+	 * catalog, as well as getting rid of any EAs still on the element.  So 
+	 * set only_unlink to true so that the directory gets cleaned up below.
+	 *
+	 * Otherwise, we can get into a weird mess where the directory has C_DELETED,
+	 * but it really means C_NOEXISTS because the item was actually removed from 
+	 * the catalog.  Then when we try to remove the entry from the catalog later 
+	 * on, it won't really be there anymore.  
+	 */
+	if (vnode_isinuse(vp, 0))  {
+		only_unlink = 1;
+	}
 
-	/* Check if we're removing the last link to an empty directory. */
+	/* Deal with directory hardlinks */
 	if (cp->c_flag & C_HARDLINK) {
-		/* We could also return EBUSY here */
+		/* 
+		 * Note that if we have a directory which was a hardlink at any point,
+		 * its actual directory data is stored in the directory inode in the hidden
+		 * directory rather than the leaf element(s) present in the namespace.
+		 * 
+		 * If there are still other hardlinks to this directory, 
+		 * then we'll just eliminate this particular link and the vnode will still exist.
+		 * If this is the last link to an empty directory, then we'll open-unlink the 
+		 * directory and it will be only tagged with C_DELETED (as opposed to C_NOEXISTS).
+		 * 
+		 * We could also return EBUSY here. 
+		 */
+		
 		return hfs_unlink(hfsmp, dvp, vp, cnp, skip_reserve);
 	}
 	
 	/*
-	 * We want to make sure that if the directory has a lot of attributes, we process them
-	 * in separate transactions to ensure we don't panic in the journal with a gigantic
-	 * transaction. This means we'll let hfs_removefile deal with the directory, which generally
-	 * follows the same codepath as open-unlinked files.  Note that the last argument to 
-	 * hfs_removefile specifies that it is supposed to handle directories for this case.
-	 */
-	if ((hfsmp->hfs_attribute_vp != NULL) &&
-	    (cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0) {
-
-	    return hfs_removefile(dvp, vp, cnp, 0, 0, 1, NULL);
+	 * In a few cases, we may want to allow the directory to persist in an
+	 * open-unlinked state.  If the directory is being open-unlinked (still has usecount
+	 * references), or if it has EAs, or if it was being deleted as part of a rename, 
+	 * then we go ahead and move it to the hidden directory. 
+	 *
+	 * If the directory is being open-unlinked, then we want to keep the catalog entry 
+	 * alive so that future EA calls and fchmod/fstat etc. do not cause issues later.
+	 * 
+	 * If the directory had EAs, then we want to use the open-unlink trick so that the 
+	 * EA removal is not done in one giant transaction.  Otherwise, it could cause a panic
+	 * due to overflowing the journal.
+	 * 
+	 * Finally, if it was deleted as part of a rename, we move it to the hidden directory
+	 * in order to maintain rename atomicity.  
+	 * 
+	 * Note that the allow_dirs argument to hfs_removefile specifies that it is
+	 * supposed to handle directories for this case.
+	 */
+		
+	if (((hfsmp->hfs_attribute_vp != NULL) &&
+	    ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0)) ||
+		(only_unlink != 0)) {
+		
+		int ret = hfs_removefile(dvp, vp, cnp, 0, 0, 1, NULL, only_unlink);
+		/* 
+		 * Even though hfs_vnop_rename calls vnode_recycle on tvp for us, we call 
+		 * it here as well in case we were invoked by rmdir() on a directory that 
+		 * had EAs.  Calling vnode_recycle on the directory ensures that we start 
+		 * reclaiming its space as soon as possible.
+		 */
+		vnode_recycle(vp);
+		
+		return ret;
+		
 	}
 
 	dcp->c_flag |= C_DIR_MODIFICATION;
@@ -2155,7 +2672,7 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 	desc.cd_encoding = cp->c_encoding;
 	desc.cd_hint = 0;
 
-	if (!hfs_valid_cnode(hfsmp, dvp, cnp, cp->c_fileid)) {
+	if (!hfs_valid_cnode(hfsmp, dvp, cnp, cp->c_fileid, NULL, &error)) {
 	    error = 0;
 	    goto out;
 	}
@@ -2199,16 +2716,8 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 
 	hfs_volupdate(hfsmp, VOL_RMDIR, (dcp->c_cnid == kHFSRootFolderID));
 
-	/*
-	 * directory open or in use (e.g. opendir() or current working
-	 * directory for some process); wait for inactive to actually
-	 * remove catalog entry
-	 */
-	if (vnode_isinuse(vp, 0)) {
-		cp->c_flag |= C_DELETED;
-	} else {
-		cp->c_flag |= C_NOEXISTS;
-	}
+	/* Mark C_NOEXISTS since the catalog entry is now gone */
+	cp->c_flag |= C_NOEXISTS;
 out:
 	dcp->c_flag &= ~C_DIR_MODIFICATION;
 	wakeup((caddr_t)&dcp->c_flag);
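For reference, a short summary of the two unlink states used above:

	/*
	 * C_DELETED:  the catalog entry persists (moved to the hidden directory);
	 *             the item is open-unlinked and inactive/reclaim completes the
	 *             removal later.
	 * C_NOEXISTS: the catalog entry is gone; no further catalog work remains.
	 */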
@@ -2224,7 +2733,7 @@ out:
 /*
  * Remove a file or link.
  */
-static int
+int
 hfs_vnop_remove(ap)
 	struct vnop_remove_args /* {
 		struct vnode *a_dvp;
@@ -2237,17 +2746,29 @@ hfs_vnop_remove(ap)
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode *vp = ap->a_vp;
 	struct cnode *dcp = VTOC(dvp);
-	struct cnode *cp = VTOC(vp);
+	struct cnode *cp;
 	struct vnode *rvp = NULL;
 	struct hfsmount *hfsmp = VTOHFS(vp);	
 	int error=0, recycle_rsrc=0;
 	int drop_rsrc_vnode = 0;
-	int vref;
+	time_t orig_ctime;
 
 	if (dvp == vp) {
 		return (EINVAL);
 	}
 
+	orig_ctime = VTOC(vp)->c_ctime;
+	if (!vnode_isnamedstream(vp)) {
+		error = check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_DELETE_OP, NULL);
+		if (error) {
+			// XXXdbg - decide on a policy for handling namespace handler failures!
+			// for now we just let them proceed.
+		}		
+	}
+	error = 0;
+
+	cp = VTOC(vp);
+
 	/* 
  	 * We need to grab the cnode lock on 'cp' before the lockpair() 
 	 * to get an iocount on the rsrc fork BEFORE we enter hfs_removefile.
@@ -2269,23 +2790,25 @@ hfs_vnop_remove(ap)
 		if ((error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK))) {
 			return (error);
 		}
+
 		error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, TRUE);
 		hfs_unlock(cp);
 		if (error) {
-			/* We may have gotten a rsrc vp out even though we got an error back. */
+			/* we may have gotten an rsrc vp even though we got an error */
 			if (rvp) {
 				vnode_put(rvp);
 				rvp = NULL;
 			}
-			return error;
+			return (error);	
 		}
 		drop_rsrc_vnode = 1;
 	}
 	/* Now that we may have an iocount on rvp, do the lock pair */
-	hfs_lock_truncate(cp, TRUE);
+
+	hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
 
 	if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) {
-		hfs_unlock_truncate(cp, TRUE);
+		hfs_unlock_truncate(cp, 0);
 		/* drop the iocount on rvp if necessary */
 		if (drop_rsrc_vnode) {
 			vnode_put (rvp);
@@ -2302,20 +2825,27 @@ hfs_vnop_remove(ap)
 		goto rm_done;	
 	}
 
-	error = hfs_removefile(dvp, vp, ap->a_cnp, ap->a_flags, 0, 0, rvp);
-
-	//
-	// If the remove succeeded and it's an open-unlinked file that has
-	// a resource fork vnode that's not in use, we will want to recycle
-	// the rvp *after* we're done unlocking everything.  Otherwise the
-	// resource vnode will keep a v_parent reference on this vnode which
-	// prevents it from going through inactive/reclaim which means that
-	// the disk space associated with this file won't get free'd until
-	// something forces the resource vnode to get recycled (and that can
-	// take a very long time).
-	//
-	if (error == 0 && (cp->c_flag & C_DELETED) && 
-			(rvp) && !vnode_isinuse(rvp, 0)) {
+	error = hfs_removefile(dvp, vp, ap->a_cnp, ap->a_flags, 0, 0, rvp, 0);
+	
+	/*
+	 * If the remove succeeded in deleting the file, then we may need to mark
+	 * the resource fork for recycle so that it is reclaimed as quickly
+	 * as possible.  If it were not recycled quickly, then this resource fork
+	 * vnode could keep a v_parent reference on the data fork, which prevents it
+	 * from going through reclaim (by giving it extra usecounts), except in the force-
+	 * unmount case.  
+	 * 
+	 * However, a caveat:  we need to continue to supply resource fork
+	 * access to open-unlinked files even if the resource fork is not open.  This is
+	 * a requirement for the compressed files work.  Luckily, hfs_vgetrsrc will handle
+	 * this already if the data fork has been re-parented to the hidden directory.
+	 * 
+	 * As a result, all we really need to do here is mark the resource fork vnode
+	 * for recycle.  If it goes out of core, it can be brought in again if needed.  
+	 * If the cnode was instead marked C_NOEXISTS, then there wouldn't be any 
+	 * more work.
+	 */
+	if ((error == 0) && (rvp)) {
 	    recycle_rsrc = 1;
 	}
 
@@ -2326,15 +2856,11 @@ hfs_vnop_remove(ap)
 	 * truncate lock)
 	 */
 rm_done:
-	hfs_unlock_truncate(cp, TRUE);
+	hfs_unlock_truncate(cp, 0);
 	hfs_unlockpair(dcp, cp);
 
 	if (recycle_rsrc) {
-		vref = vnode_ref(rvp);
-		if (vref == 0) {
-			/* vnode_ref could return an error, only release if we got a ref */
-			vnode_rele(rvp);
-		}
+		/* inactive or reclaim on rvp will clean up the blocks from the rsrc fork */
 		vnode_recycle(rvp);
 	} 
 	
@@ -2376,24 +2902,24 @@ hfs_removefile_callback(struct buf *bp, void *hfsmp) {
  *
  * Requires cnode and truncate locks to be held.
  */
-static int
+int
 hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
-               int flags, int skip_reserve, int allow_dirs, struct vnode *rvp)
+               int flags, int skip_reserve, int allow_dirs, 
+			   struct vnode *rvp, int only_unlink)
 {
 	struct cnode *cp;
 	struct cnode *dcp;
 	struct hfsmount *hfsmp;
 	struct cat_desc desc;
 	struct timeval tv;
-	vfs_context_t ctx = cnp->cn_context;
 	int dataforkbusy = 0;
 	int rsrcforkbusy = 0;
-	int truncated = 0;
 	int lockflags;
 	int error = 0;
 	int started_tr = 0;
 	int isbigfile = 0, defer_remove=0, isdir=0;
-
+	int update_vh = 0;
+	
 	cp = VTOC(vp);
 	dcp = VTOC(dvp);
 	hfsmp = VTOHFS(vp);
@@ -2403,7 +2929,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 		return (0);
 	}
 
-	if (!hfs_valid_cnode(hfsmp, dvp, cnp, cp->c_fileid)) {
+	if (!hfs_valid_cnode(hfsmp, dvp, cnp, cp->c_fileid, NULL, &error)) {
 	    return 0;
 	}
 
@@ -2485,6 +3011,11 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 	    (cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0) {
 	    defer_remove = 1;
 	}
+	
+	/* If we are explicitly told to only unlink item and move to hidden dir, then do it */
+	if (only_unlink) {
+		defer_remove = 1;
+	}
 
 	/*
 	 * Carbon semantics prohibit deleting busy files.
@@ -2502,9 +3033,16 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 	if (hfsmp->hfs_flags & HFS_QUOTAS)
 		(void)hfs_getinoquota(cp);
 #endif /* QUOTA */
-
-	/* Check if we need a ubc_setsize. */
-	if (isdir == 0 && (!dataforkbusy || !rsrcforkbusy)) {
+	
+	/* 
+	 * Do a ubc_setsize to indicate we need to wipe the contents if:
+	 *  1) the item is a regular file, and
+	 *  2) at least one fork is not busy AND we were not told to only unlink it.
+	 *
+	 * We need to check defer_remove since it can be set without 
+	 * having a busy data or rsrc fork.
+	 */
+	if (isdir == 0 && (!dataforkbusy || !rsrcforkbusy) && (defer_remove == 0)) {
 		/*
 		 * A ubc_setsize can cause a pagein so defer it
 		 * until after the cnode lock is dropped.  The
@@ -2525,40 +3063,46 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 	started_tr = 1;
 
 	// XXXdbg - if we're journaled, kill any dirty symlink buffers 
-	if (hfsmp->jnl && vnode_islnk(vp))
+	if (hfsmp->jnl && vnode_islnk(vp) && (defer_remove == 0)) {
 	        buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp);
+	}
 
 	/*
-	 * Truncate any non-busy forks.  Busy forks will
+	 * Prepare to truncate any non-busy forks.  Busy forks will
 	 * get truncated when their vnode goes inactive.
 	 * Note that we will only enter this region if we
 	 * can avoid creating an open-unlinked file.  If 
 	 * either region is busy, we will have to create an open
 	 * unlinked file.
-	 * Since we're already inside a transaction,
-	 * tell hfs_truncate to skip the ubc_setsize.
+	 *
+	 * Since we are deleting the file, we need to stage the runtime
+	 * modifications in such a way that a crash won't result in 
+	 * overlapped extents or any other bad inconsistencies.  As 
+	 * such, we call prepare_release_storage
+	 * which updates the UBC, updates quota information, and releases
+	 * any loaned blocks that belong to this file.  No actual 
+	 * truncation or bitmap manipulation is done until *AFTER*
+	 * the catalog record is removed. 
 	 */
-	if (isdir == 0 && (!dataforkbusy && !rsrcforkbusy)) {
-		/* 
-		 * Note that 5th argument to hfs_truncate indicates whether or not 
-		 * hfs_update calls should be suppressed in call to do_hfs_truncate
-		 */
+	if (isdir == 0 && (!dataforkbusy && !rsrcforkbusy) && (only_unlink == 0)) {
+		
 		if (!dataforkbusy && !isbigfile && cp->c_datafork->ff_blocks != 0) {
-			/* skip update in hfs_truncate */
-			error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, 1, ctx);
-			if (error)
+			
+			error = hfs_prepare_release_storage (hfsmp, vp);
+			if (error) {
 				goto out;
-			truncated = 1;
+			}
+			update_vh = 1;
 		}
 		if (!rsrcforkbusy && rvp) {
-			/* skip update in hfs_truncate */
-			error = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 1, 1, ctx);
-			if (error)
+			error = hfs_prepare_release_storage (hfsmp, rvp);
+			if (error) {
 				goto out;
-			truncated = 1;
+			}
+			update_vh = 1;
 		}
 	}
-
+	
 	/* 
 	 * Protect against a race with rename by using the component
 	 * name passed in and parent id from dvp (instead of using 
@@ -2658,15 +3202,15 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 		if (error)
 			goto out;
 
-	} else /* Not busy */ {
-
-		if (cp->c_blocks > 0) {
-			printf("hfs_remove: attempting to delete a non-empty file %s\n",
-				cp->c_desc.cd_nameptr);
-			error = EBUSY;
-			goto out;
-		}
-
+	} 
+	else /* Not busy */ {
+		
+#if QUOTA
+		off_t savedbytes;
+		int blksize = hfsmp->blockSize;
+#endif
+		u_int32_t fileid = cp->c_fileid;
+		
 		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
 		if (!skip_reserve) {
 			if ((error = cat_preflight(hfsmp, CAT_DELETE, NULL, 0))) {
@@ -2674,30 +3218,14 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 				goto out;
 			}
 		}
-
+		
 		error = cat_delete(hfsmp, &desc, &cp->c_attr);
-
-		if (error && error != ENXIO && error != ENOENT && truncated) {
-			if ((cp->c_datafork && cp->c_datafork->ff_size != 0) ||
-					(cp->c_rsrcfork && cp->c_rsrcfork->ff_size != 0)) {
-				off_t data_size = 0;
-				off_t rsrc_size = 0;
-				if (cp->c_datafork) {
-					data_size = cp->c_datafork->ff_size;
-				}
-				if (cp->c_rsrcfork) {
-					rsrc_size = cp->c_rsrcfork->ff_size;
-				}
-				printf("hfs: remove: couldn't delete a truncated file (%s)" 
-						"(error %d, data sz %lld; rsrc sz %lld)",
-					cp->c_desc.cd_nameptr, error, data_size, rsrc_size);
-				hfs_mark_volume_inconsistent(hfsmp);
-			} else {
-				printf("hfs: remove: strangely enough, deleting truncated file %s (%d) got err %d\n",
-						cp->c_desc.cd_nameptr, cp->c_attr.ca_fileid, error);
-			}	
+		
+		if (error && error != ENXIO && error != ENOENT) {
+			printf("hfs_removefile: deleting file %s (%d), err: %d\n",
+				   cp->c_desc.cd_nameptr, cp->c_attr.ca_fileid, error);
 		}
-
+		
 		if (error == 0) {
 			/* Update the parent directory */
 			if (dcp->c_entries > 0)
@@ -2708,26 +3236,65 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 			(void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL);
 		}
 		hfs_systemfile_unlock(hfsmp, lockflags);
-		if (error)
+		if (error) {
 			goto out;
-
+		}
+		
+		/* 
+		 * Now that we've wiped out the catalog record, the file effectively doesn't
+		 * exist anymore. So update the quota records to reflect the loss of the 
+		 * data fork and the resource fork. 
+		 */
 #if QUOTA
-		if (hfsmp->hfs_flags & HFS_QUOTAS)
+		if (cp->c_datafork->ff_blocks > 0) {
+			savedbytes = ((off_t)cp->c_datafork->ff_blocks * (off_t)blksize);
+			(void) hfs_chkdq(cp, (int64_t)-(savedbytes), NOCRED, 0);
+		}
+		
+		if (cp->c_rsrcfork && (cp->c_rsrcfork->ff_blocks > 0)) {
+			savedbytes = ((off_t)cp->c_rsrcfork->ff_blocks * (off_t)blksize);
+			(void) hfs_chkdq(cp, (int64_t)-(savedbytes), NOCRED, 0);
+		}
+		
+		if (hfsmp->hfs_flags & HFS_QUOTAS) {
 			(void)hfs_chkiq(cp, -1, NOCRED, 0);
-#endif /* QUOTA */
-
+		}
+#endif
+		
+		
+		/* 
+		 * If we didn't get any errors deleting the catalog entry, then go ahead
+		 * and release the backing store now.  The filefork pointers are still valid.
+		 */		
+		error = hfs_release_storage (hfsmp, cp->c_datafork, cp->c_rsrcfork, fileid);
+		
+		if (error) {
+			/* 
+			 * If we encountered an error updating the extents and bitmap,
+			 * mark the volume inconsistent.  At this point, the catalog record has
+			 * already been deleted, so we can't recover it at this point. We need
+			 * to proceed and update the volume header and mark the cnode C_NOEXISTS.
+			 * The subsequent fsck should be able to recover the free space for us.
+			 */
+			hfs_mark_volume_inconsistent(hfsmp);
+		}
+		else {
+			/* reset update_vh to 0, since hfs_release_storage should have done it for us */
+			update_vh = 0;
+		}
+		
 		cp->c_flag |= C_NOEXISTS;
 		cp->c_flag &= ~C_DELETED;
-		truncated = 0;  // because the catalog entry is gone
-
+		
 		cp->c_touch_chgtime = TRUE;   /* XXX needed ? */
 		--cp->c_linkcount;
-
+		
 		/* 
 		 * We must never get a directory if we're in this else block.  We could 
 		 * accidentally drop the number of files in the volume header if we did.
 		 */
 		hfs_volupdate(hfsmp, VOL_RMFILE, (dcp->c_cnid == kHFSRootFolderID));
+		
 	}
 
 	/*
@@ -2744,14 +3311,14 @@ out:
 	if (error) {
 	    cp->c_flag &= ~C_DELETED;
 	}
-
-	/* Commit the truncation to the catalog record */
-	if (truncated) {
-	    cp->c_flag |= C_FORCEUPDATE;
-	    cp->c_touch_chgtime = TRUE;
-	    cp->c_touch_modtime = TRUE;
-	    (void) hfs_update(vp, 0);
-	}
+	
+	if (update_vh) {
+		/* 
+		 * If we bailed out earlier, we may need to update the volume header
+		 * to deal with the borrowed blocks accounting. 
+		 */
+		hfs_volupdate (hfsmp, VOL_UPDATE, 0);
+	}	
 
 	if (started_tr) {
 	    hfs_end_transaction(hfsmp);
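Condensed for clarity (error handling omitted), the crash-safe ordering the hfs_removefile hunks above establish for a non-busy file; the names are the helpers used in this diff:

	hfs_start_transaction(hfsmp);
	hfs_prepare_release_storage(hfsmp, vp);	/* UBC, quota, loaned blocks */
	cat_delete(hfsmp, &desc, &cp->c_attr);	/* catalog record is removed first */
	hfs_release_storage(hfsmp, cp->c_datafork, cp->c_rsrcfork, fileid);
						/* extents/bitmap freed only after */
	hfs_end_transaction(hfsmp);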
@@ -2789,7 +3356,6 @@ replace_desc(struct cnode *cp, struct cat_desc *cdp)
 	cdp->cd_flags &= ~CD_HASBUF;
 }
 
-
 /*
  * Rename a cnode.
  *
@@ -2813,7 +3379,7 @@ replace_desc(struct cnode *cp, struct cat_desc *cdp)
  * been locked.  By taking the rsrc fork vnodes up front we ensure that they 
  * cannot be recycled, and that the situation mentioned above cannot happen.
  */
-static int
+int
 hfs_vnop_rename(ap)
 	struct vnop_rename_args  /* {
 		struct vnode *a_fdvp;
@@ -2849,9 +3415,21 @@ hfs_vnop_rename(ap)
 	int took_trunc_lock = 0;
 	int lockflags;
 	int error;
-	int recycle_rsrc = 0;
+	time_t orig_from_ctime, orig_to_ctime;
+
+	orig_from_ctime = VTOC(fvp)->c_ctime;
+	if (tvp && VTOC(tvp)) {
+		orig_to_ctime = VTOC(tvp)->c_ctime;
+	} else {
+		orig_to_ctime = ~0;
+	}
 
+	check_for_tracked_file(fvp, orig_from_ctime, NAMESPACE_HANDLER_RENAME_OP, NULL);
 
+	if (tvp && VTOC(tvp)) {
+		check_for_tracked_file(tvp, orig_to_ctime, NAMESPACE_HANDLER_DELETE_OP, NULL);
+	}
+	
 	/* 
 	 * Before grabbing the four locks, we may need to get an iocount on the resource fork
 	 * vnodes in question, just like hfs_vnop_remove.  If fvp and tvp are not
@@ -2867,16 +3445,15 @@ hfs_vnop_rename(ap)
 		if ((error = hfs_lock (VTOC(fvp), HFS_EXCLUSIVE_LOCK))) {
 			return (error);
 		}
-		
 		/*
-		 * We care if we race against rename/delete with this cnode, so we'll
-		 * error out if this file becomes open-unlinked during this call.
+		 * We care if we race against rename/delete with this cp, so we'll error out
+		 * if the file becomes open-unlinked during this call.
 		 */
 		error = hfs_vgetrsrc(VTOHFS(fvp), fvp, &fvp_rsrc, TRUE, TRUE);
 		hfs_unlock (VTOC(fvp));
 		if (error) {
 			if (fvp_rsrc) {
-				vnode_put (fvp_rsrc);
+				vnode_put(fvp_rsrc);
 			}
 			return error;
 		}
@@ -2890,7 +3467,6 @@ hfs_vnop_rename(ap)
 		 */
 		if (hfs_lock (VTOC(tvp), HFS_EXCLUSIVE_LOCK) == 0) {
 			tcp = VTOC(tvp);
-			
 			/* 
 			 * We only care if we get an open-unlinked file on the dst so we 
 			 * know to null out tvp/tcp to make the rename operation act 
@@ -2898,18 +3474,19 @@ hfs_vnop_rename(ap)
 			 * namespace already it's fine to do this.  If this is true, then
 			 * make sure to unlock the cnode and drop the iocount only after the unlock.
 			 */
+
 			error = hfs_vgetrsrc(VTOHFS(tvp), tvp, &tvp_rsrc, TRUE, TRUE);
 			hfs_unlock (tcp);
 			if (error) {
 				/*
-				 * Since we specify TRUE for error-on-unlinked in hfs_vgetrsrc,
-				 * we can get a rsrc fork vp even if it returns an error.
+				 * Since we specify TRUE for error_on_unlinked in hfs_vgetrsrc,
+				 * we can get a rsrc fork vnode even if it returns an error.
 				 */
 				tcp = NULL;
 				tvp = NULL;
 				if (tvp_rsrc) {
 					vnode_put (tvp_rsrc);
-					tvp_rsrc = NULLVP;
+					tvp_rsrc = NULL;
 				}
 				/* just bypass truncate lock and act as if we never got tcp/tvp */
 				goto retry;
@@ -2919,7 +3496,7 @@ hfs_vnop_rename(ap)
 
 	/* When tvp exists, take the truncate lock for hfs_removefile(). */
 	if (tvp && (vnode_isreg(tvp) || vnode_islnk(tvp))) {
-		hfs_lock_truncate(VTOC(tvp), TRUE);
+		hfs_lock_truncate(VTOC(tvp), HFS_EXCLUSIVE_LOCK);
 		took_trunc_lock = 1;
 	}
 
@@ -2928,7 +3505,7 @@ hfs_vnop_rename(ap)
 	                     HFS_EXCLUSIVE_LOCK, &error_cnode);
 	if (error) {
 		if (took_trunc_lock) {
-			hfs_unlock_truncate(VTOC(tvp), TRUE);
+			hfs_unlock_truncate(VTOC(tvp), 0);
 			took_trunc_lock = 0;
 		}
 		/* 
@@ -2974,21 +3551,22 @@ hfs_vnop_rename(ap)
 	 * the parent/child relationship with fdcp and tdcp, as well as the
 	 * component name of the target cnodes.  
 	 */
-	if ((fcp->c_flag & (C_NOEXISTS | C_DELETED)) || !hfs_valid_cnode(hfsmp, fdvp, fcnp, fcp->c_fileid)) {
+	if ((fcp->c_flag & (C_NOEXISTS | C_DELETED)) || !hfs_valid_cnode(hfsmp, fdvp, fcnp, fcp->c_fileid, NULL, &error)) {
 		error = ENOENT;
 		goto out;
 	}
 
-	if (tcp && ((tcp->c_flag & (C_NOEXISTS | C_DELETED)) || !hfs_valid_cnode(hfsmp, tdvp, tcnp, tcp->c_fileid))) {
+	if (tcp && ((tcp->c_flag & (C_NOEXISTS | C_DELETED)) || !hfs_valid_cnode(hfsmp, tdvp, tcnp, tcp->c_fileid, NULL, &error))) {
 	    //
 	    // hmm, the destination vnode isn't valid any more.
 	    // in this case we can just drop him and pretend he
 	    // never existed in the first place.
 	    //
 	    if (took_trunc_lock) {
-		hfs_unlock_truncate(VTOC(tvp), TRUE);
-		took_trunc_lock = 0;
+			hfs_unlock_truncate(VTOC(tvp), 0);
+			took_trunc_lock = 0;
 	    }
+		error = 0;
 
 	    hfs_unlockfour(fdcp, fcp, tdcp, tcp);
 
@@ -3186,7 +3764,33 @@ hfs_vnop_rename(ap)
 	got_cookie = 1;
 
 	/*
-	 * If the destination exists then it may need to be removed.
+	 * If the destination exists then it may need to be removed.  
+	 * 
+	 * Due to HFS's locking system, we should always move the 
+	 * existing 'tvp' element to the hidden directory in hfs_vnop_rename.
+	 * Because the VNOP_LOOKUP call enters and exits the filesystem independently
+	 * of the actual vnop that it was trying to do (stat, link, readlink),
+	 * we must release the cnode lock of that element during the interim to 
+	 * do MAC checking, vnode authorization, and other calls.  In that time, 
+	 * the item can be deleted (or renamed over). However, only in the rename 
+	 * case is it inappropriate to return ENOENT from any of those calls.  Either 
+	 * the call should return information about the old element (stale), or get 
+	 * information about the newer element that we are about to write in its place.  
+	 * 
+	 * HFS lookup has been modified to detect a rename and re-drive its 
+	 * lookup internally. For other calls that have already succeeded in 
+	 * their lookup call and are waiting to acquire the cnode lock in order 
+	 * to proceed, that cnode lock will not fail due to the cnode being marked 
+	 * C_NOEXISTS, because it won't have been marked as such.  It will only 
+	 * have C_DELETED.  Thus, they will simply act on the stale open-unlinked
+	 * element.  All future callers will get the new element.
+	 *
+	 * To implement this behavior, we pass the "only_unlink" argument to 
+	 * hfs_removefile and hfs_removedir.  This will result in the vnode acting 
+	 * as though it is open-unlinked.  Additionally, when we are done moving the 
+	 * element to the hidden directory, we vnode_recycle the target so that it is 
+	 * reclaimed as soon as possible.  Reclaim and inactive are both 
+	 * capable of clearing out unused blocks for an open-unlinked file or dir.
 	 */
 	if (tvp) {
 		/*
@@ -3209,28 +3813,54 @@ hfs_vnop_rename(ap)
 			}
 		}
 
-		if (vnode_isdir(tvp))
-			error = hfs_removedir(tdvp, tvp, tcnp, HFSRM_SKIP_RESERVE);
-		else {
-			error = hfs_removefile(tdvp, tvp, tcnp, 0, HFSRM_SKIP_RESERVE, 0, tvp_rsrc);
-
-			/* 
-			 * If the destination file had a rsrc fork vnode, it may have been cleaned up
-			 * in hfs_removefile if it was not busy (had no usecounts).  This is possible
-			 * because we grabbed the iocount on the rsrc fork safely at the beginning
-			 * of the function before we did the lockfour.  However, we may still need
-			 * to take action to prevent block leaks, so aggressively recycle the vnode
-			 * if possible.  The vnode cannot be recycled because we hold an iocount on it.
+		
+		if (vnode_isdir(tvp)) {
+			/*
+			 * hfs_removedir will eventually call hfs_removefile on the directory
+			 * we're working on, because only hfs_removefile does the renaming of the
+			 * item to the hidden directory.  The directory will stay around in the
+			 * hidden directory with C_DELETED until it gets an inactive or a reclaim.
+			 * That way, we can destroy all of the EAs as needed and allow new ones to be
+			 * written.
 			 */
-
-			if ((error == 0) && (tcp->c_flag & C_DELETED) && tvp_rsrc && !vnode_isinuse(tvp_rsrc, 0)) {
-				recycle_rsrc = 1;
-			}	
+			error = hfs_removedir(tdvp, tvp, tcnp, HFSRM_SKIP_RESERVE, 1);
+		}
+		else {
+			error = hfs_removefile(tdvp, tvp, tcnp, 0, HFSRM_SKIP_RESERVE, 0, tvp_rsrc, 1);
+			
+			/*
+			 * If the destination file had a resource fork vnode, then we need to get rid of
+			 * its blocks when there are no more references to it.  Because the call to
+			 * hfs_removefile above always open-unlinks things, we need to force an inactive/reclaim
+			 * on the resource fork vnode, in order to prevent block leaks.  Otherwise,
+			 * the resource fork vnode could prevent the data fork vnode from going out of scope
+			 * because it holds a v_parent reference on it.  So we mark it for termination
+			 * with a call to vnode_recycle. hfs_vnop_reclaim has been modified so that it 
+			 * can clean up the blocks of open-unlinked files and resource forks. 
+			 *
+			 * We can safely call vnode_recycle on the resource fork because we took an iocount
+			 * reference on it at the beginning of the function. 
+			 */ 
+			
+			if ((error == 0) && (tcp->c_flag & C_DELETED) && (tvp_rsrc)) {
+				vnode_recycle(tvp_rsrc);
+			}
 		}
 
-		if (error)
+		if (error) {
 			goto out;
+		}
+		
 		tvp_deleted = 1;
+		
+		/* Mark 'tcp' as being deleted due to a rename */
+		tcp->c_flag |= C_RENAMED;
+		
+		/*
+		 * Aggressively mark tvp/tcp for termination to ensure that we recover all blocks
+		 * as quickly as possible.
+		 */
+		vnode_recycle(tvp);
 	}
 skip_rm:
 	/*
@@ -3268,6 +3898,11 @@ skip_rm:
 	replace_desc(fcp, &out_desc);
 	fcp->c_parentcnid = tdcp->c_fileid;
 	fcp->c_hint = 0;
+	
+	/* Now indicate this cnode needs to have date-added written to the finderinfo */
+	fcp->c_flag |= C_NEEDS_DATEADDED;
+	(void) hfs_update (fvp, 0);
+
 
 	hfs_volupdate(hfsmp, vnode_isdir(fvp) ? VOL_RMDIR : VOL_RMFILE,
 	              (fdcp->c_cnid == kHFSRootFolderID));
@@ -3327,28 +3962,12 @@ out:
 	    wakeup((caddr_t)&tdcp->c_flag);
 	}
 
-	if (took_trunc_lock)
-		hfs_unlock_truncate(VTOC(tvp), TRUE);	
+	if (took_trunc_lock) {
+		hfs_unlock_truncate(VTOC(tvp), 0);	
+	}
 
 	hfs_unlockfour(fdcp, fcp, tdcp, tcp);
 	
-	/* 
-	 * Now that we've dropped all of the locks, we need to force an inactive and a recycle 
-	 * on the old destination's rsrc fork to prevent a leak of its blocks.  Note that
-	 * doing the ref/rele is to twiddle the VL_NEEDINACTIVE bit of the vnode's flags, so that
-	 * on the last vnode_put for this vnode, we will force inactive to get triggered.
-	 * We hold an iocount from the beginning of this function so we know it couldn't have been
-	 * recycled already. 
-	 */
-	if (recycle_rsrc) {
-		int vref; 
-		vref = vnode_ref(tvp_rsrc);
-		if (vref == 0) {
-			vnode_rele(tvp_rsrc);
-		}
-		vnode_recycle(tvp_rsrc);
-	}
-
 	/* Now vnode_put the resource forks vnodes if necessary */
 	if (tvp_rsrc) {
 		vnode_put(tvp_rsrc);
@@ -3368,7 +3987,7 @@ out:
 /*
  * Make a directory.
  */
-static int
+int
 hfs_vnop_mkdir(struct vnop_mkdir_args *ap)
 {
 	/***** HACK ALERT ********/
@@ -3380,7 +3999,7 @@ hfs_vnop_mkdir(struct vnop_mkdir_args *ap)
 /*
  * Create a symbolic link.
  */
-static int
+int
 hfs_vnop_symlink(struct vnop_symlink_args *ap)
 {
 	struct vnode **vpp = ap->a_vpp;
@@ -3456,7 +4075,7 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap)
 
 		/* hfs_removefile() requires holding the truncate lock */
 		hfs_unlock(cp);
-		hfs_lock_truncate(cp, TRUE);
+		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
 		hfs_lock(cp, HFS_FORCE_LOCK);
 
 		if (hfs_start_transaction(hfsmp) != 0) {
@@ -3465,8 +4084,8 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap)
 			goto out;
 		}
 		
-		(void) hfs_removefile(dvp, vp, ap->a_cnp, 0, 0, 0, NULL);
-		hfs_unlock_truncate(cp, TRUE);
+		(void) hfs_removefile(dvp, vp, ap->a_cnp, 0, 0, 0, NULL, 0);
+		hfs_unlock_truncate(cp, 0);
 		goto out;	
 	}
 
@@ -3562,7 +4181,7 @@ typedef union {
  * If the directory is marked as deleted-but-in-use (cp->c_flag & C_DELETED),
  * do NOT synthesize entries for "." and "..".
  */
-static int
+int
 hfs_vnop_readdir(ap)
 	struct vnop_readdir_args /* {
 		vnode_t a_vp;
@@ -3601,11 +4220,23 @@ hfs_vnop_readdir(ap)
 	/* Sanity check the uio data. */
 	if (uio_iovcnt(uio) > 1)
 		return (EINVAL);
+
+	if (VTOC(vp)->c_flags & UF_COMPRESSED) {
+		int compressed = hfs_file_is_compressed(VTOC(vp), 0);  /* 0 == take the cnode lock */
+		if (VTOCMP(vp) != NULL && !compressed) {
+			error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
+			if (error) {
+				return error;
+			}
+		}
+	}
+
+	cp = VTOC(vp);
+	hfsmp = VTOHFS(vp);
+
 	/* Note that the dirhint calls require an exclusive lock. */
 	if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
 		return (error);
-	cp = VTOC(vp);
-	hfsmp = VTOHFS(vp);
 
 	/* Pick up cnid hint (if any). */
 	if (nfs_cookies) {
@@ -3741,7 +4372,7 @@ hfs_vnop_readdir(ap)
 	
 	if (index == 0) {
 		dirhint->dh_threadhint = cp->c_dirthreadhint;
-	}
+	} 
 	else {
 		/*
 		 * If we have a non-zero index, there is a possibility that during the last
@@ -3822,7 +4453,7 @@ out:
 /*
  * Read contents of a symbolic link.
  */
-static int
+int
 hfs_vnop_readlink(ap)
 	struct vnop_readlink_args /* {
 		struct vnode *a_vp;
@@ -3845,7 +4476,6 @@ hfs_vnop_readlink(ap)
    
 	/* Zero length sym links are not allowed */
 	if (fp->ff_size == 0 || fp->ff_size > MAXPATHLEN) {
-		printf("hfs: zero length symlink on fileid %d\n", cp->c_fileid);
 		error = EINVAL;
 		goto exit;
 	}
@@ -3907,7 +4537,7 @@ exit:
 /*
  * Get configurable pathname variables.
  */
-static int
+int
 hfs_vnop_pathconf(ap)
 	struct vnop_pathconf_args /* {
 		struct vnode *a_vp;
@@ -3925,9 +4555,9 @@ hfs_vnop_pathconf(ap)
 		break;
 	case _PC_NAME_MAX:
 		if (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD)
-			*ap->a_retval = kHFSMaxFileNameChars;  /* 255 */
+			*ap->a_retval = kHFSMaxFileNameChars;  /* 31 */
 		else
-			*ap->a_retval = kHFSPlusMaxFileNameChars;  /* 31 */
+			*ap->a_retval = kHFSPlusMaxFileNameChars;  /* 255 */
 		break;
 	case _PC_PATH_MAX:
 		*ap->a_retval = PATH_MAX;  /* 1024 */
@@ -3942,7 +4572,10 @@ hfs_vnop_pathconf(ap)
 		*ap->a_retval = 200112;		/* _POSIX_NO_TRUNC */
 		break;
 	case _PC_NAME_CHARS_MAX:
-		*ap->a_retval = kHFSPlusMaxFileNameChars;
+		if (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD) 
+			*ap->a_retval = kHFSMaxFileNameChars; /* 31 */
+		else 
+			*ap->a_retval = kHFSPlusMaxFileNameChars; /* 255 */
 		break;
 	case _PC_CASE_SENSITIVE:
 		if (VTOHFS(ap->a_vp)->hfs_flags & HFS_CASE_SENSITIVE)
@@ -3959,6 +4592,10 @@ hfs_vnop_pathconf(ap)
 		else
 			*ap->a_retval = 64;	/* number of bits to store max file size */
 		break;
+	case _PC_XATTR_SIZE_BITS:
+		/* Number of bits to store maximum extended attribute size */
+		*ap->a_retval = HFS_XATTR_SIZE_BITS;
+		break;
 	default:
 		return (EINVAL);
 	}
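For reference, userland reaches this switch through pathconf(2). A minimal sketch, assuming an HFS+ volume mounted at the placeholder path "/Volumes/HFS" and that _PC_XATTR_SIZE_BITS is exposed through <unistd.h> on this platform:

	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* Placeholder mount point; any path on the target volume works. */
		long name_chars = pathconf("/Volumes/HFS", _PC_NAME_CHARS_MAX);   /* 255 on HFS+ */
		long xattr_bits = pathconf("/Volumes/HFS", _PC_XATTR_SIZE_BITS);
		printf("name chars: %ld, xattr size bits: %ld\n", name_chars, xattr_bits);
		return 0;
	}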
@@ -3975,7 +4612,6 @@ hfs_vnop_pathconf(ap)
  *
  * The cnode must be locked exclusive
  */
-__private_extern__
 int
 hfs_update(struct vnode *vp, __unused int waitfor)
 {
@@ -4040,28 +4676,50 @@ hfs_update(struct vnode *vp, __unused int waitfor)
 	    return error;
 	}
 
-	/*
-	 * For files with invalid ranges (holes) the on-disk
-	 * field representing the size of the file (cf_size)
-	 * must be no larger than the start of the first hole.
+	/* 
+	 * Modify the values passed to cat_update based on whether or not
+	 * the file has invalid ranges or borrowed blocks.
 	 */
-	if (dataforkp && !TAILQ_EMPTY(&cp->c_datafork->ff_invalidranges)) {
+	if (dataforkp) {
+		off_t numbytes = 0;
+
+		/* Copy the datafork into a temporary so we don't pollute the cnode's own copy. */
 		bcopy(dataforkp, &datafork, sizeof(datafork));
-		datafork.cf_size = TAILQ_FIRST(&cp->c_datafork->ff_invalidranges)->rl_start;
 		dataforkp = &datafork;
-	} else if (dataforkp && (cp->c_datafork->ff_unallocblocks != 0)) {
-		// always make sure the block count and the size 
-		// of the file match the number of blocks actually
-		// allocated to the file on disk
-		bcopy(dataforkp, &datafork, sizeof(datafork));
-		// make sure that we don't assign a negative block count
-		if (cp->c_datafork->ff_blocks < cp->c_datafork->ff_unallocblocks) {
-		    panic("hfs: ff_blocks %d is less than unalloc blocks %d\n",
-			  cp->c_datafork->ff_blocks, cp->c_datafork->ff_unallocblocks);
+
+		/*
+		 * If there are borrowed blocks, ensure that they are subtracted
+		 * from the total block count before writing the cnode entry to disk.
+		 * Only extents that have actually been marked allocated in the bitmap
+		 * should be reflected in the total block count for this fork.
+		 */
+		if (cp->c_datafork->ff_unallocblocks != 0) {
+			// make sure that we don't assign a negative block count
+			if (cp->c_datafork->ff_blocks < cp->c_datafork->ff_unallocblocks) {
+				panic("hfs: ff_blocks %d is less than unalloc blocks %d\n",
+						cp->c_datafork->ff_blocks, cp->c_datafork->ff_unallocblocks);
+			}
+
+			/* Also cap the LEOF to the total number of bytes that are allocated. */
+			datafork.cf_blocks = (cp->c_datafork->ff_blocks - cp->c_datafork->ff_unallocblocks);
+			datafork.cf_size   = datafork.cf_blocks * HFSTOVCB(hfsmp)->blockSize;
+		}
+		
+		/*
+		 * For files with invalid ranges (holes) the on-disk
+		 * field representing the size of the file (cf_size)
+		 * must be no larger than the start of the first hole.
+		 * However, note that if the first invalid range exists
+		 * solely within borrowed blocks, then our LEOF and block
+		 * count should both be zero.  As a result, set it to the 
+		 * min of the current cf_size and the start of the first 
+		 * invalid range, because it may have already been reduced
+		 * to zero by the borrowed blocks check above.
+		 */
+		if (!TAILQ_EMPTY(&cp->c_datafork->ff_invalidranges)) {
+			numbytes = TAILQ_FIRST(&cp->c_datafork->ff_invalidranges)->rl_start;
+			datafork.cf_size = MIN(numbytes, datafork.cf_size);
 		}
-		datafork.cf_blocks = (cp->c_datafork->ff_blocks - cp->c_datafork->ff_unallocblocks);
-		datafork.cf_size   = datafork.cf_blocks * HFSTOVCB(hfsmp)->blockSize;
-		dataforkp = &datafork;
 	}
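To make the borrowed-blocks and invalid-range interaction above concrete, a sketch with assumed values (a 4096-byte allocation block, ff_blocks = 10 of which ff_unallocblocks = 4 are borrowed, and a first invalid range starting at byte 20000):

	/* Sketch only; mirrors the capping logic above with assumed values. */
	uint32_t cf_blocks = 10 - 4;            /* ff_blocks - ff_unallocblocks = 6 */
	off_t    cf_size   = cf_blocks * 4096;  /* capped to allocated bytes: 24576 */
	cf_size = MIN(20000, cf_size);          /* the hole starts earlier, so 20000 wins */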
 
 	/*
@@ -4098,7 +4756,7 @@ hfs_update(struct vnode *vp, __unused int waitfor)
  * Allocate a new node
  * Note - Function does not create and return a vnode for whiteout creation.
  */
-static int
+int
 hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
              struct vnode_attr *vap, vfs_context_t ctx)
 {
@@ -4113,16 +4771,19 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 	int error, started_tr = 0;
 	enum vtype vnodetype;
 	int mode;
+	int newvnode_flags = 0;
+	int nocache = 0;
+	u_int32_t gnv_flags = 0;
 
 	if ((error = hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK)))
 		return (error);
 
 	/* set the cnode pointer only after successfully acquiring lock */
 	dcp = VTOC(dvp);
-	
+
 	/* Don't allow creation of new entries in open-unlinked directories */
-	if ((error = hfs_checkdeleted (dcp))) {
-		hfs_unlock (dcp);
+	if ((error = hfs_checkdeleted(dcp))) {
+		hfs_unlock(dcp);
 		return error;
 	}
 
@@ -4139,6 +4800,13 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 		vnodetype = VREG;
 	mode = MAKEIMODE(vnodetype, vap->va_mode);
 
+#if CONFIG_PROTECT
+	/* If we're creating a regular file on a CP filesystem, then delay caching */
+	if ((vnodetype == VREG ) && (cp_fs_protected (VTOVFS(dvp)))) {
+		nocache = 1;
+	}
+#endif
+	
 	/* Check if were out of usable disk space. */
 	if ((hfs_freeblks(hfsmp, 1) == 0) && (vfs_context_suser(ctx) != 0)) {
 		error = ENOSPC;
@@ -4169,7 +4837,7 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 		VATTR_SET_SUPPORTED(vap, va_flags);
 		attr.ca_flags = vap->va_flags;
 	}
-	
+
 	/* 
 	 * HFS+ only: all files get ThreadExists
 	 * HFSX only: dirs get HasFolderCount
@@ -4183,6 +4851,9 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 		}
 	}
 
+	/* Add the date added to the item */
+	hfs_write_dateadded (&attr, attr.ca_atime);
+
 	attr.ca_uid = vap->va_uid;
 	attr.ca_gid = vap->va_gid;
 	VATTR_SET_SUPPORTED(vap, va_mode);
@@ -4282,6 +4953,11 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 	/* Do not create vnode for whiteouts */
 	if (S_ISWHT(mode)) {
 		goto exit;
+	}	
+
+	gnv_flags |= GNV_CREATE;
+	if (nocache) {
+		gnv_flags |= GNV_NOCACHE;
 	}
 
 	/*
@@ -4297,15 +4973,72 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 	 * try to create a new vnode, and then end up reclaiming another shadow vnode to 
 	 * create the new one.  However, if everything is working properly, this should
 	 * be a non-issue as we would never enter that reclaim codepath.
-	 *
+	 * 
 	 * The cnode is locked on successful return.
 	 */
-	error = hfs_getnewvnode(hfsmp, dvp, cnp, &out_desc, GNV_CREATE, &attr, NULL, &tvp);
+	error = hfs_getnewvnode(hfsmp, dvp, cnp, &out_desc, gnv_flags, &attr, 
+							NULL, &tvp, &newvnode_flags);
 	if (error)
 		goto exit;
 
 	cp = VTOC(tvp);
 	*vpp = tvp;
+
+#if CONFIG_PROTECT
+	error = cp_entry_create_keys(cp);
+	/* 
+	 * If we fail to create keys, then do NOT allow this vnode to percolate out into the
+	 * namespace.  Delete it and return the errno that cp_entry_create_keys generated.
+	 * Luckily, we can do this without issues because the entry was newly created
+	 * and we're still holding the directory cnode lock.  Because we prevented it from
+	 * getting inserted into the namecache upon vnode creation, all accesss to this file
+	 * would have to go through the directory, whose lock we are still holding.
+	 */
+	if (error) {
+		/*
+		 * If we fail to remove/recycle the item here, we can't do much about it.  Log 
+		 * a message to the console and then we can backtrack it.  The ultimate error
+		 * that will get emitted to userland will be from the failure to create the EA blob.
+		 */
+		int err = hfs_removefile (dvp, tvp, cnp, 0, 0, 0, NULL, 0);
+		if (err) {
+			printf("hfs_makenode: removefile failed (%d) for CP file %p\n", err, tvp);
+		}
+		hfs_unlock (cp);
+		err = vnode_recycle (tvp);
+		if (err) {
+			printf("hfs_makenode: vnode_recycle failed (%d) for CP file %p\n", err, tvp);
+		}
+		/* Drop the iocount on the new vnode to force reclamation/recycling */
+		vnode_put (tvp);
+		cp = NULL;
+		*vpp = NULL;
+	}
+	else {
+		/* Insert item into name cache if it wasn't already inserted. */
+		if (nocache) {
+			cache_enter (dvp, tvp, cnp);
+		}
+	}		
+
+#endif
+/* 
+ * If CONFIG_PROTECT is not enabled, then all items will get automatically added into 
+ * the namecache, as nocache will be set to 0.
+ */
+
+#if QUOTA
+	/* 
+	 * Once we create this vnode, we need to initialize its quota data 
+	 * structures, if necessary.  We know that it is OK to just go ahead and 
+	 * initialize because we've already validated earlier (through the hfs_quotacheck 
+	 * function) to see if creating this cnode/vnode would cause us to go over quota. 
+	 */
+	if (hfsmp->hfs_flags & HFS_QUOTAS) {
+		(void) hfs_getinoquota(cp); 
+	}
+#endif
+
 exit:
 	cat_releasedesc(&out_desc);
 	
@@ -4330,8 +5063,8 @@ exit:
 }
 
 
-
-/* hfs_vgetrsrc acquires a resource fork vnode corresponding to the cnode that is
+/*
+ * hfs_vgetrsrc acquires a resource fork vnode corresponding to the cnode that is
  * found in 'vp'.  The rsrc fork vnode is returned with the cnode locked and iocount
  * on the rsrc vnode.
  * 
@@ -4351,10 +5084,9 @@ exit:
  * there's really no reason to double-check for errors on the cnode.
  */
 
-__private_extern__
 int
-hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, 
-		struct vnode **rvpp, int can_drop_lock, int error_on_unlinked)
+hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, 
+		int can_drop_lock, int error_on_unlinked)
 {
 	struct vnode *rvp;
 	struct vnode *dvp = NULLVP;
@@ -4363,18 +5095,21 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp,
 	int vid;
 	int delete_status = 0;
 
-
+	if (vnode_vtype(vp) == VDIR) {
+		return EINVAL;
+	}
+	
 	/*
-	 * Need to check the status of the cnode to validate it hasn't
-	 * gone open-unlinked on us before we can actually do work with it.
+	 * Need to check the status of the cnode to validate it hasn't gone 
+	 * open-unlinked on us before we can actually do work with it.
 	 */
-	delete_status = hfs_checkdeleted (cp);
+	delete_status = hfs_checkdeleted(cp);
 	if ((delete_status) && (error_on_unlinked)) {
 		return delete_status;
 	}
 
 restart:
-	/* Attempt to use exising vnode */
+	/* Attempt to use existing vnode */
 	if ((rvp = cp->c_rsrc_vp)) {
 	        vid = vnode_vid(rvp);
 
@@ -4410,11 +5145,10 @@ restart:
 				if ((delete_status = hfs_checkdeleted(cp))) {
 					/* 
 					 * If error == 0, this means that we succeeded in acquiring an iocount on the 
-					 * rsrc fork vnode.  However, if we're in this block of code, that 
-					 * means that we noticed that the cnode has gone open-unlinked.  In 
-					 * this case, the caller requested that we not do any other work and 
-					 * return an errno.  The caller will be responsible for dropping the 
-					 * iocount we just acquired because we can't do it until we've released 
+					 * rsrc fork vnode.  However, if we're in this block of code, that means that we noticed
+					 * that the cnode has gone open-unlinked.  In this case, the caller requested that we
+					 * not do any other work and return an errno.  The caller will be responsible for
+					 * dropping the iocount we just acquired because we can't do it until we've released
 					 * the cnode lock.  
 					 */
 					if (error == 0) {
@@ -4447,7 +5181,8 @@ restart:
 		struct cat_desc to_desc;
 		char delname[32];
 		int lockflags;
-
+		int newvnode_flags = 0;
+			
 		/*
 		 * Make sure cnode lock is exclusive, if not upgrade it.
 		 *
@@ -4478,7 +5213,7 @@ restart:
 		 */
 
 		if ((error_on_unlinked) && (can_drop_lock)) {
-			if ((error = hfs_checkdeleted (cp))) {
+			if ((error = hfs_checkdeleted(cp))) { 
 				return error;
 			}
 		}
@@ -4530,7 +5265,7 @@ restart:
 		dvp = vnode_getparent(vp);
 		error = hfs_getnewvnode(hfsmp, dvp, cn.cn_pnbuf ? &cn : NULL,
 		                        descptr, GNV_WANTRSRC | GNV_SKIPLOCK, &cp->c_attr,
-		                        &rsrcfork, &rvp);
+		                        &rsrcfork, &rvp, &newvnode_flags);
 		if (dvp)
 			vnode_put(dvp);
 		if (cn.cn_pnbuf)
@@ -4546,7 +5281,7 @@ restart:
 /*
  * Wrapper for special device reads
  */
-static int
+int
 hfsspec_read(ap)
 	struct vnop_read_args /* {
 		struct vnode *a_vp;
@@ -4565,7 +5300,7 @@ hfsspec_read(ap)
 /*
  * Wrapper for special device writes
  */
-static int
+int
 hfsspec_write(ap)
 	struct vnop_write_args /* {
 		struct vnode *a_vp;
@@ -4587,7 +5322,7 @@ hfsspec_write(ap)
  *
  * Update the times on the cnode then do device close.
  */
-static int
+int
 hfsspec_close(ap)
 	struct vnop_close_args /* {
 		struct vnode *a_vp;
@@ -4680,7 +5415,7 @@ hfsfifo_close(ap)
 /*
  * Synchronize a file's in-core state with that on disk.
  */
-static int
+int
 hfs_vnop_fsync(ap)
 	struct vnop_fsync_args /* {
 		struct vnode *a_vp;
@@ -4691,6 +5426,21 @@ hfs_vnop_fsync(ap)
 	struct vnode* vp = ap->a_vp;
 	int error;
 
+	/* Note: We check the hfs flags instead of the vfs mount flag because
+	 * during a read-write update, hfs marks itself read-write much earlier
+	 * than the vfs does; checking the hfs flag therefore does not skip
+	 * writes such as zeroing out unused nodes or creating the hotfiles btree.
+	 */
+	if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) {
+		return 0;		
+	}
+
+#if CONFIG_PROTECT
+	if ((error = cp_handle_vnop(VTOC(vp), CP_WRITE_ACCESS)) != 0) {
+		return (error);
+	}
+#endif /* CONFIG_PROTECT */
+
 	/*
 	 * We need to allow ENOENT lock errors since unlink
 	 * system call can call VNOP_FSYNC during vclean.
@@ -4706,7 +5456,7 @@ hfs_vnop_fsync(ap)
 }
 
 
-static int
+int
 hfs_vnop_whiteout(ap) 
 	struct vnop_whiteout_args /* {
 		struct vnode *a_dvp;
@@ -4858,7 +5608,7 @@ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = {
     { &vnop_select_desc, (VOPFUNC)hfs_vnop_select },		/* select */
     { &vnop_revoke_desc, (VOPFUNC)nop_revoke },			/* revoke */
     { &vnop_exchange_desc, (VOPFUNC)hfs_vnop_exchange },		/* exchange */
-    { &vnop_mmap_desc, (VOPFUNC)err_mmap },			/* mmap */
+    { &vnop_mmap_desc, (VOPFUNC)hfs_vnop_mmap },			/* mmap */
     { &vnop_fsync_desc, (VOPFUNC)hfs_vnop_fsync },		/* fsync */
     { &vnop_remove_desc, (VOPFUNC)hfs_vnop_remove },		/* remove */
     { &vnop_link_desc, (VOPFUNC)hfs_vnop_link },			/* link */
diff --git a/bsd/hfs/hfs_xattr.c b/bsd/hfs/hfs_xattr.c
index 6eec7028b..8091dfaa2 100644
--- a/bsd/hfs/hfs_xattr.c
+++ b/bsd/hfs/hfs_xattr.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -35,6 +35,7 @@
 #include <sys/vnode.h>
 #include <sys/xattr.h>
 #include <sys/fcntl.h>
+#include <sys/fsctl.h>
 #include <sys/vnode_internal.h>
 #include <sys/kauth.h>
 
@@ -66,7 +67,6 @@ struct listattr_callback_state {
 #endif /* HFS_COMPRESSION */
 };
 
-#define HFS_MAXATTRIBUTESIZE    (128 * 1024)
 #define HFS_MAXATTRBLKS         (32 * 1024)
 
 
@@ -80,6 +80,8 @@ struct listattr_callback_state {
 
 static u_int32_t emptyfinfo[8] = {0};
 
+static int hfs_zero_dateadded (struct cnode *cp, u_int8_t *finderinfo); 
+
 const char hfs_attrdatafilename[] = "Attribute Data";
 
 static int  listattr_callback(const HFSPlusAttrKey *key, const HFSPlusAttrData *data,
@@ -216,7 +218,7 @@ hfs_vnop_removenamedstream(struct vnop_removenamedstream_args* ap)
 	scp = VTOC(svp);
 
 	/* Take truncate lock before taking cnode lock. */
-	hfs_lock_truncate(scp, TRUE);
+	hfs_lock_truncate(scp, HFS_EXCLUSIVE_LOCK);
 	if ((error = hfs_lock(scp, HFS_EXCLUSIVE_LOCK))) {
 		goto out;
 	}
@@ -225,16 +227,38 @@ hfs_vnop_removenamedstream(struct vnop_removenamedstream_args* ap)
 	}
 	hfs_unlock(scp);
 out:
-	hfs_unlock_truncate(scp, TRUE);
+	hfs_unlock_truncate(scp, 0);
 	return (error);
 }
 #endif
 
+/* Zero out the date added field for the specified cnode */
+static int hfs_zero_dateadded (struct cnode *cp, u_int8_t *finderinfo) {
+	u_int8_t *finfo = finderinfo;
+
+	/* Advance finfo by 16 bytes to the 2nd half of the finderinfo */
+	finfo = finfo + 16;
+
+	if (S_ISREG(cp->c_attr.ca_mode)) {
+		struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
+		extinfo->date_added = 0;
+	}
+	else if (S_ISDIR(cp->c_attr.ca_mode)) {
+		struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
+		extinfo->date_added = 0;
+	}
+	else {
+		/* Not a regular file or directory; nothing to zero out */
+		return -1;
+	}
+	return 0;
+}
+
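The read-side counterpart (cf. hfs_get_dateadded, used by hfs_vnop_setxattr below) is not shown in this hunk. A hypothetical sketch, where the helper name is illustrative but the offset-16 layout and big-endian on-disk storage match the swaps performed later in this file:

	static u_int32_t
	get_dateadded_sketch(struct cnode *cp)
	{
		/* The date-added field lives in the second 16 bytes of the finderinfo. */
		u_int8_t *finfo = cp->c_finderinfo + 16;

		if (S_ISREG(cp->c_attr.ca_mode))
			return OSSwapBigToHostInt32(((struct FndrExtendedFileInfo *)finfo)->date_added);
		if (S_ISDIR(cp->c_attr.ca_mode))
			return OSSwapBigToHostInt32(((struct FndrExtendedDirInfo *)finfo)->date_added);
		return 0;	/* no date-added for other node types */
	}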
 
 /*
  * Retrieve the data of an extended attribute.
  */
-__private_extern__
 int
 hfs_vnop_getxattr(struct vnop_getxattr_args *ap)
 /*
@@ -253,13 +277,7 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap)
 	struct cnode *cp;
 	struct hfsmount *hfsmp;
 	uio_t uio = ap->a_uio;
-	struct BTreeIterator * iterator = NULL;
-	struct filefork *btfile;
-	FSBufferDescriptor btdata;
-	HFSPlusAttrRecord * recp = NULL;
 	size_t bufsize;
-	u_int16_t datasize;
-	int lockflags;
 	int result;
 
 	cp = VTOC(vp);
@@ -281,6 +299,9 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap)
 			/* Make a copy since we may not export all of it. */
 			bcopy(cp->c_finderinfo, finderinfo, sizeof(finderinfo));
 			hfs_unlock(cp);
+
+			/* Zero out the date added field in the local copy */
+			hfs_zero_dateadded (cp, finderinfo);
 
 			/* Don't expose a symlink's private type/creator. */
 			if (vnode_islnk(vp)) {
@@ -347,17 +368,17 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap)
 				    (result == 0) &&
 				    (uio_resid(uio) == uio_size)) {
 					/*
-					 we intentionally make the above call to VNOP_READ so that
-					 it can return an authorization/permission/etc. error
-					 based on ap->a_context and thus deny this operation;
-					 in that case, result != 0 and we won't proceed
-					 
-					 however, if result == 0, it will have returned no data
-					 because hfs_vnop_read hid the resource fork
-					 (hence uio_resid(uio) == uio_size, i.e. the uio is untouched)
-					 
-					 in that case, we try again with the decmpfs_ctx context
-					 to get the actual data
+					 * We intentionally make the above call to VNOP_READ so that
+					 * it can return an authorization/permission/etc. error
+					 * based on ap->a_context and thus deny this operation;
+					 * in that case, result != 0 and we won't proceed.
+					 * 
+					 * However, if result == 0, it will have returned no data
+					 * because hfs_vnop_read hid the resource fork
+					 * (hence uio_resid(uio) == uio_size, i.e. the uio is untouched)
+					 * 
+					 * In that case, we try again with the decmpfs_ctx context
+					 * to get the actual data
 					 */
 					result = VNOP_READ(rvp, uio, 0, decmpfs_ctx);
 				}
@@ -387,24 +408,78 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap)
 	if ((result = hfs_lock(cp, HFS_SHARED_LOCK))) {
 		return (result);
 	}
-	/* Bail if we don't have any extended attributes. */
+	
+	/* Check for non-rsrc, non-finderinfo EAs */
+	result = hfs_getxattr_internal (cp, ap, VTOHFS(cp->c_vp), 0);
+
+	hfs_unlock(cp);
+	
+	return MacToVFSError(result);
+}
+
+
+
+/*
+ * hfs_getxattr_internal
+ *
+ * We break out this internal function which searches the attributes B-Tree and the 
+ * overflow extents file to find non-resource, non-finderinfo EAs.  There may be cases 
+ * where we need to get EAs in contexts where we are already holding the cnode lock, 
+ * and to re-enter hfs_vnop_getxattr would cause us to double-lock the cnode.  Instead, 
+ * we can just directly call this function.
+ *
+ * We pass the hfsmp argument directly here because we may not necessarily have a cnode to
+ * operate on.  Under normal conditions, we have a file or directory to query, but if we
+ * are operating on the root directory (id 1), then we may not have a cnode.  In this case, if the
+ * 'cp' argument is NULL, then we need to use the 'fileid' argument as the entry to manipulate.
+ *
+ * NOTE: This function assumes the cnode lock for 'cp' is held exclusive or shared. 
+ */ 
+
+
+int hfs_getxattr_internal (struct cnode *cp, struct vnop_getxattr_args *ap, 
+		struct hfsmount *hfsmp, u_int32_t fileid) {
+	
+	struct filefork *btfile;
+	struct BTreeIterator * iterator = NULL;
+	size_t bufsize = 0;
+	HFSPlusAttrRecord *recp = NULL;
+	FSBufferDescriptor btdata;
+	int lockflags = 0;
+	int result = 0;
+	u_int16_t datasize = 0;
+	uio_t uio = ap->a_uio;
+	u_int32_t target_id = 0;
+
+	if (cp) {
+		target_id = cp->c_fileid;
+	}
+	else {
+		target_id = fileid;
+	}
+
+
+	/* Bail if we don't have an EA B-Tree. */
 	if ((hfsmp->hfs_attribute_vp == NULL) ||
-	    (cp->c_attr.ca_recflags & kHFSHasAttributesMask) == 0) {
-	    	result = ENOATTR;
+	   ((cp) &&  (cp->c_attr.ca_recflags & kHFSHasAttributesMask) == 0)) {
+		result = ENOATTR;
 		goto exit;
 	}
+	
+	/* Initialize the B-Tree iterator for searching for the proper EA */
 	btfile = VTOF(hfsmp->hfs_attribute_vp);
-
+	
 	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
 	if (iterator == NULL) {
 		result = ENOMEM;
 		goto exit;
 	}
 	bzero(iterator, sizeof(*iterator));
-
+	
 	bufsize = sizeof(HFSPlusAttrData) - 2;
-	if (uio)
+	if (uio) {
 		bufsize += uio_resid(uio);
+	}
 	bufsize = MAX(bufsize, sizeof(HFSPlusAttrRecord));
 	MALLOC(recp, HFSPlusAttrRecord *, bufsize, M_TEMP, M_WAITOK);
 	if (recp == NULL) {
@@ -414,132 +489,146 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap)
 	btdata.bufferAddress = recp;
 	btdata.itemSize = bufsize;
 	btdata.itemCount = 1;
+	
+	result = hfs_buildattrkey(target_id, ap->a_name, (HFSPlusAttrKey *)&iterator->key);
+	if (result) {
+		goto exit;
+	}
 
-	result = hfs_buildattrkey(VTOC(vp)->c_fileid, ap->a_name, (HFSPlusAttrKey *)&iterator->key);
-	if (result)
-		goto exit;	
-
-	/* Lookup the attribute. */
+	/* Lookup the attribute in the Attribute B-Tree */
 	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
 	result = BTSearchRecord(btfile, iterator, &btdata, &datasize, NULL);
 	hfs_systemfile_unlock(hfsmp, lockflags);
-
+	
 	if (result) {
-		if (result == btNotFound)
+		if (result == btNotFound) {
 			result = ENOATTR;
+		}
 		goto exit;
 	}
-
+	
+	/* 
+	 * Operate differently if we have inline EAs that can fit in the attribute B-Tree or if
+	 * we have extent-based EAs.
+	 */
 	switch (recp->recordType) {
-	case kHFSPlusAttrInlineData:
-		/*
-		 * Sanity check record size. It's not required to have any
-		 * user data, so the minimum size is 2 bytes less that the
-		 * size of HFSPlusAttrData (since HFSPlusAttrData struct
-		 * has 2 bytes set aside for attribute data).
-		 */
-		if (datasize < (sizeof(HFSPlusAttrData) - 2)) {
-			printf("hfs_getxattr: %d,%s invalid record size %d (expecting %lu)\n", 
-				VTOC(vp)->c_fileid, ap->a_name, datasize, sizeof(HFSPlusAttrData));
-			result = ENOATTR;
-			break;
-		}
-		*ap->a_size = recp->attrData.attrSize;
-		if (uio && recp->attrData.attrSize != 0) {
-			if (*ap->a_size > (user_size_t)uio_resid(uio))
-				result = ERANGE;
-			else
-				result = uiomove((caddr_t) &recp->attrData.attrData , recp->attrData.attrSize, uio);
-		}
-		break;
-
-	case kHFSPlusAttrForkData:
-		if (datasize < sizeof(HFSPlusAttrForkData)) {
-			printf("hfs_getxattr: %d,%s invalid record size %d (expecting %lu)\n", 
-				VTOC(vp)->c_fileid, ap->a_name, datasize, sizeof(HFSPlusAttrForkData));
-			result = ENOATTR;
-			break;
-		}
-		*ap->a_size = recp->forkData.theFork.logicalSize;
-		if (uio == NULL) {
-			break;
-		}
-		if (*ap->a_size > (user_size_t)uio_resid(uio)) {
-			result = ERANGE;
+		/* Attribute fits in the Attribute B-Tree */
+		case kHFSPlusAttrInlineData:
+			/*
+			 * Sanity check record size. It's not required to have any
+			 * user data, so the minimum size is 2 bytes less than the
+			 * size of HFSPlusAttrData (since HFSPlusAttrData struct
+			 * has 2 bytes set aside for attribute data).
+			 */
+			if (datasize < (sizeof(HFSPlusAttrData) - 2)) {
+				printf("hfs_getxattr: %d,%s invalid record size %d (expecting %lu)\n", 
+					   target_id, ap->a_name, datasize, sizeof(HFSPlusAttrData));
+				result = ENOATTR;
+				break;
+			}
+			*ap->a_size = recp->attrData.attrSize;
+			if (uio && recp->attrData.attrSize != 0) {
+				if (*ap->a_size > (user_size_t)uio_resid(uio)) {
+					result = ERANGE;
+				}
+				else {
+					result = uiomove((caddr_t) &recp->attrData.attrData , recp->attrData.attrSize, uio);
+				}
+			}
 			break;
-		}
-		/* Process overflow extents if necessary. */
-		if (has_overflow_extents(&recp->forkData.theFork)) {
-			HFSPlusExtentDescriptor *extentbuf;
-			HFSPlusExtentDescriptor *extentptr;
-			size_t extentbufsize;
-			u_int32_t totalblocks;
-			u_int32_t blkcnt;
-			u_int32_t attrlen;
-
-			totalblocks = recp->forkData.theFork.totalBlocks;
-			/* Ignore bogus block counts. */
-			if (totalblocks > HFS_MAXATTRBLKS) {
-				result = ERANGE;
+		/* Extent-Based EAs */
+		case kHFSPlusAttrForkData: {
+			if (datasize < sizeof(HFSPlusAttrForkData)) {
+				printf("hfs_getxattr: %d,%s invalid record size %d (expecting %lu)\n", 
+					   target_id, ap->a_name, datasize, sizeof(HFSPlusAttrForkData));
+				result = ENOATTR;
 				break;
 			}
-			attrlen = recp->forkData.theFork.logicalSize;
-
-			/* Get a buffer to hold the worst case amount of extents. */
-			extentbufsize = totalblocks * sizeof(HFSPlusExtentDescriptor);
-			extentbufsize = roundup(extentbufsize, sizeof(HFSPlusExtentRecord));
-			MALLOC(extentbuf, HFSPlusExtentDescriptor *, extentbufsize, M_TEMP, M_WAITOK);
-			if (extentbuf == NULL) {
-				result = ENOMEM;
+			*ap->a_size = recp->forkData.theFork.logicalSize;
+			if (uio == NULL) {
 				break;
 			}
-			bzero(extentbuf, extentbufsize);
-			extentptr = extentbuf;
-
-			/* Grab the first 8 extents. */
-			bcopy(&recp->forkData.theFork.extents[0], extentptr, sizeof(HFSPlusExtentRecord));
-			extentptr += kHFSPlusExtentDensity;
-			blkcnt = count_extent_blocks(totalblocks, recp->forkData.theFork.extents);
-
-			/* Now lookup the overflow extents. */
-			lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
-			while (blkcnt < totalblocks) {
-				((HFSPlusAttrKey *)&iterator->key)->startBlock = blkcnt;
-				result = BTSearchRecord(btfile, iterator, &btdata, &datasize, NULL);
-				if (result ||
-				    (recp->recordType != kHFSPlusAttrExtents) ||
-				    (datasize < sizeof(HFSPlusAttrExtents))) {
-					printf("hfs_getxattr: %s missing extents, only %d blks of %d found\n",
-						ap->a_name, blkcnt, totalblocks);
-					result = ENOATTR;
-					break;   /* break from while */
+			if (*ap->a_size > (user_size_t)uio_resid(uio)) {
+				result = ERANGE;
+				break;
+			}
+			/* Process overflow extents if necessary. */
+			if (has_overflow_extents(&recp->forkData.theFork)) {
+				HFSPlusExtentDescriptor *extentbuf;
+				HFSPlusExtentDescriptor *extentptr;
+				size_t extentbufsize;
+				u_int32_t totalblocks;
+				u_int32_t blkcnt;
+				u_int32_t attrlen;
+				
+				totalblocks = recp->forkData.theFork.totalBlocks;
+				/* Ignore bogus block counts. */
+				if (totalblocks > HFS_MAXATTRBLKS) {
+					result = ERANGE;
+					break;
+				}
+				attrlen = recp->forkData.theFork.logicalSize;
+				
+				/* Get a buffer to hold the worst case amount of extents. */
+				extentbufsize = totalblocks * sizeof(HFSPlusExtentDescriptor);
+				extentbufsize = roundup(extentbufsize, sizeof(HFSPlusExtentRecord));
+				MALLOC(extentbuf, HFSPlusExtentDescriptor *, extentbufsize, M_TEMP, M_WAITOK);
+				if (extentbuf == NULL) {
+					result = ENOMEM;
+					break;
 				}
-				/* Grab the next 8 extents. */
-				bcopy(&recp->overflowExtents.extents[0], extentptr, sizeof(HFSPlusExtentRecord));
+				bzero(extentbuf, extentbufsize);
+				extentptr = extentbuf;
+				
+				/* Grab the first 8 extents. */
+				bcopy(&recp->forkData.theFork.extents[0], extentptr, sizeof(HFSPlusExtentRecord));
 				extentptr += kHFSPlusExtentDensity;
-				blkcnt += count_extent_blocks(totalblocks, recp->overflowExtents.extents);
-			}
-			hfs_systemfile_unlock(hfsmp, lockflags);
-
-			if (blkcnt < totalblocks) {
-				result = ENOATTR;
-			} else {
-				result = read_attr_data(hfsmp, uio, attrlen, extentbuf);
+				blkcnt = count_extent_blocks(totalblocks, recp->forkData.theFork.extents);
+				
+				/* Now lookup the overflow extents. */
+				lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
+				while (blkcnt < totalblocks) {
+					((HFSPlusAttrKey *)&iterator->key)->startBlock = blkcnt;
+					result = BTSearchRecord(btfile, iterator, &btdata, &datasize, NULL);
+					if (result ||
+						(recp->recordType != kHFSPlusAttrExtents) ||
+						(datasize < sizeof(HFSPlusAttrExtents))) {
+						printf("hfs_getxattr: %s missing extents, only %d blks of %d found\n",
+							   ap->a_name, blkcnt, totalblocks);
+						result = ENOATTR;
+						break;   /* break from while */
+					}
+					/* Grab the next 8 extents. */
+					bcopy(&recp->overflowExtents.extents[0], extentptr, sizeof(HFSPlusExtentRecord));
+					extentptr += kHFSPlusExtentDensity;
+					blkcnt += count_extent_blocks(totalblocks, recp->overflowExtents.extents);
+				}
+				
+				/* Release Attr B-Tree lock */
+				hfs_systemfile_unlock(hfsmp, lockflags);
+				
+				if (blkcnt < totalblocks) {
+					result = ENOATTR;
+				} 
+				else {
+					result = read_attr_data(hfsmp, uio, attrlen, extentbuf);
+				}
+				FREE(extentbuf, M_TEMP);
+				
+			} 
+			else /* No overflow extents. */ {
+				result = read_attr_data(hfsmp, uio, recp->forkData.theFork.logicalSize, recp->forkData.theFork.extents);
 			}
-			FREE(extentbuf, M_TEMP);
-
-		} else /* No overflow extents. */ {
-			result = read_attr_data(hfsmp, uio, recp->forkData.theFork.logicalSize, recp->forkData.theFork.extents);
+			break;
 		}
-		break;
-
-	default:
-		result = ENOATTR;
-		break;		
+			
+		default:
+			/* We only support extent-based or inline EAs.  Default to ENOATTR for anything else. */
+			result = ENOATTR;
+			break;		
 	}
-exit:
-	hfs_unlock(cp);
-
+	
+exit:	
 	if (iterator) {
 		FREE(iterator, M_TEMP);
 	}
@@ -547,13 +636,14 @@ exit:
 		FREE(recp, M_TEMP);
 	}
 	
-	return MacToVFSError(result);
+	return result;
+	
 }
 
+
 /*
  * Set the data of an extended attribute.
  */
-__private_extern__
 int
 hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 /*
@@ -571,19 +661,10 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 	struct cnode *cp = NULL;
 	struct hfsmount *hfsmp;
 	uio_t uio = ap->a_uio;
-	struct BTreeIterator * iterator = NULL;
-	struct filefork *btfile = NULL;
 	size_t attrsize;
-	FSBufferDescriptor btdata;
-	HFSPlusAttrRecord *recp = NULL;
-	HFSPlusExtentDescriptor *extentptr = NULL;
-	HFSPlusAttrRecord attrdata;  /* 90 bytes */
 	void * user_data_ptr = NULL;
-	int started_transaction = 0;
-	int lockflags = 0;
-	int exists;
-	int allocatedblks = 0;
 	int result;
+	time_t orig_ctime = VTOC(vp)->c_ctime;
 
 	if (ap->a_name == NULL || ap->a_name[0] == '\0') {
 		return (EINVAL);  /* invalid name */
@@ -599,6 +680,8 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 		if (result != 0)
 			return result;
 	}
+
+	check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_METADATA_WRITE_OP, NULL);
 #endif /* HFS_COMPRESSION */
 	
 	/* Set the Finder Info. */
@@ -606,7 +689,9 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 		u_int8_t finderinfo[32];
 		struct FndrFileInfo *fip;
 		void * finderinfo_start;
+		u_int8_t *finfo = NULL;
 		u_int16_t fdFlags;
+		u_int32_t dateadded = 0;
 
 		attrsize = sizeof(VTOC(vp)->c_finderinfo);
 
@@ -641,6 +726,12 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 			} 
 		}
 
+		/* Grab the current date added from the cnode */
+		dateadded = hfs_get_dateadded (cp);
+        
+		/* Zero out the date added field to ignore user's attempts to set it */
+		hfs_zero_dateadded(cp, finderinfo);
+
 		if (bcmp(finderinfo_start, emptyfinfo, attrsize)) {
 			/* attr exists and "create" was specified. */
 			if (ap->a_options & XATTR_CREATE) {
@@ -654,12 +745,33 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 				return (ENOATTR);
 			}
 		}
+
+		/* 
+		 * Now restore the date added to the finderinfo to be written out.
+		 * Advance to the 2nd half of the finderinfo to write out the date added
+		 * into the buffer.
+		 *
+		 * Make sure to endian swap the date added back into big endian.  When we used
+		 * hfs_get_dateadded above to retrieve it, it was swapped into host endianness
+		 * for us.  But now that we're writing it out, put it back into big endian.
+		 */
+		finfo = &finderinfo[16];
+
+		if (S_ISREG(cp->c_attr.ca_mode)) {
+			struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
+			extinfo->date_added = OSSwapHostToBigInt32(dateadded);
+		}
+		else if (S_ISDIR(cp->c_attr.ca_mode)) {
+			struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
+			extinfo->date_added = OSSwapHostToBigInt32(dateadded);
+		}
+
 		/* Set the cnode's Finder Info. */
 		if (attrsize == sizeof(cp->c_finderinfo))
 			bcopy(&finderinfo[0], finderinfo_start, attrsize);
 		else
 			bcopy(&finderinfo[8], finderinfo_start, attrsize);
-
+	
 		/* Updating finderInfo updates change time and modified time */
 		cp->c_touch_chgtime = TRUE;
 		cp->c_flag |= C_MODIFIED;
@@ -724,32 +836,29 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 		if (result) {
 			return (result);
 		}
-		/* 
-		 * VNOP_WRITE marks the vnode as needing a modtime update.
-		 */
+		/* VNOP_WRITE marks cnode as needing a modtime update */
 		result = VNOP_WRITE(rvp, uio, 0, ap->a_context);
 		
-		/* if open unlinked, force it inactive and recycle */
+		/* if open unlinked, force it inactive */
 		if (openunlinked) {
 			int vref;
 			vref = vnode_ref (rvp);
 			if (vref == 0) {
 				vnode_rele(rvp);
 			}
-			vnode_recycle (rvp);
+			vnode_recycle (rvp);	
 		}
 		else {
-			/* re-lock the cnode so we can update the modtimes */
-			if ((result = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
-				vnode_recycle(rvp);
+			/* cnode is not open-unlinked, so re-lock cnode to sync */
+			if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
+				vnode_recycle (rvp);
 				vnode_put(rvp);
-				return (result);
+				return result;
 			}
-
-			/* HFS fsync the resource fork to force it out to disk */
-			result = hfs_fsync (rvp, MNT_NOWAIT, 0, vfs_context_proc(ap->a_context));
-
-			hfs_unlock(cp);
+			
+			/* hfs fsync rsrc fork to force to disk and update modtime */
+			result = hfs_fsync (rvp, MNT_NOWAIT, 0, vfs_context_proc (ap->a_context));
+			hfs_unlock (cp);
 		}
 
 		vnode_put(rvp);
@@ -764,8 +873,9 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 	attrsize = uio_resid(uio);
 
 	/* Enforce an upper limit. */
-	if (attrsize > HFS_MAXATTRIBUTESIZE) {
-		return (E2BIG);
+	if (attrsize > HFS_XATTR_MAXSIZE) {
+		result = E2BIG;
+		goto exit;
 	}
 
 	/*
@@ -791,23 +901,82 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 		goto exit;
 	}
 	cp = VTOC(vp);
+	
+	/* 
+	 * If we're trying to set a non-finderinfo, non-resourcefork EA, then
+	 * call the breakout function.
+	 */
+	result = hfs_setxattr_internal (cp, user_data_ptr, attrsize, ap, VTOHFS(vp), 0);
 
+exit:
+	if (cp) {
+		hfs_unlock(cp);
+	}
+	if (user_data_ptr) {
+		FREE(user_data_ptr, M_TEMP);
+	}
+
+	return (result == btNotFound ? ENOATTR : MacToVFSError(result));
+}
+
+
+/*
+ * hfs_setxattr_internal
+ * 
+ * Internal function to set non-rsrc, non-finderinfo EAs to either the attribute B-Tree or
+ * extent-based EAs.
+ *
+ * See comments from hfs_getxattr_internal on why we need to pass 'hfsmp' and fileid here.
+ * The gist is that we could end up writing to the root folder which may not have a cnode.
+ *
+ * Assumptions: 
+ *		1. cnode 'cp' is locked EXCLUSIVE before calling this function.
+ *		2. data_ptr contains data to be written.  If gathering data from userland, this must be
+ *			done before calling this function.  
+ *		3. If data originates entirely in-kernel, use a null UIO, and ensure the size is less than 
+ *			hfsmp->hfs_max_inline_attrsize bytes long. 
+ */ 
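Under those assumptions, a hypothetical in-kernel call pattern; 'kernel_buf' and the surrounding locals are illustrative, and 'ap' must still carry the EA name and vnode:

	/* cp locked exclusive; payload already gathered into kernel_buf, uio left NULL */
	if (attrsize < hfsmp->hfs_max_inline_attrsize) {
		result = hfs_setxattr_internal(cp, kernel_buf, attrsize, ap, hfsmp, 0);
	}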
+int hfs_setxattr_internal (struct cnode *cp, caddr_t data_ptr, size_t attrsize,
+						   struct vnop_setxattr_args *ap, struct hfsmount *hfsmp, 
+						   u_int32_t fileid) {
+	uio_t uio = ap->a_uio;
+	struct vnode *vp = ap->a_vp;
+	int started_transaction = 0;
+	struct BTreeIterator * iterator = NULL;
+	struct filefork *btfile = NULL;
+	FSBufferDescriptor btdata;
+	HFSPlusAttrRecord attrdata;  /* 90 bytes */
+	HFSPlusAttrRecord *recp = NULL;
+	HFSPlusExtentDescriptor *extentptr = NULL;
+	int result = 0;
+	int lockflags = 0;
+	int exists = 0;
+	int allocatedblks = 0;
+	u_int32_t target_id;
+
+	if (cp) {
+		target_id = cp->c_fileid;
+	}
+	else {
+		target_id = fileid;
+	}
+	
 	/* Start a transaction for our changes. */
 	if (hfs_start_transaction(hfsmp) != 0) {
 	    result = EINVAL;
 	    goto exit;
 	}
 	started_transaction = 1;
-
+	
 	/*
 	 * Once we started the transaction, nobody can compete
 	 * with us, so make sure this file is still there.
 	 */
-	if (cp->c_flag & C_NOEXISTS) {
+	if ((cp) && (cp->c_flag & C_NOEXISTS)) {
 		result = ENOENT;
 		goto exit;
 	}
-
+	
 	/*
 	 * If there isn't an attributes b-tree then create one.
 	 */
@@ -821,10 +990,10 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 	if (hfsmp->hfs_max_inline_attrsize == 0) {
 		hfsmp->hfs_max_inline_attrsize = getmaxinlineattrsize(hfsmp->hfs_attribute_vp);
 	}
-
+	
 	/* Take exclusive access to the attributes b-tree. */
 	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
-
+	
 	/* Build the b-tree key. */
 	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
 	if (iterator == NULL) {
@@ -832,18 +1001,18 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 		goto exit;
 	}
 	bzero(iterator, sizeof(*iterator));
-	result = hfs_buildattrkey(VTOC(vp)->c_fileid, ap->a_name, (HFSPlusAttrKey *)&iterator->key);
+	result = hfs_buildattrkey(target_id, ap->a_name, (HFSPlusAttrKey *)&iterator->key);
 	if (result) {
 		goto exit;
 	}
-
+	
 	/* Preflight for replace/create semantics. */
 	btfile = VTOF(hfsmp->hfs_attribute_vp);
 	btdata.bufferAddress = &attrdata;
 	btdata.itemSize = sizeof(attrdata);
 	btdata.itemCount = 1;
 	exists = BTSearchRecord(btfile, iterator, &btdata, NULL, NULL) == 0;
-
+	
 	/* Replace requires that the attribute already exists. */
 	if ((ap->a_options & XATTR_REPLACE) && !exists) {
 		result = ENOATTR;
@@ -854,6 +1023,7 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 		result = EEXIST;
 		goto exit;	
 	}
+	
 	/* If it won't fit inline then use extent-based attributes. */
 	if (attrsize > hfsmp->hfs_max_inline_attrsize) {
 		size_t extentbufsize;
@@ -861,13 +1031,17 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 		int extentblks;
 		u_int32_t *keystartblk;
 		int i;
-
-		/* Check if volume supports extent-based attributes */
-		if ((hfsmp->hfs_flags & HFS_XATTR_EXTENTS) == 0) {
-			result = E2BIG;
+		
+		if (uio == NULL) {
+			/*
+			 * setxattrs originating from in-kernel are not supported if they are bigger
+			 * than the inline max size. Just return EPERM and force them to do it with a
+			 * smaller EA.
+			 */
+			result = EPERM;
 			goto exit;
-		} 
-
+		}
+		
 		/* Get some blocks. */
 		blkcnt = howmany(attrsize, hfsmp->blockSize);
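+		/* e.g. attrsize = 10000 with a 4096-byte allocation block gives blkcnt = 3 */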
 		extentbufsize = blkcnt * sizeof(HFSPlusExtentDescriptor);
@@ -886,11 +1060,13 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 		/* Copy data into the blocks. */
 		result = write_attr_data(hfsmp, uio, attrsize, extentptr);
 		if (result) {
-			const char *name = vnode_getname(vp);
-			printf("hfs_setxattr: write_attr_data err (%d) %s:%s\n",
-				result,  name ? name : "", ap->a_name);
-			if (name)
-				vnode_putname(name);
+			if (vp) {
+				const char *name = vnode_getname(vp);
+				printf("hfs_setxattr: write_attr_data err (%d) %s:%s\n",
+						result,  name ? name : "", ap->a_name);
+				if (name)
+					vnode_putname(name);
+			}
 			goto exit;
 		}
 
@@ -898,15 +1074,16 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 		if (exists) {
 			result = remove_attribute_records(hfsmp, iterator);
 			if (result) {
-				const char *name = vnode_getname(vp);
-				printf("hfs_setxattr: remove_attribute_records err (%d) %s:%s\n",
-					result, name ? name : "", ap->a_name);
-				if (name)
-					vnode_putname(name);
-				goto exit; 
+				if (vp) {
+					const char *name = vnode_getname(vp);
+					printf("hfs_setxattr: remove_attribute_records err (%d) %s:%s\n",
+							result, name ? name : "", ap->a_name);
+					if (name)
+						vnode_putname(name);
+				}
+				goto exit;
 			}
 		}
-
 		/* Create attribute fork data record. */
 		MALLOC(recp, HFSPlusAttrRecord *, sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK);
 		if (recp == NULL) {
@@ -916,32 +1093,27 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 		btdata.bufferAddress = recp;
 		btdata.itemCount = 1;
 		btdata.itemSize = sizeof(HFSPlusAttrForkData);
-
+		
 		recp->recordType = kHFSPlusAttrForkData;
 		recp->forkData.reserved = 0;
 		recp->forkData.theFork.logicalSize = attrsize;
 		recp->forkData.theFork.clumpSize = 0;
 		recp->forkData.theFork.totalBlocks = blkcnt;
 		bcopy(extentptr, recp->forkData.theFork.extents, sizeof(HFSPlusExtentRecord));
-
-		(void) hfs_buildattrkey(VTOC(vp)->c_fileid, ap->a_name, (HFSPlusAttrKey *)&iterator->key);
-
+		
+		(void) hfs_buildattrkey(target_id, ap->a_name, (HFSPlusAttrKey *)&iterator->key);
+		
 		result = BTInsertRecord(btfile, iterator, &btdata, btdata.itemSize);
 		if (result) {
-#if HFS_XATTR_VERBOSE
-			const char *name = vnode_getname(vp);
-			printf("hfs_setxattr: BTInsertRecord err (%d) %s:%s\n",
-			       MacToVFSError(result), name ? name : "", ap->a_name);
-			if (name)
-				vnode_putname(name);
-#endif
+			printf ("hfs_setxattr: BTInsertRecord() - %d,%s err=%d\n", 
+					target_id, ap->a_name, result);
 			goto exit; 
 		}
 		extentblks = count_extent_blocks(blkcnt, recp->forkData.theFork.extents);
 		blkcnt -= extentblks;
 		keystartblk = &((HFSPlusAttrKey *)&iterator->key)->startBlock;
 		i = 0;
-
+		
 		/* Create overflow extents as needed. */
 		while (blkcnt > 0) {
 			/* Initialize the key and record. */
@@ -949,31 +1121,29 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 			btdata.itemSize = sizeof(HFSPlusAttrExtents);
 			recp->recordType = kHFSPlusAttrExtents;
 			recp->overflowExtents.reserved = 0;
-
+			
 			/* Copy the next set of extents. */
 			i += kHFSPlusExtentDensity;
 			bcopy(&extentptr[i], recp->overflowExtents.extents, sizeof(HFSPlusExtentRecord));
-
+			
 			result = BTInsertRecord(btfile, iterator, &btdata, btdata.itemSize);
 			if (result) {
-				const char *name = vnode_getname(vp);
-				printf("hfs_setxattr: BTInsertRecord err (%d) %s:%s\n",
-					MacToVFSError(result), name ? name : "", ap->a_name);
-				if (name)
-					vnode_putname(name);
+				printf ("hfs_setxattr: BTInsertRecord() overflow - %d,%s err=%d\n", 
+						target_id, ap->a_name, result);
 				goto exit;
 			}
 			extentblks = count_extent_blocks(blkcnt, recp->overflowExtents.extents);
 			blkcnt -= extentblks;
 		}
-	} else /* Inline data */ {
+	}
+	else { /* Inline data */ 
 		if (exists) {
 			result = remove_attribute_records(hfsmp, iterator);
 			if (result) {
 				goto exit;
 			}
 		}
-
+		
 		/* Calculate size of record rounded up to multiple of 2 bytes. */
 		btdata.itemSize = sizeof(HFSPlusAttrData) - 2 + attrsize + ((attrsize & 1) ? 1 : 0);
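+		/* e.g. attrsize = 5 adds one pad byte, keeping the record 2-byte aligned */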
 		MALLOC(recp, HFSPlusAttrRecord *, btdata.itemSize, M_TEMP, M_WAITOK);
@@ -985,24 +1155,36 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
 		recp->attrData.reserved[0] = 0;
 		recp->attrData.reserved[1] = 0;
 		recp->attrData.attrSize = attrsize;
-	
+		
 		/* Copy in the attribute data (if any). */
 		if (attrsize > 0) {
-			if (user_data_ptr)
-				bcopy(user_data_ptr, &recp->attrData.attrData, attrsize);
-			else
+			if (data_ptr) {
+				bcopy(data_ptr, &recp->attrData.attrData, attrsize);
+			}
+			else {
+				/* 
+				 * A null UIO means the request originated in-kernel.  If the caller didn't
+				 * supply data_ptr, then deny the copy operation.
+				 */
+				if (uio == NULL) {
+					result = EPERM;
+					goto exit;
+				}
 				result = uiomove((caddr_t)&recp->attrData.attrData, attrsize, uio);
+			}
+			
 			if (result) {
 				goto exit;
 			}
 		}
-
-		(void) hfs_buildattrkey(VTOC(vp)->c_fileid, ap->a_name, (HFSPlusAttrKey *)&iterator->key);
-
+		
+		(void) hfs_buildattrkey(target_id, ap->a_name, (HFSPlusAttrKey *)&iterator->key);
+		
 		btdata.bufferAddress = recp;
 		btdata.itemCount = 1;
 		result = BTInsertRecord(btfile, iterator, &btdata, btdata.itemSize);
 	}
+	
 exit:
 	if (btfile && started_transaction) {
 		(void) BTFlushPath(btfile);
@@ -1011,16 +1193,18 @@ exit:
 		hfs_systemfile_unlock(hfsmp, lockflags);
 	}
 	if (result == 0) {
-		cp = VTOC(vp);
-		/* Setting an attribute only updates change time and not 
-		 * modified time of the file.
-		 */
-		cp->c_touch_chgtime = TRUE;
-		cp->c_attr.ca_recflags |= kHFSHasAttributesMask;
-		if ((bcmp(ap->a_name, KAUTH_FILESEC_XATTR, sizeof(KAUTH_FILESEC_XATTR)) == 0)) {
-			cp->c_attr.ca_recflags |= kHFSHasSecurityMask;
+		if (vp) {
+			cp = VTOC(vp);
+			/* Setting an attribute only updates change time and not 
+			 * modified time of the file.
+			 */
+			cp->c_touch_chgtime = TRUE;
+			cp->c_attr.ca_recflags |= kHFSHasAttributesMask;
+			if ((bcmp(ap->a_name, KAUTH_FILESEC_XATTR, sizeof(KAUTH_FILESEC_XATTR)) == 0)) {
+				cp->c_attr.ca_recflags |= kHFSHasSecurityMask;
+			}
+			(void) hfs_update(vp, 0);
 		}
-		(void) hfs_update(vp, 0);
 	}
 	if (started_transaction) {
 		if (result && allocatedblks) {
@@ -1028,12 +1212,7 @@ exit:
 		}
 		hfs_end_transaction(hfsmp);
 	}
-	if (cp) {
-		hfs_unlock(cp);
-	}
-	if (user_data_ptr) {
-		FREE(user_data_ptr, M_TEMP);
-	}
+	
 	if (recp) {
 		FREE(recp, M_TEMP);
 	}
@@ -1043,13 +1222,16 @@ exit:
 	if (iterator) {
 		FREE(iterator, M_TEMP);
 	}
-	return (result == btNotFound ? ENOATTR : MacToVFSError(result));
+	
+	return result;	
 }
 
+
+
+
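
The inline-data path in hfs_vnop_setxattr above sizes the b-tree record by padding the attribute payload to a two-byte boundary. A minimal standalone sketch of that arithmetic (helper name hypothetical, not part of the patch):

    /*
     * Hypothetical illustration of the itemSize math above: HFSPlusAttrData
     * declares a two-byte attrData[] placeholder, so it is subtracted back
     * out and the real payload added in, rounded up to an even length.
     */
    static size_t
    inline_attr_record_size(size_t base_record_size, size_t attrsize)
    {
    	/* e.g. attrsize = 5 yields base_record_size - 2 + 6 */
    	return base_record_size - 2 + attrsize + ((attrsize & 1) ? 1 : 0);
    }
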
 /*
  * Remove an extended attribute.
  */
-__private_extern__
 int
 hfs_vnop_removexattr(struct vnop_removexattr_args *ap)
 /*
@@ -1068,6 +1250,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap)
 	struct BTreeIterator * iterator = NULL;
 	int lockflags;
 	int result;
+	time_t orig_ctime=VTOC(vp)->c_ctime;
 
 	if (ap->a_name == NULL || ap->a_name[0] == '\0') {
 		return (EINVAL);  /* invalid name */
@@ -1078,8 +1261,11 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap)
 	}
 
 #if HFS_COMPRESSION
-	if (hfs_hides_xattr(ap->a_context, VTOC(vp), ap->a_name, 1) && !(ap->a_options & XATTR_SHOWCOMPRESSION))
+	if (hfs_hides_xattr(ap->a_context, VTOC(vp), ap->a_name, 1) && !(ap->a_options & XATTR_SHOWCOMPRESSION)) {
 		return ENOATTR;
+	}
+
+	check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_METADATA_DELETE_OP, NULL);
 #endif /* HFS_COMPRESSION */
 	
 	/* If Resource Fork is non-empty then truncate it. */
@@ -1102,9 +1288,9 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap)
 			return (result);
 		}
 
-		hfs_lock_truncate(VTOC(rvp), TRUE);
+		hfs_lock_truncate(VTOC(rvp), HFS_EXCLUSIVE_LOCK);
 		if ((result = hfs_lock(VTOC(rvp), HFS_EXCLUSIVE_LOCK))) {
-			hfs_unlock_truncate(cp, TRUE);
+			hfs_unlock_truncate(cp, 0);
 			vnode_put(rvp);
 			return (result);
 		}
@@ -1113,7 +1299,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap)
 		 * hfs_truncate() and hfs_update()
 		 */
 		if ((result = hfs_start_transaction(hfsmp))) {
-			hfs_unlock_truncate(cp, TRUE);
+			hfs_unlock_truncate(cp, 0);
 			hfs_unlock(cp);
 			vnode_put(rvp);
 			return (result);
@@ -1127,7 +1313,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap)
 		}
 
 		hfs_end_transaction(hfsmp);
-		hfs_unlock_truncate(VTOC(rvp), TRUE);
+		hfs_unlock_truncate(VTOC(rvp), 0);
 		hfs_unlock(VTOC(rvp));
 
 		vnode_put(rvp);
@@ -1137,34 +1323,80 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap)
 	if (bcmp(ap->a_name, XATTR_FINDERINFO_NAME, sizeof(XATTR_FINDERINFO_NAME)) == 0) {
 		void * finderinfo_start;
 		int finderinfo_size;
-
+		u_int8_t finderinfo[32];
+		u_int32_t date_added;
+		u_int8_t *finfo = NULL;
+        
 		if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
 			return (result);
 		}
-
-		/* Symlink's don't have an external type/creator. */
+		
+		/* Use the local copy to store our temporary changes. */
+		bcopy(cp->c_finderinfo, finderinfo, sizeof(finderinfo));
+		
+		
+		/* Zero out the date added field in the local copy */
+		hfs_zero_dateadded (cp, finderinfo);
+		
+		/* Don't expose a symlink's private type/creator. */
 		if (vnode_islnk(vp)) {
-			/* Skip over type/creator fields. */
+			struct FndrFileInfo *fip;
+			
+			fip = (struct FndrFileInfo *)&finderinfo;
+			fip->fdType = 0;
+			fip->fdCreator = 0;
+		}
+		
+		/* Do the byte compare against the local copy */
+		if (bcmp(finderinfo, emptyfinfo, sizeof(emptyfinfo)) == 0) {
+			hfs_unlock (cp);
+			return (ENOATTR);
+		}
+		
+		/* 
+		 * If there was other content, zero out everything except 
+		 * type/creator and date added.  First, save the date added.
+		 */
+		finfo = cp->c_finderinfo;
+		finfo = finfo + 16;
+		if (S_ISREG(cp->c_attr.ca_mode)) {
+			struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
+			date_added = extinfo->date_added;
+		}
+		else if (S_ISDIR(cp->c_attr.ca_mode)) {
+			struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
+			date_added = extinfo->date_added;
+		}
+		
+		if (vnode_islnk(vp)) {
+			/* Ignore type/creator */
 			finderinfo_start = &cp->c_finderinfo[8];
 			finderinfo_size = sizeof(cp->c_finderinfo) - 8;
-		} else {
+		}
+		else {
 			finderinfo_start = &cp->c_finderinfo[0];
 			finderinfo_size = sizeof(cp->c_finderinfo);
 		}
-		if (bcmp(finderinfo_start, emptyfinfo, finderinfo_size) == 0) {
-			hfs_unlock(cp);
-			return (ENOATTR);
-		}
-
 		bzero(finderinfo_start, finderinfo_size);
-
+		
+		
+		/* Now restore the date added */
+		if (S_ISREG(cp->c_attr.ca_mode)) {
+			struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
+			extinfo->date_added = date_added;
+		}
+		else if (S_ISDIR(cp->c_attr.ca_mode)) {
+			struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
+			extinfo->date_added = date_added;
+		}
+        
 		/* Updating finderInfo updates change time and modified time */
 		cp->c_touch_chgtime = TRUE;
 		cp->c_flag |= C_MODIFIED;
 		hfs_update(vp, FALSE);
-
+        
 		hfs_unlock(cp);
-
+        
 		return (0);
 	}
 	/*
@@ -1305,7 +1537,7 @@ out:
  * - The Allocation Bitmap file must be locked exclusive.
  * - The iterator key must be initialized.
  */
-static int
+int
 remove_attribute_records(struct hfsmount *hfsmp, BTreeIterator * iterator)
 {
 	struct filefork *btfile;
@@ -1334,11 +1566,9 @@ remove_attribute_records(struct hfsmount *hfsmp, BTreeIterator * iterator)
 		int extentblks;
 		u_int32_t *keystartblk;
 
-#if HFS_XATTR_VERBOSE
 		if (datasize < sizeof(HFSPlusAttrForkData)) {
-			printf("hfs: remove_attribute_records: bad record size %d (expecting %d)\n", datasize, sizeof(HFSPlusAttrForkData));
+			printf("hfs: remove_attribute_records: bad record size %d (expecting %lu)\n", datasize, sizeof(HFSPlusAttrForkData));
 		}
-#endif
 		totalblks = attrdata.forkData.theFork.totalBlocks;
 
 		/* Process the first 8 extents. */
@@ -1385,7 +1615,6 @@ exit:
 /*
  * Retrieve the list of extended attribute names.
  */
-__private_extern__
 int
 hfs_vnop_listxattr(struct vnop_listxattr_args *ap)
 /*
@@ -1405,12 +1634,11 @@ hfs_vnop_listxattr(struct vnop_listxattr_args *ap)
 	struct BTreeIterator * iterator = NULL;
 	struct filefork *btfile;
 	struct listattr_callback_state state;
-	void * finderinfo_start;
-	int finderinfo_size;
 	user_addr_t user_start = 0;
 	user_size_t user_len = 0;
 	int lockflags;
 	int result;
+    u_int8_t finderinfo[32];
 
 	if (VNODE_IS_RSRC(vp)) {
 		return (EPERM);
@@ -1427,17 +1655,26 @@ hfs_vnop_listxattr(struct vnop_listxattr_args *ap)
 		return (result);
 	}
 
+	/* 
+	 * Make a copy of the cnode's finderinfo into a local buffer so we can
+	 * zero out the date added field.  Also zero out the private type/creator
+	 * for symlinks.
+	 */
+	bcopy(cp->c_finderinfo, finderinfo, sizeof(finderinfo));
+	hfs_zero_dateadded (cp, finderinfo);
+	
 	/* Don't expose a symlink's private type/creator. */
 	if (vnode_islnk(vp)) {
-		/* Skip over type/creator fields. */
-		finderinfo_start = &cp->c_finderinfo[8];
-		finderinfo_size = sizeof(cp->c_finderinfo) - 8;
-	} else {
-		finderinfo_start = &cp->c_finderinfo[0];
-		finderinfo_size = sizeof(cp->c_finderinfo);
-	}
+		struct FndrFileInfo *fip;
+		
+		fip = (struct FndrFileInfo *)&finderinfo;
+		fip->fdType = 0;
+		fip->fdCreator = 0;
+	}	
+	
+    
 	/* If Finder Info is non-empty then export it's name. */
-	if (bcmp(finderinfo_start, emptyfinfo, finderinfo_size) != 0) {
+	if (bcmp(finderinfo, emptyfinfo, sizeof(emptyfinfo)) != 0) {
 		if (uio == NULL) {
 			*ap->a_size += sizeof(XATTR_FINDERINFO_NAME);
 		} else if ((user_size_t)uio_resid(uio) < sizeof(XATTR_FINDERINFO_NAME)) {
@@ -1546,11 +1783,9 @@ exit:
 	if (user_start) {
 		vsunlock(user_start, user_len, TRUE);
 	}
-	
 	if (iterator) {
 		FREE(iterator, M_TEMP);
 	}
-
 	hfs_unlock(cp);
 	
 	return MacToVFSError(result);
@@ -1558,7 +1793,7 @@ exit:
 
 
 /*
- * Callback - called for each attribute
+ * Callback - called for each attribute record
  */
 static int
 listattr_callback(const HFSPlusAttrKey *key, __unused const HFSPlusAttrData *data, struct listattr_callback_state *state)
@@ -1621,7 +1856,6 @@ listattr_callback(const HFSPlusAttrKey *key, __unused const HFSPlusAttrData *dat
  * This function takes the necessary locks on the attribute
  * b-tree file and the allocation (bitmap) file.
  */
-__private_extern__
 int
 hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid)
 {
@@ -1699,10 +1933,8 @@ hfs_xattr_init(struct hfsmount * hfsmp)
 /*
  * Enable/Disable volume attributes stored as EA for root file system.
  * Supported attributes are - 
- *	1. ACLs
- *	2. Extent-based Extended Attributes 
+ *	1. Extent-based Extended Attributes 
  */
-__private_extern__
 int
 hfs_set_volxattr(struct hfsmount *hfsmp, unsigned int xattrtype, int state)
 {
@@ -1714,6 +1946,9 @@ hfs_set_volxattr(struct hfsmount *hfsmp, unsigned int xattrtype, int state)
 	if (hfsmp->hfs_flags & HFS_STANDARD) {
 		return (ENOTSUP);
 	}
+	if (xattrtype != HFS_SET_XATTREXTENTS_STATE) {
+		return EINVAL;
+	}
 
 	/*
 	 * If there isn't an attributes b-tree then create one.
@@ -1736,18 +1971,8 @@ hfs_set_volxattr(struct hfsmount *hfsmp, unsigned int xattrtype, int state)
 	 * Build a b-tree key.
 	 * We use the root's parent id (1) to hold this volume attribute.
 	 */
-	if (xattrtype == HFS_SETACLSTATE) {
-		/* ACL */
-		(void) hfs_buildattrkey(kHFSRootParentID, XATTR_EXTENDEDSECURITY_NAME,
-		                      (HFSPlusAttrKey *)&iterator->key);
-	} else if (xattrtype == HFS_SET_XATTREXTENTS_STATE) {
-		/* Extent-based extended attributes */
-		(void) hfs_buildattrkey(kHFSRootParentID, XATTR_XATTREXTENTS_NAME,
-		                      (HFSPlusAttrKey *)&iterator->key);
-	} else {
-		result = EINVAL;
-		goto exit;
-	}
+	(void) hfs_buildattrkey(kHFSRootParentID, XATTR_XATTREXTENTS_NAME,
+			      (HFSPlusAttrKey *)&iterator->key);
 
 	/* Start a transaction for our changes. */
 	if (hfs_start_transaction(hfsmp) != 0) {
@@ -1790,91 +2015,21 @@ hfs_set_volxattr(struct hfsmount *hfsmp, unsigned int xattrtype, int state)
 
 	/* Finish the transaction of our changes. */
 	hfs_end_transaction(hfsmp);
-exit:
-	if (iterator) {
-		FREE(iterator, M_TEMP);
-	}
-	if (result == 0) {
-		if (xattrtype == HFS_SETACLSTATE) {
-			if (state == 0) {
-				vfs_clearextendedsecurity(HFSTOVFS(hfsmp));
-			} else {
-				vfs_setextendedsecurity(HFSTOVFS(hfsmp));
-			}
-		} else { 
-			/* HFS_SET_XATTREXTENTS_STATE */
-			HFS_MOUNT_LOCK(hfsmp, TRUE);
-			if (state == 0) {
-				hfsmp->hfs_flags &= ~HFS_XATTR_EXTENTS; 
-			} else {
-				hfsmp->hfs_flags |= HFS_XATTR_EXTENTS; 
-			}
-			HFS_MOUNT_UNLOCK(hfsmp, TRUE); 
-		}
-	}
-
-	return MacToVFSError(result);
-}
-
 
- /*
- * Check for volume attributes stored as EA for root file system.
- * Supported attributes are - 
- *	1. ACLs
- *	2. Extent-based Extended Attributes 
- */
-__private_extern__
-void
-hfs_check_volxattr(struct hfsmount *hfsmp, unsigned int xattrtype)
-{
-	struct BTreeIterator * iterator;
-	struct filefork *btfile;
-	int lockflags;
-	int result;
-
-	if (hfsmp->hfs_flags & HFS_STANDARD ||
-	    hfsmp->hfs_attribute_vp == NULL) {
-		return;
-	}
-
-	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
-	if (iterator == NULL) {
-		return;
-	}
-	bzero(iterator, sizeof(*iterator));
-
-	/*
-	 * Build a b-tree key.
-	 * We use the root's parent id (1) to hold this volume attribute.
-	 */
-	if (xattrtype == HFS_SETACLSTATE) {
-		/* ACLs */
-		(void) hfs_buildattrkey(kHFSRootParentID, XATTR_EXTENDEDSECURITY_NAME,
-	        	              (HFSPlusAttrKey *)&iterator->key);
+	/* Update the state in the mount point */
+	HFS_MOUNT_LOCK(hfsmp, TRUE);
+	if (state == 0) {
+		hfsmp->hfs_flags &= ~HFS_XATTR_EXTENTS; 
 	} else {
-		/* Extent-based extended attributes */
-		(void) hfs_buildattrkey(kHFSRootParentID, XATTR_XATTREXTENTS_NAME,
-	        	              (HFSPlusAttrKey *)&iterator->key);
+		hfsmp->hfs_flags |= HFS_XATTR_EXTENTS; 
 	}
-	btfile = VTOF(hfsmp->hfs_attribute_vp);
-
-	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
-
-	/* Check for our attribute. */
-	result = BTSearchRecord(btfile, iterator, NULL, NULL, NULL);
+	HFS_MOUNT_UNLOCK(hfsmp, TRUE); 
 
-	hfs_systemfile_unlock(hfsmp, lockflags);
-	FREE(iterator, M_TEMP);
-
-	if (result == 0) {
-		if (xattrtype == HFS_SETACLSTATE) {
-			vfs_setextendedsecurity(HFSTOVFS(hfsmp));
-		} else {
-			HFS_MOUNT_LOCK(hfsmp, TRUE);
-			hfsmp->hfs_flags |= HFS_XATTR_EXTENTS; 
-			HFS_MOUNT_UNLOCK(hfsmp, TRUE); 
-		}
+exit:
+	if (iterator) {
+		FREE(iterator, M_TEMP);
 	}
+	return MacToVFSError(result);
 }
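
With the ACL branch removed, hfs_set_volxattr() now accepts only HFS_SET_XATTREXTENTS_STATE and rejects any other type with EINVAL. A hedged usage sketch (wrapper name hypothetical, caller context such as an fsctl handler assumed):

    /* Sketch: enable extent-based extended attributes on a mounted volume. */
    static int
    enable_xattr_extents(struct hfsmount *hfsmp)
    {
    	/* state = 1 sets HFS_XATTR_EXTENTS in hfsmp->hfs_flags on success. */
    	return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, 1);
    }
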
 
 
@@ -2036,76 +2191,70 @@ getmaxinlineattrsize(struct vnode * attrvp)
 }
 
 /*
- * Get a referenced vnode for attribute data I/O.
+ * Initialize vnode for attribute data I/O.  
+ * 
+ * On success, 
+ * 	- returns zero
+ * 	- the attrdata vnode is initialized as hfsmp->hfs_attrdata_vp
+ * 	- an iocount is taken on the attrdata vnode which exists 
+ * 	  for the entire duration of the mount.  It is only dropped 
+ * 	  during unmount
+ * 	- the attrdata cnode is not locked
+ *
+ * On failure, 
+ * 	- returns non-zero value
+ * 	- the caller does not have to worry about any locks or references
  */
-static int
-get_attr_data_vnode(struct hfsmount *hfsmp, vnode_t *vpp)
+int init_attrdata_vnode(struct hfsmount *hfsmp)
 {
 	vnode_t vp;
 	int result = 0;
+	struct cat_desc cat_desc;
+	struct cat_attr cat_attr;
+	struct cat_fork cat_fork;
+	int newvnode_flags = 0;
+
+	bzero(&cat_desc, sizeof(cat_desc));
+	cat_desc.cd_parentcnid = kHFSRootParentID;
+	cat_desc.cd_nameptr = (const u_int8_t *)hfs_attrdatafilename;
+	cat_desc.cd_namelen = strlen(hfs_attrdatafilename);
+	cat_desc.cd_cnid = kHFSAttributeDataFileID;
+	/* Tag vnode as system file, note that we can still use cluster I/O */
+	cat_desc.cd_flags |= CD_ISMETA; 
+
+	bzero(&cat_attr, sizeof(cat_attr));
+	cat_attr.ca_linkcount = 1;
+	cat_attr.ca_mode = S_IFREG;
+	cat_attr.ca_fileid = cat_desc.cd_cnid;
+	cat_attr.ca_blocks = hfsmp->totalBlocks;
 
-	vp = hfsmp->hfs_attrdata_vp;
-	if (vp == NULLVP) {
-		struct cat_desc cat_desc;
-		struct cat_attr cat_attr;
-		struct cat_fork cat_fork;
-
-		/* We don't tag it as a system file since we intend to use cluster I/O. */
-		bzero(&cat_desc, sizeof(cat_desc));
-		cat_desc.cd_parentcnid = kHFSRootParentID;
-		cat_desc.cd_nameptr = (const u_int8_t *)hfs_attrdatafilename;
-		cat_desc.cd_namelen = strlen(hfs_attrdatafilename);
-		cat_desc.cd_cnid = kHFSAttributeDataFileID;
-
-		bzero(&cat_attr, sizeof(cat_attr));
-		cat_attr.ca_linkcount = 1;
-		cat_attr.ca_mode = S_IFREG;
-		cat_attr.ca_fileid = cat_desc.cd_cnid;
-		cat_attr.ca_blocks = hfsmp->totalBlocks;
-
-		/*
-		 * The attribute data file is a virtual file that spans the
-		 * entire file system space.
-		 *
-		 * Each extent-based attribute occupies a unique portion of
-		 * in this virtual file.  The cluster I/O is done using actual
-		 * allocation block offsets so no additional mapping is needed
-		 * for the VNOP_BLOCKMAP call.
-		 *
-		 * This approach allows the attribute data to be cached without
-		 * incurring the high cost of using a separate vnode per attribute.
-		 *
-		 * Since we need to acquire the attribute b-tree file lock anyways,
-		 * the virtual file doesn't introduce any additional serialization.
-		 */
-		bzero(&cat_fork, sizeof(cat_fork));
-		cat_fork.cf_size = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
-		cat_fork.cf_blocks = hfsmp->totalBlocks;
-		cat_fork.cf_extents[0].startBlock = 0;
-		cat_fork.cf_extents[0].blockCount = cat_fork.cf_blocks;
-	
-		result = hfs_getnewvnode(hfsmp, NULL, NULL, &cat_desc, 0, &cat_attr, &cat_fork, &vp);
-		if (result == 0) {
-			HFS_MOUNT_LOCK(hfsmp, 1);
-			/* Check if someone raced us for creating this vnode. */
-			if (hfsmp->hfs_attrdata_vp != NULLVP) {
-				HFS_MOUNT_UNLOCK(hfsmp, 1);
-				vnode_put(vp);
-				vnode_recycle(vp);
-				vp = hfsmp->hfs_attrdata_vp;
-			} else {
-				hfsmp->hfs_attrdata_vp = vp;
-				HFS_MOUNT_UNLOCK(hfsmp, 1);
-				/* Keep a reference on this vnode until unmount */
-				vnode_ref_ext(vp, O_EVTONLY);
-				hfs_unlock(VTOC(vp));
-			}
-		}
-	} else {
-		if ((result = vnode_get(vp)))
-			vp = NULLVP;
+	/*
+	 * The attribute data file is a virtual file that spans the
+	 * entire file system space.
+	 *
+	 * Each extent-based attribute occupies a unique portion
+	 * of this virtual file.  The cluster I/O is done using actual
+	 * allocation block offsets so no additional mapping is needed
+	 * for the VNOP_BLOCKMAP call.
+	 *
+	 * This approach allows the attribute data to be cached without
+	 * incurring the high cost of using a separate vnode per attribute.
+	 *
+	 * Since we need to acquire the attribute b-tree file lock anyways,
+	 * the virtual file doesn't introduce any additional serialization.
+	 */
+	bzero(&cat_fork, sizeof(cat_fork));
+	cat_fork.cf_size = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
+	cat_fork.cf_blocks = hfsmp->totalBlocks;
+	cat_fork.cf_extents[0].startBlock = 0;
+	cat_fork.cf_extents[0].blockCount = cat_fork.cf_blocks;
+
+	result = hfs_getnewvnode(hfsmp, NULL, NULL, &cat_desc, 0, &cat_attr, 
+				 &cat_fork, &vp, &newvnode_flags);
+	if (result == 0) {
+		hfsmp->hfs_attrdata_vp = vp;
+		hfs_unlock(VTOC(vp));
 	}
-	*vpp = vp;
 	return (result);
 }
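
Because init_attrdata_vnode() sizes the virtual file as totalBlocks * blockSize with a single extent starting at block zero, an attribute extent's startBlock converts to a file offset with one multiplication. A sketch of that mapping (helper name hypothetical, not part of the patch):

    /*
     * Illustrative only: the attrdata vnode spans the whole volume, so an
     * extent's allocation block number is also its block offset within the
     * virtual file -- no additional translation is needed for cluster I/O.
     */
    static u_int64_t
    attr_extent_byte_offset(const struct hfsmount *hfsmp,
                            const HFSPlusExtentDescriptor *ext)
    {
    	return (u_int64_t)ext->startBlock * (u_int64_t)hfsmp->blockSize;
    }
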
 
@@ -2115,7 +2264,7 @@ get_attr_data_vnode(struct hfsmount *hfsmp, vnode_t *vpp)
 static int
 read_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtentDescriptor *extents)
 {
-	vnode_t evp = NULLVP;
+	vnode_t evp = hfsmp->hfs_attrdata_vp;
 	int bufsize;
 	int iosize;
 	int attrsize;
@@ -2123,10 +2272,7 @@ read_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtent
 	int i;
 	int result = 0;
 
-	if ((result = get_attr_data_vnode(hfsmp, &evp))) {
-		return (result);
-	}
-	hfs_lock_truncate(VTOC(evp), 0);
+	hfs_lock_truncate(VTOC(evp), HFS_SHARED_LOCK);
 
 	bufsize = (int)uio_resid(uio);
 	attrsize = (int)datasize;
@@ -2158,7 +2304,6 @@ read_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtent
 	uio_setoffset(uio, datasize);
 
 	hfs_unlock_truncate(VTOC(evp), 0);
-	vnode_put(evp);
 	return (result);
 }
 
@@ -2168,7 +2313,7 @@ read_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtent
 static int
 write_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtentDescriptor *extents)
 {
-	vnode_t evp = NULLVP;
+	vnode_t evp = hfsmp->hfs_attrdata_vp;
 	off_t filesize;
 	int bufsize;
 	int attrsize;
@@ -2177,11 +2322,7 @@ write_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExten
 	int i;
 	int result = 0;
 
-	/* Get exclusive use of attribute data vnode. */
-	if ((result = get_attr_data_vnode(hfsmp, &evp))) {
-		return (result);
-	}
-	hfs_lock_truncate(VTOC(evp), 0);
+	hfs_lock_truncate(VTOC(evp), HFS_SHARED_LOCK);
 
 	bufsize = uio_resid(uio);
 	attrsize = (int) datasize;
@@ -2213,7 +2354,6 @@ write_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExten
 	uio_setoffset(uio, datasize);
 
 	hfs_unlock_truncate(VTOC(evp), 0);
-	vnode_put(evp);
 	return (result);
 }
 
@@ -2264,7 +2404,7 @@ alloc_attr_blks(struct hfsmount *hfsmp, size_t attrsize, size_t extentbufsize, H
 #if HFS_XATTR_VERBOSE
 		printf("hfs: alloc_attr_blks: unexpected failure, %d blocks unallocated\n", blkcnt);
 #endif
-		for (; i <= 0; i--) {
+		for (; i >= 0; i--) {
 			if ((blkcnt = extents[i].blockCount) != 0) {
 				(void) BlockDeallocate(hfsmp, extents[i].startBlock, blkcnt, 0);
 				extents[i].startBlock = 0;
@@ -2283,14 +2423,11 @@ alloc_attr_blks(struct hfsmount *hfsmp, size_t attrsize, size_t extentbufsize, H
 static void
 free_attr_blks(struct hfsmount *hfsmp, int blkcnt, HFSPlusExtentDescriptor *extents)
 {
-	vnode_t evp = NULLVP;
+	vnode_t evp = hfsmp->hfs_attrdata_vp;
 	int remblks = blkcnt;
 	int lockflags;
 	int i;
 
-	if (get_attr_data_vnode(hfsmp, &evp) != 0) {
-		evp = NULLVP;
-	}
 	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
 
 	for (i = 0; (remblks > 0) && (extents[i].blockCount != 0); i++) {
@@ -2325,9 +2462,6 @@ free_attr_blks(struct hfsmount *hfsmp, int blkcnt, HFSPlusExtentDescriptor *exte
 	}
 
 	hfs_systemfile_unlock(hfsmp, lockflags);
-	if (evp) {
-		vnode_put(evp);
-	}
 }
 
 static int
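
The alloc_attr_blks() hunk above fixes a rollback loop that previously never executed (`for (; i <= 0; i--)` with a non-negative index); counting down with `i >= 0` actually releases the partially allocated extents. The corrected pattern, isolated into a hypothetical helper:

    /*
     * Sketch: after a mid-array allocation failure, walk entries
     * [lastIndex .. 0] and return any blocks they hold to the bitmap.
     */
    static void
    rollback_attr_extents(struct hfsmount *hfsmp,
                          HFSPlusExtentDescriptor *extents, int lastIndex)
    {
    	int i;
    	u_int32_t blkcnt;

    	for (i = lastIndex; i >= 0; i--) {
    		if ((blkcnt = extents[i].blockCount) != 0) {
    			(void) BlockDeallocate(hfsmp, extents[i].startBlock, blkcnt, 0);
    			extents[i].startBlock = 0;
    			extents[i].blockCount = 0;
    		}
    	}
    }
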
diff --git a/bsd/hfs/hfscommon/BTree/BTree.c b/bsd/hfs/hfscommon/BTree/BTree.c
index 73a521c8f..be52f5900 100644
--- a/bsd/hfs/hfscommon/BTree/BTree.c
+++ b/bsd/hfs/hfscommon/BTree/BTree.c
@@ -1583,6 +1583,7 @@ BTUpdateRecord(FCB *filePtr, BTreeIterator *iterator,
 	u_int16_t				index;
 	Boolean					validHint;
 
+
 	////////////////////////// Priliminary Checks ///////////////////////////////
 
 	nodeRec.buffer = nil;					// so we can call ReleaseNode
@@ -1666,9 +1667,9 @@ Success:
 	////////////////////////////// Error Exit ///////////////////////////////////
 
 ErrorExit:
-	
+
 	(void) ReleaseNode (btreePtr, &nodeRec);
-	
+
 	iterator->hint.writeCount 	= 0;
 	iterator->hint.nodeNum		= 0;
 	iterator->hint.index		= 0;
@@ -1996,7 +1997,6 @@ OSStatus	BTSetLastSync		(FCB					*filePtr,
 	return noErr;
 }
 
-
 __private_extern__
 OSStatus	BTHasContiguousNodes	(FCB	 				*filePtr)
 {
@@ -2021,7 +2021,6 @@ Routine:	BTGetUserData
 Function:	Read the user data area of the b-tree header node.
 
 -------------------------------------------------------------------------------*/
-__private_extern__
 OSStatus
 BTGetUserData(FCB *filePtr, void * dataPtr, int dataSize)
 {
@@ -2059,7 +2058,6 @@ Routine:	BTSetUserData
 
 Function:	Write the user data area of the b-tree header node.
 -------------------------------------------------------------------------------*/
-__private_extern__
 OSStatus
 BTSetUserData(FCB *filePtr, void * dataPtr, int dataSize)
 {
diff --git a/bsd/hfs/hfscommon/BTree/BTreeAllocate.c b/bsd/hfs/hfscommon/BTree/BTreeAllocate.c
index 99d586408..fe2f91714 100644
--- a/bsd/hfs/hfscommon/BTree/BTreeAllocate.c
+++ b/bsd/hfs/hfscommon/BTree/BTreeAllocate.c
@@ -621,7 +621,6 @@ Routine:	BTZeroUnusedNodes
 
 Function:	Write zeros to all nodes in the B-tree that are not currently in use.
 -------------------------------------------------------------------------------*/
-__private_extern__
 int
 BTZeroUnusedNodes(FCB *filePtr)
 {
@@ -695,31 +694,39 @@ BTZeroUnusedNodes(FCB *filePtr)
 						err = EIO;
 						goto ErrorExit;
 					}
-					
+
 					if (buf_flags(bp) & B_LOCKED) {
 						/* 
-						 * This node is already part of a transaction and will be
-						 * written when the transaction is committed so don't write it here.
-						 * If we did, then we'd hit a panic in hfs_vnop_bwrite since
-						 * B_LOCKED is still set
+						 * This node is already part of a transaction and will be written when
+						 * the transaction is committed, so don't write it here.  If we did, then
+						 * we'd hit a panic in hfs_vnop_bwrite because the B_LOCKED bit is still set.
 						 */
 						buf_brelse(bp);
 						continue;
 					}
-
 					
 					buf_clear(bp);
 					buf_markaged(bp);
 					
 					/*
 					 * Try not to hog the buffer cache.  Wait for the write
-					 * every 32 nodes.
+					 * every 32 nodes.  If VNOP_BWRITE reports an error, bail out and bubble
+					 * it up to the function calling us.  If we tried to update a read-only 
+					 * mount on read-only media, for example, catching the error will let 
+					 * us alert the callers of this function that they should maintain 
+					 * the mount in read-only mode.
 					 */
 					++numWritten;
-					if (numWritten % 32 == 0)
-						VNOP_BWRITE(bp);
-					else
+					if (numWritten % 32 == 0) {
+						err = VNOP_BWRITE(bp);
+						if (err) {
+							goto ErrorExit;
+						}
+					}
+					else {
 						buf_bawrite(bp);
+					}
 				}
 			}
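
The BTZeroUnusedNodes() change above makes every 32nd write synchronous and checks its return value so that I/O errors (for example, on read-only media) propagate to the caller. The throttle-and-check pattern, condensed into a hypothetical helper:

    /*
     * Sketch: write most buffers asynchronously, but force a synchronous
     * VNOP_BWRITE every 32nd buffer so the first I/O error is surfaced.
     */
    static int
    write_throttled(buf_t bp, unsigned int *numWritten)
    {
    	if (++(*numWritten) % 32 == 0) {
    		return VNOP_BWRITE(bp);	/* synchronous; returns any error */
    	}
    	(void) buf_bawrite(bp);		/* asynchronous; no error to report yet */
    	return 0;
    }
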
 			
diff --git a/bsd/hfs/hfscommon/BTree/BTreeScanner.c b/bsd/hfs/hfscommon/BTree/BTreeScanner.c
index 1ce08e385..ea549278d 100644
--- a/bsd/hfs/hfscommon/BTree/BTreeScanner.c
+++ b/bsd/hfs/hfscommon/BTree/BTreeScanner.c
@@ -272,7 +272,7 @@ static int ReadMultipleNodes( BTScanState *theScanStatePtr )
 	}
 	
 	// now read blocks from the device 
-	myErr = (int)buf_bread(myDevPtr, 
+	myErr = (int)buf_meta_bread(myDevPtr, 
 	                       myPhyBlockNum, 
 	                       myBufferSize,  
 	                       NOCRED, 
diff --git a/bsd/hfs/hfscommon/Catalog/FileIDsServices.c b/bsd/hfs/hfscommon/Catalog/FileIDsServices.c
index dbbc33b58..fee50fe6d 100644
--- a/bsd/hfs/hfscommon/Catalog/FileIDsServices.c
+++ b/bsd/hfs/hfscommon/Catalog/FileIDsServices.c
@@ -42,12 +42,63 @@ typedef struct ExtentsRecBuffer ExtentsRecBuffer;
 
 
 static u_int32_t CheckExtents( void *extents, u_int32_t blocks, Boolean isHFSPlus );
-static OSErr  DeleteExtents( ExtendedVCB *vcb, u_int32_t fileNumber, Boolean isHFSPlus );
-static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t destFileID, Boolean isHFSPlus );
+static OSErr  DeleteExtents( ExtendedVCB *vcb, u_int32_t fileNumber, int quitEarly, u_int8_t forkType, Boolean isHFSPlus );
+static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t destFileID, int quitEarly, u_int8_t forkType, Boolean isHFSPlus );
 static void  CopyCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest );
 static void  CopyBigCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest );
 static void  CopyExtentInfo( ExtentKey *key, ExtentRecord *data, ExtentsRecBuffer *buffer, u_int16_t bufferCount );
 
+/* 
+ * This function moves the overflow extents associated with srcID into the file associated with destID.
+ * The caller should already have verified that srcID has overflow extents; we move all of its overflow
+ * extent records.
+ */
+OSErr MoveData( ExtendedVCB *vcb, HFSCatalogNodeID srcID, HFSCatalogNodeID destID, int rsrc) { 
+	
+	OSErr		err;
+	
+	/* 
+	 * Only the source file should have extents, so we just track those.
+	 * We operate on the fork represented by the open file descriptor that was
+	 * used to call into this function.
+	 */
+	if (rsrc) {		
+		/* Copy the extent overflow blocks. */
+		err = MoveExtents( vcb, srcID, destID, 1, (u_int8_t)0xff, 1);
+		if ( err != noErr ) {
+			if ( err != dskFulErr ) {
+				return( err );
+			}
+			/* 
+			 * In case of error, we have probably run into problems
+			 * growing the extents b-tree.  Since the move is actually a copy + delete,
+			 * just delete the new entries.  Same for below.
+			 */
+			err = DeleteExtents( vcb, destID, 1, (u_int8_t)0xff, 1); 
+			ReturnIfError( err ); //	we are doomed. Just QUIT!
+			goto FlushAndReturn;
+		}
+	}
+	else {		
+		/* Copy the extent overflow blocks. */
+		err = MoveExtents( vcb, srcID, destID, 1, 0, 1);
+		if ( err != noErr ) {
+			if ( err != dskFulErr ) {
+				return( err );
+			}
+			err = DeleteExtents( vcb, destID, 1, 0, 1); 
+			ReturnIfError( err ); //	we are doomed. Just QUIT!
+			goto FlushAndReturn;
+		}
+	}
+	
+FlushAndReturn:
+	/* Write out the catalog and extent overflow B-Tree changes */
+	err = FlushCatalog( vcb );
+	err = FlushExtentFile( vcb );
+	
+	return( err );
+}
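
MoveData() assumes the caller has already verified that srcID owns overflow extent records; on a disk-full error it deletes the records it copied before flushing both b-trees. A hedged call sketch (wrapper name hypothetical):

    /* Sketch: move the data-fork overflow extents of srcID onto destID. */
    static OSErr
    move_datafork_extents(ExtendedVCB *vcb, HFSCatalogNodeID srcID,
                          HFSCatalogNodeID destID)
    {
    	/* rsrc = 0 selects the data fork; nonzero selects the resource fork. */
    	return MoveData(vcb, srcID, destID, 0);
    }
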
 
 
 OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param destName, HFSCatalogNodeID srcID, HFSCatalogNodeID destID, u_int32_t srcHint, u_int32_t destHint )
@@ -61,13 +112,13 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
 	int16_t		numDestExtentBlocks;
 	OSErr		err;
 	Boolean		isHFSPlus = ( vcb->vcbSigWord == kHFSPlusSigWord );
-
+	
 	err = BuildCatalogKeyUTF8(vcb, srcID, srcName, kUndefinedStrLen, &srcKey, NULL);
 	ReturnIfError(err);
-
+	
 	err = BuildCatalogKeyUTF8(vcb, destID, destName, kUndefinedStrLen, &destKey, NULL);
 	ReturnIfError(err);
-
+	
 	if ( isHFSPlus )
 	{
 		//--	Step 1: Check the catalog nodes for extents
@@ -75,37 +126,37 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
 		//--	locate the source file, test for extents in extent file, and copy the cat record for later
 		err = LocateCatalogNodeByKey( vcb, srcHint, &srcKey, &srcData, &srcHint );
 		ReturnIfError( err );
-	
+		
 		if ( srcData.recordType != kHFSPlusFileRecord )
 			return( cmFThdDirErr );					//	Error "cmFThdDirErr = it is a directory"
-			
+		
 		//--	Check if there are any extents in the source file
 		//€€	I am only checling the extents in the low 32 bits, routine will fail if files extents after 2 gig are in overflow
 		numSrcExtentBlocks = CheckExtents( srcData.hfsPlusFile.dataFork.extents, srcData.hfsPlusFile.dataFork.totalBlocks, isHFSPlus );
 		if ( numSrcExtentBlocks == 0 )					//	then check the resource fork extents
 			numSrcExtentBlocks = CheckExtents( srcData.hfsPlusFile.resourceFork.extents, srcData.hfsPlusFile.resourceFork.totalBlocks, isHFSPlus );
-
+		
 		//--	Check if there are any extents in the destination file
 		err = LocateCatalogNodeByKey( vcb, destHint, &destKey, &destData, &destHint );
 		ReturnIfError( err );
-	
+		
 		if ( destData.recordType != kHFSPlusFileRecord )
 			return( cmFThdDirErr );					//	Error "cmFThdDirErr = it is a directory"
-
+		
 		numDestExtentBlocks = CheckExtents( destData.hfsPlusFile.dataFork.extents, destData.hfsPlusFile.dataFork.totalBlocks, isHFSPlus );
 		if ( numDestExtentBlocks == 0 )					//	then check the resource fork extents
 			numDestExtentBlocks = CheckExtents( destData.hfsPlusFile.resourceFork.extents, destData.hfsPlusFile.resourceFork.totalBlocks, isHFSPlus );
-
+		
 		//--	Step 2: Exchange the Extent key in the extent file
 		
 		//--	Exchange the extents key in the extent file
-		err = DeleteExtents( vcb, kHFSBogusExtentFileID, isHFSPlus );
+		err = DeleteExtents( vcb, kHFSBogusExtentFileID, 0, 0, isHFSPlus );
 		ReturnIfError( err );
 		
 		if ( numSrcExtentBlocks && numDestExtentBlocks )	//	if both files have extents
 		{
 			//--	Change the source extents file ids to our known bogus value
-			err = MoveExtents( vcb, srcData.hfsPlusFile.fileID, kHFSBogusExtentFileID, isHFSPlus );
+			err = MoveExtents( vcb, srcData.hfsPlusFile.fileID, kHFSBogusExtentFileID, 0, 0, isHFSPlus );
 			if ( err != noErr )
 			{
 				if ( err != dskFulErr )
@@ -115,67 +166,67 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param
 			}
 			
 			//--	Change the destination extents file id's to the source id's
-			err = MoveExtents( vcb, destData.hfsPlusFile.fileID, srcData.hfsPlusFile.fileID, isHFSPlus );
+			err = MoveExtents( vcb, destData.hfsPlusFile.fileID, srcData.hfsPlusFile.fileID, 0, 0, isHFSPlus );
 			if ( err != noErr )
 			{
 				if ( err != dskFulErr )
 					return( err );
-
-ExUndo2aPlus:	err = DeleteExtents( vcb, srcData.hfsPlusFile.fileID, isHFSPlus );
+				
+			ExUndo2aPlus:	err = DeleteExtents( vcb, srcData.hfsPlusFile.fileID, 0, 0, isHFSPlus );
 				ReturnIfError( err );					//	we are doomed. Just QUIT!
-
-                err = MoveExtents( vcb, kHFSBogusExtentFileID, srcData.hfsPlusFile.fileID, isHFSPlus );	//	Move the extents back
+				
+                err = MoveExtents( vcb, kHFSBogusExtentFileID, srcData.hfsPlusFile.fileID, 0, 0, isHFSPlus );	//	Move the extents back
 				ReturnIfError( err );					//	we are doomed. Just QUIT!
-					
+				
 				goto ExUndo1a;
 			}
 			
 			//--	Change the bogus extents file id's to the dest id's
-            err = MoveExtents( vcb, kHFSBogusExtentFileID, destData.hfsPlusFile.fileID, isHFSPlus );
+            err = MoveExtents( vcb, kHFSBogusExtentFileID, destData.hfsPlusFile.fileID, 0, 0, isHFSPlus );
 			if ( err != noErr )
 			{
 				if ( err != dskFulErr )
 					return( err );
-
-				err = DeleteExtents( vcb, destData.hfsPlusFile.fileID, isHFSPlus );
+				
+				err = DeleteExtents( vcb, destData.hfsPlusFile.fileID, 0, 0, isHFSPlus );
 				ReturnIfError( err );					//	we are doomed. Just QUIT!
-
-				err = MoveExtents( vcb, srcData.hfsPlusFile.fileID, destData.hfsPlusFile.fileID, isHFSPlus );	//	Move the extents back
+				
+				err = MoveExtents( vcb, srcData.hfsPlusFile.fileID, destData.hfsPlusFile.fileID, 0, 0, isHFSPlus );	//	Move the extents back
 				ReturnIfError( err );					//	we are doomed. Just QUIT!
-					
+				
 				goto ExUndo2aPlus;
 			}
 			
 		}
 		else if ( numSrcExtentBlocks )	//	just the source file has extents
 		{
-			err = MoveExtents( vcb, srcData.hfsPlusFile.fileID, destData.hfsPlusFile.fileID, isHFSPlus );
+			err = MoveExtents( vcb, srcData.hfsPlusFile.fileID, destData.hfsPlusFile.fileID, 0, 0, isHFSPlus );
 			if ( err != noErr )
 			{
 				if ( err != dskFulErr )
 					return( err );
-
-				err = DeleteExtents( vcb, srcData.hfsPlusFile.fileID, isHFSPlus );
+				
+				err = DeleteExtents( vcb, srcData.hfsPlusFile.fileID, 0, 0, isHFSPlus );
 				ReturnIfError( err );					//	we are doomed. Just QUIT!
-
+				
 				goto FlushAndReturn;
 			}
 		}
 		else if ( numDestExtentBlocks )	//	just the destination file has extents
 		{
-			err = MoveExtents( vcb, destData.hfsPlusFile.fileID, srcData.hfsPlusFile.fileID, isHFSPlus );
+			err = MoveExtents( vcb, destData.hfsPlusFile.fileID, srcData.hfsPlusFile.fileID, 0, 0, isHFSPlus );
 			if ( err != noErr )
 			{
 				if ( err != dskFulErr )
 					return( err );
-
-				err = DeleteExtents( vcb, destData.hfsPlusFile.fileID, isHFSPlus );
+				
+				err = DeleteExtents( vcb, destData.hfsPlusFile.fileID, 0, 0, isHFSPlus );
 				ReturnIfError( err );					//	we are doomed. Just QUIT!
-
+				
 				goto FlushAndReturn;
 			}
 		}
-
+		
 		//--	Step 3: Change the data in the catalog nodes
 		
 		//--	find the source cnode and put dest info in it
@@ -188,12 +239,12 @@ ExUndo2aPlus:	err = DeleteExtents( vcb, srcData.hfsPlusFile.fileID, isHFSPlus );
 		
 		err = ReplaceBTreeRecord( vcb->catalogRefNum, &srcKey, srcHint, &srcData, sizeof(HFSPlusCatalogFile), &srcHint );
 		ReturnIfError( err );
-
+		
 		//	find the destination cnode and put source info in it		
 		err = LocateCatalogNodeByKey( vcb, destHint, &destKey, &destData, &destHint );
 		if ( err != noErr )
 			return( cmBadNews );
-			
+		
 		CopyBigCatalogNodeInfo( &swapData, &destData );
 		err = ReplaceBTreeRecord( vcb->catalogRefNum, &destKey, destHint, &destData, sizeof(HFSPlusCatalogFile), &destHint );
 		ReturnIfError( err );
@@ -205,10 +256,10 @@ ExUndo2aPlus:	err = DeleteExtents( vcb, srcData.hfsPlusFile.fileID, isHFSPlus );
 		//--	locate the source file, test for extents in extent file, and copy the cat record for later
 		err = LocateCatalogNodeByKey( vcb, srcHint, &srcKey, &srcData, &srcHint );
 		ReturnIfError( err );
-	
+		
 		if ( srcData.recordType != kHFSFileRecord )
 			return( cmFThdDirErr );					//	Error "cmFThdDirErr = it is a directory"
-			
+		
 		//--	Check if there are any extents in the source file
 		numSrcExtentBlocks = CheckExtents( srcData.hfsFile.dataExtents, srcData.hfsFile.dataPhysicalSize / vcb->blockSize, isHFSPlus );
 		if ( numSrcExtentBlocks == 0 )					//	then check the resource fork extents
@@ -217,106 +268,106 @@ ExUndo2aPlus:	err = DeleteExtents( vcb, srcData.hfsPlusFile.fileID, isHFSPlus );
 		
 		//€€	Do we save the found source node for later use?
 		
-				
+		
 		//--	Check if there are any extents in the destination file
 		err = LocateCatalogNodeByKey( vcb, destHint, &destKey, &destData, &destHint );
 		ReturnIfError( err );
-	
+		
 		if ( destData.recordType != kHFSFileRecord )
 			return( cmFThdDirErr );					//	Error "cmFThdDirErr = it is a directory"
-
+		
 		numDestExtentBlocks = CheckExtents( destData.hfsFile.dataExtents, destData.hfsFile.dataPhysicalSize / vcb->blockSize, isHFSPlus );
 		if ( numDestExtentBlocks == 0 )					//	then check the resource fork extents
 			numDestExtentBlocks = CheckExtents( destData.hfsFile.rsrcExtents, destData.hfsFile.rsrcPhysicalSize / vcb->blockSize, isHFSPlus );
-			
+		
 		//€€	Do we save the found destination node for later use?
-
-
+		
+		
 		//--	Step 2: Exchange the Extent key in the extent file
 		
 		//--	Exchange the extents key in the extent file
-        err = DeleteExtents( vcb, kHFSBogusExtentFileID, isHFSPlus );
+        err = DeleteExtents( vcb, kHFSBogusExtentFileID, 0, 0, isHFSPlus );
 		ReturnIfError( err );
 		
 		if ( numSrcExtentBlocks && numDestExtentBlocks )	//	if both files have extents
 		{
 			//--	Change the source extents file ids to our known bogus value
-        err = MoveExtents( vcb, srcData.hfsFile.fileID, kHFSBogusExtentFileID, isHFSPlus );
+			err = MoveExtents( vcb, srcData.hfsFile.fileID, kHFSBogusExtentFileID, 0, 0, isHFSPlus );
 			if ( err != noErr )
 			{
 				if ( err != dskFulErr )
 					return( err );
-
-ExUndo1a:		err = DeleteExtents( vcb, kHFSBogusExtentFileID, isHFSPlus );
+				
+			ExUndo1a:		err = DeleteExtents( vcb, kHFSBogusExtentFileID, 0, 0, isHFSPlus );
 				ReturnIfError( err );					//	we are doomed. Just QUIT!
-
+				
 				err = FlushCatalog( vcb );   			//	flush the catalog
 				err = FlushExtentFile( vcb );			//	flush the extent file (unneeded for common case, but it's cheap)			
 				return( dskFulErr );
 			}
 			
 			//--	Change the destination extents file id's to the source id's
-			err = MoveExtents( vcb, destData.hfsFile.fileID, srcData.hfsFile.fileID, isHFSPlus );
+			err = MoveExtents( vcb, destData.hfsFile.fileID, srcData.hfsFile.fileID, 0, 0, isHFSPlus );
 			if ( err != noErr )
 			{
 				if ( err != dskFulErr )
 					return( err );
-
-ExUndo2a:		err = DeleteExtents( vcb, srcData.hfsFile.fileID, isHFSPlus );
+				
+			ExUndo2a:		err = DeleteExtents( vcb, srcData.hfsFile.fileID, 0, 0, isHFSPlus );
 				ReturnIfError( err );					//	we are doomed. Just QUIT!
-
-                err = MoveExtents( vcb, kHFSBogusExtentFileID, srcData.hfsFile.fileID, isHFSPlus );	//	Move the extents back
+				
+                err = MoveExtents( vcb, kHFSBogusExtentFileID, srcData.hfsFile.fileID, 0, 0, isHFSPlus );	//	Move the extents back
 				ReturnIfError( err );					//	we are doomed. Just QUIT!
-					
+				
 				goto ExUndo1a;
 			}
 			
 			//--	Change the bogus extents file id's to the dest id's
-            err = MoveExtents( vcb, kHFSBogusExtentFileID, destData.hfsFile.fileID, isHFSPlus );
+            err = MoveExtents( vcb, kHFSBogusExtentFileID, destData.hfsFile.fileID, 0, 0, isHFSPlus );
 			if ( err != noErr )
 			{
 				if ( err != dskFulErr )
 					return( err );
-
-				err = DeleteExtents( vcb, destData.hfsFile.fileID, isHFSPlus );
+				
+				err = DeleteExtents( vcb, destData.hfsFile.fileID, 0, 0, isHFSPlus );
 				ReturnIfError( err );					//	we are doomed. Just QUIT!
-
-				err = MoveExtents( vcb, srcData.hfsFile.fileID, destData.hfsFile.fileID, isHFSPlus );	//	Move the extents back
+				
+				err = MoveExtents( vcb, srcData.hfsFile.fileID, destData.hfsFile.fileID, 0, 0, isHFSPlus );	//	Move the extents back
 				ReturnIfError( err );					//	we are doomed. Just QUIT!
-					
+				
 				goto ExUndo2a;
 			}
 			
 		}
 		else if ( numSrcExtentBlocks )	//	just the source file has extents
 		{
-			err = MoveExtents( vcb, srcData.hfsFile.fileID, destData.hfsFile.fileID, isHFSPlus );
+			err = MoveExtents( vcb, srcData.hfsFile.fileID, destData.hfsFile.fileID, 0, 0, isHFSPlus );
 			if ( err != noErr )
 			{
 				if ( err != dskFulErr )
 					return( err );
-
-				err = DeleteExtents( vcb, srcData.hfsFile.fileID, isHFSPlus );
+				
+				err = DeleteExtents( vcb, srcData.hfsFile.fileID, 0, 0, isHFSPlus );
 				ReturnIfError( err );					//	we are doomed. Just QUIT!
-
+				
 				goto FlushAndReturn;
 			}
 		}
 		else if ( numDestExtentBlocks )	//	just the destination file has extents
 		{
-			err = MoveExtents( vcb, destData.hfsFile.fileID, srcData.hfsFile.fileID, isHFSPlus );
+			err = MoveExtents( vcb, destData.hfsFile.fileID, srcData.hfsFile.fileID, 0, 0, isHFSPlus );
 			if ( err != noErr )
 			{
 				if ( err != dskFulErr )
 					return( err );
-
-				err = DeleteExtents( vcb, destData.hfsFile.fileID, isHFSPlus );
+				
+				err = DeleteExtents( vcb, destData.hfsFile.fileID, 0, 0, isHFSPlus );
 				ReturnIfError( err );					//	we are doomed. Just QUIT!
-
+				
 				goto FlushAndReturn;
 			}
 		}
-
+		
 		//--	Step 3: Change the data in the catalog nodes
 		
 		//--	find the source cnode and put dest info in it
@@ -330,23 +381,23 @@ ExUndo2a:		err = DeleteExtents( vcb, srcData.hfsFile.fileID, isHFSPlus );
 		
 		err = ReplaceBTreeRecord( vcb->catalogRefNum, &srcKey, srcHint, &srcData, sizeof(HFSCatalogFile), &srcHint );
 		ReturnIfError( err );
-
+		
 		
 		//	find the destination cnode and put source info in it		
 		err = LocateCatalogNodeByKey( vcb, destHint, &destKey, &destData, &destHint );
 		if ( err != noErr )
 			return( cmBadNews );
-			
+		
 		CopyCatalogNodeInfo( &swapData, &destData );
 		err = ReplaceBTreeRecord( vcb->catalogRefNum, &destKey, destHint, &destData, sizeof(HFSCatalogFile), &destHint );
 		ReturnIfError( err );
 	}
 	
 	err = noErr;
-
+	
 	//--	Step 4: Error Handling section
-
-
+	
+	
 FlushAndReturn:
 	err = FlushCatalog( vcb );   			//	flush the catalog
 	err = FlushExtentFile( vcb );			//	flush the extent file (unneeded for common case, but it's cheap)			
@@ -373,7 +424,7 @@ static void  CopyBigCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest )
 }
 
 
-static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t destFileID, Boolean isHFSPlus )
+static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t destFileID, int quitEarly, u_int8_t forkType, Boolean isHFSPlus )
 {
 	FCB *				fcb;
 	ExtentsRecBuffer	extentsBuffer[kNumExtentsToCache];
@@ -386,16 +437,16 @@ static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest
 	int16_t				i, j;
 	OSErr				err;
 	
-
+	
 	fcb = GetFileControlBlock(vcb->extentsRefNum);
 	
 	(void) BTInvalidateHint(&btIterator);
 	extentKeyPtr = (ExtentKey*) &btIterator.key;
 	btRecord.bufferAddress = &extentData;
 	btRecord.itemCount = 1;
-
+	
 	//--	Collect the extent records
-
+	
 	//
 	//	A search on the following key will cause the BTree to be positioned immediately
 	//	before the first extent record for file #srcFileID, but not actually positioned
@@ -408,9 +459,9 @@ static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest
 	if (isHFSPlus) {
 		btRecord.itemSize = sizeof(HFSPlusExtentRecord);
 		btKeySize = sizeof(HFSPlusExtentKey);
-
+		
 		extentKeyPtr->hfsPlus.keyLength	 = kHFSPlusExtentKeyMaximumLength;
-		extentKeyPtr->hfsPlus.forkType	 = 0;
+		extentKeyPtr->hfsPlus.forkType	 = forkType;
 		extentKeyPtr->hfsPlus.pad		 = 0;
 		extentKeyPtr->hfsPlus.fileID	 = srcFileID;
 		extentKeyPtr->hfsPlus.startBlock = 0;
@@ -418,7 +469,7 @@ static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest
 	else {
 		btRecord.itemSize = sizeof(HFSExtentRecord);
 		btKeySize = sizeof(HFSExtentKey);
-
+		
 		extentKeyPtr->hfs.keyLength	 = kHFSExtentKeyMaximumLength;
 		extentKeyPtr->hfs.forkType	 = 0;
 		extentKeyPtr->hfs.fileID	 = srcFileID;
@@ -440,7 +491,7 @@ static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest
 	//	of BTIterateRecord.  We'd need to set up the key for BTSearchRecord to find the last record
 	//	we found, so that BTIterateRecord would get the next one (the first we haven't processed).
 	//
-
+	
 	err = BTSearchRecord(fcb, &btIterator, &btRecord, &btRecordSize, &btIterator);
 	
 	//	We expect a btNotFound here, since there shouldn't be an extent record with FABN = 0.
@@ -454,16 +505,16 @@ static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest
 		
 		return err;
 	}
-
+	
 	do
 	{
 		btRecord.bufferAddress = &extentData;
 		btRecord.itemCount = 1;
-
+		
 		for ( i=0 ; i<kNumExtentsToCache ; i++ )
 		{
 			HFSCatalogNodeID	foundFileID;
-
+			
 			err = BTIterateRecord(fcb, kBTreeNextRecord, &btIterator, &btRecord, &btRecordSize);
 			if ( err == btNotFound )		//	Did we run out of extent records in the extents tree?
 				break;						//	if xkrFNum(A0) is cleared on this error, then this test is bogus!
@@ -471,12 +522,17 @@ static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest
 				return( err );				//	must be ioError
 			
 			foundFileID = isHFSPlus ? extentKeyPtr->hfsPlus.fileID : extentKeyPtr->hfs.fileID;
-			if ( foundFileID == srcFileID )
-			{
+			if ( foundFileID == srcFileID ) {
+				/* Check if we need to quit early. */
+				if (quitEarly && isHFSPlus) {
+					if (extentKeyPtr->hfsPlus.forkType != forkType) {
+						break;
+					}
+				}
 				CopyExtentInfo(extentKeyPtr, &extentData, extentsBuffer, i);
 			}
-			else
-			{
+			else {
+				/* The fileIDs belong to a different file.  We're done here. */
 				break;
 			}
 		}
@@ -486,21 +542,20 @@ static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest
 			btRecordSize = sizeof(HFSPlusExtentRecord);
 		else
 			btRecordSize = sizeof(HFSExtentRecord);
-
-		for ( j=0 ; j<i ; j++ )
-		{
+		
+		for ( j=0 ; j<i ; j++ ) {
 			BTreeIterator tmpIterator;
-
+			
 			if (isHFSPlus)
 				extentsBuffer[j].extentKey.hfsPlus.fileID = destFileID;	//	change only the id in the key to dest ID
 			else
 				extentsBuffer[j].extentKey.hfs.fileID = destFileID;	//	change only the id in the key to dest ID
-
+			
 			// get iterator and buffer descriptor ready...
 			(void) BTInvalidateHint(&tmpIterator);
 			BlockMoveData(&(extentsBuffer[j].extentKey), &tmpIterator.key, btKeySize);
 			btRecord.bufferAddress = &(extentsBuffer[j].extentData);
-
+			
 			err = BTInsertRecord(fcb, &tmpIterator, &btRecord, btRecordSize);
 			if ( err != noErr )
 			{									//	parse the error
@@ -520,7 +575,7 @@ static OSErr  MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest
 		
 		if ( i != kNumExtentsToCache )			//	if the buffer is not full, we must be done
 		{
-			err = DeleteExtents( vcb, srcFileID, isHFSPlus );	//	Now delete all the extent entries with the sourceID
+			err = DeleteExtents( vcb, srcFileID, quitEarly, forkType, isHFSPlus );	//	Now delete all the extent entries with the sourceID
 			if ( DEBUG_BUILD && err != noErr )
 				DebugStr("Error from DeleteExtents");
 			break;									//	we're done!
@@ -538,8 +593,10 @@ static void  CopyExtentInfo( ExtentKey *key, ExtentRecord *data, ExtentsRecBuffe
 }
 
 
+
+
 //--	Delete all extents in extent file that have the ID given.
-static OSErr  DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, Boolean isHFSPlus )
+static OSErr  DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly,  u_int8_t forkType, Boolean isHFSPlus )
 {
 	FCB *				fcb;
 	ExtentKey *			extentKeyPtr;
@@ -548,36 +605,36 @@ static OSErr  DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, Boolean isHFSPl
 	FSBufferDescriptor	btRecord;
 	u_int16_t			btRecordSize;
 	OSErr				err;
-
+	
 	fcb = GetFileControlBlock(vcb->extentsRefNum);
-
+	
 	(void) BTInvalidateHint(&btIterator);
 	extentKeyPtr = (ExtentKey*) &btIterator.key;
 	btRecord.bufferAddress = &extentData;
 	btRecord.itemCount = 1;
-
+	
 	//	The algorithm is to position the BTree just before any extent records for fileID.
 	//	Then just keep getting successive records.  If the record is still for fileID,
 	//	then delete it.
 	
 	if (isHFSPlus) {
 		btRecord.itemSize = sizeof(HFSPlusExtentRecord);
-
+		
 		extentKeyPtr->hfsPlus.keyLength	 = kHFSPlusExtentKeyMaximumLength;
-		extentKeyPtr->hfsPlus.forkType	 = 0;
+		extentKeyPtr->hfsPlus.forkType	 = forkType;
 		extentKeyPtr->hfsPlus.pad		 = 0;
 		extentKeyPtr->hfsPlus.fileID	 = fileID;
 		extentKeyPtr->hfsPlus.startBlock = 0;
 	}
 	else {
 		btRecord.itemSize = sizeof(HFSExtentRecord);
-
+		
 		extentKeyPtr->hfs.keyLength	 = kHFSExtentKeyMaximumLength;
-		extentKeyPtr->hfs.forkType	 = 0;
+		extentKeyPtr->hfs.forkType	 = forkType;
 		extentKeyPtr->hfs.fileID	 = fileID;
 		extentKeyPtr->hfs.startBlock = 0;
 	}
-
+	
 	err = BTSearchRecord(fcb, &btIterator, &btRecord, &btRecordSize, &btIterator);
 	if ( err != btNotFound )
 	{
@@ -587,25 +644,32 @@ static OSErr  DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, Boolean isHFSPl
 		
 		return err;				//	Got some unexpected error, so return it
 	}
-
+	
 	do
 	{
 		BTreeIterator 		tmpIterator;
 		HFSCatalogNodeID	foundFileID;
-
+		
 		err = BTIterateRecord(fcb, kBTreeNextRecord, &btIterator, &btRecord, &btRecordSize);
 		if ( err != noErr )
 		{
 			if (err == btNotFound)	//	If we hit the end of the BTree
 				err = noErr;		//		then it's OK
-				
+			
 			break;					//	We're done now.
 		}
 		
 		foundFileID = isHFSPlus ? extentKeyPtr->hfsPlus.fileID : extentKeyPtr->hfs.fileID;
-		if ( foundFileID != fileID )
+		if ( foundFileID != fileID ) {
 			break;					//	numbers don't match, we must be done
-
+		}
+		if (quitEarly && isHFSPlus) {
+			/* If we're only deleting one type of fork, then quit early if it doesn't match */
+			if (extentKeyPtr->hfsPlus.forkType != forkType) {
+				break;
+			}
+		}
+		
 		tmpIterator = btIterator;
 		err = BTDeleteRecord( fcb, &tmpIterator );
 		if (err != noErr)
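
Both MoveExtents() and DeleteExtents() rely on HFS+ extent keys sorting by (fileID, forkType, startBlock): once a record's fileID — or, with quitEarly set, its forkType — stops matching, no later record can match either. That termination predicate as a hypothetical helper:

    /*
     * Sketch: decide whether extent-record iteration is finished.  Keys
     * sort by (fileID, forkType, startBlock), so a mismatch in either of
     * the first two fields ends the run of interesting records.
     */
    static Boolean
    extent_iteration_done(const HFSPlusExtentKey *key, u_int32_t fileID,
                          int quitEarly, u_int8_t forkType)
    {
    	if (key->fileID != fileID)
    		return true;
    	if (quitEarly && key->forkType != forkType)
    		return true;
    	return false;
    }
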
diff --git a/bsd/hfs/hfscommon/Misc/FileExtentMapping.c b/bsd/hfs/hfscommon/Misc/FileExtentMapping.c
index 5d037026b..ec9881da8 100644
--- a/bsd/hfs/hfscommon/Misc/FileExtentMapping.c
+++ b/bsd/hfs/hfscommon/Misc/FileExtentMapping.c
@@ -180,7 +180,8 @@ static OSErr TruncateExtents(
 
 static OSErr UpdateExtentRecord (
 	ExtendedVCB		*vcb,
-	FCB						*fcb,
+	FCB				*fcb,
+	int				deleted,
 	const HFSPlusExtentKey	*extentFileKey,
 	const HFSPlusExtentRecord	extentData,
 	u_int32_t					extentBTreeHint);
@@ -456,7 +457,6 @@ static OSErr DeleteExtentRecord(
 //
 //_________________________________________________________________________________
 
-__private_extern__
 OSErr MapFileBlockC (
 	ExtendedVCB		*vcb,				// volume that file resides on
 	FCB				*fcb,				// FCB of file
@@ -682,7 +682,6 @@ static OSErr DeallocateFork(
 //	Function: 	Flushes the extent file for a specified volume
 //‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹
 
-__private_extern__
 OSErr FlushExtentFile( ExtendedVCB *vcb )
 {
 	FCB *	fcb;
@@ -842,7 +841,6 @@ int32_t CompareExtentKeysPlus( const HFSPlusExtentKey *searchKey, const HFSPlusE
  * Used by hfs_extendfs to extend the volume allocation bitmap file.
  *
  */
-__private_extern__
 int
 AddFileExtent(ExtendedVCB *vcb, FCB *fcb, u_int32_t startBlock, u_int32_t blockCount)
 {
@@ -896,7 +894,7 @@ AddFileExtent(ExtendedVCB *vcb, FCB *fcb, u_int32_t startBlock, u_int32_t blockC
 		 */
 		foundData[foundIndex].startBlock = startBlock;
 		foundData[foundIndex].blockCount = blockCount;
-		error = UpdateExtentRecord(vcb, fcb, &foundKey, foundData, hint);
+		error = UpdateExtentRecord(vcb, fcb, 0, &foundKey, foundData, hint);
 	}
 	(void) FlushExtentFile(vcb);
 
@@ -912,7 +910,6 @@ AddFileExtent(ExtendedVCB *vcb, FCB *fcb, u_int32_t startBlock, u_int32_t blockC
 //
 //_________________________________________________________________________________
 
-__private_extern__
 OSErr ExtendFileC (
 	ExtendedVCB		*vcb,				// volume that file resides on
 	FCB				*fcb,				// FCB of file to truncate
@@ -1087,21 +1084,44 @@ OSErr ExtendFileC (
 	 * should only be aggressive with re-using once-allocated pieces
 	 * if we're not dealing with system files.  If we're trying to operate
 	 * on behalf of a system file, we need the maximum contiguous amount
-	 * possible.
+	 * possible.  For non-system files we favor locality over contiguity, even at
+	 * the cost of fragmentation, as that can result in fewer blocks being needed
+	 * from the underlying filesystem that the sparse image resides upon. 
 	 */
 	err = noErr;
 	if (   (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)
-	    && (fcb->ff_cp->c_fileid >= kHFSFirstUserCatalogNodeID)
-	    && (flags & kEFMetadataMask) == 0) {
-		if (vcb->hfs_flags & HFS_DID_CONTIG_SCAN) {
-			wantContig = false;
-		} else {
-			// we only want to do this once to scan the bitmap to
-			// fill in the vcbFreeExt table of free blocks
-			vcb->hfs_flags |= HFS_DID_CONTIG_SCAN;
-			wantContig = true;
+			&& (fcb->ff_cp->c_fileid >= kHFSFirstUserCatalogNodeID)
+			&& (flags & kEFMetadataMask) == 0) {
+		/*
+		 * We want locality over contiguity, so by default we set wantContig to 
+		 * false unless we hit one of the circumstances below.
+		 */ 
+		wantContig = false;
+		if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
+			/* 
+			 * If the red-black tree is active, we can always find a suitable contiguous
+			 * chunk.  So if the user specifically requests contiguous files, we should 
+			 * honor that no matter what kind of device it is.
+			 */
+			if (forceContig) {
+				wantContig = true;
+			}
 		}
-	} else {
+		else {
+			/* 
+			 * If the red-black tree is not active, then only set wantContig to true
+			 * if we have never done a contig scan on the device, which would populate
+			 * the free extent cache.  Note that the caller may explicitly unset the 
+			 * the free extent cache.  Note that a caller may explicitly clear the 
+			 * DID_CONTIG_SCAN bit beforehand in order to force us to vend a 
+			 * contiguous extent here.
+			if ((vcb->hfs_flags & HFS_DID_CONTIG_SCAN) == 0) { 
+				vcb->hfs_flags |= HFS_DID_CONTIG_SCAN;	
+				wantContig = true;
+			}
+		}
+	} 
+	else {
 		wantContig = true;
 	}
 	useMetaZone = flags & kEFMetadataMask;
@@ -1163,7 +1183,7 @@ OSErr ExtendFileC (
 			if ((actualStartBlock == startBlock) && (blockHint == 0)) {
 				//	We grew the file's last extent, so just adjust the number of blocks.
 				foundData[foundIndex].blockCount += actualNumBlocks;
-				err = UpdateExtentRecord(vcb, fcb, &foundKey, foundData, hint);
+				err = UpdateExtentRecord(vcb, fcb, 0, &foundKey, foundData, hint);
 				if (err != noErr) break;
 			}
 			else {
@@ -1217,7 +1237,7 @@ OSErr ExtendFileC (
 					//	Add a new extent into this record and update.
 					foundData[foundIndex].startBlock = actualStartBlock;
 					foundData[foundIndex].blockCount = actualNumBlocks;
-					err = UpdateExtentRecord(vcb, fcb, &foundKey, foundData, hint);
+					err = UpdateExtentRecord(vcb, fcb, 0, &foundKey, foundData, hint);
 					if (err != noErr) break;
 				}
 			}
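
The rewritten ExtendFileC() policy above only forces contiguity on a sparse device when the red-black allocator is active and the caller demands it, or for the one-time bitmap scan that seeds the free-extent cache. Condensed into a hypothetical predicate:

    /* Sketch of the sparse-device wantContig decision (helper name hypothetical). */
    static Boolean
    want_contig_on_sparse(ExtendedVCB *vcb, Boolean forceContig)
    {
    	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
    		/* The rbtree can always vend a contiguous chunk on request. */
    		return forceContig;
    	}
    	if ((vcb->hfs_flags & HFS_DID_CONTIG_SCAN) == 0) {
    		/* First allocation triggers the scan that fills the free extent cache. */
    		vcb->hfs_flags |= HFS_DID_CONTIG_SCAN;
    		return true;
    	}
    	return false;	/* otherwise favor locality over contiguity */
    }
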
@@ -1289,12 +1309,15 @@ Overflow:
 //
 //_________________________________________________________________________________
 
-__private_extern__
 OSErr TruncateFileC (
 	ExtendedVCB		*vcb,				// volume that file resides on
 	FCB				*fcb,				// FCB of file to truncate
 	int64_t			peof,				// new physical size for file
+	int				deleted,			// if nonzero, the file's catalog record has already been deleted.
+	int				rsrc,				// does this represent a resource fork or not?
+	uint32_t		fileid,				// the fileid of the file we're manipulating.
 	Boolean			truncateToExtent)	// if true, truncate to end of extent containing newPEOF
+
 {
 	OSErr				err;
 	u_int32_t			nextBlock;		//	next file allocation block to consider
@@ -1314,16 +1337,20 @@ OSErr TruncateFileC (
 
 	recordDeleted = false;
 	
-	if (vcb->vcbSigWord == kHFSPlusSigWord)
+	if (vcb->vcbSigWord == kHFSPlusSigWord) {
 		numExtentsPerRecord = kHFSPlusExtentDensity;
-	else
+	}
+	else {
 		numExtentsPerRecord = kHFSExtentDensity;
-
-	if (FORK_IS_RSRC(fcb))
+	}
+	
+	if (rsrc) {
 		forkType = kResourceForkType;
-	else
+	}
+	else {
 		forkType = kDataForkType;
-
+	}
+	
 	temp64 = fcb->ff_blocks;
 	physNumBlocks = (u_int32_t)temp64;
 
@@ -1349,13 +1376,21 @@ OSErr TruncateFileC (
 	 * XXX Any errors could cause ff_blocks and c_blocks to get out of sync...
 	 */
 	numBlocks = peof / vcb->blockSize;
-	FTOC(fcb)->c_blocks -= (fcb->ff_blocks - numBlocks);
+	if (!deleted) {
+		FTOC(fcb)->c_blocks -= (fcb->ff_blocks - numBlocks);
+	}
 	fcb->ff_blocks = numBlocks;
-
+	
 	// this catalog entry is modified and *must* get forced 
 	// to disk when hfs_update() is called
-	FTOC(fcb)->c_flag |= C_MODIFIED | C_FORCEUPDATE;
-	
+	if (!deleted) {
+		/* 
+		 * If the file is already C_NOEXISTS, then the catalog record
+		 * has been removed from disk already, so we don't need to force 
+		 * another update.
+		 */
+		FTOC(fcb)->c_flag |= (C_MODIFIED | C_FORCEUPDATE);
+	}
 	//
 	//	If the new PEOF is 0, then truncateToExtent has no meaning (we should always deallocate
 	//	all storage).
@@ -1364,7 +1399,7 @@ OSErr TruncateFileC (
 		int i;
 		
 		//	Deallocate all the extents for this fork
-		err = DeallocateFork(vcb, FTOC(fcb)->c_fileid, forkType, fcb->fcbExtents, &recordDeleted);
+		err = DeallocateFork(vcb, fileid, forkType, fcb->fcbExtents, &recordDeleted);
 		if (err != noErr) goto ErrorExit;	//	got some error, so return it
 		
 		//	Update the catalog extent record (making sure it's zeroed out)
@@ -1440,7 +1475,7 @@ OSErr TruncateFileC (
 	//	record (in the FCB, or extents file).
 	//
 	if (extentChanged) {
-		err = UpdateExtentRecord(vcb, fcb, &key, extentRecord, hint);
+		err = UpdateExtentRecord(vcb, fcb, deleted, &key, extentRecord, hint);
 		if (err != noErr) goto ErrorExit;
 	}
 	
@@ -1450,7 +1485,7 @@ OSErr TruncateFileC (
 	//	blocks.
 	//
 	if (nextBlock < physNumBlocks)
-		err = TruncateExtents(vcb, forkType, FTOC(fcb)->c_fileid, nextBlock, &recordDeleted);
+		err = TruncateExtents(vcb, forkType, fileid, nextBlock, &recordDeleted);
 
 Done:
 ErrorExit:
@@ -1465,7 +1500,6 @@ ErrorExit:
  * HFS Plus only
  *
  */
-__private_extern__
 OSErr HeadTruncateFile (
 	ExtendedVCB  *vcb,
 	FCB  *fcb,
@@ -1824,6 +1858,7 @@ Exit:
 //
 //	Input:		vcb			  			-	the volume containing the extents
 //				fcb						-	the file that owns the extents
+//				deleted					-	whether or not the file is already deleted
 //				extentFileKey  			-	pointer to extent key record (xkr)
 //						If the key length is 0, then the extents are actually part
 //						of the catalog record, stored in the FCB.
@@ -1834,18 +1869,18 @@ Exit:
 //				(other) = error from BTree
 //============================================================================
 
-static OSErr UpdateExtentRecord (
-	ExtendedVCB  *vcb,
-	FCB  *fcb,
-	const HFSPlusExtentKey  *extentFileKey,
-	const HFSPlusExtentRecord  extentData,
-	u_int32_t  extentBTreeHint)
+static OSErr UpdateExtentRecord (ExtendedVCB *vcb, FCB  *fcb, int deleted,
+								 const HFSPlusExtentKey  *extentFileKey,
+								 const HFSPlusExtentRecord  extentData,
+								 u_int32_t  extentBTreeHint) 
 {
     OSErr err = noErr;
 	
 	if (extentFileKey->keyLength == 0) {	// keyLength == 0 means the FCB's extent record
 		BlockMoveData(extentData, fcb->fcbExtents, sizeof(HFSPlusExtentRecord));
-		FTOC(fcb)->c_flag |= C_MODIFIED;
+		if (!deleted) {
+			FTOC(fcb)->c_flag |= C_MODIFIED;
+		}
 	}
 	else {
 		BTreeIterator btIterator;
@@ -2013,7 +2048,6 @@ static Boolean ExtentsAreIntegral(
 //				Called by BTOpenPath during volume mount
 //_________________________________________________________________________________
 
-__private_extern__
 Boolean NodesAreContiguous(
 	ExtendedVCB	*vcb,
 	FCB			*fcb,
diff --git a/bsd/hfs/hfscommon/Misc/HybridAllocator.c b/bsd/hfs/hfscommon/Misc/HybridAllocator.c
new file mode 100644
index 000000000..6e0e1f23a
--- /dev/null
+++ b/bsd/hfs/hfscommon/Misc/HybridAllocator.c
@@ -0,0 +1,533 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#if CONFIG_HFS_ALLOC_RBTREE
+
+#define assert(a) { if (!(a)) { panic("File "__FILE__", line %d: assertion '%s' failed.\n", __LINE__, #a); } }
+
+//#include <sys/systm.h>
+#include "../../hfs_macos_defs.h"
+#include "../headers/HybridAllocator.h"
+
+#define bool Boolean
+
+#define ALLOC_DEBUG 0
+
+/*
+ * The rb_wrap macro in RedBlackTree.h automatically generates the source for a variety of functions that 
+ * operate on the red-black trees.  The bodies of these automatically generated functions are the corresponding 
+ * macro from RedBlackTree.h.  For example, the extent_tree_offset_new() function invokes the rb_new() macro.
+ * We re-define actual wrapper functions around them so that we can re-name them and adjust the functions 
+ * that are available to the allocator in VolumeAllocation.c. 
+ *
+ * Here are the functions that get automatically generated:
+ * Offset-Tree Functions:
+ *
+ * initialize the tree
+ * static void				extent_tree_offset_new(extent_tree_offset_t * tree)
+ *
+ * Get the first node in the tree.  If it is empty, return NULL
+ * static extent_node_t*	extent_tree_offset_first (extent_tree_offset_t * tree)
+ *
+ * Get the last node in the tree.  If it is empty, return NULL
+ * static extent_node_t*	extent_tree_offset_last (extent_tree_offset_t * tree)
+ *
+ * From a given extent_node_t, grab the next one.  If no next exists, return NULL
+ * static extent_node_t*	extent_tree_offset_next (extent_tree_offset_t * tree, extent_node_t * node)
+ *
+ * From a given extent_node_t, grab the previous.  If no prev exists, return NULL
+ * static extent_node_t*	extent_tree_offset_prev(extent_tree_offset_t * tree, extent_node_t * node)
+ *  
+ * Find an extent_node_t with the specified key (search by offset).  If it does not exist, return NULL
+ * static extent_node_t*	extent_tree_offset_search(extent_tree_offset_t * tree, extent_node_t * key)
+ *
+ * Find an extent_node_t with the specified key (offset).  If it does not exist, 
+ * either grab the next node, if possible, or return NULL
+ * static extent_node_t*	extent_tree_offset_nsearch(extent_tree_offset_t * tree, extent_node_t * key)
+ *
+ * Find an extent_node_t with the specified key (offset).  If it does not exist,
+ * either grab the previous node, if possible, or return NULL
+ * static extent_node_t*	extent_tree_offset_psearch(extent_tree_offset_t * tree, extent_node_t * key)
+ *
+ * Insert the specified node into the tree.
+ * static void				extent_tree_offset_insert(extent_tree_offset_t * tree, extent_node_t * node)
+ * 
+ * Remove the specified node from the tree. 
+ * static void				extent_tree_offset_remove(extent_tree_offset_t * tree, extent_node_t * node)
+ * 
+ */
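
As a rough sketch of how the generated wrappers above are consumed, here is a
minimal userspace model; the extent_node_t below is a stand-in reduced to the
fields used in this file (the real definition in HybridAllocator.h also embeds
the rb.h link fields), and the tree itself is elided in favor of the embedded
offset_next list.

#include <stdio.h>
#include <stdint.h>

typedef struct extent_node {
	uint32_t length;                  /* length of the free extent, in allocation blocks */
	uint32_t offset;                  /* starting allocation block of the free extent */
	struct extent_node *offset_next;  /* embedded list, kept sorted by offset */
} extent_node_t;

int main(void)
{
	extent_node_t c = { .length = 8, .offset = 100, .offset_next = NULL };
	extent_node_t b = { .length = 4, .offset = 50,  .offset_next = &c };
	extent_node_t a = { .length = 2, .offset = 10,  .offset_next = &b };
	extent_node_t *n;

	/* Walk in offset order via the embedded list, the same O(n) pattern
	 * extent_tree_destroy() uses instead of repeated tree lookups. */
	for (n = &a; n != NULL; n = n->offset_next)
		printf("offset %u, length %u\n", n->offset, n->length);
	return 0;
}
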
+
+
+/* Static Functions only used in this file */
+static int32_t
+extent_tree_internal_alloc_space(extent_tree_offset_t *offset_tree, 
+								 u_int32_t size, u_int32_t offset, extent_node_t *node);
+
+/*
+ * cmp_offset_node
+ * 
+ * Compare the extents in two nodes by offset.
+ * 
+ * Returns: 
+ * -1 if node 1's offset < node 2's offset,
+ *  1 if node 1's offset > node 2's offset, and 0 if the offsets are equal.
+ */
+
+__private_extern__ int
+cmp_offset_node(extent_node_t *node_1, extent_node_t *node_2) {
+	u_int32_t addr_1 = node_1->offset;
+	u_int32_t addr_2 = node_2->offset;
+	
+	return ((addr_1 > addr_2) - (addr_1 < addr_2));
+}
+
+/*
+ * Allocate a new red-black tree node.
+ * 
+ * Currently, we get memory from the M_TEMP zone.
+ * TODO: Need to get our own zone to avoid bloating the M_TEMP zone.
+ */
+__private_extern__ extent_node_t *
+alloc_node(u_int32_t length, u_int32_t offset) {
+	extent_node_t *node;
+	MALLOC(node, extent_node_t *, sizeof(extent_node_t), M_TEMP, M_WAITOK);
+	
+	if (node) {
+		node->offset = offset;
+		node->length = length;
+		node->offset_next = NULL;
+	}
+	return node;
+}
+
+/*
+ * De-allocate a red-black tree node.  
+ * 
+ * Currently, this goes back to the M_TEMP zone.
+ * TODO: May need to adjust this if we pull memory out of our own zone.
+ */
+__private_extern__ void
+free_node(extent_node_t *node) {
+	FREE(node, M_TEMP);
+}
+
+/*
+ * rb_wrap is a macro found in the rb.h header file.  It builds functions that operate on
+ * the red-black tree based upon the types specified here. This code will build red-black tree
+ * search functions that operate on extent_node_t's and use cmp_length_node to do length searches.
+ * It uses cmp_offset_node to do offset searches.  Ties are broken by offset. This will generate 
+ * the functions specified above. 
+ */
+
+rb_wrap(__attribute__ ((unused)) static, extent_tree_offset_, extent_tree_offset_t, extent_node_t, offset_link, cmp_offset_node)
+
+
+/*
+ * Create a new extent tree, composed of links sorted by offset.
+ */
+__private_extern__ void
+extent_tree_init(extent_tree_offset_t *offset_tree)
+{
+	extent_node_t *node = NULL;
+	extent_tree_offset_new(offset_tree);
+	
+	node = extent_tree_off_first (offset_tree);
+	if (node) {
+		node->offset_next = NULL;
+	}
+}
+
+/*
+ * Destroy an extent tree
+ * 
+ * This function finds the first node in the specified red-black tree, then 
+ * uses the embedded linked list to walk through the tree in O(n) time and destroy
+ * all of its nodes.
+ */
+__private_extern__ void
+extent_tree_destroy(extent_tree_offset_t *off_tree) {
+	extent_node_t *node = NULL;
+	extent_node_t *next = NULL;
+	
+	node = extent_tree_offset_first (off_tree);
+	
+	while (node) {
+		next = node->offset_next;
+		extent_tree_offset_remove (off_tree, node);
+		free_node (node);
+		node = next;
+	}
+}
+
+/* 
+ * Search the extent tree by offset. The "key" argument is only used to extract
+ * the offset and length information.  Its link fields are not used in the underlying
+ * tree code.
+ */
+__private_extern__ extent_node_t *
+extent_tree_off_search(extent_tree_offset_t *tree, extent_node_t *key) {
+	return extent_tree_offset_search(tree, key);
+}
+
+/*
+ * Search the extent tree by offset, finding the next node in the tree
+ * if the specified one does not exist.  The "key" argument is only used to extract
+ * the offset and length information.  Its link fields are not used in the underlying
+ * tree code.
+ */
+__private_extern__ extent_node_t *
+extent_tree_off_search_next(extent_tree_offset_t *offset_tree, extent_node_t *key) {
+	
+	return extent_tree_offset_nsearch (offset_tree, key);
+}
+
+/*
+ * Search the extent tree by offset to find a starting position.  Then, do a linear search
+ * through the list of free extents to find the first free extent in the tree that has size 
+ * greater than or equal to the specified size.  The "key" argument is only used to extract
+ * the offset and length information.  Its link fields are not used in the underlying
+ * tree code.
+ */
+__private_extern__ extent_node_t *
+extent_tree_off_search_nextWithSize (extent_tree_offset_t *offset_tree, extent_node_t *key) {
+	
+	extent_node_t *current;
+	
+	u_int32_t min_size = key->length;
+	
+	current = extent_tree_offset_nsearch (offset_tree, key);
+	
+	while (current) {
+		if (current->length >= min_size) {
+			return current;
+		}
+		current = current->offset_next;
+	}
+	
+	/* return NULL if no free extent of suitable size could be found. */
+	return NULL;
+}
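
The loop above amounts to a first-fit search starting at a given offset.  A
self-contained model of the same walk, with a plain linked list standing in
for the tree and node_t as a stand-in type:

#include <stddef.h>
#include <stdint.h>

typedef struct node {
	uint32_t offset, length;
	struct node *offset_next;
} node_t;

/* Same shape as extent_tree_off_search_nextWithSize: begin at the first
 * extent at or after the target offset, walk forward until one is large
 * enough, and return NULL if nothing qualifies. */
static node_t *
first_fit_with_size(node_t *start, uint32_t min_size)
{
	node_t *current;
	for (current = start; current != NULL; current = current->offset_next) {
		if (current->length >= min_size)
			return current;
	}
	return NULL;	/* no free extent of suitable size */
}
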
+
+
+/*
+ * Search the extent tree by offset, finding the previous node in the tree
+ * if the specified one does not exist.  The "key" argument is only used to extract
+ * the offset and length information.  Its link fields are not used in the underlying
+ * tree code.
+ */
+__private_extern__ extent_node_t *
+extent_tree_off_search_prev(extent_tree_offset_t *offset_tree, extent_node_t *key) {
+	
+	return extent_tree_offset_psearch (offset_tree, key);
+}
+
+
+/*
+ * Find the first node in the extent tree, by offset.  This will be the first 
+ * free space region relative to the start of the disk. 
+ */
+__private_extern__ extent_node_t *
+extent_tree_off_first (extent_tree_offset_t *offset_tree) {
+	return extent_tree_offset_first(offset_tree);
+}
+
+/*
+ * From a given tree node (sorted by offset), get the next node in the tree. 
+ */
+__private_extern__ extent_node_t *
+extent_tree_off_next(extent_tree_offset_t * tree, extent_node_t *node)
+{
+	return extent_tree_offset_next(tree, node);
+}
+
+/*
+ * From a given tree node (sorted by offset), get the previous node in the tree. 
+ */
+__private_extern__ extent_node_t *
+extent_tree_off_prev(extent_tree_offset_t * tree, extent_node_t *node)
+{
+	return extent_tree_offset_prev(tree, node);
+}
+
+
+/*
+ * For a node of a given offset and size, remove it from the extent tree and
+ * insert a new node that:
+ * 
+ *	A) increases its offset by the size just allocated
+ *  B) decreases its length by the size just allocated.
+ *
+ * NOTE: Callers must ensure that the 'size' specified is less than or equal to the
+ * length of the extent represented by node.  The node pointer must point to an 
+ * extant node in the tree, as it may be removed from the tree.
+ */
+static int32_t
+extent_tree_internal_alloc_space(extent_tree_offset_t *offset_tree, u_int32_t size, 
+								 u_int32_t offset, extent_node_t *node)
+{
+	if (node) {
+		extent_node_t *prev = NULL;
+		extent_node_t *next = NULL;
+		
+		if( ALLOC_DEBUG ) {
+			assert ((size <= node->length));
+			assert ((offset == node->offset));
+		}
+		
+		prev = extent_tree_offset_prev(offset_tree, node);
+		
+		/*
+		 * Note that, unless the node is exactly the size of the amount of space
+		 * requested, we do not need to remove it from the offset tree, no matter
+		 * how much space we remove from the node.  Remember that the offset tree is
+		 * sorting the extents based on their offsets, and that each node is a discrete 
+		 * chunk of free space.
+		 * 
+		 * If node A has offset B, with length C, in the offset tree, by definition, there 
+		 * can be no other node in the extent tree within the range {B, B+C}.  If there were,
+		 * we'd have overlapped extents. 
+		 * 
+		 * So in the normal case, we'll just update the offset node in place with the new offset
+		 * and size.
+		 * 
+		 * Otherwise, if we have an exact match, then just remove the node altogether.  Don't forget 
+		 * to update the next pointer for the linked list if applicable.
+		 */
+		if (node->length == size) {
+			next = node->offset_next;
+			extent_tree_offset_remove(offset_tree, node);
+			free_node(node);
+			if (prev) {
+				prev->offset_next = next;
+			}
+		}
+		else {
+			node->offset = node->offset + size;
+			node->length -= size;
+			/* The next pointer does not change since we keep the node in place */
+		}
+		return 0;
+	}	
+	return -1;
+}
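
A standalone model of that exact-fit-versus-shrink decision; ext_t and the
removed flag are stand-ins for the real node type and for the
extent_tree_offset_remove/free_node calls:

#include <stddef.h>
#include <stdint.h>

typedef struct {
	uint32_t offset, length;
	int removed;	/* stand-in for removing and freeing the tree node */
} ext_t;

/* Allocate 'size' blocks from the front of a free extent.  An exact fit
 * removes the node; otherwise it shrinks in place, which is safe because
 * the new offset still cannot collide with any other extent in the tree. */
static int
alloc_from_front(ext_t *node, uint32_t size)
{
	if (node == NULL || size > node->length)
		return -1;
	if (node->length == size) {
		node->removed = 1;	/* exact fit: the node goes away entirely */
	} else {
		node->offset += size;	/* shrink in place; offset order still holds */
		node->length -= size;
	}
	return 0;
}
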
+
+/*
+ * Search the extent tree for a region of free space after the specified 
+ * offset and attempt to allocate it.  
+ *
+ * This is expected to be used by attempts to grow a file contiguously.  If we 
+ * start at a file's EOF, then we can try to allocate space immediately after it 
+ * if it's available. This function specifies a tail (the offset), and then passes it 
+ * into extent_tree_offset_search. Note that this is not the search_prev or search_next 
+ * variant, so if no node exists at the specified offset we'll fail out.  
+ *
+ */
+
+__private_extern__ int32_t
+extent_tree_offset_alloc_space(extent_tree_offset_t *offset_tree, u_int32_t size, u_int32_t offset) {
+	extent_node_t search_sentinel = { .offset = offset };
+	extent_node_t *node = extent_tree_offset_search(offset_tree, &search_sentinel);
+	if (node && (node->length < size)) {
+		/* It's too small. Fail the allocation */
+		if ( ALLOC_DEBUG ) { 
+			printf("HFS Allocator: internal_alloc_space, ptr (%p) node->length (%d), node->offset (%d), off(%d), size (%d) \n", 
+				   node, node->length, node->offset, offset, size);
+		}
+		return -1;		
+	}
+	return extent_tree_internal_alloc_space(offset_tree, size, offset, node);
+}
+
+
+/*
+ * Search the extent tree for a region of free space at the specified 
+ * offset and attempt to allocate it.  
+ * 
+ * This is a little bit more involved than the previous function.  It is intended for use when
+ * we may be allocating space from the middle of an existing extent node.
+ *
+ */
+
+
+__private_extern__ int32_t
+extent_tree_offset_alloc_unaligned(extent_tree_offset_t *offset_tree, u_int32_t size, u_int32_t offset) {
+	extent_node_t search_sentinel = { .offset = offset };
+	extent_node_t *node = NULL;
+	
+	node = extent_tree_off_search_prev(offset_tree, &search_sentinel);
+	
+	if (node == NULL) {
+		return -1;
+	}
+	
+	if (node && (node->length < size)) {
+		/* It's too small. Fail the allocation */
+		if ( ALLOC_DEBUG ) { 
+			printf("HFS Allocator: internal_alloc_space, ptr (%p) node->length (%d), node->offset (%d), off(%d), size (%d) \n", 
+				   node, node->length, node->offset, offset, size);
+		}
+		return -1;		
+	}
+
+	/* Now see if we need to split this node because we're not allocating from the beginning */
+	if (offset != node->offset) {
+		
+		if (ALLOC_DEBUG) {
+			assert ((offset + size) <= (node->offset + node->length));
+			if (node->offset_next) {
+				assert ((offset > node->offset) && (offset < node->offset_next->offset));
+			}
+		}
+		
+		u_int32_t end = node->offset + node->length;
+		node->length = offset - node->offset;
+		
+		/* 
+		 * Do we need to create a new node?  If the extent we're carving away ends before 
+		 * the end of the current extent, then yes, we do.
+		 */		
+		if ((offset + size) < (end)) {
+			u_int32_t newoff = offset + size;
+			u_int32_t newlen = end - newoff;
+
+			extent_node_t* newnode = alloc_node(newlen, newoff);
+			extent_tree_offset_insert(offset_tree, newnode);
+			
+			extent_node_t *next = extent_tree_offset_next(offset_tree, newnode);
+			newnode->offset_next = next;
+			node->offset_next = newnode;
+		}
+		
+		return 0;
+	}
+	else {
+		return extent_tree_internal_alloc_space(offset_tree, size, offset, node);
+	}
+}
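
The split arithmetic is easy to verify standalone.  This sketch carves
[120,130) out of a free extent [100,150), leaving a front remainder
[100,120) and a tail remainder [130,150), using the same end/newoff/newlen
computations as above:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t node_offset = 100, node_length = 50;	/* free extent [100,150) */
	uint32_t offset = 120, size = 10;		/* carve out [120,130) */

	uint32_t end = node_offset + node_length;	/* 150 */
	node_length = offset - node_offset;		/* front remainder: [100,120) */

	if (offset + size < end) {			/* tail remainder exists */
		uint32_t newoff = offset + size;	/* 130 */
		uint32_t newlen = end - newoff;		/* 20 -> [130,150) */
		printf("front [%u,%u), tail [%u,%u)\n",
		       node_offset, node_offset + node_length,
		       newoff, newoff + newlen);
	}
	return 0;
}
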
+
+
+
+/*
+ * Mark an extent of space as being free.  This means we need to insert 
+ * this extent into our tree.
+ *
+ * Search the offset tree, based on the new offset that we construct by adding 
+ * the length of our extent to be freed to its offset.  If something exists at 
+ * that offset, then we coalesce the nodes.  In this case, we do not need to adjust 
+ * the offset tree because the extent we wanted to add could not have been in the tree.
+ *
+ * If no node existed at the specified offset, then create a new one and insert it 
+ * into the tree.
+ * 
+ * Finally, search based on the node that would precede our newly created/inserted one.
+ * If possible, coalesce the previous node into our new one.  
+ *
+ * We return the node which we are modifying in this function.  
+ */
+
+__private_extern__ extent_node_t *
+extent_tree_free_space(extent_tree_offset_t *offset_tree, u_int32_t size, u_int32_t offset)
+{
+	extent_node_t *prev = NULL;
+	extent_node_t *node = NULL;	
+	extent_node_t *next = NULL;
+	extent_node_t search_sentinel = { .offset = size + offset };
+	
+	node = extent_tree_offset_nsearch(offset_tree, &search_sentinel);
+	/* Insert our node into the tree, and coalesce with the next one if necessary */
+	
+	if ((node) && (node->offset == search_sentinel.offset)) {
+		node->offset = offset;
+		node->length += size;
+		next = node->offset_next;
+	}
+	else {
+		node = alloc_node(size, offset);
+		assert(node);
+		extent_tree_offset_insert(offset_tree, node);
+		
+		/* Find the next entry in the tree, if applicable. */
+		next = extent_tree_offset_next(offset_tree, node);
+		node->offset_next = next;
+	}
+	
+	/* Coalesce with the previous if necessary */
+	prev = extent_tree_offset_prev(offset_tree, node);
+	if (prev && (prev->offset + prev->length) == offset) {
+		extent_tree_offset_remove(offset_tree, prev);
+		node->offset = prev->offset;
+		node->length += prev->length;
+		free_node(prev);
+		prev = extent_tree_offset_prev(offset_tree, node);
+	}
+	
+	/* Update the next pointer for the previous entry (if necessary) */
+	if (prev) {
+		prev->offset_next = node;
+	}
+	
+	return node;
+}
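
Both coalescing checks reduce to simple range arithmetic.  A standalone model
with extents as bare offset/length pairs (the tree insert/remove calls are
elided):

#include <stdio.h>
#include <stdint.h>

typedef struct { uint32_t offset, length; } ext_t;

int main(void)
{
	ext_t prev  = { 10, 5 };	/* existing free extent [10,15) */
	ext_t freed = { 15, 3 };	/* newly freed run      [15,18) */
	ext_t next  = { 18, 4 };	/* existing free extent [18,22) */

	/* Forward merge: a free extent begins exactly where the freed run ends. */
	if (freed.offset + freed.length == next.offset)
		freed.length += next.length;		/* -> [15,22) */

	/* Backward merge: a free extent ends exactly where the freed run begins. */
	if (prev.offset + prev.length == freed.offset) {
		freed.offset = prev.offset;		/* -> [10,22) */
		freed.length += prev.length;
	}
	printf("coalesced: [%u,%u)\n", freed.offset, freed.offset + freed.length);
	return 0;
}
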
+
+/*
+ * Remove the specified node from the offset_tree.  Note that the parameter node
+ * must be an extant node in the tree.  This function is used by the allocator when
+ * we are resizing a volume and need to directly manipulate the contents of the red-black
+ * tree without going through the normal allocation and deallocation routines.
+ */
+__private_extern__ void 
+extent_tree_remove_node (extent_tree_offset_t *offset_tree, extent_node_t * node) {
+	
+	if (node) {
+		/* Just remove the entry from the tree */
+		extent_tree_offset_remove(offset_tree, node);
+	}
+	return;
+	
+}
+
+
+
+#if ALLOC_DEBUG 
+/*
+ * For each node in the tree, print out its length and block offset.
+ */
+__private_extern__ void
+extent_tree_offset_print(extent_tree_offset_t *offset_tree)
+{
+	extent_node_t *node = NULL;
+	
+	node = extent_tree_offset_first(offset_tree);
+	while (node) {
+		printf("length: %u, offset: %u\n", node->length, node->offset);
+		node = node->offset_next;
+	}
+}
+#endif
+
+#endif
diff --git a/bsd/hfs/hfscommon/Misc/VolumeAllocation.c b/bsd/hfs/hfscommon/Misc/VolumeAllocation.c
index bc58bd947..de2858418 100644
--- a/bsd/hfs/hfscommon/Misc/VolumeAllocation.c
+++ b/bsd/hfs/hfscommon/Misc/VolumeAllocation.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -32,7 +32,7 @@
 
 	Version:	HFS Plus 1.0
 
-	Copyright:	© 1996-2001 by Apple Computer, Inc., all rights reserved.
+	Copyright:	© 1996-2009 by Apple Computer, Inc., all rights reserved.
 
 */
 
@@ -45,31 +45,92 @@ Public routines:
 					blocks.  (Will only do a single extent???)
 	BlockDeallocate
 					Deallocate a contiguous run of allocation blocks.
-
-	invalidate_free_extent_cache	Invalidate free extent cache for a given volume.
-
-Internal routines:
+ 
+	BlockMarkAllocated
+					Exported wrapper to mark blocks as in-use.  This will correctly determine
+					whether or not the red-black tree is enabled and call the appropriate function 
+					if applicable.
 	BlockMarkFree
+					Exported wrapper to mark blocks as freed.  This will correctly determine whether or
+					not the red-black tree is enabled and call the appropriate function if applicable.
+
+ 
+	ResetVCBFreeExtCache
+					Since the red-black tree obviates the need to maintain the free extent cache, we do
+					not update it if the tree is also live.  As a result, if we ever need to destroy the trees
+					we should reset the free extent cache so it doesn't confuse us when we need to fall back to the
+					bitmap scanning allocator.
+					We also reset and disable the free extent cache when volume resizing is 
+					in flight.
+ 
+	UpdateAllocLimit 
+					Adjusts the AllocLimit field in the hfs mount point.  This is used when we need to prevent
+					allocations from occupying space in the region we are modifying during a filesystem resize.  
+					At other times, it should be consistent with the total number of allocation blocks in the 
+					filesystem.  It is also used to shrink or grow the number of blocks that the red-black tree should
+					know about. If growing, scan the new range of bitmap, and if shrinking, reduce the
+					number of items in the tree that we can allocate from.
+ 
+Internal routines:
+	Note that the RBTree routines are guarded by a cpp check for CONFIG_HFS_ALLOC_RBTREE.  This
+	is to cut down on code size by omitting functions that could not possibly be used when 
+	the red-black tree code is not built in.
+ 
+	BlockMarkFreeRBTree
+					Make an internal call to BlockMarkFreeInternal and then update 
+					and/or create Red-Black Tree allocation tree nodes to correspond
+					to the free space being generated.
+	BlockMarkFreeInternal
 					Mark a contiguous range of blocks as free.  The corresponding
-					bits in the volume bitmap will be cleared.
-	BlockMarkAllocated
+					bits in the volume bitmap will be cleared.  This will actually do the work
+					of modifying the bitmap for us.
+					
+	BlockMarkAllocatedRBTree
+					Make an internal call to BlockMarkAllocatedInternal, which will update the 
+					bitmap on-disk when we allocate blocks.  If that is successful, then
+					we'll remove the appropriate entries from the red-black tree.
+	BlockMarkAllocatedInternal
 					Mark a contiguous range of blocks as allocated.  The cor-
 					responding bits in the volume bitmap are set.  Also tests to see
-					if any of the blocks were previously unallocated.
-	FindContiguous
+					if any of the blocks were previously unallocated.  
+	BlockFindContiguous
 					Find a contiguous range of blocks of a given size.  The caller
 					specifies where to begin the search (by block number).  The
-					block number of the first block in the range is returned.
+					block number of the first block in the range is returned.  This is only
+					called by the bitmap scanning logic as the red-black tree should be able
+					to do this internally by searching its tree. 
 	BlockAllocateAny
 					Find and allocate a contiguous range of blocks up to a given size.  The
 					first range of contiguous free blocks found are allocated, even if there
 					are fewer blocks than requested (and even if a contiguous range of blocks
 					of the given size exists elsewhere).
+	BlockAllocateAnyBitmap
+					Finds a range of blocks per the above requirements without using the 
+					Allocation RB Tree.  This relies on the bitmap-scanning logic in order to find
+					any valid range of free space needed.
+	BlockAllocateAnyRBTree
+					Finds a valid range of blocks per the above requirements by searching
+					the red-black tree.  We can just make an internal call to 
+					BlockAllocateContigRBTree to find the valid range.
 	BlockAllocateContig
 					Find and allocate a contiguous range of blocks of a given size.  If
 					a contiguous range of free blocks of the given size isn't found, then
-					the allocation fails (i.e. it is "all or nothing").
-
+					the allocation fails (i.e. it is "all or nothing").  This routine is
+					essentially a wrapper function around its related sub-functions,
+					BlockAllocateContigBitmap and BlockAllocateContigRBTree, which use,
+					respectively, the original HFS+ bitmap scanning logic and the new 
+					Red-Black Tree to search and manage free-space decisions.  This function
+					contains logic for when to use which of the allocation algorithms,
+					depending on the free space contained in the volume.
+	BlockAllocateContigBitmap
+					Finds and allocates a range of blocks specified by the size parameters
+					using the original HFS+ bitmap scanning logic.  The red-black tree
+					will not be updated if this function is used.  
+	BlockAllocateContigRBTree
+					Finds and allocates a range of blocks specified by the size parameters
+					using the new red/black tree data structure and search algorithms
+					provided by the tree library.  Updates the red/black tree nodes after
+					the on-disk data structure (bitmap) has been updated. 
 	BlockAllocateKnown
 					Try to allocate space from known free space in the volume's
 					free extent cache.
@@ -80,6 +141,57 @@ Internal routines:
 
 	ReleaseBitmapBlock
 					Release a bitmap block back into the buffer cache.
+ 
+ 
+Debug/Test Routines
+	hfs_isallocated
+					Test to see if any blocks in a range are allocated.  Journal or
+					allocation file lock must be held.
+ 
+	hfs_isallocated_scan
+					Test to see if any blocks in a range are allocated.  Releases and
+					invalidates the block used when finished.
+	
+	hfs_isrbtree_active
+					Test to see if the allocation red-black tree is live.  This function
+					requires either an exclusive or shared lock on the allocation bitmap file
+					in the HFS mount structure, to prevent red-black tree pointers from disappearing.
+ 
+	hfs_isrbtree_allocated
+					Test to see if the specified extent is marked as allocated in the red-black tree.
+					Multiplexes between the metadata zone trees and the normal allocation zone trees
+					depending on the offset of the extent specified.
+					
+	check_rbtree_extents
+					Void function that wraps around the above function (hfs_isrbtree_allocated)
+					and checks to see that the return value was appropriate based on the assertion we're
+					trying to validate (whether or not the specified extent should be marked as free 
+					or allocated).
+	
+	hfs_validate_rbtree
+					Exhaustive search function that will check every allocation block for its status in the
+					red-black tree and then check the corresponding status in the bitmap file.  If the two are out
+					of sync, it will panic.  Note that this function is extremely expensive and must NEVER
+					be run outside of debug code.
+ 
+	hfs_checktreelinks
+					Checks the embedded linked list structure of the red black tree for integrity.  The next pointer
+					should always point to whatever extent_tree_offset_next returns.
+ 
+ 
+Red Black Tree Specific Routines
+	GenerateTree
+					Build a red-black tree for the given filesystem's bitmap.
+ 
+	DestroyTrees
+					Destroy the trees on the given filesystem.
+
+
+	hfs_alloc_scan_block
+					Given a starting allocation block number, figures out which physical block contains that 
+					allocation block's bit, and scans it from the starting bit until either the ending bit or
+					the end of the block.  Free space extents are inserted into the appropriate red-black tree.
+					
 */
 
 #include "../../hfs_macos_defs.h"
@@ -89,19 +201,54 @@ Internal routines:
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <sys/disk.h>
+#include <sys/ubc.h>
+#include <sys/uio.h>
 #include <kern/kalloc.h>
 
 #include "../../hfs.h"
 #include "../../hfs_dbg.h"
 #include "../../hfs_format.h"
 #include "../../hfs_endian.h"
-
+#include "../../hfs_macos_defs.h"
 #include "../headers/FileMgrInternal.h"
+#include "../headers/HybridAllocator.h"
+#include "../../hfs_kdebug.h"
 
 #ifndef CONFIG_HFS_TRIM
 #define CONFIG_HFS_TRIM 0
 #endif
 
+/*
+ * Use sysctl vfs.generic.hfs.kdebug.allocation to control which
+ * KERNEL_DEBUG_CONSTANT events are enabled at runtime.  (They're
+ * disabled by default because there can be a lot of these events,
+ * and we don't want to overwhelm the kernel debug buffer.  If you
+ * want to watch these events in particular, just set the sysctl.)
+ */
+static int hfs_kdebug_allocation = 0;
+SYSCTL_DECL(_vfs_generic);
+SYSCTL_NODE(_vfs_generic, OID_AUTO, hfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS file system");
+SYSCTL_NODE(_vfs_generic_hfs, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS kdebug");
+SYSCTL_INT(_vfs_generic_hfs_kdebug, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED, &hfs_kdebug_allocation, 0, "Enable kdebug logging for HFS allocations");
+enum {
+	/*
+	 * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and
+	 * BlockDeallocate, including the internal BlockAllocateXxx
+	 * routines so we can see how an allocation was satisfied.
+	 *
+	 * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the
+	 * free extent cache.
+	 *
+	 * HFSDBG_UNMAP_ENABLED: Log events involving the trim list.
+	 *
+	 * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting
+	 * or clearing bits, scanning the bitmap).
+	 */
+	HFSDBG_ALLOC_ENABLED		= 1,
+	HFSDBG_EXT_CACHE_ENABLED	= 2,
+	HFSDBG_UNMAP_ENABLED		= 4,
+	HFSDBG_BITMAP_ENABLED		= 8
+};
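
The sysctl value is a bitmask, so the groups can be ORed together; for
example, a mask of 5 enables the allocation and trim-list events.  A
userspace sketch of the gating idiom used throughout this file:

#include <stdio.h>

enum {	/* mirrors the flag bits above */
	HFSDBG_ALLOC_ENABLED		= 1,
	HFSDBG_EXT_CACHE_ENABLED	= 2,
	HFSDBG_UNMAP_ENABLED		= 4,
	HFSDBG_BITMAP_ENABLED		= 8
};

int main(void)
{
	int hfs_kdebug_allocation = HFSDBG_ALLOC_ENABLED | HFSDBG_UNMAP_ENABLED; /* == 5 */

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		printf("BlockAllocate/BlockDeallocate events enabled\n");
	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		printf("bitmap events enabled\n");	/* not printed for mask 5 */
	return 0;
}
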
 
 enum {
 	kBytesPerWord			=	4,
@@ -116,6 +263,8 @@ enum {
 #define kAllBitsSetInWord	0xFFFFFFFFul
 
 
+#define ALLOC_DEBUG 0
+
 static OSErr ReadBitmapBlock(
 	ExtendedVCB		*vcb,
 	u_int32_t		bit,
@@ -136,6 +285,15 @@ static OSErr BlockAllocateAny(
 	u_int32_t		*actualStartBlock,
 	u_int32_t		*actualNumBlocks);
 
+static OSErr BlockAllocateAnyBitmap(
+	ExtendedVCB		*vcb,
+	u_int32_t		startingBlock,
+	u_int32_t		endingBlock,
+	u_int32_t		maxBlocks,
+	Boolean			useMetaZone,
+	u_int32_t		*actualStartBlock,
+	u_int32_t		*actualNumBlocks);
+
 static OSErr BlockAllocateContig(
 	ExtendedVCB		*vcb,
 	u_int32_t		startingBlock,
@@ -145,6 +303,15 @@ static OSErr BlockAllocateContig(
 	u_int32_t		*actualStartBlock,
 	u_int32_t		*actualNumBlocks);
 
+static OSErr BlockAllocateContigBitmap(
+	ExtendedVCB		*vcb,
+	u_int32_t		startingBlock,
+	u_int32_t		minBlocks,
+	u_int32_t		maxBlocks,
+	Boolean			useMetaZone,
+	u_int32_t		*actualStartBlock,
+	u_int32_t		*actualNumBlocks);
+
 static OSErr BlockFindContiguous(
 	ExtendedVCB		*vcb,
 	u_int32_t		startingBlock,
@@ -161,9 +328,136 @@ static OSErr BlockAllocateKnown(
 	u_int32_t		*actualStartBlock,
 	u_int32_t		*actualNumBlocks);
 
-static int free_extent_cache_active(
-	ExtendedVCB 		*vcb);
+static OSErr BlockMarkAllocatedInternal (
+	ExtendedVCB		*vcb,
+	u_int32_t		startingBlock,
+	register u_int32_t	numBlocks);
+
+static OSErr BlockMarkFreeInternal(
+	ExtendedVCB	*vcb,
+	u_int32_t	startingBlock,
+	u_int32_t	numBlocks, 
+	Boolean 	do_validate);
+
+#if CONFIG_HFS_ALLOC_RBTREE
+
+static OSErr ReleaseRBScanBitmapBlock( struct buf *bp );
+
+static OSErr BlockAllocateAnyRBTree(
+	ExtendedVCB		*vcb,
+	u_int32_t		startingBlock,
+	u_int32_t		maxBlocks,
+	Boolean			useMetaZone,
+	u_int32_t		*actualStartBlock,
+	u_int32_t		*actualNumBlocks);
+
+static OSErr BlockAllocateContigRBTree(
+	ExtendedVCB		*vcb,
+	u_int32_t		startingBlock,
+	u_int32_t		minBlocks,
+	u_int32_t		maxBlocks,
+	Boolean			useMetaZone,
+	u_int32_t		*actualStartBlock,
+	u_int32_t		*actualNumBlocks,
+	u_int32_t 		forceContig);
+
+static OSErr BlockMarkAllocatedRBTree(
+	ExtendedVCB		*vcb,
+	u_int32_t		startingBlock,
+	u_int32_t	numBlocks);
+	
+static OSErr BlockMarkFreeRBTree(
+	ExtendedVCB		*vcb,
+	u_int32_t		startingBlock,
+	u_int32_t	numBlocks);
+
+static int
+hfs_isrbtree_allocated (struct hfsmount * hfsmp, 
+	u_int32_t startBlock, 
+	u_int32_t numBlocks,
+	extent_node_t** node1);
+
+extern void
+hfs_validate_rbtree (struct hfsmount *hfsmp, 
+					 u_int32_t start, 
+					 u_int32_t end);
+
+static void hfs_checktreelinks (struct hfsmount *hfsmp);
+
+
+void check_rbtree_extents (struct hfsmount *hfsmp,
+	u_int32_t start,
+	u_int32_t numBlocks,
+	int shouldBeFree);
+
+int hfs_isallocated_scan (struct hfsmount *hfsmp,
+								 u_int32_t startingBlock,
+								 u_int32_t *bp_buf);
+
+static int hfs_alloc_scan_block(struct hfsmount *hfsmp, 
+								u_int32_t startbit, 
+								u_int32_t endBit, 
+								u_int32_t *bitToScan);
+
+#define ASSERT_FREE 1
+#define ASSERT_ALLOC 0
+								
+#endif /* CONFIG_HFS_ALLOC_RBTREE */
+
+/* Functions for manipulating free extent cache */
+static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
+static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
+static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);
+
+#if ALLOC_DEBUG
+/* 
+ * Extra #includes for the debug function below.  These are not normally #included because
+ * they would constitute a layering violation
+ */
+#include <vfs/vfs_journal.h>
+#include <sys/disk.h>
+
+/*
+ * Validation Routine to verify that the TRIM list maintained by the journal
+ * is in good shape relative to what we think the bitmap should have.  We should
+ * never encounter allocated blocks in the TRIM list, so if we ever encounter them,
+ * we panic.  
+ */
+int trim_validate_bitmap (struct hfsmount *hfsmp) {
+	u_int64_t blockno_offset;
+	u_int64_t numblocks;
+	int i;
+	int count;
+	u_int32_t startblk;
+	u_int32_t blks;
+	int err = 0;
+	uint32_t alloccount = 0;
+
+	if (hfsmp->jnl) {
+		struct journal *jnl = (struct journal*)hfsmp->jnl;
+		if (jnl->active_tr) {
+			struct jnl_trim_list *trim = &(jnl->active_tr->trim);
+			count = trim->extent_count;
+			for (i = 0; i < count; i++) {
+				blockno_offset = trim->extents[i].offset;
+				blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset;
+				blockno_offset = blockno_offset / hfsmp->blockSize;
+				numblocks = trim->extents[i].length / hfsmp->blockSize;
+
+				startblk = (u_int32_t)blockno_offset;
+				blks = (u_int32_t) numblocks;
+				err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount);
+
+				if (err == 0 && alloccount != 0) {
+					panic ("trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk);
+				}
+			}
+		}
+	}
+	return 0;
+}
 
+#endif
 
 /*
 ;________________________________________________________________________________
@@ -188,22 +482,25 @@ static int free_extent_cache_active(
 */
 static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
 {
-	if (CONFIG_HFS_TRIM) {
-		u_int64_t offset;
-		u_int64_t length;
-		int err;
-		
-		if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) {
-			offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
-			length = (u_int64_t) numBlocks * hfsmp->blockSize;
-	
-			err = journal_trim_add_extent(hfsmp->jnl, offset, length);
-			if (err) {
-				printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent", err);
-				hfsmp->hfs_flags &= ~HFS_UNMAP;
-			}
+	u_int64_t offset;
+	u_int64_t length;
+	int err = 0;
+	
+	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
+	
+	if (hfsmp->jnl != NULL) {
+		offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
+		length = (u_int64_t) numBlocks * hfsmp->blockSize;
+
+		err = journal_trim_add_extent(hfsmp->jnl, offset, length);
+		if (err) {
+			printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent", err);
 		}
 	}
+	
+	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0);
 }
 
 
@@ -225,100 +522,117 @@ static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBloc
 */
 static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
 {
-	if (CONFIG_HFS_TRIM) {
-		u_int64_t offset;
-		u_int64_t length;
-		int err;
+	u_int64_t offset;
+	u_int64_t length;
+	int err = 0;
+	
+	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
+	
+	if (hfsmp->jnl != NULL) {
+		offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
+		length = (u_int64_t) numBlocks * hfsmp->blockSize;
 		
-		if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) {
-			offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
-			length = (u_int64_t) numBlocks * hfsmp->blockSize;
-			
-			err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
-			if (err) {
-				printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent", err);
-				hfsmp->hfs_flags &= ~HFS_UNMAP;
-			}
+		err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
+		if (err) {
+			printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent", err);
 		}
 	}
+	
+	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0);
 }
 
 
 /*
 ;________________________________________________________________________________
 ;
-; Routine:	   BlkAlloc
-;
-; Function:    Allocate space on a volume.	If contiguous allocation is requested,
-;			   at least the requested number of bytes will be allocated or an
-;			   error will be returned.	If contiguous allocation is not forced,
-;			   the space will be allocated at the first free fragment following
-;			   the requested starting allocation block.  If there is not enough
-;			   room there, a block of less than the requested size will be
-;			   allocated.
+; Routine:		hfs_trim_callback
 ;
-;			   If the requested starting block is 0 (for new file allocations),
-;			   the volume's allocation block pointer will be used as a starting
-;			   point.
+; Function:		This function is called when a transaction that freed extents
+;				(via hfs_unmap_free_extent/journal_trim_add_extent) has been
+;				written to the on-disk journal.  This routine will add those
+;				extents to the free extent cache so that they can be reused.
 ;
-; Input Arguments:
-;	 vcb			 - Pointer to ExtendedVCB for the volume to allocate space on
-;	 fcb			 - Pointer to FCB for the file for which storage is being allocated
-;	 startingBlock	 - Preferred starting allocation block, 0 = no preference
-;	 forceContiguous - Force contiguous flag - if bit 0 set (NE), allocation is contiguous
-;					   or an error is returned
-;	 useMetaZone  - 
-;	 minBlocks	 - Number of blocks requested.	If the allocation is non-contiguous,
-;					   less than this may actually be allocated
-;	 maxBlocks	 - The maximum number of blocks to allocate.  If there is additional free
-;					   space after bytesRequested, then up to maxBlocks bytes should really
-;					   be allocated.  (Used by ExtendFileC to round up allocations to a multiple
-;					   of the file's clump size.)
+;				CAUTION: This routine is called while the journal's trim lock
+;				is held shared, so that no other thread can reuse any portion
+;				of those extents.  We must be very careful about which locks
+;				we take from within this callback, to avoid deadlock.  The
+;				call to add_free_extent_cache will end up taking the cache's
+;				lock (just long enough to add these extents to the cache).
 ;
-; Output:
-;	 (result)		 - Error code, zero for successful allocation
-;	 *startBlock	 - Actual starting allocation block
-;	 *actualBlocks	 - Actual number of allocation blocks allocated
+;				CAUTION: If the journal becomes invalid (eg., due to an I/O
+;				error when trying to write to the journal), this callback
+;				will stop getting called, even if extents got freed before
+;				the journal became invalid!
 ;
-; Side effects:
-;	 The volume bitmap is read and updated; the volume bitmap cache may be changed.
+; Input Arguments:
+;	arg				- The hfsmount of the volume containing the extents.
+;	extent_count	- The number of extents freed in the transaction.
+;	extents			- An array of extents (byte ranges) that were freed.
 ;________________________________________________________________________________
 */
-static void
-sanity_check_free_ext(__unused ExtendedVCB *vcb, __unused int check_allocated)
+__private_extern__ void
+hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
 {
-#if DEBUG
-	u_int32_t i, j;
-
-	for(i=0; i < vcb->vcbFreeExtCnt; i++) {
-		u_int32_t start, nblocks;
-
-		start   = vcb->vcbFreeExt[i].startBlock;
-		nblocks = vcb->vcbFreeExt[i].blockCount;
-
-
-		if (nblocks == 0) {
-			panic("hfs: %p: slot %d in the free extent array had a zero count (%d)\n", vcb, i, start);
-		}
-
-		if (check_allocated && hfs_isallocated(vcb, start, nblocks)) {
-			panic("hfs: %p: slot %d in the free extent array is bad (%d / %d)\n",
-			      vcb, i, start, nblocks);
-		}
-
-		for(j=i+1; j < vcb->vcbFreeExtCnt; j++) {
-			if (start == vcb->vcbFreeExt[j].startBlock) {
-				panic("hfs: %p: slot %d/%d are dups?! (%d / %d ; %d / %d)\n",
-				      vcb, i, j, start, nblocks, vcb->vcbFreeExt[i].startBlock,
-				      vcb->vcbFreeExt[i].blockCount);
-			}
-		}
+	uint32_t i;
+	uint32_t startBlock, numBlocks;
+	struct hfsmount *hfsmp = arg;
+	
+	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0);
+	
+	for (i=0; i<extent_count; ++i) {
+		/* Convert the byte range in *extents back to a range of allocation blocks. */
+		startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize;
+		numBlocks = extents[i].length / hfsmp->blockSize;
+		(void) add_free_extent_cache(hfsmp, startBlock, numBlocks);
 	}
-#endif
+
+	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_END, 0, 0, 0, 0, 0);
 }
 
 
-__private_extern__
+/*
+ ;________________________________________________________________________________
+ ;
+ ; Routine:	   BlockAllocate
+ ;
+ ; Function:   Allocate space on a volume.	If contiguous allocation is requested,
+ ;			   at least the requested number of bytes will be allocated or an
+ ;			   error will be returned.	If contiguous allocation is not forced,
 ;			   the space will be allocated from the first largest extent available 
 ;			   at the requested starting allocation block.  If there is not enough
 ;			   room there, an allocation of less than the requested size will be
 ;			   made.
+ ;
+ ;			   If the requested starting block is 0 (for new file allocations),
+ ;			   the volume's allocation block pointer will be used as a starting
+ ;			   point.
+ ;
+ ; Input Arguments:
+ ;	 vcb			 - Pointer to ExtendedVCB for the volume to allocate space on
+ ;	 fcb			 - Pointer to FCB for the file for which storage is being allocated
+ ;	 startingBlock	 - Preferred starting allocation block, 0 = no preference
+ ;	 minBlocks	 	 - Number of blocks requested.	If the allocation is non-contiguous,
+ ;					   less than this may actually be allocated
+ ;	 maxBlocks	 	 - The maximum number of blocks to allocate.  If there is additional free
+ ;					   space after bytesRequested, then up to maxBlocks bytes should really
+ ;					   be allocated.  (Used by ExtendFileC to round up allocations to a multiple
+ ;					   of the file's clump size.)
+ ;	 flags           - Flags to specify options like contiguous, use metadata zone, 
+ ;					   skip free block check, etc.
+ ;
+ ; Output:
+ ;	 (result)		 - Error code, zero for successful allocation
+ ;	 *startBlock	 - Actual starting allocation block
+ ;	 *actualBlocks	 - Actual number of allocation blocks allocated
+ ;
+ ; Side effects:
+ ;	 The volume bitmap is read and updated; the volume bitmap cache may be changed.
+ ;________________________________________________________________________________
+ */
 OSErr BlockAllocate (
 	ExtendedVCB		*vcb,				/* which volume to allocate space on */
 	u_int32_t		startingBlock,		/* preferred starting block, or 0 for no preference */
@@ -332,9 +646,13 @@ OSErr BlockAllocate (
 	u_int32_t  freeBlocks;
 	OSErr			err;
 	Boolean			updateAllocPtr = false;		//	true if nextAllocation needs to be updated
+	struct hfsmount	*hfsmp;
 	Boolean useMetaZone;
 	Boolean forceContiguous;
 
+	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0);
+	
 	if (flags & HFS_ALLOC_FORCECONTIG) {
 		forceContiguous = true;
 	} else {
@@ -347,12 +665,27 @@ OSErr BlockAllocate (
 		useMetaZone = false;
 	}
 
+	//TODO: Figure out when we need to re-enable the RB-Tree. 
+	
+	
+	//TODO: Make sure we use allocLimit when appropriate.
+	
+	/*
+	 * TODO: Update BlockAllocate and its sub-functions to do cooperative allocation and bitmap scanning
+	 * in conjunction with the Generate Tree function.   If the red-black tree does not currently contain
+	 * an allocation block of appropriate size, then start scanning blocks FOR the tree generation function until
+	 * we find what we need.  We'll update the tree fields when we're done, indicating that we've advanced the
+	 * high water mark for the tree.  
+	 */
+	
 	//
 	//	Initialize outputs in case we get an error
 	//
 	*actualStartBlock = 0;
 	*actualNumBlocks = 0;
-	freeBlocks = hfs_freeblks(VCBTOHFS(vcb), 0);
+	hfsmp = VCBTOHFS (vcb);
+	freeBlocks = hfs_freeblks(hfsmp, 0);
+	
 	
 	/* Skip free block check if blocks are being allocated for relocating 
 	 * data during truncating a volume.
@@ -394,14 +727,19 @@ OSErr BlockAllocate (
 	//
 	if (startingBlock == 0) {
 		HFS_MOUNT_LOCK(vcb, TRUE);
+		
+		/* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */
 		if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
 			startingBlock = vcb->sparseAllocation;
-		} else {
+		} 
+		else {
 			startingBlock = vcb->nextAllocation;
 		}
 		HFS_MOUNT_UNLOCK(vcb, TRUE);
 		updateAllocPtr = true;
 	}
+	
+	
 	if (startingBlock >= vcb->allocLimit) {
 		startingBlock = 0; /* overflow so start at beginning */
 	}
@@ -414,33 +752,91 @@ OSErr BlockAllocate (
 		err = BlockAllocateContig(vcb, startingBlock, minBlocks, maxBlocks,
 		                          useMetaZone, actualStartBlock, actualNumBlocks);
 		/*
-		 * If we allocated from a new position then
-		 * also update the roving allocator.
+		 * If we allocated from a new position then also update the roving allocator.  
+		 * This will keep the roving allocation pointer up-to-date even 
+		 * if we are using the new R/B tree allocator, since
+		 * it doesn't matter to us here, how the underlying allocator found 
+		 * the block to vend out.
 		 */
 		if ((err == noErr) &&
 		    (*actualStartBlock > startingBlock) &&
 		    ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
 	    	     (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
-
 			updateAllocPtr = true;
 		}
 	} else {
+#if CONFIG_HFS_ALLOC_RBTREE
+		/* 
+		 * If the RB-Tree Allocator is live, just go straight for a 
+		 * BlockAllocateAny call and return the result.  Otherwise, 
+		 * resort to the bitmap scanner.
+		 */
+		if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
+			/* Start by trying to allocate from the starting block forward */
+			err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
+								   maxBlocks, useMetaZone, actualStartBlock,
+								   actualNumBlocks);
+			
+			/* 
+			 * Because the RB-Tree is live, the previous call to BlockAllocateAny
+			 * will use the rbtree variant.  As a result, it will automatically search the 
+			 * metadata zone for a valid extent if needed.  If we get a return value of 
+			 * noErr, we found a valid extent and we can skip to the end.  If the error indicates
+			 * the disk is full, that's an equally valid return code and we can skip to the end, too.
+			 */
+			if (err == noErr || err == dskFulErr) {
+				goto Exit; 
+			}
+			else {
+				//TODO: only tear down tree if the tree is finished building.
+				//Make sure to handle the ENOSPC condition properly.  We shouldn't error out in that case.
+				/* Tear down tree if we encounter an error */
+				if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE) {
+					hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
+					DestroyTrees(hfsmp);
+					ResetVCBFreeExtCache(hfsmp);				
+				}
+				else {
+					goto Exit;
+				}
+				// fall through to the normal allocation since the rb-tree allocation failed.
+			}
+		}
+#endif
+					
 		/*
 		 * Scan the bitmap once, gather the N largest free extents, then
 		 * allocate from these largest extents.  Repeat as needed until
 		 * we get all the space we needed.  We could probably build up
 		 * that list when the higher level caller tried (and failed) a
 		 * contiguous allocation first.
+		 *
+		 * Note that the free-extent cache will cease to be updated if
+		 * we are using the red-black tree for allocations.  If we jettison 
+		 * the tree, then we will reset the free-extent cache and start over.
 		 */
+		
 		err = BlockAllocateKnown(vcb, maxBlocks, actualStartBlock, actualNumBlocks);
-		if (err == dskFulErr)
+		/* dskFulErr out of BlockAllocateKnown indicates an empty Free Extent Cache */
+
+		if (err == dskFulErr) {
+			/* 
+			 * Now we have to do a bigger scan.  Start at startingBlock and go up until the
+			 * allocation limit.
+			 */
 			err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
 			                       maxBlocks, useMetaZone, actualStartBlock,
 			                       actualNumBlocks);
-		if (err == dskFulErr)
+		}
+		if (err == dskFulErr) {
+			/*
+			 * We may be out of space in the normal zone; go up to the starting block from
+			 * the start of the volume.
+			 */
 			err = BlockAllocateAny(vcb, 1, startingBlock, maxBlocks,
 			                       useMetaZone, actualStartBlock,
 			                       actualNumBlocks);
+		}
 	}
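
The non-contiguous path above falls back in a fixed order: the free extent
cache, then a forward bitmap scan, then a wrap-around scan from the start of
the volume.  A stub-driven sketch of that cascade (the allocator calls are
stand-ins; dskFulErr is the classic disk-full error):

#include <stdio.h>

#define noErr		0
#define dskFulErr	(-34)	/* classic "disk full" error */

/* Stubs standing in for BlockAllocateKnown / BlockAllocateAny. */
static int try_known_extents(void)	{ return dskFulErr; }	/* cache empty */
static int try_forward_scan(void)	{ return dskFulErr; }	/* nothing ahead */
static int try_wraparound_scan(void)	{ return noErr; }	/* found space */

int main(void)
{
	int err = try_known_extents();		/* free extent cache */
	if (err == dskFulErr)
		err = try_forward_scan();	/* startingBlock .. allocLimit */
	if (err == dskFulErr)
		err = try_wraparound_scan();	/* block 1 .. startingBlock */
	printf("allocation %s\n", err == noErr ? "succeeded" : "failed");
	return 0;
}
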
 
 Exit:
@@ -450,8 +846,6 @@ Exit:
 	// still need to update things like the free block count).
 	//
 	if (*actualNumBlocks != 0) {
-		int i,j;
-
 		//
 		//	If we used the volume's roving allocation pointer, then we need to update it.
 		//	Adding in the length of the current allocation might reduce the next allocate
@@ -462,41 +856,24 @@ Exit:
 		//
 		HFS_MOUNT_LOCK(vcb, TRUE);
 
+		lck_spin_lock(&hfsmp->vcbFreeExtLock);
 		if (vcb->vcbFreeExtCnt == 0 && vcb->hfs_freed_block_count == 0) {
 			vcb->sparseAllocation = *actualStartBlock;
 		}
+		lck_spin_unlock(&hfsmp->vcbFreeExtLock);
 		if (*actualNumBlocks < vcb->hfs_freed_block_count) {
 			vcb->hfs_freed_block_count -= *actualNumBlocks;
 		} else {
 			vcb->hfs_freed_block_count = 0;
 		}
-		
+
 		if (updateAllocPtr &&
-		    ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
-	    	     (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
+				((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
+				 (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
 			HFS_UPDATE_NEXT_ALLOCATION(vcb, *actualStartBlock);
 		}
 
-		for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) {
-			u_int32_t start, end;
-
-			start = vcb->vcbFreeExt[i].startBlock;
-			end   = start + vcb->vcbFreeExt[i].blockCount;
-
-			if (   (*actualStartBlock >= start && *actualStartBlock < end)
-			    || ((*actualStartBlock + *actualNumBlocks) > start && *actualStartBlock < start)) {
-
-				for(j=i; j < (int)vcb->vcbFreeExtCnt-1; j++) {
-					vcb->vcbFreeExt[j] = vcb->vcbFreeExt[j+1];
-				}
-
-				vcb->vcbFreeExtCnt--;
-				i--;   // so we'll check the guy we just copied down...
-				
-				// keep looping because we may have invalidated more
-				// than one entry in the array
-			}
-		}
+		(void) remove_free_extent_cache(hfsmp, *actualStartBlock, *actualNumBlocks);
 
 		/* 
 		 * Update the number of free blocks on the volume 
@@ -510,11 +887,31 @@ Exit:
 		MarkVCBDirty(vcb);
 		HFS_MOUNT_UNLOCK(vcb, TRUE);
 
-		sanity_check_free_ext(vcb, 1);
-
 		hfs_generate_volume_notifications(VCBTOHFS(vcb));
 	}
 	
+	if (ALLOC_DEBUG) {
+		if (err == noErr) {
+			if (*actualStartBlock >= hfsmp->totalBlocks) {
+				panic ("BlockAllocate: vending invalid blocks!");
+			}
+			if (*actualStartBlock >= hfsmp->allocLimit) {
+				panic ("BlockAllocate: vending block past allocLimit!");
+			}
+			
+			if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->totalBlocks) {	
+				panic ("BlockAllocate: vending too many invalid blocks!");
+			}
+			
+			if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->allocLimit) {	
+				panic ("BlockAllocate: vending too many invalid blocks past allocLimit!");
+			}
+		}
+	}
+	
+	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
+
 	return err;
 }
 
@@ -522,7 +919,7 @@ Exit:
 /*
 ;________________________________________________________________________________
 ;
-; Routine:	   BlkDealloc
+; Routine:	   BlockDeallocate
 ;
 ; Function:    Update the bitmap to deallocate a run of disk allocation blocks
 ;
@@ -536,10 +933,10 @@ Exit:
 ;
 ; Side effects:
 ;	 The volume bitmap is read and updated; the volume bitmap cache may be changed.
+;	 The Allocator's red-black trees may also be modified as a result.
 ;________________________________________________________________________________
 */
 
-__private_extern__
 OSErr BlockDeallocate (
 	ExtendedVCB		*vcb,			//	Which volume to deallocate space on
 	u_int32_t		firstBlock,		//	First block in range to deallocate
@@ -547,8 +944,12 @@ OSErr BlockDeallocate (
 	u_int32_t 		flags)
 {
 	OSErr			err;
-	u_int32_t		tempWord;
+	struct hfsmount *hfsmp;
+	hfsmp = VCBTOHFS(vcb);
 	
+	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0);
+
 	//
 	//	If no blocks to deallocate, then exit early
 	//
@@ -556,11 +957,51 @@ OSErr BlockDeallocate (
 		err = noErr;
 		goto Exit;
 	}
+	
+	
+	if (ALLOC_DEBUG) {
+		if (firstBlock >= hfsmp->totalBlocks) {
+			panic ("BlockDeallocate: freeing invalid blocks!");
+		}
+		
+		if ((firstBlock + numBlocks) >= hfsmp->totalBlocks) {	
+			panic ("BlockDeallocate: freeing too many invalid blocks!");
+		}			
+	}
+	
+	
+	
+
+	/*
+	 * If we're using the red-black tree code, then try to free the
+	 * blocks by marking them in the red-black tree first.  If the tree
+	 * is not active for whatever reason (or we're not using the 
+	 * R/B Tree code at all), then go straight for the BlockMarkFree 
+	 * function. 
+	 *
+	 * Remember that we can get into this function if the tree isn't finished
+	 * building.  In that case, check to see if the block we're de-allocating is
+	 * past the high watermark
+	 */
+#if CONFIG_HFS_ALLOC_RBTREE
+	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
+		/*
+		 * BlockMarkFreeRBTree deals with the case where we are resizing the
+		 * filesystem (shrinking), and we need to manipulate the bitmap beyond the portion
+		 * that is currently controlled by the r/b tree.
+		 */
+		
+		//TODO: Update multiplexing code for the half-finished case.
+		err = BlockMarkFreeRBTree(vcb, firstBlock, numBlocks);
+		adjustFreeExtCache = 0;
+	}
+	else {
+		err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
+	}
 
-	//
-	//	Call internal routine to free the sequence of blocks
-	//
-	err = BlockMarkFree(vcb, firstBlock, numBlocks);
+#else
+	err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
+#endif
 	if (err)
 		goto Exit;
 
@@ -578,75 +1019,39 @@ OSErr BlockDeallocate (
 	}
 
 	vcb->hfs_freed_block_count += numBlocks;
-	if (firstBlock < vcb->sparseAllocation) {
-		vcb->sparseAllocation = firstBlock;
-	}
 
 	if (vcb->nextAllocation == (firstBlock + numBlocks)) {
 		HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks));
 	}
 
-	if (free_extent_cache_active(vcb) == 0) {
-		goto skip_cache;
-	}
-
-	tempWord = vcb->vcbFreeExtCnt;
-	//	Add this free chunk to the free extent list
-	if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
-		// Sorted by start block
-		if (tempWord == kMaxFreeExtents && vcb->vcbFreeExt[kMaxFreeExtents-1].startBlock > firstBlock)
-			--tempWord;
-		if (tempWord < kMaxFreeExtents)
-		{
-			//	We're going to add this extent.  Bubble any smaller extents down in the list.
-			while (tempWord && vcb->vcbFreeExt[tempWord-1].startBlock > firstBlock)
-			{
-				vcb->vcbFreeExt[tempWord] = vcb->vcbFreeExt[tempWord-1];
-				if (vcb->vcbFreeExt[tempWord].startBlock < vcb->sparseAllocation) {
-					vcb->sparseAllocation = vcb->vcbFreeExt[tempWord].startBlock;
-				}
-				--tempWord;
-			}
-			vcb->vcbFreeExt[tempWord].startBlock = firstBlock;
-			vcb->vcbFreeExt[tempWord].blockCount = numBlocks;
-			
-			if (vcb->vcbFreeExtCnt < kMaxFreeExtents) {
-				++vcb->vcbFreeExtCnt;
-			}
-		}
-	} else {
-		// Sorted by num blocks
-		if (tempWord == kMaxFreeExtents && vcb->vcbFreeExt[kMaxFreeExtents-1].blockCount < numBlocks)
-			--tempWord;
-		if (tempWord < kMaxFreeExtents)
-		{
-			//	We're going to add this extent.  Bubble any smaller extents down in the list.
-			while (tempWord && vcb->vcbFreeExt[tempWord-1].blockCount < numBlocks)
-			{
-				vcb->vcbFreeExt[tempWord] = vcb->vcbFreeExt[tempWord-1];
-				if (vcb->vcbFreeExt[tempWord].startBlock < vcb->sparseAllocation) {
-					vcb->sparseAllocation = vcb->vcbFreeExt[tempWord].startBlock;
-				}
-				--tempWord;
-			}
-			vcb->vcbFreeExt[tempWord].startBlock = firstBlock;
-			vcb->vcbFreeExt[tempWord].blockCount = numBlocks;
-			
-			if (vcb->vcbFreeExtCnt < kMaxFreeExtents) {
-				++vcb->vcbFreeExtCnt;
-			}
+	if (hfsmp->jnl == NULL) {
+		/*
+		 * In the journal case, we'll add the free extent once the journal
+		 * calls us back to tell us it wrote the transaction to disk.
+		 */
+		(void) add_free_extent_cache(vcb, firstBlock, numBlocks);
+		
+		/*
+	 * In the journal case, we'll only update sparseAllocation once the
+		 * free extent cache becomes empty (when we remove the last entry
+		 * from the cache).  Skipping it here means we're less likely to
+		 * find a recently freed extent via the bitmap before it gets added
+		 * to the free extent cache.
+		 */
+		if (firstBlock < vcb->sparseAllocation) {
+			vcb->sparseAllocation = firstBlock;
 		}
 	}
-
-skip_cache:
+	
 	MarkVCBDirty(vcb);
   	HFS_MOUNT_UNLOCK(vcb, TRUE); 
 
-	sanity_check_free_ext(vcb, 1);
-
 	hfs_generate_volume_notifications(VCBTOHFS(vcb));
 Exit:
 
+	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_END, err, 0, 0, 0, 0);
+
 	return err;
 }
 
@@ -656,7 +1061,6 @@ u_int8_t freebitcount[16] = {
 	3, 2, 2, 1, 2, 1, 1, 0,  /* 8 9 A B C D E F */
 };
 
-__private_extern__
 u_int32_t
 MetaZoneFreeBlocks(ExtendedVCB *vcb)
 {
@@ -763,6 +1167,9 @@ static OSErr ReadBitmapBlock(
 	daddr64_t block;
 	u_int32_t blockSize;
 
+	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_START, bit, 0, 0, 0, 0);
+
 	/*
 	 * volume bitmap blocks are protected by the allocation file lock
 	 */
@@ -792,6 +1199,9 @@ static OSErr ReadBitmapBlock(
 		}
 	}
 
+	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_END, err, 0, 0, 0, 0);
+
 	return err;
 }
 
@@ -816,6 +1226,9 @@ static OSErr ReleaseBitmapBlock(
 {
 	struct buf *bp = (struct buf *)blockRef;
 	
+	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0);
+
 	if (blockRef == 0) {
 		if (dirty)
 			panic("hfs: ReleaseBitmapBlock: missing bp");
@@ -837,9 +1250,42 @@ static OSErr ReleaseBitmapBlock(
 		}
 	}
 
+	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0);
+
+	return (0);
+}
+
+#if CONFIG_HFS_ALLOC_RBTREE
+/*
+ * ReleaseRBScanBitmapBlock is used to release struct bufs that were 
+ * created for use by the Red-Black tree generation code.  We want to force 
+ * them to be purged out of the buffer cache ASAP, so we release them differently
+ * than in the ReleaseBitmapBlock case.  Additionally, we know that we're only reading 
+ * the blocks, so we will never dirty them as part of the tree building scan.
+ */
+
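+/*
+ * Note: marking an unlocked buffer invalid below tells the buffer cache to
+ * discard it when buf_brelse() drops the reference, rather than keep it
+ * around for reuse.
+ */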
+static OSErr ReleaseRBScanBitmapBlock(struct buf *bp) {
+	
+	if (bp == NULL) {
+		return (0);
+	}
+	
+	/* Mark the buffer invalid if it isn't locked, then release it */
+	if ((buf_flags(bp) & B_LOCKED) == 0) {
+		buf_markinvalid(bp);
+	}
+	buf_brelse(bp);
+	
+	return (0);
+}
 
+#endif
+
 
 /*
 _______________________________________________________________________
@@ -872,21 +1318,48 @@ static OSErr BlockAllocateContig(
 	u_int32_t		*actualStartBlock,
 	u_int32_t		*actualNumBlocks)
 {
-	OSErr	err;
 
-	//
-	//	Find a contiguous group of blocks at least minBlocks long.
-	//	Determine the number of contiguous blocks available (up
-	//	to maxBlocks).
-	//
+#if CONFIG_HFS_ALLOC_RBTREE
+	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
+		return BlockAllocateContigRBTree(vcb, startingBlock, minBlocks, maxBlocks, useMetaZone, 
+				actualStartBlock, actualNumBlocks, 1);
+	}
+#endif
+	return BlockAllocateContigBitmap(vcb, startingBlock, minBlocks, 
+			maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);	
+}
 
-	/*
-	 * NOTE: If the only contiguous free extent of at least minBlocks
-	 * crosses startingBlock (i.e. starts before, ends after), then we
-	 * won't find it. Earlier versions *did* find this case by letting
-	 * the second search look past startingBlock by minBlocks.  But
-	 * with the free extent cache, this can lead to duplicate entries
-	 * in the cache, causing the same blocks to be allocated twice.
+/*
+ * Variant of BlockAllocateContig that uses the original bitmap-searching logic
+ */
+
+static OSErr BlockAllocateContigBitmap(
+	ExtendedVCB		*vcb,
+	u_int32_t		startingBlock,
+	u_int32_t		minBlocks,
+	u_int32_t		maxBlocks,
+	Boolean			useMetaZone,
+	u_int32_t		*actualStartBlock,
+	u_int32_t		*actualNumBlocks)
+{
+	OSErr	err;
+
+	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0);
+
+	//
+	//	Find a contiguous group of blocks at least minBlocks long.
+	//	Determine the number of contiguous blocks available (up
+	//	to maxBlocks).
+	//
+
+	/*
+	 * NOTE: If the only contiguous free extent of at least minBlocks
+	 * crosses startingBlock (i.e. starts before, ends after), then we
+	 * won't find it. Earlier versions *did* find this case by letting
+	 * the second search look past startingBlock by minBlocks.  But
+	 * with the free extent cache, this can lead to duplicate entries
+	 * in the cache, causing the same blocks to be allocated twice.
 	 */
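+	/*
+	 * Illustration (numbers assumed): suppose the only free extent spans
+	 * blocks 90..110 and startingBlock is 100 with minBlocks == 15.  The
+	 * forward search sees only blocks 100..110 (11 blocks), and the second
+	 * search stops at startingBlock, seeing only 90..99 (10 blocks), so the
+	 * 21-block extent is never found whole.
+	 */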
 	err = BlockFindContiguous(vcb, startingBlock, vcb->allocLimit, minBlocks,
 	                          maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
@@ -902,10 +1375,234 @@ static OSErr BlockAllocateContig(
 	//	Now mark those blocks allocated.
 	//
 	if (err == noErr)
-		err = BlockMarkAllocated(vcb, *actualStartBlock, *actualNumBlocks);
+		err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);
+	
+	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
+
+	return err;
+}
+
+#if CONFIG_HFS_ALLOC_RBTREE
+/*
+ * Variant of BlockAllocateContig that uses the newer red-black tree library
+ * in order to manage free space extents.  This will search the red-black tree
+ * and return results in the same fashion as BlockAllocateContigBitmap.
+ * 
+ * Note that this function is invoked from both the red-black tree variant of BlockAllocateAny
+ * as well as BlockAllocateContig.  To decide when to vend contiguous chunks rather than
+ * perform locality-based searches, we use the forceContig argument to tell who called us.
+ */
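+/*
+ * For instance, BlockAllocateContig calls this function with forceContig == 1
+ * (the caller needs a single contiguous run), while BlockAllocateAnyRBTree
+ * passes 0, which permits the sparse-device path below to fragment the request.
+ */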
+
+static OSErr BlockAllocateContigRBTree(
+						  ExtendedVCB		*vcb,
+						  u_int32_t		startingBlock,
+						  u_int32_t		minBlocks,
+						  u_int32_t		maxBlocks,
+						  Boolean			useMetaZone,
+						  u_int32_t		*actualStartBlock,
+						  u_int32_t		*actualNumBlocks,
+						  u_int32_t 	forceContig)
+{
+	OSErr	err;
+	struct hfsmount *hfsmp = VCBTOHFS(vcb);
+	extent_node_t search_sentinel;
+	extent_node_t *node = NULL;
+	extent_node_t tempnode;
+	
+	bzero (&tempnode, sizeof(extent_node_t));
+	
+	/* Begin search at the end of the file, via startingBlock */
+	memset (&search_sentinel, 0, sizeof(extent_node_t));
+	search_sentinel.offset = startingBlock;
+	
+	*actualStartBlock = 0;
+	*actualNumBlocks = 0;
+	
+	/* 
+	 * Find the first available extent that satisfies the allocation by searching
+	 * from the starting point and moving forward
+	 */
+	node = extent_tree_off_search_next(&hfsmp->offset_tree, &search_sentinel);
+	
+	if (node) {
+		*actualStartBlock = node->offset;
+		*actualNumBlocks = node->length;
+	}
+	
+	 /* If we managed to grab at least minBlocks of space, then we're done. */
+
+	if (*actualNumBlocks >= minBlocks) {
+		if (*actualNumBlocks > maxBlocks) {
+			*actualNumBlocks = maxBlocks;
+		}
+		
+		/* Check to see if blocks are already marked as in-use */
+		if (ALLOC_DEBUG) {
+			REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
+			if (hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
+				printf("bad node: %p, offset %d, length %d\n", node, node->offset, node->length);
+				panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use already\n",
+					   *actualStartBlock, *actualNumBlocks);
+			}
+		}
+		
+		/*
+		 * BlockMarkAllocatedRBTree is responsible for removing the nodes
+		 * from the red-black tree after the bitmap has been updated on-disk.
+		 */
+		err = BlockMarkAllocatedRBTree(vcb, *actualStartBlock, *actualNumBlocks);
+		if (err == noErr) {
+			
+			if ( ALLOC_DEBUG ) {
+				REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
+				if (!hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
+					panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
+						   *actualStartBlock, *actualNumBlocks);
+				}
+				check_rbtree_extents (VCBTOHFS(vcb), *actualStartBlock, *actualNumBlocks, ASSERT_ALLOC);		
+			}		
+			
+			return err;
+		}
+	}
+	
+	/*
+	 * We may have failed to grow at the end of the file.  We'll try to find 
+	 * appropriate free extents, searching by size in the normal allocation zone.
+	 * 
+	 * However, if we're allocating on behalf of a sparse device that hasn't explicitly
+	 * requested a contiguous chunk, then we try to search by offset, even if it 
+	 * means fragmenting the file.  We want all available entries starting 
+	 * from the front of the disk to avoid creating new bandfiles.  As a result, 
+	 * we'll start by searching the offset tree rather than the normal length 
+	 * tree. Note that this function can be invoked from BlockAllocateAny, in 
+	 * which case the minimum request is a single block, making it easy to succeed. 
+	 */
+	search_sentinel.offset = hfsmp->hfs_metazone_end;
+	search_sentinel.length = minBlocks;
+	
+	if ((vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) && (forceContig == 0)) {
+		/* just start with the first offset node */
+		node = extent_tree_off_search_next(&hfsmp->offset_tree, &search_sentinel);		
+	}
+	else {
+		/* 
+		 * Otherwise, start from the end of the metadata zone or our next allocation pointer, 
+		 * and try to find the first chunk of size >= min.
+		 */
+		node = extent_tree_off_search_nextWithSize (&hfsmp->offset_tree, &search_sentinel);
+		
+		if (node == NULL) {
+			extent_node_t *metaend_node;
+			/* 
+			 * Maybe there's a free extent coalesced with the space still in the metadata 
+			 * zone.  If there is, find it and allocate from the middle of it, starting at
+			 * the end of the metadata zone.
+			 *
+			 * If search_prev yields a node whose offset is not exactly metazone_end, then
+			 * no node exists at that offset.  If that previous node's offset + length
+			 * crosses the metazone boundary, then allocate from the portion beyond the
+			 * boundary.  If the node is too small to cross the boundary, it is of no use
+			 * and we'd have to report ENOSPC.
+			 */
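+			/*
+			 * Worked example (numbers assumed): if hfs_metazone_end is 1000 and
+			 * search_prev returns a node at offset 900 with length 300, then
+			 * node_end = 1200 crosses the boundary, leaving 200 usable blocks
+			 * starting at block 1000.
+			 */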
+			metaend_node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
+			
+			if ((metaend_node) && (metaend_node->offset < hfsmp->hfs_metazone_end)) {
+				u_int32_t node_end = metaend_node->offset + metaend_node->length;
+				if (node_end > hfsmp->hfs_metazone_end) {
+					u_int32_t modified_length = node_end - hfsmp->hfs_metazone_end;
+					if (modified_length >= minBlocks) {
+						/* 
+						 * Then we can allocate it.  Fill in the contents into tempnode,
+						 * and BlockMarkAllocatedRBTree below will take care of the rest.
+						 */
+						tempnode.offset = hfsmp->hfs_metazone_end;
+						tempnode.length = MIN(minBlocks, node_end - tempnode.offset);
+						node = &tempnode;
+					}
+				}
+			}
+		}
+	}
+	
+	 /* If we can't find anything useful, search the metadata zone as a last resort. */
+	
+	if ((!node) && useMetaZone) {
+		search_sentinel.offset = 0;
+		search_sentinel.length = minBlocks;
+		node = extent_tree_off_search_nextWithSize (&hfsmp->offset_tree, &search_sentinel);
+	}
+	
+	/* If we found something useful, then go ahead and update the bitmap */
+	if ((node) && (node->length >= minBlocks)) {
+		*actualStartBlock = node->offset;
+		if (node->length >= maxBlocks) {
+			*actualNumBlocks = maxBlocks;
+		}
+		else {
+			*actualNumBlocks = node->length;
+		}
+
+		err = BlockMarkAllocatedRBTree(vcb, *actualStartBlock, *actualNumBlocks);
+		
+		if (err == noErr) {
+			if ( ALLOC_DEBUG ) {
+				REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
+				if (!hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
+					panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
+						   *actualStartBlock, *actualNumBlocks);
+				}
+				check_rbtree_extents (VCBTOHFS(vcb), *actualStartBlock, *actualNumBlocks, ASSERT_ALLOC);		
+			}
+		}
+	}
+	else {
+		int destroy_trees = 0;
+		/*
+		 * TODO: Add High-water mark check here.  If we couldn't find anything useful, 
+		 * when do we tear down the tree?  Or should the logic be in BlockAllocateContig??
+		 */
+		if (destroy_trees) {
+			DestroyTrees(VCBTOHFS(vcb));
+			/* Reset the Free Ext Cache since we'll be using it now. */
+			ResetVCBFreeExtCache(VCBTOHFS(vcb));
+		}
+		
+		if (ALLOC_DEBUG) {
+			printf("HFS allocator: No space on FS (%s). Node %p Start %d Min %d, Max %d, Tree still alive.\n", 
+				   hfsmp->vcbVN, node, startingBlock, minBlocks, maxBlocks);
+			
+			/* Dump the list ? */
+			extent_tree_offset_print(&hfsmp->offset_tree);
+			
+			printf("HFS allocator: Done printing list on FS (%s). Min %d, Max %d, Tree still alive.\n", 
+				   hfsmp->vcbVN, minBlocks, maxBlocks);
+		}
+		err = dskFulErr;
+	}
+	
+	if (err == noErr) {
+		if (ALLOC_DEBUG) {
+			if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
+				panic("hfs: BlockAllocateContigRBTree: allocation overflow on \"%s\"", vcb->vcbVN);
+		}
+	}
+	else {
+		*actualStartBlock = 0;
+		*actualNumBlocks = 0;
+	}
 	
 	return err;
 }
+#endif
 /*
 _______________________________________________________________________
@@ -929,6 +1626,12 @@ Outputs:
 	actualNumBlocks		Number of blocks allocated, or 0 if error
 _______________________________________________________________________
 */
+
+/*
+ * BlockAllocateAny acts as a multiplexer between BlockAllocateAnyRBTree
+ * and BlockAllocateAnyBitmap, which uses the bitmap scanning logic.  
+ */
+
 static OSErr BlockAllocateAny(
 	ExtendedVCB		*vcb,
 	u_int32_t		startingBlock,
@@ -938,6 +1641,60 @@ static OSErr BlockAllocateAny(
 	u_int32_t		*actualStartBlock,
 	u_int32_t		*actualNumBlocks)
 {
+	
+#if CONFIG_HFS_ALLOC_RBTREE
+	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
+		return BlockAllocateAnyRBTree(vcb, startingBlock, maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
+	}
+#endif
+	return BlockAllocateAnyBitmap(vcb, startingBlock, endingBlock, maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
+
+}
+
+
+#if CONFIG_HFS_ALLOC_RBTREE
+/*
+ * BlockAllocateAnyRBTree finds one or more allocation blocks by using
+ * the red-black allocation tree to figure out where the free ranges are.  
+ * This function is typically used as a last resort because we were unable to 
+ * find a suitable range elsewhere.  Outputs are the same as BlockAllocateAnyBitmap.
+ */
+static OSErr BlockAllocateAnyRBTree(
+	ExtendedVCB		*vcb,
+	u_int32_t		startingBlock,
+	u_int32_t		maxBlocks,
+	Boolean			useMetaZone,
+	u_int32_t		*actualStartBlock,
+	u_int32_t		*actualNumBlocks)
+{	
+	OSErr err;
+	
+	/* 
+	 * If we're using the red-black tree, search at the specified offset via
+	 * BlockAllocateContigRBTree, with minBlocks == 1 and forceContig == 0 so
+	 * that locality-based searches are permitted.
+	 */
+	err = BlockAllocateContigRBTree(vcb, startingBlock, 1, maxBlocks, useMetaZone, 
+									actualStartBlock, actualNumBlocks, 0);
+	return err;
+}
+}
+#endif
+
+/*
+ * BlockAllocateAnyBitmap finds free ranges by scanning the bitmap to figure out
+ * where the free allocation blocks are.  Inputs and outputs are the same as for
+ * BlockAllocateAny and BlockAllocateAnyRBTree
+ */
+
+static OSErr BlockAllocateAnyBitmap(
+	ExtendedVCB		*vcb,
+	u_int32_t		startingBlock,
+	register u_int32_t	endingBlock,
+	u_int32_t		maxBlocks,
+	Boolean			useMetaZone,
+	u_int32_t		*actualStartBlock,
+	u_int32_t		*actualNumBlocks)
+{
 	OSErr			err;
 	register u_int32_t	block;			//	current block number
 	register u_int32_t	currentWord;	//	Pointer to current word within bitmap block
@@ -951,6 +1708,9 @@ static OSErr BlockAllocateAny(
 	Boolean dirty = false;
 	struct hfsmount *hfsmp = VCBTOHFS(vcb);
 
+	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0);
+
 	/*
 	 * When we're skipping the metadata zone and the start/end
 	 * range overlaps with the metadata zone then adjust the 
@@ -1128,11 +1888,19 @@ Exit:
 		if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
 			panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
 		}
-
-		/* Remove these blocks from the TRIM list if applicable */
-		if (CONFIG_HFS_TRIM) {
-			hfs_unmap_alloc_extent(vcb, *actualStartBlock, *actualNumBlocks);
-		}	
+		
+		/*
+		 * Beware!
+		 * Because this function directly manipulates the bitmap to mark the
+		 * blocks it came across as allocated, we must inform the journal (and
+		 * subsequently, the journal's trim list) that we are allocating these 
+		 * blocks, just like in BlockMarkAllocatedInternal.  hfs_unmap_alloc_extent
+		 * and the functions it calls will serialize behind the journal trim list lock
+		 * to ensure that either the asynchronous flush/TRIM/UNMAP happens prior to
+		 * us manipulating the trim list, or we get there first and successfully remove
+		 * these bitmap blocks before the TRIM happens.
+		 */
+		hfs_unmap_alloc_extent (vcb, *actualStartBlock, *actualNumBlocks);
 	}
 	else {
 		*actualStartBlock = 0;
@@ -1142,6 +1910,9 @@ Exit:
     if (currCache)
     	(void) ReleaseBitmapBlock(vcb, blockRef, dirty);
 
+	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
+
 	return err;
 }
 
@@ -1178,15 +1949,25 @@ static OSErr BlockAllocateKnown(
 	u_int32_t		foundBlocks;
 	u_int32_t		newStartBlock, newBlockCount;
 
+	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);
+
 	HFS_MOUNT_LOCK(vcb, TRUE);
-	if (free_extent_cache_active(vcb) == 0 ||
-	    vcb->vcbFreeExtCnt == 0 || 
+	lck_spin_lock(&vcb->vcbFreeExtLock);
+	if ((hfs_isrbtree_active(vcb) == true) || 
+		vcb->vcbFreeExtCnt == 0 || 
 	    vcb->vcbFreeExt[0].blockCount == 0) {
+		lck_spin_unlock(&vcb->vcbFreeExtLock);
 		HFS_MOUNT_UNLOCK(vcb, TRUE);
+		if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+			KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0);
 		return dskFulErr;
 	}
+	lck_spin_unlock(&vcb->vcbFreeExtLock);
 	HFS_MOUNT_UNLOCK(vcb, TRUE);
 
+	lck_spin_lock(&vcb->vcbFreeExtLock);
+
 	//	Just grab up to maxBlocks of the first (largest) free extent.
 	*actualStartBlock = vcb->vcbFreeExt[0].startBlock;
 	foundBlocks = vcb->vcbFreeExt[0].blockCount;
@@ -1246,6 +2027,7 @@ static OSErr BlockAllocateKnown(
 	}
 
 done:
+	lck_spin_unlock(&vcb->vcbFreeExtLock);
 	// sanity check
 	if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) 
 	{
@@ -1260,24 +2042,83 @@ done:
 		//
 		//	Now mark the found extent in the bitmap
 		//
-		err = BlockMarkAllocated(vcb, *actualStartBlock, *actualNumBlocks);
+		err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);
 	}
 
-	sanity_check_free_ext(vcb, 1);
+	sanity_check_free_ext(vcb, 0);
+
+	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
 
 	return err;
 }
 
+/*
+ * BlockMarkAllocated
+ * 
+ * This is a wrapper function around the internal calls which will actually mark the blocks
+ * as in-use.  It will mark the blocks in the red-black tree if appropriate.  We need to do 
+ * this logic here to avoid callers having to deal with whether or not the red-black tree
+ * is enabled.
+ */
+
+OSErr BlockMarkAllocated(
+	ExtendedVCB		*vcb,
+	u_int32_t		startingBlock,
+	register u_int32_t	numBlocks)
+{
+	struct hfsmount *hfsmp;
+	
+	hfsmp = VCBTOHFS(vcb);
+#if CONFIG_HFS_ALLOC_RBTREE
+	if (hfs_isrbtree_active(hfsmp)) {
+		int err;
+		
+		if ((startingBlock >= hfsmp->offset_block_end) && 
+			(hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
+			/* 
+			 * We're manipulating a portion of the bitmap that is not controlled by the
+			 * red-black tree.  Just update the bitmap and don't bother manipulating the tree
+			 */
+			goto justbitmap;
+		}
+		
+		err = BlockMarkAllocatedRBTree(vcb, startingBlock, numBlocks);
+		if (err == noErr) {
+			if ( ALLOC_DEBUG ) {
+				REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
+				if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
+					panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
+						   startingBlock, numBlocks);
+				}
+				check_rbtree_extents (hfsmp, startingBlock, numBlocks, ASSERT_ALLOC);		
+			}
+		}
+		return err;
+
+	}
+justbitmap:
+#endif
+
+	return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks);
+}
+
 
 
 /*
 _______________________________________________________________________
 
-Routine:	BlockMarkAllocated
+Routine:	BlockMarkAllocatedInternal
 
 Function:	Mark a contiguous group of blocks as allocated (set in the
 			bitmap).  It assumes those bits are currently marked
-			deallocated (clear in the bitmap).
+			deallocated (clear in the bitmap).  Note that this function
+			must be called regardless of whether the bitmap-based or
+			tree-based allocator is in use, as all allocations must be
+			correctly marked on-disk.  If the tree-based approach is
+			running, this is done before the node is removed from the tree.
 
 Inputs:
 	vcb				Pointer to volume where space is to be allocated
@@ -1285,8 +2126,8 @@ Inputs:
 	numBlocks		Number of blocks to mark as allocated
 _______________________________________________________________________
 */
-__private_extern__
-OSErr BlockMarkAllocated(
+static 
+OSErr BlockMarkAllocatedInternal (
 	ExtendedVCB		*vcb,
 	u_int32_t		startingBlock,
 	register u_int32_t	numBlocks)
@@ -1304,9 +2145,10 @@ OSErr BlockMarkAllocated(
 	// XXXdbg
 	struct hfsmount *hfsmp = VCBTOHFS(vcb);
 
-	if (CONFIG_HFS_TRIM) {
-		hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);
-	}
+	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
+
+	hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);
 	
 	//
 	//	Pre-read the bitmap block containing the first word of allocation
@@ -1349,7 +2191,7 @@ OSErr BlockMarkAllocated(
 		}
 #if DEBUG_BUILD
 		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
-			panic("hfs: BlockMarkAllocated: blocks already allocated!");
+			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
 		}
 #endif
 		*currentWord |= SWAP_BE32 (bitMask);		//	set the bits in the bitmap
@@ -1387,7 +2229,7 @@ OSErr BlockMarkAllocated(
 		}
 #if DEBUG_BUILD
 		if (*currentWord != 0) {
-			panic("hfs: BlockMarkAllocated: blocks already allocated!");
+			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
 		}
 #endif
 		*currentWord = SWAP_BE32 (bitMask);
@@ -1425,7 +2267,7 @@ OSErr BlockMarkAllocated(
 		}
 #if DEBUG_BUILD
 		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
-			panic("hfs: BlockMarkAllocated: blocks already allocated!");
+			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
 		}
 #endif
 		*currentWord |= SWAP_BE32 (bitMask);			//	set the bits in the bitmap
@@ -1438,73 +2280,322 @@ Exit:
 	if (buffer)
 		(void)ReleaseBitmapBlock(vcb, blockRef, true);
 
+	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
+
 	return err;
 }
 
-
+#if CONFIG_HFS_ALLOC_RBTREE
 /*
-_______________________________________________________________________
-
-Routine:	BlockMarkFree
-
-Function:	Mark a contiguous group of blocks as free (clear in the
-			bitmap).  It assumes those bits are currently marked
-			allocated (set in the bitmap).
+ * This is a wrapper function around BlockMarkAllocatedInternal.  It is
+ * called when the RB Tree-based allocator needs to mark a block as in-use.
+ * It takes any locks that would not normally be necessary for the plain
+ * bitmap allocator, then calls the internal function.  Once the on-disk
+ * data structures are updated properly, it removes the appropriate node
+ * from the tree.
+ */
 
-Inputs:
-	vcb				Pointer to volume where space is to be freed
-	startingBlock	First block number to mark as freed
-	numBlocks		Number of blocks to mark as freed
-_______________________________________________________________________
-*/
-__private_extern__
-OSErr BlockMarkFree(
+static OSErr BlockMarkAllocatedRBTree(
 	ExtendedVCB		*vcb,
-	u_int32_t		startingBlock_in,
-	register u_int32_t	numBlocks_in)
+	u_int32_t		startingBlock,
+	u_int32_t		numBlocks)
 {
-	OSErr			err;
-	u_int32_t	startingBlock = startingBlock_in;
-	u_int32_t	numBlocks = numBlocks_in;
-	register u_int32_t	*currentWord;	//	Pointer to current word within bitmap block
-	register u_int32_t	wordsLeft;		//	Number of words left in this bitmap block
-	register u_int32_t	bitMask;		//	Word with given bits already set (ready to OR in)
-	u_int32_t			firstBit;		//	Bit index within word of first bit to allocate
-	u_int32_t			numBits;		//	Number of bits in word to allocate
-	u_int32_t			*buffer = NULL;
-	uintptr_t  blockRef;
-	u_int32_t  bitsPerBlock;
-	u_int32_t  wordsPerBlock;
-    // XXXdbg
-	struct hfsmount *hfsmp = VCBTOHFS(vcb);
-
-	/*
-	 * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
-	 * need to be able to free blocks being relocated during hfs_truncatefs.
-	 */
-	if (startingBlock + numBlocks > vcb->totalBlocks) {
-		printf ("hfs: BlockMarkFree() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
-		hfs_mark_volume_inconsistent(vcb);
-		err = EIO;
-		goto Exit;
-	}
-
-	//
-	//	Pre-read the bitmap block containing the first word of allocation
-	//
+	OSErr err;
+	struct hfsmount *hfsmp  = VCBTOHFS(vcb);
+	int rb_err = 0;
 
-	err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
-	if (err != noErr) goto Exit;
-	// XXXdbg
-	if (hfsmp->jnl) {
-		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+	
+	if (ALLOC_DEBUG) {
+		REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
+		if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
+			panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use already\n",
+				   startingBlock, numBlocks);
+		}
+		check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_FREE);		
 	}
+	
+	err = BlockMarkAllocatedInternal (vcb, startingBlock, numBlocks);
+	
+	if (err == noErr) {
 
-	//
-	//	Initialize currentWord, and wordsLeft.
-	//
-	{
-		u_int32_t wordIndexInBlock;
+		if (ALLOC_DEBUG) {
+			if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
+				panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet!\n",
+					   startingBlock, numBlocks);
+			}
+		}
+		
+		/*
+		 * Mark the blocks in the offset tree.
+		 */
+		rb_err = extent_tree_offset_alloc_space(&hfsmp->offset_tree, numBlocks, startingBlock);
+		if (rb_err) {
+			if (ALLOC_DEBUG) {
+				printf("HFS RBTree Allocator: Could not mark blocks as in-use! %d \n", rb_err);
+			}
+			
+			/* 
+			 * We may have been called through the BlockMarkAllocated interface, in which
+			 * case the requested range need not start at the beginning of a free extent.
+			 * Check here whether the specified extent is free, and if it is, find the
+			 * node containing it.
+			 */
+			extent_node_t *node = NULL;
+			extent_node_t search_sentinel;
+			search_sentinel.offset = startingBlock;
+			
+			node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
+			
+			if (node) {
+				rb_err = extent_tree_offset_alloc_unaligned (&hfsmp->offset_tree, numBlocks, startingBlock);
+			}
+			
+			if (ALLOC_DEBUG) {
+				if (rb_err) {
+					printf ("HFS RBTree Allocator: Still Couldn't mark blocks as in-use! %d\n", rb_err);
+				}
+			}
+		}
+		if (ALLOC_DEBUG) {
+			check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_ALLOC);		
+		}
+	}
+	
+	/* 
+	 * If we encountered a red-black tree error, for now we immediately back off and force
+	 * destruction of the rb-tree.  Set the persistent error-detected bit in the mount point.
+	 * That will ensure that even if we reach a low-water-mark in the future we will still
+	 * not allow the rb-tree to be used.  On next mount, we will force a re-construction from
+	 * on-disk state.  As a fallback, we will now resort to the bitmap-scanning behavior.
+	 */
+	if (rb_err) {
+		/* Mark RB-Trees with error */
+		hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
+		DestroyTrees(hfsmp);
+		/* Reset the Free Ext Cache since we'll be using it now. */
+		ResetVCBFreeExtCache(hfsmp);
+		printf("HFS: Red-Black Allocator Tree BlockMarkAllocated error\n");
+	}
+	
+	return err;
+}
+#endif
+
+
+
+/*
+ * BlockMarkFree
+ * 
+ * This is a wrapper function around the internal calls which will actually mark the blocks
+ * as freed.  It will mark the blocks in the red-black tree if appropriate.  We need to do 
+ * this logic here to avoid callers having to deal with whether or not the red-black tree
+ * is enabled.
+ *
+ */
+OSErr BlockMarkFree(
+	ExtendedVCB		*vcb,
+	u_int32_t		startingBlock,
+	register u_int32_t	numBlocks)
+{
+	struct hfsmount *hfsmp;
+	hfsmp = VCBTOHFS(vcb);
+#if CONFIG_HFS_ALLOC_RBTREE		
+	if (hfs_isrbtree_active(hfsmp)) {		
+		int err;
+		
+		if ((startingBlock >= hfsmp->offset_block_end) && 
+			(hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
+			/* 
+			 * We're manipulating a portion of the bitmap that is not controlled by the
+			 * red-black tree.  Just update the bitmap and don't bother manipulating the tree
+			 */
+			goto justbitmap;
+		}
+		
+		err = BlockMarkFreeRBTree(vcb, startingBlock, numBlocks);
+		if (err == noErr) {
+			if ( ALLOC_DEBUG ) {
+				REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
+				if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
+					panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use!\n",
+						   startingBlock, numBlocks);
+				}
+				check_rbtree_extents (hfsmp, startingBlock, numBlocks, ASSERT_FREE);		
+			}
+		}
+		return err;
+	}
+justbitmap:
+#endif
+	return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
+}
+
+
+/*
+ * BlockMarkFreeUnused
+ * 
+ * Scan the bitmap blocks beyond the end of the current file system for bits 
+ * that are marked as used.  If any of the bits are marked as used,
+ * this function marks them free.
+ *
+ * Note:  This was specifically written to mark all bits beyond the
+ * end of the current file system during hfs_extendfs(), which makes
+ * sure that all the new blocks added to the file system are 
+ * marked as free.   We expect that all the blocks beyond the end of
+ * the current file system are always marked as free, but there might 
+ * be cases where they are marked as used.  This function assumes that 
+ * the number of blocks marked as used incorrectly is relatively
+ * small, otherwise this can overflow the journal transaction size
+ * on certain file system configurations (for example, a large unused 
+ * bitmap with a relatively small journal). 
+ *
+ * Input:
+ * 	startingBlock: First block of the range to mark unused
+ * 	numBlocks: Number of blocks in the range to mark unused
+ *
+ * Returns: zero on success, non-zero on error.
+ */
+OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t	numBlocks)
+{
+	int error = 0;
+	struct hfsmount *hfsmp = VCBTOHFS(vcb);
+	u_int32_t curNumBlocks;
+	u_int32_t  bitsPerBlock;
+	u_int32_t lastBit;
+
+	/* Use the optimal bitmap I/O size instead of bitmap block size */
+	bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
+
+	/* 
+	 * First clear any non bitmap allocation block aligned bits
+	 *
+	 * Calculate the first bit in the bitmap block next to 
+	 * the bitmap block containing the bit for startingBlock.
+	 * Using this value, we calculate the total number of 
+	 * bits to be marked unused from startingBlock to the 
+	 * end of bitmap block containing startingBlock. 
+	 */
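+	/*
+	 * Worked example (values assumed for illustration): with a 4 KiB bitmap
+	 * I/O size, bitsPerBlock = 4096 * 8 = 32768.  If startingBlock is 40000,
+	 * then lastBit = ((40000 + 32767) / 32768) * 32768 = 65536, and
+	 * curNumBlocks = 65536 - 40000 = 25536 bits carry us to the next
+	 * bitmap-block boundary (capped at numBlocks below).
+	 */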
+	lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock;
+	curNumBlocks = lastBit - startingBlock;
+	if (curNumBlocks > numBlocks) {
+		curNumBlocks = numBlocks;
+	}
+	error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
+	if (error) {
+		return error;
+	}
+	startingBlock += curNumBlocks;
+	numBlocks -= curNumBlocks;
+
+	/* 
+	 * Check a full bitmap block for any 'used' bit.  If any bit is used,
+	 * mark all the bits only in that bitmap block as free.  This ensures
+	 * that we do not write unmodified bitmap blocks and do not 
+	 * overwhelm the journal. 
+	 *
+	 * The code starts by checking full bitmap block at a time, and 
+	 * marks entire bitmap block as free only if any bit in that bitmap 
+	 * block is marked as used.  In the end, it handles the last bitmap 
+	 * block which might be partially full by only checking till the 
+	 * caller-specified last bit and if any bit is set, only mark that 
+	 * range as free.
+	 */
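+	/*
+	 * For instance (assumed numbers): with 32768 bits per bitmap I/O block,
+	 * an aligned range of 100,000 blocks is handled as 3 full-block probes
+	 * (3 * 32768 = 98304 bits) plus one final partial check of 1696 bits.
+	 */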
+	while (numBlocks) {
+		if (numBlocks >= bitsPerBlock) {
+			curNumBlocks = bitsPerBlock;
+		} else {
+			curNumBlocks = numBlocks;
+		}
+		if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) {
+			error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
+			if (error) {
+				return error;
+			}
+		}
+		startingBlock += curNumBlocks;
+		numBlocks -= curNumBlocks;
+	}
+
+	return error;
+}
+
+/*
+_______________________________________________________________________
+
+Routine:	BlockMarkFreeInternal
+
+Function:	Mark a contiguous group of blocks as free (clear in the
+			bitmap).  It assumes those bits are currently marked
+			allocated (set in the bitmap).
+
+Inputs:
+	vcb				Pointer to volume where space is to be freed
+	startingBlock	First block number to mark as freed
+	numBlocks		Number of blocks to mark as freed
+	do_validate 	If true, check that the blocks being 
+					deallocated are within totalBlocks for the
+					current volume and that they were actually
+					allocated before they are marked free.
+_______________________________________________________________________
+*/
+static 
+OSErr BlockMarkFreeInternal(
+	ExtendedVCB		*vcb,
+	u_int32_t		startingBlock_in,
+	register u_int32_t	numBlocks_in,
+	Boolean 		do_validate)
+{
+	OSErr			err;
+	u_int32_t	startingBlock = startingBlock_in;
+	u_int32_t	numBlocks = numBlocks_in;
+	register u_int32_t	*currentWord;	//	Pointer to current word within bitmap block
+	register u_int32_t	wordsLeft;		//	Number of words left in this bitmap block
+	register u_int32_t	bitMask;		//	Word with given bits already set (ready to OR in)
+	u_int32_t			firstBit;		//	Bit index within word of first bit to allocate
+	u_int32_t			numBits;		//	Number of bits in word to allocate
+	u_int32_t			*buffer = NULL;
+	uintptr_t  blockRef;
+	u_int32_t  bitsPerBlock;
+	u_int32_t  wordsPerBlock;
+    // XXXdbg
+	struct hfsmount *hfsmp = VCBTOHFS(vcb);
+
+	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0);
+
+	/*
+	 * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
+	 * need to be able to free blocks being relocated during hfs_truncatefs.
+	 */
+	if ((do_validate == true) && 
+	    (startingBlock + numBlocks > vcb->totalBlocks)) {
+		if (ALLOC_DEBUG) {
+			panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN);
+		}
+		
+		printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
+		hfs_mark_volume_inconsistent(vcb);
+		err = EIO;
+		goto Exit;
+	}
+
+	//
+	//	Pre-read the bitmap block containing the first word of allocation
+	//
+
+	err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
+	if (err != noErr) goto Exit;
+	// XXXdbg
+	if (hfsmp->jnl) {
+		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
+	}
+
+	//
+	//	Initialize currentWord, and wordsLeft.
+	//
+	{
+		u_int32_t wordIndexInBlock;
 		
 		bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
 		wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;
@@ -1528,7 +2619,8 @@ OSErr BlockMarkFree(
 			numBits = numBlocks;					//	entire allocation is inside this one word
 			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));	//	turn off bits after last
 		}
-		if ((*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
+		if ((do_validate == true) && 
+		    (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
 			goto Corruption;
 		}
 		*currentWord &= SWAP_BE32 (~bitMask);		//	clear the bits in the bitmap
@@ -1563,7 +2655,8 @@ OSErr BlockMarkFree(
 			currentWord = buffer;
 			wordsLeft = wordsPerBlock;
 		}
-		if (*currentWord != SWAP_BE32 (kAllBitsSetInWord)) {
+		if ((do_validate == true) && 
+		    (*currentWord != SWAP_BE32 (kAllBitsSetInWord))) {
 			goto Corruption;
 		}
 		*currentWord = 0;							//	clear the entire word
@@ -1599,7 +2692,8 @@ OSErr BlockMarkFree(
 			currentWord = buffer;
 			wordsLeft = wordsPerBlock;
 		}
-		if ((*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
+		if ((do_validate == true) && 
+		    (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
 			goto Corruption;
 		}
 		*currentWord &= SWAP_BE32 (~bitMask);			//	clear the bits in the bitmap
@@ -1612,24 +2706,128 @@ Exit:
 	if (buffer)
 		(void)ReleaseBitmapBlock(vcb, blockRef, true);
 
-	if (CONFIG_HFS_TRIM && err == noErr) {
+	if (err == noErr) {
 		hfs_unmap_free_extent(vcb, startingBlock_in, numBlocks_in);
 	}
 
+	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);
 
 	return err;
 
 Corruption:
 #if DEBUG_BUILD
-	panic("hfs: BlockMarkFree: blocks not allocated!");
+	panic("hfs: BlockMarkFreeInternal: blocks not allocated!");
 #else
-	printf ("hfs: BlockMarkFree() trying to free unallocated blocks on volume %s\n", vcb->vcbVN);
+	printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks (%u,%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
 	hfs_mark_volume_inconsistent(vcb);
 	err = EIO;
 	goto Exit;
 #endif
 }
 
+#if CONFIG_HFS_ALLOC_RBTREE
+/*
+ * This is a wrapper function around BlockMarkFreeInternal.  It is
+ * called when the RB Tree-based allocator needs to mark a block as no longer
+ * in use.  It takes any locks that would not normally be necessary for the
+ * plain bitmap deallocator, then calls the internal function.  Once the
+ * on-disk data structures are updated properly, this will update an
+ * existing rb-tree node if possible, or else create a new one. 
+ */
+
+OSErr BlockMarkFreeRBTree(
+	ExtendedVCB		*vcb,
+	u_int32_t		startingBlock,
+	register u_int32_t	numBlocks)
+{
+	OSErr err;
+	struct hfsmount *hfsmp  = VCBTOHFS(vcb);
+	int rb_err = 0;
+	
+	if (ALLOC_DEBUG) {
+		REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
+		if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
+			panic ("HFS RBTree Allocator: Trying to free blocks starting @ %x for %x but blocks not in use! \n",
+				   startingBlock, numBlocks);
+		}
+		check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_ALLOC);		
+	}	
+	
+	err = BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
+	
+	if (err == noErr) {
+		
+		/*
+		 * During a filesystem truncation, we may need to relocate files out of the
+		 * portion of the bitmap that is no longer controlled by the r/b tree. 
+		 * In this case, just update the bitmap and do not attempt to manipulate the tree.
+		 */
+		if ((startingBlock >= hfsmp->offset_block_end) && 
+			(hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
+			goto free_error;
+		}
+		
+		extent_node_t *newnode;
+		
+		if (ALLOC_DEBUG) {
+			/* 
+			 * Validate that the blocks in question are not allocated in the bitmap, and that they're
+			 * not in the offset tree, since it should be tracking free extents, rather than allocated 
+			 * extents
+			 */
+			if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
+				panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks still marked in-use!\n",
+					   startingBlock, numBlocks);
+			}
+		}		
+		
+		if ((hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE) == 0) {
+			if (startingBlock >= hfsmp->offset_block_end) {
+				/*
+				 * If the tree generation code has not yet finished scanning the
+				 * bitmap region containing this extent, do nothing.  If the start 
+				 * of the range to be deallocated is greater than the current high 
+				 * watermark on the offset tree, just bail out and let the scanner catch up with us. 
+				 */							
+				rb_err = 0;
+				goto free_error;
+			}
+		}
+		
+		newnode = extent_tree_free_space(&hfsmp->offset_tree, numBlocks, startingBlock);
+		if (newnode == NULL) {
+			rb_err = 1;
+			goto free_error;
+		}
+		
+		if (ALLOC_DEBUG) {
+			check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_FREE);
+		}
+		
+	}
+	
+free_error:
+	/* 
+	 * We follow the same principle as in BlockMarkAllocatedRBTree. 
+	 * If we encounter an error in adding the extents to the rb-tree, then immediately
+	 * back off, destroy the trees, and persistently set a bit in the runtime hfsmp flags
+	 * to indicate we should not use the rb-tree until next mount, when we can force a rebuild.
+	 */
+	if (rb_err) {
+		/* Mark RB-Trees with error */
+		hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
+		DestroyTrees(hfsmp);
+		/* Reset the Free Ext Cache since we'll be using it now. */
+		ResetVCBFreeExtCache(hfsmp);
+		printf("HFS: Red-Black Allocator Tree BlockMarkFree error\n");
+	}
+	
+	return err;
+}
+#endif
 
 /*
 _______________________________________________________________________
@@ -1639,6 +2837,9 @@ Routine:	BlockFindContiguous
 Function:	Find a contiguous range of blocks that are free (bits
 			clear in the bitmap).  If a contiguous range of the
 			minimum size can't be found, an error will be returned.
+			This is only needed to support the bitmap-scanning logic,
+			as the red-black tree should be able to do this by internally
+			searching its tree.
 
 Inputs:
 	vcb				Pointer to volume where space is to be allocated
@@ -1680,7 +2881,10 @@ static OSErr BlockFindContiguous(
 	register u_int32_t	tempWord;
 	uintptr_t  blockRef;
 	u_int32_t  wordsPerBlock;
-	u_int32_t  j, updated_free_extents = 0, really_add;
+	u_int32_t  updated_free_extent = 0;
+
+	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0);
 
 	/*
 	 * When we're skipping the metadata zone and the start/end
@@ -1916,79 +3120,10 @@ FoundUsed:
 		if (foundBlocks >= minBlocks)
 			break;		//	Found what we needed!
 
-		HFS_MOUNT_LOCK(vcb, TRUE);
-		if (free_extent_cache_active(vcb) == 0) {
-			HFS_MOUNT_UNLOCK(vcb, TRUE);
-			goto skip_cache;
-		}
-		HFS_MOUNT_UNLOCK(vcb, TRUE);
-
-		//	This free chunk wasn't big enough.  Try inserting it into the free extent cache in case
-		//	the allocation wasn't forced contiguous.
-		really_add = 0;
-		for(j=0; j < vcb->vcbFreeExtCnt; j++) {
-			u_int32_t start, end;
-
-			start = vcb->vcbFreeExt[j].startBlock;
-			end   = start + vcb->vcbFreeExt[j].blockCount;
-
-			if (   (firstBlock >= start && firstBlock < end)
-			    || ((firstBlock + foundBlocks) > start && firstBlock < start)) {
-
-				// there's overlap with an existing entry so do not add this
-				break;
-			}
-			
-		}
-
-		if (j >= vcb->vcbFreeExtCnt) {
-			really_add = 1;
-		}
-
-		tempWord = vcb->vcbFreeExtCnt;
-		if (really_add && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) {
-			// Sorted by starting block
-			if (tempWord == kMaxFreeExtents && vcb->vcbFreeExt[kMaxFreeExtents-1].startBlock > firstBlock)
-				--tempWord;
-			if (tempWord < kMaxFreeExtents)
-			{
-				//	We're going to add this extent.  Bubble any smaller extents down in the list.
-				while (tempWord && vcb->vcbFreeExt[tempWord-1].startBlock > firstBlock)
-				{
-					vcb->vcbFreeExt[tempWord] = vcb->vcbFreeExt[tempWord-1];
-					--tempWord;
-				}
-				vcb->vcbFreeExt[tempWord].startBlock = firstBlock;
-				vcb->vcbFreeExt[tempWord].blockCount = foundBlocks;
-			
-				if (vcb->vcbFreeExtCnt < kMaxFreeExtents) {
-					++vcb->vcbFreeExtCnt;
-				}
-				updated_free_extents = 1;
-			}
-		} else if (really_add) {
-			// Sorted by blockCount
-			if (tempWord == kMaxFreeExtents && vcb->vcbFreeExt[kMaxFreeExtents-1].blockCount < foundBlocks)
-				--tempWord;
-			if (tempWord < kMaxFreeExtents)
-			{
-				//	We're going to add this extent.  Bubble any smaller extents down in the list.
-				while (tempWord && vcb->vcbFreeExt[tempWord-1].blockCount < foundBlocks)
-				{
-					vcb->vcbFreeExt[tempWord] = vcb->vcbFreeExt[tempWord-1];
-					--tempWord;
-				}
-				vcb->vcbFreeExt[tempWord].startBlock = firstBlock;
-				vcb->vcbFreeExt[tempWord].blockCount = foundBlocks;
-			
-				if (vcb->vcbFreeExtCnt < kMaxFreeExtents) {
-					++vcb->vcbFreeExtCnt;
-				}
-				updated_free_extents = 1;
-			}
-		}
-skip_cache:
-		sanity_check_free_ext(vcb, 0);
+		/* We did not find the total blocks we were looking for, but 
+		 * let's add this free block run to our free extent cache list
+		 */
+		updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
 
 	} while (currentBlock < stopBlock);
 LoopExit:
@@ -2017,17 +3152,19 @@ ErrorExit:
 		}
 	}
 	
-	if (updated_free_extents && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) {
+	if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) {
 		int i;
 		u_int32_t min_start = vcb->totalBlocks;
 			
 		// set the nextAllocation pointer to the smallest free block number
 		// we've seen so on the next mount we won't rescan unnecessarily
+		lck_spin_lock(&vcb->vcbFreeExtLock);
 		for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) {
 			if (vcb->vcbFreeExt[i].startBlock < min_start) {
 				min_start = vcb->vcbFreeExt[i].startBlock;
 			}
 		}
+		lck_spin_unlock(&vcb->vcbFreeExtLock);
 		if (min_start != vcb->totalBlocks) {
 			if (min_start < vcb->nextAllocation) {
 				vcb->nextAllocation = min_start;
@@ -2041,71 +3178,296 @@ ErrorExit:
 	if (buffer)
 		(void) ReleaseBitmapBlock(vcb, blockRef, false);
 
-	sanity_check_free_ext(vcb, 1);
+	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);
 
 	return err;
 }
 
+
+#if CONFIG_HFS_ALLOC_RBTREE
 /*
- * Test to see if any blocks in a range are allocated.
+ * Wrapper function around hfs_isrbtree_allocated.  This takes the start offset,
+ * the number of blocks, and whether the caller expects the blocks to be free.
+ * This function is designed to be used primarily with the debug #ifdef
+ * enabled, so it results in a panic if anything unexpected occurs.
  *
- * The journal or allocation file lock must be held.
+ * shouldBeFree will be nonzero if the caller expects the zone to be free.
  */
-__private_extern__
-int 
-hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
-{
-	u_int32_t  *currentWord;   // Pointer to current word within bitmap block
-	u_int32_t  wordsLeft;      // Number of words left in this bitmap block
-	u_int32_t  bitMask;        // Word with given bits already set (ready to test)
-	u_int32_t  firstBit;       // Bit index within word of first bit to allocate
-	u_int32_t  numBits;        // Number of bits in word to allocate
-	u_int32_t  *buffer = NULL;
-	uintptr_t  blockRef;
-	u_int32_t  bitsPerBlock;
-	u_int32_t  wordsPerBlock;
-	int  inuse = 0;
-	int  error;
-
-	/*
-	 * Pre-read the bitmap block containing the first word of allocation
-	 */
-	error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
-	if (error)
-		return (error);
-
-	/*
-	 * Initialize currentWord, and wordsLeft.
-	 */
-	{
-		u_int32_t wordIndexInBlock;
-		
-		bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
-		wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
 
-		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
-		currentWord = buffer + wordIndexInBlock;
-		wordsLeft = wordsPerBlock - wordIndexInBlock;
+void check_rbtree_extents (struct hfsmount *hfsmp, u_int32_t startBlocks,
+								 u_int32_t numBlocks, int shouldBeFree) {
+	int alloc;
+	extent_node_t *node1 = NULL;
+	u_int32_t off1 = 0;
+	u_int32_t len1 = 0;
+	alloc = hfs_isrbtree_allocated (hfsmp, startBlocks, numBlocks, &node1);
+	
+	if (node1) {
+		off1 = node1->offset;
+		len1 = node1->length;
 	}
 	
-	/*
-	 * First test any non word aligned bits.
-	 */
-	firstBit = startingBlock % kBitsPerWord;
-	if (firstBit != 0) {
-		bitMask = kAllBitsSetInWord >> firstBit;
-		numBits = kBitsPerWord - firstBit;
-		if (numBits > numBlocks) {
-			numBits = numBlocks;
-			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
-		}
-		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
-			inuse = 1;
-			goto Exit;
+	if (shouldBeFree) {
+		/* 
+		 * If the region should be free, then we expect to see extents in the tree
+		 * matching this start and length.  Alloc != 0 means some portion of the extent
+		 * specified was allocated. 
+		 */ 
+		if (alloc != 0){
+			panic ("HFS check_rbtree_extents: Node (%p) does not exist! "
+				   "node1 off (%d), len(%d), start(%d) end(%d)\n",
+				   node1, off1, len1, startBlocks, numBlocks);
 		}
-		numBlocks -= numBits;
-		++currentWord;
-		--wordsLeft;
+	}
+	else {
+		/* 
+		 * Otherwise, this means that the region should be allocated, and if we find
+		 * an extent matching it, that's bad.
+		 */
+		if (alloc == 0){
+			panic ("HFS check_rbtree_extents: Node (%p) exists! "
+				   "node1 off (%d),len(%d), start(%d) end(%d)\n",
+				   node1, off1, len1, startBlocks, numBlocks);
+		}
+	}
+}
+#endif
+
+#if CONFIG_HFS_ALLOC_RBTREE
+/*
+ * Exhaustive validation search.  This function iterates over all allocation blocks and 
+ * compares their status in the red-black tree vs. the allocation bitmap.  If the two are out of sync
+ * then it will panic.  Bitmap lock must be held while this function is run.
+ *
+ * Because this function requires a red-black tree search to validate every allocation block, it is
+ * very expensive and should ONLY be run in debug mode, and even then, infrequently. 
+ * 
+ * 'end' is non-inclusive, so it should represent the total number of blocks in the volume.
+ * 
+ */
+void
+hfs_validate_rbtree (struct hfsmount *hfsmp, u_int32_t start, u_int32_t end){
+	
+	u_int32_t current;
+	extent_node_t* node1;
+	
+	hfs_checktreelinks (hfsmp);
+	
+	for (current = start; current < end; current++) {
+		node1 = NULL;
+		int rbtree = hfs_isrbtree_allocated(hfsmp, current, 1, &node1);
+		int bitmap = hfs_isallocated(hfsmp, current, 1);
+		
+		if (bitmap != rbtree){
+			panic("HFS: Allocator mismatch @ block %d -- bitmap %d : rbtree %d\n", 
+				  current, bitmap, rbtree);
+		}
+	}
+}
+
+/*
+ * Exhaustive Red-Black Tree Linked List verification routine.  
+ *
+ * This function iterates through the red-black tree's nodes, and then verifies that the linked list
+ * embedded within each of the nodes accurately points to the correct node as its "next" pointer.
+ * The bitmap lock must be held while this function is run.
+ */
+
+void 
+hfs_checktreelinks (struct hfsmount *hfsmp) {
+	extent_tree_offset_t *tree = &hfsmp->offset_tree;
+	
+	extent_node_t *current = NULL;
+	extent_node_t *next = NULL;
+	extent_node_t *treenext;
+	
+	current = extent_tree_off_first (tree);
+	
+	while (current) {
+		next = current->offset_next;
+		treenext = extent_tree_off_next (tree, current);
+		if (next != treenext) {
+			panic("hfs_checktreelinks: mismatch for node (%p), next: %p, treenext %p!\n", current, next, treenext);
+		}
+		current = treenext;
+	}
+}
+
+#endif
+
+
+#if CONFIG_HFS_ALLOC_RBTREE
+/*
+ * Test to see if any free blocks exist at a given offset.
+ * If a node covering the specified offset exists, it will be returned via
+ * ret_node.
+ *
+ * A NULL ret_node indicates that allocated blocks exist at that offset. 
+ * 
+ * Allocation file lock must be held.
+ *
+ * Returns:
+ *	1 if blocks in the range are allocated.
+ *	0 if all blocks in the range are free.
+ */
+
+static int
+hfs_isrbtree_allocated (struct hfsmount *hfsmp, u_int32_t startBlock, 
+						u_int32_t numBlocks, extent_node_t **ret_node) {
+	
+	extent_node_t search_sentinel;
+	extent_node_t *node = NULL;
+	extent_node_t *nextnode = NULL;
+	
+	/*
+	 * With only one tree, we just have to validate that there are entries 
+	 * in the R/B tree at the specified offset if it really is free.
+	 */
+	search_sentinel.offset = startBlock;
+	search_sentinel.length = numBlocks;
+	
+	node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
+	if (node) {
+
+		*ret_node = node;
+		nextnode = extent_tree_off_next (&hfsmp->offset_tree, node);
+		if (nextnode != node->offset_next) {
+			panic ("hfs_isrbtree_allocated: Next pointers out of sync!\n");
+		}
+				
+		/* 
+		 * Check to see if it is a superset of our target range.  Because we started
+		 * with the offset or some offset prior to it, we know the node's offset is 
+		 * <= startBlock.  So, if the end of the node is greater than the end of
+		 * our target range, then the whole range is free.
+		 */ 
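+		/*
+		 * For example (numbers assumed): a node at offset 95 with length 20
+		 * covers a query for blocks 100..109, since 95 <= 100 and
+		 * 95 + 20 = 115 >= 110, so the whole range is free.
+		 */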
+	
+		if ((node->offset + node->length) >= (startBlock + numBlocks)) {
+			if (node->offset > startBlock) {
+				panic ("hfs_isrbtree_allocated: bad node ordering!");
+			}	
+			return 0;
+		}
+	}	
+	/* 
+	 * We got here if either our node search resulted in a node whose extent 
+	 * was strictly before our target offset, or we couldn't find a previous node
+	 * at all (the beginning of the volume).  If the former, then we can infer that 
+	 * at least one block in the target range is allocated, since the next node's
+	 * offset must be greater than startBlock.
+	 *
+	 * Either way, this means that the target range is unavailable to allocate, so
+	 * just return 1.
+	 */	
+	return 1;
+}
+
+
+#endif
+
+/* 
+ * Count number of bits set in the given 32-bit unsigned number 
+ *
+ * Returns:
+ * 	Number of bits set
+ */
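+/*
+ * Uses Kernighan's method: num &= (num - 1) clears the lowest set bit, so
+ * the loop iterates once per set bit.  For example, 0xB (1011) becomes
+ * 0xA (1010), then 0x8 (1000), then 0, for a count of 3.
+ */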
+static int num_bits_set(u_int32_t num) 
+{
+	int count;
+
+	for (count = 0; num; count++) {
+		num &= num - 1;
+	}
+
+	return count;
+}
+
+/* 
+ * For a given range of blocks, find the total number of blocks 
+ * allocated.  If 'stop_on_first' is true, it stops as soon as it 
+ * encounters the first allocated block.  This option is useful 
+ * to determine if any block is allocated or not. 
+ *
+ * Inputs:
+ * 	startingBlock	First allocation block number of the range to be scanned.
+ * 	numBlocks	Total number of blocks that need to be scanned.
+ * 	stop_on_first	Stop the search after the first allocated block is found.
+ *
+ * Output:
+ * 	allocCount	Total number of allocation blocks allocated in the given range.
+ *
+ * 			On error, it is the number of allocated blocks found 
+ * 			before the function got an error. 
+ *
+ * 			If 'stop_on_first' is set, 
+ * 				allocCount = 1 if any allocated block was found.
+ * 				allocCount = 0 if no allocated block was found.
+ *
+ * Returns:
+ * 	0 on success, non-zero on failure. 
+ */
+static int 
+hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock, 
+		u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount)
+{
+	u_int32_t  *currentWord;   // Pointer to current word within bitmap block
+	u_int32_t  wordsLeft;      // Number of words left in this bitmap block
+	u_int32_t  bitMask;        // Word with given bits already set (ready to test)
+	u_int32_t  firstBit;       // Bit index within word of first bit to allocate
+	u_int32_t  numBits;        // Number of bits in word to allocate
+	u_int32_t  *buffer = NULL;
+	uintptr_t  blockRef;
+	u_int32_t  bitsPerBlock;
+	u_int32_t  wordsPerBlock;
+ 	u_int32_t  blockCount = 0;
+	int  error;
+
+	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_START, startingBlock, numBlocks, stop_on_first, 0, 0);
+
+	/*
+	 * Pre-read the bitmap block containing the first word of allocation
+	 */
+	error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
+	if (error)
+		goto JustReturn;
+
+	/*
+	 * Initialize currentWord and wordsLeft.
+	 */
+	{
+		u_int32_t wordIndexInBlock;
+		
+		bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
+		wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
+
+		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
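+		/* e.g. with a 4K vcbVBMIOSize: bitsPerBlock == 32768, so startingBlock 40000 maps to word (40000 & 32767) / 32 == 226 */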
+		currentWord = buffer + wordIndexInBlock;
+		wordsLeft = wordsPerBlock - wordIndexInBlock;
+	}
+	
+	/*
+	 * First test any non word aligned bits.
+	 */
+	firstBit = startingBlock % kBitsPerWord;
+	if (firstBit != 0) {
+		bitMask = kAllBitsSetInWord >> firstBit;
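+		/* e.g. firstBit == 4: bitMask == 0x0FFFFFFF, i.e. the low 28 bits of the word */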
+		numBits = kBitsPerWord - firstBit;
+		if (numBits > numBlocks) {
+			numBits = numBlocks;
+			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
+		}
+		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
+			if (stop_on_first) {
+				blockCount = 1;
+				goto Exit;
+			}
+			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
+		}
+		numBlocks -= numBits;
+		++currentWord;
+		--wordsLeft;
 	}
 
 	/*
@@ -2128,8 +3490,11 @@ hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBl
 			wordsLeft = wordsPerBlock;
 		}
 		if (*currentWord != 0) {
-			inuse = 1;
-			goto Exit;
+			if (stop_on_first) {
+				blockCount = 1;
+				goto Exit;
+			} 
+			blockCount += num_bits_set(*currentWord);
 		}
 		numBlocks -= kBitsPerWord;
 		++currentWord;
@@ -2156,54 +3521,922 @@ hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBl
 			wordsLeft = wordsPerBlock;
 		}
 		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
-			inuse = 1;
-			goto Exit;
+			if (stop_on_first) {
+				blockCount = 1;
+				goto Exit;
+			}
+			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
 		}
 	}
 Exit:
 	if (buffer) {
 		(void)ReleaseBitmapBlock(hfsmp, blockRef, false);
 	}
+	if (allocCount) {
+		*allocCount = blockCount;
+	}
+
+JustReturn:
+	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_END, error, 0, blockCount, 0, 0);
+
+	return (error);
+}
+
+/* 
+ * Count total number of blocks that are allocated in the given 
+ * range from the bitmap.  This is used to preflight total blocks 
+ * that need to be relocated during volume resize.  
+ *
+ * The journal or allocation file lock must be held.
+ *
+ * Returns:
+ * 	0 on success, non-zero on failure.  
+ * 	On failure, allocCount is zero. 
+ */
+int
+hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock,
+		u_int32_t numBlocks, u_int32_t *allocCount)
+{
+	return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount);
+}
+
+/*
+ * Test to see if any blocks in a range are allocated.
+ * 
+ * Note:  On error, this function returns 1, which means that 
+ * one or more blocks in the range are allocated.  This function 
+ * is primarily used for volume resize and we do not want 
+ * to report to the caller that the blocks are free when we 
+ * were not able to deterministically find it out.  So on error, 
+ * we always report that the blocks are allocated.  
+ *
+ * The journal or allocation file lock must be held.
+ *
+ * Returns 
+ *	0 if all blocks in the range are free.
+ *	1 if blocks in the range are allocated, or there was an error.
+ */
+int 
+hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
+{
+	int error; 
+	u_int32_t allocCount;
+
+	error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount);
+	if (error) {
+		/* On error, we always say that the blocks are allocated 
+		 * so that volume resize does not return false success.
+		 */
+		return 1;
+	} else {
+		/* The function was deterministically able to find out 
+		 * if there was any block allocated or not.  In that case,
+		 * the value in allocCount is good enough to be returned 
+		 * back to the caller.
+		 */
+		return allocCount;
+	}
+} 
+
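+/*
+ * Illustrative sketch (not in the original source): a resize preflight
+ * might combine the two wrappers above as follows.
+ *
+ *	u_int32_t allocCount;
+ *
+ *	if (hfs_isallocated(hfsmp, startBlock, numBlocks) == 0) {
+ *		// the whole range is free; safe to truncate over it
+ *	} else if (hfs_count_allocated(hfsmp, startBlock, numBlocks, &allocCount) == 0) {
+ *		// allocCount blocks must be relocated before truncating
+ *	}
+ */
+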
+/*
+ * Check to see if the red-black tree is live.  Allocation file lock must be held
+ * shared or exclusive to call this function. Note that we may call this even if
+ * HFS is built without activating the red-black tree code.
+ */
+__private_extern__
+int 
+hfs_isrbtree_active(struct hfsmount *hfsmp){
+	
+	//TODO: Update this function to deal with a truncate/resize coming in when the tree
+	//isn't fully finished.  maybe we need to check the flags for something other than ENABLED?
+	
+#if CONFIG_HFS_ALLOC_RBTREE
+	if (ALLOC_DEBUG) {
+		REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
+	}
+	if (hfsmp){
+		
+		if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ENABLED) {
+			return 1;
+		}
+	}
+#else
+	#pragma unused (hfsmp)
+#endif
+	/* If the RB Tree code is not enabled, then just always return 0 */
+	return 0;
+}
+
+#if CONFIG_HFS_ALLOC_RBTREE
+/*
+ * This function is basically the same as hfs_isallocated, except it's designed for 
+ * use with the red-black tree validation code.  It assumes we're only checking whether
+ * one bit is active, and that we're going to pass in the buf to use, since GenerateTree
+ * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation.
+ *
+ * This should not be called in general purpose scanning code.
+ */
+int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) {
+	
+	u_int32_t  *currentWord;   // Pointer to current word within bitmap block
+	u_int32_t  bitMask;        // Word with given bits already set (ready to test)
+	u_int32_t  firstBit;       // Bit index within word of first bit to allocate
+	u_int32_t  numBits;        // Number of bits in word to allocate
+	u_int32_t  bitsPerBlock;
+	uintptr_t  blockRef;
+	u_int32_t  wordsPerBlock;
+	u_int32_t  numBlocks = 1;
+	u_int32_t  *buffer = NULL;
+
+	int  inuse = 0;
+	int error;
+	
+	
+	if (bp_buf) {
+		/* just use passed-in buffer if avail. */
+		buffer = bp_buf;
+	}
+	else {
+		/*
+		 * Pre-read the bitmap block containing the first word of allocation
+		 */
+		error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
+		if (error)
+			return (error);
+	}
+	
+	/*
+	 * Initialize currentWord.
+	 */
+	u_int32_t wordIndexInBlock;
+	
+	bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
+	wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
+	
+	wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
+	currentWord = buffer + wordIndexInBlock;
+		
+	/*
+	 * First test any non word aligned bits.
+	 */
+	firstBit = startingBlock % kBitsPerWord;
+	bitMask = kAllBitsSetInWord >> firstBit;
+	numBits = kBitsPerWord - firstBit;
+	if (numBits > numBlocks) {
+		numBits = numBlocks;
+		bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
+	}
+	if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
+		inuse = 1;
+		goto Exit;
+	}
+	numBlocks -= numBits;
+	++currentWord;
+	
+Exit:
+	if(bp_buf == NULL) {
+		if (buffer) {
+			(void)ReleaseBitmapBlock(hfsmp, blockRef, false);
+		}
+	}
 	return (inuse);
+}
+
+/* 
+ * This function scans the specified bitmap block and adds its free extents to the 
+ * offset-sorted red-black tree.  We break this behavior out of GenerateTree so that an allocating
+ * thread can invoke this if the tree does not have enough extents to satisfy 
+ * an allocation request.
+ * 
+ * startbit		- the allocation block at which we need to start our scan.  For instance, 
+ *				we may need to start the normal allocation scan in the middle of an 
+ *				existing bitmap block.
+ * endBit		- the allocation block where we should end this search (inclusive).
+ * bitToScan	- output argument for this function to specify the next bit to scan.
+ *
+ * Returns:
+ *		0 on success
+ *		nonzero on failure. 
+ */
+
+static int hfs_alloc_scan_block(struct hfsmount *hfsmp, u_int32_t startbit, 
+						 u_int32_t endBit, u_int32_t *bitToScan) {
+
+	int error;
+	u_int32_t curAllocBlock;
+	struct buf *blockRef = NULL;
+	u_int32_t *buffer = NULL;
+	u_int32_t wordIndexInBlock;
+	u_int32_t blockSize = (u_int32_t)hfsmp->vcbVBMIOSize;
+	u_int32_t wordsPerBlock = blockSize / kBytesPerWord; 
+	u_int32_t offset = 0;
+	u_int32_t size = 0;
+
+	/* 
+	 * Read the appropriate block from the bitmap file.  ReadBitmapBlock
+	 * figures out which actual on-disk block corresponds to the bit we're 
+	 * looking at.
+	 */	
+	error = ReadBitmapBlock(hfsmp, startbit, &buffer, (uintptr_t*)&blockRef);
+	if (error) {
+		return error;
+	}
+	
+	/* curAllocBlock represents the logical block we're analyzing. */
+	curAllocBlock = startbit;	
+
+	/*  Figure out which word curAllocBlock corresponds to in the block we read  */
+	wordIndexInBlock = (curAllocBlock / kBitsPerWord) % wordsPerBlock;
+	
+	/* Scan a word at a time */
+	while (wordIndexInBlock < wordsPerBlock) {
+		u_int32_t currentWord = SWAP_BE32(buffer[wordIndexInBlock]);
+		u_int32_t curBit;
+		
+		/* modulate curBit because it may start in the middle of a word */
+		for (curBit = curAllocBlock % kBitsPerWord; curBit < kBitsPerWord; curBit++) {
+			
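+			/* Bit 0 of an allocation word is its most significant bit, hence the (kBitsWithinWordMask - curBit) shift. */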
+			u_int32_t is_allocated = currentWord & (1 << (kBitsWithinWordMask - curBit));
+			if (ALLOC_DEBUG) {
+				u_int32_t res = hfs_isallocated_scan (hfsmp, curAllocBlock, buffer); 
+				if ( ((res) && (!is_allocated)) || ((!res) && (is_allocated))) {
+					panic("hfs_alloc_scan: curAllocBit %u, curBit (%d), word (0x%x), is_allocated (0x%x)  res(0x%x) \n",
+						  curAllocBlock, curBit, currentWord, is_allocated, res);
+				}
+			}
+			/* 
+			 * If curBit is not allocated, keep track of the start of the free range.
+			 * Increment a running tally on how many free blocks in a row we've seen.
+			 */
+			if (!is_allocated) {
+				size++;
+				if (offset == 0) {
+					offset = curAllocBlock;
+				}
+			}
+			else {
+				/* 
+				 * If we hit an allocated block, insert the extent that tracked the range
+				 * we saw, and reset our tally counter.
+				 */
+				if (size != 0) {
+					extent_tree_free_space(&hfsmp->offset_tree, size, offset);	
+					size = 0;
+					offset = 0;
+				}
+			}
+			curAllocBlock++;
+			/*
+			 * Exit early if the next bit we'd analyze would take us beyond the end of the 
+			 * range that we're supposed to scan.  
+			 */
+			if (curAllocBlock >= endBit) {
+				goto DoneScanning;
+			}
+		}
+		wordIndexInBlock++;
+	}
+DoneScanning:
+	
+	/* We may have been tracking a range of free blocks that hasn't been inserted yet. */
+	if (size != 0) {
+		extent_tree_free_space(&hfsmp->offset_tree, size, offset);	
+	}
+	/* 
+	 * curAllocBlock represents the next block we need to scan while we're in this 
+	 * function. 
+	 */
+	*bitToScan = curAllocBlock;
+	
+	ReleaseRBScanBitmapBlock(blockRef);
+
+	return 0;
+}
+
+/*
+ * Extern function that is called from mount and upgrade mount routines
+ * that enable us to initialize the tree.
+ */
+
+__private_extern__
+u_int32_t InitTree(struct hfsmount *hfsmp) {
+	extent_tree_init (&(hfsmp->offset_tree));
+	return 0;
+}
+
+
+/*
+ * This function builds the red-black tree for the mount point.  It uses
+ * buf_meta_bread to scan through the bitmap and re-build the tree state.
+ * It is very important to use buf_meta_bread because we need to ensure that we 
+ * read the most current version of the blocks that we're scanning.  If we used 
+ * cluster_io, then journaled transactions could still be sitting in RAM since they are
+ * written to disk in the proper location asynchronously.  
+ *
+ * Because this could still be running when mount has finished, we need to check
+ * after every allocation block we work on whether an unmount or some other 
+ * operation that would cause us to tear down has come in (think downgrade mount).
+ * If an unmount has come in, then abort whatever we're doing and return -1
+ * to indicate we hit an error.  If we don't do this, we'd hold up unmount for
+ * a very long time.
+ *
+ * This function assumes that the bitmap lock is acquired exclusively before being
+ * called.  It will drop the lock and then re-acquire it during operation, but 
+ * will always return with the lock held.
+ */
+__private_extern__
+u_int32_t GenerateTree(struct hfsmount *hfsmp, u_int32_t endBlock, int *flags, int initialscan) {
+	
+	REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
+	
+	u_int32_t *cur_block_eof;
+	int error = 0;
+	
+	int USE_FINE_GRAINED_LOCKING = 0;
+		
+	/* Initialize the block counter while we hold the bitmap lock */
+	cur_block_eof = &hfsmp->offset_block_end;
+	
+	/*
+	 * This loop advances over all allocation bitmap blocks of the current region 
+	 * to scan them and add the results into the red-black tree.  We use the mount point
+	 * variable offset_block_end as our loop counter.  This gives us flexibility
+	 * because we can release the allocation bitmap lock and allow a thread that wants 
+	 * to make an allocation to grab the lock and do some scanning on our behalf while we're 
+	 * waiting to re-acquire the lock.  Then, the allocating thread will only do as much bitmap 
+	 * scanning as needed to fulfill its allocation.
+	 * 
+	 * If the other thread does IO for us, then it will update the offset_block_end 
+	 * variable as well, since it will use the same hfs_alloc_scan_block function to do its bit
+	 * scanning.  So when we re-grab the lock, our current EOF/loop will immediately skip us to the next 
+	 * block that needs scanning.
+	 */
+	
+	while (*cur_block_eof < endBlock) {
+
+		/* 
+		 * If the filesystem is being resized before the bitmap has been fully scanned, we'll 
+		 * update our endBlock to match the current allocation limit in the hfsmp struct.
+		 * The allocLimit field would only be updated while holding the bitmap lock, so we won't
+		 * be executing this code at the same time that the resize is going on.  
+		 */
+		if ((initialscan) && (endBlock != hfsmp->allocLimit)) {			
+			
+			/* If we're past the new/modified allocLimit, then just stop immediately.*/
+			if (*cur_block_eof >= hfsmp->allocLimit ) {
+				break;
+			}
+			endBlock = hfsmp->allocLimit;
+		}
+		
+		/* 
+		 * TODO: fix unmount stuff!
+		 * See rdar://7391404
+		 *
+		 * Once the RB allocator is checked in, we'll want to augment it to not hold the 
+		 * allocation bitmap lock for the entire duration of the tree scan.  For a first check-in
+		 * it's ok to do that but we can't leave it like that forever.
+		 * 
+		 * The gist of the new algorithm will work as follows:
+		 * if an unmount is in flight and has been detected:
+		 *		abort tree-build.
+		 *		unset tree-in-progress bit.
+		 *		wakeup unmount thread
+		 *		unlock allocation bitmap lock, fail out.
+		 *
+		 * The corresponding code in the unmount side should already be in place. 
+		 */
+		
+		error = hfs_alloc_scan_block (hfsmp, *cur_block_eof, endBlock, cur_block_eof);
+				
+		//TODO: Fix this below!
+		if (USE_FINE_GRAINED_LOCKING){
+			hfs_systemfile_unlock(hfsmp, *flags);
+			*flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
+		}
+		//TODO: Infer that if *flags == 0, we don't actually need to lock/unlock. 
+	}
+	
+	return error;
 }
 
-/* Invalidate free extent cache for a given volume.
- * This cache is invalidated and disabled when a volume is being resized 
- * (via hfs_trucatefs() or hfs_extendefs()).
+/*
+ * This function destroys the red-black tree associated with the mount point. 
+ */
+__private_extern__
+void DestroyTrees(struct hfsmount *hfsmp) {
+	
+	if (ALLOC_DEBUG) {
+		REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
+		printf("DestroyTrees: Validating red/black tree for vol %s\n", (char*) hfsmp->vcbVN);
+		hfs_validate_rbtree (hfsmp, 0, hfsmp->offset_block_end );
+	}
+	
+	/*
+	 * extent_tree_destroy will start with the first entry in the tree (by offset), then
+	 * iterate through the tree quickly using its embedded linked list.  This results in tree
+	 * destruction in O(n) time.
+	 */
+	
+	if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ENABLED) {
+		extent_tree_destroy(&hfsmp->offset_tree);
+		
+		/* Mark Trees as disabled */
+		hfsmp->extent_tree_flags &= ~HFS_ALLOC_RB_ENABLED;		
+	}
+	
+	return;
+}	
+
+#endif
+
+/*
+ * This function resets all of the data structures relevant to the
+ * free extent cache stored in the hfsmount struct.  
+ * 
+ * If we are using the red-black tree code then we need to account for the fact that 
+ * we may encounter situations where we need to jettison the tree.  If that is the 
+ * case, then we fail over to the bitmap scanning logic, but we need to ensure that 
+ * the free ext cache is zeroed before we start using it.  
  *
- * Returns: Nothing
+ * We also reset and disable the cache when allocLimit is updated, which 
+ * is when a volume is being resized (via hfs_truncatefs() or hfs_extendfs()). 
+ * This is independent of the type of allocator currently in use.
  */
-void invalidate_free_extent_cache(ExtendedVCB *vcb)
+void ResetVCBFreeExtCache(struct hfsmount *hfsmp) 
 {
-	u_int32_t i;
+	int bytes;
+	void *freeExt;
 
-	HFS_MOUNT_LOCK(vcb, TRUE);
-	for (i = 0; i < vcb->vcbFreeExtCnt; i++) {
-		vcb->vcbFreeExt[i].startBlock = 0;
-		vcb->vcbFreeExt[i].blockCount = 0;
+	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0);
+
+	lck_spin_lock(&hfsmp->vcbFreeExtLock);
+	
+	/* reset Free Extent Count */
+	hfsmp->vcbFreeExtCnt = 0;
+	
+	/* reset the actual array */
+	bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor);
+	freeExt = (void*)(hfsmp->vcbFreeExt);
+	
+	bzero (freeExt, bytes);
+	
+	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+
+	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, 0, 0);
+
+	return;
+}
+
+/*
+ * This function is used to inform the allocator if we have to effectively shrink
+ * or grow the total number of allocation blocks via hfs_truncatefs or hfs_extendfs. 
+ *
+ * The bitmap lock must be held when calling this function.  This function also modifies the
+ * allocLimit field in the hfs mount point structure in the general case. 
+ * 
+ * In the shrinking case, we'll have to remove all free extents from the red-black
+ * tree past the specified offset new_end_block.  In the growth case, we'll have to force
+ * a re-scan of the new allocation blocks from our current allocLimit to the new end block.
+ * 
+ * new_end_block represents the total number of blocks available for allocation in the resized
+ * filesystem.  Block #new_end_block should not be allocatable in the resized filesystem since it
+ * will be outside the [0, new_end_block-1] range that is indexable in the bitmap.
+ *
+ * Returns	0 on success
+ *			errno on failure
+ */
+__private_extern__
+u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) {
+	
+	/* 
+	 * Update allocLimit to the argument specified, but don't do anything else 
+	 * if the red/black tree is not enabled.
+	 */
+	hfsmp->allocLimit = new_end_block;
+
+	/* Invalidate the free extent cache completely so that 
+	 * it does not have any extents beyond the end of the 
+	 * current volume.
+	 */
+	ResetVCBFreeExtCache(hfsmp);
+
+#if CONFIG_HFS_ALLOC_RBTREE
+	/* Shrinking the existing filesystem */
+	if ((new_end_block < hfsmp->offset_block_end) &&
+		(hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE)) {	
+		extent_node_t search_sentinel;
+		extent_node_t *node = NULL;
+		/* Remover points to the current item to free/remove from the tree */
+		extent_node_t *remover = NULL;
+		
+		/* Begin search at the specified offset */
+		memset (&search_sentinel, 0, sizeof(extent_node_t));
+		search_sentinel.offset = new_end_block;
+				
+		/* 
+		 * Find the free extent at, or nearest before, the new end block by searching
+		 * from the starting point or 1 earlier.  We may need to split apart an existing node
+		 * if it straddles the new alloc limit.
+		 */
+		node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
+		if (node) {
+			/* If it's an exact match, then just remove them all from this point forward */
+			if (node->offset == new_end_block) {
+				/* 
+				 * Find the previous entry and update its next pointer to NULL
+				 * since this entry is biting the dust.  Update remover to node.
+				 */
+				extent_node_t *prev = NULL;
+				prev = extent_tree_off_prev (&hfsmp->offset_tree, node);
+				if (prev) {
+					prev->offset_next = NULL;
+				}
+				remover = node;
+			}
+			else {
+				/* See if we need to split this node */
+				if ((node->offset + node->length) > new_end_block) {
+					/* 
+					 * Update node to reflect its new size up until new_end_block.
+					 */
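+					/* e.g. a node (offset 90, length 20) with new_end_block 100 is trimmed to length 10 */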
+					remover = node->offset_next;
+					node->length = new_end_block - node->offset;
+					/* node is becoming the last free extent in the volume.  */
+					node->offset_next = NULL;
+				}
+				else {
+					if (node->offset_next == NULL) {
+						/*
+						 * 'node' points to the last free extent in the volume. 
+						 * Coincidentally, it is also before the new cut-off point at which 
+						 * we will stop representing bitmap values in the tree.  Just bail out now.
+						 */
+						return 0;
+					}
+					/* 
+					 * Otherwise, point our temp variable 'remover' to the node where
+					 * we'll need to start yanking things out of the tree, and make 'node' 
+					 * the last element of the embedded linked list.
+					 */
+					remover = node->offset_next;
+					if (remover->offset <= new_end_block) {
+						panic ("UpdateAllocLimit: Invalid RBTree node next ptr!");
+					}
+					node->offset_next = NULL;
+				}
+			}
+			
+			/* 
+			 * Remover is our "temp" pointer that points to the current node to remove from 
+			 * the offset tree.  We'll simply iterate through the embedded linked list, removing
+			 * each element from the tree and freeing it as we go.
+			 */
+			while (remover) {
+				extent_node_t *next = remover->offset_next;
+				extent_tree_remove_node (&hfsmp->offset_tree, remover);
+				free_node (remover);
+				remover = next;
+			}
+			
+			if (ALLOC_DEBUG) {
+				printf ("UpdateAllocLimit: Validating rbtree after truncation\n");
+				hfs_validate_rbtree (hfsmp, 0, new_end_block-1);
+			}
+			
+			/* 
+			 * Don't forget to shrink offset_block_end after a successful truncation. 
+			 * new_end_block should represent the number of blocks available on the 
+			 * truncated volume.
+			 */
+			
+			hfsmp->offset_block_end = new_end_block;
+			
+			return 0;
+		}
+		else {
+			if (ALLOC_DEBUG) {
+				panic ("UpdateAllocLimit: no prev!");
+			}
+			return ENOSPC;
+		}
 	}
-	vcb->vcbFreeExtCnt = 0;
-	HFS_MOUNT_UNLOCK(vcb, TRUE);
+	/* Growing the existing filesystem */
+	else if ((new_end_block > hfsmp->offset_block_end) &&
+		(hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE)) {	
+		int flags = 0;
+		int retval = 0;
+		
+		if (ALLOC_DEBUG) {
+			printf ("UpdateAllocLimit: Validating rbtree prior to growth\n");
+			hfs_validate_rbtree (hfsmp, 0, hfsmp->offset_block_end);
+		}
+		
+		
+		retval = GenerateTree (hfsmp, new_end_block, &flags, 0);
+		
+		/*
+		 * Don't forget to update offset_block_end after a successful tree extension.
+		 */
+		if (retval == 0) {
+			
+			if (ALLOC_DEBUG) {
+				printf ("UpdateAllocLimit: Validating rbtree after growth\n");
+				hfs_validate_rbtree (hfsmp, 0, new_end_block);
+			}
+			
+			hfsmp->offset_block_end = new_end_block;
+		}
+		
+		return retval;
+	}
+	/* Otherwise, do nothing; fall through to the code below. */	
+	printf ("error : off_block_end: %u, alloclimit: %u, new_end_block: %u\n", 
+			hfsmp->offset_block_end, hfsmp->allocLimit, new_end_block);
+#endif
+
+	return 0;
+
+}
+
+
+/*
+ * Remove an entry from free extent cache after it has been allocated.
+ *
+ * This function removes each overlapping cache entry in its entirety; it does not split extents.  
+ *
+ * Inputs: 
+ * 	hfsmp		- mount point structure 
+ * 	startBlock	- starting block of the extent to be removed. 
+ * 	blockCount	- number of blocks of the extent to be removed.
+ */
+static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
+{
+	int i, j;
+	int extentsRemoved = 0;
+	u_int32_t start, end;
+
+#if CONFIG_HFS_ALLOC_RBTREE
+	/* If red-black tree is enabled, no free extent cache is necessary */
+	if (hfs_isrbtree_active(hfsmp) == true) {
+		return;
+	}
+#endif
+
+	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
+
+	lck_spin_lock(&hfsmp->vcbFreeExtLock);
+
+	for (i = 0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
+		start = hfsmp->vcbFreeExt[i].startBlock;
+		end = start + hfsmp->vcbFreeExt[i].blockCount;
+
+		/* If the extent to remove from free extent list starts within 
+		 * this free extent, or, if it starts before this free extent 
+		 * but ends in this free extent, remove it by shifting all other
+		 * extents.
+		 */
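+		/* i.e. the half-open ranges [startBlock, startBlock + blockCount) and [start, end) intersect */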
+		if (((startBlock >= start) && (startBlock < end)) ||
+		    ((startBlock < start) && (startBlock + blockCount) > start)) {
+			for (j = i; j < (int)hfsmp->vcbFreeExtCnt - 1; j++) {
+				hfsmp->vcbFreeExt[j] = hfsmp->vcbFreeExt[j+1];
+			}
+			hfsmp->vcbFreeExtCnt--;
+			/* Decrement the index so that we check the extent 
+			 * that just got shifted to the current index.
+			 */
+			i--;
+			extentsRemoved++;
+		}
+		/* Continue looping, as we might have to invalidate multiple extents; 
+		 * this is probably not possible in the normal case, but it does not hurt.
+		 */
+	}
+	
+	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+
+	sanity_check_free_ext(hfsmp, 0);
+
+	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0);
 
 	return;
 }
 
-/* Check whether free extent cache is active or not. 
- * This cache is invalidated and disabled when a volume is being resized 
- * (via hfs_trucatefs() or hfs_extendefs()).
+/*
+ * Add an entry to free extent cache after it has been deallocated.  
  *
- * This function assumes that the caller is holding the lock on 
- * the mount point.
+ * If the extent provided has blocks beyond current allocLimit, it 
+ * is clipped to allocLimit.  This function does not merge contiguous 
+ * extents, if they already exist in the list.
  *
- * Returns: 0 if the cache is not active,
- *          1 if the cache is active.
+ * Inputs: 
+ * 	hfsmp		- mount point structure 
+ * 	startBlock	- starting block of the extent to be added. 
+ * 	blockCount	- number of blocks of the extent to be added.
+ *
+ * Returns:
+ * 	true		- if the extent was added successfully to the list
+ * 	false		- if the extent was not added to the list, maybe because 
+ * 			  the extent was beyond allocLimit, or is not the best 
+ * 			  candidate to be put in the cache.
  */
-static int free_extent_cache_active(ExtendedVCB *vcb)
+static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount) 
 {
-	int retval = 1;
+	Boolean retval = false;
+	u_int32_t start, end;
+	int i; 
+	
+	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);
+
+	/*
+	 * If using the red-black tree allocator, then there's no need to special case 
+	 * for the sparse device case.  We'll simply add the region we've recently freed
+	 * to the red-black tree, where it will get sorted by offset and length.  The only special 
+	 * casing will need to be done on the allocation side, where we may favor free extents
+	 * based on offset even if it will cause fragmentation.  This may be true, for example, if
+	 * we are trying to reduce the number of bandfiles created in a sparse bundle disk image. 
+	 */
+#if CONFIG_HFS_ALLOC_RBTREE
+	if (hfs_isrbtree_active(hfsmp) == true) {
+		goto out_not_locked;
+	}
+#endif
+
+	/* No need to add extent that is beyond current allocLimit */
+	if (startBlock >= hfsmp->allocLimit) {
+		goto out_not_locked;
+	}
+
+	/* If end of the free extent is beyond current allocLimit, clip the extent */
+	if ((startBlock + blockCount) > hfsmp->allocLimit) {
+		blockCount = hfsmp->allocLimit - startBlock;
+	}
+
+	lck_spin_lock(&hfsmp->vcbFreeExtLock);
 
-	if (vcb->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
-		retval = 0;
+	/* If the free extent cache is full and the new extent compares 
+	 * unfavorably with the last (lowest-priority) extent, skip adding it to the list.
+	 */
+	if (hfsmp->vcbFreeExtCnt == kMaxFreeExtents) {
+		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+			/* For sparse disks, free extent cache list is sorted by start block, lowest first */
+			if (startBlock > hfsmp->vcbFreeExt[kMaxFreeExtents-1].startBlock) {
+				goto out;
+			} 
+		} else {
+			/* For normal mounts, free extent cache list is sorted by total blocks, highest first */
+			if (blockCount <= hfsmp->vcbFreeExt[kMaxFreeExtents-1].blockCount) {
+				goto out;
+			} 
+		}
 	}
+
+	/* Check if the current extent overlaps with any of the existing 
+	 * extents.  If yes, just skip adding it to the list.  We have 
+	 * to do this check before shifting the extent records.
+	 */
+	for (i = 0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
+
+		start = hfsmp->vcbFreeExt[i].startBlock;
+		end = start + hfsmp->vcbFreeExt[i].blockCount;
+
+		if (((startBlock >= start) && (startBlock < end)) ||
+		    ((startBlock < start) && (startBlock + blockCount) > start)) {
+			goto out;
+		}
+	}
+
+	/* Scan the free extent cache array from tail to head until 
+	 * we find the entry after which our new entry should be 
+	 * inserted.  After we break out of this loop, the new entry 
+	 * will be inserted at 'i+1'.
+	 */
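+	/*
+	 * e.g. (normal mount, counts descending): for a cached list of counts {100, 40, 10}
+	 * and a new extent of 50 blocks, 10 and 40 shift right, the scan stops at 100 (i == 0),
+	 * and the new extent lands at i+1 == 1, giving {100, 50, 40, 10}.
+	 */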
+	for (i = (int)hfsmp->vcbFreeExtCnt-1; i >= 0; i--) {
+		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+			/* For sparse devices, find entry with smaller start block than ours */
+			if (hfsmp->vcbFreeExt[i].startBlock < startBlock) {
+				break;
+			}
+		} else {
+			/* For normal devices, find entry with greater block count than ours */
+			if (hfsmp->vcbFreeExt[i].blockCount >= blockCount) {
+				break;
+			}
+		}
+
+		/* If this is not the right spot to insert, and this is 
+		 * not the last entry in the array, just shift it and 
+		 * continue checking the next one. 
+		 */
+		if ((i+1) < kMaxFreeExtents) {
+			hfsmp->vcbFreeExt[i+1] = hfsmp->vcbFreeExt[i];
+		}
+	}
+	/* 'i' now points to the entry after which the new extent will be inserted (at index i+1) */
+	hfsmp->vcbFreeExt[i+1].startBlock = startBlock;
+	hfsmp->vcbFreeExt[i+1].blockCount = blockCount;
+	if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents) {
+		hfsmp->vcbFreeExtCnt++;
+	}
+	retval = true;
+
+out:
+	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+out_not_locked:
+	sanity_check_free_ext(hfsmp, 0);
+
+	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0);
+
 	return retval;
 }
+
+/* Debug function to check if the free extent cache is good or not */
+static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated)
+{
+	u_int32_t i, j;
+
+	/* Do not do anything if debug is not on, or if we're using the red-black tree */
+	if ((ALLOC_DEBUG == 0) || (hfs_isrbtree_active(hfsmp) == true)) {
+		return;
+	}
+
+	lck_spin_lock(&hfsmp->vcbFreeExtLock);
+	
+	/* 
+	 * Iterate the Free extent cache and ensure no entries are bogus or refer to
+	 * allocated blocks.
+	 */
+	for(i=0; i < hfsmp->vcbFreeExtCnt; i++) {
+		u_int32_t start, nblocks;
+
+		start   = hfsmp->vcbFreeExt[i].startBlock;
+		nblocks = hfsmp->vcbFreeExt[i].blockCount;
+
+		//printf ("hfs: %p: slot:%d (%u,%u)\n", hfsmp, i, start, nblocks);
+
+		/* Check if any of the blocks in free extent cache are allocated.  
+		 * This should not always be enabled because it might take 
+		 * very long for large extents that get added to the list.
+		 *
+		 * We have to drop vcbFreeExtLock while we call hfs_isallocated
+		 * because it is going to do I/O.  Note that the free extent
+		 * cache could change.  That's a risk we take when using this
+		 * debugging code.  (Another alternative would be to try to
+		 * detect when the free extent cache changed, and perhaps
+		 * restart if the list changed while we dropped the lock.)
+		 */
+		if (check_allocated) {
+			lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+			if (hfs_isallocated(hfsmp, start, nblocks)) {
+				panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n",
+					  hfsmp, i, start, nblocks);
+			}
+			lck_spin_lock(&hfsmp->vcbFreeExtLock);
+		}
+
+		/* Check if any part of the extent is beyond allocLimit */
+		if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) {
+			panic ("hfs: %p: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n",
+					hfsmp, i, start, nblocks, hfsmp->allocLimit);
+		}
+
+		/* Check if there are any duplicate start blocks */
+		for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) {
+			if (start == hfsmp->vcbFreeExt[j].startBlock) {
+				panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicate\n", 
+				      hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock, 
+				      hfsmp->vcbFreeExt[j].blockCount);
+			}
+		}
+
+		/* Check if the entries are out of order */
+		if ((i+1) != hfsmp->vcbFreeExtCnt) {
+			if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
+				/* sparse devices are sorted by starting block number (ascending) */
+				if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) {
+					panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n", 
+						hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock, 
+						hfsmp->vcbFreeExt[i+1].blockCount);
+				}
+			} else {
+				/* normally sorted by block count (descending) */
+				if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) {
+					panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n", 
+						hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock, 
+						hfsmp->vcbFreeExt[i+1].blockCount);
+				}
+			}
+		}
+	}
+	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
+}
diff --git a/bsd/hfs/hfscommon/headers/FileMgrInternal.h b/bsd/hfs/hfscommon/headers/FileMgrInternal.h
index 307178907..7276daa26 100644
--- a/bsd/hfs/hfscommon/headers/FileMgrInternal.h
+++ b/bsd/hfs/hfscommon/headers/FileMgrInternal.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -32,7 +32,7 @@
 
 	Version:	HFS Plus 1.0
 
-	Copyright:	© 1996-2001 by Apple Computer, Inc., all rights reserved.
+	Copyright:	© 1996-2001 by Apple Computer, Inc., all rights reserved.
 
 */
 #ifndef __FILEMGRINTERNAL__
@@ -189,6 +189,8 @@ ExchangeFileIDs					(ExtendedVCB *			volume,
 								 u_int32_t				srcHint,
 								 u_int32_t				destHint );
 
+EXTERN_API_C( OSErr )
+MoveData( ExtendedVCB *vcb, HFSCatalogNodeID srcID, HFSCatalogNodeID destID, int rsrc);
 
 /* BTree Manager Routines*/
 
@@ -232,7 +234,7 @@ BlockDeallocate					(ExtendedVCB *			vcb,
 								 u_int32_t				flags);
 
 EXTERN_API_C ( void )
-invalidate_free_extent_cache	(ExtendedVCB *			vcb);
+ResetVCBFreeExtCache(struct hfsmount *hfsmp);
 
 EXTERN_API_C( OSErr )
 BlockMarkAllocated(ExtendedVCB *vcb, u_int32_t startingBlock, u_int32_t numBlocks);
@@ -240,8 +242,28 @@ BlockMarkAllocated(ExtendedVCB *vcb, u_int32_t startingBlock, u_int32_t numBlock
 EXTERN_API_C( OSErr )
 BlockMarkFree( ExtendedVCB *vcb, u_int32_t startingBlock, u_int32_t numBlocks);
 
+EXTERN_API_C( OSErr )
+BlockMarkFreeUnused( ExtendedVCB *vcb, u_int32_t startingBlock, u_int32_t numBlocks);
+
 EXTERN_API_C( u_int32_t )
 MetaZoneFreeBlocks(ExtendedVCB *vcb);
+	
+EXTERN_API_C( u_int32_t )
+UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block);
+	
+#if CONFIG_HFS_ALLOC_RBTREE
+EXTERN_API_C( u_int32_t )
+GenerateTree( struct hfsmount *hfsmp, u_int32_t end_block, int *flags, int initialscan);
+	
+EXTERN_API_C( void )
+DestroyTrees( struct hfsmount *hfsmp);
+	
+EXTERN_API_C( u_int32_t )
+InitTree(struct hfsmount *hfsmp);	
+#endif
 
 /*	File Extent Mapping routines*/
 EXTERN_API_C( OSErr )
@@ -256,11 +278,9 @@ CompareExtentKeysPlus			(const HFSPlusExtentKey *searchKey,
 								 const HFSPlusExtentKey *trialKey);
 
 EXTERN_API_C( OSErr )
-TruncateFileC					(ExtendedVCB *			vcb,
-								 FCB *					fcb,
-								 int64_t 				peof,
-								 Boolean 				truncateToExtent);
-
+TruncateFileC (ExtendedVCB *vcb, FCB *fcb, int64_t peof, int deleted, 
+			   int rsrc, uint32_t fileid, Boolean truncateToExtent);
+	
 EXTERN_API_C( OSErr )
 ExtendFileC						(ExtendedVCB *			vcb,
 								 FCB *					fcb,
diff --git a/bsd/hfs/hfscommon/headers/HybridAllocator.h b/bsd/hfs/hfscommon/headers/HybridAllocator.h
new file mode 100644
index 000000000..4add9daee
--- /dev/null
+++ b/bsd/hfs/hfscommon/headers/HybridAllocator.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+
+#ifndef __HYBRID_ALLOC__
+#define __HYBRID_ALLOC__
+
+#include <sys/types.h>
+#include "RedBlackTree.h"
+
+typedef struct extent_node extent_node_t;
+
+struct extent_node
+{
+	u_int32_t length;
+	u_int32_t offset;
+	struct extent_node *offset_next;
+	rb_node(extent_node_t) offset_link;
+};
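+
+/*
+ * Each node carries both red-black linkage (offset_link) and an embedded,
+ * offset-ordered singly linked list pointer (offset_next), so iteration and
+ * teardown can walk the nodes in O(n) without full tree traversal.
+ */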
+
+typedef rb_tree(extent_node_t) extent_tree_offset_t;
+
+extern extent_node_t *
+alloc_node(u_int32_t length, u_int32_t offset);
+
+extern void
+free_node(extent_node_t *node); 
+
+extern extent_node_t *
+extent_tree_free_space( extent_tree_offset_t *offset_tree, u_int32_t size, u_int32_t offset);
+
+extern void
+extent_tree_offset_print(extent_tree_offset_t *offset_tree);
+
+extern int32_t
+extent_tree_offset_alloc_space(extent_tree_offset_t *offset_tree, u_int32_t size, u_int32_t offset);
+
+extern int32_t
+extent_tree_offset_alloc_unaligned(extent_tree_offset_t *tree, u_int32_t size, u_int32_t offset);
+
+
+extern void
+extent_tree_remove_node (extent_tree_offset_t *offset_tree, extent_node_t * node);
+
+extern extent_node_t *
+extent_tree_off_first (extent_tree_offset_t *offset_tree);
+
+extern extent_node_t *
+extent_tree_off_search(extent_tree_offset_t *offset_tree, extent_node_t *node);
+
+extern extent_node_t *
+extent_tree_off_search_next(extent_tree_offset_t *offset_tree, extent_node_t *node);
+
+extern extent_node_t *
+extent_tree_off_search_nextWithSize(extent_tree_offset_t *offset_tree, extent_node_t *node);
+
+extern extent_node_t *
+extent_tree_off_search_prev(extent_tree_offset_t *offset_tree, extent_node_t *node);
+
+extern extent_node_t *
+extent_tree_off_next(extent_tree_offset_t *offset_tree, extent_node_t *node);
+
+extern extent_node_t *
+extent_tree_off_prev(extent_tree_offset_t *offset_tree, extent_node_t *node);
+
+extern void
+extent_tree_init(extent_tree_offset_t *offset_tree);
+
+extern void
+extent_tree_destroy(extent_tree_offset_t *offset_tree);
+
+extern int
+cmp_offset_node(extent_node_t *node_1, extent_node_t *node_2);
+
+
+#endif
diff --git a/bsd/hfs/hfscommon/headers/RedBlackTree.h b/bsd/hfs/hfscommon/headers/RedBlackTree.h
new file mode 100644
index 000000000..21342296c
--- /dev/null
+++ b/bsd/hfs/hfscommon/headers/RedBlackTree.h
@@ -0,0 +1,969 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/******************************************************************************
+ *
+ * Copyright (C) 2008 Jason Evans <jasone@FreeBSD.org>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice(s), this list of conditions and the following disclaimer
+ *    unmodified other than the allowable addition of one or more
+ *    copyright notices.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice(s), this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ******************************************************************************
+ *
+ * cpp macro implementation of left-leaning red-black trees.
+ *
+ * Usage:
+ *
+ *   (Optional, see assert(3).)
+ *   #define NDEBUG
+ *
+ *   (Required.)
+ *   #include <assert.h>
+ *   #include <rb.h>
+ *   ...
+ *
+ * All operations are done non-recursively.  Parent pointers are not used, and
+ * color bits are stored in the least significant bit of right-child pointers,
+ * thus making node linkage as compact as is possible for red-black trees.
+ *
+ * Some macros use a comparison function pointer, which is expected to have the
+ * following prototype:
+ *
+ *   int (*a_cmp)(a_type *a_node, a_type *a_other);
+ *                        ^^^^^^
+ *                     or a_key
+ *
+ * Interpretation of comparison function return values:
+ *
+ *   -1 : a_node <  a_other
+ *    0 : a_node == a_other
+ *    1 : a_node >  a_other
+ *
+ * In all cases, the a_node or a_key macro argument is the first argument to the
+ * comparison function, which makes it possible to write comparison functions
+ * that treat the first argument specially.
+ *
+ ******************************************************************************/
+
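+/*
+ * Illustrative sketch (an editor's addition, not part of the original header):
+ * a client embeds rb_node() in its node type, declares a tree type with
+ * rb_tree(), and supplies a comparison function with the contract described above.
+ *
+ *	typedef struct ex_node_s ex_node_t;
+ *	struct ex_node_s {
+ *		int key;
+ *		rb_node(ex_node_t) link;
+ *	};
+ *	typedef rb_tree(ex_node_t) ex_tree_t;
+ *
+ *	static int
+ *	ex_cmp(ex_node_t *a, ex_node_t *b)
+ *	{
+ *		return ((a->key > b->key) - (a->key < b->key));
+ *	}
+ *
+ *	ex_tree_t tree;
+ *	rb_new(ex_node_t, link, &tree);
+ */
+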
+#ifndef RB_H_
+#define	RB_H_
+
+#define	RB_COMPACT
+#ifdef RB_COMPACT
+/* Node structure. */
+#define	rb_node(a_type)							\
+struct {								\
+    a_type *rbn_left;							\
+    a_type *rbn_right_red;						\
+}
+#else
+#define	rb_node(a_type)							\
+struct {								\
+    a_type *rbn_left;							\
+    a_type *rbn_right;							\
+    bool rbn_red;							\
+}
+#endif
+
+/* Root structure. */
+#define	rb_tree(a_type)							\
+struct {								\
+    a_type *rbt_root;							\
+    a_type rbt_nil;							\
+}
+
+/* Left accessors. */
+#define	rbp_left_get(a_type, a_field, a_node)				\
+    ((a_node)->a_field.rbn_left)
+#define	rbp_left_set(a_type, a_field, a_node, a_left) do {		\
+    (a_node)->a_field.rbn_left = a_left;				\
+} while (0)
+
+#ifdef RB_COMPACT
+/* Right accessors. */
+#define	rbp_right_get(a_type, a_field, a_node)				\
+    ((a_type *) (((intptr_t) (a_node)->a_field.rbn_right_red)		\
+      & ((ssize_t)-2)))
+#define	rbp_right_set(a_type, a_field, a_node, a_right) do {		\
+    (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) a_right)	\
+      | (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1)));	\
+} while (0)
+
+/* Color accessors. */
+#define	rbp_red_get(a_type, a_field, a_node)				\
+    ((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red)		\
+      & ((size_t)1)))
+#define	rbp_color_set(a_type, a_field, a_node, a_red) do {		\
+    (a_node)->a_field.rbn_right_red = (a_type *) ((((intptr_t)		\
+      (a_node)->a_field.rbn_right_red) & ((ssize_t)-2))			\
+      | ((ssize_t)a_red));						\
+} while (0)
+#define	rbp_red_set(a_type, a_field, a_node) do {			\
+    (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t)		\
+      (a_node)->a_field.rbn_right_red) | ((size_t)1));			\
+} while (0)
+#define	rbp_black_set(a_type, a_field, a_node) do {			\
+    (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t)		\
+      (a_node)->a_field.rbn_right_red) & ((ssize_t)-2));		\
+} while (0)
+#else
+/* Right accessors. */
+#define	rbp_right_get(a_type, a_field, a_node)				\
+    ((a_node)->a_field.rbn_right)
+#define	rbp_right_set(a_type, a_field, a_node, a_right) do {		\
+    (a_node)->a_field.rbn_right = a_right;				\
+} while (0)
+
+/* Color accessors. */
+#define	rbp_red_get(a_type, a_field, a_node)				\
+    ((a_node)->a_field.rbn_red)
+#define	rbp_color_set(a_type, a_field, a_node, a_red) do {		\
+    (a_node)->a_field.rbn_red = (a_red);				\
+} while (0)
+#define	rbp_red_set(a_type, a_field, a_node) do {			\
+    (a_node)->a_field.rbn_red = true;				\
+} while (0)
+#define	rbp_black_set(a_type, a_field, a_node) do {			\
+    (a_node)->a_field.rbn_red = false;				\
+} while (0)
+#endif
+
+/* Node initializer. */
+#define	rbp_node_new(a_type, a_field, a_tree, a_node) do {		\
+    rbp_left_set(a_type, a_field, (a_node), &(a_tree)->rbt_nil);	\
+    rbp_right_set(a_type, a_field, (a_node), &(a_tree)->rbt_nil);	\
+    rbp_red_set(a_type, a_field, (a_node));				\
+} while (0)
+
+/* Tree initializer. */
+#define	rb_new(a_type, a_field, a_tree) do {				\
+    (a_tree)->rbt_root = &(a_tree)->rbt_nil;				\
+    rbp_node_new(a_type, a_field, a_tree, &(a_tree)->rbt_nil);		\
+    rbp_black_set(a_type, a_field, &(a_tree)->rbt_nil);			\
+} while (0)
+
+/* Tree operations. */
+#define	rbp_black_height(a_type, a_field, a_tree, r_height) do {	\
+    a_type *rbp_bh_t;							\
+    for (rbp_bh_t = (a_tree)->rbt_root, (r_height) = 0;			\
+		rbp_bh_t != &(a_tree)->rbt_nil;					\
+		rbp_bh_t = rbp_left_get(a_type, a_field, rbp_bh_t)) {		\
+			if (rbp_red_get(a_type, a_field, rbp_bh_t) == false) {		\
+				(r_height)++;						\
+			}								\
+	}									\
+} while (0)
+
+#define	rbp_first(a_type, a_field, a_tree, a_root, r_node) do {		\
+    for ((r_node) = (a_root);						\
+		rbp_left_get(a_type, a_field, (r_node)) != &(a_tree)->rbt_nil;	\
+      (r_node) = rbp_left_get(a_type, a_field, (r_node))) {		\
+    }									\
+} while (0)
+
+#define	rbp_last(a_type, a_field, a_tree, a_root, r_node) do {		\
+    for ((r_node) = (a_root);						\
+		rbp_right_get(a_type, a_field, (r_node)) != &(a_tree)->rbt_nil;	\
+		(r_node) = rbp_right_get(a_type, a_field, (r_node))) {		\
+    }									\
+} while (0)
+
+#define	rbp_next(a_type, a_field, a_cmp, a_tree, a_node, r_node) do {	\
+    if (rbp_right_get(a_type, a_field, (a_node)) != &(a_tree)->rbt_nil) {						\
+		rbp_first(a_type, a_field, a_tree, rbp_right_get(a_type,	\
+					a_field, (a_node)), (r_node));				\
+    } else {								\
+		a_type *rbp_n_t = (a_tree)->rbt_root;				\
+		assert(rbp_n_t != &(a_tree)->rbt_nil);				\
+		(r_node) = &(a_tree)->rbt_nil;					\
+		while (true) {							\
+			int rbp_n_cmp = (a_cmp)((a_node), rbp_n_t);			\
+			if (rbp_n_cmp < 0) {					\
+				(r_node) = rbp_n_t;					\
+				rbp_n_t = rbp_left_get(a_type, a_field, rbp_n_t);	\
+			} else if (rbp_n_cmp > 0) {					\
+				rbp_n_t = rbp_right_get(a_type, a_field, rbp_n_t);	\
+			} else {							\
+				break;							\
+			}								\
+			assert(rbp_n_t != &(a_tree)->rbt_nil);			\
+		}								\
+    }									\
+} while (0)
+
+#define	rbp_prev(a_type, a_field, a_cmp, a_tree, a_node, r_node) do {	\
+    if (rbp_left_get(a_type, a_field, (a_node)) != &(a_tree)->rbt_nil) {\
+		rbp_last(a_type, a_field, a_tree, rbp_left_get(a_type,		\
+				a_field, (a_node)), (r_node));				\
+    } else {								\
+		a_type *rbp_p_t = (a_tree)->rbt_root;				\
+		assert(rbp_p_t != &(a_tree)->rbt_nil);				\
+		(r_node) = &(a_tree)->rbt_nil;					\
+		while (true) {							\
+			int rbp_p_cmp = (a_cmp)((a_node), rbp_p_t);			\
+			if (rbp_p_cmp < 0) {					\
+				rbp_p_t = rbp_left_get(a_type, a_field, rbp_p_t);	\
+			} else if (rbp_p_cmp > 0) {					\
+				(r_node) = rbp_p_t;					\
+				rbp_p_t = rbp_right_get(a_type, a_field, rbp_p_t);	\
+			} else {							\
+				break;							\
+			}								\
+			assert(rbp_p_t != &(a_tree)->rbt_nil);			\
+		}								\
+    }									\
+} while (0)
+
+#define	rb_first(a_type, a_field, a_tree, r_node) do {			\
+    rbp_first(a_type, a_field, a_tree, (a_tree)->rbt_root, (r_node));	\
+    if ((r_node) == &(a_tree)->rbt_nil) {				\
+		(r_node) = NULL;						\
+    }									\
+} while (0)
+
+#define	rb_last(a_type, a_field, a_tree, r_node) do {			\
+    rbp_last(a_type, a_field, a_tree, (a_tree)->rbt_root, r_node);	\
+    if ((r_node) == &(a_tree)->rbt_nil) {				\
+		(r_node) = NULL;						\
+    }									\
+} while (0)
+
+#define	rb_next(a_type, a_field, a_cmp, a_tree, a_node, r_node) do {	\
+    rbp_next(a_type, a_field, a_cmp, a_tree, (a_node), (r_node));	\
+    if ((r_node) == &(a_tree)->rbt_nil) {				\
+		(r_node) = NULL;						\
+    }									\
+} while (0)
+
+#define	rb_prev(a_type, a_field, a_cmp, a_tree, a_node, r_node) do {	\
+    rbp_prev(a_type, a_field, a_cmp, a_tree, (a_node), (r_node));	\
+    if ((r_node) == &(a_tree)->rbt_nil) {				\
+		(r_node) = NULL;						\
+    }									\
+} while (0)
+
+#define	rb_search(a_type, a_field, a_cmp, a_tree, a_key, r_node) do {	\
+    int rbp_se_cmp;							\
+    (r_node) = (a_tree)->rbt_root;					\
+    while ((r_node) != &(a_tree)->rbt_nil && (rbp_se_cmp = (a_cmp)((a_key), (r_node))) != 0) {		\
+		if (rbp_se_cmp < 0) {						\
+			(r_node) = rbp_left_get(a_type, a_field, (r_node));		\
+		} else {							\
+			(r_node) = rbp_right_get(a_type, a_field, (r_node));	\
+		}								\
+    }									\
+    if ((r_node) == &(a_tree)->rbt_nil) {				\
+		(r_node) = NULL;						\
+    }									\
+} while (0)
+
+/*
+ * Find a match if it exists.  Otherwise, find the next greater node, if one
+ * exists.
+ */
+#define	rb_nsearch(a_type, a_field, a_cmp, a_tree, a_key, r_node) do {	\
+    a_type *rbp_ns_t = (a_tree)->rbt_root;				\
+    (r_node) = NULL;							\
+    while (rbp_ns_t != &(a_tree)->rbt_nil) {				\
+		int rbp_ns_cmp = (a_cmp)((a_key), rbp_ns_t);			\
+		if (rbp_ns_cmp < 0) {						\
+			(r_node) = rbp_ns_t;					\
+			rbp_ns_t = rbp_left_get(a_type, a_field, rbp_ns_t);		\
+		} else if (rbp_ns_cmp > 0) {					\
+			rbp_ns_t = rbp_right_get(a_type, a_field, rbp_ns_t);	\
+		} else {							\
+			(r_node) = rbp_ns_t;					\
+			break;							\
+		}								\
+    }									\
+} while (0)
+
+/*
+ * Find a match if it exists.  Otherwise, find the previous lesser node, if one
+ * exists.
+ */
+#define	rb_psearch(a_type, a_field, a_cmp, a_tree, a_key, r_node) do {	\
+    a_type *rbp_ps_t = (a_tree)->rbt_root;				\
+    (r_node) = NULL;							\
+    while (rbp_ps_t != &(a_tree)->rbt_nil) {				\
+		int rbp_ps_cmp = (a_cmp)((a_key), rbp_ps_t);			\
+		if (rbp_ps_cmp < 0) {						\
+			rbp_ps_t = rbp_left_get(a_type, a_field, rbp_ps_t);		\
+		} else if (rbp_ps_cmp > 0) {					\
+			(r_node) = rbp_ps_t;					\
+			rbp_ps_t = rbp_right_get(a_type, a_field, rbp_ps_t);	\
+		} else {							\
+			(r_node) = rbp_ps_t;					\
+			break;							\
+		}								\
+    }									\
+} while (0)
+
+#define	rbp_rotate_left(a_type, a_field, a_node, r_node) do {		\
+    (r_node) = rbp_right_get(a_type, a_field, (a_node));		\
+    rbp_right_set(a_type, a_field, (a_node), rbp_left_get(a_type, a_field, (r_node)));	\
+    rbp_left_set(a_type, a_field, (r_node), (a_node));			\
+} while (0)
+
+#define	rbp_rotate_right(a_type, a_field, a_node, r_node) do {		\
+    (r_node) = rbp_left_get(a_type, a_field, (a_node));			\
+    rbp_left_set(a_type, a_field, (a_node),	rbp_right_get(a_type, a_field, (r_node)));	\
+    rbp_right_set(a_type, a_field, (r_node), (a_node));			\
+} while (0)
+
+#define	rbp_lean_left(a_type, a_field, a_node, r_node) do {		\
+    bool rbp_ll_red;							\
+    rbp_rotate_left(a_type, a_field, (a_node), (r_node));		\
+    rbp_ll_red = rbp_red_get(a_type, a_field, (a_node));		\
+    rbp_color_set(a_type, a_field, (r_node), rbp_ll_red);		\
+    rbp_red_set(a_type, a_field, (a_node));				\
+} while (0)
+
+#define	rbp_lean_right(a_type, a_field, a_node, r_node) do {		\
+    bool rbp_lr_red;							\
+    rbp_rotate_right(a_type, a_field, (a_node), (r_node));		\
+    rbp_lr_red = rbp_red_get(a_type, a_field, (a_node));		\
+    rbp_color_set(a_type, a_field, (r_node), rbp_lr_red);		\
+    rbp_red_set(a_type, a_field, (a_node));				\
+} while (0)
+
+#define	rbp_move_red_left(a_type, a_field, a_node, r_node) do {		\
+    a_type *rbp_mrl_t, *rbp_mrl_u;					\
+    rbp_mrl_t = rbp_left_get(a_type, a_field, (a_node));		\
+    rbp_red_set(a_type, a_field, rbp_mrl_t);				\
+    rbp_mrl_t = rbp_right_get(a_type, a_field, (a_node));		\
+    rbp_mrl_u = rbp_left_get(a_type, a_field, rbp_mrl_t);		\
+    if (rbp_red_get(a_type, a_field, rbp_mrl_u)) {			\
+		rbp_rotate_right(a_type, a_field, rbp_mrl_t, rbp_mrl_u);	\
+		rbp_right_set(a_type, a_field, (a_node), rbp_mrl_u);		\
+		rbp_rotate_left(a_type, a_field, (a_node), (r_node));		\
+		rbp_mrl_t = rbp_right_get(a_type, a_field, (a_node));		\
+		if (rbp_red_get(a_type, a_field, rbp_mrl_t)) {			\
+			rbp_black_set(a_type, a_field, rbp_mrl_t);			\
+			rbp_red_set(a_type, a_field, (a_node));			\
+			rbp_rotate_left(a_type, a_field, (a_node), rbp_mrl_t);	\
+			rbp_left_set(a_type, a_field, (r_node), rbp_mrl_t);		\
+		} else {							\
+			rbp_black_set(a_type, a_field, (a_node));			\
+		}								\
+    } else {								\
+		rbp_red_set(a_type, a_field, (a_node));				\
+		rbp_rotate_left(a_type, a_field, (a_node), (r_node));		\
+    }									\
+} while (0)
+
+#define	rbp_move_red_right(a_type, a_field, a_node, r_node) do {	\
+    a_type *rbp_mrr_t;							\
+    rbp_mrr_t = rbp_left_get(a_type, a_field, (a_node));		\
+    if (rbp_red_get(a_type, a_field, rbp_mrr_t)) {			\
+		a_type *rbp_mrr_u, *rbp_mrr_v;					\
+		rbp_mrr_u = rbp_right_get(a_type, a_field, rbp_mrr_t);		\
+		rbp_mrr_v = rbp_left_get(a_type, a_field, rbp_mrr_u);		\
+		if (rbp_red_get(a_type, a_field, rbp_mrr_v)) {			\
+			rbp_color_set(a_type, a_field, rbp_mrr_u, rbp_red_get(a_type, a_field, (a_node))); \
+			rbp_black_set(a_type, a_field, rbp_mrr_v);			\
+			rbp_rotate_left(a_type, a_field, rbp_mrr_t, rbp_mrr_u);	\
+			rbp_left_set(a_type, a_field, (a_node), rbp_mrr_u);		\
+			rbp_rotate_right(a_type, a_field, (a_node), (r_node));	\
+			rbp_rotate_left(a_type, a_field, (a_node), rbp_mrr_t);	\
+			rbp_right_set(a_type, a_field, (r_node), rbp_mrr_t);	\
+		} else {							\
+			rbp_color_set(a_type, a_field, rbp_mrr_t, rbp_red_get(a_type, a_field, (a_node))); \
+			rbp_red_set(a_type, a_field, rbp_mrr_u);			\
+			rbp_rotate_right(a_type, a_field, (a_node), (r_node));	\
+			rbp_rotate_left(a_type, a_field, (a_node), rbp_mrr_t);	\
+			rbp_right_set(a_type, a_field, (r_node), rbp_mrr_t);	\
+		}								\
+		rbp_red_set(a_type, a_field, (a_node));				\
+    } else {								\
+		rbp_red_set(a_type, a_field, rbp_mrr_t);			\
+		rbp_mrr_t = rbp_left_get(a_type, a_field, rbp_mrr_t);		\
+		if (rbp_red_get(a_type, a_field, rbp_mrr_t)) {			\
+			rbp_black_set(a_type, a_field, rbp_mrr_t);			\
+			rbp_rotate_right(a_type, a_field, (a_node), (r_node));	\
+			rbp_rotate_left(a_type, a_field, (a_node), rbp_mrr_t);	\
+			rbp_right_set(a_type, a_field, (r_node), rbp_mrr_t);	\
+		} else {							\
+			rbp_rotate_left(a_type, a_field, (a_node), (r_node));	\
+		}								\
+    }									\
+} while (0)
+
+#define	rb_insert(a_type, a_field, a_cmp, a_tree, a_node) do {		\
+    a_type rbp_i_s;							\
+    a_type *rbp_i_g, *rbp_i_p, *rbp_i_c, *rbp_i_t, *rbp_i_u;		\
+    int rbp_i_cmp = 0;							\
+    rbp_i_g = &(a_tree)->rbt_nil;					\
+    rbp_left_set(a_type, a_field, &rbp_i_s, (a_tree)->rbt_root);	\
+    rbp_right_set(a_type, a_field, &rbp_i_s, &(a_tree)->rbt_nil);	\
+    rbp_black_set(a_type, a_field, &rbp_i_s);				\
+    rbp_i_p = &rbp_i_s;							\
+    rbp_i_c = (a_tree)->rbt_root;					\
+    /* Iteratively search down the tree for the insertion point,      */\
+    /* splitting 4-nodes as they are encountered.  At the end of each */\
+    /* iteration, rbp_i_g->rbp_i_p->rbp_i_c is a 3-level path down    */\
+    /* the tree, assuming a sufficiently deep tree.                   */\
+    while (rbp_i_c != &(a_tree)->rbt_nil) {				\
+		rbp_i_t = rbp_left_get(a_type, a_field, rbp_i_c);		\
+		rbp_i_u = rbp_left_get(a_type, a_field, rbp_i_t);		\
+		if (rbp_red_get(a_type, a_field, rbp_i_t)			\
+			&& rbp_red_get(a_type, a_field, rbp_i_u)) {			\
+			/* rbp_i_c is the top of a logical 4-node, so split it.   */\
+			/* This iteration does not move down the tree, due to the */\
+			/* disruptiveness of node splitting.                      */\
+			/*                                                        */\
+			/* Rotate right.                                          */\
+			rbp_rotate_right(a_type, a_field, rbp_i_c, rbp_i_t);	\
+			/* Pass red links up one level.                           */\
+			rbp_i_u = rbp_left_get(a_type, a_field, rbp_i_t);		\
+			rbp_black_set(a_type, a_field, rbp_i_u);			\
+			if (rbp_left_get(a_type, a_field, rbp_i_p) == rbp_i_c) {	\
+				rbp_left_set(a_type, a_field, rbp_i_p, rbp_i_t);	\
+				rbp_i_c = rbp_i_t;					\
+			} else {							\
+				/* rbp_i_c was the right child of rbp_i_p, so rotate  */\
+				/* left in order to maintain the left-leaning         */\
+				/* invariant.                                         */\
+				assert(rbp_right_get(a_type, a_field, rbp_i_p) == rbp_i_c);	\
+				rbp_right_set(a_type, a_field, rbp_i_p, rbp_i_t);	\
+				rbp_lean_left(a_type, a_field, rbp_i_p, rbp_i_u);	\
+				if (rbp_left_get(a_type, a_field, rbp_i_g) == rbp_i_p) {\
+					rbp_left_set(a_type, a_field, rbp_i_g, rbp_i_u);	\
+				} else {						\
+					assert(rbp_right_get(a_type, a_field, rbp_i_g) == rbp_i_p);	\
+					rbp_right_set(a_type, a_field, rbp_i_g, rbp_i_u);	\
+				}							\
+				rbp_i_p = rbp_i_u;					\
+				rbp_i_cmp = (a_cmp)((a_node), rbp_i_p);			\
+				if (rbp_i_cmp < 0) {					\
+					rbp_i_c = rbp_left_get(a_type, a_field, rbp_i_p);	\
+				} else {						\
+					assert(rbp_i_cmp > 0);				\
+					rbp_i_c = rbp_right_get(a_type, a_field, rbp_i_p);	\
+				}							\
+				continue;						\
+			}								\
+		}								\
+		rbp_i_g = rbp_i_p;						\
+		rbp_i_p = rbp_i_c;						\
+		rbp_i_cmp = (a_cmp)((a_node), rbp_i_c);				\
+		if (rbp_i_cmp < 0) {						\
+			rbp_i_c = rbp_left_get(a_type, a_field, rbp_i_c);		\
+		} else {							\
+			assert(rbp_i_cmp > 0);					\
+			rbp_i_c = rbp_right_get(a_type, a_field, rbp_i_c);		\
+		}								\
+    }									\
+    /* rbp_i_p now refers to the node under which to insert.          */\
+    rbp_node_new(a_type, a_field, a_tree, (a_node));			\
+    if (rbp_i_cmp > 0) {						\
+		rbp_right_set(a_type, a_field, rbp_i_p, (a_node));		\
+		rbp_lean_left(a_type, a_field, rbp_i_p, rbp_i_t);		\
+		if (rbp_left_get(a_type, a_field, rbp_i_g) == rbp_i_p) {	\
+			rbp_left_set(a_type, a_field, rbp_i_g, rbp_i_t);		\
+		} else if (rbp_right_get(a_type, a_field, rbp_i_g) == rbp_i_p) {\
+			rbp_right_set(a_type, a_field, rbp_i_g, rbp_i_t);		\
+		}								\
+    } else {								\
+		rbp_left_set(a_type, a_field, rbp_i_p, (a_node));		\
+    }									\
+    /* Update the root and make sure that it is black.                */\
+    (a_tree)->rbt_root = rbp_left_get(a_type, a_field, &rbp_i_s);	\
+    rbp_black_set(a_type, a_field, (a_tree)->rbt_root);			\
+} while (0)
+
+#define	rb_remove(a_type, a_field, a_cmp, a_tree, a_node) do {		\
+    a_type rbp_r_s;							\
+    a_type *rbp_r_p, *rbp_r_c, *rbp_r_xp, *rbp_r_t, *rbp_r_u;		\
+    int rbp_r_cmp;							\
+    rbp_left_set(a_type, a_field, &rbp_r_s, (a_tree)->rbt_root);	\
+    rbp_right_set(a_type, a_field, &rbp_r_s, &(a_tree)->rbt_nil);	\
+    rbp_black_set(a_type, a_field, &rbp_r_s);				\
+    rbp_r_p = &rbp_r_s;							\
+    rbp_r_c = (a_tree)->rbt_root;					\
+    rbp_r_xp = &(a_tree)->rbt_nil;					\
+    /* Iterate down the tree, but always transform 2-nodes to 3- or   */\
+    /* 4-nodes in order to maintain the invariant that the current    */\
+    /* node is not a 2-node.  This allows simple deletion once a leaf */\
+    /* is reached.  Handle the root specially though, since there may */\
+    /* be no way to convert it from a 2-node to a 3-node.             */\
+    rbp_r_cmp = (a_cmp)((a_node), rbp_r_c);				\
+    if (rbp_r_cmp < 0) {						\
+		rbp_r_t = rbp_left_get(a_type, a_field, rbp_r_c);		\
+		rbp_r_u = rbp_left_get(a_type, a_field, rbp_r_t);		\
+		if (rbp_red_get(a_type, a_field, rbp_r_t) == false		\
+			&& rbp_red_get(a_type, a_field, rbp_r_u) == false) {		\
+			/* Apply standard transform to prepare for left move.     */\
+			rbp_move_red_left(a_type, a_field, rbp_r_c, rbp_r_t);	\
+			rbp_black_set(a_type, a_field, rbp_r_t);			\
+			rbp_left_set(a_type, a_field, rbp_r_p, rbp_r_t);		\
+			rbp_r_c = rbp_r_t;						\
+		} else {							\
+			/* Move left.                                             */\
+			rbp_r_p = rbp_r_c;						\
+			rbp_r_c = rbp_left_get(a_type, a_field, rbp_r_c);		\
+		}								\
+    } else {								\
+		if (rbp_r_cmp == 0) {						\
+			assert((a_node) == rbp_r_c);				\
+			if (rbp_right_get(a_type, a_field, rbp_r_c)	== &(a_tree)->rbt_nil) { \
+				/* Delete root node (which is also a leaf node).      */\
+				if (rbp_left_get(a_type, a_field, rbp_r_c) != &(a_tree)->rbt_nil) {	\
+					rbp_lean_right(a_type, a_field, rbp_r_c, rbp_r_t);	\
+					rbp_right_set(a_type, a_field, rbp_r_t, &(a_tree)->rbt_nil);	\
+				} else {						\
+					rbp_r_t = &(a_tree)->rbt_nil;			\
+				}							\
+				rbp_left_set(a_type, a_field, rbp_r_p, rbp_r_t);	\
+			} else {							\
+				/* This is the node we want to delete, but we will    */\
+				/* instead swap it with its successor and delete the  */\
+				/* successor.  Record enough information to do the    */\
+				/* swap later.  rbp_r_xp is a_node's parent.          */\
+				rbp_r_xp = rbp_r_p;					\
+				rbp_r_cmp = 1; /* Note that deletion is incomplete.   */\
+			}								\
+		}								\
+		if (rbp_r_cmp == 1) {						\
+			if (rbp_red_get(a_type, a_field, rbp_left_get(a_type,	\
+					a_field, rbp_right_get(a_type, a_field, rbp_r_c))) == false) { \
+				rbp_r_t = rbp_left_get(a_type, a_field, rbp_r_c);	\
+				if (rbp_red_get(a_type, a_field, rbp_r_t)) {		\
+					/* Standard transform.                            */\
+					rbp_move_red_right(a_type, a_field, rbp_r_c, rbp_r_t);	\
+				} else {						\
+					/* Root-specific transform.                       */\
+					rbp_red_set(a_type, a_field, rbp_r_c);		\
+					rbp_r_u = rbp_left_get(a_type, a_field, rbp_r_t);	\
+					if (rbp_red_get(a_type, a_field, rbp_r_u)) {	\
+						rbp_black_set(a_type, a_field, rbp_r_u);	\
+						rbp_rotate_right(a_type, a_field, rbp_r_c, rbp_r_t);	\
+						rbp_rotate_left(a_type, a_field, rbp_r_c, rbp_r_u);		\
+						rbp_right_set(a_type, a_field, rbp_r_t, rbp_r_u);		\
+					} else {						\
+						rbp_red_set(a_type, a_field, rbp_r_t);		\
+						rbp_rotate_left(a_type, a_field, rbp_r_c, rbp_r_t);		\
+					}							\
+				}							\
+				rbp_left_set(a_type, a_field, rbp_r_p, rbp_r_t);	\
+				rbp_r_c = rbp_r_t;					\
+			} else {							\
+				/* Move right.                                    */\
+				rbp_r_p = rbp_r_c;					\
+				rbp_r_c = rbp_right_get(a_type, a_field, rbp_r_c);	\
+			}								\
+		}								\
+    }									\
+    if (rbp_r_cmp != 0) {						\
+		while (true) {							\
+			assert(rbp_r_p != &(a_tree)->rbt_nil);			\
+			rbp_r_cmp = (a_cmp)((a_node), rbp_r_c);			\
+			if (rbp_r_cmp < 0) {					\
+				rbp_r_t = rbp_left_get(a_type, a_field, rbp_r_c);	\
+				if (rbp_r_t == &(a_tree)->rbt_nil) {			\
+					/* rbp_r_c now refers to the successor node to    */\
+					/* relocate, and rbp_r_xp/a_node refer to the     */\
+					/* context for the relocation.                    */\
+					if (rbp_left_get(a_type, a_field, rbp_r_xp)	== (a_node)) {			\
+						rbp_left_set(a_type, a_field, rbp_r_xp, rbp_r_c);				\
+					} else {						\
+						assert(rbp_right_get(a_type, a_field, rbp_r_xp) == (a_node));	\
+						rbp_right_set(a_type, a_field, rbp_r_xp, rbp_r_c);				\
+					}							\
+					rbp_left_set(a_type, a_field, rbp_r_c, rbp_left_get(a_type, a_field, (a_node)));	\
+					rbp_right_set(a_type, a_field, rbp_r_c, rbp_right_get(a_type, a_field, (a_node)));	\
+					rbp_color_set(a_type, a_field, rbp_r_c, rbp_red_get(a_type, a_field, (a_node)));	\
+					if (rbp_left_get(a_type, a_field, rbp_r_p) == rbp_r_c) {					\
+						rbp_left_set(a_type, a_field, rbp_r_p, &(a_tree)->rbt_nil);				\
+					} else {						\
+						assert(rbp_right_get(a_type, a_field, rbp_r_p) == rbp_r_c);				\
+						rbp_right_set(a_type, a_field, rbp_r_p, &(a_tree)->rbt_nil);			\
+					}							\
+					break;						\
+				}							\
+				rbp_r_u = rbp_left_get(a_type, a_field, rbp_r_t);	\
+				if (rbp_red_get(a_type, a_field, rbp_r_t) == false \
+						&& rbp_red_get(a_type, a_field, rbp_r_u) == false) {	\
+						rbp_move_red_left(a_type, a_field, rbp_r_c, rbp_r_t);	\
+					if (rbp_left_get(a_type, a_field, rbp_r_p) == rbp_r_c) {	\
+						rbp_left_set(a_type, a_field, rbp_r_p, rbp_r_t);\
+					} else {						\
+						rbp_right_set(a_type, a_field, rbp_r_p, rbp_r_t);		\
+					}							\
+					rbp_r_c = rbp_r_t;					\
+				} else {						\
+					rbp_r_p = rbp_r_c;					\
+					rbp_r_c = rbp_left_get(a_type, a_field, rbp_r_c);	\
+				}							\
+			} else {							\
+				/* Check whether to delete this node (it has to be    */\
+				/* the correct node and a leaf node).                 */\
+				if (rbp_r_cmp == 0) {					\
+					assert((a_node) == rbp_r_c);			\
+					if (rbp_right_get(a_type, a_field, rbp_r_c) == &(a_tree)->rbt_nil) {	\
+						/* Delete leaf node.                          */\
+						if (rbp_left_get(a_type, a_field, rbp_r_c) != &(a_tree)->rbt_nil) {	\
+							rbp_lean_right(a_type, a_field, rbp_r_c, rbp_r_t);	\
+							rbp_right_set(a_type, a_field, rbp_r_t,	&(a_tree)->rbt_nil);	\
+						} else {					\
+							rbp_r_t = &(a_tree)->rbt_nil;		\
+						}						\
+						if (rbp_left_get(a_type, a_field, rbp_r_p) == rbp_r_c) {		\
+							rbp_left_set(a_type, a_field, rbp_r_p, rbp_r_t);			\
+						} else {					\
+							rbp_right_set(a_type, a_field, rbp_r_p,	rbp_r_t);			\
+						}						\
+						break;						\
+					} else {						\
+						/* This is the node we want to delete, but we */\
+						/* will instead swap it with its successor    */\
+						/* and delete the successor.  Record enough   */\
+						/* information to do the swap later.          */\
+						/* rbp_r_xp is a_node's parent.               */\
+						rbp_r_xp = rbp_r_p;				\
+					}							\
+				}							\
+				rbp_r_t = rbp_right_get(a_type, a_field, rbp_r_c);	\
+				rbp_r_u = rbp_left_get(a_type, a_field, rbp_r_t);	\
+				if (rbp_red_get(a_type, a_field, rbp_r_u) == false) {	\
+					rbp_move_red_right(a_type, a_field, rbp_r_c,	\
+					rbp_r_t);						\
+					if (rbp_left_get(a_type, a_field, rbp_r_p) == rbp_r_c) {		\
+						rbp_left_set(a_type, a_field, rbp_r_p, rbp_r_t);\
+					} else {						\
+						rbp_right_set(a_type, a_field, rbp_r_p, rbp_r_t); \
+					}							\
+					rbp_r_c = rbp_r_t;					\
+				} else {						\
+					rbp_r_p = rbp_r_c;					\
+					rbp_r_c = rbp_right_get(a_type, a_field, rbp_r_c);	\
+				}							\
+			}								\
+		}								\
+    }									\
+    /* Update root.                                                   */\
+    (a_tree)->rbt_root = rbp_left_get(a_type, a_field, &rbp_r_s);	\
+} while (0)
+
+/*
+ * The rb_wrap() macro provides a convenient way to wrap functions around the
+ * cpp macros.  The main benefits of wrapping are that 1) repeated macro
+ * expansion can cause code bloat, especially for rb_{insert,remove}(), and
+ * 2) type, linkage, comparison functions, etc. need not be specified at every
+ * call point.
+ */
+
+#define	rb_wrap(a_attr, a_prefix, a_tree_type, a_type, a_field, a_cmp)	\
+a_attr void								\
+a_prefix##new(a_tree_type *tree) {					\
+    rb_new(a_type, a_field, tree);					\
+}									\
+a_attr a_type *								\
+a_prefix##first(a_tree_type *tree) {					\
+    a_type *ret;							\
+    rb_first(a_type, a_field, tree, ret);				\
+    return (ret);							\
+}									\
+a_attr a_type *								\
+a_prefix##last(a_tree_type *tree) {					\
+    a_type *ret;							\
+    rb_last(a_type, a_field, tree, ret);				\
+    return (ret);							\
+}									\
+a_attr a_type *								\
+a_prefix##next(a_tree_type *tree, a_type *node) {			\
+    a_type *ret;							\
+    rb_next(a_type, a_field, a_cmp, tree, node, ret);			\
+    return (ret);							\
+}									\
+a_attr a_type *								\
+a_prefix##prev(a_tree_type *tree, a_type *node) {			\
+    a_type *ret;							\
+    rb_prev(a_type, a_field, a_cmp, tree, node, ret);			\
+    return (ret);							\
+}									\
+a_attr a_type *								\
+a_prefix##search(a_tree_type *tree, a_type *key) {			\
+    a_type *ret;							\
+    rb_search(a_type, a_field, a_cmp, tree, key, ret);			\
+    return (ret);							\
+}									\
+a_attr a_type *								\
+a_prefix##nsearch(a_tree_type *tree, a_type *key) {			\
+    a_type *ret;							\
+    rb_nsearch(a_type, a_field, a_cmp, tree, key, ret);			\
+    return (ret);							\
+}									\
+a_attr a_type *								\
+a_prefix##psearch(a_tree_type *tree, a_type *key) {			\
+    a_type *ret;							\
+    rb_psearch(a_type, a_field, a_cmp, tree, key, ret);			\
+    return (ret);							\
+}									\
+a_attr void								\
+a_prefix##insert(a_tree_type *tree, a_type *node) {			\
+    rb_insert(a_type, a_field, a_cmp, tree, node);			\
+}									\
+a_attr void								\
+a_prefix##remove(a_tree_type *tree, a_type *node) {			\
+    rb_remove(a_type, a_field, a_cmp, tree, node);			\
+}
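+
+/*
+ * Example use of rb_wrap() (an illustrative sketch; the ex_* names are
+ * hypothetical, and rb_node()/rb_tree() are the declaration macros from
+ * earlier in this header):
+ *
+ *   typedef struct ex_node_s ex_node_t;
+ *   struct ex_node_s {
+ *       rb_node(ex_node_t) ex_link;
+ *       int ex_key;
+ *   };
+ *   typedef rb_tree(ex_node_t) ex_tree_t;
+ *
+ *   static int
+ *   ex_cmp(ex_node_t *a, ex_node_t *b)
+ *   {
+ *       return ((a->ex_key > b->ex_key) - (a->ex_key < b->ex_key));
+ *   }
+ *
+ *   rb_wrap(static, ex_tree_, ex_tree_t, ex_node_t, ex_link, ex_cmp)
+ *
+ * This expands to static ex_tree_new(), ex_tree_insert(), ex_tree_remove(),
+ * ex_tree_search(), ex_tree_next(), etc., so the type, field, and
+ * comparison arguments need not be repeated at every call site.
+ */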
+
+/*
+ * The iterators simulate recursion via an array of pointers that store the
+ * current path.  This is critical to performance, since a series of calls to
+ * rb_{next,prev}() would require time proportional to (n lg n), whereas this
+ * implementation only requires time proportional to (n).
+ *
+ * Since the iterators cache a path down the tree, any tree modification may
+ * cause the cached path to become invalid.  In order to continue iteration,
+ * use something like the following sequence:
+ *
+ *   {
+ *       a_type *node, *tnode;
+ *
+ *       rb_foreach_begin(a_type, a_field, a_tree, node) {
+ *           ...
+ *           rb_next(a_type, a_field, a_cmp, a_tree, node, tnode);
+ *           rb_remove(a_type, a_field, a_cmp, a_tree, node);
+ *           rb_foreach_next(a_type, a_field, a_cmp, a_tree, tnode);
+ *           ...
+ *       } rb_foreach_end(a_type, a_field, a_tree, node)
+ *   }
+ *
+ * Note that this idiom is not advised if every iteration modifies the tree,
+ * since in that case there is no algorithmic complexity improvement over a
+ * series of rb_{next,prev}() calls, thus making the setup overhead wasted
+ * effort.
+ */
+
+#define	rb_foreach_begin(a_type, a_field, a_tree, a_var) { /* brace A */	\
+    /* Compute the maximum possible tree depth (3X the black height). */\
+    unsigned rbp_f_height;						\
+    rbp_black_height(a_type, a_field, a_tree, rbp_f_height);		\
+    rbp_f_height *= 3;							\
+    {		/* brace B */							\
+		/* Initialize the path to contain the left spine.             */\
+		a_type *rbp_f_path[rbp_f_height];				\
+		a_type *rbp_f_node;						\
+		bool rbp_f_synced = false;					\
+		unsigned rbp_f_depth = 0;					\
+		if ((a_tree)->rbt_root != &(a_tree)->rbt_nil) {			\
+			rbp_f_path[rbp_f_depth] = (a_tree)->rbt_root;		\
+			rbp_f_depth++;						\
+			while ((rbp_f_node = rbp_left_get(a_type, a_field,		\
+					rbp_f_path[rbp_f_depth-1])) != &(a_tree)->rbt_nil) {	\
+				rbp_f_path[rbp_f_depth] = rbp_f_node;			\
+				rbp_f_depth++;						\
+			}								\
+		}								\
+		/* While the path is non-empty, iterate.                      */\
+		while (rbp_f_depth > 0) {		/* brace C */			\
+			(a_var) = rbp_f_path[rbp_f_depth-1];	
+
+/*
+ * Note that rb_foreach_begin omits its closing braces; it must always be
+ * paired with a matching rb_foreach_end, which supplies them.
+ */
+
+/* Only use if modifying the tree during iteration. */
+#define	rb_foreach_next(a_type, a_field, a_cmp, a_tree, a_node)		\
+	    /* Re-initialize the path to contain the path to a_node.  */\
+	    rbp_f_depth = 0;						\
+	    if (a_node != NULL) {					\
+			if ((a_tree)->rbt_root != &(a_tree)->rbt_nil) {		\
+				rbp_f_path[rbp_f_depth] = (a_tree)->rbt_root;	\
+				rbp_f_depth++;					\
+				rbp_f_node = rbp_f_path[0];				\
+				while (true) {					\
+					int rbp_f_cmp = (a_cmp)((a_node),		\
+					rbp_f_path[rbp_f_depth-1]);			\
+					if (rbp_f_cmp < 0) {				\
+						rbp_f_node = rbp_left_get(a_type, a_field,	\
+						rbp_f_path[rbp_f_depth-1]);		\
+					} else if (rbp_f_cmp > 0) {			\
+						rbp_f_node = rbp_right_get(a_type, a_field,	\
+								rbp_f_path[rbp_f_depth-1]);		\
+					} else {					\
+						break;					\
+					}						\
+					assert(rbp_f_node != &(a_tree)->rbt_nil);	\
+					rbp_f_path[rbp_f_depth] = rbp_f_node;		\
+					rbp_f_depth++;					\
+				}							\
+			}							\
+	    }								\
+	    rbp_f_synced = true;
+
+#define	rb_foreach_end(a_type, a_field, a_tree, a_var)			\
+			if (rbp_f_synced) {						\
+				rbp_f_synced = false;					\
+				continue;						\
+			}								\
+			/* Find the successor.                                    */\
+			if ((rbp_f_node = rbp_right_get(a_type, a_field,		\
+					rbp_f_path[rbp_f_depth-1])) != &(a_tree)->rbt_nil) {	\
+				/* The successor is the left-most node in the right   */\
+				/* subtree.                                           */\
+				rbp_f_path[rbp_f_depth] = rbp_f_node;			\
+				rbp_f_depth++;						\
+				while ((rbp_f_node = rbp_left_get(a_type, a_field,	\
+						rbp_f_path[rbp_f_depth-1])) != &(a_tree)->rbt_nil) {	\
+					rbp_f_path[rbp_f_depth] = rbp_f_node;		\
+					rbp_f_depth++;					\
+				}							\
+			} else {							\
+				/* The successor is above the current node.  Unwind   */\
+				/* until a left-leaning edge is removed from the      */\
+				/* path, or the path is empty.                        */\
+				for (rbp_f_depth--; rbp_f_depth > 0; rbp_f_depth--) {	\
+					if (rbp_left_get(a_type, a_field, rbp_f_path[rbp_f_depth-1]) \
+							== rbp_f_path[rbp_f_depth]) {			\
+						break;						\
+					}							\
+				}							\
+			}								\
+		}	/* close brace C */							\
+    }	/* close brace B */							\
+} /* close brace A */
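+
+/*
+ * A minimal read-only traversal, reusing the hypothetical ex_* names
+ * sketched above:
+ *
+ *   ex_node_t *node;
+ *   rb_foreach_begin(ex_node_t, ex_link, &tree, node) {
+ *       process(node);
+ *   } rb_foreach_end(ex_node_t, ex_link, &tree, node)
+ *
+ * (process() is a placeholder.)  No rb_foreach_next() call is needed when
+ * the tree is left unmodified during iteration.
+ */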
+
+#define	rb_foreach_reverse_begin(a_type, a_field, a_tree, a_var) {  /* brace A */ \
+    /* Compute the maximum possible tree depth (3X the black height). */\
+    unsigned rbp_fr_height;						\
+    rbp_black_height(a_type, a_field, a_tree, rbp_fr_height);		\
+    rbp_fr_height *= 3;							\
+    {	/* brace B */								\
+		/* Initialize the path to contain the right spine.            */\
+		a_type *rbp_fr_path[rbp_fr_height];				\
+		a_type *rbp_fr_node;						\
+		bool rbp_fr_synced = false;					\
+		unsigned rbp_fr_depth = 0;					\
+		if ((a_tree)->rbt_root != &(a_tree)->rbt_nil) {			\
+			rbp_fr_path[rbp_fr_depth] = (a_tree)->rbt_root;		\
+			rbp_fr_depth++;						\
+			while ((rbp_fr_node = rbp_right_get(a_type, a_field,	\
+					rbp_fr_path[rbp_fr_depth-1])) != &(a_tree)->rbt_nil) {	\
+				rbp_fr_path[rbp_fr_depth] = rbp_fr_node;		\
+				rbp_fr_depth++;						\
+			}								\
+		}								\
+		/* While the path is non-empty, iterate.                      */\
+		while (rbp_fr_depth > 0) {	 /* brace C */			\
+			(a_var) = rbp_fr_path[rbp_fr_depth-1];
+
+
+/* Only use if modifying the tree during iteration. */
+#define	rb_foreach_reverse_prev(a_type, a_field, a_cmp, a_tree, a_node)	\
+	    /* Re-initialize the path to contain the path to a_node.  */\
+	    rbp_fr_depth = 0;						\
+	    if (a_node != NULL) {					\
+			if ((a_tree)->rbt_root != &(a_tree)->rbt_nil) {		\
+				rbp_fr_path[rbp_fr_depth] = (a_tree)->rbt_root;	\
+				rbp_fr_depth++;					\
+				rbp_fr_node = rbp_fr_path[0];			\
+				while (true) {					\
+					int rbp_fr_cmp = (a_cmp)((a_node), rbp_fr_path[rbp_fr_depth-1]);	\
+					if (rbp_fr_cmp < 0) {				\
+						rbp_fr_node = rbp_left_get(a_type, a_field,	\
+						rbp_fr_path[rbp_fr_depth-1]);		\
+					} else if (rbp_fr_cmp > 0) {			\
+						rbp_fr_node = rbp_right_get(a_type, a_field, rbp_fr_path[rbp_fr_depth-1]);	\
+					} else {					\
+						break;					\
+					}						\
+					assert(rbp_fr_node != &(a_tree)->rbt_nil);	\
+					rbp_fr_path[rbp_fr_depth] = rbp_fr_node;	\
+					rbp_fr_depth++;					\
+				}							\
+			}							\
+		}								\
+		rbp_fr_synced = true;
+
+#define	rb_foreach_reverse_end(a_type, a_field, a_tree, a_var)		\
+			if (rbp_fr_synced) {					\
+				rbp_fr_synced = false;					\
+				continue;						\
+			}								\
+			if (rbp_fr_depth == 0) {					\
+				/* rb_foreach_reverse_prev() was called with a NULL   */\
+				/* a_node.                                            */\
+				break;							\
+			}								\
+			/* Find the predecessor.                                  */\
+			if ((rbp_fr_node = rbp_left_get(a_type, a_field,		\
+					rbp_fr_path[rbp_fr_depth-1])) != &(a_tree)->rbt_nil) {	\
+				/* The predecessor is the right-most node in the left */\
+				/* subtree.                                           */\
+				rbp_fr_path[rbp_fr_depth] = rbp_fr_node;		\
+				rbp_fr_depth++;						\
+				while ((rbp_fr_node = rbp_right_get(a_type, a_field,	\
+						rbp_fr_path[rbp_fr_depth-1])) != &(a_tree)->rbt_nil) {\
+					rbp_fr_path[rbp_fr_depth] = rbp_fr_node;		\
+					rbp_fr_depth++;					\
+				}							\
+			} else {							\
+				/* The predecessor is above the current node.  Unwind */\
+				/* until a right-leaning edge is removed from the     */\
+				/* path, or the path is empty.                        */\
+				for (rbp_fr_depth--; rbp_fr_depth > 0; rbp_fr_depth--) {\
+					if (rbp_right_get(a_type, a_field, rbp_fr_path[rbp_fr_depth-1])	\
+							== rbp_fr_path[rbp_fr_depth]) {			\
+						break;						\
+					}							\
+				}							\
+			}								\
+		}	/* Close brace C */					\
+    } /* close brace B */						\
+} /* close brace A*/
+
+#endif /* RB_H_ */
diff --git a/bsd/i386/param.h b/bsd/i386/param.h
index 03a38d2ce..0eae0fea5 100644
--- a/bsd/i386/param.h
+++ b/bsd/i386/param.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -101,9 +101,15 @@
  * clusters (MAPPED_MBUFS), MCLBYTES must also be an integral multiple
  * of the hardware page size.
  */
-#define	MSIZE		256		/* size of an mbuf */
-#define	MCLBYTES	2048		/* large enough for ether MTU */
-#define	MCLSHIFT	11
+#define	MSIZESHIFT	8			/* 256 */
+#define	MSIZE		(1 << MSIZESHIFT)	/* size of an mbuf */
+#define	MCLSHIFT	11			/* 2048 */
+#define	MCLBYTES	(1 << MCLSHIFT)		/* size of an mbuf cluster */
+#define	MBIGCLSHIFT	12			/* 4096 */
+#define	MBIGCLBYTES	(1 << MBIGCLSHIFT)	/* size of a big cluster */
+#define	M16KCLSHIFT	14			/* 16384 */
+#define	M16KCLBYTES	(1 << M16KCLSHIFT)	/* size of a jumbo cluster */
+
 #define	MCLOFSET	(MCLBYTES - 1)
 #ifndef NMBCLUSTERS
 #ifdef GATEWAY
diff --git a/bsd/kern/Makefile b/bsd/kern/Makefile
new file mode 100644
index 000000000..c7eecbb12
--- /dev/null
+++ b/bsd/kern/Makefile
@@ -0,0 +1,26 @@
+export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
+export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
+export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
+export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
+
+include $(MakeInc_cmd)
+include $(MakeInc_def)
+
+INSTALL_SHARE_MISC_LIST =	\
+	trace.codes
+
+include $(MakeInc_rule)
+include $(MakeInc_dir)
+
+SHARE_MISC_DIR = usr/share/misc
+
+INSTALL_SHARE_MISC_FILES = \
+	$(addprefix $(DSTROOT)/$(SHARE_MISC_DIR)/, $(INSTALL_SHARE_MISC_LIST))
+
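+# Static pattern rule: each installed file depends on the like-named
+# source in this directory and is copied into place with $(INSTALL).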
+$(INSTALL_SHARE_MISC_FILES): $(DSTROOT)/$(SHARE_MISC_DIR)/% : %
+	@echo Installing $< in $(dir $@)
+	$(_v) $(MKDIR) $(DSTROOT)/$(SHARE_MISC_DIR);	\
+	$(RM) $(RMFLAGS) $@;				\
+	$(INSTALL) $(INSTALL_FLAGS) $< $(dir $@);
+
+do_build_install: $(INSTALL_SHARE_MISC_FILES)
diff --git a/bsd/kern/bsd_init.c b/bsd/kern/bsd_init.c
index e2868a40c..dac2c94d8 100644
--- a/bsd/kern/bsd_init.c
+++ b/bsd/kern/bsd_init.c
@@ -106,6 +106,7 @@
 #include <kern/task.h>
 #include <kern/ast.h>
 #include <kern/kalloc.h>
+#include <mach/mach_host.h>
 
 #include <mach/vm_param.h>
 
@@ -132,6 +133,7 @@
 #include <sys/mcache.h>			/* for mcache_init() */
 #include <sys/mbuf.h>			/* for mbinit() */
 #include <sys/event.h>			/* for knote_init() */
+#include <sys/kern_memorystatus.h>	/* for kern_memorystatus_init() */
 #include <sys/aio_kern.h>		/* for aio_init() */
 #include <sys/semaphore.h>		/* for psem_cache_init() */
 #include <net/dlil.h>			/* for dlil_init() */
@@ -151,6 +153,8 @@
 #include <sys/tty.h>			/* for tty_init() */
 #include <net/if_utun.h>		/* for utun_register_control() */
 #include <net/net_str_id.h>		/* for net_str_id_init() */
+#include <net/netsrc.h>			/* for netsrc_init() */
+#include <kern/assert.h>		/* for assert() */
 
 #include <net/init.h>
 
@@ -162,6 +166,10 @@
 
 #include <machine/exec.h>
 
+#if NFSCLIENT
+#include <sys/netboot.h>
+#endif
+
 #if CONFIG_IMAGEBOOT
 #include <sys/imageboot.h>
 #endif
@@ -171,6 +179,7 @@
 #endif
 
 #include <pexpert/pexpert.h>
+#include <machine/pal_routines.h>
 
 void * get_user_regs(thread_t);		/* XXX kludge for <machine/thread.h> */
 void IOKitInitializeTime(void);		/* XXX */
@@ -216,9 +225,8 @@ char	domainname[MAXDOMNAMELEN];
 int		domainnamelen;
 #if defined(__i386__) || defined(__x86_64__)
 struct exec_archhandler exec_archhandler_ppc = {
-	.path = "/usr/libexec/oah/translate",
+	.path = "/usr/libexec/oah/RosettaNonGrata",
 };
-const char * const kRosettaStandIn_str = "/usr/libexec/oah/RosettaNonGrata";
 #else /* __i386__ */
 struct exec_archhandler exec_archhandler_ppc;
 #endif /* __i386__ */
@@ -243,16 +251,16 @@ extern void file_lock_init(void);
 extern void kmeminit(void);
 extern void bsd_bufferinit(void);
 
-extern int srv;
+extern int serverperfmode;
 extern int ncl;
 
 vm_map_t	bsd_pageable_map;
 vm_map_t	mb_map;
 
-static  int bsd_simul_execs = BSD_SIMUL_EXECS;
-static int bsd_pageable_map_size = BSD_PAGABLE_MAP_SIZE;
-__private_extern__ int execargs_cache_size = BSD_SIMUL_EXECS;
-__private_extern__ int execargs_free_count = BSD_SIMUL_EXECS;
+static  int bsd_simul_execs;
+static int bsd_pageable_map_size;
+__private_extern__ int execargs_cache_size = 0;
+__private_extern__ int execargs_free_count = 0;
 __private_extern__ vm_offset_t * execargs_cache = NULL;
 
 void bsd_exec_setup(int);
@@ -262,6 +270,14 @@ void bsd_exec_setup(int);
  * Intel only.
  */
 __private_extern__ int bootarg_no64exec = 0;
+__private_extern__ int bootarg_vnode_cache_defeat = 0;
+
+/*
+ * Prevent kernel-based ASLR from being used, for testing.
+ */
+#if DEVELOPMENT || DEBUG
+__private_extern__ int bootarg_disable_aslr = 0;
+#endif
 
 int	cmask = CMASK;
 extern int customnbuf;
@@ -274,6 +290,7 @@ static void parse_bsd_args(void);
 extern task_t bsd_init_task;
 extern char    init_task_failure_data[];
 extern void time_zone_slock_init(void);
+extern void select_wait_queue_init(void);
 static void process_name(const char *, proc_t);
 
 static void setconf(void);
@@ -289,17 +306,21 @@ extern void sysv_sem_lock_init(void);
 #if SYSV_MSG
 extern void sysv_msg_lock_init(void);
 #endif
-extern void pthread_init(void);
 
+#if !defined(SECURE_KERNEL)
 /* kmem access not enabled by default; can be changed with boot-args */
+/* We don't need to keep this symbol around in RELEASE kernel */
 int setup_kmem = 0;
+#endif
 
-/* size of kernel trace buffer, disabled by default */
-unsigned int new_nkdbufs = 0;
+#if CONFIG_MACF
+#if defined (__i386__) || defined (__x86_64__)
+/* MACF policy_check configuration flags; see policy_check.c for details */
+int policy_check_flags = 0;
 
-/* mach leak logging */
-int log_leaks = 0;
-int turn_on_log_leaks = 0;
+extern int check_policy_init(int);
+#endif
+#endif	/* CONFIG_MACF */
 
 extern void stackshot_lock_init(void);
 
@@ -343,8 +364,6 @@ struct rlimit vm_initial_limit_core = { DFLCSIZ, MAXCSIZ };
 
 extern thread_t	cloneproc(task_t, proc_t, int);
 extern int 	(*mountroot)(void);
-extern int 	netboot_mountroot(void); 	/* netboot.c */
-extern int	netboot_setup(void);
 
 lck_grp_t * proc_lck_grp;
 lck_grp_t * proc_slock_grp;
@@ -386,6 +405,10 @@ bsd_init(void)
 	struct vfs_context context;
 	kern_return_t	ret;
 	struct ucred temp_cred;
+	struct posix_cred temp_pcred;
+#if NFSCLIENT || CONFIG_IMAGEBOOT
+	boolean_t       netboot = FALSE;
+#endif
 
 #define bsd_init_kprintf(x...) /* kprintf("bsd_init: " x) */
 
@@ -427,7 +450,7 @@ bsd_init(void)
 	proc_lck_grp_attr= lck_grp_attr_alloc_init();
 
 	proc_lck_grp = lck_grp_alloc_init("proc",  proc_lck_grp_attr);
-#ifndef CONFIG_EMBEDDED
+#if CONFIG_FINE_LOCK_GROUPS
 	proc_slock_grp = lck_grp_alloc_init("proc-slock",  proc_lck_grp_attr);
 	proc_fdmlock_grp = lck_grp_alloc_init("proc-fdmlock",  proc_lck_grp_attr);
 	proc_mlock_grp = lck_grp_alloc_init("proc-mlock",  proc_lck_grp_attr);
@@ -440,20 +463,21 @@ bsd_init(void)
 #endif
 #endif
 
-#ifdef CONFIG_EMBEDDED
-	proc_list_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
-	proc_klist_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
-	lck_mtx_init(&kernproc->p_mlock, proc_lck_grp, proc_lck_attr);
-	lck_mtx_init(&kernproc->p_fdmlock, proc_lck_grp, proc_lck_attr);
-	lck_spin_init(&kernproc->p_slock, proc_lck_grp, proc_lck_attr);
-#else	
+#if CONFIG_FINE_LOCK_GROUPS
 	proc_list_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
 	proc_klist_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr);
 	lck_mtx_init(&kernproc->p_mlock, proc_mlock_grp, proc_lck_attr);
 	lck_mtx_init(&kernproc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
 	lck_spin_init(&kernproc->p_slock, proc_slock_grp, proc_lck_attr);
+#else
+	proc_list_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
+	proc_klist_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
+	lck_mtx_init(&kernproc->p_mlock, proc_lck_grp, proc_lck_attr);
+	lck_mtx_init(&kernproc->p_fdmlock, proc_lck_grp, proc_lck_attr);
+	lck_spin_init(&kernproc->p_slock, proc_lck_grp, proc_lck_attr);
 #endif
 
+	assert(bsd_simul_execs != 0);
 	execargs_cache_lock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr);
 	execargs_cache_size = bsd_simul_execs;
 	execargs_free_count = bsd_simul_execs;
@@ -473,6 +497,14 @@ bsd_init(void)
 	 */
 	mac_policy_initbsd();
 	kernproc->p_mac_enforce = 0;
+
+#if defined (__i386__) || defined (__x86_64__)
+	/*
+	 * We currently only support this on i386/x86_64, as that is the
+	 * only lock code we have instrumented so far.
+	 */
+	check_policy_init(policy_check_flags);
+#endif
 #endif /* MAC */
 
 	/*
@@ -483,15 +515,16 @@ bsd_init(void)
 	kernproc->p_pgrp = &pgrp0;
 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
 	LIST_INIT(&pgrp0.pg_members);
-#ifdef CONFIG_EMBEDDED
-	lck_mtx_init(&pgrp0.pg_mlock, proc_lck_grp, proc_lck_attr);	
-#else
+#ifdef CONFIG_FINE_LOCK_GROUPS
 	lck_mtx_init(&pgrp0.pg_mlock, proc_mlock_grp, proc_lck_attr);
+#else
+	lck_mtx_init(&pgrp0.pg_mlock, proc_lck_grp, proc_lck_attr);
 #endif
 	/* There is no other bsd thread at this point, so this is safe without the pgrp lock */
 	LIST_INSERT_HEAD(&pgrp0.pg_members, kernproc, p_pglist);
 	kernproc->p_listflag |= P_LIST_INPGRP;
 	kernproc->p_pgrpid = 0;
+	kernproc->p_uniqueid = 0;
 
 	pgrp0.pg_session = &session0;
 	pgrp0.pg_membercnt = 1;
@@ -499,10 +532,10 @@ bsd_init(void)
 	session0.s_count = 1;
 	session0.s_leader = kernproc;
 	session0.s_listflags = 0;
-#ifdef CONFIG_EMBEDDED
-	lck_mtx_init(&session0.s_mlock, proc_lck_grp, proc_lck_attr);
-#else
+#ifdef CONFIG_FINE_LOCK_GROUPS
 	lck_mtx_init(&session0.s_mlock, proc_mlock_grp, proc_lck_attr);
+#else
+	lck_mtx_init(&session0.s_mlock, proc_lck_grp, proc_lck_attr);
 #endif
 	LIST_INSERT_HEAD(SESSHASH(0), &session0, s_hash);
 	proc_list_unlock();
@@ -515,6 +548,14 @@ bsd_init(void)
 	
 	kernproc->p_stat = SRUN;
 	kernproc->p_flag = P_SYSTEM;
+	kernproc->p_lflag = 0;
+	kernproc->p_ladvflag = 0;
+	
+#if DEVELOPMENT || DEBUG
+	if (bootarg_disable_aslr)
+		kernproc->p_flag |= P_DISABLE_ASLR;
+#endif
+
 	kernproc->p_nice = NZERO;
 	kernproc->p_pptr = kernproc;
 
@@ -531,15 +572,22 @@ bsd_init(void)
 	 */
 	bsd_init_kprintf("calling bzero\n");
 	bzero(&temp_cred, sizeof(temp_cred));
-	temp_cred.cr_ngroups = 1;
+	bzero(&temp_pcred, sizeof(temp_pcred));
+	temp_pcred.cr_ngroups = 1;
 
-	temp_cred.cr_audit.as_aia_p = &audit_default_aia;
-        /* XXX the following will go away with cr_au */
-	temp_cred.cr_au.ai_auid = AU_DEFAUDITID;
+	temp_cred.cr_audit.as_aia_p = audit_default_aia_p;
 
 	bsd_init_kprintf("calling kauth_cred_create\n");
+	/*
+	 * We have to label the temp cred before we create from it to
+	 * properly set cr_ngroups, or the create will fail.
+	 */
+	posix_cred_label(&temp_cred, &temp_pcred);
 	kernproc->p_ucred = kauth_cred_create(&temp_cred); 
 
+	/* update cred on proc */
+	PROC_UPDATE_CREDS_ONPROC(kernproc);
+
 	/* give the (already existing) initial thread a reference on it */
 	bsd_init_kprintf("calling kauth_cred_ref\n");
 	kauth_cred_ref(kernproc->p_ucred);
@@ -598,6 +646,7 @@ bsd_init(void)
 		vm_offset_t	minimum;
 
 		bsd_init_kprintf("calling kmem_suballoc\n");
+		assert(bsd_pageable_map_size != 0);
 		ret = kmem_suballoc(kernel_map,
 				&minimum,
 				(vm_size_t)bsd_pageable_map_size,
@@ -630,15 +679,15 @@ bsd_init(void)
 	bsd_init_kprintf("calling IOKitInitializeTime\n");
 	IOKitInitializeTime();
 
-	if (turn_on_log_leaks && !new_nkdbufs)
-		new_nkdbufs = 200000;
-	start_kern_tracing(new_nkdbufs);
-	if (turn_on_log_leaks)
-		log_leaks = 1;
-
 	bsd_init_kprintf("calling ubc_init\n");
 	ubc_init();
 
+	/*
+	 * Initialize device-switches.
+	 */
+	bsd_init_kprintf("calling devsw_init() \n");
+	devsw_init();
+
 	/* Initialize the file systems. */
 	bsd_init_kprintf("calling vfsinit\n");
 	vfsinit();
@@ -702,6 +751,8 @@ bsd_init(void)
 	psem_cache_init();
 	bsd_init_kprintf("calling time_zone_slock_init\n");
 	time_zone_slock_init();
+	bsd_init_kprintf("calling select_wait_queue_init\n");
+	select_wait_queue_init();
 
 	/* Stack snapshot facility lock */
 	stackshot_lock_init();
@@ -729,6 +780,12 @@ bsd_init(void)
 	kernproc->p_fd->fd_cdir = NULL;
 	kernproc->p_fd->fd_rdir = NULL;
 
+#if CONFIG_FREEZE
+	/* Initialise background hibernation */
+	bsd_init_kprintf("calling kern_hibernation_init\n");
+	kern_hibernation_init();
+#endif
+
 #if CONFIG_EMBEDDED
 	/* Initialize kernel memory status notifications */
 	bsd_init_kprintf("calling kern_memorystatus_init\n");
@@ -780,6 +837,10 @@ bsd_init(void)
 	
 	/* register user tunnel kernel control handler */
 	utun_register_control();
+	netsrc_init();
+
+	/* wait for the network domains to finish initializing */
+	domainfin();
 #endif /* NETWORKING */
 
 	bsd_init_kprintf("calling vnode_pager_bootstrap\n");
@@ -794,61 +855,22 @@ bsd_init(void)
 	bsd_init_kprintf("calling inittodr\n");
 	inittodr(0);
 
-#if CONFIG_EMBEDDED
-	{
-		/* print out early VM statistics */
-		kern_return_t kr1;
-		vm_statistics_data_t stat;
-		mach_msg_type_number_t count;
-
-		count = HOST_VM_INFO_COUNT;
-		kr1 = host_statistics(host_self(),
-				      HOST_VM_INFO,
-				      (host_info_t)&stat,
-				      &count);
-		kprintf("Mach Virtual Memory Statistics (page size of 4096) bytes\n"
-			"Pages free:\t\t\t%u.\n"
-			"Pages active:\t\t\t%u.\n"
-			"Pages inactive:\t\t\t%u.\n"
-			"Pages wired down:\t\t%u.\n"
-			"\"Translation faults\":\t\t%u.\n"
-			"Pages copy-on-write:\t\t%u.\n"
-			"Pages zero filled:\t\t%u.\n"
-			"Pages reactivated:\t\t%u.\n"
-			"Pageins:\t\t\t%u.\n"
-			"Pageouts:\t\t\t%u.\n"
-			"Object cache: %u hits of %u lookups (%d%% hit rate)\n",
-
-			stat.free_count,
-			stat.active_count,
-			stat.inactive_count,
-			stat.wire_count,
-			stat.faults,
-			stat.cow_faults,
-			stat.zero_fill_count,
-			stat.reactivations,
-			stat.pageins,
-			stat.pageouts,
-			stat.hits,
-			stat.lookups,
-			(stat.hits == 0) ? 100 :
-			                   ((stat.lookups * 100) / stat.hits));
-	}
-#endif /* CONFIG_EMBEDDED */
-	
 	/* Mount the root file system. */
 	while( TRUE) {
 		int err;
 
 		bsd_init_kprintf("calling setconf\n");
 		setconf();
+#if NFSCLIENT
+		netboot = (mountroot == netboot_mountroot);
+#endif
 
 		bsd_init_kprintf("vfs_mountroot\n");
 		if (0 == (err = vfs_mountroot()))
 			break;
 		rootdevice[0] = '\0';
 #if NFSCLIENT
-		if (mountroot == netboot_mountroot) {
+		if (netboot) {
 			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
 			vc_progress_set(FALSE, 0);
 			for (i=1; 1; i*=2) {
@@ -880,8 +902,10 @@ bsd_init(void)
 	filedesc0.fd_cdir = rootvnode;
 
 #if NFSCLIENT
-	if (mountroot == netboot_mountroot) {
+	if (netboot) {
 		int err;
+
+		netboot = TRUE;
 		/* post mount setup */
 		if ((err = netboot_setup()) != 0) {
 			PE_display_icon( 0, "noroot");  /* XXX a netboot-specific icon would be nicer */
@@ -903,19 +927,12 @@ bsd_init(void)
 	 * See if a system disk image is present. If so, mount it and
 	 * switch the root vnode to point to it
 	 */ 
-  
-	if(imageboot_needed()) {
-		int err;
-
-		/* An image was found */
-		if((err = imageboot_setup())) {
-			/*
-			 * this is not fatal. Keep trying to root
-			 * off the original media
-			 */
-			printf("%s: imageboot could not find root, %d\n",
-				__FUNCTION__, err);
-		}
+	if (netboot == FALSE && imageboot_needed()) {
+		/*
+		 * An image was found.  No turning back: we are committed
+		 * to rooting from the disk image.
+		 */
+		imageboot_setup(); 
 	}
 #endif /* CONFIG_IMAGEBOOT */
   
@@ -943,15 +960,12 @@ bsd_init(void)
 	kernproc->p_flag |= P_LP64;
 	printf("Kernel is LP64\n");
 #endif
+
+	pal_kernel_announce();
+
 #if __i386__ || __x86_64__
 	/* this should be done after the root filesystem is mounted */
 	error = set_archhandler(kernproc, CPU_TYPE_POWERPC);
-	// 10/30/08 - gab: <rdar://problem/6324501>
-	// if default 'translate' can't be found, see if the understudy is available
-	if (ENOENT == error) {
-		strlcpy(exec_archhandler_ppc.path, kRosettaStandIn_str, MAXPATHLEN);
-		error = set_archhandler(kernproc, CPU_TYPE_POWERPC);
-	}
 	if (error) /* XXX make more generic */
 		exec_archhandler_ppc.path[0] = 0;
 #endif	
@@ -1117,13 +1131,19 @@ parse_bsd_args(void)
 	if (PE_parse_boot_argn("-x", namep, sizeof (namep))) /* safe boot */
 		boothowto |= RB_SAFEBOOT;
 
-	if (PE_parse_boot_argn("-l", namep, sizeof (namep))) /* leaks logging */
-		turn_on_log_leaks = 1;
-
 	/* disable 64 bit grading */
 	if (PE_parse_boot_argn("-no64exec", namep, sizeof (namep)))
 		bootarg_no64exec = 1;
 
+	/* disable vnode_cache_is_authorized() by setting vnode_cache_defeat */
+	if (PE_parse_boot_argn("-vnode_cache_defeat", namep, sizeof (namep)))
+		bootarg_vnode_cache_defeat = 1;
+
+#if DEVELOPMENT || DEBUG
+	if (PE_parse_boot_argn("-disable_aslr", namep, sizeof (namep)))
+		bootarg_disable_aslr = 1;
+#endif
+
 	PE_parse_boot_argn("ncl", &ncl, sizeof (ncl));
 	if (PE_parse_boot_argn("nbuf", &max_nbuf_headers,
 				sizeof (max_nbuf_headers))) {
@@ -1132,11 +1152,20 @@ parse_bsd_args(void)
 #if !defined(SECURE_KERNEL)
 	PE_parse_boot_argn("kmem", &setup_kmem, sizeof (setup_kmem));
 #endif
-	PE_parse_boot_argn("trace", &new_nkdbufs, sizeof (new_nkdbufs));
+
+#if CONFIG_MACF
+#if defined (__i386__) || defined (__x86_64__)
+	PE_parse_boot_argn("policy_check", &policy_check_flags, sizeof (policy_check_flags));
+#endif
+#endif	/* CONFIG_MACF */
 
 	if (PE_parse_boot_argn("msgbuf", &msgbuf, sizeof (msgbuf))) {
 		log_setsize(msgbuf);
 	}
+
+	if (PE_parse_boot_argn("-novfscache", namep, sizeof(namep))) {
+		nc_disabled = 1;
+	}
 }
 
 void
@@ -1165,10 +1194,13 @@ bsd_exec_setup(int scale)
 			break;
 			
 	}
-	bsd_pageable_map_size = (bsd_simul_execs * (NCARGS + PAGE_SIZE));
+	bsd_pageable_map_size = (bsd_simul_execs * BSD_PAGEABLE_SIZE_PER_EXEC);
 }
 
 #if !NFSCLIENT
+int 
+netboot_root(void);
+
 int 
 netboot_root(void)
 {
diff --git a/bsd/kern/bsd_stubs.c b/bsd/kern/bsd_stubs.c
index 64127d32a..19da61270 100644
--- a/bsd/kern/bsd_stubs.c
+++ b/bsd/kern/bsd_stubs.c
@@ -31,8 +31,10 @@
 #include <mach/mach_types.h>
 #include <mach/vm_prot.h>
 #include <vm/vm_kern.h>
+#include <sys/stat.h>
 #include <vm/vm_map.h>
 #include <sys/systm.h>
+#include <kern/assert.h>
 #include <sys/conf.h>
 #include <sys/proc_internal.h>
 #include <sys/buf.h>	/* for SET */
@@ -49,6 +51,9 @@ extern vm_offset_t kmem_mb_alloc(vm_map_t, int, int);
 void	pcb_synch(void);
 void	tbeproc(void *);
 
+TAILQ_HEAD(,devsw_lock) devsw_locks;
+lck_mtx_t devsw_lock_list_mtx;
+lck_grp_t *devsw_lock_grp;
 
 /* Just to satisfy pstat command */
 int     dmmin, dmmax, dmtext;
@@ -280,6 +285,7 @@ cdevsw_remove(int index, struct cdevsw * csw)
 		return(-1);
 	}
 	cdevsw[index] = nocdev;
+	cdevsw_flags[index] = 0;
 	return(index);
 }
 
@@ -303,6 +309,28 @@ cdevsw_add_with_bdev(int index, struct cdevsw * csw, int bdev)
 	return (index);
 }
 
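+/*
+ * Mark a character device's select entry point as kqueue-aware by setting
+ * CDEVSW_SELECT_KQUEUE (plus CDEVSW_USE_OFFSET when use_offset is set).
+ * Returns -1 if index is out of range or csw does not match the installed
+ * switch entry.
+ */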
+int
+cdevsw_setkqueueok(int index, struct cdevsw *csw, int use_offset)
+{
+	struct cdevsw *devsw;
+	uint64_t flags = CDEVSW_SELECT_KQUEUE;
+
+	devsw = &cdevsw[index];
+	if ((index < 0) || (index >= nchrdev) ||
+	    (memcmp((char *)devsw, 
+		          (char *)csw, 
+			  sizeof(struct cdevsw)) != 0)) {
+		return(-1);
+	}
+
+	if (use_offset) {
+		flags |= CDEVSW_USE_OFFSET;
+	}
+
+	cdevsw_flags[index] = flags;
+	return 0;
+}
+
 #include <pexpert/pexpert.h>	/* for PE_parse_boot_arg */
 
 void
@@ -336,3 +364,71 @@ bsd_hostname(char *buf, int bufsize, int *len)
 	}    
 }
 
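+/*
+ * devsw_lock() serializes callers per (dev, mode) pair: if another thread
+ * already holds the lock for the same device and mode, msleep() until it
+ * is released, then record the calling thread as the new holder.
+ */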
+void
+devsw_lock(dev_t dev, int mode)
+{
+	devsw_lock_t newlock, tmplock;
+	int res;
+
+	assert(0 <= major(dev) && major(dev) < nchrdev);	
+	assert(mode == S_IFCHR || mode == S_IFBLK);
+
+	MALLOC(newlock, devsw_lock_t, sizeof(struct devsw_lock), M_TEMP, M_WAITOK | M_ZERO);
+	newlock->dl_dev = dev;
+	newlock->dl_thread = current_thread();
+	newlock->dl_mode = mode;
+	
+	lck_mtx_lock_spin(&devsw_lock_list_mtx);
+retry:
+	TAILQ_FOREACH(tmplock, &devsw_locks, dl_list) {
+		if (tmplock->dl_dev == dev && tmplock->dl_mode == mode) {
+			res = msleep(tmplock, &devsw_lock_list_mtx, PVFS, "devsw_lock", NULL);	
+			assert(res == 0);
+			goto retry;
+		}
+	}
+
+	TAILQ_INSERT_TAIL(&devsw_locks, newlock, dl_list);
+	lck_mtx_unlock(&devsw_lock_list_mtx);
+
+}
+void
+devsw_unlock(dev_t dev, int mode)
+{
+	devsw_lock_t tmplock;
+
+	assert(0 <= major(dev) && major(dev) < nchrdev);	
+
+	lck_mtx_lock_spin(&devsw_lock_list_mtx);
+
+	TAILQ_FOREACH(tmplock, &devsw_locks, dl_list) {
+		if (tmplock->dl_dev == dev && tmplock->dl_mode == mode) {	
+			break;
+		}
+	}
+
+	if (tmplock == NULL) {
+		panic("Trying to unlock, and couldn't find lock.");
+	}
+
+	if (tmplock->dl_thread != current_thread()) {
+		panic("Trying to unlock, but I don't hold the lock.");
+	}
+
+	wakeup(tmplock);
+	TAILQ_REMOVE(&devsw_locks, tmplock, dl_list);
+	
+	lck_mtx_unlock(&devsw_lock_list_mtx);
+	
+	FREE(tmplock, M_TEMP);
+}
+
+void
+devsw_init()
+{
+	devsw_lock_grp = lck_grp_alloc_init("devsw", NULL);
+	assert(devsw_lock_grp != NULL);
+
+	lck_mtx_init(&devsw_lock_list_mtx, devsw_lock_grp, NULL);
+	TAILQ_INIT(&devsw_locks);
+}
diff --git a/bsd/kern/decmpfs.c b/bsd/kern/decmpfs.c
index d0483c0e4..33e3b3040 100644
--- a/bsd/kern/decmpfs.c
+++ b/bsd/kern/decmpfs.c
@@ -204,11 +204,12 @@ _decmp_get_func(uint32_t type, int offset)
     if (IOCatalogueMatchingDriversPresent(providesName)) {
         // there is a kext that says it will register for this type, so let's wait for it
         char resourceName[80];
+        uint64_t delay = 10000000ULL; // 10 milliseconds.
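+        // The wait below doubles `delay' after each timeout, backing off
+        // while the matching decompressor kext loads and registers.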
         snprintf(resourceName, sizeof(resourceName), "com.apple.AppleFSCompression.Type%u", type);
         printf("waiting for %s\n", resourceName);
         while(decompressors[type] == NULL) {
             lck_rw_done(decompressorsLock); // we have to unlock to allow the kext to register
-            if (IOServiceWaitForMatchingResource(resourceName, 60)) {
+            if (IOServiceWaitForMatchingResource(resourceName, delay)) {
                 break;
             }
             if (!IOCatalogueMatchingDriversPresent(providesName)) {
@@ -217,6 +218,7 @@ _decmp_get_func(uint32_t type, int offset)
                 break;
             }
             printf("still waiting for %s\n", resourceName);
+            delay *= 2;
             lck_rw_lock_shared(decompressorsLock);
         }
         // IOKit says the kext is loaded, so it should be registered too!
@@ -659,11 +661,11 @@ decmpfs_file_is_compressed(vnode_t vp, decmpfs_cnode *cp)
             return 0;
     }
     
-    if (!vnode_isreg(vp)) {
-        /* only regular files can be compressed */
-        ret = FILE_IS_NOT_COMPRESSED;
-        goto done;
-    }
+//    if (!vnode_isreg(vp)) {
+//        /* only regular files can be compressed */
+//        ret = FILE_IS_NOT_COMPRESSED;
+//        goto done;
+//    }
     
     mp = vnode_mount(vp); 
     if (mp == NULL) {
@@ -1137,7 +1139,7 @@ decompress:
     else {
         if (!abort_pagein) {
             /* commit our pages */
-			kr = commit_upl(pl, pl_offset, total_size, UPL_COMMIT_FREE_ON_EMPTY | UPL_COMMIT_INACTIVATE, 0);
+			kr = commit_upl(pl, pl_offset, total_size, UPL_COMMIT_FREE_ON_EMPTY, 0);
         }
     }
     
diff --git a/bsd/kern/imageboot.c b/bsd/kern/imageboot.c
index 0ed79dc69..8bc4ede36 100644
--- a/bsd/kern/imageboot.c
+++ b/bsd/kern/imageboot.c
@@ -35,6 +35,7 @@
 #include <sys/filedesc.h>
 #include <sys/vnode_internal.h>
 #include <sys/imageboot.h>
+#include <kern/assert.h>
 
 #include <pexpert/pexpert.h>
 
@@ -52,33 +53,68 @@ extern char rootdevice[];
 #endif
 
 extern int di_root_image(const char *path, char devname[], dev_t *dev_p);
+static boolean_t imageboot_setup_new(void);
 
 #define kIBFilePrefix "file://"
 
-int
+__private_extern__ int
+imageboot_format_is_valid(const char *root_path)
+{
+	return (strncmp(root_path, kIBFilePrefix,
+				strlen(kIBFilePrefix)) == 0);
+}
+
+static void
+vnode_get_and_drop_always(vnode_t vp) 
+{
+	vnode_getalways(vp);
+	vnode_rele(vp);
+	vnode_put(vp);
+}
+
+__private_extern__ int
 imageboot_needed(void)
 {
 	int result = 0;
 	char *root_path = NULL;
-
+	
 	DBG_TRACE("%s: checking for presence of root path\n", __FUNCTION__);
 
 	MALLOC_ZONE(root_path, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
 	if (root_path == NULL)
 		panic("%s: M_NAMEI zone exhausted", __FUNCTION__);
 
-	if(PE_parse_boot_argn("rp", root_path, MAXPATHLEN) == TRUE) {
-		/* Got it, now verify scheme */
+	/* Check for first layer */
+	if (!(PE_parse_boot_argn("rp0", root_path, MAXPATHLEN) || 
+			PE_parse_boot_argn("rp", root_path, MAXPATHLEN) ||
+			PE_parse_boot_argn(IMAGEBOOT_ROOT_ARG, root_path, MAXPATHLEN))) {
+		goto out;
+	}
+	
+	/* Sanity-check first layer */
+	if (imageboot_format_is_valid(root_path)) {
+		DBG_TRACE("%s: Found %s\n", __FUNCTION__, root_path);
+	} else {
+		goto out;
+	}
 
-		if (strncmp(root_path, kIBFilePrefix,
-					strlen(kIBFilePrefix)) == 0) {
-			DBG_TRACE("%s: Found %s\n", __FUNCTION__, root_path);
-			result = 1;
-		} else {
-			DBG_TRACE("%s: Invalid URL scheme for %s\n",
-					__FUNCTION__, root_path);
-		}
+	result = 1;
+
+	/* Check for second layer */
+	if (!(PE_parse_boot_argn("rp1", root_path, MAXPATHLEN) ||
+			PE_parse_boot_argn(IMAGEBOOT_CONTAINER_ARG, root_path, MAXPATHLEN))) {
+		goto out;
+	}
+
+	/* Sanity-check second layer */
+	if (imageboot_format_is_valid(root_path)) {
+		DBG_TRACE("%s: Found %s\n", __FUNCTION__, root_path);
+	} else {
+		panic("%s: Invalid URL scheme for %s\n",
+				__FUNCTION__, root_path);
 	}
+
+out:
 	FREE_ZONE(root_path, MAXPATHLEN, M_NAMEI);
 
 	return (result);
@@ -86,97 +122,193 @@ imageboot_needed(void)
 
 
 /*
- * We know there's an image. Attach it, and
- * switch over to root off it
- *
- * NB: p is always kernproc
+ * Swaps in new root filesystem based on image path.
+ * Current root filesystem is removed from mount list and
+ * tagged MNTK_BACKS_ROOT, MNT_ROOTFS is cleared on it, and 
+ * "rootvnode" is reset.  Root vnode of currentroot filesystem 
+ * "rootvnode" is reset.  The root vnode of the outgoing root filesystem
+ * is kept with a usecount (no iocount).
-
-int
-imageboot_setup()
+__private_extern__ int
+imageboot_mount_image(const char *root_path, int height)
 {
-	dev_t       dev;
-	int         error = 0;
-	char *root_path = NULL;
+	dev_t       	dev;
+	int 		error;
+	vnode_t 	old_rootvnode = NULL;
+	vnode_t 	newdp;
+	mount_t 	new_rootfs;
 
-	DBG_TRACE("%s: entry\n", __FUNCTION__);
-
-	MALLOC_ZONE(root_path, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
-	if (root_path == NULL)
-		return (ENOMEM);
-
-	if(PE_parse_boot_argn("rp", root_path, MAXPATHLEN) == FALSE) {
-		error = ENOENT;
-		goto done;
-	}
-
-	printf("%s: root image url is %s\n", __FUNCTION__, root_path);
 	error = di_root_image(root_path, rootdevice, &dev);
-	if(error) {
-		printf("%s: di_root_image failed: %d\n", __FUNCTION__, error);
-		goto done;
+	if (error) {
+		panic("%s: di_root_image failed: %d\n", __FUNCTION__, error);
 	}
 
 	rootdev = dev;
 	mountroot = NULL;
 	printf("%s: root device 0x%x\n", __FUNCTION__, rootdev);
 	error = vfs_mountroot();
+	if (error != 0) {
+		panic("vfs_mountroot() failed.\n");
+	}
 
-	if (error == 0 && rootvnode != NULL) {
-		vnode_t newdp, old_rootvnode;
-		mount_t new_rootfs, old_rootfs;
+	/*
+	 * Get the vnode for '/'.
+	 * Set fdp->fd_fd.fd_cdir to reference it.
+	 */
+	if (VFS_ROOT(TAILQ_LAST(&mountlist,mntlist), &newdp, vfs_context_kernel()))
+		panic("%s: cannot find root vnode", __FUNCTION__);
 
-		/*
-		 * Get the vnode for '/'.
-		 * Set fdp->fd_fd.fd_cdir to reference it.
-		 */
-		if (VFS_ROOT(TAILQ_LAST(&mountlist,mntlist), &newdp, vfs_context_kernel()))
-			panic("%s: cannot find root vnode", __FUNCTION__);
+	if (rootvnode != NULL) {
+		/* remember the old rootvnode, but remove it from mountlist */
+		mount_t 	old_rootfs;
 
 		old_rootvnode = rootvnode;
 		old_rootfs = rootvnode->v_mount;
-
+	
 		mount_list_remove(old_rootfs);
-
+	
 		mount_lock(old_rootfs);
 #ifdef CONFIG_IMGSRC_ACCESS
 		old_rootfs->mnt_kern_flag |= MNTK_BACKS_ROOT;
 #endif /* CONFIG_IMGSRC_ACCESS */
 		old_rootfs->mnt_flag &= ~MNT_ROOTFS;
 		mount_unlock(old_rootfs);
+	}
 
-		rootvnode = newdp;
+	/* switch to the new rootvnode */
+	rootvnode = newdp;
 
-		new_rootfs = rootvnode->v_mount;
-		mount_lock(new_rootfs);
-		new_rootfs->mnt_flag |= MNT_ROOTFS;
-		mount_unlock(new_rootfs);
+	new_rootfs = rootvnode->v_mount;
+	mount_lock(new_rootfs);
+	new_rootfs->mnt_flag |= MNT_ROOTFS;
+	mount_unlock(new_rootfs);
 
-		vnode_ref(newdp);
-		vnode_put(newdp);
-		filedesc0.fd_cdir = newdp;
-		DBG_TRACE("%s: root switched\n", __FUNCTION__);
+	vnode_ref(newdp);
+	vnode_put(newdp);
+	filedesc0.fd_cdir = newdp;
+	DBG_TRACE("%s: root switched\n", __FUNCTION__);
 
+	if (old_rootvnode != NULL) {
 #ifdef CONFIG_IMGSRC_ACCESS
-		if (PE_imgsrc_mount_supported()) {
-			imgsrc_rootvnode = old_rootvnode;
-		} else {
-			vnode_getalways(old_rootvnode);
-			vnode_rele(old_rootvnode);
-			vnode_put(old_rootvnode);
-		}
+	    if (height >= 0 && PE_imgsrc_mount_supported()) {
+		imgsrc_rootvnodes[height] = old_rootvnode;
+	    } else {
+		vnode_get_and_drop_always(old_rootvnode);
+	    }
 #else 
-		vnode_getalways(old_rootvnode);
-		vnode_rele(old_rootvnode);
-		vnode_put(old_rootvnode);
+	    vnode_get_and_drop_always(old_rootvnode);
 #endif /* CONFIG_IMGSRC_ACCESS */
+	}
+	return 0;
+}
 
+static boolean_t 
+imageboot_setup_new()
+{
+	int error;
+	char *root_path = NULL;
+	int height = 0;
+	boolean_t done = FALSE;
+
+	MALLOC_ZONE(root_path, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
+	assert(root_path != NULL);
+
+	if(PE_parse_boot_argn(IMAGEBOOT_CONTAINER_ARG, root_path, MAXPATHLEN) == TRUE) {
+		printf("%s: container image url is %s\n", __FUNCTION__, root_path);
+		error = imageboot_mount_image(root_path, height);
+		if (error != 0) {
+			panic("Failed to mount container image.");
+		}
+
+		height++;
+	}
+
+	if (PE_parse_boot_argn(IMAGEBOOT_ROOT_ARG, root_path, MAXPATHLEN) == FALSE) {
+		if (height > 0) {
+			panic("%s specified without %s?\n", IMAGEBOOT_CONTAINER_ARG, IMAGEBOOT_ROOT_ARG);
+		}
+		goto out;
 
 	}
+
+	printf("%s: root image url is %s\n", __FUNCTION__, root_path);
+
+	error = imageboot_mount_image(root_path, height);
+	if (error != 0) {
+		panic("Failed to mount root image.");
+	}
+
+	done = TRUE;
+
+out:
+	FREE_ZONE(root_path, MAXPATHLEN, M_NAMEI);
+	return done;
+}
+
+__private_extern__ void
+imageboot_setup()
+{
+	int         error = 0;
+	char *root_path = NULL;
+
+	DBG_TRACE("%s: entry\n", __FUNCTION__);
+
+	if (rootvnode == NULL) {	
+		panic("imageboot_setup: rootvnode is NULL.");
+	}
+
+	/*
+	 * New boot-arg scheme:
+	 * 	root-dmg : the dmg that will be the root filesystem.
+	 * 	container-dmg : an optional dmg that contains the root-dmg.
+	 */
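+	/*
+	 * For example (hypothetical paths):
+	 * 	container-dmg=file:///outer.dmg root-dmg=file:///inner.dmg
+	 * mounts outer.dmg first, then roots from inner.dmg found inside it.
+	 */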
+	if (imageboot_setup_new()) {
+		return;
+	}
+	
+	MALLOC_ZONE(root_path, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
+	assert(root_path != NULL);
+
+	/*
+	 * Look for outermost disk image to root from.  If we're doing a nested boot,
+	 * there's some sense in which the outer image never needs to be the root filesystem,
+	 * but it does need very similar treatment: it must not be unmounted, needs a fake
+	 * device vnode created for it, and should not show up in getfsstat() until exposed 
+	 * with MNT_IMGSRC. We just make it the temporary root.
+	 */
+	if((PE_parse_boot_argn("rp", root_path, MAXPATHLEN) == FALSE) &&
+		(PE_parse_boot_argn("rp0", root_path, MAXPATHLEN) == FALSE)) {
+		panic("%s: no valid path to image.\n", __FUNCTION__);
+	}
+
+	printf("%s: root image url is %s\n", __FUNCTION__, root_path);
+	
+	error = imageboot_mount_image(root_path, 0);
+	if (error) {
+		panic("Failed on first stage of imageboot.");
+	}
+
+	/*
+	 * See if we are rooting from a nested image
+	 */
+	if(PE_parse_boot_argn("rp1", root_path, MAXPATHLEN) == FALSE) {
+		goto done;
+	}
+	
+	printf("%s: second level root image url is %s\n", __FUNCTION__, root_path);
+
+	/*
+	 * If we fail to set up second image, it's not a given that we
+	 * can safely root off the first.  
+	 */
+	error = imageboot_mount_image(root_path, 1);
+	if (error) {
+		panic("Failed on second stage of imageboot.");	
+	}
+
 done:
 	FREE_ZONE(root_path, MAXPATHLEN, M_NAMEI);
 
 	DBG_TRACE("%s: exit\n", __FUNCTION__);
 
-	return (error);
+	return;
 }
diff --git a/bsd/kern/kdebug.c b/bsd/kern/kdebug.c
index 3eb9043dd..f7c7fa73a 100644
--- a/bsd/kern/kdebug.c
+++ b/bsd/kern/kdebug.c
@@ -20,6 +20,7 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
+
 #include <machine/spl.h>
 
 #include <sys/errno.h>
@@ -30,6 +31,7 @@
 #include <sys/sysctl.h>
 #include <sys/kdebug.h>
 #include <sys/sysproto.h>
+#include <sys/bsdtask_info.h>
 
 #define HZ      100
 #include <mach/clock_types.h>
@@ -38,11 +40,18 @@
 #include <machine/machine_routines.h>
 
 #if defined(__i386__) || defined(__x86_64__)
-#include <i386/rtclock.h>
+#include <i386/rtclock_protos.h>
+#include <i386/mp.h>
+#include <i386/machine_routines.h>
 #endif
+
+#include <kern/clock.h>
+
 #include <kern/thread.h>
 #include <kern/task.h>
 #include <kern/debug.h>
+#include <kern/kalloc.h>
+#include <kern/cpu_data.h>
 #include <kern/assert.h>
 #include <vm/vm_kern.h>
 #include <sys/lock.h>
@@ -54,10 +63,14 @@
 #include <sys/vnode.h>
 #include <sys/vnode_internal.h>
 #include <sys/fcntl.h>
+#include <sys/file_internal.h>
+#include <sys/ubc.h>
 
 #include <mach/mach_host.h>		/* for host_info() */
 #include <libkern/OSAtomic.h>
 
+#include <machine/pal_routines.h>
+
 /* XXX should have prototypes, but Mach does not provide one */
 void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *);
 int cpu_number(void);	/* XXX <machine/...> include path broken */
@@ -74,18 +87,14 @@ int kdbg_setrtcdec(kd_regtype *);
 int kdbg_setpidex(kd_regtype *);
 int kdbg_setpid(kd_regtype *);
 void kdbg_mapinit(void);
-int kdbg_reinit(void);
-int kdbg_bootstrap(void);
+int kdbg_reinit(boolean_t);
+int kdbg_bootstrap(boolean_t);
 
-static int create_buffers(void);
+static int create_buffers(boolean_t);
 static void delete_buffers(void);
 
 extern void IOSleep(int);
 
-#ifdef ppc
-extern uint32_t maxDec;
-#endif
-
 /* trace enable status */
 unsigned int kdebug_enable = 0;
 
@@ -96,23 +105,38 @@ unsigned int      kd_entropy_count  = 0;
 unsigned int      kd_entropy_indx   = 0;
 vm_offset_t       kd_entropy_buftomem = 0;
 
+#define MAX_ENTROPY_COUNT	(128 * 1024)
+
 
 #define SLOW_NOLOG	0x01
 #define SLOW_CHECKS	0x02
 #define SLOW_ENTROPY	0x04
-
-unsigned int kdebug_slowcheck = SLOW_NOLOG;
+#define SLOW_CHUD	0x08
 
 unsigned int kd_cpus;
 
 #define EVENTS_PER_STORAGE_UNIT		2048
 #define MIN_STORAGE_UNITS_PER_CPU	4
 
+#define POINTER_FROM_KDS_PTR(x) (&kd_bufs[x.buffer_index].kdsb_addr[x.offset])
+
+#define NATIVE_TRACE_FACILITY
+
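+/*
+ * Storage units are linked by this packed 32-bit handle instead of a raw
+ * pointer: buffer_index selects an entry in kd_bufs[] and offset selects a
+ * kd_storage within it (see POINTER_FROM_KDS_PTR).  KDS_PTR_NULL (all ones
+ * in .raw) terminates a list.
+ */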
+union kds_ptr {
+	struct {
+		uint32_t buffer_index:21;
+		uint16_t offset:11;
+	};
+	uint32_t raw;
+};
+
 struct kd_storage {
-	struct	kd_storage *kds_next;
-	kd_buf	*kds_bufptr;
-	kd_buf	*kds_buflast;
-	kd_buf	*kds_readlast;
+	union	kds_ptr kds_next;
+	uint32_t kds_bufindx;
+	uint32_t kds_bufcnt;
+	uint32_t kds_readlast;
+	boolean_t kds_lostevents;
+	uint64_t  kds_timestamp;
 
 	kd_buf	kds_records[EVENTS_PER_STORAGE_UNIT];
 };
@@ -120,34 +144,52 @@ struct kd_storage {
 #define MAX_BUFFER_SIZE			(1024 * 1024 * 128)
 #define N_STORAGE_UNITS_PER_BUFFER	(MAX_BUFFER_SIZE / sizeof(struct kd_storage))
 
-
 struct kd_storage_buffers {
 	struct	kd_storage	*kdsb_addr;
 	uint32_t		kdsb_size;
 };
 
-
-struct kd_storage *kds_free_list = NULL;
+#define KDS_PTR_NULL 0xffffffff
 struct kd_storage_buffers *kd_bufs = NULL;
 int	n_storage_units = 0;
 int	n_storage_buffers = 0;
+int	n_storage_threshold = 0;
+int	kds_waiter = 0;
+int	kde_waiter = 0;
 
+#pragma pack(0)
 struct kd_bufinfo {
-	struct  kd_storage *kd_list_head;
-	struct  kd_storage *kd_list_tail;
-	struct	kd_storage *kd_active;
-        uint64_t kd_prev_timebase;
+	union  kds_ptr kd_list_head;
+	union  kds_ptr kd_list_tail;
+	boolean_t kd_lostevents;
+	uint32_t _pad;
+	uint64_t kd_prev_timebase;
+	uint32_t num_bufs;
 } __attribute__(( aligned(CPU_CACHE_SIZE) ));
 
+struct kd_ctrl_page_t {
+	union kds_ptr kds_free_list;
+	uint32_t enabled	:1;
+	uint32_t _pad0		:31;
+	int			kds_inuse_count;
+	uint32_t kdebug_flags;
+	uint32_t kdebug_slowcheck;
+	uint32_t _pad1;
+	struct {
+		uint64_t tsc_base;
+		uint64_t ns_base;
+	} cpu_timebase[32]; // should be max number of actual logical cpus
+} kd_ctrl_page = {.kds_free_list = {.raw = KDS_PTR_NULL}, .enabled = 0, .kds_inuse_count = 0, .kdebug_flags = 0, .kdebug_slowcheck = SLOW_NOLOG};
+#pragma pack()
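+/*
+ * kd_ctrl_page packs the global trace state into one fixed-layout structure;
+ * kdbg_lock_init() passes its address and layout to the trace handler via
+ * trace_handler_map_ctrl_page(), presumably so an external debugging facility
+ * can locate the free list and flags directly.
+ */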
+
 struct kd_bufinfo *kdbip = NULL;
 
-#define KDCOPYBUF_COUNT	2048
+#define KDCOPYBUF_COUNT	8192
 #define KDCOPYBUF_SIZE	(KDCOPYBUF_COUNT * sizeof(kd_buf))
 kd_buf *kdcopybuf = NULL;
 
 
 unsigned int nkdbufs = 8192;
-unsigned int kdebug_flags = 0;
 unsigned int kdlog_beg=0;
 unsigned int kdlog_end=0;
 unsigned int kdlog_value1=0;
@@ -155,6 +197,7 @@ unsigned int kdlog_value2=0;
 unsigned int kdlog_value3=0;
 unsigned int kdlog_value4=0;
 
+static lck_spin_t * kdw_spin_lock;
 static lck_spin_t * kds_spin_lock;
 static lck_mtx_t  * kd_trace_mtx_sysctl;
 static lck_grp_t  * kd_trace_mtx_sysctl_grp;
@@ -185,10 +228,21 @@ unsigned int kd_mapcount = 0;
 vm_offset_t kd_maptomem = 0;
 
 off_t	RAW_file_offset = 0;
+int	RAW_file_written = 0;
+
+#define	RAW_FLUSH_SIZE	(2 * 1024 * 1024)
+
 
 pid_t global_state_pid = -1;       /* Used to control exclusive use of kd_buffer */
 
-#define DBG_FUNC_MASK 0xfffffffc
+#define DBG_FUNC_MASK	0xfffffffc
+
+#define INTERRUPT	0x01050000
+#define MACH_vmfault	0x01300008
+#define BSC_SysCall	0x040c0000
+#define MACH_SysCall	0x010c0000
+#define DBG_SCALL_MASK	0xffff0000
+
 
 /* task to string structure */
 struct tts
@@ -202,10 +256,10 @@ typedef struct tts tts_t;
 
 struct krt
 {
-  kd_threadmap *map;    /* pointer to the map buffer */
-  int count;
-  int maxcount;
-  struct tts *atts;
+	kd_threadmap *map;    /* pointer to the map buffer */
+	int count;
+	int maxcount;
+	struct tts *atts;
 };
 
 typedef struct krt krt_t;
@@ -215,24 +269,102 @@ typedef void (*kd_chudhook_fn) (uint32_t debugid, uintptr_t arg1,
 				uintptr_t arg2, uintptr_t arg3,
 				uintptr_t arg4, uintptr_t arg5);
 
-kd_chudhook_fn kdebug_chudhook = 0;   /* pointer to CHUD toolkit function */
+volatile kd_chudhook_fn kdebug_chudhook = 0;   /* pointer to CHUD toolkit function */
 
 __private_extern__ void stackshot_lock_init( void ) __attribute__((section("__TEXT, initcode")));
 
-/* Support syscall SYS_kdebug_trace */
-int
-kdebug_trace(__unused struct proc *p, struct kdebug_trace_args *uap, __unused int32_t *retval)
+static void
+kdbg_set_tracing_enabled(boolean_t enabled)
 {
-    if ( (kdebug_enable == 0) )
-        return(EINVAL);
-  
-    kernel_debug(uap->code, uap->arg1, uap->arg2, uap->arg3, uap->arg4, 0);
-    return(0);
+	int s = ml_set_interrupts_enabled(FALSE);
+	lck_spin_lock(kds_spin_lock);
+
+	if (enabled) {
+		kdebug_enable |= KDEBUG_ENABLE_TRACE;
+		kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
+		kd_ctrl_page.enabled = 1;
+	} else {
+		kdebug_enable &= ~KDEBUG_ENABLE_TRACE;
+		kd_ctrl_page.kdebug_slowcheck |= SLOW_NOLOG;
+		kd_ctrl_page.enabled = 0;
+	}
+	lck_spin_unlock(kds_spin_lock);
+	ml_set_interrupts_enabled(s);
 }
 
+static void
+kdbg_set_flags(int slowflag, int enableflag, boolean_t enabled)
+{
+	int s = ml_set_interrupts_enabled(FALSE);
+	lck_spin_lock(kds_spin_lock);
+
+	if (enabled) {
+		kd_ctrl_page.kdebug_slowcheck |= slowflag;
+		kdebug_enable |= enableflag;
+	} else {
+		kd_ctrl_page.kdebug_slowcheck &= ~slowflag;
+		kdebug_enable &= ~enableflag;
+	}
+	lck_spin_unlock(kds_spin_lock);
+	ml_set_interrupts_enabled(s);
+}
+
+
+#ifdef NATIVE_TRACE_FACILITY
+void
+disable_wrap(uint32_t *old_slowcheck, uint32_t *old_flags)
+{
+	int s = ml_set_interrupts_enabled(FALSE);
+	lck_spin_lock(kds_spin_lock);
+
+	*old_slowcheck = kd_ctrl_page.kdebug_slowcheck;
+	*old_flags = kd_ctrl_page.kdebug_flags;
+
+	kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
+	kd_ctrl_page.kdebug_flags |= KDBG_NOWRAP;
+
+	lck_spin_unlock(kds_spin_lock);
+	ml_set_interrupts_enabled(s);
+}
+
+void
+enable_wrap(uint32_t old_slowcheck, boolean_t lostevents)
+{
+	int s = ml_set_interrupts_enabled(FALSE);
+	lck_spin_lock(kds_spin_lock);
+
+	kd_ctrl_page.kdebug_flags &= ~KDBG_NOWRAP;
+
+	if ( !(old_slowcheck & SLOW_NOLOG))
+		kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
+
+	if (lostevents == TRUE)
+		kd_ctrl_page.kdebug_flags |= KDBG_WRAPPED;
+
+	lck_spin_unlock(kds_spin_lock);
+	ml_set_interrupts_enabled(s);
+}
+
+void trace_set_timebases(__unused uint64_t tsc, __unused uint64_t ns)
+{
+}
+#else
+/* Begin functions that are defined twice */
+void trace_set_timebases(uint64_t tsc, uint64_t ns)
+{
+	int cpu = cpu_number();
+	kd_ctrl_page.cpu_timebase[cpu].tsc_base = tsc;
+	kd_ctrl_page.cpu_timebase[cpu].ns_base = ns;
+}
+
+#endif
 
 static int
-create_buffers(void)
+#if defined(__i386__) || defined(__x86_64__)
+create_buffers(boolean_t early_trace)
+#else
+create_buffers(__unused boolean_t early_trace)
+#endif
 {
         int	i;
 	int	p_buffer_size;
@@ -240,6 +372,42 @@ create_buffers(void)
 	int	f_buffers;
 	int	error = 0;
 
+	/*
+	 * get the number of cpus and cache it
+	 */
+#if defined(__i386__) || defined(__x86_64__)
+	if (early_trace == TRUE) {
+		/*
+		 * we've started tracing before the
+		 * IOKit has even started running... just
+		 * use the static max value
+		 */
+		kd_cpus = max_ncpus;
+	} else
+#endif
+	{
+		host_basic_info_data_t hinfo;
+		mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
+
+#define BSD_HOST 1
+		host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
+		kd_cpus = hinfo.logical_cpu_max;
+	}
+	if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip, sizeof(struct kd_bufinfo) * kd_cpus) != KERN_SUCCESS) {
+		error = ENOSPC;
+		goto out;
+	}
+
+	trace_handler_map_bufinfo((uintptr_t)kdbip, sizeof(struct kd_bufinfo) * kd_cpus);
+
+#if !defined(NATIVE_TRACE_FACILITY)
+	for (i = 0; i < (int)kd_cpus; i++) {
+		get_nanotime_timebases(i, 
+				&kd_ctrl_page.cpu_timebase[i].tsc_base, 
+				&kd_ctrl_page.cpu_timebase[i].ns_base);
+	}
+#endif
+
 	if (nkdbufs < (kd_cpus * EVENTS_PER_STORAGE_UNIT * MIN_STORAGE_UNITS_PER_CPU))
 		n_storage_units = kd_cpus * MIN_STORAGE_UNITS_PER_CPU;
 	else
@@ -275,6 +443,8 @@ create_buffers(void)
 			error = ENOSPC;
 			goto out;
 		}
+		bzero(kd_bufs[i].kdsb_addr, f_buffer_size);
+
 		kd_bufs[i].kdsb_size = f_buffer_size;
 	}
 	if (p_buffer_size) {
@@ -282,8 +452,11 @@ create_buffers(void)
 			error = ENOSPC;
 			goto out;
 		}
+		bzero(kd_bufs[i].kdsb_addr, p_buffer_size);
+
 		kd_bufs[i].kdsb_size = p_buffer_size;
 	}
+	n_storage_units = 0;
 
 	for (i = 0; i < n_storage_buffers; i++) {
 		struct kd_storage *kds;
@@ -293,16 +466,31 @@ create_buffers(void)
 		n_elements = kd_bufs[i].kdsb_size / sizeof(struct kd_storage);
 		kds = kd_bufs[i].kdsb_addr;
 
+		trace_handler_map_buffer(i, (uintptr_t)kd_bufs[i].kdsb_addr, kd_bufs[i].kdsb_size);
+
 		for (n = 0; n < n_elements; n++) {
-			kds[n].kds_next = kds_free_list;
-			kds_free_list = &kds[n];
+			kds[n].kds_next.buffer_index = kd_ctrl_page.kds_free_list.buffer_index;
+			kds[n].kds_next.offset = kd_ctrl_page.kds_free_list.offset;
 
-			kds[n].kds_buflast = &kds[n].kds_records[EVENTS_PER_STORAGE_UNIT];
+			kd_ctrl_page.kds_free_list.buffer_index = i;
+			kd_ctrl_page.kds_free_list.offset = n;
 		}
+		n_storage_units += n_elements;
 	}
+
 	bzero((char *)kdbip, sizeof(struct kd_bufinfo) * kd_cpus);
 
-	kdebug_flags |= KDBG_BUFINIT;
+	for (i = 0; i < (int)kd_cpus; i++) {
+		kdbip[i].kd_list_head.raw = KDS_PTR_NULL;
+		kdbip[i].kd_list_tail.raw = KDS_PTR_NULL;
+		kdbip[i].kd_lostevents = FALSE;
+		kdbip[i].num_bufs = 0;
+	}
+
+	kd_ctrl_page.kdebug_flags |= KDBG_BUFINIT;
+
+	kd_ctrl_page.kds_inuse_count = 0;
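+	/*
+	 * KERN_KDWRITETR waiters are woken once half of the storage units are
+	 * in use (see the kds_waiter check in kernel_debug_internal)
+	 */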
+	n_storage_threshold = n_storage_units / 2;
 out:
 	if (error)
 		delete_buffers();
@@ -318,8 +506,10 @@ delete_buffers(void)
 	
 	if (kd_bufs) {
 		for (i = 0; i < n_storage_buffers; i++) {
-			if (kd_bufs[i].kdsb_addr)
+			if (kd_bufs[i].kdsb_addr) {
 				kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdsb_addr, (vm_size_t)kd_bufs[i].kdsb_size);
+				trace_handler_unmap_buffer(i);
+			}
 		}
 		kmem_free(kernel_map, (vm_offset_t)kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers)));
 
@@ -331,58 +521,92 @@ delete_buffers(void)
 
 		kdcopybuf = NULL;
 	}
-	kds_free_list = NULL;
+	kd_ctrl_page.kds_free_list.raw = KDS_PTR_NULL;
 
-	kdebug_flags &= ~KDBG_BUFINIT;
+	if (kdbip) {
+		trace_handler_unmap_bufinfo();
+
+		kmem_free(kernel_map, (vm_offset_t)kdbip, sizeof(struct kd_bufinfo) * kd_cpus);
+		
+		kdbip = NULL;
+	}
+	kd_ctrl_page.kdebug_flags &= ~KDBG_BUFINIT;
 }
 
 
-static void
-release_storage_unit(struct kd_bufinfo *kdbp, struct kd_storage *kdsp)
+#ifdef NATIVE_TRACE_FACILITY
+void
+release_storage_unit(int cpu, uint32_t kdsp_raw)
 {
-
 	int s = 0;
+	struct	kd_storage *kdsp_actual;
+	struct kd_bufinfo *kdbp;
+	union kds_ptr kdsp;
+
+	kdsp.raw = kdsp_raw;
+
 	s = ml_set_interrupts_enabled(FALSE);
 	lck_spin_lock(kds_spin_lock);
 
-	if (kdsp == kdbp->kd_list_head) {
+	kdbp = &kdbip[cpu];
+
+	if (kdsp.raw == kdbp->kd_list_head.raw) {
 		/*
-		 * its possible for the storage unit pointed to
+		 * it's possible for the storage unit pointed to
 		 * by kdsp to have already been stolen... so
-		 * check to see if its still the head of the list
+		 * check to see if it's still the head of the list
 		 * now that we're behind the lock that protects 
 		 * adding and removing from the queue...
 		 * since we only ever release and steal units from
-		 * that position, if its no longer the head
+		 * that position, if it's no longer the head
 		 * we have nothing to do in this context
 		 */
-		kdbp->kd_list_head = kdsp->kds_next;
+		kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
+		kdbp->kd_list_head = kdsp_actual->kds_next;
 	
-		kdsp->kds_next = kds_free_list;
-		kds_free_list = kdsp;
+		kdsp_actual->kds_next = kd_ctrl_page.kds_free_list;
+		kd_ctrl_page.kds_free_list = kdsp;
+
+		kd_ctrl_page.kds_inuse_count--;
 	}
 	lck_spin_unlock(kds_spin_lock);
 	ml_set_interrupts_enabled(s);
 }
 
 
-/*
- * Interrupts are disabled when we enter this routine.
- */
-static struct kd_storage *
-allocate_storage_unit(struct kd_bufinfo *kdbp)
+boolean_t
+allocate_storage_unit(int cpu)
 {
-	struct	kd_storage *kdsp;
-	struct  kd_bufinfo *kdbp_vict, *kdbp_try;
+	union	kds_ptr kdsp;
+	struct	kd_storage *kdsp_actual;
+	struct  kd_bufinfo *kdbp, *kdbp_vict, *kdbp_try;
 	uint64_t	oldest_ts, ts;
+	boolean_t	retval = TRUE;
+	int			s = 0;
 		
+	s = ml_set_interrupts_enabled(FALSE);
 	lck_spin_lock(kds_spin_lock);
 
-	if ((kdsp = kds_free_list))
-		kds_free_list = kdsp->kds_next;
-	else {
-		if (kdebug_flags & KDBG_NOWRAP) {
-                        kdebug_slowcheck |= SLOW_NOLOG;
+	kdbp = &kdbip[cpu];
+
+	/* If someone beat us to the allocation, return success */
+	if (kdbp->kd_list_tail.raw != KDS_PTR_NULL) {
+		kdsp_actual = POINTER_FROM_KDS_PTR(kdbp->kd_list_tail);
+
+		if (kdsp_actual->kds_bufindx < EVENTS_PER_STORAGE_UNIT)
+			goto out;
+	}
+	
+	if ((kdsp = kd_ctrl_page.kds_free_list).raw != KDS_PTR_NULL) {
+		kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
+		kd_ctrl_page.kds_free_list = kdsp_actual->kds_next;
+
+		kd_ctrl_page.kds_inuse_count++;
+	} else {
+		if (kd_ctrl_page.kdebug_flags & KDBG_NOWRAP) {
+			kd_ctrl_page.kdebug_slowcheck |= SLOW_NOLOG;
+			kdbp->kd_lostevents = TRUE;
+			retval = FALSE;
 			goto out;
 		}
 		kdbp_vict = NULL;
@@ -390,22 +614,25 @@ allocate_storage_unit(struct kd_bufinfo *kdbp)
 
 		for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_cpus]; kdbp_try++) {
 
-			if ((kdsp = kdbp_try->kd_list_head) == NULL) {
+			if (kdbp_try->kd_list_head.raw == KDS_PTR_NULL) {
 				/*
 				 * no storage unit to steal
 				 */
 				continue;
 			}
-			if (kdsp == kdbp_try->kd_active) {
+
+			kdsp_actual = POINTER_FROM_KDS_PTR(kdbp_try->kd_list_head);
+
+			if (kdsp_actual->kds_bufcnt < EVENTS_PER_STORAGE_UNIT) {
 				/*
 				 * make sure we don't steal the storage unit
-				 * being actively recorded to...  this state
-				 * also implies that this is the only unit assigned
-				 * to this CPU, so we can immediately move on 
+				 * being actively recorded to...  need to
+				 * move on because we don't want an out-of-order
+				 * set of events showing up later
 				 */
 				continue;
 			}
-			ts = kdbg_get_timestamp(&(kdbp_try->kd_list_head->kds_records[0]));
+			ts = kdbg_get_timestamp(&kdsp_actual->kds_records[0]);
 
 			if (ts < oldest_ts) {
 				/*
@@ -417,37 +644,52 @@ allocate_storage_unit(struct kd_bufinfo *kdbp)
 				kdbp_vict = kdbp_try;
 			}
 		}
-#if 1
 		if (kdbp_vict == NULL) {
 			kdebug_enable = 0;
-
-			panic("allocate_storage_unit: no storage units available\n");
+			kd_ctrl_page.enabled = 0;
+			retval = FALSE;
+			goto out;
 		}
-#endif
 		kdsp = kdbp_vict->kd_list_head;
+		kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
 
-		kdbp_vict->kd_list_head = kdsp->kds_next;
+		kdbp_vict->kd_list_head = kdsp_actual->kds_next;
 
-		kdebug_flags |= KDBG_WRAPPED;
+		kd_ctrl_page.kdebug_flags |= KDBG_WRAPPED;
 	}
-	kdsp->kds_next     = NULL;
-	kdsp->kds_bufptr   = &kdsp->kds_records[0];
-	kdsp->kds_readlast = kdsp->kds_bufptr;
+	kdsp_actual->kds_timestamp = mach_absolute_time();
+	kdsp_actual->kds_next.raw = KDS_PTR_NULL;
+	kdsp_actual->kds_bufcnt	  = 0;
+	kdsp_actual->kds_readlast = 0;
+
+	kdsp_actual->kds_lostevents = kdbp->kd_lostevents;
+	kdbp->kd_lostevents = FALSE;
+	kdsp_actual->kds_bufindx  = 0;
 
-	if (kdbp->kd_list_head == NULL)
+	if (kdbp->kd_list_head.raw == KDS_PTR_NULL)
 		kdbp->kd_list_head = kdsp;
 	else
-		kdbp->kd_list_tail->kds_next = kdsp;
+		POINTER_FROM_KDS_PTR(kdbp->kd_list_tail)->kds_next = kdsp;
 	kdbp->kd_list_tail = kdsp;
 out:
 	lck_spin_unlock(kds_spin_lock);
+	ml_set_interrupts_enabled(s);
 
-	return (kdsp);
+	return (retval);
 }
+#endif
 
+void
+kernel_debug_internal(
+	uint32_t	debugid,
+	uintptr_t	arg1,
+	uintptr_t	arg2,
+	uintptr_t	arg3,
+	uintptr_t	arg4,
+	uintptr_t	arg5,
+	int		entropy_flag);
 
-
-static void
+__attribute__((always_inline)) void
 kernel_debug_internal(
 	uint32_t	debugid,
 	uintptr_t	arg1,
@@ -459,92 +701,118 @@ kernel_debug_internal(
 {
 	struct proc 	*curproc;
 	uint64_t 	now;
-	int		s;
+	uint32_t	bindx;
+	boolean_t	s;
 	kd_buf		*kd;
 	int		cpu;
 	struct kd_bufinfo *kdbp;
-	struct kd_storage *kdsp;
+	struct kd_storage *kdsp_actual;
 
-	s = ml_set_interrupts_enabled(FALSE);
 
-	now = mach_absolute_time() & KDBG_TIMESTAMP_MASK;
-	cpu = cpu_number();
-
-	if (kdebug_enable & KDEBUG_ENABLE_CHUD) {
-		if (kdebug_chudhook)
-			kdebug_chudhook(debugid, arg1, arg2, arg3, arg4, arg5);
-
-		if ( !(kdebug_enable & (KDEBUG_ENABLE_ENTROPY | KDEBUG_ENABLE_TRACE)))
-			goto out;
-	}
-	if (kdebug_slowcheck == 0)
-		goto record_trace;
+	if (kd_ctrl_page.kdebug_slowcheck) {
 
-	if (entropy_flag && (kdebug_enable & KDEBUG_ENABLE_ENTROPY)) {
-		if (kd_entropy_indx < kd_entropy_count)	{
-			kd_entropy_buffer [ kd_entropy_indx] = mach_absolute_time();
-			kd_entropy_indx++;
-		}
-	    
-		if (kd_entropy_indx == kd_entropy_count) {
+		if (kdebug_enable & KDEBUG_ENABLE_CHUD) {
+			kd_chudhook_fn chudhook;
 			/*
-			 * Disable entropy collection
+			 * Mask interrupts to minimize the interval across
+			 * which the driver providing the hook could be
+			 * unloaded.
 			 */
-			kdebug_enable &= ~KDEBUG_ENABLE_ENTROPY;
-			kdebug_slowcheck &= ~SLOW_ENTROPY;
+			s = ml_set_interrupts_enabled(FALSE);
+			chudhook = kdebug_chudhook;
+			if (chudhook)
+				chudhook(debugid, arg1, arg2, arg3, arg4, arg5);
+			ml_set_interrupts_enabled(s);
 		}
-	}
-	if ( (kdebug_slowcheck & SLOW_NOLOG) )
-		goto out;
+		if ((kdebug_enable & KDEBUG_ENABLE_ENTROPY) && entropy_flag) {
+
+			now = mach_absolute_time();
+
+			s = ml_set_interrupts_enabled(FALSE);
+			lck_spin_lock(kds_spin_lock);
+
+			if (kdebug_enable & KDEBUG_ENABLE_ENTROPY) {
+
+				if (kd_entropy_indx < kd_entropy_count)	{
+					kd_entropy_buffer[kd_entropy_indx] = now;
+					kd_entropy_indx++;
+				}
+				if (kd_entropy_indx == kd_entropy_count) {
+					/*
+					 * Disable entropy collection
+					 */
+					kdebug_enable &= ~KDEBUG_ENABLE_ENTROPY;
+					kd_ctrl_page.kdebug_slowcheck &= ~SLOW_ENTROPY;
+				}
+			}
+			lck_spin_unlock(kds_spin_lock);
+			ml_set_interrupts_enabled(s);
+		}
+		if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) || !(kdebug_enable & KDEBUG_ENABLE_TRACE))
+			goto out1;
 	
-	if (kdebug_flags & KDBG_PIDCHECK) {
-		/*
-		 * If kdebug flag is not set for current proc, return
-		 */
-		curproc = current_proc();
+		if ( !ml_at_interrupt_context()) {
+			if (kd_ctrl_page.kdebug_flags & KDBG_PIDCHECK) {
+				/*
+				 * If kdebug flag is not set for current proc, return
+				 */
+				curproc = current_proc();
 
-		if ((curproc && !(curproc->p_kdebug)) &&
-		    ((debugid & 0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE)))
-			goto out;
-	}
-	else if (kdebug_flags & KDBG_PIDEXCLUDE) {
-		/*
-		 * If kdebug flag is set for current proc, return
-		 */
-		curproc = current_proc();
+				if ((curproc && !(curproc->p_kdebug)) &&
+				    ((debugid & 0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE)))
+					goto out1;
+			}
+			else if (kd_ctrl_page.kdebug_flags & KDBG_PIDEXCLUDE) {
+				/*
+				 * If kdebug flag is set for current proc, return
+				 */
+				curproc = current_proc();
 
-		if ((curproc && curproc->p_kdebug) &&
-		    ((debugid & 0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE)))
-			goto out;
-	}
-	if (kdebug_flags & KDBG_RANGECHECK) {
-		if ((debugid < kdlog_beg)
-		    || ((debugid >= kdlog_end) && (debugid >> 24 != DBG_TRACE)))
-			goto out;
-	}
-	else if (kdebug_flags & KDBG_VALCHECK) {
-		if ((debugid & DBG_FUNC_MASK) != kdlog_value1 &&
-		    (debugid & DBG_FUNC_MASK) != kdlog_value2 &&
-		    (debugid & DBG_FUNC_MASK) != kdlog_value3 &&
-		    (debugid & DBG_FUNC_MASK) != kdlog_value4 &&
-		    (debugid >> 24 != DBG_TRACE))
-			goto out;
+				if ((curproc && curproc->p_kdebug) &&
+				    ((debugid & 0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE)))
+					goto out1;
+			}
+		}
+		if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
+			if ((debugid < kdlog_beg)
+					|| ((debugid >= kdlog_end) && (debugid >> 24 != DBG_TRACE)))
+				goto out1;
+		}
+		else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
+			if ((debugid & DBG_FUNC_MASK) != kdlog_value1 &&
+					(debugid & DBG_FUNC_MASK) != kdlog_value2 &&
+					(debugid & DBG_FUNC_MASK) != kdlog_value3 &&
+					(debugid & DBG_FUNC_MASK) != kdlog_value4 &&
+					(debugid >> 24 != DBG_TRACE))
+				goto out1;
+		}
 	}
-
-record_trace:
+	disable_preemption();
+	cpu = cpu_number();
 	kdbp = &kdbip[cpu];
-
-	if ((kdsp = kdbp->kd_active) == NULL) {
-		if ((kdsp = allocate_storage_unit(kdbp)) == NULL) {
+retry_q:
+	if (kdbp->kd_list_tail.raw != KDS_PTR_NULL) {
+		kdsp_actual = POINTER_FROM_KDS_PTR(kdbp->kd_list_tail);
+		bindx = kdsp_actual->kds_bufindx;
+	} else
+		kdsp_actual = NULL;
+	
+	if (kdsp_actual == NULL || bindx >= EVENTS_PER_STORAGE_UNIT) {
+		if (allocate_storage_unit(cpu) == FALSE) {
 			/*
 			 * this can only happen if wrapping
 			 * has been disabled
 			 */
 			goto out;
 		}
-		kdbp->kd_active = kdsp;
+		goto retry_q;
 	}
-	kd = kdsp->kds_bufptr;
+	now = mach_absolute_time() & KDBG_TIMESTAMP_MASK;
+
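+	/*
+	 * reserve our slot with a compare-and-swap... preemption is disabled,
+	 * but an interrupt-level event on this CPU can still race us for
+	 * kds_bufindx, in which case we loop back and try again
+	 */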
+	if ( !OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx))
+		goto retry_q;
+
+	kd = &kdsp_actual->kds_records[bindx];
 
 	kd->debugid = debugid;
 	kd->arg1 = arg1;
@@ -555,12 +823,56 @@ record_trace:
 	          
 	kdbg_set_timestamp_and_cpu(kd, now, cpu);
 
-	kdsp->kds_bufptr++;
-
-	if (kdsp->kds_bufptr >= kdsp->kds_buflast)
-	  	kdbp->kd_active = NULL;
+	OSAddAtomic(1, &kdsp_actual->kds_bufcnt);
 out:
-	ml_set_interrupts_enabled(s);
+	enable_preemption();
+out1:
+	if ((kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold) ||
+	    (kde_waiter && kd_entropy_indx >= kd_entropy_count)) {
+		uint32_t	etype;
+		uint32_t	stype;
+		
+		etype = debugid & DBG_FUNC_MASK;
+		stype = debugid & DBG_SCALL_MASK;
+
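+		/*
+		 * only a few high-frequency event classes attempt the wakeup,
+		 * presumably to bound the cost of this check on the trace path
+		 */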
+		if (etype == INTERRUPT || etype == MACH_vmfault ||
+		    stype == BSC_SysCall || stype == MACH_SysCall) {
+
+			boolean_t need_kds_wakeup = FALSE;
+			boolean_t need_kde_wakeup = FALSE;
+
+			/*
+			 * try to take the lock here to synchronize with the
+			 * waiter entering the blocked state... use the try
+			 * mode to prevent deadlocks caused by re-entering this
+			 * routine due to various trace points triggered in the
+			 * lck_spin_sleep_xxxx routines used to actually enter
+			 * one of our 2 wait conditions... no problem if we fail,
+			 * there will be lots of additional events coming in that
+			 * will eventually succeed in grabbing this lock
+			 */
+			s = ml_set_interrupts_enabled(FALSE);
+
+			if (lck_spin_try_lock(kdw_spin_lock)) {
+
+				if (kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold) {
+					kds_waiter = 0;
+					need_kds_wakeup = TRUE;
+				}
+				if (kde_waiter && kd_entropy_indx >= kd_entropy_count) {
+					kde_waiter = 0;
+					need_kde_wakeup = TRUE;
+				}
+				lck_spin_unlock(kdw_spin_lock);
+			}
+			ml_set_interrupts_enabled(s);
+			
+			if (need_kds_wakeup == TRUE)
+				wakeup(&kds_waiter);
+			if (need_kde_wakeup == TRUE)
+				wakeup(&kde_waiter);
+		}
+	}
 }
 
 void
@@ -584,27 +896,32 @@ kernel_debug1(
 	uintptr_t	arg4,
 	uintptr_t	arg5)
 {
-	kernel_debug_internal(debugid, arg1, arg2, arg3, arg4, arg5, 0);
+	kernel_debug_internal(debugid, arg1, arg2, arg3, arg4, arg5, 1);
 }
 
-static void
-kdbg_lock_init(void)
+/*
+ * Support syscall SYS_kdebug_trace
+ */
+int
+kdebug_trace(__unused struct proc *p, struct kdebug_trace_args *uap, __unused int32_t *retval)
 {
-	host_basic_info_data_t hinfo;
-	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
+	if ( __probable(kdebug_enable == 0) )
+		return(EINVAL);
+  
+	kernel_debug_internal(uap->code, uap->arg1, uap->arg2, uap->arg3, uap->arg4, (uintptr_t)thread_tid(current_thread()), 0);
 
-	if (kdebug_flags & KDBG_LOCKINIT)
-		return;
+	return(0);
+}
 
-	/* get the number of cpus and cache it */
-#define BSD_HOST 1
-	host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
-	kd_cpus = hinfo.logical_cpu_max;
 
-	if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip,
-		       sizeof(struct kd_bufinfo) * kd_cpus) != KERN_SUCCESS)
-	        return;
+static void
+kdbg_lock_init(void)
+{
+	if (kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT)
+		return;
 
+	trace_handler_map_ctrl_page((uintptr_t)&kd_ctrl_page, sizeof(kd_ctrl_page), sizeof(struct kd_storage), sizeof(union kds_ptr));
+	
 	/*
 	 * allocate lock group attribute and group
 	 */
@@ -618,25 +935,26 @@ kdbg_lock_init(void)
 
 
 	/*
-	 * allocate and initialize spin lock and mutex
+	 * allocate and initialize the mutex and spin locks
 	 */
 	kd_trace_mtx_sysctl = lck_mtx_alloc_init(kd_trace_mtx_sysctl_grp, kd_trace_mtx_sysctl_attr);
 	kds_spin_lock = lck_spin_alloc_init(kd_trace_mtx_sysctl_grp, kd_trace_mtx_sysctl_attr);
+	kdw_spin_lock = lck_spin_alloc_init(kd_trace_mtx_sysctl_grp, kd_trace_mtx_sysctl_attr);
 
-	kdebug_flags |= KDBG_LOCKINIT;
+	kd_ctrl_page.kdebug_flags |= KDBG_LOCKINIT;
 }
 
 
 int
-kdbg_bootstrap(void)
+kdbg_bootstrap(boolean_t early_trace)
 {
-        kdebug_flags &= ~KDBG_WRAPPED;
+        kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
 
-	return (create_buffers());
+	return (create_buffers(early_trace));
 }
 
 int
-kdbg_reinit(void)
+kdbg_reinit(boolean_t early_trace)
 {
 	int ret = 0;
 
@@ -645,8 +963,7 @@ kdbg_reinit(void)
 	 * First make sure we're not in
 	 * the middle of cutting a trace
 	 */
-	kdebug_enable &= ~KDEBUG_ENABLE_TRACE;
-	kdebug_slowcheck |= SLOW_NOLOG;
+	kdbg_set_tracing_enabled(FALSE);
 
 	/*
 	 * make sure the SLOW_NOLOG is seen
@@ -657,14 +974,17 @@ kdbg_reinit(void)
 
 	delete_buffers();
 
-	if ((kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) {
+	if ((kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) {
 		kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize);
-		kdebug_flags &= ~KDBG_MAPINIT;
+		kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT;
 		kd_mapsize = 0;
 		kd_mapptr = (kd_threadmap *) 0;
 		kd_mapcount = 0;
 	}  
-	ret = kdbg_bootstrap();
+	ret = kdbg_bootstrap(early_trace);
+
+	RAW_file_offset = 0;
+	RAW_file_written = 0;
 
 	return(ret);
 }
@@ -750,7 +1070,7 @@ kdbg_mapinit(void)
 	vm_offset_t	tts_maptomem=0;
 	int		i;
 
-        if (kdebug_flags & KDBG_MAPINIT)
+        if (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT)
 		return;
 
 	/*
@@ -821,7 +1141,7 @@ kdbg_mapinit(void)
 	}
 
 	if (kd_mapptr && tts_mapptr) {
-		kdebug_flags |= KDBG_MAPINIT;
+		kd_ctrl_page.kdebug_flags |= KDBG_MAPINIT;
 
 		/*
 		 * Initialize thread map data
@@ -847,9 +1167,7 @@ kdbg_clear(void)
 	 * First make sure we're not in
 	 * the middle of cutting a trace
 	 */
-
-	kdebug_enable &= ~KDEBUG_ENABLE_TRACE;
-	kdebug_slowcheck = SLOW_NOLOG;
+	kdbg_set_tracing_enabled(FALSE);
 
 	/*
 	 * make sure the SLOW_NOLOG is seen
@@ -858,24 +1176,24 @@ kdbg_clear(void)
 	 */
 	IOSleep(100);
 
-	if (kdebug_enable & KDEBUG_ENABLE_ENTROPY)
-		kdebug_slowcheck |= SLOW_ENTROPY;
-
         global_state_pid = -1;
-	kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
-	kdebug_flags &= ~(KDBG_NOWRAP | KDBG_RANGECHECK | KDBG_VALCHECK);
-	kdebug_flags &= ~(KDBG_PIDCHECK | KDBG_PIDEXCLUDE);
+	kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
+	kd_ctrl_page.kdebug_flags &= ~(KDBG_NOWRAP | KDBG_RANGECHECK | KDBG_VALCHECK);
+	kd_ctrl_page.kdebug_flags &= ~(KDBG_PIDCHECK | KDBG_PIDEXCLUDE);
 
 	delete_buffers();
 
 	/* Clean up the thread map buffer */
-	kdebug_flags &= ~KDBG_MAPINIT;
+	kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT;
 	if (kd_mapptr) {
 		kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize);
 		kd_mapptr = (kd_threadmap *) 0;
 	}
 	kd_mapsize = 0;
 	kd_mapcount = 0;
+
+	RAW_file_offset = 0;
+	RAW_file_written = 0;
 }
 
 int
@@ -896,17 +1214,17 @@ kdbg_setpid(kd_regtype *kdr)
 				/*
 				 * turn on pid check for this and all pids
 				 */
-				kdebug_flags |= KDBG_PIDCHECK;
-				kdebug_flags &= ~KDBG_PIDEXCLUDE;
-				kdebug_slowcheck |= SLOW_CHECKS;
-				
+				kd_ctrl_page.kdebug_flags |= KDBG_PIDCHECK;
+				kd_ctrl_page.kdebug_flags &= ~KDBG_PIDEXCLUDE;
+				kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
+
 				p->p_kdebug = 1;
 			} else {
 				/*
 				 * turn off pid check for this pid value
 				 * Don't turn off all pid checking though
 				 *
-				 * kdebug_flags &= ~KDBG_PIDCHECK;
+				 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDCHECK;
 				 */   
 				p->p_kdebug = 0;
 			}
@@ -938,9 +1256,9 @@ kdbg_setpidex(kd_regtype *kdr)
 				/*
 				 * turn on pid exclusion
 				 */
-				kdebug_flags |= KDBG_PIDEXCLUDE;
-				kdebug_flags &= ~KDBG_PIDCHECK;
-				kdebug_slowcheck |= SLOW_CHECKS;
+				kd_ctrl_page.kdebug_flags |= KDBG_PIDEXCLUDE;
+				kd_ctrl_page.kdebug_flags &= ~KDBG_PIDCHECK;
+				kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
 
 				p->p_kdebug = 1;
 			}
@@ -949,7 +1267,7 @@ kdbg_setpidex(kd_regtype *kdr)
 				 * turn off pid exclusion for this pid value
 				 * Don't turn off all pid exclusion though
 				 *
-				 * kdebug_flags &= ~KDBG_PIDEXCLUDE;
+				 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDEXCLUDE;
 				 */   
 				p->p_kdebug = 0;
 			}
@@ -975,14 +1293,8 @@ kdbg_setrtcdec(kd_regtype *kdr)
 
 	if (decval && decval < KDBG_MINRTCDEC)
 		ret = EINVAL;
-#ifdef ppc
-	else {
-		maxDec = decval ? decval : 0x7FFFFFFF;	/* Set or reset the max decrementer */
-	}
-#else
 	else
 		ret = ENOTSUP;
-#endif /* ppc */
 
 	return(ret);
 }
@@ -999,10 +1311,10 @@ kdbg_setreg(kd_regtype * kdr)
 		val_2 = (kdr->value2 & 0xff);
 		kdlog_beg = (val_1<<24);
 		kdlog_end = (val_2<<24);
-		kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
-		kdebug_flags &= ~KDBG_VALCHECK;       /* Turn off specific value check  */
-		kdebug_flags |= (KDBG_RANGECHECK | KDBG_CLASSTYPE);
-		kdebug_slowcheck |= SLOW_CHECKS;
+		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
+		kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK;       /* Turn off specific value check  */
+		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_CLASSTYPE);
+		kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
 		break;
 	case KDBG_SUBCLSTYPE :
 		val_1 = (kdr->value1 & 0xff);
@@ -1010,36 +1322,36 @@ kdbg_setreg(kd_regtype * kdr)
 		val = val_2 + 1;
 		kdlog_beg = ((val_1<<24) | (val_2 << 16));
 		kdlog_end = ((val_1<<24) | (val << 16));
-		kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
-		kdebug_flags &= ~KDBG_VALCHECK;       /* Turn off specific value check  */
-		kdebug_flags |= (KDBG_RANGECHECK | KDBG_SUBCLSTYPE);
-		kdebug_slowcheck |= SLOW_CHECKS;
+		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
+		kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK;       /* Turn off specific value check  */
+		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_SUBCLSTYPE);
+		kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
 		break;
 	case KDBG_RANGETYPE :
 		kdlog_beg = (kdr->value1);
 		kdlog_end = (kdr->value2);
-		kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
-		kdebug_flags &= ~KDBG_VALCHECK;       /* Turn off specific value check  */
-		kdebug_flags |= (KDBG_RANGECHECK | KDBG_RANGETYPE);
-		kdebug_slowcheck |= SLOW_CHECKS;
+		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
+		kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK;       /* Turn off specific value check  */
+		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_RANGETYPE);
+		kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
 		break;
 	case KDBG_VALCHECK:
 		kdlog_value1 = (kdr->value1);
 		kdlog_value2 = (kdr->value2);
 		kdlog_value3 = (kdr->value3);
 		kdlog_value4 = (kdr->value4);
-		kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
-		kdebug_flags &= ~KDBG_RANGECHECK;    /* Turn off range check */
-		kdebug_flags |= KDBG_VALCHECK;       /* Turn on specific value check  */
-		kdebug_slowcheck |= SLOW_CHECKS;
+		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
+		kd_ctrl_page.kdebug_flags &= ~KDBG_RANGECHECK;    /* Turn off range check */
+		kd_ctrl_page.kdebug_flags |= KDBG_VALCHECK;       /* Turn on specific value check  */
+		kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
 		break;
 	case KDBG_TYPENONE :
-		kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
+		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
 
-		if ( (kdebug_flags & (KDBG_RANGECHECK | KDBG_VALCHECK | KDBG_PIDCHECK | KDBG_PIDEXCLUDE)) )
-		        kdebug_slowcheck |= SLOW_CHECKS;
+		if ( (kd_ctrl_page.kdebug_flags & (KDBG_RANGECHECK | KDBG_VALCHECK | KDBG_PIDCHECK | KDBG_PIDEXCLUDE)) )
+			kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
 		else
-		        kdebug_slowcheck &= ~SLOW_CHECKS;
+			kdbg_set_flags(SLOW_CHECKS, 0, FALSE);
 
 		kdlog_beg = 0;
 		kdlog_end = 0;
@@ -1064,8 +1376,8 @@ kdbg_getreg(__unused kd_regtype * kdr)
 		val_2 = val_1 + 1;
 		kdlog_beg = (val_1<<24);
 		kdlog_end = (val_2<<24);
-		kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
-		kdebug_flags |= (KDBG_RANGECHECK | KDBG_CLASSTYPE);
+		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
+		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_CLASSTYPE);
 		break;
 	case KDBG_SUBCLSTYPE :
 		val_1 = (kdr->value1 & 0xff);
@@ -1073,17 +1385,17 @@ kdbg_getreg(__unused kd_regtype * kdr)
 		val = val_2 + 1;
 		kdlog_beg = ((val_1<<24) | (val_2 << 16));
 		kdlog_end = ((val_1<<24) | (val << 16));
-		kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
-		kdebug_flags |= (KDBG_RANGECHECK | KDBG_SUBCLSTYPE);
+		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
+		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_SUBCLSTYPE);
 		break;
 	case KDBG_RANGETYPE :
 		kdlog_beg = (kdr->value1);
 		kdlog_end = (kdr->value2);
-		kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
-		kdebug_flags |= (KDBG_RANGECHECK | KDBG_RANGETYPE);
+		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
+		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_RANGETYPE);
 		break;
 	case KDBG_TYPENONE :
-		kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
+		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
 		kdlog_beg = 0;
 		kdlog_end = 0;
 		break;
@@ -1107,21 +1419,56 @@ kdbg_readmap(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
 
 	if (count && (count <= kd_mapcount))
 	{
-		if ((kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr)
+		if ((kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr)
 		{
 			if (*number < kd_mapsize)
 				ret = EINVAL;
 			else
 			{
-				if (vp) {
-					vn_rdwr(UIO_WRITE, vp, (caddr_t)&count, sizeof(uint32_t), RAW_file_offset,
-						UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
-					RAW_file_offset += sizeof(uint32_t);
-
-					vn_rdwr(UIO_WRITE, vp, (caddr_t)kd_mapptr, kd_mapsize, RAW_file_offset,
-						UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
+				if (vp)
+				{
+					RAW_header	header;
+					clock_sec_t	secs;
+					clock_usec_t	usecs;
+					char	*pad_buf;
+					int 	pad_size;
+
+					header.version_no = RAW_VERSION1;
+					header.thread_count = count;
+
+					clock_get_calendar_microtime(&secs, &usecs);
+					header.TOD_secs = secs;
+					header.TOD_usecs = usecs;
+					
+					ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)&header, sizeof(RAW_header), RAW_file_offset,
+						      UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
+					if (ret)
+						goto write_error;
+					RAW_file_offset += sizeof(RAW_header);
+
+					ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)kd_mapptr, kd_mapsize, RAW_file_offset,
+						      UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
+					if (ret)
+						goto write_error;
 					RAW_file_offset += kd_mapsize;
 
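+					/*
+					 * pad the thread map out to the next page
+					 * boundary, presumably so that what follows
+					 * in the file begins page-aligned
+					 */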
+					pad_size = PAGE_SIZE - (RAW_file_offset & PAGE_MASK_64);
+
+					if (pad_size)
+					{
+						pad_buf = (char *)kalloc(pad_size);
+						memset(pad_buf, 0, pad_size);
+
+						ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset,
+							UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
+						kfree(pad_buf, pad_size);
+						
+						if (ret)
+							goto write_error;
+						RAW_file_offset += pad_size;
+					}
+					RAW_file_written += sizeof(RAW_header) + kd_mapsize + pad_size;
+
 				} else {
 					if (copyout(kd_mapptr, buffer, kd_mapsize))
 						ret = EINVAL;
@@ -1134,22 +1481,24 @@ kdbg_readmap(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
 	else
 		ret = EINVAL;
 
-	if (ret && vp) {
+	if (ret && vp)
+	{
 		count = 0;
 
 		vn_rdwr(UIO_WRITE, vp, (caddr_t)&count, sizeof(uint32_t), RAW_file_offset,
 			UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
 		RAW_file_offset += sizeof(uint32_t);
+		RAW_file_written += sizeof(uint32_t);
 	}
-	if ((kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr)
+write_error:
+	if ((kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr)
 	{
 		kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize);
-		kdebug_flags &= ~KDBG_MAPINIT;
+		kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT;
 		kd_mapsize = 0;
 		kd_mapptr = (kd_threadmap *) 0;
 		kd_mapcount = 0;
 	}  
-
 	return(ret);
 }
 
@@ -1158,44 +1507,85 @@ kdbg_getentropy (user_addr_t buffer, size_t *number, int ms_timeout)
 {
 	int avail = *number;
 	int ret = 0;
+	int s;
+	u_int64_t abstime;
+	u_int64_t ns;
+	int wait_result = THREAD_AWAKENED;
+
 
 	if (kd_entropy_buffer)
 		return(EBUSY);
 
-	kd_entropy_count = avail/sizeof(mach_timespec_t);
-	kd_entropy_bufsize = kd_entropy_count * sizeof(mach_timespec_t);
-	kd_entropy_indx = 0;
+	if (ms_timeout < 0)
+		return(EINVAL);
+
+	kd_entropy_count = avail/sizeof(uint64_t);
+
+	if (kd_entropy_count > MAX_ENTROPY_COUNT || kd_entropy_count == 0) {
+		/*
+		 * Enforce maximum entropy entries
+		 */
+		return(EINVAL);
+	}
+	kd_entropy_bufsize = kd_entropy_count * sizeof(uint64_t);
 
 	/*
-	 * Enforce maximum entropy entries here if needed
 	 * allocate entropy buffer
 	 */
-	if (kmem_alloc(kernel_map, &kd_entropy_buftomem,
-		       (vm_size_t)kd_entropy_bufsize) == KERN_SUCCESS) {
+	if (kmem_alloc(kernel_map, &kd_entropy_buftomem, (vm_size_t)kd_entropy_bufsize) == KERN_SUCCESS) {
 		kd_entropy_buffer = (uint64_t *) kd_entropy_buftomem;
 	} else {
 		kd_entropy_buffer = (uint64_t *) 0;
 		kd_entropy_count = 0;
-		kd_entropy_indx = 0;
-		return (EINVAL);
+
+		return (ENOMEM);
 	}
+	kd_entropy_indx = 0;
 
-	if (ms_timeout < 10)
-		ms_timeout = 10;
+	KERNEL_DEBUG_CONSTANT(0xbbbbf000 | DBG_FUNC_START, ms_timeout, kd_entropy_count, 0, 0, 0);
 
 	/*
 	 * Enable entropy sampling
 	 */
-	kdebug_enable |= KDEBUG_ENABLE_ENTROPY;
-	kdebug_slowcheck |= SLOW_ENTROPY;
+	kdbg_set_flags(SLOW_ENTROPY, KDEBUG_ENABLE_ENTROPY, TRUE);
 
-	ret = tsleep (kdbg_getentropy, PRIBIO | PCATCH, "kd_entropy", (ms_timeout/(1000/HZ)));
+	if (ms_timeout) {
+		ns = (u_int64_t)ms_timeout * (u_int64_t)(1000 * 1000);
+		nanoseconds_to_absolutetime(ns, &abstime);
+		clock_absolutetime_interval_to_deadline(abstime, &abstime);
+	} else
+		abstime = 0;
+
+	s = ml_set_interrupts_enabled(FALSE);
+	lck_spin_lock(kdw_spin_lock);
+
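+	/*
+	 * block until the sample buffer fills or the deadline passes...
+	 * the trace path wakes us via wakeup(&kde_waiter) once
+	 * kd_entropy_indx reaches kd_entropy_count
+	 */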
+	while (wait_result == THREAD_AWAKENED && kd_entropy_indx < kd_entropy_count) {
+
+		kde_waiter = 1;
+
+		if (abstime) {
+			/*
+			 * wait for the specified timeout or
+			 * until we've hit our sample limit
+			 */
+			wait_result = lck_spin_sleep_deadline(kdw_spin_lock, 0, &kde_waiter, THREAD_ABORTSAFE, abstime);
+		} else {
+			/*
+			 * wait until we've hit our sample limit
+			 */
+			wait_result = lck_spin_sleep(kdw_spin_lock, 0, &kde_waiter, THREAD_ABORTSAFE);
+		}
+		kde_waiter = 0;
+	}
+	lck_spin_unlock(kdw_spin_lock);
+	ml_set_interrupts_enabled(s);
 
 	/*
 	 * Disable entropy sampling
 	 */
-	kdebug_enable &= ~KDEBUG_ENABLE_ENTROPY;
-	kdebug_slowcheck &= ~SLOW_ENTROPY;
+	kdbg_set_flags(SLOW_ENTROPY, KDEBUG_ENABLE_ENTROPY, FALSE);
+
+	KERNEL_DEBUG_CONSTANT(0xbbbbf000 | DBG_FUNC_END, ms_timeout, kd_entropy_indx, 0, 0, 0);
 
 	*number = 0;
 	ret = 0;
@@ -1204,10 +1594,10 @@ kdbg_getentropy (user_addr_t buffer, size_t *number, int ms_timeout)
 		/*
 		 * copyout the buffer
 		 */
-		if (copyout(kd_entropy_buffer, buffer, kd_entropy_indx * sizeof(mach_timespec_t)))
+		if (copyout(kd_entropy_buffer, buffer, kd_entropy_indx * sizeof(uint64_t)))
 			ret = EINVAL;
 		else
-			*number = kd_entropy_indx;
+			*number = kd_entropy_indx * sizeof(uint64_t);
 	}
 	/*
 	 * Always cleanup
@@ -1250,14 +1640,16 @@ kdbg_set_nkdbufs(unsigned int value)
 void
 kdbg_control_chud(int val, void *fn)
 {
-        if (val) {
-                /* enable chudhook */
+	kdbg_lock_init();
+    
+	if (val) {
+		/* enable chudhook */
 		kdebug_chudhook = fn;
-	        kdebug_enable |= KDEBUG_ENABLE_CHUD;
+		kdbg_set_flags(SLOW_CHUD, KDEBUG_ENABLE_CHUD, TRUE);
 	}
 	else {
-	        /* disable chudhook */
-                kdebug_enable &= ~KDEBUG_ENABLE_CHUD;
+		/* disable chudhook */
+		kdbg_set_flags(SLOW_CHUD, KDEBUG_ENABLE_CHUD, FALSE);
 		kdebug_chudhook = 0;
 	}
 }
@@ -1272,22 +1664,24 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
 	kd_regtype kd_Reg;
 	kbufinfo_t kd_bufinfo;
 	pid_t curpid;
-	struct proc *p, *curproc;
+	proc_t p, curproc;
 
 	if (name[0] == KERN_KDGETENTROPY ||
+	        name[0] == KERN_KDWRITETR ||
+	        name[0] == KERN_KDWRITEMAP ||
 		name[0] == KERN_KDEFLAGS ||
 		name[0] == KERN_KDDFLAGS ||
 		name[0] == KERN_KDENABLE ||
 		name[0] == KERN_KDSETBUF) {
 		
 		if ( namelen < 2 )
-	        return(EINVAL);
+			return(EINVAL);
 		value = name[1];
 	}
 	
 	kdbg_lock_init();
 
-	if ( !(kdebug_flags & KDBG_LOCKINIT))
+	if ( !(kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT))
 	        return(ENOSPC);
 
 	lck_mtx_lock(kd_trace_mtx_sysctl);
@@ -1308,12 +1702,12 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
 		kd_bufinfo.nkdbufs = nkdbufs;
 		kd_bufinfo.nkdthreads = kd_mapsize / sizeof(kd_threadmap);
 
-		if ( (kdebug_slowcheck & SLOW_NOLOG) )
+		if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) )
 			kd_bufinfo.nolog = 1;
 		else
 			kd_bufinfo.nolog = 0;
 
-		kd_bufinfo.flags = kdebug_flags;
+		kd_bufinfo.flags = kd_ctrl_page.kdebug_flags;
 #if defined(__LP64__)
 		kd_bufinfo.flags |= KDBG_LP64;
 #endif
@@ -1371,11 +1765,11 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
 	switch(name[0]) {
 		case KERN_KDEFLAGS:
 			value &= KDBG_USERFLAGS;
-			kdebug_flags |= value;
+			kd_ctrl_page.kdebug_flags |= value;
 			break;
 		case KERN_KDDFLAGS:
 			value &= KDBG_USERFLAGS;
-			kdebug_flags &= ~value;
+			kd_ctrl_page.kdebug_flags &= ~value;
 			break;
 		case KERN_KDENABLE:
 			/*
@@ -1385,25 +1779,22 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
 				/*
 				 * enable only if buffer is initialized
 				 */
-				if (!(kdebug_flags & KDBG_BUFINIT)) {
+				if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT)) {
 					ret = EINVAL;
 					break;
 				}
 				kdbg_mapinit();
 
-				kdebug_enable |= KDEBUG_ENABLE_TRACE;
-				kdebug_slowcheck &= ~SLOW_NOLOG;
-			}
-			else {
-				kdebug_enable &= ~KDEBUG_ENABLE_TRACE;
-				kdebug_slowcheck |= SLOW_NOLOG;
+				kdbg_set_tracing_enabled(TRUE);
 			}
+			else
+				kdbg_set_tracing_enabled(FALSE);
 			break;
 		case KERN_KDSETBUF:
 			kdbg_set_nkdbufs(value);
 			break;
 		case KERN_KDSETUP:
-			ret = kdbg_reinit();
+			ret = kdbg_reinit(FALSE);
 			break;
 		case KERN_KDREMOVE:
 			kdbg_clear();
@@ -1432,6 +1823,86 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
 		case KERN_KDREADTR:
 			ret = kdbg_read(where, sizep, NULL, NULL);
 			break;
+	        case KERN_KDWRITETR:
+	        case KERN_KDWRITEMAP:
+		{
+			struct	vfs_context context;
+			struct	fileproc *fp;
+			size_t	number;
+			vnode_t	vp;
+			int	fd;
+
+			if (name[0] == KERN_KDWRITETR) {
+				int s;
+				int wait_result = THREAD_AWAKENED;
+				u_int64_t abstime;
+				u_int64_t ns;
+
+				if (*sizep) {
+					ns = ((u_int64_t)*sizep) * (u_int64_t)(1000 * 1000);
+					nanoseconds_to_absolutetime(ns, &abstime);
+					clock_absolutetime_interval_to_deadline(abstime, &abstime);
+				} else
+					abstime = 0;
+
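+				/*
+				 * wait until enough storage units fill up
+				 * (kds_inuse_count reaching n_storage_threshold)
+				 * or the timeout expires, presumably so trace
+				 * data hits the file in large chunks
+				 */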
+				s = ml_set_interrupts_enabled(FALSE);
+				lck_spin_lock(kdw_spin_lock);
+
+				while (wait_result == THREAD_AWAKENED && kd_ctrl_page.kds_inuse_count < n_storage_threshold) {
+
+					kds_waiter = 1;
+
+					if (abstime)
+						wait_result = lck_spin_sleep_deadline(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE, abstime);
+					else
+						wait_result = lck_spin_sleep(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE);
+					
+					kds_waiter = 0;
+				}
+				lck_spin_unlock(kdw_spin_lock);
+				ml_set_interrupts_enabled(s);
+			}
+			p = current_proc();
+			fd = value;
+
+			proc_fdlock(p);
+			if ( (ret = fp_lookup(p, fd, &fp, 1)) ) {
+				proc_fdunlock(p);
+				break;
+			}
+			context.vc_thread = current_thread();
+			context.vc_ucred = fp->f_fglob->fg_cred;
+
+			if (fp->f_fglob->fg_type != DTYPE_VNODE) {
+				fp_drop(p, fd, fp, 1);
+				proc_fdunlock(p);
+
+				ret = EBADF;
+				break;
+			}
+			vp = (struct vnode *)fp->f_fglob->fg_data;
+			proc_fdunlock(p);
+
+			if ((ret = vnode_getwithref(vp)) == 0) {
+
+				if (name[0] == KERN_KDWRITETR) {
+					number = nkdbufs * sizeof(kd_buf);
+
+					KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_INFO, 3)) | DBG_FUNC_START, 0, 0, 0, 0, 0);
+					ret = kdbg_read(0, &number, vp, &context);
+					KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_INFO, 3)) | DBG_FUNC_END, number, 0, 0, 0, 0);
+
+					*sizep = number;
+				} else {
+					number = kd_mapsize;
+					kdbg_readmap(0, &number, vp, &context);
+				}
+				vnode_put(vp);
+			}
+			fp_drop(p, fd, fp, 0);
+
+			break;
+		}
 		case KERN_KDPIDTR:
 			if (size < sizeof(kd_regtype)) {
 				ret = EINVAL;
@@ -1489,25 +1960,32 @@ int
 kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
 {
 	unsigned int count;
-	unsigned int cpu, mincpu;
+	unsigned int cpu, min_cpu;
 	uint64_t  mintime, t;
-	int error = 0,s = 0;
+	int error = 0;
 	kd_buf *tempbuf;
-	kd_buf *rcursor;
-	kd_buf *min_rcursor;
-	struct kd_storage *kdsp;
+	uint32_t rcursor;
+	kd_buf lostevent;
+	union kds_ptr kdsp;
+	struct kd_storage *kdsp_actual;
 	struct kd_bufinfo *kdbp;
+	struct kd_bufinfo *min_kdbp;
 	uint32_t tempbuf_count;
 	uint32_t tempbuf_number;
 	uint32_t old_kdebug_flags;
 	uint32_t old_kdebug_slowcheck;
+	boolean_t lostevents = FALSE;
+	boolean_t out_of_events = FALSE;
 
 	count = *number/sizeof(kd_buf);
 	*number = 0;
 
-	if (count == 0 || !(kdebug_flags & KDBG_BUFINIT) || kdcopybuf == 0)
+	if (count == 0 || !(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) || kdcopybuf == 0)
 		return EINVAL;
 
+	memset(&lostevent, 0, sizeof(lostevent));
+	lostevent.debugid = TRACEDBG_CODE(DBG_TRACE_INFO, 2);
+
 	/*
 	 * because we hold kd_trace_mtx_sysctl, no other control threads can 
 	 * be playing with kdebug_flags... the code that cuts new events could
@@ -1515,17 +1993,8 @@ kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
 	 * storage chunk which is where it examines kdebug_flags... if it's adding
 	 * to the same chunk we're reading from, no problem... 
 	 */
-	s = ml_set_interrupts_enabled(FALSE);
-	lck_spin_lock(kds_spin_lock);
 
-	old_kdebug_slowcheck = kdebug_slowcheck;
-	old_kdebug_flags = kdebug_flags;
-
-	kdebug_flags &= ~KDBG_WRAPPED;
-	kdebug_flags |= KDBG_NOWRAP;
-
-	lck_spin_unlock(kds_spin_lock);
-	ml_set_interrupts_enabled(s);
+	disable_wrap(&old_kdebug_slowcheck, &old_kdebug_flags);
 
 	if (count > nkdbufs)
 		count = nkdbufs;
@@ -1538,66 +2007,86 @@ kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
 		tempbuf_number = 0;
 
 	        while (tempbuf_count) {
-			mintime = 0xffffffffffffffffULL; /* all actual timestamps are below */
-			mincpu = -1;
-			min_rcursor = NULL;
+			mintime = 0xffffffffffffffffULL;
+			min_kdbp = NULL;
+			min_cpu = 0;
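+			/*
+			 * scan each CPU's oldest unread event and pick the
+			 * globally oldest... emitting in this order keeps the
+			 * merged stream sorted by timestamp
+			 */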
 
 			for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_cpus; cpu++, kdbp++) {
 
-				if ((kdsp = kdbp->kd_list_head) == NULL)
+				if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL)
 				        continue;
-				rcursor = kdsp->kds_readlast;
+				kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
+
+				rcursor = kdsp_actual->kds_readlast;
 
-				if (rcursor == kdsp->kds_bufptr)
+				if (rcursor == kdsp_actual->kds_bufindx)
 					continue;
-				t = kdbg_get_timestamp(rcursor);
 
+				t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
+
+				if (t < kdsp_actual->kds_timestamp) {
+					/*
+					 * indicates we've not yet completed filling
+					 * in this event... this should only occur
+					 * when we're looking at the buf that the
+					 * record head is utilizing... we'll pick
+					 * these events up on the next call to
+					 * kdbg_read... we bail at this point so
+					 * that we don't get an out-of-order
+					 * timestream by continuing to read events
+					 * from the other CPUs' timestream(s)
+					 */
+					out_of_events = TRUE;
+					break;
+				}
 				if (t < mintime) {
-					mincpu = cpu;
 				        mintime = t;
-					min_rcursor = rcursor;
+					min_kdbp = kdbp;
+					min_cpu = cpu;
 				}
 			}
-			if (mincpu == (unsigned int)-1)
-			        /*
+			if (min_kdbp == NULL || out_of_events == TRUE) {
+				/*
 				 * all buffers ran empty
 				 */
-			        break;
-			
-			kdbp = &kdbip[mincpu];
-			kdsp = kdbp->kd_list_head;
+				out_of_events = TRUE;
+				break;
+			}
+			kdsp = min_kdbp->kd_list_head;
+			kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
 
-			*tempbuf = *min_rcursor;
+			if (kdsp_actual->kds_lostevents == TRUE) {
+				lostevent.timestamp = kdsp_actual->kds_records[kdsp_actual->kds_readlast].timestamp;
+				*tempbuf = lostevent;
+				
+				kdsp_actual->kds_lostevents = FALSE;
+				lostevents = TRUE;
 
-			if (mintime != kdbg_get_timestamp(tempbuf)) {
-				/*
-				 * we stole this storage unit and used it
-				 * before we could slurp the selected event out
-				 * so we need to re-evaluate
-				 */
-				continue;
+				goto nextevent;
 			}
+			*tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++];
+
+			if (kdsp_actual->kds_readlast == EVENTS_PER_STORAGE_UNIT)
+				release_storage_unit(min_cpu, kdsp.raw);
+
 			/*
 			 * Watch for out of order timestamps
 			 */	
-			if (mintime < kdbp->kd_prev_timebase) {
+			if (mintime < min_kdbp->kd_prev_timebase) {
 				/*
 				 * if so, use the previous timestamp + 1 cycle
 				 */
-				kdbp->kd_prev_timebase++;
-				kdbg_set_timestamp_and_cpu(tempbuf, kdbp->kd_prev_timebase, mincpu);
+				min_kdbp->kd_prev_timebase++;
+				kdbg_set_timestamp_and_cpu(tempbuf, min_kdbp->kd_prev_timebase, kdbg_get_cpu(tempbuf));
 			} else
-				kdbp->kd_prev_timebase = mintime;
-
-			if (min_rcursor == kdsp->kds_readlast)
-				kdsp->kds_readlast++;
-
-			if (kdsp->kds_readlast == kdsp->kds_buflast)
-				release_storage_unit(kdbp, kdsp);
-
+				min_kdbp->kd_prev_timebase = mintime;
+nextevent:
 			tempbuf_count--;
 			tempbuf_number++;
 			tempbuf++;
+
+			if ((RAW_file_written += sizeof(kd_buf)) >= RAW_FLUSH_SIZE)
+				break;
 		}
 		if (tempbuf_number) {
 
@@ -1606,6 +2095,12 @@ kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
 						UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
 
 				RAW_file_offset += (tempbuf_number * sizeof(kd_buf));
+	
+				if (RAW_file_written >= RAW_FLUSH_SIZE) {
+					cluster_push(vp, 0);
+
+					RAW_file_written = 0;
+				}
 			} else {
 				error = copyout(kdcopybuf, buffer, tempbuf_number * sizeof(kd_buf));
 				buffer += (tempbuf_number * sizeof(kd_buf));
@@ -1618,7 +2113,7 @@ kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
 			count   -= tempbuf_number;
 			*number += tempbuf_number;
 		}
-		if (tempbuf_count)
+		if (out_of_events == TRUE)
 		       /*
 			* all trace buffers are empty
 			*/
@@ -1628,17 +2123,7 @@ kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
 		        tempbuf_count = KDCOPYBUF_COUNT;
 	}
 	if ( !(old_kdebug_flags & KDBG_NOWRAP)) {
-
-		s = ml_set_interrupts_enabled(FALSE);
-		lck_spin_lock(kds_spin_lock);
-
-		kdebug_flags &= ~KDBG_NOWRAP;
-
-		if ( !(old_kdebug_slowcheck & SLOW_NOLOG))
-			kdebug_slowcheck &= ~SLOW_NOLOG;
-
-		lck_spin_unlock(kds_spin_lock);
-		ml_set_interrupts_enabled(s);
+		enable_wrap(old_kdebug_slowcheck, lostevents);
 	}
 	return (error);
 }
@@ -1656,9 +2141,6 @@ unsigned char *getProcName(struct proc *proc) {
 #if defined(__i386__) || defined (__x86_64__)
 #define TRAP_DEBUGGER __asm__ volatile("int3");
 #endif
-#ifdef __ppc__
-#define TRAP_DEBUGGER __asm__ volatile("tw 4,r3,r3");
-#endif
 
 #define SANE_TRACEBUF_SIZE (8 * 1024 * 1024)
 
@@ -1701,7 +2183,6 @@ int
 stack_snapshot(struct proc *p, register struct stack_snapshot_args *uap, int32_t *retval) {
 	int error = 0;
 
-
 	if ((error = suser(kauth_cred_get(), &p->p_acflag)))
                 return(error);
 
@@ -1779,14 +2260,13 @@ error_exit:
 
 void
 start_kern_tracing(unsigned int new_nkdbufs) {
+
 	if (!new_nkdbufs)
 		return;
 	kdbg_set_nkdbufs(new_nkdbufs);
 	kdbg_lock_init();
-	kdbg_reinit();
-	kdebug_enable |= KDEBUG_ENABLE_TRACE;
-	kdebug_slowcheck &= ~SLOW_NOLOG;
-	kdbg_mapinit();
+	kdbg_reinit(TRUE);
+	kdbg_set_tracing_enabled(TRUE);
 
 #if defined(__i386__) || defined(__x86_64__)
 	uint64_t now = mach_absolute_time();
@@ -1808,7 +2288,7 @@ kdbg_dump_trace_to_file(const char *filename)
 	size_t		number;
 
 
-	if (kdebug_enable & (KDEBUG_ENABLE_CHUD | KDEBUG_ENABLE_ENTROPY))
+	if ( !(kdebug_enable & KDEBUG_ENABLE_TRACE))
 		return;
 
         if (global_state_pid != -1) {
@@ -1824,6 +2304,7 @@ kdbg_dump_trace_to_file(const char *filename)
 	KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_INFO, 0)) | DBG_FUNC_NONE, 0, 0, 0, 0, 0);
 
 	kdebug_enable = 0;
+	kd_ctrl_page.enabled = 0;
 
 	ctx = vfs_context_kernel();
 
@@ -1840,3 +2321,44 @@ kdbg_dump_trace_to_file(const char *filename)
 
 	sync(current_proc(), (void *)NULL, (int *)NULL);
 }
+
+/* Helper function for filling in the BSD name for an address space
+ * Defined here because the machine bindings know only Mach threads
+ * and nothing about BSD processes.
+ *
+ * FIXME: need to grab a lock during this?
+ */
+void kdbg_get_task_name(char* name_buf, int len, task_t task)
+{
+	proc_t proc;
+	
+	/* Note: we can't use thread->task (and functions that rely on it) here 
+	 * because it hasn't been initialized yet when this function is called.
+	 * We use the explicitly-passed task parameter instead.
+	 */
+	proc = get_bsdtask_info(task);
+	if (proc != PROC_NULL)
+		snprintf(name_buf, len, "%s/%d", proc->p_comm, proc->p_pid);
+	else
+		snprintf(name_buf, len, "%p [!bsd]", task);
+}
+
+
+
+#if defined(NATIVE_TRACE_FACILITY)
+void trace_handler_map_ctrl_page(__unused uintptr_t addr, __unused size_t ctrl_page_size, __unused size_t storage_size, __unused size_t kds_ptr_size)
+{
+}
+void trace_handler_map_bufinfo(__unused uintptr_t addr, __unused size_t size)
+{
+}
+void trace_handler_unmap_bufinfo(void)
+{
+}
+void trace_handler_map_buffer(__unused int index, __unused uintptr_t addr, __unused size_t size)
+{
+}
+void trace_handler_unmap_buffer(__unused int index)
+{
+}
+#endif
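
The helper just added formats a task label as either "p_comm/pid" or, for tasks with no BSD proc, "pointer [!bsd]". A minimal usage sketch (the caller, buffer size, and printf() reporting are illustrative assumptions, not part of this patch):

	/* Sketch: label a task for trace output via kdbg_get_task_name(). */
	static void
	example_label_task(task_t task)
	{
		char name_buf[64];

		kdbg_get_task_name(name_buf, sizeof(name_buf), task);
		printf("tracing %s\n", name_buf);
	}
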
diff --git a/bsd/kern/kern_acct.c b/bsd/kern/kern_acct.c
index 747f09221..516de08dc 100644
--- a/bsd/kern/kern_acct.c
+++ b/bsd/kern/kern_acct.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -161,7 +161,7 @@ acct(proc_t p, struct acct_args *uap, __unused int *retval)
 	 * writing and make sure it's a 'normal'.
 	 */
 	if (uap->path != USER_ADDR_NULL) {
-		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, ctx);
+		NDINIT(&nd, LOOKUP, OP_OPEN, NOFOLLOW, UIO_USERSPACE, uap->path, ctx);
 		if ((error = vn_open(&nd, FWRITE, 0)))
 			return (error);
 #if CONFIG_MACF
@@ -271,8 +271,8 @@ acct_process(proc_t p)
 	/* (6) The UID and GID of the process */
 	safecred = kauth_cred_proc_ref(p);
 
-	an_acct.ac_uid = safecred->cr_ruid;
-	an_acct.ac_gid = safecred->cr_rgid;
+	an_acct.ac_uid = kauth_cred_getruid(safecred);
+	an_acct.ac_gid = kauth_cred_getrgid(safecred);
 
 	/* (7) The terminal from which the process was started */
 	
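
The cr_ruid/cr_rgid fields are now read through kauth_cred_getruid()/kauth_cred_getrgid(), part of this patch's broader move to opaque credentials (the accessors themselves are added in kern_credential.c below). A sketch of the accessor style, with a hypothetical logging helper:

	/* Sketch: read POSIX ids through the kauth accessors rather than
	 * dereferencing credential fields directly. */
	static void
	example_log_ids(kauth_cred_t cred)
	{
		printf("ruid=%u rgid=%u\n",
		    kauth_cred_getruid(cred), kauth_cred_getrgid(cred));
	}
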
diff --git a/bsd/kern/kern_aio.c b/bsd/kern/kern_aio.c
index a2983db0a..89a2ba012 100644
--- a/bsd/kern/kern_aio.c
+++ b/bsd/kern/kern_aio.c
@@ -1394,7 +1394,7 @@ aio_enqueue_work( proc_t procp, aio_workq_entry *entryp, int proc_locked)
 	/* And work queue */
 	aio_workq_lock_spin(queue);
 	aio_workq_add_entry_locked(queue, entryp);
-	wait_queue_wakeup_one(queue->aioq_waitq, queue, THREAD_AWAKENED);
+	wait_queue_wakeup_one(queue->aioq_waitq, queue, THREAD_AWAKENED, -1);
 	aio_workq_unlock(queue);
 	
 	if (proc_locked == 0) {
diff --git a/bsd/kern/kern_authorization.c b/bsd/kern/kern_authorization.c
index 263fc28b4..f30df6d2d 100644
--- a/bsd/kern/kern_authorization.c
+++ b/bsd/kern/kern_authorization.c
@@ -185,10 +185,9 @@ kauth_alloc_scope(const char *identifier, kauth_scope_callback_t callback, void
 	/*
 	 * Allocate and populate the scope structure.
 	 */
-	MALLOC(sp, kauth_scope_t, sizeof(*sp), M_KAUTH, M_WAITOK);
+	MALLOC(sp, kauth_scope_t, sizeof(*sp), M_KAUTH, M_WAITOK | M_ZERO);
 	if (sp == NULL)
 		return(NULL);
-	bzero(&sp->ks_listeners, sizeof(sp->ks_listeners));
 	sp->ks_flags = 0;
 	sp->ks_identifier = identifier;
 	sp->ks_idata = idata;
@@ -613,7 +612,7 @@ kauth_authorize_generic_callback(kauth_cred_t credential, __unused void *idata,
 int
 kauth_acl_evaluate(kauth_cred_t cred, kauth_acl_eval_t eval)
 {
-	int applies, error, i;
+	int applies, error, i, gotguid;
 	kauth_ace_t ace;
 	guid_t guid;
 	uint32_t rights;
@@ -632,9 +631,11 @@ kauth_acl_evaluate(kauth_cred_t cred, kauth_acl_eval_t eval)
 	 * Get our guid for comparison purposes.
 	 */
 	if ((error = kauth_cred_getguid(cred, &guid)) != 0) {
-		eval->ae_result = KAUTH_RESULT_DENY;
-		KAUTH_DEBUG("    ACL - can't get credential GUID (%d), ACL denied", error);
-		return(error);
+		KAUTH_DEBUG("    ACL - can't get credential GUID (%d)", error);
+		error = 0;
+		gotguid = 0;
+	} else {
+		gotguid = 1;
 	}
 
 	KAUTH_DEBUG("    ACL - %d entries, initial residual %x", eval->ae_count, eval->ae_residual);
@@ -678,7 +679,7 @@ kauth_acl_evaluate(kauth_cred_t cred, kauth_acl_eval_t eval)
 			/* we don't recognise this ACE, skip it */
 			continue;
 		}
-		
+	
 		/*
 		 * Verify whether this entry applies to the credential.
 		 */
@@ -688,7 +689,10 @@ kauth_acl_evaluate(kauth_cred_t cred, kauth_acl_eval_t eval)
 			applies = eval->ae_options & KAUTH_AEVAL_IS_OWNER;
 			break;
 		case KAUTH_WKG_GROUP:
-			applies = eval->ae_options & KAUTH_AEVAL_IN_GROUP;
+			if (!gotguid || (eval->ae_options & KAUTH_AEVAL_IN_GROUP_UNKNOWN))
+				applies = ((ace->ace_flags & KAUTH_ACE_KINDMASK) == KAUTH_ACE_DENY);
+			else
+				applies = eval->ae_options & KAUTH_AEVAL_IN_GROUP;
 			break;
 		/* we short-circuit these here rather than wasting time calling the group membership code */
 		case KAUTH_WKG_EVERYBODY:
@@ -700,12 +704,12 @@ kauth_acl_evaluate(kauth_cred_t cred, kauth_acl_eval_t eval)
 
 		default:
 			/* check to see whether it's exactly us, or a group we are a member of */
-			applies = kauth_guid_equal(&guid, &ace->ace_applicable);
+			applies = !gotguid ? 0 : kauth_guid_equal(&guid, &ace->ace_applicable);
 			KAUTH_DEBUG("    ACL - ACE applicable " K_UUID_FMT " caller " K_UUID_FMT " %smatched",
 			    K_UUID_ARG(ace->ace_applicable), K_UUID_ARG(guid), applies ? "" : "not ");
 		
 			if (!applies) {
-				error = kauth_cred_ismember_guid(cred, &ace->ace_applicable, &applies);
+				error = !gotguid ? ENOENT : kauth_cred_ismember_guid(cred, &ace->ace_applicable, &applies);
 				/*
 				 * If we can't resolve group membership, we have to limit misbehaviour.
 				 * If the ACE is an 'allow' ACE, assume the cred is not a member (avoid
@@ -791,15 +795,37 @@ kauth_acl_inherit(vnode_t dvp, kauth_acl_t initial, kauth_acl_t *product, int is
 	 * XXX TODO: <rdar://3634665> wants a "umask ACL" from the process.
 	 */
 	inherit = NULL;
-	if ((dvp != NULL) && !vfs_authopaque(vnode_mount(dvp))) {
+	/*
+	 * If there is no initial ACL, or there is, and the initial ACLs
+	 * flags do not request "no inheritance", then we inherit.  This allows
+	 * initial object creation via open_extended() and mkdir_extended()
+	 * to reject inheritance for themselves and for inferior nodes by
+	 * specifying a non-NULL initial ACL which has the KAUTH_ACL_NO_INHERIT
+	 * flag set in the flags field.
+	 */
+	if ((initial == NULL || !(initial->acl_flags & KAUTH_ACL_NO_INHERIT)) &&
+	    (dvp != NULL) && !vfs_authopaque(vnode_mount(dvp))) {
 		VATTR_INIT(&dva);
 		VATTR_WANTED(&dva, va_acl);
 		if ((error = vnode_getattr(dvp, &dva, ctx)) != 0) {
 			KAUTH_DEBUG("    ERROR - could not get parent directory ACL for inheritance");
 			return(error);
 		}
-		if (VATTR_IS_SUPPORTED(&dva, va_acl))
+		if (VATTR_IS_SUPPORTED(&dva, va_acl)) {
 			inherit = dva.va_acl;
+			/*
+			 * If there is an ACL on the parent directory, then
+			 * there are potentially inheritable ACE entries, but
+			 * if the flags on the directory ACL say not to
+			 * inherit, then we don't inherit.  This allows for
+			 * per directory rerooting of the inheritable ACL
+			 * hierarchy.
+			 */
+			if (inherit != NULL && inherit->acl_flags & KAUTH_ACL_NO_INHERIT) {
+				kauth_acl_free(inherit);
+				inherit = NULL;
+			}
+		}
 	}
 
 	/*
@@ -852,14 +878,17 @@ kauth_acl_inherit(vnode_t dvp, kauth_acl_t initial, kauth_acl_t *product, int is
 
 	/*
 	 * Composition is simply:
-	 *  - initial
-	 *  - inherited
+	 *  - initial direct ACEs
+	 *  - inherited ACEs from new parent
 	 */
 	index = 0;
 	if (initial != NULL) {
-		for (i = 0; i < initial->acl_entrycount; i++)
-			result->acl_ace[index++] = initial->acl_ace[i];
-		KAUTH_DEBUG("    INHERIT - applied %d initial entries", index);
+		for (i = 0; i < initial->acl_entrycount; i++) {
+			if (!(initial->acl_ace[i].ace_flags & KAUTH_ACE_INHERITED)) {
+				result->acl_ace[index++] = initial->acl_ace[i];
+			}
+		}
+		KAUTH_DEBUG("    INHERIT - applied %d of %d initial entries", index, initial->acl_entrycount);
 	}
 	if (inherit != NULL) {
 		for (i = 0; i < inherit->acl_entrycount; i++) {
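
A caller can therefore suppress inheritance entirely by passing an initial ACL with KAUTH_ACL_NO_INHERIT set, even an empty one. A sketch, assuming the standard kauth_acl_alloc() allocator:

	/* Sketch: an initial ACL that rejects inheritance for the new
	 * object and for inferior nodes. */
	static kauth_acl_t
	example_no_inherit_acl(void)
	{
		kauth_acl_t acl = kauth_acl_alloc(0);	/* no direct entries */

		if (acl != NULL)
			acl->acl_flags |= KAUTH_ACL_NO_INHERIT;
		return (acl);
	}
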
diff --git a/bsd/kern/kern_clock.c b/bsd/kern/kern_clock.c
index 0cbd41e1b..1aae2df47 100644
--- a/bsd/kern/kern_clock.c
+++ b/bsd/kern/kern_clock.c
@@ -241,7 +241,7 @@ sysctl_clockrate
 }
 
 SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate,
-		CTLTYPE_STRUCT | CTLFLAG_RD,
+		CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
 		0, 0, sysctl_clockrate, "S,clockinfo", "");
 
 
diff --git a/bsd/kern/kern_control.c b/bsd/kern/kern_control.c
index a76088eb4..92b15bc40 100644
--- a/bsd/kern/kern_control.c
+++ b/bsd/kern/kern_control.c
@@ -91,6 +91,7 @@ static int ctl_peeraddr(struct socket *so, struct sockaddr **nam);
 static struct kctl *ctl_find_by_name(const char *);
 static struct kctl *ctl_find_by_id_unit(u_int32_t id, u_int32_t unit);
 
+static struct socket *kcb_find_socket(struct kctl *, u_int32_t unit);
 static struct ctl_cb *kcb_find(struct kctl *, u_int32_t unit);
 static void ctl_post_msg(u_int32_t event_code, u_int32_t id);
 
@@ -255,7 +256,7 @@ ctl_sofreelastref(struct socket *so)
         if ((kctl = kcb->kctl) != 0) {
             lck_mtx_lock(ctl_mtx);
             TAILQ_REMOVE(&kctl->kcb_head, kcb, next);
-            lck_mtx_lock(ctl_mtx);
+            lck_mtx_unlock(ctl_mtx);
     	}
     	kcb_delete(kcb);
     }
@@ -364,10 +365,16 @@ ctl_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
     error = (*kctl->connect)(kctl, &sa, &kcb->userdata);
 	socket_lock(so, 0);
     if (error)
-		goto done;
+		goto end;
     
     soisconnected(so);
 
+end:
+	if (error && kctl->disconnect) {
+		socket_unlock(so, 0);
+		(*kctl->disconnect)(kctl, kcb->unit, kcb->userdata);
+		socket_lock(so, 0);
+	}
 done:
     if (error) {
         soisdisconnected(so);
@@ -393,12 +400,19 @@ ctl_disconnect(struct socket *so)
             (*kctl->disconnect)(kctl, kcb->unit, kcb->userdata);
             socket_lock(so, 0);
         }
+        
+        soisdisconnected(so);
+        
+		socket_unlock(so, 0);
         lck_mtx_lock(ctl_mtx);
         kcb->kctl = 0;
     	kcb->unit = 0;
+    	while (kcb->usecount != 0) {
+    		msleep(&kcb->usecount, ctl_mtx, 0, "kcb->usecount", 0);
+    	}
         TAILQ_REMOVE(&kctl->kcb_head, kcb, next);
-        soisdisconnected(so);
         lck_mtx_unlock(ctl_mtx);
+		socket_lock(so, 0);
     }
     return 0;
 }
@@ -430,23 +444,29 @@ ctl_peeraddr(struct socket *so, struct sockaddr **nam)
 
 static int
 ctl_send(struct socket *so, int flags, struct mbuf *m,
-            __unused struct sockaddr *addr, __unused struct mbuf *control,
+            __unused struct sockaddr *addr, struct mbuf *control,
             __unused struct proc *p)
 {
 	int	 	error = 0;
 	struct ctl_cb 	*kcb = (struct ctl_cb *)so->so_pcb;
 	struct kctl		*kctl;
 	
+	if (control) m_freem(control);
+	
 	if (kcb == NULL)	/* sanity check */
-		return(ENOTCONN);
+		error = ENOTCONN;
 	
-	if ((kctl = kcb->kctl) == NULL)
-		return(EINVAL);
+	if (error == 0 && (kctl = kcb->kctl) == NULL)
+		error = EINVAL;
 		
-	if (kctl->send) {
+	if (error == 0 && kctl->send) {
 		socket_unlock(so, 0);
 		error = (*kctl->send)(kctl, kcb->unit, kcb->userdata, m, flags);
 		socket_lock(so, 0);
+	} else {
+		m_freem(m);
+		if (error == 0)
+			error = ENOTSUP;
 	}
 	return error;
 }
@@ -454,23 +474,18 @@ ctl_send(struct socket *so, int flags, struct mbuf *m,
 errno_t
 ctl_enqueuembuf(void *kctlref, u_int32_t unit, struct mbuf *m, u_int32_t flags)
 {
-	struct ctl_cb 	*kcb;
 	struct socket 	*so;
 	errno_t 		error = 0;
 	struct kctl		*kctl = (struct kctl *)kctlref;
 	
 	if (kctl == NULL)
 		return EINVAL;
-		
-	kcb = kcb_find(kctl, unit);
-	if (kcb == NULL)
-		return EINVAL;
 	
-	so = (struct socket *)kcb->so;
-	if (so == NULL) 
+	so = kcb_find_socket(kctl, unit);
+	
+	if (so == NULL)
 		return EINVAL;
 	
-	socket_lock(so, 1);
 	if (sbspace(&so->so_rcv) < m->m_pkthdr.len) {
 		error = ENOBUFS;
 		goto bye;
@@ -487,7 +502,6 @@ bye:
 errno_t
 ctl_enqueuedata(void *kctlref, u_int32_t unit, void *data, size_t len, u_int32_t flags)
 {
-	struct ctl_cb 	*kcb;
 	struct socket 	*so;
 	struct mbuf 	*m;
 	errno_t			error = 0;
@@ -499,15 +513,10 @@ ctl_enqueuedata(void *kctlref, u_int32_t unit, void *data, size_t len, u_int32_t
 	if (kctlref == NULL)
 		return EINVAL;
 		
-	kcb = kcb_find(kctl, unit);
-	if (kcb == NULL)
+	so = kcb_find_socket(kctl, unit);
+	if (so == NULL)
 		return EINVAL;
 	
-	so = (struct socket *)kcb->so;
-	if (so == NULL) 
-		return EINVAL;
-	
-	socket_lock(so, 1);
 	if (sbspace(&so->so_rcv) < (int)len) {
 		error = ENOBUFS;
 		goto bye;
@@ -545,27 +554,21 @@ bye:
 errno_t 
 ctl_getenqueuespace(kern_ctl_ref kctlref, u_int32_t unit, size_t *space)
 {
-	struct ctl_cb 	*kcb;
 	struct kctl		*kctl = (struct kctl *)kctlref;
 	struct socket 	*so;
 	long avail;
 	
 	if (kctlref == NULL || space == NULL)
 		return EINVAL;
-		
-	kcb = kcb_find(kctl, unit);
-	if (kcb == NULL)
-		return EINVAL;
 	
-	so = (struct socket *)kcb->so;
-	if (so == NULL) 
+	so = kcb_find_socket(kctl, unit);
+	if (so == NULL)
 		return EINVAL;
 	
-	socket_lock(so, 1);
 	avail = sbspace(&so->so_rcv);
 	*space = (avail < 0) ? 0 : avail;
 	socket_unlock(so, 1);
-
+	
 	return 0;
 }
 
@@ -624,6 +627,9 @@ ctl_ctloutput(struct socket *so, struct sockopt *sopt)
 			socket_unlock(so, 0);
 			error = (*kctl->getopt)(kcb->kctl, kcb->unit, kcb->userdata, sopt->sopt_name, 
 						data, &len);
+			if (data != NULL && len > sopt->sopt_valsize)
+				panic_plain("ctl_ctloutput: ctl %s returned len (%lu) > sopt_valsize (%lu)\n",
+					kcb->kctl->name, len, sopt->sopt_valsize);
 			socket_lock(so, 0);    
 			if (error == 0) {
 				if (data != NULL)
@@ -858,6 +864,46 @@ ctl_find_by_name(const char *name)
     return NULL;
 }
 
+u_int32_t
+ctl_id_by_name(const char *name)
+{
+	u_int32_t	ctl_id = 0;
+	
+	lck_mtx_lock(ctl_mtx);
+	struct kctl *kctl = ctl_find_by_name(name);
+	if (kctl) ctl_id = kctl->id;
+	lck_mtx_unlock(ctl_mtx);
+	
+	return ctl_id;
+}
+
+errno_t
+ctl_name_by_id(
+	u_int32_t id,
+	char	*out_name,
+	size_t	maxsize)
+{
+	int 		found = 0;
+	
+	lck_mtx_lock(ctl_mtx);
+	struct kctl *kctl;
+    TAILQ_FOREACH(kctl, &ctl_head, next) {
+        if (kctl->id == id)
+            break;
+    }
+    
+    if (kctl && kctl->name)
+    {
+    	if (maxsize > MAX_KCTL_NAME)
+    		maxsize = MAX_KCTL_NAME;
+    	strlcpy(out_name, kctl->name, maxsize);
+    	found = 1;
+    }
+	lck_mtx_unlock(ctl_mtx);
+	
+	return found ? 0 : ENOENT;
+}
+
 /*
  * Must be called with global ctl_mtx lock taken
  *
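
The two new lookups are symmetric: ctl_id_by_name() maps a registered control's name to its dynamically assigned id, and ctl_name_by_id() maps an id back to a name, both under ctl_mtx. A usage sketch ("com.example.kctl" is a made-up control name):

	/* Sketch: round-trip a kernel control name through its id. */
	static void
	example_kctl_roundtrip(void)
	{
		char name[MAX_KCTL_NAME];
		u_int32_t id = ctl_id_by_name("com.example.kctl");

		if (id != 0 && ctl_name_by_id(id, name, sizeof(name)) == 0)
			printf("kctl %u is %s\n", id, name);
	}
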
@@ -885,21 +931,58 @@ kcb_find(struct kctl *kctl, u_int32_t unit)
     struct ctl_cb 	*kcb;
 
     TAILQ_FOREACH(kcb, &kctl->kcb_head, next)
-        if ((kcb->unit == unit))
+        if (kcb->unit == unit)
             return kcb;
 
     return NULL;
 }
 
-/*
- * Must be called witout lock
- */
+static struct socket *
+kcb_find_socket(struct kctl *kctl, u_int32_t unit)
+{
+	struct socket *so = NULL;
+	
+	lck_mtx_lock(ctl_mtx);
+	struct ctl_cb	*kcb = kcb_find(kctl, unit);
+	if (kcb && kcb->kctl == kctl) {
+		so = kcb->so;
+		if (so) {
+			kcb->usecount++;
+		}
+	}
+	lck_mtx_unlock(ctl_mtx);
+	
+	if (so == NULL) {
+		return NULL;
+	}
+	
+	socket_lock(so, 1);
+	
+	lck_mtx_lock(ctl_mtx);
+	if (kcb->kctl == NULL)
+	{
+		lck_mtx_unlock(ctl_mtx);
+		socket_unlock(so, 1);
+		so = NULL;
+		lck_mtx_lock(ctl_mtx);
+	}
+	kcb->usecount--;
+	if (kcb->usecount == 0)
+		wakeup((event_t)&kcb->usecount);
+	lck_mtx_unlock(ctl_mtx);
+	
+	return so;
+}
+
 static void 
 ctl_post_msg(u_int32_t event_code, u_int32_t id) 
 {
     struct ctl_event_data  	ctl_ev_data;
     struct kev_msg  		ev_msg;
     
+    lck_mtx_assert(ctl_mtx, LCK_MTX_ASSERT_NOTOWNED);
+   
+    bzero(&ev_msg, sizeof(struct kev_msg)); 
     ev_msg.vendor_code    = KEV_VENDOR_APPLE;
     
     ev_msg.kev_class      = KEV_SYSTEM_CLASS;
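
kcb_find_socket() and ctl_disconnect() cooperate through kcb->usecount: a lookup bumps the count under ctl_mtx before taking the socket lock, and teardown sleeps until the count drains before unlinking the control block. The teardown side of that handshake, restated as a sketch:

	/* Sketch: wait for transient kcb_find_socket() references to
	 * drain before the control block may be unlinked. */
	static void
	example_drain_usecount(struct ctl_cb *kcb)
	{
		lck_mtx_lock(ctl_mtx);
		while (kcb->usecount != 0)
			msleep(&kcb->usecount, ctl_mtx, 0, "kcb->usecount", 0);
		lck_mtx_unlock(ctl_mtx);
	}
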
diff --git a/bsd/kern/kern_core.c b/bsd/kern/kern_core.c
index 52c0a3095..cf63621d9 100644
--- a/bsd/kern/kern_core.c
+++ b/bsd/kern/kern_core.c
@@ -70,24 +70,7 @@ typedef struct {
 	mach_msg_type_number_t	count;	/* count of ints in this flavor */
 } mythread_state_flavor_t;
 
-#if defined (__ppc__)
-/* 64 bit */
-mythread_state_flavor_t thread_flavor_array64[]={
-		{PPC_THREAD_STATE64 , PPC_THREAD_STATE64_COUNT},
-		{PPC_FLOAT_STATE, PPC_FLOAT_STATE_COUNT}, 
-		{PPC_EXCEPTION_STATE64, PPC_EXCEPTION_STATE64_COUNT},
-		{PPC_VECTOR_STATE, PPC_VECTOR_STATE_COUNT}
-		};
-
-/* 32 bit */
-mythread_state_flavor_t thread_flavor_array[]={
-		{PPC_THREAD_STATE , PPC_THREAD_STATE_COUNT},
-		{PPC_FLOAT_STATE, PPC_FLOAT_STATE_COUNT}, 
-		{PPC_EXCEPTION_STATE, PPC_EXCEPTION_STATE_COUNT},
-		{PPC_VECTOR_STATE, PPC_VECTOR_STATE_COUNT}
-		};
-
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 mythread_state_flavor_t thread_flavor_array [] = { 
 		{x86_THREAD_STATE, x86_THREAD_STATE_COUNT},
 		{x86_FLOAT_STATE, x86_FLOAT_STATE_COUNT},
@@ -139,9 +122,6 @@ process_cpu_type(proc_t core_proc)
 	} else {
 		what_we_think = CPU_TYPE_I386;
 	}
-#elif defined (__ppc__)
-	#pragma unused(core_proc)
-	what_we_think = CPU_TYPE_POWERPC;
 #endif
 	return what_we_think;
 }
@@ -156,9 +136,6 @@ process_cpu_subtype(proc_t core_proc)
 	} else {
 		what_we_think = CPU_SUBTYPE_I386_ALL;
 	}
-#elif defined (__ppc__)
-	#pragma unused(core_proc)
-	what_we_think = CPU_SUBTYPE_POWERPC_ALL;
 #endif
 	return what_we_think;
 }
@@ -261,8 +238,8 @@ coredump(proc_t core_proc)
 
 	if (do_coredump == 0 ||		/* Not dumping at all */
 	    ( (sugid_coredump == 0) &&	/* Not dumping SUID/SGID binaries */
-	      ( (cred->cr_svuid != cred->cr_ruid) ||
-	        (cred->cr_svgid != cred->cr_rgid)))) {
+	      ( (kauth_cred_getsvuid(cred) != kauth_cred_getruid(cred)) ||
+	        (kauth_cred_getsvgid(cred) != kauth_cred_getrgid(cred))))) {
 
 #if CONFIG_AUDIT
 		audit_proc_coredump(core_proc, NULL, EFAULT);
@@ -320,17 +297,8 @@ coredump(proc_t core_proc)
 
 	thread_count = get_task_numacts(task);
 	segment_count = get_vmmap_entries(map);	/* XXX */
-#if defined (__ppc__)
-	if (is_64) {
-		tir1.flavor_count = sizeof(thread_flavor_array64)/sizeof(mythread_state_flavor_t);
-		bcopy(thread_flavor_array64, flavors,sizeof(thread_flavor_array64));
-	} else {
-#endif	/* __ppc __ */
-		tir1.flavor_count = sizeof(thread_flavor_array)/sizeof(mythread_state_flavor_t);
-		bcopy(thread_flavor_array, flavors,sizeof(thread_flavor_array));
-#if defined (__ppc__)
-	}
-#endif	/* __ppc __ */
+	tir1.flavor_count = sizeof(thread_flavor_array)/sizeof(mythread_state_flavor_t);
+	bcopy(thread_flavor_array, flavors,sizeof(thread_flavor_array));
 	tstate_size = 0;
 	for (i = 0; i < tir1.flavor_count; i++)
 		tstate_size += sizeof(mythread_state_flavor_t) +
diff --git a/bsd/kern/kern_credential.c b/bsd/kern/kern_credential.c
index 4ab7311ae..484c86fff 100644
--- a/bsd/kern/kern_credential.c
+++ b/bsd/kern/kern_credential.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -52,6 +52,7 @@
 #include <security/audit/audit.h>
 
 #include <sys/mount.h>
+#include <sys/stat.h>	/* For manifest constants in posix_cred_access */
 #include <sys/sysproto.h>
 #include <sys/kern_callout.h>
 #include <mach/message.h>
@@ -150,6 +151,7 @@ static int	kauth_resolver_timeout = 30;	/* default: 30 seconds */
 struct kauth_resolver_work {
 	TAILQ_ENTRY(kauth_resolver_work) kr_link;
 	struct kauth_identity_extlookup kr_work;
+	uint64_t	kr_extend;
 	uint32_t	kr_seqno;
 	uint64_t	kr_subtime;	/* submission time */
 	int		kr_refs;
@@ -164,7 +166,7 @@ TAILQ_HEAD(kauth_resolver_unsubmitted_head, kauth_resolver_work) kauth_resolver_
 TAILQ_HEAD(kauth_resolver_submitted_head, kauth_resolver_work)	kauth_resolver_submitted;
 TAILQ_HEAD(kauth_resolver_done_head, kauth_resolver_work)	kauth_resolver_done;
 
-static int	kauth_resolver_submit(struct kauth_identity_extlookup *lkp);
+static int	kauth_resolver_submit(struct kauth_identity_extlookup *lkp, uint64_t extend_data);
 static int	kauth_resolver_complete(user_addr_t message);
 static int	kauth_resolver_getwork(user_addr_t message);
 static int	kauth_resolver_getwork2(user_addr_t message);
@@ -246,21 +248,37 @@ kauth_resolver_init(void)
  *
  * Parameters:	lkp				A pointer to an external
  *						lookup request
+ *		extend_data			extended data for kr_extend
  *
  * Returns:	0				Success
  *		EWOULDBLOCK			No resolver registered
  *		EINTR				Operation interrupted (e.g. by
  *						a signal)
  *		ENOMEM				Could not allocate work item
+ *	copyinstr:EFAULT			Bad message from user space
  *	workp->kr_result:???			An error from the user space
  *						daemon (includes ENOENT!)
  *
+ * Implicit returns:
+ *		*lkp				Modified
+ *
  * Notes:	Allocate a work queue entry, submit the work and wait for
  *		the operation to either complete or time out.  Outstanding
  *		operations may also be cancelled.
+ *
+ *		Submission is by means of placing the item on a work queue
+ *		which is serviced by an external resolver thread calling
+ *		into the kernel.  The caller then sleeps until timeout,
+ *		cancellation, or an external resolver thread calls in with
+ *		a result message to kauth_resolver_complete().  All of these
+ *		events wake the caller back up.
+ *
+ *		This code is called from either kauth_cred_ismember_gid()
+ *		for a group membership request, or it is called from
+ *		kauth_cred_cache_lookup() when we get a cache miss.
  */
 static int
-kauth_resolver_submit(struct kauth_identity_extlookup *lkp)
+kauth_resolver_submit(struct kauth_identity_extlookup *lkp, uint64_t extend_data)
 {
 	struct kauth_resolver_work *workp, *killp;
 	struct timespec ts;
@@ -294,6 +312,7 @@ kauth_resolver_submit(struct kauth_identity_extlookup *lkp)
 		return(ENOMEM);
 
 	workp->kr_work = *lkp;
+	workp->kr_extend = extend_data;
 	workp->kr_refs = 1;
 	workp->kr_flags = KAUTH_REQUEST_UNSUBMITTED;
 	workp->kr_result = 0;
@@ -307,11 +326,19 @@ kauth_resolver_submit(struct kauth_identity_extlookup *lkp)
 	workp->kr_work.el_result = KAUTH_EXTLOOKUP_INPROG;
 
 	/*
-	 * XXX As an optimisation, we could check the queue for identical
-	 * XXX items and coalesce them
+	 * XXX We *MUST NOT* attempt to coalesce identical work items due to
+	 * XXX the inability to ensure order of update of the request item
+	 * XXX extended data vs. the wakeup; instead, we let whoever is waiting
+	 * XXX for each item repeat the update when they wake up.
 	 */
 	TAILQ_INSERT_TAIL(&kauth_resolver_unsubmitted, workp, kr_link);
 
+	/*
+	 * Wake up an external resolver thread to deal with the new work; one
+	 * may not be available, and if not, then the request will be grabbed
+	 * when a resolver thread comes back into the kernel to request new
+	 * work.
+	 */
 	wakeup_one((caddr_t)&kauth_resolver_unsubmitted);
 	for (;;) {
 		/* we could compute a better timeout here */
@@ -332,8 +359,9 @@ kauth_resolver_submit(struct kauth_identity_extlookup *lkp)
 	}
 
 	/*
-	 * Update the moving average of how long it took; if it took longer
-	 * than the time threshold, then we complain about it being slow.
+	 * Update the moving average of how long the request took; if it
+	 * took longer than the time threshold, then we complain about it
+	 * being slow.
 	 */
 	duration = mach_absolute_time() - workp->kr_subtime;
 	if (kco_ma_addsample(&resolver_ma, duration)) {
@@ -401,15 +429,19 @@ kauth_resolver_submit(struct kauth_identity_extlookup *lkp)
 		/* someone else still has a reference on this request */
 		shouldfree = 0;
 	}
+
 	/* collect request result */
-	if (error == 0)
+	if (error == 0) {
 		error = workp->kr_result;
+	}
 	KAUTH_RESOLVER_UNLOCK();
+
 	/*
 	 * If we dropped the last reference, free the request.
 	 */
-	if (shouldfree)
+	if (shouldfree) {
 		FREE(workp, M_KAUTH);
+	}
 
 	KAUTH_DEBUG("RESOLVER - returning %d", error);
 	return(error);
@@ -473,7 +505,7 @@ identitysvc(__unused struct proc *p, struct identitysvc_args *uap, __unused int3
 			 * Allow user space resolver to override the
 			 * external resolution timeout
 			 */
-			if (message >= 30 && message <= 10000) {
+			if (message > 30 && message < 10000) {
 				kauth_resolver_timeout = message;
 				KAUTH_DEBUG("RESOLVER - new resolver changes timeout to %d seconds\n", (int)message);
 			}
@@ -625,10 +657,54 @@ kauth_resolver_getwork2(user_addr_t message)
 	 */
 	workp = TAILQ_FIRST(&kauth_resolver_unsubmitted);
 
-	if ((error = copyout(&workp->kr_work, message, sizeof(workp->kr_work))) != 0) {
+	/*
+	 * Copy out the external lookup structure for the request, not
+	 * including the el_extend field, which contains the address of the
+	 * external buffer provided by the external resolver into which we
+	 * copy the extension request information.
+	 */
+	/* BEFORE FIELD */
+	if ((error = copyout(&workp->kr_work, message, offsetof(struct kauth_identity_extlookup, el_extend))) != 0) {
+		KAUTH_DEBUG("RESOLVER - error submitting work to resolve");
+		goto out;
+	}
+	/* AFTER FIELD */
+	if ((error = copyout(&workp->kr_work.el_info_reserved_1,
+			message + offsetof(struct kauth_identity_extlookup, el_info_reserved_1),
+		sizeof(struct kauth_identity_extlookup) - offsetof(struct kauth_identity_extlookup, el_info_reserved_1))) != 0) {
 		KAUTH_DEBUG("RESOLVER - error submitting work to resolve");
 		goto out;
 	}
+
+	/*
+	 * Handle extended requests here; if we have a request of a type where
+	 * the kernel wants a translation of extended information, then we need
+	 * to copy it out into the extended buffer, assuming the buffer is
+	 * valid; we only attempt to get the buffer address if we have request
+	 * data to copy into it.
+	 */
+
+	/*
+	 * translate a user@domain string into a uid/gid/whatever
+	 */
+	if (workp->kr_work.el_flags & (KAUTH_EXTLOOKUP_VALID_PWNAM | KAUTH_EXTLOOKUP_VALID_GRNAM)) {
+		uint64_t uaddr;
+
+		error = copyin(message + offsetof(struct kauth_identity_extlookup, el_extend), &uaddr, sizeof(uaddr));
+		if (!error) {
+			size_t actual;	/* not used */
+			/*
+			 * Use copyoutstr() to reduce the copy size; we let
+			 * this catch a NULL uaddr because we shouldn't be
+			 * asking in that case anyway.
+			 */
+			error = copyoutstr(CAST_DOWN(void *,workp->kr_extend), uaddr, MAXPATHLEN, &actual);
+		}
+		if (error) {
+			KAUTH_DEBUG("RESOLVER - error submitting work to resolve");
+			goto out;
+		}
+	}
 	TAILQ_REMOVE(&kauth_resolver_unsubmitted, workp, kr_link);
 	workp->kr_flags &= ~KAUTH_REQUEST_UNSUBMITTED;
 	workp->kr_flags |= KAUTH_REQUEST_SUBMITTED;
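
The two copyout() calls above intentionally skip the el_extend field so the resolver's own user-space pointer survives the round trip. Condensed into one hypothetical helper (it relies, as the code above does, on el_info_reserved_1 being the field that follows el_extend):

	/* Sketch: copy a structure out around one field, leaving the
	 * user-space copy of el_extend untouched. */
	static int
	example_copyout_skipping_extend(struct kauth_identity_extlookup *el,
	    user_addr_t msg)
	{
		size_t before = offsetof(struct kauth_identity_extlookup, el_extend);
		size_t after = offsetof(struct kauth_identity_extlookup, el_info_reserved_1);
		int error;

		error = copyout(el, msg, before);
		if (error == 0)
			error = copyout((char *)el + after, msg + after,
			    sizeof(*el) - after);
		return (error);
	}
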
@@ -706,6 +782,10 @@ kauth_resolver_complete(user_addr_t message)
 	struct kauth_resolver_work *killp;
 	int error, result;
 
+	/*
+	 * Copy in the message, including the extension field, since we are
+	 * copying into a local variable.
+	 */
 	if ((error = copyin(message, &extl, sizeof(extl))) != 0) {
 		KAUTH_DEBUG("RESOLVER - error getting completed work\n");
 		return(error);
@@ -771,22 +851,66 @@ kauth_resolver_complete(user_addr_t message)
 	}
 
 	/*
-	 * In the case of a fatal error, we assume that the resolver will restart
-	 * quickly and re-collect all of the outstanding requests.  Thus, we don't
-	 * complete the request which returned the fatal error status.
+	 * In the case of a fatal error, we assume that the resolver will
+	 * restart quickly and re-collect all of the outstanding requests.
+	 * Thus, we don't complete the request which returned the fatal
+	 * error status.
 	 */
 	if (extl.el_result != KAUTH_EXTLOOKUP_FATAL) {
 		/* scan our list for this request */
 		TAILQ_FOREACH(workp, &kauth_resolver_submitted, kr_link) {
 			/* found it? */
 			if (workp->kr_seqno == extl.el_seqno) {
-				/* copy result */
-				workp->kr_work = extl;
-				/* move onto completed list and wake up requester(s) */
+
+				/*
+				 * Get the request of the submitted queue so
+				 * that it is not cleaned up out from under
+				 * us by a timeout.
+				 */
 				TAILQ_REMOVE(&kauth_resolver_submitted, workp, kr_link);
 				workp->kr_flags &= ~KAUTH_REQUEST_SUBMITTED;
 				workp->kr_flags |= KAUTH_REQUEST_DONE;
 				workp->kr_result = result;
+
+				/* Copy the result message to the work item. */
+				memcpy(&workp->kr_work, &extl, sizeof(struct kauth_identity_extlookup));
+
+				/*
+				 * Check if we have a result in the extension
+				 * field; if we do, then we need to separately
+				 * copy the data from the message el_extend
+				 * into the request buffer that's in the work
+				 * item.  We have to do it here because we do
+				 * not want to wake up the waiter until the
+				 * data is in their buffer, and because the
+				 * actual request response may be destroyed
+				 * by the time the requester wakes up, and they
+				 * do not have access to the user space buffer
+				 * address.
+				 *
+				 * It is safe to drop and reacquire the lock
+				 * here because we've already removed the item
+				 * from the submission queue, but have not yet
+				 * moved it to the completion queue.  Note that
+				 * near simultaneous requests may result in
+				 * duplication of requests for items in this
+				 * window. This should not be a performance
+				 * issue and is easily detectable by comparing
+				 * time to live on last response vs. time of
+				 * next request in the resolver logs.
+				 */
+				if (extl.el_flags & (KAUTH_EXTLOOKUP_VALID_PWNAM|KAUTH_EXTLOOKUP_VALID_GRNAM)) {
+					size_t actual;	/* not used */
+
+					KAUTH_RESOLVER_UNLOCK();
+					error = copyinstr(extl.el_extend, CAST_DOWN(void *, workp->kr_extend), MAXPATHLEN, &actual);
+					KAUTH_RESOLVER_LOCK();
+				}
+
+				/*
+				 * Move the completed work item to the
+				 * completion queue and wake up requester(s)
+				 */
 				TAILQ_INSERT_TAIL(&kauth_resolver_done, workp, kr_link);
 				wakeup(workp);
 				break;
@@ -814,14 +938,18 @@ struct kauth_identity {
 #define KI_VALID_GID	(1<<1)
 #define KI_VALID_GUID	(1<<2)
 #define KI_VALID_NTSID	(1<<3)
+#define KI_VALID_PWNAM	(1<<4)	/* Used for translation */
+#define KI_VALID_GRNAM	(1<<5)	/* Used for translation */
 	uid_t	ki_uid;
 	gid_t	ki_gid;
 	guid_t	ki_guid;
 	ntsid_t ki_ntsid;
+	const char	*ki_name;	/* string name from string cache */
 	/*
-	 * Expiry times are the earliest time at which we will disregard the cached state and go to
-	 * userland.  Before then if the valid bit is set, we will return the cached value.  If it's
-	 * not set, we will not go to userland to resolve, just assume that there is no answer
+	 * Expiry times are the earliest time at which we will disregard the
+	 * cached state and go to userland.  Before then if the valid bit is
+	 * set, we will return the cached value.  If it's not set, we will
+	 * not go to userland to resolve, just assume that there is no answer
 	 * available.
 	 */
 	time_t	ki_guid_expiry;
@@ -838,16 +966,17 @@ static lck_mtx_t *kauth_identity_mtx;
 
 
 static struct kauth_identity *kauth_identity_alloc(uid_t uid, gid_t gid, guid_t *guidp, time_t guid_expiry,
-    ntsid_t *ntsidp, time_t ntsid_expiry);
+    ntsid_t *ntsidp, time_t ntsid_expiry, const char *name, int nametype);
 static void	kauth_identity_register_and_free(struct kauth_identity *kip);
-static void	kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_identity *kip);
+static void	kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_identity *kip, uint64_t extend_data);
 static void	kauth_identity_lru(struct kauth_identity *kip);
 static int	kauth_identity_guid_expired(struct kauth_identity *kip);
 static int	kauth_identity_ntsid_expired(struct kauth_identity *kip);
-static int	kauth_identity_find_uid(uid_t uid, struct kauth_identity *kir);
-static int	kauth_identity_find_gid(gid_t gid, struct kauth_identity *kir);
-static int	kauth_identity_find_guid(guid_t *guidp, struct kauth_identity *kir);
-static int	kauth_identity_find_ntsid(ntsid_t *ntsid, struct kauth_identity *kir);
+static int	kauth_identity_find_uid(uid_t uid, struct kauth_identity *kir, char *getname);
+static int	kauth_identity_find_gid(gid_t gid, struct kauth_identity *kir, char *getname);
+static int	kauth_identity_find_guid(guid_t *guidp, struct kauth_identity *kir, char *getname);
+static int	kauth_identity_find_ntsid(ntsid_t *ntsid, struct kauth_identity *kir, char *getname);
+static int	kauth_identity_find_nam(char *name, int valid, struct kauth_identity *kir);
 
 
 /*
@@ -888,11 +1017,11 @@ kauth_identity_init(void)
  *						structure, filled in
  *
  * Notes:	It is illegal to translate between UID and GID; any given UUID
- *		or NTSID can oly refer to an NTSIDE or UUID (respectively),
+ *		or NTSID can only refer to an NTSID or UUID (respectively),
  *		and *either* a UID *or* a GID, but not both.
  */
 static struct kauth_identity *
-kauth_identity_alloc(uid_t uid, gid_t gid, guid_t *guidp, time_t guid_expiry, ntsid_t *ntsidp, time_t ntsid_expiry)
+kauth_identity_alloc(uid_t uid, gid_t gid, guid_t *guidp, time_t guid_expiry, ntsid_t *ntsidp, time_t ntsid_expiry, const char *name, int nametype)
 {
 	struct kauth_identity *kip;
 	
@@ -919,6 +1048,10 @@ kauth_identity_alloc(uid_t uid, gid_t gid, guid_t *guidp, time_t guid_expiry, nt
 			kip->ki_valid |= KI_VALID_NTSID;
 		}
 		kip->ki_ntsid_expiry = ntsid_expiry;
+		if (name != NULL) {
+			kip->ki_name = name;
+			kip->ki_valid |= nametype;
+		}
 	}
 	return(kip);
 }
@@ -928,7 +1061,7 @@ kauth_identity_alloc(uid_t uid, gid_t gid, guid_t *guidp, time_t guid_expiry, nt
  * kauth_identity_register_and_free
  *
  * Description:	Register an association between identity tokens.  The passed
- *		'kip' is freed by this function.
+ *		'kip' is consumed by this function.
  *
  * Parameters:	kip				Pointer to kauth_identity
  *						structure to register
@@ -975,11 +1108,22 @@ kauth_identity_register_and_free(struct kauth_identity *kip)
 			ip->ki_valid |= KI_VALID_NTSID;
 		}
 		ip->ki_ntsid_expiry = kip->ki_ntsid_expiry;
-		/* and discard the incoming identity */
-		FREE(kip, M_KAUTH);
-		ip = NULL;
+		/* a valid ki_name field overwrites the previous name field */
+		if (kip->ki_valid & (KI_VALID_PWNAM | KI_VALID_GRNAM)) {
+			/* if there's an old one, discard it */
+			const char *oname = NULL;
+			if (ip->ki_valid & (KI_VALID_PWNAM | KI_VALID_GRNAM))
+				oname = ip->ki_name;
+			ip->ki_name = kip->ki_name;
+			kip->ki_name = oname;
+		}
+		/* and discard the incoming entry */
+		ip = kip;
 	} else {
-		/* don't have any information on this identity, so just add it */
+		/*
+		 * if we don't have any information on this identity, add it;
+		 * if it pushes us over our limit, discard the oldest one.
+		 */
 		TAILQ_INSERT_HEAD(&kauth_identities, kip, ki_link);
 		if (++kauth_identity_count > KAUTH_IDENTITY_CACHEMAX) {
 			ip = TAILQ_LAST(&kauth_identities, kauth_identity_head);
@@ -988,9 +1132,14 @@ kauth_identity_register_and_free(struct kauth_identity *kip)
 		}
 	}
 	KAUTH_IDENTITY_UNLOCK();
-	/* have to drop lock before freeing expired entry */
-	if (ip != NULL)
+	/* have to drop lock before freeing expired entry (it may be in use) */
+	if (ip != NULL) {
+		/* if the ki_name field is used, clear it first */
+		if (ip->ki_valid & (KI_VALID_PWNAM | KI_VALID_GRNAM))
+			vfs_removename(ip->ki_name);
+		/* free the expired entry */
 		FREE(ip, M_KAUTH);
+	}
 }
 
 
@@ -998,25 +1147,51 @@ kauth_identity_register_and_free(struct kauth_identity *kip)
  * kauth_identity_updatecache
  *
  * Description:	Given a lookup result, add any associations that we don't
- *		currently have.
+ *		currently have; replace ones which have changed.
  *
  * Parameters:	elp				External lookup result from
  *						user space daemon to kernel
  *		rkip				pointer to returned kauth
  *						identity, or NULL
+ *		extend_data			Extended data (can vary)
  *
  * Returns:	(void)
  *
  * Implicit returns:
  *		*rkip				Modified (if non-NULL)
+ *
+ * Notes:	For extended information requests, this code relies on the fact
+ *		that elp->el_flags is never used as an rvalue, and is only
+ *		ever bit-tested for valid lookup information we are willing
+ *		to cache.
+ *
+ * XXX:		We may have to do the same in the case that extended data was
+ *		passed out to user space to ensure that the request string
+ *		gets cached; we may also be able to use the rkip as an
+ *		input to avoid this.  The jury is still out.
+ *
+ * XXX:		This code's performance could be improved for multiple valid
+ *		results by combining the loop iterations into a single loop.
  */
 static void
-kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_identity *rkip)
+kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_identity *rkip, uint64_t extend_data)
 {
 	struct timeval tv;
 	struct kauth_identity *kip;
+	const char *speculative_name = NULL;
 
 	microuptime(&tv);
+
+	/*
+	 * If there is extended data, and that data represents a name rather
+	 * than something else, speculatively create an entry for it in the
+	 * string cache.  We do this to avoid holding the KAUTH_IDENTITY_LOCK
+	 * over the allocation later.
+	 */
+	if (elp->el_flags & (KAUTH_EXTLOOKUP_VALID_PWNAM | KAUTH_EXTLOOKUP_VALID_GRNAM)) {
+		const char *tmp = CAST_DOWN(const char *,extend_data);
+		speculative_name = vfs_addname(tmp, strnlen(tmp, MAXPATHLEN - 1), 0, 0);
+	}
 	
 	/* user identity? */
 	if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_UID) {
@@ -1034,6 +1209,19 @@ kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_id
 					kip->ki_valid |= KI_VALID_NTSID;
 				}
 				kip->ki_ntsid_expiry = tv.tv_sec + elp->el_usid_valid;
+				if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_PWNAM) {
+					const char *oname = kip->ki_name;
+					kip->ki_name = speculative_name;
+					speculative_name = NULL;
+					kip->ki_valid |= KI_VALID_PWNAM;
+					if (oname) {
+						/*
+						 * free oname (if any) outside
+						 * the lock
+						 */
+						speculative_name = oname;
+					}
+				}
 				kauth_identity_lru(kip);
 				if (rkip != NULL)
 					*rkip = *kip;
@@ -1048,18 +1236,22 @@ kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_id
 			    (elp->el_flags & KAUTH_EXTLOOKUP_VALID_UGUID) ? &elp->el_uguid : NULL,
 			    tv.tv_sec + elp->el_uguid_valid,
 			    (elp->el_flags & KAUTH_EXTLOOKUP_VALID_USID) ? &elp->el_usid : NULL,
-			    tv.tv_sec + elp->el_usid_valid);
+			    tv.tv_sec + elp->el_usid_valid,
+			    (elp->el_flags & KAUTH_EXTLOOKUP_VALID_PWNAM) ? speculative_name : NULL,
+			    KI_VALID_PWNAM);
 			if (kip != NULL) {
 				if (rkip != NULL)
 					*rkip = *kip;
+				if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_PWNAM)
+					speculative_name = NULL;
 				KAUTH_DEBUG("CACHE - learned %d is " K_UUID_FMT, kip->ki_uid, K_UUID_ARG(kip->ki_guid));
 				kauth_identity_register_and_free(kip);
 			}
 		}
 	}
 
-	/* group identity? */
-	if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GID) {
+	/* group identity? (ignore, if we already processed it as a user) */
+	if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GID && !(elp->el_flags & KAUTH_EXTLOOKUP_VALID_UID)) {
 		KAUTH_IDENTITY_LOCK();
 		TAILQ_FOREACH(kip, &kauth_identities, ki_link) {
 			/* matching record */
@@ -1074,6 +1266,19 @@ kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_id
 					kip->ki_valid |= KI_VALID_NTSID;
 				}
 				kip->ki_ntsid_expiry = tv.tv_sec + elp->el_gsid_valid;
+				if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GRNAM) {
+					const char *oname = kip->ki_name;
+					kip->ki_name = speculative_name;
+					speculative_name = NULL;
+					kip->ki_valid |= KI_VALID_GRNAM;
+					if (oname) {
+						/*
+						 * free oname (if any) outside
+						 * the lock
+						 */
+						speculative_name = oname;
+					}
+				}
 				kauth_identity_lru(kip);
 				if (rkip != NULL)
 					*rkip = *kip;
@@ -1088,16 +1293,24 @@ kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_id
 			    (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GGUID) ? &elp->el_gguid : NULL,
 			    tv.tv_sec + elp->el_gguid_valid,
 			    (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GSID) ? &elp->el_gsid : NULL,
-			    tv.tv_sec + elp->el_gsid_valid);
+			    tv.tv_sec + elp->el_gsid_valid,
+			    (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GRNAM) ? speculative_name : NULL,
+			    KI_VALID_GRNAM);
 			if (kip != NULL) {
 				if (rkip != NULL)
 					*rkip = *kip;
+				if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GRNAM)
+					speculative_name = NULL;
 				KAUTH_DEBUG("CACHE - learned %d is " K_UUID_FMT, kip->ki_uid, K_UUID_ARG(kip->ki_guid));
 				kauth_identity_register_and_free(kip);
 			}
 		}
 	}
 
+	/* If we have a name reference to drop, drop it here */
+	if (speculative_name != NULL) {
+		vfs_removename(speculative_name);
+	}
 }
 
 
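
Cached names live in the VFS string cache (vfs_addname()/vfs_removename()), and both update paths above use the same idiom: swap the pointer while holding the identity lock, then drop the displaced reference only after unlocking. The idiom in isolation (valid-bit bookkeeping elided):

	/* Sketch: replace a cached name under the identity lock and free
	 * the displaced string-cache reference outside of it. */
	static void
	example_replace_name(struct kauth_identity *kip, const char *newname)
	{
		const char *oname = NULL;

		KAUTH_IDENTITY_LOCK();
		if (kip->ki_valid & (KI_VALID_PWNAM | KI_VALID_GRNAM))
			oname = kip->ki_name;
		kip->ki_name = newname;
		KAUTH_IDENTITY_UNLOCK();

		if (oname != NULL)
			vfs_removename(oname);
	}
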
@@ -1179,6 +1392,7 @@ kauth_identity_ntsid_expired(struct kauth_identity *kip)
  *
  * Parameters:	uid				UID to find
  *		kir				Pointer to return area
+ *		getname				Name buffer, if ki_name wanted
  *
  * Returns:	0				Found
  *		ENOENT				Not found
@@ -1187,7 +1401,7 @@ kauth_identity_ntsid_expired(struct kauth_identity *kip)
  *		*klr				Modified, if found
  */
 static int
-kauth_identity_find_uid(uid_t uid, struct kauth_identity *kir)
+kauth_identity_find_uid(uid_t uid, struct kauth_identity *kir, char *getname)
 {
 	struct kauth_identity *kip;
 
@@ -1197,6 +1411,9 @@ kauth_identity_find_uid(uid_t uid, struct kauth_identity *kir)
 			kauth_identity_lru(kip);
 			/* Copy via structure assignment */
 			*kir = *kip;
+			/* If a name is wanted and one exists, copy it out */
+			if (getname != NULL && (kip->ki_valid & (KI_VALID_PWNAM | KI_VALID_GRNAM)))
+				strlcpy(getname, kip->ki_name, MAXPATHLEN);
 			break;
 		}
 	}
@@ -1206,12 +1423,13 @@ kauth_identity_find_uid(uid_t uid, struct kauth_identity *kir)
 
 
 /*
- * kauth_identity_find_uid
+ * kauth_identity_find_gid
  *
  * Description: Search for an entry by GID
  *
  * Parameters:	gid				GID to find
  *		kir				Pointer to return area
+ *		getname				Name buffer, if ki_name wanted
  *
  * Returns:	0				Found
  *		ENOENT				Not found
@@ -1220,7 +1438,7 @@ kauth_identity_find_uid(uid_t uid, struct kauth_identity *kir)
  *		*klr				Modified, if found
  */
 static int
-kauth_identity_find_gid(uid_t gid, struct kauth_identity *kir)
+kauth_identity_find_gid(uid_t gid, struct kauth_identity *kir, char *getname)
 {
 	struct kauth_identity *kip;
 
@@ -1230,6 +1448,9 @@ kauth_identity_find_gid(uid_t gid, struct kauth_identity *kir)
 			kauth_identity_lru(kip);
 			/* Copy via structure assignment */
 			*kir = *kip;
+			/* If a name is wanted and one exists, copy it out */
+			if (getname != NULL && (kip->ki_valid & (KI_VALID_PWNAM | KI_VALID_GRNAM)))
+				strlcpy(getname, kip->ki_name, MAXPATHLEN);
 			break;
 		}
 	}
@@ -1245,6 +1466,7 @@ kauth_identity_find_gid(uid_t gid, struct kauth_identity *kir)
  *
  * Parameters:	guidp				Pointer to GUID to find
  *		kir				Pointer to return area
+ *		getname				Name buffer, if ki_name wanted
  *
  * Returns:	0				Found
  *		ENOENT				Not found
@@ -1256,13 +1478,49 @@ kauth_identity_find_gid(uid_t gid, struct kauth_identity *kir)
  *		may elect to call out to userland to revalidate.
  */
 static int
-kauth_identity_find_guid(guid_t *guidp, struct kauth_identity *kir)
+kauth_identity_find_guid(guid_t *guidp, struct kauth_identity *kir, char *getname)
 {
 	struct kauth_identity *kip;
 
 	KAUTH_IDENTITY_LOCK();
 	TAILQ_FOREACH(kip, &kauth_identities, ki_link) {
 		if ((kip->ki_valid & KI_VALID_GUID) && (kauth_guid_equal(guidp, &kip->ki_guid))) {
+			kauth_identity_lru(kip);
+			/* Copy via structure assignment */
+			*kir = *kip;
+			/* If a name is wanted and one exists, copy it out */
+			if (getname != NULL && (kip->ki_valid & (KI_VALID_PWNAM | KI_VALID_GRNAM)))
+				strlcpy(getname, kip->ki_name, MAXPATHLEN);
+			break;
+		}
+	}
+	KAUTH_IDENTITY_UNLOCK();
+	return((kip == NULL) ? ENOENT : 0);
+}
+
+/*
+ * kauth_identity_find_nam
+ *
+ * Description:	Search for an entry by name
+ *
+ * Parameters:	name				Pointer to name to find
+ *		valid				KI_VALID_PWNAM or KI_VALID_GRNAM
+ *		kir				Pointer to return area
+ *
+ * Returns:	0				Found
+ *		ENOENT				Not found
+ *
+ * Implicit returns:
+ *		*kir				Modified, if found
+ */
+static int
+kauth_identity_find_nam(char *name, int valid, struct kauth_identity *kir)
+{
+	struct kauth_identity *kip;
+
+	KAUTH_IDENTITY_LOCK();
+	TAILQ_FOREACH(kip, &kauth_identities, ki_link) {
+		if ((kip->ki_valid & valid) && !strcmp(name, kip->ki_name)) {
 			kauth_identity_lru(kip);
 			/* Copy via structure assignment */
 			*kir = *kip;
@@ -1281,6 +1539,7 @@ kauth_identity_find_guid(guid_t *guidp, struct kauth_identity *kir)
  *
  * Parameters:	ntsid				Pointer to NTSID to find
  *		kir				Pointer to return area
+ *		getname				Name buffer, if ki_name wanted
  *
  * Returns:	0				Found
  *		ENOENT				Not found
@@ -1292,7 +1551,7 @@ kauth_identity_find_guid(guid_t *guidp, struct kauth_identity *kir)
  *		may elect to call out to userland to revalidate.
  */
 static int
-kauth_identity_find_ntsid(ntsid_t *ntsid, struct kauth_identity *kir)
+kauth_identity_find_ntsid(ntsid_t *ntsid, struct kauth_identity *kir, char *getname)
 {
 	struct kauth_identity *kip;
 
@@ -1302,6 +1561,9 @@ kauth_identity_find_ntsid(ntsid_t *ntsid, struct kauth_identity *kir)
 			kauth_identity_lru(kip);
 			/* Copy via structure assignment */
 			*kir = *kip;
+			/* If a name is wanted and one exists, copy it out */
+			if (getname != NULL && (kip->ki_valid & (KI_VALID_PWNAM | KI_VALID_GRNAM)))
+				strlcpy(getname, kip->ki_name, MAXPATHLEN);
 			break;
 		}
 	}
@@ -1351,7 +1613,7 @@ int
 kauth_wellknown_guid(guid_t *guid)
 {
 	static char	fingerprint[] = {0xab, 0xcd, 0xef, 0xab, 0xcd, 0xef, 0xab, 0xcd, 0xef, 0xab, 0xcd, 0xef};
-	int		code;
+	uint32_t		code;
 	/*
 	 * All WKGs begin with the same 12 bytes.
 	 */
@@ -1359,7 +1621,7 @@ kauth_wellknown_guid(guid_t *guid)
 		/*
 		 * The final 4 bytes are our code (in network byte order).
 		 */
-		code = OSSwapHostToBigInt32(*(u_int32_t *)&guid->g_guid[12]);
+		code = OSSwapHostToBigInt32(*(uint32_t *)&guid->g_guid[12]);
 		switch(code) {
 		case 0x0000000c:
 			return(KAUTH_WKG_EVERYBODY);
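
Every well-known GUID shares the 12-byte fingerprint and carries its code big-endian in the final four bytes. Assembling the "everybody" GUID, as a sketch:

	/* Sketch: build the KAUTH_WKG_EVERYBODY well-known GUID from the
	 * shared fingerprint plus code 0x0000000c in network byte order. */
	static void
	example_wkg_everybody(guid_t *g)
	{
		static const uint8_t fp[12] = {0xab, 0xcd, 0xef, 0xab, 0xcd, 0xef,
		    0xab, 0xcd, 0xef, 0xab, 0xcd, 0xef};
		uint32_t code = OSSwapHostToBigInt32(0x0000000c);

		memcpy(g->g_guid, fp, sizeof(fp));
		memcpy(&g->g_guid[12], &code, sizeof(code));
	}
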
@@ -1445,16 +1707,17 @@ kauth_cred_change_egid(kauth_cred_t cred, gid_t new_egid)
 #if radar_4600026
 	int	is_member;
 #endif	/* radar_4600026 */
-	gid_t	old_egid = cred->cr_groups[0];
+	gid_t	old_egid = kauth_cred_getgid(cred);
+	posix_cred_t pcred = posix_cred_get(cred);
 
 	/* Ignoring the first entry, scan for a match for the new egid */
-	for (i = 1; i < cred->cr_ngroups; i++) {
+	for (i = 1; i < pcred->cr_ngroups; i++) {
 		/*
 		 * If we find a match, swap them so we don't lose overall
 		 * group information
 		 */
-		if (cred->cr_groups[i] == new_egid) {
-			cred->cr_groups[i] = old_egid;
+		if (pcred->cr_groups[i] == new_egid) {
+			pcred->cr_groups[i] = old_egid;
 			DEBUG_CRED_CHANGE("kauth_cred_change_egid: unset displaced\n");
 			displaced = 0;
 			break;
@@ -1480,7 +1743,7 @@ conservative approach (i.e. less likely to cause things to break).
 	 *
 	 * NB:	This is typically a cold code path.
 	 */
-	if (displaced && !(cred->cr_flags & CRF_NOMEMBERD) &&
+	if (displaced && !(pcred->cr_flags & CRF_NOMEMBERD) &&
 	    kauth_cred_ismember_gid(cred, new_egid, &is_member) == 0 &&
 	    is_member) {
 	    	displaced = 0;
@@ -1489,7 +1752,7 @@ conservative approach (i.e. less likely to cause things to break).
 #endif	/* radar_4600026 */
 
 	/* set the new EGID into the old spot */
-	cred->cr_groups[0] = new_egid;
+	pcred->cr_groups[0] = new_egid;
 
 	return (displaced);
 }
@@ -1508,7 +1771,41 @@ uid_t
 kauth_cred_getuid(kauth_cred_t cred)
 {
 	NULLCRED_CHECK(cred);
-	return(cred->cr_uid);
+	return(posix_cred_get(cred)->cr_uid);
+}
+
+
+/*
+ * kauth_cred_getruid
+ *
+ * Description:	Fetch RUID from credential
+ *
+ * Parameters:	cred				Credential to examine
+ *
+ * Returns:	(uid_t)				RUID associated with credential
+ */
+uid_t
+kauth_cred_getruid(kauth_cred_t cred)
+{
+	NULLCRED_CHECK(cred);
+	return(posix_cred_get(cred)->cr_ruid);
+}
+
+
+/*
+ * kauth_cred_getsvuid
+ *
+ * Description:	Fetch SVUID from credential
+ *
+ * Parameters:	cred				Credential to examine
+ *
+ * Returns:	(uid_t)				SVUID associated with credential
+ */
+uid_t
+kauth_cred_getsvuid(kauth_cred_t cred)
+{
+	NULLCRED_CHECK(cred);
+	return(posix_cred_get(cred)->cr_svuid);
 }
 
 
@@ -1521,11 +1818,139 @@ kauth_cred_getuid(kauth_cred_t cred)
  *
  * Returns:	(gid_t)				GID associated with credential
  */
-uid_t
+gid_t
 kauth_cred_getgid(kauth_cred_t cred)
 {
 	NULLCRED_CHECK(cred);
-	return(cred->cr_gid);
+	return(posix_cred_get(cred)->cr_gid);
+}
+
+
+/*
+ * kauth_cred_getrgid
+ *
+ * Description:	Fetch RGID from credential
+ *
+ * Parameters:	cred				Credential to examine
+ *
+ * Returns:	(gid_t)				RGID associated with credential
+ */
+gid_t
+kauth_cred_getrgid(kauth_cred_t cred)
+{
+	NULLCRED_CHECK(cred);
+	return(posix_cred_get(cred)->cr_rgid);
+}
+
+
+/*
+ * kauth_cred_getsvgid
+ *
+ * Description:	Fetch SVGID from credential
+ *
+ * Parameters:	cred				Credential to examine
+ *
+ * Returns:	(gid_t)				SVGID associated with credential
+ */
+gid_t
+kauth_cred_getsvgid(kauth_cred_t cred)
+{
+	NULLCRED_CHECK(cred);
+	return(posix_cred_get(cred)->cr_svgid);
+}
+
+
+/*
+ * kauth_cred_guid2pwnam
+ *
+ * Description:	Fetch PWNAM from GUID
+ *
+ * Parameters:	guidp				Pointer to GUID to examine
+ *		pwnam				Pointer to user@domain buffer
+ *
+ * Returns:	0				Success
+ *	kauth_cred_cache_lookup:EINVAL
+ *
+ * Implicit returns:
+ *		*pwnam				Modified, if successful
+ *
+ * Notes:	pwnam is assumed to point to a buffer of MAXPATHLEN in size
+ */
+int
+kauth_cred_guid2pwnam(guid_t *guidp, char *pwnam)
+{
+	return(kauth_cred_cache_lookup(KI_VALID_GUID, KI_VALID_PWNAM, guidp, pwnam));
+}
+
+
+/*
+ * kauth_cred_guid2grnam
+ *
+ * Description:	Fetch GRNAM from GUID
+ *
+ * Parameters:	guidp				Pointer to GUID to examine
+ *		grnam				Pointer to group@domain buffer
+ *
+ * Returns:	0				Success
+ *	kauth_cred_cache_lookup:EINVAL
+ *
+ * Implicit returns:
+ *		*grnam				Modified, if successful
+ *
+ * Notes:	grnam is assumed to point to a buffer of MAXPATHLEN in size
+ */
+int
+kauth_cred_guid2grnam(guid_t *guidp, char *grnam)
+{
+	return(kauth_cred_cache_lookup(KI_VALID_GUID, KI_VALID_GRNAM, guidp, grnam));
+}
+
+
+/*
+ * kauth_cred_pwnam2guid
+ *
+ * Description:	Fetch GUID from PWNAM
+ *
+ * Parameters:	pwnam				String containing user@domain
+ *		guidp				Pointer to buffer for GUID
+ *
+ * Returns:	0				Success
+ *	kauth_cred_cache_lookup:EINVAL
+ *
+ * Implicit returns:
+ *		*guidp				Modified, if successful
+ *
+ * Notes:	pwnam should not point to a request larger than MAXPATHLEN
+ *		bytes in size, including the NUL termination of the string.
+ */
+int
+kauth_cred_pwnam2guid(char *pwnam, guid_t *guidp)
+{
+	return(kauth_cred_cache_lookup(KI_VALID_PWNAM, KI_VALID_GUID, pwnam, guidp));
+}
+
+
+/*
+ * kauth_cred_grnam2guid
+ *
+ * Description:	Fetch GUID from GRNAM
+ *
+ * Parameters:	grnam				String containing group@domain
+ *		guidp				Pointer to buffer for GUID
+ *
+ * Returns:	0				Success
+ *	kauth_cred_cache_lookup:EINVAL
+ *
+ * Implicit returns:
+ *		*guidp				Modified, if successful
+ *
+ * Notes:	grnam should not point to a request larger than MAXPATHLEN
+ *		bytes in size, including the NUL termination of the string.
+ */
+int
+kauth_cred_grnam2guid(char *grnam, guid_t *guidp)
+{
+	return(kauth_cred_cache_lookup(KI_VALID_GRNAM, KI_VALID_GUID, grnam, guidp));
 }
 
 
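
Together, the four routines above translate between "user@domain"/"group@domain" strings and GUIDs via kauth_cred_cache_lookup(). A round-trip sketch ("alice@EXAMPLE" is a made-up principal, and the MAXPATHLEN stack buffer is for illustration only):

	/* Sketch: user@domain -> GUID -> user@domain. */
	static void
	example_pwnam_roundtrip(void)
	{
		char who[] = "alice@EXAMPLE";
		char back[MAXPATHLEN];
		guid_t guid;

		if (kauth_cred_pwnam2guid(who, &guid) == 0 &&
		    kauth_cred_guid2pwnam(&guid, back) == 0)
			printf("round trip: %s\n", back);
	}
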
@@ -1806,27 +2231,40 @@ kauth_cred_cache_lookup(int from, int to, void *src, void *dst)
 	struct kauth_identity ki;
 	struct kauth_identity_extlookup el;
 	int error;
+	uint64_t extend_data = 0ULL;
 	int (* expired)(struct kauth_identity *kip);
+	char *namebuf = NULL;
 
 	KAUTH_DEBUG("CACHE - translate %d to %d", from, to);
 	
 	/*
 	 * Look for an existing cache entry for this association.
 	 * If the entry has not expired, return the cached information.
+	 * We do not cache user@domain translations here; they use too
+	 * much memory to hold onto forever, and cannot be updated
+	 * atomically.
 	 */
+	if (to == KI_VALID_PWNAM || to == KI_VALID_GRNAM) {
+		namebuf = dst;
+	}
 	ki.ki_valid = 0;
 	switch(from) {
 	case KI_VALID_UID:
-		error = kauth_identity_find_uid(*(uid_t *)src, &ki);
+		error = kauth_identity_find_uid(*(uid_t *)src, &ki, namebuf);
 		break;
 	case KI_VALID_GID:
-		error = kauth_identity_find_gid(*(gid_t *)src, &ki);
+		error = kauth_identity_find_gid(*(gid_t *)src, &ki, namebuf);
 		break;
 	case KI_VALID_GUID:
-		error = kauth_identity_find_guid((guid_t *)src, &ki);
+		error = kauth_identity_find_guid((guid_t *)src, &ki, namebuf);
 		break;
 	case KI_VALID_NTSID:
-		error = kauth_identity_find_ntsid((ntsid_t *)src, &ki);
+		error = kauth_identity_find_ntsid((ntsid_t *)src, &ki, namebuf);
+		break;
+	case KI_VALID_PWNAM:
+	case KI_VALID_GRNAM:
+		/* Names are unique in their 'from' space */
+		error = kauth_identity_find_nam((char *)src, from, &ki);
 		break;
 	default:
 		return(EINVAL);
@@ -1862,7 +2300,7 @@ kauth_cred_cache_lookup(int from, int to, void *src, void *dst)
 					expired = NULL;
 				}
 			}
-			KAUTH_DEBUG("CACHE - found matching entry with valid %d", ki.ki_valid);
+			KAUTH_DEBUG("CACHE - found matching entry with valid 0x%08x", ki.ki_valid);
 			/*
 			 * If no expiry function, or not expired, we have found
 			 * a hit.
@@ -1882,13 +2320,33 @@ kauth_cred_cache_lookup(int from, int to, void *src, void *dst)
 			 * a better-than nothing alternative.
 			 */
 			KAUTH_DEBUG("CACHE - expired entry found");
+		} else {
+			/*
+			 * A guid can't both match a uid and a gid, so if we
+			 * found a cache entry while looking for one or the
+			 * other from a guid, the 'from' is KI_VALID_GUID,
+			 * and the 'to' is one, and the other one is valid,
+			 * then we immediately return ENOENT without calling
+			 * the resolver again.
+			 */
+			if (from == KI_VALID_GUID &&
+			    (((ki.ki_valid & KI_VALID_UID) &&
+			      to == KI_VALID_GID) ||
+			     ((ki.ki_valid & KI_VALID_GID) &&
+			      to == KI_VALID_UID))) {
+				return (ENOENT);
+			}
 		}
 	}
 
 	/*
 	 * We failed to find a cache entry; call the resolver.
 	 *
-	 * Note:	We ask for as much data as we can get.
+	 * Note:	We ask for as much non-extended data as we can get,
+	 *		and only provide (or ask for) extended information if
+	 *		we have a 'from' (or 'to') which requires it.  This
+	 *		way we don't pay for the extra transfer overhead for
+	 *		data we don't need.
 	 */
 	bzero(&el, sizeof(el));
 	el.el_info_pid = current_proc()->p_pid;
@@ -1911,6 +2369,16 @@ kauth_cred_cache_lookup(int from, int to, void *src, void *dst)
 		el.el_usid = *(ntsid_t *)src;
 		el.el_gsid = *(ntsid_t *)src;
 		break;
+	case KI_VALID_PWNAM:
+		/* extra overhead */
+		el.el_flags = KAUTH_EXTLOOKUP_VALID_PWNAM;
+		extend_data = CAST_USER_ADDR_T(src);
+		break;
+	case KI_VALID_GRNAM:
+		/* extra overhead */
+		el.el_flags = KAUTH_EXTLOOKUP_VALID_GRNAM;
+		extend_data = CAST_USER_ADDR_T(src);
+		break;
 	default:
 		return(EINVAL);
 	}
@@ -1926,25 +2394,53 @@ kauth_cred_cache_lookup(int from, int to, void *src, void *dst)
 	el.el_flags |= KAUTH_EXTLOOKUP_WANT_UID | KAUTH_EXTLOOKUP_WANT_GID |
 	    KAUTH_EXTLOOKUP_WANT_UGUID | KAUTH_EXTLOOKUP_WANT_GGUID |
 	    KAUTH_EXTLOOKUP_WANT_USID | KAUTH_EXTLOOKUP_WANT_GSID;
+	if (to == KI_VALID_PWNAM) {
+		/* extra overhead */
+		el.el_flags |= KAUTH_EXTLOOKUP_WANT_PWNAM;
+		extend_data = CAST_USER_ADDR_T(dst);
+	}
+	if (to == KI_VALID_GRNAM) {
+		/* extra overhead */
+		el.el_flags |= KAUTH_EXTLOOKUP_WANT_GRNAM;
+		extend_data = CAST_USER_ADDR_T(dst);
+	}
+
+	/* Call resolver */
 	KAUTH_DEBUG("CACHE - calling resolver for %x", el.el_flags);
-	error = kauth_resolver_submit(&el);
+	error = kauth_resolver_submit(&el, extend_data);
 	KAUTH_DEBUG("CACHE - resolver returned %d", error);
-	/* was the lookup successful? */
+
+	/* was the external lookup successful? */
 	if (error == 0) {
 		/*
-		 * Save the results from the lookup - may have other
-		 * information even if we didn't get a guid.
+		 * Save the results from the lookup - we may have other
+		 * information, even if we didn't get a guid or the
+		 * extended data.
+		 *
+		 * If we came from a name, we know the extend_data is valid.
+		 */
+		if (from == KI_VALID_PWNAM)
+			el.el_flags |= KAUTH_EXTLOOKUP_VALID_PWNAM;
+		else if (from == KI_VALID_GRNAM)
+			el.el_flags |= KAUTH_EXTLOOKUP_VALID_GRNAM;
+
+		kauth_identity_updatecache(&el, &ki, extend_data);
+
+		/*
+		 * Check to see if we have a valid cache entry
+		 * originating from the result.
 		 */
-		kauth_identity_updatecache(&el, &ki);
+		if (!(ki.ki_valid & to)) {
+			error = ENOENT;
+		}
 	}
-	/*
-	 * Check to see if we have a valid result.
-	 */
-	if (!error && !(ki.ki_valid & to))
-		error = ENOENT;
 	if (error)
 		return(error);
 found:
+	/*
+	 * Copy from the appropriate struct kauth_identity cache entry
+	 * structure into the destination buffer area.
+	 */
 	switch(to) {
 	case KI_VALID_UID:
 		*(uid_t *)dst = ki.ki_uid;
@@ -1958,6 +2454,10 @@ found:
 	case KI_VALID_NTSID:
 		*(ntsid_t *)dst = ki.ki_ntsid;
 		break;
+	case KI_VALID_PWNAM:
+	case KI_VALID_GRNAM:
+		/* handled in kauth_resolver_complete() */
+		break;
 	default:
 		return(EINVAL);
 	}
@@ -2190,6 +2690,7 @@ kauth_groups_updatecache(struct kauth_identity_extlookup *el)
 int
 kauth_cred_ismember_gid(kauth_cred_t cred, gid_t gid, int *resultp)
 {
+	posix_cred_t pcred = posix_cred_get(cred);
 	struct kauth_group_membership *gm;
 	struct kauth_identity_extlookup el;
 	int i, error;
@@ -2200,8 +2701,8 @@ kauth_cred_ismember_gid(kauth_cred_t cred, gid_t gid, int *resultp)
 	 * We can conditionalise this on cred->cr_gmuid == KAUTH_UID_NONE since
 	 * the cache should be used for that case.
 	 */
-	for (i = 0; i < cred->cr_ngroups; i++) {
-		if (gid == cred->cr_groups[i]) {
+	for (i = 0; i < pcred->cr_ngroups; i++) {
+		if (gid == pcred->cr_groups[i]) {
 			*resultp = 1;
 			return(0);
 		}
@@ -2211,7 +2712,7 @@ kauth_cred_ismember_gid(kauth_cred_t cred, gid_t gid, int *resultp)
 	 * If we don't have a UID for group membership checks, the in-cred list
 	 * was authoritative and we can stop here.
 	 */
-	if (cred->cr_gmuid == KAUTH_UID_NONE) {
+	if (pcred->cr_gmuid == KAUTH_UID_NONE) {
 		*resultp = 0;
 		return(0);
 	}
@@ -2236,7 +2737,7 @@ kauth_cred_ismember_gid(kauth_cred_t cred, gid_t gid, int *resultp)
 	 */
 	KAUTH_GROUPS_LOCK();
 	TAILQ_FOREACH(gm, &kauth_groups, gm_link) {
-		if ((gm->gm_uid == cred->cr_gmuid) && (gm->gm_gid == gid) && !kauth_groups_expired(gm)) {
+		if ((gm->gm_uid == pcred->cr_gmuid) && (gm->gm_gid == gid) && !kauth_groups_expired(gm)) {
 			kauth_groups_lru(gm);
 			break;
 		}
@@ -2255,10 +2756,10 @@ kauth_cred_ismember_gid(kauth_cred_t cred, gid_t gid, int *resultp)
 	bzero(&el, sizeof(el));
 	el.el_info_pid = current_proc()->p_pid;
 	el.el_flags = KAUTH_EXTLOOKUP_VALID_UID | KAUTH_EXTLOOKUP_VALID_GID | KAUTH_EXTLOOKUP_WANT_MEMBERSHIP;
-	el.el_uid = cred->cr_gmuid;
+	el.el_uid = pcred->cr_gmuid;
 	el.el_gid = gid;
 	el.el_member_valid = 0;		/* XXX set by resolver? */
-	error = kauth_resolver_submit(&el);
+	error = kauth_resolver_submit(&el, 0ULL);
 	if (error != 0)
 		return(error);
 	/* save the results from the lookup */
@@ -2332,7 +2833,7 @@ kauth_cred_ismember_guid(kauth_cred_t cred, guid_t *guidp, int *resultp)
 		 * this is expected to be a common case.
 		 */
 		ki.ki_valid = 0;
-		if ((error = kauth_identity_find_guid(guidp, &ki)) == 0 &&
+		if ((error = kauth_identity_find_guid(guidp, &ki, NULL)) == 0 &&
 		    !kauth_identity_guid_expired(&ki)) {
 			if (ki.ki_valid & KI_VALID_GID) {
 				/* It's a group after all... */
@@ -2395,38 +2896,40 @@ kauth_cred_gid_subset(kauth_cred_t cred1, kauth_cred_t cred2, int *resultp)
 {
 	int i, err, res = 1;
 	gid_t gid;
+	posix_cred_t pcred1 = posix_cred_get(cred1);
+	posix_cred_t pcred2 = posix_cred_get(cred2);
 
 	/* First, check the local list of groups */
-	for (i = 0; i < cred1->cr_ngroups; i++) {
-		gid = cred1->cr_groups[i];
+	for (i = 0; i < pcred1->cr_ngroups; i++) {
+		gid = pcred1->cr_groups[i];
 		if ((err = kauth_cred_ismember_gid(cred2, gid, &res)) != 0) {
 			return err;
 		}
 
-		if (!res && gid != cred2->cr_rgid && gid != cred2->cr_svgid) {
+		if (!res && gid != pcred2->cr_rgid && gid != pcred2->cr_svgid) {
 			*resultp = 0;
 			return 0;
 		}
 	}
 
 	/* Check real gid */
-	if ((err = kauth_cred_ismember_gid(cred2, cred1->cr_rgid, &res)) != 0) {
+	if ((err = kauth_cred_ismember_gid(cred2, pcred1->cr_rgid, &res)) != 0) {
 		return err;
 	}
 
-	if (!res && cred1->cr_rgid != cred2->cr_rgid &&
-			cred1->cr_rgid != cred2->cr_svgid) {
+	if (!res && pcred1->cr_rgid != pcred2->cr_rgid &&
+			pcred1->cr_rgid != pcred2->cr_svgid) {
 		*resultp = 0;
 		return 0;
 	}
 
 	/* Finally, check saved gid */
-	if ((err = kauth_cred_ismember_gid(cred2, cred1->cr_svgid, &res)) != 0){
+	if ((err = kauth_cred_ismember_gid(cred2, pcred1->cr_svgid, &res)) != 0){
 		return err;
 	}
 
-	if (!res && cred1->cr_svgid != cred2->cr_rgid &&
-			cred1->cr_svgid != cred2->cr_svgid) {
+	if (!res && pcred1->cr_svgid != pcred2->cr_rgid &&
+			pcred1->cr_svgid != pcred2->cr_svgid) {
 		*resultp = 0;
 		return 0;
 	}
@@ -2453,7 +2956,7 @@ kauth_cred_gid_subset(kauth_cred_t cred1, kauth_cred_t cred2, int *resultp)
 int
 kauth_cred_issuser(kauth_cred_t cred)
 {
-	return(cred->cr_uid == 0);
+	return(kauth_cred_getuid(cred) == 0);
 }
 
 
@@ -2536,7 +3039,7 @@ kauth_cred_init(void)
 uid_t
 kauth_getuid(void)
 {
-	return(kauth_cred_get()->cr_uid);
+	return(kauth_cred_getuid(kauth_cred_get()));
 }
 
 
@@ -2553,7 +3056,7 @@ kauth_getuid(void)
 uid_t
 kauth_getruid(void)
 {
-	return(kauth_cred_get()->cr_ruid);
+	return(kauth_cred_getruid(kauth_cred_get()));
 }
 
 
@@ -2570,7 +3073,7 @@ kauth_getruid(void)
 gid_t
 kauth_getgid(void)
 {
-	return(kauth_cred_get()->cr_groups[0]);
+	return(kauth_cred_getgid(kauth_cred_get()));
 }
 
 
@@ -2587,7 +3090,7 @@ kauth_getgid(void)
 gid_t
 kauth_getrgid(void)
 {
-	return(kauth_cred_get()->cr_rgid);
+	return(kauth_cred_getrgid(kauth_cred_get()));
 }
 
 
@@ -2823,13 +3326,12 @@ kauth_cred_alloc(void)
 	
 	MALLOC_ZONE(newcred, kauth_cred_t, sizeof(*newcred), M_CRED, M_WAITOK);
 	if (newcred != 0) {
+		posix_cred_t newpcred = posix_cred_get(newcred);
 		bzero(newcred, sizeof(*newcred));
 		newcred->cr_ref = 1;
-		newcred->cr_audit.as_aia_p = &audit_default_aia;
-		/* XXX the following will go away with cr_au */
-		newcred->cr_au.ai_auid = AU_DEFAUDITID;
+		newcred->cr_audit.as_aia_p = audit_default_aia_p;
 		/* must do this, or cred has same group membership as uid 0 */
-		newcred->cr_gmuid = KAUTH_UID_NONE;
+		newpcred->cr_gmuid = KAUTH_UID_NONE;
 #if CRED_DIAGNOSTIC
 	} else {
 		panic("kauth_cred_alloc: couldn't allocate credential");
@@ -2878,12 +3380,13 @@ kauth_cred_t
 kauth_cred_create(kauth_cred_t cred)
 {
 	kauth_cred_t 	found_cred, new_cred = NULL;
+	posix_cred_t	pcred = posix_cred_get(cred);
 	int is_member = 0;
 
 	KAUTH_CRED_HASH_LOCK_ASSERT();
 
-	if (cred->cr_flags & CRF_NOMEMBERD) {
-		cred->cr_gmuid = KAUTH_UID_NONE;
+	if (pcred->cr_flags & CRF_NOMEMBERD) {
+		pcred->cr_gmuid = KAUTH_UID_NONE;
 	} else {
 		/*
 		 * If the template credential is not opting out of external
@@ -2902,7 +3405,7 @@ kauth_cred_create(kauth_cred_t cred)
 			 * the answer, so long as it's something the external
 			 * resolver could have vended.
 			 */
-			cred->cr_gmuid = cred->cr_uid;
+			pcred->cr_gmuid = pcred->cr_uid;
 		} else {
 			/*
 			 * It's not something the external resolver could
@@ -2913,13 +3416,13 @@ kauth_cred_create(kauth_cred_t cred)
 			 * cost.  Since most credentials are used multiple
 			 * times, we still get some performance win from this.
 			 */
-			cred->cr_gmuid = KAUTH_UID_NONE;
-			cred->cr_flags |= CRF_NOMEMBERD;
+			pcred->cr_gmuid = KAUTH_UID_NONE;
+			pcred->cr_flags |= CRF_NOMEMBERD;
 		}
 	}
 
 	/* Caller *must* specify at least the egid in cr_groups[0] */
-	if (cred->cr_ngroups < 1)
+	if (pcred->cr_ngroups < 1)
 		return(NULL);
 	
 	for (;;) {
@@ -2943,22 +3446,20 @@ kauth_cred_create(kauth_cred_t cred)
 		new_cred = kauth_cred_alloc();
 		if (new_cred != NULL) {
 			int		err;
-			new_cred->cr_uid = cred->cr_uid;
-			new_cred->cr_ruid = cred->cr_ruid;
-			new_cred->cr_svuid = cred->cr_svuid;
-			new_cred->cr_rgid = cred->cr_rgid;
-			new_cred->cr_svgid = cred->cr_svgid;
-			new_cred->cr_gmuid = cred->cr_gmuid;
-			new_cred->cr_ngroups = cred->cr_ngroups;	
-			bcopy(&cred->cr_groups[0], &new_cred->cr_groups[0], sizeof(new_cred->cr_groups));
+			posix_cred_t	new_pcred = posix_cred_get(new_cred);
+			new_pcred->cr_uid = pcred->cr_uid;
+			new_pcred->cr_ruid = pcred->cr_ruid;
+			new_pcred->cr_svuid = pcred->cr_svuid;
+			new_pcred->cr_rgid = pcred->cr_rgid;
+			new_pcred->cr_svgid = pcred->cr_svgid;
+			new_pcred->cr_gmuid = pcred->cr_gmuid;
+			new_pcred->cr_ngroups = pcred->cr_ngroups;	
+			bcopy(&pcred->cr_groups[0], &new_pcred->cr_groups[0], sizeof(new_pcred->cr_groups));
 #if CONFIG_AUDIT
 			bcopy(&cred->cr_audit, &new_cred->cr_audit, 
 			    sizeof(new_cred->cr_audit));
-			/* XXX the following bcopy() will go away with cr_au */
-			bcopy(&cred->cr_au, &new_cred->cr_au,
-			    sizeof(new_cred->cr_au));
 #endif
-			new_cred->cr_flags = cred->cr_flags;
+			new_pcred->cr_flags = pcred->cr_flags;
 			
 			KAUTH_CRED_HASH_LOCK();
 			err = kauth_cred_add(new_cred);
@@ -3017,6 +3518,8 @@ kauth_cred_t
 kauth_cred_setresuid(kauth_cred_t cred, uid_t ruid, uid_t euid, uid_t svuid, uid_t gmuid)
 {
 	struct ucred temp_cred;
+	posix_cred_t temp_pcred = posix_cred_get(&temp_cred);
+	posix_cred_t pcred = posix_cred_get(cred);
 
 	NULLCRED_CHECK(cred);
 
@@ -3024,10 +3527,10 @@ kauth_cred_setresuid(kauth_cred_t cred, uid_t ruid, uid_t euid, uid_t svuid, uid
 	 * We don't need to do anything if the UIDs we are changing are
 	 * already the same as the UIDs passed in
 	 */
-	if ((euid == KAUTH_UID_NONE || cred->cr_uid == euid) &&
-	    (ruid == KAUTH_UID_NONE || cred->cr_ruid == ruid) &&
-	    (svuid == KAUTH_UID_NONE || cred->cr_svuid == svuid) &&
-	    (cred->cr_gmuid == gmuid)) {
+	if ((euid == KAUTH_UID_NONE || pcred->cr_uid == euid) &&
+	    (ruid == KAUTH_UID_NONE || pcred->cr_ruid == ruid) &&
+	    (svuid == KAUTH_UID_NONE || pcred->cr_svuid == svuid) &&
+	    (pcred->cr_gmuid == gmuid)) {
 		/* no change needed */
 		return(cred);
 	}
@@ -3038,13 +3541,13 @@ kauth_cred_setresuid(kauth_cred_t cred, uid_t ruid, uid_t euid, uid_t svuid, uid
 	 */
 	bcopy(cred, &temp_cred, sizeof(temp_cred));
 	if (euid != KAUTH_UID_NONE) {
-		temp_cred.cr_uid = euid;
+		temp_pcred->cr_uid = euid;
 	}
 	if (ruid != KAUTH_UID_NONE) {
-		temp_cred.cr_ruid = ruid;
+		temp_pcred->cr_ruid = ruid;
 	}
 	if (svuid != KAUTH_UID_NONE) {
-		temp_cred.cr_svuid = svuid;
+		temp_pcred->cr_svuid = svuid;
 	}
 
 	/*
@@ -3052,8 +3555,8 @@ kauth_cred_setresuid(kauth_cred_t cred, uid_t ruid, uid_t euid, uid_t svuid, uid
 	 * opt out of participation in external group resolution, unless
 	 * we explicitly opt back in later.
 	 */
-	if ((temp_cred.cr_gmuid = gmuid) == KAUTH_UID_NONE) {
-		temp_cred.cr_flags |= CRF_NOMEMBERD;
+	if ((temp_pcred->cr_gmuid = gmuid) == KAUTH_UID_NONE) {
+		temp_pcred->cr_flags |= CRF_NOMEMBERD;
 	}
 
 	return(kauth_cred_update(cred, &temp_cred, TRUE));
@@ -3090,6 +3593,8 @@ kauth_cred_t
 kauth_cred_setresgid(kauth_cred_t cred, gid_t rgid, gid_t egid, gid_t svgid)
 {
 	struct ucred 	temp_cred;
+	posix_cred_t temp_pcred = posix_cred_get(&temp_cred);
+	posix_cred_t pcred = posix_cred_get(cred);
 
 	NULLCRED_CHECK(cred);
 	DEBUG_CRED_ENTER("kauth_cred_setresgid %p %d %d %d\n", cred, rgid, egid, svgid);
@@ -3098,9 +3603,9 @@ kauth_cred_setresgid(kauth_cred_t cred, gid_t rgid, gid_t egid, gid_t svgid)
 	 * We don't need to do anything if the given GIDs are already the
 	 * same as the GIDs in the credential.
 	 */
-	if (cred->cr_groups[0] == egid &&
-	    cred->cr_rgid == rgid &&
-	    cred->cr_svgid == svgid) {
+	if (pcred->cr_groups[0] == egid &&
+	    pcred->cr_rgid == rgid &&
+	    pcred->cr_svgid == svgid) {
 		/* no change needed */
 		return(cred);
 	}
@@ -3114,17 +3619,17 @@ kauth_cred_setresgid(kauth_cred_t cred, gid_t rgid, gid_t egid, gid_t svgid)
 		/* displacing a supplementary group opts us out of memberd */
 		if (kauth_cred_change_egid(&temp_cred, egid)) {
 			DEBUG_CRED_CHANGE("displaced!\n");
-			temp_cred.cr_flags |= CRF_NOMEMBERD;
-			temp_cred.cr_gmuid = KAUTH_UID_NONE;
+			temp_pcred->cr_flags |= CRF_NOMEMBERD;
+			temp_pcred->cr_gmuid = KAUTH_UID_NONE;
 		} else {
 			DEBUG_CRED_CHANGE("not displaced\n");
 		}
 	}
 	if (rgid != KAUTH_GID_NONE) {
-		temp_cred.cr_rgid = rgid;
+		temp_pcred->cr_rgid = rgid;
 	}
 	if (svgid != KAUTH_GID_NONE) {
-		temp_cred.cr_svgid = svgid;
+		temp_pcred->cr_svgid = svgid;
 	}
 
 	return(kauth_cred_update(cred, &temp_cred, TRUE));
@@ -3185,16 +3690,20 @@ kauth_cred_setgroups(kauth_cred_t cred, gid_t *groups, int groupcount, uid_t gmu
 {
 	int		i;
 	struct ucred temp_cred;
+	posix_cred_t temp_pcred = posix_cred_get(&temp_cred);
+	posix_cred_t pcred;
 
 	NULLCRED_CHECK(cred);
 
+	pcred = posix_cred_get(cred);
+
 	/*
 	 * We don't need to do anything if the given list of groups does not
 	 * change.
 	 */
-	if ((cred->cr_gmuid == gmuid) && (cred->cr_ngroups == groupcount)) {
+	if ((pcred->cr_gmuid == gmuid) && (pcred->cr_ngroups == groupcount)) {
 		for (i = 0; i < groupcount; i++) {
-			if (cred->cr_groups[i] != groups[i])
+			if (pcred->cr_groups[i] != groups[i])
 				break;
 		}
 		if (i == groupcount) {
@@ -3211,17 +3720,46 @@ kauth_cred_setgroups(kauth_cred_t cred, gid_t *groups, int groupcount, uid_t gmu
 	 * using initgroups().  This is required for POSIX conformance.
 	 */
 	bcopy(cred, &temp_cred, sizeof(temp_cred));
-	temp_cred.cr_ngroups = groupcount;
-	bcopy(groups, temp_cred.cr_groups, sizeof(temp_cred.cr_groups));
-	temp_cred.cr_gmuid = gmuid;
+	temp_pcred->cr_ngroups = groupcount;
+	bcopy(groups, temp_pcred->cr_groups, sizeof(temp_pcred->cr_groups));
+	temp_pcred->cr_gmuid = gmuid;
 	if (gmuid == KAUTH_UID_NONE)
-		temp_cred.cr_flags |= CRF_NOMEMBERD;
+		temp_pcred->cr_flags |= CRF_NOMEMBERD;
 	else
-		temp_cred.cr_flags &= ~CRF_NOMEMBERD;
+		temp_pcred->cr_flags &= ~CRF_NOMEMBERD;
 
 	return(kauth_cred_update(cred, &temp_cred, TRUE));
 }
 
+/*
+ * XXX temporary, for NFS support until we can come up with a better
+ * XXX enumeration/comparison mechanism
+ *
+ * Notes:	The return value exists to account for the possibility of a
+ *		kauth_cred_t without a POSIX label.  This will be the case in
+ *		the future (see posix_cred_get() below, for more details).
+ */
+int
+kauth_cred_getgroups(kauth_cred_t cred, gid_t *grouplist, int *countp)
+{
+	int limit = NGROUPS;
+
+	/*
+	 * If they just want a copy of the groups list, they may not care
+	 * about the actual count.  If they specify an input count, however,
+	 * treat it as an indicator of the buffer size available in grouplist,
+	 * and limit the returned list to that size.
+	 */
+	if (countp) {
+		limit = MIN(*countp, cred->cr_posix.cr_ngroups);
+		*countp = limit;
+	}
+
+	memcpy(grouplist, cred->cr_posix.cr_groups, sizeof(gid_t) * limit);
+
+	return 0;
+}
+
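
The count-in/count-out convention above means a caller sizes the copy with the input count and learns the actual number of entries on return. A minimal kernel-side sketch; the helper name and the printf dump are illustrative only:

	/*
	 * Hypothetical caller: fetch up to NGROUPS supplementary groups
	 * from a credential and print them.
	 */
	static void
	example_dump_groups(kauth_cred_t cred)
	{
		gid_t	groups[NGROUPS];
		int	count = NGROUPS;	/* in: capacity of 'groups' */
		int	i;

		if (kauth_cred_getgroups(cred, groups, &count) == 0) {
			for (i = 0; i < count; i++)	/* out: entries returned */
				printf("group[%d] = %d\n", i, groups[i]);
		}
	}
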
 
 /*
  * kauth_cred_setuidgid
@@ -3262,15 +3800,19 @@ kauth_cred_t
 kauth_cred_setuidgid(kauth_cred_t cred, uid_t uid, gid_t gid)
 {
 	struct ucred temp_cred;
+	posix_cred_t temp_pcred = posix_cred_get(&temp_cred);
+	posix_cred_t pcred;
 
 	NULLCRED_CHECK(cred);
 
+	pcred = posix_cred_get(cred);
+
 	/*
 	 * We don't need to do anything if the effective, real and saved
 	 * user IDs are already the same as the user ID passed into us.
 	 */
-	if (cred->cr_uid == uid && cred->cr_ruid == uid && cred->cr_svuid == uid &&
-		cred->cr_groups[0] == gid && cred->cr_rgid == gid && cred->cr_svgid == gid) {
+	if (pcred->cr_uid == uid && pcred->cr_ruid == uid && pcred->cr_svuid == uid &&
+		pcred->cr_gid == gid && pcred->cr_rgid == gid && pcred->cr_svgid == gid) {
 		/* no change needed */
 		return(cred);
 	}
@@ -3280,26 +3822,26 @@ kauth_cred_setuidgid(kauth_cred_t cred, uid_t uid, gid_t gid)
 	 * with the new values.
 	 */
 	bzero(&temp_cred, sizeof(temp_cred));
-	temp_cred.cr_uid = uid;
-	temp_cred.cr_ruid = uid;
-	temp_cred.cr_svuid = uid;
-	temp_cred.cr_flags = cred->cr_flags;
+	temp_pcred->cr_uid = uid;
+	temp_pcred->cr_ruid = uid;
+	temp_pcred->cr_svuid = uid;
+	temp_pcred->cr_flags = pcred->cr_flags;
 	/* inherit the opt-out of memberd */
-	if (cred->cr_flags & CRF_NOMEMBERD) {
-		temp_cred.cr_gmuid = KAUTH_UID_NONE;
-		temp_cred.cr_flags |= CRF_NOMEMBERD;
+	if (pcred->cr_flags & CRF_NOMEMBERD) {
+		temp_pcred->cr_gmuid = KAUTH_UID_NONE;
+		temp_pcred->cr_flags |= CRF_NOMEMBERD;
 	} else {
-		temp_cred.cr_gmuid = uid;
-		temp_cred.cr_flags &= ~CRF_NOMEMBERD;
+		temp_pcred->cr_gmuid = uid;
+		temp_pcred->cr_flags &= ~CRF_NOMEMBERD;
 	}
-	temp_cred.cr_ngroups = 1;
+	temp_pcred->cr_ngroups = 1;
 	/* displacing a supplementary group opts us out of memberd */
 	if (kauth_cred_change_egid(&temp_cred, gid)) {
-		temp_cred.cr_gmuid = KAUTH_UID_NONE;
-		temp_cred.cr_flags |= CRF_NOMEMBERD;
+		temp_pcred->cr_gmuid = KAUTH_UID_NONE;
+		temp_pcred->cr_flags |= CRF_NOMEMBERD;
 	}
-	temp_cred.cr_rgid = gid;
-	temp_cred.cr_svgid = gid;
+	temp_pcred->cr_rgid = gid;
+	temp_pcred->cr_svgid = gid;
 #if CONFIG_MACF
 	temp_cred.cr_label = cred->cr_label;
 #endif
@@ -3336,8 +3878,13 @@ kauth_cred_t
 kauth_cred_setsvuidgid(kauth_cred_t cred, uid_t uid, gid_t gid)
 {
 	struct ucred temp_cred;
+	posix_cred_t temp_pcred = posix_cred_get(&temp_cred);
+	posix_cred_t pcred;
 
 	NULLCRED_CHECK(cred);
+
+	pcred = posix_cred_get(cred);
+
 	DEBUG_CRED_ENTER("kauth_cred_setsvuidgid: %p u%d->%d g%d->%d\n", cred, cred->cr_svuid, uid, cred->cr_svgid, gid);
 
 	/*
@@ -3345,7 +3892,7 @@ kauth_cred_setsvuidgid(kauth_cred_t cred, uid_t uid, gid_t gid)
 	 * uids are already the same as the uid provided.  This check is
 	 * likely insufficient.
 	 */
-	if (cred->cr_svuid == uid && cred->cr_svgid == gid) {
+	if (pcred->cr_svuid == uid && pcred->cr_svgid == gid) {
 		/* no change needed */
 		return(cred);
 	}
@@ -3355,8 +3902,8 @@ kauth_cred_setsvuidgid(kauth_cred_t cred, uid_t uid, gid_t gid)
 	 * with new values.
 	 */
 	bcopy(cred, &temp_cred, sizeof(temp_cred));
-	temp_cred.cr_svuid = uid;
-	temp_cred.cr_svgid = gid;
+	temp_pcred->cr_svuid = uid;
+	temp_pcred->cr_svgid = gid;
 
 	return(kauth_cred_update(cred, &temp_cred, TRUE));
 }
@@ -3402,18 +3949,6 @@ kauth_cred_setauditinfo(kauth_cred_t cred, au_session_t *auditinfo_p)
 
 	bcopy(cred, &temp_cred, sizeof(temp_cred));
 	bcopy(auditinfo_p, &temp_cred.cr_audit, sizeof(temp_cred.cr_audit));
-	/* XXX the following will go away with cr_au */
-	temp_cred.cr_au.ai_auid = auditinfo_p->as_aia_p->ai_auid;
-	temp_cred.cr_au.ai_mask.am_success = 
-		auditinfo_p->as_mask.am_success;
-	temp_cred.cr_au.ai_mask.am_failure = 
-		auditinfo_p->as_mask.am_failure;
-	temp_cred.cr_au.ai_termid.port = 
-		auditinfo_p->as_aia_p->ai_termid.at_port;
-	temp_cred.cr_au.ai_termid.machine = 
-		auditinfo_p->as_aia_p->ai_termid.at_addr[0];
-	temp_cred.cr_au.ai_asid = auditinfo_p->as_aia_p->ai_asid;
-	/* XXX */
 
 	return(kauth_cred_update(cred, &temp_cred, FALSE));
 }
@@ -3560,6 +4095,9 @@ int kauth_proc_label_update(struct proc *p, struct label *label)
 				continue;
 			}
 			p->p_ucred = my_new_cred;
+			/* update cred on proc */
+			PROC_UPDATE_CREDS_ONPROC(p);
+
 			mac_proc_set_enforce(p, MAC_ALL_ENFORCE);
 			proc_unlock(p);
 		}
@@ -3635,6 +4173,8 @@ kauth_proc_label_update_execve(struct proc *p, vfs_context_t ctx,
 				continue;
 			}
 			p->p_ucred = my_new_cred;
+			/* update cred on proc */
+			PROC_UPDATE_CREDS_ONPROC(p);
 			mac_proc_set_enforce(p, MAC_ALL_ENFORCE);
 			proc_unlock(p);
 		}
@@ -3951,10 +4491,12 @@ kauth_cred_copy_real(kauth_cred_t cred)
 {
 	kauth_cred_t newcred = NULL, found_cred;
 	struct ucred temp_cred;
+	posix_cred_t temp_pcred = posix_cred_get(&temp_cred);
+	posix_cred_t pcred = posix_cred_get(cred);
 
 	/* if the credential is already 'real', just take a reference */
-	if ((cred->cr_ruid == cred->cr_uid) &&
-	    (cred->cr_rgid == cred->cr_gid)) {
+	if ((pcred->cr_ruid == pcred->cr_uid) &&
+	    (pcred->cr_rgid == pcred->cr_gid)) {
 		kauth_cred_ref(cred);
 		return(cred);
 	}
@@ -3964,18 +4506,18 @@ kauth_cred_copy_real(kauth_cred_t cred)
 	 * with the new values.
 	 */
 	bcopy(cred, &temp_cred, sizeof(temp_cred));
-	temp_cred.cr_uid = cred->cr_ruid;
+	temp_pcred->cr_uid = pcred->cr_ruid;
 	/* displacing a supplementary group opts us out of memberd */
-	if (kauth_cred_change_egid(&temp_cred, cred->cr_rgid)) {
-		temp_cred.cr_flags |= CRF_NOMEMBERD;
-		temp_cred.cr_gmuid = KAUTH_UID_NONE;
+	if (kauth_cred_change_egid(&temp_cred, pcred->cr_rgid)) {
+		temp_pcred->cr_flags |= CRF_NOMEMBERD;
+		temp_pcred->cr_gmuid = KAUTH_UID_NONE;
 	}
 	/*
 	 * If the cred is not opted out, make sure we are using the r/euid
 	 * for group checks
 	 */
-	if (temp_cred.cr_gmuid != KAUTH_UID_NONE)
-		temp_cred.cr_gmuid = cred->cr_ruid;
+	if (temp_pcred->cr_gmuid != KAUTH_UID_NONE)
+		temp_pcred->cr_gmuid = pcred->cr_ruid;
 
 	for (;;) {
 		int		err;
@@ -4063,9 +4605,6 @@ kauth_cred_update(kauth_cred_t old_cred, kauth_cred_t model_cred,
 	if (retain_auditinfo) {
 		bcopy(&old_cred->cr_audit, &model_cred->cr_audit, 
 		    sizeof(model_cred->cr_audit));
-		/* XXX following bcopy will go away with cr_au */
-		bcopy(&old_cred->cr_au, &model_cred->cr_au,
-		    sizeof(model_cred->cr_au));
 	}
 	
 	for (;;) {
@@ -4240,6 +4779,7 @@ kauth_cred_find(kauth_cred_t cred)
 {
 	u_long			hash_key;
 	kauth_cred_t	found_cred;
+	posix_cred_t pcred = posix_cred_get(cred);
 
 	KAUTH_CRED_HASH_LOCK_ASSERT();
 
@@ -4258,23 +4798,26 @@ kauth_cred_find(kauth_cred_t cred)
 	/* Find cred in the credential hash table */
 	TAILQ_FOREACH(found_cred, &kauth_cred_table_anchor[hash_key], cr_link) {
 		boolean_t match;
+		posix_cred_t found_pcred = posix_cred_get(found_cred);
 
 		/*
 		 * don't worry about the label unless the flags in
 		 * either credential tell us to.
 		 */
-		if ((found_cred->cr_flags & CRF_MAC_ENFORCE) != 0 ||
-		    (cred->cr_flags & CRF_MAC_ENFORCE) != 0) {
+		if ((found_pcred->cr_flags & CRF_MAC_ENFORCE) != 0 ||
+		    (pcred->cr_flags & CRF_MAC_ENFORCE) != 0) {
 			/* include the label pointer in the compare */
-			match = (bcmp(&found_cred->cr_uid, &cred->cr_uid,
+			match = (bcmp(&found_pcred->cr_uid, &pcred->cr_uid,
 				 (sizeof(struct ucred) -
-				  offsetof(struct ucred, cr_uid))) == 0);
+				  offsetof(struct ucred, cr_posix))) == 0);
 		} else {
 			/* flags have to match, but skip the label in bcmp */
-			match = (found_cred->cr_flags == cred->cr_flags &&
-				 bcmp(&found_cred->cr_uid, &cred->cr_uid,
-				      (offsetof(struct ucred, cr_label) -
-				       offsetof(struct ucred, cr_uid))) == 0);
+			match = (found_pcred->cr_flags == pcred->cr_flags &&
+				 bcmp(&found_pcred->cr_uid, &pcred->cr_uid,
+				      sizeof(struct posix_cred)) == 0 &&
+			         bcmp(&found_cred->cr_audit, &cred->cr_audit,
+				      sizeof(cred->cr_audit)) == 0);
+
 		}
 		if (match) {
 			/* found a match */
@@ -4326,24 +4869,33 @@ kauth_cred_hash(const uint8_t *datap, int data_len, u_long start_key)
  *		not including the ref count or the TAILQ, which are mutable;
  *		everything else isn't.
  *
- *		We also avoid the label (if the flag is not set saying the
- *		label is actually enforced).
- *
  * Parameters:	cred				Credential for which hash is
  *						desired
  *
  * Returns:	(u_long)			Returned hash key
+ *
+ * Notes:	When actually moving the POSIX credential into a real label,
+ *		remember to update this hash computation.
  */
 static u_long
 kauth_cred_get_hashkey(kauth_cred_t cred)
 {
+	posix_cred_t pcred = posix_cred_get(cred);
 	u_long	hash_key = 0;
-	
-	hash_key = kauth_cred_hash((uint8_t *)&cred->cr_uid, 
-			((cred->cr_flags & CRF_MAC_ENFORCE) ? 
-			    sizeof(struct ucred) : offsetof(struct ucred, cr_label)) -
-			    offsetof(struct ucred, cr_uid),
-			hash_key);
+
+	if (pcred->cr_flags & CRF_MAC_ENFORCE) {
+		hash_key = kauth_cred_hash((uint8_t *)&cred->cr_posix,
+		    sizeof(struct ucred) - offsetof(struct ucred, cr_posix),
+		    hash_key);
+	} else {
+		/* skip label */
+		hash_key = kauth_cred_hash((uint8_t *)&cred->cr_posix,
+		    sizeof(struct posix_cred),
+		    hash_key);
+		hash_key = kauth_cred_hash((uint8_t *)&cred->cr_audit,
+		    sizeof(struct au_session),
+		    hash_key);
+	}
 	return(hash_key);
 }
 
@@ -4691,3 +5243,226 @@ sysctl_dump_cred_backtraces( __unused struct sysctl_oid *oidp, __unused void *ar
 }
 
 #endif	/* KAUTH_CRED_HASH_DEBUG || DEBUG_CRED */
+
+
+/*
+ **********************************************************************
+ * The following routines will be moved to a policy_posix.c module at
+ * some future point.
+ **********************************************************************
+ */
+
+/*
+ * posix_cred_create
+ *
+ * Description:	Helper function to create a kauth_cred_t credential that is
+ *		initially labelled with a specific POSIX credential label
+ *
+ * Parameters:	pcred			The posix_cred_t to use as the initial
+ *					label value
+ *
+ * Returns:	(kauth_cred_t)		The credential that was found in the
+ *					hash or newly created
+ *		NULL			kauth_cred_add() failed, or there was
+ *					no egid specified, or we failed to
+ *					attach a label to the new credential
+ *
+ * Notes:	This function currently wraps kauth_cred_create(), and is the
+ *		only consumer of that ill-fated function, apart from bsd_init().
+ *		It exists solely to support the NFS server code's creation of
+ *		credentials based on the over-the-wire RPC calls containing
+ *		traditional POSIX credential information being tunneled to
+ *		the server host from the client machine.
+ *
+ *		In the future, we hope this function goes away.
+ *
+ *		In the short term, it creates a temporary credential, puts
+ *		the POSIX information from NFS into it, and then calls
+ *		kauth_cred_create(), as an internal implementation detail.
+ *
+ *		If we have to keep it around in the medium term, it will
+ *		create a new kauth_cred_t, then label it with a POSIX label
+ *		corresponding to the contents of the kauth_cred_t.  If the
+ *		policy_posix MACF module is not loaded, it will instead
+ *		substitute a posix_cred_t which GRANTS all access (effectively
+ *		a "root" credential) in order to not prevent NFS from working
+ *		in the case that we are not supporting POSIX credentials.
+ */
+kauth_cred_t
+posix_cred_create(posix_cred_t pcred)
+{
+	struct ucred temp_cred;
+
+	bzero(&temp_cred, sizeof(temp_cred));
+	temp_cred.cr_posix = *pcred;
+
+	return kauth_cred_create(&temp_cred);
+}
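
For instance, an NFS-style caller holding raw over-the-wire identity data might build a template label and obtain a shared credential along these lines. A hedged sketch, where the rq_* names stand in for whatever the RPC layer actually provides:

	/*
	 * Sketch: construct a POSIX label from wire data.  The underlying
	 * kauth_cred_create() requires the egid in cr_groups[0].
	 */
	static kauth_cred_t
	example_cred_from_wire(uid_t rq_uid, gid_t rq_gid, gid_t *rq_groups,
	    int rq_ngroups)
	{
		struct posix_cred temp_pcred;
		int ngroups;

		bzero(&temp_pcred, sizeof(temp_pcred));
		temp_pcred.cr_uid = temp_pcred.cr_ruid = temp_pcred.cr_svuid = rq_uid;
		temp_pcred.cr_rgid = temp_pcred.cr_svgid = rq_gid;
		temp_pcred.cr_groups[0] = rq_gid;	/* egid must come first */
		ngroups = MIN(rq_ngroups, NGROUPS - 1);
		bcopy(rq_groups, &temp_pcred.cr_groups[1], ngroups * sizeof(gid_t));
		temp_pcred.cr_ngroups = ngroups + 1;
		temp_pcred.cr_gmuid = rq_uid;

		return (posix_cred_create(&temp_pcred));	/* NULL on failure */
	}
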
+
+
+/*
+ * posix_cred_get
+ *
+ * Description:	Given a kauth_cred_t, return the POSIX credential label, if
+ *		any, which is associated with it.
+ *
+ * Parameters:	cred			The credential to obtain the label from
+ *
+ * Returns:	posix_cred_t		The POSIX credential label
+ *
+ * Notes:	In the event that the policy_posix MACF module IS NOT loaded,
+ *		this function will return a pointer to a posix_cred_t which
+ *		GRANTS all access (effectively, a "root" credential).  This is
+ *		necessary to support legacy code which insists on tightly
+ *		integrating POSIX credentials into its APIs, including, but
+ *		not limited to, System V IPC mechanisms, POSIX IPC mechanisms,
+ *		NFSv3, signals, dtrace, and a large number of kauth routines
+ *		used to implement POSIX permissions related system calls.
+ *
+ *		In the event that the policy_posix MACF module IS loaded, and
+ *		there is no POSIX label on the kauth_cred_t credential, this
+ *		function will return a pointer to a posix_cred_t which DENIES
+ *		all access (effectively, a "deny rights granted by POSIX"
+ *		credential).  This is necessary to support the concept of a
+ *		transiently loaded POSIX policy, or kauth_cred_t credentials
+ *		which cannot be used in conjunction with POSIX permission
+ *		checks.
+ *
+ *		This function currently returns the address of the cr_posix
+ *		field of the supplied kauth_cred_t credential, and as such
+ *		currently cannot fail.  In the future, this will not be the
+ *		case.
+ */
+posix_cred_t
+posix_cred_get(kauth_cred_t cred)
+{
+	return(&cred->cr_posix);
+}
+
+
+/*
+ * posix_cred_label
+ *
+ * Description:	Label a kauth_cred_t with a POSIX credential label
+ *
+ * Parameters:	cred			The credential to label
+ *		pcred			The POSIX credential to label it with
+ *
+ * Returns:	(void)
+ *
+ * Notes:	This function is currently void in order to permit it to fit
+ *		in with the current MACF framework label methods, which allow
+ *		labelling to fail silently.  This is likely acceptable for
+ *		mandatory access controls, but not for POSIX, since those
+ *		access controls are advisory.  We will need to consider a
+ *		return value in a future version of the MACF API.
+ *
+ *		This operation currently cannot fail, as the POSIX
+ *		credential is a subfield of the kauth_cred_t (ucred), which
+ *		MUST be valid.  In the future, this will not be the case.
+ */
+void
+posix_cred_label(kauth_cred_t cred, posix_cred_t pcred)
+{
+	cred->cr_posix = *pcred;	/* structure assign for now */
+}
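
Taken together, posix_cred_get() and posix_cred_label() form the read and write halves of the label accessor pair. A brief sketch of propagating the POSIX identity of one credential onto a template, as the update paths earlier in this file do implicitly via structure copies; the helper name is illustrative:

	/*
	 * Sketch: stamp 'template_cred' (a scratch ucred being prepared
	 * for kauth_cred_update()/kauth_cred_create()) with the POSIX
	 * label of 'src'.
	 */
	static void
	example_copy_posix_label(kauth_cred_t src, kauth_cred_t template_cred)
	{
		posix_cred_t src_pcred = posix_cred_get(src);

		/* structure copy of the uids, gids, groups, gmuid and flags */
		posix_cred_label(template_cred, src_pcred);
	}
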
+
+
+/*
+ * posix_cred_access
+ *
+ * Description:	Perform a POSIX access check for a protected object
+ *
+ * Parameters:	cred			The credential to check
+ *		object_uid		The POSIX UID of the protected object
+ *		object_gid		The POSIX GID of the protected object
+ *		object_mode		The POSIX mode of the protected object
+ *		mode_req		The requested POSIX access rights
+ *
+ * Returns	0			Access is granted
+ *		EACCES			Access is denied
+ *
+ * Notes:	This code optimizes the case where the world and group rights
+ *		would both grant the requested rights to avoid making a group
+ *		membership query.  This is a big performance win in the case
+ *		where this is true.
+ */
+int
+posix_cred_access(kauth_cred_t cred, id_t object_uid, id_t object_gid, mode_t object_mode, mode_t mode_req)
+{
+	int is_member;
+	mode_t mode_owner = (object_mode & S_IRWXU);
+	mode_t mode_group = (object_mode & S_IRWXG) << 3;
+	mode_t mode_world = (object_mode & S_IRWXO) << 6;
+
+	/*
+	 * Check first for owner rights
+	 */
+	if (kauth_cred_getuid(cred) == object_uid && (mode_req & mode_owner) == mode_req)
+		return (0);
+
+	/*
+	 * Combined group and world rights check, if we don't have owner rights
+	 *
+	 * OPTIMIZED: If group and world rights would grant the same bits, and
+	 * the set of requested bits is in both, then we can simply check the
+	 * world rights, avoiding a group membership check, which is expensive.
+	 */
+	if ((mode_req & mode_group & mode_world) == mode_req) {
+		return (0);
+	} else {
+		/*
+		 * NON-OPTIMIZED: requires group membership check.
+		 */
+		if ((mode_req & mode_group) != mode_req) {
+			/*
+			 * exclusion group: treat errors as "is a member"
+			 *
+			 * NON-OPTIMIZED: +group would deny; must check group
+			 */
+			if (!kauth_cred_ismember_gid(cred, object_gid, &is_member) && is_member) {
+				/*
+				 * DENY: +group denies
+				 */
+				return (EACCES);
+			} else {
+				if ((mode_req & mode_world) != mode_req) {
+					/*
+					 * DENY: both -group & world would deny
+					 */
+					return (EACCES);
+				} else {
+					/*
+					 * ALLOW: allowed by -group and +world
+					 */
+					return (0);
+				}
+			}
+		} else {
+			/*
+			 * inclusion group; treat errors as "not a member"
+			 *
+			 * NON-OPTIMIZED: +group allows, world denies; must
+			 * check group
+			 */
+			if (!kauth_cred_ismember_gid(cred, object_gid, &is_member) && is_member) {
+				/*
+				 * ALLOW: allowed by +group
+				 */
+				return (0);
+			} else {
+				if ((mode_req & mode_world) != mode_req) {
+					/*
+					 * DENY: both -group & world would deny
+					 */
+					return (EACCES);
+				} else {
+					/*
+					 * ALLOW: allowed by -group and +world
+					 */
+					return (0);
+				}
+			}
+		}
+	}
+}
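
To make the fast path concrete: for an object with mode 0644, a read request (S_IRUSR, expressed in the owner bit positions the function expects) survives both the shifted group mask and the shifted world mask, so the membership query is skipped entirely. A hedged caller sketch:

	/*
	 * Sketch: classic owner/group/world read check.  For obj_mode 0644,
	 * (mode_req & mode_group & mode_world) == mode_req, so no group
	 * membership lookup is issued.
	 */
	static int
	example_can_read(kauth_cred_t cred, uid_t obj_uid, gid_t obj_gid,
	    mode_t obj_mode)
	{
		return (posix_cred_access(cred, obj_uid, obj_gid, obj_mode,
		    S_IRUSR));
	}
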
diff --git a/bsd/kern/kern_descrip.c b/bsd/kern/kern_descrip.c
index d3f483d24..3283ee3c0 100644
--- a/bsd/kern/kern_descrip.c
+++ b/bsd/kern/kern_descrip.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -101,6 +101,7 @@
 #include <sys/kdebug.h>
 #include <sys/sysproto.h>
 #include <sys/pipe.h>
+#include <sys/spawn.h>
 #include <kern/kern_types.h>
 #include <kern/kalloc.h>
 #include <libkern/OSAtomic.h>
@@ -112,6 +113,11 @@
 
 #include <mach/mach_port.h>
 
+#if CONFIG_PROTECT
+#include <sys/cprotect.h>
+#endif
+#include <hfs/hfs.h>
+
 kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
     mach_msg_type_name_t, ipc_port_t *);
 void ipc_port_release_send(ipc_port_t);
@@ -119,16 +125,14 @@ void ipc_port_release_send(ipc_port_t);
 struct psemnode;
 struct pshmnode;
 
-int fdopen(dev_t dev, int mode, int type, proc_t p);
-int finishdup(proc_t p, struct filedesc *fdp, int old, int new, int32_t *retval);
+static int finishdup(proc_t p,
+    struct filedesc *fdp, int old, int new, int flags, int32_t *retval);
 
 int falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx, int locked);
 void fg_drop(struct fileproc * fp);
 void fg_free(struct fileglob *fg);
 void fg_ref(struct fileproc * fp);
-#if CONFIG_EMBEDDED
 void fileport_releasefg(struct fileglob *fg);
-#endif /* CONFIG_EMBEDDED */
 
 /* flags for close_internal_locked */
 #define FD_DUP2RESV 1
@@ -156,6 +160,9 @@ extern kauth_scope_t	kauth_scope_fileop;
 
 extern int cs_debug;
 
+/* Conflict wait queue for when selects collide (opaque type) */
+extern struct wait_queue select_conflict_queue;
+
 #define f_flag f_fglob->fg_flag
 #define f_type f_fglob->fg_type
 #define f_msgcount f_fglob->fg_msgcount
@@ -474,21 +481,20 @@ dup(proc_t p, struct dup_args *uap, int32_t *retval)
 		proc_fdunlock(p);
 		return (error);
 	}
-	error = finishdup(p, fdp, old, new, retval);
+	error = finishdup(p, fdp, old, new, 0, retval);
 	fp_drop(p, old, fp, 1);
 	proc_fdunlock(p);
 
 	return (error);
 }
 
-
 /*
  * dup2
  *
  * Description:	Duplicate a file descriptor to a particular value.
  *
  * Parameters:	p				Process performing the dup
- *		uap->fd				The fd to dup
+ *		uap->from			The fd to dup
  *		uap->to				The fd to dup it to
  *		retval				Pointer to the call return area
  *
@@ -547,7 +553,8 @@ closeit:
 				goto startover;
 		}
 
-		if ((fdp->fd_ofiles[new] != NULL)  && ((error = fp_lookup(p, new, &nfp, 1)) == 0)) {
+		if ((fdp->fd_ofiles[new] != NULL) &&
+		    ((error = fp_lookup(p, new, &nfp, 1)) == 0)) {
 			fp_drop(p, old, fp, 1);
 			(void)close_internal_locked(p, new, nfp, FD_DUP2RESV);
 #if DIAGNOSTIC
@@ -558,7 +565,7 @@ closeit:
 		} else  {
 #if DIAGNOSTIC
 			if (fdp->fd_ofiles[new] != NULL)
-				panic("dup2: unable to get ref on a fileproc %d\n", new);
+				panic("dup2: no ref on fileproc %d", new);
 #endif
 			procfdtbl_reservefd(p, new);
 		}
@@ -570,11 +577,11 @@ closeit:
 	}
 #if DIAGNOSTIC
 	if (fdp->fd_ofiles[new] != 0)
-		panic("dup2-1: overwriting fd_ofiles with new %d\n", new);
+		panic("dup2: overwriting fd_ofiles with new %d", new);
 	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0)
-		panic("dup2-1: unreserved  fileflags  with new %d\n", new);
+		panic("dup2: unreserved fileflags with new %d", new);
 #endif
-	error = finishdup(p, fdp, old, new, retval);
+	error = finishdup(p, fdp, old, new, 0, retval);
 	fp_drop(p, old, fp, 1);
 	proc_fdunlock(p);
 
@@ -678,7 +685,6 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 	off_t offset;
 	int newmin;
 	daddr64_t lbn, bn;
-	int devBlockSize = 0;
 	unsigned int fflag;
 	user_addr_t argp;
 	boolean_t is64bit;
@@ -723,6 +729,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 	switch (uap->cmd) {
 
 	case F_DUPFD:
+	case F_DUPFD_CLOEXEC:
 		newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */
 		AUDIT_ARG(value32, newmin);
 		if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
@@ -732,7 +739,8 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 		}
 		if ( (error = fdalloc(p, newmin, &i)) )
 			goto out;
-		error = finishdup(p, fdp, fd, i, retval);
+		error = finishdup(p, fdp, fd, i,
+		    uap->cmd == F_DUPFD_CLOEXEC ? UF_EXCLOSE : 0, retval);
 		goto out;
 
 	case F_GETFD:
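
From userspace, the new command behaves like F_DUPFD but sets close-on-exec on the duplicate atomically, closing the race against a concurrent fork()/exec() in another thread. A minimal sketch; the helper name is illustrative:

	#include <fcntl.h>

	/* Duplicate 'fd' to a descriptor >= 10 with FD_CLOEXEC already set. */
	int
	dup_cloexec_min10(int fd)
	{
		/* returns the new fd, or -1 with errno (EBADF, EINVAL, EMFILE) */
		return (fcntl(fd, F_DUPFD_CLOEXEC, 10));
	}
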
@@ -807,6 +815,36 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 		error =  fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
 		goto out;
 
+	case F_SETNOSIGPIPE:
+		tmp = CAST_DOWN_EXPLICIT(int, uap->arg);
+		if (fp->f_type == DTYPE_SOCKET) {
+			error = sock_setsockopt((struct socket *)fp->f_data,
+			    SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof (tmp));
+		} else {
+			struct fileglob *fg = fp->f_fglob;
+
+			lck_mtx_lock_spin(&fg->fg_lock);
+			if (tmp)
+				fg->fg_lflags |= FG_NOSIGPIPE;
+			else
+				fg->fg_lflags &= ~FG_NOSIGPIPE;
+			lck_mtx_unlock(&fg->fg_lock);
+			error = 0;
+		}
+		goto out;
+
+	case F_GETNOSIGPIPE:
+		if (fp->f_type == DTYPE_SOCKET) {
+			int retsize = sizeof (*retval);
+			error = sock_getsockopt((struct socket *)fp->f_data,
+			    SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize);
+		} else {
+			*retval = (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) ?
+				1 : 0;
+			error = 0;
+		}
+		goto out;
+
 	case F_SETLKW:
 		flg |= F_WAIT;
 		/* Fall into F_SETLK */
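
The userspace view of the pair added above: for sockets the commands delegate to the SO_NOSIGPIPE socket option, and for every other descriptor type they toggle FG_NOSIGPIPE on the shared fileglob. A hedged sketch:

	#include <fcntl.h>

	/* Suppress SIGPIPE for writes on 'fd'; returns 0, or -1 with errno. */
	int
	set_nosigpipe(int fd)
	{
		return (fcntl(fd, F_SETNOSIGPIPE, 1));
	}

	/* Query the setting: 1 if suppressed, 0 if not, -1 on error. */
	int
	get_nosigpipe(int fd)
	{
		return (fcntl(fd, F_GETNOSIGPIPE));
	}
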
@@ -886,6 +924,9 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 		}
 
 	case F_GETLK:
+#if CONFIG_EMBEDDED
+	case F_GETLKPID:
+#endif
 		if (fp->f_type != DTYPE_VNODE) {
 			error = EBADF;
 			goto out;
@@ -943,10 +984,10 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 
 #if CONFIG_MACF
 			error = mac_file_check_lock(proc_ucred(p), fp->f_fglob,
-			    F_GETLK, &fl);
+			    uap->cmd, &fl);
 			if (error == 0)
 #endif
-			error = VNOP_ADVLOCK(vp, (caddr_t)p, F_GETLK, &fl, F_POSIX, &context);
+			error = VNOP_ADVLOCK(vp, (caddr_t)p, uap->cmd, &fl, F_POSIX, &context);
 
 			(void)vnode_put(vp);
 
@@ -1108,6 +1149,18 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 
 		goto out;
 
+	case F_NODIRECT:
+		if (fp->f_type != DTYPE_VNODE) {
+			error = EBADF;
+			goto out;
+		}
+		if (uap->arg)
+		        fp->f_fglob->fg_flag |= FNODIRECT;
+		else
+		        fp->f_fglob->fg_flag &= ~FNODIRECT;
+
+		goto out;
+
 	case F_GLOBAL_NOCACHE:
 	        if (fp->f_type != DTYPE_VNODE) {
 		        error = EBADF;
@@ -1170,6 +1223,23 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 		goto outdrop;
 		}
 
+	case F_FLUSH_DATA:
+		if (fp->f_type != DTYPE_VNODE) {
+			error = EBADF;
+			goto out;
+		}
+		vp = (struct vnode *)fp->f_data;
+		proc_fdunlock(p);
+
+		if ( (error = vnode_getwithref(vp)) == 0 ) {
+			error = cluster_push(vp, 0);
+
+			(void)vnode_put(vp);
+		}
+		goto outdrop;
+
 	case F_READBOOTSTRAP:
 	case F_WRITEBOOTSTRAP: {
 		user32_fbootstraptransfer_t user32_fbt_struct;
@@ -1221,9 +1291,23 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 		(void)vnode_put(vp);
 		goto outdrop;
 	}
-	case F_LOG2PHYS: {
+	case F_LOG2PHYS:
+	case F_LOG2PHYS_EXT: {
 		struct log2phys l2p_struct;    /* structure for allocate command */
+		int devBlockSize;
 
+		off_t file_offset = 0;
+		size_t a_size = 0;
+		size_t run = 0;
+
+		if (uap->cmd == F_LOG2PHYS_EXT) {
+			error = copyin(argp, (caddr_t)&l2p_struct, sizeof(l2p_struct));
+			if (error)
+				goto out;
+			file_offset = l2p_struct.l2p_devoffset;
+		} else {
+			file_offset = fp->f_offset;
+		}
 		if (fp->f_type != DTYPE_VNODE) {
 			error = EBADF;
 			goto out;
@@ -1233,7 +1317,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 		if ( (error = vnode_getwithref(vp)) ) {
 			goto outdrop;
 		}
-		error = VNOP_OFFTOBLK(vp, fp->f_offset, &lbn);
+		error = VNOP_OFFTOBLK(vp, file_offset, &lbn);
 		if (error) {
 			(void)vnode_put(vp);
 			goto outdrop;
@@ -1244,16 +1328,25 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 			goto outdrop;
 		}
 		devBlockSize = vfs_devblocksize(vnode_mount(vp));
-
-		error = VNOP_BLOCKMAP(vp, offset, devBlockSize, &bn, NULL, NULL, 0, &context);
+		if (uap->cmd == F_LOG2PHYS_EXT) {
+			a_size = l2p_struct.l2p_contigbytes;
+		} else {
+			a_size = devBlockSize;
+		}
+		
+		error = VNOP_BLOCKMAP(vp, offset, a_size, &bn, &run, NULL, 0, &context);
 
 		(void)vnode_put(vp);
 
 		if (!error) {
 			l2p_struct.l2p_flags = 0;	/* for now */
-			l2p_struct.l2p_contigbytes = 0;	/* for now */
+			if (uap->cmd == F_LOG2PHYS_EXT) {
+				l2p_struct.l2p_contigbytes = run - (file_offset - offset);
+			} else {
+				l2p_struct.l2p_contigbytes = 0;	/* for now */
+			}
 			l2p_struct.l2p_devoffset = bn * devBlockSize;
-			l2p_struct.l2p_devoffset += fp->f_offset - offset;
+			l2p_struct.l2p_devoffset += file_offset - offset;
 			error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct));
 		}
 		goto outdrop;
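
Unlike plain F_LOG2PHYS, the extended form takes its inputs from the struct: the caller seeds l2p_devoffset with the file offset of interest and l2p_contigbytes with the range it cares about, and gets back the device offset plus the length of the contiguous run. A sketch using the struct log2phys fields referenced above:

	#include <fcntl.h>

	/*
	 * Sketch: map 'file_off' within 'fd' to a device offset, asking
	 * about up to 'len' contiguous bytes.  On success, l2p_devoffset
	 * holds the physical offset and l2p_contigbytes the run length.
	 */
	int
	map_file_offset(int fd, off_t file_off, off_t len, struct log2phys *l2p)
	{
		l2p->l2p_devoffset = file_off;	/* in: file offset to map */
		l2p->l2p_contigbytes = len;	/* in: bytes of interest */

		return (fcntl(fd, F_LOG2PHYS_EXT, l2p));
	}
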
@@ -1384,7 +1477,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 		VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
 
 		/* Start the lookup relative to the file descriptor's vnode. */
-		NDINIT(&nd, LOOKUP, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+		NDINIT(&nd, LOOKUP, OP_OPEN, USEDVP | FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
 		       fopen.o_pathname, &context);
 		nd.ni_dvp = vp;
 
@@ -1429,7 +1522,8 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 		}
 
 		/* Start the lookup relative to the file descriptor's vnode. */
-		NDINIT(&nd, DELETE, USEDVP | AUDITVNPATH1, UIO_USERSPACE, pathname, &context);
+		NDINIT(&nd, DELETE, OP_UNLINK, USEDVP | AUDITVNPATH1, UIO_USERSPACE,
+		       pathname, &context);
 		nd.ni_dvp = vp;
 
 		error = unlink1(&context, &nd, 0);
@@ -1533,6 +1627,9 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 					       kernel_blob_size);
 		} else {
 			/* ubc_blob_add() has consumed "kernel_blob_addr" */
+#if CHECK_CS_VALIDATION_BITMAP
+			ubc_cs_validation_bitmap_allocate( vp );
+#endif
 		}
 
 		(void) vnode_put(vp);
@@ -1540,7 +1637,6 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 	}
 
 	case F_MARKDEPENDENCY: {
-		struct vnode *root_vp;
 		struct vnode_attr va;
 		vfs_context_t ctx = vfs_context_current();
 		kauth_cred_t cred;
@@ -1563,13 +1659,11 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 			goto outdrop;
 		}
 
-		// the passed in vnode must be the root dir of the file system
-		if (VFS_ROOT(vp->v_mount, &root_vp, ctx) != 0 || vp != root_vp) {
+		if (!vnode_isvroot(vp)) {
 		    error = EINVAL;
 		    vnode_put(vp);
 		    goto outdrop;
 		}
-		vnode_put(root_vp);
 
 		// get the owner of the root dir
 		VATTR_INIT(&va);
@@ -1592,24 +1686,291 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval)
 		// if all those checks pass then we can mark the dependency
 		vfs_markdependency(vp->v_mount);
 		error = 0;
+		
+		vnode_put(vp);
+		
+		break;
+	}
+			
+#if CONFIG_PROTECT
+	case F_GETPROTECTIONCLASS: {
+		int class = 0;
+		
+		if (fp->f_type != DTYPE_VNODE) {
+			error = EBADF;
+			goto out;
+		}
+		vp = (struct vnode *)fp->f_data;
+
+		proc_fdunlock(p);
+
+		if (vnode_getwithref(vp)) {
+			error = ENOENT;
+			goto outdrop;
+		}
+	
+		error = cp_vnode_getclass (vp, &class);
+		if (error == 0) {
+			*retval = class;
+		}
 
 		vnode_put(vp);
+		break;
+	}
+	
+	case F_SETPROTECTIONCLASS: {
+		/* tmp must be a valid PROTECTION_CLASS_* */
+		tmp = CAST_DOWN_EXPLICIT(uint32_t, uap->arg);
+		
+		if (fp->f_type != DTYPE_VNODE) {
+			error = EBADF;
+			goto out;
+		}
+		vp = (struct vnode *)fp->f_data;
+
+		proc_fdunlock(p);
+	
+		if (vnode_getwithref(vp)) {
+			error = ENOENT;
+			goto outdrop;
+		}	
 		
+		/* Only go forward if you have write access */
+		vfs_context_t ctx = vfs_context_current();
+		if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
+			vnode_put(vp);
+			error = EBADF;
+			goto outdrop;
+		}
+		error = cp_vnode_setclass (vp, tmp);
+		vnode_put(vp);
+		break;
+	}	
+#endif /* CONFIG_PROTECT */
+			
+	case F_MOVEDATAEXTENTS: {
+		struct fileproc *fp2 = NULL;
+		struct vnode *src_vp = NULLVP;
+		struct vnode *dst_vp = NULLVP;
+		/* We need to grab the 2nd FD out of the arguments before moving on. */
+		int fd2 = CAST_DOWN_EXPLICIT(int32_t, uap->arg);
+
+		if (fp->f_type != DTYPE_VNODE) {
+			error = EBADF;
+			goto out;
+		}
+		vp = src_vp = (struct vnode *)fp->f_data;
+
+		/* For now, special case HFS+ only, since this is SPI. */
+		if (src_vp->v_tag != VT_HFS) {
+			error = EINVAL;
+			goto out;
+		}
+
+		/* We're still holding the proc FD lock */
+		if ( (error = fp_lookup(p, fd2, &fp2, 1)) ) {
+			error = EBADF;
+			goto out;
+		}
+		if (fp2->f_type != DTYPE_VNODE) {
+			fp_drop(p, fd2, fp2, 1);
+			error = EBADF;
+			goto out;
+		}
+		dst_vp = (struct vnode *)fp2->f_data;
+
+		/* For now, special case HFS+ only, since this is SPI. */
+		if (dst_vp->v_tag != VT_HFS) {
+			fp_drop(p, fd2, fp2, 1);
+			error = EINVAL;
+			goto out;
+		}
+
+#if CONFIG_MACF
+		/* Re-do MAC checks against the new FD, pass in a fake argument */
+		error = mac_file_check_fcntl(proc_ucred(p), fp2->f_fglob, uap->cmd, 0);
+		if (error) {
+			fp_drop(p, fd2, fp2, 1);
+			goto out;
+		}
+#endif
+		/* Audit the 2nd FD */
+		AUDIT_ARG(fd, fd2);
+
+		proc_fdunlock(p);
+
+		/* Proc lock dropped; now we have a legit pair of FDs.  Go to work */
+
+		if (vnode_getwithref(src_vp)) {
+			fp_drop(p, fd2, fp2, 0);
+			error = ENOENT;
+			goto outdrop;
+		}	
+		if (vnode_getwithref(dst_vp)) {
+			vnode_put (src_vp);
+			fp_drop(p, fd2, fp2, 0);
+			error = ENOENT;
+			goto outdrop;
+		}	
+
+		/* 
+		 * Basic sanity checks: ensure the two vnodes are distinct
+		 * and that both live on the same filesystem.
+		 */
+
+		if (dst_vp == src_vp) {
+			vnode_put (src_vp);
+			vnode_put (dst_vp);
+			fp_drop (p, fd2, fp2, 0);
+			error = EINVAL;
+			goto outdrop;
+		}	
+	
+		if (dst_vp->v_mount != src_vp->v_mount) {
+			vnode_put (src_vp);
+			vnode_put (dst_vp);
+			fp_drop (p, fd2, fp2, 0);
+			error = EXDEV;
+			goto outdrop;
+		}
+
+		/* Now check for write access to the target files */
+		if(vnode_authorize(src_vp, NULLVP, 
+					(KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
+			vnode_put(src_vp);
+			vnode_put(dst_vp);
+			fp_drop(p, fd2, fp2, 0);
+			error = EBADF;
+			goto outdrop;
+		}
+
+		if(vnode_authorize(dst_vp, NULLVP, 
+					(KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), &context) != 0) {
+			vnode_put(src_vp);
+			vnode_put(dst_vp);
+			fp_drop(p, fd2, fp2, 0);
+			error = EBADF;
+			goto outdrop;
+		}
+
+		/* Verify that both vps point to files and not directories */
+		if (!vnode_isreg(src_vp) || !vnode_isreg(dst_vp)) {
+			vnode_put(src_vp);
+			vnode_put(dst_vp);
+			fp_drop(p, fd2, fp2, 0);
+			error = EINVAL;
+			goto outdrop;
+		}
+
+		/* 
+		 * The exchangedata syscall handler passes in 0 for the flags to VNOP_EXCHANGE.
+		 * We'll pass in our special bit indicating that the new behavior is expected
+		 */
+
+		error = VNOP_EXCHANGE(src_vp, dst_vp, FSOPT_EXCHANGE_DATA_ONLY, &context);
+
+		vnode_put (src_vp);
+		vnode_put (dst_vp);
+		fp_drop(p, fd2, fp2, 0);
 		break;
 	}
 
-	case F_GETPROTECTIONCLASS: {
-		// stub to make the API work
-		printf("Reached F_GETPROTECTIONCLASS, returning without action\n");
+	/* 
+	 * Tag the vnode referenced by 'fd' as the (potentially
+	 * future) backing store for another filesystem.
+	 */
+	case F_SETBACKINGSTORE: {
+		if (fp->f_type != DTYPE_VNODE) {
+			error = EBADF;
+			goto out;
+		}
+		vp = (struct vnode *)fp->f_data;
+		
+		if (vp->v_tag != VT_HFS) {
+			error = EINVAL;
+			goto out;
+		}
+		proc_fdunlock(p);
+
+		if (vnode_getwithref(vp)) {
+			error = ENOENT;
+			goto outdrop;
+		}
+		
+		/* only proceed if you have write access */
+		vfs_context_t ctx = vfs_context_current();
+		if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) {
+			vnode_put(vp);
+			error = EBADF;
+			goto outdrop;
+		}
+
+		
+		/* If arg != 0, set, otherwise unset */
+		if (uap->arg) {
+			error = hfs_set_backingstore (vp, 1);
+		}
+		else {
+			error = hfs_set_backingstore (vp, 0);
+		}
+		/* any error from hfs_set_backingstore() propagates */
-		goto out;
+
+		vnode_put(vp);
+		break;
 	}
 
-	case F_SETPROTECTIONCLASS: {
-		// stub to make the API work
-		printf("Reached F_SETPROTECTIONCLASS, returning without action\n");
-		error = 0;
-		goto out;
+	/* 
+	 * like F_GETPATH, but special semantics for
+	 * Like F_GETPATH, but with special semantics for
+	 * the mobile time machine handler.
+	case F_GETPATH_MTMINFO: {
+		char *pathbufp;
+		int pathlen;
+
+		if (fp->f_type != DTYPE_VNODE) {
+			error = EBADF;
+			goto out;
+		}
+		vp = (struct vnode *)fp->f_data;
+		proc_fdunlock(p);
+
+		pathlen = MAXPATHLEN;
+		MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK);
+		if (pathbufp == NULL) {
+			error = ENOMEM;
+			goto outdrop;
+		}
+		if ( (error = vnode_getwithref(vp)) == 0 ) {
+			int backingstore = 0;
+			
+			/* Check for error from vn_getpath before moving on */
+			if ((error = vn_getpath(vp, pathbufp, &pathlen)) == 0) {
+				if (vp->v_tag == VT_HFS) {
+					error = hfs_is_backingstore (vp, &backingstore);
+				}
+				(void)vnode_put(vp);
+
+				if (error == 0) {
+					error = copyout((caddr_t)pathbufp, argp, pathlen);
+				}
+				if (error == 0) {
+					/* 
+					 * If the copyout was successful, now check to ensure
+					 * that this vnode is not a BACKINGSTORE vnode.  mtmd
+					 * wants the path regardless.
+					 */
+					if (backingstore) {
+						error = EBUSY;
+					}
+				}
+			} else
+				(void)vnode_put(vp);
+		}
+		FREE(pathbufp, M_TEMP);
+		goto outdrop;
 	}
 
 
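
With the earlier stubs replaced by real cp_vnode_getclass()/cp_vnode_setclass() calls, the userspace shape of the content-protection commands is a plain fcntl(). A sketch, where the class value passed is assumed to be one of the PROTECTION_CLASS_* constants from the content-protection headers:

	#include <fcntl.h>

	/* Query the data-protection class of an open file; class, or -1. */
	int
	get_protection_class(int fd)
	{
		return (fcntl(fd, F_GETPROTECTIONCLASS));
	}

	/* Assign a new class; the kernel requires write access to the file. */
	int
	set_protection_class(int fd, int pclass)
	{
		return (fcntl(fd, F_SETPROTECTIONCLASS, pclass));
	}
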
@@ -1730,6 +2091,7 @@ out:
  * Parameters:	p				Process performing the dup
  *		old				The fd to dup
  *		new				The fd to dup it to
+ *		fd_flags			Flags to augment the new fd
  *		retval				Pointer to the call return area
  *
  * Returns:	0				Success
@@ -1744,10 +2106,11 @@ out:
  *
  * Notes:	This function may drop and reacquire this lock; it is unsafe
  *		for a caller to assume that other state protected by the lock
- *		has not been subsequently changes out from under it.
+ *		has not been subsequently changed out from under it.
  */
 int
-finishdup(proc_t p, struct filedesc *fdp, int old, int new, int32_t *retval)
+finishdup(proc_t p,
+    struct filedesc *fdp, int old, int new, int fd_flags, int32_t *retval)
 {
 	struct fileproc *nfp;
 	struct fileproc *ofp;
@@ -1758,9 +2121,8 @@ finishdup(proc_t p, struct filedesc *fdp, int old, int new, int32_t *retval)
 #if DIAGNOSTIC
 	proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED);
 #endif
-
 	if ((ofp = fdp->fd_ofiles[old]) == NULL ||
-			(fdp->fd_ofileflags[old] & UF_RESERVED)) {
+	    (fdp->fd_ofileflags[old] & UF_RESERVED)) {
 		fdrelse(p, new);
 		return (EBADF);
 	}
@@ -1796,13 +2158,14 @@ finishdup(proc_t p, struct filedesc *fdp, int old, int new, int32_t *retval)
 
 #if DIAGNOSTIC
 	if (fdp->fd_ofiles[new] != 0)
-		panic("finishdup: overwriting fd_ofiles with new %d\n", new);
+		panic("finishdup: overwriting fd_ofiles with new %d", new);
 	if ((fdp->fd_ofileflags[new] & UF_RESERVED) == 0)
-		panic("finishdup: unreserved  fileflags  with new %d\n", new);
+		panic("finishdup: unreserved fileflags with new %d", new);
 #endif
 
 	if (new > fdp->fd_lastfile)
 		fdp->fd_lastfile = new;
+	*fdflags(p, new) |= fd_flags;
 	procfdtbl_releasefd(p, new, nfp);
 	*retval = new;
 	return (0);
@@ -1897,13 +2260,13 @@ close_internal_locked(proc_t p, int fd, struct fileproc *fp, int flags)
 
 
 	if ((fp->f_flags & FP_CLOSING) == FP_CLOSING) {
-		panic("close_internal_locked:  being called on already closing fd\n");
+		panic("close_internal_locked: being called on already closing fd");
 	}
 
 
 #if DIAGNOSTIC
 	if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == 0)
-		panic("close_internal: unreserved  fileflags  with fd %d\n", fd);
+		panic("close_internal: unreserved fileflags with fd %d", fd);
 #endif
 
 	fp->f_flags |= FP_CLOSING;
@@ -1961,7 +2324,7 @@ close_internal_locked(proc_t p, int fd, struct fileproc *fp, int flags)
 #if DIAGNOSTIC
 	if (resvfd != 0) {
 		if ((fdp->fd_ofileflags[fd] & UF_RESERVED) == 0)
-			panic("close with reserved fd returns with freed fd:%d: proc: %x\n", fd, (unsigned int)p);
+			panic("close with reserved fd returns with freed fd:%d: proc: %p", fd, p);
 	}
 #endif
 
@@ -3150,9 +3513,14 @@ fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
 	}
 	fp->f_iocount--;
 
-	if (p->p_fpdrainwait && fp->f_iocount == 0) {
-	        p->p_fpdrainwait = 0;
-		needwakeup = 1;
+	if (fp->f_iocount == 0) {
+		if (fp->f_flags & FP_SELCONFLICT)
+			fp->f_flags &= ~FP_SELCONFLICT;
+
+		if (p->p_fpdrainwait) {
+			p->p_fpdrainwait = 0;
+			needwakeup = 1;
+		}
 	}
 	if (!locked)
 		proc_fdunlock(p);
@@ -3188,7 +3556,7 @@ fp_drop(proc_t p, int fd, struct fileproc *fp, int locked)
  *
  *		The fileproc referenced is not returned; because of this, care
  *		must be taken to not drop the last reference (e.g. by closing
- *		the file).  This is inhernely unsafe, since the reference may
+ *		the file).  This is inherently unsafe, since the reference may
  *		not be recoverable from the vnode, if there is a subsequent
 *		close that destroys the associated fileproc.  The caller should
  *		therefore retain their own reference on the fileproc so that
@@ -3249,7 +3617,7 @@ file_vnode(int fd, struct vnode **vpp)
  *
  *		The fileproc referenced is not returned; because of this, care
  *		must be taken to not drop the last reference (e.g. by closing
- *		the file).  This is inhernely unsafe, since the reference may
+ *		the file).  This is inherently unsafe, since the reference may
  *		not be recoverable from the vnode, if there is a subsequent
 *		close that destroys the associated fileproc.  The caller should
  *		therefore retain their own reference on the fileproc so that
@@ -3314,7 +3682,7 @@ file_vnode_withvid(int fd, struct vnode **vpp, uint32_t * vidp)
  *
  *		The fileproc referenced is not returned; because of this, care
  *		must be taken to not drop the last reference (e.g. by closing
- *		the file).  This is inhernely unsafe, since the reference may
+ *		the file).  This is inherently unsafe, since the reference may
  *		not be recoverable from the socket, if there is a subsequent
 *		close that destroys the associated fileproc.  The caller should
  *		therefore retain their own reference on the fileproc so that
@@ -3445,9 +3813,14 @@ file_drop(int fd)
 	}
 	fp->f_iocount --;
 
-	if (p->p_fpdrainwait && fp->f_iocount == 0) {
-	        p->p_fpdrainwait = 0;
-		needwakeup = 1;
+	if (fp->f_iocount == 0) {
+		if (fp->f_flags & FP_SELCONFLICT)
+			fp->f_flags &= ~FP_SELCONFLICT;
+
+		if (p->p_fpdrainwait) {
+			p->p_fpdrainwait = 0;
+			needwakeup = 1;
+		}
 	}
 	proc_fdunlock(p);
 
@@ -3481,7 +3854,7 @@ file_drop(int fd)
  *		*resultfd (modified)		Returned fd
  *
  * Locks:	This function takes and drops the proc_fdlock; if this lock
- *		is alread held, use falloc_locked() instead.
+ *		is already held, use falloc_locked() instead.
  *
  * Notes:	This function takes separate process and context arguments
  *		solely to support kern_exec.c; otherwise, it would take
@@ -3505,7 +3878,7 @@ falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx)
  * falloc_locked
  *
  * Create a new open file structure and allocate
- * a file decriptor for the process that refers to it.
+ * a file descriptor for the process that refers to it.
  *
  * Returns:	0			Success
  *
@@ -3679,6 +4052,10 @@ fg_free(struct fileglob *fg)
  *		that are either marked as close-on-exec, or which were in the
  *		process of being opened at the time of the execve
  *
+ *		Also handles the case (via posix_spawn()) where -all-
+ *		files except those marked with "inherit" are treated as
+ *		close-on-exec.
+ *
  * Parameters:	p				Pointer to process calling
  *						execve
  *
@@ -3693,27 +4070,39 @@ fg_free(struct fileglob *fg)
  * XXX:		We should likely reverse the lock and funnel drop/acquire
  *		order to avoid the small race window; it's also possible that
  *		if the program doing the exec has an outstanding listen socket
- *		and a network connection is completed asyncrhonously that we
+ *		and a network connection is completed asynchronously that we
  *		will end up with a "ghost" socket reference in the new process.
  *
  *		This needs reworking to make it safe to remove the funnel from
  *		the execve and posix_spawn system calls.
  */
 void
-fdexec(proc_t p)
+fdexec(proc_t p, short flags)
 {
 	struct filedesc *fdp = p->p_fd;
 	int i;
-	struct fileproc *fp;
+	boolean_t cloexec_default = (flags & POSIX_SPAWN_CLOEXEC_DEFAULT) != 0;
 
 	proc_fdlock(p);
-	i = fdp->fd_lastfile;
+	for (i = fdp->fd_lastfile; i >= 0; i--) {
+
+		struct fileproc *fp = fdp->fd_ofiles[i];
+		char *flagp = &fdp->fd_ofileflags[i];
 
-	while (i >= 0) {
+		if (cloexec_default) {
+			/*
+			 * Reverse the usual semantics of file descriptor
+			 * inheritance - all of them should be closed
+			 * except files marked explicitly as "inherit" and
+			 * not marked close-on-exec.
+			 */
+			if ((*flagp & (UF_EXCLOSE|UF_INHERIT)) != UF_INHERIT)
+				*flagp |= UF_EXCLOSE;
+			*flagp &= ~UF_INHERIT;
+		}
 
-		fp = fdp->fd_ofiles[i];
 		if (
-		    ((fdp->fd_ofileflags[i] & (UF_RESERVED|UF_EXCLOSE)) == UF_EXCLOSE)
+		    ((*flagp & (UF_RESERVED|UF_EXCLOSE)) == UF_EXCLOSE)
 #if CONFIG_MACF
 		    || (fp && mac_file_check_inherit(proc_ucred(p), fp->f_fglob))
 #endif
@@ -3725,10 +4114,21 @@ fdexec(proc_t p)
 				fdp->fd_lastfile--;
 			if (i < fdp->fd_freefile)
 				fdp->fd_freefile = i;
+
+			/*
+			 * Wait for any third party viewers (e.g., lsof)
+			 * to release their references to this fileproc.
+			 */
+			while (fp->f_iocount > 0) {
+				p->p_fpdrainwait = 1;
+				msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO,
+				    "fpdrain", NULL);
+			}
+
 			closef_locked(fp, fp->f_fglob, p);
+
 			FREE_ZONE(fp, sizeof(*fp), M_FILEPROC);
 		}
-		i--;
 	}
 	proc_fdunlock(p);
 }
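
The new flags argument supports posix_spawn()'s inverted inheritance model: with POSIX_SPAWN_CLOEXEC_DEFAULT, every descriptor is treated as close-on-exec unless explicitly marked inherit. A userspace sketch, assuming the spawn attribute flag and the addinherit_np file action from <spawn.h>:

	#include <sys/types.h>
	#include <spawn.h>

	extern char **environ;

	/* Sketch: spawn 'path' passing only stdin/stdout/stderr to the child. */
	int
	spawn_with_clean_fds(pid_t *pid, const char *path, char *const argv[])
	{
		posix_spawnattr_t attr;
		posix_spawn_file_actions_t fa;
		int err;

		posix_spawnattr_init(&attr);
		posix_spawnattr_setflags(&attr, POSIX_SPAWN_CLOEXEC_DEFAULT);

		/* mark the stdio descriptors "inherit" (UF_INHERIT above) */
		posix_spawn_file_actions_init(&fa);
		posix_spawn_file_actions_addinherit_np(&fa, 0);
		posix_spawn_file_actions_addinherit_np(&fa, 1);
		posix_spawn_file_actions_addinherit_np(&fa, 2);

		err = posix_spawn(pid, path, &fa, &attr, argv, environ);

		posix_spawn_file_actions_destroy(&fa);
		posix_spawnattr_destroy(&attr);
		return (err);
	}
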
@@ -3764,7 +4164,7 @@ fdexec(proc_t p)
  *		thread making the call, rather than from the process.
  *
  *		In the case of a failure to obtain a reference, for most cases,
- *		the file entry will be silently droppped.  There's an exception
+ *		the file entry will be silently dropped.  There's an exception
 *		for the case of a chroot dir, since a failure to obtain a
  *		reference there would constitute an "escape" from the chroot
  *		environment, which must not be allowed.  In that case, we will
@@ -3822,7 +4222,7 @@ fdcopy(proc_t p, vnode_t uth_cdir)
 		 * our reference from the parent also
 		 * since the vnode has gone DEAD making
 		 * it useless... by dropping it we'll
-		 * be that much closer to recyling it
+		 * be that much closer to recycling it
 		 */
 	        vnode_rele(fdp->fd_cdir);
 		fdp->fd_cdir = NULL;
@@ -3994,7 +4394,7 @@ fdfree(proc_t p)
 			if ((fp = fdp->fd_ofiles[i]) != NULL) {
 			  
 			  if (fdp->fd_ofileflags[i] & UF_RESERVED)
-			    	panic("fdfree: found fp with UF_RESERVED\n");
+			    	panic("fdfree: found fp with UF_RESERVED");
 
 				/* closef drops the iocount ... */
 				if ((fp->f_flags & FP_INCHRREAD) != 0) 
@@ -4186,7 +4586,7 @@ closef_locked(struct fileproc *fp, struct fileglob *fg, proc_t p)
  * Locks:	Assumes the caller holds the proc_fdlock
  *
  * Notes:	For character devices, this occurs on the last close of the
- *		device; for all other file descriptos, this occurs on each
+ *		device; for all other file descriptors, this occurs on each
  *		close to prevent fd's from being closed out from under
  *		operations currently in progress and blocked
  *
@@ -4210,14 +4610,25 @@ fileproc_drain(proc_t p, struct fileproc * fp)
 		if (fp->f_fglob->fg_ops->fo_drain) {
 			(*fp->f_fglob->fg_ops->fo_drain)(fp, &context);
 		}
-		if (((fp->f_flags & FP_INSELECT)== FP_INSELECT)) {
-			wait_queue_wakeup_all((wait_queue_t)fp->f_waddr, NULL, THREAD_INTERRUPTED);
+		if ((fp->f_flags & FP_INSELECT) == FP_INSELECT) {
+			if (wait_queue_wakeup_all((wait_queue_t)fp->f_waddr, NULL, THREAD_INTERRUPTED) == KERN_INVALID_ARGUMENT)
+				panic("bad wait queue for wait_queue_wakeup_all %p", fp->f_waddr);
 		} 
+		if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
+			if (wait_queue_wakeup_all(&select_conflict_queue, NULL, THREAD_INTERRUPTED) == KERN_INVALID_ARGUMENT)
+				panic("bad select_conflict_queue");
+		}
 		p->p_fpdrainwait = 1;
 
 		msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO, "fpdrain", NULL);
 
 	}
+#if DIAGNOSTIC
+	if ((fp->f_flags & FP_INSELECT) != 0)
+		panic("FP_INSELECT set on drained fp");
+#endif
+	if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT)
+		fp->f_flags &= ~FP_SELCONFLICT;
 }
 
 
@@ -4329,7 +4740,6 @@ out1:
 
 }
 
-#if CONFIG_EMBEDDED
 /*
  * fileport_makeport
  *
@@ -4465,7 +4875,7 @@ fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
 		err = EINVAL;
 		goto out;
 	}
-	
+
 	MALLOC_ZONE(fp, struct fileproc *, sizeof(*fp), M_FILEPROC, M_WAITOK);
 	if (fp == FILEPROC_NULL) {
 		err = ENOMEM;
@@ -4483,6 +4893,7 @@ fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval)
 		proc_fdunlock(p);
 		goto out;
 	}
+	*fdflags(p, fd) |= UF_EXCLOSE;
 
 	procfdtbl_releasefd(p, fd, fp);
 	proc_fdunlock(p);
@@ -4500,7 +4911,6 @@ out:
 
 	return err;
 }
-#endif /* CONFIG_EMBEDDED */
 
 
 /*
@@ -4524,7 +4934,7 @@ out:
  * Notes:	XXX This is not thread safe; see fdopen() above
  */
 int
-dupfdopen(struct filedesc *fdp, int indx, int dfd, int mode, int error)
+dupfdopen(struct filedesc *fdp, int indx, int dfd, int flags, int error)
 {
 	struct fileproc *wfp;
 	struct fileproc *fp;
@@ -4575,7 +4985,7 @@ dupfdopen(struct filedesc *fdp, int indx, int dfd, int mode, int error)
 		 * Check that the mode the file is being opened for is a
 		 * subset of the mode of the existing descriptor.
 		 */
-	        if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
+	        if (((flags & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
 		        proc_fdunlock(p);
 			return (EACCES);
 		}
@@ -4587,7 +4997,8 @@ dupfdopen(struct filedesc *fdp, int indx, int dfd, int mode, int error)
 		        fg_free(fp->f_fglob);
 		fp->f_fglob = wfp->f_fglob;
 
-		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd] |
+			((flags & O_CLOEXEC) ? UF_EXCLOSE : 0);
 
 	        proc_fdunlock(p);
 		return (0);
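
The parentheses around the conditional above are load-bearing: in C, '|' binds tighter than '?:', so without them the existing descriptor flags would be swallowed by the condition. A small demonstration (flag values illustrative):

	#include <stdio.h>

	#define O_CLOEXEC  0x1000000	/* illustrative value */
	#define UF_EXCLOSE 0x04		/* illustrative value */

	int
	main(void)
	{
		int old = 0x03, flags = O_CLOEXEC;

		/* unparenthesized: parsed as ((old | (flags & O_CLOEXEC)) ? ...) */
		int bad = old | (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
		/* parenthesized: the old flags are preserved */
		int good = old | ((flags & O_CLOEXEC) ? UF_EXCLOSE : 0);

		printf("bad=%#x good=%#x\n", bad, good);	/* bad=0x4 good=0x7 */
		return 0;
	}
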
@@ -4623,10 +5034,11 @@ fg_ref(struct fileproc * fp)
 
 #if DIAGNOSTIC
 	if ((fp->f_flags & ~((unsigned int)FP_VALID_FLAGS)) != 0)
-		panic("fg_ref: invalid bits on fp%x\n", (unsigned int)fp);
+		panic("fg_ref: invalid bits on fp %p", fp);
 
 	if (fg->fg_count == 0)
-		panic("fg_ref: adding fgcount to zeroed fg :fp %x, fg%x\n ", (unsigned int)fp, (unsigned int)fg);
+		panic("fg_ref: adding fgcount to zeroed fg: fp %p fg %p",
+		    fp, fg);
 #endif
 	fg->fg_count++;
 	lck_mtx_unlock(&fg->fg_lock);
diff --git a/bsd/kern/kern_event.c b/bsd/kern/kern_event.c
index 5d195dcf0..632501473 100644
--- a/bsd/kern/kern_event.c
+++ b/bsd/kern/kern_event.c
@@ -92,6 +92,9 @@
 #include <libkern/libkern.h>
 #include "net/net_str_id.h"
 
+#include <mach/task.h>
+#include <kern/vm_pressure.h>
+
 MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");
 
 #define KQ_EVENT NULL
@@ -140,6 +143,8 @@ static void	kevent_continue(struct kqueue *kq, void *data, int error);
 static void	kqueue_scan_continue(void *contp, wait_result_t wait_result);
 static int	kqueue_process(struct kqueue *kq, kevent_callback_t callback,
 			       void *data, int *countp, struct proc *p);
+static int	kqueue_begin_processing(struct kqueue *kq);
+static void	kqueue_end_processing(struct kqueue *kq);
 static int	knote_process(struct knote *kn, kevent_callback_t callback,
 			      void *data, struct kqtailq *inprocessp, struct proc *p);
 static void	knote_put(struct knote *kn);
@@ -183,6 +188,15 @@ static struct filterops proc_filtops = {
         .f_event = filt_proc,
 };
 
+static int filt_vmattach(struct knote *kn);
+static void filt_vmdetach(struct knote *kn);
+static int filt_vm(struct knote *kn, long hint);
+static struct filterops vm_filtops = {
+	.f_attach = filt_vmattach,
+	.f_detach = filt_vmdetach,
+	.f_event = filt_vm,
+};
+
 extern struct filterops fs_filtops;
 
 extern struct filterops sig_filtops;
@@ -238,11 +252,6 @@ static struct filterops user_filtops = {
         .f_touch = filt_usertouch,
 };
 
-#if CONFIG_AUDIT
-/* Audit session filter */
-extern struct filterops audit_session_filtops;
-#endif
-
 /*
  * Table for all system-defined filters.
  */
@@ -261,11 +270,8 @@ static struct filterops *sysfilt_ops[] = {
 	&machport_filtops,		/* EVFILT_MACHPORT */
 	&fs_filtops,			/* EVFILT_FS */
 	&user_filtops,			/* EVFILT_USER */
-#if CONFIG_AUDIT
-	&audit_session_filtops,		/* EVFILT_SESSION */
-#else
-	&bad_filtops,
-#endif
+	&bad_filtops,			/* unused */
+	&vm_filtops,			/* EVFILT_VM */
 };
 
 /*
@@ -455,6 +461,7 @@ static int
 filt_procattach(struct knote *kn)
 {
 	struct proc *p;
+	pid_t selfpid = (pid_t)0;
 
 	assert(PID_MAX < NOTE_PDATAMASK);
 	
@@ -466,6 +473,16 @@ filt_procattach(struct knote *kn)
 		return (ESRCH);
 	}
 
+	if ((kn->kn_sfflags & NOTE_EXIT) != 0) {
+		selfpid = proc_selfpid();
+		/* check for validity of NOTE_EXITSTATUS */
+		if (((kn->kn_sfflags & NOTE_EXITSTATUS) != 0) && 
+			((p->p_ppid != selfpid) && (((p->p_lflag & P_LTRACED) == 0) || (p->p_oppid != selfpid)))) {
+			proc_rele(p);
+			return(EACCES);
+		}
+	}
+
 	proc_klist_lock();
 
 	kn->kn_flags |= EV_CLEAR;	/* automatically set */
@@ -524,12 +541,57 @@ filt_proc(struct knote *kn, long hint)
 		if (event == NOTE_REAP || (event == NOTE_EXIT && !(kn->kn_sfflags & NOTE_REAP))) {
 			kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 		}
+		if ((event == NOTE_EXIT) && ((kn->kn_sfflags & NOTE_EXITSTATUS) != 0)) {
+			kn->kn_fflags |= NOTE_EXITSTATUS;
+			kn->kn_data = (hint & NOTE_PDATAMASK);
+		}
+		if ((event == NOTE_RESOURCEEND) && ((kn->kn_sfflags & NOTE_RESOURCEEND) != 0)) {
+			kn->kn_fflags |= NOTE_RESOURCEEND;
+			kn->kn_data = (hint & NOTE_PDATAMASK);
+		}
 	}
 
 	/* atomic check, no locking need when called from above */
 	return (kn->kn_fflags != 0); 
 }
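
With NOTE_EXITSTATUS, a parent can collect a child's wait(2)-style exit status straight from the kevent; per the filt_procattach() check above, only the child's parent (or tracer) may ask for it. A hedged userland sketch (NOTE_EXITSTATUS is private in this release, so its visibility in <sys/event.h> is an assumption):

	#include <sys/event.h>
	#include <sys/types.h>
	#include <stdio.h>

	int
	watch_child_exit(pid_t child)
	{
		struct kevent kev;
		int kq = kqueue();

		if (kq < 0)
			return -1;
		EV_SET(&kev, child, EVFILT_PROC, EV_ADD | EV_ONESHOT,
		    NOTE_EXIT | NOTE_EXITSTATUS, 0, NULL);
		/* register and block for the exit event in one call */
		if (kevent(kq, &kev, 1, &kev, 1, NULL) == 1)
			printf("pid %lu exited, status %#lx\n",
			    (unsigned long)kev.ident, (long)kev.data);
		return kq;
	}
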
 
+/*
+ * Virtual memory kevents
+ *
+ * author: Matt Jacobson [matthew_jacobson@apple.com]
+ */
+
+static int
+filt_vmattach(struct knote *kn)
+{	
+	/* 
+	 * The note will be cleared once the information has been flushed to the client. 
+	 * If there is still pressure, we will be re-alerted.
+	 */
+	kn->kn_flags |= EV_CLEAR; 
+	
+	return vm_knote_register(kn);
+}
+
+static void
+filt_vmdetach(struct knote *kn)
+{
+	vm_knote_unregister(kn);
+}
+
+static int
+filt_vm(struct knote *kn, long hint)
+{
+	/* hint == 0 means this is just an "is it alive?" check (always true) */
+	if (hint != 0) { 
+		/* If this knote is interested in the event specified in hint... */
+		if ((kn->kn_sfflags & hint) != 0) { 
+			kn->kn_fflags |= hint;
+		}
+	}
+	
+	return (kn->kn_fflags != 0);
+}
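
A hedged sketch of a client of the new filter; EVFILT_VM and NOTE_VM_PRESSURE are private interfaces in this release, so both constants are assumptions here:

	#include <sys/event.h>
	#include <stdio.h>

	int
	wait_for_vm_pressure(void)
	{
		struct kevent kev;
		int kq = kqueue();

		if (kq < 0)
			return -1;
		/* ident is not examined by filt_vmattach() above */
		EV_SET(&kev, 0, EVFILT_VM, EV_ADD, NOTE_VM_PRESSURE, 0, NULL);
		if (kevent(kq, &kev, 1, NULL, 0, NULL) != 0)	/* register */
			return -1;
		if (kevent(kq, NULL, 0, &kev, 1, NULL) == 1)	/* block */
			printf("vm pressure, fflags=%#x\n", kev.fflags);
		return kq;
	}
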
 
 /*
  * filt_timervalidate - process data from user
@@ -872,7 +934,7 @@ filt_userattach(struct knote *kn)
 {
         /* EVFILT_USER knotes are not attached to anything in the kernel */
         kn->kn_hook = NULL;
-	if (kn->kn_fflags & NOTE_TRIGGER || kn->kn_flags & EV_TRIGGER) {
+	if (kn->kn_fflags & NOTE_TRIGGER) {
 		kn->kn_hookid = 1;
 	} else {
 		kn->kn_hookid = 0;
@@ -895,10 +957,10 @@ filt_user(struct knote *kn, __unused long hint)
 static void
 filt_usertouch(struct knote *kn, struct kevent64_s *kev, long type)
 {
-        int ffctrl;
+        uint32_t ffctrl;
         switch (type) {
         case EVENT_REGISTER:
-                if (kev->fflags & NOTE_TRIGGER || kev->flags & EV_TRIGGER) {
+                if (kev->fflags & NOTE_TRIGGER) {
                         kn->kn_hookid = 1;
                 }
 
@@ -1511,6 +1573,7 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc
 			error = fops->f_attach(kn);
 
 			kqlock(kq);
+
 			if (error != 0) {
 				/*
 				 * Failed to attach correctly, so drop.
@@ -1594,11 +1657,6 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc
 		 */
 		if (!fops->f_isfd && fops->f_touch != NULL)
 		        fops->f_touch(kn, kev, EVENT_REGISTER);
-
-		/* We may need to push some info down to a networked filesystem */
-		if (kn->kn_filter == EVFILT_VNODE) {
-			vnode_knoteupdate(kn);
-		}
 	}
 	/* still have use ref on knote */
 
@@ -1770,6 +1828,47 @@ knote_process(struct knote 	*kn,
 	return error;
 }
 
+/*
+ * Return 0 to indicate that processing should proceed,
+ * -1 if there is nothing to process.
+ *
+ * Called with kqueue locked and returns the same way,
+ * but may drop lock temporarily.
+ */
+static int
+kqueue_begin_processing(struct kqueue *kq)
+{
+	for (;;) {
+		if (kq->kq_count == 0) {
+			return -1;
+		}
+
+		/* if someone else is processing the queue, wait */
+		if (kq->kq_nprocess != 0) {
+			wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_UNINT, 0);
+			kq->kq_state |= KQ_PROCWAIT;
+			kqunlock(kq);
+			thread_block(THREAD_CONTINUE_NULL);
+			kqlock(kq);
+		} else {
+			kq->kq_nprocess = 1;
+			return 0;
+		}
+	}
+}
+
+/*
+ * Called with kqueue lock held.
+ */
+static void
+kqueue_end_processing(struct kqueue *kq)
+{
+	kq->kq_nprocess = 0;
+	if (kq->kq_state & KQ_PROCWAIT) {
+		kq->kq_state &= ~KQ_PROCWAIT;
+		wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_AWAKENED);
+	}
+}
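
These two helpers factor the single-consumer gate out of kqueue_process() and kqueue_select(); the intended calling shape, as both call sites below use it, is:

	kqlock(kq);
	if (kqueue_begin_processing(kq) == -1) {
		/* nothing queued */
		kqunlock(kq);
		return 0;
	}
	/* ... dequeue and examine knotes; the kq lock may be dropped ... */
	kqueue_end_processing(kq);
	kqunlock(kq);
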
 
 /*
  * kqueue_process - process the triggered events in a kqueue
@@ -1799,23 +1898,13 @@ kqueue_process(struct kqueue *kq,
 	int error;
 
         TAILQ_INIT(&inprocess);
- restart:
-	if (kq->kq_count == 0) {
+
+	if (kqueue_begin_processing(kq) == -1) {
 		*countp = 0;
+		/* Nothing to process */
 		return 0;
 	}
 
-	/* if someone else is processing the queue, wait */
-	if (hw_atomic_add(&kq->kq_nprocess, 1) != 1) {
-	        hw_atomic_sub(&kq->kq_nprocess, 1);
-		wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_UNINT, 0);
-		kq->kq_state |= KQ_PROCWAIT;
-		kqunlock(kq);
-		thread_block(THREAD_CONTINUE_NULL);
-		kqlock(kq);
-		goto restart;
-	}
-
 	/*
 	 * Clear any pre-posted status from previous runs, so we only
 	 * detect events that occur during this run.
@@ -1850,11 +1939,8 @@ kqueue_process(struct kqueue *kq,
 		kn->kn_tq = &kq->kq_head;
 		TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
 	}
-	hw_atomic_sub(&kq->kq_nprocess, 1);
-	if (kq->kq_state & KQ_PROCWAIT) {
-		kq->kq_state &= ~KQ_PROCWAIT;
-		wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_AWAKENED);
-	}
+
+	kqueue_end_processing(kq);
 
 	*countp = nevents;
 	return error;
@@ -2044,11 +2130,15 @@ static int
 kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx)
 {
 	struct kqueue *kq = (struct kqueue *)fp->f_data;
-	int again;
-
+	struct knote *kn;
+	struct kqtailq inprocessq;
+	int retnum = 0;
+	
 	if (which != FREAD)
 		return 0;
 
+	TAILQ_INIT(&inprocessq);
+
 	kqlock(kq);
 	/* 
 	 * If this is the first pass, link the wait queue associated with the
@@ -2067,11 +2157,12 @@ kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t
 					(wait_queue_link_t)wql);
 	}
 
- retry:
-	again = 0;
-	if (kq->kq_count != 0) {
-		struct knote *kn;
+	if (kqueue_begin_processing(kq) == -1) {
+		kqunlock(kq);
+		return 0;
+	}
 
+	if (kq->kq_count != 0) {
 		/*
 		 * there is something queued - but it might be a
 		 * KN_STAYQUEUED knote, which may or may not have
@@ -2079,31 +2170,42 @@ kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t
 		 * list of knotes to see, and peek at the stay-
 		 * queued ones to be really sure.
 		 */
-		TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) {
-			int retnum = 0;
-			if ((kn->kn_status & KN_STAYQUEUED) == 0 ||
-			    (retnum = kn->kn_fop->f_peek(kn)) > 0) {
-				kqunlock(kq);
-				return 1;
+		while ((kn = (struct knote*)TAILQ_FIRST(&kq->kq_head)) != NULL) {
+			if ((kn->kn_status & KN_STAYQUEUED) == 0) {
+				retnum = 1;
+				goto out;
 			}
-			if (retnum < 0)
-				again++;
+
+			TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
+			TAILQ_INSERT_TAIL(&inprocessq, kn, kn_tqe);
+
+			if (kqlock2knoteuse(kq, kn)) {
+				unsigned peek;
+
+				peek = kn->kn_fop->f_peek(kn);
+				if (knoteuse2kqlock(kq, kn)) {
+					if (peek > 0) {
+						retnum = 1;
+						goto out;
+					}
+				} else {
+					retnum = 0;
+				}
+			} 
 		}
 	}
 
-	/*
-	 * If we stumbled across a knote that couldn't be peeked at,
-	 * we have to drop the kq lock and try again.
-	 */
-	if (again > 0) {
-		kqunlock(kq);
-		mutex_pause(0);
-		kqlock(kq);
-		goto retry;
+out:
+	/* Return knotes to active queue */
+	while ((kn = TAILQ_FIRST(&inprocessq)) != NULL) {
+		TAILQ_REMOVE(&inprocessq, kn, kn_tqe);
+		kn->kn_tq = &kq->kq_head;
+		TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
 	}
 
+	kqueue_end_processing(kq);
 	kqunlock(kq);
-	return 0;
+	return retnum;
 }
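
Parking knotes on the private inprocessq matters because f_peek() may require the kq lock to be dropped; the lock-to-use-reference dance used above, shown in isolation:

	if (kqlock2knoteuse(kq, kn)) {
		/* kq lock dropped; we hold a use reference on kn */
		unsigned peek = kn->kn_fop->f_peek(kn);

		if (knoteuse2kqlock(kq, kn)) {
			/* lock re-taken and kn still alive; peek is valid */
		} else {
			/* kn was dropped while the lock was out */
		}
	}
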
 
 /*
@@ -2312,10 +2414,7 @@ knote_link_wait_queue(struct knote *kn, struct wait_queue *wq)
 
 	kr = wait_queue_link(wq, kq->kq_wqs);
 	if (kr == KERN_SUCCESS) {
-		kqlock(kq);
-		kn->kn_status |= KN_STAYQUEUED;
-		knote_enqueue(kn);
-		kqunlock(kq);
+		knote_markstayqueued(kn);
 		return 0;
 	} else {
 		return ENOMEM;
@@ -2531,6 +2630,7 @@ knote_init(void)
 
 	/* Initialize the timer filter lock */
 	lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr);
+	lck_mtx_init(&vm_pressure_klist_mutex, kq_lck_grp, kq_lck_attr);
 }
 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
 
@@ -2843,3 +2943,12 @@ fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo)
 	return(0);
 }
 
+
+void
+knote_markstayqueued(struct knote *kn)
+{
+	kqlock(kn->kn_kq);
+	kn->kn_status |= KN_STAYQUEUED;
+	knote_enqueue(kn);
+	kqunlock(kn->kn_kq);
+}
diff --git a/bsd/kern/kern_exec.c b/bsd/kern/kern_exec.c
index 722415a70..3de273b36 100644
--- a/bsd/kern/kern_exec.c
+++ b/bsd/kern/kern_exec.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -139,6 +139,7 @@
 #include <vm/vm_protos.h>
 #include <vm/vm_kern.h>
 
+#include <machine/pal_routines.h>
 
 #if CONFIG_DTRACE
 /* Do not include dtrace.h, it redefines kmem_[alloc/free] */
@@ -153,8 +154,7 @@ extern void dtrace_lazy_dofs_destroy(proc_t);
 thread_t fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int is64bit);
 void vfork_exit(proc_t p, int rv);
 int setsigvec(proc_t, thread_t, int, struct __kern_sigaction *, boolean_t in_sigstart);
-void workqueue_exit(struct proc *);
-
+extern void proc_apply_task_networkbg_internal(proc_t);
 
 /*
  * Mach things for which prototypes are unavailable from Mach headers
@@ -186,16 +186,6 @@ extern struct savearea *get_user_regs(thread_t);
 #include <sys/sdt.h>
 
 
-/*
- * SIZE_MAXPTR		The maximum size of a user space pointer, in bytes
- * SIZE_IMG_STRSPACE	The available string space, minus two pointers; we
- *			define it interms of the maximum, since we don't
- *			know the pointer size going in, until after we've
- *			parsed the executable image.
- */
-#define	SIZE_MAXPTR		8				/* 64 bits */
-#define	SIZE_IMG_STRSPACE	(NCARGS - 2 * SIZE_MAXPTR)
-
 /*
  * EAI_ITERLIMIT	The maximum number of times to iterate an image
  *			activator in exec_activate_image() before treating
@@ -203,6 +193,12 @@ extern struct savearea *get_user_regs(thread_t);
  */
 #define EAI_ITERLIMIT		10
 
+/*
+ * For #! interpreter parsing
+ */
+#define IS_WHITESPACE(ch) (((ch) == ' ') || ((ch) == '\t'))
+#define IS_EOL(ch) (((ch) == '#') || ((ch) == '\n'))
+
 extern vm_map_t bsd_pageable_map;
 extern struct fileops vnops;
 
@@ -218,9 +214,10 @@ static int execargs_alloc(struct image_params *imgp);
 static int execargs_free(struct image_params *imgp);
 static int exec_check_permissions(struct image_params *imgp);
 static int exec_extract_strings(struct image_params *imgp);
+static int exec_add_apple_strings(struct image_params *imgp);
 static int exec_handle_sugid(struct image_params *imgp);
 static int sugid_scripts = 0;
-SYSCTL_INT (_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW, &sugid_scripts, 0, "");
+SYSCTL_INT (_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW | CTLFLAG_LOCKED, &sugid_scripts, 0, "");
 static kern_return_t create_unix_stack(vm_map_t map, user_addr_t user_stack,
 					int customstack, proc_t p);
 static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
@@ -232,12 +229,14 @@ __private_extern__
 int  open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, int32_t *);
 
 /*
- * exec_add_string
+ * exec_add_user_string
  *
  * Add the requested string to the string space area.
  *
  * Parameters;	struct image_params *		image parameter block
  *		user_addr_t			string to add to strings area
+ *		int				segment from which string comes
+ *		boolean_t			TRUE if string contributes to NCARGS
  *
  * Returns:	0			Success
  *		!0			Failure errno from copyinstr()
@@ -245,29 +244,41 @@ int  open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, int32_t
  * Implicit returns:
  *		(imgp->ip_strendp)	updated location of next add, if any
  *		(imgp->ip_strspace)	updated byte count of space remaining
+ *		(imgp->ip_argspace)	updated byte count of space in NCARGS
  */
 static int
-exec_add_string(struct image_params *imgp, user_addr_t str)
+exec_add_user_string(struct image_params *imgp, user_addr_t str, int seg, boolean_t is_ncargs)
 {
-        int error = 0;
-
-        do {
-                size_t len = 0;
-		if (imgp->ip_strspace <= 0) {
+	int error = 0;
+	
+	do {
+		size_t len = 0;
+		int space;
+		
+		if (is_ncargs)
+			space = imgp->ip_argspace; /* by definition smaller than ip_strspace */
+		else
+			space = imgp->ip_strspace;
+		
+		if (space <= 0) {
 			error = E2BIG;
 			break;
 		}
-		if (!UIO_SEG_IS_USER_SPACE(imgp->ip_seg)) {
+		
+		if (!UIO_SEG_IS_USER_SPACE(seg)) {
 			char *kstr = CAST_DOWN(char *,str);	/* SAFE */
-			error = copystr(kstr, imgp->ip_strendp, imgp->ip_strspace, &len);
+			error = copystr(kstr, imgp->ip_strendp, space, &len);
 		} else  {
-			error = copyinstr(str, imgp->ip_strendp, imgp->ip_strspace,
-			    &len);
+			error = copyinstr(str, imgp->ip_strendp, space, &len);
 		}
+
 		imgp->ip_strendp += len;
 		imgp->ip_strspace -= len;
+		if (is_ncargs)
+			imgp->ip_argspace -= len;
+		
 	} while (error == ENAMETOOLONG);
-
+	
 	return error;
 }
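
The new is_ncargs flag separates what the caller of execve(2) is billed for from strings the kernel injects on its own. Hypothetical call shapes (the FALSE case is inferred from exec_add_apple_strings(), added elsewhere in this patch):

	/* an argv[]/envv[] string: charged against the NCARGS budget */
	error = exec_add_user_string(imgp, arg, imgp->ip_seg, TRUE);

	/* an "apple" string such as the exec path: consumes only the slop
	 * page above NCARGS provisioned by exec_reset_save_path() */
	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(kstr),
	    UIO_SYSSPACE, FALSE);
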
 
@@ -277,11 +288,10 @@ exec_add_string(struct image_params *imgp, user_addr_t str)
  * To support new app package launching for Mac OS X, the dyld needs the
  * first argument to execve() stored on the user stack.
  *
- * Save the executable path name at the top of the strings area and set
+ * Save the executable path name at the bottom of the strings area and set
  * the argument vector pointer to the location following that to indicate
  * the start of the argument and environment tuples, setting the remaining
- * string space count to the size of the string area minus the path length
- * and a reserve for two pointers.
+ * string space count to the size of the string area minus the path length.
  *
  * Parameters;	struct image_params *		image parameter block
  *		char *				path used to invoke program
@@ -295,8 +305,9 @@ exec_add_string(struct image_params *imgp, user_addr_t str)
  * Implicit returns:
  *		(imgp->ip_strings)		saved path
  *		(imgp->ip_strspace)		space remaining in ip_strings
- *		(imgp->ip_argv)			beginning of argument list
  *		(imgp->ip_strendp)		start of remaining copy area
+ *		(imgp->ip_argspace)		space remaining in NCARGS
+ *		(imgp->ip_applec)		Initial applev[0]
  *
  * Note:	We have to do this before the initial namei() since in the
  *		path contains symbolic links, namei() will overwrite the
@@ -310,10 +321,7 @@ exec_save_path(struct image_params *imgp, user_addr_t path, int seg)
 {
 	int error;
 	size_t	len;
-	char *kpath = CAST_DOWN(char *,path);	/* SAFE */
-
-	imgp->ip_strendp = imgp->ip_strings;
-	imgp->ip_strspace = SIZE_IMG_STRSPACE;
+	char *kpath;
 
 	len = MIN(MAXPATHLEN, imgp->ip_strspace);
 
@@ -323,6 +331,7 @@ exec_save_path(struct image_params *imgp, user_addr_t path, int seg)
 		error = copyinstr(path, imgp->ip_strings, len, &len);
 		break;
 	case UIO_SYSSPACE:
+		kpath = CAST_DOWN(char *,path);	/* SAFE */
 		error = copystr(kpath, imgp->ip_strings, len, &len);
 		break;
 	default:
@@ -333,12 +342,38 @@ exec_save_path(struct image_params *imgp, user_addr_t path, int seg)
 	if (!error) {
 		imgp->ip_strendp += len;
 		imgp->ip_strspace -= len;
-		imgp->ip_argv = imgp->ip_strendp;
 	}
 
 	return(error);
 }
 
+/*
+ * exec_reset_save_path
+ *
+ * If we detect a shell script, we need to reset the string area
+ * state so that the interpreter can be saved onto the stack.
+ *
+ * Parameters;	struct image_params *		image parameter block
+ *
+ * Returns:	int			0	Success
+ *
+ * Implicit returns:
+ *		(imgp->ip_strings)		saved path
+ *		(imgp->ip_strspace)		space remaining in ip_strings
+ *		(imgp->ip_strendp)		start of remaining copy area
+ *		(imgp->ip_argspace)		space remaining in NCARGS
+ *
+ */
+static int
+exec_reset_save_path(struct image_params *imgp)
+{
+	imgp->ip_strendp = imgp->ip_strings;
+	imgp->ip_argspace = NCARGS;
+	imgp->ip_strspace = ( NCARGS + PAGE_SIZE );
+
+	return (0);
+}
+
 #ifdef IMGPF_POWERPC
 /*
  * exec_powerpc32_imgact
@@ -406,11 +441,15 @@ exec_powerpc32_imgact(struct image_params *imgp)
 	imgp->ip_flags |= IMGPF_POWERPC;
 
 	/* impute an interpreter */
-	error = copystr(exec_archhandler_ppc.path, imgp->ip_interp_name,
+	error = copystr(exec_archhandler_ppc.path, imgp->ip_interp_buffer,
 			IMG_SHSIZE, &len);
 	if (error)
 		return (error);
 
+	exec_reset_save_path(imgp);
+	exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer),
+				   UIO_SYSSPACE);
+	
 	/*
 	 * provide a replacement string for p->p_comm; we have to use an
 	 * alternate buffer for this, rather than replacing it directly,
@@ -451,14 +490,12 @@ exec_shell_imgact(struct image_params *imgp)
 {
 	char *vdata = imgp->ip_vdata;
 	char *ihp;
-	char *line_endp;
+	char *line_startp, *line_endp;
 	char *interp;
-	char temp[16];
 	proc_t p;
 	struct fileproc *fp;
 	int fd;
 	int error;
-	size_t len;
 
 	/*
 	 * Make sure it's a shell script.  If we've already redirected
@@ -480,65 +517,82 @@ exec_shell_imgact(struct image_params *imgp)
 #endif	/* IMGPF_POWERPC */
 
 	imgp->ip_flags |= IMGPF_INTERPRET;
+	imgp->ip_interp_sugid_fd = -1;
+	imgp->ip_interp_buffer[0] = '\0';
 
-        /* Check to see if SUGID scripts are permitted.  If they aren't then
+	/* Check to see if SUGID scripts are permitted.  If they aren't then
 	 * clear the SUGID bits.
 	 * imgp->ip_vattr is known to be valid.
-         */
-        if (sugid_scripts == 0) {
-	   imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID);
+	 */
+	if (sugid_scripts == 0) {
+		imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID);
 	}
 
-	/* Find the nominal end of the interpreter line */
-	for( ihp = &vdata[2]; *ihp != '\n' && *ihp != '#'; ihp++) {
-		if (ihp >= &vdata[IMG_SHSIZE])
+	/* Try to find the first non-whitespace character */
+	for( ihp = &vdata[2]; ihp < &vdata[IMG_SHSIZE]; ihp++ ) {
+		if (IS_EOL(*ihp)) {
+			/* Did not find interpreter, "#!\n" */
 			return (ENOEXEC);
+		} else if (IS_WHITESPACE(*ihp)) {
+			/* Whitespace, like "#!    /bin/sh\n", keep going. */
+		} else {
+			/* Found start of interpreter */
+			break;
+		}
 	}
 
-	line_endp = ihp;
-	ihp = &vdata[2];
-	/* Skip over leading spaces - until the interpreter name */
-	while ( ihp < line_endp && ((*ihp == ' ') || (*ihp == '\t')))
-		ihp++;
+	if (ihp == &vdata[IMG_SHSIZE]) {
+		/* All whitespace, like "#!           " */
+		return (ENOEXEC);
+	}
 
-	/*
-	 * Find the last non-whitespace character before the end of line or
-	 * the beginning of a comment; this is our new end of line.
-	 */
-	for (;line_endp > ihp && ((*line_endp == ' ') || (*line_endp == '\t')); line_endp--)
-		continue;
+	line_startp = ihp;
+
+	/* Try to find the end of the interpreter+args string */
+	for ( ; ihp < &vdata[IMG_SHSIZE]; ihp++ ) {
+		if (IS_EOL(*ihp)) {
+			/* Got it */
+			break;
+		} else {
+			/* Still part of interpreter or args */
+		}
+	}
 
-	/* Empty? */
-	if (line_endp == ihp)
+	if (ihp == &vdata[IMG_SHSIZE]) {
+		/* A long line, like "#! blah blah blah" without end */
 		return (ENOEXEC);
+	}
+
+	/* Backtrack until we find the last non-whitespace */
+	while (IS_EOL(*ihp) || IS_WHITESPACE(*ihp)) {
+		ihp--;
+	}
+
+	/* The character after the last non-whitespace is our logical end of line */
+	line_endp = ihp + 1;
+
+	/*
+	 * Now we have pointers to the usable part of:
+	 *
+	 * "#!  /usr/bin/int first    second   third    \n"
+	 *      ^ line_startp                       ^ line_endp
+	 */
 
 	/* copy the interpreter name */
-	interp = imgp->ip_interp_name;
-	while ((ihp < line_endp) && (*ihp != ' ') && (*ihp != '\t'))
-		*interp++ = *ihp++;
+	interp = imgp->ip_interp_buffer;
+	for ( ihp = line_startp; (ihp < line_endp) && !IS_WHITESPACE(*ihp); ihp++)
+		*interp++ = *ihp;
 	*interp = '\0';
 
-	exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_name),
+	exec_reset_save_path(imgp);
+	exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_buffer),
 							UIO_SYSSPACE);
 
-	ihp = &vdata[2];
-	while (ihp < line_endp) {
-		/* Skip leading whitespace before each argument */
-		while ((*ihp == ' ') || (*ihp == '\t'))
-			ihp++;
-
-		if (ihp >= line_endp)
-			break;
-
-		/* We have an argument; copy it */
-		while ((ihp < line_endp) && (*ihp != ' ') && (*ihp != '\t')) {  
-			*imgp->ip_strendp++ = *ihp++;
-			imgp->ip_strspace--;
-		}
-		*imgp->ip_strendp++ = 0;
-		imgp->ip_strspace--;
-		imgp->ip_argc++;
-	}
+	/* Copy the entire interpreter + args for later processing into argv[] */
+	interp = imgp->ip_interp_buffer;
+	for ( ihp = line_startp; (ihp < line_endp); ihp++)
+		*interp++ = *ihp;
+	*interp = '\0';
 
 	/*
 	 * If we have a SUID or SGID script, create a file descriptor
@@ -562,10 +616,7 @@ exec_shell_imgact(struct image_params *imgp)
 		proc_fdunlock(p);
 		vnode_ref(imgp->ip_vp);
 
-		snprintf(temp, sizeof(temp), "/dev/fd/%d", fd);
-		error = copyoutstr(temp, imgp->ip_user_fname, sizeof(temp), &len);
-		if (error)
-			return(error);
+		imgp->ip_interp_sugid_fd = fd;
 	}
 
 	return (-3);
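
A hedged userland re-creation of the tokenizer above, useful for seeing what argv[] prefix a given "#!" line produces (note the kernel treats '#' as end-of-line, hence its inclusion in IS_EOL):

	#include <stdio.h>
	#include <string.h>

	#define IS_WHITESPACE(ch) (((ch) == ' ') || ((ch) == '\t'))
	#define IS_EOL(ch)        (((ch) == '#') || ((ch) == '\n'))

	int
	main(void)
	{
		char line[] = "#!  /usr/bin/interp first    second   \n";
		char *p = line + 2, *end, *tok;

		while (IS_WHITESPACE(*p))		/* skip leading blanks */
			p++;
		for (end = p; !IS_EOL(*end); end++)	/* find the EOL marker */
			;
		while (end > p && IS_WHITESPACE(end[-1]))
			end--;				/* trim trailing blanks */
		*end = '\0';

		tok = strtok(p, " \t");
		while (tok != NULL) {
			printf("argv: %s\n", tok);	/* interp, first, second */
			tok = strtok(NULL, " \t");
		}
		return 0;
	}
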
@@ -736,6 +787,7 @@ exec_mach_imgact(struct image_params *imgp)
 	load_result_t		load_result;
 	struct _posix_spawnattr *psa = NULL;
 	int spawn = (imgp->ip_flags & IMGPF_SPAWN);
+	int apptype = 0;
 
 	/*
 	 * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
@@ -766,7 +818,7 @@ exec_mach_imgact(struct image_params *imgp)
 
 	/*
 	 * Save off the vfexec state up front; we have to do this, because
-	 * we need to know if we were in this state initally subsequent to
+	 * we need to know if we were in this state initially subsequent to
 	 * creating the backing task, thread, and uthread for the child
 	 * process (from the vfs_context_t from in img_parms).
 	 */
@@ -813,20 +865,14 @@ grade:
 	if (error)
 		goto bad;
 
-	AUDIT_ARG(argv, imgp->ip_argv, imgp->ip_argc, 
-	    imgp->ip_strendargvp - imgp->ip_argv);
-	AUDIT_ARG(envv, imgp->ip_strendargvp, imgp->ip_envc,
-	    imgp->ip_strendp - imgp->ip_strendargvp);
+	error = exec_add_apple_strings(imgp);
+	if (error)
+		goto bad;
 
-	/*
-	 * Hack for binary compatability; put three NULs on the end of the
-	 * string area, and round it up to the next word boundary.  This
-	 * ensures padding with NULs to the boundary.
-	 */
-	imgp->ip_strendp[0] = 0;
-	imgp->ip_strendp[1] = 0;
-	imgp->ip_strendp[2] = 0;
-	imgp->ip_strendp += (((imgp->ip_strendp - imgp->ip_strings) + NBPW-1) & ~(NBPW-1));
+	AUDIT_ARG(argv, imgp->ip_startargv, imgp->ip_argc, 
+	    imgp->ip_endargv - imgp->ip_startargv);
+	AUDIT_ARG(envv, imgp->ip_endargv, imgp->ip_envc,
+	    imgp->ip_endenvv - imgp->ip_endargv);
 
 #ifdef IMGPF_POWERPC
 	/*
@@ -838,7 +884,7 @@ grade:
 	 * to the "encapsulated_binary:" label in exec_activate_image().
 	 */
 	if (imgp->ip_vattr->va_fsid == exec_archhandler_ppc.fsid &&
-		imgp->ip_vattr->va_fileid == (uint64_t)((u_long)exec_archhandler_ppc.fileid)) {
+		imgp->ip_vattr->va_fileid == exec_archhandler_ppc.fileid) {
 		imgp->ip_flags |= IMGPF_POWERPC;
 	}
 #endif	/* IMGPF_POWERPC */
@@ -846,7 +892,7 @@ grade:
 	/*
 	 * We are being called to activate an image subsequent to a vfork()
 	 * operation; in this case, we know that our task, thread, and
-	 * uthread are actualy those of our parent, and our proc, which we
+	 * uthread are actually those of our parent, and our proc, which we
 	 * obtained indirectly from the image_params vfs_context_t, is the
 	 * new child process.
 	 */
@@ -885,7 +931,7 @@ grade:
 	 *	Load the Mach-O file.
 	 *
 	 * NOTE: An error after this point  indicates we have potentially
-	 * destroyed or overwrote some process state while attempting an
+	 * destroyed or overwritten some process state while attempting an
 	 * execve() following a vfork(), which is an unrecoverable condition.
 	 */
 
@@ -932,10 +978,9 @@ grade:
 		    cpu_type());
 	
 	/*
-	 * Close file descriptors
-	 * which specify close-on-exec.
+	 * Close file descriptors which specify close-on-exec.
 	 */
-	fdexec(p);
+	fdexec(p, psa != NULL ? psa->psa_flags : 0);
 
 	/*
 	 * deal with set[ug]id.
@@ -959,14 +1004,6 @@ grade:
 		goto badtoolate;
 	}
 
-	/*  
-	 * There is no  continuing workq context during 
-	 * vfork exec. So no need to reset then. Otherwise
-	 * clear the workqueue context.
-	 */
-	if (vfexec == 0 && spawn == 0) {
-		(void)workqueue_exit(p);
-	}
 	if (vfexec || spawn) {
 		old_map = vm_map_switch(get_task_map(task));
 	}
@@ -991,15 +1028,12 @@ grade:
 	
 	if (load_result.dynlinker) {
 		uint64_t	ap;
+		int			new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
 
 		/* Adjust the stack */
-		if (imgp->ip_flags & IMGPF_IS_64BIT) {
-			ap = thread_adjuserstack(thread, -8);
-			error = copyoutptr(load_result.mach_header, ap, 8);
-		} else {
-			ap = thread_adjuserstack(thread, -4);
-			error = suword(ap, load_result.mach_header);
-		}
+		ap = thread_adjuserstack(thread, -new_ptr_size);
+		error = copyoutptr(load_result.mach_header, ap, new_ptr_size);
+
 		if (error) {
 		        if (vfexec || spawn)
 			        vm_map_switch(old_map);
@@ -1058,6 +1092,8 @@ grade:
 		p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
 	}
 
+	pal_dbg_set_task_name( p->task );
+
 	memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid));
 
 // <rdar://6598155> dtrace code cleanup needed
@@ -1143,6 +1179,22 @@ grade:
 			proc_unlock(p);
 			(void) task_suspend(p->task);
 		}
+		if ((psa->psa_flags & POSIX_SPAWN_OSX_TALAPP_START) || (psa->psa_flags & POSIX_SPAWN_OSX_DBCLIENT_START) || (psa->psa_flags & POSIX_SPAWN_IOS_APP_START)) {
+			if ((psa->psa_flags & POSIX_SPAWN_OSX_TALAPP_START))
+				apptype = PROC_POLICY_OSX_APPTYPE_TAL;
+			else if (psa->psa_flags & POSIX_SPAWN_OSX_DBCLIENT_START)
+				apptype = PROC_POLICY_OSX_APPTYPE_DBCLIENT;
+			else if (psa->psa_flags & POSIX_SPAWN_IOS_APP_START)
+				apptype = PROC_POLICY_IOS_APPTYPE;
+			else
+				apptype = 0;
+			proc_set_task_apptype(p->task, apptype);
+			if ((apptype == PROC_POLICY_OSX_APPTYPE_TAL) || 
+				(apptype == PROC_POLICY_OSX_APPTYPE_DBCLIENT)) {
+
+				proc_apply_task_networkbg_internal(p);
+			}
+		}
 	}
 
 	/*
@@ -1245,21 +1297,16 @@ exec_activate_image(struct image_params *imgp)
 	if (error)
 		goto bad;
 	
-	/*
-	 * XXXAUDIT: Note: the double copyin introduces an audit
-	 * race.  To correct this race, we must use a single
-	 * copyin(), e.g. by passing a flag to namei to indicate an
-	 * external path buffer is being used.
-	 */
 	error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg);
 	if (error) {
 		goto bad_notrans;
 	}
 
+	/* Use imgp->ip_strings, which contains the copyin-ed exec path */
 	DTRACE_PROC1(exec, uintptr_t, imgp->ip_strings);
 
-	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
-		imgp->ip_seg, imgp->ip_user_fname, imgp->ip_vfs_context);
+	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
+		   UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context);
 
 again:
 	error = namei(&nd);
@@ -1268,7 +1315,20 @@ again:
 	imgp->ip_ndp = &nd;	/* successful namei(); call nameidone() later */
 	imgp->ip_vp = nd.ni_vp;	/* if set, need to vnode_put() at some point */
 
-	error = proc_transstart(p, 0);
+	/*
+	 * Before we start the transition from binary A to binary B, make
+	 * sure another thread hasn't started exiting the process.  We grab
+	 * the proc lock to check p_lflag initially, and the transition
+	 * mechanism ensures that the value doesn't change after we release
+	 * the lock.
+	 */
+	proc_lock(p);
+	if (p->p_lflag & P_LEXIT) {
+		proc_unlock(p);
+		goto bad_notrans;
+	}
+	error = proc_transstart(p, 1);
+	proc_unlock(p);
 	if (error)
 		goto bad_notrans;
 
@@ -1322,11 +1382,16 @@ encapsulated_binary:
 			mac_vnode_label_copy(imgp->ip_vp->v_label,
 					     imgp->ip_scriptlabelp);
 #endif
+
+			nameidone(&nd);
+
 			vnode_put(imgp->ip_vp);
 			imgp->ip_vp = NULL;	/* already put */
-                
-			NDINIT(&nd, LOOKUP, (nd.ni_cnd.cn_flags & HASBUF) | (FOLLOW | LOCKLEAF),
-				UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_interp_name), imgp->ip_vfs_context);
+			imgp->ip_ndp = NULL; /* already nameidone */
+
+			/* Use imgp->ip_strings, which exec_shell_imgact reset to the interpreter */
+			NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF,
+				   UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_strings), imgp->ip_vfs_context);
 
 #ifdef IMGPF_POWERPC
 			/*
@@ -1379,10 +1444,10 @@ bad_notrans:
  * 		short psa_flags		posix spawn attribute flags
  *
  * Returns:	0			Success
- * 		KERN_FAILURE		Failure
+ * 		EINVAL			Failure
  * 		ENOTSUP			Illegal posix_spawn attr flag was set
  */
-static int
+static errno_t
 exec_handle_port_actions(struct image_params *imgp, short psa_flags)
 {
 	_posix_spawn_port_actions_t pacts = imgp->ip_px_spa;
@@ -1390,16 +1455,17 @@ exec_handle_port_actions(struct image_params *imgp, short psa_flags)
 	_ps_port_action_t *act = NULL;
 	task_t task = p->task;
 	ipc_port_t port = NULL;
-	kern_return_t ret = KERN_SUCCESS;
+	errno_t ret = KERN_SUCCESS;
 	int i;
 
 	for (i = 0; i < pacts->pspa_count; i++) {
 		act = &pacts->pspa_actions[i];
 
-		ret = ipc_object_copyin(get_task_ipcspace(current_task()),
+		if (ipc_object_copyin(get_task_ipcspace(current_task()),
 				CAST_MACH_PORT_TO_NAME(act->new_port),
 				MACH_MSG_TYPE_COPY_SEND,
-				(ipc_object_t *) &port);
+				(ipc_object_t *) &port) != KERN_SUCCESS)
+			return EINVAL;
 
 		if (ret) 			
 			return ret;
@@ -1409,19 +1475,19 @@ exec_handle_port_actions(struct image_params *imgp, short psa_flags)
 				/* Only allowed when not under vfork */
 				if (!(psa_flags & POSIX_SPAWN_SETEXEC))
 					return ENOTSUP;
-				ret = task_set_special_port(task, 
+				ret = (task_set_special_port(task, 
 						act->which, 
-						port);
+						port) == KERN_SUCCESS) ? 0 : EINVAL;
 				break;
 			case PSPA_EXCEPTION:
 				/* Only allowed when not under vfork */
 				if (!(psa_flags & POSIX_SPAWN_SETEXEC))
 					return ENOTSUP;
-				ret = task_set_exception_ports(task, 
+				ret = (task_set_exception_ports(task, 
 						act->mask,
 						port, 
 						act->behavior, 
-						act->flavor);
+						act->flavor) == KERN_SUCCESS) ? 0 : EINVAL;
 				break;
 #if CONFIG_AUDIT
 			case PSPA_AU_SESSION:
@@ -1430,7 +1496,7 @@ exec_handle_port_actions(struct image_params *imgp, short psa_flags)
 				break;
 #endif
 			default:
-				ret = KERN_FAILURE;
+				ret = EINVAL;
 		}
 		/* action failed, so release port resources */
 		if (ret) { 
@@ -1461,7 +1527,7 @@ exec_handle_port_actions(struct image_params *imgp, short psa_flags)
  *		normally permitted to perform.
  */
 static int
-exec_handle_file_actions(struct image_params *imgp)
+exec_handle_file_actions(struct image_params *imgp, short psa_flags)
 {
 	int error = 0;
 	int action;
@@ -1479,7 +1545,7 @@ exec_handle_file_actions(struct image_params *imgp)
 			 * a path argument, which is normally copied in from
 			 * user space; because of this, we have to support an
 			 * open from kernel space that passes an address space
-			 * context oof UIO_SYSSPACE, and casts the address
+			 * context of UIO_SYSSPACE, and casts the address
 			 * argument to a user_addr_t.
 			 */
 			struct vnode_attr va;
@@ -1494,7 +1560,7 @@ exec_handle_file_actions(struct image_params *imgp)
 			mode = ((mode &~ p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
 			VATTR_SET(&va, va_mode, mode & ACCESSPERMS);
 
-			NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
+			NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_SYSSPACE,
 			       CAST_USER_ADDR_T(psfa->psfaa_openargs.psfao_path),
 			       imgp->ip_vfs_context);
 
@@ -1506,8 +1572,8 @@ exec_handle_file_actions(struct image_params *imgp)
 
 			/*
 			 * If there's an error, or we get the right fd by
-			 * accident, then drop out here.  This is easier that
-			 * rearchitecting all the open code to preallocate fd
+			 * accident, then drop out here.  This is easier than
+			 * reworking all the open code to preallocate fd
 			 * slots, and internally taking one as an argument.
 			 */
 			if (error || ival[0] == psfa->psfaa_filedes)
@@ -1566,16 +1632,68 @@ exec_handle_file_actions(struct image_params *imgp)
 			}
 			break;
 
+		case PSFA_INHERIT: {
+			struct fileproc *fp;
+			int fd = psfa->psfaa_filedes;
+
+			/*
+			 * Check to see if the descriptor exists, and
+			 * ensure it's -not- marked as close-on-exec.
+			 * [Less code than the equivalent F_GETFD/F_SETFD.]
+			 */
+			proc_fdlock(p);
+			if ((error = fp_lookup(p, fd, &fp, 1)) == 0) {
+				*fdflags(p, fd) &= ~UF_EXCLOSE;
+				(void) fp_drop(p, fd, fp, 1);
+			}
+			proc_fdunlock(p);
+			}
+			break;
+
 		default:
 			error = EINVAL;
 			break;
 		}
+
 		/* All file actions failures are considered fatal, per POSIX */
+
 		if (error)
 			break;
 	}
 
-	return (error);
+	if (error != 0 || (psa_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) == 0)
+		return (error);
+
+	/*
+	 * If POSIX_SPAWN_CLOEXEC_DEFAULT is set, behave (during
+	 * this spawn only) as if "close on exec" is the default
+	 * disposition of all pre-existing file descriptors.  In this case,
+	 * the list of file descriptors mentioned in the file actions
+	 * are the only ones that can be inherited, so mark them now.
+	 *
+	 * The actual closing part comes later, in fdexec().
+	 */
+	proc_fdlock(p);
+	for (action = 0; action < px_sfap->psfa_act_count; action++) {
+		_psfa_action_t *psfa = &px_sfap->psfa_act_acts[action];
+		int fd = psfa->psfaa_filedes;
+
+		switch (psfa->psfaa_type) {
+		case PSFA_DUP2:
+			fd = psfa->psfaa_openargs.psfao_oflag;
+			/*FALLTHROUGH*/
+		case PSFA_OPEN:
+		case PSFA_INHERIT:
+			*fdflags(p, fd) |= UF_INHERIT;
+			break;
+
+		case PSFA_CLOSE:
+			break;
+		}
+	}
+	proc_fdunlock(p);
+
+	return (0);
 }
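
A hedged sketch of the userland view of POSIX_SPAWN_CLOEXEC_DEFAULT: every pre-existing fd is treated as close-on-exec unless a file action re-allows it. The posix_spawn_file_actions_addinherit_np() wrapper for PSFA_INHERIT is an assumption of this sketch:

	#include <spawn.h>
	#include <unistd.h>

	extern char **environ;
	/* assumed wrapper corresponding to PSFA_INHERIT */
	extern int posix_spawn_file_actions_addinherit_np(
	    posix_spawn_file_actions_t *, int);

	int
	spawn_with_stdio_only(pid_t *pid, char *const argv[])
	{
		posix_spawnattr_t attr;
		posix_spawn_file_actions_t fa;
		int err;

		posix_spawnattr_init(&attr);
		posix_spawnattr_setflags(&attr, POSIX_SPAWN_CLOEXEC_DEFAULT);

		posix_spawn_file_actions_init(&fa);
		/* only stdio survives into the child */
		posix_spawn_file_actions_addinherit_np(&fa, STDIN_FILENO);
		posix_spawn_file_actions_addinherit_np(&fa, STDOUT_FILENO);
		posix_spawn_file_actions_addinherit_np(&fa, STDERR_FILENO);

		err = posix_spawn(pid, argv[0], &fa, &attr, argv, environ);
		posix_spawn_file_actions_destroy(&fa);
		posix_spawnattr_destroy(&attr);
		return err;
	}
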
 
 
@@ -1628,10 +1746,12 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 	_posix_spawn_port_actions_t px_spap = NULL;
 	struct __kern_sigaction vec;
 	boolean_t spawn_no_exec = FALSE;
+	boolean_t proc_transit_set = TRUE;
+	boolean_t exec_done = FALSE;
 
 	/*
 	 * Allocate a big chunk for locals instead of using stack since these  
-	 * structures a pretty big.
+	 * structures are pretty big.
 	 */
 	MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
 	imgp = (struct image_params *) bufp;
@@ -1740,7 +1860,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 	}
 
 	/*
-	 * If we don't have the extention flag that turns "posix_spawn()"
+	 * If we don't have the extension flag that turns "posix_spawn()"
 	 * into "execve() with options", then we will be creating a new
 	 * process which does not inherit memory from the parent process,
 	 * which is one of the most expensive things about using fork()
@@ -1755,7 +1875,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 
 	if (spawn_no_exec)
 		p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread);
-
+	assert(p != NULL);
 
 	/* By default, the thread everyone plays with is the parent */
 	context.vc_thread = current_thread();
@@ -1768,17 +1888,22 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 	if (spawn_no_exec)
 		context.vc_thread = imgp->ip_new_thread;
 
-
 	/*
 	 * Post fdcopy(), pre exec_handle_sugid() - this is where we want
 	 * to handle the file_actions.  Since vfork() also ends up setting
 	 * us into the parent process group, and saved off the signal flags,
 	 * this is also where we want to handle the spawn flags.
 	 */
+
 	/* Has spawn file actions? */
-	if (imgp->ip_px_sfa != NULL &&
-	    (error = exec_handle_file_actions(imgp)) != 0) {
-		goto bad;
+	if (imgp->ip_px_sfa != NULL) {
+		/*
+		 * The POSIX_SPAWN_CLOEXEC_DEFAULT flag
+		 * is handled in exec_handle_file_actions().
+		 */
+		if ((error = exec_handle_file_actions(imgp,
+		    imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0)) != 0)
+			goto bad;
 	}
 
 	/* Has spawn port actions? */
@@ -1787,7 +1912,8 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 		 * The check for the POSIX_SPAWN_SETEXEC flag is done in 
 		 * exec_handle_port_actions().
 		 */
-		if((error = exec_handle_port_actions(imgp, px_sa.psa_flags)) != 0) 
+		if ((error = exec_handle_port_actions(imgp,
+		    imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0)) != 0) 
 			goto bad;
 	}
 
@@ -1824,12 +1950,36 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 		 */
 		if (px_sa.psa_flags & POSIX_SPAWN_RESETIDS) {
 			kauth_cred_t my_cred = p->p_ucred;
-			kauth_cred_t my_new_cred = kauth_cred_setuidgid(my_cred, my_cred->cr_ruid, my_cred->cr_rgid);
-			if (my_new_cred != my_cred)
+			kauth_cred_t my_new_cred = kauth_cred_setuidgid(my_cred, kauth_cred_getruid(my_cred), kauth_cred_getrgid(my_cred));
+			if (my_new_cred != my_cred) {
 				p->p_ucred = my_new_cred;
+				/* update cred on proc */
+				PROC_UPDATE_CREDS_ONPROC(p);
+			}
 		}
+
+		/*
+		 * Disable ASLR for the spawned process.
+		 */
+		if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR)
+			OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag);
+
+		/*
+		 * Forcibly disallow execution from data pages for the spawned process
+		 * even if it would otherwise be permitted by the architecture default.
+		 */
+		if (px_sa.psa_flags & _POSIX_SPAWN_ALLOW_DATA_EXEC)
+			imgp->ip_flags |= IMGPF_ALLOW_DATA_EXEC;
 	}
 
+	/*
+	 * Disable ASLR during image activation.  This occurs either if the
+	 * _POSIX_SPAWN_DISABLE_ASLR attribute was found above or if
+	 * P_DISABLE_ASLR was inherited from the parent process.
+	 */
+	if (p->p_flag & P_DISABLE_ASLR)
+		imgp->ip_flags |= IMGPF_DISABLE_ASLR;
+
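
A hedged sketch of requesting the new ASLR opt-out from userland; _POSIX_SPAWN_DISABLE_ASLR is a private flag, and both its availability in <spawn.h> and the value used here are assumptions:

	#include <spawn.h>

	#ifndef _POSIX_SPAWN_DISABLE_ASLR
	#define _POSIX_SPAWN_DISABLE_ASLR 0x0100	/* assumed value */
	#endif

	int
	attr_disable_aslr(posix_spawnattr_t *attr)
	{
		short flags = 0;

		posix_spawnattr_getflags(attr, &flags);
		return posix_spawnattr_setflags(attr,
		    flags | _POSIX_SPAWN_DISABLE_ASLR);
	}
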
 	/* 
 	 * Clear transition flag so we won't hang if exec_activate_image() causes
 	 * an automount (and launchd does a proc sysctl to service it).
@@ -1838,6 +1988,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 	 */
 	if (spawn_no_exec) {
 		proc_transend(p, 0);
+		proc_transit_set = 0;
 	}
 
 #if MAC_SPAWN	/* XXX */
@@ -1853,9 +2004,13 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval)
 	 */
 	error = exec_activate_image(imgp);
 
-	/* Image not claimed by any activator? */
-	if (error == -1)
+	if (error == 0) {
+		/* process completed the exec */
+		exec_done = TRUE;
+	} else if (error == -1) {
+		/* Image not claimed by any activator? */
 		error = ENOEXEC;
+	}
 
 	/*
 	 * If we have a spawn attr, and it contains signal related flags,
@@ -1938,6 +2093,9 @@ bad:
 	 * before check_for_signature(), which uses psignal.
 	 */
 	if (spawn_no_exec) {
+		if (proc_transit_set)
+			proc_transend(p, 0);
+
 		/*
 		 * Drop the signal lock on the child which was taken on our
 		 * behalf by forkproc()/cloneproc() to prevent signals being
@@ -2040,8 +2198,10 @@ bad:
 				p->exit_thread = current_thread();
 				proc_unlock(p);
 				exit1(p, 1, (int *)NULL);
-				task_deallocate(get_threadtask(imgp->ip_new_thread));
-				thread_deallocate(imgp->ip_new_thread);
+				if (exec_done == FALSE) {
+					task_deallocate(get_threadtask(imgp->ip_new_thread));
+					thread_deallocate(imgp->ip_new_thread);
+				}
 			} else {
 				/* someone is doing it for us; just skip it */
 				proc_unlock(p);
@@ -2165,7 +2325,7 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval)
 	imgp->ip_vattr = vap;
 	imgp->ip_origvattr = origvap;
 	imgp->ip_vfs_context = &context;
-	imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE);
+	imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE) | ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE);
 	imgp->ip_p_comm = alt_p_comm;		/* for PowerPC */
 	imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
 
@@ -2273,8 +2433,6 @@ copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size)
  * Returns:	0			Success
  *		EFAULT			Bad 'ua'
  *
- * Implicit returns:
- *		*ptr_size		Modified
  */
 static int
 copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size)
@@ -2311,85 +2469,156 @@ copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size)
  * Note:	The strings segment layout is backward, from the beginning
  *		of the top of the stack to consume the minimal amount of
  *		space possible; the returned stack pointer points to the
- *		end of the area consumed (stacks grow upward).
+ *		end of the area consumed (stacks grow downward).
  *
  *		argc is an int; arg[i] are pointers; env[i] are pointers;
- *		exec_path is a pointer; the 0's are (void *)NULL's
+ *		the 0's are (void *)NULL's
  *
  * The stack frame layout is:
  *
- *	+-------------+
- * sp->	|     argc    |
- *	+-------------+
- *	|    arg[0]   |
- *	+-------------+
- *	       :
- *	       :
- *	+-------------+
- *	| arg[argc-1] |
- *	+-------------+
- *	|      0      |
- *	+-------------+
- *	|    env[0]   |
- *	+-------------+
- *	       :
- *	       :
- *	+-------------+
- *	|    env[n]   |
- *	+-------------+
- *	|      0      |
- *	+-------------+
- *	|  exec_path  |	In MacOS X PR2 Beaker2E the path passed to exec() is
- *	+-------------+	passed on the stack just after the trailing 0 of the
- *	|      0      | the envp[] array as a pointer to a string.
- *	+-------------+
- *	|  PATH AREA  |
- *	+-------------+
- *	| STRING AREA |
- *	       :
- *	       :
- *	|             | <- p->user_stack
- *	+-------------+
+ *      +-------------+ <- p->user_stack
+ *      |     16b     |
+ *      +-------------+
+ *      | STRING AREA |
+ *      |      :      |
+ *      |      :      |
+ *      |      :      |
+ *      +- -- -- -- --+
+ *      |  PATH AREA  |
+ *      +-------------+
+ *      |      0      |
+ *      +-------------+
+ *      |  applev[n]  |
+ *      +-------------+
+ *             :
+ *             :
+ *      +-------------+
+ *      |  applev[1]  |
+ *      +-------------+
+ *      | exec_path / |
+ *      |  applev[0]  |
+ *      +-------------+
+ *      |      0      |
+ *      +-------------+
+ *      |    env[n]   |
+ *      +-------------+
+ *             :
+ *             :
+ *      +-------------+
+ *      |    env[0]   |
+ *      +-------------+
+ *      |      0      |
+ *      +-------------+
+ *      | arg[argc-1] |
+ *      +-------------+
+ *             :
+ *             :
+ *      +-------------+
+ *      |    arg[0]   |
+ *      +-------------+
+ *      |     argc    |
+ * sp-> +-------------+
  *
  * Although technically a part of the STRING AREA, we treat the PATH AREA as
  * a separate entity.  This allows us to align the beginning of the PATH AREA
  * to a pointer boundary so that the exec_path, env[i], and argv[i] pointers
  * which preceed it on the stack are properly aligned.
- *
- * TODO:	argc copied with suword(), which takes a 64 bit address
  */
+
 static int
 exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp)
 {
 	proc_t p = vfs_context_proc(imgp->ip_vfs_context);
 	int	ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
-	char	*argv = imgp->ip_argv;	/* modifiable copy of argv */
+	int	ptr_area_size;
+	void *ptr_buffer_start, *ptr_buffer;
+	int string_size;
+
 	user_addr_t	string_area;	/* *argv[], *env[] */
-	user_addr_t	path_area;	/* package launch path */
-	user_addr_t	ptr_area;	/* argv[], env[], exec_path */
+	user_addr_t	ptr_area;	/* argv[], env[], applev[] */
+	user_addr_t argc_area;	/* argc */
 	user_addr_t	stack;
-	int	stringc = imgp->ip_argc + imgp->ip_envc;
-	size_t len;
 	int error;
-	ssize_t strspace;
+
+	unsigned i;
+	struct copyout_desc {
+		char	*start_string;
+		int		count;
+#if CONFIG_DTRACE
+		user_addr_t	*dtrace_cookie;
+#endif
+		boolean_t	null_term;
+	} descriptors[] = {
+		{
+			.start_string = imgp->ip_startargv,
+			.count = imgp->ip_argc,
+#if CONFIG_DTRACE
+			.dtrace_cookie = &p->p_dtrace_argv,
+#endif
+			.null_term = TRUE
+		},
+		{
+			.start_string = imgp->ip_endargv,
+			.count = imgp->ip_envc,
+#if CONFIG_DTRACE
+			.dtrace_cookie = &p->p_dtrace_envp,
+#endif
+			.null_term = TRUE
+		},
+		{
+			.start_string = imgp->ip_strings,
+			.count = 1,
+#if CONFIG_DTRACE
+			.dtrace_cookie = NULL,
+#endif
+			.null_term = FALSE
+		},
+		{
+			.start_string = imgp->ip_endenvv,
+			.count = imgp->ip_applec - 1, /* exec_path handled above */
+#if CONFIG_DTRACE
+			.dtrace_cookie = NULL,
+#endif
+			.null_term = TRUE
+		}
+	};
 
 	stack = *stackp;
 
-	size_t patharea_len = imgp->ip_argv - imgp->ip_strings;
-	int envc_add = 0;
-	
 	/*
-	 * Set up pointers to the beginning of the string area, the beginning
-	 * of the path area, and the beginning of the pointer area (actually,
-	 * the location of argc, an int, which may be smaller than a pointer,
-	 * but we use ptr_size worth of space for it, for alignment).
+	 * All previous contributors to the string area
+	 * should have aligned their sub-area
 	 */
-	string_area = stack - (((imgp->ip_strendp - imgp->ip_strings) + ptr_size-1) & ~(ptr_size-1)) - ptr_size;
-	path_area = string_area - ((patharea_len + ptr_size-1) & ~(ptr_size-1));
-	ptr_area = path_area - ((imgp->ip_argc + imgp->ip_envc + 4 + envc_add) * ptr_size) - ptr_size /*argc*/;
+	if (imgp->ip_strspace % ptr_size != 0) {
+		error = EINVAL;
+		goto bad;
+	}
 
-	/* Return the initial stack address: the location of argc */
-	*stackp = ptr_area;
+	/* Grow the stack down for the strings we've been building up */
+	string_size = imgp->ip_strendp - imgp->ip_strings;
+	stack -= string_size;
+	string_area = stack;
+
+	/*
+	 * Need room for one pointer for each string, plus
+	 * one for the NULLs terminating the argv, envv, and apple areas.
+	 */
+	ptr_area_size = (imgp->ip_argc + imgp->ip_envc + imgp->ip_applec + 3) *
+	    ptr_size;
+	stack -= ptr_area_size;
+	ptr_area = stack;
+
+	/* We'll construct all the pointer arrays in our string buffer,
+	 * which we already know is aligned properly, and ip_argspace
+	 * was used to verify we have enough space.
+	 */
+	ptr_buffer_start = ptr_buffer = (void *)imgp->ip_strendp;
+
+	/*
+	 * Need room for pointer-aligned argc slot.
+	 */
+	stack -= ptr_size;
+	argc_area = stack;
 
 	/*
 	 * Record the size of the arguments area so that sysctl_procargs()
@@ -2397,92 +2626,73 @@ exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp)
 	 */
 	proc_lock(p);
 	p->p_argc = imgp->ip_argc;
-	p->p_argslen = (int)(stack - path_area);
+	p->p_argslen = (int)(*stackp - string_area);
 	proc_unlock(p);
 
+	/* Return the initial stack address: the location of argc */
+	*stackp = stack;
 
 	/*
-	 * Support for new app package launching for Mac OS X allocates
-	 * the "path" at the begining of the imgp->ip_strings buffer.
-	 * copy it just before the string area.
-	 */
-	len = 0;
-	error = copyoutstr(imgp->ip_strings, path_area,
-						   patharea_len,
-						   &len);
+	 * Copy out the entire strings area.
+	 */
+	error = copyout(imgp->ip_strings, string_area,
+						   string_size);
 	if (error)
 		goto bad;
 
-
-	/* Save a NULL pointer below it */
-	(void)copyoutptr(0LL, path_area - ptr_size, ptr_size);
-
-	/* Save the pointer to "path" just below it */
-	(void)copyoutptr(path_area, path_area - 2*ptr_size, ptr_size);
-
-	/*
-	 * ptr_size for 2 NULL one each ofter arg[argc -1] and env[n]
-	 * ptr_size for argc
-	 * skip over saved path, ptr_size for pointer to path,
-	 * and ptr_size for the NULL after pointer to path.
-	 */
-
-	/* argc (int32, stored in a ptr_size area) */
-	(void)suword(ptr_area, imgp->ip_argc);
-	ptr_area += sizeof(int);
-	/* pad to ptr_size, if 64 bit image, to ensure user stack alignment */
-	if (imgp->ip_flags & IMGPF_IS_64BIT) {
-		(void)suword(ptr_area, 0);	/* int, not long: ignored */
-		ptr_area += sizeof(int);
-	}
+	for (i = 0; i < sizeof(descriptors)/sizeof(descriptors[0]); i++) {
+		char *cur_string = descriptors[i].start_string;
+		int j;
 
 #if CONFIG_DTRACE
-	p->p_dtrace_argv = ptr_area; /* user_addr_t &argv[0] for dtrace convenience */
+		if (descriptors[i].dtrace_cookie) {
+			proc_lock(p);
+			*descriptors[i].dtrace_cookie = ptr_area + ((uintptr_t)ptr_buffer - (uintptr_t)ptr_buffer_start); /* dtrace convenience */
+			proc_unlock(p);
+		}
 #endif /* CONFIG_DTRACE */
 
-	/*
-	 * We use (string_area - path_area) here rather than the more
-	 * intuitive (imgp->ip_argv - imgp->ip_strings) because we are
-	 * interested in the length of the PATH_AREA in user space,
-	 * rather than the actual length of the execution path, since
-	 * it includes alignment padding of the PATH_AREA + STRING_AREA
-	 * to a ptr_size boundary.
-	 */
-	strspace = SIZE_IMG_STRSPACE - (string_area - path_area);
-	for (;;) {
-		if (stringc == imgp->ip_envc) {
-			/* argv[n] = NULL */
-			(void)copyoutptr(0LL, ptr_area, ptr_size);
-			ptr_area += ptr_size;
-#if CONFIG_DTRACE
-			p->p_dtrace_envp = ptr_area; /* user_addr_t &env[0] for dtrace convenience */
-#endif /* CONFIG_DTRACE */
+		/*
+		 * For each segment (argv, envv, applev), copy as many pointers as requested
+		 * to our pointer buffer.
+		 */
+		for (j = 0; j < descriptors[i].count; j++) {
+			user_addr_t cur_address = string_area + (cur_string - imgp->ip_strings);
+			
+			/* Copy out the pointer to the current string. Alignment has been verified  */
+			if (ptr_size == 8) {
+				*(uint64_t *)ptr_buffer = (uint64_t)cur_address;
+			} else {
+				*(uint32_t *)ptr_buffer = (uint32_t)cur_address;
+			}
+			
+			ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
+			cur_string += strlen(cur_string) + 1; /* Only a NUL between strings in the same area */
 		}
-		if (--stringc < 0)
-			break;
 
-		/* pointer: argv[n]/env[n] */
-		(void)copyoutptr(string_area, ptr_area, ptr_size);
-
-		/* string : argv[n][]/env[n][] */
-		do {
-			if (strspace <= 0) {
-				error = E2BIG;
-				break;
+		if (descriptors[i].null_term) {
+			if (ptr_size == 8) {
+				*(uint64_t *)ptr_buffer = 0ULL;
+			} else {
+				*(uint32_t *)ptr_buffer = 0;
 			}
-			error = copyoutstr(argv, string_area,
-						strspace,
-						&len);
-			string_area += len;
-			argv += len;
-			strspace -= len;
-		} while (error == ENAMETOOLONG);
-		if (error == EFAULT || error == E2BIG)
-			break;	/* bad stack - user's problem */
-		ptr_area += ptr_size;
-	}
-	/* env[n] = NULL */
-	(void)copyoutptr(0LL, ptr_area, ptr_size);
+			
+			ptr_buffer = (void *)((uintptr_t)ptr_buffer + ptr_size);
+		}
+	}
+
+	/*
+	 * Copy out all our pointer arrays in bulk.
+	 */
+	error = copyout(ptr_buffer_start, ptr_area,
+					ptr_area_size);
+	if (error)
+		goto bad;
+
+	/* argc (int32, stored in a ptr_size area) */
+	error = copyoutptr((user_addr_t)imgp->ip_argc, argc_area, ptr_size);
+	if (error)
+		goto bad;
 
 bad:
 	return(error);
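
The descriptor-driven loop above walks each segment's packed strings and emits one pointer per string into a kernel-side buffer, so the argv/envv/applev arrays reach user space in a single copyout. A minimal user-space sketch of the same pointer-vector construction (hypothetical names; host pointers stand in for user addresses):

#include <stdio.h>
#include <string.h>

/*
 * Build an argv[]-style vector over a packed, NUL-separated strings
 * buffer, then NULL-terminate it, mirroring the descriptors[] loop:
 * advance by strlen()+1 because only a NUL separates adjacent strings.
 */
static void
build_ptr_vector(const char *strings, int count, const char **ptrs)
{
	const char *cur = strings;
	int i;

	for (i = 0; i < count; i++) {
		ptrs[i] = cur;
		cur += strlen(cur) + 1;
	}
	ptrs[count] = NULL;	/* the null_term step */
}

int
main(void)
{
	const char strings[] = "ls\0-l\0/tmp";	/* packed like ip_strings */
	const char *argv_out[4];
	int i;

	build_ptr_vector(strings, 3, argv_out);
	for (i = 0; argv_out[i] != NULL; i++)
		printf("argv[%d] = %s\n", i, argv_out[i]);
	return 0;
}
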
@@ -2495,6 +2705,11 @@ bad:
  * Copy arguments and environment from user space into work area; we may
  * have already copied some early arguments into the work area, and if
  * so, any arguments copied in are appended to those already there.
+ * This function is the primary manipulator of ip_argspace, since
+ * these are the arguments the client of execve(2) knows about. After
+ * each argv[]/envv[] string is copied, we charge the string length
+ * and argv[]/envv[] pointer slot to ip_argspace, so that we can
+ * fully preflight the arg list size.
  *
  * Parameters:	struct image_params *	the image parameter block
  *
@@ -2504,6 +2719,8 @@ bad:
  * Implicit returns;
  *		(imgp->ip_argc)		Count of arguments, updated
  *		(imgp->ip_envc)		Count of environment strings, updated
+ *		(imgp->ip_argspace)	Count of NCARGS bytes remaining
+ *		(imgp->ip_interp_buffer)	Interpreter and args (mutated in place)
  *
  *
  * Note:	The argument and environment vectors are user space pointers
@@ -2513,47 +2730,101 @@ static int
 exec_extract_strings(struct image_params *imgp)
 {
 	int error = 0;
-	int strsz = 0;
 	int	ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT) ? 8 : 4;
+	int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
 	user_addr_t	argv = imgp->ip_user_argv;
 	user_addr_t	envv = imgp->ip_user_envv;
 
-	/*
-	 * If the argument vector is NULL, this is the system startup
-	 * bootstrap from load_init_program(), and there's nothing to do
-	 */
-	if (imgp->ip_user_argv == 0LL)
-		goto bad;
-
-	/* Now, get rest of arguments */
-
 	/*
 	 * Adjust space reserved for the path name by however much padding it
 	 * needs. Doing this here since we didn't know if this would be a 32- 
 	 * or 64-bit process back in exec_save_path.
 	 */
-	strsz = strlen(imgp->ip_strings) + 1;
-	imgp->ip_strspace -= ((strsz + ptr_size-1) & ~(ptr_size-1)) - strsz;
+	while (imgp->ip_strspace % new_ptr_size != 0) {
+		*imgp->ip_strendp++ = '\0';
+		imgp->ip_strspace--;
+		/* imgp->ip_argspace--; not counted towards exec args total */
+	}
 
 	/*
-	 * If we are running an interpreter, replace the av[0] that was
-	 * passed to execve() with the fully qualified path name that was
-	 * passed to execve() for interpreters which do not use the PATH
-	 * to locate their script arguments.
+	 * From now on, we start attributing string space to ip_argspace
 	 */
-	if((imgp->ip_flags & IMGPF_INTERPRET) != 0 && argv != 0LL) {
+	imgp->ip_startargv = imgp->ip_strendp;
+	imgp->ip_argc = 0;
+
+	if((imgp->ip_flags & IMGPF_INTERPRET) != 0) {
 		user_addr_t	arg;
+		char *argstart, *ch;
+
+		/* First, the arguments in the "#!" string are tokenized and extracted. */
+		argstart = imgp->ip_interp_buffer;
+		while (argstart) {
+			ch = argstart;
+			while (*ch && !IS_WHITESPACE(*ch)) {
+				ch++;
+			}
 
-		error = copyinptr(argv, &arg, ptr_size);
-		if (error)
-			goto bad;
-		if (arg != 0LL && arg != (user_addr_t)-1) {
-			argv += ptr_size;
-			error = exec_add_string(imgp, imgp->ip_user_fname);
+			if (*ch == '\0') {
+				/* last argument, no need to NUL-terminate */
+				error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
+				argstart = NULL;
+			} else {
+				/* NUL-terminate */
+				*ch = '\0';
+				error = exec_add_user_string(imgp, CAST_USER_ADDR_T(argstart), UIO_SYSSPACE, TRUE);
+
+				/*
+				 * Find the next string. We know spaces at the end of the string have already
+				 * been stripped.
+				 */
+				argstart = ch + 1;
+				while (IS_WHITESPACE(*argstart)) {
+					argstart++;
+				}
+			}
+
+			/* Error-check, regardless of whether this is the last interpreter arg or not */
 			if (error)
 				goto bad;
+			if (imgp->ip_argspace < new_ptr_size) {
+				error = E2BIG;
+				goto bad;
+			}
+			imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
 			imgp->ip_argc++;
 		}
+
+		if (argv != 0LL) {
+			/*
+			 * If we are running an interpreter, replace the av[0] that was
+			 * passed to execve() with the path name that was
+			 * passed to execve() for interpreters which do not use the PATH
+			 * to locate their script arguments.
+			 */
+			error = copyinptr(argv, &arg, ptr_size);
+			if (error)
+				goto bad;
+			if (arg != 0LL) {
+				argv += ptr_size; /* consume without using */
+			}
+		}
+
+		if (imgp->ip_interp_sugid_fd != -1) {
+			char temp[19]; /* "/dev/fd/" + 10 digits + NUL */
+			snprintf(temp, sizeof(temp), "/dev/fd/%d", imgp->ip_interp_sugid_fd);
+			error = exec_add_user_string(imgp, CAST_USER_ADDR_T(temp), UIO_SYSSPACE, TRUE);
+		} else {
+			error = exec_add_user_string(imgp, imgp->ip_user_fname, imgp->ip_seg, TRUE);
+		}
+		
+		if (error)
+			goto bad;
+		if (imgp->ip_argspace < new_ptr_size) {
+			error = E2BIG;
+			goto bad;
+		}
+		imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
+		imgp->ip_argc++;
 	}
 
 	while (argv != 0LL) {
@@ -2563,25 +2834,36 @@ exec_extract_strings(struct image_params *imgp)
 		if (error)
 			goto bad;
 
-		argv += ptr_size;
 		if (arg == 0LL) {
 			break;
-		} else if (arg == (user_addr_t)-1) {
-			/* Um... why would it be -1? */
-			error = EFAULT;
-			goto bad;
 		}
+
+		argv += ptr_size;
+
 		/*
 		* av[n...] = arg[n]
 		*/
-		error = exec_add_string(imgp, arg);
+		error = exec_add_user_string(imgp, arg, imgp->ip_seg, TRUE);
 		if (error)
 			goto bad;
+		if (imgp->ip_argspace < new_ptr_size) {
+			error = E2BIG;
+			goto bad;
+		}
+		imgp->ip_argspace -= new_ptr_size; /* to hold argv[] entry */
 		imgp->ip_argc++;
 	}	 
+
+	/* Save space for argv[] NULL terminator */
+	if (imgp->ip_argspace < new_ptr_size) {
+		error = E2BIG;
+		goto bad;
+	}
+	imgp->ip_argspace -= new_ptr_size;
 	
-	/* Note where the args end and env begins. */
-	imgp->ip_strendargvp = imgp->ip_strendp;
+	/* Note where the args end and env begins. */
+	imgp->ip_endargv = imgp->ip_strendp;
+	imgp->ip_envc = 0;
 
 	/* Now, get the environment */
 	while (envv != 0LL) {
@@ -2594,29 +2876,165 @@ exec_extract_strings(struct image_params *imgp)
 		envv += ptr_size;
 		if (env == 0LL) {
 			break;
-		} else if (env == (user_addr_t)-1) {
-			error = EFAULT;
-			goto bad;
 		}
 		/*
 		* av[n...] = env[n]
 		*/
-		error = exec_add_string(imgp, env);
+		error = exec_add_user_string(imgp, env, imgp->ip_seg, TRUE);
 		if (error)
 			goto bad;
+		if (imgp->ip_argspace < new_ptr_size) {
+			error = E2BIG;
+			goto bad;
+		}
+		imgp->ip_argspace -= new_ptr_size; /* to hold envv[] entry */
 		imgp->ip_envc++;
 	}
+
+	/* Save space for envv[] NULL terminator */
+	if (imgp->ip_argspace < new_ptr_size) {
+		error = E2BIG;
+		goto bad;
+	}
+	imgp->ip_argspace -= new_ptr_size;
+
+	/* Align the tail of the combined argv+envv area */
+	while (imgp->ip_strspace % new_ptr_size != 0) {
+		if (imgp->ip_argspace < 1) {
+			error = E2BIG;
+			goto bad;
+		}
+		*imgp->ip_strendp++ = '\0';
+		imgp->ip_strspace--;
+		imgp->ip_argspace--;
+	}
+	
+	/* Note where the envv ends and applev begins. */
+	imgp->ip_endenvv = imgp->ip_strendp;
+
+	/*
+	 * From now on, we are no longer charging argument
+	 * space to ip_argspace.
+	 */
+
 bad:
 	return error;
 }
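
A standalone sketch of the ip_argspace preflight this function performs (hypothetical names, not the kernel routine): each argument is charged its string bytes plus one pointer-sized argv[] slot against an NCARGS-style budget, so an oversized arg list fails with E2BIG before any copyout happens.

#include <errno.h>
#include <string.h>

#define DEMO_NCARGS	(256 * 1024)	/* stand-in for NCARGS */

struct arg_budget {
	long remaining;			/* plays the role of ip_argspace */
	int  ptr_size;			/* 4 or 8, per IMGPF_IS_64BIT */
};

static int
charge_arg(struct arg_budget *b, const char *arg)
{
	long need = (long)strlen(arg) + 1;	/* string bytes incl. NUL */

	if (b->remaining < need)
		return E2BIG;
	b->remaining -= need;

	if (b->remaining < b->ptr_size)		/* room for the argv[] slot */
		return E2BIG;
	b->remaining -= b->ptr_size;
	return 0;
}

int
main(void)
{
	struct arg_budget b = { DEMO_NCARGS, 8 };

	return charge_arg(&b, "some-argument");	/* 0 on success */
}
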
 
+static char *
+random_hex_str(char *str, int len)
+{
+	uint64_t low, high, value;
+	int idx;
+	char digit;
+
+	/* A 64-bit value will only take 16 characters, plus '0x' and a NUL. */
+	if (len > 19)
+		len = 19;
+
+	/* We need enough room for at least 1 digit */
+	if (len < 4)
+		return (NULL);
+
+	low = random();
+	high = random();
+	value = high << 32 | low;
+
+	str[0] = '0';
+	str[1] = 'x';
+	for (idx = 2; idx < len - 1; idx++) {
+		digit = value & 0xf;
+		value = value >> 4;
+		if (digit < 10)
+			str[idx] = '0' + digit;
+		else
+			str[idx] = 'a' + (digit - 10);
+	}
+	str[idx] = '\0';
+	return (str);
+}
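
A user-space analogue of random_hex_str(), illustrating the output shape: a 19-byte buffer yields "0x" plus 16 hex digits plus a NUL, with nibbles emitted least-significant first. The fixed input value below is for demonstration only; the kernel routine draws it from random().

#include <stdio.h>

static char *
demo_hex_str(char *str, int len, unsigned long long value)
{
	int idx;

	if (len > 19)
		len = 19;	/* "0x" + 16 nibbles + NUL is the most useful */
	if (len < 4)
		return NULL;	/* need room for at least one digit */

	str[0] = '0';
	str[1] = 'x';
	for (idx = 2; idx < len - 1; idx++) {
		int digit = value & 0xf;

		value >>= 4;
		str[idx] = digit < 10 ? '0' + digit : 'a' + (digit - 10);
	}
	str[idx] = '\0';
	return str;
}

int
main(void)
{
	char buf[19];

	/* prints "0xfedcba9876543210": nibbles come out LSB-first */
	printf("%s\n", demo_hex_str(buf, sizeof(buf), 0x0123456789abcdefULL));
	return 0;
}
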
+
+/*
+ * Libc has an 8-element array set up for stack guard values.  It only fills
+ * in one of those entries, and both gcc and llvm seem to use only a single
+ * 8-byte guard.  Until somebody needs more than an 8-byte guard value, don't
+ * do the work to construct them.
+ */
+#define	GUARD_VALUES 1
+#define	GUARD_KEY "stack_guard="
+
+/*
+ * System malloc needs some entropy when it is initialized.
+ */
+#define	ENTROPY_VALUES 2
+#define ENTROPY_KEY "malloc_entropy="
+
+/*
+ * Build up the contents of the apple[] string vector
+ */
+static int
+exec_add_apple_strings(struct image_params *imgp)
+{
+	int i, error;
+	int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
+	char guard[19];
+	char guard_vec[strlen(GUARD_KEY) + 19 * GUARD_VALUES + 1];
+
+	char entropy[19];
+	char entropy_vec[strlen(ENTROPY_KEY) + 19 * ENTROPY_VALUES + 1];
+
+	/* exec_save_path stored the first string */
+	imgp->ip_applec = 1;
+
+	/*
+	 * Supply libc with a collection of random values to use when
+	 * implementing -fstack-protector.
+	 */
+	(void)strlcpy(guard_vec, GUARD_KEY, sizeof (guard_vec));
+	for (i = 0; i < GUARD_VALUES; i++) {
+		random_hex_str(guard, sizeof (guard));
+		if (i)
+			(void)strlcat(guard_vec, ",", sizeof (guard_vec));
+		(void)strlcat(guard_vec, guard, sizeof (guard_vec));
+	}
+
+	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(guard_vec), UIO_SYSSPACE, FALSE);
+	if (error)
+		goto bad;
+	imgp->ip_applec++;
+
+	/*
+	 * Supply libc with entropy for system malloc.
+	 */
+	(void)strlcpy(entropy_vec, ENTROPY_KEY, sizeof(entropy_vec));
+	for (i = 0; i < ENTROPY_VALUES; i++) {
+		random_hex_str(entropy, sizeof (entropy));
+		if (i)
+			(void)strlcat(entropy_vec, ",", sizeof (entropy_vec));
+		(void)strlcat(entropy_vec, entropy, sizeof (entropy_vec));
+	}
+	
+	error = exec_add_user_string(imgp, CAST_USER_ADDR_T(entropy_vec), UIO_SYSSPACE, FALSE);
+	if (error)
+		goto bad;
+	imgp->ip_applec++;
+
+	/* Align the tail of the combined applev area */
+	while (imgp->ip_strspace % new_ptr_size != 0) {
+		*imgp->ip_strendp++ = '\0';
+		imgp->ip_strspace--;
+	}
+
+bad:
+	return error;
+}
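
On the receiving side, a consumer would scan the apple[] vector for these key= entries. A hedged sketch of such a lookup (names and sample values are illustrative, not Libc's actual implementation):

#include <stdio.h>
#include <string.h>

static const char *
apple_lookup(const char **applev, const char *key)
{
	size_t keylen = strlen(key);
	int i;

	for (i = 0; applev[i] != NULL; i++) {
		if (strncmp(applev[i], key, keylen) == 0)
			return applev[i] + keylen;	/* the "0x..." value(s) */
	}
	return NULL;
}

int
main(void)
{
	const char *applev[] = {
		"/bin/ls",	/* exec_save_path stored the first string */
		"stack_guard=0x32b1b7fa4a3f1935",	/* sample values only */
		"malloc_entropy=0x1122334455667788,0x99aabbccddeeff00",
		NULL
	};
	const char *guard = apple_lookup(applev, "stack_guard=");

	printf("guard = %s\n", guard ? guard : "(none)");
	return 0;
}
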
 
 #define	unix_stack_size(p)	(p->p_rlimit[RLIMIT_STACK].rlim_cur)
 
 /*
  * exec_check_permissions
  *
- * Decription:	Verify that the file that is being attempted to be executed
+ * Description:	Verify that the file that is being attempted to be executed
 *		is in fact allowed to be executed based on its POSIX file
  *		permissions and other access control criteria
  *
@@ -2658,7 +3076,7 @@ exec_check_permissions(struct image_params *imgp)
	 * will always succeed, and we don't want that to happen unless the
 	 * file really is executable.
 	 */
-	if ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)
+	if (!vfs_authopaque(vnode_mount(vp)) && ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0))
 		return (EACCES);
 
 	/* Disallow zero length files */
@@ -2705,7 +3123,7 @@ exec_check_permissions(struct image_params *imgp)
 	 * cached values, then we set the PowerPC environment flag.
 	 */
 	if (vap->va_fsid == exec_archhandler_ppc.fsid &&
-		vap->va_fileid == (uint64_t)((uint32_t)exec_archhandler_ppc.fileid)) {
+		vap->va_fileid == exec_archhandler_ppc.fileid) {
 		imgp->ip_flags |= IMGPF_POWERPC;
 	}
 #endif	/* IMGPF_POWERPC */
@@ -2790,7 +3208,7 @@ exec_handle_sugid(struct image_params *imgp)
 	     kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
 	    ((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
 		 ((kauth_cred_ismember_gid(cred, imgp->ip_origvattr->va_gid, &leave_sugid_clear) || !leave_sugid_clear) ||
-		 (cred->cr_gid != imgp->ip_origvattr->va_gid)))) {
+		 (kauth_cred_getgid(cred) != imgp->ip_origvattr->va_gid)))) {
 
 #if CONFIG_MACF
 /* label for MAC transition and neither VSUID nor VSGID */
@@ -2815,9 +3233,13 @@ handle_mac_transition:
 		 */
 		if (imgp->ip_origvattr->va_mode & VSUID) {
 			p->p_ucred  = kauth_cred_setresuid(p->p_ucred, KAUTH_UID_NONE, imgp->ip_origvattr->va_uid, imgp->ip_origvattr->va_uid, KAUTH_UID_NONE);
+			/* update cred on proc */
+			PROC_UPDATE_CREDS_ONPROC(p);
 		}
 		if (imgp->ip_origvattr->va_mode & VSGID) {
 			p->p_ucred = kauth_cred_setresgid(p->p_ucred, KAUTH_GID_NONE, imgp->ip_origvattr->va_gid, imgp->ip_origvattr->va_gid);
+			/* update cred on proc */
+			PROC_UPDATE_CREDS_ONPROC(p);
 		}
 
 #if CONFIG_MACF
@@ -2878,7 +3300,7 @@ handle_mac_transition:
 		if (dev_null == NULLVP) {
 			struct nameidata nd1;
 
-			NDINIT(&nd1, LOOKUP, FOLLOW, UIO_SYSSPACE,
+			NDINIT(&nd1, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
 			    CAST_USER_ADDR_T("/dev/null"),
 			    imgp->ip_vfs_context);
 
@@ -2893,9 +3315,8 @@ handle_mac_transition:
 			}
 		}
 
-		/* Radar 2261856; setuid security hole fix */
-		/* Patch from OpenBSD: A. Ramesh */
 		/*
+		 * Radar 2261856; setuid security hole fix
 		 * XXX For setuid processes, attempt to ensure that
 		 * stdin, stdout, and stderr are already allocated.
 		 * We do not want userland to accidentally allocate
@@ -2913,7 +3334,7 @@ handle_mac_transition:
 				if ((error = falloc(p, &fp, &indx, imgp->ip_vfs_context)) != 0)
 					continue;
 
-				if ((error = vnode_ref_ext(dev_null, FREAD)) != 0) {
+				if ((error = vnode_ref_ext(dev_null, FREAD, 0)) != 0) {
 					fp_free(p, indx, fp);
 					break;
 				}
@@ -2958,7 +3379,9 @@ handle_mac_transition:
 	 * Implement the semantic where the effective user and group become
 	 * the saved user and group in exec'ed programs.
 	 */
-	p->p_ucred = kauth_cred_setsvuidgid(p->p_ucred, kauth_cred_getuid(p->p_ucred),  p->p_ucred->cr_gid);
+	p->p_ucred = kauth_cred_setsvuidgid(p->p_ucred, kauth_cred_getuid(p->p_ucred),  kauth_cred_getgid(p->p_ucred));
+	/* update cred on proc */
+	PROC_UPDATE_CREDS_ONPROC(p);
 	
 	/* Update the process' identity version and set the security token */
 	p->p_idversion++;
@@ -3131,7 +3554,7 @@ load_init_program(proc_t p)
 
 	error = execve(p,&init_exec_args,retval);
 	if (error)
-		panic("Process 1 exec of %s failed, errno %d\n",
+		panic("Process 1 exec of %s failed, errno %d",
 		      init_program_name, error);
 }
 
@@ -3188,8 +3611,6 @@ load_return_to_errno(load_return_t lrtn)
 #include <kern/clock.h>
 #include <mach/kern_return.h>
 
-extern semaphore_t execve_semaphore;
-
 /*
  * execargs_alloc
  *
@@ -3244,7 +3665,7 @@ execargs_lock_sleep(void) {
 
 static kern_return_t
 execargs_purgeable_allocate(char **execarg_address) {
-	kern_return_t kr = vm_allocate(bsd_pageable_map, (vm_offset_t *)execarg_address, NCARGS + PAGE_SIZE, VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
+	kern_return_t kr = vm_allocate(bsd_pageable_map, (vm_offset_t *)execarg_address, BSD_PAGEABLE_SIZE_PER_EXEC, VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
 	assert(kr == KERN_SUCCESS);
 	return kr;
 }
@@ -3315,7 +3736,11 @@ execargs_alloc(struct image_params *imgp)
 		return (ENOMEM);
 	}
 
-	imgp->ip_vdata = imgp->ip_strings + NCARGS;
+	/* last page used to read in file headers */
+	imgp->ip_vdata = imgp->ip_strings + ( NCARGS + PAGE_SIZE );
+	imgp->ip_strendp = imgp->ip_strings;
+	imgp->ip_argspace = NCARGS;
+	imgp->ip_strspace = ( NCARGS + PAGE_SIZE );
 
 	return (0);
 }
@@ -3404,8 +3829,11 @@ exec_resettextvp(proc_t p, struct image_params *imgp)
 static int 
 check_for_signature(proc_t p, struct image_params *imgp)
 {
+	void *blob = NULL;
+	size_t length = 0;
 	mach_port_t port = NULL;
-	kern_return_t error = 0;
+	kern_return_t kr = KERN_FAILURE;
+	int error = EACCES;
 	unsigned char hash[SHA1_RESULTLEN];
 
 	/*
@@ -3422,35 +3850,56 @@ check_for_signature(proc_t p, struct image_params *imgp)
 		vm_map_switch_protect(get_task_map(p->task), TRUE);
 	}
 
-	/*
-	 * If the task_access_port is set and the proc isn't signed,
-	 * ask for a code signature from user space. Fail the exec
-	 * if permission is denied.
-	 */
-	error = task_get_task_access_port(p->task, &port);
-	if (error == 0 && IPC_PORT_VALID(port) && !(p->p_csflags & CS_VALID)) {
-		error = find_code_signature(port, p->p_pid);
-		if (error == KERN_FAILURE) {
-			/* Make very sure execution fails */
-			psignal(p, SIGKILL);
-			return EACCES;
-		}
+	/* If the process is not signed or if it contains
+	 * entitlements, we need to communicate through the
+	 * task_access_port to taskgated.  taskgated will provide a
+	 * detached code signature if present, and will enforce any
+	 * restrictions on entitlements.  taskgated returns
+	 * KERN_SUCCESS if it has completed its work and the exec
+	 * should continue, or KERN_FAILURE if the exec should fail.
+	 */
+	error = cs_entitlements_blob_get(p, &blob, &length);
 
-		/* Only do this if exec_resettextvp() did not fail */
-		if (p->p_textvp != NULLVP) {
-			/*
-			 * If there's a new code directory, mark this process
-			 * as signed.
-			 */
-			error = ubc_cs_getcdhash(p->p_textvp, p->p_textoff, hash); 
-			if (error == 0) {
-				proc_lock(p);
-				p->p_csflags |= CS_VALID;
-				proc_unlock(p);
-			}
+	/* if signed and no entitlements, then we're done here */
+	if ((p->p_csflags & CS_VALID) && NULL == blob) {
+		error = 0;
+		goto done;
+	}
+
+	kr = task_get_task_access_port(p->task, &port);
+	if (KERN_SUCCESS != kr || !IPC_PORT_VALID(port)) {
+		error = 0;
+#if !CONFIG_EMBEDDED
+		/* fatal on the desktop when entitlements are present */
+		if (NULL != blob)
+			error = EACCES;
+#endif
+		goto done;
+	}
+
+	kr = find_code_signature(port, p->p_pid);
+	if (KERN_SUCCESS != kr) {
+		error = EACCES;
+		goto done;
+	}
+
+	/* Only do this if exec_resettextvp() did not fail */
+	if (p->p_textvp != NULLVP) {
+		/*
+		 * If there's a new code directory, mark this process
+		 * as signed.
+		 */
+		if (0 == ubc_cs_getcdhash(p->p_textvp, p->p_textoff, hash)) {
+			proc_lock(p);
+			p->p_csflags |= CS_VALID;
+			proc_unlock(p);
 		}
 	}
 
-	return KERN_SUCCESS;
+done:
+	if (0 != error)
+		/* make very sure execution fails */
+		psignal(p, SIGKILL);
+	return error;
 }
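
Condensed, the policy the rewritten check implements is: a validly signed, entitlement-free binary skips taskgated; an unreachable taskgated is fatal only on the desktop and only when entitlements are present; otherwise taskgated's verdict decides, and any denial is followed by SIGKILL. As a sketch (illustrative names only, not the kernel function):

#include <stdbool.h>
#include <errno.h>

enum verdict { ALLOW = 0, DENY = EACCES };

static enum verdict
signature_policy(bool cs_valid, bool has_entitlements,
		 bool port_valid, bool taskgated_ok, bool desktop)
{
	if (cs_valid && !has_entitlements)
		return ALLOW;			/* fast path: nothing to ask */
	if (!port_valid)			/* no taskgated to consult */
		return (desktop && has_entitlements) ? DENY : ALLOW;
	return taskgated_ok ? ALLOW : DENY;	/* KERN_SUCCESS vs KERN_FAILURE */
}
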
 
diff --git a/bsd/kern/kern_exit.c b/bsd/kern/kern_exit.c
index 811b4fb7d..7d5ddd37b 100644
--- a/bsd/kern/kern_exit.c
+++ b/bsd/kern/kern_exit.c
@@ -158,7 +158,6 @@ int	*get_bsduthreadrval(thread_t);
 kern_return_t sys_perf_notify(thread_t thread, int pid);
 kern_return_t abnormal_exit_notify(mach_exception_data_type_t code, 
 		mach_exception_data_type_t subcode);
-void workqueue_exit(struct proc *);
 void	delay(int);
 			
 /*
@@ -256,8 +255,10 @@ exit1(proc_t p, int rv, int *retval)
 	DTRACE_PROC1(exit, int, CLD_EXITED);
 
         proc_lock(p);
+	proc_transstart(p, 1);
 	while (p->exit_thread != self) {
 		if (sig_try_locked(p) <= 0) {
+			proc_transend(p, 1);
 			if (get_threadtask(self) != task) {
 				proc_unlock(p);
 				return(0);
@@ -283,11 +284,12 @@ exit1(proc_t p, int rv, int *retval)
 	p->p_lflag |= P_LEXIT;
 	p->p_xstat = rv;
 
+	proc_transend(p, 1);
 	proc_unlock(p);
 
 	proc_prepareexit(p, rv);
 
-	/* task terminate will call proc_terminate and that cleans it up */
+	/* Last thread to terminate will call proc_exit() */
 	task_terminate_internal(task);
 
 	return(0);
@@ -372,21 +374,39 @@ proc_exit(proc_t p)
 	pid_t pid;
 	int exitval;
 
-	/* This can happen if thread_terminate of the single thread
-	 * process 
-	 */
-
 	uth = (struct uthread *)get_bsdthread_info(current_thread());
 
 	proc_lock(p);
+	proc_transstart(p, 1);
 	if( !(p->p_lflag & P_LEXIT)) {
+		/*
+		 * This can happen if a thread_terminate() occurs
+		 * in a single-threaded process.
+		 */
 		p->p_lflag |= P_LEXIT;
+		proc_transend(p, 1);
 		proc_unlock(p);
 		proc_prepareexit(p, 0);	
+		(void) task_terminate_internal(task);
 		proc_lock(p);
+	} else {
+		proc_transend(p, 1);
 	}
 
 	p->p_lflag |= P_LPEXIT;
+
+	/*
+	 * Other kernel threads may be in the middle of signalling this process.
+	 * Wait for those threads to wrap it up before making the process
+	 * disappear on them.
+	 */
+	if ((p->p_lflag & P_LINSIGNAL) || (p->p_sigwaitcnt > 0)) {
+		p->p_sigwaitcnt++;
+		while ((p->p_lflag & P_LINSIGNAL) || (p->p_sigwaitcnt > 1)) 
+			msleep(&p->p_sigmask, &p->p_mlock, PWAIT, "proc_sigdrain", NULL);
+		p->p_sigwaitcnt--;
+	}
+
 	proc_unlock(p);
 	pid = p->p_pid;
 	exitval = p->p_xstat;
@@ -429,6 +449,8 @@ proc_exit(proc_t p)
 	MALLOC_ZONE(p->p_ru, struct rusage *,
 			sizeof (*p->p_ru), M_ZOMBIE, M_WAITOK);
 
+	nspace_proc_exit(p);
+
 	/*
 	 * need to cancel async IO requests that can be cancelled and wait for those
 	 * already active.  MAY BLOCK!
@@ -575,7 +597,7 @@ proc_exit(proc_t p)
 			 * if the reap is already in progress. So we get
			 * the reference here exclusively and there can be
 			 * no waiters. So there is no need for a wakeup
-			 * after we are done. AlsO  the reap frees the structure
+			 * after we are done.  Also the reap frees the structure
 			 * and the proc struct cannot be used for wakeups as well. 
 			 * It is safe to use q here as this is system reap
 			 */
@@ -587,10 +609,21 @@ proc_exit(proc_t p)
 		 	* since their existence means someone is messing up.
 		 	*/
 			if (q->p_lflag & P_LTRACED) {
+				/*
+				 * Take a reference on the child process to
+				 * ensure it doesn't exit and disappear between
+				 * the time we drop the list_lock and attempt
+				 * to acquire its proc_lock.
+				 */
+				if (proc_ref_locked(q) != q)
+					continue;
+
 				proc_list_unlock();
 				proc_lock(q);
 				q->p_lflag &= ~P_LTRACED;
 				if (q->sigwait_thread) {
+					thread_t thread = q->sigwait_thread;
+
 					proc_unlock(q);
 					/*
 				 	* The sigwait_thread could be stopped at a
@@ -599,13 +632,16 @@ proc_exit(proc_t p)
 				 	* the first thread in the task. So any attempts to kill
 				 	* the process would result into a deadlock on q->sigwait.
 				 	*/
-					thread_resume((thread_t)q->sigwait_thread);
-					clear_wait(q->sigwait_thread, THREAD_INTERRUPTED);
-					threadsignal((thread_t)q->sigwait_thread, SIGKILL, 0);
-				} else
+					thread_resume(thread);
+					clear_wait(thread, THREAD_INTERRUPTED);
+					threadsignal(thread, SIGKILL, 0);
+				} else {
 					proc_unlock(q);
+				}
+
 				psignal(q, SIGKILL);
 				proc_list_lock();
+				proc_rele_locked(q);
 			}
 		}
 	}
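
The proc_ref_locked()/proc_rele_locked() pair added above is the classic take-a-reference-before-dropping-the-list-lock pattern: the reference, manipulated under the list lock, pins the child across the window where neither lock is held. A generic pthreads rendition (illustrative names, not the kernel primitives):

#include <pthread.h>

struct obj {
	pthread_mutex_t	lock;
	int		refcnt;		/* protected by list_lock */
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void
visit_safely(struct obj *o)
{
	pthread_mutex_lock(&list_lock);
	o->refcnt++;			/* pin o before list_lock drops */
	pthread_mutex_unlock(&list_lock);

	pthread_mutex_lock(&o->lock);	/* o cannot be freed under us */
	/* ... operate on o, e.g. clear its traced flag ... */
	pthread_mutex_unlock(&o->lock);

	pthread_mutex_lock(&list_lock);
	o->refcnt--;			/* unpin; a reaper may now free o */
	pthread_mutex_unlock(&list_lock);
}
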
@@ -629,10 +665,9 @@ proc_exit(proc_t p)
 	 */
 	/* No need for locking here as no one than this thread can access this */
 	if (p->p_ru != NULL) {
+	    calcru(p, &p->p_stats->p_ru.ru_utime, &p->p_stats->p_ru.ru_stime, NULL);
 	    *p->p_ru = p->p_stats->p_ru;
 
-	    calcru(p, &p->p_ru->ru_utime, &p->p_ru->ru_stime, NULL);
-
 	    ruadd(p->p_ru, &p->p_stats->p_cru);
 	}
 
@@ -689,7 +724,8 @@ proc_exit(proc_t p)
 	p->task = TASK_NULL;
 	set_bsdtask_info(task, NULL);
 
-	proc_knote(p, NOTE_EXIT);
+	/* exit status will be seen by parent process */
+	proc_knote(p, NOTE_EXIT | (p->p_xstat & 0xffff));
 
 	/* mark the thread as the one that is doing proc_exit
 	 * no need to hold proc lock in uthread_free
@@ -737,7 +773,7 @@ proc_exit(proc_t p)
 			 * p_ucred usage is safe as it is an exiting process
 			 * and reference is dropped in reap
 			 */
-			pp->si_uid = p->p_ucred->cr_ruid;
+			pp->si_uid = kauth_cred_getruid(p->p_ucred);
 			proc_unlock(pp);
 		}
 		/* mark as a zombie */
@@ -855,7 +891,7 @@ reap_child_locked(proc_t parent, proc_t child, int deadparent, int locked, int d
 			trace_parent->si_pid = child->p_pid;
 			trace_parent->si_status = child->p_xstat;
 			trace_parent->si_code = CLD_CONTINUED;
-			trace_parent->si_uid = child->p_ucred->cr_ruid;
+			trace_parent->si_uid = kauth_cred_getruid(child->p_ucred);
 			proc_unlock(trace_parent);
 		}
 		proc_reparentlocked(child, trace_parent, 1, 0);
@@ -899,7 +935,7 @@ reap_child_locked(proc_t parent, proc_t child, int deadparent, int locked, int d
 		printf("Warning : lost p_ru for %s\n", child->p_comm);
 	}
 
-	AUDIT_SESSION_PROCEXIT(child->p_ucred);
+	AUDIT_SESSION_PROCEXIT(child);
 
 	/*
 	 * Decrement the count of procs running with this uid.
@@ -907,7 +943,7 @@ reap_child_locked(proc_t parent, proc_t child, int deadparent, int locked, int d
	 * and reference is dropped after these calls down below
 	 * (locking protection is provided by list lock held in chgproccnt)
 	 */
-	(void)chgproccnt(child->p_ucred->cr_ruid, -1);
+	(void)chgproccnt(kauth_cred_getruid(child->p_ucred), -1);
 
 #if CONFIG_LCTX
 	ALLLCTX_LOCK;
@@ -948,22 +984,21 @@ reap_child_locked(proc_t parent, proc_t child, int deadparent, int locked, int d
 
 	proc_list_unlock();
 
-#ifdef CONFIG_EMBEDDED
-	lck_mtx_destroy(&child->p_mlock, proc_lck_grp);
-	lck_mtx_destroy(&child->p_fdmlock, proc_lck_grp);
-#if CONFIG_DTRACE
-	lck_mtx_destroy(&child->p_dtrace_sprlock, proc_lck_grp);
-#endif
-	lck_spin_destroy(&child->p_slock, proc_lck_grp);
-
-#else	
+#if CONFIG_FINE_LOCK_GROUPS
 	lck_mtx_destroy(&child->p_mlock, proc_mlock_grp);
 	lck_mtx_destroy(&child->p_fdmlock, proc_fdmlock_grp);
 #if CONFIG_DTRACE
 	lck_mtx_destroy(&child->p_dtrace_sprlock, proc_lck_grp);
 #endif
 	lck_spin_destroy(&child->p_slock, proc_slock_grp);
+#else /* CONFIG_FINE_LOCK_GROUPS */
+	lck_mtx_destroy(&child->p_mlock, proc_lck_grp);
+	lck_mtx_destroy(&child->p_fdmlock, proc_lck_grp);
+#if CONFIG_DTRACE
+	lck_mtx_destroy(&child->p_dtrace_sprlock, proc_lck_grp);
 #endif
+	lck_spin_destroy(&child->p_slock, proc_lck_grp);
+#endif /* CONFIG_FINE_LOCK_GROUPS */
 	workqueue_destroy_lock(child);
 
 	FREE_ZONE(child, sizeof *child, M_PROC);
@@ -1754,6 +1789,8 @@ vproc_exit(proc_t p)
 				proc_lock(q);
 				q->p_lflag &= ~P_LTRACED;
 				if (q->sigwait_thread) {
+					thread_t thread = q->sigwait_thread;
+
 					proc_unlock(q);
 					/*
 				 	* The sigwait_thread could be stopped at a
@@ -1762,12 +1799,13 @@ vproc_exit(proc_t p)
 				 	* the first thread in the task. So any attempts to kill
 				 	* the process would result into a deadlock on q->sigwait.
 				 	*/
-					thread_resume((thread_t)q->sigwait_thread);
-					clear_wait(q->sigwait_thread, THREAD_INTERRUPTED);
-					threadsignal((thread_t)q->sigwait_thread, SIGKILL, 0);
-				} else
+					thread_resume(thread);
+					clear_wait(thread, THREAD_INTERRUPTED);
+					threadsignal(thread, SIGKILL, 0);
+				} else {
 					proc_unlock(q);
-					
+				}
+
 				psignal(q, SIGKILL);
 				proc_list_lock();
 			}
@@ -1844,6 +1882,10 @@ vproc_exit(proc_t p)
 		}
 	}
 
+#if PSYNCH
+	pth_proc_hashdelete(p);
+#endif /* PSYNCH */
+
 	/*
 	 * Other substructures are freed from wait().
 	 */
@@ -1877,7 +1919,7 @@ vproc_exit(proc_t p)
 			 * p_ucred usage is safe as it is an exiting process
 			 * and reference is dropped in reap
 			 */
-			pp->si_uid = p->p_ucred->cr_ruid;
+			pp->si_uid = kauth_cred_getruid(p->p_ucred);
 			proc_unlock(pp);
 		}
 		/* mark as a zombie */
diff --git a/bsd/kern/kern_fork.c b/bsd/kern/kern_fork.c
index a5b1350d3..7746398bf 100644
--- a/bsd/kern/kern_fork.c
+++ b/bsd/kern/kern_fork.c
@@ -129,7 +129,6 @@ extern void dtrace_lazy_dofs_duplicate(proc_t, proc_t);
 
 #include <sys/sdt.h>
 
-
 /* XXX routines which should have Mach prototypes, but don't */
 void thread_set_parent(thread_t parent, int pid);
 extern void act_thread_catt(void *ctx);
@@ -365,7 +364,7 @@ fork1(proc_t parent_proc, thread_t *child_threadp, int kind)
 	 * exceed the limit. The variable nprocs is the current number of
 	 * processes, maxproc is the limit.
 	 */
-	uid = kauth_cred_get()->cr_ruid;
+	uid = kauth_getruid();
 	proc_list_lock();
 	if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
 		proc_list_unlock();
@@ -466,7 +465,6 @@ fork1(proc_t parent_proc, thread_t *child_threadp, int kind)
 
 		AUDIT_ARG(pid, child_proc->p_pid);
 
-		AUDIT_SESSION_PROCNEW(child_proc->p_ucred);
 // XXX END: wants to move to be common code (and safe)
 
 		/*
@@ -570,7 +568,6 @@ fork1(proc_t parent_proc, thread_t *child_threadp, int kind)
 
 		AUDIT_ARG(pid, child_proc->p_pid);
 
-		AUDIT_SESSION_PROCNEW(child_proc->p_ucred);
 // XXX END: wants to move to be common code (and safe)
 
 		/*
@@ -690,7 +687,6 @@ vfork_return(proc_t child_proc, int32_t *retval, int rval)
 	thread_t parent_thread = (thread_t)current_thread();
 	uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread);
 	
-
 	act_thread_catt(parent_uthread->uu_userstate);
 
 	/* end vfork in parent */
@@ -948,14 +944,6 @@ cloneproc(task_t parent_task, proc_t parent_proc, int inherit_memory)
 	if (parent_proc->p_flag & P_LP64) {
 		task_set_64bit(child_task, TRUE);
 		OSBitOrAtomic(P_LP64, (UInt32 *)&child_proc->p_flag);
-#ifdef __ppc__
-		/*
-		 * PPC51: ppc64 is limited to 51-bit addresses.
-		 * Memory above that limit is handled specially at
-		 * the pmap level.
-		 */
-		pmap_map_sharedpage(child_task, get_map_pmap(get_task_map(child_task)));
-#endif /* __ppc__ */
 	} else {
 		task_set_64bit(child_task, FALSE);
 		OSBitAndAtomic(~((uint32_t)P_LP64), (UInt32 *)&child_proc->p_flag);
@@ -1031,6 +1019,9 @@ forkproc_free(proc_t p)
 	/* Stop the profiling clock */
 	stopprofclock(p);
 
+	/* Update the audit session proc count */
+	AUDIT_SESSION_PROCEXIT(p);
+
 	/* Release the credential reference */
 	kauth_cred_unref(&p->p_ucred);
 
@@ -1069,6 +1060,7 @@ forkproc(proc_t parent_proc)
 {
 	proc_t child_proc;	/* Our new process */
 	static int nextpid = 0, pidwrap = 0, nextpidversion = 0;
+	static uint64_t nextuniqueid = 0;
 	int error = 0;
 	struct session *sessp;
 	uthread_t parent_uthread = (uthread_t)get_bsdthread_info(current_thread());
@@ -1147,6 +1139,8 @@ retry:
 	nprocs++;
 	child_proc->p_pid = nextpid;
 	child_proc->p_idversion = nextpidversion++;
+	/* the kernel process is handcrafted and not from fork, so start from 1 */
+	child_proc->p_uniqueid = ++nextuniqueid;
 #if 1
 	if (child_proc->p_pid != 0) {
 		if (pfind_locked(child_proc->p_pid) != PROC_NULL)
@@ -1180,7 +1174,7 @@ retry:
 	 * Increase reference counts on shared objects.
 	 * The p_stats and p_sigacts substructs are set in vm_fork.
 	 */
-	child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY));
+	child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY | P_DISABLE_ASLR));
 	if (parent_proc->p_flag & P_PROFIL)
 		startprofclock(child_proc);
 	/*
@@ -1188,22 +1182,26 @@ retry:
 	 * credential will be granted to the new process.
 	 */
 	child_proc->p_ucred = kauth_cred_get_with_ref();
+	/* update cred on proc */
+	PROC_UPDATE_CREDS_ONPROC(child_proc);
+	/* update audit session proc count */
+	AUDIT_SESSION_PROCNEW(child_proc);
 
-#ifdef CONFIG_EMBEDDED
-	lck_mtx_init(&child_proc->p_mlock, proc_lck_grp, proc_lck_attr);
-	lck_mtx_init(&child_proc->p_fdmlock, proc_lck_grp, proc_lck_attr);
-#if CONFIG_DTRACE
-	lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
-#endif
-	lck_spin_init(&child_proc->p_slock, proc_lck_grp, proc_lck_attr);
-#else /* !CONFIG_EMBEDDED */
+#if CONFIG_FINE_LOCK_GROUPS
 	lck_mtx_init(&child_proc->p_mlock, proc_mlock_grp, proc_lck_attr);
 	lck_mtx_init(&child_proc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
 #if CONFIG_DTRACE
 	lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
 #endif
 	lck_spin_init(&child_proc->p_slock, proc_slock_grp, proc_lck_attr);
-#endif /* !CONFIG_EMBEDDED */
+#else /* !CONFIG_FINE_LOCK_GROUPS */
+	lck_mtx_init(&child_proc->p_mlock, proc_lck_grp, proc_lck_attr);
+	lck_mtx_init(&child_proc->p_fdmlock, proc_lck_grp, proc_lck_attr);
+#if CONFIG_DTRACE
+	lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
+#endif
+	lck_spin_init(&child_proc->p_slock, proc_lck_grp, proc_lck_attr);
+#endif /* !CONFIG_FINE_LOCK_GROUPS */
 	klist_init(&child_proc->p_klist);
 
 	if (child_proc->p_textvp != NULLVP) {
@@ -1396,6 +1394,7 @@ uthread_alloc(task_t task, thread_t thread, int noinherit)
 
 	p = (proc_t) get_bsdtask_info(task);
 	uth = (uthread_t)ut;
+	uth->uu_kwe.kwe_uth = uth;
 
 	/*
 	 * Thread inherits credential from the creating thread, if both
diff --git a/bsd/kern/kern_lockf.c b/bsd/kern/kern_lockf.c
index 31b25d885..b7775864c 100644
--- a/bsd/kern/kern_lockf.c
+++ b/bsd/kern/kern_lockf.c
@@ -89,7 +89,7 @@ static int maxlockdepth = MAXDEPTH;
 void lf_print(const char *tag, struct lockf *lock);
 void lf_printlist(const char *tag, struct lockf *lock);
 static int	lockf_debug = 2;
-SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, "");
+SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &lockf_debug, 0, "");
 
 /*
 * If there is no mask bit selector, or there is one, and the selector is
@@ -129,11 +129,13 @@ static overlap_t lf_findoverlap(struct lockf *,
 	    struct lockf *, int, struct lockf ***, struct lockf **);
 static struct lockf *lf_getblock(struct lockf *);
 static int	 lf_getlock(struct lockf *, struct flock *);
+#if CONFIG_EMBEDDED
+static int	 lf_getlockpid(struct vnode *, struct flock *);
+#endif
 static int	 lf_setlock(struct lockf *);
 static int	 lf_split(struct lockf *, struct lockf *);
 static void	 lf_wakelock(struct lockf *, boolean_t);
 
-
 /*
  * lf_advlock
  *
@@ -172,6 +174,11 @@ lf_advlock(struct vnop_advlock_args *ap)
 
 	/* XXX HFS may need a !vnode_isreg(vp) EISDIR error here */
 
+#if CONFIG_EMBEDDED
+	if (ap->a_op == F_GETLKPID)
+		return lf_getlockpid(vp, fl);
+#endif
+
 	/*
 	 * Avoid the common case of unlocking when inode has no locks.
 	 */
@@ -289,7 +296,7 @@ lf_advlock(struct vnop_advlock_args *ap)
 		error = EINVAL;
 		break;
 	}
-	lck_mtx_unlock(&vp->v_lock);	/* done maniplulating the list */
+	lck_mtx_unlock(&vp->v_lock);	/* done manipulating the list */
 
 	LOCKF_DEBUG(0, "lf_advlock: normal exit: %d\n\n", error);
 	return (error);
@@ -297,25 +304,42 @@ lf_advlock(struct vnop_advlock_args *ap)
 
 
 /*
- * lf_coelesce_adjacent
+ * Take any lock attempts which are currently blocked by a given lock ("from")
+ * and mark them as blocked by a different lock ("to").  Used in the case
+ * where a byte range currently occupied by "from" is to be occupied by "to."
+ */
+static void
+lf_move_blocked(struct lockf *to, struct lockf *from)
+{
+	struct lockf *tlock;
+
+	TAILQ_FOREACH(tlock, &from->lf_blkhd, lf_block) {
+		tlock->lf_next = to;
+	}
+
+	TAILQ_CONCAT(&to->lf_blkhd, &from->lf_blkhd, lf_block);
+}
+
+/*
+ * lf_coalesce_adjacent
  *
- * Description:	Helper function: when setting a lock, coelesce adjacent
+ * Description:	Helper function: when setting a lock, coalesce adjacent
  *		locks.  Needed because adjacent locks are not overlapping,
- *		but POSIX requires that they be coelesced.
+ *		but POSIX requires that they be coalesced.
  *
  * Parameters:	lock			The new lock which may be adjacent
- *					to already locked reagions, and which
- *					should therefore be coelesced with them
+ *					to already locked regions, and which
+ *					should therefore be coalesced with them
  *
  * Returns:	<void>
  */
 static void
-lf_coelesce_adjacent(struct lockf *lock)
+lf_coalesce_adjacent(struct lockf *lock)
 {
 	struct lockf **lf = lock->lf_head;
 
 	while (*lf != NOLOCKF) {
-		/* reject locks that obviously could not be coelesced */
+		/* reject locks that obviously could not be coalesced */
 		if ((*lf == lock) ||
 		    ((*lf)->lf_id != lock->lf_id) ||
 		    ((*lf)->lf_type != lock->lf_type)) {
@@ -323,27 +347,38 @@ lf_coelesce_adjacent(struct lockf *lock)
 			continue;
 		}
 
+		/*
+		 * NOTE: Assumes that if two locks are adjacent on the number line 
+		 * and belong to the same owner, then they are adjacent on the list.
+		 */
+
 		/* If the lock ends adjacent to us, we can coalesce it */
 		if ((*lf)->lf_end != -1 &&
 		    ((*lf)->lf_end + 1) == lock->lf_start) {
 			struct lockf *adjacent = *lf;
 
-			LOCKF_DEBUG(0, "lf_coelesce_adjacent: coelesce adjacent previous\n");
+			LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent previous\n");
 			lock->lf_start = (*lf)->lf_start;
 			*lf = lock;
 			lf = &(*lf)->lf_next;
+
+			lf_move_blocked(lock, adjacent);
+
 			FREE(adjacent, M_LOCKF);
 			continue;
 		}
-		/* If the lock starts adjacent to us, we can coelesce it */
+		/* If the lock starts adjacent to us, we can coalesce it */
 		if (lock->lf_end != -1 &&
 		    (lock->lf_end + 1) == (*lf)->lf_start) {
 			struct lockf *adjacent = *lf;
 
-			LOCKF_DEBUG(0, "lf_coelesce_adjacent: coelesce adjacent following\n");
+			LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent following\n");
 			lock->lf_end = (*lf)->lf_end;
 			lock->lf_next = (*lf)->lf_next;
 			lf = &lock->lf_next;
+
+			lf_move_blocked(lock, adjacent);
+
 			FREE(adjacent, M_LOCKF);
 			continue;
 		}
@@ -373,7 +408,7 @@ lf_coelesce_adjacent(struct lockf *lock)
  *	msleep:EINTR
  *
  * Notes:	We add the lock to the provisional lock list.  We do not
- *		coelesce at this time; this has implications for other lock
+ *		coalesce at this time; this has implications for other lock
  *		requestors in the blocker search mechanism.
  */
 static int
@@ -518,13 +553,8 @@ lf_setlock(struct lockf *lock)
 		error = msleep(lock, &vp->v_lock, priority, lockstr, 0);
 
 		if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
-			struct lockf *tlock;
-
 		        if ((block = lf_getblock(lock))) {
-			        TAILQ_FOREACH(tlock, &lock->lf_blkhd, lf_block) {
-				        tlock->lf_next = block;
-				}
-			        TAILQ_CONCAT(&block->lf_blkhd, &lock->lf_blkhd, lf_block);
+				lf_move_blocked(block, lock);
 			}
 		}
 		if (error) {	/* XXX */
@@ -589,7 +619,7 @@ lf_setlock(struct lockf *lock)
 			        lf_wakelock(overlap, TRUE);
 			overlap->lf_type = lock->lf_type;
 			FREE(lock, M_LOCKF);
-			lock = overlap; /* for lf_coelesce_adjacent() */
+			lock = overlap; /* for lf_coalesce_adjacent() */
 			break;
 
 		case OVERLAP_CONTAINS_LOCK:
@@ -598,7 +628,7 @@ lf_setlock(struct lockf *lock)
 			 */
 			if (overlap->lf_type == lock->lf_type) {
 				FREE(lock, M_LOCKF);
-				lock = overlap; /* for lf_coelesce_adjacent() */
+				lock = overlap; /* for lf_coalesce_adjacent() */
 				break;
 			}
 			if (overlap->lf_start == lock->lf_start) {
@@ -676,8 +706,8 @@ lf_setlock(struct lockf *lock)
 		}
 		break;
 	}
-	/* Coelesce adjacent locks with identical attributes */
-	lf_coelesce_adjacent(lock);
+	/* Coalesce adjacent locks with identical attributes */
+	lf_coalesce_adjacent(lock);
 #ifdef LOCKF_DEBUGGING
 	if (lockf_debug & 1) {
 		lf_print("lf_setlock: got the lock", lock);
@@ -825,6 +855,55 @@ lf_getlock(struct lockf *lock, struct flock *fl)
 	return (0);
 }
 
+#if CONFIG_EMBEDDED
+static int
+lf_getlockpid(struct vnode *vp, struct flock *fl)
+{
+	struct lockf *lf, *blk;
+
+	if (vp == 0)
+		return EINVAL;
+
+	fl->l_type = F_UNLCK;
+	
+	lck_mtx_lock(&vp->v_lock);
+
+	for (lf = vp->v_lockf; lf; lf = lf->lf_next) {
+
+		if (lf->lf_flags & F_POSIX) {
+			if ((((struct proc *)lf->lf_id)->p_pid) == fl->l_pid) {
+				fl->l_type = lf->lf_type;
+				fl->l_whence = SEEK_SET;
+				fl->l_start = lf->lf_start;
+				if (lf->lf_end == -1)
+					fl->l_len = 0;
+				else
+					fl->l_len = lf->lf_end - lf->lf_start + 1;
+
+				break;
+			}
+		}
+
+		TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
+			if (blk->lf_flags & F_POSIX) {
+				if ((((struct proc *)blk->lf_id)->p_pid) == fl->l_pid) {
+					fl->l_type = blk->lf_type;
+					fl->l_whence = SEEK_SET;
+					fl->l_start = blk->lf_start;
+					if (blk->lf_end == -1)
+						fl->l_len = 0;
+					else
+						fl->l_len = blk->lf_end - blk->lf_start + 1;
+
+					break;
+				}
+			}
+		}
+	}
+
+	lck_mtx_unlock(&vp->v_lock);
+	return (0);
+}
+#endif
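
For reference, the adjacency rule driving lf_coalesce_adjacent(): same-owner, same-type ranges merge exactly when one ends at the byte before the other starts, so [0,99] plus [100,199] becomes [0,199], after which lf_move_blocked() repoints any waiters to the surviving lock. A toy model (not the kernel structures):

#include <stdbool.h>
#include <sys/types.h>

struct range {
	off_t start;
	off_t end;		/* -1 means "through EOF" */
};

static bool
ranges_adjacent(const struct range *a, const struct range *b)
{
	return (a->end != -1 && a->end + 1 == b->start);
}

static struct range
ranges_merge(struct range a, struct range b)	/* caller checks adjacency */
{
	struct range r = { a.start, b.end };

	return r;
}
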
 
 /*
  * lf_getblock
@@ -901,7 +980,7 @@ lf_getblock(struct lockf *lock)
  *		while lf_setlock will iterate over all overlapping locks to
  *
  *		The check parameter can be SELF, meaning we are looking for
- *		overelapping locks owned by us, or it can be OTHERS, meaning
+ *		overlapping locks owned by us, or it can be OTHERS, meaning
  *		we are looking for overlapping locks owned by someone else so
  *		we can report a blocking lock on an F_GETLK request.
  *
@@ -913,6 +992,7 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
 	       struct lockf ***prev, struct lockf **overlap)
 {
 	off_t start, end;
+	int found_self = 0;
 
 	*overlap = lf;
 	if (lf == NOLOCKF)
@@ -926,10 +1006,28 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
 	while (lf != NOLOCKF) {
 		if (((type & SELF) && lf->lf_id != lock->lf_id) ||
 		    ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
+			/* 
+			 * Locks belonging to one process are adjacent on the
+			 * list, so if we've found any locks belonging to us,
+			 * and we're now seeing something else, then we've
+			 * examined all "self" locks.  Note that bailing out
+			 * here is quite important; for coalescing, we assume 
+			 * numerically adjacent locks from the same owner to 
+			 * be adjacent on the list.
+			 */
+			if ((type & SELF) && found_self) {
+				return OVERLAP_NONE;
+			}
+
 			*prev = &lf->lf_next;
 			*overlap = lf = lf->lf_next;
 			continue;
 		}
+
+		if ((type & SELF)) {
+			found_self = 1;
+		}
+
 #ifdef LOCKF_DEBUGGING
 		if (lockf_debug & 2)
 			lf_print("\tchecking", lf);
@@ -941,6 +1039,11 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
 		    (end != -1 && lf->lf_start > end)) {
 			/* Case 0 */
 			LOCKF_DEBUG(2, "no overlap\n");
+
+			/*
+			 * NOTE: assumes that locks for the same process are 
+			 * nonintersecting and ordered.
+			 */
 			if ((type & SELF) && end != -1 && lf->lf_start > end)
 				return (OVERLAP_NONE);
 			*prev = &lf->lf_next;
diff --git a/bsd/kern/kern_malloc.c b/bsd/kern/kern_malloc.c
index ff86bfff6..c1700ee51 100644
--- a/bsd/kern/kern_malloc.c
+++ b/bsd/kern/kern_malloc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -293,9 +293,15 @@ const char *memname[] = {
 #else
 	"",             /* 109 M_DECMPFS_CNODE */
 #endif /* HFS_COMPRESSION */
+	"ipmfilter",	/* 110 M_INMFILTER */
+	"ipmsource",	/* 111 M_IPMSOURCE */
+	"in6mfilter", 	/* 112 M_IN6MFILTER */
+	"ip6mopts",	/* 113 M_IP6MOPTS */
+	"ip6msource",	/* 114 M_IP6MSOURCE */
 };
 
 /* for use with kmzones.kz_zalloczone */
+#define KMZ_CREATEZONE_ACCT	((void *)-3)
 #define	KMZ_CREATEZONE		((void *)-2)
 #define KMZ_LOOKUPZONE		((void *)-1)
 #define KMZ_MALLOC			((void *)0)
@@ -332,7 +338,7 @@ struct kmzones {
 	{ 0,		KMZ_MALLOC, FALSE },		/* 21 M_FHANDLE */
 #if (NFSCLIENT || NFSSERVER)
 	{ SOS(nfsreq),	KMZ_CREATEZONE, FALSE },	/* 22 M_NFSREQ */
-	{ SOS(nfsmount),	KMZ_CREATEZONE, FALSE },/* 23 M_NFSMNT */
+	{ SOS(nfsmount),KMZ_CREATEZONE, FALSE },	/* 23 M_NFSMNT */
 	{ SOS(nfsnode),	KMZ_CREATEZONE, FALSE },	/* 24 M_NFSNODE */
 #else
 	{ 0,		KMZ_MALLOC, FALSE },		/* 22 M_NFSREQ */
@@ -340,25 +346,25 @@ struct kmzones {
 	{ 0,		KMZ_MALLOC, FALSE },		/* 24 M_NFSNODE */
 #endif
 	{ SOS(vnode),	KMZ_CREATEZONE, TRUE },		/* 25 M_VNODE */
-	{ SOS(namecache),KMZ_CREATEZONE, FALSE },	/* 26 M_CACHE */
+	{ SOS(namecache), KMZ_CREATEZONE, FALSE },	/* 26 M_CACHE */
 #if QUOTA
 	{ SOX(dquot),	KMZ_LOOKUPZONE, FALSE },	/* 27 M_DQUOT */
 #else
 	{ 0,		KMZ_MALLOC, FALSE },		/* 27 M_DQUOT */
 #endif
 	{ 0,		KMZ_MALLOC, FALSE },		/* 28 M_UFSMNT */
-	{ 0,		KMZ_MALLOC, FALSE },		/* 29 M_CGSUM */
+	{ 0,		KMZ_MALLOC, FALSE },		/* 29 M_SHM */
 	{ SOS(plimit),	KMZ_CREATEZONE, TRUE },		/* 30 M_PLIMIT */
-	{ SOS(sigacts),	KMZ_CREATEZONE, TRUE },		/* 31 M_SIGACTS */
+	{ SOS(sigacts),	KMZ_CREATEZONE_ACCT, TRUE },	/* 31 M_SIGACTS */
 	{ 0,		KMZ_MALLOC, FALSE },		/* 32 M_VMOBJ */
 	{ 0,		KMZ_MALLOC, FALSE },		/* 33 M_VMOBJHASH */
 	{ 0,		KMZ_MALLOC, FALSE },		/* 34 M_VMPMAP */
 	{ 0,		KMZ_MALLOC, FALSE },		/* 35 M_VMPVENT */
 	{ 0,		KMZ_MALLOC, FALSE },		/* 36 M_VMPAGER */
 	{ 0,		KMZ_MALLOC, FALSE },		/* 37 M_VMPGDATA */
-	{ SOS(fileproc),KMZ_CREATEZONE, TRUE },		/* 38 M_FILEPROC */
-	{ SOS(filedesc),KMZ_CREATEZONE, TRUE },		/* 39 M_FILEDESC */
-	{ SOX(lockf),	KMZ_CREATEZONE, TRUE },		/* 40 M_LOCKF */
+	{ SOS(fileproc),KMZ_CREATEZONE_ACCT, TRUE },	/* 38 M_FILEPROC */
+	{ SOS(filedesc),KMZ_CREATEZONE_ACCT, TRUE },	/* 39 M_FILEDESC */
+	{ SOX(lockf),	KMZ_CREATEZONE_ACCT, TRUE },	/* 40 M_LOCKF */
 	{ SOS(proc),	KMZ_CREATEZONE, FALSE },	/* 41 M_PROC */
 	{ SOS(pstats),	KMZ_CREATEZONE, TRUE },		/* 42 M_PSTATS */
 	{ 0,		KMZ_MALLOC, FALSE },		/* 43 M_SEGMENT */
@@ -370,10 +376,10 @@ struct kmzones {
 	{ 0,		KMZ_MALLOC, FALSE },		/* 49 M_NETADDR */
 #if (NFSCLIENT || NFSSERVER)
 	{ SOX(nfsrv_sock),
-	                KMZ_CREATEZONE, FALSE },	/* 50 M_NFSSVC */
+	                KMZ_CREATEZONE_ACCT, FALSE },	/* 50 M_NFSSVC */
 	{ 0,		KMZ_MALLOC, FALSE },		/* 51 M_NFSUID */
 	{ SOX(nfsrvcache),
-	                KMZ_CREATEZONE, FALSE },	/* 52 M_NFSD */
+	                KMZ_CREATEZONE_ACCT, FALSE },	/* 52 M_NFSD */
 #else
 	{ 0,		KMZ_MALLOC, FALSE },		/* 50 M_NFSSVC */
 	{ 0,		KMZ_MALLOC, FALSE },		/* 51 M_NFSUID */
@@ -389,7 +395,7 @@ struct kmzones {
 	{ 0,		KMZ_MALLOC, FALSE },		/* 58 unused entry */
 #if (NFSCLIENT || NFSSERVER)
 	{ SOS(nfsrv_descript),
-	                KMZ_CREATEZONE, FALSE },	/* 59 M_NFSRVDESC */
+	                KMZ_CREATEZONE_ACCT, FALSE },	/* 59 M_NFSRVDESC */
 	{ SOS(nfsdmap),	KMZ_CREATEZONE, FALSE },	/* 60 M_NFSDIROFF */
 	{ SOS(fhandle),	KMZ_LOOKUPZONE, FALSE },	/* 61 M_NFSBIGFH */
 #else
@@ -407,9 +413,9 @@ struct kmzones {
 	{ 0,		KMZ_MALLOC, FALSE },		/* 69 M_ADOSFSMNT */
 	{ 0,		KMZ_MALLOC, FALSE },		/* 70 M_ADOSFSNODE */
 	{ 0,		KMZ_MALLOC, FALSE },		/* 71 M_ANODE */
-	{ SOX(buf),	KMZ_CREATEZONE, TRUE },		/* 72 M_BUFHDR */
+	{ 0,		KMZ_MALLOC, TRUE },		/* 72 M_BUFHDR */
 	{ (NDFILE * OFILESIZE),
-	                KMZ_CREATEZONE, FALSE },	/* 73 M_OFILETABL */
+	                KMZ_CREATEZONE_ACCT, FALSE },	/* 73 M_OFILETABL */
 	{ MCLBYTES,	KMZ_CREATEZONE, FALSE },	/* 74 M_MCLUST */
 #if HFS
 	{ SOX(hfsmount),KMZ_LOOKUPZONE, FALSE },	/* 75 M_HFSMNT */
@@ -437,15 +443,15 @@ struct kmzones {
 	{ SOS(journal), KMZ_CREATEZONE, FALSE },	/* 91 M_JNL_JNL */
 	{ SOS(transaction), KMZ_CREATEZONE, FALSE },	/* 92 M_JNL_TR */
 #else
-	{ 0,	 KMZ_MALLOC, FALSE },			/* 91 M_JNL_JNL */
-	{ 0,	 KMZ_MALLOC, FALSE },			/* 92 M_JNL_TR */
+	{ 0,	 	KMZ_MALLOC, FALSE },		/* 91 M_JNL_JNL */
+	{ 0,	 	KMZ_MALLOC, FALSE },		/* 92 M_JNL_TR */
 #endif
-	{ SOS(specinfo), KMZ_CREATEZONE, TRUE },	/* 93 M_SPECINFO */
-	{ SOS(kqueue), KMZ_CREATEZONE, FALSE },		/* 94 M_KQUEUE */
+	{ SOS(specinfo),KMZ_CREATEZONE, TRUE },		/* 93 M_SPECINFO */
+	{ SOS(kqueue),	KMZ_CREATEZONE, FALSE },	/* 94 M_KQUEUE */
 #if HFS
-	{ SOS(directoryhint), KMZ_CREATEZONE, FALSE },	/* 95 M_HFSDIRHINT */
+	{ SOS(directoryhint), KMZ_CREATEZONE, TRUE },	/* 95 M_HFSDIRHINT */
 #else
-	{ 0,	KMZ_MALLOC, FALSE },			/* 95 M_HFSDIRHINT */
+	{ 0,		KMZ_MALLOC, FALSE },		/* 95 M_HFSDIRHINT */
 #endif
 	{ SOS(cl_readahead),  KMZ_CREATEZONE, TRUE },	/* 96 M_CLRDAHEAD */
 	{ SOS(cl_writebehind),KMZ_CREATEZONE, TRUE },	/* 97 M_CLWRBEHIND */
@@ -454,7 +460,7 @@ struct kmzones {
 	{ 0,		KMZ_MALLOC, FALSE },		/* 100 M_KAUTH */
 	{ 0,		KMZ_MALLOC, FALSE },		/* 101 M_DUMMYNET */
 #ifndef __LP64__
-	{ SOS(unsafe_fsnode),KMZ_CREATEZONE, FALSE },	/* 102 M_UNSAFEFS */
+	{ SOS(unsafe_fsnode),KMZ_CREATEZONE, TRUE },	/* 102 M_UNSAFEFS */
 #else 
 	{ 0,		KMZ_MALLOC, FALSE },		/* 102 M_UNSAFEFS */
 #endif /* __LP64__ */
@@ -465,10 +471,15 @@ struct kmzones {
 	{ 0,		KMZ_MALLOC, FALSE },		/* 107 M_LCTX */
 	{ 0,		KMZ_MALLOC, FALSE },		/* 108 M_TRAFFIC_MGT */
 #if HFS_COMPRESSION
-	{ SOS(decmpfs_cnode),KMZ_CREATEZONE, FALSE },	/* 109 M_DECMPFS_CNODE */
+	{ SOS(decmpfs_cnode),KMZ_CREATEZONE, TRUE },	/* 109 M_DECMPFS_CNODE */
 #else
 	{ 0,		KMZ_MALLOC, FALSE },		/* 109 M_DECMPFS_CNODE */
 #endif /* HFS_COMPRESSION */
+ 	{ 0,		KMZ_MALLOC, FALSE },		/* 110 M_INMFILTER */
+	{ 0,		KMZ_MALLOC, FALSE },		/* 111 M_IPMSOURCE */
+	{ 0,		KMZ_MALLOC, FALSE },		/* 112 M_IN6MFILTER */
+	{ 0,		KMZ_MALLOC, FALSE },		/* 113 M_IP6MOPTS */
+	{ 0,		KMZ_MALLOC, FALSE },		/* 114 M_IP6MSOURCE */
 #undef	SOS
 #undef	SOX
 };
@@ -495,10 +506,14 @@ kmeminit(void)
 			;
 		else
 /* XXX */
-		if (kmz->kz_zalloczone == KMZ_CREATEZONE) {
+		if (kmz->kz_zalloczone == KMZ_CREATEZONE ||
+		    kmz->kz_zalloczone == KMZ_CREATEZONE_ACCT) {
 			kmz->kz_zalloczone = zinit(kmz->kz_elemsize,
 						1024 * 1024, PAGE_SIZE,
 						memname[kmz - kmzones]);
+			zone_change(kmz->kz_zalloczone, Z_CALLERACCT,
+				    (kmz->kz_zalloczone == KMZ_CREATEZONE_ACCT));
+
 			if (kmz->kz_noencrypt == TRUE)
 				zone_change(kmz->kz_zalloczone, Z_NOENCRYPT, TRUE);
 		}
@@ -526,12 +541,6 @@ kmeminit(void)
 	}
 }
 
-#define	MDECL(reqlen)					\
-union {							\
-	struct	_mhead hdr;				\
-	char	_m[(reqlen) + sizeof (struct _mhead)];	\
-}
-
 struct _mhead {
 	size_t	mlen;
 	char	dat[0];
@@ -543,8 +552,8 @@ _MALLOC(
 	int		type,
 	int		flags)
 {
-	MDECL(size)	*mem;
-	size_t		memsize = sizeof (*mem);
+	struct _mhead	*hdr;
+	size_t		memsize = sizeof (*hdr) + size;
 
 	if (type >= M_LAST)
 		panic("_malloc TYPE");
@@ -553,11 +562,11 @@ _MALLOC(
 		return (NULL);
 
 	if (flags & M_NOWAIT) {
-		mem = (void *)kalloc_noblock(memsize);
+		hdr = (void *)kalloc_noblock(memsize);
 	} else {
-		mem = (void *)kalloc(memsize);
+		hdr = (void *)kalloc(memsize);
 
-		if (mem == NULL) {
+		if (hdr == NULL) {
 
 			/*
 			 * We get here when the caller told us to block waiting for memory, but
@@ -572,15 +581,15 @@ _MALLOC(
 			panic("_MALLOC: kalloc returned NULL (potential leak), size %llu", (uint64_t) size);
 		}
 	}
-	if (!mem)
+	if (!hdr)
 		return (0);
 
-	mem->hdr.mlen = memsize;
+	hdr->mlen = memsize;
 
 	if (flags & M_ZERO)
-		bzero(mem->hdr.dat, size);
+		bzero(hdr->dat, size);
 
-	return  (mem->hdr.dat);
+	return  (hdr->dat);
 }
 
 void
@@ -600,6 +609,36 @@ _FREE(
 	kfree(hdr, hdr->mlen);
 }
 
+void *
+_REALLOC(
+	void		*addr,
+	size_t		size,
+	int		type,
+	int		flags)
+{
+	struct _mhead	*hdr;
+	void		*newaddr;
+	size_t		alloc;
+
+	/* realloc(NULL, ...) is equivalent to malloc(...) */
+	if (addr == NULL)
+		return (_MALLOC(size, type, flags));
+
+	/* Allocate a new, bigger (or smaller) block */
+	if ((newaddr = _MALLOC(size, type, flags)) == NULL)
+		return (NULL);
+
+	hdr = addr;
+	--hdr;
+	alloc = hdr->mlen - sizeof (*hdr);
+
+	/* Copy over original contents */
+	bcopy(addr, newaddr, MIN(size, alloc));
+	_FREE(addr, type);
+
+	return (newaddr);
+}
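
_REALLOC works because _MALLOC stashes the total allocation size in an _mhead header just below the returned pointer, so the old payload size can be recovered by stepping back one header. A user-space model of that trick (malloc/free substituted for kalloc/kfree; illustrative names):

#include <stdlib.h>
#include <string.h>

struct mhead { size_t mlen; };

static void *
demo_malloc(size_t size)
{
	struct mhead *hdr = malloc(sizeof(*hdr) + size);

	if (hdr == NULL)
		return NULL;
	hdr->mlen = sizeof(*hdr) + size;	/* total, like hdr->mlen above */
	return hdr + 1;				/* caller sees only the payload */
}

static void *
demo_realloc(void *addr, size_t size)
{
	struct mhead *hdr;
	void *newaddr;
	size_t old_payload;

	if (addr == NULL)
		return demo_malloc(size);	/* realloc(NULL, n) == malloc(n) */
	if ((newaddr = demo_malloc(size)) == NULL)
		return NULL;

	hdr = (struct mhead *)addr - 1;		/* step back to the header */
	old_payload = hdr->mlen - sizeof(*hdr);
	memcpy(newaddr, addr, size < old_payload ? size : old_payload);
	free(hdr);
	return newaddr;
}
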
+
 void *
 _MALLOC_ZONE(
 	size_t		size,
@@ -660,3 +699,116 @@ _FREE_ZONE(
 	else
 		kfree(elem, size);
 }
+
+#if CONFIG_ZLEAKS
+
+SYSCTL_DECL(_kern_zleak);
+SYSCTL_NODE(_kern, OID_AUTO, zleak, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "zleak");
+
+/*
+ * kern.zleak.active
+ *
+ * Show the status of the zleak subsystem (0 = enabled, 1 = active,
+ * and -1 = failed), and if enabled, allow it to be activated immediately.
+ */
+static int
+sysctl_zleak_active SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+	int oldval, val, error;
+
+	val = oldval = get_zleak_state();
+	error = sysctl_handle_int(oidp, &val, 0, req);
+	if (error || !req->newptr)
+		return (error);
+	/*
+	 * Can only be activated if it's off (and not failed.)
+	 * Cannot be deactivated once it's on.
+	 */
+	if (val == 1 && oldval == 0) {
+		kern_return_t kr = zleak_activate();
+
+		if (KERN_SUCCESS != kr)
+			printf("zleak_active: failed to activate "
+			    "live zone leak debugging (%d).\n", kr);
+	} else if (val == 0 && oldval == 1) {
+		printf("zleak_active: active, cannot be disabled.\n");
+		return (EINVAL);
+	}
+	return (0);
+}
+
+SYSCTL_PROC(_kern_zleak, OID_AUTO, active,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+    0, 0, sysctl_zleak_active, "I", "zleak activity");
+
+/*
+ * kern.zleak.max_zonemap_size
+ *
+ * Read the value of the maximum zonemap size in bytes; useful
+ * as the maximum size that zleak.global_threshold and
+ * zleak.zone_threshold should be set to.
+ */
+static int
+sysctl_zleak_max_zonemap_size SYSCTL_HANDLER_ARGS
+{
+	uint64_t zmap_max_size = *(vm_size_t *)arg1;
+
+	return sysctl_handle_quad(oidp, &zmap_max_size, arg2, req);
+}
+
+SYSCTL_PROC(_kern_zleak, OID_AUTO, max_zonemap_size,
+    CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED,
+    &zleak_max_zonemap_size, 0,
+    sysctl_zleak_max_zonemap_size, "Q", "zleak max zonemap size");
+
+
+static int
+sysctl_zleak_threshold SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg2)
+	int error;
+	uint64_t value = *(vm_size_t *)arg1;
+
+	error = sysctl_io_number(req, value, sizeof (value), &value, NULL);
+
+	if (error || !req->newptr)
+		return (error);
+
+	if (value > (uint64_t)zleak_max_zonemap_size)
+		return (ERANGE);
+
+	*(vm_size_t *)arg1 = value;
+	return (0);
+}
+
+/*
+ * kern.zleak.global_threshold
+ *
+ * Set the global zleak threshold size (in bytes).  If the zone map
+ * grows larger than this value, zleaks are automatically activated.
+ *
+ * The default value is set in zleak_init().
+ */
+SYSCTL_PROC(_kern_zleak, OID_AUTO, global_threshold,
+    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+    &zleak_global_tracking_threshold, 0,
+    sysctl_zleak_threshold, "Q", "zleak global threshold");
+
+/*
+ * kern.zleak.zone_threshold
+ *
+ * Set the per-zone threshold size (in bytes) above which any
+ * zone will automatically start zleak tracking.
+ *
+ * The default value is set in zleak_init().
+ *
+ * Setting this variable will have no effect until zleak tracking is
+ * activated (See above.)
+ */
+SYSCTL_PROC(_kern_zleak, OID_AUTO, zone_threshold,
+    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+    &zleak_per_zone_tracking_threshold, 0,
+    sysctl_zleak_threshold, "Q", "zleak per-zone threshold");
+
+#endif	/* CONFIG_ZLEAKS */
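
These knobs can be driven from user space with sysctlbyname(3); a sketch (error handling abbreviated, root privilege assumed for the write):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	int active = 0;
	size_t len = sizeof(active);
	uint64_t zmap_max = 0;
	size_t qlen = sizeof(zmap_max);

	sysctlbyname("kern.zleak.active", &active, &len, NULL, 0);
	sysctlbyname("kern.zleak.max_zonemap_size", &zmap_max, &qlen, NULL, 0);
	printf("zleak state %d, max zonemap %llu bytes\n",
	    active, (unsigned long long)zmap_max);

	if (active == 0) {
		int one = 1;

		/* per the handler above, writing 0 back later would
		 * fail with EINVAL once tracking is active */
		sysctlbyname("kern.zleak.active", NULL, NULL, &one, sizeof(one));
	}
	return 0;
}
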
diff --git a/bsd/kern/kern_memorystatus.c b/bsd/kern/kern_memorystatus.c
index dfbaa794d..489ddd2be 100644
--- a/bsd/kern/kern_memorystatus.c
+++ b/bsd/kern/kern_memorystatus.c
@@ -31,6 +31,8 @@
 #include <sys/kern_memorystatus.h>
 
 #include <kern/sched_prim.h>
+#include <kern/kalloc.h>
+#include <kern/debug.h>
 #include <kern/lock.h>
 #include <kern/task.h>
 #include <kern/thread.h>
@@ -42,6 +44,126 @@
 #include <sys/signalvar.h>
 #include <sys/sysctl.h>
 #include <sys/wait.h>
+#include <sys/tree.h>
+#include <pexpert/pexpert.h>
+
+#if CONFIG_FREEZE
+#include <vm/vm_protos.h>
+#include <vm/vm_map.h>
+
+enum {
+	kProcessSuspended =        (1 << 0), 
+	kProcessHibernated =       (1 << 1),
+	kProcessNoReclaimWorth =   (1 << 2),
+	kProcessIgnored =          (1 << 3),
+	kProcessBusy =             (1 << 4)
+};
+
+static lck_mtx_t * hibernation_mlock;
+static lck_attr_t * hibernation_lck_attr;
+static lck_grp_t * hibernation_lck_grp;
+static lck_grp_attr_t * hibernation_lck_grp_attr;
+
+typedef struct hibernation_node {
+	RB_ENTRY(hibernation_node) link;
+	pid_t pid;
+	uint32_t state;
+	mach_timespec_t hibernation_ts;
+} hibernation_node;
+
+static int hibernation_tree_compare(hibernation_node *n1, hibernation_node *n2) {
+	if (n1->pid < n2->pid)
+		return -1;
+	else if (n1->pid > n2->pid)
+		return 1;
+	else
+		return 0;
+}
+
+static RB_HEAD(hibernation_tree, hibernation_node) hibernation_tree_head;
+RB_PROTOTYPE_SC(static, hibernation_tree, hibernation_node, link, hibernation_tree_compare);
+
+RB_GENERATE(hibernation_tree, hibernation_node, link, hibernation_tree_compare);
+
+static inline boolean_t kern_hibernation_can_hibernate_processes(void);
+static boolean_t kern_hibernation_can_hibernate(void);
+
+static void kern_hibernation_add_node(hibernation_node *node);
+static hibernation_node *kern_hibernation_get_node(pid_t pid);
+static void kern_hibernation_release_node(hibernation_node *node);
+static void kern_hibernation_free_node(hibernation_node *node, boolean_t unlock);
+
+static void kern_hibernation_register_pid(pid_t pid);
+static void kern_hibernation_unregister_pid(pid_t pid);
+
+static int kern_hibernation_get_process_state(pid_t pid, uint32_t *state, mach_timespec_t *ts);
+static int kern_hibernation_set_process_state(pid_t pid, uint32_t state);
+
+static void kern_hibernation_cull(void);
+
+static void kern_hibernation_thread(void);
+
+extern boolean_t vm_freeze_enabled;
+
+int kern_hibernation_wakeup = 0;
+
+static int jetsam_priority_list_hibernation_index = 0;
+
+/* Thresholds */
+static int kern_memorystatus_level_hibernate = 50;
+
+#define HIBERNATION_PAGES_MIN   ( 1 * 1024 * 1024 / PAGE_SIZE)
+#define HIBERNATION_PAGES_MAX   (16 * 1024 * 1024 / PAGE_SIZE)
+
+static unsigned int kern_memorystatus_hibernation_pages_min   = HIBERNATION_PAGES_MIN;
+static unsigned int kern_memorystatus_hibernation_pages_max   = HIBERNATION_PAGES_MAX;
+
+static unsigned int kern_memorystatus_suspended_count = 0;
+static unsigned int kern_memorystatus_hibernated_count = 0;
+
+static unsigned int kern_memorystatus_hibernation_suspended_minimum = 4;
+
+static unsigned int kern_memorystatus_low_swap_pages = 0;
+
+/* Throttling */
+#define HIBERNATION_DAILY_MB_MAX 	  1024
+#define HIBERNATION_DAILY_PAGEOUTS_MAX (HIBERNATION_DAILY_MB_MAX * (1024 * 1024 / PAGE_SIZE))
+
+static struct throttle_interval_t {
+	uint32_t mins;
+	uint32_t burst_multiple;
+	uint32_t pageouts;
+	uint32_t max_pageouts;
+	mach_timespec_t ts;
+	boolean_t throttle;
+} throttle_intervals[] = {
+	{ 	   60,  8, 0, 0, { 0, 0 }, FALSE }, /* 1 hour intermediate interval, 8x burst */
+	{ 24 * 60,  1, 0, 0, { 0, 0 }, FALSE }, /* 24 hour long interval, no burst */
+};
+
+/* Stats */
+static uint64_t kern_memorystatus_hibernation_count = 0;
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_hibernation_count, CTLFLAG_RD, &kern_memorystatus_hibernation_count, "");
+
+static uint64_t kern_memorystatus_hibernation_pageouts = 0;
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_hibernation_pageouts, CTLFLAG_RD, &kern_memorystatus_hibernation_pageouts, "");
+
+static uint64_t kern_memorystatus_hibernation_throttle_count = 0;
+SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_hibernation_throttle_count, CTLFLAG_RD, &kern_memorystatus_hibernation_throttle_count, "");
+
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_hibernation_min_processes, CTLFLAG_RW, &kern_memorystatus_hibernation_suspended_minimum, 0, "");
+
+#if DEVELOPMENT || DEBUG
+/* Allow parameter tweaking in these builds */
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_level_hibernate, CTLFLAG_RW, &kern_memorystatus_level_hibernate, 0, "");
+
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_hibernation_pages_min, CTLFLAG_RW, &kern_memorystatus_hibernation_pages_min, 0, "");
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_hibernation_pages_max, CTLFLAG_RW, &kern_memorystatus_hibernation_pages_max, 0, "");
+
+boolean_t kern_memorystatus_hibernation_throttle_enabled = TRUE;
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_hibernation_throttle_enabled, CTLFLAG_RW, &kern_memorystatus_hibernation_throttle_enabled, 0, "");
+#endif /* DEVELOPMENT || DEBUG */
+#endif /* CONFIG_FREEZE */
 
 extern unsigned int    vm_page_free_count;
 extern unsigned int    vm_page_active_count;
@@ -54,6 +176,8 @@ static void kern_memorystatus_thread(void);
 int kern_memorystatus_wakeup = 0;
 int kern_memorystatus_level = 0;
 int kern_memorystatus_last_level = 0;
+unsigned int kern_memorystatus_delta;
+
 unsigned int kern_memorystatus_kev_failure_count = 0;
 int kern_memorystatus_level_critical = 5;
 #define kern_memorystatus_level_highwater (kern_memorystatus_level_critical + 5)
@@ -76,16 +200,66 @@ static lck_attr_t * jetsam_lck_attr;
 static lck_grp_t * jetsam_lck_grp;
 static lck_grp_attr_t * jetsam_lck_grp_attr;
 
-SYSCTL_INT(_kern, OID_AUTO, memorystatus_level, CTLFLAG_RD, &kern_memorystatus_level, 0, "");
-SYSCTL_UINT(_kern, OID_AUTO, memorystatus_kev_failure_count, CTLFLAG_RD, &kern_memorystatus_kev_failure_count, 0, "");
+SYSCTL_INT(_kern, OID_AUTO, memorystatus_level, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_memorystatus_level, 0, "");
+SYSCTL_UINT(_kern, OID_AUTO, memorystatus_kev_failure_count, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_memorystatus_kev_failure_count, 0, "");
+
+#if DEVELOPMENT || DEBUG
+
+enum {
+	kJetsamDiagnosticModeNone =              0, 
+	kJetsamDiagnosticModeAll  =              1,
+	kJetsamDiagnosticModeStopAtFirstActive = 2
+} jetsam_diagnostic_mode = kJetsamDiagnosticModeNone;
+
+static int jetsam_diagnostic_suspended_one_active_proc = 0;
+
+static int
+sysctl_jetsam_diagnostic_mode SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+	int error, val = jetsam_diagnostic_mode;
+	boolean_t disabled;
+
+	error = sysctl_handle_int(oidp, &val, 0, req);
+	if (error || !req->newptr)
+		return (error);

+	if ((val < 0) || (val > 2)) {
+		printf("jetsam: diagnostic mode: invalid value - %d\n", val);
+		return (0);
+	}
+	
+	/*
+	 * When diagnostic mode is enabled, raise the critical memory threshold so
+	 * jetsam triggers earlier; restore the default level when it's disabled.
+	 */
+	disabled = (val == 0) && (jetsam_diagnostic_mode != kJetsamDiagnosticModeNone);
+	
+	jetsam_diagnostic_mode = val;
+	
+	if (disabled) {
+		kern_memorystatus_level_critical = 5;
+		printf("jetsam: diagnostic mode: resetting critical level to %d\n", kern_memorystatus_level_critical);
+	} else {
+		kern_memorystatus_level_critical = 10;
+		printf("jetsam: diagnostic mode: %d: increasing critical level to %d\n", (int) jetsam_diagnostic_mode, kern_memorystatus_level_critical);
+		if (jetsam_diagnostic_mode == kJetsamDiagnosticModeStopAtFirstActive)
+			printf("jetsam: diagnostic mode: will stop at first active app\n");
+	}
+	
+	return (0);
+}
+
+SYSCTL_PROC(_debug, OID_AUTO, jetsam_diagnostic_mode, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
+  		&jetsam_diagnostic_mode, 0, sysctl_jetsam_diagnostic_mode, "I", "Jetsam Diagnostic Mode");
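+/* Example (DEVELOPMENT/DEBUG kernels only):
+ *   sysctl -w debug.jetsam_diagnostic_mode=2
+ * suspends victims for inspection instead of killing them, stopping once the
+ * first active (frontmost) process has been suspended. */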
+#endif /* DEVELOPMENT || DEBUG */
 
 __private_extern__ void
 kern_memorystatus_init(void)
 {
-    jetsam_lck_attr = lck_attr_alloc_init();
-    jetsam_lck_grp_attr= lck_grp_attr_alloc_init();
-    jetsam_lck_grp = lck_grp_alloc_init("jetsam",  jetsam_lck_grp_attr);
-    jetsam_list_mlock = lck_mtx_alloc_init(jetsam_lck_grp, jetsam_lck_attr);
+	jetsam_lck_attr = lck_attr_alloc_init();
+	jetsam_lck_grp_attr= lck_grp_attr_alloc_init();
+	jetsam_lck_grp = lck_grp_alloc_init("jetsam",  jetsam_lck_grp_attr);
+	jetsam_list_mlock = lck_mtx_alloc_init(jetsam_lck_grp, jetsam_lck_attr);
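+	/* kern_memorystatus_delta: 5% of physical memory, in pages (atop_64() converts bytes to pages) */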
+	kern_memorystatus_delta = 5 * atop_64(max_mem) / 100;
 
 	(void)kernel_thread(kernel_task, kern_memorystatus_thread);
 }
@@ -153,48 +327,107 @@ jetsam_snapshot_procs(void)
 }
 
 static void
-jetsam_mark_pid_in_snapshot(pid_t pid, int flag)
+jetsam_mark_pid_in_snapshot(pid_t pid, int flags)
 {
 
 	int i = 0;
 
 	for (i = 0; i < jetsam_snapshot_list_count; i++) {
 		if (jetsam_snapshot_list[i].pid == pid) {
-			jetsam_snapshot_list[i].flags |= flag;
+			jetsam_snapshot_list[i].flags |= flags;
 			return;
 		}
 	}
 }
 
 int
-jetsam_kill_top_proc(void)
+jetsam_kill_top_proc(boolean_t any, uint32_t cause)
 {
 	proc_t p;
 
+#ifndef CONFIG_FREEZE
+#pragma unused(any)
+#endif
+
 	if (jetsam_snapshot_list_count == 0) {
 		jetsam_snapshot_procs();
 	}
 	lck_mtx_lock(jetsam_list_mlock);
 	while (jetsam_priority_list_index < jetsam_priority_list_count) {
-		pid_t aPid;
-		aPid = jetsam_priority_list[jetsam_priority_list_index].pid;
+		jetsam_priority_entry_t* jetsam_priority_entry = &jetsam_priority_list[jetsam_priority_list_index];
+		pid_t aPid = jetsam_priority_entry->pid;
+#if DEVELOPMENT || DEBUG
+		int activeProcess = jetsam_priority_entry->flags & kJetsamFlagsFrontmost;
+		int procSuspendedForDiagnosis = jetsam_priority_entry->flags & kJetsamFlagsSuspForDiagnosis;
+#endif /* DEVELOPMENT || DEBUG */
 		jetsam_priority_list_index++;
 		/* skip empty slots in the list */
 		if (aPid == 0) {
 			continue; // with lock held
 		}
 		lck_mtx_unlock(jetsam_list_mlock);
-		jetsam_mark_pid_in_snapshot(aPid, kJetsamFlagsKilled);
 		p = proc_find(aPid);
 		if (p != NULL) {
-			printf("jetsam: killing pid %d [%s] - memory_status_level: %d - ", 
-					aPid, (p->p_comm ? p->p_comm : "(unknown)"), kern_memorystatus_level);
-			exit1(p, W_EXITCODE(0, SIGKILL), (int *)NULL);
-			proc_rele(p);
+			int flags = cause;
+#if DEVELOPMENT || DEBUG
+			if ((jetsam_diagnostic_mode != kJetsamDiagnosticModeNone) && procSuspendedForDiagnosis) {
+				printf("jetsam: continuing after ignoring proc suspended already for diagnosis - %d\n", aPid);
+				proc_rele(p);
+				lck_mtx_lock(jetsam_list_mlock);
+				continue;
+			}
+#endif /* DEVELOPMENT || DEBUG */
+#if CONFIG_FREEZE
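+			/* Skip candidates that are mid-operation (busy) or whose frozen
+			 * pages aren't deemed worth reclaiming, unless the caller asked
+			 * for any victim. */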
+			hibernation_node *node;
+			boolean_t skip;
+			if ((node = kern_hibernation_get_node(aPid))) {
+				boolean_t reclaim_proc = !(node->state & (kProcessBusy | kProcessNoReclaimWorth));
+				if (any || reclaim_proc) {
+					if (node->state & kProcessHibernated) {
+						flags |= kJetsamFlagsHibernated;
+					}
+					skip = FALSE;
+				} else {
+					skip = TRUE;
+				}
+				kern_hibernation_release_node(node);
+			} else {
+				skip = FALSE;
+			}
+			if (skip) {
+				proc_rele(p);			
+			} else
+#endif
+			{
+#if DEVELOPMENT || DEBUG
+				if ((jetsam_diagnostic_mode != kJetsamDiagnosticModeNone) && activeProcess) {
 #if DEBUG
-			printf("jetsam: pid %d killed - memory_status_level: %d\n", aPid, kern_memorystatus_level);
+					printf("jetsam: suspending pid %d [%s] (active) for diagnosis - memory_status_level: %d\n",
+						aPid, (p->p_comm ? p->p_comm: "(unknown)"), kern_memorystatus_level);
 #endif /* DEBUG */
-			return 0;
+					jetsam_mark_pid_in_snapshot(aPid, kJetsamFlagsSuspForDiagnosis);
+					jetsam_priority_entry->flags |= kJetsamFlagsSuspForDiagnosis;
+					task_suspend(p->task);
+					proc_rele(p);
+					if (jetsam_diagnostic_mode == kJetsamDiagnosticModeStopAtFirstActive) {
+						jetsam_diagnostic_suspended_one_active_proc = 1;
+						printf("jetsam: returning after suspending first active proc - %d\n", aPid);
+					}
+					return 0;
+				} else
+#endif /* DEVELOPMENT || DEBUG */
+				{
+					printf("jetsam: killing pid %d [%s] - memory_status_level: %d\n", 
+						aPid, (p->p_comm ? p->p_comm : "(unknown)"), kern_memorystatus_level);
+					jetsam_mark_pid_in_snapshot(aPid, flags);
+					exit1(p, W_EXITCODE(0, SIGKILL), (int *)NULL);
+					proc_rele(p);
+#if DEBUG
+					printf("jetsam: pid %d killed - memory_status_level: %d\n", aPid, kern_memorystatus_level);
+#endif /* DEBUG */
+					return 0;
+				}
+			}
 		}
 	    lck_mtx_lock(jetsam_list_mlock);
 	}
@@ -220,54 +453,235 @@ jetsam_kill_hiwat_proc(void)
 		if (aPid == 0 || (hiwat < 0)) {
 			continue; // with lock held
 		}
-		lck_mtx_unlock(jetsam_list_mlock);
 		p = proc_find(aPid);
 		if (p != NULL) {
 			int32_t pages = (int32_t)jetsam_task_page_count(p->task);
-			if (pages > hiwat) {
+			boolean_t skip = (pages <= hiwat);
+#if DEVELOPMENT || DEBUG
+			if (!skip && (jetsam_diagnostic_mode != kJetsamDiagnosticModeNone)) {
+				if (jetsam_priority_list[i].flags & kJetsamFlagsSuspForDiagnosis) {
+					proc_rele(p);
+					continue;
+				}
+			}
+#endif /* DEVELOPMENT || DEBUG */
+#if CONFIG_FREEZE
+			if (!skip) {
+				hibernation_node *node;
+				if ((node = kern_hibernation_get_node(aPid))) {
+					if (node->state & kProcessBusy) {
+						kern_hibernation_release_node(node);
+						skip = TRUE;
+					} else {
+						kern_hibernation_free_node(node, TRUE);
+						skip = FALSE;
+					}
+				}				
+			}
+#endif
+			if (!skip) {
 #if DEBUG
-				printf("jetsam: killing pid %d [%s] - %d pages > hiwat (%d)\n", aPid, p->p_comm, pages, hiwat);
+				printf("jetsam: %s pid %d [%s] - %d pages > hiwat (%d)\n",
+					(jetsam_diagnostic_mode != kJetsamDiagnosticModeNone) ? "suspending" : "killing", aPid, p->p_comm, pages, hiwat);
 #endif /* DEBUG */
-				exit1(p, W_EXITCODE(0, SIGKILL), (int *)NULL);
-				proc_rele(p);
+#if DEVELOPMENT || DEBUG
+				if (jetsam_diagnostic_mode != kJetsamDiagnosticModeNone) {
+					lck_mtx_unlock(jetsam_list_mlock);
+					task_suspend(p->task);
+					proc_rele(p);
 #if DEBUG
-				printf("jetsam: pid %d killed - memory_status_level: %d\n", aPid, kern_memorystatus_level);
+					printf("jetsam: pid %d suspended for diagnosis - memory_status_level: %d\n", aPid, kern_memorystatus_level);
 #endif /* DEBUG */
-				jetsam_mark_pid_in_snapshot(aPid, kJetsamFlagsKilledHiwat);
-				jetsam_priority_list[i].pid = 0;
+					jetsam_mark_pid_in_snapshot(aPid, kJetsamFlagsSuspForDiagnosis);
+					jetsam_priority_list[i].flags |= kJetsamFlagsSuspForDiagnosis;
+				} else
+#endif /* DEVELOPMENT || DEBUG */
+				{
+					jetsam_priority_list[i].pid = 0;
+					lck_mtx_unlock(jetsam_list_mlock);
+					exit1(p, W_EXITCODE(0, SIGKILL), (int *)NULL);
+					proc_rele(p);
+#if DEBUG
+					printf("jetsam: pid %d killed - memory_status_level: %d\n", aPid, kern_memorystatus_level);
+#endif /* DEBUG */
+					jetsam_mark_pid_in_snapshot(aPid, kJetsamFlagsKilledHiwat);
+				}
 				return 0;
 			} else {
 				proc_rele(p);
 			}
 
 		}
-		lck_mtx_lock(jetsam_list_mlock);
 	}
 	lck_mtx_unlock(jetsam_list_mlock);
 	return -1;
 }
 
+#if CONFIG_FREEZE
+static void
+jetsam_send_hibernation_note(uint32_t flags, pid_t pid, uint32_t pages) {
+	int ret;
+	struct kev_msg ev_msg;
+	jetsam_hibernation_entry_t data;
+	
+	ev_msg.vendor_code    = KEV_VENDOR_APPLE;
+	ev_msg.kev_class      = KEV_SYSTEM_CLASS;
+	ev_msg.kev_subclass   = KEV_MEMORYSTATUS_SUBCLASS;
+
+	ev_msg.event_code     = kMemoryStatusHibernationNote;
+
+	ev_msg.dv[0].data_length = sizeof data;
+	ev_msg.dv[0].data_ptr = &data;
+	ev_msg.dv[1].data_length = 0;
+
+	data.pid = pid;
+	data.flags = flags;
+	data.pages = pages;
+
+	ret = kev_post_msg(&ev_msg);
+	if (ret) {
+		kern_memorystatus_kev_failure_count++;
+		printf("%s: kev_post_msg() failed, err %d\n", __func__, ret);
+	}
+}
+
+static int
+jetsam_hibernate_top_proc(void)
+{
+	int hibernate_index;
+	proc_t p;
+	uint32_t i;
+
+	lck_mtx_lock(jetsam_list_mlock);
+	
+	for (hibernate_index = jetsam_priority_list_index; hibernate_index < jetsam_priority_list_count; hibernate_index++) {
+		pid_t aPid;
+		uint32_t state = 0;
+
+		aPid = jetsam_priority_list[hibernate_index].pid;
+
+		/* skip empty slots in the list */
+		if (aPid == 0) {
+			continue; // with lock held
+		}
+
+		if (kern_hibernation_get_process_state(aPid, &state, NULL) != 0) {
+			continue; // with lock held
+		}
+
+		/* ensure the process isn't marked as busy and is suspended */
+		if ((state & kProcessBusy) || !(state & kProcessSuspended)) {
+			continue; // with lock held
+		}
+
+		p = proc_find(aPid);
+		if (p != NULL) {
+			hibernation_node *node;
+			boolean_t skip;
+			uint32_t purgeable, wired, clean, dirty;
+			boolean_t shared;
+			
+			lck_mtx_unlock(jetsam_list_mlock);
+			
+			if ((node = kern_hibernation_get_node(aPid))) {
+				if (node->state & kProcessBusy) {
+					skip = TRUE;
+				} else {
+					node->state |= kProcessBusy;
+					/* Whether we hibernate or not, increase the count so we can maintain the gap between hibernated and suspended processes. */
+					kern_memorystatus_hibernated_count++;
+					skip = FALSE;
+				}
+				kern_hibernation_release_node(node);
+			} else {
+				skip = TRUE;
+			}
+			
+			if (!skip) {
+				/* Only hibernate processes meeting our size criteria. If not met, mark it as such and return. */
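+				/* The final TRUE appears to request an evaluation-only pass:
+				 * the page counts are filled in without actually freezing;
+				 * the real freeze below passes FALSE. */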
+				task_freeze(p->task, &purgeable, &wired, &clean, &dirty, &shared, TRUE);
+				skip = (dirty < kern_memorystatus_hibernation_pages_min) || (dirty > kern_memorystatus_hibernation_pages_max);		
+			}
+			
+			if (!skip) {
+				unsigned int swap_pages_free = default_pager_swap_pages_free();
+				
+				/* Ensure there's actually enough space free to hibernate this process. */
+				if (dirty > swap_pages_free) {
+					kern_memorystatus_low_swap_pages = swap_pages_free;
+					skip = TRUE;
+				}
+			}
+
+			if (skip) {
+				kern_hibernation_set_process_state(aPid, kProcessIgnored);
+				proc_rele(p);
+				return 0;
+			}
+
+#if DEBUG
+			printf("jetsam: pid %d [%s] hibernating - memory_status_level: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, shared %d, free swap: %d\n", 
+				aPid, (p->p_comm ? p->p_comm : "(unknown)"), kern_memorystatus_level, purgeable, wired, clean, dirty, shared, default_pager_swap_pages_free());
+#endif
+
+			task_freeze(p->task, &purgeable, &wired, &clean, &dirty, &shared, FALSE);
+			proc_rele(p);
+			
+			kern_hibernation_set_process_state(aPid, kProcessHibernated | (shared ? 0: kProcessNoReclaimWorth));
+			
+			/* Update stats */
+			for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
+				throttle_intervals[i].pageouts += dirty;
+			}
+			kern_memorystatus_hibernation_pageouts += dirty;
+			kern_memorystatus_hibernation_count++;
+			
+			jetsam_send_hibernation_note(kJetsamFlagsHibernated, aPid, dirty);
+
+			return dirty;
+		}
+	}
+	lck_mtx_unlock(jetsam_list_mlock);
+	return -1;
+}
+#endif /* CONFIG_FREEZE */
+
 static void
 kern_memorystatus_thread(void)
 {
 	struct kev_msg ev_msg;
 	jetsam_kernel_stats_t data;
+	boolean_t post_memorystatus_snapshot = FALSE; 
 	int ret;
 
+	bzero(&data, sizeof(jetsam_kernel_stats_t));
+	bzero(&ev_msg, sizeof(struct kev_msg));
 	while(1) {
 
-		while (kern_memorystatus_level <= kern_memorystatus_level_critical) {
-			if (jetsam_kill_top_proc() < 0) {
+#if DEVELOPMENT || DEBUG
+		jetsam_diagnostic_suspended_one_active_proc = 0;
+#endif /* DEVELOPMENT || DEBUG */
+
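+		/* Reclaim via high-watermark kills first; if memory is still below
+		 * the critical level, fall back to killing by priority. */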
+		while (kern_memorystatus_level <= kern_memorystatus_level_highwater) {
+			if (jetsam_kill_hiwat_proc() < 0) {
 				break;
 			}
+			post_memorystatus_snapshot = TRUE;
 		}
 
-		while (kern_memorystatus_level <= kern_memorystatus_level_highwater) {
-			if (jetsam_kill_hiwat_proc() < 0) {
+		while (kern_memorystatus_level <= kern_memorystatus_level_critical) {
+			if (jetsam_kill_top_proc(FALSE, kJetsamFlagsKilled) < 0) {
 				break;
 			}
+			post_memorystatus_snapshot = TRUE;
+#if DEVELOPMENT || DEBUG
+			if ((jetsam_diagnostic_mode == kJetsamDiagnosticModeStopAtFirstActive) && jetsam_diagnostic_suspended_one_active_proc) {
+				printf("jetsam: stopping killing since 1 active proc suspended already for diagnosis\n");
+				break; // we found first active proc, let's not kill any more
+			}
+#endif /* DEVELOPMENT || DEBUG */
 		}
-				
+
 		kern_memorystatus_last_level = kern_memorystatus_level;
 
 		ev_msg.vendor_code    = KEV_VENDOR_APPLE;
@@ -295,7 +709,7 @@ kern_memorystatus_thread(void)
 			printf("%s: kev_post_msg() failed, err %d\n", __func__, ret);
 		}
 
-		if (jetsam_snapshot_list_count) {
+		if (post_memorystatus_snapshot) {
 			size_t snapshot_size =  sizeof(jetsam_kernel_stats_t) + sizeof(size_t) + sizeof(jetsam_snapshot_entry_t) * jetsam_snapshot_list_count;
 			ev_msg.event_code = kMemoryStatusSnapshotNote;
 			ev_msg.dv[0].data_length = sizeof snapshot_size;
@@ -318,6 +732,349 @@ kern_memorystatus_thread(void)
 	}
 }
 
+#if CONFIG_FREEZE
+
+__private_extern__ void
+kern_hibernation_init(void)
+{
+	hibernation_lck_attr = lck_attr_alloc_init();
+	hibernation_lck_grp_attr = lck_grp_attr_alloc_init();
+	hibernation_lck_grp = lck_grp_alloc_init("hibernation", hibernation_lck_grp_attr);
+	hibernation_mlock = lck_mtx_alloc_init(hibernation_lck_grp, hibernation_lck_attr);
+	
+	RB_INIT(&hibernation_tree_head);
+
+	(void)kernel_thread(kernel_task, kern_hibernation_thread);
+}
+
+static inline boolean_t 
+kern_hibernation_can_hibernate_processes(void) 
+{
+	boolean_t ret;
+	
+	lck_mtx_lock_spin(hibernation_mlock);
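+	/* Keep a pool of suspended-but-not-yet-hibernated processes; only
+	 * proceed while that gap exceeds the configured minimum (4 by default). */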
+	ret = (kern_memorystatus_suspended_count - kern_memorystatus_hibernated_count) > 
+				kern_memorystatus_hibernation_suspended_minimum ? TRUE : FALSE;
+	lck_mtx_unlock(hibernation_mlock);
+	
+	return ret;
+}
+
+static boolean_t 
+kern_hibernation_can_hibernate(void)
+{
+	/* Only hibernate if we're sufficiently low on memory; this holds off hibernation right after boot,
+	   and is generally a no-op once we've reached steady state. */
+	if (kern_memorystatus_level > kern_memorystatus_level_hibernate) {
+		return FALSE;
+	}
+	
+	/* Check minimum suspended process threshold. */
+	if (!kern_hibernation_can_hibernate_processes()) {
+		return FALSE;
+	}
+
+	/* Is swap running low? */
+	if (kern_memorystatus_low_swap_pages) {
+		/* If there's been no movement in free swap pages since we last attempted hibernation, return. */
+		if (default_pager_swap_pages_free() <= kern_memorystatus_low_swap_pages) {
+			return FALSE;
+		}
+		
+		/* Pages have been freed, so we can retry. */
+		kern_memorystatus_low_swap_pages = 0;
+	}
+	
+	/* OK */
+	return TRUE;
+}
+
+static void
+kern_hibernation_add_node(hibernation_node *node)
+{
+	lck_mtx_lock_spin(hibernation_mlock);
+
+	RB_INSERT(hibernation_tree, &hibernation_tree_head, node);
+	kern_memorystatus_suspended_count++;
+
+	lck_mtx_unlock(hibernation_mlock);	
+}
+
+/* Returns with the hibernation lock held if a node is found; otherwise the lock is dropped before returning NULL */
+static hibernation_node *
+kern_hibernation_get_node(pid_t pid) 
+{
+	hibernation_node sought, *found;
+	sought.pid = pid;
+	lck_mtx_lock_spin(hibernation_mlock);
+	found = RB_FIND(hibernation_tree, &hibernation_tree_head, &sought);
+	if (!found) {
+		lck_mtx_unlock(hibernation_mlock);		
+	}
+	return found;
+}
+
+static void
+kern_hibernation_release_node(hibernation_node *node) 
+{
+#pragma unused(node)
+	lck_mtx_unlock(hibernation_mlock);	
+}
+
+static void 
+kern_hibernation_free_node(hibernation_node *node, boolean_t unlock) 
+{
+	/* make sure we're called with the hibernation_mlock held */
+	lck_mtx_assert(hibernation_mlock, LCK_MTX_ASSERT_OWNED);
+
+	if (node->state & (kProcessHibernated | kProcessIgnored)) {
+		kern_memorystatus_hibernated_count--;
+	} 
+
+	kern_memorystatus_suspended_count--;
+	
+	RB_REMOVE(hibernation_tree, &hibernation_tree_head, node);
+	kfree(node, sizeof(hibernation_node));
+
+	if (unlock) {
+		lck_mtx_unlock(hibernation_mlock);
+	}	
+}
+
+static void 
+kern_hibernation_register_pid(pid_t pid)
+{
+	hibernation_node *node;
+
+#if DEVELOPMENT || DEBUG
+	node = kern_hibernation_get_node(pid);
+	if (node) {
+		printf("kern_hibernation_register_pid: pid %d already registered!\n", pid);
+		kern_hibernation_release_node(node);
+		return;
+	}
+#endif
+
+	/* Register as a candidate for hibernation */
+	node = (hibernation_node *)kalloc(sizeof(hibernation_node));
+	if (node) {	
+		clock_sec_t sec;
+		clock_nsec_t nsec;
+		mach_timespec_t ts;
+		
+		memset(node, 0, sizeof(hibernation_node));
+
+		node->pid = pid;
+		node->state = kProcessSuspended;
+
+		clock_get_system_nanotime(&sec, &nsec);
+		ts.tv_sec = sec;
+		ts.tv_nsec = nsec;
+		
+		node->hibernation_ts = ts;
+
+		kern_hibernation_add_node(node);
+	}
+}
+
+static void 
+kern_hibernation_unregister_pid(pid_t pid)
+{
+	hibernation_node *node;
+	
+	node = kern_hibernation_get_node(pid);
+	if (node) {
+		kern_hibernation_free_node(node, TRUE);
+	}
+}
+
+void 
+kern_hibernation_on_pid_suspend(pid_t pid)
+{	
+	kern_hibernation_register_pid(pid);
+}
+
+/* If enabled, we bring all the hibernated pages back prior to resumption; otherwise, they're faulted back in on demand */
+#define THAW_ON_RESUME 1
+
+void
+kern_hibernation_on_pid_resume(pid_t pid, task_t task)
+{	
+#if THAW_ON_RESUME
+	hibernation_node *node;
+	if ((node = kern_hibernation_get_node(pid))) {
+		if (node->state & kProcessHibernated) {
+			node->state |= kProcessBusy;
+			kern_hibernation_release_node(node);
+			task_thaw(task);
+			jetsam_send_hibernation_note(kJetsamFlagsThawed, pid, 0);
+		} else {
+			kern_hibernation_release_node(node);
+		}
+	}
+#else
+#pragma unused(task)
+#endif
+	kern_hibernation_unregister_pid(pid);
+}
+
+void
+kern_hibernation_on_pid_hibernate(pid_t pid)
+{
+#pragma unused(pid)
+
+	/* Wake the hibernation thread */
+	thread_wakeup((event_t)&kern_hibernation_wakeup);	
+}
+
+static int 
+kern_hibernation_get_process_state(pid_t pid, uint32_t *state, mach_timespec_t *ts) 
+{
+	hibernation_node *found;
+	int err = ESRCH;
+	
+	*state = 0;
+
+	found = kern_hibernation_get_node(pid);
+	if (found) {
+		*state = found->state;
+		if (ts) {
+			*ts = found->hibernation_ts;
+		}
+		err = 0;
+		kern_hibernation_release_node(found);
+	}
+	
+	return err;
+}
+
+static int 
+kern_hibernation_set_process_state(pid_t pid, uint32_t state) 
+{
+	hibernation_node *found;
+	int err = ESRCH;
+
+	found = kern_hibernation_get_node(pid);
+	if (found) {
+		found->state = state;
+		err = 0;
+		kern_hibernation_release_node(found);
+	}
+	
+	return err;
+}
+
+static void
+kern_hibernation_update_throttle_interval(mach_timespec_t *ts, struct throttle_interval_t *interval)
+{
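+	/* When the window expires, roll it forward; its budget is sized lazily
+	 * as the interval's prorated share of the daily pageout allowance,
+	 * scaled by the burst multiple. */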
+	if (CMP_MACH_TIMESPEC(ts, &interval->ts) >= 0) {
+		if (!interval->max_pageouts) {
+			interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * HIBERNATION_DAILY_PAGEOUTS_MAX) / (24 * 60)));
+		} else {
+			printf("jetsam: %d minute throttle timeout, resetting\n", interval->mins);
+		}
+		interval->ts.tv_sec = interval->mins * 60;
+		interval->ts.tv_nsec = 0;
+		ADD_MACH_TIMESPEC(&interval->ts, ts);
+		/* Since we update the throttle stats pre-hibernation, adjust for overshoot here */
+		if (interval->pageouts > interval->max_pageouts) {
+			interval->pageouts -= interval->max_pageouts;
+		} else {
+			interval->pageouts = 0;
+		}
+		interval->throttle = FALSE;
+	} else if (!interval->throttle && interval->pageouts >= interval->max_pageouts) {
+		printf("jetsam: %d minute pageout limit exceeded; enabling throttle\n", interval->mins);
+		interval->throttle = TRUE;
+	}	
+#ifdef DEBUG
+	printf("jetsam: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n", 
+		interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - ts->tv_sec) / 60, 
+		interval->throttle ? "on" : "off");
+#endif
+}
+
+static boolean_t
+kern_hibernation_throttle_update(void) 
+{
+	clock_sec_t sec;
+	clock_nsec_t nsec;
+	mach_timespec_t ts;
+	uint32_t i;
+	boolean_t throttled = FALSE;
+
+#if DEVELOPMENT || DEBUG
+	if (!kern_memorystatus_hibernation_throttle_enabled)
+		return FALSE;
+#endif
+
+	clock_get_system_nanotime(&sec, &nsec);
+	ts.tv_sec = sec;
+	ts.tv_nsec = nsec;
+	
+	/* Check hibernation pageouts over multiple intervals and throttle if we've exceeded our budget.
+	 *
+	 * This ensures that periods of inactivity can't be used as 'credit' towards hibernation if the device has
+	 * remained dormant for a long period. We do, however, allow increased thresholds for shorter intervals in
+	 * order to allow for bursts of activity.
+	 */
+	for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
+		kern_hibernation_update_throttle_interval(&ts, &throttle_intervals[i]);
+		if (throttle_intervals[i].throttle == TRUE)
+			throttled = TRUE;
+	}								
+
+	return throttled;
+}
+
+static void
+kern_hibernation_cull(void)
+{
+	hibernation_node *node, *next;
+	lck_mtx_lock(hibernation_mlock);
+
+	for (node = RB_MIN(hibernation_tree, &hibernation_tree_head); node != NULL; node = next) {
+		proc_t p;
+
+		next = RB_NEXT(hibernation_tree, &hibernation_tree_head, node);
+
+		/* TODO: calling proc_find() for every node is probably suboptimal; revisit if it causes a performance issue */
+		p = proc_find(node->pid);
+		if (p) {
+			proc_rele(p);
+		} else {
+			kern_hibernation_free_node(node, FALSE);				
+		}
+	}
+
+	lck_mtx_unlock(hibernation_mlock);	
+}
+
+static void
+kern_hibernation_thread(void)
+{
+	if (vm_freeze_enabled) {
+		if (kern_hibernation_can_hibernate()) {
+			
+			/* Cull dead processes */
+			kern_hibernation_cull();
+			
+			/* Only hibernate if we've not exceeded our pageout budgets */
+			if (!kern_hibernation_throttle_update()) {
+				jetsam_hibernate_top_proc();
+			} else {
+				printf("kern_hibernation_thread: in throttle, ignoring hibernation\n");
+				kern_memorystatus_hibernation_throttle_count++; /* Throttled, update stats */
+			}
+		}
+	}
+
+	assert_wait((event_t) &kern_hibernation_wakeup, THREAD_UNINT);
+	thread_block((thread_continue_t) kern_hibernation_thread);	
+}
+
+#endif /* CONFIG_FREEZE */
+
 static int
 sysctl_io_variable(struct sysctl_req *req, void *pValue, size_t currentsize, size_t maxsize, size_t *newsize)
 {
@@ -362,19 +1119,24 @@ sysctl_handle_kern_memorystatus_priority_list(__unused struct sysctl_oid *oid, _
 	ret = sysctl_io_variable(req, &temp_list[0], currentsize, sizeof(temp_list), &newsize);
 
 	if (!ret && req->newptr) {
-		jetsam_priority_list_count = newsize / sizeof(jetsam_priority_list[0]);
+		int temp_list_count = newsize / sizeof(jetsam_priority_list[0]);
 #if DEBUG 
 		printf("set jetsam priority pids = { ");
-		for (i = 0; i < jetsam_priority_list_count; i++) {
+		for (i = 0; i < temp_list_count; i++) {
 			printf("(%d, 0x%08x, %d) ", temp_list[i].pid, temp_list[i].flags, temp_list[i].hiwat_pages);
 		}
 		printf("}\n");
 #endif /* DEBUG */
 		lck_mtx_lock(jetsam_list_mlock);
-		for (i = 0; i < jetsam_priority_list_count; i++) {
+#if CONFIG_FREEZE
+		jetsam_priority_list_hibernation_index = 0;
+#endif
+		jetsam_priority_list_index = 0;
+		jetsam_priority_list_count = temp_list_count;
+		for (i = 0; i < temp_list_count; i++) {
 			jetsam_priority_list[i] = temp_list[i];
 		}
-		for (i = jetsam_priority_list_count; i < kMaxPriorityEntries; i++) {
+		for (i = temp_list_count; i < kMaxPriorityEntries; i++) {
 			jetsam_priority_list[i].pid = 0;
 			jetsam_priority_list[i].flags = 0;
 			jetsam_priority_list[i].hiwat_pages = -1;
@@ -382,7 +1144,6 @@ sysctl_handle_kern_memorystatus_priority_list(__unused struct sysctl_oid *oid, _
 			jetsam_priority_list[i].hiwat_reserved2 = -1;
 			jetsam_priority_list[i].hiwat_reserved3 = -1;
 		}
-		jetsam_priority_list_index = 0;
 		lck_mtx_unlock(jetsam_list_mlock);
 	}	
 	return ret;
@@ -421,5 +1182,5 @@ sysctl_handle_kern_memorystatus_snapshot(__unused struct sysctl_oid *oid, __unus
 	return ret;
 }
 
-SYSCTL_PROC(_kern, OID_AUTO, memorystatus_priority_list, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0, sysctl_handle_kern_memorystatus_priority_list, "S,jetsam_priorities", "");
+SYSCTL_PROC(_kern, OID_AUTO, memorystatus_priority_list, CTLTYPE_OPAQUE|CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, sysctl_handle_kern_memorystatus_priority_list, "S,jetsam_priorities", "");
 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_snapshot, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_handle_kern_memorystatus_snapshot, "S,jetsam_snapshot", "");
diff --git a/bsd/kern/kern_mib.c b/bsd/kern/kern_mib.c
index 658f13860..f82717429 100644
--- a/bsd/kern/kern_mib.c
+++ b/bsd/kern/kern_mib.c
@@ -333,34 +333,34 @@ sysctl_tbfrequency
 /*
  * hw.* MIB variables.
  */
-SYSCTL_PROC    (_hw, HW_NCPU, ncpu, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_NCPU, sysctl_hw_generic, "I", "");
-SYSCTL_PROC    (_hw, HW_AVAILCPU, activecpu, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_AVAILCPU, sysctl_hw_generic, "I", "");
-SYSCTL_PROC    (_hw, OID_AUTO, physicalcpu, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_LOCAL_PHYSICALCPU, sysctl_hw_generic, "I", "");
-SYSCTL_PROC    (_hw, OID_AUTO, physicalcpu_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_LOCAL_PHYSICALCPUMAX, sysctl_hw_generic, "I", "");
-SYSCTL_PROC    (_hw, OID_AUTO, logicalcpu, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_LOCAL_LOGICALCPU, sysctl_hw_generic, "I", "");
-SYSCTL_PROC    (_hw, OID_AUTO, logicalcpu_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_LOCAL_LOGICALCPUMAX, sysctl_hw_generic, "I", "");
-SYSCTL_INT     (_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD | CTLFLAG_KERN, (int *)NULL, BYTE_ORDER, "");
-SYSCTL_INT     (_hw, OID_AUTO, cputype, CTLFLAG_RD | CTLFLAG_KERN, &cputype, 0, "");
-SYSCTL_INT     (_hw, OID_AUTO, cpusubtype, CTLFLAG_RD | CTLFLAG_KERN, &cpusubtype, 0, "");
-SYSCTL_INT     (_hw, OID_AUTO, cpu64bit_capable, CTLFLAG_RD | CTLFLAG_KERN, &cpu64bit, 0, "");
-SYSCTL_INT     (_hw, OID_AUTO, cpufamily, CTLFLAG_RD | CTLFLAG_KERN, &cpufamily, 0, "");
-SYSCTL_OPAQUE  (_hw, OID_AUTO, cacheconfig, CTLFLAG_RD, &cacheconfig, sizeof(cacheconfig), "Q", "");
-SYSCTL_OPAQUE  (_hw, OID_AUTO, cachesize, CTLFLAG_RD, &cachesize, sizeof(cachesize), "Q", "");
-SYSCTL_PROC	   (_hw, OID_AUTO, pagesize, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN, 0, 0, sysctl_pagesize, "Q", "");
-SYSCTL_QUAD    (_hw, OID_AUTO, busfrequency, CTLFLAG_RD | CTLFLAG_KERN, &gPEClockFrequencyInfo.bus_frequency_hz, "");
-SYSCTL_QUAD    (_hw, OID_AUTO, busfrequency_min, CTLFLAG_RD | CTLFLAG_KERN, &gPEClockFrequencyInfo.bus_frequency_min_hz, "");
-SYSCTL_QUAD    (_hw, OID_AUTO, busfrequency_max, CTLFLAG_RD | CTLFLAG_KERN, &gPEClockFrequencyInfo.bus_frequency_max_hz, "");
-SYSCTL_QUAD    (_hw, OID_AUTO, cpufrequency, CTLFLAG_RD | CTLFLAG_KERN, &gPEClockFrequencyInfo.cpu_frequency_hz, "");
-SYSCTL_QUAD    (_hw, OID_AUTO, cpufrequency_min, CTLFLAG_RD | CTLFLAG_KERN, &gPEClockFrequencyInfo.cpu_frequency_min_hz, "");
-SYSCTL_QUAD    (_hw, OID_AUTO, cpufrequency_max, CTLFLAG_RD | CTLFLAG_KERN, &gPEClockFrequencyInfo.cpu_frequency_max_hz, "");
-SYSCTL_PROC    (_hw, OID_AUTO, cachelinesize, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_CACHELINE | CTLHW_RETQUAD, sysctl_hw_generic, "Q", "");
-SYSCTL_PROC    (_hw, OID_AUTO, l1icachesize, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_L1ICACHESIZE | CTLHW_RETQUAD, sysctl_hw_generic, "Q", "");
-SYSCTL_PROC    (_hw, OID_AUTO, l1dcachesize, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_L1DCACHESIZE | CTLHW_RETQUAD, sysctl_hw_generic, "Q", "");
-SYSCTL_PROC    (_hw, OID_AUTO, l2cachesize, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_L2CACHESIZE | CTLHW_RETQUAD, sysctl_hw_generic, "Q", "");
-SYSCTL_PROC    (_hw, OID_AUTO, l3cachesize, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_L3CACHESIZE | CTLHW_RETQUAD, sysctl_hw_generic, "Q", "");
-SYSCTL_PROC(_hw, OID_AUTO, tbfrequency, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN, 0, 0, sysctl_tbfrequency, "Q", "");
-SYSCTL_QUAD    (_hw, HW_MEMSIZE, memsize, CTLFLAG_RD | CTLFLAG_KERN, &max_mem, "");
-SYSCTL_INT     (_hw, OID_AUTO, packages, CTLFLAG_RD | CTLFLAG_KERN, &packages, 0, "");
+SYSCTL_PROC    (_hw, HW_NCPU, ncpu, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, HW_NCPU, sysctl_hw_generic, "I", "");
+SYSCTL_PROC    (_hw, HW_AVAILCPU, activecpu, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, HW_AVAILCPU, sysctl_hw_generic, "I", "");
+SYSCTL_PROC    (_hw, OID_AUTO, physicalcpu, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, HW_LOCAL_PHYSICALCPU, sysctl_hw_generic, "I", "");
+SYSCTL_PROC    (_hw, OID_AUTO, physicalcpu_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, HW_LOCAL_PHYSICALCPUMAX, sysctl_hw_generic, "I", "");
+SYSCTL_PROC    (_hw, OID_AUTO, logicalcpu, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, HW_LOCAL_LOGICALCPU, sysctl_hw_generic, "I", "");
+SYSCTL_PROC    (_hw, OID_AUTO, logicalcpu_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, HW_LOCAL_LOGICALCPUMAX, sysctl_hw_generic, "I", "");
+SYSCTL_INT     (_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (int *)NULL, BYTE_ORDER, "");
+SYSCTL_INT     (_hw, OID_AUTO, cputype, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &cputype, 0, "");
+SYSCTL_INT     (_hw, OID_AUTO, cpusubtype, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &cpusubtype, 0, "");
+SYSCTL_INT     (_hw, OID_AUTO, cpu64bit_capable, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &cpu64bit, 0, "");
+SYSCTL_INT     (_hw, OID_AUTO, cpufamily, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &cpufamily, 0, "");
+SYSCTL_OPAQUE  (_hw, OID_AUTO, cacheconfig, CTLFLAG_RD | CTLFLAG_LOCKED, &cacheconfig, sizeof(cacheconfig), "Q", "");
+SYSCTL_OPAQUE  (_hw, OID_AUTO, cachesize, CTLFLAG_RD | CTLFLAG_LOCKED, &cachesize, sizeof(cachesize), "Q", "");
+SYSCTL_PROC	   (_hw, OID_AUTO, pagesize, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, 0, sysctl_pagesize, "Q", "");
+SYSCTL_QUAD    (_hw, OID_AUTO, busfrequency, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &gPEClockFrequencyInfo.bus_frequency_hz, "");
+SYSCTL_QUAD    (_hw, OID_AUTO, busfrequency_min, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &gPEClockFrequencyInfo.bus_frequency_min_hz, "");
+SYSCTL_QUAD    (_hw, OID_AUTO, busfrequency_max, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &gPEClockFrequencyInfo.bus_frequency_max_hz, "");
+SYSCTL_QUAD    (_hw, OID_AUTO, cpufrequency, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &gPEClockFrequencyInfo.cpu_frequency_hz, "");
+SYSCTL_QUAD    (_hw, OID_AUTO, cpufrequency_min, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &gPEClockFrequencyInfo.cpu_frequency_min_hz, "");
+SYSCTL_QUAD    (_hw, OID_AUTO, cpufrequency_max, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &gPEClockFrequencyInfo.cpu_frequency_max_hz, "");
+SYSCTL_PROC    (_hw, OID_AUTO, cachelinesize, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, HW_CACHELINE | CTLHW_RETQUAD, sysctl_hw_generic, "Q", "");
+SYSCTL_PROC    (_hw, OID_AUTO, l1icachesize, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, HW_L1ICACHESIZE | CTLHW_RETQUAD, sysctl_hw_generic, "Q", "");
+SYSCTL_PROC    (_hw, OID_AUTO, l1dcachesize, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, HW_L1DCACHESIZE | CTLHW_RETQUAD, sysctl_hw_generic, "Q", "");
+SYSCTL_PROC    (_hw, OID_AUTO, l2cachesize, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, HW_L2CACHESIZE | CTLHW_RETQUAD, sysctl_hw_generic, "Q", "");
+SYSCTL_PROC    (_hw, OID_AUTO, l3cachesize, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, HW_L3CACHESIZE | CTLHW_RETQUAD, sysctl_hw_generic, "Q", "");
+SYSCTL_PROC(_hw, OID_AUTO, tbfrequency, CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, 0, sysctl_tbfrequency, "Q", "");
+SYSCTL_QUAD    (_hw, HW_MEMSIZE, memsize, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &max_mem, "");
+SYSCTL_INT     (_hw, OID_AUTO, packages, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &packages, 0, "");
 
 /*
  * Optional features can register nodes below hw.optional.
@@ -372,7 +372,7 @@ SYSCTL_INT     (_hw, OID_AUTO, packages, CTLFLAG_RD | CTLFLAG_KERN, &packages, 0
  */
 SYSCTL_NODE(_hw, OID_AUTO, optional, CTLFLAG_RW|CTLFLAG_LOCKED, NULL, "optional features");
 
-SYSCTL_INT(_hw_optional, OID_AUTO, floatingpoint, CTLFLAG_RD | CTLFLAG_KERN, (int *)NULL, 1, "");	/* always set */
+SYSCTL_INT(_hw_optional, OID_AUTO, floatingpoint, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, (int *)NULL, 1, "");	/* always set */
 
 /*
  * Deprecated variables.  These are supported for backwards compatibility
@@ -386,44 +386,26 @@ SYSCTL_INT(_hw_optional, OID_AUTO, floatingpoint, CTLFLAG_RD | CTLFLAG_KERN, (in
  *
  * The *_compat nodes are *NOT* visible within the kernel.
  */
-SYSCTL_COMPAT_INT (_hw, HW_PAGESIZE,     pagesize_compat, CTLFLAG_RD | CTLFLAG_MASKED, &page_size, 0, "");
-SYSCTL_COMPAT_INT (_hw, HW_BUS_FREQ,     busfrequency_compat, CTLFLAG_RD | CTLFLAG_MASKED, &gPEClockFrequencyInfo.bus_clock_rate_hz, 0, "");
-SYSCTL_COMPAT_INT (_hw, HW_CPU_FREQ,     cpufrequency_compat, CTLFLAG_RD | CTLFLAG_MASKED, &gPEClockFrequencyInfo.cpu_clock_rate_hz, 0, "");
-SYSCTL_PROC(_hw, HW_CACHELINE,    cachelinesize_compat, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_CACHELINE, sysctl_hw_generic, "I", "");
-SYSCTL_PROC(_hw, HW_L1ICACHESIZE, l1icachesize_compat, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_L1ICACHESIZE, sysctl_hw_generic, "I", "");
-SYSCTL_PROC(_hw, HW_L1DCACHESIZE, l1dcachesize_compat, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_L1DCACHESIZE, sysctl_hw_generic, "I", "");
-SYSCTL_PROC(_hw, HW_L2CACHESIZE,  l2cachesize_compat, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_L2CACHESIZE, sysctl_hw_generic, "I", "");
-SYSCTL_PROC(_hw, HW_L3CACHESIZE,  l3cachesize_compat, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_L3CACHESIZE, sysctl_hw_generic, "I", "");
-SYSCTL_COMPAT_INT (_hw, HW_TB_FREQ,      tbfrequency_compat, CTLFLAG_RD | CTLFLAG_MASKED, &gPEClockFrequencyInfo.timebase_frequency_hz, 0, "");
-SYSCTL_PROC(_hw, HW_MACHINE,      machine, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_MACHINE, sysctl_hw_generic, "A", "");
-SYSCTL_PROC(_hw, HW_MODEL,        model, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_MODEL, sysctl_hw_generic, "A", "");
-SYSCTL_COMPAT_UINT(_hw, HW_PHYSMEM,      physmem, CTLFLAG_RD | CTLFLAG_MASKED, &mem_size, 0, "");
-SYSCTL_PROC(_hw, HW_USERMEM,      usermem, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_USERMEM,	sysctl_hw_generic, "I", "");
-SYSCTL_PROC(_hw, HW_EPOCH,        epoch, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_EPOCH, sysctl_hw_generic, "I", "");
-SYSCTL_PROC(_hw, HW_VECTORUNIT,   vectorunit, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_VECTORUNIT, sysctl_hw_generic, "I", "");
-SYSCTL_PROC(_hw, HW_L2SETTINGS,   l2settings, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_L2SETTINGS, sysctl_hw_generic, "I", "");
-SYSCTL_PROC(_hw, HW_L3SETTINGS,   l3settings, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_L3SETTINGS, sysctl_hw_generic, "I", "");
-SYSCTL_INT (_hw, OID_AUTO, cputhreadtype, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, &cputhreadtype, 0, "");
-
-#ifdef __ppc__
-int altivec_flag = -1;
-int graphicsops_flag = -1;
-int x64bitops_flag = -1;
-int fsqrt_flag = -1;
-int stfiwx_flag = -1;
-int dcba_flag = -1;
-int datastreams_flag = -1;
-int dcbtstreams_flag = -1;
-
-SYSCTL_INT(_hw_optional, OID_AUTO, altivec, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, &altivec_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, graphicsops, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, &graphicsops_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, 64bitops, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, &x64bitops_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, fsqrt, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, &fsqrt_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, stfiwx, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, &stfiwx_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, dcba, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, &dcba_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, datastreams, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, &datastreams_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, dcbtstreams, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, &dcbtstreams_flag, 0, "");
-#elif defined (__i386__) || defined (__x86_64__)
+SYSCTL_COMPAT_INT (_hw, HW_PAGESIZE,     pagesize_compat, CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, &page_size, 0, "");
+SYSCTL_COMPAT_INT (_hw, HW_BUS_FREQ,     busfrequency_compat, CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, &gPEClockFrequencyInfo.bus_clock_rate_hz, 0, "");
+SYSCTL_COMPAT_INT (_hw, HW_CPU_FREQ,     cpufrequency_compat, CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, &gPEClockFrequencyInfo.cpu_clock_rate_hz, 0, "");
+SYSCTL_PROC(_hw, HW_CACHELINE,    cachelinesize_compat, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, 0, HW_CACHELINE, sysctl_hw_generic, "I", "");
+SYSCTL_PROC(_hw, HW_L1ICACHESIZE, l1icachesize_compat, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, 0, HW_L1ICACHESIZE, sysctl_hw_generic, "I", "");
+SYSCTL_PROC(_hw, HW_L1DCACHESIZE, l1dcachesize_compat, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, 0, HW_L1DCACHESIZE, sysctl_hw_generic, "I", "");
+SYSCTL_PROC(_hw, HW_L2CACHESIZE,  l2cachesize_compat, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, 0, HW_L2CACHESIZE, sysctl_hw_generic, "I", "");
+SYSCTL_PROC(_hw, HW_L3CACHESIZE,  l3cachesize_compat, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, 0, HW_L3CACHESIZE, sysctl_hw_generic, "I", "");
+SYSCTL_COMPAT_INT (_hw, HW_TB_FREQ,      tbfrequency_compat, CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, &gPEClockFrequencyInfo.timebase_frequency_hz, 0, "");
+SYSCTL_PROC(_hw, HW_MACHINE,      machine, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, 0, HW_MACHINE, sysctl_hw_generic, "A", "");
+SYSCTL_PROC(_hw, HW_MODEL,        model, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, 0, HW_MODEL, sysctl_hw_generic, "A", "");
+SYSCTL_COMPAT_UINT(_hw, HW_PHYSMEM,      physmem, CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, &mem_size, 0, "");
+SYSCTL_PROC(_hw, HW_USERMEM,      usermem, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, 0, HW_USERMEM,	sysctl_hw_generic, "I", "");
+SYSCTL_PROC(_hw, HW_EPOCH,        epoch, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, 0, HW_EPOCH, sysctl_hw_generic, "I", "");
+SYSCTL_PROC(_hw, HW_VECTORUNIT,   vectorunit, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, 0, HW_VECTORUNIT, sysctl_hw_generic, "I", "");
+SYSCTL_PROC(_hw, HW_L2SETTINGS,   l2settings, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, 0, HW_L2SETTINGS, sysctl_hw_generic, "I", "");
+SYSCTL_PROC(_hw, HW_L3SETTINGS,   l3settings, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, 0, HW_L3SETTINGS, sysctl_hw_generic, "I", "");
+SYSCTL_INT (_hw, OID_AUTO, cputhreadtype, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, &cputhreadtype, 0, "");
+
+#if defined (__i386__) || defined (__x86_64__)
 int mmx_flag = -1;
 int sse_flag = -1;
 int sse2_flag = -1;
@@ -433,22 +415,27 @@ int sse4_2_flag = -1;
 int x86_64_flag = -1;
 int supplementalsse3_flag = -1;
 int aes_flag = -1;
-
-SYSCTL_INT(_hw_optional, OID_AUTO, mmx, CTLFLAG_RD | CTLFLAG_KERN, &mmx_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, sse, CTLFLAG_RD | CTLFLAG_KERN, &sse_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, sse2, CTLFLAG_RD | CTLFLAG_KERN, &sse2_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, sse3, CTLFLAG_RD | CTLFLAG_KERN, &sse3_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, supplementalsse3, CTLFLAG_RD | CTLFLAG_KERN, &supplementalsse3_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, sse4_1, CTLFLAG_RD | CTLFLAG_KERN, &sse4_1_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, sse4_2, CTLFLAG_RD | CTLFLAG_KERN, &sse4_2_flag, 0, "");
+int avx1_0_flag = -1;
+
+SYSCTL_INT(_hw_optional, OID_AUTO, mmx, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &mmx_flag, 0, "");
+SYSCTL_INT(_hw_optional, OID_AUTO, sse, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &sse_flag, 0, "");
+SYSCTL_INT(_hw_optional, OID_AUTO, sse2, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &sse2_flag, 0, "");
+SYSCTL_INT(_hw_optional, OID_AUTO, sse3, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &sse3_flag, 0, "");
+SYSCTL_INT(_hw_optional, OID_AUTO, supplementalsse3, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &supplementalsse3_flag, 0, "");
+SYSCTL_INT(_hw_optional, OID_AUTO, sse4_1, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &sse4_1_flag, 0, "");
+SYSCTL_INT(_hw_optional, OID_AUTO, sse4_2, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &sse4_2_flag, 0, "");
 /* "x86_64" is actually a preprocessor symbol on the x86_64 kernel, so we have to hack this */
 #undef x86_64
-SYSCTL_INT(_hw_optional, OID_AUTO, x86_64, CTLFLAG_RD | CTLFLAG_KERN, &x86_64_flag, 0, "");
-SYSCTL_INT(_hw_optional, OID_AUTO, aes, CTLFLAG_RD | CTLFLAG_KERN, &aes_flag, 0, "");
-#endif /* __ppc__ */
+SYSCTL_INT(_hw_optional, OID_AUTO, x86_64, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &x86_64_flag, 0, "");
+SYSCTL_INT(_hw_optional, OID_AUTO, aes, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &aes_flag, 0, "");
+SYSCTL_INT(_hw_optional, OID_AUTO, avx1_0, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, &avx1_0_flag, 0, "");
+#endif /* !__i386__ && !__x86_64 && !__arm__ */
 
 /*
  * Debugging interface to the CPU power management code.
+ *
+ * Note:	Does not need locks because it disables interrupts over
+ *		the call.
  */
 static int
 pmsSysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
@@ -468,7 +455,7 @@ pmsSysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
 	return(error);
 }
 
-SYSCTL_PROC(_hw, OID_AUTO, pms, CTLTYPE_STRUCT | CTLFLAG_WR, 0, 0, pmsSysctl, "S", "Processor Power Management");
+SYSCTL_PROC(_hw, OID_AUTO, pms, CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_LOCKED, 0, 0, pmsSysctl, "S", "Processor Power Management");
 
 
 
@@ -484,9 +471,7 @@ sysctl_mib_init(void)
 	cputype = cpu_type();
 	cpusubtype = cpu_subtype();
 	cputhreadtype = cpu_threadtype();
-#if defined(__ppc__)
-    cpu64bit = (_cpu_capabilities & k64Bit) == k64Bit;
-#elif defined(__i386__) || defined (__x86_64__)
+#if defined(__i386__) || defined (__x86_64__)
     cpu64bit = (_get_cpu_capabilities() & k64Bit) == k64Bit;
 #endif
 
@@ -502,114 +487,18 @@ sysctl_mib_init(void)
 		sysctl_register_oid(&sysctl__hw_cputhreadtype);
 	}
 
-#ifdef __ppc__
-/*
- * The convention for these is as follows:
- * If the sysctl does not exist, the functionality is not present in the CPU.
- * If the sysctl exists, it will not crash, and should otherwise function
- *   corectly.
- * If the sysctl exists and returns 0, we advise against using this feature.
- * If the sysctl exists and returns 1, we advise it's use.
- */
-
-	if (_cpu_capabilities & kHasAltivec) {
-		altivec_flag = 1;
-		sysctl_register_oid(&sysctl__hw_optional_altivec);
-	}
-	if (_cpu_capabilities & kHasGraphicsOps) {
-		graphicsops_flag = 1;
-		sysctl_register_oid(&sysctl__hw_optional_graphicsops);
-	}		
-	if (_cpu_capabilities & k64Bit) {
-		x64bitops_flag = 1;
-		sysctl_register_oid(&sysctl__hw_optional_64bitops);
-	}		
-	if (_cpu_capabilities & kHasFsqrt) {
-		fsqrt_flag = 1;
-		sysctl_register_oid(&sysctl__hw_optional_fsqrt);
-	}		
-	if (_cpu_capabilities & kHasStfiwx) {
-		stfiwx_flag = 1;
-		sysctl_register_oid(&sysctl__hw_optional_stfiwx);
-	}		
-	if (_cpu_capabilities & kDcbaAvailable)
-		dcba_flag = 0;
-	if (_cpu_capabilities & kDcbaRecommended)
-		dcba_flag = 1;
-	if (dcba_flag >= 0)
-		sysctl_register_oid(&sysctl__hw_optional_dcba);
-	if (_cpu_capabilities & kDataStreamsAvailable)
-		datastreams_flag = 0;
-	if (_cpu_capabilities & kDataStreamsRecommended)
-		datastreams_flag = 1;
-	if (datastreams_flag >= 0)
-		sysctl_register_oid(&sysctl__hw_optional_datastreams);
-	if (_cpu_capabilities & kDcbtStreamsAvailable)
-		dcbtstreams_flag = 0;
-	if (_cpu_capabilities & kDcbtStreamsRecommended)
-		dcbtstreams_flag = 1;
-	if (dcbtstreams_flag >= 0)
-		sysctl_register_oid(&sysctl__hw_optional_dcbtstreams);
-
-	/* hw.cpufamily */
-	switch (cpusubtype) {
-	case CPU_SUBTYPE_POWERPC_750:
-		cpufamily = CPUFAMILY_POWERPC_G3;
-		break;
-	case CPU_SUBTYPE_POWERPC_7400:
-	case CPU_SUBTYPE_POWERPC_7450:
-		cpufamily = CPUFAMILY_POWERPC_G4;
-		break;
-	case CPU_SUBTYPE_POWERPC_970:
-		cpufamily = CPUFAMILY_POWERPC_G5;
-		break;
-	default:
-		cpufamily = CPUFAMILY_UNKNOWN;
-	}
-
-	ml_cpu_info_t cpu_info;
-	ml_cpu_get_info(&cpu_info);
-
-	host_basic_info_data_t hinfo;
-	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
-	kern_return_t kret = host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
-	if(kret != KERN_SUCCESS)
-	{
-		hinfo.max_cpus = 1;
-	}
-
-	/* hw.cachesize */
-	cachesize[0] = max_mem;
-	cachesize[1] = cpu_info.l1_dcache_size;
-	cachesize[2] = cpu_info.l2_settings ? cpu_info.l2_cache_size : 0;
-	cachesize[3] = cpu_info.l3_settings ? cpu_info.l3_cache_size : 0;
-	cachesize[4] = 0;
-	
-	/* hw.cacheconfig */
-	cacheconfig[0] = hinfo.max_cpus;
-	cacheconfig[1] = 1;
-	cacheconfig[2] = cachesize[2] ? 1 : 0;
-	cacheconfig[3] = cachesize[3] ? 1 : 0;
-	cacheconfig[4] = 0;
-
-	/* hw.packages */
-	if (cpusubtype == CPU_SUBTYPE_POWERPC_970 && 
-	    cpu_info.l2_cache_size == 1 * 1024 * 1024)
-		/* The signature of the dual-core G5 */
-		packages = roundup(hinfo.max_cpus, 2) / 2;
-	else
-		packages = hinfo.max_cpus;
-
-#elif defined (__i386__) || defined (__x86_64__)
-	mmx_flag = ((_get_cpu_capabilities() & kHasMMX) == kHasMMX)? 1 : 0;
-	sse_flag = ((_get_cpu_capabilities() & kHasSSE) == kHasSSE)? 1 : 0;
-	sse2_flag = ((_get_cpu_capabilities() & kHasSSE2) == kHasSSE2)? 1 : 0;
-	sse3_flag = ((_get_cpu_capabilities() & kHasSSE3) == kHasSSE3)? 1 : 0;
-	supplementalsse3_flag = ((_get_cpu_capabilities() & kHasSupplementalSSE3) == kHasSupplementalSSE3)? 1 : 0;
-	sse4_1_flag = ((_get_cpu_capabilities() & kHasSSE4_1) == kHasSSE4_1)? 1 : 0;
-	sse4_2_flag = ((_get_cpu_capabilities() & kHasSSE4_2) == kHasSSE4_2)? 1 : 0;
-	x86_64_flag = ((_get_cpu_capabilities() & k64Bit) == k64Bit)? 1 : 0;
-	aes_flag = ((_get_cpu_capabilities() & kHasAES) == kHasAES)? 1 : 0;
+#if defined (__i386__) || defined (__x86_64__)
+#define is_capability_set(k) (((_get_cpu_capabilities() & (k)) == (k)) ? 1 : 0)
+	mmx_flag		= is_capability_set(kHasMMX);
+	sse_flag		= is_capability_set(kHasSSE);
+	sse2_flag		= is_capability_set(kHasSSE2);
+	sse3_flag		= is_capability_set(kHasSSE3);
+	supplementalsse3_flag	= is_capability_set(kHasSupplementalSSE3);
+	sse4_1_flag		= is_capability_set(kHasSSE4_1);
+	sse4_2_flag		= is_capability_set(kHasSSE4_2);
+	x86_64_flag		= is_capability_set(k64Bit);
+	aes_flag		= is_capability_set(kHasAES);
+	avx1_0_flag		= is_capability_set(kHasAVX1_0);
 
 	/* hw.cpufamily */
 	cpufamily = cpuid_cpufamily();
@@ -633,7 +522,7 @@ sysctl_mib_init(void)
 			/ cpuid_info()->thread_count;
 
 #else /* end __arm__ */
-# warning we do not support this platform yet
-#endif /* __ppc__ */
+# error unknown architecture
+#endif /* !__i386__ && !__x86_64 && !__arm__ */
 
 }
diff --git a/bsd/kern/kern_mman.c b/bsd/kern/kern_mman.c
index 6da43d2fd..979af3e5d 100644
--- a/bsd/kern/kern_mman.c
+++ b/bsd/kern/kern_mman.c
@@ -95,6 +95,9 @@
 #include <sys/ubc.h>
 #include <sys/ubc_internal.h>
 #include <sys/sysproto.h>
+#if CONFIG_PROTECT
+#include <sys/cprotect.h>
+#endif
 
 #include <sys/syscall.h>
 #include <sys/kdebug.h>
@@ -156,6 +159,7 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
 	int 			fpref=0;
 	int error =0;
 	int fd = uap->fd;
+	int num_retries = 0;
 
 	user_addr = (mach_vm_offset_t)uap->addr;
 	user_size = (mach_vm_size_t) uap->len;
@@ -203,7 +207,9 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
 	user_size += pageoff;			/* low end... */
 	user_size = mach_vm_round_page(user_size);	/* hi end */
 
-
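+	/* MAP_JIT is only valid for private anonymous mappings */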
+	if ((flags & MAP_JIT) && ((flags & MAP_FIXED) || (flags & MAP_SHARED) || (flags & MAP_FILE))){
+		return EINVAL;
+	}
 	/*
 	 * Check for illegal addresses.  Watch out for address wrap... Note
 	 * that VM_*_ADDRESS are not constants due to casts (argh).
@@ -216,7 +222,7 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
 		 */
 		user_addr -= pageoff;
 		if (user_addr & PAGE_MASK)
-		return (EINVAL);
+			return (EINVAL);
 	}
 #ifdef notyet
 	/* DO not have apis to get this info, need to wait till then*/
@@ -236,6 +242,19 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
 	alloc_flags = 0;
 
 	if (flags & MAP_ANON) {
+
+		maxprot = VM_PROT_ALL;
+#if CONFIG_MACF
+		/*
+		 * Entitlement check.
+		 * Re-enable once mac* is implemented.
+		 */
+		/*error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot);
+		if (error) {
+			return EINVAL;
+		}*/		
+#endif /* MAC */
+
 		/*
 		 * Mapping blank space is trivial.  Use positive fds as the alias
 		 * value for memory tracking. 
@@ -245,7 +264,7 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
 			 * Use "fd" to pass (some) Mach VM allocation flags,
 			 * (see the VM_FLAGS_* definitions).
 			 */
-			alloc_flags = fd & (VM_FLAGS_ALIAS_MASK |
+			alloc_flags = fd & (VM_FLAGS_ALIAS_MASK | VM_FLAGS_SUPERPAGE_MASK |
 					    VM_FLAGS_PURGABLE);
 			if (alloc_flags != fd) {
 				/* reject if there are any extra flags */
@@ -254,7 +273,6 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
 		}
 			
 		handle = NULL;
-		maxprot = VM_PROT_ALL;
 		file_pos = 0;
 		mapanon = 1;
 	} else {
@@ -382,6 +400,21 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
 				goto bad;
 			}
 #endif /* MAC */
+
+#if CONFIG_PROTECT
+			{
+				void *cnode;
+				if ((cnode = cp_get_protected_cnode(vp)) != NULL) {
+					error = cp_handle_vnop(cnode, CP_READ_ACCESS | CP_WRITE_ACCESS);
+					if (error) {
+						(void) vnode_put(vp);
+						goto bad;
+					}
+				}
+			}
+#endif /* CONFIG_PROTECT */
+
+
 		}
 	}
 
@@ -434,6 +467,9 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
 	if (flags & MAP_NOCACHE)
 		alloc_flags |= VM_FLAGS_NO_CACHE;
 
+	if (flags & MAP_JIT){
+		alloc_flags |= VM_FLAGS_MAP_JIT;
+	}
 	/*
 	 * Lookup/allocate object.
 	 */
@@ -455,7 +491,7 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
 		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
 			maxprot |= VM_PROT_READ;
 #endif	/* radar 3777787 */
-
+map_anon_retry:
 		result = vm_map_enter_mem_object(user_map,
 						 &user_addr, user_size,
 						 0, alloc_flags,
@@ -464,6 +500,16 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
 						 (flags & MAP_SHARED) ?
 						 VM_INHERIT_SHARE : 
 						 VM_INHERIT_DEFAULT);
+
+		/* If a non-binding address was specified for this anonymous
+		 * mapping, and the operation failed for lack of space between
+		 * the requested address and the map's maximum, retry once from
+		 * the bottom of the address space (PAGE_SIZE, so that address
+		 * zero stays unmapped).
+		 */
+		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
+			user_addr = PAGE_SIZE;
+			goto map_anon_retry;
+		}
 	} else {
 		if (vnode_isswap(vp)) {
 			/*
@@ -514,7 +560,7 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
 		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
 			maxprot |= VM_PROT_READ;
 #endif	/* radar 3777787 */
-
+map_file_retry:
 		result = vm_map_enter_mem_object_control(user_map,
 						 &user_addr, user_size,
 						 0, alloc_flags,
@@ -523,6 +569,16 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
 						 (flags & MAP_SHARED) ?
 						 VM_INHERIT_SHARE : 
 						 VM_INHERIT_DEFAULT);
+
+		/* If a non-binding address was specified for this file-backed
+		 * mapping, and the operation failed for lack of space between
+		 * the requested address and the map's maximum, retry once from
+		 * the bottom of the address space (PAGE_SIZE, so that address
+		 * zero stays unmapped).
+		 */
+		if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
+			user_addr = PAGE_SIZE;
+			goto map_file_retry;
+		}
 	}
 
 	if (!mapanon) {
@@ -855,13 +911,15 @@ madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
 
 	result = mach_vm_behavior_set(user_map, start, size, new_behavior);
 	switch (result) {
-		case KERN_SUCCESS:
-			return (0);
-		case KERN_INVALID_ADDRESS:
-			return (ENOMEM);
+	case KERN_SUCCESS:
+		return 0;
+	case KERN_INVALID_ADDRESS:
+		return EINVAL;
+	case KERN_NO_SPACE:	
+		return ENOMEM;
 	}
 
-	return (EINVAL);
+	return EINVAL;
 }
 
 int
@@ -1034,6 +1092,7 @@ munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int
 	return(ENOSYS);
 }
 
+#if		!defined(CONFIG_EMBEDDED)
 /* USV: No! need to obsolete map_fd()! mmap() already supports 64 bits */
 kern_return_t
 map_fd(struct map_fd_args *args)
@@ -1070,6 +1129,7 @@ map_fd_funneled(
 	vm_offset_t	map_addr=0;
 	vm_size_t	map_size;
 	int		err=0;
+	vm_prot_t	maxprot = VM_PROT_ALL;
 	vm_map_t	my_map;
 	proc_t		p = current_proc();
 	struct vnode_attr vattr;
@@ -1103,6 +1163,29 @@ map_fd_funneled(
 		goto bad;
 	}
 
+#if CONFIG_MACF
+	err = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
+			fp->f_fglob, VM_PROT_DEFAULT, MAP_FILE, &maxprot);
+	if (err) {
+		(void)vnode_put(vp);
+		goto bad;
+	}
+#endif /* MAC */
+
+#if CONFIG_PROTECT
+	/* check for content protection access */
+	{
+	void *cnode;
+	if ((cnode = cp_get_protected_cnode(vp)) != NULL) {
+		err = cp_handle_vnop(cnode, CP_READ_ACCESS | CP_WRITE_ACCESS);
+		if (err != 0) { 
+			(void)vnode_put(vp);
+			goto bad;
+		}
+	}
+	}
+#endif /* CONFIG_PROTECT */
+
 	AUDIT_ARG(vnpath, vp, ARG_VNODE1);
 
 	/*
@@ -1148,7 +1231,7 @@ map_fd_funneled(
 			my_map,
 			&map_addr, map_size, (vm_offset_t)0, 
 			VM_FLAGS_ANYWHERE, pager, offset, TRUE,
-			VM_PROT_DEFAULT, VM_PROT_ALL,
+			VM_PROT_DEFAULT, maxprot,
 			VM_INHERIT_DEFAULT);
 	if (result != KERN_SUCCESS) {
 		(void)vnode_put(vp);
@@ -1213,4 +1296,5 @@ bad:
 	fp_drop(p, fd, fp, 0);
 	return (err);
 }
+#endif		/* !defined(CONFIG_EMBEDDED) */
 
diff --git a/bsd/kern/kern_newsysctl.c b/bsd/kern/kern_newsysctl.c
index de083965e..2d872d54c 100644
--- a/bsd/kern/kern_newsysctl.c
+++ b/bsd/kern/kern_newsysctl.c
@@ -86,20 +86,51 @@ struct sysctl_oid_list sysctl__sysctl_children;
 
 lck_rw_t * sysctl_geometry_lock = NULL;
 
-static void
-sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i);
+/*
+ * Conditionally allow dtrace to see these functions for debugging purposes.
+ */
+#ifdef STATIC
+#undef STATIC
+#endif
+#if 0
+#define	STATIC
+#else
+#define STATIC static
+#endif
+
+/* forward declarations of static functions */
+STATIC funnel_t *spl_kernel_funnel(void);
+STATIC void sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i);
+STATIC int sysctl_sysctl_debug(struct sysctl_oid *oidp, void *arg1,
+	int arg2, struct sysctl_req *req);
+STATIC int sysctl_sysctl_name(struct sysctl_oid *oidp, void *arg1,
+	int arg2, struct sysctl_req *req);
+STATIC int sysctl_sysctl_next_ls (struct sysctl_oid_list *lsp,
+	int *name, u_int namelen, int *next, int *len, int level,
+	struct sysctl_oid **oidpp);
+STATIC int sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l);
+STATIC int sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l);
+STATIC int name2oid (char *name, int *oid, int *len);
+STATIC int sysctl_sysctl_name2oid(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_sysctl_next(struct sysctl_oid *oidp, void *arg1, int arg2,
+        struct sysctl_req *req);
+STATIC int sysctl_sysctl_oidfmt(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC void splx_kernel_funnel(funnel_t *saved);
+STATIC int sysctl_old_user(struct sysctl_req *req, const void *p, size_t l);
+STATIC int sysctl_new_user(struct sysctl_req *req, void *p, size_t l);
+STATIC int sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen);
+STATIC int sysctlnametomib(const char *name, int *mibp, size_t *sizep);
 
 
 
 /*
  * Locking and stats
  */
-static struct sysctl_lock memlock;
 
 /*
  * XXX this does not belong here
  */
-static funnel_t *
+STATIC funnel_t *
 spl_kernel_funnel(void)
 {
 	funnel_t *cfunnel;
@@ -113,7 +144,7 @@ spl_kernel_funnel(void)
 	return(cfunnel);
 }
 
-static void
+STATIC void
 splx_kernel_funnel(funnel_t *saved)
 {
 	if (saved != kernel_flock) {
@@ -123,7 +154,7 @@ splx_kernel_funnel(funnel_t *saved)
 	}
 }
 
-static int sysctl_root SYSCTL_HANDLER_ARGS;
+STATIC int sysctl_root SYSCTL_HANDLER_ARGS;
 
 struct sysctl_oid_list sysctl__children; /* root list */
 
@@ -133,21 +164,65 @@ struct sysctl_oid_list sysctl__children; /* root list */
  * Order by number in each list.
  */
 
-void sysctl_register_oid(struct sysctl_oid *oidp)
+void
+sysctl_register_oid(struct sysctl_oid *new_oidp)
 {
-	struct sysctl_oid_list *parent = oidp->oid_parent;
+	struct sysctl_oid *oidp = NULL;
+	struct sysctl_oid_list *parent = new_oidp->oid_parent;
 	struct sysctl_oid *p;
 	struct sysctl_oid *q;
 	int n;
-	funnel_t *fnl;
+	funnel_t *fnl = NULL;	/* compiler doesn't notice CTLFLAG_LOCKED */
+
+	/*
+	 * The OID can be old-style (needs copy), new style without an earlier
+	 * version (also needs copy), or new style with a matching version (no
+	 * copy needed).  Later versions are rejected (presumably, the OID
+	 * structure was changed for a necessary reason).
+	 */
+	if (!(new_oidp->oid_kind & CTLFLAG_OID2)) {
+		/*
+		 * XXX:	M_TEMP is perhaps not the most appropriate zone, as it
+		 * XXX:	will subject us to use-after-free by other consumers.
+		 */
+		MALLOC(oidp, struct sysctl_oid *, sizeof(*oidp), M_TEMP, M_WAITOK | M_ZERO);
+		if (oidp == NULL)
+			return;		/* reject: no memory */
+
+		/*
+		 * Copy the structure only through the oid_fmt field, which
+		 * is the last field in a non-OID2 OID structure.
+		 *
+		 * Note:	We may want to set the oid_descr to the
+		 *		oid_name (or "") at some future date.
+		 */
+		memcpy(oidp, new_oidp, offsetof(struct sysctl_oid, oid_descr));
+	} else {
+		/* It's a later version; handle the versions we know about */
+		switch (new_oidp->oid_version) {
+		case SYSCTL_OID_VERSION:
+			/* current version */
+			oidp = new_oidp;
+			break;
+		default:
+			return;			/* reject: unknown version */
+		}
+	}
 
-	fnl = spl_kernel_funnel();
+	/*
+	 * If it's a locked OID being registered, we can assume that the
+	 * caller is doing their own reentrancy locking before calling us.
+	 */
+	if (!(oidp->oid_kind & CTLFLAG_LOCKED))
+		fnl = spl_kernel_funnel();
 
 	if(sysctl_geometry_lock == NULL)
 	{
-		/* Initialise the geometry lock for reading/modifying the sysctl tree
-		 * This is done here because IOKit registers some sysctls before bsd_init()
-		 * calls sysctl_register_fixed().
+		/*
+		 * Initialise the geometry lock for reading/modifying the
+		 * sysctl tree. This is done here because IOKit registers
+		 * some sysctl's before bsd_init() calls
+		 * sysctl_register_fixed().
 		 */
 
 		lck_grp_t* lck_grp  = lck_grp_alloc_init("sysctl", NULL);
@@ -169,6 +244,12 @@ void sysctl_register_oid(struct sysctl_oid *oidp)
 				n = p->oid_number;
 		}
 		oidp->oid_number = n + 1;
+		/*
+		 * Reflect the number chosen for an allocated OID back into
+		 * the caller's template, so sysctl_unregister_oid() can
+		 * compare against it.
+		 */
+		if (oidp != new_oidp)
+			new_oidp->oid_number = oidp->oid_number;
 	}
 
 	/*
@@ -188,30 +269,83 @@ void sysctl_register_oid(struct sysctl_oid *oidp)
 	/* Release the write lock */
 	lck_rw_unlock_exclusive(sysctl_geometry_lock);
 
-	splx_kernel_funnel(fnl);
+	if (!(oidp->oid_kind & CTLFLAG_LOCKED))
+		splx_kernel_funnel(fnl);
 }
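+
+/*
+ * Illustrative sketch (the OID name "example" and its variable are
+ * hypothetical): OIDs declared through the current SYSCTL_* macros are
+ * expected to carry CTLFLAG_OID2 and are linked in directly above;
+ * pre-OID2 declarations take the MALLOC'd-copy path instead.
+ *
+ *	static int example_value = 0;
+ *	SYSCTL_INT(_kern, OID_AUTO, example,
+ *	    CTLFLAG_RW | CTLFLAG_LOCKED,
+ *	    &example_value, 0, "illustrative OID");
+ */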
 
-void sysctl_unregister_oid(struct sysctl_oid *oidp)
+void
+sysctl_unregister_oid(struct sysctl_oid *oidp)
 {
-	funnel_t *fnl;
+	struct sysctl_oid *removed_oidp = NULL;	/* OID removed from tree */
+	struct sysctl_oid *old_oidp = NULL;	/* OID compatibility copy */
+	funnel_t *fnl = NULL;	/* compiler doesn't notice CTLFLAG_LOCKED */
 
-	fnl = spl_kernel_funnel();
+	if (!(oidp->oid_kind & CTLFLAG_LOCKED))
+		fnl = spl_kernel_funnel();
 
 	/* Get the write lock to modify the geometry */
 	lck_rw_lock_exclusive(sysctl_geometry_lock);
 
-	SLIST_REMOVE(oidp->oid_parent, oidp, sysctl_oid, oid_link);
+	if (!(oidp->oid_kind & CTLFLAG_OID2)) {
+		/*
+		 * We're using a copy so we can get the new fields in an
+		 * old structure, so we have to iterate to compare the
+		 * partial structure; when we find a match, we remove it
+		 * normally and free the memory.
+		 */
+		SLIST_FOREACH(old_oidp, oidp->oid_parent, oid_link) {
+			if (!memcmp(&oidp->oid_number, &old_oidp->oid_number,
+			    (offsetof(struct sysctl_oid, oid_descr) -
+			     offsetof(struct sysctl_oid, oid_number)))) {
+				break;
+			}
+		}
+		if (old_oidp != NULL) {
+			SLIST_REMOVE(old_oidp->oid_parent, old_oidp, sysctl_oid, oid_link);
+			removed_oidp = old_oidp;
+		}
+	} else {
+		/* It's a later version; handle the versions we know about */
+		switch (oidp->oid_version) {
+		case SYSCTL_OID_VERSION:
+			/* We can just remove the OID directly... */
+			SLIST_REMOVE(oidp->oid_parent, oidp, sysctl_oid, oid_link);
+			removed_oidp = oidp;
+			break;
+		default:
+			/* XXX: Can't happen; probably tree corruption. */
+			break;			/* rejects unknown version */
+		}
+	}
+
+	/*
+	 * We've removed it from the list at this point, but we don't want
+	 * to return to the caller until all handler references have drained
+	 * out.  Doing things in this order prevents other callers from
+	 * coming in and starting new operations against the OID node we
+	 * want removed.
+	 *
+	 * Note:	removed_oidp could be NULL if the OID wasn't found.
+	 */
+	while (removed_oidp && removed_oidp->oid_refcnt) {
+		lck_rw_sleep(sysctl_geometry_lock, LCK_SLEEP_EXCLUSIVE,
+		    &removed_oidp->oid_refcnt, THREAD_UNINT);
+	}
 
 	/* Release the write lock */
 	lck_rw_unlock_exclusive(sysctl_geometry_lock);
 
-	splx_kernel_funnel(fnl);
+	/* If it was allocated, free it after dropping the lock */
+	if (old_oidp != NULL) {
+		FREE(old_oidp, M_TEMP);
+	}
+
+	/* And drop the funnel interlock, if needed */
+	if (!(oidp->oid_kind & CTLFLAG_LOCKED))
+		splx_kernel_funnel(fnl);
 }
 
 /*
  * Bulk-register all the oids in a linker_set.
  */
-void sysctl_register_set(const char *set)
+void
+sysctl_register_set(const char *set)
 {
 	struct sysctl_oid **oidpp, *oidp;
 
@@ -223,7 +357,8 @@ void sysctl_register_set(const char *set)
 	}
 }
 
-void sysctl_unregister_set(const char *set)
+void
+sysctl_unregister_set(const char *set)
 {
 	struct sysctl_oid **oidpp, *oidp;
 
@@ -401,7 +536,32 @@ int sysctl_io_opaque(struct sysctl_req *req,void *pValue, size_t valueSize, int
  * {0,4,...}	return the kind & format info for the "..." OID.
  */
 
-static void
+/*
+ * sysctl_sysctl_debug_dump_node
+ *
+ * Description:	Dump debug information for a given sysctl_oid_list at the
+ *		given oid depth out to the kernel log, via printf
+ *
+ * Parameters:	l				sysctl_oid_list pointer
+ *		i				current node depth
+ *
+ * Returns:	(void)
+ *
+ * Implicit:	kernel log, modified
+ *
+ * Locks:	Assumes sysctl_geometry_lock is held prior to calling
+ *
+ * Notes:	This function may call itself recursively to resolve Node
+ *		values, which potentially have an inferior sysctl_oid_list
+ *
+ *		This function is only callable indirectly via the function
+ *		sysctl_sysctl_debug()
+ *
+ * Bugs:	The node depth indentation does not work; this may be an
+ *		artifact of leading space removal by the log daemon itself
+ *		or some intermediate routine.
+ */
+STATIC void
 sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
 {
 	int k;
@@ -414,7 +574,8 @@ sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
 
 		printf("%d %s ", oidp->oid_number, oidp->oid_name);
 
-		printf("%c%c",
+		printf("%c%c%c",
+			oidp->oid_kind & CTLFLAG_LOCKED ? 'L':' ',
 			oidp->oid_kind & CTLFLAG_RD ? 'R':' ',
 			oidp->oid_kind & CTLFLAG_WR ? 'W':' ');
 
@@ -439,18 +600,83 @@ sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
 	}
 }
 
-static int
+/*
+ * sysctl_sysctl_debug
+ *
+ * Description:	This function implements the "sysctl.debug" portion of the
+ *		OID space for sysctl.
+ *
+ * OID:		0, 0
+ *
+ * Parameters:	__unused
+ *
+ * Returns:	ENOENT
+ *
+ * Implicit:	kernel log, modified
+ *
+ * Locks:	Acquires and then releases a read lock on the
+ *		sysctl_geometry_lock
+ */
+STATIC int
 sysctl_sysctl_debug(__unused struct sysctl_oid *oidp, __unused void *arg1,
 	__unused int arg2, __unused struct sysctl_req *req)
 {
+	lck_rw_lock_shared(sysctl_geometry_lock);
 	sysctl_sysctl_debug_dump_node(&sysctl__children, 0);
+	lck_rw_done(sysctl_geometry_lock);
 	return ENOENT;
 }
 
-SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD,
+SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD | CTLFLAG_LOCKED,
 	0, 0, sysctl_sysctl_debug, "-", "");
 
-static int
+/*
+ * sysctl_sysctl_name
+ *
+ * Description:	Convert an OID into a string name; this is used by the user
+ *		space sysctl() command line utility; this is done in a purely
+ *		advisory capacity (e.g. to provide node names for "sysctl -A"
+ *		output).
+ *
+ * OID:		0, 1
+ *
+ * Parameters:	oidp				__unused
+ *		arg1				A pointer to the OID name list
+ *						integer array, beginning at
+ *						adjusted option base 2
+ *		arg2				The number of elements which
+ *						remain in the name array
+ *
+ * Returns:	0				Success
+ *	SYSCTL_OUT:EPERM			Permission denied
+ *	SYSCTL_OUT:EFAULT			Bad user supplied buffer
+ *	SYSCTL_OUT:???				Return value from user function
+ *						for SYSCTL_PROC leaf node
+ *
+ * Implicit:	Contents of user request buffer, modified
+ *
+ * Locks:	Acquires and then releases a read lock on the
+ *		sysctl_geometry_lock
+ *
+ * Notes:	SPI (System Programming Interface); this is subject to change
+ *		and may not be relied upon by third party applications; use
+ *		a subprocess to communicate with the "sysctl" command line
+ *		tool instead, if you believe you need this functionality.
+ *		Preferably, use sysctlbyname() instead.
+ *
+ *		Setting of the NULL termination of the output string is
+ *		delayed until after the geometry lock is dropped.  If there
+ *		are no entries remaining in the OID name list when this
+ *		function is called, it will still write out the termination
+ *		byte.
+ *
+ *		This function differs from other sysctl functions in that
+ *		it cannot take an output buffer length of 0 to determine the
+ *		space which will be required.  It is suggested that the buffer
+ *		length be PATH_MAX, and that authors of new sysctls refrain
+ *		from exceeding this string length.
+ */
+STATIC int
 sysctl_sysctl_name(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
         struct sysctl_req *req)
 {
@@ -461,6 +687,7 @@ sysctl_sysctl_name(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
 	struct sysctl_oid_list *lsp = &sysctl__children, *lsp2;
 	char tempbuf[10];
 
+	lck_rw_lock_shared(sysctl_geometry_lock);
 	while (namelen) {
 		if (!lsp) {
 			snprintf(tempbuf,sizeof(tempbuf),"%d",*name);
@@ -468,8 +695,10 @@ sysctl_sysctl_name(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
 				error = SYSCTL_OUT(req, ".", 1);
 			if (!error)
 				error = SYSCTL_OUT(req, tempbuf, strlen(tempbuf));
-			if (error)
+			if (error) {
+				lck_rw_done(sysctl_geometry_lock);
 				return (error);
+			}
 			namelen--;
 			name++;
 			continue;
@@ -484,8 +713,10 @@ sysctl_sysctl_name(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
 			if (!error)
 				error = SYSCTL_OUT(req, oid->oid_name,
 					strlen(oid->oid_name));
-			if (error)
+			if (error) {
+				lck_rw_done(sysctl_geometry_lock);
 				return (error);
+			}
 
 			namelen--;
 			name++;
@@ -501,12 +732,45 @@ sysctl_sysctl_name(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
 		}
 		lsp = lsp2;
 	}
+	lck_rw_done(sysctl_geometry_lock);
 	return (SYSCTL_OUT(req, "", 1));
 }
 
-SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD, sysctl_sysctl_name, "");
+SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_sysctl_name, "");
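+
+/*
+ * Illustrative user-space sketch (not part of this change): MIB
+ * {0, 1, <oid...>} asks OID 0.1 for the dotted string name.
+ *
+ *	int mib[] = { 0, 1, CTL_KERN, KERN_OSTYPE };
+ *	char name[PATH_MAX];
+ *	size_t len = sizeof(name);
+ *	if (sysctl(mib, 4, name, &len, NULL, 0) == 0)
+ *		printf("%s\n", name);	// "kern.ostype"
+ */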
 
-static int
+/*
+ * sysctl_sysctl_next_ls
+ *
+ * Description:	For a given OID name value, return the next consecutive OID
+ *		name value within the geometry tree
+ *
+ * Parameters:	lsp				The OID list to look in
+ *		name				The OID name to start from
+ *		namelen				The length of the OID name
+ *		next				Pointer to new oid storage to
+ *						fill in
+ *		len				Pointer to receive new OID
+ *						length value of storage written
+ *		level				OID tree depth (used to compute
+ *						len value)
+ *		oidpp				Pointer to OID list entry
+ *						pointer; used to walk the list
+ *						forward across recursion
+ *
+ * Returns:	0				Returning a new entry
+ *		1				End of geometry list reached
+ *
+ * Implicit:	*next				Modified to contain the new OID
+ *		*len				Modified to contain new length
+ *
+ * Locks:	Assumes sysctl_geometry_lock is held prior to calling
+ *
+ * Notes:	This function will not return OID values that have special
+ *		handlers, since we cannot tell whether these handlers consume
+ *		elements from the OID space as parameters.  For this reason,
+ *		we STRONGLY discourage these types of handlers.
+ */
+STATIC int
 sysctl_sysctl_next_ls (struct sysctl_oid_list *lsp, int *name, u_int namelen, 
 	int *next, int *len, int level, struct sysctl_oid **oidpp)
 {
@@ -566,7 +830,45 @@ sysctl_sysctl_next_ls (struct sysctl_oid_list *lsp, int *name, u_int namelen,
 	return 1;
 }
 
-static int
+/*
+ * sysctl_sysctl_next
+ *
+ * Description:	This is an iterator function designed to iterate the oid tree
+ *		and provide a list of OIDs for use by the user space "sysctl"
+ *		command line tool
+ *
+ * OID:		0, 2
+ *
+ * Parameters:	oidp				__unused
+ *		arg1				Pointer to start OID name
+ *		arg2				Start OID name length
+ *		req				Pointer to user request buffer
+ *
+ * Returns:	0				Success
+ *		ENOENT				Reached end of OID space
+ *	SYSCTL_OUT:EPERM			Permission denied
+ *	SYSCTL_OUT:EFAULT			Bad user supplied buffer
+ *	SYSCTL_OUT:???				Return value from user function
+ *						for SYSCTL_PROC leaf node
+ *
+ * Implicit:	Contents of user request buffer, modified
+ *
+ * Locks:	Acquires and then releases a read lock on the
+ *		sysctl_geometry_lock
+ *
+ * Notes:	SPI (System Programming Interface); this is subject to change
+ *		and may not be relied upon by third party applications; use
+ *		a subprocess to communicate with the "sysctl" command line
+ *		tool instead, if you believe you need this functionality.
+ *		Preferably, use sysctlbyname() instead.
+ *
+ *		This function differs from other sysctl functions in that
+ *		it cannot take an output buffer length of 0 to determine the
+ *		space which will be required.  It is suggested that the buffer
+ *		length be PATH_MAX, and that authors of new sysctls refrain
+ *		from exceeding this string length.
+ */
+STATIC int
 sysctl_sysctl_next(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
         struct sysctl_req *req)
 {
@@ -577,17 +879,38 @@ sysctl_sysctl_next(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
 	struct sysctl_oid_list *lsp = &sysctl__children;
 	int newoid[CTL_MAXNAME];
 
+	lck_rw_lock_shared(sysctl_geometry_lock);
 	i = sysctl_sysctl_next_ls (lsp, name, namelen, newoid, &j, 1, &oid);
+	lck_rw_done(sysctl_geometry_lock);
 	if (i)
 		return ENOENT;
 	error = SYSCTL_OUT(req, newoid, j * sizeof (int));
 	return (error);
 }
 
-SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD, sysctl_sysctl_next, "");
+SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_sysctl_next, "");
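+
+/*
+ * Illustrative user-space sketch (not part of this change): OID 0.2 is
+ * the iterator behind "sysctl -A"; ENOENT marks the end of the space.
+ *
+ *	int mib[CTL_MAXNAME + 2] = { 0, 2, CTL_KERN };
+ *	int next[CTL_MAXNAME];
+ *	size_t len = sizeof(next);
+ *	if (sysctl(mib, 3, next, &len, NULL, 0) == 0)
+ *		;	// next[] holds the MIB that follows {CTL_KERN}
+ */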
 
-static int
-name2oid (char *name, int *oid, int *len, struct sysctl_oid **oidpp)
+/*
+ * name2oid
+ *
+ * Description:	Support function for use by sysctl_sysctl_name2oid(); looks
+ *		up an OID name given a string name.
+ *
+ * Parameters:	name				NULL terminated string name
+ *		oid				Pointer to receive OID name
+ *		len				Pointer to receive the OID
+ *						length value
+ *
+ * Returns:	0				Success
+ *		ENOENT				Entry not found
+ *
+ * Implicit:	*oid				Modified to contain OID value
+ *		*len				Modified to contain OID length
+ *
+ * Locks:	Assumes sysctl_geometry_lock is held prior to calling
+ */
+STATIC int
+name2oid (char *name, int *oid, int *len)
 {
 	int i;
 	struct sysctl_oid *oidp;
@@ -620,8 +943,6 @@ name2oid (char *name, int *oid, int *len, struct sysctl_oid **oidpp)
 		(*len)++;
 
 		if (!i) {
-			if (oidpp)
-				*oidpp = oidp;
 			return (0);
 		}
 
@@ -643,16 +964,54 @@ name2oid (char *name, int *oid, int *len, struct sysctl_oid **oidpp)
 	return ENOENT;
 }
 
-static int
+/*
+ * sysctl_sysctl_name2oid
+ *
+ * Description:	Translate a string name to an OID name value; this is used by
+ *		the sysctlbyname() function as well as by the "sysctl" command
+ *		line command.
+ *
+ * OID:		0, 3
+ *
+ * Parameters:	oidp				__unused
+ *		arg1				__unused
+ *		arg2				__unused
+ *		req				Request structure
+ *
+ * Returns:	ENOENT				Input length too short
+ *		ENAMETOOLONG			Input length too long
+ *		ENOMEM				Could not allocate work area
+ *	SYSCTL_IN/OUT:EPERM			Permission denied
+ *	SYSCTL_IN/OUT:EFAULT			Bad user supplied buffer
+ *	SYSCTL_IN/OUT:???			Return value from user function
+ *	name2oid:ENOENT				Not found
+ *
+ * Implicit:	*req				Contents of request, modified
+ *
+ * Locks:	Acquires and then releases a read lock on the
+ *		sysctl_geometry_lock
+ *
+ * Notes:	SPI (System Programming Interface); this is subject to change
+ *		and may not be relied upon by third party applications; use
+ *		a subprocess to communicate with the "sysctl" command line
+ *		tool instead, if you believe you need this functionality.
+ *		Preferably, use sysctlbyname() instead.
+ *
+ *		This function differs from other sysctl functions in that
+ *		it cannot take an output buffer length of 0 to determine the
+ *		space which will be required.  It is suggested that the buffer
+ *		length be PATH_MAX, and that authors of new sysctls refrain
+ *		from exceeding this string length.
+ */
+STATIC int
 sysctl_sysctl_name2oid(__unused struct sysctl_oid *oidp, __unused void *arg1,
 	__unused int arg2, struct sysctl_req *req)
 {
 	char *p;
 	int error, oid[CTL_MAXNAME];
 	int len = 0;		/* set by name2oid() */
-	struct sysctl_oid *op = 0;
 
-	if (!req->newlen) 
+	if (req->newlen < 1) 
 		return ENOENT;
 	if (req->newlen >= MAXPATHLEN)	/* XXX arbitrary, undocumented */
 		return (ENAMETOOLONG);
@@ -669,7 +1028,13 @@ sysctl_sysctl_name2oid(__unused struct sysctl_oid *oidp, __unused void *arg1,
 
 	p [req->newlen] = '\0';
 
-	error = name2oid(p, oid, &len, &op);
+	/*
+	 * Note:	We acquire and release the geometry lock here to
+	 *		avoid making name2oid needlessly complex.
+	 */
+	lck_rw_lock_shared(sysctl_geometry_lock);
+	error = name2oid(p, oid, &len);
+	lck_rw_done(sysctl_geometry_lock);
 
 	FREE(p, M_TEMP);
 
@@ -680,19 +1045,58 @@ sysctl_sysctl_name2oid(__unused struct sysctl_oid *oidp, __unused void *arg1,
 	return (error);
 }
 
-SYSCTL_PROC(_sysctl, 3, name2oid, CTLFLAG_RW|CTLFLAG_ANYBODY|CTLFLAG_KERN, 0, 0, 
+SYSCTL_PROC(_sysctl, 3, name2oid, CTLFLAG_RW|CTLFLAG_ANYBODY|CTLFLAG_KERN | CTLFLAG_LOCKED, 0, 0, 
 	sysctl_sysctl_name2oid, "I", "");
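+
+/*
+ * Illustrative user-space sketch (not part of this change): OID 0.3 is
+ * the kernel half of sysctlnametomib(3).
+ *
+ *	int mib[CTL_MAXNAME];
+ *	size_t miblen = CTL_MAXNAME;
+ *	if (sysctlnametomib("kern.maxproc", mib, &miblen) == 0)
+ *		;	// mib[0..miblen-1] now names kern.maxproc
+ */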
 
-static int
+/*
+ * sysctl_sysctl_oidfmt
+ *
+ * Description:	For a given OID name, determine the format of the data which
+ *		is associated with it.  This is used by the "sysctl" command
+ *		line command.
+ *
+ * OID:		0, 4
+ *
+ * Parameters:	oidp				__unused
+ *		arg1				The OID name to look up
+ *		arg2				The length of the OID name
+ *		req				Pointer to user request buffer
+ *
+ * Returns:	0				Success
+ *		EISDIR				Malformed request
+ *		ENOENT				No such OID name
+ *	SYSCTL_OUT:EPERM			Permission denied
+ *	SYSCTL_OUT:EFAULT			Bad user supplied buffer
+ *	SYSCTL_OUT:???				Return value from user function
+ *
+ * Implicit:	Contents of user request buffer, modified
+ *
+ * Locks:	Acquires and then releases a read lock on the
+ *		sysctl_geometry_lock
+ *
+ * Notes:	SPI (System Programming Interface); this is subject to change
+ *		and may not be relied upon by third party applications; use
+ *		a subprocess to communicate with the "sysctl" command line
+ *		tool instead, if you believe you need this functionality.
+ *
+ *		This function differs from other sysctl functions in that
+ *		it cannot take an output buffer length of 0 to determine the
+ *		space which will be required.  It is suggested that the buffer
+ *		length be PATH_MAX, and that authors of new sysctls refrain
+ *		from exceeding this string length.
+ */
+STATIC int
 sysctl_sysctl_oidfmt(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
         struct sysctl_req *req)
 {
-	int *name = (int *) arg1, error;
+	int *name = (int *) arg1;
+	int error = ENOENT;		/* default error: not found */
 	u_int namelen = arg2;
 	u_int indx;
 	struct sysctl_oid *oid;
 	struct sysctl_oid_list *lsp = &sysctl__children;
 
+	lck_rw_lock_shared(sysctl_geometry_lock);
 	oid = SLIST_FIRST(lsp);
 
 	indx = 0;
@@ -707,28 +1111,34 @@ sysctl_sysctl_oidfmt(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
 				lsp = (struct sysctl_oid_list *)oid->oid_arg1;
 				oid = SLIST_FIRST(lsp);
 			} else {
-				if (indx != namelen)
-					return EISDIR;
+				if (indx != namelen) {
+					error = EISDIR;
+					goto err;
+				}
 				goto found;
 			}
 		} else {
 			oid = SLIST_NEXT(oid, oid_link);
 		}
 	}
-	return ENOENT;
+	/* Not found */
+	goto err;
+
 found:
 	if (!oid->oid_fmt)
-		return ENOENT;
+		goto err;
 	error = SYSCTL_OUT(req, 
 		&oid->oid_kind, sizeof(oid->oid_kind));
 	if (!error)
 		error = SYSCTL_OUT(req, oid->oid_fmt, 
 			strlen(oid->oid_fmt)+1);
+err:
+	lck_rw_done(sysctl_geometry_lock);
 	return (error);
 }
 
+SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_sysctl_oidfmt, "");
 
-SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD, sysctl_sysctl_oidfmt, "");
 
 /*
  * Default "handler" functions.
@@ -842,7 +1252,7 @@ sysctl_handle_opaque(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
 /*
  * Transfer functions to/from kernel space.
  */
-static int
+STATIC int
 sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l)
 {
 	size_t i = 0;
@@ -860,7 +1270,7 @@ sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l)
 	return (0);
 }
 
-static int
+STATIC int
 sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l)
 {
 	if (!req->newptr)
@@ -914,7 +1324,7 @@ kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldle
 /*
  * Transfer function to/from user space.
  */
-static int
+STATIC int
 sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
 {
 	int error = 0;
@@ -937,7 +1347,7 @@ sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
 	return (0);
 }
 
-static int
+STATIC int
 sysctl_new_user(struct sysctl_req *req, void *p, size_t l)
 {
 	int error;
@@ -981,10 +1391,28 @@ sysctl_root(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
 			indx++;
 			if (!(oid->oid_kind & CTLFLAG_LOCKED))
 			{
+/*
+printf("sysctl_root: missing CTLFLAG_LOCKED: ");
+for(i = 0; i < (int)(indx - 1); i++)
+printf("oid[%d] = %d ", i, name[i]);
+printf("\n");
+*/
 				funnel_held = TRUE;
 			}
 			if (oid->oid_kind & CTLFLAG_NOLOCK)
 				req->lock = 0;
+			/*
+			 * For SYSCTL_PROC() functions which are for sysctl's
+			 * which have parameters at the end of their OID
+			 * space, you need to OR CTLTYPE_NODE into their
+			 * access value.
+			 *
+			 * NOTE: For binary backward compatibility ONLY! Do
+			 * NOT add new sysctls that do this!  Existing
+			 * sysctls which do this will eventually have
+			 * compatibility code in user space, and this method
+			 * will become unsupported.
+			 */
 			if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
 				if (oid->oid_handler)
 					goto found;
@@ -1028,7 +1456,14 @@ found:
 		goto err;
 	}
 
-	/* Most likely only root can write */
+	/*
+	 * This is where legacy enforcement of permissions occurs.  If the
+	 * flag does not say CTLFLAG_ANYBODY, then we prohibit anyone but
+	 * root from writing new values down.  If local enforcement happens
+	 * at the leaf node, then it needs to be set as CTLFLAG_ANYBODY.  In
+	 * addition, if the leaf node is set this way, then in order to do
+	 * specific enforcement, it has to be of type SYSCTL_PROC.
+	 */
 	if (!(oid->oid_kind & CTLFLAG_ANYBODY) &&
 	    req->newptr && req->p &&
 	    (error = proc_suser(req->p)))
@@ -1039,10 +1474,24 @@ found:
 		goto err;
 	}
 
+	/*
+	 * Reference the OID and drop the geometry lock; this prevents the
+	 * OID from being deleted out from under the handler call, but does
+	 * not prevent other calls into handlers or calls to manage the
+	 * geometry elsewhere from blocking...
+	 */
+	OSAddAtomic(1, &oid->oid_refcnt);
+
+	lck_rw_done(sysctl_geometry_lock);
+
+	/*
+	 * ...however, we still have to grab the funnel for those calls which
+	 * may be into code whose reentrancy is protected by the funnel; a
+	 * blocking operation should not prevent reentrancy, at this point.
+	 */
 	if (funnel_held)
 	{
 		fnl = spl_kernel_funnel();
-		MEMLOCK_LOCK();
 	}
 
 	if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
@@ -1058,10 +1507,27 @@ found:
 
 	if (funnel_held)
 	{
-		MEMLOCK_UNLOCK();
 		splx_kernel_funnel(fnl);
 	}
 
+	/*
+	 * This is tricky... we re-grab the geometry lock in order to drop
+	 * the reference and wake on the address; since the geometry
+	 * lock is a reader/writer lock rather than a mutex, we have to
+	 * wake on all apparent 1->0 transitions.  This abuses the drop
+	 * after the reference decrement in order to wake any lck_rw_sleep()
+	 * in progress in sysctl_unregister_oid() that slept because of a
+	 * non-zero reference count.
+	 *
+	 * Note:	OSAddAtomic() is defined to return the previous value;
+	 *		we use this and the fact that the lock itself is a
+	 *		barrier to avoid waking every time through on "hot"
+	 *		OIDs.
+	 */
+	lck_rw_lock_shared(sysctl_geometry_lock);
+	if (OSAddAtomic(-1, &oid->oid_refcnt) == 1)
+		wakeup(&oid->oid_refcnt);
+
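+	/*
+	 * The take/drop pattern above, in miniature (illustrative only):
+	 *
+	 *	OSAddAtomic(1, &oid->oid_refcnt);   // pin under the lock
+	 *	lck_rw_done(sysctl_geometry_lock);  // run handler unlocked
+	 *	...
+	 *	lck_rw_lock_shared(sysctl_geometry_lock);
+	 *	if (OSAddAtomic(-1, &oid->oid_refcnt) == 1)
+	 *		wakeup(&oid->oid_refcnt);   // 1->0: wake unregister
+	 */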
 err:
 	lck_rw_done(sysctl_geometry_lock);
 	return (error);
@@ -1170,14 +1636,14 @@ userland_sysctl(struct proc *p, int *name, u_int namelen, user_addr_t oldp,
  * may not work correctly.
  */
 
-static int
+STATIC int
 sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 {
 
 	return(kernel_sysctl(current_proc(), name, namelen, oldp, oldlenp, newp, newlen));
 }
 
-static int
+STATIC int
 sysctlnametomib(const char *name, int *mibp, size_t *sizep)
 {
 	int oid[2];
diff --git a/bsd/kern/kern_panicinfo.c b/bsd/kern/kern_panicinfo.c
index 024ec5220..1a949de7b 100644
--- a/bsd/kern/kern_panicinfo.c
+++ b/bsd/kern/kern_panicinfo.c
@@ -47,7 +47,7 @@ extern int  panic_dialog_set_image( const unsigned char * ptr, unsigned int size
 extern void panic_dialog_get_image( unsigned char ** ptr, unsigned int * size );
 
 /* make the compiler happy */
-extern int sysctl_dopanicinfo(int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, struct proc *);
+static int sysctl_dopanicinfo SYSCTL_HANDLER_ARGS;
 
 
 #define PANIC_IMAGE_SIZE_LIMIT	(32 * 4096)				/* 128K - Maximum amount of memory consumed for the panic UI */
@@ -56,11 +56,20 @@ extern int sysctl_dopanicinfo(int *, u_int, user_addr_t, size_t *, user_addr_t,
 /* Local data */
 static int image_size_limit = PANIC_IMAGE_SIZE_LIMIT;
 
-__private_extern__ int
-sysctl_dopanicinfo(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
-		   user_addr_t newp, size_t newlen, struct proc *p)
+/* XXX Should be STATIC for dtrace debugging.. */
+static int
+sysctl_dopanicinfo SYSCTL_HANDLER_ARGS
 {
+	__unused int cmd = oidp->oid_arg2;	/* subcommand */
+	int *name = arg1;		/* oid element argument vector */
+	int namelen = arg2;		/* number of oid element arguments */
+	user_addr_t oldp = req->oldptr;	/* user buffer copy out address */
+	size_t *oldlenp = &req->oldlen;	/* user buffer copy out size */
+	user_addr_t newp = req->newptr;	/* user buffer copy in address */
+	size_t newlen = req->newlen;	/* user buffer copy in size */
 	int error = 0;
+	proc_t p = current_proc();
+
 	vm_offset_t newimage = (vm_offset_t )NULL;
 	kern_return_t	kret;
 	unsigned char * prev_image_ptr;
@@ -70,7 +79,8 @@ sysctl_dopanicinfo(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
 	if (namelen != 1)
 		return (ENOTDIR);		/* overloaded */
 
-	if ( (error = proc_suser(p)) )	/* must be super user to muck with image */
+	/* must be super user to muck with image */
+	if ( (error = proc_suser(p)) )
 		return (error);
 
 	switch (name[0]) {
@@ -80,7 +90,7 @@ sysctl_dopanicinfo(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
 	case KERN_PANICINFO_TEST:
 		
 		panic_dialog_test();
-		return (0);
+		break;
 
 	case KERN_PANICINFO_MAXSIZE:
 
@@ -91,7 +101,7 @@ sysctl_dopanicinfo(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
 
 		error = sysctl_int(oldp, oldlenp, newp, newlen, &image_size_limit);
 
-		return (error);
+		break;
 
 	case KERN_PANICINFO_IMAGE:
 
@@ -99,8 +109,10 @@ sysctl_dopanicinfo(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
 		if ( newp != USER_ADDR_NULL ) {
 
 			/* check the length of the incoming image before allocating space for it. */
-			if ( newlen > (size_t)image_size_limit )
-				return (ENOMEM);
+			if ( newlen > (size_t)image_size_limit ) {
+				error = ENOMEM;
+				break;
+			}
 
 			/* allocate some kernel wired memory for the new image */
 			kret = kmem_alloc(kernel_map, &newimage, (vm_size_t)round_page(newlen));
@@ -118,8 +130,7 @@ sysctl_dopanicinfo(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
 					error = EPERM;
 					break;
 				}
-	
-				return (error);
+				break;
 			}
 
 			/* copy the image in from user space */
@@ -169,12 +180,24 @@ sysctl_dopanicinfo(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
 			}
 		}
 
-		return (0);
+		break;
 
 errout:
 		if ( newimage != (vm_offset_t )NULL )
 			(void)kmem_free(kernel_map, newimage, (vm_size_t)round_page(newlen));
 
-		return (error);
+		break;
 	}
+
+	/* adjust index so we return the right required/consumed amount */
+	if (!error)
+		req->oldidx += req->oldlen;
+
+	return (error);
 }
+SYSCTL_PROC(_kern, KERN_PANICINFO, panicinfo, CTLTYPE_NODE|CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
+	0,			/* Pointer argument (arg1) */
+	0,			/* Integer argument (arg2) */
+	sysctl_dopanicinfo,	/* Handler function */
+	NULL,			/* Data pointer */
+	"");
diff --git a/bsd/kern/kern_priv.c b/bsd/kern/kern_priv.c
new file mode 100644
index 000000000..e7ceb6075
--- /dev/null
+++ b/bsd/kern/kern_priv.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*-
+ * Copyright (c) 2006 nCircle Network Security, Inc.
+ * Copyright (c) 2009 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson for the TrustedBSD
+ * Project under contract to nCircle Network Security, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR, NCIRCLE NETWORK SECURITY,
+ * INC., OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kauth.h>
+
+#if CONFIG_MACF
+#include <security/mac_framework.h>
+#endif
+
+/*
+ * Check a credential for privilege.  Lots of good reasons to deny privilege;
+ * only a few to grant it.
+ */
+int
+priv_check_cred(kauth_cred_t cred, int priv, __unused int flags)
+{
+	int error;
+
+	/*
+	 * We first evaluate policies that may deny the granting of
+	 * privilege unilaterally.
+	 */
+#if CONFIG_MACF
+	error = mac_priv_check(cred, priv);
+	if (error)
+		goto out;
+#endif
+
+	/*
+	 * Having determined if privilege is restricted by various policies,
+	 * now determine if privilege is granted.  At this point, any policy
+	 * may grant privilege.  For now, we allow short-circuit boolean
+	 * evaluation, so may not call all policies.  Perhaps we should.
+	 */
+	if (kauth_cred_getuid(cred) == 0) {
+		error = 0;
+		goto out;
+	}
+
+	/*
+	 * Now check with MAC, if enabled, to see if a policy module grants
+	 * privilege.
+	 */
+#if CONFIG_MACF
+	if (mac_priv_grant(cred, priv) == 0) {
+		error = 0;
+		goto out;
+	}
+#endif
+
+	/*
+	 * The default is deny, so if no policies have granted it, reject
+	 * with a privilege error here.
+	 */
+	error = EPERM;
+out:
+	return (error);
+}
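+
+/*
+ * Illustrative caller sketch (PRIV_EXAMPLE is a placeholder, not a real
+ * <sys/priv.h> constant):
+ *
+ *	if ((error = priv_check_cred(kauth_cred_get(), PRIV_EXAMPLE, 0)))
+ *		return (error);	// EPERM unless a policy granted it
+ */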
diff --git a/bsd/kern/kern_proc.c b/bsd/kern/kern_proc.c
index ba7505008..042a3a864 100644
--- a/bsd/kern/kern_proc.c
+++ b/bsd/kern/kern_proc.c
@@ -96,6 +96,7 @@
 #include <sys/kauth.h>
 #include <sys/codesign.h>
 #include <sys/kernel_types.h>
+#include <sys/ubc.h>
 #include <kern/kalloc.h>
 #include <kern/task.h>
 #include <kern/assert.h>
@@ -168,13 +169,10 @@ static void orphanpg(struct pgrp *pg);
 void 	proc_name_kdp(task_t t, char * buf, int size);
 char	*proc_name_address(void *p);
 
-static proc_t proc_refinternal_locked(proc_t p);
 static void  pgrp_add(struct pgrp * pgrp, proc_t parent, proc_t child);
 static void pgrp_remove(proc_t p);
 static void pgrp_replace(proc_t p, struct pgrp *pgrp);
 static void pgdelete_dropref(struct pgrp *pgrp);
-static proc_t proc_find_zombref(int pid);
-static void proc_drop_zombref(proc_t p);
 extern void pg_rele_dropref(struct pgrp * pgrp);
 
 struct fixjob_iterargs {
@@ -345,7 +343,7 @@ proc_findinternal(int pid, int locked)
 	}
 
 	p = pfind_locked(pid);
-	if ((p == PROC_NULL) || (p != proc_refinternal_locked(p)))
+	if ((p == PROC_NULL) || (p != proc_ref_locked(p)))
 		p = PROC_NULL;
 
 	if (locked == 0) {
@@ -373,15 +371,15 @@ proc_self(void)
 	p = current_proc();
 
 	proc_list_lock();
-	if (p != proc_refinternal_locked(p))
+	if (p != proc_ref_locked(p))
 		p = PROC_NULL;
 	proc_list_unlock();
 	return(p);
 }
 
 
-static proc_t
-proc_refinternal_locked(proc_t p)
+proc_t
+proc_ref_locked(proc_t p)
 {
 	proc_t p1 = p;
 	
@@ -412,7 +410,7 @@ proc_rele_locked(proc_t p)
 
 }
 
-static proc_t
+proc_t
 proc_find_zombref(int pid)
 {
 	proc_t p1 = PROC_NULL;
@@ -440,7 +438,7 @@ proc_find_zombref(int pid)
 	return(p1);
 }
 
-static void
+void
 proc_drop_zombref(proc_t p)
 {
 	proc_list_lock();
@@ -608,7 +606,7 @@ proc_parent(proc_t p)
 	proc_list_lock();
 loop:
 	pp = p->p_pptr;
-	parent =  proc_refinternal_locked(pp);
+	parent =  proc_ref_locked(pp);
 	if ((parent == PROC_NULL) && (pp != PROC_NULL) && (pp->p_stat != SZOMB) && ((pp->p_listflag & P_LIST_EXITED) != 0) && ((pp->p_listflag & P_LIST_CHILDDRAINED)== 0)){
 		pp->p_listflag |= P_LIST_CHILDLKWAIT;
 		msleep(&pp->p_childrencnt, proc_list_mlock, 0, "proc_parent", 0);
@@ -781,12 +779,34 @@ proc_pidversion(proc_t p)
 	return(p->p_idversion);
 }
 
+uint64_t
+proc_uniqueid(proc_t p)
+{
+	return(p->p_uniqueid);
+}
+
+uint64_t
+proc_selfuniqueid(void)
+{
+	proc_t p = current_proc();
+	return(p->p_uniqueid);
+}
+
 int
 proc_getcdhash(proc_t p, unsigned char *cdhash)
 {
 	return vn_getcdhash(p->p_textvp, p->p_textoff, cdhash);
 }
 
+void
+proc_getexecutableuuid(proc_t p, unsigned char *uuidbuf, unsigned long size)
+{
+	if (size >= sizeof(p->p_uuid)) {
+		memcpy(uuidbuf, p->p_uuid, sizeof(p->p_uuid));
+	}
+}
+
 void
 bsd_set_dependency_capable(task_t task)
 {
@@ -1029,10 +1049,10 @@ enterpgrp(proc_t p, pid_t pgid, int mksess)
 			sess->s_flags = 0;
 			sess->s_listflags = 0;
 			sess->s_ttypgrpid = NO_PID;
-#ifdef CONFIG_EMBEDDED
-			lck_mtx_init(&sess->s_mlock, proc_lck_grp, proc_lck_attr);
-#else
+#if CONFIG_FINE_LOCK_GROUPS
 			lck_mtx_init(&sess->s_mlock, proc_mlock_grp, proc_lck_attr);
+#else
+			lck_mtx_init(&sess->s_mlock, proc_lck_grp, proc_lck_attr);
 #endif
 			bcopy(procsp->s_login, sess->s_login,
 			    sizeof(sess->s_login));
@@ -1055,10 +1075,10 @@ enterpgrp(proc_t p, pid_t pgid, int mksess)
 			proc_list_unlock();
 		}
 		pgrp->pg_id = pgid;
-#ifdef CONFIG_EMBEDDED
-		lck_mtx_init(&pgrp->pg_mlock, proc_lck_grp, proc_lck_attr);
-#else
+#if CONFIG_FINE_LOCK_GROUPS
 		lck_mtx_init(&pgrp->pg_mlock, proc_mlock_grp, proc_lck_attr);
+#else
+		lck_mtx_init(&pgrp->pg_mlock, proc_lck_grp, proc_lck_attr);
 #endif
 		LIST_INIT(&pgrp->pg_members);
 		pgrp->pg_membercnt = 0;
@@ -1178,18 +1198,18 @@ pgdelete_dropref(struct pgrp *pgrp)
 		if (sessp->s_count != 0)
 			panic("pg_deleteref: freeing session in use");	
 		proc_list_unlock();
-#ifdef CONFIG_EMBEDDED
-		lck_mtx_destroy(&sessp->s_mlock, proc_lck_grp);
-#else
+#if CONFIG_FINE_LOCK_GROUPS
 		lck_mtx_destroy(&sessp->s_mlock, proc_mlock_grp);
+#else
+		lck_mtx_destroy(&sessp->s_mlock, proc_lck_grp);
 #endif
 		FREE_ZONE(sessp, sizeof(struct session), M_SESSION);
 	} else
 		proc_list_unlock();
-#ifdef CONFIG_EMBEDDED
-	lck_mtx_destroy(&pgrp->pg_mlock, proc_lck_grp);
-#else
+#if CONFIG_FINE_LOCK_GROUPS
 	lck_mtx_destroy(&pgrp->pg_mlock, proc_mlock_grp);
+#else
+	lck_mtx_destroy(&pgrp->pg_mlock, proc_lck_grp);
 #endif
 	FREE_ZONE(pgrp, sizeof(*pgrp), M_PGRP);
 }
@@ -1650,14 +1670,14 @@ out:
 
 SYSCTL_NODE(_kern, KERN_LCTX, lctx, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Login Context");
 
-SYSCTL_PROC(_kern_lctx, KERN_LCTX_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT,
+SYSCTL_PROC(_kern_lctx, KERN_LCTX_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT | CTLFLAG_LOCKED,
 	    0, 0, sysctl_kern_lctx, "S,lctx",
 	    "Return entire login context table");
-SYSCTL_NODE(_kern_lctx, KERN_LCTX_LCID, lcid, CTLFLAG_RD,
+SYSCTL_NODE(_kern_lctx, KERN_LCTX_LCID, lcid, CTLFLAG_RD | CTLFLAG_LOCKED,
 	    sysctl_kern_lctx, "Login Context Table");
-SYSCTL_INT(_kern_lctx, OID_AUTO, last,  CTLFLAG_RD, &lastlcid, 0, ""); 
-SYSCTL_INT(_kern_lctx, OID_AUTO, count, CTLFLAG_RD, &alllctx_cnt, 0, "");
-SYSCTL_INT(_kern_lctx, OID_AUTO, max, CTLFLAG_RW, &maxlcid, 0, "");
+SYSCTL_INT(_kern_lctx, OID_AUTO, last,  CTLFLAG_RD | CTLFLAG_LOCKED, &lastlcid, 0, ""); 
+SYSCTL_INT(_kern_lctx, OID_AUTO, count, CTLFLAG_RD | CTLFLAG_LOCKED, &alllctx_cnt, 0, "");
+SYSCTL_INT(_kern_lctx, OID_AUTO, max, CTLFLAG_RW | CTLFLAG_LOCKED, &maxlcid, 0, "");
 
 #endif	/* LCTX */
 
@@ -1811,7 +1831,33 @@ csops(__unused proc_t p, struct csops_args *uap, __unused int32_t *retval)
 			}
 
 			return error;
-		
+
+		case CS_OPS_ENTITLEMENTS_BLOB: {
+			char zeros[8] = { 0 };
+			void *start;
+			size_t length;
+
+			if (0 != (error = cs_entitlements_blob_get(pt,
+			    &start, &length)))
+				break;
+			if (usize < sizeof(zeros) || usize < length) {
+				error = ERANGE;
+				break;
+			}
+			if (NULL == start) {
+				start = zeros;
+				length = sizeof(zeros);
+			}
+			error = copyout(start, uaddr, length);
+			break;
+		}
+
+		case CS_OPS_MARKRESTRICT:
+			proc_lock(pt);
+			pt->p_csflags |= CS_RESTRICT;
+			proc_unlock(pt);
+			break;
+
 		default:
 			error = EINVAL;
 			break;
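+
+		/*
+		 * Illustrative user-space sketch (not part of this change):
+		 * querying the entitlements blob via csops(2); ERANGE
+		 * mirrors the usize checks above.
+		 *
+		 *	char buf[4096];
+		 *	if (csops(pid, CS_OPS_ENTITLEMENTS_BLOB, buf,
+		 *	    sizeof(buf)) == -1 && errno == ERANGE)
+		 *		;	// buffer smaller than the blob
+		 */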
@@ -1984,7 +2030,7 @@ ps_allprocscan:
 
 	for (p = allproc.lh_first; (p != 0); p = p->p_list.le_next) {
 		if ( (filterfn == 0 ) || (filterfn(p, filterarg) != 0)) {
-			p = proc_refinternal_locked(p);
+			p = proc_ref_locked(p);
 
 			proc_list_unlock();
 			lockheld = 0;
@@ -2449,10 +2495,10 @@ session_rele(struct session *sess)
 		if (sess->s_count != 0)
 			panic("session_rele: freeing session in use");	
 		proc_list_unlock();
-#ifdef CONFIG_EMBEDDED
-		lck_mtx_destroy(&sess->s_mlock, proc_lck_grp);
-#else
+#if CONFIG_FINE_LOCK_GROUPS
 		lck_mtx_destroy(&sess->s_mlock, proc_mlock_grp);
+#else
+		lck_mtx_destroy(&sess->s_mlock, proc_lck_grp);
 #endif
 		FREE_ZONE(sess, sizeof(struct session), M_SESSION);
 	} else
@@ -2575,9 +2621,9 @@ unsigned long cs_procs_invalidated = 0;
 int cs_force_kill = 0;
 int cs_force_hard = 0;
 int cs_debug = 0;
-SYSCTL_INT(_vm, OID_AUTO, cs_force_kill, CTLFLAG_RW, &cs_force_kill, 0, "");
-SYSCTL_INT(_vm, OID_AUTO, cs_force_hard, CTLFLAG_RW, &cs_force_hard, 0, "");
-SYSCTL_INT(_vm, OID_AUTO, cs_debug, CTLFLAG_RW, &cs_debug, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, cs_force_kill, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_force_kill, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, cs_force_hard, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_force_hard, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, cs_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_debug, 0, "");
 
 int
 cs_allow_invalid(struct proc *p)
@@ -2633,11 +2679,9 @@ cs_invalid_page(
 	if (p->p_csflags & CS_KILL) {
 		p->p_csflags |= CS_KILLED;
 		proc_unlock(p);
-		if (cs_debug) {
-			printf("CODE SIGNING: cs_invalid_page(0x%llx): "
-			       "p=%d[%s] honoring CS_KILL, final status 0x%x\n",
-			       vaddr, p->p_pid, p->p_comm, p->p_csflags);
-		}
+		printf("CODE SIGNING: cs_invalid_page(0x%llx): "
+		       "p=%d[%s] honoring CS_KILL, final status 0x%x\n",
+		       vaddr, p->p_pid, p->p_comm, p->p_csflags);
 		cs_procs_killed++;
 		psignal(p, SIGKILL);
 		proc_lock(p);
@@ -2646,11 +2690,9 @@ cs_invalid_page(
 	/* CS_HARD means fail the mapping operation so the process stays valid. */
 	if (p->p_csflags & CS_HARD) {
 		proc_unlock(p);
-		if (cs_debug) {
-			printf("CODE SIGNING: cs_invalid_page(0x%llx): "
-			       "p=%d[%s] honoring CS_HARD\n",
-			       vaddr, p->p_pid, p->p_comm);
-		}
+		printf("CODE SIGNING: cs_invalid_page(0x%llx): "
+		       "p=%d[%s] honoring CS_HARD\n",
+		       vaddr, p->p_pid, p->p_comm);
 		retval = 1;
 	} else {
 		if (p->p_csflags & CS_VALID) {
@@ -2773,9 +2815,12 @@ proc_resetpcontrol(int pid)
 	proc_t p;
 	int pcontrol;
 	int error;
+	proc_t self = current_proc();
 
-	if ((error = suser(kauth_cred_get(), 0)))
+	/* if the process has been validated to handle resource control or root is valid one */
+	if (((self->p_lflag & P_LVMRSRCOWNER) == 0) && (error = suser(kauth_cred_get(), 0)))
 		return error;
+
 	p = proc_find(pid);
 	if (p == PROC_NULL)
 		return(ESRCH);
diff --git a/bsd/kern/kern_prot.c b/bsd/kern/kern_prot.c
index a084ddf89..d2408a2f3 100644
--- a/bsd/kern/kern_prot.c
+++ b/bsd/kern/kern_prot.c
@@ -360,9 +360,9 @@ gettid(__unused proc_t p, struct gettid_args *uap, int32_t *retval)
 	if (!(uthread->uu_flag & UT_SETUID))
 		return (ESRCH);
 
-	if ((error = suword(uap->uidp, uthread->uu_ucred->cr_ruid)))
+	if ((error = suword(uap->uidp, kauth_cred_getruid(uthread->uu_ucred))))
 		return (error);
-	if ((error = suword(uap->gidp, uthread->uu_ucred->cr_rgid)))
+	if ((error = suword(uap->gidp, kauth_cred_getrgid(uthread->uu_ucred))))
 		return (error);
 
 	*retval = 0;
@@ -448,21 +448,23 @@ getgroups(__unused proc_t p, struct getgroups_args *uap, int32_t *retval)
 	int ngrp;
 	int error;
 	kauth_cred_t cred;
+	posix_cred_t pcred;
 
 	/* grab reference while we muck around with the credential */
 	cred = kauth_cred_get_with_ref();
+	pcred = posix_cred_get(cred);
 
 	if ((ngrp = uap->gidsetsize) == 0) {
-		*retval = cred->cr_ngroups;
+		*retval = pcred->cr_ngroups;
 		kauth_cred_unref(&cred);
 		return (0);
 	}
-	if (ngrp < cred->cr_ngroups) {
+	if (ngrp < pcred->cr_ngroups) {
 		kauth_cred_unref(&cred);
 		return (EINVAL);
 	}
-	ngrp = cred->cr_ngroups;
-	if ((error = copyout((caddr_t)cred->cr_groups,
+	ngrp = pcred->cr_ngroups;
+	if ((error = copyout((caddr_t)pcred->cr_groups,
 	    				uap->gidset, 
 	    				ngrp * sizeof(gid_t)))) {
 		kauth_cred_unref(&cred);
@@ -716,17 +718,19 @@ setuid(proc_t p, struct setuid_args *uap, __unused int32_t *retval)
 	uid_t gmuid = KAUTH_UID_NONE;
 	int error;
 	kauth_cred_t my_cred, my_new_cred;
+	posix_cred_t my_pcred;
 
 
 	uid = uap->uid;
 
 	my_cred = kauth_cred_proc_ref(p);
+	my_pcred = posix_cred_get(my_cred);
 
 	DEBUG_CRED_ENTER("setuid (%d/%d): %p %d\n", p->p_pid, (p->p_pptr ? p->p_pptr->p_pid : 0), my_cred, uap->uid);
 	AUDIT_ARG(uid, uid);
 
-	if (uid != my_cred->cr_ruid &&	/* allow setuid(getuid()) */
-	    uid != my_cred->cr_svuid &&	/* allow setuid(saved uid) */
+	if (uid != my_pcred->cr_ruid &&		/* allow setuid(getuid()) */
+	    uid != my_pcred->cr_svuid &&	/* allow setuid(saved uid) */
 	    (error = suser(my_cred, &p->p_acflag))) {
 		kauth_cred_unref(&my_cred);
 		return (error);
@@ -747,7 +751,7 @@ setuid(proc_t p, struct setuid_args *uap, __unused int32_t *retval)
 		 * chgproccnt uses list lock for protection
 		 */
 		(void)chgproccnt(uid, 1);
-		(void)chgproccnt(my_cred->cr_ruid, -1);
+		(void)chgproccnt(my_pcred->cr_ruid, -1);
 	}
 
 	/* get current credential and take a reference while we muck with it */
@@ -761,7 +765,7 @@ setuid(proc_t p, struct setuid_args *uap, __unused int32_t *retval)
 		 * to something other than the default list for the user, as
 		 * in entering a group or leaving an exclusion group).
 		 */
-		if (!(my_cred->cr_flags & CRF_NOMEMBERD))
+		if (!(my_pcred->cr_flags & CRF_NOMEMBERD))
 			gmuid = uid;
 
   		/* 
@@ -774,7 +778,7 @@ setuid(proc_t p, struct setuid_args *uap, __unused int32_t *retval)
 		my_new_cred = kauth_cred_setresuid(my_cred, ruid, uid, svuid, gmuid);
 		if (my_cred != my_new_cred) {
 
-			DEBUG_CRED_CHANGE("setuid CH(%d): %p/0x%08x -> %p/0x%08x\n", p->p_pid, my_cred, my_cred->cr_flags, my_new_cred, my_new_cred->cr_flags);
+			DEBUG_CRED_CHANGE("setuid CH(%d): %p/0x%08x -> %p/0x%08x\n", p->p_pid, my_cred, my_pcred->cr_flags, my_new_cred, posix_cred_get(my_new_cred)->cr_flags);
 
 			proc_lock(p);
 			/*
@@ -791,6 +795,9 @@ setuid(proc_t p, struct setuid_args *uap, __unused int32_t *retval)
 				continue;
 			}
 			p->p_ucred = my_new_cred;
+			/* update cred on proc */
+			PROC_UPDATE_CREDS_ONPROC(p);
+
 			OSBitOrAtomic(P_SUGID, &p->p_flag);
 			proc_unlock(p);
 		}
@@ -828,6 +835,7 @@ seteuid(proc_t p, struct seteuid_args *uap, __unused int32_t *retval)
 	uid_t euid;
 	int error;
 	kauth_cred_t my_cred, my_new_cred;
+	posix_cred_t my_pcred;
 
 	DEBUG_CRED_ENTER("seteuid: %d\n", uap->euid);
 
@@ -835,8 +843,9 @@ seteuid(proc_t p, struct seteuid_args *uap, __unused int32_t *retval)
 	AUDIT_ARG(euid, euid);
 
 	my_cred = kauth_cred_proc_ref(p);
+	my_pcred = posix_cred_get(my_cred);
 
-	if (euid != my_cred->cr_ruid && euid != my_cred->cr_svuid &&
+	if (euid != my_pcred->cr_ruid && euid != my_pcred->cr_svuid &&
 	    (error = suser(my_cred, &p->p_acflag))) {
 		kauth_cred_unref(&my_cred);
 		return (error);
@@ -855,11 +864,11 @@ seteuid(proc_t p, struct seteuid_args *uap, __unused int32_t *retval)
 		 * passed in.  The subsequent compare is safe, because it is
 		 * a pointer compare rather than a contents compare.
   		 */
-		my_new_cred = kauth_cred_setresuid(my_cred, KAUTH_UID_NONE, euid, KAUTH_UID_NONE, my_cred->cr_gmuid);
+		my_new_cred = kauth_cred_setresuid(my_cred, KAUTH_UID_NONE, euid, KAUTH_UID_NONE, my_pcred->cr_gmuid);
 	
 		if (my_cred != my_new_cred) {
 
-			DEBUG_CRED_CHANGE("seteuid CH(%d): %p/0x%08x -> %p/0x%08x\n", p->p_pid, my_cred, my_cred->cr_flags, my_new_cred, my_new_cred->cr_flags);
+			DEBUG_CRED_CHANGE("seteuid CH(%d): %p/0x%08x -> %p/0x%08x\n", p->p_pid, my_cred, my_pcred->cr_flags, my_new_cred, posix_cred_get(my_new_cred)->cr_flags);
 
 			proc_lock(p);
 			/*
@@ -876,6 +885,8 @@ seteuid(proc_t p, struct seteuid_args *uap, __unused int32_t *retval)
 				continue;
 			}
 			p->p_ucred = my_new_cred;
+			/* update cred on proc */
+			PROC_UPDATE_CREDS_ONPROC(p);
 			OSBitOrAtomic(P_SUGID, &p->p_flag);
 			proc_unlock(p);
 		}
@@ -926,6 +937,7 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused int32_t *retval)
 	uid_t ruid, euid;
 	int error;
 	kauth_cred_t my_cred, my_new_cred;
+	posix_cred_t my_pcred;
 
 	DEBUG_CRED_ENTER("setreuid %d %d\n", uap->ruid, uap->euid);
 
@@ -939,15 +951,16 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused int32_t *retval)
 	AUDIT_ARG(ruid, ruid);
 
 	my_cred = kauth_cred_proc_ref(p);
+	my_pcred = posix_cred_get(my_cred);
 
 	if (((ruid != KAUTH_UID_NONE &&		/* allow no change of ruid */
-	      ruid != my_cred->cr_ruid &&	/* allow ruid = ruid */
-	      ruid != my_cred->cr_uid &&	/* allow ruid = euid */
-	      ruid != my_cred->cr_svuid) ||	/* allow ruid = svuid */
+	      ruid != my_pcred->cr_ruid &&	/* allow ruid = ruid */
+	      ruid != my_pcred->cr_uid &&	/* allow ruid = euid */
+	      ruid != my_pcred->cr_svuid) ||	/* allow ruid = svuid */
 	     (euid != KAUTH_UID_NONE &&		/* allow no change of euid */
-	      euid != my_cred->cr_uid &&	/* allow euid = euid */
-	      euid != my_cred->cr_ruid &&	/* allow euid = ruid */
-	      euid != my_cred->cr_svuid)) &&	/* allow euid = svui */
+	      euid != my_pcred->cr_uid &&	/* allow euid = euid */
+	      euid != my_pcred->cr_ruid &&	/* allow euid = ruid */
+	      euid != my_pcred->cr_svuid)) &&	/* allow euid = svui */
 	    (error = suser(my_cred, &p->p_acflag))) { /* allow root user any */
 		kauth_cred_unref(&my_cred);
 		return (error);
@@ -963,8 +976,8 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused int32_t *retval)
 		uid_t new_ruid;
 		uid_t svuid = KAUTH_UID_NONE;
 
-		new_euid = my_cred->cr_uid;
-		new_ruid = my_cred->cr_ruid;
+		new_euid = my_pcred->cr_uid;
+		new_ruid = my_pcred->cr_ruid;
 	
   		/* 
 		 * Set the credential with new info.  If there is no change,
@@ -973,16 +986,16 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused int32_t *retval)
 		 * passed in.  The subsequent compare is safe, because it is
 		 * a pointer compare rather than a contents compare.
   		 */
-		if (euid == KAUTH_UID_NONE && my_cred->cr_uid != euid) {
+		if (euid == KAUTH_UID_NONE && my_pcred->cr_uid != euid) {
 			/* changing the effective UID */
 			new_euid = euid;
 			OSBitOrAtomic(P_SUGID, &p->p_flag);
 		}
-		if (ruid != KAUTH_UID_NONE && my_cred->cr_ruid != ruid) {
+		if (ruid != KAUTH_UID_NONE && my_pcred->cr_ruid != ruid) {
 			/* changing the real UID; must do user accounting */
 		 	/* chgproccnt uses list lock for protection */
 			(void)chgproccnt(ruid, 1);
-			(void)chgproccnt(my_cred->cr_ruid, -1);
+			(void)chgproccnt(my_pcred->cr_ruid, -1);
 			new_ruid = ruid;
 			OSBitOrAtomic(P_SUGID, &p->p_flag);
 		}
@@ -992,17 +1005,17 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused int32_t *retval)
 		 * new effective uid.  We are protected from escalation
 		 * by the prechecking.
 		 */
-		if (my_cred->cr_svuid != uap->ruid &&
-		    my_cred->cr_svuid != uap->euid) {
+		if (my_pcred->cr_svuid != uap->ruid &&
+		    my_pcred->cr_svuid != uap->euid) {
 		    	svuid = new_euid;
 			OSBitOrAtomic(P_SUGID, &p->p_flag);
 		}
 
-		my_new_cred = kauth_cred_setresuid(my_cred, ruid, euid, svuid, my_cred->cr_gmuid);
+		my_new_cred = kauth_cred_setresuid(my_cred, ruid, euid, svuid, my_pcred->cr_gmuid);
 	
 		if (my_cred != my_new_cred) {
 
-			DEBUG_CRED_CHANGE("setreuid CH(%d): %p/0x%08x -> %p/0x%08x\n", p->p_pid, my_cred, my_cred->cr_flags, my_new_cred, my_new_cred->cr_flags);
+			DEBUG_CRED_CHANGE("setreuid CH(%d): %p/0x%08x -> %p/0x%08x\n", p->p_pid, my_cred, my_pcred->cr_flags, my_new_cred, posix_cred_get(my_new_cred)->cr_flags);
 
 			proc_lock(p);
 			/*
@@ -1019,6 +1032,8 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused int32_t *retval)
 				continue;
 			}
 			p->p_ucred = my_new_cred;
+			/* update cred on proc */
+			PROC_UPDATE_CREDS_ONPROC(p);
 			OSBitOrAtomic(P_SUGID, &p->p_flag); /* XXX redundant? */
 			proc_unlock(p);
 		}
@@ -1065,6 +1080,7 @@ setgid(proc_t p, struct setgid_args *uap, __unused int32_t *retval)
 	gid_t svgid = KAUTH_GID_NONE;
 	int error;
 	kauth_cred_t my_cred, my_new_cred;
+	posix_cred_t my_pcred;
 
 	DEBUG_CRED_ENTER("setgid(%d/%d): %d\n", p->p_pid, (p->p_pptr ? p->p_pptr->p_pid : 0), uap->gid);
 
@@ -1072,9 +1088,10 @@ setgid(proc_t p, struct setgid_args *uap, __unused int32_t *retval)
 	AUDIT_ARG(gid, gid);
 
 	my_cred = kauth_cred_proc_ref(p);
+	my_pcred = posix_cred_get(my_cred);
 
-	if (gid != my_cred->cr_rgid &&	/* allow setgid(getgid()) */
-	    gid != my_cred->cr_svgid &&	/* allow setgid(saved gid) */
+	if (gid != my_pcred->cr_rgid &&		/* allow setgid(getgid()) */
+	    gid != my_pcred->cr_svgid &&	/* allow setgid(saved gid) */
 	    (error = suser(my_cred, &p->p_acflag))) {
 		kauth_cred_unref(&my_cred);
 		return (error);
@@ -1119,6 +1136,8 @@ setgid(proc_t p, struct setgid_args *uap, __unused int32_t *retval)
 				continue;
 			}
 			p->p_ucred = my_new_cred;
+			/* update cred on proc */
+			PROC_UPDATE_CREDS_ONPROC(p);
 			OSBitOrAtomic(P_SUGID, &p->p_flag);
 			proc_unlock(p);
 		}
@@ -1161,6 +1180,7 @@ setegid(proc_t p, struct setegid_args *uap, __unused int32_t *retval)
 	gid_t egid;
 	int error;
 	kauth_cred_t my_cred, my_new_cred;
+	posix_cred_t my_pcred;
 
 	DEBUG_CRED_ENTER("setegid %d\n", uap->egid);
 
@@ -1168,9 +1188,10 @@ setegid(proc_t p, struct setegid_args *uap, __unused int32_t *retval)
 	AUDIT_ARG(egid, egid);
 
 	my_cred = kauth_cred_proc_ref(p);
+	my_pcred = posix_cred_get(my_cred);
 
-	if (egid != my_cred->cr_rgid &&
-	    egid != my_cred->cr_svgid &&
+	if (egid != my_pcred->cr_rgid &&
+	    egid != my_pcred->cr_svgid &&
 	    (error = suser(my_cred, &p->p_acflag))) {
 		kauth_cred_unref(&my_cred);
 		return (error);
@@ -1188,7 +1209,7 @@ setegid(proc_t p, struct setegid_args *uap, __unused int32_t *retval)
 		my_new_cred = kauth_cred_setresgid(my_cred, KAUTH_GID_NONE, egid, KAUTH_GID_NONE);
 		if (my_cred != my_new_cred) {
 
-			DEBUG_CRED_CHANGE("setegid(CH)%d: %p/0x%08x->%p/0x%08x\n", p->p_pid, my_cred, my_cred->cr_flags, my_new_cred, my_new_cred->cr_flags);
+			DEBUG_CRED_CHANGE("setegid(CH)%d: %p/0x%08x->%p/0x%08x\n", p->p_pid, my_cred, my_pcred->cr_flags, my_new_cred, posix_cred_get(my_new_cred)->cr_flags);
 
 			proc_lock(p);
 			/*
@@ -1205,6 +1226,8 @@ setegid(proc_t p, struct setegid_args *uap, __unused int32_t *retval)
 				continue;
 			}
 			p->p_ucred = my_new_cred;
+			/* update cred on proc */
+			PROC_UPDATE_CREDS_ONPROC(p);
 			OSBitOrAtomic(P_SUGID, &p->p_flag);
 			proc_unlock(p);
 		}
@@ -1261,6 +1284,7 @@ setregid(proc_t p, struct setregid_args *uap, __unused int32_t *retval)
 	gid_t rgid, egid;
 	int error;
 	kauth_cred_t my_cred, my_new_cred;
+	posix_cred_t my_pcred;
 
 	DEBUG_CRED_ENTER("setregid %d %d\n", uap->rgid, uap->egid);
 
@@ -1275,16 +1299,17 @@ setregid(proc_t p, struct setregid_args *uap, __unused int32_t *retval)
 	AUDIT_ARG(rgid, rgid);
 
 	my_cred = kauth_cred_proc_ref(p);
+	my_pcred = posix_cred_get(my_cred);
 
 	if (((rgid != KAUTH_UID_NONE &&		/* allow no change of rgid */
-	      rgid != my_cred->cr_rgid &&	/* allow rgid = rgid */
-	      rgid != my_cred->cr_gid &&	/* allow rgid = egid */
-	      rgid != my_cred->cr_svgid) ||	/* allow rgid = svgid */
+	      rgid != my_pcred->cr_rgid &&	/* allow rgid = rgid */
+	      rgid != my_pcred->cr_gid &&	/* allow rgid = egid */
+	      rgid != my_pcred->cr_svgid) ||	/* allow rgid = svgid */
 	     (egid != KAUTH_UID_NONE &&		/* allow no change of egid */
-	      egid != my_cred->cr_groups[0] &&	/* allow no change of egid */
-	      egid != my_cred->cr_gid &&	/* allow egid = egid */
-	      egid != my_cred->cr_rgid &&	/* allow egid = rgid */
-	      egid != my_cred->cr_svgid)) &&	/* allow egid = svgid */
+	      egid != my_pcred->cr_groups[0] &&	/* allow no change of egid */
+	      egid != my_pcred->cr_gid &&	/* allow egid = egid */
+	      egid != my_pcred->cr_rgid &&	/* allow egid = rgid */
+	      egid != my_pcred->cr_svgid)) &&	/* allow egid = svgid */
 	    (error = suser(my_cred, &p->p_acflag))) { /* allow root user any */
 		kauth_cred_unref(&my_cred);
 		return (error);
@@ -1292,8 +1317,8 @@ setregid(proc_t p, struct setregid_args *uap, __unused int32_t *retval)
 
 	/* get current credential and take a reference while we muck with it */
 	for (;;) {
-		uid_t new_egid = my_cred->cr_gid;
-		uid_t new_rgid = my_cred->cr_rgid;
+		uid_t new_egid = my_pcred->cr_gid;
+		uid_t new_rgid = my_pcred->cr_rgid;
 		uid_t svgid = KAUTH_UID_NONE;
 
 		
@@ -1304,12 +1329,12 @@ setregid(proc_t p, struct setregid_args *uap, __unused int32_t *retval)
 		 * passed in.  The subsequent compare is safe, because it is
 		 * a pointer compare rather than a contents compare.
   		 */
-		if (egid == KAUTH_UID_NONE && my_cred->cr_groups[0] != egid) {
+		if (egid == KAUTH_UID_NONE && my_pcred->cr_gid != egid) {
 			/* changing the effective GID */
 			new_egid = egid;
 			OSBitOrAtomic(P_SUGID, &p->p_flag);
 		}
-		if (rgid != KAUTH_UID_NONE && my_cred->cr_rgid != rgid) {
+		if (rgid != KAUTH_UID_NONE && my_pcred->cr_rgid != rgid) {
 			/* changing the real GID */
 			new_rgid = rgid;
 			OSBitOrAtomic(P_SUGID, &p->p_flag);
@@ -1320,8 +1345,8 @@ setregid(proc_t p, struct setregid_args *uap, __unused int32_t *retval)
 		 * new effective gid.  We are protected from escalation
 		 * by the prechecking.
 		 */
-		if (my_cred->cr_svgid != uap->rgid &&
-		    my_cred->cr_svgid != uap->egid) {
+		if (my_pcred->cr_svgid != uap->rgid &&
+		    my_pcred->cr_svgid != uap->egid) {
 		    	svgid = new_egid;
 			OSBitOrAtomic(P_SUGID, &p->p_flag);
 		}
@@ -1329,7 +1354,7 @@ setregid(proc_t p, struct setregid_args *uap, __unused int32_t *retval)
 		my_new_cred = kauth_cred_setresgid(my_cred, rgid, egid, svgid);
 		if (my_cred != my_new_cred) {
 
-			DEBUG_CRED_CHANGE("setregid(CH)%d: %p/0x%08x->%p/0x%08x\n", p->p_pid, my_cred, my_cred->cr_flags, my_new_cred, my_new_cred->cr_flags);
+			DEBUG_CRED_CHANGE("setregid(CH)%d: %p/0x%08x->%p/0x%08x\n", p->p_pid, my_cred, my_pcred->cr_flags, my_new_cred, posix_cred_get(my_new_cred)->cr_flags);
 
 			proc_lock(p);
 			/* need to protect for a race where another thread
@@ -1345,6 +1370,8 @@ setregid(proc_t p, struct setregid_args *uap, __unused int32_t *retval)
 				continue;
 			}
 			p->p_ucred = my_new_cred;
+			/* update cred on proc */
+			PROC_UPDATE_CREDS_ONPROC(p);
 			OSBitOrAtomic(P_SUGID, &p->p_flag); /* XXX redundant? */
 			proc_unlock(p);
 		}
@@ -1444,6 +1471,7 @@ settid_with_pid(proc_t p, struct settid_with_pid_args *uap, __unused int32_t *re
 	proc_t target_proc;
 	struct uthread *uthread = get_bsdthread_info(current_thread());
 	kauth_cred_t my_cred, my_target_cred, my_new_cred;
+	posix_cred_t my_target_pcred;
 
 	AUDIT_ARG(pid, uap->pid);
 	AUDIT_ARG(value32, uap->assume);
@@ -1491,7 +1519,8 @@ settid_with_pid(proc_t p, struct settid_with_pid_args *uap, __unused int32_t *re
 		kauth_cred_ref(uthread->uu_ucred); 
 		my_cred = uthread->uu_ucred;
 		my_target_cred = kauth_cred_proc_ref(target_proc);
-		my_new_cred = kauth_cred_setuidgid(my_cred, my_target_cred->cr_uid, my_target_cred->cr_gid);
+		my_target_pcred = posix_cred_get(my_target_cred);
+		my_new_cred = kauth_cred_setuidgid(my_cred, my_target_pcred->cr_uid, my_target_pcred->cr_gid);
 		if (my_cred != my_new_cred)
 			uthread->uu_ucred = my_new_cred;
 	
@@ -1647,13 +1676,15 @@ setgroups1(proc_t p, u_int gidsetsize, user_addr_t gidset, uid_t gmuid, __unused
 					continue;
 				}
 				p->p_ucred = my_new_cred;
+				/* update cred on proc */
+				PROC_UPDATE_CREDS_ONPROC(p);
 				OSBitOrAtomic(P_SUGID, &p->p_flag);
 				proc_unlock(p);
 			}
 			break;
 		}
 		/* Drop old proc reference or our extra reference */
-		AUDIT_ARG(groupset, my_cred->cr_groups, ngrp);
+		AUDIT_ARG(groupset, posix_cred_get(my_cred)->cr_groups, ngrp);
 		kauth_cred_unref(&my_cred);
 
 
@@ -1835,15 +1866,17 @@ is_suser1(void)
 {
 	proc_t p = current_proc();
 	kauth_cred_t my_cred;
+	posix_cred_t my_pcred;
 	int err;
 
 	if (!p)
 		return (0);
 
 	my_cred = kauth_cred_proc_ref(p);
+	my_pcred = posix_cred_get(my_cred);
 
 	err =  (suser(my_cred, &p->p_acflag) == 0 ||
-			my_cred->cr_ruid == 0 || my_cred->cr_svuid == 0);
+			my_pcred->cr_ruid == 0 || my_pcred->cr_svuid == 0);
 	kauth_cred_unref(&my_cred);
 	return(err);
 }
@@ -1959,6 +1992,7 @@ set_security_token(proc_t p)
 	security_token_t sec_token;
 	audit_token_t    audit_token;
 	kauth_cred_t my_cred;
+	posix_cred_t my_pcred;
 	host_priv_t host_priv;
 
 	/*
@@ -1975,10 +2009,12 @@ set_security_token(proc_t p)
 	}
 		
 	my_cred = kauth_cred_proc_ref(p);
+	my_pcred = posix_cred_get(my_cred);
+
 	/* XXX mach_init doesn't have a p_ucred when it calls this function */
 	if (IS_VALID_CRED(my_cred)) {
 		sec_token.val[0] = kauth_cred_getuid(my_cred);
-		sec_token.val[1] = my_cred->cr_gid;
+		sec_token.val[1] = kauth_cred_getgid(my_cred);
 	} else {
 		sec_token.val[0] = 0;
 		sec_token.val[1] = 0;
@@ -1994,10 +2030,10 @@ set_security_token(proc_t p)
 	 * changes.
 	 */
 	audit_token.val[0] = my_cred->cr_audit.as_aia_p->ai_auid;
-	audit_token.val[1] = my_cred->cr_uid;
-	audit_token.val[2] = my_cred->cr_gid;
-	audit_token.val[3] = my_cred->cr_ruid;
-	audit_token.val[4] = my_cred->cr_rgid;
+	audit_token.val[1] = my_pcred->cr_uid;
+	audit_token.val[2] = my_pcred->cr_gid;
+	audit_token.val[3] = my_pcred->cr_ruid;
+	audit_token.val[4] = my_pcred->cr_rgid;
 	audit_token.val[5] = p->p_pid;
 	audit_token.val[6] = my_cred->cr_audit.as_aia_p->ai_asid;
 	audit_token.val[7] = p->p_idversion;
@@ -2028,12 +2064,13 @@ __private_extern__
 void
 cru2x(kauth_cred_t cr, struct xucred *xcr)
 {
+	posix_cred_t pcr = posix_cred_get(cr);
 
 	bzero(xcr, sizeof(*xcr));
 	xcr->cr_version = XUCRED_VERSION;
 	xcr->cr_uid = kauth_cred_getuid(cr);
-	xcr->cr_ngroups = cr->cr_ngroups;
-	bcopy(cr->cr_groups, xcr->cr_groups, sizeof(xcr->cr_groups));
+	xcr->cr_ngroups = pcr->cr_ngroups;
+	bcopy(pcr->cr_groups, xcr->cr_groups, sizeof(xcr->cr_groups));
 }
 
 #if CONFIG_LCTX
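
The recurring change in this file is the indirection through posix_cred_t: POSIX identity fields are read via posix_cred_get() or the kauth_cred_get*() accessors rather than by dereferencing the kauth_cred layout directly. A hedged sketch of the accessor style (cred_identity_sketch is illustrative only):

    #include <sys/kauth.h>

    /*
     * Sketch: read the POSIX identity of a credential.  Dedicated
     * accessors are preferred; for fields without one (cr_svuid here),
     * posix_cred_get() hands back the embedded posix_cred.
     */
    static void
    cred_identity_sketch(kauth_cred_t cred)
    {
        posix_cred_t pcred = posix_cred_get(cred);

        uid_t uid   = kauth_cred_getuid(cred);   /* effective uid */
        uid_t ruid  = kauth_cred_getruid(cred);  /* real uid */
        uid_t svuid = pcred->cr_svuid;           /* saved uid */
        gid_t gid   = kauth_cred_getgid(cred);   /* effective gid */

        printf("uid %d ruid %d svuid %d gid %d\n", uid, ruid, svuid, gid);
    }
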
diff --git a/bsd/kern/kern_resource.c b/bsd/kern/kern_resource.c
index 13b124887..d2473dbf0 100644
--- a/bsd/kern/kern_resource.c
+++ b/bsd/kern/kern_resource.c
@@ -111,8 +111,9 @@ int	donice(struct proc *curp, struct proc *chgp, int n);
 int	dosetrlimit(struct proc *p, u_int which, struct rlimit *limp);
 int	uthread_get_background_state(uthread_t);
 static void do_background_socket(struct proc *p, thread_t thread, int priority);
-static int do_background_thread(struct proc *curp, int priority);
-static int do_background_task(struct proc *curp, int priority);
+static int do_background_thread(struct proc *curp, thread_t thread, int priority);
+static int do_background_proc(struct proc *curp, struct proc *targetp, int priority);
+void proc_apply_task_networkbg_internal(proc_t);
 
 rlim_t maxdmap = MAXDSIZ;	/* XXX */ 
 rlim_t maxsmap = MAXSSIZ - PAGE_SIZE;	/* XXX */ 
@@ -125,10 +126,10 @@ rlim_t maxsmap = MAXSSIZ - PAGE_SIZE;	/* XXX */
  */
 __private_extern__ int maxfilesperproc = OPEN_MAX;		/* per-proc open files limit */
 
-SYSCTL_INT( _kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW,
+SYSCTL_INT(_kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW | CTLFLAG_LOCKED,
     		&maxprocperuid, 0, "Maximum processes allowed per userid" );
 
-SYSCTL_INT( _kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,       
+SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW | CTLFLAG_LOCKED,
     		&maxfilesperproc, 0, "Maximum files allowed open per process" );
 
 /* Args and fn for proc_iteration callback used in setpriority */
@@ -371,8 +372,10 @@ setpriority(struct proc *curp, struct setpriority_args *uap, __unused int32_t *r
 		if (uap->who != 0) {
 			return (EINVAL);
 		}
-		error = do_background_thread(curp, uap->prio);
-		(void) do_background_socket(curp, current_thread(), uap->prio);
+		error = do_background_thread(curp, current_thread(), uap->prio);
+		if (!error) {
+			(void) do_background_socket(curp, current_thread(), uap->prio);
+		}
 		found++;
 		break;
 	}
@@ -387,8 +390,10 @@ setpriority(struct proc *curp, struct setpriority_args *uap, __unused int32_t *r
 			refheld = 1;
 		}
 
-		error = do_background_task(p, uap->prio);
-		(void) do_background_socket(p, NULL, uap->prio);
+		error = do_background_proc(curp, p, uap->prio);
+		if (!error) {
+			(void) do_background_socket(p, NULL, uap->prio);
+		}
 		
 		found++;
 		if (refheld != 0)
@@ -421,9 +426,9 @@ donice(struct proc *curp, struct proc *chgp, int n)
 	ucred = kauth_cred_proc_ref(curp);
 	my_cred = kauth_cred_proc_ref(chgp);
 
-	if (suser(ucred, NULL) && ucred->cr_ruid &&
+	if (suser(ucred, NULL) && kauth_cred_getruid(ucred) &&
 	    kauth_cred_getuid(ucred) != kauth_cred_getuid(my_cred) &&
-	    ucred->cr_ruid != kauth_cred_getuid(my_cred)) {
+	    kauth_cred_getruid(ucred) != kauth_cred_getuid(my_cred)) {
 		error = EPERM;
 		goto out;
 	}
@@ -451,19 +456,53 @@ out:
 }
 
 static int
-do_background_task(struct proc *p, int priority)
+do_background_proc(struct proc *curp, struct proc *targetp, int priority)
 {
 	int error = 0;
+	kauth_cred_t ucred;
+	kauth_cred_t target_cred;
+#if CONFIG_EMBEDDED
 	task_category_policy_data_t info;
+#endif
+
+	ucred = kauth_cred_get();
+	target_cred = kauth_cred_proc_ref(targetp);
+
+	if (!kauth_cred_issuser(ucred) && kauth_cred_getruid(ucred) &&
+	    kauth_cred_getuid(ucred) != kauth_cred_getuid(target_cred) &&
+	    kauth_cred_getruid(ucred) != kauth_cred_getuid(target_cred))
+	{
+		error = EPERM;
+		goto out;
+	}
+
+#if CONFIG_MACF
+	error = mac_proc_check_sched(curp, targetp);
+	if (error) 
+		goto out;
+#endif
+
+#if !CONFIG_EMBEDDED
+	if (priority == PRIO_DARWIN_NONUI)
+		error = proc_apply_task_gpuacc(targetp->task, TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS);
+	else
+		error = proc_set1_bgtaskpolicy(targetp->task, priority);
+	if (error)
+		goto out;
+#else /* !CONFIG_EMBEDDED */
 
 	/* set the max scheduling priority on the task */
-	if (priority & PRIO_DARWIN_BG) { 
+	if (priority == PRIO_DARWIN_BG) { 
 		info.role = TASK_THROTTLE_APPLICATION;
-	} else {
+	}
+	else if (priority == PRIO_DARWIN_NONUI) { 
+		info.role = TASK_NONUI_APPLICATION;
+	}
+	else {
 		info.role = TASK_DEFAULT_APPLICATION;
 	}
 
-	error = task_policy_set(p->task,
+	error = task_policy_set(targetp->task,
 			TASK_CATEGORY_POLICY,
 			(task_policy_t) &info,
 			TASK_CATEGORY_POLICY_COUNT);
@@ -471,22 +510,24 @@ do_background_task(struct proc *p, int priority)
 	if (error)
 		goto out;
 
-	proc_lock(p);
+	proc_lock(targetp);
 
 	/* mark proc structure as backgrounded */
-	if (priority & PRIO_DARWIN_BG) {
-		p->p_lflag |= P_LBACKGROUND;
+	if (priority == PRIO_DARWIN_BG) {
+		targetp->p_lflag |= P_LBACKGROUND;
 	} else {
-		p->p_lflag &= ~P_LBACKGROUND;
+		targetp->p_lflag &= ~P_LBACKGROUND;
 	}
 
 	/* set or reset the disk I/O priority */
-	p->p_iopol_disk = (priority == PRIO_DARWIN_BG ? 
+	targetp->p_iopol_disk = (priority == PRIO_DARWIN_BG ? 
 			IOPOL_THROTTLE : IOPOL_DEFAULT); 
 
-	proc_unlock(p);
+	proc_unlock(targetp);
+#endif /* !CONFIG_EMBEDDED */
 
 out:
+	kauth_cred_unref(&target_cred);
 	return (error);
 }
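
The precheck added to do_background_proc() mirrors donice(): root may always act, and otherwise the caller needs a real uid of 0 or an effective/real uid matching the target's effective uid. A minimal sketch of that test (can_set_priority_sketch is hypothetical):

    #include <sys/errno.h>
    #include <sys/kauth.h>

    /*
     * Sketch: may the caller (ucred) retarget the priority of a process
     * owned by target_cred?  Follows the precheck above.
     */
    static int
    can_set_priority_sketch(kauth_cred_t ucred, kauth_cred_t target_cred)
    {
        if (kauth_cred_issuser(ucred))
            return (0);                 /* superuser */
        if (kauth_cred_getruid(ucred) == 0)
            return (0);                 /* real uid is root */
        if (kauth_cred_getuid(ucred) == kauth_cred_getuid(target_cred) ||
            kauth_cred_getruid(ucred) == kauth_cred_getuid(target_cred))
            return (0);                 /* same owner */
        return (EPERM);
    }
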
 
@@ -497,7 +538,7 @@ do_background_socket(struct proc *p, thread_t thread, int priority)
 	struct fileproc                     *fp;
 	int                                 i;
 
-	if (priority & PRIO_DARWIN_BG) {
+	if (priority == PRIO_DARWIN_BG) {
 		/*
 		 * For PRIO_DARWIN_PROCESS (thread is NULL), simply mark
 		 * the sockets with the background flag.  There's nothing
@@ -523,12 +564,6 @@ do_background_socket(struct proc *p, thread_t thread, int priority)
 		}
 
 	} else {
-		u_int32_t	traffic_mgt;
-		/*
-		 * See comments on do_background_thread().  Deregulate network
-		 * traffics only for setpriority(PRIO_DARWIN_THREAD).
-		 */
-		traffic_mgt = (thread == NULL) ? 0 : TRAFFIC_MGT_SO_BG_REGULATE;
 
 		/* disable networking IO throttle.
 		 * NOTE - It is a known limitation of the current design that we 
@@ -550,7 +585,7 @@ do_background_socket(struct proc *p, thread_t thread, int priority)
 			if ((thread) && (sockp->so_background_thread != thread)) {
 				continue;
 			}
-			socket_clear_traffic_mgt_flags(sockp, TRAFFIC_MGT_SO_BACKGROUND | traffic_mgt);
+			socket_clear_traffic_mgt_flags(sockp, TRAFFIC_MGT_SO_BACKGROUND);
 			sockp->so_background_thread = NULL;
 		}
 		proc_fdunlock(p);
@@ -572,15 +607,26 @@ do_background_socket(struct proc *p, thread_t thread, int priority)
  *	 and only TRAFFIC_MGT_SO_BACKGROUND is set via do_background_socket().
  */
 static int
-do_background_thread(struct proc *curp __unused, int priority)
+do_background_thread(struct proc *curp __unused, thread_t thread, int priority)
 {
-	thread_t							thread;
 	struct uthread						*ut;
+#if !CONFIG_EMBEDDED
+	int error = 0;
+#else /* !CONFIG_EMBEDDED */
 	thread_precedence_policy_data_t		policy;
+#endif /* !CONFIG_EMBEDDED */
 	
-	thread = current_thread();
 	ut = get_bsdthread_info(thread);
 
+	/* Backgrounding is unsupported for threads in vfork */
+	if ( (ut->uu_flag & UT_VFORK) != 0) {
+		return(EPERM);
+	}
+
+#if !CONFIG_EMBEDDED
+	error = proc_set1_bgthreadpolicy(curp->task, thread_tid(thread), priority);
+	return(error);
+#else /* !CONFIG_EMBEDDED */
 	if ( (priority & PRIO_DARWIN_BG) == 0 ) {
 		/* turn off backgrounding of thread */
 		if ( (ut->uu_flag & UT_BACKGROUND) == 0 ) {
@@ -630,9 +676,57 @@ do_background_thread(struct proc *curp __unused, int priority)
 	 * thread then TRAFFIC_MGT_SO_{BACKGROUND,BG_REGULATE} is set.
 	 * Existing sockets are taken care of by do_background_socket().
 	 */
+#endif /* !CONFIG_EMBEDDED */
 	return(0);
 }
 
+#if CONFIG_EMBEDDED
+int mach_do_background_thread(thread_t thread, int prio);
+
+int
+mach_do_background_thread(thread_t thread, int prio)
+{
+	int 			error		= 0;
+	struct proc		*curp		= NULL;
+	struct proc		*targetp	= NULL;
+	kauth_cred_t	ucred;
+
+	targetp = get_bsdtask_info(get_threadtask(thread));
+	if (!targetp) {
+		return KERN_INVALID_ARGUMENT;
+	}
+
+	curp = proc_self();
+	if (curp == PROC_NULL) {
+		return KERN_FAILURE;
+	}
+
+	ucred = kauth_cred_proc_ref(curp);
+
+	if (suser(ucred, NULL) && curp != targetp) {
+		error = KERN_PROTECTION_FAILURE;
+		goto out;
+	}
+
+	error = do_background_thread(curp, thread, prio);
+	if (!error) {
+		(void) do_background_socket(curp, thread, prio);
+	} else {
+		if (error == EPERM) {
+			error = KERN_PROTECTION_FAILURE;
+		} else {
+			error = KERN_FAILURE;
+		}
+	}
+
+out:
+	proc_rele(curp);
+	kauth_cred_unref(&ucred);
+	return error;
+}
+#endif /* CONFIG_EMBEDDED */
+
+#if CONFIG_EMBEDDED
 /*
  * If the thread or its proc has been put into the background
  * with setpriority(PRIO_DARWIN_{THREAD,PROCESS}, *, PRIO_DARWIN_BG),
@@ -653,6 +747,7 @@ uthread_get_background_state(uthread_t uth)
 
 	return 0;
 }
+#endif /* CONFIG_EMBEDDED */
 
 /*
  * Returns:	0			Success
@@ -1234,19 +1329,70 @@ int
 iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __unused int32_t *retval)
 {
 	int	error = 0;
+	struct _iopol_param_t iop_param;
+#if !CONFIG_EMBEDDED
+	int processwide = 0;
+#else /* !CONFIG_EMBEDDED */
 	thread_t thread = THREAD_NULL;
-	int *policy;
 	struct uthread	*ut = NULL;
-	struct _iopol_param_t iop_param;
+	int *policy;
+#endif /* !CONFIG_EMBEDDED */
 
 	if ((error = copyin(uap->arg, &iop_param, sizeof(iop_param))) != 0)
-		goto exit;
+		goto out;
 
 	if (iop_param.iop_iotype != IOPOL_TYPE_DISK) {
 		error = EINVAL;
-		goto exit;
+		goto out;
+	}
+
+#if !CONFIG_EMBEDDED
+	switch (iop_param.iop_scope) {
+	case IOPOL_SCOPE_PROCESS:
+		processwide = 1;
+		break;
+	case IOPOL_SCOPE_THREAD:
+		processwide = 0;
+		break;
+	default:
+		error = EINVAL;
+		goto out;
 	}
+		
+	switch(uap->cmd) {
+	case IOPOL_CMD_SET:
+		switch (iop_param.iop_policy) {
+		case IOPOL_DEFAULT:
+		case IOPOL_NORMAL:
+		case IOPOL_THROTTLE:
+		case IOPOL_PASSIVE:
+			if(processwide != 0)
+				proc_apply_task_diskacc(current_task(), iop_param.iop_policy);
+			else
+				proc_apply_thread_selfdiskacc(iop_param.iop_policy);
+				
+			break;
+		default:
+			error = EINVAL;
+			goto out;
+		}
+		break;
+	
+	case IOPOL_CMD_GET:
+		if(processwide != 0)
+			iop_param.iop_policy = proc_get_task_disacc(current_task());
+		else
+			iop_param.iop_policy = proc_get_thread_selfdiskacc();
+			
+		error = copyout((caddr_t)&iop_param, uap->arg, sizeof(iop_param));
 
+		break;
+	default:
+		error = EINVAL; // unknown command
+		break;
+	}
+
+#else /* !CONFIG_EMBEDDED */
 	switch (iop_param.iop_scope) {
 	case IOPOL_SCOPE_PROCESS:
 		policy = &p->p_iopol_disk;
@@ -1258,7 +1404,7 @@ iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __un
 		break;
 	default:
 		error = EINVAL;
-		goto exit;
+		goto out;
 	}
 		
 	switch(uap->cmd) {
@@ -1274,7 +1420,7 @@ iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __un
 			break;
 		default:
 			error = EINVAL;
-			goto exit;
+			goto out;
 		}
 		break;
 	case IOPOL_CMD_GET:
@@ -1300,7 +1446,8 @@ iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __un
 		break;
 	}
 
-  exit:
+#endif /* !CONFIG_EMBEDDED */
+out:
 	*retval = error;
 	return (error);
 }
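
For context, the usual route into this syscall from user space is the libc iopolicy wrapper; a hedged usage example, assuming the setiopolicy_np()/getiopolicy_np() interfaces declared in <sys/resource.h> on Mac OS X:

    #include <sys/resource.h>
    #include <stdio.h>

    int
    main(void)
    {
        /* Throttle disk I/O for the whole process (IOPOL_SCOPE_PROCESS). */
        if (setiopolicy_np(IOPOL_TYPE_DISK, IOPOL_SCOPE_PROCESS,
            IOPOL_THROTTLE) != 0) {
            perror("setiopolicy_np");
            return (1);
        }

        /* Read it back; this lands in the IOPOL_CMD_GET arm above. */
        printf("disk policy now %d\n",
            getiopolicy_np(IOPOL_TYPE_DISK, IOPOL_SCOPE_PROCESS));
        return (0);
    }
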
@@ -1309,8 +1456,14 @@ iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __un
 boolean_t thread_is_io_throttled(void);
 
 boolean_t
-thread_is_io_throttled(void) {
+thread_is_io_throttled(void) 
+{
+
+#if !CONFIG_EMBEDDED
 
+	return(proc_get_task_selfdiskacc() == IOPOL_THROTTLE);
+		
+#else /* !CONFIG_EMBEDDED */
 	int	policy;
 	struct uthread  *ut;
 
@@ -1326,4 +1479,54 @@ thread_is_io_throttled(void) {
 			return TRUE;
 	}
 	return FALSE;
+#endif /* !CONFIG_EMBEDDED */
+}
+
+void
+proc_apply_task_networkbg(void * bsd_info)
+{
+	proc_t p = PROC_NULL;
+	proc_t curp = (proc_t)bsd_info;
+	pid_t pid;
+
+	pid = curp->p_pid;
+	p = proc_find(pid);
+	if (p != PROC_NULL) {
+		do_background_socket(p, NULL, PRIO_DARWIN_BG);
+		proc_rele(p);
+	}
+}
+
+void
+proc_restore_task_networkbg(void * bsd_info)
+{
+	proc_t p = PROC_NULL;
+	proc_t curp = (proc_t)bsd_info;
+	pid_t pid;
+
+	pid = curp->p_pid;
+	p = proc_find(pid);
+	if (p != PROC_NULL) {
+		do_background_socket(p, NULL, 0);
+		proc_rele(p);
+	}
+
+}
+
+void
+proc_set_task_networkbg(void * bsdinfo, int setbg)
+{
+	if (setbg != 0)
+		proc_apply_task_networkbg(bsdinfo);
+	else
+		proc_restore_task_networkbg(bsdinfo);
 }
+
+void
+proc_apply_task_networkbg_internal(proc_t p)
+{
+	if (p != PROC_NULL) {
+		do_background_socket(p, NULL, PRIO_DARWIN_BG);
+	}
+}
+
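
Both networkbg helpers above use the standard pid-lookup discipline: proc_find() returns a referenced proc (or PROC_NULL if the pid is gone), and a successful lookup must be balanced by proc_rele(). A condensed sketch (background_sockets_by_pid_sketch is illustrative):

    /*
     * Sketch: act on a process by pid without holding it longer than
     * needed.
     */
    static void
    background_sockets_by_pid_sketch(pid_t pid, int setbg)
    {
        proc_t p = proc_find(pid);

        if (p == PROC_NULL)
            return;                     /* process already exited */
        do_background_socket(p, NULL, setbg ? PRIO_DARWIN_BG : 0);
        proc_rele(p);
    }
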
diff --git a/bsd/kern/kern_shutdown.c b/bsd/kern/kern_shutdown.c
index f8984bb3c..4e231826d 100644
--- a/bsd/kern/kern_shutdown.c
+++ b/bsd/kern/kern_shutdown.c
@@ -67,16 +67,16 @@
 #include <mach/task.h>			/* for task_suspend() */
 #include <sys/sysproto.h>		/* abused for sync() */
 #include <kern/clock.h>			/* for delay_for_interval() */
+#include <libkern/OSAtomic.h>
 
 #include <sys/kdebug.h>
 
-int system_inshutdown = 0;
+uint32_t system_inshutdown = 0;
 
 /* XXX should be in a header file somewhere, but isn't */
 extern void md_prepare_for_shutdown(int, int, char *);
 extern void (*unmountroot_pre_hook)(void);
 
-int	waittime = -1;
 unsigned int proc_shutdown_exitcount = 0;
 
 static int  sd_openlog(vfs_context_t);
@@ -109,37 +109,34 @@ static int  sd_callback1(proc_t p, void * arg);
 static int  sd_callback2(proc_t p, void * arg);
 static int  sd_callback3(proc_t p, void * arg);
 
-void
+int
 boot(int paniced, int howto, char *command)
 {
 	struct proc *p = current_proc();	/* XXX */
 	int hostboot_option=0;
-	int funnel_state;
 
-	system_inshutdown = 1;
-
-	funnel_state = thread_funnel_set(kernel_flock, TRUE);
-
-       /*
-	* Temporary hack to notify the power management root domain
-	* that the system will shut down.
-	*/
+	if (!OSCompareAndSwap(0, 1, &system_inshutdown)) {
+		if ( (howto&RB_QUICK) == RB_QUICK)
+			goto force_reboot;
+		return (EBUSY);
+	}
+	/*
+	 * Temporary hack to notify the power management root domain
+	 * that the system will shut down.
+	 */
 	IOSystemShutdownNotification();
 
 	md_prepare_for_shutdown(paniced, howto, command);
 
-	if ((howto&RB_QUICK)==RB_QUICK && waittime < 0) {
-		waittime = 0;
+	if ((howto&RB_QUICK)==RB_QUICK) {
 		printf("Quick reboot...\n");
 		if ((howto&RB_NOSYNC)==0) {
 			sync(p, (void *)NULL, (int *)NULL);
 		}
 	}
-	else if ((howto&RB_NOSYNC)==0 && waittime < 0) {
+	else if ((howto&RB_NOSYNC)==0) {
 		int iter, nbusy;
 
-		waittime = 0;
-		
 		printf("syncing disks... ");
 
 		/*
@@ -150,7 +147,7 @@ boot(int paniced, int howto, char *command)
 		proc_shutdown();
 
 #if CONFIG_AUDIT
- 		audit_shutdown();
+		audit_shutdown();
 #endif
 
 		if (unmountroot_pre_hook != NULL)
@@ -162,7 +159,7 @@ boot(int paniced, int howto, char *command)
 		 * Now that all processes have been terminated and system is
 		 * sync'ed up, suspend init
 		 */
-
+			
 		if (initproc && p != initproc)
 			task_suspend(initproc->task);
 
@@ -187,7 +184,6 @@ boot(int paniced, int howto, char *command)
 		else
 			printf("done\n");
 	}
-
 #if NETWORKING
 	/*
 	 * Can't just use an splnet() here to disable the network
@@ -197,6 +193,7 @@ boot(int paniced, int howto, char *command)
 	if_down_all();
 #endif /* NETWORKING */
 
+force_reboot:
 	if (howto & RB_POWERDOWN)
 		hostboot_option = HOST_REBOOT_HALT;
 	if (howto & RB_HALT)
@@ -204,13 +201,15 @@ boot(int paniced, int howto, char *command)
 	if (paniced == RB_PANIC)
 		hostboot_option = HOST_REBOOT_HALT;
 
-    if (howto & RB_UPSDELAY) {
-        hostboot_option = HOST_REBOOT_UPSDELAY;
-    }
+	if (howto & RB_UPSDELAY) {
+		hostboot_option = HOST_REBOOT_UPSDELAY;
+	}
 
 	host_reboot(host_priv_self(), hostboot_option);
-
-	thread_funnel_set(kernel_flock, FALSE);
+	/*
+	 * should not be reached
+	 */
+	return (0);
 }
 
 static int
diff --git a/bsd/kern/kern_sig.c b/bsd/kern/kern_sig.c
index e0ded6e4c..de5455812 100644
--- a/bsd/kern/kern_sig.c
+++ b/bsd/kern/kern_sig.c
@@ -334,10 +334,10 @@ cansignal(proc_t p, kauth_cred_t uc, proc_t q, int signum, int zombie)
 	else
 		my_cred = proc_ucred(q);
 
-	if (uc->cr_ruid == my_cred->cr_ruid ||
-	    uc->cr_ruid == my_cred->cr_svuid ||
-	    kauth_cred_getuid(uc) == my_cred->cr_ruid ||
-	    kauth_cred_getuid(uc) == my_cred->cr_svuid) {
+	if (kauth_cred_getruid(uc) == kauth_cred_getruid(my_cred) ||
+	    kauth_cred_getruid(uc) == kauth_cred_getsvuid(my_cred) ||
+	    kauth_cred_getuid(uc) == kauth_cred_getruid(my_cred) ||
+	    kauth_cred_getuid(uc) == kauth_cred_getsvuid(my_cred)) {
 		if (zombie == 0)
 			kauth_cred_unref(&my_cred);
 		return (1);
@@ -566,7 +566,7 @@ set_procsigmask(proc_t p,  int bit)
  *		process/thread pair.
  *
 *		We mark thread as unused to allow compilation without warning
- *		onnon-PPC platforms.
+ *		on non-PPC platforms.
  */
 int
 setsigvec(proc_t p, __unused thread_t thread, int signum, struct __kern_sigaction *sa, boolean_t in_sigstart)
@@ -623,14 +623,6 @@ setsigvec(proc_t p, __unused thread_t thread, int signum, struct __kern_sigactio
 			OSBitAndAtomic(~((uint32_t)P_NOCLDWAIT), &p->p_flag);
 	}
 
-#ifdef __ppc__ 
-	if (signum == SIGFPE) {
-		if (sa->sa_handler == SIG_DFL || sa->sa_handler == SIG_IGN) 
-			thread_enable_fpe(thread, 0);
-		else
-			thread_enable_fpe(thread, 1);
-	}
-#endif  /* __ppc__ */
 	/*
 	 * Set bit in p_sigignore for signals that are set to SIG_IGN,
 	 * and for signals set to SIG_DFL where the default is to ignore.
@@ -1749,34 +1741,35 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
 	if (flavor & PSIG_VFORK) {
 		sig_task = task;
 		sig_thread = thread;
-		sig_proc= p;
+		sig_proc = p;
 	} else if (flavor & PSIG_THREAD) {
 		sig_task = get_threadtask(thread);
 		sig_thread = thread;
 		sig_proc = (proc_t)get_bsdtask_info(sig_task);
 	} else {
 		sig_task = p->task;
-		sig_proc = p;
 		sig_thread = (struct thread *)0;
+		sig_proc = p;
 	}
-	if (((sig_task == TASK_NULL)  || is_kerneltask(sig_task))) {
+
+	if ((sig_task == TASK_NULL) || is_kerneltask(sig_task))
 		return;
-	}
 
 	/*
 	 * do not send signals to the process that has the thread
 	 * doing a reboot(). Not doing so will mark that thread aborted
-	 * and can cause IO failures wich will cause data loss.
+	 * and can cause IO failures which will cause data loss.  There's
+	 * also no need to send a signal to a process that is in the middle
+	 * of being torn down.
 	 */
-	if (ISSET(sig_proc->p_flag, P_REBOOT)) {
+	if (ISSET(sig_proc->p_flag, P_REBOOT) ||
+	    ISSET(sig_proc->p_lflag, P_LEXIT))
 		return;
-	}
 
 	if( (flavor & (PSIG_VFORK | PSIG_THREAD)) == 0) {
 		proc_knote(sig_proc, NOTE_SIGNAL | signum);
 	}
 
-
 	if ((flavor & PSIG_LOCKED)== 0)
 		proc_signalstart(sig_proc, 0);
 
@@ -2027,7 +2020,7 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
 				if (( pp != PROC_NULL) && ((pp->p_flag & P_NOCLDSTOP) == 0)) {
 
 					my_cred = kauth_cred_proc_ref(sig_proc);
-					r_uid = my_cred->cr_ruid;
+					r_uid = kauth_cred_getruid(my_cred);
 					kauth_cred_unref(&my_cred);
 
 					proc_lock(sig_proc);
@@ -2077,6 +2070,14 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum)
 			 */
 			sig_proc->p_stat = SRUN;
 			proc_unlock(sig_proc);
+			/*
+			 * In scenarios where suspend/resume are racing
+			 * the signal, we can miss AST_BSD by the time
+			 * we get here; set it again to avoid the race.
+			 * This was the scenario with spindump-enabled
+			 * shutdowns.  We would need to cover this
+			 * appropriately down the line.
+			 */
+			act_set_astbsd(sig_thread);
 			thread_abort(sig_thread);
 
 			goto psigout;
@@ -2281,7 +2282,7 @@ issignal(proc_t p)
 			} else {
 				proc_unlock(p);
 				my_cred = kauth_cred_proc_ref(p);
-				r_uid = my_cred->cr_ruid;
+				r_uid = kauth_cred_getruid(my_cred);
 				kauth_cred_unref(&my_cred);
 
 				pp = proc_parentholdref(p);
@@ -2445,7 +2446,7 @@ issignal(proc_t p)
 					stop(p, pp);
 					if ((pp != PROC_NULL) && ((pp->p_flag & P_NOCLDSTOP) == 0)) {
 						my_cred = kauth_cred_proc_ref(p);
-						r_uid = my_cred->cr_ruid;
+						r_uid = kauth_cred_getruid(my_cred);
 						kauth_cred_unref(&my_cred);
 
 						proc_lock(pp);
@@ -2501,7 +2502,7 @@ issignal(proc_t p)
 		}
 	/* NOTREACHED */
 out:
-	proc_signalend(p,1);
+	proc_signalend(p, 1);
 	proc_unlock(p);
 	return(retval);
 }
@@ -2538,6 +2539,7 @@ CURSIG(proc_t p)
 		signum = ffs((long)sigbits);
 		mask = sigmask(signum);
 		prop = sigprop[signum];
+		sigbits &= ~mask;		/* take the signal out */
 
 		/*
 		 * We should see pending but ignored signals
@@ -2546,14 +2548,8 @@ CURSIG(proc_t p)
 		if (mask & p->p_sigignore && (p->p_lflag & P_LTRACED) == 0) {
 			continue;
 		}
+
 		if (p->p_lflag & P_LTRACED && (p->p_lflag & P_LPPWAIT) == 0) {
-			/*
-			 * Put the new signal into p_siglist.  If the
-			 * signal is being masked, look for other signals.
-			 */
-			mask = sigmask(signum);
-			if (ut->uu_sigmask & mask)
-				continue;
 			return(signum);
 		}
 
@@ -2631,7 +2627,6 @@ CURSIG(proc_t p)
 			 */
 			return (signum);
 		}
-		sigbits &= ~mask;		/* take the signal! */
 	}
 	/* NOTREACHED */
 }
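
The CURSIG() change is subtle: `sigbits &= ~mask` now happens as soon as a signal is selected, so an iteration that bails out with `continue` can never re-select the same bit. A sketch of the resulting loop shape:

    /*
     * Sketch: walk the pending-signal bits lowest-first.  Clearing each
     * bit up front guarantees forward progress even when delivery of the
     * selected signal is skipped.
     */
    static int
    scan_pending_sketch(sigset_t sigbits)
    {
        while (sigbits) {
            int signum = ffs((long)sigbits);
            sigset_t mask = sigmask(signum);

            sigbits &= ~mask;           /* take the signal out up front */
            if (0 /* placeholder: ignored, masked, traced-and-held, ... */)
                continue;               /* loop still terminates */
            return (signum);            /* deliver this one */
        }
        return (0);                     /* nothing deliverable */
    }
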
@@ -2761,12 +2756,6 @@ postsig(int signum)
 			ps->ps_siginfo &= ~mask;
 			ps->ps_signodefer &= ~mask;
 		}
-#ifdef __ppc__
-		/* Needs to disable to run in user mode */
-		if (signum == SIGFPE) {
-			thread_enable_fpe(current_thread(), 0);
-		}
-#endif  /* __ppc__ */
 
 		if (ps->ps_sig != signum) {
 			code = 0;
@@ -2945,10 +2934,33 @@ bsd_ast(thread_t thread)
 	    ut->t_dtrace_sig = 0;
 	    psignal(p, dt_action_sig);
 	}
+
 	if (ut->t_dtrace_stop) {
-	    ut->t_dtrace_stop = 0;
-	    psignal(p, SIGSTOP);
+		ut->t_dtrace_stop = 0;
+		proc_lock(p);
+		p->p_dtrace_stop = 1;
+		proc_unlock(p);
+		(void)task_suspend(p->task);
+	}
+
+	if (ut->t_dtrace_resumepid) {
+		proc_t resumeproc = proc_find(ut->t_dtrace_resumepid);
+		ut->t_dtrace_resumepid = 0;
+		if (resumeproc != PROC_NULL) {
+			proc_lock(resumeproc);
+			/* We only act on processes stopped by dtrace */
+			if (resumeproc->p_dtrace_stop) {
+				resumeproc->p_dtrace_stop = 0;
+				proc_unlock(resumeproc);
+				task_resume(resumeproc->task);
+			}
+			else {
+				proc_unlock(resumeproc);
+			}
+			proc_rele(resumeproc);
+		}
 	}
+		    
 #endif /* CONFIG_DTRACE */
 
 	if (CHECK_SIGNALS(p, current_thread(), ut)) {
@@ -3066,79 +3078,37 @@ pgsigio(pid_t pgid, int sig)
 		proc_rele(p);
 }
 
-
 void
 proc_signalstart(proc_t p, int locked)
 {
-	if (locked == 0)
+	if (!locked)
 		proc_lock(p);
-	while ((p->p_lflag & P_LINSIGNAL) == P_LINSIGNAL) {
-		p->p_lflag |= P_LSIGNALWAIT;
+	p->p_sigwaitcnt++;
+	while ((p->p_lflag & P_LINSIGNAL) == P_LINSIGNAL)
 		msleep(&p->p_sigmask, &p->p_mlock, 0, "proc_signstart", NULL);
-	}
+	p->p_sigwaitcnt--;
+
 	p->p_lflag |= P_LINSIGNAL;
-#if DIAGNOSTIC
-#if SIGNAL_DEBUG
-#ifdef __ppc__
-        {
-            int  sp, *fp, numsaved; 
- 
-            __asm__ volatile("mr %0,r1" : "=r" (sp));
-
-            fp = (int *)*((int *)sp);
-            for (numsaved = 0; numsaved < 3; numsaved++) {
-                p->lockpc[numsaved] = fp[2];
-                if ((int)fp <= 0)
-                        break;
-                fp = (int *)*fp;
-            }
-        }
-#endif /* __ppc__ */       
-#endif /* SIGNAL_DEBUG */
-#endif /* DIAGNOSTIC */
 	p->p_signalholder = current_thread();
-	if (locked == 0)
+	if (!locked)
 		proc_unlock(p);
-
 }
 
 void
 proc_signalend(proc_t p, int locked)
 {
-	if (locked == 0)
+	if (!locked)
 		proc_lock(p);
 	p->p_lflag &= ~P_LINSIGNAL;
 
-#if DIAGNOSTIC
-#if SIGNAL_DEBUG
-#ifdef __ppc__
-        {
-            int sp, *fp, numsaved; 
- 
-            __asm__ volatile("mr %0,r1" : "=r" (sp));
-
-            fp = (int *)*((int *)sp);
-            for (numsaved = 0; numsaved < 3; numsaved++) {
-                p->unlockpc[numsaved] = fp[2];
-                if ((int)fp <= 0)
-                        break;
-                fp = (int *)*fp;
-            }
-        }
-#endif /* __ppc__ */       
-#endif /* SIGNAL_DEBUG */
-#endif /* DIAGNOSTIC */
-
-	if ((p->p_lflag & P_LSIGNALWAIT) == P_LSIGNALWAIT) {
-		p->p_lflag &= ~P_LSIGNALWAIT;
+	if (p->p_sigwaitcnt > 0)
 		wakeup(&p->p_sigmask);
-	}
+
 	p->p_signalholder = NULL;
-	if (locked == 0)
+	if (!locked)
 		proc_unlock(p);
 }
 
-
 void
 sig_lock_to_exit(proc_t p)
 {
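
proc_signalstart()/proc_signalend() swap the P_LSIGNALWAIT flag for an explicit waiter count, so a wakeup is issued only when a sleeper actually exists. A hedged sketch of the enter/exit pair (helper names are illustrative; the p_sigwaitcnt field and msleep()/wakeup() usage follow the patch):

    /* Sketch: serialize signal delivery per process. */
    static void
    signal_enter_sketch(proc_t p)
    {
        proc_lock(p);
        p->p_sigwaitcnt++;
        while ((p->p_lflag & P_LINSIGNAL) != 0)
            msleep(&p->p_sigmask, &p->p_mlock, 0, "proc_signstart", NULL);
        p->p_sigwaitcnt--;
        p->p_lflag |= P_LINSIGNAL;
        p->p_signalholder = current_thread();
        proc_unlock(p);
    }

    static void
    signal_exit_sketch(proc_t p)
    {
        proc_lock(p);
        p->p_lflag &= ~P_LINSIGNAL;
        if (p->p_sigwaitcnt > 0)
            wakeup(&p->p_sigmask);      /* only when someone is waiting */
        p->p_signalholder = NULL;
        proc_unlock(p);
    }
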
diff --git a/bsd/kern/kern_symfile.c b/bsd/kern/kern_symfile.c
index ffbff213f..dc6531b42 100644
--- a/bsd/kern/kern_symfile.c
+++ b/bsd/kern/kern_symfile.c
@@ -72,12 +72,13 @@ get_kernel_symfile(__unused proc_t p, __unused char const **symfile)
 
 struct kern_direct_file_io_ref_t
 {
-    vfs_context_t		ctx;
-    struct vnode		*vp;
+    vfs_context_t  ctx;
+    struct vnode * vp;
+    dev_t          device;
 };
 
 
-static int file_ioctl(void * p1, void * p2, int theIoctl, caddr_t result)
+static int file_ioctl(void * p1, void * p2, u_long theIoctl, caddr_t result)
 {
     dev_t device = *(dev_t*) p1;
 
@@ -85,7 +86,7 @@ static int file_ioctl(void * p1, void * p2, int theIoctl, caddr_t result)
 		    (device, theIoctl, result, S_IFBLK, p2));
 }
 
-static int device_ioctl(void * p1, __unused void * p2, int theIoctl, caddr_t result)
+static int device_ioctl(void * p1, __unused void * p2, u_long theIoctl, caddr_t result)
 {
     return (VNOP_IOCTL(p1, theIoctl, result, 0, p2));
 }
@@ -94,10 +95,14 @@ struct kern_direct_file_io_ref_t *
 kern_open_file_for_direct_io(const char * name, 
 			     kern_get_file_extents_callback_t callback, 
 			     void * callback_ref,
-			     dev_t * device_result,
+			     dev_t * partition_device_result,
+			     dev_t * image_device_result,
                              uint64_t * partitionbase_result,
                              uint64_t * maxiocount_result,
-                             boolean_t * solid_state)
+                             uint32_t * oflags,
+                             off_t offset,
+                             caddr_t addr,
+                             vm_size_t len)
 {
     struct kern_direct_file_io_ref_t * ref;
 
@@ -105,14 +110,21 @@ kern_open_file_for_direct_io(const char * name,
     struct vnode_attr		va;
     int				error;
     off_t			f_offset;
-    uint32_t			blksize;
-    uint64_t			size;
+    off_t			filelength;
+    uint64_t                    fileblk;
+    size_t                      filechunk;
+    uint64_t                    physoffset;
     dev_t			device;
+    dev_t			target = 0;
+    int			        isssd = 0;
+    uint32_t                    flags = 0;
+    uint32_t			blksize;
     off_t 			maxiocount, count;
+    boolean_t                   locked = FALSE;
 
-    int (*do_ioctl)(void * p1, void * p2, int theIoctl, caddr_t result);
-    void * p1;
-    void * p2;
+    int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
+    void * p1 = NULL;
+    void * p2 = NULL;
 
     error = EFAULT;
 
@@ -124,12 +136,18 @@ kern_open_file_for_direct_io(const char * name,
     }
 
     ref->vp = NULL;
-    p = current_proc();		// kernproc;
+    p = kernproc;
     ref->ctx = vfs_context_create(vfs_context_current());
 
     if ((error = vnode_open(name, (O_CREAT | FWRITE), (0), 0, &ref->vp, ref->ctx)))
         goto out;
 
+    if (addr && len)
+    {
+	if ((error = kern_write_file(ref, offset, addr, len)))
+	    goto out;
+    }
+
     VATTR_INIT(&va);
     VATTR_WANTED(&va, va_rdev);
     VATTR_WANTED(&va, va_fsid);
@@ -169,6 +187,80 @@ kern_open_file_for_direct_io(const char * name,
 	error = EFAULT;
         goto out;
     }
+    ref->device = device;
+
+    // generate the block list
+
+    error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL);
+    if (error)
+        goto out;
+    locked = TRUE;
+
+    // get block size
+
+    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &blksize);
+    if (error)
+        goto out;
+
+    if (ref->vp->v_type == VREG)
+        filelength = va.va_data_size;
+    else
+    {
+        error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk);
+        if (error)
+            goto out;
+	filelength = fileblk * blksize;    
+    }
+
+    f_offset = 0;
+    while (f_offset < filelength) 
+    {
+        if (ref->vp->v_type == VREG)
+        {
+            filechunk = 1*1024*1024*1024;
+            daddr64_t blkno;
+
+            error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno, &filechunk, NULL, 0, NULL);
+            if (error)
+                goto out;
+
+            fileblk = blkno * blksize;
+        }
+        else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
+        {
+            fileblk = f_offset;
+            filechunk = f_offset ? 0 : filelength;
+        }
+
+        physoffset = 0;
+        while (physoffset < filechunk)
+        {
+            dk_physical_extent_t getphysreq;
+            bzero(&getphysreq, sizeof(getphysreq));
+
+            getphysreq.offset = fileblk + physoffset;
+            getphysreq.length = (filechunk - physoffset);
+            error = do_ioctl(p1, p2, DKIOCGETPHYSICALEXTENT, (caddr_t) &getphysreq);
+            if (error)
+                goto out;
+            if (!target)
+            {
+                target = getphysreq.dev;
+            }
+            else if (target != getphysreq.dev)
+            {
+                error = ENOTSUP;
+                goto out;
+            }
+            callback(callback_ref, getphysreq.offset, getphysreq.length);
+            physoffset += getphysreq.length;
+        }
+        f_offset += filechunk;
+    }
+    callback(callback_ref, 0ULL, 0ULL);
+
+    if (ref->vp->v_type == VREG)
+        p1 = &target;
 
     // get partition base
 
@@ -226,62 +318,37 @@ kern_open_file_for_direct_io(const char * name,
     if (maxiocount_result)
         *maxiocount_result = maxiocount;
 
-    if (solid_state)
-    {
-        int isssd = 0;
-        error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd);
-        if (error)
-            *solid_state = FALSE;
-        else
-            *solid_state = isssd;
-    }
-
-    // generate the block list
-
-    error = 0;
-    if (ref->vp->v_type == VREG)
-    {
-	f_offset = 0;
-	while(f_offset < (off_t) va.va_data_size) 
-	{
-	    size_t io_size = 1*1024*1024*1024;
-	    daddr64_t blkno;
-
-	    error = VNOP_BLOCKMAP(ref->vp, f_offset, io_size, &blkno, (size_t *)&io_size, NULL, 0, NULL);
-	    if (error)
-		goto out;
-	    callback(callback_ref, ((uint64_t) blkno) * blksize, (uint64_t) io_size);
-	    f_offset += io_size;
-	}
-	callback(callback_ref, 0ULL, 0ULL);
-    }
-    else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
-    {
-        error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &size);
-        if (error)
-            goto out;
-	size *= blksize;
-	callback(callback_ref, 0ULL, size);
-	callback(callback_ref, size, 0ULL);
-    }
+    error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd);
+    if (!error && isssd)
+        flags |= kIOHibernateOptionSSD;
 
-    if (device_result)
-        *device_result = device;
+    if (partition_device_result)
+        *partition_device_result = device;
+    if (image_device_result)
+        *image_device_result = target;
+    if (flags)
+        *oflags = flags;
 
 out:
     kprintf("kern_open_file_for_direct_io(%d)\n", error);
 
-    if (error && ref) {
-	if (ref->vp) {
+    if (error && locked)
+    {
+        p1 = &device;
+        (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL);
+    }
+
+    if (error && ref)
+    {
+	if (ref->vp)
+	{
 	    vnode_close(ref->vp, FWRITE, ref->ctx);
 	    ref->vp = NULLVP;
 	}
-
 	vfs_context_rele(ref->ctx);
 	kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
 	ref = NULL;
     }
-
     return(ref);
 }
 
@@ -296,21 +363,47 @@ kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t ad
 }
 
 void
-kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref)
+kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref,
+			      off_t offset, caddr_t addr, vm_size_t len)
 {
+    int error;
     kprintf("kern_close_file_for_direct_io\n");
 
-    if (ref) {
-	int                error;
+    if (!ref) return;
 
-	if (ref->vp) {
-	    error = vnode_close(ref->vp, FWRITE, ref->ctx);
-	    ref->vp = NULLVP;
-	    kprintf("vnode_close(%d)\n", error);
-	}
-	vfs_context_rele(ref->ctx);
-	ref->ctx = NULL;
-	kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
+    if (ref->vp)
+    {
+        int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
+        void * p1;
+        void * p2;
+
+        if (ref->vp->v_type == VREG)
+        {
+            p1 = &ref->device;
+            p2 = kernproc;
+            do_ioctl = &file_ioctl;
+        }
+        else
+        {
+            /* Partition. */
+            p1 = ref->vp;
+            p2 = ref->ctx;
+            do_ioctl = &device_ioctl;
+        }
+        (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL);
+        
+        if (addr && len)
+        {
+            (void) kern_write_file(ref, offset, addr, len);
+        }
+
+        error = vnode_close(ref->vp, FWRITE, ref->ctx);
+
+        ref->vp = NULLVP;
+        kprintf("vnode_close(%d)\n", error);
     }
+    vfs_context_rele(ref->ctx);
+    ref->ctx = NULL;
+    kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
 }
 
diff --git a/bsd/kern/kern_synch.c b/bsd/kern/kern_synch.c
index 68a45824e..c6b4888c3 100644
--- a/bsd/kern/kern_synch.c
+++ b/bsd/kern/kern_synch.c
@@ -162,7 +162,7 @@ _sleep(
 	struct proc *p;
 	thread_t self = current_thread();
 	struct uthread * ut;
-	int sig, catch = pri & PCATCH;
+	int sig, catch;
 	int dropmutex  = pri & PDROP;
 	int spinmutex  = pri & PSPIN;
 	int wait_result;
@@ -175,26 +175,39 @@ _sleep(
 	/* It can still block in proc_exit() after the teardown. */
 	if (p->p_stats != NULL)
 		OSIncrementAtomicLong(&p->p_stats->p_ru.ru_nvcsw);
+	
+	if (pri & PCATCH)
+		catch = THREAD_ABORTSAFE;
+	else
+		catch = THREAD_UNINT;
 
 	/* set wait message & channel */
 	ut->uu_wchan = chan;
 	ut->uu_wmesg = wmsg ? wmsg : "unknown";
 
 	if (mtx != NULL && chan != NULL && (thread_continue_t)continuation == THREAD_CONTINUE_NULL) {
+		int	flags;
+
+		if (dropmutex)
+			flags = LCK_SLEEP_UNLOCK;
+		else
+			flags = LCK_SLEEP_DEFAULT;
+
+		if (spinmutex)
+			flags |= LCK_SLEEP_SPIN;
 
 		if (abstime)
-			wait_result = lck_mtx_sleep_deadline(mtx, (dropmutex) ? LCK_SLEEP_UNLOCK : 0,
-							     chan, (catch) ? THREAD_ABORTSAFE : THREAD_UNINT, abstime);
+			wait_result = lck_mtx_sleep_deadline(mtx, flags, chan, catch, abstime);
 		else
-			wait_result = lck_mtx_sleep(mtx, (dropmutex) ? LCK_SLEEP_UNLOCK : 0,
-							     chan, (catch) ? THREAD_ABORTSAFE : THREAD_UNINT);
+			wait_result = lck_mtx_sleep(mtx, flags, chan, catch);
 	}
 	else {
 		if (chan != NULL)
-			assert_wait_deadline(chan, (catch) ? THREAD_ABORTSAFE : THREAD_UNINT, abstime);
+			assert_wait_deadline(chan, catch, abstime);
 		if (mtx)
 			lck_mtx_unlock(mtx);
-		if (catch) {
+
+		if (catch == THREAD_ABORTSAFE) {
 			if (SHOULDissignal(p,ut)) {
 				if ((sig = CURSIG(p)) != 0) {
 					if (clear_wait(self, THREAD_INTERRUPTED) == KERN_FAILURE)
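
The _sleep() rework computes the interruptibility and the lck_mtx_sleep() flags once from the BSD priority bits, rather than re-deriving `catch` at every call site. A minimal sketch of the mapping (sleep_flags_sketch is illustrative; the constants are those used in the hunk above):

    /*
     * Sketch: PCATCH selects an abort-safe wait, PDROP releases the
     * mutex on wakeup, and PSPIN spins instead of blocking on the mutex.
     */
    static void
    sleep_flags_sketch(int pri, int *flags, int *catch)
    {
        *catch = (pri & PCATCH) ? THREAD_ABORTSAFE : THREAD_UNINT;

        *flags = (pri & PDROP) ? LCK_SLEEP_UNLOCK : LCK_SLEEP_DEFAULT;
        if (pri & PSPIN)
            *flags |= LCK_SLEEP_SPIN;
    }
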
@@ -258,11 +271,11 @@ block:
 			 * first, regardless of whether awakened due
 			 * to receiving event.
 			 */
-			if (!catch)
+			if (catch != THREAD_ABORTSAFE)
 				break;
 			/* else fall through */
 		case THREAD_INTERRUPTED:
-			if (catch) {
+			if (catch == THREAD_ABORTSAFE) {
 				if (thread_should_abort(self)) {
 					error = EINTR;
 				} else if (SHOULDissignal(p, ut)) {
@@ -392,7 +405,7 @@ tsleep1(
 void
 wakeup(void *chan)
 {
-	thread_wakeup_prim((caddr_t)chan, FALSE, THREAD_AWAKENED);
+	thread_wakeup((caddr_t)chan);
 }
 
 /*
@@ -404,7 +417,7 @@ wakeup(void *chan)
 void
 wakeup_one(caddr_t chan)
 {
-	thread_wakeup_prim((caddr_t)chan, TRUE, THREAD_AWAKENED);
+	thread_wakeup_one((caddr_t)chan);
 }
 
 /*
diff --git a/bsd/kern/kern_sysctl.c b/bsd/kern/kern_sysctl.c
index 842a3e572..f2c9c8711 100644
--- a/bsd/kern/kern_sysctl.c
+++ b/bsd/kern/kern_sysctl.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -107,11 +107,13 @@
 #include <kern/kalloc.h>
 
 #include <mach/machine.h>
+#include <mach/mach_host.h>
 #include <mach/mach_types.h>
 #include <mach/vm_param.h>
 #include <kern/mach_param.h>
 #include <kern/task.h>
 #include <kern/lock.h>
+#include <kern/processor.h>
 #include <kern/debug.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
@@ -128,15 +130,12 @@
 #include <machine/exec.h>
 
 #include <vm/vm_protos.h>
+#include <sys/imgsrc.h>
 
 #if defined(__i386__) || defined(__x86_64__)
 #include <i386/cpuid.h>
 #endif
 
-sysctlfn kern_sysctl;
-#if DEBUG
-sysctlfn debug_sysctl;
-#endif
 extern sysctlfn net_sysctl;
 extern sysctlfn cpu_sysctl;
 extern int aio_max_requests;  				
@@ -146,25 +145,47 @@ extern int lowpri_IO_window_msecs;
 extern int lowpri_IO_delay_msecs;
 extern int nx_enabled;
 extern int speculative_reads_disabled;
+extern int ignore_is_ssd;
+extern unsigned int speculative_prefetch_max;
 extern unsigned int preheat_pages_max;
 extern unsigned int preheat_pages_min;
-extern unsigned int preheat_pages_mult;
 extern long numvnodes;
 
-static void
+extern unsigned int vm_max_delayed_work_limit;
+extern unsigned int vm_max_batch;
+
+extern unsigned int vm_page_free_min;
+extern unsigned int vm_page_free_target;
+extern unsigned int vm_page_free_reserved;
+extern unsigned int vm_page_speculative_percentage;
+extern unsigned int vm_page_speculative_q_age_ms;
+
+/*
+ * Conditionally allow dtrace to see these functions for debugging purposes.
+ */
+#ifdef STATIC
+#undef STATIC
+#endif
+#if 0
+#define STATIC
+#else
+#define STATIC static
+#endif
+
+extern boolean_t    mach_timer_coalescing_enabled;
+
+STATIC void
 fill_user32_eproc(proc_t p, struct user32_eproc *ep);
-static void
+STATIC void
 fill_user32_externproc(proc_t p, struct user32_extern_proc *exp);
-static void
+STATIC void
 fill_user64_eproc(proc_t p, struct user64_eproc *ep);
-static void
+STATIC void
 fill_user64_proc(proc_t p, struct user64_kinfo_proc *kp);
-static void
+STATIC void
 fill_user64_externproc(proc_t p, struct user64_extern_proc *exp);
 extern int 
 kdbg_control(int *name, u_int namelen, user_addr_t where, size_t * sizep);
-int
-kdebug_ops(int *name, u_int namelen, user_addr_t where, size_t *sizep, proc_t p);
 #if NFSCLIENT
 extern int 
 netboot_root(void);
@@ -174,41 +195,94 @@ pcsamples_ops(int *name, u_int namelen, user_addr_t where, size_t *sizep,
               proc_t p);
 __private_extern__ kern_return_t
 reset_vmobjectcache(unsigned int val1, unsigned int val2);
-int
-sysctl_doproc(int *name, u_int namelen, user_addr_t where, size_t *sizep);
-int 
-sysctl_doprof(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, 
-			  user_addr_t newp, size_t newlen);
-static void
+STATIC void
 fill_user32_proc(proc_t p, struct user32_kinfo_proc *kp);
 int
 sysctl_procargs(int *name, u_int namelen, user_addr_t where, 
 				size_t *sizep, proc_t cur_proc);
-static int
-sysctl_procargs2(int *name, u_int namelen, user_addr_t where, size_t *sizep, 
-                 proc_t cur_proc);
-static int
+STATIC int
 sysctl_procargsx(int *name, u_int namelen, user_addr_t where, size_t *sizep, 
                  proc_t cur_proc, int argc_yes);
 int
 sysctl_struct(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, 
               size_t newlen, void *sp, int len);
 
-static int sysdoproc_filt_KERN_PROC_PID(proc_t p, void * arg);
-static int sysdoproc_filt_KERN_PROC_PGRP(proc_t p, void * arg);
-static int sysdoproc_filt_KERN_PROC_TTY(proc_t p, void * arg);
-static int  sysdoproc_filt_KERN_PROC_UID(proc_t p, void * arg);
-static int  sysdoproc_filt_KERN_PROC_RUID(proc_t p, void * arg);
+STATIC int sysdoproc_filt_KERN_PROC_PID(proc_t p, void * arg);
+STATIC int sysdoproc_filt_KERN_PROC_PGRP(proc_t p, void * arg);
+STATIC int sysdoproc_filt_KERN_PROC_TTY(proc_t p, void * arg);
+STATIC int  sysdoproc_filt_KERN_PROC_UID(proc_t p, void * arg);
+STATIC int  sysdoproc_filt_KERN_PROC_RUID(proc_t p, void * arg);
 #if CONFIG_LCTX
-static int  sysdoproc_filt_KERN_PROC_LCID(proc_t p, void * arg);
+STATIC int  sysdoproc_filt_KERN_PROC_LCID(proc_t p, void * arg);
 #endif
 int sysdoproc_callback(proc_t p, void *arg);
 
-static int __sysctl_funneled(proc_t p, struct __sysctl_args *uap, int32_t *retval);
+
+/* forward declarations for non-static STATIC */
+STATIC void fill_loadavg64(struct loadavg *la, struct user64_loadavg *la64);
+STATIC void fill_loadavg32(struct loadavg *la, struct user32_loadavg *la32);
+STATIC int sysctl_handle_exec_archhandler_ppc(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_handle_kern_threadname(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_sched_stats(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_sched_stats_enable(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_file(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_kdebug_ops SYSCTL_HANDLER_ARGS;
+STATIC int sysctl_dotranslate SYSCTL_HANDLER_ARGS;
+STATIC int sysctl_doaffinity SYSCTL_HANDLER_ARGS;
+#if COUNT_SYSCALLS
+STATIC int sysctl_docountsyscalls SYSCTL_HANDLER_ARGS;
+#endif	/* COUNT_SYSCALLS */
+#if !CONFIG_EMBEDDED
+STATIC int sysctl_doprocargs SYSCTL_HANDLER_ARGS;
+#endif	/* !CONFIG_EMBEDDED */
+STATIC int sysctl_doprocargs2 SYSCTL_HANDLER_ARGS;
+STATIC int sysctl_prochandle SYSCTL_HANDLER_ARGS;
+#if DEBUG
+STATIC int sysctl_dodebug SYSCTL_HANDLER_ARGS;
+#endif
+STATIC int sysctl_aiomax(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_aioprocmax(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_aiothreads(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_maxproc(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_osversion(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_sysctl_bootargs(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_maxvnodes(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_securelvl(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_domainname(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_hostname(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_procname(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_boottime(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_symfile(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+#if NFSCLIENT
+STATIC int sysctl_netboot(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+#endif
+#ifdef CONFIG_IMGSRC_ACCESS
+STATIC int sysctl_imgsrcdev(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+#endif
+STATIC int sysctl_usrstack(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_usrstack64(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_coredump(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_suid_coredump(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_delayterm(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_rage_vnode(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_kern_check_openevt(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_nx(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_loadavg(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_vm_toggle_address_reuse(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_swapusage(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+#if defined(__i386__) || defined(__x86_64__)
+STATIC int sysctl_sysctl_exec_affinity(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+#endif
+STATIC int fetch_process_cputype( proc_t cur_proc, int *name, u_int namelen, cpu_type_t *cputype);
+STATIC int sysctl_sysctl_native(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_sysctl_cputype(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_safeboot(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+STATIC int sysctl_singleuser(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
+
 
 extern void IORegistrySetOSBuildVersion(char * build_version); 
 
-static void
+STATIC void
 fill_loadavg64(struct loadavg *la, struct user64_loadavg *la64)
 {
 	la64->ldavg[0]	= la->ldavg[0];
@@ -217,7 +291,7 @@ fill_loadavg64(struct loadavg *la, struct user64_loadavg *la64)
 	la64->fscale	= (user64_long_t)la->fscale;
 }
 
-static void
+STATIC void
 fill_loadavg32(struct loadavg *la, struct user32_loadavg *la32)
 {
 	la32->ldavg[0]	= la->ldavg[0];
@@ -226,34 +300,76 @@ fill_loadavg32(struct loadavg *la, struct user32_loadavg *la32)
 	la32->fscale	= (user32_long_t)la->fscale;
 }
 
+/*
+ * sysctl_mem_lock
+ *
+ * Description:	Wire down the caller's address map on behalf of sysctl's
+ *		that perform their own copy operations while holding
+ *		locks e.g. in the paging path, which could lead to a
+ *		deadlock, or while holding a spinlock.
+ *
+ * Parameters:	addr			User buffer address
+ *		len			User buffer length
+ *
+ * Returns:	0			Success
+ *	vslock:ENOMEM			Insufficient physical pages to wire
+ *	vslock:EACCES			Bad protection mode
+ *	vslock:EINVAL			Invalid parameters
+ *
+ * Notes:	This code is invoked for the first OID element where the
+ *		CTLFLAG_LOCKED is not specified for a given OID node
+ *		element during OID traversal, and is held for all
+ *		subsequent node traversals, and only released after the
+ *		leaf node handler invocation is complete.
+ *
+ * Legacy:	For legacy sysctl's provided by third party code which
+ *		expect funnel protection for calls into their code, this
+ *		routine will also take the funnel, which will also only
+ *		be released after the leaf node handler is complete.
+ *
+ *		This is to support legacy 32 bit BSD KEXTs and legacy 32
+ *		bit single threaded filesystem KEXTs and similar code
+ *		which relies on funnel protection, e.g. for things like
+ *		FSID based sysctl's.
+ *
+ *		NEW CODE SHOULD NOT RELY ON THIS BEHAVIOUR!  IT WILL BE
+ *		REMOVED IN A FUTURE RELEASE OF Mac OS X!
+ *
+ * Bugs:	This routine does nothing with the new_addr and new_len
+ *		at present, but it should, since reads from the user space
+ *		process address space, which could potentially trigger
+ *		paging, may also be occurring deep down.  This is due to
+ *		a current limitation of the vslock() routine, which will
+ *		always request a wired mapping be read/write, due to not
+ *		taking an access mode parameter.  Note that this could
+ *		also cause problems for output on architectures where
+ *		write access does not require read acccess if the current
+ *		mapping lacks read access.
+ *
+ * XXX:		To be moved to kern_newsysctl.c to avoid __private_extern__
+ */
+int sysctl_mem_lock(user_addr_t old_addr, user_size_t old_len, user_addr_t new_addr, user_size_t new_len);
+int
+sysctl_mem_lock(__unused user_addr_t old_addr, __unused user_size_t old_len, __unused user_addr_t new_addr, __unused user_size_t new_len)
+{
+	return 0;
+}
+
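+/*
+ * Editor's sketch (guarded out of the build):	if sysctl_mem_lock()
+ * were enabled, it would pair vslock()/vsunlock() around the leaf node
+ * handler roughly as below, mirroring the inline wiring in __sysctl();
+ * the new_addr/new_len handling called out in the Bugs note is still
+ * elided, and the helper name is illustrative only.
+ */
+#if 0
+STATIC int
+sysctl_mem_lock_sketch(user_addr_t old_addr, user_size_t old_len)
+{
+	int error;
+
+	/* wire the copy out buffer; vslock() maps it read/write */
+	if (old_addr != USER_ADDR_NULL && old_len != 0) {
+		if ((error = vslock(old_addr, old_len)))
+			return (error);
+	}
+
+	/* ... OID traversal and the leaf node handler would run here ... */
+
+	/* unwire, marking the pages dirty on behalf of the copy out */
+	if (old_addr != USER_ADDR_NULL && old_len != 0) {
+		if ((error = vsunlock(old_addr, old_len, B_WRITE)))
+			return (error);
+	}
+	return (0);
+}
+#endif
+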
 /*
  * Locking and stats
  */
-static struct sysctl_lock memlock;
 
 /* sysctl() syscall */
 int
-__sysctl(proc_t p, struct __sysctl_args *uap, int32_t *retval)
+__sysctl(proc_t p, struct __sysctl_args *uap, __unused int32_t *retval)
 {
-	boolean_t funnel_state;
+	boolean_t funnel_state = FALSE;		/* not held if unknown */
 	int error;
-
-	funnel_state = thread_funnel_set(kernel_flock, TRUE);
-	error = __sysctl_funneled(p, uap, retval);
-	thread_funnel_set(kernel_flock, funnel_state);
-	return(error);
-}
-
-static int
-__sysctl_funneled(proc_t p, struct __sysctl_args *uap, __unused int32_t *retval)
-{
-	int error, dolock = 1;
 	size_t savelen = 0, oldlen = 0, newlen;
-	sysctlfn *fnp = NULL;
 	int name[CTL_MAXNAME];
 	int error1;
-	boolean_t memlock_taken = FALSE;
 	boolean_t vslock_taken = FALSE;
+	boolean_t funnel_taken = FALSE;
 #if CONFIG_MACF
 	kauth_cred_t my_cred;
 #endif
@@ -279,38 +395,49 @@ __sysctl_funneled(proc_t p, struct __sysctl_args *uap, __unused int32_t *retval)
 	else {
 		newlen = uap->newlen;
 	}
-	
+
+/*
+ * XXX TODO:	push down rights check for CTL_HW OIDs; most duplicate
+ * XXX		it anyway, which is a performance sink, and requires use
+ * XXX		of SUID root programs (see <rdar://3915692>).
+ *
+ * Note:	Opt out of non-leaf node enforcement by removing this
+ *		check for the top level OID value, and then adding
+ *		CTLFLAG_ANYBODY to the leaf nodes in question.  Enforce
+ *		suser for writes in leaf nodes by omitting this flag
+ *		(see the illustrative declaration after this comment).
+ *		Enforce with a higher granularity by making the leaf node
+ *		of type SYSCTL_PROC() in order to provide a procedural
+ *		enforcement call site.
+ *
+ * NOTE:	This function is called prior to any subfunctions being
+ *		called with a fallback to userland_sysctl(); as such, this
+ *		permissions check here will veto the fallback operation.
+ */
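+/*
+ * Editorial illustration (hypothetical OID and handler):	opting a
+ * leaf out of the suser() enforcement above would look roughly like
+ *
+ *	SYSCTL_PROC(_hw, OID_AUTO, example, CTLTYPE_INT | CTLFLAG_RW |
+ *	    CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
+ *	    0, 0, sysctl_example, "I", "");
+ *
+ * with 'sysctl_example' performing its own finer grained check, e.g.
+ * via kauth_cred_issuser(kauth_cred_get()) on writes only.
+ */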
 	/* CTL_UNSPEC is used to get oid to AUTO_OID */
 	if (uap->new != USER_ADDR_NULL
-	    && ((name[0] == CTL_KERN
-		&& !(name[1] == KERN_IPC || name[1] == KERN_PANICINFO || name[1] == KERN_PROCDELAYTERM || 
-		     name[1] == KERN_PROCNAME || name[1] == KERN_RAGEVNODE || name[1] == KERN_CHECKOPENEVT || name[1] == KERN_THREADNAME))
-	    || (name[0] == CTL_HW)
+	    && ((name[0] == CTL_HW)
 	    || (name[0] == CTL_VM))
 	    && (error = suser(kauth_cred_get(), &p->p_acflag)))
 		return (error);
 
-/* XXX: KERN, VFS and DEBUG are handled by their respective functions,
- * but there is a fallback for all sysctls other than VFS to
- * userland_sysctl() - KILL THIS! */
-	switch (name[0]) {
-	case CTL_KERN:
-		fnp = kern_sysctl;
-		if ((name[1] != KERN_VNODE) && (name[1] != KERN_FILE) 
-			&& (name[1] != KERN_PROC))
-			dolock = 0;
-		break;
-	case CTL_VFS:
-		fnp = vfs_sysctl;
-		break;
-#if DEBUG
-	case CTL_DEBUG:
-		fnp = debug_sysctl;
-		break;
+// XXX need to relocate this into each terminal node instead of leaving it here...
+// XXX preemptive MACF check.
+#if CONFIG_MACF
+	my_cred = kauth_cred_proc_ref(p);
+	error = mac_system_check_sysctl(
+	    my_cred,
+	    (int *) name,
+	    uap->namelen,
+	    uap->old,
+	    uap->oldlenp,
+	    0,		/* XXX 1 for CTL_KERN checks */
+	    uap->new,
+	    newlen
+	);
+	kauth_cred_unref(&my_cred);
+	if (error)
+		return (error);
 #endif
-	default:
-		fnp = NULL;
-	}
 
 	if (uap->oldlenp != USER_ADDR_NULL) {
 		uint64_t	oldlen64 = fuulong(uap->oldlenp);
@@ -324,79 +451,82 @@ __sysctl_funneled(proc_t p, struct __sysctl_args *uap, __unused int32_t *retval)
 			oldlen = 0xffffffffUL;
 	}
 
-	if (uap->old != USER_ADDR_NULL) {
-		if (!useracc(uap->old, (user_size_t)oldlen, B_WRITE))
-			return (EFAULT);
+	if ((name[0] == CTL_VFS || name[0] == CTL_VM)) {
 		/*
-		 * The kernel debug mechanism does not need to take this lock, and
-		 * we don't grab the memlock around calls to KERN_PROC because it is reentrant.
-		 * Grabbing the lock for a KERN_PROC sysctl makes a deadlock possible 5024049.
+		 * Always take the funnel for CTL_VFS and CTL_VM
+		 *
+		 * XXX We should also take it for any OID without the
+		 * XXX CTLFLAG_LOCKED set on it; fix this later!
+		 */
+		funnel_state = thread_funnel_set(kernel_flock, TRUE);
+		funnel_taken = TRUE;
+
+		/*
+		 * XXX Take the vslock() only when we are copying out; this
+		 * XXX erroneously assumes that the copy in will not cause
+		 * XXX a fault if called from the paging path, due to the
+		 * XXX buffer having been recently touched in order to
+		 * XXX establish the input data.  This is a bad assumption.
+		 *
+		 * Note:	This is overkill, but third parties might
+		 *		already call sysctl internally in KEXTs that
+		 *		implement mass storage drivers.  If you are
+		 *		writing a new KEXT, don't do that.
 		 */
-		if (!((name[1] == KERN_KDEBUG) && (name[2] == KERN_KDGETENTROPY)) &&
-		    !(name[1] == KERN_PROC)) {
-		        MEMLOCK_LOCK();
-			memlock_taken = TRUE;
-                }
-
-		if (dolock && oldlen) {
-		        if ((error = vslock(uap->old, (user_size_t)oldlen))) {
-			        if (memlock_taken == TRUE)
-				        MEMLOCK_UNLOCK();
-				return(error);
+		if (uap->old != USER_ADDR_NULL) {
+			if (!useracc(uap->old, (user_size_t)oldlen, B_WRITE)) {
+				thread_funnel_set(kernel_flock, funnel_state);
+				return (EFAULT);
+			}
+
+			if (oldlen) {
+				if ((error = vslock(uap->old, (user_size_t)oldlen))) {
+					thread_funnel_set(kernel_flock, funnel_state);
+					return(error);
+				}
+				savelen = oldlen;
+				vslock_taken = TRUE;
 			}
-			savelen = oldlen;
-			vslock_taken = TRUE;
 		}
 	}
 
-#if CONFIG_MACF
-	my_cred = kauth_cred_proc_ref(p);
-	error = mac_system_check_sysctl(
-	    my_cred, 
-	    (int *) name,
-	    uap->namelen,
-  	    uap->old,
-	    uap->oldlenp,
-	    fnp == kern_sysctl ? 1 : 0,
-	    uap->new,
-	    newlen
-   	);
-	kauth_cred_unref(&my_cred);
-	if (!error) {
-#endif
-	if (fnp) {
-	        error = (*fnp)(name + 1, uap->namelen - 1, uap->old,
+	/*
+	 * XXX convert vfs_sysctl subelements to newsysctl; this is hard
+	 * XXX because of VFS_NUMMNTOPS being top level.
+	 */
+	error = ENOTSUP;
+	if (name[0] == CTL_VFS) {
+	        error = vfs_sysctl(name + 1, uap->namelen - 1, uap->old,
                        &oldlen, uap->new, newlen, p);
 	}
-	else
-	        error = ENOTSUP;
-#if CONFIG_MACF
-	}
-#endif
 
 	if (vslock_taken == TRUE) {
 	        error1 = vsunlock(uap->old, (user_size_t)savelen, B_WRITE);
 		if (!error)
 		        error = error1;
         }
-	if (memlock_taken == TRUE)
-	        MEMLOCK_UNLOCK();
 
-	if ( (name[0] != CTL_VFS) && (error == ENOTSUP)) {
-	        size_t  tmp = oldlen;
-		boolean_t funnel_state;
-
-		/*
-		 * Drop the funnel when calling new sysctl code, which will conditionally
-		 * grab the funnel if it really needs to.
-		 */
-		funnel_state = thread_funnel_set(kernel_flock, FALSE);
-		
+	if ( (name[0] != CTL_VFS) && (error == ENOTSUP) ) {
+		size_t	tmp = oldlen;
 		error = userland_sysctl(p, name, uap->namelen, uap->old, &tmp, 
 		                        uap->new, newlen, &oldlen);
+	}
 
+	/*
+	 * If we took the funnel, which we only do for CTL_VFS and CTL_VM on
+	 * 32 bit architectures, then drop it.
+	 *
+	 * XXX the grabbing and dropping need to move into the leaf nodes,
+	 * XXX for sysctls that are not marked CTLFLAG_LOCKED, but this is
+	 * XXX true for the vslock, as well.  We have the start of a routine
+	 * XXX to wrap this (above), but it's not turned on.  The current code
+	 * removed the funnel and the vslock() from all but these two top
+	 * level OIDs.  Note that VFS only needs to take the funnel if the FS
+	 * against which it's operating is not thread safe (but since an FS
+	 * can be in the paging path, it still needs to take the vslock()).
+	 */
+	if (funnel_taken)
 		thread_funnel_set(kernel_flock, funnel_state);
-	}
 
 	if ((error) && (error != ENOMEM))
 		return (error);
@@ -424,21 +554,26 @@ int securelevel = -1;
 int securelevel;
 #endif
 
-static int
-sysctl_affinity(
-	int *name,
-	u_int namelen,
-	user_addr_t oldBuf,
-	size_t *oldSize,
-	user_addr_t newBuf,
-	__unused size_t newSize,
-	proc_t cur_proc)
+STATIC int
+sysctl_doaffinity SYSCTL_HANDLER_ARGS
 {
+	__unused int cmd = oidp->oid_arg2;	/* subcommand */
+	int *name = arg1;		/* oid element argument vector */
+	int namelen = arg2;		/* number of oid element arguments */
+	user_addr_t oldp = req->oldptr;	/* user buffer copy out address */
+	size_t *oldlenp = &req->oldlen;	/* user buffer copy out size */
+	user_addr_t newp = req->newptr;	/* user buffer copy in address */
+//	size_t newlen = req->newlen;	/* user buffer copy in size */
+
+	int error = ENOTSUP;		/* Default to failure */
+
+	proc_t cur_proc = current_proc();
+
 	if (namelen < 1)
 		return (ENOTSUP);
 
 	if (name[0] == 0 && 1 == namelen) {
-		return sysctl_rdint(oldBuf, oldSize, newBuf,
+		error = sysctl_rdint(oldp, oldlenp, newp,
 			                (cur_proc->p_flag & P_AFFINITY) ? 1 : 0);
 	} else if (name[0] == 1 && 2 == namelen) {
 		if (name[1] == 0) {
@@ -446,21 +581,35 @@ sysctl_affinity(
 		} else {
 			OSBitOrAtomic(P_AFFINITY, &cur_proc->p_flag);
 		}
-		return 0;
+		error = 0;
 	}
-	return (ENOTSUP);
+
+	/* adjust index so we return the right required/consumed amount */
+	if (!error)
+		req->oldidx += req->oldlen;
+
+	return (error);
 }
+SYSCTL_PROC(_kern, KERN_AFFINITY, affinity, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED,
+	0,			/* Pointer argument (arg1) */
+	0,			/* Integer argument (arg2) */
+	sysctl_doaffinity,	/* Handler function */
+	NULL,			/* Data pointer */
+	"");
+
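+/*
+ * Editor's illustration (user space, not part of this file):	a
+ * minimal caller of the node style OID above; only the documented
+ * sysctl(3) interface is assumed.  With mib[2] == 0 the handler
+ * returns the P_AFFINITY flag of the calling process as an int.
+ */
+#if 0	/* user space sketch */
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+	int mib[3] = { CTL_KERN, KERN_AFFINITY, 0 };
+	int val = 0;
+	size_t len = sizeof(val);
+
+	if (sysctl(mib, 3, &val, &len, NULL, 0) == -1) {
+		perror("sysctl kern.affinity");
+		return (1);
+	}
+	printf("P_AFFINITY set: %d\n", val);
+	return (0);
+}
+#endif
+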
+STATIC int
+sysctl_dotranslate SYSCTL_HANDLER_ARGS
+{
+	__unused int cmd = oidp->oid_arg2;	/* subcommand */
+	int *name = arg1;		/* oid element argument vector */
+	int namelen = arg2;		/* number of oid element arguments */
+	user_addr_t oldp = req->oldptr;	/* user buffer copy out address */
+	size_t *oldlenp = &req->oldlen;	/* user buffer copy out size */
+	user_addr_t newp = req->newptr;	/* user buffer copy in address */
+//	size_t newlen = req->newlen;	/* user buffer copy in size */
+	int error;
 
-static int
-sysctl_translate(
-	int *name,
-	u_int namelen,
-	user_addr_t oldBuf,
-	size_t *oldSize,
-	user_addr_t newBuf,
-	__unused size_t newSize,
-	proc_t cur_proc)
-{
+	proc_t cur_proc = current_proc();
 	proc_t p;
 	int istranslated = 0;
 	kauth_cred_t my_cred;
@@ -484,9 +633,25 @@ sysctl_translate(
 
 	istranslated = (p->p_flag & P_TRANSLATED);
 	proc_rele(p);
-	return sysctl_rdint(oldBuf, oldSize, newBuf,
+	error = sysctl_rdint(oldp, oldlenp, newp,
 		                (istranslated != 0) ? 1 : 0);
+
+	/* adjust index so we return the right required/consumed amount */
+	if (!error)
+		req->oldidx += req->oldlen;
+
+	return (error);
 }
+/*
+ * XXX make CTLFLAG_RW so sysctl_rdint() will return EPERM on writes;
+ * XXX this may not be necessary.
+ */
+SYSCTL_PROC(_kern, KERN_TRANSLATE, translate, CTLTYPE_NODE|CTLFLAG_RW | CTLFLAG_LOCKED,
+	0,			/* Pointer argument (arg1) */
+	0,			/* Integer argument (arg2) */
+	sysctl_dotranslate,	/* Handler function */
+	NULL,			/* Data pointer */
+	"");
 
 int
 set_archhandler(__unused proc_t p, int arch)
@@ -505,7 +670,7 @@ set_archhandler(__unused proc_t p, int arch)
 		return (EBADARCH);
 	}
 
-	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
+	NDINIT(&nd, LOOKUP, OP_GETATTR, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
 		   CAST_USER_ADDR_T(archhandler->path), ctx);
 	error = namei(&nd);
 	if (error)
@@ -530,63 +695,20 @@ set_archhandler(__unused proc_t p, int arch)
 	vnode_put(nd.ni_vp);
 	
 	archhandler->fsid = va.va_fsid;
-	archhandler->fileid = (u_int32_t)va.va_fileid;
+	archhandler->fileid = va.va_fileid;
 	return 0;
 }
 
-/* XXX remove once Rosetta is rev'ed */
-/*****************************************************************************/
-static int
-sysctl_exec_archhandler_ppc(
-	__unused int *name,
-	__unused u_int namelen,
-	user_addr_t oldBuf,
-	size_t *oldSize,
-	user_addr_t newBuf,
-	size_t newSize,
-	proc_t p)
-{
-	int error;
-	size_t len;
-	char handler[sizeof(exec_archhandler_ppc.path)];
-	vfs_context_t ctx = vfs_context_current();
 
-	if (oldSize) {
-		len = strlen(exec_archhandler_ppc.path) + 1;
-		if (oldBuf) {
-			if (*oldSize < len)
-				return (ENOMEM);
-			error = copyout(exec_archhandler_ppc.path, oldBuf, len);
-			if (error)
-				return (error);
-		}
-		*oldSize = len - 1;
-	}
-	if (newBuf) {
-		error = suser(vfs_context_ucred(ctx), &p->p_acflag);
-		if (error)
-			return (error);
-		if (newSize >= sizeof(exec_archhandler_ppc.path))
-			return (ENAMETOOLONG);
-		error = copyin(newBuf, handler, newSize);
-		if (error)
-			return (error);
-		handler[newSize] = 0;
-		strlcpy(exec_archhandler_ppc.path, handler, MAXPATHLEN);
-		error = set_archhandler(p, CPU_TYPE_POWERPC);
-		if (error)
-			return (error);
-	}
-	return 0;
-}
-/*****************************************************************************/
-
-static int
+STATIC int
 sysctl_handle_exec_archhandler_ppc(struct sysctl_oid *oidp, void *arg1,
 		int arg2, struct sysctl_req *req)
 {
 	int error = 0;
 
+	if (req->newptr && !kauth_cred_issuser(kauth_cred_get()))
+		return (EPERM);
+
 	error = sysctl_handle_string(oidp, arg1, arg2, req);
 
 	if (error)
@@ -600,7 +722,7 @@ done:
 
 }
 
-static int
+STATIC int
 sysctl_handle_kern_threadname(	__unused struct sysctl_oid *oidp, __unused void *arg1,
 	      __unused int arg2, struct sysctl_req *req)
 {
@@ -657,133 +779,153 @@ sysctl_handle_kern_threadname(	__unused struct sysctl_oid *oidp, __unused void *
 	return 0;
 }
 
-SYSCTL_PROC(_kern, KERN_THREADNAME, threadname, CTLFLAG_ANYBODY | CTLTYPE_STRING | CTLFLAG_RW, 0, 0, sysctl_handle_kern_threadname,"A","");
+SYSCTL_PROC(_kern, KERN_THREADNAME, threadname, CTLFLAG_ANYBODY | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, sysctl_handle_kern_threadname,"A","");
 
 SYSCTL_NODE(_kern, KERN_EXEC, exec, CTLFLAG_RD|CTLFLAG_LOCKED, 0, "");
 
 SYSCTL_NODE(_kern_exec, OID_AUTO, archhandler, CTLFLAG_RD|CTLFLAG_LOCKED, 0, "");
 
 SYSCTL_PROC(_kern_exec_archhandler, OID_AUTO, powerpc,
-	    CTLTYPE_STRING | CTLFLAG_RW, exec_archhandler_ppc.path, 0,
-	    sysctl_handle_exec_archhandler_ppc, "A", "");
+			CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED,
+			exec_archhandler_ppc.path,
+			sizeof(exec_archhandler_ppc.path),
+			sysctl_handle_exec_archhandler_ppc, "A", "");
+
+#define BSD_HOST 1
+STATIC int
+sysctl_sched_stats(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+	host_basic_info_data_t hinfo;
+	kern_return_t kret;
+	uint32_t size;
+	int changed;
+	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
+	struct _processor_statistics_np *buf;
+	int error;
 
-extern int get_kernel_symfile(proc_t, char **);
-__private_extern__ int 
-sysctl_dopanicinfo(int *, u_int, user_addr_t, size_t *, user_addr_t, 
-                   size_t, proc_t);
+	kret = host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
+	if (kret != KERN_SUCCESS) {
+		return EINVAL;
+	}
 
-/*
- * kernel related system variables.
- */
-int
-kern_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, 
-            user_addr_t newp, size_t newlen, proc_t p)
-{
-	/* all sysctl names not listed below are terminal at this level */
-	if (namelen != 1
-		&& !(name[0] == KERN_PROC
-			|| name[0] == KERN_PROF 
-			|| name[0] == KERN_KDEBUG
-#if !CONFIG_EMBEDDED
-			|| name[0] == KERN_PROCARGS
-#endif
-			|| name[0] == KERN_PROCARGS2
-			|| name[0] == KERN_IPC
-			|| name[0] == KERN_SYSV
-			|| name[0] == KERN_AFFINITY
-			|| name[0] == KERN_TRANSLATE
-			|| name[0] == KERN_EXEC
-			|| name[0] == KERN_PANICINFO
-			|| name[0] == KERN_POSIX
-			|| name[0] == KERN_TFP
-			|| name[0] == KERN_TTY
-#if CONFIG_LCTX
-			|| name[0] == KERN_LCTX
-#endif
-						)
-		)
-		return (ENOTDIR);		/* overloaded */
+	size = sizeof(struct _processor_statistics_np) * (hinfo.logical_cpu_max + 2); /* One for RT Queue, One for Fair Share Queue */
+	
+	if (req->oldlen < size) {
+		return EINVAL;
+	}
+
+	MALLOC(buf, struct _processor_statistics_np*, size, M_TEMP, M_ZERO | M_WAITOK);
+	
+	kret = get_sched_statistics(buf, &size);
+	if (kret != KERN_SUCCESS) {
+		error = EINVAL;
+		goto out;
+	}
+
+	error = sysctl_io_opaque(req, buf, size, &changed);
+	if (error) {
+		goto out;
+	}
+
+	if (changed) {
+		panic("Sched info changed?!");
+	}
+out:
+	FREE(buf, M_TEMP);
+	return error;
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, sched_stats, CTLFLAG_LOCKED, 0, 0, sysctl_sched_stats, "-", "");
+
+STATIC int
+sysctl_sched_stats_enable(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, __unused struct sysctl_req *req)
+{
+	boolean_t active;
+	int res;
+
+	if (req->newlen != sizeof(active)) {
+		return EINVAL;
+	}
+
+	res = copyin(req->newptr, &active, sizeof(active));
+	if (res != 0) {
+		return res;
+	}
+
+	return set_sched_stats_active(active);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, sched_stats_enable, CTLFLAG_LOCKED | CTLFLAG_WR, 0, 0, sysctl_sched_stats_enable, "-", "");
+
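+/*
+ * Editor's illustration (user space):	enabling the collector above.
+ * The handler insists on exactly sizeof(boolean_t) bytes of new data,
+ * so a boolean_t (an int on these platforms) is passed; reading the
+ * statistics back goes through kern.sched_stats with a buffer of at
+ * least (logical_cpu_max + 2) * sizeof(struct _processor_statistics_np).
+ */
+#if 0	/* user space sketch */
+#include <sys/sysctl.h>
+#include <mach/boolean.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+	boolean_t active = TRUE;
+
+	if (sysctlbyname("kern.sched_stats_enable", NULL, NULL,
+	    &active, sizeof(active)) == -1) {
+		perror("sysctlbyname kern.sched_stats_enable");
+		return (1);
+	}
+	return (0);
+}
+#endif
+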
+extern int get_kernel_symfile(proc_t, char **);
 
-	switch (name[0]) {
-	case KERN_PROC:
-		return (sysctl_doproc(name + 1, namelen - 1, oldp, oldlenp));
-#ifdef GPROF
-	case KERN_PROF:
-		return (sysctl_doprof(name + 1, namelen - 1, oldp, oldlenp,
-		    newp, newlen));
-#endif
-	case KERN_KDEBUG:
-		return (kdebug_ops(name + 1, namelen - 1, oldp, oldlenp, p));
-#if !CONFIG_EMBEDDED
-	case KERN_PROCARGS:
-		/* new one as it does not use kinfo_proc */
-		return (sysctl_procargs(name + 1, namelen - 1, oldp, oldlenp, p));
-#endif
-	case KERN_PROCARGS2:
-		/* new one as it does not use kinfo_proc */
-		return (sysctl_procargs2(name + 1, namelen - 1, oldp, oldlenp, p));
-#if PANIC_INFO
-	case KERN_PANICINFO:
-		return(sysctl_dopanicinfo(name + 1, namelen - 1, oldp, oldlenp,
-			newp, newlen, p));
-#endif
-	case KERN_AFFINITY:
-		return sysctl_affinity(name+1, namelen-1, oldp, oldlenp,
-									newp, newlen, p);
-	case KERN_TRANSLATE:
-		return sysctl_translate(name+1, namelen-1, oldp, oldlenp, newp,
-				      newlen, p);
-
-		/* XXX remove once Rosetta has rev'ed */
-	case KERN_EXEC:
-		return sysctl_exec_archhandler_ppc(name+1, namelen-1, oldp,
-						   oldlenp, newp, newlen, p);
 #if COUNT_SYSCALLS
-	case KERN_COUNT_SYSCALLS:
-	{
-		/* valid values passed in:
-		 * = 0 means don't keep called counts for each bsd syscall
-		 * > 0 means keep called counts for each bsd syscall
-		 * = 2 means dump current counts to the system log
-		 * = 3 means reset all counts
-		 * for example, to dump current counts:  
-		 *		sysctl -w kern.count_calls=2
-		 */
-		error = sysctl_int(oldp, oldlenp, newp, newlen, &tmp);
-		if ( error != 0 ) {
-			return (error);
-		}
-			
-		if ( tmp == 1 ) {
-			do_count_syscalls = 1;
-		}
-		else if ( tmp == 0 || tmp == 2 || tmp == 3 ) {
-			extern int 			nsysent;
-			extern int			syscalls_log[];
-			extern const char *	syscallnames[];
-			int			i;
-			for ( i = 0; i < nsysent; i++ ) {
-				if ( syscalls_log[i] != 0 ) {
-					if ( tmp == 2 ) {
-						printf("%d calls - name %s \n", syscalls_log[i], syscallnames[i]);
-					}
-					else {
-						syscalls_log[i] = 0;
-					}
+#define KERN_COUNT_SYSCALLS (KERN_OSTYPE + 1000)
+
+extern int nsysent;
+extern int syscalls_log[];
+extern const char *syscallnames[];
+
+STATIC int
+sysctl_docountsyscalls SYSCTL_HANDLER_ARGS
+{
+	__unused int cmd = oidp->oid_arg2;	/* subcommand */
+	__unused int *name = arg1;	/* oid element argument vector */
+	__unused int namelen = arg2;	/* number of oid element arguments */
+	user_addr_t oldp = req->oldptr;	/* user buffer copy out address */
+	size_t *oldlenp = &req->oldlen;	/* user buffer copy out size */
+	user_addr_t newp = req->newptr;	/* user buffer copy in address */
+	size_t newlen = req->newlen;	/* user buffer copy in size */
+	int error;
+
+	int tmp;
+
+	/* valid values passed in:
+	 * = 0 means don't keep called counts for each bsd syscall
+	 * > 0 means keep called counts for each bsd syscall
+	 * = 2 means dump current counts to the system log
+	 * = 3 means reset all counts
+	 * for example, to dump current counts:
+	 *		sysctl -w kern.count_syscalls=2
+	 */
+	error = sysctl_int(oldp, oldlenp, newp, newlen, &tmp);
+	if ( error != 0 ) {
+		return (error);
+	}
+		
+	if ( tmp == 1 ) {
+		do_count_syscalls = 1;
+	}
+	else if ( tmp == 0 || tmp == 2 || tmp == 3 ) {
+		int			i;
+		for ( i = 0; i < nsysent; i++ ) {
+			if ( syscalls_log[i] != 0 ) {
+				if ( tmp == 2 ) {
+					printf("%d calls - name %s\n", syscalls_log[i], syscallnames[i]);
+				}
+				else {
+					syscalls_log[i] = 0;
 				}
-			}
-			if ( tmp != 0 ) {
-				do_count_syscalls = 1;
 			}
 		}
-		return (0);
-	}
-#endif
-	default:
-		return (ENOTSUP);
+		if ( tmp != 0 ) {
+			do_count_syscalls = 1;
+		}
 	}
-	/* NOTREACHED */
+
+	/* adjust index so we return the right required/consumed amount */
+	if (!error)
+		req->oldidx += req->oldlen;
+
+	return (error);
 }
+SYSCTL_PROC(_kern, KERN_COUNT_SYSCALLS, count_syscalls, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED,
+	0,			/* Pointer argument (arg1) */
+	0,			/* Integer argument (arg2) */
+	sysctl_docountsyscalls,	/* Handler function */
+	NULL,			/* Data pointer */
+	"");
+#endif	/* COUNT_SYSCALLS */
 
 #if DEBUG
 /*
@@ -797,36 +939,68 @@ struct ctldebug debug2, debug3, debug4;
 struct ctldebug debug5, debug6, debug7, debug8, debug9;
 struct ctldebug debug10, debug11, debug12, debug13, debug14;
 struct ctldebug debug15, debug16, debug17, debug18, debug19;
-static struct ctldebug *debugvars[CTL_DEBUG_MAXID] = {
+STATIC struct ctldebug *debugvars[CTL_DEBUG_MAXID] = {
 	&debug0, &debug1, &debug2, &debug3, &debug4,
 	&debug5, &debug6, &debug7, &debug8, &debug9,
 	&debug10, &debug11, &debug12, &debug13, &debug14,
 	&debug15, &debug16, &debug17, &debug18, &debug19,
 };
-int
-debug_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, 
-             user_addr_t newp, size_t newlen, __unused proc_t p)
-{
+STATIC int
+sysctl_dodebug SYSCTL_HANDLER_ARGS
+{
+	int cmd = oidp->oid_arg2;	/* subcommand */
+	int *name = arg1;		/* oid element argument vector */
+	int namelen = arg2;		/* number of oid element arguments */
+	user_addr_t oldp = req->oldptr;	/* user buffer copy out address */
+	size_t *oldlenp = &req->oldlen;	/* user buffer copy out size */
+	user_addr_t newp = req->newptr;	/* user buffer copy in address */
+	size_t newlen = req->newlen;	/* user buffer copy in size */
+	int error;
+
 	struct ctldebug *cdp;
 
 	/* all sysctl names at this level are name and field */
-	if (namelen != 2)
+	if (namelen != 1)
 		return (ENOTSUP);		/* overloaded */
-	if (name[0] < 0 || name[0] >= CTL_DEBUG_MAXID)
+	if (cmd < 0 || cmd >= CTL_DEBUG_MAXID)
 		return (ENOTSUP);
-	cdp = debugvars[name[0]];
+	cdp = debugvars[cmd];
 	if (cdp->debugname == 0)
 		return (ENOTSUP);
-	switch (name[1]) {
+	switch (name[0]) {
 	case CTL_DEBUG_NAME:
-		return (sysctl_rdstring(oldp, oldlenp, newp, cdp->debugname));
+		error = sysctl_rdstring(oldp, oldlenp, newp, cdp->debugname);
+		break;
 	case CTL_DEBUG_VALUE:
-		return (sysctl_int(oldp, oldlenp, newp, newlen, cdp->debugvar));
+		error = sysctl_int(oldp, oldlenp, newp, newlen, cdp->debugvar);
+		break;
 	default:
-		return (ENOTSUP);
+		error = ENOTSUP;
+		break;
 	}
-	/* NOTREACHED */
+
+	/* adjust index so we return the right required/consumed amount */
+	if (!error)
+		req->oldidx += req->oldlen;
+
+	return (error);
 }
+/*
+ * XXX We mark this RW instead of RD to let sysctl_rdstring() return the
+ * XXX historical error.
+ */
+SYSCTL_PROC(_debug, CTL_DEBUG_NAME, name, CTLTYPE_NODE|CTLFLAG_RW | CTLFLAG_LOCKED,
+	0,			/* Pointer argument (arg1) */
+	CTL_DEBUG_NAME,		/* Integer argument (arg2) */
+	sysctl_dodebug,		/* Handler function */
+	NULL,			/* Data pointer */
+	"Debugging");
+SYSCTL_PROC(_debug, CTL_DEBUG_VALUE, value, CTLTYPE_NODE|CTLFLAG_RW | CTLFLAG_LOCKED,
+	0,			/* Pointer argument (arg1) */
+	CTL_DEBUG_VALUE,	/* Integer argument (arg2) */
+	sysctl_dodebug,		/* Handler function */
+	NULL,			/* Data pointer */
+	"Debugging");
 #endif /* DEBUG */
 
 /*
@@ -1073,7 +1247,7 @@ sysctl_rdstruct(user_addr_t oldp, size_t *oldlenp,
 /*
  * Get file structures.
  */
-static int
+STATIC int
 sysctl_file
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -1118,10 +1292,10 @@ sysctl_file
 }
 
 SYSCTL_PROC(_kern, KERN_FILE, file,
-		CTLTYPE_STRUCT | CTLFLAG_RW,
+		CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
 		0, 0, sysctl_file, "S,filehead", "");
 
-static int
+STATIC int
 sysdoproc_filt_KERN_PROC_PID(proc_t p, void * arg)
 {
 	if (p->p_pid != (pid_t)*(int*)arg)
@@ -1130,7 +1304,7 @@ sysdoproc_filt_KERN_PROC_PID(proc_t p, void * arg)
 		return(1);
 }
 
-static int
+STATIC int
 sysdoproc_filt_KERN_PROC_PGRP(proc_t p, void * arg)
 {
 	if (p->p_pgrpid != (pid_t)*(int*)arg)
@@ -1139,7 +1313,7 @@ sysdoproc_filt_KERN_PROC_PGRP(proc_t p, void * arg)
 	  return(1);
 }
 
-static int
+STATIC int
 sysdoproc_filt_KERN_PROC_TTY(proc_t p, void * arg)
 {
 	boolean_t funnel_state;
@@ -1162,7 +1336,7 @@ sysdoproc_filt_KERN_PROC_TTY(proc_t p, void * arg)
 	return(retval);
 }
 
-static int
+STATIC int
 sysdoproc_filt_KERN_PROC_UID(proc_t p, void * arg)
 {
 	kauth_cred_t my_cred;
@@ -1181,7 +1355,7 @@ sysdoproc_filt_KERN_PROC_UID(proc_t p, void * arg)
 }
 
 
-static int
+STATIC int
 sysdoproc_filt_KERN_PROC_RUID(proc_t p, void * arg)
 {
 	kauth_cred_t my_cred;
@@ -1190,7 +1364,7 @@ sysdoproc_filt_KERN_PROC_RUID(proc_t p, void * arg)
 	if (p->p_ucred == NULL)
 		return(0);
 	my_cred = kauth_cred_proc_ref(p);
-	ruid = my_cred->cr_ruid;
+	ruid = kauth_cred_getruid(my_cred);
 	kauth_cred_unref(&my_cred);
 
 	if (ruid != (uid_t)*(int*)arg)
@@ -1200,7 +1374,7 @@ sysdoproc_filt_KERN_PROC_RUID(proc_t p, void * arg)
 }
 
 #if CONFIG_LCTX
-static int
+STATIC int
 sysdoproc_filt_KERN_PROC_LCID(proc_t p, void * arg)
 {
 	if ((p->p_lctx == NULL) ||
@@ -1263,12 +1437,18 @@ sysdoproc_callback(proc_t p, void * arg)
 	return(PROC_RETURNED);
 }
 
-int
-sysctl_doproc(int *name, u_int namelen, user_addr_t where, size_t *sizep)
+SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD | CTLFLAG_LOCKED, 0, "");
+STATIC int
+sysctl_prochandle SYSCTL_HANDLER_ARGS
 {
+	int cmd = oidp->oid_arg2;	/* subcommand for multiple nodes */
+	int *name = arg1;		/* oid element argument vector */
+	int namelen = arg2;		/* number of oid element arguments */
+	user_addr_t where = req->oldptr;/* user buffer copy out address */
+
 	user_addr_t dp = where;
 	size_t needed = 0;
-	int buflen = where != USER_ADDR_NULL ? *sizep : 0;
+	int buflen = where != USER_ADDR_NULL ? req->oldlen : 0;
 	int error = 0;
 	boolean_t is_64_bit = FALSE;
 	struct user32_kinfo_proc  user32_kproc;
@@ -1281,8 +1461,9 @@ sysctl_doproc(int *name, u_int namelen, user_addr_t where, size_t *sizep)
 	int ruidcheck = 0;
 	int ttycheck = 0;
 
-	if (namelen != 2 && !(namelen == 1 && name[0] == KERN_PROC_ALL))
+	if (namelen != 1 && !(namelen == 0 && cmd == KERN_PROC_ALL))
 		return (EINVAL);
+
 	is_64_bit = proc_is64bit(current_proc()); 
 	if (is_64_bit) {
 		sizeof_kproc = sizeof(user_kproc);
@@ -1294,7 +1475,7 @@ sysctl_doproc(int *name, u_int namelen, user_addr_t where, size_t *sizep)
 	}
 
 
-	switch (name[0]) {
+	switch (cmd) {
 
 		case KERN_PROC_PID:
 			filterfn = sysdoproc_filt_KERN_PROC_PID;
@@ -1321,6 +1502,12 @@ sysctl_doproc(int *name, u_int namelen, user_addr_t where, size_t *sizep)
 			filterfn = sysdoproc_filt_KERN_PROC_LCID;
 			break;
 #endif
+		case KERN_PROC_ALL:
+			break;
+
+		default:
+			/* must be kern.proc.<unknown> */
+			return (ENOTSUP);
 	}
 
 	error = 0;
@@ -1334,9 +1521,10 @@ sysctl_doproc(int *name, u_int namelen, user_addr_t where, size_t *sizep)
 	args.ruidcheck = ruidcheck;
 	args.ttycheck = ttycheck;
 	args.sizeof_kproc = sizeof_kproc;
-	args.uidval = name[1];
+	if (namelen)
+		args.uidval = name[0];
 
-	proc_iterate((PROC_ALLPROCLIST | PROC_ZOMBPROCLIST), sysdoproc_callback, &args, filterfn, &name[1]);
+	proc_iterate((PROC_ALLPROCLIST | PROC_ZOMBPROCLIST), sysdoproc_callback, &args, filterfn, name);
 
 	if (error)
 		return(error);
@@ -1345,20 +1533,87 @@ sysctl_doproc(int *name, u_int namelen, user_addr_t where, size_t *sizep)
 	needed = args.needed;
 	
 	if (where != USER_ADDR_NULL) {
-		*sizep = dp - where;
-		if (needed > *sizep)
+		req->oldlen = dp - where;
+		if (needed > req->oldlen)
 			return (ENOMEM);
 	} else {
 		needed += KERN_PROCSLOP;
-		*sizep = needed;
+		req->oldlen = needed;
 	}
+	/* adjust index so we return the right required/consumed amount */
+	req->oldidx += req->oldlen;
 	return (0);
 }
+/*
+ * We specify the subcommand code for multiple nodes as the 'arg2' value
+ * in the sysctl declaration itself, which comes into the handler function
+ * as 'oidp->oid_arg2'.
+ *
+ * For these particular sysctls, since they have well known OIDs, we could
+ * have just obtained it from the '((int *)arg1)[0]' parameter, but that would
+ * not demonstrate how to handle multiple sysctls that used OID_AUTO instead
+ * of a well known value with a common handler function.  This is desirable,
+ * because we want well known values to "go away" at some future date.
+ *
+ * It should be noted that the value of '((int *)arg1)[1]' is used as an
+ * integer parameter to the subcommand for many of these sysctls; we'd
+ * rather have used '((int *)arg1)[0]' for that, or even better, an element
+ * in a structure passed in as the 'newp' argument to sysctlbyname(3),
+ * and then use leaf-node permissions enforcement, but that would have
+ * necessitated modifying user space code to correspond to the interface
+ * change, and we are striving for binary backward compatibility here; even
+ * though these are SPI, and not intended for use by user space applications
+ * which are not themselves system tools or libraries, some applications
+ * have erroneously used them.
+ */
+SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED,
+	0,			/* Pointer argument (arg1) */
+	KERN_PROC_ALL,		/* Integer argument (arg2) */
+	sysctl_prochandle,	/* Handler function */
+	NULL,			/* Data is size variant on ILP32/LP64 */
+	"");
+SYSCTL_PROC(_kern_proc, KERN_PROC_PID, pid, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED,
+	0,			/* Pointer argument (arg1) */
+	KERN_PROC_PID,		/* Integer argument (arg2) */
+	sysctl_prochandle,	/* Handler function */
+	NULL,			/* Data is size variant on ILP32/LP64 */
+	"");
+SYSCTL_PROC(_kern_proc, KERN_PROC_TTY, tty, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED,
+	0,			/* Pointer argument (arg1) */
+	KERN_PROC_TTY,		/* Integer argument (arg2) */
+	sysctl_prochandle,	/* Handler function */
+	NULL,			/* Data is size variant on ILP32/LP64 */
+	"");
+SYSCTL_PROC(_kern_proc, KERN_PROC_PGRP, pgrp, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED,
+	0,			/* Pointer argument (arg1) */
+	KERN_PROC_PGRP,		/* Integer argument (arg2) */
+	sysctl_prochandle,	/* Handler function */
+	NULL,			/* Data is size variant on ILP32/LP64 */
+	"");
+SYSCTL_PROC(_kern_proc, KERN_PROC_UID, uid, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED,
+	0,			/* Pointer argument (arg1) */
+	KERN_PROC_UID,		/* Integer argument (arg2) */
+	sysctl_prochandle,	/* Handler function */
+	NULL,			/* Data is size variant on ILP32/LP64 */
+	"");
+SYSCTL_PROC(_kern_proc, KERN_PROC_RUID, ruid, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED,
+	0,			/* Pointer argument (arg1) */
+	KERN_PROC_RUID,		/* Integer argument (arg2) */
+	sysctl_prochandle,	/* Handler function */
+	NULL,			/* Data is size variant on ILP32/LP64 */
+	"");
+SYSCTL_PROC(_kern_proc, KERN_PROC_LCID, lcid, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED,
+	0,			/* Pointer argument (arg1) */
+	KERN_PROC_LCID,		/* Integer argument (arg2) */
+	sysctl_prochandle,	/* Handler function */
+	NULL,			/* Data is size variant on ILP32/LP64 */
+	"");
+
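+/*
+ * Editor's illustration (user space):	fetching one kinfo_proc through
+ * the kern.proc.pid leaf registered above; the trailing mib element is
+ * the integer parameter the filter consumes as 'name[0]'.
+ */
+#if 0	/* user space sketch */
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <unistd.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+	int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, getpid() };
+	struct kinfo_proc kp;
+	size_t len = sizeof(kp);
+
+	if (sysctl(mib, 4, &kp, &len, NULL, 0) == -1) {
+		perror("sysctl kern.proc.pid");
+		return (1);
+	}
+	printf("comm: %s\n", kp.kp_proc.p_comm);
+	return (0);
+}
+#endif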
 
 /*
  * Fill in an eproc structure for the specified process.
  */
-static void
+STATIC void
 fill_user32_eproc(proc_t p, struct user32_eproc *ep)
 {
 	struct tty *tp;
@@ -1396,15 +1651,15 @@ fill_user32_eproc(proc_t p, struct user32_eproc *ep)
 		my_cred = kauth_cred_proc_ref(p);
 
 		/* A fake historical pcred */
-		ep->e_pcred.p_ruid = my_cred->cr_ruid;
-		ep->e_pcred.p_svuid = my_cred->cr_svuid;
-		ep->e_pcred.p_rgid = my_cred->cr_rgid;
-		ep->e_pcred.p_svgid = my_cred->cr_svgid;
+		ep->e_pcred.p_ruid = kauth_cred_getruid(my_cred);
+		ep->e_pcred.p_svuid = kauth_cred_getsvuid(my_cred);
+		ep->e_pcred.p_rgid = kauth_cred_getrgid(my_cred);
+		ep->e_pcred.p_svgid = kauth_cred_getsvgid(my_cred);
 		/* A fake historical *kauth_cred_t */
 		ep->e_ucred.cr_ref = my_cred->cr_ref;
 		ep->e_ucred.cr_uid = kauth_cred_getuid(my_cred);
-		ep->e_ucred.cr_ngroups = my_cred->cr_ngroups;
-		bcopy(my_cred->cr_groups, ep->e_ucred.cr_groups, NGROUPS*sizeof(gid_t));
+		ep->e_ucred.cr_ngroups = posix_cred_get(my_cred)->cr_ngroups;
+		bcopy(posix_cred_get(my_cred)->cr_groups, ep->e_ucred.cr_groups, NGROUPS*sizeof(gid_t));
 
 		kauth_cred_unref(&my_cred);
 	}
@@ -1437,7 +1692,7 @@ fill_user32_eproc(proc_t p, struct user32_eproc *ep)
 /*
  * Fill in an LP64 version of eproc structure for the specified process.
  */
-static void
+STATIC void
 fill_user64_eproc(proc_t p, struct user64_eproc *ep)
 {
 	struct tty *tp;
@@ -1476,16 +1731,16 @@ fill_user64_eproc(proc_t p, struct user64_eproc *ep)
 		my_cred = kauth_cred_proc_ref(p);
 
 		/* A fake historical pcred */
-		ep->e_pcred.p_ruid = my_cred->cr_ruid;
-		ep->e_pcred.p_svuid = my_cred->cr_svuid;
-		ep->e_pcred.p_rgid = my_cred->cr_rgid;
-		ep->e_pcred.p_svgid = my_cred->cr_svgid;
+		ep->e_pcred.p_ruid = kauth_cred_getruid(my_cred);
+		ep->e_pcred.p_svuid = kauth_cred_getsvuid(my_cred);
+		ep->e_pcred.p_rgid = kauth_cred_getrgid(my_cred);
+		ep->e_pcred.p_svgid = kauth_cred_getsvgid(my_cred);
 
 		/* A fake historical *kauth_cred_t */
 		ep->e_ucred.cr_ref = my_cred->cr_ref;
 		ep->e_ucred.cr_uid = kauth_cred_getuid(my_cred);
-		ep->e_ucred.cr_ngroups = my_cred->cr_ngroups;
-		bcopy(my_cred->cr_groups, ep->e_ucred.cr_groups, NGROUPS*sizeof(gid_t));
+		ep->e_ucred.cr_ngroups = posix_cred_get(my_cred)->cr_ngroups;
+		bcopy(posix_cred_get(my_cred)->cr_groups, ep->e_ucred.cr_groups, NGROUPS*sizeof(gid_t));
 
 		kauth_cred_unref(&my_cred);
 	}
@@ -1518,7 +1773,7 @@ fill_user64_eproc(proc_t p, struct user64_eproc *ep)
 /*
  * Fill in an eproc structure for the specified process.
  */
-static void
+STATIC void
 fill_user32_externproc(proc_t p, struct user32_extern_proc *exp)
 {
 	exp->p_forw = exp->p_back = 0;
@@ -1583,7 +1838,7 @@ fill_user32_externproc(proc_t p, struct user32_extern_proc *exp)
 /*
  * Fill in an LP64 version of extern_proc structure for the specified process.
  */
-static void
+STATIC void
 fill_user64_externproc(proc_t p, struct user64_extern_proc *exp)
 {
 	exp->p_forw = exp->p_back = USER_ADDR_NULL;
@@ -1649,7 +1904,7 @@ fill_user64_externproc(proc_t p, struct user64_extern_proc *exp)
 	exp->p_ru  = CAST_USER_ADDR_T(p->p_ru);		/* XXX may be NULL */
 }
 
-static void
+STATIC void
 fill_user32_proc(proc_t p, struct user32_kinfo_proc *kp)
 {
 	/* on a 64 bit kernel, 32 bit users will get some truncated information */
@@ -1657,23 +1912,31 @@ fill_user32_proc(proc_t p, struct user32_kinfo_proc *kp)
 	fill_user32_eproc(p, &kp->kp_eproc);
 }
 
-static void
+STATIC void
 fill_user64_proc(proc_t p, struct user64_kinfo_proc *kp)
 {
 	fill_user64_externproc(p, &kp->kp_proc);
 	fill_user64_eproc(p, &kp->kp_eproc);
 }
 
-int
-kdebug_ops(int *name, u_int namelen, user_addr_t where, 
-           size_t *sizep, proc_t p)
+STATIC int
+sysctl_kdebug_ops SYSCTL_HANDLER_ARGS
 {
+	__unused int cmd = oidp->oid_arg2;	/* subcommand */
+	int *name = arg1;		/* oid element argument vector */
+	int namelen = arg2;		/* number of oid element arguments */
+	user_addr_t oldp = req->oldptr;	/* user buffer copy out address */
+	size_t *oldlenp = &req->oldlen;	/* user buffer copy out size */
+//	user_addr_t newp = req->newptr;	/* user buffer copy in address */
+//	size_t newlen = req->newlen;	/* user buffer copy in size */
+
+	proc_t p = current_proc();
 	int ret=0;
 
 	if (namelen == 0)
 		return(ENOTSUP);
 	
-    ret = suser(kauth_cred_get(), &p->p_acflag);
+	ret = suser(kauth_cred_get(), &p->p_acflag);
 	if (ret)
 		return(ret);
 	
@@ -1687,41 +1950,96 @@ kdebug_ops(int *name, u_int namelen, user_addr_t where,
 	case KERN_KDSETREG:
 	case KERN_KDGETREG:
 	case KERN_KDREADTR:
+	case KERN_KDWRITETR:
+	case KERN_KDWRITEMAP:
 	case KERN_KDPIDTR:
 	case KERN_KDTHRMAP:
 	case KERN_KDPIDEX:
 	case KERN_KDSETRTCDEC:
 	case KERN_KDSETBUF:
 	case KERN_KDGETENTROPY:
-	        ret = kdbg_control(name, namelen, where, sizep);
+	        ret = kdbg_control(name, namelen, oldp, oldlenp);
 	        break;
 	default:
 		ret= ENOTSUP;
 		break;
 	}
-	return(ret);
+
+	/* adjust index so we return the right required/consumed amount */
+	if (!ret)
+		req->oldidx += req->oldlen;
+
+	return (ret);
 }
+SYSCTL_PROC(_kern, KERN_KDEBUG, kdebug, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED,
+	0,			/* Pointer argument (arg1) */
+	0,			/* Integer argument (arg2) */
+	sysctl_kdebug_ops,	/* Handler function */
+	NULL,			/* Data pointer */
+	"");
 
 
+#if !CONFIG_EMBEDDED
 /*
  * Return the top *sizep bytes of the user stack, or the entire area of the
  * user stack down through the saved exec_path, whichever is smaller.
  */
-int
-sysctl_procargs(int *name, u_int namelen, user_addr_t where, 
-                size_t *sizep, proc_t cur_proc)
-{
-	return sysctl_procargsx( name, namelen, where, sizep, cur_proc, 0);
+STATIC int
+sysctl_doprocargs SYSCTL_HANDLER_ARGS
+{
+	__unused int cmd = oidp->oid_arg2;	/* subcommand */
+	int *name = arg1;		/* oid element argument vector */
+	int namelen = arg2;		/* number of oid element arguments */
+	user_addr_t oldp = req->oldptr;	/* user buffer copy out address */
+	size_t *oldlenp = &req->oldlen;	/* user buffer copy out size */
+//	user_addr_t newp = req->newptr;	/* user buffer copy in address */
+//	size_t newlen = req->newlen;	/* user buffer copy in size */
+	int error;
+
+	error = sysctl_procargsx(name, namelen, oldp, oldlenp, current_proc(), 0);
+
+	/* adjust index so we return the right required/consumed amount */
+	if (!error)
+		req->oldidx += req->oldlen;
+
+	return (error);
 }
+SYSCTL_PROC(_kern, KERN_PROCARGS, procargs, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED,
+	0,			/* Pointer argument (arg1) */
+	0,			/* Integer argument (arg2) */
+	sysctl_doprocargs,	/* Handler function */
+	NULL,			/* Data pointer */
+	"");
+#endif	/* !CONFIG_EMBEDDED */
+
+STATIC int
+sysctl_doprocargs2 SYSCTL_HANDLER_ARGS
+{
+	__unused int cmd = oidp->oid_arg2;	/* subcommand */
+	int *name = arg1;		/* oid element argument vector */
+	int namelen = arg2;		/* number of oid element arguments */
+	user_addr_t oldp = req->oldptr;	/* user buffer copy out address */
+	size_t *oldlenp = &req->oldlen;	/* user buffer copy out size */
+//	user_addr_t newp = req->newptr;	/* user buffer copy in address */
+//	size_t newlen = req->newlen;	/* user buffer copy in size */
+	int error;
 
-static int
-sysctl_procargs2(int *name, u_int namelen, user_addr_t where, 
-                 size_t *sizep, proc_t cur_proc)
-{
-	return sysctl_procargsx( name, namelen, where, sizep, cur_proc, 1);
+	error = sysctl_procargsx(name, namelen, oldp, oldlenp, current_proc(), 1);
+
+	/* adjust index so we return the right required/consumed amount */
+	if (!error)
+		req->oldidx += req->oldlen;
+
+	return (error);
 }
+SYSCTL_PROC(_kern, KERN_PROCARGS2, procargs2, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED,
+	0,			/* Pointer argument (arg1) */
+	0,			/* Integer argument (arg2) */
+	sysctl_doprocargs2,	/* Handler function */
+	NULL,			/* Data pointer */
+	"");
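+
+/*
+ * Editor's illustration (user space):	KERN_PROCARGS2 prepends argc as
+ * an int, followed by the exec path and the NUL separated argument and
+ * environment strings; probing with a NULL oldp first to size the
+ * buffer is assumed but not shown.
+ */
+#if 0	/* user space sketch */
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+
+int
+main(void)
+{
+	int mib[3] = { CTL_KERN, KERN_PROCARGS2, getpid() };
+	char buf[8192];
+	size_t len = sizeof(buf);
+	int argc;
+
+	if (sysctl(mib, 3, buf, &len, NULL, 0) == -1) {
+		perror("sysctl kern.procargs2");
+		return (1);
+	}
+	memcpy(&argc, buf, sizeof(argc));
+	printf("argc %d, exec path %s\n", argc, buf + sizeof(argc));
+	return (0);
+}
+#endif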
 
-static int
+STATIC int
 sysctl_procargsx(int *name, u_int namelen, user_addr_t where, 
                  size_t *sizep, proc_t cur_proc, int argc_yes)
 {
@@ -1977,7 +2295,7 @@ sysctl_procargsx(int *name, u_int namelen, user_addr_t where,
 /*
  * Max number of concurrent aio requests
  */
-static int
+STATIC int
 sysctl_aiomax
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -1997,7 +2315,7 @@ sysctl_aiomax
 /*
  * Max number of concurrent aio requests per process
  */
-static int
+STATIC int
 sysctl_aioprocmax
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2017,7 +2335,7 @@ sysctl_aioprocmax
 /*
  * Max number of async IO worker threads
  */
-static int
+STATIC int
 sysctl_aiothreads
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2039,7 +2357,7 @@ sysctl_aiothreads
 /*
  * System-wide limit on the max number of processes
  */
-static int
+STATIC int
 sysctl_maxproc
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2058,26 +2376,30 @@ sysctl_maxproc
 }
 
 SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, 
-		CTLFLAG_RD | CTLFLAG_KERN, 
+		CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 
 		ostype, 0, "");
 SYSCTL_STRING(_kern, KERN_OSRELEASE, osrelease, 
-		CTLFLAG_RD | CTLFLAG_KERN, 
+		CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 
 		osrelease, 0, "");
 SYSCTL_INT(_kern, KERN_OSREV, osrevision, 
-		CTLFLAG_RD | CTLFLAG_KERN, 
+		CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 
 		(int *)NULL, BSD, "");
 SYSCTL_STRING(_kern, KERN_VERSION, version, 
-		CTLFLAG_RD | CTLFLAG_KERN, 
+		CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 
 		version, 0, "");
+SYSCTL_STRING(_kern, OID_AUTO, uuid, 
+		CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 
+		&kernel_uuid[0], 0, "");
 
 #if DEBUG
 int debug_kprint_syscall = 0;
 char debug_kprint_syscall_process[MAXCOMLEN+1];
 
+/* Thread safe: bits and string value are not used to reclaim state */
 SYSCTL_INT (_debug, OID_AUTO, kprint_syscall,
-	    CTLFLAG_RW, &debug_kprint_syscall, 0, "kprintf syscall tracing");
+	    CTLFLAG_RW | CTLFLAG_LOCKED, &debug_kprint_syscall, 0, "kprintf syscall tracing");
 SYSCTL_STRING(_debug, OID_AUTO, kprint_syscall_process, 
-			  CTLFLAG_RW, debug_kprint_syscall_process, sizeof(debug_kprint_syscall_process),
+			  CTLFLAG_RW | CTLFLAG_LOCKED, debug_kprint_syscall_process, sizeof(debug_kprint_syscall_process),
 			  "name of process for kprintf syscall tracing");
 
 int debug_kprint_current_process(const char **namep)
@@ -2113,7 +2435,7 @@ int debug_kprint_current_process(const char **namep)
 /* PR-5293665: need to use a callback function for kern.osversion to set
  * osversion in IORegistry */
 
-static int
+STATIC int
 sysctl_osversion(__unused struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req)
 {
     int rval = 0;
@@ -2128,11 +2450,11 @@ sysctl_osversion(__unused struct sysctl_oid *oidp, void *arg1, int arg2, struct
 }
 
 SYSCTL_PROC(_kern, KERN_OSVERSION, osversion,
-        CTLFLAG_RW | CTLFLAG_KERN | CTLTYPE_STRING,
+        CTLFLAG_RW | CTLFLAG_KERN | CTLTYPE_STRING | CTLFLAG_LOCKED,
         osversion, 256 /* OSVERSIZE*/, 
         sysctl_osversion, "A", "");
 
-static int
+STATIC int
 sysctl_sysctl_bootargs
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2150,46 +2472,46 @@ SYSCTL_PROC(_kern, OID_AUTO, bootargs,
 	sysctl_sysctl_bootargs, "A", "bootargs");
 
 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, 
-		CTLFLAG_RW | CTLFLAG_KERN, 
+		CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
 		&maxfiles, 0, "");
 SYSCTL_INT(_kern, KERN_ARGMAX, argmax, 
-		CTLFLAG_RD | CTLFLAG_KERN, 
+		CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 
 		(int *)NULL, ARG_MAX, "");
 SYSCTL_INT(_kern, KERN_POSIX1, posix1version, 
-		CTLFLAG_RD | CTLFLAG_KERN, 
+		CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 
 		(int *)NULL, _POSIX_VERSION, "");
 SYSCTL_INT(_kern, KERN_NGROUPS, ngroups, 
-		CTLFLAG_RD | CTLFLAG_KERN, 
+		CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 
 		(int *)NULL, NGROUPS_MAX, "");
 SYSCTL_INT(_kern, KERN_JOB_CONTROL, job_control, 
-		CTLFLAG_RD | CTLFLAG_KERN, 
+		CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 
 		(int *)NULL, 1, "");
 #if 1	/* _POSIX_SAVED_IDS from <unistd.h> */
 SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, 
-		CTLFLAG_RD | CTLFLAG_KERN, 
+		CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 
 		(int *)NULL, 1, "");
 #else
 SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, 
-		CTLFLAG_RD | CTLFLAG_KERN, 
+		CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, 
 		NULL, 0, "");
 #endif
 SYSCTL_INT(_kern, OID_AUTO, num_files, 
-		CTLFLAG_RD, 
+		CTLFLAG_RD | CTLFLAG_LOCKED, 
 		&nfiles, 0, "");
 SYSCTL_COMPAT_INT(_kern, OID_AUTO, num_vnodes, 
-		CTLFLAG_RD, 
+		CTLFLAG_RD | CTLFLAG_LOCKED, 
 		&numvnodes, 0, "");
 SYSCTL_INT(_kern, OID_AUTO, num_tasks, 
-		CTLFLAG_RD, 
+		CTLFLAG_RD | CTLFLAG_LOCKED, 
 		&task_max, 0, "");
 SYSCTL_INT(_kern, OID_AUTO, num_threads, 
-		CTLFLAG_RD, 
+		CTLFLAG_RD | CTLFLAG_LOCKED, 
 		&thread_max, 0, "");
 SYSCTL_INT(_kern, OID_AUTO, num_taskthreads, 
-		CTLFLAG_RD, 
+		CTLFLAG_RD | CTLFLAG_LOCKED, 
 		&task_threadmax, 0, "");
 
-static int
+STATIC int
 sysctl_maxvnodes (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
 	int oldval = desiredvnodes;
@@ -2203,27 +2525,31 @@ sysctl_maxvnodes (__unused struct sysctl_oid *oidp, __unused void *arg1, __unuse
 	return(error);
 }
 
+SYSCTL_INT(_kern, OID_AUTO, namecache_disabled, 
+		CTLFLAG_RW | CTLFLAG_LOCKED, 
+		&nc_disabled, 0, ""); 
+
 SYSCTL_PROC(_kern, KERN_MAXVNODES, maxvnodes,
-		CTLTYPE_INT | CTLFLAG_RW,
+		CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
 		0, 0, sysctl_maxvnodes, "I", "");
 
 SYSCTL_PROC(_kern, KERN_MAXPROC, maxproc,
-		CTLTYPE_INT | CTLFLAG_RW,
+		CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
 		0, 0, sysctl_maxproc, "I", "");
 
 SYSCTL_PROC(_kern, KERN_AIOMAX, aiomax,
-		CTLTYPE_INT | CTLFLAG_RW,
+		CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
 		0, 0, sysctl_aiomax, "I", "");
 
 SYSCTL_PROC(_kern, KERN_AIOPROCMAX, aioprocmax,
-		CTLTYPE_INT | CTLFLAG_RW,
+		CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
 		0, 0, sysctl_aioprocmax, "I", "");
 
 SYSCTL_PROC(_kern, KERN_AIOTHREADS, aiothreads,
-		CTLTYPE_INT | CTLFLAG_RW,
+		CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
 		0, 0, sysctl_aiothreads, "I", "");
 
-static int
+STATIC int
 sysctl_securelvl
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2242,11 +2568,11 @@ sysctl_securelvl
 }
 
 SYSCTL_PROC(_kern, KERN_SECURELVL, securelevel,
-		CTLTYPE_INT | CTLFLAG_RW,
+		CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
 		0, 0, sysctl_securelvl, "I", "");
 
 
-static int
+STATIC int
 sysctl_domainname
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2259,14 +2585,14 @@ sysctl_domainname
 }
 
 SYSCTL_PROC(_kern, KERN_DOMAINNAME, nisdomainname,
-		CTLTYPE_STRING | CTLFLAG_RW,
+		CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED,
 		0, 0, sysctl_domainname, "A", "");
 
 SYSCTL_COMPAT_INT(_kern, KERN_HOSTID, hostid, 
-		CTLFLAG_RW | CTLFLAG_KERN, 
+		CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
 		&hostid, 0, "");
 
-static int
+STATIC int
 sysctl_hostname
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2280,10 +2606,10 @@ sysctl_hostname
 
 
 SYSCTL_PROC(_kern, KERN_HOSTNAME, hostname,
-		CTLTYPE_STRING | CTLFLAG_RW,
+		CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED,
 		0, 0, sysctl_hostname, "A", "");
 
-static int
+STATIC int
 sysctl_procname
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2293,26 +2619,59 @@ sysctl_procname
 }
 
 SYSCTL_PROC(_kern, KERN_PROCNAME, procname,
-		CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_ANYBODY,
+		CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
 		0, 0, sysctl_procname, "A", "");
 
 SYSCTL_INT(_kern, KERN_SPECULATIVE_READS, speculative_reads_disabled, 
-		CTLFLAG_RW | CTLFLAG_KERN, 
+		CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
 		&speculative_reads_disabled, 0, "");
 
+SYSCTL_INT(_kern, OID_AUTO, ignore_is_ssd, 
+		CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
+		&ignore_is_ssd, 0, "");
+
 SYSCTL_UINT(_kern, OID_AUTO, preheat_pages_max, 
-		CTLFLAG_RW | CTLFLAG_KERN, 
+		CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
 		&preheat_pages_max, 0, "");
 
 SYSCTL_UINT(_kern, OID_AUTO, preheat_pages_min, 
-		CTLFLAG_RW | CTLFLAG_KERN, 
+		CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
 		&preheat_pages_min, 0, "");
 
-SYSCTL_UINT(_kern, OID_AUTO, preheat_pages_mult, 
-		CTLFLAG_RW | CTLFLAG_KERN, 
-		&preheat_pages_mult, 0, "");
+SYSCTL_UINT(_kern, OID_AUTO, speculative_prefetch_max, 
+		CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
+		&speculative_prefetch_max, 0, "");
 
-static int
+SYSCTL_UINT(_kern, OID_AUTO, vm_page_free_target,
+		CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
+		&vm_page_free_target, 0, "");
+
+SYSCTL_UINT(_kern, OID_AUTO, vm_page_free_min,
+		CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
+		&vm_page_free_min, 0, "");
+
+SYSCTL_UINT(_kern, OID_AUTO, vm_page_free_reserved,
+		CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
+		&vm_page_free_reserved, 0, "");
+
+SYSCTL_UINT(_kern, OID_AUTO, vm_page_speculative_percentage,
+		CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
+		&vm_page_speculative_percentage, 0, "");
+
+SYSCTL_UINT(_kern, OID_AUTO, vm_page_speculative_q_age_ms,
+		CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
+		&vm_page_speculative_q_age_ms, 0, "");
+
+SYSCTL_UINT(_kern, OID_AUTO, vm_max_delayed_work_limit,
+		CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
+		&vm_max_delayed_work_limit, 0, "");
+
+SYSCTL_UINT(_kern, OID_AUTO, vm_max_batch,
+		CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
+		&vm_max_batch, 0, "");
+
+
+STATIC int
 sysctl_boottime
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2333,10 +2692,10 @@ sysctl_boottime
 }
 
 SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime,
-		CTLTYPE_STRUCT | CTLFLAG_RD,
+		CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
 		0, 0, sysctl_boottime, "S,timeval", "");
 
-static int
+STATIC int
 sysctl_symfile
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2349,11 +2708,11 @@ sysctl_symfile
 
 
 SYSCTL_PROC(_kern, KERN_SYMFILE, symfile,
-		CTLTYPE_STRING | CTLFLAG_RD,
+		CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED,
 		0, 0, sysctl_symfile, "A", "");
 
 #if NFSCLIENT
-static int
+STATIC int
 sysctl_netboot
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2361,12 +2720,15 @@ sysctl_netboot
 }
 
 SYSCTL_PROC(_kern, KERN_NETBOOT, netboot,
-		CTLTYPE_INT | CTLFLAG_RD,
+		CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
 		0, 0, sysctl_netboot, "I", "");
 #endif
 
 #ifdef CONFIG_IMGSRC_ACCESS
-static int
+/*
+ * Legacy--act as if only one layer of nesting is possible.
+ */
+STATIC int
 sysctl_imgsrcdev 
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2378,16 +2740,16 @@ sysctl_imgsrcdev
 		return EPERM;
 	}    
 
-	if (imgsrc_rootvnode == NULL) {
+	if (imgsrc_rootvnodes[0] == NULL) {
 		return ENOENT;
 	}    
 
-	result = vnode_getwithref(imgsrc_rootvnode);
+	result = vnode_getwithref(imgsrc_rootvnodes[0]);
 	if (result != 0) {
 		return result;
 	}
 	
-	devvp = vnode_mount(imgsrc_rootvnode)->mnt_devvp;
+	devvp = vnode_mount(imgsrc_rootvnodes[0])->mnt_devvp;
 	result = vnode_getwithref(devvp);
 	if (result != 0) {
 		goto out;
@@ -2397,16 +2759,82 @@ sysctl_imgsrcdev
 
 	vnode_put(devvp);
 out:
-	vnode_put(imgsrc_rootvnode);
+	vnode_put(imgsrc_rootvnodes[0]);
 	return result;
 }
 
 SYSCTL_PROC(_kern, OID_AUTO, imgsrcdev,
-		CTLTYPE_INT | CTLFLAG_RD,
+		CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
 		0, 0, sysctl_imgsrcdev, "I", ""); 
+
+STATIC int
+sysctl_imgsrcinfo
+(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+	int error;
+	struct imgsrc_info info[MAX_IMAGEBOOT_NESTING];	/* 2 for now, no problem */
+	uint32_t i;
+	vnode_t rvp, devvp;
+
+	if (imgsrc_rootvnodes[0] == NULLVP) {
+		return ENXIO;
+	}
+
+	for (i = 0; i < MAX_IMAGEBOOT_NESTING; i++) {
+		/*
+		 * Go get the root vnode.
+		 */
+		rvp = imgsrc_rootvnodes[i];
+		if (rvp == NULLVP) {
+			break;
+		}
+
+		error = vnode_get(rvp);
+		if (error != 0) {
+			return error;
+		}
+
+		/* 
+		 * For now, no getting at a non-local volume.
+		 */
+		devvp = vnode_mount(rvp)->mnt_devvp;
+		if (devvp == NULL) {
+			vnode_put(rvp);
+			return EINVAL;	
+		}
+
+		error = vnode_getwithref(devvp);
+		if (error != 0) {
+			vnode_put(rvp);
+			return error;
+		}
+
+		/*
+		 * Fill in info.
+		 */
+		info[i].ii_dev = vnode_specrdev(devvp);
+		info[i].ii_flags = 0;
+		info[i].ii_height = i;
+		bzero(info[i].ii_reserved, sizeof(info[i].ii_reserved));
+
+		vnode_put(devvp);
+		vnode_put(rvp);
+	}
+
+	return sysctl_io_opaque(req, info, i * sizeof(info[0]), NULL);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, imgsrcinfo,
+		CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_LOCKED,
+		0, 0, sysctl_imgsrcinfo, "I", ""); 
+
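+/*
+ * Editor's illustration (user space):	reading the opaque array back;
+ * this assumes struct imgsrc_info and MAX_IMAGEBOOT_NESTING are
+ * exported through <sys/imgsrc.h>.
+ */
+#if 0	/* user space sketch */
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/imgsrc.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+	struct imgsrc_info info[MAX_IMAGEBOOT_NESTING];
+	size_t len = sizeof(info);
+	size_t i;
+
+	if (sysctlbyname("kern.imgsrcinfo", info, &len, NULL, 0) == -1) {
+		perror("sysctlbyname kern.imgsrcinfo");
+		return (1);
+	}
+	for (i = 0; i < len / sizeof(info[0]); i++)
+		printf("height %u dev %#x\n", info[i].ii_height,
+		    (unsigned)info[i].ii_dev);
+	return (0);
+}
+#endif
+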
 #endif /* CONFIG_IMGSRC_ACCESS */
 
-static int
+SYSCTL_INT(_kern, OID_AUTO, timer_coalescing_enabled, 
+		CTLFLAG_RW | CTLFLAG_LOCKED,
+		&mach_timer_coalescing_enabled, 0, "");
+
+STATIC int
 sysctl_usrstack
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2414,10 +2842,10 @@ sysctl_usrstack
 }
 
 SYSCTL_PROC(_kern, KERN_USRSTACK32, usrstack,
-		CTLTYPE_INT | CTLFLAG_RD,
+		CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
 		0, 0, sysctl_usrstack, "I", "");
 
-static int
+STATIC int
 sysctl_usrstack64
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2425,14 +2853,14 @@ sysctl_usrstack64
 }
 
 SYSCTL_PROC(_kern, KERN_USRSTACK64, usrstack64,
-		CTLTYPE_QUAD | CTLFLAG_RD,
+		CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED,
 		0, 0, sysctl_usrstack64, "Q", "");
 
 SYSCTL_STRING(_kern, KERN_COREFILE, corefile, 
-		CTLFLAG_RW | CTLFLAG_KERN, 
+		CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
 		corefilename, sizeof(corefilename), "");
 
-static int
+STATIC int
 sysctl_coredump
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2451,10 +2879,10 @@ sysctl_coredump
 }
 
 SYSCTL_PROC(_kern, KERN_COREDUMP, coredump,
-		CTLTYPE_INT | CTLFLAG_RW,
+		CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
 		0, 0, sysctl_coredump, "I", "");
 
-static int
+STATIC int
 sysctl_suid_coredump
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2473,10 +2901,10 @@ sysctl_suid_coredump
 }
 
 SYSCTL_PROC(_kern, KERN_SUGID_COREDUMP, sugid_coredump,
-		CTLTYPE_INT | CTLFLAG_RW,
+		CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
 		0, 0, sysctl_suid_coredump, "I", "");
 
-static int
+STATIC int
 sysctl_delayterm
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2495,11 +2923,11 @@ sysctl_delayterm
 }
 
 SYSCTL_PROC(_kern, KERN_PROCDELAYTERM, delayterm,
-		CTLTYPE_INT | CTLFLAG_RW,
+		CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
 		0, 0, sysctl_delayterm, "I", "");
 
 
-static int
+STATIC int
 sysctl_rage_vnode
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2545,11 +2973,11 @@ sysctl_rage_vnode
 }
 
 SYSCTL_PROC(_kern, KERN_RAGEVNODE, rage_vnode,
-		CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY,
+		CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
 		0, 0, sysctl_rage_vnode, "I", "");
 
 
-static int
+STATIC int
 sysctl_kern_check_openevt
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2582,12 +3010,12 @@ sysctl_kern_check_openevt
 	return(error);
 }
 
-SYSCTL_PROC(_kern, KERN_CHECKOPENEVT, check_openevt, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY,
+SYSCTL_PROC(_kern, KERN_CHECKOPENEVT, check_openevt, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
             0, 0, sysctl_kern_check_openevt, "I", "set the per-process check-open-evt flag");
 
 
 
-static int
+STATIC int
 sysctl_nx
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2617,10 +3045,10 @@ sysctl_nx
 
 
 SYSCTL_PROC(_kern, KERN_NX_PROTECTION, nx, 
-		CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_KERN, 
+		CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
 		0, 0, sysctl_nx, "I", "");
 
-static int
+STATIC int
 sysctl_loadavg
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2636,10 +3064,30 @@ sysctl_loadavg
 }
 
 SYSCTL_PROC(_vm, VM_LOADAVG, loadavg,
-		CTLTYPE_STRUCT | CTLFLAG_RD,
+		CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
 		0, 0, sysctl_loadavg, "S,loadavg", "");
 
-static int
+/*
+ * Note:	Thread safe; the vm_map lock provides the protection inside
+ *		vm_toggle_entry_reuse()
+ */
+STATIC int
+sysctl_vm_toggle_address_reuse(__unused struct sysctl_oid *oidp, __unused void *arg1,
+	      __unused int arg2, struct sysctl_req *req)
+{
+	int old_value = 0, new_value = 0, error = 0;
+
+	/* Report failure instead of silently returning success */
+	if (vm_toggle_entry_reuse(VM_TOGGLE_GETVALUE, &old_value))
+		return (EINVAL);
+	error = sysctl_io_number(req, old_value, sizeof(int), &new_value, NULL);
+	if (!error) {
+		return (vm_toggle_entry_reuse(new_value, NULL));
+	}
+	return (error);
+}
+
+SYSCTL_PROC(_debug, OID_AUTO, toggle_address_reuse, CTLFLAG_ANYBODY | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, sysctl_vm_toggle_address_reuse,"I","");
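+/*
+ * Usage sketch (illustrative): like the other sysctl_io_number()-based
+ * handlers in this file, the handler above copies the current value out
+ * to the caller and, when a new value is supplied, hands it to
+ * vm_toggle_entry_reuse().  From user space the knob can be flipped with
+ * sysctlbyname(3):
+ *
+ *	int enable = 1, old = 0;
+ *	size_t oldlen = sizeof(old);
+ *	(void) sysctlbyname("debug.toggle_address_reuse",
+ *	    &old, &oldlen, &enable, sizeof(enable));
+ */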
+
+STATIC int
 sysctl_swapusage
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2668,17 +3116,53 @@ sysctl_swapusage
 
 
 SYSCTL_PROC(_vm, VM_SWAPUSAGE, swapusage,
-		CTLTYPE_STRUCT | CTLFLAG_RD,
+		CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
 		0, 0, sysctl_swapusage, "S,xsw_usage", "");
 
+#if CONFIG_EMBEDDED
+/* <rdar://problem/7688080> */
+boolean_t vm_freeze_enabled = FALSE;
+#endif /* CONFIG_EMBEDDED */
+
+
+#if CONFIG_FREEZE
+extern void vm_page_reactivate_all_throttled(void);
+
+static int
+sysctl_freeze_enabled SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+	int error, val = vm_freeze_enabled ? 1 : 0;
+	boolean_t disabled;
+
+	error = sysctl_handle_int(oidp, &val, 0, req);
+	if (error || !req->newptr)
+		return (error);
+	
+	/* 
+	 * If freeze is being disabled, we need to move dirty pages out from the throttle to the active queue. 
+	 */
+	disabled = (!val && vm_freeze_enabled);
+	
+	vm_freeze_enabled = val ? TRUE : FALSE;
+	
+	if (disabled) {
+		vm_page_reactivate_all_throttled();
+	}
+	
+	return (0);
+}
+
+SYSCTL_PROC(_vm, OID_AUTO, freeze_enabled, CTLTYPE_INT|CTLFLAG_RW, &vm_freeze_enabled, 0, sysctl_freeze_enabled, "I", "");
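+
+/*
+ * Handler pattern note: sysctl_handle_int() above copies the current
+ * value out and, only when req->newptr is set, copies the proposed
+ * value into val, so the early return serves pure reads.  Only a
+ * TRUE -> FALSE transition of vm_freeze_enabled triggers
+ * vm_page_reactivate_all_throttled().
+ */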
+#endif /* CONFIG_FREEZE */
 
 /* this kernel does NOT implement shared_region_make_private_np() */
 SYSCTL_INT(_kern, KERN_SHREG_PRIVATIZABLE, shreg_private, 
-		CTLFLAG_RD, 
+		CTLFLAG_RD | CTLFLAG_LOCKED, 
 		(int *)NULL, 0, "");
 
 #if defined(__i386__) || defined(__x86_64__)
-static int
+STATIC int
 sysctl_sysctl_exec_affinity(__unused struct sysctl_oid *oidp,
 			   __unused void *arg1, __unused int arg2,
 			   struct sysctl_req *req)
@@ -2706,10 +3190,10 @@ sysctl_sysctl_exec_affinity(__unused struct sysctl_oid *oidp,
 	
 	return 0;
 }
-SYSCTL_PROC(_sysctl, OID_AUTO, proc_exec_affinity, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, 0, 0, sysctl_sysctl_exec_affinity ,"I","proc_exec_affinity");
+SYSCTL_PROC(_sysctl, OID_AUTO, proc_exec_affinity, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, 0, 0, sysctl_sysctl_exec_affinity, "I", "proc_exec_affinity");
 #endif
 
-static int
+STATIC int
 fetch_process_cputype(
 	proc_t cur_proc,
 	int *name,
@@ -2752,7 +3236,7 @@ out:
 	return (error);
 }
 
-static int
+STATIC int
 sysctl_sysctl_native(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
 		    struct sysctl_req *req)
 {
@@ -2765,9 +3249,9 @@ sysctl_sysctl_native(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
 		res = 0;
 	return SYSCTL_OUT(req, &res, sizeof(res));
 }	
-SYSCTL_PROC(_sysctl, OID_AUTO, proc_native, CTLTYPE_NODE|CTLFLAG_RD, 0, 0, sysctl_sysctl_native ,"I","proc_native");
+SYSCTL_PROC(_sysctl, OID_AUTO, proc_native, CTLTYPE_NODE | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, sysctl_sysctl_native, "I", "proc_native");
 
-static int
+STATIC int
 sysctl_sysctl_cputype(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
 		     struct sysctl_req *req)
 {
@@ -2777,9 +3261,9 @@ sysctl_sysctl_cputype(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
 		return error;
 	return SYSCTL_OUT(req, &proc_cputype, sizeof(proc_cputype));
 }
-SYSCTL_PROC(_sysctl, OID_AUTO, proc_cputype, CTLTYPE_NODE|CTLFLAG_RD, 0, 0, sysctl_sysctl_cputype ,"I","proc_cputype");
+SYSCTL_PROC(_sysctl, OID_AUTO, proc_cputype, CTLTYPE_NODE | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, sysctl_sysctl_cputype, "I", "proc_cputype");
 
-static int
+STATIC int
 sysctl_safeboot
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2787,10 +3271,10 @@ sysctl_safeboot
 }
 
 SYSCTL_PROC(_kern, KERN_SAFEBOOT, safeboot,
-		CTLTYPE_INT | CTLFLAG_RD,
+		CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
 		0, 0, sysctl_safeboot, "I", "");
 
-static int
+STATIC int
 sysctl_singleuser
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
@@ -2798,7 +3282,7 @@ sysctl_singleuser
 }
 
 SYSCTL_PROC(_kern, OID_AUTO, singleuser,
-		CTLTYPE_INT | CTLFLAG_RD,
+		CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
 		0, 0, sysctl_singleuser, "I", "");
 
 /*
@@ -2808,9 +3292,9 @@ extern boolean_t	affinity_sets_enabled;
 extern int		affinity_sets_mapping;
 
 SYSCTL_INT (_kern, OID_AUTO, affinity_sets_enabled,
-	    CTLFLAG_RW, (int *) &affinity_sets_enabled, 0, "hinting enabled");
+	    CTLFLAG_RW | CTLFLAG_LOCKED, (int *) &affinity_sets_enabled, 0, "hinting enabled");
 SYSCTL_INT (_kern, OID_AUTO, affinity_sets_mapping,
-	    CTLFLAG_RW, &affinity_sets_mapping, 0, "mapping policy");
+	    CTLFLAG_RW | CTLFLAG_LOCKED, &affinity_sets_mapping, 0, "mapping policy");
 
 /*
  * Limit on total memory users can wire.
@@ -2833,9 +3317,9 @@ vm_map_size_t	vm_user_wire_limit;
  * There needs to be a more automatic/elegant way to do this
  */
 
-SYSCTL_QUAD(_vm, OID_AUTO, global_no_user_wire_amount, CTLFLAG_RW, &vm_global_no_user_wire_amount, "");
-SYSCTL_QUAD(_vm, OID_AUTO, global_user_wire_limit, CTLFLAG_RW, &vm_global_user_wire_limit, "");
-SYSCTL_QUAD(_vm, OID_AUTO, user_wire_limit, CTLFLAG_RW, &vm_user_wire_limit, "");
+SYSCTL_QUAD(_vm, OID_AUTO, global_no_user_wire_amount, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_global_no_user_wire_amount, "");
+SYSCTL_QUAD(_vm, OID_AUTO, global_user_wire_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_global_user_wire_limit, "");
+SYSCTL_QUAD(_vm, OID_AUTO, user_wire_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_user_wire_limit, "");
 
 
 
@@ -2846,15 +3330,15 @@ SYSCTL_QUAD(_vm, OID_AUTO, user_wire_limit, CTLFLAG_RW, &vm_user_wire_limit, "")
 extern	uint32_t        kdebug_thread_block;
 
 SYSCTL_INT (_kern, OID_AUTO, kdebug_thread_block,
-	    CTLFLAG_RW, &kdebug_thread_block, 0, "kdebug thread_block");
+	    CTLFLAG_RW | CTLFLAG_LOCKED, &kdebug_thread_block, 0, "kdebug thread_block");
 
 /*
  * Kernel stack size and depth
  */
 SYSCTL_INT (_kern, OID_AUTO, stack_size,
-	    CTLFLAG_RD, (int *) &kernel_stack_size, 0, "Kernel stack size");
+	    CTLFLAG_RD | CTLFLAG_LOCKED, (int *) &kernel_stack_size, 0, "Kernel stack size");
 SYSCTL_INT (_kern, OID_AUTO, stack_depth_max,
-	    CTLFLAG_RD, (int *) &kernel_stack_depth_max, 0, "Max kernel stack depth at interrupt or context switch");
+	    CTLFLAG_RD | CTLFLAG_LOCKED, (int *) &kernel_stack_depth_max, 0, "Max kernel stack depth at interrupt or context switch");
 
 /*
  * enable back trace for port allocations
@@ -2862,6 +3346,21 @@ SYSCTL_INT (_kern, OID_AUTO, stack_depth_max,
 extern int ipc_portbt;
 
 SYSCTL_INT(_kern, OID_AUTO, ipc_portbt, 
-		CTLFLAG_RW | CTLFLAG_KERN, 
+		CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED,
 		&ipc_portbt, 0, "");
 
+/*
+ * Scheduler sysctls
+ */
+
+/*
+ * See osfmk/kern/sched_prim.c for the corresponding definition
+ * in osfmk/. If either version changes, update the other.
+ */
+#define SCHED_STRING_MAX_LENGTH (48)
+
+extern char sched_string[SCHED_STRING_MAX_LENGTH];
+SYSCTL_STRING(_kern, OID_AUTO, sched,
+			  CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED,
+			  sched_string, sizeof(sched_string),
+			  "Timeshare scheduler implementation");
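+
+/*
+ * Read sketch (illustrative): the active scheduler name can be queried
+ * from user space with sysctlbyname(3):
+ *
+ *	char sched[48];	/* mirrors SCHED_STRING_MAX_LENGTH */
+ *	size_t len = sizeof(sched);
+ *	if (sysctlbyname("kern.sched", sched, &len, NULL, 0) == 0)
+ *		printf("scheduler: %s\n", sched);
+ */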
diff --git a/bsd/kern/kern_time.c b/bsd/kern/kern_time.c
index 6f392fb86..315863131 100644
--- a/bsd/kern/kern_time.c
+++ b/bsd/kern/kern_time.c
@@ -75,6 +75,7 @@
 #include <sys/kauth.h>
 #include <sys/vnode.h>
 #include <sys/time.h>
+#include <sys/priv.h>
 
 #include <sys/mount_internal.h>
 #include <sys/sysproto.h>
@@ -215,7 +216,7 @@ adjtime(struct proc *p, struct adjtime_args *uap, __unused int32_t *retval)
 	if (error)
 		return (error);
 #endif
-	if ((error = suser(kauth_cred_get(), &p->p_acflag)))
+	if ((error = priv_check_cred(kauth_cred_get(), PRIV_ADJTIME, 0)))
 		return (error);
 	if (IS_64BIT_PROCESS(p)) {
 		struct user64_timeval user_atv;
diff --git a/bsd/kern/kern_xxx.c b/bsd/kern/kern_xxx.c
index 0a080dd59..293808838 100644
--- a/bsd/kern/kern_xxx.c
+++ b/bsd/kern/kern_xxx.c
@@ -116,7 +116,7 @@ reboot(struct proc *p, register struct reboot_args *uap, __unused int32_t *retva
 #endif
 	if (!error) {
 		OSBitOrAtomic(P_REBOOT, &p->p_flag);  /* No more signals for this proc */
-		boot(RB_BOOT, uap->opt, command);
+		error = boot(RB_BOOT, uap->opt, command);
 	}
 	return(error);
 }
diff --git a/bsd/kern/kpi_mbuf.c b/bsd/kern/kpi_mbuf.c
index a89cfbef0..70ab53b31 100644
--- a/bsd/kern/kpi_mbuf.c
+++ b/bsd/kern/kpi_mbuf.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -40,9 +40,9 @@
 
 #include "net/net_str_id.h"
 
-static const mbuf_flags_t mbuf_flags_mask = MBUF_EXT | MBUF_PKTHDR | MBUF_EOR |
-				MBUF_BCAST | MBUF_MCAST | MBUF_FRAG | MBUF_FIRSTFRAG |
-				MBUF_LASTFRAG | MBUF_PROMISC;
+static const mbuf_flags_t mbuf_flags_mask = (MBUF_EXT | MBUF_PKTHDR | MBUF_EOR |
+    MBUF_LOOP | MBUF_BCAST | MBUF_MCAST | MBUF_FRAG | MBUF_FIRSTFRAG |
+    MBUF_LASTFRAG | MBUF_PROMISC | MBUF_HASFCS);
 
 void* mbuf_data(mbuf_t mbuf)
 {
@@ -81,6 +81,10 @@ errno_t mbuf_align_32(mbuf_t mbuf, size_t len)
 	return 0;
 }
 
+/* This function is used to provide mcl_to_paddr via symbol indirection;
+ * avoid any change in its behavior, or else remove the indirection in
+ * config/Unsupported*
+ */
 addr64_t mbuf_data_to_physical(void* ptr)
 {
 	return (addr64_t)(uintptr_t)mcl_to_paddr(ptr);
@@ -107,10 +111,10 @@ mbuf_attachcluster(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf,
     caddr_t extbuf, void (*extfree)(caddr_t , u_int, caddr_t),
     size_t extsize, caddr_t extarg)
 {
-	if (extbuf == NULL || extfree == NULL || extsize == 0)
+	if (mbuf == NULL || extbuf == NULL || extfree == NULL || extsize == 0)
 		return (EINVAL);
 
-	if ((*mbuf = m_clattach(mbuf != NULL ? *mbuf : NULL, type, extbuf,
+	if ((*mbuf = m_clattach(*mbuf, type, extbuf,
 	    extfree, extsize, extarg, how)) == NULL)
 		return (ENOMEM);
 
@@ -126,15 +130,15 @@ mbuf_alloccluster(mbuf_how_t how, size_t *size, caddr_t *addr)
 	*addr = NULL;
 
 	/* Jumbo cluster pool not available? */
-	if (*size > NBPG && njcl == 0)
+	if (*size > MBIGCLBYTES && njcl == 0)
 		return (ENOTSUP);
 
 	if (*size <= MCLBYTES && (*addr = m_mclalloc(how)) != NULL)
 		*size = MCLBYTES;
-	else if (*size > MCLBYTES && *size <= NBPG &&
+	else if (*size > MCLBYTES && *size <= MBIGCLBYTES &&
 	    (*addr = m_bigalloc(how)) != NULL)
-		*size = NBPG;
-	else if (*size > NBPG && *size <= M16KCLBYTES &&
+		*size = MBIGCLBYTES;
+	else if (*size > MBIGCLBYTES && *size <= M16KCLBYTES &&
 	    (*addr = m_16kalloc(how)) != NULL)
 		*size = M16KCLBYTES;
 	else
@@ -149,14 +153,14 @@ mbuf_alloccluster(mbuf_how_t how, size_t *size, caddr_t *addr)
 void
 mbuf_freecluster(caddr_t addr, size_t size)
 {
-	if (size != MCLBYTES && size != NBPG && size != M16KCLBYTES)
+	if (size != MCLBYTES && size != MBIGCLBYTES && size != M16KCLBYTES)
 		panic("%s: invalid size (%ld) for cluster %p", __func__,
 		    size, (void *)addr);
 
 	if (size == MCLBYTES)
 		m_mclfree(addr);
-	else if (size == NBPG)
-		m_bigfree(addr, NBPG, NULL);
+	else if (size == MBIGCLBYTES)
+		m_bigfree(addr, MBIGCLBYTES, NULL);
 	else if (njcl > 0)
 		m_16kfree(addr, M16KCLBYTES, NULL);
 	else
@@ -184,7 +188,7 @@ mbuf_getcluster(mbuf_how_t how, mbuf_type_t type, size_t size, mbuf_t* mbuf)
 	 */
 	if (size == MCLBYTES) {
 		*mbuf = m_mclget(*mbuf, how);
-	} else if (size == NBPG) {
+	} else if (size == MBIGCLBYTES) {
 		*mbuf = m_mbigget(*mbuf, how);
 	} else if (size == M16KCLBYTES) {
 		if (njcl > 0) {
@@ -254,11 +258,17 @@ errno_t mbuf_getpacket(mbuf_how_t how, mbuf_t *mbuf)
 	return error;
 }
 
+/* This function is used to provide m_free via symbol indirection; avoid
+ * any change in its behavior, or else remove the indirection in
+ * config/Unsupported*
+ */
 mbuf_t mbuf_free(mbuf_t mbuf)
 {
 	return m_free(mbuf);
 }
 
+/* This function is used to provide m_freem via symbol indirection; avoid
+ * any change in its behavior, or else remove the indirection in
+ * config/Unsupported*
+ */
 void mbuf_freem(mbuf_t mbuf)
 {
 	m_freem(mbuf);
@@ -274,6 +284,10 @@ size_t mbuf_leadingspace(const mbuf_t mbuf)
 	return m_leadingspace(mbuf);
 }
 
+/* This function is used to provide m_trailingspace via symbol indirection;
+ * avoid any change in its behavior, or else remove the indirection in
+ * config/Unsupported*
+ */
 size_t mbuf_trailingspace(const mbuf_t mbuf)
 {
 	return m_trailingspace(mbuf);
@@ -332,6 +346,9 @@ errno_t mbuf_pulldown(mbuf_t src, size_t *offset, size_t len, mbuf_t *location)
 	return (*location == NULL) ? ENOMEM : 0;
 }
 
+/* This function is used to provide m_adj via symbol indirection; avoid
+ * any change in its behavior, or else remove the indirection in
+ * config/Unsupported*
+ */
 void mbuf_adj(mbuf_t mbuf, int len)
 {
 	m_adj(mbuf, len);
@@ -544,7 +561,7 @@ void
 mbuf_outbound_finalize(mbuf_t mbuf, u_int32_t protocol_family, size_t protocol_offset)
 {
 	if ((mbuf->m_pkthdr.csum_flags &
-		 (CSUM_DELAY_DATA | CSUM_DELAY_IP | CSUM_TCP_SUM16)) == 0)
+		 (CSUM_DELAY_DATA | CSUM_DELAY_IP | CSUM_TCP_SUM16 | CSUM_DELAY_IPV6_DATA)) == 0)
 		return;
 	
 	/* Generate the packet in software, client needs it */
@@ -573,14 +590,23 @@ mbuf_outbound_finalize(mbuf_t mbuf, u_int32_t protocol_family, size_t protocol_o
 			
 			mbuf->m_pkthdr.csum_flags &= ~(CSUM_DELAY_DATA | CSUM_DELAY_IP);
 			break;
+
+		case PF_INET6:
+			if (mbuf->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) {
+				in_delayed_cksum_offset(mbuf, protocol_offset);
+			}
+			mbuf->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
+			break;
 	
 		default:
 			/*
 			 * Not sure what to do here if anything.
-			 * Hardware checksum code looked pretty IPv4 specific.
+			 * Hardware checksum code looked pretty IPv4/IPv6 specific.
 			 */
-			if ((mbuf->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_IP)) != 0)
-				panic("mbuf_outbound_finalize - CSUM flags set for non-IPv4 packet (%u)!\n", protocol_family);
+			if ((mbuf->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_IP | CSUM_DELAY_IPV6_DATA)) != 0)
+				panic("mbuf_outbound_finalize - CSUM flags set for non-IPv4/IPv6 packet (%u)!\n", protocol_family);
 	}
 }
 
@@ -619,7 +645,8 @@ mbuf_clear_vlan_tag(
 }
 
 static const mbuf_csum_request_flags_t mbuf_valid_csum_request_flags = 
-	MBUF_CSUM_REQ_IP | MBUF_CSUM_REQ_TCP | MBUF_CSUM_REQ_UDP | MBUF_CSUM_REQ_SUM16;
+	MBUF_CSUM_REQ_IP | MBUF_CSUM_REQ_TCP | MBUF_CSUM_REQ_UDP |
+	MBUF_CSUM_REQ_SUM16 | MBUF_CSUM_REQ_TCPIPV6 | MBUF_CSUM_REQ_UDPIPV6;
 
 errno_t
 mbuf_set_csum_requested(
@@ -827,7 +854,7 @@ mbuf_tag_allocate(
 	}
 	
 	/* Allocate an mtag */
-	tag = m_tag_alloc(id, type, length, how);
+	tag = m_tag_create(id, type, length, how, mbuf);
 	if (tag == NULL) {
 		return how == M_WAITOK ? ENOMEM : EWOULDBLOCK;
 	}
@@ -1072,34 +1099,16 @@ mbuf_get_mhlen(void)
 	return (_MHLEN);
 }
 
-mbuf_priority_t
-mbuf_get_priority(struct mbuf *m)
+u_int32_t
+mbuf_get_minclsize(void)
 {
-#if !PKT_PRIORITY
-#pragma unused(m)
-	return (MBUF_PRIORITY_NORMAL);
-#else /* PKT_PRIORITY */
-	mbuf_priority_t prio = MBUF_PRIORITY_NORMAL;
-
-	if (m == NULL || !(m->m_flags & M_PKTHDR))
-		return (prio);
-
-	/* Defaults to normal; ignore anything else but background */
-	if (m->m_pkthdr.prio == MBUF_PRIORITY_BACKGROUND)
-		prio = MBUF_PRIORITY_BACKGROUND;
-
-	return (prio);
-#endif /* PKT_PRIORITY */
+	return (MHLEN + MLEN);
 }
 
 mbuf_traffic_class_t 
 mbuf_get_traffic_class(mbuf_t m)
 {
-#if !PKT_PRIORITY
-#pragma unused(m)
-	return (MBUF_TC_BE);
-#else /* PKT_PRIORITY */
-	mbuf_priority_t prio = MBUF_TC_BE;
+	mbuf_traffic_class_t prio = MBUF_TC_BE;
 
 	if (m == NULL || !(m->m_flags & M_PKTHDR))
 		return (prio);
@@ -1108,17 +1117,11 @@ mbuf_get_traffic_class(mbuf_t m)
 		prio = m->m_pkthdr.prio;
 
 	return (prio);
-#endif /* PKT_PRIORITY */
 }
 
 errno_t 
 mbuf_set_traffic_class(mbuf_t m, mbuf_traffic_class_t tc)
 {
-#if !PKT_PRIORITY
-#pragma unused(m)
-#pragma unused(tc)
-	return 0;
-#else /* PKT_PRIORITY */
 	errno_t error = 0;
 	
 	if (m == NULL || !(m->m_flags & M_PKTHDR))
@@ -1136,5 +1139,4 @@ mbuf_set_traffic_class(mbuf_t m, mbuf_traffic_class_t tc)
 			break;
 	}
 	return error;
-#endif /* PKT_PRIORITY */
 }
diff --git a/bsd/kern/kpi_socket.c b/bsd/kern/kpi_socket.c
index ee1fd5af0..70507beff 100644
--- a/bsd/kern/kpi_socket.c
+++ b/bsd/kern/kpi_socket.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -172,18 +172,13 @@ sock_accept(
 	if (sa) FREE(sa, M_SONAME);
 
 	/*
-	 * If the socket has been marked as inactive by soacceptfilter(),
-	 * disallow further operations on it.  We explicitly call shutdown
-	 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
-	 * states are set for the socket.  This would also flush out data
-	 * hanging off the receive list of this socket.
+	 * If the socket has been marked as inactive by sosetdefunct(),
+	 * disallow further operations on it.
 	 */
 	if (new_so->so_flags & SOF_DEFUNCT) {
-		(void) soshutdownlock(new_so, SHUT_RD);
-		(void) soshutdownlock(new_so, SHUT_WR);
-		(void) sodisconnectlocked(new_so);
+		(void) sodefunct(current_proc(), new_so,
+		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
 	}
-
 	*new_sock = new_so;
 	if (dosocklock)	
 		socket_unlock(new_so, 1);
@@ -195,9 +190,30 @@ sock_bind(
 	socket_t				sock,
 	const struct sockaddr	*to)
 {
-	if (sock == NULL || to == NULL) return EINVAL;
+	int	error = 0;
+	struct sockaddr *sa = NULL;
+	struct sockaddr_storage ss;
+	boolean_t want_free = TRUE;
+
+	if (sock == NULL || to == NULL) 
+		return EINVAL;
+	
+	if (to->sa_len > sizeof(ss)) {
+		MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME, M_WAITOK);
+		if (sa == NULL)
+			return ENOBUFS;
+	} else {
+		sa = (struct sockaddr *)&ss;
+		want_free = FALSE;
+	}
+	memcpy(sa, to, to->sa_len);
+
+	error = sobind(sock, sa);
 	
-	return sobind(sock, (struct sockaddr*)(uintptr_t)to);
+	if (sa != NULL && want_free == TRUE)
+		FREE(sa, M_SONAME);	
+
+	return error;
 }
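+
+/*
+ * Note: sock_bind() above and sock_connect() below copy the caller's
+ * sockaddr into local storage (the stack for small addresses, MALLOC
+ * otherwise) rather than casting away const, so the protocol layer may
+ * modify the copy without ever touching the caller's buffer.
+ */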
 
 errno_t
@@ -208,23 +224,37 @@ sock_connect(
 {
 	int	error = 0;
 	lck_mtx_t *mutex_held;
+	struct sockaddr *sa = NULL;
+	struct sockaddr_storage ss;
+	boolean_t want_free = TRUE;
 	
 	if (sock == NULL || to == NULL) return EINVAL;
+	
+	if (to->sa_len > sizeof(ss)) {
+		MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME,
+			(flags & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK);
+		if (sa == NULL)
+			return ENOBUFS;
+	} else {
+		sa = (struct sockaddr *)&ss;
+		want_free = FALSE;
+	}
+	memcpy(sa, to, to->sa_len);
 
 	socket_lock(sock, 1);
 
 	if ((sock->so_state & SS_ISCONNECTING) &&
 		((sock->so_state & SS_NBIO) != 0 ||
 		 (flags & MSG_DONTWAIT) != 0)) {
-		socket_unlock(sock, 1);
-		return EALREADY;
+		error = EALREADY;
+		goto out;
 	}
-	error = soconnectlock(sock, (struct sockaddr*)(uintptr_t)to, 0);
+	error = soconnectlock(sock, sa, 0);
 	if (!error) {
 		if ((sock->so_state & SS_ISCONNECTING) &&
 			((sock->so_state & SS_NBIO) != 0 || (flags & MSG_DONTWAIT) != 0)) {
-			socket_unlock(sock, 1);
-			return EINPROGRESS;
+			error = EINPROGRESS;
+			goto out;
 		}
 		
 		if (sock->so_proto->pr_getlock != NULL)  
@@ -247,7 +277,12 @@ sock_connect(
 	else {
 		sock->so_state &= ~SS_ISCONNECTING;
 	}
+out:
 	socket_unlock(sock, 1);
+
+	if (sa != NULL && want_free == TRUE)
+		FREE(sa, M_SONAME);
+		
 	return error;
 }
 
@@ -476,6 +511,27 @@ sock_setsockopt(
 	return sosetopt(sock, &sopt); /* will lock socket */
 }
 
+/*
+ * This follows the recommended mapping between DSCP code points and
+ * WMM access classes.
+ */
+static u_int8_t so_tc_from_dscp(u_int8_t dscp);
+static u_int8_t
+so_tc_from_dscp(u_int8_t dscp)
+{
+	u_int8_t tc;
+
+	if (dscp >= 0x30 && dscp <= 0x3f)
+		tc = SO_TC_VO;
+	else if (dscp >= 0x20 && dscp <= 0x2f)
+		tc = SO_TC_VI;
+	else if (dscp >= 0x08 && dscp <= 0x17)
+		tc = SO_TC_BK;
+	else
+		tc = SO_TC_BE;
+
+	return tc;
+}
+
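+/*
+ * Worked example (per the ranges above): sock_settclassopt() below
+ * shifts the caller's TOS byte right by two to recover the DSCP before
+ * this lookup, so:
+ *
+ *	TOS 0xE0 -> DSCP 0x38 -> SO_TC_VO  (network control)
+ *	TOS 0xB8 -> DSCP 0x2E -> SO_TC_VI  (expedited forwarding)
+ *	TOS 0x20 -> DSCP 0x08 -> SO_TC_BK  (low priority data)
+ *
+ * and anything outside the three ranges maps to SO_TC_BE.
+ */
+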
 errno_t
 sock_settclassopt(
 	socket_t	sock,
@@ -484,13 +540,9 @@ sock_settclassopt(
 
 	errno_t error = 0;
 	struct sockopt sopt;
+	int sotc;
 
-	if (sock == NULL || optval == NULL || optlen == 0) return EINVAL;
-
-	sopt.sopt_dir = SOPT_SET;
-	sopt.sopt_val = CAST_USER_ADDR_T(optval);
-	sopt.sopt_valsize = optlen;
-	sopt.sopt_p = kernproc;
+	if (sock == NULL || optval == NULL || optlen != sizeof(int)) return EINVAL;
 
 	socket_lock(sock, 1);
 	if (!(sock->so_state & SS_ISCONNECTED)) {
@@ -507,6 +559,28 @@ sock_settclassopt(
 		goto out;
 	}
 
+	/*
+	 * Set the socket traffic class based on the passed DSCP code point
+	 * regardless of the scope of the destination
+	 */
+	sotc = so_tc_from_dscp((*(const int *)optval) >> 2);
+
+	sopt.sopt_dir = SOPT_SET;
+	sopt.sopt_val = CAST_USER_ADDR_T(&sotc);
+	sopt.sopt_valsize = sizeof(sotc);
+	sopt.sopt_p = kernproc;
+	sopt.sopt_level = SOL_SOCKET;
+	sopt.sopt_name = SO_TRAFFIC_CLASS;
+
+	socket_unlock(sock, 0);
+	error = sosetopt(sock, &sopt);
+	socket_lock(sock, 0);
+
+	if (error != 0) {
+		printf("sock_settclassopt: sosetopt SO_TRAFFIC_CLASS failed %d\n", error);
+		goto out;
+	}
+
 	/* Check if the destination address is LAN or link local address.
 	 * We do not want to set traffic class bits if the destination
 	 * is not local 
@@ -515,6 +589,11 @@ sock_settclassopt(
 		goto out;
 	}
 
+	sopt.sopt_dir = SOPT_SET;
+	sopt.sopt_val = CAST_USER_ADDR_T(optval);
+	sopt.sopt_valsize = optlen;
+	sopt.sopt_p = kernproc;
+
 	switch (sock->so_proto->pr_domain->dom_family) {
 	case AF_INET:
 		sopt.sopt_level = IPPROTO_IP;
@@ -989,59 +1068,114 @@ sock_getlistener(socket_t sock)
 	return (sock->so_head);
 }
 
+static inline void
+sock_set_tcp_stream_priority(socket_t sock)
+{
+	if ((sock->so_proto->pr_domain->dom_family == AF_INET ||
+	    sock->so_proto->pr_domain->dom_family == AF_INET6) &&
+	    sock->so_proto->pr_type == SOCK_STREAM) {
+		set_tcp_stream_priority(sock);
+	}
+}
+
 /*
  * Caller must have ensured socket is valid and won't be going away.
  */
 void
-socket_set_traffic_mgt_flags(socket_t sock, u_int32_t flags)
+socket_set_traffic_mgt_flags_locked(socket_t sock, u_int32_t flags)
 {
 	(void) OSBitOrAtomic(flags, &sock->so_traffic_mgt_flags);
+	sock_set_tcp_stream_priority(sock);
+}
+
+void
+socket_set_traffic_mgt_flags(socket_t sock, u_int32_t flags)
+{
+	socket_lock(sock, 1);
+	socket_set_traffic_mgt_flags_locked(sock, flags);
+	socket_unlock(sock, 1);
 }
 
 /*
  * Caller must have ensured socket is valid and won't be going away.
  */
 void
-socket_clear_traffic_mgt_flags(socket_t sock, u_int32_t flags)
+socket_clear_traffic_mgt_flags_locked(socket_t sock, u_int32_t flags)
 {
 	(void) OSBitAndAtomic(~flags, &sock->so_traffic_mgt_flags);
+	sock_set_tcp_stream_priority(sock);
 }
 
-__private_extern__ void
-set_traffic_class(struct mbuf *m, struct socket *so, int mtc)
+void
+socket_clear_traffic_mgt_flags(socket_t sock, u_int32_t flags)
 {
-#if !PKT_PRIORITY
-#pragma unused(m)
-#pragma unused(so)
-#pragma unused(mtc)
-	return;
-#else /* PKT_PRIORITY */
-	if (!(m->m_flags & M_PKTHDR))
-		return;
+	socket_lock(sock, 1);
+	socket_clear_traffic_mgt_flags_locked(sock, flags);
+	socket_unlock(sock, 1);
+}
+
 
-	if (soisbackground(so)) {
-		m->m_pkthdr.prio = MBUF_TC_BK;			
-	} else if (mtc != MBUF_TC_NONE) {
-		if (mtc >= MBUF_TC_BE && mtc <= MBUF_TC_VO)
-			m->m_pkthdr.prio = mtc;
+/*
+ * Caller must have ensured socket is valid and won't be going away.
+ */
+errno_t
+socket_defunct(struct proc *p, socket_t so, int level)
+{
+	errno_t retval;
+
+	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
+	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL)
+		return (EINVAL);
+
+	socket_lock(so, 1);
+	/*
+	 * The SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC level is meant to tear
+	 * down all of mDNSResponder's IPC sockets, currently those of
+	 * AF_UNIX; note that this is an implementation artifact of
+	 * mDNSResponder.  We do a quick test against the socket buffers
+	 * for SB_UNIX, since that would have been set by unp_attach() at
+	 * socket creation time.
+	 */
+	if (level == SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
+	    (so->so_rcv.sb_flags & so->so_snd.sb_flags & SB_UNIX) != SB_UNIX) {
+		socket_unlock(so, 1);
+		return (EOPNOTSUPP);
+	}
+	retval = sosetdefunct(p, so, level, TRUE);
+	if (retval == 0)
+		retval = sodefunct(p, so, level);
+	socket_unlock(so, 1);
+	return (retval);
+}
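+
+/*
+ * Call sketch (illustrative): a caller holding a valid socket reference
+ * tears down an mDNSResponder IPC socket with
+ *
+ *	error = socket_defunct(p, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC);
+ *
+ * and an arbitrary socket, regardless of domain, with
+ * SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL.
+ */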
+
+errno_t
+sock_setupcall(socket_t sock, sock_upcall callback, void* context)
+{
+	if (sock == NULL)
+		return EINVAL;
+
+	/*
+	 * Note that we don't wait for any in-progress upcall to complete.
+	 */
+	socket_lock(sock, 1);
+
+	sock->so_upcall = (so_upcall) callback;
+	sock->so_upcallarg = context;
+	if (callback) {
+		sock->so_rcv.sb_flags |= SB_UPCALL;
+#if CONFIG_SOWUPCALL
+		sock->so_snd.sb_flags |= SB_UPCALL;
+#endif /* CONFIG_SOWUPCALL */
 	} else {
-		switch (so->so_traffic_class) {
-			case SO_TC_BE:
-				m->m_pkthdr.prio = MBUF_TC_BE;
-				break;
-			case SO_TC_BK:
-				m->m_pkthdr.prio = MBUF_TC_BK;
-				break;
-			case SO_TC_VI:
-				m->m_pkthdr.prio = MBUF_TC_VI;
-				break;
-			case SO_TC_VO:
-				m->m_pkthdr.prio = MBUF_TC_VO;
-				break;
-			default:
-				break;
-		}
+		sock->so_rcv.sb_flags &= ~SB_UPCALL;
+#if CONFIG_SOWUPCALL
+		sock->so_snd.sb_flags &= ~SB_UPCALL;
+#endif /* CONFIG_SOWUPCALL */
 	}
-	return;
-#endif /* PKT_PRIORITY */
+	
+	socket_unlock(sock, 1);
+
+	return 0;
 }
+
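+/*
+ * Registration sketch (illustrative, assuming a kext that owns `so` and
+ * a context pointer `my_ctx`):
+ *
+ *	static void
+ *	my_upcall(socket_t so, void *cookie, int waitf)
+ *	{
+ *		// data is ready; hand off to the kext's own thread
+ *	}
+ *
+ *	error = sock_setupcall(so, my_upcall, my_ctx);
+ *
+ * Passing a NULL callback clears SB_UPCALL again and unregisters.
+ */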
diff --git a/bsd/kern/kpi_socketfilter.c b/bsd/kern/kpi_socketfilter.c
index c8469ab40..67a944c2d 100644
--- a/bsd/kern/kpi_socketfilter.c
+++ b/bsd/kern/kpi_socketfilter.c
@@ -33,17 +33,56 @@
 #include <sys/errno.h>
 #include <sys/malloc.h>
 #include <sys/protosw.h>
+#include <sys/proc.h>
 #include <kern/locks.h>
+#include <kern/thread.h>
+#include <kern/debug.h>
 #include <net/kext_net.h>
 
 #include <libkern/libkern.h>
+#include <libkern/OSAtomic.h>
 
 #include <string.h>
 
+#define	SFEF_ATTACHED		0x1	/* SFE is on socket list */
+#define	SFEF_NODETACH		0x2	/* Detach should not be called */
+#define	SFEF_NOSOCKET		0x4	/* Socket is gone */
+
+struct socket_filter_entry {
+	struct socket_filter_entry	*sfe_next_onsocket;
+	struct socket_filter_entry	*sfe_next_onfilter;
+	struct socket_filter_entry	*sfe_next_oncleanup;
+	
+	struct socket_filter		*sfe_filter;
+	struct socket				*sfe_socket;
+	void						*sfe_cookie;
+	
+	uint32_t					sfe_flags;
+	int32_t						sfe_refcount;
+};
+
+struct socket_filter {
+	TAILQ_ENTRY(socket_filter)	sf_protosw_next;	
+	TAILQ_ENTRY(socket_filter)	sf_global_next;
+	struct socket_filter_entry	*sf_entry_head;
+	
+	struct protosw				*sf_proto;
+	struct sflt_filter			sf_filter;
+	u_int32_t					sf_refcount;
+};
+
+TAILQ_HEAD(socket_filter_list, socket_filter);
+
 static struct socket_filter_list	sock_filter_head;
-static lck_mtx_t					*sock_filter_lock = 0;
+static lck_rw_t						*sock_filter_lock = NULL;
+static lck_mtx_t					*sock_filter_cleanup_lock = NULL;
+static struct socket_filter_entry	*sock_filter_cleanup_entries = NULL;
+static thread_t						sock_filter_cleanup_thread = NULL;
 
-static void	sflt_detach_private(struct socket_filter_entry *entry, int unregistering);
+static void sflt_cleanup_thread(void *, wait_result_t);
+static void sflt_detach_locked(struct socket_filter_entry *entry);
+
+#pragma mark -- Internal State Management --
 
 __private_extern__ void
 sflt_init(void)
@@ -54,70 +93,361 @@ sflt_init(void)
 	
 	TAILQ_INIT(&sock_filter_head);
 	
-	/* Allocate a spin lock */
+	/* Allocate a rw lock */
 	grp_attrib = lck_grp_attr_alloc_init();
 	lck_group = lck_grp_alloc_init("socket filter lock", grp_attrib);
 	lck_grp_attr_free(grp_attrib);
 	lck_attrib = lck_attr_alloc_init();
-	sock_filter_lock = lck_mtx_alloc_init(lck_group, lck_attrib);
+	sock_filter_lock = lck_rw_alloc_init(lck_group, lck_attrib);
+	sock_filter_cleanup_lock = lck_mtx_alloc_init(lck_group, lck_attrib);
 	lck_grp_free(lck_group);
 	lck_attr_free(lck_attrib);
 }
 
-__private_extern__ void
-sflt_initsock(
-	struct socket *so)
+static void
+sflt_retain_locked(
+	struct socket_filter	*filter)
 {
-	struct protosw *proto = so->so_proto;
-	struct socket_filter *filter;
+	filter->sf_refcount++;
+}
+
+static void
+sflt_release_locked(
+	struct socket_filter	*filter)
+{
+	filter->sf_refcount--;
+	if (filter->sf_refcount == 0) {
+		// Call the unregistered function
+		if (filter->sf_filter.sf_unregistered) {
+			lck_rw_unlock_exclusive(sock_filter_lock);
+			filter->sf_filter.sf_unregistered(filter->sf_filter.sf_handle);
+			lck_rw_lock_exclusive(sock_filter_lock);
+		}
+		
+		// Free the entry
+		FREE(filter, M_IFADDR);
+	}
+}
+
+static void
+sflt_entry_retain(
+	struct socket_filter_entry *entry)
+{
+	if (OSIncrementAtomic(&entry->sfe_refcount) <= 0)
+		panic("sflt_entry_retain - sfe_refcount <= 0\n");
+}
+
+static void
+sflt_entry_release(
+	struct socket_filter_entry *entry)
+{
+	SInt32 old = OSDecrementAtomic(&entry->sfe_refcount);
+	if (old == 1) {
+		// That was the last reference
+		
+		// Take the cleanup lock
+		lck_mtx_lock(sock_filter_cleanup_lock);
+		
+		// Put this item on the cleanup list
+		entry->sfe_next_oncleanup = sock_filter_cleanup_entries;
+		sock_filter_cleanup_entries = entry;
+		
+		// If the item is the first item in the list
+		if (entry->sfe_next_oncleanup == NULL) {
+			if (sock_filter_cleanup_thread == NULL) {
+				// Create a thread
+				kernel_thread_start(sflt_cleanup_thread, NULL, &sock_filter_cleanup_thread);
+			} else {
+				// Wakeup the thread
+				wakeup(&sock_filter_cleanup_entries);
+			}
+		}
+		
+		// Drop the cleanup lock
+		lck_mtx_unlock(sock_filter_cleanup_lock);
+	} else if (old <= 0) {
+		panic("sflt_entry_release - sfe_refcount (%d) <= 0\n", (int)old);
+	}
+}
+
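+/*
+ * Lifecycle sketch: the thread that drops the last reference never frees
+ * the entry directly.  sflt_entry_release() queues it on
+ * sock_filter_cleanup_entries and wakes (or lazily creates) the cleanup
+ * thread below, which drops the socket filter lock around each
+ * sf_detach call so that detach callbacks may block safely.
+ */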
+static void
+sflt_cleanup_thread(
+	__unused void * blah,
+	__unused wait_result_t blah2)
+{
+	while (1) {
+		lck_mtx_lock(sock_filter_cleanup_lock);
+		while (sock_filter_cleanup_entries == NULL) {
+			// Sleep until we've got something better to do
+			msleep(&sock_filter_cleanup_entries, sock_filter_cleanup_lock, PWAIT, "sflt_cleanup", NULL);
+		}
+		
+		// Pull the current list of dead items
+		struct socket_filter_entry	*dead = sock_filter_cleanup_entries;
+		sock_filter_cleanup_entries = NULL;
+		
+		// Drop the lock
+		lck_mtx_unlock(sock_filter_cleanup_lock);
+		
+		// Take the socket filter lock
+		lck_rw_lock_exclusive(sock_filter_lock);
+		
+		// Cleanup every dead item
+		struct socket_filter_entry	*entry;
+		for (entry = dead; entry; entry = dead) {
+			struct socket_filter_entry	**nextpp;
+			
+			dead = entry->sfe_next_oncleanup;
+			
+			// Call the detach function if necessary - drop the lock
+			if ((entry->sfe_flags & SFEF_NODETACH) == 0 &&
+				entry->sfe_filter->sf_filter.sf_detach) {
+				entry->sfe_flags |= SFEF_NODETACH;
+				lck_rw_unlock_exclusive(sock_filter_lock);
+				
+				// Warning - passing a potentially dead socket may be bad
+				entry->sfe_filter->sf_filter.
+					sf_detach(entry->sfe_cookie, entry->sfe_socket);
+				
+				lck_rw_lock_exclusive(sock_filter_lock);
+			}
+			
+			// Pull entry off the socket list -- if the socket still exists
+			if ((entry->sfe_flags & SFEF_NOSOCKET) == 0) {
+				for (nextpp = &entry->sfe_socket->so_filt; *nextpp;
+					 nextpp = &(*nextpp)->sfe_next_onsocket) {
+					if (*nextpp == entry) {
+						*nextpp = entry->sfe_next_onsocket;
+						break;
+					}
+				}
+			}
+			
+			// Pull entry off the filter list
+			for (nextpp = &entry->sfe_filter->sf_entry_head; *nextpp;
+				 nextpp = &(*nextpp)->sfe_next_onfilter) {
+				if (*nextpp == entry) {
+					*nextpp = entry->sfe_next_onfilter;
+					break;
+				}
+			}
+			
+			// Release the filter -- may drop lock, but that's okay
+			sflt_release_locked(entry->sfe_filter);
+			entry->sfe_socket = NULL;
+			entry->sfe_filter = NULL;
+			FREE(entry, M_IFADDR);
+		}
+		
+		// Drop the socket filter lock
+		lck_rw_unlock_exclusive(sock_filter_lock);
+	}
+	// Not reached
+}
+
+static int
+sflt_attach_locked(
+	struct socket			*so,
+	struct socket_filter	*filter,
+	int						socklocked)
+{
+	int error = 0;
+	struct socket_filter_entry *entry = NULL;
 	
-	if (TAILQ_FIRST(&proto->pr_filter_head) != NULL) {
-		lck_mtx_lock(sock_filter_lock);
-		TAILQ_FOREACH(filter, &proto->pr_filter_head, sf_protosw_next) {
-			sflt_attach_private(so, filter, 0, 0);
+	if (filter == NULL)
+		error = ENOENT;
+	
+	if (error == 0) {
+		/* allocate the socket filter entry */
+		MALLOC(entry, struct socket_filter_entry *, sizeof(*entry), M_IFADDR, M_WAITOK);
+		if (entry == NULL) {
+			error = ENOMEM;
+		}
+	}
+	
+	if (error == 0) {
+		/* Initialize the socket filter entry */
+		entry->sfe_cookie = NULL;
+		entry->sfe_flags = SFEF_ATTACHED;
+		entry->sfe_refcount = 1; // corresponds to SFEF_ATTACHED flag set
+		
+		/* Put the entry in the filter list */
+		sflt_retain_locked(filter);
+		entry->sfe_filter = filter;
+		entry->sfe_next_onfilter = filter->sf_entry_head;
+		filter->sf_entry_head = entry;
+		
+		/* Put the entry on the socket filter list */
+		entry->sfe_socket = so;
+		entry->sfe_next_onsocket = so->so_filt;
+		so->so_filt = entry;
+		
+		if (entry->sfe_filter->sf_filter.sf_attach) {
+			// Retain the entry while we call attach
+			sflt_entry_retain(entry);
+			
+			// Release the filter lock -- callers must be aware we will do this
+			lck_rw_unlock_exclusive(sock_filter_lock);
+			
+			// Unlock the socket
+			if (socklocked)
+				socket_unlock(so, 0);
+			
+			// It's finally safe to call the filter function
+			error = entry->sfe_filter->sf_filter.sf_attach(&entry->sfe_cookie, so);
+			
+			// Lock the socket again
+			if (socklocked)
+				socket_lock(so, 0);
+			
+			// Lock the filters again
+			lck_rw_lock_exclusive(sock_filter_lock);
+			
+			// If the attach function returns an error, this filter must be detached
+			if (error) {
+				entry->sfe_flags |= SFEF_NODETACH; // don't call sf_detach
+				sflt_detach_locked(entry);
+			}
+			
+			// Release the retain we held through the attach call
+			sflt_entry_release(entry);
 		}
-		lck_mtx_unlock(sock_filter_lock);
 	}
+	
+	return error;
 }
 
-__private_extern__ void
-sflt_termsock(
-	struct socket *so)
+errno_t
+sflt_attach_internal(
+	socket_t	socket,
+	sflt_handle	handle)
 {
-	struct socket_filter_entry *filter;
-	struct socket_filter_entry *filter_next;
+	if (socket == NULL || handle == 0)
+		return EINVAL;
+	
+	int result = EINVAL;
+	
+	lck_rw_lock_exclusive(sock_filter_lock);
+	
+	struct socket_filter *filter = NULL;
+	TAILQ_FOREACH(filter, &sock_filter_head, sf_global_next) {
+		if (filter->sf_filter.sf_handle == handle) break;
+	}
+	
+	if (filter) {
+		result = sflt_attach_locked(socket, filter, 1);
+	}
 	
-	for (filter = so->so_filt; filter; filter = filter_next) {
-		filter_next = filter->sfe_next_onsocket;
-		sflt_detach_private(filter, 0);
+	lck_rw_unlock_exclusive(sock_filter_lock);
+	
+	return result;
+}
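+
+/*
+ * Usage sketch (illustrative, assuming the exported sflt_attach()
+ * wrapper resolves here): a kext that registered a filter under
+ * MY_HANDLE with sflt_register() can attach it to a single socket:
+ *
+ *	errno_t err = sflt_attach(sock, MY_HANDLE);
+ *
+ * MY_HANDLE stands in for the kext's registered sflt_handle.
+ */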
+
+static void
+sflt_detach_locked(
+	struct socket_filter_entry	*entry)
+{
+	if ((entry->sfe_flags & SFEF_ATTACHED) != 0) {
+		entry->sfe_flags &= ~SFEF_ATTACHED;
+		sflt_entry_release(entry);
 	}
-	so->so_filt = NULL;
 }
 
+#pragma mark -- Socket Layer Hooks --
+
 __private_extern__ void
-sflt_use(
+sflt_initsock(
 	struct socket *so)
 {
-	so->so_filteruse++;
+	struct protosw *proto = so->so_proto;
+	
+	lck_rw_lock_shared(sock_filter_lock);
+	if (TAILQ_FIRST(&proto->pr_filter_head) != NULL) {
+		// Promote lock to exclusive
+		if (!lck_rw_lock_shared_to_exclusive(sock_filter_lock))
+			lck_rw_lock_exclusive(sock_filter_lock);
+		
+		// Warning: A filter unregistering will be pulled out of the list.
+		// This could happen while we drop the lock in sflt_attach_locked
+		// or sflt_release_locked. For this reason we retain a reference
+		// on the filter (or next_filter) while calling this function
+		//
+		// This protects us from a panic, but it could result in a
+		// socket being created without all of the global filters if
+		// we're attaching a filter as it is removed, if that's possible.
+		struct socket_filter *filter = TAILQ_FIRST(&proto->pr_filter_head);
+		sflt_retain_locked(filter);
+		
+		while (filter) {
+			struct socket_filter *filter_next;
+			
+			// Warning: sflt_attach_locked will drop the lock
+			sflt_attach_locked(so, filter, 0);
+			
+			filter_next = TAILQ_NEXT(filter, sf_protosw_next);
+			if (filter_next)
+				sflt_retain_locked(filter_next);
+			
+			// Warning: sflt_release_locked may remove the filter from the queue
+			sflt_release_locked(filter);
+			filter = filter_next;
+		}
+	}
+	lck_rw_done(sock_filter_lock);
 }
 
+/*
+ * sflt_termsock
+ *
+ * Detaches all filters from the socket.
+ */
+
 __private_extern__ void
-sflt_unuse(
+sflt_termsock(
 	struct socket *so)
 {
-	so->so_filteruse--;
-	if (so->so_filteruse == 0) {
-		struct socket_filter_entry *filter;
-		struct socket_filter_entry *next_filter;
-		// search for detaching filters
-		for (filter = so->so_filt; filter; filter = next_filter) {
-			next_filter = filter->sfe_next_onsocket;
+	lck_rw_lock_exclusive(sock_filter_lock);
+	
+	struct socket_filter_entry *entry;
+	
+	while ((entry = so->so_filt) != NULL) {
+		// Pull filter off the socket
+		so->so_filt = entry->sfe_next_onsocket;
+		entry->sfe_flags |= SFEF_NOSOCKET;
+		
+		// Call detach
+		sflt_detach_locked(entry);
+		
+		// In sflt_termsock we can't return until the detach function
+		// has been called.  This is gross because the socket filter
+		// entry could be freed when we drop the lock, so we make
+		// copies on the stack and retain everything we need before
+		// dropping the lock.
+		if ((entry->sfe_flags & SFEF_NODETACH) == 0 &&
+			entry->sfe_filter->sf_filter.sf_detach) {
+			void					*sfe_cookie = entry->sfe_cookie;
+			struct socket_filter	*sfe_filter = entry->sfe_filter;
 			
-			if (filter->sfe_flags & SFEF_DETACHUSEZERO) {
-				sflt_detach_private(filter, 0);
-			}
+			// Retain the socket filter
+			sflt_retain_locked(sfe_filter);
+			
+			// Mark that we've called the detach function
+			entry->sfe_flags |= SFEF_NODETACH;
+			
+			// Drop the lock around the call to the detach function
+			lck_rw_unlock_exclusive(sock_filter_lock);
+			sfe_filter->sf_filter.sf_detach(sfe_cookie, so);
+			lck_rw_lock_exclusive(sock_filter_lock);
+			
+			// Release the filter
+			sflt_release_locked(sfe_filter);
 		}
 	}
+	
+	lck_rw_unlock_exclusive(sock_filter_lock);
 }
 
 __private_extern__ void
@@ -126,280 +456,595 @@ sflt_notify(
 	sflt_event_t	event,
 	void			*param)
 {
-	struct socket_filter_entry	*filter;
-	int						 	filtered = 0;
+	if (so->so_filt == NULL) return;
+	
+	struct socket_filter_entry	*entry;
+	int						 	unlocked = 0;
 	
-	for (filter = so->so_filt; filter;
-		 filter = filter->sfe_next_onsocket) {
-		if (filter->sfe_filter->sf_filter.sf_notify) {
-			if (filtered == 0) {
-				filtered = 1;
-				sflt_use(so);
+	lck_rw_lock_shared(sock_filter_lock);
+	for (entry = so->so_filt; entry; entry = entry->sfe_next_onsocket) {
+		if ((entry->sfe_flags & SFEF_ATTACHED)
+			&& entry->sfe_filter->sf_filter.sf_notify) {
+			// Retain the filter entry and release the socket filter lock
+			sflt_entry_retain(entry);
+			lck_rw_unlock_shared(sock_filter_lock);
+			
+			// If the socket isn't already unlocked, unlock it
+			if (unlocked == 0) {
+				unlocked = 1;
 				socket_unlock(so, 0);
 			}
-			filter->sfe_filter->sf_filter.sf_notify(
-				filter->sfe_cookie, so, event, param);
+			
+			// Finally call the filter
+			entry->sfe_filter->sf_filter.
+				sf_notify(entry->sfe_cookie, so, event, param);
+			
+			// Take the socket filter lock again and release the entry
+			lck_rw_lock_shared(sock_filter_lock);
+			sflt_entry_release(entry);
 		}
 	}
+	lck_rw_unlock_shared(sock_filter_lock);
 	
-	if (filtered != 0) {
+	if (unlocked != 0) {
 		socket_lock(so, 0);
-		sflt_unuse(so);
 	}
 }
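+
+/*
+ * All of the hook functions below follow the same locking pattern as
+ * sflt_notify() above: take the filter lock shared, retain an entry,
+ * drop the filter lock and (once) the socket lock around the filter
+ * callback, then reacquire the filter lock and release the entry.
+ */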
 
 __private_extern__ int
-sflt_data_in(
-	struct socket			*so,
-	const struct sockaddr	*from,
-	mbuf_t					*data,
-	mbuf_t					*control,
-	sflt_data_flag_t		flags,
-	int						*filtered)
+sflt_ioctl(
+	struct socket	*so,
+	u_long			cmd,
+	caddr_t			data)
 {
-	struct socket_filter_entry	*filter;
+	if (so->so_filt == NULL) return 0;
+	
+	struct socket_filter_entry	*entry;
+	int						 	unlocked = 0;
 	int							error = 0;
-	int							filtered_storage;
-	
-	if (filtered == NULL)
-		filtered = &filtered_storage;
-	*filtered = 0;
-	
-	for (filter = so->so_filt; filter && (error == 0);
-		 filter = filter->sfe_next_onsocket) {
-		if (filter->sfe_filter->sf_filter.sf_data_in) {
-			if (*filtered == 0) {
-				*filtered = 1;
-				sflt_use(so);
+	
+	lck_rw_lock_shared(sock_filter_lock);
+	for (entry = so->so_filt; entry && error == 0;
+		 entry = entry->sfe_next_onsocket) {
+		if ((entry->sfe_flags & SFEF_ATTACHED)
+			&& entry->sfe_filter->sf_filter.sf_ioctl) {
+			// Retain the filter entry and release the socket filter lock
+			sflt_entry_retain(entry);
+			lck_rw_unlock_shared(sock_filter_lock);
+			
+			// If the socket isn't already unlocked, unlock it
+			if (unlocked == 0) {
 				socket_unlock(so, 0);
+				unlocked = 1;
 			}
-			error = filter->sfe_filter->sf_filter.sf_data_in(
-						filter->sfe_cookie, so, from, data, control, flags);
+			
+			// Call the filter
+			error = entry->sfe_filter->sf_filter.
+				sf_ioctl(entry->sfe_cookie, so, cmd, data);
+			
+			// Take the socket filter lock again and release the entry
+			lck_rw_lock_shared(sock_filter_lock);
+			sflt_entry_release(entry);
 		}
 	}
-	
-	if (*filtered != 0) {
+	lck_rw_unlock_shared(sock_filter_lock);
+
+	if (unlocked) {
 		socket_lock(so, 0);
-		sflt_unuse(so);
 	}
 	
 	return error;
 }
 
-/* sflt_attach_private
- *
- * Assumptions: If filter is not NULL, socket_filter_lock is held.
- */
-
 __private_extern__ int
-sflt_attach_private(
-	struct socket *so,
-	struct socket_filter *filter,
-	sflt_handle			handle,
-	int sock_locked)
+sflt_bind(
+	struct socket			*so,
+	const struct sockaddr	*nam)
 {
-	struct socket_filter_entry *entry = NULL;
-	int didlock = 0;
-	int error = 0;
+	if (so->so_filt == NULL) return 0;
 	
-	if (filter == NULL) {
-		/* Find the filter by the handle */
-		lck_mtx_lock(sock_filter_lock);
-		didlock = 1;
-		
-		TAILQ_FOREACH(filter, &sock_filter_head, sf_global_next) {
-			if (filter->sf_filter.sf_handle == handle)
-				break;
+	struct socket_filter_entry	*entry;
+	int						 	unlocked = 0;
+	int							error = 0;
+	
+	lck_rw_lock_shared(sock_filter_lock);
+	for (entry = so->so_filt; entry && error == 0;
+		 entry = entry->sfe_next_onsocket) {
+		if ((entry->sfe_flags & SFEF_ATTACHED)
+			&& entry->sfe_filter->sf_filter.sf_bind) {
+			// Retain the filter entry and release the socket filter lock
+			sflt_entry_retain(entry);
+			lck_rw_unlock_shared(sock_filter_lock);
+			
+			// If the socket isn't already unlocked, unlock it
+			if (unlocked == 0) {
+				socket_unlock(so, 0);
+				unlocked = 1;
+			}
+			
+			// Call the filter
+			error = entry->sfe_filter->sf_filter.
+				sf_bind(entry->sfe_cookie, so, nam);
+			
+			// Take the socket filter lock again and release the entry
+			lck_rw_lock_shared(sock_filter_lock);
+			sflt_entry_release(entry);
 		}
 	}
+	lck_rw_unlock_shared(sock_filter_lock);
+
+	if (unlocked) {
+		socket_lock(so, 0);
+	}
 	
-	if (filter == NULL)
-		error = ENOENT;
+	return error;
+}
+
+__private_extern__ int
+sflt_listen(
+	struct socket			*so)
+{
+	if (so->so_filt == NULL) return 0;
 	
-	if (error == 0) {
-		/* allocate the socket filter entry */
-		MALLOC(entry, struct socket_filter_entry *, sizeof(*entry), M_IFADDR, M_WAITOK);
-		if (entry == NULL) {
-			error = ENOMEM;
+	struct socket_filter_entry	*entry;
+	int						 	unlocked = 0;
+	int							error = 0;
+	
+	lck_rw_lock_shared(sock_filter_lock);
+	for (entry = so->so_filt; entry && error == 0;
+		 entry = entry->sfe_next_onsocket) {
+		if ((entry->sfe_flags & SFEF_ATTACHED)
+			&& entry->sfe_filter->sf_filter.sf_listen) {
+			// Retain the filter entry and release the socket filter lock
+			sflt_entry_retain(entry);
+			lck_rw_unlock_shared(sock_filter_lock);
+			
+			// If the socket isn't already unlocked, unlock it
+			if (unlocked == 0) {
+				socket_unlock(so, 0);
+				unlocked = 1;
+			}
+			
+			// Call the filter
+			error = entry->sfe_filter->sf_filter.
+				sf_listen(entry->sfe_cookie, so);
+			
+			// Take the socket filter lock again and release the entry
+			lck_rw_lock_shared(sock_filter_lock);
+			sflt_entry_release(entry);
 		}
 	}
+	lck_rw_unlock_shared(sock_filter_lock);
+
+	if (unlocked) {
+		socket_lock(so, 0);
+	}
 	
-	if (error == 0) {
-		/* Initialize the socket filter entry and call the attach function */
-		entry->sfe_filter = filter;
-		entry->sfe_socket = so;
-		entry->sfe_cookie = NULL;
-		entry->sfe_flags = 0;
-		if (entry->sfe_filter->sf_filter.sf_attach) {
-			filter->sf_usecount++;
-		
-			if (sock_locked)
-				socket_unlock(so, 0);	
-			error = entry->sfe_filter->sf_filter.sf_attach(&entry->sfe_cookie, so);
-			if (sock_locked)
-				socket_lock(so, 0);	
-			
-			filter->sf_usecount--;
+	return error;
+}
+
+__private_extern__ int
+sflt_accept(
+	struct socket			*head,
+	struct socket			*so,
+	const struct sockaddr	*local,
+	const struct sockaddr	*remote)
+{
+	if (so->so_filt == NULL) return 0;
+	
+	struct socket_filter_entry	*entry;
+	int						 	unlocked = 0;
+	int							error = 0;
+	
+	lck_rw_lock_shared(sock_filter_lock);
+	for (entry = so->so_filt; entry && error == 0;
+		 entry = entry->sfe_next_onsocket) {
+		if ((entry->sfe_flags & SFEF_ATTACHED)
+			&& entry->sfe_filter->sf_filter.sf_accept) {
+			// Retain the filter entry and release the socket filter lock
+			sflt_entry_retain(entry);
+			lck_rw_unlock_shared(sock_filter_lock);
 			
-			/* If the attach function returns an error, this filter is not attached */
-			if (error) {
-				FREE(entry, M_IFADDR);
-				entry = NULL;
+			// If the socket isn't already unlocked, unlock it
+			if (unlocked == 0) {
+				socket_unlock(so, 0);
+				unlocked = 1;
 			}
+			
+			// Call the filter
+			error = entry->sfe_filter->sf_filter.
+				sf_accept(entry->sfe_cookie, head, so, local, remote);
+			
+			// Take the socket filter lock again and release the entry
+			lck_rw_lock_shared(sock_filter_lock);
+			sflt_entry_release(entry);
 		}
 	}
-	
-	if (error == 0) {
-		/* Put the entry in the socket list */
-		entry->sfe_next_onsocket = so->so_filt;
-		so->so_filt = entry;
-		
-		/* Put the entry in the filter list */
-		entry->sfe_next_onfilter = filter->sf_entry_head;
-		filter->sf_entry_head = entry;
-		
-		/* Incremenet the parent filter's usecount */
-		filter->sf_usecount++;
+	lck_rw_unlock_shared(sock_filter_lock);
+
+	if (unlocked) {
+		socket_lock(so, 0);
 	}
 	
-	if (didlock) {
-		lck_mtx_unlock(sock_filter_lock);
+	return error;
+}
+
+__private_extern__ int
+sflt_getsockname(
+	struct socket			*so,
+	struct sockaddr			**local)
+{
+	if (so->so_filt == NULL) return 0;
+	
+	struct socket_filter_entry	*entry;
+	int						 	unlocked = 0;
+	int							error = 0;
+	
+	lck_rw_lock_shared(sock_filter_lock);
+	for (entry = so->so_filt; entry && error == 0;
+		 entry = entry->sfe_next_onsocket) {
+		if ((entry->sfe_flags & SFEF_ATTACHED)
+			&& entry->sfe_filter->sf_filter.sf_getsockname) {
+			// Retain the filter entry and release the socket filter lock
+			sflt_entry_retain(entry);
+			lck_rw_unlock_shared(sock_filter_lock);
+			
+			// If the socket isn't already unlocked, unlock it
+			if (unlocked == 0) {
+				socket_unlock(so, 0);
+				unlocked = 1;
+			}
+			
+			// Call the filter
+			error = entry->sfe_filter->sf_filter.
+				sf_getsockname(entry->sfe_cookie, so, local);
+			
+			// Take the socket filter lock again and release the entry
+			lck_rw_lock_shared(sock_filter_lock);
+			sflt_entry_release(entry);
+		}
+	}
+	lck_rw_unlock_shared(sock_filter_lock);
+
+	if (unlocked) {
+		socket_lock(so, 0);
 	}
 	
 	return error;
 }
 
+__private_extern__ int
+sflt_getpeername(
+	struct socket			*so,
+	struct sockaddr			**remote)
+{
+	if (so->so_filt == NULL) return 0;
+	
+	struct socket_filter_entry	*entry;
+	int						 	unlocked = 0;
+	int							error = 0;
+	
+	lck_rw_lock_shared(sock_filter_lock);
+	for (entry = so->so_filt; entry && error == 0;
+		 entry = entry->sfe_next_onsocket) {
+		if ((entry->sfe_flags & SFEF_ATTACHED)
+			&& entry->sfe_filter->sf_filter.sf_getpeername) {
+			// Retain the filter entry and release the socket filter lock
+			sflt_entry_retain(entry);
+			lck_rw_unlock_shared(sock_filter_lock);
+			
+			// If the socket isn't already unlocked, unlock it
+			if (unlocked == 0) {
+				socket_unlock(so, 0);
+				unlocked = 1;
+			}
+			
+			// Call the filter
+			error = entry->sfe_filter->sf_filter.
+				sf_getpeername(entry->sfe_cookie, so, remote);
+			
+			// Take the socket filter lock again and release the entry
+			lck_rw_lock_shared(sock_filter_lock);
+			sflt_entry_release(entry);
+		}
+	}
+	lck_rw_unlock_shared(sock_filter_lock);
 
-/* sflt_detach_private
- *
- * Assumptions: if you pass 0 in for the second parameter, you are holding the
- * socket lock for the socket the entry is attached to. If you pass 1 in for
- * the second parameter, it is assumed that the entry is not on the filter's
- * list and the socket lock is not held.
- */
+	if (unlocked) {
+		socket_lock(so, 0);
+	}
+	
+	return error;
+}
 
-static void
-sflt_detach_private(
-	struct socket_filter_entry *entry,
-	int	unregistering)
+__private_extern__ int
+sflt_connectin(
+	struct socket			*so,
+	const struct sockaddr	*remote)
 {
-	struct socket_filter_entry **next_ptr;
-	int				detached = 0;
-	int				found = 0;
+	if (so->so_filt == NULL) return 0;
 	
-	if (unregistering) {
-		socket_lock(entry->sfe_socket, 0);
-	}
+	struct socket_filter_entry	*entry;
+	int						 	unlocked = 0;
+	int							error = 0;
 	
-	/*
-	 * Attempt to find the entry on the filter's list and
-	 * remove it. This prevents a filter detaching at the
-	 * same time from attempting to remove the same entry.
-	 */
-	lck_mtx_lock(sock_filter_lock);
-	if (!unregistering) {
-		if ((entry->sfe_flags & SFEF_UNREGISTERING) != 0) {
-			/*
-			 * Another thread is unregistering the filter, we
-			 * need to avoid detaching the filter here so the
-			 * socket won't go away.  Bump up the socket's
-			 * usecount so that it won't be freed until after
-			 * the filter unregistration has been completed;
-			 * at this point the caller has already held the
-			 * socket's lock, so we can directly modify the
-			 * usecount.
-			 */
-			if (!(entry->sfe_flags & SFEF_DETACHXREF)) {
-				entry->sfe_socket->so_usecount++;
-				entry->sfe_flags |= SFEF_DETACHXREF;
+	lck_rw_lock_shared(sock_filter_lock);
+	for (entry = so->so_filt; entry && error == 0;
+		 entry = entry->sfe_next_onsocket) {
+		if ((entry->sfe_flags & SFEF_ATTACHED)
+			&& entry->sfe_filter->sf_filter.sf_connect_in) {
+			// Retain the filter entry and release the socket filter lock
+			sflt_entry_retain(entry);
+			lck_rw_unlock_shared(sock_filter_lock);
+			
+			// If the socket isn't already unlocked, unlock it
+			if (unlocked == 0) {
+				socket_unlock(so, 0);
+				unlocked = 1;
 			}
-			lck_mtx_unlock(sock_filter_lock);
-			return;
+			
+			// Call the filter
+			error = entry->sfe_filter->sf_filter.
+				sf_connect_in(entry->sfe_cookie, so, remote);
+			
+			// Take the socket filter lock again and release the entry
+			lck_rw_lock_shared(sock_filter_lock);
+			sflt_entry_release(entry);
 		}
-		for (next_ptr = &entry->sfe_filter->sf_entry_head; *next_ptr;
-			 next_ptr = &((*next_ptr)->sfe_next_onfilter)) {
-			if (*next_ptr == entry) {
-				found = 1;
-				*next_ptr = entry->sfe_next_onfilter;
-				break;
+	}
+	lck_rw_unlock_shared(sock_filter_lock);
+
+	if (unlocked) {
+		socket_lock(so, 0);
+	}
+	
+	return error;
+}
+
+__private_extern__ int
+sflt_connectout(
+	struct socket			*so,
+	const struct sockaddr	*nam)
+{
+	if (so->so_filt == NULL) return 0;
+	
+	struct socket_filter_entry	*entry;
+	int						 	unlocked = 0;
+	int							error = 0;
+	
+	lck_rw_lock_shared(sock_filter_lock);
+	for (entry = so->so_filt; entry && error == 0;
+		 entry = entry->sfe_next_onsocket) {
+		if ((entry->sfe_flags & SFEF_ATTACHED)
+			&& entry->sfe_filter->sf_filter.sf_connect_out) {
+			// Retain the filter entry and release the socket filter lock
+			sflt_entry_retain(entry);
+			lck_rw_unlock_shared(sock_filter_lock);
+			
+			// If the socket isn't already unlocked, unlock it
+			if (unlocked == 0) {
+				socket_unlock(so, 0);
+				unlocked = 1;
 			}
-		}
-		
-		if (!found && (entry->sfe_flags & SFEF_DETACHUSEZERO) == 0) {
-			lck_mtx_unlock(sock_filter_lock);
-			return;
-		}
-	} else {
-		/*
-		 * Clear the removing flag. We will perform the detach here or
-		 * request a delayed detach.  Since we do an extra ref release
-		 * below, bump up the usecount if we haven't done so.
-		 */
-		entry->sfe_flags &= ~SFEF_UNREGISTERING;
-		if (!(entry->sfe_flags & SFEF_DETACHXREF)) {
-			entry->sfe_socket->so_usecount++;
-			entry->sfe_flags |= SFEF_DETACHXREF;
+			
+			// Call the filter
+			error = entry->sfe_filter->sf_filter.
+				sf_connect_out(entry->sfe_cookie, so, nam);
+			
+			// Take the socket filter lock again and release the entry
+			lck_rw_lock_shared(sock_filter_lock);
+			sflt_entry_release(entry);
 		}
 	}
+	lck_rw_unlock_shared(sock_filter_lock);
 
-	if (entry->sfe_socket->so_filteruse != 0) {
-		entry->sfe_flags |= SFEF_DETACHUSEZERO;
-		lck_mtx_unlock(sock_filter_lock);
+	if (unlocked) {
+		socket_lock(so, 0);
+	}
 	
-		if (unregistering) {
-#if DEBUG
-			printf("sflt_detach_private unregistering SFEF_DETACHUSEZERO "
-				"so%p so_filteruse %u so_usecount %d\n",
-				entry->sfe_socket, entry->sfe_socket->so_filteruse, 
-				entry->sfe_socket->so_usecount);
-#endif
-			socket_unlock(entry->sfe_socket, 0);	
+	return error;
+}
+
+__private_extern__ int
+sflt_setsockopt(
+	struct socket	*so,
+	struct sockopt	*sopt)
+{
+	if (so->so_filt == NULL) return 0;
+	
+	struct socket_filter_entry	*entry;
+	int						 	unlocked = 0;
+	int							error = 0;
+	
+	lck_rw_lock_shared(sock_filter_lock);
+	for (entry = so->so_filt; entry && error == 0;
+		 entry = entry->sfe_next_onsocket) {
+		if ((entry->sfe_flags & SFEF_ATTACHED)
+			&& entry->sfe_filter->sf_filter.sf_setoption) {
+			// Retain the filter entry and release the socket filter lock
+			sflt_entry_retain(entry);
+			lck_rw_unlock_shared(sock_filter_lock);
+			
+			// If the socket isn't already unlocked, unlock it
+			if (unlocked == 0) {
+				socket_unlock(so, 0);
+				unlocked = 1;
+			}
+			
+			// Call the filter
+			error = entry->sfe_filter->sf_filter.
+				sf_setoption(entry->sfe_cookie, so, sopt);
+			
+			// Take the socket filter lock again and release the entry
+			lck_rw_lock_shared(sock_filter_lock);
+			sflt_entry_release(entry);
 		}
-		return;
-	} else {
-		/*
-		 * Check if we are removing the last attached filter and
-		 * the parent filter is being unregistered.
-		 */
-		entry->sfe_filter->sf_usecount--;
-		if ((entry->sfe_filter->sf_usecount == 0) &&
-			(entry->sfe_filter->sf_flags & SFF_DETACHING) != 0)
-			detached = 1;
-	}
-	lck_mtx_unlock(sock_filter_lock);
-		
-	/* Remove from the socket list */
-	for (next_ptr = &entry->sfe_socket->so_filt; *next_ptr;
-		 next_ptr = &((*next_ptr)->sfe_next_onsocket)) {
-		if (*next_ptr == entry) {
-			*next_ptr = entry->sfe_next_onsocket;
-			break;
+	}
+	lck_rw_unlock_shared(sock_filter_lock);
+
+	if (unlocked) {
+		socket_lock(so, 0);
+	}
+	
+	return error;
+}
+
+__private_extern__ int
+sflt_getsockopt(
+	struct socket	*so,
+	struct sockopt	*sopt)
+{
+	if (so->so_filt == NULL) return 0;
+	
+	struct socket_filter_entry	*entry;
+	int						 	unlocked = 0;
+	int							error = 0;
+	
+	lck_rw_lock_shared(sock_filter_lock);
+	for (entry = so->so_filt; entry && error == 0;
+		 entry = entry->sfe_next_onsocket) {
+		if ((entry->sfe_flags & SFEF_ATTACHED)
+			&& entry->sfe_filter->sf_filter.sf_getoption) {
+			// Retain the filter entry and release the socket filter lock
+			sflt_entry_retain(entry);
+			lck_rw_unlock_shared(sock_filter_lock);
+			
+			// If the socket isn't already unlocked, unlock it
+			if (unlocked == 0) {
+				socket_unlock(so, 0);
+				unlocked = 1;
+			}
+			
+			// Call the filter
+			error = entry->sfe_filter->sf_filter.
+				sf_getoption(entry->sfe_cookie, so, sopt);
+			
+			// Take the socket filter lock again and release the entry
+			lck_rw_lock_shared(sock_filter_lock);
+			sflt_entry_release(entry);
 		}
 	}
+	lck_rw_unlock_shared(sock_filter_lock);
+
+	if (unlocked) {
+		socket_lock(so, 0);
+	}
+	
+	return error;
+}
+
+__private_extern__ int
+sflt_data_out(
+	struct socket			*so,
+	const struct sockaddr	*to,
+	mbuf_t					*data,
+	mbuf_t					*control,
+	sflt_data_flag_t		flags)
+{
+	if (so->so_filt == NULL) return 0;
 	
-	if (entry->sfe_filter->sf_filter.sf_detach)
-		entry->sfe_filter->sf_filter.sf_detach(entry->sfe_cookie, entry->sfe_socket);
+	struct socket_filter_entry	*entry;
+	int						 	unlocked = 0;
+	int							setsendthread = 0;
+	int							error = 0;
 	
-	if (detached && entry->sfe_filter->sf_filter.sf_unregistered) {
-		entry->sfe_filter->sf_filter.sf_unregistered(entry->sfe_filter->sf_filter.sf_handle);
-		FREE(entry->sfe_filter, M_IFADDR);
+	lck_rw_lock_shared(sock_filter_lock);
+	for (entry = so->so_filt; entry && error == 0;
+		 entry = entry->sfe_next_onsocket) {
+		if ((entry->sfe_flags & SFEF_ATTACHED)
+			&& entry->sfe_filter->sf_filter.sf_data_out) {
+			// Retain the filter entry and release the socket filter lock
+			sflt_entry_retain(entry);
+			lck_rw_unlock_shared(sock_filter_lock);
+			
+			// If the socket isn't already unlocked, unlock it
+			if (unlocked == 0) {
+				if (so->so_send_filt_thread == NULL) {
+					setsendthread = 1;
+					so->so_send_filt_thread = current_thread();
+				}
+				socket_unlock(so, 0);
+				unlocked = 1;
+			}
+			
+			// Call the filter
+			error = entry->sfe_filter->sf_filter.
+				sf_data_out(entry->sfe_cookie, so, to, data, control, flags);
+			
+			// Take the socket filter lock again and release the entry
+			lck_rw_lock_shared(sock_filter_lock);
+			sflt_entry_release(entry);
+		}
 	}
+	lck_rw_unlock_shared(sock_filter_lock);
 
-	if (unregistering) 
-		socket_unlock(entry->sfe_socket, 1);
+	if (unlocked) {
+		socket_lock(so, 0);
+		if (setsendthread) so->so_send_filt_thread = NULL;
+	}
+	
+	return error;
+}
 
-	FREE(entry, M_IFADDR);
+__private_extern__ int
+sflt_data_in(
+	struct socket			*so,
+	const struct sockaddr	*from,
+	mbuf_t					*data,
+	mbuf_t					*control,
+	sflt_data_flag_t		flags)
+{
+	if (so->so_filt == NULL) return 0;
+	
+	struct socket_filter_entry	*entry;
+	int							error = 0;
+	int							unlocked = 0;
+	
+	lck_rw_lock_shared(sock_filter_lock);
+	
+	for (entry = so->so_filt; entry && (error == 0);
+		 entry = entry->sfe_next_onsocket) {
+		if ((entry->sfe_flags & SFEF_ATTACHED) &&
+			entry->sfe_filter->sf_filter.sf_data_in) {
+			// Retain the filter entry and release the socket filter lock
+			sflt_entry_retain(entry);
+			lck_rw_unlock_shared(sock_filter_lock);
+			
+			// If the socket isn't already unlocked, unlock it
+			if (unlocked == 0) {
+				unlocked = 1;
+				socket_unlock(so, 0);
+			}
+			
+			// Call the filter
+			error = entry->sfe_filter->sf_filter.sf_data_in(
+						entry->sfe_cookie, so, from, data, control, flags);
+			
+			// Take the socket filter lock again and release the entry
+			lck_rw_lock_shared(sock_filter_lock);
+			sflt_entry_release(entry);
+		}
+	}
+	lck_rw_unlock_shared(sock_filter_lock);
+	
+	if (unlocked) {
+		socket_lock(so, 0);
+	}
+	
+	return error;
 }
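
Each of these dispatch routines drops the socket lock before calling into the
filter, so a filter's callbacks always run with the socket unlocked.  For
context, a minimal sketch of a filter these routines would dispatch to, built
on the public KPI in <sys/kpi_socketfilter.h> (the handle value, names, and
the choice of TCP/IPv4 here are illustrative assumptions, not part of this
patch):

	#include <mach/mach_types.h>
	#include <sys/socket.h>
	#include <sys/kpi_socketfilter.h>
	#include <netinet/in.h>

	#define DEMO_SFLT_HANDLE	0x64656d6f	/* hypothetical; must be unique */

	/* Runs with the socket unlocked (see sflt_data_in above); returning
	 * 0 passes the traffic through unmodified. */
	static errno_t
	demo_data_in(void *cookie, socket_t so, const struct sockaddr *from,
	    mbuf_t *data, mbuf_t *control, sflt_data_flag_t flags)
	{
	#pragma unused(cookie, so, from, data, control, flags)
		return (0);
	}

	static struct sflt_filter demo_filter = {
		.sf_handle	= DEMO_SFLT_HANDLE,
		.sf_flags	= SFLT_GLOBAL,
		.sf_name	= (char *)"com.example.demofilter",
		.sf_data_in	= demo_data_in,
	};

	/* Typically called from a kext's start routine; sflt_unregister()
	 * with the same handle undoes the registration. */
	static errno_t
	demo_register(void)
	{
		return (sflt_register(&demo_filter, PF_INET, SOCK_STREAM,
		    IPPROTO_TCP));
	}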
 
+#pragma mark -- KPI --
+
 errno_t
 sflt_attach(
 	socket_t	socket,
 	sflt_handle	handle)
 {
-	if (socket == NULL || handle == 0)
-		return EINVAL;
-	
-	return sflt_attach_private(socket, NULL, handle, 0);
+	socket_lock(socket, 1);
+	errno_t result = sflt_attach_internal(socket, handle);
+	socket_unlock(socket, 1);
+	return result;
 }
 
 errno_t
@@ -407,34 +1052,29 @@ sflt_detach(
 	socket_t	socket,
 	sflt_handle	handle)
 {
-	struct socket_filter_entry	*filter;
+	struct socket_filter_entry	*entry;
 	errno_t	result = 0;
 	
 	if (socket == NULL || handle == 0)
 		return EINVAL;
 	
-	socket_lock(socket, 1);
-	
-	for (filter = socket->so_filt; filter;
-		 filter = filter->sfe_next_onsocket) {
-		if (filter->sfe_filter->sf_filter.sf_handle == handle)
+	lck_rw_lock_exclusive(sock_filter_lock);
+	for (entry = socket->so_filt; entry;
+		 entry = entry->sfe_next_onsocket) {
+		if (entry->sfe_filter->sf_filter.sf_handle == handle &&
+			(entry->sfe_flags & SFEF_ATTACHED) != 0) {
 			break;
+		}
 	}
 	
-	if (filter != NULL) {
-		sflt_detach_private(filter, 0);
+	if (entry != NULL) {
+		sflt_detach_locked(entry);
 	}
-	else {
-		socket->so_filt = NULL;
-		result = ENOENT;
-	}
-	
-	socket_unlock(socket, 1);
+	lck_rw_unlock_exclusive(sock_filter_lock);
 	
 	return result;
 }
 
-
 errno_t
 sflt_register(
 	const struct sflt_filter	*filter,
@@ -481,7 +1121,7 @@ sflt_register(
 	}
 	bcopy(filter, &sock_filt->sf_filter, len);
 
-	lck_mtx_lock(sock_filter_lock);
+	lck_rw_lock_exclusive(sock_filter_lock);
 	/* Look for an existing entry */
 	TAILQ_FOREACH(match, &sock_filter_head, sf_global_next) {
 		if (match->sf_filter.sf_handle ==
@@ -489,7 +1129,7 @@ sflt_register(
 			break;
 		}
 	}
-
+	
 	/* Add the entry only if there was no existing entry */
 	if (match == NULL) {
 		TAILQ_INSERT_TAIL(&sock_filter_head, sock_filt, sf_global_next);
@@ -498,9 +1138,10 @@ sflt_register(
 			    sf_protosw_next);
 			sock_filt->sf_proto = pr;
 		}
+		sflt_retain_locked(sock_filt);
 	}
-	lck_mtx_unlock(sock_filter_lock);
-
+	lck_rw_unlock_exclusive(sock_filter_lock);
+	
 	if (match != NULL) {
 		FREE(sock_filt, M_IFADDR);
 		return EEXIST;
@@ -514,62 +1155,39 @@ sflt_unregister(
 	sflt_handle handle)
 {
 	struct socket_filter *filter;
-	struct socket_filter_entry *entry_head = NULL;
-	struct socket_filter_entry *next_entry = NULL;
+	lck_rw_lock_exclusive(sock_filter_lock);
 	
-	/* Find the entry and remove it from the global and protosw lists */
-	lck_mtx_lock(sock_filter_lock);
+	/* Find the entry by the handle */
 	TAILQ_FOREACH(filter, &sock_filter_head, sf_global_next) {
 		if (filter->sf_filter.sf_handle == handle)
 			break;
 	}
 	
 	if (filter) {
+		// Remove it from the global list
 		TAILQ_REMOVE(&sock_filter_head, filter, sf_global_next);
+		
+		// Remove it from the protosw list
 		if ((filter->sf_filter.sf_flags & SFLT_GLOBAL) != 0) {
 			TAILQ_REMOVE(&filter->sf_proto->pr_filter_head, filter, sf_protosw_next);
 		}
-		entry_head = filter->sf_entry_head;
-		filter->sf_entry_head = NULL;
-		filter->sf_flags |= SFF_DETACHING;
-	
-		for (next_entry = entry_head; next_entry;
-		    next_entry = next_entry->sfe_next_onfilter) {
-			/*
-			 * Mark this as "unregistering"; upon dropping the
-			 * lock, another thread may win the race and attempt
-			 * to detach a socket from it (e.g. as part of close)
-			 * before we get a chance to detach.  Setting this
-			 * flag practically tells the other thread to go away.
-			 * If the other thread wins, this causes an extra
-			 * reference hold on the socket so that it won't be
-			 * deallocated until after we finish with the detach
-			 * for it below.  If we win the race, the extra
-			 * reference hold is also taken to compensate for the
-			 * extra reference release when detach is called
-			 * with a "1" for its second parameter.
-			 */
-			next_entry->sfe_flags |= SFEF_UNREGISTERING;
+		
+		// Detach from any sockets
+		struct socket_filter_entry *entry = NULL;
+		
+		for (entry = filter->sf_entry_head; entry; entry = entry->sfe_next_onfilter) {
+			sflt_detach_locked(entry);
 		}
+		
+		// Release the filter
+		sflt_release_locked(filter);
 	}
 	
-	lck_mtx_unlock(sock_filter_lock);
+	lck_rw_unlock_exclusive(sock_filter_lock);
 	
 	if (filter == NULL)
 		return ENOENT;
 	
-	/* We need to detach the filter from any sockets it's attached to */
-	if (entry_head == 0) {
-		if (filter->sf_filter.sf_unregistered)
-			filter->sf_filter.sf_unregistered(filter->sf_filter.sf_handle);
-	} else {
-		while (entry_head) {
-			next_entry = entry_head->sfe_next_onfilter;
-			sflt_detach_private(entry_head, 1);
-			entry_head = next_entry;
-		}
-	}
-	
 	return 0;
 }
 
diff --git a/bsd/kern/mach_loader.c b/bsd/kern/mach_loader.c
index a6a0ab766..ab26c40b9 100644
--- a/bsd/kern/mach_loader.c
+++ b/bsd/kern/mach_loader.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -59,6 +59,7 @@
 
 #include <machine/vmparam.h>
 #include <machine/exec.h>
+#include <machine/pal_routines.h>
 
 #include <kern/kern_types.h>
 #include <kern/cpu_number.h>
@@ -79,7 +80,6 @@
 #include <vm/vnode_pager.h>
 #include <vm/vm_protos.h> 
 
-
 /*
  * XXX vm/pmap.h should not treat these prototypes as MACH_KERNEL_PRIVATE
  * when KERNEL is defined.
@@ -100,10 +100,6 @@ extern kern_return_t    thread_state_initialize(thread_t thread);
 
 /* XXX should have prototypes in a shared header file */
 extern int	get_map_nentries(vm_map_t);
-extern kern_return_t	thread_userstack(thread_t, int, thread_state_t,
-				unsigned int, mach_vm_offset_t *, int *);
-extern kern_return_t	thread_entrypoint(thread_t, int, thread_state_t,
-				unsigned int, mach_vm_offset_t *);
 
 extern kern_return_t	memory_object_signed(memory_object_control_t control,
 					     boolean_t is_signed);
@@ -119,8 +115,11 @@ static load_result_t load_result_null = {
 	.unixproc = 0,
 	.dynlinker = 0,
 	.customstack = 0,
+	.validentry = 0,
 	.csflags = 0,
-	.uuid = { 0 }
+	.uuid = { 0 },
+	.min_vm_addr = MACH_VM_MAX_ADDRESS,
+	.max_vm_addr = MACH_VM_MIN_ADDRESS
 };
 
 /*
@@ -135,6 +134,7 @@ parse_machfile(
 	off_t			file_offset,
 	off_t			macho_size,
 	int			depth,
+	int64_t			slide,
 	load_result_t		*result
 );
 
@@ -147,10 +147,12 @@ load_segment(
 	off_t				macho_size,
 	struct vnode			*vp,
 	vm_map_t			map,
+	int64_t				slide,
 	load_result_t			*result
 );
 
-int load_code_signature(
+static load_return_t
+load_code_signature(
 	struct linkedit_data_command	*lcp,
 	struct vnode			*vp,
 	off_t				macho_offset,
@@ -171,13 +173,7 @@ static load_return_t
 load_unixthread(
 	struct thread_command	*tcp,
 	thread_t			thread,
-	load_result_t			*result
-);
-
-static load_return_t
-load_thread(
-	struct thread_command	*tcp,
-	thread_t			thread,
+	int64_t				slide,
 	load_result_t			*result
 );
 
@@ -193,7 +189,7 @@ load_threadstack(
 	thread_t		thread,
 	uint32_t	*ts,
 	uint32_t	total_size,
-	user_addr_t	*user_stack,
+	mach_vm_offset_t	*user_stack,
 	int				*customstack
 );
 
@@ -212,10 +208,12 @@ load_dylinker(
 	vm_map_t				map,
 	thread_t			thread,
 	int						depth,
-	load_result_t			*result,
-	boolean_t			is_64bit
+	int64_t			slide,
+	load_result_t			*result
 );
 
+struct macho_data;
+
 static load_return_t
 get_macho_vnode(
 	char				*path,
@@ -223,6 +221,7 @@ get_macho_vnode(
 	struct mach_header	*mach_header,
 	off_t			*file_offset,
 	off_t			*macho_size,
+	struct macho_data	*macho_data,
 	struct vnode		**vpp
 );
 
@@ -246,7 +245,7 @@ widen_segment_command(const struct segment_command *scp32,
 static void
 note_all_image_info_section(const struct segment_command_64 *scp,
     boolean_t is64, size_t section_size, const void *sections,
-    load_result_t *result)
+    int64_t slide, load_result_t *result)
 {
 	const union {
 		struct section s32;
@@ -263,6 +262,7 @@ note_all_image_info_section(const struct segment_command_64 *scp,
 		    sizeof(sectionp->s64.sectname))) {
 			result->all_image_info_addr =
 			    is64 ? sectionp->s64.addr : sectionp->s32.addr;
+			result->all_image_info_addr += slide;
 			result->all_image_info_size =
 			    is64 ? sectionp->s64.size : sectionp->s32.size;
 			return;
@@ -270,7 +270,6 @@ note_all_image_info_section(const struct segment_command_64 *scp,
 	}
 }
 
-
 load_return_t
 load_machfile(
 	struct image_params	*imgp,
@@ -293,6 +292,8 @@ load_machfile(
 	boolean_t create_map = FALSE;
 	int spawn = (imgp->ip_flags & IMGPF_SPAWN);
 	task_t task = current_task();
+	mach_vm_offset_t	aslr_offset = 0;
+	kern_return_t 		kret;
 
 	if (new_map == VM_MAP_NULL) {
 		create_map = TRUE;
@@ -312,10 +313,12 @@ load_machfile(
 
 	if (create_map) {
 		pmap = pmap_create((vm_map_size_t) 0, (imgp->ip_flags & IMGPF_IS_64BIT));
+		pal_switch_pmap(thread, pmap, imgp->ip_flags & IMGPF_IS_64BIT);
 		map = vm_map_create(pmap,
 				0,
 				vm_compute_max_offset((imgp->ip_flags & IMGPF_IS_64BIT)),
 				TRUE);
+
 	} else
 		map = new_map;
 
@@ -325,6 +328,20 @@ load_machfile(
 	if ( (header->flags & MH_ALLOW_STACK_EXECUTION) )
 	        vm_map_disable_NX(map);
 #endif
+
+	/* Forcibly disallow execution from data pages even if the arch
+	 * normally permits it. */
+	if ((header->flags & MH_NO_HEAP_EXECUTION) && !(imgp->ip_flags & IMGPF_ALLOW_DATA_EXEC))
+		vm_map_disallow_data_exec(map);
+	
+	/*
+	 * Compute a random offset for ASLR.
+	 */
+	if (!(imgp->ip_flags & IMGPF_DISABLE_ASLR)) {
+		aslr_offset = random();
+		aslr_offset %= 1 << ((imgp->ip_flags & IMGPF_IS_64BIT) ? 16 : 8);
+		aslr_offset <<= PAGE_SHIFT;
+	}
 	
 	if (!result)
 		result = &myresult;
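
A worked example of the slide range produced above, assuming the usual
PAGE_SHIFT of 12 (4 KB pages):

	/*
	 * 64-bit: aslr_offset = random() % (1 << 16), then << PAGE_SHIFT
	 *         -> slide in [0, 0x10000000), up to 256 MB in 4 KB steps
	 * 32-bit: aslr_offset = random() % (1 << 8), then << PAGE_SHIFT
	 *         -> slide in [0, 0x00100000), up to 1 MB in 4 KB steps
	 */
	uint64_t max_slide_64 = (1ULL << 16) << 12;	/* 0x10000000 */
	uint64_t max_slide_32 = (1ULL << 8) << 12;	/* 0x00100000 */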
@@ -332,7 +349,7 @@ load_machfile(
 	*result = load_result_null;
 
 	lret = parse_machfile(vp, map, thread, header, file_offset, macho_size,
-			      0, result);
+			      0, (int64_t)aslr_offset, result);
 
 	if (lret != LOAD_SUCCESS) {
 		if (create_map) {
@@ -362,7 +379,7 @@ load_machfile(
 
 	 if (create_map) {
 		/*
-		 * If this is an exec, then we are going to destory the old
+		 * If this is an exec, then we are going to destroy the old
 		 * task, and it's correct to halt it; if it's spawn, the
 		 * task is not yet running, and it makes no sense.
 		 */
@@ -376,15 +393,16 @@ load_machfile(
 			 * task halting (wait for threads and then cleanup
 			 * task resources).
 			 */
-			task_start_halt(task);
+			kret = task_start_halt(task);
+			if (kret != KERN_SUCCESS) {
+				return(kret);		
+			}
 			proc_transcommit(current_proc(), 0);
 			task_complete_halt(task);
+			workqueue_exit(current_proc());
 		}
-		old_map = swap_task_map(old_task, thread, map);
+		old_map = swap_task_map(old_task, thread, map, !spawn);
 		vm_map_clear_4GB_pagezero(old_map);
-		/* XXX L4 : For spawn the current task isn't running... */
-		if (!spawn)
-			pmap_switch(pmap);	/* Make sure we are using the new pmap */
 		vm_map_deallocate(old_map);
 	}
 	return(LOAD_SUCCESS);
@@ -397,7 +415,9 @@ load_machfile(
  * bits in the file format itself.  We read into the kernel buffer the
  * commands section, and then parse it in order to parse the mach-o file
  * format load_command segment(s).  We are only interested in a subset of
- * the total set of possible commands.
+ * the total set of possible commands. If "map"==VM_MAP_NULL or
+ * "thread"==THREAD_NULL, do not make permament VM modifications,
+ * just preflight the parse.
  */
 static
 load_return_t
@@ -409,6 +429,7 @@ parse_machfile(
 	off_t			file_offset,
 	off_t			macho_size,
 	int			depth,
+	int64_t			aslr_offset,
 	load_result_t		*result
 )
 {
@@ -428,10 +449,10 @@ parse_machfile(
 	proc_t			p = current_proc();		/* XXXX */
 	int			error;
 	int resid=0;
-	task_t task;
 	size_t			mach_header_sz = sizeof(struct mach_header);
 	boolean_t		abi64;
 	boolean_t		got_code_signatures = FALSE;
+	int64_t			slide = 0;
 
 	if (header->magic == MH_MAGIC_64 ||
 	    header->magic == MH_CIGAM_64) {
@@ -445,8 +466,6 @@ parse_machfile(
 		return(LOAD_FAILURE);
 	}
 
-	task = (task_t)get_threadtask(thread);
-
 	depth++;
 
 	/*
@@ -522,11 +541,30 @@ parse_machfile(
 			kfree(kl_addr, kl_size);
 		return(LOAD_IOERROR);
 	}
-	
+
+	/*
+	 *	For PIE and dyld, slide everything by the ASLR offset.
+	 */
+	if ((header->flags & MH_PIE) || (header->filetype == MH_DYLINKER)) {
+		slide = aslr_offset;
+	}
+
 	/*
 	 *	Scan through the commands, processing each one as necessary.
 	 */
-	for (pass = 1; pass <= 2; pass++) {
+	for (pass = 1; pass <= 3; pass++) {
+
+#if CONFIG_EMBEDDED
+		/*
+		 * Check that the entry point is contained in an executable segment.
+		 */
+		if ((pass == 3) && (result->validentry == 0)) {
+			thread_state_initialize(thread);
+			ret = LOAD_FAILURE;
+			break;
+		}
+#endif
+
 		/*
 		 * Loop through each of the load_commands indicated by the
 		 * Mach-O header; if an absurd value is provided, we just
@@ -535,6 +573,7 @@ parse_machfile(
 		 */
 		offset = mach_header_sz;
 		ncmds = header->ncmds;
+
 		while (ncmds--) {
 			/*
 			 *	Get a pointer to the command.
@@ -565,7 +604,7 @@ parse_machfile(
 			switch(lcp->cmd) {
 			case LC_SEGMENT:
 			case LC_SEGMENT_64:
-				if (pass != 1)
+				if (pass != 2)
 					break;
 				ret = load_segment(lcp,
 				    		   header->filetype,
@@ -574,25 +613,20 @@ parse_machfile(
 						   macho_size,
 						   vp,
 						   map,
+						   slide,
 						   result);
 				break;
-			case LC_THREAD:
-				if (pass != 2)
-					break;
-				ret = load_thread((struct thread_command *)lcp,
-						   thread,
-						  result);
-				break;
 			case LC_UNIXTHREAD:
-				if (pass != 2)
+				if (pass != 1)
 					break;
 				ret = load_unixthread(
 						 (struct thread_command *) lcp,
-						   thread,
+						 thread,
+						 slide,
 						 result);
 				break;
 			case LC_LOAD_DYLINKER:
-				if (pass != 2)
+				if (pass != 3)
 					break;
 				if ((depth == 1) && (dlp == 0)) {
 					dlp = (struct dylinker_command *)lcp;
@@ -602,14 +636,14 @@ parse_machfile(
 				}
 				break;
 			case LC_UUID:
-				if (pass == 2 && depth == 1) {
+				if (pass == 1 && depth == 1) {
 					uulp = (struct uuid_command *)lcp;
 					memcpy(&result->uuid[0], &uulp->uuid[0], sizeof(result->uuid));
 				}
 				break;
 			case LC_CODE_SIGNATURE:
 				/* CODE SIGNING */
-				if (pass != 2)
+				if (pass != 1)
 					break;
 				/* pager -> uip ->
 				   load signatures & store in uip
@@ -633,7 +667,7 @@ parse_machfile(
 				break;
 #if CONFIG_CODE_DECRYPTION
 			case LC_ENCRYPTION_INFO:
-				if (pass != 2)
+				if (pass != 3)
 					break;
 				ret = set_code_unprotect(
 					(struct encryption_info_command *) lcp,
@@ -671,24 +705,15 @@ parse_machfile(
 		    }
 	    }
 
-	    if (dlp != 0)
-			ret = load_dylinker(dlp, dlarchbits, map, thread, depth, result, abi64);
+	    if (dlp != 0) {
+		    /* load the dylinker, and always slide it by the ASLR
+		     * offset regardless of PIE */
+		    ret = load_dylinker(dlp, dlarchbits, map, thread, depth, aslr_offset, result);
+	    }
 
 	    if(depth == 1) {
 		if (result->thread_count == 0) {
 			ret = LOAD_FAILURE;
-		} else if ( abi64 ) {
-#ifdef __ppc__
-			/* Map in 64-bit commpage */
-			/*
-			 * PPC51: ppc64 is limited to 51-bit addresses.
-			 * Memory above that limit is handled specially
-			 * at the pmap level.
-			 *
-			 * <rdar://6640492> -- wrong task for vfork()/spawn()
-			 */
-			pmap_map_sharedpage(current_task(), get_map_pmap(map));
-#endif /* __ppc__ */
 		}
 	    }
 	}
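
Collecting the switch cases above, the rework turns the old two-pass loop into
three passes with this division of labor (as reflected by the "pass != N"
checks in this diff):

	/*
	 * pass 1: LC_UNIXTHREAD, LC_UUID, LC_CODE_SIGNATURE
	 *         -- thread state, identity and signatures come first
	 * pass 2: LC_SEGMENT / LC_SEGMENT_64
	 *         -- segments are mapped, with the ASLR slide applied
	 * pass 3: LC_LOAD_DYLINKER, LC_ENCRYPTION_INFO
	 *         -- dyld loading and text decryption run last, against
	 *            the fully mapped image
	 */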
@@ -780,6 +805,7 @@ load_segment(
 	off_t				macho_size,
 	struct vnode			*vp,
 	vm_map_t			map,
+	int64_t				slide,
 	load_result_t		*result
 )
 {
@@ -795,17 +821,21 @@ load_segment(
 	if (LC_SEGMENT_64 == lcp->cmd) {
 		segment_command_size = sizeof(struct segment_command_64);
 		single_section_size  = sizeof(struct section_64);
-		scp = (struct segment_command_64 *)lcp;
 	} else {
 		segment_command_size = sizeof(struct segment_command);
 		single_section_size  = sizeof(struct section);
-		scp = &segment_command;
-		widen_segment_command((struct segment_command *)lcp, scp);
 	}
 	if (lcp->cmdsize < segment_command_size)
 		return (LOAD_BADMACHO);
 	total_section_size = lcp->cmdsize - segment_command_size;
 
+	if (LC_SEGMENT_64 == lcp->cmd)
+		scp = (struct segment_command_64 *)lcp;
+	else {
+		scp = &segment_command;
+		widen_segment_command((struct segment_command *)lcp, scp);
+	}
+
 	/*
 	 * Make sure what we get from the file is really ours (as specified
 	 * by macho_size).
@@ -833,27 +863,48 @@ load_segment(
 	map_addr = trunc_page_64(scp->vmaddr); /* JVXXX note that in XNU TOT this is round instead of trunc for 64 bits */
 	if (seg_size == 0)
 		return (KERN_SUCCESS);
-	/* XXX (4596982) this interferes with Rosetta, so limit to 64-bit tasks */
 	if (map_addr == 0 &&
 	    map_size == 0 &&
 	    seg_size != 0 &&
-	    scp->cmd == LC_SEGMENT_64 &&
 	    (scp->initprot & VM_PROT_ALL) == VM_PROT_NONE &&
 	    (scp->maxprot & VM_PROT_ALL) == VM_PROT_NONE) {
 		/*
-		 * This is a "page zero" segment:  it starts at address 0,
-		 * is not mapped from the binary file and is not accessible.
-		 * User-space should never be able to access that memory, so
-		 * make it completely off limits by raising the VM map's
-		 * minimum offset.
+		 * For PIE, extend page zero rather than moving it.  Extending
+		 * page zero keeps early allocations from falling predictably
+		 * between the end of page zero and the beginning of the first
+		 * slid segment.
 		 */
-		ret = vm_map_raise_min_offset(map, seg_size);
-		if (ret != KERN_SUCCESS) {
-			return (LOAD_FAILURE);
+		seg_size += slide;
+		slide = 0;
+
+		/* XXX (4596982) this interferes with Rosetta, so limit to 64-bit tasks */
+		if (scp->cmd == LC_SEGMENT_64) {
+			/*
+			 * This is a "page zero" segment:  it starts at address 0,
+			 * is not mapped from the binary file and is not accessible.
+			 * User-space should never be able to access that memory, so
+			 * make it completely off limits by raising the VM map's
+			 * minimum offset.
+			 */
+			ret = vm_map_raise_min_offset(map, seg_size);
+			if (ret != KERN_SUCCESS) {
+				return (LOAD_FAILURE);
+			}
+			return (LOAD_SUCCESS);
 		}
-		return (LOAD_SUCCESS);
 	}
 
+	/* If a non-zero slide was specified by the caller, apply it now */
+	map_addr += slide;
+
+	if (map_addr < result->min_vm_addr)
+		result->min_vm_addr = map_addr;
+	if (map_addr+seg_size > result->max_vm_addr)
+		result->max_vm_addr = map_addr+seg_size;
+
+	if (map == VM_MAP_NULL)
+		return (LOAD_SUCCESS);
+
 	map_offset = pager_offset + scp->fileoff;	/* limited to 32 bits */
 
 	if (map_size > 0) {
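
A concrete instance of the page-zero extension above (numbers are
illustrative): a 64-bit PIE binary with the usual 4 GB __PAGEZERO and a slide
of 0x2000000 (32 MB) yields:

	/*
	 * seg_size = 0x100000000 + 0x2000000 = 0x102000000, slide = 0
	 * vm_map_raise_min_offset(map, 0x102000000) puts the whole range
	 * off limits, so the first slid segment (__TEXT, formerly at
	 * 0x100000000, now at 0x102000000) still abuts page zero and no
	 * allocation can fall predictably in between.
	 */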
@@ -930,77 +981,12 @@ load_segment(
 	    result->all_image_info_addr == MACH_VM_MIN_ADDRESS)
 		note_all_image_info_section(scp,
 		    LC_SEGMENT_64 == lcp->cmd, single_section_size,
-		    (const char *)lcp + segment_command_size, result);
-
-	return ret;
-}
-
-static
-load_return_t
-load_thread(
-	struct thread_command	*tcp,
-	thread_t			thread,
-	load_result_t		*result
-)
-{
-	kern_return_t	kret;
-	load_return_t	lret;
-	task_t			task;
-	int customstack=0;
+		    (const char *)lcp + segment_command_size, slide, result);
 
-	if (tcp->cmdsize < sizeof(*tcp))
-		return (LOAD_BADMACHO);
-	task = get_threadtask(thread);
+	if ((result->entry_point >= map_addr) && (result->entry_point < (map_addr + map_size)))
+		result->validentry = 1;
 
-	/* if count is 0; same as thread */
-	if (result->thread_count != 0) {
-		kret = thread_create(task, &thread);
-		if (kret != KERN_SUCCESS)
-			return(LOAD_RESOURCE);
-		thread_deallocate(thread);
-	}
-
-	lret = load_threadstate(thread,
-		       (uint32_t *)(((vm_offset_t)tcp) + 
-		       		sizeof(struct thread_command)),
-		       tcp->cmdsize - sizeof(struct thread_command));
-	if (lret != LOAD_SUCCESS)
-		return (lret);
-
-	if (result->thread_count == 0) {
-		lret = load_threadstack(thread,
-				(uint32_t *)(((vm_offset_t)tcp) + 
-					sizeof(struct thread_command)),
-				tcp->cmdsize - sizeof(struct thread_command),
-				&result->user_stack,
-				&customstack);
-		if (customstack)
-				result->customstack = 1;
-		else
-				result->customstack = 0;
-			
-		if (lret != LOAD_SUCCESS)
-			return(lret);
-
-		lret = load_threadentry(thread,
-				(uint32_t *)(((vm_offset_t)tcp) + 
-					sizeof(struct thread_command)),
-				tcp->cmdsize - sizeof(struct thread_command),
-				&result->entry_point);
-		if (lret != LOAD_SUCCESS)
-			return(lret);
-	}
-	/*
-	 *	Resume thread now, note that this means that the thread
-	 *	commands should appear after all the load commands to
-	 *	be sure they don't reference anything not yet mapped.
-	 */
-	else
-		thread_resume(thread);
-		
-	result->thread_count++;
-
-	return(LOAD_SUCCESS);
+	return ret;
 }
 
 static
@@ -1008,6 +994,7 @@ load_return_t
 load_unixthread(
 	struct thread_command	*tcp,
 	thread_t		thread,
+	int64_t				slide,
 	load_result_t		*result
 )
 {
@@ -1017,9 +1004,12 @@ load_unixthread(
 	if (tcp->cmdsize < sizeof(*tcp))
 		return (LOAD_BADMACHO);
 	if (result->thread_count != 0) {
-printf("load_unixthread: already have a thread!");
+		printf("load_unixthread: already have a thread!");
 		return (LOAD_FAILURE);
 	}
+
+	if (thread == THREAD_NULL)
+		return (LOAD_SUCCESS);
 	
 	ret = load_threadstack(thread,
 		       (uint32_t *)(((vm_offset_t)tcp) + 
@@ -1031,9 +1021,12 @@ printf("load_unixthread: already have a thread!");
 		return(ret);
 
 	if (customstack)
-			result->customstack = 1;
+		result->customstack = 1;
 	else
-			result->customstack = 0;
+		result->customstack = 0;
+
+	result->user_stack += slide;
+
 	ret = load_threadentry(thread,
 		       (uint32_t *)(((vm_offset_t)tcp) + 
 		       		sizeof(struct thread_command)),
@@ -1042,6 +1035,8 @@ printf("load_unixthread: already have a thread!");
 	if (ret != LOAD_SUCCESS)
 		return(ret);
 
+	result->entry_point += slide;
+
 	ret = load_threadstate(thread,
 		       (uint32_t *)(((vm_offset_t)tcp) + 
 		       		sizeof(struct thread_command)),
@@ -1107,7 +1102,7 @@ load_threadstack(
 	thread_t	thread,
 	uint32_t	*ts,
 	uint32_t	total_size,
-	user_addr_t	*user_stack,
+	mach_vm_offset_t	*user_stack,
 	int *customstack
 )
 {
@@ -1183,31 +1178,40 @@ load_threadentry(
 	return(LOAD_SUCCESS);
 }
 
+struct macho_data {
+	struct nameidata	__nid;
+	union macho_vnode_header {
+		struct mach_header	mach_header;
+		struct fat_header	fat_header;
+		char	__pad[512];
+	} __header;
+};
 
-static
-load_return_t
+static load_return_t
 load_dylinker(
 	struct dylinker_command	*lcp,
 	integer_t		archbits,
 	vm_map_t		map,
 	thread_t	thread,
 	int			depth,
-	load_result_t		*result,
-	boolean_t		is_64bit
+	int64_t			slide,
+	load_result_t		*result
 )
 {
 	char			*name;
 	char			*p;
 	struct vnode		*vp = NULLVP;	/* set by get_macho_vnode() */
-	struct mach_header	header;
+	struct mach_header	*header;
 	off_t			file_offset = 0; /* set by get_macho_vnode() */
 	off_t			macho_size = 0;	/* set by get_macho_vnode() */
-	vm_map_t		copy_map;
-	load_result_t		myresult;
+	load_result_t		*myresult;
 	kern_return_t		ret;
-	vm_map_copy_t	tmp;
-	mach_vm_offset_t	dyl_start, map_addr;
-	mach_vm_size_t		dyl_length;
+	struct macho_data	*macho_data;
+	struct {
+		struct mach_header	__header;
+		load_result_t		__myresult;
+		struct macho_data	__macho_data;
+	} *dyld_data;
 
 	if (lcp->cmdsize < sizeof(*lcp))
 		return (LOAD_BADMACHO);
@@ -1222,11 +1226,19 @@ load_dylinker(
 			return(LOAD_BADMACHO);
 	} while (*p++);
 
-	ret = get_macho_vnode(name, archbits, &header, &file_offset, &macho_size, &vp);
+	/* Allocate wad-of-data from heap to reduce excessively deep stacks */
+
+	MALLOC(dyld_data, void *, sizeof (*dyld_data), M_TEMP, M_WAITOK);
+	header = &dyld_data->__header;
+	myresult = &dyld_data->__myresult;
+	macho_data = &dyld_data->__macho_data;
+
+	ret = get_macho_vnode(name, archbits, header,
+	    &file_offset, &macho_size, macho_data, &vp);
 	if (ret)
-		return (ret);
-			
-	myresult = load_result_null;
+		goto novp_out;
+
+	*myresult = load_result_null;
 
 	/*
 	 *	First try to map dyld in directly.  This should work most of
@@ -1234,106 +1246,85 @@ load_dylinker(
 	 *	mapped to its address.
 	 */
 
-	ret = parse_machfile(vp, map, thread, &header, file_offset, macho_size,
-				depth, &myresult);
+	ret = parse_machfile(vp, map, thread, header, file_offset,
+	    macho_size, depth, slide, myresult);
 
 	/*
 	 *	If it turned out something was in the way, then we'll take
-	 *	take this longer path to map dyld into a temporary map and
-	 *	copy it into destination map at a different address.
+	 *	this longer path to preflight dyld's vm ranges, then
+	 *	map it at a free location in the address space.
 	 */
 
 	if (ret == LOAD_NOSPACE) {
+		mach_vm_offset_t	dyl_start, map_addr;
+		mach_vm_size_t	dyl_length;
+		int64_t			slide_amount;
+
+		*myresult = load_result_null;
 
 		/*
-		 *	Load the Mach-O.
-		 *	Use a temporary map to do the work.
+		 * Preflight parsing the Mach-O file with a NULL
+		 * map, which will return the ranges needed for a
+		 * subsequent map attempt (with a slide) in "myresult"
 		 */
-		copy_map = vm_map_create(pmap_create(vm_map_round_page(macho_size),
-						     is_64bit),
-					 get_map_min(map), get_map_max(map), TRUE);
-		if (VM_MAP_NULL == copy_map) {
-			ret = LOAD_RESOURCE;
+		ret = parse_machfile(vp, VM_MAP_NULL, THREAD_NULL, header,
+		    file_offset, macho_size, depth, 0 /* slide */, myresult);
+
+		if (ret != LOAD_SUCCESS) {
 			goto out;
 		}
-	
-		myresult = load_result_null;
 
-		ret = parse_machfile(vp, copy_map, thread, &header,
-					file_offset, macho_size,
-					depth, &myresult);
-	
-		if (ret) {
-			vm_map_deallocate(copy_map);
+		dyl_start = myresult->min_vm_addr;
+		dyl_length = myresult->max_vm_addr - myresult->min_vm_addr;
+
+		dyl_length += slide;
+
+		/* To find an appropriate load address, do a quick allocation */
+		map_addr = dyl_start;
+		ret = mach_vm_allocate(map, &map_addr, dyl_length, VM_FLAGS_ANYWHERE);
+		if (ret != KERN_SUCCESS) {
+			ret = LOAD_NOSPACE;
 			goto out;
 		}
-	
-		if (get_map_nentries(copy_map) > 0) {
-	
-			dyl_start = mach_get_vm_start(copy_map);
-			dyl_length = mach_get_vm_end(copy_map) - dyl_start;
-	
-			map_addr = dyl_start;
-			ret = mach_vm_allocate(map, &map_addr, dyl_length, VM_FLAGS_ANYWHERE);
-	
-			if (ret != KERN_SUCCESS) {
-				vm_map_deallocate(copy_map);
-				ret = LOAD_NOSPACE;
-				goto out;
-			
-			}
 
-			ret = vm_map_copyin(copy_map,
-					    (vm_map_address_t)dyl_start,
-					    (vm_map_size_t)dyl_length,
-					    TRUE, &tmp);
-			if (ret != KERN_SUCCESS) {
-				(void) vm_map_remove(map,
-					     vm_map_trunc_page(map_addr),
-					     vm_map_round_page(map_addr + dyl_length),
-					     VM_MAP_NO_FLAGS);
-				vm_map_deallocate(copy_map);
-				goto out;
-			}
-	
-			ret = vm_map_copy_overwrite(map,
-					     (vm_map_address_t)map_addr,
-					     tmp, FALSE);
-			if (ret != KERN_SUCCESS) {
-				vm_map_copy_discard(tmp);
-				(void) vm_map_remove(map,
-					     vm_map_trunc_page(map_addr),
-					     vm_map_round_page(map_addr + dyl_length),
-					     VM_MAP_NO_FLAGS);
-				vm_map_deallocate(copy_map);
-				goto out;
-			}
-	
-			if (map_addr != dyl_start) {
-				myresult.entry_point += (map_addr - dyl_start);
-				myresult.all_image_info_addr +=
-				    (map_addr - dyl_start);
-			}
-		} else {
-			ret = LOAD_FAILURE;
+		ret = mach_vm_deallocate(map, map_addr, dyl_length);
+		if (ret != KERN_SUCCESS) {
+			ret = LOAD_NOSPACE;
+			goto out;
 		}
+		
+		if (map_addr < dyl_start)
+			slide_amount = -(int64_t)(dyl_start - map_addr);
+		else
+			slide_amount = (int64_t)(map_addr - dyl_start);
+
+		slide_amount += slide;
 
-		vm_map_deallocate(copy_map);
+		*myresult = load_result_null;
+
+		ret = parse_machfile(vp, map, thread, header,
+		    file_offset, macho_size, depth, slide_amount, myresult);
+
+		if (ret) {
+			goto out;
+		}
 	}
-	
+
 	if (ret == LOAD_SUCCESS) {		
 		result->dynlinker = TRUE;
-		result->entry_point = myresult.entry_point;
-		result->all_image_info_addr = myresult.all_image_info_addr;
-		result->all_image_info_size = myresult.all_image_info_size;
+		result->entry_point = myresult->entry_point;
+		result->all_image_info_addr = myresult->all_image_info_addr;
+		result->all_image_info_size = myresult->all_image_info_size;
 	}
 out:
 	vnode_put(vp);
+novp_out:
+	FREE(dyld_data, M_TEMP);
 	return (ret);
 
 }
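
To illustrate the slide arithmetic in the LOAD_NOSPACE path with hypothetical
addresses: suppose the preflight parse reports dyl_start = 0x8fe00000, that
range is occupied, and mach_vm_allocate() returns map_addr = 0x90000000:

	/*
	 * map_addr >= dyl_start, so
	 *     slide_amount = (int64_t)(map_addr - dyl_start) = 0x200000
	 * plus any ASLR slide already requested.  The final parse_machfile()
	 * call then maps every segment, the entry point, and the
	 * all_image_info section shifted by that amount.
	 */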
 
-int
+static load_return_t
 load_code_signature(
 	struct linkedit_data_command	*lcp,
 	struct vnode			*vp,
@@ -1408,6 +1399,10 @@ load_code_signature(
 		/* ubc_cs_blob_add() has consumed "addr" */
 		addr = 0;
 	}
+
+#if CHECK_CS_VALIDATION_BITMAP
+	ubc_cs_validation_bitmap_allocate( vp );
+#endif
 		
 	blob = ubc_cs_blob_get(vp, cputype, -1);
 
@@ -1435,9 +1430,9 @@ set_code_unprotect(
 		   struct vnode	*vp)
 {
 	int result, len;
-	char vpath[MAXPATHLEN];
 	pager_crypt_info_t crypt_info;
 	const char * cryptname = 0;
+	char *vpath;
 	
 	size_t offset;
 	struct segment_command_64 *seg64;
@@ -1445,8 +1440,7 @@ set_code_unprotect(
 	vm_map_offset_t map_offset, map_size;
 	kern_return_t kr;
 
-	if (eip->cmdsize < sizeof(*eip))
-		return LOAD_BADMACHO;
+	if (eip->cmdsize < sizeof(*eip)) return LOAD_BADMACHO;
 	
 	switch(eip->cryptid) {
 		case 0:
@@ -1464,13 +1458,22 @@ set_code_unprotect(
 			return LOAD_BADMACHO;
 	}
 	
+	if (map == VM_MAP_NULL) return (LOAD_SUCCESS);
+	if (NULL == text_crypter_create) return LOAD_FAILURE;
+
+	MALLOC_ZONE(vpath, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+	if(vpath == NULL) return LOAD_FAILURE;
+	
 	len = MAXPATHLEN;
 	result = vn_getpath(vp, vpath, &len);
-	if(result) return result;
+	if(result) {
+		FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI);
+		return LOAD_FAILURE;
+	}
 	
 	/* set up decrypter first */
-	if(NULL==text_crypter_create) return LOAD_FAILURE;
 	kr=text_crypter_create(&crypt_info, cryptname, (void*)vpath);
+	FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI);
 	
 	if(kr) {
 		printf("set_code_unprotect: unable to create decrypter %s, kr=%d\n",
@@ -1549,6 +1552,7 @@ get_macho_vnode(
 	struct mach_header	*mach_header,
 	off_t			*file_offset,
 	off_t			*macho_size,
+	struct macho_data	*data,
 	struct vnode		**vpp
 )
 {
@@ -1556,19 +1560,14 @@ get_macho_vnode(
 	vfs_context_t		ctx = vfs_context_current();
 	proc_t			p = vfs_context_proc(ctx);
 	kauth_cred_t		kerncred;
-	struct nameidata nid, *ndp;
+	struct nameidata	*ndp = &data->__nid;
 	boolean_t		is_fat;
 	struct fat_arch		fat_arch;
-	int			error = LOAD_SUCCESS;
+	int			error;
 	int resid;
-	union {
-		struct mach_header	mach_header;
-		struct fat_header	fat_header;
-		char	pad[512];
-	} header;
+	union macho_vnode_header *header = &data->__header;
 	off_t fsize = (off_t)0;
-	int err2;
-	
+
 	/*
 	 * Capture the kernel credential for use in the actual read of the
 	 * file, since the user doing the execution may have execute rights
@@ -1579,10 +1578,8 @@ get_macho_vnode(
 	 */
 	kerncred = vfs_context_ucred(vfs_context_kernel());
 
-	ndp = &nid;
-	
 	/* init the namei data to point the file user's program name */
-	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
+	NDINIT(ndp, LOOKUP, OP_OPEN, FOLLOW | LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
 
 	if ((error = namei(ndp)) != 0) {
 		if (error == ENOENT) {
@@ -1594,7 +1591,7 @@ get_macho_vnode(
 	}
 	nameidone(ndp);
 	vp = ndp->ni_vp;
-	
+
 	/* check for regular file */
 	if (vp->v_type != VREG) {
 		error = LOAD_PROTECT;
@@ -1625,41 +1622,42 @@ get_macho_vnode(
 		goto bad1;
 	}
 
-	if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&header, sizeof(header), 0,
+	if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)header, sizeof (*header), 0,
 	    UIO_SYSSPACE, IO_NODELOCKED, kerncred, &resid, p)) != 0) {
 		error = LOAD_IOERROR;
 		goto bad2;
 	}
-	
-	if (header.mach_header.magic == MH_MAGIC ||
-	    header.mach_header.magic == MH_MAGIC_64)
-	    is_fat = FALSE;
-	else if (header.fat_header.magic == FAT_MAGIC ||
-		 header.fat_header.magic == FAT_CIGAM)
-	    is_fat = TRUE;
-	else {
-	    error = LOAD_BADMACHO;
-	    goto bad2;
+
+	if (header->mach_header.magic == MH_MAGIC ||
+	    header->mach_header.magic == MH_MAGIC_64) {
+		is_fat = FALSE;
+	} else if (header->fat_header.magic == FAT_MAGIC ||
+	    header->fat_header.magic == FAT_CIGAM) {
+		is_fat = TRUE;
+	} else {
+		error = LOAD_BADMACHO;
+		goto bad2;
 	}
 
 	if (is_fat) {
 		/* Look up our architecture in the fat file. */
-		error = fatfile_getarch_with_bits(vp, archbits, (vm_offset_t)(&header.fat_header), &fat_arch);
+		error = fatfile_getarch_with_bits(vp, archbits,
+		    (vm_offset_t)(&header->fat_header), &fat_arch);
 		if (error != LOAD_SUCCESS)
 			goto bad2;
 
 		/* Read the Mach-O header out of it */
-		error = vn_rdwr(UIO_READ, vp, (caddr_t)&header.mach_header,
-				sizeof(header.mach_header), fat_arch.offset,
-				UIO_SYSSPACE, IO_NODELOCKED, kerncred, &resid, p);
+		error = vn_rdwr(UIO_READ, vp, (caddr_t)&header->mach_header,
+		    sizeof (header->mach_header), fat_arch.offset,
+		    UIO_SYSSPACE, IO_NODELOCKED, kerncred, &resid, p);
 		if (error) {
 			error = LOAD_IOERROR;
 			goto bad2;
 		}
 
 		/* Is this really a Mach-O? */
-		if (header.mach_header.magic != MH_MAGIC &&
-		    header.mach_header.magic != MH_MAGIC_64) {
+		if (header->mach_header.magic != MH_MAGIC &&
+		    header->mach_header.magic != MH_MAGIC_64) {
 			error = LOAD_BADMACHO;
 			goto bad2;
 		}
@@ -1677,25 +1675,23 @@ get_macho_vnode(
 		 * required, since the dynamic linker might work, but we will
 		 * refuse to load it because of this check.
 		 */
-		if ((cpu_type_t)(header.mach_header.cputype & CPU_ARCH_MASK) != archbits)
-			return(LOAD_BADARCH);
+		if ((cpu_type_t)(header->mach_header.cputype & CPU_ARCH_MASK) != archbits) {
+			error = LOAD_BADARCH;
+			goto bad2;
+		}
 
 		*file_offset = 0;
 		*macho_size = fsize;
 	}
 
-	*mach_header = header.mach_header;
+	*mach_header = header->mach_header;
 	*vpp = vp;
 
 	ubc_setsize(vp, fsize);
-	
 	return (error);
 
 bad2:
-	err2 = VNOP_CLOSE(vp, FREAD, ctx);
-	vnode_put(vp);
-	return (error);
-
+	(void) VNOP_CLOSE(vp, FREAD, ctx);
 bad1:
 	vnode_put(vp);
 	return(error);
diff --git a/bsd/kern/mach_loader.h b/bsd/kern/mach_loader.h
index 413d1a9a7..fd8e585db 100644
--- a/bsd/kern/mach_loader.h
+++ b/bsd/kern/mach_loader.h
@@ -60,9 +60,12 @@ typedef struct _load_result {
 		/* boolean_t */	unixproc	:1,
 				dynlinker	:1,
 				customstack	:1,
+				validentry	:1,
 						:0;
 	unsigned int		csflags;
 	unsigned char	uuid[16];	
+	mach_vm_address_t	min_vm_addr;
+	mach_vm_address_t	max_vm_addr;
 } load_result_t;
 
 struct image_params;
diff --git a/bsd/kern/mach_process.c b/bsd/kern/mach_process.c
index 0df8a49c8..9aba89b96 100644
--- a/bsd/kern/mach_process.c
+++ b/bsd/kern/mach_process.c
@@ -94,6 +94,8 @@
 
 #include <vm/vm_protos.h>		/* cs_allow_invalid() */
 
+#include <pexpert/pexpert.h>
+
 /* XXX ken/bsd_kern.c - prototype should be in common header */
 int get_task_userstop(task_t);
 
@@ -127,6 +129,10 @@ ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval)
 	AUDIT_ARG(value32, uap->data);
 
 	if (uap->req == PT_DENY_ATTACH) {
+#if (DEVELOPMENT || DEBUG) && defined(__arm__)
+		if (PE_i_can_has_debugger(NULL))
+			return(0);
+#endif
 		proc_lock(p);
 		if (ISSET(p->p_lflag, P_LTRACED)) {
 			proc_unlock(p);
@@ -164,8 +170,10 @@ ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval)
 		struct proc *pproc=proc_find(p->p_oppid);
 		proc_unlock(p);
 		cs_allow_invalid(p);
-		cs_allow_invalid(pproc);
-		proc_rele(pproc);
+		if(pproc) {
+			cs_allow_invalid(pproc);
+			proc_rele(pproc);
+		}
 		return(0);
 	}
 	if (uap->req == PT_SIGEXC) {
@@ -434,7 +442,7 @@ cantrace(proc_t cur_procp, kauth_cred_t creds, proc_t traced_procp, int *errp)
 	 *	(3) it's not owned by you, or is set-id on exec
 	 *	    (unless you're root).
 	 */
-	if ((creds->cr_ruid != proc_ucred(traced_procp)->cr_ruid ||
+	if ((kauth_cred_getruid(creds) != kauth_cred_getruid(proc_ucred(traced_procp)) ||
 		ISSET(traced_procp->p_flag, P_SUGID)) &&
 		(my_err = suser(creds, &cur_procp->p_acflag)) != 0) {
 		*errp = my_err;
diff --git a/bsd/kern/makesyscalls.sh b/bsd/kern/makesyscalls.sh
index d8b11ba6c..301905871 100755
--- a/bsd/kern/makesyscalls.sh
+++ b/bsd/kern/makesyscalls.sh
@@ -190,6 +190,11 @@ s/\$//g
 		printf "#include <mach/shared_region.h>\n" > sysarg
 		printf "\n#ifdef KERNEL\n" > sysarg
 		printf "#ifdef __APPLE_API_PRIVATE\n" > sysarg
+		printf "/*\n" > sysarg
+		printf " * The kernel may support multiple userspace ABIs, and must use\n" > sysarg
+		printf " * argument structures with elements large enough for any of them.\n" > sysarg
+		printf "*/\n" > sysarg
+		printf "\n" > sysarg
 		printf "#ifndef __arm__\n" > sysarg
 		printf "#define\tPAD_(t)\t(sizeof(uint64_t) <= sizeof(t) \\\n " > sysarg
 		printf "\t\t? 0 : sizeof(uint64_t) - sizeof(t))\n" > sysarg
@@ -205,8 +210,6 @@ s/\$//g
 		printf "#define\tPADR_(t)\t0\n" > sysarg
 		printf "#endif\n" > sysarg
 		printf "\n__BEGIN_DECLS\n" > sysarg
-		printf "#ifndef __MUNGE_ONCE\n" > sysarg
-		printf "#define __MUNGE_ONCE\n" > sysarg
 		printf "#ifndef __arm__\n" > sysarg
 		printf "void munge_w(const void *, void *);  \n" > sysarg
 		printf "void munge_ww(const void *, void *);  \n" > sysarg
@@ -218,6 +221,10 @@ s/\$//g
 		printf "void munge_wwwwwwww(const void *, void *);  \n" > sysarg
 		printf "void munge_wl(const void *, void *);  \n" > sysarg
 		printf "void munge_wlw(const void *, void *);  \n" > sysarg
+		printf "void munge_wlwwwll(const void *, void *);  \n" > sysarg
+		printf "void munge_wlwwwllw(const void *, void *);  \n" > sysarg
+		printf "void munge_wlwwlwlw(const void *, void *);  \n" > sysarg
+		printf "void munge_wllwwll(const void *, void *);  \n" > sysarg
 		printf "void munge_wwwl(const void *, void *);  \n" > sysarg
 		printf "void munge_wwwlw(const void *, void *);  \n" > sysarg
 		printf "void munge_wwwlww(const void *, void *);  \n" > sysarg
@@ -225,13 +232,18 @@ s/\$//g
 		printf "void munge_wwwwlw(const void *, void *);  \n" > sysarg
 		printf "void munge_wwwwl(const void *, void *);  \n" > sysarg
 		printf "void munge_wwwwwl(const void *, void *);  \n" > sysarg
+		printf "void munge_wwwwwlww(const void *, void *);  \n" > sysarg
+		printf "void munge_wwwwwllw(const void *, void *);  \n" > sysarg
+		printf "void munge_wwwwwlll(const void *, void *);  \n" > sysarg
 		printf "void munge_wwwwwwll(const void *, void *);  \n" > sysarg
+		printf "void munge_wwwwwwl(const void *, void *);  \n" > sysarg
 		printf "void munge_wwwwwwlw(const void *, void *);  \n" > sysarg
 		printf "void munge_wsw(const void *, void *);  \n" > sysarg
 		printf "void munge_wws(const void *, void *);  \n" > sysarg
 		printf "void munge_wwwsw(const void *, void *);  \n" > sysarg
 		printf "void munge_llllll(const void *, void *); \n" > sysarg
 		printf "#else \n" > sysarg
+		printf "/* ARM does not need mungers for BSD system calls */\n" > sysarg
 		printf "#define munge_w  NULL \n" > sysarg
 		printf "#define munge_ww  NULL \n" > sysarg
 		printf "#define munge_www  NULL \n" > sysarg
@@ -242,6 +254,10 @@ s/\$//g
 		printf "#define munge_wwwwwwww  NULL \n" > sysarg
 		printf "#define munge_wl  NULL \n" > sysarg
 		printf "#define munge_wlw  NULL \n" > sysarg
+		printf "#define munge_wlwwwll  NULL \n" > sysarg
+		printf "#define munge_wlwwwllw  NULL \n" > sysarg
+		printf "#define munge_wlwwlwlw  NULL \n" > sysarg
+		printf "#define munge_wllwwll  NULL \n" > sysarg
 		printf "#define munge_wwwl  NULL \n" > sysarg
 		printf "#define munge_wwwlw  NULL \n" > sysarg
 		printf "#define munge_wwwlww  NULL\n" > sysarg
@@ -249,22 +265,18 @@ s/\$//g
 		printf "#define munge_wwwwl  NULL \n" > sysarg
 		printf "#define munge_wwwwlw  NULL \n" > sysarg
 		printf "#define munge_wwwwwl  NULL \n" > sysarg
+		printf "#define munge_wwwwwlww  NULL \n" > sysarg
+		printf "#define munge_wwwwwllw  NULL \n" > sysarg
+		printf "#define munge_wwwwwlll  NULL \n" > sysarg
+		printf "#define munge_wwwwwwl  NULL \n" > sysarg
 		printf "#define munge_wwwwwwlw  NULL \n" > sysarg
 		printf "#define munge_wsw  NULL \n" > sysarg
 		printf "#define munge_wws  NULL \n" > sysarg
 		printf "#define munge_wwwsw  NULL \n" > sysarg
 		printf "#define munge_llllll  NULL \n" > sysarg
-		printf "#endif // ! __arm__\n" > sysarg
-		printf "#ifdef __ppc__\n" > sysarg
-		printf "void munge_d(const void *, void *);  \n" > sysarg
-		printf "void munge_dd(const void *, void *);  \n" > sysarg
-		printf "void munge_ddd(const void *, void *);  \n" > sysarg
-		printf "void munge_dddd(const void *, void *);  \n" > sysarg
-		printf "void munge_ddddd(const void *, void *);  \n" > sysarg
-		printf "void munge_dddddd(const void *, void *);  \n" > sysarg
-		printf "void munge_ddddddd(const void *, void *);  \n" > sysarg
-		printf "void munge_dddddddd(const void *, void *);  \n" > sysarg
-		printf "#else \n" > sysarg
+		printf "#endif /* __arm__ */\n" > sysarg
+		printf "\n" > sysarg
+		printf "/* Active 64-bit user ABIs do not need munging */\n" > sysarg
 		printf "#define munge_d  NULL \n" > sysarg
 		printf "#define munge_dd  NULL \n" > sysarg
 		printf "#define munge_ddd  NULL \n" > sysarg
@@ -273,8 +285,6 @@ s/\$//g
 		printf "#define munge_dddddd  NULL \n" > sysarg
 		printf "#define munge_ddddddd  NULL \n" > sysarg
 		printf "#define munge_dddddddd  NULL \n" > sysarg
-		printf "#endif // __ppc__\n" > sysarg
-		printf "#endif /* !__MUNGE_ONCE */\n" > sysarg
 		
 		printf "\n" > sysarg
 
@@ -592,7 +602,7 @@ s/\$//g
 							 argtype[i] == "socklen_t" || argtype[i] == "uint32_t" || argtype[i] == "int32_t" ||
 							 argtype[i] == "sigset_t" || argtype[i] == "gid_t" || argtype[i] == "unsigned int" ||
 							 argtype[i] == "mode_t" || argtype[i] == "key_t" ||
-							 argtype[i] == "mach_port_name_t") {
+							 argtype[i] == "mach_port_name_t" || argtype[i] == "au_asid_t") {
 						munge32 = munge32 "w"
 						munge64 = munge64 "d"
 						size32 += 4
diff --git a/bsd/kern/mcache.c b/bsd/kern/mcache.c
index 14416f34a..a0c6cfb69 100644
--- a/bsd/kern/mcache.c
+++ b/bsd/kern/mcache.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -98,9 +98,6 @@
 #define	MCACHE_UNLOCK(l)	lck_mtx_unlock(l)
 #define	MCACHE_LOCK_TRY(l)	lck_mtx_try_lock(l)
 
-/* This should be in a header file */
-#define	atomic_add_32(a, n)	((void) OSAddAtomic(n, a))
-
 static int ncpu;
 static lck_mtx_t *mcache_llock;
 static struct thread *mcache_llock_owner;
@@ -137,8 +134,8 @@ static mcache_bkttype_t mcache_bkttype[] = {
 };
 
 static mcache_t *mcache_create_common(const char *, size_t, size_t,
-    mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_notifyfn_t,
-    void *, u_int32_t, int, int);
+    mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_logfn_t,
+    mcache_notifyfn_t, void *, u_int32_t, int, int);
 static unsigned int mcache_slab_alloc(void *, mcache_obj_t ***,
     unsigned int, int);
 static void mcache_slab_free(void *, mcache_obj_t *, boolean_t);
@@ -192,6 +189,7 @@ mcache_init(void)
 	    PAGE_SIZE, "mcache");
 	if (mcache_zone == NULL)
 		panic("mcache_init: failed to allocate mcache zone\n");
+	zone_change(mcache_zone, Z_CALLERACCT, FALSE);
 
 	LIST_INIT(&mcache_head);
 
@@ -233,7 +231,8 @@ mcache_create(const char *name, size_t bufsize, size_t align,
     u_int32_t flags, int wait)
 {
 	return (mcache_create_common(name, bufsize, align, mcache_slab_alloc,
-	    mcache_slab_free, mcache_slab_audit, NULL, NULL, flags, 1, wait));
+	    mcache_slab_free, mcache_slab_audit, NULL, NULL, NULL, flags, 1,
+	    wait));
 }
 
 /*
@@ -244,10 +243,11 @@ mcache_create(const char *name, size_t bufsize, size_t align,
 __private_extern__ mcache_t *
 mcache_create_ext(const char *name, size_t bufsize,
     mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
-    mcache_notifyfn_t notifyfn, void *arg, u_int32_t flags, int wait)
+    mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
+    u_int32_t flags, int wait)
 {
 	return (mcache_create_common(name, bufsize, 0, allocfn,
-	    freefn, auditfn, notifyfn, arg, flags, 0, wait));
+	    freefn, auditfn, logfn, notifyfn, arg, flags, 0, wait));
 }
 
 /*
@@ -256,8 +256,8 @@ mcache_create_ext(const char *name, size_t bufsize,
 static mcache_t *
 mcache_create_common(const char *name, size_t bufsize, size_t align,
     mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
-    mcache_notifyfn_t notifyfn, void *arg, u_int32_t flags, int need_zone,
-    int wait)
+    mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
+    u_int32_t flags, int need_zone, int wait)
 {
 	mcache_bkttype_t *btp;
 	mcache_t *cp = NULL;
@@ -267,7 +267,7 @@ mcache_create_common(const char *name, size_t bufsize, size_t align,
 	char lck_name[64];
 
 	/* If auditing is on and print buffer is NULL, allocate it now */
-	if ((flags & MCF_AUDIT) && mca_dump_buf == NULL) {
+	if ((flags & MCF_DEBUG) && mca_dump_buf == NULL) {
 		int malloc_wait = (wait & MCR_NOSLEEP) ? M_NOWAIT : M_WAITOK;
 		MALLOC(mca_dump_buf, char *, DUMP_MCA_BUF_SIZE, M_TEMP,
 		    malloc_wait | M_ZERO);
@@ -313,6 +313,7 @@ mcache_create_common(const char *name, size_t bufsize, size_t align,
 	cp->mc_slab_alloc = allocfn;
 	cp->mc_slab_free = freefn;
 	cp->mc_slab_audit = auditfn;
+	cp->mc_slab_log = logfn;
 	cp->mc_slab_notify = notifyfn;
 	cp->mc_private = need_zone ? cp : arg;
 	cp->mc_bufsize = bufsize;
@@ -467,6 +468,11 @@ retry_alloc:
 			/* If we got them all, return to caller */
 			if ((need -= objs) == 0) {
 				MCACHE_UNLOCK(&ccp->cc_lock);
+
+				if (!(cp->mc_flags & MCF_NOLEAKLOG) &&
+				    cp->mc_slab_log != NULL)
+					(*cp->mc_slab_log)(num, *top, TRUE);
+
 				if (cp->mc_flags & MCF_DEBUG)
 					goto debug_alloc;
 
@@ -534,11 +540,14 @@ retry_alloc:
 		}
 	}
 
+	if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL)
+		(*cp->mc_slab_log)((num - need), *top, TRUE);
+
 	if (!(cp->mc_flags & MCF_DEBUG))
 		return (num - need);
 
 debug_alloc:
-	if (cp->mc_flags & MCF_VERIFY) {
+	if (cp->mc_flags & MCF_DEBUG) {
 		mcache_obj_t **o = top;
 		unsigned int n;
 
@@ -561,7 +570,7 @@ debug_alloc:
 	}
 
 	/* Invoke the slab layer audit callback if auditing is enabled */
-	if ((cp->mc_flags & MCF_AUDIT) && cp->mc_slab_audit != NULL)
+	if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL)
 		(*cp->mc_slab_audit)(cp->mc_private, *top, TRUE);
 
 	return (num - need);
@@ -678,8 +687,11 @@ mcache_free_ext(mcache_t *cp, mcache_obj_t *list)
 	mcache_obj_t *nlist;
 	mcache_bkt_t *bkt;
 
+	if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL)
+		(*cp->mc_slab_log)(0, list, FALSE);
+
 	/* Invoke the slab layer audit callback if auditing is enabled */
-	if ((cp->mc_flags & MCF_AUDIT) && cp->mc_slab_audit != NULL)
+	if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL)
 		(*cp->mc_slab_audit)(cp->mc_private, list, FALSE);
 
 	MCACHE_LOCK(&ccp->cc_lock);
@@ -899,7 +911,7 @@ mcache_slab_alloc(void *arg, mcache_obj_t ***plist, unsigned int num, int wait)
 		 * the nearest 64-bit multiply; this is because we use
 		 * 64-bit memory access to set/check the pattern.
 		 */
-		if (flags & MCF_AUDIT) {
+		if (flags & MCF_DEBUG) {
 			VERIFY(((intptr_t)base + rsize) <=
 			    ((intptr_t)buf + cp->mc_chunksize));
 			mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
@@ -958,7 +970,7 @@ mcache_slab_free(void *arg, mcache_obj_t *list, __unused boolean_t purged)
 		/* Get the original address since we're about to free it */
 		pbuf = (void **)((intptr_t)base - sizeof (void *));
 
-		if (flags & MCF_AUDIT) {
+		if (flags & MCF_DEBUG) {
 			VERIFY(((intptr_t)base + rsize) <=
 			    ((intptr_t)*pbuf + cp->mc_chunksize));
 			mcache_audit_free_verify(NULL, base, offset, rsize);
@@ -1156,7 +1168,7 @@ mcache_bkt_destroy(mcache_t *cp, mcache_bkttype_t *btp, mcache_bkt_t *bkt,
 	if (nobjs > 0) {
 		mcache_obj_t *top = bkt->bkt_obj[nobjs - 1];
 
-		if (cp->mc_flags & MCF_VERIFY) {
+		if (cp->mc_flags & MCF_DEBUG) {
 			mcache_obj_t *o = top;
 			int cnt = 0;
 
diff --git a/bsd/kern/netboot.c b/bsd/kern/netboot.c
index 6c4b5437e..664f03ef7 100644
--- a/bsd/kern/netboot.c
+++ b/bsd/kern/netboot.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2001-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -57,7 +57,8 @@
 
 #include <kern/kern_types.h>
 #include <kern/kalloc.h>
-
+#include <sys/netboot.h>
+#include <sys/imageboot.h>
 #include <pexpert/pexpert.h>
 
 //#include <libkern/libkern.h>
@@ -81,10 +82,6 @@ const void *
 IOBSDRegistryEntryGetData(void * entry, const char * property_name, 
 			  int * packet_length);
 
-extern int vndevice_root_image(const char * path, char devname[], 
-			       dev_t * dev_p);
-extern int di_root_image(const char *path, char devname[], dev_t *dev_p);
-
 #define BOOTP_RESPONSE	"bootp-response"
 #define BSDP_RESPONSE	"bsdp-response"
 #define DHCP_RESPONSE	"dhcp-response"
@@ -92,16 +89,6 @@ extern int di_root_image(const char *path, char devname[], dev_t *dev_p);
 /* forward declarations */
 int	inet_aton(char * cp, struct in_addr * pin);
 
-boolean_t	netboot_iaddr(struct in_addr * iaddr_p);
-boolean_t	netboot_rootpath(struct in_addr * server_ip,
-				 char * name, int name_len, 
-				 char * path, int path_len);
-int	netboot_setup(void);
-int	netboot_mountroot(void);
-int	netboot_root(void);
-
-
-
 #define IP_FORMAT	"%d.%d.%d.%d"
 #define IP_CH(ip)	((u_char *)ip)
 #define IP_LIST(ip)	IP_CH(ip)[0],IP_CH(ip)[1],IP_CH(ip)[2],IP_CH(ip)[3]
@@ -125,29 +112,10 @@ struct netboot_info {
     char *		image_path;
     int			image_path_length;
     NetBootImageType	image_type;
-    boolean_t		use_hdix;
+    char *		second_image_path;
+    int			second_image_path_length;
 };
 
-int
-inet_aton(char * cp, struct in_addr * pin)
-{
-    u_char * b = (u_char *)pin;
-    int	   i;
-    char * p;
-
-    for (p = cp, i = 0; i < 4; i++) {
-	u_long l = strtoul(p, 0, 0);
-	if (l > 255)
-	    return (FALSE);
-	b[i] = l;
-	p = strchr(p, '.');
-	if (i < 3 && p == NULL)
-	    return (FALSE);
-	p++;
-    }
-    return (TRUE);
-}
-
 /*
  * Function: parse_booter_path
  * Purpose:
@@ -251,7 +219,7 @@ static __inline__ boolean_t
 parse_netboot_path(char * path, struct in_addr * iaddr_p, char const * * host,
 		   char * * mount_dir, char * * image_path)
 {
-	static char	tmp[MAX_IPv4_STR_LEN];	/* Danger - not thread safe */
+    static char	tmp[MAX_IPv4_STR_LEN];	/* Danger - not thread safe */
     char *	start;
     char *	colon;
 
@@ -346,35 +314,46 @@ get_root_path(char * root_path)
 
 }
 
+static void
+save_path(char * * str_p, int * length_p, char * path)
+{
+    *length_p = strlen(path) + 1;
+    *str_p = (char *)kalloc(*length_p);
+    strlcpy(*str_p, path, *length_p);
+    return;
+}
+
 static struct netboot_info *
 netboot_info_init(struct in_addr iaddr)
 {
-    struct netboot_info *	info;
+    boolean_t			have_root_path = FALSE;
+    struct netboot_info *	info = NULL;
     char * 			root_path = NULL;
-    boolean_t			use_hdix = TRUE;
-    char *			vndevice = NULL;
-
-    MALLOC_ZONE(vndevice, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
-    if (vndevice == NULL)
-    	panic("netboot_info_init: M_NAMEI zone exhausted");
-    if (PE_parse_boot_argn("vndevice", vndevice, MAXPATHLEN) == TRUE) {
-	use_hdix = FALSE;
-    }
-    FREE_ZONE(vndevice, MAXPATHLEN, M_NAMEI);
 
     info = (struct netboot_info *)kalloc(sizeof(*info));
     bzero(info, sizeof(*info));
     info->client_ip = iaddr;
     info->image_type = kNetBootImageTypeUnknown;
-    info->use_hdix = use_hdix;
 
     /* check for a booter-specified path then a NetBoot path */
     MALLOC_ZONE(root_path, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
     if (root_path  == NULL)
     	panic("netboot_info_init: M_NAMEI zone exhausted");
-    if (PE_parse_boot_argn("rp", root_path, MAXPATHLEN) == TRUE
-	|| PE_parse_boot_argn("rootpath", root_path, MAXPATHLEN) == TRUE
-	|| get_root_path(root_path) == TRUE) {
+    if (PE_parse_boot_argn("rp0", root_path, MAXPATHLEN) == TRUE
+	|| PE_parse_boot_argn("rp", root_path, MAXPATHLEN) == TRUE
+	|| PE_parse_boot_argn("rootpath", root_path, MAXPATHLEN) == TRUE) {
+	if (imageboot_format_is_valid(root_path)) {
+	    printf("netboot_info_init: rp0='%s' isn't a network path,"
+		   " ignoring\n", root_path);
+	}
+	else {
+	    have_root_path = TRUE;
+	}
+    }
+    if (have_root_path == FALSE) {
+	have_root_path = get_root_path(root_path);
+    }
+    if (have_root_path) {
 	const char * server_name = NULL;
 	char * mount_point = NULL;
 	char * image_path = NULL;
@@ -391,11 +370,11 @@ netboot_info_init(struct in_addr iaddr)
 	    strlcpy(info->server_name, server_name, info->server_name_length);
 	    strlcpy(info->mount_point, mount_point, info->mount_point_length);
 	    
-	    printf("Server %s Mount %s", 
+	    printf("netboot: NFS Server %s Mount %s", 
 		   server_name, info->mount_point);
 	    if (image_path != NULL) {
 		boolean_t 	needs_slash = FALSE;
-
+		
 		info->image_path_length = strlen(image_path) + 1;
 		if (image_path[0] != '/') {
 		    needs_slash = TRUE;
@@ -416,16 +395,27 @@ netboot_info_init(struct in_addr iaddr)
 	}
 	else if (strncmp(root_path, kNetBootRootPathPrefixHTTP, 
 			 strlen(kNetBootRootPathPrefixHTTP)) == 0) {
-	    /* only HDIX supports HTTP */
 	    info->image_type = kNetBootImageTypeHTTP;
-	    info->use_hdix = TRUE;
-	    info->image_path_length = strlen(root_path) + 1;
-	    info->image_path = (char *)kalloc(info->image_path_length);
-	    strlcpy(info->image_path, root_path, info->image_path_length);
+	    save_path(&info->image_path, &info->image_path_length,
+		      root_path);
+	    printf("netboot: HTTP URL %s\n",  info->image_path);
 	}	    
 	else {
 	    printf("netboot: root path uses unrecognized format\n");
 	}
+
+	/* check for image-within-image */
+	if (info->image_path != NULL) {
+		if (PE_parse_boot_argn(IMAGEBOOT_ROOT_ARG, root_path, MAXPATHLEN)
+			|| PE_parse_boot_argn("rp1", root_path, MAXPATHLEN)) {
+			/* rp1/root-dmg is the second-level image */
+			save_path(&info->second_image_path, &info->second_image_path_length, 
+					root_path);
+		}
+	}
+	if (info->second_image_path != NULL) {
+		printf("netboot: nested image %s\n", info->second_image_path);
+	}
     }
     FREE_ZONE(root_path, MAXPATHLEN, M_NAMEI);
     return (info);
@@ -446,6 +436,9 @@ netboot_info_free(struct netboot_info * * info_p)
 	if (info->image_path) {
 	    kfree(info->image_path, info->image_path_length);
 	}
+	if (info->second_image_path) {
+	    kfree(info->second_image_path, info->second_image_path_length);
+	}
 	kfree(info, sizeof(*info));
     }
     *info_p = NULL;
@@ -565,13 +558,10 @@ route_cmd(int cmd, struct in_addr d, struct in_addr g,
     mask.sin_len = sizeof(mask);
     mask.sin_family = AF_INET;
     mask.sin_addr = m;
-    lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
-    lck_mtx_lock(rnh_lock);
-    error = rtrequest_scoped_locked(cmd, (struct sockaddr *)&dst,
-				    (struct sockaddr *)&gw,
-				    (struct sockaddr *)&mask,
-				    flags, NULL, ifscope);
-    lck_mtx_unlock(rnh_lock);
+
+    error = rtrequest_scoped(cmd, (struct sockaddr *)&dst,
+        (struct sockaddr *)&gw, (struct sockaddr *)&mask, flags, NULL, ifscope);
+
     return (error);
 
 }
@@ -751,53 +741,24 @@ failed:
 int
 netboot_setup()
 {
-    dev_t 	dev;
     int 	error = 0;
 
     if (S_netboot_info_p == NULL
 	|| S_netboot_info_p->image_path == NULL) {
 	goto done;
     }
-    if (S_netboot_info_p->use_hdix) {
-	printf("netboot_setup: calling di_root_image\n");
-	error = di_root_image(S_netboot_info_p->image_path, 
-			      (char *)rootdevice, &dev);
-	if (error) {
-	    printf("netboot_setup: di_root_image: failed %d\n", error);
-	    goto done;
-	}
+    printf("netboot_setup: calling imageboot_mount_image\n");
+    error = imageboot_mount_image(S_netboot_info_p->image_path, -1);
+    if (error != 0) {
+	printf("netboot: failed to mount root image, %d\n", error);
     }
-    else {
-	printf("netboot_setup: calling vndevice_root_image\n");
-	error = vndevice_root_image(S_netboot_info_p->image_path, 
-				    (char *)rootdevice, &dev);
-	if (error) {
-	    printf("netboot_setup: vndevice_root_image: failed %d\n", error);
-	    goto done;
+    else if (S_netboot_info_p->second_image_path != NULL) {
+	error = imageboot_mount_image(S_netboot_info_p->second_image_path, 0);
+	if (error != 0) {
+	    printf("netboot: failed to mount second root image, %d\n", error);
 	}
     }
-    rootdev = dev;
-    mountroot = NULL;
-    printf("netboot: root device 0x%x\n", (int32_t)rootdev);
-    error = vfs_mountroot();
-    if (error == 0 && rootvnode != NULL) {
-        struct vnode *tvp;
-        struct vnode *newdp;
-
-	/* Get the vnode for '/'.  Set fdp->fd_fd.fd_cdir to reference it. */
-	if (VFS_ROOT(TAILQ_LAST(&mountlist,mntlist), &newdp, vfs_context_kernel()))
-		panic("netboot_setup: cannot find root vnode");
-	vnode_ref(newdp);
-	vnode_put(newdp);
-	tvp = rootvnode;
-	vnode_rele(tvp);
-	filedesc0.fd_cdir = newdp;
-	rootvnode = newdp;
-	mount_list_lock();
-	TAILQ_REMOVE(&mountlist, TAILQ_FIRST(&mountlist), mnt_list);
-	mount_list_unlock();
-	mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;
-    }
+
  done:
     netboot_info_free(&S_netboot_info_p);
     return (error);
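[Editor's note: the netboot_info_init() hunk above gives "rp0" priority over "rp" and "rootpath", and rejects values that imageboot_format_is_valid() claims, since those name local disk images rather than network paths. A condensed sketch of the resulting precedence — illustrative only; the shipping code also logs the rejected value before falling back:

    static boolean_t
    choose_netboot_root(char * root_path)   /* root_path is MAXPATHLEN bytes */
    {
        if (PE_parse_boot_argn("rp0", root_path, MAXPATHLEN)
            || PE_parse_boot_argn("rp", root_path, MAXPATHLEN)
            || PE_parse_boot_argn("rootpath", root_path, MAXPATHLEN)) {
            /* an imageboot-style (local dmg) path is not netboot's to handle */
            if (!imageboot_format_is_valid(root_path))
                return (TRUE);
        }
        /* fall back to the path supplied by the booter/DHCP reply */
        return (get_root_path(root_path));
    }
]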
diff --git a/bsd/kern/policy_check.c b/bsd/kern/policy_check.c
new file mode 100644
index 000000000..e5573a99f
--- /dev/null
+++ b/bsd/kern/policy_check.c
@@ -0,0 +1,511 @@
+#include <sys/param.h>
+#include <sys/systm.h>		/* XXX printf() */
+
+#include <sys/types.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/kauth.h>
+#include <sys/mount.h>
+#include <sys/msg.h>
+#include <sys/proc.h>
+#include <sys/socketvar.h>
+#include <sys/vnode.h>
+#include <security/mac.h>
+#include <security/mac_policy.h>
+
+#include <libkern/OSDebug.h>	/* OSBPrintBacktrace */
+
+
+/* forward declaration; see bsd_init.c */
+errno_t check_policy_init(int);
+int get_thread_lock_count(thread_t th);         /* forced forward */
+
+/*
+ * Policy flags used when the policy is enabled
+ *
+ * Note:	CHECK_POLICY_CHECK is probably not very useful unless you
+ *		are debugging the kernel and set a breakpoint.
+ */
+#define	CHECK_POLICY_CHECK	0x00000001	/* Check on calls */
+#define	CHECK_POLICY_FAIL	0x00000002	/* EPERM on fails */
+#define	CHECK_POLICY_BACKTRACE	0x00000004	/* Show call stack on fails */
+#define	CHECK_POLICY_PANIC	0x00000008	/* Panic on fails */
+#define	CHECK_POLICY_PERIODIC	0x00000010	/* Show fails periodically */
+
+static int policy_flags = 0;
+
+
+#define CHECK_SET_INT_HOOK(x)	.mpo_##x = (mpo_##x##_t *)common_int_hook,
+#define CHECK_SET_VOID_HOOK(x)	.mpo_##x = (mpo_##x##_t *)common_void_hook,
+
+
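[Editor's note: each CHECK_SET_*_HOOK() invocation below token-pastes the hook name into both the struct field and its function-pointer type, so every listed operation funnels into the same counting hook. For example:

    CHECK_SET_INT_HOOK(vnode_check_open)
    /* expands to */
    .mpo_vnode_check_open = (mpo_vnode_check_open_t *)common_int_hook,
]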
+/*
+ * Init; currently, we only print our arrival notice.
+ */
+static void
+hook_policy_init(struct mac_policy_conf *mpc)
+{
+	printf("Policy '%s' = '%s' ready\n", mpc->mpc_name, mpc->mpc_fullname);
+}
+
+static void
+hook_policy_initbsd(struct mac_policy_conf *mpc)
+{
+	/* called with policy_grab_exclusive mutex held; exempt */
+	printf("hook_policy_initbsd: %s\n", mpc->mpc_name);
+}
+
+
+/* Implementation */
+#define	CLASS_PERIOD_LIMIT	10000
+#define	CLASS_PERIOD_MULT	20
+
+static int policy_check_event = 1;
+static int policy_check_period = 1;
+static int policy_check_next = CLASS_PERIOD_MULT;
+
+
+static int
+common_int_hook(void)
+{
+	int	i;
+	int	rv = 0;
+
+	if ((i = get_thread_lock_count(current_thread())) != 0) {
+		/*
+		 * fail the MACF check if we hold a lock; this assumes
+		 * a non-void (authorization) MACF hook.
+		 */
+		if (policy_flags & CHECK_POLICY_FAIL)
+			rv = EPERM;
+
+		/*
+		 * display a backtrace if we hold a lock and we are not
+		 * going to panic
+		 */
+		if ((policy_flags & (CHECK_POLICY_BACKTRACE | CHECK_POLICY_PANIC)) == CHECK_POLICY_BACKTRACE) {
+			if (policy_flags & CHECK_POLICY_PERIODIC) {
+			    /* at exponentially increasing intervals */
+			    if (!(policy_check_event % policy_check_period)) {
+				if (policy_check_event <= policy_check_next || policy_check_period == CLASS_PERIOD_LIMIT) {
+					/*
+					 * According to Derek, we could
+					 * technically get a symbolicated name
+					 * here, if we refactored some code
+					 * and set the "keepsyms=1" boot
+					 * argument...
+					 */
+					OSReportWithBacktrace("calling MACF hook with mutex count %d (event %d) ", i, policy_check_event);
+				}
+			    } else {
+				if (policy_check_period < CLASS_PERIOD_LIMIT) {
+					policy_check_next *= CLASS_PERIOD_MULT;
+					policy_check_period *= CLASS_PERIOD_MULT;
+				}
+			    }
+			} else {
+				/* always */
+				OSReportWithBacktrace("calling MACF hook with mutex count %d (event %d) ", i, policy_check_event);
+			}
+		}
+
+		/* Panic */
+		if (policy_flags & CHECK_POLICY_PANIC)
+			panic("calling MACF hook with mutex count %d\n", i);
+
+		/* count for non-fatal tracing */
+		policy_check_event++;
+	}
+
+	return rv;
+}
+
+static void
+common_void_hook(void)
+{
+	(void)common_int_hook();
+
+	return;
+}
+
+
+/*
+ * Policy hooks; one per possible hook
+ */
+static struct mac_policy_ops policy_ops = {
+
+	/* separate init */
+	.mpo_policy_init = hook_policy_init,
+	.mpo_policy_initbsd = hook_policy_initbsd,
+
+	/* operations which return int */
+	CHECK_SET_INT_HOOK(audit_check_postselect)
+	CHECK_SET_INT_HOOK(audit_check_preselect)
+	CHECK_SET_INT_HOOK(bpfdesc_check_receive)
+	CHECK_SET_INT_HOOK(cred_check_label_update_execve)
+	CHECK_SET_INT_HOOK(cred_check_label_update)
+	CHECK_SET_INT_HOOK(cred_check_visible)
+	CHECK_SET_INT_HOOK(cred_label_externalize_audit)
+	CHECK_SET_INT_HOOK(cred_label_externalize)
+	CHECK_SET_INT_HOOK(cred_label_internalize)
+	CHECK_SET_INT_HOOK(file_check_change_offset)
+	CHECK_SET_INT_HOOK(file_check_create)
+	CHECK_SET_INT_HOOK(file_check_dup)
+	CHECK_SET_INT_HOOK(file_check_fcntl)
+	CHECK_SET_INT_HOOK(file_check_get)
+	CHECK_SET_INT_HOOK(file_check_get_offset)
+	CHECK_SET_INT_HOOK(file_check_inherit)
+	CHECK_SET_INT_HOOK(file_check_ioctl)
+	CHECK_SET_INT_HOOK(file_check_lock)
+	CHECK_SET_INT_HOOK(file_check_mmap)
+	CHECK_SET_INT_HOOK(file_check_receive)
+	CHECK_SET_INT_HOOK(file_check_set)
+	CHECK_SET_INT_HOOK(ifnet_check_label_update)
+	CHECK_SET_INT_HOOK(ifnet_check_transmit)
+	CHECK_SET_INT_HOOK(ifnet_label_externalize)
+	CHECK_SET_INT_HOOK(ifnet_label_internalize)
+	CHECK_SET_INT_HOOK(inpcb_check_deliver)
+	CHECK_SET_INT_HOOK(inpcb_label_init)
+	CHECK_SET_INT_HOOK(iokit_check_device)
+	CHECK_SET_INT_HOOK(iokit_check_open)
+	CHECK_SET_INT_HOOK(iokit_check_set_properties)
+	CHECK_SET_INT_HOOK(iokit_check_hid_control)
+	CHECK_SET_INT_HOOK(ipq_label_compare)
+	CHECK_SET_INT_HOOK(ipq_label_init)
+	CHECK_SET_INT_HOOK(lctx_check_label_update)
+	CHECK_SET_INT_HOOK(lctx_label_externalize)
+	CHECK_SET_INT_HOOK(lctx_label_internalize)
+	CHECK_SET_INT_HOOK(mbuf_label_init)
+	CHECK_SET_INT_HOOK(mount_check_fsctl)
+	CHECK_SET_INT_HOOK(mount_check_getattr)
+	CHECK_SET_INT_HOOK(mount_check_label_update)
+	CHECK_SET_INT_HOOK(mount_check_mount)
+	CHECK_SET_INT_HOOK(mount_check_remount)
+	CHECK_SET_INT_HOOK(mount_check_setattr)
+	CHECK_SET_INT_HOOK(mount_check_stat)
+	CHECK_SET_INT_HOOK(mount_check_umount)
+	CHECK_SET_INT_HOOK(mount_label_externalize)
+	CHECK_SET_INT_HOOK(mount_label_internalize)
+	CHECK_SET_INT_HOOK(pipe_check_ioctl)
+	CHECK_SET_INT_HOOK(pipe_check_kqfilter)
+	CHECK_SET_INT_HOOK(pipe_check_label_update)
+	CHECK_SET_INT_HOOK(pipe_check_read)
+	CHECK_SET_INT_HOOK(pipe_check_select)
+	CHECK_SET_INT_HOOK(pipe_check_stat)
+	CHECK_SET_INT_HOOK(pipe_check_write)
+	CHECK_SET_INT_HOOK(pipe_label_externalize)
+	CHECK_SET_INT_HOOK(pipe_label_internalize)
+	CHECK_SET_INT_HOOK(policy_syscall)
+	CHECK_SET_INT_HOOK(port_check_copy_send)
+	CHECK_SET_INT_HOOK(port_check_hold_receive)
+	CHECK_SET_INT_HOOK(port_check_hold_send_once)
+	CHECK_SET_INT_HOOK(port_check_hold_send)
+	CHECK_SET_INT_HOOK(port_check_label_update)
+	CHECK_SET_INT_HOOK(port_check_make_send_once)
+	CHECK_SET_INT_HOOK(port_check_make_send)
+	CHECK_SET_INT_HOOK(port_check_method)
+	CHECK_SET_INT_HOOK(port_check_move_receive)
+	CHECK_SET_INT_HOOK(port_check_move_send_once)
+	CHECK_SET_INT_HOOK(port_check_move_send)
+	CHECK_SET_INT_HOOK(port_check_receive)
+	CHECK_SET_INT_HOOK(port_check_send)
+	CHECK_SET_INT_HOOK(port_check_service)
+	CHECK_SET_INT_HOOK(port_label_compute)
+	CHECK_SET_INT_HOOK(posixsem_check_create)
+	CHECK_SET_INT_HOOK(posixsem_check_open)
+	CHECK_SET_INT_HOOK(posixsem_check_post)
+	CHECK_SET_INT_HOOK(posixsem_check_unlink)
+	CHECK_SET_INT_HOOK(posixsem_check_wait)
+	CHECK_SET_INT_HOOK(posixshm_check_create)
+	CHECK_SET_INT_HOOK(posixshm_check_mmap)
+	CHECK_SET_INT_HOOK(posixshm_check_open)
+	CHECK_SET_INT_HOOK(posixshm_check_stat)
+	CHECK_SET_INT_HOOK(posixshm_check_truncate)
+	CHECK_SET_INT_HOOK(posixshm_check_unlink)
+	CHECK_SET_INT_HOOK(priv_check)
+	/* relative ordinal location of "priv_grant" */
+	CHECK_SET_INT_HOOK(proc_check_debug)
+	CHECK_SET_INT_HOOK(proc_check_fork)
+	CHECK_SET_INT_HOOK(proc_check_getaudit)
+	CHECK_SET_INT_HOOK(proc_check_getauid)
+	CHECK_SET_INT_HOOK(proc_check_getlcid)
+	CHECK_SET_INT_HOOK(proc_check_map_anon)
+	CHECK_SET_INT_HOOK(proc_check_mprotect)
+	CHECK_SET_INT_HOOK(proc_check_sched)
+	CHECK_SET_INT_HOOK(proc_check_setaudit)
+	CHECK_SET_INT_HOOK(proc_check_setauid)
+	CHECK_SET_INT_HOOK(proc_check_setlcid)
+	CHECK_SET_INT_HOOK(proc_check_signal)
+	CHECK_SET_INT_HOOK(proc_check_suspend_resume)
+	CHECK_SET_INT_HOOK(proc_check_wait)
+	CHECK_SET_INT_HOOK(socket_check_accept)
+	CHECK_SET_INT_HOOK(socket_check_accepted)
+	CHECK_SET_INT_HOOK(socket_check_bind)
+	CHECK_SET_INT_HOOK(socket_check_connect)
+	CHECK_SET_INT_HOOK(socket_check_create)
+	CHECK_SET_INT_HOOK(socket_check_deliver)
+	CHECK_SET_INT_HOOK(socket_check_kqfilter)
+	CHECK_SET_INT_HOOK(socket_check_label_update)
+	CHECK_SET_INT_HOOK(socket_check_listen)
+	CHECK_SET_INT_HOOK(socket_check_receive)
+	CHECK_SET_INT_HOOK(socket_check_received)
+	CHECK_SET_INT_HOOK(socket_check_select)
+	CHECK_SET_INT_HOOK(socket_check_send)
+	CHECK_SET_INT_HOOK(socket_check_stat)
+	CHECK_SET_INT_HOOK(socket_check_setsockopt)
+	CHECK_SET_INT_HOOK(socket_check_getsockopt)
+	CHECK_SET_INT_HOOK(socket_label_externalize)
+	CHECK_SET_INT_HOOK(socket_label_init)
+	CHECK_SET_INT_HOOK(socket_label_internalize)
+	CHECK_SET_INT_HOOK(socketpeer_label_externalize)
+	CHECK_SET_INT_HOOK(socketpeer_label_init)
+	CHECK_SET_INT_HOOK(system_check_acct)
+	CHECK_SET_INT_HOOK(system_check_audit)
+	CHECK_SET_INT_HOOK(system_check_auditctl)
+	CHECK_SET_INT_HOOK(system_check_auditon)
+	CHECK_SET_INT_HOOK(system_check_chud)
+	CHECK_SET_INT_HOOK(system_check_host_priv)
+	CHECK_SET_INT_HOOK(system_check_nfsd)
+	CHECK_SET_INT_HOOK(system_check_reboot)
+	CHECK_SET_INT_HOOK(system_check_settime)
+	CHECK_SET_INT_HOOK(system_check_swapoff)
+	CHECK_SET_INT_HOOK(system_check_swapon)
+	CHECK_SET_INT_HOOK(system_check_sysctl)
+	CHECK_SET_INT_HOOK(sysvmsq_check_enqueue)
+	CHECK_SET_INT_HOOK(sysvmsq_check_msgrcv)
+	CHECK_SET_INT_HOOK(sysvmsq_check_msgrmid)
+	CHECK_SET_INT_HOOK(sysvmsq_check_msqctl)
+	CHECK_SET_INT_HOOK(sysvmsq_check_msqget)
+	CHECK_SET_INT_HOOK(sysvmsq_check_msqrcv)
+	CHECK_SET_INT_HOOK(sysvmsq_check_msqsnd)
+	CHECK_SET_INT_HOOK(sysvsem_check_semctl)
+	CHECK_SET_INT_HOOK(sysvsem_check_semget)
+	CHECK_SET_INT_HOOK(sysvsem_check_semop)
+	CHECK_SET_INT_HOOK(sysvshm_check_shmat)
+	CHECK_SET_INT_HOOK(sysvshm_check_shmctl)
+	CHECK_SET_INT_HOOK(sysvshm_check_shmdt)
+	CHECK_SET_INT_HOOK(sysvshm_check_shmget)
+	CHECK_SET_INT_HOOK(proc_check_get_task_name)
+	CHECK_SET_INT_HOOK(proc_check_get_task)
+	CHECK_SET_INT_HOOK(task_label_externalize)
+	CHECK_SET_INT_HOOK(task_label_internalize)
+	CHECK_SET_INT_HOOK(vnode_check_access)
+	CHECK_SET_INT_HOOK(vnode_check_chdir)
+	CHECK_SET_INT_HOOK(vnode_check_chroot)
+	CHECK_SET_INT_HOOK(vnode_check_create)
+	CHECK_SET_INT_HOOK(vnode_check_deleteextattr)
+	CHECK_SET_INT_HOOK(vnode_check_exchangedata)
+	CHECK_SET_INT_HOOK(vnode_check_exec)
+	CHECK_SET_INT_HOOK(vnode_check_fsgetpath)
+	CHECK_SET_INT_HOOK(vnode_check_signature)
+	CHECK_SET_INT_HOOK(vnode_check_getattrlist)
+	CHECK_SET_INT_HOOK(vnode_check_getextattr)
+	CHECK_SET_INT_HOOK(vnode_check_ioctl)
+	CHECK_SET_INT_HOOK(vnode_check_kqfilter)
+	CHECK_SET_INT_HOOK(vnode_check_label_update)
+	CHECK_SET_INT_HOOK(vnode_check_link)
+	CHECK_SET_INT_HOOK(vnode_check_listextattr)
+	CHECK_SET_INT_HOOK(vnode_check_lookup)
+	CHECK_SET_INT_HOOK(vnode_check_open)
+	CHECK_SET_INT_HOOK(vnode_check_read)
+	CHECK_SET_INT_HOOK(vnode_check_readdir)
+	CHECK_SET_INT_HOOK(vnode_check_readlink)
+	CHECK_SET_INT_HOOK(vnode_check_rename_from)
+	CHECK_SET_INT_HOOK(vnode_check_rename_to)
+	CHECK_SET_INT_HOOK(vnode_check_revoke)
+	CHECK_SET_INT_HOOK(vnode_check_searchfs)
+	CHECK_SET_INT_HOOK(vnode_check_select)
+	CHECK_SET_INT_HOOK(vnode_check_setattrlist)
+	CHECK_SET_INT_HOOK(vnode_check_setextattr)
+	CHECK_SET_INT_HOOK(vnode_check_setflags)
+	CHECK_SET_INT_HOOK(vnode_check_setmode)
+	CHECK_SET_INT_HOOK(vnode_check_setowner)
+	CHECK_SET_INT_HOOK(vnode_check_setutimes)
+	CHECK_SET_INT_HOOK(vnode_check_stat)
+	CHECK_SET_INT_HOOK(vnode_check_truncate)
+	CHECK_SET_INT_HOOK(vnode_check_uipc_bind)
+	CHECK_SET_INT_HOOK(vnode_check_uipc_connect)
+	CHECK_SET_INT_HOOK(vnode_check_unlink)
+	CHECK_SET_INT_HOOK(vnode_check_write)
+	CHECK_SET_INT_HOOK(vnode_label_associate_extattr)
+	CHECK_SET_INT_HOOK(vnode_label_externalize_audit)
+	CHECK_SET_INT_HOOK(vnode_label_externalize)
+	CHECK_SET_INT_HOOK(vnode_label_internalize)
+	CHECK_SET_INT_HOOK(vnode_label_store)
+	CHECK_SET_INT_HOOK(vnode_label_update_extattr)
+	CHECK_SET_INT_HOOK(vnode_notify_create)
+
+	/* operations which return void */
+	CHECK_SET_VOID_HOOK(bpfdesc_label_init)
+	CHECK_SET_VOID_HOOK(bpfdesc_label_destroy)
+	CHECK_SET_VOID_HOOK(bpfdesc_label_associate)
+	CHECK_SET_VOID_HOOK(cred_label_associate_fork)
+	CHECK_SET_VOID_HOOK(cred_label_associate_kernel)
+	CHECK_SET_VOID_HOOK(cred_label_associate)
+	CHECK_SET_VOID_HOOK(cred_label_associate_user)
+	CHECK_SET_VOID_HOOK(cred_label_destroy)
+	CHECK_SET_VOID_HOOK(cred_label_init)
+	CHECK_SET_VOID_HOOK(cred_label_update_execve)
+	CHECK_SET_VOID_HOOK(cred_label_update)
+	CHECK_SET_VOID_HOOK(devfs_label_associate_device)
+	CHECK_SET_VOID_HOOK(devfs_label_associate_directory)
+	CHECK_SET_VOID_HOOK(devfs_label_copy)
+	CHECK_SET_VOID_HOOK(devfs_label_destroy)
+	CHECK_SET_VOID_HOOK(devfs_label_init)
+	CHECK_SET_VOID_HOOK(devfs_label_update)
+	CHECK_SET_VOID_HOOK(file_check_mmap_downgrade)
+	CHECK_SET_VOID_HOOK(file_label_associate)
+	CHECK_SET_VOID_HOOK(file_label_destroy)
+	CHECK_SET_VOID_HOOK(file_label_init)
+	CHECK_SET_VOID_HOOK(ifnet_label_associate)
+	CHECK_SET_VOID_HOOK(ifnet_label_copy)
+	CHECK_SET_VOID_HOOK(ifnet_label_destroy)
+	CHECK_SET_VOID_HOOK(ifnet_label_init)
+	CHECK_SET_VOID_HOOK(ifnet_label_recycle)
+	CHECK_SET_VOID_HOOK(ifnet_label_update)
+	CHECK_SET_VOID_HOOK(inpcb_label_associate)
+	CHECK_SET_VOID_HOOK(inpcb_label_destroy)
+	CHECK_SET_VOID_HOOK(inpcb_label_recycle)
+	CHECK_SET_VOID_HOOK(inpcb_label_update)
+	CHECK_SET_VOID_HOOK(ipq_label_associate)
+	CHECK_SET_VOID_HOOK(ipq_label_destroy)
+	CHECK_SET_VOID_HOOK(ipq_label_update)
+	CHECK_SET_VOID_HOOK(lctx_label_destroy)
+	CHECK_SET_VOID_HOOK(lctx_label_init)
+	CHECK_SET_VOID_HOOK(lctx_label_update)
+	CHECK_SET_VOID_HOOK(lctx_notify_create)
+	CHECK_SET_VOID_HOOK(lctx_notify_join)
+	CHECK_SET_VOID_HOOK(lctx_notify_leave)
+	CHECK_SET_VOID_HOOK(mbuf_label_associate_bpfdesc)
+	CHECK_SET_VOID_HOOK(mbuf_label_associate_ifnet)
+	CHECK_SET_VOID_HOOK(mbuf_label_associate_inpcb)
+	CHECK_SET_VOID_HOOK(mbuf_label_associate_ipq)
+	CHECK_SET_VOID_HOOK(mbuf_label_associate_linklayer)
+	CHECK_SET_VOID_HOOK(mbuf_label_associate_multicast_encap)
+	CHECK_SET_VOID_HOOK(mbuf_label_associate_netlayer)
+	CHECK_SET_VOID_HOOK(mbuf_label_associate_socket)
+	CHECK_SET_VOID_HOOK(mbuf_label_copy)
+	CHECK_SET_VOID_HOOK(mbuf_label_destroy)
+	CHECK_SET_VOID_HOOK(mount_label_associate)
+	CHECK_SET_VOID_HOOK(mount_label_destroy)
+	CHECK_SET_VOID_HOOK(mount_label_init)
+	CHECK_SET_VOID_HOOK(netinet_fragment)
+	CHECK_SET_VOID_HOOK(netinet_icmp_reply)
+	CHECK_SET_VOID_HOOK(netinet_tcp_reply)
+	CHECK_SET_VOID_HOOK(pipe_label_associate)
+	CHECK_SET_VOID_HOOK(pipe_label_copy)
+	CHECK_SET_VOID_HOOK(pipe_label_destroy)
+	CHECK_SET_VOID_HOOK(pipe_label_init)
+	CHECK_SET_VOID_HOOK(pipe_label_update)
+	CHECK_SET_VOID_HOOK(policy_destroy)
+	/* relative ordinal location of "policy_init" */
+	/* relative ordinal location of "policy_initbsd" */
+	CHECK_SET_VOID_HOOK(port_label_associate_kernel)
+	CHECK_SET_VOID_HOOK(port_label_associate)
+	CHECK_SET_VOID_HOOK(port_label_copy)
+	CHECK_SET_VOID_HOOK(port_label_destroy)
+	CHECK_SET_VOID_HOOK(port_label_init)
+	CHECK_SET_VOID_HOOK(port_label_update_cred)
+	CHECK_SET_VOID_HOOK(port_label_update_kobject)
+	CHECK_SET_VOID_HOOK(posixsem_label_associate)
+	CHECK_SET_VOID_HOOK(posixsem_label_destroy)
+	CHECK_SET_VOID_HOOK(posixsem_label_init)
+	CHECK_SET_VOID_HOOK(posixshm_label_associate)
+	CHECK_SET_VOID_HOOK(posixshm_label_destroy)
+	CHECK_SET_VOID_HOOK(posixshm_label_init)
+	CHECK_SET_VOID_HOOK(proc_label_destroy)
+	CHECK_SET_VOID_HOOK(proc_label_init)
+	CHECK_SET_VOID_HOOK(socket_label_associate_accept)
+	CHECK_SET_VOID_HOOK(socket_label_associate)
+	CHECK_SET_VOID_HOOK(socket_label_copy)
+	CHECK_SET_VOID_HOOK(socket_label_destroy)
+	CHECK_SET_VOID_HOOK(socket_label_update)
+	CHECK_SET_VOID_HOOK(socketpeer_label_associate_mbuf)
+	CHECK_SET_VOID_HOOK(socketpeer_label_associate_socket)
+	CHECK_SET_VOID_HOOK(socketpeer_label_destroy)
+	CHECK_SET_VOID_HOOK(sysvmsg_label_associate)
+	CHECK_SET_VOID_HOOK(sysvmsg_label_destroy)
+	CHECK_SET_VOID_HOOK(sysvmsg_label_init)
+	CHECK_SET_VOID_HOOK(sysvmsg_label_recycle)
+	CHECK_SET_VOID_HOOK(sysvmsq_label_associate)
+	CHECK_SET_VOID_HOOK(sysvmsq_label_destroy)
+	CHECK_SET_VOID_HOOK(sysvmsq_label_init)
+	CHECK_SET_VOID_HOOK(sysvmsq_label_recycle)
+	CHECK_SET_VOID_HOOK(sysvsem_label_associate)
+	CHECK_SET_VOID_HOOK(sysvsem_label_destroy)
+	CHECK_SET_VOID_HOOK(sysvsem_label_init)
+	CHECK_SET_VOID_HOOK(sysvsem_label_recycle)
+	CHECK_SET_VOID_HOOK(sysvshm_label_associate)
+	CHECK_SET_VOID_HOOK(sysvshm_label_destroy)
+	CHECK_SET_VOID_HOOK(sysvshm_label_init)
+	CHECK_SET_VOID_HOOK(sysvshm_label_recycle)
+	CHECK_SET_VOID_HOOK(task_label_associate_kernel)
+	CHECK_SET_VOID_HOOK(task_label_associate)
+	CHECK_SET_VOID_HOOK(task_label_copy)
+	CHECK_SET_VOID_HOOK(task_label_destroy)
+	CHECK_SET_VOID_HOOK(task_label_init)
+	CHECK_SET_VOID_HOOK(task_label_update)
+	CHECK_SET_VOID_HOOK(vnode_label_associate_devfs)
+	CHECK_SET_VOID_HOOK(vnode_label_associate_file)
+	CHECK_SET_VOID_HOOK(vnode_label_associate_pipe)
+	CHECK_SET_VOID_HOOK(vnode_label_associate_posixsem)
+	CHECK_SET_VOID_HOOK(vnode_label_associate_posixshm)
+	CHECK_SET_VOID_HOOK(vnode_label_associate_singlelabel)
+	CHECK_SET_VOID_HOOK(vnode_label_associate_socket)
+	CHECK_SET_VOID_HOOK(vnode_label_copy)
+	CHECK_SET_VOID_HOOK(vnode_label_destroy)
+	CHECK_SET_VOID_HOOK(vnode_label_init)
+	CHECK_SET_VOID_HOOK(vnode_label_recycle)
+	CHECK_SET_VOID_HOOK(vnode_label_update)
+	CHECK_SET_VOID_HOOK(vnode_notify_rename)
+	.mpo_reserved12 = common_void_hook,
+	.mpo_reserved14 = common_void_hook,
+	.mpo_reserved15 = common_void_hook,
+	.mpo_reserved16 = common_void_hook,
+	.mpo_reserved17 = common_void_hook,
+	.mpo_reserved18 = common_void_hook,
+	.mpo_reserved19 = common_void_hook,
+	.mpo_reserved20 = common_void_hook,
+	.mpo_reserved21 = common_void_hook,
+	.mpo_reserved22 = common_void_hook,
+	.mpo_reserved23 = common_void_hook,
+	.mpo_reserved24 = common_void_hook,
+	.mpo_reserved25 = common_void_hook,
+	.mpo_reserved26 = common_void_hook,
+	.mpo_reserved27 = common_void_hook,
+	.mpo_reserved28 = common_void_hook,
+	.mpo_reserved29 = common_void_hook,
+};
+
+/*
+ * Policy definition
+ */
+static struct mac_policy_conf policy_conf = {
+	.mpc_name               = "CHECK",
+	.mpc_fullname           = "Check Assumptions Policy",
+	.mpc_field_off          = NULL,		/* no label slot */
+	.mpc_labelnames         = NULL,		/* no policy label names */
+	.mpc_labelname_count    = 0,		/* count of label names is 0 */
+	.mpc_ops                = &policy_ops,	/* policy operations */
+	.mpc_loadtime_flags     = 0,
+	.mpc_runtime_flags      = 0,
+};
+
+static mac_policy_handle_t policy_handle;
+
+/*
+ * Init routine; for a loadable policy, this would be called during the KEXT
+ * initialization; we're going to call this from bsd_init() if the boot
+ * argument for checking is present.
+ */
+errno_t
+check_policy_init(int flags)
+{
+	/* Only instantiate the module if we have been asked to do checking */
+	if (!flags)
+		return 0;
+
+	policy_flags = flags;
+
+	return mac_policy_register(&policy_conf, &policy_handle, NULL);
+}
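[Editor's note: a hypothetical call site matching the comment above; the boot-argument name ("policy_check" here) and the error handling are assumptions, not a quote of bsd_init():

    int policy_check_flags = 0;

    PE_parse_boot_argn("policy_check", &policy_check_flags,
        sizeof (policy_check_flags));
    if (check_policy_init(policy_check_flags) != 0)
        printf("CHECK policy failed to register\n");

Note that check_policy_init() is a no-op returning 0 when the flags are zero, so unconditional wiring is harmless.]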
diff --git a/bsd/kern/posix_sem.c b/bsd/kern/posix_sem.c
index a2cd627f1..c312d1b84 100644
--- a/bsd/kern/posix_sem.c
+++ b/bsd/kern/posix_sem.c
@@ -30,7 +30,7 @@
  *	All Rights Reserved.
  */
 /*
- * posix_shm.c : Support for POSIX semaphore APIs
+ * posix_sem.c : Support for POSIX semaphore APIs
  *
  *	File:	posix_sem.c
  *	Author:	Ananthakrishna Ramesh
@@ -155,9 +155,9 @@ u_long	psemhash;				/* size of hash table - 1 */
 long	psemnument;			/* number of cache entries allocated */
 long	posix_sem_max = 10000;		/* tunable for max POSIX semaphores */
 					/* 10000 limits to ~1M of memory */
-SYSCTL_NODE(_kern, KERN_POSIX, posix, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Posix");
-SYSCTL_NODE(_kern_posix, OID_AUTO, sem, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Semaphores");
-SYSCTL_LONG (_kern_posix_sem, OID_AUTO, max, CTLFLAG_RW, &posix_sem_max, "max");
+SYSCTL_NODE(_kern, KERN_POSIX, posix, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Posix");
+SYSCTL_NODE(_kern_posix, OID_AUTO, sem, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Semaphores");
+SYSCTL_LONG (_kern_posix_sem, OID_AUTO, max, CTLFLAG_RW | CTLFLAG_LOCKED, &posix_sem_max, "max");
 
 struct psemstats psemstats;		/* cache effectiveness statistics */
 
@@ -524,8 +524,8 @@ sem_open(proc_t p, struct sem_open_args *uap, user_addr_t *retval)
 		pinfo->psem_flags = PSEM_DEFINED | PSEM_INCREATE;
 		pinfo->psem_usecount = 1;
 		pinfo->psem_mode = cmode;
-		pinfo->psem_uid = kauth_cred_getuid(kauth_cred_get());
-		pinfo->psem_gid = kauth_cred_get()->cr_gid;
+		pinfo->psem_uid = kauth_getuid();
+		pinfo->psem_gid = kauth_getgid();
 		bcopy(pnbuf, &pinfo->psem_name[0], PSEMNAMLEN);
 		pinfo->psem_name[PSEMNAMLEN]= 0;
 		pinfo->psem_flags &= ~PSEM_DEFINED;
@@ -643,39 +643,14 @@ bad:
 static int
 psem_access(struct pseminfo *pinfo, int mode, kauth_cred_t cred)
 {
-	mode_t mask;
-	int is_member;
+	int mode_req = ((mode & FREAD) ? S_IRUSR : 0) |
+		       ((mode & FWRITE) ? S_IWUSR : 0);
 
 	/* Otherwise, user id 0 always gets access. */
 	if (!suser(cred, NULL))
 		return (0);
 
-	mask = 0;
-
-	/* Otherwise, check the owner. */
-	if (kauth_cred_getuid(cred) == pinfo->psem_uid) {
-		if (mode & FREAD)
-			mask |= S_IRUSR;
-		if (mode & FWRITE)
-			mask |= S_IWUSR;
-		return ((pinfo->psem_mode & mask) == mask ? 0 : EACCES);
-	}
-
-	/* Otherwise, check the groups. */
-	if (kauth_cred_ismember_gid(cred, pinfo->psem_gid, &is_member) == 0 && is_member) {
-		if (mode & FREAD)
-			mask |= S_IRGRP;
-		if (mode & FWRITE)
-			mask |= S_IWGRP;
-		return ((pinfo->psem_mode & mask) == mask ? 0 : EACCES);
-	}
-
-	/* Otherwise, check everyone else. */
-	if (mode & FREAD)
-		mask |= S_IROTH;
-	if (mode & FWRITE)
-		mask |= S_IWOTH;
-	return ((pinfo->psem_mode & mask) == mask ? 0 : EACCES);
+	return(posix_cred_access(cred, pinfo->psem_uid, pinfo->psem_gid, pinfo->psem_mode, mode_req));
 }
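[Editor's note: the owner/group/other ladder deleted above — and the identical copy removed from pshm_access() later in this patch — is centralized in posix_cred_access(). A simplified sketch of the check it performs, assuming mode_req arrives expressed in owner-class bits as in both callers here (the real function lives elsewhere in the kernel):

    static int
    posix_cred_access_sketch(kauth_cred_t cred, uid_t obj_uid, gid_t obj_gid,
        mode_t obj_mode, mode_t mode_req)
    {
        int is_member;

        if (kauth_cred_getuid(cred) != obj_uid) {
            if (kauth_cred_ismember_gid(cred, obj_gid, &is_member) == 0 &&
                is_member)
                mode_req >>= 3;     /* S_IRUSR -> S_IRGRP, etc. */
            else
                mode_req >>= 6;     /* S_IRUSR -> S_IROTH, etc. */
        }
        return (((obj_mode & mode_req) == mode_req) ? 0 : EACCES);
    }
]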
 
 int
@@ -809,6 +784,7 @@ sem_close(proc_t p, struct sem_close_args *uap, __unused int32_t *retval)
 		proc_fdunlock(p);
 		return(error);
 	}
+	procfdtbl_markclosefd(p, fd);
 	fileproc_drain(p, fp);
 	fdrelse(p, fd);
 	error = closef_locked(fp, fp->f_fglob, p);
diff --git a/bsd/kern/posix_shm.c b/bsd/kern/posix_shm.c
index 985538e69..617d1dc9f 100644
--- a/bsd/kern/posix_shm.c
+++ b/bsd/kern/posix_shm.c
@@ -178,7 +178,7 @@ static int pshm_write (struct fileproc *fp, struct uio *uio,
 static int pshm_ioctl (struct fileproc *fp, u_long com,
 		    caddr_t data, vfs_context_t ctx);
 static int pshm_select (struct fileproc *fp, int which, void *wql, vfs_context_t ctx);
-static int pshm_close(struct pshmnode *pnode);
+static int pshm_close(struct pshminfo *pinfo, int dropref);
 static int pshm_closefile (struct fileglob *fg, vfs_context_t ctx);
 
 static int pshm_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx);
@@ -190,7 +190,7 @@ static void pshm_cache_delete(struct pshmcache *pcp);
 static void pshm_cache_purge(void);
 #endif	/* NOT_USED */
 static int pshm_cache_search(struct pshminfo **pshmp, struct pshmname *pnp,
-	struct pshmcache **pcache);
+	struct pshmcache **pcache, int addref);
 
 struct 	fileops pshmops =
 	{ pshm_read, pshm_write, pshm_ioctl, pshm_select, pshm_closefile, pshm_kqfilter, 0 };
@@ -229,7 +229,7 @@ pshm_lock_init( void )
 
 static int
 pshm_cache_search(struct pshminfo **pshmp, struct pshmname *pnp,
-	struct pshmcache **pcache)
+	struct pshmcache **pcache, int addref)
 {
 	struct pshmcache *pcp, *nnp;
 	struct pshmhashhead *pcpp;
@@ -258,6 +258,8 @@ pshm_cache_search(struct pshminfo **pshmp, struct pshmname *pnp,
 		/* TOUCH(ncp); */
 		*pshmp = pcp->pshminfo;
 		*pcache = pcp;
+		if (addref)
+			pcp->pshminfo->pshm_usecount++;
 		return (-1);
 	}
 
@@ -287,7 +289,7 @@ pshm_cache_add(struct pshminfo *pshmp, struct pshmname *pnp, struct pshmcache *p
 
 
 	/*  if the entry has already been added by someone else, return */
-	if (pshm_cache_search(&dpinfo, pnp, &dpcp) == -1) {
+	if (pshm_cache_search(&dpinfo, pnp, &dpcp, 0) == -1) {
 		return(EEXIST);
 	}
 	pshmnument++;
@@ -438,6 +440,14 @@ shm_open(proc_t p, struct shm_open_args *uap, int32_t *retval)
 	if (error) 
 		goto bad;
 
+	cmode &=  ALLPERMS;
+
+	fmode = FFLAGS(uap->oflag);
+	if ((fmode & (FREAD | FWRITE)) == 0) {
+		error = EINVAL;
+		goto bad;
+	}
+
 	/*
 	 * We allocate a new entry if we are less than the maximum
 	 * allowed and the one at the front of the LRU list is in use.
@@ -466,27 +476,42 @@ shm_open(proc_t p, struct shm_open_args *uap, int32_t *retval)
 
 	PSHM_SUBSYS_LOCK();
 
-	error = pshm_cache_search(&pinfo, &nd, &pcache);
+	/*
+	 * If we find the entry in the cache, this will take a reference,
+	 * allowing us to unlock it for the permissions check.
+	 */
+	error = pshm_cache_search(&pinfo, &nd, &pcache, 1);
+
+	PSHM_SUBSYS_UNLOCK();
 
 	if (error == ENOENT) {
 		error = EINVAL;
-		goto bad_locked;
-
+		goto bad;
 	}
+
 	if (!error) {
 		incache = 0;
-	} else
+		if (fmode & O_CREAT) {
+			/*  create a new one (commit the allocation) */
+			pinfo = new_pinfo;
+			pinfo->pshm_flags = PSHM_DEFINED | PSHM_INCREATE;
+			pinfo->pshm_usecount = 1; /* existence reference */
+			pinfo->pshm_mode = cmode;
+			pinfo->pshm_uid = kauth_getuid();
+			pinfo->pshm_gid = kauth_getgid();
+			bcopy(pnbuf, &pinfo->pshm_name[0], PSHMNAMLEN);
+			pinfo->pshm_name[PSHMNAMLEN]=0;
+#if CONFIG_MACF
+			error = mac_posixshm_check_create(kauth_cred_get(), nameptr);
+			if (error) {
+				goto bad;
+			}
+			mac_posixshm_label_associate(kauth_cred_get(), pinfo, nameptr);
+#endif
+		}
+	} else {
 		incache = 1;
-	fmode = FFLAGS(uap->oflag);
-	if ((fmode & (FREAD | FWRITE))==0) {
-		error = EINVAL;
-		goto bad_locked;
-	}
-
-	cmode &=  ALLPERMS;
-
-	if (fmode & O_CREAT) {
-		if (incache) {
+		if (fmode & O_CREAT) {
 			/*  already exists */
 			if ((fmode & O_EXCL)) {
 				AUDIT_ARG(posix_ipc_perm, pinfo->pshm_uid,
@@ -495,65 +520,53 @@ shm_open(proc_t p, struct shm_open_args *uap, int32_t *retval)
 
 				/* shm obj exists and opened O_EXCL */
 				error = EEXIST;
-				goto bad_locked;
+				goto bad;
 			} 
 
 			if( pinfo->pshm_flags & PSHM_INDELETE) {
 				error = ENOENT;
-				goto bad_locked;
+				goto bad;
 			}	
 			AUDIT_ARG(posix_ipc_perm, pinfo->pshm_uid,
 					pinfo->pshm_gid, pinfo->pshm_mode);
 #if CONFIG_MACF	
 			if ((error = mac_posixshm_check_open(kauth_cred_get(), pinfo))) {
-				goto bad_locked;
+				goto bad;
 			}
 #endif
 			if ( (error = pshm_access(pinfo, fmode, kauth_cred_get(), p)) ) {
-				goto bad_locked;
-			}
-		} else {
-			/*  create a new one (commit the allocation) */
-			pinfo = new_pinfo;
-			pinfo->pshm_flags = PSHM_DEFINED | PSHM_INCREATE;
-			pinfo->pshm_usecount = 1; /* existence reference */
-			pinfo->pshm_mode = cmode;
-			pinfo->pshm_uid = kauth_cred_getuid(kauth_cred_get());
-			pinfo->pshm_gid = kauth_cred_get()->cr_gid;
-			bcopy(pnbuf, &pinfo->pshm_name[0], PSHMNAMLEN);
-			pinfo->pshm_name[PSHMNAMLEN]=0;
-#if CONFIG_MACF
-			error = mac_posixshm_check_create(kauth_cred_get(), nameptr);
-			if (error) {
-				goto bad_locked;
+				goto bad;
 			}
-			mac_posixshm_label_associate(kauth_cred_get(), pinfo, nameptr);
-#endif
 		}
-	} else {
+	}
+	if (!(fmode & O_CREAT)) {
 		if (!incache) {
 			/* O_CREAT is not set and the object does not exist */
 			error = ENOENT;
-			goto bad_locked;
+			goto bad;
 		}
 		if( pinfo->pshm_flags & PSHM_INDELETE) {
 			error = ENOENT;
-			goto bad_locked;
+			goto bad;
 		}	
 #if CONFIG_MACF	
 		if ((error = mac_posixshm_check_open(kauth_cred_get(), pinfo))) {
-			goto bad_locked;
+			goto bad;
 		}
 #endif
 
 		if ((error = pshm_access(pinfo, fmode, kauth_cred_get(), p))) {
-			goto bad_locked;
+			goto bad;
 		}
 	}
 	if (fmode & O_TRUNC) {
 		error = EINVAL;
-		goto bad_locked;
+		goto bad;
 	}
+
+
+	PSHM_SUBSYS_LOCK();
+
 #if DIAGNOSTIC 
 	if (fmode & FWRITE)
 		pinfo->pshm_writecount++;
@@ -565,9 +578,13 @@ shm_open(proc_t p, struct shm_open_args *uap, int32_t *retval)
 		if ( (error = pshm_cache_add(pinfo, &nd, pcp)) ) {
 			goto bad_locked;
 		}
+		/*
+		 * add reference for the new entry; otherwise, we obtained
+		 * one from the cache hit earlier.
+		 */
+		pinfo->pshm_usecount++;
 	}
 	pinfo->pshm_flags &= ~PSHM_INCREATE;
-	pinfo->pshm_usecount++; /* extra reference for the new fd */
 	new_pnode->pinfo = pinfo;
 
 	PSHM_SUBSYS_UNLOCK();
@@ -604,6 +621,17 @@ shm_open(proc_t p, struct shm_open_args *uap, int32_t *retval)
 bad_locked:
 	PSHM_SUBSYS_UNLOCK();
 bad:
+	/*
+	 * If we obtained the entry from the cache, we need to drop the
+	 * reference; holding the reference may have prevented unlinking,
+	 * so we need to call pshm_close() to get the full effect.
+	 */
+	if (incache) {
+		PSHM_SUBSYS_LOCK();
+		pshm_close(pinfo, 1);
+		PSHM_SUBSYS_UNLOCK();
+	}
+
 	if (pcp != NULL)
 		FREE(pcp, M_SHM);
 
@@ -633,7 +661,8 @@ pshm_truncate(__unused proc_t p, struct fileproc *fp, __unused int fd,
 	struct pshmnode * pnode ;
 	kern_return_t kret;
 	mem_entry_name_port_t mem_object;
-	mach_vm_size_t size, total_size, alloc_size;
+	mach_vm_size_t total_size, alloc_size;
+	memory_object_size_t mosize;
 	struct pshmobj *pshmobj, *pshmobj_next, **pshmobj_next_p;
 #if CONFIG_MACF
 	int error;
@@ -658,7 +687,7 @@ pshm_truncate(__unused proc_t p, struct fileproc *fp, __unused int fd,
 		return(EINVAL);
 	}
 #if CONFIG_MACF
-	error = mac_posixshm_check_truncate(kauth_cred_get(), pinfo, size);
+	error = mac_posixshm_check_truncate(kauth_cred_get(), pinfo, length);
 	if (error) {
 		PSHM_SUBSYS_UNLOCK();
 		return(error);
@@ -671,14 +700,14 @@ pshm_truncate(__unused proc_t p, struct fileproc *fp, __unused int fd,
 
 	for (alloc_size = 0;
 	     alloc_size < total_size;
-	     alloc_size += size) {
+	     alloc_size += mosize) {
 
 		PSHM_SUBSYS_UNLOCK();
 
-		size = MIN(total_size - alloc_size, ANON_MAX_SIZE);
+		mosize = MIN(total_size - alloc_size, ANON_MAX_SIZE);
 		kret = mach_make_memory_entry_64(
 			VM_MAP_NULL,
-			&size,
+			&mosize,
 			0,
 			MAP_MEM_NAMED_CREATE | VM_PROT_DEFAULT,
 			&mem_object,
@@ -699,7 +728,7 @@ pshm_truncate(__unused proc_t p, struct fileproc *fp, __unused int fd,
 		PSHM_SUBSYS_LOCK();
 
 		pshmobj->pshmo_memobject = (void *) mem_object;
-		pshmobj->pshmo_size = size;
+		pshmobj->pshmo_size = mosize;
 		pshmobj->pshmo_next = NULL;
 		
 		*pshmobj_next_p = pshmobj;
@@ -787,39 +816,14 @@ pshm_stat(struct pshmnode *pnode, void *ub, int isstat64)
 int
 pshm_access(struct pshminfo *pinfo, int mode, kauth_cred_t cred, __unused proc_t p)
 {
-	mode_t mask;
-	int is_member;
+	int mode_req = ((mode & FREAD) ? S_IRUSR : 0) |
+		       ((mode & FWRITE) ? S_IWUSR : 0);
 
 	/* Otherwise, user id 0 always gets access. */
 	if (!suser(cred, NULL))
 		return (0);
 
-	mask = 0;
-
-	/* Otherwise, check the owner. */
-	if (kauth_cred_getuid(cred) == pinfo->pshm_uid) {
-		if (mode & FREAD)
-			mask |= S_IRUSR;
-		if (mode & FWRITE)
-			mask |= S_IWUSR;
-		return ((pinfo->pshm_mode & mask) == mask ? 0 : EACCES);
-	}
-
-	/* Otherwise, check the groups. */
-	if (kauth_cred_ismember_gid(cred, pinfo->pshm_gid, &is_member) == 0 && is_member) {
-		if (mode & FREAD)
-			mask |= S_IRGRP;
-		if (mode & FWRITE)
-			mask |= S_IWGRP;
-		return ((pinfo->pshm_mode & mask) == mask ? 0 : EACCES);
-	}
-
-	/* Otherwise, check everyone else. */
-	if (mode & FREAD)
-		mask |= S_IROTH;
-	if (mode & FWRITE)
-		mask |= S_IWOTH;
-	return ((pinfo->pshm_mode & mask) == mask ? 0 : EACCES);
+	return(posix_cred_access(cred, pinfo->pshm_uid, pinfo->pshm_gid, pinfo->pshm_mode, mode_req));
 }
 
 int
@@ -1051,7 +1055,7 @@ shm_unlink(__unused proc_t p, struct shm_unlink_args *uap,
 	}
 
 	PSHM_SUBSYS_LOCK();
-	error = pshm_cache_search(&pinfo, &nd, &pcache);
+	error = pshm_cache_search(&pinfo, &nd, &pcache, 0);
 
 	if (error == ENOENT) {
 		PSHM_SUBSYS_UNLOCK();
@@ -1132,16 +1136,16 @@ bad:
 
 /* already called locked */
 static int
-pshm_close(struct pshmnode *pnode)
+pshm_close(struct pshminfo *pinfo, int dropref)
 {
-	int error=0;
-	struct pshminfo *pinfo;
+	int error = 0;
 	struct pshmobj *pshmobj, *pshmobj_next;
 
-	if ((pinfo = pnode->pinfo) == PSHMINFO_NULL)
-		return(EINVAL);
-
-	if ((pinfo->pshm_flags & PSHM_ALLOCATED) != PSHM_ALLOCATED) {
+	/*
+	 * If we are dropping the reference we took on the cache object, don't
+	 * enforce the allocation requirement.
+	 */
+	if ( !dropref && ((pinfo->pshm_flags & PSHM_ALLOCATED) != PSHM_ALLOCATED)) {
 		return(EINVAL);
 	}
 #if DIAGNOSTIC
@@ -1170,7 +1174,6 @@ pshm_close(struct pshmnode *pnode)
 		PSHM_SUBSYS_LOCK();
 		FREE(pinfo,M_SHM);
 	}
-	FREE(pnode, M_SHM);
 	return (error);
 }
 
@@ -1178,11 +1181,20 @@ pshm_close(struct pshmnode *pnode)
 static int
 pshm_closefile(struct fileglob *fg, __unused vfs_context_t ctx)
 {
-	int error;
+	int error = EINVAL;
+	struct pshmnode *pnode;
 
 	PSHM_SUBSYS_LOCK();
-	error =  pshm_close(((struct pshmnode *)fg->fg_data));
+
+	if ((pnode = (struct pshmnode *)fg->fg_data) != NULL) {
+		if (pnode->pinfo != PSHMINFO_NULL) {
+			error =  pshm_close(pnode->pinfo, 0);
+		}
+		FREE(pnode, M_SHM);
+	}
+
 	PSHM_SUBSYS_UNLOCK();
+
 	return(error);
 }
 
diff --git a/bsd/kern/proc_info.c b/bsd/kern/proc_info.c
index d13a2df81..a907fad59 100644
--- a/bsd/kern/proc_info.c
+++ b/bsd/kern/proc_info.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2005, 2010 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -57,6 +57,7 @@
 #include <kern/task.h>
 #include <kern/lock.h>
 #include <kern/kalloc.h>
+#include <kern/assert.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
 #include <mach/host_info.h>
@@ -75,6 +76,8 @@
 
 #include <machine/machine_routines.h>
 
+#include <kern/ipc_misc.h>
+
 #include <vm/vm_protos.h>
 
 struct pshmnode;
@@ -92,10 +95,12 @@ int proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t
 int proc_pidfdinfo(int pid, int flavor,int fd, user_addr_t buffer, uint32_t buffersize, int32_t * retval);
 int proc_kernmsgbuf(user_addr_t buffer, uint32_t buffersize, int32_t * retval);
 int proc_setcontrol(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t * retval);
+int proc_pidfileportinfo(int pid, int flavor, mach_port_name_t name, user_addr_t buffer, uint32_t buffersize, int32_t *retval);
 
 /* protos for procpidinfo calls */
 int proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval);
 int proc_pidbsdinfo(proc_t p, struct proc_bsdinfo *pbsd, int zombie);
+int proc_pidshortbsdinfo(proc_t p, struct proc_bsdshortinfo *pbsd_shortp, int zombie);
 int proc_pidtaskinfo(proc_t p, struct proc_taskinfo *ptinfo);
 int proc_pidallinfo(proc_t p, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval);
 int proc_pidthreadinfo(proc_t p, uint64_t arg,  struct proc_threadinfo *pthinfo);
@@ -106,6 +111,7 @@ int proc_pidregionpathinfo(proc_t p,  uint64_t arg, user_addr_t buffer, uint32_t
 int proc_pidvnodepathinfo(proc_t p,  uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval);
 int proc_pidpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval);
 int proc_pidworkqueueinfo(proc_t p, struct proc_workqueueinfo *pwqinfo);
+int proc_pidfileportlist(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval);
 
 
 /* protos for proc_pidfdinfo calls */
@@ -161,7 +167,9 @@ proc_info_internal(int callnum, int pid, int flavor, uint64_t arg, user_addr_t b
 			return(proc_kernmsgbuf(buffer, buffersize, retval));
 		case 5: /* set on self properties  proc_setcontrol */
 			return(proc_setcontrol(pid, flavor, arg, buffer, buffersize, retval));
-			
+		case 6:	/* proc_pidfileportinfo */
+			return(proc_pidfileportinfo(pid, flavor, (mach_port_name_t)arg, buffer, buffersize, retval));
+
 		default:
 				return(EINVAL);
 	}
@@ -180,6 +188,7 @@ proc_listpids(uint32_t type, uint32_t typeinfo, user_addr_t buffer, uint32_t  bu
 	struct proc * p;
 	struct tty * tp;
 	int error = 0;
+	struct proclist *current_list;
 
 	/* if the buffer is null, return num of procs */
 	if (buffer == (user_addr_t)0) {
@@ -205,13 +214,20 @@ proc_listpids(uint32_t type, uint32_t typeinfo, user_addr_t buffer, uint32_t  bu
 	
 	n = 0;
 	ptr = (int *)kbuf;
-	LIST_FOREACH(p, &allproc, p_list) {
+	current_list = &allproc;
+proc_loop:
+	LIST_FOREACH(p, current_list, p_list) {
 		skip = 0;
 		switch (type) {
 			case PROC_PGRP_ONLY:
 				if (p->p_pgrpid != (pid_t)typeinfo)
 					skip = 1;
 			  	break;
+			case PROC_PPID_ONLY:
+				if ((p->p_ppid != (pid_t)typeinfo) && (((p->p_lflag & P_LTRACED) == 0) || (p->p_oppid != (pid_t)typeinfo)))
+					skip = 1;
+			  	break;
+
 			case PROC_ALL_PIDS:
 				skip = 0;
 			  	break;
@@ -245,7 +261,7 @@ proc_listpids(uint32_t type, uint32_t typeinfo, user_addr_t buffer, uint32_t  bu
 					uid_t uid;
 			
 					my_cred = kauth_cred_proc_ref(p);
-					uid = my_cred->cr_ruid;
+					uid = kauth_cred_getruid(my_cred);
 					kauth_cred_unref(&my_cred);
 					if (uid != (uid_t)typeinfo)
 						skip = 1;
@@ -256,11 +272,6 @@ proc_listpids(uint32_t type, uint32_t typeinfo, user_addr_t buffer, uint32_t  bu
 			  break;
 		};
 
-		/* Do we have permission to look into this ? */
-		if (proc_security_policy(p) != 0) {
-			skip = 1;
-		}
-
 		if(skip == 0) {
 			*ptr++ = p->p_pid;
 			n++;
@@ -269,15 +280,10 @@ proc_listpids(uint32_t type, uint32_t typeinfo, user_addr_t buffer, uint32_t  bu
 			break;
 	}
 	
-	if (n < numprocs) {
-		LIST_FOREACH(p, &zombproc, p_list) {
-			*ptr++ = p->p_pid;
-			n++;
-			if (n >= numprocs)
-				break;
-		}
+	if ((n < numprocs) && (current_list == &allproc)) {
+		current_list = &zombproc;
+		goto proc_loop;
 	}
-	
 
 	proc_list_unlock();
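[Editor's note: the proc_loop goto above replaces a duplicated zombie-list loop with a second pass of the same filter over zombproc. The equivalent shape without the label, as an illustration only:

    struct proclist *lists[] = { &allproc, &zombproc };
    unsigned int li;

    for (li = 0; li < 2 && n < numprocs; li++) {
        LIST_FOREACH(p, lists[li], p_list) {
            /* ... apply the type filter, then: */
            *ptr++ = p->p_pid;
            if (++n >= numprocs)
                break;
        }
    }
]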
 
@@ -345,6 +351,119 @@ proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t  buffersize, int32_t *retv
 		return(error);		
 }
 
+/*
+ * Helper functions for proc_pidfileportlist.
+ */
+static int
+proc_fileport_count(__unused mach_port_name_t name,
+    __unused struct fileglob *fg, void *arg)
+{
+	uint32_t *counter = arg;
+
+	*counter += 1;
+	return (0);
+}
+
+struct fileport_fdtype_args {
+	struct proc_fileportinfo *ffa_pfi;
+	struct proc_fileportinfo *ffa_pfi_end;
+};
+
+static int
+proc_fileport_fdtype(mach_port_name_t name, struct fileglob *fg, void *arg)
+{
+	struct fileport_fdtype_args *ffa = arg;
+
+	if (ffa->ffa_pfi != ffa->ffa_pfi_end) {
+		ffa->ffa_pfi->proc_fdtype = fg->fg_type;
+		ffa->ffa_pfi->proc_fileport = name;
+		ffa->ffa_pfi++;
+		return (0);		/* keep walking */
+	} else
+		return (-1);		/* stop the walk! */
+}
+
+int
+proc_pidfileportlist(proc_t p,
+	user_addr_t buffer, uint32_t buffersize, int32_t *retval)
+{
+	void *kbuf;
+	vm_size_t kbufsize;
+	struct proc_fileportinfo *pfi;
+	uint32_t needfileports, numfileports;
+	struct fileport_fdtype_args ffa;
+	int error;
+
+	needfileports = buffersize / sizeof (*pfi);
+	if ((user_addr_t)0 == buffer || needfileports > (uint32_t)maxfiles) {
+		/*
+		 * Either (i) the user is asking for a fileport count,
+		 * or (ii) the number of fileports they're asking for is
+		 * larger than the maximum number of open files (!); count
+		 * them to bound subsequent heap allocations.
+		 */
+		numfileports = 0;
+		switch (fileport_walk(p->task,
+		    proc_fileport_count, &numfileports)) {
+		case KERN_SUCCESS:
+			break;
+		case KERN_RESOURCE_SHORTAGE:
+			return (ENOMEM);
+		case KERN_INVALID_TASK:
+			return (ESRCH);
+		default:
+			return (EINVAL);
+		}
+
+		if (numfileports == 0) {
+			*retval = 0;		/* none at all, bail */
+			return (0);
+		}
+		if ((user_addr_t)0 == buffer) {
+			numfileports += 20;	/* accelerate convergence */
+			*retval = numfileports * sizeof (*pfi);
+			return (0);
+		}
+		if (needfileports > numfileports)
+			needfileports = numfileports;
+	}
+
+	assert(buffersize >= PROC_PIDLISTFILEPORTS_SIZE);
+
+	kbufsize = (vm_size_t)needfileports * sizeof (*pfi);
+	pfi = kbuf = kalloc(kbufsize);
+	if (kbuf == NULL)
+	   	return (ENOMEM);
+	bzero(kbuf, kbufsize);
+
+	ffa.ffa_pfi = pfi;
+	ffa.ffa_pfi_end = pfi + needfileports;
+
+	switch (fileport_walk(p->task, proc_fileport_fdtype, &ffa)) {
+	case KERN_SUCCESS:
+		error = 0;
+		pfi = ffa.ffa_pfi;
+		if ((numfileports = pfi - (typeof(pfi))kbuf) == 0)
+			break;
+		if (numfileports > needfileports)
+			panic("more fileports returned than requested");
+		error = copyout(kbuf, buffer, numfileports * sizeof (*pfi));
+		break;
+	case KERN_RESOURCE_SHORTAGE:
+		error = ENOMEM;
+		break;
+	case KERN_INVALID_TASK:
+		error = ESRCH;
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	kfree(kbuf, kbufsize);
+	if (error == 0)
+		*retval = numfileports * sizeof (*pfi);
+	return (error);
+}
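[Editor's note: from user space this flavor follows the probe-then-fetch protocol implemented above — a NULL buffer yields a (deliberately padded) size estimate, a second call fills the array. A sketch using libproc's proc_pidinfo() wrapper, assuming it forwards this flavor unchanged; untested:

    #include <libproc.h>
    #include <stdio.h>
    #include <stdlib.h>

    static int
    dump_fileports(pid_t pid)
    {
        struct proc_fileportinfo *pfi;
        int size, used, i;

        /* probe: kernel reports numfileports * sizeof(*pfi), padded by 20 */
        size = proc_pidinfo(pid, PROC_PIDLISTFILEPORTS, 0, NULL, 0);
        if (size <= 0)
            return (-1);
        if ((pfi = malloc(size)) == NULL)
            return (-1);
        used = proc_pidinfo(pid, PROC_PIDLISTFILEPORTS, 0, pfi, size);
        for (i = 0; i < used / (int)sizeof(*pfi); i++)
            printf("fileport 0x%x type %u\n",
                pfi[i].proc_fileport, pfi[i].proc_fdtype);
        free(pfi);
        return (0);
    }
]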
 
 int 
 proc_pidbsdinfo(proc_t p, struct proc_bsdinfo * pbsd, int zombie)
@@ -363,19 +482,21 @@ proc_pidbsdinfo(proc_t p, struct proc_bsdinfo * pbsd, int zombie)
 	pbsd->pbi_xstatus = p->p_xstat;
 	pbsd->pbi_pid = p->p_pid;
 	pbsd->pbi_ppid = p->p_ppid;
-	pbsd->pbi_uid = my_cred->cr_uid;
-	pbsd->pbi_gid = my_cred->cr_gid; 
-	pbsd->pbi_ruid =  my_cred->cr_ruid;
-	pbsd->pbi_rgid = my_cred->cr_rgid;
-	pbsd->pbi_svuid =  my_cred->cr_svuid;
-	pbsd->pbi_svgid = my_cred->cr_svgid;
+	pbsd->pbi_uid = kauth_cred_getuid(my_cred);
+	pbsd->pbi_gid = kauth_cred_getgid(my_cred); 
+	pbsd->pbi_ruid =  kauth_cred_getruid(my_cred);
+	pbsd->pbi_rgid = kauth_cred_getrgid(my_cred);
+	pbsd->pbi_svuid =  kauth_cred_getsvuid(my_cred);
+	pbsd->pbi_svgid = kauth_cred_getsvgid(my_cred);
 	kauth_cred_unref(&my_cred);
 	
 	pbsd->pbi_nice = p->p_nice;
 	pbsd->pbi_start_tvsec = p->p_start.tv_sec;
 	pbsd->pbi_start_tvusec = p->p_start.tv_usec;
-	bcopy(&p->p_comm, &pbsd->pbi_comm[0], MAXCOMLEN-1);
-	bcopy(&p->p_name, &pbsd->pbi_name[0], 2*MAXCOMLEN-1);
+	bcopy(&p->p_comm, &pbsd->pbi_comm[0], MAXCOMLEN);
+	pbsd->pbi_comm[MAXCOMLEN - 1] = '\0';
+	bcopy(&p->p_name, &pbsd->pbi_name[0], 2*MAXCOMLEN);
+	pbsd->pbi_name[(2*MAXCOMLEN) - 1] = '\0';
 
 	pbsd->pbi_flags = 0;	
 	if ((p->p_flag & P_SYSTEM) == P_SYSTEM) 
@@ -392,6 +513,10 @@ proc_pidbsdinfo(proc_t p, struct proc_bsdinfo * pbsd, int zombie)
 		pbsd->pbi_flags |= PROC_FLAG_CONTROLT;
 	if ((p->p_flag & P_THCWD) == P_THCWD) 
 		pbsd->pbi_flags |= PROC_FLAG_THCWD;
+	if ((p->p_flag & P_SUGID) == P_SUGID) 
+		pbsd->pbi_flags |= PROC_FLAG_PSUGID;
+	if ((p->p_flag & P_EXEC) == P_EXEC) 
+		pbsd->pbi_flags |= PROC_FLAG_EXEC;
 
 	if (sessionp != SESSION_NULL) {
 		if (SESS_LEADER(p, sessionp))
@@ -422,6 +547,10 @@ proc_pidbsdinfo(proc_t p, struct proc_bsdinfo * pbsd, int zombie)
 			break;
 	};
 		
+	/* if process is a zombie skip bg state */
+	if ((zombie == 0) && (p->p_stat != SZOMB) && (p->task != TASK_NULL))
+		proc_get_darwinbgstate(p->task, &pbsd->pbi_flags);
+
 	if (zombie == 0)
 		pbsd->pbi_nfiles = p->p_fd->fd_nfiles;
 	if (pg != PGRP_NULL) {
@@ -441,6 +570,72 @@ proc_pidbsdinfo(proc_t p, struct proc_bsdinfo * pbsd, int zombie)
 }
 
 
+int 
+proc_pidshortbsdinfo(proc_t p, struct proc_bsdshortinfo * pbsd_shortp, int zombie)
+{
+	bzero(pbsd_shortp, sizeof(struct proc_bsdshortinfo));
+	pbsd_shortp->pbsi_pid = p->p_pid;
+	pbsd_shortp->pbsi_ppid = p->p_ppid;
+	pbsd_shortp->pbsi_pgid = p->p_pgrpid;
+	pbsd_shortp->pbsi_status = p->p_stat;
+	bcopy(&p->p_comm, &pbsd_shortp->pbsi_comm[0], MAXCOMLEN);
+	pbsd_shortp->pbsi_comm[MAXCOMLEN - 1] = '\0';
+
+	pbsd_shortp->pbsi_flags = 0;	
+	if ((p->p_flag & P_SYSTEM) == P_SYSTEM) 
+		pbsd_shortp->pbsi_flags |= PROC_FLAG_SYSTEM;
+	if ((p->p_lflag & P_LTRACED) == P_LTRACED) 
+		pbsd_shortp->pbsi_flags |= PROC_FLAG_TRACED;
+	if ((p->p_lflag & P_LEXIT) == P_LEXIT) 
+		pbsd_shortp->pbsi_flags |= PROC_FLAG_INEXIT;
+	if ((p->p_lflag & P_LPPWAIT) == P_LPPWAIT) 
+		pbsd_shortp->pbsi_flags |= PROC_FLAG_PPWAIT;
+	if ((p->p_flag & P_LP64) == P_LP64) 
+		pbsd_shortp->pbsi_flags |= PROC_FLAG_LP64;
+	if ((p->p_flag & P_CONTROLT) == P_CONTROLT) 
+		pbsd_shortp->pbsi_flags |= PROC_FLAG_CONTROLT;
+	if ((p->p_flag & P_THCWD) == P_THCWD) 
+		pbsd_shortp->pbsi_flags |= PROC_FLAG_THCWD;
+	if ((p->p_flag & P_SUGID) == P_SUGID) 
+		pbsd_shortp->pbsi_flags |= PROC_FLAG_PSUGID;
+	if ((p->p_flag & P_EXEC) == P_EXEC) 
+		pbsd_shortp->pbsi_flags |= PROC_FLAG_EXEC;
+
+	switch(PROC_CONTROL_STATE(p)) {
+		case P_PCTHROTTLE:
+			pbsd_shortp->pbsi_flags |= PROC_FLAG_PC_THROTTLE;
+			break;
+		case P_PCSUSP:
+			pbsd_shortp->pbsi_flags |= PROC_FLAG_PC_SUSP;
+			break;
+		case P_PCKILL:
+			pbsd_shortp->pbsi_flags |= PROC_FLAG_PC_KILL;
+			break;
+	};
+
+	switch(PROC_ACTION_STATE(p)) {
+		case P_PCTHROTTLE:
+			pbsd_shortp->pbsi_flags |= PROC_FLAG_PA_THROTTLE;
+			break;
+		case P_PCSUSP:
+			pbsd_shortp->pbsi_flags |= PROC_FLAG_PA_SUSP;
+			break;
+	};
+		
+	/* if process is a zombie skip bg state */
+	if ((zombie == 0) && (p->p_stat != SZOMB) && (p->task != TASK_NULL))
+		proc_get_darwinbgstate(p->task, &pbsd_shortp->pbsi_flags);
+
+	pbsd_shortp->pbsi_uid = p->p_uid;
+	pbsd_shortp->pbsi_gid = p->p_gid; 
+	pbsd_shortp->pbsi_ruid =  p->p_ruid;
+	pbsd_shortp->pbsi_rgid = p->p_rgid;
+	pbsd_shortp->pbsi_svuid =  p->p_svuid;
+	pbsd_shortp->pbsi_svgid = p->p_svgid;
+	
+	return(0);
+}
+
 int 
 proc_pidtaskinfo(proc_t p, struct proc_taskinfo * ptinfo)
 {
@@ -739,7 +934,7 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t  bu
 	int error = ENOTSUP;
 	int gotref = 0;
 	int findzomb = 0;
-	int refheld = 0;
+	int refheld = 0, shortversion = 0;
 	uint32_t size;
 	int zombie = 0;
 
@@ -786,6 +981,14 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t  bu
 			else
 				size = PROC_PIDWORKQUEUEINFO_SIZE;
 			break;
+		case PROC_PIDT_SHORTBSDINFO:
+			size = PROC_PIDT_SHORTBSDINFO_SIZE;
+			break;
+		case PROC_PIDLISTFILEPORTS:
+			size = PROC_PIDLISTFILEPORTS_SIZE;
+			if (buffer == (user_addr_t)0)
+				size = 0;
+			break;
 		default:
 			return(EINVAL);
 	}
@@ -797,7 +1000,7 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t  bu
 		return(EOVERFLOW);
 	}
 
-	if ((flavor != PROC_PIDTBSDINFO) && (flavor != PROC_PIDPATHINFO)) {
+	if ((flavor != PROC_PIDTBSDINFO) && (flavor != PROC_PIDPATHINFO) && (flavor != PROC_PIDT_SHORTBSDINFO)) {
 		if ((p = proc_find(pid)) == PROC_NULL) {
 				error = ESRCH;
 				goto out;
@@ -816,8 +1019,11 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t  bu
 		}
 		break;
 
+		case PROC_PIDT_SHORTBSDINFO:
+			shortversion = 1;
 		case PROC_PIDTBSDINFO: {
 			struct proc_bsdinfo pbsd;
+			struct proc_bsdshortinfo pbsd_short;
 
 			zombie = 0;
 			if (arg)
@@ -825,27 +1031,45 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t  bu
 			p = proc_find(pid);
 			if (p == PROC_NULL) {
 				if (findzomb)  
-					p = pzfind(pid);
+					p = proc_find_zombref(pid);
 				if (p == NULL) {
 					error = ESRCH;
 					goto out;	
 				}
 				zombie = 1;
-			} else 
-				refheld = 1;
+			}  
+			refheld = 1;
 			/* Do we have permission to look into this? */
-			if ((error = proc_security_policy(p)) != 0) {
-				if (refheld != 0)
-					proc_rele(p);
+			if ((flavor != PROC_PIDT_SHORTBSDINFO) && ((error = proc_security_policy(p)) != 0)) {
+				if (refheld != 0) {
+					if (zombie != 0)
+						proc_drop_zombref(p);
+					else
+						proc_rele(p);
+				}
 				goto out;
 			}
-			error = proc_pidbsdinfo(p, &pbsd, zombie);
-			if (refheld != 0)
-				proc_rele(p);
+			if (shortversion != 0) {
+				error = proc_pidshortbsdinfo(p, &pbsd_short, zombie);
+			} else {
+				error = proc_pidbsdinfo(p, &pbsd, zombie);
+			}
+			if (refheld != 0) {
+				if (zombie != 0)
+					proc_drop_zombref(p);
+				else
+					proc_rele(p);
+			}
 			if (error == 0) {
-				error = copyout(&pbsd, buffer, sizeof(struct proc_bsdinfo));
-				if (error == 0)
-					*retval = sizeof(struct proc_bsdinfo);
+				if (shortversion != 0) {
+					error = copyout(&pbsd_short, buffer, sizeof(struct proc_bsdshortinfo));
+					if (error == 0)
+						*retval = sizeof(struct proc_bsdshortinfo);
+				 } else {
+					error = copyout(&pbsd, buffer, sizeof(struct proc_bsdinfo));
+					if (error == 0)
+						*retval = sizeof(struct proc_bsdinfo);
+				}
 			}	
 		}
 		break;
@@ -945,6 +1169,12 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t  bu
 		}
 		break;
 
+		case PROC_PIDLISTFILEPORTS: {
+			error = proc_pidfileportlist(p, buffer, buffersize,
+			    retval);
+		}
+		break;
+
 		default:
 			error = ENOTSUP;
 	}
@@ -1297,9 +1527,8 @@ proc_pidfdinfo(int pid, int flavor,  int fd, user_addr_t buffer, uint32_t buffer
 #endif /* NETAT */
 		default: {
 			error = EINVAL;
+			goto out1;
 		}
-		break;
-
 	}
 
 	fp_drop(p, fd, fp , 0); 	
@@ -1309,6 +1538,137 @@ out:
 	return(error);
 }
 
+/*
+ * Helper function for proc_pidfileportinfo
+ */
+
+struct fileport_info_args {
+	int		fia_flavor;
+	user_addr_t	fia_buffer;
+	uint32_t	fia_buffersize;
+	int32_t		*fia_retval;
+};
+
+static kern_return_t
+proc_fileport_info(__unused mach_port_name_t name,
+	struct fileglob *fg, void *arg)
+{
+	struct fileport_info_args *fia = arg;
+	struct fileproc __fileproc, *fp = &__fileproc;
+	int error;
+
+	bzero(fp, sizeof (*fp));
+	fp->f_fglob = fg;
+
+	switch (fia->fia_flavor) {
+	case PROC_PIDFILEPORTVNODEPATHINFO: {
+		vnode_t vp;
+
+		if (fg->fg_type != DTYPE_VNODE) {
+			error = ENOTSUP;
+			break;
+		}
+		vp = (struct vnode *)fg->fg_data;
+		error = pid_vnodeinfopath(vp, vnode_vid(vp), fp, 0,
+		    fia->fia_buffer, fia->fia_buffersize, fia->fia_retval);
+	}	break;
+
+	case PROC_PIDFILEPORTSOCKETINFO: {
+		socket_t so;
+
+		if (fg->fg_type != DTYPE_SOCKET) {
+			error = EOPNOTSUPP;
+			break;
+		}
+		so = (socket_t)fg->fg_data;
+		error = pid_socketinfo(so, fp, 0,
+		    fia->fia_buffer, fia->fia_buffersize, fia->fia_retval);
+	}	break;
+
+	case PROC_PIDFILEPORTPSHMINFO: {
+		struct pshmnode *pshm;
+
+		if (fg->fg_type != DTYPE_PSXSHM) {
+			error = EBADF;		/* ick - mirror fp_getfpshm */
+			break;
+		}
+		pshm = (struct pshmnode *)fg->fg_data;
+		error = pid_pshminfo(pshm, fp, 0,
+		    fia->fia_buffer, fia->fia_buffersize, fia->fia_retval);
+	}	break;
+
+	case PROC_PIDFILEPORTPIPEINFO: {
+		struct pipe *cpipe;
+
+		if (fg->fg_type != DTYPE_PIPE) {
+			error = EBADF;		/* ick - mirror fp_getfpipe */
+			break;
+		}
+		cpipe = (struct pipe *)fg->fg_data;
+		error = pid_pipeinfo(cpipe, fp, 0,
+		    fia->fia_buffer, fia->fia_buffersize, fia->fia_retval);
+	}	break;
+
+	default:
+		error = EINVAL;
+		break;
+	}
+
+	return (error);
+}
+
+/************************* proc_pidfileportinfo routine *********************/
+int
+proc_pidfileportinfo(int pid, int flavor, mach_port_name_t name,
+	user_addr_t buffer, uint32_t buffersize, int32_t *retval)
+{
+	proc_t p;
+	int error = ENOTSUP;
+	uint32_t size;
+	struct fileport_info_args fia;
+
+	/* fileport types are restricted by filetype_issendable() */
+
+	switch (flavor) {
+	case PROC_PIDFILEPORTVNODEPATHINFO:
+		size = PROC_PIDFILEPORTVNODEPATHINFO_SIZE;
+		break;
+	case PROC_PIDFILEPORTSOCKETINFO:
+		size = PROC_PIDFILEPORTSOCKETINFO_SIZE;
+		break;
+	case PROC_PIDFILEPORTPSHMINFO:
+		size = PROC_PIDFILEPORTPSHMINFO_SIZE;
+		break;
+	case PROC_PIDFILEPORTPIPEINFO:
+		size = PROC_PIDFILEPORTPIPEINFO_SIZE;
+		break;
+	default:
+		return (EINVAL);
+	}
+ 
+	if (buffersize < size)
+		return (ENOMEM);
+	if ((p = proc_find(pid)) == PROC_NULL) {
+		error = ESRCH;
+		goto out;
+	}
+	if ((error = proc_security_policy(p)) != 0) {
+		goto out1;
+	}
+
+	fia.fia_flavor = flavor;
+	fia.fia_buffer = buffer;
+	fia.fia_buffersize = buffersize;
+	fia.fia_retval = retval;
+
+	if (fileport_invoke(p->task, name,
+	    proc_fileport_info, &fia, &error) != KERN_SUCCESS)
+		error = EINVAL;
+out1:
+	proc_rele(p);
+out:
+	return (error);
+}
 
 static int
 proc_security_policy(proc_t p)
@@ -1339,22 +1699,23 @@ proc_kernmsgbuf(user_addr_t buffer, uint32_t buffersize, int32_t * retval)
 
 /* ********* process control sets on self only */
 int 
-proc_setcontrol(int pid, int flavor, uint64_t arg, __unused user_addr_t buffer, __unused uint32_t buffersize, __unused int32_t * retval)
+proc_setcontrol(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, __unused int32_t * retval)
 {
 	struct proc * pself = PROC_NULL;
 	int error = 0;
 	uint32_t pcontrol = (uint32_t)arg;
+	struct uthread *ut = NULL;
 
 
 	pself = current_proc();
 	if (pid != pself->p_pid)
 		return(EINVAL);
 
-	if (pcontrol > P_PCMAX)
-		return(EINVAL);
 
 	switch (flavor) {
 		case PROC_SELFSET_PCONTROL: {
+			if (pcontrol > P_PCMAX)
+				return(EINVAL);
 			proc_lock(pself);
 			/* reset existing control setting while retaining action state */
 			pself->p_pcaction &= PROC_ACTION_MASK;
@@ -1364,10 +1725,42 @@ proc_setcontrol(int pid, int flavor, uint64_t arg, __unused user_addr_t buffer,
 		}
 		break;
 
+		case PROC_SELFSET_THREADNAME: {
+			/* PROC_SELFSET_THREADNAME_SIZE = (MAXTHREADNAMESIZE - 1) */
+			if (buffersize > PROC_SELFSET_THREADNAME_SIZE)
+				return ENAMETOOLONG;
+			ut = current_uthread();
+
+			if (!ut->pth_name) {
+				ut->pth_name = (char *)kalloc(MAXTHREADNAMESIZE);
+				if (!ut->pth_name)
+					return ENOMEM;
+			}
+			bzero(ut->pth_name, MAXTHREADNAMESIZE);
+			error = copyin(buffer, ut->pth_name, buffersize);
+		}
+		break;
+
+		case PROC_SELFSET_VMRSRCOWNER: {
+			/* need to be superuser */
+			if (suser(kauth_cred_get(), (u_short *)0) != 0) {
+				error = EPERM;
+				goto out;
+			}
+
+			proc_lock(pself);
+			/* mark self as the VM resource owner */
+			pself->p_lflag |= P_LVMRSRCOWNER;
+			proc_unlock(pself);
+		}
+		break;
+
 		default:
 			error = ENOTSUP;
 	}
 	
+out:
 	return(error);
 }
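+
+/*
+ * Userland sketch for the PROC_SELFSET_THREADNAME flavor above. On OS X
+ * the public pthread_setname_np() names the calling thread; treating it
+ * as the route into this flavor is an assumption, not stated by the patch.
+ */
+#include <pthread.h>
+
+static void *
+worker(void *arg)
+{
+	/* the kernel returns ENAMETOOLONG if the name exceeds PROC_SELFSET_THREADNAME_SIZE */
+	(void)pthread_setname_np("net-worker");
+	return arg;
+}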
 
diff --git a/bsd/kern/process_policy.c b/bsd/kern/process_policy.c
new file mode 100644
index 000000000..e6596dad4
--- /dev/null
+++ b/bsd/kern/process_policy.c
@@ -0,0 +1,460 @@
+/*
+ * Copyright (c) 2005, 2010 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * process policy syscall implementation
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/proc_internal.h>
+#include <sys/kauth.h>
+#include <sys/unistd.h>
+#include <sys/buf.h>
+#include <sys/ioctl.h>
+#include <sys/vm.h>
+#include <sys/user.h>
+
+#include <security/audit/audit.h>
+
+#include <mach/machine.h>
+#include <mach/mach_types.h>
+#include <mach/vm_param.h>
+#include <kern/task.h>
+#include <kern/lock.h>
+#include <kern/kalloc.h>
+#include <kern/assert.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+#include <mach/host_info.h>
+#include <mach/task_info.h>
+#include <mach/thread_info.h>
+#include <mach/vm_region.h>
+
+#include <sys/process_policy.h>
+#include <sys/proc_info.h>
+#include <sys/bsdtask_info.h>
+#include <sys/kdebug.h>
+#include <sys/sysproto.h>
+#include <sys/msgbuf.h>
+
+#include <machine/machine_routines.h>
+
+#include <kern/ipc_misc.h>
+#include <vm/vm_protos.h>
+
+static int handle_background(int scope, int action, int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid);
+static int handle_hwaccess(int scope, int action, int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid);
+static int handle_lowresrouce(int scope, int action, int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid);
+static int handle_resourceuse(int scope, int action, int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid);
+static int handle_apptype(int scope, int action, int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid);
+
+extern kern_return_t task_suspend(task_t);
+extern kern_return_t task_resume(task_t);
+
+/***************************** process_policy ********************/
+
+/*
+ * int process_policy(int scope, int action, int policy, int policy_subtype,
+ *                    proc_policy_attribute_t * attrp, pid_t target_pid,
+ *                    uint64_t target_threadid)
+ *
+ * { int process_policy(int scope, int action, int policy, int policy_subtype,
+ *       user_addr_t attrp, pid_t target_pid, uint64_t target_threadid); }
+ */
+
+/* system call implementation */
+int
+process_policy(struct proc *p, struct process_policy_args * uap, __unused int32_t *retval)
+{
+	int error = 0;
+	int scope = uap->scope;
+	int policy = uap->policy;
+	int action = uap->action;
+	int policy_subtype = uap->policy_subtype;
+	user_addr_t attrp = uap->attrp;
+	pid_t target_pid = uap->target_pid;
+	uint64_t target_threadid = uap->target_threadid;
+	proc_t proc = PROC_NULL;
+	proc_t curp = current_proc();
+	kauth_cred_t my_cred;
+#if CONFIG_EMBEDDED
+	kauth_cred_t target_cred;
+#endif
+
+	if ((scope != PROC_POLICY_SCOPE_PROCESS) && (scope != PROC_POLICY_SCOPE_THREAD)) {
+		return(EINVAL);
+	}
+	proc = proc_find(target_pid);
+	if (proc == PROC_NULL)  {
+		return(EINVAL);
+	}
+
+	my_cred = kauth_cred_proc_ref(curp);
+
+#if CONFIG_EMBEDDED
+	target_cred = kauth_cred_proc_ref(proc);
+
+	if (suser(my_cred, NULL) && kauth_cred_getruid(my_cred) &&
+	    kauth_cred_getuid(my_cred) != kauth_cred_getuid(target_cred) &&
+	    kauth_cred_getruid(my_cred) != kauth_cred_getuid(target_cred))
+#else
+	/*
+	 * Resource starvation control can be used by an unprivileged resource
+	 * owner that was privileged at the time of the ownership claim; that
+	 * is checked in the low-resource handler routine, so bypass the
+	 * checks here.
+	 */
+	if ((policy != PROC_POLICY_RESOURCE_STARVATION) && 
+		(policy != PROC_POLICY_APPTYPE) && 
+		(suser(my_cred, NULL) && curp != p))
+#endif
+	{
+		error = EPERM;
+		goto out;
+	}
+
+#if CONFIG_MACF
+	error = mac_proc_check_sched(curp, p);
+	if (error) 
+		goto out;
+#endif
+
+
+	switch(policy) {
+		case PROC_POLICY_BACKGROUND:
+			error = handle_background(scope, action, policy, policy_subtype, attrp, proc, target_threadid);
+			break;
+		case PROC_POLICY_HARDWARE_ACCESS:
+			error = handle_hwaccess(scope, action, policy, policy_subtype, attrp, proc, target_threadid);
+			break;
+		case PROC_POLICY_RESOURCE_STARVATION:
+			error = handle_lowresrouce(scope, action, policy, policy_subtype, attrp, proc, target_threadid);
+			break;
+		case PROC_POLICY_RESOURCE_USAGE:
+			error = handle_resourceuse(scope, action, policy, policy_subtype, attrp, proc, target_threadid);
+			break;
+		case PROC_POLICY_APPTYPE:
+			error = handle_apptype(scope, action, policy, policy_subtype, attrp, proc, target_threadid);
+			break;
+		default:
+			error = EINVAL;
+			break;
+	}
+
+out:
+	proc_rele(proc);
+	kauth_cred_unref(&my_cred);
+#if CONFIG_EMBEDDED
+	kauth_cred_unref(&target_cred);
+#endif
+	return(error);
+}
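+
+/*
+ * Hedged sketch of invoking the new syscall directly; there is no public
+ * wrapper. SYS_process_policy appearing in <sys/syscall.h> is an
+ * assumption, and the argument order is read off the uap unpacking above.
+ */
+#include <sys/syscall.h>
+#include <sys/process_policy.h>
+#include <stdint.h>
+#include <unistd.h>
+
+static int
+get_bg_policy(pid_t pid, int *valp)
+{
+	/* scope, action, policy, policy_subtype, attrp, target_pid, target_threadid */
+	return syscall(SYS_process_policy, PROC_POLICY_SCOPE_PROCESS,
+	    PROC_POLICY_ACTION_GET, PROC_POLICY_BACKGROUND, 0,
+	    valp, pid, (uint64_t)0);
+}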
+
+
+/* darwin background handling code */
+static int 
+handle_background(int scope, int action, __unused int policy, __unused int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid)
+{
+	int intval, error = 0;
+
+
+	switch (action) {
+		case PROC_POLICY_ACTION_GET: 
+			if (scope == PROC_POLICY_SCOPE_PROCESS) {
+				intval = proc_get_task_bg_policy(proc->task);
+			} else {
+				/* thread scope */
+				intval = proc_get_thread_bg_policy(proc->task, target_threadid);
+			}
+			error = copyout((int *)&intval, (user_addr_t)attrp, sizeof(int));
+			break;
+
+		case PROC_POLICY_ACTION_SET: 
+			error = copyin((user_addr_t)attrp, (int *)&intval, sizeof(int));
+			if (error != 0)
+				goto out;
+			if (intval > PROC_POLICY_BG_ALL) {
+				error = EINVAL;
+				goto out;	
+			}
+			if (scope == PROC_POLICY_SCOPE_PROCESS) {
+				error = proc_set_bgtaskpolicy(proc->task, intval);
+			} else {
+				/* thread scope */
+				error = proc_set_bgthreadpolicy(proc->task, target_threadid, intval);
+			}
+			break;
+
+		case PROC_POLICY_ACTION_ADD: 
+			error = copyin((user_addr_t)attrp, (int *)&intval, sizeof(int));
+			if (error != 0)
+				goto out;
+			if (intval > PROC_POLICY_BG_ALL) {
+				error = EINVAL;
+				goto out;	
+			}
+			if (scope == PROC_POLICY_SCOPE_PROCESS) {
+				error = proc_add_bgtaskpolicy(proc->task, intval);
+			} else {
+				/* thread scope */
+				error = proc_add_bgthreadpolicy(proc->task, target_threadid, intval);
+			}
+			break;
+
+		case PROC_POLICY_ACTION_REMOVE: 
+			error = copyin((user_addr_t)attrp, (int *)&intval, sizeof(int));
+			if (error != 0)
+				goto out;
+			if (intval > PROC_POLICY_BG_ALL) {
+				error = EINVAL;
+				goto out;	
+			}
+			if (scope == PROC_POLICY_SCOPE_PROCESS) {
+				error = proc_remove_bgtaskpolicy(proc->task, intval);
+			} else {
+				/* thread scope */
+				error = proc_remove_bgthreadpolicy(proc->task, target_threadid, intval);
+			}
+			break;
+		
+		case PROC_POLICY_ACTION_APPLY:
+			if (scope == PROC_POLICY_SCOPE_PROCESS) {
+				error = proc_apply_bgtaskpolicy(proc->task);
+			} else {
+				/* thread scope */
+				error = proc_apply_bgthreadpolicy(proc->task, target_threadid);
+			}	
+			break;
+		
+		case PROC_POLICY_ACTION_RESTORE:
+			if (scope == PROC_POLICY_SCOPE_PROCESS) {
+				error = proc_restore_bgtaskpolicy(proc->task);
+			} else {
+				/* thread scope */
+				error = proc_restore_bgthreadpolicy(proc->task, target_threadid);
+			}
+			break;
+		
+		case PROC_POLICY_ACTION_DENYINHERIT:
+			error = proc_denyinherit_policy(proc->task);
+			break;
+		
+		case PROC_POLICY_ACTION_DENYSELFSET:
+			error = proc_denyselfset_policy(proc->task);
+			break;
+		
+		default:
+			return(EINVAL);
+	}
+
+out:
+	return(error);
+}
+
+static int 
+handle_hwaccess(__unused int scope, __unused int action, __unused int policy, int policy_subtype, __unused user_addr_t attrp, __unused proc_t proc, __unused uint64_t target_threadid)
+{
+	switch(policy_subtype) {
+		case PROC_POLICY_HWACCESS_NONE:
+		case PROC_POLICY_HWACCESS_DISK:
+		case PROC_POLICY_HWACCESS_GPU:
+		case PROC_POLICY_HWACCESS_NETWORK:
+		case PROC_POLICY_HWACCESS_CPU:
+			break;
+		default:
+			return(EINVAL);	
+	}
+	return(0);
+}
+
+static int 
+handle_lowresrouce(__unused int scope, int action, __unused int policy, int policy_subtype, __unused user_addr_t attrp, proc_t proc, __unused uint64_t target_threadid)
+{
+	int error = 0;
+
+	switch(policy_subtype) {
+		case PROC_POLICY_RS_NONE:
+		case PROC_POLICY_RS_VIRTUALMEM:
+			break;
+		default:
+			return(EINVAL);	
+	}
+	
+	if (action == PROC_POLICY_ACTION_RESTORE)
+		error = proc_resetpcontrol(proc_pid(proc));
+	else
+		error = EINVAL;
+
+	return(error);
+}
+
+
+static int 
+handle_resourceuse(__unused int scope, __unused int action, __unused int policy, int policy_subtype, user_addr_t attrp, proc_t proc, __unused uint64_t target_threadid)
+{
+	proc_policy_cpuusage_attr_t cpuattr;
+	int error = 0;
+
+	switch(policy_subtype) {
+		case PROC_POLICY_RUSAGE_NONE:
+		case PROC_POLICY_RUSAGE_WIREDMEM:
+		case PROC_POLICY_RUSAGE_VIRTMEM:
+		case PROC_POLICY_RUSAGE_DISK:
+		case PROC_POLICY_RUSAGE_NETWORK:
+		case PROC_POLICY_RUSAGE_POWER:
+			return(ENOTSUP);
+			break;
+		default:
+			return(EINVAL);	
+		case PROC_POLICY_RUSAGE_CPU:
+			break;
+	}
+
+	switch (action) {
+		case PROC_POLICY_ACTION_GET: 
+			error = proc_get_task_ruse_cpu(proc->task, &cpuattr.ppattr_cpu_attr,
+                                        &cpuattr.ppattr_cpu_percentage,
+                                        &cpuattr.ppattr_cpu_attr_interval,
+                                        &cpuattr.ppattr_cpu_attr_deadline);
+			if (error == 0)
+				error = copyout((proc_policy_cpuusage_attr_t *)&cpuattr, (user_addr_t)attrp, sizeof(proc_policy_cpuusage_attr_t));
+			break;
+
+		case PROC_POLICY_ACTION_APPLY:
+		case PROC_POLICY_ACTION_SET:
+			error = copyin((user_addr_t)attrp, (proc_policy_cpuusage_attr_t *)&cpuattr, sizeof(proc_policy_cpuusage_attr_t));
+			if (error == 0) {
+				error = proc_set_task_ruse_cpu(proc->task, cpuattr.ppattr_cpu_attr,
+						cpuattr.ppattr_cpu_percentage,
+						cpuattr.ppattr_cpu_attr_interval,
+						cpuattr.ppattr_cpu_attr_deadline);
+			}
+			break;
+
+		default:
+			error = EINVAL;
+			break;
+	}
+
+	return(error);
+}
+
+
+static int 
+handle_apptype(__unused int scope, int action, __unused int policy, int policy_subtype, __unused user_addr_t attrp, proc_t proc, __unused uint64_t target_threadid)
+{
+	int error = 0;
+
+	switch(policy_subtype) {
+		case PROC_POLICY_OSX_APPTYPE_TAL:
+			/* need to be super user to do this */
+			if (kauth_cred_issuser(kauth_cred_get()) == 0) {
+				error = EPERM;
+				goto out;
+			}
+			break;
+		case PROC_POLICY_OSX_APPTYPE_DASHCLIENT:
+			/* no special priv needed */
+			break;
+		case PROC_POLICY_OSX_APPTYPE_NONE:
+		case PROC_POLICY_IOS_APPTYPE:
+		case PROC_POLICY_IOS_NONUITYPE:
+			return(ENOTSUP);
+			break;
+		default:
+			return(EINVAL);	
+	}
+
+	switch (action) {
+		case PROC_POLICY_ACTION_ENABLE:
+			/* reapply the app foreground/background policy */
+			error = proc_enable_task_apptype(proc->task, policy_subtype);
+			break;
+		case PROC_POLICY_ACTION_DISABLE: 
+			/* remove the app foreground/background policy */
+			error = proc_disable_task_apptype(proc->task, policy_subtype);
+			break;
+		default:
+			error = EINVAL;
+			break;
+	}
+				
+out:
+	return(error);
+}
+
+int
+proc_apply_resource_actions(void * bsdinfo, int type, int action)
+{
+	proc_t p = (proc_t)bsdinfo;
+
+	switch(action) {
+		case PROC_POLICY_RSRCACT_THROTTLE:
+			/* no need to do anything */
+			break;
+
+		case PROC_POLICY_RSRCACT_SUSPEND:
+			task_suspend(p->task);
+			break;
+
+		case PROC_POLICY_RSRCACT_TERMINATE:
+			psignal(p, SIGKILL);
+			break;
+
+		case PROC_POLICY_RSRCACT_NOTIFY:
+			proc_lock(p);
+			proc_knote(p, NOTE_RESOURCEEND | (type & 0xff));
+			proc_unlock(p);
+			break;
+	}
+
+	return(0);
+}
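+
+/*
+ * Sketch of observing PROC_POLICY_RSRCACT_NOTIFY from userland. It
+ * assumes NOTE_RESOURCEEND is exported for EVFILT_PROC in <sys/event.h>;
+ * the patch itself only shows the kernel-side knote posting above.
+ */
+#include <sys/event.h>
+#include <unistd.h>
+
+static int
+watch_resource_end(pid_t pid)
+{
+	struct kevent kev;
+	int kq = kqueue();
+
+	if (kq < 0)
+		return (-1);
+	/* delivered fflags carry the resource type in the low byte */
+	EV_SET(&kev, pid, EVFILT_PROC, EV_ADD | EV_CLEAR, NOTE_RESOURCEEND, 0, NULL);
+	return (kevent(kq, &kev, 1, NULL, 0, NULL));
+}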
+
+
+int
+proc_restore_resource_actions(void * bsdinfo, __unused int type, int action)
+{
+	proc_t p = (proc_t)bsdinfo;
+
+	switch(action) {
+		case PROC_POLICY_RSRCACT_THROTTLE:
+		case PROC_POLICY_RSRCACT_TERMINATE:
+		case PROC_POLICY_RSRCACT_NOTIFY:
+			/* no need to do anything */
+			break;
+
+		case PROC_POLICY_RSRCACT_SUSPEND:
+			task_resume(p->task);
+			break;
+
+	}
+
+	return(0);
+}
+
diff --git a/bsd/kern/pthread_support.c b/bsd/kern/pthread_support.c
index 2691813a4..e5626dfa2 100644
--- a/bsd/kern/pthread_support.c
+++ b/bsd/kern/pthread_support.c
@@ -67,10 +67,12 @@
 #include <kern/sched_prim.h>
 #include <kern/thread_call.h>
 #include <kern/kalloc.h>
+#include <kern/zalloc.h>
 #include <kern/sched_prim.h>
 #include <kern/processor.h>
 #include <kern/affinity.h>
 #include <kern/wait_queue.h>
+#include <kern/mach_param.h>
 #include <mach/mach_vm.h>
 #include <mach/mach_param.h>
 #include <mach/thread_policy.h>
@@ -82,9 +84,35 @@
 
 #include <libkern/OSAtomic.h>
 
-#define _PSYNCH_TRACE_ 0		/* kdebug trace */
-#define __TESTPANICS__ 0		/* panics for error conditions */
-#define COND_MTX_WAITQUEUEMOVE 0	/* auto move from cvar wait queue to mutex waitqueue */
+#include <pexpert/pexpert.h>
+
+#define __PSYNCH_DEBUG__ 0			/* debug panic actions  */
+#define _PSYNCH_TRACE_ 1		/* kdebug trace */
+
+#define __TESTMODE__ 2		/* 0 - return error on user error conditions */
+				/* 1 - log error on user error conditions */
+				/* 2 - abort caller on user error conditions */
+				/* 3 - panic on user error conditions */
+static int __test_panics__;
+static int __test_aborts__;
+static int __test_prints__;
+
+static inline void __FAILEDUSERTEST__(const char *str)
+{
+	proc_t p;
+
+	if (__test_panics__ != 0)
+		panic("%s", str);	/* str is not a format string */
+
+	if (__test_aborts__ != 0 || __test_prints__ != 0)
+		p = current_proc();
+
+	if (__test_prints__ != 0)
+		printf("PSYNCH: pid[%d]: %s\n", p->p_pid, str);
+
+	if (__test_aborts__ != 0)
+		psignal(p, SIGABRT);
+}
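+
+/*
+ * The three __test_* flags are not initialized in this hunk. A hypothetical
+ * sketch of wiring them to the __TESTMODE__ levels documented above via a
+ * boot-arg; the "psynchtest" name is invented here, only PE_parse_boot_argn()
+ * (available via the <pexpert/pexpert.h> include above) is real.
+ */
+static void
+__pthread_testmode_init(void)
+{
+	uint32_t mode = __TESTMODE__;
+
+	(void)PE_parse_boot_argn("psynchtest", &mode, sizeof(mode));
+	__test_prints__ = (mode >= 1);
+	__test_aborts__ = (mode >= 2);
+	__test_panics__ = (mode >= 3);
+}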
 
 #if _PSYNCH_TRACE_
 #define _PSYNCH_TRACE_MLWAIT	0x9000000
@@ -103,6 +131,10 @@
 #define _PSYNCH_TRACE_RWUNLOCK2	0x9000034
 #define _PSYNCH_TRACE_RWHANDLEU	0x9000038
 #define _PSYNCH_TRACE_FSEQTILL	0x9000040
+#define _PSYNCH_TRACE_CLRPRE	0x9000044
+#define _PSYNCH_TRACE_CVHBROAD	0x9000048
+#define _PSYNCH_TRACE_CVSEQ	0x900004c
+#define _PSYNCH_TRACE_THWAKEUP	0x9000050
 /* user side */
 #define _PSYNCH_TRACE_UM_LOCK	0x9000060
 #define _PSYNCH_TRACE_UM_UNLOCK	0x9000064
@@ -112,8 +144,24 @@
 #define _PSYNCH_TRACE_UM_CVSIG	0x9000074
 #define _PSYNCH_TRACE_UM_CVBRD	0x9000078
 
+proc_t pthread_debug_proc = PROC_NULL;
+static inline void __PTHREAD_TRACE_DEBUG(uint32_t debugid, uintptr_t arg1, 
+                uintptr_t arg2,
+                uintptr_t arg3,
+                uintptr_t arg4,
+                uintptr_t arg5)
+{
+	proc_t p = current_proc();
+
+	if ((pthread_debug_proc != NULL) && (p == pthread_debug_proc))
+		KERNEL_DEBUG_CONSTANT(debugid, arg1, arg2, arg3, arg4, arg5);
+}
+
 #endif /* _PSYNCH_TRACE_ */
 
+#define ECVCERORR       256
+#define ECVPERORR       512
+
 lck_mtx_t * pthread_list_mlock;
 
 #define PTHHASH(addr)    (&pthashtbl[(addr) & pthhash])
@@ -122,19 +170,28 @@ struct pthhashhead * pth_glob_hashtbl;
 u_long pthhash;
 
 LIST_HEAD(, ksyn_wait_queue) pth_free_list;
+int num_total_kwq = 0;  /* number of kwq in use currently */
+int num_infreekwq = 0;	/* number of kwq in free list */
+int num_freekwq = 0;	/* number of kwq actually freed from the free list */
+int num_reusekwq = 0;	/* number of kwq pulled back for reuse from free list */
+int num_addedfreekwq = 0; /* number of added free kwq from the last instance */
+int num_lastfreekwqcount = 0;	/* the free count from the last time */
 
 static int PTH_HASHSIZE = 100;
 
+static zone_t kwq_zone; /* zone for allocation of ksyn_wait_queue */
+static zone_t kwe_zone;	/* zone for allocation of ksyn_waitq_element */
 
 #define SEQFIT 0
 #define FIRSTFIT 1
 
 struct ksyn_queue {
-	TAILQ_HEAD(, uthread) ksynq_uthlist;
+	TAILQ_HEAD(ksynq_kwelist_head, ksyn_waitq_element) ksynq_kwelist;
 	uint32_t	ksynq_count;		/* number of entries in queue */
 	uint32_t	ksynq_firstnum;		/* lowest seq in queue */
 	uint32_t	ksynq_lastnum;		/* highest seq in queue */
 };
+typedef struct ksyn_queue * ksyn_queue_t;
 
 #define KSYN_QUEUE_READ		0
 #define KSYN_QUEUE_LREAD	1
@@ -146,9 +203,6 @@ struct ksyn_queue {
 struct ksyn_wait_queue {
 	LIST_ENTRY(ksyn_wait_queue) kw_hash;
 	LIST_ENTRY(ksyn_wait_queue) kw_list;
-#if USE_WAITQUEUE
-	struct wait_queue kw_wq;
-#endif /* USE_WAITQUEUE */
 	user_addr_t kw_addr;
 	uint64_t  kw_owner;
 	uint64_t kw_object;		/* object backing in shared mode */
@@ -157,78 +211,113 @@ struct ksyn_wait_queue {
 	int 	kw_pflags;		/* flags under listlock protection */
 	struct timeval kw_ts;		/* timeval need for upkeep before free */
 	int	kw_iocount;		/* inuse reference */
+	int 	kw_dropcount;		/* current users unlocking... */
 
 	int	kw_type;		/* queue type like mutex, cvar, etc */
 	uint32_t kw_inqueue;		/* num of waiters held */
+	uint32_t kw_fakecount;		/* number of error/prepost fakes */
 	uint32_t kw_highseq;		/* highest seq in the queue */
 	uint32_t kw_lowseq;		/* lowest seq in the queue */
+	uint32_t kw_lword;		/* L value from userland */
+	uint32_t kw_uword;		/* U word value from userland */
+	uint32_t kw_sword;		/* S word value from userland */
 	uint32_t kw_lastunlockseq;	/* the last seq that unlocked */
+/* for CV to be used as the seq kernel has seen so far */
+#define kw_cvkernelseq kw_lastunlockseq
+	uint32_t kw_lastseqword;		/* the last seq that unlocked */
+/* for mutex and cvar we need to track I bit values */
+	uint32_t kw_nextseqword;	/* the last seq that unlocked; with num of waiters */
+#define kw_initrecv kw_nextseqword	/* number of incoming waiters with Ibit seen sofar */
+	uint32_t kw_overlapwatch;	/* possibility of overlapping read grants */
+#define kw_initcount kw_overlapwatch	/* number of incoming waiters with Ibit expected */
+	uint32_t kw_initcountseq;	/* highest seq with Ibit on for mutex and cvar */
 	uint32_t kw_pre_rwwc;		/* prepost count */
 	uint32_t kw_pre_lockseq;	/* prepost target seq */
-	uint32_t kw_pre_cvretval;	/* retval for cwait on prepost */
-	uint32_t kw_pre_limrd;		/*  prepost read only(rwlock)  */
-	uint32_t kw_pre_limrdseq;	/* prepost limit seq for reads(rwlock)  */
-	uint32_t kw_pre_limrdbits;	/*  seqbit needed for updates on prepost */
+	uint32_t kw_pre_sseq;		/* prepost target sword, in cvar used for mutexowned */
 	uint32_t kw_pre_intrcount;	/* prepost of missed wakeup due to intrs */
 	uint32_t kw_pre_intrseq;	/* prepost of missed wakeup limit seq */
 	uint32_t kw_pre_intrretbits;	/* return bits value for missed wakeup threads */
 	uint32_t kw_pre_intrtype;	/* type of failed wakeups */
 
 	int 	kw_kflags;
-	TAILQ_HEAD(, uthread) kw_uthlist;       /* List of uthreads */
 	struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX];	/* queues to hold threads */
 	lck_mtx_t kw_lock;		/* mutex lock protecting this structure */
-	struct ksyn_wait_queue * kw_attq; /* attached queue (cvar->mutex, need in prepost */ 
 };
-
-typedef struct ksyn_queue * ksyn_queue_t;
 typedef struct ksyn_wait_queue * ksyn_wait_queue_t;
 
-#define PTHRW_EBIT			0x01
-#define PTHRW_LBIT			0x02
-#define PTHRW_YBIT			0x04
-#define PTHRW_WBIT			0x08
-#define PTHRW_UBIT			0x10
-#define PTHRW_RETRYBIT      		0x20
-/* same as 0x20, shadow W bit for rwlock */
-#define PTHRW_SHADOW_W      		0x20        
-
-#define PTHRW_TRYLKBIT      		0x40
-#define PTHRW_RW_HUNLOCK      		0x40	/* returning read thread responsible to handle unlock */
-
-#define PTHRW_MTX_NONE			0x80
-#define PTHRW_RW_INIT			0x80	/* reset on the lock bits */
-/* same as 0x80, spurious rwlock  unlock ret from kernel */
-#define PTHRW_RW_SPURIOUS     		0x80      
-
 #define PTHRW_INC			0x100
-
-#define PTHRW_BIT_MASK		0x000000ff;
+#define PTHRW_BIT_MASK		0x000000ff
 
 #define PTHRW_COUNT_SHIFT	8
 #define PTHRW_COUNT_MASK	0xffffff00
 #define PTHRW_MAX_READERS	0xffffff00
 
+/* New model bits on Lword */
+#define PTH_RWL_KBIT	0x01	/* users cannot acquire in user mode */
+#define PTH_RWL_EBIT	0x02	/* exclusive lock in progress */
+#define PTH_RWL_WBIT	0x04	/* write waiters pending in kernel */
+#define PTH_RWL_PBIT    0x04    /* prepost (cv) pending in kernel */
+#define PTH_RWL_YBIT	0x08	/* yielding write waiters pending in kernel */
+#define PTH_RWL_RETRYBIT 0x08	/* mutex retry wait */
+#define PTH_RWL_LBIT	0x10	/* long read in progress */
+#define PTH_RWL_MTXNONE 0x10    /* indicates the cvwait does not have mutex held */
+#define PTH_RWL_UBIT	0x20	/* upgrade request pending */
+#define PTH_RWL_MTX_WAIT 0x20	/* in cvar in mutex wait */
+#define PTH_RWL_RBIT	0x40	/* reader pending in kernel(not used) */
+#define PTH_RWL_MBIT	0x40	/* overlapping grants from kernel */
+#define PTH_RWL_TRYLKBIT 0x40	/* trylock attempt (mutex only) */
+#define PTH_RWL_IBIT	0x80	/* lock reset, held until first successful unlock */
+
+
+/* UBIT values for mutex, cvar */
+#define PTH_RWU_SBIT    0x01
+#define PTH_RWU_BBIT    0x02
+
+#define PTHRW_RWL_INIT       PTH_RWL_IBIT    /* reset state on the lock bits (U) */
+
+/* New model bits on Sword */
+#define PTH_RWS_SBIT	0x01	/* kernel transition seq not set yet */
+#define PTH_RWS_IBIT	0x02	/* sequence is not set on return from kernel */
+#define PTH_RWS_CV_CBIT PTH_RWS_SBIT    /* kernel has cleared all info w.r.t. CV */
+#define PTH_RWS_CV_PBIT PTH_RWS_IBIT    /* kernel has prepost/fake structs only, no waiters */
+#define PTH_RWS_CV_MBIT PTH_RWL_MBIT	/* to indicate prepost return */
+#define PTH_RWS_WSVBIT  0x04    /* save W bit */
+#define PTH_RWS_USVBIT  0x08    /* save U bit */
+#define PTH_RWS_YSVBIT  0x10    /* save Y bit */
+#define PTHRW_RWS_INIT       PTH_RWS_SBIT    /* reset state on the lock bits (U) */
+#define PTHRW_RWS_SAVEMASK (PTH_RWS_WSVBIT|PTH_RWS_USVBIT|PTH_RWS_YSVBIT)    /* save bits mask */
+#define PTHRW_SW_Reset_BIT_MASK 0x000000fe      /* remove S bit and get rest of the bits */
+
+
+#define PTHRW_UN_BIT_MASK 0x000000bf	/* remove overlap bit */
+
+
+#define PTHREAD_MTX_TID_SWITCHING (uint64_t)-1
+
+/* new L word defns */
+#define is_rwl_readinuser(x) ((((x) & (PTH_RWL_UBIT | PTH_RWL_KBIT)) == 0)||(((x) & PTH_RWL_LBIT) != 0))
+#define is_rwl_ebit_set(x) (((x) & PTH_RWL_EBIT) != 0)
+#define is_rwl_lbit_set(x) (((x) & PTH_RWL_LBIT) != 0)
+#define is_rwl_readoverlap(x) (((x) & PTH_RWL_MBIT) != 0)
+#define is_rw_ubit_set(x) (((x) & PTH_RWL_UBIT) != 0)
+
+/* S word checks */
+#define is_rws_setseq(x) (((x) & PTH_RWS_SBIT))
+#define is_rws_setunlockinit(x) (((x) & PTH_RWS_IBIT))
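+
+/*
+ * Worked example of the word layout used above (illustrative helpers, not
+ * part of the patch): each of the L, U and S words carries a 24-bit
+ * sequence count above PTHRW_COUNT_SHIFT and eight flag bits below it,
+ * so each waiter advances the count by PTHRW_INC. E.g. an L word of
+ * 0x00000302 is seq 0x300 (three increments) with PTH_RWL_EBIT set.
+ */
+static inline uint32_t lword_count(uint32_t lword) { return (lword & PTHRW_COUNT_MASK); }
+static inline uint32_t lword_bits(uint32_t lword)  { return (lword & PTHRW_BIT_MASK); }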
+
 /* first contended seq that kernel sees */
 #define KW_MTXFIRST_KSEQ	0x200
 #define KW_CVFIRST_KSEQ		1
 #define KW_RWFIRST_KSEQ		0x200
 
-#define is_rw_ewubit_set(x) ((x & (PTHRW_EBIT | PTHRW_WBIT | PTHRW_UBIT)) != 0)
-#define is_rw_lybit_set(x) ((x & (PTHRW_LBIT | PTHRW_YBIT)) != 0)
-#define is_rw_ebit_set(x) ((x & PTHRW_EBIT) != 0)
-#define is_rw_uebit_set(x) ((x & (PTHRW_EBIT | PTHRW_UBIT)) != 0)
-#define is_rw_ubit_set(x) ((x & PTHRW_UBIT) != 0)
-#define is_rw_either_ewyubit_set(x) ((x & (PTHRW_EBIT | PTHRW_WBIT | PTHRW_UBIT | PTHRW_YBIT)) != 0)
-
-
-/* is x lower than Y */
-#define is_seqlower(x, y) ((x  < y) || ((x - y) > (PTHRW_MAX_READERS/2)))
-/* is x lower than or eq Y */
-#define is_seqlower_eq(x, y) ((x  <= y) || ((x - y) > (PTHRW_MAX_READERS/2)))
+int is_seqlower(uint32_t x, uint32_t y);
+int is_seqlower_eq(uint32_t x, uint32_t y);
+int is_seqhigher(uint32_t x, uint32_t y);
+int is_seqhigher_eq(uint32_t x, uint32_t y);
+int find_diff(uint32_t upto, uint32_t lowest);
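+
+/*
+ * The sequence comparisons above are functions now (defined later in this
+ * file); a sketch consistent with the macros they replace: sequence counts
+ * wrap around, so x is "lower" than y either directly or when the unsigned
+ * difference exceeds half the window, e.g. x = 0xffffff00 vs y = 0x100.
+ */
+static inline int
+is_seqlower_sketch(uint32_t x, uint32_t y)
+{
+	return ((x < y) || ((x - y) > (PTHRW_MAX_READERS/2)));
+}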
 
-/* is x greater than Y */
-#define is_seqhigher(x, y) ((x  > y) || ((y - x) > (PTHRW_MAX_READERS/2)))
 
 static inline  int diff_genseq(uint32_t x, uint32_t y) { 
 	if (x > y)  {
@@ -292,27 +381,39 @@ static inline  int diff_genseq(uint32_t x, uint32_t y) {
 #define PTHREAD_POLICY_FLAGS_MASK	0x1c0
 
 #define _PTHREAD_MTX_OPT_HOLDLOCK 	0x200
-#define _PTHREAD_MTX_OPT_NOHOLDLOCK 	0x400
-#define _PTHREAD_MTX_OPT_LASTDROP (_PTHREAD_MTX_OPT_HOLDLOCK | _PTHREAD_MTX_OPT_NOHOLDLOCK)
+#define _PTHREAD_MTX_OPT_NOMTX 		0x400
+
+#define _PTHREAD_MTX_OPT_NOTIFY 	0x1000
+#define _PTHREAD_MTX_OPT_MUTEX		0x2000	/* this is a mutex type  */
 
+#define _PTHREAD_RWLOCK_UPGRADE_TRY 0x10000
+
+/* pflags */
 #define KSYN_WQ_INLIST	1
 #define KSYN_WQ_INHASH	2
 #define KSYN_WQ_SHARED	4
+#define KSYN_WQ_WAITING 8	/* threads waiting for this wq to be available */
 #define KSYN_WQ_FLIST 	0X10	/* in free list to be freed after a short delay */
 
+/* kflags */
+#define KSYN_KWF_INITCLEARED	1	/* the init status found and preposts cleared */
+#define KSYN_KWF_ZEROEDOUT	2	/* the lword, etc are inited to 0 */
+
 #define KSYN_CLEANUP_DEADLINE 10
 int psynch_cleanupset;
 thread_call_t psynch_thcall;
 
 #define KSYN_WQTYPE_INWAIT	0x1000
+#define KSYN_WQTYPE_INDROP	0x2000
 #define KSYN_WQTYPE_MTX		0x1
 #define KSYN_WQTYPE_CVAR	0x2
 #define KSYN_WQTYPE_RWLOCK	0x4
 #define KSYN_WQTYPE_SEMA	0x8
 #define KSYN_WQTYPE_BARR	0x10
-#define KSYN_WQTYPE_MASK        0xffff
+#define KSYN_WQTYPE_MASK        0x00ff
 
 #define KSYN_MTX_MAX 0x0fffffff
+#define KSYN_WQTYPE_MUTEXDROP	(KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX)
 
 #define KW_UNLOCK_PREPOST 		0x01
 #define KW_UNLOCK_PREPOST_UPGRADE 	0x02
@@ -324,14 +425,14 @@ thread_call_t psynch_thcall;
 
 #define CLEAR_PREPOST_BITS(kwq)  {\
 			kwq->kw_pre_lockseq = 0; \
+			kwq->kw_pre_sseq = PTHRW_RWS_INIT; \
 			kwq->kw_pre_rwwc = 0; \
-			kwq->kw_pre_cvretval = 0; \
 			}
 
-#define CLEAR_READ_PREPOST_BITS(kwq)  {\
-			kwq->kw_pre_limrd = 0; \
-			kwq->kw_pre_limrdseq = 0; \
-			kwq->kw_pre_limrdbits = 0; \
+#define CLEAR_INITCOUNT_BITS(kwq)  {\
+			kwq->kw_initcount = 0; \
+			kwq->kw_initrecv = 0; \
+			kwq->kw_initcountseq = 0; \
 			}
 
 #define CLEAR_INTR_PREPOST_BITS(kwq)  {\
@@ -340,7 +441,30 @@ thread_call_t psynch_thcall;
 			kwq->kw_pre_intrretbits = 0; \
 			kwq->kw_pre_intrtype = 0; \
 			}
-	
+
+#define CLEAR_REINIT_BITS(kwq)  {\
+			if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) { \
+				if((kwq->kw_inqueue != 0) && (kwq->kw_inqueue != kwq->kw_fakecount)) \
+					panic("CV:entries in queue durinmg reinit %d:%d\n",kwq->kw_inqueue, kwq->kw_fakecount);	\
+			};\
+			if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) { \
+				kwq->kw_nextseqword = PTHRW_RWS_INIT; \
+				kwq->kw_overlapwatch = 0; \
+			}; \
+			kwq->kw_pre_lockseq = 0; \
+			kwq->kw_pre_rwwc = 0; \
+			kwq->kw_pre_sseq = PTHRW_RWS_INIT; \
+			kwq->kw_lastunlockseq = PTHRW_RWL_INIT; \
+			kwq->kw_lastseqword = PTHRW_RWS_INIT; \
+			kwq->kw_pre_intrcount = 0; \
+			kwq->kw_pre_intrseq = 0; \
+			kwq->kw_pre_intrretbits = 0; \
+			kwq->kw_pre_intrtype = 0; \
+			kwq->kw_lword = 0;	\
+			kwq->kw_uword = 0;	\
+			kwq->kw_sword = PTHRW_RWS_INIT;	\
+			}
+
 void pthread_list_lock(void);
 void pthread_list_unlock(void);
 void pthread_list_lock_spin(void);
@@ -349,41 +473,69 @@ void ksyn_wqlock(ksyn_wait_queue_t kwq);
 void ksyn_wqunlock(ksyn_wait_queue_t kwq);
 ksyn_wait_queue_t ksyn_wq_hash_lookup(user_addr_t mutex, proc_t p, int flags, uint64_t object, uint64_t offset);
 int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uint64_t tid, int flags, int wqtype , ksyn_wait_queue_t * wq);
-void ksyn_wqrelease(ksyn_wait_queue_t mkwq, ksyn_wait_queue_t ckwq);
-int ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, uthread_t uth);
-kern_return_t ksyn_wakeup_thread(ksyn_wait_queue_t kwq, uthread_t uth);
-void ksyn_move_wqthread(ksyn_wait_queue_t ckwq, ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t updateval, int diffgen, int nomutex);
-extern thread_t port_name_to_thread(mach_port_name_t port_name);
+void ksyn_wqrelease(ksyn_wait_queue_t mkwq, ksyn_wait_queue_t ckwq, int qfreenow, int wqtype);
 extern int ksyn_findobj(uint64_t mutex, uint64_t * object, uint64_t * offset);
-static void UPDATE_KWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uint64_t tid, int wqtype, int retry);
-void psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t lkseq, uint32_t ugen, int flags);
-
-#if USE_WAITQUEUE
-kern_return_t wait_queue_move_all(wait_queue_t from, event64_t eventfrom, wait_queue_t to, event64_t eventto);
-kern_return_t wait_queue_move_thread(wait_queue_t from, event64_t eventfrom, thread_t th, wait_queue_t to, event64_t eventto, thread_t * mthp);
-#endif /* USE_WAITQUEUE */
-int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t * updatep, int flags, int *blockp, uint32_t premgen);
+static void UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uint64_t tid, int wqtype);
+extern thread_t port_name_to_thread(mach_port_name_t port_name);
+
+int ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, int log);
+kern_return_t ksyn_wakeup_thread(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe);
+void ksyn_freeallkwe(ksyn_queue_t kq);
+
+uint32_t psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t lkseq, uint32_t ugen, int flags);
+int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t rw_wc, uint32_t * updatep, int flags, int *blockp, uint32_t premgen);
+
 void ksyn_queue_init(ksyn_queue_t kq);
-int ksyn_queue_insert(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t mgen, struct uthread * uth, int firstfit);
-struct uthread * ksyn_queue_removefirst(ksyn_queue_t kq, ksyn_wait_queue_t kwq);
-void ksyn_queue_removeitem(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uthread_t uth);
+int ksyn_queue_insert(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t mgen, struct uthread * uth, ksyn_waitq_element_t kwe, int firstfit);
+ksyn_waitq_element_t ksyn_queue_removefirst(ksyn_queue_t kq, ksyn_wait_queue_t kwq);
+void ksyn_queue_removeitem(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe);
+int ksyn_queue_move_tofree(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t upto, ksyn_queue_t freeq, int all, int release);
 void update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq);
 uint32_t find_nextlowseq(ksyn_wait_queue_t kwq);
 uint32_t find_nexthighseq(ksyn_wait_queue_t kwq);
+
 int find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t  nwaiters, uint32_t *countp);
-int find_diff(uint32_t upto, uint32_t lowest);
 uint32_t ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto);
+
+ksyn_waitq_element_t ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen);
+uint32_t ksyn_queue_cvcount_entries(ksyn_queue_t kq, uint32_t upto, uint32_t from, int * numwaitersp, int * numintrp, int * numprepop);
+void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep);
+void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep, ksyn_queue_t kfreeq, int release);
+ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq);
+ksyn_waitq_element_t ksyn_queue_find_threadseq(ksyn_wait_queue_t ckwq, ksyn_queue_t kq, thread_t th, uint32_t toseq);
+
 int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int longreadset, int allreaders, uint32_t updatebits, int * wokenp);
 int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int * type, uint32_t lowest[]);
-uthread_t ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq);
+ksyn_waitq_element_t ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq, int remove);
+int kwq_handle_overlap(ksyn_wait_queue_t kwq, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, uint32_t *updatebitsp, int flags , int * blockp);
 int kwq_handle_downgrade(ksyn_wait_queue_t kwq, uint32_t mgen, int flags, uint32_t premgen, int * blockp);
 
-
 static void
-UPDATE_KWQ(__unused ksyn_wait_queue_t kwq, __unused uint32_t mgen, __unused uint32_t ugen, __unused uint32_t rw_wc, __unused uint64_t tid, __unused int wqtype, __unused int retry)  
+UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, __unused uint64_t tid, __unused int wqtype)
 {
+	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) {
+		if ((kwq->kw_kflags & KSYN_KWF_ZEROEDOUT) != 0) {
+			/* the values of L,U and S are cleared out due to L==S in previous transition */
+			kwq->kw_lword = mgen;
+			kwq->kw_uword = ugen;
+			kwq->kw_sword = rw_wc;
+			kwq->kw_kflags &=  ~KSYN_KWF_ZEROEDOUT;
+		}
+		if (is_seqhigher((mgen & PTHRW_COUNT_MASK), (kwq->kw_lword & PTHRW_COUNT_MASK)) != 0)
+			kwq->kw_lword = mgen;
+		if (is_seqhigher((ugen & PTHRW_COUNT_MASK), (kwq->kw_uword & PTHRW_COUNT_MASK)) != 0)
+			kwq->kw_uword = ugen;
+		if ((rw_wc & PTH_RWS_CV_CBIT) != 0) {
+			if(is_seqlower(kwq->kw_cvkernelseq, (rw_wc & PTHRW_COUNT_MASK)) != 0) {
+				kwq->kw_cvkernelseq = (rw_wc & PTHRW_COUNT_MASK);
+			}
+			if (is_seqhigher((rw_wc & PTHRW_COUNT_MASK), (kwq->kw_sword & PTHRW_COUNT_MASK)) != 0)
+				kwq->kw_sword = rw_wc;
+		}
+	}
 }
 
+
 /* to protect the hashes, iocounts, freelist */
 void
 pthread_list_lock(void)
@@ -426,51 +578,43 @@ ksyn_wqunlock(ksyn_wait_queue_t kwq)
 
 
 /* routine to handle mutex unlock postings, used both by the psynch_mutexdrop system call and by drops during cond wait */
-void
+uint32_t
 psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t lkseq, uint32_t ugen, int flags)
 {
-	uint32_t nextgen, low_writer, updatebits;
+	uint32_t nextgen, low_writer, updatebits, returnbits = 0;
 	int firstfit = flags & _PTHREAD_MUTEX_POLICY_FIRSTFIT;
-	uthread_t uth;
+	ksyn_waitq_element_t kwe = NULL;
 	kern_return_t kret = KERN_SUCCESS;
-
 	
 	nextgen = (ugen + PTHRW_INC);
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_START, kwq, lkseq, ugen, flags, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_START, (uint32_t)kwq->kw_addr, lkseq, ugen, flags, 0);
 #endif /* _PSYNCH_TRACE_ */
 
 	ksyn_wqlock(kwq);
 
 redrive:
-
-#if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 1, kwq->kw_inqueue, nextgen, 0);
-#endif /* _PSYNCH_TRACE_ */
+	
 	if (kwq->kw_inqueue != 0) {
-		updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | PTHRW_EBIT;
-		kwq->kw_lastunlockseq = ugen;
+		updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_EBIT | PTH_RWL_KBIT);
+		kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK);
 		if (firstfit != 0) 
 		{
-#if __TESTPANICS__
-		panic("psynch_mutexdrop_internal: first fit mutex arrives, not enabled yet \n");
-#endif /* __TESTPANICS__ */
 			/* first fit , pick any one */
-			uth = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwq);
+			kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwq);
+			kwe->kwe_psynchretval = updatebits;
+			kwe->kwe_kwqqueue = NULL;
 
-			if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0)
-				updatebits |= PTHRW_WBIT;
 #if _PSYNCH_TRACE_
-			KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 2, uth, updatebits, 0);
+			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xcafecaf1, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0);
 #endif /* _PSYNCH_TRACE_ */
-				
-			uth->uu_psynchretval = updatebits;
-			uth->uu_kwqqueue = NULL;
-
-			kret = ksyn_wakeup_thread(kwq, uth);
+			
+			kret = ksyn_wakeup_thread(kwq, kwe);
+#if __TESTPANICS__
 			if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
 				panic("psynch_mutexdrop_internal: panic unable to wakeup firstfit mutex thread\n");
+#endif /* __TESTPANICS__ */
 			if (kret == KERN_NOT_WAITING)
 				goto redrive;
 		} else {
@@ -479,86 +623,124 @@ redrive:
 			low_writer &= PTHRW_COUNT_MASK;
 
 			if (low_writer == nextgen) {
-#if _PSYNCH_TRACE_
-				KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 3, low_writer, nextgen, 0);
-#endif /* _PSYNCH_TRACE_ */
 				/* next seq to be granted found */
-				uth = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwq);
-				if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0)
-					updatebits |= PTHRW_WBIT;
+				kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwq);
 				
-				uth->uu_psynchretval = updatebits;
-				uth->uu_kwqqueue = NULL;
+				/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
+				kwe->kwe_psynchretval = updatebits | PTH_RWL_MTX_WAIT;
+				kwe->kwe_kwqqueue = NULL;
 
-				kret = ksyn_wakeup_thread(kwq, uth);
+#if _PSYNCH_TRACE_
+				__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xcafecaf2, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0);
+#endif /* _PSYNCH_TRACE_ */
+				
+				kret = ksyn_wakeup_thread(kwq, kwe);
+#if __TESTPANICS__
 				if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
 					panic("psynch_mutexdrop_internal: panic unable to wakeup fairshare mutex thread\n");
-				if (kret == KERN_NOT_WAITING)
-					goto redrive;
+#endif /* __TESTPANICS__ */
+				if (kret == KERN_NOT_WAITING) {
+					/* interrupt post */
+					kwq->kw_pre_intrcount = 1;
+					kwq->kw_pre_intrseq = nextgen;
+					kwq->kw_pre_intrretbits = updatebits;
+					kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
+#if _PSYNCH_TRACE_
+					__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfafafaf1, nextgen, kwq->kw_pre_intrretbits, 0);
+#endif /* _PSYNCH_TRACE_ */					
+				}
 
 			} else if (is_seqhigher(low_writer, nextgen) != 0) {
-#if _PSYNCH_TRACE_
-				KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 4, low_writer, nextgen, 0);
-#endif /* _PSYNCH_TRACE_ */
 				kwq->kw_pre_rwwc++;
+
+				if (kwq->kw_pre_rwwc > 1) {
+					__FAILEDUSERTEST__("psynch_mutexdrop_internal: prepost more than one (1)\n");
+					goto out;
+				}
+
 				kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK); 
-			} else {
-#if __TESTPANICS__
-			panic("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one is queue\n");
-#endif /* __TESTPANICS__ */
 #if _PSYNCH_TRACE_
-				KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 5, low_writer, nextgen, 0);
+				__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef1, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
 #endif /* _PSYNCH_TRACE_ */
-				uth = ksyn_queue_find_seq(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], nextgen);
-				if (uth != NULL) {
-					/* next seq to be granted found */
+			} else {
 
-					if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0)
-						updatebits |= PTHRW_WBIT;
-				
+				//__FAILEDUSERTEST__("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one is queue\n");
+
+				kwe = ksyn_queue_find_seq(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], (nextgen & PTHRW_COUNT_MASK), 1);
+				if (kwe != NULL) {
+					/* next seq to be granted found */
+					/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
+					kwe->kwe_psynchretval = updatebits | PTH_RWL_MTX_WAIT;
+					kwe->kwe_kwqqueue = NULL;
 #if _PSYNCH_TRACE_
-					KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 6, updatebits, 0, 0);
+					__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xcafecaf3, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0);
 #endif /* _PSYNCH_TRACE_ */
-					uth->uu_psynchretval = updatebits;
-					uth->uu_kwqqueue = NULL;
-
-					kret = ksyn_wakeup_thread(kwq, uth);
+					kret = ksyn_wakeup_thread(kwq, kwe);
+#if __TESTPANICS__
 					if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
 						panic("psynch_mutexdrop_internal: panic unable to wakeup fairshare mutex thread\n");
+#endif /* __TESTPANICS__ */
 					if (kret == KERN_NOT_WAITING)
 						goto redrive;
 				} else {
 					/* next seq to be granted not found, prepost */
-#if _PSYNCH_TRACE_
-					KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 7, 0, 0, 0);
-#endif /* _PSYNCH_TRACE_ */
 					kwq->kw_pre_rwwc++;
+
+					if (kwq->kw_pre_rwwc > 1) {
+						__FAILEDUSERTEST__("psynch_mutexdrop_internal: prepost more than one (2)\n");
+						goto out;
+					}
+
 					kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK); 
+#if _PSYNCH_TRACE_
+					__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
+#endif /* _PSYNCH_TRACE_ */
 				}
 			}
 		} 
 	} else {
+
+		/* if firstfit the last one could be spurious */
+		if (firstfit == 0) {
+			kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK);
+			kwq->kw_pre_rwwc++;
+
+			if (kwq->kw_pre_rwwc > 1) {
+				__FAILEDUSERTEST__("psynch_mutexdrop_internal: prepost more than one (3)\n");
+				goto out;
+			}
+
+			kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK); 
 #if _PSYNCH_TRACE_
-		KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 8, 0, 0, 0);
+			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef3, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
 #endif /* _PSYNCH_TRACE_ */
-		/* if firstfit the last one could be spurious */
-		if ((firstfit == 0) || ((lkseq & PTHRW_COUNT_MASK) != nextgen))       {
+		} else {
+			/* first fit case */
 #if _PSYNCH_TRACE_
-			KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 9, 0, 0, 0);
+			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef3, kwq->kw_lastunlockseq, kwq->kw_pre_lockseq, 0);
 #endif /* _PSYNCH_TRACE_ */
-			kwq->kw_lastunlockseq = ugen;
-			kwq->kw_pre_rwwc++;
-			kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK); 
+			kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK);
+			/* not set or the new lkseq is higher */
+			if ((kwq->kw_pre_rwwc == 0) || (is_seqlower(kwq->kw_pre_lockseq, lkseq) == 0))
+				kwq->kw_pre_lockseq = (lkseq & PTHRW_COUNT_MASK);
+			kwq->kw_pre_rwwc = 1;
+#if _PSYNCH_TRACE_
+			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef3, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
+#endif /* _PSYNCH_TRACE_ */
+
+			/* indicate prepost content in kernel */
+			returnbits = lkseq | PTH_RWL_PBIT;
 		}
 	}
 
+out:
 	ksyn_wqunlock(kwq);
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_END, kwq, 0, 0, 0, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_END, (uint32_t)kwq->kw_addr, 0xeeeeeeed, 0, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
-	ksyn_wqrelease(kwq, NULL);
-	return;
+	ksyn_wqrelease(kwq, NULL, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX));
+	return(returnbits);
 }
 
 /*
@@ -575,19 +757,24 @@ psynch_mutexwait(__unused proc_t p, struct psynch_mutexwait_args * uap, uint32_t
 	int flags = uap->flags;
 	ksyn_wait_queue_t kwq;
 	int error=0;
-	int ins_flags;
+	int ins_flags, retry;
 	uthread_t uth;
 	int firstfit = flags & _PTHREAD_MUTEX_POLICY_FIRSTFIT;
-	uint32_t lockseq, updatebits;
-	
+	uint32_t lockseq, updatebits=0;
+	ksyn_waitq_element_t kwe;
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_START, (uint32_t)mutex, mgen, ugen, flags, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_START, (uint32_t)mutex, mgen, ugen, flags, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)mutex, mgen, ugen, (uint32_t)tid, 0);
 #endif /* _PSYNCH_TRACE_ */
 
 	uth = current_uthread();
 
-	uth->uu_lockseq = uap->mgen;
+	kwe = &uth->uu_kwe;
+	kwe->kwe_lockseq = uap->mgen;
+	kwe->kwe_uth = uth;
+	kwe->kwe_psynchretval = 0;
+	kwe->kwe_kwqqueue = NULL;
 	lockseq = (uap->mgen & PTHRW_COUNT_MASK);
 
 	if (firstfit  == 0) {
@@ -600,67 +787,105 @@ psynch_mutexwait(__unused proc_t p, struct psynch_mutexwait_args * uap, uint32_t
 	error = ksyn_wqfind(mutex, mgen, ugen, 0, tid, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX), &kwq);
 	if (error != 0) {
 #if _PSYNCH_TRACE_
-		KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)mutex, 1, 0, error, 0);
+		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)mutex, 1, 0xdeadbeef, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 		return(error);
 	}
 
 	ksyn_wqlock(kwq);
 
-	
-	if ((kwq->kw_pre_rwwc != 0) && ((ins_flags == FIRSTFIT) || (lockseq == kwq->kw_pre_lockseq ))) {
+
+	if ((mgen & PTH_RWL_RETRYBIT) != 0) {
+		retry = 1;
+		mgen &= ~PTH_RWL_RETRYBIT;
+	}
+
+	/* handle the missed wakeups first */
+	if ((kwq->kw_pre_intrcount != 0) &&
+	    (kwq->kw_pre_intrtype == PTH_RW_TYPE_WRITE) &&
+	    (is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) {
+		kwq->kw_pre_intrcount--;
+		kwe->kwe_psynchretval = kwq->kw_pre_intrretbits;
+		if (kwq->kw_pre_intrcount == 0)
+			CLEAR_INTR_PREPOST_BITS(kwq);
+		ksyn_wqunlock(kwq);
+		*retval = kwe->kwe_psynchretval;
+#if _PSYNCH_TRACE_
+		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)mutex, 0xfafafaf1, kwe->kwe_psynchretval, kwq->kw_pre_intrcount, 0);
+#endif /* _PSYNCH_TRACE_ */
+		goto out;
+	}
+
+	if ((kwq->kw_pre_rwwc != 0) && ((ins_flags == FIRSTFIT) || ((lockseq & PTHRW_COUNT_MASK) == (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK) ))) {
 		/* got preposted lock */
 		kwq->kw_pre_rwwc--;
 		if (kwq->kw_pre_rwwc == 0) {
 			CLEAR_PREPOST_BITS(kwq);
-			kwq->kw_lastunlockseq = 0;
+			kwq->kw_lastunlockseq = PTHRW_RWL_INIT;
+			if (kwq->kw_inqueue == 0) {
+				updatebits = lockseq | (PTH_RWL_KBIT | PTH_RWL_EBIT);
+			} else {
+				updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_KBIT | PTH_RWL_EBIT);
+			}
+			updatebits &= ~PTH_RWL_MTX_WAIT;
+			
+			kwe->kwe_psynchretval = updatebits;
+
+			if (updatebits == 0) {
+				__FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq  in mutexwait with no EBIT \n");
+			}
+			ksyn_wqunlock(kwq);
+			*retval = updatebits;
+#if _PSYNCH_TRACE_
+			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef1, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
+#endif /* _PSYNCH_TRACE_ */
+			goto out;	
 		} else {
-			panic("psynch_mutexwait: more than one prepost %d\n", (kwq->kw_pre_rwwc + 1));
+			__FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n");
 			kwq->kw_pre_lockseq += PTHRW_INC; /* look for next one */
+			ksyn_wqunlock(kwq);
+			error = EINVAL;
+			goto out;
 		}
-		if (kwq->kw_inqueue == 0) {
-			updatebits = lockseq | PTHRW_EBIT;
-		} else {
-			updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTHRW_EBIT | PTHRW_WBIT);
-		}
-		
-		uth->uu_psynchretval = updatebits;
-#if __TESTPANICS__
-		if ((updatebits & PTHRW_COUNT_MASK) == 0)
-			panic("psynch_mutexwait: (prepost)returning 0 lseq  in mutexwait with EBIT \n");
-#endif /* __TESTPANICS__ */
-		ksyn_wqunlock(kwq);
-		*retval = updatebits;
-		goto out;	
 	}
 	
-	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], mgen, uth, ins_flags);
-	if (error != 0)
-		panic("psynch_mutexwait: failed to enqueue\n");
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfeedfeed, mgen, ins_flags, 0);
+#endif /* _PSYNCH_TRACE_ */
 	
-	error = ksyn_block_thread_locked(kwq, (uint64_t)0, uth);
+	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], mgen, uth, kwe, ins_flags);
+	if (error != 0) {
+		ksyn_wqunlock(kwq);
+#if _PSYNCH_TRACE_
+		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)mutex, 2, 0xdeadbeef, error, 0);
+#endif /* _PSYNCH_TRACE_ */
+		goto out;
+	}
+	
+	error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0);
 		/* drops the wq lock */
 
 	if (error != 0) {
 		ksyn_wqlock(kwq);
+		
 #if _PSYNCH_TRACE_
-		KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)mutex, 2, 0, error, 0);
+		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)mutex, 3, 0xdeadbeef, error, 0);
 #endif /* _PSYNCH_TRACE_ */
-		if (uth->uu_kwqqueue != NULL)
-			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], uth);
+		if (kwe->kwe_kwqqueue != NULL)
+			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
 		ksyn_wqunlock(kwq);
 	} else {
-		updatebits = uth->uu_psynchretval;
+		updatebits = kwe->kwe_psynchretval;
+		updatebits &= ~PTH_RWL_MTX_WAIT;
 		*retval = updatebits;
-#if __TESTPANICS__
-		if ((updatebits & PTHRW_COUNT_MASK) == 0)
-			panic("psynch_mutexwait: returning 0 lseq  in mutexwait with EBIT \n");
-#endif /* __TESTPANICS__ */
+
+		if (updatebits == 0)
+			__FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq  in mutexwait with no EBIT \n");
 	}
 out:
-	ksyn_wqrelease(kwq, NULL); 
+	ksyn_wqrelease(kwq, NULL, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX)); 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)mutex, 0, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)mutex, 0xeeeeeeed, updatebits, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 
 	return(error);
@@ -670,32 +895,26 @@ out:
  *  psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes.
   */
 int
-psynch_mutexdrop(__unused proc_t p, struct psynch_mutexdrop_args * uap, __unused uint32_t * retval)
+psynch_mutexdrop(__unused proc_t p, struct psynch_mutexdrop_args * uap, uint32_t * retval)
 {
 	user_addr_t mutex  = uap->mutex;
 	uint32_t mgen = uap->mgen;
-	uint32_t lkseq = mgen &  PTHRW_COUNT_MASK;
 	uint32_t ugen = uap->ugen;
 	uint64_t tid = uap->tid;
 	int flags = uap->flags;
 	ksyn_wait_queue_t kwq;
+	uint32_t updateval;	
 	int error=0;
 
-#if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_MLDROP | DBG_FUNC_START, (uint32_t)mutex, mgen, ugen, flags, 0);
-#endif /* _PSYNCH_TRACE_ */
-	error = ksyn_wqfind(mutex, mgen, ugen, 0, tid, flags, KSYN_WQTYPE_MTX, &kwq);
+	error = ksyn_wqfind(mutex, mgen, ugen, 0, tid, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX), &kwq);
 	if (error != 0) {
-#if _PSYNCH_TRACE_
-		KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_MLDROP | DBG_FUNC_END, (uint32_t)mutex, 1, 0, error, 0);
-#endif /* _PSYNCH_TRACE_ */
 		return(error);
 	}
-	psynch_mutexdrop_internal(kwq, lkseq, ugen, flags);
+
+	updateval = psynch_mutexdrop_internal(kwq, mgen, ugen, flags);
 	/* drops the kwq reference */
-#if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_MLDROP | DBG_FUNC_END, (uint32_t)mutex, 0, 0, error, 0);
-#endif /* _PSYNCH_TRACE_ */
+
+	*retval = updateval;
 	return(0);
 
 }
@@ -704,350 +923,261 @@ psynch_mutexdrop(__unused proc_t p, struct psynch_mutexdrop_args * uap, __unused
  *  psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars.
  */
 int
-psynch_cvbroad(__unused proc_t p, struct psynch_cvbroad_args * uap, int * retval)
+psynch_cvbroad(__unused proc_t p, struct psynch_cvbroad_args * uap, uint32_t * retval)
 {
 	user_addr_t cond  = uap->cv;
-	uint32_t cgen = uap->cvgen;
-	uint32_t diffgen = uap->diffgen;
-	uint32_t mgen = uap->mgen;
+	uint64_t cvlsgen = uap->cvlsgen;
+	uint64_t cvudgen = uap->cvudgen;
+	uint32_t cgen, cugen, csgen, diffgen;
+	uint32_t uptoseq, fromseq;
 	int flags = uap->flags;
-	ksyn_wait_queue_t kwq, ckwq;
+	ksyn_wait_queue_t ckwq;
 	int error=0;
-#if COND_MTX_WAITQUEUEMOVE
-	int mutexowned = flags & _PTHREAD_MTX_OPT_HOLDLOCK;
-	int nomutex = flags & _PTHREAD_MTX_OPT_NOHOLDLOCK;
-	user_addr_t mutex = uap->mutex;
-	uint32_t ugen = uap->ugen;
-	uint64_t tid = uap->tid;
-	uthread_t uth;
-	kern_return_t kret = KERN_SUCCESS;
-#else /* COND_MTX_WAITQUEUEMOVE */
-	int nomutex =  _PTHREAD_MTX_OPT_NOHOLDLOCK;
-#endif /* COND_MTX_WAITQUEUEMOVE */
-	uint32_t nextgen, ngen;
-	int updatebits = 0;
+	uint32_t updatebits = 0;
+	uint32_t count;
+	struct ksyn_queue  kfreeq;
+
+	csgen = (uint32_t)((cvlsgen >> 32) & 0xffffffff);
+	cgen = ((uint32_t)(cvlsgen & 0xffffffff));
+	cugen = (uint32_t)((cvudgen >> 32) & 0xffffffff);
+	diffgen = ((uint32_t)(cvudgen & 0xffffffff));
+	count = (diffgen >> PTHRW_COUNT_SHIFT);
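+
+	/*
+	 * Packing convention inferred from the unpacking above: userland
+	 * sends two 32-bit generation words per 64-bit argument,
+	 *
+	 *	cvlsgen = ((uint64_t)csgen << 32) | cgen;
+	 *	cvudgen = ((uint64_t)cugen << 32) | diffgen;
+	 *
+	 * with the waiter count in the top 24 bits of diffgen.
+	 */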
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_START, (uint32_t)cond, (uint32_t) 0, cgen, mgen, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_START, (uint32_t)cond, cgen, cugen, csgen, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_NONE, (uint32_t)cond, 0xcbcbcbc1, diffgen,flags, 0);
 #endif /* _PSYNCH_TRACE_ */
-	error = ksyn_wqfind(cond, cgen, cgen, 0, 0, flags, KSYN_WQTYPE_CVAR, &ckwq);
+
+	uptoseq = cgen & PTHRW_COUNT_MASK;
+	fromseq = (cugen & PTHRW_COUNT_MASK) + PTHRW_INC;
+
+	if (is_seqhigher(fromseq, uptoseq) || is_seqhigher((csgen & PTHRW_COUNT_MASK), uptoseq)) {
+		__FAILEDUSERTEST__("cvbroad: invalid L, U and S values\n");
+		return EINVAL;
+	}
+	if (count > (uint32_t)task_threadmax) {
+		__FAILEDUSERTEST__("cvbroad: difference greater than maximum possible thread count\n");
+		return EBUSY;
+	}
+
+	ckwq = NULL;
+	
+	error = ksyn_wqfind(cond, cgen, cugen, csgen, 0, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &ckwq);
 	if (error != 0) {
 #if _PSYNCH_TRACE_
-		KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_END, (uint32_t)cond, 1, 0, error, 0);
+		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_END, (uint32_t)cond, 0, 0xdeadbeef, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 		return(error);
 	}
 
-#if COND_MTX_WAITQUEUEMOVE
-	ngen = mgen + (PTHRW_INC * diffgen);
-	if (nomutex ==0) {
-		error = ksyn_wqfind(mutex, ngen, ugen, 0, tid, flags, KSYN_WQTYPE_MTX, &kwq);
-		if (error != 0)  {
-			kwq = NULL;
-			goto out;
-		}
-	}
-#else /* COND_MTX_WAITQUEUEMOVE */
-	nomutex = _PTHREAD_MTX_OPT_NOHOLDLOCK;
-	kwq= NULL;
-	ngen = 0;
-#endif /* COND_MTX_WAITQUEUEMOVE */
-
+	*retval = 0;
 
 	ksyn_wqlock(ckwq);
-#if COND_MTX_WAITQUEUEMOVE
-redrive:
-#endif /* COND_MTX_WAITQUEUEMOVE */
-	if (diffgen > ckwq->kw_inqueue) {
-		ckwq->kw_pre_rwwc = diffgen - ckwq->kw_inqueue;
-		ckwq->kw_pre_lockseq = cgen & PTHRW_BIT_MASK;
-		updatebits = ckwq->kw_pre_rwwc;	/* unused mutex refs */
-		nextgen = (mgen + (ckwq->kw_pre_rwwc * PTHRW_INC));
-	} else {
-		updatebits = 0;
-		nextgen = mgen + PTHRW_INC;
-	}
-	
-	if (ckwq->kw_inqueue != 0) {
-#if COND_MTX_WAITQUEUEMOVE
-		if (mutexowned != 0) {
-#if _PSYNCH_TRACE_
-			KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_NONE, (uint32_t)cond, 0, 1, ckwq->kw_inqueue, 0);
-#endif /* _PSYNCH_TRACE_ */
-			uth = ksyn_queue_removefirst(&ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER],ckwq);
-			uth->uu_psynchretval = ngen;
-			uth->uu_kwqqueue = NULL;
 
-			kret = ksyn_wakeup_thread(ckwq, uth);
-			if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
-				panic("cvbraoad: failed to remove\n");
-			if (kret == KERN_NOT_WAITING) {
-				/*
-				 * trying to wake one thread to return, so if
-				 * failed to wakeup get the next one.. 
-				 */
-				goto redrive;
-			}
-			nextgen = nextgen + PTHRW_INC;
-			diffgen -= 1;
-		}
-#else /* COND_MTX_WAITQUEUEMOVE */
-		updatebits = 0;
-#endif /* COND_MTX_WAITQUEUEMOVE */
-		
-		/* nomutex case or in mutexowned case after the first one */
-		/* move them all to the mutex waitqueue */
-		if ((ckwq->kw_inqueue != 0) && (diffgen > 0)) {
-			/* atleast one more posting needed and there are waiting threads */
-			/* drops the ckwq lock */
-#if _PSYNCH_TRACE_
-			KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_NONE, (uint32_t)cond, 0, 2, diffgen, 0);
-#endif /* _PSYNCH_TRACE_ */
-			/* move threads from ckwq to kwq if COND_MTX_WAITQUEUEMOVE, else wakeup */
-			ksyn_move_wqthread(ckwq, kwq, nextgen, ngen, diffgen, nomutex);
-		} else
-			ksyn_wqunlock(ckwq);
-	}  else {
-		/* no need for prepost as it is covered before */
-		ksyn_wqunlock(ckwq);
-	}
+	/* update L, U and S... */
+	UPDATE_CVKWQ(ckwq, cgen, cugen, csgen, 0, KSYN_WQTYPE_CVAR);
 
-	if (error == 0) {
-		*retval = updatebits;
-	}
+	/* broadcast wakeups/prepost handling */
+	ksyn_handle_cvbroad(ckwq, uptoseq, &updatebits);
 
-#if COND_MTX_WAITQUEUEMOVE
-out:
-#endif /* COND_MTX_WAITQUEUEMOVE */
-	ksyn_wqrelease(ckwq, kwq);
+	/* set C or P bits and free if needed */
+	ckwq->kw_sword += (updatebits & PTHRW_COUNT_MASK);
+	ksyn_cvupdate_fixup(ckwq, &updatebits, &kfreeq, 1);
+	ksyn_wqunlock(ckwq);
+
+	*retval = updatebits;
+
+	ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR));
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_END, (uint32_t)cond, 1, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_END, (uint32_t)cond, 0xeeeeeeed, (uint32_t)*retval, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 	
 	return(error);
 }
 
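+/*
+ * ksyn_queue_find_threadseq: return the wait queue element for the given
+ * thread if it is still waiting on this cv at or below the 'upto'
+ * sequence, NULL otherwise.
+ */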
+ksyn_waitq_element_t
+ksyn_queue_find_threadseq(ksyn_wait_queue_t ckwq, __unused ksyn_queue_t kq, thread_t th, uint32_t upto)
+{
+	uthread_t uth = get_bsdthread_info(th);
+	ksyn_waitq_element_t kwe = &uth->uu_kwe;
+		
+	if (kwe->kwe_kwqqueue != ckwq ||
+	    is_seqhigher((kwe->kwe_lockseq & PTHRW_COUNT_MASK), upto)) {
+		/* the thread is not waiting in the cv (or wasn't when the wakeup happened) */
+		return NULL;
+	}
+	return kwe;
+}
+
 /*
  *  psynch_cvsignal: This system call is used for signalling the  blocked waiters of  psynch cvars.
  */
 int
-psynch_cvsignal(__unused proc_t p, struct psynch_cvsignal_args * uap, int * retval)
+psynch_cvsignal(__unused proc_t p, struct psynch_cvsignal_args * uap, uint32_t * retval)
 {
 	user_addr_t cond  = uap->cv;
-	uint32_t cgen = uap->cvgen;
+	uint64_t cvlsgen = uap->cvlsgen;
+	uint32_t cgen, csgen, signalseq, uptoseq;
 	uint32_t cugen = uap->cvugen;
-	uint32_t mgen = uap->mgen;
 	int threadport = uap->thread_port;
 	int flags = uap->flags;
-	ksyn_wait_queue_t kwq, ckwq;
-	int error=0, kret;
-	uthread_t uth;
-#if USE_WAITQUEUE
-	thread_t th = THREAD_NULL, mth;
-#else /* USE_WAITQUEUE */
+	ksyn_wait_queue_t ckwq = NULL;
+	ksyn_waitq_element_t kwe, nkwe = NULL;
+	ksyn_queue_t kq;
+	int error=0;
 	thread_t th = THREAD_NULL;
-#endif /* USE_WAITQUEUE */
-#if COND_MTX_WAITQUEUEMOVE
-	user_addr_t mutex = uap->mutex;
-	uint32_t ugen = uap->ugen;
-	int mutexowned = flags & _PTHREAD_MTX_OPT_HOLDLOCK;
-	int nomutex = flags & _PTHREAD_MTX_OPT_NOHOLDLOCK;
-#else /* COND_MTX_WAITQUEUEMOVE */
-	int nomutex =  _PTHREAD_MTX_OPT_NOHOLDLOCK;
-#endif /* COND_MTX_WAITQUEUEMOVE */
-	uint32_t retbits, ngen, lockseq;
+	uint32_t updatebits = 0;
+	kern_return_t kret;
+	struct ksyn_queue  kfreeq;
 
 
-	if (nomutex != 0)
-		retbits = 0;
-	else
-		retbits = 1;	
-#if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_START, (uint32_t)cond, (uint32_t) 0, cgen, mgen, 0);
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_NONE, (uint32_t)cond, (uint32_t)cugen , flags, mgen, 0);
-#endif /* _PSYNCH_TRACE_ */
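+	/* as in cvbroad, cvlsgen packs S (high 32 bits) over L (low 32 bits) */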
+	csgen = (uint32_t)((cvlsgen >> 32) & 0xffffffff);
+	cgen = ((uint32_t)(cvlsgen & 0xffffffff));
 
-	error = ksyn_wqfind(cond, cgen, cugen, 0, 0, flags, KSYN_WQTYPE_CVAR, &ckwq);
-	if (error != 0)  {
-		*retval = retbits;	
 #if _PSYNCH_TRACE_
-		KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_END, (uint32_t)cond, 1, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_START, (uint32_t)cond, cgen, cugen, threadport, 0);
 #endif /* _PSYNCH_TRACE_ */
-		return(error);
-	}
-	
-	
-	if ((flags & _PTHREAD_MTX_OPT_LASTDROP) == _PTHREAD_MTX_OPT_LASTDROP) {
-
-		ksyn_wqlock(ckwq);
-		lockseq = cgen & PTHRW_COUNT_MASK;
-		/* do  we need to check for lockseq as this is from last waiter, may be race ? */
-		if ((ckwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, ckwq->kw_pre_lockseq) != 0)) {
-			ckwq->kw_pre_rwwc--;
-			if (ckwq->kw_pre_rwwc == 0)
-				CLEAR_PREPOST_BITS(ckwq);
-		}
-		ksyn_wqunlock(ckwq);
-		/* no mutex or thread is associated with this, just notificaion */
-		th = THREAD_NULL;
-		error = 0;
-		goto out;
-	}
 
-	ngen = mgen + PTHRW_INC;
+	uptoseq = cgen & PTHRW_COUNT_MASK;
+	signalseq = (cugen & PTHRW_COUNT_MASK) + PTHRW_INC;
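+	/* the signal is aimed at the first unconsumed waiter (just above U), up to L */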
 
-#if COND_MTX_WAITQUEUEMOVE
-	if (nomutex == 0) {
-		/* mutex was not operated on, ignore it */
-		error = ksyn_wqfind(mutex, ngen, ugen, 0, 0, flags, KSYN_WQTYPE_MTX, &kwq); 
-		if (error != 0)  {
-			*retval = retbits;	
-			kwq = NULL;
-			goto out;
-		}
-	} else {
-#endif /* COND_MTX_WAITQUEUEMOVE */
-		kwq = NULL;
-#if COND_MTX_WAITQUEUEMOVE
+	/* validate sane L, U, and S values */
+	if (((threadport == 0) && (is_seqhigher(signalseq, uptoseq))) || is_seqhigher((csgen & PTHRW_COUNT_MASK), uptoseq)) {
+		__FAILEDUSERTEST__("psynch_cvsignal: invalid sequence numbers\n");
+		error = EINVAL;
+		goto out;
 	}
-#endif /* COND_MTX_WAITQUEUEMOVE */
 
-	
+	/* If we are looking for a specific thread, grab a reference for it */
 	if (threadport != 0) {
 		th = (thread_t)port_name_to_thread((mach_port_name_t)threadport);
 		if (th == THREAD_NULL) {
-			*retval = retbits;	
 			error = ESRCH;
 			goto out;
 		}
 	}
 
-	ksyn_wqlock(ckwq);
-redrive:
-	if (ckwq->kw_inqueue != 0) {
-		*retval = 0;	
-#if COND_MTX_WAITQUEUEMOVE
-		if ((mutexowned != 0) || (nomutex != 0)) {
+	error = ksyn_wqfind(cond, cgen, cugen, csgen, 0, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &ckwq);
+	if (error != 0)  {
 #if _PSYNCH_TRACE_
-			KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_NONE, (uint32_t)cond, 0, 1, ckwq->kw_inqueue, 0);
+		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_END, (uint32_t)cond, 0, 0xdeadbeef, error, 0);
+#endif /* _PSYNCH_TRACE_ */		
+		goto out;
+	}
+	
+	ksyn_wqlock(ckwq);
+
+	/* update L, U and S... */
+	UPDATE_CVKWQ(ckwq, cgen, cugen, csgen, 0, KSYN_WQTYPE_CVAR);
+
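+	/* cv waiters are parked on the writer queue */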
+	kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
+
+retry:
+	/* Only bother if we aren't already balanced */
+	if ((ckwq->kw_lword & PTHRW_COUNT_MASK) != (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
+
+		kwe = (th != NULL) ? ksyn_queue_find_threadseq(ckwq, kq, th, uptoseq) :
+			ksyn_queue_find_signalseq(ckwq, kq, uptoseq, signalseq);
+		if (kwe != NULL) {
+			switch (kwe->kwe_flags) {
+				
+			case KWE_THREAD_BROADCAST:
+				/* broadcasts swallow our signal */
+				break;
+
+			case KWE_THREAD_PREPOST:
+				/* merge in with existing prepost at our same uptoseq */
+				kwe->kwe_count += 1;
+				break;
+
+			case KWE_THREAD_INWAIT:
+				if (is_seqlower((kwe->kwe_lockseq & PTHRW_COUNT_MASK), signalseq)) {
+					/*
+					 * A valid thread is in our range, but it is lower than
+					 * our signal. Waking it could leave the thread our signal
+					 * was meant for with nobody to wake it if/when it arrives
+					 * (the signal originally aimed at this thread might not
+					 * successfully wake it).
+					 *
+					 * Convert to a broadcast - this may cause some spurious
+					 * wakeups (allowed by the spec), but it avoids starvation,
+					 * which is the better choice.
+					 */
+#if _PSYNCH_TRACE_
+					__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xc1c1c1c1, uptoseq, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
-			if (th != THREAD_NULL) {
-				uth = get_bsdthread_info(th);
-				if (nomutex != 0) 
-					ngen |= PTHRW_MTX_NONE;
-				uth->uu_psynchretval = ngen;
-				uth->uu_kwqqueue = NULL;
-				ksyn_queue_removeitem(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], uth);
-				kret = ksyn_wakeup_thread(ckwq, uth);
-				if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
-					panic("psynch_cvsignal: panic waking in cvsignal\n");
-				if (kret == KERN_NOT_WAITING) {
-					if (threadport != 0) {
-						error = 0;
-					} else
-						goto redrive;
-				}
-			} else {
-				uth = ksyn_queue_removefirst(&ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER],ckwq);
-				if (nomutex != 0) 
-					ngen |= PTHRW_MTX_NONE;
-				uth->uu_psynchretval = ngen;
-				uth->uu_kwqqueue = NULL;
-				kret = ksyn_wakeup_thread(ckwq, uth);
-				if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
-					panic("psynch_cvsignal: panic waking in cvsignal\n");
-				if (kret == KERN_NOT_WAITING) {
-					if (threadport != 0) {
-						error = 0;
-					} else
-						goto redrive;
-				}
-			}
-			ksyn_wqunlock(ckwq);
-		} else {
-#endif /* COND_MTX_WAITQUEUEMOVE */
-			/* need to move a thread to another queue */
+					ksyn_handle_cvbroad(ckwq, uptoseq, &updatebits);
+				} else {
+					ksyn_queue_removeitem(ckwq, kq, kwe);
+					kwe->kwe_psynchretval = PTH_RWL_MTX_WAIT;
+					kwe->kwe_kwqqueue = NULL;
 #if _PSYNCH_TRACE_
-			KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_NONE, (uint32_t)cond, 0, 2, ckwq->kw_inqueue, 0);
+					__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xcafecaf2, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0);
 #endif /* _PSYNCH_TRACE_ */
-			if (th != THREAD_NULL) {
-				uth = get_bsdthread_info(th);
-				/* if given thread not blocked in cvwait , return error */
-				if (uth->uu_kwqqueue != ckwq) {
-					error = EINVAL;
-					ksyn_wqunlock(ckwq);
-					goto out;
+					kret = ksyn_wakeup_thread(ckwq, kwe);
+#if __TESTPANICS__
+					if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
+						panic("ksyn_wakeup_thread: panic waking up condition waiter\n");
+#endif /* __TESTPANICS__ */
+					updatebits += PTHRW_INC;
 				}
-				ksyn_queue_removeitem(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], uth);
-			} else {
-				uth = ksyn_queue_removefirst(&ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER],ckwq);
-				if (uth == NULL)
-					panic("cvsign: null uthread after rem");
-			}
-#if COND_MTX_WAITQUEUEMOVE
-			ksyn_wqunlock(ckwq);
-#else /* COND_MTX_WAITQUEUEMOVE */
-			uth->uu_psynchretval = 0;
-			uth->uu_kwqqueue = NULL;
-			kret = ksyn_wakeup_thread(ckwq, uth);
-			if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
-				panic("psynch_cvsignal: panic waking in cvsignal\n");
-			if (kret == KERN_NOT_WAITING) {
-				error = 0;
-				if (threadport == 0) 
-					goto redrive;
+
+				ckwq->kw_sword += (updatebits & PTHRW_COUNT_MASK);
+				break;
+				
+			default: 
+				panic("unknown kweflags\n");
+				break;
 			}
-			
+
+		} else if (th != NULL) {
+			/*
+			 * Could not find the thread, so post a broadcast;
+			 * otherwise the waiter will be stuck. We used to
+			 * return ESRCH here, but that led to rare hangs.
+			 */
+			ksyn_handle_cvbroad(ckwq, uptoseq, &updatebits);
+			ckwq->kw_sword += (updatebits & PTHRW_COUNT_MASK);
+		} else if (nkwe == NULL) {
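+			/*
+			 * Drop the wait queue lock around zalloc() (which may
+			 * block) and retry the search, as the queue may have
+			 * changed while unlocked.
+			 */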
 			ksyn_wqunlock(ckwq);
-			error = 0;
-#endif /* COND_MTX_WAITQUEUEMOVE */
-			
-#if COND_MTX_WAITQUEUEMOVE
-			ksyn_wqlock(kwq);
-			ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], ngen, uth, SEQFIT);
-#if USE_WAITQUEUE
-                        kret = wait_queue_move_thread(&ckwq->kw_wq, ckwq->kw_addr, th, &kwq->kw_wq, kwq->kw_addr, &mth);
-                        if (kret == KERN_SUCCESS) {
-                                if (mth != THREAD_NULL) {
-                                        uth = (struct uthread *)get_bsdthread_info(mth);
-                                        uth->uu_lockseq = ngen;
-                                        TAILQ_INSERT_TAIL(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_uthlist, uth, uu_mtxlist);
-                                }
-                        }
-#else /* USE_WAITQUEUE */
-			/* no need to move anything, just update the sequence */
-			uth->uu_lockseq = ngen;
-
-#endif /* USE_WAITQUEUE */
-			ksyn_wqunlock(kwq);
-		}
-#endif /* COND_MTX_WAITQUEUEMOVE */
-	}   else {
-		/* prepost */
+			nkwe = (ksyn_waitq_element_t)zalloc(kwe_zone);
+			ksyn_wqlock(ckwq);
+			goto retry;
+
+		} else {
+			/* no eligible entries - add prepost */
+			bzero(nkwe, sizeof(struct ksyn_waitq_element));
+			nkwe->kwe_kwqqueue = ckwq;
+			nkwe->kwe_flags = KWE_THREAD_PREPOST;
+			nkwe->kwe_lockseq = uptoseq;
+			nkwe->kwe_count = 1;
+			nkwe->kwe_uth = NULL;
+			nkwe->kwe_psynchretval = 0;
+
 #if _PSYNCH_TRACE_
-		KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_NONE, (uint32_t)cond, 0, 3, ckwq->kw_inqueue, 0);
+			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xfeedfefe, uptoseq, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
-		if (threadport != 0) {
-			error = EINVAL;
-			ksyn_wqunlock(ckwq);
-			goto out;
+			
+			(void)ksyn_queue_insert(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], uptoseq, NULL, nkwe, SEQFIT);
+			ckwq->kw_fakecount++;
+			nkwe = NULL;
 		}
-		
-		ckwq->kw_pre_rwwc++;
-		ckwq->kw_attq = kwq;
-		ckwq->kw_pre_lockseq = cgen & PTHRW_BIT_MASK;
-		ckwq->kw_pre_cvretval = ngen;
-		*retval = retbits;	
-		ksyn_wqunlock(ckwq);
+
+		/* set C or P bits and free if needed */
+		ksyn_cvupdate_fixup(ckwq, &updatebits, &kfreeq, 1);
 	}
-	/* ckwq is unlocked here */
-		
+
+	ksyn_wqunlock(ckwq);
+	if (nkwe != NULL)
+		zfree(kwe_zone, nkwe);
+
+	ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR));
+
 out:
-	ksyn_wqrelease(ckwq, kwq);
-	if (th != THREAD_NULL)
+	if (th != NULL)
 		thread_deallocate(th);
+	if (error == 0)
+		*retval = updatebits;
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_END, (uint32_t)cond, 0, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_END, (uint32_t)cond, 0xeeeeeeed, updatebits, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 	
 	return(error);
@@ -1060,112 +1190,318 @@ int
 psynch_cvwait(__unused proc_t p, struct psynch_cvwait_args * uap, uint32_t * retval)
 {
 	user_addr_t cond  = uap->cv;
-	uint32_t cgen = uap->cvgen;
+	uint64_t cvlsgen = uap->cvlsgen;
+	uint32_t cgen, csgen;
 	uint32_t cugen = uap->cvugen;
 	user_addr_t mutex = uap->mutex;
-	uint32_t mgen =0, ugen;
-	int flags = 0;
+	uint64_t mugen = uap->mugen;
+	uint32_t mgen, ugen;
+	int flags = uap->flags;
 	ksyn_wait_queue_t kwq, ckwq;
-	int error=0;
+	int error=0, local_error = 0;
 	uint64_t abstime = 0;
-	uint32_t lockseq, updatebits;
+	uint32_t lockseq, updatebits=0;
 	struct timespec  ts;
 	uthread_t uth;
-
+	ksyn_waitq_element_t kwe, nkwe = NULL;
+	struct ksyn_queue  *kq, kfreeq;
+#if __TESTPANICS__
+	//int timeoutval = 3;		/* 3 secs */
+	//u_int64_t ntime = 0;
+#endif /* __TESTPANICS__ */
+	
 	/* for conformance reasons */
 	__pthread_testcancel(0);
 
+	csgen = (uint32_t)((cvlsgen >> 32) & 0xffffffff);
+	cgen = ((uint32_t)(cvlsgen & 0xffffffff));
+	ugen = (uint32_t)((mugen >> 32) & 0xffffffff);
+	mgen = ((uint32_t)(mugen & 0xffffffff));
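+	/* mugen carries the mutex ugen in its high 32 bits and mgen in its low 32 bits */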
+
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_START, (uint32_t)cond, (uint32_t) mutex, cgen, mgen, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_START, (uint32_t)cond, cgen, cugen, csgen, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)mutex, mgen, ugen, flags, 0);
 #endif /* _PSYNCH_TRACE_ */
-	flags = 0;
-	if ((uap->usec & 0xc0000000) != 0) {
-		if (uap->usec & 0x40000000)
-			flags |= PTHREAD_PROCESS_SHARED;
-		if (uap->usec & 0x80000000)
-			flags |= _PTHREAD_MUTEX_POLICY_FIRSTFIT;
+
+	lockseq = (cgen & PTHRW_COUNT_MASK);
+	/*
+	 * In cvwait the U word can be out of range, as the cond may be used
+	 * only for timeouts. The S word, however, must be within bounds and
+	 * is validated at user level as well.
+	 */
+	if (is_seqhigher_eq((csgen & PTHRW_COUNT_MASK), lockseq) != 0) {
+		__FAILEDUSERTEST__("psynch_cvwait: invalid sequence numbers\n");
+		return EINVAL;
 	}
-		
-	error = ksyn_wqfind(cond, cgen, cugen, 0, 0, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq);
+
+	ckwq = kwq = NULL;
+	error = ksyn_wqfind(cond, cgen, cugen, csgen, 0, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq);
 	if (error != 0) {
 #if _PSYNCH_TRACE_
-		KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)cond, 1, 0, error, 0);
+		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)cond, 1, 0xdeadbeef, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 		return(error);
 	}
 	
-	if (mutex != (user_addr_t)0) {
-		mgen = uap->mgen;
-		ugen = uap->ugen;
+#if __TESTPANICS__
+	//clock_interval_to_deadline(timeoutval, NSEC_PER_SEC, &ntime);
+#endif /* __TESTPANICS__ */
 
-		error = ksyn_wqfind(mutex, mgen, ugen, 0, 0, flags, KSYN_WQTYPE_MTX, &kwq); {
-		if (error != 0) 
+	if (mutex != (user_addr_t)0) {
+		error = ksyn_wqfind(mutex, mgen, ugen, 0, 0, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX), &kwq);
+		if (error != 0)  {
+			local_error = error;
+#if _PSYNCH_TRACE_
+			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)mutex, 2, 0xdeadbeef, error, 0);
+#endif /* _PSYNCH_TRACE_ */
 			goto out;
 		}
 		
-		psynch_mutexdrop_internal(kwq, mgen, ugen, flags);
+		(void)psynch_mutexdrop_internal(kwq, mgen, ugen, flags);
 		/* drops kwq reference */
+		kwq = NULL;
 	}
 
-	uth = current_uthread();
-	uth->uu_lockseq = cgen;
-	lockseq = (cgen & PTHRW_COUNT_MASK);
-
-	if (uap->sec != 0 || (uap->usec & 0x3fffffff)  != 0) {
+	if (uap->sec != 0 || (uap->nsec & 0x3fffffff)  != 0) {
 		ts.tv_sec = uap->sec;
-		ts.tv_nsec = (uap->usec & 0xc0000000);
-                nanoseconds_to_absolutetime((uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec,  &abstime );
-                clock_absolutetime_interval_to_deadline( abstime, &abstime );
+		ts.tv_nsec = (uap->nsec & 0x3fffffff);
+		nanoseconds_to_absolutetime((uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec,  &abstime );
+		clock_absolutetime_interval_to_deadline( abstime, &abstime );
 	}
+
 	ksyn_wqlock(ckwq);
-	if ((ckwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, ckwq->kw_pre_lockseq) != 0)) {
+
+	/* update L, U and S... */
+	UPDATE_CVKWQ(ckwq, cgen, cugen, csgen, 0, KSYN_WQTYPE_CVAR);
+
+	/* look for a matching prepost (or a conflicting thread) at this sequence */
+	kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
+	kwe = ksyn_queue_find_cvpreposeq(kq, lockseq);
+
+	if (kwe != NULL) {
+		switch (kwe->kwe_flags) {
+
+		case KWE_THREAD_INWAIT:
+			ksyn_wqunlock(ckwq);
+			__FAILEDUSERTEST__("cvwait: thread entry with same sequence already present\n");
+			local_error = EBUSY;
+			goto out;
+
+		case KWE_THREAD_BROADCAST:
+			break;
+
+		case KWE_THREAD_PREPOST:
+			if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == lockseq) {
+				/* we can safely consume a reference, so do so */
+				if (--kwe->kwe_count == 0) {
+					ksyn_queue_removeitem(ckwq, kq, kwe);
+					ckwq->kw_fakecount--;
+					nkwe = kwe;
+				}
+			} else {
+				/*
+				 * consuming a prepost higher than our lock sequence is valid, but
+				 * can leave the higher thread without a match. Convert the entry 
+				 * to a broadcast to compensate for this.
+				 */
 #if _PSYNCH_TRACE_
-		KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)cond, 0, 1, 0, 0);
+				__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xc2c2c2c2, kwe->kwe_lockseq, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
+				
+				ksyn_handle_cvbroad(ckwq, kwe->kwe_lockseq, &updatebits);
+#if __TESTPANICS__
+				if (updatebits != 0)
+				if (updatebits != 0)
+					panic("psynch_cvwait: convert pre-post to broadcast: woke up %d threads that shouldn't be there\n",
+					      updatebits);
+#endif /* __TESTPANICS__ */
+			}
+
+			break;
 			
-#if  COND_MTX_WAITQUEUEMOVE
-		updatebits = ckwq->kw_pre_cvretval | PTHRW_MTX_NONE;
-#else /* COND_MTX_WAITQUEUEMOVE */
-		updatebits = 0;
-#endif /* COND_MTX_WAITQUEUEMOVE */
-		ckwq->kw_pre_rwwc--;
-		if (ckwq->kw_pre_rwwc == 0)
-			CLEAR_PREPOST_BITS(ckwq);
-		*retval = updatebits;
+		default:
+			panic("psynch_cvwait: unexpected wait queue element type\n");
+		}
+
+#if _PSYNCH_TRACE_
+		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xfefefefe, kwe->kwe_lockseq, 0, 0);
+#endif /* _PSYNCH_TRACE_ */
+
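+		/* consuming the entry counts as one wakeup against the S word */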
+		updatebits = PTHRW_INC;
+		ckwq->kw_sword += PTHRW_INC;
+
+		/* set C or P bits and free if needed */
+		ksyn_cvupdate_fixup(ckwq, &updatebits, &kfreeq, 1);
+
 		error = 0;
+		local_error = 0;
+
+		*retval = updatebits;
+
 		ksyn_wqunlock(ckwq);
+
+		if (nkwe != NULL)
+			zfree(kwe_zone, nkwe);
+
 		goto out;
+
+	}
 		
-	} else {
+	uth = current_uthread();
+	kwe = &uth->uu_kwe;
+	kwe->kwe_kwqqueue = ckwq;
+	kwe->kwe_flags = KWE_THREAD_INWAIT;
+	kwe->kwe_lockseq = lockseq;
+	kwe->kwe_count = 1;
+	kwe->kwe_uth = uth;
+	kwe->kwe_psynchretval = 0;
+
 #if _PSYNCH_TRACE_
-		KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)cond, 0, 2, cgen, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xfeedfeed, cgen, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
-		error = ksyn_queue_insert(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], cgen, uth, FIRSTFIT);
-		if (error != 0)
-			panic("psynch_cvwait: failed to enqueue\n");
-		error = ksyn_block_thread_locked(ckwq, abstime, uth);
-		/* drops the lock */
+
+	error = ksyn_queue_insert(ckwq, kq, cgen, uth, kwe, SEQFIT);
+	if (error != 0) {
+		ksyn_wqunlock(ckwq);
+		local_error = error;
+		goto out;
 	}
+
+#if 0 /* __TESTPANICS__ */
+	/* if no timeout is passed, set a short debug timeout to catch hangs */
+	error = ksyn_block_thread_locked(ckwq, (abstime == 0) ? ntime : abstime, kwe, 1);
+#else
+	error = ksyn_block_thread_locked(ckwq, abstime, kwe, 1);
+#endif /* __TESTPANICS__ */
+	/* lock dropped */
+
 	
+	local_error = error;
 	if (error != 0) {
 		ksyn_wqlock(ckwq);
+		/* just in case it got woken up as we were granting */
+		*retval = kwe->kwe_psynchretval;
+
+#if __TESTPANICS__
+		if ((kwe->kwe_kwqqueue != NULL) && (kwe->kwe_kwqqueue != ckwq))
+			panic("cvwait waiting on some other kwq\n");
+
+#endif /* __TESTPANICS__ */
+
+
+		if (kwe->kwe_kwqqueue != NULL) {
+			ksyn_queue_removeitem(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
+			kwe->kwe_kwqqueue = NULL;
+		}
+		if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) {
+			/*
+			 * The condition variable was granted; reset the
+			 * error so that the thread returns normally.
+			 */
+			local_error = 0;
+			/* no need to set any bits; cvsignal/broadcast covers this */
+			ksyn_wqunlock(ckwq);
+			*retval = 0;
+			goto out;
+		}
+
+		ckwq->kw_sword += PTHRW_INC;
+	
+		/* set the C and P bits, in updatebits as well as in the local error */
+		if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
+			updatebits |= PTH_RWS_CV_CBIT;
+			local_error |= ECVCERORR;
+			if (ckwq->kw_inqueue != 0) {
+				(void)ksyn_queue_move_tofree(ckwq, kq, (ckwq->kw_lword & PTHRW_COUNT_MASK), &kfreeq, 1, 1);
+			}
+			ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
+			ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
+		} else {
+			/* is everything in the queue a fake entry? */
+			if ((ckwq->kw_inqueue != 0) && (ckwq->kw_fakecount == ckwq->kw_inqueue)) {
+				updatebits |= PTH_RWS_CV_PBIT; 
+				local_error |= ECVPERORR;
+			}
+		}
+		ksyn_wqunlock(ckwq);
+		
+	} else  {
+		/* PTH_RWL_MTX_WAIT is removed */
+		if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT)  != 0)
+			*retval = PTHRW_INC | PTH_RWS_CV_CBIT;
+		else
+			*retval = 0;
+		local_error = 0;
+	}
+out:
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)cond, 0xeeeeeeed, (uint32_t)*retval, local_error, 0);
+#endif /* _PSYNCH_TRACE_ */
+	ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
+	return(local_error);
+}
+
+/*
+ *  psynch_cvclrprepost: This system call clears pending prepost if present.
+ */
+int
+psynch_cvclrprepost(__unused proc_t p, struct psynch_cvclrprepost_args * uap, __unused int * retval)
+{
+	user_addr_t cond  = uap->cv;
+	uint32_t cgen = uap->cvgen;
+	uint32_t cugen = uap->cvugen;
+	uint32_t csgen = uap->cvsgen;
+	uint32_t pseq = uap->preposeq;
+	uint32_t flags = uap->flags;
+	int error;
+	ksyn_wait_queue_t ckwq = NULL;
+	struct ksyn_queue  kfreeq;
+
 #if _PSYNCH_TRACE_
-		KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)cond, 0, 3, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CLRPRE | DBG_FUNC_START, (uint32_t)cond, cgen, cugen, csgen, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CLRPRE | DBG_FUNC_NONE, (uint32_t)cond, 0xcececece, pseq, flags, 0);
 #endif /* _PSYNCH_TRACE_ */
-		if (uth->uu_kwqqueue != NULL) {
-			ksyn_queue_removeitem(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], uth);
+
+	if ((flags & _PTHREAD_MTX_OPT_MUTEX) == 0) {
+		error = ksyn_wqfind(cond, cgen, cugen, csgen, 0, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &ckwq);
+		if (error != 0)  {
+			*retval = 0;	
+#if _PSYNCH_TRACE_
+			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CLRPRE | DBG_FUNC_END, (uint32_t)cond, 0, 0xdeadbeef, error, 0);
+#endif /* _PSYNCH_TRACE_ */
+			return(error);
 		}
+
+		ksyn_wqlock(ckwq);
+		(void)ksyn_queue_move_tofree(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], (pseq & PTHRW_COUNT_MASK), &kfreeq, 0, 1); 
 		ksyn_wqunlock(ckwq);
-	} else  {
-		*retval = uth->uu_psynchretval;
+		ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP));
+	} else {
+		/* mutex type */
+		error = ksyn_wqfind(cond, cgen, cugen, 0, 0, flags, (KSYN_WQTYPE_MTX | KSYN_WQTYPE_INDROP), &ckwq);
+		if (error != 0)  {
+			*retval = 0;	
+#if _PSYNCH_TRACE_
+			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CLRPRE | DBG_FUNC_END, (uint32_t)cond, 0, 0xdeadbeef, error, 0);
+#endif /* _PSYNCH_TRACE_ */
+			return(error);
+		}
 
+		ksyn_wqlock(ckwq);
+		if (((flags & _PTHREAD_MUTEX_POLICY_FIRSTFIT) != 0) && (ckwq->kw_pre_rwwc != 0)) {
+			if (is_seqlower_eq(ckwq->kw_pre_lockseq, cgen) != 0) {
+				/* clear prepost */
+				ckwq->kw_pre_rwwc = 0;
+				ckwq->kw_pre_lockseq = 0;
+			}
+		}
+		ksyn_wqunlock(ckwq);
+		ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_MTX | KSYN_WQTYPE_INDROP));
 	}
-out:
+
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)cond, 0, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CLRPRE | DBG_FUNC_END, (uint32_t)cond, 0xeeeeeeed, 0, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
-	ksyn_wqrelease(ckwq, NULL);
-	return(error);
+	return(0);
 }
 
 /* ***************** pthread_rwlock ************************ */
@@ -1182,67 +1518,106 @@ psynch_rw_rdlock(__unused proc_t p, struct psynch_rw_rdlock_args * uap, uint32_t
 	//uint64_t tid = uap->tid;
 	int flags = uap->flags;
 	int error = 0, block;
-	uint32_t lockseq = 0, updatebits = 0, preseq = 0;
+	uint32_t lockseq = 0, updatebits = 0, preseq = 0, prerw_wc = 0;
 	ksyn_wait_queue_t kwq;
 	uthread_t uth;
+	int isinit = lgen & PTHRW_RWL_INIT;
+	uint32_t returnbits  = 0;
+	ksyn_waitq_element_t kwe;
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
 #endif /* _PSYNCH_TRACE_ */
 	uth = current_uthread();
 
 	/* preserve the seq number */
-	uth->uu_lockseq = lgen;
+	kwe = &uth->uu_kwe;
+	kwe->kwe_lockseq = lgen;
+	kwe->kwe_uth = uth;
+	kwe->kwe_psynchretval = 0;
+	kwe->kwe_kwqqueue = NULL;
+	
 	lockseq = lgen  & PTHRW_COUNT_MASK;
 
+
 	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq);
 	if (error != 0)  {
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 		return(error);
 	}
 	
 	ksyn_wqlock(kwq);
 
+	if (isinit != 0) {
+		lgen &= ~PTHRW_RWL_INIT;
+		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
+			/* first to notice the reset of the lock, clear preposts */
+			CLEAR_REINIT_BITS(kwq);
+			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0);
+#endif /* _PSYNCH_TRACE_ */
+		}
+	}
+
 	/* handle first the missed wakeups */
 	if ((kwq->kw_pre_intrcount != 0) && 
 		((kwq->kw_pre_intrtype == PTH_RW_TYPE_READ) || (kwq->kw_pre_intrtype == PTH_RW_TYPE_LREAD)) && 
 		(is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) {
 
 		kwq->kw_pre_intrcount--;
-		uth->uu_psynchretval = kwq->kw_pre_intrretbits;
+		kwe->kwe_psynchretval = kwq->kw_pre_intrretbits;
 		if (kwq->kw_pre_intrcount==0) 
 			CLEAR_INTR_PREPOST_BITS(kwq);	
 		ksyn_wqunlock(kwq);
 		goto out;
 	}
 
-	/* handle unlock2/downgrade first */
-	if ((kwq->kw_pre_limrd != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_limrdseq & PTHRW_COUNT_MASK)) != 0)) {
+	/* handle overlaps first, as they are not counted against pre_rwwc */
+
+	/* check for an overlap, and that no W bit (pending writers) is set */
+	if ((kwq->kw_overlapwatch != 0) && ((rw_wc & PTHRW_RWS_SAVEMASK) == 0) && ((lgen & PTH_RWL_WBIT) == 0)) { 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_pre_limrd, kwq->kw_pre_limrdseq, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 10, kwq->kw_nextseqword, kwq->kw_lastseqword, 0);
 #endif /* _PSYNCH_TRACE_ */
-		kwq->kw_pre_limrd--;
-		/* acquired the locks, so return */
-		uth->uu_psynchretval = kwq->kw_pre_limrdbits;
-		if (kwq->kw_pre_limrd == 0)
-			CLEAR_READ_PREPOST_BITS(kwq);
-		ksyn_wqunlock(kwq);
-		goto out;
+		error = kwq_handle_overlap(kwq, lgen, ugen, rw_wc, &updatebits, (KW_UNLOCK_PREPOST_READLOCK|KW_UNLOCK_PREPOST), &block);
+#if __TESTPANICS__
+		if (error != 0)
+			panic("rw_rdlock: kwq_handle_overlap failed %d\n",error);
+#endif /* __TESTPANICS__ */
+		if (block == 0) {
+			error = 0;
+			kwe->kwe_psynchretval = updatebits;
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 0xff, updatebits, 0xee, 0);
+#endif /* _PSYNCH_TRACE_ */
+			ksyn_wqunlock(kwq);
+			goto out;
+		}
 	}
 
 	if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) {
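+		/*
+		 * a prepost (pre_rwwc) records unlocks that arrived before
+		 * their waiters; consume one and, once drained, replay the
+		 * deferred unlock below.
+		 */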
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
 #endif /* _PSYNCH_TRACE_ */
 		kwq->kw_pre_rwwc--;
 		if (kwq->kw_pre_rwwc == 0) {
 			preseq = kwq->kw_pre_lockseq;
+			prerw_wc = kwq->kw_pre_sseq;
 			CLEAR_PREPOST_BITS(kwq);
-			error = kwq_handle_unlock(kwq, preseq,  &updatebits, (KW_UNLOCK_PREPOST_READLOCK|KW_UNLOCK_PREPOST), &block, lgen);
+			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){
+				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
+#endif /* _PSYNCH_TRACE_ */
+			}
+			error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (KW_UNLOCK_PREPOST_READLOCK|KW_UNLOCK_PREPOST), &block, lgen);
+#if __TESTPANICS__
 			if (error != 0)
-				panic("kwq_handle_unlock failed %d\n",error);
+				panic("rw_rdlock: kwq_handle_unlock failed %d\n",error);
+#endif /* __TESTPANICS__ */
 			if (block == 0) {
 				ksyn_wqunlock(kwq);
 				goto out;
@@ -1251,31 +1626,35 @@ psynch_rw_rdlock(__unused proc_t p, struct psynch_rw_rdlock_args * uap, uint32_t
 		}
 	}
 
+
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
-	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_READ], lgen, uth, SEQFIT);
+	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_READ], lgen, uth, kwe, SEQFIT);
+#if __TESTPANICS__
 	if (error != 0)
 		panic("psynch_rw_rdlock: failed to enqueue\n");
-	error = ksyn_block_thread_locked(kwq, (uint64_t)0, uth);
+#endif /* __TESTPANICS__ */
+	error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0);
 	/* drops the kwq lock */
 	
 out:
 	if (error != 0) {
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
 		ksyn_wqlock(kwq);
-		if (uth->uu_kwqqueue != NULL)
-			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_READ], uth);
+		if (kwe->kwe_kwqqueue != NULL)
+			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_READ], kwe);
 		ksyn_wqunlock(kwq);
 	} else {
 		/* update bits */
-		*retval = uth->uu_psynchretval;
+		*retval = kwe->kwe_psynchretval;
+		returnbits = kwe->kwe_psynchretval;
 	}
-	ksyn_wqrelease(kwq, NULL); 
+	ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK)); 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, returnbits, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 	return(error);
 }
@@ -1284,7 +1663,7 @@ out:
  *  psynch_rw_longrdlock: This system call is used for psync rwlock long readers to block.
  */
 int
-psynch_rw_longrdlock(__unused proc_t p, struct psynch_rw_longrdlock_args * uap, uint32_t * retval)
+psynch_rw_longrdlock(__unused proc_t p, __unused struct psynch_rw_longrdlock_args * uap,  __unused uint32_t * retval)
 {
 	user_addr_t rwlock  = uap->rwlock;
 	uint32_t lgen = uap->lgenval;
@@ -1292,65 +1671,82 @@ psynch_rw_longrdlock(__unused proc_t p, struct psynch_rw_longrdlock_args * uap,
 	uint32_t rw_wc = uap->rw_wc;
 	//uint64_t tid = uap->tid;
 	int flags = uap->flags;
+	int isinit = lgen & PTHRW_RWL_INIT;
+	uint32_t returnbits=0;
+	ksyn_waitq_element_t kwe;
 
 	ksyn_wait_queue_t kwq;
 	int error=0, block = 0 ;
 	uthread_t uth;
-	uint32_t lockseq = 0, updatebits = 0, preseq = 0;
+	uint32_t lockseq = 0, updatebits = 0, preseq = 0, prerw_wc = 0;
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
 #endif /* _PSYNCH_TRACE_ */
 	uth = current_uthread();
-
-	uth->uu_lockseq = lgen;
+	kwe = &uth->uu_kwe;
+	kwe->kwe_lockseq = lgen;
+	kwe->kwe_uth = uth;
+	kwe->kwe_psynchretval = 0;
+	kwe->kwe_kwqqueue = NULL;
 	lockseq = (lgen & PTHRW_COUNT_MASK);
-
+	
 	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq);
 	if (error != 0)  {
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 		return(error);
 	}
 	
 	ksyn_wqlock(kwq);
 
+	if (isinit != 0) {
+		lgen &= ~PTHRW_RWL_INIT;
+		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
+			/* first to notice the reset of the lock, clear preposts */
+			CLEAR_REINIT_BITS(kwq);
+			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0);
+#endif /* _PSYNCH_TRACE_ */
+		}
+	}
+
 	/* handle first the missed wakeups */
 	if ((kwq->kw_pre_intrcount != 0) && 
 		(kwq->kw_pre_intrtype == PTH_RW_TYPE_LREAD) && 
 		(is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) {
 
 		kwq->kw_pre_intrcount--;
-		uth->uu_psynchretval = kwq->kw_pre_intrretbits;
+		kwe->kwe_psynchretval = kwq->kw_pre_intrretbits;
 		if (kwq->kw_pre_intrcount==0) 
 			CLEAR_INTR_PREPOST_BITS(kwq);	
 		ksyn_wqunlock(kwq);
 		goto out;
 	}
 
-	/* handle unlock2/downgrade first */
-	if ((kwq->kw_pre_limrd != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_limrdseq & PTHRW_COUNT_MASK)) != 0)) {
-#if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_pre_limrd, kwq->kw_pre_limrdseq, 0);
-#endif /* _PSYNCH_TRACE_ */
-		kwq->kw_pre_limrd--;
-		if (kwq->kw_pre_limrd == 0)
-			CLEAR_READ_PREPOST_BITS(kwq);
-		/* not a read proceed */
-	}
 
 	if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) {
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
 #endif /* _PSYNCH_TRACE_ */
 		kwq->kw_pre_rwwc--;
 		if (kwq->kw_pre_rwwc == 0) {
 			preseq = kwq->kw_pre_lockseq;
+			prerw_wc = kwq->kw_pre_sseq;
 			CLEAR_PREPOST_BITS(kwq);
-			error = kwq_handle_unlock(kwq, preseq, &updatebits, (KW_UNLOCK_PREPOST_LREADLOCK|KW_UNLOCK_PREPOST), &block, lgen);
+			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){
+				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
+#endif /* _PSYNCH_TRACE_ */
+			}
+			error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (KW_UNLOCK_PREPOST_LREADLOCK|KW_UNLOCK_PREPOST), &block, lgen);
+#if __TESTPANICS__
 			if (error != 0)
 				panic("kwq_handle_unlock failed %d\n",error);
+#endif /* __TESTPANICS__ */
 			if (block == 0) {
 				ksyn_wqunlock(kwq);
 				goto out;
@@ -1360,32 +1756,35 @@ psynch_rw_longrdlock(__unused proc_t p, struct psynch_rw_longrdlock_args * uap,
 	}
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
-	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_LREAD], lgen, uth, SEQFIT);
+	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_LREAD], lgen, uth, kwe, SEQFIT);
+#if __TESTPANICS__
 	if (error != 0)
 		panic("psynch_rw_longrdlock: failed to enqueue\n");
+#endif /* __TESTPANICS__ */
 
-	error = ksyn_block_thread_locked(kwq, (uint64_t)0, uth);
+	error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0);
 	/* drops the kwq lock */
 out:
 	if (error != 0) {
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 		ksyn_wqlock(kwq);
-		if (uth->uu_kwqqueue != NULL)
-			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_LREAD], uth);
+		if (kwe->kwe_kwqqueue != NULL)
+			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_LREAD], kwe);
 		ksyn_wqunlock(kwq);
 	} else {
 		/* update bits */
-		*retval = uth->uu_psynchretval;
+		*retval = kwe->kwe_psynchretval;
+		returnbits = kwe->kwe_psynchretval;
 	}
 
-	ksyn_wqrelease(kwq, NULL); 
+	ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK)); 
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 0, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 0, returnbits, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 	return(error);
 }
@@ -1406,97 +1805,122 @@ psynch_rw_wrlock(__unused proc_t p, struct psynch_rw_wrlock_args * uap, uint32_t
 	ksyn_wait_queue_t kwq;
 	int error=0;
 	uthread_t uth;
-	uint32_t lockseq = 0, updatebits = 0, preseq = 0;
+	uint32_t lockseq = 0, updatebits = 0, preseq = 0, prerw_wc = 0;
+	int isinit = lgen & PTHRW_RWL_INIT;
+	uint32_t returnbits  = 0;
+	ksyn_waitq_element_t kwe;
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
 #endif /* _PSYNCH_TRACE_ */
 	uth = current_uthread();
-
-	uth->uu_lockseq = lgen;
+	kwe = &uth->uu_kwe;
+	kwe->kwe_lockseq = lgen;
+	kwe->kwe_uth = uth;
+	kwe->kwe_psynchretval = 0;
+	kwe->kwe_kwqqueue = NULL;
 	lockseq = (lgen & PTHRW_COUNT_MASK);
 
 	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq);
 	if (error != 0)  {
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 		return(error);
 	}
 	
 	ksyn_wqlock(kwq);
 
+
+	if (isinit != 0) {
+		lgen &= ~PTHRW_RWL_INIT;
+		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
+			/* first to notice the reset of the lock, clear preposts */
+			CLEAR_REINIT_BITS(kwq);
+			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0);
+#endif /* _PSYNCH_TRACE_ */
+		}
+	}
+
+
 	/* handle first the missed wakeups */
 	if ((kwq->kw_pre_intrcount != 0) && 
 		(kwq->kw_pre_intrtype == PTH_RW_TYPE_WRITE) && 
 		(is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) {
 
 		kwq->kw_pre_intrcount--;
-		uth->uu_psynchretval = kwq->kw_pre_intrretbits;
+		kwe->kwe_psynchretval = kwq->kw_pre_intrretbits;
 		if (kwq->kw_pre_intrcount==0) 
 			CLEAR_INTR_PREPOST_BITS(kwq);	
 		ksyn_wqunlock(kwq);
 		goto out;
 	}
 
-	/* handle unlock2/downgrade first */
-	if ((kwq->kw_pre_limrd != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_limrdseq & PTHRW_COUNT_MASK)) != 0)) {
-#if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_pre_limrd, kwq->kw_pre_limrdseq, 0);
-#endif /* _PSYNCH_TRACE_ */
-		kwq->kw_pre_limrd--;
-		if (kwq->kw_pre_limrd == 0)
-			CLEAR_READ_PREPOST_BITS(kwq);
-		/* not a read proceed */
-	}
 
 	if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) {
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
 #endif /* _PSYNCH_TRACE_ */
 		kwq->kw_pre_rwwc--;
 		if (kwq->kw_pre_rwwc == 0) {
 			preseq = kwq->kw_pre_lockseq;
+			prerw_wc = kwq->kw_pre_sseq;
 			CLEAR_PREPOST_BITS(kwq);
-			error = kwq_handle_unlock(kwq, preseq, &updatebits, (KW_UNLOCK_PREPOST_WRLOCK|KW_UNLOCK_PREPOST), &block, lgen);
+			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){
+				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
+#endif /* _PSYNCH_TRACE_ */
+			}
+			error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (KW_UNLOCK_PREPOST_WRLOCK|KW_UNLOCK_PREPOST), &block, lgen);
+#if __TESTPANICS__
 			if (error != 0)
-				panic("kwq_handle_unlock failed %d\n",error);
+				panic("rw_wrlock: kwq_handle_unlock failed %d\n",error);
+#endif /* __TESTPANICS__ */
 			if (block == 0) {
 				ksyn_wqunlock(kwq);
-				goto out;
+				*retval = updatebits;
+				goto out1;
 			}
 			/* insert to q and proceed as usual */
 		} 
 	}
 
+	/* no overlap watch needed; go ahead and block */
+
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
-	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], lgen, uth, SEQFIT);
+	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], lgen, uth, kwe, SEQFIT);
+#if __TESTPANICS__
 	if (error != 0)
 		panic("psynch_rw_wrlock: failed to enqueue\n");
+#endif /* __TESTPANICS__ */
 
-	error = ksyn_block_thread_locked(kwq, (uint64_t)0, uth);
+	error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0);
 	/* drops the wq lock */
 
 out:
 	if (error != 0) {
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
 		ksyn_wqlock(kwq);
-		if (uth->uu_kwqqueue != NULL)
-			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], uth);
+		if (kwe->kwe_kwqqueue != NULL)
+			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
 		ksyn_wqunlock(kwq);
 	} else  {
 		/* update bits */
-		*retval = uth->uu_psynchretval;
+		*retval = kwe->kwe_psynchretval;
+		returnbits = kwe->kwe_psynchretval;
 	}
-
-	ksyn_wqrelease(kwq, NULL); 
+out1:
+	ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK)); 
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 0, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, returnbits, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 	return(error);
 }
@@ -1505,7 +1929,7 @@ out:
  *  psynch_rw_yieldwrlock: This system call is used for psync rwlock yielding writers to block.
  */
 int
-psynch_rw_yieldwrlock(__unused proc_t p, struct  psynch_rw_yieldwrlock_args * uap, uint32_t * retval)
+psynch_rw_yieldwrlock(__unused proc_t p, __unused struct  psynch_rw_yieldwrlock_args * uap, __unused uint32_t * retval)
 {
 	user_addr_t rwlock  = uap->rwlock;
 	uint32_t lgen = uap->lgenval;
@@ -1516,65 +1940,82 @@ psynch_rw_yieldwrlock(__unused proc_t p, struct  psynch_rw_yieldwrlock_args * ua
 	int block;
 	ksyn_wait_queue_t kwq;
 	int error=0;
+	int isinit = lgen & PTHRW_RWL_INIT;
 	uthread_t uth;
+	uint32_t returnbits=0;
+	ksyn_waitq_element_t kwe;
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
 #endif /* _PSYNCH_TRACE_ */
-	uint32_t lockseq = 0, updatebits = 0, preseq = 0;
+	uint32_t lockseq = 0, updatebits = 0, preseq = 0, prerw_wc = 0;
 
 	uth = current_uthread();
-
-	uth->uu_lockseq = lgen;
+	kwe = &uth->uu_kwe;
+	kwe->kwe_lockseq = lgen;
+	kwe->kwe_uth = uth;
+	kwe->kwe_psynchretval = 0;
+	kwe->kwe_kwqqueue = NULL;
 	lockseq = (lgen & PTHRW_COUNT_MASK);
 
 	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq);
 	if (error != 0)  {
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 		return(error);
 	}
 	
 	ksyn_wqlock(kwq);
 
+	if (isinit != 0) {
+		lgen &= ~PTHRW_RWL_INIT;
+		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
+			/* first to notice the reset of the lock, clear preposts */
+			CLEAR_REINIT_BITS(kwq);
+			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0);
+#endif /* _PSYNCH_TRACE_ */
+		}
+	}
+
 	/* handle first the missed wakeups */
 	if ((kwq->kw_pre_intrcount != 0) && 
 		(kwq->kw_pre_intrtype == PTH_RW_TYPE_YWRITE) && 
 		(is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) {
 
 		kwq->kw_pre_intrcount--;
-		uth->uu_psynchretval = kwq->kw_pre_intrretbits;
+		kwe->kwe_psynchretval = kwq->kw_pre_intrretbits;
 		if (kwq->kw_pre_intrcount==0) 
 			CLEAR_INTR_PREPOST_BITS(kwq);	
 		ksyn_wqunlock(kwq);
 		goto out;
 	}
 
-	/* handle unlock2/downgrade first */
-	if ((kwq->kw_pre_limrd != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_limrdseq & PTHRW_COUNT_MASK)) != 0)) {
-#if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_pre_limrd, kwq->kw_pre_limrdseq, 0);
-#endif /* _PSYNCH_TRACE_ */
-		kwq->kw_pre_limrd--;
-		if (kwq->kw_pre_limrd == 0)
-			CLEAR_READ_PREPOST_BITS(kwq);
-		/* not a read proceed */
-	}
-
 	if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) {
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
 #endif /* _PSYNCH_TRACE_ */
 		kwq->kw_pre_rwwc--;
 		if (kwq->kw_pre_rwwc == 0) {
 			preseq = kwq->kw_pre_lockseq;
+			prerw_wc = kwq->kw_pre_sseq;
 			CLEAR_PREPOST_BITS(kwq);
-			error = kwq_handle_unlock(kwq, preseq,  &updatebits, (KW_UNLOCK_PREPOST_YWRLOCK|KW_UNLOCK_PREPOST), &block, lgen);
+			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){
+				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
+#endif /* _PSYNCH_TRACE_ */
+			}
+			error = kwq_handle_unlock(kwq, preseq,  prerw_wc, &updatebits, (KW_UNLOCK_PREPOST_YWRLOCK|KW_UNLOCK_PREPOST), &block, lgen);
+#if __TESTPANICS__
 			if (error != 0)
 				panic("kwq_handle_unlock failed %d\n",error);
+#endif /* __TESTPANICS__ */
 			if (block == 0) {
 				ksyn_wqunlock(kwq);
+				*retval = updatebits;
 				goto out;
 			}
 			/* insert to q and proceed as usual */
@@ -1582,37 +2023,40 @@ psynch_rw_yieldwrlock(__unused proc_t p, struct  psynch_rw_yieldwrlock_args * ua
 	}
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
-	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER], lgen, uth, SEQFIT);
+	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER], lgen, uth, kwe, SEQFIT);
+#if __TESTPANICS__
 	if (error != 0)
 		panic("psynch_rw_yieldwrlock: failed to enqueue\n");
+#endif /* __TESTPANICS__ */
 
-	error = ksyn_block_thread_locked(kwq, (uint64_t)0, uth);
+	error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0);
 
 out:
 	if (error != 0) {
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
 		ksyn_wqlock(kwq);
-		if (uth->uu_kwqqueue != NULL)
-			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER], uth);
+		if (kwe->kwe_kwqqueue != NULL)
+			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER], kwe);
 		ksyn_wqunlock(kwq);
 	} else  {
 		/* update bits */
-		*retval = uth->uu_psynchretval;
+		*retval = kwe->kwe_psynchretval;
+		returnbits = kwe->kwe_psynchretval;
 	}
 
-	ksyn_wqrelease(kwq, NULL); 
+	ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK)); 
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, returnbits, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 	return(error);
 }
 
-
+#if NOTYET
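+/* the rw downgrade support below is compiled out under NOTYET */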
 /*
  *  psynch_rw_downgrade: This system call is used for wakeup blocked readers who are eligible to run due to downgrade.
  */
@@ -1626,72 +2070,93 @@ psynch_rw_downgrade(__unused proc_t p, struct psynch_rw_downgrade_args * uap, __
 	//uint64_t tid = uap->tid;
 	int flags = uap->flags;
 	uint32_t count = 0;
-
+	int isinit = lgen & PTHRW_RWL_INIT;
 	ksyn_wait_queue_t kwq;
 	int error=0;
 	uthread_t uth;
 	uint32_t curgen = 0;
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
 #endif /* _PSYNCH_TRACE_ */
 	uth = current_uthread();
 
 	curgen = (lgen & PTHRW_COUNT_MASK);
 
-	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq);
+	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq);
 	if (error != 0)  {
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 		return(error);
 	}
 	
 	ksyn_wqlock(kwq);
 	
-	if (is_seqlower(ugen, kwq->kw_lastunlockseq)!= 0) {
+	if ((lgen & PTHRW_RWL_INIT) != 0) {
+		lgen &= ~PTHRW_RWL_INIT;
+		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0){
+			CLEAR_REINIT_BITS(kwq);
+			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0);
+#endif /* _PSYNCH_TRACE_ */
+		}
+		isinit = 1;
+	} 
+
+	/* if the last unlock seq is set, ensure the current one is not lower, as that would be spurious */
+	if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && (is_seqlower(ugen, kwq->kw_lastunlockseq)!= 0)) {
 		/* spurious updatebits? */
+		error = 0;
 		goto out;
 	}
-	/* fast path for default case */
-	if((rw_wc == kwq->kw_inqueue) && (kwq->kw_highseq == curgen))
-		goto dounlock;
 
-	/* have we seen all the waiters? */
-	if(rw_wc > kwq->kw_inqueue) {
-		goto prepost;
+
+
+	/* if L - U != the number of waiters, it needs to be preposted or is spurious */
+	diff = find_diff(lgen, ugen);
+	/* discount the downgrading thread itself */
+	diff--;
+
+
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_inqueue, curgen, 0);
+#endif /* _PSYNCH_TRACE_ */
+	if (find_seq_till(kwq, curgen, diff, &count) == 0) {
+		if (count < (uint32_t)diff)
+			goto prepost;
 	}
-		
-	if (is_seqhigher(curgen, kwq->kw_highseq) != 0) {
-		goto prepost;
-	} else {
-		if (find_seq_till(kwq, curgen, rw_wc, &count) == 0) {
-			if (count < rw_wc) {
-				kwq->kw_pre_limrd = rw_wc - count;
-				kwq->kw_pre_limrdseq = lgen;
-				kwq->kw_pre_limrdbits = lgen;
-				/* found none ? */
-				if (count == 0) 
-					goto out;
-			}
-		} 
+
+	/* no prepost and all threads are in place, reset the bit */
+	if ((isinit != 0) && ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0)){
+		kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
+#endif /* _PSYNCH_TRACE_ */
 	}
+
+	/* can handle unlock now */
 		
+	CLEAR_PREPOST_BITS(kwq);
+
 dounlock:		
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
 	error = kwq_handle_downgrade(kwq, lgen, 0, 0, NULL);
 
+#if __TESTPANICS__
 	if (error != 0)
 		panic("psynch_rw_downgrade: failed to wakeup\n");
+#endif /* __TESTPANICS__ */
 
 out:
 	ksyn_wqunlock(kwq);
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_END, (uint32_t)rwlock, 0, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_END, (uint32_t)rwlock, 0, 0, error, 0);
 #endif /* _PSYNCH_TRACE_ */
-	ksyn_wqrelease(kwq, NULL); 
+	ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK)); 
 
 	return(error);
 		
@@ -1699,7 +2164,7 @@ prepost:
 	kwq->kw_pre_rwwc = (rw_wc - count);
 	kwq->kw_pre_lockseq = lgen;
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
 #endif /* _PSYNCH_TRACE_ */
 	error = 0;
 	goto out;
@@ -1723,32 +2188,49 @@ psynch_rw_upgrade(__unused proc_t p, struct psynch_rw_upgrade_args * uap, uint32
 	int error=0;
 	uthread_t uth;
 	uint32_t lockseq = 0, updatebits = 0, preseq = 0;
+	uint32_t prerw_wc = 0;
+	int isinit = lgen & PTHRW_RWL_INIT;
+	ksyn_waitq_element_t kwe;
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
 #endif /* _PSYNCH_TRACE_ */
 	uth = current_uthread();
-
-	uth->uu_lockseq = lgen;
+	kwe = &uth->uu_kwe;
+	kwe->kwe_lockseq = lgen;
+	kwe->kwe_uth = uth;
+	kwe->kwe_psynchretval = 0;
+	kwe->kwe_kwqqueue = NULL;
 	lockseq = (lgen & PTHRW_COUNT_MASK);
-
-	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq);
+	
+	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK), &kwq);
 	if (error != 0)  {
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 		return(error);
 	}
 	
 	ksyn_wqlock(kwq);
-	
+
+	if (isinit != 0) {
+		lgen &= ~PTHRW_RWL_INIT;
+		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
+			/* first to notice the reset of the lock, clear preposts */
+			CLEAR_REINIT_BITS(kwq);
+			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0);
+#endif /* _PSYNCH_TRACE_ */
+		}
+	}
+
 	/* handle first the missed wakeups */
 	if ((kwq->kw_pre_intrcount != 0) && 
-		(kwq->kw_pre_intrtype == PTH_RW_TYPE_UPGRADE) && 
+		((kwq->kw_pre_intrtype == PTH_RW_TYPE_READ) || (kwq->kw_pre_intrtype == PTH_RW_TYPE_LREAD)) && 
 		(is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) {
 
 		kwq->kw_pre_intrcount--;
-		uth->uu_psynchretval = kwq->kw_pre_intrretbits;
+		kwe->kwe_psynchretval = kwq->kw_pre_intrretbits;
 		if (kwq->kw_pre_intrcount==0) 
 			CLEAR_INTR_PREPOST_BITS(kwq);	
 		ksyn_wqunlock(kwq);
@@ -1757,15 +2239,24 @@ psynch_rw_upgrade(__unused proc_t p, struct psynch_rw_upgrade_args * uap, uint32
 
 	if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) {
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
 #endif /* _PSYNCH_TRACE_ */
 		kwq->kw_pre_rwwc--;
 		if (kwq->kw_pre_rwwc == 0) {
 			preseq = kwq->kw_pre_lockseq;
+			prerw_wc = kwq->kw_pre_sseq;
 			CLEAR_PREPOST_BITS(kwq);
-			error = kwq_handle_unlock(kwq, preseq, &updatebits, (KW_UNLOCK_PREPOST_UPGRADE|KW_UNLOCK_PREPOST), &block, lgen);
+			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){
+				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
+#endif /* _PSYNCH_TRACE_ */
+			}
+			error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (KW_UNLOCK_PREPOST_UPGRADE|KW_UNLOCK_PREPOST), &block, lgen);
+#if __TESTPANICS__
 			if (error != 0)
-				panic("kwq_handle_unlock failed %d\n",error);
+				panic("rw_rdlock: kwq_handle_unlock failed %d\n",error);
+#endif /* __TESTPANICS__ */
 			if (block == 0) {
 				ksyn_wqunlock(kwq);
 				goto out;
@@ -1776,37 +2267,52 @@ psynch_rw_upgrade(__unused proc_t p, struct psynch_rw_upgrade_args * uap, uint32
 	
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
-	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE], lgen, uth, SEQFIT);
+	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE], lgen, uth, kwe, SEQFIT);
+#if __TESTPANICS__
 	if (error != 0)
 		panic("psynch_rw_upgrade: failed to enqueue\n");
+#endif /* __TESTPANICS__ */
 
 
-	error = ksyn_block_thread_locked(kwq, (uint64_t)0, uth);
+	error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0);
 	/* drops the lock */
 	
 out:
 	if (error != 0) {
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
 		ksyn_wqlock(kwq);
-		if (uth->uu_kwqqueue != NULL)
-			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE], uth);
+		if (kwe->kwe_kwqqueue != NULL)
+			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE], kwe);
 		ksyn_wqunlock(kwq);
 	} else {
 		/* update bits */
-		*retval = uth->uu_psynchretval;
+		*retval = kwe->kwe_psynchretval;
 	}
 
-	ksyn_wqrelease(kwq, NULL); 
+	ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK)); 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
 #endif /* _PSYNCH_TRACE_ */
+
 	return(error);
 }
 
+#else /* NOTYET */
+int
+psynch_rw_upgrade(__unused proc_t p, __unused struct psynch_rw_upgrade_args * uap, __unused uint32_t * retval)
+{
+	return(0);
+}
+int
+psynch_rw_downgrade(__unused proc_t p, __unused struct psynch_rw_downgrade_args * uap, __unused int * retval)
+{
+	return(0);
+}
+#endif /* NOTYET */
 /*
 *  psynch_rw_unlock: This system call is used for unlock state postings. It grants
 *			the lock to the appropriate readers or writers.
@@ -1825,19 +2331,20 @@ psynch_rw_unlock(__unused proc_t p, struct psynch_rw_unlock_args  * uap, uint32_
 	uthread_t uth;
 	ksyn_wait_queue_t kwq;
 	uint32_t updatebits = 0;
-	int error=0;
+	int error=0, diff;
 	uint32_t count = 0;
+	int isinit = 0;
 	
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
 #endif /* _PSYNCH_TRACE_ */
 	uth = current_uthread();
 
-	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_RWLOCK), &kwq);
+	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq);
 	if (error != 0)  {
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 		return(error);
 	}
@@ -1846,59 +2353,87 @@ psynch_rw_unlock(__unused proc_t p, struct psynch_rw_unlock_args  * uap, uint32_
 
 	ksyn_wqlock(kwq);
 
-	if ((lgen & PTHRW_RW_INIT) != 0) {
-		kwq->kw_lastunlockseq = 0;
-		lgen &= ~PTHRW_RW_INIT;
-	} else if (is_seqlower(ugen, kwq->kw_lastunlockseq) != 0) {
-		/* spurious  updatebits  set */
-		updatebits = PTHRW_RW_SPURIOUS;
+	if ((lgen & PTHRW_RWL_INIT) != 0) {
+		lgen &= ~PTHRW_RWL_INIT;
+		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0){
+			CLEAR_REINIT_BITS(kwq);
+			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0);
+#endif /* _PSYNCH_TRACE_ */
+		}
+		isinit = 1;
+	} 
+
+	/* if the last unlock seq is set, ensure the current one is not lower than it; a lower one would be spurious */
+	if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && (is_seqlower(ugen, kwq->kw_lastunlockseq)!= 0)) {
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, (uint32_t)0xeeeeeeee, rw_wc, kwq->kw_lastunlockseq, 0);
+#endif /* _PSYNCH_TRACE_ */
+		error = 0;
 		goto out;
 	}
 
+	/* if L - U != the number of waiters, this unlock must be preposted or is spurious */
+	diff = find_diff(lgen, ugen);
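+	/* diff is L - U in sequence space, i.e. the number of waiters this unlock expects to account for */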
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_inqueue, curgen, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_inqueue, curgen, 0);
 #endif /* _PSYNCH_TRACE_ */
-	if (find_seq_till(kwq, curgen, rw_wc, &count) == 0) {
-		if (count < rw_wc)
+	if (find_seq_till(kwq, curgen, diff, &count) == 0) {
+		if ((count == 0) || (count < (uint32_t)diff))
 			goto prepost;
 	}
 
+	/* no prepost and all threads are in place; reset the bit */
+	if ((isinit != 0) && ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0)){
+		kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
+#endif /* _PSYNCH_TRACE_ */
+	}
 
 	/* can handle unlock now */
 		
 	CLEAR_PREPOST_BITS(kwq);
-	kwq->kw_lastunlockseq = ugen;
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, 0, 0, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, 0, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
-	error = kwq_handle_unlock(kwq, lgen, &updatebits, 0, NULL, 0);
+	error = kwq_handle_unlock(kwq, lgen, rw_wc, &updatebits, 0, NULL, 0);
+#if __TESTPANICS__
 	if (error != 0)
 		panic("psynch_rw_unlock: kwq_handle_unlock failed %d\n",error);
+#endif /* __TESTPANICS__ */
 out:
 	if (error == 0) {
 		/* update bits?? */
 		*retval = updatebits;
 	}
+
+
 	ksyn_wqunlock(kwq);
 
-	ksyn_wqrelease(kwq, NULL); 
+	ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK)); 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_END, (uint32_t)rwlock, 0, 0, error, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_END, (uint32_t)rwlock, 0, updatebits, error, 0);
 #endif /* _PSYNCH_TRACE_ */
 
 	return(error);
 		
 prepost:
-	kwq->kw_pre_rwwc = (rw_wc - count);
-	kwq->kw_pre_lockseq = curgen;
-	kwq->kw_lastunlockseq = ugen;
+	/* update only if the new seq is higher than the previous prepost, or if no prepost is set yet */
+	if ((is_rws_setseq(kwq->kw_pre_sseq) != 0) || 
+			(is_seqhigher_eq((rw_wc & PTHRW_COUNT_MASK), (kwq->kw_pre_sseq & PTHRW_COUNT_MASK)) != 0)) {
+		kwq->kw_pre_rwwc = (diff - count);
+		kwq->kw_pre_lockseq = curgen;
+		kwq->kw_pre_sseq = rw_wc;
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, rw_wc, count, 0);
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
+		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, rw_wc, count, 0);
+		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
 #endif /* _PSYNCH_TRACE_ */
-	updatebits = (lgen | PTHRW_RW_SPURIOUS);/* let this not do unlock handling */
+		updatebits = lgen;	/* let this not do unlock handling */
+	}
 	error = 0;
 	goto out;
 }
@@ -1909,86 +2444,9 @@ prepost:
  *			  to new reader arrival races
  */
 int
-psynch_rw_unlock2(__unused proc_t p, struct psynch_rw_unlock2_args  * uap, uint32_t * retval)
+psynch_rw_unlock2(__unused proc_t p, __unused struct psynch_rw_unlock2_args  * uap, __unused uint32_t * retval)
 {
-	user_addr_t rwlock  = uap->rwlock;
-	uint32_t lgen = uap->lgenval;
-	uint32_t ugen = uap->ugenval;
-	uint32_t rw_wc = uap->rw_wc;
-	//uint64_t tid = uap->tid;
-	int flags = uap->flags;
-	uthread_t uth;
-	uint32_t num_lreader, limitread, curgen, updatebits;
-	ksyn_wait_queue_t kwq;
-	int error=0, longreadset = 0;
-	int diff;
-	uint32_t count=0;
-
-#if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUNLOCK2 | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
-#endif /* _PSYNCH_TRACE_ */
-	uth = current_uthread();
-
-	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_RWLOCK), &kwq);
-	if (error != 0)  {
-#if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUNLOCK2 | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
-#endif /* _PSYNCH_TRACE_ */
-		return(error);
-	}
-	
-	ksyn_wqlock(kwq);
-
-	curgen = (lgen & PTHRW_COUNT_MASK);	
-	diff = find_diff(lgen, ugen);
-
-	limitread = lgen & PTHRW_COUNT_MASK;
-
-	if (find_seq_till(kwq, curgen, diff, &count) == 0) {
-		kwq->kw_pre_limrd = diff - count;
-		kwq->kw_pre_limrdseq = lgen;
-		kwq->kw_pre_limrdbits = lgen;
-		/* found none ? */
-		if (count == 0) 
-			goto out;
-	} 
-
-	if (kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_count != 0) {
-		num_lreader = kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_firstnum;
-		if (is_seqlower_eq(num_lreader, limitread) != 0)
-			longreadset = 1;
-	}
-	
-	updatebits = lgen;
-#if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUNLOCK2 | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
-#endif /* _PSYNCH_TRACE_ */
-	count = ksyn_wakeupreaders(kwq, limitread, longreadset, 0, updatebits, NULL);
-	
-	if (count != 0) {
-		if (kwq->kw_pre_limrd !=  0) {
-			kwq->kw_pre_limrd += count;
-		} else {
-			kwq->kw_pre_limrd = count;
-			kwq->kw_pre_limrdseq = lgen;
-			kwq->kw_pre_limrdbits = lgen;
-		}
-	}
-	error = 0;
-
-out:
-	if (error == 0) {
-		/* update bits?? */
-		*retval = uth->uu_psynchretval;
-	}
-	ksyn_wqunlock(kwq);
-
-	ksyn_wqrelease(kwq, NULL); 
-#if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUNLOCK2 | DBG_FUNC_END, (uint32_t)rwlock, 0, 0, error, 0);
-#endif /* _PSYNCH_TRACE_ */
-
-	return(error);
+	return(ENOTSUP);
 }
 
 
@@ -1996,7 +2454,31 @@ out:
 void
 pth_global_hashinit()
 {
+	int arg;
+
 	pth_glob_hashtbl = hashinit(PTH_HASHSIZE * 4, M_PROC, &pthhash);
+
+	/*
+	 * pthtest={0,1,2,3} (override default aborting behavior on pthread sync failures)
+	 * 0 - just return errors
+	 * 1 - print and return errors
+	 * 2 - abort user, print and return errors
+	 * 3 - panic
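+	 * e.g. booting with boot-args="pthtest=1" selects the logging-only mode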
+	 */
+	if (!PE_parse_boot_argn("pthtest", &arg, sizeof(arg)))
+		arg = __TESTMODE__;
+
+	if (arg == 3) {
+		__test_panics__ = 1;
+		printf("Pthread support PANICS when sync kernel primitives misused\n");
+	} else if (arg == 2) {
+		__test_aborts__ = 1;
+		__test_prints__ = 1;
+		printf("Pthread support ABORTS when sync kernel primitives misused\n");
+	} else if (arg == 1) {
+		__test_prints__ = 1;
+		printf("Pthread support LOGS when sync kernel primitives misused\n");
+	}
 }
 
 void
@@ -2046,6 +2528,10 @@ pth_proc_hashdelete(proc_t p)
 	int hashsize = pthhash + 1;
 	int i;
 
+#if _PSYNCH_TRACE_
+	if ((pthread_debug_proc != NULL) && (p == pthread_debug_proc))
+		pthread_debug_proc = PROC_NULL;
+#endif /* _PSYNCH_TRACE_ */
 	hashptr = p->p_pthhash;
 	if (hashptr == NULL)
 		return;
@@ -2060,16 +2546,39 @@ pth_proc_hashdelete(proc_t p)
 			if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
 				kwq->kw_pflags &= ~KSYN_WQ_FLIST;
 				LIST_REMOVE(kwq, kw_list);
+				num_infreekwq--;
 			}
+			num_freekwq++;
 			pthread_list_unlock();
+			/* release fake entries if present for cvars */
+			if (((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) && (kwq->kw_inqueue != 0))
+				ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER]);
 			lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
-			kfree(kwq, sizeof(struct ksyn_wait_queue));
+			zfree(kwq_zone, kwq);
 		}
 	}
 	FREE(p->p_pthhash, M_PROC);
 	p->p_pthhash = NULL;
 }
 
+/* no lock is held here, as the waitqueue is being freed */
+void
+ksyn_freeallkwe(ksyn_queue_t kq)
+{
+	ksyn_waitq_element_t kwe;
+
+	/* free all the fake entries; just dequeue the real waiters */
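+	/* KWE_THREAD_INWAIT elements are embedded in the waiter's uthread (uu_kwe), so they are dequeued but never freed here */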
+	kwe = TAILQ_FIRST(&kq->ksynq_kwelist);
+	while (kwe != NULL) {
+		TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
+		if (kwe->kwe_flags != KWE_THREAD_INWAIT)
+			zfree(kwe_zone, kwe);
+		kwe = TAILQ_FIRST(&kq->ksynq_kwelist);
+	}
+}
 
 /* find kernel waitqueue, if not present create one. Grants a reference  */
 int
@@ -2081,7 +2590,8 @@ ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uin
 	uint64_t object = 0, offset = 0;
 	uint64_t hashhint;
 	proc_t p  = current_proc();
-	int retry = mgen & PTHRW_RETRYBIT;
+	int retry = mgen & PTH_RWL_RETRYBIT;
+	struct ksyn_queue kfreeq;
 	int i;
 
 	if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) 
@@ -2093,18 +2603,60 @@ ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uin
 		hashptr = p->p_pthhash;
 	}
 
+	ksyn_queue_init(&kfreeq);
+
+	if (((wqtype & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_MTX) && (retry != 0))
+		mgen &= ~PTH_RWL_RETRYBIT;
+
+loop:
 	//pthread_list_lock_spin();
 	pthread_list_lock();
 
 	kwq = ksyn_wq_hash_lookup(mutex, p, flags, object, offset);
 
 	if (kwq != NULL) {
-		kwq->kw_iocount++;
 		if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
 			LIST_REMOVE(kwq, kw_list);
 			kwq->kw_pflags &= ~KSYN_WQ_FLIST;
+			num_infreekwq--;
+			num_reusekwq++;
+		}
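+		/* the kwq at this address was a different synchronizer type; it can be retyped only when fully quiescent (no waiters, preposts or references) */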
+		if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype & KSYN_WQTYPE_MASK)) {
+			if ((kwq->kw_inqueue == 0) && (kwq->kw_pre_rwwc == 0) && (kwq->kw_pre_intrcount == 0)) {
+				if (kwq->kw_iocount == 0) {
+					kwq->kw_addr = mutex;
+					kwq->kw_flags = flags;
+					kwq->kw_object = object;
+					kwq->kw_offset = offset;
+					kwq->kw_type = (wqtype & KSYN_WQTYPE_MASK);
+					CLEAR_REINIT_BITS(kwq);
+					CLEAR_INTR_PREPOST_BITS(kwq);
+					CLEAR_PREPOST_BITS(kwq);
+					kwq->kw_lword = mgen;
+					kwq->kw_uword = ugen;
+					kwq->kw_sword = rw_wc;
+					kwq->kw_owner = tid;
+				} else if ((kwq->kw_iocount == 1) && (kwq->kw_dropcount == kwq->kw_iocount)) {
+					/* if all users are unlockers then wait for it to finish */
+					kwq->kw_pflags |= KSYN_WQ_WAITING;
+					/* wait for the wq to be free */
+					(void)msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "ksyn_wqfind", 0);
+					/* does not have list lock */
+					goto loop;
+				} else {
+					__FAILEDUSERTEST__("address already known to kernel for another (busy) synchronizer type\n");
+					pthread_list_unlock();
+					return EBUSY;
+				}
+			} else {
+				__FAILEDUSERTEST__("address already known to kernel for another (busy) synchronizer type(1)\n");
+				pthread_list_unlock();
+				return EBUSY;
+			}
 		}
-		UPDATE_KWQ(kwq, mgen, ugen, rw_wc, tid, wqtype, retry);
+		kwq->kw_iocount++;
+		if (wqtype == KSYN_WQTYPE_MUTEXDROP)
+			kwq->kw_dropcount++;
 		if (kwqp != NULL)
 			*kwqp = kwq;
 		pthread_list_unlock();
@@ -2113,23 +2665,34 @@ ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uin
 
 	pthread_list_unlock();
 
-	nkwq = kalloc(sizeof(struct ksyn_wait_queue));
+	nkwq = (ksyn_wait_queue_t)zalloc(kwq_zone);
 	bzero(nkwq, sizeof(struct ksyn_wait_queue));
 	nkwq->kw_addr = mutex;
 	nkwq->kw_flags = flags;
 	nkwq->kw_iocount = 1;
+	if (wqtype == KSYN_WQTYPE_MUTEXDROP)
+		nkwq->kw_dropcount++;
 	nkwq->kw_object = object;
 	nkwq->kw_offset = offset;
 	nkwq->kw_type = (wqtype & KSYN_WQTYPE_MASK);
-	TAILQ_INIT(&nkwq->kw_uthlist);
+	nkwq->kw_lastseqword = PTHRW_RWS_INIT;
+	if (nkwq->kw_type == KSYN_WQTYPE_RWLOCK)
+		nkwq->kw_nextseqword = PTHRW_RWS_INIT;
+		
+	nkwq->kw_pre_sseq = PTHRW_RWS_INIT;
+
+	CLEAR_PREPOST_BITS(nkwq);
+	CLEAR_INTR_PREPOST_BITS(nkwq);
+	CLEAR_REINIT_BITS(nkwq);
+	nkwq->kw_lword = mgen;
+	nkwq->kw_uword = ugen;
+	nkwq->kw_sword = rw_wc;
+	nkwq->kw_owner = tid;
+
 
 	for (i=0; i< KSYN_QUEUE_MAX; i++) 
 		ksyn_queue_init(&nkwq->kw_ksynqueues[i]);
 		
-	UPDATE_KWQ(nkwq, mgen, ugen, rw_wc, tid, wqtype, retry);
-#if USE_WAITQUEUE
-	wait_queue_init(&nkwq->kw_wq, SYNC_POLICY_FIFO);
-#endif /* USE_WAITQUEUE */
 	lck_mtx_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr);
 
 	//pthread_list_lock_spin();
@@ -2138,21 +2701,67 @@ ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uin
 	kwq = ksyn_wq_hash_lookup(mutex, p, flags, object, offset);
 
 	if (kwq != NULL) {
-		kwq->kw_iocount++;
 		if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
 			LIST_REMOVE(kwq, kw_list);
 			kwq->kw_pflags &= ~KSYN_WQ_FLIST;
+			num_infreekwq--;
+			num_reusekwq++;
+		}
+		if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype & KSYN_WQTYPE_MASK)) {
+			if ((kwq->kw_inqueue == 0) && (kwq->kw_pre_rwwc == 0) && (kwq->kw_pre_intrcount == 0)) {
+				if (kwq->kw_iocount == 0) {
+					kwq->kw_addr = mutex;
+					kwq->kw_flags = flags;
+					kwq->kw_object = object;
+					kwq->kw_offset = offset;
+					kwq->kw_type = (wqtype & KSYN_WQTYPE_MASK);
+					CLEAR_REINIT_BITS(kwq);
+					CLEAR_INTR_PREPOST_BITS(kwq);
+					CLEAR_PREPOST_BITS(kwq);
+					kwq->kw_lword = mgen;
+					kwq->kw_uword = ugen;
+					kwq->kw_sword = rw_wc;
+					kwq->kw_owner = tid;
+				} else if ((kwq->kw_iocount == 1) && (kwq->kw_dropcount == kwq->kw_iocount)) {
+					kwq->kw_pflags |= KSYN_WQ_WAITING;
+					/* wait for the wq to be free */
+					(void)msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "ksyn_wqfind", 0);
+
+					lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp);
+					zfree(kwq_zone, nkwq);
+					/* will acquire lock again */
+
+					goto loop;
+				} else {
+					__FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type(2)\n");
+					pthread_list_unlock();
+					lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp);
+					zfree(kwq_zone, nkwq);
+					return EBUSY;
+				}
+			} else {
+				__FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type(3)\n");
+				pthread_list_unlock();
+				lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp);
+				zfree(kwq_zone, nkwq);
+				return EBUSY;
+			}
 		}
-		UPDATE_KWQ(kwq, mgen, ugen, rw_wc, tid, wqtype, retry);
+		kwq->kw_iocount++;
+		if (wqtype == KSYN_WQTYPE_MUTEXDROP)
+			kwq->kw_dropcount++;
 		if (kwqp != NULL)
 			*kwqp = kwq;
 		pthread_list_unlock();
 		lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp);
-		kfree(nkwq, sizeof(struct ksyn_wait_queue));
+		zfree(kwq_zone, nkwq);
 		return (0);
 	}
 	kwq = nkwq;
 
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, kwq->kw_lword, kwq->kw_uword, kwq->kw_sword, 0xffff, 0);
+#endif /* _PSYNCH_TRACE_ */
 	if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) 
 	{
 		kwq->kw_pflags |= KSYN_WQ_SHARED;
@@ -2161,6 +2770,7 @@ ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uin
 		LIST_INSERT_HEAD(&hashptr[mutex & pthhash], kwq, kw_hash);
 
 	kwq->kw_pflags |= KSYN_WQ_INHASH;
+	num_total_kwq++;
 
 	pthread_list_unlock();
 
@@ -2171,34 +2781,81 @@ ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uin
 
 /* Reference from find is dropped here. Starts the free process if needed  */
 void
-ksyn_wqrelease(ksyn_wait_queue_t kwq, ksyn_wait_queue_t ckwq)
+ksyn_wqrelease(ksyn_wait_queue_t kwq, ksyn_wait_queue_t ckwq, int qfreenow, int wqtype)
 {
 	uint64_t deadline;
 	struct timeval t;
 	int sched = 0;
-
+	ksyn_wait_queue_t free_elem = NULL;
+	ksyn_wait_queue_t free_elem1 = NULL;
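+	/* qfreenow != 0 requests immediate teardown; otherwise the kwq is parked on the freelist for delayed reclaim by psynch_wq_cleanup() */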
 	
 	//pthread_list_lock_spin();
 	pthread_list_lock();
 	kwq->kw_iocount--;
+	if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
+		kwq->kw_dropcount--;
+	}
 	if (kwq->kw_iocount == 0) {
-		if ((kwq->kw_pre_rwwc == 0) && (kwq->kw_inqueue == 0)) {
-			microuptime(&kwq->kw_ts);
-			LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list);
-			kwq->kw_pflags |= KSYN_WQ_FLIST;
+		if ((kwq->kw_pflags & KSYN_WQ_WAITING) != 0) {
+			/* someone is waiting for the waitqueue; wake them up */
+			kwq->kw_pflags &= ~KSYN_WQ_WAITING;
+			wakeup(&kwq->kw_pflags);
 		}
-		sched = 1;
+
+		if ((kwq->kw_pre_rwwc == 0) && (kwq->kw_inqueue == 0) && (kwq->kw_pre_intrcount == 0)) {
+			if (qfreenow == 0) {
+				microuptime(&kwq->kw_ts);
+				LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list);
+				kwq->kw_pflags |= KSYN_WQ_FLIST;
+				num_infreekwq++;
+				free_elem = NULL;
+			} else {
+				/* remove from the only list it is in ie hash */
+				kwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH);
+				LIST_REMOVE(kwq, kw_hash);
+				lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
+				num_total_kwq--;
+				num_freekwq++;
+				free_elem = kwq;
+			}
+		} else 
+			free_elem = NULL;
+		if (qfreenow == 0)
+			sched = 1;
 	}
-	if (ckwq != NULL){
+
+	if (ckwq != NULL) {
 		ckwq->kw_iocount--;
+		if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
+			ckwq->kw_dropcount--;
+		}
 		if ( ckwq->kw_iocount == 0) {
-			if ((ckwq->kw_pre_rwwc == 0) && (ckwq->kw_inqueue == 0)) {
-				/* mark for free if we can */
-				microuptime(&ckwq->kw_ts);
-				LIST_INSERT_HEAD(&pth_free_list, ckwq, kw_list);
-				ckwq->kw_pflags |= KSYN_WQ_FLIST;
+			if ((ckwq->kw_pflags & KSYN_WQ_WAITING) != 0) {
+				/* someone is waiting for the waitqueue; wake them up */
+				ckwq->kw_pflags &= ~KSYN_WQ_WAITING;
+				wakeup(&ckwq->kw_pflags);
 			}
-			sched = 1;
+			if ((ckwq->kw_pre_rwwc == 0) && (ckwq->kw_inqueue == 0) && (ckwq->kw_pre_intrcount == 0)) {
+				if (qfreenow == 0) {
+					/* mark for free if we can */
+					microuptime(&ckwq->kw_ts);
+					LIST_INSERT_HEAD(&pth_free_list, ckwq, kw_list);
+					ckwq->kw_pflags |= KSYN_WQ_FLIST;
+					num_infreekwq++;
+					free_elem1 = NULL;
+				} else {
+					/* remove from the only list it is on, i.e. the hash */
+					ckwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH);
+					LIST_REMOVE(ckwq, kw_hash);
+					lck_mtx_destroy(&ckwq->kw_lock, pthread_lck_grp);
+					num_total_kwq--;
+					num_freekwq++;
+					free_elem1 = ckwq;
+				}
+			} else
+				free_elem1 = NULL;
+			if (qfreenow == 0)
+				sched = 1;
 		}
 	}
 
@@ -2211,6 +2868,10 @@ ksyn_wqrelease(ksyn_wait_queue_t kwq, ksyn_wait_queue_t ckwq)
 		thread_call_enter_delayed(psynch_thcall, deadline);
 	}
 	pthread_list_unlock();
+	if (free_elem != NULL)
+		zfree(kwq_zone, free_elem);
+	if (free_elem1 != NULL)
+		zfree(kwq_zone, free_elem1);
 }
 
 /* responsible to free the waitqueues */
@@ -2226,16 +2887,13 @@ psynch_wq_cleanup(__unused void *  param, __unused void * param1)
 	//pthread_list_lock_spin();
 	pthread_list_lock();
 
+	num_addedfreekwq = num_infreekwq - num_lastfreekwqcount;
+	num_lastfreekwqcount = num_infreekwq;
 	microuptime(&t);
 
 	LIST_FOREACH(kwq, &pth_free_list, kw_list) {
-			
-		if (count > 100) {
-			delayed = 1;
-			break;
-		}
-		if ((kwq->kw_iocount != 0) && (kwq->kw_inqueue != 0)) {
-			/* still in freelist ??? */
+		if ((kwq->kw_iocount != 0) || (kwq->kw_pre_rwwc != 0) || (kwq->kw_inqueue != 0) || (kwq->kw_pre_intrcount != 0)) {
+			/* still in use */
 			continue;
 		}
 		diff = t.tv_sec - kwq->kw_ts.tv_sec;
@@ -2244,10 +2902,13 @@ psynch_wq_cleanup(__unused void *  param, __unused void * param1)
 		if (diff >= KSYN_CLEANUP_DEADLINE) {
 			/* out of hash */
 			kwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH);
+			num_infreekwq--;
+			num_freekwq++;
 			LIST_REMOVE(kwq, kw_hash);
 			LIST_REMOVE(kwq, kw_list);
 			LIST_INSERT_HEAD(&freelist, kwq, kw_list);
 			count ++;
+			num_total_kwq--;
 		} else {
 			delayed = 1;
 		}
@@ -2268,23 +2929,22 @@ psynch_wq_cleanup(__unused void *  param, __unused void * param1)
 	while ((kwq = LIST_FIRST(&freelist)) != NULL) {
 		LIST_REMOVE(kwq, kw_list);
 		lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
-		kfree(kwq, sizeof(struct ksyn_wait_queue));
+		zfree(kwq_zone, kwq);
 	}
 }
 
 
 int
-ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, uthread_t uth)
+ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, int mylog)
 {
 	kern_return_t kret;
 	int error = 0;
+#if _PSYNCH_TRACE_
+	uthread_t uth = NULL;
+#endif /* _PSYNCH_TRACE_ */
 
-	uth->uu_kwqqueue = (void *)kwq;
-#if USE_WAITQUEUE
-	kret  = wait_queue_assert_wait64(&kwq->kw_wq, kwq->kw_addr, THREAD_ABORTSAFE, abstime);
-#else /* USE_WAITQUEUE */
-	assert_wait_deadline(&uth->uu_psynchretval, THREAD_ABORTSAFE, abstime);
-#endif /* USE_WAITQUEUE */
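+	/* arm the wait before dropping the kwq lock so a wakeup posted in between is not lost */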
+	kwe->kwe_kwqqueue = (void *)kwq;
+	assert_wait_deadline(&kwe->kwe_psynchretval, THREAD_ABORTSAFE, abstime);
 	ksyn_wqunlock(kwq);
 
 	kret = thread_block(NULL);
@@ -2296,116 +2956,42 @@ ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, uthread_t uth)
 			error  = EINTR;
 			break;
 	}
+#if _PSYNCH_TRACE_
+	uth = current_uthread();
+#if defined(__i386__)
+	if (mylog != 0)
+		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_THWAKEUP | DBG_FUNC_NONE, 0xf4f3f2f1, (uint32_t)uth, kret, 0, 0);
+#else
+	if (mylog != 0)
+		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_THWAKEUP | DBG_FUNC_NONE, 0xeeeeeeee, kret, error, 0xeeeeeeee, 0);
+#endif
+#endif /* _PSYNCH_TRACE_ */
+		
 	return(error);
 }
 
 kern_return_t
-#if USE_WAITQUEUE
-ksyn_wakeup_thread(ksyn_wait_queue_t kwq, uthread_t uth)
-#else /* USE_WAITQUEUE */
-ksyn_wakeup_thread(__unused ksyn_wait_queue_t kwq, uthread_t uth)
-#endif /* USE_WAITQUEUE */
+ksyn_wakeup_thread(__unused ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe)
 {
-	thread_t th;
 	kern_return_t kret;
-	th = uth->uu_context.vc_thread;
+#if _PSYNCH_TRACE_
+	uthread_t uth = NULL;
+#endif /* _PSYNCH_TRACE_ */
 
-#if USE_WAITQUEUE
-	kret = wait_queue_wakeup64_thread(&kwq->kw_wq, kwq->kw_addr, th, THREAD_AWAKENED);
-#else /* USE_WAITQUEUE */
-	kret = thread_wakeup_prim((caddr_t)&uth->uu_psynchretval, TRUE, THREAD_AWAKENED);
-#endif /* USE_WAITQUEUE */
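+	/* the wait channel is the kwe's kwe_psynchretval, matching the assert_wait_deadline() in ksyn_block_thread_locked() */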
+	kret = thread_wakeup_one((caddr_t)&kwe->kwe_psynchretval);
 
 	if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
 		panic("ksyn_wakeup_thread: panic waking up thread %x\n", kret);
+#if _PSYNCH_TRACE_
+	uth = kwe->kwe_uth;
+#if defined(__i386__)
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_THWAKEUP | DBG_FUNC_NONE, 0xf1f2f3f4, (uint32_t)uth, kret, 0, 0);
+#endif
+#endif /* _PSYNCH_TRACE_ */
 	
-	
-
 	return(kret);
 }
 
-/* move from one waitqueue to another */
-#if COND_MTX_WAITQUEUEMOVE
-void 
-ksyn_move_wqthread( ksyn_wait_queue_t ckwq, ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t updateval, int diffgen, int nomutex)
-#else /* COND_MTX_WAITQUEUEMOVE */
-void 
-ksyn_move_wqthread( ksyn_wait_queue_t ckwq, __unused ksyn_wait_queue_t kwq, __unused uint32_t mgen, uint32_t updateval, __unused int diffgen, int nomutex)
-#endif /* COND_MTX_WAITQUEUEMOVE */
-{
-	kern_return_t kret;
-	uthread_t uth;
-#if COND_MTX_WAITQUEUEMOVE
-	int count = 0, error, kret;
-	uint32_t nextgen = mgen;
-#endif /* COND_MTX_WAITQUEUEMOVE */
-	struct ksyn_queue  kq;
-	uint32_t upgen;
-	
-	ksyn_queue_init(&kq);
-#if USE_WAITQUEUE
-	/* TBD wq move */
-	kret = wait_queue_move_all(&ckwq->kw_wq, ckwq->kw_addr, &kwq->kw_wq,  kwq->kw_addr);
-#else /* USE_WAITQUEUE */
-	/* no need to move as the thread is blocked at uthread address */
-	kret = KERN_SUCCESS;
-#endif /* USE_WAITQUEUE */
-
-	if (nomutex != 0) 
-		upgen = updateval | PTHRW_MTX_NONE;
-	else
-		upgen = updateval;
-	
-	if (kret== KERN_SUCCESS) {
-redrive:
-		while ((uth = ksyn_queue_removefirst(&ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], ckwq)) != NULL) {
-			if (nomutex != 0) {
-#if COND_MTX_WAITQUEUEMOVE
-				uth->uu_psynchretval = upgen;
-#else /* COND_MTX_WAITQUEUEMOVE */
-				uth->uu_psynchretval = 0;
-				uth->uu_kwqqueue = NULL;
-				kret = ksyn_wakeup_thread(ckwq, uth);
-				if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
-					panic("ksyn_move_wqthread: panic waking up \n");
-				if (kret == KERN_NOT_WAITING)
-					goto redrive;
-#endif /* COND_MTX_WAITQUEUEMOVE */
-			} 
-#if COND_MTX_WAITQUEUEMOVE
-			  else {
-				count++;
-				if (count >diffgen)
-					panic("movethread inserting more than expected\n");
-				TAILQ_INSERT_TAIL(&kq.ksynq_uthlist, uth, uu_mtxlist);
-			}
-#endif /* COND_MTX_WAITQUEUEMOVE */
-			
-		}
-		ksyn_wqunlock(ckwq);
-
-#if COND_MTX_WAITQUEUEMOVE
-		if ( (nomutex == 0) && (count > 0)) {
-			ksyn_wqlock(kwq);
-			uth = TAILQ_FIRST(&kq.ksynq_uthlist);
-			while(uth != NULL) {
-				TAILQ_REMOVE(&kq.ksynq_uthlist, uth, uu_mtxlist);
-				error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], nextgen,  uth, SEQFIT); 
-				if (error != 0) {
-					panic("movethread insert failed\n");
-				} 
-				uth->uu_lockseq = nextgen;
-				nextgen += PTHRW_INC;
-				uth = TAILQ_FIRST(&kq.ksynq_uthlist);
-			}
-			ksyn_wqunlock(kwq);
-		}
-#endif /* COND_MTX_WAITQUEUEMOVE */
-	} else
-		panic("movethread : wq move all  failed\n");
-	return;
-}
-
 /* find the true shared object/offset for shared mutexes */
 int 
 ksyn_findobj(uint64_t mutex, uint64_t * objectp, uint64_t * offsetp)
@@ -2509,12 +3095,13 @@ kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int * typ
 	} else
 		lowest[KSYN_QUEUE_YWRITER] = 0;
 
-
 	
+#if __TESTPANICS__
 	if (count == 0)
 		panic("nothing in the queue???\n");
+#endif /* __TESTPANICS__ */
 
-        low = numbers[0];
+	low = numbers[0];
 	lowtype = typenum[0];
         if (count > 1) {
                 for (i = 1; i< count; i++) {
@@ -2535,44 +3122,39 @@ kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int * typ
 int
 ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int longreadset, int allreaders, uint32_t  updatebits, int * wokenp)
 {
-	uthread_t uth;
+	ksyn_waitq_element_t kwe = NULL;
 	ksyn_queue_t kq;
 	int failedwakeup = 0;
 	int numwoken = 0;
 	kern_return_t kret = KERN_SUCCESS;
-	int resetbit = updatebits & PTHRW_RW_HUNLOCK;
 	uint32_t lbits = 0;
 
 	lbits = updatebits;
 	if (longreadset != 0) {
 		/* clear all read and longreads */
-		while ((uth = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_READ], kwq)) != NULL) {
-			uth->uu_psynchretval = lbits;
-			/* set on one thread */
-			if (resetbit != 0) {
-				lbits &= ~PTHRW_RW_HUNLOCK;
-				resetbit = 0;
-			}
+		while ((kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_READ], kwq)) != NULL) {
+			kwe->kwe_psynchretval = lbits;
+			kwe->kwe_kwqqueue = NULL;
+
 			numwoken++;
-			uth->uu_kwqqueue = NULL;
-			kret = ksyn_wakeup_thread(kwq, uth);
+			kret = ksyn_wakeup_thread(kwq, kwe);
+#if __TESTPANICS__
 			if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
 				panic("ksyn_wakeupreaders: panic waking up readers\n");
+#endif /* __TESTPANICS__ */
 			if (kret == KERN_NOT_WAITING) {
 				failedwakeup++;
 			}
 		}
-		while ((uth = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_LREAD], kwq)) != NULL) {
-			uth->uu_psynchretval = lbits;
-			uth->uu_kwqqueue = NULL;
-			if (resetbit != 0) {
-				lbits &= ~PTHRW_RW_HUNLOCK;
-				resetbit = 0;
-			}
+		while ((kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_LREAD], kwq)) != NULL) {
+			kwe->kwe_psynchretval = lbits;
+			kwe->kwe_kwqqueue = NULL;
 			numwoken++;
-			kret = ksyn_wakeup_thread(kwq, uth);
+			kret = ksyn_wakeup_thread(kwq, kwe);
+#if __TESTPANICS__
 			if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
 				panic("ksyn_wakeupreaders: panic waking up lreaders\n");
+#endif /* __TESTPANICS__ */
 			if (kret == KERN_NOT_WAITING) {
 				failedwakeup++;
 			}
@@ -2580,17 +3162,15 @@ ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int longreadset, i
 	} else {
 		kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
 		while ((kq->ksynq_count != 0) && (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) {
-			uth = ksyn_queue_removefirst(kq, kwq);
-			uth->uu_psynchretval = lbits;
-			if (resetbit != 0) {
-				lbits &= ~PTHRW_RW_HUNLOCK;
-				resetbit = 0;
-			}
+			kwe = ksyn_queue_removefirst(kq, kwq);
+			kwe->kwe_psynchretval = lbits;
+			kwe->kwe_kwqqueue = NULL;
 			numwoken++;
-			uth->uu_kwqqueue = NULL;
-			kret = ksyn_wakeup_thread(kwq, uth);
+			kret = ksyn_wakeup_thread(kwq, kwe);
+#if __TESTPANICS__
 			if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
 				panic("ksyn_wakeupreaders: panic waking up readers\n");
+#endif /* __TESTPANICS__ */
 			if (kret == KERN_NOT_WAITING) {
 				failedwakeup++;
 			}
@@ -2605,32 +3185,45 @@ ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int longreadset, i
 
 /* This handles the unlock grants for next set on rw_unlock() or on arrival of all preposted waiters */
 int
-kwq_handle_unlock(ksyn_wait_queue_t kwq, uint32_t mgen,  uint32_t * updatep, int flags, int * blockp, uint32_t premgen)
+kwq_handle_unlock(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t rw_wc, uint32_t * updatep, int flags, int * blockp, uint32_t premgen)
 {
 	uint32_t low_reader, low_writer, low_ywriter, low_lreader,limitrdnum;
 	int rwtype, error=0;
 	int longreadset = 0, allreaders, failed;
-	uint32_t updatebits;
+	uint32_t updatebits = 0, numneeded = 0;
 	int prepost = flags & KW_UNLOCK_PREPOST;
 	thread_t preth = THREAD_NULL;
+	ksyn_waitq_element_t kwe;
 	uthread_t uth;
 	thread_t th;
 	int woken = 0;
 	int block = 1;
-        uint32_t lowest[KSYN_QUEUE_MAX]; /* np need for upgrade as it is handled separately */
+	uint32_t lowest[KSYN_QUEUE_MAX]; /* no need for upgrade as it is handled separately */
 	kern_return_t kret = KERN_SUCCESS;
+	ksyn_queue_t kq;
+	int curthreturns = 0;
 
 #if _PSYNCH_TRACE_
-#if defined(__i386__)
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_START, (uint32_t)kwq, mgen, premgen, 0, 0);
-#endif
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_START, (uint32_t)kwq->kw_addr, mgen, premgen, rw_wc, 0);
 #endif /* _PSYNCH_TRACE_ */
 	if (prepost != 0) {
 		preth = current_thread();
 	}
 	
+	kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];	
+	kwq->kw_lastseqword = rw_wc;
+	kwq->kw_lastunlockseq = (rw_wc & PTHRW_COUNT_MASK);
+	kwq->kw_overlapwatch = 0;
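+	/* record this unlock's S word and sequence; the reader-overlap fast path is re-armed below only when the grant goes to readers */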
+
 	/* upgrade pending */
 	if (is_rw_ubit_set(mgen)) {
+#if __TESTPANICS__
+		panic("NO UBIT SHOULD BE SET\n");
+		updatebits = PTH_RWL_EBIT | PTH_RWL_KBIT;
+		if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0)
+			updatebits |= PTH_RWL_WBIT;
+		if (kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count != 0)
+			updatebits |= PTH_RWL_YBIT;
 		if (prepost != 0)  {
 			if((flags & KW_UNLOCK_PREPOST_UPGRADE) != 0) {
 				/* upgrade thread calling the prepost */
@@ -2641,34 +3234,37 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, uint32_t mgen,  uint32_t * updatep, int
 
 		}
 		if (kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE].ksynq_count > 0) {
-			uth = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE], kwq);
-			uth->uu_psynchretval = (mgen  | PTHRW_EBIT) & ~PTHRW_UBIT;
-			uth->uu_kwqqueue = NULL;
-			kret = ksyn_wakeup_thread(kwq, uth);
+			kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE], kwq);
+			
+			kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
+			kwe->kwe_psynchretval = updatebits;
+			kwe->kwe_kwqqueue = NULL;
+			kret = ksyn_wakeup_thread(kwq, kwe);
 			if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
 				panic("kwq_handle_unlock: panic waking up the upgrade thread \n");
 			if (kret == KERN_NOT_WAITING) {
 				kwq->kw_pre_intrcount = 1;	/* actually a  count */
 				kwq->kw_pre_intrseq = mgen;
-				kwq->kw_pre_intrretbits = uth->uu_psynchretval;
+				kwq->kw_pre_intrretbits = kwe->kwe_psynchretval;
 				kwq->kw_pre_intrtype = PTH_RW_TYPE_UPGRADE;
 			}
 			error = 0;
 		} else {
 			panic("panic unable to find the upgrade thread\n");
 		}
+#endif /* __TESTPANICS__ */
 		ksyn_wqunlock(kwq);
 		goto out;
 	}
 	
 	error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest);
+#if __TESTPANICS__
 	if (error != 0)
 		panic("rwunlock: cannot fails to slot next round of threads");
+#endif /* __TESTPANICS__ */
 
 #if _PSYNCH_TRACE_
-#if defined(__i386__)
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq, 1, rwtype, lowest, 0);
-#endif
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 1, rwtype, 0, 0);
 #endif /* _PSYNCH_TRACE_ */
 	low_reader = lowest[KSYN_QUEUE_READ];
 	low_lreader = lowest[KSYN_QUEUE_LREAD];
@@ -2676,24 +3272,36 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, uint32_t mgen,  uint32_t * updatep, int
 	low_ywriter = lowest[KSYN_QUEUE_YWRITER];
 
 	
-	updatebits = mgen  & ~( PTHRW_EBIT | PTHRW_WBIT |PTHRW_YBIT | PTHRW_UBIT | PTHRW_LBIT);
-
 	longreadset = 0;
 	allreaders = 0;
+	updatebits = 0;
+
+
 	switch (rwtype & PTH_RW_TYPE_MASK) {
 		case PTH_RW_TYPE_LREAD:
 			longreadset = 1;
+			
 		case PTH_RW_TYPE_READ: {
+			/* what about the preflight which is LREAD or READ ?? */
+			if  ((rwtype & PTH_RWSHFT_TYPE_MASK) != 0) {
+				if (rwtype & PTH_RWSHFT_TYPE_WRITE)
+					updatebits |= (PTH_RWL_WBIT | PTH_RWL_KBIT);
+				if (rwtype & PTH_RWSHFT_TYPE_YWRITE)
+					updatebits |= PTH_RWL_YBIT;
+			}
 			limitrdnum = 0;
 			if (longreadset == 0) {
 				switch (rwtype & (PTH_RWSHFT_TYPE_WRITE | PTH_RWSHFT_TYPE_YWRITE)) {
 					case PTH_RWSHFT_TYPE_WRITE: 
 						limitrdnum = low_writer;
 						if (((rwtype & PTH_RWSHFT_TYPE_LREAD) != 0) && 
-							(is_seqlower(low_lreader, low_writer) != 0)) {
+							(is_seqlower(low_lreader, limitrdnum) != 0)) {
+							longreadset = 1;
+						}
+						if (((flags &  KW_UNLOCK_PREPOST_LREADLOCK) != 0) && 
+							(is_seqlower(premgen, limitrdnum) != 0)) {
 							longreadset = 1;
 						}
-				
 						break;
 					case PTH_RWSHFT_TYPE_YWRITE: 
 						/* all read ? */
@@ -2702,11 +3310,25 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, uint32_t mgen,  uint32_t * updatep, int
 							longreadset = 1;
 						} else
 							allreaders = 1;
+						if (((flags &  KW_UNLOCK_PREPOST_LREADLOCK) != 0) && 
+							(is_seqlower(premgen, low_ywriter) != 0)) {
+							longreadset = 1;
+							allreaders = 0;
+						}
+				
+						
 						break;
 					case (PTH_RWSHFT_TYPE_WRITE | PTH_RWSHFT_TYPE_YWRITE):
-						limitrdnum = low_writer; 
+						if (is_seqlower(low_ywriter, low_writer) != 0) {
+							limitrdnum = low_ywriter;
+						} else
+							limitrdnum = low_writer;
 						if (((rwtype & PTH_RWSHFT_TYPE_LREAD) != 0) && 
-							(is_seqlower(low_lreader, low_ywriter) != 0)) {
+							(is_seqlower(low_lreader, limitrdnum) != 0)) {
+							longreadset = 1;
+						}
+						if (((flags &  KW_UNLOCK_PREPOST_LREADLOCK) != 0) && 
+							(is_seqlower(premgen, limitrdnum) != 0)) {
 							longreadset = 1;
 						}
 						break;
@@ -2718,35 +3340,71 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, uint32_t mgen,  uint32_t * updatep, int
 				};
 
 			}
+			numneeded = 0;
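+			/* the number of grants handed out is carried in the count field of updatebits (shifted by PTHRW_COUNT_SHIFT) */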
+			if (longreadset !=  0) {
+				updatebits |= PTH_RWL_LBIT;
+				updatebits &= ~PTH_RWL_KBIT;
+				if ((flags &  (KW_UNLOCK_PREPOST_READLOCK | KW_UNLOCK_PREPOST_LREADLOCK)) != 0)
+					numneeded += 1;
+				numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
+				numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_count;
+				updatebits += (numneeded << PTHRW_COUNT_SHIFT);
+				kwq->kw_overlapwatch = 1;
+			} else {
+				/* no longread, evaluate number of readers */
 
-			if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0)
-				updatebits |= PTHRW_WBIT;
-			else if ((rwtype & PTH_RWSHFT_TYPE_YWRITE) != 0)
-				updatebits |= PTHRW_YBIT;
+				switch (rwtype & (PTH_RWSHFT_TYPE_WRITE | PTH_RWSHFT_TYPE_YWRITE)) {
+					case PTH_RWSHFT_TYPE_WRITE: 
+						limitrdnum = low_writer;
+						numneeded = ksyn_queue_count_tolowest(kq, limitrdnum);
+						if (((flags &  KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, limitrdnum) != 0)) {
+							curthreturns = 1;
+							numneeded += 1;
+						}
+						break;
+					case PTH_RWSHFT_TYPE_YWRITE: 
+						/* all read ? */
+						numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
+						if ((flags &  KW_UNLOCK_PREPOST_READLOCK) != 0) {
+							curthreturns = 1;
+							numneeded += 1;
+						}
+						break;
+					case (PTH_RWSHFT_TYPE_WRITE | PTH_RWSHFT_TYPE_YWRITE):
+						limitrdnum = low_writer; 
+						numneeded = ksyn_queue_count_tolowest(kq, limitrdnum);
+						if (((flags &  KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, limitrdnum) != 0)) {
+							curthreturns = 1;
+							numneeded += 1;
+						}
+						break;
+					default: /* no writers at all */
+						/* no other waiters only readers */
+						kwq->kw_overlapwatch = 1;
+						numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
+						if ((flags &  KW_UNLOCK_PREPOST_READLOCK) != 0) {
+							curthreturns = 1;
+							numneeded += 1;
+						}
+				};
+		
+				updatebits += (numneeded << PTHRW_COUNT_SHIFT);
+			}
+			kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
 
-			if (longreadset == 0) {
-				if((prepost != 0) && 
-						((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) &&
-					((allreaders != 0) || (is_seqlower(premgen, limitrdnum) != 0))) {
-					block = 0;
-					uth = current_uthread();
-					uth->uu_psynchretval = updatebits;
-				}
-			} else {
-				updatebits |= PTHRW_LBIT;
-				if ((prepost != 0) && 
-				   ((flags & (KW_UNLOCK_PREPOST_READLOCK | KW_UNLOCK_PREPOST_LREADLOCK)) != 0)) {
-					block = 0;
-					uth = current_uthread();
-					uth->uu_psynchretval = updatebits;
-				}
+			if (curthreturns != 0) {
+				block = 0;
+				uth = current_uthread();
+				kwe = &uth->uu_kwe;
+				kwe->kwe_psynchretval = updatebits;
 			}
 			
-			if (prepost != 0) {
-				updatebits |= PTHRW_RW_HUNLOCK;
-			}
 
 			failed = ksyn_wakeupreaders(kwq, limitrdnum, longreadset, allreaders, updatebits, &woken);
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 2, woken, failed, 0);
+#endif /* _PSYNCH_TRACE_ */
+
 			if (failed != 0) {
 				kwq->kw_pre_intrcount = failed;	/* actually a  count */
 				kwq->kw_pre_intrseq = limitrdnum;
@@ -2757,43 +3415,49 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, uint32_t mgen,  uint32_t * updatep, int
 					kwq->kw_pre_intrtype = PTH_RW_TYPE_READ;
 			} 
 
-			/* if we woken up no one and the current thread is returning, ensure it is doing unlock */
-			if ((prepost != 0) && (woken == 0) && (block == 0)&& ((updatebits & PTHRW_RW_HUNLOCK) != 0)) {
-				uth = current_uthread();
-				uth->uu_psynchretval = updatebits;
-	}
-
 			error = 0;
 
+			if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) && ((updatebits & PTH_RWL_WBIT) == 0))
+				panic("kwq_handle_unlock: writer pending but no writebit set %x\n", updatebits);
 		} 
 		break;
 			
 		case PTH_RW_TYPE_WRITE: {
-			updatebits |= PTHRW_EBIT;
+			
+			/* only one thread is going to be granted */
+			updatebits |= (PTHRW_INC);
+			updatebits |= PTH_RWL_KBIT| PTH_RWL_EBIT;
+			
 			if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) {
 				block = 0;
 				if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0)
-					updatebits |= PTHRW_WBIT;
-				else if ((rwtype & PTH_RWSHFT_TYPE_YWRITE) != 0)
-					updatebits |= PTHRW_YBIT;
+					updatebits |= PTH_RWL_WBIT;
+				if ((rwtype & PTH_RWSHFT_TYPE_YWRITE) != 0)
+					updatebits |= PTH_RWL_YBIT;
 				th = preth;
 				uth = get_bsdthread_info(th);
-				uth->uu_psynchretval = updatebits;
+				kwe = &uth->uu_kwe;
+				kwe->kwe_psynchretval = updatebits;
 			}  else {
 				/*  we are not granting writelock to the preposting thread */
-				uth = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwq);
+				kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwq);
 
 				/* if there are writers present or the preposting write thread then W bit is to be set */
 				if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) )
-					updatebits |= PTHRW_WBIT;
-				else if ((rwtype & PTH_RWSHFT_TYPE_YWRITE) != 0)
-					updatebits |= PTHRW_YBIT;
-				uth->uu_psynchretval = updatebits;
-				uth->uu_kwqqueue = NULL;
+					updatebits |= PTH_RWL_WBIT;
+				if ((rwtype & PTH_RWSHFT_TYPE_YWRITE) != 0)
+					updatebits |= PTH_RWL_YBIT;
+				kwe->kwe_psynchretval = updatebits;
+				kwe->kwe_kwqqueue = NULL;
 				/* setup next in the queue */
-				kret = ksyn_wakeup_thread(kwq, uth);
+				kret = ksyn_wakeup_thread(kwq, kwe);
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 3, kret, 0, 0);
+#endif /* _PSYNCH_TRACE_ */
+#if __TESTPANICS__
 				if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
 					panic("kwq_handle_unlock: panic waking up writer\n");
+#endif /* __TESTPANICS__ */
 				if (kret == KERN_NOT_WAITING) {
 					kwq->kw_pre_intrcount = 1;	/* actually a  count */
 					kwq->kw_pre_intrseq = low_writer;
@@ -2802,6 +3466,9 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, uint32_t mgen,  uint32_t * updatep, int
 				}
 				error = 0;
 			}
+			kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
+			if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != (PTH_RWL_KBIT | PTH_RWL_EBIT))
+				panic("kwq_handle_unlock: writer lock granted but no ke set %x\n", updatebits);
 
 		 } 
 		break;
@@ -2809,26 +3476,36 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, uint32_t mgen,  uint32_t * updatep, int
 		case PTH_RW_TYPE_YWRITE: {
 			/* can reader locks be granted ahead of this write? */
 			if ((rwtype & PTH_RWSHFT_TYPE_READ) != 0)  {
-				if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0)
-					updatebits |= PTHRW_WBIT;
-				else if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0)
-					updatebits |= PTHRW_YBIT;
+				if  ((rwtype & PTH_RWSHFT_TYPE_MASK) != 0) {
+					if (rwtype & PTH_RWSHFT_TYPE_WRITE)
+						updatebits |= (PTH_RWL_WBIT | PTH_RWL_KBIT);
+					if (rwtype & PTH_RWSHFT_TYPE_YWRITE)
+						updatebits |= PTH_RWL_YBIT;
+				}
 					
 				if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
 					/* is lowest reader less than the low writer? */
 					if (is_seqlower(low_reader,low_writer) == 0)
 						goto yielditis;
+
+					numneeded = ksyn_queue_count_tolowest(kq, low_writer);
+					updatebits += (numneeded << PTHRW_COUNT_SHIFT);
 					if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, low_writer) != 0)) {
 						uth = current_uthread();
-						uth->uu_psynchretval = updatebits;
+						kwe = &uth->uu_kwe;
+						/* add one more */
+						updatebits += PTHRW_INC;
+						kwe->kwe_psynchretval = updatebits;
 						block = 0;
 					}
-					if (prepost != 0) {
-						updatebits |= PTHRW_RW_HUNLOCK;
-					}
 					
+					kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
+
 					/* there will be readers to wake up, no need to check for woken */
 					failed = ksyn_wakeupreaders(kwq, low_writer, 0, 0, updatebits, NULL);
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 2, woken, failed, 0);
+#endif /* _PSYNCH_TRACE_ */
 					if (failed != 0) {
 						kwq->kw_pre_intrcount = failed;	/* actually a  count */
 						kwq->kw_pre_intrseq = low_writer;
@@ -2838,32 +3515,33 @@ kwq_handle_unlock(ksyn_wait_queue_t kwq, uint32_t mgen,  uint32_t * updatep, int
 					error = 0;
 				} else {
 					/* wakeup all readers */
+					numneeded = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
+					updatebits += (numneeded << PTHRW_COUNT_SHIFT);
 					if ((prepost != 0) &&  ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) {
 						uth = current_uthread();
-						uth->uu_psynchretval = updatebits;
+						kwe = &uth->uu_kwe;
+						updatebits += PTHRW_INC;
+						kwe->kwe_psynchretval = updatebits;
 						block = 0;
 					}
-					if (prepost != 0) {
-						updatebits |= PTHRW_RW_HUNLOCK;
-					}
+					kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
 					failed = ksyn_wakeupreaders(kwq, low_writer, 0, 1, updatebits, &woken);
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 2, woken, failed, 0);
+#endif /* _PSYNCH_TRACE_ */
 					if (failed != 0) {
 						kwq->kw_pre_intrcount = failed;	/* actually a  count */
 						kwq->kw_pre_intrseq = kwq->kw_highseq;
 						kwq->kw_pre_intrretbits = updatebits;
 						kwq->kw_pre_intrtype = PTH_RW_TYPE_READ;
 					}
-					/* if we woken up no one and the current thread is returning, ensure it is doing unlock */
-					if ((prepost != 0) && (woken ==0) && (block == 0)&& ((updatebits & PTHRW_RW_HUNLOCK) != 0)) {
-						uth = current_uthread();
-						uth->uu_psynchretval = updatebits;
-					}
 					error = 0;
 				}
 			} else {
 yielditis:
 				/* no reads, so grant yielding writes */
-				updatebits |= PTHRW_EBIT;
+				updatebits |= PTHRW_INC;
+				updatebits |= PTH_RWL_KBIT| PTH_RWL_EBIT;
 
 				if (((flags & KW_UNLOCK_PREPOST_YWRLOCK) != 0) && (low_writer == premgen)) {
 					/* preposting yielding write thread is being granted exclusive lock */
@@ -2871,29 +3549,35 @@ yielditis:
 					block = 0;
 
 					if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0)
-						updatebits |= PTHRW_WBIT;
+						updatebits |= PTH_RWL_WBIT;
 					else if (kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count != 0)
-						updatebits |= PTHRW_YBIT;
+						updatebits |= PTH_RWL_YBIT;
 
 					th = preth;
 					uth = get_bsdthread_info(th);
-					uth->uu_psynchretval = updatebits;
+					kwe = &uth->uu_kwe;
+					kwe->kwe_psynchretval = updatebits;
 				}  else {
 					/*  we are granting yield writelock to some other thread */
-					uth = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER], kwq);
+					kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER], kwq);
 
 					if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0)
-						updatebits |= PTHRW_WBIT;
+						updatebits |= PTH_RWL_WBIT;
 					/* if there are ywriters present or the preposting ywrite thread then W bit is to be set */
 					else if ((kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_YWRLOCK) != 0) )
-						updatebits |= PTHRW_YBIT;
+						updatebits |= PTH_RWL_YBIT;
 
-					uth->uu_psynchretval = updatebits;
-					uth->uu_kwqqueue = NULL;
+					kwe->kwe_psynchretval = updatebits;
+					kwe->kwe_kwqqueue = NULL;
 
-					kret = ksyn_wakeup_thread(kwq, uth);
+					kret = ksyn_wakeup_thread(kwq, kwe);
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 3, kret, 0, 0);
+#endif /* _PSYNCH_TRACE_ */
+#if __TESTPANICS__
 					if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
 						panic("kwq_handle_unlock : panic waking up readers\n");
+#endif /* __TESTPANICS__ */
 					if (kret == KERN_NOT_WAITING) {
 						kwq->kw_pre_intrcount = 1;	/* actually a  count */
 						kwq->kw_pre_intrseq = low_ywriter;
@@ -2902,6 +3586,7 @@ yielditis:
 					}
 					error = 0;
 				}
+				kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
 			}
 		} 
 		break;
@@ -2911,21 +3596,58 @@ yielditis:
 			
 	};
 
-	if (updatep != NULL)
-		*updatep = updatebits;
 
 out:
+	if (updatep != NULL)
+		*updatep = updatebits;
 	if (blockp != NULL)
 		*blockp = block;
 #if _PSYNCH_TRACE_
-#if defined(__i386__)
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_END, (uint32_t)kwq, 0, 0, block, 0);
-#endif
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_END, (uint32_t)kwq->kw_addr, 0, updatebits, block, 0);
 #endif /* _PSYNCH_TRACE_ */
 	return(error);
 }
 
+int
+kwq_handle_overlap(ksyn_wait_queue_t kwq, uint32_t lgenval, __unused uint32_t ugenval, uint32_t rw_wc, uint32_t *updatebitsp, __unused int flags , int * blockp)
+{
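+	/* called when kw_overlapwatch is set: decide whether an arriving reader can be granted immediately, overlapping the in-progress read phase, instead of queueing */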
+	uint32_t highword = kwq->kw_nextseqword & PTHRW_COUNT_MASK;
+	uint32_t lowword = kwq->kw_lastseqword & PTHRW_COUNT_MASK;
+	uint32_t val=0;
+	int withinseq;
+
+
+	/* overlap is set, so no need to check for valid state for overlap */
+	
+	withinseq = ((is_seqlower_eq(rw_wc, highword) != 0) || (is_seqhigher_eq(lowword, rw_wc) != 0));
+
+	if (withinseq != 0) {
+		if ((kwq->kw_nextseqword & PTH_RWL_LBIT) == 0) {
+			/* if no writers ahead, overlap granted */
+			if ((lgenval & PTH_RWL_WBIT) == 0) {
+				goto grantoverlap;
+			}
+		} else  {
+			/* Lbit is set, and writers ahead does not count */
+			goto grantoverlap;
+		}
+	}
+
+	*blockp = 1;
+	return(0);
+
+grantoverlap:
+	/* increase the next expected seq by one */
+	kwq->kw_nextseqword += PTHRW_INC;
+	/* set the count to one, carry the bits from nextseq, and add the M bit */
+	val = PTHRW_INC;
+	val |= ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT);
+	*updatebitsp = val;
+	*blockp = 0;
+	return(0);
+}
 
+#if NOTYET
 /* handle downgrade actions */
 int
 kwq_handle_downgrade(ksyn_wait_queue_t kwq, uint32_t mgen, __unused int flags, __unused uint32_t premgen, __unused int * blockp)
@@ -2964,33 +3686,38 @@ kwq_handle_downgrade(ksyn_wait_queue_t kwq, uint32_t mgen, __unused int flags, _
 	}
 	return(0);
 }
+
+#endif /* NOTYET */
+
 /************* Indiv queue support routines ************************/
 void
 ksyn_queue_init(ksyn_queue_t kq)
 {
-	TAILQ_INIT(&kq->ksynq_uthlist);
+	TAILQ_INIT(&kq->ksynq_kwelist);
 	kq->ksynq_count = 0;
 	kq->ksynq_firstnum = 0;
 	kq->ksynq_lastnum = 0;
 }
 
-
 int
-ksyn_queue_insert(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t mgen, struct uthread * uth, int fit)
+ksyn_queue_insert(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t mgen, struct uthread * uth, ksyn_waitq_element_t kwe, int fit)
 {
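+	/* insert kwe into kq; FIRSTFIT appends in arrival order, otherwise (SEQFIT) the queue is kept sorted by lock sequence number */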
 	uint32_t lockseq = mgen & PTHRW_COUNT_MASK;
-	struct uthread * q_uth, * r_uth;
+	ksyn_waitq_element_t q_kwe, r_kwe;
+	int res = 0;
+	uthread_t nuth = NULL;
 	
 	if (kq->ksynq_count == 0) {
-		TAILQ_INSERT_HEAD(&kq->ksynq_uthlist, uth, uu_mtxlist);
+		TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
 		kq->ksynq_firstnum = lockseq;
 		kq->ksynq_lastnum = lockseq;
 		goto out;
 	}
 
 	if (fit == FIRSTFIT) {
+		/* TBD: if retry bit is set for mutex, add it to the head */
 		/* firstfit, arriving order */
-		TAILQ_INSERT_TAIL(&kq->ksynq_uthlist, uth, uu_mtxlist);
+		TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
 		if (is_seqlower (lockseq, kq->ksynq_firstnum) != 0)
 			kq->ksynq_firstnum = lockseq;
 		if (is_seqhigher (lockseq, kq->ksynq_lastnum) != 0)
@@ -2998,55 +3725,79 @@ ksyn_queue_insert(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t mgen, struct
 		goto out;
 	}
 		
-	if ((lockseq == kq->ksynq_firstnum) || (lockseq == kq->ksynq_lastnum))
-		panic("ksyn_queue_insert: two threads with same lockseq ");
+	if ((lockseq == kq->ksynq_firstnum) || (lockseq == kq->ksynq_lastnum)) {
+		/* During prepost, when a thread is being cancelled, we can have two entries with the same seq */
+		if (kwe->kwe_flags == KWE_THREAD_PREPOST) {
+			q_kwe = ksyn_queue_find_seq(kwq, kq, lockseq, 0);
+			if ((q_kwe != NULL) && ((nuth = (uthread_t)q_kwe->kwe_uth) != NULL) && 
+				((nuth->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL)) {
+				TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
+				goto out;
+
+			} else {
+				__FAILEDUSERTEST__("ksyn_queue_insert: two threads with same lockseq ");
+				res = EBUSY;
+				goto out1;
+			}
+		} else {
+			__FAILEDUSERTEST__("ksyn_queue_insert: two threads with same lockseq ");
+			res = EBUSY;
+			goto out1;
+		}
+	}
 
 	/* check for next seq one */
 	if (is_seqlower(kq->ksynq_lastnum, lockseq) != 0) {
-		TAILQ_INSERT_TAIL(&kq->ksynq_uthlist, uth, uu_mtxlist);
+		TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
 		kq->ksynq_lastnum = lockseq;
 		goto out;
 	}
 
 	if (is_seqlower(lockseq, kq->ksynq_firstnum) != 0) {
-		TAILQ_INSERT_HEAD(&kq->ksynq_uthlist, uth, uu_mtxlist);
+		TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
 		kq->ksynq_firstnum = lockseq;
 		goto out;
 	}
 
 	/* goto slow  insert mode */
-	TAILQ_FOREACH_SAFE(q_uth, &kq->ksynq_uthlist, uu_mtxlist, r_uth) {
-		if (is_seqhigher(q_uth->uu_lockseq, lockseq) != 0) {
-			TAILQ_INSERT_BEFORE(q_uth, uth, uu_mtxlist);
+	TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
+		if (is_seqhigher(q_kwe->kwe_lockseq, lockseq) != 0) {
+			TAILQ_INSERT_BEFORE(q_kwe, kwe, kwe_list);
 			goto out;
 		}
 	}
 
+#if __TESTPANICS__
 	panic("failed to insert \n");
+#endif /* __TESTPANICS__ */
+
 out:
+	if (uth != NULL)
+		kwe->kwe_uth = uth;
 	kq->ksynq_count++;
 	kwq->kw_inqueue++;
 	update_low_high(kwq, lockseq);
-	return(0);
+out1:
+	return(res);
 }
 
-struct uthread *
+ksyn_waitq_element_t
 ksyn_queue_removefirst(ksyn_queue_t kq, ksyn_wait_queue_t kwq)
 {
-	uthread_t uth = NULL;
-	uthread_t q_uth;
+	ksyn_waitq_element_t kwe = NULL;
+	ksyn_waitq_element_t q_kwe;
 	uint32_t curseq;
 
 	if (kq->ksynq_count != 0) {
-		uth = TAILQ_FIRST(&kq->ksynq_uthlist);
-		TAILQ_REMOVE(&kq->ksynq_uthlist, uth, uu_mtxlist);
-		curseq = uth->uu_lockseq & PTHRW_COUNT_MASK;
+		kwe = TAILQ_FIRST(&kq->ksynq_kwelist);
+		TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
+		curseq = kwe->kwe_lockseq & PTHRW_COUNT_MASK;
 		kq->ksynq_count--;
 		kwq->kw_inqueue--;
 	
 		if(kq->ksynq_count != 0) {
-			q_uth = TAILQ_FIRST(&kq->ksynq_uthlist);
-			kq->ksynq_firstnum = (q_uth->uu_lockseq & PTHRW_COUNT_MASK);
+			q_kwe = TAILQ_FIRST(&kq->ksynq_kwelist);
+			kq->ksynq_firstnum = (q_kwe->kwe_lockseq & PTHRW_COUNT_MASK);
 		} else {
 			kq->ksynq_firstnum = 0;
 			kq->ksynq_lastnum = 0;
@@ -3062,28 +3813,30 @@ ksyn_queue_removefirst(ksyn_queue_t kq, ksyn_wait_queue_t kwq)
 				kwq->kw_highseq = find_nexthighseq(kwq);
 		}
 	}
-	return(uth);
+	return(kwe);
 }
 
 void
-ksyn_queue_removeitem(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uthread_t uth)
+ksyn_queue_removeitem(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe)
 {
-	uthread_t q_uth;
+	ksyn_waitq_element_t q_kwe;
 	uint32_t curseq;
 
 	if (kq->ksynq_count > 0) {
-		TAILQ_REMOVE(&kq->ksynq_uthlist, uth, uu_mtxlist);
+		TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
 		kq->ksynq_count--;
 		if(kq->ksynq_count != 0) {
-			q_uth = TAILQ_FIRST(&kq->ksynq_uthlist);
-			kq->ksynq_firstnum = (q_uth->uu_lockseq & PTHRW_COUNT_MASK);
+			q_kwe = TAILQ_FIRST(&kq->ksynq_kwelist);
+			kq->ksynq_firstnum = (q_kwe->kwe_lockseq & PTHRW_COUNT_MASK);
+			q_kwe = TAILQ_LAST(&kq->ksynq_kwelist, ksynq_kwelist_head);
+			kq->ksynq_lastnum = (q_kwe->kwe_lockseq & PTHRW_COUNT_MASK);
 		} else {
 			kq->ksynq_firstnum = 0;
 			kq->ksynq_lastnum = 0;
 		
 		}
 		kwq->kw_inqueue--;
-		curseq = uth->uu_lockseq & PTHRW_COUNT_MASK;
+		curseq = kwe->kwe_lockseq & PTHRW_COUNT_MASK;
 		if (kwq->kw_inqueue == 0) {
 			kwq->kw_lowseq = 0;
 			kwq->kw_highseq = 0;
@@ -3096,6 +3849,168 @@ ksyn_queue_removeitem(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uthread_t uth)
 	}
 }
 
+/* find the thread at the given seq and optionally remove it from the queue */
+ksyn_waitq_element_t
+ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq, int remove)
+{
+	ksyn_waitq_element_t q_kwe, r_kwe;
+
+	/* TBD: bail out if higher seq is seen */
+	/* case where wrap in the tail of the queue exists */
+	TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
+		if ((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK) == seq) {
+			if (remove != 0)
+				ksyn_queue_removeitem(kwq, kq, q_kwe);
+			return(q_kwe);
+		}
+	}
+	return(NULL);
+}
+
+
+/* find the thread at the target sequence (or a broadcast/prepost at or above) */
+ksyn_waitq_element_t
+ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen)
+{
+	ksyn_waitq_element_t q_kwe, r_kwe;
+	uint32_t lgen = (cgen & PTHRW_COUNT_MASK);
+
+	/* case where wrap in the tail of the queue exists */
+	TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
+
+		/* skip the lower entries */
+		if (is_seqlower((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK), cgen) != 0) 
+			continue;
+
+		switch (q_kwe->kwe_flags) {
+
+		case KWE_THREAD_INWAIT:
+			if ((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen)
+				break;
+			/* fall thru */
+
+		case KWE_THREAD_BROADCAST:
+		case KWE_THREAD_PREPOST:
+			return (q_kwe);
+		}
+	}
+	return(NULL);
+}
+
+/* look for a thread at the signal seq; failing an exact match, the first eligible waiter at or below uptoseq */
+ksyn_waitq_element_t
+ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t uptoseq, uint32_t signalseq)
+{
+	ksyn_waitq_element_t q_kwe, r_kwe, t_kwe = NULL;
+
+	/* case where wrap in the tail of the queue exists */
+	TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
+
+		switch (q_kwe->kwe_flags) {
+
+		case KWE_THREAD_PREPOST:
+			if (is_seqhigher((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK), uptoseq))
+				return t_kwe;
+			/* fall thru */
+
+		case KWE_THREAD_BROADCAST:
+			/* match any prepost at our same uptoseq or any broadcast above */
+			if (is_seqlower((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK), uptoseq))
+				continue;
+			return  q_kwe;
+
+		case KWE_THREAD_INWAIT:
+			/*
+			 * Match any (non-cancelled) thread at or below our upto sequence -
+			 * but prefer an exact match to our signal sequence (if present) to
+			 * keep exact matches happening.
+			 */
+			if (is_seqhigher((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK), uptoseq))
+				return t_kwe;
+
+			if (q_kwe->kwe_kwqqueue == kwq) {
+				uthread_t ut = q_kwe->kwe_uth;
+				if ((ut->uu_flag & ( UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) != UT_CANCEL) {
+					/* if equal or higher than our signal sequence, return this one */
+					if (is_seqhigher_eq((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK), signalseq))
+						return q_kwe;
+					
+					/* otherwise, just remember this eligible thread and move on */
+					if (t_kwe == NULL)
+						t_kwe = q_kwe;
+				}
+			}
+			break;
+
+		default:
+			panic("ksyn_queue_find_signalseq(): unknown wait queue element type (%d)\n", q_kwe->kwe_flags);
+			break;
+		}
+	}
+	return t_kwe;
+}
+
+
+int
+ksyn_queue_move_tofree(ksyn_wait_queue_t ckwq, ksyn_queue_t kq, uint32_t upto, ksyn_queue_t kfreeq, int all, int release)
+{
+	ksyn_waitq_element_t kwe;
+	int count = 0;
+	uint32_t tseq = upto & PTHRW_COUNT_MASK;
+#if _PSYNCH_TRACE_
+	uthread_t ut;
+#endif /* _PSYNCH_TRACE_ */
+
+	ksyn_queue_init(kfreeq);
+
+	/* move entries up to the target seq to the free queue; real waiters found here are woken as spurious wakeups */
+	kwe = TAILQ_FIRST(&kq->ksynq_kwelist);
+	while (kwe != NULL) {
+		if ((all == 0) && (is_seqhigher((kwe->kwe_lockseq & PTHRW_COUNT_MASK), tseq) != 0)) 
+			break;
+		if (kwe->kwe_flags == KWE_THREAD_INWAIT) {
+			/*
+			 * This scenario typically arises when the cvar has been
+			 * reinitialized and new waiters are already waiting. We
+			 * wake them as spurious wakeups so the cvar state gets
+			 * reset correctly.
+			 */
+#if _PSYNCH_TRACE_
+			ut = (uthread_t)kwe->kwe_uth;
+#endif /* _PSYNCH_TRACE_ */
+
+			/* skip canceled ones */
+			/* wake the rest */
+			ksyn_queue_removeitem(ckwq, kq, kwe);
+			/* set the M bit to tell the waking CV to return the Inc val */
+			kwe->kwe_psynchretval = PTHRW_INC | (PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT);
+			kwe->kwe_kwqqueue = NULL;
+#if _PSYNCH_TRACE_
+		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVHBROAD | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xcafecaf3, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0);
+#endif /* _PSYNCH_TRACE_ */
+			(void)ksyn_wakeup_thread(ckwq, kwe);
+		} else {
+			ksyn_queue_removeitem(ckwq, kq, kwe);
+			TAILQ_INSERT_TAIL(&kfreeq->ksynq_kwelist, kwe, kwe_list);
+			ckwq->kw_fakecount--;
+			count++;
+		}
+		kwe = TAILQ_FIRST(&kq->ksynq_kwelist);
+	}
+
+	if ((release != 0) && (count != 0)) {
+		kwe = TAILQ_FIRST(&kfreeq->ksynq_kwelist);
+		while (kwe != NULL) {
+			TAILQ_REMOVE(&kfreeq->ksynq_kwelist, kwe, kwe_list);
+			zfree(kwe_zone, kwe);
+			kwe = TAILQ_FIRST(&kfreeq->ksynq_kwelist);
+		}
+	}
+
+	return(count);
+}
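
ksyn_queue_move_tofree collects the fake (prepost/broadcast) entries on a caller-supplied free queue and only frees them afterwards, keeping the zfree calls out of the scan itself. The drain-and-free idiom it relies on, using the same TAILQ macros:

    ksyn_waitq_element_t kwe;

    /* pop-and-free until the free queue is empty */
    while ((kwe = TAILQ_FIRST(&kfreeq->ksynq_kwelist)) != NULL) {
        TAILQ_REMOVE(&kfreeq->ksynq_kwelist, kwe, kwe_list);
        zfree(kwe_zone, kwe);               /* return the element to its zone */
    }
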
+
+/*************************************************************************/
 
 void
 update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq)
@@ -3167,6 +4082,51 @@ find_nexthighseq(ksyn_wait_queue_t kwq)
 	return(highest);
 }
 
+int
+is_seqlower(uint32_t x, uint32_t y)
+{
+	if (x < y) {
+		if ((y-x) < (PTHRW_MAX_READERS/2))
+			return(1);
+	} else {
+		if ((x-y) > (PTHRW_MAX_READERS/2))
+			return(1);
+	}
+	return(0);
+}
+
+int
+is_seqlower_eq(uint32_t x, uint32_t y)
+{
+	if (x==y)
+		return(1);
+	else
+		return(is_seqlower(x,y));
+}
+
+int
+is_seqhigher(uint32_t x, uint32_t y)
+{
+	if (x > y) {
+		if ((x-y) < (PTHRW_MAX_READERS/2))
+			return(1);
+	} else {
+		if ((y-x) > (PTHRW_MAX_READERS/2))
+			return(1);
+	}
+	return(0);
+}
+
+int
+is_seqhigher_eq(uint32_t x, uint32_t y)
+{
+	if (x==y)
+		return(1);
+	else
+		return(is_seqhigher(x,y));
+}
+
+
 int
 find_diff(uint32_t upto, uint32_t lowest)
 {
@@ -3174,7 +4134,14 @@ find_diff(uint32_t upto, uint32_t lowest)
 
 	if (upto == lowest)
 		return(0);
+#if 0
 	diff = diff_genseq(upto, lowest);
+#else
+	if (is_seqlower(upto, lowest) != 0)
+		diff = diff_genseq(lowest, upto);
+	else
+		diff = diff_genseq(upto, lowest);
+#endif
 	diff = (diff >> PTHRW_COUNT_SHIFT);
 	return(diff);
 }
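
These helpers compare sequence numbers modulo 2^32: two values are ordered by whichever direction around the circle is shorter than half the sequence space (PTHRW_MAX_READERS/2), which is what keeps the comparisons correct across counter wrap. A worked example with illustrative values:

    uint32_t x = 0xfffffe00;    /* close to wrapping */
    uint32_t y = 0x00000200;    /* already wrapped past zero */

    /*
     * Numerically x > y, but x - y == 0xfffffc00 exceeds half the
     * sequence space, so the window test flips the answer:
     *   is_seqlower(x, y)  returns 1  (x is the older sequence)
     *   is_seqhigher(y, x) returns 1  (y is the newer one)
     */
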
@@ -3188,13 +4155,13 @@ find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t
 
 
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_FSEQTILL | DBG_FUNC_START, 0, 0, upto, nwaiters, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_FSEQTILL | DBG_FUNC_START, 0, 0, upto, nwaiters, 0);
 #endif /* _PSYNCH_TRACE_ */
 
 	for (i= 0; i< KSYN_QUEUE_MAX; i++) {
 		count += ksyn_queue_count_tolowest(&kwq->kw_ksynqueues[i], upto);
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_FSEQTILL | DBG_FUNC_NONE, 0, 1, i, count, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_FSEQTILL | DBG_FUNC_NONE, 0, 1, i, count, 0);
 #endif /* _PSYNCH_TRACE_ */
 		if (count >= nwaiters) {
 			break;
@@ -3205,9 +4172,11 @@ find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t
 		*countp = count;
 	}
 #if _PSYNCH_TRACE_
-	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_FSEQTILL | DBG_FUNC_END, 0, 0, count, nwaiters, 0);
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_FSEQTILL | DBG_FUNC_END, 0, 0, count, nwaiters, 0);
 #endif /* _PSYNCH_TRACE_ */
-	if (count >= nwaiters)
+	if (count == 0)
+		return(0);
+	else if (count >= nwaiters)
 		return(1);
 	else
 		return(0);
@@ -3218,7 +4187,7 @@ uint32_t
 ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto)
 {
 	uint32_t i = 0;
-	uthread_t uth, newuth;
+	ksyn_waitq_element_t kwe, newkwe;
 	uint32_t curval;
 
 	/* if nothing or the  first num is greater than upto, return none */
@@ -3227,8 +4196,8 @@ ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto)
 	if (upto == kq->ksynq_firstnum)
 		return(1);
 
-	TAILQ_FOREACH_SAFE(uth, &kq->ksynq_uthlist, uu_mtxlist, newuth) {
-		curval = (uth->uu_lockseq & PTHRW_COUNT_MASK);
+	TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
+		curval = (kwe->kwe_lockseq & PTHRW_COUNT_MASK);
 		if (upto == curval) {
 			i++;
 			break;
@@ -3242,19 +4211,147 @@ ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto)
 	return(i);
 }
 
-/* find the thread and removes from the queue */
-uthread_t
-ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq)
-{
-		uthread_t q_uth, r_uth;
-		/* case where wrap in the tail of the queue exists */
-		TAILQ_FOREACH_SAFE(q_uth, &kq->ksynq_uthlist, uu_mtxlist, r_uth) {
-			if (q_uth->uu_lockseq == seq) {
-				ksyn_queue_removeitem(kwq, kq, q_uth);
-				return(q_uth);
-			}
+
+/* handle the cond broadcast of a cvar; the count of woken threads and the bits for the syscall return come back via updatep */
+void
+ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t * updatep)
+{
+	kern_return_t kret;
+	ksyn_queue_t kq;
+	ksyn_waitq_element_t kwe, newkwe;
+	uint32_t updatebits = 0;
+	struct ksyn_queue  kfreeq;
+	uthread_t ut;
+
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVHBROAD | DBG_FUNC_START, 0xcbcbcbc2, upto, 0, 0, 0);
+#endif /* _PSYNCH_TRACE_ */
+
+	ksyn_queue_init(&kfreeq);
+	kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
+
+ retry:
+	TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
+
+		if (is_seqhigher((kwe->kwe_lockseq & PTHRW_COUNT_MASK), upto))	/* outside our range */
+			break;
+
+		/* now handle the one we found (inside the range) */
+		switch (kwe->kwe_flags) {
+
+		case KWE_THREAD_INWAIT:
+			ut = (uthread_t)kwe->kwe_uth;
+
+			/* skip canceled ones */
+			if (kwe->kwe_kwqqueue != ckwq ||
+			    (ut->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL)
+				break;
+
+			/* wake the rest */
+			ksyn_queue_removeitem(ckwq, kq, kwe);
+			kwe->kwe_psynchretval = PTH_RWL_MTX_WAIT;
+			kwe->kwe_kwqqueue = NULL;
+#if _PSYNCH_TRACE_
+				__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVHBROAD | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xcafecaf2, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0);
+#endif /* _PSYNCH_TRACE_ */
+				kret = ksyn_wakeup_thread(ckwq, kwe);
+#if __TESTPANICS__
+			if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING))
+				panic("ksyn_handle_cvbroad: panic waking up threads\n");
+#endif /* __TESTPANICS__ */
+			updatebits += PTHRW_INC;
+			break;
+			
+		case KWE_THREAD_BROADCAST:
+		case KWE_THREAD_PREPOST:
+			ksyn_queue_removeitem(ckwq, kq, kwe);
+			TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, kwe, kwe_list);
+			ckwq->kw_fakecount--;
+			break;
+			
+		default: 
+			panic("unknown kweflags\n");
+			break;
 		}
-	return(NULL);
+	}
+
+	/* Need to enter a broadcast in the queue (if not already at L == S) */
+
+	if ((ckwq->kw_lword & PTHRW_COUNT_MASK) != (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
+
+		newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist);
+		if (newkwe == NULL) {
+			ksyn_wqunlock(ckwq);
+			newkwe = (ksyn_waitq_element_t)zalloc(kwe_zone);
+			TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
+			ksyn_wqlock(ckwq);
+			goto retry;
+		}
+		
+		TAILQ_REMOVE(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
+		bzero(newkwe, sizeof(struct ksyn_waitq_element));
+		newkwe->kwe_kwqqueue = ckwq;
+		newkwe->kwe_flags = KWE_THREAD_BROADCAST;
+		newkwe->kwe_lockseq = upto;
+		newkwe->kwe_count = 0;
+		newkwe->kwe_uth = NULL;
+		newkwe->kwe_psynchretval = 0;
+		
+#if _PSYNCH_TRACE_
+		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVHBROAD | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xfeedfeed, upto, 0, 0);
+#endif /* _PSYNCH_TRACE_ */
+		
+		(void)ksyn_queue_insert(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], upto, NULL, newkwe, SEQFIT);
+		ckwq->kw_fakecount++;
+	}
+
+	/* free up any remaining things stumbled across above */
+	kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist);
+	while (kwe != NULL) {
+		TAILQ_REMOVE(&kfreeq.ksynq_kwelist, kwe, kwe_list);
+		zfree(kwe_zone, kwe);
+		kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist);
+	}
+
+	if (updatep != NULL)
+		*updatep = updatebits;
+
+#if _PSYNCH_TRACE_
+	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVHBROAD | DBG_FUNC_END, 0xeeeeeeed, updatebits, 0, 0, 0);
+#endif /* _PSYNCH_TRACE_ */
 }
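
One detail worth calling out in the broadcast path above: zalloc can block, so it must not be called with the wait-queue lock held. The code therefore drops the lock, allocates into the local free queue, retakes the lock, and restarts the scan, since the queue may have changed in the unlocked window. The skeleton of the pattern, using the same names as above:

 retry:
    /* ... scan the queue under the lock ... */
    if ((newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist)) == NULL) {
        ksyn_wqunlock(ckwq);                /* cannot block while holding the lock */
        newkwe = (ksyn_waitq_element_t)zalloc(kwe_zone);
        TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
        ksyn_wqlock(ckwq);
        goto retry;                         /* state may have moved while unlocked */
    }
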
 
+void
+ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep, ksyn_queue_t kfreeq, int release)
+{
+	uint32_t updatebits = 0;
+
+	if (updatep != NULL)
+		updatebits = *updatep;
+	if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
+		updatebits |= PTH_RWS_CV_CBIT;
+		if (ckwq->kw_inqueue != 0) {
+			/* FREE THE QUEUE */
+			ksyn_queue_move_tofree(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], ckwq->kw_lword, kfreeq, 0, release);
+#if __TESTPANICS__
+			if (ckwq->kw_inqueue != 0)
+				panic("ksyn_cvupdate_fixup: L == S, but entries in queue beyond S");
+#endif /* __TESTPANICS__ */
+		}
+		ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
+		ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
+	} else if ((ckwq->kw_inqueue != 0) && (ckwq->kw_fakecount == ckwq->kw_inqueue)) {
+		/* only fake entries are present in the queue */
+		updatebits |= PTH_RWS_CV_PBIT; 
+	}
+	if (updatep != NULL)
+		*updatep = updatebits;
+}
+
+void
+psynch_zoneinit(void)
+{
+	kwq_zone = (zone_t)zinit(sizeof(struct ksyn_wait_queue), 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_waitqueue zone");
+	kwe_zone = (zone_t)zinit(sizeof(struct ksyn_waitq_element), 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element zone");
+}
 #endif /* PSYNCH */
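
psynch_zoneinit carves out the two fixed-size zones this file allocates from; elements then come from zalloc and go back with zfree. A minimal zone lifecycle sketch (struct foo is hypothetical):

    static zone_t foo_zone;

    foo_zone = (zone_t)zinit(sizeof(struct foo),        /* element size */
        8192 * sizeof(struct foo),                      /* ceiling on zone memory */
        4096,                                           /* allocation chunk size */
        "foo zone");                                    /* name reported by zprint */

    struct foo *f = (struct foo *)zalloc(foo_zone);     /* may block for memory */
    /* ... use f ... */
    zfree(foo_zone, f);
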
diff --git a/bsd/kern/pthread_synch.c b/bsd/kern/pthread_synch.c
index 7a00399cc..3fbed7532 100644
--- a/bsd/kern/pthread_synch.c
+++ b/bsd/kern/pthread_synch.c
@@ -91,6 +91,7 @@
 #include <mach/port.h>
 #include <vm/vm_protos.h>
 #include <vm/vm_map.h>	/* for current_map() */
+#include <vm/vm_fault.h>
 #include <mach/thread_act.h> /* for thread_resume */
 #include <machine/machine_routines.h>
 #if defined(__i386__)
@@ -109,12 +110,6 @@
 #define KERNEL_DEBUG1 KERNEL_DEBUG_CONSTANT1
 #endif
 
-
-#if defined(__ppc__) || defined(__ppc64__)
-#include <architecture/ppc/cframe.h>
-#endif
-
-
 lck_grp_attr_t   *pthread_lck_grp_attr;
 lck_grp_t    *pthread_lck_grp;
 lck_attr_t   *pthread_lck_attr;
@@ -130,7 +125,6 @@ extern kern_return_t semaphore_signal_internal_trap(mach_port_name_t);
 extern void workqueue_thread_yielded(void);
 
 static int workqueue_additem(struct workqueue *wq, int prio, user_addr_t item, int affinity);
-static int workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item);
 static boolean_t workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t th,
 					user_addr_t oc_item, int oc_prio, int oc_affinity);
 static void wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
@@ -138,7 +132,7 @@ static void wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlis
 static void wq_unpark_continue(void);
 static void wq_unsuspend_continue(void);
 static int setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl);
-static boolean_t workqueue_addnewthread(struct workqueue *wq);
+static boolean_t workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread);
 static void workqueue_removethread(struct threadlist *tl);
 static void workqueue_lock_spin(proc_t);
 static void workqueue_unlock(proc_t);
@@ -215,9 +209,7 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args  *uap, us
 	isLP64 = IS_64BIT_PROCESS(p);
 
 
-#if defined(__ppc__)
-	stackaddr = 0xF0000000;
-#elif defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__)
 	stackaddr = 0xB0000000;
 #else
 #error Need to define a stack address hint for this architecture
@@ -266,6 +258,22 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args  *uap, us
 		th_stack = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
 		th_pthread = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
 		user_stacksize = th_stacksize;
+		
+		/*
+		 * Pre-fault the first page of the new thread's stack and the page that will
+		 * contain the pthread_t structure.
+		 */
+		vm_fault( vmap,
+		  vm_map_trunc_page(th_stack - PAGE_SIZE_64),
+		  VM_PROT_READ | VM_PROT_WRITE,
+		  FALSE, 
+		  THREAD_UNINT, NULL, 0);
+		
+		vm_fault( vmap,
+		  vm_map_trunc_page(th_pthread),
+		  VM_PROT_READ | VM_PROT_WRITE,
+		  FALSE, 
+		  THREAD_UNINT, NULL, 0);
 	} else {
 		th_stack = user_stack;
 		user_stacksize = user_stack;
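
The two vm_fault calls above soft-fault the top stack page and the pthread_t page into the new thread's map, so the thread does not stall on those faults during its first instructions. A userspace analogue of the same warm-up idea (a sketch, not part of the patch):

    #include <sys/mman.h>
    #include <stddef.h>

    /* map a stack-sized region and pre-fault its topmost page by touching it */
    size_t sz = 512 * 1024;
    char *base = mmap(NULL, sz, PROT_READ | PROT_WRITE,
        MAP_ANON | MAP_PRIVATE, -1, 0);
    if (base != MAP_FAILED)
        ((volatile char *)base)[sz - 1] = 0;    /* top of a descending stack */
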
@@ -275,31 +283,7 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args  *uap, us
 #endif
 	}
 	
-#if defined(__ppc__)
-	/*
-	 * Set up PowerPC registers...
-	 * internally they are always kept as 64 bit and
-	 * since the register set is the same between 32 and 64bit modes
-	 * we don't need 2 different methods for setting the state
-	 */
-	{
-	        ppc_thread_state64_t state64;
-		ppc_thread_state64_t *ts64 = &state64;
-
-		ts64->srr0 = (uint64_t)p->p_threadstart;
-		ts64->r1 = (uint64_t)(th_stack - C_ARGSAVE_LEN - C_RED_ZONE);
-		ts64->r3 = (uint64_t)th_pthread;
-		ts64->r4 = (uint64_t)(th_thport);
-		ts64->r5 = (uint64_t)user_func;
-		ts64->r6 = (uint64_t)user_funcarg;
-		ts64->r7 = (uint64_t)user_stacksize;
-		ts64->r8 = (uint64_t)uap->flags;
-
-		thread_set_wq_state64(th, (thread_state_t)ts64);
-
-		thread_set_cthreadself(th, (uint64_t)th_pthread, isLP64);
-	}
-#elif defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__)
 	{
         /*
          * Set up i386 registers & function call.
@@ -453,26 +437,33 @@ uint32_t wq_stalled_window_usecs	= WQ_STALLED_WINDOW_USECS;
 uint32_t wq_reduce_pool_window_usecs	= WQ_REDUCE_POOL_WINDOW_USECS;
 uint32_t wq_max_timer_interval_usecs	= WQ_MAX_TIMER_INTERVAL_USECS;
 uint32_t wq_max_threads			= WORKQUEUE_MAXTHREADS;
+uint32_t wq_max_constrained_threads	= WORKQUEUE_MAXTHREADS / 8;
 
 
-SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW,
+SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &wq_yielded_threshold, 0, "");
 
-SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW,
+SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &wq_yielded_window_usecs, 0, "");
 
-SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW,
+SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &wq_stalled_window_usecs, 0, "");
 
-SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW,
+SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &wq_reduce_pool_window_usecs, 0, "");
 
-SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW,
+SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &wq_max_timer_interval_usecs, 0, "");
 
-SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW,
+SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &wq_max_threads, 0, "");
 
+SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
+	   &wq_max_constrained_threads, 0, "");
+
+
+static uint32_t wq_init_constrained_limit = 1;
+
 
 void
 workqueue_init_lock(proc_t p)
@@ -542,11 +533,9 @@ wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp)
 	 */
 	lastblocked_ts = *lastblocked_tsp;
 
-#if defined(__ppc__)
-#else
 	if ( !OSCompareAndSwap64((UInt64)lastblocked_ts, (UInt64)lastblocked_ts, lastblocked_tsp))
 		return (TRUE);
-#endif
+
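
The compare-and-swap just above is used purely as an atomic 64-bit load: on 32-bit x86 a plain read of a 64-bit timestamp can tear, and a CAS of the value against itself writes nothing but only succeeds if the whole word matched. The generic form of the idiom:

    #include <libkern/OSAtomic.h>

    /* atomically read a 64-bit value on hardware where plain loads can tear */
    static UInt64
    atomic_load64(volatile UInt64 *p)
    {
        UInt64 v;

        do {
            v = *p;                             /* possibly torn read */
        } while (!OSCompareAndSwap64(v, v, p)); /* succeeds only if consistent */
        return (v);
    }
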
 	if (lastblocked_ts >= cur_ts) {
 		/*
 		 * because the update of the timestamp when a thread blocks isn't
@@ -682,7 +671,7 @@ again:
 					}
 				}
 				if (add_thread == TRUE) {
-					retval = workqueue_addnewthread(wq);
+					retval = workqueue_addnewthread(wq, FALSE);
 					break;
 				}
 			}
@@ -774,7 +763,7 @@ workqueue_thread_yielded(void)
 		if (secs == 0 && usecs < wq_yielded_window_usecs) {
 
 			if (wq->wq_thidlecount == 0) {
-				workqueue_addnewthread(wq);
+				workqueue_addnewthread(wq, TRUE);
 				/*
 				 * 'workqueue_addnewthread' drops the workqueue lock
 				 * when creating the new thread and then retakes it before
@@ -876,14 +865,9 @@ workqueue_callback(int type, thread_t thread)
 			 * since another thread would have to get scheduled and then block after we start down 
 			 * this path), it's not a problem.  Either timestamp is adequate, so no need to retry
 			 */
-#if defined(__ppc__)
-			/*
-			 * this doesn't have to actually work reliablly for PPC, it just has to compile/link
-			 */
-			*lastblocked_ptr = (UInt64)curtime;
-#else
+
 			OSCompareAndSwap64(*lastblocked_ptr, (UInt64)curtime, lastblocked_ptr);
-#endif
+
 			if (wq->wq_itemcount)
 				WQ_TIMER_NEEDED(wq, start_timer);
 
@@ -963,9 +947,13 @@ workqueue_removethread(struct threadlist *tl)
 }
 
 
-
+/*
+ * called with the workq lock held;
+ * the lock is dropped and retaken around thread creation,
+ * and is held again on return
+ */
 static boolean_t
-workqueue_addnewthread(struct workqueue *wq)
+workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread)
 {
 	struct threadlist *tl;
 	struct uthread	*uth;
@@ -975,8 +963,25 @@ workqueue_addnewthread(struct workqueue *wq)
 	void 	 	*sright;
 	mach_vm_offset_t stackaddr;
 
-	if (wq->wq_nthreads >= wq_max_threads || wq->wq_nthreads >= (CONFIG_THREAD_MAX - 20))
+	if (wq->wq_nthreads >= wq_max_threads || wq->wq_nthreads >= (CONFIG_THREAD_MAX - 20)) {
+		wq->wq_lflags |= WQL_EXCEEDED_TOTAL_THREAD_LIMIT;
 		return (FALSE);
+	}
+	wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;
+
+	if (oc_thread == FALSE && wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
+		/*
+		 * if we're not creating this thread to service an overcommit request,
+		 * then check the size of the constrained thread pool...  if we've already
+		 * reached our max for threads scheduled from this pool, don't create a new
+		 * one... the callers of this function are prepared for failure.
+		 */
+		wq->wq_lflags |= WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
+		return (FALSE);
+	}
+	if (wq->wq_constrained_threads_scheduled < wq_max_constrained_threads)
+		wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
+
 	wq->wq_nthreads++;
 
 	p = wq->wq_proc;
@@ -990,9 +995,7 @@ workqueue_addnewthread(struct workqueue *wq)
 	tl = kalloc(sizeof(struct threadlist));
 	bzero(tl, sizeof(struct threadlist));
 
-#if defined(__ppc__)
-	stackaddr = 0xF0000000;
-#elif defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__)
 	stackaddr = 0xB0000000;
 #else
 #error Need to define a stack address hint for this architecture
@@ -1023,6 +1026,7 @@ workqueue_addnewthread(struct workqueue *wq)
 	}
 	if (kret != KERN_SUCCESS) {
 		(void) thread_terminate(th);
+		thread_deallocate(th);
 
 		kfree(tl, sizeof(struct threadlist));
 		goto failed;
@@ -1043,11 +1047,6 @@ workqueue_addnewthread(struct workqueue *wq)
 	tl->th_priority = WORKQUEUE_NUMPRIOS;
 	tl->th_policy = -1;
 
-#if defined(__ppc__)
-	//ml_fp_setvalid(FALSE);
-	thread_set_cthreadself(th, (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE), IS_64BIT_PROCESS(p));
-#endif /* __ppc__ */
-
 	uth = get_bsdthread_info(tl->th_thread);
 	uth->uu_threadlist = (void *)tl;
 
@@ -1087,6 +1086,22 @@ workq_open(struct proc *p, __unused struct workq_open_args  *uap, __unused int32
 	if ((p->p_lflag & P_LREGISTER) == 0)
 		return(EINVAL);
 
+	num_cpus = ml_get_max_cpus();
+
+	if (wq_init_constrained_limit) {
+		uint32_t limit;
+		/*
+		 * set up the limit for the constrained pool
+		 * this is a virtual pool in that we don't
+		 * maintain it on a separate idle and run list
+		 */
+		limit = num_cpus * (WORKQUEUE_NUMPRIOS + 1);
+
+		if (limit > wq_max_constrained_threads)
+			wq_max_constrained_threads = limit;
+
+		wq_init_constrained_limit = 0;
+	}
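
The constrained-pool cap scales with the machine: one thread per priority band per CPU, plus one spare band's worth. With the four priority bands this patch defines (WORKQUEUE_NUMPRIOS == 4) on, say, an 8-core machine:

    limit = num_cpus * (WORKQUEUE_NUMPRIOS + 1);    /* 8 * 5 == 40 */
    if (limit > wq_max_constrained_threads)         /* raise the boot default if the machine is big enough */
        wq_max_constrained_threads = limit;
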
 	workqueue_lock_spin(p);
 
 	if (p->p_wqptr == NULL) {
@@ -1107,8 +1122,6 @@ workq_open(struct proc *p, __unused struct workq_open_args  *uap, __unused int32
 
 		workqueue_unlock(p);
 
-	        num_cpus = ml_get_max_cpus();
-
 		wq_size = sizeof(struct workqueue) +
 			(num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint32_t)) +
 			(num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint32_t)) +
@@ -1153,7 +1166,7 @@ workq_open(struct proc *p, __unused struct workq_open_args  *uap, __unused int32
 		 * the size for the allocation of the workqueue struct
 		 */
 		nptr += (sizeof(uint64_t) - 1);
-		nptr = (char *)((long)nptr & ~(sizeof(uint64_t) - 1));
+		nptr = (char *)((uintptr_t)nptr & ~(sizeof(uint64_t) - 1));
 
 		for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
 			wq->wq_lastblocked_ts[i] = (uint64_t *)nptr;
@@ -1217,9 +1230,9 @@ workq_kernreturn(struct proc *p, struct workq_kernreturn_args  *uap, __unused in
 			        workqueue_unlock(p);
 			        return (EINVAL);
 			}
-			if (wq->wq_thidlecount == 0 && (oc_item || (wq->wq_nthreads < wq->wq_affinity_max))) {
+			if (wq->wq_thidlecount == 0 && (oc_item || (wq->wq_constrained_threads_scheduled < wq->wq_affinity_max))) {
 
-				workqueue_addnewthread(wq);
+				workqueue_addnewthread(wq, oc_item ? TRUE : FALSE);
 
 				if (wq->wq_thidlecount == 0)
 					oc_item = 0;
@@ -1230,20 +1243,6 @@ workq_kernreturn(struct proc *p, struct workq_kernreturn_args  *uap, __unused in
 		        KERNEL_DEBUG(0xefffd008 | DBG_FUNC_NONE, wq, prio, affinity, oc_item, 0);
 		        }
 			break;
-		case WQOPS_QUEUE_REMOVE: {
-
-			if ((prio < 0) || (prio >= WORKQUEUE_NUMPRIOS))
-			        return (EINVAL);
-
-			workqueue_lock_spin(p);
-
-			if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
-			        workqueue_unlock(p);
-			        return (EINVAL);
-			}
-		        error = workqueue_removeitem(wq, prio, item);
-			}
-			break;
 		case WQOPS_THREAD_RETURN: {
 
 		        th = current_thread();
@@ -1423,42 +1422,16 @@ workqueue_additem(struct workqueue *wq, int prio, user_addr_t item, int affinity
 	return (0);
 }
 
-static int 
-workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item)
-{
-	struct workitem *witem;
-	struct workitemlist *wl;
-	int error = ESRCH;
-
-	wl = (struct workitemlist *)&wq->wq_list[prio];
-
-	TAILQ_FOREACH(witem, &wl->wl_itemlist, wi_entry) {
-		if (witem->wi_item == item) {
-			TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);
-
-			if (TAILQ_EMPTY(&wl->wl_itemlist))
-				wq->wq_list_bitmap &= ~(1 << prio);
-			wq->wq_itemcount--;
-			
-			witem->wi_item = (user_addr_t)0;
-			witem->wi_affinity = 0;
-			TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);
-
-			error = 0;
-			break;
-		}
-	}
-	return (error);
-}
-
 static int workqueue_importance[WORKQUEUE_NUMPRIOS] = 
 {
-	2, 0, -2,
+	2, 0, -2, INT_MIN,
 };
 
+#define WORKQ_POLICY_TIMESHARE 1
+
 static int workqueue_policy[WORKQUEUE_NUMPRIOS] = 
 {
-	1, 1, 1,
+	WORKQ_POLICY_TIMESHARE, WORKQ_POLICY_TIMESHARE, WORKQ_POLICY_TIMESHARE, WORKQ_POLICY_TIMESHARE
 };
 
 
@@ -1536,10 +1509,20 @@ workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_add
 		}
 		goto grab_idle_thread;
 	}
-	if (wq->wq_itemcount == 0) {
+	/*
+	 * if we get here, the work should be handled by a constrained thread
+	 */
+	if (wq->wq_itemcount == 0 || wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
+		/*
+		 * no work to do, or we're already at or over the scheduling limit for
+		 * constrained threads...  just return or park the thread...
+		 * do not start the timer for this condition... if we don't have any work,
+		 * we'll check again when new work arrives... if we're over the limit, we need 1 or more
+		 * constrained threads to return to the kernel before we can dispatch work from our queue
+		 */
 	        if ((th_to_park = thread) == THREAD_NULL)
 		        goto out_of_work;
-	        goto parkit;
+		goto parkit;
 	}
 	for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
 		if (wq->wq_list_bitmap & (1 << priority)) {
@@ -1727,6 +1710,16 @@ pick_up_work:
 		witem->wi_item = (user_addr_t)0;
 		witem->wi_affinity = 0;
 		TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);
+
+		if ( !(tl->th_flags & TH_LIST_CONSTRAINED)) {
+			wq->wq_constrained_threads_scheduled++;
+			tl->th_flags |= TH_LIST_CONSTRAINED;
+		}
+	} else {
+		if (tl->th_flags & TH_LIST_CONSTRAINED) {
+			wq->wq_constrained_threads_scheduled--;
+			tl->th_flags &= ~TH_LIST_CONSTRAINED;
+		}
 	}
 	orig_priority = tl->th_priority;
 	orig_affinity_tag = tl->th_affinity_tag;
@@ -1775,16 +1768,47 @@ pick_up_work:
 		
 		KERNEL_DEBUG(0xefffd120 | DBG_FUNC_START, wq, orig_priority, tl->th_policy, 0, 0);
 
-		if (tl->th_policy != policy) {
+		if ((orig_priority == WORKQUEUE_BG_PRIOQUEUE) || (priority == WORKQUEUE_BG_PRIOQUEUE)) {
+			struct uthread *ut = NULL;
+
+	        	ut = get_bsdthread_info(th_to_run);
 
+			if (orig_priority == WORKQUEUE_BG_PRIOQUEUE) {
+				/* remove the disk throttle; importance will be reset in any case */
+#if !CONFIG_EMBEDDED
+				proc_restore_workq_bgthreadpolicy(th_to_run);
+#else /* !CONFIG_EMBEDDED */
+				if ((ut->uu_flag & UT_BACKGROUND) != 0) {
+					ut->uu_flag &= ~UT_BACKGROUND;
+					ut->uu_iopol_disk = IOPOL_NORMAL;
+				}
+#endif /* !CONFIG_EMBEDDED */
+			} 
+
+			if (priority == WORKQUEUE_BG_PRIOQUEUE) {
+#if !CONFIG_EMBEDDED
+			proc_apply_workq_bgthreadpolicy(th_to_run);
+#else /* !CONFIG_EMBEDDED */
+				if ((ut->uu_flag & UT_BACKGROUND) == 0) {
+					/* set diskthrottling */
+					ut->uu_flag |= UT_BACKGROUND;
+					ut->uu_iopol_disk = IOPOL_THROTTLE;
+				}
+#endif /* !CONFIG_EMBEDDED */
+			}
+		}
+
+		if (tl->th_policy != policy) {
 			extinfo.timeshare = policy;
 			(void)thread_policy_set_internal(th_to_run, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);
 
 			tl->th_policy = policy;
 		}
+
                 precedinfo.importance = workqueue_importance[priority];
                 (void)thread_policy_set_internal(th_to_run, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
 
+
 		KERNEL_DEBUG(0xefffd120 | DBG_FUNC_END, wq,  priority, policy, 0, 0);
 	}
 	if (kdebug_enable) {
@@ -1858,12 +1882,18 @@ parkit:
 	wq->wq_thscheduled_count[tl->th_priority][tl->th_affinity_tag]--;
 	wq->wq_threads_scheduled--;
 
+	if (tl->th_flags & TH_LIST_CONSTRAINED) {
+		wq->wq_constrained_threads_scheduled--;
+		wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
+		tl->th_flags &= ~TH_LIST_CONSTRAINED;
+	}
 	if (wq->wq_thidlecount < 100)
 		us_to_wait = wq_reduce_pool_window_usecs - (wq->wq_thidlecount * (wq_reduce_pool_window_usecs / 100));
 	else
 		us_to_wait = wq_reduce_pool_window_usecs / 100;
 
 	wq->wq_thidlecount++;
+	wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;
 
 	assert_wait_timeout((caddr_t)tl, (THREAD_INTERRUPTIBLE), us_to_wait, NSEC_PER_USEC);
 
@@ -2080,34 +2110,11 @@ wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
 	}
 }
 
+
 int
 setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl)
 {
-#if defined(__ppc__)
-	/*
-	 * Set up PowerPC registers...
-	 * internally they are always kept as 64 bit and
-	 * since the register set is the same between 32 and 64bit modes
-	 * we don't need 2 different methods for setting the state
-	 */
-	{
-	        ppc_thread_state64_t state64;
-		ppc_thread_state64_t *ts64 = &state64;
-
-		ts64->srr0 = (uint64_t)p->p_wqthread;
-		ts64->r1 = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_ARGSAVE_LEN - C_RED_ZONE);
-		ts64->r3 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
-		ts64->r4 = (uint64_t)(tl->th_thport);
-		ts64->r5 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
-		ts64->r6 = (uint64_t)item;
-		ts64->r7 = (uint64_t)reuse_thread;
-		ts64->r8 = (uint64_t)0;
-
-		if ((reuse_thread != 0) && (ts64->r3 == (uint64_t)0))
-			panic("setup_wqthread: setting reuse thread with null pthread\n");
-		thread_set_wq_state64(th, (thread_state_t)ts64);
-	}
-#elif defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__)
 	int isLP64 = 0;
 
 	isLP64 = IS_64BIT_PROCESS(p);
@@ -2183,6 +2190,14 @@ fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
 	pwqinfo->pwq_nthreads = wq->wq_nthreads;
 	pwqinfo->pwq_runthreads = activecount;
 	pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
+	pwqinfo->pwq_state = 0;
+
+	if (wq->wq_lflags & WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT)
+		pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
+
+	if (wq->wq_lflags & WQL_EXCEEDED_TOTAL_THREAD_LIMIT)
+		pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
+
 out:
 	workqueue_unlock(p);
 	return(error);
@@ -2308,5 +2323,6 @@ pthread_init(void)
 	
 	pth_global_hashinit();
 	psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
+	psynch_zoneinit();
 #endif /* PSYNCH */
 }
diff --git a/bsd/kern/subr_log.c b/bsd/kern/subr_log.c
index d39eccd5d..2cd5c3ac2 100644
--- a/bsd/kern/subr_log.c
+++ b/bsd/kern/subr_log.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -81,6 +81,7 @@
 #include <sys/conf.h>
 #include <sys/sysctl.h>
 #include <kern/kalloc.h>
+#include <pexpert/pexpert.h>
 
 /* XXX should be in a common header somewhere */
 extern void klogwakeup(void);
@@ -92,6 +93,9 @@ extern void logwakeup(void);
 #define LOG_ASYNC	0x04
 #define LOG_RDWAIT	0x08
 
+/* All globals should be accessed under LOG_LOCK() */
+
+/* logsoftc only valid while log_open=1 */
 struct logsoftc {
 	int	sc_state;		/* see above for possibilities */
 	struct	selinfo sc_selp;	/* thread waiting for select */
@@ -99,11 +103,11 @@ struct logsoftc {
 } logsoftc;
 
 int	log_open;			/* also used in log() */
-char smsg_bufc[MSG_BSIZE]; /* static buffer */
-struct msgbuf temp_msgbuf = {0,MSG_BSIZE,0,0,smsg_bufc};
-struct msgbuf *msgbufp;
-static int _logentrypend = 0;
-static int log_inited = 0;
+char smsg_bufc[CONFIG_MSG_BSIZE]; /* static buffer */
+struct msgbuf msgbuf = {MSG_MAGIC,sizeof(smsg_bufc),0,0,smsg_bufc};
+struct msgbuf *msgbufp = &msgbuf;
+static int logentrypend = 0;
+
 /* the following are implemented in osfmk/kern/printf.c  */
 extern void bsd_log_lock(void);
 extern void bsd_log_unlock(void);
@@ -125,6 +129,16 @@ extern d_select_t       logselect;
 #define	LOG_LOCK() bsd_log_lock()
 #define	LOG_UNLOCK() bsd_log_unlock()
 
+#if DEBUG
+#define LOG_SETSIZE_DEBUG(x...) kprintf(x)
+#else
+#define LOG_SETSIZE_DEBUG(x...) do { } while(0)
+#endif
+
+static int sysctl_kern_msgbuf(struct sysctl_oid *oidp,
+				void *arg1,
+				int arg2,
+				struct sysctl_req *req);
 
 /*ARGSUSED*/
 int
@@ -135,21 +149,9 @@ logopen(__unused dev_t dev, __unused int flags, __unused int mode, struct proc *
 		LOG_UNLOCK();
 		return (EBUSY);
 	}
-	log_open = 1;
 	logsoftc.sc_pgid = p->p_pid;		/* signal process only */
-	/*
-	 * Potential race here with putchar() but since putchar should be
-	 * called by autoconf, msg_magic should be initialized by the time
-	 * we get here.
-	 */
-	if (msgbufp->msg_magic != MSG_MAGIC) {
-		register int i;
+	log_open = 1;
 
-		msgbufp->msg_magic = MSG_MAGIC;
-		msgbufp->msg_bufx = msgbufp->msg_bufr = 0;
-		for (i=0; i < MSG_BSIZE; i++)
-			msgbufp->msg_bufc[i] = 0;
-	}
 	LOG_UNLOCK();
 
 	return (0);
@@ -160,9 +162,9 @@ int
 logclose(__unused dev_t dev, __unused int flag, __unused int devtype, __unused struct proc *p)
 {
 	LOG_LOCK();
-	log_open = 0;
 	selwakeup(&logsoftc.sc_selp);
 	selthreadclear(&logsoftc.sc_selp);
+	log_open = 0;
 	LOG_UNLOCK();
 	return (0);
 }
@@ -171,7 +173,7 @@ logclose(__unused dev_t dev, __unused int flag, __unused int devtype, __unused s
 int
 logread(__unused dev_t dev, struct uio *uio, int flag)
 {
-	register long l;
+	int l;
 	int error = 0;
 
 	LOG_LOCK();
@@ -202,20 +204,24 @@ logread(__unused dev_t dev, struct uio *uio, int flag)
 	logsoftc.sc_state &= ~LOG_RDWAIT;
 
 	while (uio_resid(uio) > 0) {
+		int readpos;
+
 		l = msgbufp->msg_bufx - msgbufp->msg_bufr;
 		if (l < 0)
 			l = msgbufp->msg_size - msgbufp->msg_bufr;
 		l = min(l, uio_resid(uio));
 		if (l == 0)
 			break;
+
+		readpos = msgbufp->msg_bufr;
 		LOG_UNLOCK();
-		error = uiomove((caddr_t)&msgbufp->msg_bufc[msgbufp->msg_bufr],
-			(int)l, uio);
+		error = uiomove((caddr_t)&msgbufp->msg_bufc[readpos],
+			l, uio);
 		LOG_LOCK();
 		if (error)
 			break;
-		msgbufp->msg_bufr += l;
-		if (msgbufp->msg_bufr < 0 || msgbufp->msg_bufr >= msgbufp->msg_size)
+		msgbufp->msg_bufr = readpos + l;
+		if (msgbufp->msg_bufr >= msgbufp->msg_size)
 			msgbufp->msg_bufr = 0;
 	}
 out:
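
The chunk length l is the readable run that does not cross the wrap point: if the read index is numerically ahead of the write index, the first pass reads to the end of the buffer and the next pass resumes at offset 0. With a 4096-byte buffer, msg_bufr == 4000 and msg_bufx == 100:

    l = msg_bufx - msg_bufr;        /* 100 - 4000 == -3900 */
    if (l < 0)
        l = msg_size - msg_bufr;    /* first chunk: 96 bytes (offsets 4000..4095) */
    /* after that copy msg_bufr wraps to 0 and the next pass reads 100 bytes */
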
@@ -272,9 +278,13 @@ logwakeup(void)
 void
 klogwakeup(void)
 {
-	if (_logentrypend) {
-		_logentrypend = 0;
+	LOG_LOCK();
+	if (logentrypend && log_open) {
+		logentrypend = 0; /* only reset if someone will be reading */
+		LOG_UNLOCK();
 		logwakeup();
+	} else {
+		LOG_UNLOCK();
 	}
 }
 
@@ -282,7 +292,7 @@ klogwakeup(void)
 int
 logioctl(__unused dev_t dev, u_long com, caddr_t data, __unused int flag, __unused struct proc *p)
 {
-	long l;
+	int l;
 
 	LOG_LOCK();
 	switch (com) {
@@ -328,10 +338,7 @@ logioctl(__unused dev_t dev, u_long com, caddr_t data, __unused int flag, __unus
 void
 bsd_log_init(void)
 {
-	if (!log_inited) { 
-		msgbufp = &temp_msgbuf;
-		log_inited = 1;
-	}
+	/* After this point, we must be ready to accept characters */
 }
 
 
@@ -353,24 +360,12 @@ bsd_log_init(void)
 void
 log_putc_locked(char c)
 {
-	register struct msgbuf *mbp;
-
-	if (!log_inited) {
-		panic("bsd log is not inited");
-	}
+	struct msgbuf *mbp;
 
 	mbp = msgbufp; 
-	if (mbp-> msg_magic != MSG_MAGIC) {
-		register int i;
-
-		mbp->msg_magic = MSG_MAGIC;
-		mbp->msg_bufx = mbp->msg_bufr = 0;
-		for (i=0; i < MSG_BSIZE; i++)
-			mbp->msg_bufc[i] = 0;
-	}
 	mbp->msg_bufc[mbp->msg_bufx++] = c;
-	_logentrypend = 1;
-	if (mbp->msg_bufx < 0 || mbp->msg_bufx >= msgbufp->msg_size)
+	logentrypend = 1;
+	if (mbp->msg_bufx >= msgbufp->msg_size)
 		mbp->msg_bufx = 0;
 }
 
@@ -391,9 +386,6 @@ log_putc_locked(char c)
 void
 log_putc(char c)
 {
-	if (!log_inited) {
-		panic("bsd log is not inited");
-	}
 	LOG_LOCK();
 	log_putc_locked(c);
 	LOG_UNLOCK();
@@ -406,59 +398,143 @@ log_putc(char c)
  * to the kernel command line, and to read the current size using
  *   sysctl kern.msgbuf
  * If there is no parameter on the kernel command line, the buffer is
- * allocated statically and is MSG_BSIZE characters in size, otherwise
- * memory is dynamically allocated.
- * This function may only be called once, during kernel initialization.
- * Memory management must already be up. The buffer must not have
- * overflown yet.
+ * allocated statically and is CONFIG_MSG_BSIZE characters in size, otherwise
+ * memory is dynamically allocated. Memory management must already be up.
  */
-void
-log_setsize(long size) {
+int
+log_setsize(int size) {
 	char *new_logdata;
-	if (msgbufp->msg_size!=MSG_BSIZE) {
-		printf("log_setsize: attempt to change size more than once\n");
-		return;
-	}
-	if (size==MSG_BSIZE)
-		return;
-	if (size<MSG_BSIZE) { /* we don't support reducing the log size */
-		printf("log_setsize: can't decrease log size\n");
-		return;
-	}
+	int new_logsize, new_bufr, new_bufx;
+	char *old_logdata;
+	int old_logsize, old_bufr, old_bufx;
+	int i, count;
+	char *p, ch;
+
+	if (size > MAX_MSG_BSIZE)
+		return (EINVAL);
+
+	if (size <= 0)
+		return (EINVAL);
+
+	new_logsize = size;
 	if (!(new_logdata = (char*)kalloc(size))) {
 		printf("log_setsize: unable to allocate memory\n");
-		return;
+		return (ENOMEM);
 	}
+	bzero(new_logdata, new_logsize);
+
 	LOG_LOCK();
-	bcopy(smsg_bufc, new_logdata, MSG_BSIZE);
-	bzero(new_logdata+MSG_BSIZE, size - MSG_BSIZE);
+
+	old_logsize = msgbufp->msg_size;
+	old_logdata = msgbufp->msg_bufc;
+	old_bufr = msgbufp->msg_bufr;
+	old_bufx = msgbufp->msg_bufx;
+
+	LOG_SETSIZE_DEBUG("log_setsize(%d): old_logdata %p old_logsize %d old_bufr %d old_bufx %d\n",
+					  size, old_logdata, old_logsize, old_bufr, old_bufx);
+
+	/* start "new_logsize" bytes before the write pointer */
+	if (new_logsize <= old_bufx) {
+		count = new_logsize;
+		p = old_logdata + old_bufx - count;
+	} else {
+		/*
+		 * if new buffer is bigger, copy what we have and let the
+		 * bzero above handle the difference
+		 */
+		count = MIN(new_logsize, old_logsize);
+		p = old_logdata + old_logsize - (count - old_bufx);
+	}
+	for (i = 0; i < count; i++) {
+		if (p >= old_logdata + old_logsize)
+			p = old_logdata;
+
+		ch = *p++;
+		new_logdata[i] = ch;
+	}
+
+	new_bufx = i;
+	if (new_bufx >= new_logsize)
+		new_bufx = 0;
+	msgbufp->msg_bufx = new_bufx;
+
+	new_bufr = old_bufx - old_bufr; /* how much were we trailing bufx by? */
+	if (new_bufr < 0)
+		new_bufr += old_logsize;
+	new_bufr = new_bufx - new_bufr; /* now relative to oldest data in new buffer */
+	if (new_bufr < 0)
+		new_bufr += new_logsize;
+	msgbufp->msg_bufr = new_bufr;
+
+	msgbufp->msg_size = new_logsize;
+	msgbufp->msg_bufc = new_logdata;
+
+	LOG_SETSIZE_DEBUG("log_setsize(%d): new_logdata %p new_logsize %d new_bufr %d new_bufx %d\n",
+					  size, new_logdata, new_logsize, new_bufr, new_bufx);
+
+	LOG_UNLOCK();
+
 	/* this memory is now dead - clear it so that it compresses better
 	   in case of suspend to disk etc. */
-	bzero(smsg_bufc, MSG_BSIZE);
-	msgbufp->msg_size = size;
-	msgbufp->msg_bufc = new_logdata;
+	bzero(old_logdata, old_logsize);
+	if (old_logdata != smsg_bufc) {
+		/* dynamic memory that must be freed */
+		kfree(old_logdata, old_logsize);
+	}
+
+	printf("set system log size to %d bytes\n", new_logsize);
+
+	return 0;
+}
+
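
The index arithmetic preserves the reader's lag across the resize. A worked example: an old 4096-byte buffer with msg_bufx == 100 (writer) and msg_bufr == 4000 (reader, 196 unread bytes), growing to 8192 bytes:

    /* copy starts at the oldest byte and wraps once around the old buffer:  */
    /*   count = MIN(8192, 4096) == 4096,  p = old_logdata + 100             */
    /* after the copy loop:                                                  */
    /*   new_bufx = 4096 (all old bytes now sit at the start of the buffer)  */
    /* reader lag: old_bufx - old_bufr == -3900, + 4096 -> 196 bytes behind  */
    /*   new_bufr = 4096 - 196 == 3900                                       */
    /* so the reader still trails the writer by exactly the 196 unread bytes */
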
+SYSCTL_PROC(_kern, OID_AUTO, msgbuf, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, sysctl_kern_msgbuf, "I", "");
+
+static int sysctl_kern_msgbuf(struct sysctl_oid *oidp __unused,
+							  void *arg1 __unused,
+							  int arg2 __unused,
+							  struct sysctl_req *req)
+{
+	int old_bufsize, bufsize;
+	int error;
+
+	LOG_LOCK();
+	old_bufsize = bufsize = msgbufp->msg_size;
 	LOG_UNLOCK();
-	printf("set system log size to %ld bytes\n", msgbufp->msg_size);
+
+	error = sysctl_io_number(req, bufsize, sizeof(bufsize), &bufsize, NULL);
+	if (error)
+		return (error);
+
+	if (bufsize != old_bufsize) {
+		error = log_setsize(bufsize);
+	}
+
+	return (error);
 }
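
With this handler registered, the log size is both readable and writable from user space. A userspace sketch (error handling omitted):

    #include <sys/sysctl.h>

    int size;
    size_t len = sizeof(size);
    sysctlbyname("kern.msgbuf", &size, &len, NULL, 0);      /* read current size */

    int newsize = 131072;
    sysctlbyname("kern.msgbuf", NULL, NULL, &newsize,       /* grow the log */
        sizeof(newsize));
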
 
-SYSCTL_LONG(_kern, OID_AUTO, msgbuf, CTLFLAG_RD, &temp_msgbuf.msg_size, "");
 
 /*
- * This should be called by single user mode /sbin/dmesg only.
+ * This should be called only by /sbin/dmesg, via libproc.
  * It returns as much data still in the buffer as possible.
  */
 int
 log_dmesg(user_addr_t buffer, uint32_t buffersize, int32_t * retval) {
 	uint32_t i;
-	uint32_t localbuff_size = (msgbufp->msg_size + 2);
+	uint32_t localbuff_size;
 	int error = 0, newl, skip;
 	char *localbuff, *p, *copystart, ch;
-	long copysize;	
+	size_t copysize;
 
+	LOG_LOCK();
+	localbuff_size = (msgbufp->msg_size + 2); /* + '\n' + '\0' */
+	LOG_UNLOCK();
+
+	/* Allocate a temporary non-circular buffer for copyout */
 	if (!(localbuff = (char *)kalloc(localbuff_size))) {
 		printf("log_dmesg: unable to allocate memory\n");
 		return (ENOMEM);
 	}
+
 	/* in between here, the log could become bigger, but that's fine */
 	LOG_LOCK();
 
@@ -483,7 +559,7 @@ log_dmesg(user_addr_t buffer, uint32_t buffersize, int32_t * retval) {
 		}
 		if (ch == '\0')
 			continue;
-		newl = ch == '\n';
+		newl = (ch == '\n');
 		localbuff[i++] = ch;
 		/* The original version of this routine contained a buffer
 		 * overflow. At the time, a "small" targeted fix was desired
diff --git a/bsd/kern/subr_prof.c b/bsd/kern/subr_prof.c
index 5b1024141..4d07853d9 100644
--- a/bsd/kern/subr_prof.c
+++ b/bsd/kern/subr_prof.c
@@ -152,14 +152,28 @@ kmstartup(void)
 }
 
 /*
- * Return kernel profiling information.
+ * XXX		These should be broken out into per-argument OID values,
+ * XXX		since there are no sub-OID parameter values, but unfortunately
+ * XXX		there is barely enough time for an initial conversion.
+ *
+ * Note:	These items appear to be read/write.
  */
-int
+STATIC int
+sysctl_doprofhandle SYSCTL_HANDLER_ARGS
+{
-sysctl_doprof(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
-              user_addr_t newp, size_t newlen)
-{
+	__unused int cmd = oidp->oid_arg2;	/* subcommand */
+	int *name = arg1;		/* oid element argument vector */
+	int namelen = arg2;		/* number of oid element arguments */
+	user_addr_t oldp = req->oldptr;	/* user buffer copy out address */
+	size_t *oldlenp = &req->oldlen;	/* user buffer copy out size */
+	user_addr_t newp = req->newptr;	/* user buffer copy in address */
+	size_t newlen = req->newlen;	/* user buffer copy in size */
+
 	struct gmonparam *gp = &_gmonparam;
-	int error;
+	int error = 0;
 
 	/* all sysctl names at this level are terminal */
 	if (namelen != 1)
@@ -169,28 +183,44 @@ sysctl_doprof(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
 	case GPROF_STATE:
 		error = sysctl_int(oldp, oldlenp, newp, newlen, &gp->state);
 		if (error)
-			return (error);
+			break;
 		if (gp->state == GMON_PROF_OFF)
 			stopprofclock(kernproc);
 		else
 			startprofclock(kernproc);
-		return (0);
+		break;
 	case GPROF_COUNT:
-		return (sysctl_struct(oldp, oldlenp, newp, newlen, 
-		                      gp->kcount, gp->kcountsize));
+		error = sysctl_struct(oldp, oldlenp, newp, newlen, 
+		                      gp->kcount, gp->kcountsize);
+		break;
 	case GPROF_FROMS:
-		return (sysctl_struct(oldp, oldlenp, newp, newlen,
-		                      gp->froms, gp->fromssize));
+		error = sysctl_struct(oldp, oldlenp, newp, newlen,
+		                      gp->froms, gp->fromssize);
+		break;
 	case GPROF_TOS:
-		return (sysctl_struct(oldp, oldlenp, newp, newlen,
-		                      gp->tos, gp->tossize));
+		error = sysctl_struct(oldp, oldlenp, newp, newlen,
+		                      gp->tos, gp->tossize);
+		break;
 	case GPROF_GMONPARAM:
-		return (sysctl_rdstruct(oldp, oldlenp, newp, gp, sizeof *gp));
+		error = sysctl_rdstruct(oldp, oldlenp, newp, gp, sizeof *gp);
+		break;
 	default:
-		return (ENOTSUP);
+		error = ENOTSUP;
+		break;
 	}
-	/* NOTREACHED */
+
+	/* adjust index so we return the right required/consumed amount */
+	if (!error)
+		req->oldidx += req->oldlen;
+
+	return(error);
 }
+SYSCTL_PROC(_kern, KERN_PROF, prof, CTLTYPE_NODE | CTLFLAG_RW | CTLFLAG_LOCKED,
+	0,			/* Pointer argument (arg1) */
+	0,			/* Integer argument (arg2) */
+	sysctl_doprofhandle,	/* Handler function */
+	NULL,			/* No explicit data */
+	"");
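
The same conversion recipe applies to any old-style sysctl handler: wrap the logic in a SYSCTL_HANDLER_ARGS function, pull the user buffers out of req, and register it with SYSCTL_PROC. A minimal sketch for a hypothetical integer node (my_handler and my_node are illustrative, not part of the patch):

    static int my_value;

    static int
    my_handler SYSCTL_HANDLER_ARGS
    {
        int value = my_value;
        int error;

        error = sysctl_io_number(req, value, sizeof(value), &value, NULL);
        if (error || !req->newptr)
            return (error);         /* read-only access, or copy failed */
        my_value = value;           /* accept the newly written value */
        return (0);
    }
    SYSCTL_PROC(_kern, OID_AUTO, my_node, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
        0, 0, my_handler, "I", "");
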
 
 
 /*
diff --git a/bsd/kern/sys_generic.c b/bsd/kern/sys_generic.c
index 11a276bbd..bacd02b79 100644
--- a/bsd/kern/sys_generic.c
+++ b/bsd/kern/sys_generic.c
@@ -152,6 +152,21 @@ __private_extern__ int	dofilewrite(vfs_context_t ctx, struct fileproc *fp,
 __private_extern__ int	preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode);
 __private_extern__ void	donefileread(struct proc *p, struct fileproc *fp_ret, int fd);
 
+
+/* Conflict wait queue for when selects collide (opaque type) */
+struct wait_queue select_conflict_queue;
+
+/*
+ * Init routine called from bsd_init.c
+ */
+void select_wait_queue_init(void);
+void
+select_wait_queue_init(void)
+{
+	wait_queue_init(&select_conflict_queue, SYNC_POLICY_FIFO);
+}
+
+
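
A fileproc can record only one waiting select set in f_waddr, so when a second select lands on the same fp the later selector links itself to this shared conflict queue instead; the wakeup path can then post to the conflict queue as well as the recorded set. The collision branch later in this patch boils down to:

    if (fp->f_flags & FP_INSELECT) {
        /* f_waddr is already owned; fall back to the shared conflict queue */
        fp->f_flags |= FP_SELCONFLICT;
        wait_queue_link(&select_conflict_queue, (wait_queue_set_t)wqsub);
    } else {
        fp->f_flags |= FP_INSELECT;
        fp->f_waddr = (void *)wqsub;
    }
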
 #if NETAT
 extern int appletalk_inited;
 #endif /* NETAT */
@@ -570,7 +585,8 @@ dofilewrite(vfs_context_t ctx, struct fileproc *fp,
 			error == EINTR || error == EWOULDBLOCK))
 			error = 0;
 		/* The socket layer handles SIGPIPE */
-		if (error == EPIPE && fp->f_type != DTYPE_SOCKET) {
+		if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
+		    (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) == 0) {
 			/* XXX Raise the signal on the thread? */
 			psignal(vfs_context_proc(ctx), SIGPIPE);
 		}
@@ -662,13 +678,14 @@ wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
 						error == EINTR || error == EWOULDBLOCK))
 		        error = 0;
 		/* The socket layer handles SIGPIPE */
-		if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
+		if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
+		    (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) == 0)
 		        psignal(p, SIGPIPE);
 	}
 	*retval = count - uio_resid(uio);
 
 out:
-	if ( (error == 0) )
+	if (error == 0)
 	        fp_drop_written(p, fdes, fp);
 	else
 	        fp_drop(p, fdes, fp, 0);
@@ -937,8 +954,8 @@ extern int selcontinue(int error);
 extern int selprocess(int error, int sel_pass);
 static int selscan(struct proc *p, struct _select * sel,
 			int nfd, int32_t *retval, int sel_pass, wait_queue_sub_t wqsub);
-static int selcount(struct proc *p, u_int32_t *ibits, u_int32_t *obits,
-			int nfd, int * count, int *kfcount);
+static int selcount(struct proc *p, u_int32_t *ibits, int nfd, int *count);
+static int seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount);
 static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);
 
 /*
@@ -966,7 +983,6 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retva
 	struct _select *sel;
 	int needzerofill = 1;
 	int count = 0;
-	int kfcount = 0;
 
 	th_act = current_thread();
 	uth = get_bsdthread_info(th_act);
@@ -1070,13 +1086,11 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retva
 	else
 		sel->abstime = 0;
 
-	sel->kfcount = 0;
-	if ( (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count, &kfcount)) ) {
+	if ( (error = selcount(p, sel->ibits, uap->nd, &count)) ) {
 			goto continuation;
 	}
 
 	sel->count = count;
-	sel->kfcount = kfcount;
 	size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK);
 	if (uth->uu_allocsize) {
 		if (uth->uu_wqset == 0)
@@ -1090,7 +1104,6 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retva
 				panic("failed to allocate memory for waitqueue\n");
 		}
 	} else {
-		sel->count = count;
 		uth->uu_allocsize = size;
 		uth->uu_wqset = (wait_queue_set_t)kalloc(uth->uu_allocsize);
 		if (uth->uu_wqset == (wait_queue_set_t)NULL)
@@ -1101,7 +1114,18 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retva
 	wait_queue_set_init(uth->uu_wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));
 
 continuation:
-	return selprocess(error, SEL_FIRSTPASS);
+
+	if (error) {
+		/*
+		 * We have already cleaned up any state we established,
+		 * either locally or as a result of selcount().  We don't
+		 * need to wait_subqueue_unlink_all(), since we haven't set
+		 * anything at this point.
+		 */
+		return (error);
+	}
+
+	return selprocess(0, SEL_FIRSTPASS);
 }
 
 int
@@ -1110,6 +1134,13 @@ selcontinue(int error)
 	return selprocess(error, SEL_SECONDPASS);
 }
 
+
+/*
+ * selprocess
+ *
+ * Parameters:	error			The error code from our caller
+ *		sel_pass		The pass we are on
+ */
 int
 selprocess(int error, int sel_pass)
 {
@@ -1134,20 +1165,24 @@ selprocess(int error, int sel_pass)
 	uth = get_bsdthread_info(th_act);
 	sel = &uth->uu_select;
 
-	/* if it is first pass wait queue is not setup yet */
 	if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
 			unwind = 0;
 	if (sel->count == 0)
 			unwind = 0;
 retry:
 	if (error != 0) {
-	  goto done;
+		sel_pass = SEL_FIRSTPASS;	/* Reset for seldrop */
+		goto done;
 	}
 
 	ncoll = nselcoll;
 	OSBitOrAtomic(P_SELECT, &p->p_flag);
 	/* skip scans if the select is just for timeouts */
 	if (sel->count) {
+		/*
+		 * Clear out any dangling refs from prior calls; technically
+		 * there should not be any.
+		 */
 		if (sel_pass == SEL_FIRSTPASS)
 			wait_queue_sub_clearrefs(uth->uu_wqset);
 
@@ -1215,10 +1250,10 @@ retry:
 		error = 0;
 	}
 
-	sel_pass = SEL_SECONDPASS;
 	if (error == 0) {
+		sel_pass = SEL_SECONDPASS;
 		if (!prepost)
-			somewakeup =1;
+			somewakeup = 1;
 		goto retry;
 	}
 done:
@@ -1253,6 +1288,23 @@ done:
 	return(error);
 }
 
+
+/*
+ * selscan
+ *
+ * Parameters:	p			Process performing the select
+ *		sel			The per-thread select context structure
+ *		nfd			The number of file descriptors to scan
+ *		retval			The per thread system call return area
+ *		sel_pass		Which pass this is; allowed values are
+ *						SEL_FIRSTPASS and SEL_SECONDPASS
+ *		wqsub			The per thread wait queue set
+ *
+ * Returns:	0			Success
+ *		EIO			Invalid p->p_fd field XXX Obsolete?
+ *		EBADF			One of the files in the bit vector is
+ *						invalid.
+ */
 static int
 selscan(struct proc *p, struct _select *sel, int nfd, int32_t *retval,
 	int sel_pass, wait_queue_sub_t wqsub)
@@ -1261,16 +1313,15 @@ selscan(struct proc *p, struct _select *sel, int nfd, int32_t *retval,
 	int msk, i, j, fd;
 	u_int32_t bits;
 	struct fileproc *fp;
-	int n = 0;
-	int nc = 0;
+	int n = 0;		/* count of bits */
+	int nc = 0;		/* bit vector offset (nc'th bit) */
 	static int flag[3] = { FREAD, FWRITE, 0 };
 	u_int32_t *iptr, *optr;
 	u_int nw;
 	u_int32_t *ibits, *obits;
 	char * wql;
 	char * wql_ptr;
-	int count, kfcount;
-	vnode_t vp;
+	int count;
 	struct vfs_context context = *vfs_context_current();
 
 	/*
@@ -1288,57 +1339,9 @@ selscan(struct proc *p, struct _select *sel, int nfd, int32_t *retval,
 	nw = howmany(nfd, NFDBITS);
 
 	count = sel->count;
-	kfcount = sel->kfcount;
-
-	if (kfcount > count)
-		panic("selscan: count < kfcount");
-
-	if (kfcount != 0) {
-		proc_fdlock(p);
-		for (msk = 0; msk < 3; msk++) {
-			iptr = (u_int32_t *)&ibits[msk * nw];
-			optr = (u_int32_t *)&obits[msk * nw];
-
-			for (i = 0; i < nfd; i += NFDBITS) {
-				bits = iptr[i/NFDBITS];
-
-				while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
-					bits &= ~(1 << j);
-					fp = fdp->fd_ofiles[fd];
-
-					if (fp == NULL ||
-						(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
-						proc_fdunlock(p);
-						return(EBADF);
-					}
-					if (sel_pass == SEL_SECONDPASS) {
-						wql_ptr = (char *)0;
-						fp->f_flags &= ~FP_INSELECT;
-						fp->f_waddr = (void *)0;
-					} else {
-					        wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
-						fp->f_flags |= FP_INSELECT;
-						fp->f_waddr = (void *)wqsub;
-					}
-
-					context.vc_ucred = fp->f_cred;
-
-					if (fp->f_ops && (fp->f_type == DTYPE_VNODE)
-							&& ((vp = (struct vnode *)fp->f_data)  != NULLVP)
-							&& (vp->v_type == VCHR)
-						&& fo_select(fp, flag[msk], wql_ptr, &context)) {
-						optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
-						n++;
-					}
-					nc++;
-				}
-			}
-		}
-		proc_fdunlock(p);
-	}
 
 	nc = 0;
-	if (kfcount != count) {
+	if (count) {
 		proc_fdlock(p);
 		for (msk = 0; msk < 3; msk++) {
 			iptr = (u_int32_t *)&ibits[msk * nw];
@@ -1351,29 +1354,37 @@ selscan(struct proc *p, struct _select *sel, int nfd, int32_t *retval,
 					bits &= ~(1 << j);
 					fp = fdp->fd_ofiles[fd];
 
-					if (fp == NULL ||
-						(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
+					if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
+						/*
+						 * If we abort because of a bad
+						 * fd, let the caller unwind...
+						 */
 						proc_fdunlock(p);
 						return(EBADF);
 					}
 					if (sel_pass == SEL_SECONDPASS) {
 						wql_ptr = (char *)0;
-						fp->f_flags &= ~FP_INSELECT;
-						fp->f_waddr = (void *)0;
+						if ((fp->f_flags & FP_INSELECT) && (fp->f_waddr == (void *)wqsub)) {
+							fp->f_flags &= ~FP_INSELECT;
+							fp->f_waddr = (void *)0;
+						}
 					} else {
 					        wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
-						fp->f_flags |= FP_INSELECT;
-						fp->f_waddr = (void *)wqsub;
+						if (fp->f_flags & FP_INSELECT) {
+							/* someone is already in select on this fp */
+							fp->f_flags |= FP_SELCONFLICT;
+							wait_queue_link(&select_conflict_queue, (wait_queue_set_t)wqsub);
+						} else {
+							fp->f_flags |= FP_INSELECT;
+							fp->f_waddr = (void *)wqsub;
+						}
 					}
 
 					context.vc_ucred = fp->f_cred;
 
-					if ((fp->f_ops && 
-						((fp->f_type != DTYPE_VNODE)
-						|| (((vp = (struct vnode *)fp->f_data)  != NULLVP)
-							&& (vp->v_type != VCHR))
-						)
-						&& fo_select(fp, flag[msk], wql_ptr, &context))) {
+					/* Do the select; set the output bit if it reports ready */
+					if (fp->f_ops
+						&& fo_select(fp, flag[msk], wql_ptr, &context)) {
 						optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
 						n++;
 					}
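
The selscan() hunk above is the core of the conflict fix: a second thread selecting on the same fileproc no longer steals f_waddr from the first; it marks the fp FP_SELCONFLICT and links its own wait queue set to the shared select_conflict_queue instead. A minimal userspace sketch of that decision, with stand-in types, illustrative flag values (the real ones live elsewhere in the kernel headers), and the conflict queue modeled as a plain counter:

	#include <stdio.h>

	#define FP_INSELECT	0x1	/* illustrative values */
	#define FP_SELCONFLICT	0x2

	struct fileproc { int f_flags; void *f_waddr; };

	static int conflict_queue_links;	/* stands in for select_conflict_queue */

	static void register_select(struct fileproc *fp, void *wqset)
	{
		if (fp->f_flags & FP_INSELECT) {
			/* someone is already in select on this fp */
			fp->f_flags |= FP_SELCONFLICT;
			conflict_queue_links++;	/* wait_queue_link(&select_conflict_queue, wqset) */
		} else {
			fp->f_flags |= FP_INSELECT;
			fp->f_waddr = wqset;
		}
	}

	int main(void)
	{
		struct fileproc fp = { 0, NULL };
		int wq_a, wq_b;

		register_select(&fp, &wq_a);	/* thread A: direct registration */
		register_select(&fp, &wq_b);	/* thread B: routed via conflict queue */
		printf("flags=0x%x links=%d\n", fp.f_flags, conflict_queue_links);
		return 0;
	}

On the second pass (and in seldrop_locked() below), the flag and f_waddr are cleared only by the thread that owns the registration, which is why both sites compare f_waddr against the caller's own wait queue set first.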
@@ -1476,9 +1487,9 @@ poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval)
 		/* convert the poll event into a kqueue kevent */
 		kev.ident = fds[i].fd;
 		kev.flags = EV_ADD | EV_ONESHOT | EV_POLL;
-		kev.fflags = NOTE_LOWAT;
-		kev.data = 1; /* efficiency be damned: any data should trigger */
 		kev.udata = CAST_USER_ADDR_T(&fds[i]);
+		kev.fflags = 0;
+		kev.data = 0;
 		kev.ext[0] = 0;
 		kev.ext[1] = 0;
 
@@ -1608,9 +1619,32 @@ seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p)
 	return (1);
 }
 
+/*
+ * selcount
+ *
+ * Count the number of bits set in the input bit vector, and establish an
+ * outstanding fp->f_iocount for each of the descriptors which will be in
+ * use in the select operation.
+ *
+ * Parameters:	p			The process doing the select
+ *		ibits			The input bit vector
+ *		nfd			The number of fd's in the vector
+ *		countp			Pointer to where to store the bit count
+ *
+ * Returns:	0			Success
+ *		EIO			Bad per process open file table
+ *		EBADF			One of the bits in the input bit vector
+ *						references an invalid fd
+ *
+ * Implicit:	*countp (modified)	Count of fd's
+ *
+ * Notes:	This function is the first pass under the proc_fdlock() that
+ *		permits us to recognize invalid descriptors in the bit vector;
+ *		they may, however, not remain valid through the drop and
+ *		later reacquisition of the proc_fdlock().
+ */
 static int
-selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits, 
-		 int nfd, int *countp, int * kfcountp)
+selcount(struct proc *p, u_int32_t *ibits, int nfd, int *countp)
 {
 	struct filedesc *fdp = p->p_fd;
 	int msk, i, j, fd;
@@ -1620,9 +1654,8 @@ selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits,
 	u_int32_t *iptr;
 	u_int nw;
 	int error=0; 
-	int kfc = 0;
 	int dropcount;
-	vnode_t vp;
+	int need_wakeup = 0;
 
 	/*
 	 * Problems when reboot; due to MacOSX signal probs
@@ -1630,7 +1663,6 @@ selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits,
 	 */
 	if (fdp == NULL) {
 		*countp = 0;
-		*kfcountp = 0;
 		return(EIO);
 	}
 	nw = howmany(nfd, NFDBITS);
@@ -1646,16 +1678,10 @@ selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits,
 				if (fp == NULL ||
 					(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
 						*countp = 0;
-						*kfcountp = 0;
 						error = EBADF;
 						goto bad;
 				}
 				fp->f_iocount++;
-				if ((fp->f_type == DTYPE_VNODE)
-						&& ((vp = (struct vnode *)fp->f_data)  != NULLVP)
-						&& (vp->v_type == VCHR) )
-					kfc++;
-
 				n++;
 			}
 		}
@@ -1663,48 +1689,64 @@ selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits,
 	proc_fdunlock(p);
 
 	*countp = n;
-	*kfcountp = kfc;
 	return (0);
+
 bad:
 	dropcount = 0;
 	
 	if (n== 0)
 		goto out;
-	/* undo the iocounts */
-	for (msk = 0; msk < 3; msk++) {
-		iptr = (u_int32_t *)&ibits[msk * nw];
-		for (i = 0; i < nfd; i += NFDBITS) {
-			bits = iptr[i/NFDBITS];
-			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
-				bits &= ~(1 << j);
-				fp = fdp->fd_ofiles[fd];
-				if (dropcount >= n)
-					goto out;
-				fp->f_iocount--;
+	/* Ignore error return; it's already EBADF */
+	(void)seldrop_locked(p, ibits, nfd, n, &need_wakeup, 1);
 
-				if (p->p_fpdrainwait && fp->f_iocount == 0) {
-				        p->p_fpdrainwait = 0;
-					wakeup(&p->p_fpdrainwait);
-				}
-				dropcount++;
-			}
-		}
-	}
 out:
 	proc_fdunlock(p);
+	if (need_wakeup) {
+		wakeup(&p->p_fpdrainwait);
+	}
 	return(error);
 }
 
+
+/*
+ * seldrop_locked
+ *
+ * Drop outstanding wait queue references set up during selscan(); drop the
+ * outstanding per-fileproc f_iocount references picked up during selcount().
+ *
+ * Parameters:	p			Process performing the select
+ *		ibits			Input bit vector of fd's
+ *		nfd			Number of fd's
+ *		lim			Limit to number of vector entries to
+ *						consider, or -1 for "all"
+ *		fromselcount		True if called from selcount(); if
+ *						so, at most 'lim' references
+ *						are dropped
+ *		need_wakeup		Pointer to flag to set to do a wakeup
+ *					if f_iocount on any descriptor goes to 0
+ *
+ * Returns:	0			Success
+ *		EBADF			One or more fds in the bit vector
+ *						were invalid, but the rest
+ *						were successfully dropped
+ *
+ * Notes:	An fd may become bad while the proc_fdlock() is not held,
+ *		if a multithreaded application closes the fd out from under
+ *		the in-progress select.  In this case, we still have to
+ *		clean up the setup on the remaining fds.
+ */
 static int
-seldrop(struct proc *p, u_int32_t *ibits, int nfd)
+seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount)
 {
 	struct filedesc *fdp = p->p_fd;
 	int msk, i, j, fd;
 	u_int32_t bits;
 	struct fileproc *fp;
-	int n = 0;
 	u_int32_t *iptr;
 	u_int nw;
+	int error = 0;
+	int dropcount = 0;
+	uthread_t uth = get_bsdthread_info(current_thread());
+
+	*need_wakeup = 0;
 
 	/*
 	 * Problems when reboot; due to MacOSX signal probs
@@ -1716,8 +1758,6 @@ seldrop(struct proc *p, u_int32_t *ibits, int nfd)
 
 	nw = howmany(nfd, NFDBITS);
 
-
-	proc_fdlock(p);
 	for (msk = 0; msk < 3; msk++) {
 		iptr = (u_int32_t *)&ibits[msk * nw];
 		for (i = 0; i < nfd; i += NFDBITS) {
@@ -1725,28 +1765,67 @@ seldrop(struct proc *p, u_int32_t *ibits, int nfd)
 			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
 				bits &= ~(1 << j);
 				fp = fdp->fd_ofiles[fd];
-				if (fp == NULL 
-#if 0
-			/* if you are here then it is being closed */
-					|| (fdp->fd_ofileflags[fd] & UF_RESERVED)
-#endif
-					) {
-						proc_fdunlock(p);
-						return(EBADF);
+				/*
+				 * If we've already dropped as many as were
+				 * counted/scanned, then we are done.  
+				 */
+				if ((fromselcount != 0) && (++dropcount > lim))
+					goto done;
+
+				if (fp == NULL) {
+					/* skip (now) bad fds */
+					error = EBADF;
+					continue;
+				}
+				/*
+				 * Only clear the flag if we set it.  We'll
+				 * only find that we set it if we had made
+				 * at least one [partial] pass through selscan().
+				 */
+				if ((fp->f_flags & FP_INSELECT) && (fp->f_waddr == (void *)uth->uu_wqset)) {
+					fp->f_flags &= ~FP_INSELECT;
+					fp->f_waddr = (void *)0;
 				}
-				n++;
-				fp->f_iocount--;
-				fp->f_flags &= ~FP_INSELECT;
 
-				if (p->p_fpdrainwait && fp->f_iocount == 0) {
-				        p->p_fpdrainwait = 0;
-					wakeup(&p->p_fpdrainwait);
+				fp->f_iocount--;
+				if (fp->f_iocount < 0)
+					panic("f_iocount overdecrement!");
+
+				if (fp->f_iocount == 0) {
+					/*
+					 * The last iocount is responsible for clearing
+					 * The last iocount is responsible for clearing
+					 * the FP_SELCONFLICT flag - even if we didn't
+					 * set it - and is also responsible for waking
+					 * up anyone waiting on iocounts to drain.
+					if (fp->f_flags & FP_SELCONFLICT)
+						fp->f_flags &= ~FP_SELCONFLICT;
+					if (p->p_fpdrainwait) {
+						p->p_fpdrainwait = 0;
+						*need_wakeup = 1;
+					}
 				}
 			}
 		}
 	}
+done:
+	return (error);
+}
+
+
+static int
+seldrop(struct proc *p, u_int32_t *ibits, int nfd)
+{
+	int error;
+	int need_wakeup = 0;
+
+	proc_fdlock(p);
+	error = seldrop_locked(p, ibits, nfd, nfd, &need_wakeup, 0);
 	proc_fdunlock(p);
-	return (0);
+	if (need_wakeup) {
+		wakeup(&p->p_fpdrainwait);
+	}
+	return (error);
 }
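
The seldrop()/seldrop_locked() split follows a standard pattern: do all the bookkeeping under proc_fdlock(), but only record the need for a wakeup, issuing it after the lock is dropped so the woken thread does not immediately block on the lock we still hold. A compile-clean pthread model of the shape (all names illustrative):

	#include <pthread.h>

	static pthread_mutex_t fd_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t  drain_cv = PTHREAD_COND_INITIALIZER;
	static int iocount = 1, drain_waiting = 1;

	static void drop_locked(int *need_wakeup)	/* caller holds fd_lock */
	{
		*need_wakeup = 0;
		if (--iocount == 0 && drain_waiting) {	/* last reference */
			drain_waiting = 0;
			*need_wakeup = 1;		/* remember; don't signal yet */
		}
	}

	static void drop(void)
	{
		int need_wakeup;

		pthread_mutex_lock(&fd_lock);
		drop_locked(&need_wakeup);
		pthread_mutex_unlock(&fd_lock);
		if (need_wakeup)			/* signal only after unlock */
			pthread_cond_broadcast(&drain_cv);
	}

	int main(void)
	{
		drop();
		return iocount;	/* 0: the drain waiter was signaled */
	}

The split also lets selcount()'s error path share the unwind code: seldrop_locked() is called there with the lock already held and fromselcount set, limiting the drops to the references actually taken.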
 
 /*
@@ -1760,12 +1839,8 @@ selrecord(__unused struct proc *selector, struct selinfo *sip, void * p_wql)
 
 	/* need to look at collisions */
 
-	if ((p_wql == (void *)0) && ((sip->si_flags & SI_INITED) == 0)) {
-		return;
-	}
-
 	/*do not record if this is second pass of select */
-	if((p_wql == (void *)0)) {
+	if(p_wql == (void *)0) {
 		return;
 	}
 
diff --git a/bsd/kern/sys_pipe.c b/bsd/kern/sys_pipe.c
index f86ad5a11..c374ea07e 100644
--- a/bsd/kern/sys_pipe.c
+++ b/bsd/kern/sys_pipe.c
@@ -231,17 +231,17 @@ int maxpipekva = 1024 * 1024 * 16;
 #if PIPE_SYSCTLS
 SYSCTL_DECL(_kern_ipc);
 
-SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD,
+SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
 	   &maxpipekva, 0, "Pipe KVA limit");
-SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW,
+SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW|CTLFLAG_LOCKED,
 	   &maxpipekvawired, 0, "Pipe KVA wired limit");
-SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD,
+SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD|CTLFLAG_LOCKED,
 	   &amountpipes, 0, "Current # of pipes");
-SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD,
+SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD|CTLFLAG_LOCKED,
 	   &nbigpipe, 0, "Current # of big pipes");
-SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
+SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
 	   &amountpipekva, 0, "Pipe KVA usage");
-SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD,
+SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD|CTLFLAG_LOCKED,
 	   &amountpipekvawired, 0, "Pipe wired KVA usage");
 #endif
 
@@ -1332,6 +1332,16 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
 				error = EAGAIN;
 				break;
 			}
+
+			/*
+			 * If read side wants to go away, we just issue a signal
+			 * to ourselves.
+			 */
+			if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
+				error = EPIPE;
+				break;
+			}	
+
 			/*
 			 * We have no more space and have something to offer,
 			 * wake up select/poll.
@@ -1344,14 +1354,6 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
 
 			if (error != 0)
 				break;
-			/*
-			 * If read side wants to go away, we just issue a signal
-			 * to ourselves.
-			 */
-			if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
-				error = EPIPE;
-				break;
-			}	
 		}
 	}
 	--wpipe->pipe_busy;
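
The reordering in pipe_write() above matters for a writer racing a disappearing reader: the PIPE_DRAIN|PIPE_EOF test now runs before the writer announces data and sleeps, not after it wakes, so a write to a pipe whose read side is gone fails with EPIPE instead of blocking first. The shape in miniature, as a self-contained model with stand-in state and a stubbed sleep:

	#include <errno.h>

	#define PIPE_DRAIN	0x1	/* illustrative values */
	#define PIPE_EOF	0x2

	static int pipe_state = PIPE_EOF;	/* reader already gone */
	static int space_free = 0;

	static int wait_for_space(void) { return 0; }	/* msleep() in the kernel */

	static int write_wait(void)
	{
		int error = 0;

		for (;;) {
			if (space_free)
				break;
			/* The fix: test for a vanished read side before blocking. */
			if (pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
				error = EPIPE;
				break;
			}
			if ((error = wait_for_space()) != 0)
				break;
		}
		return error;
	}

	int main(void)
	{
		return write_wait() == EPIPE ? 0 : 1;
	}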
@@ -1741,8 +1743,14 @@ filt_piperead(struct knote *kn, long hint)
 		kn->kn_flags |= EV_EOF;
 		retval = 1;
 	} else {
-		retval = (kn->kn_sfflags & NOTE_LOWAT) ?
-		         (kn->kn_data >= kn->kn_sdata) : (kn->kn_data > 0);
+		int64_t lowwat = 1;
+		if (kn->kn_sfflags & NOTE_LOWAT) {
+			if (rpipe->pipe_buffer.size && kn->kn_sdata > rpipe->pipe_buffer.size)
+				lowwat = rpipe->pipe_buffer.size;
+			else if (kn->kn_sdata > lowwat)
+				lowwat = kn->kn_sdata;
+		}
+		retval = kn->kn_data >= lowwat;
 	}
 
 	if (hint == 0)
@@ -1779,17 +1787,24 @@ filt_pipewrite(struct knote *kn, long hint)
 	}
 	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
 	if (!kn->kn_data && wpipe->pipe_buffer.size == 0)
-		kn->kn_data = 1; /* unwritten pipe is ready for write */
+		kn->kn_data = PIPE_BUF; /* unwritten pipe is ready for write */
 
 #ifndef PIPE_NODIRECT
 	if (wpipe->pipe_state & PIPE_DIRECTW)
 		kn->kn_data = 0;
 #endif
+	int64_t lowwat = PIPE_BUF;
+	if (kn->kn_sfflags & NOTE_LOWAT) {
+		if (wpipe->pipe_buffer.size && kn->kn_sdata > wpipe->pipe_buffer.size)
+			lowwat = wpipe->pipe_buffer.size;
+		else if (kn->kn_sdata > lowwat)
+			lowwat = kn->kn_sdata;
+	}
+	
 	if (hint == 0)
 	        PIPE_UNLOCK(rpipe);
 
-	return (kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
-	                         kn->kn_sdata : PIPE_BUF));
+	return (kn->kn_data >= lowwat);
 }
 
 int
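
Both pipe filters now derive an effective low watermark the same way: start from the filter default (1 byte for read, PIPE_BUF for write), raise it to the caller's NOTE_LOWAT value, and clamp it to the pipe's buffer size so an oversized watermark can never make the knote permanently unready. The computation in isolation (PIPE_BUF is 512 on this platform; the kernel only reaches this path when NOTE_LOWAT was requested):

	#include <stdio.h>

	#define PIPE_BUF 512	/* default write low watermark, as in the hunk */

	static long long effective_lowwat(long long deflt, long long sdata,
	    long long bufsize)
	{
		long long lowwat = deflt;

		if (bufsize && sdata > bufsize)
			lowwat = bufsize;	/* clamp to what the pipe can hold */
		else if (sdata > lowwat)
			lowwat = sdata;
		return lowwat;
	}

	int main(void)
	{
		/* read filter: default 1; write filter: default PIPE_BUF */
		printf("%lld\n", effective_lowwat(1, 100, 16384));	/* -> 100 */
		printf("%lld\n", effective_lowwat(1, 1 << 20, 16384));	/* -> 16384 */
		printf("%lld\n", effective_lowwat(PIPE_BUF, 0, 16384));	/* -> 512 */
		return 0;
	}

The poll_nocancel() change earlier in this patch, which stops passing NOTE_LOWAT, leans on these defaults.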
diff --git a/bsd/kern/sys_socket.c b/bsd/kern/sys_socket.c
index 471cac76a..431e47658 100644
--- a/bsd/kern/sys_socket.c
+++ b/bsd/kern/sys_socket.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -194,27 +194,7 @@ soioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 
 	/* Call the socket filter's ioctl handler for most ioctls */
 	if (IOCGROUP(cmd) != 'i' && IOCGROUP(cmd) != 'r') {
-		int filtered = 0;
-		struct socket_filter_entry *filter;
-
-		for (filter = so->so_filt; filter && error == 0;
-		    filter = filter->sfe_next_onsocket) {
-			if (filter->sfe_filter->sf_filter.sf_ioctl) {
-				if (filtered == 0) {
-					sflt_use(so);
-					socket_unlock(so, 0);
-					filtered = 1;
-				}
-				error = filter->sfe_filter->sf_filter.
-				    sf_ioctl(filter->sfe_cookie, so, cmd, data);
-			}
-		}
-
-		if (filtered) {
-			socket_lock(so, 0);
-			sflt_unuse(so);
-		}
-
+		error = sflt_ioctl(so, cmd, data);
 		if (error != 0)
 			goto out;
 	}
@@ -462,7 +442,7 @@ soo_stat(struct socket *so, void *ub, int isstat64)
 			sb64->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH;
 		sb64->st_size = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
 		sb64->st_uid = so->so_uid;
-		sb64->st_gid = -1;	/* XXX -- what else to do? */
+		sb64->st_gid = so->so_gid;
 	} else {
 		sb->st_mode = S_IFSOCK;
 		if ((so->so_state & SS_CANTRCVMORE) == 0 ||
@@ -472,7 +452,7 @@ soo_stat(struct socket *so, void *ub, int isstat64)
 			sb->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH;
 		sb->st_size = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
 		sb->st_uid = so->so_uid;
-		sb->st_gid = -1;	/* XXX -- what else to do? */
+		sb->st_gid = so->so_gid;
 	}
 
 	ret = (*so->so_proto->pr_usrreqs->pru_sense)(so, ub, isstat64);
diff --git a/bsd/kern/syscalls.master b/bsd/kern/syscalls.master
index 00fb84082..009dd377b 100644
--- a/bsd/kern/syscalls.master
+++ b/bsd/kern/syscalls.master
@@ -39,21 +39,21 @@
 #include <sys/sysproto.h>
 
 0	AUE_NULL	ALL	{ int nosys(void); }   { indirect syscall }
-1	AUE_EXIT	ALL	{ void exit(int rval); } 
-2	AUE_FORK	ALL	{ int fork(void); } 
+1	AUE_EXIT	ALL	{ void exit(int rval) NO_SYSCALL_STUB; } 
+2	AUE_FORK	ALL	{ int fork(void) NO_SYSCALL_STUB; } 
 3	AUE_NULL	ALL	{ user_ssize_t read(int fd, user_addr_t cbuf, user_size_t nbyte); } 
 4	AUE_NULL	ALL	{ user_ssize_t write(int fd, user_addr_t cbuf, user_size_t nbyte); } 
-5	AUE_OPEN_RWTC	ALL	{ int open(user_addr_t path, int flags, int mode); } 
+5	AUE_OPEN_RWTC	ALL	{ int open(user_addr_t path, int flags, int mode) NO_SYSCALL_STUB; } 
 6	AUE_CLOSE	ALL	{ int close(int fd); } 
-7	AUE_WAIT4	ALL	{ int wait4(int pid, user_addr_t status, int options, user_addr_t rusage); } 
+7	AUE_WAIT4	ALL	{ int wait4(int pid, user_addr_t status, int options, user_addr_t rusage) NO_SYSCALL_STUB; } 
 8	AUE_NULL	ALL	{ int nosys(void); }   { old creat }
 9	AUE_LINK	ALL	{ int link(user_addr_t path, user_addr_t link); } 
-10	AUE_UNLINK	ALL	{ int unlink(user_addr_t path); } 
+10	AUE_UNLINK	ALL	{ int unlink(user_addr_t path) NO_SYSCALL_STUB; } 
 11	AUE_NULL	ALL	{ int nosys(void); }   { old execv }
 12	AUE_CHDIR	ALL	{ int chdir(user_addr_t path); } 
 13	AUE_FCHDIR	ALL	{ int fchdir(int fd); } 
 14	AUE_MKNOD	ALL	{ int mknod(user_addr_t path, int mode, int dev); } 
-15	AUE_CHMOD	ALL	{ int chmod(user_addr_t path, int mode); } 
+15	AUE_CHMOD	ALL	{ int chmod(user_addr_t path, int mode) NO_SYSCALL_STUB; } 
 16	AUE_CHOWN	ALL	{ int chown(user_addr_t path, int uid, int gid); } 
 17	AUE_NULL	ALL	{ int nosys(void); }   { old break }
 18	AUE_GETFSSTAT	ALL	{ int getfsstat(user_addr_t buf, int bufsize, int flags); } 
@@ -66,12 +66,12 @@
 25	AUE_GETEUID	ALL	{ int geteuid(void); } 
 26	AUE_PTRACE	ALL	{ int ptrace(int req, pid_t pid, caddr_t addr, int data); } 
 #if SOCKETS
-27	AUE_RECVMSG	ALL	{ int recvmsg(int s, struct msghdr *msg, int flags); } 
-28	AUE_SENDMSG	ALL	{ int sendmsg(int s, caddr_t msg, int flags); } 
-29	AUE_RECVFROM	ALL	{ int recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from, int *fromlenaddr); } 
-30	AUE_ACCEPT	ALL	{ int accept(int s, caddr_t name, socklen_t	*anamelen); } 
-31	AUE_GETPEERNAME	ALL	{ int getpeername(int fdes, caddr_t asa, socklen_t *alen); } 
-32	AUE_GETSOCKNAME	ALL	{ int getsockname(int fdes, caddr_t asa, socklen_t *alen); } 
+27	AUE_RECVMSG	ALL	{ int recvmsg(int s, struct msghdr *msg, int flags) NO_SYSCALL_STUB; } 
+28	AUE_SENDMSG	ALL	{ int sendmsg(int s, caddr_t msg, int flags) NO_SYSCALL_STUB; } 
+29	AUE_RECVFROM	ALL	{ int recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from, int *fromlenaddr) NO_SYSCALL_STUB; } 
+30	AUE_ACCEPT	ALL	{ int accept(int s, caddr_t name, socklen_t	*anamelen) NO_SYSCALL_STUB; } 
+31	AUE_GETPEERNAME	ALL	{ int getpeername(int fdes, caddr_t asa, socklen_t *alen) NO_SYSCALL_STUB; } 
+32	AUE_GETSOCKNAME	ALL	{ int getsockname(int fdes, caddr_t asa, socklen_t *alen) NO_SYSCALL_STUB; } 
 #else
 27	AUE_NULL	ALL	{ int nosys(void); }
 28	AUE_NULL	ALL	{ int nosys(void); }
@@ -84,7 +84,7 @@
 34	AUE_CHFLAGS	ALL	{ int chflags(char *path, int flags); } 
 35	AUE_FCHFLAGS	ALL	{ int fchflags(int fd, int flags); } 
 36	AUE_SYNC	ALL	{ int sync(void); } 
-37	AUE_KILL	ALL	{ int kill(int pid, int signum, int posix); } 
+37	AUE_KILL	ALL	{ int kill(int pid, int signum, int posix) NO_SYSCALL_STUB; } 
 38	AUE_NULL	ALL	{ int nosys(void); }   { old stat  }
 39	AUE_GETPPID	ALL	{ int getppid(void); } 
 40	AUE_NULL	ALL	{ int nosys(void); }   { old lstat }
@@ -93,15 +93,15 @@
 43	AUE_GETEGID	ALL	{ int getegid(void); } 
 44	AUE_PROFILE	ALL	{ int profil(short *bufbase, size_t bufsize, u_long pcoffset, u_int pcscale); } 
 45	AUE_NULL	ALL	{ int nosys(void); } { old ktrace }
-46	AUE_SIGACTION	ALL	{ int sigaction(int signum, struct __sigaction *nsa, struct sigaction *osa); } 
+46	AUE_SIGACTION	ALL	{ int sigaction(int signum, struct __sigaction *nsa, struct sigaction *osa) NO_SYSCALL_STUB; } 
 47	AUE_GETGID	ALL	{ int getgid(void); } 
 48	AUE_SIGPROCMASK	ALL	{ int sigprocmask(int how, user_addr_t mask, user_addr_t omask); } 
-49	AUE_GETLOGIN	ALL	{ int getlogin(char *namebuf, u_int namelen); } 
-50	AUE_SETLOGIN	ALL	{ int setlogin(char *namebuf); } 
+49	AUE_GETLOGIN	ALL	{ int getlogin(char *namebuf, u_int namelen) NO_SYSCALL_STUB; } 
+50	AUE_SETLOGIN	ALL	{ int setlogin(char *namebuf) NO_SYSCALL_STUB; } 
 51	AUE_ACCT	ALL	{ int acct(char *path); } 
 52	AUE_SIGPENDING	ALL	{ int sigpending(struct sigvec *osv); } 
-53	AUE_SIGALTSTACK	ALL	{ int sigaltstack(struct sigaltstack *nss, struct sigaltstack *oss); } 
-54	AUE_IOCTL	ALL	{ int ioctl(int fd, u_long com, caddr_t data); } 
+53	AUE_SIGALTSTACK	ALL	{ int sigaltstack(struct sigaltstack *nss, struct sigaltstack *oss) NO_SYSCALL_STUB; } 
+54	AUE_IOCTL	ALL	{ int ioctl(int fd, u_long com, caddr_t data) NO_SYSCALL_STUB; } 
 55	AUE_REBOOT	ALL	{ int reboot(int opt, char *command); } 
 56	AUE_REVOKE	ALL	{ int revoke(char *path); } 
 57	AUE_SYMLINK	ALL	{ int symlink(char *path, char *link); } 
@@ -112,7 +112,7 @@
 62	AUE_NULL	ALL	{ int nosys(void); }   { old fstat }
 63	AUE_NULL	ALL	{ int nosys(void); }   { used internally, reserved }
 64	AUE_NULL	ALL	{ int nosys(void); }   { old getpagesize }
-65	AUE_MSYNC	ALL	{ int msync(caddr_t addr, size_t len, int flags); } 
+65	AUE_MSYNC	ALL	{ int msync(caddr_t addr, size_t len, int flags) NO_SYSCALL_STUB; } 
 66	AUE_VFORK	ALL	{ int vfork(void); } 
 67	AUE_NULL	ALL	{ int nosys(void); }   { old vread }
 68	AUE_NULL	ALL	{ int nosys(void); }   { old vwrite }
@@ -120,8 +120,8 @@
 70	AUE_NULL	ALL	{ int nosys(void); }   { old sstk } 
 71	AUE_NULL	ALL	{ int nosys(void); }   { old mmap }
 72	AUE_NULL	ALL	{ int nosys(void); }   { old vadvise }
-73	AUE_MUNMAP	ALL	{ int munmap(caddr_t addr, size_t len); } 
-74	AUE_MPROTECT	ALL	{ int mprotect(caddr_t addr, size_t len, int prot); } 
+73	AUE_MUNMAP	ALL	{ int munmap(caddr_t addr, size_t len) NO_SYSCALL_STUB; } 
+74	AUE_MPROTECT	ALL	{ int mprotect(caddr_t addr, size_t len, int prot) NO_SYSCALL_STUB; } 
 75	AUE_MADVISE	ALL	{ int madvise(caddr_t addr, size_t len, int behav); } 
 76	AUE_NULL	ALL	{ int nosys(void); }   { old vhangup }
 77	AUE_NULL	ALL	{ int nosys(void); }   { old vlimit }
@@ -139,14 +139,14 @@
 89	AUE_GETDTABLESIZE	ALL	{ int getdtablesize(void); } 
 90	AUE_DUP2	ALL	{ int dup2(u_int from, u_int to); } 
 91	AUE_NULL	ALL	{ int nosys(void); }   { old getdopt }
-92	AUE_FCNTL	ALL	{ int fcntl(int fd, int cmd, long arg); } 
-93	AUE_SELECT	ALL	{ int select(int nd, u_int32_t *in, u_int32_t *ou, u_int32_t *ex, struct timeval *tv); } 
+92	AUE_FCNTL	ALL	{ int fcntl(int fd, int cmd, long arg) NO_SYSCALL_STUB; } 
+93	AUE_SELECT	ALL	{ int select(int nd, u_int32_t *in, u_int32_t *ou, u_int32_t *ex, struct timeval *tv) NO_SYSCALL_STUB; } 
 94	AUE_NULL	ALL	{ int nosys(void); }   { old setdopt }
 95	AUE_FSYNC	ALL	{ int fsync(int fd); } 
 96	AUE_SETPRIORITY	ALL	{ int setpriority(int which, id_t who, int prio); } 
 #if SOCKETS
 97	AUE_SOCKET	ALL	{ int socket(int domain, int type, int protocol); } 
-98	AUE_CONNECT	ALL	{ int connect(int s, caddr_t name, socklen_t namelen); } 
+98	AUE_CONNECT	ALL	{ int connect(int s, caddr_t name, socklen_t namelen) NO_SYSCALL_STUB; } 
 #else
 97	AUE_NULL	ALL	{ int nosys(void); }
 98	AUE_NULL	ALL	{ int nosys(void); }
@@ -157,9 +157,9 @@
 102	AUE_NULL	ALL	{ int nosys(void); }   { old recv }
 103	AUE_NULL	ALL	{ int nosys(void); }   { old sigreturn }
 #if SOCKETS
-104	AUE_BIND	ALL	{ int bind(int s, caddr_t name, socklen_t namelen); } 
+104	AUE_BIND	ALL	{ int bind(int s, caddr_t name, socklen_t namelen) NO_SYSCALL_STUB; } 
 105	AUE_SETSOCKOPT	ALL	{ int setsockopt(int s, int level, int name, caddr_t val, socklen_t valsize); } 
-106	AUE_LISTEN	ALL	{ int listen(int s, int backlog); } 
+106	AUE_LISTEN	ALL	{ int listen(int s, int backlog) NO_SYSCALL_STUB; } 
 #else
 104	AUE_NULL	ALL	{ int nosys(void); }
 105	AUE_NULL	ALL	{ int nosys(void); }
@@ -169,7 +169,7 @@
 108	AUE_NULL	ALL	{ int nosys(void); }   { old sigvec }
 109	AUE_NULL	ALL	{ int nosys(void); }   { old sigblock }
 110	AUE_NULL	ALL	{ int nosys(void); }   { old sigsetmask }
-111	AUE_NULL	ALL	{ int sigsuspend(sigset_t mask); } 
+111	AUE_NULL	ALL	{ int sigsuspend(sigset_t mask) NO_SYSCALL_STUB; } 
 112	AUE_NULL	ALL	{ int nosys(void); }   { old sigstack }
 #if SOCKETS
 113	AUE_NULL	ALL	{ int nosys(void); }   { old recvmsg }
@@ -179,7 +179,7 @@
 114	AUE_NULL	ALL	{ int nosys(void); }
 #endif /* SOCKETS */
 115	AUE_NULL	ALL	{ int nosys(void); }   { old vtrace }
-116	AUE_GETTIMEOFDAY	ALL	{ int gettimeofday(struct timeval *tp, struct timezone *tzp); } 
+116	AUE_GETTIMEOFDAY	ALL	{ int gettimeofday(struct timeval *tp, struct timezone *tzp) NO_SYSCALL_STUB; } 
 117	AUE_GETRUSAGE	ALL	{ int getrusage(int who, struct rusage *rusage); } 
 #if SOCKETS
 118	AUE_GETSOCKOPT	ALL	{ int getsockopt(int s, int level, int name, caddr_t val, socklen_t *avalsize); } 
@@ -189,28 +189,28 @@
 119	AUE_NULL	ALL	{ int nosys(void); }   { old resuba }
 120	AUE_READV	ALL	{ user_ssize_t readv(int fd, struct iovec *iovp, u_int iovcnt); } 
 121	AUE_WRITEV	ALL	{ user_ssize_t writev(int fd, struct iovec *iovp, u_int iovcnt); } 
-122	AUE_SETTIMEOFDAY	ALL	{ int settimeofday(struct timeval *tv, struct timezone *tzp); } 
+122	AUE_SETTIMEOFDAY	ALL	{ int settimeofday(struct timeval *tv, struct timezone *tzp) NO_SYSCALL_STUB; } 
 123	AUE_FCHOWN	ALL	{ int fchown(int fd, int uid, int gid); } 
-124	AUE_FCHMOD	ALL	{ int fchmod(int fd, int mode); } 
+124	AUE_FCHMOD	ALL	{ int fchmod(int fd, int mode) NO_SYSCALL_STUB; } 
 125	AUE_NULL	ALL	{ int nosys(void); }   { old recvfrom }
-126	AUE_SETREUID	ALL	{ int setreuid(uid_t ruid, uid_t euid); }
-127	AUE_SETREGID	ALL	{ int setregid(gid_t rgid, gid_t egid); }
-128	AUE_RENAME	ALL	{ int rename(char *from, char *to); } 
+126	AUE_SETREUID	ALL	{ int setreuid(uid_t ruid, uid_t euid) NO_SYSCALL_STUB; }
+127	AUE_SETREGID	ALL	{ int setregid(gid_t rgid, gid_t egid) NO_SYSCALL_STUB; }
+128	AUE_RENAME	ALL	{ int rename(char *from, char *to) NO_SYSCALL_STUB; } 
 129	AUE_NULL	ALL	{ int nosys(void); }   { old truncate }
 130	AUE_NULL	ALL	{ int nosys(void); }   { old ftruncate }
 131	AUE_FLOCK	ALL	{ int flock(int fd, int how); } 
 132	AUE_MKFIFO	ALL	{ int mkfifo(user_addr_t path, int mode); } 
 #if SOCKETS
-133	AUE_SENDTO	ALL	{ int sendto(int s, caddr_t buf, size_t len, int flags, caddr_t to, socklen_t tolen); } 
+133	AUE_SENDTO	ALL	{ int sendto(int s, caddr_t buf, size_t len, int flags, caddr_t to, socklen_t tolen) NO_SYSCALL_STUB; } 
 134	AUE_SHUTDOWN	ALL	{ int shutdown(int s, int how); } 
-135	AUE_SOCKETPAIR	ALL	{ int socketpair(int domain, int type, int protocol, int *rsv); } 
+135	AUE_SOCKETPAIR	ALL	{ int socketpair(int domain, int type, int protocol, int *rsv) NO_SYSCALL_STUB; } 
 #else
 133	AUE_NULL	ALL	{ int nosys(void); }
 134	AUE_NULL	ALL	{ int nosys(void); }
 135	AUE_NULL	ALL	{ int nosys(void); }
 #endif /* SOCKETS */
 136	AUE_MKDIR	ALL	{ int mkdir(user_addr_t path, int mode); } 
-137	AUE_RMDIR	ALL	{ int rmdir(char *path); } 
+137	AUE_RMDIR	ALL	{ int rmdir(char *path) NO_SYSCALL_STUB; } 
 138	AUE_UTIMES	ALL	{ int utimes(char *path, struct timeval *tptr); } 
 139	AUE_FUTIMES	ALL	{ int futimes(int fd, struct timeval *tptr); } 
 140	AUE_ADJTIME	ALL	{ int adjtime(struct timeval *delta, struct timeval *olddelta); } 
@@ -279,10 +279,10 @@
 191	AUE_PATHCONF	ALL	{ int pathconf(char *path, int name); } 
 192	AUE_FPATHCONF	ALL	{ int fpathconf(int fd, int name); } 
 193	AUE_NULL	ALL	{ int nosys(void); } 
-194	AUE_GETRLIMIT	ALL	{ int getrlimit(u_int which, struct rlimit *rlp); } 
-195	AUE_SETRLIMIT	ALL	{ int setrlimit(u_int which, struct rlimit *rlp); } 
+194	AUE_GETRLIMIT	ALL	{ int getrlimit(u_int which, struct rlimit *rlp) NO_SYSCALL_STUB; } 
+195	AUE_SETRLIMIT	ALL	{ int setrlimit(u_int which, struct rlimit *rlp) NO_SYSCALL_STUB; } 
 196	AUE_GETDIRENTRIES	ALL	{ int getdirentries(int fd, char *buf, u_int count, long *basep); } 
-197	AUE_MMAP	ALL	{ user_addr_t mmap(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos); } 
+197	AUE_MMAP	ALL	{ user_addr_t mmap(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos) NO_SYSCALL_STUB; } 
 198	AUE_NULL	ALL	{ int nosys(void); } 	{ __syscall }
 199	AUE_LSEEK	ALL	{ off_t lseek(int fd, off_t offset, int whence); } 
 200	AUE_TRUNCATE	ALL	{ int truncate(char *path, off_t length); } 
@@ -326,8 +326,8 @@
 217	AUE_STATV	UHN	{ int statv(const char *path, struct vstat *vsb); } 	{ soon to be obsolete }
 218	AUE_LSTATV	UHN	{ int lstatv(const char *path, struct vstat *vsb); } 	{ soon to be obsolete }
 219	AUE_FSTATV	UHN	{ int fstatv(int fd, struct vstat *vsb); } 	{ soon to be obsolete }
-220	AUE_GETATTRLIST	ALL	{ int getattrlist(const char *path, struct attrlist *alist, void *attributeBuffer, size_t bufferSize, u_long options); } 
-221	AUE_SETATTRLIST	ALL	{ int setattrlist(const char *path, struct attrlist *alist, void *attributeBuffer, size_t bufferSize, u_long options); } 
+220	AUE_GETATTRLIST	ALL	{ int getattrlist(const char *path, struct attrlist *alist, void *attributeBuffer, size_t bufferSize, u_long options) NO_SYSCALL_STUB; } 
+221	AUE_SETATTRLIST	ALL	{ int setattrlist(const char *path, struct attrlist *alist, void *attributeBuffer, size_t bufferSize, u_long options) NO_SYSCALL_STUB; } 
 222	AUE_GETDIRENTRIESATTR	ALL	{ int getdirentriesattr(int fd, struct attrlist *alist, void *buffer, size_t buffersize, u_long *count, u_long *basep, u_long *newstate, u_long options); } 
 223	AUE_EXCHANGEDATA	ALL	{ int exchangedata(const char *path1, const char *path2, u_long options); } 
 224	AUE_NULL		ALL { int nosys(void); } { old checkuseraccess / fsgetpath (which moved to 427) }
@@ -349,8 +349,8 @@
 240	AUE_LISTXATTR	ALL	{ user_ssize_t listxattr(user_addr_t path, user_addr_t namebuf, size_t bufsize, int options); } 
 241	AUE_FLISTXATTR	ALL	{ user_ssize_t flistxattr(int fd, user_addr_t namebuf, size_t bufsize, int options); } 
 242	AUE_FSCTL	ALL	{ int fsctl(const char *path, u_long cmd, caddr_t data, u_int options); } 
-243	AUE_INITGROUPS	ALL	{ int initgroups(u_int gidsetsize, gid_t *gidset, int gmuid); } 
-244	AUE_POSIX_SPAWN	ALL	{ int posix_spawn(pid_t *pid, const char *path, const struct _posix_spawn_args_desc *adesc, char **argv, char **envp); } 
+243	AUE_INITGROUPS	ALL	{ int initgroups(u_int gidsetsize, gid_t *gidset, int gmuid) NO_SYSCALL_STUB; } 
+244	AUE_POSIX_SPAWN	ALL	{ int posix_spawn(pid_t *pid, const char *path, const struct _posix_spawn_args_desc *adesc, char **argv, char **envp) NO_SYSCALL_STUB; } 
 245	AUE_FFSCTL	ALL	{ int ffsctl(int fd, u_long cmd, caddr_t data, u_int options); } 
 246	AUE_NULL	ALL	{ int nosys(void); } 
 
@@ -383,7 +383,7 @@
 253	AUE_NULL	ALL	{ int nosys(void); } 
 #endif
 #if SYSV_SEM
-254	AUE_SEMCTL	ALL	{ int semctl(int semid, int semnum, int cmd, semun_t arg); } 
+254	AUE_SEMCTL	ALL	{ int semctl(int semid, int semnum, int cmd, semun_t arg) NO_SYSCALL_STUB; } 
 255	AUE_SEMGET	ALL	{ int semget(key_t key, int	nsems, int semflg); } 
 256	AUE_SEMOP	ALL	{ int semop(int semid, struct sembuf *sops, int nsops); } 
 257	AUE_NULL	ALL	{ int nosys(void); } 
@@ -394,7 +394,7 @@
 257	AUE_NULL	ALL	{ int nosys(void); } 
 #endif
 #if SYSV_MSG
-258	AUE_MSGCTL	ALL	{ int msgctl(int msqid, int cmd, struct	msqid_ds *buf); } 
+258	AUE_MSGCTL	ALL	{ int msgctl(int msqid, int cmd, struct	msqid_ds *buf) NO_SYSCALL_STUB; } 
 259	AUE_MSGGET	ALL	{ int msgget(key_t key, int msgflg); } 
 260	AUE_MSGSND	ALL	{ int msgsnd(int msqid, void *msgp, size_t msgsz, int msgflg); } 
 261	AUE_MSGRCV	ALL	{ user_ssize_t msgrcv(int msqid, void *msgp, size_t msgsz, long msgtyp, int msgflg); } 
@@ -406,7 +406,7 @@
 #endif
 #if SYSV_SHM
 262	AUE_SHMAT	ALL	{ user_addr_t shmat(int shmid, void *shmaddr, int shmflg); } 
-263	AUE_SHMCTL	ALL	{ int shmctl(int shmid, int cmd, struct shmid_ds *buf); } 
+263	AUE_SHMCTL	ALL	{ int shmctl(int shmid, int cmd, struct shmid_ds *buf) NO_SYSCALL_STUB; } 
 264	AUE_SHMDT	ALL	{ int shmdt(void *shmaddr); } 
 265	AUE_SHMGET	ALL	{ int shmget(key_t key, size_t size, int shmflg); } 
 #else
@@ -444,7 +444,7 @@
 292	AUE_MKDIR_EXTENDED	ALL	{ int mkdir_extended(user_addr_t path, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity) NO_SYSCALL_STUB; } 
 293	AUE_IDENTITYSVC	ALL	{ int identitysvc(int opcode, user_addr_t message) NO_SYSCALL_STUB; } 
 294	AUE_NULL	ALL	{ int shared_region_check_np(uint64_t *start_address) NO_SYSCALL_STUB; }
-295	AUE_NULL	ALL	{ int shared_region_map_np(int fd, uint32_t count, const struct shared_file_mapping_np *mappings) NO_SYSCALL_STUB; }
+295	AUE_NULL	ALL	{ int nosys(void); } { old shared_region_map_np }
 296	AUE_NULL	ALL	{ int vm_pressure_monitor(int wait_for_pressure, int nsecs_monitored, uint32_t *pages_reclaimed); }
 #if PSYNCH
 297	AUE_NULL	ALL	{ uint32_t psynch_rw_longrdlock(user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags)  NO_SYSCALL_STUB; }
@@ -453,9 +453,9 @@
 300	AUE_NULL	ALL	{ uint32_t psynch_rw_upgrade(user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags)  NO_SYSCALL_STUB; }
 301	AUE_NULL	ALL	{ uint32_t psynch_mutexwait(user_addr_t mutex,  uint32_t mgen, uint32_t  ugen, uint64_t tid, uint32_t flags) NO_SYSCALL_STUB; }
 302	AUE_NULL	ALL	{ uint32_t psynch_mutexdrop(user_addr_t mutex,  uint32_t mgen, uint32_t  ugen, uint64_t tid, uint32_t flags) NO_SYSCALL_STUB; }
-303	AUE_NULL	ALL	{ int psynch_cvbroad(user_addr_t cv, uint32_t cvgen, uint32_t diffgen, user_addr_t mutex,  uint32_t mgen, uint32_t ugen, uint64_t tid, uint32_t flags) NO_SYSCALL_STUB; }
-304	AUE_NULL	ALL	{ int psynch_cvsignal(user_addr_t cv, uint32_t cvgen, uint32_t cvugen, user_addr_t mutex,  uint32_t mgen, uint32_t ugen, int thread_port, uint32_t flags) NO_SYSCALL_STUB; }
-305	AUE_NULL	ALL	{ uint32_t psynch_cvwait(user_addr_t cv, uint32_t cvgen, uint32_t cvugen, user_addr_t mutex,  uint32_t mgen, uint32_t ugen, uint64_t sec, uint64_t usec) NO_SYSCALL_STUB; }
+303	AUE_NULL	ALL	{ uint32_t psynch_cvbroad(user_addr_t cv, uint64_t cvlsgen, uint64_t cvudgen, uint32_t flags, user_addr_t mutex,  uint64_t mugen, uint64_t tid) NO_SYSCALL_STUB; }
+304	AUE_NULL	ALL	{ uint32_t psynch_cvsignal(user_addr_t cv, uint64_t cvlsgen, uint32_t cvugen, int thread_port, user_addr_t mutex,  uint64_t mugen, uint64_t tid, uint32_t flags) NO_SYSCALL_STUB; }
+305	AUE_NULL	ALL	{ uint32_t psynch_cvwait(user_addr_t cv, uint64_t cvlsgen, uint32_t cvugen, user_addr_t mutex,  uint64_t mugen, uint32_t flags, int64_t sec, uint32_t nsec) NO_SYSCALL_STUB; }
 306	AUE_NULL	ALL	{ uint32_t psynch_rw_rdlock(user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags)  NO_SYSCALL_STUB; }
 307	AUE_NULL	ALL	{ uint32_t psynch_rw_wrlock(user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags)  NO_SYSCALL_STUB; }
 308	AUE_NULL	ALL	{ uint32_t psynch_rw_unlock(user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags)  NO_SYSCALL_STUB; }
@@ -477,7 +477,11 @@
 #endif
 310	AUE_GETSID	ALL	{ int getsid(pid_t pid); } 
 311	AUE_SETTIDWITHPID	ALL	{ int settid_with_pid(pid_t pid, int assume) NO_SYSCALL_STUB; } 
+#if PSYNCH
+312	AUE_NULL	ALL	{ int psynch_cvclrprepost(user_addr_t cv, uint32_t cvgen, uint32_t cvugen, uint32_t cvsgen, uint32_t prepocnt, uint32_t preposeq, uint32_t flags) NO_SYSCALL_STUB; }
+#else
 312	AUE_NULL	ALL	{ int nosys(void); } { old __pthread_cond_timedwait }
+#endif
 313	AUE_NULL	ALL	{ int aio_fsync(int op, user_addr_t aiocbp); } 
 314 	AUE_NULL	ALL	{ user_ssize_t aio_return(user_addr_t aiocbp); } 
 315	AUE_NULL	ALL	{ int aio_suspend(user_addr_t aiocblist, int nent, user_addr_t timeoutp); } 
@@ -488,7 +492,7 @@
 320	AUE_LIOLISTIO	ALL	{ int lio_listio(int mode, user_addr_t aiocblist, int nent, user_addr_t sigp); } 
 321	AUE_NULL	ALL	{ int nosys(void); } { old __pthread_cond_wait }
 322	AUE_IOPOLICYSYS	ALL	{ int iopolicysys(int cmd, void *arg) NO_SYSCALL_STUB; } 
-323	AUE_NULL	ALL	{ int nosys(void); } 
+323	AUE_NULL	ALL	{ int process_policy(int scope, int action, int policy, int policy_subtype, user_addr_t attrp, pid_t target_pid, uint64_t target_threadid) NO_SYSCALL_STUB; } 
 324	AUE_MLOCKALL	ALL	{ int mlockall(int how); } 
 325	AUE_MUNLOCKALL	ALL	{ int munlockall(int how); } 
 326	AUE_NULL	ALL	{ int nosys(void); } 
@@ -544,7 +548,7 @@
 #endif /* CONFIG_WORKQUEUE */
 362	AUE_KQUEUE	ALL	{ int kqueue(void); } 
 363	AUE_NULL	ALL	{ int kevent(int fd, const struct kevent *changelist, int nchanges, struct kevent *eventlist, int nevents, const struct timespec *timeout); } 
-364	AUE_LCHOWN	ALL	{ int lchown(user_addr_t path, uid_t owner, gid_t group); }
+364	AUE_LCHOWN	ALL	{ int lchown(user_addr_t path, uid_t owner, gid_t group) NO_SYSCALL_STUB; }
 365	AUE_STACKSNAPSHOT	ALL	{ int stack_snapshot(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset) NO_SYSCALL_STUB; }
 #if CONFIG_WORKQUEUE
 366	AUE_NULL	ALL	{ int bsdthread_register(user_addr_t threadstart, user_addr_t wqthread, int pthsize,user_addr_t dummy_value, user_addr_t targetconc_ptr, uint64_t dispatchqueue_offset) NO_SYSCALL_STUB; } 
@@ -637,7 +641,7 @@
 ;#if OLD_SEMWAIT_SIGNAL 
 ;423	AUE_NULL	ALL	{ int nosys(void); }   { old __semwait_signal_nocancel }
 ;#else
-423     AUE_SEMWAITSIGNAL       ALL     { int __semwait_signal_nocancel(int cond_sem, int mutex_sem, int timeout, int relative, int64_t tv_sec, int32_t tv_nsec) NO_SYSCALL_STUB;}
+423     AUE_SEMWAITSIGNAL       ALL     { int __semwait_signal_nocancel(int cond_sem, int mutex_sem, int timeout, int relative, int64_t tv_sec, int32_t tv_nsec); }
 ;#endif
 424	AUE_MAC_MOUNT	ALL	{ int __mac_mount(char *type, char *path, int flags, caddr_t data, struct mac *mac_p); } 
 425	AUE_MAC_GET_MOUNT	ALL	{ int __mac_get_mount(char *path, struct mac *mac_p); } 
@@ -645,12 +649,18 @@
 427	AUE_FSGETPATH	ALL	{ user_ssize_t fsgetpath(user_addr_t buf, size_t bufsize, user_addr_t fsid, uint64_t objid) NO_SYSCALL_STUB; } { private fsgetpath (File Manager SPI) }
 428	AUE_NULL	ALL	{ mach_port_name_t audit_session_self(void); }
 429	AUE_NULL	ALL	{ int audit_session_join(mach_port_name_t port); }
-430 AUE_NULL	ALL { int pid_suspend(int pid); }
-431 AUE_NULL	ALL { int pid_resume(int pid); }
+430	AUE_NULL	ALL	{ int fileport_makeport(int fd, user_addr_t portnamep); }
+431	AUE_NULL	ALL	{ int fileport_makefd(mach_port_name_t port); }
+432	AUE_NULL	ALL	{ int audit_session_port(au_asid_t asid, user_addr_t portnamep); }
+433	AUE_NULL	ALL	{ int pid_suspend(int pid); }
+434	AUE_NULL	ALL	{ int pid_resume(int pid); }
 #if CONFIG_EMBEDDED
-432	AUE_NULL	ALL	{ int fileport_makeport(int fd, user_addr_t portnamep); }
-433	AUE_NULL	ALL	{ int fileport_makefd(mach_port_name_t port); }
+435	AUE_NULL	ALL	{ int pid_hibernate(int pid); }
+436	AUE_NULL	ALL	{ int pid_shutdown_sockets(int pid, int level); }
 #else
-432	AUE_NULL	ALL	{ int nosys(void); } 
-433	AUE_NULL	ALL	{ int nosys(void); } 
+435	AUE_NULL	ALL	{ int nosys(void); } 
+436	AUE_NULL	ALL	{ int nosys(void); }
 #endif
+437	AUE_NULL	ALL	{ int nosys(void); } { old shared_region_slide_np }
+438	AUE_NULL	ALL	{ int shared_region_map_and_slide_np(int fd, uint32_t count, const struct shared_file_mapping_np *mappings, uint32_t slide, uint64_t* slide_start, uint32_t slide_size) NO_SYSCALL_STUB; }
+
diff --git a/bsd/kern/sysv_ipc.c b/bsd/kern/sysv_ipc.c
index 95c23d418..8f56757c4 100644
--- a/bsd/kern/sysv_ipc.c
+++ b/bsd/kern/sysv_ipc.c
@@ -60,46 +60,101 @@
 
 #include <sys/param.h>
 #include <sys/ipc.h>
+#include <sys/stat.h>	/* mode constants */
 #include <sys/ucred.h>
 #include <sys/kauth.h>
 
 
 /*
  * Check for ipc permission
- *
- * XXX: Should pass proc argument so that we can pass 
- * XXX: proc->p_acflag to suser()
  */
 
+
 /*
+ * ipcperm
+ *
+ *	perm->mode			mode of the object
+ *	mode_req			mode bits we want to test
+ *
  * Returns:	0			Success
  *		EPERM
  *		EACCES
+ *
+ * Notes:	The IPC_M bit is special, in that it may only be granted to
+ *		root, the creating user, or the owning user.
+ *
+ *		This code does not use posix_cred_access() because of the
+ *		need to check both creator and owner separately when we are
+ *		considering a rights grant.  Because of this, we need to do
+ *		two evaluations when the values are unequal, which can lead
+ *		us to defeat the callout avoidance optimization.  So we do
+ *		the work here, inline.  This is less than optimal for any
+ *		future work involving opacity of POSIX credentials.
+ *
+ *		Setting up the mode_owner / mode_group / mode_world implicitly
+ *		masks the IPC_M bit off.  This is intentional.
+ *
+ *		See the posix_cred_access() implementation for algorithm
+ *		information.
  */
 int
-ipcperm(kauth_cred_t cred, struct ipc_perm *perm, int mode)
+ipcperm(kauth_cred_t cred, struct ipc_perm *perm, int mode_req)
 {
+	uid_t	uid = kauth_cred_getuid(cred);	/* avoid multiple calls */
+	int	want_mod_controlinfo = (mode_req & IPC_M);
+	int	is_member;
+	mode_t	mode_owner = (perm->mode & S_IRWXU);
+	mode_t	mode_group = (perm->mode & S_IRWXG) << 3;
+	mode_t	mode_world = (perm->mode & S_IRWXO) << 6;
 
+	/* Grant all rights to super user */
 	if (!suser(cred, (u_short *)NULL))
 		return (0);
 
-	/* Check for user match. */
-	if (kauth_cred_getuid(cred) != perm->cuid && kauth_cred_getuid(cred) != perm->uid) {
-		int is_member;
+	/* Grant or deny rights based on ownership */
+	if (uid == perm->cuid || uid == perm->uid) {
+		if (want_mod_controlinfo)
+			return (0);
 
-		if (mode & IPC_M)
+		return ((mode_req & mode_owner) == mode_req ? 0 : EACCES);
+	} else {
+		/* everyone else who wants to modify control info is denied */
+		if (want_mod_controlinfo)
 			return (EPERM);
-		/* Check for group match. */
-		mode >>= 3;
-		if ((kauth_cred_ismember_gid(cred, perm->gid, &is_member) || !is_member) &&
-		    (kauth_cred_ismember_gid(cred, perm->cgid, &is_member) || !is_member)) {
-			/* Check for `other' match. */
-			mode >>= 3;
-	}
 	}
 
-	if (mode & IPC_M)
+	/*
+	 * Combined group and world rights check, if no owner rights; positive
+	 * assertion of gid/cgid equality avoids an extra callout in the
+	 * common case.
+	 */
+	if ((mode_req & mode_group & mode_world) == mode_req) {
 		return (0);
-
-	return ((mode & perm->mode) == mode ? 0 : EACCES);
+	} else {
+		if ((mode_req & mode_group) != mode_req) {
+			if ((!kauth_cred_ismember_gid(cred, perm->gid, &is_member) && is_member) &&
+			    ((perm->gid == perm->cgid) ||
+			     (!kauth_cred_ismember_gid(cred, perm->cgid, &is_member) && is_member))) {
+			    	return (EACCES);
+			} else {
+				if ((mode_req & mode_world) != mode_req) {
+					return (EACCES);
+				} else {
+					return (0);
+				}
+			}
+		} else {
+			if ((!kauth_cred_ismember_gid(cred, perm->gid, &is_member) && is_member) ||
+			    ((perm->gid != perm->cgid) &&
+			     (!kauth_cred_ismember_gid(cred, perm->cgid, &is_member) && is_member))) {
+			    	return (0);
+			} else {
+				if ((mode_req & mode_world) != mode_req) {
+					return (EACCES);
+				} else {
+					return (0);
+				}
+			}
+		}
+	}
 }
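
The rewritten ipcperm() compares everything in the owner bit positions: IPC_R and IPC_W are defined as the owner-class bits (0400 and 0200), so the group and world mode bits are shifted up by 3 and 6 before masking, exactly as the mode_group/mode_world setup above does. A small demonstration of the alignment (IPC_* values restated from sys/ipc.h for self-containment):

	#include <stdio.h>
	#include <sys/stat.h>

	#define IPC_R	000400	/* read permission, expressed in the owner bits */
	#define IPC_W	000200

	int main(void)
	{
		unsigned short mode = 0640;	/* rw- r-- --- */

		/* Shift group/world bits up into the owner positions so all
		 * three classes compare against the same IPC_R/IPC_W bits. */
		unsigned short mode_owner = (mode & S_IRWXU);
		unsigned short mode_group = (mode & S_IRWXG) << 3;
		unsigned short mode_world = (mode & S_IRWXO) << 6;

		printf("owner=%o group=%o world=%o\n", (unsigned)mode_owner,
		    (unsigned)mode_group, (unsigned)mode_world);
		printf("group read:  %s\n", ((IPC_R & mode_group) == IPC_R) ? "yes" : "no");
		printf("group write: %s\n", ((IPC_W & mode_group) == IPC_W) ? "yes" : "no");
		return 0;
	}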
diff --git a/bsd/kern/sysv_msg.c b/bsd/kern/sysv_msg.c
index 7ed083eb9..daca44630 100644
--- a/bsd/kern/sysv_msg.c
+++ b/bsd/kern/sysv_msg.c
@@ -667,8 +667,8 @@ msgget(__unused struct proc *p, struct msgget_args *uap, int32_t *retval)
 		msqptr->u.msg_perm._key = key;
 		msqptr->u.msg_perm.cuid = kauth_cred_getuid(cred);
 		msqptr->u.msg_perm.uid = kauth_cred_getuid(cred);
-		msqptr->u.msg_perm.cgid = cred->cr_gid;
-		msqptr->u.msg_perm.gid = cred->cr_gid;
+		msqptr->u.msg_perm.cgid = kauth_cred_getgid(cred);
+		msqptr->u.msg_perm.gid = kauth_cred_getgid(cred);
 		msqptr->u.msg_perm.mode = (msgflg & 0777);
 		/* Make sure that the returned msqid is unique */
 		msqptr->u.msg_perm._seq++;
@@ -1576,7 +1576,7 @@ IPCS_msg_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
 }
 
 SYSCTL_DECL(_kern_sysv_ipcs);
-SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, msg, CTLFLAG_RW|CTLFLAG_ANYBODY,
+SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, msg, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
 	0, 0, IPCS_msg_sysctl,
 	"S,IPCS_msg_command",
 	"ipcs msg command interface");
diff --git a/bsd/kern/sysv_sem.c b/bsd/kern/sysv_sem.c
index 0e44029cf..ed43ec893 100644
--- a/bsd/kern/sysv_sem.c
+++ b/bsd/kern/sysv_sem.c
@@ -999,8 +999,8 @@ semget(__unused struct proc *p, struct semget_args *uap, int32_t *retval)
 		sema[semid].u.sem_perm._key = key;
 		sema[semid].u.sem_perm.cuid = kauth_cred_getuid(cred);
 		sema[semid].u.sem_perm.uid = kauth_cred_getuid(cred);
-		sema[semid].u.sem_perm.cgid = cred->cr_gid;
-		sema[semid].u.sem_perm.gid = cred->cr_gid;
+		sema[semid].u.sem_perm.cgid = kauth_cred_getgid(cred);
+		sema[semid].u.sem_perm.gid = kauth_cred_getgid(cred);
 		sema[semid].u.sem_perm.mode = (semflg & 0777) | SEM_ALLOC;
 		sema[semid].u.sem_perm._seq =
 		    (sema[semid].u.sem_perm._seq + 1) & 0x7fff;
@@ -1092,6 +1092,15 @@ semop(struct proc *p, struct semop_args *uap, int32_t *retval)
 		goto semopout;
 	}
 
+	/*  OK for LP64, since sizeof(struct sembuf) is currently invariant */
+	if ((eval = copyin(uap->sops, &sops, nsops * sizeof(struct sembuf))) != 0) {
+#ifdef SEM_DEBUG
+		printf("eval = %d from copyin(%08x, %08x, %ld)\n", eval,
+		    uap->sops, &sops, nsops * sizeof(struct sembuf));
+#endif
+		goto semopout;
+	}
+
 #if CONFIG_MACF
 	/*
 	 * Initial pass thru sops to see what permissions are needed.
@@ -1110,15 +1119,6 @@ semop(struct proc *p, struct semop_args *uap, int32_t *retval)
 		goto semopout;
 #endif
 
-	/*  OK for LP64, since sizeof(struct sembuf) is currently invariant */
-	if ((eval = copyin(uap->sops, &sops, nsops * sizeof(struct sembuf))) != 0) {
-#ifdef SEM_DEBUG
-		printf("eval = %d from copyin(%08x, %08x, %ld)\n", eval,
-		    uap->sops, &sops, nsops * sizeof(struct sembuf));
-#endif
-		goto semopout;
-	}
-
 	/*
 	 * Loop trying to satisfy the vector of requests.
 	 * If we reach a point where we must wait, any requests already
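
The paired semop() hunks move the copyin() of the user's operation vector ahead of the permission pass: the MACF loop inspects sops[i] to decide what permissions are needed, so before this change it apparently walked a not-yet-filled kernel buffer. The ordering rule in miniature, with copyin modeled as memcpy and all names illustrative:

	#include <string.h>
	#include <errno.h>

	struct sembuf { unsigned short sem_num; short sem_op; short sem_flg; };

	/* Stand-in for copyin(9): fill the kernel copy before anything reads it. */
	static int copyin_model(const void *uaddr, void *kaddr, size_t len)
	{
		memcpy(kaddr, uaddr, len);
		return 0;
	}

	static int semop_model(const struct sembuf *usops, int nsops)
	{
		struct sembuf sops[8];	/* toy bound; nsops <= 8 assumed */
		int eval;

		if ((eval = copyin_model(usops, sops, nsops * sizeof(struct sembuf))) != 0)
			return eval;
		/* Only now is it safe for a permission pass (e.g. a MACF-style
		 * hook) to look at sops[i].sem_op. */
		return sops[0].sem_op < 0 ? EACCES : 0;
	}

	int main(void)
	{
		struct sembuf b = { 0, 1, 0 };
		return semop_model(&b, 1);
	}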
@@ -1539,19 +1539,19 @@ out:
 
 /* SYSCTL_NODE(_kern, KERN_SYSV, sysv, CTLFLAG_RW, 0, "SYSV"); */
 extern struct sysctl_oid_list sysctl__kern_sysv_children;
-SYSCTL_PROC(_kern_sysv, OID_AUTO, semmni, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_kern_sysv, OID_AUTO, semmni, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &limitseminfo.semmni, 0, &sysctl_seminfo ,"I","semmni");
 
-SYSCTL_PROC(_kern_sysv, OID_AUTO, semmns, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_kern_sysv, OID_AUTO, semmns, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &limitseminfo.semmns, 0, &sysctl_seminfo ,"I","semmns");
 
-SYSCTL_PROC(_kern_sysv, OID_AUTO, semmnu, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_kern_sysv, OID_AUTO, semmnu, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &limitseminfo.semmnu, 0, &sysctl_seminfo ,"I","semmnu");
 
-SYSCTL_PROC(_kern_sysv, OID_AUTO, semmsl, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_kern_sysv, OID_AUTO, semmsl, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &limitseminfo.semmsl, 0, &sysctl_seminfo ,"I","semmsl");
     
-SYSCTL_PROC(_kern_sysv, OID_AUTO, semume, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_kern_sysv, OID_AUTO, semume, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &limitseminfo.semume, 0, &sysctl_seminfo ,"I","semume");
 
 
@@ -1662,7 +1662,7 @@ IPCS_sem_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
 }
 
 SYSCTL_DECL(_kern_sysv_ipcs);
-SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, sem, CTLFLAG_RW|CTLFLAG_ANYBODY,
+SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, sem, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
 	0, 0, IPCS_sem_sysctl,
 	"S,IPCS_sem_command",
 	"ipcs sem command interface");
diff --git a/bsd/kern/sysv_shm.c b/bsd/kern/sysv_shm.c
index 4a93dc597..25a484798 100644
--- a/bsd/kern/sysv_shm.c
+++ b/bsd/kern/sysv_shm.c
@@ -774,7 +774,7 @@ shmget_allocate_segment(struct proc *p, struct shmget_args *uap, int mode,
 	shmid = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
 
 	shmseg->u.shm_perm.cuid = shmseg->u.shm_perm.uid = kauth_cred_getuid(cred);
-	shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = cred->cr_gid;
+	shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = kauth_cred_getgid(cred);
 	shmseg->u.shm_perm.mode = (shmseg->u.shm_perm.mode & SHMSEG_WANTED) |
 	    (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
 	shmseg->u.shm_segsz = uap->size;
@@ -1165,26 +1165,26 @@ ipcs_shm_sysctl_out:
 	return(error);
 }
 
-SYSCTL_NODE(_kern, KERN_SYSV, sysv, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "SYSV");
+SYSCTL_NODE(_kern, KERN_SYSV, sysv, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, 0, "SYSV");
 
-SYSCTL_PROC(_kern_sysv, OID_AUTO, shmmax, CTLTYPE_QUAD | CTLFLAG_RW,
+SYSCTL_PROC(_kern_sysv, OID_AUTO, shmmax, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
     &shminfo.shmmax, 0, &sysctl_shminfo ,"Q","shmmax");
 
-SYSCTL_PROC(_kern_sysv, OID_AUTO, shmmin, CTLTYPE_QUAD | CTLFLAG_RW,
+SYSCTL_PROC(_kern_sysv, OID_AUTO, shmmin, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
     &shminfo.shmmin, 0, &sysctl_shminfo ,"Q","shmmin");
 
-SYSCTL_PROC(_kern_sysv, OID_AUTO, shmmni, CTLTYPE_QUAD | CTLFLAG_RW,
+SYSCTL_PROC(_kern_sysv, OID_AUTO, shmmni, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
     &shminfo.shmmni, 0, &sysctl_shminfo ,"Q","shmmni");
 
-SYSCTL_PROC(_kern_sysv, OID_AUTO, shmseg, CTLTYPE_QUAD | CTLFLAG_RW,
+SYSCTL_PROC(_kern_sysv, OID_AUTO, shmseg, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
     &shminfo.shmseg, 0, &sysctl_shminfo ,"Q","shmseg");
 
-SYSCTL_PROC(_kern_sysv, OID_AUTO, shmall, CTLTYPE_QUAD | CTLFLAG_RW,
+SYSCTL_PROC(_kern_sysv, OID_AUTO, shmall, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
     &shminfo.shmall, 0, &sysctl_shminfo ,"Q","shmall");
 
-SYSCTL_NODE(_kern_sysv, OID_AUTO, ipcs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "SYSVIPCS");
+SYSCTL_NODE(_kern_sysv, OID_AUTO, ipcs, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, 0, "SYSVIPCS");
 
-SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, shm, CTLFLAG_RW|CTLFLAG_ANYBODY,
+SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, shm, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
 	0, 0, IPCS_shm_sysctl,
 	"S,IPCS_shm_command",
 	"ipcs shm command interface");
diff --git a/bsd/kern/trace.codes b/bsd/kern/trace.codes
new file mode 100644
index 000000000..3792f3d37
--- /dev/null
+++ b/bsd/kern/trace.codes
@@ -0,0 +1,2149 @@
+0x1020000	KTrap_DivideError
+0x1020004	KTrap_Debug
+0x1020008	KTrap_NMI
+0x102000c	KTrap_Int3
+0x1020010	KTrap_Overflow
+0x1020014	KTrap_BoundRange
+0x1020018	KTrap_InvalidOpcode
+0x102001c	KTrap_DeviceNotAvail
+0x1020020	KTrap_DoubleFault
+0x1020024	KTrap_Coprocessor
+0x1020028	KTrap_InvalidTSS
+0x102002c	KTrap_SegmentNotPresent
+0x1020030	KTrap_StackFault
+0x1020034	KTrap_GeneralProtection
+0x1020038	KTrap_PageFault
+0x102003c	KTrap_unknown
+0x1020040	KTrap_FloatPointError
+0x1020044	KTrap_AlignmentCheck
+0x1020048	KTrap_MachineCheck
+0x102004c	KTrap_SIMD_FP
+0x10203fc	KTrap_Preempt
+0x1050000	INTERRUPT
+0x1070000	UTrap_DivideError
+0x1070004	UTrap_Debug
+0x1070008	UTrap_NMI
+0x107000c	UTrap_Int3
+0x1070010	UTrap_Overflow
+0x1070014	UTrap_BoundRange
+0x1070018	UTrap_InvalidOpcode
+0x107001c	UTrap_DeviceNotAvail
+0x1070020	UTrap_DoubleFault
+0x1070024	UTrap_Coprocessor
+0x1070028	UTrap_InvalidTSS
+0x107002c	UTrap_SegmentNotPresent
+0x1070030	UTrap_StackFault
+0x1070034	UTrap_GeneralProtection
+0x1070038	UTrap_PageFault
+0x107003c	UTrap_unknown
+0x1070040	UTrap_FloatPointError
+0x1070044	UTrap_AlignmentCheck
+0x1070048	UTrap_MachineCheck
+0x107004c	UTrap_SIMD_FP
+0x1090000	DecrTrap
+0x1090004	DecrSet
+0x1090008	TimerCallIntr
+0x109000c	pmsStep
+0x1090010	TimerMigration
+0x1090014	rdHPET
+0x1090018	set_tsc_deadline
+0x10c0000	MACH_SysCall
+0x10c0004	MSC_kern_invalid_#1
+0x10c0008	MSC_kern_invalid_#2
+0x10c000c	MSC_kern_invalid_#3
+0x10c0010	MSC_kern_invalid_#4
+0x10c0014	MSC_kern_invalid_#5
+0x10c0018	MSC_kern_invalid_#6
+0x10c001c	MSC_kern_invalid_#7
+0x10c0020	MSC_kern_invalid_#8
+0x10c0024	MSC_kern_invalid_#9
+0x10c0028	MSC_kern_invalid_#10
+0x10c002c	MSC_kern_invalid_#11
+0x10c0030	MSC_kern_invalid_#12
+0x10c0034	MSC_kern_invalid_#13
+0x10c0038	MSC_kern_invalid_#14
+0x10c003c	MSC_kern_invalid_#15
+0x10c0040	MSC_kern_invalid_#16
+0x10c0044	MSC_kern_invalid_#17
+0x10c0048	MSC_kern_invalid_#18
+0x10c004c	MSC_kern_invalid_#19
+0x10c0050	MSC_kern_invalid_#20
+0x10c0054	MSC_kern_invalid_#21
+0x10c0058	MSC_kern_invalid_#22
+0x10c005c	MSC_kern_invalid_#23
+0x10c0060	MSC_kern_invalid_#24
+0x10c0064	MSC_kern_invalid_#25
+0x10c0068	MSC_mach_reply_port
+0x10c006c	MSC_thread_self_trap
+0x10c0070	MSC_task_self_trap
+0x10c0074	MSC_host_self_trap
+0x10c0078	MSC_kern_invalid_#30
+0x10c007c	MSC_mach_msg_trap
+0x10c0080	MSC_mach_msg_overwrite_trap
+0x10c0084	MSC_semaphore_signal_trap
+0x10c0088	MSC_semaphore_signal_all_trap
+0x10c008c	MSC_semaphore_signal_thread_trap
+0x10c0090	MSC_semaphore_wait_trap
+0x10c0094	MSC_semaphore_wait_signal_trap
+0x10c0098	MSC_semaphore_timedwait_trap
+0x10c009c	MSC_semaphore_timedwait_signal_trap
+0x10c00a0	MSC_kern_invalid_#40
+0x10c00a4	MSC_kern_invalid_#41
+0x10c00a8	MSC_kern_invalid_#42
+0x10c00ac	MSC_map_fd
+0x10c00b0	MSC_task_name_for_pid
+0x10c00b4	MSC_task_for_pid
+0x10c00b8	MSC_pid_for_task
+0x10c00bc	MSC_kern_invalid_#47
+0x10c00c0	MSC_macx_swapon
+0x10c00c4	MSC_macx_swapoff
+0x10c00c8	MSC_kern_invalid_#50
+0x10c00cc	MSC_macx_triggers
+0x10c00d0	MSC_macx_backing_store_suspend
+0x10c00d4	MSC_macx_backing_store_recovery
+0x10c00d8	MSC_kern_invalid_#54
+0x10c00dc	MSC_kern_invalid_#55
+0x10c00e0	MSC_kern_invalid_#56
+0x10c00e4	MSC_kern_invalid_#57
+0x10c00e8	MSC_pfz_exit
+0x10c00ec	MSC_swtch_pri
+0x10c00f0	MSC_swtch
+0x10c00f4	MSC_thread_switch
+0x10c00f8	MSC_clock_sleep_trap
+0x10c00fc	MSC_kern_invalid_#63
+0x10c0100	MSC_kern_invalid_#64
+0x10c0104	MSC_kern_invalid_#65
+0x10c0108	MSC_kern_invalid_#66
+0x10c010c	MSC_kern_invalid_#67
+0x10c0110	MSC_kern_invalid_#68
+0x10c0114	MSC_kern_invalid_#69
+0x10c0118	MSC_kern_invalid_#70
+0x10c011c	MSC_kern_invalid_#71
+0x10c0120	MSC_kern_invalid_#72
+0x10c0124	MSC_kern_invalid_#73
+0x10c0128	MSC_kern_invalid_#74
+0x10c012c	MSC_kern_invalid_#75
+0x10c0130	MSC_kern_invalid_#76
+0x10c0134	MSC_kern_invalid_#77
+0x10c0138	MSC_kern_invalid_#78
+0x10c013c	MSC_kern_invalid_#79
+0x10c0140	MSC_kern_invalid_#80
+0x10c0144	MSC_kern_invalid_#81
+0x10c0148	MSC_kern_invalid_#82
+0x10c014c	MSC_kern_invalid_#83
+0x10c0150	MSC_kern_invalid_#84
+0x10c0154	MSC_kern_invalid_#85
+0x10c0158	MSC_kern_invalid_#86
+0x10c015c	MSC_kern_invalid_#87
+0x10c0160	MSC_kern_invalid_#88
+0x10c0164	MSC_mach_timebase_info
+0x10c0168	MSC_mach_wait_until
+0x10c016c	MSC_mk_timer_create
+0x10c0170	MSC_mk_timer_destroy
+0x10c0174	MSC_mk_timer_arm
+0x10c0178	MSC_mk_timer_cancel
+0x10c017c	MSC_kern_invalid_#95
+0x10c0180	MSC_kern_invalid_#96
+0x10c0184	MSC_kern_invalid_#97
+0x10c0188	MSC_kern_invalid_#98
+0x10c018c	MSC_kern_invalid_#99
+0x10c0190	MSC_iokit_user_client
+0x10c0194	MSC_kern_invalid_#101
+0x10c0198	MSC_kern_invalid_#102
+0x10c019c	MSC_kern_invalid_#103
+0x10c01a0	MSC_kern_invalid_#104
+0x10c01a4	MSC_kern_invalid_#105
+0x10c01a8	MSC_kern_invalid_#106
+0x10c01ac	MSC_kern_invalid_#107
+0x10c01b0	MSC_kern_invalid_#108
+0x10c01b4	MSC_kern_invalid_#109
+0x10c01b8	MSC_kern_invalid_#110
+0x10c01bc	MSC_kern_invalid_#111
+0x10c01c0	MSC_kern_invalid_#112
+0x10c01c4	MSC_kern_invalid_#113
+0x10c01c8	MSC_kern_invalid_#114
+0x10c01cc	MSC_kern_invalid_#115
+0x10c01d0	MSC_kern_invalid_#116
+0x10c01d4	MSC_kern_invalid_#117
+0x10c01d8	MSC_kern_invalid_#118
+0x10c01dc	MSC_kern_invalid_#119
+0x10c01e0	MSC_kern_invalid_#120
+0x10c01e4	MSC_kern_invalid_#121
+0x10c01e8	MSC_kern_invalid_#122
+0x10c01ec	MSC_kern_invalid_#123
+0x10c01f0	MSC_kern_invalid_#124
+0x10c01f4	MSC_kern_invalid_#125
+0x10c01f8	MSC_kern_invalid_#126
+0x10c01fc	MSC_kern_invalid_#127
+0x1300004	MACH_Pageout
+0x1300008	MACH_vmfault
+0x1300100	MACH_purgable_token_add
+0x1300104	MACH_purgable_token_delete
+0x1300108	MACH_purgable_token_ripened
+0x130010c	MACH_purgable_token_purged
+0x1300400	MACH_vm_check_zf_delay
+0x1300404	MACH_vm_cow_delay
+0x1300408	MACH_vm_zf_delay
+0x1300410	MACH_vm_pageout_scan
+0x1300414	MACH_vm_pageout_balanceQ
+0x1300418	MACH_vm_pageout_freelist
+0x130041c	MACH_vm_pageout_purge_one
+0x1300420	MACH_vm_pageout_cache_evict
+0x1300424	MACH_vm_pageout_thread_block
+0x1300480	MACH_vm_upl_page_wait
+0x1300484	MACH_vm_iopl_page_wait
+0x1400000	MACH_SCHED
+0x1400004	MACH_STKATTACH
+0x1400008	MACH_STKHANDOFF
+0x140000c	MACH_CALLCONT
+0x1400010	MACH_CALLOUT
+0x1400014	MACH_ServiceT
+0x1400018	MACH_MKRUNNABLE
+0x140001c	MACH_PROMOTE
+0x1400020	MACH_DEMOTE
+0x1400024	MACH_IDLE
+0x1400028	MACH_STACK_DEPTH
+0x140002c	MACH_MOVED
+0x1400030	MACH_FAIRSHARE_ENTER
+0x1400034	MACH_FAIRSHARE_EXIT
+0x1400038	MACH_FAILSAFE
+0x1400040	MACH_STKHANDOFF_BT
+0x1400044	MACH_SCHED_BT
+0x1400048	MACH_IDLE_BT
+0x1400050	MACH_SCHED_GET_URGENCY
+0x1400054	MACH_SCHED_URGENCY
+0x1400058	MACH_SCHED_REDISPATCH
+0x140005C	MACH_SCHED_REMOTE_AST
+0x1400060	MACH_SCHED_LPA_BROKEN
+0x1500000	MACH_MSGID_INVALID
+0x1600000	MTX_SLEEP
+0x1600004	MTX_SLEEP_DEADLINE
+0x1600008	MTX_WAIT
+0x160000c	MTX_WAKEUP
+0x1600010	MTX_LOCK
+0x1600014	MTX_UNLOCK
+0x1600080	MTX_x86_wait
+0x1600084	MTX_x86_wakeup
+0x1600088	MTX_x86_spin
+0x160008c	MTX_x86_acquire
+0x1600090	MTX_x86_demote
+0x1600200	MTX_full_lock
+0x1600400	RW_EXCL_WaitForWriter
+0x1600404	RW_EXCL_WaitForReaders
+0x1600408	RW_SHRD_WaitForWriter
+0x160040c	RW_SHRDtoEXCL_FailedUpgrade
+0x1600410	RW_SHRDtoEXCL_WaitForReaders
+0x1600414	RW_EXCLtoSHRD
+0x1600418	RW_EXCL_SpinForWriter
+0x160041c	RW_EXCL_WaitForWriter
+0x1600420	RW_EXCL_SpinForReaders
+0x1600424	RW_EXCL_WaitForReaders
+0x1600428	RW_SHRD_unlock
+0x160042c	RW_EXCL_unlock
+0x1600440	RW_SHRD_SpinForWriter
+0x1600444	RW_SHRD_WaitForWriter
+0x1600448	RW_SHRDtoEXCL_SpinForReaders
+0x160044c	RW_SHRDtoEXCL_WaitForReaders
+0x1700000	PMAP_create
+0x1700004	PMAP_destroy
+0x1700008	PMAP_protect
+0x170000c	PMAP_page_protect
+0x1700010	PMAP_enter
+0x1700014	PMAP_remove
+0x1700018	PMAP_nest
+0x170001c	PMAP_unnest
+0x1700020	PMAP_flush_TLBS
+0x1700024	PMAP_update_interrupt
+0x1700028	PMAP_attribute_clear
+0x2010000	L_IP_In_Beg
+0x2010004	L_IP_Out_Beg
+0x2010008	L_IP_In_End
+0x201000c	L_IP_Out_End
+0x2010404	F_IP_Output
+0x2010800	F_IP_Input
+0x2010c00	F_In_CkSum
+0x2020000	L_ARP_Req
+0x2020004	L_ARP_Resp
+0x2020008	L_ARP_Reply
+0x202000c	L_ARP_Timo
+0x2020010	L_ARP_Look
+0x2020014	L_ARP_Input
+0x2030000	L_UDP_In_Beg
+0x2030004	L_UDP_Out_Beg
+0x2030008	L_UDP_In_End
+0x203000c	L_UDP_Out_End
+0x2031400	F_UDP_Input
+0x2031804	F_UDP_Output
+0x2040000	L_TCP_In_Beg
+0x2040004	L_TCP_Out_Beg
+0x2040008	L_TCP_In_End
+0x204000c	L_TCP_Out_End
+0x2040c00	F_TCP_Input
+0x2041004	F_TCP_Output
+0x2041400	F_TCP_FastT
+0x2041404	F_TCP_SlowT
+0x2041408	F_TCP_Close
+0x2041800	F_PCB_Lookup
+0x2041804	F_PCB_HshLkup
+0x2041c00	F_TCP_NewConn
+0x2041d00	F_TCP_gotSync
+0x20b0010	F_SBDrop
+0x20b0014	F_SBAppend
+0x20b0404	F_SendMsg
+0x20b0804	F_SendTo
+0x20b0c04	F_SendIt
+0x20b1004	F_SoSend
+0x20b1008	F_SoSend_CopyD
+0x20b1400	F_RecvFrom
+0x20b1800	F_RecvMsg
+0x20b1c00	F_RecvIt
+0x20b2000	F_SoReceive
+0x20b2100	F_SoShutdown
+0x20b2400	F_SoAccept
+0x20b2800	F_sendfile
+0x20b2804	F_sendfile_wait
+0x20b2808	F_sendfile_read
+0x20b280c	F_sendfile_send
+0x2650004	AT_DDPinput
+0x2f00000	F_FreemList
+0x2f00004	F_m_copym
+0x2f00008	F_getpackets
+0x2f0000c	F_getpackethdrs
+0x3010000	HFS_Write
+0x3010004	HFS_Fsync
+0x3010008	HFS_Close
+0x301000c	HFS_Remove
+0x3010010	HFS_Create
+0x3010014	HFS_Inactive
+0x3010018	HFS_Reclaim
+0x301001C	HFS_Truncate
+0x3010028	vinvalbuf
+0x3010030	HFS_Read
+0x3010034	HFS_RL_ADD
+0x3010038	HFS_RL_REMOVE
+0x301003c	MACH_copyiostr
+0x3010040	UIO_copyout
+0x3010044	UIO_copyin
+0x3010048	MACH_copyio
+0x301004c	Cl_bp
+0x3010050	Cl_iodone
+0x3010054	Cl_ubc_dump
+0x3010058	Cl_io
+0x301005c	Cl_zero
+0x3010060	Cl_cmap
+0x3010068	Cl_ioread
+0x301006c	Cl_iowrite
+0x3010070	Cl_ioabort
+0x3010074	Cl_zero_commit
+0x3010078	Cl_wrdel_commit
+0x301007c	Cl_read_abort
+0x3010080	Cl_read_copy
+0x3010084	Cl_read_list_req
+0x3010088	Cl_phys_uiomove
+0x301008c	Cl_read_commit
+0x3010090	VFS_LOOKUP
+0x3010094	Cl_read_uplmap
+0x3010098	Cl_read_uplunmap
+0x301009C	VFS_LOOKUP_DONE
+0x30100a0	Cl_write_copy
+0x30100a4	Cl_write_list_req
+0x30100a8	Cl_write_uiomove
+0x30100ac	Cl_write_zeros
+0x30100b0	Cl_write_delayed
+0x30100b4	Cl_write_abort
+0x30100b8	Cl_zero_info
+0x30100c0	Cl_rd_ahead
+0x30100c4	Cl_rd_prefetch
+0x30100c8	Cl_rd_prefabort
+0x30100cc	Cl_writepush
+0x30100d0	Cl_pageout
+0x30100d4	Cl_push
+0x30100e0	Cl_pagein
+0x30100f0	Cl_advisory_rd
+0x30100f4	Cl_adv_fault_list
+0x30100f8	Cl_adv_abort1
+0x30100fc	Cl_adv_abort2
+0x3010118	Cl_read_direct
+0x301011c	Cl_ncpr_uiomv
+0x3010120	Cl_ncpr_getupl
+0x3010124	Cl_ncpr_clio
+0x301012c	Cl_write_direct
+0x3010130	Cl_ncpw_getupl
+0x3010134	Cl_ncpw_clio
+0x3010138	Cl_sparse_collect
+0x301013c	Cl_sparse_push
+0x3010140	Cl_sparse_add
+0x3010144	Cl_release
+0x3010148	Cl_drt_emptyfree
+0x301014c	Cl_drt_retcluster
+0x3010150	Cl_drt_alloctable
+0x3010154	Cl_drt_insert
+0x3010158	Cl_drt_mark
+0x301015c	Cl_drt_6
+0x3010160	Cl_drt_freetable
+0x3010170	Cl_read_contig_getupl
+0x3010174	Cl_write_contig_getupl
+0x3010178	Cl_io_type
+0x301017c	Cl_wait_IO
+0x3010180	Vnode_Pagein
+0x3010184	throttle_lowpri_io
+0x3010200	Vnode_Pageout
+0x3010280	Vnode_WaitForWrites
+0x3010300	PageoutThrottle
+0x3010340	SuperCluster
+0x3010344	PS_Offsets
+0x3010348	PS_Indexes
+0x301034c	Dirty_Indexes
+0x3010350	PS_Write
+0x3010354	PS_WriteComplete
+0x3010380	PageoutCollect
+0x3010384	PagesOnInactive_Q
+0x3010388	PagesOnActive_Q
+0x301038c	PageoutScan
+0x3010390	PageoutWait
+0x3010394	PageoutWakeup1
+0x3010398	PageoutWakeup2
+0x301039c	PageoutWakeup3
+0x3010400	NFS_doio
+0x3010404	NFS_doio_offsets
+0x3010408	NFS_doio_zero_read
+0x301040c	NFS_doio_zero_write
+0x3010410	NFS_doio_invalidate
+0x3010414	NFS_doio_retry
+0x3010418	NFS_doio_done
+0x3010500	NFS_pagein_zero
+0x3010504	NFS_pageout_zero
+0x3010508	NFS_pagein
+0x301050c	NFS_pageout
+0x3010600	BIO_write_list_req
+0x3010604	BIO_getblk_list_req
+0x3010608	BIO_getblk
+0x301060c	BIO_biodone
+0x3010610	BIO_brelse
+0x3010614	BIO_recovered_buf
+0x3010618	BIO_dumped_buf
+0x301061c	BIO_write_delayed
+0x3010620	BIO_acquire_error
+0x3010624	BIO_write_async
+0x3010628	BIO_write_sync
+0x301062c	BIO_flushdirty
+0x3010630	BIO_getblk_msleep
+0x3010700	VM_pageout_list_req
+0x3010704	VM_pagein_list_req
+0x3010800	NFS_setattr
+0x3010804	NFS_getattr
+0x3010808	NFS_read
+0x301080c	NFS_write
+0x3010810	NFS_truncate
+0x3010814	NFS_flush
+0x3010818	NFS_flush_again
+0x301081c	NFS_flush_bvec
+0x3010820	NFS_flush_upls
+0x3010824	NFS_commit
+0x3010828	NFS_flush_commit
+0x301082c	NFS_flush_done
+0x3010830	NFS_flush_busy
+0x3010834	NFS_flush_bwrite
+0x3010838	NFS_flush_normal
+0x301083c	NFS_loadattrcache
+0x3010840	NFS_getattrcache
+0x3010844	NFS_connect
+0x3010848	NFS_reply
+0x301084c	NFS_request
+0x3010850	NFS_softterm
+0x3010854	NFS_rcvunlock
+0x3010858	NFS_rcvlock
+0x301085c	NFS_timer
+0x3010860	NFS_vinvalbuf
+0x3010864	NFS_srvcommit
+0x3010868	NFS_srvfsync
+0x301086c	NFS_RdAhead
+0x3010870	NFS_srvread
+0x3010874	NFS_srvVOPREAD
+0x3010900	UBC_setsize
+0x3010904	UBC_sync_range
+0x3010908	UBC_upl_abort_range
+0x301090c	UBC_upl_commit_range
+0x3011000	UPL_iopl_req
+0x3011004	UPL_upl_req
+0x3011008	UPL_abort_range
+0x301100c	UPL_abort
+0x3011010	UPL_commit_range
+0x3011014	UPL_commit
+0x3011018	UPL_destroy
+0x301101c	UPL_commit_range_active
+0x3011020	UPL_commit_range_inactive
+0x3011024	UPL_map_enter_upl
+0x3011028	UPL_map_remove_upl
+0x301102c	UPL_commit_range_speculative
+0x3020000	P_WrData
+0x3020004	P_WrDataDone
+0x3020008	P_RdData
+0x302000C	P_RdDataDone
+0x3020010	P_WrDataAsync
+0x3020014	P_WrDataAsyncDone
+0x3020018	P_RdDataAsync
+0x302001C	P_RdDataAsyncDone
+0x3020020	P_WrMeta
+0x3020024	P_WrMetaDone
+0x3020028	P_RdMeta
+0x302002C	P_RdMetaDone
+0x3020030	P_WrMetaAsync
+0x3020034	P_WrMetaAsyncDone
+0x3020038	P_RdMetaAsync
+0x302003C	P_RdMetaAsyncDone
+0x3020040	P_PgOut
+0x3020044	P_PgOutDone
+0x3020048	P_PgIn
+0x302004C	P_PgInDone
+0x3020050	P_PgOutAsync
+0x3020054	P_PgOutAsyncDone
+0x3020058	P_PgInAsync
+0x302005C	P_PgInAsyncDone
+0x3020080	P_WrDataT
+0x3020084	P_WrDataTDone
+0x3020088	P_RdDataT
+0x302008C	P_RdDataTDone
+0x3020090	P_WrDataAsyncT
+0x3020094	P_WrDataAsyncTDone
+0x3020098	P_RdDataAsyncT
+0x302009C	P_RdDataAsyncTDone
+0x30200a0	P_WrMetaT
+0x30200A4	P_WrMetaTDone
+0x30200a8	P_RdMetaT
+0x30200AC	P_RdMetaTDone
+0x30200b0	P_WrMetaAsyncT
+0x30200B4	P_WrMetaAsyncTDone
+0x30200b8	P_RdMetaAsyncT
+0x30200BC	P_RdMetaAsyncTDone
+0x30200c0	P_PgOutT
+0x30200C4	P_PgOutTDone
+0x30200c8	P_PgInT
+0x30200CC	P_PgInTDone
+0x30200d0	P_PgOutAsyncT
+0x30200D4	P_PgOutAsyncTDone
+0x30200d8	P_PgInAsyncT
+0x30200DC	P_PgInAsyncTDone
+0x3020100	P_WrDataP
+0x3020104	P_WrDataPDone
+0x3020108	P_RdDataP
+0x302010C	P_RdDataPDone
+0x3020110	P_WrDataAsyncP
+0x3020114	P_WrDataAsyncPDone
+0x3020118	P_RdDataAsyncP
+0x302011C	P_RdDataAsyncPDone
+0x3020120	P_WrMetaP
+0x3020124	P_WrMetaPDone
+0x3020128	P_RdMetaP
+0x302012C	P_RdMetaPDone
+0x3020130	P_WrMetaAsyncP
+0x3020134	P_WrMetaAsyncPDone
+0x3020138	P_RdMetaAsyncP
+0x302013C	P_RdMetaAsyncPDone
+0x3020140	P_PgOutP
+0x3020144	P_PgOutPDone
+0x3020148	P_PgInP
+0x302014C	P_PgInPDone
+0x3020150	P_PgOutAsyncP
+0x3020154	P_PgOutAsyncPDone
+0x3020158	P_PgInAsyncP
+0x302015C	P_PgInAsyncPDone
+0x3050004	journal_flush
+0x3070004	BootCache_tag
+0x3070008	BootCache_batch
+0x4010004	proc_exit
+0x4010008	force_exit
+0x40c0000	BSC_SysCall
+0x40c0004	BSC_exit
+0x40c0008	BSC_fork
+0x40c000c	BSC_read
+0x40c0010	BSC_write
+0x40c0014	BSC_open
+0x40c0018	BSC_close
+0x40c001c	BSC_wait4
+0x40c0020	BSC_obs_creat
+0x40c0024	BSC_link
+0x40c0028	BSC_unlink
+0x40c002c	BSC_obs_execv
+0x40c0030	BSC_chdir
+0x40c0034	BSC_fchdir
+0x40c0038	BSC_mknod
+0x40c003c	BSC_chmod
+0x40c0040	BSC_chown
+0x40c0044	BSC_obs_break
+0x40c0048	BSC_getfsstat
+0x40c004c	BSC_obs_lseek
+0x40c0050	BSC_getpid
+0x40c0054	BSC_obs_mount
+0x40c0058	BSC_obs_unmount
+0x40c005c	BSC_setuid
+0x40c0060	BSC_getuid
+0x40c0064	BSC_geteuid
+0x40c0068	BSC_ptrace
+0x40c006c	BSC_recvmsg
+0x40c0070	BSC_sendmsg
+0x40c0074	BSC_recvfrom
+0x40c0078	BSC_accept
+0x40c007c	BSC_getpeername
+0x40c0080	BSC_getsockname
+0x40c0084	BSC_access
+0x40c0088	BSC_chflags
+0x40c008c	BSC_fchflags
+0x40c0090	BSC_sync
+0x40c0094	BSC_kill
+0x40c0098	BSC_obs_stat
+0x40c009c	BSC_getppid
+0x40c00a0	BSC_obs_lstat
+0x40c00a4	BSC_dup
+0x40c00a8	BSC_pipe
+0x40c00ac	BSC_getegid
+0x40c00b0	BSC_profil
+0x40c00b4	BSC_obs_ktrace
+0x40c00b8	BSC_sigaction
+0x40c00bc	BSC_getgid
+0x40c00c0	BSC_sigprocmask
+0x40c00c4	BSC_getlogin
+0x40c00c8	BSC_setlogin
+0x40c00cc	BSC_acct
+0x40c00d0	BSC_sigpending
+0x40c00d4	BSC_sigaltstack
+0x40c00d8	BSC_ioctl
+0x40c00dc	BSC_reboot
+0x40c00e0	BSC_revoke
+0x40c00e4	BSC_symlink
+0x40c00e8	BSC_readlink
+0x40c00ec	BSC_execve
+0x40c00f0	BSC_umask
+0x40c00f4	BSC_chroot
+0x40c00f8	BSC_obs_fstat
+0x40c00fc	BSC_#63
+0x40c0100	BSC_obs_getpagesize
+0x40c0104	BSC_msync
+0x40c0108	BSC_vfork
+0x40c010c	BSC_obs_vread
+0x40c0110	BSC_obs_vwrite
+0x40c0114	BSC_obs_sbrk
+0x40c0118	BSC_obs_sstk
+0x40c011c	BSC_obs_mmap
+0x40c0120	BSC_obs_vadvise
+0x40c0124	BSC_munmap
+0x40c0128	BSC_mprotect
+0x40c012c	BSC_madvise
+0x40c0130	BSC_obs_vhangup
+0x40c0134	BSC_obs_vlimit
+0x40c0138	BSC_mincore
+0x40c013c	BSC_getgroups
+0x40c0140	BSC_setgroups
+0x40c0144	BSC_getpgrp
+0x40c0148	BSC_setpgid
+0x40c014c	BSC_setitimer
+0x40c0150	BSC_obs_wait
+0x40c0154	BSC_swapon
+0x40c0158	BSC_getitimer
+0x40c015c	BSC_obs_gethostname
+0x40c0160	BSC_obs_sethostname
+0x40c0164	BSC_getdtablesize
+0x40c0168	BSC_dup2
+0x40c016c	BSC_obs_getdopt
+0x40c0170	BSC_fcntl
+0x40c0174	BSC_select
+0x40c0178	BSC_obs_setdopt
+0x40c017c	BSC_fsync
+0x40c0180	BSC_setpriority
+0x40c0184	BSC_socket
+0x40c0188	BSC_connect
+0x40c018c	BSC_obs_accept
+0x40c0190	BSC_getpriority
+0x40c0194	BSC_obs_send
+0x40c0198	BSC_obs_recv
+0x40c019c	BSC_obs_sigreturn
+0x40c01a0	BSC_bind
+0x40c01a4	BSC_setsockopt
+0x40c01a8	BSC_listen
+0x40c01ac	BSC_obs_vtimes
+0x40c01b0	BSC_obs_sigvec
+0x40c01b4	BSC_obs_sigblock
+0x40c01b8	BSC_obs_sigsetmask
+0x40c01bc	BSC_sigsuspend
+0x40c01c0	BSC_obs_sigstack
+0x40c01c4	BSC_obs_recvmsg
+0x40c01c8	BSC_obs_sendmsg
+0x40c01cc	BSC_obs_vtrace
+0x40c01d0	BSC_gettimeofday
+0x40c01d4	BSC_getrusage
+0x40c01d8	BSC_getsockopt
+0x40c01dc	BSC_obs_resuba
+0x40c01e0	BSC_readv
+0x40c01e4	BSC_writev
+0x40c01e8	BSC_settimeofday
+0x40c01ec	BSC_fchown
+0x40c01f0	BSC_fchmod
+0x40c01f4	BSC_obs_recvfrom
+0x40c01f8	BSC_setreuid
+0x40c01fc	BSC_setregid
+0x40c0200	BSC_rename
+0x40c0204	BSC_obs_truncate
+0x40c0208	BSC_obs_ftruncate
+0x40c020c	BSC_flock
+0x40c0210	BSC_mkfifo
+0x40c0214	BSC_sendto
+0x40c0218	BSC_shutdown
+0x40c021c	BSC_socketpair
+0x40c0220	BSC_mkdir
+0x40c0224	BSC_rmdir
+0x40c0228	BSC_utimes
+0x40c022c	BSC_futimes
+0x40c0230	BSC_adjtime
+0x40c0234	BSC_obs_getpeername
+0x40c0238	BSC_gethostuuid
+0x40c023c	BSC_obs_sethostid
+0x40c0240	BSC_obs_getrlimit
+0x40c0244	BSC_obs_setrlimit
+0x40c0248	BSC_obs_killpg
+0x40c024c	BSC_setsid
+0x40c0250	BSC_obs_setquota
+0x40c0254	BSC_obs_qquota
+0x40c0258	BSC_obs_getsockname
+0x40c025c	BSC_getpgid
+0x40c0260	BSC_setprivexec
+0x40c0264	BSC_pread
+0x40c0268	BSC_pwrite
+0x40c026c	BSC_nfssvc
+0x40c0270	BSC_obs_getdirentries
+0x40c0274	BSC_statfs
+0x40c0278	BSC_fstatfs
+0x40c027c	BSC_unmount
+0x40c0280	BSC_obs_async_daemon
+0x40c0284	BSC_getfh
+0x40c0288	BSC_obs_getdomainname
+0x40c028c	BSC_obs_setdomainname
+0x40c0290	BSC_#164
+0x40c0294	BSC_quotactl
+0x40c0298	BSC_obs_exportfs
+0x40c029c	BSC_mount
+0x40c02a0	BSC_obs_ustat
+0x40c02a4	BSC_csops
+0x40c02a8	BSC_obs_table
+0x40c02ac	BSC_obs_wait3
+0x40c02b0	BSC_obs_rpause
+0x40c02b4	BSC_waitid
+0x40c02b8	BSC_obs_getdents
+0x40c02bc	BSC_obs_gc_control
+0x40c02c0	BSC_add_profil
+0x40c02c4	BSC_#177
+0x40c02c8	BSC_#178
+0x40c02cc	BSC_#179
+0x40c02d0	BSC_kdebug_trace
+0x40c02d4	BSC_setgid
+0x40c02d8	BSC_setegid
+0x40c02dc	BSC_seteuid
+0x40c02e0	BSC_sigreturn
+0x40c02e4	BSC_chud
+0x40c02e8	BSC_#186
+0x40c02ec	BSC_fdatasync
+0x40c02f0	BSC_stat
+0x40c02f4	BSC_fstat
+0x40c02f8	BSC_lstat
+0x40c02fc	BSC_pathconf
+0x40c0300	BSC_fpathconf
+0x40c0304	BSC_#193
+0x40c0308	BSC_getrlimit
+0x40c030c	BSC_setrlimit
+0x40c0310	BSC_getdirentries
+0x40c0314	BSC_mmap
+0x40c0318	BSC_obs__syscall
+0x40c031c	BSC_lseek
+0x40c0320	BSC_truncate
+0x40c0324	BSC_ftruncate
+0x40c0328	BSC_sysctl
+0x40c032c	BSC_mlock
+0x40c0330	BSC_munlock
+0x40c0334	BSC_undelete
+0x40c0338	BSC_ATsocket
+0x40c033c	BSC_ATgetmsg
+0x40c0340	BSC_ATputmsg
+0x40c0344	BSC_ATPsndreq
+0x40c0348	BSC_ATPsndrsp
+0x40c034c	BSC_ATPgetreq
+0x40c0350	BSC_ATPgetrsp
+0x40c0354	BSC_#213
+0x40c0358	BSC_#214
+0x40c035c	BSC_#215
+0x40c0360	BSC_mkcomplex
+0x40c0364	BSC_statv
+0x40c0368	BSC_lstatv
+0x40c036c	BSC_fstatv
+0x40c0370	BSC_getattrlist
+0x40c0374	BSC_setattrlist
+0x40c0378	BSC_getdirentriesattr
+0x40c037c	BSC_exchangedata
+0x40c0380	BSC_#224
+0x40c0384	BSC_searchfs
+0x40c0388	BSC_delete_Carbon
+0x40c038c	BSC_copyfile
+0x40c0390	BSC_fgetattrlist
+0x40c0394	BSC_fsetattrlist
+0x40c0398	BSC_poll
+0x40c039c	BSC_watchevent
+0x40c03a0	BSC_waitevent
+0x40c03a4	BSC_modwatch
+0x40c03a8	BSC_getxattr
+0x40c03ac	BSC_fgetxattr
+0x40c03b0	BSC_setxattr
+0x40c03b4	BSC_fsetxattr
+0x40c03b8	BSC_removexattr
+0x40c03bc	BSC_fremovexattr
+0x40c03c0	BSC_listxattr
+0x40c03c4	BSC_flistxattr
+0x40c03c8	BSC_fsctl
+0x40c03cc	BSC_initgroups
+0x40c03d0	BSC_posix_spawn
+0x40c03d4	BSC_ffsctl
+0x40c03d8	BSC_#246
+0x40c03dc	BSC_nfsclnt
+0x40c03e0	BSC_fhopen
+0x40c03e4	BSC_#249
+0x40c03e8	BSC_minherit
+0x40c03ec	BSC_semsys
+0x40c03f0	BSC_msgsys
+0x40c03f4	BSC_shmsys
+0x40c03f8	BSC_semctl
+0x40c03fc	BSC_semget
+0x40c0400	BSC_semop
+0x40c0404	BSC_#257
+0x40c0408	BSC_msgctl
+0x40c040c	BSC_msgget
+0x40c0410	BSC_msgsnd
+0x40c0414	BSC_msgrcv
+0x40c0418	BSC_shmat
+0x40c041c	BSC_shmctl
+0x40c0420	BSC_shmdt
+0x40c0424	BSC_shmget
+0x40c0428	BSC_shm_open
+0x40c042c	BSC_shm_unlink
+0x40c0430	BSC_sem_open
+0x40c0434	BSC_sem_close
+0x40c0438	BSC_sem_unlink
+0x40c043c	BSC_sem_wait
+0x40c0440	BSC_sem_trywait
+0x40c0444	BSC_sem_post
+0x40c0448	BSC_sem_getvalue
+0x40c044c	BSC_sem_init
+0x40c0450	BSC_sem_destroy
+0x40c0454	BSC_open_extended
+0x40c0458	BSC_umask_extended
+0x40c045c	BSC_stat_extended
+0x40c0460	BSC_lstat_extended
+0x40c0464	BSC_fstat_extended
+0x40c0468	BSC_chmod_extended
+0x40c046c	BSC_fchmod_extended
+0x40c0470	BSC_access_extended
+0x40c0474	BSC_settid
+0x40c0478	BSC_gettid
+0x40c047c	BSC_setsgroups
+0x40c0480	BSC_getsgroups
+0x40c0484	BSC_setwgroups
+0x40c0488	BSC_getwgroups
+0x40c048c	BSC_mkfifo_extended
+0x40c0490	BSC_mkdir_extended
+0x40c0494	BSC_identitysvc
+0x40c0498	BSC_shared_region_chk_np
+0x40c049c	BSC_shared_region_map_np
+0x40c04a0	BSC_vm_pressure_monitor
+0x40c04a4	BSC_psynch_rw_longrdlock
+0x40c04a8	BSC_psynch_rw_yieldwrlock
+0x40c04ac	BSC_psynch_rw_downgrade
+0x40c04b0	BSC_psynch_rw_upgrade
+0x40c04b4	BSC_psynch_mutexwait
+0x40c04b8	BSC_psynch_mutexdrop
+0x40c04bc	BSC_psynch_cvbroad
+0x40c04c0	BSC_psynch_cvsignal
+0x40c04c4	BSC_psynch_cvwait
+0x40c04c8	BSC_psynch_rw_rdlock
+0x40c04cc	BSC_psynch_rw_wrlock
+0x40c04d0	BSC_psynch_rw_unlock
+0x40c04d4	BSC_psynch_rw_unlock2
+0x40c04d8	BSC_getsid
+0x40c04dc	BSC_settid_with_pid
+0x40c04e0	BSC_psynch_cvclrprepost
+0x40c04e4	BSC_aio_fsync
+0x40c04e8	BSC_aio_return
+0x40c04ec	BSC_aio_suspend
+0x40c04f0	BSC_aio_cancel
+0x40c04f4	BSC_aio_error
+0x40c04f8	BSC_aio_read
+0x40c04fc	BSC_aio_write
+0x40c0500	BSC_lio_listio
+0x40c0504	BSC_obs_pthread_cond_wait
+0x40c0508	BSC_iopolicysys
+0x40c050c	BSC_process_policy
+0x40c0510	BSC_mlockall
+0x40c0514	BSC_munlockall
+0x40c0518	BSC_#326
+0x40c051c	BSC_issetugid
+0x40c0520	BSC_pthread_kill
+0x40c0524	BSC_pthread_sigmask
+0x40c0528	BSC_sigwait
+0x40c052c	BSC_disable_threadsignal
+0x40c0530	BSC_pthread_markcancel
+0x40c0534	BSC_pthread_canceled
+0x40c0538	BSC_semwait_signal
+0x40c053c	BSC_obs_utrace
+0x40c0540	BSC_proc_info
+0x40c0544	BSC_sendfile
+0x40c0548	BSC_stat64
+0x40c054c	BSC_fstat64
+0x40c0550	BSC_lstat64
+0x40c0554	BSC_stat64_extended
+0x40c0558	BSC_lstat64_extended
+0x40c055c	BSC_fstat64_extended
+0x40c0560	BSC_getdirentries64
+0x40c0564	BSC_statfs64
+0x40c0568	BSC_fstatfs64
+0x40c056c	BSC_getfsstat64
+0x40c0570	BSC_pthread_chdir
+0x40c0574	BSC_pthread_fchdir
+0x40c0578	BSC_audit
+0x40c057c	BSC_auditon
+0x40c0580	BSC_#352
+0x40c0584	BSC_getauid
+0x40c0588	BSC_setauid
+0x40c058c	BSC_getaudit
+0x40c0590	BSC_setaudit
+0x40c0594	BSC_getaudit_addr
+0x40c0598	BSC_setaudit_addr
+0x40c059c	BSC_auditctl
+0x40c05a0	BSC_bsdthread_create
+0x40c05a4	BSC_bsdthread_terminate
+0x40c05a8	BSC_kqueue
+0x40c05ac	BSC_kevent
+0x40c05b0	BSC_lchown
+0x40c05b4	BSC_stack_snapshot
+0x40c05b8	BSC_bsdthread_register
+0x40c05bc	BSC_workq_open
+0x40c05c0	BSC_workq_kernreturn
+0x40c05c4	BSC_kevent64
+0x40c05c8	BSC_obs_semwait_signal
+0x40c05cc	BSC_obs_semwait_signal_nocancel
+0x40c05d0	BSC_thread_selfid
+0x40c05d4	BSC_#373
+0x40c05d8	BSC_#374
+0x40c05dc	BSC_#375
+0x40c05e0	BSC_#376
+0x40c05e4	BSC_#377
+0x40c05e8	BSC_#378
+0x40c05ec	BSC_#379
+0x40c05f0	BSC_mac_execve
+0x40c05f4	BSC_mac_syscall
+0x40c05f8	BSC_mac_get_file
+0x40c0600	BSC_mac_get_link
+0x40c0604	BSC_mac_set_link
+0x40c0608	BSC_mac_get_proc
+0x40c060c	BSC_mac_set_proc
+0x40c0610	BSC_mac_get_fd
+0x40c0614	BSC_mac_set_fd
+0x40c0618	BSC_mac_get_pid
+0x40c061c	BSC_mac_get_lcid
+0x40c0620	BSC_mac_get_lctx
+0x40c0624	BSC_mac_set_lctx
+0x40c0628	BSC_setlcid
+0x40c062c	BSC_getlcid
+0x40c0630	BSC_read_nocancel
+0x40c0634	BSC_write_nocancel
+0x40c0638	BSC_open_nocancel
+0x40c063c	BSC_close_nocancel
+0x40c0640	BSC_wait4_nocancel
+0x40c0644	BSC_recvmsg_nocancel
+0x40c0648	BSC_sendmsg_nocancel
+0x40c064c	BSC_recvfrom_nocancel
+0x40c0650	BSC_accept_nocancel
+0x40c0654	BSC_msync_nocancel
+0x40c0658	BSC_fcntl_nocancel
+0x40c065c	BSC_select_nocancel
+0x40c0660	BSC_fsync_nocancel
+0x40c0664	BSC_connect_nocancel
+0x40c0668	BSC_sigsuspend_nocancel
+0x40c066c	BSC_readv_nocancel
+0x40c0670	BSC_writev_nocancel
+0x40c0674	BSC_sendto_nocancel
+0x40c0678	BSC_pread_nocancel
+0x40c067c	BSC_pwrite_nocancel
+0x40c0680	BSC_waitid_nocancel
+0x40c0684	BSC_poll_nocancel
+0x40c0688	BSC_msgsnd_nocancel
+0x40c068c	BSC_msgrcv_nocancel
+0x40c0690	BSC_sem_wait_nocancel
+0x40c0694	BSC_aio_suspend_nocancel
+0x40c0698	BSC_sigwait_nocancel
+0x40c069c	BSC_semwait_signal_nocancel
+0x40c06a0	BSC_mac_mount
+0x40c06a4	BSC_mac_get_mount
+0x40c06a8	BSC_mac_getfsstat
+0x40c06ac	BSC_fsgetpath
+0x40c06b0	BSC_audit_session
+0x40c06b4	BSC_audit_session_join
+0x40c06b8	BSC_fileport_makeport
+0x40c06bc	BSC_fileport_makefd
+0x40c06c0	BSC_audit_session_port
+0x40c06c4	BSC_pid_suspend
+0x40c06c8	BSC_pid_resume
+0x40c06cc	BSC_pid_hibernate
+0x40c06d0	BSC_pid_shutdown_sockets
+0x40c06d4	BSC_shared_region_slide_np
+0x40c06fc	BSC_shared_region_map_and_slide_np
+0x40e0104	BSC_msync_extended_info
+0x40e0264	BSC_pread_extended_info
+0x40e0268	BSC_pwrite_extended_info
+0x40e0314	BSC_mmap_extended_info
+0x40f0314	BSC_mmap_extended_info2
+0x5000004	INTC_Handler
+0x5010004	WL_CheckForWork
+0x5010008	WL_RunEventSources
+0x5020004	IES_client
+0x5020008	IES_latency
+0x502000c	IES_sema
+0x5020010	IES_intctxt
+0x5020018	IES_action
+0x502001c	IES_filter
+0x5030004	TES_client
+0x5030008	TES_latency
+0x503000c	TES_sema
+0x5030010	TES_action
+0x5040004	CQ_client
+0x5040008	CQ_latency
+0x504000c	CQ_sema
+0x5040010	CQ_psema
+0x5040014	CQ_plock
+0x5040018	CG_action
+0x5080004	IOSERVICE_BUSY
+0x5080008	IOSERVICE_NONBUSY
+0x508000c	IOSERVICE_MODULESTALL
+0x5080010	IOSERVICE_MODULEUNSTALL
+0x5080014	IOSERVICE_TERM_PHASE1
+0x5080018	IOSERVICE_TERM_REQUEST_OK
+0x508001c	IOSERVICE_TERM_REQUEST_FAIL
+0x5080020	IOSERVICE_TERM_SCHEDULE_STOP
+0x5080024	IOSERVICE_TERM_SCHEDULE_FINALIZE
+0x5080028	IOSERVICE_TERM_WILL
+0x508002c	IOSERVICE_TERM_DID
+0x5080030	IOSERVICE_TERM_DID_DEFER
+0x5080034	IOSERVICE_TERM_FINALIZE
+0x5080038	IOSERVICE_TERM_STOP
+0x508003c	IOSERVICE_TERM_STOP_NOP
+0x5080040	IOSERVICE_TERM_STOP_DEFER
+0x5080044	IOSERVICE_TERM_DONE
+0x5080048	IOSERVICE_KEXTD_ALIVE
+0x508004C	IOSERVICE_KEXTD_READY
+0x5080050	IOSERVICE_REGISTRY_QUIET
+0x5100004	PM_SetParent
+0x5100008	PM_AddChild
+0x510000c	PM_RemoveChild
+0x5100010	PM_CtrlDriver
+0x5100014	PM_CtrlDrvrE1
+0x5100018	PM_CtrlDrvrE2
+0x510001c	PM_CtrlDrvrE3
+0x5100020	PM_CtrlDrvrE4
+0x5100024	PM_IntDriver
+0x5100028	PM_AckE1
+0x510002c	PM_ChildAck
+0x5100030	PM_DriverAck
+0x5100034	PM_AckE2
+0x5100038	PM_AckE3
+0x510003c	PM_AckE4
+0x5100040	PM_DrvrAckSPwr
+0x5100044	PM_WillChange
+0x5100048	PM_DidChange
+0x510004c	PM_ReqstDomain
+0x5100050	PM_MakeUsable
+0x5100054	PM_ChangeTo
+0x5100058	PM_ChngeToPriv
+0x510005c	PM_SetAggrssvs
+0x5100060	PM_CritclTemp
+0x5100064	PM_OverrideOn
+0x5100068	PM_OverrideOff
+0x510006c	PM_EnqueueErr
+0x5100070	PM_CollapseQ
+0x5100074	PM_ChangeDone
+0x5100078	PM_CtrlDrvTrdy
+0x510007c	PM_IntDrvrTrdy
+0x5100080	PM_StartAckTmr
+0x5100084	PM_ParentChnge
+0x5100088	PM_AmndPrnChng
+0x510008c	PM_DeviceChnge
+0x5100090	PM_ReqDenied
+0x5100094	PM_CtrlDrvrE45
+0x5100098	PM_PrgrmHrdwre
+0x510009c	PM_InfDrvrPre
+0x51000a0	PM_InfDrvrPost
+0x51000a4	PM_RemoveDrivr
+0x51000a8	PM_IdlTimerPrd
+0x51000ac	PM_SystemWake
+0x51000b0	PM_AckE5
+0x51000b4	PM_ClientAck
+0x51000b8	PM_ClientTardy
+0x51000bc	PM_ClientCancl
+0x51000c0	PM_ClientNotfy
+0x51000c4	PM_AppNotify
+0x5230000	HID_Unexpected
+0x5230004	HID_KeyboardLEDThreadTrigger
+0x5230008	HID_KeyboardLEDThreadActive
+0x523000c	HID_KeyboardSetParam
+0x5230010	HID_KeyboardCapsThreadTrigger
+0x5230014	HID_KeyboardCapsThreadActive
+0x5230018	HID_PostEvent
+0x523001c	HID_NewUserClient
+0x5230020	HID_InturruptReport
+0x5230024	HID_DispatchScroll
+0x5230028	HID_DispatchRelativePointer
+0x523002c	HID_DispatchAbsolutePointer
+0x5230030	HID_DispatchKeyboard
+0x5230034	HID_EjectCallback
+0x5230038	HID_CapsCallback
+0x523003c	HID_#3c
+0x523004c	HID_#4c
+0x5310004	CPUPM_PSTATE
+0x5310008	CPUPM_IDLE_CSTATE
+0x531000c	CPUPM_IDLE_HALT
+0x5310010	CPUPM_IDLE_LOOP
+0x5310014	CPUPM_HPET_START
+0x5310018	CPUPM_HPET_END
+0x531001c	CPUPM_HPET_INTR
+0x5310020	CPUPM_PSTATE_HW
+0x5310024	CPUPM_PSTATE_LIMIT
+0x5310028	CPUPM_PSTATE_PARK
+0x531002c	CPUPM_PSTATE_START
+0x5310030	CPUPM_PSTATE_PAUSE
+0x5310034	CPUPM_PSTATE_RESUME
+0x5310038	CPUPM_PSTATE_DOWN
+0x531003c	CPUPM_PSTATE_UP
+0x5310040	CPUPM_PSTATE_NORM
+0x5310044	CPUPM_PSTATE_FORCE
+0x5310048	CPUPM_PSTATE_TIMEOUT
+0x531004c	CPUPM_PSTATE_SETTO
+0x5310050	CPUPM_SET_DEADLINE
+0x5310054	CPUPM_GET_DEADLINE
+0x5310058	CPUPM_DEADLINE
+0x531005c	CPUPM_IDLE_SNOOP
+0x5310060	CPUPM_IDLE_LATENCY
+0x5310064	CPUPM_IDLE_WAKEUP
+0x5310068	CPUPM_IDLE_SW_WAKEUP
+0x531006c	CPUPM_IDLE_SELECT
+0x5310070	CPUPM_IDLE_SELECTED
+0x5310074	CPUPM_IDLE_INTSKIP
+0x5310078	CPUPM_IDLE_LOCK
+0x531007c	CPUPM_IDLE_UNLOCK
+0x5310080	CPUPM_IDLE_NO_HPET
+0x5310084	CPUPM_FI_UP
+0x5310088	CPUPM_FI_UP_CPU
+0x531008c	CPUPM_FI_MP
+0x5310090	CPUPM_FI_MP_CPU
+0x5310094	CPUPM_FI_PAUSE
+0x5310098	CPUPM_FI_RUN
+0x531009c	CPUPM_PROC_HALT
+0x53100a0	CPUPM_TRACE_STOPPED
+0x53100a4	CPUPM_HPET_INT_LOCK
+0x53100a8	CPUPM_HPET_INT_UNLOCK
+0x53100ac	CPUPM_HPET_TRY_AGAIN
+0x53100b0	CPUPM_HPET_SETDEADLINE
+0x53100b4	CPUPM_LOCK_HELDBY
+0x53100b8	CPUPM_HPET_DELTA
+0x53100bc	CPUPM_HPET_TOO_LATE
+0x53100c0	CPUPM_HPET_NO_DEADLINE
+0x53100c4	CPUPM_IDLE
+0x53100c8	CPUPM_CORE_CHK_DEADLINE
+0x53100cc	CPUPM_SET_HPET_DEADLINE
+0x53100d0	CPUPM_HPET_READ
+0x53100d4	CPUPM_TIME_ADJUST
+0x53100d8	CPUPM_IDLE_MWAIT
+0x53100dc	CPUPM_FI_SLAVE_IDLE
+0x53100e0	CPUPM_FI_SLAVE_BLOCK
+0x53100e4	CPUPM_FI_MAST_SIGNAL
+0x53100e8	CPUPM_CORE_DEADLINE
+0x53100ec	CPUPM_IDLE_FAST
+0x53100f0	CPUPM_IDLE_PAUSE
+0x53100f4	CPUPM_IDLE_SHORT
+0x53100f8	CPUPM_IDLE_NORMAL
+0x53100fc	CPUPM_IDLE_SPURIOUS
+0x5310100	CPUPM_PSTATE_INFO
+0x5310104	CPUPM_PSTATE_INFO_HW
+0x5310108	CPUPM_PSTATE_FSM
+0x531010c	CPUPM_PSTATE_FSM_STEP
+0x5310110	CPUPM_PSTATE_FSM_EVAL
+0x5310114	CPUPM_PSTATE_FSM_MAP
+0x5310118	CPUPM_CPUSTEP_STEP
+0x531011c	CPUPM_CPUSTEP_STEP_UP
+0x5310120	CPUPM_CPUSTEP_STEP_DOWN
+0x5310124	CPUPM_CPUSTEP_AVAIL
+0x5310128	CPUPM_CPUSTEP_AVAIL_STEP
+0x531012c	CPUPM_CPUSTEP_AVAIL_CHNG
+0x5310130	CPUPM_CPUSTEP_LOAD
+0x5310134	CPUPM_CPUSTEP_START
+0x5310138	CPUPM_CPUSTEP_STOP
+0x531013c	CPUPM_CPUSTEP_COPY
+0x5310140	CPUPM_CPUSTEP_CLEAR
+0x5310144	CPUPM_CPUSTEP_RUNCOUNT
+0x5310148	CPUPM_CPUSTEP_WAKEUP
+0x531014c	CPUPM_PSTATE_TRACE
+0x5310150	CPUPM_PSTATE_EVENT
+0x5310154	CPUPM_IDLE_RATE
+0x5310158	CPUPM_PSTATE_FSM_RESUME
+0x531015c	CPUPM_PSTATE_FSM_PAUSE
+0x5310160	CPUPM_PSTATE_INSTRUCTION
+0x5310164	CPUPM_PSTATE_INST_ARG
+0x5310168	CPUPM_PSTATE_STACK_PUSH
+0x531016c	CPUPM_PSTATE_STACK_POP
+0x5310170	CPUPM_IDLE_PREFIRE
+0x5310174	CPUPM_PSTATE_VERIFY
+0x5310178	CPUPM_TIMER_MIGRATE
+0x531017c	CPUPM_RING_LIMIT
+0x5310180	CPUPM_CONTEXT_PAUSE
+0x5310184	CPUPM_CONTEXT_RESUME
+0x5310188	CPUPM_CONTEXT_RESUME_INFO
+0x531018c	CPUPM_THREAD_RESUME
+0x5310190	CPUPM_THREAD_PAUSE_INFO
+0x5310194	CPUPM_THREAD_RESUME_INFO
+0x5310198	CPUPM_TEST_MASTER_INFO
+0x531019c	CPUPM_TEST_SLAVE_INFO
+0x53101a0	CPUPM_TEST_INFO
+0x53101a4	CPUPM_TEST_RUN_INFO
+0x53101a8	CPUPM_TEST_SLAVE_INFO
+0x5330000	HIBERNATE
+0x5330004	HIBERNATE_WRITE_IMAGE
+0x5330008	HIBERNATE_MACHINE_INIT
+0x533000c	HIBERNATE_FLUSH_MEMORY
+0x5330010	HIBERNATE_flush_queue
+0x5330014	HIBERNATE_flush_wait
+0x5330018	HIBERNATE_flush_in_progress
+0x533001c	HIBERNATE_flush_bufs
+0x5330020	HIBERNATE_page_list_setall
+0x5330024	HIBERNATE_aes_decrypt_cbc
+0x7000004	TRACE_DATA_NEWTHREAD
+0x7000008	TRACE_DATA_EXEC
+0x7010004	TRACE_STRING_NEWTHREAD
+0x7010008	TRACE_STRING_EXEC
+0x7020000	TRACE_PANIC
+0x7020004	TRACE_TIMESTAMPS
+0x7020008	TRACE_LOST_EVENTS
+0x702000c	TRACE_WRITING_EVENTS
+0x8000000	USER_TEST
+0x8000004	USER_run
+0x8000008	USER_join
+0x800000c	USER_create
+0x8000010	USER_pthread_create
+0x8000014	USER_pthread_exit
+0x8000018	USER_pthread_join
+0x800001c	USER_pthread_run
+0x8000020	USER_pthread_cleanup_push
+0x8000100	FW_underrun
+0x8000104	FW_interrupt
+0x8000108	FW_workloop
+0x8010400	F_DLIL_Input
+0x8010800	F_DLIL_Output
+0x8010c00	F_DLIL_IfOut
+0x8040000	USER_STOP
+0x9000084	wq_deallocate_stack
+0x9000088	wq_allocate_stack
+0x9008070	wq_run_item
+0x9008074	wq_clean_thread
+0x9008078	wq_post_done
+0x900807c	wq_stk_cleanup
+0x9008080	wq_tsd_cleanup
+0x9008084	wq_tsd_destructor
+0x9008088	wq_pthread_exit
+0x900808c	wq_workqueue_exit
+0xa000100	P_CS_Read
+0xa000110	P_CS_Write
+0xa000180	P_CS_ReadDone
+0xa000190	P_CS_WriteDone
+0xa000200	P_CS_ReadChunk
+0xa000210	P_CS_WriteChunk
+0xa000280	P_CS_ReadChunkDone
+0xa000290	P_CS_WriteChunkDone
+0xa000300	P_CS_ReadCrypto
+0xa000310	P_CS_WriteCrypto
+0xa000500	P_CS_Originated_Read
+0xa000510	P_CS_Originated_Write
+0xa000580	P_CS_Originated_ReadDone
+0xa000590	P_CS_Originated_WriteDone
+0xa000900	P_CS_MetaRead
+0xa000910	P_CS_MetaWrite
+0xa000980	P_CS_MetaReadDone
+0xa000990	P_CS_MetaWriteDone
+0xa008000	P_CS_SYNC_DISK
+0xa008004	P_CS_WaitForBuffer
+0xa008008	P_CS_NoBuffer
+0xb000000	AFP_asp_tcp_usr_send
+0xb000004	AFP_asp_tcp_usr_send_after_Request
+0xb000008	AFP_asp_tcp_usr_send_after_FindDSIReq
+0xb00000c	AFP_asp_tcp_usr_send_after_Reply
+0xb000010	AFP_asp_tcp_slowtimo
+0xb000014	AFP_asp_tcp_usr_control
+0xb000018	AFP_asp_tcp_fasttimo
+0xb000020	AFP_Send
+0xb000024	AFP_Send_before_sosend
+0xb000028	AFP_Send_after_sosend
+0xb00002c	AFP_Send_before_write
+0xb000030	AFP_Send_after_write
+0xb000040	AFP_Reply
+0xb000044	AFP_Reply_rcvdAlready
+0xb000048	AFP_Reply_before_RcvLock
+0xb00004c	AFP_Reply_fail_RcvLock
+0xb000050	AFP_Reply_before_ReadDSIHdr
+0xb000054	AFP_Reply_after_ReadDSIHdr
+0xb000058	AFP_Reply_fail_ReadDSIHdr
+0xb00005c	AFP_Reply_after_FindDSIReqInfo
+0xb000060	AFP_Reply_SetAFPCmd
+0xb000064	AFP_Reply_before_ReadDSIPacket
+0xb000068	AFP_Reply_setRcvdReplyLen
+0xb000070	AFP_SendReply
+0xb000080	AFP_CreateDSIHeader
+0xb000084	AFP_CreateDSIHeader_after_GetReqID
+0xb000090	AFP_Request
+0xb0000a0	AFP_ReceiveLock
+0xb0000b0	AFP_ReceiveWakeUp
+0xb0000c0	AFP_ReceiveUnLock
+0xb0000e0	AFP_SendLock
+0xb0000e4	AFP_SendUnLock
+0xb0000f0	AFP_SendQueueLock
+0xb000100	AFP_SendQueueUnLock
+0xb000110	AFP_ReadDSIHeader
+0xb000120	AFP_Receive
+0xb000124	AFP_Receive_before_sorcv
+0xb000128	AFP_Receive_after_sorcv
+0xb000130	AFP_ReadDSIPacket
+0xb000140	AFP_DoCopyOut
+0xb000150	AFP_DoCopyIn
+0xb000160	AFP_CheckRcvTickle
+0xb000164	AFP_CheckRcvTickleTO
+0xb000170	AFP_CheckSendTickle
+0xb000180	AFP_CheckIncomingPkts
+0xb000190	AFP_ProcessOptions
+0xb000200	AFP_FindDSIReqInfo
+0xb000204	AFP_FindDSIReqInfo_foundReqInfo
+0xb000208	AFP_FindDSIReqInfo_flags
+0xb00020c	AFP_FindDSIReqLeave
+0xb000210	AFP_UsrDisconnect
+0xc000000	AFPVFS_UserReply
+0xc000004	AFPVFS_UserReplyGetMbuf
+0xc000008	AFPVFS_UserReplysosend
+0xc000010	AFPVFS_UserCommand
+0xc000018	AFPVFS_UserCommandsosend
+0xc000020	AFPVFS_ReadFork
+0xc000024	AFPVFS_ReadForkFillQPB
+0xc000028	AFPVFS_ReadForkNbrRequests
+0xc00002c	AFPVFS_ReadForkSendQPB
+0xc000030	AFPVFS_ReadForkSendErr
+0xc000040	AFPVFS_ReadForkGetReply
+0xc000044	AFPVFS_ReadForkGetReplyResult
+0xc000050	AFPVFS_WriteFork
+0xc000054	AFPVFS_WriteForkFillQPB
+0xc000058	AFPVFS_WriteForkNbrRequests
+0xc00005c	AFPVFS_WriteForkSendQPB
+0xc000060	AFPVFS_WriteForkSendErr
+0xc000064	AFPVFS_WriteForkGetReply
+0xc000068	AFPVFS_WriteForkGetReplyResult
+0xc000070	AFPVFS_GetAttr
+0xc000080	AFPVFS_SetAttr
+0xc000090	AFPVFS_GetAttrList
+0xc0000a0	AFPVFS_SetAttrList
+0xc0000b0	AFPVFS_FSCTL
+0xc0000c0	AFPVFS_LookUp
+0xc0000d0	AFPVFS_CacheLookUp
+0xc0000e0	AFPVFS_Write
+0xc0000e4	AFPVFS_WriteNoCluster
+0xc0000e8	AFPVFS_WriteDone
+0xc0000f0	AFPVFS_DoWrite
+0xc000100	AFPVFS_Lock
+0xc000110	AFPVFS_Statfs
+0xc000120	AFPVFS_Sync
+0xc000130	AFPVFS_VGet
+0xc000140	AFPVFS_FlushFiles
+0xc000150	AFPVFS_Create
+0xc000160	AFPVFS_Mknod
+0xc000170	AFPVFS_Open
+0xc000180	AFPVFS_Close
+0xc000190	AFPVFS_Access
+0xc000194	AFPVFS_AccessUID
+0xc000198	AFPVFS_AccessGID
+0xc00019c	AFPVFS_AccessWID
+0xc0001a0	AFPVFS_Writeperm
+0xc0001b0	AFPVFS_Chmod
+0xc0001c0	AFPVFS_Chflags
+0xc0001d0	AFPVFS_Exchange
+0xc0001e0	AFPVFS_Chid
+0xc0001f0	AFPVFS_Fsync
+0xc000200	AFPVFS_Remove
+0xc000210	AFPVFS_Rename
+0xc000220	AFPVFS_Copyfile
+0xc000230	AFPVFS_Mkdir
+0xc000240	AFPVFS_Symlink
+0xc000250	AFPVFS_Readdir
+0xc000260	AFPVFS_Readdirattr
+0xc000264	AFPVFS_Readdirattr1
+0xc000268	AFPVFS_Readdirattr2
+0xc00026c	AFPVFS_Readdirattr3
+0xc000270	AFPVFS_Readlink
+0xc000280	AFPVFS_Abortop
+0xc000290	AFPVFS_Inactive
+0xc0002a0	AFPVFS_Reclaim
+0xc0002b0	AFPVFS_Unlock
+0xc0002c0	AFPVFS_Islocked
+0xc0002d0	AFPVFS_Pathconf
+0xc0002e0	AFPVFS_Update
+0xc0002f0	AFPVFS_Makenode
+0xc000300	AFPVFS_Allocate
+0xc000310	AFPVFS_Search
+0xc000320	AFPVFS_Reconnect
+0xc0003e0	AFPVFS_Rmdir
+0xc0003f0	AFPVFS_Vinit
+0x11000000	DNC_PURGE1
+0x11000004	DNC_PURGE2
+0x11000008	DNC_FOUND
+0x1100000c	DNC_FAILED
+0x11000010	DNC_ENTER
+0x11000014	DNC_remove_name
+0x11000018	DNC_ENTER_CREATE
+0x1100001c	DNC_update_identity
+0x11000020	DNC_PURGE
+0x11000030	DNC_LOOKUP_PATH
+0x11000034	HFS_vnop_lookup
+0x11000038	NAMEI
+0x11000048	VFS_SUSPENDED
+0x1100004C	VFS_CACHEPURGE
+0x11000050	VFS_CACHELOOKUP_SUCCESS
+0x11000054	VFS_CACHELOOKUP_FAILED
+0x11000058	VFS_CACHELOOKUP_ENTER
+0x1100005c	VFS_CACHELOOKUP
+0x11000060	VFS_GETIOCOUNT
+0x11000064	VFS_vnode_recycle
+0x11000068	VFS_vnode_reclaim
+0x11000070	HFS_getnewvnode1
+0x11000074	HFS_getnewvnode2
+0x11000078	HFS_chash_getcnode
+0x1100007c	HFS_vfs_getpath
+0x11000080	VOLFS_lookup
+0x11000084	lookup_mountedhere
+0x11000088	VNOP_LOOKUP
+0x1100008c	HFS_chash_getvnode
+0x11000090	VFS_vnode_rele
+0x11000094	VFS_vnode_put
+0x11004100	NC_lock_shared
+0x11004104	NC_lock_exclusive
+0x11004108	NC_unlock
+0x1f000000	DYLD_initialize
+0x1f010000	DYLD_CALL_image_init_routine
+0x1f010004	DYLD_CALL_dependent_init_routine
+0x1f010008	DYLD_CALL_lazy_init_routine
+0x1f01000c	DYLD_CALL_module_init_for_library
+0x1f010010	DYLD_CALL_module_init_for_object
+0x1f010014	DYLD_CALL_module_terminator_for_object
+0x1f010018	DYLD_CALL_module_init_for_dylib
+0x1f01001c	DYLD_CALL_mod_term_func
+0x1f010020	DYLD_CALL_object_func
+0x1f010024	DYLD_CALL_library_func
+0x1f010028	DYLD_CALL_add_image_func
+0x1f01002c	DYLD_CALL_remove_image_func
+0x1f010030	DYLD_CALL_link_object_module_func
+0x1f010034	DYLD_CALL_link_library_module_func
+0x1f010038	DYLD_CALL_link_module_func
+0x1f020000	DYLD_lookup_and_bind_with_hint
+0x1f020004	DYLD_lookup_and_bind_fully
+0x1f020008	DYLD_link_module
+0x1f02000c	DYLD_ulink_module
+0x1f020010	DYLD_bind_objc_module
+0x1f020014	DYLD_bind_fully_image_containing_address
+0x1f020018	DYLD_make_delayed_module_initializer_calls
+0x1f02001c	DYLD_NSNameOfSymbol
+0x1f020020	DYLD_NSAddressOfSymbol
+0x1f020024	DYLD_NSModuleForSymbol
+0x1f020028	DYLD_NSLookupAndBindSymbolWithHint
+0x1f02002c	DYLD_NSLookupSymbolInModule
+0x1f020030	DYLD_NSLookupSymbolInImage
+0x1f020034	DYLD_NSIsSymbolNameDefined
+0x1f020038	DYLD_NSIsSymbolNameDefinedWithHint
+0x1f02003c	DYLD_NSIsSymbolNameDefinedInImage
+0x1f020040	DYLD_NSNameOfModule
+0x1f020044	DYLD_NSLibraryNameForModule
+0x1f020048	DYLD_NSAddLibrary
+0x1f02004c	DYLD_NSAddLibraryWithSearching
+0x1f020050	DYLD_NSAddImage
+0x1f030000	DYLD_lookup_symbol
+0x1f030004	DYLD_bind_lazy_symbol_reference
+0x1f030008	DYLD_bind_symbol_by_name
+0x1f03000c	DYLD_link_in_need_modules
+0x1f040000	DYLD_map_image
+0x1f040004	DYLD_load_executable_image
+0x1f040008	DYLD_load_library_image
+0x1f04000c	DYLD_map_library_image
+0x1f040010	DYLD_map_bundle_image
+0x1f040014	DYLD_load_dependent_libraries
+0x1f040018	DYLD_notify_prebinding_agent
+0x1ff10000	SCROLL_BEGIN_obs
+0x1ff10100	SCROLL_END_obs
+0x1ff20000	BOOT_BEGIN_obs
+0x1ff20100	BOOT_END_obs
+0x1ff20400	APP_DidActivateWindow_obs
+0x1ff20500	TOOL_PRIVATE_1_obs
+0x1ff20504	TOOL_PRIVATE_2_obs
+0x1ff20508	TOOL_PRIVATE_3_obs
+0x1ff2050c	TOOL_PRIVATE_4_obs
+0x1fff0000	LAUNCH_START_FINDER
+0x1fff0100	LAUNCH_START_DOCK
+0x1fff0200	LAUNCH_LSOpen
+0x1fff0204	LAUNCH_LSRegisterItem
+0x1fff0208	LAUNCH_LSGetApplicationAndFlagsForInfo
+0x1fff0300	LAUNCH_CPSLaunch
+0x1fff0304	LAUNCH_CPSRegisterwithServer
+0x1fff0308	LAUNCH_CGSCheckInNewProcess
+0x1fff030c	LAUNCH_CPSExecProcess
+0x1fff0310	LAUNCH_APP_EnterEventLoop
+0x1fff0314	LAUNCH_APP_WillOpenUntitled
+0x1fff031c	LAUNCH_APP_DidOpenUntitled
+0x1fff1000	LAUNCH_END
+0x1fffffff	LAUNCH_END
+0x20000004	RTC_sync_TBR
+0x21010000	SCROLL_BEGIN
+0x21020000	BOOT_BEGIN
+0x21030200	LOGIN_BEGIN
+0x21030204	LOGINWINDOW_LAUNCHED
+0x21030208	LOGINWINDOW_LAUNCHES_SA
+0x2103020c	LOGINWINDOW_GUI_APPEARS
+0x21030210	LOGINWINDOW_LOGIN_CLICKED
+0x21030214	LOGINWINDOW_ASKS_AUTH
+0x21030218	LOGINWINDOW_AUTH_SUCCEEDED
+0x2103021c	LOGINWINDOW_LAUNCHES_DOCK
+0x21030220	LOGINWINDOW_LAUNCHES_SUIS
+0x21030224	LOGINWINDOW_LAUNCHES_FINDER
+0x21030228	LOGINWINDOW_DOCK_LAUNCHED
+0x2103022c	LOGINWINDOW_SUIS_LAUNCHED
+0x21030230	LOGINWINDOW_FINDER_LAUNCHED
+0x21030234	LOGINWINDOW_LOGOUT_CLICKED
+0x21030238	LOGINWINDOW_QUIT_FGAPPS
+0x2103023c	LOGINWINDOW_FGAPPS_QUIT
+0x21030240	LOGINWINDOW_QUIT_SUIS
+0x21030244	LOGINWINDOW_SUIS_DIES
+0x21030248	LOGINWINDOW_QUIT_FINDER
+0x2103024c	LOGINWINDOW_FINDER_DIES
+0x21030250	LOGINWINDOW_QUIT_DOCK
+0x21030254	LOGINWINDOW_DOCK_DIES
+0x21030258	LOGINWINDOW_EXIT
+0x2103025c	LOGINWINDOW_FUS_SELUSERNAME
+0x21030260	LOGINWINDOW_FUS_SELLOGINWIND
+0x21030270	LOGIN_APPLICATION_EXECUTING
+0x21030274	LOGIN_APPLICATION_USABLE
+0x21030300	LOGIN_END
+0x21030500	LOGINWINDOW_APP_TERMINATION_REQUEST
+0x21030504	LOGINWINDOW_LOGOUT_START
+0x21030508	LOGINWINDOW_DESKTOP_UP
+0x2103050c	LOGINWINDOW_DESKTOP_UP_NOTIFICATION
+0x21040000	APP_DIDActivateWindow
+0x21050000	TOOL_PRIVATE_1
+0x21050004	TOOL_PRIVATE_2
+0x21050008	TOOL_PRIVATE_3
+0x2105000c	TOOL_PRIVATE_4
+0x21060000	LAUNCH_CPSTraceLineNum
+0x21060004	LAUNCH_CPSLaunch
+0x21060008	LAUNCH_CPSRegisterwithServer
+0x2106000c	LAUNCH_CPSCheckInNewProcess
+0x21060010	LAUNCH_CPSServerSideLaunch
+0x21060014	LAUNCH_CPSExecProcess
+0x21070000	LAUNCH_LSOpen
+0x21070004	LAUNCH_LSRegisterItem
+0x21070008	LAUNCH_LSGetApplicationAndFlagsForInfo
+0x21080000	MCX_DAEMON_START
+0x21080004	MCX_DAEMON_FINISH
+0x21080008	MCX_STARTMCX_START
+0x2108000C	MCX_STARTMCX_FINISH
+0x21080010	MCX_POSTCMP_DOCK_START
+0x21080014	MCX_POSTCMP_DOCK_FINISH
+0x21080020	MCX_POSTCMP_ENERGYSVR_START
+0x21080024	MCX_POSTCMP_ENERGYSVR_FINISH
+0x21080030	MCX_POSTCMP_LOGINITMS_START
+0x21080034	MCX_POSTCMP_LOGINITMS_FINISH
+0x21080040	MCX_CMP_COMPUTERINFO_START
+0x21080044	MCX_CMP_COMPUTERINFO_FINISH
+0x21080050	MCX_CMP_USERINFO_START
+0x21080054	MCX_CMP_USERINFO_FINISH
+0x21080060	MCX_POSTCMP_USER_START
+0x21080064	MCX_POSTCMP_USER_FINISH
+0x210800A0	MCX_MECHANISM_START
+0x210800A4	MCX_MECHANISM_FINISH
+0x210800C0	MCX_MECHANISM_PICKER_START
+0x210800C4	MCX_MECHANISM_PICKER_FINISH
+0x21080100	MCX_APPITEMS_START
+0x21080104	MCX_APPITEMS_FINISH
+0x21080200	MCX_CACHER_START
+0x21080204	MCX_CACHER_FINISH
+0x21080300	MCX_COMPOSITOR_START
+0x21080304	MCX_COMPOSITOR_FINISH
+0x21080400	MCX_DISKSETUP_START
+0x21080404	MCX_DISKSETUP_FINISH
+0x21090000	PHD_DAEMON_START
+0x21090004	PHD_DAEMON_FINISH
+0x21090010	PHD_SYNCNOW_START
+0x21090014	PHD_SYNCNOW_FINISH
+0x210b0000	TAL_APP_LAUNCH_START
+0x210b0004	TAL_APP_LAUNCH_UNSUSPENDED
+0x210b0008	TAL_APP_LAUNCH_UNTHROTTLED
+0x210b000c	TAL_APP_LAUNCH_VISIBLE
+0x210b0010	TAL_APP_LAUNCH_READY
+0x210b0014	TAL_ALL_LAUNCH_READY
+0x21800000	SMB_smbd_idle
+0x21800004	SMB_syscall_opendir
+0x21800008	SMB_syscall_readdir
+0x2180000c	SMB_syscall_seekdir
+0x21800010	SMB_syscall_telldir
+0x21800014	SMB_syscall_rewinddir
+0x21800018	SMB_syscall_mkdir
+0x2180001c	SMB_syscall_rmdir
+0x21800020	SMB_syscall_closedir
+0x21800024	SMB_syscall_open
+0x21800028	SMB_syscall_close
+0x2180002c	SMB_syscall_read
+0x21800030	SMB_syscall_pread
+0x21800034	SMB_syscall_write
+0x21800038	SMB_syscall_pwrite
+0x2180003c	SMB_syscall_lseek
+0x21800040	SMB_syscall_sendfile
+0x21800044	SMB_syscall_rename
+0x21800048	SMB_syscall_fsync
+0x2180004c	SMB_syscall_stat
+0x21800050	SMB_syscall_fstat
+0x21800054	SMB_syscall_lstat
+0x21800058	SMB_syscall_unlink
+0x2180005c	SMB_syscall_chmod
+0x21800060	SMB_syscall_fchmod
+0x21800064	SMB_syscall_chown
+0x21800068	SMB_syscall_fchown
+0x2180006c	SMB_syscall_chdir
+0x21800070	SMB_syscall_getwd
+0x21800074	SMB_syscall_utime
+0x21800078	SMB_syscall_ftruncate
+0x2180007c	SMB_syscall_fcntl_lock
+0x21800080	SMB_syscall_kernel_flock
+0x21800084	SMB_syscall_fcntl_getlock
+0x21800088	SMB_syscall_readlink
+0x2180008c	SMB_syscall_symlink
+0x21800090	SMB_syscall_link
+0x21800094	SMB_syscall_mknod
+0x21800098	SMB_syscall_realpath
+0x2180009c	SMB_syscall_get_quota
+0x218000a0	SMB_syscall_set_quota
+0x218000a4	SMB_smbmkdir
+0x218000a8	SMB_smbrmdir
+0x218000ac	SMB_smbopen
+0x218000b0	SMB_smbcreate
+0x218000b4	SMB_smbclose
+0x218000b8	SMB_smbflush
+0x218000bc	SMB_smbunlink
+0x218000c0	SMB_smbmv
+0x218000c4	SMB_smbgetatr
+0x218000c8	SMB_smbsetatr
+0x218000cc	SMB_smbread
+0x218000d0	SMB_smbwrite
+0x218000d4	SMB_smblock
+0x218000d8	SMB_smbunlock
+0x218000dc	SMB_smbctemp
+0x218000e0	SMB_smbmknew
+0x218000e4	SMB_smbcheckpath
+0x218000e8	SMB_smbexit
+0x218000ec	SMB_smblseek
+0x218000f0	SMB_smblockread
+0x218000f4	SMB_smbwriteunlock
+0x218000f8	SMB_smbreadbraw
+0x218000fc	SMB_smbreadbmpx
+0x21800100	SMB_smbreadbs
+0x21800104	SMB_smbwritebraw
+0x21800108	SMB_smbwritebmpx
+0x2180010c	SMB_smbwritebs
+0x21800110	SMB_smbwritec
+0x21800114	SMB_smbsetattre
+0x21800118	SMB_smbgetattre
+0x2180011c	SMB_smblockingx
+0x21800120	SMB_smbtrans
+0x21800124	SMB_smbtranss
+0x21800128	SMB_smbioctl
+0x2180012c	SMB_smbioctls
+0x21800130	SMB_smbcopy
+0x21800134	SMB_smbmove
+0x21800138	SMB_smbecho
+0x2180013c	SMB_smbwriteclose
+0x21800140	SMB_smbopenx
+0x21800144	SMB_smbreadx
+0x21800148	SMB_smbwritex
+0x2180014c	SMB_smbtrans2
+0x21800150	SMB_smbtranss2
+0x21800154	SMB_smbfindclose
+0x21800158	SMB_smbfindnclose
+0x2180015c	SMB_smbtcon
+0x21800160	SMB_smbtdis
+0x21800164	SMB_smbnegprot
+0x21800168	SMB_smbsesssetupx
+0x2180016c	SMB_smbulogoffx
+0x21800170	SMB_smbtconx
+0x21800174	SMB_smbdskattr
+0x21800178	SMB_smbsearch
+0x2180017c	SMB_smbffirst
+0x21800180	SMB_smbfunique
+0x21800184	SMB_smbfclose
+0x21800188	SMB_smbnttrans
+0x2180018c	SMB_smbnttranss
+0x21800190	SMB_smbntcreatex
+0x21800194	SMB_smbntcancel
+0x21800198	SMB_smbntrename
+0x2180019c	SMB_smbsplopen
+0x218001a0	SMB_smbsplwr
+0x218001a4	SMB_smbsplclose
+0x218001a8	SMB_smbsplretq
+0x218001ac	SMB_smbsends
+0x218001b0	SMB_smbsendb
+0x218001b4	SMB_smbfwdname
+0x218001b8	SMB_smbcancelf
+0x218001bc	SMB_smbgetmac
+0x218001c0	SMB_smbsendstrt
+0x218001c4	SMB_smbsendend
+0x218001c8	SMB_smbsendtxt
+0x218001cc	SMB_smbinvalid
+0x218001d0	SMB_pathworks_setdir
+0x218001d4	SMB_trans2_open
+0x218001d8	SMB_trans2_findfirst
+0x218001dc	SMB_trans2_findnext
+0x218001e0	SMB_trans2_qfsinfo
+0x218001e4	SMB_trans2_setfsinfo
+0x218001e8	SMB_trans2_qpathinfo
+0x218001ec	SMB_trans2_setpathinfo
+0x218001f0	SMB_trans2_qfileinfo
+0x218001f4	SMB_trans2_setfileinfo
+0x218001f8	SMB_trans2_fsctl
+0x218001fc	SMB_trans2_ioctl
+0x21800200	SMB_trans2_findnotifyfirst
+0x21800204	SMB_trans2_findnotifynext
+0x21800208	SMB_trans2_mkdir
+0x2180020c	SMB_trans2_session_setup
+0x21800210	SMB_trans2_get_dfs_referral
+0x21800214	SMB_trans2_report_dfs_inconsistancy
+0x21800218	SMB_nt_transact_create
+0x2180021c	SMB_nt_transact_ioctl
+0x21800220	SMB_nt_transact_set_security_desc
+0x21800224	SMB_nt_transact_notify_change
+0x21800228	SMB_nt_transact_rename
+0x2180022c	SMB_nt_transact_query_security_desc
+0x21800230	SMB_nt_transact_get_user_quota
+0x21800234	SMB_nt_transact_set_user_quota
+0x21800238	SMB_get_nt_acl
+0x2180023c	SMB_fget_nt_acl
+0x21800240	SMB_set_nt_acl
+0x21800244	SMB_fset_nt_acl
+0x21800248	SMB_chmod_acl
+0x2180024c	SMB_fchmod_acl
+0x21800250	SMB_name_release
+0x21800254	SMB_name_refresh
+0x21800258	SMB_name_registration
+0x2180025c	SMB_node_status
+0x21800260	SMB_name_query
+0x21800264	SMB_host_announce
+0x21800268	SMB_workgroup_announce
+0x2180026c	SMB_local_master_announce
+0x21800270	SMB_master_browser_announce
+0x21800274	SMB_lm_host_announce
+0x21800278	SMB_get_backup_list
+0x2180027c	SMB_reset_browser
+0x21800280	SMB_announce_request
+0x21800284	SMB_lm_announce_request
+0x21800288	SMB_domain_logon
+0x2180028c	SMB_sync_browse_lists
+0x21800290	SMB_run_elections
+0x21800294	SMB_election
+0x22000004	LAUNCHD_starting
+0x22000008	LAUNCHD_exiting
+0x2200000c	LAUNCHD_finding_stray_pg
+0x22000010	LAUNCHD_finding_all_strays
+0x22000014	LAUNCHD_finding_execless
+0x22000018	LAUNCHD_finding_weird_uids
+0x2200001c	LAUNCHD_data_pack
+0x22000020	LAUNCHD_data_unpack
+0x22000024	LAUNCHD_bug
+0x22000028	LAUNCHD_mach_ipc
+0x2200002c	LAUNCHD_bsd_kevent
+0x22000030	LAUNCHD_vproc_trans_incr
+0x22000034	LAUNCHD_vproc_trans_decr
+0xff000104	MSG_mach_notify_port_deleted
+0xff000114	MSG_mach_notify_port_destroyed
+0xff000118	MSG_mach_notify_no_senders
+0xff00011c	MSG_mach_notify_send_once
+0xff000120	MSG_mach_notify_dead_name
+0xff0001ec	MSG_audit_triggers
+0xff000320	MSG_host_info
+0xff000324	MSG_host_kernel_version
+0xff000328	MSG_host_page_size
+0xff00032c	MSG_mach_memory_object_memory_entry
+0xff000330	MSG_host_processor_info
+0xff000334	MSG_host_get_io_master
+0xff000338	MSG_host_get_clock_service
+0xff00033c	MSG_kmod_get_info
+0xff000340	MSG_host_zone_info
+0xff000344	MSG_host_virtual_physical_table_info
+0xff000348	MSG_host_ipc_hash_info
+0xff00034c	MSG_enable_bluebox
+0xff000350	MSG_disable_bluebox
+0xff000354	MSG_processor_set_default
+0xff000358	MSG_processor_set_create
+0xff00035c	MSG_mach_memory_object_memory_entry_64
+0xff000360	MSG_host_statistics
+0xff000364	MSG_host_request_notification
+0xff000368	MSG_host_lockgroup_info
+0xff00036c	MSG_host_statistics64
+0xff000370	MSG_mach_zone_info
+0xff000640	MSG_host_get_boot_info
+0xff000644	MSG_host_reboot
+0xff000648	MSG_host_priv_statistics
+0xff00064c	MSG_host_default_memory_manager
+0xff000650	MSG_vm_wire
+0xff000654	MSG_thread_wire
+0xff000658	MSG_vm_allocate_cpm
+0xff00065c	MSG_host_processors
+0xff000660	MSG_host_get_clock_control
+0xff000664	MSG_kmod_create
+0xff000668	MSG_kmod_destroy
+0xff00066c	MSG_kmod_control
+0xff000670	MSG_host_get_special_port
+0xff000674	MSG_host_set_special_port
+0xff000678	MSG_host_set_exception_ports
+0xff00067c	MSG_host_get_exception_ports
+0xff000680	MSG_host_swap_exception_ports
+0xff000684	MSG_host_load_symbol_table
+0xff000688	MSG_mach_vm_wire
+0xff00068c	MSG_host_processor_sets
+0xff000690	MSG_host_processor_set_priv
+0xff000694	MSG_set_dp_control_port
+0xff000698	MSG_get_dp_control_port
+0xff00069c	MSG_host_set_UNDServer
+0xff0006a0	MSG_host_get_UNDServer
+0xff0006a4	MSG_kext_request
+0xff000960	MSG_host_security_create_task_token
+0xff000964	MSG_host_security_set_task_token
+0xff000f9c	MSG_mach_gss_init_sec_context
+0xff000fa0	MSG_clock_get_time
+0xff000fa0	MSG_mach_gss_accept_sec_context
+0xff000fa4	MSG_clock_get_attributes
+0xff000fa4	MSG_mach_gss_log_error
+0xff000fa8	MSG_clock_alarm
+0xff000fa8	MSG_mach_gss_init_sec_context_v2
+0xff000fac	MSG_mach_gss_accept_sec_context_v2
+0xff000fb0	MSG_mach_gss_hold_cred
+0xff000fb4	MSG_mach_gss_unhold_cred
+0xff000ffc	MSG_lockd_request
+0xff001000	MSG_lockd_ping
+0xff001004	MSG_lockd_shutdown
+0xff0012c0	MSG_clock_set_time
+0xff0012c4	MSG_clock_set_attributes
+0xff001f40	MSG_memory_object_get_attributes
+0xff001f44	MSG_memory_object_change_attributes
+0xff001f48	MSG_memory_object_synchronize_completed
+0xff001f4c	MSG_memory_object_lock_request
+0xff001f50	MSG_memory_object_destroy
+0xff001f54	MSG_memory_object_upl_request
+0xff001f58	MSG_memory_object_super_upl_request
+0xff001f5c	MSG_memory_object_cluster_size
+0xff001f60	MSG_memory_object_page_op
+0xff001f64	MSG_memory_object_recover_named
+0xff001f68	MSG_memory_object_release_name
+0xff001f6c	MSG_memory_object_range_op
+0xff002008	MSG_upl_abort
+0xff00200c	MSG_upl_abort_range
+0xff002010	MSG_upl_commit
+0xff002014	MSG_upl_commit_range
+0xff002260	MSG_memory_object_init
+0xff002264	MSG_memory_object_terminate
+0xff002268	MSG_memory_object_data_request
+0xff00226c	MSG_memory_object_data_return
+0xff002270	MSG_memory_object_data_initialize
+0xff002274	MSG_memory_object_data_unlock
+0xff002278	MSG_memory_object_synchronize
+0xff00227c	MSG_memory_object_map
+0xff002280	MSG_memory_object_last_unmap
+0xff002284	MSG_memory_object_data_reclaim
+0xff002328	MSG_memory_object_create
+0xff00238c	MSG_default_pager_object_create
+0xff002390	MSG_default_pager_info
+0xff002394	MSG_default_pager_objects
+0xff002398	MSG_default_pager_object_pages
+0xff0023a0	MSG_default_pager_backing_store_create
+0xff0023a4	MSG_default_pager_backing_store_delete
+0xff0023a8	MSG_default_pager_backing_store_info
+0xff0023ac	MSG_default_pager_add_file
+0xff0023b0	MSG_default_pager_triggers
+0xff0023b4	MSG_default_pager_info_64
+0xff0023dc	MSG_default_pager_space_alert
+0xff002584	MSG_exception_raise
+0xff002588	MSG_exception_raise_state
+0xff00258c	MSG_exception_raise_state_identity
+0xff002594	MSG_mach_exception_raise
+0xff002598	MSG_mach_exception_raise_state
+0xff00259c	MSG_mach_exception_raise_state_identity
+0xff002bc0	MSG_io_object_get_class
+0xff002bc4	MSG_io_object_conforms_to
+0xff002bc8	MSG_io_iterator_next
+0xff002bcc	MSG_io_iterator_reset
+0xff002bd0	MSG_io_service_get_matching_services
+0xff002bd4	MSG_io_registry_entry_get_property
+0xff002bd8	MSG_io_registry_create_iterator
+0xff002bdc	MSG_io_registry_iterator_enter_entry
+0xff002be0	MSG_io_registry_iterator_exit_entry
+0xff002be4	MSG_io_registry_entry_from_path
+0xff002be8	MSG_io_registry_entry_get_name
+0xff002bec	MSG_io_registry_entry_get_properties
+0xff002bf0	MSG_io_registry_entry_get_property_bytes
+0xff002bf4	MSG_io_registry_entry_get_child_iterator
+0xff002bf8	MSG_io_registry_entry_get_parent_iterator
+0xff002c00	MSG_io_service_close
+0xff002c04	MSG_io_connect_get_service
+0xff002c08	MSG_io_connect_set_notification_port
+0xff002c0c	MSG_io_connect_map_memory
+0xff002c10	MSG_io_connect_add_client
+0xff002c14	MSG_io_connect_set_properties
+0xff002c18	MSG_io_connect_method_scalarI_scalarO
+0xff002c1c	MSG_io_connect_method_scalarI_structureO
+0xff002c20	MSG_io_connect_method_scalarI_structureI
+0xff002c24	MSG_io_connect_method_structureI_structureO
+0xff002c28	MSG_io_registry_entry_get_path
+0xff002c2c	MSG_io_registry_get_root_entry
+0xff002c30	MSG_io_registry_entry_set_properties
+0xff002c34	MSG_io_registry_entry_in_plane
+0xff002c38	MSG_io_object_get_retain_count
+0xff002c3c	MSG_io_service_get_busy_state
+0xff002c40	MSG_io_service_wait_quiet
+0xff002c44	MSG_io_registry_entry_create_iterator
+0xff002c48	MSG_io_iterator_is_valid
+0xff002c4c	MSG_io_make_matching
+0xff002c50	MSG_io_catalog_send_data
+0xff002c54	MSG_io_catalog_terminate
+0xff002c58	MSG_io_catalog_get_data
+0xff002c5c	MSG_io_catalog_get_gen_count
+0xff002c60	MSG_io_catalog_module_loaded
+0xff002c64	MSG_io_catalog_reset
+0xff002c68	MSG_io_service_request_probe
+0xff002c6c	MSG_io_registry_entry_get_name_in_plane
+0xff002c70	MSG_io_service_match_property_table
+0xff002c74	MSG_io_async_method_scalarI_scalarO
+0xff002c78	MSG_io_async_method_scalarI_structureO
+0xff002c7c	MSG_io_async_method_scalarI_structureI
+0xff002c80	MSG_io_async_method_structureI_structureO
+0xff002c84	MSG_io_service_add_notification
+0xff002c88	MSG_io_service_add_interest_notification
+0xff002c8c	MSG_io_service_acknowledge_notification
+0xff002c90	MSG_io_connect_get_notification_semaphore
+0xff002c94	MSG_io_connect_unmap_memory
+0xff002c98	MSG_io_registry_entry_get_location_in_plane
+0xff002c9c	MSG_io_registry_entry_get_property_recursively
+0xff002ca0	MSG_io_service_get_state
+0xff002ca4	MSG_io_service_get_matching_services_ool
+0xff002ca8	MSG_io_service_match_property_table_ool
+0xff002cac	MSG_io_service_add_notification_ool
+0xff002cb0	MSG_io_object_get_superclass
+0xff002cb4	MSG_io_object_get_bundle_identifier
+0xff002cb8	MSG_io_service_open_extended
+0xff002cbc	MSG_io_connect_map_memory_into_task
+0xff002cc0	MSG_io_connect_unmap_memory_from_task
+0xff002cc4	MSG_io_connect_method
+0xff002cc8	MSG_io_connect_async_method
+0xff002ccc	MSG_io_connect_set_notification_port_64
+0xff002cd0	MSG_io_service_add_notification_64
+0xff002cd4	MSG_io_service_add_interest_notification_64
+0xff002cd8	MSG_io_service_add_notification_ool_64
+0xff002cdc	MSG_io_registry_entry_get_registry_entry_id
+0xff002ee0	MSG_processor_start
+0xff002ee4	MSG_processor_exit
+0xff002ee8	MSG_processor_info
+0xff002eec	MSG_processor_control
+0xff002ef0	MSG_processor_assign
+0xff002ef4	MSG_processor_get_assignment
+0xff003200	MSG_mach_port_names
+0xff003204	MSG_mach_port_type
+0xff003208	MSG_mach_port_rename
+0xff00320c	MSG_mach_port_allocate_name
+0xff003210	MSG_mach_port_allocate
+0xff003214	MSG_mach_port_destroy
+0xff003218	MSG_mach_port_deallocate
+0xff00321c	MSG_mach_port_get_refs
+0xff003220	MSG_mach_port_mod_refs
+0xff003228	MSG_mach_port_set_mscount
+0xff00322c	MSG_mach_port_get_set_status
+0xff003230	MSG_mach_port_move_member
+0xff003234	MSG_mach_port_request_notification
+0xff003238	MSG_mach_port_insert_right
+0xff00323c	MSG_mach_port_extract_right
+0xff003240	MSG_mach_port_set_seqno
+0xff003244	MSG_mach_port_get_attributes
+0xff003248	MSG_mach_port_set_attributes
+0xff00324c	MSG_mach_port_allocate_qos
+0xff003250	MSG_mach_port_allocate_full
+0xff003254	MSG_task_set_port_space
+0xff003258	MSG_mach_port_get_srights
+0xff00325c	MSG_mach_port_space_info
+0xff003260	MSG_mach_port_dnrequest_info
+0xff003264	MSG_mach_port_kernel_object
+0xff003268	MSG_mach_port_insert_member
+0xff00326c	MSG_mach_port_extract_member
+0xff003270	MSG_mach_port_get_context
+0xff003274	MSG_mach_port_set_context
+0xff003278	MSG_mach_port_kobject
+0xff003520	MSG_task_create
+0xff003524	MSG_task_terminate
+0xff003528	MSG_task_threads
+0xff00352c	MSG_mach_ports_register
+0xff003530	MSG_mach_ports_lookup
+0xff003534	MSG_task_info
+0xff003538	MSG_task_set_info
+0xff00353c	MSG_task_suspend
+0xff003540	MSG_task_resume
+0xff003544	MSG_task_get_special_port
+0xff003548	MSG_task_set_special_port
+0xff00354c	MSG_thread_create
+0xff003550	MSG_thread_create_running
+0xff003554	MSG_task_set_exception_ports
+0xff003558	MSG_task_get_exception_ports
+0xff00355c	MSG_task_swap_exception_ports
+0xff003560	MSG_lock_set_create
+0xff003564	MSG_lock_set_destroy
+0xff003568	MSG_semaphore_create
+0xff00356c	MSG_semaphore_destroy
+0xff003570	MSG_task_policy_set
+0xff003574	MSG_task_policy_get
+0xff003578	MSG_task_sample
+0xff00357c	MSG_task_policy
+0xff003580	MSG_task_set_emulation
+0xff003584	MSG_task_get_emulation_vector
+0xff003588	MSG_task_set_emulation_vector
+0xff00358c	MSG_task_set_ras_pc
+0xff003590	MSG_task_zone_info
+0xff003594	MSG_task_assign
+0xff003598	MSG_task_assign_default
+0xff00359c	MSG_task_get_assignment
+0xff0035a0	MSG_task_set_policy
+0xff0035a4	MSG_task_get_state
+0xff0035a8	MSG_task_set_state
+0xff003840	MSG_thread_terminate
+0xff003844	MSG_act_get_state
+0xff003848	MSG_act_set_state
+0xff00384c	MSG_thread_get_state
+0xff003850	MSG_thread_set_state
+0xff003854	MSG_thread_suspend
+0xff003858	MSG_thread_resume
+0xff00385c	MSG_thread_abort
+0xff003860	MSG_thread_abort_safely
+0xff003864	MSG_thread_depress_abort
+0xff003868	MSG_thread_get_special_port
+0xff00386c	MSG_thread_set_special_port
+0xff003870	MSG_thread_info
+0xff003874	MSG_thread_set_exception_ports
+0xff003878	MSG_thread_get_exception_ports
+0xff00387c	MSG_thread_swap_exception_ports
+0xff003880	MSG_thread_policy
+0xff003884	MSG_thread_policy_set
+0xff003888	MSG_thread_policy_get
+0xff00388c	MSG_thread_sample
+0xff003890	MSG_etap_trace_thread
+0xff003894	MSG_thread_assign
+0xff003898	MSG_thread_assign_default
+0xff00389c	MSG_thread_get_assignment
+0xff0038a0	MSG_thread_set_policy
+0xff003b60	MSG_vm_region
+0xff003b64	MSG_vm_allocate
+0xff003b68	MSG_vm_deallocate
+0xff003b6c	MSG_vm_protect
+0xff003b70	MSG_vm_inherit
+0xff003b74	MSG_vm_read
+0xff003b78	MSG_vm_read_list
+0xff003b7c	MSG_vm_write
+0xff003b80	MSG_vm_copy
+0xff003b84	MSG_vm_read_overwrite
+0xff003b88	MSG_vm_msync
+0xff003b8c	MSG_vm_behavior_set
+0xff003b90	MSG_vm_map
+0xff003b94	MSG_vm_machine_attribute
+0xff003b98	MSG_vm_remap
+0xff003b9c	MSG_task_wire
+0xff003ba0	MSG_mach_make_memory_entry
+0xff003ba4	MSG_vm_map_page_query
+0xff003ba8	MSG_mach_vm_region_info
+0xff003bac	MSG_vm_mapped_pages_info
+0xff003bb4	MSG_vm_region_recurse
+0xff003bb8	MSG_vm_region_recurse_64
+0xff003bbc	MSG_mach_vm_region_info_64
+0xff003bc0	MSG_vm_region_64
+0xff003bc4	MSG_mach_make_memory_entry_64
+0xff003bc8	MSG_vm_map_64
+0xff003bcc	MSG_vm_map_get_upl
+0xff003bd8	MSG_vm_purgable_control
+0xff003e80	MSG_processor_set_statistics
+0xff003e84	MSG_processor_set_destroy
+0xff003e88	MSG_processor_set_max_priority
+0xff003e8c	MSG_processor_set_policy_enable
+0xff003e90	MSG_processor_set_policy_disable
+0xff003e94	MSG_processor_set_tasks
+0xff003e98	MSG_processor_set_threads
+0xff003e9c	MSG_processor_set_policy_control
+0xff003ea0	MSG_processor_set_stack_usage
+0xff003ea4	MSG_processor_set_info
+0xff004b00	MSG_mach_vm_allocate
+0xff004b04	MSG_mach_vm_deallocate
+0xff004b08	MSG_mach_vm_protect
+0xff004b0c	MSG_mach_vm_inherit
+0xff004b10	MSG_mach_vm_read
+0xff004b14	MSG_mach_vm_read_list
+0xff004b18	MSG_mach_vm_write
+0xff004b1c	MSG_mach_vm_copy
+0xff004b20	MSG_mach_vm_read_overwrite
+0xff004b24	MSG_mach_vm_msync
+0xff004b28	MSG_mach_vm_behavior_set
+0xff004b2c	MSG_mach_vm_map
+0xff004b30	MSG_mach_vm_machine_attribute
+0xff004b34	MSG_mach_vm_remap
+0xff004b38	MSG_mach_vm_page_query
+0xff004b3c	MSG_mach_vm_region_recurse
+0xff004b40	MSG_mach_vm_region
+0xff004b44	MSG__mach_make_memory_entry
+0xff004b48	MSG_mach_vm_purgable_control
+0xff004b4c	MSG_mach_vm_page_info
+0xff004e20	MSG_ledger_create
+0xff004e24	MSG_ledger_terminate
+0xff004e28	MSG_ledger_transfer
+0xff004e2c	MSG_ledger_read
+0xff005140	MSG_mach_get_task_label
+0xff005144	MSG_mach_get_task_label_text
+0xff005148	MSG_mach_get_label
+0xff00514c	MSG_mach_get_label_text
+0xff005150	MSG_mach_set_port_label
+0xff005154	MSG_mac_check_service
+0xff005158	MSG_mac_port_check_service_obj
+0xff00515c	MSG_mac_port_check_access
+0xff005160	MSG_mac_label_new
+0xff005164	MSG_mac_request_label
+0xff005dc0	MSG_UNDExecute_rpc
+0xff005dc4	MSG_UNDDisplayNoticeFromBundle_rpc
+0xff005dc8	MSG_UNDDisplayAlertFromBundle_rpc
+0xff005dcc	MSG_UNDDisplayCustomFromBundle_rpc
+0xff005dd0	MSG_UNDDisplayCustomFromDictionary_rpc
+0xff005dd4	MSG_UNDCancelNotification_rpc
+0xff005dd8	MSG_UNDDisplayNoticeSimple_rpc
+0xff005ddc	MSG_UNDDisplayAlertSimple_rpc
+0xff0060e0	MSG_UNDAlertCompletedWithResult_rpc
+0xff0060e4	MSG_UNDNotificationCreated_rpc
+0xff01a5e0	MSG_check_task_access
+0xff01a5e4	MSG_find_code_signature
+0xff04b320	MSG_kextd_ping
+0xff25a8a0	MSG_lock_acquire
+0xff25a8a4	MSG_lock_release
+0xff25a8a8	MSG_lock_try
+0xff25a8ac	MSG_lock_make_stable
+0xff25a8b0	MSG_lock_handoff
+0xff25a8b4	MSG_lock_handoff_accept
+0xff25abc0	MSG_semaphore_signal
+0xff25abc4	MSG_semaphore_signal_all
+0xff25abc8	MSG_semaphore_wait
+0xff25abcc	MSG_semaphore_signal_thread
+0xff25abd0	MSG_semaphore_timedwait
+0xff25abd4	MSG_semaphore_wait_signal
+0xff25abd8	MSG_semaphore_timedwait_signal
+0xffbebdcc	MSG_clock_alarm_reply
diff --git a/bsd/kern/tty.c b/bsd/kern/tty.c
index 841781612..b97eb780e 100644
--- a/bsd/kern/tty.c
+++ b/bsd/kern/tty.c
@@ -1498,6 +1498,8 @@ out:
 int
 ttyselect(struct tty *tp, int rw, void *wql, proc_t p)
 {
+	int retval = 0;
+
 	if (tp == NULL)
 		return (ENXIO);
 
@@ -1505,20 +1507,32 @@ ttyselect(struct tty *tp, int rw, void *wql, proc_t p)
 
 	switch (rw) {
 	case FREAD:
-		if (ttnread(tp) > 0 || ISSET(tp->t_state, TS_ZOMBIE))
+		if (ISSET(tp->t_state, TS_ZOMBIE)) {
 			return(1);
+		}
+
+		retval = ttnread(tp);
+		if (retval > 0) {
+			break;
+		}
+
 		selrecord(p, &tp->t_rsel, wql);
 		break;
 	case FWRITE:
-		if ((tp->t_outq.c_cc <= tp->t_lowat &&
-		     ISSET(tp->t_state, TS_CONNECTED))
-		    || ISSET(tp->t_state, TS_ZOMBIE)) {
-			return (1);
+		if (ISSET(tp->t_state, TS_ZOMBIE)) {
+			return(1);
 		}
+
+		if ((tp->t_outq.c_cc <= tp->t_lowat) &&
+				ISSET(tp->t_state, TS_CONNECTED)) {
+			retval = tp->t_hiwat - tp->t_outq.c_cc;
+			break;
+		}
+
 		selrecord(p, &tp->t_wsel, wql);
 		break;
 	}
-	return (0);
+	return retval;
 }
 
 
@@ -3040,6 +3054,12 @@ ttyfree(struct tty *tp)
 {
 	TTY_LOCK_NOTOWNED(tp);	/* debug assert */
 
+#if DEBUG
+	if (!(SLIST_EMPTY(&tp->t_rsel.si_note) && SLIST_EMPTY(&tp->t_wsel.si_note))) {
+		panic("knotes hooked into a tty when the tty is freed.\n");
+	}
+#endif /* DEBUG */
+
 	clfree(&tp->t_rawq);
 	clfree(&tp->t_canq);
 	clfree(&tp->t_outq);
diff --git a/bsd/kern/tty_ptmx.c b/bsd/kern/tty_ptmx.c
index 19c8e5bcc..a0be4feb5 100644
--- a/bsd/kern/tty_ptmx.c
+++ b/bsd/kern/tty_ptmx.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1997-2010 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -178,7 +178,7 @@ _devfs_setattr(void * handle, unsigned short mode, uid_t uid, gid_t gid)
 		char name[128];
 
 		snprintf(name, sizeof(name), "/dev/%s", direntp->de_name);
-		NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(name), ctx);
+		NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(name), ctx);
 		error = namei(&nd);
 		if (error)
 			goto out;
@@ -229,7 +229,7 @@ sysctl_ptmx_max(__unused struct sysctl_oid *oidp, __unused void *arg1,
 
 SYSCTL_NODE(_kern, KERN_TTY, tty, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "TTY");
 SYSCTL_PROC(_kern_tty, OID_AUTO, ptmx_max,
-		CTLTYPE_INT | CTLFLAG_RW,
+		CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
 		&ptmx_max, 0, &sysctl_ptmx_max, "I", "ptmx_max");
 
 
@@ -259,6 +259,39 @@ struct ptmx_ioctl {
 
 static int	ptmx_clone(dev_t dev, int minor);
 
+/*
+ * Set of locks to keep the interaction between kevents and revoke
+ * from causing havoc.
+ */
+
+#define	LOG2_PTSD_KE_NLCK	2
+#define	PTSD_KE_NLCK		(1l << LOG2_PTSD_KE_NLCK)
+#define	PTSD_KE_LOCK_INDEX(x)	((x) & (PTSD_KE_NLCK - 1))
+
+static lck_mtx_t ptsd_kevent_lock[PTSD_KE_NLCK];
+
+static void
+ptsd_kevent_lock_init(void)
+{
+	int i;
+	lck_grp_t *lgrp = lck_grp_alloc_init("ptsd kevent", LCK_GRP_ATTR_NULL);
+
+	for (i = 0; i < PTSD_KE_NLCK; i++)
+		lck_mtx_init(&ptsd_kevent_lock[i], lgrp, LCK_ATTR_NULL);
+}
+
+static void
+ptsd_kevent_mtx_lock(int minor)
+{
+	lck_mtx_lock(&ptsd_kevent_lock[PTSD_KE_LOCK_INDEX(minor)]);
+}
+
+static void
+ptsd_kevent_mtx_unlock(int minor)
+{
+	lck_mtx_unlock(&ptsd_kevent_lock[PTSD_KE_LOCK_INDEX(minor)]);
+}
+
 int
 ptmx_init( __unused int config_count)
 {
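
The hunk above hashes a pty's minor number into one of PTSD_KE_NLCK mutexes, so the revoke and kevent paths for the same minor serialize against each other while unrelated ptys rarely contend. A minimal user-space sketch of the same lock-striping idea (all names below are illustrative, not part of the patch):

#include <pthread.h>

#define LOG2_NLCK	2
#define NLCK		(1 << LOG2_NLCK)	/* must be a power of two */
#define LOCK_INDEX(x)	((x) & (NLCK - 1))	/* cheap hash: mask low bits */

static pthread_mutex_t stripe[NLCK] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
};

/*
 * All operations on a given minor always hash to the same stripe, so
 * two racing paths for that minor serialize, while different minors
 * usually land on different stripes and proceed in parallel.
 */
static void
stripe_lock(int minor_num)
{
	pthread_mutex_lock(&stripe[LOCK_INDEX(minor_num)]);
}

static void
stripe_unlock(int minor_num)
{
	pthread_mutex_unlock(&stripe[LOCK_INDEX(minor_num)]);
}
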
@@ -273,12 +306,25 @@ ptmx_init( __unused int config_count)
 		return (ENOENT);
 	}
 
+	if (cdevsw_setkqueueok(ptmx_major, &ptmx_cdev, 0) == -1) {
+		panic("Failed to set flags on ptmx cdevsw entry.");
+	}
+
 	/* Get a major number for /dev/pts/nnn */
 	if ((ptsd_major = cdevsw_add(-15, &ptsd_cdev)) == -1) {
 		(void)cdevsw_remove(ptmx_major, &ptmx_cdev);
 		printf("ptmx_init: failed to obtain /dev/ptmx major number\n");
 		return (ENOENT);
 	}
+
+	if (cdevsw_setkqueueok(ptsd_major, &ptsd_cdev, 0) == -1) {
+		panic("Failed to set flags on ptmx cdevsw entry.");
+	}
+
+	/*
+	 * Locks to guard against races between revoke and kevents
+	 */
+	ptsd_kevent_lock_init();
 
 	/* Create the /dev/ptmx device {<major>,0} */
 	(void)devfs_make_node_clone(makedev(ptmx_major, 0),
@@ -549,6 +595,7 @@ ptsd_open(dev_t dev, int flag, __unused int devtype, __unused proc_t p)
 	error = (*linesw[tp->t_line].l_open)(dev, tp);
 	/* Successful open; mark as open by the slave */
 	pti->pt_flags |= PF_OPEN_S;
+	CLR(tp->t_state, TS_IOCTL_NOT_OK);
 	if (error == 0)
 		ptmx_wakeup(tp, FREAD|FWRITE);
 out:
@@ -556,6 +603,8 @@ out:
 	return (error);
 }
 
+static void ptsd_revoke_knotes(dev_t, struct tty *);
+
 FREE_BSDSTATIC int
 ptsd_close(dev_t dev, int flag, __unused int mode, __unused proc_t p)
 {
@@ -587,9 +636,11 @@ ptsd_close(dev_t dev, int flag, __unused int mode, __unused proc_t p)
 #ifdef	FIX_VSX_HANG
 	tp->t_timeout = save_timeout;
 #endif
-
 	tty_unlock(tp);
 
+	if ((flag & IO_REVOKE) == IO_REVOKE)
+		ptsd_revoke_knotes(dev, tp);
+
 	/* unconditional, just like ttyclose() */
 	ptmx_free_ioctl(minor(dev), PF_OPEN_S);
 
@@ -786,6 +837,7 @@ ptmx_open(dev_t dev, __unused int flag, __unused int devtype, __unused proc_t p)
 	}
 	tp->t_oproc = ptsd_start;
 	CLR(tp->t_state, TS_ZOMBIE);
+	SET(tp->t_state, TS_IOCTL_NOT_OK);
 #ifdef sun4c
 	tp->t_stop = ptsd_stop;
 #endif
@@ -1000,19 +1052,30 @@ ptsd_select(dev_t dev, int rw, void *wql, proc_t p)
 
 	switch (rw) {
 	case FREAD:
-		if (ttnread(tp) > 0 || ISSET(tp->t_state, TS_ZOMBIE)) {
+		if (ISSET(tp->t_state, TS_ZOMBIE)) {
 			retval = 1;
 			break;
 		}
+
+		retval = ttnread(tp);
+		if (retval > 0) {
+			break;
+		}
+
 		selrecord(p, &tp->t_rsel, wql);
 		break;
 	case FWRITE:
-		if ((tp->t_outq.c_cc <= tp->t_lowat &&
-		     ISSET(tp->t_state, TS_CONNECTED))
-		    || ISSET(tp->t_state, TS_ZOMBIE)) {
+		if (ISSET(tp->t_state, TS_ZOMBIE)) {
 			retval = 1;
 			break;
 		}
+
+		if ((tp->t_outq.c_cc <= tp->t_lowat) &&
+				ISSET(tp->t_state, TS_CONNECTED)) {
+			retval = tp->t_hiwat - tp->t_outq.c_cc;
+			break;
+		}
+
 		selrecord(p, &tp->t_wsel, wql);
 		break;
 	}
@@ -1044,7 +1107,7 @@ ptmx_select(dev_t dev, int rw, void *wql, proc_t p)
 		 */
 		if ((tp->t_state&TS_ISOPEN) &&
 		     tp->t_outq.c_cc && (tp->t_state&TS_TTSTOP) == 0) {
-			retval = 1;
+			retval = tp->t_outq.c_cc;
 			break;
 		}
 		/* FALLTHROUGH */
@@ -1063,18 +1126,19 @@ ptmx_select(dev_t dev, int rw, void *wql, proc_t p)
 		if (tp->t_state&TS_ISOPEN) {
 			if (pti->pt_flags & PF_REMOTE) {
 			    if (tp->t_canq.c_cc == 0) {
-				retval = 1;
+				retval = (TTYHOG - 1);
 				break;
 			    }
 			} else {
-			    if (tp->t_rawq.c_cc + tp->t_canq.c_cc < TTYHOG-2) {
-				    retval = 1;
+			    retval = (TTYHOG - 2) - (tp->t_rawq.c_cc + tp->t_canq.c_cc);
+			    if (retval > 0) {
 				    break;
 			    }
 			    if (tp->t_canq.c_cc == 0 && (tp->t_lflag&ICANON)) {
 				    retval = 1;
 				    break;
 			    }
+			    retval = 0;
 			}
 		}
 		selrecord(p, &pti->pt_selw, wql);
@@ -1225,6 +1289,7 @@ cptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p)
 	struct ptmx_ioctl *pti;
 	u_char *cc;
 	int stop, error = 0;
+	int allow_ext_ioctl = 1;
 
 	pti = ptmx_get_ioctl(minor(dev), 0);
 
@@ -1233,11 +1298,18 @@ cptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p)
 
 	cc = tp->t_cc;
 
+	/*
+	 * Do not permit extended ioctls on the master side of the pty unless
+	 * the slave side has been successfully opened and initialized.
+	 */
+	if (cdevsw[major(dev)].d_open == ptmx_open && ISSET(tp->t_state, TS_IOCTL_NOT_OK))
+		allow_ext_ioctl = 0;
+
 	/*
 	 * IF CONTROLLER STTY THEN MUST FLUSH TO PREVENT A HANG.
 	 * ttywflush(tp) will hang if there are characters in the outq.
 	 */
-	if (cmd == TIOCEXT) {
+	if (cmd == TIOCEXT && allow_ext_ioctl) {
 		/*
 		 * When the EXTPROC bit is being toggled, we need
 		 * to send a TIOCPKT_IOCTL if the packet driver
@@ -1259,7 +1331,7 @@ cptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p)
 		}
 		goto out;
 	} else
-	if (cdevsw[major(dev)].d_open == ptmx_open)
+	if (cdevsw[major(dev)].d_open == ptmx_open) {
 		switch (cmd) {
 
 		case TIOCGPGRP:
@@ -1363,6 +1435,17 @@ cptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p)
 			error = 0;
 			goto out;
 		}
+
+		/*
+		 * Fail all other calls; pty masters are not serial devices;
+		 * we only pretend they are when the slave side of the pty is
+		 * already open.
+		 */
+		if (!allow_ext_ioctl) {
+			error = ENOTTY;
+			goto out;
+		}
+	}
 	error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p);
 	if (error == ENOTTY) {
 		error = ttioctl_locked(tp, cmd, data, flag, p);
@@ -1440,127 +1523,110 @@ out:
  * kqueue support.
  */
 int ptsd_kqfilter(dev_t, struct knote *); 
-static void ptsd_kqops_read_detach(struct knote *);
-static int ptsd_kqops_read_event(struct knote *, long);
-static void ptsd_kqops_write_detach(struct knote *);
-static int ptsd_kqops_write_event(struct knote *, long);
+static void ptsd_kqops_detach(struct knote *);
+static int ptsd_kqops_event(struct knote *, long);
 
-static struct filterops ptsd_kqops_read = {
+static struct filterops ptsd_kqops = {
 	.f_isfd = 1,
-	.f_detach = ptsd_kqops_read_detach,
-	.f_event = ptsd_kqops_read_event,
+	.f_detach = ptsd_kqops_detach,
+	.f_event = ptsd_kqops_event,
 };                                    
-static struct filterops ptsd_kqops_write = {
-	.f_isfd = 1,
-	.f_detach = ptsd_kqops_write_detach,
-	.f_event = ptsd_kqops_write_event,
-};                                  
 
-static void
-ptsd_kqops_read_detach(struct knote *kn)
-{
-	struct ptmx_ioctl *pti;
-	struct tty *tp;
-	dev_t dev = (dev_t) kn->kn_hookid;
-
-	pti = ptmx_get_ioctl(minor(dev), 0);
-	tp = pti->pt_tty;
+#define	PTSD_KNOTE_VALID	NULL
+#define	PTSD_KNOTE_REVOKED	((void *)-911l)
 
-	if (tp == NULL)
-		return;
-
-	tty_lock(tp);
-	KNOTE_DETACH(&tp->t_rsel.si_note, kn);
-	tty_unlock(tp);
-
-	kn->kn_hookid = 0;
-}
+/*
+ * In the normal case, by the time the driver_close() routine is called
+ * on the slave, all knotes have been detached.  However in the revoke(2)
+ * case, the driver's close routine is called while there are knotes active
+ * that reference the handlers below.  And we have no obvious means to
+ * reach from the driver out to the kqueues that reference them to get
+ * them to stop.
+ */
 
-static int
-ptsd_kqops_read_event(struct knote *kn, long hint)
+static void
+ptsd_kqops_detach(struct knote *kn)
 {
 	struct ptmx_ioctl *pti;
 	struct tty *tp;
-	dev_t dev = (dev_t) kn->kn_hookid;
-	int retval = 0;
-
-	pti = ptmx_get_ioctl(minor(dev), 0);
-	tp = pti->pt_tty;
-
-	if (tp == NULL)
-		return (ENXIO);
-
-	if (hint == 0)
-		tty_lock(tp);
+	dev_t dev, lockdev = (dev_t)kn->kn_hookid;
 
-	kn->kn_data = ttnread(tp);
-	if (kn->kn_data > 0) {
-		retval = 1;
-	}
+	ptsd_kevent_mtx_lock(minor(lockdev));
 
-	if (ISSET(tp->t_state, TS_ZOMBIE)) {
-		kn->kn_flags |= EV_EOF;
-		retval = 1;
+	if ((dev = (dev_t)kn->kn_hookid) != 0) {
+		pti = ptmx_get_ioctl(minor(dev), 0);
+		if (pti != NULL && (tp = pti->pt_tty) != NULL) {
+			tty_lock(tp);
+			if (kn->kn_filter == EVFILT_READ)
+				KNOTE_DETACH(&tp->t_rsel.si_note, kn);
+			else
+				KNOTE_DETACH(&tp->t_wsel.si_note, kn);
+			tty_unlock(tp);
+			kn->kn_hookid = 0;
+		}
 	}
 
-	if (hint == 0)
-		tty_unlock(tp);
-	return (retval);
-}                                                                                                
-static void 
-ptsd_kqops_write_detach(struct knote *kn)
-{
-	struct ptmx_ioctl *pti;
-	struct tty *tp;
-	dev_t dev = (dev_t) kn->kn_hookid;
-
-	pti = ptmx_get_ioctl(minor(dev), 0);
-	tp = pti->pt_tty;
-
-	if (tp == NULL)
-		return;
-
-	tty_lock(tp);
-	KNOTE_DETACH(&tp->t_wsel.si_note, kn);
-	tty_unlock(tp);
-
-	kn->kn_hookid = 0;
+	ptsd_kevent_mtx_unlock(minor(lockdev));
 }
 
 static int
-ptsd_kqops_write_event(struct knote *kn, long hint)
+ptsd_kqops_event(struct knote *kn, long hint)
 {
 	struct ptmx_ioctl *pti;
 	struct tty *tp;
-	dev_t dev = (dev_t) kn->kn_hookid;
+	dev_t dev = (dev_t)kn->kn_hookid;
 	int retval = 0;
 
-	pti = ptmx_get_ioctl(minor(dev), 0);
-	tp = pti->pt_tty;
+	ptsd_kevent_mtx_lock(minor(dev));
 
-	if (tp == NULL)
-		return (ENXIO);
+	do {
+		if (kn->kn_hook != PTSD_KNOTE_VALID) {
+			/* We were revoked */
+			kn->kn_data = 0;
+			kn->kn_flags |= EV_EOF;
+			retval = 1;
+			break;
+		}
 
-	if (hint == 0)
-		tty_lock(tp);
+		pti = ptmx_get_ioctl(minor(dev), 0);
+		if (pti == NULL || (tp = pti->pt_tty) == NULL) {
+			kn->kn_data = ENXIO;
+			kn->kn_flags |= EV_ERROR;
+			retval = 1;
+			break;
+		}
 
-	if ((tp->t_outq.c_cc <= tp->t_lowat) &&
-			ISSET(tp->t_state, TS_CONNECTED)) {
-		kn->kn_data = tp->t_outq.c_cn - tp->t_outq.c_cc;
-		retval = 1;
-	}
+		if (hint == 0)
+			tty_lock(tp);
 
-	if (ISSET(tp->t_state, TS_ZOMBIE)) {
-		kn->kn_flags |= EV_EOF;
-		retval = 1;
-	}
+		if (kn->kn_filter == EVFILT_READ) {
+			kn->kn_data = ttnread(tp);
+			if (kn->kn_data > 0)
+				retval = 1;
+			if (ISSET(tp->t_state, TS_ZOMBIE)) {
+				kn->kn_flags |= EV_EOF;
+				retval = 1;
+			}
+		} else {	/* EVFILT_WRITE */
+			if ((tp->t_outq.c_cc <= tp->t_lowat) &&
+			    ISSET(tp->t_state, TS_CONNECTED)) {
+				kn->kn_data = tp->t_outq.c_cn - tp->t_outq.c_cc;
+				retval = 1;
+			}
+			if (ISSET(tp->t_state, TS_ZOMBIE)) {
+				kn->kn_flags |= EV_EOF;
+				retval = 1;
+			}
+		}
 
-	if (hint == 0)
-		tty_unlock(tp);
-	return (retval);
+		if (hint == 0)
+			tty_unlock(tp);
+	} while (0);
 
-}
+	ptsd_kevent_mtx_unlock(minor(dev));
 
+	return (retval);
+}
 int
 ptsd_kqfilter(dev_t dev, struct knote *kn)
 {
@@ -1581,14 +1647,14 @@ ptsd_kqfilter(dev_t dev, struct knote *kn)
 	tty_lock(tp);
 
 	kn->kn_hookid = dev;
+	kn->kn_hook = PTSD_KNOTE_VALID;
+	kn->kn_fop = &ptsd_kqops;
 
         switch (kn->kn_filter) {
         case EVFILT_READ:
-                kn->kn_fop = &ptsd_kqops_read;
                 KNOTE_ATTACH(&tp->t_rsel.si_note, kn);
                 break;
         case EVFILT_WRITE:
-                kn->kn_fop = &ptsd_kqops_write;
                 KNOTE_ATTACH(&tp->t_wsel.si_note, kn);
                 break;
         default:
@@ -1600,3 +1666,59 @@ ptsd_kqfilter(dev_t dev, struct knote *kn)
         return (retval);
 }
 
+/*
+ * Support for revoke(2).
+ *
+ * Mark all the kn_hook fields so that future invocations of the
+ * f_event op will just say "EOF" *without* looking at the
+ * ptmx_ioctl structure (which may disappear or be recycled at
+ * the end of ptsd_close).  Issue wakeups to post that EOF to
+ * anyone listening.  And finally remove the knotes from the
+ * tty's klists to keep ttyclose() happy, and set the hookid to
+ * zero to make the final detach passively successful.
+ */
+static void
+ptsd_revoke_knotes(dev_t dev, struct tty *tp)
+{
+	struct klist *list;
+	struct knote *kn, *tkn;
+
+	/* (Hold and drop the right locks in the right order.) */
+
+	ptsd_kevent_mtx_lock(minor(dev));
+	tty_lock(tp);
+
+	list = &tp->t_rsel.si_note;
+	SLIST_FOREACH(kn, list, kn_selnext)
+		kn->kn_hook = PTSD_KNOTE_REVOKED;
+
+	list = &tp->t_wsel.si_note;
+	SLIST_FOREACH(kn, list, kn_selnext)
+		kn->kn_hook = PTSD_KNOTE_REVOKED;
+
+	tty_unlock(tp);
+	ptsd_kevent_mtx_unlock(minor(dev));
+
+	tty_lock(tp);
+	ttwakeup(tp);
+	ttwwakeup(tp);
+	tty_unlock(tp);
+
+	ptsd_kevent_mtx_lock(minor(dev));
+	tty_lock(tp);
+
+	list = &tp->t_rsel.si_note;
+	SLIST_FOREACH_SAFE(kn, list, kn_selnext, tkn) {
+		(void) KNOTE_DETACH(list, kn);
+		kn->kn_hookid = 0;
+	}
+
+	list = &tp->t_wsel.si_note;
+	SLIST_FOREACH_SAFE(kn, list, kn_selnext, tkn) {
+		(void) KNOTE_DETACH(list, kn);
+		kn->kn_hookid = 0;
+	}
+
+	tty_unlock(tp);
+	ptsd_kevent_mtx_unlock(minor(dev));
+}
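
The scheme above hinges on kn_hook acting as a tombstone: revoke flips every knote's hook to PTSD_KNOTE_REVOKED under the stripe lock, and any later f_event invocation reports EOF without dereferencing per-device state that may already be gone. A stripped-down sketch of that handshake, with hypothetical names standing in for the kqueue machinery:

#include <stddef.h>

#define NOTE_VALID	NULL
#define NOTE_REVOKED	((void *)-911L)	/* sentinel, never a real pointer */

struct note {
	void	*hook;		/* NOTE_VALID or NOTE_REVOKED */
	long	data;
	int	eof;
};

/* revoke path: mark first (under the stripe lock in the real code), wake second */
static void
revoke_note(struct note *n)
{
	n->hook = NOTE_REVOKED;
}

/* event path: consult the sentinel before touching device state */
static int
note_event(struct note *n)
{
	if (n->hook != NOTE_VALID) {
		n->data = 0;
		n->eof = 1;
		return 1;	/* fire with EOF; device state untouched */
	}
	/* ... only now is it safe to look up the per-device structure ... */
	return 0;
}
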
diff --git a/bsd/kern/tty_subr.c b/bsd/kern/tty_subr.c
index c2abc010d..89bc09fe0 100644
--- a/bsd/kern/tty_subr.c
+++ b/bsd/kern/tty_subr.c
@@ -340,7 +340,9 @@ clrbits(u_char *cp, int off, int len)
 		cp[sby++] &= mask;
 
 		mask = (1<<ebi) - 1;
-		cp[eby] &= ~mask;
+		/* handle remainder bits, if any, for a non-zero ebi value */
+		if (mask)
+			cp[eby] &= ~mask;
 
 		for (i = sby; i < eby; i++)
 			cp[i] = 0x00;
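
The guard above matters when the range being cleared ends exactly on a byte boundary: ebi is then 0, mask computes to 0, and the old unconditional `cp[eby] &= ~mask` performed a read-modify-write of the byte just past the cleared range for no effect. A small standalone check of that arithmetic:

#include <stdio.h>

/*
 * Clearing bits [off, off+len) when the end lands on a byte boundary:
 * ebi == 0 makes mask == 0, so there are no trailing bits to clear and
 * the final partial-byte store can (and should) be skipped entirely.
 */
int
main(void)
{
	int off = 4, len = 12;			/* clear bits 4..15 */
	int ebi = (off + len) & 7;		/* = 0: ends on a byte edge */
	unsigned mask = (1u << ebi) - 1;	/* = 0: nothing left to clear */

	printf("ebi=%d mask=0x%x -> %s\n", ebi, mask,
	    mask ? "clear remainder bits" : "skip the store");
	return 0;
}
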
diff --git a/bsd/kern/tty_tty.c b/bsd/kern/tty_tty.c
index 29868fadc..ec7bee445 100644
--- a/bsd/kern/tty_tty.c
+++ b/bsd/kern/tty_tty.c
@@ -82,9 +82,8 @@ int cttyioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, proc_t p);
 int cttyselect(dev_t dev, int flag, void* wql, proc_t p);
 static vnode_t cttyvp(proc_t p);
 
-
 int
-cttyopen(__unused dev_t dev, int flag, __unused int mode, proc_t p)
+cttyopen(dev_t dev, int flag, __unused int mode, proc_t p)
 {
 	vnode_t ttyvp = cttyvp(p);
 	struct vfs_context context;
@@ -96,7 +95,18 @@ cttyopen(__unused dev_t dev, int flag, __unused int mode, proc_t p)
 	context.vc_thread = current_thread();
 	context.vc_ucred = kauth_cred_proc_ref(p);
 
+	/*
+	 * A little hack--this device, used by many processes,
+	 * happens to do an open on another device, which can
+	 * cause unhappiness if the second-level open blocks indefinitely
+	 * (as could be the case if the master side has hung up).  Since
+	 * we know that this driver doesn't care about serializing
+	 * opens and closes, we can drop the lock.
+	 */
+	devsw_unlock(dev, S_IFCHR);
 	error = VNOP_OPEN(ttyvp, flag, &context);
+	devsw_lock(dev, S_IFCHR);
+
 	vnode_put(ttyvp);
 	kauth_cred_unref(&context.vc_ucred);
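
The pattern here is general: never hold a serialization lock across a call that can sleep indefinitely on external state. A condensed sketch of the shape of the fix (devsw_unlock/devsw_lock and VNOP_OPEN are the calls used above; the wrapper itself is hypothetical):

/*
 * Sketch: drop a lock that only guards open/close serialization before
 * calling into something that may block for an unbounded time (here, a
 * hung pty master), then retake it before touching protected state.
 */
static int
open_via_other_device(dev_t dev, vnode_t ttyvp, int flag,
    struct vfs_context *ctx)
{
	int error;

	devsw_unlock(dev, S_IFCHR);	/* safe: this driver does not rely
					 * on serialized open/close */
	error = VNOP_OPEN(ttyvp, flag, ctx);	/* may sleep indefinitely */
	devsw_lock(dev, S_IFCHR);

	return (error);
}
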
 
diff --git a/bsd/kern/ubc_subr.c b/bsd/kern/ubc_subr.c
index 473ae0a78..c7661e41b 100644
--- a/bsd/kern/ubc_subr.c
+++ b/bsd/kern/ubc_subr.c
@@ -74,6 +74,8 @@ extern kern_return_t memory_object_pages_resident(memory_object_control_t,
 							boolean_t *);
 extern kern_return_t	memory_object_signed(memory_object_control_t control,
 					     boolean_t is_signed);
+extern boolean_t	memory_object_is_slid(memory_object_control_t	control);
+
 extern void Debugger(const char *message);
 
 
@@ -140,9 +142,11 @@ enum {
 	CSMAGIC_CODEDIRECTORY = 0xfade0c02,		/* CodeDirectory blob */
 	CSMAGIC_EMBEDDED_SIGNATURE = 0xfade0cc0, /* embedded form of signature data */
 	CSMAGIC_EMBEDDED_SIGNATURE_OLD = 0xfade0b02,	/* XXX */
+	CSMAGIC_EMBEDDED_ENTITLEMENTS = 0xfade7171,	/* embedded entitlements */
 	CSMAGIC_DETACHED_SIGNATURE = 0xfade0cc1, /* multi-arch collection of embedded signatures */
 	
 	CSSLOT_CODEDIRECTORY = 0,				/* slot index for CodeDirectory */
+	CSSLOT_ENTITLEMENTS = 5
 };
 
 static const uint32_t supportsScatter = 0x20100;	// first version to support scatter option
@@ -163,6 +167,12 @@ typedef struct __SuperBlob {
 	/* followed by Blobs in no particular order as indicated by offsets in index */
 } CS_SuperBlob;
 
+typedef struct __GenericBlob {
+	uint32_t magic;				/* magic number */
+	uint32_t length;			/* total length of blob */
+	char data[];
+} CS_GenericBlob;
+
 struct Scatter {
 	uint32_t count;			// number of pages; zero for sentinel (only)
 	uint32_t base;			// first page number
@@ -353,6 +363,113 @@ hashes(
  * End of routines to navigate code signing data structures in the kernel.
  */
 
+/*
+ * ENTITLEMENTS
+ * Routines to navigate entitlements in the kernel.
+ */
+
+/* Retrieve the entitlements blob for a process.
+ * Returns:
+ *   EINVAL	no text vnode associated with the process
+ *   EBADEXEC   invalid code signing data
+ *   ENOMEM	you should reboot
+ *   0		no error occurred
+ *
+ * On success, out_start and out_length will point to the
+ * entitlements blob if found; or will be set to NULL/zero
+ * if there were no entitlements.
+ */
+int
+cs_entitlements_blob_get(proc_t p, void **out_start, size_t *out_length)
+{
+	SHA1_CTX context;			/* XXX hash agility */
+	int error = 0;
+	struct cs_blob *blob_list_entry;
+	CS_SuperBlob *super_blob;
+	CS_BlobIndex *blob_index;
+	CS_GenericBlob *blob;
+	CS_CodeDirectory *code_dir;
+	unsigned char *computed_hash = NULL;
+	unsigned char *embedded_hash = NULL;
+	void *start = NULL;
+	size_t length = 0;
+	size_t hash_size = 0;
+	unsigned int i, count;
+
+	if (NULL == p->p_textvp) {
+		error = EINVAL;
+		goto out;
+	}
+	if (NULL == (blob_list_entry = ubc_cs_blob_get(p->p_textvp, -1,
+	    p->p_textoff)))
+		goto out;
+	super_blob = (void *)blob_list_entry->csb_mem_kaddr;
+	if (CSMAGIC_EMBEDDED_SIGNATURE != ntohl(super_blob->magic)) {
+		error = EBADEXEC;
+		goto out;
+	}
+	count = ntohl(super_blob->count);
+	for (i = 0; i < count; ++i) {
+		blob_index = &super_blob->index[i];
+		blob = (void *)((char *)super_blob + ntohl(blob_index->offset));
+		switch (ntohl(blob_index->type)) {
+		case CSSLOT_CODEDIRECTORY:
+			if (CSMAGIC_CODEDIRECTORY != ntohl(blob->magic))
+				break;
+			code_dir = (void *)blob;
+			hash_size = code_dir->hashSize;
+			if (CSSLOT_ENTITLEMENTS <=
+			    ntohl(code_dir->nSpecialSlots)) {
+				embedded_hash = (void *)((char *)code_dir +
+				    ntohl(code_dir->hashOffset) -
+				    (hash_size * CSSLOT_ENTITLEMENTS));
+			}
+			break;
+		case CSSLOT_ENTITLEMENTS:
+			if (CSMAGIC_EMBEDDED_ENTITLEMENTS != ntohl(blob->magic))
+				break;
+			start = (void *)blob;
+			length = ntohl(blob->length);
+			break;
+		default:
+			break;
+		}
+	}
+	if (NULL == start && NULL == embedded_hash) {
+		error = 0;
+		goto out;
+	} else if (NULL == start || NULL == embedded_hash) {
+		error = EBADEXEC;
+		goto out;
+	}
+	if (NULL == (computed_hash = kalloc(hash_size))) {
+		error = ENOMEM;
+		goto out;
+	}
+	SHA1Init(&context);
+	SHA1Update(&context, start, length);
+	SHA1Final(computed_hash, &context);
+	if (0 != memcmp(computed_hash, embedded_hash, hash_size)) {
+		error = EBADEXEC;
+		goto out;
+	}
+	error = 0;
+out:
+	if (NULL != computed_hash)
+		kfree(computed_hash, hash_size);
+	if (0 == error) {
+		*out_start = start;
+		*out_length = length;
+	}
+	return error;
+}
+
+/*
+ * ENTITLEMENTS
+ * End of routines to navigate entitlements in the kernel.
+ */
+
+
 
 /*
  * ubc_init
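
cs_entitlements_blob_get() pairs two findings from a single walk of the SuperBlob index: the entitlements GenericBlob itself and the hash recorded in the CodeDirectory's special slot 5, and it only returns the blob once the SHA-1 of the former matches the latter. A sketch of a caller, assuming a valid proc_t (the helper below is hypothetical, not part of this patch):

/* Sketch of a hypothetical in-kernel caller. */
static int
log_entitlements_size(proc_t p)
{
	void *blob = NULL;
	size_t len = 0;
	int error;

	error = cs_entitlements_blob_get(p, &blob, &len);
	if (error != 0)
		return (error);		/* EINVAL, EBADEXEC or ENOMEM */

	if (blob == NULL)
		printf("pid %d: no entitlements\n", proc_pid(p));
	else
		printf("pid %d: entitlements blob, %lu bytes\n",
		    proc_pid(p), (unsigned long)len);
	return (0);
}
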
@@ -626,7 +743,10 @@ ubc_setsize(struct vnode *vp, off_t nsize)
 	uip->ui_size = nsize;
 
 	if (nsize >= osize) {	/* Nothing more to do */
-		lock_vnode_and_post(vp, NOTE_EXTEND);
+		if (nsize > osize) {
+			lock_vnode_and_post(vp, NOTE_EXTEND);
+		}
+
 		return (1);		/* return success */
 	}
 
@@ -986,6 +1106,16 @@ ubc_getobject(struct vnode *vp, __unused int flags)
 	return (MEMORY_OBJECT_CONTROL_NULL);
 }
 
+boolean_t
+ubc_strict_uncached_IO(struct vnode *vp)
+{
+        boolean_t result = FALSE;
+
+	if (UBCINFOEXISTS(vp)) {
+	        result = memory_object_is_slid(vp->v_ubcinfo->ui_control);
+	}
+	return result;
+}
 
 /*
  * ubc_blktooff
@@ -1834,6 +1964,9 @@ ubc_create_upl(
 	if (bufsize & 0xfff)
 		return KERN_INVALID_ARGUMENT;
 
+	if (bufsize > MAX_UPL_SIZE * PAGE_SIZE)
+		return KERN_INVALID_ARGUMENT;
+
 	if (uplflags & (UPL_UBC_MSYNC | UPL_UBC_PAGEOUT | UPL_UBC_PAGEIN)) {
 
 		if (uplflags & UPL_UBC_MSYNC) {
@@ -2223,12 +2356,12 @@ static SInt32 cs_blob_count_peak = 0;
 
 int cs_validation = 1;
 
-SYSCTL_INT(_vm, OID_AUTO, cs_validation, CTLFLAG_RW, &cs_validation, 0, "Do validate code signatures");
-SYSCTL_INT(_vm, OID_AUTO, cs_blob_count, CTLFLAG_RD, &cs_blob_count, 0, "Current number of code signature blobs");
-SYSCTL_INT(_vm, OID_AUTO, cs_blob_size, CTLFLAG_RD, &cs_blob_size, 0, "Current size of all code signature blobs");
-SYSCTL_INT(_vm, OID_AUTO, cs_blob_count_peak, CTLFLAG_RD, &cs_blob_count_peak, 0, "Peak number of code signature blobs");
-SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_peak, CTLFLAG_RD, &cs_blob_size_peak, 0, "Peak size of code signature blobs");
-SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_max, CTLFLAG_RD, &cs_blob_size_max, 0, "Size of biggest code signature blob");
+SYSCTL_INT(_vm, OID_AUTO, cs_validation, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_validation, 0, "Do validate code signatures");
+SYSCTL_INT(_vm, OID_AUTO, cs_blob_count, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)(uintptr_t)&cs_blob_count, 0, "Current number of code signature blobs");
+SYSCTL_INT(_vm, OID_AUTO, cs_blob_size, CTLFLAG_RD | CTLFLAG_LOCKED, (int *)(uintptr_t)&cs_blob_size, 0, "Current size of all code signature blobs");
+SYSCTL_INT(_vm, OID_AUTO, cs_blob_count_peak, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_count_peak, 0, "Peak number of code signature blobs");
+SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_peak, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_size_peak, 0, "Peak size of code signature blobs");
+SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_max, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_size_max, 0, "Size of biggest code signature blob");
 
 kern_return_t
 ubc_cs_blob_allocate(
@@ -2335,7 +2468,7 @@ ubc_cs_blob_add(
 		blob->csb_start_offset = 0;
 		blob->csb_end_offset = 0;
 	} else {
-		unsigned char *sha1_base;
+		const unsigned char *sha1_base;
 		int sha1_size;
 
 		blob->csb_flags = ntohl(cd->flags) | CS_VALID;
@@ -2582,6 +2715,9 @@ ubc_cs_free(
 		OSAddAtomic((SInt32) -blob->csb_mem_size, &cs_blob_size);
 		kfree(blob, sizeof (*blob));
 	}
+#if CHECK_CS_VALIDATION_BITMAP
+	ubc_cs_validation_bitmap_deallocate( uip->ui_vnode );
+#endif
 	uip->cs_blobs = NULL;
 }
 
@@ -2820,3 +2956,127 @@ ubc_cs_getcdhash(
 
 	return ret;
 }
+
+#if CHECK_CS_VALIDATION_BITMAP
+#define stob(s)	((atop_64((s)) + 07) >> 3)
+extern	boolean_t	root_fs_upgrade_try;
+
+/*
+ * Should we use the code-sign bitmap to avoid repeated code-sign validation?
+ * Depends:
+ * a) Is the target vnode on the root filesystem?
+ * b) Has someone tried to mount the root filesystem read-write?
+ * If answers are (a) yes AND (b) no, then we can use the bitmap.
+ */
+#define	USE_CODE_SIGN_BITMAP(vp)	((vp != NULL) && (vp->v_mount != NULL) && (vp->v_mount->mnt_flag & MNT_ROOTFS) && !root_fs_upgrade_try)
+kern_return_t
+ubc_cs_validation_bitmap_allocate(
+	vnode_t		vp)
+{
+	kern_return_t	kr = KERN_SUCCESS;
+	struct ubc_info *uip;
+	char		*target_bitmap;
+	vm_object_size_t	bitmap_size;
+
+	if ( ! USE_CODE_SIGN_BITMAP(vp) || (! UBCINFOEXISTS(vp))) {
+		kr = KERN_INVALID_ARGUMENT;
+	} else {
+		uip = vp->v_ubcinfo;
+
+		if ( uip->cs_valid_bitmap == NULL ) {
+			bitmap_size = stob(uip->ui_size);
+			target_bitmap = (char*) kalloc( (vm_size_t)bitmap_size );
+			if (target_bitmap == 0) {
+				kr = KERN_NO_SPACE;
+			} else {
+				kr = KERN_SUCCESS;
+			}
+			if( kr == KERN_SUCCESS ) {
+				memset( target_bitmap, 0, (size_t)bitmap_size);
+				uip->cs_valid_bitmap = (void*)target_bitmap;
+				uip->cs_valid_bitmap_size = bitmap_size;
+			}
+		}
+	}
+	return kr;
+}
+
+kern_return_t
+ubc_cs_check_validation_bitmap (
+	vnode_t			vp,
+	memory_object_offset_t		offset,
+	int			optype)
+{
+	kern_return_t	kr = KERN_SUCCESS;
+
+	if ( ! USE_CODE_SIGN_BITMAP(vp) || ! UBCINFOEXISTS(vp)) {
+		kr = KERN_INVALID_ARGUMENT;
+	} else {
+		struct ubc_info *uip = vp->v_ubcinfo;
+		char		*target_bitmap = uip->cs_valid_bitmap;
+
+		if ( target_bitmap == NULL ) {
+		       kr = KERN_INVALID_ARGUMENT;
+		} else {
+			uint64_t	bit, byte;
+			bit = atop_64( offset );
+			byte = bit >> 3;
+
+			if ( byte > uip->cs_valid_bitmap_size ) {
+			       kr = KERN_INVALID_ARGUMENT;
+			} else {
+
+				if (optype == CS_BITMAP_SET) {
+					target_bitmap[byte] |= (1 << (bit & 07));
+					kr = KERN_SUCCESS;
+				} else if (optype == CS_BITMAP_CLEAR) {
+					target_bitmap[byte] &= ~(1 << (bit & 07));
+					kr = KERN_SUCCESS;
+				} else if (optype == CS_BITMAP_CHECK) {
+					if ( target_bitmap[byte] & (1 << (bit & 07))) {
+						kr = KERN_SUCCESS;
+					} else {
+						kr = KERN_FAILURE;
+					}
+				}
+			}
+		}
+	}
+	return kr;
+}
+
+void
+ubc_cs_validation_bitmap_deallocate(
+	vnode_t		vp)
+{
+	struct ubc_info *uip;
+	void		*target_bitmap;
+	vm_object_size_t	bitmap_size;
+
+	if ( UBCINFOEXISTS(vp)) {
+		uip = vp->v_ubcinfo;
+
+		if ( (target_bitmap = uip->cs_valid_bitmap) != NULL ) {
+			bitmap_size = uip->cs_valid_bitmap_size;
+			kfree( target_bitmap, (vm_size_t) bitmap_size );
+			uip->cs_valid_bitmap = NULL;
+		}
+	}
+}
+#else
+kern_return_t	ubc_cs_validation_bitmap_allocate(__unused vnode_t vp){
+	return KERN_INVALID_ARGUMENT;
+}
+
+kern_return_t ubc_cs_check_validation_bitmap(
+	__unused struct vnode *vp, 
+	__unused memory_object_offset_t offset,
+	__unused int optype){
+
+	return KERN_INVALID_ARGUMENT;
+}
+
+void	ubc_cs_validation_bitmap_deallocate(__unused vnode_t vp){
+	return;
+}
+#endif /* CHECK_CS_VALIDATION_BITMAP */
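
The bitmap bookkeeping above is one bit per page: stob() sizes the map as (pages + 7) / 8 bytes, and an offset maps to bit atop_64(offset), hence byte bit >> 3 and mask 1 << (bit & 7). A standalone check of those numbers, assuming 4KB pages:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12			/* assume 4KB pages */
#define atop(x)		((uint64_t)(x) >> PAGE_SHIFT)
#define stob(s)		((atop(s) + 7) >> 3)	/* bytes of bitmap needed */

int
main(void)
{
	uint64_t file_size = 10 * 1024 * 1024;	/* 10MB file */
	uint64_t offset = 5 * 4096;		/* page #5 */
	uint64_t bit = atop(offset);		/* = 5 */
	uint64_t byte = bit >> 3;		/* = 0 */
	unsigned mask = 1u << (bit & 7);	/* = 0x20 */

	/* 10MB / 4KB = 2560 pages -> (2560 + 7) / 8 = 320 bitmap bytes */
	printf("bitmap bytes=%llu, page 5 -> byte %llu mask 0x%x\n",
	    (unsigned long long)stob(file_size),
	    (unsigned long long)byte, mask);
	return 0;
}
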
diff --git a/bsd/kern/uipc_domain.c b/bsd/kern/uipc_domain.c
index 37a040c86..1065d3683 100644
--- a/bsd/kern/uipc_domain.c
+++ b/bsd/kern/uipc_domain.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -77,15 +77,14 @@
 #include <pexpert/pexpert.h>
 
 void init_domain(struct domain *dp) __attribute__((section("__TEXT, initcode")));
-void concat_domain(struct domain *dp) __attribute__((section("__TEXT, initcode")));
+void prepend_domain(struct domain *dp) __attribute__((section("__TEXT, initcode")));
 
-
-void	pffasttimo(void *);
 void	pfslowtimo(void *);
 
 struct protosw *pffindprotonotype(int, int);
 struct protosw *pffindprotonotype_locked(int , int , int);
 struct domain *pffinddomain(int);
+static void net_update_uptime(void);
 
 /*
  * Add/delete 'domain': Link structure into system list,
@@ -101,6 +100,12 @@ extern int		do_reclaim;
 
 extern sysctlfn net_sysctl;
 
+static u_int64_t uptime;
+
+#ifdef INET6
+extern  void ip6_fin(void);
+#endif
+
 static void
 init_proto(struct protosw *pr)
 {
@@ -133,6 +138,16 @@ init_domain(struct domain *dp)
 			      dp->dom_name, 
 			      (int)(pr - dp->dom_protosw));
 
+#if __APPLE__
+		/*
+		 * Warn that pr_fasttimo (now pr_unused) is deprecated since rdar://7617868
+		 */
+		if (pr->pr_unused != NULL) {
+			printf("init_domain: warning %s, proto %d: pr_fasttimo is deprecated and won't be called\n",
+				dp->dom_name, pr->pr_protocol);
+		}
+#endif
+
 		init_proto(pr);
 
 	}
@@ -147,8 +162,8 @@ init_domain(struct domain *dp)
 }
 
 void
-concat_domain(struct domain *dp) 
-{
+prepend_domain(struct domain *dp)
+{
 	lck_mtx_assert(domain_proto_mtx, LCK_MTX_ASSERT_OWNED);
 	dp->dom_next = domains; 
 	domains = dp; 
@@ -162,7 +177,7 @@ net_add_domain(struct domain *dp)
 	/* First, link in the domain */
 
 	lck_mtx_lock(domain_proto_mtx);
-	concat_domain(dp);
+	prepend_domain(dp);
 
 	init_domain(dp);
 	lck_mtx_unlock(domain_proto_mtx);
@@ -302,31 +317,32 @@ domaininit(void)
 
 	lck_mtx_lock(domain_proto_mtx);
 
-	concat_domain(&localdomain);
-	concat_domain(&routedomain);
-	concat_domain(&inetdomain);
+	prepend_domain(&localdomain);
+	prepend_domain(&inetdomain);
 #if NETAT
-	concat_domain(&atalkdomain);
+	prepend_domain(&atalkdomain);
 #endif
 #if INET6
-	concat_domain(&inet6domain);
+	prepend_domain(&inet6domain);
 #endif
+	prepend_domain(&routedomain);
+
 #if IPSEC
-	concat_domain(&keydomain);
+	prepend_domain(&keydomain);
 #endif
 
 #if NS
-	concat_domain(&nsdomain);
+	prepend_domain(&nsdomain);
 #endif
 #if ISO
-	concat_domain(&isodomain);
+	prepend_domain(&isodomain);
 #endif
 #if CCITT
-	concat_domain(&ccittdomain);
+	prepend_domain(&ccittdomain);
 #endif
-	concat_domain(&ndrvdomain);
+	prepend_domain(&ndrvdomain);
 
-	concat_domain(&systemdomain);
+	prepend_domain(&systemdomain);
 
 	/*
 	 * Now ask them all to init (XXX including the routing domain,
@@ -336,10 +352,17 @@ domaininit(void)
 		init_domain(dp);
 
 	lck_mtx_unlock(domain_proto_mtx);
-	timeout(pffasttimo, NULL, 1);
 	timeout(pfslowtimo, NULL, 1);
 }
 
+void
+domainfin(void)
+{
+#ifdef INET6
+	ip6_fin();
+#endif
+}
+
 static __inline__ struct domain *
 pffinddomain_locked(int pf)
 {
@@ -525,6 +548,13 @@ pfslowtimo(__unused void *arg)
 	register struct domain *dp;
 	register struct protosw *pr;
 
+	/*
+	 * Update coarse-grained networking timestamp (in sec.); the idea
+	 * is to piggy-back on the periodic slow timeout callout to update
+	 * the counter returnable via net_uptime().
+	 */
+	net_update_uptime();
+
 	lck_mtx_lock(domain_proto_mtx);
 	for (dp = domains; dp; dp = dp->dom_next) 
 		for (pr = dp->dom_protosw; pr; pr = pr->pr_next) {
@@ -539,17 +569,26 @@ pfslowtimo(__unused void *arg)
 	timeout(pfslowtimo, NULL, hz/PR_SLOWHZ);
 }
 
-void
-pffasttimo(__unused void *arg)
+static void
+net_update_uptime(void)
 {
-	register struct domain *dp;
-	register struct protosw *pr;
+	struct timeval tv;
 
-	lck_mtx_lock(domain_proto_mtx);
-	for (dp = domains; dp; dp = dp->dom_next)
-		for (pr = dp->dom_protosw; pr; pr = pr->pr_next)
-			if (pr->pr_fasttimo)
-				(*pr->pr_fasttimo)();
-	lck_mtx_unlock(domain_proto_mtx);
-	timeout(pffasttimo, NULL, hz/PR_FASTHZ);
+	microuptime(&tv);
+	uptime = tv.tv_sec;
+}
+
+/*
+ * An alternative way to obtain the coarse-grained uptime (in seconds)
+ * for networking code that does not require high-precision timestamps,
+ * as this is significantly cheaper than microuptime().
+ */
+u_int64_t
+net_uptime(void)
+{
+	/* If we get here before pfslowtimo() fires for the first time */
+	if (uptime == 0)
+		net_update_uptime();
+
+	return (uptime);
 }
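
net_uptime() trades precision for cost: pfslowtimo() refreshes the cached seconds counter once per slow-timer tick, and callers that only need coarse aging read it instead of paying for a microuptime() call each time. A sketch of the intended usage pattern (net_uptime() is the real API; the flow structure is illustrative):

/* Sketch: stamping and aging a cache entry with the coarse clock. */
struct flow {
	u_int64_t	f_uptime;	/* last-used, in net_uptime() seconds */
};

#define FLOW_IDLE_MAX	60		/* expire after 60s of idleness */

static void
flow_touch(struct flow *f)
{
	f->f_uptime = net_uptime();	/* cheap: no microuptime() call */
}

static int
flow_expired(const struct flow *f)
{
	return (net_uptime() - f->f_uptime > FLOW_IDLE_MAX);
}
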
diff --git a/bsd/kern/uipc_mbuf.c b/bsd/kern/uipc_mbuf.c
index 627cd9926..d8d3ce857 100644
--- a/bsd/kern/uipc_mbuf.c
+++ b/bsd/kern/uipc_mbuf.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -84,6 +84,7 @@
 #include <kern/queue.h>
 #include <kern/sched_prim.h>
 #include <kern/cpu_number.h>
+#include <kern/zalloc.h>
 
 #include <libkern/OSAtomic.h>
 #include <libkern/libkern.h>
@@ -115,7 +116,7 @@
  *	preserve the contents of the objects during its transactions.
  *
  * MC_BIGCL:
- *	This is a cache of rudimentary objects of NBPG in size; each
+ *	This is a cache of rudimentary objects of MBIGCLBYTES in size; each
  *	object represents a mbigcluster structure.  This cache does not
  *	preserve the contents of the objects during its transaction.
  *
@@ -264,8 +265,9 @@
  * Debugging can be enabled by adding "mbuf_debug=0x3" to boot-args; this
  * translates to the mcache flags (MCF_VERIFY | MCF_AUDIT).  Additionally,
  * the CPU layer cache can be disabled by setting the MCF_NOCPUCACHE flag,
- * i.e. modify the boot argument parameter to "mbuf_debug=0x13".  Note
- * that debugging consumes more CPU and memory.
+ * i.e. modify the boot argument parameter to "mbuf_debug=0x13".  Leak
+ * detection may also be disabled by setting the MCF_NOLEAKLOG flag, e.g.
+ * "mbuf_debug=0x113".  Note that debugging consumes more CPU and memory.
  *
  * Each object is associated with exactly one mcache_audit_t structure that
  * contains the information related to its last buffer transaction.  Given
@@ -276,9 +278,9 @@
  *	| mbuf addr  |			| mclaudit[i] |
  *	+------------+			+=============+
  *	      |				| cl_audit[0] |
- *	i = MTOCL(addr)			+-------------+
+ *	i = MTOBG(addr)			+-------------+
  *	      |			+----->	| cl_audit[1] | -----> mcache_audit_t
- *	b = CLTOM(i)		|	+-------------+
+ *	b = BGTOM(i)		|	+-------------+
  *	      |			|	|     ...     |
  *	x = MCLIDX(b, addr)	|	+-------------+
  *	      |			|	| cl_audit[7] |
@@ -286,12 +288,12 @@
  *		 (e.g. x == 1)
  *
  * The mclaudit[] array is allocated at initialization time, but its contents
- * get populated when the corresponding cluster is created.  Because a cluster
- * can be turned into NMBPCL number of mbufs, we preserve enough space for the
- * mbufs so that there is a 1-to-1 mapping between them.  A cluster that never
+ * get populated when the corresponding cluster is created.  Because a page
+ * can be turned into NMBPBG mbufs, we preserve enough space for the
+ * mbufs so that there is a 1-to-1 mapping between them.  A page that never
  * gets (or has not yet) turned into mbufs will use only cl_audit[0] with the
- * remaining entries unused.  For big clusters, only one entry is allocated
- * and used for the entire cluster pair.
+ * remaining entries unused.  For 16KB clusters, only one entry from the first
+ * page is allocated and used for the entire object.
  */
 
 /* TODO: should be in header file */
@@ -311,7 +313,7 @@ static void *mbuf_worker_run;	/* wait channel for worker thread */
 static int mbuf_worker_ready;	/* worker thread is runnable */
 static int mbuf_expand_mcl;	/* number of cluster creation requests */
 static int mbuf_expand_big;	/* number of big cluster creation requests */
-static int mbuf_expand_16k;	/* number of 16K cluster creation requests */
+static int mbuf_expand_16k;	/* number of 16KB cluster creation requests */
 static int ncpu;		/* number of CPUs */
 static ppnum_t *mcl_paddr;	/* Array of cluster physical addresses */
 static ppnum_t mcl_pages;	/* Size of array (# physical pages) */
@@ -320,19 +322,18 @@ static mcache_t *ref_cache;	/* Cache of cluster reference & flags */
 static mcache_t *mcl_audit_con_cache; /* Audit contents cache */
 static unsigned int mbuf_debug;	/* patchable mbuf mcache flags */
 static unsigned int mb_normalized; /* number of packets "normalized" */
-static unsigned int mbuf_gscale; /* Power-of-two growth scale for m_howmany */
 
 #define	MB_GROWTH_AGGRESSIVE	1	/* Threshold: 1/2 of total */
-#define	MB_GROWTH_NORMAL	4	/* Threshold: 15/16 of total */
+#define	MB_GROWTH_NORMAL	2	/* Threshold: 3/4 of total */
 
 typedef enum {
 	MC_MBUF = 0,	/* Regular mbuf */
 	MC_CL,		/* Cluster */
-	MC_BIGCL,	/* Large (4K) cluster */
-	MC_16KCL,	/* Jumbo (16K) cluster */
+	MC_BIGCL,	/* Large (4KB) cluster */
+	MC_16KCL,	/* Jumbo (16KB) cluster */
 	MC_MBUF_CL,	/* mbuf + cluster */
-	MC_MBUF_BIGCL,	/* mbuf + large (4K) cluster */
-	MC_MBUF_16KCL	/* mbuf + jumbo (16K) cluster */
+	MC_MBUF_BIGCL,	/* mbuf + large (4KB) cluster */
+	MC_MBUF_16KCL	/* mbuf + jumbo (16KB) cluster */
 } mbuf_class_t;
 
 #define	MBUF_CLASS_MIN		MC_MBUF
@@ -371,6 +372,8 @@ typedef enum {
  * a cluster's size.  In this case, only the slab of the first cluster is
  * used.  The rest of the slabs are marked with SLF_PARTIAL to indicate
  * that they are part of the larger slab.
+ *
+ * Each slab controls a page of memory.
  */
 typedef struct mcl_slab {
 	struct mcl_slab	*sl_next;	/* neighboring slab */
@@ -394,23 +397,24 @@ typedef struct mcl_slab {
  * whenever a new piece of memory mapped in from the VM crosses the 1MB
  * boundary.
  */
-#define	NSLABSPMB	((1 << MBSHIFT) >> MCLSHIFT)	/* 512 slabs/grp */
+#define	NSLABSPMB	((1 << MBSHIFT) >> PGSHIFT)	/* 256 slabs/grp */
 
 typedef struct mcl_slabg {
 	mcl_slab_t	slg_slab[NSLABSPMB];	/* group of slabs */
 } mcl_slabg_t;
 
+/*
+ * Number of slabs needed to control a 16KB cluster object.
+ */
+#define	NSLABSP16KB	(M16KCLBYTES >> PGSHIFT)
+
 /*
  * Per-cluster audit structure.
  */
 typedef struct {
-	mcache_audit_t	*cl_audit[NMBPCL];	/* array of audits */
+	mcache_audit_t	*cl_audit[NMBPBG];	/* array of audits */
 } mcl_audit_t;
 
-#if CONFIG_MBUF_NOEXPAND
-static unsigned int maxmbufcl;
-#endif /* CONFIG_MBUF_NOEXPAND */
-
 /*
  * Size of data from the beginning of an mbuf that covers m_hdr, pkthdr
  * and m_ext structures.  If auditing is enabled, we allocate a shadow
@@ -434,6 +438,7 @@ static unsigned int maxmbufcl;
  * Each of the following two arrays hold up to nmbclusters elements.
  */
 static mcl_audit_t *mclaudit;	/* array of cluster audit information */
+static unsigned int maxclaudit;	/* max # of entries in audit table */
 static mcl_slabg_t **slabstbl;	/* cluster slabs table */
 static unsigned int maxslabgrp;	/* max # of entries in slabs table */
 static unsigned int slabgrp;	/* # of entries in slabs table */
@@ -442,13 +447,68 @@ static unsigned int slabgrp;	/* # of entries in slabs table */
 int nclusters;			/* # of clusters for non-jumbo (legacy) sizes */
 int njcl;			/* # of clusters for jumbo sizes */
 int njclbytes;			/* size of a jumbo cluster */
-union mcluster *mbutl;		/* first mapped cluster address */
-union mcluster *embutl;		/* ending virtual address of mclusters */
+union mbigcluster *mbutl;	/* first mapped cluster address */
+union mbigcluster *embutl;	/* ending virtual address of mclusters */
 int max_linkhdr;		/* largest link-level header */
 int max_protohdr;		/* largest protocol header */
 int max_hdr;			/* largest link+protocol header */
 int max_datalen;		/* MHLEN - max_hdr */
 
+static boolean_t mclverify;	/* debug: pattern-checking */
+static boolean_t mcltrace;	/* debug: stack tracing */
+static boolean_t mclfindleak;	/* debug: leak detection */
+
+/* mbuf leak detection variables */
+static struct mleak_table mleak_table;
+static mleak_stat_t *mleak_stat;
+
+#define	MLEAK_STAT_SIZE(n) \
+	((size_t)(&((mleak_stat_t *)0)->ml_trace[n]))
+
+struct mallocation {
+	mcache_obj_t *element;	/* the alloc'ed element, NULL if unused */
+	u_int32_t trace_index;	/* mtrace index for corresponding backtrace */
+	u_int32_t count;	/* How many objects were requested */
+	u_int64_t hitcount;	/* for determining hash effectiveness */
+};
+
+struct mtrace {
+	u_int64_t	collisions;
+	u_int64_t	hitcount;
+	u_int64_t	allocs;
+	u_int64_t	depth;
+	uintptr_t	addr[MLEAK_STACK_DEPTH];
+};
+
+/* Size must be a power of two for the zhash to be able to just mask off bits */
+#define	MLEAK_ALLOCATION_MAP_NUM	512
+#define	MLEAK_TRACE_MAP_NUM		256
+
+/*
+ * Sample factor for how often to record a trace.  This is overridable
+ * by the boot-arg mleak_sample_factor.
+ */
+#define	MLEAK_SAMPLE_FACTOR		500
+
+/*
+ * Number of top leakers recorded.
+ */
+#define	MLEAK_NUM_TRACES		5
+
+static uint32_t mleak_alloc_buckets = MLEAK_ALLOCATION_MAP_NUM;
+static uint32_t mleak_trace_buckets = MLEAK_TRACE_MAP_NUM;
+
+/* Hashmaps of allocations and their corresponding traces */
+static struct mallocation *mleak_allocations;
+static struct mtrace *mleak_traces;
+static struct mtrace *mleak_top_trace[MLEAK_NUM_TRACES];
+
+/* Lock to protect mleak tables from concurrent modification */
+static lck_mtx_t *mleak_lock;
+static lck_attr_t *mleak_lock_attr;
+static lck_grp_t *mleak_lock_grp;
+static lck_grp_attr_t *mleak_lock_grp_attr;
+
 extern u_int32_t high_sb_max;
 
 /* TODO: should be in header file */
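
Two details above deserve a note: the map sizes are powers of two so a hash reduces to an index with a mask rather than a modulo, and MLEAK_STAT_SIZE() sizes a structure that ends in a variable-length array by taking the offset of element [n]. A standalone sketch of both tricks, with illustrative names (the null-pointer offset idiom mirrors the kernel macro itself):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define NBUCKETS	512		/* power of two, like the mleak maps */

/* Valid only because NBUCKETS is a power of two: x % 512 == x & 511. */
static inline uint32_t
bucket_of(uint32_t hash)
{
	return (hash & (NBUCKETS - 1));
}

/* Header plus n trailing elements, sized the same way as MLEAK_STAT_SIZE(). */
struct stat_hdr {
	uint32_t	cnt;
	uint64_t	trace[];	/* variable-length tail */
};

#define STAT_SIZE(n) \
	((size_t)(&((struct stat_hdr *)0)->trace[n]))

int
main(void)
{
	/* trace[] starts at offset 8 (alignment), so STAT_SIZE(5) = 8 + 40 */
	printf("STAT_SIZE(5)=%zu, bucket_of(0x12345)=%u\n",
	    STAT_SIZE(5), bucket_of(0x12345));
	return 0;
}
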
@@ -460,7 +520,6 @@ int do_reclaim = 0;
 #define	MIN16KCL	(MINCL >> 2)
 
 /* Low watermarks (only map in pages once free counts go below) */
-#define	MCL_LOWAT	MINCL
 #define	MBIGCL_LOWAT	MINBIGCL
 #define	M16KCL_LOWAT	MIN16KCL
 
@@ -525,15 +584,34 @@ static mbuf_table_t mbuf_table[] = {
 #define	NELEM(a)	(sizeof (a) / sizeof ((a)[0]))
 
 static void *mb_waitchan = &mbuf_table;	/* wait channel for all caches */
-static int mb_waiters;			/* number of sleepers */
+static int mb_waiters;			/* number of waiters */
+
+#define	MB_WDT_MAXTIME	10		/* # of secs before watchdog panic */
+static struct timeval mb_wdtstart;	/* watchdog start timestamp */
+static char mbuf_dump_buf[256];
+
+/*
+ * The mbuf watchdog is enabled by default on embedded platforms.  It is
+ * also toggleable via the kern.ipc.mb_watchdog sysctl.
+ */
+#if CONFIG_EMBEDDED
+static unsigned int mb_watchdog = 1;
+#else
+static unsigned int mb_watchdog = 0;
+#endif /* CONFIG_EMBEDDED */
 
 /* The following are used to serialize m_clalloc() */
 static boolean_t mb_clalloc_busy;
 static void *mb_clalloc_waitchan = &mb_clalloc_busy;
 static int mb_clalloc_waiters;
 
+static void mbuf_mtypes_sync(boolean_t);
 static int mbstat_sysctl SYSCTL_HANDLER_ARGS;
+static void mbuf_stat_sync(void);
 static int mb_stat_sysctl SYSCTL_HANDLER_ARGS;
+static int mleak_top_trace_sysctl SYSCTL_HANDLER_ARGS;
+static int mleak_table_sysctl SYSCTL_HANDLER_ARGS;
+static char *mbuf_dump(void);
 static void mbuf_table_init(void);
 static inline void m_incref(struct mbuf *);
 static inline u_int32_t m_decref(struct mbuf *);
@@ -554,11 +632,13 @@ static unsigned int mbuf_cslab_alloc(void *, mcache_obj_t ***,
 static void mbuf_cslab_free(void *, mcache_obj_t *, int);
 static void mbuf_cslab_audit(void *, mcache_obj_t *, boolean_t);
 static int freelist_populate(mbuf_class_t, unsigned int, int);
+static void freelist_init(mbuf_class_t);
 static boolean_t mbuf_cached_above(mbuf_class_t, int);
 static boolean_t mbuf_steal(mbuf_class_t, unsigned int);
 static void m_reclaim(mbuf_class_t, unsigned int, boolean_t);
 static int m_howmany(int, size_t);
 static void mbuf_worker_thread(void);
+static void mbuf_watchdog(void);
 static boolean_t mbuf_sleep(mbuf_class_t, unsigned int, int);
 
 static void mcl_audit_init(void *, mcache_audit_t **, mcache_obj_t **,
@@ -572,6 +652,11 @@ static void mcl_audit_save_mbuf(struct mbuf *, mcache_audit_t *);
 static void mcl_audit_mcheck_panic(struct mbuf *);
 static void mcl_audit_verify_nextptr(void *, mcache_audit_t *);
 
+static void mleak_activate(void);
+static void mleak_logger(u_int32_t, mcache_obj_t *, boolean_t);
+static boolean_t mleak_log(uintptr_t *, mcache_obj_t *, uint32_t, int);
+static void mleak_free(mcache_obj_t *);
+
 static mcl_slab_t *slab_get(void *);
 static void slab_init(mcl_slab_t *, mbuf_class_t, u_int32_t,
     void *, void *, unsigned int, int, int);
@@ -582,7 +667,6 @@ static void slab_nextptr_panic(mcl_slab_t *, void *);
 static void slab_detach(mcl_slab_t *);
 static boolean_t slab_is_detached(mcl_slab_t *);
 
-static unsigned int m_length(struct mbuf *);
 static int m_copyback0(struct mbuf **, int, int, const void *, int, int);
 static struct mbuf *m_split0(struct mbuf *, int, int, int);
 
@@ -605,11 +689,19 @@ static struct mbuf *m_split0(struct mbuf *, int, int, int);
  */
 #define	EXTF_COMPOSITE	0x1
 
+/*
+ * This flag indicates that the external cluster is read-only, i.e. it is
+ * or was referred to by more than one mbuf.  Once set, this flag is never
+ * cleared.
+ */
+#define	EXTF_READONLY	0x2
+#define	EXTF_MASK	(EXTF_COMPOSITE | EXTF_READONLY)
+
 #define	MEXT_RFA(m)		((m)->m_ext.ext_refflags)
 #define	MEXT_REF(m)		(MEXT_RFA(m)->refcnt)
 #define	MEXT_FLAGS(m)		(MEXT_RFA(m)->flags)
 #define	MBUF_IS_COMPOSITE(m)	\
-	(MEXT_REF(m) == 0 && (MEXT_FLAGS(m) & EXTF_COMPOSITE))
+	(MEXT_REF(m) == 0 && (MEXT_FLAGS(m) & EXTF_MASK) == EXTF_COMPOSITE)
 
 /*
  * Macros used to verify the integrity of the mbuf.
@@ -638,15 +730,21 @@ static struct mbuf *m_split0(struct mbuf *, int, int, int);
 #define	MTOD(m, t)	((t)((m)->m_data))
 
 /*
- * Macros to obtain cluster index and base cluster address.
+ * Macros to obtain (4KB) cluster index and base cluster address.
+ */
+
+#define	MTOBG(x)	(((char *)(x) - (char *)mbutl) >> MBIGCLSHIFT)
+#define	BGTOM(x)	((union mbigcluster *)(mbutl + (x)))
+
+/*
+ * Macro to find the mbuf index relative to a base.
  */
-#define	MTOCL(x)	(((char *)(x) - (char *)mbutl) >> MCLSHIFT)
-#define	CLTOM(x)	((union mcluster *)(mbutl + (x)))
+#define	MCLIDX(c, m)	(((char *)(m) - (char *)(c)) >> MSIZESHIFT)
 
 /*
- * Macro to find the mbuf index relative to the cluster base.
+ * Same thing for 2KB cluster index.
  */
-#define	MCLIDX(c, m)	(((char *)(m) - (char *)(c)) >> 8)
+#define	CLBGIDX(c, m)	(((char *)(m) - (char *)(c)) >> MCLSHIFT)
 
 /*
  * Macros used during mbuf and cluster initialization.
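
With the pool indexed by 4KB pages, MTOBG() converts an address to a big-cluster index by shifting its offset from mbutl right by MBIGCLSHIFT, BGTOM() inverts that, and MCLIDX() locates an mbuf within its page via MSIZESHIFT. A worked example, assuming MBIGCLSHIFT == 12 and MSIZESHIFT == 8 (256-byte mbufs):

#include <stdio.h>
#include <stdint.h>

#define MBIGCLSHIFT	12	/* assume 4KB big clusters */
#define MSIZESHIFT	8	/* assume 256-byte mbufs */

int
main(void)
{
	uintptr_t mbutl = 0x10000000;			/* illustrative base */
	uintptr_t m = mbutl + 5 * 4096 + 3 * 256;	/* 4th mbuf of page 5 */

	uintptr_t idx  = (m - mbutl) >> MBIGCLSHIFT;	/* MTOBG -> 5 */
	uintptr_t base = mbutl + (idx << MBIGCLSHIFT);	/* BGTOM */
	uintptr_t slot = (m - base) >> MSIZESHIFT;	/* MCLIDX -> 3 */

	printf("page index %lu, mbuf slot %lu\n",
	    (unsigned long)idx, (unsigned long)slot);
	return 0;
}
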
@@ -670,6 +768,7 @@ static struct mbuf *m_split0(struct mbuf *, int, int, int);
 		(m)->m_pkthdr.tso_segsz = 0;				\
 		(m)->m_pkthdr.vlan_tag = 0;				\
 		(m)->m_pkthdr.socket_id = 0;				\
+		(m)->m_pkthdr.vt_nrecs = 0;				\
 		m_tag_init(m);						\
 		m_prio_init(m);						\
 	}								\
@@ -759,16 +858,12 @@ static mbuf_mtypes_t *mbuf_mtypes;	/* per-CPU statistics */
 #define	MTYPES_CPU(p) \
 	((mtypes_cpu_t *)((char *)(p) + MBUF_MTYPES_SIZE(cpu_number())))
 
-/* This should be in a header file */
-#define	atomic_add_16(a, n)	((void) OSAddAtomic16(n, a))
-#define	atomic_add_32(a, n)	((void) OSAddAtomic(n, a))
-
 #define	mtype_stat_add(type, n) {					\
 	if ((unsigned)(type) < MT_MAX) {				\
 		mtypes_cpu_t *mbs = MTYPES_CPU(mbuf_mtypes);		\
 		atomic_add_32(&mbs->cpu_mtypes[type], n);		\
-	} else if ((unsigned)(type) < (unsigned)MBSTAT_MTYPES_MAX) {		\
-		atomic_add_16((int16_t*)&mbstat.m_mtypes[type], n);		\
+	} else if ((unsigned)(type) < (unsigned)MBSTAT_MTYPES_MAX) {	\
+		atomic_add_16((int16_t *)&mbstat.m_mtypes[type], n);	\
 	}								\
 }
 
@@ -776,13 +871,15 @@ static mbuf_mtypes_t *mbuf_mtypes;	/* per-CPU statistics */
 #define	mtype_stat_inc(t)	mtype_stat_add(t, 1)
 #define	mtype_stat_dec(t)	mtype_stat_sub(t, 1)
 
-static int
-mbstat_sysctl SYSCTL_HANDLER_ARGS
+static void
+mbuf_mtypes_sync(boolean_t locked)
 {
-#pragma unused(oidp, arg1, arg2)
 	int m, n;
 	mtypes_cpu_t mtc;
 
+	if (locked)
+		lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
+
 	bzero(&mtc, sizeof (mtc));
 	for (m = 0; m < ncpu; m++) {
 		mtypes_cpu_t *scp = &mbuf_mtypes->mbs_cpu[m];
@@ -794,25 +891,33 @@ mbstat_sysctl SYSCTL_HANDLER_ARGS
 		for (n = 0; n < MT_MAX; n++)
 			mtc.cpu_mtypes[n] += temp.cpu_mtypes[n];
 	}
-	lck_mtx_lock(mbuf_mlock);
+	if (!locked)
+		lck_mtx_lock(mbuf_mlock);
 	for (n = 0; n < MT_MAX; n++)
 		mbstat.m_mtypes[n] = mtc.cpu_mtypes[n];
-	lck_mtx_unlock(mbuf_mlock);
-
-	return (SYSCTL_OUT(req, &mbstat, sizeof (mbstat)));
+	if (!locked)
+		lck_mtx_unlock(mbuf_mlock);
 }
 
 static int
-mb_stat_sysctl SYSCTL_HANDLER_ARGS
+mbstat_sysctl SYSCTL_HANDLER_ARGS
 {
 #pragma unused(oidp, arg1, arg2)
-	mcache_t *cp;
-	mcache_cpu_t *ccp;
+	mbuf_mtypes_sync(FALSE);
+
+	return (SYSCTL_OUT(req, &mbstat, sizeof (mbstat)));
+}
+
+static void
+mbuf_stat_sync(void)
+{
 	mb_class_stat_t *sp;
-	void *statp;
-	int k, m, bktsize, statsz, proc64 = proc_is64bit(req->p);
+	mcache_cpu_t *ccp;
+	mcache_t *cp;
+	int k, m, bktsize;
+
+	lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
 
-	lck_mtx_lock(mbuf_mlock);
 	for (k = 0; k < NELEM(mbuf_table); k++) {
 		cp = m_cache(k);
 		ccp = &cp->mc_cpu[0];
@@ -854,9 +959,8 @@ mb_stat_sysctl SYSCTL_HANDLER_ARGS
 			break;
 
 		case MC_CL:
-			/* Deduct clusters used in composite cache and mbufs */
-			sp->mbcl_ctotal -= (m_total(MC_MBUF_CL) +
-			    (P2ROUNDUP(m_total(MC_MBUF), NMBPCL)/NMBPCL));
+			/* Deduct clusters used in composite cache */
+			sp->mbcl_ctotal -= m_total(MC_MBUF_CL);
 			break;
 
 		case MC_BIGCL:
@@ -873,6 +977,17 @@ mb_stat_sysctl SYSCTL_HANDLER_ARGS
 			break;
 		}
 	}
+}
+
+static int
+mb_stat_sysctl SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+	void *statp;
+	int k, statsz, proc64 = proc_is64bit(req->p);
+
+	lck_mtx_lock(mbuf_mlock);
+	mbuf_stat_sync();
 
 	if (!proc64) {
 		struct omb_class_stat *oc;
@@ -913,6 +1028,69 @@ mb_stat_sysctl SYSCTL_HANDLER_ARGS
 	return (SYSCTL_OUT(req, statp, statsz));
 }
 
+static int
+mleak_top_trace_sysctl SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+	mleak_trace_stat_t *mltr;
+	int i;
+
+	/* Ensure leak tracing turned on */
+	if (!mclfindleak)
+		return (ENXIO);
+
+	VERIFY(mleak_stat != NULL);
+#ifdef __LP64__
+	VERIFY(mleak_stat->ml_isaddr64);
+#else
+	VERIFY(!mleak_stat->ml_isaddr64);
+#endif /* !__LP64__ */
+	VERIFY(mleak_stat->ml_cnt == MLEAK_NUM_TRACES);
+
+	lck_mtx_lock(mleak_lock);
+	mltr = &mleak_stat->ml_trace[0];
+	bzero(mltr, sizeof (*mltr) * MLEAK_NUM_TRACES);
+	for (i = 0; i < MLEAK_NUM_TRACES; i++) {
+		int j;
+
+		if (mleak_top_trace[i] == NULL ||
+		    mleak_top_trace[i]->allocs == 0)
+			continue;
+
+		mltr->mltr_collisions	= mleak_top_trace[i]->collisions;
+		mltr->mltr_hitcount	= mleak_top_trace[i]->hitcount;
+		mltr->mltr_allocs	= mleak_top_trace[i]->allocs;
+		mltr->mltr_depth	= mleak_top_trace[i]->depth;
+
+		VERIFY(mltr->mltr_depth <= MLEAK_STACK_DEPTH);
+		for (j = 0; j < mltr->mltr_depth; j++)
+			mltr->mltr_addr[j] = mleak_top_trace[i]->addr[j];
+
+		mltr++;
+	}
+	i = SYSCTL_OUT(req, mleak_stat, MLEAK_STAT_SIZE(MLEAK_NUM_TRACES));
+	lck_mtx_unlock(mleak_lock);
+
+	return (i);
+}
+
+static int
+mleak_table_sysctl SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+	int i = 0;
+
+	/* Ensure leak tracing turned on */
+	if (!mclfindleak)
+		return (ENXIO);
+
+	lck_mtx_lock(mleak_lock);
+	i = SYSCTL_OUT(req, &mleak_table, sizeof (mleak_table));
+	lck_mtx_unlock(mleak_lock);
+
+	return (i);
+}
+
 static inline void
 m_incref(struct mbuf *m)
 {
@@ -924,6 +1102,14 @@ m_incref(struct mbuf *m)
 		new = old + 1;
 		ASSERT(new != 0);
 	} while (!OSCompareAndSwap(old, new, addr));
+
+	/*
+	 * If cluster is shared, mark it with (sticky) EXTF_READONLY;
+	 * we don't clear the flag when the refcount goes back to 1
+	 * to simplify code calling m_mclhasreference().
+	 */
+	if (new > 1 && !(MEXT_FLAGS(m) & EXTF_READONLY))
+		(void) OSBitOrAtomic(EXTF_READONLY, &MEXT_FLAGS(m));
 }
 
 static inline u_int32_t
@@ -944,6 +1130,7 @@ m_decref(struct mbuf *m)
 static void
 mbuf_table_init(void)
 {
+	unsigned int b, c, s;
 	int m;
 
 	MALLOC(omb_stat, struct omb_stat *, OMB_STAT_SIZE(NELEM(mbuf_table)),
@@ -968,66 +1155,78 @@ mbuf_table_init(void)
 #endif /* CONFIG_MBUF_JUMBO */
 
 	/*
-	 * nclusters is going to be split in 2 to hold both the 2K
-	 * and the 4K pools, so make sure each half is even.
+	 * nclusters holds both the 2KB and 4KB pools, so ensure it's
+	 * a multiple of 4KB clusters.
 	 */
-	nclusters = P2ROUNDDOWN(nmbclusters - njcl, 4);
+	nclusters = P2ROUNDDOWN(nmbclusters - njcl, NCLPBG);
 	if (njcl > 0) {
 		/*
-		 * Each jumbo cluster takes 8 2K clusters, so make
-		 * sure that the pool size is evenly divisible by 8.
+		 * Each jumbo cluster takes 8 2KB clusters, so make
+		 * sure that the pool size is evenly divisible by 8;
+		 * njcl is in 2KB units, hence treated as such.
 		 */
 		njcl = P2ROUNDDOWN(nmbclusters - nclusters, 8);
-	}
 
-#if CONFIG_MBUF_NOEXPAND
-	/* Only use 4k clusters if we're setting aside more than 256k */
-	if (nmbclusters <= 128) {
-		maxmbufcl = nmbclusters / 4;
-	} else {
-		/* Half to big clusters, half to small */
-		maxmbufcl = (nmbclusters / 4) * 3;
+		/* Update nclusters with rounded down value of njcl */
+		nclusters = P2ROUNDDOWN(nmbclusters - njcl, NCLPBG);
 	}
-#endif /* CONFIG_MBUF_NOEXPAND */
 
 	/*
-	 * 1/2 of the map is reserved for 2K clusters.  Out of this, 1/16th
-	 * of the total number of 2K clusters allocated is reserved and cannot
-	 * be turned into mbufs.  It can only be used for pure cluster objects.
+	 * njcl is valid only on platforms with 16KB jumbo clusters, where
+	 * it is configured to 1/3 of the pool size.  On these platforms,
+	 * the remaining is used for 2KB and 4KB clusters.  On platforms
+	 * without 16KB jumbo clusters, the entire pool is used for both
+	 * 2KB and 4KB clusters.  A 4KB cluster can either be split into
+	 * 16 mbufs, or into 2 2KB clusters.
+	 *
+	 *  +---+---+------------ ... -----------+------- ... -------+
+	 *  | c | b |              s             |        njcl       |
+	 *  +---+---+------------ ... -----------+------- ... -------+
+	 *
+	 * 1/32nd of the shared region is reserved for pure 2KB and 4KB
+	 * clusters (1/64th each.)
+	 */
+	c = P2ROUNDDOWN((nclusters >> 6), 2);		/* in 2KB unit */
+	b = P2ROUNDDOWN((nclusters >> (6 + NCLPBGSHIFT)), 2); /* in 4KB unit */
+	s = nclusters - (c + (b << NCLPBGSHIFT));	/* in 2KB unit */
+
+	/*
+	 * 1/64th (c) is reserved for 2KB clusters.
 	 */
-	m_minlimit(MC_CL) = (nclusters >> 5);
-	m_maxlimit(MC_CL) = (nclusters >> 1);
+	m_minlimit(MC_CL) = c;
+	m_maxlimit(MC_CL) = s + c;			/* in 2KB unit */
 	m_maxsize(MC_CL) = m_size(MC_CL) = MCLBYTES;
 	(void) snprintf(m_cname(MC_CL), MAX_MBUF_CNAME, "cl");
 
 	/*
-	 * The remaining (15/16th) can be turned into mbufs.
+	 * Another 1/64th (b) of the map is reserved for 4KB clusters.
+	 * It cannot be turned into 2KB clusters or mbufs.
 	 */
-	m_minlimit(MC_MBUF) = 0;
-	m_maxlimit(MC_MBUF) = (m_maxlimit(MC_CL) - m_minlimit(MC_CL)) * NMBPCL;
-	m_maxsize(MC_MBUF) = m_size(MC_MBUF) = MSIZE;
-	(void) snprintf(m_cname(MC_MBUF), MAX_MBUF_CNAME, "mbuf");
+	m_minlimit(MC_BIGCL) = b;
+	m_maxlimit(MC_BIGCL) = (s >> NCLPBGSHIFT) + b;	/* in 4KB unit */
+	m_maxsize(MC_BIGCL) = m_size(MC_BIGCL) = MBIGCLBYTES;
+	(void) snprintf(m_cname(MC_BIGCL), MAX_MBUF_CNAME, "bigcl");
 
 	/*
-	 * The other 1/2 of the map is reserved for 4K clusters.
+	 * The remaining 31/32nds (s) are all-purpose (mbufs, 2KB, or 4KB).
 	 */
-	m_minlimit(MC_BIGCL) = 0;
-	m_maxlimit(MC_BIGCL) = m_maxlimit(MC_CL) >> 1;
-	m_maxsize(MC_BIGCL) = m_size(MC_BIGCL) = NBPG;
-	(void) snprintf(m_cname(MC_BIGCL), MAX_MBUF_CNAME, "bigcl");
+	m_minlimit(MC_MBUF) = 0;
+	m_maxlimit(MC_MBUF) = (s << NMBPCLSHIFT);	/* in mbuf unit */
+	m_maxsize(MC_MBUF) = m_size(MC_MBUF) = MSIZE;
+	(void) snprintf(m_cname(MC_MBUF), MAX_MBUF_CNAME, "mbuf");
 
 	/*
 	 * Set limits for the composite classes.
 	 */
 	m_minlimit(MC_MBUF_CL) = 0;
-	m_maxlimit(MC_MBUF_CL) = m_maxlimit(MC_CL) - m_minlimit(MC_CL);
+	m_maxlimit(MC_MBUF_CL) = m_maxlimit(MC_CL);
 	m_maxsize(MC_MBUF_CL) = MCLBYTES;
 	m_size(MC_MBUF_CL) = m_size(MC_MBUF) + m_size(MC_CL);
 	(void) snprintf(m_cname(MC_MBUF_CL), MAX_MBUF_CNAME, "mbuf_cl");
 
 	m_minlimit(MC_MBUF_BIGCL) = 0;
 	m_maxlimit(MC_MBUF_BIGCL) = m_maxlimit(MC_BIGCL);
-	m_maxsize(MC_MBUF_BIGCL) = NBPG;
+	m_maxsize(MC_MBUF_BIGCL) = MBIGCLBYTES;
 	m_size(MC_MBUF_BIGCL) = m_size(MC_MBUF) + m_size(MC_BIGCL);
 	(void) snprintf(m_cname(MC_MBUF_BIGCL), MAX_MBUF_CNAME, "mbuf_bigcl");
 
@@ -1035,7 +1234,7 @@ mbuf_table_init(void)
 	 * And for jumbo classes.
 	 */
 	m_minlimit(MC_16KCL) = 0;
-	m_maxlimit(MC_16KCL) = (njcl >> 3);
+	m_maxlimit(MC_16KCL) = (njcl >> NCLPJCLSHIFT);	/* in 16KB unit */
 	m_maxsize(MC_16KCL) = m_size(MC_16KCL) = M16KCLBYTES;
 	(void) snprintf(m_cname(MC_16KCL), MAX_MBUF_CNAME, "16kcl");
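/*
 * The c/b/s split above is plain integer arithmetic and can be checked
 * standalone.  A sketch assuming MCLBYTES == 2048 and MBIGCLBYTES == 4096,
 * i.e. NCLPBGSHIFT == 1 (assumed values matching the sizes named in the
 * comments; the pool size below is arbitrary).
 */
#include <assert.h>
#include <stdio.h>

#define P2ROUNDDOWN(x, align)	((x) & ~((align) - 1))
#define NCLPBGSHIFT	1		/* 2 x 2KB clusters per 4KB cluster */

int
main(void)
{
	unsigned int nclusters = 32768;	/* example pool size, in 2KB units */
	unsigned int b, c, s;

	/* 1/64th each for pure 2KB (c) and pure 4KB (b) clusters */
	c = P2ROUNDDOWN((nclusters >> 6), 2);			/* 2KB units */
	b = P2ROUNDDOWN((nclusters >> (6 + NCLPBGSHIFT)), 2);	/* 4KB units */
	s = nclusters - (c + (b << NCLPBGSHIFT));		/* 2KB units */

	/* both reserved regions are even, and all three add back up */
	assert((c & 1) == 0 && (b & 1) == 0);
	assert(c + (b << NCLPBGSHIFT) + s == nclusters);
	printf("c=%u b=%u s=%u (31/32nds all-purpose)\n", c, b, s);
	return (0);
}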
 
@@ -1084,19 +1283,19 @@ static ncl_tbl_t ncl_table_srv[] = {
 #endif /* __LP64__ */
 
 __private_extern__ unsigned int
-mbuf_default_ncl(int srv, uint64_t mem)
+mbuf_default_ncl(int server, uint64_t mem)
 {
 #if !defined(__LP64__)
-#pragma unused(srv)
+#pragma unused(server)
 	unsigned int n;
 	/*
 	 * 32-bit kernel (default to 64MB of mbuf pool for >= 1GB RAM).
 	 */
-        if ((n = ((mem / 16) / MCLBYTES)) > 32768)
-	        n = 32768;
+	if ((n = ((mem / 16) / MCLBYTES)) > 32768)
+		n = 32768;
 #else
 	unsigned int n, i;
-	ncl_tbl_t *tbl = (srv ? ncl_table_srv : ncl_table);
+	ncl_tbl_t *tbl = (server ? ncl_table_srv : ncl_table);
 	/*
 	 * 64-bit kernel (mbuf pool size based on table).
 	 */
@@ -1115,13 +1314,16 @@ __private_extern__ void
 mbinit(void)
 {
 	unsigned int m;
-	int initmcl = MINCL;
+	unsigned int initmcl = 0;
 	void *buf;
 	thread_t thread = THREAD_NULL;
 
 	if (nmbclusters == 0)
 		nmbclusters = NMBCLUSTERS;
 
+	/* This should be a sane (at least even) value by now */
+	VERIFY(nmbclusters != 0 && !(nmbclusters & 0x1));
+
 	/* Setup the mbuf table */
 	mbuf_table_init();
 
@@ -1131,25 +1333,51 @@ mbinit(void)
 	mbuf_mlock_attr = lck_attr_alloc_init();
 	mbuf_mlock = lck_mtx_alloc_init(mbuf_mlock_grp, mbuf_mlock_attr);
 
-	/* Allocate cluster slabs table */
-	maxslabgrp = P2ROUNDUP(nmbclusters, NSLABSPMB) / NSLABSPMB;
+	/*
+	 * Allocate cluster slabs table:
+	 *
+	 *	maxslabgrp = (N * 2048) / (1024 * 1024)
+	 *
+	 * Where N is nmbclusters rounded up to the nearest 512.  This yields
+	 * mcl_slabg_t units, each one representing 1 MB of memory.
+	 */
+	maxslabgrp =
+	    (P2ROUNDUP(nmbclusters, (MBSIZE >> 11)) << MCLSHIFT) >> MBSHIFT;
 	MALLOC(slabstbl, mcl_slabg_t **, maxslabgrp * sizeof (mcl_slabg_t *),
 	    M_TEMP, M_WAITOK | M_ZERO);
 	VERIFY(slabstbl != NULL);
 
-	/* Allocate audit structures if needed */
+	/*
+	 * Allocate audit structures, if needed:
+	 *
+	 *	maxclaudit = (maxslabgrp * 1024 * 1024) / 4096
+	 *
+	 * This yields mcl_audit_t units, each one representing a page.
+	 */
 	PE_parse_boot_argn("mbuf_debug", &mbuf_debug, sizeof (mbuf_debug));
 	mbuf_debug |= mcache_getflags();
-	if (mbuf_debug & MCF_AUDIT) {
-		MALLOC(mclaudit, mcl_audit_t *,
-		    nmbclusters * sizeof (*mclaudit), M_TEMP,
-		    M_WAITOK | M_ZERO);
+	if (mbuf_debug & MCF_DEBUG) {
+		maxclaudit = ((maxslabgrp << MBSHIFT) >> PGSHIFT);
+		MALLOC(mclaudit, mcl_audit_t *, maxclaudit * sizeof (*mclaudit),
+		    M_TEMP, M_WAITOK | M_ZERO);
 		VERIFY(mclaudit != NULL);
 
 		mcl_audit_con_cache = mcache_create("mcl_audit_contents",
 		    AUDIT_CONTENTS_SIZE, 0, 0, MCR_SLEEP);
 		VERIFY(mcl_audit_con_cache != NULL);
 	}
+	mclverify = (mbuf_debug & MCF_VERIFY);
+	mcltrace = (mbuf_debug & MCF_TRACE);
+	mclfindleak = !(mbuf_debug & MCF_NOLEAKLOG);
+
+	/* Enable mbuf leak logging, with a lock to protect the tables */
+
+	mleak_lock_grp_attr = lck_grp_attr_alloc_init();
+	mleak_lock_grp = lck_grp_alloc_init("mleak_lock", mleak_lock_grp_attr);
+	mleak_lock_attr = lck_attr_alloc_init();
+	mleak_lock = lck_mtx_alloc_init(mleak_lock_grp, mleak_lock_attr);
+
+	mleak_activate();
 
 	/* Calculate the number of pages assigned to the cluster pool */
 	mcl_pages = (nmbclusters * MCLBYTES) / CLBYTES;
@@ -1161,19 +1389,41 @@ mbinit(void)
 	mcl_paddr_base = IOMapperIOVMAlloc(mcl_pages);
 	bzero((char *)mcl_paddr, mcl_pages * sizeof (ppnum_t));
 
-	embutl = (union mcluster *)
+	embutl = (union mbigcluster *)
 	    ((unsigned char *)mbutl + (nmbclusters * MCLBYTES));
+	VERIFY((((char *)embutl - (char *)mbutl) % MBIGCLBYTES) == 0);
 
+	/* Prime up the freelist */
 	PE_parse_boot_argn("initmcl", &initmcl, sizeof (initmcl));
+	if (initmcl != 0) {
+		initmcl >>= NCLPBGSHIFT;	/* convert to 4KB units */
+		if (initmcl > m_maxlimit(MC_BIGCL))
+			initmcl = m_maxlimit(MC_BIGCL);
+	}
+	if (initmcl < m_minlimit(MC_BIGCL))
+		initmcl = m_minlimit(MC_BIGCL);
 
 	lck_mtx_lock(mbuf_mlock);
 
-	if (m_clalloc(MAX(NBPG/CLBYTES, 1) * initmcl, M_WAIT, MCLBYTES) == 0)
-		panic("mbinit: m_clalloc failed\n");
+	/*
+	 * For classes with non-zero minimum limits, populate their freelists
+	 * so that m_total(class) is at least m_minlimit(class).
+	 */
+	VERIFY(m_total(MC_BIGCL) == 0 && m_minlimit(MC_BIGCL) != 0);
+	freelist_populate(m_class(MC_BIGCL), initmcl, M_WAIT);
+	VERIFY(m_total(MC_BIGCL) >= m_minlimit(MC_BIGCL));
+	freelist_init(m_class(MC_CL));
+
+	for (m = 0; m < NELEM(mbuf_table); m++) {
+		/* Make sure we didn't miss any */
+		VERIFY(m_minlimit(m_class(m)) == 0 ||
+		    m_total(m_class(m)) >= m_minlimit(m_class(m)));
+	}
 
 	lck_mtx_unlock(mbuf_mlock);
 
-	(void) kernel_thread_start((thread_continue_t)mbuf_worker_thread_init, NULL, &thread);
+	(void) kernel_thread_start((thread_continue_t)mbuf_worker_thread_init,
+	    NULL, &thread);
 	thread_deallocate(thread);
 
 	ref_cache = mcache_create("mext_ref", sizeof (struct ext_ref),
@@ -1181,7 +1431,7 @@ mbinit(void)
 
 	/* Create the cache for each class */
 	for (m = 0; m < NELEM(mbuf_table); m++) {
-		void *allocfunc, *freefunc, *auditfunc;
+		void *allocfunc, *freefunc, *auditfunc, *logfunc;
 		u_int32_t flags;
 
 		flags = mbuf_debug;
@@ -1190,10 +1440,12 @@ mbinit(void)
 			allocfunc = mbuf_cslab_alloc;
 			freefunc = mbuf_cslab_free;
 			auditfunc = mbuf_cslab_audit;
+			logfunc = mleak_logger;
 		} else {
 			allocfunc = mbuf_slab_alloc;
 			freefunc = mbuf_slab_free;
 			auditfunc = mbuf_slab_audit;
+			logfunc = mleak_logger;
 		}
 
 		/*
@@ -1206,8 +1458,11 @@ mbinit(void)
 		    njcl == 0)
 			flags |= MCF_NOCPUCACHE;
 
+		if (!mclfindleak)
+			flags |= MCF_NOLEAKLOG;
+
 		m_cache(m) = mcache_create_ext(m_cname(m), m_maxsize(m),
-		    allocfunc, freefunc, auditfunc, mbuf_slab_notify,
+		    allocfunc, freefunc, auditfunc, logfunc, mbuf_slab_notify,
 		    (void *)(uintptr_t)m, flags, MCR_SLEEP);
 	}
 
@@ -1225,30 +1480,31 @@ mbinit(void)
 	mbuf_mtypes = (mbuf_mtypes_t *)P2ROUNDUP((intptr_t)buf, CPU_CACHE_SIZE);
 	bzero(mbuf_mtypes, MBUF_MTYPES_SIZE(ncpu));
 
-	mbuf_gscale = MB_GROWTH_NORMAL;
-
-	/* 
-	 * Set the max limit on sb_max to be 1/16 th of the size of 
+	/*
+	 * Set the max limit on sb_max to be 1/16th of the size of
 	 * memory allocated for mbuf clusters.
 	 */
-	high_sb_max = (nmbclusters << (MCLSHIFT - 4)); 
+	high_sb_max = (nmbclusters << (MCLSHIFT - 4));
 	if (high_sb_max < sb_max) {
 		/* sb_max is too large for this configuration, scale it down */
-		if (high_sb_max > (1 << MBSHIFT)) { 
+		if (high_sb_max > (1 << MBSHIFT)) {
 			/* We have at least 16 MB of mbuf pool */
 			sb_max = high_sb_max;
 		} else if ((nmbclusters << MCLSHIFT) > (1 << MBSHIFT)) {
-			/* If we have more than 1M of mbufpool, cap the size of
+			/*
+			 * If we have more than 1 MB of mbuf pool, cap the size
 			 * of max sock buf at 1 MB
-			 */ 
+			 */
 			sb_max = high_sb_max = (1 << MBSHIFT);
 		} else {
 			sb_max = high_sb_max;
 		}
 	}
 
-	printf("mbinit: done (%d MB memory set for mbuf pool)\n",
-	    (nmbclusters << MCLSHIFT) >> MBSHIFT);
+	printf("mbinit: done [%d MB total pool size, (%d/%d) split]\n",
+	    (nmbclusters << MCLSHIFT) >> MBSHIFT,
+	    (nclusters << MCLSHIFT) >> MBSHIFT,
+	    (njcl << MCLSHIFT) >> MBSHIFT);
 }
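/*
 * The slab-table and audit-table sizing formulas quoted in the comments
 * above reduce to shift arithmetic.  A standalone sketch, assuming
 * MCLSHIFT == 11, MBSHIFT == 20 and PGSHIFT == 12 (assumed values
 * consistent with the 2KB/1MB/4KB sizes the comments mention).
 */
#include <assert.h>

#define P2ROUNDUP(x, align)	(((x) + (align) - 1) & ~((align) - 1))
#define MCLSHIFT	11		/* 2KB cluster */
#define MBSHIFT		20		/* 1 MB */
#define MBSIZE		(1 << MBSHIFT)
#define PGSHIFT		12		/* 4KB page */

int
main(void)
{
	unsigned int nmbclusters = 32768;
	unsigned int maxslabgrp, maxclaudit;

	/* (N * 2048) / (1024 * 1024), N rounded up to the nearest 512 */
	maxslabgrp =
	    (P2ROUNDUP(nmbclusters, (MBSIZE >> 11)) << MCLSHIFT) >> MBSHIFT;

	/* (maxslabgrp * 1024 * 1024) / 4096: one audit unit per page */
	maxclaudit = (maxslabgrp << MBSHIFT) >> PGSHIFT;

	assert(maxslabgrp == 64);	/* 32768 x 2KB == 64 MB of groups */
	assert(maxclaudit == 64 * 256);	/* 256 4KB pages per MB */
	return (0);
}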
 
 /*
@@ -1274,7 +1530,7 @@ slab_alloc(mbuf_class_t class, int wait)
 	 * more than one buffer chunk (e.g. mbuf slabs).  For other
 	 * slabs, this probably doesn't make much of a difference.
 	 */
-	if (class == MC_MBUF && (wait & MCR_COMP))
+	if ((class == MC_MBUF || class == MC_CL) && (wait & MCR_COMP))
 		sp = (mcl_slab_t *)TAILQ_LAST(&m_slablist(class), mcl_slhead);
 	else
 		sp = (mcl_slab_t *)TAILQ_FIRST(&m_slablist(class));
@@ -1294,7 +1550,10 @@ slab_alloc(mbuf_class_t class, int wait)
 
 	if (class == MC_MBUF) {
 		sp->sl_head = buf->obj_next;
-		VERIFY(sp->sl_head != NULL || sp->sl_refcnt == (NMBPCL - 1));
+		VERIFY(sp->sl_head != NULL || sp->sl_refcnt == (NMBPBG - 1));
+	} else if (class == MC_CL) {
+		sp->sl_head = buf->obj_next;
+		VERIFY(sp->sl_head != NULL || sp->sl_refcnt == (NCLPBG - 1));
 	} else {
 		sp->sl_head = NULL;
 	}
@@ -1319,41 +1578,33 @@ slab_alloc(mbuf_class_t class, int wait)
 	if (class == MC_CL) {
 		mbstat.m_clfree = (--m_infree(MC_CL)) + m_infree(MC_MBUF_CL);
 		/*
-		 * A 2K cluster slab can have at most 1 reference.
+		 * A 2K cluster slab can have at most NCLPBG references.
 		 */
-		VERIFY(sp->sl_refcnt == 1 && sp->sl_chunks == 1 &&
-		    sp->sl_len == m_maxsize(MC_CL) && sp->sl_head == NULL);
+		VERIFY(sp->sl_refcnt >= 1 && sp->sl_refcnt <= NCLPBG &&
+		    sp->sl_chunks == NCLPBG &&
+		    sp->sl_len == m_maxsize(MC_BIGCL));
+		VERIFY(sp->sl_refcnt < NCLPBG || sp->sl_head == NULL);
 	} else if (class == MC_BIGCL) {
-		mcl_slab_t *nsp = sp->sl_next;
 		mbstat.m_bigclfree = (--m_infree(MC_BIGCL)) +
 		    m_infree(MC_MBUF_BIGCL);
 		/*
-		 * Increment 2nd slab.  A 4K big cluster takes
-		 * 2 slabs, each having at most 1 reference.
+		 * A 4K cluster slab can have at most 1 reference.
 		 */
 		VERIFY(sp->sl_refcnt == 1 && sp->sl_chunks == 1 &&
-		    sp->sl_len == m_maxsize(MC_BIGCL) && sp->sl_head == NULL);
-		/* Next slab must already be present */
-		VERIFY(nsp != NULL);
-		nsp->sl_refcnt++;
-		VERIFY(!slab_is_detached(nsp));
-		VERIFY(nsp->sl_class == MC_BIGCL &&
-		    nsp->sl_flags == (SLF_MAPPED | SLF_PARTIAL) &&
-		    nsp->sl_refcnt == 1 && nsp->sl_chunks == 0 &&
-		    nsp->sl_len == 0 && nsp->sl_base == sp->sl_base &&
-		    nsp->sl_head == NULL);
+		    sp->sl_len == m_maxsize(class) && sp->sl_head == NULL);
 	} else if (class == MC_16KCL) {
 		mcl_slab_t *nsp;
 		int k;
 
 		--m_infree(MC_16KCL);
 		VERIFY(sp->sl_refcnt == 1 && sp->sl_chunks == 1 &&
-		    sp->sl_len == m_maxsize(MC_16KCL) && sp->sl_head == NULL);
+		    sp->sl_len == m_maxsize(class) && sp->sl_head == NULL);
 		/*
-		 * Increment 2nd-8th slab.  A 16K big cluster takes
-		 * 8 cluster slabs, each having at most 1 reference.
+		 * Increment 2nd-Nth slab reference, where N is NSLABSP16KB.
+		 * A 16KB big cluster takes NSLABSP16KB slabs, each having at
+		 * most 1 reference.
 		 */
-		for (nsp = sp, k = 1; k < (M16KCLBYTES / MCLBYTES); k++) {
+		for (nsp = sp, k = 1; k < NSLABSP16KB; k++) {
 			nsp = nsp->sl_next;
 			/* Next slab must already be present */
 			VERIFY(nsp != NULL);
@@ -1366,7 +1617,7 @@ slab_alloc(mbuf_class_t class, int wait)
 			    nsp->sl_head == NULL);
 		}
 	} else {
-		ASSERT(class == MC_MBUF);
+		VERIFY(class == MC_MBUF);
 		--m_infree(MC_MBUF);
 		/*
 		 * If auditing is turned on, this check is
@@ -1376,20 +1627,20 @@ slab_alloc(mbuf_class_t class, int wait)
 			_MCHECK((struct mbuf *)buf);
 		/*
 		 * Since we have incremented the reference count above,
-		 * an mbuf slab (formerly a 2K cluster slab that was cut
+		 * an mbuf slab (formerly a 4KB cluster slab that was cut
 		 * up into mbufs) must have a reference count between 1
-		 * and NMBPCL at this point.
+		 * and NMBPBG at this point.
 		 */
-		VERIFY(sp->sl_refcnt >= 1 &&
-		    (unsigned short)sp->sl_refcnt <= NMBPCL &&
-		    sp->sl_chunks == NMBPCL && sp->sl_len == m_maxsize(MC_CL));
-		VERIFY((unsigned short)sp->sl_refcnt < NMBPCL ||
-		    sp->sl_head == NULL);
+		VERIFY(sp->sl_refcnt >= 1 && sp->sl_refcnt <= NMBPBG &&
+		    sp->sl_chunks == NMBPBG &&
+		    sp->sl_len == m_maxsize(MC_BIGCL));
+		VERIFY(sp->sl_refcnt < NMBPBG || sp->sl_head == NULL);
 	}
 
 	/* If empty, remove this slab from the class's freelist */
 	if (sp->sl_head == NULL) {
-		VERIFY(class != MC_MBUF || sp->sl_refcnt == NMBPCL);
+		VERIFY(class != MC_MBUF || sp->sl_refcnt == NMBPBG);
+		VERIFY(class != MC_CL || sp->sl_refcnt == NCLPBG);
 		slab_remove(sp, class);
 	}
 
@@ -1415,45 +1666,38 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf)
 	/* Decrement slab reference */
 	sp->sl_refcnt--;
 
-	if (class == MC_CL || class == MC_BIGCL) {
+	if (class == MC_CL) {
 		VERIFY(IS_P2ALIGNED(buf, MCLBYTES));
 		/*
-		 * A 2K cluster slab can have at most 1 reference
+		 * A slab that has been split for 2KB clusters can have
+		 * at most NCLPBG - 1 outstanding references at this point.
+		 */
+		VERIFY(sp->sl_refcnt >= 0 && sp->sl_refcnt <= (NCLPBG - 1) &&
+		    sp->sl_chunks == NCLPBG &&
+		    sp->sl_len == m_maxsize(MC_BIGCL));
+		VERIFY(sp->sl_refcnt < (NCLPBG - 1) ||
+		    (slab_is_detached(sp) && sp->sl_head == NULL));
+	} else if (class == MC_BIGCL) {
+		VERIFY(IS_P2ALIGNED(buf, MCLBYTES));
+		/*
+		 * A 4KB cluster slab can have at most 1 reference
 		 * which must be 0 at this point.
 		 */
 		VERIFY(sp->sl_refcnt == 0 && sp->sl_chunks == 1 &&
 		    sp->sl_len == m_maxsize(class) && sp->sl_head == NULL);
 		VERIFY(slab_is_detached(sp));
-		if (class == MC_BIGCL) {
-			mcl_slab_t *nsp = sp->sl_next;
-			VERIFY(IS_P2ALIGNED(buf, NBPG));
-			/* Next slab must already be present */
-			VERIFY(nsp != NULL);
-			/* Decrement 2nd slab reference */
-			nsp->sl_refcnt--;
-			/*
-			 * A 4K big cluster takes 2 slabs, both
-			 * must now have 0 reference.
-			 */
-			VERIFY(slab_is_detached(nsp));
-			VERIFY(nsp->sl_class == MC_BIGCL &&
-			    (nsp->sl_flags & (SLF_MAPPED | SLF_PARTIAL)) &&
-			    nsp->sl_refcnt == 0 && nsp->sl_chunks == 0 &&
-			    nsp->sl_len == 0 && nsp->sl_base == sp->sl_base &&
-			    nsp->sl_head == NULL);
-		}
 	} else if (class == MC_16KCL) {
 		mcl_slab_t *nsp;
 		int k;
 		/*
-		 * A 16K cluster takes 8 cluster slabs, all must
+		 * A 16KB cluster takes NSLABSP16KB slabs, all must
 		 * now have 0 reference.
 		 */
-		VERIFY(IS_P2ALIGNED(buf, NBPG));
+		VERIFY(IS_P2ALIGNED(buf, MBIGCLBYTES));
 		VERIFY(sp->sl_refcnt == 0 && sp->sl_chunks == 1 &&
-		    sp->sl_len == m_maxsize(MC_16KCL) && sp->sl_head == NULL);
+		    sp->sl_len == m_maxsize(class) && sp->sl_head == NULL);
 		VERIFY(slab_is_detached(sp));
-		for (nsp = sp, k = 1; k < (M16KCLBYTES / MCLBYTES); k++) {
+		for (nsp = sp, k = 1; k < NSLABSP16KB; k++) {
 			nsp = nsp->sl_next;
 			/* Next slab must already be present */
 			VERIFY(nsp != NULL);
@@ -1467,14 +1711,15 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf)
 		}
 	} else {
 		/*
-		 * An mbuf slab has a total of NMBPL reference counts.
-		 * Since we have decremented the reference above, it
-		 * must now be between 0 and NMBPCL-1.
+		 * A slab that has been split for mbufs has at most NMBPBG
+		 * reference counts.  Since we have decremented one reference
+		 * above, it must now be between 0 and NMBPBG-1.
 		 */
-		VERIFY(sp->sl_refcnt >= 0 &&
-		    (unsigned short)sp->sl_refcnt <= (NMBPCL - 1) &&
-		    sp->sl_chunks == NMBPCL && sp->sl_len == m_maxsize(MC_CL));
-		VERIFY(sp->sl_refcnt < (NMBPCL - 1) ||
+		VERIFY(class == MC_MBUF);
+		VERIFY(sp->sl_refcnt >= 0 && sp->sl_refcnt <= (NMBPBG - 1) &&
+		    sp->sl_chunks == NMBPBG &&
+		    sp->sl_len == m_maxsize(MC_BIGCL));
+		VERIFY(sp->sl_refcnt < (NMBPBG - 1) ||
 		    (slab_is_detached(sp) && sp->sl_head == NULL));
 	}
 
@@ -1485,12 +1730,15 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf)
 	 */
 	if (mclaudit != NULL) {
 		mcache_audit_t *mca = mcl_audit_buf2mca(class, buf);
-		mcache_audit_free_verify(mca, buf, 0, m_maxsize(class));
+		if (mclverify) {
+			mcache_audit_free_verify(mca, buf, 0, m_maxsize(class));
+		}
 		mca->mca_uflags &= ~MB_SCVALID;
 	}
 
 	if (class == MC_CL) {
 		mbstat.m_clfree = (++m_infree(MC_CL)) + m_infree(MC_MBUF_CL);
+		buf->obj_next = sp->sl_head;
 	} else if (class == MC_BIGCL) {
 		mbstat.m_bigclfree = (++m_infree(MC_BIGCL)) +
 		    m_infree(MC_MBUF_BIGCL);
@@ -1502,14 +1750,25 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf)
 	}
 	sp->sl_head = buf;
 
-	/* All mbufs are freed; return the cluster that we stole earlier */
-	if (sp->sl_refcnt == 0 && class == MC_MBUF) {
-		int i = NMBPCL;
-
-		m_total(MC_MBUF) -= NMBPCL;
+	/*
+	 * If a slab has been split into one which holds 2KB clusters,
+	 * or one which holds mbufs, turn it back into one which holds
+	 * a 4KB cluster.
+	 */
+	if (class == MC_MBUF && sp->sl_refcnt == 0 &&
+	    m_total(class) > m_minlimit(class) &&
+	    m_total(MC_BIGCL) < m_maxlimit(MC_BIGCL)) {
+		int i = NMBPBG;
+
+		m_total(MC_BIGCL)++;
+		mbstat.m_bigclusters = m_total(MC_BIGCL);
+		m_total(MC_MBUF) -= NMBPBG;
 		mbstat.m_mbufs = m_total(MC_MBUF);
-		m_infree(MC_MBUF) -= NMBPCL;
-		mtype_stat_add(MT_FREE, -((unsigned)NMBPCL));
+		m_infree(MC_MBUF) -= NMBPBG;
+		mtype_stat_add(MT_FREE, -((unsigned)NMBPBG));
+
+		VERIFY(m_total(MC_BIGCL) <= m_maxlimit(MC_BIGCL));
+		VERIFY(m_total(MC_MBUF) >= m_minlimit(MC_MBUF));
 
 		while (i--) {
 			struct mbuf *m = sp->sl_head;
@@ -1522,19 +1781,58 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf)
 		/* Remove the slab from the mbuf class's slab list */
 		slab_remove(sp, class);
 
-		/* Reinitialize it as a 2K cluster slab */
-		slab_init(sp, MC_CL, sp->sl_flags, sp->sl_base, sp->sl_base,
+		/* Reinitialize it as a 4KB cluster slab */
+		slab_init(sp, MC_BIGCL, sp->sl_flags, sp->sl_base, sp->sl_base,
 		    sp->sl_len, 0, 1);
 
-		if (mclaudit != NULL)
+		if (mclverify) {
 			mcache_set_pattern(MCACHE_FREE_PATTERN,
-			    (caddr_t)sp->sl_head, m_maxsize(MC_CL));
+			    (caddr_t)sp->sl_head, m_maxsize(MC_BIGCL));
+		}
+		mbstat.m_bigclfree = (++m_infree(MC_BIGCL)) +
+		    m_infree(MC_MBUF_BIGCL);
 
-		mbstat.m_clfree = (++m_infree(MC_CL)) + m_infree(MC_MBUF_CL);
+		VERIFY(slab_is_detached(sp));
+		/* And finally switch class */
+		class = MC_BIGCL;
+	} else if (class == MC_CL && sp->sl_refcnt == 0 &&
+	    m_total(class) > m_minlimit(class) &&
+	    m_total(MC_BIGCL) < m_maxlimit(MC_BIGCL)) {
+		int i = NCLPBG;
+
+		m_total(MC_BIGCL)++;
+		mbstat.m_bigclusters = m_total(MC_BIGCL);
+		m_total(MC_CL) -= NCLPBG;
+		mbstat.m_clusters = m_total(MC_CL);
+		m_infree(MC_CL) -= NCLPBG;
+		VERIFY(m_total(MC_BIGCL) <= m_maxlimit(MC_BIGCL));
+		VERIFY(m_total(MC_CL) >= m_minlimit(MC_CL));
+
+		while (i--) {
+			union mcluster *c = sp->sl_head;
+			VERIFY(c != NULL);
+			sp->sl_head = c->mcl_next;
+			c->mcl_next = NULL;
+		}
+		VERIFY(sp->sl_head == NULL);
+
+		/* Remove the slab from the 2KB cluster class's slab list */
+		slab_remove(sp, class);
+
+		/* Reinitialize it as a 4KB cluster slab */
+		slab_init(sp, MC_BIGCL, sp->sl_flags, sp->sl_base, sp->sl_base,
+		    sp->sl_len, 0, 1);
+
+		if (mclverify) {
+			mcache_set_pattern(MCACHE_FREE_PATTERN,
+			    (caddr_t)sp->sl_head, m_maxsize(MC_BIGCL));
+		}
+		mbstat.m_bigclfree = (++m_infree(MC_BIGCL)) +
+		    m_infree(MC_MBUF_BIGCL);
 
 		VERIFY(slab_is_detached(sp));
 		/* And finally switch class */
-		class = MC_CL;
+		class = MC_BIGCL;
 	}
 
 	/* Reinsert the slab to the class's slab list */
@@ -1593,6 +1891,9 @@ mbuf_slab_alloc(void *arg, mcache_obj_t ***plist, unsigned int num, int wait)
 			if (mbuf_cached_above(class, wait))
 				break;
 
+			/* watchdog checkpoint */
+			mbuf_watchdog();
+
 			/* We have nothing and cannot block; give up */
 			if (wait & MCR_NOSLEEP) {
 				if (!(wait & MCR_TRYHARD)) {
@@ -1689,7 +1990,9 @@ mbuf_slab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
 			ASSERT(!(mca->mca_uflags & MB_SCVALID));
 		}
 		/* Record this transaction */
-		mcache_buffer_log(mca, list, m_cache(class));
+		if (mcltrace)
+			mcache_buffer_log(mca, list, m_cache(class));
+
 		if (alloc)
 			mca->mca_uflags |= MB_INUSE;
 		else
@@ -1756,16 +2059,17 @@ cslab_alloc(mbuf_class_t class, mcache_obj_t ***plist, unsigned int num)
 		clsp = slab_get(cl);
 		VERIFY(m->m_flags == M_EXT && cl != NULL);
 		VERIFY(MEXT_RFA(m) != NULL && MBUF_IS_COMPOSITE(m));
-		VERIFY(clsp->sl_refcnt == 1);
-		if (class == MC_MBUF_BIGCL) {
-			nsp = clsp->sl_next;
-			/* Next slab must already be present */
-			VERIFY(nsp != NULL);
-			VERIFY(nsp->sl_refcnt == 1);
-		} else if (class == MC_MBUF_16KCL) {
+
+		if (class == MC_MBUF_CL) {
+			VERIFY(clsp->sl_refcnt >= 1 &&
+			    clsp->sl_refcnt <= NCLPBG);
+		} else {
+			VERIFY(clsp->sl_refcnt == 1);
+		}
+
+		if (class == MC_MBUF_16KCL) {
 			int k;
-			for (nsp = clsp, k = 1;
-			    k < (M16KCLBYTES / MCLBYTES); k++) {
+			for (nsp = clsp, k = 1; k < NSLABSP16KB; k++) {
 				nsp = nsp->sl_next;
 				/* Next slab must already be present */
 				VERIFY(nsp != NULL);
@@ -1802,11 +2106,21 @@ cslab_free(mbuf_class_t class, mcache_obj_t *list, int purged)
 	mcache_obj_t *ref_list = NULL;
 	mcl_slab_t *clsp, *nsp;
 	void *cl;
+	mbuf_class_t cl_class;
 
 	ASSERT(MBUF_CLASS_VALID(class) && MBUF_CLASS_COMPOSITE(class));
 	VERIFY(class != MC_MBUF_16KCL || njcl > 0);
 	lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
 
+	if (class == MC_MBUF_CL) {
+		cl_class = MC_CL;
+	} else if (class == MC_MBUF_BIGCL) {
+		cl_class = MC_BIGCL;
+	} else {
+		VERIFY(class == MC_MBUF_16KCL);
+		cl_class = MC_16KCL;
+	}
+
 	o = tail = list;
 
 	while ((m = ms = (struct mbuf *)o) != NULL) {
@@ -1815,37 +2129,33 @@ cslab_free(mbuf_class_t class, mcache_obj_t *list, int purged)
 		/* Do the mbuf sanity checks */
 		if (mclaudit != NULL) {
 			mca = mcl_audit_buf2mca(MC_MBUF, (mcache_obj_t *)m);
-			mcache_audit_free_verify(mca, m, 0, m_maxsize(MC_MBUF));
+			if (mclverify) {
+				mcache_audit_free_verify(mca, m, 0,
+				    m_maxsize(MC_MBUF));
+			}
 			ms = (struct mbuf *)mca->mca_contents;
 		}
 
 		/* Do the cluster sanity checks */
 		cl = ms->m_ext.ext_buf;
 		clsp = slab_get(cl);
-		if (mclaudit != NULL) {
-			size_t size;
-			if (class == MC_MBUF_CL)
-				size = m_maxsize(MC_CL);
-			else if (class == MC_MBUF_BIGCL)
-				size = m_maxsize(MC_BIGCL);
-			else
-				size = m_maxsize(MC_16KCL);
-			mcache_audit_free_verify(mcl_audit_buf2mca(MC_CL,
+		if (mclverify) {
+			size_t size = m_maxsize(cl_class);
+			mcache_audit_free_verify(mcl_audit_buf2mca(cl_class,
 			    (mcache_obj_t *)cl), cl, 0, size);
 		}
 		VERIFY(ms->m_type == MT_FREE);
 		VERIFY(ms->m_flags == M_EXT);
 		VERIFY(MEXT_RFA(ms) != NULL && MBUF_IS_COMPOSITE(ms));
-		VERIFY(clsp->sl_refcnt == 1);
-		if (class == MC_MBUF_BIGCL) {
-			nsp = clsp->sl_next;
-			/* Next slab must already be present */
-			VERIFY(nsp != NULL);
-			VERIFY(nsp->sl_refcnt == 1);
-		} else if (class == MC_MBUF_16KCL) {
+		if (cl_class == MC_CL) {
+			VERIFY(clsp->sl_refcnt >= 1 &&
+			    clsp->sl_refcnt <= NCLPBG);
+		} else {
+			VERIFY(clsp->sl_refcnt == 1);
+		}
+		if (cl_class == MC_16KCL) {
 			int k;
-			for (nsp = clsp, k = 1;
-			    k < (M16KCLBYTES / MCLBYTES); k++) {
+			for (nsp = clsp, k = 1; k < NSLABSP16KB; k++) {
 				nsp = nsp->sl_next;
 				/* Next slab must already be present */
 				VERIFY(nsp != NULL);
@@ -1926,7 +2236,7 @@ mbuf_cslab_alloc(void *arg, mcache_obj_t ***plist, unsigned int needed,
     int wait)
 {
 	mbuf_class_t class = (mbuf_class_t)arg;
-	mcache_t *cp = NULL;
+	mbuf_class_t cl_class = 0;
 	unsigned int num = 0, cnum = 0, want = needed;
 	mcache_obj_t *ref_list = NULL;
 	mcache_obj_t *mp_list = NULL;
@@ -1977,22 +2287,28 @@ mbuf_cslab_alloc(void *arg, mcache_obj_t ***plist, unsigned int needed,
 	if (!(wait & MCR_NOSLEEP))
 		wait |= MCR_FAILOK;
 
+	/* allocate mbufs */
 	needed = mcache_alloc_ext(m_cache(MC_MBUF), &mp_list, needed, wait);
 	if (needed == 0) {
 		ASSERT(mp_list == NULL);
 		goto fail;
 	}
-	if (class == MC_MBUF_CL)
-		cp = m_cache(MC_CL);
-	else if (class == MC_MBUF_BIGCL)
-		cp = m_cache(MC_BIGCL);
-	else
-		cp = m_cache(MC_16KCL);
-	needed = mcache_alloc_ext(cp, &clp_list, needed, wait);
+
+	/* allocate clusters */
+	if (class == MC_MBUF_CL) {
+		cl_class = MC_CL;
+	} else if (class == MC_MBUF_BIGCL) {
+		cl_class = MC_BIGCL;
+	} else {
+		VERIFY(class == MC_MBUF_16KCL);
+		cl_class = MC_16KCL;
+	}
+	needed = mcache_alloc_ext(m_cache(cl_class), &clp_list, needed, wait);
 	if (needed == 0) {
 		ASSERT(clp_list == NULL);
 		goto fail;
 	}
+
 	needed = mcache_alloc_ext(ref_cache, &ref_list, needed, wait);
 	if (needed == 0) {
 		ASSERT(ref_list == NULL);
@@ -2025,7 +2341,6 @@ mbuf_cslab_alloc(void *arg, mcache_obj_t ***plist, unsigned int needed,
 		 */
 		if (mclaudit != NULL) {
 			mcache_audit_t *mca, *cl_mca;
-			size_t size;
 
 			lck_mtx_lock(mbuf_mlock);
 			mca = mcl_audit_buf2mca(MC_MBUF, (mcache_obj_t *)m);
@@ -2048,15 +2363,22 @@ mbuf_cslab_alloc(void *arg, mcache_obj_t ***plist, unsigned int needed,
 			lck_mtx_unlock(mbuf_mlock);
 
 			/* Technically, they are in the freelist */
-			mcache_set_pattern(MCACHE_FREE_PATTERN, m,
-			    m_maxsize(MC_MBUF));
-			if (class == MC_MBUF_CL)
-				size = m_maxsize(MC_CL);
-			else if (class == MC_MBUF_BIGCL)
-				size = m_maxsize(MC_BIGCL);
-			else
-				size = m_maxsize(MC_16KCL);
-			mcache_set_pattern(MCACHE_FREE_PATTERN, cl, size);
+			if (mclverify) {
+				size_t size;
+
+				mcache_set_pattern(MCACHE_FREE_PATTERN, m,
+				    m_maxsize(MC_MBUF));
+
+				if (class == MC_MBUF_CL)
+					size = m_maxsize(MC_CL);
+				else if (class == MC_MBUF_BIGCL)
+					size = m_maxsize(MC_BIGCL);
+				else
+					size = m_maxsize(MC_16KCL);
+
+				mcache_set_pattern(MCACHE_FREE_PATTERN, cl,
+				    size);
+			}
 		}
 
 		MBUF_INIT(ms, 0, MT_FREE);
@@ -2082,7 +2404,7 @@ fail:
 	if (mp_list != NULL)
 		mcache_free_ext(m_cache(MC_MBUF), mp_list);
 	if (clp_list != NULL)
-		mcache_free_ext(cp, clp_list);
+		mcache_free_ext(m_cache(cl_class), clp_list);
 	if (ref_list != NULL)
 		mcache_free_ext(ref_cache, ref_list);
 
@@ -2152,7 +2474,9 @@ mbuf_cslab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
 		/* Do the mbuf sanity checks and record its transaction */
 		mca = mcl_audit_buf2mca(MC_MBUF, (mcache_obj_t *)m);
 		mcl_audit_mbuf(mca, m, TRUE, alloc);
-		mcache_buffer_log(mca, m, m_cache(class));
+		if (mcltrace)
+			mcache_buffer_log(mca, m, m_cache(class));
+
 		if (alloc)
 			mca->mca_uflags |= MB_COMP_INUSE;
 		else
@@ -2163,7 +2487,7 @@ mbuf_cslab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
 		 * freeing, since the contents of the actual mbuf has been
 		 * pattern-filled by the above call to mcl_audit_mbuf().
 		 */
-		if (!alloc)
+		if (!alloc && mclverify)
 			ms = (struct mbuf *)mca->mca_contents;
 
 		/* Do the cluster sanity checks and record its transaction */
@@ -2171,16 +2495,15 @@ mbuf_cslab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
 		clsp = slab_get(cl);
 		VERIFY(ms->m_flags == M_EXT && cl != NULL);
 		VERIFY(MEXT_RFA(ms) != NULL && MBUF_IS_COMPOSITE(ms));
-		VERIFY(clsp->sl_refcnt == 1);
-		if (class == MC_MBUF_BIGCL) {
-			nsp = clsp->sl_next;
-			/* Next slab must already be present */
-			VERIFY(nsp != NULL);
-			VERIFY(nsp->sl_refcnt == 1);
-		} else if (class == MC_MBUF_16KCL) {
+		if (class == MC_MBUF_CL)
+			VERIFY(clsp->sl_refcnt >= 1 &&
+			    clsp->sl_refcnt <= NCLPBG);
+		else
+			VERIFY(clsp->sl_refcnt == 1);
+
+		if (class == MC_MBUF_16KCL) {
 			int k;
-			for (nsp = clsp, k = 1;
-			    k < (M16KCLBYTES / MCLBYTES); k++) {
+			for (nsp = clsp, k = 1; k < NSLABSP16KB; k++) {
 				nsp = nsp->sl_next;
 				/* Next slab must already be present */
 				VERIFY(nsp != NULL);
@@ -2196,7 +2519,9 @@ mbuf_cslab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
 		else
 			size = m_maxsize(MC_16KCL);
 		mcl_audit_cluster(mca, cl, size, alloc, FALSE);
-		mcache_buffer_log(mca, cl, m_cache(class));
+		if (mcltrace)
+			mcache_buffer_log(mca, cl, m_cache(class));
+
 		if (alloc)
 			mca->mca_uflags |= MB_COMP_INUSE;
 		else
@@ -2221,8 +2546,8 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize)
 	mcache_obj_t *con_list = NULL;
 	mcl_slab_t *sp;
 
-	VERIFY(bufsize == m_maxsize(MC_CL) ||
-	    bufsize == m_maxsize(MC_BIGCL) || bufsize == m_maxsize(MC_16KCL));
+	VERIFY(bufsize == m_maxsize(MC_BIGCL) ||
+	    bufsize == m_maxsize(MC_16KCL));
 
 	lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
 
@@ -2258,7 +2583,7 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize)
 	page = kmem_mb_alloc(mb_map, size, large_buffer);
 
 	/*
-	 * If we did ask for "n" 16K physically contiguous chunks
+	 * If we did ask for "n" 16KB physically contiguous chunks
 	 * and didn't get them, then please try again without this
 	 * restriction.
 	 */
@@ -2266,8 +2591,8 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize)
 		page = kmem_mb_alloc(mb_map, size, 0);
 
 	if (page == 0) {
-		if (bufsize <= m_maxsize(MC_BIGCL)) {
-			/* Try for 1 page if failed, only for 2KB/4KB request */
+		if (bufsize == m_maxsize(MC_BIGCL)) {
+			/* On failure, retry with 1 page; 4KB requests only */
 			size = NBPG;
 			page = kmem_mb_alloc(mb_map, size, 0);
 		}
@@ -2288,24 +2613,20 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize)
 		/*
 		 * Yes, I realize this is a waste of memory for clusters
 		 * that never get transformed into mbufs, as we may end
-		 * up with NMBPCL-1 unused audit structures per cluster.
+		 * up with NMBPBG-1 unused audit structures per cluster.
 		 * But doing so tremendously simplifies the allocation
 		 * strategy, since at this point we are not holding the
-		 * mbuf lock and the caller is okay to be blocked.  For
-		 * the case of big clusters, we allocate one structure
-		 * for each as we never turn them into mbufs.
+		 * mbuf lock and the caller is okay to be blocked.
 		 */
-		if (bufsize == m_maxsize(MC_CL)) {
-			needed = numpages * 2 * NMBPCL;
+		if (bufsize == m_maxsize(MC_BIGCL)) {
+			needed = numpages * NMBPBG;
 
 			i = mcache_alloc_ext(mcl_audit_con_cache,
 			    &con_list, needed, MCR_SLEEP);
 
 			VERIFY(con_list != NULL && i == needed);
-		} else if (bufsize == m_maxsize(MC_BIGCL)) {
-			needed = numpages;
 		} else {
-			needed = numpages / (M16KCLBYTES / NBPG);
+			needed = numpages / NSLABSP16KB;
 		}
 
 		i = mcache_alloc_ext(mcache_audit_cache,
@@ -2331,68 +2652,23 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize)
 		mcl_paddr[offset] = new_page << PGSHIFT;
 
 		/* Pattern-fill this fresh page */
-		if (mclaudit != NULL)
+		if (mclverify) {
 			mcache_set_pattern(MCACHE_FREE_PATTERN,
 			    (caddr_t)page, NBPG);
-
-		if (bufsize == m_maxsize(MC_CL)) {
-			union mcluster *mcl = (union mcluster *)page;
-
-			/* 1st cluster in the page */
-			sp = slab_get(mcl);
-			if (mclaudit != NULL)
-				mcl_audit_init(mcl, &mca_list, &con_list,
-				    AUDIT_CONTENTS_SIZE, NMBPCL);
-
-			VERIFY(sp->sl_refcnt == 0 && sp->sl_flags == 0);
-			slab_init(sp, MC_CL, SLF_MAPPED,
-			    mcl, mcl, bufsize, 0, 1);
-
-			/* Insert this slab */
-			slab_insert(sp, MC_CL);
-
-			/* Update stats now since slab_get() drops the lock */
-			mbstat.m_clfree = ++m_infree(MC_CL) +
-			    m_infree(MC_MBUF_CL);
-			mbstat.m_clusters = ++m_total(MC_CL);
-			VERIFY(m_total(MC_CL) <= m_maxlimit(MC_CL));
-
-			/* 2nd cluster in the page */
-			sp = slab_get(++mcl);
-			if (mclaudit != NULL)
-				mcl_audit_init(mcl, &mca_list, &con_list,
-				    AUDIT_CONTENTS_SIZE, NMBPCL);
-
-			VERIFY(sp->sl_refcnt == 0 && sp->sl_flags == 0);
-			slab_init(sp, MC_CL, SLF_MAPPED,
-			    mcl, mcl, bufsize, 0, 1);
-
-			/* Insert this slab */
-			slab_insert(sp, MC_CL);
-
-			/* Update stats now since slab_get() drops the lock */
-			mbstat.m_clfree = ++m_infree(MC_CL) +
-			    m_infree(MC_MBUF_CL);
-			mbstat.m_clusters = ++m_total(MC_CL);
-			VERIFY(m_total(MC_CL) <= m_maxlimit(MC_CL));
-		} else if (bufsize == m_maxsize(MC_BIGCL)) {
+		}
+		if (bufsize == m_maxsize(MC_BIGCL)) {
 			union mbigcluster *mbc = (union mbigcluster *)page;
-			mcl_slab_t *nsp;
 
 			/* One for the entire page */
 			sp = slab_get(mbc);
-			if (mclaudit != NULL)
-				mcl_audit_init(mbc, &mca_list, NULL, 0, 1);
-
+			if (mclaudit != NULL) {
+				mcl_audit_init(mbc, &mca_list, &con_list,
+				    AUDIT_CONTENTS_SIZE, NMBPBG);
+			}
 			VERIFY(sp->sl_refcnt == 0 && sp->sl_flags == 0);
 			slab_init(sp, MC_BIGCL, SLF_MAPPED,
 			    mbc, mbc, bufsize, 0, 1);
 
-			/* 2nd cluster's slab is part of the previous one */
-			nsp = slab_get(((union mcluster *)page) + 1);
-			slab_init(nsp, MC_BIGCL, SLF_MAPPED | SLF_PARTIAL,
-			    mbc, NULL, 0, 0, 0);
-
 			/* Insert this slab */
 			slab_insert(sp, MC_BIGCL);
 
@@ -2401,7 +2677,7 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize)
 			    m_infree(MC_MBUF_BIGCL);
 			mbstat.m_bigclusters = ++m_total(MC_BIGCL);
 			VERIFY(m_total(MC_BIGCL) <= m_maxlimit(MC_BIGCL));
-		} else if ((i % (M16KCLBYTES / NBPG)) == 0) {
+		} else if ((i % NSLABSP16KB) == 0) {
 			union m16kcluster *m16kcl = (union m16kcluster *)page;
 			mcl_slab_t *nsp;
 			int k;
@@ -2416,9 +2692,12 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize)
 			slab_init(sp, MC_16KCL, SLF_MAPPED,
 			    m16kcl, m16kcl, bufsize, 0, 1);
 
-			/* 2nd-8th cluster's slab is part of the first one */
-			for (k = 1; k < (M16KCLBYTES / MCLBYTES); k++) {
-				nsp = slab_get(((union mcluster *)page) + k);
+			/*
+			 * 2nd-Nth page's slab is part of the first one,
+			 * where N is NSLABSP16KB.
+			 */
+			for (k = 1; k < NSLABSP16KB; k++) {
+				nsp = slab_get(((union mbigcluster *)page) + k);
 				VERIFY(nsp->sl_refcnt == 0 &&
 				    nsp->sl_flags == 0);
 				slab_init(nsp, MC_16KCL,
@@ -2444,13 +2723,11 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize)
 		wakeup(mb_clalloc_waitchan);
 	}
 
-	if (bufsize == m_maxsize(MC_CL))
-		return (numpages << 1);
-	else if (bufsize == m_maxsize(MC_BIGCL))
+	if (bufsize == m_maxsize(MC_BIGCL))
 		return (numpages);
 
 	VERIFY(bufsize == m_maxsize(MC_16KCL));
-	return (numpages / (M16KCLBYTES / NBPG));
+	return (numpages / NSLABSP16KB);
 
 out:
 	lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
@@ -2466,23 +2743,7 @@ out:
 	 * When non-blocking we kick a thread if we have to grow the
 	 * pool or if the number of free clusters is less than requested.
 	 */
-	if (bufsize == m_maxsize(MC_CL)) {
-		if (i > 0) {
-			/*
-			 * Remember total number of clusters needed
-			 * at this time.
-			 */
-			i += m_total(MC_CL);
-			if (i > mbuf_expand_mcl) {
-				mbuf_expand_mcl = i;
-				if (mbuf_worker_ready)
-					wakeup((caddr_t)&mbuf_worker_run);
-			}
-		}
-
-		if (m_infree(MC_CL) >= num)
-			return (1);
-	} else if (bufsize == m_maxsize(MC_BIGCL)) {
+	if (bufsize == m_maxsize(MC_BIGCL)) {
 		if (i > 0) {
 			/*
 			 * Remember total number of 4KB clusters needed
@@ -2525,44 +2786,30 @@ static int
 freelist_populate(mbuf_class_t class, unsigned int num, int wait)
 {
 	mcache_obj_t *o = NULL;
-	int i;
+	int i, numpages = 0, count;
 
 	VERIFY(class == MC_MBUF || class == MC_CL || class == MC_BIGCL ||
 	    class == MC_16KCL);
 
-#if CONFIG_MBUF_NOEXPAND
-	if ((mbstat.m_mbufs / NMBPCL) >= maxmbufcl) {
-#if DEBUG
-		static int printonce = 1;
-		if (printonce == 1) {
-			printonce = 0;
-			printf("m_expand failed, allocated %ld out of %d "
-			    "clusters\n", mbstat.m_mbufs / NMBPCL,
-			    nmbclusters);
-		}
-#endif /* DEBUG */
-		return (0);
-	}
-#endif /* CONFIG_MBUF_NOEXPAND */
-
 	lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
 
 	switch (class) {
 	case MC_MBUF:
 	case MC_CL:
-		i = m_clalloc(num, wait, m_maxsize(MC_CL));
+	case MC_BIGCL:
+		numpages = (num * m_size(class) + NBPG - 1) / NBPG;
+		i = m_clalloc(numpages, wait, m_maxsize(MC_BIGCL));
 
-		/* Respect the 2K clusters minimum limit */
-		if (m_total(MC_CL) == m_maxlimit(MC_CL) &&
-		    m_infree(MC_CL) <= m_minlimit(MC_CL)) {
-			if (class != MC_CL || (wait & MCR_COMP))
+		/* Respect the 4KB clusters minimum limit */
+		if (m_total(MC_BIGCL) == m_maxlimit(MC_BIGCL) &&
+		    m_infree(MC_BIGCL) <= m_minlimit(MC_BIGCL)) {
+			if (class != MC_BIGCL || (wait & MCR_COMP))
 				return (0);
 		}
-		if (class == MC_CL)
+		if (class == MC_BIGCL)
 			return (i != 0);
 		break;
 
-	case MC_BIGCL:
 	case MC_16KCL:
 		return (m_clalloc(num, wait, m_maxsize(class)) != 0);
 		/* NOTREACHED */
@@ -2572,66 +2819,119 @@ freelist_populate(mbuf_class_t class, unsigned int num, int wait)
 		/* NOTREACHED */
 	}
 
-	/* Steal a cluster and cut it up to create NMBPCL mbufs */
-	if ((o = slab_alloc(MC_CL, wait)) != NULL) {
+	VERIFY(class == MC_MBUF || class == MC_CL);
+
+	/* How many objects will we cut the page into? */
+	int numobj = (class == MC_MBUF ? NMBPBG : NCLPBG);
+
+	for (count = 0; count < numpages; count++) {
+
+		/* respect totals, minlimit, maxlimit */
+		if (m_total(MC_BIGCL) <= m_minlimit(MC_BIGCL) ||
+		    m_total(class) >= m_maxlimit(class))
+			break;
+
+		if ((o = slab_alloc(MC_BIGCL, wait)) == NULL)
+			break;
+
 		struct mbuf *m = (struct mbuf *)o;
-		mcache_audit_t *mca = NULL;
+		union mcluster *c = (union mcluster *)o;
 		mcl_slab_t *sp = slab_get(o);
+		mcache_audit_t *mca = NULL;
 
 		VERIFY(slab_is_detached(sp) &&
 		    (sp->sl_flags & (SLF_MAPPED | SLF_PARTIAL)) == SLF_MAPPED);
 
-		/* Make sure that the cluster is unmolested while in freelist */
-		if (mclaudit != NULL) {
-			mca = mcl_audit_buf2mca(MC_CL, o);
-			mcache_audit_free_verify(mca, o, 0, m_maxsize(MC_CL));
+		/*
+		 * Make sure that the cluster is unmolested
+		 * while in freelist
+		 */
+		if (mclverify) {
+			mca = mcl_audit_buf2mca(MC_BIGCL, o);
+			mcache_audit_free_verify(mca, o, 0,
+			    m_maxsize(MC_BIGCL));
 		}
 
-		/* Reinitialize it as an mbuf slab */
-		slab_init(sp, MC_MBUF, sp->sl_flags, sp->sl_base, NULL,
-		    sp->sl_len, 0, NMBPCL);
+		/* Reinitialize it as an mbuf or 2KB slab */
+		slab_init(sp, class, sp->sl_flags,
+		    sp->sl_base, NULL, sp->sl_len, 0, numobj);
 
-		VERIFY(m == (struct mbuf *)sp->sl_base);
+		VERIFY(o == (mcache_obj_t *)sp->sl_base);
 		VERIFY(sp->sl_head == NULL);
 
-		m_total(MC_MBUF) += NMBPCL;
-		mbstat.m_mbufs = m_total(MC_MBUF);
-		m_infree(MC_MBUF) += NMBPCL;
-		mtype_stat_add(MT_FREE, NMBPCL);
+		VERIFY(m_total(MC_BIGCL) > 0);
+		m_total(MC_BIGCL)--;
+		mbstat.m_bigclusters = m_total(MC_BIGCL);
 
-		i = NMBPCL;
-		while (i--) {
-			/*
-			 * If auditing is enabled, construct the shadow mbuf
-			 * in the audit structure instead of the actual one.
-			 * mbuf_slab_audit() will take care of restoring the
-			 * contents after the integrity check.
-			 */
-			if (mclaudit != NULL) {
-				struct mbuf *ms;
-				mca = mcl_audit_buf2mca(MC_MBUF,
-				    (mcache_obj_t *)m);
-				ms = ((struct mbuf *)mca->mca_contents);
-				ms->m_type = MT_FREE;
-			} else {
-				m->m_type = MT_FREE;
+		m_total(class) += numobj;
+		m_infree(class) += numobj;
+
+		VERIFY(m_total(MC_BIGCL) >= m_minlimit(MC_BIGCL));
+		VERIFY(m_total(class) <= m_maxlimit(class));
+
+		i = numobj;
+		if (class == MC_MBUF) {
+			mbstat.m_mbufs = m_total(MC_MBUF);
+			mtype_stat_add(MT_FREE, NMBPBG);
+			while (i--) {
+				/*
+				 * If auditing is enabled, construct the
+				 * shadow mbuf in the audit structure
+				 * instead of the actual one.
+				 * mbuf_slab_audit() will take care of
+				 * restoring the contents after the
+				 * integrity check.
+				 */
+				if (mclaudit != NULL) {
+					struct mbuf *ms;
+					mca = mcl_audit_buf2mca(MC_MBUF,
+					    (mcache_obj_t *)m);
+					ms = ((struct mbuf *)
+					    mca->mca_contents);
+					ms->m_type = MT_FREE;
+				} else {
+					m->m_type = MT_FREE;
+				}
+				m->m_next = sp->sl_head;
+				sp->sl_head = (void *)m++;
+			}
+		} else { /* MC_CL */
+			mbstat.m_clfree =
+			    m_infree(MC_CL) + m_infree(MC_MBUF_CL);
+			mbstat.m_clusters = m_total(MC_CL);
+			while (i--) {
+				c->mcl_next = sp->sl_head;
+				sp->sl_head = (void *)c++;
 			}
-			m->m_next = sp->sl_head;
-			sp->sl_head = (void *)m++;
 		}
 
-		/* Insert it into the mbuf class's slab list */
-		slab_insert(sp, MC_MBUF);
+		/* Insert into the mbuf or 2KB slab list */
+		slab_insert(sp, class);
 
 		if ((i = mb_waiters) > 0)
 			mb_waiters = 0;
 		if (i != 0)
 			wakeup(mb_waitchan);
-
-		return (1);
 	}
+	return (count != 0);
+}
 
-	return (0);
+/*
+ * Initialize the given class's freelist to hold m_minlimit() objects.
+ */
+static void
+freelist_init(mbuf_class_t class)
+{
+	lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
+
+	VERIFY(class == MC_CL || class == MC_BIGCL);
+	VERIFY(m_total(class) == 0);
+	VERIFY(m_minlimit(class) > 0);
+
+	while (m_total(class) < m_minlimit(class))
+		(void) freelist_populate(class, m_minlimit(class), M_WAIT);
+
+	VERIFY(m_total(class) >= m_minlimit(class));
 }
 
 /*
@@ -2736,17 +3036,23 @@ m_reclaim(mbuf_class_t class, unsigned int num, boolean_t comp)
 	switch (class) {
 	case MC_MBUF:
 		m_wantpurge(MC_CL)++;
+		m_wantpurge(MC_BIGCL)++;
 		m_wantpurge(MC_MBUF_CL)++;
 		m_wantpurge(MC_MBUF_BIGCL)++;
 		break;
 
 	case MC_CL:
 		m_wantpurge(MC_MBUF)++;
+		m_wantpurge(MC_BIGCL)++;
+		m_wantpurge(MC_MBUF_BIGCL)++;
 		if (!comp)
 			m_wantpurge(MC_MBUF_CL)++;
 		break;
 
 	case MC_BIGCL:
+		m_wantpurge(MC_MBUF)++;
+		m_wantpurge(MC_CL)++;
+		m_wantpurge(MC_MBUF_CL)++;
 		if (!comp)
 			m_wantpurge(MC_MBUF_BIGCL)++;
 		break;
@@ -2894,11 +3200,11 @@ m_free(struct mbuf *m)
 
 	if (m->m_flags & M_EXT) {
 		u_int32_t refcnt;
-		u_int32_t flags;
+		u_int32_t composite;
 
 		refcnt = m_decref(m);
-		flags = MEXT_FLAGS(m);
-		if (refcnt == 0 && flags == 0) {
+		composite = (MEXT_FLAGS(m) & EXTF_COMPOSITE);
+		if (refcnt == 0 && !composite) {
 			if (m->m_ext.ext_free == NULL) {
 				mcache_free(m_cache(MC_CL), m->m_ext.ext_buf);
 			} else if (m->m_ext.ext_free == m_bigfree) {
@@ -2913,7 +3219,7 @@ m_free(struct mbuf *m)
 			}
 			mcache_free(ref_cache, MEXT_RFA(m));
 			MEXT_RFA(m) = NULL;
-		} else if (refcnt == 0 && (flags & EXTF_COMPOSITE)) {
+		} else if (refcnt == 0 && composite) {
 			VERIFY(m->m_type != MT_FREE);
 
 			mtype_stat_dec(m->m_type);
@@ -2924,6 +3230,8 @@ m_free(struct mbuf *m)
 			m->m_len = 0;
 			m->m_next = m->m_nextpkt = NULL;
 
+			MEXT_FLAGS(m) &= ~EXTF_READONLY;
+
 			/* "Free" into the intermediate cache */
 			if (m->m_ext.ext_free == NULL) {
 				mcache_free(m_cache(MC_MBUF_CL), m);
@@ -2963,11 +3271,11 @@ m_clattach(struct mbuf *m, int type, caddr_t extbuf,
 
 	if (m->m_flags & M_EXT) {
 		u_int32_t refcnt;
-		u_int32_t flags;
+		u_int32_t composite;
 
 		refcnt = m_decref(m);
-		flags = MEXT_FLAGS(m);
-		if (refcnt == 0 && flags == 0) {
+		composite = (MEXT_FLAGS(m) & EXTF_COMPOSITE);
+		if (refcnt == 0 && !composite) {
 			if (m->m_ext.ext_free == NULL) {
 				mcache_free(m_cache(MC_CL), m->m_ext.ext_buf);
 			} else if (m->m_ext.ext_free == m_bigfree) {
@@ -2982,7 +3290,7 @@ m_clattach(struct mbuf *m, int type, caddr_t extbuf,
 			}
 			/* Re-use the reference structure */
 			rfa = MEXT_RFA(m);
-		} else if (refcnt == 0 && (flags & EXTF_COMPOSITE)) {
+		} else if (refcnt == 0 && composite) {
 			VERIFY(m->m_type != MT_FREE);
 
 			mtype_stat_dec(m->m_type);
@@ -2992,6 +3300,9 @@ m_clattach(struct mbuf *m, int type, caddr_t extbuf,
 			m->m_flags = M_EXT;
 			m->m_len = 0;
 			m->m_next = m->m_nextpkt = NULL;
+
+			MEXT_FLAGS(m) &= ~EXTF_READONLY;
+
 			/* "Free" into the intermediate cache */
 			if (m->m_ext.ext_free == NULL) {
 				mcache_free(m_cache(MC_MBUF_CL), m);
@@ -3036,14 +3347,29 @@ m_getcl(int wait, int type, int flags)
 	if (mcflags & MCR_NOSLEEP)
 		mcflags |= MCR_TRYHARD;
 
-	m = mcache_alloc(m_cache(MC_MBUF_CL), mcflags);
-	if (m != NULL) {
+	m = mcache_alloc(m_cache(MC_MBUF_CL), mcflags);
+	if (m != NULL) {
+		u_int32_t flag;
+		struct ext_ref *rfa;
+		void *cl;
+
+		VERIFY(m->m_type == MT_FREE && m->m_flags == M_EXT);
+		cl = m->m_ext.ext_buf;
+		rfa = MEXT_RFA(m);
+
+		ASSERT(cl != NULL && rfa != NULL);
+		VERIFY(MBUF_IS_COMPOSITE(m) && m->m_ext.ext_free == NULL);
+
+		flag = MEXT_FLAGS(m);
+
 		MBUF_INIT(m, hdr, type);
+		MBUF_CL_INIT(m, cl, rfa, 1, flag);
+
 		mtype_stat_inc(type);
 		mtype_stat_dec(MT_FREE);
 #if CONFIG_MACF_NET
 		if (hdr && mac_init_mbuf(m, wait) != 0) {
-			m_free(m);
+			m_freem(m);
 			return (NULL);
 		}
 #endif /* MAC_NET */
@@ -3091,7 +3417,7 @@ m_mclfree(caddr_t p)
 
 /*
  * m_mclhasreference() checks if a cluster of an mbuf is referenced by
- * another mbuf
+ * another mbuf; see comments in m_incref() regarding EXTF_READONLY.
  */
 int
 m_mclhasreference(struct mbuf *m)
@@ -3101,7 +3427,7 @@ m_mclhasreference(struct mbuf *m)
 
 	ASSERT(MEXT_RFA(m) != NULL);
 
-	return (MEXT_REF(m) > 1);
+	return ((MEXT_FLAGS(m) & EXTF_READONLY) ? 1 : 0);
 }
 
 __private_extern__ caddr_t
@@ -3292,7 +3618,7 @@ m_getpackets_internal(unsigned int *num_needed, int num_with_pkthdrs,
 			--num_with_pkthdrs;
 #if CONFIG_MACF_NET
 			if (mac_mbuf_label_init(m, wait) != 0) {
-				m_free(m);
+				m_freem(m);
 				break;
 			}
 #endif /* MAC_NET */
@@ -3608,7 +3934,7 @@ m_allocpacket_internal(unsigned int *numlist, size_t packetlen,
 #if CONFIG_MACF_NET
 		if (pkthdr && mac_init_mbuf(m, wait) != 0) {
 			--num;
-			m_free(m);
+			m_freem(m);
 			break;
 		}
 #endif /* MAC_NET */
@@ -3745,7 +4071,7 @@ m_freem_list(struct mbuf *m)
 		while (m != NULL) {
 			struct mbuf *next = m->m_next;
 			mcache_obj_t *o, *rfa;
-			u_int32_t refcnt, flags;
+			u_int32_t refcnt, composite;
 
 			if (m->m_type == MT_FREE)
 				panic("m_free: freeing an already freed mbuf");
@@ -3762,8 +4088,8 @@ m_freem_list(struct mbuf *m)
 
 			o = (mcache_obj_t *)m->m_ext.ext_buf;
 			refcnt = m_decref(m);
-			flags = MEXT_FLAGS(m);
-			if (refcnt == 0 && flags == 0) {
+			composite = (MEXT_FLAGS(m) & EXTF_COMPOSITE);
+			if (refcnt == 0 && !composite) {
 				if (m->m_ext.ext_free == NULL) {
 					o->obj_next = mcl_list;
 					mcl_list = o;
@@ -3782,7 +4108,7 @@ m_freem_list(struct mbuf *m)
 				rfa->obj_next = ref_list;
 				ref_list = rfa;
 				MEXT_RFA(m) = NULL;
-			} else if (refcnt == 0 && (flags & EXTF_COMPOSITE)) {
+			} else if (refcnt == 0 && composite) {
 				VERIFY(m->m_type != MT_FREE);
 				/*
 				 * Amortize the costs of atomic operations
@@ -3804,6 +4130,8 @@ m_freem_list(struct mbuf *m)
 				m->m_len = 0;
 				m->m_next = m->m_nextpkt = NULL;
 
+				MEXT_FLAGS(m) &= ~EXTF_READONLY;
+
 				/* "Free" into the intermediate cache */
 				o = (mcache_obj_t *)m;
 				if (m->m_ext.ext_free == NULL) {
@@ -4067,7 +4395,7 @@ nospace:
  */
 struct mbuf *
 m_copym_with_hdrs(struct mbuf *m, int off0, int len0, int wait,
-    struct mbuf **m_last, int *m_off)
+    struct mbuf **m_lastm, int *m_off)
 {
 	struct mbuf *n, **np = NULL;
 	int off = off0, len = len0;
@@ -4081,8 +4409,8 @@ m_copym_with_hdrs(struct mbuf *m, int off0, int len0, int wait,
 	if (off == 0 && (m->m_flags & M_PKTHDR))
 		copyhdr = 1;
 
-	if (*m_last != NULL) {
-		m = *m_last;
+	if (*m_lastm != NULL) {
+		m = *m_lastm;
 		off = *m_off;
 	} else {
 		while (off >= m->m_len) {
@@ -4159,10 +4487,10 @@ m_copym_with_hdrs(struct mbuf *m, int off0, int len0, int wait,
 
 		if (len == 0) {
 			if ((off + n->m_len) == m->m_len) {
-				*m_last = m->m_next;
+				*m_lastm = m->m_next;
 				*m_off  = 0;
 			} else {
-				*m_last = m;
+				*m_lastm = m;
 				*m_off  = off + n->m_len;
 			}
 			break;
@@ -4385,6 +4713,56 @@ bad:
 	return (0);
 }
 
+/*
+ * Like m_pullup(), except a new mbuf is always allocated, and we allow
+ * the amount of empty space before the data in the new mbuf to be specified
+ * (in the event that the caller expects to prepend later).
+ */
+__private_extern__ int MSFail = 0;
+
+__private_extern__ struct mbuf *
+m_copyup(struct mbuf *n, int len, int dstoff)
+{
+	struct mbuf *m;
+	int count, space;
+
+	if (len > (MHLEN - dstoff))
+		goto bad;
+	MGET(m, M_DONTWAIT, n->m_type);
+	if (m == NULL)
+		goto bad;
+	m->m_len = 0;
+	if (n->m_flags & M_PKTHDR) {
+		m_copy_pkthdr(m, n);
+		n->m_flags &= ~M_PKTHDR;
+	}
+	m->m_data += dstoff;
+	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
+	do {
+		count = min(min(max(len, max_protohdr), space), n->m_len);
+		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
+		    (unsigned)count);
+		len -= count;
+		m->m_len += count;
+		n->m_len -= count;
+		space -= count;
+		if (n->m_len)
+			n->m_data += count;
+		else
+			n = m_free(n);
+	} while (len > 0 && n);
+	if (len > 0) {
+		(void) m_free(m);
+		goto bad;
+	}
+	m->m_next = n;
+	return (m);
+bad:
+	m_freem(n);
+	MSFail++;
+	return (NULL);
+}
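/*
 * The dstoff argument is easiest to see with flat buffers.  A hedged
 * userland analogue of the copy-up idea above (plain malloc/memcpy in
 * place of the mbuf plumbing; BUFLEN and the function name are
 * illustrative only).
 */
#include <assert.h>
#include <stdlib.h>
#include <string.h>

#define BUFLEN	128	/* stand-in for MHLEN in this sketch */

/*
 * Copy len bytes of src into a fresh buffer, leaving dstoff bytes of
 * empty space in front so a header can be prepended later.
 */
static char *
copyup_model(const char *src, int len, int dstoff)
{
	char *buf;

	if (len > BUFLEN - dstoff)
		return (NULL);			/* mirrors the MHLEN check */
	if ((buf = calloc(1, BUFLEN)) == NULL)
		return (NULL);
	memcpy(buf + dstoff, src, len);
	return (buf);
}

int
main(void)
{
	char *b = copyup_model("payload", 7, 16);

	assert(b != NULL && memcmp(b + 16, "payload", 7) == 0);
	free(b);
	return (0);
}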
+
 /*
  * Partition an mbuf chain in two pieces, returning the tail --
  * all but the first len0 bytes.  In case of failure, it returns NULL and
@@ -4531,29 +4909,9 @@ m_devget(char *buf, int totlen, int off0, struct ifnet *ifp,
 	return (top);
 }
 
-void
-mbuf_growth_aggressive(void)
-{
-	lck_mtx_lock(mbuf_mlock);
-	/*
-	 * Don't start to grow the pool until we are at least
-	 * 1/2 (50%) of current total capacity.
-	 */
-	mbuf_gscale = MB_GROWTH_AGGRESSIVE;
-	lck_mtx_unlock(mbuf_mlock);
-}
-
-void
-mbuf_growth_normal(void)
-{
-	lck_mtx_lock(mbuf_mlock);
-	/*
-	 * Don't start to grow the pool until we are at least
-	 * 15/16 (93.75%) of current total capacity.
-	 */
-	mbuf_gscale = MB_GROWTH_NORMAL;
-	lck_mtx_unlock(mbuf_mlock);
-}
+#ifndef MBUF_GROWTH_NORMAL_THRESH
+#define	MBUF_GROWTH_NORMAL_THRESH 25
+#endif
 
 /*
  * Cluster freelist allocation check.
@@ -4562,94 +4920,121 @@ static int
 m_howmany(int num, size_t bufsize)
 {
 	int i = 0, j = 0;
-	u_int32_t m_clusters, m_bigclusters, m_16kclusters;
-	u_int32_t m_clfree, m_bigclfree, m_16kclfree;
-	u_int32_t s = mbuf_gscale;
+	u_int32_t m_mbclusters, m_clusters, m_bigclusters, m_16kclusters;
+	u_int32_t m_mbfree, m_clfree, m_bigclfree, m_16kclfree;
+	u_int32_t sumclusters, freeclusters;
+	u_int32_t percent_pool, percent_kmem;
+	u_int32_t mb_growth, mb_growth_thresh;
+
+	VERIFY(bufsize == m_maxsize(MC_BIGCL) ||
+	    bufsize == m_maxsize(MC_16KCL));
 
 	lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED);
 
+	/* Numbers in 2K cluster units */
+	m_mbclusters = m_total(MC_MBUF) >> NMBPCLSHIFT;
 	m_clusters = m_total(MC_CL);
-	m_bigclusters = m_total(MC_BIGCL);
+	m_bigclusters = m_total(MC_BIGCL) << NCLPBGSHIFT;
 	m_16kclusters = m_total(MC_16KCL);
+	sumclusters = m_mbclusters + m_clusters + m_bigclusters;
+
+	m_mbfree = m_infree(MC_MBUF) >> NMBPCLSHIFT;
 	m_clfree = m_infree(MC_CL);
-	m_bigclfree = m_infree(MC_BIGCL);
+	m_bigclfree = m_infree(MC_BIGCL) << NCLPBGSHIFT;
 	m_16kclfree = m_infree(MC_16KCL);
+	freeclusters = m_mbfree + m_clfree + m_bigclfree;
 
 	/* Bail if we've maxed out the mbuf memory map */
-	if ((bufsize != m_maxsize(MC_16KCL) &&
-	    (m_clusters + (m_bigclusters << 1) >= nclusters)) ||
+	if ((bufsize == m_maxsize(MC_BIGCL) && sumclusters >= nclusters) ||
 	    (njcl > 0 && bufsize == m_maxsize(MC_16KCL) &&
-	    (m_16kclusters << 3) >= njcl)) {
-#if DEBUG
-		if (bufsize == MCLBYTES && num > m_clfree) {
-			printf("m_howmany - out of small clusters, "
-			    "%d short\n", num - mbstat.m_clfree);
-		}
-#endif /* DEBUG */
+	    (m_16kclusters << NCLPJCLSHIFT) >= njcl)) {
 		return (0);
 	}
 
-	if (bufsize == m_maxsize(MC_CL)) {
+	if (bufsize == m_maxsize(MC_BIGCL)) {
 		/* Under minimum */
-		if (m_clusters < MINCL)
-			return (MINCL - m_clusters);
-		/* Too few (free < threshold) and not over maximum */
-		if (m_clusters < m_maxlimit(MC_CL)) {
-			if (m_clfree >= MCL_LOWAT)
+		if (m_bigclusters < m_minlimit(MC_BIGCL))
+			return (m_minlimit(MC_BIGCL) - m_bigclusters);
+
+		percent_pool =
+		    ((sumclusters - freeclusters) * 100) / sumclusters;
+		percent_kmem = (sumclusters * 100) / nclusters;
+
+		/*
+		 * If a light/normal user, grow conservatively (75% threshold);
+		 * if a heavy user, grow aggressively (50% threshold).
+		 */
+		if (percent_kmem < MBUF_GROWTH_NORMAL_THRESH)
+			mb_growth = MB_GROWTH_NORMAL;
+		else
+			mb_growth = MB_GROWTH_AGGRESSIVE;
+
+		if (percent_kmem < 5) {
+			/* For initial allocations */
+			i = num;
+		} else {
+			/* Return if >= MBIGCL_LOWAT clusters available */
+			if (m_infree(MC_BIGCL) >= MBIGCL_LOWAT &&
+			    m_total(MC_BIGCL) >=
+			    MBIGCL_LOWAT + m_minlimit(MC_BIGCL))
 				return (0);
-			if (num >= m_clfree)
-				i = num - m_clfree;
-			if (((m_clusters + num) >> s) > m_clfree)
-				j = ((m_clusters + num) >> s) - m_clfree;
+
+			/* Ensure at least num clusters are accessible */
+			if (num >= m_infree(MC_BIGCL))
+				i = num - m_infree(MC_BIGCL);
+			if (num > m_total(MC_BIGCL) - m_minlimit(MC_BIGCL))
+				j = num - (m_total(MC_BIGCL) -
+				    m_minlimit(MC_BIGCL));
+
 			i = MAX(i, j);
-			if (i + m_clusters >= m_maxlimit(MC_CL))
-				i = m_maxlimit(MC_CL) - m_clusters;
-		}
-		VERIFY((m_total(MC_CL) + i) <= m_maxlimit(MC_CL));
-	} else if (bufsize == m_maxsize(MC_BIGCL)) {
-		/* Under minimum */
-		if (m_bigclusters < MINBIGCL)
-			return (MINBIGCL - m_bigclusters);
-		/* Too few (free < 1/16 total) and not over maximum */
-		if (m_bigclusters < m_maxlimit(MC_BIGCL)) {
-			if (m_bigclfree >= MBIGCL_LOWAT)
-				return (0);
-			if (num >= m_bigclfree)
-				i = num - m_bigclfree;
-			if (((m_bigclusters + num) >> 4) > m_bigclfree)
-				j = ((m_bigclusters + num) >> 4) - m_bigclfree;
+
+			/*
+			 * Grow pool if percent_pool > 75 (normal growth)
+			 * or percent_pool > 50 (aggressive growth).
+			 */
+			mb_growth_thresh = 100 - (100 / (1 << mb_growth));
+			if (percent_pool > mb_growth_thresh)
+				j = ((sumclusters + num) >> mb_growth) -
+				    freeclusters;
 			i = MAX(i, j);
-			if (i + m_bigclusters >= m_maxlimit(MC_BIGCL))
-				i = m_maxlimit(MC_BIGCL) - m_bigclusters;
 		}
+
+		/* Check to ensure we didn't go over limits */
+		if (i + m_bigclusters >= m_maxlimit(MC_BIGCL))
+			i = m_maxlimit(MC_BIGCL) - m_bigclusters;
+		if ((i << 1) + sumclusters >= nclusters)
+			i = (nclusters - sumclusters) >> 1;
 		VERIFY((m_total(MC_BIGCL) + i) <= m_maxlimit(MC_BIGCL));
-	} else {
+		VERIFY(sumclusters + (i << 1) <= nclusters);
+
+	} else { /* 16K CL */
 		VERIFY(njcl > 0);
 		/* Under minimum */
 		if (m_16kclusters < MIN16KCL)
 			return (MIN16KCL - m_16kclusters);
-		/* Too few (free < 1/16 total) and not over maximum */
-		if (m_16kclusters < m_maxlimit(MC_16KCL)) {
-			if (m_16kclfree >= M16KCL_LOWAT)
-				return (0);
-			if (num >= m_16kclfree)
-				i = num - m_16kclfree;
-			if (((m_16kclusters + num) >> 4) > m_16kclfree)
-				j = ((m_16kclusters + num) >> 4) - m_16kclfree;
-			i = MAX(i, j);
-			if (i + m_16kclusters >= m_maxlimit(MC_16KCL))
-				i = m_maxlimit(MC_16KCL) - m_16kclusters;
-		}
+		if (m_16kclfree >= M16KCL_LOWAT)
+			return (0);
+
+		/* Ensure at least num clusters are available */
+		if (num >= m_16kclfree)
+			i = num - m_16kclfree;
+
+		/* Always grow 16KCL pool aggressively */
+		if (((m_16kclusters + num) >> 1) > m_16kclfree)
+			j = ((m_16kclusters + num) >> 1) - m_16kclfree;
+		i = MAX(i, j);
+
+		/* Check to ensure we don't go over limit */
+		if (i + m_16kclusters >= m_maxlimit(MC_16KCL))
+			i = m_maxlimit(MC_16KCL) - m_16kclusters;
 		VERIFY((m_total(MC_16KCL) + i) <= m_maxlimit(MC_16KCL));
 	}
-
 	return (i);
 }
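/*
 * The 75%/50% figures in the comments above fall straight out of the
 * threshold formula.  A sketch assuming MB_GROWTH_NORMAL == 2 and
 * MB_GROWTH_AGGRESSIVE == 1 (assumed values, consistent with the
 * percentages quoted above).
 */
#include <assert.h>

#define MB_GROWTH_AGGRESSIVE	1	/* assumed: grow once > 50% in use */
#define MB_GROWTH_NORMAL	2	/* assumed: grow once > 75% in use */

int
main(void)
{
	/* mb_growth_thresh = 100 - (100 / (1 << mb_growth)) */
	assert(100 - (100 / (1 << MB_GROWTH_NORMAL)) == 75);
	assert(100 - (100 / (1 << MB_GROWTH_AGGRESSIVE)) == 50);
	return (0);
}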
-
 /*
  * Return the number of bytes in the mbuf chain, m.
-  */
-static unsigned int
+ */
+unsigned int
 m_length(struct mbuf *m)
 {
 	struct mbuf *m0;
@@ -5157,6 +5542,61 @@ m_normalize(struct mbuf *m)
 	return (top);
 }
 
+/*
+ * Append the specified data to the indicated mbuf chain.
+ * Extend the mbuf chain if the new data does not fit in
+ * existing space.
+ *
+ * Return 1 if able to complete the job; otherwise 0.
+ */
+int
+m_append(struct mbuf *m0, int len, caddr_t cp)
+{
+	struct mbuf *m, *n;
+	int remainder, space;
+
+	for (m = m0; m->m_next != NULL; m = m->m_next)
+		;
+	remainder = len;
+	space = M_TRAILINGSPACE(m);
+	if (space > 0) {
+		/*
+		 * Copy into available space.
+		 */
+		if (space > remainder)
+			space = remainder;
+		bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
+		m->m_len += space;
+		cp += space, remainder -= space;
+	}
+	while (remainder > 0) {
+		/*
+		 * Allocate a new mbuf; could check space
+		 * and allocate a cluster instead.
+		 */
+		n = m_get(M_WAITOK, m->m_type);
+		if (n == NULL)
+			break;
+		n->m_len = min(MLEN, remainder);
+		bcopy(cp, mtod(n, caddr_t), n->m_len);
+		cp += n->m_len;
+		remainder -= n->m_len;
+		m->m_next = n;
+		m = n;
+	}
+	if (m0->m_flags & M_PKTHDR)
+		m0->m_pkthdr.len += len - remainder;
+	return (remainder == 0);
+}
+
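
A hypothetical usage sketch of m_append() as defined above; the payload buffer and its size are illustrative assumptions, not part of the patch:

	/* Append 64 bytes to a fresh packet header mbuf and confirm
	 * that m_pkthdr.len tracks the bytes actually appended. */
	struct mbuf *m = m_gethdr(M_WAITOK, MT_DATA);
	if (m != NULL) {
		char payload[64] = { 0 };	/* assumed example data */

		if (m_append(m, sizeof (payload), (caddr_t)payload) == 0) {
			/* Allocation failed mid-copy; chain holds a partial copy. */
			m_freem(m);
		} else {
			VERIFY(m->m_pkthdr.len == sizeof (payload));
		}
	}
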
+struct mbuf *
+m_last(struct mbuf *m)
+{
+	while (m->m_next != NULL)
+		m = m->m_next;
+	return (m);
+}
+
 void
 m_mchtype(struct mbuf *m, int t)
 {
@@ -5183,6 +5623,34 @@ m_mcheck(struct mbuf *m)
 	_MCHECK(m);
 }
 
+/*
+ * Return the mbuf, and offset within it, of a byte location in a chain.
+ */
+struct mbuf *
+m_getptr(struct mbuf *m, int loc, int *off)
+{
+
+	while (loc >= 0) {
+		/* Normal end of search. */
+		if (m->m_len > loc) {
+			*off = loc;
+			return (m);
+		} else {
+			loc -= m->m_len;
+			if (m->m_next == NULL) {
+				if (loc == 0) {
+					/* Point at the end of valid data. */
+					*off = m->m_len;
+					return (m);
+				}
+				return (NULL);
+			}
+			m = m->m_next;
+		}
+	}
+	return (NULL);
+}
+
 /*
  * Inform the corresponding mcache(s) that there's a waiter below.
  */
@@ -5225,6 +5693,29 @@ mbuf_waiter_dec(mbuf_class_t class, boolean_t comp)
 	}
 }
 
+/*
+ * Called during slab (blocking and non-blocking) allocation.  If there
+ * is at least one waiter and the time since the first waiter began
+ * blocking exceeds the watchdog timeout, panic the system.
+ */
+static void
+mbuf_watchdog(void)
+{
+	struct timeval now;
+	unsigned int since;
+
+	if (mb_waiters == 0 || !mb_watchdog)
+		return;
+
+	microuptime(&now);
+	since = now.tv_sec - mb_wdtstart.tv_sec;
+	if (since >= MB_WDT_MAXTIME) {
+		panic_plain("%s: %d waiters stuck for %u secs\n%s", __func__,
+		    mb_waiters, since, mbuf_dump());
+		/* NOTREACHED */
+	}
+}
+
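
A standalone sketch of the arm-on-first-waiter pattern that mbuf_watchdog() implements together with mbuf_sleep() below; the timeout constant and abort() are stand-ins for MB_WDT_MAXTIME and panic_plain():

	#include <stdlib.h>
	#include <time.h>

	#define WDT_MAXTIME	10	/* seconds; stand-in for MB_WDT_MAXTIME */

	static unsigned int waiters;
	static time_t wdtstart;

	static void
	watchdog_check(void)
	{
		if (waiters == 0)
			time(&wdtstart);	/* first waiter arms the timer */
		else if (time(NULL) - wdtstart >= WDT_MAXTIME)
			abort();		/* stand-in for panic_plain() */
		waiters++;
		/* ... block until buffers are freed, then waiters-- ... */
	}
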
 /*
  * Called during blocking allocation.  Returns TRUE if one or more objects
  * are available at the per-CPU caches layer and that allocation should be
@@ -5266,6 +5757,16 @@ mbuf_sleep(mbuf_class_t class, unsigned int num, int wait)
 	mbuf_waiter_inc(class, (wait & MCR_COMP));
 
 	VERIFY(!(wait & MCR_NOSLEEP));
+
+	/*
+	 * If this is the first waiter, arm the watchdog timer.  Otherwise
+	 * check if we need to panic the system due to watchdog timeout.
+	 */
+	if (mb_waiters == 0)
+		microuptime(&mb_wdtstart);
+	else
+		mbuf_watchdog();
+
 	mb_waiters++;
 	(void) msleep(mb_waitchan, mbuf_mlock, (PZERO-1), m_cname(class), NULL);
 
@@ -5420,7 +5921,7 @@ slab_get(void *buf)
 		}
 	}
 
-	ix = MTOCL(buf) % NSLABSPMB;
+	ix = MTOBG(buf) % NSLABSPMB;
 	VERIFY(ix < NSLABSPMB);
 
 	return (&slg->slg_slab[ix]);
@@ -5447,15 +5948,9 @@ slab_insert(mcl_slab_t *sp, mbuf_class_t class)
 	m_slab_cnt(class)++;
 	TAILQ_INSERT_TAIL(&m_slablist(class), sp, sl_link);
 	sp->sl_flags &= ~SLF_DETACHED;
-	if (class == MC_BIGCL) {
-		sp = sp->sl_next;
-		/* Next slab must already be present */
-		VERIFY(sp != NULL);
-		VERIFY(slab_is_detached(sp));
-		sp->sl_flags &= ~SLF_DETACHED;
-	} else if (class == MC_16KCL) {
+	if (class == MC_16KCL) {
 		int k;
-		for (k = 1; k < (M16KCLBYTES / MCLBYTES); k++) {
+		for (k = 1; k < NSLABSP16KB; k++) {
 			sp = sp->sl_next;
 			/* Next slab must already be present */
 			VERIFY(sp != NULL);
@@ -5473,15 +5968,9 @@ slab_remove(mcl_slab_t *sp, mbuf_class_t class)
 	m_slab_cnt(class)--;
 	TAILQ_REMOVE(&m_slablist(class), sp, sl_link);
 	slab_detach(sp);
-	if (class == MC_BIGCL) {
-		sp = sp->sl_next;
-		/* Next slab must already be present */
-		VERIFY(sp != NULL);
-		VERIFY(!slab_is_detached(sp));
-		slab_detach(sp);
-	} else if (class == MC_16KCL) {
+	if (class == MC_16KCL) {
 		int k;
-		for (k = 1; k < (M16KCLBYTES / MCLBYTES); k++) {
+		for (k = 1; k < NSLABSP16KB; k++) {
 			sp = sp->sl_next;
 			/* Next slab must already be present */
 			VERIFY(sp != NULL);
@@ -5511,7 +6000,7 @@ slab_nextptr_panic(mcl_slab_t *sp, void *addr)
 		void *next = ((mcache_obj_t *)buf)->obj_next;
 		if (next != addr)
 			continue;
-		if (mclaudit == NULL) {
+		if (!mclverify) {
 			if (next != NULL && !MBUF_IN_MAP(next)) {
 				mcache_t *cp = m_cache(sp->sl_class);
 				panic("%s: %s buffer %p in slab %p modified "
@@ -5553,12 +6042,14 @@ mcl_audit_init(void *buf, mcache_audit_t **mca_list,
 	boolean_t save_contents = (con_list != NULL);
 	unsigned int i, ix;
 
-	ASSERT(num <= NMBPCL);
+	ASSERT(num <= NMBPBG);
 	ASSERT(con_list == NULL || con_size != 0);
 
-	ix = MTOCL(buf);
+	ix = MTOBG(buf);
+	VERIFY(ix < maxclaudit);
+
 	/* Make sure we haven't been here before */
-	for (i = 0; i < NMBPCL; i++)
+	for (i = 0; i < NMBPBG; i++)
 		VERIFY(mclaudit[ix].cl_audit[i] == NULL);
 
 	mca = mca_tail = *mca_list;
@@ -5594,31 +6085,39 @@ mcl_audit_init(void *buf, mcache_audit_t **mca_list,
 }
 
 /*
- * Given an address of a buffer (mbuf/cluster/big cluster), return
+ * Given an address of a buffer (mbuf/2KB/4KB/16KB), return
  * the corresponding audit structure for that buffer.
  */
 static mcache_audit_t *
 mcl_audit_buf2mca(mbuf_class_t class, mcache_obj_t *o)
 {
 	mcache_audit_t *mca = NULL;
-	int ix = MTOCL(o);
+	int ix = MTOBG(o);
 
+	VERIFY(ix < maxclaudit);
 	VERIFY(IS_P2ALIGNED(o, MIN(m_maxsize(class), NBPG)));
 
 	switch (class) {
 	case MC_MBUF:
 		/*
-		 * For the mbuf case, find the index of the cluster
+		 * For the mbuf case, find the index of the page
 		 * used by the mbuf and use that index to locate the
-		 * base address of the cluster.  Then find out the
-		 * mbuf index relative to the cluster base and use
+		 * base address of the page.  Then find out the
+		 * mbuf index relative to the page base and use
 		 * it to locate the audit structure.
 		 */
-		VERIFY(MCLIDX(CLTOM(ix), o) < (int)NMBPCL);
-		mca = mclaudit[ix].cl_audit[MCLIDX(CLTOM(ix), o)];
+		VERIFY(MCLIDX(BGTOM(ix), o) < (int)NMBPBG);
+		mca = mclaudit[ix].cl_audit[MCLIDX(BGTOM(ix), o)];
 		break;
 
 	case MC_CL:
+		/*
+		 * Same thing as above, but for 2KB clusters in a page.
+		 */
+		VERIFY(CLBGIDX(BGTOM(ix), o) < (int)NCLPBG);
+		mca = mclaudit[ix].cl_audit[CLBGIDX(BGTOM(ix), o)];
+		break;
+
 	case MC_BIGCL:
 	case MC_16KCL:
 		/*
@@ -5645,19 +6144,24 @@ mcl_audit_mbuf(mcache_audit_t *mca, void *addr, boolean_t composite,
 	VERIFY(mca->mca_contents != NULL &&
 	    mca->mca_contents_size == AUDIT_CONTENTS_SIZE);
 
-	mcl_audit_verify_nextptr(next, mca);
+	if (mclverify)
+		mcl_audit_verify_nextptr(next, mca);
 
 	if (!alloc) {
 		/* Save constructed mbuf fields */
 		mcl_audit_save_mbuf(m, mca);
-		mcache_set_pattern(MCACHE_FREE_PATTERN, m, m_maxsize(MC_MBUF));
+		if (mclverify) {
+			mcache_set_pattern(MCACHE_FREE_PATTERN, m,
+			    m_maxsize(MC_MBUF));
+		}
 		((mcache_obj_t *)m)->obj_next = next;
 		return;
 	}
 
 	/* Check if the buffer has been corrupted while in freelist */
-	mcache_audit_free_verify_set(mca, addr, 0, m_maxsize(MC_MBUF));
-
+	if (mclverify) {
+		mcache_audit_free_verify_set(mca, addr, 0, m_maxsize(MC_MBUF));
+	}
 	/* Restore constructed mbuf fields */
 	mcl_audit_restore_mbuf(m, mca, composite);
 }
@@ -5704,12 +6208,14 @@ mcl_audit_cluster(mcache_audit_t *mca, void *addr, size_t size, boolean_t alloc,
 	mcache_obj_t *next = ((mcache_obj_t *)addr)->obj_next;
 
 	if (!alloc) {
-		mcache_set_pattern(MCACHE_FREE_PATTERN, addr, size);
+		if (mclverify) {
+			mcache_set_pattern(MCACHE_FREE_PATTERN, addr, size);
+		}
 		if (save_next) {
 			mcl_audit_verify_nextptr(next, mca);
 			((mcache_obj_t *)addr)->obj_next = next;
 		}
-	} else {
+	} else if (mclverify) {
 		/* Check if the buffer has been corrupted while in freelist */
 		mcl_audit_verify_nextptr(next, mca);
 		mcache_audit_free_verify_set(mca, addr, 0, size);
@@ -5732,8 +6238,8 @@ mcl_audit_mcheck_panic(struct mbuf *m)
 static void
 mcl_audit_verify_nextptr(void *next, mcache_audit_t *mca)
 {
-	if (next != NULL && next != (void *)MCACHE_FREE_PATTERN &&
-	    !MBUF_IN_MAP(next)) {
+	if (next != NULL && !MBUF_IN_MAP(next) &&
+	    (next != (void *)MCACHE_FREE_PATTERN || !mclverify)) {
 		panic("mcl_audit: buffer %p modified after free at offset 0: "
 		    "%p out of range [%p-%p)\n%s\n",
 		    mca->mca_addr, next, mbutl, embutl, mcache_dump_mca(mca));
@@ -5741,10 +6247,358 @@ mcl_audit_verify_nextptr(void *next, mcache_audit_t *mca)
 	}
 }
 
+/* This function turns on mbuf leak detection */
+static void
+mleak_activate(void)
+{
+	mleak_table.mleak_sample_factor = MLEAK_SAMPLE_FACTOR;
+	PE_parse_boot_argn("mleak_sample_factor",
+	    &mleak_table.mleak_sample_factor,
+	    sizeof (mleak_table.mleak_sample_factor));
+
+	if (mleak_table.mleak_sample_factor == 0)
+		mclfindleak = 0;
+
+	if (mclfindleak == 0)
+		return;
+
+	vm_size_t alloc_size =
+	    mleak_alloc_buckets * sizeof (struct mallocation);
+	vm_size_t trace_size = mleak_trace_buckets * sizeof (struct mtrace);
+
+	MALLOC(mleak_allocations, struct mallocation *, alloc_size,
+	    M_TEMP, M_WAITOK | M_ZERO);
+	VERIFY(mleak_allocations != NULL);
+
+	MALLOC(mleak_traces, struct mtrace *, trace_size,
+	    M_TEMP, M_WAITOK | M_ZERO);
+	VERIFY(mleak_traces != NULL);
+
+	MALLOC(mleak_stat, mleak_stat_t *, MLEAK_STAT_SIZE(MLEAK_NUM_TRACES),
+	    M_TEMP, M_WAITOK | M_ZERO);
+	VERIFY(mleak_stat != NULL);
+	mleak_stat->ml_cnt = MLEAK_NUM_TRACES;
+#ifdef __LP64__
+	mleak_stat->ml_isaddr64 = 1;
+#endif /* __LP64__ */
+}
+
+static void
+mleak_logger(u_int32_t num, mcache_obj_t *addr, boolean_t alloc)
+{
+	int temp;
+
+	if (mclfindleak == 0)
+		return;
+
+	if (!alloc)
+		return (mleak_free(addr));
+
+	temp = atomic_add_32_ov(&mleak_table.mleak_capture, 1);
+
+	if ((temp % mleak_table.mleak_sample_factor) == 0 && addr != NULL) {
+		uintptr_t bt[MLEAK_STACK_DEPTH];
+		int logged = fastbacktrace(bt, MLEAK_STACK_DEPTH);
+		mleak_log(bt, addr, logged, num);
+	}
+}
+
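mleak_logger() records only every Nth allocation, gated by an atomically advanced counter. A user-space sketch of the same 1-in-N gate, assuming C11 atomics in place of atomic_add_32_ov() (which returns the pre-increment value):

	#include <stdatomic.h>
	#include <stdbool.h>

	static _Atomic unsigned int capture;

	/* Return true for every sample_factor-th call; sample_factor must be
	 * nonzero (mleak_activate() disables leak detection when the boot-arg
	 * sets it to zero). */
	static bool
	should_sample(unsigned int sample_factor)
	{
		unsigned int n = atomic_fetch_add(&capture, 1);
		return ((n % sample_factor) == 0);
	}
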
+/*
+ * This function records the allocation in the mleak_allocations table
+ * and the backtrace in the mleak_traces table.  If the allocation slot
+ * is in use, the old allocation is replaced with the new one; if the
+ * trace slot is in use, we return (or increment the refcount if it is
+ * the same trace).
+ */
+static boolean_t
+mleak_log(uintptr_t *bt, mcache_obj_t *addr, uint32_t depth, int num)
+{
+	struct mallocation *allocation;
+	struct mtrace *trace;
+	uint32_t trace_index;
+	int i;
+
+	/* Quit if someone else is modifying the tables */
+	if (!lck_mtx_try_lock_spin(mleak_lock)) {
+		mleak_table.total_conflicts++;
+		return (FALSE);
+	}
+
+	allocation = &mleak_allocations[hashaddr((uintptr_t)addr,
+	    mleak_alloc_buckets)];
+	trace_index = hashbacktrace(bt, depth, mleak_trace_buckets);
+	trace = &mleak_traces[trace_index];
+
+	VERIFY(allocation <= &mleak_allocations[mleak_alloc_buckets - 1]);
+	VERIFY(trace <= &mleak_traces[mleak_trace_buckets - 1]);
+
+	allocation->hitcount++;
+	trace->hitcount++;
+
+	/*
+	 * If the allocation bucket we want is occupied
+	 * and the occupier has the same trace, just bail.
+	 */
+	if (allocation->element != NULL &&
+	    trace_index == allocation->trace_index) {
+		mleak_table.alloc_collisions++;
+		lck_mtx_unlock(mleak_lock);
+		return (TRUE);
+	}
+
+	/*
+	 * Store the backtrace in the traces array; an allocs count of
+	 * zero means the trace bucket is free.
+	 */
+	if (trace->allocs > 0 &&
+	    bcmp(trace->addr, bt, (depth * sizeof (uintptr_t))) != 0) {
+		/* Different, unique trace, but the same hash! Bail out. */
+		trace->collisions++;
+		mleak_table.trace_collisions++;
+		lck_mtx_unlock(mleak_lock);
+		return (TRUE);
+	} else if (trace->allocs > 0) {
+		/* Same trace, already added, so increment refcount */
+		trace->allocs++;
+	} else {
+		/* Found an unused trace bucket, so record the trace here */
+		if (trace->depth != 0) {
+			/* This slot was previously used but is not in use now */
+			mleak_table.trace_overwrites++;
+		}
+		mleak_table.trace_recorded++;
+		trace->allocs = 1;
+		memcpy(trace->addr, bt, (depth * sizeof (uintptr_t)));
+		trace->depth = depth;
+		trace->collisions = 0;
+	}
+
+	/* Step 2: Store the allocation record in the allocations array */
+	if (allocation->element != NULL) {
+		/*
+		 * Replace an existing allocation.  No need to preserve
+		 * because only a subset of the allocations are being
+		 * recorded anyway.
+		 */
+		mleak_table.alloc_collisions++;
+	} else if (allocation->trace_index != 0) {
+		mleak_table.alloc_overwrites++;
+	}
+	allocation->element = addr;
+	allocation->trace_index = trace_index;
+	allocation->count = num;
+	mleak_table.alloc_recorded++;
+	mleak_table.outstanding_allocs++;
+
+	/* Keep the top MLEAK_NUM_TRACES traces, sorted by allocation count */
+	for (i = 0; i < MLEAK_NUM_TRACES; i++) {
+		if (mleak_top_trace[i] == NULL ||
+		    mleak_top_trace[i]->allocs <= trace->allocs) {
+			if (mleak_top_trace[i] != trace) {
+				int j = MLEAK_NUM_TRACES;
+				while (--j > i) {
+					mleak_top_trace[j] =
+					    mleak_top_trace[j - 1];
+				}
+				mleak_top_trace[i] = trace;
+			}
+			break;
+		}
+	}
+
+	lck_mtx_unlock(mleak_lock);
+	return (TRUE);
+}
+
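The loop at the end of mleak_log() maintains mleak_top_trace as a small array sorted by allocation count. A sketch of that bounded insertion, assuming the struct mtrace layout used above (NTOP stands in for MLEAK_NUM_TRACES):

	#define NTOP	5	/* stand-in for MLEAK_NUM_TRACES */

	static void
	top_insert(struct mtrace *top[NTOP], struct mtrace *trace)
	{
		int i, j;

		for (i = 0; i < NTOP; i++) {
			if (top[i] == NULL || top[i]->allocs <= trace->allocs) {
				if (top[i] != trace) {
					/* Shift lower entries down, insert. */
					for (j = NTOP - 1; j > i; j--)
						top[j] = top[j - 1];
					top[i] = trace;
				}
				break;
			}
		}
	}
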
+static void
+mleak_free(mcache_obj_t *addr)
+{
+	while (addr != NULL) {
+		struct mallocation *allocation = &mleak_allocations
+		    [hashaddr((uintptr_t)addr, mleak_alloc_buckets)];
+
+		if (allocation->element == addr &&
+		    allocation->trace_index < mleak_trace_buckets) {
+			lck_mtx_lock_spin(mleak_lock);
+			if (allocation->element == addr &&
+			    allocation->trace_index < mleak_trace_buckets) {
+				struct mtrace *trace;
+				trace = &mleak_traces[allocation->trace_index];
+				/* allocs = 0 means trace bucket is unused */
+				if (trace->allocs > 0)
+					trace->allocs--;
+				if (trace->allocs == 0)
+					trace->depth = 0;
+				/* NULL element means alloc bucket is unused */
+				allocation->element = NULL;
+				mleak_table.outstanding_allocs--;
+			}
+			lck_mtx_unlock(mleak_lock);
+		}
+		addr = addr->obj_next;
+	}
+}
+
+static struct mbtypes {
+	int		mt_type;
+	const char	*mt_name;
+} mbtypes[] = {
+	{ MT_DATA,	"data" },
+	{ MT_OOBDATA,	"oob data" },
+	{ MT_CONTROL,	"ancillary data" },
+	{ MT_HEADER,	"packet headers" },
+	{ MT_SOCKET,	"socket structures" },
+	{ MT_PCB,	"protocol control blocks" },
+	{ MT_RTABLE,	"routing table entries" },
+	{ MT_HTABLE,	"IMP host table entries" },
+	{ MT_ATABLE,	"address resolution tables" },
+	{ MT_FTABLE,	"fragment reassembly queue headers" },
+	{ MT_SONAME,	"socket names and addresses" },
+	{ MT_SOOPTS,	"socket options" },
+	{ MT_RIGHTS,	"access rights" },
+	{ MT_IFADDR,	"interface addresses" },
+	{ MT_TAG,	"packet tags" },
+	{ 0,		NULL }
+};
+
+#define	MBUF_DUMP_BUF_CHK() {	\
+	clen -= k;		\
+	if (clen < 1)		\
+		goto done;	\
+	c += k;			\
+}
+
+static char *
+mbuf_dump(void)
+{
+	unsigned long totmem = 0, totfree = 0, totmbufs, totused, totpct;
+	u_int32_t m_mbufs = 0, m_clfree = 0, m_bigclfree = 0;
+	u_int32_t m_mbufclfree = 0, m_mbufbigclfree = 0;
+	u_int32_t m_16kclusters = 0, m_16kclfree = 0, m_mbuf16kclfree = 0;
+	int nmbtypes = sizeof (mbstat.m_mtypes) / sizeof (short);
+	uint8_t seen[256];
+	struct mbtypes *mp;
+	mb_class_stat_t *sp;
+	char *c = mbuf_dump_buf;
+	int i, k, clen = sizeof (mbuf_dump_buf);
+
+	mbuf_dump_buf[0] = '\0';
+
+	/* synchronize all statistics in the mbuf table */
+	mbuf_stat_sync();
+	mbuf_mtypes_sync(TRUE);
+
+	sp = &mb_stat->mbs_class[0];
+	for (i = 0; i < mb_stat->mbs_cnt; i++, sp++) {
+		u_int32_t mem;
+
+		if (m_class(i) == MC_MBUF) {
+			m_mbufs = sp->mbcl_active;
+		} else if (m_class(i) == MC_CL) {
+			m_clfree = sp->mbcl_total - sp->mbcl_active;
+		} else if (m_class(i) == MC_BIGCL) {
+			m_bigclfree = sp->mbcl_total - sp->mbcl_active;
+		} else if (njcl > 0 && m_class(i) == MC_16KCL) {
+			m_16kclfree = sp->mbcl_total - sp->mbcl_active;
+			m_16kclusters = sp->mbcl_total;
+		} else if (m_class(i) == MC_MBUF_CL) {
+			m_mbufclfree = sp->mbcl_total - sp->mbcl_active;
+		} else if (m_class(i) == MC_MBUF_BIGCL) {
+			m_mbufbigclfree = sp->mbcl_total - sp->mbcl_active;
+		} else if (njcl > 0 && m_class(i) == MC_MBUF_16KCL) {
+			m_mbuf16kclfree = sp->mbcl_total - sp->mbcl_active;
+		}
+
+		mem = sp->mbcl_ctotal * sp->mbcl_size;
+		totmem += mem;
+		totfree += (sp->mbcl_mc_cached + sp->mbcl_infree) *
+		    sp->mbcl_size;
+
+	}
+
+	/* adjust free counts to include composite caches */
+	m_clfree += m_mbufclfree;
+	m_bigclfree += m_mbufbigclfree;
+	m_16kclfree += m_mbuf16kclfree;
+
+	totmbufs = 0;
+	for (mp = mbtypes; mp->mt_name != NULL; mp++)
+		totmbufs += mbstat.m_mtypes[mp->mt_type];
+	if (totmbufs > m_mbufs)
+		totmbufs = m_mbufs;
+	k = snprintf(c, clen, "%lu/%u mbufs in use:\n", totmbufs, m_mbufs);
+	MBUF_DUMP_BUF_CHK();
+
+	bzero(&seen, sizeof (seen));
+	for (mp = mbtypes; mp->mt_name != NULL; mp++) {
+		if (mbstat.m_mtypes[mp->mt_type] != 0) {
+			seen[mp->mt_type] = 1;
+			k = snprintf(c, clen, "\t%u mbufs allocated to %s\n",
+			    mbstat.m_mtypes[mp->mt_type], mp->mt_name);
+			MBUF_DUMP_BUF_CHK();
+		}
+	}
+	seen[MT_FREE] = 1;
+	for (i = 0; i < nmbtypes; i++)
+		if (!seen[i] && mbstat.m_mtypes[i] != 0) {
+			k = snprintf(c, clen, "\t%u mbufs allocated to "
+			    "<mbuf type %d>\n", mbstat.m_mtypes[i], i);
+			MBUF_DUMP_BUF_CHK();
+		}
+	if ((m_mbufs - totmbufs) > 0) {
+		k = snprintf(c, clen, "\t%lu mbufs allocated to caches\n",
+		    m_mbufs - totmbufs);
+		MBUF_DUMP_BUF_CHK();
+	}
+	k = snprintf(c, clen, "%u/%u mbuf 2KB clusters in use\n"
+	    "%u/%u mbuf 4KB clusters in use\n",
+	    (unsigned int)(mbstat.m_clusters - m_clfree),
+	    (unsigned int)mbstat.m_clusters,
+	    (unsigned int)(mbstat.m_bigclusters - m_bigclfree),
+	    (unsigned int)mbstat.m_bigclusters);
+	MBUF_DUMP_BUF_CHK();
+
+	if (njcl > 0) {
+		k = snprintf(c, clen, "%u/%u mbuf %uKB clusters in use\n",
+		    m_16kclusters - m_16kclfree, m_16kclusters,
+		    njclbytes / 1024);
+		MBUF_DUMP_BUF_CHK();
+	}
+	totused = totmem - totfree;
+	if (totmem == 0) {
+		totpct = 0;
+	} else if (totused < (ULONG_MAX / 100)) {
+		totpct = (totused * 100) / totmem;
+	} else {
+		u_long totmem1 = totmem / 100;
+		u_long totused1 = totused / 100;
+		totpct = (totused1 * 100) / totmem1;
+	}
+	k = snprintf(c, clen, "%lu KB allocated to network (approx. %lu%% "
+	    "in use)\n", totmem / 1024, totpct);
+	MBUF_DUMP_BUF_CHK();
+
+done:
+	return (mbuf_dump_buf);
+}
+
+#undef MBUF_DUMP_BUF_CHK
+
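mbuf_dump() computes the in-use percentage without risking overflow of totused * 100. A standalone sketch of that computation:

	#include <limits.h>

	static unsigned long
	pct_used(unsigned long used, unsigned long total)
	{
		if (total == 0)
			return (0);
		if (used < (ULONG_MAX / 100))
			return ((used * 100) / total);
		/* used * 100 would overflow; scale both operands down first. */
		return (((used / 100) * 100) / (total / 100));
	}
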
 SYSCTL_DECL(_kern_ipc);
-SYSCTL_PROC(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD | CTLFLAG_LOCKED,
+SYSCTL_PROC(_kern_ipc, KIPC_MBSTAT, mbstat,
+    CTLFLAG_RD | CTLFLAG_LOCKED,
     0, 0, mbstat_sysctl, "S,mbstat", "");
-SYSCTL_PROC(_kern_ipc, OID_AUTO, mb_stat, CTLFLAG_RD | CTLFLAG_LOCKED,
+SYSCTL_PROC(_kern_ipc, OID_AUTO, mb_stat,
+    CTLFLAG_RD | CTLFLAG_LOCKED,
     0, 0, mb_stat_sysctl, "S,mb_stat", "");
-SYSCTL_INT(_kern_ipc, OID_AUTO, mb_normalized, CTLFLAG_RD | CTLFLAG_LOCKED,
-    &mb_normalized, 0, "");
+SYSCTL_PROC(_kern_ipc, OID_AUTO, mleak_top_trace,
+    CTLFLAG_RD | CTLFLAG_LOCKED,
+    0, 0, mleak_top_trace_sysctl, "S,mb_top_trace", "");
+SYSCTL_PROC(_kern_ipc, OID_AUTO, mleak_table,
+    CTLFLAG_RD | CTLFLAG_LOCKED,
+    0, 0, mleak_table_sysctl, "S,mleak_table", "");
+SYSCTL_INT(_kern_ipc, OID_AUTO, mleak_sample_factor,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &mleak_table.mleak_sample_factor, 0, "");
+SYSCTL_INT(_kern_ipc, OID_AUTO, mb_normalized,
+    CTLFLAG_RD | CTLFLAG_LOCKED, &mb_normalized, 0, "");
+SYSCTL_INT(_kern_ipc, OID_AUTO, mb_watchdog,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &mb_watchdog, 0, "");
diff --git a/bsd/kern/uipc_mbuf2.c b/bsd/kern/uipc_mbuf2.c
index 5276ce659..386238460 100644
--- a/bsd/kern/uipc_mbuf2.c
+++ b/bsd/kern/uipc_mbuf2.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -105,11 +105,12 @@
 #include <sys/proc_internal.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
-#if defined(PULLDOWN_STAT) && defined(INET6)
+#include <sys/mcache.h>
+#if INET6
 #include <netinet/in.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
-#endif
+#endif /* INET6 */
 
 #if CONFIG_MACF_NET
 #include <security/mac_framework.h>
@@ -131,7 +132,7 @@ m_pulldown(struct mbuf *m, int off, int len, int *offp)
 	struct mbuf *n, *o;
 	int hlen, tlen, olen;
 	int sharedcluster;
-#if defined(PULLDOWN_STAT) && defined(INET6)
+#if defined(PULLDOWN_STAT) && INET6
 	static struct mbuf *prev = NULL;
 	int prevlen = 0, prevmlen = 0;
 #endif
@@ -144,11 +145,11 @@ m_pulldown(struct mbuf *m, int off, int len, int *offp)
 		return NULL;	/* impossible */
 	}
 
-#if defined(PULLDOWN_STAT) && defined(INET6)
+#if defined(PULLDOWN_STAT) && INET6
 	ip6stat.ip6s_pulldown++;
 #endif
 
-#if defined(PULLDOWN_STAT) && defined(INET6)
+#if defined(PULLDOWN_STAT) && INET6
 	/* statistics for m_pullup */
 	ip6stat.ip6s_pullup++;
 	if (off + len > MHLEN)
@@ -241,7 +242,7 @@ m_pulldown(struct mbuf *m, int off, int len, int *offp)
 	if ((off == 0 || offp) && len <= n->m_len - off)
 		goto ok;
 
-#if defined(PULLDOWN_STAT) && defined(INET6)
+#if defined(PULLDOWN_STAT) && INET6
 	ip6stat.ip6s_pulldown_copy++;
 #endif
 
@@ -321,7 +322,7 @@ m_pulldown(struct mbuf *m, int off, int len, int *offp)
 	 * now, we need to do the hard way.  don't m_copy as there's no room
 	 * on both end.
 	 */
-#if defined(PULLDOWN_STAT) && defined(INET6)
+#if defined(PULLDOWN_STAT) && INET6
 	ip6stat.ip6s_pulldown_alloc++;
 #endif
 	MGET(o, M_DONTWAIT, m->m_type);
@@ -365,6 +366,67 @@ ok:
 	return n;
 }
 
+/*
+ * Create and return an m_tag, either by re-using space in a previous tag
+ * or by allocating a new mbuf/cluster.
+ */
+struct m_tag *
+m_tag_create(u_int32_t id, u_int16_t type, int len, int wait, struct mbuf *buf)
+{
+	struct m_tag *t = NULL;
+	struct m_tag *p;
+
+	if (len < 0)
+		return (NULL);
+
+	if (len + sizeof (struct m_tag) + sizeof (struct m_taghdr) > MLEN)
+		return (m_tag_alloc(id, type, len, wait));
+
+	/*
+	 * We've exhausted the cases requiring a separate allocation.
+	 * Now walk the m_tag chain and see if the new tag fits in any
+	 * of the existing tag mbufs; if not (t == NULL), fall back to
+	 * m_tag_alloc to store it in a new mbuf.
+	 */
+	p = SLIST_FIRST(&buf->m_pkthdr.tags);
+	while (p != NULL) {
+		/* 2KCL m_tag */
+		if (M_TAG_ALIGN(p->m_tag_len) +
+		    sizeof (struct m_taghdr) > MLEN) {
+			p = SLIST_NEXT(p, m_tag_link);
+			continue;
+		}
+
+		VERIFY(p->m_tag_cookie == M_TAG_VALID_PATTERN);
+
+		struct mbuf *m = m_dtom(p);
+		struct m_taghdr *hdr = (struct m_taghdr *)m->m_data;
+
+		VERIFY(m->m_flags & M_TAGHDR && !(m->m_flags & M_EXT));
+
+		/* The mbuf can store this m_tag */
+		if (M_TAG_ALIGN(len) <= MLEN - m->m_len) {
+			t = (struct m_tag *)(m->m_data + m->m_len);
+			hdr->refcnt++;
+			m->m_len += M_TAG_ALIGN(len);
+			VERIFY(m->m_len <= MLEN);
+			break;
+		}
+
+		p = SLIST_NEXT(p, m_tag_link);
+	}
+
+	if (t == NULL)
+		return (m_tag_alloc(id, type, len, wait));
+
+	t->m_tag_cookie = M_TAG_VALID_PATTERN;
+	t->m_tag_type = type;
+	t->m_tag_len = len;
+	t->m_tag_id = id;
+	if (len > 0)
+		bzero(t + 1, len);
+	return (t);
+}
+
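m_tag_create() packs multiple small tags into one MT_TAG mbuf: the mbuf begins with a struct m_taghdr whose refcnt counts resident tags, followed by M_TAG_ALIGN()ed tag records. A sketch of the capacity test it applies before reusing an existing tag mbuf:

	/* A new tag of length len fits if its aligned size does not
	 * exceed the space remaining in the MLEN-byte data area. */
	static int
	tag_fits(struct mbuf *m, int len)
	{
		return (M_TAG_ALIGN(len) <= MLEN - m->m_len);
	}
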
 /* Get a packet tag structure along with specified data following. */
 struct m_tag *
 m_tag_alloc(u_int32_t id, u_int16_t type, int len, int wait)
@@ -372,26 +434,39 @@ m_tag_alloc(u_int32_t id, u_int16_t type, int len, int wait)
 	struct m_tag *t;
 
 	if (len < 0)
-		return NULL;
-#if CONFIG_MBUF_TAGS_MALLOC
-	t = _MALLOC(len + sizeof (struct m_tag), M_TEMP, wait);
-#else
-        if (len + sizeof(struct m_tag) <= MLEN) {
+		return (NULL);
+
+        if (M_TAG_ALIGN(len) + sizeof (struct m_taghdr) <= MLEN) {
 		struct mbuf *m = m_get(wait, MT_TAG);
+		struct m_taghdr *hdr;
+
 		if (m == NULL)
-			return NULL;
-		t = mtod(m, struct m_tag *);
-        } else if (len + sizeof(struct m_tag) <= MCLBYTES) {
-        	t = (struct m_tag *) m_mclalloc(wait);
-        } else
+			return (NULL);
+
+		m->m_flags |= M_TAGHDR;
+
+		hdr = (struct m_taghdr *)m->m_data;
+		hdr->refcnt = 1;
+		m->m_len += sizeof (struct m_taghdr);
+		t = (struct m_tag *)(m->m_data + m->m_len);
+		m->m_len += M_TAG_ALIGN(len);
+		VERIFY(m->m_len <= MLEN);
+        } else if (len + sizeof (struct m_tag) <= MCLBYTES) {
+		t = (struct m_tag *)m_mclalloc(wait);
+        } else {
                 t = NULL;
-#endif
+	}
+
 	if (t == NULL)
-		return NULL;
+		return (NULL);
+
+	t->m_tag_cookie = M_TAG_VALID_PATTERN;
 	t->m_tag_type = type;
 	t->m_tag_len = len;
 	t->m_tag_id = id;
-	return t;
+	if (len > 0)
+		bzero(t + 1, len);
+	return (t);
 }
 
 
@@ -405,25 +480,44 @@ m_tag_free(struct m_tag *t)
 	    t->m_tag_type == KERNEL_TAG_TYPE_MACLABEL)
 		mac_mbuf_tag_destroy(t);
 #endif
-#if CONFIG_MBUF_TAGS_MALLOC
-	_FREE(t, M_TEMP);
-#else
+#if INET6
+	if (t != NULL &&
+	    t->m_tag_id   == KERNEL_MODULE_TAG_ID &&
+	    t->m_tag_type == KERNEL_TAG_TYPE_INET6 &&
+	    t->m_tag_len  == sizeof (struct ip6aux))
+		ip6_destroyaux((struct ip6aux *)(t + 1));
+#endif /* INET6 */
 	if (t == NULL)
 		return;
-	if (t->m_tag_len + sizeof(struct m_tag) <= MLEN) {
+	if (M_TAG_ALIGN(t->m_tag_len) + sizeof (struct m_taghdr) <= MLEN) {
 		struct mbuf * m = m_dtom(t);
-		m_free(m);
+		VERIFY(m->m_flags & M_TAGHDR);
+		struct m_taghdr *hdr = (struct m_taghdr *)m->m_data;
+
+		/* No other tags in this mbuf */
+		if (--hdr->refcnt == 0) {
+			m_free(m);
+			return;
+		}
+
+		/* Pattern-fill the header */
+		u_int64_t *fill_ptr = (u_int64_t *)t;
+		u_int64_t *end_ptr = (u_int64_t *)(t + 1);
+		while (fill_ptr < end_ptr) {
+			*fill_ptr = M_TAG_FREE_PATTERN;
+			fill_ptr++;
+		}
 	} else {
-		MCLFREE((caddr_t)t);
+		m_mclfree((caddr_t)t);
 	}
-#endif
 }
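
The pattern fill in m_tag_free() overwrites a released tag slot with M_TAG_FREE_PATTERN so that a later dereference through a stale tag pointer is detectable. A standalone sketch of the 64-bit fill loop:

	#include <stdint.h>

	static void
	fill_pattern(void *start, void *end, uint64_t pattern)
	{
		uint64_t *p;

		/* Assumes the span is 64-bit aligned, as tag slots are. */
		for (p = (uint64_t *)start; p < (uint64_t *)end; p++)
			*p = pattern;
	}
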
 
 /* Prepend a packet tag. */
 void
 m_tag_prepend(struct mbuf *m, struct m_tag *t)
 {
-	KASSERT(m && t, ("m_tag_prepend: null argument, m %p t %p", m, t));
+	VERIFY(m != NULL && t != NULL);
+
 	SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link);
 }
 
@@ -431,7 +525,9 @@ m_tag_prepend(struct mbuf *m, struct m_tag *t)
 void
 m_tag_unlink(struct mbuf *m, struct m_tag *t)
 {
-	KASSERT(m && t, ("m_tag_unlink: null argument, m %p t %p", m, t));
+	VERIFY(m != NULL && t != NULL);
+	VERIFY(t->m_tag_cookie == M_TAG_VALID_PATTERN);
+
 	SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link);
 }
 
@@ -439,7 +535,8 @@ m_tag_unlink(struct mbuf *m, struct m_tag *t)
 void
 m_tag_delete(struct mbuf *m, struct m_tag *t)
 {
-	KASSERT(m && t, ("m_tag_delete: null argument, m %p t %p", m, t));
+	VERIFY(m != NULL && t != NULL);
+
 	m_tag_unlink(m, t);
 	m_tag_free(t);
 }
@@ -450,15 +547,21 @@ m_tag_delete_chain(struct mbuf *m, struct m_tag *t)
 {
 	struct m_tag *p, *q;
 
-	KASSERT(m, ("m_tag_delete_chain: null mbuf"));
-	if (t != NULL)
+	VERIFY(m != NULL);
+
+	if (t != NULL) {
 		p = t;
-	else
+	} else {
 		p = SLIST_FIRST(&m->m_pkthdr.tags);
+	}
 	if (p == NULL)
 		return;
-	while ((q = SLIST_NEXT(p, m_tag_link)) != NULL)
+
+	VERIFY(p->m_tag_cookie == M_TAG_VALID_PATTERN);
+	while ((q = SLIST_NEXT(p, m_tag_link)) != NULL) {
+		VERIFY(q->m_tag_cookie == M_TAG_VALID_PATTERN);
 		m_tag_delete(m, q);
+	}
 	m_tag_delete(m, p);
 }
 
@@ -468,17 +571,21 @@ m_tag_locate(struct mbuf *m, u_int32_t id, u_int16_t type, struct m_tag *t)
 {
 	struct m_tag *p;
 
-	KASSERT(m, ("m_tag_find: null mbuf"));
-	if (t == NULL)
+	VERIFY(m != NULL);
+
+	if (t == NULL) {
 		p = SLIST_FIRST(&m->m_pkthdr.tags);
-	else
+	} else {
+		VERIFY(t->m_tag_cookie == M_TAG_VALID_PATTERN);
 		p = SLIST_NEXT(t, m_tag_link);
+	}
 	while (p != NULL) {
+		VERIFY(p->m_tag_cookie == M_TAG_VALID_PATTERN);
 		if (p->m_tag_id == id && p->m_tag_type == type)
-			return p;
+			return (p);
 		p = SLIST_NEXT(p, m_tag_link);
 	}
-	return NULL;
+	return (NULL);
 }
 
 /* Copy a single tag. */
@@ -487,7 +594,8 @@ m_tag_copy(struct m_tag *t, int how)
 {
 	struct m_tag *p;
 
-	KASSERT(t, ("m_tag_copy: null tag"));
+	VERIFY(t != NULL);
+
 	p = m_tag_alloc(t->m_tag_id, t->m_tag_type, t->m_tag_len, how);
 	if (p == NULL)
 		return (NULL);
@@ -507,8 +615,16 @@ m_tag_copy(struct m_tag *t, int how)
 		mac_mbuf_tag_copy(t, p);
 	} else
 #endif
+#if INET6
+	if (t != NULL &&
+	    t->m_tag_id   == KERNEL_MODULE_TAG_ID &&
+	    t->m_tag_type == KERNEL_TAG_TYPE_INET6 &&
+	    t->m_tag_len  == sizeof (struct ip6aux)) {
+		ip6_copyaux((struct ip6aux *)(t + 1), (struct ip6aux *)(p + 1));
+	} else
+#endif /* INET6 */
 	bcopy(t + 1, p + 1, t->m_tag_len); /* Copy the data */
-	return p;
+	return (p);
 }
 
 /*
@@ -522,29 +638,32 @@ m_tag_copy_chain(struct mbuf *to, struct mbuf *from, int how)
 {
 	struct m_tag *p, *t, *tprev = NULL;
 
-	KASSERT(to && from,
-		("m_tag_copy: null argument, to %p from %p", to, from));
+	VERIFY(to != NULL && from != NULL);
+
 	m_tag_delete_chain(to, NULL);
 	SLIST_FOREACH(p, &from->m_pkthdr.tags, m_tag_link) {
+		VERIFY(p->m_tag_cookie == M_TAG_VALID_PATTERN);
 		t = m_tag_copy(p, how);
 		if (t == NULL) {
 			m_tag_delete_chain(to, NULL);
-			return 0;
+			return (0);
 		}
-		if (tprev == NULL)
+		if (tprev == NULL) {
 			SLIST_INSERT_HEAD(&to->m_pkthdr.tags, t, m_tag_link);
-		else {
+		} else {
 			SLIST_INSERT_AFTER(tprev, t, m_tag_link);
 			tprev = t;
 		}
 	}
-	return 1;
+	return (1);
 }
 
 /* Initialize tags on an mbuf. */
 void
 m_tag_init(struct mbuf *m)
 {
+	VERIFY(m != NULL);
+
 	SLIST_INIT(&m->m_pkthdr.tags);
 #if PF_PKTHDR
 	bzero(&m->m_pkthdr.pf_mtag, sizeof (m->m_pkthdr.pf_mtag));
@@ -555,34 +674,25 @@ m_tag_init(struct mbuf *m)
 struct m_tag *
 m_tag_first(struct mbuf *m)
 {
-	return SLIST_FIRST(&m->m_pkthdr.tags);
+	VERIFY(m != NULL);
+
+	return (SLIST_FIRST(&m->m_pkthdr.tags));
 }
 
 /* Get next tag in chain. */
 struct m_tag *
-m_tag_next(__unused struct mbuf *m, struct m_tag *t)
+m_tag_next(struct mbuf *m, struct m_tag *t)
 {
-	return SLIST_NEXT(t, m_tag_link);
-}
-
-void
-m_prio_init(struct mbuf *m)
-{
-#if !PKT_PRIORITY
 #pragma unused(m)
-#else /* PKT_PRIORITY */
-	if (m->m_flags & M_PKTHDR)
-		m->m_pkthdr.prio = MBUF_PRIORITY_NORMAL;
-#endif /* PKT_PRIORITY */
+	VERIFY(t != NULL);
+	VERIFY(t->m_tag_cookie == M_TAG_VALID_PATTERN);
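A hypothetical usage sketch for m_getptr(): locate byte loc within a chain m (both assumed for illustration) and read it, guarding against the end-of-data case where *off can point just past the last valid byte:

	int off;
	struct mbuf *n = m_getptr(m, loc, &off);

	if (n != NULL && off < n->m_len) {
		char byte = *(mtod(n, char *) + off);
		/* ... use byte ... */
	}
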
+
+	return (SLIST_NEXT(t, m_tag_link));
 }
 
 void
-m_prio_background(struct mbuf *m)
+m_prio_init(struct mbuf *m)
 {
-#if !PKT_PRIORITY
-#pragma unused(m)
-#else /* PKT_PRIORITY */
 	if (m->m_flags & M_PKTHDR)
-		m->m_pkthdr.prio = MBUF_PRIORITY_BACKGROUND;
-#endif /* PKT_PRIORITY */
+		m->m_pkthdr.prio = MBUF_TC_BE;
 }
diff --git a/bsd/kern/uipc_socket.c b/bsd/kern/uipc_socket.c
index 4b2c8a79b..b496895f6 100644
--- a/bsd/kern/uipc_socket.c
+++ b/bsd/kern/uipc_socket.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -96,18 +96,25 @@
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
 #include <kern/zalloc.h>
 #include <kern/locks.h>
 #include <machine/limits.h>
 #include <libkern/OSAtomic.h>
 #include <pexpert/pexpert.h>
 #include <kern/assert.h>
+#include <kern/task.h>
+
+#include <sys/mcache.h>
 
 #if CONFIG_MACF
 #include <security/mac.h>
 #include <security/mac_framework.h>
 #endif /* MAC */
 
+extern int in6_init_done;
+
 int			so_cache_hw = 0;
 int			so_cache_timeouts = 0;
 int			so_cache_max_freed = 0;
@@ -170,15 +177,15 @@ MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
 SYSCTL_DECL(_kern_ipc);
 
 int somaxconn = SOMAXCONN;
-SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 0, "");
+SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW | CTLFLAG_LOCKED, &somaxconn, 0, "");
 
 /* Should we get a maximum also ??? */
 static int sosendmaxchain = 65536;
 static int sosendminchain = 16384;
 static int sorecvmincopy  = 16384;
-SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
+SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW | CTLFLAG_LOCKED, &sosendminchain,
     0, "");
-SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
+SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW | CTLFLAG_LOCKED, &sorecvmincopy,
     0, "");
 
 /*
@@ -186,7 +193,7 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
  * the socket is marked with SOF_MULTIPAGES; see below.
  */
 int sosendjcl = 1;
-SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl, CTLFLAG_RW, &sosendjcl, 0, "");
+SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl, CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl, 0, "");
 
 /*
  * Set this to ignore SOF_MULTIPAGES and use jumbo clusters for large
@@ -200,9 +207,13 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl, CTLFLAG_RW, &sosendjcl, 0, "");
  * capable.  Set this to 1 only for testing/debugging purposes.
  */
 int sosendjcl_ignore_capab = 0;
-SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab, CTLFLAG_RW,
+SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab, CTLFLAG_RW | CTLFLAG_LOCKED,
     &sosendjcl_ignore_capab, 0, "");
 
+int sodefunctlog = 0;
+SYSCTL_INT(_kern_ipc, OID_AUTO, sodefunctlog, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &sodefunctlog, 0, "");
+
 /*
  * Socket operation routines.
  * These routines are called by the routines in
@@ -223,7 +234,9 @@ extern struct protosw *pffindprotonotype(int, int);
 extern int soclose_locked(struct socket *);
 extern int soo_kqfilter(struct fileproc *, struct knote *, struct proc *);
 
+#if CONFIG_EMBEDDED
 extern int uthread_get_background_state(uthread_t);
+#endif /* CONFIG_EMBEDDED */
 
 #ifdef __APPLE__
 
@@ -237,6 +250,9 @@ static void so_cache_timer(void *);
 void soclose_wait_locked(struct socket *so);
 int so_isdstlocal(struct socket *so);
 
+__private_extern__ u_int32_t sotcdb = 0;
+SYSCTL_INT(_kern_ipc, OID_AUTO, sotcdb, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &sotcdb, 0, "");
 
 void
 socketinit(void)
@@ -275,6 +291,7 @@ socketinit(void)
 	    get_inpcb_str_size() + 4 + get_tcp_str_size());
 
 	so_cache_zone = zinit(str_size, 120000*str_size, 8192, "socache zone");
+	zone_change(so_cache_zone, Z_CALLERACCT, FALSE);
 	zone_change(so_cache_zone, Z_NOENCRYPT, TRUE);
 #if TEMPDEBUG
 	printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
@@ -284,6 +301,10 @@ socketinit(void)
 	so_cache_zone_element_size = str_size;
 
 	sflt_init();
+
+	VERIFY(SO_TC_MAX == SO_TC_STATS_MAX);
+	
+	socket_tclass_init();
 }
 
 static void
@@ -398,6 +419,21 @@ cached_sock_free(struct socket *so)
 #endif
 }
 
+static void
+so_update_last_owner_locked(struct socket *so, proc_t self)
+{
+	if (self == NULL)
+		self = current_proc();
+
+	if (self != NULL) {
+		so->last_upid = proc_uniqueid(self);
+		so->last_pid = proc_pid(self);
+	}
+}
+
 static void
 so_cache_timer(__unused void *dummy)
 {
@@ -464,6 +500,7 @@ soalloc(int waitok, int dom, int type)
 			return (NULL);
 		}
 #endif /* MAC_SOCKET */
+		so_update_last_owner_locked(so, NULL);
 	}
 
 	return (so);
@@ -488,8 +525,10 @@ socreate(int dom, struct socket **aso, int type, int proto)
 	register struct protosw *prp;
 	register struct socket *so;
 	register int error = 0;
+#if CONFIG_EMBEDDED
 	thread_t thread;
 	struct uthread *ut;
+#endif /* CONFIG_EMBEDDED */
 
 #if TCPDEBUG
 	extern int tcpconsdebug;
@@ -521,6 +560,7 @@ socreate(int dom, struct socket **aso, int type, int proto)
 	so->so_type = type;
 
 	so->so_uid = kauth_cred_getuid(kauth_cred_get());
+	so->so_gid = kauth_cred_getgid(kauth_cred_get());
 	if (!suser(kauth_cred_get(), NULL))
 		so->so_state = SS_PRIV;
 
@@ -566,22 +606,42 @@ socreate(int dom, struct socket **aso, int type, int proto)
 		so->so_options |= SO_DEBUG;
 #endif
 #endif
+	so_set_default_traffic_class(so);
 	/*
 	 * If this is a background thread/task, mark the socket as such.
 	 */
+#if !CONFIG_EMBEDDED
+	if (proc_get_self_isbackground() != 0) 
+#else /* !CONFIG_EMBEDDED */
 	thread = current_thread();
 	ut = get_bsdthread_info(thread);
-	if (uthread_get_background_state(ut)) {
+	if (uthread_get_background_state(ut)) 
+#endif /* !CONFIG_EMBEDDED */
+	{
 		socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND);
-		so->so_background_thread = thread;
-		/*
-		 * In case setpriority(PRIO_DARWIN_THREAD) was called
-		 * on this thread, regulate network (TCP) traffics.
-		 */
-		if (ut->uu_flag & UT_BACKGROUND_TRAFFIC_MGT) {
-			socket_set_traffic_mgt_flags(so,
-			    TRAFFIC_MGT_SO_BG_REGULATE);
-		}
+		so->so_background_thread = current_thread();
+	}
+
+	switch (dom) {
+	/*
+	 * Don't mark Unix domain sockets as eligible for defunct by default.
+	 */
+	case PF_LOCAL:
+		so->so_flags |= SOF_NODEFUNCT;
+		break;
+	/*
+	 * Radar 9119053
+	 * Since v6 initialization is asynchronous and we can't hold
+	 * up the main boot path, we need to at least hold off any
+	 * sockets attempting to be created until the v6 stack is
+	 * up and ready.
+	 */
+	case PF_INET6:
+		if (in6_init_done == 0)
+			ip6_fin();
+		break;
+	default:
+		break;
 	}
 
 	*aso = so;
@@ -615,40 +675,25 @@ sobind(struct socket *so, struct sockaddr *nam)
 {
 	struct proc *p = current_proc();
 	int error = 0;
-	struct socket_filter_entry *filter;
-	int filtered = 0;
 
 	socket_lock(so, 1);
+	
+	so_update_last_owner_locked(so, p);
 
 	/*
-	 * If this is a bind request on a previously-accepted socket
-	 * that has been marked as inactive, reject it now before
-	 * we go any further.
+	 * If this is a bind request on a socket that has been marked
+	 * as inactive, reject it now before we go any further.
 	 */
 	if (so->so_flags & SOF_DEFUNCT) {
 		error = EINVAL;
+		SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n",
+		    __func__, proc_pid(p), so, INP_SOCKAF(so), INP_SOCKTYPE(so),
+		    error));
 		goto out;
 	}
 
 	/* Socket filter */
-	error = 0;
-	for (filter = so->so_filt; filter && (error == 0);
-	    filter = filter->sfe_next_onsocket) {
-		if (filter->sfe_filter->sf_filter.sf_bind) {
-			if (filtered == 0) {
-				filtered = 1;
-				sflt_use(so);
-				socket_unlock(so, 0);
-			}
-			error = filter->sfe_filter->sf_filter.
-			    sf_bind(filter->sfe_cookie, so, nam);
-		}
-	}
-	if (filtered != 0) {
-		socket_lock(so, 0);
-		sflt_unuse(so);
-	}
-	/* End socket filter */
+	error = sflt_bind(so, nam);
 
 	if (error == 0)
 		error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
@@ -664,6 +709,9 @@ out:
 void
 sodealloc(struct socket *so)
 {
+	/* Remove any filters */
+	sflt_termsock(so);
+
 	so->so_gencnt = ++so_gencnt;
 
 #if CONFIG_MACF_SOCKET
@@ -703,10 +751,11 @@ solisten(struct socket *so, int backlog)
 {
 	struct proc *p = current_proc();
 	int error = 0;
-	struct socket_filter_entry *filter;
-	int filtered = 0;
 
 	socket_lock(so, 1);
+	
+	so_update_last_owner_locked(so, p);
+	
 	if (so->so_proto == NULL) {
 		error = EINVAL;
 		goto out;
@@ -718,13 +767,18 @@ solisten(struct socket *so, int backlog)
 
 	/*
 	 * If the listen request is made on a socket that is not fully
-	 * disconnected, or on a previously-accepted socket that has
-	 * been marked as inactive, reject the request now.
+	 * disconnected, or on a socket that has been marked as inactive,
+	 * reject the request now.
 	 */
 	if ((so->so_state &
 	    (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) ||
 	    (so->so_flags & SOF_DEFUNCT)) {
 		error = EINVAL;
+		if (so->so_flags & SOF_DEFUNCT) {
+			SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n",
+			    __func__, proc_pid(p), so, INP_SOCKAF(so),
+			    INP_SOCKTYPE(so), error));
+		}
 		goto out;
 	}
 
@@ -733,23 +787,7 @@ solisten(struct socket *so, int backlog)
 		goto out;
 	}
 
-	error = 0;
-	for (filter = so->so_filt; filter && (error == 0);
-	    filter = filter->sfe_next_onsocket) {
-		if (filter->sfe_filter->sf_filter.sf_listen) {
-			if (filtered == 0) {
-				filtered = 1;
-				sflt_use(so);
-				socket_unlock(so, 0);
-			}
-			error = filter->sfe_filter->sf_filter.
-			    sf_listen(filter->sfe_cookie, so);
-		}
-	}
-	if (filtered != 0) {
-		socket_lock(so, 0);
-		sflt_unuse(so);
-	}
+	error = sflt_listen(so);
 
 	if (error == 0) {
 		error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
@@ -793,9 +831,6 @@ sofreelastref(struct socket *so, int dealloc)
 
 	/* Assume socket is locked */
 
-	/* Remove any filters - may be called more than once */
-	sflt_termsock(so);
-
 	if ((!(so->so_flags & SOF_PCBCLEARING)) ||
 	    ((so->so_state & SS_NOFDREF) == 0)) {
 #ifdef __APPLE__
@@ -1104,8 +1139,7 @@ int
 soacceptfilter(struct socket *so)
 {
 	struct sockaddr *local = NULL, *remote = NULL;
-	struct socket_filter_entry *filter;
-	int error = 0, filtered = 0;
+	int error = 0;
 	struct socket *head = so->so_head;
 
 	/*
@@ -1126,29 +1160,7 @@ soacceptfilter(struct socket *so)
 		goto done;
 	}
 
-	/*
-	 * At this point, we have a reference on the listening socket
-	 * so we know it won't be going away.  Do the same for the newly
-	 * accepted socket while we invoke the accept callback routine.
-	 */
-	for (filter = so->so_filt; filter != NULL && error == 0;
-	    filter = filter->sfe_next_onsocket) {
-		if (filter->sfe_filter->sf_filter.sf_accept != NULL) {
-			if (!filtered) {
-				filtered = 1;
-				sflt_use(so);
-				socket_unlock(so, 0);
-			}
-			error = filter->sfe_filter->sf_filter.
-			    sf_accept(filter->sfe_cookie,
-			    head, so, local, remote);
-		}
-	}
-
-	if (filtered) {
-		socket_lock(so, 0);
-		sflt_unuse(so);
-	}
+	error = sflt_accept(head, so, local, remote);
 
 	/*
 	 * If we get EJUSTRETURN from one of the filters, mark this socket
@@ -1157,10 +1169,8 @@ soacceptfilter(struct socket *so)
 	 */
 	if (error == EJUSTRETURN) {
 		error = 0;
-		so->so_flags |= SOF_DEFUNCT;
-		/* Prevent data from being appended to the socket buffers */
-		so->so_snd.sb_flags |= SB_DROP;
-		so->so_rcv.sb_flags |= SB_DROP;
+		(void) sosetdefunct(current_proc(), so,
+		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);
 	}
 
 	if (error != 0) {
@@ -1207,14 +1217,22 @@ soconnectlock(struct socket *so, struct sockaddr *nam, int dolock)
 	if (dolock)
 		socket_lock(so, 1);
 
+	so_update_last_owner_locked(so, p);
+	
 	/*
 	 * If this is a listening socket or if this is a previously-accepted
 	 * socket that has been marked as inactive, reject the connect request.
 	 */
 	if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
+		error = EOPNOTSUPP;
+		if (so->so_flags & SOF_DEFUNCT) {
+			SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n",
+			    __func__, proc_pid(p), so, INP_SOCKAF(so),
+			    INP_SOCKTYPE(so), error));
+		}
 		if (dolock)
 			socket_unlock(so, 1);
-		return (EOPNOTSUPP);
+		return (error);
 	}
 
 	if ((so->so_restrictions & SO_RESTRICT_DENYOUT) != 0) {
@@ -1238,36 +1256,14 @@ soconnectlock(struct socket *so, struct sockaddr *nam, int dolock)
 		 * Run connect filter before calling protocol:
 		 *  - non-blocking connect returns before completion;
 		 */
-		struct socket_filter_entry *filter;
-		int filtered = 0;
-
-		error = 0;
-		for (filter = so->so_filt; filter && (error == 0);
-		    filter = filter->sfe_next_onsocket) {
-			if (filter->sfe_filter->sf_filter.sf_connect_out) {
-				if (filtered == 0) {
-					filtered = 1;
-					sflt_use(so);
-					socket_unlock(so, 0);
-				}
-				error = filter->sfe_filter->sf_filter.
-				    sf_connect_out(filter->sfe_cookie, so, nam);
-			}
-		}
-		if (filtered != 0) {
-			socket_lock(so, 0);
-			sflt_unuse(so);
-		}
+		error = sflt_connectout(so, nam);
 
 		if (error) {
 			if (error == EJUSTRETURN)
 				error = 0;
-			if (dolock)
-				socket_unlock(so, 1);
-			return (error);
+		} else {
+			error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
 		}
-
-		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
 	}
 	if (dolock)
 		socket_unlock(so, 1);
@@ -1377,6 +1373,8 @@ restart:
 		} else {
 			error = sblock(&so->so_snd, SBLOCKWAIT(flags));
 			if (error) {
+				if (so->so_flags & SOF_DEFUNCT)
+					goto defunct;
 				return (error);
 			}
 			*sblocked = 1;
@@ -1384,12 +1382,17 @@ restart:
 	}
 
 	/*
-	 * If a send attempt is made on a previously-accepted socket
-	 * that has been marked as inactive (disconnected), reject
-	 * the request.
+	 * If a send attempt is made on a socket that has been marked
+	 * as inactive (disconnected), reject the request.
 	 */
-	if (so->so_flags & SOF_DEFUNCT)
-		return (ENOTCONN);
+	if (so->so_flags & SOF_DEFUNCT) {
+defunct:
+		error = EPIPE;
+		SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n", __func__,
+		    proc_selfpid(), so, INP_SOCKAF(so), INP_SOCKTYPE(so),
+		    error));
+		return (error);
+	}
 
 	if (so->so_state & SS_CANTSENDMORE)
 		return (EPIPE);
@@ -1423,8 +1426,11 @@ restart:
 			return (EWOULDBLOCK);
 		}
 		sbunlock(&so->so_snd, 1);
+		*sblocked = 0;
 		error = sbwait(&so->so_snd);
 		if (error) {
+			if (so->so_flags & SOF_DEFUNCT)
+				goto defunct;
 			return (error);
 		}
 		goto restart;
@@ -1515,6 +1521,8 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 	    so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);
 
 	socket_lock(so, 1);
+	so_update_last_owner_locked(so, p);
+	
 	if (so->so_type != SOCK_STREAM && (flags & MSG_OOB) != 0) {
 		error = EOPNOTSUPP;
 		socket_unlock(so, 1);
@@ -1555,10 +1563,6 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 		    1024 : 0);
 
 		do {
-			struct socket_filter_entry *filter;
-			int filtered;
-			boolean_t recursive;
-
 			if (uio == NULL) {
 				/*
 				 * Data is prepackaged in "top".
@@ -1611,7 +1615,8 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 					 * haven't yet consumed.
 					 */
 					if (freelist == NULL &&
-					    bytes_to_copy > NBPG && jumbocl) {
+					    bytes_to_copy > MBIGCLBYTES &&
+					    jumbocl) {
 						num_needed =
 						    bytes_to_copy / M16KCLBYTES;
 
@@ -1634,10 +1639,10 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 					if (freelist == NULL &&
 					    bytes_to_copy > MCLBYTES) {
 						num_needed =
-						    bytes_to_copy / NBPG;
+						    bytes_to_copy / MBIGCLBYTES;
 
 						if ((bytes_to_copy -
-						    (num_needed * NBPG)) >=
+						    (num_needed * MBIGCLBYTES)) >=
 						    MINCLSIZE)
 							num_needed++;
 
@@ -1645,7 +1650,7 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 						    m_getpackets_internal(
 						    (unsigned int *)&num_needed,
 						    hdrs_needed, M_WAIT, 0,
-						    NBPG);
+						    MBIGCLBYTES);
 						/*
 						 * Fall back to cluster size
 						 * if allocation failed
@@ -1783,65 +1788,24 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 			/*
 			 * Socket filter processing
 			 */
-			recursive = (so->so_send_filt_thread != NULL);
-			filtered = 0;
-			error = 0;
-			for (filter = so->so_filt; filter && (error == 0);
-			    filter = filter->sfe_next_onsocket) {
-				if (filter->sfe_filter->sf_filter.sf_data_out) {
-					int so_flags = 0;
-					if (filtered == 0) {
-						filtered = 1;
-						so->so_send_filt_thread =
-						    current_thread();
-						sflt_use(so);
-						socket_unlock(so, 0);
-						so_flags =
-						    (sendflags & MSG_OOB) ?
-						    sock_data_filt_flag_oob : 0;
-					}
-					error = filter->sfe_filter->sf_filter.
-					    sf_data_out(filter->sfe_cookie, so,
-					    addr, &top, &control, so_flags);
+			error = sflt_data_out(so, addr, &top, &control,
+			    (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0);
+			if (error) {
+				if (error == EJUSTRETURN) {
+					error = 0;
+					clen = 0;
+					control = 0;
+					top = 0;
 				}
-			}
 
-			if (filtered) {
-				/*
-				 * At this point, we've run at least one
-				 * filter.  The socket is unlocked as is
-				 * the socket buffer.  Clear the recorded
-				 * filter thread only when we are outside
-				 * of a filter's context.  This allows for
-				 * a filter to issue multiple inject calls
-				 * from its sf_data_out callback routine.
-				 */
-				socket_lock(so, 0);
-				sflt_unuse(so);
-				if (!recursive)
-					so->so_send_filt_thread = 0;
-				if (error) {
-					if (error == EJUSTRETURN) {
-						error = 0;
-						clen = 0;
-						control = 0;
-						top = 0;
-					}
-
-					goto release;
-				}
+				goto release;
 			}
 			/*
 			 * End Socket filter processing
 			 */
 
-			if (error == EJUSTRETURN) {
-				/* A socket filter handled this data */
-				error = 0;
-			} else {
-				error = (*so->so_proto->pr_usrreqs->pru_send)
-				    (so, sendflags, top, addr, control, p);
-			}
+			error = (*so->so_proto->pr_usrreqs->pru_send)
+			    (so, sendflags, top, addr, control, p);
 #ifdef __APPLE__
 			if (flags & MSG_SEND)
 				so->so_temp = NULL;
@@ -1935,6 +1899,7 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
 	    so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat);
 
 	socket_lock(so, 1);
+	so_update_last_owner_locked(so, p);
 
 #ifdef MORE_LOCKING_DEBUG
 	if (so->so_usecount == 1)
@@ -1958,14 +1923,18 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
 	if (so->so_flags & SOF_DEFUNCT) {
 		struct sockbuf *sb = &so->so_rcv;
 
+		error = ENOTCONN;
+		SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n", __func__,
+		    proc_pid(p), so, INP_SOCKAF(so), INP_SOCKTYPE(so), error));
 		/*
 		 * This socket should have been disconnected and flushed
-		 * prior to being returned from accept; there should be
-		 * no data on its receive list, so panic otherwise.
+		 * prior to being returned from sodefunct(); there should
+		 * be no data on its receive list, so panic otherwise.
 		 */
-		sb_empty_assert(sb, __func__);
+		if (so->so_state & SS_DEFUNCT)
+			sb_empty_assert(sb, __func__);
 		socket_unlock(so, 1);
-		return (ENOTCONN);
+		return (error);
 	}
 
 	/*
@@ -2197,6 +2166,14 @@ dontblock:
 				goto restart;
 			}
 			socket_lock(so, 0);
+			/*
+			 * If the socket has been defunct'd, drop it.
+			 */
+			if (so->so_flags & SOF_DEFUNCT) {
+				m_freem(m);
+				error = ENOTCONN;
+				goto release;
+			}
 			/*
 			 * Re-adjust the socket receive list and re-enqueue
 			 * the record in front of any packets which may have
@@ -2253,6 +2230,7 @@ dontblock:
 		struct mbuf *cm = NULL, *cmn;
 		struct mbuf **cme = &cm;
 		struct sockbuf *sb_rcv = &so->so_rcv;
+		struct mbuf **msgpcm = NULL;
 
 		/*
 		 * Externalizing the control messages would require us to
@@ -2265,7 +2243,23 @@ dontblock:
 		do {
 			if (flags & MSG_PEEK) {
 				if (controlp != NULL) {
+					if (*controlp == NULL) {
+						msgpcm = controlp;
+					}
 					*controlp = m_copy(m, 0, m->m_len);
+
+					/*
+					 * If we failed to allocate an mbuf,
+					 * release any previously allocated
+					 * mbufs for control data.  Return
+					 * an error.  Keep the mbufs in the
+					 * socket as we are using the
+					 * MSG_PEEK flag.
+					 */
+					if (*controlp == NULL) {
+						m_freem(*msgpcm);
+						error = ENOBUFS;
+						goto release;
+					}
 					controlp = &(*controlp)->m_next;
 				}
 				m = m->m_next;
@@ -2499,8 +2493,25 @@ dontblock:
 			if (flags & MSG_PEEK) {
 				moff += len;
 			} else {
-				if (mp)
-					*mp = m_copym(m, 0, len, M_WAIT);
+				if (mp != NULL) {
+					int copy_flag;
+
+					if (flags & MSG_DONTWAIT)
+						copy_flag = M_DONTWAIT;
+					else
+						copy_flag = M_WAIT;
+					*mp = m_copym(m, 0, len, copy_flag);
+					if (*mp == NULL) {
+						/*
+						 * Failed to allocate an mbuf.
+						 * Adjust uio_resid back; it
+						 * was adjusted down by len
+						 * bytes which we didn't copy
+						 * over.
+						 */
+						uio_setresid(uio, (uio_resid(uio) + len));
+						break;
+					}
+				}
 				m->m_data += len;
 				m->m_len -= len;
 				so->so_rcv.sb_cc -= len;
@@ -2959,13 +2970,13 @@ sosetopt(struct socket *so, struct sockopt *sopt)
 	int	error, optval;
 	struct	linger l;
 	struct	timeval tv;
-	struct socket_filter_entry *filter;
-	int filtered = 0;
 #if CONFIG_MACF_SOCKET
 	struct mac extmac;
 #endif /* MAC_SOCKET */
 
 	socket_lock(so, 1);
+	so_update_last_owner_locked(so, NULL);
+	
 	if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE))
 	    == (SS_CANTRCVMORE | SS_CANTSENDMORE) && 
 	    (so->so_flags & SOF_NPX_SETOPTSHUT) == 0) {
@@ -2978,29 +2989,11 @@ sosetopt(struct socket *so, struct sockopt *sopt)
 		sopt->sopt_dir = SOPT_SET;
 	}
 
-	error = 0;
-	for (filter = so->so_filt; filter && (error == 0);
-	    filter = filter->sfe_next_onsocket) {
-		if (filter->sfe_filter->sf_filter.sf_setoption) {
-			if (filtered == 0) {
-				filtered = 1;
-				sflt_use(so);
-				socket_unlock(so, 0);
-			}
-			error = filter->sfe_filter->sf_filter.
-			    sf_setoption(filter->sfe_cookie, so, sopt);
-		}
-	}
-
-	if (filtered != 0) {
-		socket_lock(so, 0);
-		sflt_unuse(so);
-
-		if (error) {
-			if (error == EJUSTRETURN)
-				error = 0;
-			goto bad;
-		}
+	error = sflt_setsockopt(so, sopt);
+	if (error) {
+		if (error == EJUSTRETURN)
+			error = 0;
+		goto bad;
 	}
 
 	error = 0;
@@ -3036,6 +3029,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
 		case SO_REUSEPORT:
 		case SO_OOBINLINE:
 		case SO_TIMESTAMP:
+		case SO_TIMESTAMP_MONOTONIC:
 #ifdef __APPLE__
 		case SO_DONTTRUNC:
 		case SO_WANTMORE:
@@ -3126,8 +3120,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
 			if (error)
 				goto bad;
 
-			error = sflt_attach_private(so, NULL,
-			    nke.nke_handle, 1);
+			error = sflt_attach_internal(so, nke.nke_handle);
 			break;
 		}
 
@@ -3253,19 +3246,76 @@ sosetopt(struct socket *so, struct sockopt *sopt)
 			break;
 		}
 
-#if PKT_PRIORITY
 		case SO_TRAFFIC_CLASS: {
 			error = sooptcopyin(sopt, &optval, sizeof (optval),
 				sizeof (optval));
 			if (error)
 				goto bad;
-			if (optval < SO_TC_BE || optval > SO_TC_VO) {
-				error = EINVAL;
+			error = so_set_traffic_class(so, optval);
+			if (error)
 				goto bad;
-			}
-			so->so_traffic_class = optval;
+			break;
 		}
-#endif /* PKT_PRIORITY */
+
+		case SO_RECV_TRAFFIC_CLASS: {
+			error = sooptcopyin(sopt, &optval, sizeof (optval),
+				sizeof (optval));
+			if (error)
+				goto bad;
+			if (optval == 0)
+				so->so_flags &= ~SOF_RECV_TRAFFIC_CLASS;
+			else
+				so->so_flags |= SOF_RECV_TRAFFIC_CLASS;
+			break;
+		}
+		
+		case SO_TRAFFIC_CLASS_DBG: {
+			struct so_tcdbg so_tcdbg;
+			
+			error = sooptcopyin(sopt, &so_tcdbg, sizeof (struct so_tcdbg),
+				sizeof (struct so_tcdbg));
+			if (error)
+				goto bad;
+			error = so_set_tcdbg(so, &so_tcdbg);
+			if (error)
+				goto bad;
+			break;
+		}
+		
+		case SO_DEFUNCTOK:
+			error = sooptcopyin(sopt, &optval, sizeof (optval),
+			    sizeof (optval));
+			if (error != 0 || (so->so_flags & SOF_DEFUNCT)) {
+				if (error == 0)
+					error = EBADF;
+				goto bad;
+			}
+			/*
+			 * Any process can set SO_DEFUNCTOK (clear
+			 * SOF_NODEFUNCT), but only root can clear
+			 * SO_DEFUNCTOK (set SOF_NODEFUNCT).
+			 */
+			if (optval == 0 &&
+			    kauth_cred_issuser(kauth_cred_get()) == 0) {
+				error = EPERM;
+				goto bad;
+			}
+			if (optval)
+				so->so_flags &= ~SOF_NODEFUNCT;
+			else
+				so->so_flags |= SOF_NODEFUNCT;
+
+			SODEFUNCTLOG(("%s[%d]: so %p [%d,%d] is now marked as "
+			    "%seligible for defunct\n", __func__,
+			    proc_selfpid(), so, INP_SOCKAF(so),
+			    INP_SOCKTYPE(so),
+			    (so->so_flags & SOF_NODEFUNCT) ? "not " : ""));
+			break;
+
+		case SO_ISDEFUNCT:
+			/* This option is not settable */
+			error = EINVAL;
+			break;
 
 		default:
 			error = ENOPROTOOPT;
@@ -3355,8 +3405,6 @@ sogetopt(struct socket *so, struct sockopt *sopt)
 	int	error, optval;
 	struct	linger l;
 	struct	timeval tv;
-	struct	socket_filter_entry *filter;
-	int	filtered = 0;
 #if CONFIG_MACF_SOCKET
 	struct mac extmac;
 #endif /* MAC_SOCKET */
@@ -3366,32 +3414,16 @@ sogetopt(struct socket *so, struct sockopt *sopt)
 	}
 
 	socket_lock(so, 1);
+	so_update_last_owner_locked(so, NULL);
 
-	error = 0;
-	for (filter = so->so_filt; filter && (error == 0);
-	    filter = filter->sfe_next_onsocket) {
-		if (filter->sfe_filter->sf_filter.sf_getoption) {
-			if (filtered == 0) {
-				filtered = 1;
-				sflt_use(so);
-				socket_unlock(so, 0);
-			}
-			error = filter->sfe_filter->sf_filter.
-			    sf_getoption(filter->sfe_cookie, so, sopt);
-		}
-	}
-	if (filtered != 0) {
-		socket_lock(so, 0);
-		sflt_unuse(so);
-
-		if (error) {
-			if (error == EJUSTRETURN)
-				error = 0;
-			socket_unlock(so, 1);
-			return (error);
-		}
+	error = sflt_getsockopt(so, sopt);
+	if (error) {
+		if (error == EJUSTRETURN)
+			error = 0;
+		socket_unlock(so, 1);
+		return (error);
 	}
-
+	
 	error = 0;
 	if (sopt->sopt_level != SOL_SOCKET) {
 		if (so->so_proto && so->so_proto->pr_ctloutput) {
@@ -3421,6 +3453,7 @@ sogetopt(struct socket *so, struct sockopt *sopt)
 		case SO_BROADCAST:
 		case SO_OOBINLINE:
 		case SO_TIMESTAMP:
+		case SO_TIMESTAMP_MONOTONIC:
 #ifdef __APPLE__
 		case SO_DONTTRUNC:
 		case SO_WANTMORE:
@@ -3556,11 +3589,29 @@ integer:
 			error = sooptcopyout(sopt, &sonpx, sizeof(struct so_np_extensions));
 			break;	
 		}
-#if PKT_PRIORITY
+
 		case SO_TRAFFIC_CLASS:
 			optval = so->so_traffic_class;
 			goto integer;
-#endif /* PKT_PRIORITY */
+		
+		case SO_RECV_TRAFFIC_CLASS:
+			optval = (so->so_flags & SOF_RECV_TRAFFIC_CLASS);
+			goto integer;
+
+		case SO_TRAFFIC_CLASS_STATS:
+			error = sooptcopyout(sopt, &so->so_tc_stats,
+			    sizeof (so->so_tc_stats));
+			break;
+
+		case SO_TRAFFIC_CLASS_DBG: 
+			error = sogetopt_tcdbg(so, sopt);
+			break;
+		
+		case SO_DEFUNCTOK:
+			optval = !(so->so_flags & SOF_NODEFUNCT);
+			goto integer;
+
+		case SO_ISDEFUNCT:
+			optval = (so->so_flags & SOF_DEFUNCT);
+			goto integer;
 
 		default:
 			error = ENOPROTOOPT;
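
The read-only SO_ISDEFUNCT option above can be probed the usual way from
userland; a sketch (same assumption that the private option name is visible):

	int defunct = 0;
	socklen_t len = sizeof (defunct);

	if (getsockopt(s, SOL_SOCKET, SO_ISDEFUNCT, &defunct, &len) == 0 &&
	    defunct != 0)
		printf("socket is defunct; expect EBADF on further I/O\n");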
@@ -3570,8 +3621,10 @@ integer:
 		return (error);
 	}
 }
-
-/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
+/* The size limit on our soopt_getm() differs from FreeBSD's: we cap
+ * the size of options at MCLBYTES. This will have to change if we
+ * ever need to define options that need more space than MCLBYTES.
+ */
 int
 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
 {
@@ -3579,7 +3632,7 @@ soopt_getm(struct sockopt *sopt, struct mbuf **mp)
 	int sopt_size = sopt->sopt_valsize;
 	int how;
 
-	if (sopt_size > MAX_SOOPTGETM_SIZE)
+	if (sopt_size <= 0 || sopt_size > MCLBYTES)
 		return (EMSGSIZE);
 
 	how = sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT;
@@ -3600,7 +3653,7 @@ soopt_getm(struct sockopt *sopt, struct mbuf **mp)
 	*mp = m;
 	m_prev = m;
 
-	while (sopt_size) {
+	while (sopt_size > 0) {
 		MGET(m, how, MT_DATA);
 		if (m == 0) {
 			m_freem(*mp);
@@ -3610,6 +3663,7 @@ soopt_getm(struct sockopt *sopt, struct mbuf **mp)
 			MCLGET(m, how);
 			if ((m->m_flags & M_EXT) == 0) {
 				m_freem(*mp);
+				m_freem(m);
 				return (ENOBUFS);
 			}
 			m->m_len = min(MCLBYTES, sopt_size);
@@ -3623,7 +3677,7 @@ soopt_getm(struct sockopt *sopt, struct mbuf **mp)
 	return (0);
 }
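
A quick worked example of the sizing logic above (assuming the usual
MCLBYTES of 2048; a sketch of the arithmetic only):

	/* sopt_size = 1500: larger than MLEN, so the first mbuf takes a
	 *   cluster and m_len = min(MCLBYTES, 1500) = 1500; sopt_size
	 *   drops to 0 and the while loop never executes.
	 * sopt_size = 2500: rejected up front with EMSGSIZE.
	 * With the MCLBYTES cap a single cluster mbuf always suffices,
	 * leaving the chain-extension loop as a safeguard.
	 */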
 
-/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
+/* copyin sopt data into mbuf chain */
 int
 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
 {
@@ -3654,7 +3708,7 @@ soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
 	return (0);
 }
 
-/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
+/* copyout mbuf chain data into soopt */
 int
 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
 {
@@ -3709,6 +3763,7 @@ sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql)
 	int revents = 0;
 
 	socket_lock(so, 1);
+	so_update_last_owner_locked(so, p);
 
 	if (events & (POLLIN | POLLRDNORM))
 		if (soreadable(so))
@@ -3863,12 +3918,19 @@ filt_soread(struct knote *kn, long hint)
 		return (1);
 	}
 
+	int64_t	lowwat = so->so_rcv.sb_lowat;
+	if (kn->kn_sfflags & NOTE_LOWAT) {
+		if (kn->kn_sdata > so->so_rcv.sb_hiwat)
+			lowwat = so->so_rcv.sb_hiwat;
+		else if (kn->kn_sdata > lowwat)
+			lowwat = kn->kn_sdata;
+	}
+
 	if ((hint & SO_FILT_HINT_LOCKED) == 0)
 		socket_unlock(so, 1);
-
-	return ((kn->kn_flags & EV_OOBAND) ||
-	    kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
-	    kn->kn_sdata : so->so_rcv.sb_lowat));
+
+	return ((kn->kn_flags & EV_OOBAND) || kn->kn_data >= lowwat);
 }
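
On the userland side, the clamping above corresponds to a kevent
registration like the following sketch (standard kqueue API; the 4 KB
threshold is an arbitrary example value):

	#include <sys/event.h>

	/* Fire EVFILT_READ only once at least 4 KB is buffered; the
	 * kernel clamps this low-water mark to the receive buffer's
	 * high-water mark, per filt_soread() above. */
	static int
	wait_for_4k(int kq, int s)
	{
		struct kevent kev;

		EV_SET(&kev, s, EVFILT_READ, EV_ADD, NOTE_LOWAT, 4096, NULL);
		return (kevent(kq, &kev, 1, &kev, 1, NULL));
	}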
 
 static void
@@ -3911,14 +3973,20 @@ filt_sowrite(struct knote *kn, long hint)
 			socket_unlock(so, 1);
 		return (0);
 	}
+	int64_t	lowwat = so->so_snd.sb_lowat;
+	if (kn->kn_sfflags & NOTE_LOWAT) {
+		if (kn->kn_sdata > so->so_snd.sb_hiwat)
+			lowwat = so->so_snd.sb_hiwat;
+		else if (kn->kn_sdata > lowwat)
+			lowwat = kn->kn_sdata;
+	}
 	if ((hint & SO_FILT_HINT_LOCKED) == 0)
 		socket_unlock(so, 1);
-	if (kn->kn_sfflags & NOTE_LOWAT)
-		return (kn->kn_data >= kn->kn_sdata);
-	return (kn->kn_data >= so->so_snd.sb_lowat);
+	return (kn->kn_data >= lowwat);
 }
 
-#define SO_LOCK_HISTORY_STR_LEN (2 * SO_LCKDBG_MAX * (2 + sizeof(void *) + 1) + 1)
+#define SO_LOCK_HISTORY_STR_LEN (2 * SO_LCKDBG_MAX * (2 + (2 * sizeof(void *)) + 1) + 1)
 
 __private_extern__ const char * solockhistory_nr(struct socket *so)
 {
@@ -3926,6 +3994,7 @@ __private_extern__ const char * solockhistory_nr(struct socket *so)
         int i;
         static char lock_history_str[SO_LOCK_HISTORY_STR_LEN];
 
+	bzero(lock_history_str, sizeof(lock_history_str));
         for (i = SO_LCKDBG_MAX - 1; i >= 0; i--) {
                 n += snprintf(lock_history_str + n, SO_LOCK_HISTORY_STR_LEN - n, "%lx:%lx ",
                         (uintptr_t) so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX],
@@ -4054,3 +4123,107 @@ so_isdstlocal(struct socket *so) {
 	} 
 	return 0;
 }
+
+int
+sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce)
+{
+	int err = 0, defunct;
+
+	defunct = (so->so_flags & SOF_DEFUNCT);
+	if (defunct) {
+		if (!(so->so_snd.sb_flags & so->so_rcv.sb_flags & SB_DROP))
+			panic("%s: SB_DROP not set", __func__);
+		goto done;
+	}
+
+	if (so->so_flags & SOF_NODEFUNCT) {
+		if (noforce) {
+			err = EOPNOTSUPP;
+			SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p "
+			    "[%d,%d] is not eligible for defunct (%d)\n",
+			    __func__, proc_selfpid(), proc_pid(p), level, so,
+			    INP_SOCKAF(so), INP_SOCKTYPE(so), err));
+			return (err);
+		}
+		so->so_flags &= ~SOF_NODEFUNCT;
+		SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] "
+		    "defunct by force\n", __func__, proc_selfpid(), proc_pid(p),
+		    level, so, INP_SOCKAF(so), INP_SOCKTYPE(so)));
+	}
+
+	so->so_flags |= SOF_DEFUNCT;
+	/* Prevent further data from being appended to the socket buffers */
+	so->so_snd.sb_flags |= SB_DROP;
+	so->so_rcv.sb_flags |= SB_DROP;
+
+done:
+	SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] %s "
+	    "defunct\n", __func__, proc_selfpid(), proc_pid(p), level, so,
+	    INP_SOCKAF(so), INP_SOCKTYPE(so),
+	    defunct ? "is already" : "marked as"));
+
+	return (err);
+}
+
+int
+sodefunct(struct proc *p, struct socket *so, int level)
+{
+	struct sockbuf *rcv, *snd;
+
+	if (!(so->so_flags & SOF_DEFUNCT))
+		panic("%s improperly called", __func__);
+
+	if (so->so_state & SS_DEFUNCT)
+		goto done;
+
+	rcv = &so->so_rcv;
+	snd = &so->so_snd;
+
+	SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] is now "
+	    "defunct [rcv_si 0x%x, snd_si 0x%x, rcv_fl 0x%x, snd_fl 0x%x]\n",
+	    __func__, proc_selfpid(), proc_pid(p), level, so,
+	    INP_SOCKAF(so), INP_SOCKTYPE(so),
+	    (uint32_t)rcv->sb_sel.si_flags, (uint32_t)snd->sb_sel.si_flags,
+	    (uint16_t)rcv->sb_flags, (uint16_t)snd->sb_flags));
+
+	/*
+	 * Unwedge threads blocked on sbwait() and sb_lock().
+	 */
+	sbwakeup(rcv);
+	sbwakeup(snd);
+
+	if (rcv->sb_flags & SB_LOCK)
+		sbunlock(rcv, 1);
+	if (snd->sb_flags & SB_LOCK)
+		sbunlock(snd, 1);
+
+	/*
+	 * Flush the buffers and disconnect.  We explicitly call shutdown
+	 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
+	 * states are set for the socket.  This would also flush out data
+	 * hanging off the receive list of this socket.
+	 */
+	(void) soshutdownlock(so, SHUT_RD);
+	(void) soshutdownlock(so, SHUT_WR);
+	(void) sodisconnectlocked(so);
+
+	/*
+	 * Explicitly handle connectionless-protocol disconnection
+	 * and release any remaining data in the socket buffers.
+	 */
+	if (!(so->so_state & SS_ISDISCONNECTED))
+		(void) soisdisconnected(so);
+
+	if (so->so_error == 0)
+		so->so_error = EBADF;
+
+	if (rcv->sb_cc != 0)
+		sbrelease(rcv);
+	if (snd->sb_cc != 0)
+		sbrelease(snd);
+
+	so->so_state |= SS_DEFUNCT;
+
+done:
+	return (0);
+}
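
For reference, the two routines above form a two-phase teardown; a sketch of
the intended call pattern (assuming the caller holds the socket lock, as the
routines' unprotected use of so_flags suggests; `level' is used only for logging):

	/* Phase 1: mark the socket and stop new data (sosetdefunct);
	 * phase 2: tear down buffers and wake blocked threads (sodefunct). */
	static int
	defunct_one_socket(struct proc *p, struct socket *so, int level)
	{
		int err;

		err = sosetdefunct(p, so, level, TRUE);	/* respect SOF_NODEFUNCT */
		if (err == 0)
			err = sodefunct(p, so, level);
		return (err);
	}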
diff --git a/bsd/kern/uipc_socket2.c b/bsd/kern/uipc_socket2.c
index a6b2af000..4b71dd80c 100644
--- a/bsd/kern/uipc_socket2.c
+++ b/bsd/kern/uipc_socket2.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -222,6 +222,20 @@ soisdisconnected(struct socket *so)
 	sorwakeup(so);
 }
 
+/* This function issues a wakeup like soisdisconnected() but does not
+ * notify the socket filters. This avoids having to unlock the socket
+ * in the midst of closing it.
+ */
+void
+sodisconnectwakeup(struct socket *so)
+{
+	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
+	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
+	wakeup((caddr_t)&so->so_timeo);
+	sowwakeup(so);
+	sorwakeup(so);
+}
+
 /*
  * When an attempt at a new connection is noted on a socket
  * which accepts connections, sonewconn is called.  If the
@@ -276,7 +290,6 @@ sonewconn_internal(struct socket *head, int connstatus)
 		return ((struct socket *)0);
 	}
 
-	so->so_head = head;
 	so->so_type = head->so_type;
 	so->so_options = head->so_options &~ SO_ACCEPTCONN;
 	so->so_linger = head->so_linger;
@@ -285,13 +298,15 @@ sonewconn_internal(struct socket *head, int connstatus)
 	so->so_timeo = head->so_timeo;
 	so->so_pgid  = head->so_pgid;
 	so->so_uid = head->so_uid;
+	so->so_gid = head->so_gid;
 	/* inherit socket options stored in so_flags */
 	so->so_flags = head->so_flags & (SOF_NOSIGPIPE |
 					 SOF_NOADDRAVAIL |
 					 SOF_REUSESHAREUID | 
 					 SOF_NOTIFYCONFLICT | 
 					 SOF_BINDRANDOMPORT | 
-					 SOF_NPX_SETOPTSHUT);
+					 SOF_NPX_SETOPTSHUT |
+					 SOF_NODEFUNCT);
 	so->so_usecount = 1;
 	so->next_lock_lr = 0;
 	so->next_unlock_lr = 0;
@@ -307,15 +322,11 @@ sonewconn_internal(struct socket *head, int connstatus)
 #endif
 
 	/* inherit traffic management properties of listener */
-	so->so_traffic_mgt_flags = head->so_traffic_mgt_flags &
-	    (TRAFFIC_MGT_SO_BACKGROUND | TRAFFIC_MGT_SO_BG_REGULATE);
+	so->so_traffic_mgt_flags = head->so_traffic_mgt_flags & (TRAFFIC_MGT_SO_BACKGROUND);
 	so->so_background_thread = head->so_background_thread;
-#if PKT_PRIORITY
 	so->so_traffic_class = head->so_traffic_class;
-#endif /* PKT_PRIORITY */
 
 	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
-		sflt_termsock(so);
 		sodealloc(so);
 		return ((struct socket *)0);
 	}
@@ -328,17 +339,36 @@ sonewconn_internal(struct socket *head, int connstatus)
 		socket_unlock(head, 0);
 	if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) ||
 	    error) {
-		sflt_termsock(so);
 		sodealloc(so);
 		if (head->so_proto->pr_unlock)
 			socket_lock(head, 0);
 		return ((struct socket *)0);
 	}
-	if (head->so_proto->pr_unlock)
+	if (head->so_proto->pr_unlock) {
 		socket_lock(head, 0);
+		/* Radar 7385998: Recheck that the head is still accepting
+		 * connections, to avoid a race when the head is being closed.
+		 */
+		if ((head->so_options & SO_ACCEPTCONN) == 0) {
+			so->so_state &= ~SS_NOFDREF;
+			soclose(so);
+			return ((struct socket *)0);
+		}
+	}
+
 #ifdef __APPLE__
 	so->so_proto->pr_domain->dom_refs++;
 #endif
+	/* Insert into the appropriate lists of the head socket */
+	so->so_head = head;
+
+	/* Since this socket is going to be inserted into the incomp
+	 * queue, it can be picked up by another thread in
+	 * tcp_dropdropablreq and get dropped before it is set up.
+	 * To prevent this race, set the in-progress flag, which can
+	 * be cleared later.
+	 */
+	so->so_flags |= SOF_INCOMP_INPROGRESS;
 
 	if (connstatus) {
 		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
@@ -367,27 +397,7 @@ sonewconn_internal(struct socket *head, int connstatus)
 struct socket *
 sonewconn(struct socket *head, int connstatus, const struct sockaddr *from)
 {
-	int error = 0;
-	struct socket_filter_entry *filter;
-	int filtered = 0;
-
-	for (filter = head->so_filt; filter && (error == 0);
-	    filter = filter->sfe_next_onsocket) {
-		if (filter->sfe_filter->sf_filter.sf_connect_in) {
-			if (filtered == 0) {
-				filtered = 1;
-				sflt_use(head);
-				socket_unlock(head, 0);
-			}
-			error = filter->sfe_filter->sf_filter.
-			    sf_connect_in(filter->sfe_cookie, head, from);
-		}
-	}
-	if (filtered != 0) {
-		socket_lock(head, 0);
-		sflt_unuse(head);
-	}
-
+	int error = sflt_connectin(head, from);
 	if (error) {
 		return (NULL);
 	}
@@ -443,6 +453,7 @@ sbwait(struct sockbuf *sb)
 		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
 	else
 		mutex_held = so->so_proto->pr_domain->dom_mtx;
+	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
 
 	sb->sb_flags |= SB_WAIT;
 
@@ -458,8 +469,13 @@ sbwait(struct sockbuf *sb)
 	if (so->so_usecount < 1)
 		panic("sbwait: so=%p refcount=%d\n", so, so->so_usecount);
 
-	if ((so->so_state & SS_DRAINING)) {
+	if ((so->so_state & SS_DRAINING) || (so->so_flags & SOF_DEFUNCT)) {
 		error = EBADF;
+		if (so->so_flags & SOF_DEFUNCT) {
+			SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n",
+			    __func__, proc_selfpid(), so, INP_SOCKAF(so),
+			    INP_SOCKTYPE(so), error));
+		}
 	}
 
 	return (error);
@@ -484,10 +500,13 @@ sb_lock(struct sockbuf *sb)
 
 	while (sb->sb_flags & SB_LOCK) {
 		sb->sb_flags |= SB_WANT;
+
 		if (so->so_proto->pr_getlock != NULL)
 			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
 		else
 			mutex_held = so->so_proto->pr_domain->dom_mtx;
+		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
+
 		if (so->so_usecount < 1)
 			panic("sb_lock: so=%p refcount=%d\n", so,
 			    so->so_usecount);
@@ -498,6 +517,14 @@ sb_lock(struct sockbuf *sb)
 		if (so->so_usecount < 1)
 			panic("sb_lock: 2 so=%p refcount=%d\n", so,
 			    so->so_usecount);
+
+		if (error == 0 && (so->so_flags & SOF_DEFUNCT)) {
+			error = EBADF;
+			SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n",
+			    __func__, proc_selfpid(), so, INP_SOCKAF(so),
+			    INP_SOCKTYPE(so), error));
+		}
+
 		if (error)
 			return (error);
 	}
@@ -505,6 +532,15 @@ sb_lock(struct sockbuf *sb)
 	return (0);
 }
 
+void
+sbwakeup(struct sockbuf *sb)
+{
+	if (sb->sb_flags & SB_WAIT) {
+		sb->sb_flags &= ~SB_WAIT;
+		wakeup((caddr_t)&sb->sb_cc);
+	}
+}
+
 /*
  * Wakeup processes waiting on a socket buffer.
  * Do asynchronous notification via SIGIO
@@ -513,12 +549,17 @@ sb_lock(struct sockbuf *sb)
 void
 sowakeup(struct socket *so, struct sockbuf *sb)
 {
+	if (so->so_flags & SOF_DEFUNCT) {
+		SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] si 0x%x, "
+		    "fl 0x%x [%s]\n", __func__, proc_selfpid(), so,
+		    INP_SOCKAF(so), INP_SOCKTYPE(so),
+		    (uint32_t)sb->sb_sel.si_flags, (uint16_t)sb->sb_flags,
+		    (sb->sb_flags & SB_RECV) ? "rcv" : "snd"));
+	}
+
 	sb->sb_flags &= ~SB_SEL;
 	selwakeup(&sb->sb_sel);
-	if (sb->sb_flags & SB_WAIT) {
-		sb->sb_flags &= ~SB_WAIT;
-		wakeup((caddr_t)&sb->sb_cc);
-	}
+	sbwakeup(sb);
 	if (so->so_state & SS_ASYNC) {
 		if (so->so_pgid < 0)
 			gsignal(-so->so_pgid, SIGIO);
@@ -685,7 +726,7 @@ sbappend(struct sockbuf *sb, struct mbuf *m)
 		return (sbappendrecord(sb, m));
 
 	if (sb->sb_flags & SB_RECV) {
-		int error = sflt_data_in(so, NULL, &m, NULL, 0, NULL);
+		int error = sflt_data_in(so, NULL, &m, NULL, 0);
 		SBLASTRECORDCHK(sb, "sbappend 2");
 		if (error != 0) {
 			if (error != EJUSTRETURN)
@@ -724,7 +765,7 @@ sbappendstream(struct sockbuf *sb, struct mbuf *m)
 	}
 
 	if (sb->sb_flags & SB_RECV) {
-		int error = sflt_data_in(so, NULL, &m, NULL, 0, NULL);
+		int error = sflt_data_in(so, NULL, &m, NULL, 0);
 		SBLASTRECORDCHK(sb, "sbappendstream 1");
 		if (error != 0) {
 			if (error != EJUSTRETURN)
@@ -844,7 +885,7 @@ sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
 
 	if (sb->sb_flags & SB_RECV) {
 		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
-		    sock_data_filt_flag_record, NULL);
+		    sock_data_filt_flag_record);
 		if (error != 0) {
 			SBLASTRECORDCHK(sb, "sbappendrecord 1");
 			if (error != EJUSTRETURN)
@@ -895,7 +936,7 @@ sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
 
 	if ((sb->sb_flags & SB_RECV) != 0) {
 		int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL,
-		    sock_data_filt_flag_oob, NULL);
+		    sock_data_filt_flag_oob);
 
 		SBLASTRECORDCHK(sb, "sbinsertoob 2");
 		if (error) {
@@ -1040,7 +1081,7 @@ sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
 	/* Call socket data in filters */
 	if ((sb->sb_flags & SB_RECV) != 0) {
 		int error;
-		error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0, NULL);
+		error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0);
 		SBLASTRECORDCHK(sb, __func__);
 		if (error) {
 			if (error != EJUSTRETURN) {
@@ -1135,7 +1176,7 @@ sbappendcontrol(struct sockbuf *sb, struct mbuf	*m0, struct mbuf *control,
 	if (sb->sb_flags & SB_RECV) {
 		int error;
 
-		error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0, NULL);
+		error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0);
 		SBLASTRECORDCHK(sb, __func__);
 		if (error) {
 			if (error != EJUSTRETURN) {
@@ -1413,6 +1454,38 @@ sbcreatecontrol(caddr_t p, int size, int type, int level)
 	return (m);
 }
 
+struct mbuf **
+sbcreatecontrol_mbuf(caddr_t p, int size, int type, int level, struct mbuf **mp)
+{
+	struct mbuf *m;
+	struct cmsghdr *cp;
+
+	if (*mp == NULL) {
+		*mp = sbcreatecontrol(p, size, type, level);
+		return (mp);
+	}
+
+	if (CMSG_SPACE((u_int)size) + (*mp)->m_len > MLEN) {
+		mp = &(*mp)->m_next;
+		*mp = sbcreatecontrol(p, size, type, level);
+		return (mp);
+	}
+
+	m = *mp;
+
+	cp = (struct cmsghdr *)(mtod(m, char *) + m->m_len);
+	m->m_len += CMSG_SPACE(size);
+
+	/* XXX check size? */
+	(void) memcpy(CMSG_DATA(cp), p, size);
+	cp->cmsg_len = CMSG_LEN(size);
+	cp->cmsg_level = level;
+	cp->cmsg_type = type;
+
+	return (mp);
+}
+
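A sketch of how a protocol might use sbcreatecontrol_mbuf() to coalesce two
control messages into one mbuf where they fit (assuming a struct timeval tv and
an int tc already filled in; option names as used elsewhere in this file):

	struct mbuf *control = NULL, **mp = &control;

	/* The first cmsg allocates the mbuf... */
	mp = sbcreatecontrol_mbuf((caddr_t)&tv, sizeof (tv),
	    SCM_TIMESTAMP, SOL_SOCKET, mp);
	/* ...the second is appended in place if its CMSG_SPACE still
	 * fits within MLEN, otherwise a new mbuf is chained on. */
	mp = sbcreatecontrol_mbuf((caddr_t)&tc, sizeof (tc),
	    SO_TRAFFIC_CLASS, SOL_SOCKET, mp);
	if (*mp == NULL) {
		/* allocation failed; drop or error out */
	}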
 /*
  * Some routines that return EOPNOTSUPP for entry points that are not
  * supported by a protocol.  Fill in as needed.
@@ -1858,72 +1931,12 @@ soisbackground(struct socket *so)
 	return (so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BACKGROUND);
 }
 
-#if PKT_PRIORITY
-#define _MIN_NXT_CMSGHDR_PTR(cmsg)                              \
-	((char *)(cmsg) +                                       \
-	    __DARWIN_ALIGN32((__uint32_t)(cmsg)->cmsg_len) +    \
-	    __DARWIN_ALIGN32(sizeof(struct cmsghdr)))
-
-#define M_FIRST_CMSGHDR(m)                                                                      \
-        ((char *)(m) != (char *)0L && (size_t)(m)->m_len >= sizeof(struct cmsghdr) &&           \
-	  (socklen_t)(m)->m_len >= __DARWIN_ALIGN32(((struct cmsghdr *)(m)->m_data)->cmsg_len) ?\
-         (struct cmsghdr *)(m)->m_data :                                                        \
-         (struct cmsghdr *)0L)
-
-#define M_NXT_CMSGHDR(m, cmsg)                                                  \
-        ((char *)(cmsg) == (char *)0L ? M_FIRST_CMSGHDR(m) :                    \
-            _MIN_NXT_CMSGHDR_PTR(cmsg) > ((char *)(m)->m_data) + (m)->m_len ||  \
-            _MIN_NXT_CMSGHDR_PTR(cmsg) < (char *)(m)->m_data ?                  \
-                (struct cmsghdr *)0L /* NULL */ :                               \
-                (struct cmsghdr *)((unsigned char *)(cmsg) +                    \
-                            __DARWIN_ALIGN32((__uint32_t)(cmsg)->cmsg_len)))
-#endif /* PKT_PRIORITY */
-
-__private_extern__ int
-mbuf_traffic_class_from_control(struct mbuf *control)
-{
-#if !PKT_PRIORITY
-#pragma unused(control)
-	return MBUF_TC_NONE;
-#else /* PKT_PRIORITY */
-	struct cmsghdr *cm;
-	
-	for (cm = M_FIRST_CMSGHDR(control); cm; cm = M_NXT_CMSGHDR(control, cm)) {
-		int tc;	
-
-		if (cm->cmsg_len < sizeof(struct cmsghdr))
-			break;
-	
-		if (cm->cmsg_level != SOL_SOCKET || cm->cmsg_type != SO_TRAFFIC_CLASS)
-			continue;
-		if (cm->cmsg_len != CMSG_LEN(sizeof(int)))
-			continue;
-
-		tc = *(int *)CMSG_DATA(cm);
-
-		switch (tc) {
-			case SO_TC_BE:
-				return MBUF_TC_BE;
-			case SO_TC_BK:
-				return MBUF_TC_BK;
-			case SO_TC_VI:
-				return MBUF_TC_VI;
-			case SO_TC_VO:
-				return MBUF_TC_VO;
-			default:
-				break;
-		}
-	}
-	
-	return MBUF_TC_NONE;
-#endif /* PKT_PRIORITY */
-}
 
 /*
  * Here is the definition of some of the basic objects in the kern.ipc
  * branch of the MIB.
  */
-SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IPC");
+SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW|CTLFLAG_LOCKED|CTLFLAG_ANYBODY, 0, "IPC");
 
 /* Check that the maximum socket buffer size is within a range */
 
@@ -1946,20 +1959,20 @@ sysctl_sb_max(__unused struct sysctl_oid *oidp, __unused void *arg1,
 	return error;
 }
 
-SYSCTL_PROC(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &sb_max, 0, &sysctl_sb_max, "IU", "Maximum socket buffer size");
 
-SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD,
+SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD | CTLFLAG_LOCKED,
     &maxsockets, 0, "Maximum number of sockets available");
-SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
+SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW | CTLFLAG_LOCKED,
     &sb_efficiency, 0, "");
-SYSCTL_INT(_kern_ipc, OID_AUTO, sbspace_factor, CTLFLAG_RW,
+SYSCTL_INT(_kern_ipc, OID_AUTO, sbspace_factor, CTLFLAG_RW | CTLFLAG_LOCKED,
     &sbspace_factor, 0, "Ratio of mbuf/cluster use for socket layers");
-SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
+SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD | CTLFLAG_LOCKED,
     &nmbclusters, 0, "");
-SYSCTL_INT(_kern_ipc, OID_AUTO, njcl, CTLFLAG_RD, &njcl, 0, "");
-SYSCTL_INT(_kern_ipc, OID_AUTO, njclbytes, CTLFLAG_RD, &njclbytes, 0, "");
-SYSCTL_INT(_kern_ipc, KIPC_SOQLIMITCOMPAT, soqlimitcompat, CTLFLAG_RW,
+SYSCTL_INT(_kern_ipc, OID_AUTO, njcl, CTLFLAG_RD | CTLFLAG_LOCKED, &njcl, 0, "");
+SYSCTL_INT(_kern_ipc, OID_AUTO, njclbytes, CTLFLAG_RD | CTLFLAG_LOCKED, &njclbytes, 0, "");
+SYSCTL_INT(_kern_ipc, KIPC_SOQLIMITCOMPAT, soqlimitcompat, CTLFLAG_RW | CTLFLAG_LOCKED,
     &soqlimitcompat, 1, "Enable socket queue limit compatibility");
-SYSCTL_INT(_kern_ipc, OID_AUTO, soqlencomp, CTLFLAG_RW,
+SYSCTL_INT(_kern_ipc, OID_AUTO, soqlencomp, CTLFLAG_RW | CTLFLAG_LOCKED,
     &soqlencomp, 0, "Listen backlog represents only complete queue");
diff --git a/bsd/kern/uipc_syscalls.c b/bsd/kern/uipc_syscalls.c
index 3c0aec400..521de769e 100644
--- a/bsd/kern/uipc_syscalls.c
+++ b/bsd/kern/uipc_syscalls.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -86,6 +86,7 @@
 #include <sys/kernel.h>
 #include <sys/uio_internal.h>
 #include <sys/kauth.h>
+#include <kern/task.h>
 
 #include <security/audit/audit.h>
 
@@ -191,6 +192,22 @@ socket(struct proc *p, struct socket_args *uap, int32_t *retval)
 	if (error) {
 		fp_free(p, fd, fp);
 	} else {
+		thread_t		thread;
+		struct uthread		*ut;
+
+		thread = current_thread();
+		ut = get_bsdthread_info(thread);
+
+		/* If this is a backgrounded thread, throttle all new sockets */
+#if !CONFIG_EMBEDDED
+		if (proc_get_selfthread_isbackground() != 0)
+#else /* !CONFIG_EMBEDDED */
+		if ((ut->uu_flag & UT_BACKGROUND) != 0)
+#endif /* !CONFIG_EMBEDDED */
+		{
+			so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND;
+			so->so_background_thread = thread;
+		}
 		fp->f_data = (caddr_t)so;
 
 		proc_fdlock(p);
@@ -510,16 +527,12 @@ gotnoname:
 
 releasefd:
 	/*
-	 * If the socket has been marked as inactive by soacceptfilter(),
-	 * disallow further operations on it.  We explicitly call shutdown
-	 * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
-	 * states are set for the socket.  This would also flush out data
-	 * hanging off the receive list of this socket.
+	 * If the socket has been marked as inactive by sosetdefunct(),
+	 * disallow further operations on it.
 	 */
 	if (so->so_flags & SOF_DEFUNCT) {
-		(void) soshutdownlock(so, SHUT_RD);
-		(void) soshutdownlock(so, SHUT_WR);
-		(void) sodisconnectlocked(so);
+		sodefunct(current_proc(), so,
+		    SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL);
 	}
 
 	if (dosocklock)
@@ -735,6 +748,9 @@ socketpair(struct proc *p, struct socketpair_args *uap,
 		}
 	}
 
+	if ((error = copyout(sv, uap->rsv, 2 * sizeof (int))) != 0)
+		goto free4;
+
 	proc_fdlock(p);
 	procfdtbl_releasefd(p, sv[0], NULL);
 	procfdtbl_releasefd(p, sv[1], NULL);
@@ -742,8 +758,7 @@ socketpair(struct proc *p, struct socketpair_args *uap,
 	fp_drop(p, sv[1], fp2, 1);
 	proc_fdunlock(p);
 
-	error = copyout((caddr_t)sv, uap->rsv, 2 * sizeof (int));
-	return (error);
+	return (0);
 free4:
 	fp_free(p, sv[1], fp2);
 free3:
@@ -1194,63 +1209,79 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop,
 		while (m && len > 0) {
 			unsigned int tocopy;
 			struct cmsghdr *cp = mtod(m, struct cmsghdr *);
-	
-			/* 
-			 * SCM_TIMESTAMP hack because  struct timeval has a 
-			 * different size for 32 bits and 64 bits processes
-			 */
-			if (cp->cmsg_level == SOL_SOCKET &&  cp->cmsg_type == SCM_TIMESTAMP) {
-				unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
-				struct cmsghdr *tmp_cp = (struct cmsghdr *)tmp_buffer;
-				int tmp_space;
-				struct timeval *tv = (struct timeval *)CMSG_DATA(cp);
-				
-				tmp_cp->cmsg_level = SOL_SOCKET;
-				tmp_cp->cmsg_type = SCM_TIMESTAMP;
+			int cp_size = CMSG_ALIGN(cp->cmsg_len);
+			int buflen = m->m_len;
+
+			while (buflen > 0 && len > 0) {
-				if (proc_is64bit(p)) {
-					struct user64_timeval *tv64 = (struct user64_timeval *)CMSG_DATA(tmp_cp);
-
-					tv64->tv_sec = tv->tv_sec;
-					tv64->tv_usec = tv->tv_usec;
-
-					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
-					tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
-				} else {
-					struct user32_timeval *tv32 = (struct user32_timeval *)CMSG_DATA(tmp_cp);
+				/*
+				 * SCM_TIMESTAMP hack because struct timeval has a
+				 * different size for 32-bit and 64-bit processes
+				 */
+				if (cp->cmsg_level == SOL_SOCKET &&  cp->cmsg_type == SCM_TIMESTAMP) {
+					unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))];
+					struct cmsghdr *tmp_cp = (struct cmsghdr *)tmp_buffer;
+					int tmp_space;
+					struct timeval *tv = (struct timeval *)CMSG_DATA(cp);
+					
+					tmp_cp->cmsg_level = SOL_SOCKET;
+					tmp_cp->cmsg_type = SCM_TIMESTAMP;
+					
+					if (proc_is64bit(p)) {
+						struct user64_timeval *tv64 = (struct user64_timeval *)CMSG_DATA(tmp_cp);
+						
+						tv64->tv_sec = tv->tv_sec;
+						tv64->tv_usec = tv->tv_usec;
+						
+						tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval));
+						tmp_space = CMSG_SPACE(sizeof(struct user64_timeval));
+					} else {
+						struct user32_timeval *tv32 = (struct user32_timeval *)CMSG_DATA(tmp_cp);
+						
+						tv32->tv_sec = tv->tv_sec;
+						tv32->tv_usec = tv->tv_usec;
+						
+						tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
+						tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
+					}
+					if (len >= tmp_space) {
+						tocopy = tmp_space;
+					} else {
+						mp->msg_flags |= MSG_CTRUNC;
+						tocopy = len;
+					}
+					error = copyout(tmp_buffer, ctlbuf, tocopy);
+					if (error)
+						goto out;
 					
-					tv32->tv_sec = tv->tv_sec;
-					tv32->tv_usec = tv->tv_usec;
-
-					tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval));
-					tmp_space = CMSG_SPACE(sizeof(struct user32_timeval));
-				}
-				if (len >= tmp_space) {
-					tocopy = tmp_space;
-				} else {
-					mp->msg_flags |= MSG_CTRUNC;
-					tocopy = len;
-				}
-				error = copyout(tmp_buffer, ctlbuf, tocopy);
-				if (error)
-					goto out;
-
-			} else {
-				if (len >= m->m_len) {
-					tocopy = m->m_len;
 				} else {
-					mp->msg_flags |= MSG_CTRUNC;
-					tocopy = len;
+					if (cp_size > buflen) {
+						panic("cp_size > buflen, something wrong with alignment!");
+					}
+
+					if (len >= cp_size) {
+						tocopy = cp_size;
+					} else {
+						mp->msg_flags |= MSG_CTRUNC;
+						tocopy = len;
+					}
+
+					error = copyout((caddr_t)cp, ctlbuf,
+					    tocopy);
+					if (error)
+						goto out;
 				}
-	
-				error = copyout((caddr_t)mtod(m, caddr_t), ctlbuf,
-					tocopy);
-				if (error)
-					goto out;
+
+				ctlbuf += tocopy;
+				len -= tocopy;
+
+				buflen -= cp_size;
+				cp = (struct cmsghdr *) ((unsigned char *) cp + cp_size);
+				cp_size = CMSG_ALIGN(cp->cmsg_len);
 			}
-
-			ctlbuf += tocopy;
-			len -= tocopy;
+
 			m = m->m_next;
 		}
 		mp->msg_controllen = ctlbuf - mp->msg_control;
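
The kernel-side walk above mirrors what a userland receiver does with the
standard CMSG macros; a sketch (assuming msg has been filled in by recvmsg()
with a valid msg_control buffer):

	struct msghdr msg;
	struct cmsghdr *cm;

	for (cm = CMSG_FIRSTHDR(&msg); cm != NULL;
	    cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level == SOL_SOCKET &&
		    cm->cmsg_type == SCM_TIMESTAMP) {
			struct timeval tv;

			memcpy(&tv, CMSG_DATA(cm), sizeof (tv));
			/* tv now holds the packet timestamp */
		}
	}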
@@ -1266,7 +1297,6 @@ out1:
 	return (error);
 }
 
-
 /*
  * Returns:	0			Success
  *		ENOMEM
@@ -1698,28 +1728,9 @@ getsockname(__unused struct proc *p, struct getsockname_args *uap,
 	socket_lock(so, 1);
 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
 	if (error == 0) {
-		struct socket_filter_entry *filter;
-		int	filtered = 0;
-		for (filter = so->so_filt; filter && error == 0;
-		    filter = filter->sfe_next_onsocket) {
-			if (filter->sfe_filter->sf_filter.sf_getsockname) {
-				if (!filtered) {
-					filtered = 1;
-					sflt_use(so);
-					socket_unlock(so, 0);
-				}
-				error = filter->sfe_filter->sf_filter.
-				    sf_getsockname(filter->sfe_cookie, so, &sa);
-			}
-		}
-
+		error = sflt_getsockname(so, &sa);
 		if (error == EJUSTRETURN)
 			error = 0;
-
-		if (filtered) {
-			socket_lock(so, 0);
-			sflt_unuse(so);
-		}
 	}
 	socket_unlock(so, 1);
 	if (error)
@@ -1802,28 +1813,9 @@ getpeername(__unused struct proc *p, struct getpeername_args *uap,
 	sa = 0;
 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
 	if (error == 0) {
-		struct socket_filter_entry *filter;
-		int	filtered = 0;
-		for (filter = so->so_filt; filter && error == 0;
-		    filter = filter->sfe_next_onsocket) {
-			if (filter->sfe_filter->sf_filter.sf_getpeername) {
-				if (!filtered) {
-					filtered = 1;
-					sflt_use(so);
-					socket_unlock(so, 0);
-				}
-				error = filter->sfe_filter->sf_filter.
-				    sf_getpeername(filter->sfe_cookie, so, &sa);
-			}
-		}
-
+		error = sflt_getpeername(so, &sa);
 		if (error == EJUSTRETURN)
 			error = 0;
-
-		if (filtered) {
-			socket_lock(so, 0);
-			sflt_unuse(so);
-		}
 	}
 	socket_unlock(so, 1);
 	if (error)
@@ -1983,7 +1975,7 @@ SYSCTL_DECL(_kern_ipc);
 
 #define	SFUIOBUFS 64
 static int sendfileuiobufs = SFUIOBUFS;
-SYSCTL_INT(_kern_ipc, OID_AUTO, sendfileuiobufs, CTLFLAG_RW, &sendfileuiobufs,
+SYSCTL_INT(_kern_ipc, OID_AUTO, sendfileuiobufs, CTLFLAG_RW | CTLFLAG_LOCKED, &sendfileuiobufs,
     0, "");
 
 /* Macros to compute the number of mbufs needed depending on cluster size */
@@ -2026,13 +2018,13 @@ alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
 	 * use mbuf_allocpacket().  The logic below is similar to sosend().
 	 */
 	*m = NULL;
-	if (pktlen > NBPG && jumbocl) {
+	if (pktlen > MBIGCLBYTES && jumbocl) {
 		needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen));
 		*m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES);
 	}
 	if (*m == NULL) {
 		needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen));
-		*m = m_getpackets_internal(&needed, 1, how, 0, NBPG);
+		*m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES);
 	}
 
 	/*
@@ -2043,7 +2035,7 @@ alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks,
 	 */
 	if (*m == NULL) {
 		needed = 1;
-		*m = m_getpackets_internal(&needed, 1, M_WAIT, 1, NBPG);
+		*m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES);
 	}
 	if (*m == NULL)
 		panic("%s: blocking allocation returned NULL\n", __func__);
@@ -2295,7 +2287,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval)
 
 		if (xfsize != uio_resid(auio))
 			printf("sendfile: xfsize: %lld != uio_resid(auio): "
-			    "%lld\n", xfsize, uio_resid(auio));
+				"%lld\n", xfsize, (long long)uio_resid(auio));
 
 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START),
 		    uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff),
@@ -2385,53 +2377,20 @@ retry_space:
 			}
 			goto retry_space;
 		}
+		
+		struct mbuf *control = NULL;
 		{
 			/*
 			 * Socket filter processing
 			 */
-			struct socket_filter_entry *filter;
-			int filtered = 0;
-			struct mbuf *control = NULL;
-			boolean_t recursive = (so->so_send_filt_thread != NULL);
 
-			error = 0;
-			for (filter = so->so_filt; filter && (error == 0);
-			    filter = filter->sfe_next_onsocket) {
-				if (filter->sfe_filter->sf_filter.sf_data_out) {
-					if (filtered == 0) {
-						filtered = 1;
-						so->so_send_filt_thread =
-						    current_thread();
-						sflt_use(so);
-						socket_unlock(so, 0);
-					}
-					error = filter->sfe_filter->sf_filter.
-					    sf_data_out(filter->sfe_cookie, so,
-					    NULL, &m0, &control, 0);
-				}
-			}
-
-			if (filtered) {
-				/*
-				 * At this point, we've run at least one filter.
-				 * The socket is unlocked as is the socket
-				 * buffer.  Clear the recorded filter thread
-				 * only when we are outside of a filter's
-				 * context.  This allows for a filter to issue
-				 * multiple inject calls from its sf_data_out
-				 * callback routine.
-				 */
-				socket_lock(so, 0);
-				sflt_unuse(so);
-				if (!recursive)
-					so->so_send_filt_thread = 0;
-				if (error) {
-					if (error == EJUSTRETURN) {
-						error = 0;
-						continue;
-					}
-					goto done3;
+			error = sflt_data_out(so, NULL, &m0, &control, 0);
+			if (error) {
+				if (error == EJUSTRETURN) {
+					error = 0;
+					continue;
 				}
+				goto done3;
 			}
 			/*
 			 * End Socket filter processing
@@ -2440,7 +2399,7 @@ retry_space:
 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
 		    uap->s, 0, 0, 0, 0);
 		error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0,
-		    0, 0, p);
+		    0, control, p);
 		KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START),
 		    uap->s, 0, 0, 0, 0);
 		if (error) {
diff --git a/bsd/kern/uipc_usrreq.c b/bsd/kern/uipc_usrreq.c
index 202f2d858..c64053a2c 100644
--- a/bsd/kern/uipc_usrreq.c
+++ b/bsd/kern/uipc_usrreq.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -117,6 +117,32 @@ static  u_int                   disconnect_in_progress;
 extern lck_mtx_t *uipc_lock;
 static	struct unp_head unp_shead, unp_dhead;
 
+/*
+ * mDNSResponder tracing.  When enabled, endpoints connected to
+ * /var/run/mDNSResponder will be traced; during each send on
+ * the traced socket, we log the PID and process name of the
+ * sending process.  We also print out a bit of info related
+ * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h
+ * of mDNSResponder stays the same.
+ */
+#define	MDNSRESPONDER_PATH	"/var/run/mDNSResponder"
+
+static int unpst_tracemdns;	/* enable tracing */
+
+#define	MDNS_IPC_MSG_HDR_VERSION_1	1
+
+struct mdns_ipc_msg_hdr {
+	uint32_t version;
+	uint32_t datalen;
+	uint32_t ipc_flags;
+	uint32_t op;
+	union {
+		void *context;
+		uint32_t u32[2];
+	} __attribute__((packed));
+	uint32_t reg_index;
+} __attribute__((packed));
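
The tracing knob is registered further down as net.local.stream.tracemdns;
a userland sketch of flipping it (root is required to write this sysctl):

	#include <sys/sysctl.h>

	static int
	enable_mdns_trace(void)
	{
		int one = 1;

		/* equivalent to `sysctl -w net.local.stream.tracemdns=1' */
		return (sysctlbyname("net.local.stream.tracemdns",
		    NULL, NULL, &one, sizeof (one)));
	}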
+
 /*
  * Unix communications domain.
  *
@@ -271,7 +297,7 @@ uipc_detach(struct socket *so)
 	if (unp == 0)
 		return (EINVAL);
 
-	lck_mtx_assert(unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
+	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
 	unp_detach(unp);
 	return (0);
 }
@@ -428,7 +454,8 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 		}
 
 		so2 = unp->unp_conn->unp_socket;
-		unp_get_locks_in_order(so, so2);
+		if (so != so2)
+			unp_get_locks_in_order(so, so2);
 
 		if (unp->unp_addr)
 			from = (struct sockaddr *)unp->unp_addr;
@@ -450,7 +477,8 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 			control = NULL;
 		}
 
-		socket_unlock(so2, 1);
+		if (so != so2) 
+			socket_unlock(so2, 1);
 
 		m = NULL;
 		if (nam)
@@ -498,6 +526,16 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
 			break;
 		}	
 
+		if (unp->unp_flags & UNP_TRACE_MDNS) {
+			struct mdns_ipc_msg_hdr hdr;
+
+			if (mbuf_copydata(m, 0, sizeof (hdr), &hdr) == 0 &&
+			    hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {
+				printf("%s[mDNSResponder] pid=%d (%s): op=0x%x\n",
+				    __func__, p->p_pid, p->p_comm, ntohl(hdr.op));
+			}
+		}
+
 		/*
 		 * Send to paired receive port, and then reduce send buffer
 		 * hiwater marks to maintain backpressure.  Wake up readers.
@@ -694,17 +732,19 @@ static int	unp_rights;			/* file descriptors in flight */
 static int	unp_disposed;			/* discarded file descriptors */
 
 SYSCTL_DECL(_net_local_stream);
-SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
+SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpst_sendspace, 0, "");
-SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
+SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpst_recvspace, 0, "");
+SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,
+   &unpst_tracemdns, 0, "");
 SYSCTL_DECL(_net_local_dgram);
-SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
+SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpdg_sendspace, 0, "");
-SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
+SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
    &unpdg_recvspace, 0, "");
 SYSCTL_DECL(_net_local);
-SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
+SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");
 
 /*
  * Returns:	0			Success
@@ -739,11 +779,8 @@ unp_attach(struct socket *so)
 		return (ENOBUFS);
 	bzero(unp, sizeof (*unp));
 
-	unp->unp_mtx = lck_mtx_alloc_init(unp_mtx_grp, unp_mtx_attr);
-	if (unp->unp_mtx == NULL) {
-		zfree(unp_zone, unp);
-		return(ENOBUFS);
-	}
+	lck_mtx_init(&unp->unp_mtx, 
+		unp_mtx_grp, unp_mtx_attr);
 
 	lck_rw_lock_exclusive(unp_list_mtx);
 	LIST_INIT(&unp->unp_refs);
@@ -892,7 +929,7 @@ unp_bind(
 	socket_unlock(so, 0);
 
 	strlcpy(buf, soun->sun_path, namelen+1);
-	NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
+	NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
 	    CAST_USER_ADDR_T(buf), ctx);
 	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
 	error = namei(&nd);
@@ -938,7 +975,7 @@ unp_bind(
 
 	if (!error) {
 		/* create the socket */
-		error = vn_create(dvp, &vp, &nd.ni_cnd, &va, 0, ctx);
+		error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);
 	}
 
 	nameidone(&nd);
@@ -1001,7 +1038,7 @@ unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
 	strlcpy(buf, soun->sun_path, len+1);
 	socket_unlock(so, 0);
 
-	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
+	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
 	    CAST_USER_ADDR_T(buf), ctx);
 	error = namei(&nd);
 	if (error) {
@@ -1046,8 +1083,13 @@ unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
 
 	if (so2->so_pcb == NULL) {
 		error = ECONNREFUSED;
-		socket_unlock(so2, 1);
-		socket_lock(so, 0);
+		if (so != so2) {
+			socket_unlock(so2, 1);
+			socket_lock(so, 0);
+		} else {
+			/* Release the reference held for the listen socket */
+			so2->so_usecount--;
+		}
 		goto out;
 	}
 
@@ -1055,7 +1097,7 @@ unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
 		socket_unlock(so2, 0);
 		socket_lock(so, 0);
 		socket_lock(so2, 0);
-	} else {
+	} else if (so > so2) {
 		socket_lock(so, 0);
 	}
 	/*
@@ -1064,15 +1106,13 @@ unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
 	 * XXX - probably shouldn't return an error for SOCK_DGRAM
 	 */
 	if ((so->so_state & SS_ISCONNECTED) != 0) {
-		socket_unlock(so2, 1);
 		error = EISCONN;
-		goto out;
+		goto decref_out;
 	}
 
 	if (so->so_type != so2->so_type) {
-		socket_unlock(so2, 1);
 		error = EPROTOTYPE;
-		goto out;
+		goto decref_out;
 	}
 
 	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
@@ -1149,19 +1189,41 @@ unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)
 		socket_lock(so3, 1);
 		so2 = so3;
 
+		/*
+		 * Enable tracing for mDNSResponder endpoints.  (The use
+		 * of sizeof instead of strlen below takes the null
+		 * terminating character into account.)
+		 */
+		if (unpst_tracemdns &&
+		    !strncmp(soun->sun_path, MDNSRESPONDER_PATH,
+		    sizeof (MDNSRESPONDER_PATH))) {
+			unp->unp_flags |= UNP_TRACE_MDNS;
+			unp2->unp_flags |= UNP_TRACE_MDNS;
+		}
 	}
 	
 	error = unp_connect2(so, so2);
+
+decref_out:
 	if (so2 != NULL) {
-		socket_unlock(so2, 1);
+		if (so != so2) {
+			socket_unlock(so2, 1);
+		} else {
+			/* Release the extra reference held for the listen
+			 * socket. This is possible only for SOCK_DGRAM sockets;
+			 * SOCK_STREAM sockets are refused a connection to
+			 * themselves.
+			 */
+			so2->so_usecount--;
+		}
 	}
 
 	if (list_so != NULL) {
 		socket_lock(list_so, 0);
 		socket_unlock(list_so, 1);
 	}
+
 out:
-	lck_mtx_assert(unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
+	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
 	vnode_put(vp);
 	return (error);
 }
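
The so == so2 branches above cover a datagram socket connecting to its own
bound address, which is legal for SOCK_DGRAM; a userland sketch of the case
being handled (hypothetical path name):

	#include <sys/socket.h>
	#include <sys/un.h>
	#include <string.h>

	int s = socket(AF_UNIX, SOCK_DGRAM, 0);
	struct sockaddr_un sun;

	memset(&sun, 0, sizeof (sun));
	sun.sun_family = AF_UNIX;
	strlcpy(sun.sun_path, "/tmp/self.sock", sizeof (sun.sun_path));

	/* bind and then connect to ourselves: so == so2 in unp_connect() */
	bind(s, (struct sockaddr *)&sun, sizeof (sun));
	connect(s, (struct sockaddr *)&sun, sizeof (sun));
	/* A SOCK_STREAM socket attempting the same is refused. */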
@@ -1182,8 +1244,8 @@ unp_connect2(struct socket *so, struct socket *so2)
 
 	unp2 = sotounpcb(so2);
 
-	lck_mtx_assert(unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
-	lck_mtx_assert(unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
+	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
+	lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
 
 	/* Verify both sockets are still opened */
 	if (unp == 0 || unp2 == 0)
@@ -1197,15 +1259,18 @@ unp_connect2(struct socket *so, struct socket *so2)
 	case SOCK_DGRAM:
 		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
 
-		
-		/* Avoid lock order reversals due to drop/acquire in soisconnected. */
- 		/* Keep an extra reference on so2 that will be dropped
-		 * soon after getting the locks in order 
-		 */ 
-		socket_unlock(so2, 0);
-		soisconnected(so);
-		unp_get_locks_in_order(so, so2);
-		so2->so_usecount--;
+		if (so != so2) {
+			/* Avoid lock order reversals due to drop/acquire in
+			 * soisconnected. Keep an extra reference on so2 that
+			 * will be dropped soon after getting the locks in order.
+			 */
+			socket_unlock(so2, 0);
+			soisconnected(so);
+			unp_get_locks_in_order(so, so2);
+			so2->so_usecount--;
+		} else {
+			soisconnected(so);
+		}
 
 		break;
 
@@ -1242,8 +1307,8 @@ unp_connect2(struct socket *so, struct socket *so2)
 	default:
 		panic("unknown socket type %d in unp_connect2", so->so_type);
 	}
-	lck_mtx_assert(unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
-	lck_mtx_assert(unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
+	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
+	lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
 	return (0);
 }
 
@@ -1284,7 +1349,12 @@ unp_disconnect(struct unpcb *unp)
 	so2 = unp2->unp_socket;
 
 try_again:
-	if (so < so2) {
+	if (so == so2) {
+		if (so_locked == 0) {
+			socket_lock(so, 0);
+		}
+		waitso = so;
+	} else if (so < so2) {
 		if (so_locked == 0) {
 			socket_lock(so, 0);
 		}
@@ -1298,19 +1368,22 @@ try_again:
 		socket_lock(so, 0);
 		waitso = so;
 	}
+	so_locked = 1;
 
-	lck_mtx_assert(unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
-	lck_mtx_assert(unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
+	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
+	lck_mtx_assert(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);
 
 	/* Check for the UNP_DONTDISCONNECT flag; if it
 	 * is set, release both sockets and go to sleep.
 	 */
 	
 	if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {
-		socket_unlock(so2, 1);
+		if (so != so2) {
+			socket_unlock(so2, 1);
+		}
 		so_locked = 0;
 
-		(void)msleep(waitso->so_pcb, unp->unp_mtx, 
+		(void)msleep(waitso->so_pcb, &unp->unp_mtx, 
 			PSOCK | PDROP, "unpdisconnect", NULL);
 		goto try_again;
 	}
@@ -1322,12 +1395,16 @@ try_again:
 	unp->unp_conn = NULL;
 	so2->so_usecount--;
 
+	if (unp->unp_flags & UNP_TRACE_MDNS)
+		unp->unp_flags &= ~UNP_TRACE_MDNS;
+
 	switch (unp->unp_socket->so_type) {
 
 	case SOCK_DGRAM:
 		LIST_REMOVE(unp, unp_reflink);
 		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
-		socket_unlock(so2, 1);
+		if (so != so2)
+			socket_unlock(so2, 1);
 		break;
 
 	case SOCK_STREAM:
@@ -1343,6 +1420,10 @@ try_again:
 
 		unp2->unp_socket->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
 		unp->unp_socket->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
+
+		if (unp2->unp_flags & UNP_TRACE_MDNS)
+			unp2->unp_flags &= ~UNP_TRACE_MDNS;
+
 		strdisconn = 1;
 		break;
 	default:
@@ -1362,7 +1443,7 @@ out:
 		socket_lock(so,0);
 		soisdisconnected(so);
 	}
-	lck_mtx_assert(unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
+	lck_mtx_assert(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);
 	return;
 }
 
@@ -1519,10 +1600,10 @@ unp_pcblist SYSCTL_HANDLER_ARGS
 	return (error);
 }
 
-SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
+SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
             (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
             "List of active local datagram sockets");
-SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
+SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
             (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
             "List of active local stream sockets");
 
@@ -1662,10 +1743,10 @@ unp_pcblist64 SYSCTL_HANDLER_ARGS
 	return (error);
 }
 
-SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64, CTLFLAG_RD,
+SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED,
 	    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64",
 	    "List of active local datagram sockets 64 bit");
-SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64, CTLFLAG_RD,
+SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED,
 	    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64",
 	    "List of active local stream sockets 64 bit");
 
@@ -2195,7 +2276,7 @@ unp_lock(struct socket *so, int refcount, void * lr)
         else lr_saved = lr;
 
         if (so->so_pcb) {
-                lck_mtx_lock(((struct unpcb *)so->so_pcb)->unp_mtx);
+                lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);
         } else  {
                 panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x\n", 
 			so, lr_saved, so->so_usecount);
@@ -2232,7 +2313,7 @@ unp_unlock(struct socket *so, int refcount, void * lr)
         if (so->so_pcb == NULL) {
                 panic("unp_unlock: so=%p NO PCB usecount=%x\n", so, so->so_usecount);
         } else {
-                mutex_held = ((struct unpcb *)so->so_pcb)->unp_mtx;
+                mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;
         }
         lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
         so->unlock_lr[so->next_unlock_lr] = lr_saved;
@@ -2245,8 +2326,6 @@ unp_unlock(struct socket *so, int refcount, void * lr)
 			FREE(unp->unp_addr, M_SONAME);
 		
 		lck_mtx_unlock(mutex_held);
-		if (unp->unp_mtx)
-			lck_mtx_free(unp->unp_mtx, unp_mtx_grp);
 
 		unp->unp_gencnt = ++unp_gencnt;
 		zfree(unp_zone, unp);
@@ -2269,7 +2348,7 @@ unp_getlock(struct socket *so, __unused int locktype)
         if (so->so_pcb)  {
                 if (so->so_usecount < 0)
                         panic("unp_getlock: so=%p usecount=%x\n", so, so->so_usecount);
-                return(unp->unp_mtx);
+                return(&unp->unp_mtx);
         } else {
                 panic("unp_getlock: so=%p NULL so_pcb\n", so);
                 return (so->so_proto->pr_domain->dom_mtx);
diff --git a/bsd/kern/vm_pressure.c b/bsd/kern/vm_pressure.c
new file mode 100644
index 000000000..b5fc2f072
--- /dev/null
+++ b/bsd/kern/vm_pressure.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <libkern/libkern.h>
+#include <mach/mach_types.h>
+#include <mach/task.h>
+#include <sys/proc_internal.h>
+#include <sys/event.h>
+#include <sys/eventvar.h>
+#include <kern/locks.h>
+#include <sys/queue.h>
+#include <kern/vm_pressure.h>
+#include <sys/malloc.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+void vm_pressure_klist_lock(void);
+void vm_pressure_klist_unlock(void);
+
+void vm_dispatch_memory_pressure(void);
+int vm_try_terminate_candidates(void);
+int vm_try_pressure_candidates(void);
+void vm_recharge_active_list(void);
+
+struct klist vm_pressure_klist;
+struct klist vm_pressure_klist_dormant;
+
+void vm_pressure_klist_lock(void) {
+	lck_mtx_lock(&vm_pressure_klist_mutex);
+}
+
+void vm_pressure_klist_unlock(void) {
+	lck_mtx_unlock(&vm_pressure_klist_mutex);
+}
+
+int vm_knote_register(struct knote *kn) {
+	int rv = 0;
+	
+	vm_pressure_klist_lock();
+	
+	if (kn->kn_sfflags & NOTE_VM_PRESSURE) {
+#if DEBUG
+		printf("[vm_pressure] process %d registering pressure notification\n", kn->kn_kq->kq_p->p_pid);
+#endif
+		KNOTE_ATTACH(&vm_pressure_klist, kn);
+	} else
+		rv = ENOTSUP;
+	
+	vm_pressure_klist_unlock();
+	
+	return rv;
+}
+
+void vm_knote_unregister(struct knote *kn) {
+	struct knote *kn_temp;
+	
+	vm_pressure_klist_lock();
+	
+#if DEBUG
+	printf("[vm_pressure] process %d cancelling pressure notification\n", kn->kn_kq->kq_p->p_pid);
+#endif
+	
+	SLIST_FOREACH(kn_temp, &vm_pressure_klist, kn_selnext) {
+		if (kn_temp == kn) {
+			KNOTE_DETACH(&vm_pressure_klist, kn);
+			vm_pressure_klist_unlock();
+			return;
+		}
+	}
+	/* Not on the active list; it must be on the dormant list */
+	KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
+	
+	vm_pressure_klist_unlock();
+}
+
+/* Interface for event dispatch from vm_pageout_garbage_collect thread */
+void consider_pressure_events(void) {
+	vm_dispatch_memory_pressure();
+}
+
+void vm_dispatch_memory_pressure(void) {	
+	vm_pressure_klist_lock();
+	
+	if (!SLIST_EMPTY(&vm_pressure_klist)) {
+		
+#if DEBUG
+		printf("[vm_pressure] vm_dispatch_memory_pressure\n");
+#endif
+		
+		if (vm_try_pressure_candidates()) {
+			vm_pressure_klist_unlock();
+			return;
+		}
+		
+	}
+	
+	/* Else... */
+	
+#if DEBUG
+	printf("[vm_pressure] could not find suitable event candidate\n");
+#endif
+	
+	vm_recharge_active_list();
+	
+	vm_pressure_klist_unlock();
+}
+
+/*
+ * Try standard pressure event candidates.  Called with klist lock held.
+ */
+int vm_try_pressure_candidates(void) {
+	/* 
+	 * This value is the threshold that a process must meet to be considered for scavenging.
+	 * If a process has sufficiently little resident memory, there is probably no use scavenging it.
+	 * At best, we'll scavenge very little memory.  At worst, we'll page in code pages or malloc metadata.
+	 */
+	
+#define VM_PRESSURE_MINIMUM_RSIZE	(10 * 1024 * 1024)
+	
+	struct proc *p_max = NULL;
+	unsigned int resident_max = 0;
+	struct knote *kn_max = NULL;
+	struct knote *kn;
+	
+	SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
+		if (kn != NULL && kn->kn_kq != NULL && kn->kn_kq->kq_p != NULL) {
+			if (kn->kn_sfflags & NOTE_VM_PRESSURE) {
+				struct proc *p = kn->kn_kq->kq_p;
+				if (!(kn->kn_status & KN_DISABLED)) {
+					kern_return_t kr = KERN_SUCCESS;
+					struct task *t = (struct task *)(p->task);
+					struct task_basic_info basic_info;
+					mach_msg_type_number_t size = TASK_BASIC_INFO_COUNT;
+					if ((kr = task_info(t, TASK_BASIC_INFO, (task_info_t)(&basic_info), &size)) == KERN_SUCCESS) {
+						unsigned int resident_size = basic_info.resident_size;
+						/* 
+						 * We don't want a small process to block large processes from 
+						 * being notified again.  <rdar://problem/7955532>
+						 */						
+						if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) {
+							if (resident_size > resident_max) {
+								p_max = p;
+								resident_max = resident_size;
+								kn_max = kn;
+							}
+						} else {
+#if DEBUG
+							/* There was no candidate with enough resident memory to scavenge */
+							/* This debug print makes too much noise now */
+							//printf("[vm_pressure] threshold failed for pid %d with %u resident, skipping...\n", p->p_pid, resident_size);
+#endif
+						}
+					} else {
+#if DEBUG
+						printf("[vm_pressure] task_info for pid %d failed with %d\n", p->p_pid, kr);
+#endif
+					}
+				} else {
+#if DEBUG
+					printf("[vm_pressure] pid %d currently disabled, skipping...\n", p->p_pid);
+#endif
+				}
+			}
+		} else {
+#if DEBUG
+			if (kn == NULL) {
+				printf("[vm_pressure] kn is NULL\n");
+			} else if (kn->kn_kq == NULL) {
+				printf("[vm_pressure] kn->kn_kq is NULL\n");
+			} else if (kn->kn_kq->kq_p == NULL) {
+				printf("[vm_pressure] kn->kn_kq->kq_p is NULL\n");
+			}
+#endif
+		}
+	}
+	
+	if (kn_max == NULL) return 0;
+
+#if DEBUG
+	printf("[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max);
+#endif
+
+	KNOTE_DETACH(&vm_pressure_klist, kn_max);
+	struct klist dispatch_klist = { NULL };
+	KNOTE_ATTACH(&dispatch_klist, kn_max);
+	KNOTE(&dispatch_klist, NOTE_VM_PRESSURE);
+	KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max);
+	
+	return 1;
+}
+
+
+/*
+ * Remove all elements from the dormant list and place them on the active list.
+ * Called with klist lock held.
+ */
+void vm_recharge_active_list(void) {
+	/* Re-charge the main list from the dormant list if possible */
+	if (!SLIST_EMPTY(&vm_pressure_klist_dormant)) {
+#if DEBUG
+		printf("[vm_pressure] recharging main list from dormant list\n");
+#endif	
+		struct knote *kn;
+		while (!SLIST_EMPTY(&vm_pressure_klist_dormant)) {
+			kn = SLIST_FIRST(&vm_pressure_klist_dormant);
+			SLIST_REMOVE_HEAD(&vm_pressure_klist_dormant, kn_selnext);
+			SLIST_INSERT_HEAD(&vm_pressure_klist, kn, kn_selnext);
+		}
+	}
+}
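
Processes land on vm_pressure_klist via the kqueue path that ends in
vm_knote_register(); a userland sketch of signing up for pressure
notifications (assuming the EVFILT_VM filter and NOTE_VM_PRESSURE flag
from sys/event.h, as used above):

	#include <sys/event.h>

	static int
	watch_vm_pressure(void)
	{
		int kq = kqueue();
		struct kevent kev;

		EV_SET(&kev, 0, EVFILT_VM, EV_ADD, NOTE_VM_PRESSURE, 0, NULL);
		if (kq == -1 || kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
			return (-1);
		/* A later kevent() call blocks until this process is
		 * chosen by vm_try_pressure_candidates(). */
		return (kq);
	}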
diff --git a/bsd/ppc/reg.h b/bsd/kern/vm_pressure.h
similarity index 80%
rename from bsd/ppc/reg.h
rename to bsd/kern/vm_pressure.h
index 0449be6df..8063c820a 100644
--- a/bsd/ppc/reg.h
+++ b/bsd/kern/vm_pressure.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,21 +25,17 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * Copyright 1993, NeXT Computer, Inc.
- */
-
-#ifndef _BSD_PPC_REG_H_
-#define _BSD_PPC_REG_H_
 
+#ifndef VM_PRESSURE_H
+#define VM_PRESSURE_H
 
-#ifdef	BSD_KERNEL_PRIVATE
+#include <sys/queue.h>
 
-/* Index into the thread_state */
-#define SP	3
-#define PC 	0
+static lck_mtx_t vm_pressure_klist_mutex;
 
-#endif /* KERNEL_PRIVATE */
+int vm_knote_register(struct knote *);
+void vm_knote_unregister(struct knote *);
 
-#endif /* _BSD_PPC_REG_H_ */
+void consider_pressure_events(void);
 
+#endif /* VM_PRESSURE_H */
diff --git a/bsd/libkern/libkern.h b/bsd/libkern/libkern.h
index 6fd1f7a86..0d9cff919 100644
--- a/bsd/libkern/libkern.h
+++ b/bsd/libkern/libkern.h
@@ -203,16 +203,7 @@ extern void flush_dcache64(addr64_t, unsigned, int);
 static __inline__ unsigned int
 clz(unsigned int num)
 {
-#if __ppc__
-	unsigned int result;
-	__asm__ volatile(
-		"cntlzw %0, %1"
-		: "=r" (result)
-		: "r" (num)
-	);
-	return result;
-
-#elif __i386__
+#if __i386__
 	unsigned int result;
 	__asm__ volatile(
 		"bsrl   %1, %0\n\t"
diff --git a/bsd/machine/_limits.h b/bsd/machine/_limits.h
index dd32b6197..c1d8abd07 100644
--- a/bsd/machine/_limits.h
+++ b/bsd/machine/_limits.h
@@ -28,9 +28,7 @@
 #ifndef _BSD_MACHINE__LIMITS_H_
 #define _BSD_MACHINE__LIMITS_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/_limits.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/_limits.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/_param.h b/bsd/machine/_param.h
index 844370744..beb2cb939 100644
--- a/bsd/machine/_param.h
+++ b/bsd/machine/_param.h
@@ -25,9 +25,7 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/_param.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/_param.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/_structs.h b/bsd/machine/_structs.h
index a0e15996e..509d5f618 100644
--- a/bsd/machine/_structs.h
+++ b/bsd/machine/_structs.h
@@ -25,9 +25,7 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/_structs.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/_structs.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/_types.h b/bsd/machine/_types.h
index ceac56ea0..92c65bf6c 100644
--- a/bsd/machine/_types.h
+++ b/bsd/machine/_types.h
@@ -28,9 +28,7 @@
 #ifndef _BSD_MACHINE__TYPES_H_
 #define _BSD_MACHINE__TYPES_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/_types.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/_types.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/dis_tables.h b/bsd/machine/dis_tables.h
index 6eaff8106..7ac37dd7e 100644
--- a/bsd/machine/dis_tables.h
+++ b/bsd/machine/dis_tables.h
@@ -28,9 +28,7 @@
 #ifndef _BSD_MACHINE_DIS_TABLES_H_
 #define _BSD_MACHINE_DIS_TABLES_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/dis_tables.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/dis_tables.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/disklabel.h b/bsd/machine/disklabel.h
index 93fa986ed..490bbda8a 100644
--- a/bsd/machine/disklabel.h
+++ b/bsd/machine/disklabel.h
@@ -28,9 +28,7 @@
 #ifndef _BSD_MACHINE_CPU_H_
 #define _BSD_MACHINE_CPU_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/disklabel.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/disklabel.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/endian.h b/bsd/machine/endian.h
index 879cf17bd..871af6483 100644
--- a/bsd/machine/endian.h
+++ b/bsd/machine/endian.h
@@ -31,9 +31,7 @@
 #ifndef _BSD_MACHINE_ENDIAN_H_
 #define _BSD_MACHINE_ENDIAN_H_
 
-#if defined (__ppc__) || defined(__ppc64__)
-#include "ppc/endian.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/endian.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/exec.h b/bsd/machine/exec.h
index fc8a27279..1a6417179 100644
--- a/bsd/machine/exec.h
+++ b/bsd/machine/exec.h
@@ -44,16 +44,14 @@ struct exec_info {
 struct exec_archhandler {
 	char path[MAXPATHLEN];
 	uint32_t fsid;
-	long fileid;
+	uint64_t fileid;
 };
 
 extern struct exec_archhandler exec_archhandler_ppc;
 int set_archhandler(struct proc *, int);
 int grade_binary(cpu_type_t, cpu_subtype_t);
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/exec.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/exec.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/fasttrap_isa.h b/bsd/machine/fasttrap_isa.h
index d57bac1ba..cfe9e297a 100644
--- a/bsd/machine/fasttrap_isa.h
+++ b/bsd/machine/fasttrap_isa.h
@@ -28,9 +28,7 @@
 #ifndef _BSD_MACHINE_FASTTRAP_ISA_H_
 #define _BSD_MACHINE_FASTTRAP_ISA_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/fasttrap_isa.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/fasttrap_isa.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/limits.h b/bsd/machine/limits.h
index 0f40842f9..e96709f89 100644
--- a/bsd/machine/limits.h
+++ b/bsd/machine/limits.h
@@ -2,9 +2,7 @@
    compiler.  GCC provides its own limits.h which can be found in
    /usr/lib/gcc, although it is not very informative.  
    This file is public domain.  */
-#if defined (__ppc__) || defined (__ppc64__)
-#include <ppc/limits.h>
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include <i386/limits.h>
 #else
 #error architecture not supported
diff --git a/bsd/machine/param.h b/bsd/machine/param.h
index 6253a5fb4..2724da7e1 100644
--- a/bsd/machine/param.h
+++ b/bsd/machine/param.h
@@ -31,9 +31,7 @@
 #ifndef _BSD_MACHINE_PARAM_H_
 #define _BSD_MACHINE_PARAM_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/param.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/param.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/profile.h b/bsd/machine/profile.h
index ea28264c6..cc8a5eac0 100644
--- a/bsd/machine/profile.h
+++ b/bsd/machine/profile.h
@@ -33,9 +33,7 @@
 #ifndef _BSD_MACHINE_PROFILE_H_
 #define _BSD_MACHINE_PROFILE_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/profile.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/profile.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/psl.h b/bsd/machine/psl.h
index 711639e4f..01c6e0a25 100644
--- a/bsd/machine/psl.h
+++ b/bsd/machine/psl.h
@@ -28,9 +28,7 @@
 #ifndef _BSD_MACHINE_PSL_H_
 #define _BSD_MACHINE_PSL_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/psl.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/psl.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/ptrace.h b/bsd/machine/ptrace.h
index 031327fe4..3320c2226 100644
--- a/bsd/machine/ptrace.h
+++ b/bsd/machine/ptrace.h
@@ -31,9 +31,7 @@
 #ifndef _BSD_MACHINE_PTRACE_H_
 #define _BSD_MACHINE_PTRACE_H_
 
-#if defined (__ppc__) || defined(__ppc64__)
-#include "ppc/ptrace.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/ptrace.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/reboot.h b/bsd/machine/reboot.h
index cf91c27da..864f1970c 100644
--- a/bsd/machine/reboot.h
+++ b/bsd/machine/reboot.h
@@ -28,9 +28,7 @@
 #ifndef _BSD_MACHINE_REBOOT_H_
 #define _BSD_MACHINE_REBOOT_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/reboot.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/reboot.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/reg.h b/bsd/machine/reg.h
index 95ec0f7d1..30e5dc524 100644
--- a/bsd/machine/reg.h
+++ b/bsd/machine/reg.h
@@ -28,9 +28,7 @@
 #ifndef _BSD_MACHINE_REG_H_
 #define _BSD_MACHINE_REG_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/reg.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/reg.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/setjmp.h b/bsd/machine/setjmp.h
index 4f37be1d3..262acfbc8 100644
--- a/bsd/machine/setjmp.h
+++ b/bsd/machine/setjmp.h
@@ -31,9 +31,7 @@
 #ifndef	_MACHINE_SETJMP_H_
 #define	_MACHINE_SETJMP_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/setjmp.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/setjmp.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/signal.h b/bsd/machine/signal.h
index 227d4182e..4b7f69c19 100644
--- a/bsd/machine/signal.h
+++ b/bsd/machine/signal.h
@@ -28,9 +28,7 @@
 #ifndef _BSD_MACHINE_SIGNAL_H_
 #define _BSD_MACHINE_SIGNAL_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/signal.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/signal.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/types.h b/bsd/machine/types.h
index ed113ddde..5d6d4db44 100644
--- a/bsd/machine/types.h
+++ b/bsd/machine/types.h
@@ -31,9 +31,7 @@
 #ifndef _BSD_MACHINE_TYPES_H_
 #define _BSD_MACHINE_TYPES_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/types.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/types.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/ucontext.h b/bsd/machine/ucontext.h
index a0e91489a..60e157643 100644
--- a/bsd/machine/ucontext.h
+++ b/bsd/machine/ucontext.h
@@ -28,9 +28,7 @@
 #ifndef	_MACHINE_UCONTEXT_H_
 #define	_MACHINE_UCONTEXT_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/ucontext.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/ucontext.h"
 #else
 #error architecture not supported
diff --git a/bsd/machine/vmparam.h b/bsd/machine/vmparam.h
index 8911ea054..54b212382 100644
--- a/bsd/machine/vmparam.h
+++ b/bsd/machine/vmparam.h
@@ -28,9 +28,7 @@
 #ifndef _BSD_MACHINE_VMPARAM_H_
 #define _BSD_MACHINE_VMPARAM_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "ppc/vmparam.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/vmparam.h"
 #else
 #error architecture not supported
diff --git a/bsd/man/man2/Makefile b/bsd/man/man2/Makefile
index 12cc26329..93d247a32 100644
--- a/bsd/man/man2/Makefile
+++ b/bsd/man/man2/Makefile
@@ -67,6 +67,7 @@ DATAFILES = \
 	getauid.2		\
 	getdirentries.2		\
 	getdirentriesattr.2	\
+	getdtablesize.2		\
 	getegid.2		\
 	geteuid.2		\
 	getfh.2			\
@@ -145,6 +146,11 @@ DATAFILES = \
 	rmdir.2			\
 	searchfs.2		\
 	select.2		\
+	sem_close.2		\
+	sem_open.2		\
+	sem_post.2		\
+	sem_unlink.2		\
+	sem_wait.2		\
 	semctl.2		\
 	semget.2		\
 	semop.2			\
@@ -166,12 +172,16 @@ DATAFILES = \
 	setpgid.2		\
 	setpgrp.2		\
 	setpriority.2		\
+	setregid.2		\
+	setreuid.2		\
 	setrlimit.2		\
 	setsid.2		\
 	setsockopt.2		\
 	settimeofday.2		\
 	setuid.2		\
 	setxattr.2		\
+	shm_open.2		\
+	shm_unlink.2		\
 	shmat.2			\
 	shmctl.2		\
 	shmdt.2			\
@@ -194,6 +204,7 @@ DATAFILES = \
 	syscall.2		\
 	truncate.2		\
 	umask.2			\
+	undelete.2		\
 	unlink.2		\
 	unmount.2		\
 	utimes.2		\
@@ -207,7 +218,8 @@ DATAFILES = \
 	posix_spawn.2
 
 INSTALL_MAN_LINKS = \
-	posix_spawn.2 posix_spawnp.2
+	posix_spawn.2 posix_spawnp.2	\
+	sem_wait.2 sem_trywait.2
 
 INSTALL_MAN_LIST = ${DATAFILES}
 
diff --git a/bsd/man/man2/auditon.2 b/bsd/man/man2/auditon.2
index bf37e6ab4..4d551ba7a 100644
--- a/bsd/man/man2/auditon.2
+++ b/bsd/man/man2/auditon.2
@@ -243,6 +243,15 @@ structure with the
 field set to the maximum audit log file size.
 A value of 0
 indicates no limit to the size.
+.It Dv A_SETSFLAGS
+Set the audit session flags for the current session.
+The
+.Fa data
+argument must point to an
+.Vt au_asflgs_t
+value containing the new audit session flags.
+Audit session flags may be updated only according to local
+access control policy.
 .It Dv A_GETCLASS
 Return the event to class mapping for the designated audit event.
 The
@@ -376,6 +385,13 @@ The
 .Va af_currsz
 field
 will be set to the current audit log file size.
+.It Dv A_GETSFLAGS
+Returns the audit session flags for the current session.
+The
+.Fa data
+argument must point to an
+.Vt au_asflgs_t
+value, which will be set to the current session flags.
 .It Dv A_GETCWD
 .\" [COMMENTED OUT]: Valid description, not yet implemented.
 .\" Return the current working directory as stored in the audit subsystem.
diff --git a/bsd/man/man2/dup.2 b/bsd/man/man2/dup.2
index c13ca0bb5..897966a52 100644
--- a/bsd/man/man2/dup.2
+++ b/bsd/man/man2/dup.2
@@ -33,7 +33,7 @@
 .\"
 .\"     @(#)dup.2	8.1 (Berkeley) 6/4/93
 .\"
-.Dd June 4, 1993
+.Dd December 1, 2010
 .Dt DUP 2
 .Os BSD 4
 .Sh NAME
@@ -100,8 +100,18 @@ In
 the value of the new descriptor
 .Fa fildes2
 is specified.
-If this descriptor is already in use,
-the descriptor is first deallocated as if a
+If
+.Fa fildes
+and
+.Fa fildes2
+are equal, then
+.Fn dup2 
+just returns
+.Fa fildes2 ;
+no other changes are made to the existing descriptor.
+Otherwise, if descriptor
+.Fa fildes2
+is already in use, it is deallocated as if a
 .Xr close 2
 call had been done first.
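+.Pp
+For example, a process might redirect its standard output to a
+descriptor it already holds open (a minimal sketch; here
+.Va fd
+is assumed to be an open descriptor):
+.Bd -literal -offset indent
+if (dup2(fd, STDOUT_FILENO) == -1)
+        err(1, "dup2");
+.Ed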
 .Sh RETURN VALUES
diff --git a/bsd/man/man2/exchangedata.2 b/bsd/man/man2/exchangedata.2
index cc2111ea4..83dc23c1b 100644
--- a/bsd/man/man2/exchangedata.2
+++ b/bsd/man/man2/exchangedata.2
@@ -24,8 +24,9 @@
 .Nd atomically exchange data between two files
 .Sh SYNOPSIS
 .Fd #include <unistd.h>
+.Fd #include <sys/attr.h>
 .Ft int
-.Fn exchangedata "const char * path1" "const char * path2" "unsigned long options"
+.Fn exchangedata "const char * path1" "const char * path2" "unsigned int options"
 .
 .Sh DESCRIPTION
 The
diff --git a/bsd/man/man2/fcntl.2 b/bsd/man/man2/fcntl.2
index d6d1ce8cd..b53a38be1 100644
--- a/bsd/man/man2/fcntl.2
+++ b/bsd/man/man2/fcntl.2
@@ -1,5 +1,5 @@
 .\"
-.\" Copyright (c) 2008 Apple Inc.  All rights reserved.
+.\" Copyright (c) 2011 Apple Inc.  All rights reserved.
 .\"
 .\" @APPLE_LICENSE_HEADER_START@
 .\" 
@@ -56,7 +56,7 @@
 .\"
 .\"     @(#)fcntl.2	8.2 (Berkeley) 1/12/94
 .\"
-.Dd October 2, 2008
+.Dd February 17, 2011
 .Dt FCNTL 2
 .Os BSD 4.2
 .Sh NAME
@@ -98,26 +98,24 @@ Same file status flags (i.e., both file descriptors
 share the same file status flags).
 .It
 The close-on-exec flag associated with the new file descriptor
-is set to remain open across
+is cleared so that the descriptor remains open across an
 .Xr execv 2
-system calls.
+system call.
 .El
+.It Dv F_DUPFD_CLOEXEC
+Like
+.Dv F_DUPFD ,
+except that the close-on-exec flag associated with the new file descriptor
+is set.
 .It Dv F_GETFD
-Get the close-on-exec flag associated with the file descriptor
-.Fa fildes .
-If the low-order bit of the returned value is 0,
-the file will remain open across
-.Fn exec ,
-otherwise the file will be closed upon execution of
-.Fn exec
+Get the flags associated with the file descriptor
+.Fa fildes ,
+as described below
 .Fa ( arg
 is ignored).
 .It Dv F_SETFD
-Set the close-on-exec flag associated with
-.Fa fildes
-to the low order bit of
-.Fa arg
-(0 or 1 as above).
+Set the file descriptor flags to
+.Fa arg .
 .It Dv F_GETFL
 Get descriptor status flags, as described below
 .Fa ( arg
@@ -187,6 +185,9 @@ Get disk device information.
 Currently this only includes the
 disk device address that corresponds
 to the current file offset.
+.It Dv F_LOG2PHYS_EXT
+Variant of F_LOG2PHYS that uses the passed-in
+file offset and length.
 .It Dv F_FULLFSYNC
 Does the same thing as
 .Xr fsync 2
@@ -200,6 +201,43 @@ and Universal Disk Format (UDF) file systems.
 The operation may take quite a while to complete.
 Certain FireWire drives have also been known
 to ignore the request to flush their buffered data.
+.It Dv F_SETNOSIGPIPE
+Determines whether a
+.Dv SIGPIPE
+signal will be generated when a write fails on a pipe or socket for
+which there is no reader.  If
+.Fa arg
+is non-zero,
+.Dv SIGPIPE
+generation is disabled for descriptor
+.Fa fildes ,
+while an
+.Fa arg
+of zero enables it (the default).
+.It Dv F_GETNOSIGPIPE
+Returns whether a
+.Dv SIGPIPE
+signal will be generated when a write fails on a pipe or socket
+for which there is no reader.  The semantics of the return value
+match those of the
+.Fa arg
+of
+.Dv F_SETNOSIGPIPE .
+.El
+.Pp
+The flags for the
+.Dv F_GETFD
+and
+.Dv F_SETFD
+commands are as follows:
+.Bl -tag -width FD_CLOEXECX -offset indent
+.It Dv FD_CLOEXEC
+Close-on-exec; the given file descriptor will be automatically
+closed in the successor process image when one of the
+.Xr execv 2
+or
+.Xr posix_spawn 2
+families of system calls is invoked.
 .El
 .Pp
 The flags for the
@@ -476,15 +514,43 @@ commands operate on the following structure.
 .Pp
 The
 .Dv F_LOG2PHYS
-command operates on the following structure.
+command operates on the following structure:
 .ne 7v
 .Bd -literal
         struct log2phys {
-	    u_int32_t	l2p_flags;		/* unused so far */
-	    off_t	l2p_contigbytes;	/* unused so far */
-	    off_t	l2p_devoffset;	    /* bytes into device */
+            u_int32_t l2p_flags;        /* unused so far */
+            off_t     l2p_contigbytes;  /* unused so far */
+            off_t     l2p_devoffset;    /* bytes into device */
         };
 .Ed
+.Pp
+The
+.Dv F_LOG2PHYS_EXT
+command operates on the same structure as F_LOG2PHYS, but treats it as both input and output:
+.ne 7v
+.Bd -literal
+        struct log2phys {
+            u_int32_t l2p_flags;        /* unused so far */
+            off_t     l2p_contigbytes;  /* IN: number of bytes to be queried;
+                                           OUT: number of contiguous bytes allocated at this position */
+            off_t     l2p_devoffset;    /* IN: bytes into file;
+                                           OUT: bytes into device */
+        };
+.Ed
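+.Pp
+For example, the device offset of the byte at file offset one megabyte
+could be queried as follows (a sketch; the descriptor
+.Fa fildes
+is assumed to be open for reading):
+.Bd -literal -offset indent
+struct log2phys l2p;
+
+memset(&l2p, 0, sizeof(l2p));
+l2p.l2p_devoffset   = 1024 * 1024;  /* IN: bytes into file */
+l2p.l2p_contigbytes = 4096;         /* IN: bytes to be queried */
+if (fcntl(fildes, F_LOG2PHYS_EXT, &l2p) == -1)
+        err(1, "fcntl");
+/* l2p.l2p_devoffset now contains bytes into device */
+.Ed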
+.Pp
+If
+.Fa fildes
+is a socket, then the
+.Dv F_SETNOSIGPIPE
+and
+.Dv F_GETNOSIGPIPE
+commands are directly analogous, and fully interoperate with the
+.Dv SO_NOSIGPIPE
+option of
+.Xr setsockopt 2
+and
+.Xr getsockopt 2
+respectively.
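+.Pp
+For example, a write on a socket with no remaining reader can be made
+to fail with
+.Er EPIPE
+rather than raise
+.Dv SIGPIPE
+(here
+.Va sock
+is assumed to be a connected socket descriptor):
+.Bd -literal -offset indent
+if (fcntl(sock, F_SETNOSIGPIPE, 1) == -1)
+        err(1, "fcntl");
+.Ed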
 .Sh RETURN VALUES
 Upon successful completion, the value returned depends on
 .Fa cmd
@@ -579,6 +645,8 @@ The argument
 .Fa cmd
 is
 .Dv F_LOG2PHYS
+or
+.Dv F_LOG2PHYS_EXT
 and
 .Fa fildes
 is not a valid file descriptor open for reading.
@@ -696,6 +764,9 @@ the process ID given as argument is not in use.
 .Xr flock 2 ,
 .Xr getdtablesize 2 ,
 .Xr open 2 ,
+.Xr pipe 2 ,
+.Xr socket 2 ,
+.Xr setsockopt 2 ,
 .Xr sigaction 3
 .Sh HISTORY
 The
diff --git a/bsd/man/man2/getattrlist.2 b/bsd/man/man2/getattrlist.2
index 856f1a110..e2af8fc60 100644
--- a/bsd/man/man2/getattrlist.2
+++ b/bsd/man/man2/getattrlist.2
@@ -354,8 +354,11 @@ An
 structure containing the name of the file system object as
 UTF-8 encoded, null terminated C string.
 The attribute data length will not be greater than
-.Dv NAME_MAX +
-1.
+.Dv NAME_MAX
++ 1 characters, which is
+.Dv NAME_MAX
+* 3 + 1 bytes (as one UTF-8-encoded character may
+take up to three bytes).
 .Pp
 .
 .It ATTR_CMN_DEVID
@@ -570,6 +573,11 @@ field of the
 .Vt stat
 structure returned by
 .Xr stat 2 .
+Only the permission bits of
+.Fa st_mode
+are valid; other bits should be ignored,
+e.g., by masking with
+.Dv ~S_IFMT .
 .
 .It ATTR_CMN_NAMEDATTRCOUNT
 A
@@ -665,6 +673,13 @@ The attribute data length will not be greater than
 Inconsistent behavior may be observed when this attribute is requested on
 hard-linked items, particularly when the file system does not support ATTR_CMN_PARENTID
 natively. Callers should be aware of this when requesting the full path of a hard-linked item.
+.
+.It ATTR_CMN_ADDEDTIME
+A
+.Vt timespec
+that contains the time that the file system object was created or renamed into
+its containing directory.  Note that inconsistent behavior may be observed
+when this attribute is requested on hard-linked items.
 .Pp
 .
 .El
@@ -1288,6 +1303,13 @@ that did not support them.
 .Pp
 Introduced with Darwin 10.0 (Mac OS X version 10.6).
 .
+.It VOL_CAP_FMT_64BIT_OBJECT_IDS
+If this bit is set, the volume format uses object IDs that are 64-bit. 
+This means that ATTR_CMN_FILEID and ATTR_CMN_PARENTID are the only
+legitimate attributes for obtaining object IDs from this volume and the
+32-bit fid_objno fields of the fsobj_id_t returned by ATTR_CMN_OBJID,
+ATTR_CMN_OBJPERMANENTID, and ATTR_CMN_PAROBJID are undefined.
+.
 .El
 .Pp
 .
@@ -1602,6 +1624,10 @@ structure is 64-bits (two 32-bit elements) in 32-bit code, and
 128-bits (two 64-bit elements) in 64-bit code; however, it is aligned
 on a 4-byte (32-bit) boundary, even in 64-bit code.
 .Pp
+If you use a structure
+for the attribute data, it must be correctly packed and aligned (see
+examples).
+.Pp
 .
 Inconsistent behavior may be observed when the ATTR_CMN_FULLPATH attribute is requested on
 hard-linked items, particularly when the file system does not support ATTR_CMN_PARENTID
@@ -1633,7 +1659,7 @@ struct FInfoAttrBuf {
     u_int32_t       length;
     fsobj_type_t    objType;
     char            finderInfo[32];
-};
+}  __attribute__((aligned(4), packed));
 typedef struct FInfoAttrBuf FInfoAttrBuf;
 .Pp
 .
@@ -1700,14 +1726,14 @@ typedef struct attrlist attrlist_t;
 struct FInfo2CommonAttrBuf {
     fsobj_type_t    objType;
     char            finderInfo[32];
-};
+} __attribute__((aligned(4), packed));
 typedef struct FInfo2CommonAttrBuf FInfo2CommonAttrBuf;
 .Pp
 .
 struct FInfo2AttrBuf {
     u_int32_t           length;
     FInfo2CommonAttrBuf common;
-};
+} __attribute__((aligned(4), packed));
 typedef struct FInfo2AttrBuf FInfo2AttrBuf;
 .Pp
 .
@@ -1790,7 +1816,7 @@ struct VolAttrBuf {
     attrreference_t volNameRef;
     char            mountPointSpace[MAXPATHLEN];
     char            volNameSpace[MAXPATHLEN];
-};
+} __attribute__((aligned(4), packed));
 typedef struct VolAttrBuf VolAttrBuf;
 .Pp
 .
@@ -1843,6 +1869,53 @@ static int VolDemo(const char *path)
 }
 .Ed
 .Pp
+The following sample demonstrates the need to use packing and alignment
+controls; without the attribute, in 64-bit code, the fields of the structure are not
+placed at the locations that the kernel expects.
+.
+.Bd -literal
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <err.h>
+#include <time.h>
+#include <sys/attr.h>
+.Pp
+/* The alignment and packing attribute is necessary in 64-bit code */
+struct AttrListTimes {
+	u_int32_t       length;
+	struct timespec st_crtime;
+	struct timespec st_modtime;
+} __attribute__((aligned(4), packed));
+.Pp
+int main(int argc, char **argv)
+{
+	int             rv;
+	int             i;
+.Pp
+	for (i = 1; i < argc; i++) {
+		struct attrlist attrList;
+		struct AttrListTimes myStat = {0};
+		char           *path = argv[i];
+.Pp
+		memset(&attrList, 0, sizeof(attrList));
+		attrList.bitmapcount = ATTR_BIT_MAP_COUNT;
+		attrList.commonattr = ATTR_CMN_CRTIME |
+			ATTR_CMN_MODTIME;
+.Pp
+		rv = getattrlist(path, &attrList, &myStat, sizeof(myStat), 0);
+.Pp
+		if (rv == -1) {
+			warn("getattrlist(%s)", path);
+			continue;
+		}
+		printf("%s:  Modification time = %s", argv[i], ctime(&myStat.st_modtime.tv_sec));
+	}
+	return 0;
+}
+.Ed
+.Pp
 .
 .Sh SEE ALSO
 .
diff --git a/bsd/man/man2/getaudit.2 b/bsd/man/man2/getaudit.2
index 10f84aaf6..d2895cd33 100644
--- a/bsd/man/man2/getaudit.2
+++ b/bsd/man/man2/getaudit.2
@@ -59,7 +59,6 @@ The
 data structure is defined as follows:
 .nf
 .in +4n
-
 struct auditinfo {
 	au_id_t        ai_auid;         /* Audit user ID */
 	au_mask_t      ai_mask;         /* Audit masks */
@@ -74,15 +73,13 @@ The
 .Fa ai_auid
 variable contains the audit identifier which is recorded in the audit log for
 each event the process caused.
-.PP
-
+.Pp
 The
 .Fa au_mask_t
 data structure defines the bit mask for auditing successful and failed events
 out of the predefined list of event classes. It is defined as follows:
 .nf
 .in +4n
-
 struct au_mask {
 	unsigned int    am_success;     /* success bits */
 	unsigned int    am_failure;     /* failure bits */
@@ -90,15 +87,13 @@ struct au_mask {
 typedef struct au_mask  au_mask_t;
 .in
 .fi
-.PP
-
+.Pp
 The
 .Fa au_termid_t
 data structure defines the Terminal ID recorded with every event caused by the
 process. It is defined as follows:
 .nf
 .in +4n
-
 struct au_tid {
 	dev_t           port;
 	u_int32_t       machine;
@@ -106,8 +101,7 @@ struct au_tid {
 typedef struct au_tid   au_tid_t;
 .in
 .fi
-.PP
-
+.Pp
 The
 .Fa ai_asid
 variable contains the audit session ID which is recorded with every event
@@ -122,7 +116,6 @@ data structure supports Terminal IDs with larger addresses such as those used
 in IP version 6.  It is defined as follows:
 .nf
 .in +4n
-
 struct auditinfo_addr {
 	au_id_t         ai_auid;        /* Audit user ID. */
 	au_mask_t       ai_mask;        /* Audit masks. */
@@ -134,14 +127,12 @@ typedef struct auditinfo_addr   auditinfo_addr_t;
 .in
 .fi
 .Pp
-
 The
 .Fa au_tid_addr_t
 data structure which includes a larger address storage field and an additional
 field with the type of address stored:
 .nf
 .in +4n
-
 struct au_tid_addr {
 	dev_t           at_port;
 	u_int32_t       at_type;
diff --git a/bsd/man/man2/getauid.2 b/bsd/man/man2/getauid.2
index a89d98aae..373deb2a0 100644
--- a/bsd/man/man2/getauid.2
+++ b/bsd/man/man2/getauid.2
@@ -25,7 +25,7 @@
 .Os
 .Sh NAME
 .Nm getauid
-.Nd "retrieve audit session ID"
+.Nd "retrieve audit user ID"
 .Sh SYNOPSIS
 .In bsm/audit.h
 .Ft int
@@ -34,7 +34,7 @@
 The
 .Fn getauid
 system call
-retrieves the active audit session ID for the current process via the
+retrieves the active audit user ID for the current process via the
 .Vt au_id_t
 pointed to by
 .Fa auid .
diff --git a/bsd/man/man2/getdirentries.2 b/bsd/man/man2/getdirentries.2
index a77a5d8b6..a513ea8e8 100644
--- a/bsd/man/man2/getdirentries.2
+++ b/bsd/man/man2/getdirentries.2
@@ -67,14 +67,8 @@ with buffers smaller than this size.
 .Pp
 The data in the buffer is a series of
 .Em dirent
-structures each containing the following entries:
-.Bd -literal -offset indent
-u_int32_t	d_fileno;             /* file number of entry */
-u_int16_t	d_reclen;             /* length of this record */
-u_int8_t	d_type;               /* file type, see below */
-u_int8_t	d_namlen;             /* length of string in d_name */
-char    	d_name[MAXNAMELEN + 1]; /* see below */
-.Ed
+structures (see
+.Xr dir 5 ) .
 .Pp
 The
 .Fa d_fileno
@@ -166,7 +160,11 @@ will not work
 with 64-bit inodes; in order to use
 .Fn getdirentries ,
 .Dv _DARWIN_NO_64_BIT_INODE
-must be defined.
+must be defined.  See
+.Xr stat 2
+for more information on
+.Dv _DARWIN_NO_64_BIT_INODE
+and its other effects.
 .Sh RETURN VALUES
 If successful, the number of bytes actually transferred is returned.
 Otherwise, -1 is returned and the global variable
@@ -193,8 +191,10 @@ error occurred while reading from or writing to the file system.
 .Sh SEE ALSO
 .Xr lseek 2 ,
 .Xr open 2 ,
+.Xr stat 2 ,
 .Xr opendir 3 ,
-.Xr readdir 3
+.Xr readdir 3 ,
+.Xr dir 5
 .Sh HISTORY
 The
 .Fn getdirentries
diff --git a/bsd/man/man2/getdirentriesattr.2 b/bsd/man/man2/getdirentriesattr.2
index a2cc333ff..78a839766 100644
--- a/bsd/man/man2/getdirentriesattr.2
+++ b/bsd/man/man2/getdirentriesattr.2
@@ -163,7 +163,7 @@ However, since the variable is too small to hold an
 .Vt off_t ,
 you should use
 .Xr lseek 2
-to get the directoy's current position instead of using this parameter.
+to get the directory's current position instead of using this parameter.
 The initial value of the variable is ignored.
 .Pp
 .
@@ -245,6 +245,16 @@ If you're implementing a volume format that supports
 .Fn getdirentriesattr ,
 you should be careful to support the behaviour specified by this document.
 .
+.Pp
+If the directory contains a mount point, then
+.Dv DIR_MNTSTATUS_MNTPOINT
+will be set in the
+.Dv ATTR_DIR_MOUNTSTATUS
+for that entry; all other attributes for that entry, however,
+will be for the underlying file system (as opposed to the mounted
+file system).
+.Xr getattrlist 2
+should be used to get the attributes for the mount point.
 .Sh ERRORS
 .Fn getdirentriesattr
 will fail if:
@@ -315,7 +325,8 @@ struct FInfoAttrBuf {
     attrreference_t name;
     fsobj_type_t    objType;
     char            finderInfo[32];
-};
+    u_int32_t       dirStatus;
+} __attribute__((aligned(4), packed));
 typedef struct FInfoAttrBuf FInfoAttrBuf;
 .Pp
 .
@@ -358,6 +369,7 @@ static int FInfoDemo(const char *dirPath)
     attrList.commonattr  =    ATTR_CMN_NAME 
                             | ATTR_CMN_OBJTYPE 
                             | ATTR_CMN_FNDRINFO;
+    attrList.dirattr     = ATTR_DIR_MOUNTSTATUS;
 .Pp
     
     err = 0;
@@ -411,7 +423,10 @@ static int FInfoDemo(const char *dirPath)
                             );
                             break;
                         case VDIR:
-                            printf("directory     ");
+                            if (thisEntry->dirStatus & DIR_MNTSTATUS_MNTPOINT)
+                                printf("mount-point   ");
+                            else
+                                printf("directory     ");
                             break;
                         default:
                             printf(
@@ -428,7 +443,7 @@ static int FInfoDemo(const char *dirPath)
 .Pp
                     // Advance to the next entry.
 .Pp
-                    ((char *) thisEntry) += thisEntry->length;
+                    thisEntry = (FInfoAttrBuf*)((char*)thisEntry + thisEntry->length);
                 }
             }
         } while ( err == 0 && ! done );
diff --git a/bsd/man/man2/getdtablesize.2 b/bsd/man/man2/getdtablesize.2
new file mode 100644
index 000000000..7465f9aeb
--- /dev/null
+++ b/bsd/man/man2/getdtablesize.2
@@ -0,0 +1,63 @@
+.\" Copyright (c) 1983, 1991, 1993
+.\"	The Regents of the University of California.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\"    must display the following acknowledgement:
+.\"	This product includes software developed by the University of
+.\"	California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\"    may be used to endorse or promote products derived from this software
+.\"    without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\"     @(#)getdtablesize.2	8.1 (Berkeley) 6/4/93
+.\" $FreeBSD: src/lib/libc/sys/getdtablesize.2,v 1.4.2.3 2001/12/14 18:34:00 ru Exp $
+.\"
+.Dd June 4, 1993
+.Dt GETDTABLESIZE 2
+.Os
+.Sh NAME
+.Nm getdtablesize
+.Nd get descriptor table size
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In unistd.h
+.Ft int
+.Fn getdtablesize void
+.Sh DESCRIPTION
+Each process has a fixed size descriptor table,
+which is guaranteed to have at least 20 slots.  The entries in
+the descriptor table are numbered with small integers starting at 0.
+The call
+.Fn getdtablesize
+returns the size of this table.
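+.Pp
+For example, a daemon that wants to start with a clean descriptor
+table might close every descriptor it could have inherited:
+.Bd -literal -offset indent
+int fd;
+
+for (fd = 0; fd < getdtablesize(); fd++)
+        (void)close(fd);
+.Ed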
+.Sh SEE ALSO
+.Xr close 2 ,
+.Xr dup 2 ,
+.Xr open 2 ,
+.Xr select 2
+.Sh HISTORY
+The
+.Fn getdtablesize
+function call appeared in
+.Bx 4.2 .
diff --git a/bsd/man/man2/getfsstat.2 b/bsd/man/man2/getfsstat.2
index 47e792b60..99e2abaf6 100644
--- a/bsd/man/man2/getfsstat.2
+++ b/bsd/man/man2/getfsstat.2
@@ -56,111 +56,15 @@ function returns information about all mounted file systems.
 The
 .Fa buf
 argument is a pointer to an array of
-.Xr statfs
-structures.
-.Pp
-As of Mac OS X 10.6, the default size of the
-.Ft ino_t
-type is 64 bits (the macro
-.Dv _DARWIN_FEATURE_64_BIT_INODE
-will be defined).
-While there is no
-.Ft ino_t
-type used in the
-.Xr statfs
-structure, the changes to
-.Fn getfsstat
-are grouped together with the 64-bit inode changes.
-The string fields in the
-.Xr statfs
-structure are larger and the variant symbol
-.Li _getfsstat$INODE64
-will be automatically used.
-The
-.Xr statfs
-structure is defined as:
-.Bd -literal
-typedef struct { int32_t val[2]; } fsid_t;
-
-#define MFSTYPENAMELEN  16 /* length of fs type name including null */
-#define MAXPATHLEN      1024	
-#define MNAMELEN        MAXPATHLEN
-
-struct statfs { /* when _DARWIN_FEATURE_64_BIT_INODE is defined */
-    uint32_t    f_bsize;        /* fundamental file system block size */ 
-    int32_t     f_iosize;       /* optimal transfer block size */ 
-    uint64_t    f_blocks;       /* total data blocks in file system */ 
-    uint64_t    f_bfree;        /* free blocks in fs */ 
-    uint64_t    f_bavail;       /* free blocks avail to non-superuser */ 
-    uint64_t    f_files;        /* total file nodes in file system */ 
-    uint64_t    f_ffree;        /* free file nodes in fs */ 
-    fsid_t      f_fsid;         /* file system id */ 
-    uid_t       f_owner;        /* user that mounted the filesystem */ 
-    uint32_t    f_type;         /* type of filesystem */ 
-    uint32_t    f_flags;        /* copy of mount exported flags */ 
-    uint32_t    f_fssubtype;    /* fs sub-type (flavor) */ 
-    char        f_fstypename[MFSTYPENAMELEN];   /* fs type name */ 
-    char        f_mntonname[MAXPATHLEN];        /* directory on which mounted */ 
-    char        f_mntfromname[MAXPATHLEN];      /* mounted filesystem */ 
-    uint32_t    f_reserved[8];  /* For future use */ 
-};
-.Ed
-.Pp
-(In 10.5, 64-bit
-.Ft ino_t ,
-larger
-.Xr statfs
-structure and variant symbol were available if the macro
-.Dv _DARWIN_USE_64_BIT_INODE
-is defined before any header files are included; this macro is optional in
-10.6.)
-.Pp
-If the macro
-.Dv _DARWIN_NO_64_BIT_INODE
-is defined before any header files are included, or if the deployment target
-is less than 10.6, the legacy
-.Xr statfs
-structure will be in effect.
-The
-.Ft ino_t
-type will be 32 bits (the
-.Dv _DARWIN_FEATURE_64_BIT_INODE
-macro will not be defined), the strings in the
-.Xr statfs
-structure will be their smaller legacy size (and long mount paths may no longer
-fit) and the undecorated symbol
-.Li _getfsstat
-will be used.
-This legacy
 .Fa statfs
-structure is defined as:
-.Bd -literal
-#define MFSNAMELEN      15 /* length of fs type name, not inc. nul */
-#define MNAMELEN        90 /* length of buffer for returned name */
-
-struct statfs { /* when _DARWIN_FEATURE_64_BIT_INODE is NOT defined */
-    short   f_otype;	/* type of file system (reserved: zero) */
-    short   f_oflags;	/* copy of mount flags (reserved: zero) */
-    long    f_bsize;	/* fundamental file system block size */
-    long    f_iosize;	/* optimal transfer block size */
-    long    f_blocks;	/* total data blocks in file system */
-    long    f_bfree;	/* free blocks in fs */
-    long    f_bavail;	/* free blocks avail to non-superuser */
-    long    f_files;	/* total file nodes in file system */
-    long    f_ffree;	/* free file nodes in fs */
-    fsid_t  f_fsid;	/* file system id */
-    uid_t   f_owner;	/* user that mounted the file system */
-    short   f_reserved1;	/* reserved for future use */
-    short   f_type;	/* type of file system (reserved) */
-    long    f_flags;	/* copy of mount flags (reserved) */
-    long    f_reserved2[2];	/* reserved for future use */
-    char    f_fstypename[MFSNAMELEN]; /* fs type name */
-    char    f_mntonname[MNAMELEN];    /* directory on which mounted */
-    char    f_mntfromname[MNAMELEN];  /* mounted file system */
-    char    f_reserved3;	/* reserved for future use */
-    long    f_reserved4[4];	/* reserved for future use */
-};
-.Ed
+structures (see 
+.Xr statfs 2 ) .
+As
+.Xr statfs 2
+indicates, the structure is defined differently depending on
+whether the macro _DARWIN_FEATURE_64_BIT_INODE is defined (see
+.Xr stat 2
+for more information on this macro).
 .Pp
 Fields that are undefined for a particular file system are set to -1.
 The buffer is filled with an array of
@@ -223,6 +127,7 @@ routine is equivalent to the default
 is defined), so there is no longer any reason to use it (it will be removed
 in the future).
 .Sh SEE ALSO
+.Xr stat 2 ,
 .Xr statfs 2 ,
 .Xr fstab 5 ,
 .Xr mount 8
diff --git a/bsd/man/man2/getgroups.2 b/bsd/man/man2/getgroups.2
index f2a9e995d..a941bc389 100644
--- a/bsd/man/man2/getgroups.2
+++ b/bsd/man/man2/getgroups.2
@@ -1,5 +1,5 @@
 .\"
-.\" Copyright (c) 2008 Apple Inc.  All rights reserved.
+.\" Copyright (c) 2008, 2010 Apple Inc.  All rights reserved.
 .\"
 .\" @APPLE_LICENSE_HEADER_START@
 .\" 
@@ -56,7 +56,7 @@
 .\"
 .\"     @(#)getgroups.2	8.2 (Berkeley) 4/16/94
 .\"
-.Dd October 2, 2008
+.Dd September 17, 2010
 .Dt GETGROUPS 2
 .Os BSD 4.2
 .Sh NAME
@@ -90,6 +90,28 @@ is 0,
 returns the number of groups without modifying the
 .Fa grouplist[]
 array.
+.Pp
+To provide compatibility with applications that use
+.Fn getgroups
+in environments where users may be in more than
+.Dv {NGROUPS_MAX}
+groups, a variant of
+.Fn getgroups ,
+obtained when compiling with either the macros
+.Dv _DARWIN_UNLIMITED_GETGROUPS
+or
+.Dv _DARWIN_C_SOURCE
+defined, can be used that is not limited to
+.Dv {NGROUPS_MAX}
+groups.
+However, this variant only returns the user's default group access list and
+not the group list modified by a call to
+.Xr setgroups 2
+(either in the current process or an ancestor process).
+Use of
+.Xr setgroups 2
+is highly discouraged, and there is no foolproof way to determine if it has
+been previously called.
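+.Pp
+For example, the group list can be sized dynamically by first calling
+.Fn getgroups
+with a count of 0 (a sketch; error handling abbreviated):
+.Bd -literal -offset indent
+int ngroups;
+gid_t *groups;
+
+if ((ngroups = getgroups(0, NULL)) == -1)
+        err(1, "getgroups");
+if ((groups = malloc(ngroups * sizeof(gid_t))) == NULL)
+        err(1, "malloc");
+if (getgroups(ngroups, groups) == -1)
+        err(1, "getgroups");
+.Ed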
 .Sh RETURN VALUES
 A successful call returns the number of groups in the group set.
 Otherwise, a value of -1 is returned and the global integer variable
@@ -112,12 +134,6 @@ The argument
 although non-zero,
 is smaller than the number of groups in the group set.
 .El
-.Sh LEGACY DESCRIPTION
-If _DARWIN_C_SOURCE is defined, 
-.Fn getgroups
-can return more than
-.Dv {NGROUPS_MAX}
-groups.
 .Sh LEGACY SYNOPSIS
 .Fd #include <sys/param.h>
 .Fd #include <sys/types.h>
diff --git a/bsd/man/man2/gettimeofday.2 b/bsd/man/man2/gettimeofday.2
index 96659f100..a9b300555 100644
--- a/bsd/man/man2/gettimeofday.2
+++ b/bsd/man/man2/gettimeofday.2
@@ -53,7 +53,6 @@
 .Fa "const struct timezone *tzp"
 .Fc
 .Sh DESCRIPTION
-.Ef
 .Pp
 The system's notion of the current Greenwich time and the current time
 zone is obtained with the
diff --git a/bsd/man/man2/kqueue.2 b/bsd/man/man2/kqueue.2
index f7a12d523..6ab998c5a 100644
--- a/bsd/man/man2/kqueue.2
+++ b/bsd/man/man2/kqueue.2
@@ -74,7 +74,7 @@ and
 The
 .Fn kqueue
 system call
-provides a generic method of notifying the user when an kernel
+provides a generic method of notifying the user when a kernel
 event (kevent) happens or a condition holds, based on the results
 of small pieces of kernel code termed filters.
 A kevent is identified by an (ident, filter) pair and specifies
@@ -267,7 +267,7 @@ the descriptor.
 .It EV_RECEIPT
 This flag is useful for making bulk changes to a kqueue without draining any
 pending events. When passed as input, it forces EV_ERROR to always be returned.
-When a filter is successfully added. The
+When a filter is successfully added, the
 .Va data
 field will be zero.
 .It EV_ONESHOT
@@ -433,6 +433,8 @@ The events to monitor are:
 .Bl -tag -width NOTE_SIGNAL 
 .It NOTE_EXIT
 The process has exited.
+.It NOTE_EXITSTATUS
+The process has exited and its exit status is available in the filter-specific data.  This note is valid only for child processes and must be used together with NOTE_EXIT.
 .It NOTE_FORK
 The process created a child process via
 .Xr fork 2
@@ -507,42 +509,6 @@ contains the number of times the timeout has expired since the last call to
 or
 .Fn kevent64 .
 This filter automatically sets the EV_CLEAR flag internally.
-.It EVFILT_SESSION
-Takes the audit session ID to monitor as the identifier and the events to watch for in 
-.Va fflags ,
-and returns when one or more of the requested session events occurs.  
-To monitor for events for any audit session the value AS_ANY_ASID 
-should be used as the identifier.  With AS_ANY_ASID, as new audit
-sessions are created they are included as if the were added
-individually.  The events to monitor are:
-.Bl -tag -width NOTE_AS_UPDATE
-.It NOTE_AS_START
-A new audit session has started. 
-.It NOTE_AS_END
-All the processes in the audit session have exited.  
-.It NOTE_AS_CLOSE
-This audit session is no longer valid in the kernel.  In other words, it
-is now safe to dispose of any cached information about this session or
-reuse its session ID for a new audit session.
-.It NOTE_AS_UPDATE
-The audit session information was updated.  The audit session information is 
-considered immutable once initially set.  If this becomes enforced in
-the kernel then this event may no longer be needed and may become
-obsolete.
-.It NOTE_AS_ERR
-This flag is returned if the system was unable to attach an event to a
-new session when the audit session ID of AS_ANY_ASID
-is used.  This is usually due to resource limitations.
-.El
-.Pp
-On return, 
-.Va fflags
-contains the events which triggered the filter,
-.Va ident
-contains the audit session ID, and
-.Va data
-contains the audit user ID.
-This filter automatically sets the EV_CLEAR flag internally.
 .El
 .Pp
 ----
diff --git a/bsd/man/man2/madvise.2 b/bsd/man/man2/madvise.2
index a4b4d415d..9f89c32ba 100644
--- a/bsd/man/man2/madvise.2
+++ b/bsd/man/man2/madvise.2
@@ -126,7 +126,7 @@ This is used with
 system call.
 .It Dv MADV_ZERO_WIRED_PAGES
 Indicates that the application would like the wired pages in this address
-range to be zeroed out if the address range is dellocated without first
+range to be zeroed out if the address range is deallocated without first
 unwiring the pages (i.e. a munmap(2) without a preceding munlock(2) or the application
 quits).
 This is used with
diff --git a/bsd/man/man2/mmap.2 b/bsd/man/man2/mmap.2
index af9de8c04..b55d054e1 100644
--- a/bsd/man/man2/mmap.2
+++ b/bsd/man/man2/mmap.2
@@ -148,6 +148,15 @@ VM_MAKE_TAG(tag)	to associate an 8-bit tag with the region
 <mach/vm_statistics.h> defines some preset tags (with a VM_MEMORY_ prefix).
 Users are encouraged to use tags between 240 and 255.
 Tags are used by tools such as vmmap(1) to help identify specific memory regions.
+.Pp
+VM_FLAGS_SUPERPAGE_SIZE_*	to use superpages for the allocation. 
+See <mach/vm_statistics.h> for supported architectures and sizes (or use
+VM_FLAGS_SUPERPAGE_SIZE_ANY to have the kernel choose a size).
+The specified size must be divisible by the superpage size (except for
+VM_FLAGS_SUPERPAGE_SIZE_ANY), and if you use MAP_FIXED, the specified address
+must be properly aligned. If the system cannot satisfy the request with superpages,
+the call will fail. Note that currently, superpages are always wired and not
+inherited by children of the process.
 .It Dv MAP_FILE
 Mapped from a regular file.  (This is
 the default mapping type, and need not be specified.)
diff --git a/bsd/man/man2/open.2 b/bsd/man/man2/open.2
index 2d121402d..80c293626 100644
--- a/bsd/man/man2/open.2
+++ b/bsd/man/man2/open.2
@@ -1,5 +1,5 @@
 .\"
-.\" Copyright (c) 2008 Apple Inc.  All rights reserved.
+.\" Copyright (c) 2010 Apple Inc.  All rights reserved.
 .\"
 .\" @APPLE_LICENSE_HEADER_START@
 .\" 
@@ -56,7 +56,7 @@
 .\"
 .\"     @(#)open.2	8.2 (Berkeley) 11/16/93
 .\"
-.Dd October 7, 2008
+.Dd November 10, 2010
 .Dt OPEN 2
 .Os BSD 4
 .Sh NAME
@@ -114,6 +114,7 @@ O_EXLOCK	atomically obtain an exclusive lock
 O_NOFOLLOW	do not follow symlinks
 O_SYMLINK	allow open of symlinks
 O_EVTONLY	descriptor requested for event notifications only
+O_CLOEXEC	mark as close-on-exec
 .Ed
 .Pp
 Opening a file with
@@ -133,7 +134,9 @@ returns an error.
 This may be used to implement a simple exclusive-access locking mechanism.
 If
 .Dv O_EXCL
-is set and the last component of the pathname is a symbolic link,
+is set with
+.Dv O_CREAT
+and the last component of the pathname is a symbolic link,
 .Fn open
 will fail even if the symbolic link points to a non-existent name.
 .Pp
@@ -184,6 +187,15 @@ flag is only intended for monitoring a file for changes (e.g. kqueue). Note: whe
 this flag is used, the opened file will not prevent an unmount 
 of the volume that contains the file.
 .Pp
+The
+.Dv O_CLOEXEC
+flag causes the file descriptor to be marked as close-on-exec,
+setting the
+.Dv FD_CLOEXEC
+flag.  The state of the file descriptor flags can be inspected
+using the F_GETFD fcntl.  See
+.Xr fcntl 2 .
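+.Pp
+For example (the path is illustrative):
+.Bd -literal -offset indent
+int fd, flags;
+
+fd = open("/tmp/scratch", O_RDONLY | O_CLOEXEC);
+if (fd == -1)
+        err(1, "open");
+flags = fcntl(fd, F_GETFD);     /* FD_CLOEXEC will be set */
+.Ed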
+.Pp
 If successful,
 .Fn open
 returns a non-negative integer, termed a file descriptor.
diff --git a/bsd/man/man2/pathconf.2 b/bsd/man/man2/pathconf.2
index afe640327..9743384ce 100644
--- a/bsd/man/man2/pathconf.2
+++ b/bsd/man/man2/pathconf.2
@@ -103,6 +103,16 @@ system call, otherwise 0.
 Return 1 if file names longer than KERN_NAME_MAX are truncated.
 .It Li _PC_VDISABLE
 Returns the terminal character disabling value.
+.It Li _PC_XATTR_SIZE_BITS
+Returns the number of bits used to store the maximum extended
+attribute size in bytes.  For example, if the maximum
+attribute size supported by a file system is 128KB, the
+value returned will be 18.  However, a value of 18 can also mean
+that the maximum attribute size is anywhere from
+128KB to (256KB - 1); see the example following this list.
+As special cases, the resource
+fork can be much larger, and some file-system-specific
+extended attributes have smaller, preset
+sizes; for example, Finder Info is always 32 bytes.
 .El
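+.Pp
+For example, the upper bound implied by
+.Li _PC_XATTR_SIZE_BITS
+can be reported as follows (a sketch):
+.Bd -literal -offset indent
+long bits;
+
+if ((bits = pathconf(path, _PC_XATTR_SIZE_BITS)) == -1)
+        err(1, "pathconf");
+/* the maximum extended attribute size is below 2^bits bytes */
+printf("xattr sizes on this volume fit in %ld bits\en", bits);
+.Ed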
 .Sh RETURN VALUES
 If the call to
diff --git a/bsd/man/man2/pipe.2 b/bsd/man/man2/pipe.2
index 03f12c196..df5b9d85c 100644
--- a/bsd/man/man2/pipe.2
+++ b/bsd/man/man2/pipe.2
@@ -33,7 +33,7 @@
 .\"
 .\"     @(#)pipe.2	8.1 (Berkeley) 6/4/93
 .\"
-.Dd June 4, 1993
+.Dd February 17, 2011
 .Dt PIPE 2
 .Os BSD 4
 .Sh NAME
@@ -82,6 +82,12 @@ signal.
 Widowing a pipe is the only way to deliver end-of-file to a reader:
 after the reader consumes any buffered data, reading a widowed pipe
 returns a zero count.
+.Pp
+The generation of the
+.Dv SIGPIPE
+signal can be suppressed using the
+.Dv F_SETNOSIGPIPE
+fcntl command.
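+.Pp
+For example (a sketch, where
+.Va fds
+was just filled in by
+.Fn pipe ) :
+.Bd -literal -offset indent
+if (fcntl(fds[1], F_SETNOSIGPIPE, 1) == -1)
+        err(1, "fcntl");
+/* writes to a widowed pipe now fail with EPIPE
+   instead of raising SIGPIPE */
+.Ed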
 .Sh RETURN VALUES
 On successful creation of the pipe, zero is returned. Otherwise, 
 a value of -1 is returned and the variable
@@ -111,6 +117,7 @@ The system file table is full.
 .Xr fork 2 ,
 .Xr read 2 ,
 .Xr socketpair 2 ,
+.Xr fcntl 2 ,
 .Xr write 2
 .Sh HISTORY
 A
diff --git a/bsd/man/man2/posix_spawn.2 b/bsd/man/man2/posix_spawn.2
index 76bfa055b..6a940d5c1 100644
--- a/bsd/man/man2/posix_spawn.2
+++ b/bsd/man/man2/posix_spawn.2
@@ -1,5 +1,5 @@
 .\"
-.\" Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+.\" Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 .\"
 .\" @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 .\" 
@@ -27,7 +27,7 @@
 .\"
 .\"     @(#)posix_spawn.2
 .
-.Dd August 9, 2007
+.Dd November 2, 2010
 .Dt POSIX_SPAWN 2
 .Os "Mac OS X"
 .Sh NAME
@@ -95,7 +95,7 @@ spawned process.  The value is undefined in the case of a failure.
 .Pp
 The argument
 .Fa file_actions
-is either NULL, or it is a a pointer to a file actions object that was
+is either NULL, or it is a pointer to a file actions object that was
 initialized by a call to
 .Xr posix_spawn_file_actions_init 3
 and represents zero or more file actions.
@@ -108,9 +108,12 @@ and
 .Xr fcntl 2 ) .
 Descriptors that remain open are unaffected by
 .Fn posix_spawn
-unless their behaviour is modified by a file action; see
+unless their behaviour is modified by particular spawn flags
+or a file action; see
+.Xr posix_spawnattr_setflags 3
+and
 .Xr posix_spawn_file_actions_init 3
-for more information.
+for additional information.
 .Pp
 The argument
 .Fa attrp
diff --git a/bsd/man/man2/quotactl.2 b/bsd/man/man2/quotactl.2
index c60519313..4a4760dc6 100644
--- a/bsd/man/man2/quotactl.2
+++ b/bsd/man/man2/quotactl.2
@@ -64,7 +64,6 @@ The address of an optional command specific data structure,
 may be given; its interpretation
 is discussed below with each command.
 .Pp
-Currently quotas are supported only for the "ffs" and "hfs" filesystems.
 A command is composed of a primary command (see below)
 and a command type used to interpret the
 .Fa id .
diff --git a/bsd/man/man2/sem_close.2 b/bsd/man/man2/sem_close.2
new file mode 100644
index 000000000..cdff87c7c
--- /dev/null
+++ b/bsd/man/man2/sem_close.2
@@ -0,0 +1,60 @@
+.\"	$Darwin$
+.\"
+.\" Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+.\"
+.\" @APPLE_LICENSE_HEADER_START@
+.\" 
+.\" The contents of this file constitute Original Code as defined in and
+.\" are subject to the Apple Public Source License Version 1.1 (the
+.\" "License").  You may not use this file except in compliance with the
+.\" License.  Please obtain a copy of the License at
+.\" http://www.apple.com/publicsource and read it before using this file.
+.\" 
+.\" This Original Code and all software distributed under the License are
+.\" distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+.\" License for the specific language governing rights and limitations
+.\" under the License.
+.\" 
+.\" @APPLE_LICENSE_HEADER_END@
+.\"
+.Dd June 8, 2000
+.Dt SEM_CLOSE 2
+.Os Darwin
+.Sh NAME
+.Nm sem_close
+.Nd close a named semaphore
+.Sh SYNOPSIS
+.Fd #include <semaphore.h>
+.Ft int
+.Fn sem_close "sem_t *sem"
+.Sh DESCRIPTION
+The system resources associated with the named semaphore referenced by
+.Fa sem
+are deallocated and the descriptor is invalidated.
+.Pp
+If successful,
+.Fn sem_close
+will return 0.  Otherwise, -1 is returned and
+.Va errno
+is set.
+.Sh ERRORS
+.Fn sem_close
+succeeds unless:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+.Fa sem
+is not a valid semaphore descriptor.
+.El
+.Sh SEE ALSO
+.Xr sem_init 2 ,
+.Xr sem_open 2 ,
+.Xr sem_unlink 2 ,
+.Xr semctl 2 ,
+.Xr semget 2 ,
+.Xr semop 2
+.Sh HISTORY
+.Fn sem_close
+is specified in the POSIX Realtime Extension (1003.1b-1993/1003.1i-1995).
diff --git a/bsd/man/man2/sem_open.2 b/bsd/man/man2/sem_open.2
new file mode 100644
index 000000000..423e98ae4
--- /dev/null
+++ b/bsd/man/man2/sem_open.2
@@ -0,0 +1,169 @@
+.\"	$Darwin$
+.\"
+.\" Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+.\"
+.\" @APPLE_LICENSE_HEADER_START@
+.\" 
+.\" The contents of this file constitute Original Code as defined in and
+.\" are subject to the Apple Public Source License Version 1.1 (the
+.\" "License").  You may not use this file except in compliance with the
+.\" License.  Please obtain a copy of the License at
+.\" http://www.apple.com/publicsource and read it before using this file.
+.\" 
+.\" This Original Code and all software distributed under the License are
+.\" distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+.\" License for the specific language governing rights and limitations
+.\" under the License.
+.\" 
+.\" @APPLE_LICENSE_HEADER_END@
+.\"
+.Dd June 8, 2000
+.Dt SEM_OPEN 2
+.Os Darwin
+.Sh NAME
+.Nm sem_open
+.Nd initialize and open a named semaphore
+.Sh SYNOPSIS
+.Fd #include <semaphore.h>
+.Ft sem_t *
+.Fo sem_open
+.Fa "const char *name"
+.Fa "int oflag"
+.Fa "..."
+.Fc
+.Pp
+The parameters "mode_t mode" and "unsigned int value"
+are optional.
+.Sh DESCRIPTION
+The named semaphore
+.Fa name
+is initialized and opened as specified by the argument
+.Fa oflag
+and a semaphore descriptor is returned to the calling process.
+.Pp
+The value of
+.Fa oflag
+is formed by
+.Em or Ns 'ing
+the following values:
+.Pp
+.Bd -literal -offset indent -compact
+O_CREAT		create the semaphore if it does not exist
+O_EXCL		error if create and semaphore exists
+.Ed
+.Pp
+If
+.Dv O_CREAT
+is specified,
+.Fn sem_open
+requires an additional two arguments.
+.Fa mode
+specifies the permissions for the semaphore as described in
+.Xr chmod 2
+and modified by the process' umask value (see
+.Xr umask 2 ) .
+The semaphore is created with an initial
+.Fa value ,
+which must be less than or equal to
+.Dv SEM_VALUE_MAX .
+.Pp
+If
+.Dv O_EXCL
+is specified and the semaphore exists,
+.Fn sem_open
+fails.  The check for the existence of the semaphore and the creation
+of the semaphore are atomic with respect to all processes calling
+.Fn sem_open
+with
+.Dv O_CREAT
+and
+.Dv O_EXCL
+set.
+.Pp
+When a new semaphore is created, it is given the user ID and group ID
+which correspond to the effective user and group IDs of the calling
+process. There is no visible entry in the file system for the created
+object in this implementation.
+.Pp
+The returned semaphore descriptor is available to the calling process
+until it is closed with
+.Fn sem_close ,
+or until the caller exits or execs.
+.Pp
+If a process makes repeated calls to
+.Fn sem_open ,
+with the same
+.Fa name
+argument, the same descriptor is returned for each successful call,
+unless
+.Fn sem_unlink
+has been called on the semaphore in the interim.
+.Pp
+If
+.Fn sem_open
+fails for any reason, it will return a value of
+.Dv SEM_FAILED
+and set
+.Va errno .
+On success, it returns a semaphore descriptor.
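+.Pp
+For example, a binary semaphore protecting a critical section might be
+created and used as follows (the name is arbitrary; error handling
+abbreviated):
+.Bd -literal -offset indent
+sem_t *s;
+
+if ((s = sem_open("/mysem", O_CREAT, 0644, 1)) == SEM_FAILED)
+        err(1, "sem_open");
+sem_wait(s);
+/* ... critical section ... */
+sem_post(s);
+sem_close(s);
+sem_unlink("/mysem");
+.Ed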
+.Sh ERRORS
+The named semaphore is opened unless:
+.Bl -tag -width Er
+.It Bq Er EACCES
+The required permissions (for reading and/or writing)
+are denied for the given flags; or
+.Dv O_CREAT
+is specified, the object does not exist, and permission to
+create the semaphore is denied.
+.It Bq Er EEXIST
+.Dv O_CREAT
+and
+.Dv O_EXCL
+were specified and the semaphore exists.
+.It Bq Er EINTR
+The
+.Fn sem_open
+operation was interrupted by a signal.
+.It Bq Er EINVAL
+The
+.Fn sem_open
+operation is not supported; or
+.Dv O_CREAT
+is specified and
+.Fa value
+exceeds
+.Dv SEM_VALUE_MAX .
+.It Bq Er EMFILE
+The process has already reached its limit for semaphores or file
+descriptors in use.
+.It Bq Er ENAMETOOLONG
+.Fa name
+exceeded
+.Dv SEM_NAME_LEN
+characters.
+.It Bq Er ENFILE
+Too many semaphores or file descriptors are open on the system.
+.It Bq Er ENOENT
+.Dv O_CREAT
+is not set and the named semaphore does not exist.
+.It Bq Er ENOSPC
+.Dv O_CREAT
+is specified, the file does not exist, and there is insufficient
+space available to create the semaphore.
+.El
+.Sh SEE ALSO
+.Xr sem_close 2 ,
+.Xr sem_post 2 ,
+.Xr sem_trywait 2 ,
+.Xr sem_unlink 2 ,
+.Xr sem_wait 2 ,
+.Xr semctl 2 ,
+.Xr semget 2 ,
+.Xr semop 2 ,
+.Xr umask 2
+.Sh HISTORY
+.Fn sem_open
+is specified in the POSIX Realtime Extension (1003.1b-1993/1003.1i-1995).
diff --git a/bsd/man/man2/sem_post.2 b/bsd/man/man2/sem_post.2
new file mode 100644
index 000000000..36d06fde8
--- /dev/null
+++ b/bsd/man/man2/sem_post.2
@@ -0,0 +1,65 @@
+.\"	$Darwin$
+.\"
+.\" Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+.\"
+.\" @APPLE_LICENSE_HEADER_START@
+.\" 
+.\" The contents of this file constitute Original Code as defined in and
+.\" are subject to the Apple Public Source License Version 1.1 (the
+.\" "License").  You may not use this file except in compliance with the
+.\" License.  Please obtain a copy of the License at
+.\" http://www.apple.com/publicsource and read it before using this file.
+.\" 
+.\" This Original Code and all software distributed under the License are
+.\" distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+.\" License for the specific language governing rights and limitations
+.\" under the License.
+.\" 
+.\" @APPLE_LICENSE_HEADER_END@
+.\"
+.Dd June 8, 2000
+.Dt SEM_POST 2
+.Os Darwin
+.Sh NAME
+.Nm sem_post
+.Nd unlock a semaphore
+.Sh SYNOPSIS
+.Fd #include <semaphore.h>
+.Ft int
+.Fn sem_post "sem_t *sem"
+.Sh DESCRIPTION
+The semaphore referenced by
+.Fa sem
+is unlocked, the value of the semaphore is incremented, and all
+threads which are waiting on the semaphore are awakened.
+.Pp
+.Fn sem_post
+is reentrant with respect to signals and may be called from within a
+signal handler.
+.Pp
+If successful,
+.Fn sem_post
+will return 0.  Otherwise, -1 is returned and
+.Va errno
+is set.
+.Sh ERRORS
+.Fn sem_post
+succeeds unless:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+.Fa sem
+is not a valid semaphore descriptor.
+.El
+.Sh SEE ALSO
+.Xr sem_open 2 ,
+.Xr sem_trywait 2 ,
+.Xr sem_wait 2 ,
+.Xr semctl 2 ,
+.Xr semget 2 ,
+.Xr semop 2
+.Sh HISTORY
+.Fn sem_post
+is specified in the POSIX Realtime Extension (1003.1b-1993/1003.1i-1995).
diff --git a/bsd/man/man2/sem_unlink.2 b/bsd/man/man2/sem_unlink.2
new file mode 100644
index 000000000..7fc7e9c4d
--- /dev/null
+++ b/bsd/man/man2/sem_unlink.2
@@ -0,0 +1,74 @@
+.\"	$Darwin$
+.\"
+.\" Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+.\"
+.\" @APPLE_LICENSE_HEADER_START@
+.\" 
+.\" The contents of this file constitute Original Code as defined in and
+.\" are subject to the Apple Public Source License Version 1.1 (the
+.\" "License").  You may not use this file except in compliance with the
+.\" License.  Please obtain a copy of the License at
+.\" http://www.apple.com/publicsource and read it before using this file.
+.\" 
+.\" This Original Code and all software distributed under the License are
+.\" distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+.\" License for the specific language governing rights and limitations
+.\" under the License.
+.\" 
+.\" @APPLE_LICENSE_HEADER_END@
+.\"
+.Dd June 8, 2000
+.Dt SEM_UNLINK 2
+.Os Darwin
+.Sh NAME
+.Nm sem_unlink
+.Nd remove a named semaphore
+.Sh SYNOPSIS
+.Fd #include <semaphore.h>
+.Ft int
+.Fn sem_unlink "const char *name"
+.Sh DESCRIPTION
+The semaphore named
+.Fa name
+is removed.  If the semaphore is in use by other processes, then
+.Fa name
+is immediately disassociated from the semaphore, but the semaphore
+itself will not be removed until all references to it have been
+closed.  Subsequent calls to
+.Fn sem_open
+using
+.Fa name
+will refer to or create a new semaphore named
+.Fa name .
+.Pp
+If successful,
+.Fn sem_unlink
+will return 0.  Otherwise, -1 is returned and
+.Va errno
+is set, and the state of the semaphore is unchanged.
+.Sh ERRORS
+.Fn sem_unlink
+succeeds unless:
+.Bl -tag -width Er
+.It Bq Er EACCES
+Permission to remove the semaphore is denied.
+.It Bq Er ENAMETOOLONG
+.Fa name
+exceeded
+.Dv SEM_NAME_LEN
+characters.
+.It Bq Er ENOENT
+The named semaphore does not exist.
+.El
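+.Sh EXAMPLES
+A minimal sketch (the name is a placeholder) of a common idiom:
+unlink the semaphore as soon as it has been created, so that the
+system reclaims it once the last reference is closed:
+.Bd -literal -offset indent
+#include <semaphore.h>
+#include <fcntl.h>
+
+sem_t *
+open_private(void)
+{
+    sem_t *sem = sem_open("/scratch", O_CREAT | O_EXCL, 0600, 0);
+    if (sem != SEM_FAILED)
+        sem_unlink("/scratch");  /* name gone; sem stays valid */
+    return sem;
+}
+.Ed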
+.Sh SEE ALSO
+.Xr sem_close 2 ,
+.Xr sem_open 2 ,
+.Xr semctl 2 ,
+.Xr semget 2 ,
+.Xr semop 2
+.Sh HISTORY
+.Fn sem_unlink
+is specified in the POSIX Realtime Extension (1003.1b-1993/1003.1i-1995).
diff --git a/bsd/man/man2/sem_wait.2 b/bsd/man/man2/sem_wait.2
new file mode 100644
index 000000000..02f8d8586
--- /dev/null
+++ b/bsd/man/man2/sem_wait.2
@@ -0,0 +1,88 @@
+.\"	$Darwin$
+.\"
+.\" Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+.\"
+.\" @APPLE_LICENSE_HEADER_START@
+.\" 
+.\" The contents of this file constitute Original Code as defined in and
+.\" are subject to the Apple Public Source License Version 1.1 (the
+.\" "License").  You may not use this file except in compliance with the
+.\" License.  Please obtain a copy of the License at
+.\" http://www.apple.com/publicsource and read it before using this file.
+.\" 
+.\" This Original Code and all software distributed under the License are
+.\" distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+.\" License for the specific language governing rights and limitations
+.\" under the License.
+.\" 
+.\" @APPLE_LICENSE_HEADER_END@
+.\"
+.Dd June 8, 2000
+.Dt SEM_WAIT 2
+.Os Darwin
+.Sh NAME
+.Nm sem_trywait, sem_wait
+.Nd lock a semaphore
+.Sh SYNOPSIS
+.Fd #include <semaphore.h>
+.Ft int
+.Fn sem_trywait "sem_t *sem"
+.Ft int
+.Fn sem_wait    "sem_t *sem"
+.Sh DESCRIPTION
+The semaphore referenced by
+.Fa sem
+is locked.  When calling
+.Fn sem_wait ,
+if the semaphore's value is zero, the calling thread will block until
+the lock is acquired or until the call is interrupted by a
+signal. Alternatively, the
+.Fn sem_trywait
+function will fail if the semaphore is already locked, rather than
+blocking on the semaphore.
+.Pp
+If successful (the lock was acquired),
+.Fn sem_wait
+and
+.Fn sem_trywait
+will return 0.  Otherwise, -1 is returned and
+.Va errno
+is set, and the state of the semaphore is unchanged.
+.Sh ERRORS
+.Fn sem_wait
+and
+.Fn sem_trywait
+succeed unless:
+.Bl -tag -width Er
+.It Bq Er EAGAIN
+The semaphore is already locked.
+.It Bq Er EDEADLK
+A deadlock was detected.
+.It Bq Er EINTR
+The call was interrupted by a signal.
+.It Bq Er EINVAL
+.Fa sem
+is not a valid semaphore descriptor.
+.El
+.Sh NOTES
+Applications may encounter a priority inversion while using
+semaphores.  When a thread is waiting on a semaphore which is about to
+be posted by a lower-priority thread and the lower-priority thread is
+preempted by another thread (of medium priority), a priority inversion
+has occurred, and the higher-priority thread will be blocked for an
+unlimited time period.  Programmers using the realtime functionality
+of the system should take care to avoid priority inversions.
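+.Sh EXAMPLES
+A minimal sketch of a wait loop that restarts after being interrupted
+by a signal; the semaphore is assumed to have been obtained with
+.Fn sem_open :
+.Bd -literal -offset indent
+#include <semaphore.h>
+#include <errno.h>
+
+int
+acquire(sem_t *sem)
+{
+    int r;
+
+    /* Retry if a caught signal interrupts the wait. */
+    while ((r = sem_wait(sem)) == -1 && errno == EINTR)
+        continue;
+    return r;    /* 0 on success; -1 with errno set otherwise */
+}
+.Ed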
+.Sh SEE ALSO
+.Xr sem_open 2 ,
+.Xr sem_post 2 ,
+.Xr semctl 2 ,
+.Xr semget 2 ,
+.Xr semop 2
+.Sh HISTORY
+.Fn sem_wait
+and
+.Fn sem_trywait
+are specified in the POSIX Realtime Extension (1003.1b-1993/1003.1i-1995).
diff --git a/bsd/man/man2/sendfile.2 b/bsd/man/man2/sendfile.2
index d2919e3d2..1e5e537f3 100644
--- a/bsd/man/man2/sendfile.2
+++ b/bsd/man/man2/sendfile.2
@@ -104,14 +104,16 @@ arrays is specified by
 and
 .Fa trl_cnt .
 .Pp
-When a header or trailer is specified the value of 
+When a header or trailer is specified, the value of the
 .Fa len
-returned will include the size of header or trailer sent. The user should
-provide sufficiently large value of 
+argument indicates the maximum number of bytes in the header and/or file to be sent.
+It does not control the trailer; if a trailer exists, all of it will be sent.
+If the value of the
 .Fa len
-as argument including the size of header or trailer, 
-otherwise only part of file data will be sent
-following the header.
+argument is 0, all of the header and/or file will be sent before the entire trailer is sent.
+On return, the
+.Fa len
+argument specifies the total number of bytes sent.
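+.Pp
+As an illustrative sketch (the descriptors and the header buffer are
+placeholders), a caller sending a header followed by the entire file
+passes 0 and reads the count back on return:
+.Bd -literal -offset indent
+struct iovec hdr = { buf, buflen };  /* header prepared by caller */
+struct sf_hdtr hdtr = { &hdr, 1, NULL, 0 };
+off_t len = 0;                       /* 0: send all of header and file */
+
+if (sendfile(fd, sock, 0, &len, &hdtr, 0) == 0)
+    ;                                /* len is the total bytes sent */
+.Ed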
 .Pp
 The
 .Fa flags
diff --git a/bsd/man/man2/setaudit.2 b/bsd/man/man2/setaudit.2
index 6b1979f5d..b626e0cf8 100644
--- a/bsd/man/man2/setaudit.2
+++ b/bsd/man/man2/setaudit.2
@@ -54,7 +54,6 @@ The
 data structure is defined as follows:
 .nf
 .in +4n
-
 struct auditinfo {
 	au_id_t        ai_auid;         /* Audit user ID */
 	au_mask_t      ai_mask;         /* Audit masks */
@@ -77,15 +76,13 @@ Until
 is set to something other than AU_DEFAUDITID, any audit events
 generated by the system will be filtered by the non-attributed audit
 mask.
-.PP
-
+.Pp
 The
 .Fa au_mask_t
 data structure defines the bit mask for auditing successful and failed events 
 out of the predefined list of event classes. It is defined as follows:
 .nf
 .in +4n
-
 struct au_mask {
 	unsigned int    am_success;     /* success bits */
 	unsigned int    am_failure;     /* failure bits */
@@ -93,24 +90,21 @@ struct au_mask {
 typedef struct au_mask  au_mask_t;
 .in
 .fi
-.PP
-
+.Pp
 The
 .Fa au_termid_t
 data structure defines the Terminal ID recorded with every event caused by the 
 process. It is defined as follows:
 .nf
 .in +4n
-
 struct au_tid {
 	dev_t           port;
 	u_int32_t       machine;
 };
 typedef struct au_tid   au_tid_t;
-
 .in
 .fi
-.PP
+.Pp
 The
 .Fa ai_asid
 variable contains the audit session ID which is recorded with every event 
@@ -118,7 +112,7 @@ caused by the process.  It can be any value in the range 1 to PID_MAX (99999).
 If the value of AU_ASSIGN_ASID is used for 
 .Fa ai_asid
 a unique session ID will be generated by the kernel.
-The audit session ID will be returned in 
+The audit session ID will be returned in the
 .Fa ai_asid
 field on success.
 .Pp
@@ -127,11 +121,10 @@ The
 system call
 uses the expanded
 .Fa auditinfo_addr_t 
-data structure supports Terminal IDs with larger addresses such as those used
-in IP version 6.  It is defined as follows:
+data structure which supports Terminal IDs with larger addresses
+such as those used in IP version 6.  It is defined as follows:
 .nf
 .in +4n
-
 struct auditinfo_addr {
 	au_id_t         ai_auid;        /* Audit user ID. */
 	au_mask_t       ai_mask;        /* Audit masks. */
@@ -145,11 +138,10 @@ typedef struct auditinfo_addr   auditinfo_addr_t;
 .Pp
 The 
 .Fa au_tid_addr_t
-data structure which includes a larger address storage field and an additional 
+data structure includes a larger address storage field and an additional
 field with the type of address stored:
 .nf
 .in +4n
-
 struct au_tid_addr {
 	dev_t           at_port;
 	u_int32_t       at_type;
@@ -183,18 +175,20 @@ field in
 is set to AU_IPv4 and the other
 .Fa ai_tid_addr
 fields are all set to zero.
-The 
-.Fa ai_flags
-field can only be set when a new session is initially created.
 Creating a new session is done by setting the 
 .Fa ai_asid
 field to a unique session value or AU_ASSIGN_ASID.
 These system calls will fail when attempting to change the
-.Fa ai_auid ,
-.Fa ai_termid ,
-or 
-.Fa ai_flags
+.Fa ai_auid
+or
+.Fa ai_termid
 fields once set to something other than the default values.
+The
+.Fa ai_flags
+field may be updated only according to local access control
+policy but this is usually accomplished with
+.Xr auditon 2
+using the A_SETSFLAGS command.
 The audit preselection masks may be changed at any time
 but are usually updated with
 .Xr auditon 2
diff --git a/bsd/man/man2/setgroups.2 b/bsd/man/man2/setgroups.2
index 1547f7027..0ec3c3086 100644
--- a/bsd/man/man2/setgroups.2
+++ b/bsd/man/man2/setgroups.2
@@ -33,7 +33,7 @@
 .\"
 .\"     @(#)setgroups.2	8.2 (Berkeley) 4/16/94
 .\"
-.Dd April 16, 1994
+.Dd September 15, 2010
 .Dt SETGROUPS 2
 .Os BSD 4.2
 .Sh NAME
@@ -56,6 +56,10 @@ more than
 .Dv {NGROUPS_MAX} .
 .Pp
 Only the super-user may set new groups.
+.Pp
+Use of
+.Fn setgroups
+is highly discouraged.
 .Sh RETURN VALUES
 A 0 value is returned on success, -1 on error, with
 an error code stored in
diff --git a/bsd/man/man2/setregid.2 b/bsd/man/man2/setregid.2
new file mode 100644
index 000000000..47d791647
--- /dev/null
+++ b/bsd/man/man2/setregid.2
@@ -0,0 +1,92 @@
+.\" Copyright (c) 1980, 1991, 1993, 1994
+.\"	The Regents of the University of California.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\"    must display the following acknowledgement:
+.\"	This product includes software developed by the University of
+.\"	California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\"    may be used to endorse or promote products derived from this software
+.\"    without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\"     @(#)setregid.2	8.2 (Berkeley) 4/16/94
+.\" $FreeBSD: src/lib/libc/sys/setregid.2,v 1.6.2.4 2001/12/14 18:34:01 ru Exp $
+.\"
+.Dd April 16, 1994
+.Dt SETREGID 2
+.Os
+.Sh NAME
+.Nm setregid
+.Nd set real and effective group ID
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In unistd.h
+.Ft int
+.Fn setregid "gid_t rgid" "gid_t egid"
+.Sh DESCRIPTION
+The real and effective group IDs of the current process
+are set to the arguments.
+Unprivileged users may change the real group
+ID to the effective group ID and vice-versa; only the super-user may
+make other changes.
+.Pp
+Supplying a value of -1 for either the real or effective
+group ID forces the system to substitute the current
+ID in place of the -1 parameter.
+.Pp
+The
+.Fn setregid
+function was intended to allow swapping
+the real and effective group IDs
+in set-group-ID programs to temporarily relinquish the set-group-ID value.
+This function did not work correctly;
+its purpose is now better served by the use of the
+.Fn setegid
+function (see
+.Xr setuid 2 ) .
+.Pp
+When setting the real and effective group IDs to the same value,
+the standard
+.Fn setgid
+function is preferred.
+.Sh RETURN VALUES
+.Rv -std setregid
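+.Sh EXAMPLES
+An illustrative sketch of the -1 convention described above: reset
+only the effective group ID to the real group ID, leaving the real
+ID unchanged:
+.Bd -literal -offset indent
+#include <unistd.h>
+
+int
+drop_effective_group(void)
+{
+    /* -1 keeps the real group ID; the effective ID becomes real. */
+    return setregid(-1, getgid());
+}
+.Ed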
+.Sh ERRORS
+.Bl -tag -width Er
+.It Bq Er EPERM
+The current process is not the super-user and a change
+other than changing the effective group-id to the real group-id
+was specified.
+.El
+.Sh SEE ALSO
+.Xr getgid 2 ,
+.Xr issetugid 2 ,
+.Xr setegid 2 ,
+.Xr setgid 2 ,
+.Xr setuid 2
+.Sh HISTORY
+The
+.Fn setregid
+system call appeared in
+.Bx 4.2 .
diff --git a/bsd/man/man2/setreuid.2 b/bsd/man/man2/setreuid.2
new file mode 100644
index 000000000..13cfeadf4
--- /dev/null
+++ b/bsd/man/man2/setreuid.2
@@ -0,0 +1,90 @@
+.\" Copyright (c) 1980, 1991, 1993, 1994
+.\"	The Regents of the University of California.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\"    must display the following acknowledgement:
+.\"	This product includes software developed by the University of
+.\"	California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\"    may be used to endorse or promote products derived from this software
+.\"    without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\"     @(#)setreuid.2	8.2 (Berkeley) 4/16/94
+.\" $FreeBSD: src/lib/libc/sys/setreuid.2,v 1.6.2.6 2001/12/14 18:34:01 ru Exp $
+.\"
+.Dd February 8, 2001
+.Dt SETREUID 2
+.Os
+.Sh NAME
+.Nm setreuid
+.Nd set real and effective user IDs
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In unistd.h
+.Ft int
+.Fn setreuid "uid_t ruid" "uid_t euid"
+.Sh DESCRIPTION
+The real and effective user IDs of the
+current process are set according to the arguments.
+If
+.Fa ruid
+or
+.Fa euid
+is -1, the current uid is filled in by the system.
+Unprivileged users may change the real user
+ID to the effective user ID and vice-versa; only the super-user may
+make other changes.
+.Pp
+The
+.Fn setreuid
+function has been used to swap the real and effective user IDs
+in set-user-ID programs to temporarily relinquish the set-user-ID value.
+This purpose is now better served by the use of the
+.Fn seteuid
+function (see
+.Xr setuid 2 ) .
+.Pp
+When setting the real and effective user IDs to the same value,
+the standard
+.Fn setuid
+function is preferred.
+.Sh RETURN VALUES
+.Rv -std setreuid
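+.Sh EXAMPLES
+An illustrative sketch of the -1 convention described above: reset
+only the effective user ID to the real user ID, leaving the real ID
+unchanged:
+.Bd -literal -offset indent
+#include <unistd.h>
+
+int
+drop_effective_user(void)
+{
+    /* -1 leaves the real user ID as it is. */
+    return setreuid(-1, getuid());
+}
+.Ed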
+.Sh ERRORS
+.Bl -tag -width Er
+.It Bq Er EPERM
+The current process is not the super-user and a change
+other than changing the effective user-id to the real user-id
+was specified.
+.El
+.Sh SEE ALSO
+.Xr getuid 2 ,
+.Xr issetugid 2 ,
+.Xr seteuid 2 ,
+.Xr setuid 2
+.Sh HISTORY
+The
+.Fn setreuid
+system call appeared in
+.Bx 4.2 .
diff --git a/bsd/man/man2/setxattr.2 b/bsd/man/man2/setxattr.2
index 6fe4f86b8..957c5bd77 100644
--- a/bsd/man/man2/setxattr.2
+++ b/bsd/man/man2/setxattr.2
@@ -91,6 +91,13 @@ is identical to
 except that it sets an extended attribute on an open file referenced by
 file descriptor
 .Fa fd .
+.Sh NOTES
+On some filesystems, such as
+.Dv HFS+ ,
+setting the extended attribute
+.Dv com.apple.ResourceFork
+will update the modification time (``mtime'') of
+the file.
 .Sh RETURN VALUES
 On success, 0 is returned.  On failure, -1 is returned and the global
 variable
diff --git a/bsd/man/man2/shm_open.2 b/bsd/man/man2/shm_open.2
new file mode 100644
index 000000000..1b4bfc685
--- /dev/null
+++ b/bsd/man/man2/shm_open.2
@@ -0,0 +1,179 @@
+.\"	$Darwin$
+.\"
+.\" Copyright (c) 1999-2002 Apple Computer, Inc. All rights reserved.
+.\"
+.\" @APPLE_LICENSE_HEADER_START@
+.\" 
+.\" The contents of this file constitute Original Code as defined in and
+.\" are subject to the Apple Public Source License Version 1.1 (the
+.\" "License").  You may not use this file except in compliance with the
+.\" License.  Please obtain a copy of the License at
+.\" http://www.apple.com/publicsource and read it before using this file.
+.\" 
+.\" This Original Code and all software distributed under the License are
+.\" distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+.\" License for the specific language governing rights and limitations
+.\" under the License.
+.\" 
+.\" @APPLE_LICENSE_HEADER_END@
+.\"
+.Dd August 29, 2008
+.Dt SHM_OPEN 2
+.Os Darwin
+.Sh NAME
+.Nm shm_open
+.Nd open a shared memory object
+.Sh SYNOPSIS
+.Fd #include <sys/mman.h>
+.Ft int
+.Fo shm_open
+.Fa "const char *name"
+.Fa "int oflag"
+.Fa "..."
+.Fc
+.Pp
+The parameter "mode_t mode" is optional.
+.Sh DESCRIPTION
+The shared memory object referenced by
+.Fa name
+is opened for reading and/or writing as specified by the argument
+.Fa oflag
+and a file descriptor is returned to the calling process.
+The returned file descriptor will be the lowest file descriptor not
+currently open for the calling process, and is not shared with any
+other process, as it is a new file descriptor. The new file
+descriptor will have the
+.Dv FD_CLOEXEC
+flag set.
+Repeated calls
+to
+.Nm shm_open
+with the same string value for
+.Fa name
+will return a file descriptor referring to the same shared memory
+object, provided that the object has not been unlinked by a call to
+.Fn shm_unlink .
+The
+.Fa oflag
+argument may indicate the file is to be
+created if it does not exist (by specifying the
+.Dv O_CREAT
+flag), in which case the file is created with mode
+.Fa mode
+as described in
+.Xr chmod 2
+and modified by the process' umask value (see
+.Xr umask 2 ) .
+.Pp
+The value of
+.Fa oflag
+is formed by
+.Em or Ns 'ing
+the following values:
+.Pp
+.Bd -literal -offset indent -compact
+O_RDONLY	open for reading only
+O_RDWR		open for reading and writing
+O_CREAT		create object if it does not exist
+O_EXCL		error if create and object exists
+O_TRUNC		truncate size to 0
+.Ed
+.Pp
+Exactly one of
+.Dv O_RDONLY
+or
+.Dv O_RDWR
+must be specified.
+.Pp
+If
+.Dv O_TRUNC
+is specified and the
+file exists, the file is truncated to zero length.
+If
+.Dv O_EXCL
+is set with
+.Dv O_CREAT
+and the file already
+exists,
+.Fn shm_open
+returns an error.  This may be used to
+implement a simple exclusive access locking mechanism.
+.Pp
+If successful,
+.Fn shm_open
+returns a non-negative integer, termed a file descriptor.
+It returns -1 and sets
+.Va errno
+on failure.
+The file pointer used to mark the current position within the
+memory object is set to the beginning of the object.
+.Pp
+When a new shared memory object is created, it is given the
+owner and group corresponding to the effective user and
+group of the calling process. There is no visible entry in the
+file system for the created object in this implementation.
+.Pp
+When a shared memory object is created, it persists until it
+is unlinked and all other references are gone. Objects do
+not persist across a system reboot.
+.Pp
+The system imposes a limit on the number of file descriptors
+open simultaneously by one process.
+The
+.Xr getdtablesize 2
+system call returns the current system limit.
+.Sh ERRORS
+The named object is opened unless:
+.Bl -tag -width Er
+.It Bq Er EACCES
+The required permissions (for reading and/or writing)
+are denied for the given flags.
+.It Bq Er EACCES
+.Dv O_CREAT
+is specified, the object does not exist, and permission to
+create the object is denied.
+.It Bq Er EEXIST
+.Dv O_CREAT
+and
+.Dv O_EXCL
+were specified and the object exists.
+.It Bq Er EINTR
+The
+.Fn shm_open
+operation was interrupted by a signal.
+.It Bq Er EINVAL
+The
+.Fn shm_open
+operation is not supported.
+.It Bq Er EMFILE
+The process has already reached its limit for open file descriptors.
+.It Bq Er ENAMETOOLONG
+.Fa name
+exceeded the name size limit.
+This is currently
+.Dv PSHMNAMLEN
+characters (defined in
+.In sys/posix_shm.h ) ,
+but this may change in the future.
+.It Bq Er ENFILE
+The system file table is full.
+.It Bq Er ENOENT
+.Dv O_CREAT
+is not set and the named object does not exist.
+.It Bq Er ENOSPC
+.Dv O_CREAT
+is specified, the file does not exist, and there is insufficient
+space available to create the object.
+.El
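+.Sh EXAMPLES
+A minimal sketch (the object name and size are placeholders) that
+creates a shared memory object, sets its size, and maps it into the
+address space:
+.Bd -literal -offset indent
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+void *
+create_region(size_t size)
+{
+    int fd = shm_open("/region", O_RDWR | O_CREAT, 0600);
+    void *p;
+
+    if (fd == -1)
+        return NULL;
+    /* A newly created object has size 0; extend it before mapping. */
+    if (ftruncate(fd, size) == -1) {
+        close(fd);
+        return NULL;
+    }
+    p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+    close(fd);    /* the mapping keeps the object alive */
+    return p == MAP_FAILED ? NULL : p;
+}
+.Ed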
+.Sh SEE ALSO
+.Xr chmod 2 ,
+.Xr close 2 ,
+.Xr getdtablesize 2 ,
+.Xr mmap 2 ,
+.Xr shm_unlink 2 ,
+.Xr umask 2
+.Sh HISTORY
+.Fn shm_open
+is specified in the POSIX Realtime Extension (1003.1b-1993/1003.1i-1995).
diff --git a/bsd/man/man2/shm_unlink.2 b/bsd/man/man2/shm_unlink.2
new file mode 100644
index 000000000..7ecc66fd4
--- /dev/null
+++ b/bsd/man/man2/shm_unlink.2
@@ -0,0 +1,87 @@
+.\"	$Darwin$
+.\"
+.\" Copyright (c) 1999-2002 Apple Computer, Inc. All rights reserved.
+.\"
+.\" @APPLE_LICENSE_HEADER_START@
+.\" 
+.\" The contents of this file constitute Original Code as defined in and
+.\" are subject to the Apple Public Source License Version 1.1 (the
+.\" "License").  You may not use this file except in compliance with the
+.\" License.  Please obtain a copy of the License at
+.\" http://www.apple.com/publicsource and read it before using this file.
+.\" 
+.\" This Original Code and all software distributed under the License are
+.\" distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+.\" License for the specific language governing rights and limitations
+.\" under the License.
+.\" 
+.\" @APPLE_LICENSE_HEADER_END@
+.\"
+.Dd August 31, 2006
+.Dt SHM_UNLINK 2
+.Os Darwin
+.Sh NAME
+.Nm shm_unlink
+.Nd remove shared memory object
+.Sh SYNOPSIS
+.Fd #include <sys/mman.h>
+.Ft int
+.Fn shm_unlink "const char *name"
+.Sh DESCRIPTION
+The
+.Fn shm_unlink
+function disassociates the shared memory object specified by
+.Fa name
+from that name.
+The resources associated with the shared memory object remain intact
+until the last file descriptor reference is removed, e.g., by
+.Xr close 2
+or
+.Xr munmap 2 ,
+at which point the resources are reclaimed
+(if no references exist at the time of the call to
+.Fn shm_unlink ,
+the resources are reclaimed immediately).
+The name can only be reused
+when it is bound to a new shared memory object with a call to
+.Xr shm_open 2
+with the
+.Dv O_CREAT
+flag.
+.Sh RETURN VALUES
+Upon successful completion, a value of 0 is returned.
+Otherwise, a value of -1 is returned and
+.Va errno
+is set to indicate the error,
+and the named shared memory object will remain unchanged.
+.Sh ERRORS
+The
+.Fn shm_unlink
+function succeeds unless:
+.Bl -tag -width Er
+.It Bq Er EACCES
+Permission to remove the object is denied.
+.It Bq Er ENAMETOOLONG
+.Fa name
+exceeded the name size limit.
+This is currently
+.Dv PSHMNAMLEN
+characters (defined in
+.In sys/posix_shm.h ) ,
+but this may change in the future.
+.It Bq Er ENOENT
+The named object does not exist.
+.El
+.Sh SEE ALSO
+.Xr close 2 ,
+.Xr mmap 2 ,
+.Xr munmap 2 ,
+.Xr shm_open 2 ,
+.Xr shmat 2 ,
+.Xr shmctl 2
+.Sh HISTORY
+.Fn shm_unlink
+is specified in the POSIX Realtime Extension (1003.1b-1993/1003.1i-1995).
diff --git a/bsd/man/man2/stat.2 b/bsd/man/man2/stat.2
index 76cca02f1..02de79c72 100644
--- a/bsd/man/man2/stat.2
+++ b/bsd/man/man2/stat.2
@@ -127,9 +127,7 @@ as defined by
 and into which information is placed concerning the file.
 When the macro
 .Dv _DARWIN_FEATURE_64_BIT_INODE
-is not defined (the
-.Ft ino_t
-type is 32-bits), the
+is not defined (see below for more information about this macro), the
 .Fa stat
 structure is defined as:
 .Bd -literal
@@ -137,7 +135,7 @@ struct stat { /* when _DARWIN_FEATURE_64_BIT_INODE is NOT defined */
     dev_t    st_dev;    /* device inode resides on */
     ino_t    st_ino;    /* inode's number */
     mode_t   st_mode;   /* inode protection mode */
-    nlink_t  st_nlink;  /* number or hard links to the file */
+    nlink_t  st_nlink;  /* number of hard links to the file */
     uid_t    st_uid;    /* user-id of owner */
     gid_t    st_gid;    /* group-id of owner */
     dev_t    st_rdev;   /* device type, for special file inode */
@@ -155,16 +153,6 @@ struct stat { /* when _DARWIN_FEATURE_64_BIT_INODE is NOT defined */
 However, when the macro
 .Dv _DARWIN_FEATURE_64_BIT_INODE
 is defined, the
-.Ft ino_t
-type will be 64-bits (force 64-bit inode mode by defining the
-.Dv _DARWIN_USE_64_BIT_INODE
-macro before including header files).
-This will cause symbol variants of the
-.Fa stat
-family, with the
-.Fa $INODE64
-suffixes, to be automatically linked in.
-In addition, the
 .Fa stat
 structure will now be defined as:
 .Bd -literal
@@ -274,6 +262,141 @@ field, see
 .Aq Pa sys/stat.h
 and
 .Xr chflags 2 .
+.Sh _DARWIN_FEATURE_64_BIT_INODE
+In order to accommodate advanced capabilities of newer file systems, the 
+.Fa struct stat ,
+.Fa struct statfs ,
+and
+.Fa struct dirent
+data structures were updated in Mac OS X 10.5.
+.Pp
+The most obvious change is the increased size of
+.Fa ino_t
+from 32 bits to 64 bits.  As a consequence, storing an ino_t in an int is
+no longer safe, and file formats storing ino_t as 32-bit values may need to
+be updated.  There are other changes as well, such as the widening of
+.Fa f_fstypename ,
+.Fa f_mntonname ,
+and
+.Fa f_mntfromname
+in
+.Fa struct statfs .
+Please refer to
+.Xr stat 2
+and
+.Xr dir 5
+for more detail on the specific changes to the other affected data structures.
+.Pp
+On platforms that existed before these updates were available, ABI
+compatibility is achieved by providing two implementations for related
+functions: one using the legacy data structures and one using the updated
+data structures.  Variants which make use of the newer structures have their
+symbols suffixed with $INODE64.  These $INODE64 suffixes are automatically
+appended by the compiler toolchain and should not be used directly.
+.Pp
+Platforms that were released after these updates only have the newer variants
+available to them.  These platforms have the macro
+.Dv _DARWIN_FEATURE_ONLY_64_BIT_INODE
+defined.
+.Pp
+The
+.Dv _DARWIN_FEATURE_64_BIT_INODE
+macro should not be set directly.  Instead, developers should make use of the
+.Dv _DARWIN_NO_64_BIT_INODE
+or
+.Dv _DARWIN_USE_64_BIT_INODE
+macros when the default variant is not desired.  The following table details
+the effects of defining these macros for different deployment targets.
+.Pp
+.TS
+center;
+c s s s
+l | c s s
+c | c c c
+c | c c c
+l | c c c.
+T{
+.Dv _DARWIN_FEATURE_ONLY_64_BIT_INODE Sy not defined
+T}
+=
+	Deployment Target
+user defines:	< 10.5	10.5	> 10.5
+_
+T{
+.Em (none)
+T}	32-bit	32-bit	64-bit
+T{
+.Dv _DARWIN_NO_64_BIT_INODE
+T}	32-bit	32-bit	32-bit
+T{
+.Dv _DARWIN_USE_64_BIT_INODE
+T}	32-bit	64-bit	64-bit
+_
+.T&
+c s s s
+c s s s
+c | l s s
+c | c c c
+l | c c c.
+
+T{
+.Dv _DARWIN_FEATURE_ONLY_64_BIT_INODE Sy defined
+T}
+=
+user defines:	Any Deployment Target
+_
+T{
+.Em (none)
+T}	64-bit-only
+T{
+.Dv _DARWIN_NO_64_BIT_INODE
+T}	T{
+.Em (error)
+T}
+T{
+.Dv _DARWIN_USE_64_BIT_INODE
+T}	64-bit-only
+_
+.TE
+.Pp
+.Bl -tag -width 64-bit-only -offset indent
+.It 32-bit
+32-bit inode values are enabled, and the legacy structures involving the
+.Vt ino_t
+type are in use.
+The macro
+.Dv _DARWIN_FEATURE_64_BIT_INODE
+is not defined.
+.It 64-bit
+64-bit inode values are enabled, and the expanded structures involving the
+.Vt ino_t
+type are in use.
+The macro
+.Dv _DARWIN_FEATURE_64_BIT_INODE
+is defined, and loader symbols will contain the
+.Li $INODE64
+suffix.
+.It 64-bit-only
+Like 64-bit, except loader symbols do not have the
+.Li $INODE64
+suffix.
+.It Em (error)
+A compile time error is generated.
+.El
+.Pp
+Given the benefits of the larger structures, it is highly
+recommended that developers not define
+.Dv _DARWIN_NO_64_BIT_INODE
+and instead make use of
+.Dv _DARWIN_USE_64_BIT_INODE
+when targeting Mac OS X 10.5.
+.Pp
+In addition to the $INODE64 suffixed symbols, variants suffixed with 64 are
+also available for related functions.  These functions were provided as a way
+for developers to use the updated structures in code that also made use of
+the legacy structures.  The enlarged stat structures were also suffixed with
+64 to distinguish them from their legacy variants.  These functions have been
+deprecated and should be avoided.
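+.Pp
+As an illustrative sketch, a source file opting in to the 64-bit
+variants defines the macro before including any headers (or,
+equivalently, is compiled with -D_DARWIN_USE_64_BIT_INODE):
+.Bd -literal -offset indent
+#define _DARWIN_USE_64_BIT_INODE 1
+#include <sys/stat.h>
+
+/* struct stat now carries a 64-bit st_ino. */
+.Ed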
 .Sh RETURN VALUES
 Upon successful completion a value of 0 is returned.
 Otherwise, a value of -1 is returned and
@@ -399,6 +522,7 @@ structure when 64-bit inodes are in effect (see above).
 .Xr chown 2 ,
 .Xr utimes 2 ,
 .Xr compat 5 ,
+.Xr statfs 2 ,
 .Xr symlink 7
 .Sh BUGS
 Applying
diff --git a/bsd/man/man2/statfs.2 b/bsd/man/man2/statfs.2
index 16e80f5d4..85dce6a80 100644
--- a/bsd/man/man2/statfs.2
+++ b/bsd/man/man2/statfs.2
@@ -71,9 +71,11 @@ argument is a pointer to a
 structure.
 When the macro
 .Dv _DARWIN_FEATURE_64_BIT_INODE
-is not defined (the
-.Ft ino_t
-type is 32-bits), that structure is defined as:
+is not defined (see
+.Xr stat 2
+for more information on this macro), the
+.Fa statfs
+structure is defined as:
 .Bd -literal
 typedef struct { int32_t val[2]; } fsid_t;
 
@@ -107,18 +109,8 @@ struct statfs { /* when _DARWIN_FEATURE_64_BIT_INODE is NOT defined */
 However, when the macro
 .Dv _DARWIN_FEATURE_64_BIT_INODE
 is defined, the
-.Ft ino_t
-type will be 64-bits (force 64-bit inode mode by defining the
-.Dv _DARWIN_USE_64_BIT_INODE
-macro before including header files).
-This will cause symbol variants of the 
-.Fa statfs
-family, with the
-.Fa $INODE64
-suffixes, to be automatically linked in.
-In addition, the
 .Fa statfs
-structure will now be defined as:
+structure is defined as:
 .Bd -literal
 #define MFSTYPENAMELEN  16 /* length of fs type name including null */
 #define MAXPATHLEN      1024	
@@ -144,6 +136,13 @@ struct statfs { /* when _DARWIN_FEATURE_64_BIT_INODE is defined */
 };
 .Ed
 .Pp
+Note that the
+.Fa f_fstypename ,
+.Fa f_mntonname ,
+and
+.Fa f_mntfromname
+fields are also wider in this variant.
+.Pp
 Fields that are undefined for a particular file system are set to -1.
 The
 .Fn fstatfs
diff --git a/bsd/man/man2/undelete.2 b/bsd/man/man2/undelete.2
new file mode 100644
index 000000000..b85ecdcae
--- /dev/null
+++ b/bsd/man/man2/undelete.2
@@ -0,0 +1,108 @@
+.\" Copyright (c) 1994
+.\"	Jan-Simon Pendry
+.\"	The Regents of the University of California.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\"    must display the following acknowledgement:
+.\"	This product includes software developed by the University of
+.\"	California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\"    may be used to endorse or promote products derived from this software
+.\"    without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\"     @(#)undelete.2	8.4 (Berkeley) 10/18/94
+.\" $FreeBSD: src/lib/libc/sys/undelete.2,v 1.17 2006/01/22 19:49:37 truckman Exp $
+.\"
+.Dd January 22, 2006
+.Dt UNDELETE 2
+.Os
+.Sh NAME
+.Nm undelete
+.Nd attempt to recover a deleted file
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In unistd.h
+.Ft int
+.Fn undelete "const char *path"
+.Sh DESCRIPTION
+The
+.Fn undelete
+system call attempts to recover the deleted file named by
+.Fa path .
+Currently, this works only when the named object
+is a whiteout in a union file system.
+The system call removes the whiteout, causing
+any objects in a lower layer of the
+union stack to become visible once more.
+.Pp
+Eventually, the
+.Fn undelete
+functionality may be expanded to other file systems able to recover
+deleted files such as the log-structured file system.
+.Sh RETURN VALUES
+.Rv -std undelete
+.Sh ERRORS
+The
+.Fn undelete
+system call succeeds unless:
+.Bl -tag -width Er
+.It Bq Er ENOTDIR
+A component of the path prefix is not a directory.
+.It Bq Er ENAMETOOLONG
+A component of a pathname exceeded 255 characters,
+or an entire path name exceeded 1023 characters.
+.It Bq Er EEXIST
+The path does not reference a whiteout.
+.It Bq Er ENOENT
+The named whiteout does not exist.
+.It Bq Er EACCES
+Search permission is denied for a component of the path prefix.
+.It Bq Er EACCES
+Write permission is denied on the directory containing the name
+to be undeleted.
+.It Bq Er ELOOP
+Too many symbolic links were encountered in translating the pathname.
+.It Bq Er EPERM
+The directory containing the name is marked sticky,
+and the containing directory is not owned by the effective user ID.
+.It Bq Er EINVAL
+The last component of the path is
+.Ql .. .
+.It Bq Er EIO
+An I/O error occurred while updating the directory entry.
+.It Bq Er EROFS
+The name resides on a read-only file system.
+.It Bq Er EFAULT
+The
+.Fa path
+argument
+points outside the process's allocated address space.
+.El
+.Sh SEE ALSO
+.Xr unlink 2
+.Sh HISTORY
+The
+.Fn undelete
+system call first appeared in
+.Bx 4.4 Lite .
diff --git a/bsd/man/man3/posix_spawn_file_actions_addclose.3 b/bsd/man/man3/posix_spawn_file_actions_addclose.3
index 6cd2033c3..36c64715d 100644
--- a/bsd/man/man3/posix_spawn_file_actions_addclose.3
+++ b/bsd/man/man3/posix_spawn_file_actions_addclose.3
@@ -1,5 +1,5 @@
 .\"
-.\" Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+.\" Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 .\"
 .\" @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 .\" 
@@ -27,7 +27,7 @@
 .\"
 .\"     @(#)posix_spawn_file_actions_addclose.3
 .
-.Dd August 22, 2007
+.Dd November 2, 2010
 .Dt POSIX_SPAWN_FILE_ACTIONS_ADDCLOSE 3
 .Os "Mac OS X"
 .Sh NAME
@@ -56,6 +56,11 @@
 .Fa "int filedes"
 .Fa "int newfiledes"
 .Fc
+.Ft int
+.Fo posix_spawn_file_actions_addinherit_np
+.Fa "posix_spawn_file_actions_t *file_actions"
+.Fa "int filedes"
+.Fc
 .Sh DESCRIPTION
 The
 .Fn posix_spawn_file_actions_addclose
@@ -115,6 +120,42 @@ is created as if
 had been called on
 .Em filedes
 prior to the new child process starting execution.
+.Pp
+The
+.Fn posix_spawn_file_actions_addinherit_np
+function adds an abstract inheritance operation to the
+list of operations associated with the object referenced by
+.Em file_actions ,
+for subsequent use in a call to
+.Xr posix_spawn 2
+or
+.Xr posix_spawnp 2 .
+The pre-existing descriptor referred to by
+.Em filedes
+is marked for inheritance into the new process image, and the
+.Em FD_CLOEXEC
+flag is cleared from the file descriptor in the new process image.
+.Pp
+Normally, for
+.Xr posix_spawn 2
+and
+.Xr posix_spawnp 2 ,
+all file descriptors are inherited from the parent process
+into the spawned process, except for those explicitly
+marked as close-on-exec.  However, if the flag
+.Em POSIX_SPAWN_CLOEXEC_DEFAULT
+is set, then during the spawn operation, all pre-existing
+file descriptors in the parent process are treated as if they
+had been marked close-on-exec, i.e., none of them are automatically
+inherited.  See
+.Xr posix_spawnattr_setflags 3 .
+Only file descriptors explicitly manipulated via 
+.Em file_actions
+are made available in the spawned process. In that case,
+.Fn posix_spawn_file_actions_addinherit_np
+can be used to make specific pre-existing file
+descriptors from the parent process
+available in the spawned process.
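+.Pp
+A minimal sketch (the path, arguments, and extra descriptor are
+placeholders) that spawns a child inheriting only the standard
+descriptors plus one explicitly listed descriptor:
+.Bd -literal -offset indent
+#include <spawn.h>
+
+extern char **environ;
+
+int
+spawn_with_fd(const char *path, char *const argv[], int fd)
+{
+    pid_t pid;
+    posix_spawnattr_t attr;
+    posix_spawn_file_actions_t fa;
+    int err;
+
+    posix_spawnattr_init(&attr);
+    posix_spawnattr_setflags(&attr, POSIX_SPAWN_CLOEXEC_DEFAULT);
+
+    posix_spawn_file_actions_init(&fa);
+    /* Without these, no descriptors survive the spawn. */
+    posix_spawn_file_actions_addinherit_np(&fa, 0);
+    posix_spawn_file_actions_addinherit_np(&fa, 1);
+    posix_spawn_file_actions_addinherit_np(&fa, 2);
+    posix_spawn_file_actions_addinherit_np(&fa, fd);
+
+    err = posix_spawn(&pid, path, &fa, &attr, argv, environ);
+
+    posix_spawn_file_actions_destroy(&fa);
+    posix_spawnattr_destroy(&attr);
+    return err;
+}
+.Ed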
 .Sh RETURN VALUES
 On success, these functions return 0; on failure they return an error
 number from
@@ -127,7 +168,7 @@ These functions may fail if:
 The value specified by
 .Fa filedes
 is negative or would cause the process to exceed the maximum number of
-open files it is allowed..
+open files it is allowed.
 .\" ==========
 .It Bq Er EINVAL
 The value of
@@ -135,7 +176,7 @@ The value of
 is invalid.
 .\" ==========
 .It Bq Er ENOMEM
-Insufficient memory was available eo add to the
+Insufficient memory was available to add the new action to
 .Fa file_actions .
 .El
 .Sh SEE ALSO
@@ -143,6 +184,7 @@ Insufficient memory was available eo add to the
 .Xr posix_spawnp 2 ,
 .Xr posix_spawn_file_actions_init 3 ,
 .Xr posix_spawn_file_actions_destroy 3 ,
+.Xr posix_spawnattr_setflags 3
 .Sh STANDARDS
 .St -susv3 [SPN]
 .Sh HISTORY
diff --git a/bsd/man/man3/posix_spawnattr_setflags.3 b/bsd/man/man3/posix_spawnattr_setflags.3
index 8828a83df..3359497ec 100644
--- a/bsd/man/man3/posix_spawnattr_setflags.3
+++ b/bsd/man/man3/posix_spawnattr_setflags.3
@@ -1,5 +1,5 @@
 .\"
-.\" Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+.\" Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 .\"
 .\" @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 .\" 
@@ -27,7 +27,7 @@
 .\"
 .\"     @(#)posix_spawnattr_setflags.3
 .
-.Dd August 22, 2007
+.Dd October 28, 2010
 .Dt POSIX_SPAWNATTR_SETFLAGS 3
 .Os "Mac OS X"
 .Sh NAME
@@ -119,6 +119,13 @@ manipulate the process before it begins execution in user space.  This
 permits, for example, obtaining exact instruction counts, or debugging
 very early in
 .Xr dyld 1 .
+.It Dv POSIX_SPAWN_CLOEXEC_DEFAULT
+.Em Apple Extension :
+If this bit is set, then only file descriptors explicitly described by the
+.Fa file_actions
+argument are available in the spawned process; all
+of the other file descriptors are
+automatically closed in the spawned process.
 .El
 .Sh RETURN VALUES
 On success, these functions return 0; on failure they return an error
@@ -154,6 +161,7 @@ is invalid.
 .Xr posix_spawnattr_setpgroup 3 ,
 .Xr posix_spawnattr_setsigdefault 3 ,
 .Xr posix_spawnattr_setsigmask 3 ,
+.Xr posix_spawn_file_actions_init 3 ,
 .Xr setpgid 2 ,
 .Xr execve 2 ,
 .Xr dyld 1
diff --git a/bsd/man/man4/auditpipe.4 b/bsd/man/man4/auditpipe.4
index 7e0d7cc3e..e3a7a9427 100644
--- a/bsd/man/man4/auditpipe.4
+++ b/bsd/man/man4/auditpipe.4
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD: src/share/man/man4/auditpipe.4,v 1.6 2008/05/02 17:36:22 rwatson Exp $
 .\"
-.Dd May 5, 2006
+.Dd Oct 18, 2010
 .Os
 .Dt AUDITPIPE 4
 .Sh NAME
@@ -156,7 +156,7 @@ These flags correspond to the
 field in
 .Xr audit_control 5 .
 The ioctl argument should be of type
-.Vt u_int .
+.Vt au_mask_t .
 .It Dv AUDITPIPE_SET_PRESELECT_FLAGS
 Set the current default preselection flags for attributable events on the
 pipe.
@@ -165,7 +165,7 @@ These flags correspond to the
 field in
 .Xr audit_control 5 .
 The ioctl argument should be of type
-.Vt u_int .
+.Vt au_mask_t .
 .It Dv AUDITPIPE_GET_PRESELECT_NAFLAGS
 Retrieve the current default preselection flags for non-attributable events
 on the pipe.
@@ -174,7 +174,7 @@ These flags correspond to the
 field in
 .Xr audit_control 5 .
 The ioctl argument should be of type
-.Vt u_int .
+.Vt au_mask_t .
 .It Dv AUDITPIPE_SET_PRESELECT_NAFLAGS
 Set the current default preselection flags for non-attributable events on the
 pipe.
@@ -183,7 +183,7 @@ These flags correspond to the
 field in
 .Xr audit_control 5 .
 The ioctl argument should be of type
-.Vt u_int .
+.Vt au_mask_t .
 .It Dv AUDITPIPE_GET_PRESELECT_AUID
 Query the current preselection masks for a specific auid on the pipe.
 The ioctl argument should be of type
@@ -252,7 +252,5 @@ It might be desirable to provided a more flexible selection model.
 The per-pipe audit event queue is FIFO, with drops occurring if either the
 user thread provides insufficient space for the record on the queue head, or on
 enqueue if there is insufficient room.
-It might be desirable to support partial reads of records, which would be
-more compatible with buffered I/O as implemented in system libraries, and to
-allow applications to select which records are dropped, possibly in the style
-of preselection.
+It might be desirable to allow applications to select which records are
+dropped, possibly in the style of preselection.
diff --git a/bsd/man/man4/gif.4 b/bsd/man/man4/gif.4
index fe42f42cd..00e63f8eb 100644
--- a/bsd/man/man4/gif.4
+++ b/bsd/man/man4/gif.4
@@ -39,7 +39,7 @@
 .Sh DESCRIPTION
 The
 .Nm
-interface is a generic tunnelling pseudo device for IPv4 and IPv6.
+interface is a generic tunneling pseudo device for IPv4 and IPv6.
 It can tunnel IPv[46] traffic over IPv[46].
 Therefore, there can be four possible configurations.
 The behavior of
@@ -195,7 +195,7 @@ The
 device first appeared in WIDE hydrangea IPv6 kit.
 .\"
 .Sh BUGS
-There are many tunnelling protocol specifications,
+There are many tunneling protocol specifications,
 defined differently from each other.
 .Nm
 may not interoperate with peers which are based on different specifications,
diff --git a/bsd/man/man4/icmp6.4 b/bsd/man/man4/icmp6.4
index 40a30a31f..f41f7216d 100644
--- a/bsd/man/man4/icmp6.4
+++ b/bsd/man/man4/icmp6.4
@@ -235,7 +235,7 @@ sockets can be opened with the
 .Dv SOCK_DGRAM
 socket type without requiring root privileges. The synopsis is the following:
 .Pp
-.Fn socket AF_INET6 SOCK_DGRAM IPPROTO_ICMP6
+.Fn socket AF_INET6 SOCK_DGRAM IPPROTO_ICMPV6
 .Pp
 This can only be used to send 
 .Tn ICMPv6
diff --git a/bsd/man/man4/netintro.4 b/bsd/man/man4/netintro.4
index 725797146..ab2b1b277 100644
--- a/bsd/man/man4/netintro.4
+++ b/bsd/man/man4/netintro.4
@@ -145,7 +145,8 @@ are known to the system
 (and additional formats are defined for possible future implementation):
 .Bd -literal
 #define    AF_UNIX      1    /* local to host (pipes) */
-#define    AF_INET      2    /* internetwork: UDP, TCP, etc. */
+#define    AF_INET      2    /* IPv4: UDP, TCP, etc. */
+#define    AF_INET6     30   /* IPv6: UDP, TCP, etc. */
 #define    AF_NS        6    /* Xerox NS protocols */
 #define    AF_CCITT     10   /* CCITT protocols, X.25 etc */
 #define    AF_HYLINK    15   /* NSC Hyperchannel */
diff --git a/bsd/man/man4/random.4 b/bsd/man/man4/random.4
index bc0dbc76c..ed72fa315 100644
--- a/bsd/man/man4/random.4
+++ b/bsd/man/man4/random.4
@@ -72,4 +72,4 @@ directly before obtaining important random numbers.
 .Sh HISTORY
 A
 .Nm
-device appeared in Linux operating system.
+device appeared in the Linux operating system.
diff --git a/bsd/man/man5/Makefile b/bsd/man/man5/Makefile
index 0780eadfa..bf6093b3f 100644
--- a/bsd/man/man5/Makefile
+++ b/bsd/man/man5/Makefile
@@ -11,8 +11,6 @@ DATAFILES = \
 	core.5 \
 	dir.5 \
 	dirent.5 \
-	fs.5 \
-	inode.5 \
 	types.5
 
 INSTALL_MAN_LIST = ${DATAFILES}
diff --git a/bsd/man/man5/dir.5 b/bsd/man/man5/dir.5
index 6f2eacb60..c9e37b3b5 100644
--- a/bsd/man/man5/dir.5
+++ b/bsd/man/man5/dir.5
@@ -87,9 +87,9 @@ and further in the file
 .Aq dirent.h .
 When the macro
 .Dv _DARWIN_FEATURE_64_BIT_INODE
-is not defined (the
-.Ft ino_t
-type is 32-bits), the
+is not defined (see
+.Xr stat 2
+for more information on this macro), the
 .Fa dirent
 structure is defined as:
 .Bd -literal
@@ -116,16 +116,8 @@ struct dirent { /* when _DARWIN_FEATURE_64_BIT_INODE is NOT defined */
 However, when the macro
 .Dv _DARWIN_FEATURE_64_BIT_INODE
 is defined, the
-.Ft ino_t
-type will be 64-bits (force 64-bit inode mode by defining the
-.Dv _DARWIN_USE_64_BIT_INODE
-macro before including header files).
-This will cause symbol variants of the directory routines, with the
-.Fa $INODE64
-suffixes, to be automatically linked in.
-In addition, the
 .Fa dirent
-structure will now be defined as:
+structure is defined as:
 .Bd -literal
 /*
  * The dirent structure defines the format of directory entries.
diff --git a/bsd/man/man5/fs.5 b/bsd/man/man5/fs.5
deleted file mode 100644
index 18da833e6..000000000
--- a/bsd/man/man5/fs.5
+++ /dev/null
@@ -1,343 +0,0 @@
-.\"	$NetBSD: fs.5,v 1.3 1994/11/30 19:31:17 jtc Exp $
-.\"
-.\" Copyright (c) 1983, 1991, 1993
-.\"	The Regents of the University of California.  All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\"    notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\"    notice, this list of conditions and the following disclaimer in the
-.\"    documentation and/or other materials provided with the distribution.
-.\" 3. All advertising materials mentioning features or use of this software
-.\"    must display the following acknowledgement:
-.\"	This product includes software developed by the University of
-.\"	California, Berkeley and its contributors.
-.\" 4. Neither the name of the University nor the names of its contributors
-.\"    may be used to endorse or promote products derived from this software
-.\"    without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\"     @(#)fs.5	8.2 (Berkeley) 4/19/94
-.\"
-.Dd April 19, 1994
-.Dt FS 5
-.Os BSD 4.2
-.Sh NAME
-.Nm fs ,
-.Nm inode
-.Nd format of file system volume
-.Sh SYNOPSIS
-.Fd #include <sys/types.h>
-.Fd #include <ufs/fs.h>
-.Fd #include <ufs/inode.h>
-.Sh DESCRIPTION
-The files
-.Aq Pa fs.h
-and
-.Aq Pa inode.h
-declare several structures, defined variables and macros
-which are used to create and manage the underlying format of
-file system objects on random access devices (disks).
-.Pp
-The block size and number of blocks which
-comprise a file system are parameters of the file system.
-Sectors beginning at
-.Dv BBLOCK
-and continuing for
-.Dv BBSIZE
-are used
-for a disklabel and for some hardware primary
-and secondary bootstrapping programs.
-.Pp
-The actual file system begins at sector
-.Dv SBLOCK
-with the
-.Em super-block
-that is of size
-.Dv SBSIZE .
-The following structure described the super-block and is
-from the file
-.Aq Pa ufs/fs.h :
-.Bd -literal
-#define	FS_MAGIC 0x011954
-struct fs {
-	struct	fs *fs_link;	/* linked list of file systems */
-	struct	fs *fs_rlink;	/*     used for incore super blocks */
-	daddr_t	fs_sblkno;	/* addr of super-block in filesys */
-	daddr_t	fs_cblkno;	/* offset of cyl-block in filesys */
-	daddr_t	fs_iblkno;	/* offset of inode-blocks in filesys */
-	daddr_t	fs_dblkno;	/* offset of first data after cg */
-	long	fs_cgoffset;	/* cylinder group offset in cylinder */
-	long	fs_cgmask;	/* used to calc mod fs_ntrak */
-	time_t 	fs_time;    	/* last time written */
-	long	fs_size;	/* number of blocks in fs */
-	long	fs_dsize;	/* number of data blocks in fs */
-	long	fs_ncg;	/* number of cylinder groups */
-	long	fs_bsize;	/* size of basic blocks in fs */
-	long	fs_fsize;	/* size of frag blocks in fs */
-	long	fs_frag;	/* number of frags in a block in fs */
-/* these are configuration parameters */
-	long	fs_minfree;	/* minimum percentage of free blocks */
-	long	fs_rotdelay;	/* num of ms for optimal next block */
-	long	fs_rps;	/* disk revolutions per second */
-/* these fields can be computed from the others */
-	long	fs_bmask;	/* ``blkoff'' calc of blk offsets */
-	long	fs_fmask;	/* ``fragoff'' calc of frag offsets */
-	long	fs_bshift;	/* ``lblkno'' calc of logical blkno */
-	long	fs_fshift;	/* ``numfrags'' calc number of frags */
-/* these are configuration parameters */
-	long	fs_maxcontig;	/* max number of contiguous blks */
-	long	fs_maxbpg;	/* max number of blks per cyl group */
-/* these fields can be computed from the others */
-	long	fs_fragshift;	/* block to frag shift */
-	long	fs_fsbtodb;	/* fsbtodb and dbtofsb shift constant */
-	long	fs_sbsize;	/* actual size of super block */
-	long	fs_csmask;	/* csum block offset */
-	long	fs_csshift;	/* csum block number */
-	long	fs_nindir;	/* value of NINDIR */
-	long	fs_inopb;	/* value of INOPB */
-	long	fs_nspf;	/* value of NSPF */
-/* yet another configuration parameter */
-	long	fs_optim;	/* optimization preference, see below */
-/* these fields are derived from the hardware */
-	long	fs_npsect;	/* # sectors/track including spares */
-	long	fs_interleave;	/* hardware sector interleave */
-	long	fs_trackskew;	/* sector 0 skew, per track */
-	long	fs_headswitch;	/* head switch time, usec */
-	long	fs_trkseek;	/* track-to-track seek, usec */
-/* sizes determined by number of cylinder groups and their sizes */
-	daddr_t fs_csaddr;	/* blk addr of cyl grp summary area */
-	long	fs_cssize;	/* size of cyl grp summary area */
-	long	fs_cgsize;	/* cylinder group size */
-/* these fields are derived from the hardware */
-	long	fs_ntrak;	/* tracks per cylinder */
-	long	fs_nsect;	/* sectors per track */
-	long  	fs_spc;   	/* sectors per cylinder */
-/* this comes from the disk driver partitioning */
-	long	fs_ncyl;   	/* cylinders in file system */
-/* these fields can be computed from the others */
-	long	fs_cpg;	/* cylinders per group */
-	long	fs_ipg;	/* inodes per group */
-	long	fs_fpg;	/* blocks per group * fs_frag */
-/* this data must be re-computed after crashes */
-	struct	csum fs_cstotal;	/* cylinder summary information */
-/* these fields are cleared at mount time */
-	char   	fs_fmod;    	/* super block modified flag */
-	char   	fs_clean;    	/* file system is clean flag */
-	char   	fs_ronly;   	/* mounted read-only flag */
-	char   	fs_flags;   	/* currently unused flag */
-	char	fs_fsmnt[MAXMNTLEN];	/* name mounted on */
-/* these fields retain the current block allocation info */
-	long	fs_cgrotor;	/* last cg searched */
-	struct	csum *fs_csp[MAXCSBUFS]; /* list of fs_cs info buffers */
-	long	fs_cpc;	/* cyl per cycle in postbl */
-	short	fs_opostbl[16][8];	/* old rotation block list head */
-	long	fs_sparecon[56];	/* reserved for future constants */
-	quad	fs_qbmask;	/* ~fs_bmask - for use with quad size */
-	quad	fs_qfmask;	/* ~fs_fmask - for use with quad size */
-	long	fs_postblformat; /* format of positional layout tables */
-	long	fs_nrpos;	/* number of rotational positions */
-	long	fs_postbloff;	/* (short) rotation block list head */
-	long	fs_rotbloff;	/* (u_char) blocks for each rotation */
-	long	fs_magic;	/* magic number */
-	u_char	fs_space[1];	/* list of blocks for each rotation */
-/* actually longer */
-};
-.Ed
-.Pp
-Each disk drive contains some number of file systems.
-A file system consists of a number of cylinder groups.
-Each cylinder group has inodes and data.
-.Pp
-A file system is described by its super-block, which in turn
-describes the cylinder groups.  The super-block is critical
-data and is replicated in each cylinder group to protect against
-catastrophic loss.  This is done at file system creation
-time and the critical
-super-block data does not change, so the copies need not be
-referenced further unless disaster strikes.
-.Pp
-Addresses stored in inodes are capable of addressing fragments
-of `blocks'. File system blocks of at most size
-.Dv MAXBSIZE
-can 
-be optionally broken into 2, 4, or 8 pieces, each of which is
-addressable; these pieces may be
-.Dv DEV_BSIZE ,
-or some multiple of
-a
-.Dv DEV_BSIZE
-unit.
-.Pp
-Large files consist of exclusively large data blocks.  To avoid
-undue wasted disk space, the last data block of a small file is
-allocated as only as many fragments of a large block as are
-necessary.  The file system format retains only a single pointer
-to such a fragment, which is a piece of a single large block that
-has been divided.  The size of such a fragment is determinable from
-information in the inode, using the
-.Fn blksize fs ip lbn
-macro.
-.Pp
-The file system records space availability at the fragment level;
-to determine block availability, aligned fragments are examined.
-.Pp
-The root inode is the root of the file system.
-Inode 0 can't be used for normal purposes and
-historically bad blocks were linked to inode 1,
-thus the root inode is 2 (inode 1 is no longer used for
-this purpose, however numerous dump tapes make this
-assumption, so we are stuck with it).
-.Pp
-The
-.Fa fs_minfree
-element gives the minimum acceptable percentage of file system
-blocks that may be free. If the freelist drops below this level
-only the super-user may continue to allocate blocks.
-The
-.Fa fs_minfree
-element
-may be set to 0 if no reserve of free blocks is deemed necessary,
-however severe performance degradations will be observed if the
-file system is run at greater than 90% full; thus the default
-value of
-.Fa fs_minfree
-is 10%.
-.Pp
-Empirically the best trade-off between block fragmentation and
-overall disk utilization at a loading of 90% comes with a
-fragmentation of 8, thus the default fragment size is an eighth
-of the block size.
-.Pp
-The element
-.Fa fs_optim
-specifies whether the file system should try to minimize the time spent
-allocating blocks, or if it should attempt to minimize the space
-fragmentation on the disk.
-If the value of fs_minfree (see above) is less than 10%,
-then the file system defaults to optimizing for space to avoid
-running out of full sized blocks.
-If the value of minfree is greater than or equal to 10%,
-fragmentation is unlikely to be problematical, and
-the file system defaults to optimizing for time.
-.Pp
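
The optimization-preference rule just described is simple enough to state in code. The following is a hedged, illustrative sketch: the FS_OPTTIME/FS_OPTSPACE values mirror the historical UFS definitions, and the helper name is invented for illustration, not taken from the kernel.

#define FS_OPTTIME	0	/* minimize allocation time */
#define FS_OPTSPACE	1	/* minimize disk fragmentation */

/*
 * Default optimization preference, per the rule above: with less than
 * a 10% reserve, full-sized blocks become scarce, so optimize for
 * space; otherwise optimize for time.
 */
static int
default_fs_optim(int minfree_pct)
{
	return (minfree_pct < 10) ? FS_OPTSPACE : FS_OPTTIME;
}

Keeping the threshold equal to the default fs_minfree ties the two policies together: a reserve below 10% is exactly the regime in which fragmentation becomes a concern.
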
-.Em Cylinder group related limits :
-Each cylinder group keeps track of the availability of blocks at different
-rotational positions, so that sequential blocks can be laid out
-with minimum rotational latency. With the default of 8 distinguished
-rotational positions, the resolution of the
-summary information is 2ms for a typical 3600 rpm drive.
-.Pp
-The element
-.Fa fs_rotdelay
-gives the minimum time, in milliseconds, needed to initiate
-another disk transfer on the same cylinder.
-It is used in determining the rotationally optimal
-layout for disk blocks within a file;
-the default value for
-.Fa fs_rotdelay
-is 2ms.
-.Pp
-Each file system has a statically allocated number of inodes.
-An inode is allocated for each
-.Dv NBPI
-bytes of disk space.
-The inode allocation strategy is extremely conservative.
-.Pp
-.Dv MINBSIZE
-is the smallest allowable block size.
-With a
-.Dv MINBSIZE
-of 4096
-it is possible to create files of size
-2^32 bytes with only two levels of indirection.
-.Dv MINBSIZE
-must be big enough to hold a cylinder group block,
-thus changes to
-.Pq Fa struct cg
-must keep its size within
-.Dv MINBSIZE .
-Note that super-blocks are never more than size
-.Dv SBSIZE .
-.Pp
-The path name on which the file system is mounted is maintained in
-.Fa fs_fsmnt .
-.Dv MAXMNTLEN
-defines the amount of space allocated in 
-the super-block for this name.
-The limit on the amount of summary information per file system
-is defined by
-.Dv MAXCSBUFS .
-For a 4096 byte block size, it is currently parameterized for a
-maximum of two million cylinders.
-.Pp
-Per cylinder group information is summarized in blocks allocated
-from the first cylinder group's data blocks. 
-These blocks are read in from
-.Fa fs_csaddr
-(size
-.Fa fs_cssize )
-in addition to the super-block.
-.Pp
-.Sy N.B.:
-.Xr sizeof Pq Fa struct csum
-must be a power of two in order for
-the
-.Fn fs_cs
-macro to work.
-.Pp
-The
-.Em "Super-block for a file system" :
-The size of the rotational layout tables
-is limited by the fact that the super-block is of size
-.Dv SBSIZE .
-The size of these tables is
-.Em inversely
-proportional to the block
-size of the file system. The size of the tables is
-increased when sector sizes are not powers of two,
-as this increases the number of cylinders
-included before the rotational pattern repeats
-.Pq Fa fs_cpc .
-The size of the rotational layout
-tables is derived from the number of bytes remaining in
-.Pq Fa struct fs .
-.Pp
-The number of blocks of data per cylinder group
-is limited because cylinder groups are at most one block.
-The inode and free block tables
-must fit into a single block after deducting space for
-the cylinder group structure
-.Pq Fa struct cg .
-.Pp
-The
-.Em Inode :
-The inode is the focus of all file activity in the
-file system.
-There is a unique inode allocated
-for each active file,
-each current directory, each mounted-on file,
-text file, and the root.
-An inode is `named' by its device/i-number pair.
-For further information, see the include file
-.Aq Pa sys/inode.h .
-.Sh HISTORY
-A super-block structure named filsys appeared in
-.At v6 .
-The file system described in this manual appeared
-in
-.Bx 4.2 .
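
To make the fragment bookkeeping described in the text above concrete, here is a small illustrative sketch of how much space the last data block of a small file occupies when blocks may be broken into fragments. It is a simplification under stated assumptions, not the kernel's blksize() macro, and the function name is invented.

/*
 * Illustrative only: bsize is assumed to be a power-of-two multiple
 * of fsize (2, 4, or 8 fragments per block, per the text above).
 */
static unsigned long
last_block_bytes(unsigned long filesize, unsigned long bsize,
    unsigned long fsize)
{
	unsigned long tail = filesize % bsize;	/* bytes in the last, partial block */

	if (tail == 0)
		return (bsize);			/* last block is a full block */
	/* round the tail up to a whole number of fragments */
	return (((tail + fsize - 1) / fsize) * fsize);
}
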
diff --git a/bsd/man/man5/inode.5 b/bsd/man/man5/inode.5
deleted file mode 100644
index 1b47f6228..000000000
--- a/bsd/man/man5/inode.5
+++ /dev/null
@@ -1 +0,0 @@
-.so man5/fs.5
diff --git a/bsd/miscfs/Makefile b/bsd/miscfs/Makefile
index 009da4c3f..ece064108 100644
--- a/bsd/miscfs/Makefile
+++ b/bsd/miscfs/Makefile
@@ -13,8 +13,6 @@ INSTINC_SUBDIRS = \
 	specfs \
 	union
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS = \
@@ -22,8 +20,6 @@ EXPINC_SUBDIRS = \
 	fifofs \
 	specfs
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 SETUP_SUBDIRS = 	\
diff --git a/bsd/miscfs/devfs/Makefile b/bsd/miscfs/devfs/Makefile
index bb2e43304..9d29f42e1 100644
--- a/bsd/miscfs/devfs/Makefile
+++ b/bsd/miscfs/devfs/Makefile
@@ -9,14 +9,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 DATAFILES = \
diff --git a/bsd/miscfs/devfs/devfs_tree.c b/bsd/miscfs/devfs/devfs_tree.c
index 58aea8eb9..daf8c8ace 100644
--- a/bsd/miscfs/devfs/devfs_tree.c
+++ b/bsd/miscfs/devfs/devfs_tree.c
@@ -148,6 +148,7 @@ lck_grp_t	* devfs_lck_grp;
 lck_grp_attr_t	* devfs_lck_grp_attr;
 lck_attr_t	* devfs_lck_attr;
 lck_mtx_t  	  devfs_mutex;
+lck_mtx_t  	  devfs_attr_mutex;
 
 devdirent_t *		dev_root = NULL; 	/* root of backing tree */
 struct devfs_stats	devfs_stats;		/* hold stats */
@@ -185,6 +186,7 @@ devfs_sinit(void)
 	devfs_lck_attr = lck_attr_alloc_init();
 
 	lck_mtx_init(&devfs_mutex, devfs_lck_grp, devfs_lck_attr);
+	lck_mtx_init(&devfs_attr_mutex, devfs_lck_grp, devfs_lck_attr);
 
 	DEVFS_LOCK();
         error = dev_add_entry("root", NULL, DEV_DIR, NULL, NULL, NULL, &dev_root);
diff --git a/bsd/miscfs/devfs/devfs_vfsops.c b/bsd/miscfs/devfs/devfs_vfsops.c
index c5875bd55..f34edc2c5 100644
--- a/bsd/miscfs/devfs/devfs_vfsops.c
+++ b/bsd/miscfs/devfs/devfs_vfsops.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -93,9 +93,10 @@
 static int devfs_statfs( struct mount *mp, struct vfsstatfs *sbp, vfs_context_t ctx);
 static int devfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t ctx);
 
+#if !defined(SECURE_KERNEL)
 extern int setup_kmem;
 __private_extern__ void devfs_setup_kmem(void);
-
+#endif
 
 /*-
  * Called from the generic VFS startups.
@@ -114,9 +115,11 @@ devfs_init(__unused struct vfsconf *vfsp)
 		    UID_ROOT, GID_WHEEL, 0622, "console");
     devfs_make_node(makedev(2, 0), DEVFS_CHAR, 
 		    UID_ROOT, GID_WHEEL, 0666, "tty");
+#if !defined(SECURE_KERNEL)
     if (setup_kmem) {
     	devfs_setup_kmem();
     }
+#endif
     devfs_make_node(makedev(3, 2), DEVFS_CHAR, 
 		    UID_ROOT, GID_WHEEL, 0666, "null");
     devfs_make_node(makedev(3, 3), DEVFS_CHAR, 
@@ -131,6 +134,7 @@ devfs_init(__unused struct vfsconf *vfsp)
     return 0;
 }
 
+#if !defined(SECURE_KERNEL)
 __private_extern__ void
 devfs_setup_kmem(void)
 {
@@ -139,6 +143,7 @@ devfs_setup_kmem(void)
     	devfs_make_node(makedev(3, 1), DEVFS_CHAR, 
 		    UID_ROOT, GID_KMEM, 0640, "kmem");
 }
+#endif
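
For context on the devfs_make_node() calls in these hunks: a typical publisher of a device node pairs the call with devfs_remove() at teardown, keeping the opaque handle in between. A hedged sketch of that pattern follows; the device name, numbers, and function names are invented for illustration.

#include <miscfs/devfs/devfs.h>

static void *mydev_node;	/* handle returned by devfs_make_node() */

/* Publish /dev/mydev; the major/minor numbers here are illustrative. */
static void
mydev_publish(int major)
{
	mydev_node = devfs_make_node(makedev(major, 0), DEVFS_CHAR,
			UID_ROOT, GID_WHEEL, 0666, "mydev");
}

/* Tear the node down again, e.g. on driver unload. */
static void
mydev_unpublish(void)
{
	if (mydev_node != NULL) {
		devfs_remove(mydev_node);
		mydev_node = NULL;
	}
}
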
 
 
 /*-
@@ -495,7 +500,7 @@ devfs_kernel_mount(char * mntname)
 	/*
 	 * Get vnode to be covered
 	 */
-	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
+	NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
 	    CAST_USER_ADDR_T(mntname), ctx);
 	if ((error = namei(&nd))) {
 	    printf("devfs_kernel_mount: failed to find directory '%s', %d", 
diff --git a/bsd/miscfs/devfs/devfs_vnops.c b/bsd/miscfs/devfs/devfs_vnops.c
index 58746bb73..207a50c01 100644
--- a/bsd/miscfs/devfs/devfs_vnops.c
+++ b/bsd/miscfs/devfs/devfs_vnops.c
@@ -110,9 +110,79 @@
 #include "fdesc.h"
 #endif /* FDESC */
 
-static int devfs_update(struct vnode *vp, struct timeval *access,
-                        struct timeval *modify);
-void	devfs_rele_node(devnode_t *);
+static int 		devfs_update(struct vnode *vp, struct timeval *access,
+                        	struct timeval *modify);
+void			devfs_rele_node(devnode_t *);
+static void		devfs_consider_time_update(devnode_t *dnp, uint32_t just_changed_flags);
+static boolean_t 	devfs_update_needed(long now_s, long last_s);
+void 			dn_times_locked(devnode_t * dnp, struct timeval *t1, struct timeval *t2, struct timeval *t3, uint32_t just_changed_flags);
+void			dn_times_now(devnode_t *dnp, uint32_t just_changed_flags);
+void			dn_mark_for_delayed_times_update(devnode_t *dnp, uint32_t just_changed_flags);
+
+void 
+dn_times_locked(devnode_t * dnp, struct timeval *t1, struct timeval *t2, struct timeval *t3, uint32_t just_changed_flags)
+{
+
+	lck_mtx_assert(&devfs_attr_mutex, LCK_MTX_ASSERT_OWNED);
+
+	if (just_changed_flags & DEVFS_UPDATE_ACCESS) {
+		dnp->dn_atime.tv_sec = t1->tv_sec;
+		dnp->dn_atime.tv_nsec = t1->tv_usec * 1000;
+		dnp->dn_access = 0;
+	} else if (dnp->dn_access) {
+		dnp->dn_atime.tv_sec = MIN(t1->tv_sec, dnp->dn_atime.tv_sec + DEVFS_LAZY_UPDATE_SECONDS);
+		dnp->dn_atime.tv_nsec = t1->tv_usec * 1000;
+		dnp->dn_access = 0;
+	}
+
+	if (just_changed_flags & DEVFS_UPDATE_MOD) {
+		dnp->dn_mtime.tv_sec = t2->tv_sec;
+		dnp->dn_mtime.tv_nsec = t2->tv_usec * 1000;
+		dnp->dn_update = 0;
+	} else if (dnp->dn_update) {
+		dnp->dn_mtime.tv_sec = MIN(t2->tv_sec, dnp->dn_mtime.tv_sec + DEVFS_LAZY_UPDATE_SECONDS);
+		dnp->dn_mtime.tv_nsec = t2->tv_usec * 1000;
+		dnp->dn_update = 0;
+	}
+
+	if (just_changed_flags & DEVFS_UPDATE_CHANGE) {
+		dnp->dn_ctime.tv_sec = t3->tv_sec;
+		dnp->dn_ctime.tv_nsec = t3->tv_usec * 1000;
+		dnp->dn_change = 0;
+	} else if (dnp->dn_change) {
+		dnp->dn_ctime.tv_sec = MIN(t3->tv_sec, dnp->dn_ctime.tv_sec + DEVFS_LAZY_UPDATE_SECONDS);
+		dnp->dn_ctime.tv_nsec = t3->tv_usec * 1000;
+		dnp->dn_change = 0;
+	}
+}
+
+void
+dn_mark_for_delayed_times_update(devnode_t *dnp, uint32_t just_changed_flags)
+{
+	if (just_changed_flags & DEVFS_UPDATE_CHANGE) {
+		dnp->dn_change = 1;
+	}
+	if (just_changed_flags & DEVFS_UPDATE_ACCESS) {
+		dnp->dn_access = 1;
+	}
+	if (just_changed_flags & DEVFS_UPDATE_MOD) {
+		dnp->dn_update = 1;
+	}
+}
+
+/*
+ * Update times based on pending updates and optionally a set of new changes.
+ */
+void
+dn_times_now(devnode_t * dnp, uint32_t just_changed_flags)
+{
+	struct timeval now;
+
+	DEVFS_ATTR_LOCK_SPIN();
+	microtime(&now);
+	dn_times_locked(dnp, &now, &now, &now, just_changed_flags);
+	DEVFS_ATTR_UNLOCK();
+}
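
The clamping rule inside dn_times_locked() is worth seeing in isolation: when a node was merely marked dirty earlier, its stamp advances by at most DEVFS_LAZY_UPDATE_SECONDS past its old value rather than jumping all the way to `now'. A self-contained, hedged illustration of that arithmetic in plain userland C (names ours, not the kernel's):

#include <stdio.h>

#define LAZY_SECONDS 1	/* mirrors DEVFS_LAZY_UPDATE_SECONDS */

/*
 * What dn_times_locked() does for a pending (marked-but-unapplied)
 * update: take MIN(now, last + LAZY_SECONDS), crediting a stale mark
 * with at most one extra second.
 */
static long
lazy_stamp(long now_s, long last_s)
{
	long capped = last_s + LAZY_SECONDS;

	return (now_s < capped) ? now_s : capped;
}

int
main(void)
{
	printf("%ld\n", lazy_stamp(100, 50));	/* 51: mark went stale long ago */
	printf("%ld\n", lazy_stamp(50, 50));	/* 50: mark is fresh */
	return (0);
}
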
 
 
 /*
@@ -353,9 +423,6 @@ devfs_getattr(struct vnop_getattr_args *ap)
 	DEVFS_LOCK();
 	file_node = VTODN(vp);
 
-	microtime(&now);
-	dn_times(file_node, &now, &now, &now);
-
 	VATTR_RETURN(vap, va_mode, file_node->dn_mode);
 
 	/*
@@ -402,6 +469,13 @@ devfs_getattr(struct vnop_getattr_args *ap)
 		VATTR_RETURN(vap, va_iosize, MAXPHYSIO);
 	else
 		VATTR_RETURN(vap, va_iosize, vp->v_mount->mnt_vfsstat.f_iosize);
+
+
+	DEVFS_ATTR_LOCK_SPIN();
+
+	microtime(&now);
+	dn_times_locked(file_node, &now, &now, &now, 0);
+
 	/* if the time is bogus, set it to the boot time */
 	if (file_node->dn_ctime.tv_sec == 0) {
 		file_node->dn_ctime.tv_sec = boottime_sec();
@@ -414,6 +488,9 @@ devfs_getattr(struct vnop_getattr_args *ap)
 	VATTR_RETURN(vap, va_change_time, file_node->dn_ctime);
 	VATTR_RETURN(vap, va_modify_time, file_node->dn_mtime);
 	VATTR_RETURN(vap, va_access_time, file_node->dn_atime);
+
+	DEVFS_ATTR_UNLOCK();
+
 	VATTR_RETURN(vap, va_gen, 0);
 	VATTR_RETURN(vap, va_filerev, 0);
 	VATTR_RETURN(vap, va_acl, NULL);
@@ -557,13 +634,11 @@ devfs_close(struct vnop_close_args *ap)
 {
     	struct vnode *	    	vp = ap->a_vp;
 	register devnode_t * 	dnp;
-	struct timeval now;
 
 	if (vnode_isinuse(vp, 1)) {
 	    DEVFS_LOCK();
 	    dnp = VTODN(vp);
-	    microtime(&now);
-	    dn_times(dnp, &now, &now, &now);
+	    dn_times_now(dnp, 0);
 	    DEVFS_UNLOCK();
 	}
 	return (0);
@@ -579,19 +654,68 @@ devfsspec_close(struct vnop_close_args *ap)
 {
     	struct vnode *	    	vp = ap->a_vp;
 	register devnode_t * 	dnp;
-	struct timeval now;
 
 	if (vnode_isinuse(vp, 0)) {
 	    DEVFS_LOCK();
-	    microtime(&now);
 	    dnp = VTODN(vp);
-	    dn_times(dnp, &now, &now, &now);
+	    dn_times_now(dnp, 0);
 	    DEVFS_UNLOCK();
 	}
 
 	return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_close), ap));
 }
 
+static boolean_t
+devfs_update_needed(long now_s, long last_s)
+{
+	if (now_s > last_s) {
+		if (now_s - last_s >= DEVFS_LAZY_UPDATE_SECONDS) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+/*
+ * Given a set of time updates required [to happen at some point], either
+ * make those changes now (and resolve other pending updates) or mark
+ * the devnode for a subsequent update.
+ */
+static void
+devfs_consider_time_update(devnode_t *dnp, uint32_t just_changed_flags)
+{
+	struct timeval 		now;
+	long now_s;
+
+	microtime(&now);
+	now_s = now.tv_sec;
+
+	if (dnp->dn_change || (just_changed_flags & DEVFS_UPDATE_CHANGE)) {
+		if (devfs_update_needed(now_s, dnp->dn_ctime.tv_sec)) {
+			dn_times_now(dnp, just_changed_flags);
+			return;
+		}
+	}
+	if (dnp->dn_access || (just_changed_flags & DEVFS_UPDATE_ACCESS)) {
+		if (devfs_update_needed(now_s, dnp->dn_atime.tv_sec)) {
+			dn_times_now(dnp, just_changed_flags);
+			return;
+		}
+	}
+	if (dnp->dn_update || (just_changed_flags & DEVFS_UPDATE_MOD)) {
+		if (devfs_update_needed(now_s, dnp->dn_mtime.tv_sec)) {
+			dn_times_now(dnp, just_changed_flags);
+			return;
+		}
+	}
+
+	/* Not going to do anything now--mark for later update */
+	dn_mark_for_delayed_times_update(dnp, just_changed_flags);
+
+	return;
+}
+
 static int
 devfsspec_read(struct vnop_read_args *ap)
         /* struct vnop_read_args {
@@ -603,7 +727,7 @@ devfsspec_read(struct vnop_read_args *ap)
 {
 	register devnode_t * 	dnp = VTODN(ap->a_vp);
 
-	dnp->dn_access = 1;
+	devfs_consider_time_update(dnp, DEVFS_UPDATE_ACCESS);
 
 	return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_read), ap));
 }
@@ -619,8 +743,7 @@ devfsspec_write(struct vnop_write_args *ap)
 {
 	register devnode_t * 	dnp = VTODN(ap->a_vp);
 
-	dnp->dn_change = 1;
-	dnp->dn_update = 1;
+	devfs_consider_time_update(dnp, DEVFS_UPDATE_CHANGE | DEVFS_UPDATE_MOD);
 
 	return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_write), ap));
 }
@@ -704,8 +827,7 @@ devfs_vnop_remove(struct vnop_remove_args *ap)
 	/***********************************
 	 * Start actually doing things.... *
 	 ***********************************/
-	tdp->dn_change = 1;
-	tdp->dn_update = 1;
+	devfs_consider_time_update(tdp, DEVFS_UPDATE_CHANGE | DEVFS_UPDATE_MOD);
 
 	/*
 	 * Target must be empty if a directory and have no links
@@ -741,7 +863,6 @@ devfs_link(struct vnop_link_args *ap)
 	devnode_t * tdp;
 	devdirent_t * tnp;
 	int error = 0;
-	struct timeval now;
 
 	/*
 	 * First catch an arbitrary restriction for this FS
@@ -770,10 +891,7 @@ devfs_link(struct vnop_link_args *ap)
 	/***********************************
 	 * Start actually doing things.... *
 	 ***********************************/
-	fp->dn_change = 1;
-
-	microtime(&now);
-	error = devfs_update(vp, &now, &now);
+	dn_times_now(fp, DEVFS_UPDATE_CHANGE);
 
 	if (!error) {
 	    error = dev_add_name(cnp->cn_nameptr, tdp, NULL, fp, &tnp);
@@ -833,7 +951,6 @@ devfs_rename(struct vnop_rename_args *ap)
 	devdirent_t *fnp,*tnp;
 	int doingdirectory = 0;
 	int error = 0;
-	struct timeval now;
 
 	DEVFS_LOCK();
 	/*
@@ -914,12 +1031,8 @@ devfs_rename(struct vnop_rename_args *ap)
 	/***********************************
 	 * Start actually doing things.... *
 	 ***********************************/
-	fp->dn_change = 1;
-	microtime(&now);
+	dn_times_now(fp, DEVFS_UPDATE_CHANGE);
 
-	if ( (error = devfs_update(fvp, &now, &now)) ) {
-	    goto out;
-	}
 	/*
 	 * Check if just deleting a link name.
 	 */
@@ -1192,8 +1305,6 @@ devfs_readdir(struct vnop_readdir_args *ap)
 	name_node = dir_node->dn_typeinfo.Dir.dirlist;
 	nodenumber = 0;
 
-	dir_node->dn_access = 1;
-
 	while ((name_node || (nodenumber < 2)) && (uio_resid(uio) > 0))
 	{
 		switch(nodenumber)
@@ -1256,6 +1367,8 @@ devfs_readdir(struct vnop_readdir_args *ap)
 	DEVFS_UNLOCK();
 	uio->uio_offset = pos;
 
+	devfs_consider_time_update(dir_node, DEVFS_UPDATE_ACCESS);
+
 	return (error);
 }
 
@@ -1405,8 +1518,11 @@ devfs_update(struct vnode *vp, struct timeval *access, struct timeval *modify)
 
 		return (0);
 	}
+
+	DEVFS_ATTR_LOCK_SPIN();
 	microtime(&now);
-	dn_times(ip, access, modify, &now);
+	dn_times_locked(ip, access, modify, &now, DEVFS_UPDATE_ACCESS | DEVFS_UPDATE_MOD);
+	DEVFS_ATTR_UNLOCK();
 
 	return (0);
 }
diff --git a/bsd/miscfs/devfs/devfsdefs.h b/bsd/miscfs/devfs/devfsdefs.h
index ce85cf853..e8b12000a 100644
--- a/bsd/miscfs/devfs/devfsdefs.h
+++ b/bsd/miscfs/devfs/devfsdefs.h
@@ -178,6 +178,7 @@ struct devdirent
 extern devdirent_t * 		dev_root;
 extern struct devfs_stats	devfs_stats;
 extern lck_mtx_t	  	devfs_mutex;
+extern lck_mtx_t	  	devfs_attr_mutex;
 
 /*
  * Rules for front nodes:
@@ -214,9 +215,10 @@ struct devfsmount
 #define VTODN(vp)	((devnode_t *)(vp)->v_data)
 
 #define DEVFS_LOCK()	lck_mtx_lock(&devfs_mutex)
-
 #define DEVFS_UNLOCK()	lck_mtx_unlock(&devfs_mutex)
 
+#define DEVFS_ATTR_LOCK_SPIN()	lck_mtx_lock_spin(&devfs_attr_mutex)
+#define DEVFS_ATTR_UNLOCK()	lck_mtx_unlock(&devfs_attr_mutex)
 
 /*
  * XXX all the (SInt32 *) casts below assume sizeof(int) == sizeof(long)
@@ -269,34 +271,32 @@ DEVFS_DECR_STRINGSPACE(int space)
     OSAddAtomic(-space, &devfs_stats.stringspace);
 }
 
-static __inline__ void
-dn_times(devnode_t * dnp, struct timeval *t1, struct timeval *t2, struct timeval *t3) 
-{
-	if (dnp->dn_access) {
-	    dnp->dn_atime.tv_sec = t1->tv_sec;
-	    dnp->dn_atime.tv_nsec = t1->tv_usec * 1000;
-	    dnp->dn_access = 0;
-	}
-	if (dnp->dn_update) {
-	    dnp->dn_mtime.tv_sec = t2->tv_sec;
-	    dnp->dn_mtime.tv_nsec = t2->tv_usec * 1000;
-	    dnp->dn_update = 0;
-	}
-	if (dnp->dn_change) {
-	    dnp->dn_ctime.tv_sec = t3->tv_sec;
-	    dnp->dn_ctime.tv_nsec = t3->tv_usec * 1000;
-	    dnp->dn_change = 0;
-	}
-
-	return;
-}
+/* 
+ * Access, change, and modify times are protected by a separate lock,
+ * which allows tty times to be updated (no more than once per second)
+ * in the I/O path without too much fear of contention.
+ *
+ * For getattr, update times to the current time if the last update was
+ * recent; this preserves the legacy behavior that frequent stats can yield
+ * sub-second resolution.  If the last time is old, however, we know that the
+ * event that triggered the need for an update was no more than 1s after the
+ * last update.  In that case, use (last update + 1s) as the time, avoiding
+ * the illusion that the last update happened much later than it really did.
+ */
+#define DEVFS_LAZY_UPDATE_SECONDS	1
+
+#define DEVFS_UPDATE_CHANGE		0x1
+#define DEVFS_UPDATE_MOD		0x2
+#define DEVFS_UPDATE_ACCESS		0x4
 
 static __inline__ void
 dn_copy_times(devnode_t * target, devnode_t * source)
 {
+    DEVFS_ATTR_LOCK_SPIN();
     target->dn_atime = source->dn_atime;
     target->dn_mtime = source->dn_mtime;
     target->dn_ctime = source->dn_ctime;
+    DEVFS_ATTR_UNLOCK();
     return;
 }
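
With timestamps now guarded by devfs_attr_mutex, any reader that wants a mutually consistent view of all three stamps must take the same lock, as devfs_getattr() and dn_copy_times() do above. A hedged sketch of that pattern; the helper name is ours, and the sketch assumes the devnode_t definitions from this header.

/* Snapshot atime/mtime/ctime consistently under the attribute lock. */
static void
dn_snapshot_times(devnode_t *dnp, struct timespec *atime,
    struct timespec *mtime, struct timespec *ctime)
{
	DEVFS_ATTR_LOCK_SPIN();
	*atime = dnp->dn_atime;
	*mtime = dnp->dn_mtime;
	*ctime = dnp->dn_ctime;
	DEVFS_ATTR_UNLOCK();
}
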
 
diff --git a/bsd/miscfs/fifofs/Makefile b/bsd/miscfs/fifofs/Makefile
index ff18c9388..d70a3ab16 100644
--- a/bsd/miscfs/fifofs/Makefile
+++ b/bsd/miscfs/fifofs/Makefile
@@ -9,14 +9,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 DATAFILES = \
diff --git a/bsd/miscfs/nullfs/null.h b/bsd/miscfs/nullfs/null.h
deleted file mode 100644
index 3209be3d9..000000000
--- a/bsd/miscfs/nullfs/null.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
-/*
- * Copyright (c) 1992, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software donated to Berkeley by
- * Jan-Simon Pendry.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)null.h	8.3 (Berkeley) 8/20/94
- *
- *	null.h	8.2 (Berkeley) 1/21/94
- */
-#ifndef __NULLFS_NULL_H__
-#define __NULLFS_NULL_H__
-
-#include  <sys/appleapiopts.h>
-
-#ifdef __APPLE_API_PRIVATE
-struct null_args {
-	char		*target;	/* Target of loopback  */
-};
-
-struct null_mount {
-	struct mount	*nullm_vfs;
-	struct vnode	*nullm_rootvp;	/* Reference to root null_node */
-};
-
-#ifdef KERNEL
-/* LP64 version of null_args.  All pointers
- * grow when we're dealing with a 64-bit process.
- * WARNING - keep in sync with null_args
- */
-struct user_null_args {
-	user_addr_t		target;	/* Target of loopback  */
-};
-
-/*
- * A cache of vnode references
- */
-struct null_node {
-	LIST_ENTRY(null_node)	null_hash;	/* Hash list */
-	struct vnode	        *null_lowervp;	/* VREFed once */
-	struct vnode		*null_vnode;	/* Back pointer */
-};
-
-extern int null_node_create(struct mount *mp, struct vnode *target, struct vnode **vpp);
-
-#define	MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
-#define	VTONULL(vp) ((struct null_node *)(vp)->v_data)
-#define	NULLTOV(xp) ((xp)->null_vnode)
-#ifdef NULLFS_DIAGNOSTIC
-extern struct vnode *null_checkvp(struct vnode *vp, char *fil, int lno);
-#define	NULLVPTOLOWERVP(vp) null_checkvp((vp), __FILE__, __LINE__)
-#else
-#define	NULLVPTOLOWERVP(vp) (VTONULL(vp)->null_lowervp)
-#endif
-
-extern int (**null_vnodeop_p)(void *);
-extern struct vfsops null_vfsops;
-#endif /* KERNEL */
-
-#endif /* __APPLE_API_PRIVATE */
-#endif /* __NULLFS_NULL_H__ */
diff --git a/bsd/miscfs/nullfs/null_subr.c b/bsd/miscfs/nullfs/null_subr.c
deleted file mode 100644
index d061bb77f..000000000
--- a/bsd/miscfs/nullfs/null_subr.c
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
-/*
- * Copyright (c) 1992, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software donated to Berkeley by
- * Jan-Simon Pendry.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)null_subr.c	8.7 (Berkeley) 5/14/95
- *
- *	null_subr.c	8.4 (Berkeley) 1/21/94
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/vnode.h>
-#include <sys/mount_internal.h>
-#include <sys/namei.h>
-#include <sys/malloc.h>
-#include <sys/ubc.h>
-#include <miscfs/nullfs/null.h>
-
-#define LOG2_SIZEVNODE 7		/* log2(sizeof struct vnode) */
-#define	NNULLNODECACHE 16
-
-/*
- * Null layer cache:
- * Each cache entry holds a reference to the lower vnode
- * along with a pointer to the alias vnode.  When an
- * entry is added the lower vnode is vnode_get'd.  When the
- * alias is removed the lower vnode is vnode_put'd.
- */
-
-#define	NULL_NHASH(vp) \
-	(&null_node_hashtbl[(((uintptr_t)vp)>>LOG2_SIZEVNODE) & null_node_hash])
-LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
-u_long null_node_hash;
-
-/*
- * Initialise cache headers
- */
-nullfs_init()
-{
-
-#ifdef NULLFS_DIAGNOSTIC
-	printf("nullfs_init\n");		/* printed during system boot */
-#endif
-	null_node_hashtbl = hashinit(NNULLNODECACHE, M_CACHE, &null_node_hash);
-}
-
-/*
- * Return a vnode_get'ed alias for lower vnode if already exists, else 0.
- */
-static struct vnode *
-null_node_find(mp, lowervp)
-	struct mount *mp;
-	struct vnode *lowervp;
-{
-	struct proc *p = curproc;	/* XXX */
-	struct null_node_hashhead *hd;
-	struct null_node *a;
-	struct vnode *vp;
-
-	/*
-	 * Find hash base, and then search the (two-way) linked
-	 * list looking for a null_node structure which is referencing
-	 * the lower vnode.  If found, the increment the null_node
-	 * reference count (but NOT the lower vnode's vnode_get counter).
-	 */
-	hd = NULL_NHASH(lowervp);
-loop:
-	for (a = hd->lh_first; a != 0; a = a->null_hash.le_next) {
-		if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
-			vp = NULLTOV(a);
-
-			if (vnode_get(vp)) {
-				printf ("null_node_find: vget failed.\n");
-				goto loop;
-			};
-			return (vp);
-		}
-	}
-
-	return NULL;
-}
-
-
-/*
- * Make a new null_node node.
- * Vp is the alias vnode, lowervp is the lower vnode.
- * Maintain a reference to (lowervp).
- */
-static int
-null_node_alloc(mp, lowervp, vpp)
-	struct mount *mp;
-	struct vnode *lowervp;
-	struct vnode **vpp;
-{
-	struct null_node_hashhead *hd;
-	struct null_node *xp;
-	struct vnode *othervp, *vp;
-	int error;
-
-	MALLOC(xp, struct null_node *, sizeof(struct null_node), M_TEMP, M_WAITOK);
-	if (error = getnewvnode(VT_NULL, mp, null_vnodeop_p, vpp)) {
-		FREE(xp, M_TEMP);
-		return (error);
-	}
-	vp = *vpp;
-
-	vp->v_type = lowervp->v_type;
-	xp->null_vnode = vp;
-	vp->v_data = xp;
-	xp->null_lowervp = lowervp;
-	/*
-	 * Before we insert our new node onto the hash chains,
-	 * check to see if someone else has beaten us to it.
-	 */
-	if (othervp = null_node_find(mp, lowervp)) {
-		FREE(xp, M_TEMP);
-		vp->v_type = VBAD;	/* node is discarded */
-		vp->v_usecount = 0;	/* XXX */
-		vp->v_data = 0; /* prevent access to freed data */
-		*vpp = othervp;
-		return 0;
-	};
-	if (vp->v_type == VREG)
-		ubc_info_init(vp);
-	vnode_get(lowervp);   /* Extra vnode_get will be vnode_put'd in null_node_create */
-	hd = NULL_NHASH(lowervp);
-	LIST_INSERT_HEAD(hd, xp, null_hash);
-	return 0;
-}
-
-
-/*
- * Try to find an existing null_node vnode referring
- * to it; otherwise make a new null_node vnode which
- * contains a reference to the lower vnode.
- */
-int
-null_node_create(mp, lowervp, newvpp)
-	struct mount *mp;
-	struct vnode *lowervp;
-	struct vnode **newvpp;
-{
-	struct vnode *aliasvp;
-
-	if (aliasvp = null_node_find(mp, lowervp)) {
-		/*
-		 * null_node_find has taken another reference
-		 * to the alias vnode.
-		 */
-#ifdef NULLFS_DIAGNOSTIC
-		vprint("null_node_create: exists", NULLTOV(ap));
-#endif
-		/* vnode_get(aliasvp); --- done in null_node_find */
-	} else {
-		int error;
-
-		/*
-		 * Get new vnode.
-		 */
-#ifdef NULLFS_DIAGNOSTIC
-		printf("null_node_create: create new alias vnode\n");
-#endif
-
-		/*
-		 * Make new vnode reference the null_node.
-		 */
-		if (error = null_node_alloc(mp, lowervp, &aliasvp))
-			return error;
-
-		/*
-		 * aliasvp is already vnode_get'd by getnewvnode()
-		 */
-	}
-
-	vnode_put(lowervp);
-
-#if DIAGNOSTIC
-	if (lowervp->v_usecount < 1) {
-		/* Should never happen... */
-		vprint ("null_node_create: alias ", aliasvp);
-		vprint ("null_node_create: lower ", lowervp);
-		panic ("null_node_create: lower has 0 usecount.");
-	};
-#endif
-
-#ifdef NULLFS_DIAGNOSTIC
-	vprint("null_node_create: alias", aliasvp);
-	vprint("null_node_create: lower", lowervp);
-#endif
-
-	*newvpp = aliasvp;
-	return (0);
-}
-#ifdef NULLFS_DIAGNOSTIC
-struct vnode *
-null_checkvp(vp, fil, lno)
-	struct vnode *vp;
-	char *fil;
-	int lno;
-{
-	struct null_node *a = VTONULL(vp);
-#ifdef notyet
-	/*
-	 * Can't do this check because vnop_reclaim runs
-	 * with a funny vop vector.
-	 */
-	if (vp->v_op != null_vnodeop_p) {
-		printf ("null_checkvp: on non-null-node\n");
-		while (null_checkvp_barrier) /*WAIT*/ ;
-		panic("null_checkvp");
-	};
-#endif
-	if (a->null_lowervp == NULL) {
-		/* Should never happen */
-		int i; uint32_t *p;
-		printf("vp = %x, ZERO ptr\n", vp);
-		for (p = (uint32_t *) a, i = 0; i < 8; i++)
-			printf(" %x", p[i]);
-		printf("\n");
-		/* wait for debugger */
-		while (null_checkvp_barrier) /*WAIT*/ ;
-		panic("null_checkvp");
-	}
-	if (a->null_lowervp->v_usecount < 1) {
-		int i; uint32_t *p;
-		printf("vp = %x, unref'ed lowervp\n", vp);
-		for (p = (uint32_t *) a, i = 0; i < 8; i++)
-			printf(" %x", p[i]);
-		printf("\n");
-		/* wait for debugger */
-		while (null_checkvp_barrier) /*WAIT*/ ;
-		panic ("null with unref'ed lowervp");
-	};
-#ifdef notyet
-	printf("null %x/%d -> %x/%d [%s, %d]\n",
-	        NULLTOV(a), NULLTOV(a)->v_usecount,
-		a->null_lowervp, a->null_lowervp->v_usecount,
-		fil, lno);
-#endif
-	return a->null_lowervp;
-}
-#endif
diff --git a/bsd/miscfs/nullfs/null_vfsops.c b/bsd/miscfs/nullfs/null_vfsops.c
deleted file mode 100644
index e81c64059..000000000
--- a/bsd/miscfs/nullfs/null_vfsops.c
+++ /dev/null
@@ -1,382 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
-/*
- * Copyright (c) 1992, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software donated to Berkeley by
- * Jan-Simon Pendry.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)null_vfsops.c	8.7 (Berkeley) 5/14/95
- *
- * @(#)lofs_vfsops.c	1.2 (Berkeley) 6/18/92
- */
-
-/*
- * Null Layer
- * (See null_vnops.c for a description of what this does.)
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/kauth.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/vnode.h>
-#include <sys/mount_internal.h>
-#include <sys/namei.h>
-#include <sys/malloc.h>
-#include <miscfs/nullfs/null.h>
-
-/*
- * Mount null layer
- */
-static int
-nullfs_mount(mp, devvp, data, context)
-	struct mount *mp;
-	vnode_t devvp;
-	user_addr_t data;
-	vfs_context_t context;
-{
-	int error = 0;
-	struct user_null_args args;
-	struct vnode *lowerrootvp, *vp;
-	struct vnode *nullm_rootvp;
-	struct null_mount *xmp;
-	struct nameidata nd;
-	u_int size;
-
-#ifdef NULLFS_DIAGNOSTIC
-	printf("nullfs_mount(mp = %x)\n", mp);
-#endif
-
-	/*
-	 * Update is a no-op
-	 */
-	if (mp->mnt_flag & MNT_UPDATE) {
-		return (ENOTSUP);
-		/* return VFS_MOUNT(MOUNTTONULLMOUNT(mp)->nullm_vfs, devvp, data,  p);*/
-	}
-
-	/*
-	 * Get argument
-	 */
-	if (vfs_context_is64bit(context)) {
-		error = copyin(data, (caddr_t)&args, sizeof (args));
-	}
-	else {
-		struct null_args temp;
-		error = copyin(data, (caddr_t)&temp, sizeof (temp));
-		args.target = CAST_USER_ADDR_T(temp.target);
-	}
-	if (error)
-		return (error);
-
-	/*
-	 * Find lower node
-	 */
-	NDINIT(&nd, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF,
-		UIO_USERSPACE, args.target, context);
-	if (error = namei(&nd))
-		return (error);
-	nameidone(&nd);
-	/*
-	 * Sanity check on lower vnode
-	 */
-	lowerrootvp = nd.ni_vp;
-
-	vnode_put(nd.ni_dvp);
-	nd.ni_dvp = NULL;
-
-	xmp = (struct null_mount *) _MALLOC(sizeof(struct null_mount),
-				M_UFSMNT, M_WAITOK);	/* XXX */
-
-	/*
-	 * Save reference to underlying FS
-	 */
-	xmp->nullm_vfs = lowerrootvp->v_mount;
-
-	/*
-	 * Save reference.  Each mount also holds
-	 * a reference on the root vnode.
-	 */
-	error = null_node_create(mp, lowerrootvp, &vp);
-	/*
-	 * Make sure the node alias worked
-	 */
-	if (error) {
-		vnode_put(lowerrootvp);
-		FREE(xmp, M_UFSMNT);	/* XXX */
-		return (error);
-	}
-
-	/*
-	 * Keep a held reference to the root vnode.
-	 * It is vnode_put'd in nullfs_unmount.
-	 */
-	nullm_rootvp = vp;
-	nullm_rootvp->v_flag |= VROOT;
-	xmp->nullm_rootvp = nullm_rootvp;
-	if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
-		mp->mnt_flag |= MNT_LOCAL;
-	mp->mnt_data = (qaddr_t) xmp;
-	vfs_getnewfsid(mp);
-
-	(void) copyinstr(args.target, mp->mnt_vfsstat.f_mntfromname, MAXPATHLEN - 1, 
-	    &size);
-	bzero(mp->mnt_vfsstat.f_mntfromname + size, MNAMELEN - size);
-#ifdef NULLFS_DIAGNOSTIC
-	printf("nullfs_mount: lower %s, alias at %s\n",
-		mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
-#endif
-	return (0);
-}
-
-/*
- * VFS start.  Nothing needed here - the start routine
- * on the underlying filesystem will have been called
- * when that filesystem was mounted.
- */
-static int
-nullfs_start(mp, flags, context)
-	struct mount *mp;
-	int flags;
-	vfs_context_t context;
-{
-	return (0);
-	/* return VFS_START(MOUNTTONULLMOUNT(mp)->nullm_vfs, flags, context); */
-}
-
-/*
- * Free reference to null layer
- */
-static int
-nullfs_unmount(mp, mntflags, context)
-	struct mount *mp;
-	int mntflags;
-	vfs_context_t context;
-{
-	struct vnode *nullm_rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
-	int error;
-	int flags = 0;
-	int force = 0;
-
-#ifdef NULLFS_DIAGNOSTIC
-	printf("nullfs_unmount(mp = %x)\n", mp);
-#endif
-
-	if (mntflags & MNT_FORCE) {
-		flags |= FORCECLOSE;
-		force = 1;
-	}
-
-	if ( (nullm_rootvp->v_usecount > 1) && !force )
-		return (EBUSY);
-	if ( (error = vflush(mp, nullm_rootvp, flags)) && !force )
-		return (error);
-
-#ifdef NULLFS_DIAGNOSTIC
-	vprint("alias root of lower", nullm_rootvp);
-#endif	 
-	/*
-	 * Release reference on underlying root vnode
-	 */
-	vnode_put(nullm_rootvp);
-	/*
-	 * And blow it away for future re-use
-	 */
-	vnode_reclaim(nullm_rootvp);
-	/*
-	 * Finally, throw away the null_mount structure
-	 */
-	FREE(mp->mnt_data, M_UFSMNT);	/* XXX */
-	mp->mnt_data = 0;
-	return 0;
-}
-
-static int
-nullfs_root(mp, vpp, context)
-	struct mount *mp;
-	struct vnode **vpp;
-	vfs_context_t context;
-{
-	struct proc *p = curproc;	/* XXX */
-	struct vnode *vp;
-
-#ifdef NULLFS_DIAGNOSTIC
-	printf("nullfs_root(mp = %x, vp = %x->%x)\n", mp,
-			MOUNTTONULLMOUNT(mp)->nullm_rootvp,
-			NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp)
-			);
-#endif
-
-	/*
-	 * Return locked reference to root.
-	 */
-	vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
-	vnode_get(vp);
-	*vpp = vp;
-	return 0;
-}
-
-static int
-nullfs_quotactl(mp, cmd, uid, datap, context)
-	struct mount *mp;
-	int cmd;
-	uid_t uid;
-	caddr_t datap;
-	vfs_context_t context;
-{
-	return VFS_QUOTACTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, uid, datap, context);
-}
-
-static int
-nullfs_statfs(mp, sbp, context)
-	struct mount *mp;
-	struct vfsstatfs *sbp;
-	vfs_context_t context;
-{
-	int error;
-	struct vfsstatfs mstat;
-
-#ifdef NULLFS_DIAGNOSTIC
-	printf("nullfs_statfs(mp = %x, vp = %x->%x)\n", mp,
-			MOUNTTONULLMOUNT(mp)->nullm_rootvp,
-			NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp)
-			);
-#endif
-
-	bzero(&mstat, sizeof(mstat));
-
-	error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, &mstat, context);
-	if (error)
-		return (error);
-
-	/* now copy across the "interesting" information and fake the rest */
-	//sbp->f_type = mstat.f_type;
-	sbp->f_flags = mstat.f_flags;
-	sbp->f_bsize = mstat.f_bsize;
-	sbp->f_iosize = mstat.f_iosize;
-	sbp->f_blocks = mstat.f_blocks;
-	sbp->f_bfree = mstat.f_bfree;
-	sbp->f_bavail = mstat.f_bavail;
-	sbp->f_files = mstat.f_files;
-	sbp->f_ffree = mstat.f_ffree;
-	return (0);
-}
-
-static int
-nullfs_sync(__unused struct mount *mp, __unused int waitfor,
-	__unused kauth_cred_t cred, __unused vfs_context_t context)
-{
-	/*
-	 * XXX - Assumes no data cached at null layer.
-	 */
-	return (0);
-}
-
-static int
-nullfs_vget(mp, ino, vpp, context)
-	struct mount *mp;
-	ino64_t ino;
-	struct vnode **vpp;
-	vfs_context_t context;
-{
-	
-	return VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp, context);
-}
-
-static int
-nullfs_fhtovp(mp, fhlen, fhp, vpp, context)
-	struct mount *mp;
-	int fhlen;
-	unsigned char *fhp;
-	struct vnode **vpp;
-	vfs_context_t context;
-{
-
-	return VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fhlen, fhp, vpp, context);
-}
-
-static int
-nullfs_vptofh(vp, fhlenp, fhp, context)
-	struct vnode *vp;
-	int *fhlenp;
-	unsigned char *fhp;
-	vfs_context_t context;
-{
-	return VFS_VPTOFH(NULLVPTOLOWERVP(vp), fhlenp, fhp, context);
-}
-
-int nullfs_init (struct vfsconf *);
-
-#define nullfs_sysctl (int (*) (int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, proc_t))eopnotsupp
-
-struct vfsops null_vfsops = {
-	nullfs_mount,
-	nullfs_start,
-	nullfs_unmount,
-	nullfs_root,
-	nullfs_quotactl,
-	nullfs_statfs,
-	nullfs_sync,
-	nullfs_vget,
-	nullfs_fhtovp,
-	nullfs_vptofh,
-	nullfs_init,
-	nullfs_sysctl
-};
diff --git a/bsd/miscfs/nullfs/null_vnops.c b/bsd/miscfs/nullfs/null_vnops.c
deleted file mode 100644
index 4b2fb2bbf..000000000
--- a/bsd/miscfs/nullfs/null_vnops.c
+++ /dev/null
@@ -1,570 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
-/*
- * Copyright (c) 1992, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * John Heidemann of the UCLA Ficus project.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)null_vnops.c	8.6 (Berkeley) 5/27/95
- *
- * Ancestors:
- *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
- *	...and...
- *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
- */
-
-/*
- * Null Layer
- *
- * (See mount_null(8) for more information.)
- *
- * The null layer duplicates a portion of the file system
- * name space under a new name.  In this respect, it is
- * similar to the loopback file system.  It differs from
- * the loopback fs in two respects:  it is implemented using
- * a stackable-layers technique, and its "null-nodes" stack above
- * all lower-layer vnodes, not just over directory vnodes.
- *
- * The null layer has two purposes.  First, it serves as a demonstration
- * of layering by providing a layer which does nothing.  (It actually
- * does everything the loopback file system does, which is slightly
- * more than nothing.)  Second, the null layer can serve as a prototype
- * layer.  Since it provides all necessary layer framework,
- * new file system layers can be created very easily by starting
- * with a null layer.
- *
- * The remainder of this comment examines the null layer as a basis
- * for constructing new layers.
- *
- *
- * INSTANTIATING NEW NULL LAYERS
- *
- * New null layers are created with mount_null(8).
- * Mount_null(8) takes two arguments, the pathname
- * of the lower vfs (target-pn) and the pathname where the null
- * layer will appear in the namespace (alias-pn).  After
- * the null layer is put into place, the contents
- * of target-pn subtree will be aliased under alias-pn.
- *
- *
- * OPERATION OF A NULL LAYER
- *
- * The null layer is the minimum file system layer,
- * simply bypassing all possible operations to the lower layer
- * for processing there.  The majority of its activity centers
- * on the bypass routine, through which nearly all vnode operations
- * pass.
- *
- * The bypass routine accepts arbitrary vnode operations for
- * handling by the lower layer.  It begins by examining vnode
- * operation arguments and replacing any null-nodes by their
- * lower-layer equivalents.  It then invokes the operation
- * on the lower layer.  Finally, it replaces the null-nodes
- * in the arguments and, if a vnode is returned by the operation,
- * stacks a null-node on top of the returned vnode.
- *
- * Although bypass handles most operations, vnop_getattr, vnop_lock,
- * vnop_unlock, vnop_inactive, vnop_reclaim, and vnop_print are not
- * bypassed. Vop_getattr must change the fsid being returned.
- * Vop_lock and vnop_unlock must handle any locking for the
- * current vnode as well as pass the lock request down.
- * Vop_inactive and vnop_reclaim are not bypassed so that
- * they can handle freeing null-layer specific data. Vop_print
- * is not bypassed to avoid excessive debugging information.
- * Also, certain vnode operations change the locking state within
- * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
- * and symlink). Ideally these operations should not change the
- * lock state, but should be changed to let the caller of the
- * function unlock them. Otherwise all intermediate vnode layers
- * (such as union, umapfs, etc) must catch these functions to do
- * the necessary locking at their layer.
- *
- *
- * INSTANTIATING VNODE STACKS
- *
- * Mounting associates the null layer with a lower layer,
- * in effect stacking two VFSes.  Vnode stacks are instead
- * created on demand as files are accessed.
- *
- * The initial mount creates a single vnode stack for the
- * root of the new null layer.  All other vnode stacks
- * are created as a result of vnode operations on
- * this or other null vnode stacks.
- *
- * New vnode stacks come into existence as a result of
- * an operation which returns a vnode.  
- * The bypass routine stacks a null-node above the new
- * vnode before returning it to the caller.
- *
- * For example, imagine mounting a null layer with
- * "mount_null /usr/include /dev/layer/null".
- * Changing directory to /dev/layer/null will assign
- * the root null-node (which was created when the null layer was mounted).
- * Now consider opening "sys".  A vnop_lookup would be
- * done on the root null-node.  This operation would bypass through
- * to the lower layer which would return a vnode representing 
- * the UFS "sys".  Null_bypass then builds a null-node
- * aliasing the UFS "sys" and returns this to the caller.
- * Later operations on the null-node "sys" will repeat this
- * process when constructing other vnode stacks.
- *
- *
- * CREATING OTHER FILE SYSTEM LAYERS
- *
- * One of the easiest ways to construct new file system layers is to make
- * a copy of the null layer, rename all files and variables, and
- * then begin modifying the copy.  Sed can be used to easily rename
- * all variables.
- *
- * The umap layer is an example of a layer descended from the 
- * null layer.
- *
- *
- * INVOKING OPERATIONS ON LOWER LAYERS
- *
- * There are two techniques to invoke operations on a lower layer 
- * when the operation cannot be completely bypassed.  Each method
- * is appropriate in different situations.  In both cases,
- * it is the responsibility of the aliasing layer to make
- * the operation arguments "correct" for the lower layer
- * by mapping any vnode arguments to the lower layer.
- *
- * The first approach is to call the aliasing layer's bypass routine.
- * This method is most suitable when you wish to invoke the operation
- * currently being handled on the lower layer.  It has the advantage
- * that the bypass routine already must do argument mapping.
- * An example of this is null_getattr in the null layer.
- *
- * A second approach is to directly invoke vnode operations on
- * the lower layer with the VOP_OPERATIONNAME interface.
- * The advantage of this method is that it is easy to invoke
- * arbitrary operations on the lower layer.  The disadvantage
- * is that vnode arguments must be manually mapped.
- *
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/kauth.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/vnode.h>
-#include <sys/mount_internal.h>
-#include <sys/namei.h>
-#include <sys/malloc.h>
-#include <sys/buf.h>
-#include <miscfs/nullfs/null.h>
-
-
-int null_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
-
-/*
- * This is the 10-Apr-92 bypass routine.
- *    This version has been optimized for speed, throwing away some
- * safety checks.  It should still always work, but it's not as
- * robust to programmer errors.
- *    Define SAFETY to include some error checking code.
- *
- * In general, we map all vnodes going down and unmap them on the way back.
- * As an exception to this, vnodes can be marked "unmapped" by setting
- * the Nth bit in operation's vdesc_flags.
- *
- * Also, some BSD vnode operations have the side effect of node_put'ing
- * their arguments.  With stacking, the reference counts are held
- * by the upper node, not the lower one, so we must handle these
- * side-effects here.  This is not of concern in Sun-derived systems
- * since there are no such side-effects.
- *
- * This makes the following assumptions:
- * - only one returned vpp
- * - no INOUT vpp's (Sun's vnop_open has one of these)
- * - the vnode operation vector of the first vnode should be used
- *   to determine what implementation of the op should be invoked
- * - all mapped vnodes are of our vnode-type (NEEDSWORK:
- *   problems on rmdir'ing mount points and renaming?)
- */ 
-int
-null_bypass(ap)
-	struct vnop_generic_args /* {
-		struct vnodeop_desc *a_desc;
-		<other random data follows, presumably>
-	} */ *ap;
-{
-	extern int (**null_vnodeop_p)(void *);  /* not extern, really "forward" */
-	register struct vnode **this_vp_p;
-	int error;
-	struct vnode *old_vps[VDESC_MAX_VPS];
-	struct vnode **vps_p[VDESC_MAX_VPS];
-	struct vnode ***vppp;
-	struct vnodeop_desc *descp = ap->a_desc;
-	int reles, i;
-
-	if (null_bug_bypass)
-		printf ("null_bypass: %s\n", descp->vdesc_name);
-
-#ifdef SAFETY
-	/*
-	 * We require at least one vp.
-	 */
-	if (descp->vdesc_vp_offsets == NULL ||
-	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
-		panic ("null_bypass: no vp's in map.\n");
-#endif
-
-	/*
-	 * Map the vnodes going in.
-	 * Later, we'll invoke the operation based on
-	 * the first mapped vnode's operation vector.
-	 */
-	reles = descp->vdesc_flags;
-	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
-		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
-			break;   /* bail out at end of list */
-		vps_p[i] = this_vp_p = 
-			VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
-		/*
-		 * We're not guaranteed that any but the first vnode
-		 * are of our type.  Check for and don't map any
-		 * that aren't.  (We must always map first vp or vclean fails.)
-		 */
-		if (i && (*this_vp_p == NULL ||
-		    (*this_vp_p)->v_op != null_vnodeop_p)) {
-			old_vps[i] = NULL;
-		} else {
-			old_vps[i] = *this_vp_p;
-			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
-			/*
-			 * XXX - Several operations have the side effect
-			 * of vnode_put'ing their vp's.  We must account for
-			 * that.  (This should go away in the future.)
-			 */
-			if (reles & 1)
-				vnode_get(*this_vp_p);
-		}
-			
-	}
-
-	/*
-	 * Call the operation on the lower layer
-	 * with the modified argument structure.
-	 */
-	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
-
-	/*
-	 * Maintain the illusion of call-by-value
-	 * by restoring vnodes in the argument structure
-	 * to their original value.
-	 */
-	reles = descp->vdesc_flags;
-	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
-		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
-			break;   /* bail out at end of list */
-		if (old_vps[i]) {
-			*(vps_p[i]) = old_vps[i];
-			if (reles & 1)
-				vnode_put(*(vps_p[i]));
-		}
-	}
-
-	/*
-	 * Map the possible out-going vpp
-	 * (Assumes that the lower layer always returns
-	 * a vnode_get'ed vpp unless it gets an error.)
-	 */
-	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
-	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
-	    !error) {
-		/*
-		 * XXX - even though some ops have vpp returned vp's,
-		 * several ops actually vnode_put this before returning.
-		 * We must avoid these ops.
-		 * (This should go away when these ops are regularized.)
-		 */
-		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
-			goto out;
-		vppp = VOPARG_OFFSETTO(struct vnode***,
-				 descp->vdesc_vpp_offset,ap);
-		error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
-	}
-
- out:
-	return (error);
-}
-
-/*
- * We have to carry on the locking protocol on the null layer vnodes
- * as we progress through the tree. We also have to enforce read-only
- * if this layer is mounted read-only.
- */
-int
-null_lookup(ap)
-	struct vnop_lookup_args /* {
-		struct vnode * a_dvp;
-		struct vnode ** a_vpp;
-		struct componentname * a_cnp;
-		vfs_context_t a_context;
-	} */ *ap;
-{
-	struct componentname *cnp = ap->a_cnp;
-	struct proc *p = cnp->cn_proc;
-	int flags = cnp->cn_flags;
-	struct vnode *dvp, *vp;
-	int error;
-
-	error = null_bypass(ap);
-
-	/*
-	 * We must do the same locking and unlocking at this layer as 
-	 * is done in the layers below us. We could figure this out 
-	 * based on the error return and the LASTCN, LOCKPARENT, and
- * LOCKLEAF flags. However, it is more expedient to just find
-	 * out the state of the lower level vnodes and set ours to the
-	 * same state.
-	 */
-	dvp = ap->a_dvp;
-	vp = *ap->a_vpp;
-	if (dvp == vp)
-		return (error);
-	return (error);
-}
-
-/*
- * Setattr call.
- */
-int
-null_setattr(
-	struct vnop_setattr_args /* {
-		struct vnodeop_desc *a_desc;
-		struct vnode *a_vp;
-		struct vnode_attr *a_vap;
-		kauth_cred_t a_cred;
-		struct proc *a_p;
-	} */ *ap)
-{
-	struct vnode *vp = ap->a_vp;
-	struct vnode_attr *vap = ap->a_vap;
-
-	if (VATTR_IS_ACTIVE(vap, va_data_size)) {
- 		switch (vp->v_type) {
- 		case VDIR:
- 			return (EISDIR);
- 		case VCHR:
- 		case VBLK:
- 		case VSOCK:
- 		case VFIFO:
-			return (0);
-		case VREG:
-		case VLNK:
- 		default:
-			break;
-		}
-	}
-	return (null_bypass(ap));
-}
-
-/*
- *  We handle getattr only to change the fsid.
- */
-int
-null_getattr(ap)
-	struct vnop_getattr_args /* {
-		struct vnode *a_vp;
-		struct vnode_attr *a_vap;
-		vfs_context_t a_context;
-	} */ *ap;
-{
-	int error;
-
-	if ((error = null_bypass(ap)))
-		return (error);
-	/* Requires that arguments be restored. */
-	VATTR_RETURN(ap->a_vap, va_fsid, ap->a_vp->v_mount->mnt_vfsstat.f_fsid.val[0]);
-	return (0);
-}
-
-int
-null_access(ap)
-	struct vnop_access_args /* {
-		struct vnode *a_vp;
-		int  a_action;
-		vfs_context_t a_context;
-	} */ *ap;
-{
-	return (null_bypass(ap));
-}
-
-int
-null_inactive(ap)
-	struct vnop_inactive_args /* {
-		struct vnode *a_vp;
-		vfs_context_t a_context;
-	} */ *ap;
-{
-	/*
-	 * Do nothing (and _don't_ bypass).
-	 * Wait to vnode_put lowervp until reclaim,
-	 * so that until then our null_node is in the
-	 * cache and reusable.
-	 *
-	 * NEEDSWORK: Someday, consider inactive'ing
-	 * the lowervp and then trying to reactivate it
-	 * with capabilities (v_id)
-	 * like they do in the name lookup cache code.
-	 * That's too much work for now.
-	 */
-	return (0);
-}
-
-int
-null_reclaim(ap)
-	struct vnop_reclaim_args /* {
-		struct vnode *a_vp;
-		vfs_context_t a_context;
-	} */ *ap;
-{
-	struct vnode *vp = ap->a_vp;
-	struct null_node *xp = VTONULL(vp);
-	struct vnode *lowervp = xp->null_lowervp;
-
-	/*
-	 * Note: in vnop_reclaim, vp->v_op == dead_vnodeop_p,
-	 * so we can't call VOPs on ourself.
-	 */
-	/* After this assignment, this node will not be re-used. */
-	xp->null_lowervp = NULL;
-	LIST_REMOVE(xp, null_hash);
-	FREE(vp->v_data, M_TEMP);
-	vp->v_data = NULL;
-	vnode_put (lowervp);
-	return (0);
-}
-
-/*
- * XXX - vnop_strategy must be hand coded because it has no
- * vnode in its arguments.
- * This goes away with a merged VM/buffer cache.
- */
-int
-null_strategy(ap)
-	struct vnop_strategy_args /* {
-		struct buf *a_bp;
-	} */ *ap;
-{
-	struct buf *bp = ap->a_bp;
-	int error;
-	struct vnode *savedvp;
-
-	savedvp = buf_vnode(bp);
-	buf_setvnode(bp, NULLVPTOLOWERVP(savedvp));
-
-	error = VNOP_STRATEGY(bp);
-
-	buf_setvnode(bp, savedvp);
-
-	return (error);
-}
-
-/*
- * XXX - like vnop_strategy, vnop_bwrite must be hand coded because it has no
- * vnode in its arguments.
- * This goes away with a merged VM/buffer cache.
- */
-int
-null_bwrite(ap)
-	struct vnop_bwrite_args /* {
-		struct buf *a_bp;
-	} */ *ap;
-{
-	struct buf *bp = ap->a_bp;
-	int error;
-	struct vnode *savedvp;
-
-	savedvp = buf_vnode(bp);
-	buf_setvnode(bp, NULLVPTOLOWERVP(savedvp));
-
-	error = VNOP_BWRITE(bp);
-
-	buf_setvnode(bp, savedvp);
-
-	return (error);
-}
-
-/*
- * Global vfs data structures
- */
-
-#define VOPFUNC int (*)(void *)
-
-int (**null_vnodeop_p)(void *);
-struct vnodeopv_entry_desc null_vnodeop_entries[] = {
-	{ &vnop_default_desc, (VOPFUNC)null_bypass },
-
-	{ &vnop_lookup_desc, (VOPFUNC)null_lookup },
-	{ &vnop_setattr_desc, (VOPFUNC)null_setattr },
-	{ &vnop_getattr_desc, (VOPFUNC)null_getattr },
-	{ &vnop_access_desc, (VOPFUNC)null_access },
-	{ &vnop_inactive_desc, (VOPFUNC)null_inactive },
-	{ &vnop_reclaim_desc, (VOPFUNC)null_reclaim },
-
-	{ &vnop_strategy_desc, (VOPFUNC)null_strategy },
-	{ &vnop_bwrite_desc, (VOPFUNC)null_bwrite },
-
-	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
-};
-struct vnodeopv_desc null_vnodeop_opv_desc =
-	{ &null_vnodeop_p, null_vnodeop_entries };
diff --git a/bsd/miscfs/specfs/Makefile b/bsd/miscfs/specfs/Makefile
index 52832cc71..7c6f583e4 100644
--- a/bsd/miscfs/specfs/Makefile
+++ b/bsd/miscfs/specfs/Makefile
@@ -9,14 +9,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 DATAFILES = \
diff --git a/bsd/miscfs/specfs/spec_vnops.c b/bsd/miscfs/specfs/spec_vnops.c
index cbd0de6d9..8050679f8 100644
--- a/bsd/miscfs/specfs/spec_vnops.c
+++ b/bsd/miscfs/specfs/spec_vnops.c
@@ -69,8 +69,9 @@
 #include <sys/conf.h>
 #include <sys/buf_internal.h>
 #include <sys/mount_internal.h>
-#include <sys/namei.h>
 #include <sys/vnode_internal.h>
+#include <sys/file_internal.h>
+#include <sys/namei.h>
 #include <sys/stat.h>
 #include <sys/errno.h>
 #include <sys/ioctl.h>
@@ -82,6 +83,8 @@
 #include <sys/resource.h>
 #include <miscfs/specfs/specdev.h>
 #include <vfs/vfs_support.h>
+#include <kern/assert.h>
+#include <kern/task.h>
 
 #include <sys/kdebug.h>
 
@@ -247,7 +250,15 @@ spec_open(struct vnop_open_args *ap)
 			vp->v_flag |= VISTTY;
 			vnode_unlock(vp);
 		}
+		
+		devsw_lock(dev, S_IFCHR);
 		error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p);
+
+		if (error == 0) {
+			vp->v_specinfo->si_opencount++;
+		}
+
+		devsw_unlock(dev, S_IFCHR);
 		return (error);
 
 	case VBLK:
@@ -266,7 +277,14 @@ spec_open(struct vnop_open_args *ap)
 		 */
 		if ( (error = vfs_mountedon(vp)) )
 			return (error);
+
+		devsw_lock(dev, S_IFBLK);
 		error = (*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p);
+		if (!error) {
+			vp->v_specinfo->si_opencount++;
+		}
+		devsw_unlock(dev, S_IFBLK);
+
 		if (!error) {
 		    u_int64_t blkcnt;
 		    u_int32_t blksize;
@@ -382,7 +400,7 @@ spec_read(struct vnop_read_args *ap)
 			}
 			n = min((unsigned)(n  - on), uio_resid(uio));
 
-			error = uiomove((char *)0 + buf_dataptr(bp) + on, n, uio);
+			error = uiomove((char *)buf_dataptr(bp) + on, n, uio);
 			if (n + on == bsize)
 				buf_markaged(bp);
 			buf_brelse(bp);
@@ -484,7 +502,7 @@ spec_write(struct vnop_write_args *ap)
 			}
 			n = min(n, bsize - buf_resid(bp));
 
-			error = uiomove((char *)0 + buf_dataptr(bp) + on, n, uio);
+			error = uiomove((char *)buf_dataptr(bp) + on, n, uio);
 			if (error) {
 				buf_brelse(bp);
 				return (error);
@@ -562,6 +580,8 @@ spec_select(struct vnop_select_args *ap)
 	}
 }
 
+static int filt_specattach(struct knote *kn);
+
 int
 spec_kqfilter(vnode_t vp, struct knote *kn)
 {
@@ -575,8 +595,8 @@ spec_kqfilter(vnode_t vp, struct knote *kn)
 	dev = vnode_specrdev(vp);
 
 	if (vnode_istty(vp)) {
-		/* We can hook into the slave side of a tty */
-		err = ptsd_kqfilter(dev, kn);
+		/* We can hook into TTYs... */
+		err = filt_specattach(kn);
 	} else {
 		/* Try a bpf device, as defined in bsd/net/bpf.c */
 		err = bpfkqfilter(dev, kn);
@@ -618,8 +638,12 @@ void IOSleep(int);
 #define LOWPRI_WINDOW_MSECS_INC	50
 #define LOWPRI_MAX_WINDOW_MSECS 200
 #define LOWPRI_MAX_WAITING_MSECS 200
-#define LOWPRI_SLEEP_INTERVAL 5
 
+#if CONFIG_EMBEDDED
+#define LOWPRI_SLEEP_INTERVAL 5
+#else
+#define LOWPRI_SLEEP_INTERVAL 2
+#endif
 
 struct _throttle_io_info_t {
 	struct timeval	last_normal_IO_timestamp;
@@ -627,7 +651,6 @@ struct _throttle_io_info_t {
 	SInt32 numthreads_throttling;
 	SInt32 refcnt;
 	SInt32 alloc;
-
 };
 
 struct _throttle_io_info_t _throttle_io_info[LOWPRI_MAX_NUM_DEV];
@@ -647,10 +670,31 @@ int     lowpri_max_waiting_msecs = LOWPRI_MAX_WAITING_MSECS;
 #define DEBUG_ALLOC_THROTTLE_INFO(format, debug_info, args...)
 #endif
 
-SYSCTL_INT(_debug, OID_AUTO, lowpri_IO_initial_window_msecs, CTLFLAG_RW, &lowpri_IO_initial_window_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
-SYSCTL_INT(_debug, OID_AUTO, lowpri_IO_window_inc, CTLFLAG_RW, &lowpri_IO_window_msecs_inc, LOWPRI_INITIAL_WINDOW_MSECS, "");
-SYSCTL_INT(_debug, OID_AUTO, lowpri_max_window_msecs, CTLFLAG_RW, &lowpri_max_window_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
-SYSCTL_INT(_debug, OID_AUTO, lowpri_max_waiting_msecs, CTLFLAG_RW, &lowpri_max_waiting_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
+SYSCTL_INT(_debug, OID_AUTO, lowpri_IO_initial_window_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_IO_initial_window_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
+SYSCTL_INT(_debug, OID_AUTO, lowpri_IO_window_inc, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_IO_window_msecs_inc, LOWPRI_INITIAL_WINDOW_MSECS, "");
+SYSCTL_INT(_debug, OID_AUTO, lowpri_max_window_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_max_window_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
+SYSCTL_INT(_debug, OID_AUTO, lowpri_max_waiting_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_max_waiting_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
+
+/*
+ * throttled I/O helper function:
+ * returns the index of the lowest set bit, which serves as a device index
+ */
+int
+num_trailing_0(uint64_t n)
+{
+	/*
+	 * since in most cases the number of trailing 0s is very small,
+	 * we simply count sequentially from the lowest bit
+	 */
+	if (n == 0)
+		return sizeof(n) * 8;
+	int count = 0;
+	while (!ISSET(n, 1)) {
+		n >>= 1;
+		++count;
+	}
+	return count;
+}
 
 /*
  * Release the reference and if the item was allocated and this is the last
@@ -760,6 +804,41 @@ throttle_info_mount_ref(mount_t mp, void *throttle_info)
 	mp->mnt_throttle_info = throttle_info;
 }
 
+/*
+ * Private KPI routine
+ *
+ * return a handle for accessing throttle_info given a throttle_mask.  The
+ * handle must be released by throttle_info_rel_by_mask
+ */
+int
+throttle_info_ref_by_mask(uint64_t throttle_mask,
+						  throttle_info_handle_t *throttle_info_handle)
+{
+	int dev_index;
+	struct _throttle_io_info_t *info;
+
+	if (throttle_info_handle == NULL)
+		return EINVAL;
+	
+	dev_index = num_trailing_0(throttle_mask);
+	info = &_throttle_io_info[dev_index];
+	throttle_info_ref(info);
+	*(struct _throttle_io_info_t**)throttle_info_handle = info;
+	return 0;
+}
+
+/*
+ * Private KPI routine
+ *
+ * release the handle obtained by throttle_info_ref_by_mask
+ */
+void
+throttle_info_rel_by_mask(throttle_info_handle_t throttle_info_handle)
+{
+	/* for now the handle is just a pointer to _throttle_io_info_t */
+	throttle_info_rel((struct _throttle_io_info_t*)throttle_info_handle);
+}
+
 /*
  * KPI routine
  *
@@ -804,12 +883,51 @@ update_last_io_time(mount_t mp)
 	microuptime(&info->last_IO_timestamp);
 }
 
+
+#if CONFIG_EMBEDDED
+
+int throttle_get_io_policy(struct uthread **ut)
+{
+	int policy = IOPOL_DEFAULT;
+	proc_t p = current_proc();
+
+	*ut = get_bsdthread_info(current_thread());
+		
+	if (p != NULL)
+		policy = p->p_iopol_disk;
+
+	if (*ut != NULL) {
+		// the I/O policy of the thread overrides that of the process
+		// unless the I/O policy of the thread is default
+		if ((*ut)->uu_iopol_disk != IOPOL_DEFAULT)
+			policy = (*ut)->uu_iopol_disk;
+	}
+	return policy;
+}
+#else
+
+int throttle_get_io_policy(__unused struct uthread **ut)
+{
+	*ut = get_bsdthread_info(current_thread());
+
+	return (proc_get_task_selfdiskacc());
+}
+#endif
+
+
 static int
 throttle_io_will_be_throttled_internal(int lowpri_window_msecs, void * throttle_info)
 {
     	struct _throttle_io_info_t *info = throttle_info;
 	struct timeval elapsed;
 	int elapsed_msecs;
+	int policy;
+	struct uthread	*ut;
+
+	policy = throttle_get_io_policy(&ut);
+
+	if (ut->uu_throttle_bc == FALSE && policy != IOPOL_THROTTLE)
+		return (0);
 
 	microuptime(&elapsed);
 	timevalsub(&elapsed, &info->last_normal_IO_timestamp);
@@ -841,12 +959,15 @@ throttle_io_will_be_throttled(int lowpri_window_msecs, mount_t mp)
 	return throttle_io_will_be_throttled_internal(lowpri_window_msecs, info);
 }
 
-void throttle_lowpri_io(boolean_t ok_to_sleep)
+uint32_t
+throttle_lowpri_io(int sleep_amount)
 {
-	int i;
+	int sleep_cnt = 0;
+	int numthreads_throttling;
 	int max_try_num;
 	struct uthread *ut;
 	struct _throttle_io_info_t *info;
+	int max_waiting_msecs;
 
 	ut = get_bsdthread_info(current_thread());
 
@@ -854,23 +975,39 @@ void throttle_lowpri_io(boolean_t ok_to_sleep)
 		goto done;
 
 	info = ut->uu_throttle_info;
-	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_START,
-		     ut->uu_lowpri_window, ok_to_sleep, 0, 0, 0);
 
-	if (ok_to_sleep == TRUE) {
-		max_try_num = lowpri_max_waiting_msecs / LOWPRI_SLEEP_INTERVAL * MAX(1, info->numthreads_throttling);
+	if (sleep_amount != 0) {
+#if CONFIG_EMBEDDED
+		max_waiting_msecs = lowpri_max_waiting_msecs;
+#else 
+		if (ut->uu_throttle_isssd == TRUE)
+		        max_waiting_msecs = lowpri_max_waiting_msecs / 100;
+		else
+			max_waiting_msecs = lowpri_max_waiting_msecs;
+#endif
+		if (max_waiting_msecs < LOWPRI_SLEEP_INTERVAL)
+		        max_waiting_msecs = LOWPRI_SLEEP_INTERVAL;
 
-		for (i=0; i<max_try_num; i++) {
+		numthreads_throttling = info->numthreads_throttling + MIN(10, MAX(1, sleep_amount)) - 1;
+		max_try_num = max_waiting_msecs / LOWPRI_SLEEP_INTERVAL * MAX(1, numthreads_throttling);
+
+		for (sleep_cnt = 0; sleep_cnt < max_try_num; sleep_cnt++) {
 			if (throttle_io_will_be_throttled_internal(ut->uu_lowpri_window, info)) {
+				if (sleep_cnt == 0) {
+					KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_START,
+							      ut->uu_lowpri_window, max_try_num, numthreads_throttling, 0, 0);
+				}
 				IOSleep(LOWPRI_SLEEP_INTERVAL);
     				DEBUG_ALLOC_THROTTLE_INFO("sleeping because of info = %p\n", info, info );
 			} else {
 				break;
 			}
 		}
+		if (sleep_cnt) {
+			KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_END,
+					      ut->uu_lowpri_window, sleep_cnt, 0, 0, 0);
+		}
 	}
-	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_END,
-		     ut->uu_lowpri_window, i*5, 0, 0, 0);
 	SInt32 oldValue;
 	oldValue = OSDecrementAtomic(&info->numthreads_throttling);
 
@@ -882,35 +1019,72 @@ done:
 	if (ut->uu_throttle_info)
 		throttle_info_rel(ut->uu_throttle_info);
 	ut->uu_throttle_info = NULL;
+	ut->uu_throttle_bc = FALSE;
+
+	return (sleep_cnt * LOWPRI_SLEEP_INTERVAL);
 }
 
-int throttle_get_io_policy(struct uthread **ut)
+/*
+ * KPI routine
+ *
+ * set a kernel thread's IO policy.  policy can be:
+ * IOPOL_NORMAL, IOPOL_THROTTLE, IOPOL_PASSIVE
+ *
+ * explanations about these policies are in the man page of setiopolicy_np
+ */
+void throttle_set_thread_io_policy(int policy)
 {
-	int policy = IOPOL_DEFAULT;
-	proc_t p = current_proc();
+#if !CONFIG_EMBEDDED
+	proc_apply_thread_selfdiskacc(policy);
+#else /* !CONFIG_EMBEDDED */
+	struct uthread *ut;
+	ut = get_bsdthread_info(current_thread());
+	ut->uu_iopol_disk = policy;
+#endif /* !CONFIG_EMBEDDED */
+}
 
-	*ut = get_bsdthread_info(current_thread());
-		
-	if (p != NULL)
-		policy = p->p_iopol_disk;
 
-	if (*ut != NULL) {
-		// the I/O policy of the thread overrides that of the process
-		// unless the I/O policy of the thread is default
-		if ((*ut)->uu_iopol_disk != IOPOL_DEFAULT)
-			policy = (*ut)->uu_iopol_disk;
+static
+void throttle_info_reset_window(struct uthread *ut)
+{
+	struct _throttle_io_info_t *info;
+
+	info = ut->uu_throttle_info;
+
+	OSDecrementAtomic(&info->numthreads_throttling);
+	throttle_info_rel(info);
+	ut->uu_throttle_info = NULL;
+	ut->uu_lowpri_window = 0;
+}
+
+static
+void throttle_info_set_initial_window(struct uthread *ut, struct _throttle_io_info_t *info, boolean_t isssd, boolean_t BC_throttle)
+{
+	SInt32 oldValue;
+
+	ut->uu_throttle_info = info;
+	throttle_info_ref(info);
+	DEBUG_ALLOC_THROTTLE_INFO("updating info = %p\n", info, info );
+
+	oldValue = OSIncrementAtomic(&info->numthreads_throttling);
+	if (oldValue < 0) {
+		panic("%s: numthreads negative", __func__);
 	}
-	return policy;
+	ut->uu_lowpri_window = lowpri_IO_initial_window_msecs;
+	ut->uu_lowpri_window += lowpri_IO_window_msecs_inc * oldValue;
+	ut->uu_throttle_isssd = isssd;
+	ut->uu_throttle_bc = BC_throttle;
 }
 
-void throttle_info_update(void *throttle_info, int flags)
+
+static
+void throttle_info_update_internal(void *throttle_info, int flags, boolean_t isssd)
 {
 	struct _throttle_io_info_t *info = throttle_info;
 	struct uthread	*ut;
 	int policy;
 	int is_throttleable_io = 0;
 	int is_passive_io = 0;
-	SInt32 oldValue;
 
 	if (!lowpri_IO_initial_window_msecs || (info == NULL))
 		return;
@@ -949,28 +1123,19 @@ void throttle_info_update(void *throttle_info, int flags)
 		 * do the delay just before we return from the system
 		 * call that triggered this I/O or from vnode_pagein
 		 */
-		if (ut->uu_lowpri_window == 0) {
-			ut->uu_throttle_info = info;
-			throttle_info_ref(ut->uu_throttle_info);
-			DEBUG_ALLOC_THROTTLE_INFO("updating info = %p\n", info, info );
-
-			oldValue = OSIncrementAtomic(&info->numthreads_throttling);
-			if (oldValue < 0) {
-				panic("%s: numthreads negative", __func__);
-			}
-			ut->uu_lowpri_window = lowpri_IO_initial_window_msecs;
-			ut->uu_lowpri_window += lowpri_IO_window_msecs_inc * oldValue;
-		} else {
+		if (ut->uu_lowpri_window == 0)
+			throttle_info_set_initial_window(ut, info, isssd, FALSE);
+		else {
 			/* The thread sends I/Os to different devices within the same system call */
 			if (ut->uu_throttle_info != info) {
-    				struct _throttle_io_info_t *old_info = ut->uu_throttle_info;
+				struct _throttle_io_info_t *old_info = ut->uu_throttle_info;
 
 				// keep track of the numthreads in the right device
 				OSDecrementAtomic(&old_info->numthreads_throttling);
 				OSIncrementAtomic(&info->numthreads_throttling);
 
-    				DEBUG_ALLOC_THROTTLE_INFO("switching from info = %p\n", old_info, old_info );
-    				DEBUG_ALLOC_THROTTLE_INFO("switching to info = %p\n", info, info );
+				DEBUG_ALLOC_THROTTLE_INFO("switching from info = %p\n", old_info, old_info );
+				DEBUG_ALLOC_THROTTLE_INFO("switching to info = %p\n", info, info );
 				/* This thread no longer needs a reference on that throttle info */
 				throttle_info_rel(ut->uu_throttle_info);
 				ut->uu_throttle_info = info;
@@ -981,26 +1146,76 @@ void throttle_info_update(void *throttle_info, int flags)
 			ut->uu_lowpri_window += lowpri_IO_window_msecs_inc * numthreads;
 			if (ut->uu_lowpri_window > lowpri_max_window_msecs * numthreads)
 				ut->uu_lowpri_window = lowpri_max_window_msecs * numthreads;
+
+			if (isssd == FALSE) {
+				/*
+				 * we're here because we've actually issued I/Os to different devices...
+				 * if at least one of them was a non SSD, then thottle the thread
+				 * if at least one of them was a non-SSD, then throttle the thread
+				 */
+				ut->uu_throttle_isssd = FALSE;
+			}
 		}
 	}
 }
 
+/*
+ * KPI routine
+ *
+ * this is usually called before every I/O, used for throttled I/O
+ * bookkeeping.  This routine has low overhead and does not sleep.
+ */
+void throttle_info_update(void *throttle_info, int flags)
+{
+	throttle_info_update_internal(throttle_info, flags, FALSE);
+}
+
+/*
+ * KPI routine
+ *
+ * this is usually called before every I/O, used for throttled I/O
+ * bookkeeping.  This routine has low overhead and does not sleep.
+ */
+void throttle_info_update_by_mask(void *throttle_info_handle, int flags)
+{
+	void *throttle_info = throttle_info_handle;
+	/* for now we only use the lowest bit of the throttle mask, so the
+	 * handle is the same as the throttle_info.  Later if we store a
+	 * set of throttle infos in the handle, we will want to loop through
+	 * them and call throttle_info_update in a loop
+	 */
+	throttle_info_update(throttle_info, flags);
+}
+
+extern int ignore_is_ssd;
+
 int
 spec_strategy(struct vnop_strategy_args *ap)
 {
         buf_t	bp;
 	int	bflags;
-	int 	policy;
+	int	policy;
 	dev_t	bdev;
 	uthread_t ut;
-	size_t devbsdunit;
 	mount_t mp;
+	int strategy_ret;
+	struct _throttle_io_info_t *throttle_info;
+	boolean_t isssd = FALSE;
 
         bp = ap->a_bp;
 	bdev = buf_device(bp);
-	bflags = buf_flags(bp);
 	mp = buf_vnode(bp)->v_mount;
 
+	policy = throttle_get_io_policy(&ut);
+
+	if (policy == IOPOL_THROTTLE) {
+		bp->b_flags |= B_THROTTLED_IO;
+		bp->b_flags &= ~B_PASSIVE;
+	} else if (policy == IOPOL_PASSIVE)
+		bp->b_flags |= B_PASSIVE;
+
+	bflags = bp->b_flags;
+
         if (kdebug_enable) {
 	        int    code = 0;
 
@@ -1014,6 +1229,11 @@ spec_strategy(struct vnop_strategy_args *ap)
 		else if (bflags & B_PAGEIO)
 		        code |= DKIO_PAGING;
 
+		if (bflags & B_THROTTLED_IO)
+			code |= DKIO_THROTTLE;
+		else if (bflags & B_PASSIVE)
+			code |= DKIO_PASSIVE;
+
 		KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE,
 				      bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0);
         }
@@ -1021,29 +1241,63 @@ spec_strategy(struct vnop_strategy_args *ap)
 	    mp && (mp->mnt_kern_flag & MNTK_ROOTDEV))
 	        hard_throttle_on_root = 1;
 
+	if (mp != NULL) {
+		if ((mp->mnt_kern_flag & MNTK_SSD) && !ignore_is_ssd)
+			isssd = TRUE;
+		throttle_info = &_throttle_io_info[mp->mnt_devbsdunit];
+	} else
+		throttle_info = &_throttle_io_info[LOWPRI_MAX_NUM_DEV - 1];
 
-	if (mp != NULL)
-		devbsdunit = mp->mnt_devbsdunit;
-	else
-		devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
-
-	throttle_info_update(&_throttle_io_info[devbsdunit], bflags);
-	if ((policy = throttle_get_io_policy(&ut)) == IOPOL_THROTTLE) {
-		bp->b_flags |= B_THROTTLED_IO;
-	}
-
+	throttle_info_update_internal(throttle_info, bflags, isssd);
 
 	if ((bflags & B_READ) == 0) {
-		microuptime(&_throttle_io_info[devbsdunit].last_IO_timestamp);
+		microuptime(&throttle_info->last_IO_timestamp);
 		if (mp) {
 			INCR_PENDING_IO(buf_count(bp), mp->mnt_pending_write_size);
 		}
 	} else if (mp) {
 		INCR_PENDING_IO(buf_count(bp), mp->mnt_pending_read_size);
 	}
-
-	(*bdevsw[major(bdev)].d_strategy)(bp);
+	/*
+	 * The BootCache may give us special information about
+	 * the IO, so it returns special values that we check
+	 * for here.
+	 *
+	 * IO_SATISFIED_BY_CACHE
+	 * The read has been satisfied by the boot cache. Don't
+	 * throttle the thread unnecessarily.
+	 *
+	 * IO_SHOULD_BE_THROTTLED
+	 * The boot cache is playing back a playlist and this IO
+	 * cut through. Throttle it so we're not cutting through
+	 * the boot cache too often.
+	 *
+	 * Note that typical strategy routines are defined with
+	 * a void return so we'll get garbage here. In the 
+	 * unlikely case the garbage matches our special return
+	 * value, it's not a big deal since we're only adjusting
+	 * the throttling delay.
+ 	 */
+#define IO_SATISFIED_BY_CACHE  ((int)0xcafefeed)
+#define IO_SHOULD_BE_THROTTLED ((int)0xcafebeef)
+	typedef	int strategy_fcn_ret_t(struct buf *bp);
 	
+	strategy_ret = (*(strategy_fcn_ret_t*)bdevsw[major(bdev)].d_strategy)(bp);
+	
+	if ((IO_SATISFIED_BY_CACHE == strategy_ret) && (ut->uu_lowpri_window != 0) && (ut->uu_throttle_info != NULL)) {
+		/*
+		 * If this was a throttled IO satisfied by the boot cache,
+		 * don't delay the thread.
+		 */
+		throttle_info_reset_window(ut);
+
+	} else if ((IO_SHOULD_BE_THROTTLED == strategy_ret) && (ut->uu_lowpri_window == 0) && (ut->uu_throttle_info == NULL)) {
+		/*
+		 * If the boot cache indicates this IO should be throttled,
+		 * delay the thread.
+		 */
+		throttle_info_set_initial_window(ut, throttle_info, isssd, TRUE);
+	}
 	return (0);
 }
 
@@ -1066,11 +1320,11 @@ spec_close(struct vnop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	dev_t dev = vp->v_rdev;
-	int (*devclose)(dev_t, int, int, struct proc *);
-	int mode, error;
+	int error = 0;
 	int flags = ap->a_fflag;
 	struct proc *p = vfs_context_proc(ap->a_context);
 	struct session *sessp;
+	int do_rele = 0;
 
 	switch (vp->v_type) {
 
@@ -1088,38 +1342,56 @@ spec_close(struct vnop_close_args *ap)
 		if (sessp != SESSION_NULL) {
 			if ((vcount(vp) == 1) && 
 		    		(vp == sessp->s_ttyvp)) {
+
 				session_lock(sessp);
-				sessp->s_ttyvp = NULL;
-				sessp->s_ttyvid = 0;
-				sessp->s_ttyp = TTY_NULL;
-				sessp->s_ttypgrpid = NO_PID;
+				if (vp == sessp->s_ttyvp) {
+					sessp->s_ttyvp = NULL;
+					sessp->s_ttyvid = 0;
+					sessp->s_ttyp = TTY_NULL;
+					sessp->s_ttypgrpid = NO_PID;
+					do_rele = 1;
+				} 
 				session_unlock(sessp);
-				vnode_rele(vp);
+
+				if (do_rele) {
+					vnode_rele(vp);
+				}
 			}
 			session_rele(sessp);
 		}
 
-		devclose = cdevsw[major(dev)].d_close;
-		mode = S_IFCHR;
+		devsw_lock(dev, S_IFCHR);
+
+		vp->v_specinfo->si_opencount--;
+
+		if (vp->v_specinfo->si_opencount < 0) {
+			panic("Negative open count?");
+		}
 		/*
 		 * close on last reference or on vnode revoke call
 		 */
-		if ((flags & IO_REVOKE) != 0)
-			break;
-		if (vcount(vp) > 0)
+		if ((vcount(vp) > 0) && ((flags & IO_REVOKE) == 0)) {
+			devsw_unlock(dev, S_IFCHR);
 			return (0);
+		}	
+		
+		error = cdevsw[major(dev)].d_close(dev, flags, S_IFCHR, p);
+
+		devsw_unlock(dev, S_IFCHR);
 		break;
 
 	case VBLK:
 		/*
-		 * Since every use (buffer, vnode, swap, blockmap)
-		 * holds a reference to the vnode, and because we mark
-		 * any other vnodes that alias this device, when the
-		 * sum of the reference counts on all the aliased
-		 * vnodes descends to zero, we are on last close.
+		 * If there is more than one outstanding open, don't
+		 * send the close to the device.
 		 */
-		if (vcount(vp) > 0)
+		devsw_lock(dev, S_IFBLK);
+		if (vcount(vp) > 1) {
+			vp->v_specinfo->si_opencount--;
+			devsw_unlock(dev, S_IFBLK);
 			return (0);
+		}
+		devsw_unlock(dev, S_IFBLK);
 
 		/*
 		 * On last close of a block device (that isn't mounted)
@@ -1133,8 +1405,22 @@ spec_close(struct vnop_close_args *ap)
 		if (error)
 			return (error);
 
-		devclose = bdevsw[major(dev)].d_close;
-		mode = S_IFBLK;
+		devsw_lock(dev, S_IFBLK);
+
+		vp->v_specinfo->si_opencount--;
+		
+		if (vp->v_specinfo->si_opencount < 0) {
+			panic("Negative open count?");
+		}
+
+		if (vcount(vp) > 0) {
+			devsw_unlock(dev, S_IFBLK);
+			return (0);
+		}
+
+		error = bdevsw[major(dev)].d_close(dev, flags, S_IFBLK, p);
+
+		devsw_unlock(dev, S_IFBLK);
 		break;
 
 	default:
@@ -1142,7 +1428,7 @@ spec_close(struct vnop_close_args *ap)
 		return(EBADF);
 	}
 
-	return ((*devclose)(dev, flags, mode, p));
+	return error;
 }
 
 /*
@@ -1234,3 +1520,171 @@ spec_offtoblk(struct vnop_offtoblk_args *ap)
 
 	return (0);
 }
+
+static void filt_specdetach(struct knote *kn);
+static int filt_spec(struct knote *kn, long hint);
+static unsigned filt_specpeek(struct knote *kn);
+
+struct filterops spec_filtops = {
+	.f_isfd		= 1,
+	.f_attach	= filt_specattach,
+	.f_detach	= filt_specdetach,
+	.f_event	= filt_spec,
+	.f_peek		= filt_specpeek
+};
+
+static int
+filter_to_seltype(int16_t filter)
+{
+	switch (filter) {
+	case EVFILT_READ: 
+		return FREAD;
+	case EVFILT_WRITE:
+		return FWRITE;
+	default:
+		panic("filter_to_seltype(): invalid filter %d\n", filter);
+		return 0;
+	}
+}
+
+static int 
+filt_specattach(struct knote *kn)
+{
+	vnode_t vp;
+	dev_t dev;
+
+	vp = (vnode_t)kn->kn_fp->f_fglob->fg_data; /* Already have iocount, and vnode is alive */
+
+	assert(vnode_ischr(vp));
+
+	dev = vnode_specrdev(vp);
+
+	if (major(dev) > nchrdev) {
+		return ENXIO;
+	}
+
+	if ((cdevsw_flags[major(dev)] & CDEVSW_SELECT_KQUEUE) == 0) {
+		return EINVAL;
+	}
+
+	/* Resulting wql is safe to unlink even if it has never been linked */
+	kn->kn_hook = wait_queue_link_allocate();
+	if (kn->kn_hook == NULL) {
+		return EAGAIN;
+	}
+
+	kn->kn_fop = &spec_filtops;
+	kn->kn_hookid = vnode_vid(vp);
+
+	knote_markstayqueued(kn);
+
+	return 0;
+}
+
+static void 
+filt_specdetach(struct knote *kn)
+{
+	kern_return_t ret;
+
+	/* 
+	 * Given wait queue link and wait queue set, unlink.  This is subtle.
+	 * If the device has been revoked from under us, selclearthread() will
+	 * have removed our link from the kqueue's wait queue set, which 
+	 * wait_queue_set_unlink_one() will detect and handle.
+	 */
+	ret = wait_queue_set_unlink_one(kn->kn_kq->kq_wqs, kn->kn_hook);
+	if (ret != KERN_SUCCESS) {
+		panic("filt_specdetach(): failed to unlink wait queue link.");
+	}
+
+	(void)wait_queue_link_free(kn->kn_hook);
+	kn->kn_hook = NULL;
+	kn->kn_status &= ~KN_STAYQUEUED;
+}
+
+static int 
+filt_spec(struct knote *kn, long hint)
+{
+	vnode_t vp;
+	uthread_t uth;
+	wait_queue_set_t old_wqs;
+	vfs_context_t ctx;
+	int selres;
+	int error;
+	int use_offset;
+	dev_t dev;
+	uint64_t flags;
+
+	assert(kn->kn_hook != NULL);
+
+	if (hint != 0) {
+		panic("filt_spec(): nonzero hint?");
+	}
+
+	uth = get_bsdthread_info(current_thread());
+	ctx = vfs_context_current();
+	vp = (vnode_t)kn->kn_fp->f_fglob->fg_data;
+
+	error = vnode_getwithvid(vp, kn->kn_hookid);
+	if (error != 0) {
+		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+		return 1;
+	}
+	
+	dev = vnode_specrdev(vp);
+	flags = cdevsw_flags[major(dev)];
+	use_offset = ((flags & CDEVSW_USE_OFFSET) != 0);
+	assert((flags & CDEVSW_SELECT_KQUEUE) != 0);
+
+	/* Trick selrecord() into hooking kqueue's wait queue set into device wait queue */
+	old_wqs = uth->uu_wqset;
+	uth->uu_wqset = kn->kn_kq->kq_wqs;
+	selres = VNOP_SELECT(vp, filter_to_seltype(kn->kn_filter), 0, kn->kn_hook, ctx);
+	uth->uu_wqset = old_wqs;
+
+	if (use_offset) {
+		if (kn->kn_fp->f_fglob->fg_offset >= (uint32_t)selres) {
+			kn->kn_data = 0;
+		} else {
+			kn->kn_data = ((uint32_t)selres) - kn->kn_fp->f_fglob->fg_offset;
+		}
+	} else {
+		kn->kn_data = selres;
+	}
+
+	vnode_put(vp);
+
+	return (kn->kn_data != 0);
+}
+
+static unsigned
+filt_specpeek(struct knote *kn)
+{
+	vnode_t vp;
+	uthread_t uth;
+	wait_queue_set_t old_wqs;
+	vfs_context_t ctx;
+	int error, selres;
+	
+	uth = get_bsdthread_info(current_thread());
+	ctx = vfs_context_current();
+	vp = (vnode_t)kn->kn_fp->f_fglob->fg_data;
+
+	error = vnode_getwithvid(vp, kn->kn_hookid);
+	if (error != 0) {
+		return 1; /* Just like VNOP_SELECT() on recycled vnode */
+	}
+
+	/*
+	 * Why pass the link here?  Because we may not have registered in the past...
+	 */
+	old_wqs = uth->uu_wqset;
+	uth->uu_wqset = kn->kn_kq->kq_wqs;
+	selres = VNOP_SELECT(vp, filter_to_seltype(kn->kn_filter), 0, kn->kn_hook, ctx);
+	uth->uu_wqset = old_wqs;
+
+	vnode_put(vp);
+	return selres;
+}
+
diff --git a/bsd/miscfs/specfs/specdev.h b/bsd/miscfs/specfs/specdev.h
index dfe9c9945..3394fedbf 100644
--- a/bsd/miscfs/specfs/specdev.h
+++ b/bsd/miscfs/specfs/specdev.h
@@ -79,6 +79,7 @@ struct specinfo {
 	struct	vnode *si_specnext;
 	long	si_flags;
 	dev_t	si_rdev;
+	int32_t si_opencount;
 	daddr_t	si_size;		/* device block size in bytes */
 	daddr64_t	si_lastr;	/* last read blkno (read-ahead) */
 	u_int64_t	si_devsize;	/* actual device size in bytes */
diff --git a/bsd/miscfs/union/Makefile b/bsd/miscfs/union/Makefile
index 72ccd7707..513e6bbb9 100644
--- a/bsd/miscfs/union/Makefile
+++ b/bsd/miscfs/union/Makefile
@@ -9,14 +9,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 DATAFILES = \
diff --git a/bsd/miscfs/union/union.h b/bsd/miscfs/union/union.h
index 5411f538d..eee3e5a87 100644
--- a/bsd/miscfs/union/union.h
+++ b/bsd/miscfs/union/union.h
@@ -67,156 +67,5 @@
 #ifndef __UNION_UNION_H__
 #define __UNION_UNION_H__
 
-#include  <sys/appleapiopts.h>
-#include  <sys/cdefs.h>
-
-#ifdef __APPLE_API_PRIVATE
-struct union_args {
-	char		*target;	/* Target of loopback  */
-	int		mntflags;	/* Options on the mount */
-};
-
-#define UNMNT_ABOVE	0x0001		/* Target appears above mount point */
-#define UNMNT_BELOW	0x0002		/* Target appears below mount point */
-#define UNMNT_REPLACE	0x0003		/* Target replaces mount point */
-#ifdef FAULTFS
-#define UNMNT_FAULTIN	0x0004		/* get the files to TOT on lookup */
-#define UNMNT_OPMASK	0x0007
-#else
-#define UNMNT_OPMASK	0x0003
-#endif
-
-#ifdef BSD_KERNEL_PRIVATE
-
-struct union_mount {
-	struct vnode	*um_uppervp;	/* */
-	int		um_uppervid;	/* vid of upper vnode */
-	struct vnode	*um_lowervp;	/* Left unlocked */
-	int		um_lowervid;	/* vid of lower vnode */
-	kauth_cred_t	um_cred;	/* Credentials of user calling mount */
-	int		um_cmode;	/* cmask from mount process */
-	int		um_op;		/* Operation mode */
-	dev_t		um_upperdev;	/* Upper root node fsid[0]*/
-};
-
-
-#define  UNION_ABOVE(x) (x->um_op == UNMNT_ABOVE)
-#define  UNION_LOWER(x) (x->um_op == UNMNT_BELOW)
-#define  UNION_REPLACE(x) (x->um_op == UNMNT_REPLACE)
-#ifdef FAULTFS
-#define  UNION_FAULTIN(x) (x->um_op == UNMNT_FAULTIN)
-#else
-#define  UNION_FAULTIN(x) (0)
-
-#endif
-
-/* LP64 version of union_args.  all pointers 
- * grow when we're dealing with a 64-bit process.
- * WARNING - keep in sync with union_args
- */
-
-struct user_union_args {
-	user_addr_t	target;		/* Target of loopback  */
-	int			mntflags;	/* Options on the mount */
-	char		_pad[4];
-};
-
-/*
- * UN_DIRMODE is the mode bits used to create a shadow directory.
- */
-#define VRWXMODE (VREAD|VWRITE|VEXEC)
-#define VRWMODE (VREAD|VWRITE)
-#define UN_DIRMODE ((VRWXMODE)|(VRWXMODE>>3)|(VRWXMODE>>6))
-#define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6))
-
-/*
- * A cache of vnode references
- */
-struct union_node {
-	LIST_ENTRY(union_node)	un_cache;	/* Hash chain */
-	struct vnode		*un_vnode;	/* Back pointer */
-
-	struct vnode	        *un_uppervp;	/* overlaying object */
-	int			un_uppervid;	/* vid of upper vnode */
-	off_t			un_uppersz;	/* size of upper object */
-
-	struct vnode	        *un_lowervp;	/* underlying object */
-	int			un_lowervid;	/* vid of lower vnode */
-	off_t			un_lowersz;	/* size of lower object */
-
-	struct vnode		*un_dirvp;	/* Parent dir of uppervp */
-	struct vnode		*un_pvp;	/* Parent vnode */
-
-	char			*un_path;	/* saved component name */
-	int			un_hash;	/* saved un_path hash value */
-	int			un_openl;	/* # of opens on lowervp */
-	int			un_exclcnt;	/* exclusive count */
-	unsigned int		un_flags;
-	mount_t			un_mount;
-	struct vnode		**un_dircache;	/* cached union stack */
-};
-
-#define UN_WANT		0x01		/* union node is needed */
-#define UN_LOCKED	0x02		/* union node is locked */
-#define UN_CACHED	0x04		/* In union cache */
-#define UN_TRANSIT	0x08		/* The union node is in creation */
-#define UN_DELETED	0x10		/* The union node is deleted  */
-#ifdef FAULTFS
-#define UN_FAULTFS	0x80		/* The union node is for faultfs */
-#endif
-#define UN_DIRENVN	0x100		/* The union node is created for dir enumeration */
-
-
-#ifdef FAULTFS
-#define  UNNODE_FAULTIN(x) ((x->un_flags & UN_FAULTFS)== UN_FAULTFS)
-#else
-#define  UNNODE_FAULTIN(x) (0)
-#endif
-/*
- * Hash table locking flags
- */
-
-#define UNVP_WANT	0x01
-#define UNVP_LOCKED	0x02
-
-#define	MOUNTTOUNIONMOUNT(mp) ((struct union_mount *)((mp)->mnt_data))
-#define	VTOUNION(vp) ((struct union_node *)(vp)->v_data)
-#define	UNIONTOV(un) ((un)->un_vnode)
-#define	LOWERVP(vp) (VTOUNION(vp)->un_lowervp)
-#define	UPPERVP(vp) (VTOUNION(vp)->un_uppervp)
-#define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp))
-
-
-extern int union_allocvp(struct vnode **, struct mount *,
-				struct vnode *, struct vnode *,
-				struct componentname *, struct vnode *,
-				struct vnode *, int);
-extern int union_freevp(struct vnode *);
-extern struct vnode * union_dircache(struct vnode *, vfs_context_t);
-extern int union_copyfile(struct vnode *, struct vnode *,vfs_context_t );
-extern int union_copyup(struct union_node *, int, vfs_context_t );
-extern int union_dowhiteout(struct union_node *, vfs_context_t);
-extern int union_mkshadow(struct union_mount *, struct vnode *,
-				struct componentname *, struct vnode **);
-extern int union_mkwhiteout(struct union_mount *, struct vnode *,
-				struct componentname *, char *);
-extern int union_vn_create(struct vnode **, struct union_node *, mode_t  mode, vfs_context_t context);
-extern int union_cn_close(struct vnode *, int, vfs_context_t context);
-extern void union_removed_upper(struct union_node *un);
-extern struct vnode *union_lowervp(struct vnode *);
-extern void union_newsize(struct vnode *, off_t, off_t);
-extern int union_init(struct vfsconf *);
-extern void union_updatevp(struct union_node *, struct vnode *, struct vnode *);
-extern void union_dircache_free(struct union_node *);
-extern int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
-extern int union_faultin_copyup(struct vnode ** uvpp, vnode_t udvp, vnode_t lvp, struct componentname * cnp, vfs_context_t context);
-extern int (**union_vnodeop_p)(void *);
-extern struct vfsops union_vfsops;
-void union_lock(void);
-void union_unlock(void);
-
-#endif /* BSD_KERNEL_PRIVATE */
-
-#endif /* __APPLE_API_PRIVATE */
 
 #endif /* __UNION_UNION_H__ */
diff --git a/bsd/miscfs/union/union_subr.c b/bsd/miscfs/union/union_subr.c
deleted file mode 100644
index 34dbe14f3..000000000
--- a/bsd/miscfs/union/union_subr.c
+++ /dev/null
@@ -1,1604 +0,0 @@
-/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
-/*
- * Copyright (c) 1994 Jan-Simon Pendry
- * Copyright (c) 1994
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Jan-Simon Pendry.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
- */
-/*
- * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
- * support for mandatory and extensible security protections.  This notice
- * is included in support of clause 2.2 (b) of the Apple Public License,
- * Version 2.0.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc_internal.h>
-#include <sys/kauth.h>
-#include <sys/time.h>
-#include <sys/kernel.h>
-#include <sys/vnode_internal.h>
-#include <sys/namei.h>
-#include <sys/malloc.h>
-#include <sys/file_internal.h>
-#include <sys/filedesc.h>
-#include <sys/queue.h>
-#include <sys/mount_internal.h>
-#include <sys/stat.h>
-#include <sys/ubc.h>
-#include <sys/uio_internal.h>
-#include <miscfs/union/union.h>
-#include <sys/lock.h>
-#include <sys/kdebug.h>
-#if CONFIG_MACF
-#include <security/mac_framework.h>
-#endif
-
-
-static int union_vn_close(struct vnode *vp, int fmode, vfs_context_t ctx);
-
-/* must be power of two, otherwise change UNION_HASH() */
-#define NHASH 32
-
-/* unsigned int ... */
-#define UNION_HASH(u, l) \
-	(((((uintptr_t) (u)) + ((uintptr_t) (l))) >> 8) & (NHASH-1))
-
-static LIST_HEAD(unhead, union_node) unhead[NHASH];
-static int unvplock[NHASH];
-
-static lck_grp_t * union_lck_grp;
-static lck_grp_attr_t * union_lck_grp_attr;
-static lck_attr_t * union_lck_attr;
-static lck_mtx_t *  union_mtxp;
-
-static int union_dircheck(struct vnode **, struct fileproc *, vfs_context_t ctx);
-static void union_newlower(struct union_node *, struct vnode *);
-static void union_newupper(struct union_node *, struct vnode *);
-
-
-int
-union_init(__unused struct vfsconf *vfsp)
-{
-	int i;
-
-	union_lck_grp_attr= lck_grp_attr_alloc_init();
-#if DIAGNOSTIC
-	lck_grp_attr_setstat(union_lck_grp_attr);
-#endif
-	union_lck_grp = lck_grp_alloc_init("union",  union_lck_grp_attr);
-	union_lck_attr = lck_attr_alloc_init();
-#if DIAGNOSTIC
-	lck_attr_setdebug(union_lck_attr);
-#endif
-	union_mtxp = lck_mtx_alloc_init(union_lck_grp, union_lck_attr);
-
-	for (i = 0; i < NHASH; i++)
-		LIST_INIT(&unhead[i]);
-	bzero((caddr_t) unvplock, sizeof(unvplock));
-	/* add the hook for getdirentries */
-	union_dircheckp = union_dircheck;
-	
-	return (0);
-}
-
-void
-union_lock()
-{
-	lck_mtx_lock(union_mtxp);
-}
-
-void
-union_unlock()
-{
-	lck_mtx_unlock(union_mtxp);
-}
-
-
-static int
-union_list_lock(int ix)
-{
-
-	if (unvplock[ix] & UNVP_LOCKED) {
-		unvplock[ix] |= UNVP_WANT;
-		msleep((caddr_t) &unvplock[ix], union_mtxp, PINOD, "union_list_lock", NULL);
-		return (1);
-	}
-
-	unvplock[ix] |= UNVP_LOCKED;
-
-	return (0);
-}
-
-static void
-union_list_unlock(int ix)
-{
-
-	unvplock[ix] &= ~UNVP_LOCKED;
-
-	if (unvplock[ix] & UNVP_WANT) {
-		unvplock[ix] &= ~UNVP_WANT;
-		wakeup((caddr_t) &unvplock[ix]);
-	}
-}
-
-/*
- *	union_updatevp:
- *
- *	The uppervp, if not NULL, must be referenced and not locked by us
- *	The lowervp, if not NULL, must be referenced.
- *
- *	If uppervp and lowervp match pointers already installed, then
- *	nothing happens. The passed vp's (when matching) are not adjusted.
- *
- *	This routine may only be called by union_newupper() and
- *	union_newlower().
- */
-
-/* always called with union lock held */
-void
-union_updatevp(struct union_node *un, struct vnode *uppervp,
-		struct vnode *lowervp)
-{
-	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
-	int nhash = UNION_HASH(uppervp, lowervp);
-	int docache = (lowervp != NULLVP || uppervp != NULLVP);
-	int lhash, uhash;
-	vnode_t freevp;
-	vnode_t freedirvp;
-	caddr_t freepath;
-
-	/*
-	 * Ensure locking is ordered from lower to higher
-	 * to avoid deadlocks.
-	 */
-	if (nhash < ohash) {
-		lhash = nhash;
-		uhash = ohash;
-	} else {
-		lhash = ohash;
-		uhash = nhash;
-	}
-
-	if (lhash != uhash) {
-		while (union_list_lock(lhash))
-			continue;
-	}
-
-	while (union_list_lock(uhash))
-		continue;
-
-	if (ohash != nhash || !docache) {
-		if (un->un_flags & UN_CACHED) {
-			un->un_flags &= ~UN_CACHED;
-			LIST_REMOVE(un, un_cache);
-		}
-	}
-
-	if (ohash != nhash)
-		union_list_unlock(ohash);
-
-	if (un->un_lowervp != lowervp) {
-		freevp = freedirvp = NULLVP;
-		freepath = (caddr_t)0;
-		if (un->un_lowervp) {
-			freevp = un->un_lowervp;
-			un->un_lowervp = lowervp;
-			if (un->un_path) {
-				freepath = un->un_path; 
-				un->un_path = 0;
-			}
-			if (un->un_dirvp) {
-				freedirvp = un->un_dirvp;
-				un->un_dirvp = NULLVP;
-			}
-			union_unlock();
-			if (freevp)
-				vnode_put(freevp);
-			if (freedirvp)
-				vnode_put(freedirvp);
-			if (freepath)
-				_FREE(un->un_path, M_TEMP);
-			union_lock();
-		} else 
-			un->un_lowervp = lowervp;
-		if (lowervp != NULLVP)
-			un->un_lowervid = vnode_vid(lowervp);
-		un->un_lowersz = VNOVAL;
-	}
-
-	if (un->un_uppervp != uppervp) {
-		freevp = NULLVP;
-		if (un->un_uppervp) {
-			freevp = un->un_uppervp;
-		}
-		un->un_uppervp = uppervp;
-		if (uppervp != NULLVP)
-			un->un_uppervid = vnode_vid(uppervp);
-		un->un_uppersz = VNOVAL;
-		union_unlock();
-		if (freevp)
-			vnode_put(freevp);
-		union_lock();
-	}
-
-	if (docache && (ohash != nhash)) {
-		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
-		un->un_flags |= UN_CACHED;
-	}
-
-	union_list_unlock(nhash);
-}
-
-/*
- * Set a new lowervp.  The passed lowervp must be referenced and will be
- * stored in the vp in a referenced state. 
- */
-/* always called with union lock held */
-
-static void
-union_newlower(un, lowervp)
-	struct union_node *un;
-	struct vnode *lowervp;
-{
-	union_updatevp(un, un->un_uppervp, lowervp);
-}
-
-/*
- * Set a new uppervp.  The passed uppervp must be locked and will be 
- * stored in the vp in a locked state.  The caller should not unlock
- * uppervp.
- */
-
-/* always called with union lock held */
-static void
-union_newupper(un, uppervp)
-	struct union_node *un;
-	struct vnode *uppervp;
-{
-	union_updatevp(un, uppervp, un->un_lowervp);
-}
-
-/*
- * Keep track of size changes in the underlying vnodes.
- * If the size changes, then callback to the vm layer
- * giving priority to the upper layer size.
- */
-/* always called with union lock held */
-void
-union_newsize(vp, uppersz, lowersz)
-	struct vnode *vp;
-	off_t uppersz, lowersz;
-{
-	struct union_node *un;
-	off_t sz;
-
-	/* only interested in regular files */
-	if (vp->v_type != VREG)
-		return;
-
-	un = VTOUNION(vp);
-	sz = VNOVAL;
-
-	if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
-		un->un_uppersz = uppersz;
-		if (sz == VNOVAL)
-			sz = un->un_uppersz;
-	}
-
-	if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
-		un->un_lowersz = lowersz;
-		if (sz == VNOVAL)
-			sz = un->un_lowersz;
-	}
-
-	if (sz != VNOVAL) {
-#ifdef UNION_DIAGNOSTIC
-		printf("union: %s size now %ld\n",
-			uppersz != VNOVAL ? "upper" : "lower", (long) sz);
-#endif
-		union_unlock();
-		ubc_setsize(vp, sz);
-		union_lock();
-	}
-}
-
-/*
- *	union_allocvp:	allocate a union_node and associate it with a
- *			parent union_node and one or two vnodes.
- *
- *	vpp	Holds the returned vnode locked and referenced if no 
- *		error occurs.
- *
- *	mp	Holds the mount point.  mp may or may not be busied. 
- *		allocvp() makes no changes to mp.
- *
- *	dvp	Holds the parent union_node to the one we wish to create.
- *		XXX may only be used to traverse an uncopied lowervp-based
- *		tree?  XXX
- *
- *		dvp may or may not be locked.  allocvp() makes no changes
- *		to dvp.
- *
- *	upperdvp Holds the parent vnode to uppervp, generally used along
- *		with path component information to create a shadow of
- *		lowervp when uppervp does not exist.
- *
- *		upperdvp is referenced but unlocked on entry, and will be
- *		dereferenced on return.
- *
- *	uppervp	Holds the new uppervp vnode to be stored in the 
- *		union_node we are allocating.  uppervp is referenced but
- *		not locked, and will be dereferenced on return.
- *
- *	lowervp	Holds the new lowervp vnode to be stored in the
- *		union_node we are allocating.  lowervp is referenced but
- *		not locked, and will be dereferenced on return.
- * 
- *	cnp	Holds path component information to be coupled with
- *		lowervp and upperdvp to allow unionfs to create an uppervp
- *		later on.  Only used if lowervp is valid.  The contents
- *		of cnp is only valid for the duration of the call.
- *
- *	docache	Determine whether this node should be entered in the
- *		cache or whether it should be destroyed as soon as possible.
- *
- * All union_nodes are maintained on a singly-linked
- * list.  New nodes are only allocated when they cannot
- * be found on this list.  Entries on the list are
- * removed when the vfs reclaim entry is called.
- *
- * A single lock is kept for the entire list.  This is
- * needed because the getnewvnode() function can block
- * waiting for a vnode to become free, in which case there
- * may be more than one process trying to get the same
- * vnode.  This lock is only taken if we are going to
- * call getnewvnode(), since the kernel itself is single-threaded.
- *
- * If an entry is found on the list, then call vget() to
- * take a reference.  This is done because there may be
- * zero references to it and so it needs to be removed from
- * the vnode free list.
- */
-
-/* always called with union lock held */
-
-int
-union_allocvp(struct vnode **vpp,
-	struct mount *mp,
-	struct vnode *undvp,
-	struct vnode *dvp,
-	struct componentname *cnp,
-	struct vnode *uppervp,
-	struct vnode *lowervp,
-	int docache)
-{
-	int error;
-	struct union_node *un = NULL;
-	struct union_node *unp;
-	struct vnode *xlowervp = NULLVP;
-	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
-	int hash = 0;		/* protected by docache */
-	int markroot;
-	int try;
-	struct vnode_fsparam vfsp;
-	enum vtype vtype;
-
-	if (uppervp == NULLVP && lowervp == NULLVP)
-		panic("union: unidentifiable allocation");
-
-	/*
-	 * if both upper and lower vp are provided and are of different types,
-	 * consider lowervp as NULL
-	 */
-	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
-		xlowervp = lowervp;
-		lowervp = NULLVP;
-	}
-
-	/* detect the root vnode (and aliases) */
-	markroot = 0;
-	if ((uppervp == um->um_uppervp) &&
-	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
-		if (lowervp == NULLVP) {
-			lowervp = um->um_lowervp;
-			if (lowervp != NULLVP) {
-				union_unlock();
-				vnode_get(lowervp);
-				union_lock();
-			}
-		}
-		markroot = VROOT;
-	}
-
-loop:
-	if (!docache) {
-		un = NULL;
-	} else for (try = 0; try < 3; try++) {
-		switch (try) {
-		case 0:
-			if (lowervp == NULLVP)
-				continue;
-			hash = UNION_HASH(uppervp, lowervp);
-			break;
-
-		case 1:
-			if (uppervp == NULLVP)
-				continue;
-			hash = UNION_HASH(uppervp, NULLVP);
-			break;
-
-		case 2:
-			if (lowervp == NULLVP)
-				continue;
-			/* Not sure how this path gets exercised ? */
-			hash = UNION_HASH(NULLVP, lowervp);
-			break;
-		}
-
-		while (union_list_lock(hash))
-			continue;
-
-		for (un = unhead[hash].lh_first; un != 0;
-					un = un->un_cache.le_next) {
-			if ((un->un_lowervp == lowervp ||
-			     un->un_lowervp == NULLVP) &&
-			    (un->un_uppervp == uppervp ||
-			     un->un_uppervp == NULLVP) &&
-			    (un->un_mount == mp)) {
-				break;
-			}
-		}
-
-		union_list_unlock(hash);
-
-		if (un)
-			break;
-	}
-
-	if (un) {
-		/*
-		 * Obtain a lock on the union_node.
-		 * uppervp is locked, though un->un_uppervp
-		 * may not be.  this doesn't break the locking
-		 * hierarchy since in the case that un->un_uppervp
-		 * is not yet locked it will be vnode_put'd and replaced
-		 * with uppervp.
-		 */
-
-		if (un->un_flags & UN_LOCKED) {
-			un->un_flags |= UN_WANT;
-			msleep((caddr_t) &un->un_flags, union_mtxp, PINOD, "union node locked", 0);
-			goto loop;
-		}       
-		un->un_flags |= UN_LOCKED;
-                        
-		union_unlock();
-		if (UNIONTOV(un) == NULLVP)
-			panic("null vnode in union node\n");
-		if (vnode_get(UNIONTOV(un))) {
-			union_lock();
-			un->un_flags &= ~UN_LOCKED;
-			if ((un->un_flags & UN_WANT) == UN_WANT) {
-			un->un_flags &= ~UN_WANT;
-				wakeup(&un->un_flags);
-			}
-			goto loop;
-		}
-		union_lock();
-
-		/*
-		 * At this point, the union_node is locked,
-		 * un->un_uppervp may not be locked, and uppervp
-		 * is locked or nil.
-		 */
-
-		/*
-		 * Save information about the upper layer.
-		 */
-		if (uppervp != un->un_uppervp) {
-			union_newupper(un, uppervp);
-		} else if (uppervp) {
-			union_unlock();
-			vnode_put(uppervp);
-			union_lock();
-		}
-
-		/*
-		 * Save information about the lower layer.
-		 * This needs to keep track of pathname
-		 * and directory information which union_vn_create
-		 * might need.
-		 */
-		if (lowervp != un->un_lowervp) {
-			union_newlower(un, lowervp);
-			if (cnp && (lowervp != NULLVP)) {
-				un->un_hash = cnp->cn_hash;
-				union_unlock();
-				MALLOC(un->un_path, caddr_t, cnp->cn_namelen+1,
-						M_TEMP, M_WAITOK);
-				bcopy(cnp->cn_nameptr, un->un_path,
-						cnp->cn_namelen);
-				vnode_get(dvp);
-				union_lock();
-				un->un_path[cnp->cn_namelen] = '\0';
-				un->un_dirvp = dvp;
-			}
-		} else if (lowervp) {
-			union_unlock();
-			vnode_put(lowervp);
-			union_lock();
-		}
-		*vpp = UNIONTOV(un);
-		un->un_flags &= ~UN_LOCKED;
-		if ((un->un_flags & UN_WANT) == UN_WANT) {
-			un->un_flags &= ~UN_WANT;
-			wakeup(&un->un_flags);
-		}
-		return (0);
-	}
-
-	if (docache) {
-		/*
-		 * otherwise lock the vp list while we call getnewvnode
-		 * since that can block.
-		 */ 
-		hash = UNION_HASH(uppervp, lowervp);
-
-		if (union_list_lock(hash))
-			goto loop;
-	}
-
-	union_unlock();
-	MALLOC(unp, void *, sizeof(struct union_node), M_TEMP, M_WAITOK);
-	union_lock();
-
-	bzero(unp, sizeof(struct union_node));
-	un = unp;
-	un->un_uppervp = uppervp;
-	if (uppervp != NULLVP)
-		un->un_uppervid = vnode_vid(uppervp);
-	un->un_uppersz = VNOVAL;
-	un->un_lowervp = lowervp;
-	if (lowervp != NULLVP)
-		un->un_lowervid = vnode_vid(lowervp);
-	un->un_lowersz = VNOVAL;
-	un->un_pvp = undvp;
-	if (undvp != NULLVP)
-		vnode_get(undvp);
-	un->un_dircache = 0;
-	un->un_openl = 0;
-	un->un_mount = mp;
-	un->un_flags = UN_LOCKED;
-#ifdef FAULTFS
-	if (UNION_FAULTIN(um))
-		un->un_flags |= UN_FAULTFS;
-#endif
-
-	if (docache) {
-		/* Insert with lock held */
-		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
-		un->un_flags |= UN_CACHED;
-		union_list_unlock(hash);
-	}
-
-	union_unlock();
-
-	if (uppervp)
-		vtype = uppervp->v_type;
-	else
-		vtype = lowervp->v_type;
-
-	bzero(&vfsp, sizeof(struct vnode_fsparam));
-	vfsp.vnfs_mp = mp;
-	vfsp.vnfs_vtype = vtype;
-	vfsp.vnfs_str = "unionfs";
-	vfsp.vnfs_dvp = undvp;
-	vfsp.vnfs_fsnode = unp;
-	vfsp.vnfs_cnp = cnp;
-	vfsp.vnfs_vops = union_vnodeop_p;
-	vfsp.vnfs_rdev = 0;
-	vfsp.vnfs_filesize = 0;
-	vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE;
-	vfsp.vnfs_marksystem = 0;
-	vfsp.vnfs_markroot = markroot;
-
-	error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, vpp);
-	if (error) {
-		/*  XXXXX Is this right ????  XXXXXXX */
-		if (uppervp) {
-		        vnode_put(uppervp);
-		}
-		if (lowervp)
-			vnode_put(lowervp);
-
-		union_lock();
-		if (un->un_flags & UN_CACHED) {
-			un->un_flags &= ~UN_CACHED;
-			LIST_REMOVE(un, un_cache);
-		}
-
-		FREE(unp, M_TEMP);
-
-		return (error);
-	}
-
-	if (cnp && (lowervp != NULLVP)) {
-		un->un_hash = cnp->cn_hash;
-		un->un_path = _MALLOC(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
-		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
-		un->un_path[cnp->cn_namelen] = '\0';
-		vnode_get(dvp);
-		un->un_dirvp = dvp;
-	} else {
-		un->un_hash = 0;
-		un->un_path = 0;
-		un->un_dirvp = 0;
-	}
-
-	if (xlowervp)
-		vnode_put(xlowervp);
-
-	union_lock();
-
-	vnode_settag(*vpp, VT_UNION);
-	un->un_vnode = *vpp;
-	if (un->un_vnode->v_type == VDIR) {
-		if (un->un_uppervp == NULLVP) {
-			panic("faulting fs and no upper vp for dir?");
-		}
-	}
-
-
-	un->un_flags &= ~UN_LOCKED;
-	if ((un->un_flags & UN_WANT) == UN_WANT) {
-		un->un_flags &=  ~UN_WANT;
-		wakeup(&un->un_flags);
-	} 
-
-	return(error);
-
-}
-
-/* always called with union lock held */
-int
-union_freevp(struct vnode *vp)
-{
-	struct union_node *un = VTOUNION(vp);
-
-	if (un->un_flags & UN_CACHED) {
-		un->un_flags &= ~UN_CACHED;
-		LIST_REMOVE(un, un_cache);
-	}
-
-	union_unlock();
-	if (un->un_pvp != NULLVP)
-		vnode_put(un->un_pvp);
-	if (un->un_uppervp != NULLVP)
-		vnode_put(un->un_uppervp);
-	if (un->un_lowervp != NULLVP)
-		vnode_put(un->un_lowervp);
-	if (un->un_dirvp != NULLVP)
-		vnode_put(un->un_dirvp);
-	if (un->un_path)
-		_FREE(un->un_path, M_TEMP);
-
-	FREE(vp->v_data, M_TEMP);
-	vp->v_data = 0;
-	union_lock();
-
-	return (0);
-}
-
-/*
- * copyfile.  copy the vnode (fvp) to the vnode (tvp)
- * using a sequence of reads and writes.  both (fvp)
- * and (tvp) are locked on entry and exit.
- */
-/* called with no union lock held */
-int
-union_copyfile(struct vnode *fvp, struct vnode *tvp, vfs_context_t context)
-{
-	char *bufp;
-	struct uio *auio;
-	char uio_buf [ UIO_SIZEOF(1) ];
-	int error = 0;
-
-	/*
-	 * strategy:
-	 * allocate a buffer of size MAXPHYSIO.
-	 * loop doing reads and writes, keeping track
-	 * of the current uio offset.
-	 * give up at the first sign of trouble.
-	 */
-
-	auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE,
-		UIO_READ /* will change */, &uio_buf, sizeof(uio_buf));
-
-	bufp = _MALLOC(MAXPHYSIO, M_TEMP, M_WAITOK);
-	if (bufp == NULL) {
-		return ENOMEM;
-	}
-
-	/* ugly loop follows... */
-	do {
-		off_t offset = uio_offset(auio);
-
-		uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
-		uio_addiov(auio, (uintptr_t)bufp, MAXPHYSIO);
-		error = VNOP_READ(fvp, auio, 0, context);
-
-		if (error == 0) {
-			user_ssize_t resid = uio_resid(auio);
-
-			uio_reset(auio, offset, UIO_SYSSPACE, UIO_WRITE);
-			uio_addiov(auio, (uintptr_t)bufp, MAXPHYSIO - resid);
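-			/*
-			 * (MAXPHYSIO - resid) is the number of bytes the
-			 * read actually returned; a zero-length transfer
-			 * means EOF, so the copy is complete.
-			 */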
-
-			if (uio_resid(auio) == 0)
-				break;
-
-			do {
-				error = VNOP_WRITE(tvp, auio, 0, context);
-			} while ((uio_resid(auio) > 0) && (error == 0));
-		}
-
-	} while (error == 0);
-
-	_FREE(bufp, M_TEMP);
-	return (error);
-}
-
-/*
- * (un) is assumed to be locked on entry and remains
- * locked on exit.
- */
-/* always called with union lock held */
-int
-union_copyup(struct union_node *un, int docopy, vfs_context_t context)
-{
-	int error;
-	struct vnode *lvp, *uvp;
-	struct vnode_attr vattr;
-	mode_t  cmode = 0;
-
-	
-	lvp = un->un_lowervp;
-
-	union_unlock();
-
-	if (UNNODE_FAULTIN(un)) {
-		/* Need to inherit exec mode in faulting fs */
-		VATTR_INIT(&vattr);
-		VATTR_WANTED(&vattr, va_mode);
-		if (vnode_getattr(lvp, &vattr, context) == 0)
-			cmode = vattr.va_mode;
-		
-	}
-	error = union_vn_create(&uvp, un, cmode, context);
-	if (error) {
-		union_lock();
-		if (error == EEXIST) {
-			if (uvp != NULLVP) {
-				union_newupper(un, uvp);
-				error = 0;	
-			} 
-		}
-		return (error);
-	}
-
-	union_lock();
-	/* at this point, uppervp is locked */
-	union_newupper(un, uvp);
-	union_unlock();
-
-
-	if (docopy) {
-		/*
-		 * XX - should not ignore errors
-		 * from vnop_close
-		 */
-		error = VNOP_OPEN(lvp, FREAD, context);
-		if (error == 0) {
-			error = union_copyfile(lvp, uvp, context);
-			(void) VNOP_CLOSE(lvp, FREAD, context);
-		}
-#ifdef UNION_DIAGNOSTIC
-		if (error == 0)
-			uprintf("union: copied up %s\n", un->un_path);
-#endif
-
-	}
-	union_vn_close(uvp, FWRITE, context);
-
-	/*
-	 * Subsequent IOs will go to the top layer, so
-	 * call close on the lower vnode and open on the
-	 * upper vnode to ensure that the filesystem keeps
-	 * its references counts right.  This doesn't do
-	 * the right thing with (cred) and (FREAD) though.
-	 * Ignoring error returns is not right, either.
-	 */
-
-	/* No need to hold the lock as the union node should be locked for this (it is in faultin mode) */
-	if (error == 0) {
-		int i;
-
-		for (i = 0; i < un->un_openl; i++) {
-			(void) VNOP_CLOSE(lvp, FREAD, context);
-			(void) VNOP_OPEN(uvp, FREAD, context);
-		}
-		un->un_openl = 0;
-	}
-
-	union_lock();
-
-	return (error);
-
-}
-
-
-int 
-union_faultin_copyup(struct vnode **vpp, vnode_t udvp, vnode_t lvp, struct componentname * cnp, vfs_context_t context)
-{
-	int error;
-	struct vnode *uvp;
-	struct vnode_attr vattr;
-	struct vnode_attr *vap;
-	mode_t  cmode = 0;
-	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
-	struct proc * p = vfs_context_proc(context);
-	struct componentname cn;
-	
-
-	vap = &vattr;
-	VATTR_INIT(vap);
-	VATTR_WANTED(vap, va_mode);
-	if (vnode_getattr(lvp, vap, context) == 0)
-		cmode = vattr.va_mode;
-		
-	*vpp = NULLVP;
-
-
-	if (cmode == (mode_t)0)
-		cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
-	else
-		cmode = cmode & ~p->p_fd->fd_cmask;
-
-
-	/*
-	 * Build a new componentname structure (for the same
- * reasons outlined in union_mkshadow()).
-	 * The difference here is that the file is owned by
-	 * the current user, rather than by the person who
-	 * did the mount, since the current user needs to be
-	 * able to write the file (that's why it is being
-	 * copied in the first place).
-	 */
-	bzero(&cn, sizeof(struct componentname));
-
-	cn.cn_namelen = cnp->cn_namelen;
-	cn.cn_pnbuf = (caddr_t) _MALLOC_ZONE(cn.cn_namelen+1,
-						M_NAMEI, M_WAITOK);
-	cn.cn_pnlen = cn.cn_namelen+1;
-	bcopy(cnp->cn_nameptr, cn.cn_pnbuf, cn.cn_namelen+1);
-	cn.cn_nameiop = CREATE;
-	cn.cn_flags = (HASBUF|SAVENAME|SAVESTART|ISLASTCN|UNIONCREATED);
-	cn.cn_context = context;
-	cn.cn_nameptr = cn.cn_pnbuf;
-	cn.cn_hash = 0;
-	cn.cn_consume = 0;
-
-	/*
-	 * Pass dvp unlocked and referenced on call to relookup().
-	 *
-	 * If an error occurs, dvp will be returned unlocked and dereferenced.
-	 */
-	if ((error = relookup(udvp, &uvp, &cn)) != 0) {
-		goto out;
-	}
-
-	/*
-	 * If no error occurs, dvp will be returned locked with the reference
-	 * left as before, and vpp will be returned referenced and locked.
-	 */
-	if (uvp) {
-		*vpp = uvp;
-		error = EEXIST;
-		goto out;
-	}
-
-	/*
-	 * Good - there was no race to create the file
-	 * so go ahead and create it.  The permissions
-	 * on the file will be 0666 modified by the
-	 * current user's umask.  Access to the file, while
-	 * it is unioned, will require access to the top *and*
-	 * bottom files.  Access when not unioned will simply
-	 * require access to the top-level file.
-	 *
-	 * TODO: confirm choice of access permissions.
-	 *       decide on authorisation behaviour
-	 */
-	
-	VATTR_INIT(vap);
-	VATTR_SET(vap, va_type, VREG);
-	VATTR_SET(vap, va_mode, cmode);
-
-	cn.cn_flags |= (UNIONCREATED);
-	if ((error = vn_create(udvp, &uvp, &cn, vap, 0, context)) != 0) {
-		goto out;
-	}
-
-	
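-	/*
-	 * Open the freshly created upper file and take a persistent
-	 * reference on it for the duration of the copy; both are
-	 * dropped again once the data has been copied up.
-	 */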
-	if ((error = VNOP_OPEN(uvp, fmode, context)) != 0) {
-		vn_clearunionwait(uvp, 0);
-		vnode_recycle(uvp);
-		vnode_put(uvp);
-		goto out;
-	}
-
-	error = vnode_ref_ext(uvp, fmode);
-	if (error) {
-		vn_clearunionwait(uvp, 0);
-		VNOP_CLOSE(uvp, fmode, context);
-		vnode_recycle(uvp);
-		vnode_put(uvp);
-		goto out;
-	}
-
-
-	/*
-	 * XX - should not ignore errors
-	 * from vnop_close
-	 */
-	error = VNOP_OPEN(lvp, FREAD, context);
-	if (error == 0) {
-		error = union_copyfile(lvp, uvp, context);
-		(void) VNOP_CLOSE(lvp, FREAD, context);
-	}
-
-	VNOP_CLOSE(uvp, fmode, context);
-	vnode_rele_ext(uvp, fmode, 0);
-	vn_clearunionwait(uvp, 0);
-
-	*vpp = uvp;
-out:
-	if ((cn.cn_flags & HASBUF) == HASBUF) {
-		FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
-		cn.cn_flags &= ~HASBUF;
-	}
-	return (error);
-}
-
-
-/*
- *	union_relookup:
- *
- *	dvp should be locked on entry and will be locked on return.  No
- *	net change in the ref count will occur.
- *
- *	If an error is returned, *vpp will be invalid, otherwise it
- *	will hold a locked, referenced vnode.  If *vpp == dvp then
- *	remember that only one exclusive lock is held.
- */
-
-/* No union lock held for this call */
-static int
-union_relookup(
-#ifdef XXX_HELP_ME
-	struct union_mount *um,
-#else	/* !XXX_HELP_ME */
-	__unused struct union_mount *um,
-#endif	/* !XXX_HELP_ME */
-	struct vnode *dvp,
-	struct vnode **vpp,
-	struct componentname *cnp,
-	struct componentname *cn,
-	char *path,
-	int pathlen)
-{
-	int error;
-
-	/*
-	 * A new componentname structure must be faked up because
-	 * there is no way to know where the upper level cnp came
-	 * from or what it is being used for.  This must duplicate
-	 * some of the work done by NDINIT, some of the work done
-	 * by namei, some of the work done by lookup and some of
-	 * the work done by vnop_lookup when given a CREATE flag.
-	 * Conclusion: Horrible.
-	 */
-	cn->cn_namelen = pathlen;
-	cn->cn_pnbuf = _MALLOC_ZONE(cn->cn_namelen+1, M_NAMEI, M_WAITOK);
-	cn->cn_pnlen = cn->cn_namelen+1;
-	bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
-	cn->cn_pnbuf[cn->cn_namelen] = '\0';
-
-	cn->cn_nameiop = CREATE;
-	cn->cn_flags = (HASBUF|SAVENAME|SAVESTART|ISLASTCN );
-#ifdef XXX_HELP_ME
-	cn->cn_proc = cnp->cn_proc;
-	if (um->um_op == UNMNT_ABOVE)
-		cn->cn_cred = cnp->cn_cred;
-	else
-		cn->cn_cred = um->um_cred;
-#endif
-	cn->cn_context = cnp->cn_context;	/* XXX !UNMNT_ABOVE  case ??? */
-	cn->cn_nameptr = cn->cn_pnbuf;
-	cn->cn_hash = 0;
-	cn->cn_consume = cnp->cn_consume;
-
-	vnode_get(dvp);
-	error = relookup(dvp, vpp, cn);
-	vnode_put(dvp);
-
-	return (error);
-}
-
-/*
- * Create a shadow directory in the upper layer.
- * The new vnode is returned locked.
- *
- * (um) points to the union mount structure for access to the
- * the mounting process's credentials.
- * (dvp) is the directory in which to create the shadow directory,
- * It is locked (but not ref'd) on entry and return.
- * (cnp) is the component name to be created.
- * (vpp) is the returned newly created shadow directory, which
- * is returned locked and ref'd
- */
-/* No union lock held for this call */
-int
-union_mkshadow(um, dvp, cnp, vpp)
-	struct union_mount *um;
-	struct vnode *dvp;
-	struct componentname *cnp;
-	struct vnode **vpp;
-{
-	int error;
-	struct vnode_attr va;
-	struct componentname cn;
-
-	bzero(&cn, sizeof(struct componentname));
-
-
-	error = union_relookup(um, dvp, vpp, cnp, &cn,
-			cnp->cn_nameptr, cnp->cn_namelen);
-	if (error) 
-		goto out;
-
-	if (*vpp) {
-		error = EEXIST;
-		goto out;
-	}
-
-	/*
-	 * Policy: when creating the shadow directory in the
-	 * upper layer, create it owned by the user who did
-	 * the mount, group from parent directory, and mode
-	 * 777 modified by umask (ie mostly identical to the
-	 * mkdir syscall).  (jsp, kb)
-	 */
-
-	VATTR_INIT(&va);
-	VATTR_SET(&va, va_type, VDIR);
-	VATTR_SET(&va, va_mode, um->um_cmode);
-
-	error = vn_create(dvp, vpp, &cn, &va, 0, cnp->cn_context);
-out:
-	if ((cn.cn_flags & HASBUF) == HASBUF) {
-		FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
-		cn.cn_flags &= ~HASBUF;
-	}
-	return (error);
-}
-
-/*
- * Create a whiteout entry in the upper layer.
- *
- * (um) points to the union mount structure for access to the
- * the mounting process's credentials.
- * (dvp) is the directory in which to create the whiteout.
- * it is locked on entry and exit.
- * (cnp) is the componentname to be created.
- */
-/* No union lock held for this call */
-int
-union_mkwhiteout(um, dvp, cnp, path)
-	struct union_mount *um;
-	struct vnode *dvp;
-	struct componentname *cnp;
-	char *path;
-{
-	int error;
-	struct vnode *wvp;
-	struct componentname cn;
-
-	bzero(&cn, sizeof(struct componentname));
-
-	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
-	if (error) {
-		goto out;
-	}
-	if (wvp) {
-		error = EEXIST;
-		goto out;
-	}
-
-	error = VNOP_WHITEOUT(dvp, &cn, CREATE, cnp->cn_context);
-
-out:
-	if ((cn.cn_flags & HASBUF) == HASBUF) {
-		FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
-		cn.cn_flags &= ~HASBUF;
-	}
-	return (error);
-}
-
-
-/*
- * union_vn_create: creates and opens a new shadow file
- * on the upper union layer.  This function is similar
- * in spirit to calling vn_open() but it avoids calling namei().
- * The problem with calling namei() is that a) it locks too many
- * things, and b) it doesn't start at the "right" directory,
- * whereas relookup() is told where to start.
- *
- * On entry, the vnode associated with un is locked.  It remains locked
- * on return.
- *
- * If no error occurs, *vpp contains a locked referenced vnode for your
- * use.  If an error occurs, *vpp is undefined.
- */
-/* called with no union lock held */
-int
-union_vn_create(struct vnode **vpp, struct union_node *un, mode_t cmode, vfs_context_t  context)
-{
-	struct vnode *vp;
-	struct vnode_attr vat;
-	struct vnode_attr *vap = &vat;
-	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
-	int error;
-	struct proc * p = vfs_context_proc(context);
-	struct componentname cn;
-
-	bzero(&cn, sizeof(struct componentname));
-	*vpp = NULLVP;
-
-	if (cmode == (mode_t)0)
-		cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
-	else
-		cmode = cmode & ~p->p_fd->fd_cmask;
-
-
-	/*
-	 * Build a new componentname structure (for the same
- * reasons outlined in union_mkshadow()).
-	 * The difference here is that the file is owned by
-	 * the current user, rather than by the person who
-	 * did the mount, since the current user needs to be
-	 * able to write the file (that's why it is being
-	 * copied in the first place).
-	 */
-	cn.cn_namelen = strlen(un->un_path);
-	cn.cn_pnbuf = (caddr_t) _MALLOC_ZONE(cn.cn_namelen+1,
-						M_NAMEI, M_WAITOK);
-	cn.cn_pnlen = cn.cn_namelen+1;
-	bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
-	cn.cn_nameiop = CREATE;
-	if (UNNODE_FAULTIN(un))
-		cn.cn_flags = (HASBUF|SAVENAME|SAVESTART|ISLASTCN|UNIONCREATED);
-	else
-		cn.cn_flags = (HASBUF|SAVENAME|SAVESTART|ISLASTCN);
-	cn.cn_context = context;
-	cn.cn_nameptr = cn.cn_pnbuf;
-	cn.cn_hash = un->un_hash;
-	cn.cn_consume = 0;
-
-	/*
-	 * Pass dvp unlocked and referenced on call to relookup().
-	 *
-	 * If an error occurs, dvp will be returned unlocked and dereferenced.
-	 */
-	vnode_get(un->un_dirvp);
-	if ((error = relookup(un->un_dirvp, &vp, &cn)) != 0) {
-		vnode_put(un->un_dirvp);
-		goto out;
-	}
-	vnode_put(un->un_dirvp);
-
-	/*
-	 * If no error occurs, dvp will be returned locked with the reference
-	 * left as before, and vpp will be returned referenced and locked.
-	 */
-	if (vp) {
-		*vpp = vp;
-		error = EEXIST;
-		goto out;
-	}
-
-	/*
-	 * Good - there was no race to create the file
-	 * so go ahead and create it.  The permissions
-	 * on the file will be 0666 modified by the
-	 * current user's umask.  Access to the file, while
-	 * it is unioned, will require access to the top *and*
-	 * bottom files.  Access when not unioned will simply
-	 * require access to the top-level file.
-	 *
-	 * TODO: confirm choice of access permissions.
-	 *       decide on authorisation behaviour
-	 */
-	
-	VATTR_INIT(vap);
-	VATTR_SET(vap, va_type, VREG);
-	VATTR_SET(vap, va_mode, cmode);
-
-	if ((error = vn_create(un->un_dirvp, &vp, &cn, vap, 0, context)) != 0) {
-		goto out;
-	}
-
-	if ((error = VNOP_OPEN(vp, fmode, context)) != 0) {
-		vnode_put(vp);
-		goto out;
-	}
-
-	vnode_lock(vp);
-	if (++vp->v_writecount <= 0)
-		panic("union: v_writecount");
-	vnode_unlock(vp);
-	*vpp = vp;
-	error = 0;
-
-out:
-	if ((cn.cn_flags & HASBUF) == HASBUF) {
-		FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
-		cn.cn_flags &= ~HASBUF;
-	}
-	return(error);
-}
-
-/* called with no union lock held */
-static int
-union_vn_close(struct vnode *vp, int fmode, vfs_context_t context)
-{
-
-	if (fmode & FWRITE) {
-		vnode_lock(vp);
-		--vp->v_writecount;
-		vnode_unlock(vp);
-	}
-	return (VNOP_CLOSE(vp, fmode, context));
-}
-
-/*
- *	union_removed_upper:
- *
- *	An upper-only file/directory has been removed; un-cache it so
- *	that the unionfs vnode gets reclaimed and the last uppervp reference
- *	disappears.
- *
- *	Called with union_node unlocked.
- */
-/* always called with union lock held */
-void
-union_removed_upper(un)
-	struct union_node *un;
-{
-	union_newupper(un, NULLVP);
-	if (un->un_flags & UN_CACHED) {
-		un->un_flags &= ~UN_CACHED;
-		LIST_REMOVE(un, un_cache);
-	}
-
-}
-
-#if 0
-struct vnode *
-union_lowervp(vp)
-	struct vnode *vp;
-{
-	struct union_node *un = VTOUNION(vp);
-
-	if ((un->un_lowervp != NULLVP) &&
-	    (vp->v_type == un->un_lowervp->v_type)) {
-		if (vnode_get(un->un_lowervp) == 0)
-			return (un->un_lowervp);
-	}
-
-	return (NULLVP);
-}
-#endif
-
-/*
- * Determine whether a whiteout is needed
- * during a remove/rmdir operation.
- */
-/* called with no union lock held */
-int
-union_dowhiteout(struct union_node *un, vfs_context_t ctx)
-{
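-	/*
-	 * A whiteout is needed whenever a lower object could show
-	 * through after the remove: either a lower vnode exists, or
-	 * the upper directory is marked opaque.
-	 */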
-	struct vnode_attr va;
-
-	if (UNNODE_FAULTIN(un))
-		return(0);
-
-	if (un->un_lowervp != NULLVP)
-		return (1);
-
-	VATTR_INIT(&va);
-	VATTR_WANTED(&va, va_flags);
-	if (vnode_getattr(un->un_uppervp, &va, ctx) == 0 &&
-	    (va.va_flags & OPAQUE))
-		return (1);
-
-	return (0);
-}
-
-/* called with no union lock held */
-static void
-union_dircache_r(struct vnode *vp, struct vnode ***vppp, int *cntp)
-{
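-	/*
-	 * Two-pass helper: with (vppp) NULL this only counts the
-	 * non-union vnodes reachable from (vp); with (vppp) set it
-	 * takes a reference on each one and appends it to the table.
-	 */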
-	struct union_node *un;
-
-	if (vp->v_op != union_vnodeop_p) {
-		if (vppp) {
-			vnode_get(vp);
-			*(*vppp)++ = vp;
-			if (--(*cntp) == 0)
-				panic("union: dircache table too small");
-		} else {
-			(*cntp)++;
-		}
-
-		return;
-	}
-
-	un = VTOUNION(vp);
-	if (un->un_uppervp != NULLVP)
-		union_dircache_r(un->un_uppervp, vppp, cntp);
-	if (un->un_lowervp != NULLVP)
-		union_dircache_r(un->un_lowervp, vppp, cntp);
-}
-
-/* called with no union lock held */
-struct vnode *
-union_dircache(struct vnode *vp, __unused vfs_context_t context)
-{
-	int count;
-	struct vnode *nvp, *lvp;
-	struct vnode **vpp;
-	struct vnode **dircache, **newdircache;
-	struct union_node *un;
-	int error;
-	int alloced = 0;
-
-	union_lock();
-	newdircache = NULL;
-
-	nvp = NULLVP;
-	un = VTOUNION(vp);
-
-	dircache = un->un_dircache;
-	if (dircache == 0) {
-		union_unlock();
-		count = 0;
-		union_dircache_r(vp, 0, &count);
-		count++;
-#if 0
-		/* too bad; we need Union now! */
-#if MAC_XXX
-                panic("MAC Framework doesn't support unionfs (yet)\n");
-#endif /* MAC */
-#endif
-
-		dircache = (struct vnode **)
-				_MALLOC(count * sizeof(struct vnode *),
-					M_TEMP, M_WAITOK);
-		if (dircache == NULL) {
-			goto out;
-		}
-		newdircache = dircache;
-		alloced = 1;
-		vpp = dircache;
-		union_dircache_r(vp, &vpp, &count);
-		*vpp = NULLVP;
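-		/*
-		 * The first entry of the freshly built table is the
-		 * topmost layer; start the scan at slot 1 so (lvp)
-		 * below picks up the first lower layer.
-		 */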
-		vpp = dircache + 1;
-		union_lock();
-	} else {
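-		/*
-		 * An existing table: scan for the current upper vnode;
-		 * the entry following it is the next lower layer to
-		 * hand out.
-		 */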
-		vpp = dircache;
-		do {
-			if (*vpp++ == un->un_uppervp)
-				break;
-		} while (*vpp != NULLVP);
-	}
-
-	lvp = *vpp;
-	union_unlock();
-	if (lvp == NULLVP) {
-		goto out;
-	}
-
-	vnode_get(lvp);
-	union_lock();
-
-	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, lvp, NULLVP, 0);
-	if (error) {
-		union_unlock();
-		vnode_put(lvp);
-		goto out;
-	}
-
-	un->un_dircache = 0;
-	un = VTOUNION(nvp);
-#if 0
-	if ((alloced != 0) && (un->un_dircache != 0)) {
-		union_unlock();
-		for (vpp = newdircache; *vpp != NULLVP; vpp++)
-			vnode_put(*vpp);
-		_FREE(newdircache, M_TEMP);
-		newdircache = NULL;
-		union_lock();
-		if (nvp != NULLVP)
-			union_freevp(nvp);
-		goto loop;
-	}
-#endif
-	un->un_dircache = dircache;
-	un->un_flags |= UN_DIRENVN;
-	
-	newdircache = NULL;
-	union_unlock();
-	return (nvp);
-
-out:
-	/*
-	 * If we allocated a new dircache and couldn't attach
-	 * it to a new vp, free the resources we allocated.
-	 */
-	if (newdircache) {
-		for (vpp = newdircache; *vpp != NULLVP; vpp++)
-			vnode_put(*vpp);
-		_FREE(newdircache, M_TEMP);
-	}
-	return (NULLVP);
-}
-
-/*
- * Module glue to remove #ifdef UNION from vfs_syscalls.c
- */
-/* Called with no union lock, the union_dircache takes locks when necessary */
-static int
-union_dircheck(struct vnode **vpp, struct fileproc *fp, vfs_context_t ctx)
-{
-	int error = 0;
-	vnode_t vp = *vpp;
-	
-	if (vp->v_op == union_vnodeop_p) {
-		struct vnode *lvp;
-
-		lvp = union_dircache(vp, ctx);
-		if (lvp != NULLVP) {
-			struct vnode_attr va;
-			/*
-			 * If the directory is opaque,
-			 * then don't show lower entries
-			 */
-			VATTR_INIT(&va);
-			VATTR_WANTED(&va, va_flags);
-			error = vnode_getattr(vp, &va, ctx);
-			if ((error == 0) && (va.va_flags & OPAQUE)) {
-				vnode_put(lvp);
-				lvp = NULL;
-			}
-		}
-
-		if (lvp != NULLVP) {
-#if CONFIG_MACF
-			error = mac_vnode_check_open(ctx, lvp, FREAD);
-			if (error) {
-				vnode_put(lvp);
-				return(error);
-			}
-#endif /* MAC */
-			error = VNOP_OPEN(lvp, FREAD, ctx);
-			if (error) {
-				vnode_put(lvp);
-				return(error);
-			}
-			vnode_ref(lvp);
-			fp->f_fglob->fg_data = (caddr_t) lvp;
-			fp->f_fglob->fg_offset = 0;
-
-			error = VNOP_CLOSE(vp, FREAD, ctx);
-			vnode_rele(vp);
-			vnode_put(vp);
-			if (error)
-				return(error);
-
-			*vpp = lvp;
-			return -1;	/* goto unionread */
-		}
-	}
-	return error;
-}
-
-/*  called from inactive with union lock held */
-void
-union_dircache_free(struct union_node *un)
-{
-	struct vnode **vpp, **dircache;
-
-	dircache = un->un_dircache;
-	un->un_dircache = NULL;
-	union_unlock();
-
-	for (vpp = dircache; *vpp != NULLVP; vpp++)
-		vnode_put(*vpp);
-	_FREE(dircache, M_TEMP);
-	union_lock();
-}
-
diff --git a/bsd/miscfs/union/union_vfsops.c b/bsd/miscfs/union/union_vfsops.c
deleted file mode 100644
index 6924e2f67..000000000
--- a/bsd/miscfs/union/union_vfsops.c
+++ /dev/null
@@ -1,563 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
-/*
- * Copyright (c) 1994, 1995 The Regents of the University of California.
- * Copyright (c) 1994, 1995 Jan-Simon Pendry.
- * All rights reserved.
- *
- * This code is derived from software donated to Berkeley by
- * Jan-Simon Pendry.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)union_vfsops.c	8.20 (Berkeley) 5/20/95
- */
-
-/*
- * Union Layer
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/proc_internal.h>
-#include <sys/kauth.h>
-#include <sys/vnode_internal.h>
-#include <sys/mount_internal.h>
-#include <sys/namei.h>
-#include <sys/malloc.h>
-#include <sys/filedesc.h>
-#include <sys/queue.h>
-#include <miscfs/union/union.h>
-
-static	int union_itercallback(vnode_t, void *);
-static int union_root(mount_t, vnode_t *, vfs_context_t);
-
-/*
- * Mount union filesystem
- */
-static int
-union_mount(mount_t mp, __unused vnode_t devvp, user_addr_t data, vfs_context_t context)
-{
-	proc_t p = vfs_context_proc(context);
-	int error = 0;
-	struct user_union_args args;
-	struct vnode *lowerrootvp = NULLVP;
-	struct vnode *upperrootvp = NULLVP;
-	struct union_mount *um = NULL;
-	kauth_cred_t cred = NOCRED;
-	const char *cp = NULL;
-	char *vcp;
-	int len;
-	size_t size;
-	struct nameidata nd;
-	
-#ifdef UNION_DIAGNOSTIC
-	printf("union_mount(mp = %x)\n", mp);
-#endif
-
-	/*
-	 * Update is a no-op
-	 */
-	if (mp->mnt_flag & MNT_UPDATE) {
-		/*
-		 * Need to provide:
-		 * 1. a way to convert between rdonly and rdwr mounts.
-		 * 2. support for nfs exports.
-		 */
-		error = ENOTSUP;
-		goto bad;
-	}
-
-	/*
-	 * Get argument
-	 */
-	if (vfs_context_is64bit(context)) {
-		error = copyin(data, (caddr_t)&args, sizeof(args));
-	}
-	else {
-		struct union_args temp;
-		error = copyin(data, (caddr_t)&temp, sizeof (temp));
-		args.target = CAST_USER_ADDR_T(temp.target);
-		args.mntflags = temp.mntflags;
-	}
-	if (error)
-		goto bad;
-
-	lowerrootvp = mp->mnt_vnodecovered;
-	vnode_get(lowerrootvp);
-
-	/*
-	 * Find upper node.
-	 */
-	NDINIT(&nd, LOOKUP, FOLLOW|WANTPARENT,
-	       (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), 
-	       args.target, context);
-
-	if ((error = namei(&nd)))
-		goto bad;
-
-	nameidone(&nd);
-	upperrootvp = nd.ni_vp;
-	vnode_put(nd.ni_dvp);
-	nd.ni_dvp = NULL;
-
-	if (upperrootvp->v_type != VDIR) {
-		error = EINVAL;
-		goto bad;
-	}
-	
-	MALLOC(um, struct union_mount *, sizeof(struct union_mount),
-				M_UFSMNT, M_WAITOK);
-
-	/*
-	 * Keep a held reference to the target vnodes.
-	 * They are vnode_put'd in union_unmount.
-	 *
-	 * Depending on the _BELOW flag, the filesystems are
-	 * viewed in a different order.  In effect, this is the
-	 * same as providing a mount under option to the mount syscall.
-	 */
-
-	um->um_op = args.mntflags & UNMNT_OPMASK;
-	switch (um->um_op) {
-	case UNMNT_ABOVE:
-		um->um_lowervp = lowerrootvp;
-		um->um_uppervp = upperrootvp;
-		break;
-
-	case UNMNT_BELOW:
-		um->um_lowervp = upperrootvp;
-		um->um_uppervp = lowerrootvp;
-		break;
-
-	case UNMNT_REPLACE:
-		vnode_put(lowerrootvp);
-		lowerrootvp = NULLVP;
-		um->um_uppervp = upperrootvp;
-		um->um_lowervp = lowerrootvp;
-		break;
-
-#ifdef FAULTFS
-	case UNMNT_FAULTIN:
-		um->um_lowervp = upperrootvp;
-		um->um_uppervp = lowerrootvp;
-		break;
-#endif
-
-	default:
-		error = EINVAL;
-		goto bad;
-	}
-
-	if (um->um_lowervp != NULLVP)
-		um->um_lowervid = vnode_vid(um->um_lowervp);
-	if (um->um_uppervp != NULLVP)
-		um->um_uppervid = vnode_vid(um->um_uppervp);
-	/*
-	 * Unless the mount is readonly, ensure that the top layer
-	 * supports whiteout operations
-	 */
-#ifdef FAULTFS
-	if ((um->um_op != UNMNT_FAULTIN) && (mp->mnt_flag & MNT_RDONLY) == 0)
-#else
-	if ((mp->mnt_flag & MNT_RDONLY) == 0)
-#endif
-	{
-		error = VNOP_WHITEOUT(um->um_uppervp, (struct componentname *) 0,
-		                      LOOKUP, context);
-		if (error)
-			goto bad;
-	}
-
-	um->um_cred = kauth_cred_get_with_ref();
-	um->um_cmode = UN_DIRMODE &~ p->p_fd->fd_cmask;
-
-	/*
-	 * Depending on what you think the MNT_LOCAL flag might mean,
-	 * you may want the && to be || on the conditional below.
-	 * At the moment it has been defined that the filesystem is
-	 * only local if it is all local, ie the MNT_LOCAL flag implies
-	 * that the entire namespace is local.  If you think the MNT_LOCAL
-	 * flag implies that some of the files might be stored locally
-	 * then you will want to change the conditional.
-	 */
-	if (um->um_op == UNMNT_ABOVE) {
-		if (((um->um_lowervp == NULLVP) ||
-		     (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) &&
-		    (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL))
-			mp->mnt_flag |= MNT_LOCAL;
-	}
-
-	/*
-	 * Copy in the upper layer's RDONLY flag.  This is for the benefit
-	 * of lookup() which explicitly checks the flag, rather than asking
- * the filesystem for its own opinion.  This means that an update
-	 * mount of the underlying filesystem to go from rdonly to rdwr
-	 * will leave the unioned view as read-only.
-	 */
-	mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY);
-
-	mp->mnt_data = (qaddr_t) um;
-	vfs_getnewfsid(mp);
-
-
-	switch (um->um_op) {
-	case UNMNT_ABOVE:
-		cp = "<above>:";
-		break;
-	case UNMNT_BELOW:
-		cp = "<below>:";
-		break;
-	case UNMNT_REPLACE:
-		cp = "";
-		break;
-#ifdef FAULTFS
-	case UNMNT_FAULTIN:
-		cp = "/FaultingFS/";
-		break;
-#endif
-	}
-	len = strlen(cp);
-	bcopy(cp, mp->mnt_vfsstat.f_mntfromname, len);
-
-	vcp = mp->mnt_vfsstat.f_mntfromname + len;
-	len = MNAMELEN - len;
-
-	(void) copyinstr(args.target, vcp, len - 1, &size);
-	bzero(vcp + size, len - size);
-
-#ifdef UNION_DIAGNOSTIC
-	printf("union_mount: from %s, on %s\n",
-		mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
-#endif
-	return (0);
-
-bad:
-	if (um)
-		_FREE(um, M_UFSMNT);
-	if (IS_VALID_CRED(cred))
-		kauth_cred_unref(&cred);
-	if (upperrootvp)
-		vnode_put(upperrootvp);
-	if (lowerrootvp)
-		vnode_put(lowerrootvp);
-	return (error);
-}
-
-/*
- * VFS start.  Nothing needed here - the start routine
- * on the underlying filesystem(s) will have been called
- * when that filesystem was mounted.
- */
-static int
-union_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context)
-{
-
-	return (0);
-}
-
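-/*
- * vnode_iterate() callback: counts the vnodes still associated
- * with the mount so union_unmount() can tell whether a vflush()
- * pass made progress.
- */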
-static int
-union_itercallback(__unused vnode_t vp, void *args)
-{
-	int  num = *(int *)args;
-	
-	*(int *)args = num + 1;
-	return(VNODE_RETURNED);
-}
-
-
-
-/*
- * Free reference to union layer
- */
-static int
-union_unmount(mount_t mp, int mntflags, vfs_context_t context)
-{
-	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
-	struct vnode *um_rootvp;
-	int error;
-	int freeing;
-	int flags = 0;
-
-#ifdef UNION_DIAGNOSTIC
-	printf("union_unmount(mp = %x)\n", mp);
-#endif
-
-	if (mntflags & MNT_FORCE)
-		flags |= FORCECLOSE;
-
-	if ((error = union_root(mp, &um_rootvp, context)))
-		return (error);
-
-	/*
-	 * Keep flushing vnodes from the mount list.
-	 * This is needed because of the un_pvp held
-	 * reference to the parent vnode.
-	 * If more vnodes have been freed on a given pass,
- * then try again.  The loop will iterate at most
-	 * (d) times, where (d) is the maximum tree depth
-	 * in the filesystem.
-	 */
-	for (freeing = 0; vflush(mp, um_rootvp, flags) != 0;) {
-		int n = 0;
-
-		vnode_iterate(mp, VNODE_NOLOCK_INTERNAL, union_itercallback, &n);
-
-		/* if this is unchanged then stop */
-		if (n == freeing)
-			break;
-
-		/* otherwise try one more time */
-		freeing = n;
-	}
-
-	/* At this point the root vnode should have a single reference */
-	if (vnode_isinuse(um_rootvp, 0)) {
-		vnode_put(um_rootvp);
-		return (EBUSY);
-	}
-
-#ifdef UNION_DIAGNOSTIC
-	vprint("union root", um_rootvp);
-#endif	 
-	/*
-	 * Discard references to upper and lower target vnodes.
-	 */
-	if (um->um_lowervp)
-		vnode_put(um->um_lowervp);
-	vnode_put(um->um_uppervp);
-	if (IS_VALID_CRED(um->um_cred)) {
-		kauth_cred_unref(&um->um_cred);
-	}
-	/*
-	 * Release reference on underlying root vnode
-	 */
-	vnode_put(um_rootvp);
-	/*
-	 * And blow it away for future re-use
-	 */
-	vnode_reclaim(um_rootvp);
-	/*
-	 * Finally, throw away the union_mount structure
-	 */
-	_FREE(mp->mnt_data, M_UFSMNT);	/* XXX */
-	mp->mnt_data = NULL;
-	return (0);
-}
-
-static int
-union_root(mount_t mp, vnode_t *vpp, __unused vfs_context_t context)
-{
-	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
-	int error;
-
-	/*
-	 * Return locked reference to root.
-	 */
-	vnode_get(um->um_uppervp);
-	if (um->um_lowervp)
-		vnode_get(um->um_lowervp);
-
-	union_lock();
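-	/* the final argument (1) asks union_allocvp to cache the new union node */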
-	error = union_allocvp(vpp, mp,
-			      (struct vnode *) 0,
-			      (struct vnode *) 0,
-			      (struct componentname *) 0,
-			      um->um_uppervp,
-			      um->um_lowervp,
-			      1);
-	union_unlock();
-
-	if (error) {
-	        vnode_put(um->um_uppervp);
-		if (um->um_lowervp)
-			vnode_put(um->um_lowervp);
-	} 
-
-	return (error);
-}
-
-static int
-union_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t context)
-{
-	int error;
-	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
-	struct vfs_attr attr;
-	uint32_t lbsize = 0;
-
-#ifdef UNION_DIAGNOSTIC
-	printf("union_vfs_getattr(mp = %x, lvp = %x, uvp = %x)\n", mp,
-			um->um_lowervp,
-	       		um->um_uppervp);
-#endif
-
-	/* Get values from lower file system (if any) */
-	if (um->um_lowervp) {
-		VFSATTR_INIT(&attr);
-		VFSATTR_WANTED(&attr, f_bsize);
-		VFSATTR_WANTED(&attr, f_blocks);
-		VFSATTR_WANTED(&attr, f_bused);
-		VFSATTR_WANTED(&attr, f_files);
-		error = vfs_getattr(um->um_lowervp->v_mount, &attr, context);
-		if (error)
-			return (error);
-
-		/* now copy across the "interesting" information and fake the rest */
-		if (VFSATTR_IS_SUPPORTED(&attr, f_bsize))
-			lbsize = attr.f_bsize;
-		else
-			lbsize = um->um_lowervp->v_mount->mnt_devblocksize;
-		fsap->f_blocks = VFSATTR_IS_SUPPORTED(&attr, f_blocks) ? attr.f_blocks : 0;
-		fsap->f_bused  = VFSATTR_IS_SUPPORTED(&attr, f_bused)  ? attr.f_bused  : 0;
-		fsap->f_files  = VFSATTR_IS_SUPPORTED(&attr, f_files)  ? attr.f_files  : 0;
-	} else {
-		fsap->f_blocks = 0;
-		fsap->f_bused = 0;
-		fsap->f_files = 0;
-	}
-
-	VFSATTR_INIT(&attr);
-	VFSATTR_WANTED(&attr, f_bsize);
-	VFSATTR_WANTED(&attr, f_blocks);
-	VFSATTR_WANTED(&attr, f_bfree);
-	VFSATTR_WANTED(&attr, f_bavail);
-	VFSATTR_WANTED(&attr, f_files);
-	VFSATTR_WANTED(&attr, f_ffree);
-	error = vfs_getattr(um->um_uppervp->v_mount, &attr, context);
-	if (error)
-		return (error);
-
-	if (VFSATTR_IS_SUPPORTED(&attr, f_bsize)) {
-		fsap->f_bsize = attr.f_bsize;
-		VFSATTR_SET_SUPPORTED(fsap, f_bsize);
-	}
-	if (VFSATTR_IS_SUPPORTED(&attr, f_iosize)) {
-		fsap->f_iosize = attr.f_iosize;
-		VFSATTR_SET_SUPPORTED(fsap, f_iosize);
-	}
-
-	/*
-	 * if the lower and upper blocksizes differ, then frig the
-	 * block counts so that the sizes reported by df make some
-	 * kind of sense.  none of this makes sense though.
-	 */
-	if (VFSATTR_IS_SUPPORTED(&attr, f_bsize))
-		fsap->f_bsize = attr.f_bsize;
-	else
-		fsap->f_bsize = um->um_uppervp->v_mount->mnt_devblocksize;
-	VFSATTR_SET_SUPPORTED(fsap, f_bsize);
-	if (fsap->f_bsize != lbsize)
-		fsap->f_blocks = fsap->f_blocks * lbsize / fsap->f_bsize;
-
-	/*
-	 * The "total" fields count total resources in all layers,
-	 * the "free" fields count only those resources which are
-	 * free in the upper layer (since only the upper layer
-	 * is writeable).
-	 */
-	if (VFSATTR_IS_SUPPORTED(&attr, f_blocks))
-		fsap->f_blocks += attr.f_blocks;
-	if (VFSATTR_IS_SUPPORTED(&attr, f_bfree))
-		fsap->f_bfree = attr.f_bfree;
-	if (VFSATTR_IS_SUPPORTED(&attr, f_bavail))
-		fsap->f_bavail = attr.f_bavail;
-	if (VFSATTR_IS_SUPPORTED(&attr, f_bused))
-		fsap->f_bused += attr.f_bused;
-	if (VFSATTR_IS_SUPPORTED(&attr, f_files))
-		fsap->f_files += attr.f_files;
-	if (VFSATTR_IS_SUPPORTED(&attr, f_ffree))
-		fsap->f_ffree = attr.f_ffree;
-
-	VFSATTR_SET_SUPPORTED(fsap, f_bsize);
-	VFSATTR_SET_SUPPORTED(fsap, f_blocks);
-	VFSATTR_SET_SUPPORTED(fsap, f_bfree);
-	VFSATTR_SET_SUPPORTED(fsap, f_bavail);
-	VFSATTR_SET_SUPPORTED(fsap, f_bused);
-	VFSATTR_SET_SUPPORTED(fsap, f_files);
-	VFSATTR_SET_SUPPORTED(fsap, f_ffree);
-
-	return (0);
-}
-
-/*
- * XXX - Assumes no data cached at union layer.
- */
-#define union_sync (int (*) (mount_t, int, vfs_context_t))nullop
-
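-/*
- * File handles, sysctl and vget are not supported by the union
- * layer; map them to eopnotsupp with the appropriate signatures.
- */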
-#define union_fhtovp (int (*) (mount_t, int, unsigned char *, vnode_t *, vfs_context_t))eopnotsupp
-#define union_sysctl (int (*) (int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t))eopnotsupp
-#define union_vget (int (*) (mount_t, ino64_t, vnode_t *, vfs_context_t))eopnotsupp
-#define union_vptofh (int (*) (vnode_t, int *, unsigned char *, vfs_context_t))eopnotsupp
-
-struct vfsops union_vfsops = {
-	union_mount,
-	union_start,
-	union_unmount,
-	union_root,
-	NULL,			/* quotactl */
-	union_vfs_getattr,
-	union_sync,
-	union_vget,
-	union_fhtovp,
-	union_vptofh,
-	union_init,
-	union_sysctl,
-	NULL,
-	{NULL}
-};
-
-
diff --git a/bsd/miscfs/union/union_vnops.c b/bsd/miscfs/union/union_vnops.c
deleted file mode 100644
index ddc374dea..000000000
--- a/bsd/miscfs/union/union_vnops.c
+++ /dev/null
@@ -1,1726 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
-/*
- * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
- * Copyright (c) 1992, 1993, 1994, 1995
- *	The Regents of the University of California.  All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Jan-Simon Pendry.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)union_vnops.c	8.32 (Berkeley) 6/23/95
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/proc.h>
-#include <sys/kauth.h>
-#include <sys/file.h>
-#include <sys/time.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/vnode_internal.h>
-#include <sys/mount_internal.h>
-#include <sys/namei.h>
-#include <sys/malloc.h>
-#include <sys/buf_internal.h>
-#include <sys/queue.h>
-#include <sys/lock.h>
-#include <miscfs/union/union.h>
-#include <vfs/vfs_support.h>
-#include <sys/ubc.h>
-#include <sys/kdebug.h>
-#include <sys/uio_internal.h>
-
-/* called with no union lock held */
-static int
-union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
-	struct componentname *cnp)
-{
-	int error;
-	vfs_context_t ctx = cnp->cn_context;
-	struct vnode *tdvp;
-	struct vnode *dvp;
-	struct mount *mp;
-
-	dvp = *dvpp;
-
-	/*
-	 * If stepping up the directory tree, check for going
-	 * back across the mount point, in which case do what
-	 * lookup would do by stepping back down the mount
-	 * hierarchy.
-	 */
-	if (cnp->cn_flags & ISDOTDOT) {
-		while ((dvp != udvp) && (dvp->v_flag & VROOT)) {
-			/*
-			 * Don't do the NOCROSSMOUNT check
-			 * at this level.  By definition,
-			 * union fs deals with namespaces, not
-			 * filesystems.
-			 */
-			tdvp = dvp;
-			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
-			vnode_put(tdvp);
-			vnode_get(dvp);
-		}
-	}
-
-	error = VNOP_LOOKUP(dvp, &tdvp, cnp, ctx);
-	if (error)
-		return (error);
-
-	dvp = tdvp;
-	/*
-	 * Lastly check if the current node is a mount point in
-	 * which case walk up the mount hierarchy making sure not to
-	 * bump into the root of the mount tree (ie. dvp != udvp).
-	 */
-	while (dvp != udvp && (dvp->v_type == VDIR) &&
-	       (mp = dvp->v_mountedhere)) {
-		if (vfs_busy(mp, LK_NOWAIT)) {
-			vnode_put(dvp);
-			return(ENOENT);
-		}
-		error = VFS_ROOT(mp, &tdvp, ctx);
-		vfs_unbusy(mp);
-		if (error) {
-			vnode_put(dvp);
-			return (error);
-		}
-
-		vnode_put(dvp);
-		dvp = tdvp;
-	}
-
-	*vpp = dvp;
-	return (0);
-}
-
-static int
-union_lookup(struct vnop_lookup_args *ap)
-/*
-	struct vnop_lookup_args {
-		struct vnodeop_desc *a_desc;
-		struct vnode *a_dvp;
-		struct vnode **a_vpp;
-		struct componentname *a_cnp;
-		vfs_context_t a_context;
-	} *ap)
-*/
-{
-	int error;
-	int uerror = 0;
-	int lerror = 0;
-	struct vnode *uppervp, *lowervp;
-	struct vnode *upperdvp, *lowerdvp;
-	struct vnode *dvp = ap->a_dvp;
-	struct union_node *dun;
-	struct componentname *cnp = ap->a_cnp;
-	vfs_context_t ctx = cnp->cn_context;
-	int lockparent = cnp->cn_flags & LOCKPARENT;
-	struct union_mount *um;
-	kauth_cred_t saved_cred;
-	int iswhiteout;
-	struct vnode_attr va;
-	int isfaultfs = 0;
-	int upperlookup = 0;
-	int retry_count = 0;
-
-#ifdef notyet
-	if (cnp->cn_namelen == 3 &&
-			cnp->cn_nameptr[2] == '.' &&
-			cnp->cn_nameptr[1] == '.' &&
-			cnp->cn_nameptr[0] == '.') {
-		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
-		if (dvp == NULLVP)
-			return (ENOENT);
-		vnode_get(dvp);
-
-		return (0);
-	}
-#endif
-
-	
-
-retry:
-	union_lock();
-	um = MOUNTTOUNIONMOUNT(dvp->v_mount);
-	dun = VTOUNION(dvp);
-	upperdvp = dun->un_uppervp;
-	lowerdvp = dun->un_lowervp;
-	uppervp = NULLVP;
-	lowervp = NULLVP;
-	iswhiteout = 0;
-
-	union_unlock();
-
-	if (UNION_FAULTIN(um))
-		isfaultfs = 1;
-
-	if (isfaultfs == 0)
-		cnp->cn_flags |= LOCKPARENT;
-
-	/*
-	 * do the lookup in the upper level.
- * if that level consumes additional pathnames,
-	 * then assume that something special is going
-	 * on and just return that vnode.
-	 */
-	if (upperdvp != NULLVP) {
-		if (lockparent != 0) 
-			cnp->cn_flags &= ~LOCKPARENT;
-		uerror = union_lookup1(um->um_uppervp, &upperdvp,
-					&uppervp, cnp);
-		upperlookup = 1;
-
-		if (cnp->cn_consume != 0) {
-			*ap->a_vpp = uppervp;
-			if (!lockparent)
-				cnp->cn_flags &= ~LOCKPARENT;
-			else
-				cnp->cn_flags |= LOCKPARENT;	
-			return (uerror);
-		}
-		if (uerror == ENOENT || uerror == EJUSTRETURN) {
-			if (cnp->cn_flags & ISWHITEOUT) {
-				iswhiteout = 1;
-			} else if (lowerdvp != NULLVP) {
-				VATTR_INIT(&va);
-				VATTR_WANTED(&va, va_flags);
-				lerror = vnode_getattr(upperdvp, &va, ap->a_context);
-				if (lerror == 0 && (va.va_flags & OPAQUE))
-					iswhiteout = 1;
-			}
-		}
-	} else {
-		uerror = ENOENT;
-	}
-
-	/*
-	 * faultingfs: if the upper layer lookup is successful
-	 * and returns a regular file, we will return that vp,
-	 * so skip the lower level lookup.
-	 */
-
-	if ((isfaultfs == 1) && (upperlookup == 1) && (uerror == 0) && ((vnode_isreg(uppervp) != 0)))
-		goto donelowerlookup;
-
-	/*
-	 * in a similar way to the upper layer, do the lookup
-	 * in the lower layer.   this time, if there is some
-	 * component magic going on, then vnode_put whatever we got
-	 * back from the upper layer and return the lower vnode
-	 * instead.
-	 */
-	if (lowerdvp != NULLVP && !iswhiteout) {
-		int nameiop;
-
-		/*
-		 * Only do a LOOKUP on the bottom node, since
-		 * we won't be making changes to it anyway.
-		 */
-		nameiop = cnp->cn_nameiop;
-		cnp->cn_nameiop = LOOKUP;
-		if (um->um_op == UNMNT_BELOW) {
-			/* XXX BOGUS */
-			saved_cred = cnp->cn_context->vc_ucred;
-			cnp->cn_context->vc_ucred = um->um_cred;
-			if (lockparent != 0) 
-				cnp->cn_flags &= ~LOCKPARENT;
-			lerror = union_lookup1(um->um_lowervp, &lowerdvp,
-					&lowervp, cnp);
-			cnp->cn_context->vc_ucred = saved_cred;
-		} else {
-			if (lockparent != 0) 
-				cnp->cn_flags &= ~LOCKPARENT;
-			lerror = union_lookup1(um->um_lowervp, &lowerdvp,
-					&lowervp, cnp);
-		}
-		cnp->cn_nameiop = nameiop;
-
-		if (cnp->cn_consume != 0) {
-			if (uppervp != NULLVP) {
-			        vnode_put(uppervp);
-				uppervp = NULLVP;
-			}
-			*ap->a_vpp = lowervp;
-			if (!lockparent)
-				cnp->cn_flags &= ~LOCKPARENT;
-			else
-				cnp->cn_flags |= LOCKPARENT;	
-			return (lerror);
-		}
-	} else {
-		lerror = ENOENT;
-		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
-			lowervp = LOWERVP(dun->un_pvp);
-			if (lowervp != NULLVP) {
-				lerror = 0;
-			}
-		}
-	}
-
-donelowerlookup:
-
-	if (!lockparent)
-		cnp->cn_flags &= ~LOCKPARENT;
-
-	/*
-	 * at this point, we have uerror and lerror indicating
-	 * possible errors with the lookups in the upper and lower
-	 * layers.  additionally, uppervp and lowervp are (locked)
-	 * references to existing vnodes in the upper and lower layers.
-	 *
-	 * there are now three cases to consider.
-	 * 1. if both layers returned an error, then return whatever
-	 *    error the upper layer generated.
-	 *
-	 * 2. if the top layer failed and the bottom layer succeeded
-	 *    then two subcases occur.
-	 *    a.  the bottom vnode is not a directory, in which
-	 *	  case just return a new union vnode referencing
-	 *	  an empty top layer and the existing bottom layer.
-	 *    b.  the bottom vnode is a directory, in which case
-	 *	  create a new directory in the top-level and
-	 *	  continue as in case 3.
-	 *
-	 * 3. if the top layer succeeded then return a new union
-	 *    vnode referencing whatever the new top layer and
-	 *    whatever the bottom layer returned.
-	 */
-
-	*ap->a_vpp = NULLVP;
-
-	/* case 1. */
-	if ((uerror != 0) && (lerror != 0)) {
-			if (!lockparent)
-				cnp->cn_flags &= ~LOCKPARENT;
-			else
-				cnp->cn_flags |= LOCKPARENT;	
-		return (uerror);
-	}
-
-	/* case 2. */
-	if (uerror != 0 /* && (lerror == 0) */ ) {
-		if (lowervp->v_type == VDIR) { /* case 2b. */
-			/* No need to lock the union here */
-			/* if the vnode exists it returns it even if it marks error */
-
-			uppervp = NULLVP;
-
-			uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
-
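-			/*
-			 * EEXIST with no vnode returned means we raced
-			 * another shadow-directory create; retry the whole
-			 * lookup a couple of times before giving up.
-			 */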
-			if (uerror == EEXIST) {
-				if (uppervp == NULLVP) {
-					retry_count++;
-					if (retry_count <= 2) {
-						if (lowervp != NULLVP) 
-							vnode_put(lowervp);
-						goto retry;
-					}
-				}
-				uerror = 0;
-			}
-			
-			if (uerror) {
-				if (uppervp != NULLVP) {
-					vnode_put(uppervp);
-				}
-				if (lowervp != NULLVP) {
-					vnode_put(lowervp);
-				}
-				if (!lockparent)
-					cnp->cn_flags &= ~LOCKPARENT;
-				else
-					cnp->cn_flags |= LOCKPARENT;	
-				return (uerror);
-			}
-		} else if ((lowervp->v_type == VREG) && (isfaultfs == 1)) {
-			error = union_faultin_copyup(&uppervp, upperdvp, lowervp, cnp, ctx);	
-			uerror = 0;
-		}
-	}
-
-
-	/* if this is a faulting filesystem and the upper vp exists, skip allocation of a union node */
-	if ((isfaultfs == 1) && (uerror == 0) && (uppervp != NULLVP) &&  ((vnode_isreg(uppervp) != 0)|| (vnode_islnk(uppervp) != 0))) {
-		vn_checkunionwait(uppervp);
-		*ap->a_vpp = uppervp;
-		if (lowervp != NULLVP)
-			vnode_put(lowervp);
-		if (!lockparent)
-			cnp->cn_flags &= ~LOCKPARENT;
-		else
-			cnp->cn_flags |= LOCKPARENT;	
-		return(0);
-	}
-
-	union_lock();
-	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
-			      uppervp, lowervp, 1);
-	union_unlock();
-
-	if (error) {
-		if (uppervp != NULLVP)
-			vnode_put(uppervp);
-		if (lowervp != NULLVP)
-			vnode_put(lowervp);
-	}
-
-	if (!lockparent)
-		cnp->cn_flags &= ~LOCKPARENT;
-	else
-		cnp->cn_flags |= LOCKPARENT;	
-	return (error);
-}
-
-static int
-union_create(struct vnop_create_args *ap)
-/*
-	struct vnop_create_args {
-		struct vnode *a_dvp;
-		struct vnode **a_vpp;
-		struct componentname *a_cnp;
-		struct vnode_attr *a_vap;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	struct union_node *un = VTOUNION(ap->a_dvp);
-	struct vnode *dvp = un->un_uppervp;
-	struct componentname *cnp = ap->a_cnp;
-
-	if (dvp != NULLVP) {
-		int error;
-		struct vnode *vp;
-		struct mount *mp;
-
-
-		mp = ap->a_dvp->v_mount;
-
-		/* note that this is a direct passthrough to the filesystem */
-		error = VNOP_CREATE(dvp, &vp, cnp, ap->a_vap, ap->a_context);
-		if (error)
-			return (error);
-
-		/* if this is a faulting filesystem and a regular file, skip allocation of a union node */
-		if (UNNODE_FAULTIN(un) && (vp != NULLVP) && ((vnode_isreg(vp) != 0)|| (vnode_islnk(vp) != 0))) {
-			*ap->a_vpp = vp;
-			return(0);
-		}
-
-
-		union_lock();
-		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
-				NULLVP, 1);
-		union_unlock();
-		if (error)
-			vnode_put(vp);
-		return (error);
-	}
-
-	return (EROFS);
-}
-
-static int
-union_whiteout(struct vnop_whiteout_args *ap)
-/*
-	struct vnop_whiteout_args {
-		struct vnode *a_dvp;
-		struct componentname *a_cnp;
-		int a_flags;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	struct union_node *un = VTOUNION(ap->a_dvp);
-	struct componentname *cnp = ap->a_cnp;
-	int error;
-
-	if (un->un_uppervp == NULLVP) {
-		return (ENOTSUP);
-	}
-
-	error =  (VNOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags, ap->a_context));
-	return(error);
-}
-
-/* mknod can do  fifos, chr, blk or whiteout entries */
-static int
-union_mknod(struct vnop_mknod_args *ap)
-/*
-	struct vnop_mknod_args {
-		struct vnode *a_dvp;
-		struct vnode **a_vpp;
-		struct componentname *a_cnp;
-		struct vnode_attr *a_vap;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	struct union_node *un = VTOUNION(ap->a_dvp);
-	struct vnode *dvp = un->un_uppervp;
-	struct componentname *cnp = ap->a_cnp;
-
-	if (dvp != NULLVP) {
-		int error;
-		struct vnode *vp;
-		struct mount *mp;
-
-
-		mp = ap->a_dvp->v_mount;
-
-		/* note that this is a direct passthrough to the filesystem */
-		error = VNOP_MKNOD(dvp, &vp, cnp, ap->a_vap, ap->a_context);
-		if (error)
-			return (error);
-
-		if (vp != NULLVP) {
-			union_lock();
-			error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
-					cnp, vp, NULLVP, 1);
-			union_unlock();
-			if (error)
-				vnode_put(vp);
-		}
-		return (error);
-	}
-	return (EROFS);
-}
-
-static int
-union_open(struct vnop_open_args *ap)
-/*
-	struct vnop_open_args {
-		struct vnodeop_desc *a_desc;
-		struct vnode *a_vp;
-		int a_mode;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *tvp;
-	int mode = ap->a_mode;
-	int error;
-
-	/*
-	 * If there is an existing upper vp then simply open that.
-	 */
-
-	tvp = un->un_uppervp;
-	if (tvp == NULLVP) {
-
-		/*
-		 * If the lower vnode is being opened for writing, then
-		 * copy the file contents to the upper vnode and open that,
-		 * otherwise can simply open the lower vnode.
-		 */
-		tvp = un->un_lowervp;
-		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
-			/* For above/below mounts we need draining. */
-			/* This path is not taken in faultin mode. */
-			/* Lock the union node as well. */
-			union_lock();
-			un->un_flags |= UN_LOCKED;
-
-			error = union_copyup(un, (mode&O_TRUNC) == 0, ap->a_context);
-			un->un_flags &= ~UN_LOCKED;
-			if ((un->un_flags & UN_WANT) == UN_WANT) {
-				un->un_flags &=  ~UN_WANT;
-				wakeup(&un->un_flags);
-			}
-			union_unlock();
-			if (error == 0)
-				error = VNOP_OPEN(un->un_uppervp, mode, ap->a_context);
-			return (error);
-		}
-
-		/*
-		 * Just open the lower vnode
-		 */
-		un->un_openl++;
-
-		error = VNOP_OPEN(tvp, mode, ap->a_context);
-
-		return (error);
-	}
-
-	error = VNOP_OPEN(tvp, mode, ap->a_context);
-
-	return (error);
-}
-
-static int
-union_close(struct vnop_close_args *ap)
-/*
-	struct vnop_close_args {
-		struct vnode *a_vp;
-		int  a_fflag;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp;
-	int error = 0;
-
-	if ((vp = un->un_uppervp) == NULLVP) {
-#ifdef UNION_DIAGNOSTIC
-		if (un->un_openl <= 0)
-			panic("union: un_openl cnt");
-#endif
-		--un->un_openl;
-		vp = un->un_lowervp;
-	}
-
-	ap->a_vp = vp;
-	error =  (VCALL(vp, VOFFSET(vnop_close), ap));
-	return(error);
-}
-
-/*
- * Check access permission on the union vnode.
- * The access check being enforced is to check
- * against both the underlying vnode, and any
- * copied vnode.  This ensures that no additional
- * file permissions are given away simply because
- * the user caused an implicit file copy.
- */
-static int
-union_access(struct vnop_access_args *ap)
-/*
-	struct vnop_access_args {
-		struct vnodeop_desc *a_desc;
-		struct vnode *a_vp;
-		int a_action;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	struct union_node *un = VTOUNION(ap->a_vp);
-	int error = EACCES;
-	struct vnode *vp;
-
-	if ((vp = un->un_uppervp) != NULLVP) {
-		ap->a_vp = vp;
-		return (VCALL(vp, VOFFSET(vnop_access), ap));
-	}
-
-	if ((vp = un->un_lowervp) != NULLVP) {
-		ap->a_vp = vp;
-		error = VCALL(vp, VOFFSET(vnop_access), ap);
-		if (error == 0) {
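-			/*
-			 * For below mounts the check is repeated; presumably
-			 * this second pass was meant to run with the mount
-			 * credential rather than the caller's.
-			 */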
-			struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
-
-			if (um->um_op == UNMNT_BELOW) {
-				error = VCALL(vp, VOFFSET(vnop_access), ap);
-			}
-		}
-		if (error)
-			return (error);
-	}
-
-	return (error);
-}
-
-/*
- * We handle getattr only to change the fsid and
- * track object sizes
- */
-static int
-union_getattr(struct vnop_getattr_args *ap)
-/*
-	struct vnop_getattr_args {
-		struct vnode *a_vp;
-		struct vnode_attr *a_vap;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	int error=0;
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *vp = un->un_uppervp;
-	struct vnode_attr *vap;
-	struct vnode_attr va;
-
-
-	/*
-	 * Some programs walk the filesystem hierarchy by counting
-	 * links to directories to avoid stat'ing all the time.
-	 * This means the link count on directories needs to be "correct".
-	 * The only way to do that is to call getattr on both layers
-	 * and fix up the link count.  The link count will not necessarily
-	 * be accurate but will be large enough to defeat the tree walkers.
-	 */
-
-	vap = ap->a_vap;
-
-	vp = un->un_uppervp;
-	if (vp != NULLVP) {
-		/*
-		 * It's not clear whether vnop_getattr is to be
-		 * called with the vnode locked or not.  stat() calls
-		 * it with (vp) locked, and fstat calls it with
-		 * (vp) unlocked.
-		 * In the mean time, compensate here by checking
-		 * the union_node's lock flag.
-		 */
-
-		error = vnode_getattr(vp, vap, ap->a_context);
-		if (error) {
-			return (error);
-		}
-		union_lock();
-		union_newsize(ap->a_vp, vap->va_data_size, VNOVAL);
-		union_unlock();
-	}
-
-	if (vp == NULLVP) {
-		vp = un->un_lowervp;
-	} else if (vp->v_type == VDIR) {
-		vp = un->un_lowervp;
-		VATTR_INIT(&va);
-		/* all we want from the lower node is the link count */
-		VATTR_WANTED(&va, va_nlink);
-		vap = &va;
-	} else {
-		vp = NULLVP;
-	}
-
-	if (vp != NULLVP) {
-		error = vnode_getattr(vp, vap, ap->a_context);
-		if (error) {
-			return (error);
-		}
-		union_lock();
-		union_newsize(ap->a_vp, VNOVAL, vap->va_data_size);
-		union_unlock();
-	}
-
-	if ((vap != ap->a_vap) && (vap->va_type == VDIR))
-		ap->a_vap->va_nlink += vap->va_nlink;
-
-	VATTR_RETURN(ap->a_vap, va_fsid, ap->a_vp->v_mount->mnt_vfsstat.f_fsid.val[0]);
-	return (0);
-}
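
The comment above explains why directory link counts must stay plausible for tree walkers; the fix-up itself amounts to summing the per-layer va_nlink values when the union node is a directory. A small sketch of that arithmetic (names invented, user-space only):

/*
 * The result over-counts, but stays large enough that tools pruning
 * after (st_nlink - 2) subdirectories still walk the whole union tree.
 */
#include <stdio.h>

static unsigned int
union_dir_nlink(unsigned int upper_nlink, unsigned int lower_nlink)
{
	return upper_nlink + lower_nlink;
}

int
main(void)
{
	/* e.g. upper layer reports 5, lower reports 6: the union says 11 */
	printf("%u\n", union_dir_nlink(5, 6));
	return 0;
}
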
-
-static int
-union_setattr(struct vnop_setattr_args *ap)
-/*
-	struct vnop_setattr_args {
-		struct vnode *a_vp;
-		struct vnode_attr *a_vap;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	struct union_node *un = VTOUNION(ap->a_vp);
-	int error;
-
-	/*
-	 * Handle case of truncating lower object to zero size,
-	 * by creating a zero length upper object.  This is to
-	 * handle the case of open with O_TRUNC and O_CREAT.
-	 */
-	if (VATTR_IS_ACTIVE(ap->a_vap, va_data_size) &&
-	    (un->un_uppervp == NULLVP) &&
-	    /* assert(un->un_lowervp != NULLVP) */
-	    (un->un_lowervp->v_type == VREG)) {
-		union_lock();
-		error = union_copyup(un, (ap->a_vap->va_data_size != 0), ap->a_context);
-		union_unlock();
-		if (error) {
-			return (error);
-		}
-	}
-
-	/*
-	 * Try to set attributes in upper layer,
-	 * otherwise return read-only filesystem error.
-	 */
-	if (un->un_uppervp != NULLVP) {
-		error = vnode_setattr(un->un_uppervp, ap->a_vap, ap->a_context);
-		if ((error == 0) && VATTR_IS_ACTIVE(ap->a_vap, va_data_size)) {
-			union_lock();
-			union_newsize(ap->a_vp, ap->a_vap->va_data_size, VNOVAL);
-			union_unlock();
-		}
-	} else {
-		error = EROFS;
-	}
-
-	return (error);
-}
-
-static int
-union_read(struct vnop_read_args *ap)
-/*
-	struct vnop_read_args {
-		struct vnode *a_vp;
-		struct uio *a_uio;
-		int  a_ioflag;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	int error;
-	struct vnode *vp = OTHERVP(ap->a_vp);
-
-	error = VNOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_context);
-
-	/*
-	 * XXX
-	 * perhaps the size of the underlying object has changed under
-	 * our feet.  take advantage of the offset information present
-	 * in the uio structure.
-	 */
-	if (error == 0) {
-		struct union_node *un = VTOUNION(ap->a_vp);
-		off_t cur = ap->a_uio->uio_offset;
-
-		if (vp == un->un_uppervp) {
-			if (cur > un->un_uppersz) {
-				union_lock();
-				union_newsize(ap->a_vp, cur, VNOVAL);
-				union_unlock();
-			}
-		} else {
-			if (cur > un->un_lowersz) {
-				union_lock();
-				union_newsize(ap->a_vp, VNOVAL, cur);
-				union_unlock();
-			}
-		}
-	}
-
-	return (error);
-}
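
union_read, union_write, union_pagein and union_pageout all apply the same size-tracking rule after a successful transfer: if the I/O ended beyond the cached size of the layer it hit, widen the cached size to the end of the I/O (union_newsize() only ever grows it here). A minimal restatement with stand-in types:

#include <stdio.h>

typedef long long off64;	/* stand-in for off_t */

static off64
union_track_size(off64 cached_sz, off64 io_end)
{
	return io_end > cached_sz ? io_end : cached_sz;
}

int
main(void)
{
	off64 uppersz = 4096;

	uppersz = union_track_size(uppersz, 10000);	/* grows to 10000 */
	uppersz = union_track_size(uppersz, 512);	/* unchanged */
	printf("%lld\n", uppersz);
	return 0;
}
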
-
-static int
-union_write(struct vnop_write_args *ap)
-/*
-	struct vnop_write_args {
-		struct vnode *a_vp;
-		struct uio *a_uio;
-		int  a_ioflag;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	int error;
-	struct vnode *vp;
-	struct union_node *un = VTOUNION(ap->a_vp);
-
-	vp = UPPERVP(ap->a_vp);
-	if (vp == NULLVP)
-		panic("union: missing upper layer in write");
-
-	error = VNOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_context);
-
-	/*
-	 * the size of the underlying object may be changed by the
-	 * write.
-	 */
-	if (error == 0) {
-		off_t cur = ap->a_uio->uio_offset;
-
-		if (cur > un->un_uppersz) {
-			union_lock();
-			union_newsize(ap->a_vp, cur, VNOVAL);
-			union_unlock();
-		}
-	}
-
-	return (error);
-}
-
-
-static int
-union_ioctl(struct vnop_ioctl_args *ap)
-/*
-	struct vnop_ioctl_args {
-		struct vnode *a_vp;
-		int  a_command;
-		caddr_t  a_data;
-		int  a_fflag;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	register struct vnode *ovp = OTHERVP(ap->a_vp);
-
-	ap->a_vp = ovp;
-	return (VCALL(ovp, VOFFSET(vnop_ioctl), ap));
-}
-
-static int
-union_select(struct vnop_select_args *ap)
-/*
-	struct vnop_select_args {
-		struct vnode *a_vp;
-		int  a_which;
-		int  a_fflags;
-		void * a_wql;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	register struct vnode *ovp = OTHERVP(ap->a_vp);
-
-	ap->a_vp = ovp;
-	return (VCALL(ovp, VOFFSET(vnop_select), ap));
-}
-
-static int
-union_revoke(struct vnop_revoke_args *ap)
-/*
-	struct vnop_revoke_args {
-		struct vnode *a_vp;
-		int a_flags;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	struct vnode *vp = ap->a_vp;
-
-	if (UPPERVP(vp))
-		VNOP_REVOKE(UPPERVP(vp), ap->a_flags, ap->a_context);
-	if (LOWERVP(vp))
-		VNOP_REVOKE(LOWERVP(vp), ap->a_flags, ap->a_context);
-	vnode_reclaim(vp);
-
-	return (0);
-}
-
-static int
-union_mmap(struct vnop_mmap_args *ap)
-/*
-	struct vnop_mmap_args {
-		struct vnode *a_vp;
-		int  a_fflags;
-		kauth_cred_t a_cred;
-		struct proc *a_p;
-	} *ap;
-*/
-{
-	register struct vnode *ovp = OTHERVP(ap->a_vp);
-
-	ap->a_vp = ovp;
-	return (VCALL(ovp, VOFFSET(vnop_mmap), ap));
-}
-
-static int
-union_mnomap(struct vnop_mnomap_args *ap)
-/*
-	struct vnop_mnomap_args {
-		struct vnode *a_vp;
-		int  a_fflags;
-		kauth_cred_t a_cred;
-		struct proc *a_p;
-	} *ap;
-*/
-{
-	register struct vnode *ovp = OTHERVP(ap->a_vp);
-
-	ap->a_vp = ovp;
-	return (VCALL(ovp, VOFFSET(vnop_mnomap), ap));
-}
-
-static int
-union_fsync(struct vnop_fsync_args *ap)
-/*
-	struct vnop_fsync_args {
-		struct vnode *a_vp;
-		int  a_waitfor;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	int error = 0;
-	struct vnode *targetvp = OTHERVP(ap->a_vp);
-
-	if (targetvp != NULLVP) {
-
-		error = VNOP_FSYNC(targetvp, ap->a_waitfor, ap->a_context);
-	}
-
-	return (error);
-}
-
-static int
-union_remove(struct vnop_remove_args *ap)
-/*
-	struct vnop_remove_args {
-		struct vnode *a_dvp;
-		struct vnode *a_vp;
-		struct componentname *a_cnp;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	int error, flags;
-	struct union_node *dun = VTOUNION(ap->a_dvp);
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct componentname *cnp = ap->a_cnp;
-	int busydel = 0;
-
-	if (dun->un_uppervp == NULLVP)
-		panic("union remove: null upper vnode");
-
-	if (UNNODE_FAULTIN(dun) && ((ap->a_vp != NULLVP) &&
-		((vnode_isreg(ap->a_vp) != 0) || (vnode_islnk(ap->a_vp) != 0)))) {
-			return(VNOP_REMOVE(dun->un_uppervp, ap->a_vp, ap->a_cnp, ap->a_flags, ap->a_context));
-	}
-
-	if (un->un_uppervp != NULLVP) {
-		struct vnode *dvp = dun->un_uppervp;
-		struct vnode *vp = un->un_uppervp;
-
-		flags = ap->a_flags;
-		if (vnode_isinuse(ap->a_vp, 0))
-			busydel = 1;
-		if ((flags & VNODE_REMOVE_NODELETEBUSY) && (busydel != 0)) {
-				return(EBUSY);
-		}
-		if (union_dowhiteout(un, cnp->cn_context))
-			cnp->cn_flags |= DOWHITEOUT;
-	
-		if (busydel != 0)  {
-			union_lock();
-			un->un_flags |= UN_DELETED;
-			if (un->un_flags & UN_CACHED) {
-				un->un_flags &= ~UN_CACHED;
-				LIST_REMOVE(un, un_cache);
-			}
-			union_unlock();
-			vnode_ref(vp);
-		}
-		error = VNOP_REMOVE(dvp, vp, cnp, 0, ap->a_context);
-		if (!error) {
-			union_lock();
-			if (busydel == 0)
-				union_removed_upper(un);
-			union_unlock();
-		}
-	} else {
-		if (UNNODE_FAULTIN(un))
-			panic("faultfs: No uppervp");
-		error = union_mkwhiteout(
-			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
-			dun->un_uppervp, ap->a_cnp, un->un_path);
-	}
-
-	return (error);
-}
-
-static int
-union_link(struct vnop_link_args *ap)
-/*
-	struct vnop_link_args {
-		struct vnode *a_vp;
-		struct vnode *a_tdvp;
-		struct componentname *a_cnp;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	int error = 0;
-	struct componentname *cnp = ap->a_cnp;
-	struct union_node *un;
-	struct vnode *vp;
-	struct vnode *tdvp;
-
-	un = VTOUNION(ap->a_tdvp);
-
-	if (ap->a_tdvp->v_op != ap->a_vp->v_op) {
-		vp = ap->a_vp;
-	} else {
-		struct union_node *tun = VTOUNION(ap->a_vp);
-		if (tun->un_uppervp == NULLVP) {
-			if (UNNODE_FAULTIN(tun))
-				panic("faultfs: No uppervp");
-			if (un->un_uppervp == tun->un_dirvp) {
-			}
-			union_lock();
-			/* Would need to drain for above,below mount and faulin does not enter this path */
-			un->un_flags |= UN_LOCKED;
-			error = union_copyup(tun, 1, ap->a_context);
-			un->un_flags &= ~UN_LOCKED;
-			if ((un->un_flags & UN_WANT) == UN_WANT) {
-				un->un_flags &=  ~UN_WANT;
-				wakeup(&un->un_flags);
-			}
-			union_unlock();
-		}
-		vp = tun->un_uppervp;
-	}
-	tdvp = un->un_uppervp;
-	if (tdvp == NULLVP)
-		error = EROFS;
-
-	if (error) {
-		return (error);
-	}
-
-
-	error =  (VNOP_LINK(vp, tdvp, cnp, ap->a_context));
-	return(error);
-}
-
-static int
-union_rename(struct vnop_rename_args *ap)
-/*
-	struct vnop_rename_args {
-		struct vnode *a_fdvp;
-		struct vnode *a_fvp;
-		struct componentname *a_fcnp;
-		struct vnode *a_tdvp;
-		struct vnode *a_tvp;
-		struct componentname *a_tcnp;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	int error;
-
-	struct vnode *fdvp = ap->a_fdvp;
-	struct vnode *fvp = ap->a_fvp;
-	struct vnode *tdvp = ap->a_tdvp;
-	struct vnode *tvp = ap->a_tvp;
-
-
-	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
-		struct union_node *un = VTOUNION(fdvp);
-		if (un->un_uppervp == NULLVP) {
-			if (UNNODE_FAULTIN(un))
-				panic("faultfs rename: No uppervp");
-			/*
-			 * this should never happen in normal
-			 * operation but might if there was
-			 * a problem creating the top-level shadow
-			 * directory.
-			 */
-			error = EXDEV;
-			goto bad;
-		}
-
-		fdvp = un->un_uppervp;
-	}
-
-	if (fvp->v_op == union_vnodeop_p) {	/* always true */
-		struct union_node *un = VTOUNION(fvp);
-		if (un->un_uppervp == NULLVP) {
-			if (UNNODE_FAULTIN(un))
-				panic("faultfs rename: No uppervp");
-			/* XXX: should do a copyup */
-			error = EXDEV;
-			goto bad;
-		}
-
-		if (un->un_lowervp != NULLVP)
-			ap->a_fcnp->cn_flags |= DOWHITEOUT;
-
-		fvp = un->un_uppervp;
-	}
-
-	if (tdvp->v_op == union_vnodeop_p) {
-		struct union_node *un = VTOUNION(tdvp);
-		if (un->un_uppervp == NULLVP) {
-			/*
-			 * this should never happen in normal
-			 * operation but might if there was
-			 * a problem creating the top-level shadow
-			 * directory.
-			 */
-			if (UNNODE_FAULTIN(un))
-				panic("faultfs rename: No uppervp");
-			error = EXDEV;
-			goto bad;
-		}
-
-		tdvp = un->un_uppervp;
-	}
-
-	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
-		struct union_node *un = VTOUNION(tvp);
-
-		tvp = un->un_uppervp;
-	}
-
-	return (VNOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp, ap->a_context));
-
-bad:
-	return (error);
-}
-
-static int
-union_mkdir(struct vnop_mkdir_args *ap)
-/*
-	struct vnop_mkdir_args {
-		struct vnode *a_dvp;
-		struct vnode **a_vpp;
-		struct componentname *a_cnp;
-		struct vnode_attr *a_vap;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	struct union_node *un = VTOUNION(ap->a_dvp);
-	struct vnode *dvp = un->un_uppervp;
-	struct componentname *cnp = ap->a_cnp;
-
-	if (dvp != NULLVP) {
-		int error;
-		struct vnode *vp;
-
-
-		/* note that this is a direct fallthrough to the filesystem */
-		error = VNOP_MKDIR(dvp, &vp, cnp, ap->a_vap, ap->a_context);
-		if (error)
-			return (error);
-
-		union_lock();
-		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
-				NULLVP, cnp, vp, NULLVP, 1);
-		union_unlock();
-		if (error)
-			vnode_put(vp);
-		return (error);
-	}
-	return (EROFS);
-}
-
-static int
-union_rmdir(struct vnop_rmdir_args *ap)
-/*
-	struct vnop_rmdir_args {
-		struct vnode *a_dvp;
-		struct vnode *a_vp;
-		struct componentname *a_cnp;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	int error;
-	struct union_node *dun = VTOUNION(ap->a_dvp);
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct componentname *cnp = ap->a_cnp;
-	int busydel = 0;
-
-	/******* NODE HAS TO BE LOCKED ******/
-	if (dun->un_uppervp == NULLVP)
-		panic("union rmdir: null upper vnode");
-
-	if (un->un_uppervp != NULLVP) {
-		struct vnode *dvp = dun->un_uppervp;
-		struct vnode *vp = un->un_uppervp;
-
-		if (vnode_isinuse(ap->a_vp, 0)) {
-			busydel = 1;
-			union_lock();
-			un->un_flags |= UN_DELETED;
-			if (un->un_flags & UN_CACHED) {
-				un->un_flags &= ~UN_CACHED;
-				LIST_REMOVE(un, un_cache);
-			}
-			union_unlock();
-			vnode_ref(vp);
-		}
-
-
-		if (union_dowhiteout(un, cnp->cn_context))
-			cnp->cn_flags |= DOWHITEOUT;
-		error = VNOP_RMDIR(dvp, vp, ap->a_cnp, ap->a_context);
-		if (!error) {
-			union_lock();
-			if (busydel == 0)
-				union_removed_upper(un);
-			union_unlock();
-		}
-	} else {
-		if (UNNODE_FAULTIN(un))
-			panic("faultfs: No uppervp");
-		error = union_mkwhiteout(
-			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
-			dun->un_uppervp, ap->a_cnp, un->un_path);
-	}
-	return (error);
-}
-
-static int
-union_symlink(struct vnop_symlink_args *ap)
-/*
-	struct vnop_symlink_args {
-		struct vnode *a_dvp;
-		struct vnode **a_vpp;
-		struct componentname *a_cnp;
-		struct vnode_attr *a_vap;
-		char *a_target;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	struct union_node *un = VTOUNION(ap->a_dvp);
-	struct vnode *dvp = un->un_uppervp;
-	struct componentname *cnp = ap->a_cnp;
-
-	if (dvp != NULLVP) {
-		int error;
-		struct vnode *vp;
-
-		error = VNOP_SYMLINK(dvp, &vp, cnp, ap->a_vap, ap->a_target, ap->a_context);
-		*ap->a_vpp = vp;
-		return (error);
-	}
-	return (EROFS);
-}
-
-/*
- * union_readdir works in concert with getdirentries and
- * readdir(3) to provide a list of entries in the unioned
- * directories.  getdirentries is responsible for walking
- * down the union stack.  readdir(3) is responsible for
- * eliminating duplicate names from the returned data stream.
- */
-static int
-union_readdir(struct vnop_readdir_args *ap)
-/*
-	struct vnop_readdir_args {
-		struct vnodeop_desc *a_desc;
-		struct vnode *a_vp;
-		struct uio *a_uio;
-		int a_flags;
-		int *a_eofflag;
-		int *a_numdirent;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	struct union_node *un = VTOUNION(ap->a_vp);
-	struct vnode *uvp = un->un_uppervp;
-
-	if (ap->a_flags & (VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF))
-		return (EINVAL);
-
-	if (uvp == NULLVP)
-		return (0);
-
-	ap->a_vp = uvp;
-	return (VCALL(uvp, VOFFSET(vnop_readdir), ap));
-}
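
As the comment above notes, duplicate elimination is left to readdir(3): upper-layer names shadow same-named lower-layer entries. A user-space sketch of that shadowing rule over two flat name lists (purely illustrative, not the libc implementation):

#include <stdio.h>
#include <string.h>

static void
union_list(const char **upper, int nu, const char **lower, int nl)
{
	int i, j, dup;

	for (i = 0; i < nu; i++)
		printf("%s\n", upper[i]);	/* upper entries always win */
	for (j = 0; j < nl; j++) {
		dup = 0;
		for (i = 0; i < nu; i++)
			if (strcmp(lower[j], upper[i]) == 0)
				dup = 1;	/* shadowed by the upper layer */
		if (!dup)
			printf("%s\n", lower[j]);
	}
}

int
main(void)
{
	const char *up[]  = { "a", "b" };
	const char *low[] = { "b", "c" };

	union_list(up, 2, low, 2);	/* prints a, b, c */
	return 0;
}
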
-
-static int
-union_readlink(struct vnop_readlink_args *ap)
-/*
-	struct vnop_readlink_args {
-		struct vnode *a_vp;
-		struct uio *a_uio;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	int error;
-	struct vnode *vp = OTHERVP(ap->a_vp);
-
-	ap->a_vp = vp;
-	error = VCALL(vp, VOFFSET(vnop_readlink), ap);
-
-	return (error);
-}
-
-static int
-union_inactive(struct vnop_inactive_args *ap)
-/*
-	struct vnop_inactive_args {
-		struct vnode *a_vp;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	struct vnode *vp = ap->a_vp;
-	struct union_node *un = VTOUNION(vp);
-
-	/*
-	 * Do nothing (and _don't_ bypass).
-	 * Wait to vnode_put lowervp until reclaim,
-	 * so that until then our union_node is in the
-	 * cache and reusable.
-	 *
-	 * NEEDSWORK: Someday, consider inactive'ing
-	 * the lowervp and then trying to reactivate it
-	 * with capabilities (v_id)
-	 * like they do in the name lookup cache code.
-	 * That's too much work for now.
-	 */
-
-	union_lock();
-	if (un->un_flags & UN_DELETED) {
-		if(un->un_uppervp != NULLVP) {
-			vnode_rele(un->un_uppervp);
-		}
-		union_removed_upper(un);
-	}
-
-	if (un->un_dircache != 0)  {
-			union_dircache_free(un);
-	}
-	if (un->un_flags & UN_DIRENVN) {
-		vnode_recycle(vp);
-	}
-
-	union_unlock();
-
-	return (0);
-}
-
-static int
-union_reclaim(struct vnop_reclaim_args *ap)
-/*
-	struct vnop_reclaim_args {
-		struct vnode *a_vp;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-
-	union_lock();
-	union_freevp(ap->a_vp);
-	union_unlock();
-
-	return (0);
-}
-
-static int
-union_blockmap(struct vnop_blockmap_args *ap)
-/*
-	struct vnop_blockmap_args {
-		struct vnode *a_vp;
-		off_t a_offset;    
-		size_t a_size;
-		daddr64_t *a_bpn;
-		size_t *a_run;
-		void *a_poff;
-		int a_flags;
-	} *ap;
-*/
-{
-	int error;
-	struct vnode *vp = OTHERVP(ap->a_vp);
-
-	ap->a_vp = vp;
-	error = VCALL(vp, VOFFSET(vnop_blockmap), ap);
-
-	return (error);
-}
-
-static int
-union_pathconf(struct vnop_pathconf_args *ap)
-/*
-	struct vnop_pathconf_args {
-		struct vnode *a_vp;
-		int a_name;
-		int *a_retval;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	int error;
-	struct vnode *vp = OTHERVP(ap->a_vp);
-
-	ap->a_vp = vp;
-	error = VCALL(vp, VOFFSET(vnop_pathconf), ap);
-
-	return (error);
-}
-
-static int
-union_advlock(struct vnop_advlock_args *ap)
-/*
-	struct vnop_advlock_args {
-		struct vnode *a_vp;
-		caddr_t  a_id;
-		int  a_op;
-		struct flock *a_fl;
-		int  a_flags;
-		vfs_context_t a_context;
-	} *ap;
-*/
-{
-	register struct vnode *ovp = OTHERVP(ap->a_vp);
-
-	ap->a_vp = ovp;
-	return (VCALL(ovp, VOFFSET(vnop_advlock), ap));
-}
-
-
-/*
- * XXX - vnop_strategy must be hand coded because it has no
- * vnode in its arguments.
- * This goes away with a merged VM/buffer cache.
- */
-static int
-union_strategy(struct vnop_strategy_args *ap)
-/*
-	struct vnop_strategy_args {
-		struct buf *a_bp;
-	} *ap;
-*/
-{
-	struct buf *bp = ap->a_bp;
-	int error;
-	struct vnode *savedvp;
-
-	savedvp = buf_vnode(bp);
-	buf_setvnode(bp, OTHERVP(savedvp));
-
-#if DIAGNOSTIC
-	if (buf_vnode(bp) == NULLVP)
-		panic("union_strategy: nil vp");
-	if (((buf_flags(bp) & B_READ) == 0) &&
-	    (buf_vnode(bp) == LOWERVP(savedvp)))
-		panic("union_strategy: writing to lowervp");
-#endif
-
-	error = VNOP_STRATEGY(bp);
-	buf_setvnode(bp, savedvp);
-
-	return (error);
-}
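
The save/swap/restore idiom above is worth isolating: because vnop_strategy carries no vnode argument of its own, the buffer's vnode is temporarily pointed at the backing layer. A self-contained model with stand-in types (none of these names are kernel API):

#include <stdio.h>

struct xbuf { int vnode; };

static int
other_layer(int vp)
{
	return vp + 100;	/* pretend: map a union vp to its backing vp */
}

static int
lower_strategy(struct xbuf *bp)
{
	printf("I/O issued on vnode %d\n", bp->vnode);
	return 0;
}

static int
pass_down(struct xbuf *bp)
{
	int saved = bp->vnode;
	int error;

	bp->vnode = other_layer(saved);	/* redirect I/O to the backing layer */
	error = lower_strategy(bp);
	bp->vnode = saved;		/* restore before returning */
	return error;
}

int
main(void)
{
	struct xbuf b = { 7 };

	return pass_down(&b);	/* prints "I/O issued on vnode 107" */
}
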
-
-/* Pagein */
-static int
-union_pagein(struct vnop_pagein_args *ap)
-/*
-	struct vnop_pagein_args {
-	   	struct vnode 	*a_vp,
-	   	upl_t		a_pl,
-		upl_offset_t	a_pl_offset,
-		off_t		a_f_offset,
-		size_t		a_size,
-		int		a_flags
-		vfs_context_t	a_context;
-	} *ap;
-*/
-{
-	int error;
-	struct vnode *vp = OTHERVP(ap->a_vp);
-
-	error = VNOP_PAGEIN(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
-	                    ap->a_size, ap->a_flags, ap->a_context);
-
-	/*
-	 * XXX
-	 * perhaps the size of the underlying object has changed under
-	 * our feet.  take advantage of the offset information present
-	 * in the uio structure.
-	 */
-	if (error == 0) {
-		struct union_node *un = VTOUNION(ap->a_vp);
-		off_t cur = ap->a_f_offset + (off_t)ap->a_pl_offset;
-
-		if (vp == un->un_uppervp) {
-			if (cur > un->un_uppersz) {
-				union_lock();
-				union_newsize(ap->a_vp, cur, VNOVAL);
-				union_unlock();
-			}
-		} else {
-			if (cur > un->un_lowersz) {
-				union_lock();
-				union_newsize(ap->a_vp, VNOVAL, cur);
-				union_unlock();
-			}
-		}
-	}
-
-	return (error);
-}
-
-/* Pageout  */
-static int
-union_pageout(struct vnop_pageout_args *ap)
-/*
-	struct vnop_pageout_args {
-	   	struct vnode 	*a_vp,
-	   	upl_t		a_pl,
-		vm_offset_t	a_pl_offset,
-		off_t		a_f_offset,
-		size_t		a_size,
-		int		a_flags
-		vfs_context_t	a_context;
-	} *ap;
-*/
-{
-	int error;
-	struct vnode *vp;
-	struct union_node *un = VTOUNION(ap->a_vp);
-
-	vp = UPPERVP(ap->a_vp);
-	if (vp == NULLVP)
-		panic("union: missing upper layer in pageout");
-
-	error = VNOP_PAGEOUT(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
-	                     ap->a_size, ap->a_flags, ap->a_context);
-
-	/*
-	 * the size of the underlying object may be changed by the
-	 * write.
-	 */
-	if (error == 0) {
-		off_t cur = ap->a_f_offset + (off_t)ap->a_pl_offset;
-
-		if (cur > un->un_uppersz) {
-			union_lock();
-			union_newsize(ap->a_vp, cur, VNOVAL);
-			union_unlock();
-		}
-	}
-
-	return (error);
-}
-
-/* Blktooff derives file offset for the given logical block number */
-static int
-union_blktooff(struct vnop_blktooff_args *ap)
-/*
-	struct vnop_blktooff_args {
-		struct vnode *a_vp;
-		daddr64_t a_lblkno;
-		off_t *a_offset;    
-	} *ap;
-*/
-{
-	int error;
-	struct vnode *vp = OTHERVP(ap->a_vp);
-
-	error = VNOP_BLKTOOFF(vp, ap->a_lblkno, ap->a_offset);
-
-	return(error);
-}
-
-/* offtoblk derives file offset for the given logical block number */
-static int
-union_offtoblk(struct vnop_offtoblk_args *ap)
-/*
-	struct vnop_offtoblk_args  {
-		struct vnode *a_vp;
-		off_t a_offset;    
-		daddr64_t *a_lblkno;
-	} *ap;
-*/
-{
-	int error;
-	struct vnode *vp = OTHERVP(ap->a_vp);
-
-	error = VNOP_OFFTOBLK(vp, ap->a_offset, ap->a_lblkno);
-
-	return(error);
-}
-
-#define VOPFUNC int (*)(void *)
-
-/*
- * Global vfs data structures
- */
-int (**union_vnodeop_p)(void *);
-struct vnodeopv_entry_desc union_vnodeop_entries[] = {
-	{ &vnop_default_desc, (VOPFUNC)vn_default_error },
-	{ &vnop_lookup_desc, (VOPFUNC)union_lookup },		/* lookup */
-	{ &vnop_create_desc, (VOPFUNC)union_create },		/* create */
-	{ &vnop_whiteout_desc, (VOPFUNC)union_whiteout },	/* whiteout */
-	{ &vnop_mknod_desc, (VOPFUNC)union_mknod },		/* mknod */
-	{ &vnop_open_desc, (VOPFUNC)union_open },		/* open */
-	{ &vnop_close_desc, (VOPFUNC)union_close },		/* close */
-	{ &vnop_access_desc, (VOPFUNC)union_access },		/* access */
-	{ &vnop_getattr_desc, (VOPFUNC)union_getattr },		/* getattr */
-	{ &vnop_setattr_desc, (VOPFUNC)union_setattr },		/* setattr */
-	{ &vnop_read_desc, (VOPFUNC)union_read },		/* read */
-	{ &vnop_write_desc, (VOPFUNC)union_write },		/* write */
-	{ &vnop_ioctl_desc, (VOPFUNC)union_ioctl },		/* ioctl */
-	{ &vnop_select_desc, (VOPFUNC)union_select },		/* select */
-	{ &vnop_revoke_desc, (VOPFUNC)union_revoke },		/* revoke */
-	{ &vnop_mmap_desc, (VOPFUNC)union_mmap },		/* mmap */
-	{ &vnop_mnomap_desc, (VOPFUNC)union_mnomap },		/* mnomap */
-	{ &vnop_fsync_desc, (VOPFUNC)union_fsync },		/* fsync */
-	{ &vnop_remove_desc, (VOPFUNC)union_remove },		/* remove */
-	{ &vnop_link_desc, (VOPFUNC)union_link },		/* link */
-	{ &vnop_rename_desc, (VOPFUNC)union_rename },		/* rename */
-	{ &vnop_mkdir_desc, (VOPFUNC)union_mkdir },		/* mkdir */
-	{ &vnop_rmdir_desc, (VOPFUNC)union_rmdir },		/* rmdir */
-	{ &vnop_symlink_desc, (VOPFUNC)union_symlink },		/* symlink */
-	{ &vnop_readdir_desc, (VOPFUNC)union_readdir },		/* readdir */
-	{ &vnop_readlink_desc, (VOPFUNC)union_readlink },	/* readlink */
-	{ &vnop_inactive_desc, (VOPFUNC)union_inactive },	/* inactive */
-	{ &vnop_reclaim_desc, (VOPFUNC)union_reclaim },		/* reclaim */
-	{ &vnop_strategy_desc, (VOPFUNC)union_strategy },	/* strategy */
-	{ &vnop_pathconf_desc, (VOPFUNC)union_pathconf },	/* pathconf */
-	{ &vnop_advlock_desc, (VOPFUNC)union_advlock },		/* advlock */
-#ifdef notdef
-	{ &vnop_bwrite_desc, (VOPFUNC)union_bwrite },		/* bwrite */
-#endif
-	{ &vnop_pagein_desc, (VOPFUNC)union_pagein },		/* Pagein */
-	{ &vnop_pageout_desc, (VOPFUNC)union_pageout },		/* Pageout */
-        { &vnop_copyfile_desc, (VOPFUNC)err_copyfile },		/* Copyfile */
-	{ &vnop_blktooff_desc, (VOPFUNC)union_blktooff },	/* blktooff */
-	{ &vnop_offtoblk_desc, (VOPFUNC)union_offtoblk },	/* offtoblk */
-	{ &vnop_blockmap_desc, (VOPFUNC)union_blockmap },	/* blockmap */
-	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
-};
-struct vnodeopv_desc union_vnodeop_opv_desc =
-	{ &union_vnodeop_p, union_vnodeop_entries };
diff --git a/bsd/net/Makefile b/bsd/net/Makefile
index 2f77f154b..79c622bf8 100644
--- a/bsd/net/Makefile
+++ b/bsd/net/Makefile
@@ -9,14 +9,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 DATAFILES= \
@@ -32,14 +28,15 @@ KERNELFILES= \
 	if_ether.h init.h radix.h
 
 PRIVATE_DATAFILES = \
-	if_atm.h if_vlan_var.h if_ppp.h firewire.h \
+	if_vlan_var.h if_ppp.h firewire.h \
 	ppp_defs.h radix.h if_bond_var.h lacp.h ndrv_var.h \
-	raw_cb.h etherdefs.h iso88025.h if_pflog.h pfvar.h \
-	if_bridgevar.h
+	netsrc.h raw_cb.h etherdefs.h iso88025.h if_pflog.h pfvar.h \
+	if_bridgevar.h ntstat.h if_llreach.h
 
 PRIVATE_KERNELFILES = ${KERNELFILES} \
 	bpfdesc.h dlil_pvt.h ppp_comp.h \
-	zlib.h bpf_compat.h net_osdep.h
+	zlib.h bpf_compat.h net_osdep.h \
+	ntstat.h if_llreach.h
 
 INSTALL_MI_LIST	= ${DATAFILES}
 
diff --git a/bsd/net/bpf.c b/bsd/net/bpf.c
index d9ec5b137..e370dfc5e 100644
--- a/bsd/net/bpf.c
+++ b/bsd/net/bpf.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -119,6 +119,7 @@
 #include <net/dlil.h>
 
 #include <kern/locks.h>
+#include <kern/thread_call.h>
 
 #if CONFIG_MACF_NET
 #include <security/mac_framework.h>
@@ -147,13 +148,13 @@ static caddr_t bpf_alloc();
  * The default read buffer size is patchable.
  */
 static unsigned int bpf_bufsize = BPF_BUFSIZE;
-SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW, 
+SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&bpf_bufsize, 0, "");
-static unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
-SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW, 
+__private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
+SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&bpf_maxbufsize, 0, "");
 static unsigned int bpf_maxdevices = 256;
-SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW, 
+SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&bpf_maxdevices, 0, "");
 
 /*
@@ -196,6 +197,7 @@ static void	bpf_mcopy(const void *, void *, size_t);
 static int	bpf_movein(struct uio *, int,
 		    struct mbuf **, struct sockaddr *, int *);
 static int	bpf_setif(struct bpf_d *, ifnet_t ifp, u_int32_t dlt);
+static void bpf_timed_out(void *, void *);
 static void bpf_wakeup(struct bpf_d *);
 static void	catchpacket(struct bpf_d *, u_char *, u_int,
 		    u_int, void (*)(const void *, void *, size_t));
@@ -216,26 +218,26 @@ static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
  * Darwin differs from BSD here, the following are static
  * on BSD and not static on Darwin.
  */
-	d_open_t	bpfopen;
-	d_close_t	bpfclose;
-	d_read_t	bpfread;
-	d_write_t	bpfwrite;
-	 ioctl_fcn_t	bpfioctl;
-	select_fcn_t	bpfpoll;
+	d_open_t	    bpfopen;
+	d_close_t	    bpfclose;
+	d_read_t	    bpfread;
+	d_write_t	    bpfwrite;
+	ioctl_fcn_t	    bpfioctl;
+	select_fcn_t	bpfselect;
 
 
 /* Darwin's cdevsw struct differs slightly from BSDs */
 #define CDEV_MAJOR 23
 static struct cdevsw bpf_cdevsw = {
-	/* open */	bpfopen,
-	/* close */	bpfclose,
-	/* read */	bpfread,
-	/* write */	bpfwrite,
-	/* ioctl */	bpfioctl,
+	/* open */	    bpfopen,
+	/* close */	    bpfclose,
+	/* read */	    bpfread,
+	/* write */	    bpfwrite,
+	/* ioctl */	    bpfioctl,
 	/* stop */		eno_stop,
 	/* reset */		eno_reset,
 	/* tty */		NULL,
-	/* select */	bpfpoll,
+	/* select */	bpfselect,
 	/* mmap */		eno_mmap,
 	/* strategy*/	eno_strat,
 	/* getc */		eno_getc,
@@ -314,6 +316,11 @@ bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *soc
 		sa_family = AF_IEEE80211;
 		hlen = 0;
 		break;
+	
+	case DLT_IEEE802_11_RADIO:
+		sa_family = AF_IEEE80211;
+		hlen = 0;
+		break;
 
 	default:
 		return (EIO);
@@ -367,6 +374,7 @@ bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *soc
 	m->m_pkthdr.len = m->m_len = len;
 	m->m_pkthdr.rcvif = NULL;
 	*mp = m;
+	
 	/*
 	 * Make room for link header.
 	 */
@@ -383,8 +391,25 @@ bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *soc
 			goto bad;
 	}
 	error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
-	if (!error)
-		return (0);
+	if (error)
+		goto bad;
+	
+	/* Check for multicast destination */
+	switch (linktype) {
+		case DLT_EN10MB: {
+			struct ether_header *eh = mtod(m, struct ether_header *);
+			
+			if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
+				if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0)
+					m->m_flags |= M_BCAST;
+				else
+					m->m_flags |= M_MCAST;
+			}
+			break;
+		}
+	}
+
+	return (0);
  bad:
 	m_freem(m);
 	return (error);
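
A user-space restatement of the destination classification added above: the all-ones address is broadcast, and any other address with the group bit set in the first octet is multicast. The helper below mirrors that logic with plain byte arrays (illustrative only):

#include <stdio.h>
#include <string.h>

static const unsigned char bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

static const char *
classify(const unsigned char dhost[6])
{
	if (dhost[0] & 0x01)	/* group bit, as ETHER_IS_MULTICAST() tests */
		return memcmp(dhost, bcast, 6) == 0 ? "M_BCAST" : "M_MCAST";
	return "unicast";
}

int
main(void)
{
	const unsigned char mc[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };

	printf("%s\n", classify(mc));	/* prints "M_MCAST" */
	return 0;
}
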
@@ -550,6 +575,59 @@ bpf_detachd(struct bpf_d *d)
 }
 
 
+/*
+ * Start asynchronous timer, if necessary.
+ * Must be called with bpf_mlock held.
+ */
+static void
+bpf_start_timer(struct bpf_d *d)
+{
+	uint64_t deadline;
+	struct timeval tv;
+
+	if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
+		tv.tv_sec = d->bd_rtout / hz;
+		tv.tv_usec = (d->bd_rtout % hz) * tick;
+
+		clock_interval_to_deadline((uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
+				NSEC_PER_USEC,
+				&deadline);
+		/*
+		 * The state is BPF_IDLE, so the timer hasn't 
+		 * been started yet, and hasn't gone off yet;
+		 * there is no thread call scheduled, so this
+		 * won't change the schedule.
+		 *
+		 * XXX - what if, by the time it gets entered,
+		 * the deadline has already passed?
+		 */
+		thread_call_enter_delayed(d->bd_thread_call, deadline);
+		d->bd_state = BPF_WAITING;
+	}
+}
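
The interval arithmetic above converts bd_rtout, kept in scheduler ticks, into microseconds before asking for an absolute deadline via clock_interval_to_deadline(). A worked example, assuming hz = 100 (so tick = 1000000 / hz = 10000 microseconds per tick):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	const uint64_t hz = 100, tick = 1000000 / hz;
	uint64_t bd_rtout = 250;	/* a 2.5 second read timeout, in ticks */
	uint64_t sec  = bd_rtout / hz;	/* 2 seconds */
	uint64_t usec = (bd_rtout % hz) * tick;	/* 50 ticks -> 500000 us */

	printf("%llu us\n", (unsigned long long)(sec * 1000000 + usec));
	/* prints "2500000 us" */
	return 0;
}
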
+
+/*
+ * Cancel asynchronous timer.
+ * Must be called with bpf_mlock held.
+ */
+static boolean_t
+bpf_stop_timer(struct bpf_d *d)
+{
+	/*
+	 * If the timer has already gone off, this does nothing.
+	 * Our caller is expected to set d->bd_state to BPF_IDLE,
+	 * with the bpf_mlock, after we are called. bpf_timed_out()
+	 * also grabs bpf_mlock, so, if the timer has gone off and 
+	 * bpf_timed_out() hasn't finished, it's waiting for the
+	 * lock; when this thread releases the lock, it will 
+	 * find the state is BPF_IDLE, and just release the 
+	 * lock and return.
+	 */
+	return (thread_call_cancel(d->bd_thread_call));
+}
+
 /*
  * Open ethernet device.  Returns ENXIO for illegal minor device number,
  * EBUSY if file is open by another process.
@@ -612,6 +690,16 @@ bpfopen(dev_t dev, int flags, __unused int fmt,
 	d->bd_sig = SIGIO;
 	d->bd_seesent = 1;
 	d->bd_oflags = flags;
+	d->bd_state = BPF_IDLE;
+	d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
+
+	if (d->bd_thread_call == NULL) {
+		printf("bpfopen: malloc thread call failed\n");
+		bpf_dtab[minor(dev)] = NULL;
+		lck_mtx_unlock(bpf_mlock);
+		_FREE(d, M_DEVBUF);
+		return ENOMEM;
+	}
 #if CONFIG_MACF_NET
 	mac_bpfdesc_label_init(d);
 	mac_bpfdesc_label_associate(kauth_cred_get(), d);
@@ -643,12 +731,67 @@ bpfclose(dev_t dev, __unused int flags, __unused int fmt,
 	}	
 	bpf_dtab[minor(dev)] = (void *)1;		/* Mark closing */
 
+	/*
+	 * Deal with any in-progress timeouts.
+	 */
+	switch (d->bd_state) {
+		case BPF_IDLE:
+			/*
+			 * Not waiting for a timeout, and no timeout happened.
+			 */
+			break;
+
+		case BPF_WAITING:
+			/*
+			 * Waiting for a timeout.
+			 * Cancel any timer that has yet to go off,
+			 * and mark the state as "closing".
+			 * Then drop the lock to allow any timers that
+			 * *have* gone off to run to completion, and wait
+			 * for them to finish.
+			 */
+			if (!bpf_stop_timer(d)) {
+				/*
+				 * There was no pending call, so the call must 
+				 * have been in progress. Wait for the call to
+				 * complete; we have to drop the lock while
+				 * waiting, to let the in-progress call finish.
+				 */
+				d->bd_state = BPF_DRAINING;
+				while (d->bd_state == BPF_DRAINING)
+					msleep((caddr_t)d, bpf_mlock, PRINET,
+							"bpfdraining", NULL);
+			}
+			d->bd_state = BPF_IDLE;
+			break;
+
+		case BPF_TIMED_OUT:
+			/*
+			 * Timer went off, and the timeout routine finished.
+			 */
+			d->bd_state = BPF_IDLE;
+			break;
+
+		case BPF_DRAINING:
+			/*
+			 * Another thread is blocked on a close waiting for
+			 * a timeout to finish.
+			 * This "shouldn't happen", as the first thread to enter
+			 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and 
+			 * all subsequent threads should see that and fail with 
+			 * ENXIO.
+			 */
+			panic("Two threads blocked in a BPF close");
+			break;
+	}
+
 	if (d->bd_bif)
 		bpf_detachd(d);
 	selthreadclear(&d->bd_sel);
 #if CONFIG_MACF_NET
 	mac_bpfdesc_label_destroy(d);
 #endif
+	thread_call_free(d->bd_thread_call);
 	bpf_freed(d);
 
 	/* Mark free in same context as bpfopen comes to check */
@@ -666,15 +809,12 @@ bpfclose(dev_t dev, __unused int flags, __unused int fmt,
 static int
 bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
 {
-	int st;
+	u_int64_t abstime = 0;
 
-	lck_mtx_unlock(bpf_mlock);
-	
-	st = tsleep((caddr_t)d, pri, wmesg, timo);
-	
-	lck_mtx_lock(bpf_mlock);
+	if (timo)
+		clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
 	
-	return st;
+	return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime);
 }
 
 /*
@@ -695,6 +835,7 @@ int
 bpfread(dev_t dev, struct uio *uio, int ioflag)
 {
 	struct bpf_d *d;
+	int timed_out;
 	int error;
 
 	lck_mtx_lock(bpf_mlock);
@@ -705,7 +846,6 @@ bpfread(dev_t dev, struct uio *uio, int ioflag)
 		return (ENXIO);
 	}
 
-
 	/*
 	 * Restrict application to use a buffer the same size as
 	 * as kernel buffers.
@@ -714,6 +854,12 @@ bpfread(dev_t dev, struct uio *uio, int ioflag)
 		lck_mtx_unlock(bpf_mlock);
 		return (EINVAL);
 	}
+
+	if (d->bd_state == BPF_WAITING)
+		bpf_stop_timer(d);
+
+	timed_out = (d->bd_state == BPF_TIMED_OUT);
+	d->bd_state = BPF_IDLE;
 
 	/*
 	 * If the hold buffer is empty, then do a timed sleep, which
@@ -721,9 +867,14 @@ bpfread(dev_t dev, struct uio *uio, int ioflag)
 	 * have arrived to fill the store buffer.
 	 */
 	while (d->bd_hbuf == 0) {
-		if (d->bd_immediate && d->bd_slen != 0) {
+		if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) 
+			&& d->bd_slen != 0) {
 			/*
-			 * A packet(s) either arrived since the previous
+			 * We're in immediate mode, or are reading
+			 * in non-blocking mode, or a timer was
+			 * started before the read (e.g., by select()
+			 * or poll()) and has expired and a packet(s)
+			 * either arrived since the previous
 			 * read or arrived while we were asleep.
 			 * Rotate the buffers and return what's here.
 			 */
@@ -806,6 +957,10 @@ bpfread(dev_t dev, struct uio *uio, int ioflag)
 static void
 bpf_wakeup(struct bpf_d *d)
 {
+	if (d->bd_state == BPF_WAITING) {
+		bpf_stop_timer(d);
+		d->bd_state = BPF_IDLE;
+	}
 	wakeup((caddr_t)d);
 	if (d->bd_async && d->bd_sig && d->bd_sigio)
 		pgsigio(d->bd_sigio, d->bd_sig);
@@ -826,6 +981,36 @@ bpf_wakeup(struct bpf_d *d)
 #endif
 }
 
+
+static void
+bpf_timed_out(void *arg, __unused void *dummy)
+{
+	struct bpf_d *d = (struct bpf_d *)arg;
+
+	lck_mtx_lock(bpf_mlock);
+	if (d->bd_state == BPF_WAITING) {
+		/*
+		 * There's a select or kqueue waiting for this; if there's 
+		 * now stuff to read, wake it up.
+		 */
+		d->bd_state = BPF_TIMED_OUT;
+		if (d->bd_slen != 0)
+			bpf_wakeup(d);
+	} else if (d->bd_state == BPF_DRAINING) {
+		/*
+		 * A close is waiting for this to finish.
+		 * Mark it as finished, and wake the close up.
+		 */
+		d->bd_state = BPF_IDLE;
+		bpf_wakeup(d);
+	}
+	lck_mtx_unlock(bpf_mlock);
+}
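
bd_state (the values are defined in the bpfdesc.h hunk further down) forms a small four-state machine shared by select/kqueue, this callback, and bpfclose(). A hedged user-space model of the transitions bpf_timed_out() performs:

#include <stdio.h>

enum bd_state { IDLE, WAITING, TIMED_OUT, DRAINING };

/* what the timeout callback does, per the function above */
static enum bd_state
timed_out_transition(enum bd_state s)
{
	if (s == WAITING)
		return TIMED_OUT;	/* select armed a timer; record expiry */
	if (s == DRAINING)
		return IDLE;		/* a close is waiting; let it finish */
	return s;			/* IDLE / TIMED_OUT: nothing to do */
}

int
main(void)
{
	printf("%d\n", timed_out_transition(WAITING));	/* 2 == TIMED_OUT */
	return 0;
}
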
+
 /* keep in sync with bpf_movein above: */
 #define MAX_DATALINK_HDR_LEN	(sizeof(struct firewire_header))
 
@@ -838,6 +1023,8 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
 	int error;
 	char 		  dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
 	int datlen = 0;
+	int bif_dlt;
+	int bd_hdrcmplt;
 
 	lck_mtx_lock(bpf_mlock);
 
@@ -853,31 +1040,56 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
 
 	ifp = d->bd_bif->bif_ifp;
 
+	if ((ifp->if_flags & IFF_UP) == 0) {
+		lck_mtx_unlock(bpf_mlock);
+		return (ENETDOWN);
+	}
 	if (uio_resid(uio) == 0) {
 		lck_mtx_unlock(bpf_mlock);
 		return (0);
 	}
 	((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
-	error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, 
-			   d->bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
-			   &datlen);
-	if (error) {
-		lck_mtx_unlock(bpf_mlock);
+
+	/*
+	 * Fix for PR-6849527:
+	 * copy these fields onto the stack before dropping the lock for
+	 * bpf_movein(), since the descriptor can go away once it is dropped.
+	 */
+	bif_dlt = (int)d->bd_bif->bif_dlt;
+	bd_hdrcmplt = d->bd_hdrcmplt;
+
+	/* bpf_movein() may block allocating mbufs; drop the lock first */
+	lck_mtx_unlock(bpf_mlock);
+
+	error = bpf_movein(uio, bif_dlt, &m,
+	    bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
+	    &datlen);
+
+	if (error) {
 		return (error);
 	}
 
-	if ((unsigned)datlen > ifp->if_mtu) {
+	/* retake the lock and verify that the device is still open */
+	lck_mtx_lock(bpf_mlock);
+	d = bpf_dtab[minor(dev)];
+	if (d == 0 || d == (void *)1) {
 		lck_mtx_unlock(bpf_mlock);
 		m_freem(m);
-		return (EMSGSIZE);
+		return (ENXIO);
 	}
-	
-	if ((error = ifp_use(ifp, kIfNetUseCount_MustNotBeZero)) != 0) {
+
+	if (d->bd_bif == NULL) {
+		lck_mtx_unlock(bpf_mlock);
+		m_free(m);
+		return (ENXIO);
+	}
+
+	if ((unsigned)datlen > ifp->if_mtu) {
 		lck_mtx_unlock(bpf_mlock);
 		m_freem(m);
-		return (error);
+		return (EMSGSIZE);
 	}
 
+
 #if CONFIG_MACF_NET
 	mac_mbuf_label_associate_bpfdesc(d, m);
 #endif
@@ -892,10 +1104,7 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
 	else {
 		error = dlil_output(ifp, PF_INET, m, NULL, (struct sockaddr *)dst_buf, 0);
 	}
-	
-	if (ifp_unuse(ifp) != 0)
-		ifp_use_reached_zero(ifp);
-	
+
 	/*
 	 * The driver frees the mbuf.
 	 */
@@ -956,6 +1165,10 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
 		return (ENXIO);
 	}
 
+	if (d->bd_state == BPF_WAITING)
+		bpf_stop_timer(d);
+	d->bd_state = BPF_IDLE;
+
 	switch (cmd) {
 
 	default:
@@ -1124,34 +1337,60 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
 	/*
 	 * Set read timeout.
 	 */
-	case BIOCSRTIMEOUT:
-		{
-			struct BPF_TIMEVAL *_tv = (struct BPF_TIMEVAL *)addr;
+	case BIOCSRTIMEOUT32:
+		{
+			struct user32_timeval *_tv = (struct user32_timeval *)addr;
 			struct timeval tv;
 
 			tv.tv_sec  = _tv->tv_sec;
 			tv.tv_usec = _tv->tv_usec;
 
-			/*
+			/*
 			 * Subtract 1 tick from tvtohz() since this isn't
 			 * a one-shot timer.
 			 */
 			if ((error = itimerfix(&tv)) == 0)
 				d->bd_rtout = tvtohz(&tv) - 1;
 			break;
-		}
+		}
 
-	/*
+	case BIOCSRTIMEOUT64:
+		{
+			struct user64_timeval *_tv = (struct user64_timeval *)addr;
+			struct timeval tv;
+
+			tv.tv_sec  = _tv->tv_sec;
+			tv.tv_usec = _tv->tv_usec;
+
+			/*
+			 * Subtract 1 tick from tvtohz() since this isn't
+			 * a one-shot timer.
+			 */
+			if ((error = itimerfix(&tv)) == 0)
+				d->bd_rtout = tvtohz(&tv) - 1;
+			break;
+		}
+
+	/*
 	 * Get read timeout.
 	 */
-	case BIOCGRTIMEOUT:
+	case BIOCGRTIMEOUT32:
 		{
-			struct BPF_TIMEVAL *tv = (struct BPF_TIMEVAL *)addr;
+			struct user32_timeval *tv = (struct user32_timeval *)addr;
 
 			tv->tv_sec = d->bd_rtout / hz;
 			tv->tv_usec = (d->bd_rtout % hz) * tick;
 			break;
-		}
+		}
+
+	case BIOCGRTIMEOUT64:
+		{
+			struct user64_timeval *tv = (struct user64_timeval *)addr;
+
+			tv->tv_sec = d->bd_rtout / hz;
+			tv->tv_usec = (d->bd_rtout % hz) * tick;
+			break;
+		}
 
 	/*
 	 * Get packet stats.
@@ -1320,14 +1559,10 @@ bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt)
 			continue;
 		/*
 		 * We found the requested interface.
-		 * If it's not up, return an error.
 		 * Allocate the packet buffers if we need to.
 		 * If we're already attached to requested interface,
 		 * just flush the buffer.
 		 */
-		if ((ifp->if_flags & IFF_UP) == 0)
-			return (ENETDOWN);
-
 		if (d->bd_sbuf == 0) {
 			error = bpf_allocbufs(d);
 			if (error != 0)
@@ -1441,10 +1676,10 @@ bpf_setdlt(struct bpf_d *d, uint32_t dlt)
  * Otherwise, return false but make a note that a selwakeup() must be done.
  */
 int
-bpfpoll(dev_t dev, int events, void * wql, struct proc *p)
+bpfselect(dev_t dev, int which, void * wql, struct proc *p)
 {
 	struct bpf_d *d;
-	int revents = 0;
+	int ret = 0;
 
 	lck_mtx_lock(bpf_mlock);
 
@@ -1454,25 +1689,38 @@ bpfpoll(dev_t dev, int events, void * wql, struct proc *p)
 		return (ENXIO);
 	}
 
-	/*
-	 * An imitation of the FIONREAD ioctl code.
-	 */
 	if (d->bd_bif == NULL) {
 		lck_mtx_unlock(bpf_mlock);
 		return (ENXIO);
 	}
 
-	if (events & (POLLIN | POLLRDNORM)) {
-		if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0))
-			revents |= events & (POLLIN | POLLRDNORM);
-		else
-			selrecord(p, &d->bd_sel, wql);
+	switch (which) {
+		case FREAD:
+			if (d->bd_hlen != 0 ||
+					((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
+					 d->bd_slen != 0))
+				ret = 1; /* read has data to return */
+			else {
+				/*
+				 * Read has no data to return.
+				 * Make the select wait, and start a timer if
+				 * necessary.
+				 */
+				selrecord(p, &d->bd_sel, wql);
+				bpf_start_timer(d);
+			}
+			break;
+
+		case FWRITE:
+			ret = 1; /* can't determine whether a write would block */
+			break;
 	}
 
 	lck_mtx_unlock(bpf_mlock);
-	return (revents);
+	return (ret);
 }
 
+
 /*
  * Support for kevent() system call.  Register EVFILT_READ filters and
  * reject all others.
@@ -1511,9 +1759,6 @@ bpfkqfilter(dev_t dev, struct knote *kn)
 		return (ENXIO);
 	}
 
-	/*
-	 * An imitation of the FIONREAD ioctl code.
-	 */
 	if (d->bd_bif == NULL) {
 		lck_mtx_unlock(bpf_mlock);
 		return (ENXIO);
@@ -1546,13 +1791,52 @@ filt_bpfread(struct knote *kn, long hint)
 		lck_mtx_lock(bpf_mlock);
 
 	if (d->bd_immediate) {
+		/*
+		 * If there's data in the hold buffer, it's the 
+		 * amount of data a read will return.
+		 *
+		 * If there's no data in the hold buffer, but
+		 * there's data in the store buffer, a read will
+		 * immediately rotate the store buffer to the
+		 * hold buffer, so the amount of data in the store
+		 * buffer is the amount of data a read will
+		 * return.
+		 *
+		 * If there's no data in either buffer, we're not 
+		 * ready to read.
+		 */
 		kn->kn_data = (d->bd_hlen == 0 ? d->bd_slen : d->bd_hlen);
-		ready = (kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ? 
-					kn->kn_sdata : 1));
+		int64_t lowwat = 1;
+		if (kn->kn_sfflags & NOTE_LOWAT) {
+			if (kn->kn_sdata > d->bd_bufsize)
+				lowwat = d->bd_bufsize;
+			else if (kn->kn_sdata > lowwat)
+				lowwat = kn->kn_sdata;
+		}
+		ready = (kn->kn_data >= lowwat);
 	} else {
-		kn->kn_data = d->bd_hlen;
+		/*
+		 * If there's data in the hold buffer, it's the 
+		 * amount of data a read will return.
+		 *
+		 * If there's no data in the hold buffer, but 
+		 * there's data in the store buffer, if the 
+		 * timer has expired a read will immediately
+		 * rotate the store buffer to the hold buffer,
+		 * so the amount of data in the store buffer is 
+		 * the amount of data a read will return.
+		 *
+		 * If there's no data in either buffer, or there's 
+		 * no data in the hold buffer and the timer hasn't 
+		 * expired, we're not ready to read.
+		 */
+		kn->kn_data = (d->bd_hlen == 0 && d->bd_state == BPF_TIMED_OUT ? 
+				d->bd_slen : d->bd_hlen);
 		ready = (kn->kn_data > 0);
 	}
+	if (!ready)
+		bpf_start_timer(d);
 
 	if (hint == 0)
 		lck_mtx_unlock(bpf_mlock);
@@ -1721,6 +2005,7 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
 	struct bpf_hdr *hp;
 	int totlen, curlen;
 	int hdrlen = d->bd_bif->bif_hdrlen;
+	int do_wakeup = 0;
 	/*
 	 * Figure out how many bytes to move.  If the packet is
 	 * greater or equal to the snapshot length, transfer that
@@ -1741,7 +2026,7 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
 		 * Rotate the buffers if we can, then wakeup any
 		 * pending reads.
 		 */
-		if (d->bd_fbuf == 0) {
+		if (d->bd_fbuf == NULL) {
 			/*
 			 * We haven't completed the previous read yet,
 			 * so drop the packet.
@@ -1750,15 +2035,16 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
 			return;
 		}
 		ROTATE_BUFFERS(d);
-		bpf_wakeup(d);
+		do_wakeup = 1;
 		curlen = 0;
 	}
-	else if (d->bd_immediate)
+	else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
 		/*
-		 * Immediate mode is set.  A packet arrived so any
-		 * reads should be woken up.
+		 * Immediate mode is set, or the read timeout has 
+		 * already expired during a select call. A packet 
+		 * arrived, so the reader should be woken up.
 		 */
-		bpf_wakeup(d);
+		do_wakeup = 1;
 
 	/*
 	 * Append the bpf header.
@@ -1775,6 +2061,9 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
 	 */
 	(*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
 	d->bd_slen = curlen + totlen;
+
+	if (do_wakeup)
+		bpf_wakeup(d);
 }
 
 /*
diff --git a/bsd/net/bpf.h b/bsd/net/bpf.h
index a07cfe28e..92a5f31a0 100644
--- a/bsd/net/bpf.h
+++ b/bsd/net/bpf.h
@@ -172,8 +172,16 @@ struct bpf_version {
 #define	BIOCGDLT	_IOR('B',106, u_int)
 #define BIOCGETIF	_IOR('B',107, struct ifreq)
 #define BIOCSETIF	_IOW('B',108, struct ifreq)
-#define BIOCSRTIMEOUT	_IOW('B',109, struct BPF_TIMEVAL)
-#define BIOCGRTIMEOUT	_IOR('B',110, struct BPF_TIMEVAL)
+#define BIOCSRTIMEOUT	_IOW('B',109, struct timeval)
+#ifdef KERNEL_PRIVATE
+#define BIOCSRTIMEOUT64	_IOW('B',109, struct user64_timeval)
+#define BIOCSRTIMEOUT32	_IOW('B',109, struct user32_timeval)
+#endif /* KERNEL_PRIVATE */
+#define BIOCGRTIMEOUT	_IOR('B',110, struct timeval)
+#ifdef KERNEL_PRIVATE
+#define BIOCGRTIMEOUT64	_IOR('B',110, struct user64_timeval)
+#define BIOCGRTIMEOUT32	_IOR('B',110, struct user32_timeval)
+#endif /* KERNEL_PRIVATE */
 #define BIOCGSTATS	_IOR('B',111, struct bpf_stat)
 #define BIOCIMMEDIATE	_IOW('B',112, u_int)
 #define BIOCVERSION	_IOR('B',113, struct bpf_version)
diff --git a/bsd/net/bpf_filter.c b/bsd/net/bpf_filter.c
index 31ce77023..69d35371f 100644
--- a/bsd/net/bpf_filter.c
+++ b/bsd/net/bpf_filter.c
@@ -110,6 +110,8 @@
 	} \
 }
 
+extern unsigned int bpf_maxbufsize;
+
 static u_int16_t	m_xhalf(struct mbuf *m, bpf_u_int32 k, int *err);
 static u_int32_t	m_xword(struct mbuf *m, bpf_u_int32 k, int *err);
 
@@ -528,9 +530,10 @@ bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
 /*
  * Return true if the 'fcode' is a valid filter program.
  * The constraints are that each jump be forward and to a valid
- * code.  The code must terminate with either an accept or reject.
- * 'valid' is an array for use by the routine (it must be at least
- * 'len' bytes long).
+ * code, that memory accesses are within valid ranges (to the 
+ * extent that this can be checked statically; loads of packet data
+ * have to be, and are, also checked at run time), and that
+ * the code terminates with either an accept or reject.
  *
  * The kernel needs to be able to verify an application's filter code.
  * Otherwise, a bogus program could easily crash the system.
@@ -538,40 +541,112 @@ bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
 int
 bpf_validate(const struct bpf_insn *f, int len)
 {
-	register int i;
+	u_int i, from;
 	const struct bpf_insn *p;
 
-	for (i = 0; i < len; ++i) {
-		/*
-		 * Check that that jumps are forward, and within
-		 * the code block.
-		 */
+	if (len < 1 || len > BPF_MAXINSNS)
+		return 0;
+	
+	for (i = 0; i < ((u_int)len); ++i) {
 		p = &f[i];
-		if (BPF_CLASS(p->code) == BPF_JMP) {
-			register int from = i + 1;
-
-			if (BPF_OP(p->code) == BPF_JA) {
-				if (from >= len || p->k >= (bpf_u_int32)(len - from))
+		switch (BPF_CLASS(p->code)) {
+			/*
+			 * Check that memory operations use valid addresses
+			 */
+			case BPF_LD:
+			case BPF_LDX:
+				switch (BPF_MODE(p->code)) {
+					case BPF_IMM:
+						break;
+					case BPF_ABS:
+					case BPF_IND:
+					case BPF_MSH:
+						/*
+						 * A stricter check against the actual
+						 * packet length is done at run time.
+						 */
+						if (p->k >= bpf_maxbufsize)
+							return 0;
+						break;
+					case BPF_MEM:
+						if (p->k >= BPF_MEMWORDS)
+							return 0;
+						break;
+					case BPF_LEN:
+						break;
+					default:
+						return 0;
+				}
+				break;
+			case BPF_ST:
+			case BPF_STX:
+				if (p->k >= BPF_MEMWORDS)
 					return 0;
-			}
-			else if (from >= len || p->jt >= len - from ||
-				 p->jf >= len - from)
+				break;
+			case BPF_ALU:
+				switch (BPF_OP(p->code)) {
+					case BPF_ADD:
+					case BPF_SUB:
+					case BPF_MUL:
+					case BPF_OR:
+					case BPF_AND:
+					case BPF_LSH:
+					case BPF_RSH:
+					case BPF_NEG:
+						break;
+					case BPF_DIV:
+						/* 
+						 * Check for constant division by 0
+						 */
+						if (BPF_SRC(p->code) == BPF_K && p->k == 0)
+							return 0;
+						break;
+					default:
+						return 0;
+				}
+				break;
+			case BPF_JMP:
+				/*
+				 * Check that jumps are within the code block,
+				 * and that unconditional branches don't go 
+				 * backwards as a result of an overflow.
+				 * Unconditional branches have a 32-bit offset,
+				 * so they could overflow; we check to make 
+				 * sure they don't. Conditional branches have 
+				 * an 8-bit offset, and the from address is 
+				 * less than or equal to BPF_MAXINSNS, and we assume
+				 * that BPF_MAXINSNS is sufficiently small that adding
+				 * 255 to it won't overflow.
+				 *
+				 * We know that len is <= BPF_MAXINSNS, and we 
+				 * assume that BPF_MAXINSNS is less than the maximum 
+				 * size of a u_int, so that i+1 doesn't overflow
+				 */
+				from = i+1;
+				switch (BPF_OP(p->code)) {
+					case BPF_JA:
+						if (from + p->k < from || from + p->k >= ((u_int)len))
+							return 0;
+						break;
+					case BPF_JEQ:
+					case BPF_JGT:
+					case BPF_JGE:
+					case BPF_JSET:
+						if (from + p->jt >= ((u_int)len) || from + p->jf >= ((u_int)len))
+							return 0;
+						break;
+					default:
+						return 0;
+				}
+				break;
+			case BPF_RET:
+				break;
+			case BPF_MISC:
+				break;
+			default:
 				return 0;
 		}
-		/*
-		 * Check that memory operations use valid addresses.
-		 */
-		if ((BPF_CLASS(p->code) == BPF_ST ||
-		     (BPF_CLASS(p->code) == BPF_LD &&
-		      (p->code & 0xe0) == BPF_MEM)) &&
-		    p->k >= BPF_MEMWORDS)
-			return 0;
-		/*
-		 * Check for constant division by 0.
-		 */
-		if (p->code == (BPF_ALU|BPF_DIV|BPF_K) && p->k == 0)
-			return 0;
 	}
-	return BPF_CLASS(f[len - 1].code) == BPF_RET;
+	return BPF_CLASS(f[len - 1].code) == BPF_RET;
 }
 #endif
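
Two of the added checks are easy to exercise outside the kernel. A self-contained sketch using a local mirror of struct bpf_insn rather than <net/bpf.h> (the opcode constants are written out by hand, so treat the values as assumptions): constant division by zero is rejected, and conditional jump targets must stay inside the program.

#include <stdio.h>

struct insn { unsigned short code; unsigned char jt, jf; unsigned int k; };

#define C_ALU_DIV_K	0x34	/* BPF_ALU|BPF_DIV|BPF_K */
#define C_JMP_JGT_K	0x25	/* BPF_JMP|BPF_JGT|BPF_K */

static int
check(const struct insn *f, unsigned int len)
{
	unsigned int i, from;

	for (i = 0; i < len; i++) {
		if (f[i].code == C_ALU_DIV_K && f[i].k == 0)
			return 0;		/* constant division by zero */
		if (f[i].code == C_JMP_JGT_K) {
			from = i + 1;
			if (from + f[i].jt >= len || from + f[i].jf >= len)
				return 0;	/* branch escapes the program */
		}
	}
	return 1;
}

int
main(void)
{
	struct insn div0 = { C_ALU_DIV_K, 0, 0, 0 };

	printf("%d\n", check(&div0, 1));	/* prints 0: rejected */
	return 0;
}
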
diff --git a/bsd/net/bpfdesc.h b/bsd/net/bpfdesc.h
index 2a5cd1aaf..e0507f935 100644
--- a/bsd/net/bpfdesc.h
+++ b/bsd/net/bpfdesc.h
@@ -76,6 +76,7 @@
  */
 
 #include <sys/select.h>
+#include <kern/thread_call.h>
 
 /*
  * Descriptor associated with each open bpf file.
@@ -99,7 +100,7 @@ struct bpf_d {
 
 	int		bd_bufsize;	/* absolute length of buffers */
 
-	struct bpf_if *	bd_bif;		/* interface descriptor */
+	struct bpf_if  *bd_bif;		/* interface descriptor */
 	u_int32_t		bd_rtout;	/* Read timeout in 'ticks' */
 	struct bpf_insn *bd_filter; 	/* filter code */
 	u_int32_t		bd_rcount;	/* number of packets received */
@@ -127,11 +128,24 @@ struct bpf_d {
 	int		bd_hdrcmplt;	/* false to fill in src lladdr automatically */
 	int		bd_seesent;	/* true if bpf should see sent packets */
 	int		bd_oflags;	/* device open flags */
+	thread_call_t bd_thread_call; /* for BPF timeouts with select */
 #if CONFIG_MACF_NET
 	struct label *	bd_label;	/* MAC label for descriptor */
 #endif
 };
 
+/* Values for bd_state */
+#define BPF_IDLE		0    /* no select in progress or kqueue pending */
+#define BPF_WAITING		1    /* waiting for read timeout in select/kqueue */
+#define BPF_TIMED_OUT 	2	 /* read timeout has expired in select/kqueue */
+#define BPF_DRAINING	3	 /* waiting for timeout routine to finish during close */
+
+/* Test whether a BPF is ready for read(). */
+#define bpf_ready(bd)	((bd)->bd_hlen != 0 ||		\
+			 (((bd)->bd_immediate || (bd)->bd_state == BPF_TIMED_OUT) && \
+			  (bd)->bd_slen != 0))
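
For clarity, the bpf_ready() predicate added above restated in user space, with plain ints standing in for the descriptor fields: a read can be satisfied when the hold buffer has data, or when immediate mode or an expired select timer permits rotating a non-empty store buffer early.

#include <stdio.h>

static int
ready(int hlen, int slen, int immediate, int timed_out)
{
	return hlen != 0 || ((immediate || timed_out) && slen != 0);
}

int
main(void)
{
	/* timer expired and the store buffer holds 128 bytes: readable */
	printf("%d\n", ready(0, 128, 0, 1));	/* prints 1 */
	return 0;
}
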
+
+
 /*
  * Descriptor associated with each attached hardware interface.
  */
diff --git a/bsd/net/bridgestp.c b/bsd/net/bridgestp.c
new file mode 100644
index 000000000..1d6922f28
--- /dev/null
+++ b/bsd/net/bridgestp.c
@@ -0,0 +1,2425 @@
+/*	$NetBSD: bridgestp.c,v 1.5 2003/11/28 08:56:48 keihan Exp $	*/
+
+/*
+ * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Copyright (c) 2000 Jason L. Wright (jason@thought.net)
+ * Copyright (c) 2006 Andrew Thompson (thompsa@FreeBSD.org)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * OpenBSD: bridgestp.c,v 1.5 2001/03/22 03:48:29 jason Exp
+ */
+
+/*
+ * Implementation of the spanning tree protocol as defined in
+ * ISO/IEC 802.1D-2004, June 9, 2004.
+ */
+
+#include <sys/cdefs.h>
+//__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/kernel.h>
+//#include <sys/callout.h>
+//#include <sys/module.h>
+#include <sys/proc.h>
+#include <sys/lock.h>
+//#include <sys/mutex.h>
+//#include <sys/taskqueue.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/if_llc.h>
+#include <net/if_media.h>
+
+#include <net/kpi_interface.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/if_ether.h>
+#include <net/bridgestp.h>
+
+#include <kern/thread.h>
+
+static lck_mtx_t		*bstp_task_mtx = NULL;
+static lck_grp_t 		*bstp_task_grp = NULL;
+static lck_attr_t 		*bstp_task_attr = NULL;
+static thread_t			bstp_task_thread;
+static TAILQ_HEAD(bstp_task_queue, bstp_task) 
+						bstp_task_queue = TAILQ_HEAD_INITIALIZER(bstp_task_queue);
+static struct bstp_task *bstp_task_queue_running = NULL;
+
+static void bstp_create_task_thread(void);
+static void bstp_task_thread_func(void);
+
+static void bstp_task_enqueue(struct bstp_task *);
+static void bstp_task_drain(struct bstp_task *);
+
+#define BSTP_TASK_INIT(bt, func, context) do { \
+	(bt)->bt_count = 0; \
+	(bt)->bt_func = func; \
+	(bt)->bt_context = context; \
+} while (0)
+
+
+
+#define	BSTP_LOCK_INIT(_bs)		(_bs)->bs_mtx = lck_mtx_alloc_init(bstp_lock_grp, bstp_lock_attr)
+#define	BSTP_LOCK_DESTROY(_bs)	lck_mtx_free((_bs)->bs_mtx, bstp_lock_grp)
+#define	BSTP_LOCK(_bs)			lck_mtx_lock((_bs)->bs_mtx)
+#define	BSTP_UNLOCK(_bs)		lck_mtx_unlock((_bs)->bs_mtx)
+#define	BSTP_LOCK_ASSERT(_bs)	lck_mtx_assert((_bs)->bs_mtx, LCK_MTX_ASSERT_OWNED)
+
+
+#ifdef	BRIDGESTP_DEBUG
+#define	DPRINTF(fmt, arg...)	printf("bstp: " fmt, ##arg)
+#else
+#define	DPRINTF(fmt, arg...)
+#endif
+
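+/*
+ * A 64-bit bridge ID carries a 16-bit priority in the top bits followed
+ * by the 48-bit MAC address; PV2ADDR() extracts the MAC portion into a
+ * byte array.  For example, ID 0x8000001122334455 (priority 0x8000)
+ * yields the address 00:11:22:33:44:55.
+ */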
+#define	PV2ADDR(pv, eaddr)	do {		\
+	eaddr[0] = pv >> 40;			\
+	eaddr[1] = pv >> 32;			\
+	eaddr[2] = pv >> 24;			\
+	eaddr[3] = pv >> 16;			\
+	eaddr[4] = pv >> 8;			\
+	eaddr[5] = pv >> 0;			\
+} while (0)
+
+#define	INFO_BETTER	1
+#define	INFO_SAME	0
+#define	INFO_WORSE	-1
+
+const uint8_t bstp_etheraddr[] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
+
+LIST_HEAD(, bstp_state) bstp_list;
+static lck_mtx_t		*bstp_list_mtx;
+static lck_grp_t 		*bstp_lock_grp = NULL;
+static lck_attr_t 		*bstp_lock_attr = NULL;
+
+static void	bstp_transmit(struct bstp_state *, struct bstp_port *);
+static void	bstp_transmit_bpdu(struct bstp_state *, struct bstp_port *);
+static void	bstp_transmit_tcn(struct bstp_state *, struct bstp_port *);
+static void	bstp_decode_bpdu(struct bstp_port *, struct bstp_cbpdu *,
+		    struct bstp_config_unit *);
+static void	bstp_send_bpdu(struct bstp_state *, struct bstp_port *,
+		    struct bstp_cbpdu *);
+static void	bstp_enqueue(struct ifnet *, struct mbuf *);
+static int	bstp_pdu_flags(struct bstp_port *);
+static void	bstp_received_stp(struct bstp_state *, struct bstp_port *,
+		    struct mbuf **, struct bstp_tbpdu *);
+static void	bstp_received_rstp(struct bstp_state *, struct bstp_port *,
+		    struct mbuf **, struct bstp_tbpdu *);
+static void	bstp_received_tcn(struct bstp_state *, struct bstp_port *,
+		    struct bstp_tcn_unit *);
+static void	bstp_received_bpdu(struct bstp_state *, struct bstp_port *,
+		    struct bstp_config_unit *);
+static int	bstp_pdu_rcvtype(struct bstp_port *, struct bstp_config_unit *);
+static int	bstp_pdu_bettersame(struct bstp_port *, int);
+static int	bstp_info_cmp(struct bstp_pri_vector *,
+		    struct bstp_pri_vector *);
+static int	bstp_info_superior(struct bstp_pri_vector *,
+		    struct bstp_pri_vector *);
+static void	bstp_assign_roles(struct bstp_state *);
+static void	bstp_update_roles(struct bstp_state *, struct bstp_port *);
+static void	bstp_update_state(struct bstp_state *, struct bstp_port *);
+static void	bstp_update_tc(struct bstp_port *);
+static void	bstp_update_info(struct bstp_port *);
+static void	bstp_set_other_tcprop(struct bstp_port *);
+static void	bstp_set_all_reroot(struct bstp_state *);
+static void	bstp_set_all_sync(struct bstp_state *);
+static void	bstp_set_port_state(struct bstp_port *, int);
+static void	bstp_set_port_role(struct bstp_port *, int);
+static void	bstp_set_port_proto(struct bstp_port *, int);
+static void	bstp_set_port_tc(struct bstp_port *, int);
+static void	bstp_set_timer_tc(struct bstp_port *);
+static void	bstp_set_timer_msgage(struct bstp_port *);
+static int	bstp_rerooted(struct bstp_state *, struct bstp_port *);
+static uint32_t	bstp_calc_path_cost(struct bstp_port *);
+static void	bstp_notify_state(void *, int);
+static void	bstp_notify_rtage(void *, int);
+static void	bstp_ifupdstatus(struct bstp_state *, struct bstp_port *);
+static void	bstp_enable_port(struct bstp_state *, struct bstp_port *);
+static void	bstp_disable_port(struct bstp_state *, struct bstp_port *);
+static void	bstp_tick(void *);
+static void	bstp_timer_start(struct bstp_timer *, uint16_t);
+static void	bstp_timer_stop(struct bstp_timer *);
+static void	bstp_timer_latch(struct bstp_timer *);
+static int	bstp_timer_expired(struct bstp_timer *);
+static void	bstp_hello_timer_expiry(struct bstp_state *,
+		    struct bstp_port *);
+static void	bstp_message_age_expiry(struct bstp_state *,
+		    struct bstp_port *);
+static void	bstp_migrate_delay_expiry(struct bstp_state *,
+		    struct bstp_port *);
+static void	bstp_edge_delay_expiry(struct bstp_state *,
+		    struct bstp_port *);
+static int	bstp_addr_cmp(const uint8_t *, const uint8_t *);
+static int	bstp_same_bridgeid(uint64_t, uint64_t);
+static void	bstp_reinit(struct bstp_state *);
+
+static void
+bstp_transmit(struct bstp_state *bs, struct bstp_port *bp)
+{
+	if (bs->bs_running == 0)
+		return;
+
+	/*
+	 * a PDU can only be sent if we have tx quota left and the
+	 * hello timer is running.
+	 */
+	if (bp->bp_hello_timer.active == 0) {
+		/* Test if it needs to be reset */
+		bstp_hello_timer_expiry(bs, bp);
+		return;
+	}
+	if (bp->bp_txcount > bs->bs_txholdcount)
+		/* Ran out of karma */
+		return;
+
+	if (bp->bp_protover == BSTP_PROTO_RSTP) {
+		bstp_transmit_bpdu(bs, bp);
+		bp->bp_tc_ack = 0;
+	} else { /* STP */
+		switch (bp->bp_role) {
+			case BSTP_ROLE_DESIGNATED:
+				bstp_transmit_bpdu(bs, bp);
+				bp->bp_tc_ack = 0;
+				break;
+
+			case BSTP_ROLE_ROOT:
+				bstp_transmit_tcn(bs, bp);
+				break;
+		}
+	}
+	bstp_timer_start(&bp->bp_hello_timer, bp->bp_desg_htime);
+	bp->bp_flags &= ~BSTP_PORT_NEWINFO;
+}
+
+static void
+bstp_transmit_bpdu(struct bstp_state *bs, struct bstp_port *bp)
+{
+	struct bstp_cbpdu bpdu;
+
+	BSTP_LOCK_ASSERT(bs);
+
+	bpdu.cbu_rootpri = htons(bp->bp_desg_pv.pv_root_id >> 48);
+	PV2ADDR(bp->bp_desg_pv.pv_root_id, bpdu.cbu_rootaddr);
+
+	bpdu.cbu_rootpathcost = htonl(bp->bp_desg_pv.pv_cost);
+
+	bpdu.cbu_bridgepri = htons(bp->bp_desg_pv.pv_dbridge_id >> 48);
+	PV2ADDR(bp->bp_desg_pv.pv_dbridge_id, bpdu.cbu_bridgeaddr);
+
+	bpdu.cbu_portid = htons(bp->bp_port_id);
+	bpdu.cbu_messageage = htons(bp->bp_desg_msg_age);
+	bpdu.cbu_maxage = htons(bp->bp_desg_max_age);
+	bpdu.cbu_hellotime = htons(bp->bp_desg_htime);
+	bpdu.cbu_forwarddelay = htons(bp->bp_desg_fdelay);
+
+	bpdu.cbu_flags = bstp_pdu_flags(bp);
+
+	switch (bp->bp_protover) {
+		case BSTP_PROTO_STP:
+			bpdu.cbu_bpdutype = BSTP_MSGTYPE_CFG;
+			break;
+
+		case BSTP_PROTO_RSTP:
+			bpdu.cbu_bpdutype = BSTP_MSGTYPE_RSTP;
+			break;
+	}
+
+	bstp_send_bpdu(bs, bp, &bpdu);
+}
+
+static void
+bstp_transmit_tcn(struct bstp_state *bs, struct bstp_port *bp)
+{
+	struct bstp_tbpdu bpdu;
+	struct ifnet *ifp = bp->bp_ifp;
+	struct ether_header *eh;
+	struct mbuf *m;
+
+	/* keep bs referenced in case KASSERT is compiled out */
+	(void) bs;
+
+	KASSERT(bp == bs->bs_root_port, ("%s: bad root port\n", __func__));
+
+	if ((ifp->if_flags & IFF_RUNNING) == 0)
+		return;
+
+	MGETHDR(m, M_DONTWAIT, MT_DATA);
+	if (m == NULL)
+		return;
+
+	m->m_pkthdr.rcvif = ifp;
+	m->m_pkthdr.len = sizeof(*eh) + sizeof(bpdu);
+	m->m_len = m->m_pkthdr.len;
+
+	eh = mtod(m, struct ether_header *);
+
+	memcpy(eh->ether_shost, ifnet_lladdr(ifp), ETHER_ADDR_LEN);
+	memcpy(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN);
+	eh->ether_type = htons(sizeof(bpdu));
+
+	bpdu.tbu_ssap = bpdu.tbu_dsap = LLC_8021D_LSAP;
+	bpdu.tbu_ctl = LLC_UI;
+	bpdu.tbu_protoid = 0;
+	bpdu.tbu_protover = 0;
+	bpdu.tbu_bpdutype = BSTP_MSGTYPE_TCN;
+
+	memcpy(mtod(m, caddr_t) + sizeof(*eh), &bpdu, sizeof(bpdu));
+
+	bp->bp_txcount++;
+	bstp_enqueue(ifp, m);
+}
+
+static void
+bstp_decode_bpdu(struct bstp_port *bp, struct bstp_cbpdu *cpdu,
+    struct bstp_config_unit *cu)
+{
+	int flags;
+
+	cu->cu_pv.pv_root_id =
+	    (((uint64_t)ntohs(cpdu->cbu_rootpri)) << 48) |
+	    (((uint64_t)cpdu->cbu_rootaddr[0]) << 40) |
+	    (((uint64_t)cpdu->cbu_rootaddr[1]) << 32) |
+	    (((uint64_t)cpdu->cbu_rootaddr[2]) << 24) |
+	    (((uint64_t)cpdu->cbu_rootaddr[3]) << 16) |
+	    (((uint64_t)cpdu->cbu_rootaddr[4]) << 8) |
+	    (((uint64_t)cpdu->cbu_rootaddr[5]) << 0);
+
+	cu->cu_pv.pv_dbridge_id =
+	    (((uint64_t)ntohs(cpdu->cbu_bridgepri)) << 48) |
+	    (((uint64_t)cpdu->cbu_bridgeaddr[0]) << 40) |
+	    (((uint64_t)cpdu->cbu_bridgeaddr[1]) << 32) |
+	    (((uint64_t)cpdu->cbu_bridgeaddr[2]) << 24) |
+	    (((uint64_t)cpdu->cbu_bridgeaddr[3]) << 16) |
+	    (((uint64_t)cpdu->cbu_bridgeaddr[4]) << 8) |
+	    (((uint64_t)cpdu->cbu_bridgeaddr[5]) << 0);
+
+	cu->cu_pv.pv_cost = ntohl(cpdu->cbu_rootpathcost);
+	cu->cu_message_age = ntohs(cpdu->cbu_messageage);
+	cu->cu_max_age = ntohs(cpdu->cbu_maxage);
+	cu->cu_hello_time = ntohs(cpdu->cbu_hellotime);
+	cu->cu_forward_delay = ntohs(cpdu->cbu_forwarddelay);
+	cu->cu_pv.pv_dport_id = ntohs(cpdu->cbu_portid);
+	cu->cu_pv.pv_port_id = bp->bp_port_id;
+	cu->cu_message_type = cpdu->cbu_bpdutype;
+
+	/* Strip off unused flags in STP mode */
+	flags = cpdu->cbu_flags;
+	switch (cpdu->cbu_protover) {
+		case BSTP_PROTO_STP:
+			flags &= BSTP_PDU_STPMASK;
+			/* An STP BPDU explicitly conveys a Designated Port */
+			cu->cu_role = BSTP_ROLE_DESIGNATED;
+			break;
+
+		case BSTP_PROTO_RSTP:
+			flags &= BSTP_PDU_RSTPMASK;
+			break;
+	}
+
+	cu->cu_topology_change_ack =
+		(flags & BSTP_PDU_F_TCA) ? 1 : 0;
+	cu->cu_proposal =
+		(flags & BSTP_PDU_F_P) ? 1 : 0;
+	cu->cu_agree =
+		(flags & BSTP_PDU_F_A) ? 1 : 0;
+	cu->cu_learning =
+		(flags & BSTP_PDU_F_L) ? 1 : 0;
+	cu->cu_forwarding =
+		(flags & BSTP_PDU_F_F) ? 1 : 0;
+	cu->cu_topology_change =
+		(flags & BSTP_PDU_F_TC) ? 1 : 0;
+
+	switch ((flags & BSTP_PDU_PRMASK) >> BSTP_PDU_PRSHIFT) {
+		case BSTP_PDU_F_ROOT:
+			cu->cu_role = BSTP_ROLE_ROOT;
+			break;
+		case BSTP_PDU_F_ALT:
+			cu->cu_role = BSTP_ROLE_ALTERNATE;
+			break;
+		case BSTP_PDU_F_DESG:
+			cu->cu_role = BSTP_ROLE_DESIGNATED;
+			break;
+	}
+}
+
+static void
+bstp_send_bpdu(struct bstp_state *bs, struct bstp_port *bp,
+    struct bstp_cbpdu *bpdu)
+{
+	struct ifnet *ifp;
+	struct mbuf *m;
+	struct ether_header *eh;
+
+	BSTP_LOCK_ASSERT(bs);
+
+	ifp = bp->bp_ifp;
+
+	if ((ifp->if_flags & IFF_RUNNING) == 0)
+		return;
+
+	MGETHDR(m, M_DONTWAIT, MT_DATA);
+	if (m == NULL)
+		return;
+
+	eh = mtod(m, struct ether_header *);
+
+	bpdu->cbu_ssap = bpdu->cbu_dsap = LLC_8021D_LSAP;
+	bpdu->cbu_ctl = LLC_UI;
+	bpdu->cbu_protoid = htons(BSTP_PROTO_ID);
+
+	memcpy(eh->ether_shost, ifnet_lladdr(ifp), ETHER_ADDR_LEN);
+	memcpy(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN);
+
+	switch (bpdu->cbu_bpdutype) {
+		case BSTP_MSGTYPE_CFG:
+			bpdu->cbu_protover = BSTP_PROTO_STP;
+			m->m_pkthdr.len = sizeof(*eh) + BSTP_BPDU_STP_LEN;
+			eh->ether_type = htons(BSTP_BPDU_STP_LEN);
+			memcpy(mtod(m, caddr_t) + sizeof(*eh), bpdu,
+			    BSTP_BPDU_STP_LEN);
+			break;
+
+		case BSTP_MSGTYPE_RSTP:
+			bpdu->cbu_protover = BSTP_PROTO_RSTP;
+			bpdu->cbu_versionlen = htons(0);
+			m->m_pkthdr.len = sizeof(*eh) + BSTP_BPDU_RSTP_LEN;
+			eh->ether_type = htons(BSTP_BPDU_RSTP_LEN);
+			memcpy(mtod(m, caddr_t) + sizeof(*eh), bpdu,
+			    BSTP_BPDU_RSTP_LEN);
+			break;
+
+		default:
+			panic("not implemented");
+	}
+	m->m_pkthdr.rcvif = ifp;
+	m->m_len = m->m_pkthdr.len;
+
+	bp->bp_txcount++;
+	bstp_enqueue(ifp, m);
+}
+
+static void
+bstp_enqueue(struct ifnet *dst_ifp, struct mbuf *m)
+{
+	errno_t error = 0;
+	u_int32_t len = m->m_pkthdr.len;
+
+	m->m_flags |= M_PROTO1;	/* set to avoid loops */
+
+	error = ifnet_output_raw(dst_ifp, 0, m);
+	if (error == 0) {
+		(void) ifnet_stat_increment_out(dst_ifp, 1, len, 0);
+	} else {
+		(void) ifnet_stat_increment_out(dst_ifp, 0, 0, 1);
+	}
+}
+
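+/*
+ * Build the flags octet for an outgoing BPDU: proposal, agreement and
+ * topology change bits, the learning/forwarding state bits, and a
+ * 2-bit port role encoding, finally masked to the bits valid for the
+ * protocol version in use.
+ */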
+static int
+bstp_pdu_flags(struct bstp_port *bp)
+{
+	int flags = 0;
+
+	if (bp->bp_proposing && bp->bp_state != BSTP_IFSTATE_FORWARDING)
+		flags |= BSTP_PDU_F_P;
+
+	if (bp->bp_agree)
+		flags |= BSTP_PDU_F_A;
+
+	if (bp->bp_tc_timer.active)
+		flags |= BSTP_PDU_F_TC;
+
+	if (bp->bp_tc_ack)
+		flags |= BSTP_PDU_F_TCA;
+
+	switch (bp->bp_state) {
+		case BSTP_IFSTATE_LEARNING:
+			flags |= BSTP_PDU_F_L;
+			break;
+
+		case BSTP_IFSTATE_FORWARDING:
+			flags |= (BSTP_PDU_F_L | BSTP_PDU_F_F);
+			break;
+	}
+
+	switch (bp->bp_role) {
+		case BSTP_ROLE_ROOT:
+			flags |=
+				(BSTP_PDU_F_ROOT << BSTP_PDU_PRSHIFT);
+			break;
+
+		case BSTP_ROLE_ALTERNATE:
+		case BSTP_ROLE_BACKUP:	/* fall through */
+			flags |=
+				(BSTP_PDU_F_ALT << BSTP_PDU_PRSHIFT);
+			break;
+
+		case BSTP_ROLE_DESIGNATED:
+			flags |=
+				(BSTP_PDU_F_DESG << BSTP_PDU_PRSHIFT);
+			break;
+	}
+
+	/* Strip off unused flags in either mode */
+	switch (bp->bp_protover) {
+		case BSTP_PROTO_STP:
+			flags &= BSTP_PDU_STPMASK;
+			break;
+		case BSTP_PROTO_RSTP:
+			flags &= BSTP_PDU_RSTPMASK;
+			break;
+	}
+	return (flags);
+}
+
+struct mbuf *
+bstp_input(struct bstp_port *bp, __unused struct ifnet *ifp, struct mbuf *m)
+{
+	struct bstp_state *bs = bp->bp_bs;
+	struct ether_header *eh;
+	struct bstp_tbpdu tpdu;
+	uint16_t len;
+
+	if (bp->bp_active == 0) {
+		m_freem(m);
+		return (NULL);
+	}
+
+	BSTP_LOCK(bs);
+
+	eh = mtod(m, struct ether_header *);
+
+	len = ntohs(eh->ether_type);
+	if (len < sizeof(tpdu))
+		goto out;
+
+	m_adj(m, ETHER_HDR_LEN);
+
+	if (m->m_pkthdr.len > len)
+		m_adj(m, len - m->m_pkthdr.len);
+	if ((unsigned int)m->m_len < sizeof(tpdu) &&
+	    (m = m_pullup(m, sizeof(tpdu))) == NULL)
+		goto out;
+
+	memcpy(&tpdu, mtod(m, caddr_t), sizeof(tpdu));
+
+	/* basic packet checks */
+	if (tpdu.tbu_dsap != LLC_8021D_LSAP ||
+	    tpdu.tbu_ssap != LLC_8021D_LSAP ||
+	    tpdu.tbu_ctl != LLC_UI)
+		goto out;
+	if (tpdu.tbu_protoid != BSTP_PROTO_ID)
+		goto out;
+
+	/*
+	 * We can treat later versions of the PDU as the same as the maximum
+	 * version we implement. All additional parameters/flags are ignored.
+	 */
+	if (tpdu.tbu_protover > BSTP_PROTO_MAX)
+		tpdu.tbu_protover = BSTP_PROTO_MAX;
+
+	if (tpdu.tbu_protover != bp->bp_protover) {
+		/*
+		 * Wait for the migration delay timer to expire before changing
+		 * protocol version to avoid flip-flops.
+		 */
+		if (bp->bp_flags & BSTP_PORT_CANMIGRATE)
+			bstp_set_port_proto(bp, tpdu.tbu_protover);
+		else
+			goto out;
+	}
+
+	/* Clear operedge upon receiving a PDU on the port */
+	bp->bp_operedge = 0;
+	bstp_timer_start(&bp->bp_edge_delay_timer,
+	    BSTP_DEFAULT_MIGRATE_DELAY);
+
+	switch (tpdu.tbu_protover) {
+		case BSTP_PROTO_STP:
+			bstp_received_stp(bs, bp, &m, &tpdu);
+			break;
+
+		case BSTP_PROTO_RSTP:
+			bstp_received_rstp(bs, bp, &m, &tpdu);
+			break;
+	}
+out:
+	BSTP_UNLOCK(bs);
+	if (m)
+		m_freem(m);
+	return (NULL);
+}
+
+static void
+bstp_received_stp(struct bstp_state *bs, struct bstp_port *bp,
+    struct mbuf **mp, struct bstp_tbpdu *tpdu)
+{
+	struct bstp_cbpdu cpdu;
+	struct bstp_config_unit *cu = &bp->bp_msg_cu;
+	struct bstp_tcn_unit tu;
+
+	switch (tpdu->tbu_bpdutype) {
+	case BSTP_MSGTYPE_TCN:
+		tu.tu_message_type = tpdu->tbu_bpdutype;
+		bstp_received_tcn(bs, bp, &tu);
+		break;
+	case BSTP_MSGTYPE_CFG:
+		if ((*mp)->m_len < BSTP_BPDU_STP_LEN &&
+		    (*mp = m_pullup(*mp, BSTP_BPDU_STP_LEN)) == NULL)
+			return;
+		memcpy(&cpdu, mtod(*mp, caddr_t), BSTP_BPDU_STP_LEN);
+
+		bstp_decode_bpdu(bp, &cpdu, cu);
+		bstp_received_bpdu(bs, bp, cu);
+		break;
+	}
+}
+
+static void
+bstp_received_rstp(struct bstp_state *bs, struct bstp_port *bp,
+    struct mbuf **mp, struct bstp_tbpdu *tpdu)
+{
+	struct bstp_cbpdu cpdu;
+	struct bstp_config_unit *cu = &bp->bp_msg_cu;
+
+	if (tpdu->tbu_bpdutype != BSTP_MSGTYPE_RSTP)
+		return;
+
+	if ((*mp)->m_len < BSTP_BPDU_RSTP_LEN &&
+	    (*mp = m_pullup(*mp, BSTP_BPDU_RSTP_LEN)) == NULL)
+		return;
+	memcpy(&cpdu, mtod(*mp, caddr_t), BSTP_BPDU_RSTP_LEN);
+
+	bstp_decode_bpdu(bp, &cpdu, cu);
+	bstp_received_bpdu(bs, bp, cu);
+}
+
+static void
+bstp_received_tcn(__unused struct bstp_state *bs, struct bstp_port *bp,
+    __unused struct bstp_tcn_unit *tcn)
+{
+	bp->bp_rcvdtcn = 1;
+	bstp_update_tc(bp);
+}
+
+static void
+bstp_received_bpdu(struct bstp_state *bs, struct bstp_port *bp,
+    struct bstp_config_unit *cu)
+{
+	int type;
+
+	BSTP_LOCK_ASSERT(bs);
+
+	/* We need to have transitioned to INFO_MINE before proceeding */
+	switch (bp->bp_infois) {
+		case BSTP_INFO_DISABLED:
+		case BSTP_INFO_AGED:
+			return;
+	}
+
+	type = bstp_pdu_rcvtype(bp, cu);
+
+	switch (type) {
+		case BSTP_PDU_SUPERIOR:
+			bs->bs_allsynced = 0;
+			bp->bp_agreed = 0;
+			bp->bp_proposing = 0;
+
+			if (cu->cu_proposal && cu->cu_forwarding == 0)
+				bp->bp_proposed = 1;
+			if (cu->cu_topology_change)
+				bp->bp_rcvdtc = 1;
+			if (cu->cu_topology_change_ack)
+				bp->bp_rcvdtca = 1;
+
+			if (bp->bp_agree &&
+			    !bstp_pdu_bettersame(bp, BSTP_INFO_RECEIVED))
+				bp->bp_agree = 0;
+
+			/* copy the received priority and timers to the port */
+			bp->bp_port_pv = cu->cu_pv;
+			bp->bp_port_msg_age = cu->cu_message_age;
+			bp->bp_port_max_age = cu->cu_max_age;
+			bp->bp_port_fdelay = cu->cu_forward_delay;
+			bp->bp_port_htime =
+				(cu->cu_hello_time > BSTP_MIN_HELLO_TIME ?
+				 cu->cu_hello_time : BSTP_MIN_HELLO_TIME);
+
+			/* set expiry for the new info */
+			bstp_set_timer_msgage(bp);
+
+			bp->bp_infois = BSTP_INFO_RECEIVED;
+			bstp_assign_roles(bs);
+			break;
+
+		case BSTP_PDU_REPEATED:
+			if (cu->cu_proposal && cu->cu_forwarding == 0)
+				bp->bp_proposed = 1;
+			if (cu->cu_topology_change)
+				bp->bp_rcvdtc = 1;
+			if (cu->cu_topology_change_ack)
+				bp->bp_rcvdtca = 1;
+
+			/* rearm the age timer */
+			bstp_set_timer_msgage(bp);
+			break;
+
+		case BSTP_PDU_INFERIOR:
+			if (cu->cu_learning) {
+				bp->bp_agreed = 1;
+				bp->bp_proposing = 0;
+			}
+			break;
+
+		case BSTP_PDU_INFERIORALT:
+			/*
+			 * Only point-to-point links are allowed fast
+			 * transitions to forwarding.
+			 */
+			if (cu->cu_agree && bp->bp_ptp_link) {
+				bp->bp_agreed = 1;
+				bp->bp_proposing = 0;
+			} else
+				bp->bp_agreed = 0;
+
+			if (cu->cu_topology_change)
+				bp->bp_rcvdtc = 1;
+			if (cu->cu_topology_change_ack)
+				bp->bp_rcvdtca = 1;
+			break;
+
+		case BSTP_PDU_OTHER:
+			return;	/* do nothing */
+	}
+	/* update the state machines with the new data */
+	bstp_update_state(bs, bp);
+}
+
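+/*
+ * Classify a received BPDU against the information currently held on
+ * the port: superior, repeated, inferior, an inferior alternate, or
+ * unrelated.
+ */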
+static int
+bstp_pdu_rcvtype(struct bstp_port *bp, struct bstp_config_unit *cu)
+{
+	int type;
+
+	/* default return type */
+	type = BSTP_PDU_OTHER;
+
+	switch (cu->cu_role) {
+	case BSTP_ROLE_DESIGNATED:
+		if (bstp_info_superior(&bp->bp_port_pv, &cu->cu_pv))
+			/* bpdu priority is superior */
+			type = BSTP_PDU_SUPERIOR;
+		else if (bstp_info_cmp(&bp->bp_port_pv, &cu->cu_pv) ==
+		    INFO_SAME) {
+			if (bp->bp_port_msg_age != cu->cu_message_age ||
+			    bp->bp_port_max_age != cu->cu_max_age ||
+			    bp->bp_port_fdelay != cu->cu_forward_delay ||
+			    bp->bp_port_htime != cu->cu_hello_time)
+				/* bpdu priority is equal and timers differ */
+				type = BSTP_PDU_SUPERIOR;
+			else
+				/* bpdu is equal */
+				type = BSTP_PDU_REPEATED;
+		} else
+			/* bpdu priority is worse */
+			type = BSTP_PDU_INFERIOR;
+
+		break;
+
+	case BSTP_ROLE_ROOT:
+	case BSTP_ROLE_ALTERNATE:
+	case BSTP_ROLE_BACKUP:
+		if (bstp_info_cmp(&bp->bp_port_pv, &cu->cu_pv) <= INFO_SAME)
+			/*
+			 * not a designated port and priority is the same or
+			 * worse
+			 */
+			type = BSTP_PDU_INFERIORALT;
+		break;
+	}
+
+	return (type);
+}
+
+static int
+bstp_pdu_bettersame(struct bstp_port *bp, int newinfo)
+{
+	if (newinfo == BSTP_INFO_RECEIVED &&
+	    bp->bp_infois == BSTP_INFO_RECEIVED &&
+	    bstp_info_cmp(&bp->bp_port_pv, &bp->bp_msg_cu.cu_pv) >= INFO_SAME)
+		return (1);
+
+	if (newinfo == BSTP_INFO_MINE &&
+	    bp->bp_infois == BSTP_INFO_MINE &&
+	    bstp_info_cmp(&bp->bp_port_pv, &bp->bp_desg_pv) >= INFO_SAME)
+		return (1);
+
+	return (0);
+}
+
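+/*
+ * Compare two priority vectors field by field in the order root ID,
+ * root path cost, designated bridge ID, designated port ID; numerically
+ * lower values win.  Returns whether cpv is better than, the same as,
+ * or worse than pv.
+ */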
+static int
+bstp_info_cmp(struct bstp_pri_vector *pv,
+    struct bstp_pri_vector *cpv)
+{
+	if (cpv->pv_root_id < pv->pv_root_id)
+		return (INFO_BETTER);
+	if (cpv->pv_root_id > pv->pv_root_id)
+		return (INFO_WORSE);
+
+	if (cpv->pv_cost < pv->pv_cost)
+		return (INFO_BETTER);
+	if (cpv->pv_cost > pv->pv_cost)
+		return (INFO_WORSE);
+
+	if (cpv->pv_dbridge_id < pv->pv_dbridge_id)
+		return (INFO_BETTER);
+	if (cpv->pv_dbridge_id > pv->pv_dbridge_id)
+		return (INFO_WORSE);
+
+	if (cpv->pv_dport_id < pv->pv_dport_id)
+		return (INFO_BETTER);
+	if (cpv->pv_dport_id > pv->pv_dport_id)
+		return (INFO_WORSE);
+
+	return (INFO_SAME);
+}
+
+/*
+ * This message priority vector is superior to the port priority vector and
+ * will replace it if, and only if, the message priority vector is better than
+ * the port priority vector, or the message has been transmitted from the same
+ * designated bridge and designated port as the port priority vector.
+ */
+static int
+bstp_info_superior(struct bstp_pri_vector *pv,
+    struct bstp_pri_vector *cpv)
+{
+	if (bstp_info_cmp(pv, cpv) == INFO_BETTER ||
+	    (bstp_same_bridgeid(pv->pv_dbridge_id, cpv->pv_dbridge_id) &&
+	    (cpv->pv_dport_id & 0xfff) == (pv->pv_dport_id & 0xfff)))
+		return (1);
+	return (0);
+}
+
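+/*
+ * Recompute the root priority vector for the bridge and assign every
+ * port its role (root, designated, alternate, backup or disabled),
+ * following the 802.1D port role selection rules.
+ */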
+static void
+bstp_assign_roles(struct bstp_state *bs)
+{
+	struct bstp_port *bp, *rbp = NULL;
+	struct bstp_pri_vector pv;
+
+	/* default to our priority vector */
+	bs->bs_root_pv = bs->bs_bridge_pv;
+	bs->bs_root_msg_age = 0;
+	bs->bs_root_max_age = bs->bs_bridge_max_age;
+	bs->bs_root_fdelay = bs->bs_bridge_fdelay;
+	bs->bs_root_htime = bs->bs_bridge_htime;
+	bs->bs_root_port = NULL;
+
+	/* check if any received info supersedes us */
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+		if (bp->bp_infois != BSTP_INFO_RECEIVED)
+			continue;
+
+		pv = bp->bp_port_pv;
+		pv.pv_cost += bp->bp_path_cost;
+
+		/*
+		 * The root priority vector is the best of the set comprising
+		 * the bridge priority vector plus all root path priority
+		 * vectors whose bridge address is not equal to us.
+		 */
+		if (bstp_same_bridgeid(pv.pv_dbridge_id,
+		    bs->bs_bridge_pv.pv_dbridge_id) == 0 &&
+		    bstp_info_cmp(&bs->bs_root_pv, &pv) == INFO_BETTER) {
+			/* the port vector replaces the root */
+			bs->bs_root_pv = pv;
+			bs->bs_root_msg_age = bp->bp_port_msg_age +
+			    BSTP_MESSAGE_AGE_INCR;
+			bs->bs_root_max_age = bp->bp_port_max_age;
+			bs->bs_root_fdelay = bp->bp_port_fdelay;
+			bs->bs_root_htime = bp->bp_port_htime;
+			rbp = bp;
+		}
+	}
+
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+		/* calculate the port designated vector */
+		bp->bp_desg_pv.pv_root_id = bs->bs_root_pv.pv_root_id;
+		bp->bp_desg_pv.pv_cost = bs->bs_root_pv.pv_cost;
+		bp->bp_desg_pv.pv_dbridge_id = bs->bs_bridge_pv.pv_dbridge_id;
+		bp->bp_desg_pv.pv_dport_id = bp->bp_port_id;
+		bp->bp_desg_pv.pv_port_id = bp->bp_port_id;
+
+		/* calculate designated times */
+		bp->bp_desg_msg_age = bs->bs_root_msg_age;
+		bp->bp_desg_max_age = bs->bs_root_max_age;
+		bp->bp_desg_fdelay = bs->bs_root_fdelay;
+		bp->bp_desg_htime = bs->bs_bridge_htime;
+
+		switch (bp->bp_infois) {
+		case BSTP_INFO_DISABLED:
+			bstp_set_port_role(bp, BSTP_ROLE_DISABLED);
+			break;
+
+		case BSTP_INFO_AGED:
+			bstp_set_port_role(bp, BSTP_ROLE_DESIGNATED);
+			bstp_update_info(bp);
+			break;
+
+		case BSTP_INFO_MINE:
+			bstp_set_port_role(bp, BSTP_ROLE_DESIGNATED);
+			/* update the port info if stale */
+			if (bstp_info_cmp(&bp->bp_port_pv,
+			    &bp->bp_desg_pv) != INFO_SAME ||
+			    (rbp != NULL &&
+			    (bp->bp_port_msg_age != rbp->bp_port_msg_age ||
+			    bp->bp_port_max_age != rbp->bp_port_max_age ||
+			    bp->bp_port_fdelay != rbp->bp_port_fdelay ||
+			    bp->bp_port_htime != rbp->bp_port_htime)))
+				bstp_update_info(bp);
+			break;
+
+		case BSTP_INFO_RECEIVED:
+			if (bp == rbp) {
+				/*
+				 * root priority is derived from this
+				 * port, make it the root port.
+				 */
+				bstp_set_port_role(bp, BSTP_ROLE_ROOT);
+				bs->bs_root_port = bp;
+			} else if (bstp_info_cmp(&bp->bp_port_pv,
+				    &bp->bp_desg_pv) == INFO_BETTER) {
+				/*
+				 * the port priority is lower than the root
+				 * port.
+				 */
+				bstp_set_port_role(bp, BSTP_ROLE_DESIGNATED);
+				bstp_update_info(bp);
+			} else {
+				if (bstp_same_bridgeid(
+				    bp->bp_port_pv.pv_dbridge_id,
+				    bs->bs_bridge_pv.pv_dbridge_id)) {
+					/*
+					 * the designated bridge refers to
+					 * another port on this bridge.
+					 */
+					bstp_set_port_role(bp,
+					    BSTP_ROLE_BACKUP);
+				} else {
+					/*
+					 * the port is an inferior path to the
+					 * root bridge.
+					 */
+					bstp_set_port_role(bp,
+					    BSTP_ROLE_ALTERNATE);
+				}
+			}
+			break;
+		}
+	}
+}
+
+static void
+bstp_update_state(struct bstp_state *bs, struct bstp_port *bp)
+{
+	struct bstp_port *bp2;
+	int synced;
+
+	BSTP_LOCK_ASSERT(bs);
+
+	/* check if all the ports have synchronised again */
+	if (!bs->bs_allsynced) {
+		synced = 1;
+		LIST_FOREACH(bp2, &bs->bs_bplist, bp_next) {
+			if (!(bp2->bp_synced ||
+			     bp2->bp_role == BSTP_ROLE_ROOT)) {
+				synced = 0;
+				break;
+			}
+		}
+		bs->bs_allsynced = synced;
+	}
+
+	bstp_update_roles(bs, bp);
+	bstp_update_tc(bp);
+}
+
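+/*
+ * Per-port role state machine: depending on the current role, handle
+ * sync/agreement proposals, rerooting and the discard -> learn ->
+ * forward progression, then transmit if new info is pending.
+ */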
+static void
+bstp_update_roles(struct bstp_state *bs, struct bstp_port *bp)
+{
+	switch (bp->bp_role) {
+	case BSTP_ROLE_DISABLED:
+		/* Clear any flags if set */
+		if (bp->bp_sync || !bp->bp_synced || bp->bp_reroot) {
+			bp->bp_sync = 0;
+			bp->bp_synced = 1;
+			bp->bp_reroot = 0;
+		}
+		break;
+
+	case BSTP_ROLE_ALTERNATE:
+	case BSTP_ROLE_BACKUP:
+		if ((bs->bs_allsynced && !bp->bp_agree) ||
+		    (bp->bp_proposed && bp->bp_agree)) {
+			bp->bp_proposed = 0;
+			bp->bp_agree = 1;
+			bp->bp_flags |= BSTP_PORT_NEWINFO;
+			DPRINTF("%s -> ALTERNATE_AGREED\n",
+			    bp->bp_ifp->if_xname);
+		}
+
+		if (bp->bp_proposed && !bp->bp_agree) {
+			bstp_set_all_sync(bs);
+			bp->bp_proposed = 0;
+			DPRINTF("%s -> ALTERNATE_PROPOSED\n",
+			    bp->bp_ifp->if_xname);
+		}
+
+		/* Clear any flags if set */
+		if (bp->bp_sync || !bp->bp_synced || bp->bp_reroot) {
+			bp->bp_sync = 0;
+			bp->bp_synced = 1;
+			bp->bp_reroot = 0;
+			DPRINTF("%s -> ALTERNATE_PORT\n", bp->bp_ifp->if_xname);
+		}
+		break;
+
+	case BSTP_ROLE_ROOT:
+		if (bp->bp_state != BSTP_IFSTATE_FORWARDING && !bp->bp_reroot) {
+			bstp_set_all_reroot(bs);
+			DPRINTF("%s -> ROOT_REROOT\n", bp->bp_ifp->if_xname);
+		}
+
+		if ((bs->bs_allsynced && !bp->bp_agree) ||
+		    (bp->bp_proposed && bp->bp_agree)) {
+			bp->bp_proposed = 0;
+			bp->bp_sync = 0;
+			bp->bp_agree = 1;
+			bp->bp_flags |= BSTP_PORT_NEWINFO;
+			DPRINTF("%s -> ROOT_AGREED\n", bp->bp_ifp->if_xname);
+		}
+
+		if (bp->bp_proposed && !bp->bp_agree) {
+			bstp_set_all_sync(bs);
+			bp->bp_proposed = 0;
+			DPRINTF("%s -> ROOT_PROPOSED\n", bp->bp_ifp->if_xname);
+		}
+
+		if (bp->bp_state != BSTP_IFSTATE_FORWARDING &&
+		    (bp->bp_forward_delay_timer.active == 0 ||
+		    (bstp_rerooted(bs, bp) &&
+		    bp->bp_recent_backup_timer.active == 0 &&
+		    bp->bp_protover == BSTP_PROTO_RSTP))) {
+			switch (bp->bp_state) {
+			case BSTP_IFSTATE_DISCARDING:
+				bstp_set_port_state(bp, BSTP_IFSTATE_LEARNING);
+				break;
+			case BSTP_IFSTATE_LEARNING:
+				bstp_set_port_state(bp,
+				    BSTP_IFSTATE_FORWARDING);
+				break;
+			}
+		}
+
+		if (bp->bp_state == BSTP_IFSTATE_FORWARDING && bp->bp_reroot) {
+			bp->bp_reroot = 0;
+			DPRINTF("%s -> ROOT_REROOTED\n", bp->bp_ifp->if_xname);
+		}
+		break;
+
+	case BSTP_ROLE_DESIGNATED:
+		if (bp->bp_recent_root_timer.active == 0 && bp->bp_reroot) {
+			bp->bp_reroot = 0;
+			DPRINTF("%s -> DESIGNATED_RETIRED\n",
+			    bp->bp_ifp->if_xname);
+		}
+
+		if ((bp->bp_state == BSTP_IFSTATE_DISCARDING &&
+		    !bp->bp_synced) || (bp->bp_agreed && !bp->bp_synced) ||
+		    (bp->bp_operedge && !bp->bp_synced) ||
+		    (bp->bp_sync && bp->bp_synced)) {
+			bstp_timer_stop(&bp->bp_recent_root_timer);
+			bp->bp_synced = 1;
+			bp->bp_sync = 0;
+			DPRINTF("%s -> DESIGNATED_SYNCED\n",
+			    bp->bp_ifp->if_xname);
+		}
+
+		if (bp->bp_state != BSTP_IFSTATE_FORWARDING &&
+		    !bp->bp_agreed && !bp->bp_proposing &&
+		    !bp->bp_operedge) {
+			bp->bp_proposing = 1;
+			bp->bp_flags |= BSTP_PORT_NEWINFO;
+			bstp_timer_start(&bp->bp_edge_delay_timer,
+			    (bp->bp_ptp_link ? BSTP_DEFAULT_MIGRATE_DELAY :
+			     bp->bp_desg_max_age));
+			DPRINTF("%s -> DESIGNATED_PROPOSE\n",
+			    bp->bp_ifp->if_xname);
+		}
+
+		if (bp->bp_state != BSTP_IFSTATE_FORWARDING &&
+		    (bp->bp_forward_delay_timer.active == 0 || bp->bp_agreed ||
+		    bp->bp_operedge) &&
+		    (bp->bp_recent_root_timer.active == 0 || !bp->bp_reroot) &&
+		    !bp->bp_sync) {
+#ifdef  BRIDGESTP_DEBUG
+			if (bp->bp_agreed)
+				DPRINTF("%s -> AGREED\n", bp->bp_ifp->if_xname);
+#endif /* BRIDGESTP_DEBUG */
+			/*
+			 * If agreed|operedge then go straight to forwarding,
+			 * otherwise follow discard -> learn -> forward.
+			 */
+			if (bp->bp_agreed || bp->bp_operedge ||
+			    bp->bp_state == BSTP_IFSTATE_LEARNING) {
+				bstp_set_port_state(bp,
+				    BSTP_IFSTATE_FORWARDING);
+				bp->bp_agreed = bp->bp_protover;
+			} else if (bp->bp_state == BSTP_IFSTATE_DISCARDING)
+				bstp_set_port_state(bp, BSTP_IFSTATE_LEARNING);
+		}
+
+		if (((bp->bp_sync && !bp->bp_synced) ||
+		    (bp->bp_reroot && bp->bp_recent_root_timer.active) ||
+		    (bp->bp_flags & BSTP_PORT_DISPUTED)) && !bp->bp_operedge &&
+		    bp->bp_state != BSTP_IFSTATE_DISCARDING) {
+			bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
+			bp->bp_flags &= ~BSTP_PORT_DISPUTED;
+			bstp_timer_start(&bp->bp_forward_delay_timer,
+			    bp->bp_protover == BSTP_PROTO_RSTP ?
+			    bp->bp_desg_htime : bp->bp_desg_fdelay);
+			DPRINTF("%s -> DESIGNATED_DISCARD\n",
+			    bp->bp_ifp->if_xname);
+		}
+		break;
+	}
+
+	if (bp->bp_flags & BSTP_PORT_NEWINFO)
+		bstp_transmit(bs, bp);
+}
+
+static void
+bstp_update_tc(struct bstp_port *bp)
+{
+	switch (bp->bp_tcstate) {
+		case BSTP_TCSTATE_ACTIVE:
+			if ((bp->bp_role != BSTP_ROLE_DESIGNATED &&
+			    bp->bp_role != BSTP_ROLE_ROOT) || bp->bp_operedge)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_LEARNING);
+
+			if (bp->bp_rcvdtcn)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_TCN);
+			if (bp->bp_rcvdtc)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_TC);
+
+			if (bp->bp_tc_prop && !bp->bp_operedge)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_PROPAG);
+
+			if (bp->bp_rcvdtca)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_ACK);
+			break;
+
+		case BSTP_TCSTATE_INACTIVE:
+			if ((bp->bp_state == BSTP_IFSTATE_LEARNING ||
+			    bp->bp_state == BSTP_IFSTATE_FORWARDING) &&
+			    bp->bp_fdbflush == 0)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_LEARNING);
+			break;
+
+		case BSTP_TCSTATE_LEARNING:
+			if (bp->bp_rcvdtc || bp->bp_rcvdtcn || bp->bp_rcvdtca ||
+			    bp->bp_tc_prop)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_LEARNING);
+			else if (bp->bp_role != BSTP_ROLE_DESIGNATED &&
+				 bp->bp_role != BSTP_ROLE_ROOT &&
+				 bp->bp_state == BSTP_IFSTATE_DISCARDING)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_INACTIVE);
+
+			if ((bp->bp_role == BSTP_ROLE_DESIGNATED ||
+			    bp->bp_role == BSTP_ROLE_ROOT) &&
+			    bp->bp_state == BSTP_IFSTATE_FORWARDING &&
+			    !bp->bp_operedge)
+				bstp_set_port_tc(bp, BSTP_TCSTATE_DETECTED);
+			break;
+
+		/* these are transient states and go straight back to ACTIVE */
+		case BSTP_TCSTATE_DETECTED:
+		case BSTP_TCSTATE_TCN:
+		case BSTP_TCSTATE_TC:
+		case BSTP_TCSTATE_PROPAG:
+		case BSTP_TCSTATE_ACK:
+			DPRINTF("Invalid TC state for %s\n",
+			    bp->bp_ifp->if_xname);
+			break;
+	}
+
+}
+
+static void
+bstp_update_info(struct bstp_port *bp)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	bp->bp_proposing = 0;
+	bp->bp_proposed = 0;
+
+	if (bp->bp_agreed && !bstp_pdu_bettersame(bp, BSTP_INFO_MINE))
+		bp->bp_agreed = 0;
+
+	if (bp->bp_synced && !bp->bp_agreed) {
+		bp->bp_synced = 0;
+		bs->bs_allsynced = 0;
+	}
+
+	/* copy the designated pv to the port */
+	bp->bp_port_pv = bp->bp_desg_pv;
+	bp->bp_port_msg_age = bp->bp_desg_msg_age;
+	bp->bp_port_max_age = bp->bp_desg_max_age;
+	bp->bp_port_fdelay = bp->bp_desg_fdelay;
+	bp->bp_port_htime = bp->bp_desg_htime;
+	bp->bp_infois = BSTP_INFO_MINE;
+
+	/* Set transmit flag but do not immediately send */
+	bp->bp_flags |= BSTP_PORT_NEWINFO;
+}
+
+/* set tcprop on every port other than the caller */
+static void
+bstp_set_other_tcprop(struct bstp_port *bp)
+{
+	struct bstp_state *bs = bp->bp_bs;
+	struct bstp_port *bp2;
+
+	BSTP_LOCK_ASSERT(bs);
+
+	LIST_FOREACH(bp2, &bs->bs_bplist, bp_next) {
+		if (bp2 == bp)
+			continue;
+		bp2->bp_tc_prop = 1;
+	}
+}
+
+static void
+bstp_set_all_reroot(struct bstp_state *bs)
+{
+	struct bstp_port *bp;
+
+	BSTP_LOCK_ASSERT(bs);
+
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next)
+		bp->bp_reroot = 1;
+}
+
+static void
+bstp_set_all_sync(struct bstp_state *bs)
+{
+	struct bstp_port *bp;
+
+	BSTP_LOCK_ASSERT(bs);
+
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+		bp->bp_sync = 1;
+		bp->bp_synced = 0;	/* Not explicit in spec */
+	}
+
+	bs->bs_allsynced = 0;
+}
+
+static void
+bstp_set_port_state(struct bstp_port *bp, int state)
+{
+	if (bp->bp_state == state)
+		return;
+
+	bp->bp_state = state;
+
+	switch (bp->bp_state) {
+		case BSTP_IFSTATE_DISCARDING:
+			DPRINTF("state changed to DISCARDING on %s\n",
+			    bp->bp_ifp->if_xname);
+			break;
+
+		case BSTP_IFSTATE_LEARNING:
+			DPRINTF("state changed to LEARNING on %s\n",
+			    bp->bp_ifp->if_xname);
+
+			bstp_timer_start(&bp->bp_forward_delay_timer,
+			    bp->bp_protover == BSTP_PROTO_RSTP ?
+			    bp->bp_desg_htime : bp->bp_desg_fdelay);
+			break;
+
+		case BSTP_IFSTATE_FORWARDING:
+			DPRINTF("state changed to FORWARDING on %s\n",
+			    bp->bp_ifp->if_xname);
+
+			bstp_timer_stop(&bp->bp_forward_delay_timer);
+			/* Record that we enabled forwarding */
+			bp->bp_forward_transitions++;
+			break;
+	}
+
+	/* notify the parent bridge */
+	bstp_task_enqueue(&bp->bp_statetask);
+}
+
+static void
+bstp_set_port_role(struct bstp_port *bp, int role)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	if (bp->bp_role == role)
+		return;
+
+	/* perform pre-change tasks */
+	switch (bp->bp_role) {
+		case BSTP_ROLE_DISABLED:
+			bstp_timer_start(&bp->bp_forward_delay_timer,
+			    bp->bp_desg_max_age);
+			break;
+
+		case BSTP_ROLE_BACKUP:
+			bstp_timer_start(&bp->bp_recent_backup_timer,
+			    bp->bp_desg_htime * 2);
+			/* fall through */
+		case BSTP_ROLE_ALTERNATE:
+			bstp_timer_start(&bp->bp_forward_delay_timer,
+			    bp->bp_desg_fdelay);
+			bp->bp_sync = 0;
+			bp->bp_synced = 1;
+			bp->bp_reroot = 0;
+			break;
+
+		case BSTP_ROLE_ROOT:
+			bstp_timer_start(&bp->bp_recent_root_timer,
+			    BSTP_DEFAULT_FORWARD_DELAY);
+			break;
+	}
+
+	bp->bp_role = role;
+	/* clear values not carried between roles */
+	bp->bp_proposing = 0;
+	bs->bs_allsynced = 0;
+
+	/* initialise the new role */
+	switch (bp->bp_role) {
+		case BSTP_ROLE_DISABLED:
+		case BSTP_ROLE_ALTERNATE:
+		case BSTP_ROLE_BACKUP:
+			DPRINTF("%s role -> ALT/BACK/DISABLED\n",
+			    bp->bp_ifp->if_xname);
+			bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
+			bstp_timer_stop(&bp->bp_recent_root_timer);
+			bstp_timer_latch(&bp->bp_forward_delay_timer);
+			bp->bp_sync = 0;
+			bp->bp_synced = 1;
+			bp->bp_reroot = 0;
+			break;
+
+		case BSTP_ROLE_ROOT:
+			DPRINTF("%s role -> ROOT\n",
+			    bp->bp_ifp->if_xname);
+			bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
+			bstp_timer_latch(&bp->bp_recent_root_timer);
+			bp->bp_proposing = 0;
+			break;
+
+		case BSTP_ROLE_DESIGNATED:
+			DPRINTF("%s role -> DESIGNATED\n",
+			    bp->bp_ifp->if_xname);
+			bstp_timer_start(&bp->bp_hello_timer,
+			    bp->bp_desg_htime);
+			bp->bp_agree = 0;
+			break;
+	}
+
+	/* let the TC state know that the role changed */
+	bstp_update_tc(bp);
+}
+
+static void
+bstp_set_port_proto(struct bstp_port *bp, int proto)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	/* supported protocol versions */
+	switch (proto) {
+		case BSTP_PROTO_STP:
+			/* we can downgrade protocols only */
+			bstp_timer_stop(&bp->bp_migrate_delay_timer);
+			/* clear unsupported features */
+			bp->bp_operedge = 0;
+			/* STP compat mode only uses 16 bits of the 32 */
+			if (bp->bp_path_cost > 65535)
+				bp->bp_path_cost = 65535;
+			break;
+
+		case BSTP_PROTO_RSTP:
+			bstp_timer_start(&bp->bp_migrate_delay_timer,
+			    bs->bs_migration_delay);
+			break;
+
+		default:
+			DPRINTF("Unsupported STP version %d\n", proto);
+			return;
+	}
+
+	bp->bp_protover = proto;
+	bp->bp_flags &= ~BSTP_PORT_CANMIGRATE;
+}
+
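+/*
+ * Enter a new topology change state and run its entry actions.  The
+ * states marked "UCT" below take an unconditional transition straight
+ * back to ACTIVE once their actions have run, mirroring the 802.1D
+ * topology change state machine.
+ */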
+static void
+bstp_set_port_tc(struct bstp_port *bp, int state)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	bp->bp_tcstate = state;
+
+	/* initialise the new state */
+	switch (bp->bp_tcstate) {
+		case BSTP_TCSTATE_ACTIVE:
+			DPRINTF("%s -> TC_ACTIVE\n", bp->bp_ifp->if_xname);
+			/* nothing to do */
+			break;
+
+		case BSTP_TCSTATE_INACTIVE:
+			bstp_timer_stop(&bp->bp_tc_timer);
+			/* flush routes on the parent bridge */
+			bp->bp_fdbflush = 1;
+			bstp_task_enqueue(&bp->bp_rtagetask);
+			bp->bp_tc_ack = 0;
+			DPRINTF("%s -> TC_INACTIVE\n", bp->bp_ifp->if_xname);
+			break;
+
+		case BSTP_TCSTATE_LEARNING:
+			bp->bp_rcvdtc = 0;
+			bp->bp_rcvdtcn = 0;
+			bp->bp_rcvdtca = 0;
+			bp->bp_tc_prop = 0;
+			DPRINTF("%s -> TC_LEARNING\n", bp->bp_ifp->if_xname);
+			break;
+
+		case BSTP_TCSTATE_DETECTED:
+			bstp_set_timer_tc(bp);
+			bstp_set_other_tcprop(bp);
+			/* send out notification */
+			bp->bp_flags |= BSTP_PORT_NEWINFO;
+			bstp_transmit(bs, bp);
+			/* reviewed for getmicrotime usage */
+			getmicrotime(&bs->bs_last_tc_time);
+			DPRINTF("%s -> TC_DETECTED\n", bp->bp_ifp->if_xname);
+			bp->bp_tcstate = BSTP_TCSTATE_ACTIVE; /* UCT */
+			break;
+
+		case BSTP_TCSTATE_TCN:
+			bstp_set_timer_tc(bp);
+			DPRINTF("%s -> TC_TCN\n", bp->bp_ifp->if_xname);
+			/* fall through */
+		case BSTP_TCSTATE_TC:
+			bp->bp_rcvdtc = 0;
+			bp->bp_rcvdtcn = 0;
+			if (bp->bp_role == BSTP_ROLE_DESIGNATED)
+				bp->bp_tc_ack = 1;
+
+			bstp_set_other_tcprop(bp);
+			DPRINTF("%s -> TC_TC\n", bp->bp_ifp->if_xname);
+			bp->bp_tcstate = BSTP_TCSTATE_ACTIVE; /* UCT */
+			break;
+
+		case BSTP_TCSTATE_PROPAG:
+			/* flush routes on the parent bridge */
+			bp->bp_fdbflush = 1;
+			bstp_task_enqueue(&bp->bp_rtagetask);
+			bp->bp_tc_prop = 0;
+			bstp_set_timer_tc(bp);
+			DPRINTF("%s -> TC_PROPAG\n", bp->bp_ifp->if_xname);
+			bp->bp_tcstate = BSTP_TCSTATE_ACTIVE; /* UCT */
+			break;
+
+		case BSTP_TCSTATE_ACK:
+			bstp_timer_stop(&bp->bp_tc_timer);
+			bp->bp_rcvdtca = 0;
+			DPRINTF("%s -> TC_ACK\n", bp->bp_ifp->if_xname);
+			bp->bp_tcstate = BSTP_TCSTATE_ACTIVE; /* UCT */
+			break;
+	}
+}
+
+static void
+bstp_set_timer_tc(struct bstp_port *bp)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	if (bp->bp_tc_timer.active)
+		return;
+
+	switch (bp->bp_protover) {
+		case BSTP_PROTO_RSTP:
+			bstp_timer_start(&bp->bp_tc_timer,
+			    bp->bp_desg_htime + BSTP_TICK_VAL);
+			bp->bp_flags |= BSTP_PORT_NEWINFO;
+			break;
+
+		case BSTP_PROTO_STP:
+			bstp_timer_start(&bp->bp_tc_timer,
+			    bs->bs_root_max_age + bs->bs_root_fdelay);
+			break;
+	}
+}
+
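+/*
+ * Arm the message age timer for newly received info.  The info is kept
+ * for three hello times while it is still within max age; otherwise the
+ * timer is started at zero so it expires on the next tick.
+ */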
+static void
+bstp_set_timer_msgage(struct bstp_port *bp)
+{
+	if (bp->bp_port_msg_age + BSTP_MESSAGE_AGE_INCR <=
+	    bp->bp_port_max_age) {
+		bstp_timer_start(&bp->bp_message_age_timer,
+		    bp->bp_port_htime * 3);
+	} else
+		/* expires immediately */
+		bstp_timer_start(&bp->bp_message_age_timer, 0);
+}
+
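+/*
+ * The bridge is considered rerooted relative to bp once no other port
+ * still has its recent root timer running.
+ */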
+static int
+bstp_rerooted(struct bstp_state *bs, struct bstp_port *bp)
+{
+	struct bstp_port *bp2;
+	int rr_set = 0;
+
+	LIST_FOREACH(bp2, &bs->bs_bplist, bp_next) {
+		if (bp2 == bp)
+			continue;
+		if (bp2->bp_recent_root_timer.active) {
+			rr_set = 1;
+			break;
+		}
+	}
+	return (!rr_set);
+}
+
+int
+bstp_set_htime(struct bstp_state *bs, int t)
+{
+	/* convert seconds to ticks */
+	t *= BSTP_TICK_VAL;
+
+	/* value can only be changed in legacy STP mode */
+	if (bs->bs_protover != BSTP_PROTO_STP)
+		return (EPERM);
+
+	if (t < BSTP_MIN_HELLO_TIME || t > BSTP_MAX_HELLO_TIME)
+		return (EINVAL);
+
+	BSTP_LOCK(bs);
+	bs->bs_bridge_htime = t;
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_fdelay(struct bstp_state *bs, int t)
+{
+	/* convert seconds to ticks */
+	t *= BSTP_TICK_VAL;
+
+	if (t < BSTP_MIN_FORWARD_DELAY || t > BSTP_MAX_FORWARD_DELAY)
+		return (EINVAL);
+
+	BSTP_LOCK(bs);
+	bs->bs_bridge_fdelay = t;
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_maxage(struct bstp_state *bs, int t)
+{
+	/* convert seconds to ticks */
+	t *= BSTP_TICK_VAL;
+
+	if (t < BSTP_MIN_MAX_AGE || t > BSTP_MAX_MAX_AGE)
+		return (EINVAL);
+
+	BSTP_LOCK(bs);
+	bs->bs_bridge_max_age = t;
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_holdcount(struct bstp_state *bs, int count)
+{
+	struct bstp_port *bp;
+
+	if (count < BSTP_MIN_HOLD_COUNT ||
+	    count > BSTP_MAX_HOLD_COUNT)
+		return (EINVAL);
+
+	BSTP_LOCK(bs);
+	bs->bs_txholdcount = count;
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next)
+		bp->bp_txcount = 0;
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_protocol(struct bstp_state *bs, int proto)
+{
+	struct bstp_port *bp;
+
+	switch (proto) {
+		/* Supported protocol versions */
+		case BSTP_PROTO_STP:
+		case BSTP_PROTO_RSTP:
+			break;
+
+		default:
+			return (EINVAL);
+	}
+
+	BSTP_LOCK(bs);
+	bs->bs_protover = proto;
+	bs->bs_bridge_htime = BSTP_DEFAULT_HELLO_TIME;
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+		/* reinit state */
+		bp->bp_infois = BSTP_INFO_DISABLED;
+		bp->bp_txcount = 0;
+		bstp_set_port_proto(bp, bs->bs_protover);
+		bstp_set_port_role(bp, BSTP_ROLE_DISABLED);
+		bstp_set_port_tc(bp, BSTP_TCSTATE_INACTIVE);
+		bstp_timer_stop(&bp->bp_recent_backup_timer);
+	}
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_priority(struct bstp_state *bs, int pri)
+{
+	if (pri < 0 || pri > BSTP_MAX_PRIORITY)
+		return (EINVAL);
+
+	/* Limit to steps of 4096 */
+	pri -= pri % 4096;
+
+	BSTP_LOCK(bs);
+	bs->bs_bridge_priority = pri;
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_port_priority(struct bstp_port *bp, int pri)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	if (pri < 0 || pri > BSTP_MAX_PORT_PRIORITY)
+		return (EINVAL);
+
+	/* Limit to steps of 16 */
+	pri -= pri % 16;
+
+	BSTP_LOCK(bs);
+	bp->bp_priority = pri;
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_path_cost(struct bstp_port *bp, uint32_t path_cost)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	if (path_cost > BSTP_MAX_PATH_COST)
+		return (EINVAL);
+
+	/* STP compat mode only uses 16 bits of the 32 */
+	if (bp->bp_protover == BSTP_PROTO_STP && path_cost > 65535)
+		path_cost = 65535;
+
+	BSTP_LOCK(bs);
+
+	if (path_cost == 0) {	/* use auto */
+		bp->bp_flags &= ~BSTP_PORT_ADMCOST;
+		bp->bp_path_cost = bstp_calc_path_cost(bp);
+	} else {
+		bp->bp_path_cost = path_cost;
+		bp->bp_flags |= BSTP_PORT_ADMCOST;
+	}
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_edge(struct bstp_port *bp, int set)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	BSTP_LOCK(bs);
+	if ((bp->bp_operedge = set) == 0)
+		bp->bp_flags &= ~BSTP_PORT_ADMEDGE;
+	else
+		bp->bp_flags |= BSTP_PORT_ADMEDGE;
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_autoedge(struct bstp_port *bp, int set)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	BSTP_LOCK(bs);
+	if (set) {
+		bp->bp_flags |= BSTP_PORT_AUTOEDGE;
+		/* we may be able to transition straight to edge */
+		if (bp->bp_edge_delay_timer.active == 0)
+			bstp_edge_delay_expiry(bs, bp);
+	} else
+		bp->bp_flags &= ~BSTP_PORT_AUTOEDGE;
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_ptp(struct bstp_port *bp, int set)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	BSTP_LOCK(bs);
+	bp->bp_ptp_link = set;
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_set_autoptp(struct bstp_port *bp, int set)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	BSTP_LOCK(bs);
+	if (set) {
+		bp->bp_flags |= BSTP_PORT_AUTOPTP;
+		if (bp->bp_role != BSTP_ROLE_DISABLED)
+			bstp_ifupdstatus(bs, bp);
+	} else
+		bp->bp_flags &= ~BSTP_PORT_AUTOPTP;
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+/*
+ * Calculate the path cost according to the link speed.
+ */
+static uint32_t
+bstp_calc_path_cost(struct bstp_port *bp)
+{
+	struct ifnet *ifp = bp->bp_ifp;
+	uint32_t path_cost;
+
+	/* If the priority has been manually set then retain the value */
+	if (bp->bp_flags & BSTP_PORT_ADMCOST)
+		return bp->bp_path_cost;
+
+	if (bp->bp_if_link_state == LINK_STATE_DOWN) {
+		/* Recalc when the link comes up again */
+		bp->bp_flags |= BSTP_PORT_PNDCOST;
+		return (BSTP_DEFAULT_PATH_COST);
+	}
+
+	if (ifp->if_baudrate < 1000)
+		return (BSTP_DEFAULT_PATH_COST);
+
+	/* formula from section 17.14, IEEE Std 802.1D-2004 */
+	path_cost = 20000000000ULL / (ifp->if_baudrate / 1000);
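+	/* e.g. 100 Mb/s -> 200000, 1 Gb/s -> 20000, 10 Gb/s -> 2000 */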
+
+	if (path_cost > BSTP_MAX_PATH_COST)
+		path_cost = BSTP_MAX_PATH_COST;
+
+	/* STP compat mode only uses 16 bits of the 32 */
+	if (bp->bp_protover == BSTP_PROTO_STP && path_cost > 65535)
+		path_cost = 65535;
+
+	return (path_cost);
+}
+
+/*
+ * Notify the bridge that a port state has changed, we need to do this from a
+ * taskqueue to avoid a LOR.
+ */
+static void
+bstp_notify_state(void *arg, __unused int pending)
+{
+	struct bstp_port *bp = (struct bstp_port *)arg;
+	struct bstp_state *bs = bp->bp_bs;
+
+	if (bp->bp_active == 1 && bs->bs_state_cb != NULL)
+		(*bs->bs_state_cb)(bp->bp_ifp, bp->bp_state);
+}
+
+/*
+ * Flush the routes on the bridge port, we need to do this from a
+ * taskqueue to avoid a LOR.
+ */
+static void
+bstp_notify_rtage(void *arg, __unused int pending)
+{
+	struct bstp_port *bp = (struct bstp_port *)arg;
+	struct bstp_state *bs = bp->bp_bs;
+	int age = 0;
+
+	BSTP_LOCK(bs);
+	switch (bp->bp_protover) {
+		case BSTP_PROTO_STP:
+			/* convert to seconds */
+			age = bp->bp_desg_fdelay / BSTP_TICK_VAL;
+			break;
+
+		case BSTP_PROTO_RSTP:
+			age = 0;
+			break;
+	}
+	BSTP_UNLOCK(bs);
+
+	if (bp->bp_active == 1 && bs->bs_rtage_cb != NULL)
+		(*bs->bs_rtage_cb)(bp->bp_ifp, age);
+
+	/* flush is complete */
+	BSTP_LOCK(bs);
+	bp->bp_fdbflush = 0;
+	BSTP_UNLOCK(bs);
+}
+
+void
+bstp_linkstate(struct ifnet *ifp, __unused int state)
+{
+	struct bstp_state *bs;
+	struct bstp_port *bp;
+
+	/* search for the stp port */
+	lck_mtx_lock(bstp_list_mtx);
+	LIST_FOREACH(bs, &bstp_list, bs_list) {
+		BSTP_LOCK(bs);
+		LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+			if (bp->bp_ifp == ifp) {
+				bstp_ifupdstatus(bs, bp);
+				bstp_update_state(bs, bp);
+				/* it only exists once so return */
+				BSTP_UNLOCK(bs);
+				lck_mtx_unlock(bstp_list_mtx);
+				return;
+			}
+		}
+		BSTP_UNLOCK(bs);
+	}
+	lck_mtx_unlock(bstp_list_mtx);
+}
+
+static void
+bstp_ifupdstatus(struct bstp_state *bs, struct bstp_port *bp)
+{
+	struct ifnet *ifp = bp->bp_ifp;
+	struct ifmediareq ifmr;
+	int error = 0;
+
+	BSTP_LOCK_ASSERT(bs);
+
+	bzero((char *)&ifmr, sizeof(ifmr));
+	error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (caddr_t)&ifmr);
+
+	if ((error == 0) && (ifp->if_flags & IFF_UP)) {
+		if (ifmr.ifm_status & IFM_ACTIVE) {
+			/* A full-duplex link is assumed to be point to point */
+			if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
+				bp->bp_ptp_link =
+				    ifmr.ifm_active & IFM_FDX ? 1 : 0;
+			}
+
+			/* Calc the cost if the link was down previously */
+			if (bp->bp_flags & BSTP_PORT_PNDCOST) {
+				bp->bp_path_cost = bstp_calc_path_cost(bp);
+				bp->bp_flags &= ~BSTP_PORT_PNDCOST;
+			}
+
+			if (bp->bp_role == BSTP_ROLE_DISABLED)
+				bstp_enable_port(bs, bp);
+		} else {
+			if (bp->bp_role != BSTP_ROLE_DISABLED) {
+				bstp_disable_port(bs, bp);
+				if ((bp->bp_flags & BSTP_PORT_ADMEDGE) &&
+				    bp->bp_protover == BSTP_PROTO_RSTP)
+					bp->bp_operedge = 1;
+			}
+		}
+		return;
+	}
+
+	if (bp->bp_infois != BSTP_INFO_DISABLED)
+		bstp_disable_port(bs, bp);
+}
+
+static void
+bstp_enable_port(struct bstp_state *bs, struct bstp_port *bp)
+{
+	bp->bp_infois = BSTP_INFO_AGED;
+	bstp_assign_roles(bs);
+}
+
+static void
+bstp_disable_port(struct bstp_state *bs, struct bstp_port *bp)
+{
+	bp->bp_infois = BSTP_INFO_DISABLED;
+	bstp_assign_roles(bs);
+}
+
+static void
+bstp_tick(void *arg)
+{
+	struct bstp_state *bs = arg;
+	struct bstp_port *bp;
+	struct timespec ts;
+
+	BSTP_LOCK(bs);
+
+	if (bs->bs_running == 0) {
+		BSTP_UNLOCK(bs);
+		return;
+	}
+
+	/* slow timer to catch missed link events */
+	if (bstp_timer_expired(&bs->bs_link_timer)) {
+		LIST_FOREACH(bp, &bs->bs_bplist, bp_next)
+			bstp_ifupdstatus(bs, bp);
+		bstp_timer_start(&bs->bs_link_timer, BSTP_LINK_TIMER);
+	}
+
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+		/* no events need to happen for these */
+		bstp_timer_expired(&bp->bp_tc_timer);
+		bstp_timer_expired(&bp->bp_recent_root_timer);
+		bstp_timer_expired(&bp->bp_forward_delay_timer);
+		bstp_timer_expired(&bp->bp_recent_backup_timer);
+
+		if (bstp_timer_expired(&bp->bp_hello_timer))
+			bstp_hello_timer_expiry(bs, bp);
+
+		if (bstp_timer_expired(&bp->bp_message_age_timer))
+			bstp_message_age_expiry(bs, bp);
+
+		if (bstp_timer_expired(&bp->bp_migrate_delay_timer))
+			bstp_migrate_delay_expiry(bs, bp);
+
+		if (bstp_timer_expired(&bp->bp_edge_delay_timer))
+			bstp_edge_delay_expiry(bs, bp);
+
+		/* update the various state machines for the port */
+		bstp_update_state(bs, bp);
+
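+		/* replenish one unit of transmit quota per one-second tick */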
+		if (bp->bp_txcount > 0)
+			bp->bp_txcount--;
+	}
+
+	BSTP_UNLOCK(bs);
+
+	ts.tv_sec = 1;
+	ts.tv_nsec = 0;
+	bsd_timeout(bstp_tick, bs, &ts);
+}
+
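+/*
+ * Timers count down in units of BSTP_TICK_VAL, driven once a second
+ * from bstp_tick().  A latched timer stays active but is never
+ * decremented, so it cannot expire until it is restarted or stopped.
+ */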
+static void
+bstp_timer_start(struct bstp_timer *t, uint16_t v)
+{
+	t->value = v;
+	t->active = 1;
+	t->latched = 0;
+}
+
+static void
+bstp_timer_stop(struct bstp_timer *t)
+{
+	t->value = 0;
+	t->active = 0;
+	t->latched = 0;
+}
+
+static void
+bstp_timer_latch(struct bstp_timer *t)
+{
+	t->latched = 1;
+	t->active = 1;
+}
+
+static int
+bstp_timer_expired(struct bstp_timer *t)
+{
+	if (t->active == 0 || t->latched)
+		return (0);
+	t->value -= BSTP_TICK_VAL;
+	if (t->value <= 0) {
+		bstp_timer_stop(t);
+		return (1);
+	}
+	return (0);
+}
+
+static void
+bstp_hello_timer_expiry(struct bstp_state *bs, struct bstp_port *bp)
+{
+	if ((bp->bp_flags & BSTP_PORT_NEWINFO) ||
+	    bp->bp_role == BSTP_ROLE_DESIGNATED ||
+	    (bp->bp_role == BSTP_ROLE_ROOT &&
+	     bp->bp_tc_timer.active == 1)) {
+		bstp_timer_start(&bp->bp_hello_timer, bp->bp_desg_htime);
+		bp->bp_flags |= BSTP_PORT_NEWINFO;
+		bstp_transmit(bs, bp);
+	}
+}
+
+static void
+bstp_message_age_expiry(struct bstp_state *bs, struct bstp_port *bp)
+{
+	if (bp->bp_infois == BSTP_INFO_RECEIVED) {
+		bp->bp_infois = BSTP_INFO_AGED;
+		bstp_assign_roles(bs);
+		DPRINTF("aged info on %s\n", bp->bp_ifp->if_xname);
+	}
+}
+
+static void
+bstp_migrate_delay_expiry(__unused struct bstp_state *bs, struct bstp_port *bp)
+{
+	bp->bp_flags |= BSTP_PORT_CANMIGRATE;
+}
+
+static void
+bstp_edge_delay_expiry(__unused struct bstp_state *bs, struct bstp_port *bp)
+{
+	if ((bp->bp_flags & BSTP_PORT_AUTOEDGE) &&
+	    bp->bp_protover == BSTP_PROTO_RSTP && bp->bp_proposing &&
+	    bp->bp_role == BSTP_ROLE_DESIGNATED) {
+		bp->bp_operedge = 1;
+		DPRINTF("%s -> edge port\n", bp->bp_ifp->if_xname);
+	}
+}
+
+static int
+bstp_addr_cmp(const uint8_t *a, const uint8_t *b)
+{
+	int i, d;
+
+	for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) {
+		d = ((int)a[i]) - ((int)b[i]);
+	}
+
+	return (d);
+}
+
+/*
+ * compare the bridge address component of the bridgeid
+ */
+static int
+bstp_same_bridgeid(uint64_t id1, uint64_t id2)
+{
+	u_char addr1[ETHER_ADDR_LEN];
+	u_char addr2[ETHER_ADDR_LEN];
+
+	PV2ADDR(id1, addr1);
+	PV2ADDR(id2, addr2);
+
+	if (bstp_addr_cmp(addr1, addr2) == 0)
+		return (1);
+
+	return (0);
+}
+
+void
+bstp_reinit(struct bstp_state *bs)
+{
+	struct bstp_port *bp;
+	struct ifnet *ifp, *mif;
+	u_char *e_addr;
+	static const u_char llzero[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */
+
+	BSTP_LOCK_ASSERT(bs);
+
+	mif = NULL;
+	/*
+	 * Search through the Ethernet adapters and find the one with the
+	 * numerically lowest MAC address.  The adapter that the address is
+	 * taken from does not need to be part of the bridge, it just needs
+	 * to be a unique value.
+	 */
+	ifnet_head_lock_shared();
+	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+		if (ifp->if_type != IFT_ETHER)
+			continue;
+
+		if (bstp_addr_cmp(ifnet_lladdr(ifp), llzero) == 0)
+			continue;
+
+		if (mif == NULL) {
+			mif = ifp;
+			continue;
+		}
+		if (bstp_addr_cmp(ifnet_lladdr(ifp), ifnet_lladdr(mif)) < 0) {
+			mif = ifp;
+			continue;
+		}
+	}
+	ifnet_head_done();
+
+	if (LIST_EMPTY(&bs->bs_bplist) || mif == NULL) {
+		/* Set the bridge and root id (lower bits) to zero */
+		bs->bs_bridge_pv.pv_dbridge_id =
+		    ((uint64_t)bs->bs_bridge_priority) << 48;
+		bs->bs_bridge_pv.pv_root_id = bs->bs_bridge_pv.pv_dbridge_id;
+		bs->bs_root_pv = bs->bs_bridge_pv;
+		/* Disable any remaining ports; they will have no MAC address */
+		LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+			bp->bp_infois = BSTP_INFO_DISABLED;
+			bstp_set_port_role(bp, BSTP_ROLE_DISABLED);
+		}
+		bsd_untimeout(bstp_tick, bs);
+		return;
+	}
+
+	e_addr = ifnet_lladdr(mif);
+	bs->bs_bridge_pv.pv_dbridge_id =
+	    (((uint64_t)bs->bs_bridge_priority) << 48) |
+	    (((uint64_t)e_addr[0]) << 40) |
+	    (((uint64_t)e_addr[1]) << 32) |
+	    (((uint64_t)e_addr[2]) << 24) |
+	    (((uint64_t)e_addr[3]) << 16) |
+	    (((uint64_t)e_addr[4]) << 8) |
+	    (((uint64_t)e_addr[5]));
+
+	bs->bs_bridge_pv.pv_root_id = bs->bs_bridge_pv.pv_dbridge_id;
+	bs->bs_bridge_pv.pv_cost = 0;
+	bs->bs_bridge_pv.pv_dport_id = 0;
+	bs->bs_bridge_pv.pv_port_id = 0;
+
+	if (bs->bs_running)
+		bsd_untimeout(bstp_tick, bs);
+
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next) {
+		bp->bp_port_id = (bp->bp_priority << 8) |
+		    (bp->bp_ifp->if_index  & 0xfff);
+		bstp_ifupdstatus(bs, bp);
+	}
+
+	bstp_assign_roles(bs);
+	bstp_timer_start(&bs->bs_link_timer, BSTP_LINK_TIMER);
+}
+
+void
+bstp_attach(struct bstp_state *bs, struct bstp_cb_ops *cb)
+{
+	BSTP_LOCK_INIT(bs);
+	LIST_INIT(&bs->bs_bplist);
+
+	bs->bs_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
+	bs->bs_bridge_htime = BSTP_DEFAULT_HELLO_TIME;
+	bs->bs_bridge_fdelay = BSTP_DEFAULT_FORWARD_DELAY;
+	bs->bs_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
+	bs->bs_hold_time = BSTP_DEFAULT_HOLD_TIME;
+	bs->bs_migration_delay = BSTP_DEFAULT_MIGRATE_DELAY;
+	bs->bs_txholdcount = BSTP_DEFAULT_HOLD_COUNT;
+	bs->bs_protover = BSTP_PROTO_RSTP;
+	bs->bs_state_cb = cb->bcb_state;
+	bs->bs_rtage_cb = cb->bcb_rtage;
+
+	/* reviewed for getmicrotime usage */
+	getmicrotime(&bs->bs_last_tc_time);
+
+	lck_mtx_lock(bstp_list_mtx);
+	LIST_INSERT_HEAD(&bstp_list, bs, bs_list);
+	lck_mtx_unlock(bstp_list_mtx);
+}
+
+void
+bstp_detach(struct bstp_state *bs)
+{
+	KASSERT(LIST_EMPTY(&bs->bs_bplist), ("bstp still active"));
+
+	lck_mtx_lock(bstp_list_mtx);
+	LIST_REMOVE(bs, bs_list);
+	lck_mtx_unlock(bstp_list_mtx);
+	bsd_untimeout(bstp_tick, bs);
+	BSTP_LOCK_DESTROY(bs);
+}
+
+void
+bstp_init(struct bstp_state *bs)
+{
+	struct timespec ts;
+
+	ts.tv_sec = 1;
+	ts.tv_nsec = 0;
+
+	BSTP_LOCK(bs);
+	bsd_timeout(bstp_tick, bs, &ts);
+	bs->bs_running = 1;
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+}
+
+void
+bstp_stop(struct bstp_state *bs)
+{
+	struct bstp_port *bp;
+
+	BSTP_LOCK(bs);
+
+	LIST_FOREACH(bp, &bs->bs_bplist, bp_next)
+		bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
+
+	bs->bs_running = 0;
+	bsd_untimeout(bstp_tick, bs);
+	BSTP_UNLOCK(bs);
+}
+
+int
+bstp_create(struct bstp_state *bs, struct bstp_port *bp, struct ifnet *ifp)
+{
+	bzero(bp, sizeof(struct bstp_port));
+
+	BSTP_LOCK(bs);
+	bp->bp_ifp = ifp;
+	bp->bp_bs = bs;
+	bp->bp_priority = BSTP_DEFAULT_PORT_PRIORITY;
+	BSTP_TASK_INIT(&bp->bp_statetask, bstp_notify_state, bp);
+	BSTP_TASK_INIT(&bp->bp_rtagetask, bstp_notify_rtage, bp);
+
+	/* Init state */
+	bp->bp_infois = BSTP_INFO_DISABLED;
+	bp->bp_flags = BSTP_PORT_AUTOEDGE|BSTP_PORT_AUTOPTP;
+	bstp_set_port_state(bp, BSTP_IFSTATE_DISCARDING);
+	bstp_set_port_proto(bp, bs->bs_protover);
+	bstp_set_port_role(bp, BSTP_ROLE_DISABLED);
+	bstp_set_port_tc(bp, BSTP_TCSTATE_INACTIVE);
+	bp->bp_path_cost = bstp_calc_path_cost(bp);
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+int
+bstp_enable(struct bstp_port *bp)
+{
+	struct bstp_state *bs = bp->bp_bs;
+	struct ifnet *ifp = bp->bp_ifp;
+
+	KASSERT(bp->bp_active == 0, ("already a bstp member"));
+
+	switch (ifp->if_type) {
+		case IFT_ETHER:	/* These can do spanning tree. */
+			break;
+		default:
+			/* Nothing else can. */
+			return (EINVAL);
+	}
+
+	BSTP_LOCK(bs);
+	LIST_INSERT_HEAD(&bs->bs_bplist, bp, bp_next);
+	bp->bp_active = 1;
+	bp->bp_flags |= BSTP_PORT_NEWINFO;
+	bstp_reinit(bs);
+	bstp_update_roles(bs, bp);
+	BSTP_UNLOCK(bs);
+	return (0);
+}
+
+void
+bstp_disable(struct bstp_port *bp)
+{
+	struct bstp_state *bs = bp->bp_bs;
+
+	KASSERT(bp->bp_active == 1, ("not a bstp member"));
+
+	BSTP_LOCK(bs);
+	bstp_disable_port(bs, bp);
+	LIST_REMOVE(bp, bp_next);
+	bp->bp_active = 0;
+	bstp_reinit(bs);
+	BSTP_UNLOCK(bs);
+}
+
+/*
+ * The bstp_port structure is about to be freed by the parent bridge.
+ */
+void
+bstp_destroy(struct bstp_port *bp)
+{
+	KASSERT(bp->bp_active == 0, ("port is still attached"));
+	bstp_task_drain(&bp->bp_statetask);
+	bstp_task_drain(&bp->bp_rtagetask);
+}
+
+
+__private_extern__ void
+bstp_sys_init(void)
+{
+	lck_grp_attr_t *lck_grp_attr = NULL;
+
+	lck_grp_attr = lck_grp_attr_alloc_init();
+	bstp_lock_grp = lck_grp_alloc_init("bstp", lck_grp_attr);
+	bstp_lock_attr = lck_attr_alloc_init();
+#if BRIDGE_DEBUG
+	lck_attr_setdebug(bstp_lock_attr);
+#endif
+	bstp_list_mtx = lck_mtx_alloc_init(bstp_lock_grp, bstp_lock_attr);
+	lck_grp_attr_free(lck_grp_attr);
+
+	LIST_INIT(&bstp_list);
+
+	bstp_create_task_thread();
+}
+
+static void
+bstp_create_task_thread(void)
+{
+	kern_return_t error;
+	
+	lck_grp_attr_t *lck_grp_attr = NULL;
+
+	lck_grp_attr = lck_grp_attr_alloc_init();
+	bstp_task_grp = lck_grp_alloc_init("bstp_task", lck_grp_attr);
+	bstp_task_attr = lck_attr_alloc_init();
+#if BRIDGE_DEBUG
+	lck_attr_setdebug(bstp_task_attr);
+#endif
+	bstp_task_mtx = lck_mtx_alloc_init(bstp_task_grp, bstp_task_attr);
+	lck_grp_attr_free(lck_grp_attr);
+
+	error = kernel_thread_start((thread_continue_t)bstp_task_thread_func,
+	    NULL, &bstp_task_thread);
+	if (error != KERN_SUCCESS) {
+		panic("%s: couldn't create thread", __func__);
+		/* NOTREACHED */
+	}
+}
+
+static void
+bstp_task_thread_func(void)
+{
+	struct bstp_task *bt, *tvar;
+
+	lck_mtx_lock(bstp_task_mtx);
+	
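+	/*
+	 * Service loop: sleep while the queue is empty (waking any waiting
+	 * drainers), then run each pending task with the mutex dropped.
+	 */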
+	do {
+		while (TAILQ_EMPTY(&bstp_task_queue)) {
+			wakeup(&bstp_task_queue_running);
+			msleep(&bstp_task_queue, bstp_task_mtx, PZERO, "bstp_task_queue", NULL);
+		}
+	
+		TAILQ_FOREACH_SAFE(bt, &bstp_task_queue, bt_next, tvar) {	
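+			/* snapshot and reset the coalesced call count */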
+			int count = bt->bt_count;
+	
+			bt->bt_count = 0;
+		
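+			/* mark as running so bstp_task_drain() can wait on us */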
+			bstp_task_queue_running = bt;
+			lck_mtx_unlock(bstp_task_mtx);
+			
+			(*bt->bt_func)(bt->bt_context, count);
+			
+			lck_mtx_lock(bstp_task_mtx);
+			bstp_task_queue_running = NULL;
+
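+			/* remove the entry unless it was re-enqueued while we ran */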
+			if (bt->bt_count == 0)
+				TAILQ_REMOVE(&bstp_task_queue, bt, bt_next); 
+		}
+	} while (1);
+	
+	/* UNREACHED */
+}
+
+static void
+bstp_task_enqueue(struct bstp_task *bt)
+{
+	lck_mtx_lock(bstp_task_mtx);
+
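+	/* already queued: coalesce by bumping the pending count */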
+	if (bt->bt_count) {
+		bt->bt_count++;
+		lck_mtx_unlock(bstp_task_mtx);
+		wakeup(&bstp_task_queue);
+		return;
+	}
+	
+	bt->bt_count = 1;
+	TAILQ_INSERT_TAIL(&bstp_task_queue, bt, bt_next);
+	
+	lck_mtx_unlock(bstp_task_mtx);
+	
+	wakeup(&bstp_task_queue);
+}
+
+static void
+bstp_task_drain(struct bstp_task *bt)
+{
+	lck_mtx_lock(bstp_task_mtx);
+
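+	/* wait until the task is neither queued nor currently running */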
+	while (bt->bt_count != 0 || bstp_task_queue_running == bt) {
+		wakeup(&bstp_task_queue);
+		msleep(&bstp_task_queue_running, bstp_task_mtx, PZERO, "bstp_task_queue", NULL);
+	}
+	lck_mtx_unlock(bstp_task_mtx);
+}
+
diff --git a/bsd/net/bridgestp.h b/bsd/net/bridgestp.h
new file mode 100644
index 000000000..a70f7aaba
--- /dev/null
+++ b/bsd/net/bridgestp.h
@@ -0,0 +1,441 @@
+/*	$NetBSD: if_bridgevar.h,v 1.4 2003/07/08 07:13:50 itojun Exp $	*/
+
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Copyright 2001 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Jason R. Thorpe for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed for the NetBSD Project by
+ *	Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ *    or promote products derived from this software without specific prior
+ *    written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by Jason L. Wright
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * OpenBSD: if_bridge.h,v 1.14 2001/03/22 03:48:29 jason Exp
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __BRIDGESTP_H__
+#define __BRIDGESTP_H__
+
+/*
+ * Data structure and control definitions for STP interfaces.
+ */
+
+#include <sys/queue.h>
+#include <kern/locks.h>
+/* STP port states */
+#define	BSTP_IFSTATE_DISABLED	0
+#define	BSTP_IFSTATE_LISTENING	1
+#define	BSTP_IFSTATE_LEARNING	2
+#define	BSTP_IFSTATE_FORWARDING	3
+#define	BSTP_IFSTATE_BLOCKING	4
+#define	BSTP_IFSTATE_DISCARDING	5
+
+#define	BSTP_TCSTATE_ACTIVE	1
+#define	BSTP_TCSTATE_DETECTED	2
+#define	BSTP_TCSTATE_INACTIVE	3
+#define	BSTP_TCSTATE_LEARNING	4
+#define	BSTP_TCSTATE_PROPAG	5
+#define	BSTP_TCSTATE_ACK	6
+#define	BSTP_TCSTATE_TC		7
+#define	BSTP_TCSTATE_TCN	8
+
+#define	BSTP_ROLE_DISABLED	0
+#define	BSTP_ROLE_ROOT		1
+#define	BSTP_ROLE_DESIGNATED	2
+#define	BSTP_ROLE_ALTERNATE	3
+#define	BSTP_ROLE_BACKUP	4
+
+#ifdef XNU_KERNEL_PRIVATE
+
+/* STP port flags */
+#define	BSTP_PORT_CANMIGRATE	0x0001
+#define	BSTP_PORT_NEWINFO	0x0002
+#define	BSTP_PORT_DISPUTED	0x0004
+#define	BSTP_PORT_ADMCOST	0x0008
+#define	BSTP_PORT_AUTOEDGE	0x0010
+#define	BSTP_PORT_AUTOPTP	0x0020
+#define	BSTP_PORT_ADMEDGE	0x0040
+#define	BSTP_PORT_PNDCOST	0x0080
+
+/* BPDU priority */
+#define	BSTP_PDU_SUPERIOR	1
+#define	BSTP_PDU_REPEATED	2
+#define	BSTP_PDU_INFERIOR	3
+#define	BSTP_PDU_INFERIORALT	4
+#define	BSTP_PDU_OTHER		5
+
+/* BPDU flags */
+#define	BSTP_PDU_PRMASK		0x0c		/* Port Role */
+#define	BSTP_PDU_PRSHIFT	2		/* Port Role offset */
+#define	BSTP_PDU_F_UNKN		0x00		/* Unknown port    (00) */
+#define	BSTP_PDU_F_ALT		0x01		/* Alt/Backup port (01) */
+#define	BSTP_PDU_F_ROOT		0x02		/* Root port       (10) */
+#define	BSTP_PDU_F_DESG		0x03		/* Designated port (11) */
+
+#define	BSTP_PDU_STPMASK	0x81		/* strip unused STP flags */
+#define	BSTP_PDU_RSTPMASK	0x7f		/* strip unused RSTP flags */
+#define	BSTP_PDU_F_TC		0x01		/* Topology change */
+#define	BSTP_PDU_F_P		0x02		/* Proposal flag */
+#define	BSTP_PDU_F_L		0x10		/* Learning flag */
+#define	BSTP_PDU_F_F		0x20		/* Forwarding flag */
+#define	BSTP_PDU_F_A		0x40		/* Agreement flag */
+#define	BSTP_PDU_F_TCA		0x80		/* Topology change ack */
+
+/*
+ * Spanning tree defaults.  Timer values are expressed in 256ths of a
+ * second (see BSTP_TICK_VAL below), so e.g. BSTP_DEFAULT_MAX_AGE is 20s.
+ */
+#define	BSTP_DEFAULT_MAX_AGE		(20 * 256)
+#define	BSTP_DEFAULT_HELLO_TIME		(2 * 256)
+#define	BSTP_DEFAULT_FORWARD_DELAY	(15 * 256)
+#define	BSTP_DEFAULT_HOLD_TIME		(1 * 256)
+#define	BSTP_DEFAULT_MIGRATE_DELAY	(3 * 256)
+#define	BSTP_DEFAULT_HOLD_COUNT		6
+#define	BSTP_DEFAULT_BRIDGE_PRIORITY	0x8000
+#define	BSTP_DEFAULT_PORT_PRIORITY	0x80
+#define	BSTP_DEFAULT_PATH_COST		55
+#define	BSTP_MIN_HELLO_TIME		(1 * 256)
+#define	BSTP_MIN_MAX_AGE		(6 * 256)
+#define	BSTP_MIN_FORWARD_DELAY		(4 * 256)
+#define	BSTP_MIN_HOLD_COUNT		1
+#define	BSTP_MAX_HELLO_TIME		(2 * 256)
+#define	BSTP_MAX_MAX_AGE		(40 * 256)
+#define	BSTP_MAX_FORWARD_DELAY		(30 * 256)
+#define	BSTP_MAX_HOLD_COUNT		10
+#define	BSTP_MAX_PRIORITY		61440
+#define	BSTP_MAX_PORT_PRIORITY		240
+#define	BSTP_MAX_PATH_COST		200000000
+
+/* BPDU message types */
+#define	BSTP_MSGTYPE_CFG	0x00		/* Configuration */
+#define	BSTP_MSGTYPE_RSTP	0x02		/* Rapid STP */
+#define	BSTP_MSGTYPE_TCN	0x80		/* Topology chg notification */
+
+/* Protocol versions */
+#define	BSTP_PROTO_ID		0x00
+#define	BSTP_PROTO_STP		0x00
+#define	BSTP_PROTO_RSTP		0x02
+#define	BSTP_PROTO_MAX		BSTP_PROTO_RSTP
+
+#define	BSTP_INFO_RECIEVED	1		/* misspelled compat alias */
+#define	BSTP_INFO_RECEIVED	1
+#define	BSTP_INFO_MINE		2
+#define	BSTP_INFO_AGED		3
+#define	BSTP_INFO_DISABLED	4
+
+
+#define	BSTP_MESSAGE_AGE_INCR	(1 * 256)	/* in 256ths of a second */
+#define	BSTP_TICK_VAL		(1 * 256)	/* in 256ths of a second */
+#define	BSTP_LINK_TIMER		(BSTP_TICK_VAL * 15)
+
+/*
+ * Driver callbacks for STP state changes
+ */
+typedef void (*bstp_state_cb_t)(struct ifnet *, int);
+typedef void (*bstp_rtage_cb_t)(struct ifnet *, int);
+struct bstp_cb_ops {
+	bstp_state_cb_t	bcb_state;
+	bstp_rtage_cb_t	bcb_rtage;
+};
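+
+/*
+ * Sketch (hypothetical names, not part of this API): a bridge supplies
+ * its callbacks once at attach time, e.g.
+ *
+ *	static struct bstp_cb_ops example_cb_ops = {
+ *		.bcb_state = example_state_cb,
+ *		.bcb_rtage = example_rtage_cb,
+ *	};
+ *
+ *	bstp_attach(bs, &example_cb_ops);
+ *
+ * Both callbacks are later invoked from the bstp task thread, via the
+ * bp_statetask and bp_rtagetask entries of struct bstp_port below.
+ */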
+
+/*
+ * Because BPDUs do not make nicely aligned structures, two different
+ * declarations are used: bstp_?bpdu (wire representation, packed) and
+ * bstp_*_unit (internal, nicely aligned version).
+ */
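+
+/*
+ * Sketch: because the wire structures are packed, their sizes can be
+ * verified at compile time against the lengths defined below, e.g. with
+ * a compile-time assertion such as the _CASSERT() macro used in dlil.c:
+ *
+ *	_CASSERT(sizeof (struct bstp_cbpdu) == BSTP_BPDU_RSTP_LEN);
+ *	_CASSERT(sizeof (struct bstp_tbpdu) == 7);
+ */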
+
+/* configuration bridge protocol data unit */
+struct bstp_cbpdu {
+	uint8_t		cbu_dsap;		/* LLC: destination sap */
+	uint8_t		cbu_ssap;		/* LLC: source sap */
+	uint8_t		cbu_ctl;		/* LLC: control */
+	uint16_t	cbu_protoid;		/* protocol id */
+	uint8_t		cbu_protover;		/* protocol version */
+	uint8_t		cbu_bpdutype;		/* message type */
+	uint8_t		cbu_flags;		/* flags (below) */
+
+	/* root id */
+	uint16_t	cbu_rootpri;		/* root priority */
+	uint8_t		cbu_rootaddr[6];	/* root address */
+
+	uint32_t	cbu_rootpathcost;	/* root path cost */
+
+	/* bridge id */
+	uint16_t	cbu_bridgepri;		/* bridge priority */
+	uint8_t		cbu_bridgeaddr[6];	/* bridge address */
+
+	uint16_t	cbu_portid;		/* port id */
+	uint16_t	cbu_messageage;		/* current message age */
+	uint16_t	cbu_maxage;		/* maximum age */
+	uint16_t	cbu_hellotime;		/* hello time */
+	uint16_t	cbu_forwarddelay;	/* forwarding delay */
+	uint8_t		cbu_versionlen;		/* version 1 length */
+} __attribute__((__packed__));
+#define	BSTP_BPDU_STP_LEN	(3 + 35)	/* LLC + STP pdu */
+#define	BSTP_BPDU_RSTP_LEN	(3 + 36)	/* LLC + RSTP pdu */
+
+/* topology change notification bridge protocol data unit */
+struct bstp_tbpdu {
+	uint8_t		tbu_dsap;		/* LLC: destination sap */
+	uint8_t		tbu_ssap;		/* LLC: source sap */
+	uint8_t		tbu_ctl;		/* LLC: control */
+	uint16_t	tbu_protoid;		/* protocol id */
+	uint8_t		tbu_protover;		/* protocol version */
+	uint8_t		tbu_bpdutype;		/* message type */
+} __attribute__((__packed__));
+
+/*
+ * Deferred work queue entry used by the spanning tree code; pending
+ * invocations of bt_func are coalesced, and the function receives the
+ * number of coalesced calls in its count argument.
+ */
+typedef void bstp_task_func_t(void *context, int count);
+
+struct bstp_task {
+	TAILQ_ENTRY(bstp_task)	bt_next;
+	int			bt_count;
+	bstp_task_func_t	*bt_func;
+	void			*bt_context;
+};
+
+/*
+ * Timekeeping structure used in spanning tree code.
+ */
+struct bstp_timer {
+	int		active;
+	int		latched;
+	int		value;
+};
+
+struct bstp_pri_vector {
+	uint64_t		pv_root_id;
+	uint32_t		pv_cost;
+	uint64_t		pv_dbridge_id;
+	uint16_t		pv_dport_id;
+	uint16_t		pv_port_id;
+};
+
+struct bstp_config_unit {
+	struct bstp_pri_vector	cu_pv;
+	uint16_t	cu_message_age;
+	uint16_t	cu_max_age;
+	uint16_t	cu_forward_delay;
+	uint16_t	cu_hello_time;
+	uint8_t		cu_message_type;
+	uint8_t		cu_topology_change_ack;
+	uint8_t		cu_topology_change;
+	uint8_t		cu_proposal;
+	uint8_t		cu_agree;
+	uint8_t		cu_learning;
+	uint8_t		cu_forwarding;
+	uint8_t		cu_role;
+};
+
+struct bstp_tcn_unit {
+	uint8_t		tu_message_type;
+};
+
+struct bstp_port {
+	LIST_ENTRY(bstp_port)	bp_next;
+	struct ifnet		*bp_ifp;	/* parent if */
+	struct bstp_state	*bp_bs;
+	uint8_t			bp_active;
+	uint8_t			bp_protover;
+	uint32_t		bp_flags;
+	uint32_t		bp_path_cost;
+	uint16_t		bp_port_msg_age;
+	uint16_t		bp_port_max_age;
+	uint16_t		bp_port_fdelay;
+	uint16_t		bp_port_htime;
+	uint16_t		bp_desg_msg_age;
+	uint16_t		bp_desg_max_age;
+	uint16_t		bp_desg_fdelay;
+	uint16_t		bp_desg_htime;
+	struct bstp_timer	bp_edge_delay_timer;
+	struct bstp_timer	bp_forward_delay_timer;
+	struct bstp_timer	bp_hello_timer;
+	struct bstp_timer	bp_message_age_timer;
+	struct bstp_timer	bp_migrate_delay_timer;
+	struct bstp_timer	bp_recent_backup_timer;
+	struct bstp_timer	bp_recent_root_timer;
+	struct bstp_timer	bp_tc_timer;
+	struct bstp_config_unit bp_msg_cu;
+	struct bstp_pri_vector	bp_desg_pv;
+	struct bstp_pri_vector	bp_port_pv;
+	uint16_t		bp_port_id;
+	uint8_t			bp_state;
+	uint8_t			bp_tcstate;
+	uint8_t			bp_role;
+	uint8_t			bp_infois;
+	uint8_t			bp_tc_ack;
+	uint8_t			bp_tc_prop;
+	uint8_t			bp_fdbflush;
+	uint8_t			bp_priority;
+	uint8_t			bp_ptp_link;
+	uint8_t			bp_agree;
+	uint8_t			bp_agreed;
+	uint8_t			bp_sync;
+	uint8_t			bp_synced;
+	uint8_t			bp_proposing;
+	uint8_t			bp_proposed;
+	uint8_t			bp_operedge;
+	uint8_t			bp_reroot;
+	uint8_t			bp_rcvdtc;
+	uint8_t			bp_rcvdtca;
+	uint8_t			bp_rcvdtcn;
+	uint32_t		bp_forward_transitions;
+	uint8_t			bp_txcount;
+	struct bstp_task	bp_statetask;
+	struct bstp_task	bp_rtagetask;
+	uint32_t		bp_if_link_state;	/* cache of the parent if link state */
+};
+
+/*
+ * Values for bp_if_link_state.
+ */
+#define LINK_STATE_UNKNOWN      0       /* link invalid/unknown */
+#define LINK_STATE_DOWN         1       /* link is down */
+#define LINK_STATE_UP           2       /* link is up */
+
+/*
+ * Software state for each bridge STP.
+ */
+struct bstp_state {
+	LIST_ENTRY(bstp_state)	bs_list;
+	uint8_t			bs_running;
+	lck_mtx_t		*bs_mtx;
+	struct bstp_pri_vector	bs_bridge_pv;
+	struct bstp_pri_vector	bs_root_pv;
+	struct bstp_port	*bs_root_port;
+	uint8_t			bs_protover;
+	uint16_t		bs_migration_delay;
+	uint16_t		bs_edge_delay;
+	uint16_t		bs_bridge_max_age;
+	uint16_t		bs_bridge_fdelay;
+	uint16_t		bs_bridge_htime;
+	uint16_t		bs_root_msg_age;
+	uint16_t		bs_root_max_age;
+	uint16_t		bs_root_fdelay;
+	uint16_t		bs_root_htime;
+	uint16_t		bs_hold_time;
+	uint16_t		bs_bridge_priority;
+	uint8_t			bs_txholdcount;
+	uint8_t			bs_allsynced;
+	struct bstp_timer	bs_link_timer;
+	struct timeval		bs_last_tc_time;
+	LIST_HEAD(, bstp_port)	bs_bplist;
+	bstp_state_cb_t		bs_state_cb;
+	bstp_rtage_cb_t		bs_rtage_cb;
+};
+
+extern const uint8_t bstp_etheraddr[];
+
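+/*
+ * Sketch of the expected calling sequence (error handling omitted):
+ *
+ *	bstp_attach(bs, &cb_ops);	-- register the bridge STP instance
+ *	bstp_init(bs);			-- start the state machine and tick
+ *	bstp_create(bs, bp, ifp);	-- bind a port to the instance
+ *	bstp_enable(bp);		-- join; the port must be IFT_ETHER
+ *	...
+ *	bstp_disable(bp);		-- leave the spanning tree
+ *	bstp_destroy(bp);		-- before freeing the port
+ *	bstp_stop(bs);
+ *	bstp_detach(bs);
+ */
+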
+void	bstp_attach(struct bstp_state *, struct bstp_cb_ops *);
+void	bstp_detach(struct bstp_state *);
+void	bstp_init(struct bstp_state *);
+void	bstp_stop(struct bstp_state *);
+int	bstp_create(struct bstp_state *, struct bstp_port *, struct ifnet *);
+int	bstp_enable(struct bstp_port *);
+void	bstp_disable(struct bstp_port *);
+void	bstp_destroy(struct bstp_port *);
+void	bstp_linkstate(struct ifnet *, int);
+int	bstp_set_htime(struct bstp_state *, int);
+int	bstp_set_fdelay(struct bstp_state *, int);
+int	bstp_set_maxage(struct bstp_state *, int);
+int	bstp_set_holdcount(struct bstp_state *, int);
+int	bstp_set_protocol(struct bstp_state *, int);
+int	bstp_set_priority(struct bstp_state *, int);
+int	bstp_set_port_priority(struct bstp_port *, int);
+int	bstp_set_path_cost(struct bstp_port *, uint32_t);
+int	bstp_set_edge(struct bstp_port *, int);
+int	bstp_set_autoedge(struct bstp_port *, int);
+int	bstp_set_ptp(struct bstp_port *, int);
+int	bstp_set_autoptp(struct bstp_port *, int);
+struct mbuf *bstp_input(struct bstp_port *, struct ifnet *, struct mbuf *);
+
+void bstp_sys_init(void);
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+#endif /* __BRIDGESTP_H__ */
+
diff --git a/bsd/net/dlil.c b/bsd/net/dlil.c
index 848b3b3f1..272388f02 100644
--- a/bsd/net/dlil.c
+++ b/bsd/net/dlil.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,10 +25,6 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- *	Data Link Inteface Layer
- *	Author: Ted Walker
- */
 /*
  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  * support for mandatory and extensible security protections.  This notice
@@ -53,22 +49,41 @@
 #include <net/if_arp.h>
 #include <sys/kern_event.h>
 #include <sys/kdebug.h>
+#include <sys/mcache.h>
 
 #include <kern/assert.h>
 #include <kern/task.h>
 #include <kern/thread.h>
 #include <kern/sched_prim.h>
 #include <kern/locks.h>
+#include <kern/zalloc.h>
 #include <net/kpi_protocol.h>
 
 #include <net/if_types.h>
+#include <net/if_llreach.h>
 #include <net/kpi_interfacefilter.h>
 
+#if INET
+#include <netinet/in_var.h>
+#include <netinet/igmp_var.h>
+#endif /* INET */
+
+#if INET6
+#include <netinet6/in6_var.h>
+#include <netinet6/nd6.h>
+#include <netinet6/mld6_var.h>
+#endif /* INET6 */
+
+#if NETAT
+#include <netat/at_var.h>
+#endif /* NETAT */
+
 #include <libkern/OSAtomic.h>
 
 #include <machine/machine_routines.h>
 
 #include <mach/thread_act.h>
+#include <mach/sdt.h>
 
 #if CONFIG_MACF_NET
 #include <security/mac_framework.h>
@@ -78,8 +93,8 @@
 #include <net/pfvar.h>
 #endif /* PF */
 
-#define DBG_LAYER_BEG			DLILDBG_CODE(DBG_DLIL_STATIC, 0)
-#define DBG_LAYER_END			DLILDBG_CODE(DBG_DLIL_STATIC, 2)
+#define DBG_LAYER_BEG		DLILDBG_CODE(DBG_DLIL_STATIC, 0)
+#define DBG_LAYER_END		DLILDBG_CODE(DBG_DLIL_STATIC, 2)
 #define DBG_FNC_DLIL_INPUT      DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
 #define DBG_FNC_DLIL_OUTPUT     DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
 #define DBG_FNC_DLIL_IFOUT      DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
@@ -95,48 +110,51 @@
 #define DLIL_PRINTF	kprintf
 #endif
 
-#define atomic_add_32(a, n)						\
-	((void) OSAddAtomic(n, (volatile SInt32 *)a))
-
-#if PKT_PRIORITY
 #define	_CASSERT(x)	\
 	switch (0) { case 0: case (x): ; }
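+/*
+ * _CASSERT() fails to compile when its argument evaluates to zero, since
+ * the two case labels of the switch then collide; e.g. _CASSERT(1 == 1)
+ * compiles while _CASSERT(1 == 2) does not.
+ */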
 
-#define	IF_DATA_REQUIRE_ALIGNED_32(f)	\
-	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int32_t)))
+#define	IF_DATA_REQUIRE_ALIGNED_64(f)	\
+	_CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
 
-#define	IFNET_IF_DATA_REQUIRE_ALIGNED_32(f)	\
-	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int32_t)))
-#endif /* PKT_PRIORITY */
+#define	IFNET_IF_DATA_REQUIRE_ALIGNED_64(f)	\
+	_CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
+
+#define IFNET_IF_TC_REQUIRE_ALIGNED_64(f) \
+	_CASSERT(!(offsetof(struct ifnet, if_tc.f) % sizeof (u_int64_t)))
 
 enum {
 	kProtoKPI_v1	= 1,
 	kProtoKPI_v2	= 2
 };
 
+/*
+ * List of if_proto structures in if_proto_hash[] is protected by
+ * the ifnet lock.  The rest of the fields are initialized at protocol
+ * attach time and never change, thus no lock required as long as
+ * a reference to it is valid, via if_proto_ref().
+ */
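+/*
+ * Typical lookup pattern (sketch): find_attached_proto() below takes a
+ * reference on the entry it returns, so a caller may drop the ifnet lock
+ * and release the reference when done:
+ *
+ *	ifnet_lock_shared(ifp);
+ *	proto = find_attached_proto(ifp, protocol_family);
+ *	ifnet_lock_done(ifp);
+ *	if (proto != NULL) {
+ *		...
+ *		if_proto_free(proto);
+ *	}
+ */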
 struct if_proto {
-    SLIST_ENTRY(if_proto)	 next_hash;
-    int						 refcount;
-    int						 detaching;
-    struct ifnet			 *ifp;
-    struct domain			 *dl_domain;
+    SLIST_ENTRY(if_proto)	next_hash;
+    u_int32_t			refcount;
+    u_int32_t			detached;
+    struct ifnet		*ifp;
     protocol_family_t		protocol_family;
-    int						proto_kpi;
+    int				proto_kpi;
     union {
 		struct {
-			proto_media_input			input;
-			proto_media_preout			pre_output;
-			proto_media_event			event;
-			proto_media_ioctl			ioctl;
+			proto_media_input		input;
+			proto_media_preout		pre_output;
+			proto_media_event		event;
+			proto_media_ioctl		ioctl;
 			proto_media_detached		detached;
 			proto_media_resolve_multi	resolve_multi;
 			proto_media_send_arp		send_arp;
 		} v1;
 		struct {
 			proto_media_input_v2		input;
-			proto_media_preout			pre_output;
-			proto_media_event			event;
-			proto_media_ioctl			ioctl;
+			proto_media_preout		pre_output;
+			proto_media_event		event;
+			proto_media_ioctl		ioctl;
 			proto_media_detached		detached;
 			proto_media_resolve_multi	resolve_multi;
 			proto_media_send_arp		send_arp;
@@ -146,51 +164,118 @@ struct if_proto {
 
 SLIST_HEAD(proto_hash_entry, if_proto);
 
+#define	DLIL_SDLMAXLEN	64
+#define	DLIL_SDLDATALEN	\
+	(DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
 
 struct dlil_ifnet {
-    /* ifnet and drvr_ext are used by the stack and drivers
-    drvr_ext extends the public ifnet and must follow dl_if */
-    struct ifnet	dl_if;			/* public ifnet */
-    
-    /* dlil private fields */
-    TAILQ_ENTRY(dlil_ifnet) dl_if_link;	/* dlil_ifnet are link together */
-    								/* it is not the ifnet list */
-    void		*if_uniqueid;	/* unique id identifying the interface */
-    size_t		if_uniqueid_len;/* length of the unique id */
-    char		if_namestorage[IFNAMSIZ]; /* interface name storage */
+	struct ifnet	dl_if;			/* public ifnet */
+	/*
+	 * dlil private fields, protected by dl_if_lock
+	 */
+	decl_lck_mtx_data(, dl_if_lock);
+	TAILQ_ENTRY(dlil_ifnet) dl_if_link;	/* dlil_ifnet link */
+	u_int32_t dl_if_flags;			/* flags (below) */
+	u_int32_t dl_if_refcnt;			/* refcnt */
+	void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
+	void	*dl_if_uniqueid;		/* unique interface id */
+	size_t	dl_if_uniqueid_len;		/* length of the unique id */
+	char	dl_if_namestorage[IFNAMSIZ];	/* interface name storage */
+	struct {
+		struct ifaddr	ifa;		/* lladdr ifa */
+		u_int8_t	asdl[DLIL_SDLMAXLEN]; /* addr storage */
+		u_int8_t	msdl[DLIL_SDLMAXLEN]; /* mask storage */
+	} dl_if_lladdr;
+	ctrace_t	dl_if_attach;		/* attach PC stacktrace */
+	ctrace_t	dl_if_detach;		/* detach PC stacktrace */
+};
+
+/* Values for dl_if_flags (private to DLIL) */
+#define	DLIF_INUSE	0x1	/* DLIL ifnet recycler, ifnet in use */
+#define	DLIF_REUSE	0x2	/* DLIL ifnet recycled, ifnet is not new */
+#define	DLIF_DEBUG	0x4	/* has debugging info */
+
+#define	IF_REF_TRACE_HIST_SIZE	8	/* size of ref trace history */
+
+/* For gdb */
+__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
+
+struct dlil_ifnet_dbg {
+	struct dlil_ifnet	dldbg_dlif;		/* dlil_ifnet */
+	u_int16_t		dldbg_if_refhold_cnt;	/* # ifnet references */
+	u_int16_t		dldbg_if_refrele_cnt;	/* # ifnet releases */
+	/*
+	 * Circular lists of ifnet_{reference,release} callers.
+	 */
+	ctrace_t		dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
+	ctrace_t		dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
 };
 
+#define	DLIL_TO_IFP(s)	(&(s)->dl_if)
+#define	IFP_TO_DLIL(s)	((struct dlil_ifnet *)(s))
+
 struct ifnet_filter {
 	TAILQ_ENTRY(ifnet_filter)	filt_next;
-    ifnet_t						filt_ifp;
-    int							filt_detaching;
-    
-	const char					*filt_name;
-	void						*filt_cookie;
-    protocol_family_t			filt_protocol;
-    iff_input_func				filt_input;
-    iff_output_func				filt_output;
-    iff_event_func				filt_event;
-    iff_ioctl_func				filt_ioctl;
-    iff_detached_func			filt_detached;
+	u_int32_t			filt_skip;
+	ifnet_t				filt_ifp;
+	const char			*filt_name;
+	void				*filt_cookie;
+	protocol_family_t		filt_protocol;
+	iff_input_func			filt_input;
+	iff_output_func			filt_output;
+	iff_event_func			filt_event;
+	iff_ioctl_func			filt_ioctl;
+	iff_detached_func		filt_detached;
 };
 
 struct proto_input_entry;
 
 static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
 static lck_grp_t *dlil_lock_group;
-static lck_grp_t *ifnet_lock_group;
+lck_grp_t *ifnet_lock_group;
 static lck_grp_t *ifnet_head_lock_group;
-static lck_attr_t *ifnet_lock_attr;
-static lck_rw_t *ifnet_head_mutex;
-static lck_mtx_t *dlil_ifnet_mutex;
-static lck_mtx_t *dlil_mutex;
-static u_int32_t dlil_read_count = 0;
-static u_int32_t dlil_detach_waiting = 0;
+lck_attr_t *ifnet_lock_attr;
+decl_lck_rw_data(, ifnet_head_lock);
+decl_lck_mtx_data(, dlil_ifnet_lock);
 u_int32_t dlil_filter_count = 0;
 extern u_int32_t	ipv4_ll_arp_aware;
 
-#if IFNET_ROUTE_REFCNT
+#if DEBUG
+static unsigned int ifnet_debug = 1;	/* debugging (enabled) */
+#else
+static unsigned int ifnet_debug;	/* debugging (disabled) */
+#endif /* !DEBUG */
+static unsigned int dlif_size;		/* size of dlil_ifnet to allocate */
+static unsigned int dlif_bufsize;	/* size of dlif_size + headroom */
+static struct zone *dlif_zone;		/* zone for dlil_ifnet */
+
+#define	DLIF_ZONE_MAX		64		/* maximum elements in zone */
+#define	DLIF_ZONE_NAME		"ifnet"		/* zone name */
+
+static unsigned int dlif_filt_size;	/* size of ifnet_filter */
+static struct zone *dlif_filt_zone;	/* zone for ifnet_filter */
+
+#define	DLIF_FILT_ZONE_MAX	8		/* maximum elements in zone */
+#define	DLIF_FILT_ZONE_NAME	"ifnet_filter"	/* zone name */
+
+static unsigned int dlif_inp_size;	/* size of dlil_threading_info */
+static struct zone *dlif_inp_zone;	/* zone for dlil_threading_info */
+
+#define	DLIF_INP_ZONE_MAX	DLIF_ZONE_MAX	/* maximum elements in zone */
+#define	DLIF_INP_ZONE_NAME	"ifnet_thread"	/* zone name */
+
+static unsigned int dlif_phash_size;	/* size of ifnet proto hash table */
+static struct zone *dlif_phash_zone;	/* zone for ifnet proto hash table */
+
+#define	DLIF_PHASH_ZONE_MAX	DLIF_ZONE_MAX	/* maximum elements in zone */
+#define	DLIF_PHASH_ZONE_NAME	"ifnet_proto_hash" /* zone name */
+
+static unsigned int dlif_proto_size;	/* size of if_proto */
+static struct zone *dlif_proto_zone;	/* zone for if_proto */
+
+#define	DLIF_PROTO_ZONE_MAX	(DLIF_ZONE_MAX*2) /* maximum elements in zone */
+#define	DLIF_PROTO_ZONE_NAME	"ifnet_proto"	/* zone name */
+
 /*
  * Updating this variable should be done by first acquiring the global
  * radix node head (rnh_lock), in tandem with settting/clearing the
@@ -198,7 +283,6 @@ extern u_int32_t	ipv4_ll_arp_aware;
  */
 u_int32_t ifnet_aggressive_drainers;
 static u_int32_t net_rtref;
-#endif /* IFNET_ROUTE_REFCNT */
 
 static struct dlil_threading_info dlil_lo_thread;
 __private_extern__  struct dlil_threading_info *dlil_lo_thread_ptr = &dlil_lo_thread;
@@ -206,135 +290,117 @@ __private_extern__  struct dlil_threading_info *dlil_lo_thread_ptr = &dlil_lo_th
 static struct mbuf *dlil_lo_input_mbuf_head = NULL;
 static struct mbuf *dlil_lo_input_mbuf_tail = NULL;
 
-#if IFNET_INPUT_SANITY_CHK
-static int dlil_lo_input_mbuf_count = 0;
-int dlil_input_sanity_check = 0;	/* sanity checking of input packet lists received */
-#endif
-int dlil_multithreaded_input = 1;
-static int cur_dlil_input_threads = 0; 
-
 static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg);
 static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
-static void dlil_call_delayed_detach_thread(void);
+static void dlil_if_trace(struct dlil_ifnet *, int);
+static void if_proto_ref(struct if_proto *);
+static void if_proto_free(struct if_proto *);
+static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
+static int dlil_ifp_proto_count(struct ifnet *);
+static void if_flt_monitor_busy(struct ifnet *);
+static void if_flt_monitor_unbusy(struct ifnet *);
+static void if_flt_monitor_enter(struct ifnet *);
+static void if_flt_monitor_leave(struct ifnet *);
+static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
+    char **, protocol_family_t);
+static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
+    protocol_family_t);
+static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
+    const struct sockaddr_dl *);
+static int ifnet_lookup(struct ifnet *);
+static void if_purgeaddrs(struct ifnet *);
+
+static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
+    struct mbuf *, char *);
+static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
+    struct mbuf *);
+static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
+    mbuf_t *, const struct sockaddr *, void *, char *, char *);
+static void ifproto_media_event(struct ifnet *, protocol_family_t,
+    const struct kev_msg *);
+static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
+    unsigned long, void *);
+static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
+    struct sockaddr_dl *, size_t);
+static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
+    const struct sockaddr_dl *, const struct sockaddr *,
+    const struct sockaddr_dl *, const struct sockaddr *);
+
+static errno_t ifp_if_output(struct ifnet *, struct mbuf *);
+static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
+    protocol_family_t *);
+static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
+    const struct ifnet_demux_desc *, u_int32_t);
+static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
+static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
+static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
+    const struct sockaddr *, const char *, const char *);
+static errno_t ifp_if_ioctl(struct ifnet *, unsigned long, void *);
+static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
+static void ifp_if_free(struct ifnet *);
+static void ifp_if_event(struct ifnet *, const struct kev_msg *);
+
+static void dlil_input_thread_func(struct dlil_threading_info *inpthread);
+static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
+
+static void ifnet_delayed_thread_func(void);
+static void ifnet_detach_final(struct ifnet *);
+static void ifnet_detaching_enqueue(struct ifnet *);
+static struct ifnet *ifnet_detaching_dequeue(void);
+
+static void ifp_src_route_copyout(struct ifnet *, struct route *);
+static void ifp_src_route_copyin(struct ifnet *, struct route *);
+#if INET6
+static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
+static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
+#endif /* INET6 */
+
+/* The following are protected by dlil_ifnet_lock */
+static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
+static u_int32_t ifnet_detaching_cnt;
+static void *ifnet_delayed_run;	/* wait channel for detaching thread */
+
+extern void bpfdetach(struct ifnet*);
+extern void proto_input_run(void);
 
-static void	dlil_read_begin(void);
-static __inline__ void	dlil_read_end(void);
-static int	dlil_write_begin(void);
-static void	dlil_write_end(void);
+__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
 
 #if DEBUG
-__private_extern__ int dlil_verbose = 1;
+static int dlil_verbose = 1;
 #else
-__private_extern__ int dlil_verbose = 0;
+static int dlil_verbose = 0;
 #endif /* DEBUG */
+static int dlil_multithreaded_input = 1;
+static int cur_dlil_input_threads = 0;
+#if IFNET_INPUT_SANITY_CHK
+static int dlil_lo_input_mbuf_count = 0;
+/* sanity checking of input packet lists received */
+static int dlil_input_sanity_check = 0;
+#endif
 
-unsigned int net_affinity = 1;
-static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
+SYSCTL_DECL(_net_link_generic_system);
 
-extern void bpfdetach(struct ifnet*);
-extern void proto_input_run(void); // new run_netisr
+SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose, CTLFLAG_RW,
+    &dlil_verbose, 0, "Log DLIL error messages");
 
-void dlil_input_packet_list(struct ifnet  *ifp, struct mbuf *m);
-static void dlil_input_thread_func(struct dlil_threading_info *inpthread); 
-__private_extern__ int dlil_create_input_thread(
-		ifnet_t, struct dlil_threading_info *);
-__private_extern__ void dlil_terminate_input_thread(
-		struct dlil_threading_info *);
+SYSCTL_INT(_net_link_generic_system, OID_AUTO, multi_threaded_input, CTLFLAG_RW,
+    &dlil_multithreaded_input, 0, "Use multiple input threads for DLIL input");
 
-__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
+#if IFNET_INPUT_SANITY_CHK
+SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
+    CTLFLAG_RW, &dlil_input_sanity_check, 0,
+    "Turn on sanity checking in DLIL input");
+#endif
 
-int dlil_expand_mcl;
+unsigned int net_affinity = 1;
+static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
 
 extern u_int32_t	inject_buckets;
 
-static const u_int32_t dlil_writer_waiting = 0x80000000;
 static	lck_grp_attr_t	*dlil_grp_attributes = NULL;
 static	lck_attr_t	*dlil_lck_attributes = NULL;
 static	lck_grp_t	*dlil_input_lock_grp = NULL;
 
-static inline void*
-_cast_non_const(const void * ptr) {
-	union {
-		const void*		cval;
-		void*			val;
-	} ret;
-	
-	ret.cval = ptr;
-	return (ret.val);
-}
-
-/* Should these be inline? */
-static void
-dlil_read_begin(void)
-{
-	u_int32_t new_value;
-	u_int32_t old_value;
-	struct uthread *uth = get_bsdthread_info(current_thread());
-	
-	if (uth->dlil_incremented_read == dlil_writer_waiting)
-		panic("dlil_read_begin - thread is already a writer");
-	
-	do {
-again:
-		old_value = dlil_read_count;
-		
-		if ((old_value & dlil_writer_waiting) != 0 && uth->dlil_incremented_read == 0)
-		{
-			tsleep(&dlil_read_count, PRIBIO, "dlil_read_count", 1);
-			goto again;
-		}
-		
-		new_value = old_value + 1;
-	} while (!OSCompareAndSwap((UInt32)old_value, (UInt32)new_value, (UInt32*)&dlil_read_count));
-	
-	uth->dlil_incremented_read++;
-}
-
-static void
-dlil_read_end(void)
-{
-	struct uthread *uth = get_bsdthread_info(current_thread());
-	
-	OSDecrementAtomic(&dlil_read_count);
-	uth->dlil_incremented_read--;
-	if (dlil_read_count == dlil_writer_waiting)
-		wakeup(_cast_non_const(&dlil_writer_waiting));
-}
-
-static int
-dlil_write_begin(void)
-{
-	struct uthread *uth = get_bsdthread_info(current_thread());
-	
-	if (uth->dlil_incremented_read != 0) {
-		return EDEADLK;
-	}
-	lck_mtx_lock(dlil_mutex);
-	OSBitOrAtomic((UInt32)dlil_writer_waiting, &dlil_read_count);
-again:
-	if (dlil_read_count == dlil_writer_waiting) {
-		uth->dlil_incremented_read = dlil_writer_waiting;
-		return 0;
-	}
-	else {
-		tsleep(_cast_non_const(&dlil_writer_waiting), PRIBIO, "dlil_writer_waiting", 1);
-		goto again;
-	}
-}
-
-static void
-dlil_write_end(void)
-{
-	struct uthread *uth = get_bsdthread_info(current_thread());
-	
-	if (uth->dlil_incremented_read != dlil_writer_waiting)
-		panic("dlil_write_end - thread is not a writer");
-	OSBitAndAtomic((UInt32)~dlil_writer_waiting, &dlil_read_count);
-	lck_mtx_unlock(dlil_mutex);
-	uth->dlil_incremented_read = 0;
-	wakeup(&dlil_read_count);
-}
-
 #define PROTO_HASH_SLOTS	0x5
 
 /*
@@ -351,192 +417,248 @@ proto_hash_value(u_int32_t protocol_family)
 	 */
 	switch(protocol_family) {
 		case PF_INET:
-			return 0;
+			return (0);
 		case PF_INET6:
-			return 1;
+			return (1);
 		case PF_APPLETALK:
-			return 2;
+			return (2);
 		case PF_VLAN:
-			return 3;
+			return (3);
+		case PF_UNSPEC:
 		default:
-			return 4;
+			return (4);
 	}
 }
 
-static struct if_proto*
+/*
+ * Caller must already be holding ifnet lock.
+ */
+static struct if_proto *
 find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
 {
 	struct if_proto *proto = NULL;
 	u_int32_t i = proto_hash_value(protocol_family);
-	if (ifp->if_proto_hash) {
+
+	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
+
+	if (ifp->if_proto_hash != NULL)
 		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
-	}
-	
-	while(proto && proto->protocol_family != protocol_family) {
+
+	while (proto != NULL && proto->protocol_family != protocol_family)
 		proto = SLIST_NEXT(proto, next_hash);
-	}
-	
-	return proto;
+
+	if (proto != NULL)
+		if_proto_ref(proto);
+
+	return (proto);
 }
 
 static void
 if_proto_ref(struct if_proto *proto)
 {
-	OSAddAtomic(1, &proto->refcount);
+	atomic_add_32(&proto->refcount, 1);
 }
 
+extern void if_rtproto_del(struct ifnet *ifp, int protocol);
+
 static void
 if_proto_free(struct if_proto *proto)
 {
-	int oldval = OSAddAtomic(-1, &proto->refcount);
-	
-	if (oldval == 1) { /* This was the last reference */
-		FREE(proto, M_IFADDR);
+	u_int32_t oldval;
+	struct ifnet *ifp = proto->ifp;
+	u_int32_t proto_family = proto->protocol_family;
+	struct kev_dl_proto_data ev_pr_data;
+
+	oldval = atomic_add_32_ov(&proto->refcount, -1);
+	if (oldval > 1)
+		return;
+
+	/* No more reference on this, protocol must have been detached */
+	VERIFY(proto->detached);
+
+	if (proto->proto_kpi == kProtoKPI_v1) {
+		if (proto->kpi.v1.detached)
+			proto->kpi.v1.detached(ifp, proto->protocol_family);
+	}
+	if (proto->proto_kpi == kProtoKPI_v2) {
+		if (proto->kpi.v2.detached)
+			proto->kpi.v2.detached(ifp, proto->protocol_family);
 	}
+
+	/*
+	 * Cleanup routes that may still be in the routing table for that
+	 * interface/protocol pair.
+	 */
+	if_rtproto_del(ifp, proto_family);
+
+	/*
+	 * The reserved field carries the number of protocols still attached
+	 * (subject to change)
+	 */
+	ifnet_lock_shared(ifp);
+	ev_pr_data.proto_family = proto_family;
+	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
+	ifnet_lock_done(ifp);
+
+	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
+	    (struct net_event_data *)&ev_pr_data,
+	    sizeof(struct kev_dl_proto_data));
+
+	zfree(dlif_proto_zone, proto);
 }
 
 __private_extern__ void
-ifnet_lock_assert(
-	__unused struct ifnet *ifp,
-	__unused int what)
+ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
 {
-#if IFNET_RW_LOCK
-	/*
-	 * Not implemented for rw locks.
-	 *
-	 * Function exists so when/if we use mutex we can
-	 * enable this check.
-	 */
-#else
-	lck_mtx_assert(ifp->if_lock, what);
-#endif
+	unsigned int type = 0;
+	int ass = 1;
+
+	switch (what) {
+	case IFNET_LCK_ASSERT_EXCLUSIVE:
+		type = LCK_RW_ASSERT_EXCLUSIVE;
+		break;
+
+	case IFNET_LCK_ASSERT_SHARED:
+		type = LCK_RW_ASSERT_SHARED;
+		break;
+
+	case IFNET_LCK_ASSERT_OWNED:
+		type = LCK_RW_ASSERT_HELD;
+		break;
+
+	case IFNET_LCK_ASSERT_NOTOWNED:
+		/* nothing to do here for RW lock; bypass assert */
+		ass = 0;
+		break;
+
+	default:
+		panic("bad ifnet assert type: %d", what);
+		/* NOTREACHED */
+	}
+	if (ass)
+		lck_rw_assert(&ifp->if_lock, type);
 }
 
 __private_extern__ void
-ifnet_lock_shared(
-	struct ifnet *ifp)
+ifnet_lock_shared(struct ifnet *ifp)
 {
-#if IFNET_RW_LOCK
-	lck_rw_lock_shared(ifp->if_lock);
-#else
-	lck_mtx_assert(ifp->if_lock, LCK_MTX_ASSERT_NOTOWNED);
-	lck_mtx_lock(ifp->if_lock);
-#endif
+	lck_rw_lock_shared(&ifp->if_lock);
 }
 
 __private_extern__ void
-ifnet_lock_exclusive(
-	struct ifnet *ifp)
+ifnet_lock_exclusive(struct ifnet *ifp)
 {
-#if IFNET_RW_LOCK
-	lck_rw_lock_exclusive(ifp->if_lock);
-#else
-	lck_mtx_assert(ifp->if_lock, LCK_MTX_ASSERT_NOTOWNED);
-	lck_mtx_lock(ifp->if_lock);
-#endif
+	lck_rw_lock_exclusive(&ifp->if_lock);
 }
 
 __private_extern__ void
-ifnet_lock_done(
-	struct ifnet *ifp)
+ifnet_lock_done(struct ifnet *ifp)
 {
-#if IFNET_RW_LOCK
-	lck_rw_done(ifp->if_lock);
-#else
-	lck_mtx_assert(ifp->if_lock, LCK_MTX_ASSERT_OWNED);
-	lck_mtx_unlock(ifp->if_lock);
-#endif
+	lck_rw_done(&ifp->if_lock);
 }
 
 __private_extern__ void
 ifnet_head_lock_shared(void)
 {
-	lck_rw_lock_shared(ifnet_head_mutex);
+	lck_rw_lock_shared(&ifnet_head_lock);
 }
 
 __private_extern__ void
 ifnet_head_lock_exclusive(void)
 {
-	lck_rw_lock_exclusive(ifnet_head_mutex);
+	lck_rw_lock_exclusive(&ifnet_head_lock);
 }
 
 __private_extern__ void
 ifnet_head_done(void)
 {
-	lck_rw_done(ifnet_head_mutex);
+	lck_rw_done(&ifnet_head_lock);
 }
 
-static int dlil_ifp_proto_count(struct ifnet * ifp) 
+/*
+ * Caller must already be holding ifnet lock.
+ */
+static int
+dlil_ifp_proto_count(struct ifnet * ifp)
 {
-	int				count = 0;
-	int				i;
-	
-	if (ifp->if_proto_hash != NULL) {	
-		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
-			struct if_proto *proto;
-			SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
-				count++;
-			}
+	int i, count = 0;
+
+	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
+
+	if (ifp->if_proto_hash == NULL)
+		goto done;
+
+	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
+		struct if_proto *proto;
+		SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
+			count++;
 		}
 	}
-	
-	return count;
+done:
+	return (count);
 }
 
 __private_extern__ void
-dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass, u_int32_t event_code, 
-		   struct net_event_data *event_data, u_int32_t event_data_len) 
+dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
+    u_int32_t event_code, struct net_event_data *event_data,
+    u_int32_t event_data_len)
 {
-	struct net_event_data  	ev_data;
-	struct kev_msg  		ev_msg;
-	
-	/* 
+	struct net_event_data ev_data;
+	struct kev_msg ev_msg;
+
+	bzero(&ev_msg, sizeof (ev_msg));
+	bzero(&ev_data, sizeof (ev_data));
+	/*
 	 * a net event always starts with a net_event_data structure
 	 * but the caller can generate a simple net event or
 	 * provide a longer event structure to post
 	 */
-	
-	ev_msg.vendor_code    = KEV_VENDOR_APPLE;
-	ev_msg.kev_class      = KEV_NETWORK_CLASS;
-	ev_msg.kev_subclass   = event_subclass;
-	ev_msg.event_code 	  = event_code;    
-	
-	if (event_data == 0) {
+	ev_msg.vendor_code	= KEV_VENDOR_APPLE;
+	ev_msg.kev_class	= KEV_NETWORK_CLASS;
+	ev_msg.kev_subclass	= event_subclass;
+	ev_msg.event_code	= event_code;
+
+	if (event_data == NULL) {
 		event_data = &ev_data;
 		event_data_len = sizeof(struct net_event_data);
 	}
-	
+
 	strncpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
 	event_data->if_family = ifp->if_family;
 	event_data->if_unit   = (u_int32_t) ifp->if_unit;
-	
+
 	ev_msg.dv[0].data_length = event_data_len;
-	ev_msg.dv[0].data_ptr    = event_data;	
+	ev_msg.dv[0].data_ptr    = event_data;
 	ev_msg.dv[1].data_length = 0;
-	
+
 	dlil_event_internal(ifp, &ev_msg);
 }
 
-__private_extern__ int
-dlil_create_input_thread(
-	ifnet_t ifp, struct dlil_threading_info *inputthread)
+static int
+dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inputthread)
 {
 	int error;
 
 	bzero(inputthread, sizeof(*inputthread));
-	// loopback ifp may not be configured at dlil_init time.
-	if (ifp == lo_ifp)
-		strlcat(inputthread->input_name, "dlil_input_main_thread_mtx", 32);
-	else
-		snprintf(inputthread->input_name, 32, "dlil_input_%s%d_mtx", ifp->if_name, ifp->if_unit);	
+	/* loopback ifp may not be configured at dlil_init time. */
+	if (ifp == lo_ifp) {
+		(void) strlcat(inputthread->input_name,
+		    "dlil_input_main_thread_mtx", DLIL_THREADNAME_LEN);
+	} else {
+		(void) snprintf(inputthread->input_name, DLIL_THREADNAME_LEN,
+		    "dlil_input_%s%d_mtx", ifp->if_name, ifp->if_unit);
+	}
 
-	inputthread->lck_grp = lck_grp_alloc_init(inputthread->input_name, dlil_grp_attributes);
-	inputthread->input_lck  = lck_mtx_alloc_init(inputthread->lck_grp, dlil_lck_attributes);
+	inputthread->lck_grp = lck_grp_alloc_init(inputthread->input_name,
+	    dlil_grp_attributes);
+	lck_mtx_init(&inputthread->input_lck, inputthread->lck_grp,
+	    dlil_lck_attributes);
 
-	error= kernel_thread_start((thread_continue_t)dlil_input_thread_func, inputthread, &inputthread->input_thread);
+	error = kernel_thread_start((thread_continue_t)dlil_input_thread_func,
+	    inputthread, &inputthread->input_thread);
 	if (error == 0) {
-       		ml_thread_policy(inputthread->input_thread, MACHINE_GROUP,
-				 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
+		ml_thread_policy(inputthread->input_thread, MACHINE_GROUP,
+		    (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
 		/*
 		 * Except for the loopback dlil input thread, we create
 		 * an affinity set so that the matching workloop thread
@@ -557,31 +679,16 @@ dlil_create_input_thread(
 			}
 		}
 	} else {
-		panic("dlil_create_input_thread: couldn't create thread\n");
+		panic("%s: couldn't create thread", __func__);
+		/* NOTREACHED */
 	}
 	OSAddAtomic(1, &cur_dlil_input_threads);
 #if DLIL_DEBUG
-	printf("dlil_create_input_thread: threadinfo: %p input_thread=%p threads: cur=%d max=%d\n", 
-		inputthread, inputthread->input_thread, dlil_multithreaded_input, cur_dlil_input_threads);
+	printf("%s: threadinfo: %p input_thread=%p threads: cur=%d max=%d\n",
+	    __func__, inputthread, inputthread->input_thread,
+	    dlil_multithreaded_input, cur_dlil_input_threads);
 #endif
-	return error;
-}
-__private_extern__ void
-dlil_terminate_input_thread(
-	struct dlil_threading_info *inputthread)
-{
-	OSAddAtomic(-1, &cur_dlil_input_threads);
-
-	lck_mtx_unlock(inputthread->input_lck);
-	lck_mtx_free(inputthread->input_lck, inputthread->lck_grp);
-	lck_grp_free(inputthread->lck_grp);
-
-	FREE(inputthread, M_NKE);
-
-	/* For the extra reference count from kernel_thread_start() */
-	thread_deallocate(current_thread());
-
-	thread_terminate(current_thread());
+	return (error);
 }
 
 static kern_return_t
@@ -598,65 +705,246 @@ dlil_affinity_set(struct thread *tp, u_int32_t tag)
 void
 dlil_init(void)
 {
-	thread_t		thread = THREAD_NULL;
+	thread_t thread = THREAD_NULL;
+
+	/*
+	 * The following fields must be 64-bit aligned for atomic operations.
+	 */
+	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
+	IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
+	IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
+	IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
+	IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
+	IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
+	IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
+	IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
+	IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
+	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
+	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
+
+	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
+	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
+	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
+	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
+	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
+	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
+	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
+	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
+	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
+	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
+	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
+
+	IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ibkpackets);
+	IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ibkbytes);
+	IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_obkpackets);
+	IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_obkbytes);
+	IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ivipackets);
+	IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ivibytes);
+	IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ovipackets);
+	IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ovibytes);
+	IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ivopackets);
+	IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ivobytes);
+	IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ovopackets);
+	IFNET_IF_TC_REQUIRE_ALIGNED_64(ifi_ovobytes);
+
+	/*
+	 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
+	 */
+	_CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
+	_CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
+	_CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
+	_CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
+	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
+	_CASSERT(IF_HWASSIST_CSUM_TCP_SUM16 == IFNET_CSUM_SUM16);
+	_CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
+	_CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
+	_CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
+	_CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
+
+	/*
+	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
+	 */
+	_CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
+
+	PE_parse_boot_argn("net_affinity", &net_affinity,
+	    sizeof (net_affinity));
 
-	PE_parse_boot_argn("net_affinity", &net_affinity, sizeof (net_affinity));
-#if IFNET_ROUTE_REFCNT
 	PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
-#endif /* IFNET_ROUTE_REFCNT */
+
+	PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
+
+	dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
+	    sizeof (struct dlil_ifnet_dbg);
+	/* Enforce 64-bit alignment for dlil_ifnet structure */
+	dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
+	dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
+	dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
+	    0, DLIF_ZONE_NAME);
+	if (dlif_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, DLIF_ZONE_NAME);
+		/* NOTREACHED */
+	}
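+	/*
+	 * Z_EXPAND lets the zone grow beyond its initial backing store;
+	 * Z_CALLERACCT FALSE charges allocations to the kernel rather
+	 * than to the calling task.
+	 */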
+	zone_change(dlif_zone, Z_EXPAND, TRUE);
+	zone_change(dlif_zone, Z_CALLERACCT, FALSE);
+
+	dlif_filt_size = sizeof (struct ifnet_filter);
+	dlif_filt_zone = zinit(dlif_filt_size,
+	    DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
+	if (dlif_filt_zone == NULL) {
+		panic("%s: failed allocating %s", __func__,
+		    DLIF_FILT_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
+	zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
+
+	dlif_inp_size = sizeof (struct dlil_threading_info);
+	dlif_inp_zone = zinit(dlif_inp_size,
+	    DLIF_INP_ZONE_MAX * dlif_inp_size, 0, DLIF_INP_ZONE_NAME);
+	if (dlif_inp_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, DLIF_INP_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(dlif_inp_zone, Z_EXPAND, TRUE);
+	zone_change(dlif_inp_zone, Z_CALLERACCT, FALSE);
+
+	dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
+	dlif_phash_zone = zinit(dlif_phash_size,
+	    DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
+	if (dlif_phash_zone == NULL) {
+		panic("%s: failed allocating %s", __func__,
+		    DLIF_PHASH_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
+	zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
+
+	dlif_proto_size = sizeof (struct if_proto);
+	dlif_proto_zone = zinit(dlif_proto_size,
+	    DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
+	if (dlif_proto_zone == NULL) {
+		panic("%s: failed allocating %s", __func__,
+		    DLIF_PROTO_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
+	zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
+
+	ifnet_llreach_init();
 
 	TAILQ_INIT(&dlil_ifnet_head);
 	TAILQ_INIT(&ifnet_head);
-	
+	TAILQ_INIT(&ifnet_detaching_head);
+
 	/* Setup the lock groups we will use */
 	dlil_grp_attributes = lck_grp_attr_alloc_init();
 
-	dlil_lock_group = lck_grp_alloc_init("dlil internal locks", dlil_grp_attributes);
-	ifnet_lock_group = lck_grp_alloc_init("ifnet locks", dlil_grp_attributes);
-	ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock", dlil_grp_attributes);
-	dlil_input_lock_grp = lck_grp_alloc_init("dlil input lock", dlil_grp_attributes);
-	
+	dlil_lock_group = lck_grp_alloc_init("dlil internal locks",
+	    dlil_grp_attributes);
+	ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
+	    dlil_grp_attributes);
+	ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
+	    dlil_grp_attributes);
+	dlil_input_lock_grp = lck_grp_alloc_init("dlil input lock",
+	    dlil_grp_attributes);
+
 	/* Setup the lock attributes we will use */
 	dlil_lck_attributes = lck_attr_alloc_init();
-	
+
 	ifnet_lock_attr = lck_attr_alloc_init();
-	
-	
-	ifnet_head_mutex = lck_rw_alloc_init(ifnet_head_lock_group, dlil_lck_attributes);
-	dlil_ifnet_mutex = lck_mtx_alloc_init(dlil_lock_group, dlil_lck_attributes);
-	dlil_mutex = lck_mtx_alloc_init(dlil_lock_group, dlil_lck_attributes);
-	
+
+	lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
+	    dlil_lck_attributes);
+	lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
+
 	lck_attr_free(dlil_lck_attributes);
 	dlil_lck_attributes = NULL;
-	
+
+	ifa_init();
+
 	/*
-	 * Create and start up the first dlil input thread once everything is initialized
+	 * Create and start up the first dlil input thread once everything
+	 * is initialized.
 	 */
-	dlil_create_input_thread(0, dlil_lo_thread_ptr);
+	dlil_create_input_thread(lo_ifp, dlil_lo_thread_ptr);
 
-	(void) kernel_thread_start((thread_continue_t)dlil_call_delayed_detach_thread, NULL, &thread);
+	if (kernel_thread_start((thread_continue_t)ifnet_delayed_thread_func,
+	    NULL, &thread) != 0) {
+		panic("%s: couldn't create detach thread", __func__);
+		/* NOTREACHED */
+	}
 	thread_deallocate(thread);
+
 #if PF
 	/* Initialize the packet filter */
 	pfinit();
 #endif /* PF */
 }
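 
+/*
+ * The filter "monitor" serializes updates to an interface's filter list
+ * against threads still walking it: if_flt_monitor_busy() raises a busy
+ * count under if_flt_lock, and if_flt_monitor_enter() sleeps until that
+ * count drains before letting a writer proceed.
+ */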
 
+static void
+if_flt_monitor_busy(struct ifnet *ifp)
+{
+	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
+
+	++ifp->if_flt_busy;
+	VERIFY(ifp->if_flt_busy != 0);
+}
+
+static void
+if_flt_monitor_unbusy(struct ifnet *ifp)
+{
+	if_flt_monitor_leave(ifp);
+}
+
+static void
+if_flt_monitor_enter(struct ifnet *ifp)
+{
+	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
+
+	while (ifp->if_flt_busy) {
+		++ifp->if_flt_waiters;
+		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
+		    (PZERO - 1), "if_flt_monitor", NULL);
+	}
+	if_flt_monitor_busy(ifp);
+}
+
+static void
+if_flt_monitor_leave(struct ifnet *ifp)
+{
+	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
+
+	VERIFY(ifp->if_flt_busy != 0);
+	--ifp->if_flt_busy;
+
+	if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
+		ifp->if_flt_waiters = 0;
+		wakeup(&ifp->if_flt_head);
+	}
+}
+
 __private_extern__ int
-dlil_attach_filter(
-	struct ifnet			*ifp,
-	const struct iff_filter	*if_filter,
-	interface_filter_t		*filter_ref)
-{
-    int retval = 0;
-    struct ifnet_filter	*filter;
-    
-	MALLOC(filter, struct ifnet_filter *, sizeof(*filter), M_NKE, M_WAITOK);
-	if (filter == NULL)
-		return ENOMEM;
-	bzero(filter, sizeof(*filter));
+dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
+    interface_filter_t *filter_ref)
+{
+	int retval = 0;
+	struct ifnet_filter *filter = NULL;
 
-    
+	ifnet_head_lock_shared();
+	/* Check that the interface is in the global list */
+	if (!ifnet_lookup(ifp)) {
+		retval = ENXIO;
+		goto done;
+	}
+
+	filter = zalloc(dlif_filt_zone);
+	if (filter == NULL) {
+		retval = ENOMEM;
+		goto done;
+	}
+	bzero(filter, dlif_filt_size);
+
+	/* refcnt held above during lookup */
 	filter->filt_ifp = ifp;
 	filter->filt_cookie = if_filter->iff_cookie;
 	filter->filt_name = if_filter->iff_name;
@@ -666,14 +954,16 @@ dlil_attach_filter(
 	filter->filt_event = if_filter->iff_event;
 	filter->filt_ioctl = if_filter->iff_ioctl;
 	filter->filt_detached = if_filter->iff_detached;
-	
-	if ((retval = dlil_write_begin()) != 0) {
-		/* Failed to acquire the write lock */
-		FREE(filter, M_NKE);
-		return retval;
-	}
+
+	lck_mtx_lock(&ifp->if_flt_lock);
+	if_flt_monitor_enter(ifp);
+
+	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
 	TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
-	dlil_write_end();
+
+	if_flt_monitor_leave(ifp);
+	lck_mtx_unlock(&ifp->if_flt_lock);
+
 	*filter_ref = filter;
 
 	/*
@@ -684,71 +974,88 @@ dlil_attach_filter(
 	if (use_routegenid)
 		routegenid_update();
 
-	return retval;
+	if (dlil_verbose) {
+		printf("%s%d: %s filter attached\n", ifp->if_name,
+		    ifp->if_unit, if_filter->iff_name);
+	}
+done:
+	ifnet_head_done();
+	if (retval != 0 && ifp != NULL) {
+		DLIL_PRINTF("%s%d: failed to attach %s (err=%d)\n",
+		    ifp->if_name, ifp->if_unit, if_filter->iff_name, retval);
+	}
+	if (retval != 0 && filter != NULL)
+		zfree(dlif_filt_zone, filter);
+
+	return (retval);
 }
 
 static int
-dlil_detach_filter_internal(
-	interface_filter_t	filter,
-	int					detached)
+dlil_detach_filter_internal(interface_filter_t filter, int detached)
 {
 	int retval = 0;
-	
+
 	if (detached == 0) {
-		ifnet_t				ifp = NULL;
-		interface_filter_t	entry = NULL;
-
-		/* Take the write lock */
-	 	retval = dlil_write_begin();
-	 	if (retval != 0 && retval != EDEADLK)
-	 		return retval;
-	 	
-	 	/*
-	 	 * At this point either we have the write lock (retval == 0)
-	 	 * or we couldn't get it (retval == EDEADLK) because someone
-	 	 * else up the stack is holding the read lock. It is safe to
-	 	 * read, either the read or write is held. Verify the filter
-	 	 * parameter before proceeding.
-	 	 */
+		ifnet_t ifp = NULL;
+
 		ifnet_head_lock_shared();
 		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+			interface_filter_t entry = NULL;
+
+			lck_mtx_lock(&ifp->if_flt_lock);
 			TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
-				if (entry == filter)
-					break;
+				if (entry != filter || entry->filt_skip)
+					continue;
+				/*
+				 * We've found a match; since it's possible
+				 * that the thread gets blocked in the monitor,
+				 * we do the lock dance.  Interface should
+				 * not be detached since we still have a use
+				 * count held during filter attach.
+				 */
+				entry->filt_skip = 1;	/* skip input/output */
+				lck_mtx_unlock(&ifp->if_flt_lock);
+				ifnet_head_done();
+
+				lck_mtx_lock(&ifp->if_flt_lock);
+				if_flt_monitor_enter(ifp);
+				lck_mtx_assert(&ifp->if_flt_lock,
+				    LCK_MTX_ASSERT_OWNED);
+
+				/* Remove the filter from the list */
+				TAILQ_REMOVE(&ifp->if_flt_head, filter,
+				    filt_next);
+
+				if_flt_monitor_leave(ifp);
+				lck_mtx_unlock(&ifp->if_flt_lock);
+				if (dlil_verbose) {
+					printf("%s%d: %s filter detached\n",
+					    ifp->if_name, ifp->if_unit,
+					    filter->filt_name);
+				}
+				goto destroy;
 			}
-			if (entry == filter)
-				break;
+			lck_mtx_unlock(&ifp->if_flt_lock);
 		}
 		ifnet_head_done();
-		
-		if (entry != filter) {
-			/* filter parameter is not a valid filter ref */
-			if (retval == 0) {
-				dlil_write_end();
-			}
-			return EINVAL;
-		}
-		
-		if (retval == EDEADLK) {
-			/* Perform a delayed detach */
-			filter->filt_detaching = 1;
-			dlil_detach_waiting = 1;
-			wakeup(&dlil_detach_waiting);
-			return 0;
-		}
-		
-		/* Remove the filter from the list */
-		TAILQ_REMOVE(&ifp->if_flt_head, filter, filt_next);
-		dlil_write_end();
+
+		/* filter parameter is not a valid filter ref */
+		retval = EINVAL;
+		goto done;
 	}
-	
-	/* Call the detached funciton if there is one */
+
+	if (dlil_verbose)
+		printf("%s filter detached\n", filter->filt_name);
+
+destroy:
+
+	/* Call the detached function if there is one */
 	if (filter->filt_detached)
 		filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
 
 	/* Free the filter */
-	FREE(filter, M_NKE);
-	
+	zfree(dlif_filt_zone, filter);
+
 	/*
 	 * Decrease filter count and route_generation ID to let TCP
 	 * know it should reevaluate doing TSO or not
@@ -757,7 +1064,12 @@ dlil_detach_filter_internal(
 	if (use_routegenid)
 		routegenid_update();
 
-	return retval;
+done:
+	if (retval != 0) {
+		DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
+		    filter->filt_name, retval);
+	}
+	return (retval);
 }
 
 __private_extern__ void
@@ -769,8 +1081,7 @@ dlil_detach_filter(interface_filter_t filter)
 }
 
 static void
-dlil_input_thread_func(
-	struct dlil_threading_info *inputthread)
+dlil_input_thread_func(struct dlil_threading_info *inputthread)
 {
 	while (1) {
 		struct mbuf *m = NULL, *m_loop = NULL;
@@ -779,28 +1090,44 @@ dlil_input_thread_func(
 		int		count;
 		struct mbuf *m1;
 #endif /* IFNET_INPUT_SANITY_CHK */
-		
-		lck_mtx_lock(inputthread->input_lck);
-		
+
+		lck_mtx_lock_spin(&inputthread->input_lck);
+
 		/* Wait until there is work to be done */
-		while ((inputthread->input_waiting & ~DLIL_INPUT_RUNNING) == 0) {
+		while (!(inputthread->input_waiting & ~DLIL_INPUT_RUNNING)) {
 			inputthread->input_waiting &= ~DLIL_INPUT_RUNNING;
-			msleep(&inputthread->input_waiting, inputthread->input_lck, 0, inputthread->input_name, 0);
+			msleep(&inputthread->input_waiting,
+			    &inputthread->input_lck, 0,
+			    inputthread->input_name, 0);
 		}
 
-	
-		lck_mtx_assert(inputthread->input_lck, LCK_MTX_ASSERT_OWNED);
+		lck_mtx_assert(&inputthread->input_lck, LCK_MTX_ASSERT_OWNED);
 
 		m = inputthread->mbuf_head;
 		inputthread->mbuf_head = NULL;
 		inputthread->mbuf_tail = NULL;
 
 		if (inputthread->input_waiting & DLIL_INPUT_TERMINATE) {
-				if (m)
-					mbuf_freem_list(m);
-				/* this is the end */
-				dlil_terminate_input_thread(inputthread);
-				return;
+			lck_mtx_unlock(&inputthread->input_lck);
+
+			if (m != NULL)
+				mbuf_freem_list(m);
+
+			OSAddAtomic(-1, &cur_dlil_input_threads);
+
+			lck_mtx_destroy(&inputthread->input_lck,
+			    inputthread->lck_grp);
+			lck_grp_free(inputthread->lck_grp);
+
+			zfree(dlif_inp_zone, inputthread);
+
+			/* for the extra refcnt from kernel_thread_start() */
+			thread_deallocate(current_thread());
+
+			/* this is the end */
+			thread_terminate(current_thread());
+			/* NOTREACHED */
+			return;
 		}
 
 		inputthread->input_waiting |= DLIL_INPUT_RUNNING;
@@ -820,69 +1147,76 @@ dlil_input_thread_func(
 				loop_cnt = dlil_lo_input_mbuf_count;
 				dlil_lo_input_mbuf_count = 0;
 			}
-		
-			lck_mtx_unlock(inputthread->input_lck);
-		
+
+			lck_mtx_unlock(&inputthread->input_lck);
+
 			for (m1 = m, count = 0; m1; m1 = mbuf_nextpkt(m1)) {
 				count++;
 			}
 			if (count != mbuf_cnt) {
-				panic("dlil_input_func - thread=%p reg. loop queue has %d packets, should have %d\n",
-					  inputthread, count, mbuf_cnt);
+				panic("%s - thread=%p reg. loop queue "
+				    "has %d packets, should have %d\n",
+				    __func__, inputthread, count, mbuf_cnt);
+				/* NOTREACHED */
 			}
-	
+
 			if (inputthread == dlil_lo_thread_ptr) {
-				for (m1 = m_loop, count = 0; m1; m1 = mbuf_nextpkt(m1)) {
+				for (m1 = m_loop, count = 0; m1;
+				    m1 = mbuf_nextpkt(m1)) {
 					count++;
 				}
 				if (count != loop_cnt) {
-					panic("dlil_input_func - thread=%p loop queue has %d packets, should have %d\n",
-					  inputthread, count, loop_cnt);
+					panic("%s - thread=%p loop queue "
+					    "has %d packets, should have %d\n",
+					    __func__, inputthread, count,
+					    loop_cnt);
+					/* NOTREACHED */
 				}
 			}
-		} else 
+		} else
 #endif /* IFNET_INPUT_SANITY_CHK */
 		{
-			lck_mtx_unlock(inputthread->input_lck);
+			lck_mtx_unlock(&inputthread->input_lck);
 		}
 
 
 		/*
 		* NOTE warning %%% attention !!!!
-		* We should think about putting some thread starvation safeguards if 
-		* we deal with long chains of packets.
+		* We should think about putting some thread starvation
+		* safeguards if we deal with long chains of packets.
 		*/
 		if (m_loop) {
-			if (inputthread == dlil_lo_thread_ptr)
+			if (inputthread == dlil_lo_thread_ptr) {
 				dlil_input_packet_list(lo_ifp, m_loop);
+			}
 #if IFNET_INPUT_SANITY_CHK
-			else
-				panic("dlil_input_func - thread=%p loop queue has %d packets, should have none!\n",
-				  inputthread, loop_cnt);
+			else {
+				panic("%s - thread=%p loop queue has %d "
+				    "packets, should have none!\n", __func__,
+				    inputthread, loop_cnt);
+				/* NOTREACHED */
+			}
 #endif /* IFNET_INPUT_SANITY_CHK */
 		}
 
-
-		if (m)
+		if (m != NULL)
 			dlil_input_packet_list(0, m);
 
+		lck_mtx_lock_spin(&inputthread->input_lck);
 
-		lck_mtx_lock(inputthread->input_lck);
-
-		if ((inputthread->input_waiting & (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)) != 0)  {
-			lck_mtx_unlock(inputthread->input_lck);
+		if (inputthread->input_waiting &
+		    (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER))  {
+			lck_mtx_unlock(&inputthread->input_lck);
 			proto_input_run();
-		}	
-		else	
-			lck_mtx_unlock(inputthread->input_lck);
+		} else {
+			lck_mtx_unlock(&inputthread->input_lck);
+		}
 	}
 }
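
/*
 * dlil_input_thread_func() is a classic hand-off loop: sleep until work
 * (or a terminate request) arrives, detach the entire mbuf chain while
 * the lock is held, then process it unlocked.  A compact userland
 * analogue using pthreads; struct work and all names here are
 * hypothetical stand-ins:
 */
#include <pthread.h>
#include <stdlib.h>

struct work {
	struct work *next;
};

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  q_cv = PTHREAD_COND_INITIALIZER;
static struct work *q_head;
static int q_terminate;

static void *
input_thread(void *arg)
{
	(void)arg;
	for (;;) {
		struct work *w, *next;
		int terminate;

		pthread_mutex_lock(&q_lock);
		while (q_head == NULL && !q_terminate)
			pthread_cond_wait(&q_cv, &q_lock);
		w = q_head;		/* take the whole chain at once */
		q_head = NULL;
		terminate = q_terminate;
		pthread_mutex_unlock(&q_lock);

		while (w != NULL) {	/* work on it outside the lock */
			next = w->next;
			free(w);	/* stand-in for packet processing */
			w = next;
		}
		if (terminate)
			return (NULL);	/* analogue of thread_terminate() */
	}
}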
 
 errno_t
-ifnet_input(
-	ifnet_t									ifp,
-	mbuf_t									m_head,
-	const struct ifnet_stat_increment_param	*stats)
+ifnet_input(ifnet_t ifp, mbuf_t m_head,
+    const struct ifnet_stat_increment_param *stats)
 {
 	struct thread *tp = current_thread();
 	mbuf_t		m_tail;
@@ -892,9 +1226,9 @@ ifnet_input(
 #endif /* IFNET_INPUT_SANITY_CHK */
 
 	if (ifp == NULL || m_head == NULL) {
-		if (m_head)
+		if (m_head != NULL)
 			mbuf_freem_list(m_head);
-		return EINVAL;
+		return (EINVAL);
 	}
 
 	m_tail = m_head;
@@ -902,14 +1236,16 @@ ifnet_input(
 #if IFNET_INPUT_SANITY_CHK
 		if (dlil_input_sanity_check != 0) {
 			ifnet_t	rcvif;
-		
+
 			rcvif = mbuf_pkthdr_rcvif(m_tail);
 			pkt_count++;
-		
+
 			if (rcvif == NULL ||
-				(ifp->if_type != IFT_LOOP && rcvif != ifp) ||
-				(mbuf_flags(m_head) & MBUF_PKTHDR) == 0) {
-				panic("ifnet_input - invalid mbuf %p\n", m_tail);
+			    (ifp->if_type != IFT_LOOP && rcvif != ifp) ||
+			    !(mbuf_flags(m_head) & MBUF_PKTHDR)) {
+				panic("%s - invalid mbuf %p\n",
+				    __func__, m_tail);
+				/* NOTREACHED */
 			}
 		}
 #endif /* IFNET_INPUT_SANITY_CHK */
@@ -920,7 +1256,7 @@ ifnet_input(
 
 	inp = ifp->if_input_thread;
 
-	if (dlil_multithreaded_input == 0 || inp == NULL) 
+	if (dlil_multithreaded_input == 0 || inp == NULL)
 		inp = dlil_lo_thread_ptr;
 
 	/*
@@ -928,11 +1264,11 @@ ifnet_input(
 	 * affinity set, associate this workloop thread with the same set.
 	 * We will only do this once.
 	 */
-	lck_mtx_lock(inp->input_lck);
+	lck_mtx_lock_spin(&inp->input_lck);
 	if (inp->net_affinity && inp->workloop_thread == NULL) {
 		u_int32_t tag = inp->tag;
 		inp->workloop_thread = tp;
-		lck_mtx_unlock(inp->input_lck);
+		lck_mtx_unlock(&inp->input_lck);
 
 		/* Associated the current thread with the new affinity tag */
 		(void) dlil_affinity_set(tp, tag);
@@ -943,7 +1279,7 @@ ifnet_input(
 		 * its affinity.
 		 */
 		thread_reference(tp);
-		lck_mtx_lock(inp->input_lck);
+		lck_mtx_lock_spin(&inp->input_lck);
 	}
 
         /* WARNING
@@ -964,11 +1300,10 @@ ifnet_input(
 			inp->input_mbuf_cnt += pkt_count;
 			inp->input_wake_cnt++;
 
-			lck_mtx_assert(inp->input_lck, LCK_MTX_ASSERT_OWNED);
+			lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
 		}
 #endif
-	}
-	else {
+	} else {
 		if (inp->mbuf_head == NULL)
 			inp->mbuf_head = m_head;
 		else if (inp->mbuf_tail != NULL)
@@ -980,58 +1315,71 @@ ifnet_input(
 			inp->input_mbuf_cnt += pkt_count;
 			inp->input_wake_cnt++;
 
-			lck_mtx_assert(inp->input_lck, LCK_MTX_ASSERT_OWNED);
+			lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
 		}
 #endif
 	}
 
-
 	inp->input_waiting |= DLIL_INPUT_WAITING;
 	if ((inp->input_waiting & DLIL_INPUT_RUNNING) == 0) {
 		wakeup((caddr_t)&inp->input_waiting);
 	}
+	lck_mtx_unlock(&inp->input_lck);
+
 	if (stats) {
-		ifp->if_data.ifi_ipackets += stats->packets_in;
-		ifp->if_data.ifi_ibytes += stats->bytes_in;
-		ifp->if_data.ifi_ierrors += stats->errors_in;
-	
-		ifp->if_data.ifi_opackets += stats->packets_out;
-		ifp->if_data.ifi_obytes += stats->bytes_out;
-		ifp->if_data.ifi_oerrors += stats->errors_out;
-	
-		ifp->if_data.ifi_collisions += stats->collisions;
-		ifp->if_data.ifi_iqdrops += stats->dropped;
+		atomic_add_64(&ifp->if_data.ifi_ipackets, stats->packets_in);
+		atomic_add_64(&ifp->if_data.ifi_ibytes, stats->bytes_in);
+		atomic_add_64(&ifp->if_data.ifi_ierrors, stats->errors_in);
+
+		atomic_add_64(&ifp->if_data.ifi_opackets, stats->packets_out);
+		atomic_add_64(&ifp->if_data.ifi_obytes, stats->bytes_out);
+		atomic_add_64(&ifp->if_data.ifi_oerrors, stats->errors_out);
+
+		atomic_add_64(&ifp->if_data.ifi_collisions, stats->collisions);
+		atomic_add_64(&ifp->if_data.ifi_iqdrops, stats->dropped);
 	}
 
-	lck_mtx_unlock(inp->input_lck);
-	
-	return 0; 
+	return (0);
 }
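
/*
 * The statistics above move from plain additions under input_lck to
 * atomic_add_64(), so callers no longer serialize on a lock just to
 * bump counters.  The same idea in portable C11, with hypothetical
 * counter names:
 */
#include <stdatomic.h>
#include <stdint.h>

struct ifstats {
	_Atomic uint64_t ipackets;
	_Atomic uint64_t ibytes;
};

static void
ifstats_credit(struct ifstats *st, uint64_t pkts, uint64_t bytes)
{
	/* relaxed ordering is enough for monotonically growing counters */
	atomic_fetch_add_explicit(&st->ipackets, pkts, memory_order_relaxed);
	atomic_fetch_add_explicit(&st->ibytes, bytes, memory_order_relaxed);
}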
 
 static int
-dlil_interface_filters_input(struct ifnet * ifp, struct mbuf * * m_p,
-			     char * * frame_header_p,
-			     protocol_family_t protocol_family)
+dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
+    char **frame_header_p, protocol_family_t protocol_family)
 {
-	struct ifnet_filter * 		filter;
+	struct ifnet_filter *filter;
 
+	/*
+	 * Pass the inbound packet to the interface filters
+	 */
+	lck_mtx_lock_spin(&ifp->if_flt_lock);
+	/* prevent filter list from changing in case we drop the lock */
+	if_flt_monitor_busy(ifp);
 	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
 		int result;
 
-		if (filter->filt_input 
-		    && (filter->filt_protocol == 0
-			|| filter->filt_protocol == protocol_family)) {
+		if (!filter->filt_skip && filter->filt_input != NULL &&
+		    (filter->filt_protocol == 0 ||
+		    filter->filt_protocol == protocol_family)) {
+			lck_mtx_unlock(&ifp->if_flt_lock);
+
 			result = (*filter->filt_input)(filter->filt_cookie,
-						       ifp, protocol_family,
-						       m_p, frame_header_p);
+			    ifp, protocol_family, m_p, frame_header_p);
+
+			lck_mtx_lock_spin(&ifp->if_flt_lock);
 			if (result != 0) {
+				/* we're done with the filter list */
+				if_flt_monitor_unbusy(ifp);
+				lck_mtx_unlock(&ifp->if_flt_lock);
 				return (result);
 			}
 		}
 	}
+	/* we're done with the filter list */
+	if_flt_monitor_unbusy(ifp);
+	lck_mtx_unlock(&ifp->if_flt_lock);
 
 	/*
-	 * Strip away M_PROTO1 bit prior to sending packet up the stack as 
+	 * Strip away M_PROTO1 bit prior to sending packet up the stack as
 	 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1
 	 */
 	if (*m_p != NULL)
@@ -1040,6 +1388,45 @@ dlil_interface_filters_input(struct ifnet * ifp, struct mbuf * * m_p,
 	return (0);
 }
 
+static int
+dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
+    protocol_family_t protocol_family)
+{
+	struct ifnet_filter *filter;
+
+	/*
+	 * Pass the outbound packet to the interface filters
+	 */
+	lck_mtx_lock_spin(&ifp->if_flt_lock);
+	/* prevent filter list from changing in case we drop the lock */
+	if_flt_monitor_busy(ifp);
+	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
+		int result;
+
+		if (!filter->filt_skip && filter->filt_output != NULL &&
+		    (filter->filt_protocol == 0 ||
+		    filter->filt_protocol == protocol_family)) {
+			lck_mtx_unlock(&ifp->if_flt_lock);
+
+			result = filter->filt_output(filter->filt_cookie, ifp,
+			    protocol_family, m_p);
+
+			lck_mtx_lock_spin(&ifp->if_flt_lock);
+			if (result != 0) {
+				/* we're done with the filter list */
+				if_flt_monitor_unbusy(ifp);
+				lck_mtx_unlock(&ifp->if_flt_lock);
+				return (result);
+			}
+		}
+	}
+	/* we're done with the filter list */
+	if_flt_monitor_unbusy(ifp);
+	lck_mtx_unlock(&ifp->if_flt_lock);
+
+	return (0);
+}
+
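
/*
 * Both filter walks above share one shape: take if_flt_lock, mark the
 * list busy so attach/detach must wait, and drop the lock around every
 * callback so a filter may block or re-enter the stack; busy walkers
 * keep the filt_next pointers stable while the lock is down.  A
 * skeleton of that walk in userland C, pairing with the flt_monitor
 * sketch earlier; all names are illustrative:
 */
#include <pthread.h>

typedef int (*filt_cb_t)(void *cookie);

struct filt {
	struct filt *next;
	filt_cb_t    cb;
	void        *cookie;
	int          skip;	/* set by a detacher in mid-walk */
};

struct filt_list {
	pthread_mutex_t lock;
	pthread_cond_t  cv;
	unsigned int    busy, waiters;	/* monitor state, as sketched */
	struct filt    *head;
};

static int
filt_walk(struct filt_list *fl)
{
	struct filt *f;
	int result = 0;

	pthread_mutex_lock(&fl->lock);
	fl->busy++;				/* pin the list layout */
	for (f = fl->head; f != NULL; f = f->next) {
		if (f->skip || f->cb == NULL)
			continue;
		pthread_mutex_unlock(&fl->lock); /* callback may block */
		result = f->cb(f->cookie);
		pthread_mutex_lock(&fl->lock);
		if (result != 0)		/* nonzero ends the walk */
			break;
	}
	if (--fl->busy == 0 && fl->waiters > 0) {
		fl->waiters = 0;		/* unbusy: wake mutators */
		pthread_cond_broadcast(&fl->cv);
	}
	pthread_mutex_unlock(&fl->lock);
	return (result);
}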
 static void
 dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
 {
@@ -1050,24 +1437,21 @@ dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
 		while (m != NULL) {
 			char *	frame_header;
 			mbuf_t	next_packet;
-			
+
 			next_packet = m->m_nextpkt;
 			m->m_nextpkt = NULL;
 			frame_header = m->m_pkthdr.header;
 			m->m_pkthdr.header = NULL;
-			error = (*ifproto->kpi.v1.input)(ifproto->ifp, 
-							 ifproto->protocol_family,
-							 m, frame_header);
+			error = (*ifproto->kpi.v1.input)(ifproto->ifp,
+			    ifproto->protocol_family, m, frame_header);
 			if (error != 0 && error != EJUSTRETURN)
 				m_freem(m);
 			m = next_packet;
 		}
-	}
-	else if (ifproto->proto_kpi == kProtoKPI_v2) {
+	} else if (ifproto->proto_kpi == kProtoKPI_v2) {
 		/* Version 2 protocols support packet lists */
 		error = (*ifproto->kpi.v2.input)(ifproto->ifp,
-						 ifproto->protocol_family,
-						 m);
+		    ifproto->protocol_family, m);
 		if (error != 0 && error != EJUSTRETURN)
 			m_freem_list(m);
 	}
@@ -1078,7 +1462,6 @@ __private_extern__ void
 dlil_input_packet_list(struct ifnet * ifp_param, struct mbuf *m)
 {
 	int				error = 0;
-	int				locked = 0;
 	protocol_family_t		protocol_family;
 	mbuf_t				next_packet;
 	ifnet_t				ifp = ifp_param;
@@ -1089,66 +1472,71 @@ dlil_input_packet_list(struct ifnet * ifp_param, struct mbuf *m)
 
 	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START,0,0,0,0,0);
 
 	while (m != NULL) {
-		struct if_proto *	ifproto = NULL;
+		struct if_proto *ifproto = NULL;
+		int iorefcnt = 0;
 
-		next_packet = m->m_nextpkt;
-		m->m_nextpkt = NULL;
 		if (ifp_param == NULL)
 			ifp = m->m_pkthdr.rcvif;
+
+		/* Check if this mbuf looks valid */
+		MBUF_INPUT_CHECK(m, ifp);
+
+		next_packet = m->m_nextpkt;
+		m->m_nextpkt = NULL;
 		frame_header = m->m_pkthdr.header;
 		m->m_pkthdr.header = NULL;
 
-		if (locked == 0) {
-			/* dlil lock protects the demux and interface filters */
-			locked = 1;
-			dlil_read_begin();
+		/* Get an IO reference count if the interface is not
+		 * loopback and it is attached.
+		 */
+		if (ifp != lo_ifp) {
+			if (!ifnet_is_attached(ifp, 1)) {
+				m_freem(m);
+				goto next;
+			}
+			iorefcnt = 1;
 		}
 
-#if PKT_PRIORITY
 		switch (m->m_pkthdr.prio) {
 			case MBUF_TC_BK:
-				ifp->if_tc.ifi_ibkpackets++;
-				ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len;
+				atomic_add_64(&ifp->if_tc.ifi_ibkpackets, 1);
+				atomic_add_64(&ifp->if_tc.ifi_ibkbytes, m->m_pkthdr.len);
 				break;
 			case MBUF_TC_VI:
-				ifp->if_tc.ifi_ivipackets++;
-				ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len;
+				atomic_add_64(&ifp->if_tc.ifi_ivipackets, 1);
+				atomic_add_64(&ifp->if_tc.ifi_ivibytes, m->m_pkthdr.len);
 				break;
 			case MBUF_TC_VO:
-				ifp->if_tc.ifi_ivopackets++;
-				ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len;
+				atomic_add_64(&ifp->if_tc.ifi_ivopackets, 1);
+				atomic_add_64(&ifp->if_tc.ifi_ivobytes, m->m_pkthdr.len);
 				break;
 			default:
 				break;
 		}
-#endif PKT_PRIORITY
 
 		/* find which protocol family this packet is for */
+		ifnet_lock_shared(ifp);
 		error = (*ifp->if_demux)(ifp, m, frame_header,
-					 &protocol_family);
+		    &protocol_family);
+		ifnet_lock_done(ifp);
 		if (error != 0) {
-			if (error == EJUSTRETURN) {
+			if (error == EJUSTRETURN)
 				goto next;
-			}
 			protocol_family = 0;
 		}
-		
-		/* DANGER!!! */
+
 		if (m->m_flags & (M_BCAST|M_MCAST))
-			ifp->if_imcasts++;
+			atomic_add_64(&ifp->if_imcasts, 1);
 
 		/* run interface filters, exclude VLAN packets PR-3586856 */
 		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
-			int	filter_result;
-
-			filter_result = dlil_interface_filters_input(ifp, &m, 
-							  &frame_header,
-							  protocol_family);
-			if (filter_result != 0) {
-				if (filter_result != EJUSTRETURN) {
+			error = dlil_interface_filters_input(ifp, &m,
+			    &frame_header, protocol_family);
+			if (error != 0) {
+				if (error != EJUSTRETURN)
 					m_freem(m);
-				}
 				goto next;
 			}
 		}
@@ -1156,19 +1544,21 @@ dlil_input_packet_list(struct ifnet * ifp_param, struct mbuf *m)
 			m_freem(m);
 			goto next;
 		}
-		
+
 		/* Lookup the protocol attachment to this interface */
 		if (protocol_family == 0) {
 			ifproto = NULL;
-		}
-		else if (last_ifproto != NULL
-			 && last_ifproto->ifp == ifp
-			 && (last_ifproto->protocol_family
-			     == protocol_family)) {
+		} else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
+		    (last_ifproto->protocol_family == protocol_family)) {
+			VERIFY(ifproto == NULL);
 			ifproto = last_ifproto;
-		}
-		else {
+			if_proto_ref(last_ifproto);
+		} else {
+			VERIFY(ifproto == NULL);
+			ifnet_lock_shared(ifp);
+			/* callee holds a proto refcnt upon success */
 			ifproto	= find_attached_proto(ifp, protocol_family);
+			ifnet_lock_done(ifp);
 		}
 		if (ifproto == NULL) {
 			/* no protocol for this packet, discard */
@@ -1176,18 +1566,14 @@ dlil_input_packet_list(struct ifnet * ifp_param, struct mbuf *m)
 			goto next;
 		}
 		if (ifproto != last_ifproto) {
-			/* make sure ifproto can't go away during input */
-			if_proto_ref(ifproto);
 			if (last_ifproto != NULL) {
 				/* pass up the list for the previous protocol */
-				dlil_read_end();
-				
 				dlil_ifproto_input(last_ifproto, pkt_first);
 				pkt_first = NULL;
 				if_proto_free(last_ifproto);
-				dlil_read_begin();
 			}
 			last_ifproto = ifproto;
+			if_proto_ref(ifproto);
 		}
 		/* extend the list */
 		m->m_pkthdr.header = frame_header;
@@ -1198,78 +1584,127 @@ dlil_input_packet_list(struct ifnet * ifp_param, struct mbuf *m)
 		}
 		pkt_next = &m->m_nextpkt;
 
-	next:
+next:
 		if (next_packet == NULL && last_ifproto != NULL) {
 			/* pass up the last list of packets */
-			dlil_read_end();
-
 			dlil_ifproto_input(last_ifproto, pkt_first);
 			if_proto_free(last_ifproto);
-			locked = 0;
+			last_ifproto = NULL;
+		}
+		if (ifproto != NULL) {
+			if_proto_free(ifproto);
+			ifproto = NULL;
 		}
+
 		m = next_packet;
 
+		/* update the driver's multicast filter, if needed */
+		if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
+			ifp->if_updatemcasts = 0;
+		if (iorefcnt == 1)
+			ifnet_decr_iorefcnt(ifp);
 	}
-	if (locked != 0) {
-		dlil_read_end();
-	}
+
 	KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END,0,0,0,0,0);
 	return;
 }
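
/*
 * dlil_input_packet_list() amortizes per-protocol hand-offs by chaining
 * consecutive packets that demux to the same protocol and flushing the
 * chain whenever the key changes.  The batching idiom, reduced to
 * integers (names hypothetical):
 */
#include <stdio.h>

static void
flush_batch(int key, int count)
{
	/* stand-in for dlil_ifproto_input(last_ifproto, pkt_first) */
	printf("deliver %d packet(s) to protocol %d\n", count, key);
}

static void
batch_deliver(const int *keys, int n)
{
	int last_key = -1, count = 0, i;

	for (i = 0; i < n; i++) {
		if (keys[i] != last_key && count > 0) {
			flush_batch(last_key, count);	/* key changed */
			count = 0;
		}
		last_key = keys[i];
		count++;
	}
	if (count > 0)		/* flush the final chain */
		flush_batch(last_key, count);
}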
 
+errno_t
+if_mcasts_update(struct ifnet *ifp)
+{
+	errno_t err;
+
+	err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
+	if (err == EAFNOSUPPORT)
+		err = 0;
+	printf("%s%d: %s %d suspended link-layer multicast membership(s) "
+	    "(err=%d)\n", ifp->if_name, ifp->if_unit,
+	    (err == 0 ? "successfully restored" : "failed to restore"),
+	    ifp->if_updatemcasts, err);
+
+	/* just return success */
+	return (0);
+}
+
 static int
 dlil_event_internal(struct ifnet *ifp, struct kev_msg *event)
 {
 	struct ifnet_filter *filter;
-	
-	if (ifp_use(ifp, kIfNetUseCount_MustNotBeZero) == 0) {
-		dlil_read_begin();
-		
-		/* Pass the event to the interface filters */
-		TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
-			if (filter->filt_event)
-				filter->filt_event(filter->filt_cookie, ifp, filter->filt_protocol, event);
+
+	/* Get an io ref count if the interface is attached */
+	if (!ifnet_is_attached(ifp, 1))
+		goto done;
+
+	/*
+	 * Pass the event to the interface filters
+	 */
+	lck_mtx_lock_spin(&ifp->if_flt_lock);
+	/* prevent filter list from changing in case we drop the lock */
+	if_flt_monitor_busy(ifp);
+	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
+		if (filter->filt_event != NULL) {
+			lck_mtx_unlock(&ifp->if_flt_lock);
+
+			filter->filt_event(filter->filt_cookie, ifp,
+			    filter->filt_protocol, event);
+
+			lck_mtx_lock_spin(&ifp->if_flt_lock);
 		}
-		
-		if (ifp->if_proto_hash) {
-			int i;
-
-			for (i = 0; i < PROTO_HASH_SLOTS; i++) {
-				struct if_proto *proto;
-				
-				SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
-					proto_media_event eventp = proto->proto_kpi == kProtoKPI_v1
-						 ? proto->kpi.v1.event : proto->kpi.v2.event;
-					
-					if (eventp)
-						eventp(ifp, proto->protocol_family, event);
+	}
+	/* we're done with the filter list */
+	if_flt_monitor_unbusy(ifp);
+	lck_mtx_unlock(&ifp->if_flt_lock);
+
+	ifnet_lock_shared(ifp);
+	if (ifp->if_proto_hash != NULL) {
+		int i;
+
+		for (i = 0; i < PROTO_HASH_SLOTS; i++) {
+			struct if_proto *proto;
+
+			SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
+			    next_hash) {
+				proto_media_event eventp =
+				    (proto->proto_kpi == kProtoKPI_v1 ?
+				    proto->kpi.v1.event :
+				    proto->kpi.v2.event);
+
+				if (eventp != NULL) {
+					if_proto_ref(proto);
+					ifnet_lock_done(ifp);
+
+					eventp(ifp, proto->protocol_family,
+					    event);
+
+					ifnet_lock_shared(ifp);
+					if_proto_free(proto);
 				}
 			}
 		}
-		
-		dlil_read_end();
-		
-		/* Pass the event to the interface */
-		if (ifp->if_event)
-			ifp->if_event(ifp, event);
-		
-		if (ifp_unuse(ifp))
-			ifp_use_reached_zero(ifp);
 	}
-	
-	return kev_post_msg(event);
+	ifnet_lock_done(ifp);
+
+	/* Pass the event to the interface */
+	if (ifp->if_event != NULL)
+		ifp->if_event(ifp, event);
+
+	/* Release the io ref count */
+	ifnet_decr_iorefcnt(ifp);
+
+done:
+	return (kev_post_msg(event));
 }
 
 errno_t
-ifnet_event(
-	ifnet_t					ifp,
-	struct kern_event_msg	*event)
+ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
 {
 	struct kev_msg               kev_msg;
 	int result = 0;
 
-	if (ifp == NULL || event == NULL) return EINVAL;
+	if (ifp == NULL || event == NULL)
+		return (EINVAL);
 
+	bzero(&kev_msg, sizeof (kev_msg));
 	kev_msg.vendor_code    = event->vendor_code;
 	kev_msg.kev_class      = event->kev_class;
 	kev_msg.kev_subclass   = event->kev_subclass;
@@ -1277,16 +1712,17 @@ ifnet_event(
 	kev_msg.dv[0].data_ptr = &event->event_data[0];
 	kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
 	kev_msg.dv[1].data_length = 0;
-	
+
 	result = dlil_event_internal(ifp, &kev_msg);
 
-	return result;
+	return (result);
 }
 
 #if CONFIG_MACF_NET
 #include <netinet/ip6.h>
 #include <netinet/ip.h>
-static int dlil_get_socket_type(struct mbuf **mp, int family, int raw)
+static int
+dlil_get_socket_type(struct mbuf **mp, int family, int raw)
 {
 	struct mbuf *m;
 	struct ip *ip;
@@ -1327,184 +1763,26 @@ static int dlil_get_socket_type(struct mbuf **mp, int family, int raw)
 static void
 if_inc_traffic_class_out(ifnet_t ifp, mbuf_t m)
 {
-#if !PKT_PRIORITY
-#pragma unused(ifp)
-#pragma unused(m)
-	return;
-#else
 	if (!(m->m_flags & M_PKTHDR))
 		return;
 
 	switch (m->m_pkthdr.prio) {
 		case MBUF_TC_BK:
-			ifp->if_tc.ifi_obkpackets++;
-			ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
+			atomic_add_64(&ifp->if_tc.ifi_obkpackets, 1);
+			atomic_add_64(&ifp->if_tc.ifi_obkbytes, m->m_pkthdr.len);
 			break;
 		case MBUF_TC_VI:
-			ifp->if_tc.ifi_ovipackets++;
-			ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
+			atomic_add_64(&ifp->if_tc.ifi_ovipackets, 1);
+			atomic_add_64(&ifp->if_tc.ifi_ovibytes, m->m_pkthdr.len);
 			break;
 		case MBUF_TC_VO:
-			ifp->if_tc.ifi_ovopackets++;
-			ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
+			atomic_add_64(&ifp->if_tc.ifi_ovopackets, 1);
+			atomic_add_64(&ifp->if_tc.ifi_ovobytes, m->m_pkthdr.len);
 			break;
 		default:
 			break;
 	}
-#endif PKT_PRIORITY
-}
-
-#if 0
-int
-dlil_output_list(
-	struct ifnet* ifp,
-	u_long proto_family,
-	struct mbuf		*packetlist,
-	caddr_t		route,
-	const struct sockaddr	*dest,
-	int						raw)
-{
-	char			*frame_type = NULL;
-	char			*dst_linkaddr = NULL;
-	int				retval = 0;
-	char			frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
-	char			dst_linkaddr_buffer[MAX_LINKADDR * 4];
-	struct ifnet_filter *filter;
-	struct if_proto	*proto = 0;
-	mbuf_t	m;
-	mbuf_t	send_head = NULL;
-	mbuf_t	*send_tail = &send_head;
-	
-	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START,0,0,0,0,0);
-	
-	dlil_read_begin();
-	
-	frame_type	   = frame_type_buffer;
-	dst_linkaddr   = dst_linkaddr_buffer;
-	
-	if (raw == 0) {
-		proto = find_attached_proto(ifp, proto_family);
-		if (proto == NULL) {
-			retval = ENXIO;
-			goto cleanup;
-		}
-	}
-	
-preout_again:
-	if (packetlist == NULL)
-		goto cleanup;
-	m = packetlist;
-	packetlist = packetlist->m_nextpkt;
-	m->m_nextpkt = NULL;
-	
-	if (raw == 0) {
-		proto_media_preout preoutp = proto->proto_kpi == kProtoKPI_v1
-			 ? proto->kpi.v1.pre_output : proto->kpi.v2.pre_output;
-		retval = 0;
-		if (preoutp)
-			retval = preoutp(ifp, proto_family, &m, dest, route, frame_type, dst_linkaddr);
-	
-		if (retval) {
-			if (retval == EJUSTRETURN) {
-				goto preout_again;
-			}
-			
-			m_freem(m);
-			goto cleanup;
-		}
-	}
-
-	do {
-#if CONFIG_MACF_NET
-		retval = mac_ifnet_check_transmit(ifp, m, proto_family,
-		    dlil_get_socket_type(&m, proto_family, raw));
-		if (retval) {
-			m_freem(m);
-			goto cleanup;
-		}
-#endif
-	
-		if (raw == 0 && ifp->if_framer) {
-			retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr, frame_type); 
-			if (retval) {
-				if (retval != EJUSTRETURN) {
-					m_freem(m);
-				}
-				goto next;
-			}
-		}
-	
-		/* 
-		 * Let interface filters (if any) do their thing ...
-		 */
-		/* Do not pass VLAN tagged packets to filters PR-3586856 */
-		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
-			TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
-				if ((filter->filt_protocol == 0 || (filter->filt_protocol == proto_family)) &&
-					filter->filt_output) {
-					retval = filter->filt_output(filter->filt_cookie, ifp, proto_family, &m);
-					if (retval) {
-						if (retval != EJUSTRETURN)
-							m_freem(m);
-						goto next;
-					}
-				}
-			}
-		}
-		/*
-		 * Strip away M_PROTO1 bit prior to sending packet to the driver 
-		 * as this field may be used by the driver
-		 */
-		m->m_flags &= ~M_PROTO1;
-		
-		/*
-		 * Finally, call the driver.
-		 */
-	
-		if ((ifp->if_eflags & IFEF_SENDLIST) != 0) {
-			*send_tail = m;
-			send_tail = &m->m_nextpkt;
-		}
-		else {
-			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0);
-			retval = ifp->if_output(ifp, m);
-			if (retval && dlil_verbose) {
-				printf("dlil_output: output error on %s%d retval = %d\n", 
-					ifp->if_name, ifp->if_unit, retval);
-			}
-			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0);
-		}
-		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0);
-
-next:
-		m = packetlist;
-		if (m) {
-			packetlist = packetlist->m_nextpkt;
-			m->m_nextpkt = NULL;
-		}
-	} while (m);
-
-	if (send_head) {
-		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0);
-		retval = ifp->if_output(ifp, send_head);
-		if (retval && dlil_verbose) {
-			printf("dlil_output: output error on %s%d retval = %d\n",
-				ifp->if_name, ifp->if_unit, retval);
-		}
-		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0);
-	}
-	
-	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END,0,0,0,0,0);
-
-cleanup:
-	dlil_read_end();
-	if (packetlist) /* if any packet left, clean up */
-		mbuf_freem_list(packetlist);
-	if (retval == EJUSTRETURN)
-		retval = 0;
-	return retval;
 }
-#endif
 
 /*
  * dlil_output
@@ -1519,62 +1797,72 @@ cleanup:
  * because a protocol is likely to interact with an ifp while it
  * is under the protocol lock.
  */
-__private_extern__ errno_t
-dlil_output(
-	ifnet_t					ifp,
-	protocol_family_t		proto_family,
-	mbuf_t					packetlist,
-	void					*route,
-	const struct sockaddr	*dest,
-	int						raw)
-{
-	char			*frame_type = NULL;
-	char			*dst_linkaddr = NULL;
-	int				retval = 0;
-	char			frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
-	char			dst_linkaddr_buffer[MAX_LINKADDR * 4];
-	struct ifnet_filter *filter;
-	struct if_proto	*proto = 0;
+errno_t
+dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
+    void *route, const struct sockaddr *dest, int raw)
+{
+	char *frame_type = NULL;
+	char *dst_linkaddr = NULL;
+	int retval = 0;
+	char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
+	char dst_linkaddr_buffer[MAX_LINKADDR * 4];
+	struct if_proto	*proto = NULL;
 	mbuf_t	m;
 	mbuf_t	send_head = NULL;
 	mbuf_t	*send_tail = &send_head;
-	
+	int iorefcnt = 0;
+
 	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START,0,0,0,0,0);
-	
-	dlil_read_begin();
-	
-	frame_type	   = frame_type_buffer;
-	dst_linkaddr   = dst_linkaddr_buffer;
-	
+
+	/* Get an IO refcnt if the interface is attached, to prevent
+	 * ifnet_detach from completing while this operation is in progress. */
+	if (!ifnet_is_attached(ifp, 1)) {
+		retval = ENXIO;
+		goto cleanup;
+	}
+	iorefcnt = 1;
+
+	/* update the driver's multicast filter, if needed */
+	if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
+		ifp->if_updatemcasts = 0;
+
+	frame_type = frame_type_buffer;
+	dst_linkaddr = dst_linkaddr_buffer;
+
 	if (raw == 0) {
+		ifnet_lock_shared(ifp);
+		/* callee holds a proto refcnt upon success */
 		proto = find_attached_proto(ifp, proto_family);
 		if (proto == NULL) {
+			ifnet_lock_done(ifp);
 			retval = ENXIO;
 			goto cleanup;
 		}
+		ifnet_lock_done(ifp);
 	}
-	
+
 preout_again:
 	if (packetlist == NULL)
 		goto cleanup;
+
 	m = packetlist;
 	packetlist = packetlist->m_nextpkt;
 	m->m_nextpkt = NULL;
-	
+
 	if (raw == 0) {
-		proto_media_preout preoutp = proto->proto_kpi == kProtoKPI_v1
-			 ? proto->kpi.v1.pre_output : proto->kpi.v2.pre_output;
+		proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
+		    proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
 		retval = 0;
-		if (preoutp)
-			retval = preoutp(ifp, proto_family, &m, dest, route, frame_type, dst_linkaddr);
-	
-		if (retval) {
-			if (retval == EJUSTRETURN) {
-				goto preout_again;
+		if (preoutp != NULL) {
+			retval = preoutp(ifp, proto_family, &m, dest, route,
+			    frame_type, dst_linkaddr);
+
+			if (retval != 0) {
+				if (retval == EJUSTRETURN)
+					goto preout_again;
+				m_freem(m);
+				goto cleanup;
 			}
-			
-			m_freem(m);
-			goto cleanup;
 		}
 	}
 
@@ -1588,6 +1876,21 @@ preout_again:
 #endif
 
 	do {
+#if CONFIG_DTRACE
+		if (proto_family == PF_INET) {
+			struct ip *ip = mtod(m, struct ip*);
+			DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
+				struct ip *, ip, struct ifnet *, ifp,
+				struct ip *, ip, struct ip6_hdr *, NULL);
+
+		} else if (proto_family == PF_INET6) {
+			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr*);
+			DTRACE_IP6(send, struct mbuf*, m, struct inpcb *, NULL,
+				struct ip6_hdr *, ip6, struct ifnet*, ifp,
+				struct ip*, NULL, struct ip6_hdr *, ip6);
+		}
+#endif /* CONFIG_DTRACE */
+
 		if (raw == 0 && ifp->if_framer) {
 			int rcvif_set = 0;
 
@@ -1605,11 +1908,11 @@ preout_again:
 				rcvif_set = 1;
 			}
 
-			retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr, frame_type); 
+			retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
+			    frame_type);
 			if (retval) {
-				if (retval != EJUSTRETURN) {
+				if (retval != EJUSTRETURN)
 					m_freem(m);
-				}
 				goto next;
 			}
 
@@ -1625,25 +1928,20 @@ preout_again:
 			if (rcvif_set && m->m_pkthdr.rcvif == ifp)
 				m->m_pkthdr.rcvif = NULL;
 		}
-	
-		/* 
+
+		/*
 		 * Let interface filters (if any) do their thing ...
 		 */
 		/* Do not pass VLAN tagged packets to filters PR-3586856 */
 		if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
-			TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
-				if ((filter->filt_protocol == 0 || (filter->filt_protocol == proto_family)) &&
-					filter->filt_output) {
-					retval = filter->filt_output(filter->filt_cookie, ifp, proto_family, &m);
-					if (retval) {
-						if (retval != EJUSTRETURN)
-							m_freem(m);
-						goto next;
-					}
-				}
+			retval = dlil_interface_filters_output(ifp,
+			    &m, proto_family);
+			if (retval != 0) {
+				if (retval != EJUSTRETURN)
+					m_freem(m);
+				goto next;
 			}
 		}
-
 		/*
 		 * Strip away M_PROTO1 bit prior to sending packet to the driver
 		 * as this field may be used by the driver
@@ -1663,40 +1961,43 @@ preout_again:
 				goto next;
 		}
 
-		/* 
-		 * If this is a TSO packet, make sure the interface still advertise TSO capability
+		/*
+		 * If this is a TSO packet, make sure the interface still
+		 * advertise TSO capability.
 		 */
 
-		if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) && !(ifp->if_hwassist & IFNET_TSO_IPV4)) {
-				retval = EMSGSIZE;
-				m_freem(m);
-				goto cleanup;
+		if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) &&
+		    !(ifp->if_hwassist & IFNET_TSO_IPV4)) {
+			retval = EMSGSIZE;
+			m_freem(m);
+			goto cleanup;
 		}
 
-		if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) && !(ifp->if_hwassist & IFNET_TSO_IPV6)) {
-				retval = EMSGSIZE;
-				m_freem(m);
-				goto cleanup;
+		if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) &&
+		    !(ifp->if_hwassist & IFNET_TSO_IPV6)) {
+			retval = EMSGSIZE;
+			m_freem(m);
+			goto cleanup;
 		}
+
 		/*
 		 * Finally, call the driver.
 		 */
-	
 		if ((ifp->if_eflags & IFEF_SENDLIST) != 0) {
 			*send_tail = m;
 			send_tail = &m->m_nextpkt;
-		}
-		else {
-			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0);
-			
+		} else {
 			if_inc_traffic_class_out(ifp, m);
-
+			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
+			    0,0,0,0,0);
 			retval = ifp->if_output(ifp, m);
 			if (retval && dlil_verbose) {
-				printf("dlil_output: output error on %s%d retval = %d\n", 
-					ifp->if_name, ifp->if_unit, retval);
+				printf("%s: output error on %s%d retval = %d\n",
+				    __func__, ifp->if_name, ifp->if_unit,
+				    retval);
 			}
-			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0);
+			KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
+			    0,0,0,0,0);
 		}
 		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0);
 
@@ -1709,115 +2010,121 @@ next:
 	} while (m);
 
 	if (send_head) {
-		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0);
-		
 		if_inc_traffic_class_out(ifp, send_head);
 
+		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0);
 		retval = ifp->if_output(ifp, send_head);
 		if (retval && dlil_verbose) {
-			printf("dlil_output: output error on %s%d retval = %d\n", 
-				ifp->if_name, ifp->if_unit, retval);
+			printf("%s: output error on %s%d retval = %d\n",
+			    __func__, ifp->if_name, ifp->if_unit, retval);
 		}
 		KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0);
 	}
-	
+
 	KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END,0,0,0,0,0);
 
 cleanup:
-	dlil_read_end();
-	if (packetlist) /* if any packet left, clean up */
+	if (proto != NULL)
+		if_proto_free(proto);
+	if (packetlist) /* if any packets are left, clean up */
 		mbuf_freem_list(packetlist);
 	if (retval == EJUSTRETURN)
 		retval = 0;
-	return retval;
+	if (iorefcnt == 1)
+		ifnet_decr_iorefcnt(ifp);
+
+	return (retval);
 }
 
 errno_t
-ifnet_ioctl(
-	ifnet_t				ifp,
-	protocol_family_t	proto_fam,
-	u_long			ioctl_code,
-	void				*ioctl_arg)
-{
-	struct ifnet_filter		*filter;
-	int						retval = EOPNOTSUPP;
-	int						result = 0;
-	int						holding_read = 0;
-	
+ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
+    void *ioctl_arg)
+{
+	struct ifnet_filter *filter;
+	int retval = EOPNOTSUPP;
+	int result = 0;
+
 	if (ifp == NULL || ioctl_code == 0)
-		return EINVAL;
-	
-	/* Attempt to increment the use count. If it's zero, bail out, the ifp is invalid */
-	result = ifp_use(ifp, kIfNetUseCount_MustNotBeZero);
-	if (result != 0)
-		return EOPNOTSUPP;
-	
-	dlil_read_begin();
-	holding_read = 1;
-	
+		return (EINVAL);
+
+	/* Get an io ref count if the interface is attached */
+	if (!ifnet_is_attached(ifp, 1))
+		return (EOPNOTSUPP);
+
 	/* Run the interface filters first.
 	 * We want to run all filters before calling the protocol,
 	 * interface family, or interface.
 	 */
+	lck_mtx_lock_spin(&ifp->if_flt_lock);
+	/* prevent filter list from changing in case we drop the lock */
+	if_flt_monitor_busy(ifp);
 	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
-		if ((filter->filt_protocol == 0 || (filter->filt_protocol == proto_fam)) &&
-			filter->filt_ioctl != NULL) {
-			result = filter->filt_ioctl(filter->filt_cookie, ifp, proto_fam, ioctl_code, ioctl_arg);
+		if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
+		    filter->filt_protocol == proto_fam)) {
+			lck_mtx_unlock(&ifp->if_flt_lock);
+
+			result = filter->filt_ioctl(filter->filt_cookie, ifp,
+			    proto_fam, ioctl_code, ioctl_arg);
+
+			lck_mtx_lock_spin(&ifp->if_flt_lock);
+
 			/* Only update retval if no one has handled the ioctl */
 			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
 				if (result == ENOTSUP)
 					result = EOPNOTSUPP;
 				retval = result;
-				if (retval && retval != EOPNOTSUPP) {
+				if (retval != 0 && retval != EOPNOTSUPP) {
+					/* we're done with the filter list */
+					if_flt_monitor_unbusy(ifp);
+					lck_mtx_unlock(&ifp->if_flt_lock);
 					goto cleanup;
 				}
 			}
 		}
 	}
-	
+	/* we're done with the filter list */
+	if_flt_monitor_unbusy(ifp);
+	lck_mtx_unlock(&ifp->if_flt_lock);
+
 	/* Allow the protocol to handle the ioctl */
-	if (proto_fam) {
-		struct if_proto	*proto = find_attached_proto(ifp, proto_fam);
-		
-		if (proto != 0) {
-			proto_media_ioctl ioctlp = proto->proto_kpi == kProtoKPI_v1
-				 ? proto->kpi.v1.ioctl : proto->kpi.v2.ioctl;
+	if (proto_fam != 0) {
+		struct if_proto	*proto;
+
+		/* callee holds a proto refcnt upon success */
+		ifnet_lock_shared(ifp);
+		proto = find_attached_proto(ifp, proto_fam);
+		ifnet_lock_done(ifp);
+		if (proto != NULL) {
+			proto_media_ioctl ioctlp =
+			    (proto->proto_kpi == kProtoKPI_v1 ?
+			    proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
 			result = EOPNOTSUPP;
-			if (ioctlp)
-				result = ioctlp(ifp, proto_fam, ioctl_code, ioctl_arg);
-			
+			if (ioctlp != NULL)
+				result = ioctlp(ifp, proto_fam, ioctl_code,
+				    ioctl_arg);
+			if_proto_free(proto);
+
 			/* Only update retval if no one has handled the ioctl */
 			if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
 				if (result == ENOTSUP)
 					result = EOPNOTSUPP;
 				retval = result;
-				if (retval && retval != EOPNOTSUPP) {
+				if (retval && retval != EOPNOTSUPP)
 					goto cleanup;
-				}
 			}
 		}
 	}
-	
-	/*
-	 * Since we have incremented the use count on the ifp, we are guaranteed
-	 * that the ifp will not go away (the function pointers may not be changed).
-	 * We release the dlil read lock so the interface ioctl may trigger a
-	 * protocol attach. This happens with vlan and may occur with other virtual
-	 * interfaces.
-	 */
-	dlil_read_end();
-	holding_read = 0;
-	
+
 	/* retval is either 0 or EOPNOTSUPP */
-	
+
 	/*
 	 * Let the interface handle this ioctl.
 	 * If it returns EOPNOTSUPP, ignore that, we may have
 	 * already handled this in the protocol or family.
 	 */
-	if (ifp->if_ioctl) 
+	if (ifp->if_ioctl)
 		result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
-	
+
 	/* Only update retval if no one has handled the ioctl */
 	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
 		if (result == ENOTSUP)
@@ -1827,60 +2134,59 @@ ifnet_ioctl(
 			goto cleanup;
 		}
 	}
-	
-cleanup:
-	if (holding_read)
-		dlil_read_end();
-	if (ifp_unuse(ifp))
-		ifp_use_reached_zero(ifp);
 
+cleanup:
 	if (retval == EJUSTRETURN)
 		retval = 0;
-	return retval;
+
+	ifnet_decr_iorefcnt(ifp);
+
+	return (retval);
 }
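
/*
 * ifnet_ioctl() applies the same folding rule after the filters, the
 * protocol, and the interface handler: ENOTSUP is normalized to
 * EOPNOTSUPP, and a handler's result replaces retval only while nobody
 * has claimed the ioctl yet (or on EJUSTRETURN).  The rule, isolated in
 * userland C (the helper name is hypothetical):
 */
#include <errno.h>

#ifndef EJUSTRETURN
#define EJUSTRETURN (-2)	/* BSD kernel value, absent from userland */
#endif

static int
ioctl_fold(int retval, int result)
{
	if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
		if (result == ENOTSUP)
			result = EOPNOTSUPP;
		retval = result;
	}
	return (retval);
}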
 
 __private_extern__ errno_t
-dlil_set_bpf_tap(
-	ifnet_t			ifp,
-	bpf_tap_mode	mode,
-	bpf_packet_func	callback)
+dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
 {
 	errno_t	error = 0;
-	
-	dlil_read_begin();
-	if (ifp->if_set_bpf_tap)
+
+	if (ifp->if_set_bpf_tap) {
+		/* Get an io reference on the interface if it is attached */
+		if (!ifnet_is_attached(ifp, 1))
+			return (ENXIO);
 		error = ifp->if_set_bpf_tap(ifp, mode, callback);
-	dlil_read_end();
-	
-	return error;
+		ifnet_decr_iorefcnt(ifp);
+	}
+	return (error);
 }
 
 errno_t
-dlil_resolve_multi(
-	struct ifnet *ifp,
-	const struct sockaddr *proto_addr,
-	struct sockaddr *ll_addr,
-	size_t ll_len)
+dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
+    struct sockaddr *ll_addr, size_t ll_len)
 {
 	errno_t	result = EOPNOTSUPP;
 	struct if_proto *proto;
 	const struct sockaddr *verify;
 	proto_media_resolve_multi resolvep;
-	
-	dlil_read_begin();
-	
+
+	if (!ifnet_is_attached(ifp, 1))
+		return (result);
+
 	bzero(ll_addr, ll_len);
-	
-	/* Call the protocol first */
+
+	/* Call the protocol first; callee holds a proto refcnt upon success */
+	ifnet_lock_shared(ifp);
 	proto = find_attached_proto(ifp, proto_addr->sa_family);
+	ifnet_lock_done(ifp);
 	if (proto != NULL) {
-		resolvep = proto->proto_kpi == kProtoKPI_v1
-			 ? proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi;
+		resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
+		    proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
 		if (resolvep != NULL)
-			result = resolvep(ifp, proto_addr,(struct sockaddr_dl*)ll_addr,
-							  ll_len);
+			result = resolvep(ifp, proto_addr,
+			    (struct sockaddr_dl*)ll_addr, ll_len);
+		if_proto_free(proto);
 	}
-	
+
 	/* Let the interface verify the multicast address */
 	if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
 		if (result == 0)
@@ -1889,73 +2195,63 @@ dlil_resolve_multi(
 			verify = proto_addr;
 		result = ifp->if_check_multi(ifp, verify);
 	}
-	
-	dlil_read_end();
-	
-	return result;
+
+	ifnet_decr_iorefcnt(ifp);
+	return (result);
 }
 
 __private_extern__ errno_t
-dlil_send_arp_internal(
-	ifnet_t	ifp,
-	u_short arpop,
-	const struct sockaddr_dl* sender_hw,
-	const struct sockaddr* sender_proto,
-	const struct sockaddr_dl* target_hw,
-	const struct sockaddr* target_proto)
+dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
+    const struct sockaddr_dl* sender_hw, const struct sockaddr* sender_proto,
+    const struct sockaddr_dl* target_hw, const struct sockaddr* target_proto)
 {
 	struct if_proto *proto;
 	errno_t	result = 0;
-	
-	dlil_read_begin();
-	
+
+	/* callee holds a proto refcnt upon success */
+	ifnet_lock_shared(ifp);
 	proto = find_attached_proto(ifp, target_proto->sa_family);
+	ifnet_lock_done(ifp);
 	if (proto == NULL) {
 		result = ENOTSUP;
-	}
-	else {
+	} else {
 		proto_media_send_arp	arpp;
-		arpp = proto->proto_kpi == kProtoKPI_v1
-			 ? proto->kpi.v1.send_arp : proto->kpi.v2.send_arp;
+		arpp = (proto->proto_kpi == kProtoKPI_v1 ?
+		    proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
 		if (arpp == NULL)
 			result = ENOTSUP;
 		else
-			result = arpp(ifp, arpop, sender_hw, sender_proto, target_hw,
-						  target_proto);
+			result = arpp(ifp, arpop, sender_hw, sender_proto,
+			    target_hw, target_proto);
+		if_proto_free(proto);
 	}
-	
-	dlil_read_end();
-	
-	return result;
+
+	return (result);
 }
 
 static __inline__ int
 _is_announcement(const struct sockaddr_in * sender_sin,
-		     const struct sockaddr_in * target_sin)
+    const struct sockaddr_in * target_sin)
 {
 	if (sender_sin == NULL) {
-		return FALSE;
+		return (FALSE);
 	}
 	return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
 }
 
 __private_extern__ errno_t
-dlil_send_arp(
-	ifnet_t	ifp,
-	u_short arpop,
-	const struct sockaddr_dl* sender_hw,
-	const struct sockaddr* sender_proto,
-	const struct sockaddr_dl* target_hw,
-	const struct sockaddr* target_proto)
+dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl* sender_hw,
+    const struct sockaddr* sender_proto, const struct sockaddr_dl* target_hw,
+    const struct sockaddr* target_proto)
 {
 	errno_t	result = 0;
 	const struct sockaddr_in * sender_sin;
 	const struct sockaddr_in * target_sin;
-	
-	if (target_proto == NULL || (sender_proto &&
-		sender_proto->sa_family != target_proto->sa_family))
-		return EINVAL;
-	
+
+	if (target_proto == NULL || (sender_proto != NULL &&
+	    sender_proto->sa_family != target_proto->sa_family))
+		return (EINVAL);
+
 	/*
 	 * If this is an ARP request and the target IP is IPv4LL,
 	 * send the request on all interfaces.  The exception is
@@ -1964,281 +2260,293 @@ dlil_send_arp(
 	 */
 	sender_sin = (const struct sockaddr_in *)sender_proto;
 	target_sin = (const struct sockaddr_in *)target_proto;
-	if (target_proto->sa_family == AF_INET
-	    && IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr))
-	    && ipv4_ll_arp_aware != 0
-	    && arpop == ARPOP_REQUEST
-	    && !_is_announcement(target_sin, sender_sin)) {
+	if (target_proto->sa_family == AF_INET &&
+	    IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
+	    ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
+	    !_is_announcement(target_sin, sender_sin)) {
 		ifnet_t		*ifp_list;
 		u_int32_t	count;
 		u_int32_t	ifp_on;
-		
+
 		result = ENOTSUP;
 
 		if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
 			for (ifp_on = 0; ifp_on < count; ifp_on++) {
-				errno_t				new_result;
-				ifaddr_t			source_hw = NULL;
-				ifaddr_t			source_ip = NULL;
-				struct sockaddr_in	source_ip_copy;
-				
+				errno_t new_result;
+				ifaddr_t source_hw = NULL;
+				ifaddr_t source_ip = NULL;
+				struct sockaddr_in source_ip_copy;
+				struct ifnet *cur_ifp = ifp_list[ifp_on];
+
 				/*
-				 * Only arp on interfaces marked for IPv4LL ARPing. This may
-				 * mean that we don't ARP on the interface the subnet route
-				 * points to.
+				 * Only arp on interfaces marked for IPv4LL
+				 * ARPing.  This may mean that we don't ARP on
+				 * the interface the subnet route points to.
 				 */
-				if ((ifp_list[ifp_on]->if_eflags & IFEF_ARPLL) == 0) {
+				if (!(cur_ifp->if_eflags & IFEF_ARPLL))
 					continue;
-				}
 
 				/* Find the source IP address */
-				ifnet_lock_shared(ifp_list[ifp_on]);
-				source_hw = TAILQ_FIRST(&ifp_list[ifp_on]->if_addrhead);
-				TAILQ_FOREACH(source_ip, &ifp_list[ifp_on]->if_addrhead,
-							  ifa_link) {
-					if (source_ip->ifa_addr &&
-						source_ip->ifa_addr->sa_family == AF_INET) {
+				ifnet_lock_shared(cur_ifp);
+				source_hw = cur_ifp->if_lladdr;
+				TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
+				    ifa_link) {
+					IFA_LOCK(source_ip);
+					if (source_ip->ifa_addr != NULL &&
+					    source_ip->ifa_addr->sa_family ==
+					    AF_INET) {
+						/* Copy the source IP address */
+						source_ip_copy =
+						    *(struct sockaddr_in *)
+						    source_ip->ifa_addr;
+						IFA_UNLOCK(source_ip);
 						break;
 					}
+					IFA_UNLOCK(source_ip);
 				}
-				
+
 				/* No IP Source, don't arp */
 				if (source_ip == NULL) {
-					ifnet_lock_done(ifp_list[ifp_on]);
+					ifnet_lock_done(cur_ifp);
 					continue;
 				}
-				
-				/* Copy the source IP address */
-				source_ip_copy = *(struct sockaddr_in*)source_ip->ifa_addr;
-				ifaref(source_hw);
-				ifnet_lock_done(ifp_list[ifp_on]);
-				
+
+				IFA_ADDREF(source_hw);
+				ifnet_lock_done(cur_ifp);
+
 				/* Send the ARP */
-				new_result = dlil_send_arp_internal(ifp_list[ifp_on], arpop,
-									(struct sockaddr_dl*)source_hw->ifa_addr,
-									(struct sockaddr*)&source_ip_copy, NULL,
-									target_proto);
+				new_result = dlil_send_arp_internal(cur_ifp,
+				    arpop,
+				    (struct sockaddr_dl *)source_hw->ifa_addr,
+				    (struct sockaddr *)&source_ip_copy, NULL,
+				    target_proto);
 
-				ifafree(source_hw);
+				IFA_REMREF(source_hw);
 				if (result == ENOTSUP) {
 					result = new_result;
 				}
 			}
+			ifnet_list_free(ifp_list);
 		}
-		
-		ifnet_list_free(ifp_list);
-	}
-	else {
-		result = dlil_send_arp_internal(ifp, arpop, sender_hw, sender_proto,
-										target_hw, target_proto);
+	} else {
+		result = dlil_send_arp_internal(ifp, arpop, sender_hw,
+		    sender_proto, target_hw, target_proto);
 	}
-	
-	return result;
+
+	return (result);
 }
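
/*
 * The IPv4LL branch above fires only for ARP requests whose target is
 * 169.254.0.0/16.  A tiny userland check of the same predicate; on
 * BSD-derived systems (including XNU) <netinet/in.h> provides
 * IN_LINKLOCAL(), which expects the address in host byte order, hence
 * the ntohl():
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>

int
main(void)
{
	struct in_addr a;

	if (inet_pton(AF_INET, "169.254.1.2", &a) == 1 &&
	    IN_LINKLOCAL(ntohl(a.s_addr)))
		printf("169.254.1.2 is IPv4 link-local\n");
	return (0);
}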
 
-__private_extern__ int
-ifp_use(
-	struct ifnet *ifp,
-	int	handle_zero)
+/*
+ * Caller must hold ifnet head lock.
+ */
+static int
+ifnet_lookup(struct ifnet *ifp)
 {
-	int old_value;
-	int retval = 0;
-	
-	do {
-		old_value = ifp->if_usecnt;
-		if (old_value == 0 && handle_zero == kIfNetUseCount_MustNotBeZero) {
-			retval = ENXIO; // ifp is invalid
+	struct ifnet *_ifp;
+
+	lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
+	TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
+		if (_ifp == ifp)
 			break;
-		}
-	} while (!OSCompareAndSwap((UInt32)old_value, (UInt32)old_value + 1, (UInt32*)&ifp->if_usecnt));
- 
-	return retval;
+	}
+	return (_ifp != NULL);
 }
-
-/* ifp_unuse is broken into two pieces.
- *
- * ifp_use and ifp_unuse must be called between when the caller calls
- * dlil_write_begin and dlil_write_end. ifp_unuse needs to perform some
- * operations after dlil_write_end has been called. For this reason,
- * anyone calling ifp_unuse must call ifp_use_reached_zero if ifp_unuse
- * returns a non-zero value. The caller must call ifp_use_reached_zero
- * after the caller has called dlil_write_end.
+/*
+ * Caller has to pass a non-zero refio argument to get an
+ * IO reference count. This will prevent ifnet_detach from
+ * being called while there are outstanding IO reference counts.
  */
-__private_extern__ void
-ifp_use_reached_zero(
-	struct ifnet *ifp)
-{
-	ifnet_detached_func	free_func;
-	
-	dlil_read_begin();
-	
-	if (ifp->if_usecnt != 0)
-		panic("ifp_use_reached_zero: ifp->if_usecnt != 0");
-	
-	ifnet_head_lock_exclusive();
-	ifnet_lock_exclusive(ifp);
-	
-	/* Remove ourselves from the list */
-	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
-	ifnet_addrs[ifp->if_index - 1] = NULL;
-	
-	/* ifp should be removed from the interface list */
-	while (ifp->if_multiaddrs.lh_first) {
-		struct ifmultiaddr *ifma = ifp->if_multiaddrs.lh_first;
-		
-		/*
-		 * When the interface is gone, we will no longer
-		 * be listening on these multicasts. Various bits
-		 * of the stack may be referencing these multicasts,
-		 * release only our reference.
+int
+ifnet_is_attached(struct ifnet *ifp, int refio)
+{
+	int ret;
+
+	lck_mtx_lock_spin(&ifp->if_ref_lock);
+	if ((ret = ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) ==
+	    IFRF_ATTACHED))) {
+		if (refio > 0)
+			ifp->if_refio++;
+	}
+	lck_mtx_unlock(&ifp->if_ref_lock);
+
+	return (ret);
+}
+
+void
+ifnet_decr_iorefcnt(struct ifnet *ifp)
+{
+	lck_mtx_lock_spin(&ifp->if_ref_lock);
+	VERIFY(ifp->if_refio > 0);
+	VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) != 0);
+	ifp->if_refio--;
+
+	/* If there are no more outstanding IO references, wake up the
+	 * ifnet_detach thread if the detaching flag is set.
+	 */
+	if (ifp->if_refio == 0 &&
+	    (ifp->if_refflags & IFRF_DETACHING) != 0) {
+		/* Convert the spinlock to a regular mutex if we have
+		 * to wait for any reason while doing a wakeup.
 		 */
-		LIST_REMOVE(ifma, ifma_link);
-		ifma->ifma_ifp = NULL;
-		ifma_release(ifma);
+		lck_mtx_convert_spin(&ifp->if_ref_lock);
+		wakeup(&(ifp->if_refio));
 	}
+	lck_mtx_unlock(&ifp->if_ref_lock);
+}
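
/*
 * ifnet_is_attached(ifp, 1) and ifnet_decr_iorefcnt() form an in/out
 * gate: data-path entry points take an IO reference only while the
 * interface is fully attached, and the detacher sets a flag and then
 * sleeps until the count drains to zero.  The same rendezvous in
 * userland C (names illustrative):
 */
#include <pthread.h>

struct io_gate {
	pthread_mutex_t lock;
	pthread_cond_t  drained;
	unsigned int    refio;
	int             detaching;
};

static int
io_gate_enter(struct io_gate *g)
{
	int ok;

	pthread_mutex_lock(&g->lock);
	ok = !g->detaching;
	if (ok)
		g->refio++;	/* holder may now touch the object */
	pthread_mutex_unlock(&g->lock);
	return (ok);
}

static void
io_gate_exit(struct io_gate *g)
{
	pthread_mutex_lock(&g->lock);
	if (--g->refio == 0 && g->detaching)
		pthread_cond_broadcast(&g->drained); /* wake the detacher */
	pthread_mutex_unlock(&g->lock);
}

static void
io_gate_detach(struct io_gate *g)
{
	pthread_mutex_lock(&g->lock);
	g->detaching = 1;	/* new entries are refused from here on */
	while (g->refio != 0)
		pthread_cond_wait(&g->drained, &g->lock);
	pthread_mutex_unlock(&g->lock);
}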
 
-	ifp->if_eflags &= ~IFEF_DETACHING; // clear the detaching flag
-	ifnet_lock_done(ifp);
-	ifnet_head_done();
+static void
+dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
+{
+	struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
+	ctrace_t *tr;
+	u_int32_t idx;
+	u_int16_t *cnt;
 
-	free_func = ifp->if_free;
-	dlil_read_end();
-	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
-	
-	if (free_func)
-		free_func(ifp);
+	if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
+		panic("%s: dl_if %p has no debug structure", __func__, dl_if);
+		/* NOTREACHED */
+	}
+
+	if (refhold) {
+		cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
+		tr = dl_if_dbg->dldbg_if_refhold;
+	} else {
+		cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
+		tr = dl_if_dbg->dldbg_if_refrele;
+	}
+
+	idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
+	ctrace_record(&tr[idx]);
 }
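
/*
 * dlil_if_trace() records each hold/release into a fixed ring indexed
 * by an atomically bumped counter, so the last N events survive for
 * post-mortem inspection of refcount bugs.  A C11 reduction of the
 * ring-index idiom (names and sizes hypothetical):
 */
#include <stdatomic.h>
#include <stdint.h>

#define TRACE_HIST_SIZE 8	/* stand-in for IF_REF_TRACE_HIST_SIZE */

struct trace_rec {
	uintptr_t pc;		/* stand-in for a ctrace_t backtrace */
};

static struct trace_rec trace_hist[TRACE_HIST_SIZE];
static _Atomic uint16_t trace_cnt;

static void
trace_record(uintptr_t pc)
{
	/* the counter's old value picks the slot, then wraps around */
	uint16_t idx = atomic_fetch_add_explicit(&trace_cnt, 1,
	    memory_order_relaxed) % TRACE_HIST_SIZE;

	trace_hist[idx].pc = pc;
}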
 
-__private_extern__ int
-ifp_unuse(
-	struct ifnet *ifp)
-{
-	int	oldval;
-	oldval = OSDecrementAtomic(&ifp->if_usecnt);
-	if (oldval == 0)
-		panic("ifp_unuse: ifp(%s%d)->if_usecnt was zero\n", ifp->if_name, ifp->if_unit);
- 	
-	if (oldval > 1)
-		return 0;
- 	
-	if ((ifp->if_eflags & IFEF_DETACHING) == 0)
-		panic("ifp_unuse: use count reached zero but detching flag is not set!");
- 	
- 	return 1; /* caller must call ifp_use_reached_zero */
+errno_t
+dlil_if_ref(struct ifnet *ifp)
+{
+	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
+
+	if (dl_if == NULL)
+		return (EINVAL);
+
+	lck_mtx_lock_spin(&dl_if->dl_if_lock);
+	++dl_if->dl_if_refcnt;
+	if (dl_if->dl_if_refcnt == 0) {
+		panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
+		/* NOTREACHED */
+	}
+	if (dl_if->dl_if_trace != NULL)
+		(*dl_if->dl_if_trace)(dl_if, TRUE);
+	lck_mtx_unlock(&dl_if->dl_if_lock);
+
+	return (0);
 }
 
-extern lck_mtx_t 	*domain_proto_mtx;
+errno_t
+dlil_if_free(struct ifnet *ifp)
+{
+	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
+
+	if (dl_if == NULL)
+		return (EINVAL);
+
+	lck_mtx_lock_spin(&dl_if->dl_if_lock);
+	if (dl_if->dl_if_refcnt == 0) {
+		panic("%s: negative refcnt for ifp=%p", __func__, ifp);
+		/* NOTREACHED */
+	}
+	--dl_if->dl_if_refcnt;
+	if (dl_if->dl_if_trace != NULL)
+		(*dl_if->dl_if_trace)(dl_if, FALSE);
+	lck_mtx_unlock(&dl_if->dl_if_lock);
+
+	return (0);
+}
 
 static errno_t
-dlil_attach_protocol_internal(
-	struct if_proto	*proto,
-	const struct ifnet_demux_desc *demux_list,
-	u_int32_t	demux_count)
+dlil_attach_protocol_internal(struct if_proto *proto,
+    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count)
 {
-	struct kev_dl_proto_data	ev_pr_data;
+	struct kev_dl_proto_data ev_pr_data;
 	struct ifnet *ifp = proto->ifp;
 	int retval = 0;
 	u_int32_t hash_value = proto_hash_value(proto->protocol_family);
-    
-    /* setup some of the common values */
-	{
-		struct domain *dp;
-		lck_mtx_lock(domain_proto_mtx);
-		dp = domains;
-		while (dp && (protocol_family_t)dp->dom_family != proto->protocol_family)
-			dp = dp->dom_next;
-		proto->dl_domain = dp;
-		lck_mtx_unlock(domain_proto_mtx);
-	}
-	
-	/*
-	 * Take the write lock to protect readers and exclude other writers.
-	 */
-	if ((retval = dlil_write_begin()) != 0) {
-		printf("dlil_attach_protocol_internal - dlil_write_begin returned %d\n", retval);
-		return retval;
-	}
-	
-	/* Check that the interface isn't currently detaching */
-	ifnet_lock_shared(ifp);
-	if ((ifp->if_eflags & IFEF_DETACHING) != 0) {
+	struct if_proto *prev_proto;
+	struct if_proto *_proto;
+
+	/* callee holds a proto refcnt upon success */
+	ifnet_lock_exclusive(ifp);
+	_proto = find_attached_proto(ifp, proto->protocol_family);
+	if (_proto != NULL) {
 		ifnet_lock_done(ifp);
-		dlil_write_end();
-		return ENXIO;
+		if_proto_free(_proto);
+		return (EEXIST);
 	}
-	ifnet_lock_done(ifp);
-	
-	if (find_attached_proto(ifp, proto->protocol_family) != NULL) {
-		dlil_write_end();
-		return EEXIST;
-	}
-	
+
 	/*
 	 * Call family module add_proto routine so it can refine the
 	 * demux descriptors as it wishes.
 	 */
-	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list, demux_count);
+	retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
+	    demux_count);
 	if (retval) {
-		dlil_write_end();
-		return retval;
+		ifnet_lock_done(ifp);
+		return (retval);
 	}
-	
-	/*
-	 * We can't fail from this point on.
-	 * Increment the number of uses (protocol attachments + interface attached).
-	 */
-	ifp_use(ifp, kIfNetUseCount_MustNotBeZero);
-	
+
 	/*
 	 * Insert the protocol in the hash
 	 */
-	{
-		struct if_proto*	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
-		while (prev_proto && SLIST_NEXT(prev_proto, next_hash) != NULL)
-			prev_proto = SLIST_NEXT(prev_proto, next_hash);
-		if (prev_proto)
-			SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
-		else
-			SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value], proto, next_hash);
-	}
+	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
+	while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
+		prev_proto = SLIST_NEXT(prev_proto, next_hash);
+	if (prev_proto)
+		SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
+	else
+		SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
+		    proto, next_hash);
+
+	/* hold a proto refcnt for attach */
+	if_proto_ref(proto);
 
 	/*
-	 * Add to if_proto list for this interface
+	 * The reserved field carries the number of protocols still attached
+	 * (subject to change)
 	 */
-	if_proto_ref(proto);
-	dlil_write_end();
-	
-	/* the reserved field carries the number of protocol still attached (subject to change) */
 	ev_pr_data.proto_family = proto->protocol_family;
 	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
-	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED, 
-				  (struct net_event_data *)&ev_pr_data, 
-				  sizeof(struct kev_dl_proto_data));
-#if 0	
-	DLIL_PRINTF("dlil. Attached protocol %d to %s%d - %d\n", proto->protocol_family,
-			 ifp->if_name, ifp->if_unit, retval);
-#endif
-	return retval;
+	ifnet_lock_done(ifp);
+
+	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
+	    (struct net_event_data *)&ev_pr_data,
+	    sizeof (struct kev_dl_proto_data));
+	return (retval);
 }
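
BSD singly-linked lists carry no tail pointer, which is why the insertion above walks to the end of the hash chain. The same idiom in isolation (hypothetical node type):

#include <sys/queue.h>

struct node {
	SLIST_ENTRY(node) link;
};
SLIST_HEAD(node_head, node);

/* Append n at the tail of h: an O(n) walk, as SLIST is singly linked. */
static void
slist_append(struct node_head *h, struct node *n)
{
	struct node *last = SLIST_FIRST(h);

	while (last != NULL && SLIST_NEXT(last, link) != NULL)
		last = SLIST_NEXT(last, link);
	if (last != NULL)
		SLIST_INSERT_AFTER(last, n, link);
	else
		SLIST_INSERT_HEAD(h, n, link);
}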
 
 errno_t
 ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
-	const struct ifnet_attach_proto_param *proto_details)
+    const struct ifnet_attach_proto_param *proto_details)
 {
 	int retval = 0;
 	struct if_proto  *ifproto = NULL;
-	
-	if (ifp == NULL || protocol == 0 || proto_details == NULL)
-		return EINVAL;
-	
-	ifproto = _MALLOC(sizeof(struct if_proto), M_IFADDR, M_WAITOK);
-	if (ifproto == 0) {
-		DLIL_PRINTF("ERROR - dlil failed if_proto allocation\n");
+
+	ifnet_head_lock_shared();
+	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
+		retval = EINVAL;
+		goto end;
+	}
+	/* Check that the interface is in the global list */
+	if (!ifnet_lookup(ifp)) {
+		retval = ENXIO;
+		goto end;
+	}
+
+	ifproto = zalloc(dlif_proto_zone);
+	if (ifproto == NULL) {
 		retval = ENOMEM;
 		goto end;
 	}
-	bzero(ifproto, sizeof(*ifproto));
-	
+	bzero(ifproto, dlif_proto_size);
+
+	/* refcnt held above during lookup */
 	ifproto->ifp = ifp;
 	ifproto->protocol_family = protocol;
 	ifproto->proto_kpi = kProtoKPI_v1;
@@ -2249,34 +2557,52 @@ ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
 	ifproto->kpi.v1.detached = proto_details->detached;
 	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
 	ifproto->kpi.v1.send_arp = proto_details->send_arp;
-	
+
 	retval = dlil_attach_protocol_internal(ifproto,
-				proto_details->demux_list, proto_details->demux_count);
-	
+	    proto_details->demux_list, proto_details->demux_count);
+
+	if (dlil_verbose) {
+		printf("%s%d: attached v1 protocol %d\n", ifp->if_name,
+		    ifp->if_unit, protocol);
+	}
+
 end:
-	if (retval && ifproto)
-		FREE(ifproto, M_IFADDR);
-	return retval;
+	if (retval != 0 && retval != EEXIST && ifp != NULL) {
+		DLIL_PRINTF("%s%d: failed to attach v1 protocol %d (err=%d)\n",
+		    ifp->if_name, ifp->if_unit, protocol, retval);
+	}
+	ifnet_head_done();
+	if (retval != 0 && ifproto != NULL)
+		zfree(dlif_proto_zone, ifproto);
+	return (retval);
 }
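
For context, a sketch of how a client drives this KPI. The param fields are the ones consumed above (demux_list, demux_count, detached, resolve, send_arp, plus the v1 handlers); PF_EXAMPLE and the my_* names are assumed placeholders:

static errno_t
example_attach(ifnet_t ifp)
{
	struct ifnet_attach_proto_param param;

	bzero(&param, sizeof (param));
	param.input = my_input;		/* packet input handler */
	param.pre_output = my_preout;	/* fill frame type/link address */
	param.demux_list = my_demux;	/* frames we want delivered */
	param.demux_count = my_demux_count;

	return (ifnet_attach_protocol(ifp, PF_EXAMPLE, &param));
}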
 
 errno_t
 ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
-	const struct ifnet_attach_proto_param_v2 *proto_details)
+    const struct ifnet_attach_proto_param_v2 *proto_details)
 {
 	int retval = 0;
 	struct if_proto  *ifproto = NULL;
-	
-	if (ifp == NULL || protocol == 0 || proto_details == NULL)
-		return EINVAL;
-	
-	ifproto = _MALLOC(sizeof(struct if_proto), M_IFADDR, M_WAITOK);
-	if (ifproto == 0) {
-		DLIL_PRINTF("ERROR - dlil failed if_proto allocation\n");
+
+	ifnet_head_lock_shared();
+	if (ifp == NULL || protocol == 0 || proto_details == NULL) {
+		retval = EINVAL;
+		goto end;
+	}
+	/* Check that the interface is in the global list */
+	if (!ifnet_lookup(ifp)) {
+		retval = ENXIO;
+		goto end;
+	}
+
+	ifproto = zalloc(dlif_proto_zone);
+	if (ifproto == NULL) {
 		retval = ENOMEM;
 		goto end;
 	}
 	bzero(ifproto, sizeof(*ifproto));
-	
+
+	/* refcnt held above during lookup */
 	ifproto->ifp = ifp;
 	ifproto->protocol_family = protocol;
 	ifproto->proto_kpi = kProtoKPI_v2;
@@ -2287,49 +2613,24 @@ ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
 	ifproto->kpi.v2.detached = proto_details->detached;
 	ifproto->kpi.v2.resolve_multi = proto_details->resolve;
 	ifproto->kpi.v2.send_arp = proto_details->send_arp;
-	
-	retval = dlil_attach_protocol_internal(ifproto,
-				proto_details->demux_list, proto_details->demux_count);
-	
-end:
-	if (retval && ifproto)
-		FREE(ifproto, M_IFADDR);
-	return retval;
-}
 
-extern void if_rtproto_del(struct ifnet *ifp, int protocol);
+	retval = dlil_attach_protocol_internal(ifproto,
+	    proto_details->demux_list, proto_details->demux_count);
 
-static int
-dlil_detach_protocol_internal(
-	struct if_proto *proto)
-{
-	struct ifnet *ifp = proto->ifp;
-	u_int32_t proto_family = proto->protocol_family;
-	struct kev_dl_proto_data	ev_pr_data;
-	
-	if (proto->proto_kpi == kProtoKPI_v1) {
-		if (proto->kpi.v1.detached)
-			proto->kpi.v1.detached(ifp, proto->protocol_family);
+	if (dlil_verbose) {
+		printf("%s%d: attached v2 protocol %d\n", ifp->if_name,
+		    ifp->if_unit, protocol);
 	}
-	if (proto->proto_kpi == kProtoKPI_v2) {
-		if (proto->kpi.v2.detached)
-			proto->kpi.v2.detached(ifp, proto->protocol_family);
+
+end:
+	if (retval != 0 && retval != EEXIST && ifp != NULL) {
+		DLIL_PRINTF("%s%d: failed to attach v2 protocol %d (err=%d)\n",
+		    ifp->if_name, ifp->if_unit, protocol, retval);
 	}
-	if_proto_free(proto);
-    
-	/*
-	 * Cleanup routes that may still be in the routing table for that interface/protocol pair.
-	 */
-	
-	if_rtproto_del(ifp, proto_family);
-	
-	/* the reserved field carries the number of protocol still attached (subject to change) */
-	ev_pr_data.proto_family   = proto_family;
-	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
-	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED, 
-				  (struct net_event_data *)&ev_pr_data, 
-				  sizeof(struct kev_dl_proto_data));
-	return 0;
+	ifnet_head_done();
+	if (retval != 0 && ifproto != NULL)
+		zfree(dlif_proto_zone, ifproto);
+	return (retval);
 }
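
The v1/v2 split is visible in the input prototypes: v2 drops the separate frame-header pointer, which lets DLIL hand up mbuf chains. A sketch of assumed no-op handlers matching the two shapes (mbuf_freem() consumes the packet, so returning 0 signals success):

/* v1: one packet per call, with a separate frame-header pointer */
static errno_t
my_input_v1(ifnet_t ifp, protocol_family_t pf, mbuf_t m, char *frame_header)
{
#pragma unused(ifp, pf, frame_header)
	mbuf_freem(m);
	return (0);
}

/* v2: may be handed an mbuf chain; the frame header stays in the mbuf */
static errno_t
my_input_v2(ifnet_t ifp, protocol_family_t pf, mbuf_t m)
{
#pragma unused(ifp, pf)
	mbuf_freem(m);
	return (0);
}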
 
 errno_t
@@ -2337,260 +2638,169 @@ ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
 {
 	struct if_proto *proto = NULL;
 	int	retval = 0;
-	int use_reached_zero = 0;
-	
-	if (ifp == NULL || proto_family == 0) return EINVAL;
-
-	if ((retval = dlil_write_begin()) != 0) {
-		if (retval == EDEADLK) {
-			retval = 0;
-			dlil_read_begin();
-			proto = find_attached_proto(ifp, proto_family);
-			if (proto == 0) {
-				retval = ENXIO;
-			}
-			else {
-				proto->detaching = 1;
-				dlil_detach_waiting = 1;
-				wakeup(&dlil_detach_waiting);
-			}
-			dlil_read_end();
-		}
+
+	if (ifp == NULL || proto_family == 0) {
+		retval = EINVAL;
 		goto end;
 	}
-	
+
+	ifnet_lock_exclusive(ifp);
+	/* callee holds a proto refcnt upon success */
 	proto = find_attached_proto(ifp, proto_family);
-	
 	if (proto == NULL) {
 		retval = ENXIO;
-		dlil_write_end();
+		ifnet_lock_done(ifp);
 		goto end;
 	}
-	
-	/*
-	 * Call family module del_proto
-	 */
-	
+
+	/* call family module del_proto */
 	if (ifp->if_del_proto)
 		ifp->if_del_proto(ifp, proto->protocol_family);
 
-	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)], proto, if_proto, next_hash);
-	
-	/*
-	 * We can do the rest of the work outside of the write lock.
-	 */
-	use_reached_zero = ifp_unuse(ifp);
-	dlil_write_end();
-	
-	dlil_detach_protocol_internal(proto);
+	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
+	    proto, if_proto, next_hash);
+
+	if (proto->proto_kpi == kProtoKPI_v1) {
+		proto->kpi.v1.input = ifproto_media_input_v1;
+		proto->kpi.v1.pre_output = ifproto_media_preout;
+		proto->kpi.v1.event = ifproto_media_event;
+		proto->kpi.v1.ioctl = ifproto_media_ioctl;
+		proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
+		proto->kpi.v1.send_arp = ifproto_media_send_arp;
+	} else {
+		proto->kpi.v2.input = ifproto_media_input_v2;
+		proto->kpi.v2.pre_output = ifproto_media_preout;
+		proto->kpi.v2.event = ifproto_media_event;
+		proto->kpi.v2.ioctl = ifproto_media_ioctl;
+		proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
+		proto->kpi.v2.send_arp = ifproto_media_send_arp;
+	}
+	proto->detached = 1;
+	ifnet_lock_done(ifp);
+
+	if (dlil_verbose) {
+		printf("%s%d: detached %s protocol %d\n", ifp->if_name,
+		    ifp->if_unit, (proto->proto_kpi == kProtoKPI_v1) ?
+		    "v1" : "v2", proto_family);
+	}
+
+	/* release proto refcnt held during protocol attach */
+	if_proto_free(proto);
 
 	/*
-	 * Only handle the case where the interface will go away after
-	 * we've sent the message. This way post message can send the
-	 * message to the interface safely.
+	 * Release proto refcnt held during lookup; the rest of
+	 * protocol detach steps will happen when the last proto
+	 * reference is released.
 	 */
-	
-	if (use_reached_zero)
-		ifp_use_reached_zero(ifp);
-	
+	if_proto_free(proto);
+
 end:
-    return retval;
+	return (retval);
 }
 
-/*
- * dlil_delayed_detach_thread is responsible for detaching
- * protocols, protocol filters, and interface filters after
- * an attempt was made to detach one of those items while
- * it was not safe to do so (i.e. called dlil_read_begin).
- *
- * This function will take the dlil write lock and walk
- * through each of the interfaces looking for items with
- * the detaching flag set. When an item is found, it is
- * detached from the interface and placed on a local list.
- * After all of the items have been collected, we drop the
- * write lock and performed the post detach. This is done
- * so we only have to take the write lock once.
- *
- * When detaching a protocol filter, if we find that we
- * have detached the very last protocol and we need to call
- * ifp_use_reached_zero, we have to break out of our work
- * to drop the write lock so we can call ifp_use_reached_zero.
- */
- 
-static void
-dlil_delayed_detach_thread(__unused void* foo, __unused wait_result_t wait)
+
+static errno_t
+ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
+    struct mbuf *packet, char *header)
 {
-	thread_t self = current_thread();
-	int asserted = 0;
-	
-	ml_thread_policy(self, MACHINE_GROUP,
-					 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
+#pragma unused(ifp, protocol, packet, header)
+	return (ENXIO);
+}
+
+static errno_t
+ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
+    struct mbuf *packet)
+{
+#pragma unused(ifp, protocol, packet)
+	return (ENXIO);
+}
+
+static errno_t
+ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
+    mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
+    char *link_layer_dest)
+{
+#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
+	return (ENXIO);
 
-	
-	while (1) {
-		if (dlil_detach_waiting != 0 && dlil_write_begin() == 0) {
-			struct ifnet *ifp;
-			struct proto_hash_entry detached_protos;
-			struct ifnet_filter_head detached_filters;
-			struct if_proto	*proto;
-			struct if_proto *next_proto;
-			struct ifnet_filter *filt;
-			struct ifnet_filter *next_filt;
-			int reached_zero;
-			
-			reached_zero = 0;
-			
-			/* Clear the detach waiting flag */
-			dlil_detach_waiting = 0;
-			TAILQ_INIT(&detached_filters);
-			SLIST_INIT(&detached_protos);
-			
-			ifnet_head_lock_shared();
-			TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
-				int i;
-				
-				// Look for protocols and protocol filters
-				for (i = 0; i < PROTO_HASH_SLOTS && !reached_zero; i++) {
-					struct if_proto **prev_nextptr = &SLIST_FIRST(&ifp->if_proto_hash[i]);
-					for (proto = *prev_nextptr; proto; proto = *prev_nextptr) {
-						
-						// Detach this protocol
-						if (proto->detaching) {
-							if (ifp->if_del_proto)
-								ifp->if_del_proto(ifp, proto->protocol_family);
-							*prev_nextptr = SLIST_NEXT(proto, next_hash);
-							SLIST_INSERT_HEAD(&detached_protos, proto, next_hash);
-							reached_zero = ifp_unuse(ifp);
-							if (reached_zero) {
-								break;
-							}
-						}
-						else {
-							// Update prev_nextptr to point to our next ptr
-							prev_nextptr = &SLIST_NEXT(proto, next_hash);
-						}
-					}
-				}
-				
-				// look for interface filters that need to be detached
-				for (filt = TAILQ_FIRST(&ifp->if_flt_head); filt; filt = next_filt) {
-					next_filt = TAILQ_NEXT(filt, filt_next);
-					if (filt->filt_detaching != 0) {
-						// take this interface filter off the interface filter list
-						TAILQ_REMOVE(&ifp->if_flt_head, filt, filt_next);
-						
-						// put this interface filter on the detached filters list
-						TAILQ_INSERT_TAIL(&detached_filters, filt, filt_next);
-					}
-				}
-				
-				if (ifp->if_delayed_detach) {
-					ifp->if_delayed_detach = 0;
-					reached_zero = ifp_unuse(ifp);
-				}
-				
-				if (reached_zero)
-					break;
-			}
-			ifnet_head_done();
-			dlil_write_end();
-			
-			for (filt = TAILQ_FIRST(&detached_filters); filt; filt = next_filt) {
-				next_filt = TAILQ_NEXT(filt, filt_next);
-				/*
-				 * dlil_detach_filter_internal won't remove an item from
-				 * the list if it is already detached (second parameter).
-				 * The item will be freed though.
-				 */
-				dlil_detach_filter_internal(filt, 1);
-			}
-			
-			for (proto = SLIST_FIRST(&detached_protos); proto; proto = next_proto) {
-				next_proto = SLIST_NEXT(proto, next_hash);
-				dlil_detach_protocol_internal(proto);
-			}
-			
-			if (reached_zero) {
-				ifp_use_reached_zero(ifp);
-				dlil_detach_waiting = 1; // we may have missed something
-			}
-		}
-		
-		if (!asserted && dlil_detach_waiting == 0) {
-			asserted = 1;
-			assert_wait(&dlil_detach_waiting, THREAD_UNINT);
-		}
-		
-		if (dlil_detach_waiting == 0) {
-			asserted = 0;
-			thread_block(dlil_delayed_detach_thread);
-		}
-	}
 }
 
 static void
-dlil_call_delayed_detach_thread(void) {
-	dlil_delayed_detach_thread(NULL, THREAD_RESTART);
+ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
+    const struct kev_msg *event)
+{
+#pragma unused(ifp, protocol, event)
+}
+
+static errno_t
+ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
+    unsigned long command, void *argument)
+{
+#pragma unused(ifp, protocol, command, argument)
+	return (ENXIO);
+}
+
+static errno_t
+ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
+    struct sockaddr_dl *out_ll, size_t ll_len)
+{
+#pragma unused(ifp, proto_addr, out_ll, ll_len)
+	return (ENXIO);
+}
+
+static errno_t
+ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
+    const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
+    const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
+{
+#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
+	return (ENXIO);
 }
 
 extern int if_next_index(void);
 
 errno_t
-ifnet_attach(
-	ifnet_t						ifp,
-	const struct sockaddr_dl	*ll_addr)
+ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
 {
-	u_int32_t		    interface_family;
 	struct ifnet *tmp_if;
-	struct proto_hash_entry *new_proto_list = NULL;
-	int locked = 0;
-	
-	if (ifp == NULL) return EINVAL;
-	if (ll_addr && ifp->if_addrlen == 0) {
-		ifp->if_addrlen = ll_addr->sdl_alen;
-	}
-	else if (ll_addr && ll_addr->sdl_alen != ifp->if_addrlen) {
-		return EINVAL;
-	}
-	
-	interface_family = ifp->if_family;
-	
-	ifnet_head_lock_shared();
+	struct ifaddr *ifa;
+	struct if_data_internal if_data_saved;
+	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
 
+	if (ifp == NULL)
+		return (EINVAL);
+
+	ifnet_head_lock_exclusive();
 	/* Verify we aren't already on the list */
 	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
 		if (tmp_if == ifp) {
 			ifnet_head_done();
-			return EEXIST;
+			return (EEXIST);
 		}
 	}
-	
-	ifnet_head_done();
-	
-	if ((ifp->if_eflags & IFEF_REUSE) == 0 || ifp->if_lock == 0)
-#if IFNET_RW_LOCK
-		ifp->if_lock = lck_rw_alloc_init(ifnet_lock_group, ifnet_lock_attr);
-#else
-		ifp->if_lock = lck_mtx_alloc_init(ifnet_lock_group, ifnet_lock_attr);
-#endif
 
-	if (ifp->if_lock == 0) {
-		return ENOMEM;
+	lck_mtx_lock_spin(&ifp->if_ref_lock);
+	if (ifp->if_refflags & IFRF_ATTACHED) {
+		panic("%s: flags mismatch (attached set) ifp=%p",
+		    __func__, ifp);
+		/* NOTREACHED */
 	}
+	lck_mtx_unlock(&ifp->if_ref_lock);
 
-	if (!(ifp->if_eflags & IFEF_REUSE) || ifp->if_fwd_route_lock == NULL) {
-		if (ifp->if_fwd_route_lock == NULL)
-			ifp->if_fwd_route_lock = lck_mtx_alloc_init(
-			    ifnet_lock_group, ifnet_lock_attr);
+	ifnet_lock_exclusive(ifp);
 
-		if (ifp->if_fwd_route_lock == NULL) {
-#if IFNET_RW_LOCK
-			lck_rw_free(ifp->if_lock, ifnet_lock_group);
-#else
-			lck_mtx_free(ifp->if_lock, ifnet_lock_group);
-#endif
-			ifp->if_lock = NULL;
-			return (ENOMEM);
+	/* Sanity check */
+	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
+	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
+
+	if (ll_addr != NULL) {
+		if (ifp->if_addrlen == 0) {
+			ifp->if_addrlen = ll_addr->sdl_alen;
+		} else if (ll_addr->sdl_alen != ifp->if_addrlen) {
+			ifnet_lock_done(ifp);
+			ifnet_head_done();
+			return (EINVAL);
 		}
 	}
 
@@ -2598,251 +2808,606 @@ ifnet_attach(
 	 * Allow interfaces without protocol families to attach
 	 * only if they have the necessary fields filled out.
 	 */
-	
-	if (ifp->if_add_proto == 0 || ifp->if_del_proto == 0) {
-		DLIL_PRINTF("dlil Attempt to attach interface without family module - %d\n", 
-				interface_family);
-		return ENODEV;
-	}
-	
-	if ((ifp->if_eflags & IFEF_REUSE) == 0 || ifp->if_proto_hash == NULL) {
-		MALLOC(new_proto_list, struct proto_hash_entry*, sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS,
-			   M_NKE, M_WAITOK);
-
-		if (new_proto_list == 0) {
-			return ENOBUFS;
-		}
+	if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
+		DLIL_PRINTF("%s: Attempt to attach interface without "
+		    "family module - %d\n", __func__, ifp->if_family);
+		ifnet_lock_done(ifp);
+		ifnet_head_done();
+		return (ENODEV);
 	}
 
-	dlil_write_begin();
-	locked = 1;
+	/* Allocate protocol hash table */
+	VERIFY(ifp->if_proto_hash == NULL);
+	ifp->if_proto_hash = zalloc(dlif_phash_zone);
+	if (ifp->if_proto_hash == NULL) {
+		ifnet_lock_done(ifp);
+		ifnet_head_done();
+		return (ENOBUFS);
+	}
+	bzero(ifp->if_proto_hash, dlif_phash_size);
 
+	lck_mtx_lock_spin(&ifp->if_flt_lock);
+	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
 	TAILQ_INIT(&ifp->if_flt_head);
-	
-		
-	if (new_proto_list) {
-		bzero(new_proto_list, (PROTO_HASH_SLOTS * sizeof(struct proto_hash_entry)));
-		ifp->if_proto_hash = new_proto_list;
-		new_proto_list = NULL;
-	}
-	
-	/* old_if_attach */
-	{
-		char workbuf[64];
-		int namelen, masklen, socksize, ifasize;
-		struct ifaddr *ifa = NULL;
-		
-		if (ifp->if_snd.ifq_maxlen == 0)
-			ifp->if_snd.ifq_maxlen = ifqmaxlen;
-		TAILQ_INIT(&ifp->if_prefixhead);
+	VERIFY(ifp->if_flt_busy == 0);
+	VERIFY(ifp->if_flt_waiters == 0);
+	lck_mtx_unlock(&ifp->if_flt_lock);
+
+	VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
+	TAILQ_INIT(&ifp->if_prefixhead);
+
+	if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
+		VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
 		LIST_INIT(&ifp->if_multiaddrs);
-		ifnet_touch_lastchange(ifp);
-		
-		/* usecount to track attachment to the ifnet list */
-		ifp_use(ifp, kIfNetUseCount_MayBeZero);
-		
-		/* Lock the list of interfaces */
-		ifnet_head_lock_exclusive();
-		ifnet_lock_exclusive(ifp);
-		
-		if ((ifp->if_eflags & IFEF_REUSE) == 0 || ifp->if_index == 0) {
-			int idx = if_next_index();
-            
-            if (idx == -1) {
-                ifnet_lock_done(ifp);
-                ifnet_head_done();
-                ifp_unuse(ifp);
-                dlil_write_end();
-                
-                return ENOBUFS;
-            }
-			ifp->if_index = idx;
-		} else {
-			ifa = TAILQ_FIRST(&ifp->if_addrhead);
-		}
-		namelen = snprintf(workbuf, sizeof(workbuf), "%s%d", ifp->if_name, ifp->if_unit);
-#define _offsetof(t, m) ((uintptr_t)((caddr_t)&((t *)0)->m))
-		masklen = _offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
-		socksize = masklen + ifp->if_addrlen;
-#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(u_int32_t) - 1)))
-		if ((u_int32_t)socksize < sizeof(struct sockaddr_dl))
-			socksize = sizeof(struct sockaddr_dl);
-		socksize = ROUNDUP(socksize);
-		ifasize = sizeof(struct ifaddr) + 2 * socksize;
-		
-		/*
-		 * Allocate a new ifa if we don't have one
-		 * or the old one is too small.
-		 */
-		if (ifa == NULL || socksize > ifa->ifa_addr->sa_len) {
-			if (ifa)
-				if_detach_ifa(ifp, ifa);
-			ifa = (struct ifaddr*)_MALLOC(ifasize, M_IFADDR, M_WAITOK);
-		}
-		
-		if (ifa) {
-			struct sockaddr_dl *sdl = (struct sockaddr_dl *)(ifa + 1);
-			ifnet_addrs[ifp->if_index - 1] = ifa;
-			bzero(ifa, ifasize);
-			ifa->ifa_debug |= IFD_ALLOC;
-			sdl->sdl_len = socksize;
-			sdl->sdl_family = AF_LINK;
-			bcopy(workbuf, sdl->sdl_data, namelen);
-			sdl->sdl_nlen = namelen;
-			sdl->sdl_index = ifp->if_index;
-			sdl->sdl_type = ifp->if_type;
-			if (ll_addr) {
-				sdl->sdl_alen = ll_addr->sdl_alen;
-				if (ll_addr->sdl_alen != ifp->if_addrlen)
-					panic("ifnet_attach - ll_addr->sdl_alen != ifp->if_addrlen");
-				bcopy(CONST_LLADDR(ll_addr), LLADDR(sdl), sdl->sdl_alen);
-			}
-			ifa->ifa_ifp = ifp;
-			ifa->ifa_rtrequest = link_rtrequest;
-			ifa->ifa_addr = (struct sockaddr*)sdl;
-			sdl = (struct sockaddr_dl*)(socksize + (caddr_t)sdl);
-			ifa->ifa_netmask = (struct sockaddr*)sdl;
-			sdl->sdl_len = masklen;
-			while (namelen != 0)
-				sdl->sdl_data[--namelen] = 0xff;
-		}
+	}
 
-		TAILQ_INIT(&ifp->if_addrhead);
-		ifa = ifnet_addrs[ifp->if_index - 1];
-		
-		if (ifa) {
-			/*
-			 * We don't use if_attach_ifa because we want
-			 * this address to be first on the list.
-			 */
-			ifaref(ifa);
-			ifa->ifa_debug |= IFD_ATTACHED;
-			TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
+	VERIFY(ifp->if_allhostsinm == NULL);
+	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
+	TAILQ_INIT(&ifp->if_addrhead);
+
+	if (ifp->if_snd.ifq_maxlen == 0)
+		ifp->if_snd.ifq_maxlen = ifqmaxlen;
+
+	if (ifp->if_index == 0) {
+		int idx = if_next_index();
+
+		if (idx == -1) {
+			ifp->if_index = 0;
+			ifnet_lock_done(ifp);
+			ifnet_head_done();
+			return (ENOBUFS);
 		}
+		ifp->if_index = idx;
+	}
+	/* There should not be anything occupying this slot */
+	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
+
+	/* allocate (if needed) and initialize a link address */
+	VERIFY(!(dl_if->dl_if_flags & DLIF_REUSE) || ifp->if_lladdr != NULL);
+	ifa = dlil_alloc_lladdr(ifp, ll_addr);
+	if (ifa == NULL) {
+		ifnet_lock_done(ifp);
+		ifnet_head_done();
+		return (ENOBUFS);
+	}
+
+	VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
+	ifnet_addrs[ifp->if_index - 1] = ifa;
+
+	/* make this address the first on the list */
+	IFA_LOCK(ifa);
+	/* hold a reference for ifnet_addrs[] */
+	IFA_ADDREF_LOCKED(ifa);
+	/* if_attach_link_ifa() holds a reference for ifa_link */
+	if_attach_link_ifa(ifp, ifa);
+	IFA_UNLOCK(ifa);
+
 #if CONFIG_MACF_NET
-		mac_ifnet_label_associate(ifp);
+	mac_ifnet_label_associate(ifp);
 #endif
-		
-		TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
-		ifindex2ifnet[ifp->if_index] = ifp;
-	}
 
-	/* 
-	 * A specific dlil input thread is created per Ethernet/PDP interface.
-	 * pseudo interfaces or other types of interfaces use the main ("loopback") thread.
-	 * If the sysctl "net.link.generic.system.multi_threaded_input" is set to zero, all packets will
-	 * be handled by the main loopback thread, reverting to 10.4.x behaviour.
-	 * 
-	 */
+	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
+	ifindex2ifnet[ifp->if_index] = ifp;
 
-	if (ifp->if_type == IFT_ETHER || ifp->if_type == IFT_PDP) {
+	/* Hold a reference to the underlying dlil_ifnet */
+	ifnet_reference(ifp);
+
+	/*
+	 * A specific dlil input thread is created per Ethernet/cellular
+	 * interface.  Pseudo interfaces or other types of interfaces use
+	 * the main ("loopback") thread.
+	 *
+	 * If the sysctl "net.link.generic.system.multi_threaded_input" is set
+	 * to zero, all packets will be handled by the main loopback thread,
+	 * reverting to 10.4.x behaviour.
+	 */
+	if (dlil_multithreaded_input &&
+	    (ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR)) {
 		int err;
 
-		if (dlil_multithreaded_input > 0) {
-			ifp->if_input_thread = _MALLOC(sizeof(struct dlil_threading_info), M_NKE, M_WAITOK);
-			if (ifp->if_input_thread == NULL)
-				panic("ifnet_attach ifp=%p couldn't alloc threading\n", ifp);
-			if ((err = dlil_create_input_thread(ifp, ifp->if_input_thread)) != 0)
-				panic("ifnet_attach ifp=%p couldn't get a thread. err=%d\n", ifp, err);
+		ifp->if_input_thread = zalloc(dlif_inp_zone);
+		if (ifp->if_input_thread == NULL) {
+			panic("%s: ifp=%p couldn't alloc threading",
+			    __func__, ifp);
+			/* NOTREACHED */
+		}
+		bzero(ifp->if_input_thread, dlif_inp_size);
+		err = dlil_create_input_thread(ifp, ifp->if_input_thread);
+		if (err != 0) {
+			panic("%s: ifp=%p couldn't get a thread. "
+			    "err=%d", __func__, ifp, err);
+			/* NOTREACHED */
+		}
 #ifdef DLIL_DEBUG
-			printf("ifnet_attach: dlil thread for ifp=%p if_index=%d\n", ifp, ifp->if_index);
+		printf("%s: dlil thread for ifp=%p if_index=%d\n",
+		    __func__, ifp, ifp->if_index);
 #endif
-		}
 	}
+
+	/* Clear stats (save and restore other fields that we care about) */
+	if_data_saved = ifp->if_data;
+	bzero(&ifp->if_data, sizeof (ifp->if_data));
+	ifp->if_data.ifi_type = if_data_saved.ifi_type;
+	ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
+	ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
+	ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
+	ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
+	ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
+	ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
+	ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
+	ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
+	ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
+	ifnet_touch_lastchange(ifp);
+
+	/* Record attach PC stacktrace */
+	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
+
+	ifp->if_updatemcasts = 0;
+	if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
+		struct ifmultiaddr *ifma;
+		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+			IFMA_LOCK(ifma);
+			if (ifma->ifma_addr->sa_family == AF_LINK ||
+			    ifma->ifma_addr->sa_family == AF_UNSPEC)
+				ifp->if_updatemcasts++;
+			IFMA_UNLOCK(ifma);
+		}
+
+		printf("%s%d: attached with %d suspended link-layer multicast "
+		    "membership(s)\n", ifp->if_name, ifp->if_unit,
+		    ifp->if_updatemcasts);
+	}
+
 	ifnet_lock_done(ifp);
 	ifnet_head_done();
-#if PF
+
+	lck_mtx_lock(&ifp->if_cached_route_lock);
+	/* Enable forwarding cached route */
+	ifp->if_fwd_cacheok = 1;
+	/* Clean up any existing cached routes */
+	if (ifp->if_fwd_route.ro_rt != NULL)
+		rtfree(ifp->if_fwd_route.ro_rt);
+	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
+	if (ifp->if_src_route.ro_rt != NULL)
+		rtfree(ifp->if_src_route.ro_rt);
+	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
+	if (ifp->if_src_route6.ro_rt != NULL)
+		rtfree(ifp->if_src_route6.ro_rt);
+	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
+	lck_mtx_unlock(&ifp->if_cached_route_lock);
+
+	ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
+
 	/*
-	 * Attach packet filter to this interface, if enaled.
+	 * Allocate and attach IGMPv3/MLDv2 interface-specific variables
+	 * and trees; do this before the ifnet is marked as attached.
+	 * The ifnet keeps the reference to the info structures even after
+	 * the ifnet is detached, since the network-layer records still
+	 * refer to the info structures even after that.  This also
+	 * makes it possible for them to still function after the ifnet
+	 * is recycled or reattached.
 	 */
-	pf_ifnet_hook(ifp, 1);
-#endif /* PF */
-	dlil_write_end();
+#if INET
+	if (IGMP_IFINFO(ifp) == NULL) {
+		IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
+		VERIFY(IGMP_IFINFO(ifp) != NULL);
+	} else {
+		VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
+		igmp_domifreattach(IGMP_IFINFO(ifp));
+	}
+#endif /* INET */
+#if INET6
+	if (MLD_IFINFO(ifp) == NULL) {
+		MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
+		VERIFY(MLD_IFINFO(ifp) != NULL);
+	} else {
+		VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
+		mld_domifreattach(MLD_IFINFO(ifp));
+	}
+#endif /* INET6 */
 
-#if IFNET_ROUTE_REFCNT
+	/*
+	 * Finally, mark this ifnet as attached.
+	 */
+	lck_mtx_lock(rnh_lock);
+	ifnet_lock_exclusive(ifp);
+	lck_mtx_lock_spin(&ifp->if_ref_lock);
+	ifp->if_refflags = IFRF_ATTACHED;
+	lck_mtx_unlock(&ifp->if_ref_lock);
 	if (net_rtref) {
-		(void) ifnet_set_idle_flags(ifp, IFRF_IDLE_NOTIFY,
+		/* boot-args override; enable idle notification */
+		(void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
 		    IFRF_IDLE_NOTIFY);
+	} else {
+		/* apply previous request(s) to set the idle flags, if any */
+		(void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
+		    ifp->if_idle_new_flags_mask);
+
 	}
-#endif /* IFNET_ROUTE_REFCNT */
+	ifnet_lock_done(ifp);
+	lck_mtx_unlock(rnh_lock);
+
+#if PF
+	/*
+	 * Attach packet filter to this interface, if enabled.
+	 */
+	pf_ifnet_hook(ifp, 1);
+#endif /* PF */
 
 	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
 
-    return 0;
+	if (dlil_verbose) {
+		printf("%s%d: attached%s\n", ifp->if_name, ifp->if_unit,
+		    (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
+	}
+
+	return (0);
+}
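
For orientation, a hedged sketch of the usual driver path that lands here: allocate an ifnet through the public KPI, then attach it. The foo_* callbacks, sc, and lladdr are assumptions; error handling is elided:

	struct ifnet_init_params init;
	ifnet_t ifp;

	bzero(&init, sizeof (init));
	init.name = "foo";			/* hypothetical driver */
	init.unit = 0;
	init.family = IFNET_FAMILY_ETHERNET;
	init.type = IFT_ETHER;
	init.output = foo_output;		/* assumed driver callbacks */
	init.demux = foo_demux;
	init.add_proto = foo_add_proto;
	init.del_proto = foo_del_proto;
	init.softc = sc;			/* driver private state */

	if (ifnet_allocate(&init, &ifp) == 0)
		(void) ifnet_attach(ifp, lladdr);	/* lladdr: sockaddr_dl */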
+
+/*
+ * Prepare the storage for the first/permanent link address, which must
+ * have the same lifetime as the ifnet itself.  Although the link
+ * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
+ * its location in memory must never change as it may still be referred
+ * to by some parts of the system afterwards (unfortunate implementation
+ * artifacts inherited from BSD.)
+ *
+ * Caller must hold ifnet lock as writer.
+ */
+static struct ifaddr *
+dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
+{
+	struct ifaddr *ifa, *oifa;
+	struct sockaddr_dl *asdl, *msdl;
+	char workbuf[IFNAMSIZ*2];
+	int namelen, masklen, socksize;
+	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
+
+	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
+	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
+
+	namelen = snprintf(workbuf, sizeof (workbuf), "%s%d",
+	    ifp->if_name, ifp->if_unit);
+	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
+	socksize = masklen + ifp->if_addrlen;
+#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
+	if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
+		socksize = sizeof(struct sockaddr_dl);
+	socksize = ROUNDUP(socksize);
+#undef ROUNDUP
+
+	ifa = ifp->if_lladdr;
+	if (socksize > DLIL_SDLMAXLEN ||
+	    (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
+		/*
+		 * Rare, but in the event that the link address requires
+		 * more storage space than DLIL_SDLMAXLEN, allocate the
+		 * largest possible storage for address and mask, such
+		 * that we can reuse the same space when if_addrlen grows.
+		 * This same space will be used when if_addrlen shrinks.
+		 */
+		if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
+			int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
+			ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
+			if (ifa == NULL)
+				return (NULL);
+			ifa_lock_init(ifa);
+			/* Don't set IFD_ALLOC, as this is permanent */
+			ifa->ifa_debug = IFD_LINK;
+		}
+		IFA_LOCK(ifa);
+		/* address and mask sockaddr_dl locations */
+		asdl = (struct sockaddr_dl *)(ifa + 1);
+		bzero(asdl, SOCK_MAXADDRLEN);
+		msdl = (struct sockaddr_dl *)((char *)asdl + SOCK_MAXADDRLEN);
+		bzero(msdl, SOCK_MAXADDRLEN);
+	} else {
+		VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
+		/*
+		 * Use the storage areas for address and mask within the
+		 * dlil_ifnet structure.  This is the most common case.
+		 */
+		if (ifa == NULL) {
+			ifa = &dl_if->dl_if_lladdr.ifa;
+			ifa_lock_init(ifa);
+			/* Don't set IFD_ALLOC, as this is permanent */
+			ifa->ifa_debug = IFD_LINK;
+		}
+		IFA_LOCK(ifa);
+		/* address and mask sockaddr_dl locations */
+		asdl = (struct sockaddr_dl *)&dl_if->dl_if_lladdr.asdl;
+		bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
+		msdl = (struct sockaddr_dl *)&dl_if->dl_if_lladdr.msdl;
+		bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
+	}
+
+	/* hold a permanent reference for the ifnet itself */
+	IFA_ADDREF_LOCKED(ifa);
+	oifa = ifp->if_lladdr;
+	ifp->if_lladdr = ifa;
+
+	VERIFY(ifa->ifa_debug == IFD_LINK);
+	ifa->ifa_ifp = ifp;
+	ifa->ifa_rtrequest = link_rtrequest;
+	ifa->ifa_addr = (struct sockaddr *)asdl;
+	asdl->sdl_len = socksize;
+	asdl->sdl_family = AF_LINK;
+	bcopy(workbuf, asdl->sdl_data, namelen);
+	asdl->sdl_nlen = namelen;
+	asdl->sdl_index = ifp->if_index;
+	asdl->sdl_type = ifp->if_type;
+	if (ll_addr != NULL) {
+		asdl->sdl_alen = ll_addr->sdl_alen;
+		bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
+	} else {
+		asdl->sdl_alen = 0;
+	}
+	ifa->ifa_netmask = (struct sockaddr*)msdl;
+	msdl->sdl_len = masklen;
+	while (namelen != 0)
+		msdl->sdl_data[--namelen] = 0xff;
+	IFA_UNLOCK(ifa);
+
+	if (oifa != NULL)
+		IFA_REMREF(oifa);
+
+	return (ifa);
+}
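
A worked instance of the sizing arithmetic, runnable in user space (hypothetical "en0" with a 6-byte Ethernet address; offsetof(struct sockaddr_dl, sdl_data[0]) is 8 on this layout, so masklen = 11 and socksize = 17 is raised to sizeof (struct sockaddr_dl) = 20):

#include <sys/types.h>
#include <stddef.h>
#include <stdio.h>
#include <net/if_dl.h>

#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))

int
main(void)
{
	int namelen = 3;	/* "en0" */
	int masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	int socksize = masklen + 6;	/* 6-byte link-layer address */

	if ((size_t)socksize < sizeof (struct sockaddr_dl))
		socksize = sizeof (struct sockaddr_dl);
	printf("masklen=%d socksize=%d\n", masklen, (int)ROUNDUP(socksize));
	return (0);
}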
+
+static void
+if_purgeaddrs(struct ifnet *ifp)
+{
+#if INET
+	in_purgeaddrs(ifp);
+#endif /* INET */
+#if INET6
+	in6_purgeaddrs(ifp);
+#endif /* INET6 */
+#if NETAT
+	at_purgeaddrs(ifp);
+#endif
 }
 
 errno_t
-ifnet_detach(
-	ifnet_t	ifp)
+ifnet_detach(ifnet_t ifp)
 {
-	struct ifnet_filter *filter;
-	struct ifnet_filter	*filter_next;
-	int zeroed = 0;
-	int retval = 0;
-	struct ifnet_filter_head fhead;
-	struct dlil_threading_info *inputthread;
-	
-	if (ifp == NULL) return EINVAL;
-	
+	if (ifp == NULL)
+		return (EINVAL);
+
+	ifnet_head_lock_exclusive();
+	lck_mtx_lock(rnh_lock);
 	ifnet_lock_exclusive(ifp);
-	
-	if ((ifp->if_eflags & IFEF_DETACHING) != 0) {
+
+	/*
+	 * Check to see if this interface has previously triggered
+	 * aggressive protocol draining; if so, decrement the global
+	 * refcnt and clear PR_AGGDRAIN on the route domain if no
+	 * such interfaces remain.
+	 */
+	(void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
+
+	lck_mtx_lock_spin(&ifp->if_ref_lock);
+	if (!(ifp->if_refflags & IFRF_ATTACHED)) {
+		lck_mtx_unlock(&ifp->if_ref_lock);
+		ifnet_lock_done(ifp);
+		lck_mtx_unlock(rnh_lock);
+		ifnet_head_done();
+		return (EINVAL);
+	} else if (ifp->if_refflags & IFRF_DETACHING) {
 		/* Interface has already been detached */
+		lck_mtx_unlock(&ifp->if_ref_lock);
 		ifnet_lock_done(ifp);
-		return ENXIO;
+		lck_mtx_unlock(rnh_lock);
+		ifnet_head_done();
+		return (ENXIO);
 	}
-	
+	/* Indicate this interface is being detached */
+	ifp->if_refflags &= ~IFRF_ATTACHED;
+	ifp->if_refflags |= IFRF_DETACHING;
+	lck_mtx_unlock(&ifp->if_ref_lock);
+
+	if (dlil_verbose)
+		printf("%s%d: detaching\n", ifp->if_name, ifp->if_unit);
+
 	/*
-	 * Indicate this interface is being detached.
-	 * 
-	 * This should prevent protocols from attaching
-	 * from this point on. Interface will remain on
-	 * the list until all of the protocols are detached.
+	 * Remove ifnet from ifnet_head and ifindex2ifnet[]; it will
+	 * no longer be visible during lookups from this point.
 	 */
-	ifp->if_eflags |= IFEF_DETACHING;
+	VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
+	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
+	ifp->if_link.tqe_next = NULL;
+	ifp->if_link.tqe_prev = NULL;
+	ifindex2ifnet[ifp->if_index] = NULL;
+
+	/* Record detach PC stacktrace */
+	ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
+
 	ifnet_lock_done(ifp);
-	
-	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
-	
+	lck_mtx_unlock(rnh_lock);
+	ifnet_head_done();
+
 	/* Let BPF know we're detaching */
 	bpfdetach(ifp);
-	
-#if IFNET_ROUTE_REFCNT
+
+	/* Mark the interface as DOWN */
+	if_down(ifp);
+
+	/* Disable forwarding cached route */
+	lck_mtx_lock(&ifp->if_cached_route_lock);
+	ifp->if_fwd_cacheok = 0;
+	lck_mtx_unlock(&ifp->if_cached_route_lock);
+
 	/*
-	 * Check to see if this interface has previously triggered
-	 * aggressive protocol draining; if so, decrement the global
-	 * refcnt and clear PR_AGGDRAIN on the route domain if
-	 * there are no more of such an interface around.
+	 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
+	 * references to the info structures and leave them attached to
+	 * this ifnet.
 	 */
-	 if (ifp->if_want_aggressive_drain != 0)
-		(void) ifnet_set_idle_flags(ifp, 0, ~0);
-#endif /* IFNET_ROUTE_REFCNT */
-
-	if ((retval = dlil_write_begin()) != 0) {
-		if (retval == EDEADLK) {
-			retval = 0;
-			
-			/* We need to perform a delayed detach */
-			ifp->if_delayed_detach = 1;
-			dlil_detach_waiting = 1;
-			wakeup(&dlil_detach_waiting);
+#if INET
+	igmp_domifdetach(ifp);
+#endif /* INET */
+#if INET6
+	mld_domifdetach(ifp);
+#endif /* INET6 */
+
+	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
+
+	/* Let worker thread take care of the rest, to avoid reentrancy */
+	lck_mtx_lock(&dlil_ifnet_lock);
+	ifnet_detaching_enqueue(ifp);
+	lck_mtx_unlock(&dlil_ifnet_lock);
+
+	return (0);
+}
+
+static void
+ifnet_detaching_enqueue(struct ifnet *ifp)
+{
+	lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
+
+	++ifnet_detaching_cnt;
+	VERIFY(ifnet_detaching_cnt != 0);
+	TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
+	wakeup((caddr_t)&ifnet_delayed_run);
+}
+
+static struct ifnet *
+ifnet_detaching_dequeue(void)
+{
+	struct ifnet *ifp;
+
+	lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
+
+	ifp = TAILQ_FIRST(&ifnet_detaching_head);
+	VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
+	if (ifp != NULL) {
+		VERIFY(ifnet_detaching_cnt != 0);
+		--ifnet_detaching_cnt;
+		TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
+		ifp->if_detaching_link.tqe_next = NULL;
+		ifp->if_detaching_link.tqe_prev = NULL;
+	}
+	return (ifp);
+}
+
+static void
+ifnet_delayed_thread_func(void)
+{
+	struct ifnet *ifp;
+
+	for (;;) {
+		lck_mtx_lock(&dlil_ifnet_lock);
+		while (ifnet_detaching_cnt == 0) {
+			(void) msleep(&ifnet_delayed_run, &dlil_ifnet_lock,
+			    (PZERO - 1), "ifnet_delayed_thread", NULL);
+		}
+
+		VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
+
+		/* Take care of detaching ifnet */
+		ifp = ifnet_detaching_dequeue();
+		if (ifp != NULL) {
+			lck_mtx_unlock(&dlil_ifnet_lock);
+			ifnet_detach_final(ifp);
+		} else {
+			lck_mtx_unlock(&dlil_ifnet_lock);
 		}
-		return retval;
 	}
+}
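
The enqueue/dequeue pair and the msleep()/wakeup() loop above form a standard mutex-guarded work queue. A condensed sketch of the same shape (names hypothetical; the mutex is assumed to be initialized elsewhere):

#include <sys/queue.h>

struct work {
	TAILQ_ENTRY(work) w_link;
};
static TAILQ_HEAD(, work) work_head = TAILQ_HEAD_INITIALIZER(work_head);
static int work_run;		/* sleep/wakeup channel */

static void
work_enqueue(lck_mtx_t *lock, struct work *w)
{
	lck_mtx_lock(lock);
	TAILQ_INSERT_TAIL(&work_head, w, w_link);
	wakeup(&work_run);			/* rouse the worker */
	lck_mtx_unlock(lock);
}

static struct work *
work_dequeue(lck_mtx_t *lock)
{
	struct work *w;

	lck_mtx_lock(lock);
	while (TAILQ_EMPTY(&work_head))
		(void) msleep(&work_run, lock, (PZERO - 1),
		    "work_wait", NULL);
	w = TAILQ_FIRST(&work_head);
	TAILQ_REMOVE(&work_head, w, w_link);
	lck_mtx_unlock(lock);		/* process w outside the lock */
	return (w);
}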
 
-#if PF
-	/*
-	 * Detach this interface from packet filter, if enabled.
+static void
+ifnet_detach_final(struct ifnet *ifp)
+{
+	struct ifnet_filter *filter, *filter_next;
+	struct ifnet_filter_head fhead;
+	struct dlil_threading_info *inputthread;
+	struct ifaddr *ifa;
+	ifnet_detached_func if_free;
+	int i;
+
+	lck_mtx_lock(&ifp->if_ref_lock);
+	if (!(ifp->if_refflags & IFRF_DETACHING)) {
+		panic("%s: flags mismatch (detaching not set) ifp=%p",
+		    __func__, ifp);
+		/* NOTREACHED */
+	}
+
+	/*
+	 * Wait until the existing IO references get released
+	 * before we proceed with ifnet_detach.
+	 */
-	pf_ifnet_hook(ifp, 0);
-#endif /* PF */
+	while (ifp->if_refio > 0) {
+		printf("%s: Waiting for IO references on %s%d interface "
+		    "to be released\n", __func__, ifp->if_name, ifp->if_unit);
+		(void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
+		    (PZERO - 1), "ifnet_ioref_wait", NULL);
+	}
+	lck_mtx_unlock(&ifp->if_ref_lock);
+
+	/* Detach interface filters */
+	lck_mtx_lock(&ifp->if_flt_lock);
+	if_flt_monitor_enter(ifp);
 
-	/* Steal the list of interface filters */
+	lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
 	fhead = ifp->if_flt_head;
 	TAILQ_INIT(&ifp->if_flt_head);
 
-	/* unuse the interface */
-	zeroed = ifp_unuse(ifp);
+	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
+		filter_next = TAILQ_NEXT(filter, filt_next);
+		lck_mtx_unlock(&ifp->if_flt_lock);
+
+		dlil_detach_filter_internal(filter, 1);
+		lck_mtx_lock(&ifp->if_flt_lock);
+	}
+	if_flt_monitor_leave(ifp);
+	lck_mtx_unlock(&ifp->if_flt_lock);
+
+	/* Tell upper layers to drop their network addresses */
+	if_purgeaddrs(ifp);
+
+	ifnet_lock_exclusive(ifp);
+
+	/* Unplumb all protocols */
+	for (i = 0; i < PROTO_HASH_SLOTS; i++) {
+		struct if_proto *proto;
+
+		proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
+		while (proto != NULL) {
+			protocol_family_t family = proto->protocol_family;
+			ifnet_lock_done(ifp);
+			proto_unplumb(family, ifp);
+			ifnet_lock_exclusive(ifp);
+			proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
+		}
+		/* There should not be any protocols left */
+		VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
+	}
+	zfree(dlif_phash_zone, ifp->if_proto_hash);
+	ifp->if_proto_hash = NULL;
+
+	/* Detach (permanent) link address from if_addrhead */
+	ifa = TAILQ_FIRST(&ifp->if_addrhead);
+	VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
+	IFA_LOCK(ifa);
+	if_detach_link_ifa(ifp, ifa);
+	IFA_UNLOCK(ifa);
+
+	/* Remove (permanent) link address from ifnet_addrs[] */
+	IFA_REMREF(ifa);
+	ifnet_addrs[ifp->if_index - 1] = NULL;
+
+	/* This interface should not be on {ifnet_head,detaching} */
+	VERIFY(ifp->if_link.tqe_next == NULL);
+	VERIFY(ifp->if_link.tqe_prev == NULL);
+	VERIFY(ifp->if_detaching_link.tqe_next == NULL);
+	VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
+
+	/* Prefix list should be empty by now */
+	VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead));
+
+	/* The slot should have been emptied */
+	VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
+
+	/* There should not be any addresses left */
+	VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
 
 	/*
 	 * If thread affinity was set for the workloop thread, we will need
@@ -2853,16 +3418,19 @@ ifnet_detach(
 		if (inputthread->net_affinity) {
 			struct thread *tp;
 
-			if (inputthread == dlil_lo_thread_ptr)
-				panic("Thread affinity should not be enabled "
-				    "on the loopback dlil input thread\n");
+			if (inputthread == dlil_lo_thread_ptr) {
+				panic("%s: Thread affinity should not be "
+				    "enabled on the loopback dlil input "
+				    "thread", __func__);
+				/* NOTREACHED */
+			}
 
-			lck_mtx_lock(inputthread->input_lck);
+			lck_mtx_lock_spin(&inputthread->input_lck);
 			tp = inputthread->workloop_thread;
 			inputthread->workloop_thread = NULL;
 			inputthread->tag = 0;
 			inputthread->net_affinity = FALSE;
-			lck_mtx_unlock(inputthread->input_lck);
+			lck_mtx_unlock(&inputthread->input_lck);
 
 			/* Tear down workloop thread affinity */
 			if (tp != NULL) {
@@ -2882,183 +3450,290 @@ ifnet_detach(
 
 		if (inputthread != dlil_lo_thread_ptr) {
 #ifdef DLIL_DEBUG
-			printf("ifnet_detach: wakeup thread threadinfo: %p "
+			printf("%s: wakeup thread threadinfo: %p "
 			    "input_thread=%p threads: cur=%d max=%d\n",
-			    inputthread, inputthread->input_thread,
+			    __func__, inputthread, inputthread->input_thread,
 			    dlil_multithreaded_input, cur_dlil_input_threads);
 #endif
-			lck_mtx_lock(inputthread->input_lck);
+			lck_mtx_lock_spin(&inputthread->input_lck);
 
 			inputthread->input_waiting |= DLIL_INPUT_TERMINATE;
-			if ((inputthread->input_waiting & DLIL_INPUT_RUNNING) == 0) {
+			if (!(inputthread->input_waiting & DLIL_INPUT_RUNNING))
 				wakeup((caddr_t)&inputthread->input_waiting);
-			}
-			lck_mtx_unlock(inputthread->input_lck);
+
+			lck_mtx_unlock(&inputthread->input_lck);
 		}
 	}
-	/* last chance to clean up IPv4 forwarding cached route */
-	lck_mtx_lock(ifp->if_fwd_route_lock);
-	if (ifp->if_fwd_route.ro_rt != NULL) {
+
+	/* The driver might unload, so point these to ourselves */
+	if_free = ifp->if_free;
+	ifp->if_output = ifp_if_output;
+	ifp->if_ioctl = ifp_if_ioctl;
+	ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
+	ifp->if_free = ifp_if_free;
+	ifp->if_demux = ifp_if_demux;
+	ifp->if_event = ifp_if_event;
+	ifp->if_framer = ifp_if_framer;
+	ifp->if_add_proto = ifp_if_add_proto;
+	ifp->if_del_proto = ifp_if_del_proto;
+	ifp->if_check_multi = ifp_if_check_multi;
+
+	ifnet_lock_done(ifp);
+
+#if PF
+	/*
+	 * Detach this interface from packet filter, if enabled.
+	 */
+	pf_ifnet_hook(ifp, 0);
+#endif /* PF */
+
+	/* Filter list should be empty */
+	lck_mtx_lock_spin(&ifp->if_flt_lock);
+	VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
+	VERIFY(ifp->if_flt_busy == 0);
+	VERIFY(ifp->if_flt_waiters == 0);
+	lck_mtx_unlock(&ifp->if_flt_lock);
+
+	/* Last chance to cleanup any cached route */
+	lck_mtx_lock(&ifp->if_cached_route_lock);
+	VERIFY(!ifp->if_fwd_cacheok);
+	if (ifp->if_fwd_route.ro_rt != NULL)
 		rtfree(ifp->if_fwd_route.ro_rt);
-		ifp->if_fwd_route.ro_rt = NULL;
-	}
-	lck_mtx_unlock(ifp->if_fwd_route_lock);
-	dlil_write_end();
-	
-	for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
-		filter_next = TAILQ_NEXT(filter, filt_next);
-		dlil_detach_filter_internal(filter, 1);
-	}
-	
-	if (zeroed != 0) {
-		ifp_use_reached_zero(ifp);
+	bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
+	if (ifp->if_src_route.ro_rt != NULL)
+		rtfree(ifp->if_src_route.ro_rt);
+	bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
+	if (ifp->if_src_route6.ro_rt != NULL)
+		rtfree(ifp->if_src_route6.ro_rt);
+	bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
+	lck_mtx_unlock(&ifp->if_cached_route_lock);
+
+	ifnet_llreach_ifdetach(ifp);
+
+	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
+
+	if (if_free != NULL)
+		if_free(ifp);
+
+	/*
+	 * Finally, mark this ifnet as detached.
+	 */
+	lck_mtx_lock_spin(&ifp->if_ref_lock);
+	if (!(ifp->if_refflags & IFRF_DETACHING)) {
+		panic("%s: flags mismatch (detaching not set) ifp=%p",
+		    __func__, ifp);
+		/* NOTREACHED */
 	}
-	
-	return retval;
+	ifp->if_refflags &= ~IFRF_DETACHING;
+	lck_mtx_unlock(&ifp->if_ref_lock);
+
+	if (dlil_verbose)
+		printf("%s%d: detached\n", ifp->if_name, ifp->if_unit);
+
+	/* Release reference held during ifnet attach */
+	ifnet_release(ifp);
 }
 
 static errno_t
-dlil_recycle_ioctl(
-	__unused ifnet_t ifnet_ptr,
-	__unused u_long ioctl_code,
-	__unused void *ioctl_arg)
+ifp_if_output(struct ifnet *ifp, struct mbuf *m)
 {
-    return EOPNOTSUPP;
+#pragma unused(ifp)
+	m_freem(m);
+	return (0);
 }
 
-static int
-dlil_recycle_output(
-	__unused struct ifnet *ifnet_ptr,
-	struct mbuf *m)
+static errno_t
+ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
 {
-    m_freem(m);
-    return 0;
+#pragma unused(ifp, fh, pf)
+	m_freem(m);
+	return (EJUSTRETURN);
 }
 
-static void
-dlil_recycle_free(
-	__unused ifnet_t ifnet_ptr)
+static errno_t
+ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
+    const struct ifnet_demux_desc *da, u_int32_t dc)
 {
+#pragma unused(ifp, pf, da, dc)
+	return (EINVAL);
 }
 
 static errno_t
-dlil_recycle_set_bpf_tap(
-	__unused ifnet_t ifp,
-	__unused bpf_tap_mode mode,
-	__unused bpf_packet_func callback)
+ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
 {
-    /* XXX not sure what to do here */
-    return 0;
+#pragma unused(ifp, pf)
+	return (EINVAL);
+}
+
+static errno_t
+ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
+{
+#pragma unused(ifp, sa)
+	return (EOPNOTSUPP);
+}
+
+static errno_t
+ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
+    const struct sockaddr *sa, const char *ll, const char *t)
+{
+#pragma unused(ifp, m, sa, ll, t)
+	m_freem(*m);
+	*m = NULL;
+	return (EJUSTRETURN);
+}
+
+static errno_t
+ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
+{
+#pragma unused(ifp, cmd, arg)
+	return (EOPNOTSUPP);
+}
+
+static errno_t
+ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
+{
+#pragma unused(ifp, tm, f)
+	/* XXX not sure what to do here */
+	return (0);
+}
+
+static void
+ifp_if_free(struct ifnet *ifp)
+{
+#pragma unused(ifp)
+}
+
+static void
+ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
+{
+#pragma unused(ifp, e)
 }
 
 __private_extern__
-int dlil_if_acquire(
-	u_int32_t family,
-	const void *uniqueid,
-	size_t uniqueid_len, 
-	struct ifnet **ifp)
-{
-    struct ifnet	*ifp1 = NULL;
-    struct dlil_ifnet	*dlifp1 = NULL;
-    int	ret = 0;
-
-    lck_mtx_lock(dlil_ifnet_mutex);
-    TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
-        
-        ifp1 = (struct ifnet *)dlifp1;
-            
-		if (ifp1->if_family == family)  {
-        
-            /* same uniqueid and same len or no unique id specified */
-            if ((uniqueid_len == dlifp1->if_uniqueid_len)
-                && !bcmp(uniqueid, dlifp1->if_uniqueid, uniqueid_len)) {
-                
-				/* check for matching interface in use */
-				if (ifp1->if_eflags & IFEF_INUSE) {
-					if (uniqueid_len) {
-						ret = EBUSY;
-						goto end;
-					}
-				}
-				else {
-					if (!ifp1->if_lock)
-						panic("ifp's lock is gone\n");
-					ifnet_lock_exclusive(ifp1);
-					ifp1->if_eflags |= (IFEF_INUSE | IFEF_REUSE);
-					ifnet_lock_done(ifp1);
-					*ifp = ifp1;
+int dlil_if_acquire(u_int32_t family, const void *uniqueid,
+    size_t uniqueid_len, struct ifnet **ifp)
+{
+	struct ifnet *ifp1 = NULL;
+	struct dlil_ifnet *dlifp1 = NULL;
+	void *buf, *base, **pbuf;
+	int ret = 0;
+
+	lck_mtx_lock(&dlil_ifnet_lock);
+	TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
+		ifp1 = (struct ifnet *)dlifp1;
+
+		if (ifp1->if_family != family)
+			continue;
+
+		lck_mtx_lock(&dlifp1->dl_if_lock);
+		/* same uniqueid and same len or no unique id specified */
+		if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) &&
+		    !bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len)) {
+			/* check for matching interface in use */
+			if (dlifp1->dl_if_flags & DLIF_INUSE) {
+				if (uniqueid_len) {
+					ret = EBUSY;
+					lck_mtx_unlock(&dlifp1->dl_if_lock);
 					goto end;
-            	}
-            }
-        }
-    }
-
-    /* no interface found, allocate a new one */
-    MALLOC(dlifp1, struct dlil_ifnet *, sizeof(*dlifp1), M_NKE, M_WAITOK);
-    if (dlifp1 == 0) {
-        ret = ENOMEM;
-        goto end;
-    }
-    
-    bzero(dlifp1, sizeof(*dlifp1));
-    
-    if (uniqueid_len) {
-        MALLOC(dlifp1->if_uniqueid, void *, uniqueid_len, M_NKE, M_WAITOK);
-        if (dlifp1->if_uniqueid == 0) {
-            FREE(dlifp1, M_NKE);
-            ret = ENOMEM;
-           goto end;
-        }
-        bcopy(uniqueid, dlifp1->if_uniqueid, uniqueid_len);
-        dlifp1->if_uniqueid_len = uniqueid_len;
-    }
-
-    ifp1 = (struct ifnet *)dlifp1;
-    ifp1->if_eflags |= IFEF_INUSE;
-    ifp1->if_name = dlifp1->if_namestorage;
+				}
+			} else {
+				dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
+				lck_mtx_unlock(&dlifp1->dl_if_lock);
+				*ifp = ifp1;
+				goto end;
+			}
+		}
+		lck_mtx_unlock(&dlifp1->dl_if_lock);
+	}
+
+	/* no interface found, allocate a new one */
+	buf = zalloc(dlif_zone);
+	if (buf == NULL) {
+		ret = ENOMEM;
+		goto end;
+	}
+	bzero(buf, dlif_bufsize);
+
+	/* Get the 64-bit aligned base address for this object */
+	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
+	    sizeof (u_int64_t));
+	VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
+
+	/*
+	 * Wind back a pointer size from the aligned base and
+	 * save the original address so we can free it later.
+	 */
+	pbuf = (void **)((intptr_t)base - sizeof (void *));
+	*pbuf = buf;
+	dlifp1 = base;
+
+	if (uniqueid_len) {
+		MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
+		    M_NKE, M_WAITOK);
+		if (dlifp1->dl_if_uniqueid == NULL) {
+			zfree(dlif_zone, dlifp1);
+			ret = ENOMEM;
+			goto end;
+		}
+		bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
+		dlifp1->dl_if_uniqueid_len = uniqueid_len;
+	}
+
+	ifp1 = (struct ifnet *)dlifp1;
+	dlifp1->dl_if_flags = DLIF_INUSE;
+	if (ifnet_debug) {
+		dlifp1->dl_if_flags |= DLIF_DEBUG;
+		dlifp1->dl_if_trace = dlil_if_trace;
+	}
+	ifp1->if_name = dlifp1->dl_if_namestorage;
 #if CONFIG_MACF_NET
-    mac_ifnet_label_init(ifp1);
+	mac_ifnet_label_init(ifp1);
 #endif
 
-    TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
-     
-     *ifp = ifp1;
+	lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
+	lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
+	lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
+	lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
+	lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_lock_group,
+	    ifnet_lock_attr);
+	lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
+	    ifnet_lock_attr);
+	lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
+
+	TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
+
+	*ifp = ifp1;
 
 end:
-	lck_mtx_unlock(dlil_ifnet_mutex);
+	lck_mtx_unlock(&dlil_ifnet_lock);
 
-    return ret;
+	VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
+	    IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));
+
+	return (ret);
 }
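
The alignment dance above over-allocates, rounds the base up to an 8-byte boundary, and stashes the raw allocation one pointer-slot behind the aligned base so it can be recovered at free time. A user-level sketch of the same trick (malloc/free standing in for zalloc/zfree):

#include <stdint.h>
#include <stdlib.h>

#define P2ROUNDUP(x, a) (-(-(intptr_t)(x) & -(intptr_t)(a)))

void *
aligned_alloc64(size_t size)
{
	/* slack for alignment plus room for the stashed raw pointer */
	void *buf = calloc(1, size + sizeof (uint64_t) + sizeof (void *));
	void *base, **pbuf;

	if (buf == NULL)
		return (NULL);
	base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (void *),
	    sizeof (uint64_t));
	pbuf = (void **)((intptr_t)base - sizeof (void *));
	*pbuf = buf;			/* remember what to free */
	return (base);
}

void
aligned_free64(void *base)
{
	free(*(void **)((intptr_t)base - sizeof (void *)));
}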
 
 __private_extern__ void
-dlil_if_release(
-	ifnet_t	ifp)
-{
-    struct dlil_ifnet	*dlifp = (struct dlil_ifnet *)ifp;
-    
-    /* Interface does not have a lock until it is attached - radar 3713951 */
-    if (ifp->if_lock)
-		ifnet_lock_exclusive(ifp);
-    ifp->if_eflags &= ~IFEF_INUSE;
-    ifp->if_ioctl = dlil_recycle_ioctl;
-    ifp->if_output = dlil_recycle_output;
-    ifp->if_free = dlil_recycle_free;
-    ifp->if_set_bpf_tap = dlil_recycle_set_bpf_tap;
-
-    strncpy(dlifp->if_namestorage, ifp->if_name, IFNAMSIZ);
-    ifp->if_name = dlifp->if_namestorage;
+dlil_if_release(ifnet_t ifp)
+{
+	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
+
+	ifnet_lock_exclusive(ifp);
+	lck_mtx_lock(&dlifp->dl_if_lock);
+	dlifp->dl_if_flags &= ~DLIF_INUSE;
+	strncpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
+	ifp->if_name = dlifp->dl_if_namestorage;
+	lck_mtx_unlock(&dlifp->dl_if_lock);
 #if CONFIG_MACF_NET
-    /*
-     * We can either recycle the MAC label here or in dlil_if_acquire().
-     * It seems logical to do it here but this means that anything that
-     * still has a handle on ifp will now see it as unlabeled.
-     * Since the interface is "dead" that may be OK.  Revisit later.
-     */
-    mac_ifnet_label_recycle(ifp);
+	/*
+	 * We can either recycle the MAC label here or in dlil_if_acquire().
+	 * It seems logical to do it here but this means that anything that
+	 * still has a handle on ifp will now see it as unlabeled.
+	 * Since the interface is "dead" that may be OK.  Revisit later.
+	 */
+	mac_ifnet_label_recycle(ifp);
 #endif
-    if (ifp->if_lock)
-		ifnet_lock_done(ifp);
-    
+	ifnet_lock_done(ifp);
 }
 
 __private_extern__ void
@@ -3081,3 +3756,138 @@ dlil_proto_unplumb_all(struct ifnet *ifp)
 	(void) proto_unplumb(PF_APPLETALK, ifp);
 #endif /* NETAT */
 }
+
+static void
+ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
+{
+	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
+	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
+
+	route_copyout(dst, &ifp->if_src_route, sizeof (*dst));
+
+	lck_mtx_unlock(&ifp->if_cached_route_lock);
+}
+
+static void
+ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
+{
+	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
+	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
+
+	if (ifp->if_fwd_cacheok) {
+		route_copyin(src, &ifp->if_src_route, sizeof (*src));
+	} else {
+		rtfree(src->ro_rt);
+		src->ro_rt = NULL;
+	}
+	lck_mtx_unlock(&ifp->if_cached_route_lock);
+}
+
+#if INET6
+static void
+ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
+{
+	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
+	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
+
+	route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
+	    sizeof (*dst));
+
+	lck_mtx_unlock(&ifp->if_cached_route_lock);
+}
+
+static void
+ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
+{
+	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
+	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
+
+	if (ifp->if_fwd_cacheok) {
+		route_copyin((struct route *)src,
+		    (struct route *)&ifp->if_src_route6, sizeof (*src));
+	} else {
+		rtfree(src->ro_rt);
+		src->ro_rt = NULL;
+	}
+	lck_mtx_unlock(&ifp->if_cached_route_lock);
+}
+#endif /* INET6 */
+
+struct rtentry *
+ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
+{
+	struct route		src_rt;
+	struct sockaddr_in	*dst = (struct sockaddr_in *)(&src_rt.ro_dst);
+
+	ifp_src_route_copyout(ifp, &src_rt);
+
+	if (src_rt.ro_rt == NULL || !(src_rt.ro_rt->rt_flags & RTF_UP) ||
+	    src_ip.s_addr != dst->sin_addr.s_addr ||
+	    src_rt.ro_rt->generation_id != route_generation) {
+		if (src_rt.ro_rt != NULL) {
+			rtfree(src_rt.ro_rt);
+			src_rt.ro_rt = NULL;
+		} else if (dst->sin_family != AF_INET) {
+			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
+			dst->sin_len = sizeof (src_rt.ro_dst);
+			dst->sin_family = AF_INET;
+		}
+		dst->sin_addr = src_ip;
+
+		if (src_rt.ro_rt == NULL) {
+			src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
+			    0, 0, ifp->if_index);
+
+			if (src_rt.ro_rt != NULL) {
+				/* retain a ref, copyin consumes one */
+				struct rtentry	*rte = src_rt.ro_rt;
+				RT_ADDREF(rte);
+				ifp_src_route_copyin(ifp, &src_rt);
+				src_rt.ro_rt = rte;
+			}
+		}
+	}
+
+	return (src_rt.ro_rt);
+}
+
+#if INET6
+struct rtentry *
+ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
+{
+	struct route_in6 src_rt;
+
+	ifp_src_route6_copyout(ifp, &src_rt);
+
+	if (src_rt.ro_rt == NULL || !(src_rt.ro_rt->rt_flags & RTF_UP) ||
+	    !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr) ||
+	    src_rt.ro_rt->generation_id != route_generation) {
+		if (src_rt.ro_rt != NULL) {
+			rtfree(src_rt.ro_rt);
+			src_rt.ro_rt = NULL;
+		} else if (src_rt.ro_dst.sin6_family != AF_INET6) {
+			bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
+			src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
+			src_rt.ro_dst.sin6_family = AF_INET6;
+		}
+		src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
+		src_rt.ro_dst.sin6_addr = *src_ip6;
+
+		if (src_rt.ro_rt == NULL) {
+			src_rt.ro_rt = rtalloc1_scoped(
+			    (struct sockaddr *)&src_rt.ro_dst, 0, 0,
+			    ifp->if_index);
+
+			if (src_rt.ro_rt != NULL) {
+				/* retain a ref, copyin consumes one */
+				struct rtentry	*rte = src_rt.ro_rt;
+				RT_ADDREF(rte);
+				ifp_src_route6_copyin(ifp, &src_rt);
+				src_rt.ro_rt = rte;
+			}
+		}
+	}
+
+	return (src_rt.ro_rt);
+}
+#endif /* INET6 */
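
Both lookup routines return a route entry on which the caller owns a
reference; note the extra RT_ADDREF() before the copyin, since
ifp_src_route_copyin() consumes one reference while the caller keeps the
other. A sketch of a consumer, with illustrative caller-side names:

	struct rtentry *rt;

	rt = ifnet_cached_rtlookup_inet(ifp, ip->ip_src);
	if (rt != NULL) {
		/* ... forward the packet using rt ... */
		rtfree(rt);	/* drop the caller's reference */
	}
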
diff --git a/bsd/net/dlil.h b/bsd/net/dlil.h
index e91dc7a01..db1060db8 100644
--- a/bsd/net/dlil.h
+++ b/bsd/net/dlil.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,12 +25,6 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- *	Copyright (c) 1999 Apple Computer, Inc. 
- *
- *	Data Link Inteface Layer
- *	Author: Ted Walker
- */
 #ifndef DLIL_H
 #define DLIL_H
 #ifdef KERNEL
@@ -82,22 +76,22 @@ struct sockaddr_dl;
 
 #endif
 
-#ifdef BSD_KERNEL_PRIVATE
-struct ifnet_stat_increment_param;
 struct iff_filter;
 
+#define	DLIL_THREADNAME_LEN	32
+
 struct dlil_threading_info {
-	mbuf_t 		mbuf_head;	/* start of mbuf list from if */
-	mbuf_t 		mbuf_tail;
-	u_int32_t 	mbuf_count;
+	decl_lck_mtx_data(, input_lck);
+	lck_grp_t	*lck_grp;	/* lock group (for lock stats) */
+	mbuf_t		mbuf_head;	/* start of mbuf list from if */
+	mbuf_t		mbuf_tail;
+	u_int32_t	mbuf_count;
 	boolean_t	net_affinity;	/* affinity set is available */
-	u_int32_t 	input_waiting;	/* DLIL condition of thread */
+	u_int32_t	input_waiting;	/* DLIL condition of thread */
 	struct thread	*input_thread;	/* thread data for this input */
 	struct thread	*workloop_thread; /* current workloop thread */
 	u_int32_t	tag;		/* current affinity tag */
-	lck_mtx_t	*input_lck;	
-	lck_grp_t	*lck_grp;	/* lock group (for lock stats) */
-	char 		input_name[32];		 
+	char		input_name[DLIL_THREADNAME_LEN];
 #if IFNET_INPUT_SANITY_CHK
 	u_int32_t	input_wake_cnt;	/* number of times the thread was woken up with packets to process */
 	u_long		input_mbuf_cnt;	/* total number of mbuf packets processed by this thread */
@@ -105,8 +99,8 @@ struct dlil_threading_info {
 };
 
 /*
-	The following are shared with kpi_protocol.c so that it may wakeup
-	the input thread to run through packets queued for protocol input.
+ * The following are shared with kpi_protocol.c so that it may wake up
+ * the input thread to run through packets queued for protocol input.
 */
 #define	DLIL_INPUT_RUNNING	0x80000000
 #define	DLIL_INPUT_WAITING	0x40000000
@@ -114,79 +108,52 @@ struct dlil_threading_info {
 #define	DLIL_PROTO_WAITING	0x10000000
 #define	DLIL_INPUT_TERMINATE	0x08000000
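
These flags form a small condition protocol between kpi_protocol.c and
each input thread, which sleeps on &inp->input_waiting. A hedged sketch
of the waker side (not the exact body of the kpi_protocol.c code, but
consistent with the field and flag names above):

	static void
	proto_wakeup_input(struct dlil_threading_info *inp)
	{
		lck_mtx_lock(&inp->input_lck);
		inp->input_waiting |= DLIL_PROTO_WAITING;
		if (!(inp->input_waiting & DLIL_INPUT_RUNNING))
			wakeup((caddr_t)&inp->input_waiting);
		lck_mtx_unlock(&inp->input_lck);
	}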
 
-void dlil_init(void);
+extern void dlil_init(void);
 
-errno_t dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode,
-						 bpf_packet_func callback);
+extern errno_t dlil_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
 
 /*
- * Send arp internal bypasses the check for
- * IPv4LL.
+ * Send arp internal bypasses the check for IPv4LL.
  */
-errno_t
-dlil_send_arp_internal(
-	ifnet_t	ifp,
-	u_int16_t arpop,
-	const struct sockaddr_dl* sender_hw,
-	const struct sockaddr* sender_proto,
-	const struct sockaddr_dl* target_hw,
-	const struct sockaddr* target_proto);
-
-int
-dlil_output(
-	ifnet_t					ifp,
-	protocol_family_t		proto_family,
-	mbuf_t					packetlist,
-	void					*route,
-	const struct sockaddr	*dest,
-	int						raw);
-
-errno_t
-dlil_resolve_multi(
-	struct ifnet *ifp,
-	const struct sockaddr *proto_addr,
-	struct sockaddr *ll_addr,
-	size_t ll_len);
-
-errno_t
-dlil_send_arp(
-	ifnet_t	ifp,
-	u_int16_t arpop,
-	const struct sockaddr_dl* sender_hw,
-	const struct sockaddr* sender_proto,
-	const struct sockaddr_dl* target_hw,
-	const struct sockaddr* target_proto);
-
-int dlil_attach_filter(ifnet_t ifp, const struct iff_filter *if_filter,
-					   interface_filter_t *filter_ref);
-void dlil_detach_filter(interface_filter_t filter);
-int dlil_detach_protocol(ifnet_t ifp, u_int32_t protocol);
-extern void dlil_proto_unplumb_all(ifnet_t);
+extern errno_t dlil_send_arp_internal(ifnet_t, u_int16_t,
+    const struct sockaddr_dl *, const struct sockaddr *,
+    const struct sockaddr_dl *, const struct sockaddr *);
 
-#endif /* BSD_KERNEL_PRIVATE */
+extern int dlil_output(ifnet_t, protocol_family_t, mbuf_t, void *,
+    const struct sockaddr *, int);
 
-void
-dlil_post_msg(struct ifnet *ifp,u_int32_t event_subclass, u_int32_t event_code, 
-		   struct net_event_data *event_data, u_int32_t event_data_len);
+extern void dlil_input_packet_list(struct ifnet *, struct mbuf *);
 
-/* 
- * dlil_if_acquire is obsolete. Use ifnet_allocate.
- */
+extern errno_t dlil_resolve_multi(struct ifnet *,
+    const struct sockaddr *, struct sockaddr *, size_t);
+
+extern errno_t dlil_send_arp(ifnet_t, u_int16_t, const struct sockaddr_dl *,
+    const struct sockaddr *, const struct sockaddr_dl *,
+    const struct sockaddr *);
 
-int dlil_if_acquire(u_int32_t family, const void *uniqueid, size_t uniqueid_len, 
-			struct ifnet **ifp);
-			
+extern int dlil_attach_filter(ifnet_t, const struct iff_filter *,
+    interface_filter_t *);
+extern void dlil_detach_filter(interface_filter_t);
 
-/* 
+extern void dlil_proto_unplumb_all(ifnet_t);
+
+extern void dlil_post_msg(struct ifnet *, u_int32_t, u_int32_t,
+    struct net_event_data *, u_int32_t);
+
+/*
+ * dlil_if_acquire is obsolete. Use ifnet_allocate.
+ */
+extern int dlil_if_acquire(u_int32_t, const void *, size_t, struct ifnet **);
+/*
  * dlil_if_release is obsolete. The equivalent is called automatically when
  * an interface is detached.
  */
+extern void dlil_if_release(struct ifnet *ifp);
 
-void dlil_if_release(struct ifnet *ifp);
-
-#if IFNET_ROUTE_REFCNT
 extern u_int32_t ifnet_aggressive_drainers;
-#endif /* IFNET_ROUTE_REFCNT */
+
+extern errno_t dlil_if_ref(struct ifnet *);
+extern errno_t dlil_if_free(struct ifnet *);
 
 #endif /* KERNEL_PRIVATE */
 #endif /* KERNEL */
diff --git a/bsd/net/ether_if_module.c b/bsd/net/ether_if_module.c
index fc1d9e4cf..a1cbfb3d1 100644
--- a/bsd/net/ether_if_module.c
+++ b/bsd/net/ether_if_module.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -98,6 +98,9 @@
 #include <sys/socketvar.h>
 #include <net/if_vlan_var.h>
 #include <net/if_bond_var.h>
+#if IF_BRIDGE
+#include <net/if_bridgevar.h>
+#endif /* IF_BRIDGE */
 
 #include <net/dlil.h>
 
@@ -133,7 +136,7 @@ struct en_desc {
 #endif
 
 /*
- * Header for the demux list, hangs off of IFP at family_cookie
+ * Header for the demux list; it hangs off the ifnet at if_family_cookie
  */
 
 struct ether_desc_blk_str {
@@ -147,19 +150,6 @@ struct ether_desc_blk_str {
 __private_extern__ u_char	etherbroadcastaddr[ETHER_ADDR_LEN] =
 								{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
-static __inline__ int
-_ether_cmp(const void * a, const void * b)
-{
-	const u_int16_t * a_s = (const u_int16_t *)a;
-	const u_int16_t * b_s = (const u_int16_t *)b;
-	
-	if (a_s[0] != b_s[0]
-	    || a_s[1] != b_s[1]
-	    || a_s[2] != b_s[2]) {
-		return (1);
-	}
-	return (0);
-}
 
 /*
  * Release all descriptor entries owned by this protocol (there may be several).
@@ -171,7 +161,7 @@ ether_del_proto(
 	ifnet_t ifp,
 	protocol_family_t protocol_family)
 {
-	struct ether_desc_blk_str *desc_blk = (struct ether_desc_blk_str *)ifp->family_cookie;
+	struct ether_desc_blk_str *desc_blk = (struct ether_desc_blk_str *)ifp->if_family_cookie;
 	u_int32_t	current = 0;
 	int found = 0;
 	
@@ -187,8 +177,8 @@ ether_del_proto(
 	}
 	
 	if (desc_blk->n_used == 0) {
-		FREE(ifp->family_cookie, M_IFADDR);
-		ifp->family_cookie = 0;
+		FREE(ifp->if_family_cookie, M_IFADDR);
+		ifp->if_family_cookie = 0;
 	}
 	else {
 		/* Decrement n_max_used */
@@ -207,7 +197,7 @@ ether_add_proto_internal(
 	const struct ifnet_demux_desc	*demux)
 {
 	struct en_desc *ed;
-	struct ether_desc_blk_str *desc_blk = (struct ether_desc_blk_str *)ifp->family_cookie;
+	struct ether_desc_blk_str *desc_blk = (struct ether_desc_blk_str *)ifp->if_family_cookie;
 	u_int32_t i;
 	
 	switch (demux->type) {
@@ -291,7 +281,7 @@ ether_add_proto_internal(
 			FREE(desc_blk, M_IFADDR);
 		}
 		desc_blk = tmp;
-		ifp->family_cookie = (uintptr_t)desc_blk;
+		ifp->if_family_cookie = (uintptr_t)desc_blk;
 		desc_blk->n_count = new_count;
 	}
 	else {
@@ -372,7 +362,7 @@ ether_demux(
 	u_int16_t		type;
 	u_int8_t		*data;
 	u_int32_t			i = 0;
-	struct ether_desc_blk_str *desc_blk = (struct ether_desc_blk_str *)ifp->family_cookie;
+	struct ether_desc_blk_str *desc_blk = (struct ether_desc_blk_str *)ifp->if_family_cookie;
 	u_int32_t			maxd = desc_blk ? desc_blk->n_max_used : 0;
 	struct en_desc	*ed = desc_blk ? desc_blk->block_ptr : NULL;
 	u_int32_t		extProto1 = 0;
@@ -386,6 +376,16 @@ ether_demux(
 			m->m_flags |= M_MCAST;
 	}
 
+	if (m->m_flags & M_HASFCS) {
+		/*
+		 * If the M_HASFCS flag was set by the driver, strip off
+		 * the trailing FCS bytes before handing the packet up
+		 * the stack.
+		 */
+		m_adj(m, -ETHER_CRC_LEN);
+		m->m_flags &= ~M_HASFCS;
+	}
+
 	if (ifp->if_eflags & IFEF_BOND) {
 		/* if we're bonded, bond "protocol" gets all the packets */
 		*protocol_family = PF_BOND;
@@ -632,6 +632,9 @@ __private_extern__ int ether_family_init(void)
 #if BOND
 	bond_family_init();
 #endif /* BOND */
+#if IF_BRIDGE
+	bridgeattach(0);
+#endif /* IF_BRIDGE */
 
  done:
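
The M_HASFCS handling added to ether_demux() above lets a driver hand up
frames with the 4-byte FCS still attached and have the stack trim it. A
hypothetical driver receive path would tag the mbuf instead of trimming:

	/* Hypothetical rx completion; hw_len includes the trailing FCS. */
	mbuf_setlen(m, hw_len);
	mbuf_pkthdr_setlen(m, hw_len);
	m->m_flags |= M_HASFCS;		/* ether_demux() will m_adj() it off */
	ifnet_input(ifp, m, NULL);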
 
diff --git a/bsd/net/ether_inet6_pr_module.c b/bsd/net/ether_inet6_pr_module.c
index 371cccfd6..e8411dec6 100644
--- a/bsd/net/ether_inet6_pr_module.c
+++ b/bsd/net/ether_inet6_pr_module.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -59,8 +59,6 @@
  *
  */
 
-
-
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
@@ -69,6 +67,7 @@
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
+#include <sys/socketvar.h>
 #include <kern/lock.h>
 
 #include <net/if.h>
@@ -78,6 +77,7 @@
 #include <net/if_types.h>
 #include <net/ndrv.h>
 #include <net/kpi_protocol.h>
+#include <net/dlil.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
@@ -90,13 +90,6 @@
 #include <netinet6/in6_ifattach.h>
 #endif
 
-
-
-#include <sys/socketvar.h>
-
-#include <net/dlil.h>
-
-
 #if LLC && CCITT
 extern struct ifqueue pkintrq;
 #endif
@@ -114,70 +107,83 @@ extern struct ifqueue pkintrq;
  * the ether header, which is provided separately.
  */
 static errno_t
-ether_inet6_input(
-	__unused ifnet_t	ifp,
-	protocol_family_t	protocol,
-	mbuf_t				packet,
-	__unused char		*header)
+ether_inet6_input(ifnet_t ifp, protocol_family_t protocol,
+    mbuf_t packet, char *header)
 {
-	errno_t error;
+#pragma unused(ifp, protocol)
+	struct ether_header *eh = (struct ether_header *)header;
+
+	if (eh->ether_type == htons(ETHERTYPE_IPV6)) {
+		struct ifnet *mifp;
+		/*
+		 * Trust the ifp in the mbuf, rather than ifproto's
+		 * since the packet could have been injected via
+		 * a dlil_input_packet_list() using an ifp that is
+		 * different than the one where the packet really
+		 * came from.
+		 */
+		mifp = mbuf_pkthdr_rcvif(packet);
+
+		/* Update L2 reachability record, if present (and not bcast) */
+		if (bcmp(eh->ether_shost, etherbroadcastaddr,
+		    ETHER_ADDR_LEN) != 0) {
+			nd6_llreach_set_reachable(mifp, eh->ether_shost,
+			    ETHER_ADDR_LEN);
+		}
 
-	if ((error = proto_input(protocol, packet)))
+		if (proto_input(protocol, packet) != 0)
+			m_freem(packet);
+	} else {
 		m_freem(packet);
-	return error;
+	}
+
+	return (EJUSTRETURN);
 }
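
Returning EJUSTRETURN tells the DLIL caller that this handler has taken
ownership of the mbuf (delivered or freed), so the caller must not touch
it again. A protocol input hook following the same convention looks
roughly like this sketch (handler and predicate names are illustrative):

	static errno_t
	proto_input_hook(ifnet_t ifp, protocol_family_t protocol,
	    mbuf_t packet, char *header)
	{
	#pragma unused(ifp)
		if (packet_is_acceptable(packet, header)) {
			if (proto_input(protocol, packet) != 0)
				m_freem(packet);	/* delivery failed */
		} else {
			m_freem(packet);		/* rejected; still ours */
		}
		return (EJUSTRETURN);			/* consumed either way */
	}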
 
 static errno_t
-ether_inet6_pre_output(
-    ifnet_t		    			ifp,
-    __unused protocol_family_t	protocol_family,
-    mbuf_t			     		*m0,
-    const struct sockaddr		*dst_netaddr,
-    void						*route,
-    char						*type,
-    char						*edst)
+ether_inet6_pre_output(ifnet_t ifp, protocol_family_t protocol_family,
+    mbuf_t *m0, const struct sockaddr *dst_netaddr, void *route,
+    char *type, char *edst)
 {
+#pragma unused(protocol_family)
 	errno_t	result;
-	struct	sockaddr_dl	sdl;
-	register struct mbuf *m = *m0;
+	struct sockaddr_dl sdl;
+	struct mbuf *m = *m0;
 
 	/*
 	 * Tell ether_frameout it's ok to loop packet if necessary
 	 */
 	m->m_flags |= M_LOOP;
-	
-	result = nd6_lookup_ipv6(ifp, (const struct sockaddr_in6*)dst_netaddr,
-							 &sdl, sizeof(sdl), route, *m0);
-	
+
+	result = nd6_lookup_ipv6(ifp, (const struct sockaddr_in6 *)dst_netaddr,
+	    &sdl, sizeof (sdl), route, *m0);
+
 	if (result == 0) {
-		*(u_int16_t*)type = htons(ETHERTYPE_IPV6);
+		*(u_int16_t *)type = htons(ETHERTYPE_IPV6);
 		bcopy(LLADDR(&sdl), edst, sdl.sdl_alen);
 	}
 
-
-
-    return result;
+	return (result);
 }
 
 static int
-ether_inet6_resolve_multi(
-	ifnet_t	ifp,
-	const struct sockaddr *proto_addr,
-	struct sockaddr_dl *out_ll,
-	size_t	ll_len)
+ether_inet6_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
+    struct sockaddr_dl *out_ll, size_t ll_len)
 {
-	static const size_t minsize = offsetof(struct sockaddr_dl, sdl_data[0]) + ETHER_ADDR_LEN;
-	const struct sockaddr_in6	*sin6 = (const struct sockaddr_in6*)proto_addr;
-	
+	static const size_t minsize =
+	    offsetof(struct sockaddr_dl, sdl_data[0]) + ETHER_ADDR_LEN;
+	const struct sockaddr_in6 *sin6 =
+	    (const struct sockaddr_in6 *)proto_addr;
+
 	if (proto_addr->sa_family != AF_INET6)
-		return EAFNOSUPPORT;
-	
-	if (proto_addr->sa_len < sizeof(struct sockaddr_in6))
-		return EINVAL;
-	
+		return (EAFNOSUPPORT);
+
+	if (proto_addr->sa_len < sizeof (struct sockaddr_in6))
+		return (EINVAL);
+
 	if (ll_len < minsize)
-		return EMSGSIZE;
-	
+		return (EMSGSIZE);
+
 	bzero(out_ll, minsize);
 	out_ll->sdl_len = minsize;
 	out_ll->sdl_family = AF_LINK;
@@ -187,20 +193,17 @@ ether_inet6_resolve_multi(
 	out_ll->sdl_alen = ETHER_ADDR_LEN;
 	out_ll->sdl_slen = 0;
 	ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, LLADDR(out_ll));
-	
-	return 0;
-}
 
+	return (0);
+}
 
 static errno_t
-ether_inet6_prmod_ioctl(
-	ifnet_t				ifp,
-	__unused protocol_family_t	protocol_family,
-	u_long				command,
-	void				*data)
+ether_inet6_prmod_ioctl(ifnet_t ifp, protocol_family_t protocol_family,
+    u_long command, void *data)
 {
-    struct ifreq *ifr = (struct ifreq *) data;
-    int error = 0;
+#pragma unused(protocol_family)
+	struct ifreq *ifr = (struct ifreq *)data;
+	int error = 0;
 
 	switch (command) {
 	case SIOCSIFADDR:
@@ -211,30 +214,30 @@ ether_inet6_prmod_ioctl(
 		break;
 
 	case SIOCGIFADDR:
-	ifnet_lladdr_copy_bytes(ifp, ifr->ifr_addr.sa_data, ETHER_ADDR_LEN);
-	break;
+		(void) ifnet_lladdr_copy_bytes(ifp, ifr->ifr_addr.sa_data,
+		    ETHER_ADDR_LEN);
+		break;
 
-    default:
-	error = EOPNOTSUPP;
-	break;
-    }
-    return (error);
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+	return (error);
 }
 
 errno_t
-ether_attach_inet6(
-	struct ifnet	*ifp,
-	__unused protocol_family_t protocol_family)
+ether_attach_inet6(struct ifnet *ifp, protocol_family_t protocol_family)
 {
+#pragma unused(protocol_family)
 	struct ifnet_attach_proto_param	proto;
 	struct ifnet_demux_desc demux[1];
-	u_short en_6native=htons(ETHERTYPE_IPV6);
+	u_short en_6native = htons(ETHERTYPE_IPV6);
 	errno_t	error;
-	
-	bzero(&proto, sizeof(proto));
+
+	bzero(&proto, sizeof (proto));
 	demux[0].type = DLIL_DESC_ETYPE2;
 	demux[0].data = &en_6native;
-	demux[0].datalen = sizeof(en_6native);
+	demux[0].datalen = sizeof (en_6native);
 	proto.demux_list = demux;
 	proto.demux_count = 1;
 	proto.input = ether_inet6_input;
@@ -243,24 +246,15 @@ ether_attach_inet6(
 	proto.resolve = ether_inet6_resolve_multi;
 	error = ifnet_attach_protocol(ifp, protocol_family, &proto);
 	if (error && error != EEXIST) {
-		printf("WARNING: ether_attach_inet6 can't attach ipv6 to %s%d\n",
-			ifp->if_name, ifp->if_unit);
+		printf("WARNING: %s can't attach ipv6 to %s%d\n", __func__,
+		    ifp->if_name, ifp->if_unit);
 	}
-	
-	return error;
+
+	return (error);
 }
 
 void
-ether_detach_inet6(
-	struct ifnet	*ifp,
-	protocol_family_t protocol_family)
+ether_detach_inet6(struct ifnet *ifp, protocol_family_t protocol_family)
 {
-	errno_t         error;
-
-	error = ifnet_detach_protocol(ifp, protocol_family);
-	if (error && error != ENOENT) {
-		printf("WARNING: ether_detach_inet6 can't detach ipv6 from %s%d\n",
-			ifp->if_name, ifp->if_unit);
-	}
+	(void) ifnet_detach_protocol(ifp, protocol_family);
 }
-
diff --git a/bsd/net/ether_inet_pr_module.c b/bsd/net/ether_inet_pr_module.c
index 422866e73..12a8ead3c 100644
--- a/bsd/net/ether_inet_pr_module.c
+++ b/bsd/net/ether_inet_pr_module.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -103,64 +103,62 @@
 #include <security/mac_framework.h>
 #endif
 
-/* Local function declerations */
+/* Local function declarations */
 extern void *kdp_get_interface(void);
 extern void kdp_set_ip_and_mac_addresses(struct in_addr *ipaddr,
-										 struct ether_addr *macaddr);
+    struct ether_addr *macaddr);
 
-static __inline__ void
-_ip_copy(struct in_addr * dst, const struct in_addr * src)
-{
-	*dst = *src;
-	return;
-}
+#define	_ip_copy(dst, src)	\
+	(*(dst) = *(src))
 
 static void
-ether_inet_arp_input(
-	struct mbuf *m)
+ether_inet_arp_input(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ether_arp *ea;
 	struct sockaddr_dl	sender_hw;
 	struct sockaddr_in	sender_ip;
 	struct sockaddr_in	target_ip;
-	
-	if (mbuf_len(m) < sizeof(*ea) &&
-		mbuf_pullup(&m, sizeof(*ea)) != 0)
+
+	if (mbuf_len(m) < sizeof (*ea) && mbuf_pullup(&m, sizeof (*ea)) != 0)
 		return;
-	
+
 	ea = mbuf_data(m);
-	
+
 	/* Verify this is an ethernet/ip arp and address lengths are correct */
 	if (ntohs(ea->arp_hrd) != ARPHRD_ETHER ||
-		ntohs(ea->arp_pro) != ETHERTYPE_IP ||
-		ea->arp_pln != sizeof(struct in_addr) ||
-		ea->arp_hln != ETHER_ADDR_LEN) {
-		mbuf_free(m);
+	    ntohs(ea->arp_pro) != ETHERTYPE_IP ||
+	    ea->arp_pln != sizeof (struct in_addr) ||
+	    ea->arp_hln != ETHER_ADDR_LEN) {
+		mbuf_freem(m);
 		return;
 	}
-	
+
 	/* Verify the sender is not broadcast */
 	if (bcmp(ea->arp_sha, etherbroadcastaddr, ETHER_ADDR_LEN) == 0) {
-		mbuf_free(m);
+		mbuf_freem(m);
 		return;
 	}
-	
-	bzero(&sender_ip, sizeof(sender_ip));
-	sender_ip.sin_len = sizeof(sender_ip);
+
+	bzero(&sender_ip, sizeof (sender_ip));
+	sender_ip.sin_len = sizeof (sender_ip);
 	sender_ip.sin_family = AF_INET;
 	_ip_copy(&sender_ip.sin_addr, (const struct in_addr *)ea->arp_spa);
 	target_ip = sender_ip;
 	_ip_copy(&target_ip.sin_addr, (const struct in_addr *)ea->arp_tpa);
-	
-	bzero(&sender_hw, sizeof(sender_hw));
-	sender_hw.sdl_len = sizeof(sender_hw);
+
+	bzero(&sender_hw, sizeof (sender_hw));
+	sender_hw.sdl_len = sizeof (sender_hw);
 	sender_hw.sdl_family = AF_LINK;
 	sender_hw.sdl_type = IFT_ETHER;
 	sender_hw.sdl_alen = ETHER_ADDR_LEN;
 	bcopy(ea->arp_sha, LLADDR(&sender_hw), ETHER_ADDR_LEN);
-	
-	arp_ip_handle_input(mbuf_pkthdr_rcvif(m), ntohs(ea->arp_op), &sender_hw, &sender_ip, &target_ip);
-	mbuf_free(m);
+
+	/* update L2 reachability record, if present */
+	arp_llreach_set_reachable(ifp, LLADDR(&sender_hw), ETHER_ADDR_LEN);
+
+	arp_ip_handle_input(ifp, ntohs(ea->arp_op), &sender_hw, &sender_ip,
+	    &target_ip);
+	mbuf_freem(m);
 }
 
 /*
@@ -169,120 +167,131 @@ ether_inet_arp_input(
  * the ether header, which is provided separately.
  */
 static errno_t
-ether_inet_input(
-	__unused ifnet_t			ifp,
-	__unused protocol_family_t	protocol_family,
-	mbuf_t						m_list)
+ether_inet_input(ifnet_t ifp, protocol_family_t protocol_family,
+    mbuf_t m_list)
 {
+#pragma unused(ifp, protocol_family)
 	mbuf_t	m;
 	mbuf_t	*tailptr = &m_list;
 	mbuf_t	nextpkt;
-	
+
 	/* Strip ARP and non-IP packets out of the list */
 	for (m = m_list; m; m = nextpkt) {
-    	struct ether_header *eh = mbuf_pkthdr_header(m);
-    	
-    	nextpkt = m->m_nextpkt;
-		
-    	if (eh->ether_type == htons(ETHERTYPE_IP)) {
-    		/* put this packet in the list */
-    		*tailptr = m;
-    		tailptr = &m->m_nextpkt;
-    	}
-    	else {
-    		/* Pass ARP packets to arp input */
+		struct ether_header *eh = mbuf_pkthdr_header(m);
+		struct ifnet *mifp;
+
+		/*
+		 * Trust the ifp in the mbuf rather than ifproto's,
+		 * since the packet could have been injected via
+		 * dlil_input_packet_list() using an ifp different
+		 * from the one the packet really came from.
+		 */
+		mifp = mbuf_pkthdr_rcvif(m);
+
+		nextpkt = m->m_nextpkt;
+
+		if (eh->ether_type == htons(ETHERTYPE_IP)) {
+			/*
+			 * Update L2 reachability record, if present
+			 * (and if not a broadcast sender).
+			 */
+			if (bcmp(eh->ether_shost, etherbroadcastaddr,
+			    ETHER_ADDR_LEN) != 0) {
+				arp_llreach_set_reachable(mifp, eh->ether_shost,
+				    ETHER_ADDR_LEN);
+			}
+			/* put this packet in the list */
+			*tailptr = m;
+			tailptr = &m->m_nextpkt;
+		} else {
+			/* Pass ARP packets to arp input */
 			m->m_nextpkt = NULL;
-    		if (eh->ether_type == htons(ETHERTYPE_ARP))
-    			ether_inet_arp_input(m);
-    		else
-    			mbuf_freem(m);
-    	}
+			if (eh->ether_type == htons(ETHERTYPE_ARP))
+				ether_inet_arp_input(mifp, m);
+			else
+				mbuf_freem(m);
+		}
 	}
-	
+
 	*tailptr = NULL;
-	
+
 	/* Pass IP list to ip input */
-	if (m_list != NULL && proto_input(PF_INET, m_list) != 0)
-	{
+	if (m_list != NULL && proto_input(PF_INET, m_list) != 0) {
 		mbuf_freem_list(m_list);
 	}
-	
-    return 0;
+
+	return (EJUSTRETURN);
 }
 
 static errno_t
-ether_inet_pre_output(
-	ifnet_t						ifp,
-	__unused protocol_family_t	protocol_family,
-	mbuf_t						*m0,
-	const struct sockaddr		*dst_netaddr,
-	void*						route,
-	char						*type,
-	char						*edst)
+ether_inet_pre_output(ifnet_t ifp, protocol_family_t protocol_family,
+    mbuf_t *m0, const struct sockaddr *dst_netaddr,
+    void *route, char *type, char *edst)
 {
-    register struct mbuf *m = *m0;
-    const struct ether_header *eh;
-    errno_t	result = 0;
-
-
-    if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) 
-		return ENETDOWN;
-	
-    /*
-     * Tell ether_frameout it's ok to loop packet unless negated below.
-     */
-    m->m_flags |= M_LOOP;
-
-    switch (dst_netaddr->sa_family) {
-    
-		case AF_INET: {
-				struct sockaddr_dl	ll_dest;
-				result = arp_lookup_ip(ifp, (const struct sockaddr_in*)dst_netaddr,
-									   &ll_dest, sizeof(ll_dest), (route_t)route, *m0);
-				if (result == 0) {
-					bcopy(LLADDR(&ll_dest), edst, ETHER_ADDR_LEN);
-					*(u_int16_t*)type = htons(ETHERTYPE_IP);
-				}
-			}
-			break;
+#pragma unused(protocol_family)
+	struct mbuf *m = *m0;
+	const struct ether_header *eh;
+	errno_t result = 0;
 
-		case pseudo_AF_HDRCMPLT:	
-		case AF_UNSPEC:
-			m->m_flags &= ~M_LOOP;
-			eh = (const struct ether_header *)dst_netaddr->sa_data;
-			(void)memcpy(edst, eh->ether_dhost, 6);
-			*(u_short *)type = eh->ether_type;
-			break;
-	
-		default:
-			printf("%s%d: can't handle af%d\n", ifp->if_name, ifp->if_unit,
-				   dst_netaddr->sa_family);
-	
-			result = EAFNOSUPPORT;
-    }
-
-    return result;
+	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
+		return (ENETDOWN);
+
+	/*
+	 * Tell ether_frameout it's ok to loop packet unless negated below.
+	 */
+	m->m_flags |= M_LOOP;
+
+	switch (dst_netaddr->sa_family) {
+	case AF_INET: {
+		struct sockaddr_dl ll_dest;
+
+		result = arp_lookup_ip(ifp,
+		    (const struct sockaddr_in *)dst_netaddr, &ll_dest,
+		    sizeof (ll_dest), (route_t)route, *m0);
+		if (result == 0) {
+			bcopy(LLADDR(&ll_dest), edst, ETHER_ADDR_LEN);
+			*(u_int16_t *)type = htons(ETHERTYPE_IP);
+		}
+		break;
+	}
+
+	case pseudo_AF_HDRCMPLT:
+	case AF_UNSPEC:
+		m->m_flags &= ~M_LOOP;
+		eh = (const struct ether_header *)dst_netaddr->sa_data;
+		(void) memcpy(edst, eh->ether_dhost, 6);
+		*(u_short *)type = eh->ether_type;
+		break;
+
+	default:
+		printf("%s%d: can't handle af%d\n", ifp->if_name, ifp->if_unit,
+		    dst_netaddr->sa_family);
+
+		result = EAFNOSUPPORT;
+		break;
+	}
+
+	return (result);
 }
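
In the AF_UNSPEC / pseudo_AF_HDRCMPLT arm above, dst_netaddr->sa_data
carries a fully prebuilt Ethernet header, and M_LOOP is cleared so the
frame is not looped back locally. A caller wanting raw output might
build one like this (illustrative only; dst_mac is assumed):

	struct sockaddr sa;
	struct ether_header *eh = (struct ether_header *)sa.sa_data;

	bzero(&sa, sizeof (sa));
	sa.sa_len = sizeof (sa);
	sa.sa_family = AF_UNSPEC;
	bcopy(dst_mac, eh->ether_dhost, ETHER_ADDR_LEN);
	eh->ether_type = htons(0x88b5);	/* IEEE experimental ethertype */
	/* sa is then passed down as the dst of the output routine. */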
 
 static errno_t
-ether_inet_resolve_multi(
-	ifnet_t					ifp,
-	const struct sockaddr	*proto_addr,
-	struct sockaddr_dl		*out_ll,
-	size_t					ll_len)
+ether_inet_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
+    struct sockaddr_dl *out_ll, size_t ll_len)
 {
-	static const size_t minsize = offsetof(struct sockaddr_dl, sdl_data[0]) + ETHER_ADDR_LEN;
-	const struct sockaddr_in	*sin = (const struct sockaddr_in*)proto_addr;
-	
+	static const size_t minsize =
+	    offsetof(struct sockaddr_dl, sdl_data[0]) + ETHER_ADDR_LEN;
+	const struct sockaddr_in *sin = (const struct sockaddr_in *)proto_addr;
+
 	if (proto_addr->sa_family != AF_INET)
-		return EAFNOSUPPORT;
-	
-	if (proto_addr->sa_len < sizeof(struct sockaddr_in))
-		return EINVAL;
+		return (EAFNOSUPPORT);
+
+	if (proto_addr->sa_len < sizeof (struct sockaddr_in))
+		return (EINVAL);
 
 	if (ll_len < minsize)
-		return EMSGSIZE;
-	
+		return (EMSGSIZE);
+
 	bzero(out_ll, minsize);
 	out_ll->sdl_len = minsize;
 	out_ll->sdl_family = AF_LINK;
@@ -292,141 +301,128 @@ ether_inet_resolve_multi(
 	out_ll->sdl_alen = ETHER_ADDR_LEN;
 	out_ll->sdl_slen = 0;
 	ETHER_MAP_IP_MULTICAST(&sin->sin_addr, LLADDR(out_ll));
-	
-	return 0;
+
+	return (0);
 }
 
 static errno_t
-ether_inet_prmod_ioctl(
-    ifnet_t			ifp,
-    __unused protocol_family_t	protocol_family,
-    u_long			command,
-    void			*data)
+ether_inet_prmod_ioctl(ifnet_t ifp, protocol_family_t protocol_family,
+    u_long command, void *data)
 {
-    ifaddr_t ifa = data;
-    struct ifreq *ifr = data;
-    int error = 0;
-
+#pragma unused(protocol_family)
+	ifaddr_t ifa = data;
+	struct ifreq *ifr = data;
+	int error = 0;
+
+	switch (command) {
+	case SIOCSIFADDR:
+	case SIOCAIFADDR:
+		if (!(ifnet_flags(ifp) & IFF_RUNNING)) {
+			ifnet_set_flags(ifp, IFF_UP, IFF_UP);
+			ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
+		}
 
-    switch (command) {
-    case SIOCSIFADDR:
-    case SIOCAIFADDR:
-	if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
-		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
-		ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
-	}
+		if (ifaddr_address_family(ifa) != AF_INET)
+			break;
 
-	 switch (ifaddr_address_family(ifa)) {
-
-	 case AF_INET:
-
-	    inet_arp_init_ifaddr(ifp, ifa);
-	    /*
-	     * Register new IP and MAC addresses with the kernel
-	     * debugger if the interface is the same as was registered
-	     * by IOKernelDebugger. If no interface was registered,
-	     * fall back and just match against en0 interface.
-	     * Do this only for the first address of the interface
-	     * and not for aliases.
-	     */
-	    if (command == SIOCSIFADDR &&
-	        ((kdp_get_interface() != 0 &&
-	        kdp_get_interface() == ifp->if_softc) ||
-		(kdp_get_interface() == 0 && ifp->if_unit == 0)))
+		inet_arp_init_ifaddr(ifp, ifa);
+		/*
+		 * Register new IP and MAC addresses with the kernel
+		 * debugger if the interface is the same as was registered
+		 * by IOKernelDebugger. If no interface was registered,
+		 * fall back and just match against en0 interface.
+		 * Do this only for the first address of the interface
+		 * and not for aliases.
+		 */
+		if (command == SIOCSIFADDR &&
+		    ((kdp_get_interface() != 0 &&
+		    kdp_get_interface() == ifp->if_softc) ||
+		    (kdp_get_interface() == 0 && ifp->if_unit == 0)))
 			kdp_set_ip_and_mac_addresses(&(IA_SIN(ifa)->sin_addr),
 			    ifnet_lladdr(ifp));
+		break;
 
-	    break;
-
-	default:
-	    break;
-	}
-
-	break;
-
-    case SIOCGIFADDR:
-		ifnet_lladdr_copy_bytes(ifp, ifr->ifr_addr.sa_data, ETHER_ADDR_LEN);
+	case SIOCGIFADDR:
+		ifnet_lladdr_copy_bytes(ifp, ifr->ifr_addr.sa_data,
+		    ETHER_ADDR_LEN);
 		break;
 
-    default:
+	default:
 		error = EOPNOTSUPP;
 		break;
-    }
+	}
 
-    return (error);
+	return (error);
 }
 
 static void
-ether_inet_event(
-	ifnet_t						ifp,
-	__unused protocol_family_t	protocol,
-	const struct kev_msg		*event)
+ether_inet_event(ifnet_t ifp, protocol_family_t protocol,
+    const struct kev_msg *event)
 {
-	ifaddr_t	*addresses;
-	
+#pragma unused(protocol)
+	ifaddr_t *addresses;
+
 	if (event->vendor_code !=  KEV_VENDOR_APPLE ||
-		event->kev_class != KEV_NETWORK_CLASS ||
-		event->kev_subclass != KEV_DL_SUBCLASS ||
-		event->event_code != KEV_DL_LINK_ADDRESS_CHANGED) {
+	    event->kev_class != KEV_NETWORK_CLASS ||
+	    event->kev_subclass != KEV_DL_SUBCLASS ||
+	    event->event_code != KEV_DL_LINK_ADDRESS_CHANGED) {
 		return;
 	}
-	
+
 	if (ifnet_get_address_list_family(ifp, &addresses, AF_INET) == 0) {
 		int i;
-		
+
 		for (i = 0; addresses[i] != NULL; i++) {
 			inet_arp_init_ifaddr(ifp, addresses[i]);
 		}
-		
+
 		ifnet_free_address_list(addresses);
 	}
 }
 
 static errno_t
-ether_inet_arp(
-	ifnet_t								ifp,
-	u_short								arpop,
-	const struct sockaddr_dl*			sender_hw,
-	const struct sockaddr*				sender_proto,
-	const struct sockaddr_dl*			target_hw,
-	const struct sockaddr*				target_proto)
+ether_inet_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
+    const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
+    const struct sockaddr *target_proto)
 {
 	mbuf_t	m;
 	errno_t	result;
 	struct ether_header *eh;
 	struct ether_arp *ea;
-	const struct sockaddr_in* sender_ip = (const struct sockaddr_in*)sender_proto;
-	const struct sockaddr_in* target_ip = (const struct sockaddr_in*)target_proto;
+	const struct sockaddr_in *sender_ip =
+	    (const struct sockaddr_in *)sender_proto;
+	const struct sockaddr_in *target_ip =
+	    (const struct sockaddr_in *)target_proto;
 	char *datap;
-	
+
 	if (target_ip == NULL)
-		return EINVAL;
-	
+		return (EINVAL);
+
 	if ((sender_ip && sender_ip->sin_family != AF_INET) ||
 	    target_ip->sin_family != AF_INET)
-		return EAFNOSUPPORT;
-	
+		return (EAFNOSUPPORT);
+
 	result = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_DATA, &m);
 	if (result != 0)
-		return result;
-	
-	mbuf_setlen(m, sizeof(*ea));
-	mbuf_pkthdr_setlen(m, sizeof(*ea));
-	
+		return (result);
+
+	mbuf_setlen(m, sizeof (*ea));
+	mbuf_pkthdr_setlen(m, sizeof (*ea));
+
 	/* Move the data pointer in the mbuf to the end, aligned to 4 bytes */
 	datap = mbuf_datastart(m);
 	datap += mbuf_trailingspace(m);
 	datap -= (((uintptr_t)datap) & 0x3);
-	mbuf_setdata(m, datap, sizeof(*ea));
+	mbuf_setdata(m, datap, sizeof (*ea));
 	ea = mbuf_data(m);
-	
+
 	/*
 	 * Prepend the ethernet header, we will send the raw frame;
 	 * callee frees the original mbuf when allocation fails.
 	 */
-	result = mbuf_prepend(&m, sizeof(*eh), MBUF_DONTWAIT);
+	result = mbuf_prepend(&m, sizeof (*eh), MBUF_DONTWAIT);
 	if (result != 0)
-		return result;
+		return (result);
 
 	eh = mbuf_data(m);
 	eh->ether_type = htons(ETHERTYPE_ARP);
@@ -434,108 +430,108 @@ ether_inet_arp(
 #if CONFIG_MACF_NET
 	mac_mbuf_label_associate_linklayer(ifp, m);
 #endif
-	
+
 	/* Fill out the arp header */
 	ea->arp_pro = htons(ETHERTYPE_IP);
-	ea->arp_hln = sizeof(ea->arp_sha);
-	ea->arp_pln = sizeof(ea->arp_spa);
+	ea->arp_hln = sizeof (ea->arp_sha);
+	ea->arp_pln = sizeof (ea->arp_spa);
 	ea->arp_hrd = htons(ARPHRD_ETHER);
 	ea->arp_op = htons(arpop);
-	
+
 	/* Sender Hardware */
 	if (sender_hw != NULL) {
-		bcopy(CONST_LLADDR(sender_hw), ea->arp_sha, sizeof(ea->arp_sha));
-	}
-	else {
+		bcopy(CONST_LLADDR(sender_hw), ea->arp_sha,
+		    sizeof (ea->arp_sha));
+	} else {
 		ifnet_lladdr_copy_bytes(ifp, ea->arp_sha, ETHER_ADDR_LEN);
 	}
-	ifnet_lladdr_copy_bytes(ifp, eh->ether_shost, sizeof(eh->ether_shost));
-	
+	ifnet_lladdr_copy_bytes(ifp, eh->ether_shost, sizeof (eh->ether_shost));
+
 	/* Sender IP */
 	if (sender_ip != NULL) {
-		bcopy(&sender_ip->sin_addr, ea->arp_spa, sizeof(ea->arp_spa));
-	}
-	else {
+		bcopy(&sender_ip->sin_addr, ea->arp_spa, sizeof (ea->arp_spa));
+	} else {
 		struct ifaddr *ifa;
-		
+
 		/* Look for an IP address to use as our source */
 		ifnet_lock_shared(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
-			if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET)
+			IFA_LOCK(ifa);
+			if (ifa->ifa_addr != NULL &&
+			    ifa->ifa_addr->sa_family == AF_INET) {
+				bcopy(&((struct sockaddr_in *)ifa->ifa_addr)->
+				    sin_addr, ea->arp_spa, sizeof(ea->arp_spa));
+				IFA_UNLOCK(ifa);
 				break;
-		}
-		if (ifa) {
-			bcopy(&((struct sockaddr_in*)ifa->ifa_addr)->sin_addr, ea->arp_spa,
-				  sizeof(ea->arp_spa));
+			}
+			IFA_UNLOCK(ifa);
 		}
 		ifnet_lock_done(ifp);
-		
+
 		if (ifa == NULL) {
-			mbuf_free(m);
-			return ENXIO;
+			mbuf_freem(m);
+			return (ENXIO);
 		}
 	}
-	
+
 	/* Target Hardware */
-	if (target_hw == 0) {
-		bzero(ea->arp_tha, sizeof(ea->arp_tha));
-		bcopy(etherbroadcastaddr, eh->ether_dhost, sizeof(eh->ether_dhost));
-	}
-	else {
-		bcopy(CONST_LLADDR(target_hw), ea->arp_tha, sizeof(ea->arp_tha));
-		bcopy(CONST_LLADDR(target_hw), eh->ether_dhost, sizeof(eh->ether_dhost));
+	if (target_hw == NULL) {
+		bzero(ea->arp_tha, sizeof (ea->arp_tha));
+		bcopy(etherbroadcastaddr, eh->ether_dhost,
+		    sizeof (eh->ether_dhost));
+	} else {
+		bcopy(CONST_LLADDR(target_hw), ea->arp_tha,
+		    sizeof (ea->arp_tha));
+		bcopy(CONST_LLADDR(target_hw), eh->ether_dhost,
+		    sizeof (eh->ether_dhost));
 	}
-	
+
 	/* Target IP */
-	bcopy(&target_ip->sin_addr, ea->arp_tpa, sizeof(ea->arp_tpa));
-	
+	bcopy(&target_ip->sin_addr, ea->arp_tpa, sizeof (ea->arp_tpa));
+
 	ifnet_output_raw(ifp, PF_INET, m);
-	
-	return 0;
+
+	return (0);
 }
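
The pointer arithmetic above parks the ARP payload at the end of the
mbuf, rounded down to a 4-byte boundary by masking off the low two bits.
A worked example with illustrative addresses:

	/* Suppose mbuf_datastart() = 0x1000 and trailingspace = 0x9e. */
	char *datap = (char *)0x1000 + 0x9e;	/* 0x109e */
	datap -= ((uintptr_t)datap) & 0x3;	/* clear low 2 bits: 0x109c */
	/* 0x109c is divisible by 4, so the ether_arp starts aligned. */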
 
 errno_t
-ether_attach_inet(
-	struct ifnet	*ifp,
-	__unused protocol_family_t proto_family)
+ether_attach_inet(struct ifnet *ifp, protocol_family_t proto_family)
 {
+#pragma unused(proto_family)
 	struct ifnet_attach_proto_param_v2 proto;
 	struct ifnet_demux_desc demux[2];
-	u_short en_native=htons(ETHERTYPE_IP);
-	u_short arp_native=htons(ETHERTYPE_ARP);
+	u_short en_native = htons(ETHERTYPE_IP);
+	u_short arp_native = htons(ETHERTYPE_ARP);
 	errno_t	error;
-	
-	bzero(&demux[0], sizeof(demux));
+
+	bzero(&demux[0], sizeof (demux));
 	demux[0].type = DLIL_DESC_ETYPE2;
 	demux[0].data = &en_native;
-	demux[0].datalen = sizeof(en_native);
+	demux[0].datalen = sizeof (en_native);
 	demux[1].type = DLIL_DESC_ETYPE2;
 	demux[1].data = &arp_native;
-	demux[1].datalen = sizeof(arp_native);
+	demux[1].datalen = sizeof (arp_native);
 
-	bzero(&proto, sizeof(proto));
+	bzero(&proto, sizeof (proto));
 	proto.demux_list = demux;
-	proto.demux_count = sizeof(demux) / sizeof(demux[0]);
+	proto.demux_count = sizeof (demux) / sizeof (demux[0]);
 	proto.input = ether_inet_input;
 	proto.pre_output = ether_inet_pre_output;
 	proto.ioctl = ether_inet_prmod_ioctl;
 	proto.event = ether_inet_event;
 	proto.resolve = ether_inet_resolve_multi;
 	proto.send_arp = ether_inet_arp;
-	
+
 	error = ifnet_attach_protocol_v2(ifp, proto_family, &proto);
 	if (error && error != EEXIST) {
-		printf("WARNING: ether_attach_inet can't attach ip to %s%d\n",
-			   ifp->if_name, ifp->if_unit);
+		printf("WARNING: %s can't attach ip to %s%d\n", __func__,
+		    ifp->if_name, ifp->if_unit);
 	}
-	return error;
+	return (error);
 }
 
 void
-ether_detach_inet(
-	struct ifnet *ifp,
-	protocol_family_t proto_family)
+ether_detach_inet(struct ifnet *ifp, protocol_family_t proto_family)
 {
-	(void)ifnet_detach_protocol(ifp, proto_family);
+	(void) ifnet_detach_protocol(ifp, proto_family);
 }
-
diff --git a/bsd/net/ethernet.h b/bsd/net/ethernet.h
index 00b7fa5fb..aea52bc20 100644
--- a/bsd/net/ethernet.h
+++ b/bsd/net/ethernet.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000,2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -65,6 +65,14 @@
  */
 #define	ETHER_MAX_LEN		1518
 
+/*
+ * Mbuf adjust factor to force 32-bit alignment of IP header.
+ * Drivers should do m_adj(m, ETHER_ALIGN) when setting up a
+ * receive so the upper layers get the IP header properly aligned
+ * past the 14-byte Ethernet header.
+ */
+#define	ETHER_ALIGN		2	/* driver adjust for IP hdr alignment */
+
 /*
  * A macro to validate a length with
  */
@@ -120,7 +128,23 @@ struct	ether_addr *ether_aton(const char *);
 
 #ifdef BSD_KERNEL_PRIVATE
 extern u_char	etherbroadcastaddr[ETHER_ADDR_LEN];
-#endif
+
+static __inline__ int
+_ether_cmp(const void *a, const void *b)
+{
+	const u_int16_t *a_s = (const u_int16_t *)a;
+	const u_int16_t *b_s = (const u_int16_t *)b;
+
+	if (a_s[0] != b_s[0] || a_s[1] != b_s[1] || a_s[2] != b_s[2]) {
+		return (1);
+	}
+	return (0);
+}
+
+#endif /* BSD_KERNEL_PRIVATE */
 
 #define ETHER_IS_MULTICAST(addr) (*(addr) & 0x01) /* is address mcast/bcast? */
 
diff --git a/bsd/net/if.c b/bsd/net/if.c
index 02b698007..26314b948 100644
--- a/bsd/net/if.c
+++ b/bsd/net/if.c
@@ -81,9 +81,13 @@
 #include <sys/sockio.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
+#include <sys/mcache.h>
+#include <kern/zalloc.h>
 
 #include <machine/endian.h>
 
+#include <pexpert/pexpert.h>
+
 #include <net/if.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
@@ -106,15 +110,14 @@
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
+#include <netinet/ip6.h>
 #if INET6
 #include <netinet6/in6_var.h>
 #include <netinet6/in6_ifattach.h>
+#include <netinet6/ip6_var.h>
 #endif
 #endif
 
-extern int dlil_multithreaded_input;
-extern struct dlil_threading_info *dlil_lo_thread_ptr;
-
 #if CONFIG_MACF_NET 
 #include <security/mac_framework.h>
 #endif
@@ -124,11 +127,21 @@ extern struct dlil_threading_info *dlil_lo_thread_ptr;
  * System initialization
  */
 
+/* Lock group and attribute for ifaddr lock */
+lck_attr_t	*ifa_mtx_attr;
+lck_grp_t	*ifa_mtx_grp;
+static lck_grp_attr_t	*ifa_mtx_grp_attr;
+
 static int ifconf(u_long cmd, user_addr_t ifrp, int * ret_space);
 static void if_qflush(struct ifqueue *);
 __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
 void if_rtproto_del(struct ifnet *ifp, int protocol);
 
+static int if_addmulti_common(struct ifnet *, const struct sockaddr *,
+    struct ifmultiaddr **, int);
+static int if_delmulti_common(struct ifmultiaddr *, struct ifnet *,
+    const struct sockaddr *, int);
+
 static int if_rtmtu(struct radix_node *, void *);
 static void if_rtmtu_update(struct ifnet *);
 
@@ -137,7 +150,6 @@ static int	if_clone_list(int count, int * total, user_addr_t dst);
 #endif /* IF_CLONE_LIST */
 
 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
-MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
 
 int	ifqmaxlen = IFQ_MAXLEN;
 struct	ifnethead ifnet_head = TAILQ_HEAD_INITIALIZER(ifnet_head);
@@ -147,6 +159,50 @@ LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
 
 static struct ifaddr *ifa_ifwithnet_common(const struct sockaddr *,
     unsigned int);
+static void if_attach_ifa_common(struct ifnet *, struct ifaddr *, int);
+static void if_detach_ifa_common(struct ifnet *, struct ifaddr *, int);
+
+static void if_attach_ifma(struct ifnet *, struct ifmultiaddr *, int);
+static int if_detach_ifma(struct ifnet *, struct ifmultiaddr *, int);
+
+static struct ifmultiaddr *ifma_alloc(int);
+static void ifma_free(struct ifmultiaddr *);
+static void ifma_trace(struct ifmultiaddr *, int);
+
+#if DEBUG
+static unsigned int ifma_debug = 1;	/* debugging (enabled) */
+#else
+static unsigned int ifma_debug;		/* debugging (disabled) */
+#endif /* !DEBUG */
+static unsigned int ifma_size;		/* size of zone element */
+static struct zone *ifma_zone;		/* zone for ifmultiaddr */
+
+#define	IFMA_TRACE_HIST_SIZE	32	/* size of trace history */
+
+/* For gdb */
+__private_extern__ unsigned int ifma_trace_hist_size = IFMA_TRACE_HIST_SIZE;
+
+struct ifmultiaddr_dbg {
+	struct ifmultiaddr	ifma;			/* ifmultiaddr */
+	u_int16_t		ifma_refhold_cnt;	/* # of ref */
+	u_int16_t		ifma_refrele_cnt;	/* # of rele */
+	/*
+	 * Circular lists of IFMA_ADDREF and IFMA_REMREF callers.
+	 */
+	ctrace_t		ifma_refhold[IFMA_TRACE_HIST_SIZE];
+	ctrace_t		ifma_refrele[IFMA_TRACE_HIST_SIZE];
+	/*
+	 * Trash list linkage
+	 */
+	TAILQ_ENTRY(ifmultiaddr_dbg) ifma_trash_link;
+};
+
+/* List of trash ifmultiaddr entries protected by ifma_trash_lock */
+static TAILQ_HEAD(, ifmultiaddr_dbg) ifma_trash_head;
+static decl_lck_mtx_data(, ifma_trash_lock);
+
+#define	IFMA_ZONE_MAX		64		/* maximum elements in zone */
+#define	IFMA_ZONE_NAME		"ifmultiaddr"	/* zone name */
 
 #if INET6
 /*
@@ -154,9 +210,36 @@ static struct ifaddr *ifa_ifwithnet_common(const struct sockaddr *,
  * should be more generalized?
  */
 extern void	nd6_setmtu(struct ifnet *);
+extern lck_mtx_t *nd6_mutex;
 #endif
 
 
+void
+ifa_init(void)
+{
+	/* Setup lock group and attribute for ifaddr */
+	ifa_mtx_grp_attr = lck_grp_attr_alloc_init();
+	ifa_mtx_grp = lck_grp_alloc_init("ifaddr", ifa_mtx_grp_attr);
+	ifa_mtx_attr = lck_attr_alloc_init();
+
+	PE_parse_boot_argn("ifa_debug", &ifma_debug, sizeof (ifma_debug));
+
+	ifma_size = (ifma_debug == 0) ? sizeof (struct ifmultiaddr) :
+	    sizeof (struct ifmultiaddr_dbg);
+
+	ifma_zone = zinit(ifma_size, IFMA_ZONE_MAX * ifma_size, 0,
+	    IFMA_ZONE_NAME);
+	if (ifma_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, IFMA_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(ifma_zone, Z_EXPAND, TRUE);
+	zone_change(ifma_zone, Z_CALLERACCT, FALSE);
+
+	lck_mtx_init(&ifma_trash_lock, ifa_mtx_grp, ifa_mtx_attr);
+	TAILQ_INIT(&ifma_trash_head);
+}
+
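The zone is sized on either the plain ifmultiaddr or its _dbg superset,
depending on the ifa_debug boot-arg parsed above. The allocator declared
earlier would then follow the usual zalloc pattern; a sketch under that
assumption (the real ifma_alloc() may differ in detail):

	static struct ifmultiaddr *
	ifma_alloc(int how)
	{
		struct ifmultiaddr *ifma;

		ifma = (how == M_WAITOK) ? zalloc(ifma_zone) :
		    zalloc_noblock(ifma_zone);
		if (ifma != NULL)
			bzero(ifma, ifma_size);	/* covers the _dbg tail too */
		return (ifma);
	}
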
 /*
  * Network interface utility routines.
  *
@@ -169,45 +252,106 @@ struct ifaddr **ifnet_addrs;
 struct ifnet **ifindex2ifnet;
 
 __private_extern__ void
-if_attach_ifa(
-	struct ifnet *ifp,
-	struct ifaddr *ifa)
+if_attach_ifa(struct ifnet *ifp, struct ifaddr *ifa)
+{
+	if_attach_ifa_common(ifp, ifa, 0);
+}
+
+__private_extern__ void
+if_attach_link_ifa(struct ifnet *ifp, struct ifaddr *ifa)
+{
+	if_attach_ifa_common(ifp, ifa, 1);
+}
+
+static void
+if_attach_ifa_common(struct ifnet *ifp, struct ifaddr *ifa, int link)
 {
-	ifnet_lock_assert(ifp, LCK_MTX_ASSERT_OWNED);
-	if (ifa->ifa_debug & IFD_ATTACHED) {
-		panic("if_attach_ifa: Attempted to attach address that's already attached!\n");
+	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
+	IFA_LOCK_ASSERT_HELD(ifa);
+
+	if (ifa->ifa_ifp != ifp) {
+		panic("%s: Mismatch ifa_ifp=%p != ifp=%p", __func__,
+		    ifa->ifa_ifp, ifp);
+		/* NOTREACHED */
+	} else if (ifa->ifa_debug & IFD_ATTACHED) {
+		panic("%s: Attempt to attach an already attached ifa=%p",
+		    __func__, ifa);
+		/* NOTREACHED */
+	} else if (link && !(ifa->ifa_debug & IFD_LINK)) {
+		panic("%s: Unexpected non-link address ifa=%p", __func__, ifa);
+		/* NOTREACHED */
+	} else if (!link && (ifa->ifa_debug & IFD_LINK)) {
+		panic("%s: Unexpected link address ifa=%p", __func__, ifa);
+		/* NOTREACHED */
 	}
-	ifaref(ifa);
+	IFA_ADDREF_LOCKED(ifa);
 	ifa->ifa_debug |= IFD_ATTACHED;
-	TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
+	if (link)
+		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
+	else
+		TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
+
+	if (ifa->ifa_attached != NULL)
+		(*ifa->ifa_attached)(ifa);
 }
 
 __private_extern__ void
-if_detach_ifa(
-	struct ifnet *ifp,
-	struct ifaddr *ifa)
-{
-	ifnet_lock_assert(ifp, LCK_MTX_ASSERT_OWNED);
-#if 1
-	/* Debugging code */
-	if ((ifa->ifa_debug & IFD_ATTACHED) == 0) {
-		printf("if_detach_ifa: ifa is not attached to any interface! flags=%u\n", ifa->ifa_debug);
-		return;
-	}
-	else {
+if_detach_ifa(struct ifnet *ifp, struct ifaddr *ifa)
+{
+	if_detach_ifa_common(ifp, ifa, 0);
+}
+
+__private_extern__ void
+if_detach_link_ifa(struct ifnet *ifp, struct ifaddr *ifa)
+{
+	if_detach_ifa_common(ifp, ifa, 1);
+}
+
+static void
+if_detach_ifa_common(struct ifnet *ifp, struct ifaddr *ifa, int link)
+{
+	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
+	IFA_LOCK_ASSERT_HELD(ifa);
+
+	if (link && !(ifa->ifa_debug & IFD_LINK)) {
+		panic("%s: Unexpected non-link address ifa=%p", __func__, ifa);
+		/* NOTREACHED */
+	} else if (link && ifa != TAILQ_FIRST(&ifp->if_addrhead)) {
+		panic("%s: Link address ifa=%p not first", __func__, ifa);
+		/* NOTREACHED */
+	} else if (!link && (ifa->ifa_debug & IFD_LINK)) {
+		panic("%s: Unexpected link address ifa=%p", __func__, ifa);
+		/* NOTREACHED */
+	} else if (!(ifa->ifa_debug & IFD_ATTACHED)) {
+		panic("%s: Attempt to detach an unattached address ifa=%p",
+		    __func__, ifa);
+		/* NOTREACHED */
+	} else if (ifa->ifa_ifp != ifp) {
+		panic("%s: Mismatch ifa_ifp=%p, ifp=%p", __func__,
+		    ifa->ifa_ifp, ifp);
+		/* NOTREACHED */
+	} else if (ifa->ifa_debug & IFD_DEBUG) {
 		struct ifaddr *ifa2;
 		TAILQ_FOREACH(ifa2, &ifp->if_addrhead, ifa_link) {
 			if (ifa2 == ifa)
 				break;
 		}
 		if (ifa2 != ifa) {
-			printf("if_detach_ifa: Attempted to detach IFA that was not attached!\n");
-		}	
+			panic("%s: Attempt to detach a stray address ifa=%p",
+			    __func__, ifa);
+			/* NOTREACHED */
+		}
 	}
-#endif
 	TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
+	/* This must not be the last reference to the ifaddr */
+	if (IFA_REMREF_LOCKED(ifa) == NULL) {
+		panic("%s: unexpected (missing) refcnt ifa=%p", __func__, ifa);
+		/* NOTREACHED */
+	}
 	ifa->ifa_debug &= ~IFD_ATTACHED;
-	ifafree(ifa);
+
+	if (ifa->ifa_detached != NULL)
+		(*ifa->ifa_detached)(ifa);
 }
 
 #define INITIAL_IF_INDEXLIM	8
@@ -346,7 +490,8 @@ if_clone_create(char *name, int len, void *params)
 			 * there's no straightforward way to recover if
 			 * it happens.
 			 */
-			panic("if_clone_create(): interface name too long");
+			panic("%s: interface name too long", __func__);
+			/* NOTREACHED */
 		}
 
 	}
@@ -548,36 +693,72 @@ ifa_foraddr_scoped(unsigned int addr, unsigned int scope)
 
 	lck_rw_lock_shared(in_ifaddr_rwlock);
 	TAILQ_FOREACH(ia, INADDR_HASH(addr), ia_hash) {
+		IFA_LOCK_SPIN(&ia->ia_ifa);
 		if (ia->ia_addr.sin_addr.s_addr == addr &&
-		    (scope == IFSCOPE_NONE || ia->ia_ifp->if_index == scope))
+		    (scope == IFSCOPE_NONE || ia->ia_ifp->if_index == scope)) {
+			IFA_ADDREF_LOCKED(&ia->ia_ifa);	/* for caller */
+			IFA_UNLOCK(&ia->ia_ifa);
 			break;
+		}
+		IFA_UNLOCK(&ia->ia_ifa);
 	}
-	if (ia != NULL)
-		ifaref(&ia->ia_ifa);
 	lck_rw_done(in_ifaddr_rwlock);
 	return (ia);
 }
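
ifa_foraddr_scoped() shows the locking discipline this change introduces
throughout if.c: take the per-ifaddr lock, grab a reference while it is
held, drop the lock, and let the caller balance with IFA_REMREF().
Distilled (matches() stands in for the predicate):

	IFA_LOCK_SPIN(ifa);
	if (matches(ifa)) {
		IFA_ADDREF_LOCKED(ifa);	/* reference handed to the caller */
		IFA_UNLOCK(ifa);
		return (ifa);		/* caller does IFA_REMREF() later */
	}
	IFA_UNLOCK(ifa);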
 
+#if INET6
+/*
+ * Similar to ifa_foraddr, except that this for IPv6.
+ */
+__private_extern__ struct in6_ifaddr *
+ifa_foraddr6(struct in6_addr *addr6)
+{
+	return (ifa_foraddr6_scoped(addr6, IFSCOPE_NONE));
+}
+
+__private_extern__ struct in6_ifaddr *
+ifa_foraddr6_scoped(struct in6_addr *addr6, unsigned int scope)
+{
+	struct in6_ifaddr *ia = NULL;
+
+	lck_rw_lock_shared(&in6_ifaddr_rwlock);
+	for (ia = in6_ifaddrs; ia; ia = ia->ia_next) {
+		IFA_LOCK(&ia->ia_ifa);
+		if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, addr6) &&
+		    (scope == IFSCOPE_NONE || ia->ia_ifp->if_index == scope)) {
+			IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for caller */
+			IFA_UNLOCK(&ia->ia_ifa);
+			break;
+		}
+		IFA_UNLOCK(&ia->ia_ifa);
+	}
+	lck_rw_done(&in6_ifaddr_rwlock);
+
+	return (ia);
+}
+#endif /* INET6 */
+
 /*
  * Return the first (primary) address of a given family on an interface.
  */
 __private_extern__ struct ifaddr *
 ifa_ifpgetprimary(struct ifnet *ifp, int family)
 {
-	struct ifaddr *ifa0 = NULL, *ifa;
+	struct ifaddr *ifa;
 
 	ifnet_lock_shared(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
-		if (ifa->ifa_addr->sa_family == family && ifa0 == NULL) {
-			ifa0 = ifa;
+		IFA_LOCK_SPIN(ifa);
+		if (ifa->ifa_addr->sa_family == family) {
+			IFA_ADDREF_LOCKED(ifa);	/* for caller */
+			IFA_UNLOCK(ifa);
 			break;
 		}
+		IFA_UNLOCK(ifa);
 	}
-	if (ifa0 != NULL)
-		ifaref(ifa0);
 	ifnet_lock_done(ifp);
 
-	return (ifa0);
+	return (ifa);
 }
 
 /*
@@ -585,75 +766,89 @@ ifa_ifpgetprimary(struct ifnet *ifp, int family)
  */
 /*ARGSUSED*/
 struct ifaddr *
-ifa_ifwithaddr(
-	const struct sockaddr *addr)
+ifa_ifwithaddr(const struct sockaddr *addr)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 	struct ifaddr *result = NULL;
 
-#define	equal(a1, a2) \
-  (bcmp((const void*)(a1), (const void*)(a2), ((const struct sockaddr *)(a1))->sa_len) == 0)
-  
+#define	equal(a1, a2)							\
+	(bcmp((const void*)(a1), (const void*)(a2),			\
+	    ((const struct sockaddr *)(a1))->sa_len) == 0)
+
 	ifnet_head_lock_shared();
-	for (ifp = ifnet_head.tqh_first; ifp && !result; ifp = ifp->if_link.tqe_next) {
+	for (ifp = ifnet_head.tqh_first; ifp && !result;
+	    ifp = ifp->if_link.tqe_next) {
 		ifnet_lock_shared(ifp);
 		for (ifa = ifp->if_addrhead.tqh_first; ifa;
-			 ifa = ifa->ifa_link.tqe_next) {
-			if (ifa->ifa_addr->sa_family != addr->sa_family)
+		    ifa = ifa->ifa_link.tqe_next) {
+			IFA_LOCK_SPIN(ifa);
+			if (ifa->ifa_addr->sa_family != addr->sa_family) {
+				IFA_UNLOCK(ifa);
 				continue;
+			}
 			if (equal(addr, ifa->ifa_addr)) {
 				result = ifa;
+				IFA_ADDREF_LOCKED(ifa);	/* for caller */
+				IFA_UNLOCK(ifa);
 				break;
 			}
-			if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr &&
-				/* IP6 doesn't have broadcast */
-				ifa->ifa_broadaddr->sa_len != 0 &&
-				equal(ifa->ifa_broadaddr, addr)) {
+			if ((ifp->if_flags & IFF_BROADCAST) &&
+			    ifa->ifa_broadaddr != NULL &&
+			    /* IP6 doesn't have broadcast */
+			    ifa->ifa_broadaddr->sa_len != 0 &&
+			    equal(ifa->ifa_broadaddr, addr)) {
 				result = ifa;
+				IFA_ADDREF_LOCKED(ifa);	/* for caller */
+				IFA_UNLOCK(ifa);
 				break;
 			}
+			IFA_UNLOCK(ifa);
 		}
-		if (result)
-			ifaref(result);
 		ifnet_lock_done(ifp);
 	}
 	ifnet_head_done();
-	
-	return result;
+
+	return (result);
 }
 /*
  * Locate the point to point interface with a given destination address.
  */
 /*ARGSUSED*/
 struct ifaddr *
-ifa_ifwithdstaddr(
-	const struct sockaddr *addr)
+ifa_ifwithdstaddr(const struct sockaddr *addr)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 	struct ifaddr *result = NULL;
 
 	ifnet_head_lock_shared();
-	for (ifp = ifnet_head.tqh_first; ifp && !result; ifp = ifp->if_link.tqe_next) {
-	    if (ifp->if_flags & IFF_POINTOPOINT) {
+	for (ifp = ifnet_head.tqh_first; ifp && !result;
+	    ifp = ifp->if_link.tqe_next) {
+		if (ifp->if_flags & IFF_POINTOPOINT) {
 			ifnet_lock_shared(ifp);
 			for (ifa = ifp->if_addrhead.tqh_first; ifa;
-				 ifa = ifa->ifa_link.tqe_next) {
-				if (ifa->ifa_addr->sa_family != addr->sa_family)
+			    ifa = ifa->ifa_link.tqe_next) {
+				IFA_LOCK_SPIN(ifa);
+				if (ifa->ifa_addr->sa_family !=
+				    addr->sa_family) {
+					IFA_UNLOCK(ifa);
 					continue;
-				if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)) {
+				}
+				if (ifa->ifa_dstaddr &&
+				    equal(addr, ifa->ifa_dstaddr)) {
 					result = ifa;
+					IFA_ADDREF_LOCKED(ifa);	/* for caller */
+					IFA_UNLOCK(ifa);
 					break;
 				}
+				IFA_UNLOCK(ifa);
 			}
-			if (result)
-				ifaref(result);
 			ifnet_lock_done(ifp);
 		}
 	}
 	ifnet_head_done();
-	return result;
+	return (result);
 }
 
 /*
@@ -686,10 +881,15 @@ ifa_ifwithaddr_scoped(const struct sockaddr *addr, unsigned int ifscope)
 		ifnet_lock_shared(ifp);
 		for (ifa = ifp->if_addrhead.tqh_first; ifa != NULL;
 		    ifa = ifa->ifa_link.tqe_next) {
-			if (ifa->ifa_addr->sa_family != addr->sa_family)
+			IFA_LOCK_SPIN(ifa);
+			if (ifa->ifa_addr->sa_family != addr->sa_family) {
+				IFA_UNLOCK(ifa);
 				continue;
+			}
 			if (equal(addr, ifa->ifa_addr)) {
 				result = ifa;
+				IFA_ADDREF_LOCKED(ifa);	/* for caller */
+				IFA_UNLOCK(ifa);
 				break;
 			}
 			if ((ifp->if_flags & IFF_BROADCAST) &&
@@ -698,11 +898,12 @@ ifa_ifwithaddr_scoped(const struct sockaddr *addr, unsigned int ifscope)
 			    ifa->ifa_broadaddr->sa_len != 0 &&
 			    equal(ifa->ifa_broadaddr, addr)) {
 				result = ifa;
+				IFA_ADDREF_LOCKED(ifa);	/* for caller */
+				IFA_UNLOCK(ifa);
 				break;
 			}
+			IFA_UNLOCK(ifa);
 		}
-		if (result != NULL)
-			ifaref(result);
 		ifnet_lock_done(ifp);
 	}
 	ifnet_head_done();
@@ -731,11 +932,17 @@ ifa_ifwithnet_common(const struct sockaddr *addr, unsigned int ifscope)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa = NULL;
-	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
+	struct ifaddr *ifa_maybe = NULL;
 	u_int af = addr->sa_family;
 	const char *addr_data = addr->sa_data, *cplim;
 
-	if (!ip_doscopedroute || addr->sa_family != AF_INET)
+#if INET6
+	if ((af != AF_INET && af != AF_INET6) ||
+	    (af == AF_INET && !ip_doscopedroute) ||
+	    (af == AF_INET6 && !ip6_doscopedroute))
+#else
+	if (af != AF_INET || !ip_doscopedroute)
+#endif /* !INET6 */
 		ifscope = IFSCOPE_NONE;
 
 	ifnet_head_lock_shared();
@@ -744,15 +951,14 @@ ifa_ifwithnet_common(const struct sockaddr *addr, unsigned int ifscope)
 	 * so do that if we can.
 	 */
 	if (af == AF_LINK) {
-	    const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr;
-	    if (sdl->sdl_index && sdl->sdl_index <= if_index) {
+		const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr;
+		if (sdl->sdl_index && sdl->sdl_index <= if_index) {
 			ifa = ifnet_addrs[sdl->sdl_index - 1];
-	
-			if (ifa)
-				ifaref(ifa);
-			
+			if (ifa != NULL)
+				IFA_ADDREF(ifa);
+
 			ifnet_head_done();
-			return ifa;
+			return (ifa);
 		}
 	}
 
@@ -766,15 +972,19 @@ ifa_ifwithnet_common(const struct sockaddr *addr, unsigned int ifscope)
 		     ifa = ifa->ifa_link.tqe_next) {
 			const char *cp, *cp2, *cp3;
 
-			if (ifa->ifa_addr->sa_family != af)
-next:				continue;
+			IFA_LOCK(ifa);
+			if (ifa->ifa_addr == NULL ||
+			    ifa->ifa_addr->sa_family != af) {
+next:
+				IFA_UNLOCK(ifa);
+				continue;
+			}
 #ifndef __APPLE__
 /* This breaks tunneling applications trying to install a route with
  * a specific subnet and the local address as the destination.
  * It breaks binary compatibility with previous versions of Mac OS X.
  */
 			if (
- 
 #if INET6 /* XXX: for matching gif tunnel dst as routing entry gateway */
 			    addr->sa_family != AF_INET6 &&
 #endif
@@ -787,10 +997,13 @@ next:				continue;
 				 * The trouble is that we don't know the
 				 * netmask for the remote end.
 				 */
-				if (ifa->ifa_dstaddr != 0
-				    && equal(addr, ifa->ifa_dstaddr)) {
-				    break;
- 				}
+				if (ifa->ifa_dstaddr != 0 &&
+				    equal(addr, ifa->ifa_dstaddr)) {
+					IFA_ADDREF_LOCKED(ifa);
+					IFA_UNLOCK(ifa);
+					break;
+				}
+				IFA_UNLOCK(ifa);
 			} else
 #endif /* __APPLE__*/
 			{
@@ -799,8 +1012,10 @@ next:				continue;
 				 * find using a matching interface.
 				 */
 				if (ifscope != IFSCOPE_NONE &&
-				    ifp->if_index != ifscope)
+				    ifp->if_index != ifscope) {
+					IFA_UNLOCK(ifa);
 					continue;
+				}
 
 				/*
 				 * Scan all the bits in the ifa's address.
@@ -809,8 +1024,10 @@ next:				continue;
 				 * to see if it really matters.
 				 * (A byte at a time)
 				 */
-				if (ifa->ifa_netmask == 0)
+				if (ifa->ifa_netmask == 0) {
+					IFA_UNLOCK(ifa);
 					continue;
+				}
 				cp = addr_data;
 				cp2 = ifa->ifa_addr->sa_data;
 				cp3 = ifa->ifa_netmask->sa_data;
@@ -826,40 +1043,33 @@ next:				continue;
 				 * before continuing to search
 				 * for an even better one.
 				 */
-				if (ifa_maybe == 0 ||
+				if (ifa_maybe == NULL ||
 				    rn_refines((caddr_t)ifa->ifa_netmask,
 				    (caddr_t)ifa_maybe->ifa_netmask)) {
-					ifaref(ifa);
-					if (ifa_maybe)
-						ifafree(ifa_maybe);
+					IFA_ADDREF_LOCKED(ifa);	/* ifa_maybe */
+					IFA_UNLOCK(ifa);
+					if (ifa_maybe != NULL)
+						IFA_REMREF(ifa_maybe);
 					ifa_maybe = ifa;
+				} else {
+					IFA_UNLOCK(ifa);
 				}
 			}
+			IFA_LOCK_ASSERT_NOTHELD(ifa);
 		}
-		
-		if (ifa) {
-			ifaref(ifa);
-		}
-		
-		/*
-		 * ifa is set if we found an exact match.
-		 * take a reference to the ifa before
-		 * releasing the ifp lock
-		 */
 		ifnet_lock_done(ifp);
-		
-		if (ifa) {
+
+		if (ifa != NULL)
 			break;
-		}
 	}
 	ifnet_head_done();
-	if (!ifa)
+
+	if (ifa == NULL)
 		ifa = ifa_maybe;
-	else if (ifa_maybe) {
-		ifafree(ifa_maybe);
-		ifa_maybe = NULL;
-	}
-	return ifa;
+	else if (ifa_maybe != NULL)
+		IFA_REMREF(ifa_maybe);
+
+	return (ifa);
 }
 
 /*
@@ -867,9 +1077,7 @@ next:				continue;
  * a given address.
  */
 struct ifaddr *
-ifaof_ifpforaddr(
-	const struct sockaddr *addr,
-	struct ifnet *ifp)
+ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
 {
 	struct ifaddr *ifa = NULL;
 	const char *cp, *cp2, *cp3;
@@ -880,55 +1088,80 @@ ifaof_ifpforaddr(
 
 	if (af >= AF_MAX)
 		return (NULL);
-	
+
 	ifnet_lock_shared(ifp);
 	for (ifa = ifp->if_addrhead.tqh_first; ifa;
 	     ifa = ifa->ifa_link.tqe_next) {
-		if (ifa->ifa_addr->sa_family != af)
+		IFA_LOCK(ifa);
+		if (ifa->ifa_addr->sa_family != af) {
+			IFA_UNLOCK(ifa);
 			continue;
-		if (ifa_maybe == 0)
+		}
+		if (ifa_maybe == NULL) {
+			IFA_ADDREF_LOCKED(ifa);	/* for ifa_maybe */
 			ifa_maybe = ifa;
+		}
 		if (ifa->ifa_netmask == 0) {
-			if (equal(addr, ifa->ifa_addr) ||
-			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
-			    break;
+			if (equal(addr, ifa->ifa_addr) || (ifa->ifa_dstaddr &&
+			    equal(addr, ifa->ifa_dstaddr))) {
+				IFA_ADDREF_LOCKED(ifa);	/* for caller */
+				IFA_UNLOCK(ifa);
+				break;
+			}
+			IFA_UNLOCK(ifa);
 			continue;
 		}
 		if (ifp->if_flags & IFF_POINTOPOINT) {
-			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))
+			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)) {
+				IFA_ADDREF_LOCKED(ifa);	/* for caller */
+				IFA_UNLOCK(ifa);
 				break;
+			}
 		} else {
-		    	if (equal(addr, ifa->ifa_addr)) {
+			if (equal(addr, ifa->ifa_addr)) {
 				/* exact match */
+				IFA_ADDREF_LOCKED(ifa);	/* for caller */
+				IFA_UNLOCK(ifa);
 				break;
 			}
 			cp = addr->sa_data;
 			cp2 = ifa->ifa_addr->sa_data;
 			cp3 = ifa->ifa_netmask->sa_data;
-			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
+			cplim = ifa->ifa_netmask->sa_len +
+			    (char *)ifa->ifa_netmask;
 			for (; cp3 < cplim; cp3++)
 				if ((*cp++ ^ *cp2++) & *cp3)
 					break;
 			if (cp3 == cplim) {
 				/* subnet match */
 				if (better_ifa_maybe == NULL) {
+					/* for better_ifa_maybe */
+					IFA_ADDREF_LOCKED(ifa);
 					better_ifa_maybe = ifa;
 				}
 			}
 		}
+		IFA_UNLOCK(ifa);
 	}
-	
+
 	if (ifa == NULL) {
 		if (better_ifa_maybe != NULL) {
 			ifa = better_ifa_maybe;
+			better_ifa_maybe = NULL;
 		} else {
 			ifa = ifa_maybe;
+			ifa_maybe = NULL;
 		}
 	}
-	if (ifa) ifaref(ifa);
-	
+
 	ifnet_lock_done(ifp);
-	return ifa;
+
+	if (better_ifa_maybe != NULL)
+		IFA_REMREF(better_ifa_maybe);
+	if (ifa_maybe != NULL)
+		IFA_REMREF(ifa_maybe);
+
+	return (ifa);
 }
 
 #include <net/route.h>
@@ -944,6 +1177,7 @@ link_rtrequest(int cmd, struct rtentry *rt, struct sockaddr *sa)
 	struct ifaddr *ifa;
 	struct sockaddr *dst;
 	struct ifnet *ifp;
+	void (*ifa_rtrequest)(int, struct rtentry *, struct sockaddr *);
 
 	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 	RT_LOCK_ASSERT_HELD(rt);
@@ -951,12 +1185,19 @@ link_rtrequest(int cmd, struct rtentry *rt, struct sockaddr *sa)
 	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
 	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
 		return;
+
+	/* Become a regular mutex, just in case */
+	RT_CONVERT_LOCK(rt);
+
 	ifa = ifaof_ifpforaddr(dst, ifp);
 	if (ifa) {
 		rtsetifa(rt, ifa);
-		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
-			ifa->ifa_rtrequest(cmd, rt, sa);
-		ifafree(ifa);
+		IFA_LOCK_SPIN(ifa);
+		ifa_rtrequest = ifa->ifa_rtrequest;
+		IFA_UNLOCK(ifa);
+		if (ifa_rtrequest != NULL && ifa_rtrequest != link_rtrequest)
+			ifa_rtrequest(cmd, rt, sa);
+		IFA_REMREF(ifa);
 	}
 }
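
The rewritten link_rtrequest() snapshots the ifa_rtrequest callback while holding IFA_LOCK_SPIN and only invokes it after dropping the lock, so protocol callbacks never run with the ifa spin lock held. A minimal sketch of the same idiom:

	void (*cb)(int, struct rtentry *, struct sockaddr *);

	IFA_LOCK_SPIN(ifa);
	cb = ifa->ifa_rtrequest;	/* snapshot under the spin lock */
	IFA_UNLOCK(ifa);
	if (cb != NULL)
		cb(cmd, rt, sa);	/* invoke with no ifa lock held */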
 
@@ -1088,19 +1329,19 @@ ifunit(const char *name)
 
 	len = strlen(name);
 	if (len < 2 || len > IFNAMSIZ)
-		return NULL;
+		return (NULL);
 	cp = name + len - 1;
 	c = *cp;
 	if (c < '0' || c > '9')
-		return NULL;		/* trailing garbage */
+		return (NULL);		/* trailing garbage */
 	unit = 0;
 	m = 1;
 	do {
 		if (cp == name)
-			return NULL;	/* no interface name */
+			return (NULL);	/* no interface name */
 		unit += (c - '0') * m;
 		if (unit > 1000000)
-			return NULL;	/* number is unreasonable */
+			return (NULL);	/* number is unreasonable */
 		m *= 10;
 		c = *--cp;
 	} while (c >= '0' && c <= '9');
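
For reference, ifunit() accepts names of the form <driver><unit> and parses the trailing decimal digits into the unit number. A minimal caller sketch (the name "en0" is illustrative; ifioctl() below does the same with ifr->ifr_name):

	struct ifnet *ifp;

	ifp = ifunit("en0");		/* driver "en", unit 0 */
	if (ifp == NULL)
		return (ENXIO);		/* no such interface */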
@@ -1134,7 +1375,7 @@ if_withname(struct sockaddr *sa)
 
 	if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
 	     (sdl->sdl_nlen > IFNAMSIZ) )
-		return NULL;
+		return (NULL);
 
 	/*
 	 * ifunit wants a null-terminated name.  It may not be null-terminated
@@ -1145,7 +1386,7 @@ if_withname(struct sockaddr *sa)
 
 	bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
 	ifname[sdl->sdl_nlen] = '\0';
-	return ifunit(ifname);
+	return (ifunit(ifname));
 }
 
 
@@ -1163,6 +1404,8 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 	struct kev_msg        ev_msg;
 	struct net_event_data ev_data;
 
+	bzero(&ev_data, sizeof(struct net_event_data));
+	bzero(&ev_msg, sizeof(struct kev_msg));
 	switch (cmd) {
 	case OSIOCGIFCONF32:
 	case SIOCGIFCONF32: {
@@ -1210,21 +1453,26 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 	}
 
 	ifp = ifunit(ifr->ifr_name);
-	if (ifp == 0)
+	if (ifp == NULL)
 		return (ENXIO);
-	switch (cmd) {
 
+	switch (cmd) {
 	case SIOCGIFFLAGS:
 		ifnet_lock_shared(ifp);
 		ifr->ifr_flags = ifp->if_flags;
 		ifnet_lock_done(ifp);
 		break;
 
+	case SIOCGIFCAP:
+		ifnet_lock_shared(ifp);
+		ifr->ifr_reqcap = ifp->if_capabilities;
+		ifr->ifr_curcap = ifp->if_capenable;
+		ifnet_lock_done(ifp);
+		break;
+
 #if CONFIG_MACF_NET
 	case SIOCGIFMAC:
 		error = mac_ifnet_label_get(kauth_cred_get(), ifr, ifp);
-		if (error)
-			return (error);
 		break;
 #endif
 	case SIOCGIFMETRIC:
@@ -1247,19 +1495,27 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 
 	case SIOCSIFFLAGS:
 		error = proc_suser(p);
-		if (error)
-			return (error);
+		if (error != 0)
+			break;
 
-		ifnet_set_flags(ifp, ifr->ifr_flags, (u_int16_t)~IFF_CANTCHANGE);
+		(void) ifnet_set_flags(ifp, ifr->ifr_flags,
+		    (u_int16_t)~IFF_CANTCHANGE);
 
-		error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, 
-				   			cmd, data);
+		/*
+		 * Note that we intentionally ignore any error from below
+		 * for the SIOCSIFFLAGS case.
+		 */
+		(void) ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
+		    cmd, data);
 
-		/* Send the event even upon error from the driver because we changed the flags */
+		/*
+		 * Send the event even upon error from the driver because
+		 * we changed the flags.
+		 */
 		ev_msg.vendor_code    = KEV_VENDOR_APPLE;
 		ev_msg.kev_class      = KEV_NETWORK_CLASS;
 		ev_msg.kev_subclass   = KEV_DL_SUBCLASS;
-		
+
 		ev_msg.event_code = KEV_DL_SIFFLAGS;
 		strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
 		ev_data.if_family = ifp->if_family;
@@ -1272,24 +1528,37 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 		ifnet_touch_lastchange(ifp);
 		break;
 
+	case SIOCSIFCAP:
+		error = proc_suser(p);
+		if (error != 0)
+			break;
+
+		if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
+			error = EINVAL;
+			break;
+		}
+		error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
+		    cmd, data);
+
+		ifnet_touch_lastchange(ifp);
+		break;
+
 #if CONFIG_MACF_NET
 	case SIOCSIFMAC:
 		error = mac_ifnet_label_set(kauth_cred_get(), ifr, ifp);
-		if (error)
-			return (error);
 		break;
 #endif
 	case SIOCSIFMETRIC:
 		error = proc_suser(p);
-		if (error)
-			return (error);
-		ifp->if_metric = ifr->ifr_metric;
+		if (error != 0)
+			break;
 
+		ifp->if_metric = ifr->ifr_metric;
 
 		ev_msg.vendor_code    = KEV_VENDOR_APPLE;
 		ev_msg.kev_class      = KEV_NETWORK_CLASS;
 		ev_msg.kev_subclass   = KEV_DL_SUBCLASS;
-	
+
 		ev_msg.event_code = KEV_DL_SIFMETRICS;
 		strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
 		ev_data.if_family = ifp->if_family;
@@ -1305,115 +1574,135 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 
 	case SIOCSIFPHYS:
 		error = proc_suser(p);
-		if (error)
-			return error;
+		if (error != 0)
+			break;
 
-		error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, 
-							cmd, data);
+		error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
+		    cmd, data);
+		if (error != 0)
+			break;
 
-		if (error == 0) {
-			ev_msg.vendor_code    = KEV_VENDOR_APPLE;
-			ev_msg.kev_class      = KEV_NETWORK_CLASS;
-			ev_msg.kev_subclass   = KEV_DL_SUBCLASS;
-
-			ev_msg.event_code = KEV_DL_SIFPHYS;
-			strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
-			ev_data.if_family = ifp->if_family;
-			ev_data.if_unit   = (u_int32_t) ifp->if_unit;
-			ev_msg.dv[0].data_length = sizeof(struct net_event_data);
-			ev_msg.dv[0].data_ptr    = &ev_data;
-			ev_msg.dv[1].data_length = 0;
-			kev_post_msg(&ev_msg);
-
-			ifnet_touch_lastchange(ifp);
-		}
-		return(error);
+		ev_msg.vendor_code    = KEV_VENDOR_APPLE;
+		ev_msg.kev_class      = KEV_NETWORK_CLASS;
+		ev_msg.kev_subclass   = KEV_DL_SUBCLASS;
+
+		ev_msg.event_code = KEV_DL_SIFPHYS;
+		strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
+		ev_data.if_family = ifp->if_family;
+		ev_data.if_unit   = (u_int32_t) ifp->if_unit;
+		ev_msg.dv[0].data_length = sizeof(struct net_event_data);
+		ev_msg.dv[0].data_ptr    = &ev_data;
+		ev_msg.dv[1].data_length = 0;
+		kev_post_msg(&ev_msg);
+
+		ifnet_touch_lastchange(ifp);
+		break;
 
 	case SIOCSIFMTU:
 	{
 		u_int32_t oldmtu = ifp->if_mtu;
 
 		error = proc_suser(p);
-		if (error)
-			return (error);
-		if (ifp->if_ioctl == NULL)
-			return (EOPNOTSUPP);
-		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
-			return (EINVAL);
-
-		error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, 
-				   			cmd, data);
+		if (error != 0)
+			break;
 
-		if (error == 0) {
-		     ev_msg.vendor_code    = KEV_VENDOR_APPLE;
-		     ev_msg.kev_class      = KEV_NETWORK_CLASS;
-		     ev_msg.kev_subclass   = KEV_DL_SUBCLASS;
-	
-		     ev_msg.event_code = KEV_DL_SIFMTU;
-		     strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
-		     ev_data.if_family = ifp->if_family;
-		     ev_data.if_unit   = (u_int32_t) ifp->if_unit;
-		     ev_msg.dv[0].data_length = sizeof(struct net_event_data);
-		     ev_msg.dv[0].data_ptr    = &ev_data;
-		     ev_msg.dv[1].data_length = 0;
-		     kev_post_msg(&ev_msg);
-
-			ifnet_touch_lastchange(ifp);
-			rt_ifmsg(ifp);
+		if (ifp->if_ioctl == NULL) {
+			error = EOPNOTSUPP;
+			break;
+		}
+		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
+			error = EINVAL;
+			break;
 		}
+		error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
+		    cmd, data);
+		if (error != 0)
+			break;
+
+		ev_msg.vendor_code    = KEV_VENDOR_APPLE;
+		ev_msg.kev_class      = KEV_NETWORK_CLASS;
+		ev_msg.kev_subclass   = KEV_DL_SUBCLASS;
+
+		ev_msg.event_code = KEV_DL_SIFMTU;
+		strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
+		ev_data.if_family = ifp->if_family;
+		ev_data.if_unit   = (u_int32_t) ifp->if_unit;
+		ev_msg.dv[0].data_length = sizeof(struct net_event_data);
+		ev_msg.dv[0].data_ptr    = &ev_data;
+		ev_msg.dv[1].data_length = 0;
+		kev_post_msg(&ev_msg);
+
+		ifnet_touch_lastchange(ifp);
+		rt_ifmsg(ifp);
+
 		/*
 		 * If the link MTU changed, do network layer specific procedure
 		 * and update all route entries associated with the interface,
 		 * so that their MTU metric gets updated.
 		 */
-		if (error == 0 && ifp->if_mtu != oldmtu) {
+		if (ifp->if_mtu != oldmtu) {
 			if_rtmtu_update(ifp);
 #if INET6
 			nd6_setmtu(ifp);
 #endif
 		}
-		return (error);
+		break;
 	}
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		error = proc_suser(p);
-		if (error)
-			return (error);
+		if (error != 0)
+			break;
 
 		/* Don't allow group membership on non-multicast interfaces. */
-		if ((ifp->if_flags & IFF_MULTICAST) == 0)
-			return EOPNOTSUPP;
+		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
+			error = EOPNOTSUPP;
+			break;
+		}
 
-#ifndef __APPLE__
 		/* Don't let users screw up protocols' entries. */
-		if (ifr->ifr_addr.sa_family != AF_LINK)
-			return EINVAL;
-#endif
+		if (ifr->ifr_addr.sa_family != AF_UNSPEC &&
+		    ifr->ifr_addr.sa_family != AF_LINK) {
+			error = EINVAL;
+			break;
+		}
 
+		/*
+		 * A user is permitted to anonymously join a particular link
+		 * multicast group via SIOCADDMULTI.  A subsequent join request
+		 * for the same record, which still has an outstanding refcnt
+		 * from a past if_addmulti_anon(), will not result in an
+		 * EADDRINUSE error (unlike other BSDs.)  Anonymously leaving
+		 * a group is likewise allowed only as long as there is an
+		 * outstanding refcnt held by a previous anonymous request;
+		 * otherwise ENOENT is returned (even if a link-layer multicast
+		 * membership exists for a network-layer membership.)
+		 */
 		if (cmd == SIOCADDMULTI) {
-			error = if_addmulti(ifp, &ifr->ifr_addr, NULL);
+			error = if_addmulti_anon(ifp, &ifr->ifr_addr, NULL);
 			ev_msg.event_code = KEV_DL_ADDMULTI;
 		} else {
-			error = if_delmulti(ifp, &ifr->ifr_addr);
+			error = if_delmulti_anon(ifp, &ifr->ifr_addr);
 			ev_msg.event_code = KEV_DL_DELMULTI;
 		}
-		if (error == 0) {
-		     ev_msg.vendor_code    = KEV_VENDOR_APPLE;
-		     ev_msg.kev_class      = KEV_NETWORK_CLASS;
-		     ev_msg.kev_subclass   = KEV_DL_SUBCLASS;
-		     strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
-	
-		     ev_data.if_family = ifp->if_family;
-		     ev_data.if_unit   = (u_int32_t) ifp->if_unit;
-		     ev_msg.dv[0].data_length = sizeof(struct net_event_data);
-		     ev_msg.dv[0].data_ptr    = &ev_data;
-		     ev_msg.dv[1].data_length = 0;
-		     kev_post_msg(&ev_msg);
-
-		     ifnet_touch_lastchange(ifp);
-		}
-		return error;
+		if (error != 0)
+			break;
+
+		ev_msg.vendor_code    = KEV_VENDOR_APPLE;
+		ev_msg.kev_class      = KEV_NETWORK_CLASS;
+		ev_msg.kev_subclass   = KEV_DL_SUBCLASS;
+		strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
+
+		ev_data.if_family = ifp->if_family;
+		ev_data.if_unit   = (u_int32_t) ifp->if_unit;
+		ev_msg.dv[0].data_length = sizeof(struct net_event_data);
+		ev_msg.dv[0].data_ptr    = &ev_data;
+		ev_msg.dv[1].data_length = 0;
+		kev_post_msg(&ev_msg);
+
+		ifnet_touch_lastchange(ifp);
+		break;
 
 	case SIOCSIFPHYADDR:
 	case SIOCDIFPHYADDR:
@@ -1429,20 +1718,21 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 	case SIOCSIFVLAN:
 	case SIOCSIFBOND:
 		error = proc_suser(p);
-		if (error)
-			return (error);
+		if (error != 0)
+			break;
 
-		error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, 
-				   			cmd, data);
+		error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
+		    cmd, data);
+		if (error != 0)
+			break;
 
-		if (error == 0)
-			ifnet_touch_lastchange(ifp);
-		return error;
+		ifnet_touch_lastchange(ifp);
+		break;
 
 	case SIOCGIFSTATUS:
 		ifs = (struct ifstat *)data;
 		ifs->ascii[0] = '\0';
-		
+
 	case SIOCGIFPSRCADDR:
 	case SIOCGIFPDSTADDR:
 	case SIOCGLIFPHYADDR:
@@ -1450,12 +1740,15 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 	case SIOCGIFMEDIA64:
 	case SIOCGIFGENERIC:
 	case SIOCGIFDEVMTU:
-		return ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, 
-				   		   cmd, data);
+		error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
+		    cmd, data);
+		break;
+
 	case SIOCGIFVLAN:
 	case SIOCGIFBOND:
-		return ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, 
-				   		   cmd, data);
+		error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
+		    cmd, data);
+		break;
 
 	case SIOCGIFWAKEFLAGS:
 		ifnet_lock_shared(ifp);
@@ -1464,24 +1757,21 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 		break;
 
 	case SIOCGIFGETRTREFCNT:
-#if IFNET_ROUTE_REFCNT
 		ifnet_lock_shared(ifp);
 		ifr->ifr_route_refcnt = ifp->if_route_refcnt;
 		ifnet_lock_done(ifp);
 		break;
-#else
-		return (EOPNOTSUPP);
-#endif /* IFNET_ROUTE_REFCNT */
 
 	default:
 		oif_flags = ifp->if_flags;
-		if (so->so_proto == 0)
-			return (EOPNOTSUPP);
+		if (so->so_proto == NULL) {
+			error = EOPNOTSUPP;
+			break;
+		}
 	    {
-		int ocmd = cmd;
+		u_long ocmd = cmd;
 
 		switch (cmd) {
-
 		case SIOCSIFDSTADDR:
 		case SIOCSIFADDR:
 		case SIOCSIFBRDADDR:
@@ -1513,12 +1803,13 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 		case OSIOCGIFNETMASK:
 			cmd = SIOCGIFNETMASK;
 		}
+
 		socket_lock(so, 1);
-		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
-				data, ifp, p));
+		error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
+		    data, ifp, p));
 		socket_unlock(so, 1);
-		switch (ocmd) {
 
+		switch (ocmd) {
 		case OSIOCGIFADDR:
 		case OSIOCGIFDSTADDR:
 		case OSIOCGIFBRDADDR:
@@ -1534,12 +1825,12 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
 		}
 
 		if (error == EOPNOTSUPP || error == ENOTSUP)
-			error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family,
-								cmd, data);
+			error = ifnet_ioctl(ifp,
+			    so->so_proto->pr_domain->dom_family, cmd, data);
 
-		return (error);
+		break;
 	}
-	return (0);
+	return (error);
 }
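
Per the anonymous-membership comment in the SIOCADDMULTI/SIOCDELMULTI case above, those ioctls now route through if_addmulti_anon()/if_delmulti_anon() and accept only AF_UNSPEC or AF_LINK addresses. A hedged userland sketch of an anonymous link-layer join (the interface name and group MAC are illustrative, and sufficient privileges are assumed):

	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <err.h>
	#include <string.h>

	struct ifreq ifr;
	int s = socket(AF_INET, SOCK_DGRAM, 0);

	memset(&ifr, 0, sizeof (ifr));
	strlcpy(ifr.ifr_name, "en0", sizeof (ifr.ifr_name));
	ifr.ifr_addr.sa_family = AF_UNSPEC;
	ifr.ifr_addr.sa_len = sizeof (ifr.ifr_addr);
	memcpy(ifr.ifr_addr.sa_data, "\x01\x00\x5e\x00\x00\xfb", 6);

	if (ioctl(s, SIOCADDMULTI, &ifr) == -1)	/* anonymous join */
		err(1, "SIOCADDMULTI");
	if (ioctl(s, SIOCDELMULTI, &ifr) == -1)	/* anonymous leave */
		err(1, "SIOCDELMULTI");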
 
 int
@@ -1564,47 +1855,43 @@ ifnet_set_promiscuous(
 	ifnet_t	ifp,
 	int pswitch)
 {
-	struct ifreq ifr;
 	int error = 0;
-	int oldflags;
-	int locked = 0;
-	int changed = 0;
+	int oldflags = 0;
+	int newflags = 0;
 
 	ifnet_lock_exclusive(ifp);
-	locked = 1;
 	oldflags = ifp->if_flags;
-	if (pswitch) {
-		/*
-		 * If the device is not configured up, we cannot put it in
-		 * promiscuous mode.
-		 */
-		if ((ifp->if_flags & IFF_UP) == 0) {
-			error = ENETDOWN;
-			goto done;
-		}
-		if (ifp->if_pcount++ != 0) {
-			goto done;
-		}
+	ifp->if_pcount += pswitch ? 1 : -1;
+
+	if (ifp->if_pcount > 0)
 		ifp->if_flags |= IFF_PROMISC;
-	} else {
-		if (--ifp->if_pcount > 0)
-			goto done;
+	else
 		ifp->if_flags &= ~IFF_PROMISC;
-	}
-	ifr.ifr_flags = ifp->if_flags;
-	locked = 0;
+
+	newflags = ifp->if_flags;
 	ifnet_lock_done(ifp);
-	error = ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, &ifr);
-	if (error == 0)
-		rt_ifmsg(ifp);
-	else
-		ifp->if_flags = oldflags;
-done:
-	if (locked) ifnet_lock_done(ifp);
-	if (changed) {
-		log(LOG_INFO, "%s%d: promiscuous mode %s\n",
+
+	if (newflags != oldflags && (newflags & IFF_UP) != 0) {
+		error = ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
+		if (error == 0) {
+			rt_ifmsg(ifp);
+		} else {
+			ifnet_lock_exclusive(ifp);
+			/* revert the flags */
+			ifp->if_pcount -= pswitch ? 1 : -1;
+			if (ifp->if_pcount > 0)
+				ifp->if_flags |= IFF_PROMISC;
+			else
+				ifp->if_flags &= ~IFF_PROMISC;
+			ifnet_lock_done(ifp);
+		}
+	}
+
+	if (newflags != oldflags) {
+		log(LOG_INFO, "%s%d: promiscuous mode %s%s\n",
 		    ifp->if_name, ifp->if_unit,
-		    pswitch != 0 ? "enabled" : "disabled");
+		    (newflags & IFF_PROMISC) != 0 ? "enable" : "disable",
+		    error != 0 ? " failed" : " succeeded");
 	}
 	return error;
 }
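
The reworked ifnet_set_promiscuous() balances nested requests through if_pcount, so callers simply pair an enable with a later disable; IFF_PROMISC is cleared only once the count drops to zero. A minimal caller sketch:

	int error;

	error = ifnet_set_promiscuous(ifp, 1);	/* if_pcount++, sets IFF_PROMISC */
	if (error == 0) {
		/* ... observe traffic ... */
		(void) ifnet_set_promiscuous(ifp, 0);	/* if_pcount--, may clear it */
	}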
@@ -1624,7 +1911,7 @@ ifconf(u_long cmd, user_addr_t ifrp, int * ret_space)
 	struct ifreq ifr;
 	int error = 0;
 	size_t space;
-	
+
 	/*
 	 * Zero the ifr buffer to make sure we don't
 	 * disclose the contents of the stack.
@@ -1633,7 +1920,8 @@ ifconf(u_long cmd, user_addr_t ifrp, int * ret_space)
 
 	space = *ret_space;
 	ifnet_head_lock_shared();
-	for (ifp = ifnet_head.tqh_first; space > sizeof(ifr) && ifp; ifp = ifp->if_link.tqe_next) {
+	for (ifp = ifnet_head.tqh_first; space > sizeof (ifr) && ifp != NULL;
+	    ifp = ifp->if_link.tqe_next) {
 		char workbuf[64];
 		size_t ifnlen, addrs;
 
@@ -1645,17 +1933,22 @@ ifconf(u_long cmd, user_addr_t ifrp, int * ret_space)
 		} else {
 			strlcpy(ifr.ifr_name, workbuf, IFNAMSIZ);
 		}
-		
+
 		ifnet_lock_shared(ifp);
 
 		addrs = 0;
 		ifa = ifp->if_addrhead.tqh_first;
 		for ( ; space > sizeof (ifr) && ifa;
 		    ifa = ifa->ifa_link.tqe_next) {
-			struct sockaddr *sa = ifa->ifa_addr;
+			struct sockaddr *sa;
+
+			IFA_LOCK(ifa);
+			sa = ifa->ifa_addr;
 #ifndef __APPLE__
-			if (curproc->p_prison && prison_if(curproc, sa))
+			if (curproc->p_prison && prison_if(curproc, sa)) {
+				IFA_UNLOCK(ifa);
 				continue;
+			}
 #endif
 			addrs++;
 			if (cmd == OSIOCGIFCONF32 || cmd == OSIOCGIFCONF64) {
@@ -1663,30 +1956,38 @@ ifconf(u_long cmd, user_addr_t ifrp, int * ret_space)
 					 (struct osockaddr *)&ifr.ifr_addr;
 				ifr.ifr_addr = *sa;
 				osa->sa_family = sa->sa_family;
-				error = copyout((caddr_t)&ifr, ifrp, sizeof(ifr));
+				error = copyout((caddr_t)&ifr, ifrp,
+				    sizeof (ifr));
 				ifrp += sizeof(struct ifreq);
 			} else if (sa->sa_len <= sizeof(*sa)) {
 				ifr.ifr_addr = *sa;
-				error = copyout((caddr_t)&ifr, ifrp, sizeof(ifr));
+				error = copyout((caddr_t)&ifr, ifrp,
+				    sizeof (ifr));
 				ifrp += sizeof(struct ifreq);
 			} else {
-				if (space < sizeof (ifr) + sa->sa_len - sizeof(*sa))
+				if (space <
+				    sizeof (ifr) + sa->sa_len - sizeof(*sa)) {
+					IFA_UNLOCK(ifa);
 					break;
+				}
 				space -= sa->sa_len - sizeof(*sa);
-				error = copyout((caddr_t)&ifr, ifrp, sizeof (ifr.ifr_name));
+				error = copyout((caddr_t)&ifr, ifrp,
+				    sizeof (ifr.ifr_name));
 				if (error == 0) {
-				    error = copyout((caddr_t)sa,
-						(ifrp + offsetof(struct ifreq, ifr_addr)),
-						sa->sa_len);
+				    error = copyout((caddr_t)sa, (ifrp +
+				        offsetof(struct ifreq, ifr_addr)),
+					sa->sa_len);
 				}
-				ifrp += (sa->sa_len + offsetof(struct ifreq, ifr_addr));
+				ifrp += (sa->sa_len + offsetof(struct ifreq,
+				    ifr_addr));
 			}
+			IFA_UNLOCK(ifa);
 			if (error)
 				break;
 			space -= sizeof (ifr);
 		}
 		ifnet_lock_done(ifp);
-		
+
 		if (error)
 			break;
 		if (!addrs) {
@@ -1738,64 +2039,300 @@ if_allmulti(struct ifnet *ifp, int onswitch)
 	return error;
 }
 
+static struct ifmultiaddr *
+ifma_alloc(int how)
+{
+	struct ifmultiaddr *ifma;
+
+	ifma = (how == M_WAITOK) ? zalloc(ifma_zone) :
+	    zalloc_noblock(ifma_zone);
+
+	if (ifma != NULL) {
+		bzero(ifma, ifma_size);
+		lck_mtx_init(&ifma->ifma_lock, ifa_mtx_grp, ifa_mtx_attr);
+		ifma->ifma_debug |= IFD_ALLOC;
+		if (ifma_debug != 0) {
+			ifma->ifma_debug |= IFD_DEBUG;
+			ifma->ifma_trace = ifma_trace;
+		}
+	}
+	return (ifma);
+}
+
+static void
+ifma_free(struct ifmultiaddr *ifma)
+{
+	IFMA_LOCK(ifma);
+
+	if (ifma->ifma_protospec != NULL) {
+		panic("%s: Protospec not NULL for ifma=%p", __func__, ifma);
+		/* NOTREACHED */
+	} else if ((ifma->ifma_flags & IFMAF_ANONYMOUS) ||
+	    ifma->ifma_anoncnt != 0) {
+		panic("%s: Freeing ifma=%p with outstanding anon req",
+		    __func__, ifma);
+		/* NOTREACHED */
+	} else if (ifma->ifma_debug & IFD_ATTACHED) {
+		panic("%s: ifma=%p attached to ifma_ifp=%p is being freed",
+		    __func__, ifma, ifma->ifma_ifp);
+		/* NOTREACHED */
+	} else if (!(ifma->ifma_debug & IFD_ALLOC)) {
+		panic("%s: ifma %p cannot be freed", __func__, ifma);
+		/* NOTREACHED */
+	} else if (ifma->ifma_refcount != 0) {
+		panic("%s: non-zero refcount ifma=%p", __func__, ifma);
+		/* NOTREACHED */
+	} else if (ifma->ifma_reqcnt != 0) {
+		panic("%s: non-zero reqcnt ifma=%p", __func__, ifma);
+		/* NOTREACHED */
+	} else if (ifma->ifma_ifp != NULL) {
+		panic("%s: non-NULL ifma_ifp=%p for ifma=%p", __func__,
+		    ifma->ifma_ifp, ifma);
+		/* NOTREACHED */
+	} else if (ifma->ifma_ll != NULL) {
+		panic("%s: non-NULL ifma_ll=%p for ifma=%p", __func__,
+		    ifma->ifma_ll, ifma);
+		/* NOTREACHED */
+	}
+	ifma->ifma_debug &= ~IFD_ALLOC;
+	if ((ifma->ifma_debug & (IFD_DEBUG | IFD_TRASHED)) ==
+	    (IFD_DEBUG | IFD_TRASHED)) {
+		lck_mtx_lock(&ifma_trash_lock);
+		TAILQ_REMOVE(&ifma_trash_head, (struct ifmultiaddr_dbg *)ifma,
+		    ifma_trash_link);
+		lck_mtx_unlock(&ifma_trash_lock);
+		ifma->ifma_debug &= ~IFD_TRASHED;
+	}
+	IFMA_UNLOCK(ifma);
+
+	if (ifma->ifma_addr != NULL) {
+		FREE(ifma->ifma_addr, M_IFADDR);
+		ifma->ifma_addr = NULL;
+	}
+	lck_mtx_destroy(&ifma->ifma_lock, ifa_mtx_grp);
+	zfree(ifma_zone, ifma);
+}
+
+static void
+ifma_trace(struct ifmultiaddr *ifma, int refhold)
+{
+	struct ifmultiaddr_dbg *ifma_dbg = (struct ifmultiaddr_dbg *)ifma;
+	ctrace_t *tr;
+	u_int32_t idx;
+	u_int16_t *cnt;
+
+	if (!(ifma->ifma_debug & IFD_DEBUG)) {
+		panic("%s: ifma %p has no debug structure", __func__, ifma);
+		/* NOTREACHED */
+	}
+	if (refhold) {
+		cnt = &ifma_dbg->ifma_refhold_cnt;
+		tr = ifma_dbg->ifma_refhold;
+	} else {
+		cnt = &ifma_dbg->ifma_refrele_cnt;
+		tr = ifma_dbg->ifma_refrele;
+	}
+
+	idx = atomic_add_16_ov(cnt, 1) % IFMA_TRACE_HIST_SIZE;
+	ctrace_record(&tr[idx]);
+}
+
 void
-ifma_reference(
-	struct ifmultiaddr *ifma)
+ifma_addref(struct ifmultiaddr *ifma, int locked)
 {
-	if (OSIncrementAtomic(&ifma->ifma_refcount) <= 0)
-		panic("ifma_reference: ifma already released or invalid\n");
+	if (!locked)
+		IFMA_LOCK(ifma);
+	else
+		IFMA_LOCK_ASSERT_HELD(ifma);
+
+	if (++ifma->ifma_refcount == 0) {
+		panic("%s: ifma=%p wraparound refcnt", __func__, ifma);
+		/* NOTREACHED */
+	} else if (ifma->ifma_trace != NULL) {
+		(*ifma->ifma_trace)(ifma, TRUE);
+	}
+	if (!locked)
+		IFMA_UNLOCK(ifma);
 }
 
 void
-ifma_release(
-	struct ifmultiaddr *ifma)
-{
-	while (ifma) {
-		struct ifmultiaddr *next;
-		int32_t prevValue = OSDecrementAtomic(&ifma->ifma_refcount);
-		if (prevValue < 1)
-			panic("ifma_release: ifma already released or invalid\n");
-		if (prevValue != 1)
-			break;
-		
-		/* Allow the allocator of the protospec to free it */
-		if (ifma->ifma_protospec && ifma->ifma_free) {
-			ifma->ifma_free(ifma->ifma_protospec);
-		}
-		
-		next = ifma->ifma_ll;
-		FREE(ifma->ifma_addr, M_IFMADDR);
-		FREE(ifma, M_IFMADDR);
-		ifma = next;
+ifma_remref(struct ifmultiaddr *ifma)
+{
+	struct ifmultiaddr *ll;
+
+	IFMA_LOCK(ifma);
+
+	if (ifma->ifma_refcount == 0) {
+		panic("%s: ifma=%p negative refcnt", __func__, ifma);
+		/* NOTREACHED */
+	} else if (ifma->ifma_trace != NULL) {
+		(*ifma->ifma_trace)(ifma, FALSE);
+	}
+
+	--ifma->ifma_refcount;
+	if (ifma->ifma_refcount > 0) {
+		IFMA_UNLOCK(ifma);
+		return;
 	}
+
+	ll = ifma->ifma_ll;
+	ifma->ifma_ifp = NULL;
+	ifma->ifma_ll = NULL;
+	IFMA_UNLOCK(ifma);
+	ifma_free(ifma);	/* deallocate it */
+
+	if (ll != NULL)
+		IFMA_REMREF(ll);
+}
+
+static void
+if_attach_ifma(struct ifnet *ifp, struct ifmultiaddr *ifma, int anon)
+{
+	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
+	IFMA_LOCK_ASSERT_HELD(ifma);
+
+	if (ifma->ifma_ifp != ifp) {
+		panic("%s: Mismatch ifma_ifp=%p != ifp=%p", __func__,
+		    ifma->ifma_ifp, ifp);
+		/* NOTREACHED */
+	} else if (ifma->ifma_debug & IFD_ATTACHED) {
+		panic("%s: Attempt to attach an already attached ifma=%p",
+		    __func__, ifma);
+		/* NOTREACHED */
+	} else if (anon && (ifma->ifma_flags & IFMAF_ANONYMOUS)) {
+		panic("%s: ifma=%p unexpected IFMAF_ANONYMOUS", __func__, ifma);
+		/* NOTREACHED */
+	} else if (ifma->ifma_debug & IFD_TRASHED) {
+		panic("%s: Attempt to reattach a detached ifma=%p",
+		    __func__, ifma);
+		/* NOTREACHED */
+	}
+
+	ifma->ifma_reqcnt++;
+	VERIFY(ifma->ifma_reqcnt == 1);
+	IFMA_ADDREF_LOCKED(ifma);
+	ifma->ifma_debug |= IFD_ATTACHED;
+	if (anon) {
+		ifma->ifma_anoncnt++;
+		VERIFY(ifma->ifma_anoncnt == 1);
+		ifma->ifma_flags |= IFMAF_ANONYMOUS;
+	}
+
+	LIST_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
 }
 
- /*
-  * Find an ifmultiaddr that matches a socket address on an interface. 
-  *
-  * Caller is responsible for holding the ifnet_lock while calling
-  * this function.
-  */
 static int
-if_addmulti_doesexist(
-	struct ifnet *ifp,
-	const struct sockaddr *sa,
-	struct ifmultiaddr **retifma)
+if_detach_ifma(struct ifnet *ifp, struct ifmultiaddr *ifma, int anon)
+{
+	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
+	IFMA_LOCK_ASSERT_HELD(ifma);
+
+	if (ifma->ifma_reqcnt == 0) {
+		panic("%s: ifma=%p negative reqcnt", __func__, ifma);
+		/* NOTREACHED */
+	} else if (anon && !(ifma->ifma_flags & IFMAF_ANONYMOUS)) {
+		panic("%s: ifma=%p missing IFMAF_ANONYMOUS", __func__, ifma);
+		/* NOTREACHED */
+	} else if (anon && ifma->ifma_anoncnt == 0) {
+		panic("%s: ifma=%p negative anonreqcnt", __func__, ifma);
+		/* NOTREACHED */
+	} else if (ifma->ifma_ifp != ifp) {
+		panic("%s: Mismatch ifma_ifp=%p, ifp=%p", __func__,
+		    ifma->ifma_ifp, ifp);
+		/* NOTREACHED */
+	}
+
+	if (anon) {
+		--ifma->ifma_anoncnt;
+		if (ifma->ifma_anoncnt > 0)
+			return (0);
+		ifma->ifma_flags &= ~IFMAF_ANONYMOUS;
+	}
+
+	--ifma->ifma_reqcnt;
+	if (ifma->ifma_reqcnt > 0)
+		return (0);
+
+	if (ifma->ifma_protospec != NULL) {
+		panic("%s: Protospec not NULL for ifma=%p", __func__, ifma);
+		/* NOTREACHED */
+	} else if ((ifma->ifma_flags & IFMAF_ANONYMOUS) ||
+	    ifma->ifma_anoncnt != 0) {
+		panic("%s: Detaching ifma=%p with outstanding anon req",
+		    __func__, ifma);
+		/* NOTREACHED */
+	} else if (!(ifma->ifma_debug & IFD_ATTACHED)) {
+		panic("%s: Attempt to detach an unattached address ifma=%p",
+		    __func__, ifma);
+		/* NOTREACHED */
+	} else if (ifma->ifma_debug & IFD_TRASHED) {
+		panic("%s: ifma %p is already in trash list", __func__, ifma);
+		/* NOTREACHED */
+	}
+
+	/*
+	 * NOTE: Caller calls IFMA_REMREF
+	 */
+	ifma->ifma_debug &= ~IFD_ATTACHED;
+	LIST_REMOVE(ifma, ifma_link);
+	if (LIST_EMPTY(&ifp->if_multiaddrs))
+		ifp->if_updatemcasts = 0;
+
+	if (ifma->ifma_debug & IFD_DEBUG) {
+		/* Become a regular mutex, just in case */
+		IFMA_CONVERT_LOCK(ifma);
+		lck_mtx_lock(&ifma_trash_lock);
+		TAILQ_INSERT_TAIL(&ifma_trash_head,
+		    (struct ifmultiaddr_dbg *)ifma, ifma_trash_link);
+		lck_mtx_unlock(&ifma_trash_lock);
+		ifma->ifma_debug |= IFD_TRASHED;
+	}
+
+	return (1);
+}
+
+/*
+ * Find an ifmultiaddr that matches a socket address on an interface.
+ *
+ * Caller is responsible for holding the ifnet_lock while calling
+ * this function.
+ */
+static int
+if_addmulti_doesexist(struct ifnet *ifp, const struct sockaddr *sa,
+    struct ifmultiaddr **retifma, int anon)
 {
 	struct ifmultiaddr *ifma;
-	for (ifma = ifp->if_multiaddrs.lh_first; ifma;
-	     ifma = ifma->ifma_link.le_next) {
-		if (equal(sa, ifma->ifma_addr)) {
-			ifma->ifma_usecount++;
-			if (retifma) {
-				*retifma = ifma;
-				ifma_reference(*retifma);
+
+	for (ifma = LIST_FIRST(&ifp->if_multiaddrs); ifma != NULL;
+	     ifma = LIST_NEXT(ifma, ifma_link)) {
+		IFMA_LOCK_SPIN(ifma);
+		if (!equal(sa, ifma->ifma_addr)) {
+			IFMA_UNLOCK(ifma);
+			continue;
+		}
+		if (anon) {
+			VERIFY(!(ifma->ifma_flags & IFMAF_ANONYMOUS) ||
+			    ifma->ifma_anoncnt != 0);
+			VERIFY((ifma->ifma_flags & IFMAF_ANONYMOUS) ||
+			    ifma->ifma_anoncnt == 0);
+			ifma->ifma_anoncnt++;
+			if (!(ifma->ifma_flags & IFMAF_ANONYMOUS)) {
+				VERIFY(ifma->ifma_anoncnt == 1);
+				ifma->ifma_flags |= IFMAF_ANONYMOUS;
 			}
-			return 0;
 		}
+		if (!anon || ifma->ifma_anoncnt == 1) {
+			ifma->ifma_reqcnt++;
+			VERIFY(ifma->ifma_reqcnt > 1);
+		}
+		if (retifma != NULL) {
+			*retifma = ifma;
+			IFMA_ADDREF_LOCKED(ifma);
+		}
+		IFMA_UNLOCK(ifma);
+		return (0);
 	}
-	
-	return ENOENT;
+	return (ENOENT);
 }
 
 /*
@@ -1864,67 +2401,114 @@ copy_and_normalize(
 }
 
 /*
- * Add a multicast listenership to the interface in question.
- * The link layer provides a routine which converts
+ * Network-layer protocol domains which hold references to the underlying
+ * link-layer record must use this routine.
  */
 int
-if_addmulti(
-	struct ifnet *ifp,	/* interface to manipulate */
-	const struct sockaddr *sa,	/* address to add */
-	struct ifmultiaddr **retifma)
+if_addmulti(struct ifnet *ifp, const struct sockaddr *sa,
+    struct ifmultiaddr **retifma)
+{
+	return (if_addmulti_common(ifp, sa, retifma, 0));
+}
+
+/*
+ * Callers other than network-layer protocol domains (which hold references
+ * to the underlying link-layer record) must use this routine: the
+ * SIOCADDMULTI ioctl, ifnet_add_multicast(), AppleTalk, if_bond.
+ */
+int
+if_addmulti_anon(struct ifnet *ifp, const struct sockaddr *sa,
+    struct ifmultiaddr **retifma)
+{
+	return (if_addmulti_common(ifp, sa, retifma, 1));
+}
+
+/*
+ * Register an additional multicast address with a network interface.
+ *
+ * - If the address is already present, bump the reference count on the
+ *   address and return.
+ * - If the address is not link-layer, look up a link layer address.
+ * - Allocate address structures for one or both addresses, and attach to the
+ *   multicast address list on the interface.  If automatically adding a link
+ *   layer address, the protocol address will own a reference to the link
+ *   layer address, to be freed when it is freed.
+ * - Notify the network device driver of an addition to the multicast address
+ *   list.
+ *
+ * 'sa' points to caller-owned memory with the desired multicast address.
+ *
+ * 'retifma' will be used to return a pointer to the resulting multicast
+ * address reference, if desired.
+ *
+ * 'anon' indicates a link-layer address with no protocol address reference
+ * made to it.  Anything other than a network-layer protocol domain request
+ * is considered anonymous.
+ */
+static int
+if_addmulti_common(struct ifnet *ifp, const struct sockaddr *sa,
+    struct ifmultiaddr **retifma, int anon)
 {
 	struct sockaddr_storage storage;
 	struct sockaddr *llsa = NULL;
 	struct sockaddr *dupsa = NULL;
-	int error = 0;
+	int error = 0, ll_firstref = 0, lladdr;
 	struct ifmultiaddr *ifma = NULL;
 	struct ifmultiaddr *llifma = NULL;
-	
+
+	/* Only AF_UNSPEC/AF_LINK is allowed for an "anonymous" address */
+	VERIFY(!anon || sa->sa_family == AF_UNSPEC ||
+	    sa->sa_family == AF_LINK);
+
 	/* If sa is a AF_LINK or AF_UNSPEC, duplicate and normalize it */
 	if (sa->sa_family == AF_LINK || sa->sa_family == AF_UNSPEC) {
 		dupsa = copy_and_normalize(sa);
 		if (dupsa == NULL) {
-			return ENOMEM;
+			error = ENOMEM;
+			goto cleanup;
 		}
 		sa = dupsa;
 	}
-	
+
 	ifnet_lock_exclusive(ifp);
-	error = if_addmulti_doesexist(ifp, sa, retifma);
-	ifnet_lock_done(ifp);
-	
-	if (error == 0) {
+	if (!(ifp->if_flags & IFF_MULTICAST)) {
+		error = EADDRNOTAVAIL;
+		ifnet_lock_done(ifp);
 		goto cleanup;
 	}
 
+	/* If the address is already present, return a new reference to it */
+	error = if_addmulti_doesexist(ifp, sa, retifma, anon);
+	ifnet_lock_done(ifp);
+	if (error == 0)
+		goto cleanup;
+
 	/*
-	 * Give the link layer a chance to accept/reject it, and also
-	 * find out which AF_LINK address this maps to, if it isn't one
-	 * already.
+	 * The address isn't already present; give the link layer a chance
+	 * to accept/reject it, and also find out which AF_LINK address this
+	 * maps to, if it isn't one already.
 	 */
-	error = dlil_resolve_multi(ifp, sa, (struct sockaddr*)&storage,
-							   sizeof(storage));
+	error = dlil_resolve_multi(ifp, sa, (struct sockaddr *)&storage,
+	    sizeof (storage));
 	if (error == 0 && storage.ss_len != 0) {
-		llsa = copy_and_normalize((struct sockaddr*)&storage);
+		llsa = copy_and_normalize((struct sockaddr *)&storage);
 		if (llsa == NULL) {
 			error = ENOMEM;
 			goto cleanup;
 		}
-		
-		MALLOC(llifma, struct ifmultiaddr *, sizeof *llifma, M_IFMADDR, M_WAITOK);
+
+		llifma = ifma_alloc(M_WAITOK);
 		if (llifma == NULL) {
 			error = ENOMEM;
 			goto cleanup;
 		}
 	}
-	
+
 	/* to be similar to FreeBSD */
-	if (error == EOPNOTSUPP) {
+	if (error == EOPNOTSUPP)
 		error = 0;
-	}
-	else if (error) {
+	else if (error != 0)
 		goto cleanup;
-	}
 
 	/* Allocate while we aren't holding any locks */
 	if (dupsa == NULL) {
@@ -1934,185 +2518,212 @@ if_addmulti(
 			goto cleanup;
 		}
 	}
-	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK);
+	ifma = ifma_alloc(M_WAITOK);
 	if (ifma == NULL) {
 		error = ENOMEM;
 		goto cleanup;
 	}
-	
+
 	ifnet_lock_exclusive(ifp);
 	/*
 	 * Check again for the matching multicast.
 	 */
-	if ((error = if_addmulti_doesexist(ifp, sa, retifma)) == 0) {
+	error = if_addmulti_doesexist(ifp, sa, retifma, anon);
+	if (error == 0) {
 		ifnet_lock_done(ifp);
 		goto cleanup;
 	}
 
-	bzero(ifma, sizeof(*ifma));
-	ifma->ifma_addr = dupsa;
-	ifma->ifma_ifp = ifp;
-	ifma->ifma_usecount = 1;
-	ifma->ifma_refcount = 1;
-	
-	if (llifma != 0) {
-		if (if_addmulti_doesexist(ifp, llsa, &ifma->ifma_ll) == 0) {
-			FREE(llsa, M_IFMADDR);
-			FREE(llifma, M_IFMADDR);
+	if (llifma != NULL) {
+		VERIFY(!anon);	/* must not get here if "anonymous" */
+		if (if_addmulti_doesexist(ifp, llsa, &ifma->ifma_ll, 0) == 0) {
+			FREE(llsa, M_IFADDR);
+			llsa = NULL;
+			ifma_free(llifma);
+			llifma = NULL;
+			VERIFY(ifma->ifma_ll->ifma_ifp == ifp);
 		} else {
-			bzero(llifma, sizeof(*llifma));
+			ll_firstref = 1;
 			llifma->ifma_addr = llsa;
 			llifma->ifma_ifp = ifp;
-			llifma->ifma_usecount = 1;
-			llifma->ifma_refcount = 1;
-			LIST_INSERT_HEAD(&ifp->if_multiaddrs, llifma, ifma_link);
-
+			IFMA_LOCK(llifma);
+			if_attach_ifma(ifp, llifma, 0);
+			/* add extra refcnt for ifma */
+			IFMA_ADDREF_LOCKED(llifma);
+			IFMA_UNLOCK(llifma);
 			ifma->ifma_ll = llifma;
-			ifma_reference(ifma->ifma_ll);
 		}
 	}
-	
-	LIST_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
-	
-	if (retifma) {
+
+	/* "anonymous" request should not result in network address */
+	VERIFY(!anon || ifma->ifma_ll == NULL);
+
+	ifma->ifma_addr = dupsa;
+	ifma->ifma_ifp = ifp;
+	IFMA_LOCK(ifma);
+	if_attach_ifma(ifp, ifma, anon);
+	IFMA_ADDREF_LOCKED(ifma);		/* for this routine */
+	if (retifma != NULL) {
 		*retifma = ifma;
-		ifma_reference(*retifma);
+		IFMA_ADDREF_LOCKED(*retifma);	/* for caller */
 	}
-
+	lladdr = (ifma->ifma_addr->sa_family == AF_UNSPEC ||
+	    ifma->ifma_addr->sa_family == AF_LINK);
+	IFMA_UNLOCK(ifma);
 	ifnet_lock_done(ifp);
-	
-	if (llsa != 0)
-		rt_newmaddrmsg(RTM_NEWMADDR, ifma);
+
+	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
+	IFMA_REMREF(ifma);			/* for this routine */
 
 	/*
 	 * We are certain we have added something, so call down to the
-	 * interface to let them know about it.
+	 * interface to let it know about it.  Do this only for a newly-
+	 * added AF_LINK/AF_UNSPEC address in the if_multiaddrs set.
 	 */
-	ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
-	
-	return 0;
-	
+	if (lladdr || ll_firstref)
+		(void) ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
+
+	if (ifp->if_updatemcasts > 0)
+		ifp->if_updatemcasts = 0;
+
+	return (0);
+
 cleanup:
-	if (ifma)
-		FREE(ifma, M_IFADDR);
-	if (dupsa)
+	if (ifma != NULL)
+		ifma_free(ifma);
+	if (dupsa != NULL)
 		FREE(dupsa, M_IFADDR);
-	if (llifma)
-		FREE(llifma, M_IFADDR);
-	if (llsa)
+	if (llifma != NULL)
+		ifma_free(llifma);
+	if (llsa != NULL)
 		FREE(llsa, M_IFADDR);
-	
-	return error;
+
+	return (error);
 }
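
Taken together, a network-layer protocol joins with if_addmulti(), which hands back an ifma holding a caller reference (and, internally, a reference on the link-layer record), and later leaves with if_delmulti_ifma() (below) before dropping its own reference. A hedged sketch of the pairing, not lifted from any one protocol:

	struct ifmultiaddr *ifma = NULL;
	int error;

	error = if_addmulti(ifp, sa, &ifma);	/* ifma returned with a ref for us */
	if (error != 0)
		return (error);
	/* ... membership in use ... */
	if_delmulti_ifma(ifma);			/* drop the membership request */
	IFMA_REMREF(ifma);			/* release the caller reference */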
 
+/*
+ * Delete a multicast group membership by network-layer group address.
+ * This routine is deprecated.
+ */
 int
-if_delmultiaddr(
-	struct ifmultiaddr *ifma,
-	int locked)
+if_delmulti(struct ifnet *ifp, const struct sockaddr *sa)
 {
-	struct ifnet *ifp;
-	int	do_del_multi = 0;
-	
-	ifp = ifma->ifma_ifp;
-	
-	if (!locked && ifp) {
-		ifnet_lock_exclusive(ifp);
-	}
-	
-	while (ifma != NULL) {
-		struct ifmultiaddr *ll_ifma;
-		
-		if (ifma->ifma_usecount > 1) {
-			ifma->ifma_usecount--;
-			break;
-		}
-		
-		if (ifp)
-			LIST_REMOVE(ifma, ifma_link);
-	
-		ll_ifma = ifma->ifma_ll;
-	
-		if (ll_ifma) { /* send a routing msg for network addresses only */
-			if (ifp)
-				ifnet_lock_done(ifp);
-			rt_newmaddrmsg(RTM_DELMADDR, ifma);
-			if (ifp)
-				ifnet_lock_exclusive(ifp);
-		}
-		
-		/*
-		 * Make sure the interface driver is notified
-		 * in the case of a link layer mcast group being left.
-		 */
-		if (ll_ifma == 0) {
-			if (ifp && ifma->ifma_addr->sa_family == AF_LINK)
-				do_del_multi = 1;
-			break;
-		}
-		
-		if (ifp)
-			ifma_release(ifma);
-	
-		ifma = ll_ifma;
-	}
-	
-	if (!locked && ifp) {
-		/* This wasn't initially locked, we should unlock it */
-		ifnet_lock_done(ifp);
-	}
-	
-	if (do_del_multi) {
-		if (locked)
-			ifnet_lock_done(ifp);
-		ifnet_ioctl(ifp, 0, SIOCDELMULTI, NULL);
-		if (locked)
-			ifnet_lock_exclusive(ifp);
-	}
-	
-	return 0;
+	return (if_delmulti_common(NULL, ifp, sa, 0));
 }
 
 /*
- * Remove a reference to a multicast address on this interface.  Yell
- * if the request does not match an existing membership.
+ * Delete a multicast group membership by group membership pointer.
+ * Network-layer protocol domains must use this routine.
  */
 int
-if_delmulti(
-	struct ifnet *ifp,
-	const struct sockaddr *sa)
+if_delmulti_ifma(struct ifmultiaddr *ifma)
+{
+	return (if_delmulti_common(ifma, NULL, NULL, 0));
+}
+
+/*
+ * Callers other than network-layer protocol domains (which hold references
+ * to the underlying link-layer record) must use this routine: the
+ * SIOCDELMULTI ioctl, ifnet_remove_multicast(), AppleTalk, if_bond.
+ */
+int
+if_delmulti_anon(struct ifnet *ifp, const struct sockaddr *sa)
+{
+	return (if_delmulti_common(NULL, ifp, sa, 1));
+}
+
+/*
+ * Delete a multicast group membership by network-layer group address.
+ *
+ * Returns ENOENT if the entry could not be found.
+ */
+static int
+if_delmulti_common(struct ifmultiaddr *ifma, struct ifnet *ifp,
+    const struct sockaddr *sa, int anon)
 {
-	struct ifmultiaddr	*ifma;
 	struct sockaddr		*dupsa = NULL;
-	int retval = 0;
+	int			lastref, ll_lastref = 0, lladdr;
+	struct ifmultiaddr	*ll = NULL;
 
-	if (sa->sa_family == AF_LINK || sa->sa_family == AF_UNSPEC) {
+	/* sanity check for callers */
+	VERIFY(ifma != NULL || (ifp != NULL && sa != NULL));
+
+	if (ifma != NULL)
+		ifp = ifma->ifma_ifp;
+
+	if (sa != NULL &&
+	    (sa->sa_family == AF_LINK || sa->sa_family == AF_UNSPEC)) {
 		dupsa = copy_and_normalize(sa);
-		if (dupsa == NULL) {
-			return ENOMEM;
-		}
+		if (dupsa == NULL)
+			return (ENOMEM);
 		sa = dupsa;
 	}
-	
+
 	ifnet_lock_exclusive(ifp);
-	for (ifma = ifp->if_multiaddrs.lh_first; ifma;
-	     ifma = ifma->ifma_link.le_next)
-		if (equal(sa, ifma->ifma_addr))
+	if (ifma == NULL) {
+		for (ifma = LIST_FIRST(&ifp->if_multiaddrs); ifma != NULL;
+		     ifma = LIST_NEXT(ifma, ifma_link)) {
+			IFMA_LOCK(ifma);
+			if (!equal(sa, ifma->ifma_addr) ||
+			    (anon && !(ifma->ifma_flags & IFMAF_ANONYMOUS))) {
+				VERIFY(!(ifma->ifma_flags & IFMAF_ANONYMOUS) ||
+				    ifma->ifma_anoncnt != 0);
+				IFMA_UNLOCK(ifma);
+				continue;
+			}
+			/* found; keep it locked */
 			break;
-	if (ifma == 0) {
-		ifnet_lock_done(ifp);
-		if (dupsa)
-			FREE(dupsa, M_IFADDR);
-		return ENOENT;
+		}
+		if (ifma == NULL) {
+			if (dupsa != NULL)
+				FREE(dupsa, M_IFADDR);
+			ifnet_lock_done(ifp);
+			return (ENOENT);
+		}
+	} else {
+		IFMA_LOCK(ifma);
+	}
+	IFMA_LOCK_ASSERT_HELD(ifma);
+	IFMA_ADDREF_LOCKED(ifma);	/* for this routine */
+	lastref = if_detach_ifma(ifp, ifma, anon);
+	VERIFY(!lastref || (!(ifma->ifma_debug & IFD_ATTACHED) &&
+	    ifma->ifma_reqcnt == 0));
+	VERIFY(!anon || ifma->ifma_ll == NULL);
+	ll = ifma->ifma_ll;
+	lladdr = (ifma->ifma_addr->sa_family == AF_UNSPEC ||
+	    ifma->ifma_addr->sa_family == AF_LINK);
+	IFMA_UNLOCK(ifma);
+	if (lastref && ll != NULL) {
+		IFMA_LOCK(ll);
+		ll_lastref = if_detach_ifma(ifp, ll, 0);
+		IFMA_UNLOCK(ll);
 	}
-	
-	retval = if_delmultiaddr(ifma, 1);
 	ifnet_lock_done(ifp);
-	if (dupsa)
+
+	if (lastref)
+		rt_newmaddrmsg(RTM_DELMADDR, ifma);
+
+	if ((ll == NULL && lastref && lladdr) || ll_lastref) {
+		/*
+		 * Make sure the interface driver is notified in the
+		 * case of a link layer mcast group being left.  Do
+		 * this only for an AF_LINK/AF_UNSPEC address that has
+		 * been removed from the if_multiaddrs set.
+		 */
+		ifnet_ioctl(ifp, 0, SIOCDELMULTI, NULL);
+	}
+
+	if (lastref)
+		IFMA_REMREF(ifma);	/* for if_multiaddrs list */
+	if (ll_lastref)
+		IFMA_REMREF(ll);	/* for if_multiaddrs list */
+
+	IFMA_REMREF(ifma);		/* for this routine */
+	if (dupsa != NULL)
 		FREE(dupsa, M_IFADDR);
-	
-	return retval;
-}
 
+	return (0);
+}
 
 /*
  * We don't use if_setlladdr, our interfaces are responsible for
@@ -2126,21 +2737,6 @@ if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
 }
 #endif
 
-struct ifmultiaddr *
-ifmaof_ifpforaddr(const struct sockaddr *sa, struct ifnet *ifp)
-{
-	struct ifmultiaddr *ifma;
-	
-	ifnet_lock_shared(ifp);
-	for (ifma = ifp->if_multiaddrs.lh_first; ifma;
-	     ifma = ifma->ifma_link.le_next)
-		if (equal(ifma->ifma_addr, sa))
-			break;
-	ifnet_lock_done(ifp);
-
-	return ifma;
-}
-
 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Link layers");
 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Generic link-management");
 
@@ -2279,17 +2875,18 @@ void if_rtmtu_update(struct ifnet *ifp)
 }
 
 __private_extern__ void
-if_data_internal_to_if_data(
-	struct ifnet *ifp,
-	const struct if_data_internal	*if_data_int,
-	struct if_data					*if_data)
+if_data_internal_to_if_data(struct ifnet *ifp,
+    const struct if_data_internal *if_data_int, struct if_data *if_data)
 {
-	struct dlil_threading_info *thread;
-       	if ((thread = ifp->if_input_thread) == NULL || (dlil_multithreaded_input == 0))
-		thread = dlil_lo_thread_ptr;
-
-#define COPYFIELD(fld)	if_data->fld = if_data_int->fld
+#pragma unused(ifp)
+#define COPYFIELD(fld)		if_data->fld = if_data_int->fld
 #define COPYFIELD32(fld)	if_data->fld = (u_int32_t)(if_data_int->fld)
+/* compiler will cast down to 32-bit */
+#define	COPYFIELD32_ATOMIC(fld) do {						\
+	atomic_get_64(if_data->fld,						\
+	    (u_int64_t *)(void *)(uintptr_t)&if_data_int->fld);			\
+} while (0)
+
 	COPYFIELD(ifi_type);
 	COPYFIELD(ifi_typelen);
 	COPYFIELD(ifi_physical);
@@ -2302,29 +2899,28 @@ if_data_internal_to_if_data(
 	COPYFIELD(ifi_metric);
 	if (if_data_int->ifi_baudrate & 0xFFFFFFFF00000000LL) {
 		if_data->ifi_baudrate = 0xFFFFFFFF;
-	}
-	else {
+	} else {
 		COPYFIELD32(ifi_baudrate);
 	}
+
+	COPYFIELD32_ATOMIC(ifi_ipackets);
+	COPYFIELD32_ATOMIC(ifi_ierrors);
+	COPYFIELD32_ATOMIC(ifi_opackets);
+	COPYFIELD32_ATOMIC(ifi_oerrors);
+	COPYFIELD32_ATOMIC(ifi_collisions);
+	COPYFIELD32_ATOMIC(ifi_ibytes);
+	COPYFIELD32_ATOMIC(ifi_obytes);
+	COPYFIELD32_ATOMIC(ifi_imcasts);
+	COPYFIELD32_ATOMIC(ifi_omcasts);
+	COPYFIELD32_ATOMIC(ifi_iqdrops);
+	COPYFIELD32_ATOMIC(ifi_noproto);
+
+	COPYFIELD(ifi_recvtiming);
+	COPYFIELD(ifi_xmittiming);
 	
-	lck_mtx_lock(thread->input_lck);
-	COPYFIELD32(ifi_ipackets);
-	COPYFIELD32(ifi_ierrors);
-	COPYFIELD32(ifi_opackets);
-	COPYFIELD32(ifi_oerrors);
-	COPYFIELD32(ifi_collisions);
-	COPYFIELD32(ifi_ibytes);
-	COPYFIELD32(ifi_obytes);
-	COPYFIELD32(ifi_imcasts);
-	COPYFIELD32(ifi_omcasts);
-	COPYFIELD32(ifi_iqdrops);
-	COPYFIELD32(ifi_noproto);
-	COPYFIELD32(ifi_recvtiming);
-	COPYFIELD32(ifi_xmittiming);
 	if_data->ifi_lastchange.tv_sec = if_data_int->ifi_lastchange.tv_sec;
 	if_data->ifi_lastchange.tv_usec = if_data_int->ifi_lastchange.tv_usec;
-	lck_mtx_unlock(thread->input_lck);
-	
+
 #if IF_LASTCHANGEUPTIME
 	if_data->ifi_lastchange.tv_sec += boottime_sec();
 #endif
@@ -2333,70 +2929,103 @@ if_data_internal_to_if_data(
 	COPYFIELD(ifi_hwassist);
 	if_data->ifi_reserved1 = 0;
 	if_data->ifi_reserved2 = 0;
+#undef COPYFIELD32_ATOMIC
 #undef COPYFIELD32
 #undef COPYFIELD
 }
 
 __private_extern__ void
-if_data_internal_to_if_data64(
-	struct ifnet *ifp,
-	const struct if_data_internal	*if_data_int,
-	struct if_data64				*if_data64)
+if_data_internal_to_if_data64(struct ifnet *ifp,
+    const struct if_data_internal *if_data_int,
+    struct if_data64 *if_data64)
 {
-	struct dlil_threading_info *thread;
-       	if ((thread = ifp->if_input_thread) == NULL || (dlil_multithreaded_input == 0))
-		thread = dlil_lo_thread_ptr;
-
-#define COPYFIELD(fld)	if_data64->fld = if_data_int->fld
-	COPYFIELD(ifi_type);
-	COPYFIELD(ifi_typelen);
-	COPYFIELD(ifi_physical);
-	COPYFIELD(ifi_addrlen);
-	COPYFIELD(ifi_hdrlen);
-	COPYFIELD(ifi_recvquota);
-	COPYFIELD(ifi_xmitquota);
+#pragma unused(ifp)
+#define COPYFIELD64(fld)	if_data64->fld = if_data_int->fld
+#define COPYFIELD64_ATOMIC(fld) do {						\
+	atomic_get_64(if_data64->fld,						\
+	(u_int64_t *)(void *)(uintptr_t)&if_data_int->fld);			\
+} while (0)
+
+	COPYFIELD64(ifi_type);
+	COPYFIELD64(ifi_typelen);
+	COPYFIELD64(ifi_physical);
+	COPYFIELD64(ifi_addrlen);
+	COPYFIELD64(ifi_hdrlen);
+	COPYFIELD64(ifi_recvquota);
+	COPYFIELD64(ifi_xmitquota);
 	if_data64->ifi_unused1 = 0;
-	COPYFIELD(ifi_mtu);
-	COPYFIELD(ifi_metric);
-	COPYFIELD(ifi_baudrate);
-
-	lck_mtx_lock(thread->input_lck);
-	COPYFIELD(ifi_ipackets);
-	COPYFIELD(ifi_ierrors);
-	COPYFIELD(ifi_opackets);
-	COPYFIELD(ifi_oerrors);
-	COPYFIELD(ifi_collisions);
-	COPYFIELD(ifi_ibytes);
-	COPYFIELD(ifi_obytes);
-	COPYFIELD(ifi_imcasts);
-	COPYFIELD(ifi_omcasts);
-	COPYFIELD(ifi_iqdrops);
-	COPYFIELD(ifi_noproto);
-	COPYFIELD(ifi_recvtiming);
-	COPYFIELD(ifi_xmittiming);
+	COPYFIELD64(ifi_mtu);
+	COPYFIELD64(ifi_metric);
+	COPYFIELD64(ifi_baudrate);
+
+	COPYFIELD64_ATOMIC(ifi_ipackets);
+	COPYFIELD64_ATOMIC(ifi_ierrors);
+	COPYFIELD64_ATOMIC(ifi_opackets);
+	COPYFIELD64_ATOMIC(ifi_oerrors);
+	COPYFIELD64_ATOMIC(ifi_collisions);
+	COPYFIELD64_ATOMIC(ifi_ibytes);
+	COPYFIELD64_ATOMIC(ifi_obytes);
+	COPYFIELD64_ATOMIC(ifi_imcasts);
+	COPYFIELD64_ATOMIC(ifi_omcasts);
+	COPYFIELD64_ATOMIC(ifi_iqdrops);
+	COPYFIELD64_ATOMIC(ifi_noproto);
+
+	/*
+	 * Note that these two fields are actually 32-bit wide; copying them
+	 * with COPYFIELD64_ATOMIC would result in misaligned 64-bit accesses.
+	 */
+	COPYFIELD64(ifi_recvtiming);
+	COPYFIELD64(ifi_xmittiming);
+
 	if_data64->ifi_lastchange.tv_sec = if_data_int->ifi_lastchange.tv_sec;
 	if_data64->ifi_lastchange.tv_usec = if_data_int->ifi_lastchange.tv_usec;
-	lck_mtx_unlock(thread->input_lck);
-	
+
 #if IF_LASTCHANGEUPTIME
 	if_data64->ifi_lastchange.tv_sec += boottime_sec();
 #endif
 
-#undef COPYFIELD
+#undef COPYFIELD64
 }
 
-void
-ifafree(struct ifaddr *ifa)
+__private_extern__ void
+if_copy_traffic_class(struct ifnet *ifp,
+    struct if_traffic_class *if_tc)
 {
-	int oldval;
+#define COPY_IF_TC_FIELD64_ATOMIC(fld) do {				\
+	atomic_get_64(if_tc->fld,							\
+	(u_int64_t *)(void *)(uintptr_t)&ifp->if_tc.fld);	\
+} while (0)
+
+	COPY_IF_TC_FIELD64_ATOMIC(ifi_ibkpackets);
+	COPY_IF_TC_FIELD64_ATOMIC(ifi_ibkbytes);
+	COPY_IF_TC_FIELD64_ATOMIC(ifi_obkpackets);
+	COPY_IF_TC_FIELD64_ATOMIC(ifi_obkbytes);
+	COPY_IF_TC_FIELD64_ATOMIC(ifi_ivipackets);
+	COPY_IF_TC_FIELD64_ATOMIC(ifi_ivibytes);
+	COPY_IF_TC_FIELD64_ATOMIC(ifi_ovipackets);
+	COPY_IF_TC_FIELD64_ATOMIC(ifi_ovibytes);
+	COPY_IF_TC_FIELD64_ATOMIC(ifi_ivopackets);
+	COPY_IF_TC_FIELD64_ATOMIC(ifi_ivobytes);
+	COPY_IF_TC_FIELD64_ATOMIC(ifi_ovopackets);
+	COPY_IF_TC_FIELD64_ATOMIC(ifi_ovobytes);
+
+#undef COPY_IF_TC_FIELD64_ATOMIC
+}
 
-	oldval = OSAddAtomic(-1, &ifa->ifa_refcnt);
-	if (oldval >= 1 && ifa->ifa_trace != NULL)
-		(*ifa->ifa_trace)(ifa, FALSE);
-	if (oldval == 0) {
+
+struct ifaddr *
+ifa_remref(struct ifaddr *ifa, int locked)
+{
+	if (!locked)
+		IFA_LOCK_SPIN(ifa);
+	else
+		IFA_LOCK_ASSERT_HELD(ifa);
+
+	if (ifa->ifa_refcnt == 0)
 		panic("%s: ifa %p negative refcnt\n", __func__, ifa);
-	} else if (oldval == 1) {
-		if  (ifa->ifa_debug & IFD_ATTACHED)
+	else if (ifa->ifa_trace != NULL)
+		(*ifa->ifa_trace)(ifa, FALSE);
+	if (--ifa->ifa_refcnt == 0) {
+		if (ifa->ifa_debug & IFD_ATTACHED)
 			panic("ifa %p attached to ifp is being freed\n", ifa);
 		/*
 		 * Some interface addresses are allocated either statically
@@ -2406,22 +3035,54 @@ ifafree(struct ifaddr *ifa)
 		 * leave it alone.
 		 */
 		if (ifa->ifa_debug & IFD_ALLOC) {
-			if (ifa->ifa_free == NULL)
+			if (ifa->ifa_free == NULL) {
+				IFA_UNLOCK(ifa);
 				FREE(ifa, M_IFADDR);
-			else
+			} else {
+				/* Become a regular mutex */
+				IFA_CONVERT_LOCK(ifa);
+				/* callee will unlock */
 				(*ifa->ifa_free)(ifa);
+			}
+		} else {
+			IFA_UNLOCK(ifa);
 		}
+		ifa = NULL;
 	}
+
+	if (!locked && ifa != NULL)
+		IFA_UNLOCK(ifa);
+
+	return (ifa);
 }
 
 void
-ifaref(struct ifaddr *ifa)
+ifa_addref(struct ifaddr *ifa, int locked)
 {
-	int oldval;
+	if (!locked)
+		IFA_LOCK_SPIN(ifa);
+	else
+		IFA_LOCK_ASSERT_HELD(ifa);
 
-	oldval = OSAddAtomic(1, &ifa->ifa_refcnt);
-	if (oldval < 0)
-		panic("%s: ifa %p negative refcnt\n", __func__, ifa);
-	else if (ifa->ifa_trace != NULL)
+	if (++ifa->ifa_refcnt == 0) {
+		panic("%s: ifa %p wraparound refcnt\n", __func__, ifa);
+		/* NOTREACHED */
+	} else if (ifa->ifa_trace != NULL) {
 		(*ifa->ifa_trace)(ifa, TRUE);
+	}
+	if (!locked)
+		IFA_UNLOCK(ifa);
+}
+
+void
+ifa_lock_init(struct ifaddr *ifa)
+{
+	lck_mtx_init(&ifa->ifa_lock, ifa_mtx_grp, ifa_mtx_attr);
+}
+
+void
+ifa_lock_destroy(struct ifaddr *ifa)
+{
+	IFA_LOCK_ASSERT_NOTHELD(ifa);
+	lck_mtx_destroy(&ifa->ifa_lock, ifa_mtx_grp);
 }
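
With ifaref()/ifafree() replaced by ifa_addref()/ifa_remref() and the IFA_ADDREF/IFA_REMREF macros, every address lookup in this file now returns its result with a reference taken "for caller". A minimal sketch of the resulting usage pattern:

	struct ifaddr *ifa;

	ifa = ifa_ifwithaddr(sa);	/* lookup returns a referenced ifa */
	if (ifa != NULL) {
		IFA_LOCK_SPIN(ifa);
		/* ... examine ifa->ifa_addr, etc. ... */
		IFA_UNLOCK(ifa);
		IFA_REMREF(ifa);	/* drop the lookup's reference */
	}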
diff --git a/bsd/net/if.h b/bsd/net/if.h
index 1e847ab04..a7974460c 100644
--- a/bsd/net/if.h
+++ b/bsd/net/if.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -96,6 +96,7 @@
 #define KEV_DL_LINK_ADDRESS_CHANGED	16
 #define KEV_DL_WAKEFLAGS_CHANGED	17
 #define KEV_DL_IF_IDLE_ROUTE_REFCNT	18
+#define KEV_DL_IFCAP_CHANGED		19
 
 #include <net/if_var.h>
 #include <sys/types.h>
@@ -146,16 +147,17 @@ struct if_clonereq32 {
 #define IFEF_AUTOCONFIGURING	0x1
 #define IFEF_DVR_REENTRY_OK	0x20	/* When set, driver may be reentered from its own thread */
 #define IFEF_ACCEPT_RTADVD	0x40	/* set to accept IPv6 router advertisement on the interface */
-#define IFEF_DETACHING		0x80	/* Set when interface is detaching */
-#define IFEF_USEKPI			0x100	/* Set when interface is created through the KPIs */
+#define _IFEF_DETACHING		0x80	/* deprecated */
+#define IFEF_USEKPI		0x100	/* Set when interface is created through the KPIs */
 #define IFEF_VLAN		0x200	/* interface has one or more vlans */
 #define IFEF_BOND		0x400	/* interface is part of bond */
 #define	IFEF_ARPLL		0x800	/* ARP for IPv4LL addresses on this port */
 #define	IFEF_NOWINDOWSCALE	0x1000	/* Don't scale TCP window on iface */
 #define	IFEF_NOAUTOIPV6LL	0x2000	/* Interface IPv6 LinkLocal address not provided by kernel */
-#define	IFEF_SENDLIST	0x10000000 /* Interface supports sending a list of packets */
-#define IFEF_REUSE	0x20000000 /* DLIL ifnet recycler, ifnet is not new */
-#define IFEF_INUSE	0x40000000 /* DLIL ifnet recycler, ifnet in use */
+#define IFEF_SERVICE_TRIGGERED	0x20000	/* interface is on-demand dynamically created/destroyed */
+#define	IFEF_SENDLIST		0x10000000 /* Interface supports sending a list of packets */
+#define _IFEF_REUSE		0x20000000 /* deprecated */
+#define _IFEF_INUSE		0x40000000 /* deprecated */
 #define IFEF_UPDOWNCHANGE	0x80000000 /* Interface's up/down state is changing */
 
 /*
@@ -177,6 +179,40 @@ struct if_clonereq32 {
 
 #endif /* KERNEL_PRIVATE */
 
+/*
+ * Capabilities that interfaces can advertise.
+ *
+ * struct ifnet.if_capabilities
+ *   contains the optional features & capabilities a particular interface
+ *   supports (not only the driver but also the detected hw revision).
+ *   Capabilities are defined by IFCAP_* below.
+ * struct ifnet.if_capenable
+ *   contains the enabled (either by default or through ifconfig) optional
+ *   features & capabilities on this interface.
+ *   Capabilities are defined by IFCAP_* below.
+ * struct if_data.ifi_hwassist in IFNET_* form, defined in net/kpi_interface.h,
+ *   contains the enabled optional features & capabilities that can be used
+ *   individually per packet and are specified in the mbuf pkthdr.csum_flags
+ *   field.  IFCAP_* and IFNET_* do not match one to one and IFNET_* may be
+ *   more detailed or differentiated than IFCAP_*.
+ *   IFNET_* hwassist flags have corresponding CSUM_* in sys/mbuf.h.
+ */
+#define IFCAP_RXCSUM            0x00001 /* can offload checksum on RX */
+#define IFCAP_TXCSUM            0x00002 /* can offload checksum on TX */
+#define IFCAP_VLAN_MTU          0x00004 /* VLAN-compatible MTU */
+#define IFCAP_VLAN_HWTAGGING    0x00008 /* hardware VLAN tag support */
+#define IFCAP_JUMBO_MTU         0x00010 /* 9000 byte MTU supported */
+#define IFCAP_TSO4              0x00020 /* can do TCP Segmentation Offload */
+#define IFCAP_TSO6              0x00040 /* can do TCP6 Segmentation Offload */
+#define IFCAP_LRO               0x00080 /* can do Large Receive Offload */
+#define IFCAP_AV		0x00100 /* can do 802.1 AV Bridging */
+
+#define IFCAP_HWCSUM    (IFCAP_RXCSUM | IFCAP_TXCSUM)
+#define IFCAP_TSO       (IFCAP_TSO4 | IFCAP_TSO6)
+
+#define IFCAP_VALID (IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO | IFCAP_VLAN_MTU | \
+	IFCAP_VLAN_HWTAGGING | IFCAP_JUMBO_MTU | IFCAP_AV)
+
 #define	IFQ_MAXLEN	50
 #define	IFNET_SLOWHZ	1		/* granularity is 1 second */
 
@@ -341,6 +377,7 @@ struct	ifreq {
 		struct	ifkpi	ifru_kpi;
 		u_int32_t ifru_wake_flags;
 		u_int32_t ifru_route_refcnt;
+		int     ifru_cap[2];
 	} ifr_ifru;
 #define	ifr_addr	ifr_ifru.ifru_addr	/* address */
 #define	ifr_dstaddr	ifr_ifru.ifru_dstaddr	/* other end of p-to-p link */
@@ -364,6 +401,8 @@ struct	ifreq {
 #define ifr_kpi		ifr_ifru.ifru_kpi
 #define ifr_wake_flags	ifr_ifru.ifru_wake_flags /* wake capabilities of device */
 #define ifr_route_refcnt ifr_ifru.ifru_route_refcnt /* route references on interface */
+#define ifr_reqcap      ifr_ifru.ifru_cap[0]    /* requested capabilities */
+#define ifr_curcap      ifr_ifru.ifru_cap[1]    /* current capabilities */
 };
 
 #define	_SIZEOF_ADDR_IFREQ(ifr) \
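
The ifr_reqcap/ifr_curcap pair added above gives ioctl callers a way to read the supported and enabled capability sets and to request changes. A hedged user-space sketch, assuming SIOCGIFCAP/SIOCSIFCAP ioctls that fill ifr_reqcap with if_capabilities and ifr_curcap with if_capenable; request_tso is an illustrative name:

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>

static int
request_tso(int s, const char *name)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof (ifr));
	strlcpy(ifr.ifr_name, name, sizeof (ifr.ifr_name));
	/* assumed: GET fills ifr_reqcap (supported) and ifr_curcap (enabled) */
	if (ioctl(s, SIOCGIFCAP, &ifr) == -1)
		return (-1);
	printf("%s: supports 0x%x, enabled 0x%x\n", name,
	    ifr.ifr_reqcap, ifr.ifr_curcap);
	/* ask for TSO on top of whatever is already enabled */
	ifr.ifr_reqcap = (ifr.ifr_curcap | IFCAP_TSO) & IFCAP_VALID;
	return (ioctl(s, SIOCSIFCAP, &ifr));
}
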
diff --git a/bsd/net/if_atm.h b/bsd/net/if_atm.h
deleted file mode 100644
index dc4689b56..000000000
--- a/bsd/net/if_atm.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*      $NetBSD: if_atm.h,v 1.7 1996/11/09 23:02:27 chuck Exp $       */
-/* $FreeBSD: src/sys/net/if_atm.h,v 1.4 1999/12/29 04:38:34 peter Exp $ */
-
-/*
- *
- * Copyright (c) 1996 Charles D. Cranor and Washington University.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *      This product includes software developed by Charles D. Cranor and
- *	Washington University.
- * 4. The name of the author may not be used to endorse or promote products
- *    derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * net/if_atm.h
- */
-
-#ifdef KERNEL_PRIVATE
-#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__)
-#define RTALLOC1(A,B)		rtalloc1((A),(B))
-#elif defined(__FreeBSD__) || defined(__APPLE__)
-#define RTALLOC1(A,B)		rtalloc1((A),(B),0UL)
-#endif
-#endif /* KERNEL_PRIVATE */
-
-/*
- * pseudo header for packet transmission
- */
-struct atm_pseudohdr {
-  u_int8_t atm_ph[4];	/* flags+VPI+VCI1(msb)+VCI2(lsb) */
-};
-
-#define ATM_PH_FLAGS(X)	((X)->atm_ph[0])
-#define ATM_PH_VPI(X)	((X)->atm_ph[1])
-#define ATM_PH_VCI(X)	((((X)->atm_ph[2]) << 8) | ((X)->atm_ph[3]))
-#define ATM_PH_SETVCI(X,V) { \
-	(X)->atm_ph[2] = ((V) >> 8) & 0xff; \
-	(X)->atm_ph[3] = ((V) & 0xff); \
-}
-
-#define ATM_PH_AAL5    0x01	/* use AAL5? (0 == aal0) */
-#define ATM_PH_LLCSNAP 0x02	/* use the LLC SNAP encoding (iff aal5) */
-
-#define ATM_PH_DRIVER7  0x40	/* reserve for driver's use */
-#define ATM_PH_DRIVER8  0x80	/* reserve for driver's use */
-
-#define ATMMTU		9180	/* ATM MTU size for IP */
-				/* XXX: could be 9188 with LLC/SNAP according
-					to comer */
-
-/* user's ioctl hook for raw atm mode */
-#define SIOCRAWATM	_IOWR('a', 122, int)	/* set driver's raw mode */
-
-/* atm_pseudoioctl: turns on and off RX VCIs  [for internal use only!] */
-struct atm_pseudoioctl {
-  struct atm_pseudohdr aph;
-  void *rxhand;
-};
-#define SIOCATMENA	_IOWR('a', 123, struct atm_pseudoioctl) /* enable */
-#define SIOCATMDIS	_IOWR('a', 124, struct atm_pseudoioctl) /* disable */
-
-
-/*
- * XXX forget all the garbage in if_llc.h and do it the easy way
- */
-
-#define ATMLLC_HDR "\252\252\3\0\0\0"
-struct atmllc {
-  u_int8_t llchdr[6];	/* aa.aa.03.00.00.00 */
-  u_int8_t type[2];	/* "ethernet" type */
-};
-
-/* ATM_LLC macros: note type code in host byte order */
-#define ATM_LLC_TYPE(X) (((X)->type[0] << 8) | ((X)->type[1]))
-#define ATM_LLC_SETTYPE(X,V) { \
-	(X)->type[1] = ((V) >> 8) & 0xff; \
-	(X)->type[0] = ((V) & 0xff); \
-}
-
-#ifdef KERNEL_PRIVATE
-void	atm_ifattach(struct ifnet *);
-void	atm_input(struct ifnet *, struct atm_pseudohdr *,
-		struct mbuf *, void *);
-int	atm_output(struct ifnet *, struct mbuf *, struct sockaddr *, 
-		struct rtentry *);
-#endif /* KERNEL_PRIVATE */
-
diff --git a/bsd/net/if_bond.c b/bsd/net/if_bond.c
index fa07935a6..91790bd3a 100644
--- a/bsd/net/if_bond.c
+++ b/bsd/net/if_bond.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2010 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -79,8 +79,6 @@
 #include <net/if_media.h>
 #include <net/multicast_list.h>
 
-extern void dlil_input_packet_list(struct ifnet *, struct mbuf *);
-
 static struct ether_addr slow_proto_multicast = {
     IEEE8023AD_SLOW_PROTO_MULTICAST
 };
@@ -724,8 +722,9 @@ ifbond_release(ifbond_ref ifb)
 		printf("ifbond_release(%s) removing multicast\n",
 		       ifb->ifb_name);
 	    }
-	    (void)if_delmultiaddr(ifb->ifb_ifma_slow_proto, 0);
-	    ifma_release(ifb->ifb_ifma_slow_proto);
+	    (void) if_delmulti_anon(ifb->ifb_ifma_slow_proto->ifma_ifp,
+	        ifb->ifb_ifma_slow_proto->ifma_addr);
+	    IFMA_REMREF(ifb->ifb_ifma_slow_proto);
 	}
 	if (ifb->ifb_distributing_array != NULL) {
 	    FREE(ifb->ifb_distributing_array, M_BOND);
@@ -885,10 +884,6 @@ if_siflladdr(struct ifnet * ifp, const struct ether_addr * ea_p)
     ifr.ifr_addr.sa_family = AF_UNSPEC;
     ifr.ifr_addr.sa_len = ETHER_ADDR_LEN;
     ether_addr_copy(ifr.ifr_addr.sa_data, ea_p);
-#if 0
-    snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d", ifnet_name(ifp),
-	     ifnet_unit(ifp));
-#endif
     return (ifnet_ioctl(ifp, 0, SIOCSIFLLADDR, &ifr));
 }
 
@@ -909,9 +904,6 @@ bond_globals_create(lacp_system_priority sys_pri,
     TAILQ_INIT(&b->ifbond_list);
     b->system = *sys;
     b->system_priority = sys_pri;
-#if 0
-    b->verbose = 1;
-#endif
     return (b);
 }
 
@@ -936,7 +928,6 @@ bond_globals_init(void)
     for (i = 0; i < 4; i++) {
 	char 		ifname[IFNAMSIZ+1];
 	snprintf(ifname, sizeof(ifname), "en%d", i);
-	/* XXX ifunit() needs to return a reference on the ifp */
 	ifp = ifunit(ifname);
 	if (ifp != NULL) {
 	    break;
@@ -1108,8 +1099,7 @@ ifbond_add_slow_proto_multicast(ifbond_ref ifb)
     sdl.sdl_nlen = 0;
     sdl.sdl_alen = sizeof(slow_proto_multicast);
     bcopy(&slow_proto_multicast, sdl.sdl_data, sizeof(slow_proto_multicast));
-    error = if_addmulti(ifb->ifb_ifp, (struct sockaddr *)&sdl, 
-			&ifma);
+    error = if_addmulti_anon(ifb->ifb_ifp, (struct sockaddr *)&sdl, &ifma);
     if (error == 0) {
 	ifb->ifb_ifma_slow_proto = ifma;
     }
@@ -1236,10 +1226,10 @@ bond_if_detach(struct ifnet * ifp)
     int		error;
 
     error = ifnet_detach(ifp);
-	if (error) {
-	    printf("bond_if_detach %s%d: ifnet_detach failed, %d\n",
-		   ifnet_name(ifp), ifnet_unit(ifp), error);
-	}
+    if (error) {
+	printf("bond_if_detach %s%d: ifnet_detach failed, %d\n",
+	       ifnet_name(ifp), ifnet_unit(ifp), error);
+    }
 	
     return;
 }
@@ -2571,24 +2561,10 @@ static int
 bond_set_promisc(__unused struct ifnet *ifp)
 {
     int 		error = 0;
-#if 0
-    ifbond_ref	ifb = ifnet_softc(ifp);
-
-
-    if ((ifnet_flags(ifp) & IFF_PROMISC) != 0) {
-	if ((ifb->ifb_flags & IFBF_PROMISC) == 0) {
-	    error = ifnet_set_promiscuous(ifb->ifb_p, 1);
-	    if (error == 0)
-		ifb->ifb_flags |= IFBF_PROMISC;
-	}
-    } else {
-	if ((ifb->ifb_flags & IFBF_PROMISC) != 0) {
-	    error = ifnet_set_promiscuous(ifb->ifb_p, 0);
-	    if (error == 0)
-		ifb->ifb_flags &= ~IFBF_PROMISC;
-	}
-    }
-#endif
+    /*
+     * The benefit of doing this currently does not warrant
+     * the added code complexity. Do nothing and return.
+     */
     return (error);
 }
 
@@ -2812,7 +2788,6 @@ bond_ioctl(struct ifnet *ifp, u_long cmd, void * data)
 	switch (ibr.ibr_op) {
 	case IF_BOND_OP_ADD_INTERFACE:
 	case IF_BOND_OP_REMOVE_INTERFACE:
-	    /* XXX ifunit() needs to return a reference on the ifp */
 	    port_ifp = ifunit(ibr.ibr_ibru.ibru_if_name);
 	    if (port_ifp == NULL) {
 		error = ENXIO;
@@ -2947,23 +2922,16 @@ bond_if_free(struct ifnet * ifp)
 }
 
 static void
-bond_event(struct ifnet * port_ifp, __unused protocol_family_t protocol,
-		   const struct kev_msg * event)
+bond_handle_event(struct ifnet * port_ifp, int event_code)
 {
     struct ifnet *	bond_ifp = NULL;
-    int			event_code = 0;
     ifbond_ref		ifb;
     int			old_distributing_count;
     bondport_ref	p;
     struct media_info	media_info = { 0, 0};
 
-    if (event->vendor_code != KEV_VENDOR_APPLE 
-	|| event->kev_class != KEV_NETWORK_CLASS 
-	|| event->kev_subclass != KEV_DL_SUBCLASS) {
-	return;
-    }
-    switch (event->event_code) {
-    case KEV_DL_IF_DETACHING:
+    switch (event_code) {
+    case KEV_DL_IF_DETACHED:
 	break;
     case KEV_DL_LINK_OFF:
     case KEV_DL_LINK_ON:
@@ -2980,8 +2948,8 @@ bond_event(struct ifnet * port_ifp, __unused protocol_family_t protocol,
     }
     ifb = p->po_bond;
     old_distributing_count = ifb->ifb_distributing_count;
-    switch (event->event_code) {
-    case KEV_DL_IF_DETACHING:
+    switch (event_code) {
+    case KEV_DL_IF_DETACHED:
 	bond_remove_interface(ifb, p->po_ifp);
 	break;
     case KEV_DL_LINK_OFF:
@@ -3042,6 +3010,37 @@ bond_event(struct ifnet * port_ifp, __unused protocol_family_t protocol,
     return;
 }
 
+static void
+bond_event(struct ifnet * port_ifp, __unused protocol_family_t protocol,
+	   const struct kev_msg * event)
+{
+    int		event_code;
+
+    if (event->vendor_code != KEV_VENDOR_APPLE 
+	|| event->kev_class != KEV_NETWORK_CLASS 
+	|| event->kev_subclass != KEV_DL_SUBCLASS) {
+	return;
+    }
+    event_code = event->event_code;
+    switch (event_code) {
+    case KEV_DL_LINK_OFF:
+    case KEV_DL_LINK_ON:
+	/* we only care about link status changes */
+	bond_handle_event(port_ifp, event_code);
+	break;
+    default:
+	break;
+    }
+    return;
+}
+
+static errno_t
+bond_detached(ifnet_t port_ifp, __unused protocol_family_t protocol)
+{
+    bond_handle_event(port_ifp, KEV_DL_IF_DETACHED);
+    return (0);
+}
+
 static void
 interface_link_event(struct ifnet * ifp, u_int32_t event_code)
 {
@@ -3051,6 +3050,7 @@ interface_link_event(struct ifnet * ifp, u_int32_t event_code)
 	char			if_name[IFNAMSIZ];
     } event;
 
+    bzero(&event, sizeof(event));
     event.header.total_size    = sizeof(event);
     event.header.vendor_code   = KEV_VENDOR_APPLE;
     event.header.kev_class     = KEV_NETWORK_CLASS;
@@ -3082,6 +3082,7 @@ bond_attach_protocol(struct ifnet *ifp)
     bzero(&reg, sizeof(reg));
     reg.input = bond_input;
     reg.event = bond_event;
+    reg.detached = bond_detached;
 	
     error = ifnet_attach_protocol(ifp, PF_BOND, &reg);
     if (error) {
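
The refactor above stops parsing KEV_DL_IF_DETACHING out of the event stream and instead registers a detached callback, so DLIL notifies the bond directly when a port interface goes away. A minimal sketch of the same registration pattern using the ifnet_attach_protocol() KPI; the demo_* names and the caller-supplied protocol family are illustrative:

#include <sys/kpi_mbuf.h>
#include <net/kpi_interface.h>

static errno_t
demo_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t m,
    char *frame_header)
{
#pragma unused(ifp, protocol, frame_header)
	mbuf_freem(m);		/* sketch: consume and drop */
	return (0);
}

static errno_t
demo_proto_detached(ifnet_t ifp, protocol_family_t protocol)
{
#pragma unused(ifp, protocol)
	/* tear down per-interface state here; the ifnet is going away */
	return (0);
}

static errno_t
demo_proto_attach(ifnet_t ifp, protocol_family_t family)
{
	struct ifnet_attach_proto_param reg;

	bzero(&reg, sizeof (reg));
	reg.input = demo_proto_input;
	reg.detached = demo_proto_detached;
	return (ifnet_attach_protocol(ifp, family, &reg));
}
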
diff --git a/bsd/net/if_bridge.c b/bsd/net/if_bridge.c
new file mode 100644
index 000000000..fd546fa0e
--- /dev/null
+++ b/bsd/net/if_bridge.c
@@ -0,0 +1,5138 @@
+/*	$NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $	*/
+/*
+ * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Copyright 2001 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Jason R. Thorpe for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed for the NetBSD Project by
+ *	Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ *    or promote products derived from this software without specific prior
+ *    written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
+ */
+
+/*
+ * Network interface bridge support.
+ *
+ * TODO:
+ *
+ *	- Currently only supports Ethernet-like interfaces (Ethernet,
+ *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
+ *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
+ *	  consider heterogenous bridges).
+ *	  consider heterogeneous bridges).
+
+#include <sys/cdefs.h>
+//__FBSDID("$FreeBSD$");
+
+//#include "opt_inet.h"
+//#include "opt_inet6.h"
+//#include "opt_carp.h"
+
+#define BRIDGE_DEBUG 1
+#ifndef BRIDGE_DEBUG
+#define BRIDGE_DEBUG 0
+#endif /* BRIDGE_DEBUG */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/protosw.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/socket.h> /* for net/if.h */
+#include <sys/sockio.h>
+//#include <sys/ctype.h>  /* string functions */
+#include <sys/kernel.h>
+#include <sys/random.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+//#include <vm/uma.h>
+//#include <sys/module.h>
+//#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/lock.h>
+//#include <sys/mutex.h>
+#include <sys/mcache.h>
+
+#include <sys/kauth.h>
+
+#include <libkern/libkern.h>
+
+#include <kern/zalloc.h>
+
+#if NBPFILTER > 0
+#include <net/bpf.h>
+#endif
+#include <net/if.h>
+//#include <net/if_clone.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+//#include <net/pfil.h>
+
+#include <netinet/in.h> /* for struct arpcom */
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#endif
+#ifdef DEV_CARP
+#include <netinet/ip_carp.h>
+#endif
+//#include <machine/in_cksum.h>
+#include <netinet/if_ether.h> /* for struct arpcom */
+#include <net/bridgestp.h>
+#include <net/if_bridgevar.h>
+#include <net/if_llc.h>
+#include <net/if_vlan_var.h>
+
+#include <net/if_ether.h>
+#include <net/dlil.h>
+#include <net/kpi_interfacefilter.h>
+
+#include <net/route.h>
+#ifdef PFIL_HOOKS
+#include <netinet/ip_fw2.h>
+#include <netinet/ip_dummynet.h>
+#endif /* PFIL_HOOKS */
+
+#if BRIDGE_DEBUG
+
+#define BR_LCKDBG_MAX				4
+
+#define BRIDGE_LOCK(_sc)			bridge_lock(_sc)
+#define BRIDGE_UNLOCK(_sc)			bridge_unlock(_sc)
+#define BRIDGE_LOCK_ASSERT(_sc)		lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
+#define	BRIDGE_LOCK2REF(_sc, _err)	_err = bridge_lock2ref(_sc)
+#define	BRIDGE_UNREF(_sc)			bridge_unref(_sc)
+#define	BRIDGE_XLOCK(_sc)			bridge_xlock(_sc)
+#define	BRIDGE_XDROP(_sc)			bridge_xdrop(_sc)
+
+#else /* BRIDGE_DEBUG */
+
+#define BRIDGE_LOCK(_sc)		lck_mtx_lock((_sc)->sc_mtx)
+#define BRIDGE_UNLOCK(_sc)		lck_mtx_unlock((_sc)->sc_mtx)
+#define BRIDGE_LOCK_ASSERT(_sc)		lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
+#define	BRIDGE_LOCK2REF(_sc, _err)	do {	\
+	lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED);	\
+	if ((_sc)->sc_iflist_xcnt > 0)		\
+		(_err) = EBUSY;			\
+	else					\
+		(_sc)->sc_iflist_ref++;		\
+	lck_mtx_unlock((_sc)->sc_mtx);		\
+} while (0)
+#define	BRIDGE_UNREF(_sc)		do {				\
+	lck_mtx_lock((_sc)->sc_mtx);					\
+	(_sc)->sc_iflist_ref--;						\
+	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0))	{ \
+		lck_mtx_unlock((_sc)->sc_mtx);					\
+		wakeup(&(_sc)->sc_cv);				\
+	} else 									\
+		lck_mtx_unlock((_sc)->sc_mtx);					\
+} while (0)
+#define	BRIDGE_XLOCK(_sc)		do {		\
+	lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED);		\
+	(_sc)->sc_iflist_xcnt++;			\
+	while ((_sc)->sc_iflist_ref > 0)		\
+		msleep(&(_sc)->sc_cv, (_sc)->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);	\
+} while (0)
+#define	BRIDGE_XDROP(_sc)		do {	\
+	lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED);	\
+	(_sc)->sc_iflist_xcnt--;		\
+} while (0)
+
+#endif /* BRIDGE_DEBUG */
+
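
The LOCK2REF/UNREF and XLOCK/XDROP pairs above implement a small reader-count protocol on top of sc_mtx: a reader trades the mutex for a reference so it can walk sc_iflist without holding the lock, while a writer raises sc_iflist_xcnt and sleeps until the last reader's BRIDGE_UNREF wakes it. A sketch of the intended usage, assuming the surrounding file's declarations; the reader_walk/writer_modify bodies are illustrative:

static void
reader_walk(struct bridge_softc *sc)
{
	struct bridge_iflist *bif;
	int err;

	BRIDGE_LOCK(sc);
	BRIDGE_LOCK2REF(sc, err);	/* drops sc_mtx, holds a list ref */
	if (err != 0)
		return;			/* a writer is draining readers */
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
		/* safe to read members; the list cannot be modified */
	}
	BRIDGE_UNREF(sc);		/* wakes the writer on the last ref */
}

static void
writer_modify(struct bridge_softc *sc)
{
	BRIDGE_LOCK(sc);
	BRIDGE_XLOCK(sc);		/* wait until all readers drain */
	/* ... insert or remove bridge_iflist entries ... */
	BRIDGE_XDROP(sc);
	BRIDGE_UNLOCK(sc);
}
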
+#if NBPFILTER > 0
+#define BRIDGE_BPF_MTAP_INPUT(sc, m) \
+	if (sc->sc_bpf_input) \
+		bridge_bpf_input(sc->sc_ifp, m)
+#else /* NBPFILTER */
+#define BRIDGE_BPF_MTAP_INPUT(sc, m)
+#endif /* NBPFILTER */
+
+/*
+ * Size of the route hash table.  Must be a power of two.
+ */
+/* APPLE MODIFICATION - per Wasabi performance improvement, change the hash table size */
+#if 0
+#ifndef BRIDGE_RTHASH_SIZE
+#define	BRIDGE_RTHASH_SIZE		1024
+#endif
+#else
+#ifndef BRIDGE_RTHASH_SIZE
+#define	BRIDGE_RTHASH_SIZE		256
+#endif
+#endif
+
+/* APPLE MODIFICATION - support for HW checksums */
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+#include <netinet/udp.h>
+#include <netinet/tcp.h>
+#endif
+
+#define	BRIDGE_RTHASH_MASK		(BRIDGE_RTHASH_SIZE - 1)
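
The power-of-two requirement exists so the hash-to-bucket step can mask instead of divide: for any power-of-two SIZE, h & (SIZE - 1) equals h % SIZE. A one-function illustration; bridge_rthash_bucket is a hypothetical helper, not part of this file:

static __inline uint32_t
bridge_rthash_bucket(uint32_t h)
{
	/* identical to (h % BRIDGE_RTHASH_SIZE) for power-of-two sizes */
	return (h & BRIDGE_RTHASH_MASK);
}
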
+
+/*
+ * Maximum number of addresses to cache.
+ */
+#ifndef BRIDGE_RTABLE_MAX
+#define	BRIDGE_RTABLE_MAX		100
+#endif
+
+
+/*
+ * Timeout (in seconds) for entries learned dynamically.
+ */
+#ifndef BRIDGE_RTABLE_TIMEOUT
+#define	BRIDGE_RTABLE_TIMEOUT		(20 * 60)	/* same as ARP */
+#endif
+
+/*
+ * Number of seconds between walks of the route list.
+ */
+#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
+#define	BRIDGE_RTABLE_PRUNE_PERIOD	(5 * 60)
+#endif
+
+/*
+ * List of capabilities to possibly mask on the member interface.
+ */
+#define	BRIDGE_IFCAPS_MASK		(IFCAP_TOE|IFCAP_TSO|IFCAP_TXCSUM)
+/*
+ * List of capabilities to disable on the member interface.
+ */
+#define	BRIDGE_IFCAPS_STRIP		IFCAP_LRO
+
+/*
+ * Bridge interface list entry.
+ */
+struct bridge_iflist {
+	TAILQ_ENTRY(bridge_iflist) bif_next;
+	struct ifnet		*bif_ifp;	/* member if */
+	struct bstp_port	bif_stp;	/* STP state */
+	uint32_t			bif_flags;	/* member if flags */
+	int					bif_savedcaps;	/* saved capabilities */
+	uint32_t			bif_addrmax;	/* max # of addresses */
+	uint32_t			bif_addrcnt;	/* cur. # of addresses */
+	uint32_t			bif_addrexceeded;/* # of address violations */
+
+	interface_filter_t 	bif_iff_ref;
+	struct bridge_softc *bif_sc;
+	char				bif_promisc;			/* promiscuous mode set */
+	char				bif_proto_attached;		/* protocol attached */
+	char 				bif_filter_attached;	/* interface filter attached */
+};
+
+/*
+ * Bridge route node.
+ */
+struct bridge_rtnode {
+	LIST_ENTRY(bridge_rtnode) brt_hash;	/* hash table linkage */
+	LIST_ENTRY(bridge_rtnode) brt_list;	/* list linkage */
+	struct bridge_iflist	*brt_dst;	/* destination if */
+	unsigned long			brt_expire;	/* expiration time */
+	uint8_t					brt_flags;	/* address flags */
+	uint8_t					brt_addr[ETHER_ADDR_LEN];
+	uint16_t				brt_vlan;	/* vlan id */
+};
+#define	brt_ifp			brt_dst->bif_ifp
+
+/*
+ * Software state for each bridge.
+ */
+struct bridge_softc {
+	struct ifnet				*sc_ifp;	/* make this an interface */
+	LIST_ENTRY(bridge_softc)	sc_list;
+	lck_mtx_t					*sc_mtx;
+	void						*sc_cv;
+	uint32_t					sc_brtmax;	/* max # of addresses */
+	uint32_t					sc_brtcnt;	/* cur. # of addresses */
+	uint32_t					sc_brttimeout;	/* rt timeout in seconds */
+	uint32_t					sc_iflist_ref;	/* refcount for sc_iflist */
+	uint32_t					sc_iflist_xcnt;	/* refcount for sc_iflist */
+	TAILQ_HEAD(, bridge_iflist)	sc_iflist;	/* member interface list */
+	LIST_HEAD(, bridge_rtnode)	*sc_rthash;	/* our forwarding table */
+	LIST_HEAD(, bridge_rtnode)	sc_rtlist;	/* list version of above */
+	uint32_t					sc_rthash_key;	/* key for hash */
+	TAILQ_HEAD(, bridge_iflist)	sc_spanlist;	/* span ports list */
+	struct bstp_state			sc_stp;		/* STP state */
+	uint32_t					sc_brtexceeded;	/* # of cache drops */
+	uint32_t					sc_filter_flags; /* ipf and flags */
+	
+	char						sc_if_xname[IFNAMSIZ];
+    bpf_packet_func				sc_bpf_input;
+    bpf_packet_func				sc_bpf_output;
+    u_int32_t					sc_flags;
+
+#if BRIDGE_DEBUG
+	void    					*lock_lr[BR_LCKDBG_MAX];        /* locking calling history */
+	int     					next_lock_lr;
+	void    					*unlock_lr[BR_LCKDBG_MAX];      /* unlocking caller history */
+	int     					next_unlock_lr;
+#endif /* BRIDGE_DEBUG */
+};
+
+#define SCF_DETACHING 0x1
+
+static lck_mtx_t 	*bridge_list_mtx;
+//eventhandler_tag	bridge_detach_cookie = NULL;
+
+int	bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
+
+static zone_t bridge_rtnode_pool = NULL;
+
+static int	bridge_clone_create(struct if_clone *, uint32_t, void *);
+static int	bridge_clone_destroy(struct ifnet *);
+
+static errno_t	bridge_ioctl(struct ifnet *, u_long, void *);
+#if HAS_IF_CAP
+static void	bridge_mutecaps(struct bridge_softc *);
+static void	bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
+		    int);
+#endif
+__private_extern__ void	bridge_ifdetach(struct bridge_iflist *, struct ifnet *);
+static int	bridge_init(struct ifnet *);
+#if HAS_BRIDGE_DUMMYNET
+static void	bridge_dummynet(struct mbuf *, struct ifnet *);
+#endif
+static void	bridge_stop(struct ifnet *, int);
+static errno_t bridge_start(struct ifnet *, struct mbuf *);
+__private_extern__ errno_t bridge_input(struct ifnet *, struct mbuf *, void *);
+#if BRIDGE_MEMBER_OUT_FILTER
+static errno_t bridge_iff_output(void *, ifnet_t , protocol_family_t , mbuf_t *);
+static int	bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+		    struct rtentry *);
+#endif
+static void	bridge_enqueue(struct bridge_softc *, struct ifnet *,
+		    struct mbuf *);
+static void	bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
+
+static void	bridge_forward(struct bridge_softc *, struct bridge_iflist *,
+		    struct mbuf *m);
+
+static void	bridge_timer(void *);
+
+static void	bridge_broadcast(struct bridge_softc *, struct ifnet *,
+		    struct mbuf *, int);
+static void	bridge_span(struct bridge_softc *, struct mbuf *);
+
+static int	bridge_rtupdate(struct bridge_softc *, const uint8_t *,
+		    uint16_t, struct bridge_iflist *, int, uint8_t);
+static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
+		    uint16_t);
+static void	bridge_rttrim(struct bridge_softc *);
+static void	bridge_rtage(struct bridge_softc *);
+static void	bridge_rtflush(struct bridge_softc *, int);
+static int	bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
+		    uint16_t);
+
+static int	bridge_rtable_init(struct bridge_softc *);
+static void	bridge_rtable_fini(struct bridge_softc *);
+
+static int	bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
+static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
+		    const uint8_t *, uint16_t);
+static int	bridge_rtnode_insert(struct bridge_softc *,
+		    struct bridge_rtnode *);
+static void	bridge_rtnode_destroy(struct bridge_softc *,
+		    struct bridge_rtnode *);
+static void	bridge_rtable_expire(struct ifnet *, int);
+static void	bridge_state_change(struct ifnet *, int);
+
+static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
+		    const char *name);
+static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
+		    struct ifnet *ifp);
+static void	bridge_delete_member(struct bridge_softc *,
+		    struct bridge_iflist *, int);
+static void	bridge_delete_span(struct bridge_softc *,
+		    struct bridge_iflist *);
+
+static int	bridge_ioctl_add(struct bridge_softc *, void *);
+static int	bridge_ioctl_del(struct bridge_softc *, void *);
+static int	bridge_ioctl_gifflags(struct bridge_softc *, void *);
+static int	bridge_ioctl_sifflags(struct bridge_softc *, void *);
+static int	bridge_ioctl_scache(struct bridge_softc *, void *);
+static int	bridge_ioctl_gcache(struct bridge_softc *, void *);
+static int	bridge_ioctl_gifs32(struct bridge_softc *, void *);
+static int	bridge_ioctl_gifs64(struct bridge_softc *, void *);
+static int	bridge_ioctl_rts32(struct bridge_softc *, void *);
+static int	bridge_ioctl_rts64(struct bridge_softc *, void *);
+static int	bridge_ioctl_saddr32(struct bridge_softc *, void *);
+static int	bridge_ioctl_saddr64(struct bridge_softc *, void *);
+static int	bridge_ioctl_sto(struct bridge_softc *, void *);
+static int	bridge_ioctl_gto(struct bridge_softc *, void *);
+static int	bridge_ioctl_daddr32(struct bridge_softc *, void *);
+static int	bridge_ioctl_daddr64(struct bridge_softc *, void *);
+static int	bridge_ioctl_flush(struct bridge_softc *, void *);
+static int	bridge_ioctl_gpri(struct bridge_softc *, void *);
+static int	bridge_ioctl_spri(struct bridge_softc *, void *);
+static int	bridge_ioctl_ght(struct bridge_softc *, void *);
+static int	bridge_ioctl_sht(struct bridge_softc *, void *);
+static int	bridge_ioctl_gfd(struct bridge_softc *, void *);
+static int	bridge_ioctl_sfd(struct bridge_softc *, void *);
+static int	bridge_ioctl_gma(struct bridge_softc *, void *);
+static int	bridge_ioctl_sma(struct bridge_softc *, void *);
+static int	bridge_ioctl_sifprio(struct bridge_softc *, void *);
+static int	bridge_ioctl_sifcost(struct bridge_softc *, void *);
+static int	bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
+static int	bridge_ioctl_addspan(struct bridge_softc *, void *);
+static int	bridge_ioctl_delspan(struct bridge_softc *, void *);
+static int	bridge_ioctl_gbparam32(struct bridge_softc *, void *);
+static int	bridge_ioctl_gbparam64(struct bridge_softc *, void *);
+static int	bridge_ioctl_grte(struct bridge_softc *, void *);
+static int	bridge_ioctl_gifsstp32(struct bridge_softc *, void *);
+static int	bridge_ioctl_gifsstp64(struct bridge_softc *, void *);
+static int	bridge_ioctl_sproto(struct bridge_softc *, void *);
+static int	bridge_ioctl_stxhc(struct bridge_softc *, void *);
+static int  bridge_ioctl_purge(struct bridge_softc *sc, void *arg);
+static int	bridge_ioctl_gfilt(struct bridge_softc *, void *);
+static int	bridge_ioctl_sfilt(struct bridge_softc *, void *);
+#ifdef PFIL_HOOKS
+static int	bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *,
+		    int);
+static int	bridge_ip_checkbasic(struct mbuf **mp);
+#ifdef INET6
+static int	bridge_ip6_checkbasic(struct mbuf **mp);
+#endif /* INET6 */
+static int	bridge_fragment(struct ifnet *, struct mbuf *,
+		    struct ether_header *, int, struct llc *);
+#endif /* PFIL_HOOKS */
+
+static errno_t bridge_set_bpf_tap(ifnet_t ifn, bpf_tap_mode mode, bpf_packet_func bpf_callback);
+__private_extern__ errno_t bridge_bpf_input(ifnet_t ifp, struct mbuf *m);
+__private_extern__ errno_t bridge_bpf_output(ifnet_t ifp, struct mbuf *m);
+
+static void bridge_detach(ifnet_t ifp);
+
+#define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
+
+/*
+ * The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2), but the
+ * tag lookup is stubbed out here, so all traffic is treated as untagged.
+ */
+#define	VLANTAGOF(_m)	0
+
+static struct bstp_cb_ops bridge_ops = {
+	.bcb_state = bridge_state_change,
+	.bcb_rtage = bridge_rtable_expire
+};
+
+SYSCTL_DECL(_net_link);
+SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge");
+
+#if defined(PFIL_HOOKS)
+static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */
+static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */
+static int pfil_member = 1; /* run pfil hooks on the member interface */
+static int pfil_ipfw = 0;   /* layer2 filter with ipfw */
+static int pfil_ipfw_arp = 0;   /* layer2 filter with ipfw */
+static int pfil_local_phys = 0; /* run pfil hooks on the physical interface for
+                                   locally destined packets */
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW,
+    &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled");
+SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RW,
+    &pfil_ipfw_arp, 0, "Filter ARP packets through IPFW layer2");
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW,
+    &pfil_bridge, 0, "Packet filter on the bridge interface");
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW,
+    &pfil_member, 0, "Packet filter on the member interface");
+SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, CTLFLAG_RW,
+    &pfil_local_phys, 0,
+    "Packet filter on the physical interface for locally destined packets");
+#endif /* PFIL_HOOKS */
+
+static int log_stp   = 0;   /* log STP state changes */
+SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
+    &log_stp, 0, "Log STP state changes");
+
+struct bridge_control {
+	int				(*bc_func)(struct bridge_softc *, void *);
+	unsigned int	bc_argsize;
+	unsigned int	bc_flags;
+};
+
+#define	BC_F_COPYIN		0x01	/* copy arguments in */
+#define	BC_F_COPYOUT		0x02	/* copy arguments out */
+#define	BC_F_SUSER		0x04	/* do super-user check */
+
+static const struct bridge_control bridge_control_table32[] = {
+	{ bridge_ioctl_add,		sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+	{ bridge_ioctl_del,		sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_gifflags,	sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_COPYOUT },
+	{ bridge_ioctl_sifflags,	sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_scache,		sizeof(struct ifbrparam),
+		BC_F_COPYIN|BC_F_SUSER },
+	{ bridge_ioctl_gcache,		sizeof(struct ifbrparam),
+		BC_F_COPYOUT },
+	
+	{ bridge_ioctl_gifs32,		sizeof(struct ifbifconf32),
+		BC_F_COPYIN|BC_F_COPYOUT },
+	{ bridge_ioctl_rts32,		sizeof(struct ifbaconf32),
+		BC_F_COPYIN|BC_F_COPYOUT },
+	
+	{ bridge_ioctl_saddr32,		sizeof(struct ifbareq32),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_sto,		sizeof(struct ifbrparam),
+		BC_F_COPYIN|BC_F_SUSER },
+	{ bridge_ioctl_gto,		sizeof(struct ifbrparam),
+		BC_F_COPYOUT },
+	
+	{ bridge_ioctl_daddr32,		sizeof(struct ifbareq32),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_flush,		sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_gpri,		sizeof(struct ifbrparam),
+		BC_F_COPYOUT },
+	{ bridge_ioctl_spri,		sizeof(struct ifbrparam),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_ght,		sizeof(struct ifbrparam),
+		BC_F_COPYOUT },
+	{ bridge_ioctl_sht,		sizeof(struct ifbrparam),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_gfd,		sizeof(struct ifbrparam),
+		BC_F_COPYOUT },
+	{ bridge_ioctl_sfd,		sizeof(struct ifbrparam),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_gma,		sizeof(struct ifbrparam),
+		BC_F_COPYOUT },
+	{ bridge_ioctl_sma,		sizeof(struct ifbrparam),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_sifprio,		sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_sifcost,		sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_gfilt,		sizeof(struct ifbrparam),
+	  BC_F_COPYOUT },
+	{ bridge_ioctl_sfilt,		sizeof(struct ifbrparam),
+	  BC_F_COPYIN|BC_F_SUSER },
+
+	{ bridge_ioctl_purge,	sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+
+	{ bridge_ioctl_addspan,		sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+	{ bridge_ioctl_delspan,		sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_gbparam32,		sizeof(struct ifbropreq32),
+		BC_F_COPYOUT },
+	
+	{ bridge_ioctl_grte,		sizeof(struct ifbrparam),
+		BC_F_COPYOUT },
+	
+	{ bridge_ioctl_gifsstp32,		sizeof(struct ifbpstpconf32),
+		BC_F_COPYIN|BC_F_COPYOUT },
+	
+	{ bridge_ioctl_sproto,		sizeof(struct ifbrparam),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_stxhc,		sizeof(struct ifbrparam),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_sifmaxaddr,	sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+};
+
+static const struct bridge_control bridge_control_table64[] = {
+	{ bridge_ioctl_add,		sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+	{ bridge_ioctl_del,		sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_gifflags,	sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_COPYOUT },
+	{ bridge_ioctl_sifflags,	sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_scache,		sizeof(struct ifbrparam),
+		BC_F_COPYIN|BC_F_SUSER },
+	{ bridge_ioctl_gcache,		sizeof(struct ifbrparam),
+		BC_F_COPYOUT },
+	
+	{ bridge_ioctl_gifs64,		sizeof(struct ifbifconf64),
+		BC_F_COPYIN|BC_F_COPYOUT },
+	{ bridge_ioctl_rts64,		sizeof(struct ifbaconf64),
+		BC_F_COPYIN|BC_F_COPYOUT },
+	
+	{ bridge_ioctl_saddr64,		sizeof(struct ifbareq64),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_sto,		sizeof(struct ifbrparam),
+		BC_F_COPYIN|BC_F_SUSER },
+	{ bridge_ioctl_gto,		sizeof(struct ifbrparam),
+		BC_F_COPYOUT },
+	
+	{ bridge_ioctl_daddr64,		sizeof(struct ifbareq64),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_flush,		sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_gpri,		sizeof(struct ifbrparam),
+		BC_F_COPYOUT },
+	{ bridge_ioctl_spri,		sizeof(struct ifbrparam),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_ght,		sizeof(struct ifbrparam),
+		BC_F_COPYOUT },
+	{ bridge_ioctl_sht,		sizeof(struct ifbrparam),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_gfd,		sizeof(struct ifbrparam),
+		BC_F_COPYOUT },
+	{ bridge_ioctl_sfd,		sizeof(struct ifbrparam),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_gma,		sizeof(struct ifbrparam),
+		BC_F_COPYOUT },
+	{ bridge_ioctl_sma,		sizeof(struct ifbrparam),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_sifprio,		sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_sifcost,		sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_gfilt,		sizeof(struct ifbrparam),
+	  BC_F_COPYOUT },
+	{ bridge_ioctl_sfilt,		sizeof(struct ifbrparam),
+	  BC_F_COPYIN|BC_F_SUSER },
+
+	{ bridge_ioctl_purge,	sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+
+	{ bridge_ioctl_addspan,		sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+	{ bridge_ioctl_delspan,		sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_gbparam64,		sizeof(struct ifbropreq64),
+		BC_F_COPYOUT },
+	
+	{ bridge_ioctl_grte,		sizeof(struct ifbrparam),
+		BC_F_COPYOUT },
+	
+	{ bridge_ioctl_gifsstp64,		sizeof(struct ifbpstpconf64),
+		BC_F_COPYIN|BC_F_COPYOUT },
+	
+	{ bridge_ioctl_sproto,		sizeof(struct ifbrparam),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_stxhc,		sizeof(struct ifbrparam),
+		BC_F_COPYIN|BC_F_SUSER },
+	
+	{ bridge_ioctl_sifmaxaddr,	sizeof(struct ifbreq),
+		BC_F_COPYIN|BC_F_SUSER },
+};
+
+static const unsigned int bridge_control_table_size =
+    sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
+
+static LIST_HEAD(, bridge_softc) bridge_list = LIST_HEAD_INITIALIZER(bridge_list);
+
+static lck_grp_t *bridge_lock_grp = NULL;
+static lck_attr_t *bridge_lock_attr = NULL;
+
+static if_clone_t bridge_cloner = NULL;
+
+__private_extern__ int _if_brige_debug = 0;
+
+SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW,
+           &_if_brige_debug, 0, "Bridge debug");
+
+#if BRIDGE_DEBUG
+
+static void printf_ether_header(struct ether_header *eh);
+static void printf_mbuf_data(mbuf_t m, size_t offset, size_t len);
+static void printf_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix);
+static void printf_mbuf(mbuf_t m, const char *prefix, const char *suffix);
+static void link_print(struct sockaddr_dl * dl_p);
+
+static void bridge_lock(struct bridge_softc *);
+static void bridge_unlock(struct bridge_softc *);
+static int bridge_lock2ref(struct bridge_softc *);
+static void bridge_unref(struct bridge_softc *);
+static void bridge_xlock(struct bridge_softc *);
+static void bridge_xdrop(struct bridge_softc *);
+
+static void bridge_lock(struct bridge_softc *sc)
+{
+	void *lr_saved = __builtin_return_address(0);
+	
+	lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+
+	lck_mtx_lock(sc->sc_mtx);
+	
+	sc->lock_lr[sc->next_lock_lr] = lr_saved;
+	sc->next_lock_lr = (sc->next_lock_lr+1) % BR_LCKDBG_MAX;
+}
+
+static void bridge_unlock(struct bridge_softc *sc)
+{
+	void *lr_saved = __builtin_return_address(0);
+	
+	lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+
+	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
+	sc->next_unlock_lr = (sc->next_unlock_lr+1) % BR_LCKDBG_MAX;
+	
+	lck_mtx_unlock(sc->sc_mtx);
+}
+
+static int bridge_lock2ref(struct bridge_softc *sc)
+{
+	int error = 0;
+	void *lr_saved = __builtin_return_address(0);
+	
+	lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+
+	if (sc->sc_iflist_xcnt > 0)
+		error = EBUSY;
+	else
+		sc->sc_iflist_ref++;
+
+	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
+	sc->next_unlock_lr = (sc->next_unlock_lr+1) % BR_LCKDBG_MAX;
+	lck_mtx_unlock(sc->sc_mtx);
+	
+	return error;
+}
+
+static void bridge_unref(struct bridge_softc *sc)
+{
+	void *lr_saved = __builtin_return_address(0);
+
+	lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+
+	lck_mtx_lock(sc->sc_mtx);
+	sc->lock_lr[sc->next_lock_lr] = lr_saved;
+	sc->next_lock_lr = (sc->next_lock_lr+1) % BR_LCKDBG_MAX;
+
+	sc->sc_iflist_ref--;
+	
+	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
+	sc->next_unlock_lr = (sc->next_unlock_lr+1) % BR_LCKDBG_MAX;
+	if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0))	{
+		lck_mtx_unlock(sc->sc_mtx);
+		wakeup(&sc->sc_cv);
+	} else
+		lck_mtx_unlock(sc->sc_mtx);
+}
+
+static void bridge_xlock(struct bridge_softc *sc)
+{
+	void *lr_saved = __builtin_return_address(0);
+
+	lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+
+	sc->sc_iflist_xcnt++;
+	while (sc->sc_iflist_ref > 0) {
+		sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
+		sc->next_unlock_lr = (sc->next_unlock_lr+1) % BR_LCKDBG_MAX;
+		
+		msleep(&sc->sc_cv, sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
+
+		sc->lock_lr[sc->next_lock_lr] = lr_saved;
+		sc->next_lock_lr = (sc->next_lock_lr+1) % BR_LCKDBG_MAX;
+	}
+}
+
+static void bridge_xdrop(struct bridge_softc *sc)
+{
+	lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+
+	sc->sc_iflist_xcnt--;
+}
+
+void
+printf_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
+{
+	if (m)
+		printf("%spktlen: %u rcvif: %p header: %p nextpkt: %p%s",
+			   prefix ? prefix : "",
+			   (unsigned int)mbuf_pkthdr_len(m), mbuf_pkthdr_rcvif(m), mbuf_pkthdr_header(m), mbuf_nextpkt(m),
+			   suffix ? suffix : "");
+	else
+		printf("%s<NULL>%s\n", prefix ? prefix : "", suffix ? suffix : "");
+}
+
+void
+printf_mbuf(mbuf_t m, const char *prefix, const char *suffix)
+{
+	if (m) {
+		printf("%s%p type: %u flags: 0x%x len: %u data: %p maxlen: %u datastart: %p next: %p%s",
+			   prefix ? prefix : "",
+			   m, mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m), mbuf_data(m), 
+			   (unsigned int)mbuf_maxlen(m), mbuf_datastart(m), mbuf_next(m), 
+			   !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
+		if ((mbuf_flags(m) & MBUF_PKTHDR))
+			printf_mbuf_pkthdr(m, " ", suffix);
+	} else
+		printf("%s<NULL>%s\n", prefix ? prefix : "", suffix ? suffix : "");
+}
+
+void
+printf_mbuf_data(mbuf_t m, size_t offset, size_t len)
+{
+	mbuf_t			n;
+	size_t			i, j;
+	size_t			pktlen, mlen, maxlen;
+	unsigned char	*ptr;
+	
+	pktlen = mbuf_pkthdr_len(m);
+	
+	if (offset > pktlen)
+		return;
+	
+	maxlen = (pktlen - offset > len) ? len : pktlen - offset;
+	n = m;
+	mlen = mbuf_len(n);
+	ptr = mbuf_data(n);
+	for (i = 0, j = 0; i < maxlen; i++, j++) {
+		if (j >= mlen) {
+			n = mbuf_next(n);
+			if (n == 0)
+				break;
+			ptr = mbuf_data(n);
+			mlen = mbuf_len(n);
+			j = 0;
+		}
+		if (i >= offset) {
+			printf("%02x%s", ptr[j], i % 2 ? " " : "");
+		}
+	}
+	return;
+}
+
+static void
+printf_ether_header(struct ether_header *eh)
+{
+	printf("%02x:%02x:%02x:%02x:%02x:%02x > %02x:%02x:%02x:%02x:%02x:%02x 0x%04x ", 
+		   eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2], 
+		   eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5], 
+		   eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2], 
+		   eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5], 
+		   eh->ether_type);
+}
+
+static void
+link_print(struct sockaddr_dl * dl_p)
+{
+	int i;
+	
+#if 1
+	printf("sdl len %d index %d family %d type 0x%x nlen %d alen %d"
+           " slen %d addr ", dl_p->sdl_len,
+           dl_p->sdl_index,  dl_p->sdl_family, dl_p->sdl_type,
+           dl_p->sdl_nlen, dl_p->sdl_alen, dl_p->sdl_slen);
+#endif
+	for (i = 0; i < dl_p->sdl_alen; i++)
+        printf("%s%x", i ? ":" : "",
+               (CONST_LLADDR(dl_p))[i]);
+	printf("\n");
+	return;
+}
+
+#endif /* BRIDGE_DEBUG */
+
+/*
+ * bridgeattach:
+ *
+ *	Pseudo-device attach routine.
+ */
+__private_extern__ int
+bridgeattach(__unused int n)
+{
+	int error;
+	lck_grp_attr_t *lck_grp_attr = NULL;
+	struct ifnet_clone_params ifnet_clone_params;
+	
+	bridge_rtnode_pool = zinit(sizeof(struct bridge_rtnode), 1024 * sizeof(struct bridge_rtnode),
+                               0, "bridge_rtnode");
+	zone_change(bridge_rtnode_pool, Z_CALLERACCT, FALSE);
+
+	lck_grp_attr = lck_grp_attr_alloc_init();
+	
+	bridge_lock_grp = lck_grp_alloc_init("if_bridge", lck_grp_attr);
+	
+	bridge_lock_attr = lck_attr_alloc_init();
+	
+#if BRIDGE_DEBUG
+	lck_attr_setdebug(bridge_lock_attr);
+#endif
+
+	bridge_list_mtx = lck_mtx_alloc_init(bridge_lock_grp, bridge_lock_attr);
+	
+	// can free the attributes once we've allocated the group lock
+	lck_grp_attr_free(lck_grp_attr);
+	
+	LIST_INIT(&bridge_list);
+	
+	bstp_sys_init();
+	
+	ifnet_clone_params.ifc_name = "bridge";
+	ifnet_clone_params.ifc_create = bridge_clone_create;
+	ifnet_clone_params.ifc_destroy = bridge_clone_destroy;
+	
+	error = ifnet_clone_attach(&ifnet_clone_params, &bridge_cloner);
+	if (error != 0)
+		printf("bridgeattach: ifnet_clone_attach failed %d\n", error);
+
+	return error;
+}
+
+#if defined(PFIL_HOOKS)
+/*
+ * handler for net.link.bridge.pfil_ipfw
+ */
+static int
+sysctl_pfil_ipfw SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1,arg2)
+	int enable = pfil_ipfw;
+	int error;
+
+	error = sysctl_handle_int(oidp, &enable, 0, req);
+	enable = (enable) ? 1 : 0;
+
+	if (enable != pfil_ipfw) {
+		pfil_ipfw = enable;
+
+		/*
+		 * Disable pfil so that ipfw doesn't run twice; if the user
+		 * really wants both then they can re-enable pfil_bridge and/or
+		 * pfil_member. Also allow non-ip packets as ipfw can filter by
+		 * layer2 type.
+		 */
+		if (pfil_ipfw) {
+			pfil_onlyip = 0;
+			pfil_bridge = 0;
+			pfil_member = 0;
+		}
+	}
+
+	return (error);
+}
+SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw, CTLTYPE_INT|CTLFLAG_RW,
+	    &pfil_ipfw, 0, &sysctl_pfil_ipfw, "I", "Layer2 filter with IPFW");
+#endif /* PFIL_HOOKS */
+
+/*
+ * bridge_clone_create:
+ *
+ *	Create a new bridge instance.
+ */
+static int
+bridge_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params)
+{
+	struct ifnet *ifp = NULL;
+	struct bridge_softc *sc;
+	u_char eaddr[6];
+	struct ifnet_init_params init_params;
+	errno_t error = 0;
+	uint32_t sdl_buffer[offsetof(struct sockaddr_dl, sdl_data) + IFNAMSIZ + ETHER_ADDR_LEN];
+	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sdl_buffer;
+
+	sc = _MALLOC(sizeof(*sc), M_DEVBUF, M_WAITOK);
+	memset(sc, 0, sizeof(*sc));
+
+	sc->sc_mtx = lck_mtx_alloc_init(bridge_lock_grp, bridge_lock_attr);
+	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
+	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
+	sc->sc_filter_flags = IFBF_FILT_DEFAULT;
+#ifndef BRIDGE_IPF
+	/*
+	 * For backwards compatibility with previous behaviour...
+	 * Switch off filtering on the bridge itself if BRIDGE_IPF is
+	 * not defined.
+	 */
+	sc->sc_filter_flags &= ~IFBF_FILT_USEIPF;
+#endif
+
+	/* Initialize our routing table. */
+	error = bridge_rtable_init(sc);
+	if (error != 0) {
+		printf("bridge_clone_create: bridge_rtable_init failed %d\n", error);
+		goto done;
+	}
+	
+	TAILQ_INIT(&sc->sc_iflist);
+	TAILQ_INIT(&sc->sc_spanlist);
+
+	/* use the interface name as the unique id for ifp recycle */
+	snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
+             ifc->ifc_name, unit);
+	memset(&init_params, 0, sizeof(struct ifnet_init_params));
+	init_params.uniqueid = sc->sc_if_xname;
+	init_params.uniqueid_len = strlen(sc->sc_if_xname);
+	init_params.name = ifc->ifc_name;
+	init_params.unit = unit;
+	init_params.family = IFNET_FAMILY_ETHERNET;
+	init_params.type = IFT_BRIDGE;
+	init_params.output = bridge_start;
+	init_params.demux = ether_demux;
+	init_params.add_proto = ether_add_proto;
+	init_params.del_proto = ether_del_proto;
+	init_params.check_multi = ether_check_multi;
+	init_params.framer = ether_frameout;
+	init_params.softc = sc;
+	init_params.ioctl = bridge_ioctl;
+	init_params.set_bpf_tap = bridge_set_bpf_tap;
+	init_params.detach = bridge_detach;
+	init_params.broadcast_addr = etherbroadcastaddr;
+	init_params.broadcast_len = ETHER_ADDR_LEN;
+	error = ifnet_allocate(&init_params, &ifp);
+	if (error != 0) {
+		printf("bridge_clone_create: ifnet_allocate failed %d\n", error);
+		goto done;
+	}
+	sc->sc_ifp = ifp;
+	
+	error = ifnet_set_mtu(ifp, ETHERMTU);
+	if (error != 0) {
+		printf("bridge_clone_create: ifnet_set_mtu failed %d\n", error);
+		goto done;
+	}
+	error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
+	if (error != 0) {
+		printf("bridge_clone_create: ifnet_set_addrlen failed %d\n", error);
+		goto done;
+	}
+	error = ifnet_set_baudrate(ifp, 10000000);	// XXX: this is what IONetworking does
+	if (error != 0) {
+		printf("bridge_clone_create: ifnet_set_baudrate failed %d\n", error);
+		goto done;
+	}
+	error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
+	if (error != 0) {
+		printf("bridge_clone_create: ifnet_set_hdrlen failed %d\n", error);
+		goto done;
+	}
+	error = ifnet_set_flags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST, 
+							0xffff);
+	if (error != 0) {
+		printf("bridge_clone_create: ifnet_set_flags failed %d\n", error);
+		goto done;
+	}
+
+#if 0
+	/*
+	 * Generate a random ethernet address with a locally administered
+	 * address.
+	 *
+	 * Since we are using random ethernet addresses for the bridge, it is
+	 * possible that we might have address collisions, so make sure that
+	 * this hardware address isn't already in use on another bridge.
+	 */
+	{
+		int retry;
+		
+		for (retry = 1; retry != 0;) {
+			struct ifnet *bifp;
+			struct bridge_softc *sc2;
+		
+			read_random(eaddr, ETHER_ADDR_LEN);
+			eaddr[0] &= ~1;		/* clear multicast bit */
+			eaddr[0] |= 2;		/* set the LAA bit */
+			retry = 0;
+			lck_mtx_lock(bridge_list_mtx);
+			LIST_FOREACH(sc2, &bridge_list, sc_list) {
+				bifp = sc2->sc_ifp;
+				if (memcmp(eaddr, ifnet_lladdr(bifp), ETHER_ADDR_LEN) == 0)
+					retry = 1;
+			}
+			lck_mtx_unlock(bridge_list_mtx);
+		}
+	}
+#else
+	/*
+	 * Generate a random ethernet address and use the private AC:DE:48
+	 * OUI code.
+	 */
+	{
+		uint32_t r;
+		
+		read_random(&r, sizeof(r));
+		eaddr[0] = 0xAC;
+		eaddr[1] = 0xDE;
+		eaddr[2] = 0x48;
+		eaddr[3] = (r >> 0)  & 0xffu;
+		eaddr[4] = (r >> 8)  & 0xffu;
+		eaddr[5] = (r >> 16) & 0xffu;
+	}
+#endif
+
+	memset(sdl, 0, sizeof(sdl_buffer));
+	sdl->sdl_family = AF_LINK;
+	sdl->sdl_nlen = strlen(sc->sc_if_xname);
+	sdl->sdl_alen = ETHER_ADDR_LEN;
+	sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
+	memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
+	memcpy(LLADDR(sdl), eaddr, ETHER_ADDR_LEN);
+	
+#if BRIDGE_DEBUG
+	link_print(sdl);
+#endif
+
+	error = ifnet_attach(ifp, NULL);
+	if (error != 0) {
+		printf("bridge_clone_create: ifnet_attach failed %d\n", error);
+		goto done;
+	}
+	
+	error = ifnet_set_lladdr_and_type(ifp, eaddr, ETHER_ADDR_LEN, IFT_ETHER);
+	if (error != 0) {
+		printf("bridge_clone_create: ifnet_set_lladdr_and_type failed %d\n", error);
+		goto done;
+	}
+	
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+	/* 
+	 * APPLE MODIFICATION - our bridge can support HW checksums 
+	 * (useful if underlying interfaces support them) on TX,
+	 * RX is not that interesting, since the stack just looks to
+	 * see if the packet has been checksummed already (I think)
+	 * but we might as well indicate we support it
+	 */
+	ifp->if_capabilities =
+		IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx |
+		IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_UDPv4_Rx ;
+#endif
+	
+	bstp_attach(&sc->sc_stp, &bridge_ops);
+
+	lck_mtx_lock(bridge_list_mtx);
+	LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
+	lck_mtx_unlock(bridge_list_mtx);
+
+	/* attach as ethernet */
+	error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header), NULL, NULL);
+
+done:
+	if (error != 0) {
+        printf("bridge_clone_create failed error %d\n", error);
+		/* Cleanup TBD */
+	}
+	
+	return error;
+}
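
bridge_clone_create() runs when user space asks for a new bridge instance by name. A hedged sketch of the triggering side, assuming the SIOCIFCREATE interface-cloning ioctl from <sys/sockio.h>; create_bridge0 is an illustrative name:

#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>

static int
create_bridge0(int s)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof (ifr));
	strlcpy(ifr.ifr_name, "bridge0", sizeof (ifr.ifr_name));
	/* the "bridge" cloner attached in bridgeattach() handles this */
	return (ioctl(s, SIOCIFCREATE, &ifr));
}
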
+
+/*
+ * bridge_clone_destroy:
+ *
+ *	Destroy a bridge instance.
+ */
+static int
+bridge_clone_destroy(struct ifnet *ifp)
+{
+	struct bridge_softc *sc = ifp->if_softc;
+	struct bridge_iflist *bif;
+	errno_t error;
+
+	BRIDGE_LOCK(sc);
+	if ((sc->sc_flags & SCF_DETACHING)) {
+		BRIDGE_UNLOCK(sc);
+		return 0;
+	}
+	sc->sc_flags |= SCF_DETACHING;
+
+	bridge_stop(ifp, 1);
+
+	error = ifnet_set_flags(ifp, 0, IFF_UP);
+	if (error != 0) {
+		printf("bridge_clone_destroy: ifnet_set_flags failed %d\n", error);
+	}
+
+	while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL)
+		bridge_delete_member(sc, bif, 0);
+
+	while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
+		bridge_delete_span(sc, bif);
+	}
+
+	BRIDGE_UNLOCK(sc);
+
+	error = ifnet_detach(ifp);
+	if (error != 0) {
+		panic("bridge_clone_destroy: ifnet_detach(%p) failed %d\n", ifp, error);
+		if ((sc = (struct bridge_softc *)ifnet_softc(ifp)) != NULL) {
+			BRIDGE_LOCK(sc);
+			sc->sc_flags &= ~SCF_DETACHING;
+			BRIDGE_UNLOCK(sc);
+		}
+		return 0;
+	}
+
+	return 0;
+}
+
+#define DRVSPEC do { \
+		if (ifd->ifd_cmd >= bridge_control_table_size) { \
+			error = EINVAL; \
+			break; \
+		} \
+		bc = &bridge_control_table[ifd->ifd_cmd]; \
+ \
+		if (cmd == SIOCGDRVSPEC && \
+		    (bc->bc_flags & BC_F_COPYOUT) == 0) { \
+			error = EINVAL; \
+			break; \
+		} \
+		else if (cmd == SIOCSDRVSPEC && \
+		    (bc->bc_flags & BC_F_COPYOUT) != 0) { \
+			error = EINVAL; \
+			break; \
+		} \
+ \
+		if (bc->bc_flags & BC_F_SUSER) { \
+			error = kauth_authorize_generic(kauth_cred_get(), KAUTH_GENERIC_ISSUSER); \
+			if (error) \
+				break; \
+		} \
+ \
+		if (ifd->ifd_len != bc->bc_argsize || \
+		    ifd->ifd_len > sizeof(args)) { \
+			error = EINVAL; \
+			break; \
+		} \
+ \
+		bzero(&args, sizeof(args)); \
+		if (bc->bc_flags & BC_F_COPYIN) { \
+			error = copyin(ifd->ifd_data, &args, ifd->ifd_len); \
+			if (error) \
+				break; \
+		} \
+ \
+		BRIDGE_LOCK(sc); \
+		error = (*bc->bc_func)(sc, &args); \
+		BRIDGE_UNLOCK(sc); \
+		if (error) \
+			break; \
+ \
+		if (bc->bc_flags & BC_F_COPYOUT) \
+			error = copyout(&args, ifd->ifd_data, ifd->ifd_len); \
+} while (0)
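
The DRVSPEC dispatcher above validates ifd_cmd against the bridge_control tables, copies the argument in, runs the handler under the bridge lock, and copies the result out. A hedged user-space sketch of driving it, assuming struct ifdrv from <net/if.h> and a BRDGADD command index from <net/if_bridgevar.h> that lines up with the bridge_ioctl_add slot:

#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <net/if_bridgevar.h>

static int
bridge_add_member(int s, const char *bridge, const char *member)
{
	struct ifbreq req;
	struct ifdrv ifd;

	memset(&req, 0, sizeof (req));
	strlcpy(req.ifbr_ifsname, member, sizeof (req.ifbr_ifsname));

	memset(&ifd, 0, sizeof (ifd));
	strlcpy(ifd.ifd_name, bridge, sizeof (ifd.ifd_name));
	ifd.ifd_cmd = BRDGADD;		/* index into bridge_control_table */
	ifd.ifd_len = sizeof (req);
	ifd.ifd_data = &req;

	return (ioctl(s, SIOCSDRVSPEC, &ifd));
}
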
+
+
+/*
+ * bridge_ioctl:
+ *
+ *	Handle a control request from the operator.
+ */
+static errno_t
+bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
+{
+	struct bridge_softc *sc = ifp->if_softc;
+	struct ifreq *ifr = (struct ifreq *) data;
+	int error = 0;
+
+	lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+
+#if BRIDGE_DEBUG
+	if (_if_brige_debug)
+		printf("bridge_ioctl: ifp %p cmd 0x%08lx (%c%c [%lu] %c %lu)\n", 
+			ifp, 
+			cmd, 
+			(cmd & IOC_IN) ? 'I' : ' ',
+			(cmd & IOC_OUT) ? 'O' : ' ',
+			IOCPARM_LEN(cmd),
+			(char)IOCGROUP(cmd),
+			cmd & 0xff);
+#endif
+	
+	switch (cmd) {
+	
+	case SIOCSIFADDR:
+	case SIOCAIFADDR:
+		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
+		break;
+
+	case SIOCGIFMEDIA32:
+	case SIOCGIFMEDIA64:
+		error = EINVAL;
+		break;
+
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		break;
+
+	case SIOCSDRVSPEC32:
+	case SIOCGDRVSPEC32: {
+		union {
+			struct ifbreq ifbreq;
+			struct ifbifconf32 ifbifconf;
+			struct ifbareq32 ifbareq;
+			struct ifbaconf32 ifbaconf;
+			struct ifbrparam ifbrparam;
+			struct ifbropreq32 ifbropreq;
+		} args;
+		struct ifdrv32 *ifd = (struct ifdrv32 *) data;
+		const struct bridge_control *bridge_control_table = bridge_control_table32, *bc;
+		
+		DRVSPEC;
+
+		break;
+	}
+	case SIOCSDRVSPEC64:
+	case SIOCGDRVSPEC64: {
+		union {
+			struct ifbreq ifbreq;
+			struct ifbifconf64 ifbifconf;
+			struct ifbareq64 ifbareq;
+			struct ifbaconf64 ifbaconf;
+			struct ifbrparam ifbrparam;
+			struct ifbropreq64 ifbropreq;
+		} args;
+		struct ifdrv64 *ifd = (struct ifdrv64 *) data;
+		const struct bridge_control *bridge_control_table = bridge_control_table64, *bc;
+		
+		DRVSPEC;
+		
+		break;
+	}
+
+	case SIOCSIFFLAGS:
+		if (!(ifp->if_flags & IFF_UP) &&
+		    (ifp->if_flags & IFF_RUNNING)) {
+			/*
+			 * If interface is marked down and it is running,
+			 * then stop and disable it.
+			 */
+			BRIDGE_LOCK(sc);
+			bridge_stop(ifp, 1);
+			BRIDGE_UNLOCK(sc);
+		} else if ((ifp->if_flags & IFF_UP) &&
+		    !(ifp->if_flags & IFF_RUNNING)) {
+			/*
+			 * If interface is marked up and it is stopped, then
+			 * start it.
+			 */
+			BRIDGE_LOCK(sc);
+			error = bridge_init(ifp);
+			BRIDGE_UNLOCK(sc);
+		}
+		break;
+
+	case SIOCSIFLLADDR:
+		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
+		if (error != 0)
+			printf("bridge_ioctl: ifnet_set_lladdr failed %d\n", error);
+		break;
+
+	case SIOCSIFMTU:
+		/* Do not allow the MTU to be changed on the bridge */
+		error = EINVAL;
+		break;
+
+	default:
+		/*
+		 * The bridge lock must not be held here, as ether_ioctl()
+		 * may call bridge_start(), which would recurse on the lock.
+		 */
+		error = ether_ioctl(ifp, cmd, data);
+#if BRIDGE_DEBUG
+		if (error != 0)
+			printf("bridge_ioctl: ether_ioctl ifp %p cmd 0x%08lx (%c%c [%lu] %c %lu) failed error: %d\n", 
+				   ifp, 
+				   cmd, 
+				   (cmd & IOC_IN) ? 'I' : ' ',
+				   (cmd & IOC_OUT) ? 'O' : ' ',
+				   IOCPARM_LEN(cmd),
+				   (char) IOCGROUP(cmd),
+				   cmd & 0xff,
+				   error);
+#endif /* BRIDGE_DEBUG */
+		break;
+	}
+	lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+
+	return (error);
+}
+
+#if HAS_IF_CAP
+/*
+ * bridge_mutecaps:
+ *
+ *	Clear or restore unwanted capabilities on the member interface
+ */
+static void
+bridge_mutecaps(struct bridge_softc *sc)
+{
+	struct bridge_iflist *bif;
+	int enabled, mask;
+
+	/* Initial bitmask of capabilities to test */
+	mask = BRIDGE_IFCAPS_MASK;
+
+	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
+		/* Every member must support it or it is disabled */
+		mask &= bif->bif_savedcaps;
+	}
+
+	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
+		enabled = bif->bif_ifp->if_capenable;
+		enabled &= ~BRIDGE_IFCAPS_STRIP;
+		/* strip off mask bits and enable them again if allowed */
+		enabled &= ~BRIDGE_IFCAPS_MASK;
+		enabled |= mask;
+
+		bridge_set_ifcap(sc, bif, enabled);
+	}
+
+}
+
+static void
+bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
+{
+	struct ifnet *ifp = bif->bif_ifp;
+	struct ifreq ifr;
+	int error;
+
+	bzero(&ifr, sizeof(ifr));
+	ifr.ifr_reqcap = set;
+
+	if (ifp->if_capenable != set) {
+		IFF_LOCKGIANT(ifp);
+		error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
+		IFF_UNLOCKGIANT(ifp);
+		if (error)
+			printf("%s%d: error setting interface capabilities on %s%d\n",
+			    ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp),
+			    ifnet_name(ifp), ifnet_unit(ifp));
+	}
+}
+#endif /* HAS_IF_CAP */
+
+/*
+ * bridge_lookup_member:
+ *
+ *	Lookup a bridge member interface.
+ */
+static struct bridge_iflist *
+bridge_lookup_member(struct bridge_softc *sc, const char *name)
+{
+	struct bridge_iflist *bif;
+	struct ifnet *ifp;
+	char if_xname[IFNAMSIZ];
+
+	BRIDGE_LOCK_ASSERT(sc);
+
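+	/*
+	 * The ifnet KPI exposes the name and unit separately, so build the
+	 * conventional "name + unit" string (e.g. "en0") for the comparison.
+	 */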
+	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
+		ifp = bif->bif_ifp;
+		snprintf(if_xname, sizeof(if_xname), "%s%d", 
+                 ifnet_name(ifp), ifnet_unit(ifp));
+		if (strncmp(if_xname, name, sizeof(if_xname)) == 0)
+			return (bif);
+	}
+
+	return (NULL);
+}
+
+/*
+ * bridge_lookup_member_if:
+ *
+ *	Lookup a bridge member interface by ifnet*.
+ */
+static struct bridge_iflist *
+bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
+{
+	struct bridge_iflist *bif;
+
+	BRIDGE_LOCK_ASSERT(sc);
+
+	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
+		if (bif->bif_ifp == member_ifp)
+			return (bif);
+	}
+
+	return (NULL);
+}
+
+static errno_t 
+bridge_iff_input(void* cookie, ifnet_t ifp, __unused protocol_family_t protocol,
+                 mbuf_t *data, char **frame_ptr)
+{
+	errno_t error = 0;
+	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
+	struct bridge_softc *sc = bif->bif_sc;
+	int included = 0;
+	size_t frmlen = 0;
+	mbuf_t m = *data;
+
+	if ((m->m_flags & M_PROTO1))
+		goto out;
+	
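+	/*
+	 * Determine whether the frame header handed in by the filter
+	 * framework is contiguous with, and immediately precedes, the mbuf
+	 * data, in which case it can be pulled back into the data area below.
+	 */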
+	if (*frame_ptr >= (char *)mbuf_datastart(m) && *frame_ptr <= (char *)mbuf_data(m)) {
+		included = 1;
+		frmlen = (char *)mbuf_data(m) - *frame_ptr;
+	}
+#if BRIDGE_DEBUG
+	if (_if_brige_debug) {
+		printf("bridge_iff_input %s%d from %s%d m %p data %p frame %p %s frmlen %lu\n", 
+			   ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp),
+			   ifnet_name(ifp), ifnet_unit(ifp), 
+			   m, mbuf_data(m), *frame_ptr, included ? "inside" : "outside", frmlen);
+		
+		if (_if_brige_debug > 1) {
+			printf_mbuf(m, "bridge_iff_input[", "\n");
+			printf_ether_header((struct ether_header *)*frame_ptr);
+			printf_mbuf_data(m, 0, 20);
+			printf("\n");
+		}
+	}
+#endif /* BRIDGE_DEBUG */
+
+	/* Move the data pointer back to the start of the frame, i.e. the link-layer header */
+	if (included) {
+		(void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen, mbuf_len(m) + frmlen);
+		(void) mbuf_pkthdr_adjustlen(m, frmlen);
+	} else {
+		printf("bridge_iff_input: frame_ptr outside mbuf\n");
+		goto out;
+	}
+	
+	error = bridge_input(ifp, m, *frame_ptr);
+	
+	/* Adjust packet back to original */
+	if (error == 0) {
+		(void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen, mbuf_len(m) - frmlen);
+		(void) mbuf_pkthdr_adjustlen(m, -frmlen);
+	}
+#if BRIDGE_DEBUG
+	if (_if_brige_debug > 1) {
+		printf("\n");
+		printf_mbuf(m, "bridge_iff_input]", "\n");
+	}
+#endif /* BRIDGE_DEBUG */
+
+out:
+	lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+	
+	return error;
+}
+
+
+#if BRIDGE_MEMBER_OUT_FILTER
+static errno_t
+bridge_iff_output(void *cookie, ifnet_t ifp, __unused protocol_family_t protocol, mbuf_t *data)
+{
+	errno_t error = 0;
+	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
+	struct bridge_softc *sc = bif->bif_sc;
+	mbuf_t m = *data;
+	
+	if ((m->m_flags & M_PROTO1))
+		goto out;
+	
+#if BRIDGE_DEBUG
+	if (_if_brige_debug) {
+		printf("bridge_iff_output %s%d from %s%d m %p data %p\n", 
+				ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp),
+				ifnet_name(ifp), ifnet_unit(ifp), 
+				m, mbuf_data(m));
+	}
+#endif /* BRIDGE_DEBUG */
+
+	error = bridge_output(sc, ifp, m);
+	if (error != 0) {
+		printf("bridge_iff_output: bridge_output failed error %d\n", error);
+	}
+
+out:	
+	lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+
+	return error;
+}
+#endif /* BRIDGE_MEMBER_OUT_FILTER */
+
+
+static void 
+bridge_iff_event(void* cookie, ifnet_t ifp, __unused protocol_family_t protocol,
+                 const struct kev_msg *event_msg)
+{
+	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
+	
+	if (event_msg->vendor_code == KEV_VENDOR_APPLE && 
+		event_msg->kev_class == KEV_NETWORK_CLASS &&
+		event_msg->kev_subclass == KEV_DL_SUBCLASS) {
+		switch (event_msg->event_code) {
+			case KEV_DL_IF_DETACHING:
+			case KEV_DL_IF_DETACHED:
+				bridge_ifdetach(bif, ifp);
+				break;
+				
+			case KEV_DL_LINK_OFF:
+			case KEV_DL_LINK_ON: {
+				bstp_linkstate(ifp, event_msg->event_code);
+				break;
+			}
+			
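+			/*
+			 * Member interface flags changed: if promiscuous mode
+			 * could not be enabled when the member was added (e.g.
+			 * the interface was down), retry once it comes up.
+			 */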
+			case KEV_DL_SIFFLAGS: {
+				if (bif->bif_promisc == 0 && (ifp->if_flags & IFF_UP)) {
+					errno_t error = ifnet_set_promiscuous(ifp, 1);
+					if (error != 0) {
+						printf("bridge_iff_event: ifnet_set_promiscuous(%s%d) failed %d\n",
+							ifnet_name(ifp), ifnet_unit(ifp), error);
+					} else {
+						bif->bif_promisc = 1;
+					}
+				}
+				break;
+			}
+			
+			default:
+				break;
+		}
+	}		
+}
+
+/*
+ * bridge_iff_detached:
+ *
+ *	Interface filter detach callback.  Called when the filter is
+ *	removed from a member interface; tears down the member state
+ *	and frees the bridge_iflist entry.
+ */
+static void 
+bridge_iff_detached(void* cookie, __unused ifnet_t ifp)
+{
+	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
+
+#if BRIDGE_DEBUG	
+	printf("bridge_iff_detached: %s%d\n",
+		ifnet_name(ifp), ifnet_unit(ifp));
+#endif
+
+	bridge_ifdetach(bif, ifp);
+
+	_FREE(bif, M_DEVBUF);
+	
+	return;
+}
+
+static errno_t
+bridge_proto_input(ifnet_t ifp, __unused protocol_family_t protocol, 
+					__unused mbuf_t packet, __unused char *header)
+{
+	printf("bridge_proto_input: unexpected packet from %s%d\n",
+		ifnet_name(ifp), ifnet_unit(ifp));
+	return 0;
+}
+
+static int
+bridge_attach_protocol(struct ifnet *ifp)
+{
+	int								error;
+	struct ifnet_attach_proto_param	reg;
+
+	printf("bridge_attach_protocol: %s%d\n",
+		ifnet_name(ifp), ifnet_unit(ifp));
+	
+	bzero(&reg, sizeof(reg));
+	reg.input = bridge_proto_input;
+	
+	error = ifnet_attach_protocol(ifp, PF_BRIDGE, &reg);
+	if (error)
+		printf("bridge_attach_protocol: ifnet_attach_protocol(%s%d) failed, %d\n",
+			ifnet_name(ifp), ifnet_unit(ifp), error);
+
+	return (error);
+}
+
+static int
+bridge_detach_protocol(struct ifnet *ifp)
+{
+	int         error;
+
+	printf("bridge_detach_protocol: %s%d\n",
+		ifnet_name(ifp), ifnet_unit(ifp));
+
+	error = ifnet_detach_protocol(ifp, PF_BRIDGE);
+	if (error)
+		printf("bridge_detach_protocol: ifnet_detach_protocol(%s%d) failed, %d\n",
+			ifnet_name(ifp), ifnet_unit(ifp), error);
+
+	return (error);
+}
+
+/*
+ * bridge_delete_member:
+ *
+ *	Delete the specified member interface.  If 'gone' is non-zero,
+ *	the interface is already detaching, so its device state (e.g.
+ *	promiscuous mode, capabilities) is left untouched.
+ */
+static void
+bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
+    int gone)
+{
+	struct ifnet *ifs = bif->bif_ifp;
+
+	BRIDGE_LOCK_ASSERT(sc);
+
+	if (!gone) {
+		switch (ifs->if_type) {
+		case IFT_ETHER:
+		case IFT_L2VLAN:
+			/*
+			 * Take the interface out of promiscuous mode.
+			 */
+			if (bif->bif_promisc)
+				(void) ifnet_set_promiscuous(ifs, 0);
+			break;
+
+		case IFT_GIF:
+			break;
+
+		default:
+#ifdef DIAGNOSTIC
+			panic("bridge_delete_member: impossible");
+#endif
+			break;
+		}
+
+#if HAS_IF_CAP
+		/* re-enable any interface capabilities */
+		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
+#endif
+	}
+
+	if (bif->bif_proto_attached) {
+		/* Respect lock ordering with DLIL lock */
+		BRIDGE_UNLOCK(sc);
+		(void) bridge_detach_protocol(ifs);
+		BRIDGE_LOCK(sc);
+	}
+	if (bif->bif_flags & IFBIF_STP)
+		bstp_disable(&bif->bif_stp);
+
+	ifs->if_bridge = NULL;
+	BRIDGE_XLOCK(sc);
+	TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
+	BRIDGE_XDROP(sc);
+
+	ifnet_release(ifs);
+
+#if HAS_IF_CAP
+	bridge_mutecaps(sc);	/* recalculate now that this interface is removed */
+#endif /* HAS_IF_CAP */
+	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
+	KASSERT(bif->bif_addrcnt == 0,
+	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
+
+	BRIDGE_UNLOCK(sc);
+	bstp_destroy(&bif->bif_stp);	/* prepare to free */
+	BRIDGE_LOCK(sc);
+	
+	if (bif->bif_filter_attached) {
+		/* Respect lock ordering with DLIL lock */
+		BRIDGE_UNLOCK(sc);
+		iflt_detach(bif->bif_iff_ref);
+		BRIDGE_LOCK(sc);
+	} else {
+		_FREE(bif, M_DEVBUF);
+	}
+}
+
+/*
+ * bridge_delete_span:
+ *
+ *	Delete the specified span interface.
+ */
+static void
+bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
+{
+	BRIDGE_LOCK_ASSERT(sc);
+
+	KASSERT(bif->bif_ifp->if_bridge == NULL,
+	    ("%s: not a span interface", __func__));
+
+	ifnet_release(bif->bif_ifp);
+
+	TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
+	_FREE(bif, M_DEVBUF);
+}
+
+static int
+bridge_ioctl_add(struct bridge_softc *sc, void *arg)
+{
+	struct ifbreq *req = arg;
+	struct bridge_iflist *bif = NULL;
+	struct ifnet *ifs;
+	int error = 0;
+	struct iff_filter iff;
+
+	ifs = ifunit(req->ifbr_ifsname);
+	if (ifs == NULL)
+		return (ENOENT);
+	if (ifs->if_ioctl == NULL)	/* must be supported */
+		return (EINVAL);
+
+	/* If it's in the span list, it can't be a member. */
+	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
+		if (ifs == bif->bif_ifp)
+			return (EBUSY);
+
+	/* Allow the first Ethernet member to define the MTU */
+	if (ifs->if_type != IFT_GIF) {
+		if (TAILQ_EMPTY(&sc->sc_iflist))
+			sc->sc_ifp->if_mtu = ifs->if_mtu;
+		else if (sc->sc_ifp->if_mtu != ifs->if_mtu) {
+			printf("%s%d: invalid MTU for %s%d\n",
+			    ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp),
+			    ifnet_name(ifs), ifnet_unit(ifs));
+			return (EINVAL);
+		}
+	}
+
+	if (ifs->if_bridge == sc)
+		return (EEXIST);
+
+	if (ifs->if_bridge != NULL)
+		return (EBUSY);
+
+	bif = _MALLOC(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
+	if (bif == NULL)
+		return (ENOMEM);
+
+	bif->bif_ifp = ifs;
+	bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
+#if HAS_IF_CAP
+	bif->bif_savedcaps = ifs->if_capenable;
+#endif /* HAS_IF_CAP */
+	bif->bif_sc = sc;
+
+	ifnet_reference(ifs);
+
+	ifs->if_bridge = sc;
+	bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
+	/*
+	 * XXX: XLOCK HERE!?!
+	 */
+	TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
+
+#if HAS_IF_CAP
+	/* Set interface capabilities to the intersection set of all members */
+	bridge_mutecaps(sc);
+#endif /* HAS_IF_CAP */
+
+	
+	switch (ifs->if_type) {
+	case IFT_ETHER:
+	case IFT_L2VLAN:
+		/*
+		 * Place the interface into promiscuous mode.
+		 */
+		error = ifnet_set_promiscuous(ifs, 1);
+		if (error) {
+			/* Ignore error when device is not up */
+			if (error != ENETDOWN)
+				goto out;
+			error = 0;
+		} else {
+			bif->bif_promisc = 1;
+		}
+		break;
+
+	case IFT_GIF:
+		break;
+
+	default:
+		error = EINVAL;
+		goto out;
+	}
+
+	/* 
+	 * Respect lock ordering with DLIL lock for the following operations
+	 */
+	BRIDGE_UNLOCK(sc);
+
+	/*
+	 * install an interface filter
+	 */
+	memset(&iff, 0, sizeof(struct iff_filter));
+	iff.iff_cookie = bif;
+	iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
+	iff.iff_input = bridge_iff_input;
+#if BRIDGE_MEMBER_OUT_FILTER
+	iff.iff_output = bridge_iff_output;
+#endif /* BRIDGE_MEMBER_OUT_FILTER */
+	iff.iff_event = bridge_iff_event;
+	iff.iff_detached = bridge_iff_detached;
+	error = iflt_attach(ifs, &iff, &bif->bif_iff_ref);
+	if (error != 0) {
+		printf("bridge_ioctl_add: iflt_attach failed %d\n", error);
+		BRIDGE_LOCK(sc);
+		goto out;
+	}
+	bif->bif_filter_attached = 1;
+
+	/*
+	 * install a dummy "bridge" protocol
+	 */
+	if ((error = bridge_attach_protocol(ifs)) != 0) {
+		printf("bridge_ioctl_add: bridge_attach_protocol failed %d\n", error);
+		BRIDGE_LOCK(sc);
+		goto out;
+	}
+	bif->bif_proto_attached = 1;
+
+	BRIDGE_LOCK(sc);
+
+out:
+	if (error && bif != NULL)
+		bridge_delete_member(sc, bif, 1);
+	
+	return (error);
+}
+
+static int
+bridge_ioctl_del(struct bridge_softc *sc, void *arg)
+{
+	struct ifbreq *req = arg;
+	struct bridge_iflist *bif;
+
+	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+	if (bif == NULL)
+		return (ENOENT);
+
+	bridge_delete_member(sc, bif, 0);
+
+	return (0);
+}
+
+static int
+bridge_ioctl_purge(__unused struct bridge_softc *sc, __unused void *arg)
+{	
+	return (0);
+}
+
+static int
+bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
+{
+	struct ifbreq *req = arg;
+	struct bridge_iflist *bif;
+	struct bstp_port *bp;
+
+	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+	if (bif == NULL)
+		return (ENOENT);
+
+	bp = &bif->bif_stp;
+	req->ifbr_ifsflags = bif->bif_flags;
+	req->ifbr_state = bp->bp_state;
+	req->ifbr_priority = bp->bp_priority;
+	req->ifbr_path_cost = bp->bp_path_cost;
+	req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
+	req->ifbr_proto = bp->bp_protover;
+	req->ifbr_role = bp->bp_role;
+	req->ifbr_stpflags = bp->bp_flags;
+	req->ifbr_addrcnt = bif->bif_addrcnt;
+	req->ifbr_addrmax = bif->bif_addrmax;
+	req->ifbr_addrexceeded = bif->bif_addrexceeded;
+
+	/* Copy STP state options as flags */
+	if (bp->bp_operedge)
+		req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
+	if (bp->bp_flags & BSTP_PORT_AUTOEDGE)
+		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
+	if (bp->bp_ptp_link)
+		req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
+	if (bp->bp_flags & BSTP_PORT_AUTOPTP)
+		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
+	if (bp->bp_flags & BSTP_PORT_ADMEDGE)
+		req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
+	if (bp->bp_flags & BSTP_PORT_ADMCOST)
+		req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
+	return (0);
+}
+
+static int
+bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
+{
+	struct ifbreq *req = arg;
+	struct bridge_iflist *bif;
+	struct bstp_port *bp;
+	int error;
+
+	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+	if (bif == NULL)
+		return (ENOENT);
+	bp = &bif->bif_stp;
+
+	if (req->ifbr_ifsflags & IFBIF_SPAN)
+		/* SPAN is read-only */
+		return (EINVAL);
+
+	
+	if (req->ifbr_ifsflags & IFBIF_STP) {
+		if ((bif->bif_flags & IFBIF_STP) == 0) {
+			error = bstp_enable(&bif->bif_stp);
+			if (error)
+				return (error);
+		}
+	} else {
+		if ((bif->bif_flags & IFBIF_STP) != 0)
+			bstp_disable(&bif->bif_stp);
+	}
+
+	/* Pass on STP flags */
+	bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
+	bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
+	bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
+	bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
+
+	/* Save the bits relating to the bridge */
+	bif->bif_flags = req->ifbr_ifsflags & IFBIFMASK;
+
+
+	return (0);
+}
+
+static int
+bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+
+	sc->sc_brtmax = param->ifbrp_csize;
+	bridge_rttrim(sc);
+
+	return (0);
+}
+
+static int
+bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+
+	param->ifbrp_csize = sc->sc_brtmax;
+
+	return (0);
+}
+
+
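+/*
+ * BRIDGE_IOCTL_GIFS is expanded in both the 32-bit and 64-bit "get member
+ * interfaces" handlers below, which differ only in the width-specific
+ * 'bifc' and 'error' locals the macro body references.  Callers size the
+ * buffer with a two-pass protocol: a request with ifbic_len == 0 merely
+ * reports the space required, and a second request with a large enough
+ * buffer gets the member and span lists copied out.
+ */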
+#define BRIDGE_IOCTL_GIFS do { \
+	struct bridge_iflist *bif; \
+	struct ifbreq breq; \
+	char *buf, *outbuf; \
+	unsigned int count, buflen, len; \
+ \
+	count = 0; \
+	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) \
+		count++; \
+	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) \
+		count++; \
+ \
+	buflen = sizeof(breq) * count; \
+	if (bifc->ifbic_len == 0) { \
+		bifc->ifbic_len = buflen; \
+		return (0); \
+	} \
+	BRIDGE_UNLOCK(sc); \
+	outbuf = _MALLOC(buflen, M_TEMP, M_WAITOK | M_ZERO); \
+	BRIDGE_LOCK(sc); \
+ \
+	count = 0; \
+	buf = outbuf; \
+	len = min(bifc->ifbic_len, buflen); \
+	bzero(&breq, sizeof(breq)); \
+	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
+		if (len < sizeof(breq)) \
+			break; \
+ \
+		snprintf(breq.ifbr_ifsname, sizeof(breq.ifbr_ifsname), "%s%d", \
+                 ifnet_name(bif->bif_ifp), ifnet_unit(bif->bif_ifp)); \
+		/* Fill in the ifbreq structure */ \
+		error = bridge_ioctl_gifflags(sc, &breq); \
+		if (error) \
+			break; \
+		memcpy(buf, &breq, sizeof(breq)); \
+		count++; \
+		buf += sizeof(breq); \
+		len -= sizeof(breq); \
+	} \
+	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) { \
+		if (len < sizeof(breq)) \
+			break; \
+ \
+		snprintf(breq.ifbr_ifsname, sizeof(breq.ifbr_ifsname), "%s%d", \
+                 ifnet_name(bif->bif_ifp), ifnet_unit(bif->bif_ifp)); \
+		breq.ifbr_ifsflags = bif->bif_flags; \
+		breq.ifbr_portno = bif->bif_ifp->if_index & 0xfff; \
+		memcpy(buf, &breq, sizeof(breq)); \
+		count++; \
+		buf += sizeof(breq); \
+		len -= sizeof(breq); \
+	} \
+ \
+	BRIDGE_UNLOCK(sc); \
+	bifc->ifbic_len = sizeof(breq) * count; \
+	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len); \
+	BRIDGE_LOCK(sc); \
+	_FREE(outbuf, M_TEMP); \
+} while (0)
+
+static int
+bridge_ioctl_gifs64(struct bridge_softc *sc, void *arg)
+{
+	struct ifbifconf64 *bifc = arg;
+	int error = 0;
+	
+	BRIDGE_IOCTL_GIFS;
+
+	return (error);
+}
+
+static int
+bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg)
+{
+	struct ifbifconf32 *bifc = arg;
+	int error = 0;
+
+	BRIDGE_IOCTL_GIFS;
+
+	return (error);
+}
+
+
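+/*
+ * BRIDGE_IOCTL_RTS dumps the forwarding (address) table; like
+ * BRIDGE_IOCTL_GIFS above it is expanded in 32-bit and 64-bit handlers
+ * that declare the width-specific 'bac' and 'bareq' locals.  Expiry of
+ * dynamic entries is reported relative to the current uptime.
+ */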
+#define BRIDGE_IOCTL_RTS do { \
+	struct bridge_rtnode *brt; \
+	char *buf, *outbuf; \
+	unsigned int count, buflen, len; \
+	struct timespec now; \
+ \
+	if (bac->ifbac_len == 0) \
+		return (0); \
+ \
+	count = 0; \
+	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) \
+		count++; \
+	buflen = sizeof(bareq) * count; \
+ \
+	BRIDGE_UNLOCK(sc); \
+	outbuf = _MALLOC(buflen, M_TEMP, M_WAITOK | M_ZERO); \
+	BRIDGE_LOCK(sc); \
+ \
+	count = 0; \
+	buf = outbuf; \
+	len = min(bac->ifbac_len, buflen); \
+	bzero(&bareq, sizeof(bareq)); \
+	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { \
+		if (len < sizeof(bareq)) \
+			goto out; \
+		snprintf(bareq.ifba_ifsname, sizeof(bareq.ifba_ifsname), "%s%d", \
+                 ifnet_name(brt->brt_ifp), ifnet_unit(brt->brt_ifp)); \
+		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr)); \
+		bareq.ifba_vlan = brt->brt_vlan; \
+		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { \
+			nanouptime(&now); \
+			if ((unsigned long)now.tv_sec < brt->brt_expire) \
+				bareq.ifba_expire = brt->brt_expire - now.tv_sec; \
+		} else \
+			bareq.ifba_expire = 0; \
+		bareq.ifba_flags = brt->brt_flags; \
+ \
+		memcpy(buf, &bareq, sizeof(bareq)); \
+		count++; \
+		buf += sizeof(bareq); \
+		len -= sizeof(bareq); \
+	} \
+out: \
+	BRIDGE_UNLOCK(sc); \
+	bac->ifbac_len = sizeof(bareq) * count; \
+	error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len); \
+	BRIDGE_LOCK(sc); \
+	_FREE(outbuf, M_TEMP); \
+	return (error); \
+} while (0)
+
+static int
+bridge_ioctl_rts64(struct bridge_softc *sc, void *arg)
+{
+	struct ifbaconf64 *bac = arg;
+	struct ifbareq64 bareq;
+	int error = 0;
+	
+	BRIDGE_IOCTL_RTS;
+
+	return (error);
+}
+
+static int
+bridge_ioctl_rts32(struct bridge_softc *sc, void *arg)
+{
+	struct ifbaconf32 *bac = arg;
+	struct ifbareq32 bareq;
+	int error = 0;
+	
+	BRIDGE_IOCTL_RTS;
+
+	return (error);
+}
+
+static int
+bridge_ioctl_saddr32(struct bridge_softc *sc, void *arg)
+{
+	struct ifbareq32 *req = arg;
+	struct bridge_iflist *bif;
+	int error;
+
+	bif = bridge_lookup_member(sc, req->ifba_ifsname);
+	if (bif == NULL)
+		return (ENOENT);
+
+	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
+	    req->ifba_flags);
+
+	return (error);
+}
+
+static int
+bridge_ioctl_saddr64(struct bridge_softc *sc, void *arg)
+{
+	struct ifbareq64 *req = arg;
+	struct bridge_iflist *bif;
+	int error;
+
+	bif = bridge_lookup_member(sc, req->ifba_ifsname);
+	if (bif == NULL)
+		return (ENOENT);
+
+	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
+	    req->ifba_flags);
+
+	return (error);
+}
+
+static int
+bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+
+	sc->sc_brttimeout = param->ifbrp_ctime;
+	return (0);
+}
+
+static int
+bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+
+	param->ifbrp_ctime = sc->sc_brttimeout;
+	return (0);
+}
+
+static int
+bridge_ioctl_daddr32(struct bridge_softc *sc, void *arg)
+{
+	struct ifbareq32 *req = arg;
+
+	return (bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan));
+}
+
+static int
+bridge_ioctl_daddr64(struct bridge_softc *sc, void *arg)
+{
+	struct ifbareq64 *req = arg;
+
+	return (bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan));
+}
+
+static int
+bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
+{
+	struct ifbreq *req = arg;
+
+	bridge_rtflush(sc, req->ifbr_ifsflags);
+	return (0);
+}
+
+static int
+bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+	struct bstp_state *bs = &sc->sc_stp;
+
+	param->ifbrp_prio = bs->bs_bridge_priority;
+	return (0);
+}
+
+static int
+bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+
+	return (bstp_set_priority(&sc->sc_stp, param->ifbrp_prio));
+}
+
+static int
+bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+	struct bstp_state *bs = &sc->sc_stp;
+
+	param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
+	return (0);
+}
+
+static int
+bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+
+	return (bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime));
+}
+
+static int
+bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+	struct bstp_state *bs = &sc->sc_stp;
+
+	param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
+	return (0);
+}
+
+static int
+bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+
+	return (bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay));
+}
+
+static int
+bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+	struct bstp_state *bs = &sc->sc_stp;
+
+	param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
+	return (0);
+}
+
+static int
+bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+
+	return (bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage));
+}
+
+static int
+bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
+{
+	struct ifbreq *req = arg;
+	struct bridge_iflist *bif;
+
+	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+	if (bif == NULL)
+		return (ENOENT);
+
+	return (bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority));
+}
+
+static int
+bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
+{
+	struct ifbreq *req = arg;
+	struct bridge_iflist *bif;
+
+	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+	if (bif == NULL)
+		return (ENOENT);
+
+	return (bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost));
+}
+
+static int
+bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+
+	param->ifbrp_filter = sc->sc_filter_flags;
+
+	return (0);
+}
+
+static int
+bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+
+	if (param->ifbrp_filter & ~IFBF_FILT_MASK)
+		return (EINVAL);
+
+#ifndef BRIDGE_IPF
+	if (param->ifbrp_filter & IFBF_FILT_USEIPF)
+		return (EINVAL);
+#endif
+
+	sc->sc_filter_flags = param->ifbrp_filter;
+
+	return (0);
+}
+
+static int
+bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
+{
+	struct ifbreq *req = arg;
+	struct bridge_iflist *bif;
+
+	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
+	if (bif == NULL)
+		return (ENOENT);
+
+	bif->bif_addrmax = req->ifbr_addrmax;
+	return (0);
+}
+
+static int
+bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
+{
+	struct ifbreq *req = arg;
+	struct bridge_iflist *bif = NULL;
+	struct ifnet *ifs;
+
+	ifs = ifunit(req->ifbr_ifsname);
+	if (ifs == NULL)
+		return (ENOENT);
+
+	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
+		if (ifs == bif->bif_ifp)
+			return (EBUSY);
+
+	if (ifs->if_bridge != NULL)
+		return (EBUSY);
+
+	switch (ifs->if_type) {
+		case IFT_ETHER:
+		case IFT_GIF:
+		case IFT_L2VLAN:
+			break;
+		default:
+			return (EINVAL);
+	}
+
+	bif = _MALLOC(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
+	if (bif == NULL)
+		return (ENOMEM);
+
+	bif->bif_ifp = ifs;
+	bif->bif_flags = IFBIF_SPAN;
+
+	ifnet_reference(bif->bif_ifp);
+
+	TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
+
+	return (0);
+}
+
+static int
+bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
+{
+	struct ifbreq *req = arg;
+	struct bridge_iflist *bif;
+	struct ifnet *ifs;
+
+	ifs = ifunit(req->ifbr_ifsname);
+	if (ifs == NULL)
+		return (ENOENT);
+
+	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
+		if (ifs == bif->bif_ifp)
+			break;
+
+	if (bif == NULL)
+		return (ENOENT);
+
+	bridge_delete_span(sc, bif);
+
+	return (0);
+}
+
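+/*
+ * BRIDGE_IOCTL_GBPARAM fills in the bridge-wide STP parameters for the
+ * 32-bit and 64-bit 'req' variants.  The STP timers are kept by bstp in
+ * units of 1/256 of a second, hence the >> 8 conversions to seconds.
+ */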
+#define BRIDGE_IOCTL_GBPARAM do { \
+	struct bstp_state *bs = &sc->sc_stp; \
+	struct bstp_port *root_port; \
+ \
+	req->ifbop_maxage = bs->bs_bridge_max_age >> 8; \
+	req->ifbop_hellotime = bs->bs_bridge_htime >> 8; \
+	req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8; \
+ \
+	root_port = bs->bs_root_port; \
+	if (root_port == NULL) \
+		req->ifbop_root_port = 0; \
+	else \
+		req->ifbop_root_port = root_port->bp_ifp->if_index; \
+ \
+	req->ifbop_holdcount = bs->bs_txholdcount; \
+	req->ifbop_priority = bs->bs_bridge_priority; \
+	req->ifbop_protocol = bs->bs_protover; \
+	req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost; \
+	req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id; \
+	req->ifbop_designated_root = bs->bs_root_pv.pv_root_id; \
+	req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id; \
+	req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec; \
+	req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec; \
+} while (0)
+
+static int
+bridge_ioctl_gbparam32(struct bridge_softc *sc, void *arg)
+{
+	struct ifbropreq32 *req = arg;
+
+	BRIDGE_IOCTL_GBPARAM;
+	
+	return (0);
+}
+
+static int
+bridge_ioctl_gbparam64(struct bridge_softc *sc, void *arg)
+{
+	struct ifbropreq64 *req = arg;
+
+	BRIDGE_IOCTL_GBPARAM;
+
+	return (0);
+}
+
+
+static int
+bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+
+	param->ifbrp_cexceeded = sc->sc_brtexceeded;
+	return (0);
+}
+
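+/*
+ * BRIDGE_IOCTL_GIFSSTP reports per-port STP status for the members with
+ * STP enabled, using the same shared-body, two-pass sizing pattern as
+ * BRIDGE_IOCTL_GIFS above.
+ */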
+#define BRIDGE_IOCTL_GIFSSTP do { \
+	struct bridge_iflist *bif; \
+	struct bstp_port *bp; \
+	struct ifbpstpreq bpreq; \
+	char *buf, *outbuf; \
+	unsigned int count, buflen, len; \
+ \
+	count = 0; \
+	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
+		if ((bif->bif_flags & IFBIF_STP) != 0) \
+			count++; \
+	} \
+ \
+	buflen = sizeof(bpreq) * count; \
+	if (bifstp->ifbpstp_len == 0) { \
+		bifstp->ifbpstp_len = buflen; \
+		return (0); \
+	} \
+ \
+	BRIDGE_UNLOCK(sc); \
+	outbuf = _MALLOC(buflen, M_TEMP, M_WAITOK | M_ZERO); \
+	BRIDGE_LOCK(sc); \
+ \
+	count = 0; \
+	buf = outbuf; \
+	len = min(bifstp->ifbpstp_len, buflen); \
+	bzero(&bpreq, sizeof(bpreq)); \
+	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
+		if (len < sizeof(bpreq)) \
+			break; \
+ \
+		if ((bif->bif_flags & IFBIF_STP) == 0) \
+			continue; \
+ \
+		bp = &bif->bif_stp; \
+		bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff; \
+		bpreq.ifbp_fwd_trans = bp->bp_forward_transitions; \
+		bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost; \
+		bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id; \
+		bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
+		bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id; \
+ \
+		memcpy(buf, &bpreq, sizeof(bpreq)); \
+		count++; \
+		buf += sizeof(bpreq); \
+		len -= sizeof(bpreq); \
+	} \
+ \
+	BRIDGE_UNLOCK(sc); \
+	bifstp->ifbpstp_len = sizeof(bpreq) * count; \
+	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); \
+	BRIDGE_LOCK(sc); \
+	_FREE(outbuf, M_TEMP); \
+	return (error); \
+} while (0)
+
+static int
+bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *arg)
+{
+	struct ifbpstpconf32 *bifstp = arg;
+	int error = 0;
+
+	BRIDGE_IOCTL_GIFSSTP;
+
+	return (error);
+}
+
+static int
+bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *arg)
+{
+	struct ifbpstpconf64 *bifstp = arg;
+	int error = 0;
+
+	BRIDGE_IOCTL_GIFSSTP;
+
+	return (error);
+}
+
+static int
+bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+
+	return (bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto));
+}
+
+static int
+bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
+{
+	struct ifbrparam *param = arg;
+
+	return (bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc));
+}
+
+/*
+ * bridge_ifdetach:
+ *
+ *	Detach an interface from a bridge.  Called when a member
+ *	interface is detaching.
+ */
+__private_extern__ void
+bridge_ifdetach(struct bridge_iflist *bif, struct ifnet *ifp)
+{
+	struct bridge_softc *sc = ifp->if_bridge;
+
+#if BRIDGE_DEBUG
+	printf("bridge_ifdetach %s%d\n", ifnet_name(ifp), ifnet_unit(ifp));
+#endif
+
+	/* Check if the interface is a bridge member */
+	if (sc != NULL) {
+		BRIDGE_LOCK(sc);
+
+		bif = bridge_lookup_member_if(sc, ifp);
+		if (bif != NULL)
+			bridge_delete_member(sc, bif, 1);
+
+		BRIDGE_UNLOCK(sc);
+		return;
+	}
+
+	/* Check if the interface is a span port */
+	lck_mtx_lock(bridge_list_mtx);
+	LIST_FOREACH(sc, &bridge_list, sc_list) {
+		BRIDGE_LOCK(sc);
+		TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
+			if (ifp == bif->bif_ifp) {
+				bridge_delete_span(sc, bif);
+				break;
+			}
+
+		BRIDGE_UNLOCK(sc);
+	}
+	lck_mtx_unlock(bridge_list_mtx);
+}
+
+/*
+ * bridge_init:
+ *
+ *	Initialize a bridge interface.
+ */
+static int
+bridge_init(struct ifnet *ifp)
+{
+	struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
+	struct timespec ts;
+	errno_t error;
+
+	BRIDGE_LOCK_ASSERT(sc);
+
+	if ((ifnet_flags(ifp) & IFF_RUNNING))
+		return 0;
+
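+	/*
+	 * Arm bridge_timer to age out forwarding table entries every
+	 * bridge_rtable_prune_period seconds.
+	 */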
+	ts.tv_sec = bridge_rtable_prune_period;
+	ts.tv_nsec = 0;
+	bsd_timeout(bridge_timer, sc, &ts);
+
+	error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
+	if (error == 0)
+		bstp_init(&sc->sc_stp);		/* Initialize Spanning Tree */
+
+	return error;
+}
+
+/*
+ * bridge_stop:
+ *
+ *	Stop the bridge interface.
+ */
+static void
+bridge_stop(struct ifnet *ifp, __unused int disable)
+{
+	struct bridge_softc *sc = ifp->if_softc;
+
+	BRIDGE_LOCK_ASSERT(sc);
+
+	if ((ifnet_flags(ifp) & IFF_RUNNING) == 0)
+		return;
+
+	bsd_untimeout(bridge_timer, sc);
+	bstp_stop(&sc->sc_stp);
+
+	bridge_rtflush(sc, IFBF_FLUSHDYN);
+
+	(void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
+}
+
+/*
+ * bridge_enqueue:
+ *
+ *	Enqueue a packet on a bridge member interface.
+ *
+ */
+static void
+bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m)
+{
+	int len, error = 0;
+	short mflags;
+	struct mbuf *m0;
+
+	/* We may be sending a fragment list, so traverse the packet chain via m_nextpkt */
+	for (; m; m = m0) {
+		m0 = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+
+		len = m->m_pkthdr.len;
+		mflags = m->m_flags;
+		m->m_flags |= M_PROTO1;	/* set to avoid loops: the bridge filter callbacks skip M_PROTO1 frames */
+
+#if HAS_IF_CAP
+		/*
+		 * If underlying interface can not do VLAN tag insertion itself
+		 * then attach a packet tag that holds it.
+		 */
+		if ((m->m_flags & M_VLANTAG) &&
+		    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
+			m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
+			if (m == NULL) {
+				printf("%s%d: unable to prepend VLAN header\n",
+				    ifnet_name(dst_ifp), ifnet_unit(dst_ifp));
+				(void) ifnet_stat_increment_out(dst_ifp, 0, 0, 1);
+				continue;
+			}
+			m->m_flags &= ~M_VLANTAG;
+		}
+#endif /* HAS_IF_CAP */
+
+		error = ifnet_output_raw(dst_ifp, 0, m);
+		if (error == 0) {
+			(void) ifnet_stat_increment_out(sc->sc_ifp, 1, len, 0);
+		} else {
+			(void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
+		}
+	}
+
+	return;
+}
+
+#if HAS_BRIDGE_DUMMYNET
+/*
+ * bridge_dummynet:
+ *
+ * 	Receive a queued packet from dummynet and pass it on to the output
+ * 	interface.
+ *
+ *	The mbuf has the Ethernet header already attached.
+ */
+static void
+bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
+{
+	struct bridge_softc *sc;
+
+	sc = ifp->if_bridge;
+
+	/*
+	 * The packet didn't originate from a member interface. This should only
+	 * ever happen if a member interface is removed while packets are
+	 * queued for it.
+	 */
+	if (sc == NULL) {
+		m_freem(m);
+		return;
+	}
+
+	if (PFIL_HOOKED(&inet_pfil_hook)
+#ifdef INET6
+	    || PFIL_HOOKED(&inet6_pfil_hook)
+#endif
+	    ) {
+		if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0)
+			return;
+		if (m == NULL)
+			return;
+	}
+
+	bridge_enqueue(sc, ifp, m);
+}
+#endif /* HAS_BRIDGE_DUMMYNET */
+
+#if BRIDGE_MEMBER_OUT_FILTER
+/*
+ * bridge_output:
+ *
+ *	Send output from a bridge member interface.  This
+ *	performs the bridging function for locally originated
+ *	packets.
+ *
+ *	The mbuf has the Ethernet header already attached.  We must
+ *	enqueue or free the mbuf before returning.
+ */
+static int
+bridge_output(struct ifnet *ifp, struct mbuf *m, __unused struct sockaddr *sa,
+    __unused struct rtentry *rt)
+{
+	struct ether_header *eh;
+	struct ifnet *dst_if;
+	struct bridge_softc *sc;
+	uint16_t vlan;
+
+#if BRIDGE_DEBUG
+	if (_if_brige_debug)
+		printf("bridge_output ifp %p %s%d\n", ifp, ifnet_name(ifp), ifnet_unit(ifp));
+#endif /* BRIDGE_DEBUG */
+	
+	if (m->m_len < ETHER_HDR_LEN) {
+		m = m_pullup(m, ETHER_HDR_LEN);
+		if (m == NULL)
+			return (0);
+	}
+
+	eh = mtod(m, struct ether_header *);
+	sc = ifp->if_bridge;
+	vlan = VLANTAGOF(m);
+
+	BRIDGE_LOCK(sc);
+
+	/* APPLE MODIFICATION 
+	 * If the packet is an 802.1X ethertype, then only send on the
+	 * original output interface.
+	 */
+	if (eh->ether_type == htons(ETHERTYPE_PAE)) {
+		dst_if = ifp;
+		goto sendunicast;
+	}
+	
+	/*
+	 * If bridge is down, but the original output interface is up,
+	 * go ahead and send out that interface.  Otherwise, the packet
+	 * is dropped below.
+	 */
+	if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
+		dst_if = ifp;
+		goto sendunicast;
+	}
+
+	/*
+	 * If the packet is a multicast, or we don't know a better way to
+	 * get there, send to all interfaces.
+	 */
+	if (ETHER_IS_MULTICAST(eh->ether_dhost))
+		dst_if = NULL;
+	else
+		dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
+	if (dst_if == NULL) {
+		struct bridge_iflist *bif;
+		struct mbuf *mc;
+		int error = 0, used = 0;
+
+		bridge_span(sc, m);
+
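+		/*
+		 * Trade the bridge mutex for a reference so the member list
+		 * remains stable while we transmit on each member with the
+		 * lock dropped.
+		 */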
+		BRIDGE_LOCK2REF(sc, error);
+		if (error) {
+			m_freem(m);
+			return (0);
+		}
+
+		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
+			dst_if = bif->bif_ifp;
+
+			if (dst_if->if_type == IFT_GIF)
+				continue;
+			if ((dst_if->if_flags & IFF_RUNNING) == 0)
+				continue;
+
+			/*
+			 * If this is not the original output interface,
+			 * and the interface is participating in spanning
+			 * tree, make sure the port is in a state that
+			 * allows forwarding.
+			 */
+			if (dst_if != ifp && (bif->bif_flags & IFBIF_STP) &&
+			    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
+				continue;
+
+			if (TAILQ_NEXT(bif, bif_next) == NULL) {
+				used = 1;
+				mc = m;
+			} else {
+				mc = m_copypacket(m, M_DONTWAIT);
+				if (mc == NULL) {
+					(void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
+					continue;
+				}
+			}
+
+			bridge_enqueue(sc, dst_if, mc);
+		}
+		if (used == 0)
+			m_freem(m);
+		BRIDGE_UNREF(sc);
+		return (0);
+	}
+
+sendunicast:
+	/*
+	 * XXX Spanning tree consideration here?
+	 */
+
+	bridge_span(sc, m);
+	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
+		m_freem(m);
+		BRIDGE_UNLOCK(sc);
+		return (0);
+	}
+
+	BRIDGE_UNLOCK(sc);
+	bridge_enqueue(sc, dst_if, m);
+	return (0);
+}
+#endif /* BRIDGE_MEMBER_OUT_FILTER */
+
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+static struct mbuf* bridge_fix_txcsum( struct mbuf *m )
+{
+	//	Basic tests indicate that the vast majority of packets being
+	//	processed here have an Ethernet header mbuf pre-pended to them
+	//	(the first case below); the next most common are those where the
+	//	Ethernet and IP/TCP/UDP headers are all in one mbuf (the second
+	//	case below). The third case has never been observed in testing,
+	//	but its code also handles the first two cases when they are
+	//	disabled, so it serves as a general fallback.
+	
+	int amt = ETHER_HDR_LEN;
+	int hlen = M_CSUM_DATA_IPv4_IPHL( m->m_pkthdr.csum_data );
+	int off = M_CSUM_DATA_IPv4_OFFSET( m->m_pkthdr.csum_data );
+	
+	/* 
+	 * NOTE we should never get vlan-attached packets here;
+	 * support for those COULD be added, but we don't use them
+	 * and it really kinda slows things down to worry about them
+	 */
+	
+#ifdef DIAGNOSTIC
+	if ( m_tag_find( m, PACKET_TAG_VLAN, NULL ) != NULL )
+	{
+		printf( "bridge: transmitting packet tagged with VLAN?\n" );
+		KASSERT( 0 );
+		m_freem( m );
+		return NULL;
+	}
+#endif
+	
+	if ( m->m_pkthdr.csum_flags & M_CSUM_IPv4 )
+	{
+		amt += hlen;
+	}
+	if ( m->m_pkthdr.csum_flags & M_CSUM_TCPv4 )
+	{
+		amt += off + sizeof( uint16_t );
+	}
+	
+	if ( m->m_pkthdr.csum_flags & M_CSUM_UDPv4 )
+	{
+		amt += off + sizeof( uint16_t );
+	}
+	
+	if ( m->m_len == ETHER_HDR_LEN )
+	{
+		// this is the case where there's an Ethernet header in an mbuf
+        
+		// the first mbuf is the Ethernet header -- just strip it off and do the checksum
+		struct mbuf *m_ip = m->m_next;
+        
+		// set up m_ip so the cksum operations work
+		/* APPLE MODIFICATION 22 Apr 2008 <mvega@apple.com>
+		 *  <rdar://5817385> Clear the m_tag list before setting
+		 *  M_PKTHDR.
+		 *
+		 *  If this m_buf chain was extended via M_PREPEND(), then
+		 *  m_ip->m_pkthdr is identical to m->m_pkthdr (see
+		 *  M_MOVE_PKTHDR()). The only thing preventing access to this
+		 *  invalid packet header data is the fact that the M_PKTHDR
+		 *  flag is clear, i.e., m_ip->m_flag & M_PKTHDR == 0, but we're
+		 *  about to set the M_PKTHDR flag, so to be safe we initialize,
+		 *  more accurately, we clear, m_ip->m_pkthdr.tags via
+		 *  m_tag_init().
+		 *
+		 *  Suppose that we do not do this; if m_pullup(), below, fails,
+		 *  then m_ip will be freed along with m_ip->m_pkthdr.tags, but
+		 *  we will also free m soon after, via m_freem(), and
+		 *  consequently attempt to free m->m_pkthdr.tags in the
+		 *  process. The problem is that m->m_pkthdr.tags will have
+		 *  already been freed by virtue of being equal to
+		 *  m_ip->m_pkthdr.tags. Attempts to dereference
+		 *  m->m_pkthdr.tags in m_tag_delete_chain() will result in a
+		 *  panic.
+		 */
+		m_tag_init(m_ip);
+		/* END MODIFICATION */
+		m_ip->m_flags |= M_PKTHDR;
+		m_ip->m_pkthdr.csum_flags = m->m_pkthdr.csum_flags;
+		m_ip->m_pkthdr.csum_data = m->m_pkthdr.csum_data;
+		m_ip->m_pkthdr.len = m->m_pkthdr.len - ETHER_HDR_LEN;
+        
+		// set up the header mbuf so we can prepend it back on again later
+		m->m_pkthdr.csum_flags = 0;
+		m->m_pkthdr.csum_data = 0;
+		m->m_pkthdr.len = ETHER_HDR_LEN;
+		m->m_next = NULL;
+        
+        
+		// now do the checksums we need -- first IP
+		if ( m_ip->m_pkthdr.csum_flags & M_CSUM_IPv4 )
+		{
+			// make sure the IP header (or at least the part with the cksum) is there
+			m_ip = m_pullup( m_ip, sizeof( struct ip ) );
+			if ( m_ip == NULL )
+			{
+				printf( "bridge: failed to flatten header\n" );
+				m_freem( m );
+				return NULL;
+			}
+			
+			// now do the checksum
+			{
+				struct ip *ip = mtod( m_ip, struct ip* );
+				ip->ip_sum = in_cksum( m_ip, hlen );
+                
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+				printf( "bridge: performed IPv4 checksum\n" );
+#endif
+			}
+		}
+        
+		// now do a TCP or UDP delayed checksum
+		if ( m_ip->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4) )
+		{
+			in_delayed_cksum( m_ip );
+            
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+			printf( "bridge: performed TCPv4/UDPv4 checksum\n" );
+#endif
+		}
+        
+		// now attach the ethernet header back onto the IP packet
+		m->m_next = m_ip;
+		m->m_pkthdr.len += m_length( m_ip );	
+        
+		// clear the M_PKTHDR flags on the ip packet (again, we re-attach later)
+		m_ip->m_flags &= ~M_PKTHDR;
+        
+		// and clear any csum flags
+		m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4);
+	}
+	else if ( m->m_len >= amt )
+	{
+		// everything fits in the first mbuf, so futz with m->m_data, m->m_len and m->m_pkthdr.len to
+		// make it work
+		m->m_len -= ETHER_HDR_LEN;
+		m->m_data += ETHER_HDR_LEN;
+		m->m_pkthdr.len -= ETHER_HDR_LEN;
+        
+		// now do the checksums we need -- first IP
+		if ( m->m_pkthdr.csum_flags & M_CSUM_IPv4 )
+		{
+			struct ip *ip = mtod( m, struct ip* );
+			ip->ip_sum = in_cksum( m, hlen );
+            
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+			printf( "bridge: performed IPv4 checksum\n" );
+#endif
+		}
+        
+		// now do a TCP or UDP delayed checksum
+		if ( m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4) )
+		{
+			in_delayed_cksum( m );
+            
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+			printf( "bridge: performed TCPv4/UDPv4 checksum\n" );
+#endif
+		}
+		
+		// now stick the ethernet header back on
+		m->m_len += ETHER_HDR_LEN;
+		m->m_data -= ETHER_HDR_LEN;
+		m->m_pkthdr.len += ETHER_HDR_LEN;
+        
+		// and clear any csum flags
+		m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4);
+	}
+	else
+	{
+		struct mbuf *m_ip;
+        
+		// general case -- need to simply split it off and deal
+        
+		// first, calculate how much needs to be made writable (we may have a read-only mbuf here)
+		hlen = M_CSUM_DATA_IPv4_IPHL( m->m_pkthdr.csum_data );
+#if PARANOID
+		off = M_CSUM_DATA_IPv4_OFFSET( m->m_pkthdr.csum_data );
+		
+		if ( m->m_pkthdr.csum_flags & M_CSUM_IPv4 )
+		{
+			amt += hlen;
+		}
+		
+		if ( m->m_pkthdr.csum_flags & M_CSUM_TCPv4 )
+		{
+			amt += sizeof( struct tcphdr );
+			amt += off;
+		}
+		
+		if ( m->m_pkthdr.csum_flags & M_CSUM_UDPv4 )
+		{
+			amt += sizeof( struct udphdr );
+			amt += off;
+		}
+#endif
+        
+		// now split the ethernet header off of the IP packet (we'll re-attach later)
+		m_ip = m_split( m, ETHER_HDR_LEN, M_NOWAIT );
+		if ( m_ip == NULL )
+		{
+			printf( "bridge_fix_txcsum: could not split ether header\n" );
+            
+			m_freem( m );
+			return NULL;
+		}
+        
+#if PARANOID
+		// make sure that the IP packet is writable for the portion we need
+		if ( m_makewritable( &m_ip, 0, amt, M_DONTWAIT ) != 0 )
+		{
+			printf( "bridge_fix_txcsum: could not make %d bytes writable\n", amt );
+            
+			m_freem( m );
+			m_freem( m_ip );
+			return NULL;
+		}
+#endif
+		
+		m_ip->m_pkthdr.csum_flags = m->m_pkthdr.csum_flags;
+		m_ip->m_pkthdr.csum_data = m->m_pkthdr.csum_data;
+        
+		m->m_pkthdr.csum_flags = 0;
+		m->m_pkthdr.csum_data = 0;
+        
+		// now do the checksums we need -- first IP
+		if ( m_ip->m_pkthdr.csum_flags & M_CSUM_IPv4 )
+		{
+			// make sure the IP header (or at least the part with the cksum) is there
+			m_ip = m_pullup( m_ip, sizeof( struct ip ) );
+			if ( m_ip == NULL )
+			{
+				printf( "bridge: failed to flatten header\n" );
+				m_freem( m );
+				return NULL;
+			}
+			
+			// now do the checksum
+			{
+				struct ip *ip = mtod( m_ip, struct ip* );
+				ip->ip_sum = in_cksum( m_ip, hlen );
+                
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+				printf( "bridge: performed IPv4 checksum\n" );
+#endif
+			}
+		}
+        
+		// now do a TCP or UDP delayed checksum
+		if ( m_ip->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4) )
+		{
+			in_delayed_cksum( m_ip );
+            
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+			printf( "bridge: performed TCPv4/UDPv4 checksum\n" );
+#endif
+		}
+        
+		// now attach the ethernet header back onto the IP packet
+		m->m_next = m_ip;
+		m->m_pkthdr.len += m_length( m_ip );	
+        
+		// clear the M_PKTHDR flags on the ip packet (again, we re-attach later)
+		m_ip->m_flags &= ~M_PKTHDR;
+        
+		// and clear any csum flags
+		m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4);
+	}
+	
+	return m;
+}
+#endif
+
+/*
+ * bridge_start:
+ *
+ *	Start output on a bridge.
+ *
+ */
+static errno_t
+bridge_start(struct ifnet *ifp, struct mbuf *m)
+{
+	struct bridge_softc *sc = ifnet_softc(ifp);
+	struct ether_header *eh;
+	struct ifnet *dst_if;
+	
+	lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED);
+
+	eh = mtod(m, struct ether_header *);
+	
+	BRIDGE_LOCK(sc);
+	
+	if ((m->m_flags & (M_BCAST|M_MCAST)) == 0 &&
+		(dst_if = bridge_rtlookup(sc, eh->ether_dhost, 0)) != NULL) {
+		
+		{
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+			/* 
+			 * APPLE MODIFICATION - if the packet needs a checksum (i.e., 
+			 * checksum has been deferred for HW support) AND the destination
+			 * interface doesn't support HW checksums, then we 
+			 * need to fix-up the checksum here
+			 */
+			if (
+				( (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4) ) != 0 ) &&
+				( (dst_if->if_csum_flags_tx & m->m_pkthdr.csum_flags ) != m->m_pkthdr.csum_flags )
+				)
+			{
+				m = bridge_fix_txcsum( m );
+				if ( m == NULL )
+				{
+					goto done;
+				}
+			}
+			
+#else
+			if (eh->ether_type == htons(ETHERTYPE_IP))
+				mbuf_outbound_finalize(m, PF_INET, sizeof(struct ether_header));
+			else
+				m->m_pkthdr.csum_flags = 0;
+#endif
+			#if NBPFILTER > 0
+				if (sc->sc_bpf_output)
+					bridge_bpf_output(ifp, m);
+			#endif
+			BRIDGE_UNLOCK(sc);
+			bridge_enqueue(sc, dst_if, m);
+		}
+	} else
+	{
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+		
+		/* 
+		 * APPLE MODIFICATION - if the MULTICAST packet needs a checksum (i.e., 
+		 * checksum has been deferred for HW support) AND at least one destination
+		 * interface doesn't support HW checksums, then we go ahead and fix it up
+		 * here, since it doesn't make sense to do it more than once
+		 */
+		
+		if (
+			(m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4)) &&
+			/*
+			 * XXX FIX ME: keep track of whether or not we have any interfaces that 
+			 * do not support checksums (for now, assume we do)
+			 */
+			( 1 )
+			)
+		{
+			m = bridge_fix_txcsum( m );
+			if ( m == NULL )
+			{
+				goto done;
+			}
+		}
+#else
+		if (eh->ether_type == htons(ETHERTYPE_IP))
+			mbuf_outbound_finalize(m, PF_INET, sizeof(struct ether_header));
+		else
+			m->m_pkthdr.csum_flags = 0;
+#endif
+		
+		#if NBPFILTER > 0
+			if (sc->sc_bpf_output)
+				bridge_bpf_output(ifp, m);
+		#endif
+		bridge_broadcast(sc, ifp, m, 0);
+	}
+#if APPLE_BRIDGE_HWCKSUM_SUPPORT
+done:
+#endif
+
+	return 0;
+}
+
+/*
+ * bridge_forward:
+ *
+ *	The forwarding function of the bridge.
+ *
+ *	NOTE: Releases the lock on return.
+ */
+static void
+bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
+    struct mbuf *m)
+{
+	struct bridge_iflist *dbif;
+	struct ifnet *src_if, *dst_if, *ifp;
+	struct ether_header *eh;
+	uint16_t vlan;
+	uint8_t *dst;
+	int error;
+
+	lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED);
+
+#if BRIDGE_DEBUG
+	if (_if_brige_debug)
+		printf("bridge_forward %s%d m %p\n", ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp), m);
+#endif /* BRIDGE_DEBUG */
+	
+	src_if = m->m_pkthdr.rcvif;
+	ifp = sc->sc_ifp;
+
+	(void) ifnet_stat_increment_in(ifp, 1, m->m_pkthdr.len, 0);
+	vlan = VLANTAGOF(m);
+
+
+	if ((sbif->bif_flags & IFBIF_STP) &&
+	    sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
+		goto drop;
+
+	eh = mtod(m, struct ether_header *);
+	dst = eh->ether_dhost;
+
+	/* If the interface is learning, record the address. */
+	if (sbif->bif_flags & IFBIF_LEARNING) {
+		error = bridge_rtupdate(sc, eh->ether_shost, vlan,
+		    sbif, 0, IFBAF_DYNAMIC);
+		/*
+		 * If the interface has addresses limits then deny any source
+		 * that is not in the cache.
+		 */
+		if (error && sbif->bif_addrmax)
+			goto drop;
+	}
+
+	if ((sbif->bif_flags & IFBIF_STP) != 0 &&
+	    sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING)
+		goto drop;
+
+	/*
+	 * At this point, the port either doesn't participate
+	 * in spanning tree or it is in the forwarding state.
+	 */
+
+	/*
+	 * If the packet is unicast, destined for someone on
+	 * "this" side of the bridge, drop it.
+	 */
+	if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
+		dst_if = bridge_rtlookup(sc, dst, vlan);
+		if (src_if == dst_if)
+			goto drop;
+	} else {
+		/*
+		 * Check if it's a reserved multicast address; any address
+		 * listed in 802.1D section 7.12.6 may not be forwarded by
+		 * the bridge.
+		 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F.
+		 */
+		if (dst[0] == 0x01 && dst[1] == 0x80 &&
+		    dst[2] == 0xc2 && dst[3] == 0x00 &&
+		    dst[4] == 0x00 && dst[5] <= 0x0f)
+			goto drop;
+
+
+		/* ...forward it to all interfaces. */
+		atomic_add_64(&ifp->if_imcasts, 1);
+		dst_if = NULL;
+	}
+
+	/*
+	 * If we have a destination interface which is a member of our bridge,
+	 * OR this is a unicast packet, push it through the bpf(4) machinery.
+	 * For broadcast or multicast packets, don't bother because it will
+	 * be reinjected into ether_input. We do this before we pass the packets
+	 * through the pfil(9) framework, as it is possible that pfil(9) will
+	 * drop the packet, or possibly modify it, making it difficult to debug
+	 * firewall issues on the bridge.
+	 */
+#if NBPFILTER > 0
+	if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH) ||
+		dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
+		m->m_pkthdr.rcvif = ifp;
+		if (sc->sc_bpf_input)
+			bridge_bpf_input(ifp, m);
+	}
+#endif /* NBPFILTER */
+
+#if defined(PFIL_HOOKS)
+	/* run the packet filter */
+	if (PFIL_HOOKED(&inet_pfil_hook)
+#ifdef INET6
+	    || PFIL_HOOKED(&inet6_pfil_hook)
+#endif /* INET6 */
+	    ) {
+		BRIDGE_UNLOCK(sc);
+		if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0)
+			return;
+		if (m == NULL)
+			return;
+		BRIDGE_LOCK(sc);
+	}
+#endif /* PFIL_HOOKS */
+
+	if (dst_if == NULL) {
+		/*
+		 * Clear any in-bound checksum flags for this packet.
+		 */
+		mbuf_inbound_modified(m);
+
+		bridge_broadcast(sc, src_if, m, 1);
+		
+		return;
+	}
+
+	/*
+	 * At this point, we're dealing with a unicast frame
+	 * going to a different interface.
+	 */
+	if ((dst_if->if_flags & IFF_RUNNING) == 0)
+		goto drop;
+
+	dbif = bridge_lookup_member_if(sc, dst_if);
+	if (dbif == NULL)
+		/* Not a member of the bridge (anymore?) */
+		goto drop;
+
+	/* Private segments cannot talk to each other */
+	if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE)
+		goto drop;
+
+	if ((dbif->bif_flags & IFBIF_STP) &&
+	    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
+		goto drop;
+
+#if HAS_DHCPRA_MASK
+	/* APPLE MODIFICATION <rdar://6985737> */
+	if ((dst_if->if_extflags & IFEXTF_DHCPRA_MASK) != 0) {
+		m = ip_xdhcpra_output(dst_if, m);
+		if (!m) {
+			++sc->sc_sc.sc_ifp.if_xdhcpra;
+			return;
+		}
+	}
+#endif /* HAS_DHCPRA_MASK */
+
+	BRIDGE_UNLOCK(sc);
+
+#if defined(PFIL_HOOKS)
+	if (PFIL_HOOKED(&inet_pfil_hook)
+#ifdef INET6
+	    || PFIL_HOOKED(&inet6_pfil_hook)
+#endif
+	    ) {
+		if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
+			return;
+		if (m == NULL)
+			return;
+	}
+#endif /* PFIL_HOOKS */
+
+	/*
+	 * Clear any in-bound checksum flags for this packet.
+	 */
+	mbuf_inbound_modified(m);
+
+	bridge_enqueue(sc, dst_if, m);
+	return;
+
+drop:
+	BRIDGE_UNLOCK(sc);
+	m_freem(m);
+}
+
+#if BRIDGE_DEBUG
+
+char *ether_ntop(char *, size_t, const u_char *);
+
+__private_extern__ char *
+ether_ntop(char *buf, size_t len, const u_char *ap)
+{
+	snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x", 
+			 ap[0], ap[1], ap[2], ap[3], ap[4], ap[5]);
+	
+	return buf;
+}
+
+#endif /* BRIDGE_DEBUG */
+
+/*
+ * bridge_input:
+ *
+ *	Filter input from a member interface.  Queue the packet for
+ *	bridging if it is not for us.
+ */
+__private_extern__ errno_t
+bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header)
+{
+	struct bridge_softc *sc = ifp->if_bridge;
+	struct bridge_iflist *bif, *bif2;
+	struct ifnet *bifp;
+	struct ether_header *eh;
+	struct mbuf *mc, *mc2;
+	uint16_t vlan;
+	int error;
+
+#if BRIDGE_DEBUG
+	if (_if_brige_debug)
+		printf("bridge_input: %s%d from %s%d m %p data %p\n", 
+			   ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp),
+			   ifnet_name(ifp), ifnet_unit(ifp), 
+			   m, mbuf_data(m));
+#endif /* BRIDGE_DEBUG */
+
+	if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
+#if BRIDGE_DEBUG
+		if (_if_brige_debug)
+			printf( "bridge_input: %s%d not running passing along\n",
+				   ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp));
+#endif /* BRIDGE_DEBUG */
+		return 0;
+	}
+	
+	bifp = sc->sc_ifp;
+	vlan = VLANTAGOF(m);
+
+#ifdef IFF_MONITOR
+	/*
+	 * Implement support for bridge monitoring. If this flag has been
+	 * set on this interface, discard the packet once we push it through
+	 * the bpf(4) machinery, but before we do, increment the byte and
+	 * packet counters associated with this interface.
+	 */
+	if ((bifp->if_flags & IFF_MONITOR) != 0) {
+		m->m_pkthdr.rcvif  = bifp;
+		BRIDGE_BPF_MTAP_INPUT(sc, m);
+		(void) ifnet_stat_increment_in(bifp, 1, m->m_pkthdr.len, 0);
+		m_freem(m);
+		return EJUSTRETURN;
+	}
+#endif /* IFF_MONITOR */
+
+	/*
+	 * Need to clear the promiscuous flag, otherwise the packet will
+	 * be dropped by DLIL after the filters have been processed.
+	 */
+	if ((mbuf_flags(m) & MBUF_PROMISC))
+		mbuf_setflags_mask(m, 0, MBUF_PROMISC);
+	
+	BRIDGE_LOCK(sc);
+	bif = bridge_lookup_member_if(sc, ifp);
+	if (bif == NULL) {
+		BRIDGE_UNLOCK(sc);
+#if BRIDGE_DEBUG
+		if (_if_brige_debug)
+			printf( "bridge_input: %s%d bridge_lookup_member_if failed\n",
+				   ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp));
+#endif /* BRIDGE_DEBUG */
+		return 0;
+	}
+
+	eh = mtod(m, struct ether_header *);
+
+	bridge_span(sc, m);
+
+	if (m->m_flags & (M_BCAST|M_MCAST)) {
+
+#if BRIDGE_DEBUG
+		if (_if_brige_debug)
+			if ((m->m_flags & M_MCAST))
+				printf("multicast: %02x:%02x:%02x:%02x:%02x:%02x\n",
+				   eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2], 
+				   eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5]);
+		
+#endif /* BRIDGE_DEBUG */
+
+		/* Tap off 802.1D packets; they do not get forwarded. */
+		if (memcmp(eh->ether_dhost, bstp_etheraddr,
+		    ETHER_ADDR_LEN) == 0) {
+			m = bstp_input(&bif->bif_stp, ifp, m);
+			if (m == NULL) {
+				BRIDGE_UNLOCK(sc);
+				return EJUSTRETURN;
+			}
+		}
+
+		if ((bif->bif_flags & IFBIF_STP) &&
+		    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
+			BRIDGE_UNLOCK(sc);
+			return 0;
+		}
+
+		/*
+		 * Make a deep copy of the packet and enqueue the copy
+		 * for bridge processing; return the original packet for
+		 * local processing.
+		 */
+		mc = m_dup(m, M_DONTWAIT);
+		if (mc == NULL) {
+			BRIDGE_UNLOCK(sc);
+			return 0;
+		}
+
+		/* 
+		 * Perform the bridge forwarding function with the copy. 
+		 *
+		 * Note that bridge_forward calls BRIDGE_UNLOCK
+		 */
+		bridge_forward(sc, bif, mc);
+		
+		/*
+		 * Reinject the mbuf as arriving on the bridge so we have a
+		 * chance at claiming multicast packets. We cannot loop back
+		 * here from ether_input as a bridge is never a member of a
+		 * bridge.
+		 */
+		KASSERT(bifp->if_bridge == NULL,
+		    ("loop created in bridge_input"));
+		mc2 = m_dup(m, M_DONTWAIT);
+		if (mc2 != NULL) {
+			/* Keep the layer3 header aligned */
+			int i = min(mc2->m_pkthdr.len, max_protohdr);
+			mc2 = m_copyup(mc2, i, ETHER_ALIGN);
+		}
+		if (mc2 != NULL) {
+			/* Mark the packet as arriving on the bridge */
+			mc2->m_pkthdr.rcvif = bifp;
+			mc2->m_pkthdr.header = mbuf_data(mc2);
+			
+#if NBPFILTER > 0
+			if (sc->sc_bpf_input)
+				bridge_bpf_input(bifp, mc2);
+#endif /* NBPFILTER */
+			(void) mbuf_setdata(mc2, (char *)mbuf_data(mc2) + ETHER_HDR_LEN, mbuf_len(mc2) - ETHER_HDR_LEN);
+			(void) mbuf_pkthdr_adjustlen(mc2, - ETHER_HDR_LEN);
+			
+			(void) ifnet_stat_increment_in(bifp, 1, mbuf_pkthdr_len(mc2), 0);
+						
+#if BRIDGE_DEBUG
+			if (_if_brige_debug)
+				printf( "bridge_input: %s%d mcast for us\n",
+					   ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp));
+#endif /* BRIDGE_DEBUG */
+			
+			dlil_input_packet_list(bifp, mc2);
+		}
+
+		/* Return the original packet for local processing. */
+		return 0;
+	}
+
+	if ((bif->bif_flags & IFBIF_STP) &&
+	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
+		BRIDGE_UNLOCK(sc);
+		return 0;
+	}
+
+#ifdef DEV_CARP
+#   define OR_CARP_CHECK_WE_ARE_DST(iface) \
+	|| ((iface)->if_carp \
+	    && carp_forus((iface)->if_carp, eh->ether_dhost))
+#   define OR_CARP_CHECK_WE_ARE_SRC(iface) \
+	|| ((iface)->if_carp \
+	    && carp_forus((iface)->if_carp, eh->ether_shost))
+#else
+#   define OR_CARP_CHECK_WE_ARE_DST(iface)
+#   define OR_CARP_CHECK_WE_ARE_SRC(iface)
+#endif
+
+#ifdef INET6
+#   define OR_PFIL_HOOKED_INET6 \
+	|| PFIL_HOOKED(&inet6_pfil_hook)
+#else
+#   define OR_PFIL_HOOKED_INET6
+#endif
+
+#if defined(PFIL_HOOKS)
+#define PFIL_PHYS(sc, ifp, m) do { \
+		if (pfil_local_phys &&				\
+			(PFIL_HOOKED(&inet_pfil_hook)		\
+			 OR_PFIL_HOOKED_INET6)) {			\
+			if (bridge_pfil(&m, NULL, ifp,		\
+				PFIL_IN) != 0 || m == NULL) {	\
+				BRIDGE_UNLOCK(sc);		\
+				return (NULL);			\
+			}					\
+		} \
+	} while (0)
+#else /* PFIL_HOOKS */
+#define	PFIL_PHYS(sc, ifp, m)
+#endif /* PFIL_HOOKS */
+
+#define GRAB_OUR_PACKETS(iface) \
+	if ((iface)->if_type == IFT_GIF) \
+		continue; \
+	/* It is destined for us. */ \
+	if (memcmp(ifnet_lladdr((iface)), eh->ether_dhost,  ETHER_ADDR_LEN) == 0 \
+	    OR_CARP_CHECK_WE_ARE_DST((iface))				\
+	    ) {								\
+		if ((iface)->if_type == IFT_BRIDGE) {			\
+			BRIDGE_BPF_MTAP_INPUT(sc, m);			\
+			/* Filter on the physical interface. */		\
+			PFIL_PHYS(sc, iface, m);						\
+		}							\
+		if (bif->bif_flags & IFBIF_LEARNING) {			\
+			error = bridge_rtupdate(sc, eh->ether_shost,	\
+			    vlan, bif, 0, IFBAF_DYNAMIC);		\
+			if (error && bif->bif_addrmax) {		\
+				BRIDGE_UNLOCK(sc);			\
+				return EJUSTRETURN;				\
+			}						\
+		}							\
+		m->m_pkthdr.rcvif = iface;				\
+		BRIDGE_UNLOCK(sc);					\
+		return 0;						\
+	}								\
+									\
+	/* We just received a packet that we sent out. */		\
+	if (memcmp(ifnet_lladdr((iface)), eh->ether_shost, ETHER_ADDR_LEN) == 0 \
+	    OR_CARP_CHECK_WE_ARE_SRC((iface))			\
+	    ) {								\
+		BRIDGE_UNLOCK(sc);					\
+		return EJUSTRETURN;						\
+	}
+
+	/*
+	 * Unicast.
+	 */
+	/*
+	 * If the packet is for us, set the packets source as the
+	 * bridge, and return the packet back to ether_input for
+	 * local processing.
+	 */
+	if (memcmp(eh->ether_dhost, ifnet_lladdr(bifp),
+			   ETHER_ADDR_LEN) == 0
+		OR_CARP_CHECK_WE_ARE_DST(bifp)) {
+		
+		/* Mark the packet as arriving on the bridge interface */
+		(void) mbuf_pkthdr_setrcvif(m, bifp);
+		mbuf_pkthdr_setheader(m, frame_header);
+		
+		/*
+		 * If the interface is learning, and the source
+		 * address is valid and not multicast, record
+		 * the address.
+		 */
+		if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
+			ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
+			(eh->ether_shost[0] | eh->ether_shost[1] |
+			 eh->ether_shost[2] | eh->ether_shost[3] |
+			 eh->ether_shost[4] | eh->ether_shost[5]) != 0) {
+				(void) bridge_rtupdate(sc, eh->ether_shost,
+									   vlan, bif, 0, IFBAF_DYNAMIC);
+			}
+		
+		BRIDGE_BPF_MTAP_INPUT(sc, m);
+
+		(void) mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN, mbuf_len(m) - ETHER_HDR_LEN);
+		(void) mbuf_pkthdr_adjustlen(m, - ETHER_HDR_LEN);
+		
+		(void) ifnet_stat_increment_in(bifp, 1, mbuf_pkthdr_len(m), 0);
+
+		BRIDGE_UNLOCK(sc);
+				
+#if BRIDGE_DEBUG
+		if (_if_brige_debug)
+			printf( "bridge_input: %s%d packet for bridge\n",
+				   ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp));
+#endif /* BRIDGE_DEBUG */
+		
+		dlil_input_packet_list(bifp, m);
+		
+		return EJUSTRETURN;
+	}
+
+	/*
+	 * If the destination of the packet is the MAC address of the
+	 * member interface itself, then we don't need to forward it -- just
+	 * pass it back.  Note that it'll likely just be dropped by the
+	 * stack, but if something else is bound to the interface directly
+	 * (for example, the wireless stats protocol -- although that
+	 * actually uses BPF right now), then it will consume the packet.
+	 *
+	 * ALSO, note that we do this check AFTER checking for the bridge's
+	 * own MAC address, because the bridge may be using the SAME MAC
+	 * address as one of its interfaces.
+	 */
+	if (memcmp(eh->ether_dhost, ifnet_lladdr(ifp),
+			   ETHER_ADDR_LEN) == 0) {
+			
+#ifdef VERY_VERY_VERY_DIAGNOSTIC
+			printf("bridge_input: not forwarding packet bound for member interface\n" );
+#endif
+			BRIDGE_UNLOCK(sc);
+			return 0;
+	}
+
+	/* Now check all the bridge members. */
+	TAILQ_FOREACH(bif2, &sc->sc_iflist, bif_next) {
+		GRAB_OUR_PACKETS(bif2->bif_ifp)
+	}
+
+#undef OR_CARP_CHECK_WE_ARE_DST
+#undef OR_CARP_CHECK_WE_ARE_SRC
+#undef OR_PFIL_HOOKED_INET6
+#undef GRAB_OUR_PACKETS
+
+	/* 
+	 * Perform the bridge forwarding function. 
+	 *
+	 * Note that bridge_forward calls BRIDGE_UNLOCK
+	 */
+	bridge_forward(sc, bif, m);
+
+	return EJUSTRETURN;
+}
+
+/*
+ * bridge_broadcast:
+ *
+ *	Send a frame to all interfaces that are members of
+ *	the bridge, except for the one on which the packet
+ *	arrived.
+ *
+ *	NOTE: Releases the lock on return.
+ */
+static void
+bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
+    struct mbuf *m, int runfilt)
+{
+#ifndef PFIL_HOOKS
+#pragma unused(runfilt)
+#endif
+	struct bridge_iflist *dbif, *sbif;
+	struct mbuf *mc;
+	struct ifnet *dst_if;
+	int error = 0, used = 0;
+
+	sbif = bridge_lookup_member_if(sc, src_if);
+
+	BRIDGE_LOCK2REF(sc, error);
+	if (error) {
+		m_freem(m);
+		return;
+	}
+
+#ifdef PFIL_HOOKS
+	/* Filter on the bridge interface before broadcasting */
+	if (runfilt && (PFIL_HOOKED(&inet_pfil_hook)
+#ifdef INET6
+	    || PFIL_HOOKED(&inet6_pfil_hook)
+#endif /* INET6 */
+	    )) {
+		if (bridge_pfil(&m, sc->sc_ifp, NULL, PFIL_OUT) != 0)
+			goto out;
+		if (m == NULL)
+			goto out;
+	}
+#endif /* PFIL_HOOKS */
+
+	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
+		dst_if = dbif->bif_ifp;
+		if (dst_if == src_if)
+			continue;
+
+		/* Private segments cannot talk to each other */
+		if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE))
+			continue;
+
+		if ((dbif->bif_flags & IFBIF_STP) &&
+		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
+			continue;
+
+		if ((dbif->bif_flags & IFBIF_DISCOVER) == 0 &&
+		    (m->m_flags & (M_BCAST|M_MCAST)) == 0)
+			continue;
+
+		if ((dst_if->if_flags & IFF_RUNNING) == 0)
+			continue;
+
+		if (TAILQ_NEXT(dbif, bif_next) == NULL) {
+			mc = m;
+			used = 1;
+		} else {
+			mc = m_dup(m, M_DONTWAIT);
+			if (mc == NULL) {
+				(void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
+				continue;
+			}
+		}
+
+#ifdef PFIL_HOOKS
+		/*
+		 * Filter on the output interface. Pass a NULL bridge interface
+		 * pointer so we do not redundantly filter on the bridge for
+		 * each interface we broadcast on.
+		 */
+		if (runfilt && (PFIL_HOOKED(&inet_pfil_hook)
+#ifdef INET6
+		    || PFIL_HOOKED(&inet6_pfil_hook)
+#endif
+		    )) {
+			if (used == 0) {
+				/* Keep the layer3 header aligned */
+				int i = min(mc->m_pkthdr.len, max_protohdr);
+				mc = m_copyup(mc, i, ETHER_ALIGN);
+				if (mc == NULL) {
+					(void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
+					continue;
+				}
+			}
+			if (bridge_pfil(&mc, NULL, dst_if, PFIL_OUT) != 0)
+				continue;
+			if (mc == NULL)
+				continue;
+		}
+#endif /* PFIL_HOOKS */
+
+		bridge_enqueue(sc, dst_if, mc);
+	}
+	if (used == 0)
+		m_freem(m);
+
+#ifdef PFIL_HOOKS
+out:
+#endif /* PFIL_HOOKS */
+
+	BRIDGE_UNREF(sc);
+}
+
+/*
+ * bridge_span:
+ *
+ *	Duplicate a packet out one or more interfaces that are in span
+ *	mode; the original mbuf is left unmodified.
+ */
+static void
+bridge_span(struct bridge_softc *sc, struct mbuf *m)
+{
+	struct bridge_iflist *bif;
+	struct ifnet *dst_if;
+	struct mbuf *mc;
+
+	if (TAILQ_EMPTY(&sc->sc_spanlist))
+		return;
+
+	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
+		dst_if = bif->bif_ifp;
+
+		if ((dst_if->if_flags & IFF_RUNNING) == 0)
+			continue;
+
+		mc = m_copypacket(m, M_DONTWAIT);
+		if (mc == NULL) {
+			(void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
+			continue;
+		}
+
+		bridge_enqueue(sc, dst_if, mc);
+	}
+}
+
+/*
+ * bridge_rtupdate:
+ *
+ *	Add a bridge routing entry.
+ */
+static int
+bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
+    struct bridge_iflist *bif, int setflags, uint8_t flags)
+{
+	struct bridge_rtnode *brt;
+	int error;
+
+	BRIDGE_LOCK_ASSERT(sc);
+
+	/* Check that the source address is valid and not multicast. */
+	if (ETHER_IS_MULTICAST(dst) ||
+	    (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
+	     dst[3] == 0 && dst[4] == 0 && dst[5] == 0))
+		return (EINVAL);
+
+	/* 802.1p frames map to vlan 1 */
+	if (vlan == 0)
+		vlan = 1;
+
+	/*
+	 * A route for this destination might already exist.  If so,
+	 * update it; otherwise, create a new one.
+	 */
+	if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
+		if (sc->sc_brtcnt >= sc->sc_brtmax) {
+			sc->sc_brtexceeded++;
+			return (ENOSPC);
+		}
+		/* Check per interface address limits (if enabled) */
+		if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
+			bif->bif_addrexceeded++;
+			return (ENOSPC);
+		}
+
+		/*
+		 * Allocate a new bridge forwarding node, and
+		 * initialize the expiration time and Ethernet
+		 * address.
+		 */
+		brt = zalloc_noblock(bridge_rtnode_pool);
+		if (brt == NULL)
+			return (ENOMEM);
+
+		if (bif->bif_flags & IFBIF_STICKY)
+			brt->brt_flags = IFBAF_STICKY;
+		else
+			brt->brt_flags = IFBAF_DYNAMIC;
+
+		memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
+		brt->brt_vlan = vlan;
+
+		if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
+			zfree(bridge_rtnode_pool, brt);
+			return (error);
+		}
+		brt->brt_dst = bif;
+		bif->bif_addrcnt++;
+	}
+
+	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
+	    brt->brt_dst != bif) {
+		brt->brt_dst->bif_addrcnt--;
+		brt->brt_dst = bif;
+		brt->brt_dst->bif_addrcnt++;
+	}
+
+	if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
+		struct timespec now;
+	
+		nanouptime(&now);
+		brt->brt_expire = now.tv_sec + sc->sc_brttimeout;
+	}
+	if (setflags)
+		brt->brt_flags = flags;
+
+	return (0);
+}
+
+/*
+ * bridge_rtlookup:
+ *
+ *	Look up the destination interface for an address.
+ */
+static struct ifnet *
+bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
+{
+	struct bridge_rtnode *brt;
+
+	BRIDGE_LOCK_ASSERT(sc);
+
+	if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL)
+		return (NULL);
+
+	return (brt->brt_ifp);
+}
+
+/*
+ * bridge_rttrim:
+ *
+ *	Trim the routing table so that the number of routing
+ *	entries is less than or equal to the maximum.
+ */
+static void
+bridge_rttrim(struct bridge_softc *sc)
+{
+	struct bridge_rtnode *brt, *nbrt;
+
+	BRIDGE_LOCK_ASSERT(sc);
+
+	/* Make sure we actually need to do this. */
+	if (sc->sc_brtcnt <= sc->sc_brtmax)
+		return;
+
+	/* Force an aging cycle; this might trim enough addresses. */
+	bridge_rtage(sc);
+	if (sc->sc_brtcnt <= sc->sc_brtmax)
+		return;
+
+	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
+		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
+			bridge_rtnode_destroy(sc, brt);
+			if (sc->sc_brtcnt <= sc->sc_brtmax)
+				return;
+		}
+	}
+}
+
+/*
+ * bridge_timer:
+ *
+ *	Aging timer for the bridge.
+ */
+static void
+bridge_timer(void *arg)
+{
+	struct bridge_softc *sc = arg;
+
+	BRIDGE_LOCK(sc);
+
+	bridge_rtage(sc);
+
+	BRIDGE_UNLOCK(sc);
+
+	if (sc->sc_ifp->if_flags & IFF_RUNNING) {
+		struct timespec ts;
+	
+		ts.tv_sec = bridge_rtable_prune_period;
+		ts.tv_nsec = 0;
+		bsd_timeout(bridge_timer, sc, &ts);
+	}
+}
+
+/*
+ * bridge_rtage:
+ *
+ *	Perform an aging cycle.
+ */
+static void
+bridge_rtage(struct bridge_softc *sc)
+{
+	struct bridge_rtnode *brt, *nbrt;
+
+	BRIDGE_LOCK_ASSERT(sc);
+
+	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
+		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
+			struct timespec now;
+	
+			nanouptime(&now);
+			if ((unsigned long)now.tv_sec >= brt->brt_expire)
+				bridge_rtnode_destroy(sc, brt);
+		}
+	}
+}
+
+/*
+ * bridge_rtflush:
+ *
+ *	Remove all dynamic addresses from the bridge.
+ */
+static void
+bridge_rtflush(struct bridge_softc *sc, int full)
+{
+	struct bridge_rtnode *brt, *nbrt;
+
+	BRIDGE_LOCK_ASSERT(sc);
+
+	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
+		if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
+			bridge_rtnode_destroy(sc, brt);
+	}
+}
+
+/*
+ * bridge_rtdaddr:
+ *
+ *	Remove an address from the table.
+ */
+static int
+bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
+{
+	struct bridge_rtnode *brt;
+	int found = 0;
+
+	BRIDGE_LOCK_ASSERT(sc);
+
+	/*
+	 * If vlan is zero then we want to delete for all vlans, so the
+	 * lookup may return more than one entry.
+	 */
+	while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
+		bridge_rtnode_destroy(sc, brt);
+		found = 1;
+	}
+
+	return (found ? 0 : ENOENT);
+}
+
+/*
+ * bridge_rtdelete:
+ *
+ *	Delete routes to a specific member interface.
+ */
+static void
+bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
+{
+	struct bridge_rtnode *brt, *nbrt;
+
+	BRIDGE_LOCK_ASSERT(sc);
+
+	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
+		if (brt->brt_ifp == ifp && (full ||
+			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC))
+			bridge_rtnode_destroy(sc, brt);
+	}
+}
+
+/*
+ * bridge_rtable_init:
+ *
+ *	Initialize the route table for this bridge.
+ */
+static int
+bridge_rtable_init(struct bridge_softc *sc)
+{
+	int i;
+
+	sc->sc_rthash = _MALLOC(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
+	    M_DEVBUF, M_NOWAIT);
+	if (sc->sc_rthash == NULL)
+		return (ENOMEM);
+
+	for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
+		LIST_INIT(&sc->sc_rthash[i]);
+
+	sc->sc_rthash_key = random();
+
+	LIST_INIT(&sc->sc_rtlist);
+
+	return (0);
+}
+
+/*
+ * bridge_rtable_fini:
+ *
+ *	Deconstruct the route table for this bridge.
+ */
+static void
+bridge_rtable_fini(struct bridge_softc *sc)
+{
+
+	KASSERT(sc->sc_brtcnt == 0,
+	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
+	_FREE(sc->sc_rthash, M_DEVBUF);
+}
+
+/*
+ * The following hash function is adapted from "Hash Functions" by Bob Jenkins
+ * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
+ */
+#define	mix(a, b, c)							\
+do {									\
+	a -= b; a -= c; a ^= (c >> 13);					\
+	b -= c; b -= a; b ^= (a << 8);					\
+	c -= a; c -= b; c ^= (b >> 13);					\
+	a -= b; a -= c; a ^= (c >> 12);					\
+	b -= c; b -= a; b ^= (a << 16);					\
+	c -= a; c -= b; c ^= (b >> 5);					\
+	a -= b; a -= c; a ^= (c >> 3);					\
+	b -= c; b -= a; b ^= (a << 10);					\
+	c -= a; c -= b; c ^= (b >> 15);					\
+} while (/*CONSTCOND*/0)
+
+static __inline uint32_t
+bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
+{
+	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
+
+	b += addr[5] << 8;
+	b += addr[4];
+	a += addr[3] << 24;
+	a += addr[2] << 16;
+	a += addr[1] << 8;
+	a += addr[0];
+
+	mix(a, b, c);
+
+	return (c & BRIDGE_RTHASH_MASK);
+}
+
+#undef mix
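+
+/*
+ * Illustrative user-space sketch (kept under #if 0, never compiled here):
+ * it exercises the same mixing scheme as bridge_rthash() above so the
+ * bucket distribution can be inspected outside the kernel.  The bucket
+ * mask and key value below are arbitrary assumptions for the demonstration;
+ * the kernel uses BRIDGE_RTHASH_MASK and a random per-bridge key.
+ */
+#if 0
+#include <stdio.h>
+#include <stdint.h>
+
+#define	mix(a, b, c)							\
+do {									\
+	a -= b; a -= c; a ^= (c >> 13);					\
+	b -= c; b -= a; b ^= (a << 8);					\
+	c -= a; c -= b; c ^= (b >> 13);					\
+	a -= b; a -= c; a ^= (c >> 12);					\
+	b -= c; b -= a; b ^= (a << 16);					\
+	c -= a; c -= b; c ^= (b >> 5);					\
+	a -= b; a -= c; a ^= (c >> 3);					\
+	b -= c; b -= a; b ^= (a << 10);					\
+	c -= a; c -= b; c ^= (b >> 15);					\
+} while (0)
+
+static uint32_t
+example_rthash(uint32_t key, const uint8_t addr[6])
+{
+	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key;
+
+	b += addr[5] << 8;
+	b += addr[4];
+	a += (uint32_t)addr[3] << 24;
+	a += addr[2] << 16;
+	a += addr[1] << 8;
+	a += addr[0];
+
+	mix(a, b, c);
+
+	return (c & 0xff);	/* assume 256 buckets for the demo */
+}
+
+int
+main(void)
+{
+	const uint8_t mac[6] = { 0x00, 0x1c, 0xb3, 0x09, 0x85, 0x15 };
+
+	printf("bucket %u\n", example_rthash(0x12345678, mac));
+	return (0);
+}
+#endif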
+
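+/*
+ * bridge_rtnode_addr_cmp:
+ *
+ *	Compare two Ethernet addresses, returning memcmp-style ordering.
+ *	The per-bucket hash chains are kept sorted on this ordering, which
+ *	lets bridge_rtnode_lookup() and bridge_rtnode_insert() stop early.
+ */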
+static int
+bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
+{
+	int i, d;
+
+	for (i = 0, d = 0; i < ETHER_ADDR_LEN && d == 0; i++) {
+		d = ((int)a[i]) - ((int)b[i]);
+	}
+
+	return (d);
+}
+
+/*
+ * bridge_rtnode_lookup:
+ *
+ *	Look up a bridge route node for the specified destination.  Compare
+ *	the vlan id; if it is zero, just return the first match.
+ */
+static struct bridge_rtnode *
+bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
+{
+	struct bridge_rtnode *brt;
+	uint32_t hash;
+	int dir;
+
+	BRIDGE_LOCK_ASSERT(sc);
+
+	hash = bridge_rthash(sc, addr);
+	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
+		dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
+		if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0))
+			return (brt);
+		if (dir > 0)
+			return (NULL);
+	}
+
+	return (NULL);
+}
+
+/*
+ * bridge_rtnode_insert:
+ *
+ *	Insert the specified bridge node into the route table.  We
+ *	assume the entry is not already in the table.
+ */
+static int
+bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
+{
+	struct bridge_rtnode *lbrt;
+	uint32_t hash;
+	int dir;
+
+	BRIDGE_LOCK_ASSERT(sc);
+
+	hash = bridge_rthash(sc, brt->brt_addr);
+
+	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
+	if (lbrt == NULL) {
+		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
+		goto out;
+	}
+
+	do {
+		dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
+		if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan)
+			return (EEXIST);
+		if (dir > 0) {
+			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
+			goto out;
+		}
+		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
+			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
+			goto out;
+		}
+		lbrt = LIST_NEXT(lbrt, brt_hash);
+	} while (lbrt != NULL);
+
+#ifdef DIAGNOSTIC
+	panic("bridge_rtnode_insert: impossible");
+#endif
+
+out:
+	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
+	sc->sc_brtcnt++;
+
+	return (0);
+}
+
+/*
+ * bridge_rtnode_destroy:
+ *
+ *	Destroy a bridge rtnode.
+ */
+static void
+bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
+{
+	BRIDGE_LOCK_ASSERT(sc);
+
+	LIST_REMOVE(brt, brt_hash);
+
+	LIST_REMOVE(brt, brt_list);
+	sc->sc_brtcnt--;
+	brt->brt_dst->bif_addrcnt--;
+	zfree(bridge_rtnode_pool, brt);
+}
+
+/*
+ * bridge_rtable_expire:
+ *
+ *	Set the expiry time for all routes on an interface.
+ */
+static void
+bridge_rtable_expire(struct ifnet *ifp, int age)
+{
+	struct bridge_softc *sc = ifp->if_bridge;
+	struct bridge_rtnode *brt;
+
+	BRIDGE_LOCK(sc);
+
+	/*
+	 * If the age is zero then flush; otherwise cap the expiry time of
+	 * this interface's dynamic entries at 'age' seconds from now.
+	 */
+	if (age == 0)
+		bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
+	else {
+		LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
+			struct timespec now;
+	
+			nanouptime(&now);
+			/* Cap the expiry time to 'age' */
+			if (brt->brt_ifp == ifp &&
+			    brt->brt_expire > (unsigned long)now.tv_sec + age &&
+			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
+				brt->brt_expire = (unsigned long)now.tv_sec + age;
+		}
+	}
+	BRIDGE_UNLOCK(sc);
+}
+
+/*
+ * bridge_state_change:
+ *
+ *	Callback from the bridgestp code when a port changes states.
+ */
+static void
+bridge_state_change(struct ifnet *ifp, int state)
+{
+	struct bridge_softc *sc = ifp->if_bridge;
+	static const char *stpstates[] = {
+		"disabled",
+		"listening",
+		"learning",
+		"forwarding",
+		"blocking",
+		"discarding"
+	};
+
+	if (log_stp)
+		log(LOG_NOTICE, "%s%d: state changed to %s on %s%d\n",
+			ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp),
+		    stpstates[state], 
+		    ifnet_name(ifp), ifnet_unit(ifp));
+}
+
+#ifdef PFIL_HOOKS
+/*
+ * Send bridge packets through pfil if they are one of the types pfil can deal
+ * with, or if they are ARP or REVARP.  (pfil will pass ARP and REVARP without
+ * question.)  If bifp or ifp is NULL then packet filtering is skipped for
+ * that interface.
+ */
+static int
+bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir)
+{
+	int snap, error, i, hlen;
+	struct ether_header *eh1, eh2;
+	struct ip_fw_args args;
+	struct ip *ip;
+	struct llc llc1;
+	u_int16_t ether_type;
+
+	snap = 0;
+	error = -1;	/* Default to an error unless explicitly set to 0 */
+
+#if 0
+	/* We may return with the IP fields swapped; ensure it's not shared. */
+	KASSERT(M_WRITABLE(*mp), ("%s: modifying a shared mbuf", __func__));
+#endif
+
+	if (pfil_bridge == 0 && pfil_member == 0 && pfil_ipfw == 0)
+		return (0); /* filtering is disabled */
+
+	i = min((*mp)->m_pkthdr.len, max_protohdr);
+	if ((*mp)->m_len < i) {
+	    *mp = m_pullup(*mp, i);
+	    if (*mp == NULL) {
+		printf("%s: m_pullup failed\n", __func__);
+		return (-1);
+	    }
+	}
+
+	eh1 = mtod(*mp, struct ether_header *);
+	ether_type = ntohs(eh1->ether_type);
+
+	/*
+	 * Check for SNAP/LLC.
+	 */
+	if (ether_type < ETHERMTU) {
+		struct llc *llc2 = (struct llc *)(eh1 + 1);
+
+		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
+		    llc2->llc_dsap == LLC_SNAP_LSAP &&
+		    llc2->llc_ssap == LLC_SNAP_LSAP &&
+		    llc2->llc_control == LLC_UI) {
+			ether_type = htons(llc2->llc_un.type_snap.ether_type);
+			snap = 1;
+		}
+	}
+
+	/*
+	 * If we're trying to filter bridge traffic, don't look at anything
+	 * other than IP and ARP traffic.  If the filter doesn't understand
+	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
+	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
+	 * but of course we don't have an AppleTalk filter to begin with.
+	 * (Note that since pfil doesn't understand ARP it will pass *ALL*
+	 * ARP traffic.)
+	 */
+	switch (ether_type) {
+		case ETHERTYPE_ARP:
+		case ETHERTYPE_REVARP:
+			if (pfil_ipfw_arp == 0)
+				return (0); /* Automatically pass */
+			break;
+
+		case ETHERTYPE_IP:
+#ifdef INET6
+		case ETHERTYPE_IPV6:
+#endif /* INET6 */
+			break;
+		default:
+			/*
+			 * Check to see if the user wants to pass non-IP
+			 * packets; these will not be checked by pfil(9) and
+			 * would be passed unconditionally, so the default
+			 * is to drop.
+			 */
+			if (pfil_onlyip)
+				goto bad;
+	}
+
+	/* Strip off the Ethernet header and keep a copy. */
+	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t) &eh2);
+	m_adj(*mp, ETHER_HDR_LEN);
+
+	/* Strip off the SNAP header, if present */
+	if (snap) {
+		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t) &llc1);
+		m_adj(*mp, sizeof(struct llc));
+	}
+
+	/*
+	 * Check the IP header for alignment and errors
+	 */
+	if (dir == PFIL_IN) {
+		switch (ether_type) {
+			case ETHERTYPE_IP:
+				error = bridge_ip_checkbasic(mp);
+				break;
+#ifdef INET6
+			case ETHERTYPE_IPV6:
+				error = bridge_ip6_checkbasic(mp);
+				break;
+#endif /* INET6 */
+			default:
+				error = 0;
+		}
+		if (error)
+			goto bad;
+	}
+
+	if (IPFW_LOADED && pfil_ipfw != 0 && dir == PFIL_OUT && ifp != NULL) {
+		error = -1;
+		args.rule = ip_dn_claim_rule(*mp);
+		if (args.rule != NULL && fw_one_pass)
+			goto ipfwpass; /* packet already partially processed */
+
+		args.m = *mp;
+		args.oif = ifp;
+		args.next_hop = NULL;
+		args.eh = &eh2;
+		args.inp = NULL;	/* used by ipfw uid/gid/jail rules */
+		i = ip_fw_chk_ptr(&args);
+		*mp = args.m;
+
+		if (*mp == NULL)
+			return (error);
+
+		if (DUMMYNET_LOADED && (i == IP_FW_DUMMYNET)) {
+
+			/* put the Ethernet header back on */
+			M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
+			if (*mp == NULL)
+				return (error);
+			bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
+
+			/*
+			 * Pass the pkt to dummynet, which consumes it. The
+			 * packet will return to us via bridge_dummynet().
+			 */
+			args.oif = ifp;
+			ip_dn_io_ptr(mp, DN_TO_IFB_FWD, &args);
+			return (error);
+		}
+
+		if (i != IP_FW_PASS) /* drop */
+			goto bad;
+	}
+
+ipfwpass:
+	error = 0;
+
+	/*
+	 * Run the packet through pfil
+	 */
+	switch (ether_type) {
+	case ETHERTYPE_IP:
+		/*
+		 * Before calling the firewall, swap fields the same as
+		 * IP does.  Here we assume the header is contiguous.
+		 */
+		ip = mtod(*mp, struct ip *);
+
+		ip->ip_len = ntohs(ip->ip_len);
+		ip->ip_off = ntohs(ip->ip_off);
+
+		/*
+		 * Run pfil on the member interface and the bridge; either can
+		 * be skipped by clearing pfil_member or pfil_bridge.
+		 *
+		 * Keep the order:
+		 *   in_if -> bridge_if -> out_if
+		 */
+		if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
+			error = pfil_run_hooks(&inet_pfil_hook, mp, bifp,
+					dir, NULL);
+
+		if (*mp == NULL || error != 0) /* filter may consume */
+			break;
+
+		if (pfil_member && ifp != NULL)
+			error = pfil_run_hooks(&inet_pfil_hook, mp, ifp,
+					dir, NULL);
+
+		if (*mp == NULL || error != 0) /* filter may consume */
+			break;
+
+		if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
+			error = pfil_run_hooks(&inet_pfil_hook, mp, bifp,
+					dir, NULL);
+
+		if (*mp == NULL || error != 0) /* filter may consume */
+			break;
+
+		/* check if we need to fragment the packet */
+		if (pfil_member && ifp != NULL && dir == PFIL_OUT) {
+			i = (*mp)->m_pkthdr.len;
+			if (i > ifp->if_mtu) {
+				error = bridge_fragment(ifp, *mp, &eh2, snap,
+					    &llc1);
+				return (error);
+			}
+		}
+
+		/* Recalculate the ip checksum and restore byte ordering */
+		ip = mtod(*mp, struct ip *);
+		hlen = ip->ip_hl << 2;
+		if (hlen < sizeof(struct ip))
+			goto bad;
+		if (hlen > (*mp)->m_len) {
+			if ((*mp = m_pullup(*mp, hlen)) == 0)
+				goto bad;
+			ip = mtod(*mp, struct ip *);
+			if (ip == NULL)
+				goto bad;
+		}
+		ip->ip_len = htons(ip->ip_len);
+		ip->ip_off = htons(ip->ip_off);
+		ip->ip_sum = 0;
+		if (hlen == sizeof(struct ip))
+			ip->ip_sum = in_cksum_hdr(ip);
+		else
+			ip->ip_sum = in_cksum(*mp, hlen);
+
+		break;
+#ifdef INET6
+	case ETHERTYPE_IPV6:
+		if (pfil_bridge && dir == PFIL_OUT && bifp != NULL)
+			error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
+					dir, NULL);
+
+		if (*mp == NULL || error != 0) /* filter may consume */
+			break;
+
+		if (pfil_member && ifp != NULL)
+			error = pfil_run_hooks(&inet6_pfil_hook, mp, ifp,
+					dir, NULL);
+
+		if (*mp == NULL || error != 0) /* filter may consume */
+			break;
+
+		if (pfil_bridge && dir == PFIL_IN && bifp != NULL)
+			error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp,
+					dir, NULL);
+		break;
+#endif
+	default:
+		error = 0;
+		break;
+	}
+
+	if (*mp == NULL)
+		return (error);
+	if (error != 0)
+		goto bad;
+
+	error = -1;
+
+	/*
+	 * Finally, put everything back the way it was and return
+	 */
+	if (snap) {
+		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT);
+		if (*mp == NULL)
+			return (error);
+		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
+	}
+
+	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
+	if (*mp == NULL)
+		return (error);
+	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
+
+	return (0);
+
+bad:
+	m_freem(*mp);
+	*mp = NULL;
+	return (error);
+}
+
+
+/*
+ * Perform basic checks on header size since
+ * pfil assumes ip_input has already processed
+ * it.  Cut-and-pasted from ip_input.c.
+ * Given how simple the IPv6 version is,
+ * does the IPv4 version really need to be
+ * this complicated?
+ *
+ * XXX Should we update ipstat here, or not?
+ * XXX Right now we update ipstat but not
+ * XXX csum_counter.
+ */
+static int
+bridge_ip_checkbasic(struct mbuf **mp)
+{
+	struct mbuf *m = *mp;
+	struct ip *ip;
+	int len, hlen;
+	u_short sum;
+
+	if (*mp == NULL)
+		return (-1);
+
+	if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
+		if ((m = m_copyup(m, sizeof(struct ip),
+			(max_linkhdr + 3) & ~3)) == NULL) {
+			/* XXXJRT new stat, please */
+			ipstat.ips_toosmall++;
+			goto bad;
+		}
+	} else if (__predict_false(m->m_len < sizeof (struct ip))) {
+		if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
+			ipstat.ips_toosmall++;
+			goto bad;
+		}
+	}
+	ip = mtod(m, struct ip *);
+	if (ip == NULL) goto bad;
+
+	if (ip->ip_v != IPVERSION) {
+		ipstat.ips_badvers++;
+		goto bad;
+	}
+	hlen = ip->ip_hl << 2;
+	if (hlen < sizeof(struct ip)) { /* minimum header length */
+		ipstat.ips_badhlen++;
+		goto bad;
+	}
+	if (hlen > m->m_len) {
+		if ((m = m_pullup(m, hlen)) == 0) {
+			ipstat.ips_badhlen++;
+			goto bad;
+		}
+		ip = mtod(m, struct ip *);
+		if (ip == NULL) goto bad;
+	}
+
+	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
+		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
+	} else {
+		if (hlen == sizeof(struct ip)) {
+			sum = in_cksum_hdr(ip);
+		} else {
+			sum = in_cksum(m, hlen);
+		}
+	}
+	if (sum) {
+		ipstat.ips_badsum++;
+		goto bad;
+	}
+
+	/* Retrieve the packet length. */
+	len = ntohs(ip->ip_len);
+
+	/*
+	 * Check for additional length bogosity
+	 */
+	if (len < hlen) {
+		ipstat.ips_badlen++;
+		goto bad;
+	}
+
+	/*
+	 * Check that the amount of data in the buffers
+	 * is at least as much as the IP header would have us expect.
+	 * Drop packet if shorter than we expect.
+	 */
+	if (m->m_pkthdr.len < len) {
+		ipstat.ips_tooshort++;
+		goto bad;
+	}
+
+	/* Checks out, proceed */
+	*mp = m;
+	return (0);
+
+bad:
+	*mp = m;
+	return (-1);
+}
+
+#ifdef INET6
+/*
+ * Same as above, but for IPv6.
+ * Cut-and-pasted from ip6_input.c.
+ * XXX Should we update ip6stat, or not?
+ */
+static int
+bridge_ip6_checkbasic(struct mbuf **mp)
+{
+	struct mbuf *m = *mp;
+	struct ip6_hdr *ip6;
+
+	/*
+	 * If the IPv6 header is not aligned, slurp it up into a new
+	 * mbuf with space for link headers, in the event we forward
+	 * it.  Otherwise, if it is aligned, make sure the entire base
+	 * IPv6 header is in the first mbuf of the chain.
+	 */
+	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
+		struct ifnet *inifp = m->m_pkthdr.rcvif;
+		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
+			    (max_linkhdr + 3) & ~3)) == NULL) {
+			/* XXXJRT new stat, please */
+			ip6stat.ip6s_toosmall++;
+			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
+			goto bad;
+		}
+	} else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
+		struct ifnet *inifp = m->m_pkthdr.rcvif;
+		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
+			ip6stat.ip6s_toosmall++;
+			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
+			goto bad;
+		}
+	}
+
+	ip6 = mtod(m, struct ip6_hdr *);
+
+	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
+		ip6stat.ip6s_badvers++;
+		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
+		goto bad;
+	}
+
+	/* Checks out, proceed */
+	*mp = m;
+	return (0);
+
+bad:
+	*mp = m;
+	return (-1);
+}
+#endif /* INET6 */
+
+/*
+ * bridge_fragment:
+ *
+ *	Return a fragmented mbuf chain.
+ */
+static int
+bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh,
+    int snap, struct llc *llc)
+{
+	struct mbuf *m0, *next;
+	struct ip *ip;
+	int error = -1;
+
+	if (m->m_len < sizeof(struct ip) &&
+	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
+		goto out;
+	ip = mtod(m, struct ip *);
+
+	error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist,
+		    CSUM_DELAY_IP);
+	if (error)
+		goto out;
+
+	/*
+	 * Walk the chain and re-add the Ethernet header.  Grab the next
+	 * packet up front: a failed M_PREPEND frees the fragment and
+	 * leaves m0 NULL, and once an error occurs the remaining
+	 * fragments are freed individually.
+	 */
+	for (m0 = m; m0; m0 = next) {
+		next = m0->m_nextpkt;
+		if (error == 0) {
+			if (snap) {
+				M_PREPEND(m0, sizeof(struct llc), M_DONTWAIT);
+				if (m0 == NULL) {
+					error = ENOBUFS;
+					continue;
+				}
+				bcopy(llc, mtod(m0, caddr_t),
+				    sizeof(struct llc));
+			}
+			M_PREPEND(m0, ETHER_HDR_LEN, M_DONTWAIT);
+			if (m0 == NULL) {
+				error = ENOBUFS;
+				continue;
+			}
+			bcopy(eh, mtod(m0, caddr_t), ETHER_HDR_LEN);
+		} else
+			m_freem(m0);
+	}
+
+	if (error == 0)
+		ipstat.ips_fragmented++;
+
+	return (error);
+
+out:
+	if (m != NULL)
+		m_freem(m);
+	return (error);
+}
+#endif /* PFIL_HOOKS */
+
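+/*
+ * bridge_set_bpf_tap:
+ *
+ *	Callback used by bpf(4) to enable or disable the input and output
+ *	taps on the bridge interface; the registered callbacks are invoked
+ *	from bridge_bpf_input() and bridge_bpf_output() below.
+ */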
+static errno_t
+bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback)
+{
+	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+	
+	//printf("bridge_set_bpf_tap ifp %p mode %d\n", ifp, mode);
+	
+	/* TBD locking */
+	if (sc == NULL || (sc->sc_flags & SCF_DETACHING)) {
+		return ENODEV;
+	}
+	
+	switch (mode) {
+		case BPF_TAP_DISABLE:
+			sc->sc_bpf_input = sc->sc_bpf_output = NULL;
+			break;
+			
+		case BPF_TAP_INPUT:
+			sc->sc_bpf_input = bpf_callback;
+			break;
+			
+		case BPF_TAP_OUTPUT:
+			sc->sc_bpf_output = bpf_callback;
+			break;
+			
+		case BPF_TAP_INPUT_OUTPUT:
+			sc->sc_bpf_input = sc->sc_bpf_output = bpf_callback;
+			break;
+			
+		default:
+			break;
+	}
+	
+	return 0;
+}
+
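+/*
+ * bridge_detach:
+ *
+ *	Final teardown once the interface has been detached: stop the
+ *	spanning tree, release the routing table, and free the softc.
+ */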
+static void
+bridge_detach(ifnet_t ifp)
+{
+	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+	
+	bstp_detach(&sc->sc_stp);
+
+	/* Tear down the routing table. */
+	bridge_rtable_fini(sc);
+	
+	lck_mtx_lock(bridge_list_mtx);
+	LIST_REMOVE(sc, sc_list);
+	lck_mtx_unlock(bridge_list_mtx);
+	
+	ifnet_release(ifp);
+	
+	lck_mtx_free(sc->sc_mtx, bridge_lock_grp);
+	
+	_FREE(sc, M_DEVBUF);
+}
+
+__private_extern__ errno_t
+bridge_bpf_input(ifnet_t ifp, struct mbuf *m)
+{
+	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+	
+	if (sc->sc_bpf_input) {
+		if (mbuf_pkthdr_rcvif(m) != ifp)
+			printf("bridge_bpf_input rcvif: %p != ifp %p\n", mbuf_pkthdr_rcvif(m), ifp);
+		(*sc->sc_bpf_input)(ifp, m);
+	}
+	return 0;
+}
+
+__private_extern__ errno_t
+bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
+{
+	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
+	
+	if (sc->sc_bpf_output) {
+		(*sc->sc_bpf_output)(ifp, m);
+	}
+	return 0;
+}
diff --git a/bsd/net/if_bridgevar.h b/bsd/net/if_bridgevar.h
new file mode 100644
index 000000000..3d1375aed
--- /dev/null
+++ b/bsd/net/if_bridgevar.h
@@ -0,0 +1,499 @@
+/*	$NetBSD: if_bridgevar.h,v 1.4 2003/07/08 07:13:50 itojun Exp $	*/
+/*
+ * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Copyright 2001 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Jason R. Thorpe for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed for the NetBSD Project by
+ *	Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ *    or promote products derived from this software without specific prior
+ *    written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by Jason L. Wright
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * OpenBSD: if_bridge.h,v 1.14 2001/03/22 03:48:29 jason Exp
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Data structure and control definitions for bridge interfaces.
+ */
+
+#ifndef _NET_IF_BRIDGEVAR_H_
+#define _NET_IF_BRIDGEVAR_H_
+
+#ifdef PRIVATE
+
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/ethernet.h>
+
+/*
+ * Commands used in the SIOCSDRVSPEC ioctl.  Note the lookup of the
+ * bridge interface itself is keyed off the ifdrv structure.
+ */
+#define	BRDGADD			0	/* add bridge member (ifbreq) */
+#define	BRDGDEL			1	/* delete bridge member (ifbreq) */
+#define	BRDGGIFFLGS		2	/* get member if flags (ifbreq) */
+#define	BRDGSIFFLGS		3	/* set member if flags (ifbreq) */
+#define	BRDGSCACHE		4	/* set cache size (ifbrparam) */
+#define	BRDGGCACHE		5	/* get cache size (ifbrparam) */
+#define	BRDGGIFS		6	/* get member list (ifbifconf) */
+#define	BRDGRTS			7	/* get address list (ifbaconf) */
+#define	BRDGSADDR		8	/* set static address (ifbareq) */
+#define	BRDGSTO			9	/* set cache timeout (ifbrparam) */
+#define	BRDGGTO			10	/* get cache timeout (ifbrparam) */
+#define	BRDGDADDR		11	/* delete address (ifbareq) */
+#define	BRDGFLUSH		12	/* flush address cache (ifbreq) */
+
+#define	BRDGGPRI		13	/* get priority (ifbrparam) */
+#define	BRDGSPRI		14	/* set priority (ifbrparam) */
+#define	BRDGGHT			15	/* get hello time (ifbrparam) */
+#define	BRDGSHT			16	/* set hello time (ifbrparam) */
+#define	BRDGGFD			17	/* get forward delay (ifbrparam) */
+#define	BRDGSFD			18	/* set forward delay (ifbrparam) */
+#define	BRDGGMA			19	/* get max age (ifbrparam) */
+#define	BRDGSMA			20	/* set max age (ifbrparam) */
+#define	BRDGSIFPRIO		21	/* set if priority (ifbreq) */
+#define	BRDGSIFCOST		22	/* set if path cost (ifbreq) */
+#define	BRDGGFILT		23	/* get filter flags (ifbrparam) */
+#define	BRDGSFILT		24	/* set filter flags (ifbrparam) */
+#define	BRDGPURGE		25	/* purge address cache for a particular interface (ifbreq) */
+#define	BRDGADDS		26	/* add bridge span member (ifbreq) */
+#define	BRDGDELS		27	/* delete bridge span member (ifbreq) */
+#define	BRDGPARAM		28	/* get bridge STP params (ifbropreq) */
+#define	BRDGGRTE		29	/* get cache drops (ifbrparam) */
+#define	BRDGGIFSSTP		30	/* get member STP params list (ifbpstpconf) */
+#define	BRDGSPROTO		31	/* set protocol (ifbrparam) */
+#define	BRDGSTXHC		32	/* set tx hold count (ifbrparam) */
+#define	BRDGSIFAMAX		33	/* set max interface addrs (ifbreq) */
+
+/*
+ * Generic bridge control request.
+ */
+#pragma pack(4)
+
+struct ifbreq {
+	char		ifbr_ifsname[IFNAMSIZ];	/* member if name */
+	uint32_t	ifbr_ifsflags;		/* member if flags */
+	uint32_t	ifbr_stpflags;		/* member if STP flags */
+	uint32_t	ifbr_path_cost;		/* member if STP cost */
+	uint8_t		ifbr_portno;		/* member if port number */
+	uint8_t		ifbr_priority;		/* member if STP priority */
+	uint8_t		ifbr_proto;		/* member if STP protocol */
+	uint8_t		ifbr_role;		/* member if STP role */
+	uint8_t		ifbr_state;		/* member if STP state */
+	uint32_t	ifbr_addrcnt;		/* member if addr number */
+	uint32_t	ifbr_addrmax;		/* member if addr max */
+	uint32_t	ifbr_addrexceeded;	/* member if addr violations */
+	uint8_t		pad[32];
+};
+
+#pragma pack()
+
+/* BRDGGIFFLGS, BRDGSIFFLGS */
+#define	IFBIF_LEARNING		0x0001	/* if can learn */
+#define	IFBIF_DISCOVER		0x0002	/* if sends packets w/ unknown dest. */
+#define	IFBIF_STP		0x0004	/* if participates in spanning tree */
+#define	IFBIF_SPAN		0x0008	/* if is a span port */
+#define	IFBIF_STICKY		0x0010	/* if learned addresses stick */
+#define	IFBIF_BSTP_EDGE		0x0020	/* member stp edge port */
+#define	IFBIF_BSTP_AUTOEDGE	0x0040	/* member stp autoedge enabled */
+#define	IFBIF_BSTP_PTP		0x0080	/* member stp point to point */
+#define	IFBIF_BSTP_AUTOPTP	0x0100	/* member stp autoptp enabled */
+#define	IFBIF_BSTP_ADMEDGE	0x0200	/* member stp admin edge enabled */
+#define	IFBIF_BSTP_ADMCOST	0x0400	/* member stp admin path cost */
+#define	IFBIF_PRIVATE		0x0800	/* if is a private segment */
+
+#define	IFBIFBITS	"\020\001LEARNING\002DISCOVER\003STP\004SPAN" \
+			"\005STICKY\014PRIVATE\006EDGE\007AUTOEDGE\010PTP" \
+			"\011AUTOPTP"
+#define	IFBIFMASK	~(IFBIF_BSTP_EDGE|IFBIF_BSTP_AUTOEDGE|IFBIF_BSTP_PTP| \
+			IFBIF_BSTP_AUTOPTP|IFBIF_BSTP_ADMEDGE| \
+			IFBIF_BSTP_ADMCOST)	/* not saved */
+
+/* BRDGFLUSH */
+#define	IFBF_FLUSHDYN		0x00	/* flush learned addresses only */
+#define	IFBF_FLUSHALL		0x01	/* flush all addresses */
+
+/* BRDGSFILT */
+#define	IFBF_FILT_USEIPF	0x00000001 /* run pfil hooks on the
+					    * bridge interface */
+#define	IFBF_FILT_MEMBER	0x00000002 /* run pfil hooks on the
+					    * member interfaces */
+#define	IFBF_FILT_ONLYIP	0x00000004 /* only pass IP[46] packets
+					    * when pfil is enabled */
+#define	IFBF_FILT_MASK		0x00000007 /* mask of valid values */
+
+
+/* APPLE MODIFICATION <jhw@apple.com>: Default is to pass non-IP packets. */
+#define	IFBF_FILT_DEFAULT	( IFBF_FILT_USEIPF | IFBF_FILT_MEMBER )
+#if 0
+#define	IFBF_FILT_DEFAULT	(IFBF_FILT_USEIPF |	\
+				 IFBF_FILT_MEMBER |	\
+				 IFBF_FILT_ONLYIP)
+#endif
+
+/*
+ * Interface list structure.
+ */
+
+#pragma pack(4)
+
+#ifndef XNU_KERNEL_PRIVATE
+
+struct ifbifconf {
+	uint32_t	ifbic_len;	/* buffer size */
+	union {
+		caddr_t	ifbicu_buf;
+		struct ifbreq *ifbicu_req;
+#define	ifbic_buf	ifbic_ifbicu.ifbicu_buf
+#define	ifbic_req	ifbic_ifbicu.ifbicu_req
+	} ifbic_ifbicu;
+};
+
+#else /* XNU_KERNEL_PRIVATE */
+
+struct ifbifconf32 {
+	uint32_t	ifbic_len;	/* buffer size */
+	union {
+		user32_addr_t	ifbicu_buf;
+		user32_addr_t	ifbicu_req;
+#define	ifbic_buf	ifbic_ifbicu.ifbicu_buf
+#define	ifbic_req	ifbic_ifbicu.ifbicu_req
+	} ifbic_ifbicu;
+};
+
+struct ifbifconf64 {
+	uint32_t	ifbic_len;	/* buffer size */
+	union {
+		user64_addr_t	ifbicu_buf;
+		user64_addr_t	ifbicu_req;
+	} ifbic_ifbicu;
+};
+#endif /* XNU_KERNEL_PRIVATE */
+
+#pragma pack()
+
+/*
+ * Bridge address request.
+ */
+
+#pragma pack(4)
+
+#ifndef XNU_KERNEL_PRIVATE
+
+struct ifbareq {
+	char		ifba_ifsname[IFNAMSIZ];	/* member if name */
+	unsigned long	ifba_expire;		/* address expire time */
+	uint8_t		ifba_flags;		/* address flags */
+	uint8_t		ifba_dst[ETHER_ADDR_LEN];/* destination address */
+	uint16_t	ifba_vlan;		/* vlan id */
+};
+
+#else /* XNU_KERNEL_PRIVATE */
+
+struct ifbareq32 {
+	char		ifba_ifsname[IFNAMSIZ];	/* member if name */
+	uint32_t	ifba_expire;		/* address expire time */
+	uint8_t		ifba_flags;		/* address flags */
+	uint8_t		ifba_dst[ETHER_ADDR_LEN];/* destination address */
+	uint16_t	ifba_vlan;		/* vlan id */
+};
+
+struct ifbareq64 {
+	char		ifba_ifsname[IFNAMSIZ];	/* member if name */
+	uint64_t	ifba_expire;		/* address expire time */
+	uint8_t		ifba_flags;		/* address flags */
+	uint8_t		ifba_dst[ETHER_ADDR_LEN];/* destination address */
+	uint16_t	ifba_vlan;		/* vlan id */
+};
+#endif /* XNU_KERNEL_PRIVATE */
+
+#pragma pack()
+
+#define	IFBAF_TYPEMASK	0x03	/* address type mask */
+#define	IFBAF_DYNAMIC	0x00	/* dynamically learned address */
+#define	IFBAF_STATIC	0x01	/* static address */
+#define	IFBAF_STICKY	0x02	/* sticky address */
+
+#define	IFBAFBITS	"\020\1STATIC\2STICKY"
+
+/*
+ * Address list structure.
+ */
+
+#pragma pack(4)
+
+#ifndef XNU_KERNEL_PRIVATE
+
+struct ifbaconf {
+	uint32_t	ifbac_len;	/* buffer size */
+	union {
+		caddr_t ifbacu_buf;
+		struct ifbareq *ifbacu_req;
+#define	ifbac_buf	ifbac_ifbacu.ifbacu_buf
+#define	ifbac_req	ifbac_ifbacu.ifbacu_req
+	} ifbac_ifbacu;
+};
+
+#else /* XNU_KERNEL_PRIVATE */
+
+struct ifbaconf32 {
+	uint32_t	ifbac_len;	/* buffer size */
+	union {
+		user32_addr_t	ifbacu_buf;
+		user32_addr_t	ifbacu_req;
+#define	ifbac_buf	ifbac_ifbacu.ifbacu_buf
+#define	ifbac_req	ifbac_ifbacu.ifbacu_req
+	} ifbac_ifbacu;
+};
+
+struct ifbaconf64 {
+	uint32_t	ifbac_len;	/* buffer size */
+	union {
+		user64_addr_t	ifbacu_buf;
+		user64_addr_t	ifbacu_req;
+	} ifbac_ifbacu;
+};
+#endif /* XNU_KERNEL_PRIVATE */
+
+#pragma pack()
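+
+/*
+ * Illustrative sketch (under #if 0, not compiled): one plausible way a
+ * user-space tool could read the bridge address table with BRDGRTS through
+ * the SIOCGDRVSPEC ioctl.  The bridge name, the fixed buffer size, the use
+ * of struct ifdrv, and the kernel rewriting ifbac_len to the bytes
+ * returned are all assumptions for demonstration only.
+ */
+#if 0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <net/if.h>
+#include <net/if_bridgevar.h>
+
+static void
+print_bridge_addrs(int s, const char *brname)
+{
+	struct ifbaconf ifbac;
+	struct ifdrv ifd;
+	uint32_t i;
+
+	memset(&ifbac, 0, sizeof (ifbac));
+	ifbac.ifbac_len = 128 * sizeof (struct ifbareq);  /* assumed cap */
+	ifbac.ifbac_buf = malloc(ifbac.ifbac_len);
+
+	memset(&ifd, 0, sizeof (ifd));
+	strlcpy(ifd.ifd_name, brname, sizeof (ifd.ifd_name));
+	ifd.ifd_cmd = BRDGRTS;
+	ifd.ifd_len = sizeof (ifbac);
+	ifd.ifd_data = &ifbac;
+
+	/* Assume ifbac_len now holds the number of bytes copied out. */
+	if (ioctl(s, SIOCGDRVSPEC, &ifd) == 0) {
+		for (i = 0; i < ifbac.ifbac_len / sizeof (struct ifbareq); i++) {
+			struct ifbareq *req = &ifbac.ifbac_req[i];
+
+			printf("%s vlan %u %s\n", req->ifba_ifsname,
+			    req->ifba_vlan,
+			    (req->ifba_flags & IFBAF_TYPEMASK) == IFBAF_STATIC ?
+			    "static" : "dynamic");
+		}
+	}
+	free(ifbac.ifbac_buf);
+}
+#endif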
+
+/*
+ * Bridge parameter structure.
+ */
+
+#pragma pack(4)
+
+struct ifbrparam {
+	union {
+		uint32_t ifbrpu_int32;
+		uint16_t ifbrpu_int16;
+		uint8_t ifbrpu_int8;
+	} ifbrp_ifbrpu;
+};
+
+#pragma pack()
+
+#define	ifbrp_csize	ifbrp_ifbrpu.ifbrpu_int32	/* cache size */
+#define	ifbrp_ctime	ifbrp_ifbrpu.ifbrpu_int32	/* cache time (sec) */
+#define	ifbrp_prio	ifbrp_ifbrpu.ifbrpu_int16	/* bridge priority */
+#define	ifbrp_proto	ifbrp_ifbrpu.ifbrpu_int8	/* bridge protocol */
+#define	ifbrp_txhc	ifbrp_ifbrpu.ifbrpu_int8	/* bpdu tx holdcount */
+#define	ifbrp_hellotime	ifbrp_ifbrpu.ifbrpu_int8	/* hello time (sec) */
+#define	ifbrp_fwddelay	ifbrp_ifbrpu.ifbrpu_int8	/* fwd time (sec) */
+#define	ifbrp_maxage	ifbrp_ifbrpu.ifbrpu_int8	/* max age (sec) */
+#define	ifbrp_cexceeded ifbrp_ifbrpu.ifbrpu_int32	/* # of cache dropped
+							 * addresses */
+#define	ifbrp_filter	ifbrp_ifbrpu.ifbrpu_int32	/* filtering flags */
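+
+/*
+ * Illustrative sketch (under #if 0, not compiled): setting one of the
+ * scalar bridge parameters above -- here the address cache size via
+ * BRDGSCACHE -- through the SIOCSDRVSPEC ioctl.  The socket setup, the
+ * interface name, and struct ifdrv availability are assumptions for
+ * demonstration only.
+ */
+#if 0
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <net/if.h>
+#include <net/if_bridgevar.h>
+
+static int
+bridge_set_cache_size(int s, const char *brname, uint32_t size)
+{
+	struct ifbrparam param;
+	struct ifdrv ifd;
+
+	memset(&param, 0, sizeof (param));
+	param.ifbrp_csize = size;	/* cache size accessor above */
+
+	memset(&ifd, 0, sizeof (ifd));
+	strlcpy(ifd.ifd_name, brname, sizeof (ifd.ifd_name));
+	ifd.ifd_cmd = BRDGSCACHE;
+	ifd.ifd_len = sizeof (param);
+	ifd.ifd_data = &param;
+
+	return (ioctl(s, SIOCSDRVSPEC, &ifd));
+}
+#endif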
+
+/*
+ * Bridge current operational parameters structure.
+ */
+
+#pragma pack(4)
+
+#ifndef XNU_KERNEL_PRIVATE
+
+struct ifbropreq {
+	uint8_t		ifbop_holdcount;
+	uint8_t		ifbop_maxage;
+	uint8_t		ifbop_hellotime;
+	uint8_t		ifbop_fwddelay;
+	uint8_t		ifbop_protocol;
+	uint16_t	ifbop_priority;
+	uint16_t	ifbop_root_port;
+	uint32_t	ifbop_root_path_cost;
+	uint64_t	ifbop_bridgeid;
+	uint64_t	ifbop_designated_root;
+	uint64_t	ifbop_designated_bridge;
+	struct timeval	ifbop_last_tc_time;
+};
+
+#else /* XNU_KERNEL_PRIVATE */
+
+struct ifbropreq32 {
+	uint8_t		ifbop_holdcount;
+	uint8_t		ifbop_maxage;
+	uint8_t		ifbop_hellotime;
+	uint8_t		ifbop_fwddelay;
+	uint8_t		ifbop_protocol;
+	uint16_t	ifbop_priority;
+	uint16_t	ifbop_root_port;
+	uint32_t	ifbop_root_path_cost;
+	uint64_t	ifbop_bridgeid;
+	uint64_t	ifbop_designated_root;
+	uint64_t	ifbop_designated_bridge;
+	struct timeval	ifbop_last_tc_time;
+};
+
+struct ifbropreq64 {
+	uint8_t		ifbop_holdcount;
+	uint8_t		ifbop_maxage;
+	uint8_t		ifbop_hellotime;
+	uint8_t		ifbop_fwddelay;
+	uint8_t		ifbop_protocol;
+	uint16_t	ifbop_priority;
+	uint16_t	ifbop_root_port;
+	uint32_t	ifbop_root_path_cost;
+	uint64_t	ifbop_bridgeid;
+	uint64_t	ifbop_designated_root;
+	uint64_t	ifbop_designated_bridge;
+	struct timeval	ifbop_last_tc_time;
+};
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+#pragma pack()
+
+/*
+ * Bridge member operational STP params structure.
+ */
+
+#pragma pack(4)
+
+struct ifbpstpreq {
+	uint8_t		ifbp_portno;		/* bp STP port number */
+	uint32_t	ifbp_fwd_trans;		/* bp STP fwd transitions */
+	uint32_t	ifbp_design_cost;	/* bp STP designated cost */
+	uint32_t	ifbp_design_port;	/* bp STP designated port */
+	uint64_t	ifbp_design_bridge;	/* bp STP designated bridge */
+	uint64_t	ifbp_design_root;	/* bp STP designated root */
+};
+
+#pragma pack()
+
+/*
+ * Bridge STP ports list structure.
+ */
+
+#pragma pack(4)
+
+#ifndef XNU_KERNEL_PRIVATE
+
+struct ifbpstpconf {
+	uint32_t	ifbpstp_len;	/* buffer size */
+	union {
+		caddr_t	ifbpstpu_buf;
+		struct ifbpstpreq *ifbpstpu_req;
+	} ifbpstp_ifbpstpu;
+#define	ifbpstp_buf	ifbpstp_ifbpstpu.ifbpstpu_buf
+#define	ifbpstp_req	ifbpstp_ifbpstpu.ifbpstpu_req
+};
+
+#else /* XNU_KERNEL_PRIVATE */
+
+struct ifbpstpconf32 {
+	uint32_t	ifbpstp_len;	/* buffer size */
+	union {
+		user32_addr_t	ifbpstpu_buf;
+		user32_addr_t 	ifbpstpu_req;
+#define	ifbpstp_buf	ifbpstp_ifbpstpu.ifbpstpu_buf
+#define	ifbpstp_req	ifbpstp_ifbpstpu.ifbpstpu_req
+	} ifbpstp_ifbpstpu;
+};
+
+struct ifbpstpconf64 {
+	uint32_t	ifbpstp_len;	/* buffer size */
+	union {
+		user64_addr_t	ifbpstpu_buf;
+		user64_addr_t	ifbpstpu_req;
+	} ifbpstp_ifbpstpu;
+};
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+#pragma pack()
+
+#ifdef XNU_KERNEL_PRIVATE
+
+int	bridgeattach(int);
+
+#endif /* XNU_KERNEL_PRIVATE */
+#endif /* PRIVATE */
+#endif /* !_NET_IF_BRIDGEVAR_H_ */
diff --git a/bsd/net/if_disc.c b/bsd/net/if_disc.c
deleted file mode 100644
index 229e281f6..000000000
--- a/bsd/net/if_disc.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1982, 1986, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	From: @(#)if_loop.c	8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/net/if_disc.c,v 1.26.2.1 2001/03/06 00:32:09 obrien Exp $
- */
-
-/*
- * Discard interface driver for protocol testing and timing.
- * (Based on the loopback.)
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/mbuf.h>
-#include <sys/socket.h>
-#include <sys/sockio.h>
-
-#include <net/if.h>
-#include <net/if_types.h>
-#include <net/route.h>
-#include <net/bpf.h>
-
-#ifdef TINY_DSMTU
-#define	DSMTU	(1024+512)
-#else
-#define DSMTU	65532
-#endif
-
-static void discattach(void);
-
-static struct	ifnet discif;
-static int discoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
-		    struct rtentry *);
-static void discrtrequest(int cmd, struct rtentry *rt, struct sockaddr *sa);
-static int discioctl(struct ifnet *, u_long, caddr_t);
-
-/* ARGSUSED */
-static void
-discattach()
-{
-	register struct ifnet *ifp = &discif;
-
-	ifp->if_name = "ds";
-	ifp->if_family = APPLE_IF_FAM_DISC;
-	ifp->if_mtu = DSMTU;
-	ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST;
-	ifp->if_ioctl = discioctl;
-	ifp->if_output = discoutput;
-	ifp->if_type = IFT_LOOP;
-	ifp->if_hdrlen = 0;
-	ifp->if_addrlen = 0;
-	if_attach(ifp);
-	bpfattach(ifp, DLT_NULL, sizeof(u_int));
-}
-
-#ifndef __APPLE__
-static int
-disc_modevent(module_t mod, int type, void *data) 
-{ 
-	switch (type) { 
-	case MOD_LOAD: 
-		discattach();
-		break; 
-	case MOD_UNLOAD: 
-		printf("if_disc module unload - not possible for this module type\n"); 
-		return EINVAL; 
-	} 
-	return 0; 
-} 
-
-static moduledata_t disc_mod = { 
-	"if_disc", 
-	disc_modevent, 
-	NULL
-}; 
-
-DECLARE_MODULE(if_disc, disc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
-#endif
-
-static int
-discoutput(ifp, m, dst, rt)
-	struct ifnet *ifp;
-	register struct mbuf *m;
-	struct sockaddr *dst;
-	register struct rtentry *rt;
-{
-	if ((m->m_flags & M_PKTHDR) == 0)
-		panic("discoutput no HDR");
-	/* BPF write needs to be handled specially */
-	if (dst->sa_family == AF_UNSPEC) {
-		dst->sa_family = *(mtod(m, int *));
-		m->m_len -= sizeof(int);
-		m->m_pkthdr.len -= sizeof(int);
-		m->m_data += sizeof(int);
-	}
-
-	if (discif.if_bpf) {
-		/* We need to prepend the address family as a four byte field. */
-		u_int af = dst->sa_family;
-
-		bpf_tap_out(ifp, 0, m, &af, sizeof(af));
-	}
-	m->m_pkthdr.rcvif = ifp;
-
-	ifp->if_opackets++;
-	ifp->if_obytes += m->m_pkthdr.len;
-
-	m_freem(m);
-	return 0;
-}
-
-/* ARGSUSED */
-static void
-discrtrequest(cmd, rt, sa)
-	int cmd;
-	struct rtentry *rt;
-	struct sockaddr *sa;
-{
-	if (rt != NULL) {
-		RT_LOCK_ASSERT_HELD(rt);
-		rt->rt_rmx.rmx_mtu = DSMTU;
-	}
-}
-
-/*
- * Process an ioctl request.
- */
-/* ARGSUSED */
-static int
-discioctl(ifp, cmd, data)
-	register struct ifnet *ifp;
-	u_long cmd;
-	caddr_t data;
-{
-	register struct ifaddr *ifa;
-	register struct ifreq *ifr = (struct ifreq *)data;
-	register int error = 0;
-
-	switch (cmd) {
-
-	case SIOCSIFADDR:
-		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
-		ifa = (struct ifaddr *)data;
-		if (ifa != 0)
-			ifa->ifa_rtrequest = discrtrequest;
-		/*
-		 * Everything else is done at a higher level.
-		 */
-		break;
-
-	case SIOCADDMULTI:
-	case SIOCDELMULTI:
-		if (ifr == 0) {
-			error = EAFNOSUPPORT;		/* XXX */
-			break;
-		}
-		switch (ifr->ifr_addr.sa_family) {
-
-#if INET
-		case AF_INET:
-			break;
-#endif
-#if INET6
-		case AF_INET6:
-			break;
-#endif
-
-		default:
-			error = EAFNOSUPPORT;
-			break;
-		}
-		break;
-
-	case SIOCSIFMTU:
-		ifp->if_mtu = ifr->ifr_mtu;
-		break;
-
-	default:
-		error = EINVAL;
-	}
-	return (error);
-}
diff --git a/bsd/net/if_dummy.c b/bsd/net/if_dummy.c
deleted file mode 100644
index 68dac9c9d..000000000
--- a/bsd/net/if_dummy.c
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1982, 1986, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * derived from 
- *	@(#)if_loop.c	8.1 (Berkeley) 6/10/93
- * Id: if_loop.c,v 1.22 1996/06/19 16:24:10 wollman Exp
- */
-
-/*
- * Loopback interface driver for protocol testing and timing.
- */
-#if BSD310
-#include "opt_inet.h"
-#endif
-#include "dummy.h"
-#if NDUMMY > 0
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/mbuf.h>
-#include <sys/socket.h>
-#include <sys/sockio.h>
-#include <sys/errno.h>
-#include <sys/time.h>
-
-#include <net/if.h>
-#include <net/if_types.h>
-#include <net/route.h>
-#include <net/bpf.h>
-
-#if	INET
-#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
-#include <netinet/ip.h>
-#endif
-
-#if INET6
-#if !INET
-#include <netinet/in.h>
-#endif
-#include <netinet6/in6_var.h>
-#include <netinet6/ip6.h>
-#endif
-
-#if NETATALK
-#include <netinet/if_ether.h>
-#include <netatalk/at.h>
-#include <netatalk/at_var.h>
-#endif /* NETATALK */
-
-#include "bpfilter.h"
-
-static int dummyioctl(struct ifnet *, u_long, caddr_t);
-int dummyoutput(struct ifnet *, register struct mbuf *, struct sockaddr *,
-	register struct rtentry *);
-static void dummyrtrequest(int, struct rtentry *, struct sockaddr *);
-
-static void dummyattach(void *);
-PSEUDO_SET(dummyattach, if_dummy);
-
-#if TINY_DUMMYMTU
-#define	DUMMYMTU	(1024+512)
-#else
-#define DUMMYMTU	16384
-#endif
-#define HAVE_OLD_BPF 1
-
-static struct	ifnet dummyif[NDUMMY];
-
-/* ARGSUSED */
-static void
-dummyattach(dummy)
-	void *dummy;
-{
-	register struct ifnet *ifp;
-	register int i = 0;
-
-	for (i = 0; i < NDUMMY; i++) {
-		ifp = &dummyif[i];
-#if defined(__NetBSD__) || defined(__OpenBSD__)
-		sprintf(ifp->if_xname, "dummy%d", i);
-#else
-		ifp->if_name = "dummy";
-		ifp->if_unit = i;
-#endif
-#ifndef __bsdi__
-		ifp->if_softc = NULL;
-#endif
-		ifp->if_mtu = DUMMYMTU;
-		/* Change to BROADCAST experimentally to announce its prefix. */
-		ifp->if_flags = /* IFF_LOOPBACK */ IFF_BROADCAST | IFF_MULTICAST;
-		ifp->if_ioctl = dummyioctl;
-		ifp->if_output = dummyoutput;
-		ifp->if_type = IFT_DUMMY;
-		ifp->if_hdrlen = 0;
-		ifp->if_addrlen = 0;
-		if_attach(ifp);
-#if NBPFILTER > 0
-#ifdef HAVE_OLD_BPF
-		bpfattach(ifp, DLT_NULL, sizeof(u_int));
-#else
-		bpfattach(&ifp->if_bpf, ifp, DLT_NULL, sizeof(u_int));
-#endif
-#endif
-	}
-}
-
-int
-dummyoutput(ifp, m, dst, rt)
-	struct ifnet *ifp;
-	register struct mbuf *m;
-	struct sockaddr *dst;
-	register struct rtentry *rt;
-{
-	if ((m->m_flags & M_PKTHDR) == 0)
-		panic("dummyoutput no HDR");
-#if NBPFILTER > 0
-	/* BPF write needs to be handled specially */
-	if (dst->sa_family == AF_UNSPEC) {
-		dst->sa_family = *(mtod(m, int *));
-		m->m_len -= sizeof(int);
-		m->m_pkthdr.len -= sizeof(int);
-		m->m_data += sizeof(int);
-	}
-
-	if (ifp->if_bpf) {
-		/* We need to prepend the address family as a four byte field. */
-		u_int af = dst->sa_family;
-
-		bpf_tap_out(ifp, 0, m, &af, sizeof(af));
-	}
-#endif
-	m->m_pkthdr.rcvif = ifp;
-
-	if (rt != NULL) {
-		u_int32_t rt_flags = rt->rt_flags;
-		if (rt_flags & (RTF_REJECT | RTF_BLACKHOLE)) {
-			m_freem(m);
-			return ((rt_flags & RTF_BLACKHOLE) ? 0 :
-			    (rt_flags & RTF_HOST) ? EHOSTUNREACH : ENETUNREACH);
-		}
-	}
-	ifp->if_opackets++;
-	ifp->if_obytes += m->m_pkthdr.len;
-	proto_inject(dst->sa_family, m);
-	ifp->if_ipackets++;
-	ifp->if_ibytes += m->m_pkthdr.len;
-	return (0);
-}
-
-/* ARGSUSED */
-static void
-dummyrtrequest(cmd, rt, sa)
-	int cmd;
-	struct rtentry *rt;
-	struct sockaddr *sa;
-{
-	if (rt != NULL) {
-		RT_LOCK_ASSERT_HELD(rt);
-		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */
-		/*
-		 * For optimal performance, the send and receive buffers
-		 * should be at least twice the MTU plus a little more for
-		 * overhead.
-		 */
-		rt->rt_rmx.rmx_recvpipe = 
-			rt->rt_rmx.rmx_sendpipe = 3 * DUMMYMTU;
-	}
-}
-
-/*
- * Process an ioctl request.
- */
-/* ARGSUSED */
-static int
-dummyioctl(ifp, cmd, data)
-	register struct ifnet *ifp;
-	u_long cmd;
-	caddr_t data;
-{
-	register struct ifaddr *ifa;
-	register struct ifreq *ifr = (struct ifreq *)data;
-	register int error = 0;
-
-	switch (cmd) {
-
-	case SIOCSIFADDR:
-		ifnet_set_flags(ifp, IFF_UP | IFF_RUNNING, IFF_UP | IFF_RUNNING);
-		ifa = (struct ifaddr *)data;
-		ifa->ifa_rtrequest = dummyrtrequest;
-		/*
-		 * Everything else is done at a higher level.
-		 */
-		break;
-
-	case SIOCADDMULTI:
-	case SIOCDELMULTI:
-		if (ifr == 0) {
-			error = EAFNOSUPPORT;		/* XXX */
-			break;
-		}
-		switch (ifr->ifr_addr.sa_family) {
-
-#if INET
-		case AF_INET:
-			break;
-#endif
-#if INET6
-		case AF_INET6:
-			break;
-#endif
-
-		default:
-			error = EAFNOSUPPORT;
-			break;
-		}
-		break;
-
-	case SIOCSIFMTU:
-		ifp->if_mtu = ifr->ifr_mtu;
-		break;
-
-	case SIOCSIFFLAGS:
-		break;
-
-	default:
-		error = EINVAL;
-	}
-	return (error);
-}
-#endif /* NDUMMY > 0 */
diff --git a/bsd/net/if_ethersubr.c b/bsd/net/if_ethersubr.c
deleted file mode 100644
index e407e009f..000000000
--- a/bsd/net/if_ethersubr.c
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Copyright (c) 2000, 2009 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1982, 1989, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)if_ethersubr.c	8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/net/if_ethersubr.c,v 1.70.2.17 2001/08/01 00:47:49 fenner Exp $
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/socket.h>
-#include <sys/sockio.h>
-#include <sys/sysctl.h>
-
-#include <net/if.h>
-#include <net/route.h>
-#include <net/if_llc.h>
-#include <net/if_dl.h>
-#include <net/if_types.h>
-
-#if INET || INET6
-#include <netinet/in.h>
-#include <netinet/in_var.h>
-#include <netinet/if_ether.h>
-#include <netinet/in_systm.h>
-#include <netinet/ip.h>
-#endif
-
-#if IPX
-#include <netipx/ipx.h>
-#include <netipx/ipx_if.h>
-#endif
-
-#include <sys/socketvar.h>
-
-#if LLC && CCITT
-extern struct ifqueue pkintrq;
-#endif
-
-/* #include "vlan.h" */
-#if NVLAN > 0
-#include <net/if_vlan_var.h>
-#endif /* NVLAN > 0 */
-
-extern u_char	etherbroadcastaddr[];
-#define senderr(e) do { error = (e); goto bad;} while (0)
-
-/*
- * Perform common duties while attaching to interface list
- */
-
-int
-ether_resolvemulti(
-	struct ifnet *ifp,
-	struct sockaddr **llsa,
-	struct sockaddr *sa)
-{
-	struct sockaddr_dl *sdl;
-	struct sockaddr_in *sin;
-	u_char *e_addr;
-#if INET6
-        struct sockaddr_in6 *sin6;
-#endif
-
-
-	switch(sa->sa_family) {
-	case AF_UNSPEC:
-		/* AppleTalk uses AF_UNSPEC for multicast registration.
-		 * No mapping needed. Just check that it's a valid MC address.
-		 */
-		e_addr = &sa->sa_data[0];
-		if ((e_addr[0] & 1) != 1)
-			return EADDRNOTAVAIL;
-		*llsa = 0;
-		return 0;
-
-	case AF_LINK:
-		/* 
-		 * No mapping needed. Just check that it's a valid MC address.
-		 */
-		sdl = (struct sockaddr_dl *)sa;
-		e_addr = LLADDR(sdl);
-		if ((e_addr[0] & 1) != 1)
-			return EADDRNOTAVAIL;
-		*llsa = 0;
-		return 0;
-
-#if INET
-	case AF_INET:
-		sin = (struct sockaddr_in *)sa;
-		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
-			return EADDRNOTAVAIL;
-		MALLOC(sdl, struct sockaddr_dl *, sizeof *sdl, M_IFMADDR,
-		       M_WAITOK);
-		if (sdl == NULL)
-			return ENOBUFS;
-		sdl->sdl_len = sizeof *sdl;
-		sdl->sdl_family = AF_LINK;
-		sdl->sdl_index = ifp->if_index;
-		sdl->sdl_type = IFT_ETHER;
-		sdl->sdl_nlen = 0;
-		sdl->sdl_alen = ETHER_ADDR_LEN;
-		sdl->sdl_slen = 0;
-		e_addr = LLADDR(sdl);
-		ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
-		*llsa = (struct sockaddr *)sdl;
-		return 0;
-#endif
-#if INET6
-        case AF_INET6:
-                sin6 = (struct sockaddr_in6 *)sa;
-                if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
-                        /*
-                         * An IP6 address of 0 means listen to all
-                         * of the Ethernet multicast address used for IP6.
-                         * (This is used for multicast routers.)
-                         */
-                        ifp->if_flags |= IFF_ALLMULTI;
-                        *llsa = 0;
-                        return 0;
-                }
-                MALLOC(sdl, struct sockaddr_dl *, sizeof *sdl, M_IFMADDR,
-                       M_WAITOK);
-		if (sdl == NULL)
-			return ENOBUFS;
-                sdl->sdl_len = sizeof *sdl;
-                sdl->sdl_family = AF_LINK;
-                sdl->sdl_index = ifp->if_index;
-                sdl->sdl_type = IFT_ETHER;
-                sdl->sdl_nlen = 0;
-                sdl->sdl_alen = ETHER_ADDR_LEN;
-                sdl->sdl_slen = 0;
-                e_addr = LLADDR(sdl);
-                ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
-#if 0
-                printf("ether_resolvemulti Adding %x:%x:%x:%x:%x:%x\n",
-                                e_addr[0], e_addr[1], e_addr[2], e_addr[3], e_addr[4], e_addr[5]);
-#endif
-                *llsa = (struct sockaddr *)sdl;
-                return 0;
-#endif
-
-	default:
-		/* 
-		 * Well, the text isn't quite right, but it's the name
-		 * that counts...
-		 */
-		return EAFNOSUPPORT;
-	}
-}
-
-
-/*
- * Convert Ethernet address to printable (loggable) representation.
- */
-static u_char digits[] = "0123456789abcdef";
-char *
-ether_sprintf(p, ap)
-	register u_char *p;
-        register u_char *ap;
-{	register char *cp;
-        register i;
-
-        for (cp = p, i = 0; i < 6; i++) {
-                *cp++ = digits[*ap >> 4];
-                *cp++ = digits[*ap++ & 0xf];
-                *cp++ = ':';
-        }
-        *--cp = 0;
-        return (p);
-}
diff --git a/bsd/net/if_fddisubr.c b/bsd/net/if_fddisubr.c
deleted file mode 100644
index 1de331796..000000000
--- a/bsd/net/if_fddisubr.c
+++ /dev/null
@@ -1,637 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1995, 1996
- *	Matt Thomas <matt@3am-software.com>.  All rights reserved.
- * Copyright (c) 1982, 1989, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	from: if_ethersubr.c,v 1.5 1994/12/13 22:31:45 wollman Exp
- */
-
-#include "opt_atalk.h"
-#include "opt_inet.h"
-#include "opt_ipx.h"
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/mbuf.h>
-#include <sys/socket.h>
-
-#include <net/if.h>
-#include <net/netisr.h>
-#include <net/route.h>
-#include <net/if_llc.h>
-#include <net/if_dl.h>
-#include <net/if_types.h>
-
-#if INET
-#include <netinet/in.h>
-#include <netinet/in_var.h>
-#include <netinet/if_ether.h>
-#endif
-#if defined(__FreeBSD__)
-#include <netinet/if_fddi.h>
-#else
-#include <net/if_fddi.h>
-#endif
-
-#if IPX
-#include <netipx/ipx.h> 
-#include <netipx/ipx_if.h>
-#endif
-
-#if DECNET
-#include <netdnet/dn.h>
-#endif
-
-#include "bpfilter.h"
-
-#define senderr(e) { error = (e); goto bad;}
-
-/*
- * This really should be defined in if_llc.h but in case it isn't.
- */
-#ifndef llc_snap
-#define	llc_snap	llc_un.type_snap
-#endif
-
-#if defined(__bsdi__) || defined(__NetBSD__)
-#define	RTALLOC1(a, b)			rtalloc1(a, b)
-#define	ARPRESOLVE(a, b, c, d, e, f)	arpresolve(a, b, c, d, e)
-#elif defined(__FreeBSD__)
-#define	RTALLOC1(a, b)			rtalloc1(a, b, 0UL)
-#define	ARPRESOLVE(a, b, c, d, e, f)	arpresolve(a, b, c, d, e, f)
-#endif
-/*
- * FDDI output routine.
- * Encapsulate a packet of type family for the local net.
- * Use trailer local net encapsulation if enough data in first
- * packet leaves a multiple of 512 bytes of data in remainder.
- * Assumes that ifp is actually pointer to arpcom structure.
- */
-int
-fddi_output(ifp, m0, dst, rt0)
-	register struct ifnet *ifp;
-	struct mbuf *m0;
-	struct sockaddr *dst;
-	struct rtentry *rt0;
-{
-	u_int16_t type;
-	int s, loop_copy = 0, error = 0;
- 	u_char edst[6];
-	register struct mbuf *m = m0;
-	register struct rtentry *rt;
-	register struct fddi_header *fh;
-	struct arpcom *ac = (struct arpcom *)ifp;
-
-	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
-		senderr(ENETDOWN);
-	getmicrotime(&ifp->if_lastchange);
-#if !defined(__bsdi__) || _BSDI_VERSION >= 199401
-	if (rt = rt0) {
-		if ((rt->rt_flags & RTF_UP) == 0) {
-			if (rt0 = rt = RTALLOC1(dst, 1))
-				rtunref(rt);
-			else 
-				senderr(EHOSTUNREACH);
-		}
-		if (rt->rt_flags & RTF_GATEWAY) {
-			if (rt->rt_gwroute == 0)
-				goto lookup;
-			if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) {
-				rtfree(rt); rt = rt0;
-			lookup: rt->rt_gwroute = RTALLOC1(rt->rt_gateway, 1);
-				if ((rt = rt->rt_gwroute) == 0)
-					senderr(EHOSTUNREACH);
-			}
-		}
-		if (rt->rt_flags & RTF_REJECT)
-			if (rt->rt_rmx.rmx_expire == 0 ||
-			    time_second < rt->rt_rmx.rmx_expire)
-				senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
-	}
-#endif
-	switch (dst->sa_family) {
-
-#if INET
-	case AF_INET: {
-#if !defined(__bsdi__) || _BSDI_VERSION >= 199401
-		if (!ARPRESOLVE(ac, rt, m, dst, edst, rt0))
-			return (0);	/* if not yet resolved */
-#else
-		int usetrailers;
-		if (!arpresolve(ac, m, &((struct sockaddr_in *)dst)->sin_addr, edst, &usetrailers))
-			return (0);	/* if not yet resolved */
-#endif
-		type = htons(ETHERTYPE_IP);
-		break;
-	}
-#endif
-#if IPX
-	case AF_IPX:
-		type = htons(ETHERTYPE_IPX);
- 		bcopy((caddr_t)&(((struct sockaddr_ipx *)dst)->sipx_addr.x_host),
-		    (caddr_t)edst, sizeof (edst));
-		break;
-#endif
-
-#if NS
-	case AF_NS:
-		type = htons(ETHERTYPE_NS);
- 		bcopy((caddr_t)&(((struct sockaddr_ns *)dst)->sns_addr.x_host),
-		    (caddr_t)edst, sizeof (edst));
-		break;
-#endif
-#if	ISO
-	case AF_ISO: {
-		int	snpalen;
-		struct	llc *l;
-		register struct sockaddr_dl *sdl;
-
-		if (rt && (sdl = (struct sockaddr_dl *)rt->rt_gateway) &&
-		    sdl->sdl_family == AF_LINK && sdl->sdl_alen > 0) {
-			bcopy(LLADDR(sdl), (caddr_t)edst, sizeof(edst));
-		} else if (error =
-			    iso_snparesolve(ifp, (struct sockaddr_iso *)dst,
-					    (char *)edst, &snpalen))
-			goto bad; /* Not Resolved */
-		/* If broadcasting on a simplex interface, loopback a copy */
-		if (*edst & 1)
-			m->m_flags |= (M_BCAST|M_MCAST);
-		M_PREPEND(m, 3, M_DONTWAIT);
-		if (m == NULL)
-			return (0);
-		type = 0;
-		l = mtod(m, struct llc *);
-		l->llc_dsap = l->llc_ssap = LLC_ISO_LSAP;
-		l->llc_control = LLC_UI;
-		IFDEBUG(D_ETHER)
-			int i;
-			printf("unoutput: sending pkt to: ");
-			for (i=0; i<6; i++)
-				printf("%x ", edst[i] & 0xff);
-			printf("\n");
-		ENDDEBUG
-		} break;
-#endif /* ISO */
-#if	LLC
-/*	case AF_NSAP: */
-	case AF_CCITT: {
-		register struct sockaddr_dl *sdl = 
-			(struct sockaddr_dl *) rt -> rt_gateway;
-
-		if (sdl && sdl->sdl_family != AF_LINK && sdl->sdl_alen <= 0)
-			goto bad; /* Not a link interface ? Funny ... */
-		bcopy(LLADDR(sdl), (char *)edst, sizeof(edst));
-		if (*edst & 1)
-			loop_copy = 1;
-		type = 0;
-#if LLC_DEBUG
-		{
-			int i;
-			register struct llc *l = mtod(m, struct llc *);
-
-			printf("fddi_output: sending LLC2 pkt to: ");
-			for (i=0; i<6; i++)
-				printf("%x ", edst[i] & 0xff);
-			printf(" len 0x%x dsap 0x%x ssap 0x%x control 0x%x\n", 
-			       type & 0xff, l->llc_dsap & 0xff, l->llc_ssap &0xff,
-			       l->llc_control & 0xff);
-
-		}
-#endif /* LLC_DEBUG */
-		} break;
-#endif /* LLC */	
-
-	case AF_UNSPEC:
-	{
-		struct ether_header *eh;
-		loop_copy = -1;
-		eh = (struct ether_header *)dst->sa_data;
- 		(void)memcpy((caddr_t)edst, (caddr_t)eh->ether_dhost, sizeof (edst));
-		if (*edst & 1)
-			m->m_flags |= (M_BCAST|M_MCAST);
-		type = eh->ether_type;
-		break;
-	}
-
-#if NBPFILTER > 0
-	case AF_IMPLINK:
-	{
-		fh = mtod(m, struct fddi_header *);
-		error = EPROTONOSUPPORT;
-		switch (fh->fddi_fc & (FDDIFC_C|FDDIFC_L|FDDIFC_F)) {
-			case FDDIFC_LLC_ASYNC: {
-				/* legal priorities are 0 through 7 */
-				if ((fh->fddi_fc & FDDIFC_Z) > 7)
-			        	goto bad;
-				break;
-			}
-			case FDDIFC_LLC_SYNC: {
-				/* FDDIFC_Z bits reserved, must be zero */
-				if (fh->fddi_fc & FDDIFC_Z)
-					goto bad;
-				break;
-			}
-			case FDDIFC_SMT: {
-				/* FDDIFC_Z bits must be non zero */
-				if ((fh->fddi_fc & FDDIFC_Z) == 0)
-					goto bad;
-				break;
-			}
-			default: {
-				/* anything else is too dangerous */
-               	 		goto bad;
-			}
-		}
-		error = 0;
-		if (fh->fddi_dhost[0] & 1)
-			m->m_flags |= (M_BCAST|M_MCAST);
-		goto queue_it;
-	}
-#endif
-	default:
-		printf("%s%d: can't handle af%d\n", ifp->if_name, ifp->if_unit,
-			dst->sa_family);
-		senderr(EAFNOSUPPORT);
-	}
-
-	if (type != 0) {
-		register struct llc *l;
-		M_PREPEND(m, sizeof (struct llc), M_DONTWAIT);
-		if (m == 0)
-			senderr(ENOBUFS);
-		l = mtod(m, struct llc *);
-		l->llc_control = LLC_UI;
-		l->llc_dsap = l->llc_ssap = LLC_SNAP_LSAP;
-		l->llc_snap.org_code[0] = l->llc_snap.org_code[1] = l->llc_snap.org_code[2] = 0;
-		(void)memcpy((caddr_t) &l->llc_snap.ether_type, (caddr_t) &type,
-			sizeof(u_int16_t));
-	}
-
-	/*
-	 * Add local net header.  If no space in first mbuf,
-	 * allocate another.
-	 */
-	M_PREPEND(m, sizeof (struct fddi_header), M_DONTWAIT);
-	if (m == 0)
-		senderr(ENOBUFS);
-	fh = mtod(m, struct fddi_header *);
-	fh->fddi_fc = FDDIFC_LLC_ASYNC|FDDIFC_LLC_PRIO4;
- 	(void)memcpy((caddr_t)fh->fddi_dhost, (caddr_t)edst, sizeof (edst));
-  queue_it:
- 	(void)memcpy((caddr_t)fh->fddi_shost, (caddr_t)ac->ac_enaddr,
-	    sizeof(fh->fddi_shost));
-
-	/*
-	 * If a simplex interface, and the packet is being sent to our
-	 * Ethernet address or a broadcast address, loopback a copy.
-	 * XXX To make a simplex device behave exactly like a duplex
-	 * device, we should copy in the case of sending to our own
-	 * ethernet address (thus letting the original actually appear
-	 * on the wire). However, we don't do that here for security
-	 * reasons and compatibility with the original behavior.
-	 */
-	if ((ifp->if_flags & IFF_SIMPLEX) &&
-	   (loop_copy != -1)) {
-		if ((m->m_flags & M_BCAST) || loop_copy) {
-			struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
-
-			(void) if_simloop(ifp,
-				n, dst, sizeof(struct fddi_header));
-	     	} else if (bcmp(fh->fddi_dhost,
-		    fh->fddi_shost, sizeof(fh->fddi_shost)) == 0) {
-			(void) if_simloop(ifp,
-				m, dst, sizeof(struct fddi_header));
-			return(0);	/* XXX */
-		}
-	}
-
-	s = splimp();
-	/*
-	 * Queue message on interface, and start output if interface
-	 * not yet active.
-	 */
-	if (IF_QFULL(&ifp->if_snd)) {
-		IF_DROP(&ifp->if_snd);
-		splx(s);
-		senderr(ENOBUFS);
-	}
-	ifp->if_obytes += m->m_pkthdr.len;
-	IF_ENQUEUE(&ifp->if_snd, m);
-	if ((ifp->if_flags & IFF_OACTIVE) == 0)
-		(*ifp->if_start)(ifp);
-	splx(s);
-	if (m->m_flags & M_MCAST)
-		ifp->if_omcasts++;
-	return (error);
-
-bad:
-	if (m)
-		m_freem(m);
-	return (error);
-}
-
-/*
- * Process a received FDDI packet;
- * the packet is in the mbuf chain m without
- * the fddi header, which is provided separately.
- */
-void
-fddi_input(ifp, fh, m)
-	struct ifnet *ifp;
-	register struct fddi_header *fh;
-	struct mbuf *m;
-{
-	register struct ifqueue *inq;
-	register struct llc *l;
-	int s;
-
-	if ((ifp->if_flags & IFF_UP) == 0) {
-		m_freem(m);
-		return;
-	}
-	getmicrotime(&ifp->if_lastchange);
-	ifp->if_ibytes += m->m_pkthdr.len + sizeof (*fh);
-	if (fh->fddi_dhost[0] & 1) {
-		if (bcmp((caddr_t)fddibroadcastaddr, (caddr_t)fh->fddi_dhost,
-		    sizeof(fddibroadcastaddr)) == 0)
-			m->m_flags |= M_BCAST;
-		else
-			m->m_flags |= M_MCAST;
-		ifp->if_imcasts++;
-	} else if ((ifp->if_flags & IFF_PROMISC)
-	    && bcmp(((struct arpcom *)ifp)->ac_enaddr, (caddr_t)fh->fddi_dhost,
-		    sizeof(fh->fddi_dhost)) != 0) {
-		m_freem(m);
-		return;
-	}
-
-#ifdef M_LINK0
-	/*
-	 * If this has a LLC priority of 0, then mark it so upper
-	 * layers have a hint that it really came via a FDDI/Ethernet
-	 * bridge.
-	 */
-	if ((fh->fddi_fc & FDDIFC_LLC_PRIO7) == FDDIFC_LLC_PRIO0)
-		m->m_flags |= M_LINK0;
-#endif
-
-	l = mtod(m, struct llc *);
-	switch (l->llc_dsap) {
-#if defined(INET) || NS || IPX || defined(NETATALK)
-	case LLC_SNAP_LSAP:
-	{
-		u_int16_t type;
-		if (l->llc_control != LLC_UI || l->llc_ssap != LLC_SNAP_LSAP)
-			goto dropanyway;
-
-		if (l->llc_snap.org_code[0] != 0 || l->llc_snap.org_code[1] != 0|| l->llc_snap.org_code[2] != 0)
-			goto dropanyway;
-		type = ntohs(l->llc_snap.ether_type);
-		m_adj(m, 8);
-		switch (type) {
-#if INET
-		case ETHERTYPE_IP:
-			if (ipflow_fastforward(m))
-				return;
-			schednetisr(NETISR_IP);
-			inq = &ipintrq;
-			break;
-
-		case ETHERTYPE_ARP:
-#if !defined(__bsdi__) || _BSDI_VERSION >= 199401
-			schednetisr(NETISR_ARP);
-			inq = &arpintrq;
-			break;
-#else
-			arpinput((struct arpcom *)ifp, m);
-			return;
-#endif
-#endif
-#if IPX      
-		case ETHERTYPE_IPX: 
-			schednetisr(NETISR_IPX);
-			inq = &ipxintrq;
-			break;  
-#endif   
-#if NS
-		case ETHERTYPE_NS:
-			schednetisr(NETISR_NS);
-			inq = &nsintrq;
-			break;
-#endif
-#if DECNET
-		case ETHERTYPE_DECNET:
-			schednetisr(NETISR_DECNET);
-			inq = &decnetintrq;
-			break;
-#endif
-
-		default:
-			/* printf("fddi_input: unknown protocol 0x%x\n", type); */
-			ifp->if_noproto++;
-			goto dropanyway;
-		}
-		break;
-	}
-#endif /* INET || NS */
-#if	ISO
-	case LLC_ISO_LSAP: 
-		switch (l->llc_control) {
-		case LLC_UI:
-			/* LLC_UI_P forbidden in class 1 service */
-			if ((l->llc_dsap == LLC_ISO_LSAP) &&
-			    (l->llc_ssap == LLC_ISO_LSAP)) {
-				/* LSAP for ISO */
-				m->m_data += 3;		/* XXX */
-				m->m_len -= 3;		/* XXX */
-				m->m_pkthdr.len -= 3;	/* XXX */
-				M_PREPEND(m, sizeof *fh, M_DONTWAIT);
-				if (m == 0)
-					return;
-				*mtod(m, struct fddi_header *) = *fh;
-				IFDEBUG(D_ETHER)
-					printf("clnp packet");
-				ENDDEBUG
-				schednetisr(NETISR_ISO);
-				inq = &clnlintrq;
-				break;
-			}
-			goto dropanyway;
-			
-		case LLC_XID:
-		case LLC_XID_P:
-			if(m->m_len < 6)
-				goto dropanyway;
-			l->llc_window = 0;
-			l->llc_fid = 9;
-			l->llc_class = 1;
-			l->llc_dsap = l->llc_ssap = 0;
-			/* Fall through to */
-		case LLC_TEST:
-		case LLC_TEST_P:
-		{
-			struct sockaddr sa;
-			register struct ether_header *eh;
-			struct arpcom *ac = (struct arpcom *) ifp;
-			int i;
-			u_char c = l->llc_dsap;
-
-			l->llc_dsap = l->llc_ssap;
-			l->llc_ssap = c;
-			if (m->m_flags & (M_BCAST | M_MCAST))
-				bcopy((caddr_t)ac->ac_enaddr,
-				      (caddr_t)eh->ether_dhost, 6);
-			sa.sa_family = AF_UNSPEC;
-			sa.sa_len = sizeof(sa);
-			eh = (struct ether_header *)sa.sa_data;
-			for (i = 0; i < 6; i++) {
-				eh->ether_shost[i] = fh->fddi_dhost[i];
-				eh->ether_dhost[i] = fh->fddi_shost[i];
-			}
-			eh->ether_type = 0;
-			ifp->if_output(ifp, m, &sa, NULL);
-			return;
-		}
-		default:
-			m_freem(m);
-			return;
-		}
-		break;
-#endif /* ISO */
-#if LLC
-	case LLC_X25_LSAP:
-	{
-		M_PREPEND(m, sizeof(struct sdl_hdr) , M_DONTWAIT);
-		if (m == 0)
-			return;
-		if ( !sdl_sethdrif(ifp, fh->fddi_shost, LLC_X25_LSAP,
-				    fh->fddi_dhost, LLC_X25_LSAP, 6, 
-				    mtod(m, struct sdl_hdr *)))
-			panic("ETHER cons addr failure");
-		mtod(m, struct sdl_hdr *)->sdlhdr_len = m->m_pkthdr.len - sizeof(struct sdl_hdr);
-#if LLC_DEBUG
-		printf("llc packet\n");
-#endif /* LLC_DEBUG */
-		schednetisr(NETISR_CCITT);
-		inq = &llcintrq;
-		break;
-	}
-#endif /* LLC */
-		
-	default:
-		/* printf("fddi_input: unknown dsap 0x%x\n", l->llc_dsap); */
-		ifp->if_noproto++;
-	dropanyway:
-		m_freem(m);
-		return;
-	}
-
-	s = splimp();
-	if (IF_QFULL(inq)) {
-		IF_DROP(inq);
-		m_freem(m);
-	} else
-		IF_ENQUEUE(inq, m);
-	splx(s);
-}
-/*
- * Perform common duties while attaching to interface list
- */
-#ifdef __NetBSD__
-#define	ifa_next	ifa_list.tqe_next
-#endif
-
-void
-fddi_ifattach(ifp)
-	register struct ifnet *ifp;
-{
-	register struct ifaddr *ifa;
-	register struct sockaddr_dl *sdl;
-
-	ifp->if_type = IFT_FDDI;
-	ifp->if_addrlen = 6;
-	ifp->if_hdrlen = 21;
-	ifp->if_mtu = FDDIMTU;
-	ifp->if_baudrate = 100000000;
-#if IFF_NOTRAILERS
-	ifp->if_flags |= IFF_NOTRAILERS;
-#endif
-#if defined(__FreeBSD__)
-	ifa = ifnet_addrs[ifp->if_index - 1];
-	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
-	sdl->sdl_type = IFT_FDDI;
-	sdl->sdl_alen = ifp->if_addrlen;
-	bcopy(((struct arpcom *)ifp)->ac_enaddr, LLADDR(sdl), ifp->if_addrlen);
-#elif defined(__NetBSD__)
-	LIST_INIT(&((struct arpcom *)ifp)->ac_multiaddrs);
-	for (ifa = ifp->if_addrlist.tqh_first; ifa != NULL; ifa = ifa->ifa_list.tqe_next)
-#else
-	for (ifa = ifp->if_addrlist; ifa != NULL; ifa = ifa->ifa_next)
-#endif
-#if !defined(__FreeBSD__)
-		if ((sdl = (struct sockaddr_dl *)ifa->ifa_addr) &&
-		    sdl->sdl_family == AF_LINK) {
-			sdl->sdl_type = IFT_FDDI;
-			sdl->sdl_alen = ifp->if_addrlen;
-			bcopy((caddr_t)((struct arpcom *)ifp)->ac_enaddr,
-			      LLADDR(sdl), ifp->if_addrlen);
-			break;
-		}
-#endif
-}
diff --git a/bsd/net/if_gif.c b/bsd/net/if_gif.c
index 38e876d6d..b25ecb3a5 100644
--- a/bsd/net/if_gif.c
+++ b/bsd/net/if_gif.c
@@ -122,7 +122,6 @@ TAILQ_HEAD(gifhead, gif_softc) gifs = TAILQ_HEAD_INITIALIZER(gifs);
 
 #ifdef __APPLE__
 void gifattach(void);
-static void gif_create_dev(void);
 static int gif_encapcheck(const struct mbuf*, int, int, void*);
 static errno_t gif_output(ifnet_t ifp, mbuf_t m);
 static errno_t gif_input(ifnet_t ifp, protocol_family_t protocol_family,
@@ -156,6 +155,11 @@ struct ip6protosw in6_gif_protosw =
 };
 #endif
 
+static if_clone_t gif_cloner = NULL;
+static int gif_clone_create(struct if_clone *, uint32_t, void *);
+static int gif_clone_destroy(struct ifnet *);
+static void gif_delete_tunnel(struct gif_softc *);
+
 #ifdef __APPLE__
 /*
  * Theory of operation: initially, one gif interface is created.
@@ -237,6 +241,8 @@ __private_extern__ void
 gifattach(void)
 {
 	errno_t result;
+	struct ifnet_clone_params ifnet_clone_params;
+	struct if_clone *ifc = NULL; 
 
 	/* Init the list of interfaces */
 	TAILQ_INIT(&gifs);
@@ -252,8 +258,17 @@ gifattach(void)
 	if (result != 0)
 		printf("proto_register_plumber failed for AF_INET6 error=%d\n", result);
 
+	ifnet_clone_params.ifc_name = "gif";
+	ifnet_clone_params.ifc_create = gif_clone_create;
+	ifnet_clone_params.ifc_destroy = gif_clone_destroy;
+
+	result = ifnet_clone_attach(&ifnet_clone_params, &gif_cloner);
+	if (result != 0)
+		printf("gifattach: ifnet_clone_attach failed %d\n", result);
+
 	/* Create first device */
-	gif_create_dev();
+	ifc = if_clone_lookup("gif", NULL);
+	gif_clone_create(ifc, 0, NULL);
 }
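+
+/*
+ * Design note (illustrative): with the cloner registered above, additional
+ * gif instances are created and destroyed on demand through the standard
+ * BSD interface-cloning path (e.g. something like "ifconfig gif1 create"
+ * from user space), instead of keeping a spare pre-allocated device around
+ * the way the removed gif_create_dev() scheme did.
+ */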
 
 static errno_t
@@ -270,35 +285,34 @@ gif_set_bpf_tap(
 	return 0;
 }
 
-/* Creates another gif device if there are none free */
-static void
-gif_create_dev(void)
+
+static int
+gif_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params)
 {
-	struct gif_softc			*sc;
-	struct ifnet_init_params	gif_init;
-	errno_t						result = 0;
-	
-	
+	struct gif_softc	*sc = NULL;
+	struct ifnet_init_params gif_init;
+	errno_t result = 0;
+
 	/* Can't create more than GIF_MAXUNIT */
 	if (ngif >= GIF_MAXUNIT)
-		return;
-	
-	/* Check for unused gif interface */
-	TAILQ_FOREACH(sc, &gifs, gif_link) {
-		/* If unused, return, no need to create a new interface */
-		if ((ifnet_flags(sc->gif_if) & IFF_RUNNING) == 0)
-			return;
-	}
+		return (ENXIO);
 
 	sc = _MALLOC(sizeof(struct gif_softc), M_DEVBUF, M_WAITOK);
 	if (sc == NULL) {
-		log(LOG_ERR, "gifattach: failed to allocate gif%d\n", ngif);
-		return;
+		log(LOG_ERR, "gif_clone_create: failed to allocate gif%d\n", unit);
+		return ENOBUFS;
 	}
-	
+	bzero(sc, sizeof(struct gif_softc));
+
+	/* use the interface name as the unique id for ifp recycle */
+	snprintf(sc->gif_ifname, sizeof(sc->gif_ifname), "%s%d",
+			ifc->ifc_name, unit);
+
 	bzero(&gif_init, sizeof(gif_init));
+	gif_init.uniqueid = sc->gif_ifname;
+	gif_init.uniqueid_len = strlen(sc->gif_ifname);
 	gif_init.name = GIFNAME;
-	gif_init.unit = ngif;
+	gif_init.unit = unit;
 	gif_init.type = IFT_GIF;
 	gif_init.family = IFNET_FAMILY_GIF;
 	gif_init.output = gif_output;
@@ -309,22 +323,22 @@ gif_create_dev(void)
 	gif_init.ioctl = gif_ioctl;
 	gif_init.set_bpf_tap = gif_set_bpf_tap;
 
-	bzero(sc, sizeof(struct gif_softc));
 	result = ifnet_allocate(&gif_init, &sc->gif_if);
 	if (result != 0) {
-		printf("gif_create_dev, ifnet_allocate failed - %d\n", result);
+		printf("gif_clone_create, ifnet_allocate failed - %d\n", result);
 		_FREE(sc, M_DEVBUF);
-		return;
+		return ENOBUFS;
 	}
+
 	sc->encap_cookie4 = sc->encap_cookie6 = NULL;
 #if INET
 	sc->encap_cookie4 = encap_attach_func(AF_INET, -1,
-	    gif_encapcheck, &in_gif_protosw, sc);
+			gif_encapcheck, &in_gif_protosw, sc);
 	if (sc->encap_cookie4 == NULL) {
 		printf("%s: unable to attach encap4\n", if_name(sc->gif_if));
 		ifnet_release(sc->gif_if);
 		FREE(sc, M_DEVBUF);
-		return;
+		return ENOBUFS;
 	}
 #endif
 #if INET6
@@ -338,7 +352,7 @@ gif_create_dev(void)
 		printf("%s: unable to attach encap6\n", if_name(sc->gif_if));
 		ifnet_release(sc->gif_if);
 		FREE(sc, M_DEVBUF);
-		return;
+		return ENOBUFS;
 	}
 #endif
 	sc->gif_called = 0;
@@ -350,10 +364,18 @@ gif_create_dev(void)
 #endif
 	result = ifnet_attach(sc->gif_if, NULL);
 	if (result != 0) {
-		printf("gif_create_dev - ifnet_attach failed - %d\n", result);
+		printf("gif_clone_create - ifnet_attach failed - %d\n", result);
 		ifnet_release(sc->gif_if);
+		if (sc->encap_cookie4) {
+			encap_detach(sc->encap_cookie4);
+			sc->encap_cookie4 = NULL;
+		}
+		if (sc->encap_cookie6) {
+			encap_detach(sc->encap_cookie6);
+			sc->encap_cookie6 = NULL;
+		}
 		FREE(sc, M_DEVBUF);
-		return;
+		return result;
 	}
 #if CONFIG_MACF_NET
 	mac_ifnet_label_init(&sc->gif_if);
@@ -361,6 +383,43 @@ gif_create_dev(void)
 	bpfattach(sc->gif_if, DLT_NULL, sizeof(u_int));
 	TAILQ_INSERT_TAIL(&gifs, sc, gif_link);
 	ngif++;
+	return 0;
+}
+
+static int
+gif_clone_destroy(struct ifnet *ifp)
+{
+#if defined(INET) || defined(INET6)
+	int err = 0;
+#endif
+	struct gif_softc *sc = ifp->if_softc;
+
+	TAILQ_REMOVE(&gifs, sc, gif_link);
+
+	gif_delete_tunnel(sc);
+#ifdef INET6
+	if (sc->encap_cookie6 != NULL) {
+		err = encap_detach(sc->encap_cookie6);
+		KASSERT(err == 0, ("gif_clone_destroy: Unexpected error detaching encap_cookie6"));
+	}
+#endif
+#ifdef INET
+	if (sc->encap_cookie4 != NULL) {
+		err = encap_detach(sc->encap_cookie4);
+		KASSERT(err == 0, ("gif_clone_destroy: Unexpected error detaching encap_cookie4"));
+	}
+#endif
+	err = ifnet_set_flags(ifp, 0, IFF_UP);
+	if (err != 0) {
+		printf("gif_clone_destroy: ifnet_set_flags failed %d\n", err);
+	}
+
+	err = ifnet_detach(ifp);
+	if (err != 0)
+		panic("gif_clone_destroy: ifnet_detach(%p) failed %d\n", ifp, err);
+	FREE(sc, M_DEVBUF);
+	ngif--;
+	return 0;
 }
 
 static int
@@ -488,7 +547,6 @@ gif_input(
 	mbuf_t				m,
 	__unused char		*frame_header)
 {
-	errno_t error;
 	struct gif_softc *sc = ifnet_softc(ifp);
 	
 	bpf_tap_in(ifp, 0, m, &sc->gif_proto, sizeof(sc->gif_proto));
@@ -505,8 +563,11 @@ gif_input(
 	 * it occurs more times than we thought, we may change the policy
 	 * again.
 	 */
-	error = proto_input(protocol_family, m);
-	ifnet_stat_increment_in(ifp, 1, m->m_pkthdr.len, 0);
+	if (proto_input(protocol_family, m) != 0) {
+		ifnet_stat_increment_in(ifp, 0, 0, 1);
+		m_freem(m);
+	} else
+		ifnet_stat_increment_in(ifp, 1, m->m_pkthdr.len, 0);
 
 	return (0);
 }
@@ -716,11 +777,6 @@ gif_ioctl(
 
 		ifnet_set_flags(ifp, IFF_RUNNING | IFF_UP, IFF_RUNNING | IFF_UP);
 		
-#ifdef __APPLE__
-		/* Make sure at least one unused device is still available */
-		gif_create_dev();
-#endif
-
 		error = 0;
 		break;
 
@@ -839,7 +895,6 @@ gif_ioctl(
 	return error;
 }
 
-#ifndef __APPLE__
 /* This function is not used in our stack */
 void
 gif_delete_tunnel(sc)
@@ -857,4 +912,3 @@ gif_delete_tunnel(sc)
 	}
 	/* change the IFF_UP flag as well? */
 }
-#endif
diff --git a/bsd/net/if_gif.h b/bsd/net/if_gif.h
index dc193f74c..dfba33645 100644
--- a/bsd/net/if_gif.h
+++ b/bsd/net/if_gif.h
@@ -90,6 +90,7 @@ struct gif_softc {
 	TAILQ_ENTRY(gif_softc) gif_link; /* all gif's are linked */
 	bpf_tap_mode	tap_mode;
 	bpf_packet_func tap_callback;
+	char 	gif_ifname[IFNAMSIZ];
 };
 
 #define gif_ro gifsc_gifscr.gifscr_ro
diff --git a/bsd/net/if_llreach.c b/bsd/net/if_llreach.c
new file mode 100644
index 000000000..669beb0f4
--- /dev/null
+++ b/bsd/net/if_llreach.c
@@ -0,0 +1,565 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Link-layer Reachability Record
+ *
+ * Each interface maintains a red-black tree which contains records related
+ * to the on-link nodes which we are interested in communicating with.  Each
+ * record gets allocated and inserted into the tree in the following manner:
+ * upon processing an ARP announcement or reply from a known node (i.e. there
+ * exists an ARP route entry for the node), and if a link-layer reachability
+ * record for the node doesn't yet exist; and, upon processing an ND6 RS/RA/
+ * NS/NA/redirect from a node, and if a link-layer reachability record for the
+ * node doesn't yet exist.
+ *
+ * Each newly created record is then referred to by the resolver route entry;
+ * if a record already exists, its reference count gets increased for the new
+ * resolver entry which now refers to it.  A record gets removed from the tree
+ * and freed once its reference count drops to zero, i.e. when there is no
+ * more resolver entry referring to it.
+ *
+ * A record contains the link-layer protocol (e.g. Ethertype IP/IPv6), the
+ * HW address of the sender, the "last heard from" timestamp (lr_lastrcvd) and
+ * the number of references made to it (lr_reqcnt).  Because the key for each
+ * record in the red-black tree includes the link-layer protocol, the
+ * namespace for the records is partitioned based on the type of link-layer
+ * protocol, i.e. an Ethertype IP link-layer record is only referred to by one
+ * or more ARP entries; an Ethertype IPv6 link-layer record is only referred to
+ * by one or more ND6 entries.  Therefore, lr_reqcnt represents the number of
+ * resolver entry references to the record for the same protocol family.
+ *
+ * Upon receiving packets from the network, the protocol's input callback
+ * (e.g. ether_inet{6}_input) informs the corresponding resolver (ARP/ND6)
+ * about the (link-layer) origin of the packet.  This results in searching
+ * for a matching record in the red-black tree for the interface on which
+ * the packet arrived.  If there's no match, no further processing takes place.
+ * Otherwise, the lr_lastrcvd timestamp of the record is updated.
+ *
+ * When an IP/IPv6 packet is transmitted to the resolver (i.e. the destination
+ * is on-link), ARP/ND6 records the "last spoken to" timestamp in the route
+ * entry ({la,ln}_lastused).
+ *
+ * The reachability of the on-link node is determined by the following logic,
+ * upon sending a packet through the resolver:
+ *
+ *   a) If the record is only used by exactly one resolver entry (lr_reqcnt
+ *	is 1), i.e. the target host does not have IP/IPv6 aliases that we know
+ *	of, check if lr_lastrcvd is "recent."  If so, simply send the packet;
+ *	otherwise, re-resolve the target node.
+ *
+ *   b) If the record is shared by multiple resolver entries (lr_reqcnt is
+ *	greater than 1), i.e. the target host has more than one IP/IPv6 aliases
+ *	on the same network interface, we can't rely on lr_lastrcvd alone, as
+ *	one of the IP/IPv6 aliases could have been silently moved to another
+ *	node for which we don't have a link-layer record.  If lr_lastrcvd is
+ *	not "recent", we re-resolve the target node.  Otherwise, we perform
+ *	an additional check against {la,ln}_lastused to see whether it is also
+ *	"recent", relative to lr_lastrcvd.  If so, simply send the packet;
+ *	otherwise, re-resolve the target node.
+ *
+ * The value for "recent" is configurable by adjusting the basetime value for
+ * net.link.ether.inet.arp_llreach_base or net.inet6.icmp6.nd6_llreach_base.
+ * The default basetime value is 30 seconds, and the actual expiration time
+ * is calculated by multiplying the basetime value by a random factor,
+ * which results in a number between 15 and 45 seconds.  Setting the basetime
+ * value to 0 effectively disables this feature for the corresponding resolver.
+ *
+ * Assumptions:
+ *
+ * The above logic is based upon the following assumptions:
+ *
+ *   i) Network traffic is mostly bi-directional, i.e. the act of sending
+ *	packets to an on-link node would most likely cause us to receive
+ *	packets from that node.
+ *
+ *  ii) If the on-link node's IP/IPv6 address silently moves to another
+ *	on-link node of which we are not aware, non-unicast packets
+ *	from the old node would trigger the record's lr_lastrcvd to be
+ *	kept recent.
+ *
+ * We can mitigate the above by having the resolver check its {la,ln}_lastused
+ * timestamp at all times, i.e. not only when lr_reqcnt is greater than 1; but
+ * we currently optimize for the common cases.
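+ *
+ * As a rough sketch (illustration only, not the code itself), the per-send
+ * decision above amounts to:
+ *
+ *	if (lr->lr_reqcnt == 1)
+ *		reachable = iflr_reachable(lr, 0, 0);
+ *	else
+ *		reachable = iflr_reachable(lr, 1, lastused);
+ *	if (!reachable)
+ *		re-resolve the target node;
+ *
+ * where "lastused" stands for the resolver's {la,ln}_lastused timestamp.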
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/tree.h>
+#include <sys/sysctl.h>
+#include <sys/mcache.h>
+#include <sys/protosw.h>
+
+#include <net/if_dl.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_llreach.h>
+#include <net/dlil.h>
+
+#include <kern/assert.h>
+#include <kern/locks.h>
+#include <kern/zalloc.h>
+
+#if INET6
+#include <netinet6/in6_var.h>
+#include <netinet6/nd6.h>
+#endif /* INET6 */
+
+static unsigned int iflr_size;		/* size of if_llreach */
+static struct zone *iflr_zone;		/* zone for if_llreach */
+
+#define	IFLR_ZONE_MAX		128		/* maximum elements in zone */
+#define	IFLR_ZONE_NAME		"if_llreach"	/* zone name */
+
+static struct if_llreach *iflr_alloc(int);
+static void iflr_free(struct if_llreach *);
+static __inline int iflr_cmp(const struct if_llreach *,
+    const struct if_llreach *);
+static __inline int iflr_reachable(struct if_llreach *, int, u_int64_t);
+static int sysctl_llreach_ifinfo SYSCTL_HANDLER_ARGS;
+
+/* The following is protected by if_llreach_lock */
+RB_GENERATE_PREV(ll_reach_tree, if_llreach, lr_link, iflr_cmp);
+
+SYSCTL_DECL(_net_link_generic_system);
+
+SYSCTL_NODE(_net_link_generic_system, OID_AUTO, llreach_info,
+    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_llreach_ifinfo,
+    "Per-interface tree of source link-layer reachability records");
+
+/*
+ * Link-layer reachability is based off node constants in RFC4861.
+ */
+#if INET6
+#define	LL_COMPUTE_RTIME(x)	ND_COMPUTE_RTIME(x)
+#else
+#define LL_MIN_RANDOM_FACTOR	512	/* 1024 * 0.5 */
+#define LL_MAX_RANDOM_FACTOR	1536	/* 1024 * 1.5 */
+#define LL_COMPUTE_RTIME(x)						\
+	(((LL_MIN_RANDOM_FACTOR * (x >> 10)) + (random() &		\
+	((LL_MAX_RANDOM_FACTOR - LL_MIN_RANDOM_FACTOR) * (x >> 10)))) / 1000)
+#endif /* !INET6 */
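+
+/*
+ * Worked example (a sketch using the default value): for a basetime of 30
+ * seconds the computation is LL_COMPUTE_RTIME(30 * 1000).  Since x >> 10
+ * approximates x / 1024, the result ranges from about (512 * 29) / 1000,
+ * i.e. ~15 seconds, up to at most (1536 * 29) / 1000, i.e. ~45 seconds,
+ * matching the 15 to 45 second window described in the comment at the top.
+ */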
+
+void
+ifnet_llreach_init(void)
+{
+	iflr_size = sizeof (struct if_llreach);
+	iflr_zone = zinit(iflr_size,
+	    IFLR_ZONE_MAX * iflr_size, 0, IFLR_ZONE_NAME);
+	if (iflr_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, IFLR_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(iflr_zone, Z_EXPAND, TRUE);
+	zone_change(iflr_zone, Z_CALLERACCT, FALSE);
+}
+
+void
+ifnet_llreach_ifattach(struct ifnet *ifp, boolean_t reuse)
+{
+	lck_rw_lock_exclusive(&ifp->if_llreach_lock);
+	/* Initialize link-layer source tree (if not already) */
+	if (!reuse)
+		RB_INIT(&ifp->if_ll_srcs);
+	lck_rw_done(&ifp->if_llreach_lock);
+}
+
+void
+ifnet_llreach_ifdetach(struct ifnet *ifp)
+{
+#pragma unused(ifp)
+	/*
+	 * Nothing to do for now; the link-layer source tree might
+	 * contain entries at this point, that are still referred
+	 * to by route entries pointing to this ifp.
+	 */
+}
+
+/*
+ * Link-layer source tree comparison function.
+ *
+ * An ordered predicate is necessary; bcmp() is not documented to return
+ * an indication of order, whereas memcmp() is, and is an ISO C99 requirement.
+ */
+static __inline int
+iflr_cmp(const struct if_llreach *a, const struct if_llreach *b)
+{
+	return (memcmp(&a->lr_key, &b->lr_key, sizeof (a->lr_key)));
+}
+
+static __inline int
+iflr_reachable(struct if_llreach *lr, int cmp_delta, u_int64_t tval)
+{
+	u_int64_t now;
+	u_int64_t expire;
+
+	now = net_uptime();		/* current approx. uptime */
+	/*
+	 * No need for lr_lock; atomically read the last rcvd uptime.
+	 */
+	expire = lr->lr_lastrcvd + lr->lr_reachable;
+	/*
+	 * If we haven't heard back from the local host for over
+	 * lr_reachable seconds, consider that the host is no
+	 * longer reachable.
+	 */
+	if (!cmp_delta)
+		return (expire >= now);
+	/*
+	 * If the caller supplied a reference time, consider the
+	 * host is reachable if the record hasn't expired (see above)
+	 * and if the reference time is within the past lr_reachable
+	 * seconds.
+	 */
+	return ((expire >= now) && (now - tval) < lr->lr_reachable);
+}
+
+int
+ifnet_llreach_reachable(struct if_llreach *lr)
+{
+	/*
+	 * Check whether the cache is too old to be trusted.
+	 */
+	return (iflr_reachable(lr, 0, 0));
+}
+
+int
+ifnet_llreach_reachable_delta(struct if_llreach *lr, u_int64_t tval)
+{
+	/*
+	 * Check whether the cache is too old to be trusted.
+	 */
+	return (iflr_reachable(lr, 1, tval));
+}
+
+void
+ifnet_llreach_set_reachable(struct ifnet *ifp, u_int16_t llproto, void *addr,
+    unsigned int alen)
+{
+	struct if_llreach find, *lr;
+
+	VERIFY(alen == IF_LLREACH_MAXLEN);	/* for now */
+
+	find.lr_key.proto = llproto;
+	bcopy(addr, &find.lr_key.addr, IF_LLREACH_MAXLEN);
+
+	lck_rw_lock_shared(&ifp->if_llreach_lock);
+	lr = RB_FIND(ll_reach_tree, &ifp->if_ll_srcs, &find);
+	if (lr == NULL) {
+		lck_rw_done(&ifp->if_llreach_lock);
+		return;
+	}
+	/*
+	 * No need for lr_lock; atomically update the last rcvd uptime.
+	 */
+	lr->lr_lastrcvd = net_uptime();
+	lck_rw_done(&ifp->if_llreach_lock);
+}
+
+struct if_llreach *
+ifnet_llreach_alloc(struct ifnet *ifp, u_int16_t llproto, void *addr,
+    unsigned int alen, u_int64_t llreach_base)
+{
+	struct if_llreach find, *lr;
+	struct timeval now;
+
+	if (llreach_base == 0)
+		return (NULL);
+
+	VERIFY(alen == IF_LLREACH_MAXLEN);	/* for now */
+
+	find.lr_key.proto = llproto;
+	bcopy(addr, &find.lr_key.addr, IF_LLREACH_MAXLEN);
+
+	lck_rw_lock_shared(&ifp->if_llreach_lock);
+	lr = RB_FIND(ll_reach_tree, &ifp->if_ll_srcs, &find);
+	if (lr != NULL) {
+found:
+		IFLR_LOCK(lr);
+		VERIFY(lr->lr_reqcnt >= 1);
+		lr->lr_reqcnt++;
+		VERIFY(lr->lr_reqcnt != 0);
+		IFLR_ADDREF_LOCKED(lr);		/* for caller */
+		lr->lr_lastrcvd = net_uptime();	/* current approx. uptime */
+		IFLR_UNLOCK(lr);
+		lck_rw_done(&ifp->if_llreach_lock);
+		return (lr);
+	}
+
+	if (!lck_rw_lock_shared_to_exclusive(&ifp->if_llreach_lock))
+		lck_rw_lock_exclusive(&ifp->if_llreach_lock);
+
+	lck_rw_assert(&ifp->if_llreach_lock, LCK_RW_ASSERT_EXCLUSIVE);
+
+	/* in case things have changed while becoming writer */
+	lr = RB_FIND(ll_reach_tree, &ifp->if_ll_srcs, &find);
+	if (lr != NULL)
+		goto found;
+
+	lr = iflr_alloc(M_WAITOK);
+	if (lr == NULL) {
+		lck_rw_done(&ifp->if_llreach_lock);
+		return (NULL);
+	}
+	IFLR_LOCK(lr);
+	lr->lr_reqcnt++;
+	VERIFY(lr->lr_reqcnt == 1);
+	IFLR_ADDREF_LOCKED(lr);			/* for RB tree */
+	IFLR_ADDREF_LOCKED(lr);			/* for caller */
+	lr->lr_lastrcvd = net_uptime();		/* current approx. uptime */
+	lr->lr_baseup = lr->lr_lastrcvd;	/* base uptime */
+	microtime(&now);
+	lr->lr_basecal = now.tv_sec;		/* base calendar time */
+	lr->lr_basereachable = llreach_base;
+	lr->lr_reachable = LL_COMPUTE_RTIME(lr->lr_basereachable * 1000);
+	lr->lr_debug |= IFD_ATTACHED;
+	lr->lr_ifp = ifp;
+	lr->lr_key.proto = llproto;
+	bcopy(addr, &lr->lr_key.addr, IF_LLREACH_MAXLEN);
+	RB_INSERT(ll_reach_tree, &ifp->if_ll_srcs, lr);
+	IFLR_UNLOCK(lr);
+	lck_rw_done(&ifp->if_llreach_lock);
+
+	return (lr);
+}
+
+void
+ifnet_llreach_free(struct if_llreach *lr)
+{
+	struct ifnet *ifp;
+
+	/* no need to lock here; lr_ifp never changes */
+	ifp = lr->lr_ifp;
+
+	lck_rw_lock_exclusive(&ifp->if_llreach_lock);
+	IFLR_LOCK(lr);
+	if (lr->lr_reqcnt == 0) {
+		panic("%s: lr=%p negative reqcnt", __func__, lr);
+		/* NOTREACHED */
+	}
+	--lr->lr_reqcnt;
+	if (lr->lr_reqcnt > 0) {
+		IFLR_UNLOCK(lr);
+		lck_rw_done(&ifp->if_llreach_lock);
+		IFLR_REMREF(lr);		/* for caller */
+		return;
+	}
+	if (!(lr->lr_debug & IFD_ATTACHED)) {
+		panic("%s: Attempt to detach an unattached llreach lr=%p",
+		    __func__, lr);
+		/* NOTREACHED */
+	}
+	lr->lr_debug &= ~IFD_ATTACHED;
+	RB_REMOVE(ll_reach_tree, &ifp->if_ll_srcs, lr);
+	IFLR_UNLOCK(lr);
+	lck_rw_done(&ifp->if_llreach_lock);
+
+	IFLR_REMREF(lr);			/* for RB tree */
+	IFLR_REMREF(lr);			/* for caller */
+}
+
+u_int64_t
+ifnet_llreach_up2cal(struct if_llreach *lr, u_int64_t uptime)
+{
+	u_int64_t calendar = 0;
+
+	if (uptime != 0) {
+		struct timeval cnow;
+		u_int64_t unow;
+
+		getmicrotime(&cnow);	/* current calendar time */
+		unow = net_uptime();	/* current approx. uptime */
+		/*
+		 * Take into account possible calendar time changes;
+		 * adjust base calendar value if necessary, i.e.
+		 * the calendar skew should equate to the uptime skew.
+		 */
+		lr->lr_basecal += (cnow.tv_sec - lr->lr_basecal) -
+		    (unow - lr->lr_baseup);
+
+		calendar = lr->lr_basecal + lr->lr_reachable +
+		    (uptime - lr->lr_baseup);
+	}
+
+	return (calendar);
+}
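
The arithmetic above keeps expiration anchored to the monotonic uptime clock while reporting it in calendar time: any step in the wall clock shows up as the difference between the calendar delta and the uptime delta, and is folded into lr_basecal before the expiration is computed. A standalone numeric sketch of that adjustment (values in seconds, purely illustrative):

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t baseup = 100, basecal = 1000;	/* snapshots at alloc time */
	uint64_t up_now = 110;		/* uptime advanced by 10s... */
	uint64_t cal_now = 1070;	/* ...but the wall clock jumped 70s */
	uint64_t reachable = 30;	/* reachable window */
	uint64_t lastrcvd = 110;	/* last receive, on the uptime clock */

	/* Calendar skew minus uptime skew: folds in the 60s clock step. */
	basecal += (cal_now - basecal) - (up_now - baseup);

	/* Expiration in calendar terms, anchored to uptime (prints 1100). */
	printf("expire = %llu\n",
	    (unsigned long long)(basecal + reachable + (lastrcvd - baseup)));
	return (0);
}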
+
+static struct if_llreach *
+iflr_alloc(int how)
+{
+	struct if_llreach *lr;
+
+	lr = (how == M_WAITOK) ? zalloc(iflr_zone) : zalloc_noblock(iflr_zone);
+	if (lr != NULL) {
+		bzero(lr, iflr_size);
+		lck_mtx_init(&lr->lr_lock, ifnet_lock_group, ifnet_lock_attr);
+		lr->lr_debug |= IFD_ALLOC;
+	}
+	return (lr);
+}
+
+static void
+iflr_free(struct if_llreach *lr)
+{
+	IFLR_LOCK(lr);
+	if (lr->lr_debug & IFD_ATTACHED) {
+		panic("%s: attached lr=%p is being freed", __func__, lr);
+		/* NOTREACHED */
+	} else if (!(lr->lr_debug & IFD_ALLOC)) {
+		panic("%s: lr %p cannot be freed", __func__, lr);
+		/* NOTREACHED */
+	} else if (lr->lr_refcnt != 0) {
+		panic("%s: non-zero refcount lr=%p", __func__, lr);
+		/* NOTREACHED */
+	} else if (lr->lr_reqcnt != 0) {
+		panic("%s: non-zero reqcnt lr=%p", __func__, lr);
+		/* NOTREACHED */
+	}
+	lr->lr_debug &= ~IFD_ALLOC;
+	IFLR_UNLOCK(lr);
+
+	lck_mtx_destroy(&lr->lr_lock, ifnet_lock_group);
+	zfree(iflr_zone, lr);
+}
+
+void
+iflr_addref(struct if_llreach *lr, int locked)
+{
+	if (!locked)
+		IFLR_LOCK(lr);
+	else
+		IFLR_LOCK_ASSERT_HELD(lr);
+
+	if (++lr->lr_refcnt == 0) {
+		panic("%s: lr=%p wraparound refcnt", __func__, lr);
+		/* NOTREACHED */
+	}
+	if (!locked)
+		IFLR_UNLOCK(lr);
+}
+
+void
+iflr_remref(struct if_llreach *lr)
+{
+	IFLR_LOCK(lr);
+	if (lr->lr_refcnt == 0) {
+		panic("%s: lr=%p negative refcnt", __func__, lr);
+		/* NOTREACHED */
+	}
+	--lr->lr_refcnt;
+	if (lr->lr_refcnt > 0) {
+		IFLR_UNLOCK(lr);
+		return;
+	}
+	IFLR_UNLOCK(lr);
+
+	iflr_free(lr);	/* deallocate it */
+}
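
Two counters with different jobs cooperate in this file: lr_reqcnt counts outstanding ifnet_llreach_alloc() requests and decides when the entry leaves the tree, while lr_refcnt counts memory references (the tree itself holds one) and decides when the entry is freed. A single-threaded toy model of that split, with illustrative names (the kernel versions serialize with lr_lock and the ifnet's rwlock):

#include <assert.h>
#include <stdlib.h>

struct entry {
	unsigned int	reqcnt;		/* outstanding alloc requests */
	unsigned int	refcnt;		/* memory refs, incl. the tree's */
	int		in_tree;
};

static struct entry *
entry_alloc(void)
{
	struct entry *e = calloc(1, sizeof (*e));

	if (e != NULL) {
		e->reqcnt = 1;		/* the caller's request */
		e->refcnt = 2;		/* one for the tree, one for the caller */
		e->in_tree = 1;
	}
	return (e);
}

static void
entry_remref(struct entry *e)
{
	assert(e->refcnt != 0);
	if (--e->refcnt == 0)
		free(e);		/* last memory reference */
}

static void
entry_release(struct entry *e)		/* cf. ifnet_llreach_free() */
{
	assert(e->reqcnt != 0);
	if (--e->reqcnt == 0 && e->in_tree) {
		e->in_tree = 0;		/* leave the tree... */
		entry_remref(e);	/* ...and drop the tree's reference */
	}
	entry_remref(e);		/* drop the caller's reference */
}

int
main(void)
{
	struct entry *e = entry_alloc();

	entry_release(e);		/* balances entry_alloc(); frees e */
	return (0);
}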
+
+void
+ifnet_lr2ri(struct if_llreach *lr, struct rt_reach_info *ri)
+{
+	struct if_llreach_info lri;
+
+	IFLR_LOCK_ASSERT_HELD(lr);
+
+	bzero(ri, sizeof (*ri));
+	ifnet_lr2lri(lr, &lri);
+	ri->ri_refcnt = lri.lri_refcnt;
+	ri->ri_probes = lri.lri_probes;
+	ri->ri_rcv_expire = lri.lri_expire;
+}
+
+void
+ifnet_lr2lri(struct if_llreach *lr, struct if_llreach_info *lri)
+{
+	IFLR_LOCK_ASSERT_HELD(lr);
+
+	bzero(lri, sizeof (*lri));
+	/*
+	 * Note that we return the request count here, not the actual
+	 * memory refcnt.
+	 */
+	lri->lri_refcnt	= lr->lr_reqcnt;
+	lri->lri_ifindex = lr->lr_ifp->if_index;
+	lri->lri_probes	= lr->lr_probes;
+	lri->lri_expire = ifnet_llreach_up2cal(lr, lr->lr_lastrcvd);
+	lri->lri_proto = lr->lr_key.proto;
+	bcopy(&lr->lr_key.addr, &lri->lri_addr, IF_LLREACH_MAXLEN);
+}
+
+static int
+sysctl_llreach_ifinfo SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp)
+	int		*name, retval = 0;
+	unsigned int	namelen;
+	uint32_t	ifindex;
+	struct if_llreach *lr;
+	struct if_llreach_info lri;
+	struct ifnet	*ifp;
+
+	name = (int *)arg1;
+	namelen = (unsigned int)arg2;
+
+	if (req->newptr != USER_ADDR_NULL)
+		return (EPERM);
+
+	if (namelen != 1)
+		return (EINVAL);
+
+	ifindex = name[0];
+	ifnet_head_lock_shared();
+	if (ifindex <= 0 || ifindex > (u_int)if_index) {
+		printf("%s: ifindex %u out of range\n", __func__, ifindex);
+		ifnet_head_done();
+		return (ENOENT);
+	}
+
+	ifp = ifindex2ifnet[ifindex];
+	ifnet_head_done();
+	if (ifp == NULL) {
+		printf("%s: no ifp for ifindex %u\n", __func__, ifindex);
+		return (ENOENT);
+	}
+
+	lck_rw_lock_shared(&ifp->if_llreach_lock);
+	RB_FOREACH(lr, ll_reach_tree, &ifp->if_ll_srcs) {
+		/* Export to if_llreach_info structure */
+		IFLR_LOCK(lr);
+		ifnet_lr2lri(lr, &lri);
+		IFLR_UNLOCK(lr);
+
+		if ((retval = SYSCTL_OUT(req, &lri, sizeof (lri))) != 0)
+			break;
+	}
+	lck_rw_done(&ifp->if_llreach_lock);
+
+	return (retval);
+}
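
For reference, the handler takes a single trailing integer (the interface index) and streams one if_llreach_info record per tree entry via SYSCTL_OUT(). A hedged user-space sketch of a consumer follows; the leading OID components are an assumption, since the node registration is not part of this hunk -- only the trailing ifindex element is prescribed by the handler itself:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Fetch the if_llreach_info records for one interface.  The caller
 * supplies the leading OID components ("prefix"), which are assumed
 * here; the handler only dictates the final ifindex element.
 */
int
read_llreach(const int *prefix, u_int prefixlen, int ifindex)
{
	int mib[CTL_MAXNAME];
	size_t len = 0;
	u_int i;
	char *buf;

	for (i = 0; i < prefixlen; i++)
		mib[i] = prefix[i];
	mib[prefixlen] = ifindex;		/* name[0] in the handler */

	/* First call sizes the buffer; second call fetches the records. */
	if (sysctl(mib, prefixlen + 1, NULL, &len, NULL, 0) == -1)
		return (-1);
	if ((buf = malloc(len)) == NULL)
		return (-1);
	if (sysctl(mib, prefixlen + 1, buf, &len, NULL, 0) == -1) {
		free(buf);
		return (-1);
	}
	printf("%zu bytes of if_llreach_info records\n", len);
	free(buf);
	return (0);
}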
diff --git a/bsd/net/if_llreach.h b/bsd/net/if_llreach.h
new file mode 100644
index 000000000..e922fb0e4
--- /dev/null
+++ b/bsd/net/if_llreach.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef	_NET_IF_LLREACH_H_
+#define	_NET_IF_LLREACH_H_
+
+#ifdef PRIVATE
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+
+/*
+ * Per-interface link-layer reachability information (private).
+ */
+#define	IF_LLREACHINFO_ADDRLEN	64		/* max ll addr len */
+
+struct if_llreach_info {
+	u_int32_t		lri_refcnt;	/* reference count */
+	u_int32_t		lri_ifindex;	/* interface index */
+	u_int64_t		lri_expire;	/* expiration (calendar) time */
+	u_int32_t		lri_probes;	/* total # of probes */
+	u_int16_t		lri_reserved;	/* for future use */
+	u_int16_t		lri_proto;	/* ll proto */
+	u_int8_t		lri_addr[IF_LLREACHINFO_ADDRLEN]; /* ll addr */
+};
+
+#ifdef XNU_KERNEL_PRIVATE
+#include <sys/tree.h>
+#include <kern/lock.h>
+#include <net/ethernet.h>
+#include <netinet/in.h>
+#if INET6
+#include <netinet6/in6_var.h>
+#include <netinet6/nd6.h>
+#endif /* INET6 */
+
+/*
+ * Link-layer reachability is based on the node constants in RFC 4861.
+ */
+#if INET6
+#define	LL_BASE_REACHABLE	REACHABLE_TIME
+#else
+#define	LL_BASE_REACHABLE	30000	/* msec */
+#endif /* !INET6 */
+
+/*
+ * Per-interface link-layer reachability.  (Currently only for ARP/Ethernet.)
+ */
+#define	IF_LLREACH_MAXLEN	ETHER_ADDR_LEN
+
+struct if_llreach {
+	decl_lck_mtx_data(, lr_lock);
+	RB_ENTRY(if_llreach)	lr_link;	/* RB tree links */
+	struct ifnet		*lr_ifp;	/* back pointer to ifnet */
+	u_int32_t		lr_refcnt;	/* reference count */
+	u_int32_t		lr_reqcnt;	/* RB tree request count */
+	u_int32_t		lr_debug;	/* see ifa_debug flags */
+	u_int32_t		lr_probes;	/* number of probes so far */
+	u_int64_t		lr_basecal;	/* base calendar time */
+	u_int64_t		lr_baseup;	/* base uptime */
+	u_int64_t		lr_lastrcvd;	/* last-heard-of timestamp */
+	u_int32_t		lr_basereachable; /* baseline time */
+	u_int32_t		lr_reachable;	/* reachable time */
+	struct lr_key_s {
+		u_int16_t	proto;		/* ll proto */
+		u_int8_t	addr[IF_LLREACH_MAXLEN]; /* ll addr */
+	} lr_key;
+};
+
+RB_PROTOTYPE_SC_PREV(__private_extern__, ll_reach_tree, if_llreach,
+    lr_link, ifllr_cmp);
+
+#define	IFLR_LOCK_ASSERT_HELD(_iflr)					\
+	lck_mtx_assert(&(_iflr)->lr_lock, LCK_MTX_ASSERT_OWNED)
+
+#define	IFLR_LOCK_ASSERT_NOTHELD(_iflr)				\
+	lck_mtx_assert(&(_iflr)->lr_lock, LCK_MTX_ASSERT_NOTOWNED)
+
+#define	IFLR_LOCK(_iflr)						\
+	lck_mtx_lock(&(_iflr)->lr_lock)
+
+#define	IFLR_LOCK_SPIN(_iflr)						\
+	lck_mtx_lock_spin(&(_iflr)->lr_lock)
+
+#define	IFLR_CONVERT_LOCK(_iflr) do {					\
+	IFLR_LOCK_ASSERT_HELD(_iflr);					\
+	lck_mtx_convert_spin(&(_iflr)->lr_lock);			\
+} while (0)
+
+#define	IFLR_UNLOCK(_iflr)						\
+	lck_mtx_unlock(&(_iflr)->lr_lock)
+
+#define	IFLR_ADDREF(_iflr)						\
+	iflr_addref(_iflr, 0)
+
+#define	IFLR_ADDREF_LOCKED(_iflr)					\
+	iflr_addref(_iflr, 1)
+
+#define	IFLR_REMREF(_iflr)						\
+	iflr_remref(_iflr)
+
+extern void ifnet_llreach_init(void);
+extern void ifnet_llreach_ifattach(struct ifnet *, boolean_t);
+extern void ifnet_llreach_ifdetach(struct ifnet *);
+extern struct if_llreach *ifnet_llreach_alloc(struct ifnet *, u_int16_t, void *,
+    unsigned int, u_int64_t);
+extern void ifnet_llreach_free(struct if_llreach *);
+extern int ifnet_llreach_reachable(struct if_llreach *);
+extern int ifnet_llreach_reachable_delta(struct if_llreach *, u_int64_t);
+extern void ifnet_llreach_set_reachable(struct ifnet *, u_int16_t, void *,
+    unsigned int);
+extern u_int64_t ifnet_llreach_up2cal(struct if_llreach *, u_int64_t);
+extern void ifnet_lr2ri(struct if_llreach *, struct rt_reach_info *);
+extern void ifnet_lr2lri(struct if_llreach *, struct if_llreach_info *);
+extern void iflr_addref(struct if_llreach *, int);
+extern void iflr_remref(struct if_llreach *);
+#endif /* XNU_KERNEL_PRIVATE */
+
+#ifdef  __cplusplus
+}
+#endif
+#endif /* PRIVATE */
+#endif /* !_NET_IF_LLREACH_H_ */
diff --git a/bsd/net/if_loop.c b/bsd/net/if_loop.c
index f62bdc362..5ba5b11a5 100644
--- a/bsd/net/if_loop.c
+++ b/bsd/net/if_loop.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -79,6 +79,7 @@
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
+#include <sys/mcache.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
@@ -106,11 +107,6 @@
 extern struct ifqueue atalkintrq;
 #endif
 
-#include "bpfilter.h"
-#if NBPFILTER > 0
-#include <net/bpfdesc.h>
-#endif
-
 #if CONFIG_MACF_NET
 #include <security/mac_framework.h>
 #endif
@@ -214,11 +210,11 @@ lo_output(
 		if (m->m_pkthdr.rcvif == NULL)
 			m->m_pkthdr.rcvif = ifp;
 
-		ifp->if_ibytes += m->m_pkthdr.len;
-		ifp->if_obytes += m->m_pkthdr.len;
+		atomic_add_64(&ifp->if_ibytes, m->m_pkthdr.len);
+		atomic_add_64(&ifp->if_obytes, m->m_pkthdr.len);
 
-		ifp->if_opackets++;
-		ifp->if_ipackets++;
+		atomic_add_64(&ifp->if_opackets, 1);
+		atomic_add_64(&ifp->if_ipackets, 1);
 
 		m->m_pkthdr.header = mtod(m, char *);
 		if (apple_hwcksum_tx != 0) {
@@ -339,7 +335,9 @@ loioctl(
 	case SIOCSIFADDR:
 		ifnet_set_flags(ifp, IFF_UP | IFF_RUNNING, IFF_UP | IFF_RUNNING);
 		ifa = (struct ifaddr *)data;
+		IFA_LOCK_SPIN(ifa);
 		ifa->ifa_rtrequest = lortrequest;
+		IFA_UNLOCK(ifa);
 		/*
 		 * Everything else is done at a higher level.
 		 */
@@ -475,7 +473,9 @@ More than one loopback interface is not supported.
 	
 	ifnet_set_mtu(lo_ifp, LOMTU);
 	ifnet_set_flags(lo_ifp, IFF_LOOPBACK | IFF_MULTICAST, IFF_LOOPBACK | IFF_MULTICAST);
-	ifnet_set_offload(lo_ifp, IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP | IFNET_CSUM_FRAGMENT | IFNET_IP_FRAGMENT | IFNET_MULTIPAGES);
+	ifnet_set_offload(lo_ifp, IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
+		IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_IPV6_FRAGMENT |
+		IFNET_CSUM_FRAGMENT | IFNET_IP_FRAGMENT | IFNET_MULTIPAGES);
 	ifnet_set_hdrlen(lo_ifp, sizeof(struct loopback_header));
 	ifnet_set_eflags(lo_ifp, IFEF_SENDLIST, IFEF_SENDLIST);
 
diff --git a/bsd/net/if_media.h b/bsd/net/if_media.h
index 12cbc871b..32afe224d 100644
--- a/bsd/net/if_media.h
+++ b/bsd/net/if_media.h
@@ -221,7 +221,7 @@ int	ifmedia_ioctl(struct ifnet *ifp, struct ifreq *ifr,
 #define IFM_FDX		0x00100000	/* Force full duplex */
 #define	IFM_HDX		0x00200000	/* Force half duplex */
 #define	IFM_FLOW	0x00400000	/* enable hardware flow control */
-#define IFM_EEE		0x00800000	/* Support energy efficient ethernet */
+#define IFM_EEE		0x00800000	/* Driver defined flag */
 #define IFM_FLAG0	0x01000000	/* Driver defined flag */
 #define IFM_FLAG1	0x02000000	/* Driver defined flag */
 #define IFM_FLAG2	0x04000000	/* Driver defined flag */
diff --git a/bsd/net/if_mib.c b/bsd/net/if_mib.c
index b21529b2a..9ab76f698 100644
--- a/bsd/net/if_mib.c
+++ b/bsd/net/if_mib.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -67,8 +67,6 @@
 #include <net/if_mib.h>
 #include <net/if_var.h>
 
-#if NETMIBS
-
 /*
  * A sysctl(3) MIB for generic interface information.  This information
  * is exported in the net.link.generic branch, which has the following
@@ -97,31 +95,17 @@ SYSCTL_DECL(_net_link_generic);
 SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RD|CTLFLAG_LOCKED, 0,
 	    "Variables global to all interfaces");
 
-SYSCTL_INT(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, CTLFLAG_RD,
+SYSCTL_INT(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, CTLFLAG_RD | CTLFLAG_LOCKED,
 	   &if_index, 0, "Number of configured interfaces");
 
 static int sysctl_ifdata SYSCTL_HANDLER_ARGS;
-SYSCTL_NODE(_net_link_generic, IFMIB_IFDATA, ifdata, CTLFLAG_RD,
+SYSCTL_NODE(_net_link_generic, IFMIB_IFDATA, ifdata, CTLFLAG_RD | CTLFLAG_LOCKED,
             sysctl_ifdata, "Interface table");
 
 static int sysctl_ifalldata SYSCTL_HANDLER_ARGS;
-SYSCTL_NODE(_net_link_generic, IFMIB_IFALLDATA, ifalldata, CTLFLAG_RD,
+SYSCTL_NODE(_net_link_generic, IFMIB_IFALLDATA, ifalldata, CTLFLAG_RD | CTLFLAG_LOCKED,
             sysctl_ifalldata, "Interface table");
 
-extern int dlil_multithreaded_input;
-SYSCTL_INT(_net_link_generic_system, OID_AUTO, multi_threaded_input, CTLFLAG_RW,
-		    &dlil_multithreaded_input , 0, "Uses multiple input thread for DLIL input");
-#ifdef IFNET_INPUT_SANITY_CHK
-extern int dlil_input_sanity_check;
-SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check, CTLFLAG_RW,
-		    &dlil_input_sanity_check , 0, "Turn on sanity checking in DLIL input");
-#endif
-
-extern int dlil_verbose;
-SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose, CTLFLAG_RW,
-           &dlil_verbose, 0, "Log DLIL error messages");
-
-
 static int make_ifmibdata(struct ifnet *, int *, struct sysctl_req *);
 
 int 
@@ -140,7 +124,7 @@ make_ifmibdata(struct ifnet *ifp, int *name, struct sysctl_req *req)
 		/*
 		 * Make sure the interface is in use
 		 */
-		if (ifp->if_refcnt > 0) {
+		if (ifnet_is_attached(ifp, 0)) {
 			snprintf(ifmd.ifmd_name, sizeof(ifmd.ifmd_name), "%s%d",
 				ifp->if_name, ifp->if_unit);
 	
@@ -191,11 +175,14 @@ make_ifmibdata(struct ifnet *ifp, int *name, struct sysctl_req *req)
 #endif /* IF_MIB_WR */
 		break;
 
-#if PKT_PRIORITY
-	case IFDATA_SUPPLEMENTAL:
-		error = SYSCTL_OUT(req, &ifp->if_tc, sizeof(struct if_traffic_class));
+	case IFDATA_SUPPLEMENTAL: {
+		struct if_traffic_class if_tc;
+
+		if_copy_traffic_class(ifp, &if_tc);
+		
+		error = SYSCTL_OUT(req, &if_tc, sizeof(struct if_traffic_class));
 		break;
-#endif /* PKT_PRIORITY */
+	}
 	}
 	
 	return error;
@@ -211,23 +198,24 @@ sysctl_ifdata SYSCTL_HANDLER_ARGS /* XXX bad syntax! */
 	struct ifnet *ifp;
 
 	if (namelen != 2)
-		return EINVAL;
+		return (EINVAL);
+
 	ifnet_head_lock_shared();
 	if (name[0] <= 0 || name[0] > if_index ||
-	    (ifp = ifindex2ifnet[name[0]]) == NULL ||
-	    ifp->if_refcnt == 0) {
+	    (ifp = ifindex2ifnet[name[0]]) == NULL) {
 		ifnet_head_done();
-		return ENOENT;
+		return (ENOENT);
 	}
+	ifnet_reference(ifp);
 	ifnet_head_done();
 
 	ifnet_lock_shared(ifp);
-	
 	error = make_ifmibdata(ifp, name, req);
-	
 	ifnet_lock_done(ifp);
-	
-	return error;
+
+	ifnet_release(ifp);
+
+	return (error);
 }
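
The rewritten sysctl_ifdata() also shows the lifetime rule used throughout this patch: take a reference on the object while the lock that found it is still held, so it cannot be reclaimed between ifnet_head_done() and ifnet_release(). A compact user-space sketch of the same rule, with illustrative names:

#include <pthread.h>
#include <stdlib.h>

struct obj {
	int		 key;
	unsigned int	 refcnt;	/* protected by list_mtx here */
	struct obj	*next;
};

static struct obj *objs;
static pthread_mutex_t list_mtx = PTHREAD_MUTEX_INITIALIZER;

/* Look up an object and return it pinned by a reference. */
struct obj *
obj_lookup(int key)
{
	struct obj *o;

	pthread_mutex_lock(&list_mtx);
	for (o = objs; o != NULL; o = o->next)
		if (o->key == key)
			break;
	if (o != NULL)
		o->refcnt++;	/* pin before dropping the list lock */
	pthread_mutex_unlock(&list_mtx);
	return (o);
}

void
obj_release(struct obj *o)
{
	int last;

	pthread_mutex_lock(&list_mtx);
	last = (--o->refcnt == 0);
	pthread_mutex_unlock(&list_mtx);
	if (last)
		free(o);	/* already unlinked from the list elsewhere */
}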
 
 int
@@ -240,20 +228,18 @@ sysctl_ifalldata SYSCTL_HANDLER_ARGS /* XXX bad syntax! */
 	struct ifnet *ifp;
 
 	if (namelen != 2)
-		return EINVAL;
+		return (EINVAL);
 
 	ifnet_head_lock_shared();
 	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
 		ifnet_lock_shared(ifp);
-		
+
 		error = make_ifmibdata(ifp, name, req);
-		
+
 		ifnet_lock_done(ifp);
-		if (error)
+		if (error != 0)
 			break;
 	}
 	ifnet_head_done();
 	return error;
 }
-
-#endif
diff --git a/bsd/net/if_mib.h b/bsd/net/if_mib.h
index 36d2667a4..5b773bddf 100644
--- a/bsd/net/if_mib.h
+++ b/bsd/net/if_mib.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
diff --git a/bsd/net/if_pflog.c b/bsd/net/if_pflog.c
index 8e7480911..ae2f9254c 100644
--- a/bsd/net/if_pflog.c
+++ b/bsd/net/if_pflog.c
@@ -68,6 +68,7 @@
 #include <sys/proc_internal.h>
 #include <sys/socket.h>
 #include <sys/ioctl.h>
+#include <sys/mcache.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
@@ -344,8 +345,8 @@ pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir,
 	}
 #endif /* INET */
 
-	ifn->if_opackets++;
-	ifn->if_obytes += m->m_pkthdr.len;
+	atomic_add_64(&ifn->if_opackets, 1);
+	atomic_add_64(&ifn->if_obytes, m->m_pkthdr.len);
 
 	switch (dir) {
 	case PF_IN:
diff --git a/bsd/net/if_stf.c b/bsd/net/if_stf.c
index 96b9664b5..c9d24e249 100644
--- a/bsd/net/if_stf.c
+++ b/bsd/net/if_stf.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -120,6 +120,8 @@
 
 #include <sys/malloc.h>
 
+#include <kern/locks.h>
+
 #include <net/if.h>
 #include <net/route.h>
 #include <net/if_types.h>
@@ -148,9 +150,10 @@
 #include <security/mac_framework.h>
 #endif
 
-#define IN6_IS_ADDR_6TO4(x)	(ntohs((x)->s6_addr16[0]) == 0x2002)
 #define GET_V4(x)	((const struct in_addr *)(&(x)->s6_addr16[1]))
 
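GET_V4() depends on the 6to4 layout from RFC 3056: the embedded IPv4 address occupies bytes 2 through 5 of the IPv6 address, immediately after the 0x2002 prefix (s6_addr16[1] points at those bytes). A standalone illustration, matching the 10.1.1.1 / 2002:0a01:0101:: example quoted in the comments further down:

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

int
main(void)
{
	struct in6_addr six;
	struct in_addr four;
	char buf[INET_ADDRSTRLEN];

	/* 2002:0a01:0101:: embeds 10.1.1.1 per the 6to4 layout. */
	inet_pton(AF_INET6, "2002:a01:101::", &six);
	memcpy(&four, &six.s6_addr[2], sizeof (four));
	printf("%s\n", inet_ntop(AF_INET, &four, buf, sizeof (buf)));
	return (0);
}
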
+static lck_grp_t *stf_mtx_grp;
+
 struct stf_softc {
 	ifnet_t				sc_if;	   /* common area */
 	u_int32_t				sc_protocol_family; /* dlil protocol attached */
@@ -159,6 +162,7 @@ struct stf_softc {
 		struct route_in6 __sc_ro6; /* just for safety */
 	} __sc_ro46;
 #define sc_ro	__sc_ro46.__sc_ro4
+	decl_lck_mtx_data(, sc_ro_mtx);
 	const struct encaptab *encap_cookie;
 	bpf_tap_mode		tap_mode;
 	bpf_packet_func		tap_callback;
@@ -167,14 +171,16 @@ struct stf_softc {
 void stfattach (void);
 
 static int ip_stf_ttl = 40;
+static int stf_init_done;
 
 static void in_stf_input(struct mbuf *, int);
+static void stfinit(void);
 extern  struct domain inetdomain;
 struct protosw in_stf_protosw =
 { SOCK_RAW,	&inetdomain,	IPPROTO_IPV6,	PR_ATOMIC|PR_ADDR,
   in_stf_input, NULL,	NULL,		rip_ctloutput,
   NULL,
-  NULL,		NULL,	NULL,	NULL,
+  NULL,	NULL,	NULL,	NULL,
   NULL,
   &rip_usrreqs,
   NULL,		rip_unlock,	NULL, {NULL, NULL}, NULL, {0}
@@ -192,6 +198,15 @@ static void stf_rtrequest(int, struct rtentry *, struct sockaddr *);
 static errno_t stf_ioctl(ifnet_t ifp, u_long cmd, void *data);
 static errno_t stf_output(ifnet_t ifp, mbuf_t m);
 
+static void
+stfinit(void)
+{
+	if (!stf_init_done) {
+		stf_mtx_grp = lck_grp_alloc_init("stf", LCK_GRP_ATTR_NULL);
+		stf_init_done = 1;
+	}
+}
+
 /*
  * gif_input is the input handler for IP and IPv6 attached to gif
  */
@@ -202,7 +217,8 @@ stf_media_input(
 	mbuf_t				m,
 	__unused char		*frame_header)
 {
-	proto_input(protocol_family, m);
+	if (proto_input(protocol_family, m) != 0)
+		m_freem(m);
 
 	return (0);
 }
@@ -297,6 +313,8 @@ stfattach(void)
 	const struct encaptab *p;
 	struct ifnet_init_params	stf_init;
 
+	stfinit();
+
 	error = proto_register_plumber(PF_INET6, APPLE_IF_FAM_STF,
 								   stf_attach_inet6, NULL);
 	if (error != 0)
@@ -318,6 +336,7 @@ stfattach(void)
 		return;
 	}
 	sc->encap_cookie = p;
+	lck_mtx_init(&sc->sc_ro_mtx, stf_mtx_grp, LCK_ATTR_NULL);
 	
 	bzero(&stf_init, sizeof(stf_init));
 	stf_init.name = "stf";
@@ -336,6 +355,7 @@ stfattach(void)
 	if (error != 0) {
 		printf("stfattach, ifnet_allocate failed - %d\n", error);
 		encap_detach(sc->encap_cookie);
+		lck_mtx_destroy(&sc->sc_ro_mtx, stf_mtx_grp);
 		FREE(sc, M_DEVBUF);
 		return;
 	}
@@ -355,6 +375,7 @@ stfattach(void)
 		printf("stfattach: ifnet_attach returned error=%d\n", error);
 		encap_detach(sc->encap_cookie);
 		ifnet_release(sc->sc_if);
+		lck_mtx_destroy(&sc->sc_ro_mtx, stf_mtx_grp);
 		FREE(sc, M_DEVBUF);
 		return;
 	}
@@ -404,9 +425,11 @@ stf_encapcheck(
 	 * local 6to4 address.
 	 * success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:...
 	 */
+	IFA_LOCK(&ia6->ia_ifa);
 	if (bcmp(GET_V4(&ia6->ia_addr.sin6_addr), &ip.ip_dst,
 	    sizeof(ip.ip_dst)) != 0) {
-		ifafree(&ia6->ia_ifa);
+		IFA_UNLOCK(&ia6->ia_ifa);
+		IFA_REMREF(&ia6->ia_ifa);
 		return 0;
 	}
 	/*
@@ -421,11 +444,13 @@ stf_encapcheck(
 	b = ip.ip_src;
 	b.s_addr &= GET_V4(&ia6->ia_prefixmask.sin6_addr)->s_addr;
 	if (a.s_addr != b.s_addr) {
-		ifafree(&ia6->ia_ifa);
+		IFA_UNLOCK(&ia6->ia_ifa);
+		IFA_REMREF(&ia6->ia_ifa);
 		return 0;
 	}
 	/* stf interface makes single side match only */
-	ifafree(&ia6->ia_ifa);
+	IFA_UNLOCK(&ia6->ia_ifa);
+	IFA_REMREF(&ia6->ia_ifa);
 	return 32;
 }
 
@@ -438,38 +463,46 @@ stf_getsrcifa6(struct ifnet *ifp)
 	struct in_addr in;
 
 	ifnet_lock_shared(ifp);
-	for (ia = ifp->if_addrlist.tqh_first;
-	     ia;
-	     ia = ia->ifa_list.tqe_next)
-	{
-		if (ia->ifa_addr == NULL)
+	for (ia = ifp->if_addrlist.tqh_first; ia; ia = ia->ifa_list.tqe_next) {
+		IFA_LOCK(ia);
+		if (ia->ifa_addr == NULL) {
+			IFA_UNLOCK(ia);
 			continue;
-		if (ia->ifa_addr->sa_family != AF_INET6)
+		}
+		if (ia->ifa_addr->sa_family != AF_INET6) {
+			IFA_UNLOCK(ia);
 			continue;
+		}
 		sin6 = (struct sockaddr_in6 *)ia->ifa_addr;
-		if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr))
+		if (!IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) {
+			IFA_UNLOCK(ia);
 			continue;
-
+		}
 		bcopy(GET_V4(&sin6->sin6_addr), &in, sizeof(in));
+		IFA_UNLOCK(ia);
 		lck_rw_lock_shared(in_ifaddr_rwlock);
 		for (ia4 = TAILQ_FIRST(&in_ifaddrhead);
 		     ia4;
 		     ia4 = TAILQ_NEXT(ia4, ia_link))
 		{
-			if (ia4->ia_addr.sin_addr.s_addr == in.s_addr)
+			IFA_LOCK(&ia4->ia_ifa);
+			if (ia4->ia_addr.sin_addr.s_addr == in.s_addr) {
+				IFA_UNLOCK(&ia4->ia_ifa);
 				break;
+			}
+			IFA_UNLOCK(&ia4->ia_ifa);
 		}
 		lck_rw_done(in_ifaddr_rwlock);
 		if (ia4 == NULL)
 			continue;
 
-		ifaref(ia);
+		IFA_ADDREF(ia);		/* for caller */
 		ifnet_lock_done(ifp);
-		return (struct in6_ifaddr *)ia;
+		return ((struct in6_ifaddr *)ia);
 	}
 	ifnet_lock_done(ifp);
 
-	return NULL;
+	return (NULL);
 }
 
 int
@@ -491,6 +524,7 @@ stf_pre_output(
 	struct ip6_hdr *ip6;
 	struct in6_ifaddr *ia6;
 	struct sockaddr_in 	*dst4;
+	struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
 	errno_t				result = 0;
 
 	sc = ifnet_softc(ifp);
@@ -516,7 +550,7 @@ stf_pre_output(
 		m = m_pullup(m, sizeof(*ip6));
 		if (!m) {
 			*m0 = NULL; /* makes sure this won't be double freed */
-			ifafree(&ia6->ia_ifa);
+			IFA_REMREF(&ia6->ia_ifa);
 			return ENOBUFS;
 		}
 	}
@@ -532,7 +566,7 @@ stf_pre_output(
 	else if (IN6_IS_ADDR_6TO4(&dst6->sin6_addr))
 		in4 = GET_V4(&dst6->sin6_addr);
 	else {
-		ifafree(&ia6->ia_ifa);
+		IFA_REMREF(&ia6->ia_ifa);
 		return ENETUNREACH;
 	}
 
@@ -548,15 +582,17 @@ stf_pre_output(
 		m = m_pullup(m, sizeof(struct ip));
 	if (m == NULL) {
 		*m0 = NULL; 
-		ifafree(&ia6->ia_ifa);
+		IFA_REMREF(&ia6->ia_ifa);
 		return ENOBUFS;
 	}
 	ip = mtod(m, struct ip *);
 
 	bzero(ip, sizeof(*ip));
 
+	IFA_LOCK_SPIN(&ia6->ia_ifa);
 	bcopy(GET_V4(&((struct sockaddr_in6 *)&ia6->ia_addr)->sin6_addr),
 	    &ip->ip_src, sizeof(ip->ip_src));
+	IFA_UNLOCK(&ia6->ia_ifa);
 	bcopy(in4, &ip->ip_dst, sizeof(ip->ip_dst));
 	ip->ip_p = IPPROTO_IPV6;
 	ip->ip_ttl = ip_stf_ttl;
@@ -566,11 +602,11 @@ stf_pre_output(
 	else
 		ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
 
+	lck_mtx_lock(&sc->sc_ro_mtx);
 	dst4 = (struct sockaddr_in *)&sc->sc_ro.ro_dst;
 	if (dst4->sin_family != AF_INET ||
 	    bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) {
-		/* cache route doesn't match */
-		printf("stf_output: cached route doesn't match \n");
+		/* cached route doesn't match; always the case on first use */
 		dst4->sin_family = AF_INET;
 		dst4->sin_len = sizeof(struct sockaddr_in);
 		bcopy(&ip->ip_dst, &dst4->sin_addr, sizeof(dst4->sin_addr));
@@ -580,21 +616,15 @@ stf_pre_output(
 		}
 	}
 
-	if (sc->sc_ro.ro_rt == NULL) {
-		rtalloc(&sc->sc_ro);
-		if (sc->sc_ro.ro_rt == NULL) {
-			ifafree(&ia6->ia_ifa);
-			return ENETUNREACH;
-		}
-	}
+	result = ip_output_list(m, 0, NULL, &sc->sc_ro, IP_OUTARGS, NULL, &ipoa);
+	lck_mtx_unlock(&sc->sc_ro_mtx);
 
-	result = ip_output_list(m, 0, NULL, &sc->sc_ro, 0, NULL, NULL);
 	/* Assumption: ip_output will free mbuf on errors */
 	/* All the output processing is done here, don't let stf_output be called */
 	if (result == 0)
 		result = EJUSTRETURN;
 	*m0 = NULL;
-	ifafree(&ia6->ia_ifa);
+	IFA_REMREF(&ia6->ia_ifa);
 	return result;
 }
 static errno_t
@@ -635,12 +665,17 @@ stf_checkaddr4(
 	     ia4;
 	     ia4 = TAILQ_NEXT(ia4, ia_link))
 	{
-		if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
+		IFA_LOCK(&ia4->ia_ifa);
+		if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0) {
+			IFA_UNLOCK(&ia4->ia_ifa);
 			continue;
+		}
 		if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) {
+			IFA_UNLOCK(&ia4->ia_ifa);
 			lck_rw_done(in_ifaddr_rwlock);
 			return -1;
 		}
+		IFA_UNLOCK(&ia4->ia_ifa);
 	}
 	lck_rw_done(in_ifaddr_rwlock);
 
@@ -820,7 +855,13 @@ stf_ioctl(
 	switch (cmd) {
 	case SIOCSIFADDR:
 		ifa = (struct ifaddr *)data;
-		if (ifa == NULL || ifa->ifa_addr->sa_family != AF_INET6) {
+		if (ifa == NULL) {
+			error = EAFNOSUPPORT;
+			break;
+		}
+		IFA_LOCK(ifa);
+		if (ifa->ifa_addr->sa_family != AF_INET6) {
+			IFA_UNLOCK(ifa);
 			error = EAFNOSUPPORT;
 			break;
 		}
@@ -829,10 +870,16 @@ stf_ioctl(
                         if ( !(ifnet_flags( ifp ) & IFF_UP) ) {
                                 /* do this only if the interface is not already up */
 				ifa->ifa_rtrequest = stf_rtrequest;
+				IFA_UNLOCK(ifa);
 				ifnet_set_flags(ifp, IFF_UP, IFF_UP);
+			} else {
+				IFA_UNLOCK(ifa);
 			}
-		} else
+		} else {
+			IFA_UNLOCK(ifa);
 			error = EINVAL;
+		}
+		IFA_LOCK_ASSERT_NOTHELD(ifa);
 		break;
 
 	case SIOCADDMULTI:
diff --git a/bsd/net/if_types.h b/bsd/net/if_types.h
index 4eced169b..b3f8e5b65 100644
--- a/bsd/net/if_types.h
+++ b/bsd/net/if_types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -148,6 +148,7 @@
 #define	IFT_PFSYNC	0xf6		/* Packet filter state syncing */
 #define	IFT_CARP	0xf8		/* Common Address Redundancy Protocol */
 
-#define	IFT_PDP		0xff		/* GPRS Packet Data Protocol */
+#define	IFT_CELLULAR	0xff		/* Packet Data over Cellular */
+#define	IFT_PDP		IFT_CELLULAR	/* deprecated; use IFT_CELLULAR */
 
 #endif
diff --git a/bsd/net/if_utun.c b/bsd/net/if_utun.c
index 1b35e44b4..a8667845b 100644
--- a/bsd/net/if_utun.c
+++ b/bsd/net/if_utun.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -366,6 +366,9 @@ utun_cleanup_family(
 		goto cleanup;
 	}
 	
+	/* always set SS_PRIV; we want to close and detach regardless */
+	sock_setpriv(pf_socket, 1);
+
 	result = utun_detach_ip(interface, protocol, pf_socket);
 	if (result == 0 || result == ENXIO) {
 		/* We are done! We either detached or weren't attached. */
@@ -705,7 +708,6 @@ utun_ioctl(
 	void		*data)
 {
 	errno_t	result = 0;
-	struct ifaddr 	*ifa = (struct ifaddr *)data;
 	
 	switch(command) {
 		case SIOCSIFMTU:
@@ -716,13 +718,6 @@ utun_ioctl(
 			/* ifioctl() takes care of it */
 			break;
 			
-		case SIOCSIFADDR:
-		case SIOCAIFADDR:
-			/* This will be called for called for IPv6 Address additions */
-			if (ifa->ifa_addr->sa_family == AF_INET6) 
-				break;
-			/* Fall though for other families like IPv4 */
-			
 		default:
 			result = EOPNOTSUPP;
 	}
@@ -754,7 +749,8 @@ utun_proto_input(
 	// remove protocol family first
 	mbuf_adj(m, sizeof(u_int32_t));
 	
-	proto_input(protocol, m);
+	if (proto_input(protocol, m) != 0)
+		m_freem(m);
 	
 	return 0;
 }
@@ -770,8 +766,8 @@ utun_proto_pre_output(
 	__unused char *link_layer_dest)
 {
 	
-    *(protocol_family_t *)(void *)frame_type = protocol;
-	return 0;
+	*(protocol_family_t *)(void *)frame_type = protocol;
+	return 0;
 }
 
 static errno_t
diff --git a/bsd/net/if_var.h b/bsd/net/if_var.h
index 51b48d27c..a76aa7dbb 100644
--- a/bsd/net/if_var.h
+++ b/bsd/net/if_var.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -223,8 +223,6 @@ struct if_data64 {
 	struct IF_DATA_TIMEVAL ifi_lastchange;	/* time of last administrative change */
 };
 
-#pragma pack()
-
 #ifdef PRIVATE
 struct if_traffic_class {
 	u_int64_t		ifi_ibkpackets;	/* TC_BK packets received on interface */
@@ -240,11 +238,28 @@ struct if_traffic_class {
 	u_int64_t		ifi_ovopackets;	/* TC_VO packets sent on interface */
 	u_int64_t		ifi_ovobytes;	/* TC_VO bytes sent on interface */
 };
+#endif /* PRIVATE */
+
+#pragma pack()
 
+/*
+ * Structure defining a queue for a network interface.
+ */
+struct	ifqueue {
+	void	*ifq_head;
+	void	*ifq_tail;
+	int	ifq_len;
+	int	ifq_maxlen;
+	int	ifq_drops;
+};
+
+#ifdef XNU_KERNEL_PRIVATE
 /*
  * Internal storage of if_data. This is bound to change. Various places in the
  * stack will translate this data structure in to the externally visible
- * if_data structure above.
+ * if_data structure above.  Note that during interface attach time, the
+ * embedded if_data structure in ifnet is cleared, with the exception of
+ * some non-statistics related fields.
  */
 struct if_data_internal {
 	/* generic interface information */
@@ -259,6 +274,7 @@ struct if_data_internal {
 	u_int32_t	ifi_mtu;	/* maximum transmission unit */
 	u_int32_t	ifi_metric;	/* routing metric (external only) */
 	u_int32_t	ifi_baudrate;	/* linespeed */
+	u_int32_t	_pad;
 	/* volatile statistics */
 	u_int64_t	ifi_ipackets;	/* packets received on interface */
 	u_int64_t	ifi_ierrors;	/* input errors on interface */
@@ -279,7 +295,9 @@ struct if_data_internal {
 	u_int32_t	ifi_tso_v4_mtu;	/* TCP Segment Offload IPv4 maximum segment size */
 	u_int32_t	ifi_tso_v6_mtu;	/* TCP Segment Offload IPv6 maximum segment size */
 };
+#endif /* XNU_KERNEL_PRIVATE */
 
+#ifdef PRIVATE
 #define	if_mtu		if_data.ifi_mtu
 #define	if_type		if_data.ifi_type
 #define if_typelen	if_data.ifi_typelen
@@ -303,47 +321,57 @@ struct if_data_internal {
 #define	if_lastchange	if_data.ifi_lastchange
 #define if_recvquota	if_data.ifi_recvquota
 #define	if_xmitquota	if_data.ifi_xmitquota
-#define if_iflags	if_data.ifi_iflags
+#endif /* PRIVATE */
+#ifdef XNU_KERNEL_PRIVATE
 #define	if_tso_v4_mtu	if_data.ifi_tso_v4_mtu
 #define	if_tso_v6_mtu	if_data.ifi_tso_v6_mtu
+#endif /* XNU_KERNEL_PRIVATE */
 
-struct	mbuf;
-struct ifaddr;
-TAILQ_HEAD(ifnethead, ifnet);	/* we use TAILQs so that the order of */
-TAILQ_HEAD(ifaddrhead, ifaddr);	/* instantiation is preserved in the list */
-TAILQ_HEAD(ifprefixhead, ifprefix);
-LIST_HEAD(ifmultihead, ifmultiaddr);
-struct tqdummy;
-TAILQ_HEAD(tailq_head, tqdummy);
-
+#ifdef XNU_KERNEL_PRIVATE
 /*
  * Forward structure declarations for function prototypes [sic].
  */
-struct	proc;
-struct	rtentry;
-struct	socket;
-struct	ether_header;
-struct  sockaddr_dl;
+struct proc;
+struct rtentry;
+struct socket;
 struct ifnet_filter;
+struct mbuf;
+struct ifaddr;
+struct tqdummy;
+struct proto_hash_entry;
+struct dlil_threading_info;
+#if PF
+struct pfi_kif;
+#endif /* PF */
 
+/* we use TAILQs so that the order of instantiation is preserved in the list */
+TAILQ_HEAD(ifnethead, ifnet);
+TAILQ_HEAD(ifaddrhead, ifaddr);
+TAILQ_HEAD(ifprefixhead, ifprefix);
+LIST_HEAD(ifmultihead, ifmultiaddr);
+TAILQ_HEAD(tailq_head, tqdummy);
 TAILQ_HEAD(ifnet_filter_head, ifnet_filter);
 TAILQ_HEAD(ddesc_head_name, dlil_demux_desc);
+#endif /* XNU_KERNEL_PRIVATE */
 
-/* All of the following IF_HWASSIST_* flags are defined
- * in kpi_inteface.h as IFNET_* flags. These are redefined
- * here as constants to avoid failures to build user level
- * programs that can not include kpi_interface.h. It is 
- * important to keep this in sync with the definitions in
- * kpi_interface.h. The corresponding constant for each 
- * definition is mentioned in the comment.
+#ifdef PRIVATE
+/*
+ * All of the following IF_HWASSIST_* flags are defined in kpi_interface.h as
+ * IFNET_* flags. These are redefined here as constants to avoid failures to
+ * build user-level programs that cannot include kpi_interface.h. It is
+ * important to keep this in sync with the definitions in kpi_interface.h.
+ * The corresponding constant for each definition is mentioned in the comment.
  *
- * Bottom 16 bits reserved for hardware checksum 
+ * Bottom 16 bits reserved for hardware checksum
  */
 #define IF_HWASSIST_CSUM_IP		0x0001	/* will csum IP, IFNET_CSUM_IP */
 #define IF_HWASSIST_CSUM_TCP		0x0002	/* will csum TCP, IFNET_CSUM_TCP */
 #define IF_HWASSIST_CSUM_UDP		0x0004	/* will csum UDP, IFNET_CSUM_UDP */
 #define IF_HWASSIST_CSUM_IP_FRAGS	0x0008	/* will csum IP fragments, IFNET_CSUM_FRAGMENT */
 #define IF_HWASSIST_CSUM_FRAGMENT	0x0010	/* will do IP fragmentation, IFNET_IP_FRAGMENT */
+#define IF_HWASSIST_CSUM_TCPIPV6	0x0020	/* will csum TCPv6, IFNET_CSUM_TCPIPV6 */
+#define IF_HWASSIST_CSUM_UDPIPV6	0x0040	/* will csum UDPv6, IFNET_CSUM_UDPIPV6 */
+#define IF_HWASSIST_CSUM_FRAGMENT_IPV6	0x0080	/* will do IPv6 fragmentation, IFNET_IPV6_FRAGMENT */
 #define IF_HWASSIST_CSUM_TCP_SUM16	0x1000	/* simple TCP Sum16 computation, IFNET_CSUM_SUM16 */
 #define IF_HWASSIST_CSUM_MASK		0xffff
 #define IF_HWASSIST_CSUM_FLAGS(hwassist)	((hwassist) & IF_HWASSIST_CSUM_MASK)
@@ -356,30 +384,13 @@ TAILQ_HEAD(ddesc_head_name, dlil_demux_desc);
 
 #define IF_HWASSIST_TSO_V4		0x00200000	/* will do TCP Segment offload for IPv4, IFNET_TSO_IPV4 */
 #define IF_HWASSIST_TSO_V6		0x00400000	/* will do TCP Segment offload for IPv6, IFNET_TSO_IPV6 */
-
-#define IFNET_RW_LOCK 1
-
 #endif /* PRIVATE */
-/*
- * Structure defining a queue for a network interface.
- */
-struct	ifqueue {
-	void *ifq_head;
-	void *ifq_tail;
-	int	ifq_len;
-	int	ifq_maxlen;
-	int	ifq_drops;
-};
 
-#ifdef PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
+#include <sys/tree.h>
+#include <netinet/in.h>
 
-struct ddesc_head_str;
-struct proto_hash_entry;
-struct kev_msg;
-struct dlil_threading_info;
-#if PF
-struct pfi_kif;
-#endif /* PF */
+RB_HEAD(ll_reach_tree, if_llreach);	/* define struct ll_reach_tree */
 
 /*
  * Structure defining a network interface.
@@ -387,34 +398,42 @@ struct pfi_kif;
  * (Would like to call this struct ``if'', but C isn't PL/1.)
  */
 struct ifnet {
-	void	*if_softc;		/* pointer to driver state */
-	const char	*if_name;		/* name, e.g. ``en'' or ``lo'' */
-	TAILQ_ENTRY(ifnet) if_link; 	/* all struct ifnets are chained */
-	struct	ifaddrhead if_addrhead;	/* linked list of addresses per if */
-	u_int32_t	if_refcnt;
-#ifdef __KPI_INTERFACE__
-	ifnet_check_multi	if_check_multi;
-#else
-	void*				if_check_multi;
-#endif /* __KPI_INTERFACE__ */
-	int	if_pcount;		/* number of promiscuous listeners */
-	struct	bpf_if *if_bpf;		/* packet filter structure */
-	u_short	if_index;		/* numeric abbreviation for this if  */
-	short	if_unit;		/* sub-unit for lower level driver */
-	short	if_timer;		/* time 'til if_watchdog called */
-	short	if_flags;		/* up/down, broadcast, etc. */
-	int	if_ipending;		/* interrupts pending */
-	void	*if_linkmib;		/* link-type-specific MIB data */
-	size_t	if_linkmiblen;		/* length of above data */
-	struct	if_data_internal if_data;
-
-/* New with DLIL */
-#ifdef BSD_KERNEL_PRIVATE
-	int	if_usecnt;
-#else
-	int	refcnt;
-#endif
-#ifdef __KPI_INTERFACE__
+	/*
+	 * Lock (RW or mutex) to protect this data structure (static storage).
+	 */
+	decl_lck_rw_data(, if_lock);
+	void		*if_softc;	/* pointer to driver state */
+	const char	*if_name;	/* name, e.g. ``en'' or ``lo'' */
+	TAILQ_ENTRY(ifnet) if_link;	/* all struct ifnets are chained */
+	TAILQ_ENTRY(ifnet) if_detaching_link; /* list of detaching ifnets */
+
+	decl_lck_mtx_data(, if_ref_lock)
+	u_int32_t	if_refflags;
+	u_int32_t	if_refio;	/* number of io ops to the underlying driver */
+
+#define	if_list		if_link
+	struct ifaddrhead if_addrhead;	/* linked list of addresses per if */
+#define	if_addrlist	if_addrhead
+	struct ifaddr	*if_lladdr;	/* link address (first/permanent) */
+
+	int		if_pcount;	/* number of promiscuous listeners */
+	struct bpf_if	*if_bpf;	/* packet filter structure */
+	u_short		if_index;	/* numeric abbreviation for this if  */
+	short		if_unit;	/* sub-unit for lower level driver */
+	short		if_timer;	/* time 'til if_watchdog called */
+	short		if_flags;	/* up/down, broadcast, etc. */
+	u_int32_t	if_eflags;	/* see <net/if.h> */
+
+	int		if_capabilities;	/* interface features & capabilities */
+	int		if_capenable;		/* enabled features & capabilities */
+
+	void		*if_linkmib;	/* link-type-specific MIB data */
+	size_t		if_linkmiblen;	/* length of above data */
+
+	struct if_data_internal if_data __attribute__((aligned(8)));
+
+	ifnet_family_t		if_family;	/* value assigned by Apple */
+	uintptr_t		if_family_cookie;
 	ifnet_output_func	if_output;
 	ifnet_ioctl_func	if_ioctl;
 	ifnet_set_bpf_tap	if_set_bpf_tap;
@@ -422,64 +441,28 @@ struct ifnet {
 	ifnet_demux_func	if_demux;
 	ifnet_event_func	if_event;
 	ifnet_framer_func	if_framer;
-	ifnet_family_t		if_family;		/* value assigned by Apple */
-#else
-	void*				if_output;
-	void*				if_ioctl;
-	void*				if_set_bpf_tap;
-	void*				if_free;
-	void*				if_demux;
-	void*				if_event;
-	void*				if_framer;
-	u_int32_t			if_family;		/* value assigned by Apple */
-#endif
+	ifnet_add_proto_func	if_add_proto;
+	ifnet_del_proto_func	if_del_proto;
+	ifnet_check_multi	if_check_multi;
+	struct proto_hash_entry	*if_proto_hash;
+	void			*if_kpi_storage;
 
+	decl_lck_mtx_data(, if_flt_lock)
+	u_int32_t		if_flt_busy;
+	u_int32_t		if_flt_waiters;
 	struct ifnet_filter_head if_flt_head;
 
-/* End DLIL specific */
+	struct ifmultihead	if_multiaddrs;	/* multicast addresses */
+	u_int32_t		if_updatemcasts; /* mcast addrs need updating */
+	int			if_amcount;	/* # of all-multicast reqs */
+	decl_lck_mtx_data(, if_addrconfig_lock); /* for serializing addr config */
+	struct in_multi		*if_allhostsinm; /* store all-hosts inm for this ifp */
 
-	u_int32_t 	if_delayed_detach; /* need to perform delayed detach */
-	void    *if_private;	/* private to interface */
-	long	if_eflags;		/* autoaddr, autoaddr done, etc. */
-
-	struct	ifmultihead if_multiaddrs; /* multicast addresses configured */
-	int	if_amcount;		/* number of all-multicast requests */
-/* procedure handles */
-#ifdef __KPI_INTERFACE__
-	ifnet_add_proto_func	if_add_proto;
-	ifnet_del_proto_func	if_del_proto;
-#else /* !__KPI_INTERFACE__ */
-	void*	if_add_proto;
-	void*	if_del_proto;
-#endif /* !__KPI_INTERFACE__ */
-	struct proto_hash_entry	*if_proto_hash;
-	void					*if_kpi_storage;
-#if 0	
-	void	*unused_was_init;
-#else
 	struct dlil_threading_info *if_input_thread;
-#endif
-	void	*unused_was_resolvemulti;
-	
-	struct ifqueue	if_snd;
-	u_int32_t 	unused_2[1];
-#ifdef __APPLE__
-	uintptr_t	family_cookie;
-	struct	ifprefixhead if_prefixhead; /* list of prefixes per if */
 
-#ifdef _KERN_LOCKS_H_
-#if IFNET_RW_LOCK
-	lck_rw_t *if_lock;		/* Lock to protect this interface */
-#else
-	lck_mtx_t *if_lock;		/* Lock to protect this interface */
-#endif
-#else
-	void	*if_lock;
-#endif
+	struct ifqueue		if_snd;
 
-#else
-	struct	ifprefixhead if_prefixhead; /* list of prefixes per if */
-#endif /* __APPLE__ */
+	struct	ifprefixhead	if_prefixhead;	/* list of prefixes per if */
 	struct {
 		u_int32_t	length;
 		union {
@@ -488,133 +471,134 @@ struct ifnet {
 		} u;
 	} if_broadcast;
 #if CONFIG_MACF_NET
-	struct  label *if_label;	/* interface MAC label */
+	struct label		*if_label;	/* interface MAC label */
 #endif
 
-	u_int32_t	if_wake_properties;
+	u_int32_t		if_wake_properties;
 #if PF
-	struct thread	*if_pf_curthread;
-	struct pfi_kif	*if_pf_kif;
+	struct thread		*if_pf_curthread;
+	struct pfi_kif		*if_pf_kif;
 #endif /* PF */
-#ifdef _KERN_LOCKS_H_
-	lck_mtx_t	*if_fwd_route_lock;
-#else
-	void		*if_fwd_route_lock;
-#endif
-	struct route	if_fwd_route;	/* cached IPv4 forwarding route */
-	void	*if_bridge;		/* bridge glue */
-#if IFNET_ROUTE_REFCNT
-	u_int32_t	if_want_aggressive_drain;
-	u_int32_t	if_idle_flags;	/* idle flags */
-	u_int32_t	if_route_refcnt; /* idle: route ref count */
-#endif /* IFNET_ROUTE_REFCNT */
-#if PKT_PRIORITY
-	struct if_traffic_class if_tc __attribute__((aligned(8)));
-#endif /* PKT_PRIORITY */
-};
 
-#ifndef __APPLE__
-/* for compatibility with other BSDs */
-#define	if_addrlist	if_addrhead
-#define	if_list		if_link
-#endif /* !__APPLE__ */
+	decl_lck_mtx_data(, if_cached_route_lock);
+	u_int32_t		if_fwd_cacheok;
+	struct route		if_fwd_route;	/* cached forwarding route */
+	struct route		if_src_route;	/* cached ipv4 source route */
+	struct route_in6	if_src_route6;	/* cached ipv6 source route */
 
+	decl_lck_rw_data(, if_llreach_lock);
+	struct ll_reach_tree	if_ll_srcs;	/* source link-layer tree */
 
-#endif /* PRIVATE */
+	void			*if_bridge;	/* bridge glue */
+
+	u_int32_t		if_want_aggressive_drain;
+	u_int32_t		if_idle_flags;	/* idle flags */
+	u_int32_t		if_idle_new_flags; /* temporary idle flags */
+	u_int32_t		if_idle_new_flags_mask; /* temporary mask */
+	u_int32_t		if_route_refcnt; /* idle: route ref count */
+
+	struct if_traffic_class if_tc __attribute__((aligned(8)));
+#if INET
+	struct igmp_ifinfo	*if_igi;	/* for IGMPv3 */
+#endif /* INET */
+#if INET6
+	struct mld_ifinfo	*if_mli;	/* for MLDv2 */
+#endif /* INET6 */
+};
+
+/*
+ * Valid values for if_refflags
+ */
+#define	IFRF_ATTACHED	0x1	/* ifnet attach is completely done */
+#define	IFRF_DETACHING	0x2	/* detach has been requested */
 
-#ifdef KERNEL_PRIVATE
 /*
  * Structure describing a `cloning' interface.
  */
 struct if_clone {
 	LIST_ENTRY(if_clone) ifc_list;	/* on list of cloners */
-	const char *ifc_name;			/* name of device, e.g. `vlan' */
-	size_t ifc_namelen;		/* length of name */
-	u_int32_t ifc_minifs;			/* minimum number of interfaces */
-	u_int32_t ifc_maxunit;		/* maximum unit number */
-	unsigned char *ifc_units;	/* bitmap to handle units */
-	u_int32_t ifc_bmlen;			/* bitmap length */
-
-	int	(*ifc_create)(struct if_clone *, u_int32_t, void *);
-	int	(*ifc_destroy)(struct ifnet *);
+	const char	*ifc_name;	/* name of device, e.g. `vlan' */
+	size_t		ifc_namelen;	/* length of name */
+	u_int32_t	ifc_minifs;	/* minimum number of interfaces */
+	u_int32_t	ifc_maxunit;	/* maximum unit number */
+	unsigned char	*ifc_units;	/* bitmap to handle units */
+	u_int32_t	ifc_bmlen;	/* bitmap length */
+
+	int		(*ifc_create)(struct if_clone *, u_int32_t, void *);
+	int		(*ifc_destroy)(struct ifnet *);
 };
 
-#define IF_CLONE_INITIALIZER(name, create, destroy, minifs, maxunit)	\
-    { { NULL, NULL }, name, sizeof(name) - 1, minifs, maxunit, NULL, 0, create, destroy }
+#define IF_CLONE_INITIALIZER(name, create, destroy, minifs, maxunit) {	      \
+	{ NULL, NULL }, name, (sizeof (name) - 1), minifs, maxunit, NULL, 0,  \
+	create, destroy							      \
+}
 
 #define M_CLONE         M_IFADDR
 
 /*
- * Bit values in if_ipending
- */
-#define	IFI_RECV	1	/* I want to receive */
-#define	IFI_XMIT	2	/* I want to transmit */
-
-/*
- * Output queues (ifp->if_snd) and slow device input queues (*ifp->if_slowq)
- * are queues of messages stored on ifqueue structures
- * (defined above).  Entries are added to and deleted from these structures
- * by these macros, which should be called with ipl raised to splimp().
+ * Macros to manipulate ifqueue.  Users of these macros are responsible
+ * for serialization, by holding whatever lock is appropriate for the
+ * corresponding structure that refers to the ifqueue.
  */
 #define	IF_QFULL(ifq)		((ifq)->ifq_len >= (ifq)->ifq_maxlen)
 #define	IF_DROP(ifq)		((ifq)->ifq_drops++)
-#define	IF_ENQUEUE(ifq, m) { \
-	(m)->m_nextpkt = 0; \
-	if ((ifq)->ifq_tail == 0) \
-		(ifq)->ifq_head = m; \
-	else \
-		((struct mbuf*)(ifq)->ifq_tail)->m_nextpkt = m; \
-	(ifq)->ifq_tail = m; \
-	(ifq)->ifq_len++; \
+#define	IF_ENQUEUE(ifq, m) {						\
+	(m)->m_nextpkt = NULL;						\
+	if ((ifq)->ifq_tail == NULL)					\
+		(ifq)->ifq_head = m;					\
+	else								\
+		((struct mbuf*)(ifq)->ifq_tail)->m_nextpkt = m;		\
+	(ifq)->ifq_tail = m;						\
+	(ifq)->ifq_len++;						\
 }
-#define	IF_PREPEND(ifq, m) { \
-	(m)->m_nextpkt = (ifq)->ifq_head; \
-	if ((ifq)->ifq_tail == 0) \
-		(ifq)->ifq_tail = (m); \
-	(ifq)->ifq_head = (m); \
-	(ifq)->ifq_len++; \
+#define	IF_PREPEND(ifq, m) {						\
+	(m)->m_nextpkt = (ifq)->ifq_head;				\
+	if ((ifq)->ifq_tail == NULL)					\
+		(ifq)->ifq_tail = (m);					\
+	(ifq)->ifq_head = (m);						\
+	(ifq)->ifq_len++;						\
 }
-#define	IF_DEQUEUE(ifq, m) { \
-	(m) = (ifq)->ifq_head; \
-	if (m) { \
-		if (((ifq)->ifq_head = (m)->m_nextpkt) == 0) \
-			(ifq)->ifq_tail = 0; \
-		(m)->m_nextpkt = 0; \
-		(ifq)->ifq_len--; \
-	} \
+#define	IF_DEQUEUE(ifq, m) {						\
+	(m) = (ifq)->ifq_head;						\
+	if (m != NULL) {						\
+		if (((ifq)->ifq_head = (m)->m_nextpkt) == NULL)		\
+			(ifq)->ifq_tail = NULL;				\
+		(m)->m_nextpkt = NULL;					\
+		(ifq)->ifq_len--;					\
+	}								\
 }
-
-#define	IF_ENQ_DROP(ifq, m)	if_enq_drop(ifq, m)
-
-#if defined(__GNUC__) && defined(MT_HEADER)
-static __inline int
-if_queue_drop(struct ifqueue *ifq, __unused struct mbuf *m)
-{
-	IF_DROP(ifq);
-	return 0;
+#define	IF_REMQUEUE(ifq, m) {						\
+	struct mbuf *_p = (ifq)->ifq_head;				\
+	struct mbuf *_n = (m)->m_nextpkt;				\
+	if ((m) == _p)							\
+		_p = NULL;						\
+	while (_p != NULL) {						\
+		if (_p->m_nextpkt == (m))				\
+			break;						\
+		_p = _p->m_nextpkt;					\
+	}								\
+	VERIFY(_p != NULL || ((m) == (ifq)->ifq_head));			\
+	if ((m) == (ifq)->ifq_head)					\
+		(ifq)->ifq_head = _n;					\
+	if ((m) == (ifq)->ifq_tail)					\
+		(ifq)->ifq_tail = _p;					\
+	VERIFY((ifq)->ifq_tail != NULL || (ifq)->ifq_head == NULL);	\
+	VERIFY((ifq)->ifq_len != 0);					\
+	--(ifq)->ifq_len;						\
+	if (_p != NULL)							\
+		_p->m_nextpkt = _n;					\
+	(m)->m_nextpkt = NULL;						\
 }
+#define IF_DRAIN(ifq) do {						\
+	struct mbuf *m;							\
+	for (;;) {							\
+		IF_DEQUEUE(ifq, m);					\
+		if (m == NULL)						\
+			break;						\
+		m_freem(m);						\
+	}								\
+} while (0)
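
IF_REMQUEUE() above must walk from the head to find the predecessor because the queue is singly linked through m_nextpkt; the VERIFY()s guard the head/tail invariants along the way. A self-contained model of the enqueue/dequeue pair, with a stand-in packet type instead of struct mbuf:

#include <assert.h>
#include <stddef.h>

/* Stand-ins for struct mbuf / struct ifqueue, for illustration only. */
struct pkt { struct pkt *next; };
struct q { struct pkt *head, *tail; int len, maxlen, drops; };

static void
q_enqueue(struct q *ifq, struct pkt *m)		/* cf. IF_ENQUEUE() */
{
	m->next = NULL;
	if (ifq->tail == NULL)
		ifq->head = m;
	else
		ifq->tail->next = m;
	ifq->tail = m;
	ifq->len++;
}

static struct pkt *
q_dequeue(struct q *ifq)			/* cf. IF_DEQUEUE() */
{
	struct pkt *m = ifq->head;

	if (m != NULL) {
		if ((ifq->head = m->next) == NULL)
			ifq->tail = NULL;
		m->next = NULL;
		ifq->len--;
	}
	return (m);
}

int
main(void)
{
	struct q ifq = { NULL, NULL, 0, 50, 0 };
	struct pkt a, b;

	q_enqueue(&ifq, &a);
	q_enqueue(&ifq, &b);
	assert(q_dequeue(&ifq) == &a);		/* FIFO order */
	assert(q_dequeue(&ifq) == &b);
	assert(ifq.len == 0 && ifq.head == NULL && ifq.tail == NULL);
	return (0);
}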
 
-static __inline int
-if_enq_drop(struct ifqueue *ifq, struct mbuf *m)
-{
-	if (IF_QFULL(ifq) &&
-	    !if_queue_drop(ifq, m))
-		return 0;
-	IF_ENQUEUE(ifq, m);
-	return 1;
-}
-#else
-
-#ifdef MT_HEADER
-int	if_enq_drop(struct ifqueue *, struct mbuf *);
-#endif /* MT_HEADER */
-
-#endif /* defined(__GNUC__) && defined(MT_HEADER) */
-
-#endif /* KERNEL_PRIVATE */
-
-
-#ifdef PRIVATE
 /*
  * The ifaddr structure contains information about one address
  * of an interface.  They are maintained by the different address families,
@@ -622,21 +606,24 @@ int	if_enq_drop(struct ifqueue *, struct mbuf *);
  * together so all addresses for an interface can be located.
  */
 struct ifaddr {
-	struct	sockaddr *ifa_addr;	/* address of interface */
-	struct	sockaddr *ifa_dstaddr;	/* other end of p-to-p link */
+	decl_lck_mtx_data(, ifa_lock);	/* lock for ifaddr */
+	uint32_t	ifa_refcnt;	/* ref count, use IFA_{ADD,REM}REF */
+	uint32_t	ifa_debug;	/* debug flags */
+	struct sockaddr	*ifa_addr;	/* address of interface */
+	struct sockaddr	*ifa_dstaddr;	/* other end of p-to-p link */
 #define	ifa_broadaddr	ifa_dstaddr	/* broadcast address interface */
-	struct	sockaddr *ifa_netmask;	/* used to determine subnet */
-	struct	ifnet *ifa_ifp;		/* back-pointer to interface */
+	struct sockaddr	*ifa_netmask;	/* used to determine subnet */
+	struct ifnet	*ifa_ifp;	/* back-pointer to interface */
 	TAILQ_ENTRY(ifaddr) ifa_link;	/* queue macro glue */
 	void (*ifa_rtrequest)		/* check or clean routes (+ or -)'d */
 	    (int, struct rtentry *, struct sockaddr *);
-	uint32_t ifa_flags;		/* mostly rt_flags for cloning */
-	int32_t	ifa_refcnt;		/* ref count, use ifaref, ifafree */
-	int32_t	ifa_metric;		/* cost of going out this interface */
+	uint32_t	ifa_flags;	/* mostly rt_flags for cloning */
+	int32_t		ifa_metric;	/* cost of going out this interface */
 	void (*ifa_free)(struct ifaddr *); /* callback fn for freeing */
 	void (*ifa_trace)		/* callback fn for tracing refs */
 	    (struct ifaddr *, int);
-	uint32_t ifa_debug;		/* debug flags */
+	void (*ifa_attached)(struct ifaddr *); /* callback fn for attaching */
+	void (*ifa_detached)(struct ifaddr *); /* callback fn for detaching */
 };
 
 /*
@@ -648,13 +635,47 @@ struct ifaddr {
 /*
  * Valid values for ifa_debug
  */
-#define	IFD_ATTACHED	0x1		/* attached to an interface */
+#define	IFD_ATTACHED	0x1		/* attached to list */
 #define	IFD_ALLOC	0x2		/* dynamically allocated */
 #define	IFD_DEBUG	0x4		/* has debugging info */
+#define	IFD_LINK	0x8		/* link address */
+#define	IFD_TRASHED	0x10		/* in trash list */
+#define	IFD_SKIP	0x20		/* skip this entry */
+#define	IFD_NOTREADY	0x40		/* embryonic; not yet ready */
 
-#endif /* PRIVATE */
+#define	IFA_LOCK_ASSERT_HELD(_ifa)					\
+	lck_mtx_assert(&(_ifa)->ifa_lock, LCK_MTX_ASSERT_OWNED)
+
+#define	IFA_LOCK_ASSERT_NOTHELD(_ifa)					\
+	lck_mtx_assert(&(_ifa)->ifa_lock, LCK_MTX_ASSERT_NOTOWNED)
+
+#define	IFA_LOCK(_ifa)							\
+	lck_mtx_lock(&(_ifa)->ifa_lock)
+
+#define	IFA_LOCK_SPIN(_ifa)						\
+	lck_mtx_lock_spin(&(_ifa)->ifa_lock)
+
+#define	IFA_CONVERT_LOCK(_ifa) do {					\
+	IFA_LOCK_ASSERT_HELD(_ifa);					\
+	lck_mtx_convert_spin(&(_ifa)->ifa_lock);			\
+} while (0)
+
+#define	IFA_UNLOCK(_ifa)						\
+	lck_mtx_unlock(&(_ifa)->ifa_lock)
+
+#define	IFA_ADDREF(_ifa)						\
+	ifa_addref(_ifa, 0)
+
+#define	IFA_ADDREF_LOCKED(_ifa)						\
+	ifa_addref(_ifa, 1)
+
+#define	IFA_REMREF(_ifa) do {						\
+	(void) ifa_remref(_ifa, 0);					\
+} while (0)
+
+#define	IFA_REMREF_LOCKED(_ifa)						\
+	ifa_remref(_ifa, 1)
 
-#ifdef KERNEL_PRIVATE
 /*
  * The prefix structure contains information about one prefix
  * of an interface.  They are maintained by the different address families,
@@ -668,115 +689,169 @@ struct ifprefix {
 	u_char	ifpr_plen;		/* prefix length in bits */
 	u_char	ifpr_type;		/* protocol dependent prefix type */
 };
-#endif /* KERNEL_PRIVATE */
-
-#ifdef PRIVATE
-typedef void (*ifma_protospec_free_func)(void* ifma_protospec);
 
 /*
  * Multicast address structure.  This is analogous to the ifaddr
  * structure except that it keeps track of multicast addresses.
- * Also, the reference count here is a count of requests for this
- * address, not a count of pointers to this structure.
+ * Also, the request count here is a count of requests for this
+ * address, not a count of pointers to this structure; each anonymous
+ * membership holds one outstanding request count.
  */
 struct ifmultiaddr {
+	decl_lck_mtx_data(, ifma_lock);
+	u_int32_t ifma_refcount;	/* reference count */
+	u_int32_t ifma_anoncnt;		/* # of anonymous requests */
+	u_int32_t ifma_reqcnt;		/* total requests for this address */
+	u_int32_t ifma_debug;		/* see ifa_debug flags */
+	u_int32_t ifma_flags;		/* see below */
 	LIST_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */
-	struct	sockaddr *ifma_addr; 	/* address this membership is for */
+	struct sockaddr *ifma_addr;	/* address this membership is for */
 	struct ifmultiaddr *ifma_ll;	/* link-layer translation, if any */
-	struct	ifnet *ifma_ifp;		/* back-pointer to interface */
-	u_int	ifma_usecount;			/* use count, protected by ifp's lock */
-	void	*ifma_protospec;		/* protocol-specific state, if any */
-	int32_t	ifma_refcount;			/* reference count, atomically protected */
-	ifma_protospec_free_func ifma_free;	/* function called to free ifma_protospec */
+	struct ifnet *ifma_ifp;		/* back-pointer to interface */
+	void *ifma_protospec;		/* protocol-specific state, if any */
+	void (*ifma_trace)		/* callback fn for tracing refs */
+	    (struct ifmultiaddr *, int);
 };
-#endif /* PRIVATE */
-
-#ifdef KERNEL_PRIVATE
-#define IFAREF(ifa) ifaref(ifa)
-#define IFAFREE(ifa) ifafree(ifa)
 
 /*
- * To preserve kmem compatibility, we define
- * ifnet_head to ifnet. This should be temp.
+ * Values for ifma_flags
  */
-#define ifnet_head ifnet
-extern	struct ifnethead ifnet_head;
-extern struct	ifnet	**ifindex2ifnet;
-extern	int ifqmaxlen;
-extern	ifnet_t  lo_ifp;
-extern	int if_index;
-extern	struct ifaddr **ifnet_addrs;
-
-int	if_addmulti(struct ifnet *, const struct sockaddr *, struct ifmultiaddr **);
-int	if_allmulti(struct ifnet *, int);
-void	if_attach(struct ifnet *);
-int	if_delmultiaddr(struct ifmultiaddr *ifma, int locked);
-int	if_delmulti(struct ifnet *, const struct sockaddr *);
-void	if_down(struct ifnet *);
-int 	if_down_all(void);
-void	if_route(struct ifnet *, int flag, int fam);
-void	if_unroute(struct ifnet *, int flag, int fam);
-void	if_up(struct ifnet *);
-void	if_updown(struct ifnet *ifp, int up);
-/*void	ifinit(void));*/ /* declared in systm.h for main( */
-int	ifioctl(struct socket *, u_long, caddr_t, struct proc *);
-int	ifioctllocked(struct socket *, u_long, caddr_t, struct proc *);
-struct	ifnet *ifunit(const char *);
-struct  ifnet *if_withname(struct sockaddr *);
-
-int	if_clone_attach(struct if_clone *);
-void	if_clone_detach(struct if_clone *);
-struct if_clone *
-	if_clone_lookup(const char *, u_int32_t *);
-
-void	ifnet_lock_assert(struct ifnet *ifp, int what);
-void	ifnet_lock_shared(struct ifnet *ifp);
-void	ifnet_lock_exclusive(struct ifnet *ifp);
-void	ifnet_lock_done(struct ifnet *ifp);
-
-void	ifnet_head_lock_shared(void);
-void	ifnet_head_lock_exclusive(void);
-void	ifnet_head_done(void);
-
-void	if_attach_ifa(struct ifnet * ifp, struct ifaddr *ifa);
-void	if_detach_ifa(struct ifnet * ifp, struct ifaddr *ifa);
-
-void	ifma_reference(struct ifmultiaddr *ifma);
-void	ifma_release(struct ifmultiaddr *ifma);
-
-struct	ifaddr *ifa_ifwithaddr(const struct sockaddr *);
-struct	ifaddr *ifa_ifwithaddr_scoped(const struct sockaddr *, unsigned int);
-struct	ifaddr *ifa_ifwithdstaddr(const struct sockaddr *);
-struct	ifaddr *ifa_ifwithnet(const struct sockaddr *);
-struct	ifaddr *ifa_ifwithnet_scoped(const struct sockaddr *, unsigned int);
-struct	ifaddr *ifa_ifwithroute(int, const struct sockaddr *, const struct sockaddr *);
-struct	ifaddr *ifa_ifwithroute_locked(int, const struct sockaddr *, const struct sockaddr *);
-struct ifaddr *ifa_ifwithroute_scoped_locked(int, const struct sockaddr *,
+#define	IFMAF_ANONYMOUS		0x1	/* has anonymous request ref(s) held */
+
+#define	IFMA_LOCK_ASSERT_HELD(_ifma)					\
+	lck_mtx_assert(&(_ifma)->ifma_lock, LCK_MTX_ASSERT_OWNED)
+
+#define	IFMA_LOCK_ASSERT_NOTHELD(_ifma)					\
+	lck_mtx_assert(&(_ifma)->ifma_lock, LCK_MTX_ASSERT_NOTOWNED)
+
+#define	IFMA_LOCK(_ifma)						\
+	lck_mtx_lock(&(_ifma)->ifma_lock)
+
+#define	IFMA_LOCK_SPIN(_ifma)						\
+	lck_mtx_lock_spin(&(_ifma)->ifma_lock)
+
+#define	IFMA_CONVERT_LOCK(_ifma) do {					\
+	IFMA_LOCK_ASSERT_HELD(_ifma);					\
+	lck_mtx_convert_spin(&(_ifma)->ifma_lock);			\
+} while (0)
+
+#define	IFMA_UNLOCK(_ifma)						\
+	lck_mtx_unlock(&(_ifma)->ifma_lock)
+
+#define	IFMA_ADDREF(_ifma)						\
+	ifma_addref(_ifma, 0)
+
+#define	IFMA_ADDREF_LOCKED(_ifma)					\
+	ifma_addref(_ifma, 1)
+
+#define	IFMA_REMREF(_ifma)						\
+	ifma_remref(_ifma)
+
+__private_extern__ struct ifnethead ifnet_head;
+__private_extern__ struct ifnet **ifindex2ifnet;
+__private_extern__ int ifqmaxlen;
+__private_extern__ int if_index;
+__private_extern__ struct ifaddr **ifnet_addrs;
+__private_extern__ lck_attr_t *ifa_mtx_attr;
+__private_extern__ lck_grp_t *ifa_mtx_grp;
+__private_extern__ lck_grp_t *ifnet_lock_group;
+__private_extern__ lck_attr_t *ifnet_lock_attr;
+extern ifnet_t lo_ifp;
+
+extern int if_addmulti(struct ifnet *, const struct sockaddr *,
+    struct ifmultiaddr **);
+extern int if_addmulti_anon(struct ifnet *, const struct sockaddr *,
+    struct ifmultiaddr **);
+extern int if_allmulti(struct ifnet *, int);
+extern int if_delmulti(struct ifnet *, const struct sockaddr *);
+extern int if_delmulti_ifma(struct ifmultiaddr *);
+extern int if_delmulti_anon(struct ifnet *, const struct sockaddr *);
+extern void if_down(struct ifnet *);
+extern int if_down_all(void);
+extern void if_up(struct ifnet *);
+__private_extern__ void if_updown(struct ifnet *ifp, int up);
+extern int ifioctl(struct socket *, u_long, caddr_t, struct proc *);
+extern int ifioctllocked(struct socket *, u_long, caddr_t, struct proc *);
+extern struct ifnet *ifunit(const char *);
+extern struct ifnet *if_withname(struct sockaddr *);
+
+extern struct if_clone *if_clone_lookup(const char *, u_int32_t *);
+extern int if_clone_attach(struct if_clone *);
+extern void if_clone_detach(struct if_clone *);
+
+extern errno_t if_mcasts_update(struct ifnet *);
+
+typedef enum {
+	IFNET_LCK_ASSERT_EXCLUSIVE,	/* RW: held as writer */
+	IFNET_LCK_ASSERT_SHARED,	/* RW: held as reader */
+	IFNET_LCK_ASSERT_OWNED,		/* RW: writer/reader, MTX: held */
+	IFNET_LCK_ASSERT_NOTOWNED	/* not held */
+} ifnet_lock_assert_t;
+
+__private_extern__ void ifnet_lock_assert(struct ifnet *, ifnet_lock_assert_t);
+__private_extern__ void ifnet_lock_shared(struct ifnet *ifp);
+__private_extern__ void ifnet_lock_exclusive(struct ifnet *ifp);
+__private_extern__ void ifnet_lock_done(struct ifnet *ifp);
+
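/*
 * Editorial sketch (not part of the patch): ifnet_lock_assert() lets
 * internal code document its locking contract instead of re-taking the
 * lock.  A hypothetical helper that requires its caller to hold the
 * ifnet RW lock as writer:
 */
static void
ifnet_eflags_set_example(struct ifnet *ifp, u_int32_t bits)
{
	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	ifp->if_eflags |= bits;		/* safe: exclusive lock is held */
}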
+__private_extern__ void	ifnet_head_lock_shared(void);
+__private_extern__ void	ifnet_head_lock_exclusive(void);
+__private_extern__ void	ifnet_head_done(void);
+
+__private_extern__ errno_t ifnet_set_idle_flags_locked(ifnet_t, u_int32_t,
+    u_int32_t);
+__private_extern__ int ifnet_is_attached(struct ifnet *, int refio);
+__private_extern__ void ifnet_decr_iorefcnt(struct ifnet *);
+
+__private_extern__ void if_attach_ifa(struct ifnet *, struct ifaddr *);
+__private_extern__ void if_attach_link_ifa(struct ifnet *, struct ifaddr *);
+__private_extern__ void if_detach_ifa(struct ifnet *, struct ifaddr *);
+__private_extern__ void if_detach_link_ifa(struct ifnet *, struct ifaddr *);
+
+extern struct ifaddr *ifa_ifwithaddr(const struct sockaddr *);
+extern struct ifaddr *ifa_ifwithaddr_scoped(const struct sockaddr *, unsigned int);
+extern struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *);
+extern struct ifaddr *ifa_ifwithnet(const struct sockaddr *);
+extern struct ifaddr *ifa_ifwithnet_scoped(const struct sockaddr *, unsigned int);
+extern struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *,
+    const struct sockaddr *);
+extern struct ifaddr *ifa_ifwithroute_locked(int, const struct sockaddr *,
+    const struct sockaddr *);
+extern struct ifaddr *ifa_ifwithroute_scoped_locked(int, const struct sockaddr *,
     const struct sockaddr *, unsigned int);
-struct	ifaddr *ifaof_ifpforaddr(const struct sockaddr *, struct ifnet *);
-struct	ifaddr *ifa_ifpgetprimary(struct ifnet *, int);
-void	ifafree(struct ifaddr *);
-void	ifaref(struct ifaddr *);
-
-struct	ifmultiaddr *ifmaof_ifpforaddr(const struct sockaddr *, struct ifnet *);
-
-extern struct in_ifaddr *ifa_foraddr(unsigned int);
-extern struct in_ifaddr *ifa_foraddr_scoped(unsigned int, unsigned int);
-
-#ifdef BSD_KERNEL_PRIVATE
-enum {
-	kIfNetUseCount_MayBeZero = 0,
-	kIfNetUseCount_MustNotBeZero = 1
-};
-
-int ifp_use(struct ifnet *ifp, int handle_zero);
-int ifp_unuse(struct ifnet *ifp);
-void ifp_use_reached_zero(struct ifnet *ifp);
-
-void	if_data_internal_to_if_data(struct ifnet *ifp, const struct if_data_internal *if_data_int,
-			   struct if_data *if_data);
-void	if_data_internal_to_if_data64(struct ifnet *ifp, const struct if_data_internal *if_data_int,
-							   struct if_data64 *if_data64);
-#endif /* BSD_KERNEL_PRIVATE */
-#endif /* KERNEL_PRIVATE */
+extern struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, struct ifnet *);
+__private_extern__ struct ifaddr *ifa_ifpgetprimary(struct ifnet *, int);
+extern void ifa_addref(struct ifaddr *, int);
+extern struct ifaddr *ifa_remref(struct ifaddr *, int);
+extern void ifa_lock_init(struct ifaddr *);
+extern void ifa_lock_destroy(struct ifaddr *);
+extern void ifma_addref(struct ifmultiaddr *, int);
+extern void ifma_remref(struct ifmultiaddr *);
+
+extern void ifa_init(void);
+
+__private_extern__ struct in_ifaddr *ifa_foraddr(unsigned int);
+__private_extern__ struct in_ifaddr *ifa_foraddr_scoped(unsigned int,
+    unsigned int);
+
+#if INET6
+struct in6_addr;
+__private_extern__ struct in6_ifaddr *ifa_foraddr6(struct in6_addr *);
+__private_extern__ struct in6_ifaddr *ifa_foraddr6_scoped(struct in6_addr *,
+    unsigned int);
+#endif /* INET6 */
+
+__private_extern__ void if_data_internal_to_if_data(struct ifnet *ifp,
+    const struct if_data_internal *if_data_int, struct if_data *if_data);
+__private_extern__ void	if_data_internal_to_if_data64(struct ifnet *ifp,
+    const struct if_data_internal *if_data_int, struct if_data64 *if_data64);
+__private_extern__ void	if_copy_traffic_class(struct ifnet *ifp,
+    struct if_traffic_class *if_tc);
+
+__private_extern__ struct rtentry *ifnet_cached_rtlookup_inet(struct ifnet *,
+    struct in_addr);
+#if INET6
+__private_extern__ struct rtentry *ifnet_cached_rtlookup_inet6(struct ifnet *,
+    struct in6_addr *);
+#endif /* INET6 */
+
+#endif /* XNU_KERNEL_PRIVATE */
 #endif /* !_NET_IF_VAR_H_ */
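/*
 * Editorial sketch (not part of the patch): ifa_addref()/ifa_remref()
 * replace the old ifaref()/ifafree() pair removed above; the trailing int
 * appears to indicate whether the caller already holds the ifaddr lock,
 * so the refcount update can be folded into an existing critical section.
 * A hypothetical caller that is not holding the lock:
 */
static struct ifaddr *
ifa_hold_example(struct ifaddr *ifa)
{
	ifa_addref(ifa, 0);	/* 0: lock not held, take it internally */
	return (ifa);		/* balanced later by ifa_remref(ifa, 0) */
}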
diff --git a/bsd/net/if_vlan.c b/bsd/net/if_vlan.c
index 0179f102c..cf090602d 100644
--- a/bsd/net/if_vlan.c
+++ b/bsd/net/if_vlan.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2010 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -79,6 +79,7 @@
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/kern_event.h>
+#include <sys/mcache.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
@@ -185,8 +186,14 @@ LIST_HEAD(vlan_parent_list, vlan_parent);
 struct ifvlan;
 LIST_HEAD(ifvlan_list, ifvlan);
 
+typedef LIST_ENTRY(vlan_parent) vlan_parent_entry;
+typedef LIST_ENTRY(ifvlan) ifvlan_entry;
+
+#define VLP_SIGNATURE		0xfaceface
 typedef struct vlan_parent {
-    LIST_ENTRY(vlan_parent)	vlp_parent_list;/* list of parents */
+    vlan_parent_entry		vlp_parent_list;/* list of parents */
     struct ifnet *		vlp_ifp;	/* interface */
     struct ifvlan_list		vlp_vlan_list;	/* list of VLAN's */
 #define VLPF_SUPPORTS_VLAN_MTU	0x1
@@ -195,10 +202,12 @@ typedef struct vlan_parent {
     u_int32_t			vlp_flags;
     struct ifdevmtu		vlp_devmtu;
     SInt32			vlp_retain_count;
+    UInt32			vlp_signature;	/* VLP_SIGNATURE */
 } vlan_parent, * vlan_parent_ref;
 
+#define IFV_SIGNATURE		0xbeefbeef
 struct ifvlan {
-    LIST_ENTRY(ifvlan) 		ifv_vlan_list;
+    ifvlan_entry 		ifv_vlan_list;
     char			ifv_name[IFNAMSIZ]; /* our unique id */
     struct ifnet *		ifv_ifp;	/* our interface */
     vlan_parent_ref		ifv_vlp;	/* parent information */
@@ -215,6 +224,8 @@ struct ifvlan {
     u_int32_t			ifv_flags;
     bpf_packet_func		ifv_bpf_input;
     bpf_packet_func		ifv_bpf_output;
+    SInt32			ifv_retain_count;
+    UInt32			ifv_signature;	/* IFV_SIGNATURE */
 };
 
 typedef struct ifvlan * ifvlan_ref;
@@ -230,6 +241,11 @@ static vlan_globals_ref	g_vlan;
 #define	ifv_encaplen	ifv_mib.ifvm_encaplen
 #define	ifv_mtufudge	ifv_mib.ifvm_mtufudge
 
+static void
+vlan_parent_retain(vlan_parent_ref vlp);
+
+static void
+vlan_parent_release(vlan_parent_ref vlp);
 
 /**
  ** vlan_parent_ref vlp_flags in-lines
@@ -363,12 +379,10 @@ static  int vlan_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode,
 static 	int vlan_attach_protocol(struct ifnet *ifp);
 static	int vlan_detach_protocol(struct ifnet *ifp);
 static	int vlan_setmulti(struct ifnet *ifp);
-static	int vlan_unconfig(struct ifnet *ifp);
+static	int vlan_unconfig(ifvlan_ref ifv, int need_to_wait);
 static 	int vlan_config(struct ifnet * ifp, struct ifnet * p, int tag);
 static	void vlan_if_free(struct ifnet * ifp);
-static 	void vlan_remove(ifvlan_ref ifv);
-static	void vlan_if_detach(struct ifnet * ifp);
-static 	int vlan_new_mtu(struct ifnet * ifp, int mtu);
+static 	int vlan_remove(ifvlan_ref ifv, int need_to_wait);
 
 static struct if_clone vlan_cloner = IF_CLONE_INITIALIZER(VLANNAME,
 							  vlan_clone_create, 
@@ -376,9 +390,118 @@ static struct if_clone vlan_cloner = IF_CLONE_INITIALIZER(VLANNAME,
 							  0, 
 							  IF_MAXUNIT);
 static	void interface_link_event(struct ifnet * ifp, u_int32_t event_code);
-static	void vlan_parent_link_event(vlan_parent_ref vlp, 
+static	void vlan_parent_link_event(struct ifnet * p,
 				    u_int32_t event_code);
-extern void dlil_input_packet_list(struct ifnet  *ifp, struct mbuf *m);
+
+static 	int ifvlan_new_mtu(ifvlan_ref ifv, int mtu);
+
+/**
+ ** ifvlan_ref routines
+ **/
+static void
+ifvlan_retain(ifvlan_ref ifv)
+{
+    if (ifv->ifv_signature != IFV_SIGNATURE) {
+	panic("ifvlan_retain: bad signature\n");
+    }
+    if (ifv->ifv_retain_count == 0) {
+	panic("ifvlan_retain: retain count is 0\n");
+    }
+    OSIncrementAtomic(&ifv->ifv_retain_count);
+}
+
+static void
+ifvlan_release(ifvlan_ref ifv)
+{
+    UInt32		old_retain_count;
+
+    if (ifv->ifv_signature != IFV_SIGNATURE) {
+	panic("ifvlan_release: bad signature\n");
+    }
+    old_retain_count = OSDecrementAtomic(&ifv->ifv_retain_count);
+    switch (old_retain_count) {
+    case 0:
+	panic("ifvlan_release: retain count is 0\n");
+	break;
+    case 1:
+	if (g_vlan->verbose) {
+	    printf("ifvlan_release(%s)\n", ifv->ifv_name);
+	}
+	ifv->ifv_signature = 0;
+	FREE(ifv, M_VLAN);
+	break;
+    default:
+	break;
+    }
+    return;
+}
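/*
 * Editorial sketch (not part of the patch): ifvlan_retain()/ifvlan_release()
 * pair a reference count with a signature word so that over-release and
 * use-after-free panic deterministically instead of silently corrupting
 * memory.  The object starts life with a count of 1 and a live signature,
 * and the signature is poisoned just before FREE().  The same idiom in
 * miniature (all names here are invented):
 */
struct guarded_obj {
	SInt32	ref_count;	/* set to 1 at allocation */
	UInt32	signature;	/* magic value while the object is live */
};
#define	GUARDED_SIGNATURE	0xfeedf00d	/* hypothetical magic */

static void
guarded_release(struct guarded_obj *g)
{
	SInt32	old;

	if (g->signature != GUARDED_SIGNATURE)
		panic("guarded_release: bad signature\n");
	old = OSDecrementAtomic(&g->ref_count);
	if (old == 0)
		panic("guarded_release: over-release\n");
	if (old == 1) {
		g->signature = 0;	/* poison before freeing */
		FREE(g, M_TEMP);
	}
}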
+
+static vlan_parent_ref
+ifvlan_get_vlan_parent_retained(ifvlan_ref ifv)
+{
+    vlan_parent_ref	vlp = ifv->ifv_vlp;
+
+    if (vlan_parent_flags_detaching(vlp)) {
+	return (NULL);
+    }
+    vlan_parent_retain(vlp);
+    return (vlp);
+}
+
+/**
+ ** ifnet_* routines
+ **/
+
+static ifvlan_ref
+ifnet_get_ifvlan(struct ifnet * ifp)
+{
+    ifvlan_ref		ifv;
+
+    ifv = (ifvlan_ref)ifnet_softc(ifp);
+    return (ifv);
+}
+
+static ifvlan_ref
+ifnet_get_ifvlan_retained(struct ifnet * ifp)
+{
+    ifvlan_ref		ifv;
+
+    ifv = ifnet_get_ifvlan(ifp);
+    if (ifv == NULL) {
+	return (NULL);
+    }
+    if (ifvlan_flags_detaching(ifv)) {
+	return (NULL);
+    }
+    ifvlan_retain(ifv);
+    return (ifv);
+}
+
+static int
+ifnet_ifvlan_vlan_parent_ok(struct ifnet * ifp, ifvlan_ref ifv,
+			    vlan_parent_ref vlp)
+{
+    ifvlan_ref		check_ifv;
+
+    check_ifv = ifnet_get_ifvlan(ifp);
+    if (check_ifv != ifv || ifvlan_flags_detaching(ifv)) {
+	/* ifvlan_ref no longer valid */
+	return (FALSE);
+    }
+    if (ifv->ifv_vlp != vlp) {
+	/* vlan_parent no longer valid */
+	return (FALSE);
+    }
+    if (vlan_parent_flags_detaching(vlp)) {
+	/* parent is detaching */
+	return (FALSE);
+    }
+    return (TRUE);
+}
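/*
 * Editorial note (not part of the patch): ifnet_ifvlan_vlan_parent_ok()
 * centralizes the re-validation every caller of vlan_parent_wait() must
 * perform -- the wait can sleep, and while the thread sleeps the ifvlan
 * may detach or be re-parented.  The canonical sequence is:
 *
 *	vlan_parent_wait(vlp, "caller");
 *	if (ifnet_ifvlan_vlan_parent_ok(ifp, ifv, vlp) == FALSE)
 *		goto signal_done;	(state changed while sleeping)
 */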
+
+/**
+ ** vlan, etc. routines
+ **/
 
 static int
 vlan_globals_init(void)
@@ -471,17 +594,26 @@ vlan_bpf_input(struct ifnet * ifp, struct mbuf * m,
 /**
  ** vlan_parent synchronization routines
  **/
-static __inline__ void
+static void
 vlan_parent_retain(vlan_parent_ref vlp)
 {
+    if (vlp->vlp_signature != VLP_SIGNATURE) {
+	panic("vlan_parent_retain: signature is bad\n");
+    }
+    if (vlp->vlp_retain_count == 0) {
+	panic("vlan_parent_retain: retain count is 0\n");
+    }
     OSIncrementAtomic(&vlp->vlp_retain_count);
 }
 
-static __inline__ void
+static void
 vlan_parent_release(vlan_parent_ref vlp)
 {
     UInt32		old_retain_count;
 
+    if (vlp->vlp_signature != VLP_SIGNATURE) {
+	panic("vlan_parent_release: signature is bad\n");
+    }
     old_retain_count = OSDecrementAtomic(&vlp->vlp_retain_count);
     switch (old_retain_count) {
     case 0:
@@ -493,6 +625,7 @@ vlan_parent_release(vlan_parent_ref vlp)
 	    printf("vlan_parent_release(%s%d)\n", ifnet_name(ifp),
 		   ifnet_unit(ifp));
 	}
+	vlp->vlp_signature = 0;
 	FREE(vlp, M_VLAN);
 	break;
     default:
@@ -561,7 +694,6 @@ vlan_parent_signal(vlan_parent_ref vlp, const char * msg)
     return;
 }
 
-
 /*
  * Program our multicast filter. What we're actually doing is
  * programming the multicast filter of the parent. This has the
@@ -576,35 +708,22 @@ vlan_setmulti(struct ifnet * ifp)
     int			error = 0;
     ifvlan_ref 		ifv;
     struct ifnet *	p;
-    vlan_parent_ref	vlp;
+    vlan_parent_ref	vlp = NULL;
 
     vlan_lock();
-    ifv = (ifvlan_ref)ifnet_softc(ifp);
-    if (ifv == NULL || ifvlan_flags_detaching(ifv)) {
+    ifv = ifnet_get_ifvlan_retained(ifp);
+    if (ifv == NULL) {
 	goto unlock_done;
     }
-    vlp = ifv->ifv_vlp;
+    vlp = ifvlan_get_vlan_parent_retained(ifv);
     if (vlp == NULL) {
 	/* no parent, no need to program the multicast filter */
 	goto unlock_done;
     }
-    if (vlan_parent_flags_detaching(vlp)) {
-	goto unlock_done;
-    }
-    vlan_parent_retain(vlp);
     vlan_parent_wait(vlp, "vlan_setmulti");
 
     /* check again, things could have changed */
-    ifv = (ifvlan_ref)ifnet_softc(ifp);
-    if (ifv == NULL || ifvlan_flags_detaching(ifv)) {
-	goto signal_done;
-    }
-    if (ifv->ifv_vlp != vlp) {
-	/* vlan parent changed */
-	goto signal_done;
-    }
-    if (vlp == NULL) {
-	/* no parent, no need to program the multicast filter */
+    if (ifnet_ifvlan_vlan_parent_ok(ifp, ifv, vlp) == FALSE) {
 	goto signal_done;
     }
     p = vlp->vlp_ifp;
@@ -620,6 +739,12 @@ vlan_setmulti(struct ifnet * ifp)
 
  unlock_done:
     vlan_unlock();
+    if (ifv != NULL) {
+	ifvlan_release(ifv);
+    }
+    if (vlp != NULL) {
+	vlan_parent_release(vlp);
+    }
     return (error);
 }
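/*
 * Editorial note (not part of the patch): vlan_setmulti() shows the
 * error-path convention used throughout this file -- initialize every
 * retained pointer to NULL, take references only as each lookup succeeds,
 * and funnel all exits through one label that releases exactly what was
 * taken:
 *
 *	ifv = ifnet_get_ifvlan_retained(ifp);       (NULL -> unlock_done)
 *	vlp = ifvlan_get_vlan_parent_retained(ifv); (NULL -> unlock_done)
 *	...
 *   unlock_done:
 *	vlan_unlock();
 *	if (ifv != NULL) ifvlan_release(ifv);
 *	if (vlp != NULL) vlan_parent_release(vlp);
 */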
 
@@ -711,7 +836,8 @@ vlan_parent_create(struct ifnet * p, vlan_parent_ref * ret_vlp)
     }
     LIST_INIT(&vlp->vlp_vlan_list);
     vlp->vlp_ifp = p;
-    vlan_parent_retain(vlp);
+    vlp->vlp_retain_count = 1;
+    vlp->vlp_signature = VLP_SIGNATURE;
     if (ifnet_offload(p)
 	& (IF_HWASSIST_VLAN_MTU | IF_HWASSIST_VLAN_TAGGING)) {
 	vlan_parent_flags_set_supports_vlan_mtu(vlp);
@@ -721,28 +847,57 @@ vlan_parent_create(struct ifnet * p, vlan_parent_ref * ret_vlp)
 }
 
 static void
-vlan_parent_remove_all_vlans(vlan_parent_ref vlp)
+vlan_parent_remove_all_vlans(struct ifnet * p)
 {
     ifvlan_ref 		ifv;
-    struct ifnet *	p;
-
-    vlan_assert_lock_held();
+    int			need_vlp_release = 0;
+    ifvlan_ref		next;
+    vlan_parent_ref	vlp;
 
-    while ((ifv = LIST_FIRST(&vlp->vlp_vlan_list)) != NULL) {
-	vlan_remove(ifv);
+    vlan_lock();
+    vlp = parent_list_lookup(p);
+    if (vlp == NULL || vlan_parent_flags_detaching(vlp)) {
+	/* no VLAN's */
 	vlan_unlock();
-	vlan_if_detach(ifv->ifv_ifp);
-	vlan_lock();
+	return;
+    }
+    vlan_parent_flags_set_detaching(vlp);
+    vlan_parent_retain(vlp);
+    vlan_parent_wait(vlp, "vlan_parent_remove_all_vlans");
+    need_vlp_release++;
+    vlp = parent_list_lookup(p);
+    /* check again */
+    if (vlp == NULL) {
+	goto signal_done;
+    }
+
+    for (ifv = LIST_FIRST(&vlp->vlp_vlan_list); ifv != NULL; ifv = next) {
+	struct ifnet *	ifp = ifv->ifv_ifp;
+	int		removed;
+
+	next = LIST_NEXT(ifv, ifv_vlan_list);
+	removed = vlan_remove(ifv, FALSE);
+	if (removed) {
+	    vlan_unlock();
+	    ifnet_detach(ifp);
+	    vlan_lock();
+	}
     }
 
     /* the vlan parent has no more VLAN's */
-    p = vlp->vlp_ifp;
     ifnet_set_eflags(p, 0, IFEF_VLAN); /* clear IFEF_VLAN */
+
     LIST_REMOVE(vlp, vlp_parent_list);
+    need_vlp_release++;	/* one for being in the list */
+    need_vlp_release++; /* final reference */
+
+ signal_done:
+    vlan_parent_signal(vlp, "vlan_parent_remove_all_vlans");
     vlan_unlock();
-    vlan_parent_release(vlp);
-    vlan_lock();
 
+    while (need_vlp_release--) {
+	vlan_parent_release(vlp);
+    }
     return;
 }
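/*
 * Editorial sketch (not part of the patch): vlan_parent_remove_all_vlans()
 * never calls vlan_parent_release() with the vlan lock held; it counts the
 * references it owes in need_vlp_release and drops them all after the
 * unlock.  The shape of that idiom, reduced to its essentials:
 */
static void
deferred_release_example(vlan_parent_ref vlp)
{
	int need_vlp_release = 0;

	vlan_lock();
	vlan_parent_retain(vlp);	/* keep vlp alive while we work */
	need_vlp_release++;
	/* ... unlink from lists, accumulating further releases ... */
	vlan_unlock();

	while (need_vlp_release--)	/* releases only after unlock */
		vlan_parent_release(vlp);
}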
 
@@ -797,13 +952,16 @@ vlan_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
 	if (ifv == NULL)
 		return ENOBUFS;
 	bzero(ifv, sizeof(struct ifvlan));
+	ifv->ifv_retain_count = 1;
+	ifv->ifv_signature = IFV_SIGNATURE;
 	multicast_list_init(&ifv->ifv_multicast);
 	
 	/* use the interface name as the unique id for ifp recycle */
-	if ((unsigned int)snprintf(ifv->ifv_name, sizeof(ifv->ifv_name), "%s%d",
-				   ifc->ifc_name, unit) >= sizeof(ifv->ifv_name)) {
-		FREE(ifv, M_VLAN);
-		return (EINVAL);
+	if ((unsigned int)
+	    snprintf(ifv->ifv_name, sizeof(ifv->ifv_name), "%s%d",
+		     ifc->ifc_name, unit) >= sizeof(ifv->ifv_name)) {
+	    ifvlan_release(ifv);
+	    return (EINVAL);
 	}
 	
 	bzero(&vlan_init, sizeof(vlan_init));
@@ -828,16 +986,10 @@ vlan_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
 	error = ifnet_allocate(&vlan_init, &ifp);
 	
 	if (error) {
-		FREE(ifv, M_VLAN);
-		return (error);
+	    ifvlan_release(ifv);
+	    return (error);
 	}
 	
-#if 0
-	/* NB: flags are not set here */
-	ifnet_set_link_mib_data(ifp, &ifv->ifv_mib, sizeof ifv->ifv_mib);
-	/* NB: mtu is not set here */
-#endif
-	
 	ifnet_set_offload(ifp, 0);
 	ifnet_set_addrlen(ifp, ETHER_ADDR_LEN); /* XXX ethernet specific */
 	ifnet_set_baudrate(ifp, 0);
@@ -845,9 +997,9 @@ vlan_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
 	
 	error = ifnet_attach(ifp, NULL);
 	if (error) {
-		ifnet_release(ifp);
-		FREE(ifv, M_VLAN);
-		return (error);
+	    ifnet_release(ifp);
+	    ifvlan_release(ifv);
+	    return (error);
 	}
 	ifv->ifv_ifp = ifp;
 	
@@ -856,21 +1008,18 @@ vlan_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
 	return (0);
 }
 
-static void
-vlan_remove(ifvlan_ref ifv)
+static int
+vlan_remove(ifvlan_ref ifv, int need_to_wait)
 {
     vlan_assert_lock_held();
+    if (ifvlan_flags_detaching(ifv)) {
+	return (0);
+    }
     ifvlan_flags_set_detaching(ifv);
-    vlan_unconfig(ifv->ifv_ifp);
-    return;
+    vlan_unconfig(ifv, need_to_wait);
+    return (1);
 }
 
-static void
-vlan_if_detach(struct ifnet * ifp)
-{
-	ifnet_detach(ifp);
-    return;
-}
 
 static int
 vlan_clone_destroy(struct ifnet *ifp)
@@ -878,18 +1027,19 @@ vlan_clone_destroy(struct ifnet *ifp)
     ifvlan_ref ifv;
 
     vlan_lock();
-    ifv = ifnet_softc(ifp);
-    if (ifv == NULL || ifnet_type(ifp) != IFT_L2VLAN) {
+    ifv = ifnet_get_ifvlan_retained(ifp);
+    if (ifv == NULL) {
 	vlan_unlock();
 	return 0;
     }
-    if (ifvlan_flags_detaching(ifv)) {
+    if (vlan_remove(ifv, TRUE) == 0) {
 	vlan_unlock();
+	ifvlan_release(ifv);
 	return 0;
     }
-    vlan_remove(ifv);
     vlan_unlock();
-    vlan_if_detach(ifp);
+    ifvlan_release(ifv);
+    ifnet_detach(ifp);
 
     return 0;
 }
@@ -900,8 +1050,8 @@ vlan_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func func)
     ifvlan_ref	ifv;
 
     vlan_lock();
-    ifv = ifnet_softc(ifp);
-    if (ifv == NULL || ifvlan_flags_detaching(ifv)) {
+    ifv = ifnet_get_ifvlan_retained(ifp);
+    if (ifv == NULL) {
 	vlan_unlock();
 	return (ENODEV);
     }
@@ -925,6 +1075,7 @@ vlan_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func func)
             break;
     }
     vlan_unlock();
+    ifvlan_release(ifv);
     return 0;
 }
 
@@ -938,7 +1089,7 @@ vlan_output(struct ifnet * ifp, struct mbuf * m)
     struct ifnet *		p;
     int 			soft_vlan;
     u_short			tag;
-    vlan_parent_ref		vlp;
+    vlan_parent_ref		vlp = NULL;
 	
     if (m == 0) {
 	return (0);
@@ -948,18 +1099,13 @@ vlan_output(struct ifnet * ifp, struct mbuf * m)
 	return (0);
     }
     vlan_lock();
-    ifv = (ifvlan_ref)ifnet_softc(ifp);
-    if (ifv == NULL || ifvlan_flags_detaching(ifv)
-	|| ifvlan_flags_ready(ifv) == 0) {
-	vlan_unlock();
-	m_freem_list(m);
-	return (0);
+    ifv = ifnet_get_ifvlan_retained(ifp);
+    if (ifv == NULL || ifvlan_flags_ready(ifv) == 0) {
+	goto unlock_done;
     }
-    vlp = ifv->ifv_vlp;
+    vlp = ifvlan_get_vlan_parent_retained(ifv);
     if (vlp == NULL) {
-	vlan_unlock();
-	m_freem_list(m);
-	return (0);
+	goto unlock_done;
     }
     p = vlp->vlp_ifp;
     (void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
@@ -968,12 +1114,16 @@ vlan_output(struct ifnet * ifp, struct mbuf * m)
     tag = ifv->ifv_tag;
     encaplen = ifv->ifv_encaplen;
     vlan_unlock();
+
+    ifvlan_release(ifv);
+    vlan_parent_release(vlp);
+
     vlan_bpf_output(ifp, m, bpf_func);
 	
     /* do not run parent's if_output() if the parent is not up */
     if ((ifnet_flags(p) & (IFF_UP | IFF_RUNNING)) != (IFF_UP | IFF_RUNNING)) {
 	m_freem(m);
-	ifp->if_collisions++;
+	atomic_add_64(&ifp->if_collisions, 1);
 	return (0);
     }
     /*
@@ -992,7 +1142,7 @@ vlan_output(struct ifnet * ifp, struct mbuf * m)
 	if (m == NULL) {
 	    printf("%s%d: unable to prepend VLAN header\n", ifnet_name(ifp),
 		   ifnet_unit(ifp));
-	    ifp->if_oerrors++;
+	    atomic_add_64(&ifp->if_oerrors, 1);
 	    return (0);
 	}
 	/* M_PREPEND takes care of m_len, m_pkthdr.len for us */
@@ -1001,7 +1151,7 @@ vlan_output(struct ifnet * ifp, struct mbuf * m)
 	    if (m == NULL) {
 		printf("%s%d: unable to pullup VLAN header\n", ifnet_name(ifp),
 		       ifnet_unit(ifp));
-		ifp->if_oerrors++;
+		atomic_add_64(&ifp->if_oerrors, 1);
 		return (0);
 	    }
 	}
@@ -1017,7 +1167,19 @@ vlan_output(struct ifnet * ifp, struct mbuf * m)
 	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
 	evl->evl_tag = htons(tag);
     }
-    return ifnet_output_raw(p, PF_VLAN, m);
+    return (ifnet_output_raw(p, PF_VLAN, m));
+
+ unlock_done:
+    vlan_unlock();
+    if (ifv != NULL) {
+	ifvlan_release(ifv);
+    }
+    if (vlp != NULL) {
+	vlan_parent_release(vlp);
+    }
+    m_freem_list(m);
+    return (0);
 }
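/*
 * Editorial sketch (not part of the patch): when the parent NIC lacks
 * hardware tagging (the soft_vlan case above), vlan_output() prepends the
 * 802.1Q header in software.  A minimal standalone version of that step,
 * mirroring the logic in the hunk above:
 */
static struct mbuf *
vlan_encap_example(struct mbuf *m, u_short tag)
{
	struct ether_vlan_header *evl;
	const int encaplen = 4;		/* 802.1Q tag: TPID (2) + TCI (2) */

	M_PREPEND(m, encaplen, M_DONTWAIT);
	if (m == NULL)
		return (NULL);		/* M_PREPEND freed the chain */
	if (m->m_len < (int)sizeof(*evl) &&
	    (m = m_pullup(m, sizeof(*evl))) == NULL)
		return (NULL);
	/* slide the original Ethernet header to the new front */
	bcopy(mtod(m, char *) + encaplen, mtod(m, char *), ETHER_HDR_LEN);
	evl = mtod(m, struct ether_vlan_header *);
	evl->evl_proto = evl->evl_encap_proto;	/* original ethertype */
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(tag);
	return (m);
}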
 
 static int
@@ -1120,20 +1282,17 @@ vlan_input(ifnet_t p, __unused protocol_family_t protocol,
     return 0;
 }
 
-#define VLAN_CONFIG_PROGRESS_VLP_RETAINED	0x1
-#define VLAN_CONFIG_PROGRESS_IN_LIST		0x2
-
 static int
 vlan_config(struct ifnet * ifp, struct ifnet * p, int tag)
 {
     int			error;
-    int			first_vlan = 0;
+    int			first_vlan = FALSE;
     ifvlan_ref 		ifv = NULL;
-    vlan_parent_ref	new_vlp = NULL;
+    int			ifv_added = FALSE;
     int			need_vlp_release = 0;
+    vlan_parent_ref	new_vlp = NULL;
     ifnet_offload_t	offload;
     u_int16_t		parent_flags;
-    u_int32_t		progress = 0;
     vlan_parent_ref	vlp = NULL;
 
     /* pre-allocate space for vlan_parent, in case we're first */
@@ -1143,14 +1302,19 @@ vlan_config(struct ifnet * ifp, struct ifnet * p, int tag)
     }
 
     vlan_lock();
-    ifv = (ifvlan_ref)ifnet_softc(ifp);
-    if (ifv != NULL && ifv->ifv_vlp != NULL) {
+    ifv = ifnet_get_ifvlan_retained(ifp);
+    if (ifv == NULL || ifv->ifv_vlp != NULL) {
 	vlan_unlock();
+	if (ifv != NULL) {
+	    ifvlan_release(ifv);
+	}
 	vlan_parent_release(new_vlp);
 	return (EBUSY);
     }
     vlp = parent_list_lookup(p);
     if (vlp != NULL) {
+	vlan_parent_retain(vlp);
+	need_vlp_release++;
 	if (vlan_parent_lookup_tag(vlp, tag) != NULL) {
 	    /* already a VLAN with that tag on this interface */
 	    error = EADDRINUSE;
@@ -1158,28 +1322,38 @@ vlan_config(struct ifnet * ifp, struct ifnet * p, int tag)
 	}
     }
     else {
+	/* one for being in the list */
+	vlan_parent_retain(new_vlp);
+
 	/* we're the first VLAN on this interface */
 	LIST_INSERT_HEAD(&g_vlan->parent_list, new_vlp, vlp_parent_list);
 	vlp = new_vlp;
+
+	vlan_parent_retain(vlp);
+	need_vlp_release++;
     }
 
     /* need to wait to ensure no one else is trying to add/remove */
-    vlan_parent_retain(vlp);
-    progress |= VLAN_CONFIG_PROGRESS_VLP_RETAINED;
     vlan_parent_wait(vlp, "vlan_config");
 
-    ifv = (ifvlan_ref)ifnet_softc(ifp);
-    if (ifv == NULL) {
-	error = EOPNOTSUPP;
+    if (ifnet_get_ifvlan(ifp) != ifv) {
+	error = EINVAL;
 	goto signal_done;
     }
+
+    /* check again because someone might have gotten in */
+    if (parent_list_lookup(p) != vlp) {
+	error = EBUSY;
+	goto signal_done;
+    }
+
     if (vlan_parent_flags_detaching(vlp)
 	|| ifvlan_flags_detaching(ifv) || ifv->ifv_vlp != NULL) {
 	error = EBUSY;
 	goto signal_done;
     }
 
-    /* check again because someone might have gotten in */
+    /* check again because someone might have gotten the tag */
     if (vlan_parent_lookup_tag(vlp, tag) != NULL) {
 	/* already a VLAN with that tag on this interface */
 	error = EADDRINUSE;
@@ -1187,10 +1361,11 @@ vlan_config(struct ifnet * ifp, struct ifnet * p, int tag)
     }
 
     if (vlan_parent_no_vlans(vlp)) {
-	first_vlan = 1;
+	first_vlan = TRUE;
     }
     vlan_parent_add_vlan(vlp, ifv, tag);
-    progress |= VLAN_CONFIG_PROGRESS_IN_LIST;
+    ifvlan_retain(ifv);	/* parent references ifv */
+    ifv_added = TRUE;
 
     /* check whether bond interface is using parent interface */
     ifnet_lock_exclusive(p);
@@ -1271,34 +1446,44 @@ vlan_config(struct ifnet * ifp, struct ifnet * p, int tag)
 	/* throw it away, it wasn't needed */
 	vlan_parent_release(new_vlp);
     }
+    if (ifv != NULL) {
+	ifvlan_release(ifv);
+    }
     return 0;
 
  signal_done:
     vlan_assert_lock_held();
-    vlan_parent_signal(vlp, "vlan_config");
 
- unlock_done:
-    if ((progress & VLAN_CONFIG_PROGRESS_IN_LIST) != 0) {
+    if (ifv_added) {
 	vlan_parent_remove_vlan(vlp, ifv);
+	if (!vlan_parent_flags_detaching(vlp) && vlan_parent_no_vlans(vlp)) {
+	    /* the vlan parent has no more VLAN's */
+	    ifnet_set_eflags(p, 0, IFEF_VLAN);
+	    LIST_REMOVE(vlp, vlp_parent_list);
+	    /* release outside of the lock below */
+	    need_vlp_release++;
+
+	    /* one for being in the list */
+	    need_vlp_release++;
+	}
     }
-    if (!vlan_parent_flags_detaching(vlp) && vlan_parent_no_vlans(vlp)) {
-	/* the vlan parent has no more VLAN's */
-	ifnet_set_eflags(p, 0, IFEF_VLAN);
-	LIST_REMOVE(vlp, vlp_parent_list);
-	/* release outside of the lock below */
-	need_vlp_release = 1;
-    }
+    vlan_parent_signal(vlp, "vlan_config");
+
+ unlock_done:
     vlan_unlock();
 
-    if ((progress & VLAN_CONFIG_PROGRESS_VLP_RETAINED) != 0) {
-	vlan_parent_release(vlp);
-    }
-    if (need_vlp_release) {
+    while (need_vlp_release--) {
 	vlan_parent_release(vlp);
     }
     if (new_vlp != vlp) {
 	vlan_parent_release(new_vlp);
     }
+    if (ifv != NULL) {
+	if (ifv_added) {
+	    ifvlan_release(ifv);
+	}
+	ifvlan_release(ifv);
+    }
     return (error);
 }
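/*
 * Editorial sketch (not part of the patch): vlan_config() allocates
 * new_vlp before taking the vlan lock, because allocation may sleep; if
 * the lookup under the lock finds an existing parent, the spare is
 * released on the way out ("throw it away, it wasn't needed").  The
 * idiom, with the list insertion and refcounting elided:
 */
static int
preallocate_example(struct ifnet *p)
{
	vlan_parent_ref	new_vlp = NULL;
	vlan_parent_ref	vlp;
	int		error;

	error = vlan_parent_create(p, &new_vlp);	/* may sleep */
	if (error != 0)
		return (error);
	vlan_lock();
	vlp = parent_list_lookup(p);
	if (vlp == NULL)
		vlp = new_vlp;		/* we are first: use the spare */
	vlan_unlock();
	if (new_vlp != vlp)
		vlan_parent_release(new_vlp);	/* spare was not needed */
	return (0);
}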
 
@@ -1311,7 +1496,7 @@ vlan_link_event(struct ifnet * ifp, struct ifnet * p)
     bzero(&ifmr, sizeof(ifmr));
     snprintf(ifmr.ifm_name, sizeof(ifmr.ifm_name),
 	     "%s%d", ifnet_name(p), ifnet_unit(p));
-	if (ifnet_ioctl(p, 0, SIOCGIFMEDIA, &ifmr) == 0
+    if (ifnet_ioctl(p, 0, SIOCGIFMEDIA, &ifmr) == 0
 	&& ifmr.ifm_count > 0 && ifmr.ifm_status & IFM_AVALID) {
 	u_int32_t	event;
 	
@@ -1323,36 +1508,36 @@ vlan_link_event(struct ifnet * ifp, struct ifnet * p)
 }
 
 static int
-vlan_unconfig(struct ifnet * ifp)
+vlan_unconfig(ifvlan_ref ifv, int need_to_wait)
 {
-    int			error = 0;
-    ifvlan_ref		ifv;
-    int			last_vlan = 0;
+    struct ifnet *	ifp = ifv->ifv_ifp;
+    int			last_vlan = FALSE;
+    int			need_ifv_release = 0;
     int			need_vlp_release = 0;
     struct ifnet *	p;
     vlan_parent_ref	vlp;
 
     vlan_assert_lock_held();
-    ifv = (ifvlan_ref)ifnet_softc(ifp);
-    if (ifv == NULL) {
-	return (0);
-    }
     vlp = ifv->ifv_vlp;
     if (vlp == NULL) {
 	return (0);
     }
-    vlan_parent_retain(vlp);
-    vlan_parent_wait(vlp, "vlan_unconfig");
+    if (need_to_wait) {
+	need_vlp_release++;
+	vlan_parent_retain(vlp);
+	vlan_parent_wait(vlp, "vlan_unconfig");
 
-    /* check again because another thread could be in vlan_unconfig */
-    ifv = (ifvlan_ref)ifnet_softc(ifp);
-    if (ifv == NULL) {
-	goto signal_done;
-    }
-    if (ifv->ifv_vlp != vlp) {
-	/* vlan parent changed */
-	goto signal_done;
+        /* check again because another thread could be in vlan_unconfig */
+	if (ifv != ifnet_get_ifvlan(ifp)) {
+	    goto signal_done;
+	}
+	if (ifv->ifv_vlp != vlp) {
+	    /* vlan parent changed */
+	    goto signal_done;
+	}
     }
+
+    /* ifv has a reference on vlp, need to remove it */
     need_vlp_release++;
     p = vlp->vlp_ifp;
 
@@ -1362,56 +1547,67 @@ vlan_unconfig(struct ifnet * ifp)
 	    printf("vlan_unconfig: last vlan on %s%d\n",
 		   ifnet_name(p), ifnet_unit(p));
 	}
-	last_vlan = 1;
+	last_vlan = TRUE;
     }
 
     /* back-out any effect our mtu might have had on the parent */
-    (void)vlan_new_mtu(ifp, ETHERMTU - ifv->ifv_mtufudge);
+    (void)ifvlan_new_mtu(ifv, ETHERMTU - ifv->ifv_mtufudge);
 
     vlan_unlock();
 
-    /* detach VLAN "protocol" */
-    if (last_vlan) {
-	(void)vlan_detach_protocol(p);
-    }
-
     /* un-join multicast on parent interface */
     (void)multicast_list_remove(&ifv->ifv_multicast);
 
     /* Clear our MAC address. */
     ifnet_set_lladdr_and_type(ifp, NULL, 0, IFT_L2VLAN);
 
-    vlan_lock();
+    /* detach VLAN "protocol" */
+    if (last_vlan) {
+	(void)vlan_detach_protocol(p);
+    }
 
-    /* Disconnect from parent. */
-    vlan_parent_remove_vlan(vlp, ifv);
+    vlan_lock();
 
     /* return to the state we were in before SIFVLAN */
     ifnet_set_mtu(ifp, 0);
     ifnet_set_flags(ifp, 0, 
 		    IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX | IFF_RUNNING);
     ifnet_set_offload(ifp, 0);
-    ifv->ifv_flags = 0;
     ifv->ifv_mtufudge = 0;
 
-    if (!vlan_parent_flags_detaching(vlp) && vlan_parent_no_vlans(vlp)) {
+    /* Disconnect from parent. */
+    vlan_parent_remove_vlan(vlp, ifv);
+    ifv->ifv_flags = 0;
+
+    /* vlan_parent has reference to ifv, remove it */
+    need_ifv_release++;
+
+    /* from this point on, no more referencing ifv */
+    if (last_vlan && !vlan_parent_flags_detaching(vlp)) {
 	/* the vlan parent has no more VLAN's */
 	ifnet_set_eflags(p, 0, IFEF_VLAN);
 	LIST_REMOVE(vlp, vlp_parent_list);
+
+	/* one for being in the list */
+	need_vlp_release++;
+
 	/* release outside of the lock below */
 	need_vlp_release++;
     }
 
  signal_done:
-    vlan_parent_signal(vlp, "vlan_unconfig");
+    if (need_to_wait) {
+	vlan_parent_signal(vlp, "vlan_unconfig");
+    }
     vlan_unlock();
-    vlan_parent_release(vlp);	/* one because we waited */
-
-    while (need_vlp_release--) {
+    while (need_ifv_release--) {
+	ifvlan_release(ifv);
+    }
+    while (need_vlp_release--) {	/* references to vlp */
 	vlan_parent_release(vlp);
     }
     vlan_lock();
-    return (error);
+    return (0);
 }
 
 static int
@@ -1422,9 +1618,9 @@ vlan_set_promisc(struct ifnet * ifp)
     vlan_parent_ref		vlp;
 
     vlan_lock();
-    ifv = (ifvlan_ref)ifnet_softc(ifp);
-    if (ifv == NULL || ifvlan_flags_detaching(ifv)) {
-	error = (ifv == NULL) ? EOPNOTSUPP : EBUSY;
+    ifv = ifnet_get_ifvlan_retained(ifp);
+    if (ifv == NULL) {
+	error = EBUSY;
 	goto done;
     }
 
@@ -1449,22 +1645,24 @@ vlan_set_promisc(struct ifnet * ifp)
     }
  done:
     vlan_unlock();
+    if (ifv != NULL) {
+	ifvlan_release(ifv);
+    }
     return (error);
 }
 
 static int
-vlan_new_mtu(struct ifnet * ifp, int mtu)
+ifvlan_new_mtu(ifvlan_ref ifv, int mtu)
 {
     struct ifdevmtu *	devmtu_p;
     int			error = 0;
-    ifvlan_ref		ifv;
+    struct ifnet * 	ifp = ifv->ifv_ifp;
     int			max_mtu;
     int			new_mtu = 0;
     int			req_mtu;
     vlan_parent_ref	vlp;
 
     vlan_assert_lock_held();
-    ifv = (ifvlan_ref)ifnet_softc(ifp);
     vlp = ifv->ifv_vlp;
     devmtu_p = &vlp->vlp_devmtu;
     req_mtu = mtu + ifv->ifv_mtufudge;
@@ -1504,44 +1702,45 @@ vlan_set_mtu(struct ifnet * ifp, int mtu)
 	return (EINVAL);
     }
     vlan_lock();
-    ifv = (ifvlan_ref)ifnet_softc(ifp);
-    if (ifv == NULL || ifvlan_flags_detaching(ifv)) {
+    ifv = ifnet_get_ifvlan_retained(ifp);
+    if (ifv == NULL) {
 	vlan_unlock();
-	return ((ifv == NULL) ? EOPNOTSUPP : EBUSY);
+	return (EBUSY);
     }
-    vlp = ifv->ifv_vlp;
-    if (vlp == NULL || vlan_parent_flags_detaching(vlp)) {
+    vlp = ifvlan_get_vlan_parent_retained(ifv);
+    if (vlp == NULL) {
 	vlan_unlock();
+	ifvlan_release(ifv);
 	if (mtu != 0) {
 	    return (EINVAL);
 	}
 	return (0);
     }
-    vlan_parent_retain(vlp);
     vlan_parent_wait(vlp, "vlan_set_mtu");
 
     /* check again, something might have changed */
-    ifv = (ifvlan_ref)ifnet_softc(ifp);
-    if (ifv == NULL || ifvlan_flags_detaching(ifv)) {
-	error = (ifv == NULL) ? EOPNOTSUPP : EBUSY;
+    if (ifnet_get_ifvlan(ifp) != ifv
+	|| ifvlan_flags_detaching(ifv)) {
+	error = EBUSY;
 	goto signal_done;
     }
     if (ifv->ifv_vlp != vlp) {
 	/* vlan parent changed */
 	goto signal_done;
     }
-    if (vlp == NULL || vlan_parent_flags_detaching(vlp)) {
+    if (vlan_parent_flags_detaching(vlp)) {
 	if (mtu != 0) {
 	    error = EINVAL;
 	}
 	goto signal_done;
     }
-    error = vlan_new_mtu(ifp, mtu);
+    error = ifvlan_new_mtu(ifv, mtu);
 
  signal_done:
     vlan_parent_signal(vlp, "vlan_set_mtu");
     vlan_unlock();
     vlan_parent_release(vlp);
+    ifvlan_release(ifv);
 
     return (error);
 }
@@ -1685,7 +1884,10 @@ vlan_ioctl(ifnet_t ifp, u_long cmd, void * data)
 	    
 	    /* generate a link event based on the state of the parent */
 	    vlan_link_event(ifp, p);
-	} else {
+	} 
+	else {
+	    int		need_link_event = FALSE;
+
 	    vlan_lock();
 	    ifv = (ifvlan_ref)ifnet_softc(ifp);
 	    if (ifv == NULL || ifvlan_flags_detaching(ifv)) {
@@ -1693,9 +1895,9 @@ vlan_ioctl(ifnet_t ifp, u_long cmd, void * data)
 		error = (ifv == NULL ? EOPNOTSUPP : EBUSY);
 		break;
 	    }
-	    error = vlan_unconfig(ifp);
+	    need_link_event = vlan_remove(ifv, TRUE);
 	    vlan_unlock();
-	    if (error == 0) {
+	    if (need_link_event) {
 		interface_link_event(ifp, KEV_DL_LINK_OFF);
 	    }
 	}
@@ -1748,22 +1950,20 @@ vlan_if_free(struct ifnet * ifp)
     if (ifp == NULL) {
 	return;
     }
-    vlan_lock();
     ifv = (ifvlan_ref)ifnet_softc(ifp);
     if (ifv == NULL) {
-	vlan_unlock();
 	return;
     }
-    vlan_unlock();
+    ifvlan_release(ifv);
     ifnet_release(ifp);
-    FREE(ifv, M_VLAN);
+    return;
 }
 
 static void
 vlan_event(struct ifnet	* p, __unused protocol_family_t protocol,
 		   const struct kev_msg * event)
 {
-    vlan_parent_ref	vlp;
+    int			event_code;
 
     /* Check if the interface we are attached to is being detached */
     if (event->vendor_code != KEV_VENDOR_APPLE
@@ -1771,43 +1971,28 @@ vlan_event(struct ifnet	* p, __unused protocol_family_t protocol,
 	|| event->kev_subclass != KEV_DL_SUBCLASS) {
 	return;
     }
-    switch (event->event_code) {
-    case KEV_DL_IF_DETACHING:
+    event_code = event->event_code;
+    switch (event_code) {
     case KEV_DL_LINK_OFF:
     case KEV_DL_LINK_ON:
+	vlan_parent_link_event(p, event_code);
 	break;
     default:
 	return;
     }
-    vlan_lock();
-    if ((ifnet_eflags(p) & IFEF_VLAN) == 0) {
-	vlan_unlock();
-	/* no VLAN's */
-	return;
-    }
-    vlp = parent_list_lookup(p);
-    if (vlp == NULL) {
-	/* no VLAN's */
-	vlan_unlock();
-	return;
-    }
-    switch (event->event_code) {
-    case KEV_DL_IF_DETACHING:
-	vlan_parent_flags_set_detaching(vlp);
-	vlan_parent_remove_all_vlans(vlp);
-	break;
-		
-    case KEV_DL_LINK_OFF:
-    case KEV_DL_LINK_ON:
-	vlan_parent_link_event(vlp, event->event_code);
-	break;
-    default:
-	break;
-    }
-    vlan_unlock();
     return;
 }
 
+static errno_t
+vlan_detached(ifnet_t p, __unused protocol_family_t protocol)
+{
+    if (ifnet_is_attached(p, 0) == 0) {
+	/* if the parent isn't attached, remove all VLANs */
+	vlan_parent_remove_all_vlans(p);
+    }
+    return (0);
+}
+
 static void
 interface_link_event(struct ifnet * ifp, u_int32_t event_code)
 {
@@ -1817,6 +2002,7 @@ interface_link_event(struct ifnet * ifp, u_int32_t event_code)
 	char			if_name[IFNAMSIZ];
     } event;
 
+    bzero(&event, sizeof(event));
     event.header.total_size    = sizeof(event);
     event.header.vendor_code   = KEV_VENDOR_APPLE;
     event.header.kev_class     = KEV_NETWORK_CLASS;
@@ -1830,13 +2016,45 @@ interface_link_event(struct ifnet * ifp, u_int32_t event_code)
 }
 
 static void
-vlan_parent_link_event(vlan_parent_ref vlp, u_int32_t event_code)
+vlan_parent_link_event(struct ifnet * p, u_int32_t event_code)
 {
-    ifvlan_ref ifv;
+    ifvlan_ref 		ifv;
+    vlan_parent_ref 	vlp;
 
+    vlan_lock();
+    if ((ifnet_eflags(p) & IFEF_VLAN) == 0) {
+	vlan_unlock();
+	/* no VLAN's */
+	return;
+    }
+    vlp = parent_list_lookup(p);
+    if (vlp == NULL) {
+	/* no VLAN's */
+	vlan_unlock();
+	return;
+    }
+
+    vlan_parent_retain(vlp);
+    vlan_parent_wait(vlp, "vlan_parent_link_event");
+    if (vlan_parent_flags_detaching(vlp)) {
+	goto signal_done;
+    }
+
+    vlan_unlock();
+
+    /* vlan_parent_wait() gives us exclusive access to the list */
     LIST_FOREACH(ifv, &vlp->vlp_vlan_list, ifv_vlan_list) {
-	interface_link_event(ifv->ifv_ifp, event_code);
+	struct ifnet *	ifp = ifv->ifv_ifp;
+
+	interface_link_event(ifp, event_code);
     }
+
+    vlan_lock();
+
+ signal_done:
+    vlan_parent_signal(vlp, "vlan_parent_link_event");
+    vlan_unlock();
+    vlan_parent_release(vlp);
     return;
 
 }
@@ -1860,6 +2078,7 @@ vlan_attach_protocol(struct ifnet *ifp)
     bzero(&reg, sizeof(reg));
     reg.input            = vlan_input;
     reg.event            = vlan_event;
+    reg.detached         = vlan_detached;
     error = ifnet_attach_protocol(ifp, PF_VLAN, &reg);
     if (error) {
 	printf("vlan_proto_attach(%s%d) ifnet_attach_protocol failed, %d\n",
diff --git a/bsd/net/kext_net.h b/bsd/net/kext_net.h
index 6215515a3..48ade0710 100644
--- a/bsd/net/kext_net.h
+++ b/bsd/net/kext_net.h
@@ -46,48 +46,29 @@
  * Internal implementation bits
  */
 
-struct socket_filter;
-
-#define	SFEF_DETACHUSEZERO	0x1	/* Detach when use reaches zero */
-#define	SFEF_UNREGISTERING	0x2	/* Remove due to unregister */
-#define	SFEF_DETACHXREF		0x4	/* Extra reference held for detach */
-
-struct socket_filter_entry {
-	struct socket_filter_entry	*sfe_next_onsocket;
-	struct socket_filter_entry	*sfe_next_onfilter;
-	
-	struct socket_filter		*sfe_filter;
-	struct socket				*sfe_socket;
-	void						*sfe_cookie;
-	
-	u_int32_t					sfe_flags;
-};
-
-#define	SFF_DETACHING		0x1
-
-struct socket_filter {
-	TAILQ_ENTRY(socket_filter)	sf_protosw_next;	
-	TAILQ_ENTRY(socket_filter)	sf_global_next;
-	struct socket_filter_entry	*sf_entry_head;
-	
-	struct protosw				*sf_proto;
-	struct sflt_filter			sf_filter;
-	u_int32_t					sf_flags;
-	u_int32_t					sf_usecount;
-};
-
-TAILQ_HEAD(socket_filter_list, socket_filter);
-
 /* Private, internal implementation functions */
-void	sflt_init(void) __attribute__((section("__TEXT, initcode")));
-void	sflt_initsock(struct socket *so);
-void	sflt_termsock(struct socket *so);
-void	sflt_use(struct socket *so);
-void	sflt_unuse(struct socket *so);
-void	sflt_notify(struct socket *so, sflt_event_t event, void *param);
-int		sflt_data_in(struct socket *so, const struct sockaddr *from, mbuf_t *data,
-					 mbuf_t *control, sflt_data_flag_t flags, int *filtered);
-int		sflt_attach_private(struct socket *so, struct socket_filter *filter, sflt_handle handle, int locked);
+extern void	sflt_init(void) __attribute__((section("__TEXT, initcode")));
+extern void	sflt_initsock(struct socket *so);
+extern void	sflt_termsock(struct socket *so);
+extern errno_t	sflt_attach_internal(struct socket *so, sflt_handle handle);
+extern void	sflt_notify(struct socket *so, sflt_event_t event, void *param);
+extern int	sflt_ioctl(struct socket *so, u_long cmd, caddr_t data);
+extern int	sflt_bind(struct socket *so, const struct sockaddr *nam);
+extern int	sflt_listen(struct socket *so);
+extern int	sflt_accept(struct socket *head, struct socket *so,
+						const struct sockaddr *local,
+						const struct sockaddr	*remote);
+extern int	sflt_getsockname(struct socket *so, struct sockaddr **local);
+extern int	sflt_getpeername(struct socket *so, struct sockaddr **remote);
+extern int	sflt_connectin(struct socket *head, const struct sockaddr *remote);
+extern int	sflt_connectout(struct socket *so, const struct sockaddr *nam);
+extern int	sflt_setsockopt(struct socket *so, struct sockopt *sopt);
+extern int	sflt_getsockopt(struct socket *so, struct sockopt *sopt);
+extern int	sflt_data_out(struct socket *so, const struct sockaddr	*to,
+						  mbuf_t *data, mbuf_t *control,
+						  sflt_data_flag_t flags);
+extern int	sflt_data_in(struct socket *so, const struct sockaddr *from,
+						 mbuf_t *data, mbuf_t *control, sflt_data_flag_t flags);
 
 #endif /* BSD_KERNEL_PRIVATE */
 
diff --git a/bsd/net/kpi_interface.c b/bsd/net/kpi_interface.c
index e56564c58..82ba11b03 100644
--- a/bsd/net/kpi_interface.c
+++ b/bsd/net/kpi_interface.c
@@ -37,6 +37,7 @@
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/kpi_mbuf.h>
+#include <sys/mcache.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/dlil.h>
@@ -55,9 +56,6 @@
 #define TOUCHLASTCHANGE(__if_lastchange) microtime(__if_lastchange)
 #endif
 
-extern struct dlil_threading_info *dlil_lo_thread_ptr;
-extern int dlil_multithreaded_input;
-
 static errno_t
 ifnet_list_get_common(ifnet_family_t, boolean_t, ifnet_t **, u_int32_t *);
 
@@ -184,31 +182,15 @@ ifnet_allocate(
 }
 
 errno_t
-ifnet_reference(
-	ifnet_t	ifp)
+ifnet_reference(ifnet_t ifp)
 {
-	int	oldval;
-	
-	if (ifp == NULL) return EINVAL;
-	
-	oldval = OSIncrementAtomic(&ifp->if_refcnt);
-	
-	return 0;
+	return (dlil_if_ref(ifp));
 }
 
 errno_t
-ifnet_release(
-	ifnet_t	ifp)
+ifnet_release(ifnet_t ifp)
 {
-	int	oldval;
-	
-	if (ifp == NULL) return EINVAL;
-	
-	oldval = OSDecrementAtomic(&ifp->if_refcnt);
-	if (oldval == 0)
-		panic("ifnet_release - refcount decremented past zero!");
-	
-	return 0;
+	return (dlil_if_free(ifp));
 }
 
 errno_t 
@@ -256,27 +238,22 @@ ifnet_index(
 }
 
 errno_t
-ifnet_set_flags(
-	ifnet_t interface,
-	u_int16_t new_flags,
-	u_int16_t mask)
+ifnet_set_flags(ifnet_t interface, u_int16_t new_flags, u_int16_t mask)
 {
-	int lock;
-	
-	if (interface == NULL) return EINVAL;
-	lock = (interface->if_lock != 0);
-	
-	if (lock) ifnet_lock_exclusive(interface);
-	
+	if (interface == NULL)
+		return (EINVAL);
+
+	ifnet_lock_exclusive(interface);
+
 	/* If we are modifying the up/down state, call if_updown */
-	if (lock && (mask & IFF_UP) != 0) {
+	if ((mask & IFF_UP) != 0) {
 		if_updown(interface, (new_flags & IFF_UP) == IFF_UP);
 	}
-	
+
 	interface->if_flags = (new_flags & mask) | (interface->if_flags & ~mask);
-	if (lock) ifnet_lock_done(interface);
-	
-	return 0;
+	ifnet_lock_done(interface);
+
+	return (0);
 }
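/*
 * Editorial sketch (not part of the patch): the flag setters all perform
 * the same masked read-modify-write under the exclusive ifnet lock:
 *
 *	value = (new_flags & mask) | (value & ~mask);
 *
 * so a caller can flip one bit without disturbing the rest, e.g.:
 */
static errno_t
mark_interface_up_example(ifnet_t ifp)
{
	/* touches only IFF_UP; also triggers if_updown() internally */
	return (ifnet_set_flags(ifp, IFF_UP, IFF_UP));
}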
 
 u_int16_t
@@ -287,21 +264,16 @@ ifnet_flags(
 }
 
 errno_t
-ifnet_set_eflags(
-	ifnet_t interface,
-	u_int32_t new_flags,
-	u_int32_t mask)
+ifnet_set_eflags(ifnet_t interface, u_int32_t new_flags, u_int32_t mask)
 {
-	int lock;
-	
-	if (interface == NULL) return EINVAL;
-	lock = (interface->if_lock != 0);
-	
-	if (lock) ifnet_lock_exclusive(interface);
+	if (interface == NULL)
+		return (EINVAL);
+
+	ifnet_lock_exclusive(interface);
 	interface->if_eflags = (new_flags & mask) | (interface->if_eflags & ~mask);
-	if (lock) ifnet_lock_done(interface);
-	
-	return 0;
+	ifnet_lock_done(interface);
+
+	return (0);
 }
 
 u_int32_t
@@ -312,19 +284,28 @@ ifnet_eflags(
 }
 
 errno_t
-ifnet_set_idle_flags(ifnet_t ifp, u_int32_t new_flags, u_int32_t mask)
+ifnet_set_idle_flags_locked(ifnet_t ifp, u_int32_t new_flags, u_int32_t mask)
 {
-#if IFNET_ROUTE_REFCNT
-	int lock, before, after;
+	int before, after;
 
 	if (ifp == NULL)
 		return (EINVAL);
 
-	lck_mtx_lock(rnh_lock);
+	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
+	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
 
-	lock = (ifp->if_lock != NULL);
-	if (lock)
-		ifnet_lock_exclusive(ifp);
+	/*
+	 * If this is called prior to ifnet attach, the actual work will
+	 * be done at attach time.  Otherwise, if it is called after
+	 * ifnet detach, then it is a no-op.
+	 */
+	if (!ifnet_is_attached(ifp, 0)) {
+		ifp->if_idle_new_flags = new_flags;
+		ifp->if_idle_new_flags_mask = mask;
+		return (0);
+	} else {
+		ifp->if_idle_new_flags = ifp->if_idle_new_flags_mask = 0;
+	}
 
 	before = ifp->if_idle_flags;
 	ifp->if_idle_flags = (new_flags & mask) | (ifp->if_idle_flags & ~mask);
@@ -345,49 +326,140 @@ ifnet_set_idle_flags(ifnet_t ifp, u_int32_t new_flags, u_int32_t mask)
 			rt_aggdrain(1);
 	}
 
-	if (lock)
-		ifnet_lock_done(ifp);
+	return (0);
+}
+
+errno_t
+ifnet_set_idle_flags(ifnet_t ifp, u_int32_t new_flags, u_int32_t mask)
+{
+	errno_t err;
 
+	lck_mtx_lock(rnh_lock);
+	ifnet_lock_exclusive(ifp);
+	err = ifnet_set_idle_flags_locked(ifp, new_flags, mask);
+	ifnet_lock_done(ifp);
 	lck_mtx_unlock(rnh_lock);
 
-	return (0);
-#else
-#pragma unused(ifp, new_flags, mask)
-	return (ENOTSUP);
-#endif /* IFNET_ROUTE_REFCNT */
+	return (err);
 }
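/*
 * Editorial note (not part of the patch): splitting the function into an
 * unlocked wrapper and a _locked core documents the lock ordering --
 * rnh_lock is always taken before the per-ifnet lock -- and lets dlil
 * internals that already hold both locks call the core directly.  The
 * core asserts its preconditions instead of re-acquiring:
 */
static void
idle_flags_assert_example(ifnet_t ifp)
{
	/* callers must already hold both locks, in this order */
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	/* ... idle-flag bookkeeping would go here ... */
}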
 
 u_int32_t
 ifnet_idle_flags(ifnet_t ifp)
 {
-#if IFNET_ROUTE_REFCNT
 	return ((ifp == NULL) ? 0 : ifp->if_idle_flags);
-#else
-#pragma unused(ifp)
-	return (0);
-#endif /* IFNET_ROUTE_REFCNT */
+}
+
+errno_t ifnet_set_capabilities_supported(ifnet_t ifp, u_int32_t new_caps,
+    u_int32_t mask)
+{
+	errno_t error = 0;
+	int tmp;
+
+	if (ifp == NULL)
+		return EINVAL;
+	
+	ifnet_lock_exclusive(ifp);
+	tmp = (new_caps & mask) | (ifp->if_capabilities & ~mask);
+	if ((tmp & ~IFCAP_VALID))
+		error = EINVAL;
+	else
+		ifp->if_capabilities = tmp;
+	ifnet_lock_done(ifp);
+	
+	return error;
+}
+
+u_int32_t ifnet_capabilities_supported(ifnet_t ifp)
+{
+	return ((ifp == NULL) ? 0 : ifp->if_capabilities);
+}
+
+errno_t ifnet_set_capabilities_enabled(ifnet_t ifp, u_int32_t new_caps,
+    u_int32_t mask)
+{
+	errno_t error = 0;
+	int tmp;
+	struct kev_msg        ev_msg;
+	struct net_event_data ev_data;
+
+	if (ifp == NULL)
+		return EINVAL;
+	
+	ifnet_lock_exclusive(ifp);
+	tmp = (new_caps & mask) | (ifp->if_capenable & ~mask);
+	if ((tmp & ~IFCAP_VALID) || (tmp & ~ifp->if_capabilities))
+		error = EINVAL;
+	else
+		ifp->if_capenable = tmp;
+	ifnet_lock_done(ifp);
+	
+	/* Notify application of the change */
+	bzero(&ev_data, sizeof(struct net_event_data));
+	bzero(&ev_msg, sizeof(struct kev_msg));
+	ev_msg.vendor_code    = KEV_VENDOR_APPLE;
+	ev_msg.kev_class      = KEV_NETWORK_CLASS;
+	ev_msg.kev_subclass   = KEV_DL_SUBCLASS;
+
+	ev_msg.event_code = KEV_DL_IFCAP_CHANGED;
+	strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
+	ev_data.if_family = ifp->if_family;
+	ev_data.if_unit   = (u_int32_t) ifp->if_unit;
+	ev_msg.dv[0].data_length = sizeof(struct net_event_data);
+	ev_msg.dv[0].data_ptr    = &ev_data;
+	ev_msg.dv[1].data_length = 0;
+	kev_post_msg(&ev_msg);
+
+	return error;
+}
+
+u_int32_t ifnet_capabilities_enabled(ifnet_t ifp)
+{
+	return ((ifp == NULL) ? 0 : ifp->if_capenable);
 }
 
 static const ifnet_offload_t offload_mask = IFNET_CSUM_IP | IFNET_CSUM_TCP |
 			IFNET_CSUM_UDP | IFNET_CSUM_FRAGMENT | IFNET_IP_FRAGMENT |
+			IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_IPV6_FRAGMENT |
 			IFNET_CSUM_SUM16 | IFNET_VLAN_TAGGING | IFNET_VLAN_MTU |
 			IFNET_MULTIPAGES | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
 
+static const ifnet_offload_t any_offload_csum = IFNET_CSUM_IP | IFNET_CSUM_TCP |
+			IFNET_CSUM_UDP | IFNET_CSUM_FRAGMENT |
+			IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 |
+			IFNET_CSUM_SUM16;
+
 errno_t
-ifnet_set_offload(
-	ifnet_t interface,
-	ifnet_offload_t offload)
+ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload)
 {
-	int lock;
-	
-	if (interface == NULL) return EINVAL;
-	lock = (interface->if_lock != 0);
+	u_int32_t ifcaps = 0;
 	
-	if (lock) ifnet_lock_exclusive(interface);
-	interface->if_hwassist = (offload & offload_mask);
-	if (lock) ifnet_lock_done(interface);
-	
-	return 0;
+	if (interface == NULL)
+		return (EINVAL);
+
+	ifnet_lock_exclusive(interface);
+	interface->if_hwassist = (offload & offload_mask);	
+	ifnet_lock_done(interface);
+
+	if ((offload & any_offload_csum))
+		ifcaps |= IFCAP_HWCSUM;
+	if ((offload & IFNET_TSO_IPV4))
+		ifcaps |= IFCAP_TSO4;
+	if ((offload & IFNET_TSO_IPV6))
+		ifcaps |= IFCAP_TSO6;
+	if ((offload & IFNET_VLAN_MTU))
+		ifcaps |= IFCAP_VLAN_MTU;
+	if ((offload & IFNET_VLAN_TAGGING))
+		ifcaps |= IFCAP_VLAN_HWTAGGING;
+	if (ifcaps != 0) {
+		(void) ifnet_set_capabilities_supported(interface, ifcaps, IFCAP_VALID);
+		(void) ifnet_set_capabilities_enabled(interface, ifcaps, IFCAP_VALID);
+	}
+
+	return (0);
 }
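/*
 * Editorial sketch (not part of the patch): ifnet_set_offload() now also
 * derives the user-visible interface capabilities from the driver's
 * offload flags (any checksum bit implies IFCAP_HWCSUM, the TSO bits
 * imply IFCAP_TSO4/IFCAP_TSO6, and so on), so a driver makes one call:
 */
static errno_t
driver_enable_offload_example(ifnet_t ifp)
{
	/* advertises IFCAP_HWCSUM and IFCAP_TSO4 as a side effect */
	return (ifnet_set_offload(ifp,
	    IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_TSO_IPV4));
}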
 
 ifnet_offload_t
@@ -466,13 +538,14 @@ ifnet_get_tso_mtu(
 	return error;
 }
 
-errno_t 
+errno_t
 ifnet_set_wake_flags(ifnet_t interface, u_int32_t properties, u_int32_t mask)
 {
-	int lock;
-        struct kev_msg        ev_msg;
-        struct net_event_data ev_data;
-	
+	struct kev_msg        ev_msg;
+	struct net_event_data ev_data;
+
+	bzero(&ev_data, sizeof(struct net_event_data));
+	bzero(&ev_msg, sizeof(struct kev_msg));
 	if (interface == NULL)
 		return EINVAL;
 
@@ -480,15 +553,11 @@ ifnet_set_wake_flags(ifnet_t interface, u_int32_t properties, u_int32_t mask)
 	if ((properties & mask) & ~IF_WAKE_VALID_FLAGS)
 		return EINVAL;
 
-	lock = (interface->if_lock != 0);
-
-	if (lock) 
-		ifnet_lock_exclusive(interface);
+	ifnet_lock_exclusive(interface);
 
 	interface->if_wake_properties = (properties & mask) | (interface->if_wake_properties & ~mask);
 
-	if (lock) 
-		ifnet_lock_done(interface);
+	ifnet_lock_done(interface);
 
 	(void) ifnet_touch_lastchange(interface);
 
@@ -505,7 +574,7 @@ ifnet_set_wake_flags(ifnet_t interface, u_int32_t properties, u_int32_t mask)
 	ev_msg.dv[0].data_ptr    = &ev_data;
 	ev_msg.dv[1].data_length = 0;
 	kev_post_msg(&ev_msg);
-	
+
 	return 0;
 }
 
@@ -515,55 +584,43 @@ ifnet_get_wake_flags(ifnet_t interface)
 	return interface == NULL ? 0 : interface->if_wake_properties;
 }
 
-
-
-
 /*
  * Should MIB data store a copy?
  */
 errno_t
-ifnet_set_link_mib_data(
-	ifnet_t interface,
-	void* mibData,
-	u_int32_t mibLen)
+ifnet_set_link_mib_data(ifnet_t interface, void *mibData, u_int32_t mibLen)
 {
-	int lock;
-	
-	if (interface == NULL) return EINVAL;
-	lock = (interface->if_lock != 0);
-	
-	if (lock) ifnet_lock_exclusive(interface);
+	if (interface == NULL)
+		return (EINVAL);
+
+	ifnet_lock_exclusive(interface);
 	interface->if_linkmib = (void*)mibData;
 	interface->if_linkmiblen = mibLen;
-	if (lock) ifnet_lock_done(interface);
-	return 0;
+	ifnet_lock_done(interface);
+	return (0);
 }
 
 errno_t
-ifnet_get_link_mib_data(
-	ifnet_t interface,
-	void *mibData,
-	u_int32_t *mibLen)
+ifnet_get_link_mib_data(ifnet_t interface, void *mibData, u_int32_t *mibLen)
 {
 	errno_t	result = 0;
-	int lock;
-	
-	if (interface == NULL) return EINVAL;
-	lock = (interface->if_lock != NULL);
-	
-	if (lock) ifnet_lock_shared(interface);
+
+	if (interface == NULL)
+		return (EINVAL);
+
+	ifnet_lock_shared(interface);
 	if (*mibLen < interface->if_linkmiblen)
 		result = EMSGSIZE;
 	if (result == 0 && interface->if_linkmib == NULL)
 		result = ENOTSUP;
-	
+
 	if (result == 0) {
 		*mibLen = interface->if_linkmiblen;
 		bcopy(interface->if_linkmib, mibData, *mibLen);
 	}
-	if (lock) ifnet_lock_done(interface);
-	
-	return result;
+	ifnet_lock_done(interface);
+
+	return (result);
 }
 
 u_int32_t
@@ -634,15 +691,12 @@ ifnet_type(
 
 #if 0
 errno_t
-ifnet_set_typelen(
-	ifnet_t interface,
-	u_char typelen)
+ifnet_set_typelen(ifnet_t interface, u_char typelen)
 {
-	int lock = (interface->if_lock != 0);
-	if (lock) ifnet_lock_exclusive(interface);
+	ifnet_lock_exclusive(interface);
 	interface->if_data.ifi_typelen = typelen;
-	if (lock) ifnet_lock_done(interface);
-	return 0;
+	ifnet_lock_done(interface);
+	return (0);
 }
 
 u_char
@@ -733,310 +787,283 @@ ifnet_baudrate(
 }
 
 errno_t
-ifnet_stat_increment(
-	ifnet_t interface,
-	const struct ifnet_stat_increment_param *counts)
+ifnet_stat_increment(ifnet_t interface,
+    const struct ifnet_stat_increment_param *counts)
 {
-	struct dlil_threading_info *thread;
-	if (interface == NULL) return EINVAL;
-
-       	if ((thread = interface->if_input_thread) == NULL || (dlil_multithreaded_input == 0))
-		thread = dlil_lo_thread_ptr;
+	if (interface == NULL)
+		return (EINVAL);
 
-	lck_mtx_lock(thread->input_lck);
+	atomic_add_64(&interface->if_data.ifi_ipackets, counts->packets_in);
+	atomic_add_64(&interface->if_data.ifi_ibytes, counts->bytes_in);
+	atomic_add_64(&interface->if_data.ifi_ierrors, counts->errors_in);
 
-	interface->if_data.ifi_ipackets += counts->packets_in;
-	interface->if_data.ifi_ibytes += counts->bytes_in;
-	interface->if_data.ifi_ierrors += counts->errors_in;
+	atomic_add_64(&interface->if_data.ifi_opackets, counts->packets_out);
+	atomic_add_64(&interface->if_data.ifi_obytes, counts->bytes_out);
+	atomic_add_64(&interface->if_data.ifi_oerrors, counts->errors_out);
 
-	interface->if_data.ifi_opackets += counts->packets_out;
-	interface->if_data.ifi_obytes += counts->bytes_out;
-	interface->if_data.ifi_oerrors += counts->errors_out;
+	atomic_add_64(&interface->if_data.ifi_collisions, counts->collisions);
+	atomic_add_64(&interface->if_data.ifi_iqdrops, counts->dropped);
 
-	interface->if_data.ifi_collisions += counts->collisions;
-	interface->if_data.ifi_iqdrops += counts->dropped;
-	
 	/* Touch the last change time. */
 	TOUCHLASTCHANGE(&interface->if_lastchange);
 
-	lck_mtx_unlock(thread->input_lck);
-	
-	return 0;
+	return (0);
 }
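/*
 * Editorial sketch (not part of the patch): interface statistics no longer
 * funnel through the input thread's mutex; each 64-bit counter is updated
 * with the atomic_add_64()/atomic_set_64()/atomic_get_64() primitives from
 * <sys/mcache.h>, so writers and readers need no shared lock:
 */
static void
count_rx_packet_example(ifnet_t ifp, u_int32_t len)
{
	/* lock-free accounting of one received packet */
	atomic_add_64(&ifp->if_data.ifi_ipackets, 1);
	atomic_add_64(&ifp->if_data.ifi_ibytes, len);
}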
 
 errno_t
-ifnet_stat_increment_in(
-	ifnet_t interface,
-	u_int32_t packets_in,
-	u_int32_t bytes_in,
-	u_int32_t errors_in)
+ifnet_stat_increment_in(ifnet_t interface, u_int32_t packets_in,
+    u_int32_t bytes_in, u_int32_t errors_in)
 {
-	struct dlil_threading_info *thread;
-
-	if (interface == NULL) return EINVAL;
-	
-       	if ((thread = interface->if_input_thread) == NULL || (dlil_multithreaded_input == 0))
-		thread = dlil_lo_thread_ptr;
-
-	lck_mtx_lock(thread->input_lck);
+	if (interface == NULL)
+		return (EINVAL);
 
-	interface->if_data.ifi_ipackets += packets_in;
-	interface->if_data.ifi_ibytes += bytes_in;
-	interface->if_data.ifi_ierrors += errors_in;
+	atomic_add_64(&interface->if_data.ifi_ipackets, packets_in);
+	atomic_add_64(&interface->if_data.ifi_ibytes, bytes_in);
+	atomic_add_64(&interface->if_data.ifi_ierrors, errors_in);
 
 	TOUCHLASTCHANGE(&interface->if_lastchange);
 
-	lck_mtx_unlock(thread->input_lck);
-	
-	return 0;
+	return (0);
 }
 
 errno_t
-ifnet_stat_increment_out(
-	ifnet_t interface,
-	u_int32_t packets_out,
-	u_int32_t bytes_out,
-	u_int32_t errors_out)
+ifnet_stat_increment_out(ifnet_t interface, u_int32_t packets_out,
+    u_int32_t bytes_out, u_int32_t errors_out)
 {
-	struct dlil_threading_info *thread;
-	if (interface == NULL) return EINVAL;
-	
-       	if ((thread = interface->if_input_thread) == NULL || (dlil_multithreaded_input == 0))
-		thread = dlil_lo_thread_ptr;
-
-	lck_mtx_lock(thread->input_lck);
+	if (interface == NULL)
+		return (EINVAL);
 
-	interface->if_data.ifi_opackets += packets_out;
-	interface->if_data.ifi_obytes += bytes_out;
-	interface->if_data.ifi_oerrors += errors_out;
+	atomic_add_64(&interface->if_data.ifi_opackets, packets_out);
+	atomic_add_64(&interface->if_data.ifi_obytes, bytes_out);
+	atomic_add_64(&interface->if_data.ifi_oerrors, errors_out);
 
 	TOUCHLASTCHANGE(&interface->if_lastchange);
 
-	lck_mtx_unlock(thread->input_lck);
-	
-	return 0;
+	return (0);
 }
 
 errno_t
-ifnet_set_stat(
-	ifnet_t interface,
-	const struct ifnet_stats_param *stats)
+ifnet_set_stat(ifnet_t interface, const struct ifnet_stats_param *stats)
 {
-	struct dlil_threading_info *thread;
+	if (interface == NULL)
+		return (EINVAL);
 
-	if (interface == NULL) return EINVAL;
-	
-       	if ((thread = interface->if_input_thread) == NULL || (dlil_multithreaded_input == 0))
-		thread = dlil_lo_thread_ptr;
+	atomic_set_64(&interface->if_data.ifi_ipackets, stats->packets_in);
+	atomic_set_64(&interface->if_data.ifi_ibytes, stats->bytes_in);
+	atomic_set_64(&interface->if_data.ifi_imcasts, stats->multicasts_in);
+	atomic_set_64(&interface->if_data.ifi_ierrors, stats->errors_in);
 
-	lck_mtx_lock(thread->input_lck);
+	atomic_set_64(&interface->if_data.ifi_opackets, stats->packets_out);
+	atomic_set_64(&interface->if_data.ifi_obytes, stats->bytes_out);
+	atomic_set_64(&interface->if_data.ifi_omcasts, stats->multicasts_out);
+	atomic_set_64(&interface->if_data.ifi_oerrors, stats->errors_out);
 
-	interface->if_data.ifi_ipackets = stats->packets_in;
-	interface->if_data.ifi_ibytes = stats->bytes_in;
-	interface->if_data.ifi_imcasts = stats->multicasts_in;
-	interface->if_data.ifi_ierrors = stats->errors_in;
-	
-	interface->if_data.ifi_opackets = stats->packets_out;
-	interface->if_data.ifi_obytes = stats->bytes_out;
-	interface->if_data.ifi_omcasts = stats->multicasts_out;
-	interface->if_data.ifi_oerrors = stats->errors_out;
-	
-	interface->if_data.ifi_collisions = stats->collisions;
-	interface->if_data.ifi_iqdrops = stats->dropped;
-	interface->if_data.ifi_noproto = stats->no_protocol;
+	atomic_set_64(&interface->if_data.ifi_collisions, stats->collisions);
+	atomic_set_64(&interface->if_data.ifi_iqdrops, stats->dropped);
+	atomic_set_64(&interface->if_data.ifi_noproto, stats->no_protocol);
 
 	/* Touch the last change time. */
 	TOUCHLASTCHANGE(&interface->if_lastchange);
 
-	lck_mtx_unlock(thread->input_lck);
-	
 	return 0;
 }
 
 errno_t
-ifnet_stat(
-	ifnet_t interface,
-	struct ifnet_stats_param *stats)
+ifnet_stat(ifnet_t interface, struct ifnet_stats_param *stats)
 {
-	struct dlil_threading_info *thread;
-
-	if (interface == NULL) return EINVAL;
-	
-       	if ((thread = interface->if_input_thread) == NULL || (dlil_multithreaded_input == 0))
-		thread = dlil_lo_thread_ptr;
-
-	lck_mtx_lock(thread->input_lck);
+	if (interface == NULL)
+		return (EINVAL);
 
-	stats->packets_in = interface->if_data.ifi_ipackets;
-	stats->bytes_in = interface->if_data.ifi_ibytes;
-	stats->multicasts_in = interface->if_data.ifi_imcasts;
-	stats->errors_in = interface->if_data.ifi_ierrors;
+	atomic_get_64(stats->packets_in, &interface->if_data.ifi_ipackets);
+	atomic_get_64(stats->bytes_in, &interface->if_data.ifi_ibytes);
+	atomic_get_64(stats->multicasts_in, &interface->if_data.ifi_imcasts);
+	atomic_get_64(stats->errors_in, &interface->if_data.ifi_ierrors);
 
-	stats->packets_out = interface->if_data.ifi_opackets;
-	stats->bytes_out = interface->if_data.ifi_obytes;
-	stats->multicasts_out = interface->if_data.ifi_omcasts;
-	stats->errors_out = interface->if_data.ifi_oerrors;
+	atomic_get_64(stats->packets_out, &interface->if_data.ifi_opackets);
+	atomic_get_64(stats->bytes_out, &interface->if_data.ifi_obytes);
+	atomic_get_64(stats->multicasts_out, &interface->if_data.ifi_omcasts);
+	atomic_get_64(stats->errors_out, &interface->if_data.ifi_oerrors);
 
-	stats->collisions = interface->if_data.ifi_collisions;
-	stats->dropped = interface->if_data.ifi_iqdrops;
-	stats->no_protocol = interface->if_data.ifi_noproto;
+	atomic_get_64(stats->collisions, &interface->if_data.ifi_collisions);
+	atomic_get_64(stats->dropped, &interface->if_data.ifi_iqdrops);
+	atomic_get_64(stats->no_protocol, &interface->if_data.ifi_noproto);
 
-	lck_mtx_unlock(thread->input_lck);
-	
-	return 0;
+	return (0);
 }
 
 errno_t
-ifnet_touch_lastchange(
-	ifnet_t interface)
+ifnet_touch_lastchange(ifnet_t interface)
 {
-	struct dlil_threading_info *thread;
-
-	if (interface == NULL) return EINVAL;
-	
-       	if ((thread = interface->if_input_thread) == NULL || (dlil_multithreaded_input == 0))
-		thread = dlil_lo_thread_ptr;
-
-	lck_mtx_lock(thread->input_lck);
+	if (interface == NULL)
+		return (EINVAL);
 
 	TOUCHLASTCHANGE(&interface->if_lastchange);
 
-	lck_mtx_unlock(thread->input_lck);
-	
-	return 0;
+	return (0);
 }
 
 errno_t
-ifnet_lastchange(
-	ifnet_t interface,
-	struct timeval *last_change)
+ifnet_lastchange(ifnet_t interface, struct timeval *last_change)
 {
-	struct dlil_threading_info *thread;
-
-	if (interface == NULL) return EINVAL;
-	
-       	if ((thread = interface->if_input_thread) == NULL || (dlil_multithreaded_input == 0))
-		thread = dlil_lo_thread_ptr;
-
-	lck_mtx_lock(thread->input_lck);
+	if (interface == NULL)
+		return (EINVAL);
 
 	*last_change = interface->if_data.ifi_lastchange;
-	
-	lck_mtx_unlock(thread->input_lck);
-	
 #if IF_LASTCHANGEUPTIME
 	/* Crude conversion from uptime to calendar time */
 	last_change->tv_sec += boottime_sec();
 #endif
-
-	return 0;
+	return (0);
 }
 
 errno_t
-ifnet_get_address_list(
-	ifnet_t interface,
-	ifaddr_t **addresses)
+ifnet_get_address_list(ifnet_t interface, ifaddr_t **addresses)
 {
-	if (addresses == NULL) return EINVAL;
-	return ifnet_get_address_list_family(interface, addresses, 0);
+	return (addresses == NULL ? EINVAL :
+	    ifnet_get_address_list_family(interface, addresses, 0));
 }
 
+struct ifnet_addr_list {
+	SLIST_ENTRY(ifnet_addr_list)	ifal_le;
+	struct ifaddr			*ifal_ifa;
+};
+
 errno_t
-ifnet_get_address_list_family(
-	ifnet_t interface,
-	ifaddr_t **addresses,
-	sa_family_t	family)
+ifnet_get_address_list_family(ifnet_t interface, ifaddr_t **addresses,
+    sa_family_t family)
+{
+	return (ifnet_get_address_list_family_internal(interface, addresses,
+	    family, 0, M_NOWAIT));
+}
+
+__private_extern__ errno_t
+ifnet_get_address_list_family_internal(ifnet_t interface, ifaddr_t **addresses,
+    sa_family_t family, int detached, int how)
 {
+	SLIST_HEAD(, ifnet_addr_list) ifal_head;
+	struct ifnet_addr_list *ifal, *ifal_tmp;
 	struct ifnet *ifp;
 	int count = 0;
-	int cmax = 0;
-	
-	if (addresses == NULL) return EINVAL;
+	errno_t err = 0;
+
+	SLIST_INIT(&ifal_head);
+
+	if (addresses == NULL) {
+		err = EINVAL;
+		goto done;
+	}
 	*addresses = NULL;
-	
+
+	if (detached) {
+		/*
+		 * Interface has been detached, so skip the lookup
+		 * at ifnet_head and go directly to inner loop.
+		 */
+		ifp = interface;
+		if (ifp == NULL) {
+			err = EINVAL;
+			goto done;
+		}
+		goto one;
+	}
+
 	ifnet_head_lock_shared();
-	TAILQ_FOREACH(ifp, &ifnet, if_link)
-	{
-		if (interface && ifp != interface) continue;
-		
+	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+		if (interface != NULL && ifp != interface)
+			continue;
+one:
 		ifnet_lock_shared(ifp);
-		if ((ifp->if_eflags & IFEF_DETACHING) == 0) {
-			if (interface == NULL || interface == ifp)
-			{
-				struct ifaddr *addr;
-				TAILQ_FOREACH(addr, &ifp->if_addrhead, ifa_link)
-				{
-					if (family == 0 || addr->ifa_addr->sa_family == family)
-						cmax++;
+		if (interface == NULL || interface == ifp) {
+			struct ifaddr *ifa;
+			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+				IFA_LOCK(ifa);
+				if (family != 0 &&
+				    ifa->ifa_addr->sa_family != family) {
+					IFA_UNLOCK(ifa);
+					continue;
 				}
+				MALLOC(ifal, struct ifnet_addr_list *,
+				    sizeof (*ifal), M_TEMP, how);
+				if (ifal == NULL) {
+					IFA_UNLOCK(ifa);
+					ifnet_lock_done(ifp);
+					if (!detached)
+						ifnet_head_done();
+					err = ENOMEM;
+					goto done;
+				}
+				ifal->ifal_ifa = ifa;
+				IFA_ADDREF_LOCKED(ifa);
+				SLIST_INSERT_HEAD(&ifal_head, ifal, ifal_le);
+				++count;
+				IFA_UNLOCK(ifa);
 			}
 		}
-		else if (interface != NULL) {
-			ifnet_lock_done(ifp);
-			ifnet_head_done();
-			return ENXIO;
-		}
 		ifnet_lock_done(ifp);
+		if (detached)
+			break;
 	}
-	
-	MALLOC(*addresses, ifaddr_t*, sizeof(ifaddr_t) * (cmax + 1), M_TEMP, M_NOWAIT);
-	if (*addresses == NULL) {
+	if (!detached)
 		ifnet_head_done();
-		return ENOMEM;
+
+	if (count == 0) {
+		err = ENXIO;
+		goto done;
 	}
-	
-	TAILQ_FOREACH(ifp, &ifnet, if_link)
-	{
-		if (interface && ifp != interface) continue;
-		
-		ifnet_lock_shared(ifp);
-		if ((ifp->if_eflags & IFEF_DETACHING) == 0) {
-			if (interface == NULL || (struct ifnet*)interface == ifp)
-			{
-				struct ifaddr *addr;
-				TAILQ_FOREACH(addr, &ifp->if_addrhead, ifa_link)
-				{
-					if (count + 1 > cmax) break;
-					if (family == 0 || addr->ifa_addr->sa_family == family) {
-						(*addresses)[count] = (ifaddr_t)addr;
-						ifaddr_reference((*addresses)[count]);
-						count++;
-					}
-				}
-			}
-		}
-		ifnet_lock_done(ifp);
-		if (interface || count == cmax)
-			break;
+	MALLOC(*addresses, ifaddr_t *, sizeof (ifaddr_t) * (count + 1),
+	    M_TEMP, how);
+	if (*addresses == NULL) {
+		err = ENOMEM;
+		goto done;
 	}
-	ifnet_head_done();
-	(*addresses)[cmax] = 0;
-	
-	return 0;
+	bzero(*addresses, sizeof (ifaddr_t) * (count + 1));
+
+done:
+	SLIST_FOREACH_SAFE(ifal, &ifal_head, ifal_le, ifal_tmp) {
+		SLIST_REMOVE(&ifal_head, ifal, ifnet_addr_list, ifal_le);
+		if (err == 0)
+			(*addresses)[--count] = ifal->ifal_ifa;
+		else
+			IFA_REMREF(ifal->ifal_ifa);
+		FREE(ifal, M_TEMP);
+	}
+
+	return (err);
 }
 
 void
-ifnet_free_address_list(
-	ifaddr_t *addresses)
+ifnet_free_address_list(ifaddr_t *addresses)
 {
 	int i;
-	
-	if (addresses == NULL) return;
-	
+
+	if (addresses == NULL)
+		return;
+
 	for (i = 0; addresses[i] != NULL; i++)
-	{
-		ifaddr_release(addresses[i]);
-	}
-	
+		IFA_REMREF(addresses[i]);
+
 	FREE(addresses, M_TEMP);
 }
 
-void*
-ifnet_lladdr(
-	ifnet_t	interface)
+void *
+ifnet_lladdr(ifnet_t interface)
 {
-	if (interface == NULL) return NULL;
-	return LLADDR(SDL(interface->if_addrhead.tqh_first->ifa_addr));
+	struct ifaddr *ifa;
+	void *lladdr;
+
+	if (interface == NULL)
+		return (NULL);
+
+	/*
+	 * if_lladdr points to the permanent link address of
+	 * the interface; it never gets deallocated.
+	 */
+	ifa = interface->if_lladdr;
+	IFA_LOCK_SPIN(ifa);
+	lladdr = LLADDR(SDL(ifa->ifa_addr));
+	IFA_UNLOCK(ifa);
+
+	return (lladdr);
 }
 
 errno_t
@@ -1068,74 +1095,80 @@ ifnet_llbroadcast_copy_bytes(
 }
 
 errno_t
-ifnet_lladdr_copy_bytes(
-	ifnet_t	interface,
-	void*	lladdr,
-	size_t	lladdr_len)
+ifnet_lladdr_copy_bytes(ifnet_t interface, void *lladdr, size_t lladdr_len)
 {
 	struct sockaddr_dl *sdl;
-	if (interface == NULL || lladdr == NULL) return EINVAL;
-	
-	sdl = SDL(interface->if_addrhead.tqh_first->ifa_addr);
-	
-	while (1) {
-		if (lladdr_len != sdl->sdl_alen) {
-			bzero(lladdr, lladdr_len);
-			return EMSGSIZE;
-		}
-		bcopy(LLADDR(sdl), lladdr, lladdr_len);
-		if (bcmp(lladdr, LLADDR(sdl), lladdr_len) == 0 &&
-			lladdr_len == sdl->sdl_alen)
-			break;
+	struct ifaddr *ifa;
+
+	if (interface == NULL || lladdr == NULL)
+		return (EINVAL);
+
+	/*
+	 * if_lladdr points to the permanent link address of
+	 * the interface; it never gets deallocated.
+	 */
+	ifa = interface->if_lladdr;
+	IFA_LOCK_SPIN(ifa);
+	sdl = SDL(ifa->ifa_addr);
+	if (lladdr_len != sdl->sdl_alen) {
+		bzero(lladdr, lladdr_len);
+		IFA_UNLOCK(ifa);
+		return (EMSGSIZE);
 	}
-	return 0;
+	bcopy(LLADDR(sdl), lladdr, lladdr_len);
+	IFA_UNLOCK(ifa);
+
+	return (0);
 }
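
A sketch of the copy path above from a caller's perspective, assuming a 6-byte (Ethernet-style) link address and an attached interface in ifp:

	u_char mac[6];
	errno_t err;

	err = ifnet_lladdr_copy_bytes(ifp, mac, sizeof (mac));
	if (err == EMSGSIZE) {
		/* sdl_alen did not match the requested length; mac was zeroed. */
	}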
 
 static errno_t
-ifnet_set_lladdr_internal(
-	ifnet_t interface,
-	const void *lladdr,
-	size_t lladdr_len,
-	u_char new_type,
-	int apply_type)
+ifnet_set_lladdr_internal(ifnet_t interface, const void *lladdr,
+    size_t lladdr_len, u_char new_type, int apply_type)
 {
 	struct ifaddr *ifa;
-	struct sockaddr_dl	*sdl;
 	errno_t	error = 0;
-	
-	if (interface == NULL) return EINVAL;
-	
-	if (lladdr_len != 0 && (lladdr_len != interface->if_addrlen || lladdr == 0))
-		return EINVAL;
-	
+
+	if (interface == NULL)
+		return (EINVAL);
+
 	ifnet_head_lock_shared();
+	ifnet_lock_exclusive(interface);
+	if (lladdr_len != 0 &&
+	    (lladdr_len != interface->if_addrlen || lladdr == 0)) {
+		ifnet_lock_done(interface);
+		ifnet_head_done();
+		return (EINVAL);
+	}
 	ifa = ifnet_addrs[interface->if_index - 1];
 	if (ifa != NULL) {
+		struct sockaddr_dl *sdl;
+
+		IFA_LOCK_SPIN(ifa);
 		sdl = (struct sockaddr_dl*)ifa->ifa_addr;
 		if (lladdr_len != 0) {
 			bcopy(lladdr, LLADDR(sdl), lladdr_len);
-		}
-		else {
+		} else {
 			bzero(LLADDR(sdl), interface->if_addrlen);
 		}
 		sdl->sdl_alen = lladdr_len;
-		
+
 		if (apply_type) {
 			sdl->sdl_type = new_type;
 		}
-	}
-	else {
+		IFA_UNLOCK(ifa);
+	} else {
 		error = ENXIO;
 	}
+	ifnet_lock_done(interface);
 	ifnet_head_done();
-	
+
 	/* Generate a kernel event */
 	if (error == 0) {
 		dlil_post_msg(interface, KEV_DL_SUBCLASS,
 			KEV_DL_LINK_ADDRESS_CHANGED, NULL, 0);
 	}
-	
-	return error;
+
+	return (error);
 }
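
For reference, the public wrapper around the internal routine above is typically exercised like this (a sketch with a hypothetical, locally administered address; ifp is assumed to be an interface the caller owns):

	static const u_char new_mac[6] = { 0x02, 0x00, 0x00, 0x12, 0x34, 0x56 };
	errno_t err;

	/* Posts KEV_DL_LINK_ADDRESS_CHANGED on success, as seen above. */
	err = ifnet_set_lladdr(ifp, new_mac, sizeof (new_mac));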
 
 errno_t
@@ -1158,64 +1191,68 @@ ifnet_set_lladdr_and_type(
 }
 
 errno_t
-ifnet_add_multicast(
-	ifnet_t interface,
-	const struct sockaddr *maddr,
-	ifmultiaddr_t *address)
+ifnet_add_multicast(ifnet_t interface, const struct sockaddr *maddr,
+    ifmultiaddr_t *ifmap)
 {
-	if (interface == NULL || maddr == NULL) return EINVAL;
-	return if_addmulti(interface, maddr, address);
+	if (interface == NULL || maddr == NULL)
+		return (EINVAL);
+
+	/* Don't let users screw up protocols' entries. */
+	if (maddr->sa_family != AF_UNSPEC && maddr->sa_family != AF_LINK)
+		return (EINVAL);
+
+	return (if_addmulti_anon(interface, maddr, ifmap));
 }
 
 errno_t
-ifnet_remove_multicast(
-	ifmultiaddr_t address)
+ifnet_remove_multicast(ifmultiaddr_t ifma)
 {
-	if (address == NULL) return EINVAL;
-	return if_delmultiaddr(address, 0);
+	struct sockaddr *maddr;
+
+	if (ifma == NULL)
+		return (EINVAL);
+
+	maddr = ifma->ifma_addr;
+	/* Don't let users screw up protocols' entries. */
+	if (maddr->sa_family != AF_UNSPEC && maddr->sa_family != AF_LINK)
+		return (EINVAL);
+
+	return (if_delmulti_anon(ifma->ifma_ifp, maddr));
 }
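
Given the AF_UNSPEC/AF_LINK restriction introduced above, a caller hands in a link-layer sockaddr directly. A sketch joining and leaving the IPv4 all-hosts group MAC (addresses and error handling abbreviated; ifp is hypothetical):

	static const u_char group_mac[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
	struct sockaddr_dl sdl;
	ifmultiaddr_t ifma;
	errno_t err;

	bzero(&sdl, sizeof (sdl));
	sdl.sdl_len = sizeof (sdl);
	sdl.sdl_family = AF_LINK;
	sdl.sdl_alen = sizeof (group_mac);
	bcopy(group_mac, LLADDR(&sdl), sizeof (group_mac));

	err = ifnet_add_multicast(ifp, (struct sockaddr *)&sdl, &ifma);
	if (err == 0)
		err = ifnet_remove_multicast(ifma);	/* leave the group again */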
 
-errno_t ifnet_get_multicast_list(ifnet_t interface, ifmultiaddr_t **addresses)
+errno_t
+ifnet_get_multicast_list(ifnet_t ifp, ifmultiaddr_t **addresses)
 {
 	int count = 0;
 	int cmax = 0;
 	struct ifmultiaddr *addr;
-	int lock;
-	
-	if (interface == NULL || addresses == NULL)
-		return EINVAL;
-	
-	lock = (interface->if_lock != 0);
-	if (lock) ifnet_lock_shared(interface);
-	if ((interface->if_eflags & IFEF_DETACHING) == 0) {
-		LIST_FOREACH(addr, &interface->if_multiaddrs, ifma_link)
-		{
-			cmax++;
-		}
-	}
-	else {
-		if (lock) ifnet_lock_done(interface);
-		return ENXIO;
+
+	if (ifp == NULL || addresses == NULL)
+		return (EINVAL);
+
+	ifnet_lock_shared(ifp);
+	LIST_FOREACH(addr, &ifp->if_multiaddrs, ifma_link) {
+		cmax++;
 	}
-	
-	MALLOC(*addresses, ifmultiaddr_t*, sizeof(ifmultiaddr_t) * (cmax + 1), M_TEMP, M_NOWAIT);
+
+	MALLOC(*addresses, ifmultiaddr_t *, sizeof (ifmultiaddr_t) * (cmax + 1),
+	    M_TEMP, M_NOWAIT);
 	if (*addresses == NULL) {
-		if (lock) ifnet_lock_done(interface);
-		return ENOMEM;
+		ifnet_lock_done(ifp);
+		return (ENOMEM);
 	}
-	
-	LIST_FOREACH(addr, &interface->if_multiaddrs, ifma_link)
-	{
+
+	LIST_FOREACH(addr, &ifp->if_multiaddrs, ifma_link) {
 		if (count + 1 > cmax)
 			break;
 		(*addresses)[count] = (ifmultiaddr_t)addr;
 		ifmaddr_reference((*addresses)[count]);
 		count++;
 	}
-	(*addresses)[cmax] = 0;
-	if (lock) ifnet_lock_done(interface);
-	
-	return 0;
+	(*addresses)[cmax] = NULL;
+	ifnet_lock_done(ifp);
+
+	return (0);
 }
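
A consumer sketch for the NULL-terminated array built above; each entry is referenced and must be released through ifnet_free_multicast_list():

	ifmultiaddr_t *maddrs;
	int i;

	if (ifnet_get_multicast_list(ifp, &maddrs) == 0) {
		for (i = 0; maddrs[i] != NULL; i++) {
			struct sockaddr_storage ss;

			(void) ifmaddr_address(maddrs[i],
			    (struct sockaddr *)&ss, sizeof (ss));
		}
		ifnet_free_multicast_list(maddrs);
	}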
 
 void
@@ -1235,44 +1272,42 @@ ifnet_free_multicast_list(
 }
 
 errno_t
-ifnet_find_by_name(
-	const char *ifname,
-	ifnet_t *interface)
+ifnet_find_by_name(const char *ifname, ifnet_t *ifpp)
 {
 	struct ifnet *ifp;
 	int	namelen;
-	
-	if (ifname == NULL) return EINVAL;
-	
+
+	if (ifname == NULL)
+		return (EINVAL);
+
 	namelen = strlen(ifname);
-	
-	*interface = NULL;
-	
+
+	*ifpp = NULL;
+
 	ifnet_head_lock_shared();
-	TAILQ_FOREACH(ifp, &ifnet, if_link)
-	{
-		struct ifaddr *ifa = ifnet_addrs[ifp->if_index - 1];
+	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+		struct ifaddr *ifa;
 		struct sockaddr_dl *ll_addr;
 
-		if (!ifa || !ifa->ifa_addr)
+		ifa = ifnet_addrs[ifp->if_index - 1];
+		if (ifa == NULL)
 			continue;
 
+		IFA_LOCK(ifa);
 		ll_addr = (struct sockaddr_dl *)ifa->ifa_addr;
 
-		if ((ifp->if_eflags & IFEF_DETACHING) == 0 &&
-			namelen == ll_addr->sdl_nlen &&
-			(strncmp(ll_addr->sdl_data, ifname, ll_addr->sdl_nlen) == 0))
-		{
+		if (namelen == ll_addr->sdl_nlen &&
+		    !strncmp(ll_addr->sdl_data, ifname, ll_addr->sdl_nlen)) {
+			IFA_UNLOCK(ifa);
+			*ifpp = ifp;
+			ifnet_reference(*ifpp);
 			break;
 		}
-	}
-	if (ifp) {
-		*interface = ifp;
-		ifnet_reference(*interface);
+		IFA_UNLOCK(ifa);
 	}
 	ifnet_head_done();
-	
-	return (ifp == NULL) ? ENXIO : 0;
+
+	return ((ifp == NULL) ? ENXIO : 0);
 }
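
Since the lookup now takes an ifnet reference on a hit, callers must release it when done. A minimal sketch ("en0" assumed to exist):

	ifnet_t ifp;

	if (ifnet_find_by_name("en0", &ifp) == 0) {
		/* ... use ifp ... */
		ifnet_release(ifp);	/* drop the reference taken by the lookup */
	}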
 
 errno_t
@@ -1287,54 +1322,74 @@ ifnet_list_get_all(ifnet_family_t family, ifnet_t **list, u_int32_t *count)
 	return (ifnet_list_get_common(family, TRUE, list, count));
 }
 
+struct ifnet_list {
+	SLIST_ENTRY(ifnet_list)	ifl_le;
+	struct ifnet		*ifl_ifp;
+};
+
 static errno_t
 ifnet_list_get_common(ifnet_family_t family, boolean_t get_all, ifnet_t **list,
     u_int32_t *count)
 {
+#pragma unused(get_all)
+	SLIST_HEAD(, ifnet_list) ifl_head;
+	struct ifnet_list *ifl, *ifl_tmp;
 	struct ifnet *ifp;
-	u_int32_t cmax = 0;
-	*count = 0;
-	errno_t	result = 0;
-
-	if (list == NULL || count == NULL)
-		return (EINVAL);
-
-	ifnet_head_lock_shared();
-	TAILQ_FOREACH(ifp, &ifnet, if_link) {
-		if ((ifp->if_eflags & IFEF_DETACHING) && !get_all)
-			continue;
-		if (family == IFNET_FAMILY_ANY || ifp->if_family == family)
-			cmax++;
-	}
+	int cnt = 0;
+	errno_t err = 0;
 
-	if (cmax == 0)
-		result = ENXIO;
+	SLIST_INIT(&ifl_head);
 
-	if (result == 0) {
-		MALLOC(*list, ifnet_t*, sizeof(ifnet_t) * (cmax + 1),
-		    M_TEMP, M_NOWAIT);
-		if (*list == NULL)
-			result = ENOMEM;
+	if (list == NULL || count == NULL) {
+		err = EINVAL;
+		goto done;
 	}
+	*count = 0;
+	*list = NULL;
 
-	if (result == 0) {
-		TAILQ_FOREACH(ifp, &ifnet, if_link) {
-			if ((ifp->if_eflags & IFEF_DETACHING) && !get_all)
-				continue;
-			if (*count + 1 > cmax)
-				break;
-			if (family == IFNET_FAMILY_ANY ||
-			    ((ifnet_family_t)ifp->if_family) == family) {
-				(*list)[*count] = (ifnet_t)ifp;
-				ifnet_reference((*list)[*count]);
-				(*count)++;
+	ifnet_head_lock_shared();
+	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+		if (family == IFNET_FAMILY_ANY || ifp->if_family == family) {
+			MALLOC(ifl, struct ifnet_list *, sizeof (*ifl),
+			    M_TEMP, M_NOWAIT);
+			if (ifl == NULL) {
+				ifnet_head_done();
+				err = ENOMEM;
+				goto done;
 			}
+			ifl->ifl_ifp = ifp;
+			ifnet_reference(ifp);
+			SLIST_INSERT_HEAD(&ifl_head, ifl, ifl_le);
+			++cnt;
 		}
-		(*list)[*count] = NULL;
 	}
 	ifnet_head_done();
 
-	return (result);
+	if (cnt == 0) {
+		err = ENXIO;
+		goto done;
+	}
+
+	MALLOC(*list, ifnet_t *, sizeof (ifnet_t) * (cnt + 1),
+	    M_TEMP, M_NOWAIT);
+	if (*list == NULL) {
+		err = ENOMEM;
+		goto done;
+	}
+	bzero(*list, sizeof (ifnet_t) * (cnt + 1));
+	*count = cnt;
+
+done:
+	SLIST_FOREACH_SAFE(ifl, &ifl_head, ifl_le, ifl_tmp) {
+		SLIST_REMOVE(&ifl_head, ifl, ifnet_list, ifl_le);
+		if (err == 0)
+			(*list)[--cnt] = ifl->ifl_ifp;
+		else
+			ifnet_release(ifl->ifl_ifp);
+		FREE(ifl, M_TEMP);
+	}
+
+	return (err);
 }
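
The same build-list-then-array pattern is consumed through ifnet_list_get(). A sketch enumerating Ethernet interfaces:

	ifnet_t *list;
	u_int32_t count, i;

	if (ifnet_list_get(IFNET_FAMILY_ETHERNET, &list, &count) == 0) {
		for (i = 0; i < count; i++)
			printf("%s%u\n", ifnet_name(list[i]), ifnet_unit(list[i]));
		ifnet_list_free(list);	/* releases each ifnet reference */
	}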
 
 void
@@ -1345,9 +1400,8 @@ ifnet_list_free(ifnet_t *interfaces)
 	if (interfaces == NULL)
 		return;
 
-	for (i = 0; interfaces[i]; i++) {
+	for (i = 0; interfaces[i]; i++)
 		ifnet_release(interfaces[i]);
-	}
 
 	FREE(interfaces, M_TEMP);
 }
@@ -1357,97 +1411,132 @@ ifnet_list_free(ifnet_t *interfaces)
 /****************************************************************************/
 
 errno_t
-ifaddr_reference(
-	ifaddr_t ifa)
+ifaddr_reference(ifaddr_t ifa)
 {
-	if (ifa == NULL) return EINVAL;
-	ifaref(ifa);
-	return 0;
+	if (ifa == NULL)
+		return (EINVAL);
+
+	IFA_ADDREF(ifa);
+	return (0);
 }
 
 errno_t
-ifaddr_release(
-	ifaddr_t ifa)
+ifaddr_release(ifaddr_t ifa)
 {
-	if (ifa == NULL) return EINVAL;
-	ifafree(ifa);
-	return 0;
+	if (ifa == NULL)
+		return (EINVAL);
+
+	IFA_REMREF(ifa);
+	return (0);
 }
 
 sa_family_t
-ifaddr_address_family(
-	ifaddr_t ifa)
+ifaddr_address_family(ifaddr_t ifa)
 {
-	if (ifa && ifa->ifa_addr)
-		return ifa->ifa_addr->sa_family;
-	
-	return 0;
+	sa_family_t family = 0;
+
+	if (ifa != NULL) {
+		IFA_LOCK_SPIN(ifa);
+		if (ifa->ifa_addr != NULL)
+			family = ifa->ifa_addr->sa_family;
+		IFA_UNLOCK(ifa);
+	}
+	return (family);
 }
 
 errno_t
-ifaddr_address(
-	ifaddr_t ifa,
-	struct sockaddr *out_addr,
-	u_int32_t addr_size)
+ifaddr_address(ifaddr_t ifa, struct sockaddr *out_addr, u_int32_t addr_size)
 {
 	u_int32_t copylen;
-	
-	if (ifa == NULL || out_addr == NULL) return EINVAL;
-	if (ifa->ifa_addr == NULL) return ENOTSUP;
-	
-	copylen = (addr_size >= ifa->ifa_addr->sa_len) ? ifa->ifa_addr->sa_len : addr_size;
+
+	if (ifa == NULL || out_addr == NULL)
+		return (EINVAL);
+
+	IFA_LOCK_SPIN(ifa);
+	if (ifa->ifa_addr == NULL) {
+		IFA_UNLOCK(ifa);
+		return (ENOTSUP);
+	}
+
+	copylen = (addr_size >= ifa->ifa_addr->sa_len) ?
+	    ifa->ifa_addr->sa_len : addr_size;
 	bcopy(ifa->ifa_addr, out_addr, copylen);
-	
-	if (ifa->ifa_addr->sa_len > addr_size) return EMSGSIZE;
-	
-	return 0;
+
+	if (ifa->ifa_addr->sa_len > addr_size) {
+		IFA_UNLOCK(ifa);
+		return (EMSGSIZE);
+	}
+
+	IFA_UNLOCK(ifa);
+	return (0);
 }
 
 errno_t
-ifaddr_dstaddress(
-	ifaddr_t ifa,
-	struct sockaddr *out_addr,
-	u_int32_t addr_size)
+ifaddr_dstaddress(ifaddr_t ifa, struct sockaddr *out_addr, u_int32_t addr_size)
 {
 	u_int32_t copylen;
-	if (ifa == NULL || out_addr == NULL) return EINVAL;
-	if (ifa->ifa_dstaddr == NULL) return ENOTSUP;
-	
-	copylen = (addr_size >= ifa->ifa_dstaddr->sa_len) ? ifa->ifa_dstaddr->sa_len : addr_size;
+
+	if (ifa == NULL || out_addr == NULL)
+		return (EINVAL);
+
+	IFA_LOCK_SPIN(ifa);
+	if (ifa->ifa_dstaddr == NULL) {
+		IFA_UNLOCK(ifa);
+		return (ENOTSUP);
+	}
+
+	copylen = (addr_size >= ifa->ifa_dstaddr->sa_len) ?
+	    ifa->ifa_dstaddr->sa_len : addr_size;
 	bcopy(ifa->ifa_dstaddr, out_addr, copylen);
 
-	if (ifa->ifa_dstaddr->sa_len > addr_size) return EMSGSIZE;
-	
-	return 0;
+	if (ifa->ifa_dstaddr->sa_len > addr_size) {
+		IFA_UNLOCK(ifa);
+		return (EMSGSIZE);
+	}
+
+	IFA_UNLOCK(ifa);
+	return (0);
 }
 
 errno_t
-ifaddr_netmask(
-	ifaddr_t ifa,
-	struct sockaddr *out_addr,
-	u_int32_t addr_size)
+ifaddr_netmask(ifaddr_t ifa, struct sockaddr *out_addr, u_int32_t addr_size)
 {
 	u_int32_t copylen;
-	if (ifa == NULL || out_addr == NULL) return EINVAL;
-	if (ifa->ifa_netmask == NULL) return ENOTSUP;
-	
-	copylen = addr_size >= ifa->ifa_netmask->sa_len ? ifa->ifa_netmask->sa_len : addr_size;
+
+	if (ifa == NULL || out_addr == NULL)
+		return (EINVAL);
+
+	IFA_LOCK_SPIN(ifa);
+	if (ifa->ifa_netmask == NULL) {
+		IFA_UNLOCK(ifa);
+		return (ENOTSUP);
+	}
+
+	copylen = addr_size >= ifa->ifa_netmask->sa_len ?
+	    ifa->ifa_netmask->sa_len : addr_size;
 	bcopy(ifa->ifa_netmask, out_addr, copylen);
-	
-	if (ifa->ifa_netmask->sa_len > addr_size) return EMSGSIZE;
-	
-	return 0;
+
+	if (ifa->ifa_netmask->sa_len > addr_size) {
+		IFA_UNLOCK(ifa);
+		return (EMSGSIZE);
+	}
+
+	IFA_UNLOCK(ifa);
+	return (0);
 }
 
 ifnet_t
-ifaddr_ifnet(
-	ifaddr_t ifa)
+ifaddr_ifnet(ifaddr_t ifa)
 {
 	struct ifnet *ifp;
-	if (ifa == NULL) return NULL;
+
+	if (ifa == NULL)
+		return (NULL);
+
+	/* ifa_ifp is set once at creation time; it is never changed */
 	ifp = ifa->ifa_ifp;
-	
-	return (ifnet_t)ifp;
+
+	return (ifp);
 }
 
 ifaddr_t
@@ -1494,60 +1583,70 @@ ifaddr_findbestforaddr(
 }
 
 errno_t
-ifmaddr_reference(
-	ifmultiaddr_t ifmaddr)
+ifmaddr_reference(ifmultiaddr_t ifmaddr)
 {
-	if (ifmaddr == NULL) return EINVAL;
-	ifma_reference(ifmaddr);
-	return 0;
+	if (ifmaddr == NULL)
+		return (EINVAL);
+
+	IFMA_ADDREF(ifmaddr);
+	return (0);
 }
 
 errno_t
-ifmaddr_release(
-	ifmultiaddr_t ifmaddr)
+ifmaddr_release(ifmultiaddr_t ifmaddr)
 {
-	if (ifmaddr == NULL) return EINVAL;
-	ifma_release(ifmaddr);	
-	return 0;
+	if (ifmaddr == NULL)
+		return (EINVAL);
+
+	IFMA_REMREF(ifmaddr);
+	return (0);
 }
 
 errno_t
-ifmaddr_address(
-	ifmultiaddr_t ifmaddr,
-	struct sockaddr *out_addr,
-	u_int32_t addr_size)
+ifmaddr_address(ifmultiaddr_t ifma, struct sockaddr *out_addr,
+    u_int32_t addr_size)
 {
 	u_int32_t copylen;
-	
-	if (ifmaddr == NULL || out_addr == NULL) return EINVAL;
-	if (ifmaddr->ifma_addr == NULL) return ENOTSUP;
-	
-	copylen = addr_size >= ifmaddr->ifma_addr->sa_len ? ifmaddr->ifma_addr->sa_len : addr_size;
-	bcopy(ifmaddr->ifma_addr, out_addr, copylen);
-	
-	if (ifmaddr->ifma_addr->sa_len > addr_size) return EMSGSIZE;
-	
-	return 0;
+
+	if (ifma == NULL || out_addr == NULL)
+		return (EINVAL);
+
+	IFMA_LOCK(ifma);
+	if (ifma->ifma_addr == NULL) {
+		IFMA_UNLOCK(ifma);
+		return (ENOTSUP);
+	}
+
+	copylen = (addr_size >= ifma->ifma_addr->sa_len ?
+	    ifma->ifma_addr->sa_len : addr_size);
+	bcopy(ifma->ifma_addr, out_addr, copylen);
+
+	if (ifma->ifma_addr->sa_len > addr_size) {
+		IFMA_UNLOCK(ifma);
+		return (EMSGSIZE);
+	}
+	IFMA_UNLOCK(ifma);
+	return (0);
 }
 
 errno_t
-ifmaddr_lladdress(
-	ifmultiaddr_t ifmaddr,
-	struct sockaddr *out_addr,
-	u_int32_t addr_size)
+ifmaddr_lladdress(ifmultiaddr_t ifma, struct sockaddr *out_addr,
+    u_int32_t addr_size)
 {
-	if (ifmaddr == NULL || out_addr == NULL) return EINVAL;
-	if (ifmaddr->ifma_ll == NULL) return ENOTSUP;
-	
-	return ifmaddr_address(ifmaddr->ifma_ll, out_addr, addr_size);
+	struct ifmultiaddr *ifma_ll;
+
+	if (ifma == NULL || out_addr == NULL)
+		return (EINVAL);
+	if ((ifma_ll = ifma->ifma_ll) == NULL)
+		return (ENOTSUP);
+
+	return (ifmaddr_address(ifma_ll, out_addr, addr_size));
 }
 
 ifnet_t
-ifmaddr_ifnet(
-	ifmultiaddr_t ifmaddr)
+ifmaddr_ifnet(ifmultiaddr_t ifma)
 {
-	if (ifmaddr == NULL || ifmaddr->ifma_ifp == NULL) return NULL;
-	return ifmaddr->ifma_ifp;
+	return (ifma == NULL ? NULL : ifma->ifma_ifp);
 }
 
 /******************************************************************************/
diff --git a/bsd/net/kpi_interface.h b/bsd/net/kpi_interface.h
index e22eba1c7..e2fd084b6 100644
--- a/bsd/net/kpi_interface.h
+++ b/bsd/net/kpi_interface.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -66,6 +66,7 @@ struct ifnet_demux_desc;
 	@constant IFNET_FAMILY_STF A 6to4 interface.
 	@constant IFNET_FAMILY_FIREWIRE An IEEE 1394 (firewire) interface.
 	@constant IFNET_FAMILY_BOND A virtual bonded interface.
+	@constant IFNET_FAMILY_CELLULAR A cellular interface.
 */
 
 enum {
@@ -83,7 +84,8 @@ enum {
 	IFNET_FAMILY_FAITH		= 11,
 	IFNET_FAMILY_STF		= 12,
 	IFNET_FAMILY_FIREWIRE		= 13,
-	IFNET_FAMILY_BOND		= 14
+	IFNET_FAMILY_BOND		= 14,
+	IFNET_FAMILY_CELLULAR		= 15
 };
 /*!
 	@typedef ifnet_family_t
@@ -129,6 +131,9 @@ typedef u_int32_t protocol_family_t;
 	@constant IFNET_CSUM_UDP Hardware will calculate UDP checksums.
 	@constant IFNET_CSUM_FRAGMENT Hardware will checksum IP fragments.
 	@constant IFNET_IP_FRAGMENT Hardware will fragment IP packets.
+	@constant IFNET_CSUM_TCPIPV6 Hardware will calculate TCP IPv6 checksums.
+	@constant IFNET_CSUM_UDPIPV6 Hardware will calculate UDP IPv6 checksums.
+	@constant IFNET_IPV6_FRAGMENT Hardware will fragment IPv6 packets.
 	@constant IFNET_VLAN_TAGGING Hardware will generate VLAN headers.
 	@constant IFNET_VLAN_MTU Hardware supports VLAN MTU.
 	@constant IFNET_MULTIPAGES Driver is capable of handling packets
@@ -147,8 +152,15 @@ typedef u_int32_t protocol_family_t;
                 If the Interface driver sets this flag, TCP will send larger frames (up to 64KB) as one
                 frame to the adapter which will perform the final packetization. The maximum TSO segment
                 supported by the interface can be set with "ifnet_set_tso_mtu". To retreive the real MTU
-                for the TCP connection the function "mbuf_get_tso_requested" is used by the driver.
+                for the TCP connection the function "mbuf_get_tso_requested" is used by the driver. Note
+                that if TSO is active, all the packets will be flagged for TSO, not just large packets.
         @constant IFNET_TSO_IPV6 Hardware supports IPv6 TCP Segment Offloading.
+                If the Interface driver sets this flag, TCP IPv6 will send larger frames (up to 64KB) as one
+                frame to the adapter which will perform the final packetization. The maximum TSO segment
+                supported by the interface can be set with "ifnet_set_tso_mtu". To retrieve the real MTU
+                for the TCP IPv6 connection the function "mbuf_get_tso_requested" is used by the driver.
+                Note that if TSO is active, all the packets will be flagged for TSO, not just large packets.
+
 */
 
 enum {
@@ -157,6 +169,9 @@ enum {
 	IFNET_CSUM_UDP		= 0x00000004,
 	IFNET_CSUM_FRAGMENT	= 0x00000008,
 	IFNET_IP_FRAGMENT	= 0x00000010,
+	IFNET_CSUM_TCPIPV6	= 0x00000020,
+	IFNET_CSUM_UDPIPV6	= 0x00000040,
+	IFNET_IPV6_FRAGMENT	= 0x00000080,
 #ifdef KERNEL_PRIVATE
 	IFNET_CSUM_SUM16	= 0x00001000,
 #endif /* KERNEL_PRIVATE */
@@ -839,7 +854,7 @@ extern const char *ifnet_name(ifnet_t interface);
 	@function ifnet_family
 	@discussion Returns the family of the interface.
 	@param interface Interface to retrieve the unit number from.
-	@result Unit number.
+	@result Interface family type.
  */
 extern ifnet_family_t ifnet_family(ifnet_t interface);
 
@@ -942,16 +957,92 @@ extern u_int32_t ifnet_idle_flags(ifnet_t interface);
 
 #endif /* KERNEL_PRIVATE */
 
+/*!
+	@function ifnet_set_capabilities_supported
+	@discussion Specify the capabilities supported by the interface.
+		This function lets you specify which capabilities are supported
+		by the interface. Typically this function is called by the driver
+		when the interface gets attached to the system.
+		The mask allows you to control which capabilities are set or unset.
+		The kernel will effectively take the lock, then set the
+		interface's flags to (if_capabilities & ~mask) | (new_caps & mask).
+
+		This function is intended to be called by the driver. A kext
+		must not call this function on an interface the kext does not
+		own.
+	@param interface Interface to set the capabilities on.
+	@param new_caps The value of the capabilities that should be set or unset. These
+		flags are defined in net/if.h
+	@param mask The mask of capabilities to be affected. These
+		flags are defined in net/if.h
+	@result 0 on success otherwise the errno error.
+ */
+extern errno_t ifnet_set_capabilities_supported(ifnet_t interface, u_int32_t new_caps,
+    u_int32_t mask);
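
A worked example of the mask semantics, assuming the IFCAP_* checksum bits from net/if.h and an attached interface in ifp: with mask == new_caps only the named bits change, per (if_capabilities & ~mask) | (new_caps & mask).

	u_int32_t caps = IFCAP_TXCSUM | IFCAP_RXCSUM;
	errno_t err;

	/* Advertise TX/RX checksum offload support at attach time. */
	err = ifnet_set_capabilities_supported(ifp, caps, caps);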
+
+/*!
+	@function ifnet_capabilities_supported
+	@discussion Retrieve the interface capabilities supported by the interface.
+	@param interface Interface to retrieve the capabilities from.
+	@result Flags. Capabilities flags are defined in net/if.h
+ */
+extern u_int32_t ifnet_capabilities_supported(ifnet_t interface);
+
+/*!
+	@function ifnet_set_capabilities_enabled
+	@discussion Enable and/or disable the interface capabilities to match
+		new_caps. This function lets you specify which capabilities you
+		want to change using the mask.
+		The kernel will effectively take the lock, then set the
+		interface's flags to (if_capenable & ~mask) | (new_caps & mask).
+
+		This function is intended to be called by the driver. A kext
+		must not call this function on an interface the kext does not
+		own.
+		
+		Typically this function is called by the driver when the interface
+		is created to specify which of the supported capabilities are
+		enabled by default. This function is also meant to be called when
+		the driver handles the interface ioctl SIOCSIFCAP.
+		
+		The driver should call ifnet_set_offload() to indicate the
+		corresponding hardware offload bits that will be used by the
+		networking stack.
+		
+		It is an error to enable a capability that is not marked as 
+		supported by the interface.
+	@param interface Interface to set the capabilities on.
+	@param new_caps The value of the capabilities that should be set or unset. These
+		flags are defined in net/if.h
+	@param mask The mask of capabilities to be affected. These
+		flags are defined in net/if.h
+	@result 0 on success otherwise the errno error.
+ */
+extern errno_t ifnet_set_capabilities_enabled(ifnet_t interface, u_int32_t new_caps,
+    u_int32_t mask);
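
And the enabled-set counterpart: this sketch turns RX checksum on while forcing TX checksum off, since both bits are named in the mask (IFCAP_* bits assumed from net/if.h):

	errno_t err;

	err = ifnet_set_capabilities_enabled(ifp, IFCAP_RXCSUM,
	    IFCAP_RXCSUM | IFCAP_TXCSUM);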
+
+/*!
+	@function ifnet_capabilities_enabled
+	@discussion Retrieve the interface capabilities enabled on the interface.
+	@param interface Interface to retrieve the capabilities from.
+	@result Flags. Capabilities flags are defined in net/if.h
+ */
+extern u_int32_t ifnet_capabilities_enabled(ifnet_t interface);
+
+
 /*!
 	@function ifnet_set_offload
 	@discussion Sets a bitfield to indicate special hardware offload
 		support provided by the interface such as hardware checksums and
 		VLAN. This replaces the if_hwassist flags field. Any flags
 		unrecognized by the stack will not be set.
+
+		Note that the system will automatically set the interface
+		capabilities that correspond to the offload flags modified --
+		i.e. the driver does not have to call ifnet_set_capabilities_enabled()
+		and ifnet_set_capabilities_supported().
 	@param interface The interface.
 	@param offload The new set of flags indicating which offload options
 		the device supports.
-	@param mask The mask of flags to be modified.
 	@result 0 on success otherwise the errno error.
  */
 extern errno_t ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload);
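
A sketch of the corresponding offload call; per the note above, the matching capability bits are derived automatically (ifp is a hypothetical attached interface):

	errno_t err;

	err = ifnet_set_offload(ifp,
	    IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP);
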
@@ -1446,6 +1537,11 @@ extern errno_t ifnet_get_address_list(ifnet_t interface, ifaddr_t **addresses);
 extern errno_t ifnet_get_address_list_family(ifnet_t interface,
     ifaddr_t **addresses, sa_family_t family);
 
+#ifdef KERNEL_PRIVATE
+__private_extern__ errno_t ifnet_get_address_list_family_internal(ifnet_t,
+    ifaddr_t **, sa_family_t, int, int);
+#endif /* KERNEL_PRIVATE */
+
 /*!
 	@function ifnet_free_address_list
 	@discussion Free a list of addresses returned from
@@ -1543,9 +1639,9 @@ extern errno_t ifnet_resolve_multicast(ifnet_t ifp,
 		ifnet_remove_multicast and making sure you no longer have any
 		references to the multicast.
 	@param interface The interface.
-	@param maddr The multicast address to join. Either a physical
-		address or logical address to be translated to a physical
-		address.
+	@param maddr The multicast address (AF_UNSPEC/AF_LINK) to join. Either
+		a physical address or logical address to be translated to a
+		physical address.
 	@param multicast The resulting ifmultiaddr_t multicast address.
 	@result 0 on success otherwise the errno error.
  */
diff --git a/bsd/net/kpi_protocol.c b/bsd/net/kpi_protocol.c
index a48cd249a..6c3043c94 100644
--- a/bsd/net/kpi_protocol.c
+++ b/bsd/net/kpi_protocol.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -154,14 +154,14 @@ proto_register_input(
 	}
 
 	
-	lck_mtx_lock(thread->input_lck);
+	lck_mtx_lock(&thread->input_lck);
 	entry->next = proto_input_add_list;
 	proto_input_add_list = entry;
 	
 	thread->input_waiting |= DLIL_PROTO_REGISTER;
 	if ((thread->input_waiting & DLIL_INPUT_RUNNING) == 0)
 		wakeup((caddr_t)&thread->input_waiting);
-	lck_mtx_unlock(thread->input_lck);
+	lck_mtx_unlock(&thread->input_lck);
 	
 	return 0;
 }
@@ -219,14 +219,14 @@ proto_input_run(void)
 	mbuf_t packet_list;
 	int i, locked = 0;
 
-	lck_mtx_assert(thread->input_lck,  LCK_MTX_ASSERT_NOTOWNED);
+	lck_mtx_assert(&thread->input_lck,  LCK_MTX_ASSERT_NOTOWNED);
 
 	if ((thread->input_waiting & DLIL_PROTO_REGISTER) != 0) {
-		lck_mtx_lock(thread->input_lck);
+		lck_mtx_lock_spin(&thread->input_lck);
 		entry = proto_input_add_list;
 		proto_input_add_list = NULL;
 		thread->input_waiting &= ~DLIL_PROTO_REGISTER;
-		lck_mtx_unlock(thread->input_lck);
+		lck_mtx_unlock(&thread->input_lck);
 		proto_delayed_attach(entry);
 	}
 	/*
@@ -237,7 +237,7 @@ proto_input_run(void)
 		for (entry = proto_hash[i]; entry && proto_total_waiting;
 			 entry = entry->next) {
 			if (entry->inject_first) {
-				lck_mtx_lock(thread->input_lck);
+				lck_mtx_lock_spin(&thread->input_lck);
 				thread->input_waiting &= ~DLIL_PROTO_WAITING;
 
 				packet_list = entry->inject_first;
@@ -246,7 +246,7 @@ proto_input_run(void)
 				entry->inject_last = NULL;
 				proto_total_waiting--;
 
-				lck_mtx_unlock(thread->input_lck);
+				lck_mtx_unlock(&thread->input_lck);
 
 				if (entry->domain && (entry->domain->dom_flags & DOM_REENTRANT) == 0) {
 					lck_mtx_lock(entry->domain->dom_mtx);
@@ -333,7 +333,7 @@ proto_inject(
 	}
 	
 	if (entry) {
-		lck_mtx_lock(thread->input_lck);
+		lck_mtx_lock(&thread->input_lck);
 		if (entry->inject_first == NULL) {
 			proto_total_waiting++;
 			thread->input_waiting |= DLIL_PROTO_WAITING;
@@ -346,7 +346,7 @@ proto_inject(
 		if ((thread->input_waiting & DLIL_INPUT_RUNNING) == 0) {
 			wakeup((caddr_t)&thread->input_waiting);
 		}
-		lck_mtx_unlock(thread->input_lck);
+		lck_mtx_unlock(&thread->input_lck);
 	}
 	else
 	{
diff --git a/bsd/net/multicast_list.c b/bsd/net/multicast_list.c
index 68fbf23b0..e91aeeb11 100644
--- a/bsd/net/multicast_list.c
+++ b/bsd/net/multicast_list.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -151,5 +151,6 @@ multicast_list_program(struct multicast_list * mc_list,
 	(void)multicast_list_remove(mc_list);
 	*mc_list = new_mc_list;
     }
+    ifnet_free_multicast_list(source_multicast_list);
     return (error);
 }
diff --git a/bsd/net/ndrv.c b/bsd/net/ndrv.c
index 1797d16f4..51c218910 100644
--- a/bsd/net/ndrv.c
+++ b/bsd/net/ndrv.c
@@ -100,6 +100,8 @@ extern struct domain ndrvdomain;
 extern struct protosw ndrvsw;
 extern lck_mtx_t *domain_proto_mtx;
 
+#define NDRV_PROTODEMUX_COUNT	10
+
 /*
  * Verify these values match.
  * To keep clients from including dlil.h, we define
@@ -703,6 +705,8 @@ ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt)
 		return ENOTSUP; // version is too new!
 	else if (ndrvSpec.version < 1)
 		return EINVAL; // version is not valid
+	else if (ndrvSpec.demux_count > NDRV_PROTODEMUX_COUNT || ndrvSpec.demux_count == 0)
+		return EINVAL; // demux_count is not valid
 	
 	bzero(&proto_param, sizeof(proto_param));
 	proto_param.demux_count = ndrvSpec.demux_count;
diff --git a/bsd/net/ndrv.h b/bsd/net/ndrv.h
index 6f61df9f5..7e9fc9700 100644
--- a/bsd/net/ndrv.h
+++ b/bsd/net/ndrv.h
@@ -109,7 +109,7 @@ struct ndrv_demux_desc
  * Field:
  *	version		:	must be NDRV_PROTOCOL_DESC_VERS
  *	protocol_family	:	unique identifier for this protocol
- *	demux_count	:	number of demux_list descriptors in demux_list
+ *	demux_count	:	number of demux_list descriptors in demux_list; maximum of 10
  *	demux_list	:	pointer to array of demux descriptors
  */
 struct ndrv_protocol_desc
diff --git a/bsd/net/net_osdep.h b/bsd/net/net_osdep.h
index 4208380cf..a17921f57 100644
--- a/bsd/net/net_osdep.h
+++ b/bsd/net/net_osdep.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -244,9 +244,10 @@
  *	NetBSD 1.5: always use IFAREF whenever reference gets added.
  *		always use IFAFREE whenever reference gets freed.
  *		IFAFREE frees ifaddr when ifa_refcnt reaches 0.
- *	Darwin: always use ifaref whenever reference gets added.
- *		always use ifafree whenever reference gets freed.
- *		ifaref and ifafree are responsible for determining when to free.
+ *	Darwin: always use IFA_ADDREF whenever reference gets added.
+ *		always use IFA_REMREF whenever reference gets freed.
+ *		IFA_ADDREF and IFA_REMREF are responsible for determining
+ *		when to free.
  *	others: do not increase refcnt for ifp->if_addrlist and in_ifaddr.
  *		use IFAFREE once when ifaddr is disconnected from
  *		ifp->if_addrlist and in_ifaddr.  IFAFREE frees ifaddr when
@@ -267,11 +268,6 @@ extern const char *if_name(struct ifnet *);
 #define if_addrlist	if_addrhead
 #define if_list		if_link
 
-/* sys/net/if.h */
-#ifndef __APPLE__
-#define IFAREF(ifa)	do { ++(ifa)->ifa_refcnt; } while (0)
-#endif
-
 #define WITH_CONVERT_AND_STRIP_IP_LEN
 
 #if 1				/* at this moment, all OSes do this */
diff --git a/bsd/net/net_str_id.c b/bsd/net/net_str_id.c
index 7f4fcd52f..bc28f03c4 100644
--- a/bsd/net/net_str_id.c
+++ b/bsd/net/net_str_id.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008,2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -52,8 +52,6 @@ static lck_mtx_t						*net_str_id_lock = NULL;
 static u_int32_t nsi_kind_next[NSI_MAX_KIND] = { FIRST_NET_STR_ID, FIRST_NET_STR_ID, FIRST_NET_STR_ID };
 static u_int32_t nsi_next_id = FIRST_NET_STR_ID;
 
-#if NETMIBS
-
 extern int sysctl_if_family_ids SYSCTL_HANDLER_ARGS;
 
 SYSCTL_DECL(_net_link_generic_system);
@@ -61,9 +59,6 @@ SYSCTL_DECL(_net_link_generic_system);
 SYSCTL_PROC(_net_link_generic_system, OID_AUTO, if_family_ids, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
 	0, 0, sysctl_if_family_ids, "S, if_family_id", "Interface Family ID table");
 
-#endif /* NETMIBS */
-
-
 __private_extern__ void
 net_str_id_init(void)
 {
@@ -153,8 +148,6 @@ net_str_id_find_internal(const char	*string, u_int32_t *out_id,
 }
 
 
-#if NETMIBS
-
 #define ROUNDUP32(a) \
         ((a) > 0 ? (1 + (((a) - 1) | (sizeof(uint32_t) - 1))) : sizeof(uint32_t))
 
@@ -210,6 +203,3 @@ done:
 		_FREE(iffmid, M_TEMP);
 	return error;
 }
-
-#endif /* NETMIBS */
-
diff --git a/bsd/net/netsrc.c b/bsd/net/netsrc.c
new file mode 100644
index 000000000..2c1037c26
--- /dev/null
+++ b/bsd/net/netsrc.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/kpi_mbuf.h>
+#include <sys/socket.h>
+#include <sys/kern_control.h>
+#include <sys/mcache.h>
+#include <sys/socketvar.h>
+
+#include <kern/debug.h>
+
+#include <libkern/libkern.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/in_var.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+
+#include <net/netsrc.h>
+
+static errno_t	netsrc_ctlsend(kern_ctl_ref, uint32_t, void *, mbuf_t, int);
+static errno_t	netsrc_ctlconnect(kern_ctl_ref, struct sockaddr_ctl *, void **);
+static errno_t	netsrc_ipv4(kern_ctl_ref, uint32_t, struct netsrc_req *); 
+static errno_t	netsrc_ipv6(kern_ctl_ref, uint32_t, struct netsrc_req *);
+
+static kern_ctl_ref	netsrc_ctlref = NULL;
+
+__private_extern__ void
+netsrc_init(void)
+{
+	errno_t error;
+	struct kern_ctl_reg netsrc_ctl = {
+		.ctl_connect = netsrc_ctlconnect,
+		.ctl_send    = netsrc_ctlsend,
+	};
+
+	strlcpy(netsrc_ctl.ctl_name, NETSRC_CTLNAME, sizeof(NETSRC_CTLNAME));
+
+	if ((error = ctl_register(&netsrc_ctl, &netsrc_ctlref)))
+		printf("%s: ctl_register failed %d\n", __func__, error);
+}
+
+static errno_t
+netsrc_ctlconnect(kern_ctl_ref kctl, struct sockaddr_ctl *sac, void **uinfo)
+{
+#pragma unused(kctl, sac, uinfo)
+
+	/*
+	 * We don't need to do anything here. This callback is only necessary
+	 * for ctl_register() to succeed.
+	 */
+	return (0);
+}
+
+static errno_t
+netsrc_ctlsend(kern_ctl_ref kctl, uint32_t unit, void *uinfo, mbuf_t m,
+    int flags)
+{
+#pragma unused(uinfo, flags)
+	errno_t error;
+	struct netsrc_req *nrq, storage;
+
+	if (mbuf_pkthdr_len(m) < sizeof(*nrq)) {
+		error = EINVAL;
+		goto out;
+	}
+	if (mbuf_len(m) >= sizeof(*nrq))
+		nrq = mbuf_data(m);
+	else {
+		mbuf_copydata(m, 0, sizeof(storage), &storage);
+		nrq = &storage;
+	}
+	/* We only have one version right now. */
+	if (nrq->nrq_ver != NETSRC_VERSION1) {
+		error = EINVAL;
+		goto out;
+	}
+	switch (nrq->nrq_sin.sin_family) {
+	case AF_INET:
+		error = netsrc_ipv4(kctl, unit, nrq);
+		break;
+	case AF_INET6:
+		error = netsrc_ipv6(kctl, unit, nrq);
+		break;
+	default:
+		printf("%s: invalid family\n", __func__);
+		error = EINVAL;
+	}
+out:
+	mbuf_freem(m);
+
+	return (error);
+
+}
+
+static errno_t
+netsrc_ipv4(kern_ctl_ref kctl, uint32_t unit, struct netsrc_req *nrq)
+{
+	errno_t error = EHOSTUNREACH;
+	struct sockaddr_in *dstsin;
+	struct rtentry *rt;
+	struct in_ifaddr *ia;
+	struct netsrc_rep nrp;
+	struct sockaddr_in6 v4entry = {
+		.sin6_family = AF_INET6,
+		.sin6_len = sizeof(struct sockaddr_in6),
+		.sin6_addr = IN6ADDR_V4MAPPED_INIT,
+	};
+	struct in6_addrpolicy *policy;
+
+	dstsin = &nrq->nrq_sin;
+
+	if (dstsin->sin_len < sizeof (*dstsin) ||
+	    dstsin->sin_addr.s_addr == INADDR_ANY)
+		return (EINVAL);
+
+	lck_mtx_lock(rnh_lock);
+	rt = rt_lookup(TRUE, (struct sockaddr *)dstsin, NULL,
+	    rt_tables[AF_INET], nrq->nrq_ifscope);
+	lck_mtx_unlock(rnh_lock);
+	if (!rt)
+		return (EHOSTUNREACH);
+	lck_rw_lock_shared(in_ifaddr_rwlock);
+	TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
+		IFA_LOCK_SPIN(&ia->ia_ifa);
+		if (ia->ia_ifp == rt->rt_ifp) {
+			memset(&nrp, 0, sizeof(nrp));
+			memcpy(&nrp.nrp_sin, IA_SIN(ia), sizeof(nrp.nrp_sin));
+			IFA_UNLOCK(&ia->ia_ifa);
+			v4entry.sin6_addr.s6_addr32[3] =
+			    nrp.nrp_sin.sin_addr.s_addr;
+			policy = in6_addrsel_lookup_policy(&v4entry);
+			if (policy->label != -1) {
+				nrp.nrp_label = policy->label;
+				nrp.nrp_precedence = policy->preced;
+				/* XXX might not be true */
+				nrp.nrp_dstlabel = policy->label;
+				nrp.nrp_dstprecedence = policy->preced;
+			}
+			error = ctl_enqueuedata(kctl, unit, &nrp,
+			    sizeof(nrp), CTL_DATA_EOR);
+			break;
+		}
+		IFA_UNLOCK(&ia->ia_ifa);
+	}
+	lck_rw_done(in_ifaddr_rwlock);
+	if (rt)
+		rtfree(rt);
+
+	return (error);
+}
+
+static errno_t
+netsrc_ipv6(kern_ctl_ref kctl, uint32_t unit, struct netsrc_req *nrq)
+{
+	struct sockaddr_in6 *dstsin6;
+	struct in6_addr *in6, storage;
+	struct in6_ifaddr *ia;
+	struct route_in6 ro;
+	int error = EHOSTUNREACH;
+	struct netsrc_rep nrp;
+
+	dstsin6 = &nrq->nrq_sin6;
+
+	if (dstsin6->sin6_len < sizeof (*dstsin6) ||
+	    IN6_IS_ADDR_UNSPECIFIED(&dstsin6->sin6_addr))
+		return (EINVAL);
+
+	memset(&ro, 0, sizeof(ro));
+	lck_mtx_lock(rnh_lock);
+	ro.ro_rt = rt_lookup(TRUE, (struct sockaddr *)dstsin6, NULL,
+	    rt_tables[AF_INET6], nrq->nrq_ifscope);
+	lck_mtx_unlock(rnh_lock);
+	if (!ro.ro_rt)
+		return (EHOSTUNREACH);
+	in6 = in6_selectsrc(dstsin6, NULL, NULL, &ro, NULL, &storage,
+	    nrq->nrq_ifscope, &error);
+	if (ro.ro_rt)
+		rtfree(ro.ro_rt);
+	if (!in6 || error)
+		return (error);
+	memset(&nrp, 0, sizeof(nrp));
+	nrp.nrp_sin6.sin6_family = AF_INET6;
+	nrp.nrp_sin6.sin6_len    = sizeof(nrp.nrp_sin6);
+	memcpy(&nrp.nrp_sin6.sin6_addr, in6, sizeof(nrp.nrp_sin6.sin6_addr));
+	lck_rw_lock_shared(&in6_ifaddr_rwlock);
+	for (ia = in6_ifaddrs; ia; ia = ia->ia_next) {
+		if (memcmp(&ia->ia_addr.sin6_addr, in6, sizeof(*in6)) == 0) {
+			struct sockaddr_in6 sin6;
+			struct in6_addrpolicy *policy;
+
+			if (ia->ia6_flags & IN6_IFF_TEMPORARY)
+				nrp.nrp_flags |= NETSRC_IP6_FLAG_TEMPORARY;
+			if (ia->ia6_flags & IN6_IFF_TENTATIVE)
+				nrp.nrp_flags |= NETSRC_IP6_FLAG_TENTATIVE;
+			if (ia->ia6_flags & IN6_IFF_DEPRECATED)
+				nrp.nrp_flags |= NETSRC_IP6_FLAG_DEPRECATED;
+			sin6.sin6_family = AF_INET6;
+			sin6.sin6_len    = sizeof(sin6);
+			memcpy(&sin6.sin6_addr, in6, sizeof(*in6));
+			policy = in6_addrsel_lookup_policy(&sin6);
+			if (policy->label != -1) {
+				nrp.nrp_label = policy->label;
+				nrp.nrp_precedence = policy->preced;
+			}
+			memcpy(&sin6.sin6_addr, &dstsin6->sin6_addr,
+			    sizeof(dstsin6->sin6_addr));
+			policy = in6_addrsel_lookup_policy(&sin6);
+			if (policy->label != -1) {
+				nrp.nrp_dstlabel = policy->label;
+				nrp.nrp_dstprecedence = policy->preced;
+			}
+			break;
+		}
+	}
+	lck_rw_done(&in6_ifaddr_rwlock);
+	error = ctl_enqueuedata(kctl, unit, &nrp, sizeof(nrp),
+	    CTL_DATA_EOR);
+
+	return (error);
+}
diff --git a/EXTERNAL_HEADERS/architecture/ppc/cframe.h b/bsd/net/netsrc.h
similarity index 58%
rename from EXTERNAL_HEADERS/architecture/ppc/cframe.h
rename to bsd/net/netsrc.h
index 0db3fce7d..54ba8d8be 100644
--- a/EXTERNAL_HEADERS/architecture/ppc/cframe.h
+++ b/bsd/net/netsrc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,26 +25,46 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/* Copyright (c) 1991 NeXT Software, Inc.  All rights reserved.
- *
- *	File:	architecture/ppc/cframe.h
- *	Author:	Mike DeMoney, NeXT Software, Inc.
- *
- *	This include file defines C calling sequence defines
- *	for ppc port.
- */
 
-#ifndef	_ARCH_PPC_CFRAME_H_
-#define	_ARCH_PPC_CFRAME_H_
-
-#if defined (__ppc64__)
-#define	C_ARGSAVE_LEN	64	/* at least 64 bytes of arg save */
-#define	C_STACK_ALIGN	32	/* stack must be 32 byte aligned */
-#define	C_RED_ZONE	320	/* 320 bytes to skip over saved registers */
-#else
-#define	C_ARGSAVE_LEN	32	/* at least 32 bytes of arg save */
-#define	C_STACK_ALIGN	16	/* stack must be 16 byte aligned */
-#define	C_RED_ZONE	224	/* 224 bytes to skip over saved registers */
+#ifndef __NET_NETSRC_H__
+#define	__NET_NETSRC_H__
+
+#define	NETSRC_CTLNAME	"com.apple.netsrc"
+
+#define	NETSRC_VERSION1	1
+#define	NETSRC_CURVERS	NETSRC_VERSION1
+
+struct netsrc_req {
+	unsigned int nrq_ver;
+	unsigned int nrq_ifscope;
+	union {
+		struct sockaddr_in  _usin;
+		struct sockaddr_in6 _usin6;
+	} _usa;
+};
+
+#define	nrq_sin		_usa._usin
+#define	nrq_sin6	_usa._usin6
+
+struct netsrc_rep {
+	union {
+		struct sockaddr_in  _usin;
+		struct sockaddr_in6 _usin6;
+	} _usa;
+#define	NETSRC_IP6_FLAG_TENTATIVE	0x0001
+#define	NETSRC_IP6_FLAG_TEMPORARY	0x0002
+#define	NETSRC_IP6_FLAG_DEPRECATED	0x0004
+	uint16_t nrp_flags;
+	uint16_t nrp_label;
+	uint16_t nrp_precedence;
+	uint16_t nrp_dstlabel;
+	uint16_t nrp_dstprecedence;
+};
+
+#define	nrp_sin		_usa._usin
+#define	nrp_sin6	_usa._usin6
+
+#ifdef KERNEL_PRIVATE
+__private_extern__ void netsrc_init(void);
 #endif
 
-#endif	/* _ARCH_PPC_CFRAME_H_ */
+#endif /* __NET_NETSRC_H__ */
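
For context, the control defined here is reached from user space through the usual PF_SYSTEM kernel-control handshake. A hedged user-space sketch (query_netsrc is a hypothetical helper; dst must carry a valid sin_len and a non-ANY address, per the checks in netsrc_ipv4()):

	#include <sys/types.h>
	#include <sys/socket.h>
	#include <sys/ioctl.h>
	#include <sys/sys_domain.h>
	#include <sys/kern_control.h>
	#include <netinet/in.h>
	#include <string.h>
	#include <unistd.h>
	#include <net/netsrc.h>

	int
	query_netsrc(const struct sockaddr_in *dst, struct netsrc_rep *rep)
	{
		struct ctl_info info;
		struct sockaddr_ctl sc;
		struct netsrc_req req;
		int fd;

		if ((fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL)) == -1)
			return (-1);

		/* Resolve the control name to an id, then connect. */
		memset(&info, 0, sizeof (info));
		strlcpy(info.ctl_name, NETSRC_CTLNAME, sizeof (info.ctl_name));
		if (ioctl(fd, CTLIOCGINFO, &info) == -1)
			goto fail;

		memset(&sc, 0, sizeof (sc));
		sc.sc_len = sizeof (sc);
		sc.sc_family = AF_SYSTEM;
		sc.ss_sysaddr = AF_SYS_CONTROL;
		sc.sc_id = info.ctl_id;
		if (connect(fd, (struct sockaddr *)&sc, sizeof (sc)) == -1)
			goto fail;

		/* One versioned request out, one netsrc_rep back (CTL_DATA_EOR). */
		memset(&req, 0, sizeof (req));
		req.nrq_ver = NETSRC_VERSION1;
		req.nrq_sin = *dst;
		if (send(fd, &req, sizeof (req), 0) == -1 ||
		    recv(fd, rep, sizeof (*rep), 0) == -1)
			goto fail;

		(void) close(fd);
		return (0);
	fail:
		(void) close(fd);
		return (-1);
	}
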
diff --git a/bsd/net/ntstat.c b/bsd/net/ntstat.c
new file mode 100644
index 000000000..4bb6e1c28
--- /dev/null
+++ b/bsd/net/ntstat.c
@@ -0,0 +1,1954 @@
+/*
+ * Copyright (c) 2010-2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/kpi_mbuf.h>
+#include <sys/socket.h>
+#include <sys/kern_control.h>
+#include <sys/mcache.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+
+#include <kern/clock.h>
+#include <kern/debug.h>
+
+#include <libkern/libkern.h>
+#include <libkern/OSMalloc.h>
+#include <libkern/OSAtomic.h>
+#include <libkern/locks.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <net/ntstat.h>
+
+#include <netinet/ip_var.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+#include <netinet6/in6_pcb.h>
+#include <netinet6/in6_var.h>
+
+__private_extern__ int	nstat_collect = 1;
+SYSCTL_INT(_net, OID_AUTO, statistics, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &nstat_collect, 0, "Collect detailed statistics");
+
+typedef struct nstat_control_state
+{
+	struct nstat_control_state	*next;
+	u_int32_t					watching;
+	decl_lck_mtx_data(, mtx);
+	kern_ctl_ref				kctl;
+	u_int32_t					unit;
+	nstat_src_ref_t				next_srcref;
+	struct nstat_src			*srcs;
+	int							cleanup;
+	int							suser;
+} nstat_control_state;
+
+static void nstat_control_register(void);
+
+static volatile OSMallocTag	nstat_malloc_tag = NULL;
+static nstat_control_state	*nstat_controls = NULL;
+static uint64_t				nstat_idle_time = 0ULL;
+static decl_lck_mtx_data(, nstat_mtx);
+
+static void
+nstat_copy_sa_out(
+	const struct sockaddr	*src,
+	struct sockaddr			*dst,
+	int						maxlen)
+{
+	if (src->sa_len > maxlen) return;
+	
+	bcopy(src, dst, src->sa_len);
+	if (src->sa_family == AF_INET6 &&
+		src->sa_len >= sizeof(struct sockaddr_in6))
+	{
+		struct sockaddr_in6	*sin6 = (struct sockaddr_in6*)dst;
+		if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr))
+		{
+			if (sin6->sin6_scope_id == 0)
+				sin6->sin6_scope_id = ntohs(sin6->sin6_addr.__u6_addr.__u6_addr16[1]);
+			sin6->sin6_addr.__u6_addr.__u6_addr16[1] = 0;
+		}
+	}
+}
+
+static void
+nstat_ip_to_sockaddr(
+	const struct in_addr	*ip,
+	u_int16_t				port,
+	struct sockaddr_in		*sin,
+	u_int32_t				maxlen)
+{
+	if (maxlen < sizeof(struct sockaddr_in))
+		return;
+	
+	sin->sin_family = AF_INET;
+	sin->sin_len = sizeof(*sin);
+	sin->sin_port = port;
+	sin->sin_addr = *ip;
+}
+
+static void
+nstat_ip6_to_sockaddr(
+	const struct in6_addr	*ip6,
+	u_int16_t				port,
+	struct sockaddr_in6		*sin6,
+	u_int32_t				maxlen)
+{
+	if (maxlen < sizeof(struct sockaddr_in6))
+		return;
+	
+	sin6->sin6_family = AF_INET6;
+	sin6->sin6_len = sizeof(*sin6);
+	sin6->sin6_port = port;
+	sin6->sin6_addr = *ip6;
+	if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr))
+	{
+		sin6->sin6_scope_id = ntohs(sin6->sin6_addr.__u6_addr.__u6_addr16[1]);
+		sin6->sin6_addr.__u6_addr.__u6_addr16[1] = 0;
+	}
+}
+
+#pragma mark -- Network Statistic Providers --
+
+typedef struct nstat_provider
+{
+	struct nstat_provider	*next;
+	nstat_provider_id_t		nstat_provider_id;
+	size_t					nstat_descriptor_length;
+	errno_t					(*nstat_lookup)(const void *data, u_int32_t length, nstat_provider_cookie_t *out_cookie);
+	int						(*nstat_gone)(nstat_provider_cookie_t cookie);
+	errno_t					(*nstat_counts)(nstat_provider_cookie_t cookie, struct nstat_counts *out_counts, int *out_gone);
+	errno_t					(*nstat_watcher_add)(nstat_control_state *state);
+	void					(*nstat_watcher_remove)(nstat_control_state *state);
+	errno_t					(*nstat_copy_descriptor)(nstat_provider_cookie_t cookie, void *data, u_int32_t len);
+	void					(*nstat_release)(nstat_provider_cookie_t cookie);
+} nstat_provider;
+
+static errno_t nstat_control_source_add(u_int64_t context, nstat_control_state *state, nstat_provider *provider, nstat_provider_cookie_t cookie);
+struct nstat_provider	*nstat_providers = NULL;
+
+static struct nstat_provider*
+nstat_find_provider_by_id(
+	nstat_provider_id_t	id)
+{
+	struct nstat_provider	*provider;
+	
+	for (provider = nstat_providers; provider != NULL; provider = provider->next)
+	{
+		if (provider->nstat_provider_id == id)
+			break;
+	}
+	
+	return provider;
+}
+
+static errno_t
+nstat_lookup_entry(
+	nstat_provider_id_t		id,
+	const void				*data,
+	u_int32_t				length,
+	nstat_provider			**out_provider,
+	nstat_provider_cookie_t	*out_cookie)
+{
+	*out_provider = nstat_find_provider_by_id(id);
+	if (*out_provider == NULL)
+	{
+		printf("%s:%d: provider %u not found\n", __FUNCTION__, __LINE__, id);
+		return ENOENT;
+	}
+	
+	return (*out_provider)->nstat_lookup(data, length, out_cookie);
+}
+
+static void nstat_init_route_provider(void);
+static void nstat_init_tcp_provider(void);
+static void nstat_init_udp_provider(void);
+
+static void
+nstat_init(void)
+{
+	if (nstat_malloc_tag != NULL) return;
+	
+	OSMallocTag tag = OSMalloc_Tagalloc(NET_STAT_CONTROL_NAME, OSMT_DEFAULT);
+	if (!OSCompareAndSwapPtr(NULL, tag, &nstat_malloc_tag))
+	{
+		OSMalloc_Tagfree(tag);
+		tag = nstat_malloc_tag;
+	}
+	else
+	{
+		// We need to initialize other things; do it here, as this code path will only be hit once.
+		nstat_init_route_provider();
+		nstat_init_tcp_provider();
+		nstat_init_udp_provider();
+		nstat_control_register();
+	}
+}
+
+#pragma mark -- Aligned Buffer Allocation --
+
+struct align_header
+{
+	u_int32_t	offset;
+	u_int32_t	length;
+};
+
+static void*
+nstat_malloc_aligned(
+	u_int32_t	length,
+	u_int8_t	alignment,
+	OSMallocTag	tag)
+{
+	struct align_header	*hdr = NULL;
+	u_int32_t	size = length + sizeof(*hdr) + alignment - 1;
+	
+	u_int8_t	*buffer = OSMalloc(size, tag);
+	if (buffer == NULL) return NULL;
+	
+	u_int8_t	*aligned = buffer + sizeof(*hdr);
+	aligned = (u_int8_t*)P2ROUNDUP(aligned, alignment);
+	
+	hdr = (struct align_header*)(aligned - sizeof(*hdr));
+	hdr->offset = aligned - buffer;
+	hdr->length = size;
+	
+	return aligned;
+}
+
+static void
+nstat_free_aligned(
+	void		*buffer,
+	OSMallocTag	tag)
+{
+	struct align_header *hdr = (struct align_header*)((u_int8_t*)buffer - sizeof(*hdr));
+	OSFree(((char*)buffer) - hdr->offset, hdr->length, tag);
+}
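
A usage sketch for the pair above: the returned pointer hides an align_header just below it, which nstat_free_aligned() reads back to recover the original allocation.

	void *buf;

	buf = nstat_malloc_aligned(256, 16, nstat_malloc_tag);	/* 16-byte aligned */
	if (buf != NULL) {
		/* ... use buf ... */
		nstat_free_aligned(buf, nstat_malloc_tag);
	}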
+
+#pragma mark -- Route Provider --
+
+static nstat_provider	nstat_route_provider;
+
+static errno_t
+nstat_route_lookup(
+	const void				*data,
+	u_int32_t 				length,
+	nstat_provider_cookie_t	*out_cookie)
+{
+	// rt_lookup doesn't take const params but it doesn't modify the parameters for
+	// the lookup. So...we use a union to eliminate the warning.
+	union
+	{
+		struct sockaddr *sa;
+		const struct sockaddr *const_sa;
+	} dst, mask;
+	
+	const nstat_route_add_param	*param = (const nstat_route_add_param*)data;
+	*out_cookie = NULL;
+	
+	if (length < sizeof(*param))
+	{
+		printf("%s:%d: expected %lu byte param, received %u\n", __FUNCTION__, __LINE__, sizeof(*param), length);
+		return EINVAL;
+	}
+	
+	if (param->dst.v4.sin_family == 0 ||
+		param->dst.v4.sin_family > AF_MAX ||
+		(param->mask.v4.sin_family != 0 && param->mask.v4.sin_family != param->dst.v4.sin_family))
+	{
+		printf("%s:%d invalid family (dst=%d, mask=%d)\n", __FUNCTION__, __LINE__,
+			param->dst.v4.sin_family, param->mask.v4.sin_family);
+		return EINVAL;
+	}
+	
+	if (param->dst.v4.sin_len > sizeof(param->dst) ||
+		(param->mask.v4.sin_family && param->mask.v4.sin_len > sizeof(param->mask)))
+	{
+		printf("%s:%d invalid length (dst=%d, mask=%d)\n", __FUNCTION__, __LINE__,
+			param->dst.v4.sin_len, param->mask.v4.sin_len);
+		return EINVAL;
+	}
+	
+	// TBD: Need to validate length of sockaddr for different families?
+	dst.const_sa = (const struct sockaddr*)&param->dst;
+	mask.const_sa = param->mask.v4.sin_family ? (const struct sockaddr*)&param->mask : NULL;
+	
+	struct radix_node_head	*rnh = rt_tables[dst.sa->sa_family];
+	if (rnh == NULL) return EAFNOSUPPORT;
+	
+	lck_mtx_lock(rnh_lock);
+	struct rtentry *rt = rt_lookup(TRUE, dst.sa, mask.sa, rnh, param->ifindex);
+	lck_mtx_unlock(rnh_lock);
+	
+	if (rt) *out_cookie = (nstat_provider_cookie_t)rt;
+	
+	return rt ? 0 : ENOENT;
+}
+
+static int
+nstat_route_gone(
+	nstat_provider_cookie_t	cookie)
+{
+	struct rtentry		*rt = (struct rtentry*)cookie;
+	return ((rt->rt_flags & RTF_UP) == 0) ? 1 : 0;
+}
+
+static errno_t
+nstat_route_counts(
+	nstat_provider_cookie_t	cookie,
+	struct nstat_counts		*out_counts,
+	int						*out_gone)
+{
+	struct rtentry		*rt = (struct rtentry*)cookie;
+	struct nstat_counts	*rt_stats = rt->rt_stats;
+	
+	*out_gone = 0;
+	
+	if ((rt->rt_flags & RTF_UP) == 0) *out_gone = 1;
+	
+	if (rt_stats)
+	{
+		atomic_get_64(out_counts->nstat_rxpackets, &rt_stats->nstat_rxpackets);
+		atomic_get_64(out_counts->nstat_rxbytes, &rt_stats->nstat_rxbytes);
+		atomic_get_64(out_counts->nstat_txpackets, &rt_stats->nstat_txpackets);
+		atomic_get_64(out_counts->nstat_txbytes, &rt_stats->nstat_txbytes);
+		out_counts->nstat_rxduplicatebytes = rt_stats->nstat_rxduplicatebytes;
+		out_counts->nstat_rxoutoforderbytes = rt_stats->nstat_rxoutoforderbytes;
+		out_counts->nstat_txretransmit = rt_stats->nstat_txretransmit;
+		out_counts->nstat_connectattempts = rt_stats->nstat_connectattempts;
+		out_counts->nstat_connectsuccesses = rt_stats->nstat_connectsuccesses;
+		out_counts->nstat_min_rtt = rt_stats->nstat_min_rtt;
+		out_counts->nstat_avg_rtt = rt_stats->nstat_avg_rtt;
+		out_counts->nstat_var_rtt = rt_stats->nstat_var_rtt;
+	}
+	else
+		bzero(out_counts, sizeof(*out_counts));
+	
+	return 0;
+}
+
+static void
+nstat_route_release(
+	nstat_provider_cookie_t cookie)
+{
+	rtfree((struct rtentry*)cookie);
+}
+
+static u_int32_t	nstat_route_watchers = 0;
+
+static int
+nstat_route_walktree_add(
+	struct radix_node	*rn,
+	void				*context)
+{
+	errno_t	result = 0;
+	struct rtentry *rt = (struct rtentry *)rn;
+	nstat_control_state	*state	= (nstat_control_state*)context;
+
+	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
+
+	/* RTF_UP can't change while rnh_lock is held */
+	if ((rt->rt_flags & RTF_UP) != 0)
+	{
+		/* Clear RTPRF_OURS if the route is still usable */
+		RT_LOCK(rt);
+		if (rt_validate(rt)) {
+			RT_ADDREF_LOCKED(rt);
+			RT_UNLOCK(rt);
+		} else {
+			RT_UNLOCK(rt);
+			rt = NULL;
+		}
+
+		/* Otherwise if RTF_CONDEMNED, treat it as if it were down */
+		if (rt == NULL)
+			return (0);
+
+		result = nstat_control_source_add(0, state, &nstat_route_provider, rt);
+		if (result != 0)
+			rtfree_locked(rt);
+	}
+	
+	return result;
+}
+
+static errno_t
+nstat_route_add_watcher(
+	nstat_control_state	*state)
+{
+	int i;
+	errno_t result = 0;
+	OSIncrementAtomic(&nstat_route_watchers);
+	
+	lck_mtx_lock(rnh_lock);
+	for (i = 1; i < AF_MAX; i++)
+	{
+		struct radix_node_head *rnh;
+		rnh = rt_tables[i];
+		if (!rnh) continue;
+		
+		result = rnh->rnh_walktree(rnh, nstat_route_walktree_add, state);
+		if (result != 0)
+		{
+			printf("%s:%d rnh_walktree failed: %d\n", __FUNCTION__, __LINE__, result);
+			break;
+		}
+	}
+	lck_mtx_unlock(rnh_lock);
+	
+	return result;
+}
+
+__private_extern__ void
+nstat_route_new_entry(
+	struct rtentry	*rt)
+{
+	if (nstat_route_watchers == 0)
+		return;
+	
+	lck_mtx_lock(&nstat_mtx);
+	if ((rt->rt_flags & RTF_UP) != 0)
+	{
+		nstat_control_state	*state;
+		for (state = nstat_controls; state; state = state->next)
+		{
+			if ((state->watching & (1 << NSTAT_PROVIDER_ROUTE)) != 0)
+			{
+				// this client is watching routes
+				// acquire a reference for the route
+				RT_ADDREF(rt);
+				
+				// add the source, if that fails, release the reference
+				if (nstat_control_source_add(0, state, &nstat_route_provider, rt) != 0)
+					RT_REMREF(rt);
+			}
+		}
+	}
+	lck_mtx_unlock(&nstat_mtx);
+}
+
+static void
+nstat_route_remove_watcher(
+	__unused nstat_control_state	*state)
+{
+	OSDecrementAtomic(&nstat_route_watchers);
+}
+
+static errno_t
+nstat_route_copy_descriptor(
+	nstat_provider_cookie_t	cookie,
+	void					*data,
+	u_int32_t				len)
+{
+	nstat_route_descriptor	*desc = (nstat_route_descriptor*)data;
+	if (len < sizeof(*desc))
+	{
+		printf("%s:%d invalid length, wanted %lu, got %d\n", __FUNCTION__, __LINE__, sizeof(*desc), len);
+		return EINVAL;
+	}
+	bzero(desc, sizeof(*desc));
+	
+	struct rtentry	*rt = (struct rtentry*)cookie;
+	desc->id = (uintptr_t)rt;
+	desc->parent_id = (uintptr_t)rt->rt_parent;
+	desc->gateway_id = (uintptr_t)rt->rt_gwroute;
+
+	
+	// key/dest
+	struct sockaddr	*sa;
+	if ((sa = rt_key(rt)))
+		nstat_copy_sa_out(sa, &desc->dst.sa, sizeof(desc->dst));
+	
+	// mask
+	if ((sa = rt_mask(rt)) && sa->sa_len <= sizeof(desc->mask))
+		memcpy(&desc->mask, sa, sa->sa_len);
+	
+	// gateway
+	if ((sa = rt->rt_gateway))
+		nstat_copy_sa_out(sa, &desc->gateway.sa, sizeof(desc->gateway));
+	
+	if (rt->rt_ifp)
+		desc->ifindex = rt->rt_ifp->if_index;
+	
+	desc->flags = rt->rt_flags;
+	
+	return 0;
+}
+
+static void
+nstat_init_route_provider(void)
+{
+	bzero(&nstat_route_provider, sizeof(nstat_route_provider));
+	nstat_route_provider.nstat_descriptor_length = sizeof(nstat_route_descriptor);
+	nstat_route_provider.nstat_provider_id = NSTAT_PROVIDER_ROUTE;
+	nstat_route_provider.nstat_lookup = nstat_route_lookup;
+	nstat_route_provider.nstat_gone = nstat_route_gone;
+	nstat_route_provider.nstat_counts = nstat_route_counts;
+	nstat_route_provider.nstat_release = nstat_route_release;
+	nstat_route_provider.nstat_watcher_add = nstat_route_add_watcher;
+	nstat_route_provider.nstat_watcher_remove = nstat_route_remove_watcher;
+	nstat_route_provider.nstat_copy_descriptor = nstat_route_copy_descriptor;
+	nstat_route_provider.next = nstat_providers;
+	nstat_providers = &nstat_route_provider;
+}
+
+#pragma mark -- Route Collection --
+
+static struct nstat_counts*
+nstat_route_attach(
+	struct rtentry	*rte)
+{
+	struct nstat_counts *result = rte->rt_stats;
+	if (result) return result;
+	
+	if (nstat_malloc_tag == NULL) nstat_init();
+	
+	result = nstat_malloc_aligned(sizeof(*result), sizeof(u_int64_t), nstat_malloc_tag);
+	if (!result) return result;
+	
+	bzero(result, sizeof(*result));
+	
+	if (!OSCompareAndSwapPtr(NULL, result, &rte->rt_stats))
+	{
+		nstat_free_aligned(result, nstat_malloc_tag);
+		result = rte->rt_stats;
+	}
+	
+	return result;
+}
+
+__private_extern__ void
+nstat_route_detach(
+	struct rtentry	*rte)
+{
+	if (rte->rt_stats)
+	{
+		nstat_free_aligned(rte->rt_stats, nstat_malloc_tag);
+		rte->rt_stats = NULL;
+	}
+}
+
+__private_extern__ void
+nstat_route_connect_attempt(
+	struct rtentry	*rte)
+{
+	while (rte)
+	{
+		struct nstat_counts*	stats = nstat_route_attach(rte);
+		if (stats)
+		{
+			OSIncrementAtomic(&stats->nstat_connectattempts);
+		}
+		
+		rte = rte->rt_parent;
+	}
+}
+
+__private_extern__ void
+nstat_route_connect_success(
+	struct rtentry	*rte)
+{
+	// Bump the success count on this route and each of its parents
+	while (rte)
+	{
+		struct nstat_counts*	stats = nstat_route_attach(rte);
+		if (stats)
+		{
+			OSIncrementAtomic(&stats->nstat_connectsuccesses);
+		}
+		
+		rte = rte->rt_parent;
+	}
+}
+
+__private_extern__ void
+nstat_route_tx(
+	struct rtentry	*rte,
+	u_int32_t		packets,
+	u_int32_t		bytes,
+	u_int32_t		flags)
+{
+	while (rte)
+	{
+		struct nstat_counts*	stats = nstat_route_attach(rte);
+		if (stats)
+		{
+			if ((flags & NSTAT_TX_FLAG_RETRANSMIT) != 0)
+			{
+				OSAddAtomic(bytes, &stats->nstat_txretransmit);
+			}
+			else
+			{
+				OSAddAtomic64((SInt64)packets, (SInt64*)&stats->nstat_txpackets);
+				OSAddAtomic64((SInt64)bytes, (SInt64*)&stats->nstat_txbytes);
+			}
+		}
+		
+		rte = rte->rt_parent;
+	}
+}
+
+__private_extern__ void
+nstat_route_rx(
+	struct rtentry	*rte,
+	u_int32_t		packets,
+	u_int32_t		bytes,
+	u_int32_t		flags)
+{
+	while (rte)
+	{
+		struct nstat_counts*	stats = nstat_route_attach(rte);
+		if (stats)
+		{
+			if (flags == 0)
+			{
+				OSAddAtomic64((SInt64)packets, (SInt64*)&stats->nstat_rxpackets);
+				OSAddAtomic64((SInt64)bytes, (SInt64*)&stats->nstat_rxbytes);
+			}
+			else
+			{
+				if (flags & NSTAT_RX_FLAG_OUT_OF_ORDER)
+					OSAddAtomic(bytes, &stats->nstat_rxoutoforderbytes);
+				if (flags & NSTAT_RX_FLAG_DUPLICATE)
+					OSAddAtomic(bytes, &stats->nstat_rxduplicatebytes);
+			}
+		}
+		
+		rte = rte->rt_parent;
+	}
+}
+
+__private_extern__ void
+nstat_route_rtt(
+	struct rtentry	*rte,
+	u_int32_t		rtt,
+	u_int32_t		rtt_var)
+{
+	const int32_t	factor = 8;
+	
+	while (rte)
+	{
+		struct nstat_counts*	stats = nstat_route_attach(rte);
+		if (stats)
+		{
+			int32_t	oldrtt;
+			int32_t	newrtt;
+			
+			// average
+			do
+			{
+				oldrtt = stats->nstat_avg_rtt;
+				if (oldrtt == 0)
+				{
+					newrtt = rtt;
+				}
+				else
+				{
+					newrtt = oldrtt - (oldrtt - (int32_t)rtt) / factor;
+				}
+				if (oldrtt == newrtt) break;
+			} while (!OSCompareAndSwap(oldrtt, newrtt, &stats->nstat_avg_rtt));
+			
+			// minimum
+			do
+			{
+				oldrtt = stats->nstat_min_rtt;
+				if (oldrtt != 0 && oldrtt < (int32_t)rtt)
+				{
+					break;
+				}
+			} while (!OSCompareAndSwap(oldrtt, rtt, &stats->nstat_min_rtt));
+			
+			// variance
+			do
+			{
+				oldrtt = stats->nstat_var_rtt;
+				if (oldrtt == 0)
+				{
+					newrtt = rtt_var;
+				}
+				else
+				{
+					newrtt = oldrtt - (oldrtt - (int32_t)rtt_var) / factor;
+				}
+				if (oldrtt == newrtt) break;
+			} while (!OSCompareAndSwap(oldrtt, newrtt, &stats->nstat_var_rtt));
+		}
+		
+		rte = rte->rt_parent;
+	}
+}
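+
+/*
+ * The averaging above is an exponentially weighted moving average with
+ * weight 1/factor: newrtt = oldrtt + (rtt - oldrtt) / 8. A worked
+ * example with illustrative numbers: with oldrtt = 100 and a sample
+ * rtt = 180, newrtt = 100 - (100 - 180) / 8 = 110, so each sample pulls
+ * the average one eighth of the way toward the new measurement. The
+ * minimum is kept separately by a compare-and-swap that only stores the
+ * sample when it is lower than the current value (or the value is
+ * still unset).
+ */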
+
+#pragma mark -- TCP Provider --
+
+static nstat_provider	nstat_tcp_provider;
+
+static errno_t
+nstat_tcpudp_lookup(
+	struct inpcbinfo		*inpinfo,
+	const void				*data,
+	u_int32_t 				length,
+	nstat_provider_cookie_t	*out_cookie)
+{
+	// parameter validation
+	const nstat_tcp_add_param	*param = (const nstat_tcp_add_param*)data;
+	if (length < sizeof(*param))
+	{
+		printf("%s:%d expected %lu byte param, received %u\n", __FUNCTION__, __LINE__, sizeof(*param), length);
+		return EINVAL;
+	}
+	
+	// the local and remote address families must match
+	if (param->remote.v4.sin_family != 0 &&
+		param->remote.v4.sin_family != param->local.v4.sin_family)
+	{
+		printf("%s:%d local family (%d) and remote family (%d) don't match\n",
+			__FUNCTION__, __LINE__, param->local.v4.sin_family, param->remote.v4.sin_family);
+		return EINVAL;
+	}
+	
+	struct inpcb *inp = NULL;
+	
+	switch (param->local.v4.sin_family)
+	{
+		case AF_INET:
+		{
+			if (param->local.v4.sin_len != sizeof(param->local.v4) ||
+		  		(param->remote.v4.sin_family != 0 &&
+		  		 param->remote.v4.sin_len != sizeof(param->remote.v4)))
+		  	{
+				printf("%s:%d invalid length for v4 src (%d) or dst (%d), should be %lu\n",
+					__FUNCTION__, __LINE__, param->local.v4.sin_len, param->remote.v4.sin_len,
+					sizeof(param->remote.v4));
+				return EINVAL;
+		  	}
+		  	
+			inp = in_pcblookup_hash(inpinfo, param->remote.v4.sin_addr, param->remote.v4.sin_port,
+						param->local.v4.sin_addr, param->local.v4.sin_port, 1, NULL);
+		}
+		break;
+		
+#if INET6
+		case AF_INET6:
+		{
+			union
+			{
+				const struct in6_addr 	*in6c;
+				struct in6_addr			*in6;
+			} local, remote;
+			
+			if (param->local.v6.sin6_len != sizeof(param->local.v6) ||
+		  		(param->remote.v6.sin6_family != 0 &&
+				 param->remote.v6.sin6_len != sizeof(param->remote.v6)))
+			{
+				printf("%s:%d invalid length for v6 src (%d) or dst (%d), should be %lu\n",
+					__FUNCTION__, __LINE__, param->local.v6.sin6_len, param->remote.v6.sin6_len,
+					sizeof(param->remote.v6));
+				return EINVAL;
+			}
+			
+			local.in6c = &param->local.v6.sin6_addr;
+			remote.in6c = &param->remote.v6.sin6_addr;
+			
+			inp = in6_pcblookup_hash(inpinfo, remote.in6, param->remote.v6.sin6_port,
+						local.in6, param->local.v6.sin6_port, 1, NULL);
+		}
+		break;
+#endif
+		
+		default:
+			printf("%s:%d unsupported address family %d\n", __FUNCTION__, __LINE__, param->local.v4.sin_family);
+			return EINVAL;
+	}
+	
+	if (inp == NULL) return ENOENT;
+	
+	// At this point we have a ref to the inpcb
+	*out_cookie = inp;
+	return 0;
+}
+
+static errno_t
+nstat_tcp_lookup(
+	const void				*data,
+	u_int32_t 				length,
+	nstat_provider_cookie_t	*out_cookie)
+{
+	return nstat_tcpudp_lookup(&tcbinfo, data, length, out_cookie);
+}
+
+static int
+nstat_tcp_gone(
+	nstat_provider_cookie_t	cookie)
+{
+	struct inpcb	*inp = (struct inpcb*)cookie;
+	struct tcpcb	*tp = intotcpcb(inp);
+	return (inp->inp_state == INPCB_STATE_DEAD || tp->t_state == TCPS_TIME_WAIT) ? 1 : 0;
+}
+
+static errno_t
+nstat_tcp_counts(
+	nstat_provider_cookie_t	cookie,
+	struct nstat_counts		*out_counts,
+	int						*out_gone)
+{
+	struct inpcb	*inp = (struct inpcb*)cookie;
+	struct tcpcb	*tp = intotcpcb(inp);
+	
+	bzero(out_counts, sizeof(*out_counts));
+	
+	*out_gone = 0;
+	
+	// if the pcb is in the dead state, we should stop using it
+	if (inp->inp_state == INPCB_STATE_DEAD || tp->t_state == TCPS_TIME_WAIT)
+	{
+		*out_gone = 1;
+	}
+	
+	if (tp->t_state > TCPS_LISTEN)
+	{
+		atomic_get_64(out_counts->nstat_rxpackets, &inp->inp_stat->rxpackets);
+		atomic_get_64(out_counts->nstat_rxbytes, &inp->inp_stat->rxbytes);
+		atomic_get_64(out_counts->nstat_txpackets, &inp->inp_stat->txpackets);
+		atomic_get_64(out_counts->nstat_txbytes, &inp->inp_stat->txbytes);
+		out_counts->nstat_rxduplicatebytes = tp->t_stat.rxduplicatebytes;
+		out_counts->nstat_rxoutoforderbytes = tp->t_stat.rxoutoforderbytes;
+		out_counts->nstat_txretransmit = tp->t_stat.txretransmitbytes;
+		out_counts->nstat_connectattempts = tp->t_state >= TCPS_SYN_SENT ? 1 : 0;
+		out_counts->nstat_connectsuccesses = tp->t_state >= TCPS_ESTABLISHED ? 1 : 0;
+		out_counts->nstat_avg_rtt = tp->t_srtt;
+		out_counts->nstat_min_rtt = tp->t_rttbest;
+		out_counts->nstat_var_rtt = tp->t_rttvar;
+	}
+	
+	return 0;
+}
+
+static void
+nstat_tcp_release(
+	nstat_provider_cookie_t	cookie)
+{
+	struct inpcb *inp = (struct inpcb*)cookie;
+	in_pcb_checkstate(inp, WNT_RELEASE, 0);
+}
+
+static u_int32_t	nstat_tcp_watchers = 0;
+
+static errno_t
+nstat_tcp_add_watcher(
+	nstat_control_state	*state)
+{
+	OSIncrementAtomic(&nstat_tcp_watchers);
+	
+	lck_rw_lock_shared(tcbinfo.mtx);
+	
+	// Add all current tcp inpcbs. Ignore those in timewait
+	struct inpcb *inp;
+	for (inp = LIST_FIRST(tcbinfo.listhead); inp; inp = LIST_NEXT(inp, inp_list))
+	{
+		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
+			continue;
+		
+		if (nstat_control_source_add(0, state, &nstat_tcp_provider, inp) != 0)
+		{
+			in_pcb_checkstate(inp, WNT_RELEASE, 0);
+			break;
+		}
+	}
+	
+	lck_rw_done(tcbinfo.mtx);
+	
+	return 0;
+}
+
+static void
+nstat_tcp_remove_watcher(
+	__unused nstat_control_state	*state)
+{
+	OSDecrementAtomic(&nstat_tcp_watchers);
+}
+
+__private_extern__ void
+nstat_tcp_new_pcb(
+	struct inpcb	*inp)
+{
+	if (nstat_tcp_watchers == 0)
+		return;
+	
+	lck_mtx_lock(&nstat_mtx);
+	nstat_control_state	*state;
+	for (state = nstat_controls; state; state = state->next)
+	{
+		if ((state->watching & (1 << NSTAT_PROVIDER_TCP)) != 0)
+		{
+			// this client is watching tcp
+			// acquire a reference for it
+			if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
+				break;
+			
+			// add the source, if that fails, release the reference
+			if (nstat_control_source_add(0, state, &nstat_tcp_provider, inp) != 0)
+			{
+				in_pcb_checkstate(inp, WNT_RELEASE, 0);
+				break;
+			}
+		}
+	}
+	lck_mtx_unlock(&nstat_mtx);
+}
+
+static errno_t
+nstat_tcp_copy_descriptor(
+	nstat_provider_cookie_t	cookie,
+	void					*data,
+	u_int32_t				len)
+{
+	if (len < sizeof(nstat_tcp_descriptor))
+	{
+		printf("%s:%d invalid length, wanted %lu, got %d\n", __FUNCTION__, __LINE__, sizeof(nstat_tcp_descriptor), len);
+		return EINVAL;
+	}
+	
+	nstat_tcp_descriptor	*desc = (nstat_tcp_descriptor*)data;
+	struct inpcb			*inp = (struct inpcb*)cookie;
+	struct tcpcb			*tp = intotcpcb(inp);
+	
+	bzero(desc, sizeof(*desc));
+	
+	if (inp->inp_vflag & INP_IPV6)
+	{
+		nstat_ip6_to_sockaddr(&inp->in6p_laddr, inp->inp_lport,
+			&desc->local.v6, sizeof(desc->local));
+		nstat_ip6_to_sockaddr(&inp->in6p_faddr, inp->inp_fport,
+			&desc->remote.v6, sizeof(desc->remote));
+	}
+	else if (inp->inp_vflag & INP_IPV4)
+	{
+		nstat_ip_to_sockaddr(&inp->inp_laddr, inp->inp_lport,
+			&desc->local.v4, sizeof(desc->local));
+		nstat_ip_to_sockaddr(&inp->inp_faddr, inp->inp_fport,
+			&desc->remote.v4, sizeof(desc->remote));
+	}
+	
+	desc->state = intotcpcb(inp)->t_state;
+	if (inp->inp_route.ro_rt && inp->inp_route.ro_rt->rt_ifp)
+		desc->ifindex = inp->inp_route.ro_rt->rt_ifp->if_index;
+	
+	// danger - not locked, values could be bogus
+	desc->txunacked = tp->snd_max - tp->snd_una;
+	desc->txwindow = tp->snd_wnd;
+	desc->txcwindow = tp->snd_cwnd;
+	
+	struct socket *so = inp->inp_socket;
+	if (so)
+	{
+		// TBD - take the socket lock around these to make sure
+		// they're in sync?
+		desc->upid = so->last_upid;
+		desc->pid = so->last_pid;
+		
+		proc_name(desc->pid, desc->pname, sizeof(desc->pname));
+		desc->pname[sizeof(desc->pname) - 1] = 0;
+		
+		desc->sndbufsize = so->so_snd.sb_hiwat;
+		desc->sndbufused = so->so_snd.sb_cc;
+		desc->rcvbufsize = so->so_rcv.sb_hiwat;
+		desc->rcvbufused = so->so_rcv.sb_cc;
+	}
+	
+	return 0;
+}
+
+static void
+nstat_init_tcp_provider(void)
+{
+	bzero(&nstat_tcp_provider, sizeof(nstat_tcp_provider));
+	nstat_tcp_provider.nstat_descriptor_length = sizeof(nstat_tcp_descriptor);
+	nstat_tcp_provider.nstat_provider_id = NSTAT_PROVIDER_TCP;
+	nstat_tcp_provider.nstat_lookup = nstat_tcp_lookup;
+	nstat_tcp_provider.nstat_gone = nstat_tcp_gone;
+	nstat_tcp_provider.nstat_counts = nstat_tcp_counts;
+	nstat_tcp_provider.nstat_release = nstat_tcp_release;
+	nstat_tcp_provider.nstat_watcher_add = nstat_tcp_add_watcher;
+	nstat_tcp_provider.nstat_watcher_remove = nstat_tcp_remove_watcher;
+	nstat_tcp_provider.nstat_copy_descriptor = nstat_tcp_copy_descriptor;
+	nstat_tcp_provider.next = nstat_providers;
+	nstat_providers = &nstat_tcp_provider;
+}
+
+#pragma mark -- UDP Provider --
+
+static nstat_provider	nstat_udp_provider;
+
+static errno_t
+nstat_udp_lookup(
+	const void				*data,
+	u_int32_t 				length,
+	nstat_provider_cookie_t	*out_cookie)
+{
+	return nstat_tcpudp_lookup(&udbinfo, data, length, out_cookie);
+}
+
+static int
+nstat_udp_gone(
+	nstat_provider_cookie_t	cookie)
+{
+	struct inpcb	*inp = (struct inpcb*)cookie;
+	return (inp->inp_state == INPCB_STATE_DEAD) ? 1 : 0;
+}
+
+static errno_t
+nstat_udp_counts(
+	nstat_provider_cookie_t	cookie,
+	struct nstat_counts		*out_counts,
+	int						*out_gone)
+{
+	struct inpcb	*inp = (struct inpcb*)cookie;
+	
+	*out_gone = 0;
+	
+	// if the pcb is in the dead state, we should stop using it
+	if (inp->inp_state == INPCB_STATE_DEAD)
+	{
+		*out_gone = 1;
+	}
+	
+	atomic_get_64(out_counts->nstat_rxpackets, &inp->inp_stat->rxpackets);
+	atomic_get_64(out_counts->nstat_rxbytes, &inp->inp_stat->rxbytes);
+	atomic_get_64(out_counts->nstat_txpackets, &inp->inp_stat->txpackets);
+	atomic_get_64(out_counts->nstat_txbytes, &inp->inp_stat->txbytes);
+	
+	return 0;
+}
+
+static void
+nstat_udp_release(
+	nstat_provider_cookie_t	cookie)
+{
+	struct inpcb *inp = (struct inpcb*)cookie;
+	in_pcb_checkstate(inp, WNT_RELEASE, 0);
+}
+
+static u_int32_t	nstat_udp_watchers = 0;
+
+static errno_t
+nstat_udp_add_watcher(
+	nstat_control_state	*state)
+{
+	OSIncrementAtomic(&nstat_udp_watchers);
+	
+	lck_rw_lock_shared(udbinfo.mtx);
+	
+	// Add all current UDP inpcbs, skipping those already being torn down
+	struct inpcb *inp;
+	for (inp = LIST_FIRST(udbinfo.listhead); inp; inp = LIST_NEXT(inp, inp_list))
+	{
+		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
+			continue;
+		
+		if (nstat_control_source_add(0, state, &nstat_udp_provider, inp) != 0)
+		{
+			in_pcb_checkstate(inp, WNT_RELEASE, 0);
+			break;
+		}
+	}
+	
+	lck_rw_done(udbinfo.mtx);
+	
+	return 0;
+}
+
+static void
+nstat_udp_remove_watcher(
+	__unused nstat_control_state	*state)
+{
+	OSDecrementAtomic(&nstat_udp_watchers);
+}
+
+__private_extern__ void
+nstat_udp_new_pcb(
+	struct inpcb	*inp)
+{
+	if (nstat_udp_watchers == 0)
+		return;
+	
+	lck_mtx_lock(&nstat_mtx);
+	nstat_control_state	*state;
+	for (state = nstat_controls; state; state = state->next)
+	{
+		if ((state->watching & (1 << NSTAT_PROVIDER_UDP)) != 0)
+		{
+			// this client is watching udp
+			// acquire a reference for it
+			if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
+				break;
+			
+			// add the source, if that fails, release the reference
+			if (nstat_control_source_add(0, state, &nstat_udp_provider, inp) != 0)
+			{
+				in_pcb_checkstate(inp, WNT_RELEASE, 0);
+				break;
+			}
+		}
+	}
+	lck_mtx_unlock(&nstat_mtx);
+}
+
+static errno_t
+nstat_udp_copy_descriptor(
+	nstat_provider_cookie_t	cookie,
+	void					*data,
+	u_int32_t				len)
+{
+	if (len < sizeof(nstat_udp_descriptor))
+	{
+		printf("%s:%d invalid length, wanted %lu, got %d\n", __FUNCTION__, __LINE__, sizeof(nstat_tcp_descriptor), len);
+		return EINVAL;
+	}
+	
+	nstat_udp_descriptor	*desc = (nstat_udp_descriptor*)data;
+	struct inpcb			*inp = (struct inpcb*)cookie;
+	
+	bzero(desc, sizeof(*desc));
+	
+	if (inp->inp_vflag & INP_IPV6)
+	{
+		nstat_ip6_to_sockaddr(&inp->in6p_laddr, inp->inp_lport,
+			&desc->local.v6, sizeof(desc->local));
+		nstat_ip6_to_sockaddr(&inp->in6p_faddr, inp->inp_fport,
+			&desc->remote.v6, sizeof(desc->remote));
+	}
+	else if (inp->inp_vflag & INP_IPV4)
+	{
+		nstat_ip_to_sockaddr(&inp->inp_laddr, inp->inp_lport,
+			&desc->local.v4, sizeof(desc->local));
+		nstat_ip_to_sockaddr(&inp->inp_faddr, inp->inp_fport,
+			&desc->remote.v4, sizeof(desc->remote));
+	}
+	
+	if (inp->inp_route.ro_rt && inp->inp_route.ro_rt->rt_ifp)
+		desc->ifindex = inp->inp_route.ro_rt->rt_ifp->if_index;
+	
+	struct socket *so = inp->inp_socket;
+	if (so)
+	{
+		// TBD - take the socket lock around these to make sure
+		// they're in sync?
+		desc->upid = so->last_upid;
+		desc->pid = so->last_pid;
+		
+		desc->rcvbufsize = so->so_rcv.sb_hiwat;
+		desc->rcvbufused = so->so_rcv.sb_cc;
+		
+		proc_name(desc->pid, desc->pname, sizeof(desc->pname));
+		desc->pname[sizeof(desc->pname) - 1] = 0;
+	}
+	
+	return 0;
+}
+
+static void
+nstat_init_udp_provider(void)
+{
+	bzero(&nstat_udp_provider, sizeof(nstat_udp_provider));
+	nstat_udp_provider.nstat_provider_id = NSTAT_PROVIDER_UDP;
+	nstat_udp_provider.nstat_descriptor_length = sizeof(nstat_udp_descriptor);
+	nstat_udp_provider.nstat_lookup = nstat_udp_lookup;
+	nstat_udp_provider.nstat_gone = nstat_udp_gone;
+	nstat_udp_provider.nstat_counts = nstat_udp_counts;
+	nstat_udp_provider.nstat_watcher_add = nstat_udp_add_watcher;
+	nstat_udp_provider.nstat_watcher_remove = nstat_udp_remove_watcher;
+	nstat_udp_provider.nstat_copy_descriptor = nstat_udp_copy_descriptor;
+	nstat_udp_provider.nstat_release = nstat_udp_release;
+	nstat_udp_provider.next = nstat_providers;
+	nstat_providers = &nstat_udp_provider;
+}
+
+#pragma mark -- Kernel Control Socket --
+
+typedef struct nstat_src
+{
+	struct nstat_src		*next;
+	nstat_src_ref_t			srcref;
+	nstat_provider			*provider;
+	nstat_provider_cookie_t	cookie;
+} nstat_src;
+
+static kern_ctl_ref	nstat_ctlref = NULL;
+static lck_grp_t	*nstat_lck_grp = NULL;
+
+static errno_t	nstat_control_connect(kern_ctl_ref kctl, struct sockaddr_ctl *sac, void **uinfo);
+static errno_t	nstat_control_disconnect(kern_ctl_ref kctl, u_int32_t unit, void *uinfo);
+static errno_t	nstat_control_send(kern_ctl_ref kctl, u_int32_t unit, void *uinfo, mbuf_t m, int flags);
+static int		nstat_control_send_description(nstat_control_state *state, nstat_src *src, u_int64_t context);
+static void		nstat_control_cleanup_source(nstat_control_state *state, struct nstat_src *src);
+
+
+static void*
+nstat_idle_check(
+	__unused thread_call_param_t p0,
+	__unused thread_call_param_t p1)
+{
+	lck_mtx_lock(&nstat_mtx);
+	
+	nstat_idle_time = 0ULL;
+	
+	nstat_control_state *control;
+	nstat_src	*dead = NULL;
+	nstat_src	*dead_list = NULL;
+	for (control = nstat_controls; control; control = control->next)
+	{
+		lck_mtx_lock(&control->mtx);
+		nstat_src	**srcpp = &control->srcs;
+		
+		while(*srcpp != NULL)
+		{
+			if ((*srcpp)->provider->nstat_gone((*srcpp)->cookie))
+			{
+				// Pull it off the list
+				dead = *srcpp;
+				*srcpp = (*srcpp)->next;
+				
+				// send a last description
+				nstat_control_send_description(control, dead, 0ULL);
+				
+				// send the source removed notification
+				nstat_msg_src_removed	removed;
+				removed.hdr.type = NSTAT_MSG_TYPE_SRC_REMOVED;
+				removed.hdr.context = 0;
+				removed.srcref = dead->srcref;
+				errno_t result = ctl_enqueuedata(control->kctl, control->unit, &removed, sizeof(removed), CTL_DATA_EOR);
+				if (result != 0) printf("%s:%d ctl_enqueuedata failed: %d\n", __FUNCTION__, __LINE__, result);
+				
+				// Put this on the list to release later
+				dead->next = dead_list;
+				dead_list = dead;
+			}
+			else
+			{
+				srcpp = &(*srcpp)->next;
+			}
+		}
+		lck_mtx_unlock(&control->mtx);
+	}
+	
+	if (nstat_controls)
+	{
+		clock_interval_to_deadline(60, NSEC_PER_SEC, &nstat_idle_time);
+		thread_call_func_delayed((thread_call_func_t)nstat_idle_check, NULL, nstat_idle_time);
+	}
+	
+	lck_mtx_unlock(&nstat_mtx);
+	
+	// Release the sources now that we aren't holding lots of locks
+	while (dead_list)
+	{
+		dead = dead_list;
+		dead_list = dead->next;
+		
+		nstat_control_cleanup_source(NULL, dead);
+	}
+	
+	return NULL;
+}
+
+static void
+nstat_control_register(void)
+{
+	// Create our lock group first
+	lck_grp_attr_t	*grp_attr = lck_grp_attr_alloc_init();
+	lck_grp_attr_setdefault(grp_attr);
+	nstat_lck_grp = lck_grp_alloc_init("network statistics kctl", grp_attr);
+	lck_grp_attr_free(grp_attr);
+	
+	lck_mtx_init(&nstat_mtx, nstat_lck_grp, NULL);
+	
+	// Register the control
+	struct kern_ctl_reg	nstat_control;
+	bzero(&nstat_control, sizeof(nstat_control));
+	strlcpy(nstat_control.ctl_name, NET_STAT_CONTROL_NAME, sizeof(nstat_control.ctl_name));
+	nstat_control.ctl_connect = nstat_control_connect;
+	nstat_control.ctl_disconnect = nstat_control_disconnect;
+	nstat_control.ctl_send = nstat_control_send;
+	
+	errno_t result = ctl_register(&nstat_control, &nstat_ctlref);
+	if (result != 0)
+		printf("%s:%d ctl_register failed: %d", __FUNCTION__, __LINE__, result);
+}
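+
+/*
+ * Fields of kern_ctl_reg left zero take the kernel control defaults:
+ * ctl_id is assigned dynamically (clients discover it by name with the
+ * CTLIOCGINFO ioctl), default send/receive buffer sizes are used, and
+ * with ctl_flags zero any process may connect -- the per-request
+ * handlers rely on nstat_perm_check for privilege enforcement.
+ */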
+
+static void
+nstat_control_cleanup_source(
+	nstat_control_state	*state,
+	struct nstat_src	*src)
+{
+	if (state)
+	{
+		nstat_msg_src_removed	removed;
+		removed.hdr.type = NSTAT_MSG_TYPE_SRC_REMOVED;
+		removed.hdr.context = 0;
+		removed.srcref = src->srcref;
+		errno_t result = ctl_enqueuedata(state->kctl, state->unit, &removed, sizeof(removed), CTL_DATA_EOR);
+		if (result != 0) printf("%s:%d ctl_enqueuedata failed: %d\n", __FUNCTION__, __LINE__, result);
+	}
+	
+	// Cleanup the source if we found it.
+	src->provider->nstat_release(src->cookie);
+	OSFree(src, sizeof(*src), nstat_malloc_tag);
+}
+
+static errno_t
+nstat_control_connect(
+	kern_ctl_ref		kctl,
+	struct sockaddr_ctl	*sac,
+	void				**uinfo)
+{
+	nstat_control_state	*state = OSMalloc(sizeof(*state), nstat_malloc_tag);
+	if (state == NULL) return ENOMEM;
+	
+	bzero(state, sizeof(*state));
+	lck_mtx_init(&state->mtx, nstat_lck_grp, NULL);
+	state->kctl = kctl;
+	state->unit = sac->sc_unit;
+	*uinfo = state;
+	
+	// check if we're super user
+	proc_t	pself = proc_self();
+	state->suser = proc_suser(pself) == 0;
+	proc_rele(pself);
+	
+	lck_mtx_lock(&nstat_mtx);
+	state->next = nstat_controls;
+	nstat_controls = state;
+	
+	if (nstat_idle_time == 0ULL)
+	{
+		clock_interval_to_deadline(60, NSEC_PER_SEC, &nstat_idle_time);
+		thread_call_func_delayed((thread_call_func_t)nstat_idle_check, NULL, nstat_idle_time);
+	}
+	
+	lck_mtx_unlock(&nstat_mtx);
+	
+	return 0;
+}
+
+static errno_t
+nstat_control_disconnect(
+	__unused kern_ctl_ref	kctl,
+	__unused u_int32_t		unit,
+	void					*uinfo)
+{
+	u_int32_t	watching;
+	nstat_control_state	*state = (nstat_control_state*)uinfo;
+	
+	// pull it out of the global list of states
+	lck_mtx_lock(&nstat_mtx);
+	nstat_control_state	**statepp;
+	for (statepp = &nstat_controls; *statepp; statepp = &(*statepp)->next)
+	{
+		if (*statepp == state)
+		{
+			*statepp = state->next;
+			break;
+		}
+	}
+	lck_mtx_unlock(&nstat_mtx);
+	
+	lck_mtx_lock(&state->mtx);
+	// Stop watching for sources
+	nstat_provider	*provider;
+	watching = state->watching;
+	state->watching = 0;
+	for (provider = nstat_providers; provider && watching;  provider = provider->next)
+	{
+		if ((watching & (1 << provider->nstat_provider_id)) != 0)
+		{
+			watching &= ~(1 << provider->nstat_provider_id);
+			provider->nstat_watcher_remove(state);
+		}
+	}
+	
+	// set cleanup flags
+	state->cleanup = TRUE;
+	
+	// Copy out the list of sources
+	nstat_src	*srcs = state->srcs;
+	state->srcs = NULL;
+	lck_mtx_unlock(&state->mtx);
+	
+	while (srcs)
+	{
+		nstat_src	*src;
+		
+		// pull it out of the list
+		src = srcs;
+		srcs = src->next;
+		
+		// clean it up
+		nstat_control_cleanup_source(NULL, src);
+	}
+	
+	OSFree(state, sizeof(*state), nstat_malloc_tag);
+	
+	return 0;
+}
+
+static nstat_src_ref_t
+nstat_control_next_src_ref(
+	nstat_control_state	*state)
+{
+	int i = 0;
+	nstat_src_ref_t	toReturn = NSTAT_SRC_REF_INVALID;
+	
+	for (i = 0; i < 1000 && toReturn == NSTAT_SRC_REF_INVALID; i++)
+	{
+		if (state->next_srcref == NSTAT_SRC_REF_INVALID ||
+			state->next_srcref == NSTAT_SRC_REF_ALL)
+		{
+			state->next_srcref = 1;
+		}
+		
+		nstat_src	*src;
+		for (src = state->srcs; src; src = src->next)
+		{
+			if (src->srcref == state->next_srcref)
+				break;
+		}
+		
+		if (src == NULL) toReturn = state->next_srcref;
+		state->next_srcref++;
+	}
+	
+	return toReturn;
+}
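+
+/*
+ * The loop above bounds the search at 1000 attempts so a control state
+ * whose srcref space is pathologically full fails with
+ * NSTAT_SRC_REF_INVALID instead of spinning forever. The linear scan of
+ * state->srcs makes each allocation O(n) in the number of sources,
+ * which is presumably acceptable for the expected watcher counts.
+ */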
+
+static int
+nstat_control_send_description(
+	nstat_control_state	*state,
+	nstat_src			*src,
+	u_int64_t			context)
+{
+	// Note: every caller holds state->mtx and unlocks it after we return,
+	// so the lock must not be dropped on the error paths here.
+	
+	// Provider doesn't support getting the descriptor? Done.
+	if (src->provider->nstat_descriptor_length == 0 ||
+		src->provider->nstat_copy_descriptor == NULL)
+	{
+		printf("%s:%d - provider doesn't support descriptions\n", __FUNCTION__, __LINE__);
+		return EOPNOTSUPP;
+	}
+	
+	// Allocate storage for the descriptor message
+	mbuf_t			msg;
+	unsigned int	one = 1;
+	u_int32_t		size = offsetof(nstat_msg_src_description, data) + src->provider->nstat_descriptor_length;
+	if (mbuf_allocpacket(MBUF_WAITOK, size, &one, &msg) != 0)
+	{
+		printf("%s:%d - failed to allocate response\n", __FUNCTION__, __LINE__);
+		return ENOMEM;
+	}
+	
+	nstat_msg_src_description	*desc = (nstat_msg_src_description*)mbuf_data(msg);
+	mbuf_setlen(msg, size);
+	mbuf_pkthdr_setlen(msg, mbuf_len(msg));
+	
+	// Query the provider for the provider specific bits
+	errno_t	result = src->provider->nstat_copy_descriptor(src->cookie, desc->data, src->provider->nstat_descriptor_length);
+	
+	if (result != 0)
+	{
+		mbuf_freem(msg);
+		printf("%s:%d - provider failed to copy descriptor %d\n", __FUNCTION__, __LINE__, result);
+		return result;
+	}
+	
+	desc->hdr.context = context;
+	desc->hdr.type = NSTAT_MSG_TYPE_SRC_DESC;
+	desc->srcref = src->srcref;
+	desc->provider = src->provider->nstat_provider_id;
+	
+	result = ctl_enqueuembuf(state->kctl, state->unit, msg, CTL_DATA_EOR);
+	if (result != 0)
+	{
+		printf("%s:%d ctl_enqueuembuf returned error %d\n", __FUNCTION__, __LINE__, result);
+		mbuf_freem(msg);
+	}
+	
+	return result;
+}
+
+static errno_t
+nstat_control_handle_add_request(
+	nstat_control_state	*state,
+	mbuf_t				m)
+{
+	errno_t	result;
+	
+	// Verify the header fits in the first mbuf
+	if (mbuf_len(m) < offsetof(nstat_msg_add_src_req, param))
+	{
+		printf("mbuf_len(m)=%lu, offsetof(nstat_msg_add_src_req*, param)=%lu\n",
+			mbuf_len(m), offsetof(nstat_msg_add_src_req, param));
+		return EINVAL;
+	}
+	
+	// Calculate the length of the parameter field
+	int32_t	paramlength = mbuf_pkthdr_len(m) - offsetof(nstat_msg_add_src_req, param);
+	if (paramlength < 0 || paramlength > 2 * 1024)
+	{
+		printf("invalid paramlength=%d\n", paramlength);
+		return EINVAL;
+	}
+	
+	nstat_provider			*provider;
+	nstat_provider_cookie_t	cookie;
+	nstat_msg_add_src_req	*req = mbuf_data(m);
+	if (mbuf_pkthdr_len(m) > mbuf_len(m))
+	{
+		// parameter is too large, we need to make a contiguous copy
+		void	*data = OSMalloc(paramlength, nstat_malloc_tag);
+		
+		if (!data) return ENOMEM;
+		result = mbuf_copydata(m, offsetof(nstat_msg_add_src_req, param), paramlength, data);
+		if (result == 0)
+			result = nstat_lookup_entry(req->provider, data, paramlength, &provider, &cookie);
+		OSFree(data, paramlength, nstat_malloc_tag);
+	}
+	else
+	{
+		result = nstat_lookup_entry(req->provider, (void*)&req->param, paramlength, &provider, &cookie);
+	}
+	
+	if (result != 0)
+	{
+		printf("nstat_lookup_entry failed: %d\n", result);
+		return result;
+	}
+	
+	result = nstat_control_source_add(req->hdr.context, state, provider, cookie);
+	if (result != 0)
+		provider->nstat_release(cookie);
+	
+	return result;
+}
+
+static int
+nstat_perm_check(
+	__unused nstat_control_state	*state)
+{
+	int allow = 0;
+#if !REQUIRE_ROOT_FOR_STATS
+	allow = 1;
+#else
+	// If the socket was created by a priv process, allow
+	if (state->suser) return 1;
+	
+	// If the current process is priv, allow
+	proc_t	self = proc_self();
+	allow = proc_suser(self) == 0;
+	proc_rele(self);
+	
+	// TBD: check for entitlement, root check is too coarse
+#endif /* REQUIRE_ROOT_FOR_STATS */
+	
+	return allow;
+}
+
+static errno_t
+nstat_control_handle_add_all(
+	nstat_control_state	*state,
+	mbuf_t				m)
+{
+	errno_t	result = 0;
+	
+	if (!nstat_perm_check(state))
+	{
+		return EPERM;
+	}
+	
+	// Verify the header fits in the first mbuf
+	if (mbuf_len(m) < sizeof(nstat_msg_add_all_srcs))
+	{
+		printf("mbuf_len(m)=%lu, sizeof(nstat_msg_add_all_srcs)=%lu\n",
+			mbuf_len(m), sizeof(nstat_msg_add_all_srcs));
+		return EINVAL;
+	}
+	
+	nstat_msg_add_all_srcs	*req = mbuf_data(m);
+	nstat_provider			*provider = nstat_find_provider_by_id(req->provider);
+	
+	if (!provider) return ENOENT;
+	if (provider->nstat_watcher_add == NULL) return ENOTSUP;
+	
+	// Make sure we don't add the provider twice
+	lck_mtx_lock(&state->mtx);
+	if ((state->watching & (1 << provider->nstat_provider_id)) != 0)
+		result = EALREADY;
+	state->watching |= (1 << provider->nstat_provider_id);
+	lck_mtx_unlock(&state->mtx);
+	if (result != 0) return result;
+	
+	result = provider->nstat_watcher_add(state);
+	if (result != 0)
+	{
+		lck_mtx_lock(&state->mtx);
+		state->watching &= ~(1 << provider->nstat_provider_id);
+		lck_mtx_unlock(&state->mtx);
+	}
+	
+	if (result == 0)
+	{
+		// Notify the client
+		nstat_msg_hdr	success;
+		success.context = req->hdr.context;
+		success.type = NSTAT_MSG_TYPE_SUCCESS;
+		success.pad = 0;
+		if (ctl_enqueuedata(state->kctl, state->unit, &success, sizeof(success), CTL_DATA_EOR) != 0)
+			printf("%s:%d - failed to enqueue success message\n", __FUNCTION__, __LINE__);
+	}
+	
+	return result;
+}
+
+static errno_t
+nstat_control_source_add(
+	u_int64_t				context,
+	nstat_control_state		*state,
+	nstat_provider			*provider,
+	nstat_provider_cookie_t	cookie)
+{
+	// Fill out source added message
+	mbuf_t					msg = NULL;
+	unsigned int			one = 1;
+	
+	if (mbuf_allocpacket(MBUF_WAITOK, sizeof(nstat_msg_src_added), &one, &msg) != 0)
+		return ENOMEM;
+	
+	mbuf_setlen(msg, sizeof(nstat_msg_src_added));
+	mbuf_pkthdr_setlen(msg, mbuf_len(msg));
+	nstat_msg_src_added	*add = mbuf_data(msg);
+	bzero(add, sizeof(*add));
+	add->hdr.type = NSTAT_MSG_TYPE_SRC_ADDED;
+	add->hdr.context = context;
+	add->provider = provider->nstat_provider_id;
+	
+	// Allocate storage for the source
+	nstat_src	*src = OSMalloc(sizeof(*src), nstat_malloc_tag);
+	if (src == NULL)
+	{
+		mbuf_freem(msg);
+		return ENOMEM;
+	}
+	
+	// Fill in the source, including picking an unused source ref
+	lck_mtx_lock(&state->mtx);
+	
+	add->srcref = src->srcref = nstat_control_next_src_ref(state);
+	if (state->cleanup || src->srcref == NSTAT_SRC_REF_INVALID)
+	{
+		lck_mtx_unlock(&state->mtx);
+		OSFree(src, sizeof(*src), nstat_malloc_tag);
+		mbuf_freem(msg);
+		return EINVAL;
+	}
+	src->provider = provider;
+	src->cookie = cookie;
+	
+	// send the source added message
+	errno_t result = ctl_enqueuembuf(state->kctl, state->unit, msg, CTL_DATA_EOR);
+	if (result != 0)
+	{
+		lck_mtx_unlock(&state->mtx);
+		printf("%s:%d ctl_enqueuembuf failed: %d\n", __FUNCTION__, __LINE__, result);
+		OSFree(src, sizeof(*src), nstat_malloc_tag);
+		mbuf_freem(msg);
+		return result;
+	}
+	
+	// Put the source in the list
+	src->next = state->srcs;
+	state->srcs = src;
+	
+	// send the description message
+	// not useful as the source is often not complete
+//	nstat_control_send_description(state, src, 0ULL);
+	
+	lck_mtx_unlock(&state->mtx);
+	
+	return 0;
+}
+
+static errno_t
+nstat_control_handle_remove_request(
+	nstat_control_state	*state,
+	mbuf_t				m)
+{
+	nstat_src_ref_t			srcref = NSTAT_SRC_REF_INVALID;
+	
+	if (mbuf_copydata(m, offsetof(nstat_msg_rem_src_req, srcref), sizeof(srcref), &srcref) != 0)
+	{
+		printf("%s:%d - invalid length %u, expected %lu\n", __FUNCTION__, __LINE__, (u_int32_t)mbuf_pkthdr_len(m), sizeof(nstat_msg_rem_src_req));
+		return EINVAL;
+	}
+	
+	lck_mtx_lock(&state->mtx);
+	
+	// Remove this source as we look for it
+	nstat_src	**nextp;
+	nstat_src	*src = NULL;
+	for (nextp = &state->srcs; *nextp; nextp = &(*nextp)->next)
+	{
+		if ((*nextp)->srcref == srcref)
+		{
+			src = *nextp;
+			*nextp = src->next;
+			break;
+		}
+	}
+	
+	lck_mtx_unlock(&state->mtx);
+	
+	if (src) nstat_control_cleanup_source(state, src);
+	
+	return src ? 0 : ENOENT;
+}
+
+static errno_t
+nstat_control_handle_query_request(
+	nstat_control_state	*state,
+	mbuf_t				m)
+{
+	// TBD: handle this from another thread so we can enqueue a lot of data
+	// As written, if a client requests query all, this function will be 
+	// called from their send of the request message. We will attempt to write
+	// responses and succeed until the buffer fills up. Since the clients thread
+	// is blocked on send, it won't be reading unless the client has two threads
+	// using this socket, one for read and one for write. Two threads probably
+	// won't work with this code anyhow since we don't have proper locking in
+	// place yet.
+	nstat_src				*dead_srcs = NULL;
+	errno_t					result = ENOENT;
+	nstat_msg_query_src_req	req;
+	if (mbuf_copydata(m, 0, sizeof(req), &req) != 0)
+	{
+		printf("%s:%d - invalid length %u, expected %lu\n", __FUNCTION__, __LINE__, (u_int32_t)mbuf_pkthdr_len(m), sizeof(req));
+		return EINVAL;
+	}
+	
+	lck_mtx_lock(&state->mtx);
+	nstat_src	**srcpp = &state->srcs;
+	while (*srcpp != NULL)
+	{
+		int	gone;
+		gone = 0;
+		
+		if (req.srcref == NSTAT_SRC_REF_ALL ||
+			(*srcpp)->srcref == req.srcref)
+		{
+			nstat_msg_src_counts	counts;
+			counts.hdr.type = NSTAT_MSG_TYPE_SRC_COUNTS;
+			counts.hdr.context = req.hdr.context;
+			counts.srcref = (*srcpp)->srcref;
+			bzero(&counts.counts, sizeof(counts.counts));
+			result = (*srcpp)->provider->nstat_counts((*srcpp)->cookie, &counts.counts, &gone);
+			
+			if (result == 0)
+			{
+				result = ctl_enqueuedata(state->kctl, state->unit, &counts, sizeof(counts), CTL_DATA_EOR);
+				if (result != 0)
+				{
+					printf("%s:%d ctl_enqueuedata failed: %d\n", __FUNCTION__, __LINE__, result);
+				}
+			}
+			else
+			{
+				printf("%s:%d provider->nstat_counts failed: %d\n", __FUNCTION__, __LINE__, result);
+			}
+			
+			if (gone)
+			{
+				// send one last descriptor message so client may see last state
+				nstat_control_send_description(state, *srcpp, 0ULL);
+				
+				// pull src out of the list
+				nstat_src	*src = *srcpp;
+				*srcpp = src->next;
+				
+				src->next = dead_srcs;
+				dead_srcs = src;
+			}
+			
+			if (req.srcref != NSTAT_SRC_REF_ALL)
+				break;
+		}
+		
+		if (!gone)
+			srcpp = &(*srcpp)->next;
+	}
+	lck_mtx_unlock(&state->mtx);
+	
+	while (dead_srcs)
+	{
+		nstat_src	*src;
+		
+		src = dead_srcs;
+		dead_srcs = src->next;
+		
+		// release src and send notification
+		nstat_control_cleanup_source(state, src);
+	}
+	
+	if (req.srcref == NSTAT_SRC_REF_ALL)
+	{
+		nstat_msg_hdr	success;
+		success.context = req.hdr.context;
+		success.type = NSTAT_MSG_TYPE_SUCCESS;
+		success.pad = 0;
+		if (ctl_enqueuedata(state->kctl, state->unit, &success, sizeof(success), CTL_DATA_EOR) != 0)
+			printf("%s:%d - failed to enqueue success message\n", __FUNCTION__, __LINE__);
+		result = 0;
+	}
+	
+	return result;
+}
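+
+/*
+ * From the client's perspective, a query-all exchange looks like this:
+ * send NSTAT_MSG_TYPE_QUERY_SRC with srcref = NSTAT_SRC_REF_ALL, then
+ * read one NSTAT_MSG_TYPE_SRC_COUNTS message per source. Sources that
+ * have gone away additionally produce a final SRC_DESC and a
+ * SRC_REMOVED notification, and the sweep is terminated by a
+ * NSTAT_MSG_TYPE_SUCCESS message echoing the request's context.
+ */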
+
+static errno_t
+nstat_control_handle_get_src_description(
+	nstat_control_state	*state,
+	mbuf_t				m)
+{
+	nstat_msg_get_src_description	req;
+	if (mbuf_copydata(m, 0, sizeof(req), &req) != 0)
+	{
+		printf("%s:%d - invalid length %u, expected %lu\n", __FUNCTION__, __LINE__, (u_int32_t)mbuf_pkthdr_len(m), sizeof(req));
+		return EINVAL;
+	}
+	
+	// Find the source
+	lck_mtx_lock(&state->mtx);
+	nstat_src	*src;
+	for (src = state->srcs; src; src = src->next)
+	{
+		if (src->srcref == req.srcref)
+			break;
+	}
+	
+	// No source? Done.
+	if (!src)
+	{
+		lck_mtx_unlock(&state->mtx);
+		printf("%s:%d - no matching source\n", __FUNCTION__, __LINE__);
+		return ENOENT;
+	}
+	
+	errno_t result = nstat_control_send_description(state, src, req.hdr.context);
+	lck_mtx_unlock(&state->mtx);
+	
+	return result;
+}
+
+static errno_t
+nstat_control_send(
+	kern_ctl_ref	kctl,
+	u_int32_t		unit,
+	void			*uinfo,
+	mbuf_t			m,
+	__unused int	flags)
+{
+	nstat_control_state	*state = (nstat_control_state*)uinfo;
+	struct nstat_msg_hdr	*hdr;
+	struct nstat_msg_hdr	storage;
+	errno_t					result = 0;
+	
+	if (mbuf_pkthdr_len(m) < sizeof(*hdr))
+	{
+		// Is this the right thing to do?
+		printf("%s:%d - message too short, was %ld expected %lu\n", __FUNCTION__, __LINE__,
+			mbuf_pkthdr_len(m), sizeof(*hdr));
+		mbuf_freem(m);
+		return EINVAL;
+	}
+	
+	if (mbuf_len(m) >= sizeof(*hdr))
+	{
+		hdr = mbuf_data(m);
+	}
+	else
+	{
+		mbuf_copydata(m, 0, sizeof(storage), &storage);
+		hdr = &storage;
+	}
+	
+	switch (hdr->type)
+	{
+		case NSTAT_MSG_TYPE_ADD_SRC:
+			result = nstat_control_handle_add_request(state, m);
+			break;
+		
+		case NSTAT_MSG_TYPE_ADD_ALL_SRCS:
+			result = nstat_control_handle_add_all(state, m);
+			break;
+		
+		case NSTAT_MSG_TYPE_REM_SRC:
+			result = nstat_control_handle_remove_request(state, m);
+			break;
+		
+		case NSTAT_MSG_TYPE_QUERY_SRC:
+			result = nstat_control_handle_query_request(state, m);
+			break;
+		
+		case NSTAT_MSG_TYPE_GET_SRC_DESC:
+			result = nstat_control_handle_get_src_description(state, m);
+			break;
+		
+		default:
+			printf("%s:%d - unknown message type %d\n", __FUNCTION__, __LINE__, hdr->type);
+			result = EINVAL;
+			break;
+	}
+	
+	if (result != 0)
+	{
+		struct nstat_msg_error	err;
+		
+		err.hdr.type = NSTAT_MSG_TYPE_ERROR;
+		err.hdr.context = hdr->context;
+		err.error = result;
+		
+		result = ctl_enqueuedata(kctl, unit, &err, sizeof(err), CTL_DATA_EOR);
+	}
+	
+	mbuf_freem(m);
+	
+	return result;
+}
diff --git a/bsd/net/ntstat.h b/bsd/net/ntstat.h
new file mode 100644
index 000000000..4bbb3dc1b
--- /dev/null
+++ b/bsd/net/ntstat.h
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2010-2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef __NTSTAT_H__
+#define __NTSTAT_H__
+#include <netinet/in.h>
+
+#ifdef PRIVATE
+#pragma pack(push, 4)
+#pragma mark -- Common Data Structures --
+
+#define __NSTAT_REVISION__	1
+
+typedef	u_int32_t	nstat_provider_id_t;
+typedef	u_int32_t	nstat_src_ref_t;
+
+typedef struct nstat_counts
+{
+	/* Counters */
+	u_int64_t	nstat_rxpackets	__attribute__((aligned(8)));
+	u_int64_t	nstat_rxbytes	__attribute__((aligned(8)));
+	u_int64_t	nstat_txpackets	__attribute__((aligned(8)));
+	u_int64_t	nstat_txbytes	__attribute__((aligned(8)));
+
+	u_int32_t	nstat_rxduplicatebytes;
+	u_int32_t	nstat_rxoutoforderbytes;
+	u_int32_t	nstat_txretransmit;
+	
+	u_int32_t	nstat_connectattempts;
+	u_int32_t	nstat_connectsuccesses;
+	
+	u_int32_t	nstat_min_rtt;
+	u_int32_t	nstat_avg_rtt;
+	u_int32_t	nstat_var_rtt;
+} nstat_counts;
+
+#pragma mark -- Network Statistics Providers --
+
+enum
+{
+	NSTAT_PROVIDER_ROUTE	= 1
+	,NSTAT_PROVIDER_TCP		= 2
+	,NSTAT_PROVIDER_UDP		= 3
+};
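+
+// Provider IDs double as bit positions in a client's watcher bitmask --
+// a client watching TCP, for example, has (1 << NSTAT_PROVIDER_TCP)
+// set -- which (assuming the 32-bit mask used by the kernel control)
+// caps the scheme at 32 providers.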
+
+typedef struct nstat_route_add_param
+{
+	union
+	{
+		struct sockaddr_in	v4;
+		struct sockaddr_in6	v6;
+	} dst;
+	union
+	{
+		struct sockaddr_in	v4;
+		struct sockaddr_in6	v6;
+	} mask;
+	u_int32_t	ifindex;
+} nstat_route_add_param;
+
+typedef struct nstat_tcp_add_param
+{
+	union
+	{
+		struct sockaddr_in	v4;
+		struct sockaddr_in6	v6;
+	} local;
+	union
+	{
+		struct sockaddr_in	v4;
+		struct sockaddr_in6	v6;
+	} remote;
+} nstat_tcp_add_param;
+
+typedef struct nstat_tcp_descriptor
+{
+	union
+	{
+		struct sockaddr_in	v4;
+		struct sockaddr_in6	v6;
+	} local;
+	
+	union
+	{
+		struct sockaddr_in	v4;
+		struct sockaddr_in6	v6;
+	} remote;
+	
+	u_int32_t	ifindex;
+	
+	u_int32_t	state;
+	
+	u_int32_t	sndbufsize;
+	u_int32_t	sndbufused;
+	u_int32_t	rcvbufsize;
+	u_int32_t	rcvbufused;
+	u_int32_t	txunacked;
+	u_int32_t	txwindow;
+	u_int32_t	txcwindow;
+	
+	u_int64_t	upid;
+	u_int32_t	pid;
+	char		pname[64];
+} nstat_tcp_descriptor;
+
+typedef struct nstat_tcp_add_param	nstat_udp_add_param;
+
+typedef struct nstat_udp_descriptor
+{
+	union
+	{
+		struct sockaddr_in	v4;
+		struct sockaddr_in6	v6;
+	} local;
+	
+	union
+	{
+		struct sockaddr_in	v4;
+		struct sockaddr_in6	v6;
+	} remote;
+	
+	u_int32_t	ifindex;
+	
+	u_int32_t	rcvbufsize;
+	u_int32_t	rcvbufused;
+	
+	u_int64_t	upid;
+	u_int32_t	pid;
+	char		pname[64];
+} nstat_udp_descriptor;
+
+typedef struct nstat_route_descriptor
+{
+	u_int64_t	id;
+	u_int64_t	parent_id;
+	u_int64_t	gateway_id;
+	
+	union
+	{
+		struct sockaddr_in	v4;
+		struct sockaddr_in6	v6;
+		struct sockaddr		sa;
+	} dst;
+	
+	union
+	{
+		struct sockaddr_in	v4;
+		struct sockaddr_in6	v6;
+		struct sockaddr		sa;
+	} mask;
+	
+	union
+	{
+		struct sockaddr_in	v4;
+		struct sockaddr_in6	v6;
+		struct sockaddr		sa;
+	} gateway;
+	
+	u_int32_t	ifindex;
+	u_int32_t	flags;
+	
+} nstat_route_descriptor;
+
+#pragma mark -- Network Statistics User Client --
+
+#define	NET_STAT_CONTROL_NAME	"com.apple.network.statistics"
+
+enum
+{
+	// generic response messages
+	NSTAT_MSG_TYPE_SUCCESS			= 0
+	,NSTAT_MSG_TYPE_ERROR			= 1
+	
+	// Requests
+	,NSTAT_MSG_TYPE_ADD_SRC			= 1001
+	,NSTAT_MSG_TYPE_ADD_ALL_SRCS	= 1002
+	,NSTAT_MSG_TYPE_REM_SRC			= 1003
+	,NSTAT_MSG_TYPE_QUERY_SRC		= 1004
+	,NSTAT_MSG_TYPE_GET_SRC_DESC	= 1005
+	
+	// Responses/Notifications
+	,NSTAT_MSG_TYPE_SRC_ADDED		= 10001
+	,NSTAT_MSG_TYPE_SRC_REMOVED		= 10002
+	,NSTAT_MSG_TYPE_SRC_DESC		= 10003
+	,NSTAT_MSG_TYPE_SRC_COUNTS		= 10004
+};
+
+enum
+{
+	NSTAT_SRC_REF_ALL		= 0xffffffff
+	,NSTAT_SRC_REF_INVALID	= 0
+};
+
+typedef struct nstat_msg_hdr
+{
+	u_int64_t	context;
+	u_int32_t	type;
+	u_int32_t	pad; // unused for now
+} nstat_msg_hdr;
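+
+// Every request carries a caller-chosen context that the kernel echoes
+// back in the matching response (SUCCESS, ERROR, SRC_ADDED, SRC_DESC,
+// or SRC_COUNTS), so a client with several outstanding requests can
+// pair each reply with the request that produced it.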
+
+typedef struct nstat_msg_error
+{
+	nstat_msg_hdr	hdr;
+	u_int32_t		error;	// errno error
+} nstat_msg_error;
+
+typedef struct nstat_msg_add_src
+{
+	nstat_msg_hdr		hdr;
+	nstat_provider_id_t	provider;
+	u_int8_t			param[];
+} nstat_msg_add_src_req;
+
+typedef struct nstat_msg_add_all_srcs
+{
+	nstat_msg_hdr		hdr;
+	nstat_provider_id_t	provider;
+} nstat_msg_add_all_srcs;
+
+typedef struct nstat_msg_src_added
+{
+	nstat_msg_hdr		hdr;
+	nstat_provider_id_t	provider;
+	nstat_src_ref_t		srcref;
+} nstat_msg_src_added;
+
+typedef struct nstat_msg_rem_src
+{
+	nstat_msg_hdr		hdr;
+	nstat_src_ref_t		srcref;
+} nstat_msg_rem_src_req;
+
+typedef struct nstat_msg_get_src_description
+{
+	nstat_msg_hdr		hdr;
+	nstat_src_ref_t		srcref;
+} nstat_msg_get_src_description;
+
+typedef struct nstat_msg_src_description
+{
+	nstat_msg_hdr		hdr;
+	nstat_src_ref_t		srcref;
+	nstat_provider_id_t	provider;
+	u_int8_t			data[];
+} nstat_msg_src_description;
+
+typedef struct nstat_msg_query_src
+{
+	nstat_msg_hdr		hdr;
+	nstat_src_ref_t		srcref;
+} nstat_msg_query_src_req;
+
+typedef struct nstat_msg_src_counts
+{
+	nstat_msg_hdr		hdr;
+	nstat_src_ref_t		srcref;
+	nstat_counts		counts;
+} nstat_msg_src_counts;
+
+typedef struct nstat_msg_src_removed
+{
+	nstat_msg_hdr		hdr;
+	nstat_src_ref_t		srcref;
+} nstat_msg_src_removed;
+
+#pragma pack(pop)
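+
+/*
+ * A minimal userspace client sketch (illustrative only; it assumes
+ * nothing beyond the standard kernel control socket API from
+ * <sys/kern_control.h> and the message layouts above):
+ *
+ *	#include <sys/types.h>
+ *	#include <sys/socket.h>
+ *	#include <sys/ioctl.h>
+ *	#include <sys/sys_domain.h>
+ *	#include <sys/kern_control.h>
+ *	#include <string.h>
+ *	#include <strings.h>
+ *	#include <unistd.h>
+ *
+ *	static int
+ *	nstat_open(void)
+ *	{
+ *		struct ctl_info		info;
+ *		struct sockaddr_ctl	sc;
+ *		int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
+ *		if (fd < 0) return -1;
+ *
+ *		bzero(&info, sizeof(info));
+ *		strlcpy(info.ctl_name, NET_STAT_CONTROL_NAME, sizeof(info.ctl_name));
+ *		if (ioctl(fd, CTLIOCGINFO, &info) != 0) { close(fd); return -1; }
+ *
+ *		bzero(&sc, sizeof(sc));
+ *		sc.sc_len = sizeof(sc);
+ *		sc.sc_family = AF_SYSTEM;
+ *		sc.ss_sysaddr = AF_SYS_CONTROL;
+ *		sc.sc_id = info.ctl_id;
+ *		sc.sc_unit = 0;		// let the kernel pick the unit
+ *		if (connect(fd, (struct sockaddr*)&sc, sizeof(sc)) != 0)
+ *		{
+ *			close(fd);
+ *			return -1;
+ *		}
+ *		return fd;
+ *	}
+ *
+ * Once connected, watching every TCP source is a single fixed-size
+ * request; SRC_ADDED, SRC_COUNTS, and the other replies are then read
+ * back with recv():
+ *
+ *	nstat_msg_add_all_srcs req;
+ *	bzero(&req, sizeof(req));
+ *	req.hdr.type = NSTAT_MSG_TYPE_ADD_ALL_SRCS;
+ *	req.hdr.context = 1;	// echoed back in the response
+ *	req.provider = NSTAT_PROVIDER_TCP;
+ *	send(fd, &req, sizeof(req), 0);
+ */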
+
+#endif /* PRIVATE */
+
+#ifdef XNU_KERNEL_PRIVATE
+#include <sys/mcache.h>
+
+#pragma mark -- Generic Network Statistics Provider --
+
+typedef	void *	nstat_provider_cookie_t;
+
+#pragma mark -- Route Statistics Gathering Functions --
+struct rtentry;
+
+enum
+{
+	NSTAT_TX_FLAG_RETRANSMIT	= 1
+};
+
+enum
+{
+	NSTAT_RX_FLAG_DUPLICATE 	= 1,
+	NSTAT_RX_FLAG_OUT_OF_ORDER	= 2
+};
+
+// indicates whether or not collection of statistics is enabled
+extern int	nstat_collect;
+
+// Route collection routines
+void nstat_route_connect_attempt(struct rtentry *rte);
+void nstat_route_connect_success(struct rtentry *rte);
+void nstat_route_tx(struct rtentry *rte, u_int32_t packets, u_int32_t bytes, u_int32_t flags);
+void nstat_route_rx(struct rtentry *rte, u_int32_t packets, u_int32_t bytes, u_int32_t flags);
+void nstat_route_rtt(struct rtentry *rte, u_int32_t rtt, u_int32_t rtt_var);
+void nstat_route_detach(struct rtentry *rte);
+
+// watcher support
+struct inpcb;
+void nstat_tcp_new_pcb(struct inpcb *inp);
+void nstat_udp_new_pcb(struct inpcb *inp);
+void nstat_route_new_entry(struct rtentry *rt);
+
+// locked_add_64 uses atomic operations on 32bit so the 64bit
+// value can be properly read. The values are only ever incremented
+// while under the socket lock, so on 64bit we don't actually need
+// atomic operations to increment.
+#if defined(__LP64__)
+#define	locked_add_64(__addr, __count) do { \
+	*(__addr) += (__count); \
+} while (0)
+#else
+#define	locked_add_64(__addr, __count) do { \
+	atomic_add_64((__addr), (__count)); \
+} while (0)
+#endif
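+
+// A hypothetical usage sketch: a caller already holding the relevant
+// socket lock bumps a 64-bit counter with, e.g.,
+//	locked_add_64(&inp->inp_stat->rxbytes, bytes);
+// On 32-bit kernels the macro expands to atomic_add_64 so concurrent
+// readers (such as the atomic_get_64 calls in ntstat.c) never observe
+// a torn 64-bit value; on LP64 the plain add suffices.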
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+#endif /* __NTSTAT_H__ */
diff --git a/bsd/net/pf.c b/bsd/net/pf.c
index 47134bda3..62b39f2ad 100644
--- a/bsd/net/pf.c
+++ b/bsd/net/pf.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -379,6 +379,7 @@ pf_lazy_makewritable(struct pf_pdesc *pd, struct mbuf *m, int len)
 		pd->lmw = len;
 		if (len >= 0 && m != pd->mp) {
 			pd->mp = m;
+			pd->pf_mtag = pf_find_mtag(m);
 
 			switch (pd->af) {
 			case AF_INET: {
@@ -2356,18 +2357,42 @@ pf_change_ap(int dir, struct mbuf *m, struct pf_addr *a, u_int16_t *p,
 #endif /* INET */
 #if INET6
 	case AF_INET6:
-		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
-		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
-		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
-		    ao.addr16[0], an->addr16[0], u),
-		    ao.addr16[1], an->addr16[1], u),
-		    ao.addr16[2], an->addr16[2], u),
-		    ao.addr16[3], an->addr16[3], u),
-		    ao.addr16[4], an->addr16[4], u),
-		    ao.addr16[5], an->addr16[5], u),
-		    ao.addr16[6], an->addr16[6], u),
-		    ao.addr16[7], an->addr16[7], u),
-		    po, pn, u);
+		/*
+		 * If the packet originated from an ALG on the NAT gateway
+		 * (source address is loopback or local), the TCP/UDP checksum
+		 * field contains the pseudo-header checksum, which has not yet
+		 * been complemented.
+		 */
+		if (dir == PF_OUT && m != NULL &&
+		    (m->m_flags & M_PKTHDR) &&
+		    (m->m_pkthdr.csum_flags & (CSUM_TCPIPV6 | CSUM_UDPIPV6))) {
+			/* Pseudo-header checksum does not include ports */
+			*pc = ~pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+		    		pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+		    		pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(~*pc,
+		    		ao.addr16[0], an->addr16[0], u),
+		    		ao.addr16[1], an->addr16[1], u),
+		    		ao.addr16[2], an->addr16[2], u),
+		    		ao.addr16[3], an->addr16[3], u),
+		    		ao.addr16[4], an->addr16[4], u),
+		    		ao.addr16[5], an->addr16[5], u),
+		    		ao.addr16[6], an->addr16[6], u),
+		    		ao.addr16[7], an->addr16[7], u),
+		    		po, pn, u);
+		} else {
+			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+		    		pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+		    		pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
+		    		ao.addr16[0], an->addr16[0], u),
+		    		ao.addr16[1], an->addr16[1], u),
+		    		ao.addr16[2], an->addr16[2], u),
+		    		ao.addr16[3], an->addr16[3], u),
+		    		ao.addr16[4], an->addr16[4], u),
+		    		ao.addr16[5], an->addr16[5], u),
+		    		ao.addr16[6], an->addr16[6], u),
+		    		ao.addr16[7], an->addr16[7], u),
+		    		po, pn, u);
+		}
 		break;
 #endif /* INET6 */
 	}
@@ -2721,7 +2746,7 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af,
 		h6->ip6_hlim = IPV6_DEFHLIM;
 
 		bzero(&ro6, sizeof (ro6));
-		ip6_output(m, NULL, &ro6, 0, NULL, NULL, 0);
+		ip6_output(m, NULL, &ro6, 0, NULL, NULL, NULL);
 		if (ro6.ro_rt != NULL)
 			rtfree(ro6.ro_rt);
 		break;
@@ -3959,8 +3984,8 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd)
 {
 	struct pf_addr		*saddr, *daddr;
 	u_int16_t		 sport, dport;
-	struct inpcbinfo	*pi;
-	struct inpcb		*inp = NULL;
+	struct inpcbinfo	*pi;
+	int			inp = 0;
 
 	if (pd == NULL)
 		return (-1);
@@ -4001,10 +4026,10 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd)
 	switch (pd->af) {
 #if INET
 	case AF_INET:
-		inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4, dport,
-		    0, NULL);
+		inp = in_pcblookup_hash_exists(pi, saddr->v4, sport, daddr->v4, dport,
+		    0, &pd->lookup.uid, &pd->lookup.gid, NULL);
 #if INET6
-		if (inp == NULL) {
+		if (inp == 0) {
 			struct in6_addr s6, d6;
 
 			memset(&s6, 0, sizeof (s6));
@@ -4017,25 +4042,26 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd)
 			memcpy(&d6.s6_addr32[3], &daddr->v4,
 			    sizeof (daddr->v4));
 
-			inp = in6_pcblookup_hash(pi, &s6, sport,
-			    &d6, dport, 0, NULL);
-			if (inp == NULL) {
-				inp = in_pcblookup_hash(pi, saddr->v4, sport,
-				    daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
-				if (inp == NULL) {
-					inp = in6_pcblookup_hash(pi, &s6, sport,
+			inp = in6_pcblookup_hash_exists(pi, &s6, sport,
+			    &d6, dport, 0, &pd->lookup.uid, &pd->lookup.gid, NULL);
+			if (inp == 0) {
+				inp = in_pcblookup_hash_exists(pi, saddr->v4, sport,
+				    daddr->v4, dport, INPLOOKUP_WILDCARD, &pd->lookup.uid, &pd->lookup.gid, NULL);
+				if (inp == 0) {
+					inp = in6_pcblookup_hash_exists(pi, &s6, sport,
 					    &d6, dport, INPLOOKUP_WILDCARD,
-					    NULL);
-					if (inp == NULL)
+					    &pd->lookup.uid, &pd->lookup.gid, NULL);
+					if (inp == 0)
 						return (-1);
 				}
 			}
 		}
 #else
-		if (inp == NULL) {
-			inp = in_pcblookup_hash(pi, saddr->v4, sport,
-			    daddr->v4, dport, INPLOOKUP_WILDCARD, NULL);
-			if (inp == NULL)
+		if (inp == 0) {
+			inp = in_pcblookup_hash_exists(pi, saddr->v4, sport,
+			    daddr->v4, dport, INPLOOKUP_WILDCARD,
+			    &pd->lookup.uid, &pd->lookup.gid, NULL);
+			if (inp == 0)
 				return (-1);
 		}
 #endif /* !INET6 */
@@ -4043,24 +4069,22 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd)
 #endif /* INET */
 #if INET6
 	case AF_INET6:
-		inp = in6_pcblookup_hash(pi, &saddr->v6, sport, &daddr->v6,
-		    dport, 0, NULL);
-		if (inp == NULL) {
-			inp = in6_pcblookup_hash(pi, &saddr->v6, sport,
-			    &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL);
-			if (inp == NULL)
+		inp = in6_pcblookup_hash_exists(pi, &saddr->v6, sport,
+		    &daddr->v6, dport, 0, &pd->lookup.uid, &pd->lookup.gid,
+		    NULL);
+		if (inp == 0) {
+			inp = in6_pcblookup_hash_exists(pi, &saddr->v6, sport,
+			    &daddr->v6, dport, INPLOOKUP_WILDCARD,
+			    &pd->lookup.uid, &pd->lookup.gid, NULL);
+			if (inp == 0)
 				return (-1);
 		}
 		break;
 #endif /* INET6 */
-
+
 	default:
 		return (-1);
 	}
 
-	if (inp != NULL)
-		in_pcb_checkstate(inp, WNT_RELEASE, 0);
-
 	return (1);
 }
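
The change above moves pf_socket_lookup() off the reference-returning
in_pcblookup_hash()/in6_pcblookup_hash() and onto *_exists() variants that
merely report whether a matching PCB is present, copying the owning socket's
credentials out through the uid/gid pointers; that is why the trailing
in_pcb_checkstate(..., WNT_RELEASE, ...) could be deleted.  A hedged sketch
of the calling convention (the wrapper below is illustrative, not kernel
code):

static int
pf_lookup_owner(struct inpcbinfo *pi, struct in_addr src, u_int16_t sp,
    struct in_addr dst, u_int16_t dp, uid_t *uid, gid_t *gid)
{
	/* exact 5-tuple match first, then wildcard, mirroring the above */
	if (in_pcblookup_hash_exists(pi, src, sp, dst, dp, 0,
	    uid, gid, NULL) != 0)
		return (1);	/* match found; no inpcb reference held */
	return (in_pcblookup_hash_exists(pi, src, sp, dst, dp,
	    INPLOOKUP_WILDCARD, uid, gid, NULL));
}
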
 
@@ -8162,10 +8186,12 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
 		}
 
 		ifp = ro->ro_rt->rt_ifp;
+		RT_LOCK(ro->ro_rt);
 		ro->ro_rt->rt_use++;
 
 		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
 			dst = satosin(ro->ro_rt->rt_gateway);
+		RT_UNLOCK(ro->ro_rt);
 	} else {
 		if (TAILQ_EMPTY(&r->rpool.list)) {
 			DPFPRINTF(PF_DEBUG_URGENT,
@@ -8277,7 +8303,14 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
 	}
 
 	m1 = m0;
+
+	/* PR-8933605: send ip_len,ip_off to ip_fragment in host byte order */
+#if BYTE_ORDER != BIG_ENDIAN
+	NTOHS(ip->ip_off);
+	NTOHS(ip->ip_len);
+#endif
 	error = ip_fragment(m0, ifp, ifp->if_mtu, sw_csum);
+
 	if (error) {
 		m0 = NULL;
 		goto bad;
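
The NTOHS() pair added above reflects the legacy BSD convention: ip_len and
ip_off are kept in network byte order while pf handles the packet, but
ip_fragment() (like the rest of the IP stack between input and output)
expects them in host order, which is the PR-8933605 fix.  An illustrative
helper for the conversion (a sketch, not part of the patch):

static void
ip_len_off_to_host(struct mbuf *m)
{
	struct ip *ip = mtod(m, struct ip *);

#if BYTE_ORDER != BIG_ENDIAN
	NTOHS(ip->ip_len);	/* network -> host order */
	NTOHS(ip->ip_off);
#endif	/* on big-endian machines the two representations coincide */
}
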
@@ -8365,7 +8398,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
 		if ((pf_mtag = pf_get_mtag(m0)) == NULL)
 			goto bad;
 		pf_mtag->flags |= PF_TAG_GENERATED;
-		ip6_output(m0, NULL, NULL, 0, NULL, NULL, 0);
+		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
 		return;
 	}
 
@@ -8410,7 +8443,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
 	if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
 		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
 	if ((unsigned)m0->m_pkthdr.len <= ifp->if_mtu) {
-		error = nd6_output(ifp, ifp, m0, dst, NULL, 0);
+		error = nd6_output(ifp, ifp, m0, dst, NULL);
 	} else {
 		in6_ifstat_inc(ifp, ifs6_in_toobig);
 		if (r->rt != PF_DUPTO)
@@ -9536,6 +9569,7 @@ pool_init(struct pool *pp, size_t size, unsigned int align, unsigned int ioff,
 	pp->pool_zone = zinit(size, 1024 * size, PAGE_SIZE, wchan);
 	if (pp->pool_zone != NULL) {
 		zone_change(pp->pool_zone, Z_EXPAND, TRUE);
+		zone_change(pp->pool_zone, Z_CALLERACCT, FALSE);
 		pp->pool_hiwat = pp->pool_limit = (unsigned int)-1;
 		pp->pool_name = wchan;
 	}
@@ -9622,8 +9656,8 @@ pf_get_mtag(struct mbuf *m)
 
 	if ((mtag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF,
 	    NULL)) == NULL) {
-		mtag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF,
-		    sizeof (struct pf_mtag), M_NOWAIT);
+		mtag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF,
+		    sizeof (struct pf_mtag), M_NOWAIT, m);
 		if (mtag == NULL)
 			return (NULL);
 		bzero(mtag + 1, sizeof (struct pf_mtag));
diff --git a/bsd/net/pf_if.c b/bsd/net/pf_if.c
index 06873fce6..4c05205ba 100644
--- a/bsd/net/pf_if.c
+++ b/bsd/net/pf_if.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -436,28 +436,45 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags)
 		return;
 	ifnet_lock_shared(ifp);
 	TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
-		if (ia->ifa_addr == NULL)
+		IFA_LOCK(ia);
+		if (ia->ifa_addr == NULL) {
+			IFA_UNLOCK(ia);
 			continue;
+		}
 		af = ia->ifa_addr->sa_family;
-		if (af != AF_INET && af != AF_INET6)
+		if (af != AF_INET && af != AF_INET6) {
+			IFA_UNLOCK(ia);
 			continue;
-		if ((flags & PFI_AFLAG_BROADCAST) && af == AF_INET6)
+		}
+		if ((flags & PFI_AFLAG_BROADCAST) && af == AF_INET6) {
+			IFA_UNLOCK(ia);
 			continue;
+		}
 		if ((flags & PFI_AFLAG_BROADCAST) &&
-		    !(ifp->if_flags & IFF_BROADCAST))
+		    !(ifp->if_flags & IFF_BROADCAST)) {
+			IFA_UNLOCK(ia);
 			continue;
+		}
 		if ((flags & PFI_AFLAG_PEER) &&
-		    !(ifp->if_flags & IFF_POINTOPOINT))
+		    !(ifp->if_flags & IFF_POINTOPOINT)) {
+			IFA_UNLOCK(ia);
 			continue;
+		}
 		if ((flags & PFI_AFLAG_NETWORK) && af == AF_INET6 &&
 		    IN6_IS_ADDR_LINKLOCAL(
-		    &((struct sockaddr_in6 *)ia->ifa_addr)->sin6_addr))
+		    &((struct sockaddr_in6 *)ia->ifa_addr)->sin6_addr)) {
+			IFA_UNLOCK(ia);
 			continue;
+		}
 		if (flags & PFI_AFLAG_NOALIAS) {
-			if (af == AF_INET && got4)
+			if (af == AF_INET && got4) {
+				IFA_UNLOCK(ia);
 				continue;
-			if (af == AF_INET6 && got6)
+			}
+			if (af == AF_INET6 && got6) {
+				IFA_UNLOCK(ia);
 				continue;
+			}
 		}
 		if (af == AF_INET)
 			got4 = 1;
@@ -480,6 +497,7 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags)
 			pfi_address_add(ia->ifa_dstaddr, af, net2);
 		else
 			pfi_address_add(ia->ifa_addr, af, net2);
+		IFA_UNLOCK(ia);
 	}
 	ifnet_lock_done(ifp);
 }
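
The hunk above gives every early `continue' in pfi_instance_add() a matching
IFA_UNLOCK() for the per-address lock now taken at the top of each iteration.
Reduced to its essentials the pattern looks like this (illustrative sketch;
address_is_interesting() is a hypothetical stand-in for the chain of checks,
and af/net2 come from the surrounding function):

	ifnet_lock_shared(ifp);
	TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
		IFA_LOCK(ia);
		if (!address_is_interesting(ia)) {
			IFA_UNLOCK(ia);		/* every exit drops the lock */
			continue;
		}
		pfi_address_add(ia->ifa_addr, af, net2);
		IFA_UNLOCK(ia);
	}
	ifnet_lock_done(ifp);
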
diff --git a/bsd/net/pf_ioctl.c b/bsd/net/pf_ioctl.c
index 9165abfd4..25763d8f5 100644
--- a/bsd/net/pf_ioctl.c
+++ b/bsd/net/pf_ioctl.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -79,6 +79,7 @@
 #include <sys/kauth.h>
 #include <sys/conf.h>
 #include <sys/mcache.h>
+#include <sys/queue.h>
 
 #include <mach/vm_param.h>
 
@@ -134,6 +135,8 @@ static int pf_rollback_altq(u_int32_t);
 static int pf_commit_altq(u_int32_t);
 static int pf_enable_altq(struct pf_altq *);
 static int pf_disable_altq(struct pf_altq *);
+static void pf_altq_copyin(struct pf_altq *, struct pf_altq *);
+static void pf_altq_copyout(struct pf_altq *, struct pf_altq *);
 #endif /* ALTQ */
 static int pf_begin_rules(u_int32_t *, int, const char *);
 static int pf_rollback_rules(u_int32_t, int, char *);
@@ -145,10 +148,14 @@ static void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *, u_int8_t);
 static void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *);
 #endif
 static int pf_commit_rules(u_int32_t, int, char *);
+static void pf_rule_copyin(struct pf_rule *, struct pf_rule *, struct proc *);
+static void pf_rule_copyout(struct pf_rule *, struct pf_rule *);
 static void pf_state_export(struct pfsync_state *, struct pf_state_key *,
     struct pf_state *);
 static void pf_state_import(struct pfsync_state *, struct pf_state_key *,
     struct pf_state *);
+static void pf_pooladdr_copyin(struct pf_pooladdr *, struct pf_pooladdr *);
+static void pf_pooladdr_copyout(struct pf_pooladdr *, struct pf_pooladdr *);
 
 #define	PF_CDEV_MAJOR	(-1)
 
@@ -180,7 +187,16 @@ static void pf_detach_hooks(void);
  * and used in pf_af_hook() for performance optimization, such that packets
  * will enter pf_test() or pf_test6() only when PF is running.
  */
-static int pf_is_enabled;
+int pf_is_enabled = 0;
+
+/*
+ * These are the pf enabled reference counting variables
+ */
+static u_int64_t pf_enabled_ref_count;
+static u_int32_t nr_tokens = 0;
+
+SLIST_HEAD(list_head, pfioc_kernel_token);
+static struct list_head token_list_head;
 
 struct pf_rule		 pf_default_rule;
 #if ALTQ
@@ -230,6 +246,78 @@ struct thread *pf_purge_thread;
 
 extern void pfi_kifaddr_update(void *);
 
+/* pf enable ref-counting helper functions */
+static u_int64_t                generate_token(void);
+static int                      remove_token(struct pfioc_remove_token *);
+static void                     invalidate_all_tokens(void);
+
+static u_int64_t
+generate_token(void)
+{
+	u_int64_t token_value;
+	struct pfioc_kernel_token *new_token;
+
+	new_token = _MALLOC(sizeof (struct pfioc_kernel_token), M_TEMP,
+	    M_WAITOK|M_ZERO);
+
+	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+	if (new_token == NULL) {
+		/* malloc failed! bail! */
+		printf("%s: unable to allocate pf token structure!\n", __func__);
+		return 0;
+	}
+
+	token_value = (u_int64_t)(uintptr_t)new_token;
+
+	new_token->token.token_value = token_value;
+	new_token->token.pid = proc_pid(current_proc());
+	proc_name(new_token->token.pid, new_token->token.proc_name,
+		sizeof (new_token->token.proc_name));
+	new_token->token.timestamp = pf_calendar_time_second();
+
+	SLIST_INSERT_HEAD(&token_list_head, new_token, next);
+	nr_tokens++;
+
+	return token_value;
+}
+
+static int
+remove_token(struct pfioc_remove_token *tok)
+{
+	struct pfioc_kernel_token *entry, *tmp;
+
+	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+	SLIST_FOREACH_SAFE(entry, &token_list_head, next, tmp) {
+		if (tok->token_value == entry->token.token_value) {
+			SLIST_REMOVE(&token_list_head, entry, pfioc_kernel_token, next);
+			_FREE(entry, M_TEMP);
+			nr_tokens--;
+			return 0;    /* success */
+		}
+	}
+
+	printf("pf: remove failure\n");
+	return ESRCH;    /* failure */
+}
+
+static void
+invalidate_all_tokens(void)
+{
+	struct pfioc_kernel_token *entry, *tmp;
+
+	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+	SLIST_FOREACH_SAFE(entry, &token_list_head, next, tmp) {
+		SLIST_REMOVE(&token_list_head, entry, pfioc_kernel_token, next);
+		_FREE(entry, M_TEMP);
+	}
+
+	nr_tokens = 0;
+
+	return;
+}
+
 void
 pfinit(void)
 {
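
Together, generate_token()/remove_token() back a reference-counted enable:
each DIOCSTARTREF hands the caller an opaque 64-bit token (here the kernel
address of the tracking record, so it is unique while live), and pf stays
enabled until every token has been returned.  A hedged userland sketch of the
intended usage, assuming the DIOCSTARTREF/DIOCSTOPREF definitions added to
pfvar.h later in this patch and the standard /dev/pf control device:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <net/pfvar.h>

int
pf_ref_example(void)
{
	struct pfioc_remove_token tok;
	u_int64_t token = 0;
	int dev, err = -1;

	if ((dev = open("/dev/pf", O_RDWR)) < 0)
		return (-1);
	if (ioctl(dev, DIOCSTARTREF, &token) == 0 && token != 0) {
		/* pf is enabled at least until this token is returned */
		memset(&tok, 0, sizeof (tok));
		tok.token_value = token;
		err = ioctl(dev, DIOCSTOPREF, &tok);
	}
	close(dev);
	return (err);
}
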
@@ -859,6 +947,27 @@ pf_disable_altq(struct pf_altq *altq)
 
 	return (error);
 }
+
+static void
+pf_altq_copyin(struct pf_altq *src, struct pf_altq *dst)
+{
+	bcopy(src, dst, sizeof (struct pf_altq));
+
+	dst->ifname[sizeof (dst->ifname) - 1] = '\0';
+	dst->qname[sizeof (dst->qname) - 1] = '\0';
+	dst->parent[sizeof (dst->parent) - 1] = '\0';
+	dst->altq_disc = NULL;
+	TAILQ_INIT(&dst->entries);
+}
+
+static void
+pf_altq_copyout(struct pf_altq *src, struct pf_altq *dst)
+{
+	bcopy(src, dst, sizeof (struct pf_altq));
+
+	dst->altq_disc = NULL;
+	TAILQ_INIT(&dst->entries);
+}
 #endif /* ALTQ */
 
 static int
@@ -951,7 +1060,7 @@ pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr)
 		PF_MD5_UPD(pfr, xport.range.port[0]);
 		PF_MD5_UPD(pfr, xport.range.port[1]);
 		PF_MD5_UPD(pfr, xport.range.op);
-	    break;
+		break;
 
 	default:
 		break;
@@ -1067,6 +1176,53 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor)
 	return (0);
 }
 
+static void
+pf_rule_copyin(struct pf_rule *src, struct pf_rule *dst, struct proc *p)
+{
+	bcopy(src, dst, sizeof (struct pf_rule));
+
+	dst->label[sizeof (dst->label) - 1] = '\0';
+	dst->ifname[sizeof (dst->ifname) - 1] = '\0';
+	dst->qname[sizeof (dst->qname) - 1] = '\0';
+	dst->pqname[sizeof (dst->pqname) - 1] = '\0';
+	dst->tagname[sizeof (dst->tagname) - 1] = '\0';
+	dst->match_tagname[sizeof (dst->match_tagname) - 1] = '\0';
+	dst->overload_tblname[sizeof (dst->overload_tblname) - 1] = '\0';
+
+	dst->cuid = kauth_cred_getuid(p->p_ucred);
+	dst->cpid = p->p_pid;
+
+	dst->anchor = NULL;
+	dst->kif = NULL;
+	dst->overload_tbl = NULL;
+
+	TAILQ_INIT(&dst->rpool.list);
+	dst->rpool.cur = NULL;
+
+	/* initialize refcounting */
+	dst->states = 0;
+	dst->src_nodes = 0;
+
+	dst->entries.tqe_prev = NULL;
+	dst->entries.tqe_next = NULL;
+}
+
+static void
+pf_rule_copyout(struct pf_rule *src, struct pf_rule *dst)
+{
+	bcopy(src, dst, sizeof (struct pf_rule));
+
+	dst->anchor = NULL;
+	dst->kif = NULL;
+	dst->overload_tbl = NULL;
+
+	TAILQ_INIT(&dst->rpool.list);
+	dst->rpool.cur = NULL;
+
+	dst->entries.tqe_prev = NULL;
+	dst->entries.tqe_next = NULL;
+}
+
 static void
 pf_state_export(struct pfsync_state *sp, struct pf_state_key *sk,
     struct pf_state *s)
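
pf_rule_copyin()/pf_rule_copyout() above, like the altq and pooladdr
equivalents elsewhere in this patch, follow one hardening idiom: on the way
in, NUL-terminate every string and reset every kernel-only field a user could
have filled in; on the way out, clear kernel pointers so no kernel addresses
leak to userland.  Reduced to a minimal sketch around a hypothetical struct:

struct widget {			/* hypothetical user-visible struct */
	char	 name[16];
	void	*kern_only;	/* meaningful inside the kernel only */
};

static void
widget_copyin(struct widget *src, struct widget *dst)
{
	bcopy(src, dst, sizeof (struct widget));
	dst->name[sizeof (dst->name) - 1] = '\0'; /* bound user strings */
	dst->kern_only = NULL;	/* never trust a user-supplied pointer */
}

static void
widget_copyout(struct widget *src, struct widget *dst)
{
	bcopy(src, dst, sizeof (struct widget));
	dst->kern_only = NULL;	/* do not leak kernel addresses */
}
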
@@ -1176,6 +1332,27 @@ pf_state_import(struct pfsync_state *sp, struct pf_state_key *sk,
 	s->bytes[0] = s->bytes[1] = 0;
 }
 
+static void
+pf_pooladdr_copyin(struct pf_pooladdr *src, struct pf_pooladdr *dst)
+{
+	bcopy(src, dst, sizeof (struct pf_pooladdr));
+
+	dst->entries.tqe_prev = NULL;
+	dst->entries.tqe_next = NULL;
+	dst->ifname[sizeof (dst->ifname) - 1] = '\0';
+	dst->kif = NULL;
+}
+
+static void
+pf_pooladdr_copyout(struct pf_pooladdr *src, struct pf_pooladdr *dst)
+{
+	bcopy(src, dst, sizeof (struct pf_pooladdr));
+
+	dst->entries.tqe_prev = NULL;
+	dst->entries.tqe_next = NULL;
+	dst->kif = NULL;
+}
+
 static int
 pf_setup_pfsync_matching(struct pf_ruleset *rs)
 {
@@ -1216,6 +1393,38 @@ pf_setup_pfsync_matching(struct pf_ruleset *rs)
 	return (0);
 }
 
+static void
+pf_start(void)
+{
+	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+	VERIFY(pf_is_enabled == 0);
+
+	pf_is_enabled = 1;
+	pf_status.running = 1;
+	pf_status.since = pf_calendar_time_second();
+	if (pf_status.stateid == 0) {
+		pf_status.stateid = pf_time_second();
+		pf_status.stateid = pf_status.stateid << 32;
+	}
+	wakeup(pf_purge_thread_fn);
+	DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n"));
+}
+
+static void
+pf_stop(void)
+{
+	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+	VERIFY(pf_is_enabled);
+
+	pf_status.running = 0;
+	pf_is_enabled = 0;
+	pf_status.since = pf_calendar_time_second();
+	wakeup(pf_purge_thread_fn);
+	DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n"));
+}
+
 static int
 pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 {
@@ -1282,7 +1491,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 	if (!(flags & FWRITE))
 		switch (cmd) {
 		case DIOCSTART:
+		case DIOCSTARTREF:
 		case DIOCSTOP:
+		case DIOCSTOPREF:
+		case DIOCGETSTARTERS:
 		case DIOCGETRULES:
 		case DIOCGETADDRS:
 		case DIOCGETADDR:
@@ -1316,7 +1528,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 		case DIOCRSETADDRS:
 		case DIOCRSETTFLAGS:
 			if (((struct pfioc_table *)addr)->pfrio_flags &
-			    PFR_FLAG_DUMMY) {
+			    PFR_FLAG_DUMMY) {
 				flags |= FWRITE; /* need write lock for dummy */
 				break; /* dummy operation ok */
 			}
@@ -1341,20 +1553,41 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 
 	case DIOCSTART:
 		if (pf_status.running) {
+			/*
+			 * Increment the reference for a simple -e enable, so
+			 * that even if other processes drop their references,
+			 * pf will still be available to processes that turned
+			 * it on without taking a reference
+			 */
+			if (nr_tokens == pf_enabled_ref_count) {
+				pf_enabled_ref_count++;
+				VERIFY(pf_enabled_ref_count != 0);
+			}
 			error = EEXIST;
 		} else if (pf_purge_thread == NULL) {
 			error = ENOMEM;
 		} else {
-			pf_is_enabled = 1;
-			pf_status.running = 1;
-			pf_status.since = pf_calendar_time_second();
-			if (pf_status.stateid == 0) {
-				pf_status.stateid = pf_time_second();
-				pf_status.stateid = pf_status.stateid << 32;
+			pf_start();
+			pf_enabled_ref_count++;
+			VERIFY(pf_enabled_ref_count != 0);
+		}
+		break;
+
+	case DIOCSTARTREF:    /* returns a token */
+		if (pf_purge_thread == NULL) {
+			error = ENOMEM;
+		} else {
+			if ((*(u_int64_t *)addr = generate_token()) != 0) {
+				if (pf_is_enabled == 0) {
+					pf_start();
+				}
+				pf_enabled_ref_count++;
+				VERIFY(pf_enabled_ref_count != 0);
+			} else {
+				error = ENOMEM;
+				DPFPRINTF(PF_DEBUG_URGENT,
+					("pf: unable to generate token\n"));
 			}
-			mbuf_growth_aggressive();
-			wakeup(pf_purge_thread_fn);
-			DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n"));
 		}
 		break;
 
@@ -1362,23 +1595,102 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 		if (!pf_status.running) {
 			error = ENOENT;
 		} else {
-			mbuf_growth_normal();
-			pf_status.running = 0;
-			pf_is_enabled = 0;
-			pf_status.since = pf_calendar_time_second();
-			wakeup(pf_purge_thread_fn);
-			DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n"));
+			pf_stop();
+			pf_enabled_ref_count = 0;
+			invalidate_all_tokens();
 		}
 		break;
 
+	case DIOCSTOPREF:
+		if (!pf_status.running) {
+			error = ENOENT;
+		} else {
+			if ((error = remove_token(
+			    (struct pfioc_remove_token *)addr)) == 0) {
+				VERIFY(pf_enabled_ref_count != 0);
+				pf_enabled_ref_count--;
+				/* return currently held references */
+				((struct pfioc_remove_token *)addr)->refcount
+					= pf_enabled_ref_count;
+				DPFPRINTF(PF_DEBUG_MISC,
+					("pf: enabled refcount decremented\n"));
+			} else {
+				error = EINVAL;
+				DPFPRINTF(PF_DEBUG_URGENT,
+					("pf: token mismatch\n"));
+				break;
+			}
+
+			if (pf_enabled_ref_count == 0)
+				pf_stop();
+		}
+		break;
+
+	case DIOCGETSTARTERS: {
+		struct pfioc_tokens		*g_token = (struct pfioc_tokens *)addr;
+		struct pfioc_token		*tokens;
+		struct pfioc_kernel_token	*entry, *tmp;
+		user_addr_t			token_buf;
+		int				g_token_size_copy;
+		char				*ptr;
+
+		if (nr_tokens == 0) {
+			error = ENOENT;
+			break;
+		}
+
+		g_token_size_copy = g_token->size;
+
+		if (g_token->size == 0) {
+			g_token->size = sizeof (struct pfioc_token) * nr_tokens;
+			break;
+		}
+
+		token_buf = PF_USER_ADDR(addr, pfioc_tokens, pgt_buf);
+		tokens = _MALLOC(sizeof(struct pfioc_token) * nr_tokens,
+			M_TEMP, M_WAITOK);
+
+		if (tokens == NULL) {
+			error = ENOMEM;
+			break;
+		}
+
+		ptr = (void *)tokens;
+		SLIST_FOREACH_SAFE(entry, &token_list_head, next, tmp) {
+			if ((unsigned)g_token_size_copy <
+			    sizeof (struct pfioc_token))
+				break;	/* no more buffer space left */
+
+			((struct pfioc_token *)(ptr))->token_value = entry->token.token_value;
+			((struct pfioc_token *)(ptr))->timestamp = entry->token.timestamp;
+			((struct pfioc_token *)(ptr))->pid = entry->token.pid;
+			memcpy(((struct pfioc_token *)(ptr))->proc_name,
+			    entry->token.proc_name, PFTOK_PROCNAME_LEN);
+			ptr += sizeof(struct pfioc_token);
+
+			g_token_size_copy -= sizeof(struct pfioc_token);
+		}
+
+		if (g_token_size_copy < g_token->size) {
+			error = copyout(tokens, token_buf,
+				g_token->size - g_token_size_copy);
+		}
+
+		g_token->size -= g_token_size_copy;
+		_FREE(tokens, M_TEMP);
+
+		break;
+		}
+
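
The DIOCGETSTARTERS handler above implements a two-call sizing protocol: a
first call with size == 0 reports the space required, a second call with a
buffer retrieves the records, and size is rewritten to the number of bytes
actually copied out.  A hedged userland sketch (dev is an open /dev/pf
descriptor, as in pf_ref_example() earlier):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <net/pfvar.h>

int
pf_list_starters(int dev)
{
	struct pfioc_tokens gt;
	int i, n;

	memset(&gt, 0, sizeof (gt));		/* size == 0: query only */
	if (ioctl(dev, DIOCGETSTARTERS, &gt) != 0 || gt.size == 0)
		return (-1);			/* ENOENT when no tokens */

	if ((gt.pgt_buf = malloc(gt.size)) == NULL)
		return (-1);
	if (ioctl(dev, DIOCGETSTARTERS, &gt) == 0) {
		n = gt.size / sizeof (struct pfioc_token);
		for (i = 0; i < n; i++)
			printf("token %llu pid %d (%s)\n",
			    (unsigned long long)gt.pgt_tokens[i].token_value,
			    gt.pgt_tokens[i].pid,
			    gt.pgt_tokens[i].proc_name);
	}
	free(gt.pgt_buf);
	return (0);
}
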
 	case DIOCADDRULE: {
 		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
 		struct pf_ruleset	*ruleset;
 		struct pf_rule		*rule, *tail;
 		struct pf_pooladdr	*apa;
-		int			 rs_num;
+		int			rs_num;
 
-		pr->anchor[sizeof (pr->anchor) - 1] = 0;
+		pr->anchor[sizeof (pr->anchor) - 1] = '\0';
+		pr->anchor_call[sizeof (pr->anchor_call) - 1] = '\0';
 		ruleset = pf_find_ruleset(pr->anchor);
 		if (ruleset == NULL) {
 			error = EINVAL;
@@ -1406,16 +1718,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = ENOMEM;
 			break;
 		}
-		bcopy(&pr->rule, rule, sizeof (struct pf_rule));
-		rule->cuid = kauth_cred_getuid(p->p_ucred);
-		rule->cpid = p->p_pid;
-		rule->anchor = NULL;
-		rule->kif = NULL;
-		TAILQ_INIT(&rule->rpool.list);
-		/* initialize refcounting */
-		rule->states = 0;
-		rule->src_nodes = 0;
-		rule->entries.tqe_prev = NULL;
+		pf_rule_copyin(&pr->rule, rule, p);
 #if !INET
 		if (rule->af == AF_INET) {
 			pool_put(&pf_rule_pl, rule);
@@ -1526,7 +1829,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 		struct pf_rule		*tail;
 		int			 rs_num;
 
-		pr->anchor[sizeof (pr->anchor) - 1] = 0;
+		pr->anchor[sizeof (pr->anchor) - 1] = '\0';
+		pr->anchor_call[sizeof (pr->anchor_call) - 1] = '\0';
 		ruleset = pf_find_ruleset(pr->anchor);
 		if (ruleset == NULL) {
 			error = EINVAL;
@@ -1553,7 +1857,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 		struct pf_rule		*rule;
 		int			 rs_num, i;
 
-		pr->anchor[sizeof (pr->anchor) - 1] = 0;
+		pr->anchor[sizeof (pr->anchor) - 1] = '\0';
+		pr->anchor_call[sizeof (pr->anchor_call) - 1] = '\0';
 		ruleset = pf_find_ruleset(pr->anchor);
 		if (ruleset == NULL) {
 			error = EINVAL;
@@ -1575,7 +1880,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = EBUSY;
 			break;
 		}
-		bcopy(rule, &pr->rule, sizeof (struct pf_rule));
+		pf_rule_copyout(rule, &pr->rule);
 		if (pf_anchor_copyout(ruleset, rule, pr)) {
 			error = EBUSY;
 			break;
@@ -1620,6 +1925,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = EINVAL;
 			break;
 		}
+		pcr->anchor[sizeof (pcr->anchor) - 1] = '\0';
+		pcr->anchor_call[sizeof (pcr->anchor_call) - 1] = '\0';
 		ruleset = pf_find_ruleset(pcr->anchor);
 		if (ruleset == NULL) {
 			error = EINVAL;
@@ -1652,13 +1959,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 				error = ENOMEM;
 				break;
 			}
-			bcopy(&pcr->rule, newrule, sizeof (struct pf_rule));
-			newrule->cuid = kauth_cred_getuid(p->p_ucred);
-			newrule->cpid = p->p_pid;
-			TAILQ_INIT(&newrule->rpool.list);
-			/* initialize refcounting */
-			newrule->states = 0;
-			newrule->entries.tqe_prev = NULL;
+			pf_rule_copyin(&pcr->rule, newrule, p);
 #if !INET
 			if (newrule->af == AF_INET) {
 				pool_put(&pf_rule_pl, newrule);
@@ -1816,6 +2117,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 		struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr;
 		int			 killed = 0;
 
+		psk->psk_ifname[sizeof (psk->psk_ifname) - 1] = '\0';
 		for (s = RB_MIN(pf_state_tree_id, &tree_id); s; s = nexts) {
 			nexts = RB_NEXT(pf_state_tree_id, &tree_id, s);
 
@@ -2268,7 +2570,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = ENOMEM;
 			break;
 		}
-		bcopy(&pa->altq, altq, sizeof (struct pf_altq));
+		pf_altq_copyin(&pa->altq, altq);
 
 		/*
 		 * if this is for a queue, find the discipline and
@@ -2297,7 +2599,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 		}
 
 		TAILQ_INSERT_TAIL(pf_altqs_inactive, altq, entries);
-		bcopy(altq, &pa->altq, sizeof (struct pf_altq));
+		pf_altq_copyout(altq, &pa->altq);
 		break;
 	}
 
@@ -2331,7 +2633,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = EBUSY;
 			break;
 		}
-		bcopy(altq, &pa->altq, sizeof (struct pf_altq));
+		pf_altq_copyout(altq, &pa->altq);
 		break;
 	}
 
@@ -2381,6 +2683,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 	case DIOCADDADDR: {
 		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
 
+		pp->anchor[sizeof (pp->anchor) - 1] = '\0';
 		if (pp->ticket != ticket_pabuf) {
 			error = EBUSY;
 			break;
@@ -2408,7 +2711,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = ENOMEM;
 			break;
 		}
-		bcopy(&pp->addr, pa, sizeof (struct pf_pooladdr));
+		pf_pooladdr_copyin(&pp->addr, pa);
 		if (pa->ifname[0]) {
 			pa->kif = pfi_kif_get(pa->ifname);
 			if (pa->kif == NULL) {
@@ -2433,6 +2736,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
 
 		pp->nr = 0;
+		pp->anchor[sizeof (pp->anchor) - 1] = '\0';
 		pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action,
 		    pp->r_num, 0, 1, 0);
 		if (pool == NULL) {
@@ -2448,6 +2752,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
 		u_int32_t		 nr = 0;
 
+		pp->anchor[sizeof (pp->anchor) - 1] = '\0';
 		pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action,
 		    pp->r_num, 0, 1, 1);
 		if (pool == NULL) {
@@ -2463,7 +2768,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = EBUSY;
 			break;
 		}
-		bcopy(pa, &pp->addr, sizeof (struct pf_pooladdr));
+		pf_pooladdr_copyout(pa, &pp->addr);
 		pfi_dynaddr_copyout(&pp->addr.addr);
 		pf_tbladdr_copyout(&pp->addr.addr);
 		pf_rtlabel_copyout(&pp->addr.addr);
@@ -2487,6 +2792,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			break;
 		}
 
+		pca->anchor[sizeof (pca->anchor) - 1] = '\0';
 		ruleset = pf_find_ruleset(pca->anchor);
 		if (ruleset == NULL) {
 			error = EBUSY;
@@ -2504,7 +2810,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 				error = ENOMEM;
 				break;
 			}
-			bcopy(&pca->addr, newpa, sizeof (struct pf_pooladdr));
+			pf_pooladdr_copyin(&pca->addr, newpa);
 #if !INET
 			if (pca->af == AF_INET) {
 				pool_put(&pf_pooladdr_pl, newpa);
@@ -2585,7 +2891,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 		struct pf_ruleset	*ruleset;
 		struct pf_anchor	*anchor;
 
-		pr->path[sizeof (pr->path) - 1] = 0;
+		pr->path[sizeof (pr->path) - 1] = '\0';
+		pr->name[sizeof (pr->name) - 1] = '\0';
 		if ((ruleset = pf_find_ruleset(pr->path)) == NULL) {
 			error = EINVAL;
 			break;
@@ -2610,7 +2917,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 		struct pf_anchor	*anchor;
 		u_int32_t		 nr = 0;
 
-		pr->path[sizeof (pr->path) - 1] = 0;
+		pr->path[sizeof (pr->path) - 1] = '\0';
 		if ((ruleset = pf_find_ruleset(pr->path)) == NULL) {
 			error = EINVAL;
 			break;
@@ -2645,6 +2952,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = ENODEV;
 			break;
 		}
+		pfr_table_copyin_cleanup(&io->pfrio_table);
 		error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel,
 		    io->pfrio_flags | PFR_FLAG_USERIOCTL);
 		break;
@@ -2684,6 +2992,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = ENODEV;
 			break;
 		}
+		pfr_table_copyin_cleanup(&io->pfrio_table);
 		error = pfr_get_tables(&io->pfrio_table, buf,
 		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
 		break;
@@ -2697,6 +3006,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = ENODEV;
 			break;
 		}
+		pfr_table_copyin_cleanup(&io->pfrio_table);
 		error = pfr_get_tstats(&io->pfrio_table, buf,
 		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
 		break;
@@ -2736,6 +3046,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = ENODEV;
 			break;
 		}
+		pfr_table_copyin_cleanup(&io->pfrio_table);
 		error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel,
 		    io->pfrio_flags | PFR_FLAG_USERIOCTL);
 		break;
@@ -2749,6 +3060,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = ENODEV;
 			break;
 		}
+		pfr_table_copyin_cleanup(&io->pfrio_table);
 		error = pfr_add_addrs(&io->pfrio_table, buf,
 		    io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags |
 		    PFR_FLAG_USERIOCTL);
@@ -2763,6 +3075,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = ENODEV;
 			break;
 		}
+		pfr_table_copyin_cleanup(&io->pfrio_table);
 		error = pfr_del_addrs(&io->pfrio_table, buf,
 		    io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags |
 		    PFR_FLAG_USERIOCTL);
@@ -2777,6 +3090,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = ENODEV;
 			break;
 		}
+		pfr_table_copyin_cleanup(&io->pfrio_table);
 		error = pfr_set_addrs(&io->pfrio_table, buf,
 		    io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd,
 		    &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags |
@@ -2792,6 +3106,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = ENODEV;
 			break;
 		}
+		pfr_table_copyin_cleanup(&io->pfrio_table);
 		error = pfr_get_addrs(&io->pfrio_table, buf,
 		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
 		break;
@@ -2805,6 +3120,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = ENODEV;
 			break;
 		}
+		pfr_table_copyin_cleanup(&io->pfrio_table);
 		error = pfr_get_astats(&io->pfrio_table, buf,
 		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
 		break;
@@ -2818,6 +3134,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = ENODEV;
 			break;
 		}
+		pfr_table_copyin_cleanup(&io->pfrio_table);
 		error = pfr_clr_astats(&io->pfrio_table, buf,
 		    io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags |
 		    PFR_FLAG_USERIOCTL);
@@ -2832,6 +3149,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = ENODEV;
 			break;
 		}
+		pfr_table_copyin_cleanup(&io->pfrio_table);
 		error = pfr_tst_addrs(&io->pfrio_table, buf,
 		    io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags |
 		    PFR_FLAG_USERIOCTL);
@@ -2846,6 +3164,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 			error = ENODEV;
 			break;
 		}
+		pfr_table_copyin_cleanup(&io->pfrio_table);
 		error = pfr_ina_define(&io->pfrio_table, buf,
 		    io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr,
 		    io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL);
@@ -2885,6 +3204,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 				error = EFAULT;
 				goto fail;
 			}
+			ioe->anchor[sizeof (ioe->anchor) - 1] = '\0';
 			switch (ioe->rs_num) {
 			case PF_RULESET_ALTQ:
 #if ALTQ
@@ -2954,6 +3274,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 				error = EFAULT;
 				goto fail;
 			}
+			ioe->anchor[sizeof (ioe->anchor) - 1] = '\0';
 			switch (ioe->rs_num) {
 			case PF_RULESET_ALTQ:
 #if ALTQ
@@ -3019,6 +3340,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 				error = EFAULT;
 				goto fail;
 			}
+			ioe->anchor[sizeof (ioe->anchor) - 1] = '\0';
 			switch (ioe->rs_num) {
 			case PF_RULESET_ALTQ:
 #if ALTQ
@@ -3077,6 +3399,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 				error = EFAULT;
 				goto fail;
 			}
+			ioe->anchor[sizeof (ioe->anchor) - 1] = '\0';
 			switch (ioe->rs_num) {
 			case PF_RULESET_ALTQ:
 #if ALTQ
@@ -3155,6 +3478,10 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 				    n->conn_rate.count * diff /
 				    n->conn_rate.seconds;
 
+			_RB_PARENT(pstore, entry) = NULL;
+			RB_LEFT(pstore, entry) = RB_RIGHT(pstore, entry) = NULL;
+			pstore->kif = NULL;
+
 			error = copyout(pstore, buf, sizeof (*pstore));
 			if (error) {
 				_FREE(pstore, M_TEMP);
@@ -3387,9 +3714,11 @@ pf_inet_hook(struct ifnet *ifp, struct mbuf **mp, int input)
 	}
 #if BYTE_ORDER != BIG_ENDIAN
 	else {
-		ip = mtod(*mp, struct ip *);
-		NTOHS(ip->ip_len);
-		NTOHS(ip->ip_off);
+		if (*mp != NULL) {
+			ip = mtod(*mp, struct ip *);
+			NTOHS(ip->ip_len);
+			NTOHS(ip->ip_off);
+		}
 	}
 #endif
 	return (error);
@@ -3402,10 +3731,6 @@ pf_inet6_hook(struct ifnet *ifp, struct mbuf **mp, int input)
 {
 	int error = 0;
 
-#if 0
-	/*
-	 * TODO: once we support IPv6 hardware checksum offload
-	 */
 	/*
 	 * If the packet is outbound, is originated locally, is flagged for
 	 * delayed UDP/TCP checksum calculation, and is about to be processed
@@ -3414,16 +3739,15 @@ pf_inet6_hook(struct ifnet *ifp, struct mbuf **mp, int input)
 	 * it properly.
 	 */
 	if (!input && (*mp)->m_pkthdr.rcvif == NULL) {
-		static const int mask = CSUM_DELAY_DATA;
+		static const int mask = CSUM_DELAY_IPV6_DATA;
 		const int flags = (*mp)->m_pkthdr.csum_flags &
 		    ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
 
 		if (flags & mask) {
-			in6_delayed_cksum(*mp);
+			in6_delayed_cksum(*mp, sizeof(struct ip6_hdr));
 			(*mp)->m_pkthdr.csum_flags &= ~mask;
 		}
 	}
-#endif
 
 	if (pf_test6(input ? PF_IN : PF_OUT, ifp, mp, NULL) != PF_PASS) {
 		if (*mp != NULL) {
@@ -3449,7 +3773,8 @@ pf_ifaddr_hook(struct ifnet *ifp, unsigned long cmd)
 	case SIOCAIFADDR:
 	case SIOCDIFADDR:
 #if INET6
-	case SIOCAIFADDR_IN6:
+	case SIOCAIFADDR_IN6_32:
+	case SIOCAIFADDR_IN6_64:
 	case SIOCDIFADDR_IN6:
 #endif /* INET6 */
 		if (ifp->if_pf_kif != NULL)
diff --git a/bsd/net/pf_osfp.c b/bsd/net/pf_osfp.c
index b7e579d5c..89d71e889 100644
--- a/bsd/net/pf_osfp.c
+++ b/bsd/net/pf_osfp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -312,7 +312,7 @@ int
 pf_osfp_add(struct pf_osfp_ioctl *fpioc)
 {
 	struct pf_os_fingerprint *fp, fpadd;
-	struct pf_osfp_entry *entry;
+	struct pf_osfp_entry *entry, *uentry;
 
 	memset(&fpadd, 0, sizeof (fpadd));
 	fpadd.fp_tcpopts = fpioc->fp_tcpopts;
@@ -324,6 +324,12 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc)
 	fpadd.fp_wscale = fpioc->fp_wscale;
 	fpadd.fp_ttl = fpioc->fp_ttl;
 
+	uentry = &fpioc->fp_os;
+	uentry->fp_entry.sle_next = NULL;
+	uentry->fp_class_nm[sizeof (uentry->fp_class_nm) - 1] = '\0';
+	uentry->fp_version_nm[sizeof (uentry->fp_version_nm) - 1] = '\0';
+	uentry->fp_subtype_nm[sizeof (uentry->fp_subtype_nm) - 1] = '\0';
+
 	DPFPRINTF("adding osfp %s %s %s = %s%d:%d:%d:%s%d:0x%llx %d "
 	    "(TS=%s,M=%s%d,W=%s%d) %x\n",
 	    fpioc->fp_os.fp_class_nm, fpioc->fp_os.fp_version_nm,
@@ -527,6 +533,7 @@ pf_osfp_get(struct pf_osfp_ioctl *fpioc)
 				fpioc->fp_getnum = num;
 				memcpy(&fpioc->fp_os, entry,
 				    sizeof (fpioc->fp_os));
+				fpioc->fp_os.fp_entry.sle_next = NULL;
 				return (0);
 			}
 		}
diff --git a/bsd/net/pf_table.c b/bsd/net/pf_table.c
index 735c65b81..ea3b529f5 100644
--- a/bsd/net/pf_table.c
+++ b/bsd/net/pf_table.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -1190,6 +1190,7 @@ pfr_add_tables(user_addr_t tbl, int size, int *nadd, int flags)
 	for (i = 0; i < size; i++, tbl += sizeof (key.pfrkt_t)) {
 		if (COPYIN(tbl, &key.pfrkt_t, sizeof (key.pfrkt_t), flags))
 			senderr(EFAULT);
+		pfr_table_copyin_cleanup(&key.pfrkt_t);
 		if (pfr_validate_table(&key.pfrkt_t, PFR_TFLAG_USRMASK,
 		    flags & PFR_FLAG_USERIOCTL))
 			senderr(EINVAL);
@@ -1266,6 +1267,7 @@ pfr_del_tables(user_addr_t tbl, int size, int *ndel, int flags)
 	for (i = 0; i < size; i++, tbl += sizeof (key.pfrkt_t)) {
 		if (COPYIN(tbl, &key.pfrkt_t, sizeof (key.pfrkt_t), flags))
 			return (EFAULT);
+		pfr_table_copyin_cleanup(&key.pfrkt_t);
 		if (pfr_validate_table(&key.pfrkt_t, 0,
 		    flags & PFR_FLAG_USERIOCTL))
 			return (EINVAL);
@@ -1385,6 +1387,7 @@ pfr_clr_tstats(user_addr_t tbl, int size, int *nzero, int flags)
 	for (i = 0; i < size; i++, tbl += sizeof (key.pfrkt_t)) {
 		if (COPYIN(tbl, &key.pfrkt_t, sizeof (key.pfrkt_t), flags))
 			return (EFAULT);
+		pfr_table_copyin_cleanup(&key.pfrkt_t);
 		if (pfr_validate_table(&key.pfrkt_t, 0, 0))
 			return (EINVAL);
 		p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key);
@@ -1420,6 +1423,7 @@ pfr_set_tflags(user_addr_t tbl, int size, int setflag, int clrflag,
 	for (i = 0; i < size; i++, tbl += sizeof (key.pfrkt_t)) {
 		if (COPYIN(tbl, &key.pfrkt_t, sizeof (key.pfrkt_t), flags))
 			return (EFAULT);
+		pfr_table_copyin_cleanup(&key.pfrkt_t);
 		if (pfr_validate_table(&key.pfrkt_t, 0,
 		    flags & PFR_FLAG_USERIOCTL))
 			return (EINVAL);
@@ -1730,6 +1734,13 @@ pfr_commit_ktable(struct pfr_ktable *kt, u_int64_t tzero)
 	pfr_setflags_ktable(kt, nflags);
 }
 
+void
+pfr_table_copyin_cleanup(struct pfr_table *tbl)
+{
+	tbl->pfrt_anchor[sizeof (tbl->pfrt_anchor) - 1] = '\0';
+	tbl->pfrt_name[sizeof (tbl->pfrt_name) - 1] = '\0';
+}
+
 static int
 pfr_validate_table(struct pfr_table *tbl, int allowedflags, int no_reserved)
 {
diff --git a/bsd/net/pfkeyv2.h b/bsd/net/pfkeyv2.h
index fa89f14c7..e452e1d2e 100644
--- a/bsd/net/pfkeyv2.h
+++ b/bsd/net/pfkeyv2.h
@@ -412,6 +412,7 @@ struct sadb_sastat {
 #define SADB_X_EXT_NATT_KEEPALIVE	0x0004	/* Local node is behind NAT, send keepalives */
 											/* Should only be set for outbound SAs */
 #define SADB_X_EXT_NATT_MULTIPLEUSERS 0x0008	/* For use on VPN server - support multiple users 	*/
+#define SADB_X_EXT_NATT_DETECTED_PEER 0x0010
 
 #endif /* PRIVATE */	
 
diff --git a/bsd/net/pfvar.h b/bsd/net/pfvar.h
index fc35db9e7..58c4b3969 100644
--- a/bsd/net/pfvar.h
+++ b/bsd/net/pfvar.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -77,7 +77,7 @@ extern "C" {
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/queue.h>
-#include <sys/tree.h>
+#include <libkern/tree.h>
 
 #include <net/radix.h>
 #include <netinet/in.h>
@@ -1771,6 +1771,55 @@ struct pfioc_states_64 {
 };
 #endif /* KERNEL */
 
+#define PFTOK_PROCNAME_LEN    64
+#pragma pack(1)
+struct pfioc_token {
+	u_int64_t			token_value;
+	u_int64_t			timestamp;
+	pid_t				pid;
+	char				proc_name[PFTOK_PROCNAME_LEN];
+};
+#pragma pack()
+
+struct pfioc_kernel_token {
+	SLIST_ENTRY(pfioc_kernel_token)	next;
+	struct pfioc_token		token;
+};
+
+struct pfioc_remove_token {
+	u_int64_t                token_value;
+	u_int64_t                refcount;
+};
+
+struct pfioc_tokens {
+	int	size;
+	union {
+		caddr_t				pgtu_buf;
+		struct pfioc_token		*pgtu_tokens;
+	} pgt_u __attribute__((aligned(8)));
+#define pgt_buf		pgt_u.pgtu_buf
+#define pgt_tokens	pgt_u.pgtu_tokens
+};
+
+#ifdef KERNEL
+struct pfioc_tokens_32 {
+	int	size;
+	union {
+		user32_addr_t		pgtu_buf;
+		user32_addr_t		pgtu_tokens;
+	} pgt_u __attribute__((aligned(8)));
+};
+
+struct pfioc_tokens_64 {
+	int	size;
+	union {
+		user64_addr_t		pgtu_buf;
+		user64_addr_t		pgtu_tokens;
+	} pgt_u __attribute__((aligned(8)));
+};
+#endif /* KERNEL */
+
+
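
The _32/_64 mirror structs exist because a 64-bit kernel must interpret the
buffer pointer according to the ioctl caller's ABI; the PF_USER_ADDR() macro
used in pf_ioctl.c presumably selects between them.  A hand-rolled equivalent
might look like this (illustrative sketch, not the macro's actual
definition):

static user_addr_t
pfioc_tokens_user_buf(caddr_t addr, struct proc *p)
{
	if (proc_is64bit(p))
		return (((struct pfioc_tokens_64 *)(void *)addr)->
		    pgt_u.pgtu_buf);
	return (CAST_USER_ADDR_T(
	    ((struct pfioc_tokens_32 *)(void *)addr)->pgt_u.pgtu_buf));
}
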
 struct pfioc_src_nodes {
 	int	psn_len;
 	union {
@@ -1860,6 +1909,7 @@ struct pfioc_trans_64 {
 };
 #endif /* KERNEL */
 
+
 #define PFR_FLAG_ATOMIC		0x00000001
 #define PFR_FLAG_DUMMY		0x00000002
 #define PFR_FLAG_FEEDBACK	0x00000004
@@ -1955,12 +2005,15 @@ struct pfioc_iface_64 {
 #define DIOCSTART	_IO  ('D',  1)
 #define DIOCSTOP	_IO  ('D',  2)
 #define DIOCADDRULE	_IOWR('D',  4, struct pfioc_rule)
+#define DIOCGETSTARTERS	_IOWR('D',  5, struct pfioc_tokens)
 #define DIOCGETRULES	_IOWR('D',  6, struct pfioc_rule)
 #define DIOCGETRULE	_IOWR('D',  7, struct pfioc_rule)
-/* XXX cut 8 - 17 */
+#define DIOCSTARTREF	_IOR ('D',  8, u_int64_t)
+#define DIOCSTOPREF	_IOWR('D',  9, struct pfioc_remove_token)
+/* XXX cut 10 - 17 */
 #define DIOCCLRSTATES	_IOWR('D', 18, struct pfioc_state_kill)
 #define DIOCGETSTATE	_IOWR('D', 19, struct pfioc_state)
-#define DIOCSETSTATUSIF _IOWR('D', 20, struct pfioc_if)
+#define DIOCSETSTATUSIF	_IOWR('D', 20, struct pfioc_if)
 #define DIOCGETSTATUS	_IOWR('D', 21, struct pf_status)
 #define DIOCCLRSTATUS	_IO  ('D', 22)
 #define DIOCNATLOOK	_IOWR('D', 23, struct pfioc_natlook)
@@ -1995,23 +2048,23 @@ struct pfioc_iface_64 {
 #define	DIOCRDELTABLES	_IOWR('D', 62, struct pfioc_table)
 #define	DIOCRGETTABLES	_IOWR('D', 63, struct pfioc_table)
 #define	DIOCRGETTSTATS	_IOWR('D', 64, struct pfioc_table)
-#define DIOCRCLRTSTATS  _IOWR('D', 65, struct pfioc_table)
+#define DIOCRCLRTSTATS	_IOWR('D', 65, struct pfioc_table)
 #define	DIOCRCLRADDRS	_IOWR('D', 66, struct pfioc_table)
 #define	DIOCRADDADDRS	_IOWR('D', 67, struct pfioc_table)
 #define	DIOCRDELADDRS	_IOWR('D', 68, struct pfioc_table)
 #define	DIOCRSETADDRS	_IOWR('D', 69, struct pfioc_table)
 #define	DIOCRGETADDRS	_IOWR('D', 70, struct pfioc_table)
 #define	DIOCRGETASTATS	_IOWR('D', 71, struct pfioc_table)
-#define DIOCRCLRASTATS  _IOWR('D', 72, struct pfioc_table)
+#define DIOCRCLRASTATS	_IOWR('D', 72, struct pfioc_table)
 #define	DIOCRTSTADDRS	_IOWR('D', 73, struct pfioc_table)
 #define	DIOCRSETTFLAGS	_IOWR('D', 74, struct pfioc_table)
 #define DIOCRINADEFINE	_IOWR('D', 77, struct pfioc_table)
 #define DIOCOSFPFLUSH	_IO('D', 78)
 #define DIOCOSFPADD	_IOWR('D', 79, struct pf_osfp_ioctl)
 #define DIOCOSFPGET	_IOWR('D', 80, struct pf_osfp_ioctl)
-#define DIOCXBEGIN      _IOWR('D', 81, struct pfioc_trans)
-#define DIOCXCOMMIT     _IOWR('D', 82, struct pfioc_trans)
-#define DIOCXROLLBACK   _IOWR('D', 83, struct pfioc_trans)
+#define DIOCXBEGIN	_IOWR('D', 81, struct pfioc_trans)
+#define DIOCXCOMMIT	_IOWR('D', 82, struct pfioc_trans)
+#define DIOCXROLLBACK	_IOWR('D', 83, struct pfioc_trans)
 #define DIOCGETSRCNODES	_IOWR('D', 84, struct pfioc_src_nodes)
 #define DIOCCLRSRCNODES	_IO('D', 85)
 #define DIOCSETHOSTID	_IOWR('D', 86, u_int32_t)
@@ -2158,6 +2211,7 @@ __private_extern__ int pfr_pool_get(struct pfr_ktable *, int *,
     struct pf_addr *, struct pf_addr **, struct pf_addr **, sa_family_t);
 __private_extern__ void pfr_dynaddr_update(struct pfr_ktable *,
     struct pfi_dynaddr *);
+__private_extern__ void pfr_table_copyin_cleanup(struct pfr_table *);
 __private_extern__ struct pfr_ktable *pfr_attach_table(struct pf_ruleset *,
     char *);
 __private_extern__ void pfr_detach_table(struct pfr_ktable *);
@@ -2248,6 +2302,9 @@ __private_extern__ struct pf_anchor_global pf_anchors;
 __private_extern__ struct pf_anchor pf_main_anchor;
 #define pf_main_ruleset	pf_main_anchor.ruleset
 
+__private_extern__ int pf_is_enabled;
+#define PF_IS_ENABLED (pf_is_enabled != 0)
+
 /* these ruleset functions can be linked into userland programs (pfctl) */
 __private_extern__ int pf_get_ruleset_number(u_int8_t);
 __private_extern__ void pf_init_ruleset(struct pf_ruleset *);
diff --git a/bsd/net/ppp_deflate.c b/bsd/net/ppp_deflate.c
index 5968578b0..4541def29 100644
--- a/bsd/net/ppp_deflate.c
+++ b/bsd/net/ppp_deflate.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -316,7 +316,7 @@ z_compress(arg, mret, mp, orig_len, maxolen)
     }
     ++state->seqno;
 
-    rptr += (proto > 0xff)? 2: 3;	/* skip 1st proto byte if 0 */
+    rptr += (proto > 0xff)? 2: 3;	/* skip 1st proto byte if 0 */
     state->strm.next_in = rptr;
     state->strm.avail_in = mtod(mp, u_char *) + mp->m_len - rptr;
     mp = mp->m_next;
diff --git a/bsd/net/route.c b/bsd/net/route.c
index 5f1580e8f..5ed681a0f 100644
--- a/bsd/net/route.c
+++ b/bsd/net/route.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -70,16 +70,25 @@
 #include <sys/domain.h>
 #include <sys/syslog.h>
 #include <sys/queue.h>
+#include <sys/mcache.h>
+#include <sys/protosw.h>
 #include <kern/lock.h>
 #include <kern/zalloc.h>
 
 #include <net/if.h>
 #include <net/route.h>
+#include <net/ntstat.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_mroute.h>
 #include <netinet/ip_var.h>
+#include <netinet/ip6.h>
+
+#if INET6
+#include <netinet6/ip6_var.h>
+#include <netinet6/in6_var.h>
+#endif /* INET6 */
 
 #include <net/if_dl.h>
 
@@ -187,7 +196,6 @@
  */
 
 #define	equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
-#define	SA(p) ((struct sockaddr *)(p))
 
 extern void kdp_set_gateway_mac (void *gatewaymac);
 
@@ -261,11 +269,6 @@ struct rtentry_dbg {
 	TAILQ_ENTRY(rtentry_dbg) rtd_trash_link;
 };
 
-#define atomic_add_16_ov(a, n)	\
-	((uint16_t) OSAddAtomic16(n, (volatile SInt16 *)a))
-#define atomic_add_32_ov(a, n)	\
-	((uint32_t) OSAddAtomic(n, a))
-
 /* List of trash route entries protected by rnh_lock */
 static TAILQ_HEAD(, rtentry_dbg) rttrash_head;
 
@@ -285,33 +288,34 @@ static struct rtentry *rtalloc1_common_locked(struct sockaddr *, int, uint32_t,
 static int rtrequest_common_locked(int, struct sockaddr *,
     struct sockaddr *, struct sockaddr *, int, struct rtentry **,
     unsigned int);
+static struct rtentry *rtalloc1_locked(struct sockaddr *, int, uint32_t);
 static void rtalloc_ign_common_locked(struct route *, uint32_t, unsigned int);
-static inline void sa_set_ifscope(struct sockaddr *, unsigned int);
-static struct sockaddr *sin_copy(struct sockaddr_in *, struct sockaddr_in *,
-    unsigned int);
-static struct sockaddr *mask_copy(struct sockaddr *, struct sockaddr_in *,
-    unsigned int);
+static inline void sin6_set_ifscope(struct sockaddr *, unsigned int);
+static inline void sin6_set_embedded_ifscope(struct sockaddr *, unsigned int);
+static inline unsigned int sin6_get_embedded_ifscope(struct sockaddr *);
+static struct sockaddr *sa_copy(struct sockaddr *, struct sockaddr_storage *,
+    unsigned int *);
+static struct sockaddr *ma_copy(int, struct sockaddr *,
+    struct sockaddr_storage *, unsigned int);
 static struct sockaddr *sa_trim(struct sockaddr *, int);
 static struct radix_node *node_lookup(struct sockaddr *, struct sockaddr *,
     unsigned int);
-static struct radix_node *node_lookup_default(void);
+static struct radix_node *node_lookup_default(int);
 static int rn_match_ifscope(struct radix_node *, void *);
 static struct ifaddr *ifa_ifwithroute_common_locked(int,
     const struct sockaddr *, const struct sockaddr *, unsigned int);
 static struct rtentry *rte_alloc(void);
 static void rte_free(struct rtentry *);
 static void rtfree_common(struct rtentry *, boolean_t);
-#if IFNET_ROUTE_REFCNT
 static void rte_if_ref(struct ifnet *, int);
-#endif /* IFNET_ROUTE_REFCNT */
 
 uint32_t route_generation = 0;
 
 /*
- * sockaddr_in with embedded interface scope; this is used internally
- * to keep track of scoped route entries in the routing table.  The
- * fact that such a scope is embedded in the structure is an artifact
- * of the current implementation which could change in future.
+ * sockaddr_in with scope ID field; this is used internally to keep
+ * track of scoped route entries in the routing table.  The fact that
+ * such a value is embedded in the structure is an artifact of the
+ * current implementation which could change in future.
  */
 struct sockaddr_inifscope {
 	__uint8_t	sin_len;
@@ -330,11 +334,14 @@ struct sockaddr_inifscope {
 			__uint32_t	ifscope;
 		} _in_index;
 	} un;
-#define	sin_ifscope	un._in_index.ifscope
+#define	sin_scope_id	un._in_index.ifscope
 };
 
+#define	SA(sa)		((struct sockaddr *)(size_t)(sa))
 #define	SIN(sa)		((struct sockaddr_in *)(size_t)(sa))
+#define	SIN6(sa)	((struct sockaddr_in6 *)(size_t)(sa))
 #define	SINIFSCOPE(sa)	((struct sockaddr_inifscope *)(size_t)(sa))
+#define	SIN6IFSCOPE(sa)	SIN6(sa)
 
 #define	ASSERT_SINIFSCOPE(sa) {						\
 	if ((sa)->sa_family != AF_INET ||				\
@@ -342,6 +349,12 @@ struct sockaddr_inifscope {
 		panic("%s: bad sockaddr_in %p\n", __func__, sa);	\
 }
 
+#define	ASSERT_SIN6IFSCOPE(sa) {					\
+	if ((sa)->sa_family != AF_INET6 ||				\
+	    (sa)->sa_len < sizeof (struct sockaddr_in6))		\
+		panic("%s: bad sockaddr_in6 %p\n", __func__, sa);	\
+}
+
 /*
  * Argument to leaf-matching routine; at present it is scoped routing
  * specific but can be expanded in future to include other search filters.
@@ -358,27 +371,36 @@ static struct sockaddr sin_def = {
 	sizeof (struct sockaddr_in), AF_INET, { 0, }
 };
 
+static struct sockaddr_in6 sin6_def = {
+	sizeof (struct sockaddr_in6), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0
+};
+
 /*
  * Interface index (scope) of the primary interface; determined at
  * the time when the default, non-scoped route gets added, changed
  * or deleted.  Protected by rnh_lock.
  */
 static unsigned int primary_ifscope = IFSCOPE_NONE;
+static unsigned int primary6_ifscope = IFSCOPE_NONE;
+
+#define	INET_DEFAULT(sa)	\
+	((sa)->sa_family == AF_INET && SIN(sa)->sin_addr.s_addr == 0)
 
-#define	INET_DEFAULT(dst)	\
-	((dst)->sa_family == AF_INET && SIN(dst)->sin_addr.s_addr == 0)
+#define	INET6_DEFAULT(sa)						\
+	((sa)->sa_family == AF_INET6 &&					\
+	IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa)->sin6_addr))
 
+#define	SA_DEFAULT(sa)	(INET_DEFAULT(sa) || INET6_DEFAULT(sa))
 #define	RT(r)		((struct rtentry *)r)
+#define	RN(r)		((struct radix_node *)r)
 #define	RT_HOST(r)	(RT(r)->rt_flags & RTF_HOST)
 
-#if IFNET_ROUTE_REFCNT
 SYSCTL_DECL(_net_idle_route);
 
 static int rt_if_idle_expire_timeout = RT_IF_IDLE_EXPIRE_TIMEOUT;
 SYSCTL_INT(_net_idle_route, OID_AUTO, expire_timeout, CTLFLAG_RW,
     &rt_if_idle_expire_timeout, 0, "Default expiration time on routes for "
     "interface idle reference counting");
-#endif /* IFNET_ROUTE_REFCNT */
 
 /*
  * Given a route, determine whether or not it is the non-scoped default
@@ -386,88 +408,189 @@ SYSCTL_INT(_net_idle_route, OID_AUTO, expire_timeout, CTLFLAG_RW,
  * a separate place when rt is in the process of being created.
  */
 boolean_t
-rt_inet_default(struct rtentry *rt, struct sockaddr *dst)
+rt_primary_default(struct rtentry *rt, struct sockaddr *dst)
 {
-	return (INET_DEFAULT(dst) && !(rt->rt_flags & RTF_IFSCOPE));
+	return (SA_DEFAULT(dst) && !(rt->rt_flags & RTF_IFSCOPE));
 }
 
 /*
  * Set the ifscope of the primary interface; caller holds rnh_lock.
  */
 void
-set_primary_ifscope(unsigned int ifscope)
+set_primary_ifscope(int af, unsigned int ifscope)
 {
-	primary_ifscope = ifscope;
+	if (af == AF_INET)
+		primary_ifscope = ifscope;
+	else
+		primary6_ifscope = ifscope;
 }
 
 /*
  * Return the ifscope of the primary interface; caller holds rnh_lock.
  */
 unsigned int
-get_primary_ifscope(void)
+get_primary_ifscope(int af)
 {
-	return (primary_ifscope);
+	return (af == AF_INET ? primary_ifscope : primary6_ifscope);
 }
 
 /*
- * Embed ifscope into a given a sockaddr_in.
+ * Set the scope ID of a given a sockaddr_in.
  */
-static inline void
-sa_set_ifscope(struct sockaddr *sa, unsigned int ifscope)
+void
+sin_set_ifscope(struct sockaddr *sa, unsigned int ifscope)
 {
 	/* Caller must pass in sockaddr_in */
 	ASSERT_SINIFSCOPE(sa);
 
-	SINIFSCOPE(sa)->sin_ifscope = ifscope;
+	SINIFSCOPE(sa)->sin_scope_id = ifscope;
 }
 
 /*
- * Given a sockaddr_in, return the embedded ifscope to the caller.
+ * Set the scope ID of given a sockaddr_in6.
+ */
+static inline void
+sin6_set_ifscope(struct sockaddr *sa, unsigned int ifscope)
+{
+	/* Caller must pass in sockaddr_in6 */
+	ASSERT_SIN6IFSCOPE(sa);
+
+	SIN6IFSCOPE(sa)->sin6_scope_id = ifscope;
+}
+
+/*
+ * Given a sockaddr_in, return the scope ID to the caller.
  */
 unsigned int
-sa_get_ifscope(struct sockaddr *sa)
+sin_get_ifscope(struct sockaddr *sa)
 {
 	/* Caller must pass in sockaddr_in */
 	ASSERT_SINIFSCOPE(sa);
 
-	return (SINIFSCOPE(sa)->sin_ifscope);
+	return (SINIFSCOPE(sa)->sin_scope_id);
 }
 
 /*
- * Copy a sockaddr_in src to dst and embed ifscope into dst.
+ * Given a sockaddr_in6, return the scope ID to the caller.
+ */
+unsigned int
+sin6_get_ifscope(struct sockaddr *sa)
+{
+	/* Caller must pass in sockaddr_in6 */
+	ASSERT_SIN6IFSCOPE(sa);
+
+	return (SIN6IFSCOPE(sa)->sin6_scope_id);
+}
+
+static inline void
+sin6_set_embedded_ifscope(struct sockaddr *sa, unsigned int ifscope)
+{
+	/* Caller must pass in sockaddr_in6 */
+	ASSERT_SIN6IFSCOPE(sa);
+	VERIFY(IN6_IS_SCOPE_EMBED(&(SIN6(sa)->sin6_addr)));
+
+	SIN6(sa)->sin6_addr.s6_addr16[1] = htons(ifscope);
+}
+
+static inline unsigned int
+sin6_get_embedded_ifscope(struct sockaddr *sa)
+{
+	/* Caller must pass in sockaddr_in6 */
+	ASSERT_SIN6IFSCOPE(sa);
+
+	return (ntohs(SIN6(sa)->sin6_addr.s6_addr16[1]));
+}
+
+/*
+ * Copy a sockaddr_{in,in6} src to a dst storage and set scope ID into dst.
+ *
+ * To clear the scope ID, pass in a NULL pifscope.  To set the scope ID, pass
+ * in a non-NULL pifscope with non-zero ifscope.  Otherwise if pifscope is
+ * non-NULL and ifscope is IFSCOPE_NONE, the existing scope ID is left intact.
+ * In any case, the effective scope ID value is returned to the caller via
+ * pifscope, if it is non-NULL.
  */
 static struct sockaddr *
-sin_copy(struct sockaddr_in *src, struct sockaddr_in *dst, unsigned int ifscope)
+sa_copy(struct sockaddr *src, struct sockaddr_storage *dst,
+    unsigned int *pifscope)
 {
-	*dst = *src;
-	sa_set_ifscope(SA(dst), ifscope);
+	int af = src->sa_family;
+	unsigned int ifscope = (pifscope != NULL) ? *pifscope : IFSCOPE_NONE;
+
+	VERIFY(af == AF_INET || af == AF_INET6);
+
+	bzero(dst, sizeof (*dst));
+
+	if (af == AF_INET) {
+		bcopy(src, dst, sizeof (struct sockaddr_in));
+		if (pifscope == NULL || ifscope != IFSCOPE_NONE)
+			sin_set_ifscope(SA(dst), ifscope);
+	} else {
+		bcopy(src, dst, sizeof (struct sockaddr_in6));
+		if (pifscope != NULL &&
+		    IN6_IS_SCOPE_EMBED(&SIN6(dst)->sin6_addr)) {
+			unsigned int eifscope;
+			/*
+			 * If the address contains the embedded scope ID,
+			 * use that as the value for sin6_scope_id as long
+			 * the caller doesn't insist on clearing it (by
+			 * passing NULL) or setting it.
+			 */
+			eifscope = sin6_get_embedded_ifscope(SA(dst));
+			if (eifscope != IFSCOPE_NONE && ifscope == IFSCOPE_NONE)
+				ifscope = eifscope;
+			sin6_set_ifscope(SA(dst), ifscope);
+			/*
+			 * If sin6_scope_id is set but the address doesn't
+			 * contain the equivalent embedded value, set it.
+			 */
+			if (ifscope != IFSCOPE_NONE && eifscope != ifscope)
+				sin6_set_embedded_ifscope(SA(dst), ifscope);
+		} else if (pifscope == NULL || ifscope != IFSCOPE_NONE) {
+			sin6_set_ifscope(SA(dst), ifscope);
+		}
+	}
+
+	if (pifscope != NULL) {
+		*pifscope = (af == AF_INET) ? sin_get_ifscope(SA(dst)) :
+		    sin6_get_ifscope(SA(dst));
+	}
 
 	return (SA(dst));
 }
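
For context on the embedded/explicit duality that sa_copy() maintains:
KAME-derived stacks reuse the second 16-bit word of a link-local IPv6
address, which is always zero on the wire, to carry the scope ID inside the
kernel, so fe80::1 scoped to interface index 4 is stored as fe80:4::1.  A
minimal sketch of keeping the two representations in sync (illustrative
only):

static void
sin6_sync_scope(struct sockaddr_in6 *sin6, unsigned int ifscope)
{
	if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
		/* embedded form: fe80::1 at scope 4 becomes fe80:4::1 */
		sin6->sin6_addr.s6_addr16[1] = htons(ifscope);
	}
	sin6->sin6_scope_id = ifscope;	/* explicit form */
}
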
 
 /*
- * Copy a mask from src to a sockaddr_in dst and embed ifscope into dst.
+ * Copy a mask from src to a dst storage and set scope ID into dst.
  */
 static struct sockaddr *
-mask_copy(struct sockaddr *src, struct sockaddr_in *dst, unsigned int ifscope)
+ma_copy(int af, struct sockaddr *src, struct sockaddr_storage *dst,
+    unsigned int ifscope)
 {
-	/* We know dst is at least the size of sockaddr{_in} */
+	VERIFY(af == AF_INET || af == AF_INET6);
+
 	bzero(dst, sizeof (*dst));
 	rt_maskedcopy(src, SA(dst), src);
 
 	/*
 	 * The length of the mask sockaddr would need to be adjusted
-	 * to cover the additional sin_ifscope field; when ifscope is
-	 * IFSCOPE_NONE, we'd end up clearing the embedded ifscope on
+	 * to cover the additional {sin,sin6}_ifscope field; when ifscope
+	 * is IFSCOPE_NONE, we'd end up clearing the scope ID field on
 	 * the destination mask in addition to extending the length
 	 * of the sockaddr, as a side effect.  This is okay, as any
 	 * trailing zeroes would be skipped by rn_addmask prior to
 	 * inserting or looking up the mask in the mask tree.
 	 */
-	SINIFSCOPE(dst)->sin_ifscope = ifscope;
-	SINIFSCOPE(dst)->sin_len =
-	    offsetof(struct sockaddr_inifscope, sin_ifscope) +
-	    sizeof (SINIFSCOPE(dst)->sin_ifscope);
+	if (af == AF_INET) {
+		SINIFSCOPE(dst)->sin_scope_id = ifscope;
+		SINIFSCOPE(dst)->sin_len =
+		    offsetof(struct sockaddr_inifscope, sin_scope_id) +
+		    sizeof (SINIFSCOPE(dst)->sin_scope_id);
+	} else {
+		SIN6IFSCOPE(dst)->sin6_scope_id = ifscope;
+		SIN6IFSCOPE(dst)->sin6_len =
+		    offsetof(struct sockaddr_in6, sin6_scope_id) +
+		    sizeof (SIN6IFSCOPE(dst)->sin6_scope_id);
+	}
 
 	return (SA(dst));
 }
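
The sa_len arithmetic in ma_copy() is worth spelling out.  Assuming the
struct layouts shown earlier in this file, the numbers work out as follows
(a worked example, not additional code):

/*
 * AF_INET: sin_scope_id overlays sin_zero at offset 8
 *	(sin_len 1 + sin_family 1 + sin_port 2 + sin_addr 4), so
 *	sin_len = offsetof(sockaddr_inifscope, sin_scope_id) + 4
 *	        = 8 + 4 = 12, versus 16 for a plain sockaddr_in.
 *
 * AF_INET6: sin6_scope_id is the final field of sockaddr_in6
 *	(offset 24, size 4), so sin6_len = 24 + 4 = 28, which is the
 *	full sizeof (struct sockaddr_in6).
 */
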
@@ -501,15 +624,15 @@ sa_trim(struct sockaddr *sa, int skip)
 }
 
 /*
- * Called by rtm_msg{1,2} routines to "scrub" the embedded interface scope
- * away from the socket address structure, so that clients of the routing
- * socket will not be confused by the presence of the embedded scope, or the
- * side effect of the increased length due to that.  The source sockaddr is
- * not modified; instead, the scrubbing happens on the destination sockaddr
- * storage that is passed in by the caller.
+ * Called by rtm_msg{1,2} routines to "scrub" the scope ID field away from
+ * the socket address structure, so that clients of the routing socket will
+ * not be confused by the presence of the information, or the side effect of
+ * the increased length due to that.  The source sockaddr is not modified;
+ * instead, the scrubbing happens on the destination sockaddr storage that
+ * is passed in by the caller.
  */
 struct sockaddr *
-rtm_scrub_ifscope(int idx, struct sockaddr *hint, struct sockaddr *sa,
+rtm_scrub_ifscope(int type, int idx, struct sockaddr *hint, struct sockaddr *sa,
     struct sockaddr_storage *ss)
 {
 	struct sockaddr *ret = sa;
@@ -517,39 +640,64 @@ rtm_scrub_ifscope(int idx, struct sockaddr *hint, struct sockaddr *sa,
 	switch (idx) {
 	case RTAX_DST:
 		/*
-		 * If this is for an AF_INET destination address, call
-		 * sin_copy() with IFSCOPE_NONE as it does what we need.
+		 * If this is for an AF_INET/AF_INET6 destination address,
+		 * call sa_copy() to clear the scope ID field.
 		 */
 		if (sa->sa_family == AF_INET &&
-		    SINIFSCOPE(sa)->sin_ifscope != IFSCOPE_NONE) {
-			bzero(ss, sizeof (*ss));
-			ret = sin_copy(SIN(sa), SIN(ss), IFSCOPE_NONE);
+		    SINIFSCOPE(sa)->sin_scope_id != IFSCOPE_NONE) {
+			ret = sa_copy(sa, ss, NULL);
+		} else if (sa->sa_family == AF_INET6 &&
+		    SIN6IFSCOPE(sa)->sin6_scope_id != IFSCOPE_NONE) {
+			ret = sa_copy(sa, ss, NULL);
 		}
 		break;
 
 	case RTAX_NETMASK: {
+		int skip, af;
 		/*
-		 * If this is for a mask, we can't tell whether or not
-		 * there is an embedded interface scope, as the span of
-		 * bytes between sa_len and the beginning of the mask
-		 * (offset of sin_addr in the case of AF_INET) may be
-		 * filled with all-ones by rn_addmask(), and hence we
-		 * cannot rely on sa_family.  Because of this, we use
-		 * the sa_family of the hint sockaddr (RTAX_{DST,IFA})
-		 * as indicator as to whether or not the mask is to be
-		 * treated as one for AF_INET.  Clearing the embedded
-		 * scope involves setting it to IFSCOPE_NONE followed
-		 * by calling sa_trim() to trim trailing zeroes from
-		 * the storage sockaddr, which reverses what was done
-		 * earlier by mask_copy() on the source sockaddr.
+		 * If this is for a mask, we can't tell whether or not there
+		 * is a valid scope ID value, as the span of bytes between
+		 * sa_len and the beginning of the mask (offset of sin_addr in
+		 * the case of AF_INET, or sin6_addr for AF_INET6) may be
+		 * filled with all-ones by rn_addmask(), and hence we cannot
+		 * rely on sa_family.  Because of this, we use the sa_family
+		 * of the hint sockaddr (RTAX_{DST,IFA}) as an indicator of
+		 * whether the mask is to be treated as one for AF_INET or
+		 * AF_INET6.  Clearing the scope ID field involves setting it
+		 * to IFSCOPE_NONE followed by calling sa_trim() to trim
+		 * trailing zeroes from the storage sockaddr, which reverses
+		 * what was done earlier by ma_copy() on the source sockaddr.
 		 */
-		int skip = offsetof(struct sockaddr_in, sin_addr);
-		if (sa->sa_len > skip && sa->sa_len <= sizeof (*ss) &&
-		    hint != NULL && hint->sa_family == AF_INET) {
+		if (hint == NULL ||
+		    ((af = hint->sa_family) != AF_INET && af != AF_INET6))
+			break;	/* nothing to do */
+
+		skip = (af == AF_INET) ?
+		    offsetof(struct sockaddr_in, sin_addr) :
+		    offsetof(struct sockaddr_in6, sin6_addr);
+
+		if (sa->sa_len > skip && sa->sa_len <= sizeof (*ss)) {
 			bzero(ss, sizeof (*ss));
 			bcopy(sa, ss, sa->sa_len);
-			SINIFSCOPE(ss)->sin_ifscope = IFSCOPE_NONE;
+			/*
+			 * Don't use {sin,sin6}_set_ifscope() since sa_family
+			 * and sa_len of the netmask might not be set to the
+			 * values expected for the hint's address family.
+			 */
+			if (hint->sa_family == AF_INET)
+				SINIFSCOPE(ss)->sin_scope_id = IFSCOPE_NONE;
+			else
+				SIN6IFSCOPE(ss)->sin6_scope_id = IFSCOPE_NONE;
 			ret = sa_trim(SA(ss), skip);
+
+			/*
+			 * For AF_INET6 mask, set sa_len appropriately unless
+			 * For an AF_INET6 mask, set sa_len appropriately
+			 * unless this is requested via sysctl_dumpentry(),
+			 * in which case we return the raw value.
+			if (hint->sa_family == AF_INET6 &&
+			    type != RTM_GET && type != RTM_GET2)
+				SA(ret)->sa_len = sizeof (struct sockaddr_in6);
 		}
 		break;
 	}
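/*
 * Illustrative usage sketch (editorial aside, not part of this patch):
 * scrubbing the scope ID from a route's destination before copying it
 * out to a routing socket client.  The wrapper name is hypothetical.
 */
static __unused struct sockaddr *
example_scrub_dst(int rtm_type, struct rtentry *rt,
    struct sockaddr_storage *ss)
{
	/* the hint sockaddr is consulted only for RTAX_NETMASK */
	return (rtm_scrub_ifscope(rtm_type, RTAX_DST, rt_key(rt),
	    rt_key(rt), ss));
}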
@@ -569,11 +717,14 @@ rn_match_ifscope(struct radix_node *rn, void *arg)
 {
 	struct rtentry *rt = (struct rtentry *)rn;
 	struct matchleaf_arg *ma = arg;
+	int af = rt_key(rt)->sa_family;
 
-	if (!(rt->rt_flags & RTF_IFSCOPE) || rt_key(rt)->sa_family != AF_INET)
+	if (!(rt->rt_flags & RTF_IFSCOPE) || (af != AF_INET && af != AF_INET6))
 		return (0);
 
-	return (SINIFSCOPE(rt_key(rt))->sin_ifscope == ma->ifscope);
+	return (af == AF_INET ?
+	    (SINIFSCOPE(rt_key(rt))->sin_scope_id == ma->ifscope) :
+	    (SIN6IFSCOPE(rt_key(rt))->sin6_scope_id == ma->ifscope));
 }
 
 static void
@@ -624,6 +775,7 @@ route_init(void)
 		panic("route_init: failed allocating rte_zone");
 
 	zone_change(rte_zone, Z_EXPAND, TRUE);
+	zone_change(rte_zone, Z_CALLERACCT, FALSE);
 	zone_change(rte_zone, Z_NOENCRYPT, TRUE);
 
 	TAILQ_INIT(&rttrash_head);
@@ -648,16 +800,9 @@ rtalloc(struct route *ro)
 }
 
 void
-rtalloc_ign_locked(struct route *ro, uint32_t ignore)
+rtalloc_scoped(struct route *ro, unsigned int ifscope)
 {
-	return (rtalloc_ign_common_locked(ro, ignore, IFSCOPE_NONE));
-}
-
-void
-rtalloc_scoped_ign_locked(struct route *ro, uint32_t ignore,
-    unsigned int ifscope)
-{
-	return (rtalloc_ign_common_locked(ro, ignore, ifscope));
+	rtalloc_scoped_ign(ro, 0, ifscope);
 }
 
 static void
@@ -689,7 +834,7 @@ rtalloc_ign(struct route *ro, uint32_t ignore)
 {
 	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
 	lck_mtx_lock(rnh_lock);
-	rtalloc_ign_locked(ro, ignore);
+	rtalloc_ign_common_locked(ro, ignore, IFSCOPE_NONE);
 	lck_mtx_unlock(rnh_lock);
 }
 
@@ -698,11 +843,11 @@ rtalloc_scoped_ign(struct route *ro, uint32_t ignore, unsigned int ifscope)
 {
 	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
 	lck_mtx_lock(rnh_lock);
-	rtalloc_scoped_ign_locked(ro, ignore, ifscope);
+	rtalloc_ign_common_locked(ro, ignore, ifscope);
 	lck_mtx_unlock(rnh_lock);
 }
 
-struct rtentry *
+static struct rtentry *
 rtalloc1_locked(struct sockaddr *dst, int report, uint32_t ignflags)
 {
 	return (rtalloc1_common_locked(dst, report, ignflags, IFSCOPE_NONE));
@@ -910,6 +1055,9 @@ rtfree_common(struct rtentry *rt, boolean_t locked)
 	 * resources associated with the route.
 	 */
 	if (!(rt->rt_flags & RTF_UP)) {
+		struct rtentry *rt_parent;
+		struct ifaddr *rt_ifa;
+
 		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
 			panic("rt %p freed while in radix tree\n", rt);
 		/*
@@ -922,25 +1070,15 @@ rtfree_common(struct rtentry *rt, boolean_t locked)
 			    rtd_trash_link);
 		}
 
-		/*
-		* Route is no longer in the tree and refcnt is 0;
-		* we have exclusive access, so destroy it.
-		*/
-		RT_UNLOCK(rt);
-
 		/*
 		 * release references on items we hold them on..
 		 * e.g other routes and ifaddrs.
 		 */
-		if (rt->rt_parent != NULL) {
-			rtfree_locked(rt->rt_parent);
+		if ((rt_parent = rt->rt_parent) != NULL)
 			rt->rt_parent = NULL;
-		}
 
-		if (rt->rt_ifa != NULL) {
-			ifafree(rt->rt_ifa);
+		if ((rt_ifa = rt->rt_ifa) != NULL)
 			rt->rt_ifa = NULL;
-		}
 
 		/*
 		 * Now free any attached link-layer info.
@@ -953,6 +1091,18 @@ rtfree_common(struct rtentry *rt, boolean_t locked)
 			rt->rt_llinfo = NULL;
 		}
 
+		/*
+		 * Route is no longer in the tree and refcnt is 0;
+		 * we have exclusive access, so destroy it.
+		 */
+		RT_UNLOCK(rt);
+
+		if (rt_parent != NULL)
+			rtfree_locked(rt_parent);
+
+		if (rt_ifa != NULL)
+			IFA_REMREF(rt_ifa);
+
 		/*
 		 * The key is separately alloc'd so free it (see rt_setgate()).
 		 * This also frees the gateway, as they are always malloc'd
@@ -960,6 +1110,11 @@ rtfree_common(struct rtentry *rt, boolean_t locked)
 		 */
 		R_Free(rt_key(rt));
 
+		/*
+		 * Free any statistics that may have been allocated
+		 */
+		nstat_route_detach(rt);
+
 		/*
 		 * and the rtentry itself of course
 		 */
@@ -1057,16 +1212,19 @@ rtsetifa(struct rtentry *rt, struct ifaddr* ifa)
 	if (rt->rt_ifa == ifa)
 		return;
 
+	/* Become a regular mutex, just in case */
+	RT_CONVERT_LOCK(rt);
+
 	/* Release the old ifa */
 	if (rt->rt_ifa)
-		ifafree(rt->rt_ifa);
+		IFA_REMREF(rt->rt_ifa);
 
 	/* Set rt_ifa */
 	rt->rt_ifa = ifa;
 
 	/* Take a reference to the ifa */
 	if (rt->rt_ifa)
-		ifaref(rt->rt_ifa);
+		IFA_ADDREF(rt->rt_ifa);
 }
 
 /*
@@ -1086,11 +1244,23 @@ rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway,
 	struct rt_addrinfo info;
 	struct ifaddr *ifa = NULL;
 	unsigned int ifscope = (ifp != NULL) ? ifp->if_index : IFSCOPE_NONE;
-	struct sockaddr_in sin;
+	struct sockaddr_storage ss;
 
 	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
 	lck_mtx_lock(rnh_lock);
 
+	/*
+	 * Transform src into the internal routing table form for
+	 * comparison against rt_gateway below.
+	 */
+#if INET6
+	if ((src->sa_family == AF_INET && ip_doscopedroute) ||
+	    (src->sa_family == AF_INET6 && ip6_doscopedroute))
+#else
+	if (src->sa_family == AF_INET && ip_doscopedroute)
+#endif /* !INET6 */
+		src = sa_copy(src, &ss, &ifscope);
+
 	/*
 	 * Verify the gateway is directly reachable; if scoped routing
 	 * is enabled, verify that it is reachable from the interface
@@ -1106,31 +1276,29 @@ rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway,
 	if (rt != NULL)
 		RT_LOCK(rt);
 
-	/* Embed scope in src for comparison against rt_gateway below */
-	if (ip_doscopedroute && src->sa_family == AF_INET)
-		src = sin_copy(SIN(src), &sin, ifscope);
-
 	/*
 	 * If the redirect isn't from our current router for this dst,
 	 * it's either old or wrong.  If it redirects us to ourselves,
 	 * we have a routing loop, perhaps as a result of an interface
-	 * going down recently.
+	 * going down recently.  Holding rnh_lock here prevents the
+	 * possibility of rt_ifa/ifa's ifa_addr from changing (e.g.
+	 * in_ifinit), so it is safe to access ifa_addr without locking.
 	 */
 	if (!(flags & RTF_DONE) && rt != NULL &&
 	     (!equal(src, rt->rt_gateway) || !equal(rt->rt_ifa->ifa_addr,
 	     ifa->ifa_addr))) {
 		error = EINVAL;
 	} else {
-		ifafree(ifa);
+		IFA_REMREF(ifa);
 		if ((ifa = ifa_ifwithaddr(gateway))) {
-			ifafree(ifa);
+			IFA_REMREF(ifa);
 			ifa = NULL;
 			error = EHOSTUNREACH;
 		}
 	}
 
 	if (ifa) {
-		ifafree(ifa);
+		IFA_REMREF(ifa);
 		ifa = NULL;
 	}
 
@@ -1265,25 +1433,36 @@ ifa_ifwithroute_scoped_locked(int flags, const struct sockaddr *dst,
 
 static struct ifaddr *
 ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst,
-    const struct sockaddr *gateway, unsigned int ifscope)
+    const struct sockaddr *gw, unsigned int ifscope)
 {
 	struct ifaddr *ifa = NULL;
 	struct rtentry *rt = NULL;
-	struct sockaddr_in dst_in, gw_in;
+	struct sockaddr_storage dst_ss, gw_ss;
 
 	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 
-	if (ip_doscopedroute) {
-		/*
-		 * Just in case the sockaddr passed in by the caller
-		 * contains embedded scope, make sure to clear it since
-		 * IPv4 interface addresses aren't scoped.
-		 */
-		if (dst != NULL && dst->sa_family == AF_INET)
-			dst = sin_copy(SIN(dst), &dst_in, IFSCOPE_NONE);
-		if (gateway != NULL && gateway->sa_family == AF_INET)
-			gateway = sin_copy(SIN(gateway), &gw_in, IFSCOPE_NONE);
-	}
+	/*
+	 * Just in case the sockaddr passed in by the caller
+	 * contains a scope ID, make sure to clear it since
+	 * interface addresses aren't scoped.
+	 */
+#if INET6
+	if (dst != NULL &&
+	    ((dst->sa_family == AF_INET && ip_doscopedroute) ||
+	    (dst->sa_family == AF_INET6 && ip6_doscopedroute)))
+#else
+	if (dst != NULL && dst->sa_family == AF_INET && ip_doscopedroute)
+#endif /* !INET6 */
+		dst = sa_copy(SA(dst), &dst_ss, NULL);
+
+#if INET6
+	if (gw != NULL &&
+	    ((gw->sa_family == AF_INET && ip_doscopedroute) ||
+	    (gw->sa_family == AF_INET6 && ip6_doscopedroute)))
+#else
+	if (gw != NULL && gw->sa_family == AF_INET && ip_doscopedroute)
+#endif /* !INET6 */
+		gw = sa_copy(SA(gw), &gw_ss, NULL);
 
 	if (!(flags & RTF_GATEWAY)) {
 		/*
@@ -1297,17 +1476,17 @@ ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst,
 			ifa = ifa_ifwithdstaddr(dst);
 		}
 		if (ifa == NULL)
-			ifa = ifa_ifwithaddr_scoped(gateway, ifscope);
+			ifa = ifa_ifwithaddr_scoped(gw, ifscope);
 	} else {
 		/*
 		 * If we are adding a route to a remote net
 		 * or host, the gateway may still be on the
 		 * other end of a pt to pt link.
 		 */
-		ifa = ifa_ifwithdstaddr(gateway);
+		ifa = ifa_ifwithdstaddr(gw);
 	}
 	if (ifa == NULL)
-		ifa = ifa_ifwithnet_scoped(gateway, ifscope);
+		ifa = ifa_ifwithnet_scoped(gw, ifscope);
 	if (ifa == NULL) {
 		/* Workaround to avoid gcc warning regarding const variable */
 		rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)dst,
@@ -1315,19 +1494,27 @@ ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst,
 		if (rt != NULL) {
 			RT_LOCK_SPIN(rt);
 			ifa = rt->rt_ifa;
-			if (ifa != NULL)
-				ifaref(ifa);
+			if (ifa != NULL) {
+				/* Become a regular mutex */
+				RT_CONVERT_LOCK(rt);
+				IFA_ADDREF(ifa);
+			}
 			RT_REMREF_LOCKED(rt);
 			RT_UNLOCK(rt);
 			rt = NULL;
 		}
 	}
+	/*
+	 * Holding rnh_lock here prevents the possibility of ifa from
+	 * changing (e.g. in_ifinit), so it is safe to access its
+	 * ifa_addr (here and down below) without locking.
+	 */
 	if (ifa != NULL && ifa->ifa_addr->sa_family != dst->sa_family) {
 		struct ifaddr *newifa;
 		/* Callee adds reference to newifa upon success */
 		newifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
 		if (newifa != NULL) {
-			ifafree(ifa);
+			IFA_REMREF(ifa);
 			ifa = newifa;
 		}
 	}
@@ -1337,18 +1524,21 @@ ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst,
 	 * that may not agree with info garnered from the interfaces.
 	 * The routing table should carry more precedence than the
 	 * interfaces in this matter.  Must be careful not to stomp
-	 * on new entries from rtinit, hence (ifa->ifa_addr != gateway).
+	 * on new entries from rtinit, hence (ifa->ifa_addr != gw).
 	 */
 	if ((ifa == NULL ||
-	    !equal(ifa->ifa_addr, (struct sockaddr *)(size_t)gateway)) &&
-	    (rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)gateway,
+	    !equal(ifa->ifa_addr, (struct sockaddr *)(size_t)gw)) &&
+	    (rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)gw,
 	    0, 0, ifscope)) != NULL) {
 		if (ifa != NULL)
-			ifafree(ifa);
+			IFA_REMREF(ifa);
 		RT_LOCK_SPIN(rt);
 		ifa = rt->rt_ifa;
-		if (ifa != NULL)
-			ifaref(ifa);
+		if (ifa != NULL) {
+			/* Become a regular mutex */
+			RT_CONVERT_LOCK(rt);
+			IFA_ADDREF(ifa);
+		}
 		RT_REMREF_LOCKED(rt);
 		RT_UNLOCK(rt);
 	}
@@ -1359,7 +1549,7 @@ ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst,
 	 */
 	if ((flags & RTF_IFSCOPE) &&
 	    ifa != NULL && ifa->ifa_ifp->if_index != ifscope) {
-		ifafree(ifa);
+		IFA_REMREF(ifa);
 		ifa = NULL;
 	}
 
@@ -1400,7 +1590,7 @@ rtrequest_scoped_locked(int req, struct sockaddr *dst,
  * Do appropriate manipulations of a routing tree given all the bits of
  * info needed.
  *
- * Embedding the scope in the radix key is an internal job that should be
+ * Storing the scope ID in the radix key is an internal job that should be
  * left to routines in this module.  Callers should specify the scope value
  * to the "scoped" variants of route routines instead of manipulating the
  * key itself.  This is typically done when creating a scoped route, e.g.
@@ -1422,59 +1612,79 @@ rtrequest_common_locked(int req, struct sockaddr *dst0,
 	struct radix_node_head *rnh;
 	struct ifaddr *ifa = NULL;
 	struct sockaddr *ndst, *dst = dst0;
-	struct sockaddr_in sin, mask;
+	struct sockaddr_storage ss, mask;
+	struct timeval curr_calendartime;
+	int af = dst->sa_family;
+	void (*ifa_rtrequest)(int, struct rtentry *, struct sockaddr *);
+
 #define senderr(x) { error = x ; goto bad; }
 
 	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 	/*
 	 * Find the correct routing tree to use for this Address Family
 	 */
-	if ((rnh = rt_tables[dst->sa_family]) == 0)
+	if ((rnh = rt_tables[af]) == NULL)
 		senderr(ESRCH);
 	/*
 	 * If we are adding a host route then we don't want to put
 	 * a netmask in the tree
 	 */
 	if (flags & RTF_HOST)
-		netmask = 0;
+		netmask = NULL;
 
 	/*
-	 * If RTF_IFSCOPE is specified, use a local copy of the destination
-	 * address to embed the scope into.  This logic is repeated below
+	 * If Scoped Routing is enabled, use a local copy of the destination
+	 * address to store the scope ID into.  This logic is repeated below
 	 * in the RTM_RESOLVE handler since the caller does not normally
-	 * specify such a flag during a resolve; instead it passes in the
-	 * route used for cloning for which the scope info is derived from.
-	 * Note also that in the case of RTM_DELETE, the address passed in
-	 * by the caller might already contain the embedded scope info when
-	 * it is the key itself, thus making RTF_IFSCOPE unnecessary; one
-	 * instance where it is explicitly set is inside route_output()
-	 * as part of handling a routing socket request.
+	 * specify such a flag during a resolve, as well as for the handling
+	 * of IPv4 link-local addresses; instead, it passes in the route used
+	 * for cloning, from which the scope info is derived.  Note also that
+	 * in the case of RTM_DELETE, the address passed in by the caller
+	 * might already contain the scope ID info when it is the key itself,
+	 * thus making RTF_IFSCOPE unnecessary; one instance where it is
+	 * explicitly set is inside route_output() as part of handling a
+	 * routing socket request.
 	 */
-	if (req != RTM_RESOLVE && (flags & RTF_IFSCOPE)) {
-		/* Scoped routing is for AF_INET only */
-		if (dst->sa_family != AF_INET ||
-		    (req == RTM_ADD && !ip_doscopedroute))
-			senderr(EINVAL);
+#if INET6
+	if (req != RTM_RESOLVE &&
+	    ((af == AF_INET && ip_doscopedroute) ||
+	    (af == AF_INET6 && ip6_doscopedroute))) {
+#else
+	if (req != RTM_RESOLVE && af == AF_INET && ip_doscopedroute) {
+#endif /* !INET6 */
+		/* Transform dst into the internal routing table form */
+		dst = sa_copy(dst, &ss, &ifscope);
 
-		if (ifscope == IFSCOPE_NONE) {
-			flags &= ~RTF_IFSCOPE;
-		} else {
-			/* Embed ifscope into the key (local copy) */
-			dst = sin_copy(SIN(dst), &sin, ifscope);
+		/* Transform netmask into the internal routing table form */
+		if (netmask != NULL)
+			netmask = ma_copy(af, netmask, &mask, ifscope);
 
-			/* Embed ifscope into netmask (local copy) */
-			if (netmask != NULL)
-				netmask = mask_copy(netmask, &mask, ifscope);
-		}
+		if (ifscope != IFSCOPE_NONE)
+			flags |= RTF_IFSCOPE;
+	} else {
+		if ((flags & RTF_IFSCOPE) && (af != AF_INET && af != AF_INET6))
+			senderr(EINVAL);
+
+#if INET6
+		if ((af == AF_INET && !ip_doscopedroute) ||
+		    (af == AF_INET6 && !ip6_doscopedroute))
+#else
+		if (af == AF_INET && !ip_doscopedroute)
+#endif /* !INET6 */
+			ifscope = IFSCOPE_NONE;
 	}
 
+	if (ifscope == IFSCOPE_NONE)
+		flags &= ~RTF_IFSCOPE;
+
 	switch (req) {
-	case RTM_DELETE:
+	case RTM_DELETE: {
+		struct rtentry *gwrt = NULL;
 		/*
 		 * Remove the item from the tree and return it.
 		 * Complain if it is not there and do no more processing.
 		 */
-		if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == 0)
+		if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == NULL)
 			senderr(ESRCH);
 		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
 			panic ("rtrequest delete");
@@ -1512,20 +1722,22 @@ rtrequest_common_locked(int req, struct sockaddr *dst0,
 
 		/*
 		 * Remove any external references we may have.
-		 * This might result in another rtentry being freed if
-		 * we held its last reference.
 		 */
-		if (rt->rt_gwroute != NULL) {
-			rtfree_locked(rt->rt_gwroute);
+		if ((gwrt = rt->rt_gwroute) != NULL)
 			rt->rt_gwroute = NULL;
-		}
 
 		/*
 		 * give the protocol a chance to keep things in sync.
 		 */
-		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
-			ifa->ifa_rtrequest(RTM_DELETE, rt, SA(0));
-		ifa = NULL;
+		if ((ifa = rt->rt_ifa) != NULL) {
+			IFA_LOCK_SPIN(ifa);
+			ifa_rtrequest = ifa->ifa_rtrequest;
+			IFA_UNLOCK(ifa);
+			if (ifa_rtrequest != NULL)
+				ifa_rtrequest(RTM_DELETE, rt, NULL);
+			/* keep reference on rt_ifa */
+			ifa = NULL;
+		}
 
 		/*
 		 * one more rtentry floating around that is not
@@ -1541,18 +1753,23 @@ rtrequest_common_locked(int req, struct sockaddr *dst0,
 		 * If this is the (non-scoped) default route, clear
 		 * the interface index used for the primary ifscope.
 		 */
-		if (rt_inet_default(rt, rt_key(rt)))
-			set_primary_ifscope(IFSCOPE_NONE);
-
-#if IFNET_ROUTE_REFCNT
-		if (rt->rt_if_ref_fn != NULL) {
-			rt->rt_if_ref_fn(rt->rt_ifp, -1);
-			rt->rt_flags &= ~RTF_IFREF;
+		if (rt_primary_default(rt, rt_key(rt))) {
+			set_primary_ifscope(rt_key(rt)->sa_family,
+			    IFSCOPE_NONE);
 		}
-#endif /* IFNET_ROUTE_REFCNT */
+		rt_clear_idleref(rt);
 
 		RT_UNLOCK(rt);
 
+		/*
+		 * This might result in another rtentry being freed if
+		 * we held its last reference.  Do this after the rtentry
+		 * lock is dropped above, as it could lead to the same
+		 * lock being acquired if gwrt is a clone of rt.
+		 */
+		if (gwrt != NULL)
+			rtfree_locked(gwrt);
+
 		/*
 		 * If the caller wants it, then it can have it,
 		 * but it's up to it to free the rtentry as we won't be
@@ -1566,9 +1783,9 @@ rtrequest_common_locked(int req, struct sockaddr *dst0,
 			rtfree_locked(rt);
 		}
 		break;
-
+	}
 	case RTM_RESOLVE:
-		if (ret_nrt == 0 || (rt = *ret_nrt) == 0)
+		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
 			senderr(EINVAL);
 		/*
 		 * If cloning, we have the parent route given by the caller
@@ -1581,40 +1798,55 @@ rtrequest_common_locked(int req, struct sockaddr *dst0,
 		 * of rt_rmx.
 		 */
 		ifa = rt->rt_ifa;
-		ifaref(ifa);
+		IFA_ADDREF(ifa);
 		flags = rt->rt_flags &
 		    ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC);
 		flags |= RTF_WASCLONED;
 		gateway = rt->rt_gateway;
-		if ((netmask = rt->rt_genmask) == 0)
+		if ((netmask = rt->rt_genmask) == NULL)
 			flags |= RTF_HOST;
 
-		if (!ip_doscopedroute || dst->sa_family != AF_INET)
+#if INET6
+		if ((af != AF_INET && af != AF_INET6) ||
+		    (af == AF_INET && !ip_doscopedroute) ||
+		    (af == AF_INET6 && !ip6_doscopedroute))
+#else
+		if (af != AF_INET || !ip_doscopedroute)
+#endif /* !INET6 */
 			goto makeroute;
+
 		/*
 		 * When scoped routing is enabled, cloned entries are
 		 * always scoped according to the interface portion of
 		 * the parent route.  The exception to this are IPv4
 		 * link local addresses.
 		 */
-		if (!IN_LINKLOCAL(ntohl(SIN(dst)->sin_addr.s_addr))) {
+		if (af == AF_INET &&
+		    IN_LINKLOCAL(ntohl(SIN(dst)->sin_addr.s_addr))) {
+			ifscope = IFSCOPE_NONE;
+			flags &= ~RTF_IFSCOPE;
+		} else {
 			if (flags & RTF_IFSCOPE) {
-				ifscope = sa_get_ifscope(rt_key(rt));
+				ifscope = (af == AF_INET) ?
+				    sin_get_ifscope(rt_key(rt)) :
+				    sin6_get_ifscope(rt_key(rt));
 			} else {
 				ifscope = rt->rt_ifp->if_index;
 				flags |= RTF_IFSCOPE;
 			}
-		} else {
-			ifscope = IFSCOPE_NONE;
-			flags &= ~RTF_IFSCOPE;
+			VERIFY(ifscope != IFSCOPE_NONE);
 		}
 
-		/* Embed or clear ifscope into/from the key (local copy) */
-		dst = sin_copy(SIN(dst), &sin, ifscope);
+		/*
+		 * Transform dst into the internal routing table form,
+		 * clearing out the scope ID field if ifscope isn't set.
+		 */
+		dst = sa_copy(dst, &ss, (ifscope == IFSCOPE_NONE) ?
+		    NULL : &ifscope);
 
-		/* Embed or clear ifscope into/from netmask (local copy) */
+		/* Transform netmask into the internal routing table form */
 		if (netmask != NULL)
-			netmask = mask_copy(netmask, &mask, ifscope);
+			netmask = ma_copy(af, netmask, &mask, ifscope);
 
 		goto makeroute;
 
@@ -1631,10 +1863,13 @@ rtrequest_common_locked(int req, struct sockaddr *dst0,
 		if (ifa == NULL)
 			senderr(ENETUNREACH);
 makeroute:
+		getmicrotime(&curr_calendartime);
 		if ((rt = rte_alloc()) == NULL)
 			senderr(ENOBUFS);
 		Bzero(rt, sizeof(*rt));
 		rte_lock_init(rt);
+		rt->base_calendartime = curr_calendartime.tv_sec;
+		rt->base_uptime = net_uptime();
 		RT_LOCK(rt);
 		rt->rt_flags = RTF_UP | flags;
 
@@ -1644,6 +1879,7 @@ makeroute:
 		 */
 		if ((error = rt_setgate(rt, dst, gateway)) != 0) {
 			RT_UNLOCK(rt);
+			nstat_route_detach(rt);
 			rte_lock_destroy(rt);
 			rte_free(rt);
 			senderr(error);
@@ -1712,23 +1948,24 @@ makeroute:
 		 * If it still failed to go into the tree,
 		 * then un-make it (this should be a function)
 		 */
-		if (rn == 0) {
+		if (rn == NULL) {
 			if (rt->rt_gwroute) {
 				rtfree_locked(rt->rt_gwroute);
 				rt->rt_gwroute = NULL;
 			}
 			if (rt->rt_ifa) {
-				ifafree(rt->rt_ifa);
+				IFA_REMREF(rt->rt_ifa);
 				rt->rt_ifa = NULL;
 			}
 			R_Free(rt_key(rt));
 			RT_UNLOCK(rt);
+			nstat_route_detach(rt);
 			rte_lock_destroy(rt);
 			rte_free(rt);
 			senderr(EEXIST);
 		}
 
-		rt->rt_parent = 0;
+		rt->rt_parent = NULL;
 
 		/*
 		 * If we got here from RESOLVE, then we are cloning so clone
@@ -1741,42 +1978,46 @@ makeroute:
 		 */
 		if (req == RTM_RESOLVE) {
 			RT_LOCK_SPIN(*ret_nrt);
-			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
+			VERIFY((*ret_nrt)->rt_expire == 0 ||
+			    (*ret_nrt)->rt_rmx.rmx_expire != 0);
+			VERIFY((*ret_nrt)->rt_expire != 0 ||
+			    (*ret_nrt)->rt_rmx.rmx_expire == 0);
+			rt->rt_rmx = (*ret_nrt)->rt_rmx;
+			rt_setexpire(rt, (*ret_nrt)->rt_expire);
 			if ((*ret_nrt)->rt_flags & (RTF_CLONING | RTF_PRCLONING)) {
 				rt->rt_parent = (*ret_nrt);
 				RT_ADDREF_LOCKED(*ret_nrt);
 			}
 			RT_UNLOCK(*ret_nrt);
 
-#if IFNET_ROUTE_REFCNT
 			/*
 			 * Enable interface reference counting for unicast
 			 * cloned routes and bump up the reference count.
 			 */
 			if (rt->rt_parent != NULL &&
 			    !(rt->rt_flags & (RTF_BROADCAST | RTF_MULTICAST))) {
-				rt->rt_if_ref_fn = rte_if_ref;
-				rt->rt_if_ref_fn(rt->rt_ifp, 1);
-				rt->rt_flags |= RTF_IFREF;
+				rt_set_idleref(rt);
 			}
-#endif /* IFNET_ROUTE_REFCNT */
 		}
 
 		/*
 		 * if this protocol has something to add to this then
 		 * allow it to do that as well.
 		 */
-		if (ifa->ifa_rtrequest)
-			ifa->ifa_rtrequest(req, rt, SA(ret_nrt ? *ret_nrt : 0));
-		ifafree(ifa);
-		ifa = 0;
+		IFA_LOCK_SPIN(ifa);
+		ifa_rtrequest = ifa->ifa_rtrequest;
+		IFA_UNLOCK(ifa);
+		if (ifa_rtrequest != NULL)
+			ifa_rtrequest(req, rt, SA(ret_nrt ? *ret_nrt : NULL));
+		IFA_REMREF(ifa);
+		ifa = NULL;
 
 		/*
 		 * If this is the (non-scoped) default route, record
 		 * the interface index used for the primary ifscope.
 		 */
-		if (rt_inet_default(rt, rt_key(rt)))
-			set_primary_ifscope(rt->rt_ifp->if_index);
+		if (rt_primary_default(rt, rt_key(rt))) {
+			set_primary_ifscope(rt_key(rt)->sa_family,
+			    rt->rt_ifp->if_index);
+		}
 
 		/*
 		 * actually return a resultant rtentry and
@@ -1793,7 +2034,7 @@ makeroute:
 		 * hasn't been added to the tree yet.
 		 */
 		if (req == RTM_ADD &&
-		    !(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
+		    !(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL) {
 			struct rtfc_arg arg;
 			arg.rnh = rnh;
 			arg.rt0 = rt;
@@ -1803,22 +2044,19 @@ makeroute:
 		} else {
 			RT_UNLOCK(rt);
 		}
+
+		nstat_route_new_entry(rt);
 		break;
 	}
 bad:
 	if (ifa)
-		ifafree(ifa);
+		IFA_REMREF(ifa);
 	return (error);
 }
 
 int
-rtrequest(
-	int req,
-	struct sockaddr *dst,
-	struct sockaddr *gateway,
-	struct sockaddr *netmask,
-	int flags,
-	struct rtentry **ret_nrt)
+rtrequest(int req, struct sockaddr *dst, struct sockaddr *gateway,
+    struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
 {
 	int error;
 	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
@@ -1827,6 +2065,21 @@ rtrequest(
 	lck_mtx_unlock(rnh_lock);
 	return (error);
 }
+
+int
+rtrequest_scoped(int req, struct sockaddr *dst, struct sockaddr *gateway,
+    struct sockaddr *netmask, int flags, struct rtentry **ret_nrt,
+    unsigned int ifscope)
+{
+	int error;
+	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
+	lck_mtx_lock(rnh_lock);
+	error = rtrequest_scoped_locked(req, dst, gateway, netmask, flags,
+	    ret_nrt, ifscope);
+	lck_mtx_unlock(rnh_lock);
+	return (error);
+}
+
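/*
 * Illustrative usage sketch (editorial aside, not part of this patch):
 * adding a host route bound to an interface via the scoped variant
 * above, so that the caller never stores the scope ID into the radix
 * key itself.  The wrapper name is hypothetical.
 */
static __unused int
example_add_scoped_host_route(struct sockaddr *dst, struct sockaddr *gw,
    unsigned int ifscope)
{
	struct rtentry *rt = NULL;
	int error;

	error = rtrequest_scoped(RTM_ADD, dst, gw, NULL,
	    RTF_HOST | RTF_GATEWAY | RTF_STATIC, &rt, ifscope);
	if (error == 0 && rt != NULL)
		rtfree(rt);	/* drop the reference returned via ret_nrt */
	return (error);
}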
 /*
  * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family''
  * (i.e., the routes related to it by the operation of cloning).  This
@@ -2018,11 +2271,16 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
 		struct rtentry *gwrt;
 		unsigned int ifscope;
 
-		ifscope = (dst->sa_family == AF_INET) ?
-		    sa_get_ifscope(dst) : IFSCOPE_NONE;
+		if (dst->sa_family == AF_INET)
+			ifscope = sin_get_ifscope(dst);
+		else if (dst->sa_family == AF_INET6)
+			ifscope = sin6_get_ifscope(dst);
+		else
+			ifscope = IFSCOPE_NONE;
 
 		RT_UNLOCK(rt);
-		gwrt = rtalloc1_scoped_locked(gate, 1, RTF_PRCLONING, ifscope);
+		gwrt = rtalloc1_scoped_locked(gate, 1,
+		    RTF_CLONING | RTF_PRCLONING, ifscope);
 		if (gwrt != NULL)
 			RT_LOCK_ASSERT_NOTHELD(gwrt);
 		RT_LOCK(rt);
@@ -2082,8 +2340,10 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
 		 * primary ifscope.  Also done in rt_setif() to take care
 		 * of the non-redirect cases.
 		 */
-		if (rt_inet_default(rt, dst) && rt->rt_ifp != NULL)
-			set_primary_ifscope(rt->rt_ifp->if_index);
+		if (rt_primary_default(rt, dst) && rt->rt_ifp != NULL) {
+			set_primary_ifscope(dst->sa_family,
+			    rt->rt_ifp->if_index);
+		}
 
 		/*
 		 * Tell the kernel debugger about the new default gateway
@@ -2095,8 +2355,8 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
 		 */
 		if ((dst->sa_family == AF_INET) &&
 		    gwrt != NULL && gwrt->rt_gateway->sa_family == AF_LINK &&
-		    (gwrt->rt_ifp->if_index == get_primary_ifscope() ||
-		    get_primary_ifscope() == IFSCOPE_NONE))
+		    (gwrt->rt_ifp->if_index == get_primary_ifscope(AF_INET) ||
+		    get_primary_ifscope(AF_INET) == IFSCOPE_NONE))
 			kdp_set_gateway_mac(SDL(gwrt->rt_gateway)->sdl_data);
 	}
 
@@ -2142,11 +2402,16 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
 	 * For consistency between rt_gateway and rt_key(gwrt).
 	 */
 	if ((rt->rt_flags & RTF_GATEWAY) && rt->rt_gwroute != NULL &&
-	    (rt->rt_gwroute->rt_flags & RTF_IFSCOPE) &&
-	    rt->rt_gateway->sa_family == AF_INET &&
-	    rt_key(rt->rt_gwroute)->sa_family == AF_INET) {
-		sa_set_ifscope(rt->rt_gateway,
-		    sa_get_ifscope(rt_key(rt->rt_gwroute)));
+	    (rt->rt_gwroute->rt_flags & RTF_IFSCOPE)) {
+		if (rt->rt_gateway->sa_family == AF_INET &&
+		    rt_key(rt->rt_gwroute)->sa_family == AF_INET) {
+			sin_set_ifscope(rt->rt_gateway,
+			    sin_get_ifscope(rt_key(rt->rt_gwroute)));
+		} else if (rt->rt_gateway->sa_family == AF_INET6 &&
+		    rt_key(rt->rt_gwroute)->sa_family == AF_INET6) {
+			sin6_set_ifscope(rt->rt_gateway,
+			    sin6_get_ifscope(rt_key(rt->rt_gwroute)));
+		}
 	}
 
 	/*
@@ -2192,32 +2457,35 @@ rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
 }
 
 /*
- * Lookup an AF_INET scoped or non-scoped route depending on the ifscope
- * value passed in by the caller (IFSCOPE_NONE implies non-scoped).
+ * Lookup an AF_INET/AF_INET6 scoped or non-scoped route depending on the
+ * ifscope value passed in by the caller (IFSCOPE_NONE implies non-scoped).
  */
 static struct radix_node *
 node_lookup(struct sockaddr *dst, struct sockaddr *netmask,
     unsigned int ifscope)
 {
-	struct radix_node_head *rnh = rt_tables[AF_INET];
+	struct radix_node_head *rnh;
 	struct radix_node *rn;
-	struct sockaddr_in sin, mask;
+	struct sockaddr_storage ss, mask;
+	int af = dst->sa_family;
 	struct matchleaf_arg ma = { ifscope };
 	rn_matchf_t *f = rn_match_ifscope;
 	void *w = &ma;
 
-	if (dst->sa_family != AF_INET)
+	if (af != AF_INET && af != AF_INET6)
 		return (NULL);
 
+	rnh = rt_tables[af];
+
 	/*
-	 * Embed ifscope into the search key; for a non-scoped
-	 * search this will clear out any embedded scope value.
+	 * Transform dst into the internal routing table form,
+	 * clearing out the scope ID field if ifscope isn't set.
 	 */
-	dst = sin_copy(SIN(dst), &sin, ifscope);
+	dst = sa_copy(dst, &ss, (ifscope == IFSCOPE_NONE) ? NULL : &ifscope);
 
-	/* Embed (or clear) ifscope into netmask */
+	/* Transform netmask into the internal routing table form */
 	if (netmask != NULL)
-		netmask = mask_copy(netmask, &mask, ifscope);
+		netmask = ma_copy(af, netmask, &mask, ifscope);
 
 	if (ifscope == IFSCOPE_NONE)
 		f = w = NULL;
@@ -2230,13 +2498,18 @@ node_lookup(struct sockaddr *dst, struct sockaddr *netmask,
 }
 
 /*
- * Lookup the AF_INET non-scoped default route.
+ * Lookup the AF_INET/AF_INET6 non-scoped default route.
  */
 static struct radix_node *
-node_lookup_default(void)
+node_lookup_default(int af)
 {
-	struct radix_node_head *rnh = rt_tables[AF_INET];
-	return (rnh->rnh_lookup(&sin_def, NULL, rnh));
+	struct radix_node_head *rnh;
+
+	VERIFY(af == AF_INET || af == AF_INET6);
+	rnh = rt_tables[af];
+
+	return (af == AF_INET ? rnh->rnh_lookup(&sin_def, NULL, rnh) :
+	    rnh->rnh_lookup(&sin6_def, NULL, rnh));
 }
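/*
 * Illustrative usage sketch (editorial aside, not part of this patch):
 * a scoped lookup through rt_lookup(), which in turn consults
 * node_lookup() and node_lookup_default() above.  The wrapper name is
 * hypothetical.
 */
static __unused struct rtentry *
example_scoped_lookup(struct sockaddr *dst, unsigned int ifscope)
{
	struct rtentry *rt;

	lck_mtx_lock(rnh_lock);
	rt = rt_lookup(TRUE, dst, NULL, rt_tables[dst->sa_family], ifscope);
	lck_mtx_unlock(rnh_lock);

	return (rt);	/* if non-NULL, caller owns a reference */
}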
 
 /*
@@ -2250,10 +2523,10 @@ node_lookup_default(void)
  * per-interface route instance.  This permits multiple route entries having
  * the same destination (but not necessarily the same gateway) to exist in
  * the routing table; each of these entries is specific to the corresponding
- * interface.  This is made possible by embedding the scope value into the
+ * interface.  This is made possible by storing the scope ID value into the
  * radix key, thus making each route entry unique.  These scoped entries
  * exist along with the regular, non-scoped entries in the same radix tree
- * for a given address family (currently AF_INET only); the scope logically
+ * for a given address family (AF_INET/AF_INET6); the scope logically
  * partitions it into multiple per-interface sub-trees.
  *
  * When a scoped route lookup is performed, the routing table is searched for
@@ -2267,7 +2540,9 @@ rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask,
     struct radix_node_head *rnh, unsigned int ifscope)
 {
 	struct radix_node *rn0, *rn;
-	boolean_t dontcare = (ifscope == IFSCOPE_NONE);
+	boolean_t dontcare;
+	int af = dst->sa_family;
+	struct sockaddr_storage dst_ss, mask_ss;
 
 	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 
@@ -2277,11 +2552,14 @@ rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask,
 	/*
 	 * Non-scoped route lookup.
 	 */
-	if (!ip_doscopedroute || dst->sa_family != AF_INET) {
-		if (lookup_only)
-			rn = rnh->rnh_lookup(dst, netmask, rnh);
-		else
-			rn = rnh->rnh_matchaddr(dst, rnh);
+#if INET6
+	if ((af != AF_INET && af != AF_INET6) ||
+	    (af == AF_INET && !ip_doscopedroute) ||
+	    (af == AF_INET6 && !ip6_doscopedroute)) {
+#else
+	if (af != AF_INET || !ip_doscopedroute) {
+#endif /* !INET6 */
+		rn = rnh->rnh_matchaddr(dst, rnh);
 
 		/*
 		 * Don't return a root node; also, rnh_matchaddr callback
@@ -2303,6 +2581,12 @@ rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask,
 		return (RT(rn));
 	}
 
+	/* Transform dst/netmask into the internal routing table form */
+	dst = sa_copy(dst, &dst_ss, &ifscope);
+	if (netmask != NULL)
+		netmask = ma_copy(af, netmask, &mask_ss, ifscope);
+	dontcare = (ifscope == IFSCOPE_NONE);
+
 	/*
 	 * Scoped route lookup:
 	 *
@@ -2316,10 +2600,13 @@ rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask,
 	/*
 	 * If the caller did not specify a scope, use the primary scope
 	 * derived from the system's non-scoped default route.  If, for
-	 * any reason, there is no primary interface, return what we have.
+	 * any reason, there is no primary interface, ifscope will be
+	 * set to IFSCOPE_NONE; if the above lookup resulted in a route,
+	 * we'll do a more-specific search below, scoped to the interface
+	 * of that route.
 	 */
-	if (dontcare && (ifscope = get_primary_ifscope()) == IFSCOPE_NONE)
-		goto done;
+	if (dontcare)
+		ifscope = get_primary_ifscope(af);
 
 	/*
 	 * Keep the original result if either of the following is true:
@@ -2381,7 +2668,7 @@ rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask,
 	 *	as a more specific route.
 	 */
 	if (rn == NULL || (rn0 != NULL &&
-	    ((INET_DEFAULT(rt_key(RT(rn))) && !INET_DEFAULT(rt_key(RT(rn0)))) ||
+	    ((SA_DEFAULT(rt_key(RT(rn))) && !SA_DEFAULT(rt_key(RT(rn0)))) ||
 	    (!RT_HOST(rn) && RT_HOST(rn0)))))
 		rn = rn0;
 
@@ -2389,23 +2676,20 @@ rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask,
 	 * If we still don't have a route, use the non-scoped default
 	 * route as long as the interface portion satisfies the scope.
 	 */
-	if (rn == NULL && (rn = node_lookup_default()) != NULL &&
+	if (rn == NULL && (rn = node_lookup_default(af)) != NULL &&
 	    RT(rn)->rt_ifp->if_index != ifscope)
 		rn = NULL;
 
-done:
 	if (rn != NULL) {
 		/*
-		 * Manually clear RTPRF_OURS using in_validate() and
+		 * Manually clear RTPRF_OURS using rt_validate() and
 		 * bump up the reference count after, and not before;
-		 * we only get here for AF_INET.  node_lookup() has
-		 * done the check against RNF_ROOT, so we can be sure
+		 * we only get here for AF_INET/AF_INET6.  node_lookup()
+		 * has done the check against RNF_ROOT, so we can be sure
 		 * that we're not returning a root node here.
 		 */
 		RT_LOCK_SPIN(RT(rn));
-		if (!(RT(rn)->rt_flags & RTF_CONDEMNED)) {
-			if (!lookup_only)
-				(void) in_validate(rn);
+		if (rt_validate(RT(rn))) {
 			RT_ADDREF_LOCKED(RT(rn));
 			RT_UNLOCK(RT(rn));
 		} else {
@@ -2417,6 +2701,25 @@ done:
 	return (RT(rn));
 }
 
+boolean_t
+rt_validate(struct rtentry *rt)
+{
+	RT_LOCK_ASSERT_HELD(rt);
+
+	if (!(rt->rt_flags & RTF_CONDEMNED)) {
+		int af = rt_key(rt)->sa_family;
+
+		if (af == AF_INET)
+			(void) in_validate(RN(rt));
+		else if (af == AF_INET6)
+			(void) in6_validate(RN(rt));
+	} else {
+		rt = NULL;
+	}
+
+	return (rt != NULL);
+}
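/*
 * Illustrative usage sketch (editorial aside, not part of this patch):
 * taking a reference on a radix node only if rt_validate() above
 * accepts it, mirroring the pattern used by rt_lookup().  The wrapper
 * name is hypothetical.
 */
static __unused struct rtentry *
example_take_validated_ref(struct radix_node *rn)
{
	struct rtentry *rt = RT(rn);

	RT_LOCK_SPIN(rt);
	if (rt_validate(rt)) {
		RT_ADDREF_LOCKED(rt);
		RT_UNLOCK(rt);
	} else {
		RT_UNLOCK(rt);
		rt = NULL;
	}
	return (rt);
}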
+
 /*
  * Set up a routing table entry, normally
  * for an interface.
@@ -2440,8 +2743,14 @@ rtinit_locked(struct ifaddr *ifa, int cmd, int flags)
 	struct sockaddr *deldst;
 	struct mbuf *m = 0;
 	struct rtentry *nrt = 0;
+	u_int32_t ifa_flags;
 	int error;
 
+	/*
+	 * Holding rnh_lock here prevents the possibility of ifa from
+	 * changing (e.g. in_ifinit), so it is safe to access its
+	 * ifa_{dst}addr (here and down below) without locking.
+	 */
 	dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
 	/*
 	 * If it's a delete, check that if it exists, it's on the correct
@@ -2513,8 +2822,11 @@ rtinit_locked(struct ifaddr *ifa, int cmd, int flags)
 	/*
 	 * Do the actual request
 	 */
+	IFA_LOCK_SPIN(ifa);
+	ifa_flags = ifa->ifa_flags;
+	IFA_UNLOCK(ifa);
 	error = rtrequest_locked(cmd, dst, ifa->ifa_addr, ifa->ifa_netmask,
-			flags | ifa->ifa_flags, &nrt);
+			flags | ifa_flags, &nrt);
 	if (m)
 		(void) m_free(m);
 	/*
@@ -2544,6 +2856,9 @@ rtinit_locked(struct ifaddr *ifa, int cmd, int flags)
 		 * have already existed or something. (XXX)
 		 */
 		if (rt->rt_ifa != ifa) {
+			void (*ifa_rtrequest)
+			    (int, struct rtentry *, struct sockaddr *);
+
 			if (!(rt->rt_ifa->ifa_ifp->if_flags &
 			    (IFF_POINTOPOINT|IFF_LOOPBACK)))
 				printf("rtinit: wrong ifa (%p) was (%p)\n",
@@ -2553,22 +2868,31 @@ rtinit_locked(struct ifaddr *ifa, int cmd, int flags)
 			 * remove anything it has associated with
 			 * this route and ifaddr.
 			 */
-			if (rt->rt_ifa->ifa_rtrequest)
-			    rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, SA(0));
+			IFA_LOCK_SPIN(rt->rt_ifa);
+			ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
+			IFA_UNLOCK(rt->rt_ifa);
+			if (ifa_rtrequest != NULL)
+				ifa_rtrequest(RTM_DELETE, rt, SA(0));
 			/*
 			 * Set the route's ifa.
 			 */
 			rtsetifa(rt, ifa);
-#if IFNET_ROUTE_REFCNT
-			/*
-			 * Adjust route ref count for the interfaces.
-			 */
-			if (rt->rt_if_ref_fn != NULL &&
-			    rt->rt_ifp != ifa->ifa_ifp) {
-				rt->rt_if_ref_fn(ifa->ifa_ifp, 1);
-				rt->rt_if_ref_fn(rt->rt_ifp, -1);
+
+			if (rt->rt_ifp != ifa->ifa_ifp) {
+				/*
+				 * Purge any link-layer info caching.
+				 */
+				if (rt->rt_llinfo_purge != NULL)
+					rt->rt_llinfo_purge(rt);
+				/*
+				 * Adjust route ref count for the interfaces.
+				 */
+				if (rt->rt_if_ref_fn != NULL) {
+					rt->rt_if_ref_fn(ifa->ifa_ifp, 1);
+					rt->rt_if_ref_fn(rt->rt_ifp, -1);
+				}
 			}
-#endif /* IFNET_ROUTE_REFCNT */
+
 			/*
 			 * And substitute in references to the ifaddr
 			 * we are adding.
@@ -2579,8 +2903,11 @@ rtinit_locked(struct ifaddr *ifa, int cmd, int flags)
 			 * Now ask the protocol to check if it needs
 			 * any special processing in its new form.
 			 */
-			if (ifa->ifa_rtrequest)
-			    ifa->ifa_rtrequest(RTM_ADD, rt, SA(0));
+			IFA_LOCK_SPIN(ifa);
+			ifa_rtrequest = ifa->ifa_rtrequest;
+			IFA_UNLOCK(ifa);
+			if (ifa_rtrequest != NULL)
+				ifa_rtrequest(RTM_ADD, rt, SA(0));
 		}
 		/*
 		 * notify any listening routing agents of the change
@@ -2604,7 +2931,6 @@ rtinit_locked(struct ifaddr *ifa, int cmd, int flags)
 u_int64_t
 rt_expiry(struct rtentry *rt, u_int64_t base, u_int32_t delta)
 {
-#if IFNET_ROUTE_REFCNT
 	u_int64_t retval;
 
 	/*
@@ -2619,10 +2945,29 @@ rt_expiry(struct rtentry *rt, u_int64_t base, u_int32_t delta)
 		retval = base + MIN(rt_if_idle_expire_timeout, delta);
 
 	return (retval);
-#else
-#pragma unused(rt)
-	return (base + delta);
-#endif /* IFNET_ROUTE_REFCNT */
+}
+
+void
+rt_set_idleref(struct rtentry *rt)
+{
+	RT_LOCK_ASSERT_HELD(rt);
+
+	rt_clear_idleref(rt);
+	rt->rt_if_ref_fn = rte_if_ref;
+	rt->rt_if_ref_fn(rt->rt_ifp, 1);
+	rt->rt_flags |= RTF_IFREF;
+}
+
+void
+rt_clear_idleref(struct rtentry *rt)
+{
+	RT_LOCK_ASSERT_HELD(rt);
+
+	if (rt->rt_if_ref_fn != NULL) {
+		rt->rt_if_ref_fn(rt->rt_ifp, -1);
+		rt->rt_flags &= ~RTF_IFREF;
+		rt->rt_if_ref_fn = NULL;
+	}
 }
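/*
 * Illustrative sketch (editorial aside, not part of this patch): how
 * this patch pairs the two calls above, rt_set_idleref() on unicast
 * cloned routes at creation and rt_clear_idleref() at deletion, so the
 * interface idle reference count stays balanced.  The wrapper name is
 * hypothetical.
 */
static __unused void
example_idleref_lifecycle(struct rtentry *rt)
{
	RT_LOCK(rt);
	if (rt->rt_parent != NULL &&
	    !(rt->rt_flags & (RTF_BROADCAST | RTF_MULTICAST)))
		rt_set_idleref(rt);	/* as done at RTM_RESOLVE time */
	RT_UNLOCK(rt);

	/* ... route lives in the tree ... */

	RT_LOCK(rt);
	rt_clear_idleref(rt);		/* as done at RTM_DELETE time */
	RT_UNLOCK(rt);
}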
 
 static void
@@ -2703,7 +3048,6 @@ rte_free(struct rtentry *p)
 	zfree(rte_zone, p);
 }
 
-#if IFNET_ROUTE_REFCNT
 static void
 rte_if_ref(struct ifnet *ifp, int cnt)
 {
@@ -2749,7 +3093,6 @@ rte_if_ref(struct ifnet *ifp, int cnt)
 		kev_post_msg(&ev_msg);
 	}
 }
-#endif /* IFNET_ROUTE_REFCNT */
 
 static inline struct rtentry *
 rte_alloc_debug(void)
@@ -2799,3 +3142,50 @@ ctrace_record(ctrace_t *tr)
 	bzero(tr->pc, sizeof (tr->pc));
 	(void) OSBacktrace(tr->pc, CTRACE_STACK_SIZE);
 }
+
+__private_extern__ void
+route_copyout(
+	struct route *dst,
+	const struct route *src,
+	size_t length)
+{
+	/* Copy everything (rt, dst, flags) from ifnet */
+	bcopy(src, dst, length);
+
+	/* Hold one reference for the local copy of struct route */
+	if (dst->ro_rt != NULL)
+		RT_ADDREF(dst->ro_rt);
+}
+
+__private_extern__ void
+route_copyin(
+	struct route *src,
+	struct route *dst,
+	size_t length)
+{
+	/* No cached route in the ifnet? */
+	if (dst->ro_rt == NULL) {
+		/*
+		 * Copy everything (rt, dst, flags) from ip_forward();
+		 * the reference to the route was held at the time
+		 * it was allocated and is kept intact.
+		 */
+		bcopy(src, dst, length);
+	} else if (src->ro_rt != NULL) {
+		/*
+		 * If the same, update just the ro_flags and ditch the one
+		 * in the local copy.  Else ditch the one that is currently
+		 * cached, and cache the new route.
+		 */
+		if (dst->ro_rt == src->ro_rt) {
+			dst->ro_flags = src->ro_flags;
+			rtfree(src->ro_rt);
+		} else {
+			rtfree(dst->ro_rt);
+			bcopy(src, dst, length);
+		}
+	}
+
+	/* This function consumes the reference */
+	src->ro_rt = NULL;
+}
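/*
 * Illustrative usage sketch (editorial aside, not part of this patch):
 * working on a private snapshot of a cached route so that its owner's
 * lock need not be held while the route is in use.  The cache argument
 * and wrapper name are hypothetical.
 */
static __unused void
example_use_cached_route(struct ifnet *ifp, struct route *cache)
{
	struct route ro;

	ifnet_lock_shared(ifp);
	route_copyout(&ro, cache, sizeof (ro));	/* takes a reference */
	ifnet_lock_done(ifp);

	/* ... use ro.ro_rt without holding the ifnet lock ... */

	ifnet_lock_exclusive(ifp);
	route_copyin(&ro, cache, sizeof (ro));	/* consumes the reference */
	ifnet_lock_done(ifp);
}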
diff --git a/bsd/net/route.h b/bsd/net/route.h
index 71eb4f8f8..47aa3f902 100644
--- a/bsd/net/route.h
+++ b/bsd/net/route.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -95,6 +95,15 @@ struct route {
 
 #define	ROF_SRCIF_SELECTED	0x1 /* source interface was selected */
 
+/*
+ * Route reachability info (private)
+ */
+struct rt_reach_info {
+	u_int32_t		ri_refcnt;	/* reference count */
+	u_int32_t		ri_probes;	/* total # of probes */
+	u_int64_t		ri_snd_expire;	/* transmit expiration (calendar) time */
+	u_int64_t		ri_rcv_expire;	/* receive expiration (calendar) time */
+};
 #else
 struct route;
 #endif /* PRIVATE */
@@ -159,6 +168,9 @@ struct rtentry {
 	struct	ifaddr *rt_ifa;		/* the answer: interface addr to use */
 	struct	sockaddr *rt_genmask;	/* for generation of cloned routes */
 	void	*rt_llinfo;		/* pointer to link level info cache */
+	void	(*rt_llinfo_get_ri)	/* llinfo get reachability info fn */
+	    (struct rtentry *, struct rt_reach_info *);
+	void	(*rt_llinfo_purge)(struct rtentry *); /* llinfo purge fn */
 	void	(*rt_llinfo_free)(void *); /* link level info free function */
 	struct	rt_metrics rt_rmx;	/* metrics used by rx'ing protocols */
 	struct	rtentry *rt_gwroute;	/* implied entry for gatewayed routes */
@@ -168,10 +180,15 @@ struct rtentry {
 	 * See bsd/net/route.c for synchronization notes.
 	 */
 	decl_lck_mtx_data(, rt_lock);	/* lock for routing entry */
-#if IFNET_ROUTE_REFCNT
+	struct nstat_counts	*rt_stats;
 	void	(*rt_if_ref_fn)(struct ifnet *, int); /* interface ref func */
-#endif /* IFNET_ROUTE_REFCNT */
+
+	uint64_t rt_expire;		/* expiration time in uptime seconds */
+	uint64_t base_calendartime;	/* calendar time upon entry creation */
+	uint64_t base_uptime;	/* uptime upon entry creation */
 };
+
+extern void rt_setexpire(struct rtentry *, uint64_t);
 #endif /* KERNEL_PRIVATE */
 
 #ifdef KERNEL_PRIVATE
@@ -251,6 +268,27 @@ struct rt_msghdr2 {
 	struct rt_metrics rtm_rmx;	/* metrics themselves */
 };
 
+#ifdef PRIVATE
+/*
+ * Extended routing message header (private).
+ */
+struct rt_msghdr_ext {
+	u_short	rtm_msglen;	/* to skip over non-understood messages */
+	u_char	rtm_version;	/* future binary compatibility */
+	u_char	rtm_type;	/* message type */
+	u_int32_t rtm_index;	/* index for associated ifp */
+	u_int32_t rtm_flags;	/* flags, incl. kern & message, e.g. DONE */
+	u_int32_t rtm_reserved;	/* for future use */
+	u_int32_t rtm_addrs;	/* bitmask identifying sockaddrs in msg */
+	pid_t	rtm_pid;	/* identify sender */
+	int	rtm_seq;	/* for sender to identify action */
+	int	rtm_errno;	/* why failed */
+	u_int32_t rtm_use;	/* from rtentry */
+	u_int32_t rtm_inits;	/* which metrics we are initializing */
+	struct rt_metrics rtm_rmx;	/* metrics themselves */
+	struct rt_reach_info rtm_ri;	/* route reachability info */
+};
+#endif /* PRIVATE */
 
 #define RTM_VERSION	5	/* Up the ante and ignore older versions */
 
@@ -279,6 +317,9 @@ struct rt_msghdr2 {
 #define RTM_IFINFO2	0x12	/* */
 #define RTM_NEWMADDR2	0x13	/* */
 #define RTM_GET2	0x14	/* */
+#ifdef PRIVATE
+#define	RTM_GET_EXT	0x15
+#endif /* PRIVATE */
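/*
 * Illustrative sketch (editorial aside, not part of this patch): a
 * userland RTM_GET_EXT request that reads back the reachability info
 * in rtm_ri.  Assumes <sys/socket.h>, <net/route.h>, <strings.h> and
 * <unistd.h>; reply parsing and error handling are kept minimal.
 */
static int
example_get_reach_info(struct sockaddr *dst, struct rt_reach_info *ri)
{
	struct {
		struct rt_msghdr_ext hdr;
		struct sockaddr_storage ss;
	} msg;
	int s;

	if ((s = socket(PF_ROUTE, SOCK_RAW, 0)) == -1)
		return (-1);
	bzero(&msg, sizeof (msg));
	msg.hdr.rtm_version = RTM_VERSION;
	msg.hdr.rtm_type = RTM_GET_EXT;
	msg.hdr.rtm_addrs = RTA_DST;
	msg.hdr.rtm_seq = 1;
	bcopy(dst, &msg.ss, dst->sa_len);
	msg.hdr.rtm_msglen = sizeof (msg.hdr) + dst->sa_len;
	if (write(s, &msg, msg.hdr.rtm_msglen) == -1 ||
	    read(s, &msg, sizeof (msg)) == -1) {
		(void) close(s);
		return (-1);
	}
	*ri = msg.hdr.rtm_ri;
	(void) close(s);
	return (0);
}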
 
 /*
  * Bitmask values for rtm_inits and rmx_locks.
@@ -445,18 +486,16 @@ extern void rt_missmsg(int, struct rt_addrinfo *, int, int);
 extern void rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *);
 extern void rt_newmaddrmsg(int, struct ifmultiaddr *);
 extern int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
-extern void set_primary_ifscope(unsigned int);
-extern unsigned int get_primary_ifscope(void);
-extern boolean_t rt_inet_default(struct rtentry *, struct sockaddr *);
+extern void set_primary_ifscope(int, unsigned int);
+extern unsigned int get_primary_ifscope(int);
+extern boolean_t rt_primary_default(struct rtentry *, struct sockaddr *);
 extern struct rtentry *rt_lookup(boolean_t, struct sockaddr *,
     struct sockaddr *, struct radix_node_head *, unsigned int);
 extern void rtalloc(struct route *);
+extern void rtalloc_scoped(struct route *, unsigned int);
 extern void rtalloc_ign(struct route *, uint32_t);
-extern void rtalloc_ign_locked(struct route *, uint32_t);
 extern void rtalloc_scoped_ign(struct route *, uint32_t, unsigned int);
-extern void rtalloc_scoped_ign_locked(struct route *, uint32_t, unsigned int);
 extern struct rtentry *rtalloc1(struct sockaddr *, int, uint32_t);
-extern struct rtentry *rtalloc1_locked(struct sockaddr *, int, uint32_t);
 extern struct rtentry *rtalloc1_scoped(struct sockaddr *, int, uint32_t,
     unsigned int);
 extern struct rtentry *rtalloc1_scoped_locked(struct sockaddr *, int,
@@ -478,19 +517,30 @@ extern void rtredirect(struct ifnet *, struct sockaddr *, struct sockaddr *,
     struct sockaddr *, int, struct sockaddr *, struct rtentry **);
 extern int rtrequest(int, struct sockaddr *,
     struct sockaddr *, struct sockaddr *, int, struct rtentry **);
+extern int rtrequest_scoped(int, struct sockaddr *, struct sockaddr *,
+    struct sockaddr *, int, struct rtentry **, unsigned int);
 extern int rtrequest_locked(int, struct sockaddr *,
     struct sockaddr *, struct sockaddr *, int, struct rtentry **);
 extern int rtrequest_scoped_locked(int, struct sockaddr *, struct sockaddr *,
     struct sockaddr *, int, struct rtentry **, unsigned int);
-extern unsigned int sa_get_ifscope(struct sockaddr *);
+extern void sin_set_ifscope(struct sockaddr *, unsigned int);
+extern unsigned int sin_get_ifscope(struct sockaddr *);
+extern unsigned int sin6_get_ifscope(struct sockaddr *);
 extern void rt_lock(struct rtentry *, boolean_t);
 extern void rt_unlock(struct rtentry *);
-extern struct sockaddr *rtm_scrub_ifscope(int, struct sockaddr *,
+extern struct sockaddr *rtm_scrub_ifscope(int, int, struct sockaddr *,
     struct sockaddr *, struct sockaddr_storage *);
 extern u_int64_t rt_expiry(struct rtentry *, u_int64_t, u_int32_t);
-#if IFNET_ROUTE_REFCNT
+extern void rt_set_idleref(struct rtentry *);
+extern void rt_clear_idleref(struct rtentry *);
 extern void rt_aggdrain(int);
-#endif /* IFNET_ROUTE_REFCNT */
+extern boolean_t rt_validate(struct rtentry *);
+
+#ifdef XNU_KERNEL_PRIVATE
+extern void route_copyin(struct route *src, struct route *dst, size_t length);
+extern void route_copyout(struct route *dst, const struct route *src, size_t length);
+#endif /* XNU_KERNEL_PRIVATE */
+
 #endif /* KERNEL_PRIVATE */
 
 #endif
diff --git a/bsd/net/rtsock.c b/bsd/net/rtsock.c
index 819f8349c..42b20064a 100644
--- a/bsd/net/rtsock.c
+++ b/bsd/net/rtsock.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -73,6 +73,7 @@
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/syslog.h>
+#include <sys/mcache.h>
 #include <kern/lock.h>
 
 #include <net/if.h>
@@ -95,7 +96,6 @@ MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
 static struct	sockaddr route_dst = { 2, PF_ROUTE, { 0, } };
 static struct	sockaddr route_src = { 2, PF_ROUTE, { 0, } };
 static struct	sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, { 0, } };
-static struct	sockproto route_proto = { PF_ROUTE,  0 };
 
 struct walkarg {
 	int	w_tmemsize;
@@ -108,30 +108,21 @@ static struct mbuf *rt_msg1(int, struct rt_addrinfo *);
 static int	rt_msg2(int, struct rt_addrinfo *, caddr_t, struct walkarg *);
 static int	rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
 static int	sysctl_dumpentry(struct radix_node *rn, void *vw);
+static int	sysctl_dumpentry_ext(struct radix_node *rn, void *vw);
 static int	sysctl_iflist(int af, struct walkarg *w);
 static int	sysctl_iflist2(int af, struct walkarg *w);
-static int	 route_output(struct mbuf *, struct socket *);
-static void	 rt_setmetrics(u_int32_t, struct rt_metrics *, struct rt_metrics *);
+static int	route_output(struct mbuf *, struct socket *);
+static void	rt_setmetrics(u_int32_t, struct rt_metrics *, struct rtentry *);
+static void	rt_getmetrics(struct rtentry *, struct rt_metrics *);
 static void	rt_setif(struct rtentry *, struct sockaddr *, struct sockaddr *,
 		    struct sockaddr *, unsigned int);
-#if IFNET_ROUTE_REFCNT
 static void rt_drainall(void);
-#endif /* IFNET_ROUTE_REFCNT */
 
 #define	SIN(sa)		((struct sockaddr_in *)(size_t)(sa))
 
-/* Sleazy use of local variables throughout file, warning!!!! */
-#define dst	info.rti_info[RTAX_DST]
-#define gate	info.rti_info[RTAX_GATEWAY]
-#define netmask	info.rti_info[RTAX_NETMASK]
-#define genmask	info.rti_info[RTAX_GENMASK]
-#define ifpaddr	info.rti_info[RTAX_IFP]
-#define ifaaddr	info.rti_info[RTAX_IFA]
-#define brdaddr	info.rti_info[RTAX_BRD]
 
 SYSCTL_NODE(_net, OID_AUTO, idle, CTLFLAG_RW, 0, "idle network monitoring");
 
-#if IFNET_ROUTE_REFCNT
 static struct timeval last_ts;
 
 SYSCTL_NODE(_net_idle, OID_AUTO, route, CTLFLAG_RW, 0, "idle route monitoring");
@@ -140,7 +131,15 @@ static int rt_if_idle_drain_interval = RT_IF_IDLE_DRAIN_INTERVAL;
 SYSCTL_INT(_net_idle_route, OID_AUTO, drain_interval, CTLFLAG_RW,
     &rt_if_idle_drain_interval, 0, "Default interval for draining "
     "routes when doing interface idle reference counting.");
-#endif /* IFNET_ROUTE_REFCNT */
+
+/*
+ * This macro calculates the wall clock skew, in case the user changes
+ * the system time.  The skew adjustment is needed because the kernel
+ * now keeps route expiration times in uptime terms, while userland
+ * still expects expiration times in terms of calendar time.
+ */
+#define CALCULATE_CLOCKSKEW(cc, ic, cu, iu)\
+    ((cc.tv_sec - ic) - (cu - iu))
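/*
 * Illustrative sketch (editorial aside, not part of this patch): what
 * the macro above computes.  If the wall clock was stepped forward by
 * N seconds after a route was created, the calendar delta exceeds the
 * uptime delta by exactly N, so the skew is N.  A hypothetical reading:
 */
static __unused int64_t
example_route_clock_skew(struct rtentry *rt)
{
	struct timeval caltime;

	getmicrotime(&caltime);
	return (CALCULATE_CLOCKSKEW(caltime, rt->base_calendartime,
	    net_uptime(), rt->base_uptime));
}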
 
 /*
  * It really doesn't make any sense at all for this code to share much
@@ -322,6 +321,7 @@ route_output(struct mbuf *m, struct socket *so)
 	struct radix_node_head *rnh;
 	struct rt_addrinfo info;
 	int len, error = 0;
+	sa_family_t dst_sa_family = 0;
 	struct ifnet *ifp = NULL;
 #ifndef __APPLE__
 	struct proc  *curproc = current_proc();
@@ -344,17 +344,17 @@ route_output(struct mbuf *m, struct socket *so)
 	len = m->m_pkthdr.len;
 	if (len < sizeof(*rtm) ||
 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
-		dst = NULL;
+		info.rti_info[RTAX_DST] = NULL;
 		senderr(EINVAL);
 	}
 	R_Malloc(rtm, struct rt_msghdr *, len);
 	if (rtm == NULL) {
-		dst = NULL;
+		info.rti_info[RTAX_DST] = NULL;
 		senderr(ENOBUFS);
 	}
 	m_copydata(m, 0, len, (caddr_t)rtm);
 	if (rtm->rtm_version != RTM_VERSION) {
-		dst = NULL;
+		info.rti_info[RTAX_DST] = NULL;
 		senderr(EPROTONOSUPPORT);
 	}
 
@@ -374,51 +374,52 @@ route_output(struct mbuf *m, struct socket *so)
 	 * may perform operations other than RTM_GET
 	 */
 	if (rtm->rtm_type != RTM_GET && (so->so_state & SS_PRIV) == 0) {
-		dst = NULL;
+		info.rti_info[RTAX_DST] = NULL;
 		senderr(EPERM);
 	}
 
 	rtm->rtm_pid = proc_selfpid();
 	info.rti_addrs = rtm->rtm_addrs;
 	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
-		dst = NULL;
+		info.rti_info[RTAX_DST] = NULL;
 		senderr(EINVAL);
 	}
-	if (dst == NULL || (dst->sa_family >= AF_MAX) ||
-	    (gate != NULL && (gate->sa_family >= AF_MAX))) {
+	if (info.rti_info[RTAX_DST] == NULL ||
+	    (info.rti_info[RTAX_DST]->sa_family >= AF_MAX) ||
+	    (info.rti_info[RTAX_GATEWAY] != NULL &&
+	    (info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))) {
 		senderr(EINVAL);
 	}
 
-	if (dst->sa_family == AF_INET && dst->sa_len != sizeof (dst_in)) {
+	if (info.rti_info[RTAX_DST]->sa_family == AF_INET &&
+	    info.rti_info[RTAX_DST]->sa_len != sizeof (dst_in)) {
 		/* At minimum, we need up to sin_addr */
-		if (dst->sa_len < offsetof(struct sockaddr_in, sin_zero))
+		if (info.rti_info[RTAX_DST]->sa_len <
+		    offsetof(struct sockaddr_in, sin_zero))
 			senderr(EINVAL);
 		bzero(&dst_in, sizeof (dst_in));
 		dst_in.sin_len = sizeof (dst_in);
 		dst_in.sin_family = AF_INET;
-		dst_in.sin_port = SIN(dst)->sin_port;
-		dst_in.sin_addr = SIN(dst)->sin_addr;
-		dst = (struct sockaddr *)&dst_in;
+		dst_in.sin_port = SIN(info.rti_info[RTAX_DST])->sin_port;
+		dst_in.sin_addr = SIN(info.rti_info[RTAX_DST])->sin_addr;
+		info.rti_info[RTAX_DST] = (struct sockaddr *)&dst_in;
+		dst_sa_family = info.rti_info[RTAX_DST]->sa_family;
 	}
 
-	if (gate != NULL &&
-	    gate->sa_family == AF_INET && gate->sa_len != sizeof (gate_in)) {
+	if (info.rti_info[RTAX_GATEWAY] != NULL &&
+	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET &&
+	    info.rti_info[RTAX_GATEWAY]->sa_len != sizeof (gate_in)) {
 		/* At minimum, we need up to sin_addr */
-		if (gate->sa_len < offsetof(struct sockaddr_in, sin_zero))
+		if (info.rti_info[RTAX_GATEWAY]->sa_len <
+		    offsetof(struct sockaddr_in, sin_zero))
 			senderr(EINVAL);
 		bzero(&gate_in, sizeof (gate_in));
 		gate_in.sin_len = sizeof (gate_in);
 		gate_in.sin_family = AF_INET;
-		gate_in.sin_port = SIN(gate)->sin_port;
-		gate_in.sin_addr = SIN(gate)->sin_addr;
-		gate = (struct sockaddr *)&gate_in;
+		gate_in.sin_port = SIN(info.rti_info[RTAX_GATEWAY])->sin_port;
+		gate_in.sin_addr = SIN(info.rti_info[RTAX_GATEWAY])->sin_addr;
+		info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gate_in;
 	}
 
-	if (genmask) {
+	if (info.rti_info[RTAX_GENMASK]) {
 		struct radix_node *t;
-		t = rn_addmask((caddr_t)genmask, 0, 1);
-		if (t && Bcmp(genmask, t->rn_key, *(u_char *)genmask) == 0)
-			genmask = (struct sockaddr *)(t->rn_key);
+		t = rn_addmask((caddr_t)info.rti_info[RTAX_GENMASK], 0, 1);
+		if (t && Bcmp(info.rti_info[RTAX_GENMASK], t->rn_key,
+		    *(u_char *)info.rti_info[RTAX_GENMASK]) == 0)
+			info.rti_info[RTAX_GENMASK] = (struct sockaddr *)(t->rn_key);
 		else
 			senderr(ENOBUFS);
 	}
@@ -427,16 +428,27 @@ route_output(struct mbuf *m, struct socket *so)
 	 * If RTF_IFSCOPE flag is set, then rtm_index specifies the scope.
 	 */
 	if (rtm->rtm_flags & RTF_IFSCOPE) {
-		/* Scoped routing is for AF_INET only */
-		if (dst->sa_family != AF_INET)
+		if (info.rti_info[RTAX_DST]->sa_family != AF_INET &&
+		    info.rti_info[RTAX_DST]->sa_family != AF_INET6)
 			senderr(EINVAL);
 		ifscope = rtm->rtm_index;
 	}
 
+	/*
+	 * For AF_INET, always zero out the embedded scope ID.  If this is
+	 * a scoped request, it must be done explicitly by setting RTF_IFSCOPE
+	 * flag and the corresponding rtm_index value.  This is to prevent
+	 * false interpretation of the scope ID, which is carried in the
+	 * sin_zero field and might not be properly cleared by the requestor.
+	 */
+	if (info.rti_info[RTAX_DST]->sa_family == AF_INET)
+		sin_set_ifscope(info.rti_info[RTAX_DST], IFSCOPE_NONE);
+	if (info.rti_info[RTAX_GATEWAY] != NULL &&
+	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET)
+		sin_set_ifscope(info.rti_info[RTAX_GATEWAY], IFSCOPE_NONE);
+
 	switch (rtm->rtm_type) {
 
 		case RTM_ADD:
-			if (gate == NULL)
+			if (info.rti_info[RTAX_GATEWAY] == NULL)
 				senderr(EINVAL);
 
 #ifdef __APPLE__
@@ -454,21 +466,21 @@ route_output(struct mbuf *m, struct socket *so)
 {
 #define satosinaddr(sa) (((struct sockaddr_in *)sa)->sin_addr.s_addr)
 	
-			if (check_routeselfref && (dst && dst->sa_family == AF_INET) && 
-				(netmask && satosinaddr(netmask) == INADDR_BROADCAST) &&
-				(gate && satosinaddr(dst) == satosinaddr(gate))) {
+			if (check_routeselfref &&
+			    (info.rti_info[RTAX_DST] &&
+			    info.rti_info[RTAX_DST]->sa_family == AF_INET) &&
+			    (info.rti_info[RTAX_NETMASK] &&
+			    satosinaddr(info.rti_info[RTAX_NETMASK]) ==
+			    INADDR_BROADCAST) &&
+			    (info.rti_info[RTAX_GATEWAY] &&
+			    satosinaddr(info.rti_info[RTAX_DST]) ==
+			    satosinaddr(info.rti_info[RTAX_GATEWAY]))) {
 					log(LOG_WARNING, "route_output: circular route %ld.%ld.%ld.%ld/32 ignored\n",
-						(ntohl(satosinaddr(gate)>>24))&0xff,
-						(ntohl(satosinaddr(gate)>>16))&0xff,
-						(ntohl(satosinaddr(gate)>>8))&0xff,
-						(ntohl(satosinaddr(gate)))&0xff);
+						(ntohl(satosinaddr(info.rti_info[RTAX_GATEWAY])>>24))&0xff,
+						(ntohl(satosinaddr(info.rti_info[RTAX_GATEWAY])>>16))&0xff,
+						(ntohl(satosinaddr(info.rti_info[RTAX_GATEWAY])>>8))&0xff,
+						(ntohl(satosinaddr(info.rti_info[RTAX_GATEWAY])))&0xff);
 						
 					senderr(EINVAL);
 			}
 }
 #endif	
-			error = rtrequest_scoped_locked(RTM_ADD, dst, gate,
-			    netmask, rtm->rtm_flags, &saved_nrt, ifscope);
+			error = rtrequest_scoped_locked(RTM_ADD,
+			    info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY],
+			    info.rti_info[RTAX_NETMASK], rtm->rtm_flags,
+			    &saved_nrt, ifscope);
 			if (error == 0 && saved_nrt) {
 				RT_LOCK(saved_nrt);
 #ifdef __APPLE__
@@ -499,24 +511,24 @@ route_output(struct mbuf *m, struct socket *so)
 				 * rarely encountered.
 				 * dwiggins@bbn.com
 				 */
-	
-				rt_setif(saved_nrt, ifpaddr, ifaaddr, gate,
+
+				rt_setif(saved_nrt, info.rti_info[RTAX_IFP],
+				    info.rti_info[RTAX_IFA],
+				    info.rti_info[RTAX_GATEWAY],
 				    ifscope);
 #endif
 				rt_setmetrics(rtm->rtm_inits,
-					&rtm->rtm_rmx, &saved_nrt->rt_rmx);
+				    &rtm->rtm_rmx, saved_nrt);
 				saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
 				saved_nrt->rt_rmx.rmx_locks |=
-					(rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
-				saved_nrt->rt_genmask = genmask;
+				    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
+				saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
 				RT_REMREF_LOCKED(saved_nrt);
 				RT_UNLOCK(saved_nrt);
 			}
 			break;
 
 		case RTM_DELETE:
-			error = rtrequest_scoped_locked(RTM_DELETE, dst,
-			    gate, netmask, rtm->rtm_flags, &saved_nrt, ifscope);
+			error = rtrequest_scoped_locked(RTM_DELETE,
+			    info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY],
+			    info.rti_info[RTAX_NETMASK], rtm->rtm_flags,
+			    &saved_nrt, ifscope);
 			if (error == 0) {
 				rt = saved_nrt;
 				RT_LOCK(rt);
@@ -527,18 +539,23 @@ route_output(struct mbuf *m, struct socket *so)
 		case RTM_GET:
 		case RTM_CHANGE:
 		case RTM_LOCK:
-			if ((rnh = rt_tables[dst->sa_family]) == NULL)
+			if ((rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family]) == NULL)
 				senderr(EAFNOSUPPORT);
 
 			/*
 			 * Lookup the best match based on the key-mask pair;
 			 * callee adds a reference and checks for root node.
 			 */
-			rt = rt_lookup(TRUE, dst, netmask, rnh, ifscope);
+			rt = rt_lookup(TRUE, info.rti_info[RTAX_DST],
+			    info.rti_info[RTAX_NETMASK], rnh, ifscope);
 			if (rt == NULL)
 				senderr(ESRCH);
 			RT_LOCK(rt);
 
+			/*
+			 * Holding rnh_lock here prevents the possibility of
+			 * ifa from changing (e.g. in_ifinit), so it is safe
+			 * to access its ifa_addr (down below) without locking.
+			 */
 			switch(rtm->rtm_type) {
 
 				case RTM_GET: {
@@ -546,52 +563,63 @@ route_output(struct mbuf *m, struct socket *so)
 				report:
 					ifa2 = NULL;
 					RT_LOCK_ASSERT_HELD(rt);
-					dst = rt_key(rt);
-					gate = rt->rt_gateway;
-					netmask = rt_mask(rt);
-					genmask = rt->rt_genmask;
+					info.rti_info[RTAX_DST] = rt_key(rt);
+					dst_sa_family = info.rti_info[RTAX_DST]->sa_family;
+					info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+					info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+					info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
 					if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
 						ifp = rt->rt_ifp;
 						if (ifp) {
 							ifnet_lock_shared(ifp);
-							ifa2 = ifp->if_addrhead.tqh_first;
-							ifpaddr = ifa2->ifa_addr;
-							ifaref(ifa2);
+							ifa2 = ifp->if_lladdr;
+							info.rti_info[RTAX_IFP] = ifa2->ifa_addr;
+							IFA_ADDREF(ifa2);
 							ifnet_lock_done(ifp);
-							ifaaddr = rt->rt_ifa->ifa_addr;
+							info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
 							rtm->rtm_index = ifp->if_index;
 						} else {
-							ifpaddr = 0;
-							ifaaddr = 0;
+							info.rti_info[RTAX_IFP] = NULL;
+							info.rti_info[RTAX_IFA] = NULL;
 						}
+					} else if ((ifp = rt->rt_ifp) != NULL) {
+						rtm->rtm_index = ifp->if_index;
 					}
+					if (ifa2 != NULL)
+						IFA_LOCK(ifa2);
 					len = rt_msg2(rtm->rtm_type, &info, (caddr_t)0,
 						(struct walkarg *)0);
+					if (ifa2 != NULL)
+						IFA_UNLOCK(ifa2);
 					if (len > rtm->rtm_msglen) {
 						struct rt_msghdr *new_rtm;
 						R_Malloc(new_rtm, struct rt_msghdr *, len);
 						if (new_rtm == 0) {
 							RT_UNLOCK(rt);
 							if (ifa2 != NULL)
-								ifafree(ifa2);
+								IFA_REMREF(ifa2);
 							senderr(ENOBUFS);
 						}
 						Bcopy(rtm, new_rtm, rtm->rtm_msglen);
 						R_Free(rtm); rtm = new_rtm;
 					}
+					if (ifa2 != NULL)
+						IFA_LOCK(ifa2);
 					(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm,
 						(struct walkarg *)0);
+					if (ifa2 != NULL)
+						IFA_UNLOCK(ifa2);
 					rtm->rtm_flags = rt->rt_flags;
-					rtm->rtm_rmx = rt->rt_rmx;
+					rt_getmetrics(rt, &rtm->rtm_rmx);
 					rtm->rtm_addrs = info.rti_addrs;
 					if (ifa2 != NULL)
-						ifafree(ifa2);
+						IFA_REMREF(ifa2);
 					}
 					break;
 
 				case RTM_CHANGE:
-					if (gate && (error = rt_setgate(rt,
-					    rt_key(rt), gate))) {
+					if (info.rti_info[RTAX_GATEWAY] &&
+					    (error = rt_setgate(rt, rt_key(rt),
+					    info.rti_info[RTAX_GATEWAY]))) {
 						RT_UNLOCK(rt);
 						senderr(error);
 					}
@@ -602,8 +630,8 @@ route_output(struct mbuf *m, struct socket *so)
 					 * flags on the default route without changing the
 					 * default gateway.  Changing flags still doesn't work.
 					 */
-					if ((rt->rt_flags & RTF_GATEWAY) && !gate)
-						gate = rt->rt_gateway;
+					if ((rt->rt_flags & RTF_GATEWAY) && !info.rti_info[RTAX_GATEWAY])
+						info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 		
 #ifdef __APPLE__
 					/*
@@ -611,19 +639,19 @@ route_output(struct mbuf *m, struct socket *so)
 					 * equivalent to the code found at this very spot
 					 * in BSD.
 					 */
-					rt_setif(rt, ifpaddr, ifaaddr, gate,
+					rt_setif(rt, info.rti_info[RTAX_IFP],
+					    info.rti_info[RTAX_IFA],
+					    info.rti_info[RTAX_GATEWAY],
 					    ifscope);
 #endif
 		
 					rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
-							&rt->rt_rmx);
+							rt);
 #ifndef __APPLE__
 					/* rt_setif, called above does this for us on darwin */
 					if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
-						   rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, gate);
+						rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info.rti_info[RTAX_GATEWAY]);
 #endif
-					if (genmask)
-						rt->rt_genmask = genmask;
+					if (info.rti_info[RTAX_GENMASK])
+						rt->rt_genmask = info.rti_info[RTAX_GENMASK];
 					/*
 					 * Fall into
 					 */
@@ -684,10 +712,11 @@ flush:
 		if (error)
 			return error;
 	} else {
+		struct	sockproto	route_proto = {PF_ROUTE, 0};
 		if (rp)
 			rp->rcb_proto.sp_family = 0; /* Avoid us */
-		if (dst)
-			route_proto.sp_protocol = dst->sa_family;
+		if (dst_sa_family != 0)
+			route_proto.sp_protocol = dst_sa_family;
 		if (m) {
 			socket_unlock(so, 0);
 			raw_input(m, &route_proto, &route_src, &route_dst);
@@ -700,10 +729,28 @@ flush:
 	return (error);
 }
 
+void
+rt_setexpire(struct rtentry *rt, uint64_t expiry)
+{
+	/* set both rt_expire and rmx_expire */
+	rt->rt_expire = expiry;
+	if (expiry) {
+		rt->rt_rmx.rmx_expire = expiry + rt->base_calendartime -
+		    rt->base_uptime;
+	} else
+		rt->rt_rmx.rmx_expire = 0;
+}
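
rt_expire lives on the monotonic uptime clock, while the rmx_expire visible through the routing socket stays in calendar (wall-clock) time; the conversion simply shifts by the calendar-minus-uptime offset sampled when the route was created. A worked example with made-up numbers:

uint64_t base_calendartime = 1000000;	/* wall clock at route creation */
uint64_t base_uptime = 500;		/* uptime at route creation */
uint64_t expiry = 800;			/* deadline on the uptime clock */

/* the same instant on the calendar clock: 800 + 1000000 - 500 == 1000300 */
uint64_t rmx_expire = expiry + base_calendartime - base_uptime;
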
+
 static void
-rt_setmetrics(u_int32_t which, struct rt_metrics *in, struct rt_metrics *out)
+rt_setmetrics(u_int32_t which, struct rt_metrics *in, struct rtentry *out)
 {
-#define metric(f, e) if (which & (f)) out->e = in->e;
+	struct timeval curr_calendar_time;
+	uint64_t curr_uptime;
+
+	getmicrotime(&curr_calendar_time);
+	curr_uptime = net_uptime();
+
+#define metric(f, e) if (which & (f)) out->rt_rmx.e = in->e;
 	metric(RTV_RPIPE, rmx_recvpipe);
 	metric(RTV_SPIPE, rmx_sendpipe);
 	metric(RTV_SSTHRESH, rmx_ssthresh);
@@ -713,17 +760,65 @@ rt_setmetrics(u_int32_t which, struct rt_metrics *in, struct rt_metrics *out)
 	metric(RTV_MTU, rmx_mtu);
 	metric(RTV_EXPIRE, rmx_expire);
 #undef metric
+
+	if (out->rt_rmx.rmx_expire > 0) {
+		/* account for system time change */
+		curr_uptime = net_uptime();
+		getmicrotime(&curr_calendar_time);
+		out->base_calendartime +=
+		    CALCULATE_CLOCKSKEW(curr_calendar_time,
+		    out->base_calendartime,
+		    curr_uptime, out->base_uptime);
+		rt_setexpire(out,
+		    out->rt_rmx.rmx_expire -
+		    out->base_calendartime +
+		    out->base_uptime);
+	} else {
+		rt_setexpire(out, 0);
+	}
+
+	VERIFY(out->rt_expire == 0 || out->rt_rmx.rmx_expire != 0);
+	VERIFY(out->rt_expire != 0 || out->rt_rmx.rmx_expire == 0);
+}
+
+static void
+rt_getmetrics(struct rtentry *in, struct rt_metrics *out)
+{
+	struct timeval curr_calendar_time;
+	uint64_t curr_uptime;
+
+	VERIFY(in->rt_expire == 0 || in->rt_rmx.rmx_expire != 0);
+	VERIFY(in->rt_expire != 0 || in->rt_rmx.rmx_expire == 0);
+
+	*out = in->rt_rmx;
+
+	if (in->rt_expire) {
+		/* account for system time change */
+		getmicrotime(&curr_calendar_time);
+		curr_uptime = net_uptime();
+
+		in->base_calendartime +=
+		    CALCULATE_CLOCKSKEW(curr_calendar_time,
+			in->base_calendartime,
+			curr_uptime, in->base_uptime);
+
+		out->rmx_expire = in->base_calendartime +
+		    in->rt_expire - in->base_uptime;
+	} else
+		out->rmx_expire = 0;
 }
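
CALCULATE_CLOCKSKEW, used by both functions above, measures how far the calendar clock has drifted relative to the monotonic clock since the base pair was sampled (e.g. after settimeofday() or an NTP step); folding it into base_calendartime re-anchors the pair so the conversions stay consistent. A plausible expansion, shown only to make the arithmetic concrete (the real macro is defined elsewhere in the networking headers):

/* skew = (calendar seconds elapsed) - (uptime seconds elapsed) */
#define CALCULATE_CLOCKSKEW_SKETCH(cur_cal, base_cal, cur_up, base_up) \
	(((cur_cal).tv_sec - (base_cal)) - ((cur_up) - (base_up)))
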
 
 /*
- * Set route's interface given ifpaddr, ifaaddr, and gateway.
+ * Set route's interface given info.rti_info[RTAX_IFP],
+ * info.rti_info[RTAX_IFA], and gateway.
  */
 static void
 rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr,
     struct sockaddr *Gate, unsigned int ifscope)
 {
-	struct ifaddr *ifa = 0;
-	struct ifnet  *ifp = 0;
+	struct ifaddr *ifa = NULL;
+	struct ifnet *ifp = NULL;
+	void (*ifa_rtrequest)
+	    (int, struct rtentry *, struct sockaddr *);
 
 	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 
@@ -740,6 +835,9 @@ rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr,
 	/* Add an extra ref for ourselves */
 	RT_ADDREF_LOCKED(rt);
 
+	/* Become a regular mutex, just in case */
+	RT_CONVERT_LOCK(rt);
+
 	/*
 	 * New gateway could require new ifaddr, ifp; flags may also
 	 * be different; ifp may be specified by ll sockaddr when
@@ -747,11 +845,11 @@ rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr,
 	 */
 	if (Ifpaddr && (ifa = ifa_ifwithnet_scoped(Ifpaddr, ifscope)) &&
 	    (ifp = ifa->ifa_ifp) && (Ifaaddr || Gate)) {
-		ifafree(ifa);
+		IFA_REMREF(ifa);
 		ifa = ifaof_ifpforaddr(Ifaaddr ? Ifaaddr : Gate, ifp);
 	} else {
 		if (ifa) {
-			ifafree(ifa);
+			IFA_REMREF(ifa);
 			ifa = 0;
 		}
 		if (Ifpaddr && (ifp = if_withname(Ifpaddr)) ) {
@@ -761,7 +859,7 @@ rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr,
 				ifnet_lock_shared(ifp);
 				ifa = TAILQ_FIRST(&ifp->if_addrhead);
 				if (ifa != NULL)
-					ifaref(ifa);
+					IFA_ADDREF(ifa);
 				ifnet_lock_done(ifp);
 			}
 		} else if (Ifaaddr &&
@@ -783,7 +881,7 @@ rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr,
 			/* Don't update a defunct route */
 			if (rt->rt_flags & RTF_CONDEMNED) {
 				if (ifa != NULL)
-					ifafree(ifa);
+					IFA_REMREF(ifa);
 				/* Release extra ref */
 				RT_REMREF_LOCKED(rt);
 				return;
@@ -793,39 +891,63 @@ rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr,
 	if (ifa) {
 		struct ifaddr *oifa = rt->rt_ifa;
 		if (oifa != ifa) {
-			if (oifa && oifa->ifa_rtrequest)
-				oifa->ifa_rtrequest(RTM_DELETE, rt, Gate);
+			if (oifa != NULL) {
+				IFA_LOCK_SPIN(oifa);
+				ifa_rtrequest = oifa->ifa_rtrequest;
+				IFA_UNLOCK(oifa);
+				if (ifa_rtrequest != NULL)
+					ifa_rtrequest(RTM_DELETE, rt, Gate);
+			}
 			rtsetifa(rt, ifa);
-#if IFNET_ROUTE_REFCNT
-			/*
-			 * Adjust route ref count for the interfaces.
-			 */
-			if (rt->rt_if_ref_fn != NULL && rt->rt_ifp != ifp) {
-				rt->rt_if_ref_fn(ifp, 1);
-				rt->rt_if_ref_fn(rt->rt_ifp, -1);
+
+			if (rt->rt_ifp != ifp) {
+				/*
+				 * Purge any link-layer info caching.
+				 */
+				if (rt->rt_llinfo_purge != NULL)
+					rt->rt_llinfo_purge(rt);
+
+				/*
+				 * Adjust route ref count for the interfaces.
+				 */
+				if (rt->rt_if_ref_fn != NULL) {
+					rt->rt_if_ref_fn(ifp, 1);
+					rt->rt_if_ref_fn(rt->rt_ifp, -1);
+				}
 			}
-#endif /* IFNET_ROUTE_REFCNT */
 			rt->rt_ifp = ifp;
 			/*
 			 * If this is the (non-scoped) default route, record
 			 * the interface index used for the primary ifscope.
 			 */
-			if (rt_inet_default(rt, rt_key(rt)))
-				set_primary_ifscope(rt->rt_ifp->if_index);
+			if (rt_primary_default(rt, rt_key(rt))) {
+				set_primary_ifscope(rt_key(rt)->sa_family,
+				    rt->rt_ifp->if_index);
+			}
 			rt->rt_rmx.rmx_mtu = ifp->if_mtu;
-			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
-				rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, Gate);
-			ifafree(ifa);
+			if (rt->rt_ifa != NULL) {
+				IFA_LOCK_SPIN(rt->rt_ifa);
+				ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
+				IFA_UNLOCK(rt->rt_ifa);
+				if (ifa_rtrequest != NULL)
+					ifa_rtrequest(RTM_ADD, rt, Gate);
+			}
+			IFA_REMREF(ifa);
 			/* Release extra ref */
 			RT_REMREF_LOCKED(rt);
 			return;
 		}
-		ifafree(ifa);
+		IFA_REMREF(ifa);
 	}
 
 	/* XXX: to reset gateway to correct value, at RTM_CHANGE */
-	if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
-		rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, Gate);
+	if (rt->rt_ifa != NULL) {
+		IFA_LOCK_SPIN(rt->rt_ifa);
+		ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
+		IFA_UNLOCK(rt->rt_ifa);
+		if (ifa_rtrequest != NULL)
+			ifa_rtrequest(RTM_ADD, rt, Gate);
+	}
 
 	/* Release extra ref */
 	RT_REMREF_LOCKED(rt);
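
The repeated IFA_LOCK_SPIN/copy/IFA_UNLOCK sequences above are the reason for the new ifa_rtrequest local: the callback pointer is snapshotted under the address lock and invoked only after the lock is dropped, so foreign code never runs while the ifa spin lock is held. Distilled to its shape:

void (*ifa_rtrequest)(int, struct rtentry *, struct sockaddr *);

IFA_LOCK_SPIN(rt->rt_ifa);
ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;	/* snapshot under lock */
IFA_UNLOCK(rt->rt_ifa);
if (ifa_rtrequest != NULL)
	ifa_rtrequest(RTM_ADD, rt, Gate);	/* invoke with lock dropped */
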
@@ -935,7 +1057,7 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo)
 				hint = rtinfo->rti_info[RTAX_IFA];
 
 			/* Scrub away any trace of embedded interface scope */
-			sa = rtm_scrub_ifscope(i, hint, sa, &ss);
+			sa = rtm_scrub_ifscope(type, i, hint, sa, &ss);
 			break;
 
 		default:
@@ -990,6 +1112,10 @@ again:
 		len = sizeof(struct ifma_msghdr2);
 		break;
 
+	case RTM_GET_EXT:
+		len = sizeof (struct rt_msghdr_ext);
+		break;
+
 	case RTM_GET2:
 		len = sizeof(struct rt_msghdr2);
 		break;
@@ -1014,7 +1140,7 @@ again:
 				hint = rtinfo->rti_info[RTAX_IFA];
 
 			/* Scrub away any trace of embedded interface scope */
-			sa = rtm_scrub_ifscope(i, hint, sa, &ss);
+			sa = rtm_scrub_ifscope(type, i, hint, sa, &ss);
 			break;
 
 		default:
@@ -1070,6 +1196,7 @@ rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
 	struct rt_msghdr *rtm;
 	struct mbuf *m;
 	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
+	struct	sockproto	route_proto = {PF_ROUTE, 0};
 
 	if (route_cb.any_count == 0)
 		return;
@@ -1080,7 +1207,7 @@ rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
 	rtm->rtm_flags = RTF_DONE | flags;
 	rtm->rtm_errno = error;
 	rtm->rtm_addrs = rtinfo->rti_addrs;
-	route_proto.sp_protocol = sa ? sa->sa_family : 0;
+	route_proto.sp_protocol = sa ? sa->sa_family : 0;
 	raw_input(m, &route_proto, &route_src, &route_dst);
 }
 
@@ -1095,6 +1222,7 @@ rt_ifmsg(
 	struct if_msghdr *ifm;
 	struct mbuf *m;
 	struct rt_addrinfo info;
+	struct	sockproto	route_proto = {PF_ROUTE, 0};
 
 	if (route_cb.any_count == 0)
 		return;
@@ -1107,7 +1235,6 @@ rt_ifmsg(
 	ifm->ifm_flags = (u_short)ifp->if_flags;
 	if_data_internal_to_if_data(ifp, &ifp->if_data, &ifm->ifm_data);
 	ifm->ifm_addrs = 0;
-	route_proto.sp_protocol = 0;
 	raw_input(m, &route_proto, &route_src, &route_dst);
 }
 
@@ -1120,7 +1247,7 @@ rt_ifmsg(
  * copies of it.
  *
  * Since this is coming from the interface, it is expected that the
- * interface will be locked.  Caller must hold rt_lock.
+ * interface will be locked.  Caller must hold rnh_lock and rt_lock.
  */
 void
 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
@@ -1130,11 +1257,16 @@ rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
 	int pass;
 	struct mbuf *m = 0;
 	struct ifnet *ifp = ifa->ifa_ifp;
+	struct	sockproto	route_proto = {PF_ROUTE, 0};
 
+	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 	RT_LOCK_ASSERT_HELD(rt);
 
 	if (route_cb.any_count == 0)
 		return;
+
+	/* Become a regular mutex, just in case */
+	RT_CONVERT_LOCK(rt);
 	for (pass = 1; pass < 3; pass++) {
 		bzero((caddr_t)&info, sizeof(info));
 		if ((cmd == RTM_ADD && pass == 1) ||
@@ -1142,21 +1274,32 @@ rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
 			struct ifa_msghdr *ifam;
 			int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
 
-			/* Lock ifp for if_addrhead */
+			/* Lock ifp for if_lladdr */
 			ifnet_lock_shared(ifp);
-			ifaaddr = sa = ifa->ifa_addr;
-			ifpaddr = ifp->if_addrhead.tqh_first->ifa_addr;
-			netmask = ifa->ifa_netmask;
-			brdaddr = ifa->ifa_dstaddr;
+			IFA_LOCK(ifa);
+			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
+			/*
+			 * Holding ifnet lock here prevents the link address
+			 * from changing contents, so no need to hold its
+			 * lock.  The link address is always present; it's
+			 * never freed.
+			 */
+			info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr;
+			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
+			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
 			if ((m = rt_msg1(ncmd, &info)) == NULL) {
+				IFA_UNLOCK(ifa);
 				ifnet_lock_done(ifp);
 				continue;
 			}
+			IFA_UNLOCK(ifa);
 			ifnet_lock_done(ifp);
 			ifam = mtod(m, struct ifa_msghdr *);
 			ifam->ifam_index = ifp->if_index;
+			IFA_LOCK_SPIN(ifa);
 			ifam->ifam_metric = ifa->ifa_metric;
 			ifam->ifam_flags = ifa->ifa_flags;
+			IFA_UNLOCK(ifa);
 			ifam->ifam_addrs = info.rti_addrs;
 		}
 		if ((cmd == RTM_ADD && pass == 2) ||
@@ -1165,9 +1308,9 @@ rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
 
 			if (rt == 0)
 				continue;
-			netmask = rt_mask(rt);
-			dst = sa = rt_key(rt);
-			gate = rt->rt_gateway;
+			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+			info.rti_info[RTAX_DST] = sa = rt_key(rt);
+			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 			if ((m = rt_msg1(cmd, &info)) == NULL)
 				continue;
 			rtm = mtod(m, struct rt_msghdr *);
@@ -1193,35 +1336,34 @@ rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
 	struct mbuf *m = 0;
 	struct ifnet *ifp = ifma->ifma_ifp;
 	struct ifma_msghdr *ifmam;
+	struct	sockproto	route_proto = {PF_ROUTE, 0};
 
 	if (route_cb.any_count == 0)
 		return;
 
+	/* Lock ifp for if_lladdr */
+	ifnet_lock_shared(ifp);
 	bzero((caddr_t)&info, sizeof(info));
-	ifaaddr = ifma->ifma_addr;
-	/* Lock ifp for if_addrhead */
-	if (ifp != NULL)
-		ifnet_lock_shared(ifp);
-	if (ifp && ifp->if_addrhead.tqh_first)
-		ifpaddr = ifp->if_addrhead.tqh_first->ifa_addr;
-	else
-		ifpaddr = NULL;
+	IFMA_LOCK(ifma);
+	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
+	info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr;	/* lladdr doesn't need lock */
+
 	/*
 	 * If a link-layer address is present, present it as a ``gateway''
 	 * (similarly to how ARP entries, e.g., are presented).
 	 */
-	gate = ifma->ifma_ll->ifma_addr;
+	info.rti_info[RTAX_GATEWAY] = (ifma->ifma_ll != NULL) ?
+	    ifma->ifma_ll->ifma_addr : NULL;
 	if ((m = rt_msg1(cmd, &info)) == NULL) {
-		if (ifp != NULL)
-			ifnet_lock_done(ifp);
+		IFMA_UNLOCK(ifma);
+		ifnet_lock_done(ifp);
 		return;
 	}
-	if (ifp != NULL)
-		ifnet_lock_done(ifp);
 	ifmam = mtod(m, struct ifma_msghdr *);
-	ifmam->ifmam_index = ifp ? ifp->if_index : 0;
+	ifmam->ifmam_index = ifp->if_index;
 	ifmam->ifmam_addrs = info.rti_addrs;
 	route_proto.sp_protocol = ifma->ifma_addr->sa_family;
+	IFMA_UNLOCK(ifma);
+	ifnet_lock_done(ifp);
 	raw_input(m, &route_proto, &route_src, &route_dst);
 }
 
@@ -1242,10 +1384,11 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
 		return 0;
 	}
 	bzero((caddr_t)&info, sizeof(info));
-	dst = rt_key(rt);
-	gate = rt->rt_gateway;
-	netmask = rt_mask(rt);
-	genmask = rt->rt_genmask;
+	info.rti_info[RTAX_DST] = rt_key(rt);
+	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
+
 	if (w->w_op != NET_RT_DUMP2) {
 		size = rt_msg2(RTM_GET, &info, 0, w);
 		if (w->w_req && w->w_tmem) {
@@ -1253,201 +1396,418 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
 
 			rtm->rtm_flags = rt->rt_flags;
 			rtm->rtm_use = rt->rt_use;
-			rtm->rtm_rmx = rt->rt_rmx;
+			rt_getmetrics(rt, &rtm->rtm_rmx);
 			rtm->rtm_index = rt->rt_ifp->if_index;
 			rtm->rtm_pid = 0;
-                        rtm->rtm_seq = 0;
-                        rtm->rtm_errno = 0;
+			rtm->rtm_seq = 0;
+			rtm->rtm_errno = 0;
 			rtm->rtm_addrs = info.rti_addrs;
 			error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
 			RT_UNLOCK(rt);
 			return (error);
 		}
 	} else {
-                size = rt_msg2(RTM_GET2, &info, 0, w);
-                if (w->w_req && w->w_tmem) {
-                        struct rt_msghdr2 *rtm = (struct rt_msghdr2 *)w->w_tmem;
-
-                        rtm->rtm_flags = rt->rt_flags;
-                        rtm->rtm_use = rt->rt_use;
-                        rtm->rtm_rmx = rt->rt_rmx;
-                        rtm->rtm_index = rt->rt_ifp->if_index;
-                        rtm->rtm_refcnt = rt->rt_refcnt;
+		size = rt_msg2(RTM_GET2, &info, 0, w);
+		if (w->w_req && w->w_tmem) {
+			struct rt_msghdr2 *rtm = (struct rt_msghdr2 *)w->w_tmem;
+
+			rtm->rtm_flags = rt->rt_flags;
+			rtm->rtm_use = rt->rt_use;
+			rt_getmetrics(rt, &rtm->rtm_rmx);
+			rtm->rtm_index = rt->rt_ifp->if_index;
+			rtm->rtm_refcnt = rt->rt_refcnt;
 			if (rt->rt_parent)
 				rtm->rtm_parentflags = rt->rt_parent->rt_flags;
 			else
 				rtm->rtm_parentflags = 0;
-                        rtm->rtm_reserved = 0;
-                        rtm->rtm_addrs = info.rti_addrs;
-                        error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
+			rtm->rtm_reserved = 0;
+			rtm->rtm_addrs = info.rti_addrs;
+			error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
 			RT_UNLOCK(rt);
-                        return (error);
-
+			return (error);
 		}
 	}
 	RT_UNLOCK(rt);
 	return (error);
 }
 
+/*
+ * This is used for dumping extended information from route entries.
+ */
 int
-sysctl_iflist(
-	int	af,
-	struct	walkarg *w)
+sysctl_dumpentry_ext(struct radix_node *rn, void *vw)
+{
+	struct walkarg *w = vw;
+	struct rtentry *rt = (struct rtentry *)rn;
+	int error = 0, size;
+	struct rt_addrinfo info;
+
+	RT_LOCK(rt);
+	if (w->w_op == NET_RT_DUMPX_FLAGS && !(rt->rt_flags & w->w_arg)) {
+		RT_UNLOCK(rt);
+		return (0);
+	}
+	bzero(&info, sizeof (info));
+	info.rti_info[RTAX_DST] = rt_key(rt);
+	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
+	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
+
+	size = rt_msg2(RTM_GET_EXT, &info, 0, w);
+	if (w->w_req && w->w_tmem) {
+		struct rt_msghdr_ext *ertm = (struct rt_msghdr_ext *)w->w_tmem;
+
+		ertm->rtm_flags = rt->rt_flags;
+		ertm->rtm_use = rt->rt_use;
+		rt_getmetrics(rt, &ertm->rtm_rmx);
+		ertm->rtm_index = rt->rt_ifp->if_index;
+		ertm->rtm_pid = 0;
+		ertm->rtm_seq = 0;
+		ertm->rtm_errno = 0;
+		ertm->rtm_addrs = info.rti_addrs;
+		if (rt->rt_llinfo_get_ri == NULL)
+			bzero(&ertm->rtm_ri, sizeof (ertm->rtm_ri));
+		else
+			rt->rt_llinfo_get_ri(rt, &ertm->rtm_ri);
+
+		error = SYSCTL_OUT(w->w_req, (caddr_t)ertm, size);
+		RT_UNLOCK(rt);
+		return (error);
+	}
+	RT_UNLOCK(rt);
+	return (error);
+}
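
NET_RT_DUMPX and NET_RT_DUMPX_FLAGS are reached from userland through the PF_ROUTE sysctl tree (see sysctl_rtsock below). A hypothetical caller sizing the extended dump might look like the sketch that follows; the MIB layout mirrors the classic NET_RT_DUMP one, and these are private interfaces:

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <net/route.h>

/* Buffer size needed for the rt_msghdr_ext records, or 0 on error. */
static size_t
dumpx_size(int af)
{
	int mib[6] = { CTL_NET, PF_ROUTE, 0, af, NET_RT_DUMPX, 0 };
	size_t len = 0;

	if (sysctl(mib, 6, NULL, &len, NULL, 0) == -1)
		return (0);
	return (len);
}
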
+
+/*
+ * rdar://9307819
+ * To avoid calling copyout() while holding locks, and to avoid causing
+ * problems in the paging path, sysctl_iflist() and sysctl_iflist2()
+ * construct the list in two passes.  In the first pass we compute the
+ * total length of the data we are going to copy out, then we release
+ * all locks to allocate a temporary buffer that gets filled in the
+ * second pass.
+ *
+ * Note that we are verifying the assumption that _MALLOC returns a
+ * buffer that is at least 32 bits aligned and that the messages and
+ * addresses are 32 bits aligned.
+ */
+
+int
+sysctl_iflist(int af, struct walkarg *w)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 	struct	rt_addrinfo info;
 	int	len, error = 0;
+	int	pass = 0;
+	int	total_len = 0, current_len = 0;
+	char	*total_buffer = NULL, *cp = NULL;
 
 	bzero((caddr_t)&info, sizeof(info));
-	ifnet_head_lock_shared();
-	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
-		if (error)
-			break;
-		if (w->w_arg && w->w_arg != ifp->if_index)
-			continue;
-		ifnet_lock_shared(ifp);
-		ifa = ifp->if_addrhead.tqh_first;
-		ifpaddr = ifa->ifa_addr;
-		len = rt_msg2(RTM_IFINFO, &info, (caddr_t)0, w);
-		ifpaddr = 0;
-		if (w->w_req && w->w_tmem) {
-			struct if_msghdr *ifm;
-
-			ifm = (struct if_msghdr *)w->w_tmem;
-			ifm->ifm_index = ifp->if_index;
-			ifm->ifm_flags = (u_short)ifp->if_flags;
-			if_data_internal_to_if_data(ifp, &ifp->if_data, &ifm->ifm_data);
-			ifm->ifm_addrs = info.rti_addrs;
-			error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len);
-			if (error) {
-				ifnet_lock_done(ifp);
+	
+	for (pass = 0; pass < 2; pass++) {
+		ifnet_head_lock_shared();
+		
+		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+			if (error)
 				break;
-			}
-		}
-		while ((ifa = ifa->ifa_link.tqe_next) != 0) {
-			if (af && af != ifa->ifa_addr->sa_family)
+			if (w->w_arg && w->w_arg != ifp->if_index)
 				continue;
-			ifaaddr = ifa->ifa_addr;
-			netmask = ifa->ifa_netmask;
-			brdaddr = ifa->ifa_dstaddr;
-			len = rt_msg2(RTM_NEWADDR, &info, 0, w);
-			if (w->w_req && w->w_tmem) {
-				struct ifa_msghdr *ifam;
-
-				ifam = (struct ifa_msghdr *)w->w_tmem;
-				ifam->ifam_index = ifa->ifa_ifp->if_index;
-				ifam->ifam_flags = ifa->ifa_flags;
-				ifam->ifam_metric = ifa->ifa_metric;
-				ifam->ifam_addrs = info.rti_addrs;
-				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
-				if (error)
+			ifnet_lock_shared(ifp);
+			/*
+			 * Holding ifnet lock here prevents the link address from
+			 * changing contents, so no need to hold the ifa lock.
+			 * The link address is always present; it's never freed.
+			 */
+			ifa = ifp->if_lladdr;
+			info.rti_info[RTAX_IFP] = ifa->ifa_addr;
+			len = rt_msg2(RTM_IFINFO, &info, (caddr_t)0, NULL);
+			if (pass == 0) {
+				total_len += len;
+			} else {
+				struct if_msghdr *ifm;
+
+				if (current_len + len > total_len) {
+					ifnet_lock_done(ifp);
+					printf("sysctl_iflist: current_len (%d) + len (%d) > total_len (%d)\n",
+						current_len, len, total_len);
+					error = ENOBUFS;
 					break;
+				}
+				info.rti_info[RTAX_IFP] = ifa->ifa_addr;
+				len = rt_msg2(RTM_IFINFO, &info, (caddr_t)cp, NULL);
+				info.rti_info[RTAX_IFP] = NULL;
+	
+				ifm = (struct if_msghdr *)cp;
+				ifm->ifm_index = ifp->if_index;
+				ifm->ifm_flags = (u_short)ifp->if_flags;
+				if_data_internal_to_if_data(ifp, &ifp->if_data,
+					&ifm->ifm_data);
+				ifm->ifm_addrs = info.rti_addrs;
+
+				cp += len;
+				VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
+				current_len += len;
 			}
+			while ((ifa = ifa->ifa_link.tqe_next) != 0) {
+				IFA_LOCK(ifa);
+				if (af && af != ifa->ifa_addr->sa_family) {
+					IFA_UNLOCK(ifa);
+					continue;
+				}
+				info.rti_info[RTAX_IFA] = ifa->ifa_addr;
+				info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
+				info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
+				len = rt_msg2(RTM_NEWADDR, &info, 0, 0);
+				if (pass == 0) {
+					total_len += len;
+				} else {
+					struct ifa_msghdr *ifam;
+
+					if (current_len + len > total_len) {
+						IFA_UNLOCK(ifa);
+						printf("sysctl_iflist: current_len (%d) + len (%d) > total_len (%d)\n",
+							current_len, len, total_len);
+						error = ENOBUFS;
+						break;
+					}
+					len = rt_msg2(RTM_NEWADDR, &info, (caddr_t)cp, NULL);
+	
+					ifam = (struct ifa_msghdr *)cp;
+					ifam->ifam_index = ifa->ifa_ifp->if_index;
+					ifam->ifam_flags = ifa->ifa_flags;
+					ifam->ifam_metric = ifa->ifa_metric;
+					ifam->ifam_addrs = info.rti_addrs;
+
+					cp += len;
+					VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
+					current_len += len;
+				}
+				IFA_UNLOCK(ifa);
+			}
+			ifnet_lock_done(ifp);
+			info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
+				info.rti_info[RTAX_BRD] = NULL;
+		}
+		
+		ifnet_head_done();
+		
+		if (error)
+			break;
+		
+		if (pass == 0) {
+			/* Better to return zero length buffer than ENOBUFS */
+			if (total_len == 0)
+				total_len = 1;
+			total_len += total_len >> 3;
+			total_buffer = _MALLOC(total_len, M_RTABLE, M_ZERO | M_WAITOK);
+			if (total_buffer == NULL) {
+				printf("sysctl_iflist: _MALLOC(%d) failed\n", total_len);
+				error = ENOBUFS;
+				break;
+			}
+			cp = total_buffer;
+			VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
+		} else {
+			error = SYSCTL_OUT(w->w_req, total_buffer, current_len);
+			if (error)
+				break;
 		}
-		ifnet_lock_done(ifp);
-		ifaaddr = netmask = brdaddr = 0;
 	}
-	ifnet_head_done();
+	
+	if (total_buffer != NULL)
+		_FREE(total_buffer, M_RTABLE);
+	
 	return error;
 }
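
Stripped of the interface walking, the control flow above reduces to a small pattern: pass 0 only measures, the buffer is then allocated with 1/8 slack, and pass 1 fills it. A self-contained userspace analogue of that pattern (every name here is illustrative, none of them kernel APIs):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Measure (dst == NULL) or emit one record; returns its length. */
static size_t
emit(char *dst, const char *src)
{
	size_t len = strlen(src) + 1;

	if (dst != NULL)
		memcpy(dst, src, len);
	return (len);
}

int
main(void)
{
	const char *items[] = { "en0", "lo0", "utun0" };
	size_t total_len = 0, current_len = 0;
	char *buf = NULL, *cp = NULL;

	for (int pass = 0; pass < 2; pass++) {
		for (size_t i = 0; i < 3; i++) {
			size_t len = emit(pass == 0 ? NULL : cp, items[i]);

			if (pass == 0) {
				total_len += len;
			} else {
				cp += len;
				current_len += len;
			}
		}
		if (pass == 0) {
			total_len += total_len >> 3;	/* slack for growth */
			cp = buf = calloc(1, total_len);
			if (buf == NULL)
				return (1);
		}
	}
	printf("used %zu of %zu bytes\n", current_len, total_len);
	free(buf);
	return (0);
}
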
 
 int
-sysctl_iflist2(
-	int	af,
-	struct	walkarg *w)
+sysctl_iflist2(int af, struct walkarg *w)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa;
 	struct	rt_addrinfo info;
 	int	len, error = 0;
-	
+	int	pass = 0;
+	int	total_len = 0, current_len = 0;
+	char	*total_buffer = NULL, *cp = NULL;
+
 	bzero((caddr_t)&info, sizeof(info));
-	ifnet_head_lock_shared();
-	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
-		if (error)
-			break;
-		if (w->w_arg && w->w_arg != ifp->if_index)
-			continue;
-		ifnet_lock_shared(ifp);
-		ifa = ifp->if_addrhead.tqh_first;
-		ifpaddr = ifa->ifa_addr;
-		len = rt_msg2(RTM_IFINFO2, &info, (caddr_t)0, w);
-		ifpaddr = 0;
-		if (w->w_req && w->w_tmem) {
-			struct if_msghdr2 *ifm;
-
-			ifm = (struct if_msghdr2 *)w->w_tmem;
-			ifm->ifm_addrs = info.rti_addrs;
-			ifm->ifm_flags = (u_short)ifp->if_flags;
-			ifm->ifm_index = ifp->if_index;
-			ifm->ifm_snd_len = ifp->if_snd.ifq_len;
-			ifm->ifm_snd_maxlen = ifp->if_snd.ifq_maxlen;
-			ifm->ifm_snd_drops = ifp->if_snd.ifq_drops;
-			ifm->ifm_timer = ifp->if_timer;
-			if_data_internal_to_if_data64(ifp, &ifp->if_data, &ifm->ifm_data);
-			error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
-			if (error) {
-				ifnet_lock_done(ifp);
+
+	for (pass = 0; pass < 2; pass++) {
+		ifnet_head_lock_shared();
+		
+		TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+			if (error)
 				break;
-			}
-		}
-		while ((ifa = ifa->ifa_link.tqe_next) != 0) {
-			if (af && af != ifa->ifa_addr->sa_family)
+			if (w->w_arg && w->w_arg != ifp->if_index)
 				continue;
-			ifaaddr = ifa->ifa_addr;
-			netmask = ifa->ifa_netmask;
-			brdaddr = ifa->ifa_dstaddr;
-			len = rt_msg2(RTM_NEWADDR, &info, 0, w);
-			if (w->w_req && w->w_tmem) {
-				struct ifa_msghdr *ifam;
-
-				ifam = (struct ifa_msghdr *)w->w_tmem;
-				ifam->ifam_index = ifa->ifa_ifp->if_index;
-				ifam->ifam_flags = ifa->ifa_flags;
-				ifam->ifam_metric = ifa->ifa_metric;
-				ifam->ifam_addrs = info.rti_addrs;
-				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
-				if (error)
+			ifnet_lock_shared(ifp);
+			/*
+			 * Holding ifnet lock here prevents the link address from
+			 * changing contents, so no need to hold the ifa lock.
+			 * The link address is always present; it's never freed.
+			 */
+			ifa = ifp->if_lladdr;
+			info.rti_info[RTAX_IFP] = ifa->ifa_addr;
+			len = rt_msg2(RTM_IFINFO2, &info, (caddr_t)0, NULL);
+			if (pass == 0) {
+				total_len += len;
+			} else {
+				struct if_msghdr2 *ifm;
+
+				if (current_len + len > total_len) {
+					ifnet_lock_done(ifp);
+					printf("sysctl_iflist2: current_len (%d) + len (%d) > total_len (%d)\n",
+						current_len, len, total_len);
+					error = ENOBUFS;
 					break;
+				}
+				info.rti_info[RTAX_IFP] = ifa->ifa_addr;
+				len = rt_msg2(RTM_IFINFO2, &info, (caddr_t)cp, NULL);
+				info.rti_info[RTAX_IFP] = NULL;
+		
+				ifm = (struct if_msghdr2 *)cp;
+				ifm->ifm_addrs = info.rti_addrs;
+				ifm->ifm_flags = (u_short)ifp->if_flags;
+				ifm->ifm_index = ifp->if_index;
+				ifm->ifm_snd_len = ifp->if_snd.ifq_len;
+				ifm->ifm_snd_maxlen = ifp->if_snd.ifq_maxlen;
+				ifm->ifm_snd_drops = ifp->if_snd.ifq_drops;
+				ifm->ifm_timer = ifp->if_timer;
+				if_data_internal_to_if_data64(ifp, &ifp->if_data,
+					&ifm->ifm_data);
+
+				cp += len;
+				VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
+				current_len += len;
 			}
-		}
-		if (error) {
-			ifnet_lock_done(ifp);
-			break;
-		}
-		{
-			struct ifmultiaddr *ifma;
-			
-			for (ifma = ifp->if_multiaddrs.lh_first; ifma;
-				ifma = ifma->ifma_link.le_next) {
-				if (af && af != ifma->ifma_addr->sa_family)
+			while ((ifa = ifa->ifa_link.tqe_next) != 0) {
+				IFA_LOCK(ifa);
+				if (af && af != ifa->ifa_addr->sa_family) {
+					IFA_UNLOCK(ifa);
 					continue;
-				bzero((caddr_t)&info, sizeof(info));
-				ifaaddr = ifma->ifma_addr;
-				if (ifp->if_addrhead.tqh_first)
-					ifpaddr = ifp->if_addrhead.tqh_first->ifa_addr;
-				if (ifma->ifma_ll)
-					gate = ifma->ifma_ll->ifma_addr;
-				len = rt_msg2(RTM_NEWMADDR2, &info, 0, w);
-				if (w->w_req && w->w_tmem) {
-					struct ifma_msghdr2 *ifmam;
-
-					ifmam = (struct ifma_msghdr2 *)w->w_tmem;
-					ifmam->ifmam_addrs = info.rti_addrs;
-					ifmam->ifmam_flags = 0;
-					ifmam->ifmam_index = ifma->ifma_ifp->if_index;
-					ifmam->ifmam_refcount = ifma->ifma_refcount;
-					error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
-					if (error)
+				}
+				info.rti_info[RTAX_IFA] = ifa->ifa_addr;
+				info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
+				info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
+				len = rt_msg2(RTM_NEWADDR, &info, 0, 0);
+				if (pass == 0) {
+					total_len += len;
+				} else {
+					struct ifa_msghdr *ifam;
+	
+					if (current_len + len > total_len) {
+						IFA_UNLOCK(ifa);
+						printf("sysctl_iflist2: current_len (%d) + len (%d) > total_len (%d)\n",
+							current_len, len, total_len);
+						error = ENOBUFS;
 						break;
+					}
+					len = rt_msg2(RTM_NEWADDR, &info, (caddr_t)cp, 0);
+
+					ifam = (struct ifa_msghdr *)cp;
+					ifam->ifam_index = ifa->ifa_ifp->if_index;
+					ifam->ifam_flags = ifa->ifa_flags;
+					ifam->ifam_metric = ifa->ifa_metric;
+					ifam->ifam_addrs = info.rti_addrs;
+
+					cp += len;
+					VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
+					current_len += len;
+				}
+				IFA_UNLOCK(ifa);
+			}
+			if (error) {
+				ifnet_lock_done(ifp);
+				break;
+			}
+			{
+				struct ifmultiaddr *ifma;
+	
+				for (ifma = LIST_FIRST(&ifp->if_multiaddrs);
+					ifma != NULL; ifma = LIST_NEXT(ifma, ifma_link)) {
+					struct ifaddr *ifa0;
+	
+					IFMA_LOCK(ifma);
+					if (af && af != ifma->ifma_addr->sa_family) {
+						IFMA_UNLOCK(ifma);
+						continue;
+					}
+					bzero((caddr_t)&info, sizeof(info));
+					info.rti_info[RTAX_IFA] = ifma->ifma_addr;
+					/*
+					 * Holding ifnet lock here prevents the link
+					 * address from changing contents, so no need
+					 * to hold the ifa0 lock.  The link address is
+					 * always present; it's never freed.
+					 */
+					ifa0 = ifp->if_lladdr;
+					info.rti_info[RTAX_IFP] = ifa0->ifa_addr;
+					if (ifma->ifma_ll != NULL)
+						info.rti_info[RTAX_GATEWAY] =
+						    ifma->ifma_ll->ifma_addr;
+					len = rt_msg2(RTM_NEWMADDR2, &info, 0, 0);
+					if (pass == 0) {
+						total_len += len;
+					} else {
+						struct ifma_msghdr2 *ifmam;
+	
+						if (current_len + len > total_len) {
+							IFMA_UNLOCK(ifma);
+							printf("sysctl_iflist2: current_len (%d) + len (%d) > total_len (%d)\n",
+								current_len, len, total_len);
+							error = ENOBUFS;
+							break;
+						}
+						len = rt_msg2(RTM_NEWMADDR2, &info, (caddr_t)cp, 0);
+
+						ifmam = (struct ifma_msghdr2 *)cp;
+						ifmam->ifmam_addrs = info.rti_addrs;
+						ifmam->ifmam_flags = 0;
+						ifmam->ifmam_index =
+							ifma->ifma_ifp->if_index;
+						ifmam->ifmam_refcount =
+							ifma->ifma_reqcnt;
+
+						cp += len;
+						VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
+						current_len += len;
+					}
+					IFMA_UNLOCK(ifma);
 				}
 			}
+			ifnet_lock_done(ifp);
+			info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
+				info.rti_info[RTAX_BRD] = NULL;
+		}
+		ifnet_head_done();
+		
+		if (error)
+			break;
+		
+		if (pass == 0) {
+			/* Better to return zero length buffer than ENOBUFS */
+			if (total_len == 0)
+				total_len = 1;
+			total_len += total_len >> 3;
+			total_buffer = _MALLOC(total_len, M_RTABLE, M_ZERO | M_WAITOK);
+			if (total_buffer == NULL) {
+				printf("sysctl_iflist2: _MALLOC(%d) failed\n", total_len);
+				error = ENOBUFS;
+				break;
+			}
+			cp = total_buffer;
+			VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t)));
+		} else {
+			error = SYSCTL_OUT(w->w_req, total_buffer, current_len);
+			if (error)
+				break;
 		}
-		ifnet_lock_done(ifp);
-		ifaaddr = netmask = brdaddr = 0;
 	}
-	ifnet_head_done();
+	
+	if (total_buffer != NULL)
+		_FREE(total_buffer, M_RTABLE);
+	
 	return error;
 }
 
@@ -1467,16 +1827,15 @@ sysctl_rtstat(struct sysctl_req *req)
 static int
 sysctl_rttrash(struct sysctl_req *req)
 {
-        int error;
-
-        error = SYSCTL_OUT(req, &rttrash, sizeof(rttrash));
-        if (error)
-                return (error);
+	int error;
+
+	error = SYSCTL_OUT(req, &rttrash, sizeof(rttrash));
+	if (error)
+		return (error);
 
-        return 0;
+	return 0;
 }
 
-#if IFNET_ROUTE_REFCNT
 /*
  * Called from pfslowtimo(), protected by domain_proto_mtx
  */
@@ -1503,9 +1862,11 @@ rt_drainall(void)
 		timerclear(&last_ts);
 
 		in_rtqdrain();		/* protocol cloned routes: INET */
-		in6_rtqdrain();		/* protocol cloned routes: INET6 */
 		in_arpdrain(NULL);	/* cloned routes: ARP */
+#if INET6
+		in6_rtqdrain();		/* protocol cloned routes: INET6 */
 		nd6_drain(NULL);	/* cloned routes: ND6 */
+#endif /* INET6 */
 
 		last_ts.tv_sec = current_ts.tv_sec;
 		last_ts.tv_usec = current_ts.tv_usec;
@@ -1522,7 +1883,6 @@ rt_aggdrain(int on)
 	else
 		routedomain.dom_protosw->pr_flags &= ~PR_AGGDRAIN;
 }
-#endif /* IFNET_ROUTE_REFCNT */
 
 static int
 sysctl_rtsock SYSCTL_HANDLER_ARGS
@@ -1556,7 +1916,17 @@ sysctl_rtsock SYSCTL_HANDLER_ARGS
 		for (i = 1; i <= AF_MAX; i++)
 			if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
 			    (error = rnh->rnh_walktree(rnh,
-							sysctl_dumpentry, &w)))
+			    sysctl_dumpentry, &w)))
+				break;
+		lck_mtx_unlock(rnh_lock);
+		break;
+	case NET_RT_DUMPX:
+	case NET_RT_DUMPX_FLAGS:
+		lck_mtx_lock(rnh_lock);
+		for (i = 1; i <= AF_MAX; i++)
+			if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
+			    (error = rnh->rnh_walktree(rnh,
+			    sysctl_dumpentry_ext, &w)))
 				break;
 		lck_mtx_unlock(rnh_lock);
 		break;
@@ -1578,7 +1948,7 @@ sysctl_rtsock SYSCTL_HANDLER_ARGS
 	return (error);
 }
 
-SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
+SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_rtsock, "");
 
 /*
  * Definitions of protocols supported in the ROUTE domain.
@@ -1587,12 +1957,7 @@ static struct protosw routesw[] = {
 { SOCK_RAW,	&routedomain,	0,		PR_ATOMIC|PR_ADDR,
   0,		route_output,	raw_ctlinput,	0,
   0,
-  raw_init,	0,		0,
-#if IFNET_ROUTE_REFCNT
-  rt_drainall,
-#else
-  0,
-#endif /* IFNET_ROUTE_REFCNT */
+  raw_init,	0,		0,		rt_drainall,
   0, 
   &route_usrreqs,
   0,			0,		0,
diff --git a/bsd/net/rtsock_mip.c b/bsd/net/rtsock_mip.c
deleted file mode 100644
index 2acd2585d..000000000
--- a/bsd/net/rtsock_mip.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*	$KAME$	*/
-
-/* to be included from net/rtsock.c - ugly but necessary for portability */
-/*
- * Mobile IPv6 addition.
- * Send a routing message to all routing socket listeners.
- */
-void
-rt_mip6msg(cmd, ifp, rt)
-	int cmd;
-	struct ifnet *ifp;
-	register struct rtentry *rt;
-{
-	struct rt_addrinfo info;
-	struct sockaddr *sa = 0;
-	struct mbuf *m = 0;
-	register struct rt_msghdr *rtm;
-
-#ifdef MIP6_DEBUG
-	printf("route_cb.any_count = %d\n", route_cb.any_count);
-#endif
-	bzero((caddr_t)&info, sizeof(info));
-
-	if (rt == 0 || ifp == 0)
-		return;
-	netmask = rt_mask(rt);
-	dst = sa = rt_key(rt);
-	gate = rt->rt_gateway;
-	genmask = rt->rt_genmask;
-	if ((m = rt_msg1(cmd, &info)) == NULL) {
-#ifdef MIP6_DEBUG
-		printf("failure... \n");
-#endif
-		return;
-	}
-	rtm = mtod(m, struct rt_msghdr *);
-	rtm->rtm_index = ifp->if_index;
-	rtm->rtm_flags |= rt->rt_flags;
-	rtm->rtm_rmx = rt->rt_rmx;
-	rtm->rtm_addrs = info.rti_addrs;
-	rtm->rtm_flags |= RTF_DONE;
-
-	route_proto.sp_protocol = sa ? sa->sa_family : 0;
-#ifdef __bsdi__
-	raw_input(m, NULL, &route_proto, &route_src, &route_dst);
-#else
-	raw_input(m, &route_proto, &route_src, &route_dst);
-#endif
-}
diff --git a/bsd/netat/Makefile b/bsd/netat/Makefile
index 9282cd445..3f307255c 100644
--- a/bsd/netat/Makefile
+++ b/bsd/netat/Makefile
@@ -10,14 +10,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 SETUP_SUBDIRS = 	\
diff --git a/bsd/netat/asp_proto.c b/bsd/netat/asp_proto.c
index bd471ff5a..8f1621b54 100644
--- a/bsd/netat/asp_proto.c
+++ b/bsd/netat/asp_proto.c
@@ -319,7 +319,7 @@ return (
 static char mbuf_str[100];
 char *mbuf_totals() 
 {
-  sprintf(mbuf_str, 
+  snprintf(mbuf_str, sizeof(mbuf_str),
 	  /*
 	  "dat = %d, prot = %d, ioc = %d, err = %d, hu = %d, ack = %d, nak = %d, ctl = %d",
 	  */
diff --git a/bsd/netat/at.c b/bsd/netat/at.c
index 572b7f58b..ae6120798 100644
--- a/bsd/netat/at.c
+++ b/bsd/netat/at.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -128,6 +128,21 @@ static int set_zones(zone_usage_t *ifz)
 	return(0);
 } /* set_zones */
 
+static int
+at_domifattach(struct ifnet *ifp, at_ifaddr_t *ifID)
+{
+	int error;
+	
+	if ((error = proto_plumb(PF_APPLETALK, ifp))) {
+		if (error != EEXIST)
+			log(LOG_ERR, "%s: proto_plumb returned %d if=%s%d\n",
+			    __func__, error, ifp->if_name, ifp->if_unit);
+	} else if (ifID)
+		ifID->at_was_attached = 1;
+
+	return (error);
+}
+
 /*
   * Generic internet control operations (ioctl's).
   * ifp is 0 if not an interface-specific ioctl.
@@ -580,10 +595,10 @@ at_control(so, cmd, data, ifp)
 
 			ifID->aa_ifp = ifp;
 			ifa = &ifID->aa_ifa;
-			error = proto_plumb(PF_APPLETALK, ifp);
+			error = at_domifattach(ifp, ifID);
 			if (error == EEXIST) {
-			    ifID->at_was_attached = 1;
-			    error = 0;
+				ifID->at_was_attached = 1;
+				error = 0;
 			}
 			if (error != 0) {
 				break;
@@ -592,27 +607,36 @@ at_control(so, cmd, data, ifp)
 			ifID->cable_multicast_addr = etalk_multicast_addr;
 			xpatcnt++;
 			ifnet_lock_exclusive(ifp);
-			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) 
-				if ((sdl = (struct sockaddr_dl *)ifa->ifa_addr) &&
-				      (sdl->sdl_family == AF_LINK)) {
-				    bcopy(LLADDR(sdl), ifID->xaddr, sizeof(ifID->xaddr));
+			/*
+			 * Holding ifnet lock here prevents the link address
+			 * from changing contents, so no need to hold the ifa
+			 * lock.  The link address is always present; it's
+			 * never freed.
+			 */
+			sdl = (struct sockaddr_dl *)ifp->if_lladdr->ifa_addr;
+			bcopy(LLADDR(sdl), ifID->xaddr, sizeof(ifID->xaddr));
 #ifdef APPLETALK_DEBUG
-				    kprintf("SIOCSIFADDR: local enet address is %x.%x.%x.%x.%x.%x\n", 
-					    ifID->xaddr[0], ifID->xaddr[1], 
-					    ifID->xaddr[2], ifID->xaddr[3], 
-					    ifID->xaddr[4], ifID->xaddr[5]);
+			kprintf("SIOCSIFADDR: local enet address is "
+			    "%x.%x.%x.%x.%x.%x\n",
+			    ifID->xaddr[0], ifID->xaddr[1],
+			    ifID->xaddr[2], ifID->xaddr[3],
+			    ifID->xaddr[4], ifID->xaddr[5]);
 #endif
-				    break;
-				  }
 
 			/* attach the AppleTalk address to the ifnet structure */
 			ifa = &ifID->aa_ifa;
+			ifa_lock_init(ifa);
+			VERIFY(!(ifa->ifa_debug & IFD_ALLOC));
 			ifa->ifa_addr = (struct sockaddr *)&ifID->ifNodeAddress;
 			ifID->ifNodeAddress.sat_len = sizeof(struct sockaddr_at);
 			ifID->ifNodeAddress.sat_family =  AF_APPLETALK;
 			/* the address itself will be filled in when ifThisNode
 			   is set */
+			IFA_LOCK(ifa);
 			if_attach_ifa(ifp, ifa);
+			/* add a reference for at_interfaces[] */
+			IFA_ADDREF_LOCKED(ifa);
+			IFA_UNLOCK(ifa);
 			ifnet_lock_done(ifp);
 		}
 	  break;
@@ -678,11 +702,7 @@ at_control(so, cmd, data, ifp)
 			error = EACCES;
 			break;
 		}
-		error = proto_plumb(PF_APPLETALK, ifp);
-		if (ifID != NULL
-		    && (error == 0 || error == EEXIST)) {
-			ifID->at_was_attached = 1;
-		}
+		error = at_domifattach(ifp, ifID);
 		break;
 
 	case SIOCPROTODETACH:
@@ -713,6 +733,7 @@ void atalk_post_msg(struct ifnet *ifp, u_long event_code, struct at_addr *addres
 	struct kev_atalk_data  	at_event_data;
 	struct kev_msg  		ev_msg;
 
+	bzero(&ev_msg, sizeof(struct kev_msg));
 	ev_msg.vendor_code    = KEV_VENDOR_APPLE;
 	ev_msg.kev_class      = KEV_NETWORK_CLASS;
 	ev_msg.kev_subclass   = KEV_ATALK_SUBCLASS;
@@ -739,3 +760,22 @@ void atalk_post_msg(struct ifnet *ifp, u_long event_code, struct at_addr *addres
 	
 	kev_post_msg(&ev_msg);
 }
+
+
+/*
+ * This is untested; the code is here only for completeness.
+ */
+void
+at_purgeaddrs(struct ifnet *ifp)
+{
+	at_ifaddr_t *ifID = NULL;
+	int pat_id;
+
+	/* Find address for this interface, if it exists */
+	for (pat_id = 0; pat_id < xpatcnt; pat_id++) {
+		if (at_interfaces[pat_id].aa_ifp == ifp) {
+			ifID = &at_interfaces[pat_id];
+			elap_offline(ifID);
+		}
+	}
+}
diff --git a/bsd/netat/at_var.h b/bsd/netat/at_var.h
index 64025eb6f..1513f9a82 100644
--- a/bsd/netat/at_var.h
+++ b/bsd/netat/at_var.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -351,7 +351,7 @@ void init_ddp_handler(void);
 int  elap_wput(gref_t *gref, gbuf_t *m);
 int at_ioctl(struct atpcb *, u_long, caddr_t, int );
 
-
+extern void at_purgeaddrs(struct ifnet *);
 
 #endif /* KERNEL_PRIVATE */
 #endif /* __APPLE_API_OBSOLETE */
diff --git a/bsd/netat/ddp.c b/bsd/netat/ddp.c
index 31467fa6b..0e2ebea4b 100644
--- a/bsd/netat/ddp.c
+++ b/bsd/netat/ddp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -384,12 +384,17 @@ void  ddp_rem_if(ifID)
 	struct ifaddr *ifa = &ifID->aa_ifa;
 
 	/* un-do processing done in SIOCSIFADDR */
-	if (ifa->ifa_addr) {
-		ifnet_lock_exclusive(ifID->aa_ifp);
+	ifnet_lock_exclusive(ifID->aa_ifp);
+	IFA_LOCK(ifa);
+	if (ifa->ifa_debug & IFD_ATTACHED) {
 		if_detach_ifa(ifID->aa_ifp, ifa);
 		ifa->ifa_addr = NULL;
-		ifnet_lock_done(ifID->aa_ifp);
 	}
+	IFA_UNLOCK(ifa);
+	/* release reference held for at_interfaces[] */
+	IFA_REMREF(ifa);
+	ifnet_lock_done(ifID->aa_ifp);
+
 	if (ifID->at_was_attached == 0 && ifID->aa_ifp != NULL) {
 		(void)proto_unplumb(PF_APPLETALK, ifID->aa_ifp);
 	}
diff --git a/bsd/netat/ddp_lap.c b/bsd/netat/ddp_lap.c
index c7e075c5a..42f81cdad 100644
--- a/bsd/netat/ddp_lap.c
+++ b/bsd/netat/ddp_lap.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -1602,7 +1602,7 @@ int at_reg_mcast(ifID, data)
 			 *(unsigned*)data, (*(unsigned *)(data+2))&0x0000ffff, 
 			 (unsigned)ifID));
 
-		if (if_addmulti(nddp, (struct sockaddr *)&sdl, 0))
+		if (if_addmulti_anon(nddp, (struct sockaddr *)&sdl, NULL))
 			return -1;
 	}
 	return 0;
@@ -1641,7 +1641,7 @@ int at_unreg_mcast(ifID, data)
 			 (unsigned)ifID));
 		bzero(data, sizeof(struct etalk_addr));
 
-		if (if_delmulti(nddp, (struct sockaddr *)&sdl))
+		if (if_delmulti_anon(nddp, (struct sockaddr *)&sdl))
 			return -1;
 	}
 	return 0;
@@ -1687,7 +1687,7 @@ int at_reg_mcast(ifID, data)
 			 *(unsigned*)data, (*(unsigned *)(data+2))&0x0000ffff, 
 			 (unsigned)ifID));
 
-		if (if_addmulti(nddp, (struct sockaddr *)&sdl, 0))
+		if (if_addmulti_anon(nddp, (struct sockaddr *)&sdl, NULL))
 			return -1;
 	}
 
@@ -1724,7 +1724,7 @@ int at_unreg_mcast(ifID, data)
 			 (unsigned)ifID));
 		bzero(data, ETHERNET_ADDR_LEN);	
 
-		if (if_delmulti(nddp, (struct sockaddr *)&sdl))
+		if (if_delmulti_anon(nddp, (struct sockaddr *)&sdl))
 			return(-1);
 	}
 
diff --git a/bsd/netat/sys_glue.c b/bsd/netat/sys_glue.c
index dd22563be..acb307fbc 100644
--- a/bsd/netat/sys_glue.c
+++ b/bsd/netat/sys_glue.c
@@ -97,10 +97,10 @@ int RouterMix = RT_MIX_DEFAULT; /* default for nbr of ppsec */
 SYSCTL_INT(_net_appletalk, OID_AUTO, routermix, CTLFLAG_WR, 
 			&RouterMix, 0, "Appletalk RouterMix");
 at_ddp_stats_t at_ddp_stats;		/* DDP statistics */
-SYSCTL_STRUCT(_net_appletalk, OID_AUTO, ddpstats, CTLFLAG_RD,
+SYSCTL_STRUCT(_net_appletalk, OID_AUTO, ddpstats, CTLFLAG_RD | CTLFLAG_LOCKED,
 	      &at_ddp_stats, at_ddp_stats, "AppleTalk DDP Stats");
 extern int atp_resp_seqno2big;
-SYSCTL_INT(_net_appletalk, OID_AUTO, atp_resp_seqno2big, CTLFLAG_RD,
+SYSCTL_INT(_net_appletalk, OID_AUTO, atp_resp_seqno2big, CTLFLAG_RD | CTLFLAG_LOCKED,
                         &atp_resp_seqno2big, 0, "Appletalk ATP seqno too big count");
 
 static void ioccmd_t_32_to_64( ioccmd_t *from_p, user_ioccmd_t *to_p );
diff --git a/bsd/netinet/Makefile b/bsd/netinet/Makefile
index de3d2890a..91973125c 100644
--- a/bsd/netinet/Makefile
+++ b/bsd/netinet/Makefile
@@ -9,14 +9,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 DATAFILES = \
@@ -32,12 +28,12 @@ KERNELFILES = \
 	kpi_ipfilter.h in_arp.h
 
 PRIVATE_DATAFILES = \
-	if_fddi.h if_atm.h ip_dummynet.h \
+	ip_dummynet.h \
 	tcp_debug.h \
 	in_gif.h ip_compat.h
 
 PRIVATE_KERNELFILES = ${KERNELFILES} \
-	ip_ecn.h ip_encap.h ip_flow.h
+	ip_ecn.h ip_encap.h
 
 INSTALL_MI_LIST	= ${DATAFILES}
 
@@ -48,7 +44,6 @@ EXPORT_MI_LIST	= ${DATAFILES} ${KERNELFILES}
 EXPORT_MI_DIR = ${INSTALL_MI_DIR}
 
 INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
-
 INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES}
 
 include $(MakeInc_rule)
diff --git a/bsd/netinet/icmp6.h b/bsd/netinet/icmp6.h
index e3c559b11..aab7c4ffd 100644
--- a/bsd/netinet/icmp6.h
+++ b/bsd/netinet/icmp6.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000,2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000,2008-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -95,6 +95,10 @@
 #define _NETINET_ICMP6_H_
 #include <sys/appleapiopts.h>
 
+#ifdef XNU_KERNEL_PRIVATE
+#include <sys/mcache.h>
+#endif
+
 #define ICMPV6_PLD_MAXLEN	1232	/* IPV6_MMTU - sizeof(struct ip6_hdr)
 					   - sizeof(struct icmp6_hdr) */
 
@@ -107,7 +111,7 @@ struct icmp6_hdr {
 		u_int16_t	icmp6_un_data16[2]; /* type-specific field */
 		u_int8_t	icmp6_un_data8[4];  /* type-specific field */
 	} icmp6_dataun;
-};
+} __attribute__((__packed__));
 
 #define icmp6_data32	icmp6_dataun.icmp6_un_data32
 #define icmp6_data16	icmp6_dataun.icmp6_un_data16
@@ -125,23 +129,27 @@ struct icmp6_hdr {
 
 #define ICMP6_ECHO_REQUEST		128	/* echo service */
 #define ICMP6_ECHO_REPLY		129	/* echo reply */
+#define MLD_LISTENER_QUERY		130 	/* multicast listener query */
+#define MLD_LISTENER_REPORT		131	/* multicast listener report */
+#define MLD_LISTENER_DONE		132	/* multicast listener done */
+#define MLD_LISTENER_REDUCTION MLD_LISTENER_DONE /* RFC3542 definition */
+
+/* RFC2292 decls */
 #define ICMP6_MEMBERSHIP_QUERY		130	/* group membership query */
-#define MLD6_LISTENER_QUERY		130 	/* multicast listener query */
 #define ICMP6_MEMBERSHIP_REPORT		131	/* group membership report */
-#define MLD6_LISTENER_REPORT		131	/* multicast listener report */
 #define ICMP6_MEMBERSHIP_REDUCTION	132	/* group membership termination */
-#define MLD6_LISTENER_DONE		132	/* multicast listener done */
 
 #ifndef KERNEL
-#define	MLD_LISTENER_QUERY		MLD6_LISTENER_QUERY
-#define	MLD_LISTENER_REPORT		MLD6_LISTENER_REPORT
-#define	MLD_LISTENER_DONE		MLD6_LISTENER_DONE
-#endif /* !KERNEL */
+/* the following are for backward compatibility with old KAME apps. */
+#define MLD6_LISTENER_QUERY	MLD_LISTENER_QUERY
+#define MLD6_LISTENER_REPORT	MLD_LISTENER_REPORT
+#define MLD6_LISTENER_DONE	MLD_LISTENER_DONE
+#endif
 
 #define ND_ROUTER_SOLICIT		133	/* router solicitation */
-#define ND_ROUTER_ADVERT		134	/* router advertisment */
+#define ND_ROUTER_ADVERT		134	/* router advertisement */
 #define ND_NEIGHBOR_SOLICIT		135	/* neighbor solicitation */
-#define ND_NEIGHBOR_ADVERT		136	/* neighbor advertisment */
+#define ND_NEIGHBOR_ADVERT		136	/* neighbor advertisement */
 #define ND_REDIRECT			137	/* redirect */
 
 #define ICMP6_ROUTER_RENUMBERING	138	/* router renumbering */
@@ -152,20 +160,18 @@ struct icmp6_hdr {
 #define ICMP6_FQDN_REPLY		140	/* FQDN reply */
 #define ICMP6_NI_QUERY			139	/* node information request */
 #define ICMP6_NI_REPLY			140	/* node information reply */
+#define MLDV2_LISTENER_REPORT	143	/* RFC3810 listener report */
 
 /* The definitions below are experimental. TBA */
-#define MLD6_MTRACE_RESP		200	/* mtrace response(to sender) */
-#define MLD6_MTRACE			201	/* mtrace messages */
+#define MLD_MTRACE_RESP			200	/* mtrace resp (to sender) */
+#define MLD_MTRACE			201	/* mtrace messages */
 
 #ifndef KERNEL
-#define	MLD_MTRACE_RESP			MLD6_MTRACE_RESP
-#define	MLD_MTRACE			MLD6_MTRACE
-#endif /* !KERNEL */
+#define MLD6_MTRACE_RESP	MLD_MTRACE_RESP
+#define MLD6_MTRACE		MLD_MTRACE
+#endif
 
-#define ICMP6_HADISCOV_REQUEST		202	/* XXX To be defined */
-#define ICMP6_HADISCOV_REPLY		203	/* XXX To be defined */
-  
-#define ICMP6_MAXTYPE			203
+#define ICMP6_MAXTYPE			201
 
 #define ICMP6_DST_UNREACH_NOROUTE	0	/* no route to destination */
 #define ICMP6_DST_UNREACH_ADMIN	 	1	/* administratively prohibited */
@@ -202,16 +208,30 @@ struct icmp6_hdr {
 /*
  * Multicast Listener Discovery
  */
-struct mld6_hdr {
-	struct icmp6_hdr	mld6_hdr;
-	struct in6_addr		mld6_addr; /* multicast address */
-};
+struct mld_hdr {
+	struct icmp6_hdr	mld_icmp6_hdr;
+	struct in6_addr		mld_addr; /* multicast address */
+} __attribute__((__packed__));
+
+/* definitions to provide backward compatibility to old KAME applications */
+#ifndef KERNEL
+#define mld6_hdr	mld_hdr
+#define mld6_type	mld_type
+#define mld6_code	mld_code
+#define mld6_cksum	mld_cksum
+#define mld6_maxdelay	mld_maxdelay
+#define mld6_reserved	mld_reserved
+#define mld6_addr	mld_addr
+#endif
 
-#define mld6_type	mld6_hdr.icmp6_type
-#define mld6_code	mld6_hdr.icmp6_code
-#define mld6_cksum	mld6_hdr.icmp6_cksum
-#define mld6_maxdelay	mld6_hdr.icmp6_data16[0]
-#define mld6_reserved	mld6_hdr.icmp6_data16[1]
+/* shortcut macro definitions */
+#define mld_type	mld_icmp6_hdr.icmp6_type
+#define mld_code	mld_icmp6_hdr.icmp6_code
+#define mld_cksum	mld_icmp6_hdr.icmp6_cksum
+#define mld_maxdelay	mld_icmp6_hdr.icmp6_data16[0]
+#define mld_reserved	mld_icmp6_hdr.icmp6_data16[1]
+#define mld_v2_reserved	mld_icmp6_hdr.icmp6_data16[0]
+#define mld_v2_numrecs	mld_icmp6_hdr.icmp6_data16[1]
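
A minimal usage sketch (illustrative only, not part of this change): an MLDv1
listener report header can be filled in entirely through the shortcut macros
above, since mld_type and friends resolve to fields of the embedded ICMPv6
header.

	static void
	mld_fill_report(struct mld_hdr *mldh, const struct in6_addr *group)
	{
		bzero(mldh, sizeof (*mldh));
		mldh->mld_type = MLD_LISTENER_REPORT;	/* ICMPv6 type 131 */
		mldh->mld_code = 0;
		mldh->mld_maxdelay = 0;		/* meaningful only in queries */
		mldh->mld_addr = *group;	/* group being reported */
		/* mld_cksum is computed later over the IPv6 pseudo-header */
	}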
 
 /*
  * Neighbor Discovery
@@ -220,7 +240,7 @@ struct mld6_hdr {
 struct nd_router_solicit {	/* router solicitation */
 	struct icmp6_hdr 	nd_rs_hdr;
 	/* could be followed by options */
-};
+} __attribute__((__packed__));
 
 #define nd_rs_type	nd_rs_hdr.icmp6_type
 #define nd_rs_code	nd_rs_hdr.icmp6_code
@@ -232,7 +252,7 @@ struct nd_router_advert {	/* router advertisement */
 	u_int32_t		nd_ra_reachable;	/* reachable time */
 	u_int32_t		nd_ra_retransmit;	/* retransmit timer */
 	/* could be followed by options */
-};
+} __attribute__((__packed__));
 
 #define nd_ra_type		nd_ra_hdr.icmp6_type
 #define nd_ra_code		nd_ra_hdr.icmp6_code
@@ -260,7 +280,7 @@ struct nd_neighbor_solicit {	/* neighbor solicitation */
 	struct icmp6_hdr	nd_ns_hdr;
 	struct in6_addr		nd_ns_target;	/*target address */
 	/* could be followed by options */
-};
+} __attribute__((__packed__));
 
 #define nd_ns_type		nd_ns_hdr.icmp6_type
 #define nd_ns_code		nd_ns_hdr.icmp6_code
@@ -271,7 +291,7 @@ struct nd_neighbor_advert {	/* neighbor advertisement */
 	struct icmp6_hdr	nd_na_hdr;
 	struct in6_addr		nd_na_target;	/* target address */
 	/* could be followed by options */
-};
+} __attribute__((__packed__));
 
 #define nd_na_type		nd_na_hdr.icmp6_type
 #define nd_na_code		nd_na_hdr.icmp6_code
@@ -294,7 +314,7 @@ struct nd_redirect {		/* redirect */
 	struct in6_addr		nd_rd_target;	/* target address */
 	struct in6_addr		nd_rd_dst;	/* destination address */
 	/* could be followed by options */
-};
+} __attribute__((__packed__));
 
 #define nd_rd_type		nd_rd_hdr.icmp6_type
 #define nd_rd_code		nd_rd_hdr.icmp6_code
@@ -305,13 +325,14 @@ struct nd_opt_hdr {		/* Neighbor discovery option header */
 	u_int8_t	nd_opt_type;
 	u_int8_t	nd_opt_len;
 	/* followed by option-specific data */
-};
+} __attribute__((__packed__));
 
 #define ND_OPT_SOURCE_LINKADDR		1
 #define ND_OPT_TARGET_LINKADDR		2
 #define ND_OPT_PREFIX_INFORMATION	3
 #define ND_OPT_REDIRECTED_HEADER	4
 #define ND_OPT_MTU			5
+#define ND_OPT_RDNSS			25	/* RFC 5006 */
 
 #define ND_OPT_ROUTE_INFO		200	/* draft-ietf-ipngwg-router-preference, not officially assigned yet */
 
@@ -324,7 +345,7 @@ struct nd_opt_prefix_info {	/* prefix information */
 	u_int32_t	nd_opt_pi_preferred_time;
 	u_int32_t	nd_opt_pi_reserved2;
 	struct in6_addr	nd_opt_pi_prefix;
-};
+} __attribute__((__packed__));
 
 #define ND_OPT_PI_FLAG_ONLINK		0x80
 #define ND_OPT_PI_FLAG_AUTO		0x40
@@ -335,14 +356,14 @@ struct nd_opt_rd_hdr {		/* redirected header */
 	u_int16_t	nd_opt_rh_reserved1;
 	u_int32_t	nd_opt_rh_reserved2;
 	/* followed by IP header and data */
-};
+} __attribute__((__packed__));
 
 struct nd_opt_mtu {		/* MTU option */
 	u_int8_t	nd_opt_mtu_type;
 	u_int8_t	nd_opt_mtu_len;
 	u_int16_t	nd_opt_mtu_reserved;
 	u_int32_t	nd_opt_mtu_mtu;
-};
+} __attribute__((__packed__));
 
 struct nd_opt_route_info {	/* route info */
 	u_int8_t	nd_opt_rti_type;
@@ -350,8 +371,16 @@ struct nd_opt_route_info {	/* route info */
 	u_int8_t	nd_opt_rti_prefixlen;
 	u_int8_t	nd_opt_rti_flags;
 	u_int32_t	nd_opt_rti_lifetime;
-	/* followed by prefix */
-};
+	/* prefix follows */
+} __attribute__((__packed__));
+
+struct nd_opt_rdnss {		/* recursive domain name system servers */
+	u_int8_t	nd_opt_rdnss_type;
+	u_int8_t	nd_opt_rdnss_len;
+	u_int16_t	nd_opt_rdnss_reserved;
+	u_int32_t	nd_opt_rdnss_lifetime;
+	struct in6_addr	nd_opt_rdnss_addr[1];
+} __attribute__((__packed__));
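
The RDNSS option carries a variable-length array of IPv6 addresses:
nd_opt_rdnss_len counts 8-octet units, one for the fixed header plus two per
address, so a well-formed option has an odd length of at least 3 (RFC 5006).
A hedged validation sketch (the helper name is illustrative):

	static int
	nd_opt_rdnss_count(const struct nd_opt_rdnss *rdnss)
	{
		if (rdnss->nd_opt_rdnss_len < 3 ||
		    (rdnss->nd_opt_rdnss_len & 1) == 0)
			return (-1);	/* malformed option */
		return ((rdnss->nd_opt_rdnss_len - 1) / 2);
	}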
 
 /*
  * icmp6 namelookup
@@ -366,7 +395,7 @@ struct icmp6_namelookup {
 	u_int8_t	icmp6_nl_name[3];
 #endif
 	/* could be followed by options */
-};
+} __attribute__((__packed__));
 
 /*
  * icmp6 node information
@@ -375,7 +404,7 @@ struct icmp6_nodeinfo {
 	struct icmp6_hdr icmp6_ni_hdr;
 	u_int8_t icmp6_ni_nonce[8];
 	/* could be followed by reply data */
-};
+} __attribute__((__packed__));
 
 #define ni_type		icmp6_ni_hdr.icmp6_type
 #define ni_code		icmp6_ni_hdr.icmp6_code
@@ -438,7 +467,7 @@ struct ni_reply_fqdn {
 	u_int32_t ni_fqdn_ttl;	/* TTL */
 	u_int8_t ni_fqdn_namelen; /* length in octets of the FQDN */
 	u_int8_t ni_fqdn_name[3]; /* XXX: alignment */
-};
+} __attribute__((__packed__));
 
 /*
  * Router Renumbering. as router-renum-08.txt
@@ -449,7 +478,7 @@ struct icmp6_router_renum {	/* router renumbering header */
 	u_int8_t	rr_flags;
 	u_int16_t	rr_maxdelay;
 	u_int32_t	rr_reserved;
-};
+} __attribute__((__packed__));
 
 #define ICMP6_RR_FLAGS_TEST		0x80
 #define ICMP6_RR_FLAGS_REQRESULT	0x40
@@ -471,7 +500,7 @@ struct rr_pco_match {		/* match prefix part */
 	u_int8_t	rpm_maxlen;
 	u_int16_t	rpm_reserved;
 	struct	in6_addr	rpm_prefix;
-};
+} __attribute__((__packed__));
 
 #define RPM_PCO_ADD		1
 #define RPM_PCO_CHANGE		2
@@ -487,7 +516,7 @@ struct rr_pco_use {		/* use prefix part */
 	u_int32_t	rpu_pltime;
 	u_int32_t	rpu_flags;
 	struct	in6_addr rpu_prefix;
-};
+} __attribute__((__packed__));
 #define ICMP6_RR_PCOUSE_RAFLAGS_ONLINK	0x80
 #define ICMP6_RR_PCOUSE_RAFLAGS_AUTO	0x40
 
@@ -505,7 +534,7 @@ struct rr_result {		/* router renumbering result message */
 	u_int8_t	rrr_matchedlen;
 	u_int32_t	rrr_ifid;
 	struct	in6_addr rrr_prefix;
-};
+} __attribute__((__packed__));
 #if BYTE_ORDER == BIG_ENDIAN
 #define ICMP6_RR_RESULT_FLAGS_OOB		0x0002
 #define ICMP6_RR_RESULT_FLAGS_FORBIDDEN		0x0001
@@ -613,24 +642,32 @@ struct icmp6stat {
 /*
  * Names for ICMP sysctl objects
  */
-#define ICMPV6CTL_STATS		1
-#define ICMPV6CTL_REDIRACCEPT	2	/* accept/process redirects */
-#define ICMPV6CTL_REDIRTIMEOUT	3	/* redirect cache time */
-#define ICMPV6CTL_ND6_PRUNE	6
-#define ICMPV6CTL_ND6_DELAY	8
-#define ICMPV6CTL_ND6_UMAXTRIES	9
+#define ICMPV6CTL_STATS				1
+#define ICMPV6CTL_REDIRACCEPT		2	/* accept/process redirects */
+#define ICMPV6CTL_REDIRTIMEOUT		3	/* redirect cache time */
+#if 0	/* obsoleted */
+#define ICMPV6CTL_ERRRATELIMIT		5	/* ICMPv6 error rate limitation */
+#endif
+#define ICMPV6CTL_ND6_PRUNE			6
+#define ICMPV6CTL_ND6_DELAY			8
+#define ICMPV6CTL_ND6_UMAXTRIES		9
 #define ICMPV6CTL_ND6_MMAXTRIES		10
 #define ICMPV6CTL_ND6_USELOOPBACK	11
 /*#define ICMPV6CTL_ND6_PROXYALL	12	obsoleted, do not reuse here */
-#define ICMPV6CTL_NODEINFO	13
-#define ICMPV6CTL_ERRPPSLIMIT	14	/* ICMPv6 error pps limitation */
+#define ICMPV6CTL_NODEINFO			13
+#define ICMPV6CTL_ERRPPSLIMIT		14	/* ICMPv6 error pps limitation */
 #define ICMPV6CTL_ND6_MAXNUDHINT	15
-#define ICMPV6CTL_MTUDISC_HIWAT	16
-#define ICMPV6CTL_MTUDISC_LOWAT	17
-#define ICMPV6CTL_ND6_DEBUG	18
-#define ICMPV6CTL_ND6_DRLIST	19
-#define ICMPV6CTL_ND6_PRLIST	20
-#define ICMPV6CTL_MAXID		21
+#define ICMPV6CTL_MTUDISC_HIWAT		16
+#define ICMPV6CTL_MTUDISC_LOWAT		17
+#define ICMPV6CTL_ND6_DEBUG			18
+#define ICMPV6CTL_ND6_DRLIST		19
+#define ICMPV6CTL_ND6_PRLIST		20
+#define ICMPV6CTL_MLD_MAXSRCFILTER	21
+#define ICMPV6CTL_MLD_SOMAXSRC		22
+#define ICMPV6CTL_MLD_VERSION		23
+#define ICMPV6CTL_ND6_MAXQLEN		24
+#define	ICMPV6CTL_ND6_ACCEPT_6TO4	25
+#define ICMPV6CTL_MAXID				26
 
 #ifdef KERNEL_PRIVATE
 #define ICMPV6CTL_NAMES { \
@@ -655,6 +692,11 @@ struct icmp6stat {
 	{ "nd6_debug", CTLTYPE_INT }, \
 	{ 0, 0 }, \
 	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ 0, 0 }, \
+	{ "nd6_accept_6to4", CTLTYPE_INT }, \
 }
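
(The four { 0, 0 } placeholders keep the name table indexed in step with the
numeric identifiers 21-24 above, so that "nd6_accept_6to4" lands at index 25.)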
 
 #define RTF_PROBEMTU	RTF_PROTO1
@@ -667,8 +709,8 @@ struct	in6_multi;
 void	icmp6_init(void);
 void	icmp6_paramerror(struct mbuf *, int);
 void	icmp6_error(struct mbuf *, int, int, int);
-int	icmp6_input(struct mbuf **, int *);
-void	icmp6_fasttimo(void);
+void	icmp6_error2(struct mbuf *, int, int, int, struct ifnet *);
+int	icmp6_input(struct mbuf **, int *, int);
 void	icmp6_reflect(struct mbuf *, size_t);
 void	icmp6_prepare(struct mbuf *);
 void	icmp6_redirect_input(struct mbuf *, int);
@@ -677,14 +719,17 @@ void	icmp6_redirect_output(struct mbuf *, struct rtentry *);
 struct	ip6ctlparam;
 void	icmp6_mtudisc_update(struct ip6ctlparam *, int);
 
+extern lck_rw_t icmp6_ifs_rwlock;
 /* XXX: is this the right place for these macros? */
 #define icmp6_ifstat_inc(ifp, tag) \
-do {								\
+do {									\
+	lck_rw_lock_shared(&icmp6_ifs_rwlock);				\
 	if ((ifp) && (ifp)->if_index <= if_index			\
-	 && (ifp)->if_index < icmp6_ifstatmax			\
-	 && icmp6_ifstat && icmp6_ifstat[(ifp)->if_index]) {	\
-		icmp6_ifstat[(ifp)->if_index]->tag++;		\
-	}							\
+	 && (ifp)->if_index < icmp6_ifstatmax				\
+	 && icmp6_ifstat && icmp6_ifstat[(ifp)->if_index]) {		\
+		atomic_add_64(&icmp6_ifstat[(ifp)->if_index]->tag, 1);	\
+	}								\
+	lck_rw_done(&icmp6_ifs_rwlock);					\
 } while (0)
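
For illustration, charging an inbound echo request against its receiving
interface is then a single invocation; the macro takes icmp6_ifs_rwlock
shared and bumps the counter with atomic_add_64():

	icmp6_ifstat_inc(ifp, ifs6_in_echo);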
 
 #define icmp6_ifoutstat_inc(ifp, type, code) \
@@ -692,7 +737,7 @@ do { \
 		icmp6_ifstat_inc(ifp, ifs6_out_msg); \
  		if (type < ICMP6_INFOMSG_MASK) \
  			icmp6_ifstat_inc(ifp, ifs6_out_error); \
-		switch(type) { \
+		switch (type) { \
 		 case ICMP6_DST_UNREACH: \
 			 icmp6_ifstat_inc(ifp, ifs6_out_dstunreach); \
 			 if (code == ICMP6_DST_UNREACH_ADMIN) \
@@ -713,13 +758,13 @@ do { \
 		 case ICMP6_ECHO_REPLY: \
 			 icmp6_ifstat_inc(ifp, ifs6_out_echoreply); \
 			 break; \
-		 case MLD6_LISTENER_QUERY: \
+		 case MLD_LISTENER_QUERY: \
 			 icmp6_ifstat_inc(ifp, ifs6_out_mldquery); \
 			 break; \
-		 case MLD6_LISTENER_REPORT: \
+		 case MLD_LISTENER_REPORT: \
 			 icmp6_ifstat_inc(ifp, ifs6_out_mldreport); \
 			 break; \
-		 case MLD6_LISTENER_DONE: \
+		 case MLD_LISTENER_DONE: \
 			 icmp6_ifstat_inc(ifp, ifs6_out_mlddone); \
 			 break; \
 		 case ND_ROUTER_SOLICIT: \
@@ -742,6 +787,12 @@ do { \
 
 extern int	icmp6_rediraccept;	/* accept/process redirects */
 extern int	icmp6_redirtimeout;	/* cache time for redirect routes */
+
+#define ICMP6_NODEINFO_FQDNOK		0x1
+#define ICMP6_NODEINFO_NODEADDROK	0x2
+#define ICMP6_NODEINFO_TMPADDROK	0x4
+#define ICMP6_NODEINFO_GLOBALOK		0x8
+
 #endif /* KERNEL_PRIVATE */
 
 #endif /* !_NETINET_ICMP6_H_ */
diff --git a/bsd/netinet/if_atm.c b/bsd/netinet/if_atm.c
deleted file mode 100644
index 0fa54144f..000000000
--- a/bsd/netinet/if_atm.c
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*      $NetBSD: if_atm.c,v 1.6 1996/10/13 02:03:01 christos Exp $       */
-
-/*
- *
- * Copyright (c) 1996 Charles D. Cranor and Washington University.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *      This product includes software developed by Charles D. Cranor and 
- *	Washington University.
- * 4. The name of the author may not be used to endorse or promote products
- *    derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/if_atm.c,v 1.8 1999/12/07 17:39:06 shin Exp $
- */
-
-/*
- * IP <=> ATM address resolution.
- */
-
-#if defined(INET) || defined(INET6)
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/queue.h>
-#include <sys/mbuf.h>
-#include <sys/socket.h>
-#include <sys/sockio.h>
-#include <sys/syslog.h>
-
-#include <net/if.h>
-#include <net/if_dl.h>
-#include <net/route.h>
-#include <net/if_atm.h>
-
-#include <netinet/in.h>
-#include <netinet/if_atm.h>
-#include <net/dlil.h>
-
-
-#if NATM
-#include <netnatm/natm.h>
-#endif
-
-
-#define SDL(s) ((struct sockaddr_dl *)s)
-
-/*
- * atm_rtrequest: handle ATM rt request (in support of generic code)
- *   inputs: "req" = request code
- *           "rt" = route entry
- *           "sa" = sockaddr
- */
-
-void
-atm_rtrequest(req, rt, sa)
-	int req;
-	register struct rtentry *rt;
-	struct sockaddr *sa;
-{
-	register struct sockaddr *gate = rt->rt_gateway;
-	struct atm_pseudoioctl api;
-#if NATM
-	struct sockaddr_in *sin;
-	struct natmpcb *npcb = NULL;
-	struct atm_pseudohdr *aph;
-#endif
-	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
-
-	if (rt->rt_flags & RTF_GATEWAY)   /* link level requests only */
-		return;
-
-	switch (req) {
-
-	case RTM_RESOLVE: /* resolve: only happens when cloning */
-		printf("atm_rtrequest: RTM_RESOLVE request detected?\n");
-		break;
-
-	case RTM_ADD:
-
-		/*
-		 * route added by a command (e.g. ifconfig, route, arp...).
-		 *
-		 * first check to see if this is not a host route, in which
-		 * case we are being called via "ifconfig" to set the address.
-		 */
-
-		if ((rt->rt_flags & RTF_HOST) == 0) { 
-			rt_setgate(rt,rt_key(rt),(struct sockaddr *)&null_sdl);
-			gate = rt->rt_gateway;
-			SDL(gate)->sdl_type = rt->rt_ifp->if_type;
-			SDL(gate)->sdl_index = rt->rt_ifp->if_index;
-			break;
-		}
-
-		if ((rt->rt_flags & RTF_CLONING) != 0) {
-			printf("atm_rtrequest: cloning route detected?\n");
-			break;
-		}
-		if (gate->sa_family != AF_LINK ||
-		    gate->sa_len < sizeof(null_sdl)) {
-			log(LOG_DEBUG, "atm_rtrequest: bad gateway value");
-			break;
-		}
-
-#if DIAGNOSTIC
-		if (rt->rt_ifp->if_ioctl == NULL) panic("atm null ioctl");
-#endif
-
-#if NATM
-		/*
-		 * let native ATM know we are using this VCI/VPI
-		 * (i.e. reserve it)
-		 */
-		sin = (struct sockaddr_in *) rt_key(rt);
-		if (sin->sin_family != AF_INET)
-			goto failed;
-		aph = (struct atm_pseudohdr *) LLADDR(SDL(gate));
-		npcb = npcb_add(NULL, rt->rt_ifp, ATM_PH_VCI(aph), 
-						ATM_PH_VPI(aph));
-		if (npcb == NULL) 
-			goto failed;
-		npcb->npcb_flags |= NPCB_IP;
-		npcb->ipaddr.s_addr = sin->sin_addr.s_addr;
-		/* XXX: move npcb to llinfo when ATM ARP is ready */
-		rt->rt_llinfo = (caddr_t) npcb;
-		rt->rt_flags |= RTF_LLINFO;
-#endif
-		/*
-		 * let the lower level know this circuit is active
-		 */
-		bcopy(LLADDR(SDL(gate)), &api.aph, sizeof(api.aph));
-		api.rxhand = NULL;
-		if (ifnet_ioctl(rt->rt_ifp, 0, SIOCATMENA, &api) != 0) {
-			printf("atm: couldn't add VC\n");
-			goto failed;
-		}
-
-		SDL(gate)->sdl_type = rt->rt_ifp->if_type;
-		SDL(gate)->sdl_index = rt->rt_ifp->if_index;
-
-		break;
-
-failed:
-#if NATM
-		if (npcb) {
-			npcb_free(npcb, NPCB_DESTROY);
-			rt->rt_llinfo = NULL;
-			rt->rt_flags &= ~RTF_LLINFO;
-		}
-#endif
-		rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0,
-			rt_mask(rt), 0, (struct rtentry **) 0);
-		break;
-
-	case RTM_DELETE:
-
-#if NATM
-		/*
-		 * tell native ATM we are done with this VC
-		 */
-
-		if (rt->rt_flags & RTF_LLINFO) {
-			npcb_free((struct natmpcb *)rt->rt_llinfo, 
-								NPCB_DESTROY);
-			rt->rt_llinfo = NULL;
-			rt->rt_flags &= ~RTF_LLINFO;
-		}
-#endif
-		/*
-		 * tell the lower layer to disable this circuit
-		 */
-
-		bcopy(LLADDR(SDL(gate)), &api.aph, sizeof(api.aph));
-		api.rxhand = NULL;
-		ifnet_ioctl(rt->rt_ifp, 0, SIOCATMDIS, &api);
-
-		break;
-	}
-}
-
-/*
- * atmresolve:
- *   inputs:
- *     [1] "rt" = the link level route to use (or null if need to look one up)
- *     [2] "m" = mbuf containing the data to be sent
- *     [3] "dst" = sockaddr_in (IP) address of dest.
- *   output:
- *     [4] "desten" = ATM pseudo header which we will fill in VPI/VCI info
- *   return: 
- *     0 == resolve FAILED; note that "m" gets m_freem'd in this case
- *     1 == resolve OK; desten contains result
- *
- *   XXX: will need more work if we wish to support ATMARP in the kernel,
- *   but this is enough for PVCs entered via the "route" command.
- */
-
-int
-atmresolve(rt, m, dst, desten)
-
-register struct rtentry *rt;
-struct mbuf *m;
-register struct sockaddr *dst;
-register struct atm_pseudohdr *desten;	/* OUT */
-
-{
-	struct sockaddr_dl *sdl;
-
-	if (m->m_flags & (M_BCAST|M_MCAST)) {
-		log(LOG_INFO, "atmresolve: BCAST/MCAST packet detected/dumped");
-		goto bad;
-	}
-
-	if (rt == NULL) {
-		rt = RTALLOC1(dst, 0);
-		if (rt == NULL) goto bad; /* failed */
-		rtunref(rt);	/* don't keep LL references */
-		if ((rt->rt_flags & RTF_GATEWAY) != 0 || 
-			(rt->rt_flags & RTF_LLINFO) == 0 ||
-			/* XXX: are we using LLINFO? */
-			rt->rt_gateway->sa_family != AF_LINK) {
-				goto bad;
-		}
-	}
-
-	/*
-	 * note that rt_gateway is a sockaddr_dl which contains the 
-	 * atm_pseudohdr data structure for this route.   we currently
-	 * don't need any rt_llinfo info (but will if we want to support
-	 * ATM ARP [c.f. if_ether.c]).
-	 */
-
-	sdl = SDL(rt->rt_gateway);
-
-	/*
-	 * Check that the address family and length are valid and the
-	 * address is resolved; otherwise, try to resolve.
-	 */
-
-
-	if (sdl->sdl_family == AF_LINK && sdl->sdl_alen == sizeof(*desten)) {
-		bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
-		return(1);	/* ok, go for it! */
-	}
-
-	/*
-	 * we got an entry, but it doesn't have valid link address
-	 * info in it (it is prob. the interface route, which has
-	 * sdl_alen == 0).    dump packet.  (fall through to "bad").
-	 */
-
-bad:
-	m_freem(m);
-	return(0);
-}
-#endif /* INET */
diff --git a/bsd/netinet/if_atm.h b/bsd/netinet/if_atm.h
deleted file mode 100644
index 989fa974d..000000000
--- a/bsd/netinet/if_atm.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* $FreeBSD: src/sys/netinet/if_atm.h,v 1.2.6.1 2000/08/03 01:07:02 peter Exp $ */
-/*      $NetBSD: if_atm.h,v 1.2 1996/07/03 17:17:17 chuck Exp $       */
-
-/*
- *
- * Copyright (c) 1996 Charles D. Cranor and Washington University.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *      This product includes software developed by Charles D. Cranor and 
- * 	Washington University.
- * 4. The name of the author may not be used to endorse or promote products
- *    derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * if_atm.h
- */
-#include <sys/appleapiopts.h>
-
-#ifdef KERNEL_PRIVATE
-struct atm_pseudohdr;
-struct mbuf;
-struct rtentry;
-struct sockaddr;
-
-void atm_rtrequest(int, struct rtentry *, struct sockaddr *);
-int atmresolve(struct rtentry *, struct mbuf *, struct sockaddr *, 
-		struct atm_pseudohdr *);
-#endif /* KERNEL_PRIVATE */
diff --git a/bsd/netinet/if_fddi.h b/bsd/netinet/if_fddi.h
deleted file mode 100644
index fb9f81f10..000000000
--- a/bsd/netinet/if_fddi.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1982, 1986, 1993
- *	The Regents of the University of California.  All rights reserved.
- * Copyright (c) 1995 Matt Thomas (thomas@lkg.dec.com)
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)if_fddi.h	8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/if_fddi.h,v 1.8 1999/12/29 04:40:58 peter Exp $
- */
-
-#ifndef _NETINET_IF_FDDI_H_
-#define _NETINET_IF_FDDI_H_
-#include <sys/appleapiopts.h>
-
-/*
- * Structure of an 100Mb/s FDDI header.
- */
-struct	fddi_header {
-	u_char	fddi_fc;
-	u_char	fddi_dhost[6];
-	u_char	fddi_shost[6];
-};
-
-#define	FDDIIPMTU		4352
-#define	FDDIMTU			4470
-#define	FDDIMIN			3
-
-#define	FDDIFC_C		0x80	/* 0b10000000 */
-#define	FDDIFC_L		0x40	/* 0b01000000 */
-#define	FDDIFC_F		0x30	/* 0b00110000 */
-#define	FDDIFC_Z		0x0F	/* 0b00001111 */
-
-#define	FDDIFC_LLC_ASYNC	0x50
-#define	FDDIFC_LLC_PRIO0	0
-#define	FDDIFC_LLC_PRIO1	1
-#define	FDDIFC_LLC_PRIO2	2
-#define	FDDIFC_LLC_PRIO3	3
-#define	FDDIFC_LLC_PRIO4	4
-#define	FDDIFC_LLC_PRIO5	5
-#define	FDDIFC_LLC_PRIO6	6
-#define	FDDIFC_LLC_PRIO7	7
-#define FDDIFC_LLC_SYNC         0xd0
-#define	FDDIFC_SMT		0x40
-
-#ifdef KERNEL_PRIVATE
-#define	fddibroadcastaddr	etherbroadcastaddr
-#define	fddi_ipmulticast_min	ether_ipmulticast_min
-#define	fddi_ipmulticast_max	ether_ipmulticast_max
-#define	fddi_addmulti		ether_addmulti
-#define	fddi_delmulti		ether_delmulti
-#define	fddi_sprintf		ether_sprintf
-
-void    fddi_ifattach(struct ifnet *);
-void    fddi_input(struct ifnet *, struct fddi_header *, struct mbuf *);
-int     fddi_output(struct ifnet *,
-           struct mbuf *, struct sockaddr *, struct rtentry *); 
-#endif /* KERNEL_PRIVATE */
-
-#endif /* _NETINET_IF_FDDI_H_ */
diff --git a/bsd/netinet/igmp.c b/bsd/netinet/igmp.c
index bc2b80d68..d10484ddf 100644
--- a/bsd/netinet/igmp.c
+++ b/bsd/netinet/igmp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,7 +25,8 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
+/*-
+ * Copyright (c) 2007-2009 Bruce Simpson.
  * Copyright (c) 1988 Stephen Deering.
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -72,15 +73,19 @@
 
 /*
  * Internet Group Management Protocol (IGMP) routines.
+ * [RFC1112, RFC2236, RFC3376]
  *
  * Written by Steve Deering, Stanford, May 1988.
  * Modified by Rosen Sharma, Stanford, Aug 1994.
  * Modified by Bill Fenner, Xerox PARC, Feb 1995.
  * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
+ * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
  *
  * MULTICAST Revision: 3.5.1.4
  */
 
+#include <sys/cdefs.h>
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
@@ -89,6 +94,10 @@
 #include <sys/protosw.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
+#include <sys/mcache.h>
+
+#include <libkern/libkern.h>
+#include <kern/zalloc.h>
 
 #include <net/if.h>
 #include <net/route.h>
@@ -100,449 +109,3659 @@
 #include <netinet/ip_var.h>
 #include <netinet/igmp.h>
 #include <netinet/igmp_var.h>
+#include <netinet/kpi_ipfilter_var.h>
+
+#ifdef IGMP_DEBUG
+__inline__ char *
+inet_ntoa(struct in_addr ina)
+{
+	static char buf[4*sizeof "123"];
+	unsigned char *ucp = (unsigned char *)&ina;
 
-#if CONFIG_MACF_NET
-#include <security/mac_framework.h>
+	snprintf(buf, sizeof(buf), "%d.%d.%d.%d",
+		ucp[0] & 0xff,
+		ucp[1] & 0xff,
+		ucp[2] & 0xff,
+		ucp[3] & 0xff);
+	return buf;
+}
 #endif
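
Note that this debug-only inet_ntoa() hands back a pointer into a single
static buffer, so it is not reentrant; two calls within one IGMP_PRINTF()
would print the same string. inet_ntoa_haddr() below inherits the same
limitation.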
 
-#ifndef __APPLE__
-static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
+static void	igi_initvar(struct igmp_ifinfo *, struct ifnet *, int);
+static struct igmp_ifinfo *igi_alloc(int);
+static void	igi_free(struct igmp_ifinfo *);
+static void	igi_delete(const struct ifnet *);
+static void	igmp_dispatch_queue(struct igmp_ifinfo *, struct ifqueue *,
+    int, const int, struct ifnet *);
+static void	igmp_final_leave(struct in_multi *, struct igmp_ifinfo *);
+static int	igmp_handle_state_change(struct in_multi *,
+		    struct igmp_ifinfo *);
+static int	igmp_initial_join(struct in_multi *, struct igmp_ifinfo *);
+static int	igmp_input_v1_query(struct ifnet *, const struct ip *,
+		    const struct igmp *);
+static int	igmp_input_v2_query(struct ifnet *, const struct ip *,
+		    const struct igmp *);
+static int	igmp_input_v3_query(struct ifnet *, const struct ip *,
+		    /*const*/ struct igmpv3 *);
+static int	igmp_input_v3_group_query(struct in_multi *, 
+		     int, /*const*/ struct igmpv3 *);
+static int	igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *,
+		    /*const*/ struct igmp *);
+static int	igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *,
+		    /*const*/ struct igmp *);
+void		igmp_sendpkt(struct mbuf *, struct ifnet *);
+static __inline__ int	igmp_isgroupreported(const struct in_addr);
+static struct mbuf *
+		igmp_ra_alloc(void);
+#ifdef IGMP_DEBUG
+static const char *	igmp_rec_type_to_str(const int);
 #endif
+static void	igmp_set_version(struct igmp_ifinfo *, const int);
+static void	igmp_flush_relq(struct igmp_ifinfo *);
+static int	igmp_v1v2_queue_report(struct in_multi *, const int);
+static void	igmp_v1v2_process_group_timer(struct in_multi *, const int);
+static void	igmp_v1v2_process_querier_timers(struct igmp_ifinfo *);
+static void	igmp_v2_update_group(struct in_multi *, const int);
+static void	igmp_v3_cancel_link_timers(struct igmp_ifinfo *);
+static void	igmp_v3_dispatch_general_query(struct igmp_ifinfo *);
+static struct mbuf *
+		igmp_v3_encap_report(struct ifnet *, struct mbuf *);
+static int	igmp_v3_enqueue_group_record(struct ifqueue *,
+		    struct in_multi *, const int, const int, const int);
+static int	igmp_v3_enqueue_filter_change(struct ifqueue *,
+		    struct in_multi *);
+static void	igmp_v3_process_group_timers(struct igmp_ifinfo *,
+		    struct ifqueue *, struct ifqueue *, struct in_multi *,
+		    const int);
+static int	igmp_v3_merge_state_changes(struct in_multi *,
+		    struct ifqueue *);
+static void	igmp_v3_suppress_group_record(struct in_multi *);
+static int	sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS;
+static int	sysctl_igmp_gsr SYSCTL_HANDLER_ARGS;
+static int	sysctl_igmp_default_version SYSCTL_HANDLER_ARGS;
 
-static struct router_info *
-		find_rti(struct ifnet *ifp, int wait);
+struct mbuf		*m_raopt;		 /* Router Alert option */
 
-static struct igmpstat igmpstat;
+static int interface_timers_running;		/* IGMPv3 general
+						 * query response */
+static int state_change_timers_running;	/* IGMPv3 state-change
+						 * retransmit */
+static int current_state_timers_running;	/* IGMPv1/v2 host
+						 * report; IGMPv3 g/sg
+						 * query response */
 
-SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RD,
-	&igmpstat, igmpstat, "");
+static LIST_HEAD(, igmp_ifinfo) igi_head;
+static struct igmpstat_v3 igmpstat_v3 = {
+	.igps_version = IGPS_VERSION_3,
+	.igps_len = sizeof(struct igmpstat_v3),
+};
+static struct igmpstat igmpstat; /* old IGMPv2 stats structure */
+static struct timeval igmp_gsrdelay = {10, 0};
 
-static int igmp_timers_are_running;
-static uint32_t igmp_all_hosts_group;
-static uint32_t igmp_all_rtrs_group;
-static struct mbuf *router_alert;
-static struct router_info *Head;
+static int igmp_recvifkludge = 1;
+static int igmp_sendra = 1;
+static int igmp_sendlocal = 1;
+static int igmp_v1enable = 1;
+static int igmp_v2enable = 1;
+static int igmp_legacysupp = 0;
+static int igmp_default_version = IGMP_VERSION_3;
 
-static void igmp_sendpkt(struct in_multi *, int, uint32_t);
+SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
+    &igmpstat, igmpstat, "");
+SYSCTL_STRUCT(_net_inet_igmp, OID_AUTO, v3stats,
+    CTLFLAG_RD | CTLFLAG_LOCKED, &igmpstat_v3, igmpstat_v3, "");
+SYSCTL_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &igmp_recvifkludge, 0,
+    "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
+SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &igmp_sendra, 0,
+    "Send IP Router Alert option in IGMPv2/v3 messages");
+SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &igmp_sendlocal, 0,
+    "Send IGMP membership reports for 224.0.0.0/24 groups");
+SYSCTL_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &igmp_v1enable, 0,
+    "Enable backwards compatibility with IGMPv1");
+SYSCTL_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &igmp_v2enable, 0,
+    "Enable backwards compatibility with IGMPv2");
+SYSCTL_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &igmp_legacysupp, 0,
+    "Allow v1/v2 reports to suppress v3 group responses");
+SYSCTL_PROC(_net_inet_igmp, OID_AUTO, default_version,
+    CTLTYPE_INT | CTLFLAG_RW,
+    &igmp_default_version, 0, sysctl_igmp_default_version, "I",
+    "Default version of IGMP to run on each interface");
+SYSCTL_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
+    CTLTYPE_INT | CTLFLAG_RW,
+    &igmp_gsrdelay.tv_sec, 0, sysctl_igmp_gsr, "I",
+    "Rate limit for IGMPv3 Group-and-Source queries in seconds");
+#ifdef IGMP_DEBUG
+int igmp_debug = 0;
+SYSCTL_INT(_net_inet_igmp, OID_AUTO,
+	debug, CTLFLAG_RW | CTLFLAG_LOCKED, &igmp_debug, 0, "");
+#endif
 
-void
-igmp_init(void)
-{
-	struct ipoption *ra;
+SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
+    sysctl_igmp_ifinfo, "Per-interface IGMPv3 state");
 
-	/*
-	 * To avoid byte-swapping the same value over and over again.
-	 */
-	igmp_all_hosts_group = htonl(INADDR_ALLHOSTS_GROUP);
-	igmp_all_rtrs_group = htonl(INADDR_ALLRTRS_GROUP);
+/* Lock group and attribute for igmp_mtx */
+static lck_attr_t	*igmp_mtx_attr;
+static lck_grp_t	*igmp_mtx_grp;
+static lck_grp_attr_t	*igmp_mtx_grp_attr;
 
-	igmp_timers_are_running = 0;
+/*
+ * Locking and reference counting:
+ *
+ * igmp_mtx mainly protects igi_head.  In cases where both igmp_mtx and
+ * in_multihead_lock must be held, the former must be acquired first in order
+ * to maintain lock ordering.  It is not a requirement that igmp_mtx be
+ * acquired first before in_multihead_lock, but in case both must be acquired
+ * in succession, the correct lock ordering must be followed.
+ *
+ * Instead of walking the if_multiaddrs list at the interface and returning
+ * the ifma_protospec value of a matching entry, we search the global list
+ * of in_multi records and find it that way; this is done with in_multihead
+ * lock held.  Doing so avoids the race condition issues that many other BSDs
+ * suffer from (therefore in our implementation, ifma_protospec will never be
+ * NULL for as long as the in_multi is valid.)
+ *
+ * The above creates a requirement for the in_multi to stay in the
+ * in_multihead list even after the final IGMP leave (in IGMPv3 mode) until
+ * its state-change report no longer needs to be retransmitted (this is not
+ * required for IGMPv1/v2.)  In order to handle
+ * this, the request and reference counts of the in_multi are bumped up when
+ * the state changes to IGMP_LEAVING_MEMBER, and later dropped in the timeout
+ * handler.  Each in_multi holds a reference to the underlying igmp_ifinfo.
+ *
+ * Thus, the permitted lock order is:
+ *
+ *	igmp_mtx, in_multihead_lock, inm_lock, igi_lock
+ *
+ * Any may be taken independently, but if any are held at the same time,
+ * the above lock order must be followed.
+ */
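
A hedged sketch of that ordering when all four locks must be held at once
(error handling elided; inm and igi stand for any in_multi and igmp_ifinfo):

	lck_mtx_lock(&igmp_mtx);	/* 1st: global IGMP state */
	in_multihead_lock_shared();	/* 2nd: global in_multi list */
	INM_LOCK(inm);			/* 3rd: per-group state */
	IGI_LOCK(igi);			/* 4th: per-interface IGMP info */
	/* ... critical section ... */
	IGI_UNLOCK(igi);
	INM_UNLOCK(inm);
	in_multihead_lock_done();
	lck_mtx_unlock(&igmp_mtx);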
+static decl_lck_mtx_data(, igmp_mtx);
+static int igmp_timers_are_running;
 
-	/*
-	 * Construct a Router Alert option to use in outgoing packets
-	 */
-	MGET(router_alert, M_WAIT, MT_DATA);
-	ra = mtod(router_alert, struct ipoption *);
-	ra->ipopt_dst.s_addr = 0;
-	ra->ipopt_list[0] = IPOPT_RA;	/* Router Alert Option */
-	ra->ipopt_list[1] = 0x04;	/* 4 bytes long */
-	ra->ipopt_list[2] = 0x00;
-	ra->ipopt_list[3] = 0x00;
-	router_alert->m_len = sizeof(ra->ipopt_dst) + ra->ipopt_list[1];
+#define	IGI_ZONE_MAX		64		/* maximum elements in zone */
+#define	IGI_ZONE_NAME		"igmp_ifinfo"	/* zone name */
 
-	Head = (struct router_info *) 0;
-}
+static unsigned int igi_size;			/* size of zone element */
+static struct zone *igi_zone;			/* zone for igmp_ifinfo */
 
-static struct router_info *
-find_rti(
-	struct ifnet *ifp, int wait)
+#ifdef IGMP_DEBUG
+static __inline char *
+inet_ntoa_haddr(in_addr_t haddr)
 {
-	struct router_info *rti = Head;
-	
-	
-#if IGMP_DEBUG
-	printf("[igmp.c, _find_rti] --> entering \n");
-#endif
-	while (rti) {
-		if (rti->rti_ifp == ifp) {
-#if IGMP_DEBUG
-			printf("[igmp.c, _find_rti] --> found old entry \n");
-#endif
-			return rti;
-		}
-		rti = rti->rti_next;
-	}
-	
-	MALLOC(rti, struct router_info *, sizeof *rti, M_IGMP, wait);
-	if (rti != NULL)
-	{
-		rti->rti_ifp = ifp;
-		rti->rti_type = IGMP_V2_ROUTER;
-		rti->rti_time = 0;
-		rti->rti_next = Head;
-		Head = rti;
-	}
-#if IGMP_DEBUG
-	if (rti) printf("[igmp.c, _find_rti] --> created an entry \n");
+	struct in_addr ia;
+
+	ia.s_addr = htonl(haddr);
+	return (inet_ntoa(ia));
+}
 #endif
-	return rti;
+/*
+ * Retrieve or set default IGMP version.
+ */
+static int
+sysctl_igmp_default_version SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg2)
+	int	 error;
+	int	 new;
+
+	lck_mtx_lock(&igmp_mtx);
+
+	error = SYSCTL_OUT(req, arg1, sizeof(int));
+	if (error || !req->newptr)
+		goto out_locked;
+
+	new = igmp_default_version;
+
+	error = SYSCTL_IN(req, &new, sizeof(int));
+	if (error)
+		goto out_locked;
+
+	if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
+		error = EINVAL;
+		goto out_locked;
+	}
+
+	IGMP_PRINTF(("change igmp_default_version from %d to %d\n",
+	     igmp_default_version, new));
+
+	igmp_default_version = new;
+
+out_locked:
+	lck_mtx_unlock(&igmp_mtx);
+	return (error);
 }
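
For illustration, the knob is reachable from userland through the standard
sysctlbyname(3) interface; e.g. forcing IGMPv2 compatibility on all
interfaces (a sketch, error handling omitted):

	/* userland example */
	#include <sys/sysctl.h>

	int v = 2;	/* IGMP_VERSION_2 */
	(void) sysctlbyname("net.inet.igmp.default_version",
	    NULL, NULL, &v, sizeof (v));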
 
-void
-igmp_input(
-	struct mbuf *m,
-	int iphlen)
+/*
+ * Retrieve or set threshold between group-source queries in seconds.
+ */
+static int
+sysctl_igmp_gsr SYSCTL_HANDLER_ARGS
 {
-	struct igmp *igmp;
-	struct ip *ip;
-	int igmplen;
-	struct ifnet *ifp = m->m_pkthdr.rcvif;
-	int minlen;
-	struct in_multi *inm;
-	struct in_ifaddr *ia;
-	struct in_multistep step;
-	struct router_info *rti;
-	
-	int timer; /** timer value in the igmp query header **/
+#pragma unused(arg1, arg2)
+	int error;
+	int i;
 
-	++igmpstat.igps_rcv_total;
+	lck_mtx_lock(&igmp_mtx);
 
-	ip = mtod(m, struct ip *);
-	igmplen = ip->ip_len;
+	i = igmp_gsrdelay.tv_sec;
 
-	/*
-	 * Validate lengths
-	 */
-	if (igmplen < IGMP_MINLEN) {
-		++igmpstat.igps_rcv_tooshort;
-		m_freem(m);
-		return;
-	}
-	minlen = iphlen + IGMP_MINLEN;
-	if ((m->m_flags & M_EXT || m->m_len < minlen) &&
-	    (m = m_pullup(m, minlen)) == 0) {
-		++igmpstat.igps_rcv_tooshort;
-		return;
-	}
+	error = sysctl_handle_int(oidp, &i, 0, req);
+	if (error || !req->newptr)
+		goto out_locked;
 
-	/*
-	 * Validate checksum
-	 */
-	m->m_data += iphlen;
-	m->m_len -= iphlen;
-	igmp = mtod(m, struct igmp *);
-	if (in_cksum(m, igmplen)) {
-		++igmpstat.igps_rcv_badsum;
-		m_freem(m);
-		return;
+	if (i < -1 || i >= 60) {
+		error = EINVAL;
+		goto out_locked;
 	}
-	m->m_data -= iphlen;
-	m->m_len += iphlen;
 
-	ip = mtod(m, struct ip *);
-	timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE;
-	if (timer == 0)
-		timer = 1;
-	rti = find_rti(ifp, M_NOWAIT);
-	if (rti == NULL) {
-		m_freem(m);
-		return;
-	}
+	igmp_gsrdelay.tv_sec = i;
 
-	/*
-	 * In the IGMPv2 specification, there are 3 states and a flag.
-	 *
-	 * In Non-Member state, we simply don't have a membership record.
-	 * In Delaying Member state, our timer is running (inm->inm_timer)
-	 * In Idle Member state, our timer is not running (inm->inm_timer==0)
-	 *
-	 * The flag is inm->inm_state, it is set to IGMP_OTHERMEMBER if
-	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
-	 * if I sent the last report.
-	 */
-	switch (igmp->igmp_type) {
+out_locked:
+	lck_mtx_unlock(&igmp_mtx);
+	return (error);
+}
 
-	case IGMP_MEMBERSHIP_QUERY:
-		++igmpstat.igps_rcv_queries;
+/*
+ * Expose struct igmp_ifinfo to userland, keyed by ifindex.
+ * For use by ifmcstat(8).
+ */
+static int
+sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp)
+	int			*name;
+	int			 error;
+	u_int			 namelen;
+	struct ifnet		*ifp;
+	struct igmp_ifinfo	*igi;
+	struct igmp_ifinfo_u	igi_u;
 
-		if (ifp->if_flags & IFF_LOOPBACK)
-			break;
+	name = (int *)arg1;
+	namelen = arg2;
 
-		if (igmp->igmp_code == 0) {
-			/*
-			 * Old router.  Remember that the querier on this
-			 * interface is old, and set the timer to the
-			 * value in RFC 1112.
-			 */
+	if (req->newptr != USER_ADDR_NULL)
+		return (EPERM);
 
-			rti->rti_type = IGMP_V1_ROUTER;
-			rti->rti_time = 0;
+	if (namelen != 1)
+		return (EINVAL);
 
-			timer = IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ;
+	lck_mtx_lock(&igmp_mtx);
 
-			if (ip->ip_dst.s_addr != igmp_all_hosts_group ||
-			    igmp->igmp_group.s_addr != 0) {
-				++igmpstat.igps_rcv_badqueries;
-				m_freem(m);
-				return;
-			}
-		} else {
-			/*
-			 * New router.  Simply do the new validity check.
-			 */
-			
-			if (igmp->igmp_group.s_addr != 0 &&
-			    !IN_MULTICAST(ntohl(igmp->igmp_group.s_addr))) {
-				++igmpstat.igps_rcv_badqueries;
-				m_freem(m);
-				return;
-			}
-		}
+	if (name[0] <= 0 || name[0] > (u_int)if_index) {
+		error = ENOENT;
+		goto out_locked;
+	}
 
-		/*
-		 * - Start the timers in all of our membership records
-		 *   that the query applies to for the interface on
-		 *   which the query arrived excl. those that belong
-		 *   to the "all-hosts" group (224.0.0.1).
-		 * - Restart any timer that is already running but has
-		 *   a value longer than the requested timeout.
-		 * - Use the value specified in the query message as
-		 *   the maximum timeout.
-		 */
-		lck_mtx_lock(rnh_lock);
-		IN_FIRST_MULTI(step, inm);
-		while (inm != NULL) {
-			if (inm->inm_ifp == ifp &&
-			    inm->inm_addr.s_addr != igmp_all_hosts_group &&
-			    (igmp->igmp_group.s_addr == 0 ||
-			     igmp->igmp_group.s_addr == inm->inm_addr.s_addr)) {
-				if (inm->inm_timer == 0 ||
-				    inm->inm_timer > timer) {
-					inm->inm_timer =
-						IGMP_RANDOM_DELAY(timer);
-					igmp_timers_are_running = 1;
-				}
-			}
-			IN_NEXT_MULTI(step, inm);
+	error = ENOENT;
+
+	ifnet_head_lock_shared();
+	ifp = ifindex2ifnet[name[0]];
+	ifnet_head_done();
+	if (ifp == NULL)
+		goto out_locked;
+
+	bzero(&igi_u, sizeof (igi_u));
+
+	LIST_FOREACH(igi, &igi_head, igi_link) {
+		IGI_LOCK(igi);
+		if (ifp != igi->igi_ifp) {
+			IGI_UNLOCK(igi);
+			continue;
 		}
-		lck_mtx_unlock(rnh_lock);
+		igi_u.igi_ifindex = igi->igi_ifp->if_index;
+		igi_u.igi_version = igi->igi_version;
+		igi_u.igi_v1_timer = igi->igi_v1_timer;
+		igi_u.igi_v2_timer = igi->igi_v2_timer;
+		igi_u.igi_v3_timer = igi->igi_v3_timer;
+		igi_u.igi_flags = igi->igi_flags;
+		igi_u.igi_rv = igi->igi_rv;
+		igi_u.igi_qi = igi->igi_qi;
+		igi_u.igi_qri = igi->igi_qri;
+		igi_u.igi_uri = igi->igi_uri;
+		IGI_UNLOCK(igi);
 
+		error = SYSCTL_OUT(req, &igi_u, sizeof (igi_u));
 		break;
+	}
 
-	case IGMP_V1_MEMBERSHIP_REPORT:
-	case IGMP_V2_MEMBERSHIP_REPORT:
-		/*
-		 * For fast leave to work, we have to know that we are the
-		 * last person to send a report for this group.  Reports
-		 * can potentially get looped back if we are a multicast
-		 * router, so discard reports sourced by me.
-		 */
-		IFP_TO_IA(ifp, ia);
-		if (ia && ip->ip_src.s_addr == IA_SIN(ia)->sin_addr.s_addr) {
-			ifafree(&ia->ia_ifa);
-			break;
-		}
+out_locked:
+	lck_mtx_unlock(&igmp_mtx);
+	return (error);
+}
 
-		++igmpstat.igps_rcv_reports;
+/*
+ * Dispatch an entire queue of pending packet chains
+ *
+ * Must not be called with inm_lock held.
+ */
+static void
+igmp_dispatch_queue(struct igmp_ifinfo *igi, struct ifqueue *ifq, int limit,
+    const int loop, struct ifnet *ifp)
+{
+	struct mbuf *m;
+	struct ip *ip;
 
-		if (ifp->if_flags & IFF_LOOPBACK) {
-			if (ia != NULL)
-				ifafree(&ia->ia_ifa);
+	if (igi != NULL)
+		IGI_LOCK_ASSERT_HELD(igi);
+
+	for (;;) {
+		IF_DEQUEUE(ifq, m);
+		if (m == NULL)
 			break;
-		}
+		IGMP_PRINTF(("%s: dispatch %p from %p\n", __func__, ifq, m));
+		ip = mtod(m, struct ip *);
+		if (loop)
+			m->m_flags |= M_IGMP_LOOP;
+		if (igi != NULL)
+			IGI_UNLOCK(igi);
+		igmp_sendpkt(m, ifp);
+		if (igi != NULL)
+			IGI_LOCK(igi);
+		if (--limit == 0)
+			break;
+	}
 
-		if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr))) {
-			++igmpstat.igps_rcv_badreports;
-			m_freem(m);
-			if (ia != NULL)
-				ifafree(&ia->ia_ifa);
-			return;
-		}
+	if (igi != NULL)
+		IGI_LOCK_ASSERT_HELD(igi);
+}
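
Note the lock juggling above: the IGI lock is dropped around each
igmp_sendpkt() call, since transmission (ultimately via ip_output()) must not
run with per-interface IGMP state locked, and is retaken before the next
dequeue; the assertions document that the lock is held again on return.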
 
-		/*
-		 * KLUDGE: if the IP source address of the report has an
-		 * unspecified (i.e., zero) subnet number, as is allowed for
-		 * a booting host, replace it with the correct subnet number
-		 * so that a process-level multicast routing demon can
-		 * determine which subnet it arrived from.  This is necessary
-		 * to compensate for the lack of any way for a process to
-		 * determine the arrival interface of an incoming packet.
-		 */
-		if ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) == 0)
-			if (ia) ip->ip_src.s_addr = htonl(ia->ia_subnet);
+/*
+ * Filter outgoing IGMP report state by group.
+ *
+ * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
+ * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are
+ * disabled for all groups in the 224.0.0.0/24 link-local scope. However,
+ * this may break certain IGMP snooping switches which rely on the old
+ * report behaviour.
+ *
+ * Return zero if the given group is one for which IGMP reports
+ * should be suppressed, or non-zero if reports should be issued.
+ */
 
-		/*
-		 * If we belong to the group being reported, stop
-		 * our timer for that group.
-		 */
-		ifnet_lock_shared(ifp);
-		IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm);
-		ifnet_lock_done(ifp);
+static __inline__ int
+igmp_isgroupreported(const struct in_addr addr)
+{
 
-		if (inm != NULL) {
-			inm->inm_timer = 0;
-			++igmpstat.igps_rcv_ourreports;
+	if (in_allhosts(addr) ||
+	    ((!igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr)))))
+		return (0);
 
-			inm->inm_state = IGMP_OTHERMEMBER;
-		}
+	return (1);
+}
 
-		if (ia != NULL)
-			ifafree(&ia->ia_ifa);
-		break;
-	}
+/*
+ * Construct a Router Alert option to use in outgoing packets.
+ */
+static struct mbuf *
+igmp_ra_alloc(void)
+{
+	struct mbuf	*m;
+	struct ipoption	*p;
 
-	/*
-	 * Pass all valid IGMP packets up to any process(es) listening
-	 * on a raw IGMP socket.
-	 */
-	rip_input(m, iphlen);
+	MGET(m, M_WAITOK, MT_DATA);
+	p = mtod(m, struct ipoption *);
+	p->ipopt_dst.s_addr = INADDR_ANY;
+	p->ipopt_list[0] = IPOPT_RA;	/* Router Alert Option */
+	p->ipopt_list[1] = 0x04;	/* 4 bytes long */
+	p->ipopt_list[2] = IPOPT_EOL;	/* End of IP option list */
+	p->ipopt_list[3] = 0x00;	/* pad byte */
+	m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
+
+	return (m);
 }
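
For reference, the option body built above encodes as the canonical 4-byte
Router Alert of RFC 2113: 0x94 0x04 0x00 0x00. Since IPOPT_EOL is 0, the
last two octets equally read as the 16-bit RA value 0, "routers shall
examine this packet".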
 
-int
-igmp_joingroup(struct in_multi *inm)
+/*
+ * Attach IGMP when PF_INET is attached to an interface.
+ */
+struct igmp_ifinfo *
+igmp_domifattach(struct ifnet *ifp, int how)
 {
+	struct igmp_ifinfo *igi;
 
-	if (inm->inm_addr.s_addr == igmp_all_hosts_group
-	    || inm->inm_ifp->if_flags & IFF_LOOPBACK) {
-		inm->inm_timer = 0;
-		inm->inm_state = IGMP_OTHERMEMBER;
-	} else {
-		inm->inm_rti = find_rti(inm->inm_ifp, M_WAITOK);
-		if (inm->inm_rti == NULL) return ENOMEM;
-		igmp_sendpkt(inm, inm->inm_rti->rti_type, 0);
-		inm->inm_timer = IGMP_RANDOM_DELAY(
-					IGMP_MAX_HOST_REPORT_DELAY*PR_FASTHZ);
-		inm->inm_state = IGMP_IREPORTEDLAST;
-		igmp_timers_are_running = 1;
-	}
-	return 0;
+	IGMP_PRINTF(("%s: called for ifp %p(%s)\n",
+	    __func__, ifp, ifp->if_name));
+
+	igi = igi_alloc(how);
+	if (igi == NULL)
+		return (NULL);
+
+	lck_mtx_lock(&igmp_mtx);
+
+	IGI_LOCK(igi);
+	igi_initvar(igi, ifp, 0);
+	igi->igi_debug |= IFD_ATTACHED;
+	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
+	IGI_ADDREF_LOCKED(igi); /* hold a reference for caller */
+	IGI_UNLOCK(igi);
+
+	LIST_INSERT_HEAD(&igi_head, igi, igi_link);
+
+	lck_mtx_unlock(&igmp_mtx);
+
+	IGMP_PRINTF(("allocate igmp_ifinfo for ifp %p(%s)\n",
+	     ifp, ifp->if_name));
+
+	return (igi);
 }
 
+/*
+ * Attach IGMP when PF_INET is reattached to an interface.  Caller is
+ * expected to have an outstanding reference to the igi.
+ */
 void
-igmp_leavegroup(struct in_multi *inm)
+igmp_domifreattach(struct igmp_ifinfo *igi)
 {
-	if (inm->inm_state == IGMP_IREPORTEDLAST &&
-	    inm->inm_addr.s_addr != igmp_all_hosts_group &&
-	    !(inm->inm_ifp->if_flags & IFF_LOOPBACK) &&
-	    inm->inm_rti->rti_type != IGMP_V1_ROUTER)
-		igmp_sendpkt(inm, IGMP_V2_LEAVE_GROUP, igmp_all_rtrs_group);
+	struct ifnet *ifp;
+
+	lck_mtx_lock(&igmp_mtx);
+
+	IGI_LOCK(igi);
+	VERIFY(!(igi->igi_debug & IFD_ATTACHED));
+	ifp = igi->igi_ifp;
+	VERIFY(ifp != NULL);
+	igi_initvar(igi, ifp, 1);
+	igi->igi_debug |= IFD_ATTACHED;
+	IGI_ADDREF_LOCKED(igi); /* hold a reference for igi_head */
+	IGI_UNLOCK(igi);
+
+	LIST_INSERT_HEAD(&igi_head, igi, igi_link);
+
+	lck_mtx_unlock(&igmp_mtx);
+
+	IGMP_PRINTF(("reattached igmp_ifinfo for ifp %p(%s)\n",
+	     ifp, ifp->if_name));
 }
 
+/*
+ * Hook for domifdetach.
+ */
 void
-igmp_fasttimo(void)
+igmp_domifdetach(struct ifnet *ifp)
 {
-	struct in_multi *inm;
-	struct in_multistep step;
+	IGMP_PRINTF(("%s: called for ifp %p(%s%d)\n",
+	    __func__, ifp, ifp->if_name, ifp->if_unit));
+
+	lck_mtx_lock(&igmp_mtx);
+	igi_delete(ifp);
+	lck_mtx_unlock(&igmp_mtx);
+}
+
+/*
+ * Called at interface detach time.  Note that we only flush all deferred
+ * responses and record releases; all remaining inm records and their source
+ * entries related to this interface are left intact, in order to handle
+ * the reattach case.
+ */
+static void
+igi_delete(const struct ifnet *ifp)
+{
+	struct igmp_ifinfo *igi, *tigi;
+
+	lck_mtx_assert(&igmp_mtx, LCK_MTX_ASSERT_OWNED);
+
+	LIST_FOREACH_SAFE(igi, &igi_head, igi_link, tigi) {
+		IGI_LOCK(igi);
+		if (igi->igi_ifp == ifp) {
+			/*
+			 * Free deferred General Query and IGMPv1/v2 responses.
+			 */
+			IF_DRAIN(&igi->igi_gq);
+			IF_DRAIN(&igi->igi_v2q);
+			igmp_flush_relq(igi);
+			VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
+			igi->igi_debug &= ~IFD_ATTACHED;
+			IGI_UNLOCK(igi);
+
+			LIST_REMOVE(igi, igi_link);
+			IGI_REMREF(igi); /* release igi_head reference */
+			return;
+		}
+		IGI_UNLOCK(igi);
+	}
+	panic("%s: igmp_ifinfo not found for ifp %p\n", __func__,  ifp);
+}
+
+static void
+igi_initvar(struct igmp_ifinfo *igi, struct ifnet *ifp, int reattach)
+{
+	IGI_LOCK_ASSERT_HELD(igi);
+
+	igi->igi_ifp = ifp;
+	igi->igi_version = igmp_default_version;
+	igi->igi_flags = 0;
+	igi->igi_rv = IGMP_RV_INIT;
+	igi->igi_qi = IGMP_QI_INIT;
+	igi->igi_qri = IGMP_QRI_INIT;
+	igi->igi_uri = IGMP_URI_INIT;
+
+	/* ifnet is not yet attached; no need to hold ifnet lock */
+	if (!(ifp->if_flags & IFF_MULTICAST))
+		igi->igi_flags |= IGIF_SILENT;
+
+	if (!reattach)
+		SLIST_INIT(&igi->igi_relinmhead);
 
 	/*
-	 * Quick check to see if any work needs to be done, in order
-	 * to minimize the overhead of fasttimo processing.
+	 * Responses to general queries are subject to bounds.
 	 */
+	igi->igi_gq.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS;
+	igi->igi_v2q.ifq_maxlen = IGMP_MAX_RESPONSE_PACKETS;
+}
 
-	if (!igmp_timers_are_running)
-		return;
+static struct igmp_ifinfo *
+igi_alloc(int how)
+{
+	struct igmp_ifinfo *igi;
 
-	igmp_timers_are_running = 0;
-	IN_FIRST_MULTI(step, inm);
-	while (inm != NULL) {
-		if (inm->inm_timer == 0) {
-			/* do nothing */
-		} else if ((--inm->inm_timer == 0) && (inm->inm_rti != NULL)) {
-			igmp_sendpkt(inm, inm->inm_rti->rti_type, 0);
-			inm->inm_state = IGMP_IREPORTEDLAST;
-		} else {
-			igmp_timers_are_running = 1;
-		}
-		IN_NEXT_MULTI(step, inm);
+	igi = (how == M_WAITOK) ? zalloc(igi_zone) : zalloc_noblock(igi_zone);
+	if (igi != NULL) {
+		bzero(igi, igi_size);
+		lck_mtx_init(&igi->igi_lock, igmp_mtx_grp, igmp_mtx_attr);
+		igi->igi_debug |= IFD_ALLOC;
+	}
+	return (igi);
+}
+
+static void
+igi_free(struct igmp_ifinfo *igi)
+{
+	IGI_LOCK(igi);
+	if (igi->igi_debug & IFD_ATTACHED) {
+		panic("%s: attached igi=%p is being freed", __func__, igi);
+		/* NOTREACHED */
+	} else if (igi->igi_ifp != NULL) {
+		panic("%s: ifp not NULL for igi=%p", __func__, igi);
+		/* NOTREACHED */
+	} else if (!(igi->igi_debug & IFD_ALLOC)) {
+		panic("%s: igi %p cannot be freed", __func__, igi);
+		/* NOTREACHED */
+	} else if (igi->igi_refcnt != 0) {
+		panic("%s: non-zero refcnt igi=%p", __func__, igi);
+		/* NOTREACHED */
 	}
+	igi->igi_debug &= ~IFD_ALLOC;
+	IGI_UNLOCK(igi);
+
+	lck_mtx_destroy(&igi->igi_lock, igmp_mtx_grp);
+	zfree(igi_zone, igi);
 }
 
 void
-igmp_slowtimo(void)
+igi_addref(struct igmp_ifinfo *igi, int locked)
 {
-	struct router_info *rti =  Head;
+	if (!locked)
+		IGI_LOCK_SPIN(igi);
+	else
+		IGI_LOCK_ASSERT_HELD(igi);
 
-#if IGMP_DEBUG
-	printf("[igmp.c,_slowtimo] -- > entering \n");
-#endif
-	while (rti) {
-	    if (rti->rti_type == IGMP_V1_ROUTER) {
-		rti->rti_time++;
-		if (rti->rti_time >= IGMP_AGE_THRESHOLD) {
-			rti->rti_type = IGMP_V2_ROUTER;
-		}
-	    }
-	    rti = rti->rti_next;
+	if (++igi->igi_refcnt == 0) {
+		panic("%s: igi=%p wraparound refcnt", __func__, igi);
+		/* NOTREACHED */
 	}
-#if IGMP_DEBUG	
-	printf("[igmp.c,_slowtimo] -- > exiting \n");
-#endif
+	if (!locked)
+		IGI_UNLOCK(igi);
 }
 
-static void
-igmp_sendpkt(struct in_multi *inm, int type, uint32_t addr)
+void
+igi_remref(struct igmp_ifinfo *igi)
 {
-        struct mbuf *m;
-        struct igmp *igmp;
-        struct ip *ip;
-        struct ip_moptions imo;
-	struct route ro;
+	struct ifnet *ifp;
 
-        MGETHDR(m, M_DONTWAIT, MT_HEADER);	/* MAC-OK */
-        if (m == NULL)
-                return;
+	IGI_LOCK_SPIN(igi);
 
-	m->m_pkthdr.rcvif = lo_ifp;
-#if CONFIG_MACF_NET
-	mac_mbuf_label_associate_linklayer(inm->inm_ifp, m);
-#endif
-	m->m_pkthdr.len = sizeof(struct ip) + IGMP_MINLEN;
-	MH_ALIGN(m, IGMP_MINLEN + sizeof(struct ip));
-	m->m_data += sizeof(struct ip);
-        m->m_len = IGMP_MINLEN;
-	m->m_pkthdr.csum_flags = 0;
-	m->m_pkthdr.csum_data = 0;
-        igmp = mtod(m, struct igmp *);
-        igmp->igmp_type   = type;
-        igmp->igmp_code   = 0;
-        igmp->igmp_group  = inm->inm_addr;
-        igmp->igmp_cksum  = 0;
-        igmp->igmp_cksum  = in_cksum(m, IGMP_MINLEN);
-
-        m->m_data -= sizeof(struct ip);
-        m->m_len += sizeof(struct ip);
-        ip = mtod(m, struct ip *);
-        ip->ip_tos        = 0;
-        ip->ip_len        = sizeof(struct ip) + IGMP_MINLEN;
-        ip->ip_off        = 0;
-        ip->ip_p          = IPPROTO_IGMP;
-        ip->ip_src.s_addr = INADDR_ANY;
-        ip->ip_dst.s_addr = addr ? addr : igmp->igmp_group.s_addr;
-
-        imo.imo_multicast_ifp  = inm->inm_ifp;
-        imo.imo_multicast_ttl  = 1;
-		imo.imo_multicast_vif  = -1;
-#if MROUTING
-        /*
-         * Request loopback of the report if we are acting as a multicast
-         * router, so that the process-level routing demon can hear it.
-         */
-        imo.imo_multicast_loop = (ip_mrouter != NULL);
-#else
-        imo.imo_multicast_loop = 0;
-#endif
+	if (igi->igi_refcnt == 0) {
+		panic("%s: igi=%p negative refcnt", __func__, igi);
+		/* NOTREACHED */
+	}
+
+	--igi->igi_refcnt;
+	if (igi->igi_refcnt > 0) {
+		IGI_UNLOCK(igi);
+		return;
+	}
+
+	ifp = igi->igi_ifp;
+	igi->igi_ifp = NULL;
+	IF_DRAIN(&igi->igi_gq);
+	IF_DRAIN(&igi->igi_v2q);
+	igmp_flush_relq(igi);
+	VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
+	IGI_UNLOCK(igi);
+
+	IGMP_PRINTF(("%s: freeing igmp_ifinfo for ifp %p(%s%d)\n",
+	    __func__, ifp, ifp->if_name, ifp->if_unit));
+
+	igi_free(igi);
+}
+
+/*
+ * Process a received IGMPv1 query.
+ * Return non-zero if the message should be dropped.
+ */
+static int
+igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
+    const struct igmp *igmp)
+{
+	struct igmp_ifinfo	*igi;
+	struct in_multi		*inm;
+	struct in_multistep	step;
 
 	/*
-	 * XXX
-	 * Do we have to worry about reentrancy here?  Don't think so.
+	 * IGMPv1 Host Membership Queries SHOULD always be addressed to
+	 * 224.0.0.1. They are always treated as General Queries.
+	 * igmp_group is always ignored. Do not drop it as a userland
+	 * daemon may wish to see it.
 	 */
-	bzero(&ro, sizeof (ro));
-        (void) ip_output(m, router_alert, &ro, 0, &imo, NULL);
-	if (ro.ro_rt != NULL) {
-		rtfree(ro.ro_rt);
-		ro.ro_rt = NULL;
+	if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) {
+		IGMPSTAT_INC(igps_rcv_badqueries);
+		OIGMPSTAT_INC(igps_rcv_badqueries);
+		return (0);
 	}
+	IGMPSTAT_INC(igps_rcv_gen_queries);
 
-        ++igmpstat.igps_snd_reports;
-}
+	igi = IGMP_IFINFO(ifp);
+	VERIFY(igi != NULL);
+
+	IGI_LOCK(igi);
+	if (igi->igi_flags & IGIF_LOOPBACK) {
+		IGMP_PRINTF(("ignore v1 query on IGIF_LOOPBACK ifp %p(%s%d)\n",
+		    ifp, ifp->if_name, ifp->if_unit));
+		IGI_UNLOCK(igi);
+		return (0);
+	}
+	/*
+	 * Switch to IGMPv1 host compatibility mode.
+	 */
+	igmp_set_version(igi, IGMP_VERSION_1);
+	IGI_UNLOCK(igi);
 
+	IGMP_PRINTF(("process v1 query on ifp %p(%s%d)\n", ifp, ifp->if_name,
+	    ifp->if_unit));
+
+	/*
+	 * Start the timers in all of our group records
+	 * for the interface on which the query arrived,
+	 * except those which are already running.
+	 */
+	in_multihead_lock_shared();
+	IN_FIRST_MULTI(step, inm);
+	while (inm != NULL) {
+		INM_LOCK(inm);
+		if (inm->inm_ifp != ifp)
+			goto next;
+		if (inm->inm_timer != 0)
+			goto next;
+
+		switch (inm->inm_state) {
+		case IGMP_NOT_MEMBER:
+		case IGMP_SILENT_MEMBER:
+			break;
+		case IGMP_G_QUERY_PENDING_MEMBER:
+		case IGMP_SG_QUERY_PENDING_MEMBER:
+		case IGMP_REPORTING_MEMBER:
+		case IGMP_IDLE_MEMBER:
+		case IGMP_LAZY_MEMBER:
+		case IGMP_SLEEPING_MEMBER:
+		case IGMP_AWAKENING_MEMBER:
+			inm->inm_state = IGMP_REPORTING_MEMBER;
+			inm->inm_timer = IGMP_RANDOM_DELAY(
+			    IGMP_V1V2_MAX_RI * PR_SLOWHZ);
+			current_state_timers_running = 1;
+			break;
+		case IGMP_LEAVING_MEMBER:
+			break;
+		}
+next:
+		INM_UNLOCK(inm);
+		IN_NEXT_MULTI(step, inm);
+	}
+	in_multihead_lock_done();
+
+	return (0);
+}
+
+/*
+ * Process a received IGMPv2 general or group-specific query.
+ */
+static int
+igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
+    const struct igmp *igmp)
+{
+	struct igmp_ifinfo	*igi;
+	struct in_multi		*inm;
+	int			 is_general_query;
+	uint16_t		 timer;
+
+	is_general_query = 0;
+
+	/*
+	 * Validate address fields upfront.
+	 */
+	if (in_nullhost(igmp->igmp_group)) {
+		/*
+		 * IGMPv2 General Query.
+		 * If this was not sent to the all-hosts group, ignore it.
+		 */
+		if (!in_allhosts(ip->ip_dst))
+			return (0);
+		IGMPSTAT_INC(igps_rcv_gen_queries);
+		is_general_query = 1;
+	} else {
+		/* IGMPv2 Group-Specific Query. */
+		IGMPSTAT_INC(igps_rcv_group_queries);
+	}
+
+	igi = IGMP_IFINFO(ifp);
+	VERIFY(igi != NULL);
+
+	IGI_LOCK(igi);
+	if (igi->igi_flags & IGIF_LOOPBACK) {
+		IGMP_PRINTF(("ignore v2 query on IGIF_LOOPBACK ifp %p(%s%d)\n",
+		    ifp, ifp->if_name, ifp->if_unit));
+		IGI_UNLOCK(igi);
+		return (0);
+	}
+	/*
+	 * Ignore v2 query if in v1 Compatibility Mode.
+	 */
+	if (igi->igi_version == IGMP_VERSION_1) {
+		IGI_UNLOCK(igi);
+		return (0);
+	}
+	igmp_set_version(igi, IGMP_VERSION_2);
+	IGI_UNLOCK(igi);
+
+	timer = igmp->igmp_code * PR_SLOWHZ / IGMP_TIMER_SCALE;
+	if (timer == 0)
+		timer = 1;
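+	/*
+	 * Unit note (illustrative, assuming PR_SLOWHZ == 2 and
+	 * IGMP_TIMER_SCALE == 10): igmp_code is in tenths of a second,
+	 * so e.g. a Max Resp Time of 100 (10.0 sec) becomes
+	 * 100 * 2 / 10 == 20 slowtimo ticks.
+	 */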
+
+	if (is_general_query) {
+		struct in_multistep step;
+
+		IGMP_PRINTF(("process v2 general query on ifp %p(%s%d)\n",
+		    ifp, ifp->if_name, ifp->if_unit));
+		/*
+		 * For each reporting group joined on this
+		 * interface, kick the report timer.
+		 */
+		in_multihead_lock_shared();
+		IN_FIRST_MULTI(step, inm);
+		while (inm != NULL) {
+			INM_LOCK(inm);
+			if (inm->inm_ifp == ifp)
+				igmp_v2_update_group(inm, timer);
+			INM_UNLOCK(inm);
+			IN_NEXT_MULTI(step, inm);
+		}
+		in_multihead_lock_done();
+	} else {
+		/*
+		 * Group-specific IGMPv2 query, we need only
+		 * look up the single group to process it.
+		 */
+		in_multihead_lock_shared();
+		IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
+		in_multihead_lock_done();
+		if (inm != NULL) {
+			INM_LOCK(inm);
+			IGMP_PRINTF(("process v2 query %s on ifp %p(%s%d)\n",
+			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_name,
+			    ifp->if_unit));
+			igmp_v2_update_group(inm, timer);
+			INM_UNLOCK(inm);
+			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Update the report timer on a group in response to an IGMPv2 query.
+ *
+ * If we are becoming the reporting member for this group, start the timer.
+ * If we already are the reporting member for this group, and timer is
+ * below the threshold, reset it.
+ *
+ * We may be updating the group for the first time since we switched
+ * to IGMPv3. If we are, then we must clear any recorded source lists,
+ * and transition to REPORTING state; the group timer is overloaded
+ * for group and group-source query responses. 
+ *
+ * Unlike IGMPv3, the delay per group should be jittered
+ * to avoid bursts of IGMPv2 reports.
+ */
+static void
+igmp_v2_update_group(struct in_multi *inm, const int timer)
+{
+
+	IGMP_PRINTF(("%s: %s/%s%d timer=%d\n", __func__,
+	    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name,
+	    inm->inm_ifp->if_unit, timer));
+
+	INM_LOCK_ASSERT_HELD(inm);
+
+	switch (inm->inm_state) {
+	case IGMP_NOT_MEMBER:
+	case IGMP_SILENT_MEMBER:
+		break;
+	case IGMP_REPORTING_MEMBER:
+		if (inm->inm_timer != 0 &&
+		    inm->inm_timer <= timer) {
+			IGMP_PRINTF(("%s: REPORTING and timer running, "
+			    "skipping.\n", __func__));
+			break;
+		}
+		/* FALLTHROUGH */
+	case IGMP_SG_QUERY_PENDING_MEMBER:
+	case IGMP_G_QUERY_PENDING_MEMBER:
+	case IGMP_IDLE_MEMBER:
+	case IGMP_LAZY_MEMBER:
+	case IGMP_AWAKENING_MEMBER:
+		IGMP_PRINTF(("%s: ->REPORTING\n", __func__));
+		inm->inm_state = IGMP_REPORTING_MEMBER;
+		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
+		current_state_timers_running = 1;
+		break;
+	case IGMP_SLEEPING_MEMBER:
+		IGMP_PRINTF(("%s: ->AWAKENING\n", __func__));
+		inm->inm_state = IGMP_AWAKENING_MEMBER;
+		break;
+	case IGMP_LEAVING_MEMBER:
+		break;
+	}
+}
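+
+/*
+ * For illustration (assuming IGMP_RANDOM_DELAY(x) picks a uniform value
+ * in [1, x] and PR_SLOWHZ == 2): a v2 query carrying a Max Resp Time of
+ * 10.0 sec arrives above as timer == 20, so each reporting member delays
+ * between 0.5 and 10 seconds, spreading reports across the interval.
+ */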
+
+/*
+ * Process a received IGMPv3 general, group-specific or
+ * group-and-source-specific query.
+ * Assumes m has already been pulled up to the full IGMP message length.
+ * Return 0 if successful, otherwise an appropriate error code is returned.
+ */
+static int
+igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
+    /*const*/ struct igmpv3 *igmpv3)
+{
+	struct igmp_ifinfo	*igi;
+	struct in_multi		*inm;
+	int			 is_general_query;
+	uint32_t		 maxresp, nsrc, qqi;
+	uint16_t		 timer;
+	uint8_t			 qrv;
+
+	is_general_query = 0;
+
+	IGMP_PRINTF(("process v3 query on ifp %p(%s%d)\n", ifp, ifp->if_name,
+	    ifp->if_unit));
+
+	maxresp = igmpv3->igmp_code;	/* in 1/10ths of a second */
+	if (maxresp >= 128) {
+		maxresp = IGMP_MANT(igmpv3->igmp_code) <<
+			  (IGMP_EXP(igmpv3->igmp_code) + 3);
+	}
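+
+	/*
+	 * Worked example (assuming IGMP_MANT()/IGMP_EXP() implement the
+	 * RFC 3376 Section 4.1.1 encoding, i.e. (mant | 0x10) and a
+	 * 3-bit exponent): code 0x80 decodes to 0x10 << 3 == 128 tenths
+	 * (12.8 sec), continuous with the linear range below it, and
+	 * 0xff decodes to 0x1f << 10 == 31744 tenths (~53 min).
+	 */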
+
+	/*
+	 * Robustness must never be less than 2 for on-wire IGMPv3.
+	 * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
+	 * an exception for interfaces whose IGMPv3 state changes
+	 * are redirected to loopback (e.g. MANET).
+	 */
+	qrv = IGMP_QRV(igmpv3->igmp_misc);
+	if (qrv < 2) {
+		IGMP_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
+		    qrv, IGMP_RV_INIT));
+		qrv = IGMP_RV_INIT;
+	}
+
+	qqi = igmpv3->igmp_qqi;
+	if (qqi >= 128) {
+		qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
+		     (IGMP_EXP(igmpv3->igmp_qqi) + 3);
+	}
+
+	timer = maxresp * PR_SLOWHZ / IGMP_TIMER_SCALE;
+	if (timer == 0)
+		timer = 1;
+
+	nsrc = ntohs(igmpv3->igmp_numsrc);
+
+	/*
+	 * Validate address fields and versions upfront before
+	 * accepting v3 query.
+	 */
+	if (in_nullhost(igmpv3->igmp_group)) {
+		/*
+		 * IGMPv3 General Query.
+		 *
+		 * General Queries SHOULD be directed to 224.0.0.1.
+		 * A general query with a source list has undefined
+		 * behaviour; discard it.
+		 */
+		IGMPSTAT_INC(igps_rcv_gen_queries);
+		if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
+			IGMPSTAT_INC(igps_rcv_badqueries);
+			OIGMPSTAT_INC(igps_rcv_badqueries);
+			return (0);
+		}
+		is_general_query = 1;
+	} else {
+		/* Group or group-source specific query. */
+		if (nsrc == 0)
+			IGMPSTAT_INC(igps_rcv_group_queries);
+		else
+			IGMPSTAT_INC(igps_rcv_gsr_queries);
+	}
+
+	igi = IGMP_IFINFO(ifp);
+	VERIFY(igi != NULL);
+
+	IGI_LOCK(igi);
+	if (igi->igi_flags & IGIF_LOOPBACK) {
+		IGMP_PRINTF(("ignore v3 query on IGIF_LOOPBACK ifp %p(%s%d)\n",
+		    ifp, ifp->if_name, ifp->if_unit));
+		IGI_UNLOCK(igi);
+		return (0);
+	}
+
+	/*
+	 * Discard the v3 query if we're in Compatibility Mode.
+	 * The RFC is not clearly worded on whether hosts must stay in
+	 * compatibility mode until the Old Version Querier Present
+	 * timer expires.
+	 */
+	if (igi->igi_version != IGMP_VERSION_3) {
+		IGMP_PRINTF(("ignore v3 query in v%d mode on ifp %p(%s%d)\n",
+		    igi->igi_version, ifp, ifp->if_name, ifp->if_unit));
+		IGI_UNLOCK(igi);
+		return (0);
+	}
+
+	igmp_set_version(igi, IGMP_VERSION_3);
+	igi->igi_rv = qrv;
+	igi->igi_qi = qqi;
+	igi->igi_qri = maxresp;
+
+	IGMP_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, qrv, qqi,
+	    maxresp));
+
+	if (is_general_query) {
+		/*
+		 * Schedule a current-state report on this ifp for
+		 * all groups, possibly containing source lists.
+		 * If there is a pending General Query response
+		 * scheduled earlier than the selected delay, do
+		 * not schedule any other reports.
+		 * Otherwise, reset the interface timer.
+		 */
+		IGMP_PRINTF(("process v3 general query on ifp %p(%s%d)\n",
+		    ifp, ifp->if_name, ifp->if_unit));
+		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
+			igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
+			interface_timers_running = 1;
+		}
+		IGI_UNLOCK(igi);
+	} else {
+		IGI_UNLOCK(igi);
+		/*
+		 * Group-source-specific queries are throttled on
+		 * a per-group basis to defeat denial-of-service attempts.
+		 * Queries for groups we are not a member of on this
+		 * link are simply ignored.
+		 */
+		in_multihead_lock_shared();
+		IN_LOOKUP_MULTI(&igmpv3->igmp_group, ifp, inm);
+		in_multihead_lock_done();
+		if (inm == NULL)
+			return (0);
+
+		INM_LOCK(inm);
+#ifndef __APPLE__
+		/* TODO: need ratecheck equivalent */
+		if (nsrc > 0) {
+			if (!ratecheck(&inm->inm_lastgsrtv,
+			    &igmp_gsrdelay)) {
+				IGMP_PRINTF(("%s: GS query throttled.\n",
+				    __func__));
+				IGMPSTAT_INC(igps_drop_gsr_queries);
+				INM_UNLOCK(inm);
+				INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
+				return (0);
+			}
+		}
+#endif
+		IGMP_PRINTF(("process v3 %s query on ifp %p(%s%d)\n",
+		     inet_ntoa(igmpv3->igmp_group), ifp, ifp->if_name,
+		     ifp->if_unit));
+		/*
+		 * If there is a pending General Query response
+		 * scheduled sooner than the selected delay, no
+		 * further report need be scheduled.
+		 * Otherwise, prepare to respond to the
+		 * group-specific or group-and-source query.
+		 */
+		IGI_LOCK(igi);
+		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
+			IGI_UNLOCK(igi);
+			igmp_input_v3_group_query(inm, timer, igmpv3);
+		} else {
+			IGI_UNLOCK(igi);
+		}
+		INM_UNLOCK(inm);
+		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
+	}
+
+	return (0);
+}
+
+/*
+ * Process a received IGMPv3 group-specific or group-and-source-specific
+ * query.
+ * Return <0 if any error occurred. Currently this is ignored.
+ */
+ */
+static int
+igmp_input_v3_group_query(struct in_multi *inm, 
+    int timer, /*const*/ struct igmpv3 *igmpv3)
+{
+	int			 retval;
+	uint16_t		 nsrc;
+
+	INM_LOCK_ASSERT_HELD(inm);
+
+	retval = 0;
+
+	switch (inm->inm_state) {
+	case IGMP_NOT_MEMBER:
+	case IGMP_SILENT_MEMBER:
+	case IGMP_SLEEPING_MEMBER:
+	case IGMP_LAZY_MEMBER:
+	case IGMP_AWAKENING_MEMBER:
+	case IGMP_IDLE_MEMBER:
+	case IGMP_LEAVING_MEMBER:
+		return (retval);
+	case IGMP_REPORTING_MEMBER:
+	case IGMP_G_QUERY_PENDING_MEMBER:
+	case IGMP_SG_QUERY_PENDING_MEMBER:
+		break;
+	}
+
+	nsrc = ntohs(igmpv3->igmp_numsrc);
+
+	/*
+	 * Deal with group-specific queries upfront.
+	 * If any group query is already pending, purge any recorded
+	 * source-list state if it exists, and schedule a query response
+	 * for this group-specific query.
+	 */
+	if (nsrc == 0) {
+		if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
+		    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
+			inm_clear_recorded(inm);
+			timer = min(inm->inm_timer, timer);
+		}
+		inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
+		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
+		current_state_timers_running = 1;
+		return (retval);
+	}
+
+	/*
+	 * Deal with the case where a group-and-source-specific query has
+	 * been received but a group-specific query is already pending.
+	 */
+	if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
+		timer = min(inm->inm_timer, timer);
+		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
+		current_state_timers_running = 1;
+		return (retval);
+	}
+
+	/*
+	 * Finally, deal with the case where a group-and-source-specific
+	 * query has been received, where a response to a previous g-s-r
+	 * query exists, or none exists.
+	 * In this case, we need to parse the source-list which the Querier
+	 * has provided us with and check if we have any source list filter
+	 * entries at T1 for these sources. If we do not, there is no need
+	 * to schedule a report and the query may be dropped.
+	 * If we do, we must record them and schedule a current-state
+	 * report for those sources.
+	 * FIXME: Handling source lists larger than 1 mbuf requires that
+	 * we pass the mbuf chain pointer down to this function, and use
+	 * m_getptr() to walk the chain.
+	 */
+	if (inm->inm_nsrc > 0) {
+		const struct in_addr	*ap;
+		int			 i, nrecorded;
+
+		ap = (const struct in_addr *)(igmpv3 + 1);
+		nrecorded = 0;
+		for (i = 0; i < nsrc; i++, ap++) {
+			retval = inm_record_source(inm, ap->s_addr);
+			if (retval < 0)
+				break;
+			nrecorded += retval;
+		}
+		if (nrecorded > 0) {
+			IGMP_PRINTF(("%s: schedule response to SG query\n",
+			    __func__));
+			inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
+			inm->inm_timer = IGMP_RANDOM_DELAY(timer);
+			current_state_timers_running = 1;
+		}
+	}
+
+	return (retval);
+}
+
+/*
+ * Process a received IGMPv1 host membership report.
+ *
+ * NOTE: 0.0.0.0 workaround breaks const correctness.
+ */
+static int
+igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip,
+    /*const*/ struct igmp *igmp)
+{
+	struct in_ifaddr *ia;
+	struct in_multi *inm;
+
+	IGMPSTAT_INC(igps_rcv_reports);
+	OIGMPSTAT_INC(igps_rcv_reports);
+
+	if (ifp->if_flags & IFF_LOOPBACK)
+		return (0);
+
+	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
+	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
+		IGMPSTAT_INC(igps_rcv_badreports);
+		OIGMPSTAT_INC(igps_rcv_badreports);
+		return (EINVAL);
+	}
+
+	/*
+	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
+	 * Booting clients may use the source address 0.0.0.0. Some
+	 * IGMP daemons may not know how to use IP_RECVIF to determine
+	 * the interface upon which this message was received.
+	 * Replace 0.0.0.0 with the subnet address if told to do so.
+	 */
+	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
+		IFP_TO_IA(ifp, ia);
+		if (ia != NULL) {
+			IFA_LOCK(&ia->ia_ifa);
+			ip->ip_src.s_addr = htonl(ia->ia_subnet);
+			IFA_UNLOCK(&ia->ia_ifa);
+			IFA_REMREF(&ia->ia_ifa);
+		}
+	}
+
+	IGMP_PRINTF(("process v1 report %s on ifp %p(%s%d)\n",
+	     inet_ntoa(igmp->igmp_group), ifp, ifp->if_name, ifp->if_unit));
+
+	/*
+	 * IGMPv1 report suppression.
+	 * If we are a member of this group, and our membership should be
+	 * reported, stop our group timer and transition to the 'lazy' state.
+	 */
+	in_multihead_lock_shared();
+	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
+	in_multihead_lock_done();
+	if (inm != NULL) {
+		struct igmp_ifinfo *igi;
+
+		INM_LOCK(inm);
+
+		igi = inm->inm_igi;
+		VERIFY(igi != NULL);
+
+		IGMPSTAT_INC(igps_rcv_ourreports);
+		OIGMPSTAT_INC(igps_rcv_ourreports);
+
+		/*
+		 * If we are in IGMPv3 host mode, do not allow the
+		 * other host's IGMPv1 report to suppress our reports
+		 * unless explicitly configured to do so.
+		 */
+		IGI_LOCK(igi);
+		if (igi->igi_version == IGMP_VERSION_3) {
+			if (igmp_legacysupp)
+				igmp_v3_suppress_group_record(inm);
+			IGI_UNLOCK(igi);
+			INM_UNLOCK(inm);
+			INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
+			return (0);
+		}
+
+		INM_LOCK_ASSERT_HELD(inm);
+		inm->inm_timer = 0;
+
+		switch (inm->inm_state) {
+		case IGMP_NOT_MEMBER:
+		case IGMP_SILENT_MEMBER:
+			break;
+		case IGMP_IDLE_MEMBER:
+		case IGMP_LAZY_MEMBER:
+		case IGMP_AWAKENING_MEMBER:
+			IGMP_PRINTF(("report suppressed for %s on ifp %p(%s%d)\n",
+			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_name,
+			    ifp->if_unit));
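+			/* FALLTHROUGH */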
+		case IGMP_SLEEPING_MEMBER:
+			inm->inm_state = IGMP_SLEEPING_MEMBER;
+			break;
+		case IGMP_REPORTING_MEMBER:
+			IGMP_PRINTF(("report suppressed for %s on ifp %p(%s%d)\n",
+			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_name,
+			    ifp->if_unit));
+			if (igi->igi_version == IGMP_VERSION_1)
+				inm->inm_state = IGMP_LAZY_MEMBER;
+			else if (igi->igi_version == IGMP_VERSION_2)
+				inm->inm_state = IGMP_SLEEPING_MEMBER;
+			break;
+		case IGMP_G_QUERY_PENDING_MEMBER:
+		case IGMP_SG_QUERY_PENDING_MEMBER:
+		case IGMP_LEAVING_MEMBER:
+			break;
+		}
+		IGI_UNLOCK(igi);
+		INM_UNLOCK(inm);
+		INM_REMREF(inm); /* from IN_LOOKUP_MULTI */
+	}
+
+	return (0);
+}
+
+/*
+ * Process a received IGMPv2 host membership report.
+ *
+ * NOTE: 0.0.0.0 workaround breaks const correctness.
+ */
+static int
+igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip,
+    /*const*/ struct igmp *igmp)
+{
+	struct in_ifaddr *ia;
+	struct in_multi *inm;
+
+	/*
+	 * Make sure we don't hear our own membership report.  Fast
+	 * leave requires knowing that we are the only member of a
+	 * group.
+	 */
+	IFP_TO_IA(ifp, ia);
+	if (ia != NULL) {
+		IFA_LOCK(&ia->ia_ifa);
+		if (in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
+			IFA_UNLOCK(&ia->ia_ifa);
+			IFA_REMREF(&ia->ia_ifa);
+			return (0);
+		}
+		IFA_UNLOCK(&ia->ia_ifa);
+	}
+
+	IGMPSTAT_INC(igps_rcv_reports);
+	OIGMPSTAT_INC(igps_rcv_reports);
+
+	if (ifp->if_flags & IFF_LOOPBACK) {
+		if (ia != NULL)
+			IFA_REMREF(&ia->ia_ifa);
+		return (0);
+	}
+
+	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
+	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
+		if (ia != NULL)
+			IFA_REMREF(&ia->ia_ifa);
+		IGMPSTAT_INC(igps_rcv_badreports);
+		OIGMPSTAT_INC(igps_rcv_badreports);
+		return (EINVAL);
+	}
+
+	/*
+	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
+	 * Booting clients may use the source address 0.0.0.0. Some
+	 * IGMP daemons may not know how to use IP_RECVIF to determine
+	 * the interface upon which this message was received.
+	 * Replace 0.0.0.0 with the subnet address if told to do so.
+	 */
+	if (igmp_recvifkludge && in_nullhost(ip->ip_src)) {
+		if (ia != NULL) {
+			IFA_LOCK(&ia->ia_ifa);
+			ip->ip_src.s_addr = htonl(ia->ia_subnet);
+			IFA_UNLOCK(&ia->ia_ifa);
+		}
+	}
+	if (ia != NULL)
+		IFA_REMREF(&ia->ia_ifa);
+
+	IGMP_PRINTF(("process v2 report %s on ifp %p(%s%d)\n",
+	     inet_ntoa(igmp->igmp_group), ifp, ifp->if_name, ifp->if_unit));
+
+	/*
+	 * IGMPv2 report suppression.
+	 * If we are a member of this group, and our membership should be
+	 * reported, and our group timer is pending or about to be reset,
+	 * stop our group timer by transitioning to the 'lazy' state.
+	 */
+	in_multihead_lock_shared();
+	IN_LOOKUP_MULTI(&igmp->igmp_group, ifp, inm);
+	in_multihead_lock_done();
+	if (inm != NULL) {
+		struct igmp_ifinfo *igi;
+
+		INM_LOCK(inm);
+		igi = inm->inm_igi;
+		VERIFY(igi != NULL);
+
+		IGMPSTAT_INC(igps_rcv_ourreports);
+		OIGMPSTAT_INC(igps_rcv_ourreports);
+
+		/*
+		 * If we are in IGMPv3 host mode, do not allow the
+		 * other host's IGMPv2 report to suppress our reports
+		 * unless explicitly configured to do so.
+		 */
+		IGI_LOCK(igi);
+		if (igi->igi_version == IGMP_VERSION_3) {
+			if (igmp_legacysupp)
+				igmp_v3_suppress_group_record(inm);
+			IGI_UNLOCK(igi);
+			INM_UNLOCK(inm);
+			INM_REMREF(inm);
+			return (0);
+		}
+
+		inm->inm_timer = 0;
+
+		switch (inm->inm_state) {
+		case IGMP_NOT_MEMBER:
+		case IGMP_SILENT_MEMBER:
+		case IGMP_SLEEPING_MEMBER:
+			break;
+		case IGMP_REPORTING_MEMBER:
+		case IGMP_IDLE_MEMBER:
+		case IGMP_AWAKENING_MEMBER:
+			IGMP_PRINTF(("report suppressed for %s on ifp %p(%s%d)\n",
+			    inet_ntoa(igmp->igmp_group), ifp, ifp->if_name,
+			    ifp->if_unit));
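+			/* FALLTHROUGH */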
+		case IGMP_LAZY_MEMBER:
+			inm->inm_state = IGMP_LAZY_MEMBER;
+			break;
+		case IGMP_G_QUERY_PENDING_MEMBER:
+		case IGMP_SG_QUERY_PENDING_MEMBER:
+		case IGMP_LEAVING_MEMBER:
+			break;
+		}
+		IGI_UNLOCK(igi);
+		INM_UNLOCK(inm);
+		INM_REMREF(inm);
+	}
+
+	return (0);
+}
+
+void
+igmp_input(struct mbuf *m, int off)
+{
+	int iphlen;
+	struct ifnet *ifp;
+	struct igmp *igmp;
+	struct ip *ip;
+	int igmplen;
+	int minlen;
+	int queryver;
+
+	IGMP_PRINTF(("%s: called w/mbuf (%p,%d)\n", __func__, m, off));
+
+	ifp = m->m_pkthdr.rcvif;
+
+	IGMPSTAT_INC(igps_rcv_total);
+	OIGMPSTAT_INC(igps_rcv_total);
+
+	ip = mtod(m, struct ip *);
+	iphlen = off;
+
+	/* By now, ip_len no longer includes the length of the IP header */
+	igmplen = ip->ip_len;
+
+	/*
+	 * Validate lengths.
+	 */
+	if (igmplen < IGMP_MINLEN) {
+		IGMPSTAT_INC(igps_rcv_tooshort);
+		OIGMPSTAT_INC(igps_rcv_tooshort);
+		m_freem(m);
+		return;
+	}
+
+	/*
+	 * Always pullup to the minimum size for v1/v2 or v3
+	 * to amortize calls to m_pulldown().
+	 */
+	if (igmplen >= IGMP_V3_QUERY_MINLEN)
+		minlen = IGMP_V3_QUERY_MINLEN;
+	else
+		minlen = IGMP_MINLEN;
+
+	M_STRUCT_GET(igmp, struct igmp *, m, off, minlen);
+	if (igmp == NULL) {
+		IGMPSTAT_INC(igps_rcv_tooshort);
+		OIGMPSTAT_INC(igps_rcv_tooshort);
+		return;
+	}
+
+	/*
+	 * Validate checksum.
+	 */
+	m->m_data += iphlen;
+	m->m_len -= iphlen;
+	if (in_cksum(m, igmplen)) {
+		IGMPSTAT_INC(igps_rcv_badsum);
+		OIGMPSTAT_INC(igps_rcv_badsum);
+		m_freem(m);
+		return;
+	}
+	m->m_data -= iphlen;
+	m->m_len += iphlen;
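+	/*
+	 * (The m_data/m_len adjustments above temporarily expose only
+	 * the IGMP payload to in_cksum(), then restore the mbuf so the
+	 * ip and igmp pointers computed earlier remain valid.)
+	 */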
+
+	/*
+	 * IGMP control traffic is link-scope, and must have a TTL of 1.
+	 * DVMRP traffic (e.g. mrinfo, mtrace) is an exception;
+	 * probe packets may come from beyond the LAN.
+	 */
+	if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
+		IGMPSTAT_INC(igps_rcv_badttl);
+		m_freem(m);
+		return;
+	}
+
+	switch (igmp->igmp_type) {
+	case IGMP_HOST_MEMBERSHIP_QUERY:
+		if (igmplen == IGMP_MINLEN) {
+			if (igmp->igmp_code == 0)
+				queryver = IGMP_VERSION_1;
+			else
+				queryver = IGMP_VERSION_2;
+		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
+			queryver = IGMP_VERSION_3;
+		} else {
+			IGMPSTAT_INC(igps_rcv_tooshort);
+			OIGMPSTAT_INC(igps_rcv_tooshort);
+			m_freem(m);
+			return;
+		}
+
+		OIGMPSTAT_INC(igps_rcv_queries);
+
+		switch (queryver) {
+		case IGMP_VERSION_1:
+			IGMPSTAT_INC(igps_rcv_v1v2_queries);
+			if (!igmp_v1enable)
+				break;
+			if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
+				m_freem(m);
+				return;
+			}
+			break;
+
+		case IGMP_VERSION_2:
+			IGMPSTAT_INC(igps_rcv_v1v2_queries);
+			if (!igmp_v2enable)
+				break;
+			if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
+				m_freem(m);
+				return;
+			}
+			break;
+
+		case IGMP_VERSION_3: {
+				struct igmpv3 *igmpv3;
+				uint16_t igmpv3len;
+				uint16_t srclen;
+				int nsrc;
+
+				IGMPSTAT_INC(igps_rcv_v3_queries);
+				igmpv3 = (struct igmpv3 *)igmp;
+				/*
+				 * Validate length based on source count.
+				 */
+				nsrc = ntohs(igmpv3->igmp_numsrc);
+				srclen = sizeof(struct in_addr) * nsrc;
+				if (igmplen < (IGMP_V3_QUERY_MINLEN + srclen)) {
+					IGMPSTAT_INC(igps_rcv_tooshort);
+					OIGMPSTAT_INC(igps_rcv_tooshort);
+					m_freem(m);
+					return;
+				}
+				igmpv3len = IGMP_V3_QUERY_MINLEN + srclen;
+				M_STRUCT_GET(igmpv3, struct igmpv3 *, m,
+				    off, igmpv3len);
+				if (igmpv3 == NULL) {
+					IGMPSTAT_INC(igps_rcv_tooshort);
+					OIGMPSTAT_INC(igps_rcv_tooshort);
+					return;
+				}
+				if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
+					m_freem(m);
+					return;
+				}
+			}
+			break;
+		}
+		break;
+
+	case IGMP_v1_HOST_MEMBERSHIP_REPORT:
+		if (!igmp_v1enable)
+			break;
+		if (igmp_input_v1_report(ifp, ip, igmp) != 0) {
+			m_freem(m);
+			return;
+		}
+		break;
+
+	case IGMP_v2_HOST_MEMBERSHIP_REPORT:
+		if (!igmp_v2enable)
+			break;
+#ifndef __APPLE__
+		if (!ip_checkrouteralert(m))
+			IGMPSTAT_INC(igps_rcv_nora);
+#endif
+		if (igmp_input_v2_report(ifp, ip, igmp) != 0) {
+			m_freem(m);
+			return;
+		}
+		break;
+
+	case IGMP_v3_HOST_MEMBERSHIP_REPORT:
+		/*
+		 * Hosts do not need to process IGMPv3 membership reports,
+		 * as report suppression is no longer required.
+		 */
+#ifndef __APPLE__
+		if (!ip_checkrouteralert(m))
+			IGMPSTAT_INC(igps_rcv_nora);
+#endif
+		break;
+
+	default:
+		break;
+	}
+
+	lck_mtx_assert(&igmp_mtx, LCK_MTX_ASSERT_NOTOWNED);
+	/*
+	 * Pass all valid IGMP packets up to any process(es) listening on a
+	 * raw IGMP socket.
+	 */
+	rip_input(m, off);
+}
+
+/*
+ * IGMP slowtimo handler.
+ * Combines both the slow and fast timers into one. We lose some
+ * responsiveness, but this allows the system to avoid having a
+ * pr_fasttimo, thus allowing for power savings.
+ */
+void
+igmp_slowtimo(void)
+{
+	struct ifqueue		 scq;	/* State-change packets */
+	struct ifqueue		 qrq;	/* Query response packets */
+	struct ifnet		*ifp;
+	struct igmp_ifinfo	*igi;
+	struct in_multi		*inm;
+	int			 loop = 0, uri_fasthz = 0;
+
+	lck_mtx_lock(&igmp_mtx);
+
+	LIST_FOREACH(igi, &igi_head, igi_link) {
+		IGI_LOCK(igi);
+		igmp_v1v2_process_querier_timers(igi);
+		IGI_UNLOCK(igi);
+	}
+
+	/*
+	 * NOTE: previously handled by fasttimo
+	 *
+	 * Quick check to see if any work needs to be done, in order to
+	 * minimize the overhead of fasttimo processing.
+	 */
+	if (!current_state_timers_running &&
+	    !interface_timers_running &&
+	    !state_change_timers_running) {
+		lck_mtx_unlock(&igmp_mtx);
+		return;
+	}
+
+	/*
+	 * IGMPv3 General Query response timer processing.
+	 */
+	if (interface_timers_running) {
+		interface_timers_running = 0;
+		LIST_FOREACH(igi, &igi_head, igi_link) {
+			IGI_LOCK(igi);
+			if (igi->igi_v3_timer == 0) {
+				/* Do nothing. */
+			} else if (--igi->igi_v3_timer == 0) {
+				igmp_v3_dispatch_general_query(igi);
+			} else {
+				interface_timers_running = 1;
+			}
+			IGI_UNLOCK(igi);
+		}
+	}
+
+	if (!current_state_timers_running &&
+	    !state_change_timers_running)
+		goto out_locked;
+
+	current_state_timers_running = 0;
+	state_change_timers_running = 0;
+
+	memset(&qrq, 0, sizeof(struct ifqueue));
+	qrq.ifq_maxlen = IGMP_MAX_G_GS_PACKETS;
+
+	memset(&scq, 0, sizeof(struct ifqueue));
+	scq.ifq_maxlen = IGMP_MAX_STATE_CHANGE_PACKETS;
+
+	/*
+	 * IGMPv1/v2/v3 host report and state-change timer processing.
+	 * Note: Processing a v3 group timer may remove a node.
+	 */
+	LIST_FOREACH(igi, &igi_head, igi_link) {
+		struct in_multistep step;
+
+		IGI_LOCK(igi);
+		ifp = igi->igi_ifp;
+		loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
+		uri_fasthz = IGMP_RANDOM_DELAY(igi->igi_uri * PR_SLOWHZ);
+		IGI_UNLOCK(igi);
+
+		in_multihead_lock_shared();
+		IN_FIRST_MULTI(step, inm);
+		while (inm != NULL) {
+			INM_LOCK(inm);
+			if (inm->inm_ifp != ifp)
+				goto next;
+
+			IGI_LOCK(igi);
+			switch (igi->igi_version) {
+				case IGMP_VERSION_1:
+				case IGMP_VERSION_2:
+					igmp_v1v2_process_group_timer(inm,
+					    igi->igi_version);
+					break;
+				case IGMP_VERSION_3:
+					igmp_v3_process_group_timers(igi, &qrq,
+					    &scq, inm, uri_fasthz);
+					break;
+			}
+			IGI_UNLOCK(igi);
+next:
+			INM_UNLOCK(inm);
+			IN_NEXT_MULTI(step, inm);
+		}
+		in_multihead_lock_done();
+
+		IGI_LOCK(igi);
+		if (igi->igi_version == IGMP_VERSION_1 ||
+		    igi->igi_version == IGMP_VERSION_2) {
+			igmp_dispatch_queue(igi, &igi->igi_v2q, 0, loop, ifp);
+		} else if (igi->igi_version == IGMP_VERSION_3) {
+			IGI_UNLOCK(igi);
+			igmp_dispatch_queue(NULL, &qrq, 0, loop, ifp);
+			igmp_dispatch_queue(NULL, &scq, 0, loop, ifp);
+			VERIFY(qrq.ifq_len == 0);
+			VERIFY(scq.ifq_len == 0);
+			IGI_LOCK(igi);
+		}
+		/*
+		 * In case there are still any pending membership reports
+		 * which didn't get drained at version change time.
+		 */
+		IF_DRAIN(&igi->igi_v2q);
+		/*
+		 * Release all deferred inm records, and drain any locally
+		 * enqueued packets; do it even if the current IGMP version
+		 * for the link is no longer IGMPv3, in order to handle the
+		 * version change case.
+		 */
+		igmp_flush_relq(igi);
+		VERIFY(SLIST_EMPTY(&igi->igi_relinmhead));
+		IGI_UNLOCK(igi);
+
+		IF_DRAIN(&qrq);
+		IF_DRAIN(&scq);
+	}
+
+out_locked:
+	lck_mtx_unlock(&igmp_mtx);
+}
+
+/*
+ * Free the in_multi reference(s) for this IGMP lifecycle.
+ *
+ * Caller must be holding igi_lock.
+ */
+static void
+igmp_flush_relq(struct igmp_ifinfo *igi)
+{
+	struct in_multi *inm;
+
+again:
+	IGI_LOCK_ASSERT_HELD(igi);
+	inm = SLIST_FIRST(&igi->igi_relinmhead);
+	if (inm != NULL) {
+		int lastref;
+
+		SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele);
+		IGI_UNLOCK(igi);
+
+		in_multihead_lock_exclusive();
+		INM_LOCK(inm);
+		VERIFY(inm->inm_nrelecnt != 0);
+		inm->inm_nrelecnt--;
+		lastref = in_multi_detach(inm);
+		VERIFY(!lastref || (!(inm->inm_debug & IFD_ATTACHED) &&
+		    inm->inm_reqcnt == 0));
+		INM_UNLOCK(inm);
+		in_multihead_lock_done();
+		/* from igi_relinmhead */
+		INM_REMREF(inm);
+		/* from in_multihead list */
+		if (lastref)
+			INM_REMREF(inm);
+
+		IGI_LOCK(igi);
+		goto again;
+	}
+}
+
+/*
+ * Update host report group timer for IGMPv1/v2.
+ * Will update the global pending timer flags.
+ */
+static void
+igmp_v1v2_process_group_timer(struct in_multi *inm, const int igmp_version)
+{
+	int report_timer_expired;
+
+	INM_LOCK_ASSERT_HELD(inm);
+	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
+
+	if (inm->inm_timer == 0) {
+		report_timer_expired = 0;
+	} else if (--inm->inm_timer == 0) {
+		report_timer_expired = 1;
+	} else {
+		current_state_timers_running = 1;
+		return;
+	}
+
+	switch (inm->inm_state) {
+	case IGMP_NOT_MEMBER:
+	case IGMP_SILENT_MEMBER:
+	case IGMP_IDLE_MEMBER:
+	case IGMP_LAZY_MEMBER:
+	case IGMP_SLEEPING_MEMBER:
+	case IGMP_AWAKENING_MEMBER:
+		break;
+	case IGMP_REPORTING_MEMBER:
+		if (report_timer_expired) {
+			inm->inm_state = IGMP_IDLE_MEMBER;
+			(void) igmp_v1v2_queue_report(inm,
+			    (igmp_version == IGMP_VERSION_2) ?
+			     IGMP_v2_HOST_MEMBERSHIP_REPORT :
+			     IGMP_v1_HOST_MEMBERSHIP_REPORT);
+			INM_LOCK_ASSERT_HELD(inm);
+			IGI_LOCK_ASSERT_HELD(inm->inm_igi);
+		}
+		break;
+	case IGMP_G_QUERY_PENDING_MEMBER:
+	case IGMP_SG_QUERY_PENDING_MEMBER:
+	case IGMP_LEAVING_MEMBER:
+		break;
+	}
+}
+
+/*
+ * Update a group's timers for IGMPv3.
+ * Will update the global pending timer flags.
+ * Note: Unlocked read from igi.
+ */
+static void
+igmp_v3_process_group_timers(struct igmp_ifinfo *igi,
+    struct ifqueue *qrq, struct ifqueue *scq,
+    struct in_multi *inm, const int uri_fasthz)
+{
+	int query_response_timer_expired;
+	int state_change_retransmit_timer_expired;
+
+	INM_LOCK_ASSERT_HELD(inm);
+	IGI_LOCK_ASSERT_HELD(igi);
+	VERIFY(igi == inm->inm_igi);
+
+	query_response_timer_expired = 0;
+	state_change_retransmit_timer_expired = 0;
+
+	/*
+	 * During a transition from v1/v2 compatibility mode back to v3,
+	 * a group record in REPORTING state may still have its group
+	 * timer active. This is a no-op in this function; it is easier
+	 * to deal with it here than to complicate the slow-timeout path.
+	 */
+	if (inm->inm_timer == 0) {
+		query_response_timer_expired = 0;
+	} else if (--inm->inm_timer == 0) {
+		query_response_timer_expired = 1;
+	} else {
+		current_state_timers_running = 1;
+	}
+
+	if (inm->inm_sctimer == 0) {
+		state_change_retransmit_timer_expired = 0;
+	} else if (--inm->inm_sctimer == 0) {
+		state_change_retransmit_timer_expired = 1;
+	} else {
+		state_change_timers_running = 1;
+	}
+
+	/* We are in the timer path, so be quick about it. */
+	if (!state_change_retransmit_timer_expired &&
+	    !query_response_timer_expired)
+		return;
+
+	switch (inm->inm_state) {
+	case IGMP_NOT_MEMBER:
+	case IGMP_SILENT_MEMBER:
+	case IGMP_SLEEPING_MEMBER:
+	case IGMP_LAZY_MEMBER:
+	case IGMP_AWAKENING_MEMBER:
+	case IGMP_IDLE_MEMBER:
+		break;
+	case IGMP_G_QUERY_PENDING_MEMBER:
+	case IGMP_SG_QUERY_PENDING_MEMBER:
+		/*
+		 * Respond to a previously pending Group-Specific
+		 * or Group-and-Source-Specific query by enqueueing
+		 * the appropriate Current-State report for
+		 * immediate transmission.
+		 */
+		if (query_response_timer_expired) {
+			int retval;
+
+			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
+			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
+			IGMP_PRINTF(("%s: enqueue record = %d\n",
+			    __func__, retval));
+			inm->inm_state = IGMP_REPORTING_MEMBER;
+			/* XXX Clear recorded sources for next time. */
+			inm_clear_recorded(inm);
+		}
+		/* FALLTHROUGH */
+	case IGMP_REPORTING_MEMBER:
+	case IGMP_LEAVING_MEMBER:
+		if (state_change_retransmit_timer_expired) {
+			/*
+			 * State-change retransmission timer fired.
+			 * If there are any further pending retransmissions,
+			 * set the global pending state-change flag, and
+			 * reset the timer.
+			 */
+			if (--inm->inm_scrv > 0) {
+				inm->inm_sctimer = uri_fasthz;
+				state_change_timers_running = 1;
+			}
+			/*
+			 * Retransmit the previously computed state-change
+			 * report. If there are no further pending
+			 * retransmissions, the mbuf queue will be consumed.
+			 * Update T0 state to T1 as we have now sent
+			 * a state-change.
+			 */
+			(void) igmp_v3_merge_state_changes(inm, scq);
+
+			inm_commit(inm);
+			IGMP_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
+			    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name,
+			    inm->inm_ifp->if_unit));
+
+			/*
+			 * If we are leaving the group for good, make sure
+			 * we release IGMP's reference to it.
+			 * This release must be deferred using a SLIST,
+			 * as we are called from a loop which traverses
+			 * the in_multihead list.
+			 */
+			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
+			    inm->inm_scrv == 0) {
+				inm->inm_state = IGMP_NOT_MEMBER;
+				/*
+				 * A reference has already been held in
+				 * igmp_final_leave() for this inm, so
+				 * no need to hold another one.  We also
+				 * bumped up its request count then, so
+				 * that it stays in in_multihead.  Both
+				 * of them will be released when it is
+				 * dequeued later on.
+				 */
+				VERIFY(inm->inm_nrelecnt != 0);
+				SLIST_INSERT_HEAD(&igi->igi_relinmhead,
+				    inm, inm_nrele);
+			}
+		}
+		break;
+	}
+}
+
+/*
+ * Suppress a group's pending response to a group or source/group query.
+ *
+ * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
+ * Do NOT update ST1/ST0 as this operation merely suppresses
+ * the currently pending group record.
+ * Do NOT suppress the response to a general query. It is possible but
+ * it would require adding another state or flag.
+ */
+static void
+igmp_v3_suppress_group_record(struct in_multi *inm)
+{
+
+	INM_LOCK_ASSERT_HELD(inm);
+	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
+
+	VERIFY(inm->inm_igi->igi_version == IGMP_VERSION_3);
+
+	if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER &&
+	    inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER)
+		return;
+
+	if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
+		inm_clear_recorded(inm);
+
+	inm->inm_timer = 0;
+	inm->inm_state = IGMP_REPORTING_MEMBER;
+}
+
+/*
+ * Switch to a different IGMP version on the given interface,
+ * as per Section 7.2.1.
+ */
+static void
+igmp_set_version(struct igmp_ifinfo *igi, const int igmp_version)
+{
+	int old_version_timer;
+
+	IGI_LOCK_ASSERT_HELD(igi);
+
+	IGMP_PRINTF(("%s: switching to v%d on ifp %p(%s%d)\n", __func__,
+	    igmp_version, igi->igi_ifp, igi->igi_ifp->if_name,
+	    igi->igi_ifp->if_unit));
+
+	if (igmp_version == IGMP_VERSION_1 || igmp_version == IGMP_VERSION_2) {
+		/*
+		 * Compute the "Older Version Querier Present" timer as per
+		 * Section 8.12.
+		 */
+		old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
+		old_version_timer *= PR_SLOWHZ;
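+		/*
+		 * E.g. with the RFC 3376 Section 8.12 defaults (Robustness
+		 * Variable 2, Query Interval 125 sec, Query Response
+		 * Interval 10 sec), and assuming igi_rv/igi_qi/igi_qri hold
+		 * those values in seconds, this is 2 * 125 + 10 = 260 sec,
+		 * or 520 ticks at PR_SLOWHZ == 2.
+		 */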
+
+		if (igmp_version == IGMP_VERSION_1) {
+			igi->igi_v1_timer = old_version_timer;
+			igi->igi_v2_timer = 0;
+		} else if (igmp_version == IGMP_VERSION_2) {
+			igi->igi_v1_timer = 0;
+			igi->igi_v2_timer = old_version_timer;
+		}
+	}
+
+	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
+		if (igi->igi_version != IGMP_VERSION_2) {
+			igi->igi_version = IGMP_VERSION_2;
+			igmp_v3_cancel_link_timers(igi);
+		}
+	} else if (igi->igi_v1_timer > 0) {
+		if (igi->igi_version != IGMP_VERSION_1) {
+			igi->igi_version = IGMP_VERSION_1;
+			igmp_v3_cancel_link_timers(igi);
+		}
+	}
+
+	IGI_LOCK_ASSERT_HELD(igi);
+}
+
+/*
+ * Cancel pending IGMPv3 timers for the given link and all groups
+ * joined on it; state-change, general-query, and group-query timers.
+ *
+ * Only ever called on a transition from v3 to Compatibility mode. Kill
+ * the timers stone dead (this may be expensive for large numbers of
+ * groups); they will be restarted if Compatibility Mode deems that they
+ * must be, due to query processing.
+ */
+static void
+igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi)
+{
+	struct ifnet		*ifp;
+	struct in_multi		*inm;
+	struct in_multistep	step;
+
+	IGI_LOCK_ASSERT_HELD(igi);
+
+	IGMP_PRINTF(("%s: cancel v3 timers on ifp %p(%s%d)\n", __func__,
+	    igi->igi_ifp, igi->igi_ifp->if_name, igi->igi_ifp->if_unit));
+
+	/*
+	 * Stop the v3 General Query Response on this link stone dead.
+	 * If fasttimo is woken up due to interface_timers_running,
+	 * the flag will be cleared if there are no pending link timers.
+	 */
+	igi->igi_v3_timer = 0;
+
+	/*
+	 * Now clear the current-state and state-change report timers
+	 * for all memberships scoped to this link.
+	 */
+	ifp = igi->igi_ifp;
+	IGI_UNLOCK(igi);
+
+	in_multihead_lock_shared();
+	IN_FIRST_MULTI(step, inm);
+	while (inm != NULL) {
+		INM_LOCK(inm);
+		if (inm->inm_ifp != ifp)
+			goto next;
+
+		switch (inm->inm_state) {
+		case IGMP_NOT_MEMBER:
+		case IGMP_SILENT_MEMBER:
+		case IGMP_IDLE_MEMBER:
+		case IGMP_LAZY_MEMBER:
+		case IGMP_SLEEPING_MEMBER:
+		case IGMP_AWAKENING_MEMBER:
+			/*
+			 * These states are either not relevant in v3 mode,
+			 * or are unreported. Do nothing.
+			 */
+			break;
+		case IGMP_LEAVING_MEMBER:
+			/*
+			 * If we are leaving the group and switching to
+			 * compatibility mode, we need to release the final
+			 * reference held for issuing the INCLUDE {}, and
+			 * transition to REPORTING to ensure the host leave
+			 * message is sent upstream to the old querier --
+			 * a transition to NOT_MEMBER would lose the leave
+			 * and create a race.
+			 * During igmp_final_leave(), we bumped up both the
+			 * request and reference counts.  Since we cannot
+			 * call in_multi_detach() here, defer this task to
+			 * the timer routine.
+			 */
+			VERIFY(inm->inm_nrelecnt != 0);
+			IGI_LOCK(igi);
+			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
+			IGI_UNLOCK(igi);
+			/* FALLTHROUGH */
+		case IGMP_G_QUERY_PENDING_MEMBER:
+		case IGMP_SG_QUERY_PENDING_MEMBER:
+			inm_clear_recorded(inm);
+			/* FALLTHROUGH */
+		case IGMP_REPORTING_MEMBER:
+			inm->inm_state = IGMP_REPORTING_MEMBER;
+			break;
+		}
+		/*
+		 * Always clear state-change and group report timers.
+		 * Free any pending IGMPv3 state-change records.
+		 */
+		inm->inm_sctimer = 0;
+		inm->inm_timer = 0;
+		IF_DRAIN(&inm->inm_scq);
+next:
+		INM_UNLOCK(inm);
+		IN_NEXT_MULTI(step, inm);
+	}
+	in_multihead_lock_done();
+
+	IGI_LOCK(igi);
+}
+
+/*
+ * Update the Older Version Querier Present timers for a link.
+ * See Section 7.2.1 of RFC 3376.
+ */
+static void
+igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi)
+{
+	IGI_LOCK_ASSERT_HELD(igi);
+
+	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) {
+		/*
+		 * IGMPv1 and IGMPv2 Querier Present timers expired.
+		 *
+		 * Revert to IGMPv3.
+		 */
+		if (igi->igi_version != IGMP_VERSION_3) {
+			IGMP_PRINTF(("%s: transition from v%d -> v%d on %p(%s%d)\n",
+			    __func__, igi->igi_version, IGMP_VERSION_3,
+			    igi->igi_ifp, igi->igi_ifp->if_name,
+			    igi->igi_ifp->if_unit));
+			igi->igi_version = IGMP_VERSION_3;
+			IF_DRAIN(&igi->igi_v2q);
+		}
+	} else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
+		/*
+		 * IGMPv1 Querier Present timer expired,
+		 * IGMPv2 Querier Present timer running.
+		 * If IGMPv2 was disabled since last timeout,
+		 * revert to IGMPv3.
+		 * If IGMPv2 is enabled, revert to IGMPv2.
+		 */
+		if (!igmp_v2enable) {
+			IGMP_PRINTF(("%s: transition from v%d -> v%d on %p(%s%d)\n",
+			    __func__, igi->igi_version, IGMP_VERSION_3,
+			    igi->igi_ifp, igi->igi_ifp->if_name,
+			    igi->igi_ifp->if_unit));
+			igi->igi_v2_timer = 0;
+			igi->igi_version = IGMP_VERSION_3;
+			IF_DRAIN(&igi->igi_v2q);
+		} else {
+			--igi->igi_v2_timer;
+			if (igi->igi_version != IGMP_VERSION_2) {
+				IGMP_PRINTF(("%s: transition from v%d -> v%d on %p(%s%d)\n",
+				    __func__, igi->igi_version, IGMP_VERSION_2,
+				    igi->igi_ifp, igi->igi_ifp->if_name,
+				    igi->igi_ifp->if_unit));
+				igi->igi_version = IGMP_VERSION_2;
+				IF_DRAIN(&igi->igi_gq);
+			}
+		}
+	} else if (igi->igi_v1_timer > 0) {
+		/*
+		 * IGMPv1 Querier Present timer running.
+		 * Stop IGMPv2 timer if running.
+		 *
+		 * If IGMPv1 was disabled since last timeout,
+		 * revert to IGMPv3.
+		 * If IGMPv1 is enabled, reset IGMPv2 timer if running.
+		 */
+		if (!igmp_v1enable) {
+			IGMP_PRINTF(("%s: transition from v%d -> v%d on %p(%s%d)\n",
+			    __func__, igi->igi_version, IGMP_VERSION_3,
+			    igi->igi_ifp, igi->igi_ifp->if_name,
+			    igi->igi_ifp->if_unit));
+			igi->igi_v1_timer = 0;
+			igi->igi_version = IGMP_VERSION_3;
+			IF_DRAIN(&igi->igi_v2q);
+		} else {
+			--igi->igi_v1_timer;
+		}
+		if (igi->igi_v2_timer > 0) {
+			IGMP_PRINTF(("%s: cancel v2 timer on %p(%s%d)\n",
+			    __func__, igi->igi_ifp, igi->igi_ifp->if_name,
+			    igi->igi_ifp->if_unit));
+			igi->igi_v2_timer = 0;
+		}
+	}
+}
+
+/*
+ * Dispatch an IGMPv1/v2 host report or leave message.
+ * These are always small enough to fit inside a single mbuf.
+ */
+static int
+igmp_v1v2_queue_report(struct in_multi *inm, const int type)
+{
+	struct ifnet		*ifp;
+	struct igmp		*igmp;
+	struct ip		*ip;
+	struct mbuf		*m;
+	int			error = 0;
+
+	INM_LOCK_ASSERT_HELD(inm);
+	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
+
+	ifp = inm->inm_ifp;
+
+	MGETHDR(m, M_DONTWAIT, MT_DATA);
+	if (m == NULL)
+		return (ENOMEM);
+	MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));
+
+	m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);
+
+	m->m_data += sizeof(struct ip);
+	m->m_len = sizeof(struct igmp);
+
+	igmp = mtod(m, struct igmp *);
+	igmp->igmp_type = type;
+	igmp->igmp_code = 0;
+	igmp->igmp_group = inm->inm_addr;
+	igmp->igmp_cksum = 0;
+	igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));
+
+	m->m_data -= sizeof(struct ip);
+	m->m_len += sizeof(struct ip);
+
+	ip = mtod(m, struct ip *);
+	ip->ip_tos = 0;
+	ip->ip_len = sizeof(struct ip) + sizeof(struct igmp);
+	ip->ip_off = 0;
+	ip->ip_p = IPPROTO_IGMP;
+	ip->ip_src.s_addr = INADDR_ANY;
+
+	if (type == IGMP_HOST_LEAVE_MESSAGE)
+		ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
+	else
+		ip->ip_dst = inm->inm_addr;
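+	/*
+	 * (Per RFC 2236, leave messages are addressed to the all-routers
+	 * group, 224.0.0.2, while v1/v2 reports are sent to the group
+	 * being reported.)
+	 */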
+
+	m->m_flags |= M_IGMPV2;
+	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK)
+		m->m_flags |= M_IGMP_LOOP;
+
+	/*
+	 * Due to the fact that at this point we are possibly holding
+	 * in_multihead_lock in shared or exclusive mode, we can't call
+	 * igmp_sendpkt() here since that will eventually call ip_output(),
+	 * which will try to lock in_multihead_lock and cause a deadlock.
+	 * Instead we defer the work to the igmp_slowtimo() thread, thus
+	 * avoiding unlocking in_multihead_lock here.
+	 */
+	if (IF_QFULL(&inm->inm_igi->igi_v2q)) {
+		IGMP_PRINTF(("%s: v1/v2 outbound queue full\n", __func__));
+		error = ENOMEM;
+		m_freem(m);
+	} else
+		IF_ENQUEUE(&inm->inm_igi->igi_v2q, m);
+
+	return (error);
+}
+
+/*
+ * Process a state change from the upper layer for the given IPv4 group.
+ *
+ * Each socket holds a reference on the in_multi in its own ip_moptions.
+ * The socket layer will have made the necessary updates to the group
+ * state, it is now up to IGMP to issue a state change report if there
+ * has been any change between T0 (when the last state-change was issued)
+ * and T1 (now).
+ *
+ * We use the IGMPv3 state machine at group level. The IGMP module
+ * however makes the decision as to which IGMP protocol version to speak.
+ * A state change *from* INCLUDE {} always means an initial join.
+ * A state change *to* INCLUDE {} always means a final leave.
+ *
+ * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
+ * save ourselves a bunch of work; any exclusive mode groups need not
+ * compute source filter lists.
+ */
+int
+igmp_change_state(struct in_multi *inm)
+{
+	struct igmp_ifinfo *igi;
+	struct ifnet *ifp;
+	int error = 0;
+
+	INM_LOCK_ASSERT_HELD(inm);
+	VERIFY(inm->inm_igi != NULL);
+	IGI_LOCK_ASSERT_NOTHELD(inm->inm_igi);
+
+	/*
+	 * Try to detect if the upper layer just asked us to change state
+	 * for an interface which has now gone away.
+	 */
+	VERIFY(inm->inm_ifma != NULL);
+	ifp = inm->inm_ifma->ifma_ifp;
+	/*
+	 * Sanity check that netinet's notion of ifp is the same as net's.
+	 */
+	VERIFY(inm->inm_ifp == ifp);
+
+	igi = IGMP_IFINFO(ifp);
+	VERIFY(igi != NULL);
+
+	/*
+	 * If we detect a state transition to or from MCAST_UNDEFINED
+	 * for this group, then we are starting or finishing an IGMP
+	 * life cycle for this group.
+	 */
+	if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) {
+		IGMP_PRINTF(("%s: inm transition %d -> %d\n", __func__,
+		    inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode));
+		if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
+			IGMP_PRINTF(("%s: initial join\n", __func__));
+			error = igmp_initial_join(inm, igi);
+			goto out;
+		} else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
+			IGMP_PRINTF(("%s: final leave\n", __func__));
+			igmp_final_leave(inm, igi);
+			goto out;
+		}
+	} else {
+		IGMP_PRINTF(("%s: filter set change\n", __func__));
+	}
+
+	error = igmp_handle_state_change(inm, igi);
+out:
+	return (error);
+}
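+
+/*
+ * Illustration (hypothetical scenario): the first IP_ADD_MEMBERSHIP on
+ * a group takes T0 from MCAST_UNDEFINED to a T1 of EXCLUDE {}, so the
+ * transition above selects igmp_initial_join(); dropping the last
+ * membership takes T1 back to MCAST_UNDEFINED, selecting
+ * igmp_final_leave().
+ */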
+
+/*
+ * Perform the initial join for an IGMP group.
+ *
+ * When joining a group:
+ *  If the group should have its IGMP traffic suppressed, do nothing.
+ *  IGMPv1 starts sending IGMPv1 host membership reports.
+ *  IGMPv2 starts sending IGMPv2 host membership reports.
+ *  IGMPv3 will schedule an IGMPv3 state-change report containing the
+ *  initial state of the membership.
+ */
+static int
+igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi)
+{
+	struct ifnet		*ifp;
+	struct ifqueue		*ifq;
+	int			 error, retval, syncstates;
+
+	INM_LOCK_ASSERT_HELD(inm);
+	IGI_LOCK_ASSERT_NOTHELD(igi);
+
+	IGMP_PRINTF(("%s: initial join %s on ifp %p(%s%d)\n",
+	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
+	    inm->inm_ifp->if_name, inm->inm_ifp->if_unit));
+
+	error = 0;
+	syncstates = 1;
+
+	ifp = inm->inm_ifp;
+
+	IGI_LOCK(igi);
+	VERIFY(igi->igi_ifp == ifp);
+
+	/*
+	 * Groups joined on loopback or marked as 'not reported',
+	 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
+	 * are never reported in any IGMP protocol exchanges.
+	 * All other groups enter the appropriate IGMP state machine
+	 * for the version in use on this link.
+	 * A link marked as IGIF_SILENT causes IGMP to be completely
+	 * disabled for the link.
+	 */
+	if ((ifp->if_flags & IFF_LOOPBACK) ||
+	    (igi->igi_flags & IGIF_SILENT) ||
+	    !igmp_isgroupreported(inm->inm_addr)) {
+		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
+		    __func__));
+		inm->inm_state = IGMP_SILENT_MEMBER;
+		inm->inm_timer = 0;
+	} else {
+		/*
+		 * Deal with overlapping in_multi lifecycle.
+		 * If this group was LEAVING, then make sure
+		 * we drop the reference we picked up to keep the
+		 * group around for the final INCLUDE {} enqueue.
+		 * Since we cannot call in_multi_detach() here,
+		 * defer this task to the timer routine.
+		 */
+		if (igi->igi_version == IGMP_VERSION_3 &&
+		    inm->inm_state == IGMP_LEAVING_MEMBER) {
+			VERIFY(inm->inm_nrelecnt != 0);
+			SLIST_INSERT_HEAD(&igi->igi_relinmhead, inm, inm_nrele);
+		}
+
+		inm->inm_state = IGMP_REPORTING_MEMBER;
+
+		switch (igi->igi_version) {
+		case IGMP_VERSION_1:
+		case IGMP_VERSION_2:
+			inm->inm_state = IGMP_IDLE_MEMBER;
+			error = igmp_v1v2_queue_report(inm,
+			    (igi->igi_version == IGMP_VERSION_2) ?
+			     IGMP_v2_HOST_MEMBERSHIP_REPORT :
+			     IGMP_v1_HOST_MEMBERSHIP_REPORT);
+
+			INM_LOCK_ASSERT_HELD(inm);
+			IGI_LOCK_ASSERT_HELD(igi);
+
+			if (error == 0) {
+				inm->inm_timer = IGMP_RANDOM_DELAY(
+				    IGMP_V1V2_MAX_RI * PR_SLOWHZ);
+				current_state_timers_running = 1;
+			}
+			break;
+
+		case IGMP_VERSION_3:
+			/*
+			 * Defer update of T0 to T1, until the first copy
+			 * of the state change has been transmitted.
+			 */
+			syncstates = 0;
+
+			/*
+			 * Immediately enqueue a State-Change Report for
+			 * this interface, freeing any previous reports.
+			 * Don't kick the timers if there is nothing to do,
+			 * or if an error occurred.
+			 */
+			ifq = &inm->inm_scq;
+			IF_DRAIN(ifq);
+			retval = igmp_v3_enqueue_group_record(ifq, inm, 1,
+			    0, 0);
+			IGMP_PRINTF(("%s: enqueue record = %d\n",
+			    __func__, retval));
+			if (retval <= 0) {
+				error = retval * -1;
+				break;
+			}
+
+			/*
+			 * Schedule transmission of pending state-change
+			 * report up to RV times for this link. The timer
+			 * will fire at the next igmp_slowtimo (~500ms at
+			 * PR_SLOWHZ == 2),
+			 * giving us an opportunity to merge the reports.
+			 */
+			if (igi->igi_flags & IGIF_LOOPBACK) {
+				inm->inm_scrv = 1;
+			} else {
+				VERIFY(igi->igi_rv > 1);
+				inm->inm_scrv = igi->igi_rv;
+			}
+			inm->inm_sctimer = 1;
+			state_change_timers_running = 1;
+
+			error = 0;
+			break;
+		}
+	}
+	IGI_UNLOCK(igi);
+
+	/*
+	 * Only update the T0 state if state change is atomic,
+	 * i.e. we don't need to wait for a timer to fire before we
+	 * can consider the state change to have been communicated.
+	 */
+	if (syncstates) {
+		inm_commit(inm);
+		IGMP_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
+		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name,
+		    inm->inm_ifp->if_unit));
+	}
+
+	return (error);
+}
+
+/*
+ * Issue an intermediate state change during the IGMP life-cycle.
+ */
+static int
+igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi)
+{
+	struct ifnet		*ifp;
+	int			 retval;
+
+	INM_LOCK_ASSERT_HELD(inm);
+	IGI_LOCK_ASSERT_NOTHELD(igi);
+
+	IGMP_PRINTF(("%s: state change for %s on ifp %p(%s%d)\n",
+	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
+	    inm->inm_ifp->if_name, inm->inm_ifp->if_unit));
+
+	ifp = inm->inm_ifp;
+
+	IGI_LOCK(igi);
+	VERIFY(igi->igi_ifp == ifp);
+
+	if ((ifp->if_flags & IFF_LOOPBACK) ||
+	    (igi->igi_flags & IGIF_SILENT) ||
+	    !igmp_isgroupreported(inm->inm_addr) ||
+	    (igi->igi_version != IGMP_VERSION_3)) {
+		IGI_UNLOCK(igi);
+		if (!igmp_isgroupreported(inm->inm_addr)) {
+			IGMP_PRINTF(("%s: not kicking state "
+			    "machine for silent group\n", __func__));
+		}
+		IGMP_PRINTF(("%s: nothing to do\n", __func__));
+		inm_commit(inm);
+		IGMP_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
+		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name));
+		return (0);
+	}
+
+	IF_DRAIN(&inm->inm_scq);
+
+	retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
+	IGMP_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
+	if (retval <= 0) {
+		IGI_UNLOCK(igi);
+		return (-retval);
+	}
+	/*
+	 * If record(s) were enqueued, start the state-change
+	 * report timer for this group.
+	 */
+	inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : igi->igi_rv);
+	inm->inm_sctimer = 1;
+	state_change_timers_running = 1;
+	IGI_UNLOCK(igi);
+
+	return (0);
+}
+
+/*
+ * Perform the final leave for an IGMP group.
+ *
+ * When leaving a group:
+ *  IGMPv1 does nothing.
+ *  IGMPv2 sends a host leave message, if and only if we are the reporter.
+ *  IGMPv3 enqueues a state-change report containing a transition
+ *  to INCLUDE {} for immediate transmission.
+ */
+static void
+igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi)
+{
+	int syncstates = 1;
+
+	INM_LOCK_ASSERT_HELD(inm);
+	IGI_LOCK_ASSERT_NOTHELD(igi);
+
+	IGMP_PRINTF(("%s: final leave %s on ifp %p(%s%d)\n",
+	    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp,
+	    inm->inm_ifp->if_name, inm->inm_ifp->if_unit));
+
+	switch (inm->inm_state) {
+	case IGMP_NOT_MEMBER:
+	case IGMP_SILENT_MEMBER:
+	case IGMP_LEAVING_MEMBER:
+		/* Already leaving or left; do nothing. */
+		IGMP_PRINTF(("%s: not kicking state machine for silent group\n",
+		    __func__));
+		break;
+	case IGMP_REPORTING_MEMBER:
+	case IGMP_IDLE_MEMBER:
+	case IGMP_G_QUERY_PENDING_MEMBER:
+	case IGMP_SG_QUERY_PENDING_MEMBER:
+		IGI_LOCK(igi);
+		if (igi->igi_version == IGMP_VERSION_2) {
+			if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
+			    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
+				panic("%s: IGMPv3 state reached, not IGMPv3 "
+				    "mode\n", __func__);
+				/* NOTREACHED */
+			}
+			igmp_v1v2_queue_report(inm, IGMP_HOST_LEAVE_MESSAGE);
+
+			INM_LOCK_ASSERT_HELD(inm);
+			IGI_LOCK_ASSERT_HELD(igi);
+
+			inm->inm_state = IGMP_NOT_MEMBER;
+		} else if (igi->igi_version == IGMP_VERSION_3) {
+			/*
+			 * Stop group timer and all pending reports.
+			 * Immediately enqueue a state-change report
+			 * TO_IN {} to be sent on the next fast timeout,
+			 * giving us an opportunity to merge reports.
+			 */
+			IF_DRAIN(&inm->inm_scq);
+			inm->inm_timer = 0;
+			if (igi->igi_flags & IGIF_LOOPBACK) {
+				inm->inm_scrv = 1;
+			} else {
+				inm->inm_scrv = igi->igi_rv;
+			}
+			IGMP_PRINTF(("%s: Leaving %s/%s%d with %d "
+			    "pending retransmissions.\n", __func__,
+			    inet_ntoa(inm->inm_addr),
+			    inm->inm_ifp->if_name, inm->inm_ifp->if_unit,
+			    inm->inm_scrv));
+			if (inm->inm_scrv == 0) {
+				inm->inm_state = IGMP_NOT_MEMBER;
+				inm->inm_sctimer = 0;
+			} else {
+				int retval;
+				/*
+				 * Stick around in the in_multihead list;
+				 * the final detach will be issued by
+				 * igmp_v3_process_group_timers() when
+				 * the retransmit timer expires.
+				 */
+				INM_ADDREF_LOCKED(inm);
+				VERIFY(inm->inm_debug & IFD_ATTACHED);
+				inm->inm_reqcnt++;
+				VERIFY(inm->inm_reqcnt >= 1);
+				inm->inm_nrelecnt++;
+				VERIFY(inm->inm_nrelecnt != 0);
+
+				retval = igmp_v3_enqueue_group_record(
+				    &inm->inm_scq, inm, 1, 0, 0);
+				KASSERT(retval != 0,
+				    ("%s: enqueue record = %d\n", __func__,
+				     retval));
+
+				inm->inm_state = IGMP_LEAVING_MEMBER;
+				inm->inm_sctimer = 1;
+				state_change_timers_running = 1;
+				syncstates = 0;
+			}
+		}
+		IGI_UNLOCK(igi);
+		break;
+	case IGMP_LAZY_MEMBER:
+	case IGMP_SLEEPING_MEMBER:
+	case IGMP_AWAKENING_MEMBER:
+		/* Our reports are suppressed; do nothing. */
+		break;
+	}
+
+	if (syncstates) {
+		inm_commit(inm);
+		IGMP_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
+		    inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name,
+		    inm->inm_ifp->if_unit));
+		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
+		IGMP_PRINTF(("%s: T1 now MCAST_UNDEFINED for %s/%s%d\n",
+		    __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name,
+		    inm->inm_ifp->if_unit));
+	}
+}
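+
+/*
+ * Worked example (editorial, using the defaults defined in igmp_var.h):
+ * on a non-loopback interface the leave above sets inm_scrv to the
+ * link's Robustness Variable (igi_rv, IGMP_RV_INIT == 2 by default),
+ * so the TO_IN {} state-change record is transmitted igi_rv times by
+ * igmp_v3_process_group_timers() before the final detach; on loopback
+ * (IGIF_LOOPBACK) a single transmission suffices.
+ */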
+
+/*
+ * Enqueue an IGMPv3 group record to the given output queue.
+ *
+ * XXX This function could do with having the allocation code
+ * split out, and the multiple-tree-walks coalesced into a single
+ * routine as has been done in igmp_v3_enqueue_filter_change().
+ *
+ * If is_state_change is zero, a current-state record is appended.
+ * If is_state_change is non-zero, a state-change report is appended.
+ *
+ * If is_group_query is non-zero, a new mbuf packet chain is always
+ * allocated.
+ * If is_group_query is zero and there is a packet with sufficient free
+ * space at the tail of the queue, the record is appended to that
+ * packet; otherwise a new mbuf packet chain is allocated.
+ *
+ * If is_source_query is non-zero, each source is checked to see if
+ * it was recorded for a Group-Source query, and will be omitted if
+ * it is not both in-mode and recorded.
+ *
+ * The function will attempt to allocate leading space in the packet
+ * for the IP/IGMP header to be prepended without fragmenting the chain.
+ *
+ * On success, the size of all data appended to the queue is returned;
+ * zero is returned if no records were appended, and an error code
+ * less than zero is returned on failure.
+ */
+static int
+igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm,
+    const int is_state_change, const int is_group_query,
+    const int is_source_query)
+{
+	struct igmp_grouprec	 ig;
+	struct igmp_grouprec	*pig;
+	struct ifnet		*ifp;
+	struct ip_msource	*ims, *nims;
+	struct mbuf		*m0, *m, *md;
+	int			 error, is_filter_list_change;
+	int			 minrec0len, m0srcs, msrcs, nbytes, off;
+	int			 record_has_sources;
+	int			 now;
+	int			 type;
+	in_addr_t		 naddr;
+	uint8_t			 mode;
+
+	INM_LOCK_ASSERT_HELD(inm);
+	IGI_LOCK_ASSERT_HELD(inm->inm_igi);
+
+	error = 0;
+	ifp = inm->inm_ifp;
+	is_filter_list_change = 0;
+	m = NULL;
+	m0 = NULL;
+	m0srcs = 0;
+	msrcs = 0;
+	nbytes = 0;
+	nims = NULL;
+	record_has_sources = 1;
+	pig = NULL;
+	type = IGMP_DO_NOTHING;
+	mode = inm->inm_st[1].iss_fmode;
+
+	/*
+	 * If we did not transition out of ASM mode during t0->t1,
+	 * and there are no source nodes to process, we can skip
+	 * the generation of source records.
+	 */
+	if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
+	    inm->inm_nsrc == 0)
+		record_has_sources = 0;
+
+	if (is_state_change) {
+		/*
+		 * Queue a state change record.
+		 * If the mode did not change, and there are non-ASM
+		 * listeners or source filters present,
+		 * we potentially need to issue two records for the group.
+		 * If we are transitioning to MCAST_UNDEFINED, we need
+		 * not send any sources.
+		 * If there are ASM listeners, and there was no filter
+		 * mode transition of any kind, do nothing.
+		 */
+		if (mode != inm->inm_st[0].iss_fmode) {
+			if (mode == MCAST_EXCLUDE) {
+				IGMP_PRINTF(("%s: change to EXCLUDE\n",
+				    __func__));
+				type = IGMP_CHANGE_TO_EXCLUDE_MODE;
+			} else {
+				IGMP_PRINTF(("%s: change to INCLUDE\n",
+				    __func__));
+				type = IGMP_CHANGE_TO_INCLUDE_MODE;
+				if (mode == MCAST_UNDEFINED)
+					record_has_sources = 0;
+			}
+		} else {
+			if (record_has_sources) {
+				is_filter_list_change = 1;
+			} else {
+				type = IGMP_DO_NOTHING;
+			}
+		}
+	} else {
+		/*
+		 * Queue a current state record.
+		 */
+		if (mode == MCAST_EXCLUDE) {
+			type = IGMP_MODE_IS_EXCLUDE;
+		} else if (mode == MCAST_INCLUDE) {
+			type = IGMP_MODE_IS_INCLUDE;
+			VERIFY(inm->inm_st[1].iss_asm == 0);
+		}
+	}
+
+	/*
+	 * Generate the filter list changes using a separate function.
+	 */
+	if (is_filter_list_change)
+		return (igmp_v3_enqueue_filter_change(ifq, inm));
+
+	if (type == IGMP_DO_NOTHING) {
+		IGMP_PRINTF(("%s: nothing to do for %s/%s%d\n",
+		    __func__, inet_ntoa(inm->inm_addr),
+		    inm->inm_ifp->if_name, inm->inm_ifp->if_unit));
+		return (0);
+	}
+
+	/*
+	 * If any sources are present, we must be able to fit at least
+	 * one in the trailing space of the tail packet's mbuf,
+	 * ideally more.
+	 */
+	minrec0len = sizeof(struct igmp_grouprec);
+	if (record_has_sources)
+		minrec0len += sizeof(in_addr_t);
+
+	IGMP_PRINTF(("%s: queueing %s for %s/%s%d\n", __func__,
+	    igmp_rec_type_to_str(type), inet_ntoa(inm->inm_addr),
+	    inm->inm_ifp->if_name, inm->inm_ifp->if_unit));
+
+	/*
+	 * Check if we have a packet in the tail of the queue for this
+	 * group into which the first group record for this group will fit.
+	 * Otherwise allocate a new packet.
+	 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
+	 * Note: Group records for G/GSR query responses MUST be sent
+	 * in their own packet.
+	 */
+	m0 = ifq->ifq_tail;
+	if (!is_group_query &&
+	    m0 != NULL &&
+	    (m0->m_pkthdr.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
+	    (m0->m_pkthdr.len + minrec0len) <
+	     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
+		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
+			    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
+		m = m0;
+		IGMP_PRINTF(("%s: use existing packet\n", __func__));
+	} else {
+		if (IF_QFULL(ifq)) {
+			IGMP_PRINTF(("%s: outbound queue full\n", __func__));
+			return (-ENOMEM);
+		}
+		m = NULL;
+		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
+		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
+		if (!is_state_change && !is_group_query) {
+			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+			if (m)
+				m->m_data += IGMP_LEADINGSPACE;
+		}
+		if (m == NULL) {
+			m = m_gethdr(M_DONTWAIT, MT_DATA);
+			if (m)
+				MH_ALIGN(m, IGMP_LEADINGSPACE);
+		}
+		if (m == NULL)
+			return (-ENOMEM);
+
+		IGMP_PRINTF(("%s: allocated first packet\n", __func__));
+	}
+
+	/*
+	 * Append group record.
+	 * If we have sources, we don't know how many yet.
+	 */
+	ig.ig_type = type;
+	ig.ig_datalen = 0;
+	ig.ig_numsrc = 0;
+	ig.ig_group = inm->inm_addr;
+	if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
+		if (m != m0)
+			m_freem(m);
+		IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
+		return (-ENOMEM);
+	}
+	nbytes += sizeof(struct igmp_grouprec);
+
+	/*
+	 * Append as many sources as will fit in the first packet.
+	 * If we are appending to a new packet, the chain allocation
+	 * may potentially use clusters; use m_getptr() in this case.
+	 * If we are appending to an existing packet, we need to obtain
+	 * a pointer to the group record after m_append(), in case a new
+	 * mbuf was allocated.
+	 * Only append sources which are in-mode at t1. If we are
+	 * transitioning to MCAST_UNDEFINED state on the group, do not
+	 * include source entries.
+	 * Only report recorded sources in our filter set when responding
+	 * to a group-source query.
+	 */
+	if (record_has_sources) {
+		if (m == m0) {
+			md = m_last(m);
+			pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
+			    md->m_len - nbytes);
+		} else {
+			md = m_getptr(m, 0, &off);
+			pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
+			    off);
+		}
+		msrcs = 0;
+		RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
+			IGMP_PRINTF(("%s: visit node %s\n", __func__,
+			    inet_ntoa_haddr(ims->ims_haddr)));
+			now = ims_get_mode(inm, ims, 1);
+			IGMP_PRINTF(("%s: node is %d\n", __func__, now));
+			if ((now != mode) ||
+			    (now == mode && mode == MCAST_UNDEFINED)) {
+				IGMP_PRINTF(("%s: skip node\n", __func__));
+				continue;
+			}
+			if (is_source_query && ims->ims_stp == 0) {
+				IGMP_PRINTF(("%s: skip unrecorded node\n",
+				    __func__));
+				continue;
+			}
+			IGMP_PRINTF(("%s: append node\n", __func__));
+			naddr = htonl(ims->ims_haddr);
+			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
+				if (m != m0)
+					m_freem(m);
+				IGMP_PRINTF(("%s: m_append() failed.\n",
+				    __func__));
+				return (-ENOMEM);
+			}
+			nbytes += sizeof(in_addr_t);
+			++msrcs;
+			if (msrcs == m0srcs)
+				break;
+		}
+		IGMP_PRINTF(("%s: msrcs is %d this packet\n", __func__,
+		    msrcs));
+		pig->ig_numsrc = htons(msrcs);
+		nbytes += (msrcs * sizeof(in_addr_t));
+	}
+
+	if (is_source_query && msrcs == 0) {
+		IGMP_PRINTF(("%s: no recorded sources to report\n", __func__));
+		if (m != m0)
+			m_freem(m);
+		return (0);
+	}
+
+	/*
+	 * We are good to go with first packet.
+	 */
+	if (m != m0) {
+		IGMP_PRINTF(("%s: enqueueing first packet\n", __func__));
+		m->m_pkthdr.vt_nrecs = 1;
+		m->m_pkthdr.rcvif = ifp;
+		IF_ENQUEUE(ifq, m);
+	} else {
+		m->m_pkthdr.vt_nrecs++;
+	}
+	/*
+	 * No further work needed if no source list in packet(s).
+	 */
+	if (!record_has_sources)
+		return (nbytes);
+
+	/*
+	 * Whilst sources remain to be announced, we need to allocate
+	 * a new packet and fill out as many sources as will fit.
+	 * Always try for a cluster first.
+	 */
+	while (nims != NULL) {
+		if (IF_QFULL(ifq)) {
+			IGMP_PRINTF(("%s: outbound queue full\n", __func__));
+			return (-ENOMEM);
+		}
+		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+		if (m)
+			m->m_data += IGMP_LEADINGSPACE;
+		if (m == NULL) {
+			m = m_gethdr(M_DONTWAIT, MT_DATA);
+			if (m)
+				MH_ALIGN(m, IGMP_LEADINGSPACE);
+		}
+		if (m == NULL)
+			return (-ENOMEM);
+		md = m_getptr(m, 0, &off);
+		pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + off);
+		IGMP_PRINTF(("%s: allocated next packet\n", __func__));
+
+		if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
+			if (m != m0)
+				m_freem(m);
+			IGMP_PRINTF(("%s: m_append() failed.\n", __func__));
+			return (-ENOMEM);
+		}
+		m->m_pkthdr.vt_nrecs = 1;
+		nbytes += sizeof(struct igmp_grouprec);
+
+		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
+		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
+
+		msrcs = 0;
+		RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
+			IGMP_PRINTF(("%s: visit node %s\n", __func__,
+			    inet_ntoa_haddr(ims->ims_haddr)));
+			now = ims_get_mode(inm, ims, 1);
+			if ((now != mode) ||
+			    (now == mode && mode == MCAST_UNDEFINED)) {
+				IGMP_PRINTF(("%s: skip node\n", __func__));
+				continue;
+			}
+			if (is_source_query && ims->ims_stp == 0) {
+				IGMP_PRINTF(("%s: skip unrecorded node\n",
+				    __func__));
+				continue;
+			}
+			IGMP_PRINTF(("%s: append node\n", __func__));
+			naddr = htonl(ims->ims_haddr);
+			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
+				if (m != m0)
+					m_freem(m);
+				IGMP_PRINTF(("%s: m_append() failed.\n",
+				    __func__));
+				return (-ENOMEM);
+			}
+			++msrcs;
+			if (msrcs == m0srcs)
+				break;
+		}
+		pig->ig_numsrc = htons(msrcs);
+		nbytes += (msrcs * sizeof(in_addr_t));
+
+		IGMP_PRINTF(("%s: enqueueing next packet\n", __func__));
+		m->m_pkthdr.rcvif = ifp;
+		IF_ENQUEUE(ifq, m);
+	}
+
+	return (nbytes);
+}
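+
+/*
+ * Sizing sketch (editorial, assuming a standard 1500-byte Ethernet MTU
+ * and the structure sizes used above): IGMP_LEADINGSPACE is
+ * sizeof(struct ip) + RAOPT_LEN + sizeof(struct igmp_report)
+ * = 20 + 4 + 8 = 32 bytes, so a freshly allocated first packet holds
+ *
+ *	m0srcs = (1500 - 32 - sizeof(struct igmp_grouprec)) / 4
+ *	       = (1500 - 32 - 8) / 4 = 365
+ *
+ * source addresses per group record.
+ */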
+
+/*
+ * Type used to mark record pass completion.
+ * We exploit the fact we can cast to this easily from the
+ * current filter modes on each ip_msource node.
+ */
+typedef enum {
+	REC_NONE = 0x00,	/* MCAST_UNDEFINED */
+	REC_ALLOW = 0x01,	/* MCAST_INCLUDE */
+	REC_BLOCK = 0x02,	/* MCAST_EXCLUDE */
+	REC_FULL = REC_ALLOW | REC_BLOCK
+} rectype_t;
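+
+/*
+ * Illustration (editorial): MCAST_INCLUDE (1) casts to REC_ALLOW and
+ * MCAST_EXCLUDE (2) to REC_BLOCK, so the walk below obtains the second
+ * pass's record type by flipping the first:
+ *
+ *	crt = (~crt & REC_FULL);	REC_ALLOW <-> REC_BLOCK
+ */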
+
+/*
+ * Enqueue an IGMPv3 filter list change to the given output queue.
+ *
+ * Source list filter state is held in an RB-tree. When the filter list
+ * for a group is changed without changing its mode, we need to compute
+ * the deltas between T0 and T1 for each source in the filter set,
+ * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
+ *
+ * As we may potentially queue two record types, and the entire RB-tree
+ * needs to be walked at once, we break this out into its own function
+ * so we can generate a tightly packed queue of packets.
+ *
+ * XXX This could be written to only use one tree walk, although that makes
+ * serializing into the mbuf chains a bit harder. For now we do two walks
+ * which makes things easier on us, and it may or may not be harder on
+ * the L2 cache.
+ *
+ * On success, the size of all data appended to the queue is returned;
+ * zero is returned if no records were appended, and an error code
+ * less than zero is returned on failure.
+ */
+static int
+igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm)
+{
+	static const int MINRECLEN =
+	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
+	struct ifnet		*ifp;
+	struct igmp_grouprec	 ig;
+	struct igmp_grouprec	*pig;
+	struct ip_msource	*ims, *nims;
+	struct mbuf		*m, *m0, *md;
+	in_addr_t		 naddr;
+	int			 m0srcs, nbytes, npbytes, off, rsrcs, schanged;
+	int			 nallow, nblock;
+	uint8_t			 mode, now, then;
+	rectype_t		 crt, drt, nrt;
+
+	INM_LOCK_ASSERT_HELD(inm);
+
+	if (inm->inm_nsrc == 0 ||
+	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0))
+		return (0);
+
+	ifp = inm->inm_ifp;			/* interface */
+	mode = inm->inm_st[1].iss_fmode;	/* filter mode at t1 */
+	crt = REC_NONE;	/* current group record type */
+	drt = REC_NONE;	/* mask of completed group record types */
+	nrt = REC_NONE;	/* record type for current node */
+	m0srcs = 0;	/* # sources which will fit in current mbuf chain */
+	nbytes = 0;	/* # of bytes appended to group's state-change queue */
+	npbytes = 0;	/* # of bytes appended this packet */
+	rsrcs = 0;	/* # sources encoded in current record */
+	schanged = 0;	/* # nodes encoded in overall filter change */
+	nallow = 0;	/* # of source entries in ALLOW_NEW */
+	nblock = 0;	/* # of source entries in BLOCK_OLD */
+	nims = NULL;	/* next tree node pointer */
+
+	/*
+	 * For each possible filter record mode.
+	 * The first kind of source we encounter tells us which
+	 * is the first kind of record we start appending.
+	 * If a node transitioned to UNDEFINED at t1, its mode is treated
+	 * as the inverse of the group's filter mode.
+	 */
+	while (drt != REC_FULL) {
+		do {
+			m0 = ifq->ifq_tail;
+			if (m0 != NULL &&
+			    (m0->m_pkthdr.vt_nrecs + 1 <=
+			     IGMP_V3_REPORT_MAXRECS) &&
+			    (m0->m_pkthdr.len + MINRECLEN) <
+			     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
+				m = m0;
+				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
+					    sizeof(struct igmp_grouprec)) /
+				    sizeof(in_addr_t);
+				IGMP_PRINTF(("%s: use previous packet\n",
+				    __func__));
+			} else {
+				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+				if (m)
+					m->m_data += IGMP_LEADINGSPACE;
+				if (m == NULL) {
+					m = m_gethdr(M_DONTWAIT, MT_DATA);
+					if (m)
+						MH_ALIGN(m, IGMP_LEADINGSPACE);
+				}
+				if (m == NULL) {
+					IGMP_PRINTF(("%s: m_get*() failed\n",
+					    __func__));
+					return (-ENOMEM);
+				}
+				m->m_pkthdr.vt_nrecs = 0;
+				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
+				    sizeof(struct igmp_grouprec)) /
+				    sizeof(in_addr_t);
+				npbytes = 0;
+				IGMP_PRINTF(("%s: allocated new packet\n",
+				    __func__));
+			}
+			/*
+			 * Append the IGMP group record header to the
+			 * current packet's data area.
+			 * Recalculate pointer to free space for next
+			 * group record, in case m_append() allocated
+			 * a new mbuf or cluster.
+			 */
+			memset(&ig, 0, sizeof(ig));
+			ig.ig_group = inm->inm_addr;
+			if (!m_append(m, sizeof(ig), (void *)&ig)) {
+				if (m != m0)
+					m_freem(m);
+				IGMP_PRINTF(("%s: m_append() failed\n",
+				    __func__));
+				return (-ENOMEM);
+			}
+			npbytes += sizeof(struct igmp_grouprec);
+			if (m != m0) {
+				/* new packet; offset in chain */
+				md = m_getptr(m, npbytes -
+				    sizeof(struct igmp_grouprec), &off);
+				pig = (struct igmp_grouprec *)(mtod(md,
+				    uint8_t *) + off);
+			} else {
+				/* current packet; offset from last append */
+				md = m_last(m);
+				pig = (struct igmp_grouprec *)(mtod(md,
+				    uint8_t *) + md->m_len -
+				    sizeof(struct igmp_grouprec));
+			}
+			/*
+			 * Begin walking the tree for this record type
+			 * pass, or continue from where we left off
+			 * previously if we had to allocate a new packet.
+			 * Only report deltas in-mode at t1.
+			 * We need not report included sources as allowed
+			 * if we are in inclusive mode on the group,
+			 * however the converse is not true.
+			 */
+			rsrcs = 0;
+			if (nims == NULL)
+				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
+			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
+				IGMP_PRINTF(("%s: visit node %s\n",
+				    __func__, inet_ntoa_haddr(ims->ims_haddr)));
+				now = ims_get_mode(inm, ims, 1);
+				then = ims_get_mode(inm, ims, 0);
+				IGMP_PRINTF(("%s: mode: t0 %d, t1 %d\n",
+				    __func__, then, now));
+				if (now == then) {
+					IGMP_PRINTF(("%s: skip unchanged\n",
+					    __func__));
+					continue;
+				}
+				if (mode == MCAST_EXCLUDE &&
+				    now == MCAST_INCLUDE) {
+					IGMP_PRINTF(("%s: skip IN src on EX "
+					    "group\n", __func__));
+					continue;
+				}
+				nrt = (rectype_t)now;
+				if (nrt == REC_NONE)
+					nrt = (rectype_t)(~mode & REC_FULL);
+				if (schanged++ == 0) {
+					crt = nrt;
+				} else if (crt != nrt)
+					continue;
+				naddr = htonl(ims->ims_haddr);
+				if (!m_append(m, sizeof(in_addr_t),
+				    (void *)&naddr)) {
+					if (m != m0)
+						m_freem(m);
+					IGMP_PRINTF(("%s: m_append() failed\n",
+					    __func__));
+					return (-ENOMEM);
+				}
+				nallow += !!(crt == REC_ALLOW);
+				nblock += !!(crt == REC_BLOCK);
+				if (++rsrcs == m0srcs)
+					break;
+			}
+			/*
+			 * If we did not append any tree nodes on this
+			 * pass, back out of allocations.
+			 */
+			if (rsrcs == 0) {
+				npbytes -= sizeof(struct igmp_grouprec);
+				if (m != m0) {
+					IGMP_PRINTF(("%s: m_free(m)\n",
+					    __func__));
+					m_freem(m);
+				} else {
+					IGMP_PRINTF(("%s: m_adj(m, -ig)\n",
+					    __func__));
+					m_adj(m, -((int)sizeof(
+					    struct igmp_grouprec)));
+				}
+				continue;
+			}
+			npbytes += (rsrcs * sizeof(in_addr_t));
+			if (crt == REC_ALLOW)
+				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
+			else if (crt == REC_BLOCK)
+				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
+			pig->ig_numsrc = htons(rsrcs);
+			/*
+			 * Count the new group record, and enqueue this
+			 * packet if it wasn't already queued.
+			 */
+			m->m_pkthdr.vt_nrecs++;
+			m->m_pkthdr.rcvif = ifp;
+			if (m != m0)
+				IF_ENQUEUE(ifq, m);
+			nbytes += npbytes;
+		} while (nims != NULL);
+		drt |= crt;
+		crt = (~crt & REC_FULL);
+	}
+
+	IGMP_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
+	    nallow, nblock));
+
+	return (nbytes);
+}
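+
+/*
+ * Example (editorial): on a group whose t1 filter mode is MCAST_INCLUDE,
+ * a source that dropped to MCAST_UNDEFINED at t1 takes the inverted
+ * record type (nrt = ~mode & REC_FULL) and so is reported in a
+ * BLOCK_OLD_SOURCES record; on an EXCLUDE-mode group it lands in
+ * ALLOW_NEW_SOURCES instead.
+ */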
+
+static int
+igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq)
+{
+	struct ifqueue	*gq;
+	struct mbuf	*m;		/* pending state-change */
+	struct mbuf	*m0;		/* copy of pending state-change */
+	struct mbuf	*mt;		/* last state-change in packet */
+	struct mbuf	*n;
+	int		 docopy, domerge;
+	u_int		 recslen;
+
+	INM_LOCK_ASSERT_HELD(inm);
+
+	docopy = 0;
+	domerge = 0;
+	recslen = 0;
+
+	/*
+	 * If there are further pending retransmissions, make a writable
+	 * copy of each queued state-change message before merging.
+	 */
+	if (inm->inm_scrv > 0)
+		docopy = 1;
+
+	gq = &inm->inm_scq;
+#ifdef IGMP_DEBUG
+	if (gq->ifq_head == NULL) {
+		IGMP_PRINTF(("%s: WARNING: queue for inm %p is empty\n",
+		    __func__, inm));
+	}
+#endif
+
+	/*
+	 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
+	 * packet might not always be at the head of the ifqueue.
+	 */
+	m = gq->ifq_head;
+	while (m != NULL) {
+		/*
+		 * Only merge the report into the current packet if
+		 * there is sufficient space to do so; an IGMPv3 report
+		 * packet may only contain 65,535 group records.
+		 * Always use a simple mbuf chain concatenation to do this,
+		 * as large state changes for single groups may have
+		 * allocated clusters.
+		 */
+		domerge = 0;
+		mt = ifscq->ifq_tail;
+		if (mt != NULL) {
+			recslen = m_length(m);
+
+			if ((mt->m_pkthdr.vt_nrecs +
+			    m->m_pkthdr.vt_nrecs <=
+			    IGMP_V3_REPORT_MAXRECS) &&
+			    (mt->m_pkthdr.len + recslen <=
+			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE)))
+				domerge = 1;
+		}
+
+		if (!domerge && IF_QFULL(gq)) {
+			IGMP_PRINTF(("%s: outbound queue full, skipping whole "
+			    "packet %p\n", __func__, m));
+			n = m->m_nextpkt;
+			if (!docopy) {
+				IF_REMQUEUE(gq, m);
+				m_freem(m);
+			}
+			m = n;
+			continue;
+		}
+
+		if (!docopy) {
+			IGMP_PRINTF(("%s: dequeueing %p\n", __func__, m));
+			n = m->m_nextpkt;
+			IF_REMQUEUE(gq, m);
+			m0 = m;
+			m = n;
+		} else {
+			IGMP_PRINTF(("%s: copying %p\n", __func__, m));
+			m0 = m_dup(m, M_NOWAIT);
+			if (m0 == NULL)
+				return (ENOMEM);
+			m0->m_nextpkt = NULL;
+			m = m->m_nextpkt;
+		}
+
+		if (!domerge) {
+			IGMP_PRINTF(("%s: queueing %p to ifscq %p\n",
+			    __func__, m0, ifscq));
+			m0->m_pkthdr.rcvif = inm->inm_ifp;
+			IF_ENQUEUE(ifscq, m0);
+		} else {
+			struct mbuf *mtl;	/* last mbuf of packet mt */
+
+			IGMP_PRINTF(("%s: merging %p with ifscq tail %p\n",
+			    __func__, m0, mt));
+
+			mtl = m_last(mt);
+			m0->m_flags &= ~M_PKTHDR;
+			mt->m_pkthdr.len += recslen;
+			mt->m_pkthdr.vt_nrecs +=
+			    m0->m_pkthdr.vt_nrecs;
+
+			mtl->m_next = m0;
+		}
+	}
+
+	return (0);
+}
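+
+/*
+ * Merge criteria, restated (editorial): a pending state-change packet is
+ * coalesced into the tail packet of ifscq only when both limits hold,
+ * i.e. the combined vt_nrecs stays within IGMP_V3_REPORT_MAXRECS (65,535
+ * group records per IGMPv3 report) and the combined length still leaves
+ * IGMP_LEADINGSPACE headroom below the interface MTU.
+ */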
+
+/*
+ * Respond to a pending IGMPv3 General Query.
+ */
+static void
+igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi)
+{
+	struct ifnet		*ifp;
+	struct in_multi		*inm;
+	struct in_multistep	step;
+	int			 retval, loop;
+
+	IGI_LOCK_ASSERT_HELD(igi);
+
+	VERIFY(igi->igi_version == IGMP_VERSION_3);
+
+	ifp = igi->igi_ifp;
+	IGI_UNLOCK(igi);
+
+	in_multihead_lock_shared();
+	IN_FIRST_MULTI(step, inm);
+	while (inm != NULL) {
+		INM_LOCK(inm);
+		if (inm->inm_ifp != ifp)
+			goto next;
+
+		switch (inm->inm_state) {
+		case IGMP_NOT_MEMBER:
+		case IGMP_SILENT_MEMBER:
+			break;
+		case IGMP_REPORTING_MEMBER:
+		case IGMP_IDLE_MEMBER:
+		case IGMP_LAZY_MEMBER:
+		case IGMP_SLEEPING_MEMBER:
+		case IGMP_AWAKENING_MEMBER:
+			inm->inm_state = IGMP_REPORTING_MEMBER;
+			IGI_LOCK(igi);
+			retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
+			    inm, 0, 0, 0);
+			IGI_UNLOCK(igi);
+			IGMP_PRINTF(("%s: enqueue record = %d\n",
+			    __func__, retval));
+			break;
+		case IGMP_G_QUERY_PENDING_MEMBER:
+		case IGMP_SG_QUERY_PENDING_MEMBER:
+		case IGMP_LEAVING_MEMBER:
+			break;
+		}
+next:
+		INM_UNLOCK(inm);
+		IN_NEXT_MULTI(step, inm);
+	}
+	in_multihead_lock_done();
+
+	IGI_LOCK(igi);
+	loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
+	igmp_dispatch_queue(igi, &igi->igi_gq, IGMP_MAX_RESPONSE_BURST,
+	    loop, ifp);
+	IGI_LOCK_ASSERT_HELD(igi);
+	/*
+	 * Slew transmission of bursts over 500ms intervals.
+	 */
+	if (igi->igi_gq.ifq_head != NULL) {
+		igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
+		    IGMP_RESPONSE_BURST_INTERVAL);
+		interface_timers_running = 1;
+	}
+}
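+
+/*
+ * Timing sketch (editorial): with IGMP_RANDOM_DELAY(X) defined as
+ * (random() % (X) + 1), any responses still queued after a burst are
+ * rescheduled 1 + [1..IGMP_RESPONSE_BURST_INTERVAL] timer ticks later,
+ * slewing bursts of at most IGMP_MAX_RESPONSE_BURST packets across
+ * the response window.
+ */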
+
+/*
+ * Transmit the next pending IGMP message in the output queue.
+ *
+ * Must not be called with inm_lock or igi_lock held.
+ */
+void
+igmp_sendpkt(struct mbuf *m, struct ifnet *ifp)
+{
+	struct ip_moptions	*imo;
+	struct mbuf		*ipopts, *m0;
+	int			 error;
+	struct route		ro;
+
+	IGMP_PRINTF(("%s: transmit %p\n", __func__, m));
+
+	/*
+	 * Check if the ifnet is still attached.
+	 */
+	if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
+		IGMP_PRINTF(("%s: dropped %p as ifp went away.\n",
+		    __func__, m));
+		m_freem(m);
+		OSAddAtomic(1, &ipstat.ips_noroute);
+		return;
+	}
+
+	ipopts = igmp_sendra ? m_raopt : NULL;
+
+	imo = ip_allocmoptions(M_WAITOK);
+	if (imo == NULL) {
+		m_freem(m);
+		return;
+	}
+
+	imo->imo_multicast_ttl  = 1;
+	imo->imo_multicast_vif  = -1;
+#if MROUTING
+	imo->imo_multicast_loop = (ip_mrouter != NULL);
+#else
+	imo->imo_multicast_loop = 0;
+#endif
+
+	/*
+	 * If the user requested that IGMP traffic be explicitly
+	 * redirected to the loopback interface (e.g. they are running a
+	 * MANET interface and the routing protocol needs to see the
+	 * updates), handle this now.
+	 */
+	if (m->m_flags & M_IGMP_LOOP)
+		imo->imo_multicast_ifp = lo_ifp;
+	else
+		imo->imo_multicast_ifp = ifp;
+
+	if (m->m_flags & M_IGMPV2) {
+		m0 = m;
+	} else {
+		m0 = igmp_v3_encap_report(ifp, m);
+		if (m0 == NULL) {
+			/*
+			 * If igmp_v3_encap_report() failed, then M_PREPEND()
+			 * already freed the original mbuf chain.
+			 * This means that we don't have to m_freem(m) here.
+			 */
+			IGMP_PRINTF(("%s: dropped %p\n", __func__, m));
+			IMO_REMREF(imo);
+			atomic_add_32(&ipstat.ips_odropped, 1);
+			return;
+		}
+	}
+
+	m->m_flags &= ~(M_PROTOFLAGS | M_IGMP_LOOP);
+	m0->m_pkthdr.rcvif = lo_ifp;
+#ifdef MAC
+	mac_netinet_igmp_send(ifp, m0);
+#endif
+	bzero(&ro, sizeof (ro));
+	error = ip_output(m0, ipopts, &ro, 0, imo, NULL);
+	if (ro.ro_rt != NULL) {
+		rtfree(ro.ro_rt);
+		ro.ro_rt = NULL;
+	}
+
+	IMO_REMREF(imo);
+
+	if (error) {
+		IGMP_PRINTF(("%s: ip_output(%p) = %d\n", __func__, m0, error));
+		return;
+	}
+
+	IGMPSTAT_INC(igps_snd_reports);
+	OIGMPSTAT_INC(igps_snd_reports);
+}
+
+/*
+ * Encapsulate an IGMPv3 report.
+ *
+ * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
+ * chain has already had its IP/IGMPv3 header prepended. In this case
+ * the function will not attempt to prepend; the lengths and checksums
+ * will however be re-computed.
+ *
+ * Returns a pointer to the new mbuf chain head, or NULL if the
+ * allocation failed.
+ */
+static struct mbuf *
+igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
+{
+	struct igmp_report	*igmp;
+	struct ip		*ip;
+	int			 hdrlen, igmpreclen;
+
+	VERIFY((m->m_flags & M_PKTHDR));
+
+	igmpreclen = m_length(m);
+	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);
+
+	if (m->m_flags & M_IGMPV3_HDR) {
+		igmpreclen -= hdrlen;
+	} else {
+		M_PREPEND(m, hdrlen, M_DONTWAIT);
+		if (m == NULL)
+			return (NULL);
+		m->m_flags |= M_IGMPV3_HDR;
+	}
+
+	IGMP_PRINTF(("%s: igmpreclen is %d\n", __func__, igmpreclen));
+
+	m->m_data += sizeof(struct ip);
+	m->m_len -= sizeof(struct ip);
+
+	igmp = mtod(m, struct igmp_report *);
+	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
+	igmp->ir_rsv1 = 0;
+	igmp->ir_rsv2 = 0;
+	igmp->ir_numgrps = htons(m->m_pkthdr.vt_nrecs);
+	igmp->ir_cksum = 0;
+	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
+	m->m_pkthdr.vt_nrecs = 0;
+
+	m->m_data -= sizeof(struct ip);
+	m->m_len += sizeof(struct ip);
+
+	ip = mtod(m, struct ip *);
+	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
+	ip->ip_len = hdrlen + igmpreclen;
+	ip->ip_off = IP_DF;
+	ip->ip_p = IPPROTO_IGMP;
+	ip->ip_sum = 0;
+
+	ip->ip_src.s_addr = INADDR_ANY;
+
+	if (m->m_flags & M_IGMP_LOOP) {
+		struct in_ifaddr *ia;
+
+		IFP_TO_IA(ifp, ia);
+		if (ia != NULL) {
+			IFA_LOCK(&ia->ia_ifa);
+			ip->ip_src = ia->ia_addr.sin_addr;
+			IFA_UNLOCK(&ia->ia_ifa);
+			IFA_REMREF(&ia->ia_ifa);
+		}
+	}
+
+	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);
+
+	return (m);
+}
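+
+/*
+ * Layout sketch (editorial): hdrlen is sizeof(struct ip) +
+ * sizeof(struct igmp_report) = 20 + 8 = 28 bytes, so a report carrying
+ * igmpreclen bytes of group records goes out with ip_len = 28 +
+ * igmpreclen; the checksum is computed over the 8-byte report header
+ * plus the records, skipping the IP header.
+ */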
+
+#ifdef IGMP_DEBUG
+static const char *
+igmp_rec_type_to_str(const int type)
+{
+	switch (type) {
+	case IGMP_CHANGE_TO_EXCLUDE_MODE:
+		return "TO_EX";
+	case IGMP_CHANGE_TO_INCLUDE_MODE:
+		return "TO_IN";
+	case IGMP_MODE_IS_EXCLUDE:
+		return "MODE_EX";
+	case IGMP_MODE_IS_INCLUDE:
+		return "MODE_IN";
+	case IGMP_ALLOW_NEW_SOURCES:
+		return "ALLOW_NEW";
+	case IGMP_BLOCK_OLD_SOURCES:
+		return "BLOCK_OLD";
+	default:
+		break;
+	}
+	return "unknown";
+}
+#endif
+
+void
+igmp_init(void)
+{
+
+	IGMP_PRINTF(("%s: initializing\n", __func__));
+
+	igmp_timers_are_running = 0;
+
+	/* Setup lock group and attribute for igmp_mtx */
+	igmp_mtx_grp_attr = lck_grp_attr_alloc_init();
+	igmp_mtx_grp = lck_grp_alloc_init("igmp_mtx", igmp_mtx_grp_attr);
+	igmp_mtx_attr = lck_attr_alloc_init();
+	lck_mtx_init(&igmp_mtx, igmp_mtx_grp, igmp_mtx_attr);
+
+	LIST_INIT(&igi_head);
+	m_raopt = igmp_ra_alloc();
+
+	igi_size = sizeof (struct igmp_ifinfo);
+	igi_zone = zinit(igi_size, IGI_ZONE_MAX * igi_size,
+	    0, IGI_ZONE_NAME);
+	if (igi_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, IGI_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(igi_zone, Z_EXPAND, TRUE);
+	zone_change(igi_zone, Z_CALLERACCT, FALSE);
+}
diff --git a/bsd/netinet/igmp.h b/bsd/netinet/igmp.h
index 3774cd860..28352317d 100644
--- a/bsd/netinet/igmp.h
+++ b/bsd/netinet/igmp.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -77,8 +77,11 @@
  * MULTICAST Revision: 3.5.1.2
  */
 
+/* Minimum length of any IGMP protocol message. */
+#define IGMP_MINLEN			8
+
 /*
- * IGMP packet format.
+ * IGMPv1/v2 query and host report format.
  */
 struct igmp {
 	u_char		igmp_type;	/* version & type of IGMP message  */
@@ -87,38 +90,91 @@ struct igmp {
 	struct in_addr	igmp_group;	/* group address being reported    */
 };					/*  (zero for queries)             */
 
-#define IGMP_MINLEN		     8
+/*
+ * IGMP v3 query format.
+ */
+struct igmpv3 {
+	u_char		igmp_type;	/* version & type of IGMP message  */
+	u_char		igmp_code;	/* subtype for routing msgs        */
+	u_short		igmp_cksum;	/* IP-style checksum               */
+	struct in_addr	igmp_group;	/* group address being reported    */
+					/*  (zero for queries)             */
+	u_char		igmp_misc;	/* reserved/suppress/robustness    */
+	u_char		igmp_qqi;	/* querier's query interval        */
+	u_short		igmp_numsrc;	/* number of sources               */
+	/*struct in_addr	igmp_sources[1];*/ /* source addresses */
+};
+#define IGMP_V3_QUERY_MINLEN		12
+#define IGMP_EXP(x)			(((x) >> 4) & 0x07)
+#define IGMP_MANT(x)			((x) & 0x0f)
+#define IGMP_QRESV(x)			(((x) >> 4) & 0x0f)
+#define IGMP_SFLAG(x)			(((x) >> 3) & 0x01)
+#define IGMP_QRV(x)			((x) & 0x07)
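+
+/*
+ * Decoding sketch (editorial, per RFC 3376 section 4.1.1): a Max Resp
+ * Code below 128 is the Max Resp Time directly (in tenths of a second);
+ * at 128 and above it is floating-point encoded, and the time is
+ * recovered as
+ *
+ *	mrt = (IGMP_MANT(code) | 0x10) << (IGMP_EXP(code) + 3);
+ */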
+
+struct igmp_grouprec {
+	u_char		ig_type;	/* record type */
+	u_char		ig_datalen;	/* length of auxiliary data */
+	u_short		ig_numsrc;	/* number of sources */
+	struct in_addr	ig_group;	/* group address being reported */
+	/*struct in_addr	ig_sources[1];*/ /* source addresses */
+};
+#define IGMP_GRPREC_HDRLEN		8
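+
+/*
+ * Sizing note (editorial, per RFC 3376 section 4.2): a group record
+ * occupies IGMP_GRPREC_HDRLEN + 4 * ig_numsrc + 4 * ig_datalen bytes,
+ * the auxiliary data length being expressed in 32-bit words.
+ */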
 
 /*
- * Message types, including version number.
+ * IGMPv3 host membership report header.
  */
-#define IGMP_MEMBERSHIP_QUERY   	0x11	/* membership query         */
-#define IGMP_V1_MEMBERSHIP_REPORT	0x12	/* Ver. 1 membership report */
-#define IGMP_V2_MEMBERSHIP_REPORT	0x16	/* Ver. 2 membership report */
-#define IGMP_V2_LEAVE_GROUP		0x17	/* Leave-group message	    */
+struct igmp_report {
+	u_char		ir_type;	/* IGMP_v3_HOST_MEMBERSHIP_REPORT */
+	u_char		ir_rsv1;	/* must be zero */
+	u_short		ir_cksum;	/* checksum */
+	u_short		ir_rsv2;	/* must be zero */
+	u_short		ir_numgrps;	/* number of group records */
+	/*struct	igmp_grouprec ir_groups[1];*/	/* group records */
+};
+#define IGMP_V3_REPORT_MINLEN		8
+#define IGMP_V3_REPORT_MAXRECS		65535
 
+/*
+ * Message types, including version number.
+ */
+#define IGMP_HOST_MEMBERSHIP_QUERY	0x11	/* membership query         */
+#define IGMP_v1_HOST_MEMBERSHIP_REPORT	0x12	/* Ver. 1 membership report */
 #define IGMP_DVMRP			0x13	/* DVMRP routing message    */
-#define IGMP_PIM			0x14	/* PIM routing message	    */
-
-#define IGMP_MTRACE_RESP		0x1e  /* traceroute resp.(to sender)*/
-#define IGMP_MTRACE			0x1f  /* mcast traceroute messages  */
+#define IGMP_PIM			0x14	/* PIMv1 message (historic) */
+#define IGMP_v2_HOST_MEMBERSHIP_REPORT	0x16	/* Ver. 2 membership report */
+#define IGMP_HOST_LEAVE_MESSAGE		0x17	/* Leave-group message     */
+#define IGMP_MTRACE_REPLY		0x1e	/* mtrace(8) reply */
+#define IGMP_MTRACE_QUERY		0x1f	/* mtrace(8) probe */
+#define IGMP_v3_HOST_MEMBERSHIP_REPORT	0x22	/* Ver. 3 membership report */
 
-#define IGMP_MAX_HOST_REPORT_DELAY   10    /* max delay for response to     */
-					   /*  query (in seconds) according */
-					   /*  to RFC1112                   */
+/*
+ * IGMPv3 report modes.
+ */
+#define IGMP_DO_NOTHING			0	/* don't send a record */
+#define IGMP_MODE_IS_INCLUDE		1	/* MODE_IN */
+#define IGMP_MODE_IS_EXCLUDE		2	/* MODE_EX */
+#define IGMP_CHANGE_TO_INCLUDE_MODE	3	/* TO_IN */
+#define IGMP_CHANGE_TO_EXCLUDE_MODE	4	/* TO_EX */
+#define IGMP_ALLOW_NEW_SOURCES		5	/* ALLOW_NEW */
+#define IGMP_BLOCK_OLD_SOURCES		6	/* BLOCK_OLD */
 
+/*
+ * IGMPv3 query types.
+ */
+#define IGMP_V3_GENERAL_QUERY		1
+#define IGMP_V3_GROUP_QUERY		2
+#define IGMP_V3_GROUP_SOURCE_QUERY	3
 
-#define IGMP_TIMER_SCALE     10		/* denotes that the igmp code field */
-					/* specifies time in 10th of seconds*/
+/*
+ * Maximum report interval for IGMP v1/v2 host membership reports [RFC 1112]
+ */
+#define IGMP_V1V2_MAX_RI		10
+#define IGMP_MAX_HOST_REPORT_DELAY	IGMP_V1V2_MAX_RI
 
 /*
- * The following four defininitions are for backwards compatibility.
- * They should be removed as soon as all applications are updated to
- * use the new constant names.
+ * IGMP_TIMER_SCALE denotes that the igmp code field specifies
+ * time in tenths of a second.
  */
-#define IGMP_HOST_MEMBERSHIP_QUERY	IGMP_MEMBERSHIP_QUERY
-#define IGMP_HOST_MEMBERSHIP_REPORT	IGMP_V1_MEMBERSHIP_REPORT
-#define IGMP_HOST_NEW_MEMBERSHIP_REPORT	IGMP_V2_MEMBERSHIP_REPORT
-#define IGMP_HOST_LEAVE_MESSAGE		IGMP_V2_LEAVE_GROUP
+#define IGMP_TIMER_SCALE		10
 
 #endif /* _NETINET_IGMP_H_ */
diff --git a/bsd/netinet/igmp_var.h b/bsd/netinet/igmp_var.h
index 5e9f7e983..8fdaab868 100644
--- a/bsd/netinet/igmp_var.h
+++ b/bsd/netinet/igmp_var.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -68,7 +68,6 @@
 #define _NETINET_IGMP_VAR_H_
 #include <sys/appleapiopts.h>
 
-
 /*
  * Internet Group Management Protocol (IGMP),
  * implementation-specific definitions.
@@ -78,6 +77,48 @@
  * MULTICAST Revision: 3.5.1.3
  */
 
+struct igmpstat_v3 {
+	/*
+	 * Structure header (to insulate ABI changes).
+	 */
+	uint32_t igps_version;		/* version of this structure */
+	uint32_t igps_len;		/* length of this structure */
+	/*
+	 * Message statistics.
+	 */
+	uint64_t igps_rcv_total;	/* total IGMP messages received */
+	uint64_t igps_rcv_tooshort;	/* received with too few bytes */
+	uint64_t igps_rcv_badttl;	/* received with ttl other than 1 */
+	uint64_t igps_rcv_badsum;	/* received with bad checksum */
+	/*
+	 * Query statistics.
+	 */
+	uint64_t igps_rcv_v1v2_queries;	/* received IGMPv1/IGMPv2 queries */
+	uint64_t igps_rcv_v3_queries;	/* received IGMPv3 queries */
+	uint64_t igps_rcv_badqueries;	/* received invalid queries */
+	uint64_t igps_rcv_gen_queries;	/* received general queries */
+	uint64_t igps_rcv_group_queries;/* received group queries */
+	uint64_t igps_rcv_gsr_queries;	/* received group-source queries */
+	uint64_t igps_drop_gsr_queries;	/* dropped group-source queries */
+	/*
+	 * Report statistics.
+	 */
+	uint64_t igps_rcv_reports;	/* received membership reports */
+	uint64_t igps_rcv_badreports;	/* received invalid reports */
+	uint64_t igps_rcv_ourreports;	/* received reports for our groups */
+	uint64_t igps_rcv_nora;		/* received w/o Router Alert option */
+	uint64_t igps_snd_reports;	/* sent membership reports */
+	/*
+	 * Padding for future additions.
+	 */
+	uint64_t __igps_pad[4];
+} __attribute__((aligned(8)));
+
+/*
+ * Old IGMPv2 stat structure for backward compatibility
+ *
+ */
+
 struct igmpstat {
 	u_int	igps_rcv_total;		/* total IGMP messages received */
 	u_int	igps_rcv_tooshort;	/* received with too few bytes */
@@ -90,41 +131,189 @@ struct igmpstat {
 	u_int	igps_snd_reports;	/* sent membership reports */
 };
 
-#ifdef KERNEL_PRIVATE
-#ifdef KERNEL
+#define IGPS_VERSION_3	3
+#define IGPS_VERSION3_LEN		168
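+
+/*
+ * Consistency check (editorial): struct igmpstat_v3 packs two uint32_t
+ * header fields and twenty uint64_t slots (sixteen counters plus four
+ * pad entries), i.e. 2*4 + 20*8 = 168 bytes, matching IGPS_VERSION3_LEN.
+ */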
+
+#ifdef PRIVATE
+/*
+ * Per-interface IGMP router version information.
+ */
+#ifndef XNU_KERNEL_PRIVATE
+struct igmp_ifinfo {
+#else
+struct igmp_ifinfo_u {
+#endif /* XNU_KERNEL_PRIVATE */
+	uint32_t igi_ifindex;	/* interface this instance belongs to */
+	uint32_t igi_version;	/* IGMPv3 Host Compatibility Mode */
+	uint32_t igi_v1_timer;	/* IGMPv1 Querier Present timer (s) */
+	uint32_t igi_v2_timer;	/* IGMPv2 Querier Present timer (s) */
+	uint32_t igi_v3_timer;	/* IGMPv3 General Query (interface) timer (s)*/
+	uint32_t igi_flags;	/* IGMP per-interface flags */
+	uint32_t igi_rv;	/* IGMPv3 Robustness Variable */
+	uint32_t igi_qi;	/* IGMPv3 Query Interval (s) */
+	uint32_t igi_qri;	/* IGMPv3 Query Response Interval (s) */
+	uint32_t igi_uri;	/* IGMPv3 Unsolicited Report Interval (s) */
+};
+
+#define IGIF_SILENT	0x00000001	/* Do not use IGMP on this ifp */
+#define IGIF_LOOPBACK	0x00000002	/* Send IGMP reports to loopback */
+
+/*
+ * IGMP version tag.
+ */
+#define IGMP_VERSION_NONE		0 /* Invalid */
+#define IGMP_VERSION_1			1
+#define IGMP_VERSION_2			2
+#define IGMP_VERSION_3			3 /* Default */
+#endif /* PRIVATE */
+
+#ifdef XNU_KERNEL_PRIVATE
+#include <libkern/libkern.h>
+#define IGMP_DEBUG 1
+#ifdef IGMP_DEBUG
+extern char * inet_ntoa(struct in_addr);
+extern int igmp_debug;
+
+#define IGMP_PRINTF(x)	do { if (igmp_debug) printf x; } while (0)
+#else
+#define	IGMP_PRINTF(x)
+#endif
+
+#define	OIGMPSTAT_ADD(name, val)	atomic_add_32(&igmpstat.name , (val))
+#define	OIGMPSTAT_INC(name)		OIGMPSTAT_ADD(name, 1)
+
+#define	IGMPSTAT_ADD(name, val)		atomic_add_64(&igmpstat_v3.name , (val))
+#define	IGMPSTAT_INC(name)		IGMPSTAT_ADD(name, 1)
+
 #define IGMP_RANDOM_DELAY(X) (random() % (X) + 1)
 
+#define IGMP_MAX_STATE_CHANGES		24 /* Max pending changes per group */
+
 /*
- * States for IGMPv2's leave processing
+ * IGMP per-group states.
  */
-#define IGMP_OTHERMEMBER			0
-#define IGMP_IREPORTEDLAST			1
+#define IGMP_NOT_MEMBER			0 /* Can garbage collect in_multi */
+#define IGMP_SILENT_MEMBER		1 /* Do not perform IGMP for group */
+#define IGMP_REPORTING_MEMBER		2 /* IGMPv1/2/3 we are reporter */
+#define IGMP_IDLE_MEMBER		3 /* IGMPv1/2 we reported last */
+#define IGMP_LAZY_MEMBER		4 /* IGMPv1/2 other member reporting */
+#define IGMP_SLEEPING_MEMBER		5 /* IGMPv1/2 start query response */
+#define IGMP_AWAKENING_MEMBER		6 /* IGMPv1/2 group timer will start */
+#define IGMP_G_QUERY_PENDING_MEMBER	7 /* IGMPv3 group query pending */
+#define IGMP_SG_QUERY_PENDING_MEMBER	8 /* IGMPv3 source query pending */
+#define IGMP_LEAVING_MEMBER		9 /* IGMPv3 dying gasp (pending last */
+					  /* retransmission of INCLUDE {}) */
+/*
+ * IGMPv3 protocol control variables.
+ */
+#define IGMP_RV_INIT		2	/* Robustness Variable */
+#define IGMP_RV_MIN		1
+#define IGMP_RV_MAX		7
+
+#define IGMP_QI_INIT		125	/* Query Interval (s) */
+#define IGMP_QI_MIN		1
+#define IGMP_QI_MAX		255
+
+#define IGMP_QRI_INIT		10	/* Query Response Interval (s) */
+#define IGMP_QRI_MIN		1
+#define IGMP_QRI_MAX		255
+
+#define IGMP_URI_INIT		3	/* Unsolicited Report Interval (s) */
+#define IGMP_URI_MIN		0
+#define IGMP_URI_MAX		10
+
+#define IGMP_MAX_G_GS_PACKETS		8 /* # of packets to answer G/GS */
+#define IGMP_MAX_STATE_CHANGE_PACKETS	8 /* # of packets per state change */
+#define IGMP_MAX_RESPONSE_PACKETS	16 /* # of packets for general query */
+#define IGMP_MAX_RESPONSE_BURST		4 /* # of responses to send at once */
+#define IGMP_RESPONSE_BURST_INTERVAL	(PR_SLOWHZ)	/* 500ms */
 
 /*
- * We must remember what version the subnet's querier is.
- * We conveniently use the IGMP message type for the proper
- * membership report to keep this state.
+ * IGMP-specific mbuf flags.
  */
-#define IGMP_V1_ROUTER				IGMP_V1_MEMBERSHIP_REPORT
-#define IGMP_V2_ROUTER				IGMP_V2_MEMBERSHIP_REPORT
+#define M_IGMPV2	M_PROTO1	/* Packet is IGMPv2 */
+#define M_IGMPV3_HDR	M_PROTO2	/* Packet has IGMPv3 headers */
+#define M_GROUPREC	M_PROTO3	/* mbuf chain is a group record */
+#define M_IGMP_LOOP	M_LOOP		/* transmit on loif, not real ifp */
+
+/*
+ * Default amount of leading space for IGMPv3 to allocate at the
+ * beginning of its mbuf packet chains, to avoid fragmentation and
+ * unnecessary allocation of leading mbufs.
+ */
+#define RAOPT_LEN	4		/* Length of IP Router Alert option */
+#define	IGMP_LEADINGSPACE		\
+	(sizeof(struct ip) + RAOPT_LEN + sizeof(struct igmp_report))
+
+struct igmp_ifinfo {
+	decl_lck_mtx_data(, igi_lock);
+	uint32_t igi_refcnt;	/* reference count */
+	uint32_t igi_debug;	/* see ifa_debug flags */
+	LIST_ENTRY(igmp_ifinfo) igi_link;
+	struct ifnet *igi_ifp;	/* interface this instance belongs to */
+	uint32_t igi_version;	/* IGMPv3 Host Compatibility Mode */
+	uint32_t igi_v1_timer;	/* IGMPv1 Querier Present timer (s) */
+	uint32_t igi_v2_timer;	/* IGMPv2 Querier Present timer (s) */
+	uint32_t igi_v3_timer;	/* IGMPv3 General Query (interface) timer (s)*/
+	uint32_t igi_flags;	/* IGMP per-interface flags */
+	uint32_t igi_rv;	/* IGMPv3 Robustness Variable */
+	uint32_t igi_qi;	/* IGMPv3 Query Interval (s) */
+	uint32_t igi_qri;	/* IGMPv3 Query Response Interval (s) */
+	uint32_t igi_uri;	/* IGMPv3 Unsolicited Report Interval (s) */
+	SLIST_HEAD(,in_multi)	igi_relinmhead; /* released groups */
+	struct ifqueue	 igi_gq;	/* queue of general query responses */
+	struct ifqueue   igi_v2q; /* queue of v1/v2 packets */
+};
+
+#define	IGI_LOCK_ASSERT_HELD(_igi)					\
+	lck_mtx_assert(&(_igi)->igi_lock, LCK_MTX_ASSERT_OWNED)
+
+#define	IGI_LOCK_ASSERT_NOTHELD(_igi)					\
+	lck_mtx_assert(&(_igi)->igi_lock, LCK_MTX_ASSERT_NOTOWNED)
+
+#define	IGI_LOCK(_igi)							\
+	lck_mtx_lock(&(_igi)->igi_lock)
+
+#define	IGI_LOCK_SPIN(_igi)						\
+	lck_mtx_lock_spin(&(_igi)->igi_lock)
+
+#define	IGI_CONVERT_LOCK(_igi) do {					\
+	IGI_LOCK_ASSERT_HELD(_igi);					\
+	lck_mtx_convert_spin(&(_igi)->igi_lock);			\
+} while (0)
+
+#define	IGI_UNLOCK(_igi)						\
+	lck_mtx_unlock(&(_igi)->igi_lock)
+
+#define	IGI_ADDREF(_igi)						\
+	igi_addref(_igi, 0)
+
+#define	IGI_ADDREF_LOCKED(_igi)						\
+	igi_addref(_igi, 1)
+
+#define	IGI_REMREF(_igi)						\
+	igi_remref(_igi)
 
 /*
- * Revert to new router if we haven't heard from an old router in
- * this amount of time.
+ * Per-link IGMP context.
  */
-#define IGMP_AGE_THRESHOLD			540
+#define IGMP_IFINFO(ifp)	((ifp)->if_igi)
 
-void	igmp_init(void) __attribute__((section("__TEXT, initcode")));
-void	igmp_input(struct mbuf *, int);
-int		igmp_joingroup(struct in_multi *);
-void	igmp_leavegroup(struct in_multi *);
-void	igmp_fasttimo(void);
-void	igmp_slowtimo(void);
+extern void igmp_init(void) __attribute__((section("__TEXT, initcode")));
+extern int igmp_change_state(struct in_multi *);
+extern struct igmp_ifinfo *igmp_domifattach(struct ifnet *, int);
+extern void igmp_domifreattach(struct igmp_ifinfo *);
+extern void igmp_domifdetach(struct ifnet *);
+extern void igmp_input(struct mbuf *, int);
+extern int igmp_joingroup(struct in_multi *);
+extern void igmp_leavegroup(struct in_multi *);
+extern void igmp_slowtimo(void);
+extern void igi_addref(struct igmp_ifinfo *, int);
+extern void igi_remref(struct igmp_ifinfo *);
 
 SYSCTL_DECL(_net_inet_igmp);
 
-#endif /* KERNEL */
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 /*
  * Names for IGMP sysctl objects
@@ -132,11 +321,11 @@ SYSCTL_DECL(_net_inet_igmp);
 #define IGMPCTL_STATS		1	/* statistics (read-only) */
 #define IGMPCTL_MAXID		2
 
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 #define IGMPCTL_NAMES { \
 	{ 0, 0 }, \
 	{ "stats", CTLTYPE_STRUCT }, \
 }
 
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 #endif
diff --git a/bsd/netinet/in.c b/bsd/netinet/in.c
index 32b8c64f6..85b9d38af 100644
--- a/bsd/netinet/in.c
+++ b/bsd/netinet/in.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -72,6 +72,7 @@
 #include <sys/sysctl.h>
 #include <sys/kern_event.h>
 #include <sys/syslog.h>
+#include <sys/mcache.h>
 #include <kern/zalloc.h>
 
 #include <pexpert/pexpert.h>
@@ -122,42 +123,66 @@ static void in_iahash_remove(struct in_ifaddr *);
 static void in_iahash_insert(struct in_ifaddr *);
 static void in_iahash_insert_ptp(struct in_ifaddr *);
 static struct in_ifaddr *in_ifaddr_alloc(int);
+static void in_ifaddr_attached(struct ifaddr *);
+static void in_ifaddr_detached(struct ifaddr *);
 static void in_ifaddr_free(struct ifaddr *);
 static void in_ifaddr_trace(struct ifaddr *, int);
 
 static int subnetsarelocal = 0;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, subnets_are_local, CTLFLAG_RW, 
+SYSCTL_INT(_net_inet_ip, OID_AUTO, subnets_are_local, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&subnetsarelocal, 0, "");
 
-struct in_multihead in_multihead; /* XXX BSS initialization */
-
 /* Track whether or not the SIOCARPIPLL ioctl has been called */
 __private_extern__	u_int32_t	ipv4_ll_arp_aware = 0;
 
+#define	INIFA_TRACE_HIST_SIZE	32	/* size of trace history */
+
+/* For gdb */
+__private_extern__ unsigned int inifa_trace_hist_size = INIFA_TRACE_HIST_SIZE;
+
 struct in_ifaddr_dbg {
 	struct in_ifaddr	inifa;			/* in_ifaddr */
 	struct in_ifaddr	inifa_old;		/* saved in_ifaddr */
-	u_int16_t		inifa_refhold_cnt;	/* # of ifaref */
-	u_int16_t		inifa_refrele_cnt;	/* # of ifafree */
+	u_int16_t		inifa_refhold_cnt;	/* # of IFA_ADDREF */
+	u_int16_t		inifa_refrele_cnt;	/* # of IFA_REMREF */
 	/*
 	 * Alloc and free callers.
 	 */
 	ctrace_t		inifa_alloc;
 	ctrace_t		inifa_free;
 	/*
-	 * Circular lists of ifaref and ifafree callers.
+	 * Circular lists of IFA_ADDREF and IFA_REMREF callers.
 	 */
-	ctrace_t		inifa_refhold[CTRACE_HIST_SIZE];
-	ctrace_t		inifa_refrele[CTRACE_HIST_SIZE];
+	ctrace_t		inifa_refhold[INIFA_TRACE_HIST_SIZE];
+	ctrace_t		inifa_refrele[INIFA_TRACE_HIST_SIZE];
+	/*
+	 * Trash list linkage
+	 */
+	TAILQ_ENTRY(in_ifaddr_dbg) inifa_trash_link;
 };
 
-static unsigned int inifa_debug;		/* debug flags */
+/* List of trash in_ifaddr entries protected by inifa_trash_lock */
+static TAILQ_HEAD(, in_ifaddr_dbg) inifa_trash_head;
+static decl_lck_mtx_data(, inifa_trash_lock);
+
+#if DEBUG
+static unsigned int inifa_debug = 1;		/* debugging (enabled) */
+#else
+static unsigned int inifa_debug;		/* debugging (disabled) */
+#endif /* !DEBUG */
 static unsigned int inifa_size;			/* size of zone element */
 static struct zone *inifa_zone;			/* zone for in_ifaddr */
 
 #define	INIFA_ZONE_MAX		64		/* maximum elements in zone */
 #define	INIFA_ZONE_NAME		"in_ifaddr"	/* zone name */
 
+/*
+ * Return 1 if the address is
+ * - loopback
+ * - unicast or multicast link local
+ * - routed via a link level gateway
+ * - belongs to a directly connected (sub)net
+ */
 int
 inaddr_local(struct in_addr in)
 {
@@ -165,20 +190,27 @@ inaddr_local(struct in_addr in)
 	struct sockaddr_in sin;
 	int local = 0;
 
-	sin.sin_family = AF_INET;
-	sin.sin_len = sizeof (sin);
-	sin.sin_addr = in;
-	rt = rtalloc1((struct sockaddr *)&sin, 0, 0);
-
-	if (rt != NULL) {
-		RT_LOCK_SPIN(rt);
-		if (rt->rt_gateway->sa_family == AF_LINK ||
-		    (rt->rt_ifp->if_flags & IFF_LOOPBACK))
+	if (ntohl(in.s_addr) == INADDR_LOOPBACK || IN_LINKLOCAL(ntohl(in.s_addr))) {
+		local = 1;
+	} else if (ntohl(in.s_addr) >= INADDR_UNSPEC_GROUP &&
+		ntohl(in.s_addr) <= INADDR_MAX_LOCAL_GROUP) {
 			local = 1;
-		RT_UNLOCK(rt);
-		rtfree(rt);
 	} else {
-		local = in_localaddr(in);
+		sin.sin_family = AF_INET;
+		sin.sin_len = sizeof (sin);
+		sin.sin_addr = in;
+		rt = rtalloc1((struct sockaddr *)&sin, 0, 0);
+
+		if (rt != NULL) {
+			RT_LOCK_SPIN(rt);
+			if (rt->rt_gateway->sa_family == AF_LINK ||
+			    (rt->rt_ifp->if_flags & IFF_LOOPBACK))
+				local = 1;
+			RT_UNLOCK(rt);
+			rtfree(rt);
+		} else {
+			local = in_localaddr(in);
+		}
 	}
 	return (local);
 }
@@ -198,20 +230,28 @@ in_localaddr(struct in_addr in)
 	if (subnetsarelocal) {
 		lck_rw_lock_shared(in_ifaddr_rwlock);
 		for (ia = in_ifaddrhead.tqh_first; ia; 
-		     ia = ia->ia_link.tqe_next)
+		     ia = ia->ia_link.tqe_next) {
+			IFA_LOCK(&ia->ia_ifa);
 			if ((i & ia->ia_netmask) == ia->ia_net) {
+				IFA_UNLOCK(&ia->ia_ifa);
 				lck_rw_done(in_ifaddr_rwlock);
 				return (1);
 			}
+			IFA_UNLOCK(&ia->ia_ifa);
+		}
 		lck_rw_done(in_ifaddr_rwlock);
 	} else {
 		lck_rw_lock_shared(in_ifaddr_rwlock);
 		for (ia = in_ifaddrhead.tqh_first; ia;
-		     ia = ia->ia_link.tqe_next)
+		     ia = ia->ia_link.tqe_next) {
+			IFA_LOCK(&ia->ia_ifa);
 			if ((i & ia->ia_subnetmask) == ia->ia_subnet) {
+				IFA_UNLOCK(&ia->ia_ifa);
 				lck_rw_done(in_ifaddr_rwlock);
 				return (1);
 			}
+			IFA_UNLOCK(&ia->ia_ifa);
+		}
 		lck_rw_done(in_ifaddr_rwlock);
 	}
 	return (0);
@@ -292,6 +332,18 @@ in_len2mask(struct in_addr *mask, int len)
 
 static int in_interfaces;	/* number of external internet interfaces */
 
+static int
+in_domifattach(struct ifnet *ifp)
+{
+	int error;
+
+	if ((error = proto_plumb(PF_INET, ifp)) && error != EEXIST)
+		log(LOG_ERR, "%s: proto_plumb returned %d if=%s%d\n",
+		    __func__, error, ifp->if_name, ifp->if_unit);
+
+	return (error);
+}
+
 /*
  * Generic internet control operations (ioctl's).
  * Ifp is 0 if not an interface-specific ioctl.
@@ -331,6 +383,8 @@ in_control(
 	struct kev_msg        ev_msg;
 	struct kev_in_data    in_event_data;
 
+	bzero(&in_event_data, sizeof(struct kev_in_data));
+	bzero(&ev_msg, sizeof(struct kev_msg));
 	switch (cmd) {
 	case SIOCALIFADDR:
 	case SIOCDLIFADDR:
@@ -354,19 +408,24 @@ in_control(
 		for (iap = in_ifaddrhead.tqh_first; iap; 
 		     iap = iap->ia_link.tqe_next)
 			if (iap->ia_ifp == ifp) {
+				IFA_LOCK(&iap->ia_ifa);
 				if (((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr.s_addr ==
 				    iap->ia_addr.sin_addr.s_addr) {
 					ia = iap;
+					IFA_UNLOCK(&iap->ia_ifa);
 					break;
 				} else if (ia == NULL) {
 					ia = iap;
-					if (ifr->ifr_addr.sa_family != AF_INET)
+					if (ifr->ifr_addr.sa_family != AF_INET) {
+						IFA_UNLOCK(&iap->ia_ifa);
 						break;
+					}
 				}
+				IFA_UNLOCK(&iap->ia_ifa);
 			}
 		/* take a reference on ia before releasing lock */
 		if (ia != NULL) {
-			ifaref(&ia->ia_ifa);
+			IFA_ADDREF(&ia->ia_ifa);
 		}
 		lck_rw_done(in_ifaddr_rwlock);
 	}
@@ -393,19 +452,19 @@ in_control(
 
 			lck_rw_lock_shared(in_ifaddr_rwlock);
 			for (oia = ia; ia; ia = ia->ia_link.tqe_next) {
+				IFA_LOCK(&ia->ia_ifa);
 				if (ia->ia_ifp == ifp  &&
 				    ia->ia_addr.sin_addr.s_addr ==
-				    ifra->ifra_addr.sin_addr.s_addr)
+				    ifra->ifra_addr.sin_addr.s_addr) {
+					IFA_ADDREF_LOCKED(&ia->ia_ifa);
+					IFA_UNLOCK(&ia->ia_ifa);
 					break;
-			}
-			/* take a reference on ia before releasing lock */
-			if (ia != NULL && ia != oia) {
-				ifaref(&ia->ia_ifa);
+				}
+				IFA_UNLOCK(&ia->ia_ifa);
 			}
 			lck_rw_done(in_ifaddr_rwlock);
-			if (oia != NULL && oia != ia) {
-				ifafree(&oia->ia_ifa);
-			}
+			if (oia != NULL)
+				IFA_REMREF(&oia->ia_ifa);
 			if ((ifp->if_flags & IFF_POINTOPOINT)
 			    && (cmd == SIOCAIFADDR)
 			    && (ifra->ifra_dstaddr.sin_addr.s_addr
@@ -426,7 +485,13 @@ in_control(
 	case SIOCSIFADDR:
 	case SIOCSIFNETMASK:
 	case SIOCSIFDSTADDR:
-		if ((so->so_state & SS_PRIV) == 0) {
+		/* socket is NULL if called from in_purgeaddrs() */
+		if (so != NULL && (so->so_state & SS_PRIV) == 0) {
+			error = EPERM;
+			goto done;
+		}
+		/* in case it's NULL, make sure it came from the kernel */
+		if (so == NULL && p != kernproc) {
 			error = EPERM;
 			goto done;
 		}
@@ -439,21 +504,22 @@ in_control(
 			error = EINVAL;
 			goto done;
 		}
-		if (ia == (struct in_ifaddr *)0) {
+		if (ia == NULL) {
 			ia = in_ifaddr_alloc(M_WAITOK);
-			if (ia == (struct in_ifaddr *)NULL) {
+			if (ia == NULL) {
 				error = ENOBUFS;
 				goto done;
 			}
-			IA_HASH_INIT(ia);
+			ifnet_lock_exclusive(ifp);
 			ifa = &ia->ia_ifa;
+			IFA_LOCK(ifa);
 			/* Hold a reference for this routine */
-			ifaref(ifa);
+			IFA_ADDREF_LOCKED(ifa);
+			IA_HASH_INIT(ia);
 			ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
 			ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
 			ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
 			ia->ia_sockmask.sin_len = 8;
-			ifnet_lock_exclusive(ifp);
 			if (ifp->if_flags & IFF_BROADCAST) {
 				ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
 				ia->ia_broadaddr.sin_family = AF_INET;
@@ -463,22 +529,25 @@ in_control(
 				in_interfaces++;
 			/* if_attach_ifa() holds a reference for ifa_link */
 			if_attach_ifa(ifp, ifa);
+			/*
+			 * If we have to go through in_ifinit(), make sure
+			 * to avoid installing route(s) based on this address
+			 * via PFC_IFUP event, before the link resolver (ARP)
+			 * initializes it.
+			 */
+			if (cmd == SIOCAIFADDR || cmd == SIOCSIFADDR)
+				ifa->ifa_debug |= IFD_NOTREADY;
+			IFA_UNLOCK(ifa);
 			ifnet_lock_done(ifp);
 			lck_rw_lock_exclusive(in_ifaddr_rwlock);
 			/* Hold a reference for ia_link */
-			ifaref(ifa);
+			IFA_ADDREF(ifa);
 			TAILQ_INSERT_TAIL(&in_ifaddrhead, ia, ia_link);
 			lck_rw_done(in_ifaddr_rwlock);
-
-			/* Generic protocol plumbing */
-
-			if ((error = proto_plumb(PF_INET, ifp))) {
-				if (error != EEXIST) {
-					kprintf("in.c: warning can't plumb proto if=%s%d type %d error=%d\n",
-						ifp->if_name, ifp->if_unit, ifp->if_type, error);
-				}
-				error = 0; /*discard error, can be cold with unsupported interfaces */
-			}
+			error = in_domifattach(ifp);
+			/* discard error; attach may fail for unsupported interfaces */
+			if (error)
+				error = 0;
 		}
 		break;
 
@@ -531,7 +600,9 @@ in_control(
 		break;
 
 	case SIOCGIFADDR:
+		IFA_LOCK(&ia->ia_ifa);
 		*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr;
+		IFA_UNLOCK(&ia->ia_ifa);
 		break;
 
 	case SIOCGIFBRDADDR:
@@ -539,7 +610,9 @@ in_control(
 			error = EINVAL;
 			break;
 		}
+		IFA_LOCK(&ia->ia_ifa);
 		*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_broadaddr;
+		IFA_UNLOCK(&ia->ia_ifa);
 		break;
 
 	case SIOCGIFDSTADDR:
@@ -547,11 +620,15 @@ in_control(
 			error = EINVAL;
 			break;
 		}
+		IFA_LOCK(&ia->ia_ifa);
 		*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_dstaddr;
+		IFA_UNLOCK(&ia->ia_ifa);
 		break;
 
 	case SIOCGIFNETMASK:
+		IFA_LOCK(&ia->ia_ifa);
 		*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask;
+		IFA_UNLOCK(&ia->ia_ifa);
 		break;
 
 	case SIOCSIFDSTADDR:
@@ -559,23 +636,28 @@ in_control(
 			error = EINVAL;
 			break;
 		}
+		IFA_LOCK(&ia->ia_ifa);
 		oldaddr = ia->ia_dstaddr;
 		ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr;
 		if (ia->ia_dstaddr.sin_family == AF_INET)
 			ia->ia_dstaddr.sin_len = sizeof (struct sockaddr_in);
+		IFA_UNLOCK(&ia->ia_ifa);
 		error = ifnet_ioctl(ifp, PF_INET, SIOCSIFDSTADDR, ia);
+		IFA_LOCK(&ia->ia_ifa);
 		if (error == EOPNOTSUPP) {
 			error = 0;
 		}
 		if (error) {
 			ia->ia_dstaddr = oldaddr;
+			IFA_UNLOCK(&ia->ia_ifa);
 			break;
 		}
+		IFA_LOCK_ASSERT_HELD(&ia->ia_ifa);
 
 		ev_msg.vendor_code    = KEV_VENDOR_APPLE;
 		ev_msg.kev_class      = KEV_NETWORK_CLASS;
 		ev_msg.kev_subclass   = KEV_INET_SUBCLASS;
-	
+
 		ev_msg.event_code = KEV_INET_SIFDSTADDR;
 
 		if (ia->ia_ifa.ifa_dstaddr)
@@ -590,6 +672,7 @@ in_control(
 		in_event_data.ia_subnet       = ia->ia_subnet;
 		in_event_data.ia_subnetmask   = ia->ia_subnetmask;
 		in_event_data.ia_netbroadcast = ia->ia_netbroadcast;
+		IFA_UNLOCK(&ia->ia_ifa);
 		strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ);
 		in_event_data.link_data.if_family = ifp->if_family;
 		in_event_data.link_data.if_unit  = (u_int32_t) ifp->if_unit;
@@ -600,14 +683,22 @@ in_control(
 
 		kev_post_msg(&ev_msg);
 
-
+		lck_mtx_lock(rnh_lock);
+		IFA_LOCK(&ia->ia_ifa);
 		if (ia->ia_flags & IFA_ROUTE) {
 			ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr;
-			rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
+			IFA_UNLOCK(&ia->ia_ifa);
+			rtinit_locked(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
+			IFA_LOCK(&ia->ia_ifa);
 			ia->ia_ifa.ifa_dstaddr =
-					(struct sockaddr *)&ia->ia_dstaddr;
-			rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
+			    (struct sockaddr *)&ia->ia_dstaddr;
+			IFA_UNLOCK(&ia->ia_ifa);
+			rtinit_locked(&(ia->ia_ifa), (int)RTM_ADD,
+			    RTF_HOST|RTF_UP);
+		} else {
+			IFA_UNLOCK(&ia->ia_ifa);
 		}
+		lck_mtx_unlock(rnh_lock);
 		break;
 
 	case SIOCSIFBRDADDR:
@@ -615,12 +706,13 @@ in_control(
 			error = EINVAL;
 			break;
 		}
+		IFA_LOCK(&ia->ia_ifa);
 		ia->ia_broadaddr = *(struct sockaddr_in *)&ifr->ifr_broadaddr;
 
 		ev_msg.vendor_code    = KEV_VENDOR_APPLE;
 		ev_msg.kev_class      = KEV_NETWORK_CLASS;
 		ev_msg.kev_subclass   = KEV_INET_SUBCLASS;
-	
+
 		ev_msg.event_code = KEV_INET_SIFBRDADDR;
 
 		if (ia->ia_ifa.ifa_dstaddr)
@@ -635,6 +727,7 @@ in_control(
 		in_event_data.ia_subnet       = ia->ia_subnet;
 		in_event_data.ia_subnetmask   = ia->ia_subnetmask;
 		in_event_data.ia_netbroadcast = ia->ia_netbroadcast;
+		IFA_UNLOCK(&ia->ia_ifa);
 		strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ);
 		in_event_data.link_data.if_family = ifp->if_family;
 		in_event_data.link_data.if_unit  = (u_int32_t) ifp->if_unit;
@@ -661,34 +754,41 @@ in_control(
 		break;
 
 	case SIOCPROTOATTACH:
-		error = proto_plumb(PF_INET, ifp);
+		error = in_domifattach(ifp);
 		break;
-                
+
 	case SIOCPROTODETACH:
-                // if an ip address is still present, refuse to detach
+		/*
+		 * If an IPv4 address is still present, refuse to detach.
+		 */
 		ifnet_lock_shared(ifp);
-		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) 
-			if (ifa->ifa_addr->sa_family == AF_INET)
+		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+			IFA_LOCK(ifa);
+			if (ifa->ifa_addr->sa_family == AF_INET) {
+				IFA_UNLOCK(ifa);
 				break;
+			}
+			IFA_UNLOCK(ifa);
+		}
 		ifnet_lock_done(ifp);
-		if (ifa != 0) {
+		if (ifa != NULL) {
 			error =  EBUSY;
 			break;
 		}
 
 		error = proto_unplumb(PF_INET, ifp);
 		break;
-		
 
 	case SIOCSIFNETMASK: {
 		u_long i;
-		
+
 		i = ifra->ifra_addr.sin_addr.s_addr;
+		IFA_LOCK(&ia->ia_ifa);
 		ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr = i);
 		ev_msg.vendor_code    = KEV_VENDOR_APPLE;
 		ev_msg.kev_class      = KEV_NETWORK_CLASS;
 		ev_msg.kev_subclass   = KEV_INET_SUBCLASS;
-	
+
 		ev_msg.event_code = KEV_INET_SIFNETMASK;
 
 		if (ia->ia_ifa.ifa_dstaddr)
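
The SIOCPROTODETACH hunk above converts a bare list walk into the scan idiom used throughout this patch: the interface's address-list lock stabilizes membership for the duration of the walk, while each entry's own IFA lock is held only long enough to inspect its fields. The idiom in isolation (a sketch; the caller here, as in the hunk, only tests the result for NULL and would need a reference to dereference it):

/*
 * Sketch of the two-level locking scan used above.
 */
static struct ifaddr *
first_inet_ifa(struct ifnet *ifp)
{
	struct ifaddr *ifa;

	ifnet_lock_shared(ifp);			/* stabilize the list */
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		IFA_LOCK(ifa);			/* stabilize the entry */
		if (ifa->ifa_addr->sa_family == AF_INET) {
			IFA_UNLOCK(ifa);
			break;
		}
		IFA_UNLOCK(ifa);
	}
	ifnet_lock_done(ifp);
	return (ifa);				/* NULL if none found */
}
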
@@ -703,6 +803,7 @@ in_control(
 		in_event_data.ia_subnet       = ia->ia_subnet;
 		in_event_data.ia_subnetmask   = ia->ia_subnetmask;
 		in_event_data.ia_netbroadcast = ia->ia_netbroadcast;
+		IFA_UNLOCK(&ia->ia_ifa);
 		strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ);
 		in_event_data.link_data.if_family = ifp->if_family;
 		in_event_data.link_data.if_unit  = (u_int32_t) ifp->if_unit;
@@ -720,6 +821,7 @@ in_control(
 		hostIsNew = 1;
 		error = 0;
 
+		IFA_LOCK(&ia->ia_ifa);
 		if (ia->ia_addr.sin_family == AF_INET) {
 			if (ifra->ifra_addr.sin_len == 0) {
 				ifra->ifra_addr = ia->ia_addr;
@@ -729,7 +831,9 @@ in_control(
 				hostIsNew = 0;
 		}
 		if (ifra->ifra_mask.sin_len) {
+			IFA_UNLOCK(&ia->ia_ifa);
 			in_ifscrub(ifp, ia, 0);
+			IFA_LOCK(&ia->ia_ifa);
 			ia->ia_sockmask = ifra->ifra_mask;
 			ia->ia_subnetmask =
 			     ntohl(ia->ia_sockmask.sin_addr.s_addr);
@@ -737,19 +841,25 @@ in_control(
 		}
 		if ((ifp->if_flags & IFF_POINTOPOINT) &&
 		    (ifra->ifra_dstaddr.sin_family == AF_INET)) {
+			IFA_UNLOCK(&ia->ia_ifa);
 			in_ifscrub(ifp, ia, 0);
+			IFA_LOCK(&ia->ia_ifa);
 			ia->ia_dstaddr = ifra->ifra_dstaddr;
 			ia->ia_dstaddr.sin_len = sizeof (struct sockaddr_in);
 			maskIsNew  = 1; /* We lie; but the effect's the same */
 		}
 		if (ifra->ifra_addr.sin_family == AF_INET &&
 		    (hostIsNew || maskIsNew)) {
+			IFA_UNLOCK(&ia->ia_ifa);
 			error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0);
+		} else {
+			IFA_UNLOCK(&ia->ia_ifa);
 		}
 #if PF
 		if (!error)
 			(void) pf_ifaddr_hook(ifp, cmd);
 #endif /* PF */
+		IFA_LOCK(&ia->ia_ifa);
 		if ((ifp->if_flags & IFF_BROADCAST) &&
 		    (ifra->ifra_broadaddr.sin_family == AF_INET))
 			ia->ia_broadaddr = ifra->ifra_broadaddr;
@@ -780,6 +890,7 @@ in_control(
 		     in_event_data.ia_subnet       = ia->ia_subnet;
 		     in_event_data.ia_subnetmask   = ia->ia_subnetmask;
 		     in_event_data.ia_netbroadcast = ia->ia_netbroadcast;
+		     IFA_UNLOCK(&ia->ia_ifa);
 		     strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ);
 		     in_event_data.link_data.if_family = ifp->if_family;
 		     in_event_data.link_data.if_unit  = (u_int32_t) ifp->if_unit;
@@ -789,6 +900,8 @@ in_control(
 		     ev_msg.dv[1].data_length = 0;
 
 		     kev_post_msg(&ev_msg);
+		} else {
+		     IFA_UNLOCK(&ia->ia_ifa);
 		}
 		break;
 
@@ -804,9 +917,10 @@ in_control(
 		ev_msg.vendor_code    = KEV_VENDOR_APPLE;
 		ev_msg.kev_class      = KEV_NETWORK_CLASS;
 		ev_msg.kev_subclass   = KEV_INET_SUBCLASS;
-		
+
 		ev_msg.event_code = KEV_INET_ADDR_DELETED;
 
+		IFA_LOCK(&ia->ia_ifa);
 		if (ia->ia_ifa.ifa_dstaddr)
 		     in_event_data.ia_dstaddr = 
 			  ((struct sockaddr_in *)ia->ia_ifa.ifa_dstaddr)->sin_addr;
@@ -819,6 +933,7 @@ in_control(
 		in_event_data.ia_subnet       = ia->ia_subnet;
 		in_event_data.ia_subnetmask   = ia->ia_subnetmask;
 		in_event_data.ia_netbroadcast = ia->ia_netbroadcast;
+		IFA_UNLOCK(&ia->ia_ifa);
 		strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ);
 		in_event_data.link_data.if_family = ifp->if_family;
 		in_event_data.link_data.if_unit  = (u_int32_t) ifp->if_unit;
@@ -830,10 +945,12 @@ in_control(
 		ifa = &ia->ia_ifa;
 		lck_rw_lock_exclusive(in_ifaddr_rwlock);
 		/* Release ia_link reference */
-		ifafree(ifa);
+		IFA_REMREF(ifa);
 		TAILQ_REMOVE(&in_ifaddrhead, ia, ia_link);
+		IFA_LOCK(ifa);
 		if (IA_IS_HASHED(ia))
 			in_iahash_remove(ia);
+		IFA_UNLOCK(ifa);
 		lck_rw_done(in_ifaddr_rwlock);
 
 		/*
@@ -841,31 +958,42 @@ in_control(
 		 */
 		in_ifscrub(ifp, ia, 0);
 		ifnet_lock_exclusive(ifp);
+		IFA_LOCK(ifa);
 		/* if_detach_ifa() releases ifa_link reference */
 		if_detach_ifa(ifp, ifa);
-#ifdef __APPLE__
+		/* Our reference to this address is dropped at the bottom */
+		IFA_UNLOCK(ifa);
+
 	       /*
 		* If the interface supports multicast, and no address is left,
 		* remove the "all hosts" multicast group from that interface.
 		*/
-		if (ifp->if_flags & IFF_MULTICAST) {
-			struct in_addr addr;
-			struct in_multi *inm = NULL;
+		if ((ifp->if_flags & IFF_MULTICAST) != 0 ||
+		    ifp->if_allhostsinm != NULL) {
 
-			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) 
-				if (ifa->ifa_addr->sa_family == AF_INET)
+			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+				IFA_LOCK(ifa);
+				if (ifa->ifa_addr->sa_family == AF_INET) {
+					IFA_UNLOCK(ifa);
 					break;
-
-			if (ifa == 0) {
-				addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
-				IN_LOOKUP_MULTI(addr, ifp, inm);
+				}
+				IFA_UNLOCK(ifa);
 			}
 			ifnet_lock_done(ifp);
-			if (inm)
-		  	  	in_delmulti(&inm);
-		} else 
+
+			lck_mtx_lock(&ifp->if_addrconfig_lock);
+			if (ifa == NULL && ifp->if_allhostsinm != NULL) {
+				struct in_multi *inm = ifp->if_allhostsinm;
+				ifp->if_allhostsinm = NULL;
+
+				in_delmulti(inm);
+				/* release the reference for allhostsinm pointer */
+				INM_REMREF(inm);
+			}
+			lck_mtx_unlock(&ifp->if_addrconfig_lock);
+		} else {
 			ifnet_lock_done(ifp);
-#endif
+		}
 
 		/* Post the kernel event */
 		kev_post_msg(&ev_msg);
@@ -881,7 +1009,7 @@ in_control(
 				error = 0;
 
 			/* Release reference from ifa_ifpgetprimary() */
-			ifafree(ifa);
+			IFA_REMREF(ifa);
 		}
 #if PF
 		(void) pf_ifaddr_hook(ifp, cmd);
@@ -933,7 +1061,6 @@ in_control(
 
             /* Multicast options */
             if (cloned_inp->inp_moptions != NULL) {
-                int			i;
                 struct ip_moptions	*cloned_imo = cloned_inp->inp_moptions;
                 struct ip_moptions	*imo = inp->inp_moptions;
 
@@ -942,35 +1069,15 @@ in_control(
                      * No multicast option buffer attached to the pcb;
                      * allocate one.
                      */
-                    imo = (struct ip_moptions*)
-                        _MALLOC(sizeof(*imo), M_IPMOPTS, M_WAITOK);
+                    imo = ip_allocmoptions(M_WAITOK);
                     if (imo == NULL) {
                         error2 = ENOBUFS;
                         break;
                     }
                     inp->inp_moptions = imo;
                 }
-                imo->imo_multicast_ifp = cloned_imo->imo_multicast_ifp;
-                imo->imo_multicast_vif = cloned_imo->imo_multicast_vif;
-                imo->imo_multicast_ttl = cloned_imo->imo_multicast_ttl;
-                imo->imo_multicast_loop = cloned_imo->imo_multicast_loop;
-                imo->imo_num_memberships = cloned_imo->imo_num_memberships;
-                for (i = 0; i < cloned_imo->imo_num_memberships; i++) {
-                    imo->imo_membership[i] =
-                    in_addmulti(&cloned_imo->imo_membership[i]->inm_addr,
-                                cloned_imo->imo_membership[i]->inm_ifp);
-					if (imo->imo_membership[i] == NULL) {
-						error2 = ENOBUFS;
-						break;
-					}
-                }
-                if (i < cloned_imo->imo_num_memberships) {
-                	/* Failed, perform cleanup */
-                	for (i--; i >= 0; i--)
-                		in_delmulti(&imo->imo_membership[i]);
-                	imo->imo_num_memberships = 0;
-                	break;
-                }
+
+		error2 = imo_clone(cloned_imo, imo);
             }
         }
         break;
@@ -982,7 +1089,7 @@ in_control(
 	}
  done:
 	if (ia != NULL) {
-		ifafree(&ia->ia_ifa);
+		IFA_REMREF(&ia->ia_ifa);
 	}
 	return (error);
 }
@@ -1017,7 +1124,7 @@ in_lifaddr_ioctl(
 	/* sanity checks */
 	if (!data || !ifp) {
 		panic("invalid argument to in_lifaddr_ioctl");
-		/*NOTRECHED*/
+		/*NOTREACHED*/
 	}
 
 	switch (cmd) {
@@ -1112,21 +1219,30 @@ in_lifaddr_ioctl(
 
 		ifnet_lock_shared(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)	{
-			if (ifa->ifa_addr->sa_family != AF_INET6)
+			IFA_LOCK(ifa);
+			if (ifa->ifa_addr->sa_family != AF_INET6) {
+				IFA_UNLOCK(ifa);
 				continue;
-			if (!cmp)
+			}
+			if (!cmp) {
+				IFA_UNLOCK(ifa);
 				break;
+			}
 			candidate.s_addr = ((struct sockaddr_in *)&ifa->ifa_addr)->sin_addr.s_addr;
 			candidate.s_addr &= mask.s_addr;
+			IFA_UNLOCK(ifa);
 			if (candidate.s_addr == match.s_addr)
 				break;
 		}
+		if (ifa != NULL)
+			IFA_ADDREF(ifa);
 		ifnet_lock_done(ifp);
 		if (!ifa)
 			return EADDRNOTAVAIL;
 		ia = (struct in_ifaddr *)ifa;
 
 		if (cmd == SIOCGLIFADDR) {
+			IFA_LOCK(ifa);
 			/* fill in the if_laddrreq structure */
 			bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin_len);
 
@@ -1141,6 +1257,8 @@ in_lifaddr_ioctl(
 
 			iflr->flags = 0;	/*XXX*/
 
+			IFA_UNLOCK(ifa);
+			IFA_REMREF(ifa);
 			return 0;
 		} else {
 			struct in_aliasreq ifra;
@@ -1150,6 +1268,7 @@ in_lifaddr_ioctl(
 			bcopy(iflr->iflr_name, ifra.ifra_name,
 				sizeof(ifra.ifra_name));
 
+			IFA_LOCK(ifa);
 			bcopy(&ia->ia_addr, &ifra.ifra_addr,
 				ia->ia_addr.sin_len);
 			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
@@ -1158,7 +1277,8 @@ in_lifaddr_ioctl(
 			}
 			bcopy(&ia->ia_sockmask, &ifra.ifra_dstaddr,
 				ia->ia_sockmask.sin_len);
-
+			IFA_UNLOCK(ifa);
+			IFA_REMREF(ifa);
 			return in_control(so, SIOCDIFADDR, (caddr_t)&ifra,
 					  ifp, p);
 		}
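
Unlike the detach check earlier, this path dereferences the address after ifnet_lock is dropped, so the hunks above take an IFA_ADDREF while the list lock is still held and balance it with IFA_REMREF on every return path. The reference-then-drop idiom in miniature (a sketch under the same assumptions):

/*
 * Sketch: pin the entry with a reference before losing the
 * protection of the list lock, then release it when done.
 */
static int
copy_first_inet_addr(struct ifnet *ifp, struct sockaddr_in *out)
{
	struct ifaddr *ifa;

	ifnet_lock_shared(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		IFA_LOCK(ifa);
		if (ifa->ifa_addr->sa_family == AF_INET) {
			IFA_UNLOCK(ifa);
			break;
		}
		IFA_UNLOCK(ifa);
	}
	if (ifa != NULL)
		IFA_ADDREF(ifa);	/* pin before dropping list lock */
	ifnet_lock_done(ifp);
	if (ifa == NULL)
		return (EADDRNOTAVAIL);

	IFA_LOCK(ifa);
	*out = *(struct sockaddr_in *)ifa->ifa_addr;
	IFA_UNLOCK(ifa);
	IFA_REMREF(ifa);		/* balance the IFA_ADDREF */
	return (0);
}
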
@@ -1172,21 +1292,23 @@ in_lifaddr_ioctl(
  * Delete any existing route for an interface.
  */
 void
-in_ifscrub(
-	struct ifnet *ifp,
-	struct in_ifaddr *ia,
-	int locked)
+in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia, int locked)
 {
-
-	if ((ia->ia_flags & IFA_ROUTE) == 0)
+	IFA_LOCK(&ia->ia_ifa);
+	if ((ia->ia_flags & IFA_ROUTE) == 0) {
+		IFA_UNLOCK(&ia->ia_ifa);
 		return;
+	}
+	IFA_UNLOCK(&ia->ia_ifa);
 	if (!locked)
 		lck_mtx_lock(rnh_lock);
 	if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT))
 		rtinit_locked(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
 	else
 		rtinit_locked(&(ia->ia_ifa), (int)RTM_DELETE, 0);
+	IFA_LOCK(&ia->ia_ifa);
 	ia->ia_flags &= ~IFA_ROUTE;
+	IFA_UNLOCK(&ia->ia_ifa);
 	if (!locked)
 		lck_mtx_unlock(rnh_lock);
 }
@@ -1197,12 +1319,20 @@ in_ifscrub(
 static void
 in_iahash_remove(struct in_ifaddr *ia)
 {
-	if (!IA_IS_HASHED(ia))
-		panic("attempt to remove wrong ia %p from hash table\n", ia);
+	lck_rw_assert(in_ifaddr_rwlock, LCK_RW_ASSERT_EXCLUSIVE);
+	IFA_LOCK_ASSERT_HELD(&ia->ia_ifa);
 
+	if (!IA_IS_HASHED(ia)) {
+		panic("attempt to remove wrong ia %p from hash table\n", ia);
+		/* NOTREACHED */
+	}
 	TAILQ_REMOVE(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash);
 	IA_HASH_INIT(ia);
-	ifafree(&ia->ia_ifa);
+	if (IFA_REMREF_LOCKED(&ia->ia_ifa) == NULL) {
+		panic("%s: unexpected (missing) refcnt ifa=%p", __func__,
+		    &ia->ia_ifa);
+		/* NOTREACHED */
+	}
 }
 
 /*
@@ -1211,13 +1341,18 @@ in_iahash_remove(struct in_ifaddr *ia)
 static void
 in_iahash_insert(struct in_ifaddr *ia)
 {
-	if (ia->ia_addr.sin_family != AF_INET)
+	lck_rw_assert(in_ifaddr_rwlock, LCK_RW_ASSERT_EXCLUSIVE);
+	IFA_LOCK_ASSERT_HELD(&ia->ia_ifa);
+
+	if (ia->ia_addr.sin_family != AF_INET) {
 		panic("attempt to insert wrong ia %p into hash table\n", ia);
-	else if (IA_IS_HASHED(ia))
+		/* NOTREACHED */
+	} else if (IA_IS_HASHED(ia)) {
 		panic("attempt to double-insert ia %p into hash table\n", ia);
-
+		/* NOTREACHED */
+	}
 	TAILQ_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash);
-	ifaref(&ia->ia_ifa);
+	IFA_ADDREF_LOCKED(&ia->ia_ifa);
 }
 
 /*
@@ -1236,22 +1371,39 @@ in_iahash_insert_ptp(struct in_ifaddr *ia)
 	struct in_ifaddr *tmp_ifa;
 	struct ifnet *tmp_ifp;
 
-	if (ia->ia_addr.sin_family != AF_INET)
+	lck_rw_assert(in_ifaddr_rwlock, LCK_RW_ASSERT_EXCLUSIVE);
+	IFA_LOCK_ASSERT_HELD(&ia->ia_ifa);
+
+	if (ia->ia_addr.sin_family != AF_INET) {
 		panic("attempt to insert wrong ia %p into hash table\n", ia);
-	else if (IA_IS_HASHED(ia))
+		/* NOTREACHED */
+	} else if (IA_IS_HASHED(ia)) {
 		panic("attempt to double-insert ia %p into hash table\n", ia);
-        
-	TAILQ_FOREACH(tmp_ifa, INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia_hash)
-		if (IA_SIN(tmp_ifa)->sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)
+		/* NOTREACHED */
+	}
+	IFA_UNLOCK(&ia->ia_ifa);
+	TAILQ_FOREACH(tmp_ifa, INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
+	    ia_hash) {
+		IFA_LOCK(&tmp_ifa->ia_ifa);
+		/* ia->ia_addr won't change, so check without lock */
+		if (IA_SIN(tmp_ifa)->sin_addr.s_addr ==
+		    ia->ia_addr.sin_addr.s_addr) {
+			IFA_UNLOCK(&tmp_ifa->ia_ifa);
 			break;
+		}
+		IFA_UNLOCK(&tmp_ifa->ia_ifa);
+	}
 	tmp_ifp = (tmp_ifa == NULL) ? NULL : tmp_ifa->ia_ifp;
 
-	if (tmp_ifp == NULL)
-		TAILQ_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash);
-	else
-		TAILQ_INSERT_TAIL(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash);
-	
-	ifaref(&ia->ia_ifa);
+	IFA_LOCK(&ia->ia_ifa);
+	if (tmp_ifp == NULL) {
+		TAILQ_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
+		    ia, ia_hash);
+	} else {
+		TAILQ_INSERT_TAIL(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
+		    ia, ia_hash);
+	}
+	IFA_ADDREF_LOCKED(&ia->ia_ifa);
 }
 
 /*
@@ -1273,9 +1425,10 @@ in_ifinit(
 	int oldremoved = 0;
 
 	/* Take an extra reference for this routine */
-	ifaref(&ia->ia_ifa);
+	IFA_ADDREF(&ia->ia_ifa);
 
 	lck_rw_lock_exclusive(in_ifaddr_rwlock);
+	IFA_LOCK(&ia->ia_ifa);
 	oldaddr = ia->ia_addr;
 	if (IA_IS_HASHED(ia)) {
 		oldremoved = 1;
@@ -1285,8 +1438,9 @@ in_ifinit(
 	ia->ia_addr.sin_len = sizeof (*sin);
 	if ((ifp->if_flags & IFF_POINTOPOINT))
 		in_iahash_insert_ptp(ia);
-	else	
+	else
 		in_iahash_insert(ia);
+	IFA_UNLOCK(&ia->ia_ifa);
 	lck_rw_done(in_ifaddr_rwlock);
 
 	/*
@@ -1315,10 +1469,11 @@ in_ifinit(
 	}
 
 	/* Release reference from ifa_ifpgetprimary() */
-	ifafree(ifa0);
+	IFA_REMREF(ifa0);
 
 	if (error) {
 		lck_rw_lock_exclusive(in_ifaddr_rwlock);
+		IFA_LOCK(&ia->ia_ifa);
 		if (IA_IS_HASHED(ia))
 			in_iahash_remove(ia);
 		ia->ia_addr = oldaddr;
@@ -1328,17 +1483,27 @@ in_ifinit(
 			else
 				in_iahash_insert(ia);
 		}
+		IFA_UNLOCK(&ia->ia_ifa);
 		lck_rw_done(in_ifaddr_rwlock);
 		/* Release extra reference taken above */
-		ifafree(&ia->ia_ifa);
+		IFA_REMREF(&ia->ia_ifa);
 		return (error);
 	}
 	lck_mtx_lock(rnh_lock);
+	IFA_LOCK(&ia->ia_ifa);
+	/*
+	 * Address has been initialized by the link resolver (ARP)
+	 * via ifnet_ioctl() above; it may now generate route(s).
+	 */
+	ia->ia_ifa.ifa_debug &= ~IFD_NOTREADY;
 	if (scrub) {
 		ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
+		IFA_UNLOCK(&ia->ia_ifa);
 		in_ifscrub(ifp, ia, 1);
+		IFA_LOCK(&ia->ia_ifa);
 		ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
 	}
+	IFA_LOCK_ASSERT_HELD(&ia->ia_ifa);
 	if (IN_CLASSA(i))
 		ia->ia_netmask = IN_CLASSA_NET;
 	else if (IN_CLASSB(i))
@@ -1372,16 +1537,21 @@ in_ifinit(
 		flags |= RTF_HOST;
 	} else if (ifp->if_flags & IFF_POINTOPOINT) {
 		if (ia->ia_dstaddr.sin_family != AF_INET) {
+			IFA_UNLOCK(&ia->ia_ifa);
 			lck_mtx_unlock(rnh_lock);
 			/* Release extra reference taken above */
-			ifafree(&ia->ia_ifa);
+			IFA_REMREF(&ia->ia_ifa);
 			return (0);
 		}
 		ia->ia_dstaddr.sin_len = sizeof (*sin);
 		flags |= RTF_HOST;
 	}
-	if ((error = rtinit_locked(&(ia->ia_ifa), (int)RTM_ADD, flags)) == 0)
+	IFA_UNLOCK(&ia->ia_ifa);
+	if ((error = rtinit_locked(&(ia->ia_ifa), (int)RTM_ADD, flags)) == 0) {
+		IFA_LOCK(&ia->ia_ifa);
 		ia->ia_flags |= IFA_ROUTE;
+		IFA_UNLOCK(&ia->ia_ifa);
+	}
 	lck_mtx_unlock(rnh_lock);
 
 	/* XXX check if the subnet route points to the same interface */
@@ -1393,19 +1563,29 @@ in_ifinit(
 	 * multicast group on that interface.
 	 */
 	if (ifp->if_flags & IFF_MULTICAST) {
-		struct in_multi *inm;
 		struct in_addr addr;
 
+		lck_mtx_lock(&ifp->if_addrconfig_lock);
 		addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
-		ifnet_lock_shared(ifp);
-		IN_LOOKUP_MULTI(addr, ifp, inm);
-		ifnet_lock_done(ifp);
-		if (inm == 0)
-			in_addmulti(&addr, ifp);
+		if (ifp->if_allhostsinm == NULL) {
+			struct in_multi *inm;
+			inm = in_addmulti(&addr, ifp);
+
+			if (inm != NULL) {
+				/*
+				 * Keep the reference on inm added by
+				 * in_addmulti above for storing the
+				 * pointer in allhostsinm.
+				 */
+				ifp->if_allhostsinm = inm;
+			} else {
+				printf("Failed to add membership to all-hosts multicast address on interface %s%d\n", ifp->if_name, ifp->if_unit);
+			}
+		}
+		lck_mtx_unlock(&ifp->if_addrconfig_lock);
 	}
 
 	/* Release extra reference taken above */
-	ifafree(&ia->ia_ifa);
+	IFA_REMREF(&ia->ia_ifa);
 	return (error);
 }
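
The all-hosts handling in in_ifinit() above is made idempotent by if_addrconfig_lock: only the first address configured on the interface joins 224.0.0.1, and the reference returned by in_addmulti() is deliberately kept to back the cached if_allhostsinm pointer. The matching teardown appears in the SIOCDIFADDR hunk earlier, which drops the membership and then releases that stored reference. Both halves of the pattern, sketched side by side (assuming the same kernel primitives):

/* Join side (sketch): the cache keeps the in_addmulti() reference. */
struct in_addr addr;
addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
lck_mtx_lock(&ifp->if_addrconfig_lock);
if (ifp->if_allhostsinm == NULL)
	ifp->if_allhostsinm = in_addmulti(&addr, ifp);
lck_mtx_unlock(&ifp->if_addrconfig_lock);

/* Leave side (sketch): drop membership, then the stored reference. */
lck_mtx_lock(&ifp->if_addrconfig_lock);
if (ifp->if_allhostsinm != NULL) {
	struct in_multi *inm = ifp->if_allhostsinm;
	ifp->if_allhostsinm = NULL;
	in_delmulti(inm);
	INM_REMREF(inm);
}
lck_mtx_unlock(&ifp->if_addrconfig_lock);
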
 
@@ -1414,18 +1594,15 @@ in_ifinit(
  * Return 1 if the address might be a local broadcast address.
  */
 int
-in_broadcast(
-	struct in_addr in,
-	struct ifnet *ifp)
+in_broadcast(struct in_addr in, struct ifnet *ifp)
 {
 	struct ifaddr *ifa;
 	u_int32_t t;
 
-	if (in.s_addr == INADDR_BROADCAST ||
-	    in.s_addr == INADDR_ANY)
-		return 1;
+	if (in.s_addr == INADDR_BROADCAST || in.s_addr == INADDR_ANY)
+		return (1);
 	if ((ifp->if_flags & IFF_BROADCAST) == 0)
-		return 0;
+		return (0);
 	t = ntohl(in.s_addr);
 	/*
 	 * Look through the list of addresses for a match
@@ -1434,10 +1611,7 @@ in_broadcast(
 #define ia ((struct in_ifaddr *)ifa)
 	ifnet_lock_shared(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
-		if (ifa->ifa_addr == NULL) {
-			ifnet_lock_done(ifp);
-			return (0);
-		}
+		IFA_LOCK(ifa);
 		if (ifa->ifa_addr->sa_family == AF_INET &&
 		    (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
 		     in.s_addr == ia->ia_netbroadcast.s_addr ||
@@ -1451,140 +1625,76 @@ in_broadcast(
 		      * address.
 		      */
 		     ia->ia_subnetmask != (u_int32_t)0xffffffff) {
+			IFA_UNLOCK(ifa);
 			ifnet_lock_done(ifp);
-			return 1;
+			return (1);
 		}
+		IFA_UNLOCK(ifa);
 	}
 	ifnet_lock_done(ifp);
 	return (0);
 #undef ia
 }
 
-static void
-in_free_inm(
-	void*	ifma_protospec)
-{
-	struct in_multi *inm = ifma_protospec;
-	
-	/*
-	 * No remaining claims to this record; let IGMP know that
-	 * we are leaving the multicast group.
-	 */
-	igmp_leavegroup(inm);
-	lck_mtx_lock(rnh_lock);
-	LIST_REMOVE(inm, inm_link);
-	lck_mtx_unlock(rnh_lock);
-	FREE(inm, M_IPMADDR);
-}
-
-/*
- * Add an address to the list of IP multicast addresses for a given interface.
- */
-struct in_multi *
-in_addmulti(
-	struct in_addr *ap,
-	struct ifnet *ifp)
+void
+in_purgeaddrs(struct ifnet *ifp)
 {
-	struct in_multi *inm;
-	int error;
-	struct sockaddr_in sin;
-	struct ifmultiaddr *ifma;
+	struct ifaddr **ifap;
+	int err, i;
 
 	/*
-	 * Call generic routine to add membership or increment
-	 * refcount.  It wants addresses in the form of a sockaddr,
-	 * so we build one here (being careful to zero the unused bytes).
+	 * Be nice, and try the civilized way first.  If we can't get
+	 * rid of them this way, then do it the rough way.  We must
+	 * only get here during detach time, after the ifnet has been
+	 * removed from the global list and arrays.
 	 */
-	bzero(&sin, sizeof sin);
-	sin.sin_family = AF_INET;
-	sin.sin_len = sizeof sin;
-	sin.sin_addr = *ap;
-	error = if_addmulti(ifp, (struct sockaddr *)&sin, &ifma);
-	if (error) {
-		return 0;
-	}
-
-	/*
-	 * If ifma->ifma_protospec is null, then if_addmulti() created
-	 * a new record.  Otherwise, we are done.
-	 */
-	if (ifma->ifma_protospec != 0) {
-		return ifma->ifma_protospec;
-	}
-
-	inm = (struct in_multi *) _MALLOC(sizeof(*inm), M_IPMADDR, M_WAITOK);
-	if (inm == NULL) {
-		return (NULL);
-	}
-
-	bzero(inm, sizeof *inm);
-	inm->inm_addr = *ap;
-	inm->inm_ifp = ifp;
-	inm->inm_ifma = ifma;
-	lck_mtx_lock(rnh_lock);
-	if (ifma->ifma_protospec == NULL) {
-		ifma->ifma_protospec = inm;
-		ifma->ifma_free = in_free_inm;
-		LIST_INSERT_HEAD(&in_multihead, inm, inm_link);
-	}
-	lck_mtx_unlock(rnh_lock);
-	
-	if (ifma->ifma_protospec != inm) {
-		_FREE(inm, M_IPMADDR);
-		return ifma->ifma_protospec;
-	}
-
-	/*
-	 * Let IGMP know that we have joined a new IP multicast group.
-	 */
-	error = igmp_joingroup(inm);
-	if (error) {
-		char addrbuf[16];
-		
-		/*
-		 * We can't free the inm because someone else may already be
-		 * using it. Once we put it in to ifma->ifma_protospec, it
-		 * must exist as long as the ifma does. Might be nice to flag
-		 * the error so we can try igmp_joingroup the next time through.
-		 */
-		log(LOG_ERR, "igmp_joingroup error %d joining multicast %s on %s%d\n",
-			error, inet_ntop(AF_INET, &sin.sin_addr, addrbuf, sizeof(addrbuf)),
-			ifp->if_name, ifp->if_unit);
-	}
-	
-	return (inm);
-}
-
-/*
- * Delete a multicast address record.
- */
-void
-in_delmulti(
-	struct in_multi **inm)
-{
-	struct in_multi	*inm2;
-	
-	lck_mtx_lock(rnh_lock);
-	LIST_FOREACH(inm2, &in_multihead, inm_link) {
-		if (inm2 == *inm)
-			break;
-	}
-	if (inm2 != *inm) {
-		lck_mtx_unlock(rnh_lock);
-		printf("in_delmulti - ignoring invalid inm (%p)\n", *inm);
-		return;
-	}
-	lck_mtx_unlock(rnh_lock);
-	
-	/* We intentionally do this a bit differently than BSD */
-	if ((*inm)->inm_ifma) {
-		if_delmultiaddr((*inm)->inm_ifma, 0);
-		ifma_release((*inm)->inm_ifma);
+	err = ifnet_get_address_list_family_internal(ifp, &ifap, AF_INET, 1,
+	    M_WAITOK);
+	if (err == 0 && ifap != NULL) {
+		for (i = 0; ifap[i] != NULL; i++) {
+			struct ifaliasreq ifr;
+			struct ifaddr *ifa;
+
+			ifa = ifap[i];
+			bzero(&ifr, sizeof (ifr));
+			IFA_LOCK(ifa);
+			ifr.ifra_addr = *ifa->ifa_addr;
+			if (ifa->ifa_dstaddr != NULL)
+				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
+			IFA_UNLOCK(ifa);
+			err = in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
+			    kernproc);
+			/* if we lost the race, ignore it */
+			if (err == EADDRNOTAVAIL)
+				err = 0;
+			if (err != 0) {
+				char s_addr[MAX_IPv4_STR_LEN];
+				char s_dstaddr[MAX_IPv4_STR_LEN];
+				struct in_addr *s, *d;
+
+				IFA_LOCK(ifa);
+				s = &((struct sockaddr_in *)
+				    ifa->ifa_addr)->sin_addr;
+				d = &((struct sockaddr_in *)
+				    ifa->ifa_dstaddr)->sin_addr;
+				(void) inet_ntop(AF_INET, &s->s_addr, s_addr,
+				    sizeof (s_addr));
+				(void) inet_ntop(AF_INET, &d->s_addr, s_dstaddr,
+				    sizeof (s_dstaddr));
+				IFA_UNLOCK(ifa);
+
+				printf("%s: SIOCDIFADDR ifp=%p ifa_addr=%s "
+				    "ifa_dstaddr=%s (err=%d)\n", __func__, ifp,
+				    s_addr, s_dstaddr, err);
+			}
+		}
+		ifnet_free_address_list(ifap);
+	} else if (err != 0 && err != ENXIO) {
+		printf("%s: error retrieving list of AF_INET addresses for "
+		    "ifp=%p (err=%d)\n", __func__, ifp, err);
 	}
-	*inm = NULL;
 }
 
-#if !NFSCLIENT
 int inet_aton(char *cp, struct in_addr *pin);
 int
 inet_aton(char * cp, struct in_addr * pin)
@@ -1605,7 +1715,19 @@ inet_aton(char * cp, struct in_addr * pin)
     }
     return (TRUE);
 }
-#endif
+
+int inet_ntoa2(struct in_addr * pin, char * cp, const int len);
+int inet_ntoa2(struct in_addr * pin, char * cp, const int len)
+{
+    int ret;
+
+    /* address is in network byte order */
+    ret = snprintf(cp, len, "%u.%u.%u.%u", pin->s_addr & 0xFF,
+                  (pin->s_addr >> 8) & 0xFF, (pin->s_addr >> 16) & 0xFF,
+                  (pin->s_addr >> 24) & 0xFF);
+
+    return (ret < len ? TRUE : FALSE);
+}
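
inet_ntoa2() relies on snprintf() returning the length the formatted string would have occupied, so ret < len means the dotted quad (at most MAX_IPv4_STR_LEN, i.e. 16 bytes including the NUL) fit without truncation. A usage sketch:

/* Sketch: formatting an address with the helper above. */
char buf[MAX_IPv4_STR_LEN];		/* "255.255.255.255" + NUL */
struct in_addr a;

a.s_addr = htonl(0xc0a80101);		/* 192.168.1.1 */
if (inet_ntoa2(&a, buf, sizeof (buf)) == TRUE)
	printf("addr=%s\n", buf);
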
 
 /*
  * Called as part of ip_init
@@ -1613,6 +1735,8 @@ inet_aton(char * cp, struct in_addr * pin)
 void
 in_ifaddr_init(void)
 {
+	in_multi_init();
+
 	PE_parse_boot_argn("ifa_debug", &inifa_debug, sizeof (inifa_debug));
 
 	inifa_size = (inifa_debug == 0) ? sizeof (struct in_ifaddr) :
@@ -1620,10 +1744,15 @@ in_ifaddr_init(void)
 
 	inifa_zone = zinit(inifa_size, INIFA_ZONE_MAX * inifa_size,
 	    0, INIFA_ZONE_NAME);
-	if (inifa_zone == NULL)
+	if (inifa_zone == NULL) {
 		panic("%s: failed allocating %s", __func__, INIFA_ZONE_NAME);
-
+		/* NOTREACHED */
+	}
 	zone_change(inifa_zone, Z_EXPAND, TRUE);
+	zone_change(inifa_zone, Z_CALLERACCT, FALSE);
+
+	lck_mtx_init(&inifa_trash_lock, ifa_mtx_grp, ifa_mtx_attr);
+	TAILQ_INIT(&inifa_trash_head);
 }
 
 static struct in_ifaddr *
@@ -1637,11 +1766,14 @@ in_ifaddr_alloc(int how)
 		bzero(inifa, inifa_size);
 		inifa->ia_ifa.ifa_free = in_ifaddr_free;
 		inifa->ia_ifa.ifa_debug |= IFD_ALLOC;
+		ifa_lock_init(&inifa->ia_ifa);
 		if (inifa_debug != 0) {
 			struct in_ifaddr_dbg *inifa_dbg =
 			    (struct in_ifaddr_dbg *)inifa;
 			inifa->ia_ifa.ifa_debug |= IFD_DEBUG;
 			inifa->ia_ifa.ifa_trace = in_ifaddr_trace;
+			inifa->ia_ifa.ifa_attached = in_ifaddr_attached;
+			inifa->ia_ifa.ifa_detached = in_ifaddr_detached;
 			ctrace_record(&inifa_dbg->inifa_alloc);
 		}
 	}
@@ -1651,21 +1783,79 @@ in_ifaddr_alloc(int how)
 static void
 in_ifaddr_free(struct ifaddr *ifa)
 {
-	if (ifa->ifa_refcnt != 0)
+	IFA_LOCK_ASSERT_HELD(ifa);
+
+	if (ifa->ifa_refcnt != 0) {
 		panic("%s: ifa %p bad ref cnt", __func__, ifa);
-	if (!(ifa->ifa_debug & IFD_ALLOC))
+		/* NOTREACHED */
+	} else if (!(ifa->ifa_debug & IFD_ALLOC)) {
 		panic("%s: ifa %p cannot be freed", __func__, ifa);
-
+		/* NOTREACHED */
+	}
 	if (ifa->ifa_debug & IFD_DEBUG) {
 		struct in_ifaddr_dbg *inifa_dbg = (struct in_ifaddr_dbg *)ifa;
 		ctrace_record(&inifa_dbg->inifa_free);
 		bcopy(&inifa_dbg->inifa, &inifa_dbg->inifa_old,
 		    sizeof (struct in_ifaddr));
+		if (ifa->ifa_debug & IFD_TRASHED) {
+			/* Become a regular mutex, just in case */
+			IFA_CONVERT_LOCK(ifa);
+			lck_mtx_lock(&inifa_trash_lock);
+			TAILQ_REMOVE(&inifa_trash_head, inifa_dbg,
+			    inifa_trash_link);
+			lck_mtx_unlock(&inifa_trash_lock);
+			ifa->ifa_debug &= ~IFD_TRASHED;
+		}
 	}
+	IFA_UNLOCK(ifa);
+	ifa_lock_destroy(ifa);
 	bzero(ifa, sizeof (struct in_ifaddr));
 	zfree(inifa_zone, ifa);
 }
 
+static void
+in_ifaddr_attached(struct ifaddr *ifa)
+{
+	struct in_ifaddr_dbg *inifa_dbg = (struct in_ifaddr_dbg *)ifa;
+
+	IFA_LOCK_ASSERT_HELD(ifa);
+
+	if (!(ifa->ifa_debug & IFD_DEBUG)) {
+		panic("%s: ifa %p has no debug structure", __func__, ifa);
+		/* NOTREACHED */
+	}
+	if (ifa->ifa_debug & IFD_TRASHED) {
+		/* Become a regular mutex, just in case */
+		IFA_CONVERT_LOCK(ifa);
+		lck_mtx_lock(&inifa_trash_lock);
+		TAILQ_REMOVE(&inifa_trash_head, inifa_dbg, inifa_trash_link);
+		lck_mtx_unlock(&inifa_trash_lock);
+		ifa->ifa_debug &= ~IFD_TRASHED;
+	}
+}
+
+static void
+in_ifaddr_detached(struct ifaddr *ifa)
+{
+	struct in_ifaddr_dbg *inifa_dbg = (struct in_ifaddr_dbg *)ifa;
+
+	IFA_LOCK_ASSERT_HELD(ifa);
+
+	if (!(ifa->ifa_debug & IFD_DEBUG)) {
+		panic("%s: ifa %p has no debug structure", __func__, ifa);
+		/* NOTREACHED */
+	} else if (ifa->ifa_debug & IFD_TRASHED) {
+		panic("%s: ifa %p is already in trash list", __func__, ifa);
+		/* NOTREACHED */
+	}
+	ifa->ifa_debug |= IFD_TRASHED;
+	/* Become a regular mutex, just in case */
+	IFA_CONVERT_LOCK(ifa);
+	lck_mtx_lock(&inifa_trash_lock);
+	TAILQ_INSERT_TAIL(&inifa_trash_head, inifa_dbg, inifa_trash_link);
+	lck_mtx_unlock(&inifa_trash_lock);
+}
+
 static void
 in_ifaddr_trace(struct ifaddr *ifa, int refhold)
 {
@@ -1674,9 +1864,10 @@ in_ifaddr_trace(struct ifaddr *ifa, int refhold)
 	u_int32_t idx;
 	u_int16_t *cnt;
 
-	if (!(ifa->ifa_debug & IFD_DEBUG))
+	if (!(ifa->ifa_debug & IFD_DEBUG)) {
 		panic("%s: ifa %p has no debug structure", __func__, ifa);
-
+		/* NOTREACHED */
+	}
 	if (refhold) {
 		cnt = &inifa_dbg->inifa_refhold_cnt;
 		tr = inifa_dbg->inifa_refhold;
@@ -1685,6 +1876,6 @@ in_ifaddr_trace(struct ifaddr *ifa, int refhold)
 		tr = inifa_dbg->inifa_refrele;
 	}
 
-	idx = OSAddAtomic16(1, (volatile SInt16 *)cnt) % CTRACE_HIST_SIZE;
+	idx = atomic_add_16_ov(cnt, 1) % INIFA_TRACE_HIST_SIZE;
 	ctrace_record(&tr[idx]);
 }
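
The tracing hunk above replaces OSAddAtomic16() with atomic_add_16_ov() and a per-address history sized by INIFA_TRACE_HIST_SIZE: an atomically advanced counter selects a slot in a fixed ring, so the most recent hold/release backtraces are always available from a core dump without any locking on the trace path. The ring arithmetic in isolation (a sketch; ctrace_t and ctrace_record() as used above):

/*
 * Sketch: lock-free ring of backtrace captures.  The counter
 * wraps modulo the ring size, newest entries overwriting oldest.
 */
#define TRACE_HIST_SIZE	8	/* stands in for INIFA_TRACE_HIST_SIZE */

static u_int16_t trace_cnt;
static ctrace_t trace_hist[TRACE_HIST_SIZE];

static void
trace_event(void)
{
	u_int32_t idx;

	idx = atomic_add_16_ov(&trace_cnt, 1) % TRACE_HIST_SIZE;
	ctrace_record(&trace_hist[idx]);	/* capture a backtrace */
}
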
diff --git a/bsd/netinet/in.h b/bsd/netinet/in.h
index fc38f8401..4e66c26c7 100644
--- a/bsd/netinet/in.h
+++ b/bsd/netinet/in.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -67,6 +67,10 @@
 #include <sys/_types.h>
 #include <stdint.h>		/* uint(8|16|32)_t */
 
+#ifndef KERNEL
+#include <Availability.h>
+#endif
+
 #ifndef _IN_ADDR_T
 #define _IN_ADDR_T
 typedef	__uint32_t	in_addr_t;	/* base type for internet address */
@@ -207,10 +211,11 @@ typedef	__uint16_t	in_port_t;
 #define	IPPROTO_ENCAP		98		/* encapsulation header */
 #define	IPPROTO_APES		99		/* any private encr. scheme */
 #define	IPPROTO_GMTP		100		/* GMTP*/
-#define	IPPROTO_IPCOMP	108		/* payload compression (IPComp) */
 /* 101-254: Partly Unassigned */
 #define	IPPROTO_PIM		103		/* Protocol Independent Mcast */
+#define	IPPROTO_IPCOMP		108		/* payload compression (IPComp) */
 #define	IPPROTO_PGM		113		/* PGM */
+#define	IPPROTO_SCTP		132		/* SCTP */
 /* 255: Reserved */
 /* BSD Private, local use, namespace incursion */
 #define	IPPROTO_DIVERT		254		/* divert pseudo-protocol */
@@ -341,6 +346,7 @@ struct in_addr {
 #define	IN_BADCLASS(i)		(((u_int32_t)(i) & 0xf0000000) == 0xf0000000)
 
 #define	INADDR_LOOPBACK		(u_int32_t)0x7f000001
+
 #ifndef KERNEL
 #define	INADDR_NONE		0xffffffff		/* -1 return */
 #endif
@@ -348,11 +354,25 @@ struct in_addr {
 #define	INADDR_UNSPEC_GROUP	(u_int32_t)0xe0000000	/* 224.0.0.0 */
 #define	INADDR_ALLHOSTS_GROUP	(u_int32_t)0xe0000001	/* 224.0.0.1 */
 #define	INADDR_ALLRTRS_GROUP	(u_int32_t)0xe0000002	/* 224.0.0.2 */
+#define	INADDR_ALLRPTS_GROUP	(u_int32_t)0xe0000016	/* 224.0.0.22, IGMPv3 */
+#define	INADDR_CARP_GROUP	(u_int32_t)0xe0000012	/* 224.0.0.18 */
+#define	INADDR_PFSYNC_GROUP	(u_int32_t)0xe00000f0	/* 224.0.0.240 */
+#define	INADDR_ALLMDNS_GROUP	(u_int32_t)0xe00000fb	/* 224.0.0.251 */
 #define	INADDR_MAX_LOCAL_GROUP	(u_int32_t)0xe00000ff	/* 224.0.0.255 */
 
 #ifdef __APPLE__
 #define IN_LINKLOCALNETNUM	(u_int32_t)0xA9FE0000 /* 169.254.0.0 */
 #define IN_LINKLOCAL(i)		(((u_int32_t)(i) & IN_CLASSB_NET) == IN_LINKLOCALNETNUM)
+#define IN_LOOPBACK(i)		(((u_int32_t)(i) & 0xff000000) == 0x7f000000)
+#define IN_ZERONET(i)		(((u_int32_t)(i) & 0xff000000) == 0)
+
+#define	IN_PRIVATE(i)	((((u_int32_t)(i) & 0xff000000) == 0x0a000000) || \
+			 (((u_int32_t)(i) & 0xfff00000) == 0xac100000) || \
+			 (((u_int32_t)(i) & 0xffff0000) == 0xc0a80000))
+
+#define	IN_LOCAL_GROUP(i)	(((u_int32_t)(i) & 0xffffff00) == 0xe0000000)
+
+#define	IN_ANY_LOCAL(i)		(IN_LINKLOCAL(i) || IN_LOCAL_GROUP(i))
 #endif
 
 #define	IN_LOOPBACKNET		127			/* official! */
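
Like IN_LINKLOCAL just above, the new classification macros take the address in host byte order; callers elsewhere in this patch apply ntohl() first. A usage sketch:

/* Sketch: classifying an IPv4 address with the macros above. */
static const char *
in_classify(struct in_addr addr)
{
	u_int32_t i = ntohl(addr.s_addr);	/* host byte order */

	if (IN_LOOPBACK(i))
		return ("loopback");
	if (IN_ZERONET(i))
		return ("zeronet");
	if (IN_LINKLOCAL(i))
		return ("link-local");
	if (IN_PRIVATE(i))
		return ("private (RFC 1918)");
	if (IN_LOCAL_GROUP(i))
		return ("local multicast group");
	return ("other");
}
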
@@ -415,7 +435,9 @@ struct ip_opts {
 #define IP_STRIPHDR      	23   /* bool: drop receive of raw IP header */
 #endif
 #define IP_RECVTTL		24   /* bool; receive reception TTL w/dgram */
-#define	IP_BOUND_IF		25   /* set/get bound interface */
+#define	IP_BOUND_IF		25   /* int; set/get bound interface */
+#define	IP_PKTINFO		26   /* get pktinfo on recv socket, set src on sent dgram */
+#define	IP_RECVPKTINFO		IP_PKTINFO	/* receive pktinfo w/dgram */
 
 
 #define	IP_FW_ADD     		40   /* add a firewall rule to chain */
@@ -440,24 +462,53 @@ struct ip_opts {
 #define	IP_DUMMYNET_GET		64   /* get entire dummynet pipes */
 
 #define	IP_TRAFFIC_MGT_BACKGROUND	65   /* int*; get background IO flags; set background IO */
+#define	IP_MULTICAST_IFINDEX	66   /* int*; set/get IP multicast i/f index */
+
+/* IPv4 Source Filter Multicast API [RFC3678] */
+#define	IP_ADD_SOURCE_MEMBERSHIP	70   /* join a source-specific group */
+#define	IP_DROP_SOURCE_MEMBERSHIP	71   /* drop a single source */
+#define	IP_BLOCK_SOURCE			72   /* block a source */
+#define	IP_UNBLOCK_SOURCE		73   /* unblock a source */
+
+/* The following option is private; do not use it from user applications. */
+#define	IP_MSFILTER			74   /* set/get filter list */
+
+/* Protocol Independent Multicast API [RFC3678] */
+#define	MCAST_JOIN_GROUP		80   /* join an any-source group */
+#define	MCAST_LEAVE_GROUP		81   /* leave all sources for group */
+#define	MCAST_JOIN_SOURCE_GROUP		82   /* join a source-specific group */
+#define	MCAST_LEAVE_SOURCE_GROUP	83   /* leave a single source */
+#define	MCAST_BLOCK_SOURCE		84   /* block a source */
+#define	MCAST_UNBLOCK_SOURCE		85   /* unblock a source */
 
 #ifdef PRIVATE
 #define	IP_FORCE_OUT_IFP	69   /* deprecated; use IP_BOUND_IF instead */
-#endif
-
-/* Background socket configuration flags */
-#ifdef __APPLE_API_UNSTABLE
-#define TRAFFIC_MGT_SO_BACKGROUND	0x0001	/* background socket */
-#define TRAFFIC_MGT_SO_BG_SUPPRESSED	0x0002	/* currently throttled */
-#define TRAFFIC_MGT_SO_BG_REGULATE	0x0004	/* traffic is regulated */
-#endif /* __APPLE_API_UNSTABLE */
+#define	IP_NO_IFT_CELLULAR	6969 /* for internal use only */
+#define	IP_NO_IFT_PDP		IP_NO_IFT_CELLULAR /* deprecated */
+#define	IP_OUT_IF		9696 /* for internal use only */
+#endif /* PRIVATE */
 
 /*
  * Defaults and limits for options
  */
 #define	IP_DEFAULT_MULTICAST_TTL  1	/* normally limit m'casts to 1 hop  */
 #define	IP_DEFAULT_MULTICAST_LOOP 1	/* normally hear sends if a member  */
-#define	IP_MAX_MEMBERSHIPS	20	/* per socket */
+
+/*
+ * The imo_membership vector for each socket is now dynamically allocated at
+ * run-time, bounded by USHRT_MAX, and is reallocated when needed, sized
+ * according to a power-of-two increment.
+ */
+#define	IP_MIN_MEMBERSHIPS	31
+#define	IP_MAX_MEMBERSHIPS	4095
+
+/*
+ * Default resource limits for IPv4 multicast source filtering.
+ * These may be modified by sysctl.
+ */
+#define	IP_MAX_GROUP_SRC_FILTER		512	/* sources per group */
+#define	IP_MAX_SOCK_SRC_FILTER		128	/* sources per socket/group */
+#define	IP_MAX_SOCK_MUTE_FILTER		128	/* XXX no longer used */
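
The comment above describes the membership vector growing in power-of-two-sized steps between IP_MIN_MEMBERSHIPS and IP_MAX_MEMBERSHIPS (both one less than a power of two). A growth policy consistent with that description, sketched here only for illustration (the real reallocation lives in the multicast socket code, not in this header):

/*
 * Sketch: next capacity for the imo_membership vector,
 * 31 -> 63 -> 127 -> ... capped at 4095.
 */
static int
imo_next_cap(int cur)
{
	int next;

	if (cur == 0)
		return (IP_MIN_MEMBERSHIPS);
	next = ((cur + 1) << 1) - 1;
	return (next > IP_MAX_MEMBERSHIPS ? IP_MAX_MEMBERSHIPS : next);
}
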
 
 /*
  * Argument structure for IP_ADD_MEMBERSHIP and IP_DROP_MEMBERSHIP.
@@ -467,6 +518,105 @@ struct ip_mreq {
 	struct	in_addr imr_interface;	/* local IP address of interface */
 };
 
+/*
+ * Modified argument structure for IP_MULTICAST_IF, obtained from Linux.
+ * This is used to specify an interface index for multicast sends, as
+ * the IPv4 legacy APIs do not support this (unless IP_SENDIF is available).
+ */
+struct ip_mreqn {
+	struct	in_addr imr_multiaddr;	/* IP multicast address of group */
+	struct	in_addr imr_address;	/* local IP address of interface */
+	int		imr_ifindex;	/* Interface index; cast to uint32_t */
+};
+
+#pragma pack(4)
+/*
+ * Argument structure for IPv4 Multicast Source Filter APIs. [RFC3678]
+ */
+struct ip_mreq_source {
+	struct	in_addr imr_multiaddr;	/* IP multicast address of group */
+	struct	in_addr imr_sourceaddr;	/* IP address of source */
+	struct	in_addr imr_interface;	/* local IP address of interface */
+};
+
+/*
+ * Argument structures for Protocol-Independent Multicast Source
+ * Filter APIs. [RFC3678]
+ */
+struct group_req {
+	uint32_t		gr_interface;	/* interface index */
+	struct sockaddr_storage	gr_group;	/* group address */
+};
+
+struct group_source_req {
+	uint32_t		gsr_interface;	/* interface index */
+	struct sockaddr_storage	gsr_group;	/* group address */
+	struct sockaddr_storage	gsr_source;	/* source address */
+};
+
+#ifndef __MSFILTERREQ_DEFINED
+#define __MSFILTERREQ_DEFINED
+/*
+ * The following structure is private; do not use it from user applications.
+ * It is used to communicate IP_MSFILTER/IPV6_MSFILTER information between
+ * the RFC 3678 libc functions and the kernel.
+ */
+struct __msfilterreq {
+	uint32_t		 msfr_ifindex;	/* interface index */
+	uint32_t		 msfr_fmode;	/* filter mode for group */
+	uint32_t		 msfr_nsrcs;	/* # of sources in msfr_srcs */
+	uint32_t		__msfr_align;	
+	struct sockaddr_storage	 msfr_group;	/* group address */
+	struct sockaddr_storage	*msfr_srcs;
+};
+
+#ifdef XNU_KERNEL_PRIVATE
+struct __msfilterreq32 {
+	uint32_t		 msfr_ifindex;	/* interface index */
+	uint32_t		 msfr_fmode;	/* filter mode for group */
+	uint32_t		 msfr_nsrcs;	/* # of sources in msfr_srcs */
+	uint32_t		__msfr_align;	
+	struct sockaddr_storage	 msfr_group;	/* group address */
+	user32_addr_t		 msfr_srcs;
+};
+
+struct __msfilterreq64 {
+	uint32_t		 msfr_ifindex;	/* interface index */
+	uint32_t		 msfr_fmode;	/* filter mode for group */
+	uint32_t		 msfr_nsrcs;	/* # of sources in msfr_srcs */
+	uint32_t		__msfr_align;	
+	struct sockaddr_storage	 msfr_group;	/* group address */
+	user64_addr_t		 msfr_srcs;
+};
+#endif /* XNU_KERNEL_PRIVATE */
+#endif /* __MSFILTERREQ_DEFINED */
+
+#pragma pack()
+struct sockaddr;
+
+#ifndef KERNEL
+/*
+ * Advanced (Full-state) APIs [RFC3678]
+ * The RFC specifies uint_t for the 6th argument to [sg]etsourcefilter().
+ * We use uint32_t here to be consistent.
+ */
+int	setipv4sourcefilter(int, struct in_addr, struct in_addr, uint32_t,
+	    uint32_t, struct in_addr *) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_NA);
+int	getipv4sourcefilter(int, struct in_addr, struct in_addr, uint32_t *,
+	    uint32_t *, struct in_addr *) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_NA);
+int	setsourcefilter(int, uint32_t, struct sockaddr *, socklen_t,
+	    uint32_t, uint32_t, struct sockaddr_storage *) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_NA);
+int	getsourcefilter(int, uint32_t, struct sockaddr *, socklen_t,
+	    uint32_t *, uint32_t *, struct sockaddr_storage *) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_NA);
+#endif
+
+/*
+ * Filter modes; also used to represent per-socket filter mode internally.
+ */
+#define	MCAST_UNDEFINED	0	/* fmode: not yet defined */
+#define	MCAST_INCLUDE	1	/* fmode: include these source(s) */
+#define	MCAST_EXCLUDE	2	/* fmode: exclude these source(s) */
+
 /*
  * Argument for IP_PORTRANGE:
  * - which range to search when port is unspecified at bind() or connect()
@@ -476,6 +626,31 @@ struct ip_mreq {
 #define	IP_PORTRANGE_LOW	2	/* "low" - vouchsafe security */
 
 
+/*
+ * IP_PKTINFO: Packet information (equivalent to RFC 2292 sec. 5 for IPv4).
+ * This structure is used for:
+ *
+ * 1) Receiving ancillary data about the datagram if the IP_PKTINFO sockopt
+ *    is set on the socket.  In this case ipi_ifindex will contain the
+ *    interface index the datagram was received on, and ipi_addr the IP
+ *    address the datagram was received to.
+ *
+ * 2) Sending a datagram using a specific interface or IP source address.
+ *    If ipi_ifindex is set to non-zero when in_pktinfo is passed as
+ *    ancillary data of type IP_PKTINFO, it will be used as the source
+ *    interface to send the datagram from.  If ipi_ifindex is null,
+ *    ipi_spec_dst will be used for the source address.
+ *
+ *    Note: if IP_BOUND_IF is set on the socket, ipi_ifindex in the
+ *    ancillary IP_PKTINFO option silently overrides the bound interface
+ *    when it is specified during send time.
+ */
+struct in_pktinfo {
+	unsigned int	ipi_ifindex;	/* send/recv interface index */
+	struct in_addr	ipi_spec_dst;	/* Local address */
+	struct in_addr	ipi_addr;	/* IP Header dst address */
+};
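
A hedged userland sketch of case 1 above (not part of the patch): enable IP_RECVPKTINFO on a UDP socket and pull the in_pktinfo out of the control message on receive, using only standard socket APIs.

#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <string.h>

/* Returns bytes received; fills *pi if a pktinfo cmsg arrived. */
static ssize_t
recv_with_pktinfo(int s, void *buf, size_t len, struct in_pktinfo *pi)
{
	int on = 1;
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	char ctl[CMSG_SPACE(sizeof (struct in_pktinfo))];
	struct msghdr msg;
	struct cmsghdr *cm;
	ssize_t n;

	/* ask the stack to attach in_pktinfo to each datagram */
	if (setsockopt(s, IPPROTO_IP, IP_RECVPKTINFO, &on,
	    sizeof (on)) != 0)
		return (-1);

	memset(&msg, 0, sizeof (msg));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = ctl;
	msg.msg_controllen = sizeof (ctl);

	if ((n = recvmsg(s, &msg, 0)) < 0)
		return (n);

	for (cm = CMSG_FIRSTHDR(&msg); cm != NULL;
	    cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level == IPPROTO_IP &&
		    cm->cmsg_type == IP_PKTINFO)
			memcpy(pi, CMSG_DATA(cm), sizeof (*pi));
	}
	return (n);
}
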
+
 /*
  * Definitions for inet sysctl operations.
  *
@@ -616,6 +791,11 @@ extern int in_localaddr(struct in_addr);
 extern u_int32_t in_netof(struct in_addr);
 
 extern int inaddr_local(struct in_addr);
+
+#define	in_hosteq(s, t)	((s).s_addr == (t).s_addr)
+#define	in_nullhost(x)	((x).s_addr == INADDR_ANY)
+#define	in_allhosts(x)	((x).s_addr == htonl(INADDR_ALLHOSTS_GROUP))
+
 #endif /* KERNEL_PRIVATE */
 #define MAX_IPv4_STR_LEN	16
 #define MAX_IPv6_STR_LEN	64
diff --git a/bsd/netinet/in_arp.c b/bsd/netinet/in_arp.c
index 886528306..8a4dfcd14 100644
--- a/bsd/netinet/in_arp.c
+++ b/bsd/netinet/in_arp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -70,11 +70,14 @@
 #include <sys/kernel.h>
 #include <sys/mbuf.h>
 #include <sys/sysctl.h>
+#include <sys/mcache.h>
+#include <sys/protosw.h>
 #include <string.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/dlil.h>
 #include <net/if_types.h>
+#include <net/if_llreach.h>
 #include <net/route.h>
 #include <netinet/if_ether.h>
 #include <netinet/in_var.h>
@@ -83,7 +86,6 @@
 #define	SA(p) ((struct sockaddr *)(p))
 #define SIN(s) ((struct sockaddr_in *)s)
 #define CONST_LLADDR(s) ((const u_char*)((s)->sdl_data + (s)->sdl_nlen))
-#define	rt_expire rt_rmx.rmx_expire
 #define	equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
 
 static const size_t MAX_HW_LEN = 10;
@@ -100,16 +102,26 @@ static int arpt_down = 20;	/* once declared down, don't send for 20 sec */
 int apple_hwcksum_tx = 1;
 int apple_hwcksum_rx = 1;
 
-SYSCTL_INT(_net_link_ether_inet, OID_AUTO, prune_intvl, CTLFLAG_RW,
-	   &arpt_prune, 0, "");
-SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, 
-	   &arpt_keep, 0, "");
-SYSCTL_INT(_net_link_ether_inet, OID_AUTO, host_down_time, CTLFLAG_RW,
-	   &arpt_down, 0, "");
-SYSCTL_INT(_net_link_ether_inet, OID_AUTO, apple_hwcksum_tx, CTLFLAG_RW,
-	   &apple_hwcksum_tx, 0, "");
-SYSCTL_INT(_net_link_ether_inet, OID_AUTO, apple_hwcksum_rx, CTLFLAG_RW,
-	   &apple_hwcksum_rx, 0, "");
+static int arp_llreach_base = (LL_BASE_REACHABLE / 1000); /* seconds */
+
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, prune_intvl,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_prune, 0, "");
+
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_keep, 0, "");
+
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, host_down_time,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_down, 0, "");
+
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, apple_hwcksum_tx,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &apple_hwcksum_tx, 0, "");
+
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, apple_hwcksum_rx,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &apple_hwcksum_rx, 0, "");
+
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, arp_llreach_base,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &arp_llreach_base, LL_BASE_REACHABLE,
+    "default ARP link-layer reachability max lifetime (in seconds)");
 
 struct llinfo_arp {
 	/*
@@ -121,7 +133,10 @@ struct llinfo_arp {
 	 * The following are protected by rt_lock
 	 */
 	struct	mbuf *la_hold;		/* last packet until resolved/timeout */
-	int32_t	la_asked;		/* last time we QUERIED for this addr */
+	struct	if_llreach *la_llreach;	/* link-layer reachability record */
+	u_int64_t la_lastused;		/* last used timestamp */
+	u_int32_t la_asked;		/* # of requests sent */
+	u_int32_t la_persist;		/* expirable, but stays around */
 };
 
 /*
@@ -140,7 +155,7 @@ struct llinfo_arp {
  *
  *	- Routing lock (rnh_lock)
  *
- * la_hold, la_asked
+ * la_hold, la_asked, la_llreach, la_lastused
  *
  *	- Routing entry lock (rt_lock)
  *
@@ -153,33 +168,36 @@ static LIST_HEAD(, llinfo_arp) llinfo_arp;
 
 static int	arp_inuse, arp_allocated;
 
-static int	arp_maxtries = 5;
+static u_int32_t arp_maxtries = 5;
 static int	useloopback = 1; /* use loopback interface for local traffic */
 static int	arp_proxyall = 0;
 static int	arp_sendllconflict = 0;
 
-SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW,
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &arp_maxtries, 0, "");
-SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW,
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &useloopback, 0, "");
-SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW,
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &arp_proxyall, 0, "");
-SYSCTL_INT(_net_link_ether_inet, OID_AUTO, sendllconflict, CTLFLAG_RW,
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, sendllconflict, CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &arp_sendllconflict, 0, "");
 
-static int log_arp_warnings = 0;
+static int log_arp_warnings = 0;	/* Thread safe: no accumulated state */
 
-SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_warnings, CTLFLAG_RW,
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_warnings,
+	CTLFLAG_RW | CTLFLAG_LOCKED,
 	&log_arp_warnings, 0,
 	"log arp warning messages");
 
-static int keep_announcements = 1;
-SYSCTL_INT(_net_link_ether_inet, OID_AUTO, keep_announcements, CTLFLAG_RW,
+static int keep_announcements = 1;	/* Thread safe: no aging of state */
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, keep_announcements,
+	CTLFLAG_RW | CTLFLAG_LOCKED,
 	&keep_announcements, 0,
 	"keep arp announcements");
 
-static int send_conflicting_probes = 1;
-SYSCTL_INT(_net_link_ether_inet, OID_AUTO, send_conflicting_probes, CTLFLAG_RW,
+static int send_conflicting_probes = 1;	/* Thread safe: no accumulated state */
+SYSCTL_INT(_net_link_ether_inet, OID_AUTO, send_conflicting_probes,
+	CTLFLAG_RW | CTLFLAG_LOCKED,
 	&send_conflicting_probes, 0,
 	"send conflicting link-local arp probes");
 
@@ -188,6 +206,13 @@ static errno_t arp_lookup_route(const struct in_addr *, int,
 static void arptimer(void *);
 static struct llinfo_arp *arp_llinfo_alloc(void);
 static void arp_llinfo_free(void *);
+static void arp_llinfo_purge(struct rtentry *);
+static void arp_llinfo_get_ri(struct rtentry *, struct rt_reach_info *);
+
+static __inline void arp_llreach_use(struct llinfo_arp *);
+static __inline int arp_llreach_reachable(struct llinfo_arp *);
+static void arp_llreach_alloc(struct rtentry *, struct ifnet *, void *,
+    unsigned int, boolean_t);
 
 extern u_int32_t	ipv4_ll_arp_aware;
 
@@ -214,6 +239,7 @@ arp_init(void)
 		panic("%s: failed allocating llinfo_arp_zone", __func__);
 
 	zone_change(llinfo_arp_zone, Z_EXPAND, TRUE);
+	zone_change(llinfo_arp_zone, Z_CALLERACCT, FALSE);
 
 	arpinit_done = 1;
 
@@ -243,9 +269,194 @@ arp_llinfo_free(void *arg)
 		la->la_hold = NULL;
 	}
 
+	/* Purge any link-layer info caching */
+	VERIFY(la->la_rt->rt_llinfo == la);
+	if (la->la_rt->rt_llinfo_purge != NULL)
+		la->la_rt->rt_llinfo_purge(la->la_rt);
+
 	zfree(llinfo_arp_zone, la);
 }
 
+static void
+arp_llinfo_purge(struct rtentry *rt)
+{
+	struct llinfo_arp *la = rt->rt_llinfo;
+
+	RT_LOCK_ASSERT_HELD(rt);
+	VERIFY(rt->rt_llinfo_purge == arp_llinfo_purge && la != NULL);
+
+	if (la->la_llreach != NULL) {
+		RT_CONVERT_LOCK(rt);
+		ifnet_llreach_free(la->la_llreach);
+		la->la_llreach = NULL;
+	}
+	la->la_lastused = 0;
+}
+
+static void
+arp_llinfo_get_ri(struct rtentry *rt, struct rt_reach_info *ri)
+{
+	struct llinfo_arp *la = rt->rt_llinfo;
+	struct if_llreach *lr = la->la_llreach;
+
+	if (lr == NULL) {
+		bzero(ri, sizeof (*ri));
+	} else {
+		IFLR_LOCK(lr);
+		/* Export to rt_reach_info structure */
+		ifnet_lr2ri(lr, ri);
+		/* Export ARP send expiration time */
+		ri->ri_snd_expire = ifnet_llreach_up2cal(lr, la->la_lastused);
+		IFLR_UNLOCK(lr);
+	}
+}
+
+void
+arp_llreach_set_reachable(struct ifnet *ifp, void *addr, unsigned int alen)
+{
+	/* Nothing more to do if it's disabled */
+	if (arp_llreach_base == 0)
+		return;
+
+	ifnet_llreach_set_reachable(ifp, ETHERTYPE_IP, addr, alen);
+}
+
+static __inline void
+arp_llreach_use(struct llinfo_arp *la)
+{
+	if (la->la_llreach != NULL)
+		la->la_lastused = net_uptime();
+}
+
+static __inline int
+arp_llreach_reachable(struct llinfo_arp *la)
+{
+	struct if_llreach *lr;
+	const char *why = NULL;
+
+	/* Nothing more to do if it's disabled; pretend it's reachable */
+	if (arp_llreach_base == 0)
+		return (1);
+
+	if ((lr = la->la_llreach) == NULL) {
+		/*
+		 * Link-layer reachability record isn't present for this
+		 * ARP entry; pretend it's reachable and use it as is.
+		 */
+		return (1);
+	} else if (ifnet_llreach_reachable(lr)) {
+		/*
+		 * Record is present, it's not shared with other ARP
+		 * entries and a packet has recently been received
+		 * from the remote host; consider it reachable.
+		 */
+		if (lr->lr_reqcnt == 1)
+			return (1);
+
+		/* Prime it up, if this is the first time */
+		if (la->la_lastused == 0) {
+			VERIFY(la->la_llreach != NULL);
+			arp_llreach_use(la);
+		}
+
+		/*
+		 * Record is present and shared with one or more ARP
+		 * entries, and a packet has recently been received
+		 * from the remote host.  Since it's shared by more
+		 * than one IP addresses, we can't rely on the link-
+		 * layer reachability alone; consider it reachable if
+		 * this ARP entry has been used "recently."
+		 */
+		if (ifnet_llreach_reachable_delta(lr, la->la_lastused))
+			return (1);
+
+		why = "has alias(es) and hasn't been used in a while";
+	} else {
+		why = "haven't heard from it in a while";
+	}
+
+	if (log_arp_warnings) {
+		char tmp[MAX_IPv4_STR_LEN];
+		u_int64_t now = net_uptime();
+
+		log(LOG_DEBUG, "%s%d: ARP probe(s) needed for %s; "
+		    "%s [lastused %lld, lastrcvd %lld] secs ago\n",
+		    lr->lr_ifp->if_name, lr->lr_ifp->if_unit, inet_ntop(AF_INET,
+		    &SIN(rt_key(la->la_rt))->sin_addr, tmp, sizeof (tmp)), why,
+		    (la->la_lastused ? (int64_t)(now - la->la_lastused) : -1),
+		    (lr->lr_lastrcvd ? (int64_t)(now - lr->lr_lastrcvd) : -1));
+	}
+	return (0);
+}
+
+/*
+ * Obtain a link-layer source cache entry for the sender.
+ *
+ * NOTE: This is currently only for ARP/Ethernet.
+ */
+static void
+arp_llreach_alloc(struct rtentry *rt, struct ifnet *ifp, void *addr,
+    unsigned int alen, boolean_t solicited)
+{
+	VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
+	VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
+	if (arp_llreach_base != 0 &&
+	    rt->rt_expire != 0 && rt->rt_ifp != lo_ifp &&
+	    ifp->if_addrlen == IF_LLREACH_MAXLEN &&	/* Ethernet */
+	    alen == ifp->if_addrlen) {
+		struct llinfo_arp *la = rt->rt_llinfo;
+		struct if_llreach *lr;
+		const char *why = NULL, *type = "";
+
+		/* Become a regular mutex, just in case */
+		RT_CONVERT_LOCK(rt);
+
+		if ((lr = la->la_llreach) != NULL) {
+			type = (solicited ? "ARP reply" : "ARP announcement");
+			/*
+			 * If target has changed, create a new record;
+			 * otherwise keep existing record.
+			 */
+			IFLR_LOCK(lr);
+			if (bcmp(addr, lr->lr_key.addr, alen) != 0) {
+				IFLR_UNLOCK(lr);
+				/* Purge any link-layer info caching */
+				VERIFY(rt->rt_llinfo_purge != NULL);
+				rt->rt_llinfo_purge(rt);
+				lr = NULL;
+				why = " for different target HW address; "
+				    "using new llreach record";
+			} else {
+				lr->lr_probes = 0;	/* reset probe count */
+				IFLR_UNLOCK(lr);
+				if (solicited) {
+					why = " for same target HW address; "
+					    "keeping existing llreach record";
+				}
+			}
+		}
+
+		if (lr == NULL) {
+			lr = la->la_llreach = ifnet_llreach_alloc(ifp,
+			    ETHERTYPE_IP, addr, alen, arp_llreach_base);
+			if (lr != NULL) {
+				lr->lr_probes = 0;	/* reset probe count */
+				if (why == NULL)
+					why = "creating new llreach record";
+			}
+		}
+
+		if (log_arp_warnings && lr != NULL && why != NULL) {
+			char tmp[MAX_IPv4_STR_LEN];
+
+			log(LOG_DEBUG, "%s%d: %s%s for %s\n", ifp->if_name,
+			    ifp->if_unit, type, why, inet_ntop(AF_INET,
+			    &SIN(rt_key(rt))->sin_addr, tmp, sizeof (tmp)));
+		}
+	}
+}
+
 /*
  * Free an arp entry.
  */
@@ -264,6 +475,16 @@ arptfree(struct llinfo_arp *la)
 		la->la_asked = 0;
 		rt->rt_flags &= ~RTF_REJECT;
 		RT_UNLOCK(rt);
+	} else if (la->la_persist) {
+		/*
+		 * Instead of issuing RTM_DELETE, stop this route entry
+		 * from holding an interface idle reference count; if
+		 * the route is later reused, arp_validate() will revert
+		 * this action.
+		 */
+		if (rt->rt_refcnt == 0)
+			rt_clear_idleref(rt);
+		RT_UNLOCK(rt);
 	} else {
 		/*
 		 * Safe to drop rt_lock and use rt_key, since holding
@@ -281,16 +502,18 @@ in_arpdrain(void *ignored_arg)
 {
 #pragma unused (ignored_arg)
 	struct llinfo_arp *la, *ola;
-	struct timeval timenow;
+	uint64_t timenow;
 
 	lck_mtx_lock(rnh_lock);
 	la = llinfo_arp.lh_first;
-	getmicrotime(&timenow);
+	timenow = net_uptime();
 	while ((ola = la) != 0) {
 		struct rtentry *rt = la->la_rt;
 		la = la->la_le.le_next;
 		RT_LOCK(rt);
-		if (rt->rt_expire && rt->rt_expire <= timenow.tv_sec)
+		VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
+		VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
+		if (rt->rt_expire && rt->rt_expire <= timenow)
 			arptfree(ola); /* timer has expired, clear */
 		else
 			RT_UNLOCK(rt);
@@ -298,6 +521,20 @@ in_arpdrain(void *ignored_arg)
 	lck_mtx_unlock(rnh_lock);
 }
 
+void
+arp_validate(struct rtentry *rt)
+{
+	struct llinfo_arp *la = rt->rt_llinfo;
+
+	RT_LOCK_ASSERT_HELD(rt);
+	/*
+	 * If this is a persistent ARP entry, make it count towards the
+	 * interface idleness just like before arptfree() was called.
+	 */
+	if (la->la_persist)
+		rt_set_idleref(rt);
+}
+
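
Two related changes land in these hunks: ARP bookkeeping moves from getmicrotime() wall-clock seconds to net_uptime(), which is monotonic and immune to clock adjustments, and persistent (link-local) entries survive expiry by shedding only their interface idle reference, which arp_validate() restores on reuse. The expiry convention itself reduces to a simple comparison (sketch):

/*
 * Sketch: the expiry convention used above.  An expiration of
 * 0 means "never expires"; otherwise compare monotonic uptime.
 */
static int
arp_entry_expired(uint64_t expire)
{
	return (expire != 0 && expire <= net_uptime());
}
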
 /*
  * Timeout routine.  Age arp_tab entries periodically.
  */
@@ -322,7 +559,7 @@ arp_rtrequest(
 	struct sockaddr *gate = rt->rt_gateway;
 	struct llinfo_arp *la = rt->rt_llinfo;
 	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK, 0, 0, 0, 0, 0, {0}};
-	struct timeval timenow;
+	uint64_t timenow;
 
 	if (!arpinit_done) {
 		panic("%s: ARP has not been initialized", __func__);
@@ -333,7 +570,7 @@ arp_rtrequest(
 
 	if (rt->rt_flags & RTF_GATEWAY)
 		return;
-	getmicrotime(&timenow);
+	timenow = net_uptime();
 	switch (req) {
 
 	case RTM_ADD:
@@ -358,12 +595,14 @@ arp_rtrequest(
 				 * In case we're called before 1.0 sec.
 				 * has elapsed.
 				 */
-				rt->rt_expire = MAX(timenow.tv_sec, 1);
+				rt_setexpire(rt, MAX(timenow, 1));
 			}
 			break;
 		}
 		/* Announce a new entry if requested. */
 		if (rt->rt_flags & RTF_ANNOUNCE) {
+			if (la != NULL)
+				arp_llreach_use(la); /* Mark use timestamp */
 			RT_UNLOCK(rt);
 			dlil_send_arp(rt->rt_ifp, ARPOP_REQUEST,
 			    SDL(gate), rt_key(rt), NULL, rt_key(rt));
@@ -391,6 +630,8 @@ arp_rtrequest(
 				log(LOG_DEBUG, "%s: malloc failed\n", __func__);
 			break;
 		}
+		rt->rt_llinfo_get_ri = arp_llinfo_get_ri;
+		rt->rt_llinfo_purge = arp_llinfo_purge;
 		rt->rt_llinfo_free = arp_llinfo_free;
 
 		arp_inuse++, arp_allocated++;
@@ -402,14 +643,16 @@ arp_rtrequest(
 		/*
 		 * This keeps the multicast addresses from showing up
 		 * in `arp -a' listings as unresolved.  It's not actually
-		 * functional.  Then the same for broadcast.
+		 * functional.  Then the same for broadcast.  For IPv4
+		 * link-local address, keep the entry around even after
+		 * it has expired.
 		 */
 		if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) {
 			RT_UNLOCK(rt);
 			dlil_resolve_multi(rt->rt_ifp, rt_key(rt), gate,
 			    sizeof(struct sockaddr_dl));
 			RT_LOCK(rt);
-			rt->rt_expire = 0;
+			rt_setexpire(rt, 0);
 		}
 		else if (in_broadcast(SIN(rt_key(rt))->sin_addr, rt->rt_ifp)) {
 			struct sockaddr_dl	*gate_ll = SDL(gate);
@@ -421,35 +664,60 @@ arp_rtrequest(
 			gate_ll->sdl_family = AF_LINK;
 			gate_ll->sdl_len = sizeof(struct sockaddr_dl);
 			/* In case we're called before 1.0 sec. has elapsed */
-			rt->rt_expire = MAX(timenow.tv_sec, 1);
+			rt_setexpire(rt, MAX(timenow, 1));
+		} else if (IN_LINKLOCAL(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) {
+			/*
+			 * The persistent bit implies that once the ARP
+			 * entry has reached it expiration time, the idle
+			 * reference count to the interface will be released,
+			 * but the ARP entry itself stays in the routing table
+			 * until it is explicitly removed.
+			 */
+			la->la_persist = 1;
+			rt->rt_flags |= RTF_STATIC;
 		}
 
+		/* Become a regular mutex, just in case */
+		RT_CONVERT_LOCK(rt);
+		IFA_LOCK_SPIN(rt->rt_ifa);
 		if (SIN(rt_key(rt))->sin_addr.s_addr ==
 		    (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) {
-		    /*
-		     * This test used to be
-		     *	if (loif.if_flags & IFF_UP)
-		     * It allowed local traffic to be forced
-		     * through the hardware by configuring the loopback down.
-		     * However, it causes problems during network configuration
-		     * for boards that can't receive packets they send.
-		     * It is now necessary to clear "useloopback" and remove
-		     * the route to force traffic out to the hardware.
-		     */
-			rt->rt_expire = 0;
-			ifnet_lladdr_copy_bytes(rt->rt_ifp, LLADDR(SDL(gate)), SDL(gate)->sdl_alen = 6);
+			IFA_UNLOCK(rt->rt_ifa);
+			/*
+			 * This test used to be
+			 *	if (loif.if_flags & IFF_UP)
+			 * It allowed local traffic to be forced through the
+			 * hardware by configuring the loopback down.  However,
+			 * it causes problems during network configuration
+			 * for boards that can't receive packets they send.
+			 * It is now necessary to clear "useloopback" and
+			 * remove the route to force traffic out to the
+			 * hardware.
+			 */
+			rt_setexpire(rt, 0);
+			ifnet_lladdr_copy_bytes(rt->rt_ifp, LLADDR(SDL(gate)),
+			    SDL(gate)->sdl_alen = rt->rt_ifp->if_addrlen);
 			if (useloopback) {
-#if IFNET_ROUTE_REFCNT
-				/* Adjust route ref count for the interfaces */
-				if (rt->rt_if_ref_fn != NULL &&
-				    rt->rt_ifp != lo_ifp) {
-					rt->rt_if_ref_fn(lo_ifp, 1);
-					rt->rt_if_ref_fn(rt->rt_ifp, -1);
+				if (rt->rt_ifp != lo_ifp) {
+					/*
+					 * Purge any link-layer info caching.
+					 */
+					if (rt->rt_llinfo_purge != NULL)
+						rt->rt_llinfo_purge(rt);
+
+					/*
+					 * Adjust route ref count for the
+					 * interfaces.
+					 */
+					if (rt->rt_if_ref_fn != NULL) {
+						rt->rt_if_ref_fn(lo_ifp, 1);
+						rt->rt_if_ref_fn(rt->rt_ifp, -1);
+					}
 				}
-#endif /* IFNET_ROUTE_REFCNT */
 				rt->rt_ifp = lo_ifp;
 			}
-
+		} else {
+			IFA_UNLOCK(rt->rt_ifa);
 		}
 		break;
 
@@ -466,10 +734,18 @@ arp_rtrequest(
 		LIST_REMOVE(la, la_le);
 		la->la_le.le_next = NULL;
 		la->la_le.le_prev = NULL;
+
+		/*
+		 * Purge any link-layer info caching.
+		 */
+		if (rt->rt_llinfo_purge != NULL)
+			rt->rt_llinfo_purge(rt);
+
 		rt->rt_flags &= ~RTF_LLINFO;
-		if (la->la_hold != NULL)
+		if (la->la_hold != NULL) {
 			m_freem(la->la_hold);
-		la->la_hold = NULL;
+			la->la_hold = NULL;
+		}
 	}
 }
 
@@ -518,6 +794,13 @@ arp_lookup_route(const struct in_addr *addr, int create, int proxy,
 	sin.sin_addr.s_addr = addr->s_addr;
 	sin.sin_other = proxy ? SIN_PROXY : 0;
 
+	/*
+	 * If the destination is a link-local address, don't
+	 * constrain the lookup (don't scope it).
+	 */
+	if (IN_LINKLOCAL(ntohl(addr->s_addr)))
+		ifscope = IFSCOPE_NONE;
+
 	rt = rtalloc1_scoped((struct sockaddr*)&sin, create, 0, ifscope);
 	if (rt == NULL)
 		return (ENETUNREACH);
@@ -592,7 +875,7 @@ __private_extern__ errno_t
 arp_route_to_gateway_route(const struct sockaddr *net_dest, route_t hint0,
      route_t *out_route)
 {
-	struct timeval timenow;
+	uint64_t timenow;
 	route_t rt = hint0, hint = hint0;
 	errno_t error = 0;
 
@@ -728,9 +1011,11 @@ lookup:
 		}
 
 		if (rt->rt_flags & RTF_REJECT) {
-			getmicrotime(&timenow);
-			if (rt->rt_rmx.rmx_expire == 0 ||
-			    timenow.tv_sec < rt->rt_rmx.rmx_expire) {
+			VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
+			VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
+			timenow = net_uptime();
+			if (rt->rt_expire == 0 ||
+			    timenow < rt->rt_expire) {
 				RT_UNLOCK(rt);
 				senderr(rt == hint ? EHOSTDOWN : EHOSTUNREACH);
 			}
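
The paired VERIFY assertions introduced here encode an equivalence: the new 64-bit, uptime-based rt_expire and the legacy rmx_expire metric must be zero, or non-zero, together. A minimal userland sketch of the same invariant, with assert() standing in for the kernel's VERIFY and hypothetical field names:

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical stand-in for the two route expiration fields. */
    struct expire_pair {
        uint64_t rt_expire;     /* uptime-based expiration (new) */
        int32_t  rmx_expire;    /* legacy metric, kept in sync */
    };

    /*
     * (a == 0 || b != 0) && (a != 0 || b == 0) asserts exactly
     * "a is zero if and only if b is zero".
     */
    static void
    verify_expire_in_sync(const struct expire_pair *p)
    {
        assert(p->rt_expire == 0 || p->rmx_expire != 0);
        assert(p->rt_expire != 0 || p->rmx_expire == 0);
    }
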
@@ -774,8 +1059,9 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest,
 	route_t	route = NULL;	/* output route */
 	errno_t	result = 0;
 	struct sockaddr_dl	*gateway;
-	struct llinfo_arp	*llinfo;
-	struct timeval timenow;
+	struct llinfo_arp	*llinfo = NULL;
+	uint64_t timenow;
+	int unreachable = 0;
 
 	if (net_dest->sin_family != AF_INET)
 		return (EAFNOSUPPORT);
@@ -849,7 +1135,7 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest,
 			RT_LOCK_ASSERT_HELD(route);
 	}
 
-	if (result || route == NULL || route->rt_llinfo == NULL) {
+	if (result || route == NULL || (llinfo = route->rt_llinfo) == NULL) {
 		char	tmp[MAX_IPv4_STR_LEN];
 
 		/* In case result is 0 but no route, return an error */
@@ -868,13 +1154,22 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest,
 	 * Now that we have the right route, is it filled in?
 	 */
 	gateway = SDL(route->rt_gateway);
-	getmicrotime(&timenow);
-	if ((route->rt_rmx.rmx_expire == 0 ||
-	    route->rt_rmx.rmx_expire > timenow.tv_sec) && gateway != NULL &&
-	    gateway->sdl_family == AF_LINK && gateway->sdl_alen != 0) {
+	timenow = net_uptime();
+	VERIFY(route->rt_expire == 0 || route->rt_rmx.rmx_expire != 0);
+	VERIFY(route->rt_expire != 0 || route->rt_rmx.rmx_expire == 0);
+	if ((route->rt_expire == 0 ||
+	    route->rt_expire > timenow) && gateway != NULL &&
+	    gateway->sdl_family == AF_LINK && gateway->sdl_alen != 0 &&
+	    !(unreachable = !arp_llreach_reachable(llinfo))) {
 		bcopy(gateway, ll_dest, MIN(gateway->sdl_len, ll_dest_len));
 		result = 0;
+		arp_llreach_use(llinfo);	/* Mark use timestamp */
 		goto release;
+	} else if (unreachable) {
+		/*
+		 * Discard existing answer in case we need to probe.
+		 */
+		gateway->sdl_alen = 0;
 	}
 
 	if (ifp->if_flags & IFF_NOARP) {
@@ -885,34 +1180,51 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest,
 	/*
 	 * Route wasn't complete/valid. We need to arp.
 	 */
-	llinfo = route->rt_llinfo;
 	if (packet != NULL) {
 		if (llinfo->la_hold != NULL)
 			m_freem(llinfo->la_hold);
 		llinfo->la_hold = packet;
 	}
 
-	if (route->rt_rmx.rmx_expire) {
+	if (route->rt_expire) {
 		route->rt_flags &= ~RTF_REJECT;
 		if (llinfo->la_asked == 0 ||
-		    route->rt_rmx.rmx_expire != timenow.tv_sec) {
-			route->rt_rmx.rmx_expire = timenow.tv_sec;
+		    route->rt_expire != timenow) {
+			rt_setexpire(route, timenow);
 			if (llinfo->la_asked++ < arp_maxtries) {
 				struct ifaddr *rt_ifa = route->rt_ifa;
-				ifaref(rt_ifa);
+				struct sockaddr *sa;
+
+				/* Become a regular mutex, just in case */
+				RT_CONVERT_LOCK(route);
+				/* Update probe count, if applicable */
+				if (llinfo->la_llreach != NULL) {
+					IFLR_LOCK_SPIN(llinfo->la_llreach);
+					llinfo->la_llreach->lr_probes++;
+					IFLR_UNLOCK(llinfo->la_llreach);
+				}
+				IFA_LOCK_SPIN(rt_ifa);
+				IFA_ADDREF_LOCKED(rt_ifa);
+				sa = rt_ifa->ifa_addr;
+				IFA_UNLOCK(rt_ifa);
+				arp_llreach_use(llinfo); /* Mark use timestamp */
 				RT_UNLOCK(route);
 				dlil_send_arp(ifp, ARPOP_REQUEST, NULL,
-				    rt_ifa->ifa_addr, NULL,
-				    (const struct sockaddr*)net_dest);
-				ifafree(rt_ifa);
+				    sa, NULL, (const struct sockaddr*)net_dest);
+				IFA_REMREF(rt_ifa);
 				RT_LOCK(route);
 				result = EJUSTRETURN;
 				goto release;
 			} else {
 				route->rt_flags |= RTF_REJECT;
-				route->rt_rmx.rmx_expire = rt_expiry(route,
-				    route->rt_rmx.rmx_expire, arpt_down);
+				rt_setexpire(route, rt_expiry(route,
+				    route->rt_expire, arpt_down));
 				llinfo->la_asked = 0;
+				/*
+				 * Clear la_hold; don't free the packet since
+				 * we're not returning EJUSTRETURN; the caller
+				 * will handle the freeing.
+				 */
 				llinfo->la_hold = NULL;
 				result = EHOSTUNREACH;
 				goto release;
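
This hunk also makes the resolver's retry/back-off flow explicit: la_asked counts probes since the last answer; below arp_maxtries another request goes out and EJUSTRETURN tells the caller the packet is queued, while at the limit the route is flagged RTF_REJECT with an arpt_down hold-down and EHOSTUNREACH is returned. A compact sketch of that decision, with the tunables passed as plain parameters (rt_expiry()'s clamping is elided):

    #include <stdint.h>

    #define EJUSTRETURN  (-2)   /* "packet queued; caller is done" */
    #define EHOSTUNREACH 65

    struct probe_state {
        unsigned la_asked;      /* probes sent since last answer */
        uint64_t rt_expire;     /* next event time, uptime seconds */
        int      rejected;      /* models RTF_REJECT */
    };

    static int
    probe_step(struct probe_state *st, uint64_t now,
        unsigned maxtries, uint64_t hold_down)
    {
        st->rt_expire = now;
        if (st->la_asked++ < maxtries)
            return (EJUSTRETURN);        /* send another request */
        st->rejected = 1;                /* RTF_REJECT */
        st->rt_expire = now + hold_down; /* arpt_down hold-down */
        st->la_asked = 0;
        return (EHOSTUNREACH);
    }
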
@@ -950,52 +1262,61 @@ arp_ip_handle_input(
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 	struct in_ifaddr *best_ia = NULL;
+	struct sockaddr_in best_ia_sin;
 	route_t	route = NULL;
 	char buf[3 * MAX_HW_LEN]; // enough for MAX_HW_LEN byte hw address
 	struct llinfo_arp *llinfo;
 	errno_t	error;
 	int created_announcement = 0;
 	int bridged = 0, is_bridge = 0;
-	
+
 	/* Do not respond to requests for 0.0.0.0 */
 	if (target_ip->sin_addr.s_addr == 0 && arpop == ARPOP_REQUEST)
 		goto done;
-	
- 	if (ifp->if_bridge)
+
+	if (ifp->if_bridge)
 		bridged = 1;
 	if (ifp->if_type == IFT_BRIDGE)
 		is_bridge = 1;
 
 	/*
 	 * Determine if this ARP is for us
-	 * For a bridge, we want to check the address irrespective 
+	 * For a bridge, we want to check the address irrespective
 	 * of the receive interface.
 	 */
 	lck_rw_lock_shared(in_ifaddr_rwlock);
 	TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr), ia_hash) {
+		IFA_LOCK_SPIN(&ia->ia_ifa);
 		if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
-			(ia->ia_ifp == ifp)) &&
+		    (ia->ia_ifp == ifp)) &&
 		    ia->ia_addr.sin_addr.s_addr == target_ip->sin_addr.s_addr) {
-				best_ia = ia;
-				ifaref(&best_ia->ia_ifa);
-				lck_rw_done(in_ifaddr_rwlock);
-				goto match;
+			best_ia = ia;
+			best_ia_sin = best_ia->ia_addr;
+			IFA_ADDREF_LOCKED(&ia->ia_ifa);
+			IFA_UNLOCK(&ia->ia_ifa);
+			lck_rw_done(in_ifaddr_rwlock);
+			goto match;
 		}
+		IFA_UNLOCK(&ia->ia_ifa);
 	}
 
 	TAILQ_FOREACH(ia, INADDR_HASH(sender_ip->sin_addr.s_addr), ia_hash) {
+		IFA_LOCK_SPIN(&ia->ia_ifa);
 		if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
-			(ia->ia_ifp == ifp)) &&
+		    (ia->ia_ifp == ifp)) &&
 		    ia->ia_addr.sin_addr.s_addr == sender_ip->sin_addr.s_addr) {
-				best_ia = ia;
-				ifaref(&best_ia->ia_ifa);
-				lck_rw_done(in_ifaddr_rwlock);
-				goto match;
+			best_ia = ia;
+			best_ia_sin = best_ia->ia_addr;
+			IFA_ADDREF_LOCKED(&ia->ia_ifa);
+			IFA_UNLOCK(&ia->ia_ifa);
+			lck_rw_done(in_ifaddr_rwlock);
+			goto match;
 		}
+		IFA_UNLOCK(&ia->ia_ifa);
 	}
 
-#define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia)								\
-	(ia->ia_ifp->if_bridge == ifp->if_softc &&								\
+#define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia)					\
+	(ia->ia_ifp->if_bridge == ifp->if_softc &&				\
 	!bcmp(ifnet_lladdr(ia->ia_ifp), ifnet_lladdr(ifp), ifp->if_addrlen) &&	\
 	addr == ia->ia_addr.sin_addr.s_addr)
 	/*
@@ -1005,14 +1326,20 @@ arp_ip_handle_input(
 	 * meant to be destined to the bridge member.
 	 */
 	if (is_bridge) {
-		TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr), ia_hash) {
-			if (BDG_MEMBER_MATCHES_ARP(target_ip->sin_addr.s_addr, ifp, ia)) {
+		TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr),
+		    ia_hash) {
+			IFA_LOCK_SPIN(&ia->ia_ifa);
+			if (BDG_MEMBER_MATCHES_ARP(target_ip->sin_addr.s_addr,
+			    ifp, ia)) {
 				ifp = ia->ia_ifp;
 				best_ia = ia;
-				ifaref(&best_ia->ia_ifa);
+				best_ia_sin = best_ia->ia_addr;
+				IFA_ADDREF_LOCKED(&ia->ia_ifa);
+				IFA_UNLOCK(&ia->ia_ifa);
 				lck_rw_done(in_ifaddr_rwlock);
 				goto match;
 			}
+			IFA_UNLOCK(&ia->ia_ifa);
 		}
 	}
 	lck_rw_done(in_ifaddr_rwlock);
@@ -1024,10 +1351,15 @@ arp_ip_handle_input(
 	 */
 	ifnet_lock_shared(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
-		if (ifa->ifa_addr->sa_family != AF_INET)
+		IFA_LOCK_SPIN(ifa);
+		if (ifa->ifa_addr->sa_family != AF_INET) {
+			IFA_UNLOCK(ifa);
 			continue;
+		}
 		best_ia = (struct in_ifaddr *)ifa;
-		ifaref(&best_ia->ia_ifa);
+		best_ia_sin = best_ia->ia_addr;
+		IFA_ADDREF_LOCKED(ifa);
+		IFA_UNLOCK(ifa);
 		ifnet_lock_done(ifp);
 		goto match;
 	}
@@ -1042,15 +1374,17 @@ arp_ip_handle_input(
 
 match:
 	/* If the packet is from this interface, ignore the packet */
-	if (!bcmp(CONST_LLADDR(sender_hw), ifnet_lladdr(ifp), sender_hw->sdl_len)) {
+	if (!bcmp(CONST_LLADDR(sender_hw), ifnet_lladdr(ifp), sender_hw->sdl_alen)) {
 		goto done;
 	}
 
 	/* Check for a conflict */
-	if (!bridged && sender_ip->sin_addr.s_addr == best_ia->ia_addr.sin_addr.s_addr) {
+	if (!bridged && sender_ip->sin_addr.s_addr == best_ia_sin.sin_addr.s_addr) {
 		struct kev_msg        ev_msg;
 		struct kev_in_collision	*in_collision;
 		u_char	storage[sizeof(struct kev_in_collision) + MAX_HW_LEN];
+		bzero(&ev_msg, sizeof(struct kev_msg));
+		bzero(storage, (sizeof(struct kev_in_collision) + MAX_HW_LEN));
 		in_collision = (struct kev_in_collision*)storage;
 		log(LOG_ERR, "%s%d duplicate IP address %s sent from address %s\n",
 			ifp->if_name, ifp->if_unit,
@@ -1083,7 +1417,7 @@ match:
 	 * entry locked, upon success.
 	 */
 	error = arp_lookup_route(&sender_ip->sin_addr,
-	    (target_ip->sin_addr.s_addr == best_ia->ia_addr.sin_addr.s_addr &&
+	    (target_ip->sin_addr.s_addr == best_ia_sin.sin_addr.s_addr &&
 	    sender_ip->sin_addr.s_addr != 0), 0, &route, ifp->if_index);
 
 	if (error == 0)
@@ -1142,6 +1476,9 @@ match:
 						sdl_addr_to_hex(sender_hw, buf, sizeof(buf)),
 						ifp->if_name, ifp->if_unit);
 					}
+					/* Mark use timestamp */
+					if (route->rt_llinfo != NULL)
+						arp_llreach_use(route->rt_llinfo);
 					/* We're done with the route */
 					RT_REMREF_LOCKED(route);
 					RT_UNLOCK(route);
@@ -1152,21 +1489,19 @@ match:
 					 * This will not force the device to pick a new number if the device
 					 * has already assigned that number.
 					 * This will not imply to the device that we own that address.
+					 * The link address is always present; it's never freed.
 					 */
 					ifnet_lock_shared(ifp);
-					ifa = TAILQ_FIRST(&ifp->if_addrhead);
-					if (ifa != NULL)
-						ifaref(ifa);
+					ifa = ifp->if_lladdr;
+					IFA_ADDREF(ifa);
 					ifnet_lock_done(ifp);
 					dlil_send_arp_internal(ifp, ARPOP_REQUEST,
-						ifa != NULL ? SDL(ifa->ifa_addr) : NULL,
+						SDL(ifa->ifa_addr),
 						(const struct sockaddr*)sender_ip, sender_hw,
 						(const struct sockaddr*)target_ip);
-					if (ifa != NULL) {
-						ifafree(ifa);
-						ifa = NULL;
-					}
-			 	}
+					IFA_REMREF(ifa);
+					ifa = NULL;
+				}
 			}
 			goto respond;
 		} else if (keep_announcements != 0
@@ -1203,6 +1538,8 @@ match:
 	}
 
 	RT_LOCK_ASSERT_HELD(route);
+	VERIFY(route->rt_expire == 0 || route->rt_rmx.rmx_expire != 0);
+	VERIFY(route->rt_expire != 0 || route->rt_rmx.rmx_expire == 0);
 	gateway = SDL(route->rt_gateway);
 	if (!bridged && route->rt_ifp != ifp) {
 		if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) || (ifp->if_eflags & IFEF_ARPLL) == 0) {
@@ -1218,7 +1555,7 @@ match:
 		}
 		else {
 			/* Don't change a permanent address */
-			if (route->rt_rmx.rmx_expire == 0) {
+			if (route->rt_expire == 0) {
 				goto respond;
 			}
 
@@ -1249,14 +1586,19 @@ match:
 				lck_mtx_unlock(rnh_lock);
 				goto respond;
 			}
-#if IFNET_ROUTE_REFCNT
-			/* Adjust route ref count for the interfaces */
-			if (route->rt_if_ref_fn != NULL &&
-			    route->rt_ifp != ifp) {
-				route->rt_if_ref_fn(ifp, 1);
-				route->rt_if_ref_fn(route->rt_ifp, -1);
+			if (route->rt_ifp != ifp) {
+				/*
+				 * Purge any link-layer info caching.
+				 */
+				if (route->rt_llinfo_purge != NULL)
+					route->rt_llinfo_purge(route);
+
+				/* Adjust route ref count for the interfaces */
+				if (route->rt_if_ref_fn != NULL) {
+					route->rt_if_ref_fn(ifp, 1);
+					route->rt_if_ref_fn(route->rt_ifp, -1);
+				}
 			}
-#endif /* IFNET_ROUTE_REFCNT */
 			/* Change the interface when the existing route is on */
 			route->rt_ifp = ifp;
 			rtsetifa(route, &best_ia->ia_ifa);
@@ -1274,7 +1616,7 @@ match:
 	}
 
 	if (gateway->sdl_alen && bcmp(LLADDR(gateway), CONST_LLADDR(sender_hw), gateway->sdl_alen)) {
-		if (route->rt_rmx.rmx_expire && log_arp_warnings) {
+		if (route->rt_expire && log_arp_warnings) {
 			char buf2[3 * MAX_HW_LEN];
 			log(LOG_INFO, "arp: %s moved from %s to %s on %s%d\n",
 			    inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str,
@@ -1283,7 +1625,7 @@ match:
 			    sdl_addr_to_hex(sender_hw, buf2, sizeof(buf2)),
 			    ifp->if_name, ifp->if_unit);
 		}
-		else if (route->rt_rmx.rmx_expire == 0) {
+		else if (route->rt_expire == 0) {
 			if (log_arp_warnings) {
 				log(LOG_ERR, "arp: %s attempts to modify "
 				    "permanent entry for %s on %s%d\n",
@@ -1302,22 +1644,26 @@ match:
 	bcopy(CONST_LLADDR(sender_hw), LLADDR(gateway), gateway->sdl_alen);
 
 	/* Update the expire time for the route and clear the reject flag */
-	if (route->rt_rmx.rmx_expire) {
-		struct timeval timenow;
-
-		getmicrotime(&timenow);
-		route->rt_rmx.rmx_expire =
-		    rt_expiry(route, timenow.tv_sec, arpt_keep);
+	if (route->rt_expire) {
+		uint64_t timenow;
+
+		timenow = net_uptime();
+		rt_setexpire(route,
+		    rt_expiry(route, timenow, arpt_keep));
 	}
 	route->rt_flags &= ~RTF_REJECT;
 
+	/* cache the gateway (sender HW) address */
+	arp_llreach_alloc(route, ifp, LLADDR(gateway), gateway->sdl_alen,
+	    (arpop == ARPOP_REPLY));
+
 	/* update the llinfo, send a queued packet if there is one */
 	llinfo = route->rt_llinfo;
 	llinfo->la_asked = 0;
 	if (llinfo->la_hold) {
 		struct mbuf *m0;
 		m0 = llinfo->la_hold;
-		llinfo->la_hold = 0;
+		llinfo->la_hold = NULL;
 
 		RT_UNLOCK(route);
 		dlil_output(ifp, PF_INET, m0, (caddr_t)route, rt_key(route), 0);
@@ -1327,6 +1673,9 @@ match:
 
 respond:
 	if (route != NULL) {
+		/* Mark use timestamp if we're going to send a reply */
+		if (arpop == ARPOP_REQUEST && route->rt_llinfo != NULL)
+			arp_llreach_use(route->rt_llinfo);
 		RT_REMREF_LOCKED(route);
 		RT_UNLOCK(route);
 		route = NULL;
@@ -1336,7 +1685,7 @@ respond:
 		goto done;
 
 	/* If we are not the target, check if we should proxy */
-	if (target_ip->sin_addr.s_addr != best_ia->ia_addr.sin_addr.s_addr) {
+	if (target_ip->sin_addr.s_addr != best_ia_sin.sin_addr.s_addr) {
 		/*
 		 * Find a proxy route; callee holds a reference on the
 		 * route and returns with the route entry locked, upon
@@ -1390,6 +1739,9 @@ respond:
 				goto done;
 			}
 		}
+		/* Mark use timestamp */
+		if (route->rt_llinfo != NULL)
+			arp_llreach_use(route->rt_llinfo);
 		RT_REMREF_LOCKED(route);
 		RT_UNLOCK(route);
 	}
@@ -1400,16 +1752,19 @@ respond:
 
 done:
 	if (best_ia != NULL)
-		ifafree(&best_ia->ia_ifa);
+		IFA_REMREF(&best_ia->ia_ifa);
 	return 0;
 }
 
 void
-arp_ifinit(
-	struct ifnet *ifp,
-	struct ifaddr *ifa)
+arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
 {
+	struct sockaddr *sa;
+
+	IFA_LOCK(ifa);
 	ifa->ifa_rtrequest = arp_rtrequest;
 	ifa->ifa_flags |= RTF_CLONING;
-	dlil_send_arp(ifp, ARPOP_REQUEST, NULL, ifa->ifa_addr, NULL, ifa->ifa_addr);
+	sa = ifa->ifa_addr;
+	IFA_UNLOCK(ifa);
+	dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa, NULL, sa);
 }
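
arp_ifinit now snapshots ifa_addr under IFA_LOCK and drops the lock before calling dlil_send_arp, so the potentially blocking send never runs with the address lock held; the ifaddr itself stays valid because the caller holds a reference. A userland sketch of the same lock-copy-unlock shape, using a pthread mutex:

    #include <pthread.h>

    struct addr_holder {
        pthread_mutex_t lock;
        void *addr;             /* models ifa->ifa_addr */
    };

    /* Snapshot the pointer under the lock, then use it unlocked. */
    static void *
    snapshot_addr(struct addr_holder *h)
    {
        void *sa;

        pthread_mutex_lock(&h->lock);
        sa = h->addr;           /* copy while protected */
        pthread_mutex_unlock(&h->lock);
        return (sa);            /* safe to hand to a blocking call */
    }
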
diff --git a/bsd/netinet/in_arp.h b/bsd/netinet/in_arp.h
index 9b1a740ac..99a106572 100644
--- a/bsd/netinet/in_arp.h
+++ b/bsd/netinet/in_arp.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2009-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -67,8 +67,11 @@ extern errno_t inet_arp_lookup(ifnet_t interface,
     size_t ll_dest_len, route_t hint, mbuf_t packet);
 #endif /* BSD_KERNEL_PRIVATE */
 #ifdef KERNEL_PRIVATE
+struct in_addr;
 extern void arp_init(void);
 extern void in_arpdrain(void *);
+extern void arp_validate(struct rtentry *);
+extern void arp_llreach_set_reachable(struct ifnet *, void *, unsigned int);
 /* arp_lookup_ip is obsolete, use inet_arp_lookup */
 extern errno_t arp_lookup_ip(ifnet_t interface,
     const struct sockaddr_in *ip_dest, struct sockaddr_dl *ll_dest,
diff --git a/bsd/netinet/in_cksum.c b/bsd/netinet/in_cksum.c
index cf3e3dbca..1fcafd583 100644
--- a/bsd/netinet/in_cksum.c
+++ b/bsd/netinet/in_cksum.c
@@ -93,7 +93,7 @@ union q_util {
         u_int64_t q;
 };
 
-#define ADDCARRY(x)  (x > 65535 ? x -= 65535 : x)
+#define ADDCARRY(x)  do { if (x > 65535) { x -= 65535; } } while (0)
 
 #define REDUCE32                                                          \
     {                                                                     \
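
The ADDCARRY change above swaps a value-yielding conditional expression for the do { ... } while (0) idiom, which turns the macro into a single statement that composes safely with unbraced if/else. A small self-contained illustration:

    /* Statement-like macro: safe in any statement context. */
    #define ADDCARRY_SKETCH(x) \
        do { if ((x) > 65535) { (x) -= 65535; } } while (0)

    /* Binds correctly even without braces around the 'if' arm. */
    static unsigned
    fold_carry(unsigned sum, int fold)
    {
        if (fold)
            ADDCARRY_SKETCH(sum);   /* one statement, one ';' */
        else
            sum = 0;
        return (sum);
    }
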
@@ -118,7 +118,7 @@ inet_cksum_simple(struct mbuf *m, int len)
 	return (inet_cksum(m, 0, 0, len));
 }
 
-inline u_short
+u_short
 in_addword(u_short a, u_short b)
 {
         union l_util l_util;
@@ -128,7 +128,7 @@ in_addword(u_short a, u_short b)
 	return (sum);
 }
 
-inline u_short
+u_short
 in_pseudo(u_int a, u_int b, u_int c)
 {
         u_int64_t sum;
@@ -141,77 +141,7 @@ in_pseudo(u_int a, u_int b, u_int c)
 
 }
 
-#if defined(__ppc__)
-
-extern u_short xsum_assym(u_short *p, int len, u_short xsum, int odd);
-
-u_int16_t
-inet_cksum(struct mbuf *m, unsigned int nxt, unsigned int skip,
-    unsigned int len)
-{
-	u_short *w;
-	u_int32_t sum = 0;
-	int mlen = 0;
-	int starting_on_odd  = 0;
-
-	KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_START, len,0,0,0,0);
-
-	/* sanity check */
-	if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.len < skip + len) {
-		panic("inet_cksum: mbuf len (%d) < off+len (%d+%d)\n",
-		    m->m_pkthdr.len, skip, len);
-	}
-
-	/* include pseudo header checksum? */
-	if (nxt != 0) {
-		struct ip *iph;
-
-		if (m->m_len < sizeof (struct ip))
-			panic("inet_cksum: bad mbuf chain");
-
-		iph = mtod(m, struct ip *);
-		sum = in_pseudo(iph->ip_src.s_addr, iph->ip_dst.s_addr,
-		    htonl(len + nxt));
-	}
-
-	if (skip != 0) {
-		for (; skip && m; m = m->m_next) {
-			if (m->m_len > skip) {
-				mlen = m->m_len - skip;
-				w = (u_short *)(m->m_data+skip);
-				goto skip_start;
-			} else {
-				skip -= m->m_len;
-			}
-		}
-	}
-
-	for (;m && len; m = m->m_next) {
-		if (m->m_len == 0)
-			continue;
-		mlen = m->m_len;
-		w = mtod(m, u_short *);
-
-skip_start:
-		if (len < mlen)
-			mlen = len;
-		sum = xsum_assym(w, mlen, sum, starting_on_odd);
-		len -= mlen;
-		if (mlen & 0x1)
-		{
-		    if (starting_on_odd)
-			starting_on_odd = 0;
-		    else
-			starting_on_odd = 1;
-		}
-	}
-
-	KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_END, 0,0,0,0,0);
-
-	return (~sum & 0xffff);
-}
-
-#elif defined(__arm__) && __ARM_ARCH__ >= 6
+#if defined(__arm__) && __ARM_ARCH__ >= 6
 
 extern int cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum);
 
diff --git a/bsd/netinet/in_dhcp.c b/bsd/netinet/in_dhcp.c
index c6fdffdd8..90fc06ae5 100644
--- a/bsd/netinet/in_dhcp.c
+++ b/bsd/netinet/in_dhcp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1988-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 1988-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -281,12 +281,6 @@ link_print(struct sockaddr_dl * dl_p)
 {
     int i;
 
-#if 0
-    printf("len %d index %d family %d type 0x%x nlen %d alen %d"
-	   " slen %d addr ", dl_p->sdl_len, 
-	   dl_p->sdl_index,  dl_p->sdl_family, dl_p->sdl_type,
-	   dl_p->sdl_nlen, dl_p->sdl_alen, dl_p->sdl_slen);
-#endif
     for (i = 0; i < dl_p->sdl_alen; i++) 
 	printf("%s%x", i ? ":" : "", 
 	       (link_address(dl_p))[i]);
@@ -297,19 +291,7 @@ link_print(struct sockaddr_dl * dl_p)
 static struct sockaddr_dl *
 link_from_ifnet(struct ifnet * ifp)
 {
-    struct ifaddr * addr;
-
-    ifnet_lock_shared(ifp);
-    TAILQ_FOREACH(addr, &ifp->if_addrhead, ifa_link) {
-	if (addr->ifa_addr->sa_family == AF_LINK) {
-	    struct sockaddr_dl * dl_p = (struct sockaddr_dl *)(addr->ifa_addr);
-	    
-	    ifnet_lock_done(ifp);
-	    return (dl_p);
-	}
-    }
-    ifnet_lock_done(ifp);
-    return (NULL);
+    return ((struct sockaddr_dl *)ifp->if_lladdr->ifa_addr);
 }
 
 /*
diff --git a/bsd/netinet/in_gif.c b/bsd/netinet/in_gif.c
index 482aef5e4..9a6cb3db6 100644
--- a/bsd/netinet/in_gif.c
+++ b/bsd/netinet/in_gif.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -94,7 +94,7 @@
 #include <net/net_osdep.h>
 
 int ip_gif_ttl = GIF_TTL;
-SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&ip_gif_ttl,	0, "");
 
 int
@@ -111,7 +111,7 @@ in_gif_output(
 	struct ip iphdr;	/* capsule IP header, host byte ordered */
 	int proto, error;
 	u_int8_t tos;
-	struct ip_out_args ipoa = { IFSCOPE_NONE };
+	struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
 
 	if (sin_src == NULL || sin_dst == NULL ||
 	    sin_src->sin_family != AF_INET ||
@@ -371,10 +371,13 @@ gif_encapcheck4(
 	{
 		if ((ifnet_flags(ia4->ia_ifa.ifa_ifp) & IFF_BROADCAST) == 0)
 			continue;
+		IFA_LOCK(&ia4->ia_ifa);
 		if (ip.ip_src.s_addr == ia4->ia_broadaddr.sin_addr.s_addr) {
+			IFA_UNLOCK(&ia4->ia_ifa);
 			lck_rw_done(in_ifaddr_rwlock);
 			return 0;
 		}
+		IFA_UNLOCK(&ia4->ia_ifa);
 	}
 	lck_rw_done(in_ifaddr_rwlock);
 
@@ -393,11 +396,6 @@ gif_encapcheck4(
 		if (rt != NULL)
 			RT_LOCK(rt);
 		if (rt == NULL || rt->rt_ifp != m->m_pkthdr.rcvif) {
-#if 0
-			log(LOG_WARNING, "%s: packet from 0x%x dropped "
-			    "due to ingress filter\n", if_name(&sc->gif_if),
-			    (u_int32_t)ntohl(sin.sin_addr.s_addr));
-#endif
 			if (rt != NULL) {
 				RT_UNLOCK(rt);
 				rtfree(rt);
diff --git a/bsd/netinet/in_mcast.c b/bsd/netinet/in_mcast.c
new file mode 100644
index 000000000..1854fd26e
--- /dev/null
+++ b/bsd/netinet/in_mcast.c
@@ -0,0 +1,3641 @@
+/*
+ * Copyright (c) 2010-2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*-
+ * Copyright (c) 2007-2009 Bruce Simpson.
+ * Copyright (c) 2005 Robert N. M. Watson.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * IPv4 multicast socket, group, and socket option processing module.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/sysctl.h>
+#include <sys/tree.h>
+#include <sys/mcache.h>
+
+#include <kern/zalloc.h>
+
+#include <pexpert/pexpert.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/igmp_var.h>
+
+#ifndef __SOCKUNION_DECLARED
+union sockunion {
+	struct sockaddr_storage	ss;
+	struct sockaddr		sa;
+	struct sockaddr_dl	sdl;
+	struct sockaddr_in	sin;
+};
+typedef union sockunion sockunion_t;
+#define __SOCKUNION_DECLARED
+#endif /* __SOCKUNION_DECLARED */
+
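
The sockunion type above lets one chunk of storage be viewed as whichever sockaddr flavor is needed, without pointer casts between incompatible struct types. A minimal usage sketch (sin_len is the BSD sockaddr length field):

    #include <string.h>
    #include <sys/socket.h>
    #include <netinet/in.h>

    /* Assumes the sockunion declaration above is in scope. */
    static in_addr_t
    sockunion_demo(struct in_addr group)
    {
        sockunion_t su;

        memset(&su, 0, sizeof (su));
        su.sin.sin_family = AF_INET;
        su.sin.sin_len = sizeof (struct sockaddr_in);
        su.sin.sin_addr = group;

        /* su.sa and su.sin alias the same bytes; no casts needed. */
        return (su.sa.sa_family == AF_INET ?
            su.sin.sin_addr.s_addr : INADDR_ANY);
    }
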
+/*
+ * Functions with non-static linkage defined in this file should be
+ * declared in in_var.h:
+ *  imo_multi_filter()
+ *  in_addmulti()
+ *  in_delmulti()
+ *  in_joingroup()
+ *  in_leavegroup()
+ * and ip_var.h:
+ *  inp_freemoptions()
+ *  inp_getmoptions()
+ *  inp_setmoptions()
+ *
+ * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti()
+ * and in_delmulti().
+ */
+static void	imf_commit(struct in_mfilter *);
+static int	imf_get_source(struct in_mfilter *imf,
+		    const struct sockaddr_in *psin,
+		    struct in_msource **);
+static struct in_msource *
+		imf_graft(struct in_mfilter *, const uint8_t,
+		    const struct sockaddr_in *);
+static int	imf_prune(struct in_mfilter *, const struct sockaddr_in *);
+static void	imf_rollback(struct in_mfilter *);
+static void	imf_reap(struct in_mfilter *);
+static int	imo_grow(struct ip_moptions *, size_t);
+static size_t	imo_match_group(const struct ip_moptions *,
+		    const struct ifnet *, const struct sockaddr *);
+static struct in_msource *
+		imo_match_source(const struct ip_moptions *, const size_t,
+		    const struct sockaddr *);
+static void	ims_merge(struct ip_msource *ims,
+		    const struct in_msource *lims, const int rollback);
+static int	in_getmulti(struct ifnet *, const struct in_addr *,
+		    struct in_multi **);
+static int	in_joingroup(struct ifnet *, const struct in_addr *,
+		    struct in_mfilter *, struct in_multi **);
+static int	inm_get_source(struct in_multi *inm, const in_addr_t haddr,
+		    const int noalloc, struct ip_msource **pims);
+static int	inm_is_ifp_detached(const struct in_multi *);
+static int	inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
+static void	inm_reap(struct in_multi *);
+static struct ip_moptions *
+		inp_findmoptions(struct inpcb *);
+static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
+static struct ifnet *
+		inp_lookup_mcast_ifp(const struct inpcb *,
+		    const struct sockaddr_in *, const struct in_addr);
+static int	inp_block_unblock_source(struct inpcb *, struct sockopt *);
+static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
+static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
+static int	sysctl_ip_mcast_filters SYSCTL_HANDLER_ARGS;
+static struct ifnet * ip_multicast_if(struct in_addr *, unsigned int *);
+static __inline__ int ip_msource_cmp(const struct ip_msource *,
+    const struct ip_msource *);
+
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPv4 multicast");
+
+static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER;
+SYSCTL_LONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &in_mcast_maxgrpsrc, "Max source filters per group");
+
+static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER;
+SYSCTL_LONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &in_mcast_maxsocksrc,
+    "Max source filters per socket");
+
+int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP;
+SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_LOCKED, 
+    &in_mcast_loop, 0, "Loopback multicast datagrams by default");
+
+SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
+    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_ip_mcast_filters,
+    "Per-interface stack-wide source filters");
+
+RB_GENERATE_PREV(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);
+
+#define	INM_TRACE_HIST_SIZE	32	/* size of trace history */
+
+/* For gdb */
+__private_extern__ unsigned int inm_trace_hist_size = INM_TRACE_HIST_SIZE;
+
+struct in_multi_dbg {
+	struct in_multi		inm;			/* in_multi */
+	u_int16_t		inm_refhold_cnt;	/* # of ref */
+	u_int16_t		inm_refrele_cnt;	/* # of rele */
+	/*
+	 * Circular lists of inm_addref and inm_remref callers.
+	 */
+	ctrace_t		inm_refhold[INM_TRACE_HIST_SIZE];
+	ctrace_t		inm_refrele[INM_TRACE_HIST_SIZE];
+	/*
+	 * Trash list linkage
+	 */
+	TAILQ_ENTRY(in_multi_dbg) inm_trash_link;
+};
+
+/* List of trash in_multi entries protected by inm_trash_lock */
+static TAILQ_HEAD(, in_multi_dbg) inm_trash_head;
+static decl_lck_mtx_data(, inm_trash_lock);
+
+#define	INM_ZONE_MAX		64		/* maximum elements in zone */
+#define	INM_ZONE_NAME		"in_multi"	/* zone name */
+
+#if DEBUG
+static unsigned int inm_debug = 1;		/* debugging (enabled) */
+#else
+static unsigned int inm_debug;			/* debugging (disabled) */
+#endif /* !DEBUG */
+static unsigned int inm_size;			/* size of zone element */
+static struct zone *inm_zone;			/* zone for in_multi */
+
+#define	IPMS_ZONE_MAX		64		/* maximum elements in zone */
+#define	IPMS_ZONE_NAME		"ip_msource"	/* zone name */
+
+static unsigned int ipms_size;			/* size of zone element */
+static struct zone *ipms_zone;			/* zone for ip_msource */
+
+#define	INMS_ZONE_MAX		64		/* maximum elements in zone */
+#define	INMS_ZONE_NAME		"in_msource"	/* zone name */
+
+static unsigned int inms_size;			/* size of zone element */
+static struct zone *inms_zone;			/* zone for in_msource */
+
+/* Lock group and attribute for in_multihead_lock lock */
+static lck_attr_t	*in_multihead_lock_attr;
+static lck_grp_t	*in_multihead_lock_grp;
+static lck_grp_attr_t	*in_multihead_lock_grp_attr;
+
+static decl_lck_rw_data(, in_multihead_lock);
+struct in_multihead in_multihead;
+
+static struct in_multi *in_multi_alloc(int);
+static void in_multi_free(struct in_multi *);
+static void in_multi_attach(struct in_multi *);
+static void inm_trace(struct in_multi *, int);
+
+static struct ip_msource *ipms_alloc(int);
+static void ipms_free(struct ip_msource *);
+static struct in_msource *inms_alloc(int);
+static void inms_free(struct in_msource *);
+
+#define	IMO_CAST_TO_NONCONST(x)	((struct ip_moptions *)(void *)(uintptr_t)x)
+#define	INM_CAST_TO_NONCONST(x)	((struct in_multi *)(void *)(uintptr_t)x)
+
+static __inline int
+ip_msource_cmp(const struct ip_msource *a, const struct ip_msource *b)
+{
+
+	if (a->ims_haddr < b->ims_haddr)
+		return (-1);
+	if (a->ims_haddr == b->ims_haddr)
+		return (0);
+	return (1);
+}
+
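
RB_GENERATE_PREV relies on ip_msource_cmp imposing a strict total order (return <0, 0, >0) on the host-byte-order keys; the same comparator shape works with qsort(3), as this standalone sketch shows:

    #include <stdint.h>
    #include <stdlib.h>

    static int
    haddr_cmp(const void *a, const void *b)
    {
        uint32_t ha = *(const uint32_t *)a;
        uint32_t hb = *(const uint32_t *)b;

        /* Same three-way contract the red-black tree expects. */
        return ((ha < hb) ? -1 : (ha == hb) ? 0 : 1);
    }

    /* Usage: qsort(addrs, naddrs, sizeof (uint32_t), haddr_cmp); */
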
+/*
+ * Inline function which wraps assertions for a valid ifp.
+ */
+static __inline__ int
+inm_is_ifp_detached(const struct in_multi *inm)
+{
+	VERIFY(inm->inm_ifma != NULL);
+	VERIFY(inm->inm_ifp == inm->inm_ifma->ifma_ifp);
+
+	return (!ifnet_is_attached(inm->inm_ifp, 0));
+}
+
+/*
+ * Initialize an in_mfilter structure to a known state at t0, t1
+ * with an empty source filter list.
+ */
+static __inline__ void
+imf_init(struct in_mfilter *imf, const int st0, const int st1)
+{
+	memset(imf, 0, sizeof(struct in_mfilter));
+	RB_INIT(&imf->imf_sources);
+	imf->imf_st[0] = st0;
+	imf->imf_st[1] = st1;
+}
+
+/*
+ * Resize the ip_moptions vector to the next power-of-two minus 1.
+ */
+static int
+imo_grow(struct ip_moptions *imo, size_t newmax)
+{
+	struct in_multi		**nmships;
+	struct in_multi		**omships;
+	struct in_mfilter	 *nmfilters;
+	struct in_mfilter	 *omfilters;
+	size_t			  idx;
+	size_t			  oldmax;
+
+	IMO_LOCK_ASSERT_HELD(imo);
+
+	nmships = NULL;
+	nmfilters = NULL;
+	omships = imo->imo_membership;
+	omfilters = imo->imo_mfilters;
+	oldmax = imo->imo_max_memberships;
+	if (newmax == 0)
+		newmax = ((oldmax + 1) * 2) - 1;
+
+	if (newmax > IP_MAX_MEMBERSHIPS)
+		return (ETOOMANYREFS);
+
+	if ((nmships = (struct in_multi **)_REALLOC(omships,
+	    sizeof (struct in_multi *) * newmax, M_IPMOPTS,
+	    M_WAITOK | M_ZERO)) == NULL)
+		return (ENOMEM);
+
+	imo->imo_membership = nmships;
+
+	if ((nmfilters = (struct in_mfilter *)_REALLOC(omfilters,
+	    sizeof (struct in_mfilter) * newmax, M_INMFILTER,
+	    M_WAITOK | M_ZERO)) == NULL)
+		return (ENOMEM);
+
+	imo->imo_mfilters = nmfilters;
+
+	/* Initialize newly allocated source filter heads. */
+	for (idx = oldmax; idx < newmax; idx++)
+		imf_init(&nmfilters[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
+
+	imo->imo_max_memberships = newmax;
+
+	return (0);
+}
+
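
The resize rule ((oldmax + 1) * 2) - 1 keeps the membership vector at 2^k - 1 entries, e.g. 15 -> 31 -> 63 -> 127, until the IP_MAX_MEMBERSHIPS ceiling. A tiny sketch of the growth sequence (4095 is assumed here as that ceiling):

    #include <stdio.h>

    int
    main(void)
    {
        size_t max = 15;        /* hypothetical starting size */

        while (max < 4095) {    /* stand-in for IP_MAX_MEMBERSHIPS */
            printf("%zu\n", max);
            max = ((max + 1) * 2) - 1;
        }
        return (0);
    }
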
+/*
+ * Find an IPv4 multicast group entry for this ip_moptions instance
+ * which matches the specified group, and optionally an interface.
+ * Return its index into the array, or -1 if not found.
+ */
+static size_t
+imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
+    const struct sockaddr *group)
+{
+	const struct sockaddr_in *gsin;
+	struct in_multi	*pinm;
+	int		  idx;
+	int		  nmships;
+
+	IMO_LOCK_ASSERT_HELD(IMO_CAST_TO_NONCONST(imo));
+
+	gsin = (const struct sockaddr_in *)group;
+
+	/* The imo_membership array may be lazy allocated. */
+	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
+		return (-1);
+
+	nmships = imo->imo_num_memberships;
+	for (idx = 0; idx < nmships; idx++) {
+		pinm = imo->imo_membership[idx];
+		if (pinm == NULL)
+			continue;
+		INM_LOCK(pinm);
+		if ((ifp == NULL || (pinm->inm_ifp == ifp)) &&
+		    in_hosteq(pinm->inm_addr, gsin->sin_addr)) {
+			INM_UNLOCK(pinm);
+			break;
+		}
+		INM_UNLOCK(pinm);
+	}
+	if (idx >= nmships)
+		idx = -1;
+
+	return (idx);
+}
+
+/*
+ * Find an IPv4 multicast source entry for this imo which matches
+ * the given group index for this socket, and source address.
+ *
+ * NOTE: This does not check if the entry is in-mode, merely if
+ * it exists, which may not be the desired behaviour.
+ */
+static struct in_msource *
+imo_match_source(const struct ip_moptions *imo, const size_t gidx,
+    const struct sockaddr *src)
+{
+	struct ip_msource	 find;
+	struct in_mfilter	*imf;
+	struct ip_msource	*ims;
+	const sockunion_t	*psa;
+
+	IMO_LOCK_ASSERT_HELD(IMO_CAST_TO_NONCONST(imo));
+
+	VERIFY(src->sa_family == AF_INET);
+	VERIFY(gidx != (size_t)-1 && gidx < imo->imo_num_memberships);
+
+	/* The imo_mfilters array may be lazy allocated. */
+	if (imo->imo_mfilters == NULL)
+		return (NULL);
+	imf = &imo->imo_mfilters[gidx];
+
+	/* Source trees are keyed in host byte order. */
+	psa = (const sockunion_t *)src;
+	find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
+	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
+
+	return ((struct in_msource *)ims);
+}
+
+/*
+ * Perform filtering for multicast datagrams on a socket by group and source.
+ *
+ * Returns 0 if a datagram should be allowed through, or various error codes
+ * if the socket was not a member of the group, or the source was muted, etc.
+ */
+int
+imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
+    const struct sockaddr *group, const struct sockaddr *src)
+{
+	size_t gidx;
+	struct in_msource *ims;
+	int mode;
+
+	IMO_LOCK_ASSERT_HELD(IMO_CAST_TO_NONCONST(imo));
+	VERIFY(ifp != NULL);
+
+	gidx = imo_match_group(imo, ifp, group);
+	if (gidx == (size_t)-1)
+		return (MCAST_NOTGMEMBER);
+
+	/*
+	 * Check if the source was included in an (S,G) join.
+	 * Allow reception on exclusive memberships by default,
+	 * reject reception on inclusive memberships by default.
+	 * Exclude source only if an in-mode exclude filter exists.
+	 * Include source only if an in-mode include filter exists.
+	 * NOTE: We are comparing group state here at IGMP t1 (now)
+	 * with socket-layer t0 (since last downcall).
+	 */
+	mode = imo->imo_mfilters[gidx].imf_st[1];
+	ims = imo_match_source(imo, gidx, src);
+
+	if ((ims == NULL && mode == MCAST_INCLUDE) ||
+	    (ims != NULL && ims->imsl_st[0] != mode)) {
+		return (MCAST_NOTSMEMBER);
+	}
+
+	return (MCAST_PASS);
+}
+
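
In table form, imo_multi_filter() admits a datagram unless the socket is in INCLUDE mode with no matching source entry, or a matching entry's committed mode disagrees with the group's. A faithful condensation of that predicate (the MCAST_* values below are stand-ins):

    enum { UNDEF = 0, INCLUDE = 1, EXCLUDE = 2 };   /* MCAST_* stand-ins */

    static int
    source_passes(int group_mode, int have_entry, int entry_st0)
    {
        if ((!have_entry && group_mode == INCLUDE) ||
            (have_entry && entry_st0 != group_mode))
            return (0);         /* MCAST_NOTSMEMBER */
        return (1);             /* MCAST_PASS */
    }
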
+int
+imo_clone(struct ip_moptions *from, struct ip_moptions *to)
+{
+	int i, err = 0;
+
+	IMO_LOCK(from);
+	IMO_LOCK(to);
+
+	to->imo_multicast_ifp = from->imo_multicast_ifp;
+	to->imo_multicast_vif = from->imo_multicast_vif;
+	to->imo_multicast_ttl = from->imo_multicast_ttl;
+	to->imo_multicast_loop = from->imo_multicast_loop;
+
+	/*
+	 * We're cloning, so drop any existing memberships and source
+	 * filters on the destination ip_moptions.
+	 */
+	for (i = 0; i < to->imo_num_memberships; ++i) {
+		struct in_mfilter *imf;
+
+		imf = to->imo_mfilters ? &to->imo_mfilters[i] : NULL;
+		if (imf != NULL)
+			imf_leave(imf);
+
+		(void) in_leavegroup(to->imo_membership[i], imf);
+
+		if (imf != NULL)
+			imf_purge(imf);
+
+		INM_REMREF(to->imo_membership[i]);
+		to->imo_membership[i] = NULL;
+	}
+	to->imo_num_memberships = 0;
+
+	VERIFY(to->imo_max_memberships != 0 && from->imo_max_memberships != 0);
+	if (to->imo_max_memberships < from->imo_max_memberships) {
+		/*
+		 * Ensure source and destination ip_moptions memberships
+		 * and source filters arrays are at least equal in size.
+		 */
+		err = imo_grow(to, from->imo_max_memberships);
+		if (err != 0)
+			goto done;
+	}
+	VERIFY(to->imo_max_memberships >= from->imo_max_memberships);
+
+	/*
+	 * Source filtering doesn't apply to OpenTransport sockets,
+	 * so simply hold an additional reference count per membership.
+	 */
+	for (i = 0; i < from->imo_num_memberships; i++) {
+		to->imo_membership[i] = from->imo_membership[i];
+		INM_ADDREF(from->imo_membership[i]);
+		to->imo_num_memberships++;
+	}
+	VERIFY(to->imo_num_memberships == from->imo_num_memberships);
+
+done:
+	IMO_UNLOCK(to);
+	IMO_UNLOCK(from);
+
+	return (err);
+}
+
+/*
+ * Find and return a reference to an in_multi record for (ifp, group),
+ * and bump its reference count.
+ * If one does not exist, try to allocate it, and update link-layer multicast
+ * filters on ifp to listen for group.
+ * Return 0 if successful, otherwise return an appropriate error code.
+ */
+static int
+in_getmulti(struct ifnet *ifp, const struct in_addr *group,
+    struct in_multi **pinm)
+{
+	struct sockaddr_in	 gsin;
+	struct ifmultiaddr	*ifma;
+	struct in_multi		*inm;
+	int			error;
+
+	in_multihead_lock_shared();
+	IN_LOOKUP_MULTI(group, ifp, inm);
+	if (inm != NULL) {
+		INM_LOCK(inm);
+		VERIFY(inm->inm_reqcnt >= 1);
+		inm->inm_reqcnt++;
+		VERIFY(inm->inm_reqcnt != 0);
+		*pinm = inm;
+		INM_UNLOCK(inm);
+		in_multihead_lock_done();
+		/*
+		 * We already joined this group; return the inm
+		 * with a refcount held (via lookup) for the caller.
+		 */
+		return (0);
+	}
+	in_multihead_lock_done();
+
+	bzero(&gsin, sizeof(gsin));
+	gsin.sin_family = AF_INET;
+	gsin.sin_len = sizeof(struct sockaddr_in);
+	gsin.sin_addr = *group;
+
+	/*
+	 * Check if a link-layer group is already associated
+	 * with this network-layer group on the given ifnet.
+	 */
+	error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * See comments in inm_remref() for access to ifma_protospec.
+	 */
+	in_multihead_lock_exclusive();
+	IFMA_LOCK(ifma);
+	if ((inm = ifma->ifma_protospec) != NULL) {
+		VERIFY(ifma->ifma_addr != NULL);
+		VERIFY(ifma->ifma_addr->sa_family == AF_INET);
+		INM_ADDREF(inm);	/* for caller */
+		IFMA_UNLOCK(ifma);
+		INM_LOCK(inm);
+		VERIFY(inm->inm_ifma == ifma);
+		VERIFY(inm->inm_ifp == ifp);
+		VERIFY(in_hosteq(inm->inm_addr, *group));
+		if (inm->inm_debug & IFD_ATTACHED) {
+			VERIFY(inm->inm_reqcnt >= 1);
+			inm->inm_reqcnt++;
+			VERIFY(inm->inm_reqcnt != 0);
+			*pinm = inm;
+			INM_UNLOCK(inm);
+			in_multihead_lock_done();
+			IFMA_REMREF(ifma);
+			/*
+			 * We lost the race with another thread doing
+			 * in_getmulti(); this group has already been
+			 * joined, so return the inm with a refcount
+			 * held for the caller.
+			 */
+			return (0);
+		}
+		/*
+		 * We lost the race with another thread doing in_delmulti();
+		 * the inm referring to the ifma has been detached, thus we
+		 * reattach it back to the in_multihead list and return the
+		 * inm with a refcount held for the caller.
+		 */
+		in_multi_attach(inm);
+		VERIFY((inm->inm_debug &
+		    (IFD_ATTACHED | IFD_TRASHED)) == IFD_ATTACHED);
+		*pinm = inm;
+		INM_UNLOCK(inm);
+		in_multihead_lock_done();
+		IFMA_REMREF(ifma);
+		return (0);
+	}
+	IFMA_UNLOCK(ifma);
+
+	/*
+	 * A new in_multi record is needed; allocate and initialize it.
+	 * We DO NOT perform an IGMP join as the in_ layer may need to
+	 * push an initial source list down to IGMP to support SSM.
+	 *
+	 * The initial source filter state is INCLUDE, {} as per the RFC.
+	 */
+	inm = in_multi_alloc(M_WAITOK);
+	if (inm == NULL) {
+		in_multihead_lock_done();
+		IFMA_REMREF(ifma);
+		return (ENOMEM);
+	}
+	INM_LOCK(inm);
+	inm->inm_addr = *group;
+	inm->inm_ifp = ifp;
+	inm->inm_igi = IGMP_IFINFO(ifp);
+	VERIFY(inm->inm_igi != NULL);
+	IGI_ADDREF(inm->inm_igi);
+	inm->inm_ifma = ifma;		/* keep refcount from if_addmulti() */
+	inm->inm_state = IGMP_NOT_MEMBER;
+	/*
+	 * Pending state-changes per group are subject to a bounds check.
+	 */
+	inm->inm_scq.ifq_maxlen = IGMP_MAX_STATE_CHANGES;
+	inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
+	inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
+	RB_INIT(&inm->inm_srcs);
+	*pinm = inm;
+	in_multi_attach(inm);
+	VERIFY((inm->inm_debug & (IFD_ATTACHED | IFD_TRASHED)) == IFD_ATTACHED);
+	INM_ADDREF_LOCKED(inm);		/* for caller */
+	INM_UNLOCK(inm);
+
+	IFMA_LOCK(ifma);
+	VERIFY(ifma->ifma_protospec == NULL);
+	ifma->ifma_protospec = inm;
+	IFMA_UNLOCK(ifma);
+	in_multihead_lock_done();
+
+	return (0);
+}
+
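
in_getmulti() follows the classic double-checked find-or-create shape: a fast lookup under the shared lock, then, on a miss, the exclusive lock with a re-check (here via ifma_protospec) before allocating. An analogous userland sketch with a pthread rwlock; find_locked()/create_locked() are assumed helpers:

    #include <pthread.h>

    static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;

    void *find_locked(int key);     /* assumed: lookup, lock held */
    void *create_locked(int key);   /* assumed: insert, lock held */

    static void *
    find_or_create(int key)
    {
        void *obj;

        pthread_rwlock_rdlock(&table_lock);
        obj = find_locked(key);
        pthread_rwlock_unlock(&table_lock);
        if (obj != NULL)
            return (obj);

        pthread_rwlock_wrlock(&table_lock);
        obj = find_locked(key);     /* re-check: a creator may have raced */
        if (obj == NULL)
            obj = create_locked(key);
        pthread_rwlock_unlock(&table_lock);
        return (obj);
    }
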
+/*
+ * Clear recorded source entries for a group.
+ * Used by the IGMP code.
+ * FIXME: Should reap.
+ */
+void
+inm_clear_recorded(struct in_multi *inm)
+{
+	struct ip_msource	*ims;
+
+	INM_LOCK_ASSERT_HELD(inm);
+
+	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
+		if (ims->ims_stp) {
+			ims->ims_stp = 0;
+			--inm->inm_st[1].iss_rec;
+		}
+	}
+	VERIFY(inm->inm_st[1].iss_rec == 0);
+}
+
+/*
+ * Record a source as pending for a Source-Group IGMPv3 query.
+ * This lives here as it modifies the shared tree.
+ *
+ * inm is the group descriptor.
+ * naddr is the address of the source to record in network-byte order.
+ *
+ * If the net.inet.igmp.sgalloc sysctl is non-zero, we will
+ * lazy-allocate a source node in response to an SG query.
+ * Otherwise, no allocation is performed. This saves some memory
+ * with the trade-off that the source will not be reported to the
+ * router if joined in the window between the query response and
+ * the group actually being joined on the local host.
+ *
+ * Return 0 if the source didn't exist or was already marked as recorded.
+ * Return 1 if the source was marked as recorded by this function.
+ * Return <0 if any error occurred (negated errno code).
+ */
+int
+inm_record_source(struct in_multi *inm, const in_addr_t naddr)
+{
+	struct ip_msource	 find;
+	struct ip_msource	*ims, *nims;
+
+	INM_LOCK_ASSERT_HELD(inm);
+
+	find.ims_haddr = ntohl(naddr);
+	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
+	if (ims && ims->ims_stp)
+		return (0);
+	if (ims == NULL) {
+		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
+			return (-ENOSPC);
+		nims = ipms_alloc(M_WAITOK);
+		if (nims == NULL)
+			return (-ENOMEM);
+		nims->ims_haddr = find.ims_haddr;
+		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
+		++inm->inm_nsrc;
+		ims = nims;
+	}
+
+	/*
+	 * Mark the source as recorded and update the recorded
+	 * source count.
+	 */
+	++ims->ims_stp;
+	++inm->inm_st[1].iss_rec;
+
+	return (1);
+}
+
+/*
+ * Return a pointer to an in_msource owned by an in_mfilter,
+ * given its source address.
+ * Lazy-allocate if needed. If this is a new entry, its filter state is
+ * undefined at t0.
+ *
+ * imf is the filter set being modified.
+ * psin points to the source address, in network byte order.
+ *
+ * Caller is expected to be holding imo_lock.
+ */
+static int
+imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin,
+    struct in_msource **plims)
+{
+	struct ip_msource	 find;
+	struct ip_msource	*ims;
+	struct in_msource	*lims;
+	int			 error;
+
+	error = 0;
+	ims = NULL;
+	lims = NULL;
+
+	/* key is host byte order */
+	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
+	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
+	lims = (struct in_msource *)ims;
+	if (lims == NULL) {
+		if (imf->imf_nsrc == in_mcast_maxsocksrc)
+			return (ENOSPC);
+		lims = inms_alloc(M_WAITOK);
+		if (lims == NULL)
+			return (ENOMEM);
+		lims->ims_haddr = find.ims_haddr;
+		lims->imsl_st[0] = MCAST_UNDEFINED;
+		RB_INSERT(ip_msource_tree, &imf->imf_sources,
+		    (struct ip_msource *)lims);
+		++imf->imf_nsrc;
+	}
+
+	*plims = lims;
+
+	return (error);
+}
+
+/*
+ * Graft a source entry into an existing socket-layer filter set,
+ * maintaining any required invariants and checking allocations.
+ *
+ * The source is marked as being in the new filter mode at t1.
+ *
+ * Return the pointer to the new node, otherwise return NULL.
+ *
+ * Caller is expected to be holding imo_lock.
+ */
+static struct in_msource *
+imf_graft(struct in_mfilter *imf, const uint8_t st1,
+    const struct sockaddr_in *psin)
+{
+	struct in_msource	*lims;
+
+	lims = inms_alloc(M_WAITOK);
+	if (lims == NULL)
+		return (NULL);
+	lims->ims_haddr = ntohl(psin->sin_addr.s_addr);
+	lims->imsl_st[0] = MCAST_UNDEFINED;
+	lims->imsl_st[1] = st1;
+	RB_INSERT(ip_msource_tree, &imf->imf_sources,
+	    (struct ip_msource *)lims);
+	++imf->imf_nsrc;
+
+	return (lims);
+}
+
+/*
+ * Prune a source entry from an existing socket-layer filter set,
+ * maintaining any required invariants and checking allocations.
+ *
+ * The source is marked as being left at t1, it is not freed.
+ *
+ * Return 0 if no error occurred, otherwise return an errno value.
+ *
+ * Caller is expected to be holding imo_lock.
+ */
+static int
+imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin)
+{
+	struct ip_msource	 find;
+	struct ip_msource	*ims;
+	struct in_msource	*lims;
+
+	/* key is host byte order */
+	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
+	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
+	if (ims == NULL)
+		return (ENOENT);
+	lims = (struct in_msource *)ims;
+	lims->imsl_st[1] = MCAST_UNDEFINED;
+	return (0);
+}
+
+/*
+ * Revert socket-layer filter set deltas at t1 to t0 state.
+ *
+ * Caller is expected to be holding imo_lock.
+ */
+static void
+imf_rollback(struct in_mfilter *imf)
+{
+	struct ip_msource	*ims, *tims;
+	struct in_msource	*lims;
+
+	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
+		lims = (struct in_msource *)ims;
+		if (lims->imsl_st[0] == lims->imsl_st[1]) {
+			/* no change at t1 */
+			continue;
+		} else if (lims->imsl_st[0] != MCAST_UNDEFINED) {
+			/* revert change to existing source at t1 */
+			lims->imsl_st[1] = lims->imsl_st[0];
+		} else {
+			/* revert source added t1 */
+			IGMP_PRINTF(("%s: free inms %p\n", __func__, lims));
+			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
+			inms_free(lims);
+			imf->imf_nsrc--;
+		}
+	}
+	imf->imf_st[1] = imf->imf_st[0];
+}
+
+/*
+ * Mark socket-layer filter set as INCLUDE {} at t1.
+ *
+ * Caller is expected to be holding imo_lock.
+ */
+void
+imf_leave(struct in_mfilter *imf)
+{
+	struct ip_msource	*ims;
+	struct in_msource	*lims;
+
+	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
+		lims = (struct in_msource *)ims;
+		lims->imsl_st[1] = MCAST_UNDEFINED;
+	}
+	imf->imf_st[1] = MCAST_INCLUDE;
+}
+
+/*
+ * Mark socket-layer filter set deltas as committed.
+ *
+ * Caller is expected to be holding imo_lock.
+ */
+static void
+imf_commit(struct in_mfilter *imf)
+{
+	struct ip_msource	*ims;
+	struct in_msource	*lims;
+
+	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
+		lims = (struct in_msource *)ims;
+		lims->imsl_st[0] = lims->imsl_st[1];
+	}
+	imf->imf_st[0] = imf->imf_st[1];
+}
+
+/*
+ * Reap unreferenced sources from socket-layer filter set.
+ *
+ * Caller is expected to be holding imo_lock.
+ */
+static void
+imf_reap(struct in_mfilter *imf)
+{
+	struct ip_msource	*ims, *tims;
+	struct in_msource	*lims;
+
+	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
+		lims = (struct in_msource *)ims;
+		if ((lims->imsl_st[0] == MCAST_UNDEFINED) &&
+		    (lims->imsl_st[1] == MCAST_UNDEFINED)) {
+			IGMP_PRINTF(("%s: free inms %p\n", __func__, lims));
+			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
+			inms_free(lims);
+			imf->imf_nsrc--;
+		}
+	}
+}
+
+/*
+ * Purge socket-layer filter set.
+ *
+ * Caller is expected to be holding imo_lock.
+ */
+void
+imf_purge(struct in_mfilter *imf)
+{
+	struct ip_msource	*ims, *tims;
+	struct in_msource	*lims;
+
+	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
+		lims = (struct in_msource *)ims;
+		IGMP_PRINTF(("%s: free inms %p\n", __func__, lims));
+		RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
+		inms_free(lims);
+		imf->imf_nsrc--;
+	}
+	imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED;
+	VERIFY(RB_EMPTY(&imf->imf_sources));
+}
+
+/*
+ * Look up a source filter entry for a multicast group.
+ *
+ * inm is the group descriptor to work with.
+ * haddr is the host-byte-order IPv4 address to look up.
+ * noalloc may be non-zero to suppress allocation of sources.
+ * *pims will be set to the address of the retrieved or allocated source.
+ *
+ * Return 0 if successful, otherwise return a non-zero error code.
+ */
+static int
+inm_get_source(struct in_multi *inm, const in_addr_t haddr,
+    const int noalloc, struct ip_msource **pims)
+{
+	struct ip_msource	 find;
+	struct ip_msource	*ims, *nims;
+#ifdef IGMP_DEBUG
+	struct in_addr ia;
+#endif
+	INM_LOCK_ASSERT_HELD(inm);
+
+	find.ims_haddr = haddr;
+	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
+	if (ims == NULL && !noalloc) {
+		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
+			return (ENOSPC);
+		nims = ipms_alloc(M_WAITOK);
+		if (nims == NULL)
+			return (ENOMEM);
+		nims->ims_haddr = haddr;
+		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
+		++inm->inm_nsrc;
+		ims = nims;
+#ifdef IGMP_DEBUG
+		ia.s_addr = htonl(haddr);
+		IGMP_PRINTF(("%s: allocated %s as %p\n", __func__,
+		    inet_ntoa(ia), ims));
+#endif
+	}
+
+	*pims = ims;
+	return (0);
+}
+
+/*
+ * Helper function to derive the filter mode on a source entry
+ * from its internal counters. Predicates are:
+ *  A source is only excluded if all listeners exclude it.
+ *  A source is only included if no listeners exclude it,
+ *  and at least one listener includes it.
+ * May be used by ifmcstat(8).
+ */
+uint8_t
+ims_get_mode(const struct in_multi *inm, const struct ip_msource *ims,
+    uint8_t t)
+{
+	INM_LOCK_ASSERT_HELD(INM_CAST_TO_NONCONST(inm));
+
+	t = !!t;
+	if (inm->inm_st[t].iss_ex > 0 &&
+	    inm->inm_st[t].iss_ex == ims->ims_st[t].ex)
+		return (MCAST_EXCLUDE);
+	else if (ims->ims_st[t].in > 0 && ims->ims_st[t].ex == 0)
+		return (MCAST_INCLUDE);
+	return (MCAST_UNDEFINED);
+}
+
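
Concretely: with three EXCLUDE-mode listeners on a group (iss_ex == 3), a source is reported as excluded only when all three exclude it; one dissenter makes it undefined; and a source is included only when someone includes it and nobody excludes it. A standalone check of those predicates (constants are stand-ins for the MCAST_* values):

    #include <assert.h>

    static int
    mode_of(unsigned grp_ex, unsigned src_ex, unsigned src_in)
    {
        if (grp_ex > 0 && grp_ex == src_ex)
            return (2);             /* MCAST_EXCLUDE */
        if (src_in > 0 && src_ex == 0)
            return (1);             /* MCAST_INCLUDE */
        return (0);                 /* MCAST_UNDEFINED */
    }

    int
    main(void)
    {
        assert(mode_of(3, 3, 0) == 2);  /* all excluders agree */
        assert(mode_of(3, 2, 0) == 0);  /* one excluder allows it */
        assert(mode_of(0, 0, 1) == 1);  /* included, never excluded */
        return (0);
    }
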
+/*
+ * Merge socket-layer source into IGMP-layer source.
+ * If rollback is non-zero, perform the inverse of the merge.
+ */
+static void
+ims_merge(struct ip_msource *ims, const struct in_msource *lims,
+    const int rollback)
+{
+	int n = rollback ? -1 : 1;
+#ifdef IGMP_DEBUG
+	struct in_addr ia;
+
+	ia.s_addr = htonl(ims->ims_haddr);
+#endif
+
+	if (lims->imsl_st[0] == MCAST_EXCLUDE) {
+		IGMP_PRINTF(("%s: t1 ex -= %d on %s\n",
+		    __func__, n, inet_ntoa(ia)));
+		ims->ims_st[1].ex -= n;
+	} else if (lims->imsl_st[0] == MCAST_INCLUDE) {
+		IGMP_PRINTF(("%s: t1 in -= %d on %s\n",
+		    __func__, n, inet_ntoa(ia)));
+		ims->ims_st[1].in -= n;
+	}
+
+	if (lims->imsl_st[1] == MCAST_EXCLUDE) {
+		IGMP_PRINTF(("%s: t1 ex += %d on %s\n",
+		    __func__, n, inet_ntoa(ia)));
+		ims->ims_st[1].ex += n;
+	} else if (lims->imsl_st[1] == MCAST_INCLUDE) {
+		IGMP_PRINTF(("%s: t1 in += %d on %s\n",
+		    __func__, n, inet_ntoa(ia)));
+		ims->ims_st[1].in += n;
+	}
+}
+
+/*
+ * Atomically update the global in_multi state, when a membership's
+ * filter list is being updated in any way.
+ *
+ * imf is the per-inpcb-membership group filter pointer.
+ * A fake imf may be passed for in-kernel consumers.
+ *
+ * XXX This is a candidate for a set-symmetric-difference style loop
+ * which would eliminate the repeated lookup from root of ims nodes,
+ * as they share the same key space.
+ *
+ * If any error occurred this function will back out of refcounts
+ * and return a non-zero value.
+ */
+static int
+inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
+{
+	struct ip_msource	*ims, *nims;
+	struct in_msource	*lims;
+	int			 schanged, error;
+	int			 nsrc0, nsrc1;
+
+	INM_LOCK_ASSERT_HELD(inm);
+
+	schanged = 0;
+	error = 0;
+	nsrc1 = nsrc0 = 0;
+
+	/*
+	 * Update the source filters first, as this may fail.
+	 * Maintain count of in-mode filters at t0, t1. These are
+	 * used to work out if we transition into ASM mode or not.
+	 * Maintain a count of source filters whose state was
+	 * actually modified by this operation.
+	 */
+	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
+		lims = (struct in_msource *)ims;
+		if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++;
+		if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++;
+		if (lims->imsl_st[0] == lims->imsl_st[1]) continue;
+		error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
+		++schanged;
+		if (error)
+			break;
+		ims_merge(nims, lims, 0);
+	}
+	if (error) {
+		struct ip_msource *bims;
+
+		RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
+			lims = (struct in_msource *)ims;
+			if (lims->imsl_st[0] == lims->imsl_st[1])
+				continue;
+			(void) inm_get_source(inm, lims->ims_haddr, 1, &bims);
+			if (bims == NULL)
+				continue;
+			ims_merge(bims, lims, 1);
+		}
+		goto out_reap;
+	}
+
+	IGMP_PRINTF(("%s: imf filters in-mode: %d at t0, %d at t1\n",
+	    __func__, nsrc0, nsrc1));
+
+	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
+	if (imf->imf_st[0] == imf->imf_st[1] &&
+	    imf->imf_st[1] == MCAST_INCLUDE) {
+		if (nsrc1 == 0) {
+			IGMP_PRINTF(("%s: --in on inm at t1\n", __func__));
+			--inm->inm_st[1].iss_in;
+		}
+	}
+
+	/* Handle filter mode transition on socket. */
+	if (imf->imf_st[0] != imf->imf_st[1]) {
+		IGMP_PRINTF(("%s: imf transition %d to %d\n",
+		    __func__, imf->imf_st[0], imf->imf_st[1]));
+
+		if (imf->imf_st[0] == MCAST_EXCLUDE) {
+			IGMP_PRINTF(("%s: --ex on inm at t1\n", __func__));
+			--inm->inm_st[1].iss_ex;
+		} else if (imf->imf_st[0] == MCAST_INCLUDE) {
+			IGMP_PRINTF(("%s: --in on inm at t1\n", __func__));
+			--inm->inm_st[1].iss_in;
+		}
+
+		if (imf->imf_st[1] == MCAST_EXCLUDE) {
+			IGMP_PRINTF(("%s: ex++ on inm at t1\n", __func__));
+			inm->inm_st[1].iss_ex++;
+		} else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
+			IGMP_PRINTF(("%s: in++ on inm at t1\n", __func__));
+			inm->inm_st[1].iss_in++;
+		}
+	}
+
+	/*
+	 * Track inm filter state in terms of listener counts.
+	 * If there are any exclusive listeners, stack-wide
+	 * membership is exclusive.
+	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
+	 * If no listeners remain, state is undefined at t1,
+	 * and the IGMP lifecycle for this group should finish.
+	 */
+	if (inm->inm_st[1].iss_ex > 0) {
+		IGMP_PRINTF(("%s: transition to EX\n", __func__));
+		inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
+	} else if (inm->inm_st[1].iss_in > 0) {
+		IGMP_PRINTF(("%s: transition to IN\n", __func__));
+		inm->inm_st[1].iss_fmode = MCAST_INCLUDE;
+	} else {
+		IGMP_PRINTF(("%s: transition to UNDEF\n", __func__));
+		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
+	}
+
+	/* Decrement ASM listener count on transition out of ASM mode. */
+	if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
+		if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
+		    (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) {
+			IGMP_PRINTF(("%s: --asm on inm at t1\n", __func__));
+			--inm->inm_st[1].iss_asm;
+		}
+	}
+
+	/* Increment ASM listener count on transition to ASM mode. */
+	if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
+		IGMP_PRINTF(("%s: asm++ on inm at t1\n", __func__));
+		inm->inm_st[1].iss_asm++;
+	}
+
+	IGMP_PRINTF(("%s: merged imf %p to inm %p\n", __func__, imf, inm));
+	inm_print(inm);
+
+out_reap:
+	if (schanged > 0) {
+		IGMP_PRINTF(("%s: sources changed; reaping\n", __func__));
+		inm_reap(inm);
+	}
+	return (error);
+}
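+
+/*
+ * Illustrative walk-through (exposition only): a first ASM join reaches
+ * inm_merge() with imf_st[0] == MCAST_UNDEFINED, imf_st[1] == MCAST_EXCLUDE
+ * and an empty source list, so only the t1 counters move:
+ *
+ *	inm->inm_st[1].iss_ex++;	// one exclusive listener at t1
+ *	inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
+ *	inm->inm_st[1].iss_asm++;	// exclude with no sources == ASM
+ */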
+
+/*
+ * Mark an in_multi's filter set deltas as committed.
+ * Called by IGMP after a state change has been enqueued.
+ */
+void
+inm_commit(struct in_multi *inm)
+{
+	struct ip_msource	*ims;
+
+	INM_LOCK_ASSERT_HELD(inm);
+
+	IGMP_PRINTF(("%s: commit inm %p\n", __func__, inm));
+	IGMP_PRINTF(("%s: pre commit:\n", __func__));
+	inm_print(inm);
+
+	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
+		ims->ims_st[0] = ims->ims_st[1];
+	}
+	inm->inm_st[0] = inm->inm_st[1];
+}
+
+/*
+ * Reap unreferenced nodes from an in_multi's filter set.
+ */
+static void
+inm_reap(struct in_multi *inm)
+{
+	struct ip_msource	*ims, *tims;
+
+	INM_LOCK_ASSERT_HELD(inm);
+
+	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
+		if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 ||
+		    ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 ||
+		    ims->ims_stp != 0)
+			continue;
+		IGMP_PRINTF(("%s: free ims %p\n", __func__, ims));
+		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
+		ipms_free(ims);
+		inm->inm_nsrc--;
+	}
+}
+
+/*
+ * Purge all source nodes from an in_multi's filter set.
+ */
+void
+inm_purge(struct in_multi *inm)
+{
+	struct ip_msource	*ims, *tims;
+
+	INM_LOCK_ASSERT_HELD(inm);
+
+	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
+		IGMP_PRINTF(("%s: free ims %p\n", __func__, ims));
+		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
+		ipms_free(ims);
+		inm->inm_nsrc--;
+	}
+}
+
+/*
+ * Join a multicast group; real entry point.
+ *
+ * Only preserves atomicity at inm level.
+ * NOTE: imf argument cannot be const due to sys/tree.h limitations.
+ *
+ * If the IGMP downcall fails, the group is not joined, and an error
+ * code is returned.
+ */
+static int
+in_joingroup(struct ifnet *ifp, const struct in_addr *gina,
+    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
+{
+	struct in_mfilter	 timf;
+	struct in_multi		*inm = NULL;
+	int			 error = 0;
+
+	IGMP_PRINTF(("%s: join %s on %p(%s%d)\n", __func__,
+	    inet_ntoa(*gina), ifp, ifp->if_name, ifp->if_unit));
+
+	*pinm = NULL;
+
+	/*
+	 * If no imf was specified (i.e. kernel consumer),
+	 * fake one up and assume it is an ASM join.
+	 */
+	if (imf == NULL) {
+		imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
+		imf = &timf;
+	}
+
+	error = in_getmulti(ifp, gina, &inm);
+	if (error) {
+		IGMP_PRINTF(("%s: in_getmulti() failure\n", __func__));
+		return (error);
+	}
+
+	IGMP_PRINTF(("%s: merge inm state\n", __func__));
+
+	INM_LOCK(inm);
+	error = inm_merge(inm, imf);
+	if (error) {
+		IGMP_PRINTF(("%s: failed to merge inm state\n", __func__));
+		goto out_inm_release;
+	}
+
+	IGMP_PRINTF(("%s: doing igmp downcall\n", __func__));
+	error = igmp_change_state(inm);
+	if (error) {
+		IGMP_PRINTF(("%s: failed to update source\n", __func__));
+		goto out_inm_release;
+	}
+
+out_inm_release:
+	if (error) {
+		IGMP_PRINTF(("%s: dropping ref on %p\n", __func__, inm));
+		INM_UNLOCK(inm);
+		INM_REMREF(inm);
+	} else {
+		INM_UNLOCK(inm);
+		*pinm = inm;	/* keep refcount from in_getmulti() */
+	}
+
+	return (error);
+}
+
+/*
+ * Leave a multicast group; real entry point.
+ * All source filters will be expunged.
+ *
+ * Only preserves atomicity at inm level.
+ *
+ * Note: This is not the same as inm_release(*) as this function also
+ * makes a state change downcall into IGMP.
+ */
+int
+in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
+{
+	struct in_mfilter	 timf;
+	int			 error, lastref;
+
+	error = 0;
+
+	INM_LOCK_ASSERT_NOTHELD(inm);
+
+	in_multihead_lock_exclusive();
+	INM_LOCK(inm);
+
+	IGMP_PRINTF(("%s: leave inm %p, %s/%s%d, imf %p\n", __func__,
+	    inm, inet_ntoa(inm->inm_addr),
+	    (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_name),
+	    inm->inm_ifp->if_unit, imf));
+
+	/*
+	 * If no imf was specified (i.e. kernel consumer),
+	 * fake one up and assume it is an ASM leave.
+	 */
+	if (imf == NULL) {
+		imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
+		imf = &timf;
+	}
+
+	/*
+	 * Begin state merge transaction at IGMP layer.
+	 *
+	 * As this particular invocation should not cause any memory
+	 * to be allocated, and there is no opportunity to roll back
+	 * the transaction, it MUST NOT fail.
+	 */
+	IGMP_PRINTF(("%s: merge inm state\n", __func__));
+
+	error = inm_merge(inm, imf);
+	KASSERT(error == 0, ("%s: failed to merge inm state\n", __func__));
+
+	IGMP_PRINTF(("%s: doing igmp downcall\n", __func__));
+	error = igmp_change_state(inm);
+#if IGMP_DEBUG
+	if (error)
+		IGMP_PRINTF(("%s: failed igmp downcall\n", __func__));
+#endif
+	lastref = in_multi_detach(inm);
+	VERIFY(!lastref || (!(inm->inm_debug & IFD_ATTACHED) &&
+	    inm->inm_reqcnt == 0));
+	INM_UNLOCK(inm);
+	in_multihead_lock_done();
+
+	if (lastref)
+		INM_REMREF(inm);	/* for in_multihead list */
+
+	return (error);
+}
+
+/*
+ * Join an IPv4 multicast group in (*,G) exclusive mode.
+ * The group must be a 224.0.0.0/24 link-scope group.
+ * This KPI is for legacy kernel consumers only.
+ */
+struct in_multi *
+in_addmulti(struct in_addr *ap, struct ifnet *ifp)
+{
+	struct in_multi *pinm = NULL;
+	int error;
+
+	KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)),
+	    ("%s: %s not in 224.0.0.0/24\n", __func__, inet_ntoa(*ap)));
+
+	error = in_joingroup(ifp, ap, NULL, &pinm);
+	VERIFY(pinm != NULL || error != 0);
+
+	return (pinm);
+}
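+
+/*
+ * Illustrative kernel-consumer sketch (exposition only; assumes "ifp"
+ * is a held ifnet and uses 224.0.0.251, an arbitrary group inside the
+ * required 224.0.0.0/24 link-scope range):
+ *
+ *	struct in_addr grp;
+ *	struct in_multi *inm;
+ *
+ *	grp.s_addr = htonl(0xe00000fbU);	// 224.0.0.251
+ *	inm = in_addmulti(&grp, ifp);
+ *	if (inm != NULL) {
+ *		...
+ *		in_delmulti(inm);
+ *	}
+ */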
+
+/*
+ * Leave an IPv4 multicast group, assumed to be in exclusive (*,G) mode.
+ * This KPI is for legacy kernel consumers only.
+ */
+void
+in_delmulti(struct in_multi *inm)
+{
+
+	(void) in_leavegroup(inm, NULL);
+}
+
+/*
+ * Block or unblock an ASM multicast source on an inpcb.
+ * This implements the delta-based API described in RFC 3678.
+ *
+ * The delta-based API applies only to exclusive-mode memberships.
+ * An IGMP downcall will be performed.
+ *
+ * Return 0 if successful, otherwise return an appropriate error code.
+ */
+static int
+inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
+{
+	struct group_source_req		 gsr;
+	sockunion_t			*gsa, *ssa;
+	struct ifnet			*ifp;
+	struct in_mfilter		*imf;
+	struct ip_moptions		*imo;
+	struct in_msource		*ims;
+	struct in_multi			*inm;
+	size_t				 idx;
+	uint16_t			 fmode;
+	int				 error, doblock;
+	unsigned int			 ifindex = 0;
+
+	ifp = NULL;
+	error = 0;
+	doblock = 0;
+
+	memset(&gsr, 0, sizeof(struct group_source_req));
+	gsa = (sockunion_t *)&gsr.gsr_group;
+	ssa = (sockunion_t *)&gsr.gsr_source;
+
+	switch (sopt->sopt_name) {
+	case IP_BLOCK_SOURCE:
+	case IP_UNBLOCK_SOURCE: {
+		struct ip_mreq_source	 mreqs;
+
+		error = sooptcopyin(sopt, &mreqs,
+		    sizeof(struct ip_mreq_source),
+		    sizeof(struct ip_mreq_source));
+		if (error)
+			return (error);
+
+		gsa->sin.sin_family = AF_INET;
+		gsa->sin.sin_len = sizeof(struct sockaddr_in);
+		gsa->sin.sin_addr = mreqs.imr_multiaddr;
+
+		ssa->sin.sin_family = AF_INET;
+		ssa->sin.sin_len = sizeof(struct sockaddr_in);
+		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
+
+		if (!in_nullhost(mreqs.imr_interface))
+			ifp = ip_multicast_if(&mreqs.imr_interface, &ifindex);
+
+		if (sopt->sopt_name == IP_BLOCK_SOURCE)
+			doblock = 1;
+
+		IGMP_PRINTF(("%s: imr_interface = %s, ifp = %p\n",
+		    __func__, inet_ntoa(mreqs.imr_interface), ifp));
+		break;
+	    }
+
+	case MCAST_BLOCK_SOURCE:
+	case MCAST_UNBLOCK_SOURCE:
+		error = sooptcopyin(sopt, &gsr,
+		    sizeof(struct group_source_req),
+		    sizeof(struct group_source_req));
+		if (error)
+			return (error);
+
+		if (gsa->sin.sin_family != AF_INET ||
+		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
+			return (EINVAL);
+
+		if (ssa->sin.sin_family != AF_INET ||
+		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
+			return (EINVAL);
+
+		ifnet_head_lock_shared();
+		if (gsr.gsr_interface == 0 ||
+		    (u_int)if_index < gsr.gsr_interface) {
+			ifnet_head_done();
+			return (EADDRNOTAVAIL);
+		}
+
+		ifp = ifindex2ifnet[gsr.gsr_interface];
+		ifnet_head_done();
+
+		if (ifp == NULL)
+			return (EADDRNOTAVAIL);
+
+		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
+			doblock = 1;
+		break;
+
+	default:
+		IGMP_PRINTF(("%s: unknown sopt_name %d\n",
+		    __func__, sopt->sopt_name));
+		return (EOPNOTSUPP);
+		break;
+	}
+
+	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+		return (EINVAL);
+
+	/*
+	 * Check if we are actually a member of this group.
+	 */
+	imo = inp_findmoptions(inp);
+	if (imo == NULL)
+		return (ENOMEM);
+
+	IMO_LOCK(imo);
+	idx = imo_match_group(imo, ifp, &gsa->sa);
+	if (idx == (size_t)-1 || imo->imo_mfilters == NULL) {
+		error = EADDRNOTAVAIL;
+		goto out_imo_locked;
+	}
+
+	VERIFY(imo->imo_mfilters != NULL);
+	imf = &imo->imo_mfilters[idx];
+	inm = imo->imo_membership[idx];
+
+	/*
+	 * Attempting to use the delta-based API on an
+	 * non exclusive-mode membership is an error.
+	 */
+	fmode = imf->imf_st[0];
+	if (fmode != MCAST_EXCLUDE) {
+		error = EINVAL;
+		goto out_imo_locked;
+	}
+
+	/*
+	 * Deal with error cases up-front:
+	 *  Asked to block, but already blocked; or
+	 *  Asked to unblock, but nothing to unblock.
+	 * If adding a new block entry, allocate it.
+	 */
+	ims = imo_match_source(imo, idx, &ssa->sa);
+	if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
+		IGMP_PRINTF(("%s: source %s %spresent\n", __func__,
+		    inet_ntoa(ssa->sin.sin_addr), doblock ? "" : "not "));
+		error = EADDRNOTAVAIL;
+		goto out_imo_locked;
+	}
+
+	/*
+	 * Begin state merge transaction at socket layer.
+	 */
+	if (doblock) {
+		IGMP_PRINTF(("%s: %s source\n", __func__, "block"));
+		ims = imf_graft(imf, fmode, &ssa->sin);
+		if (ims == NULL)
+			error = ENOMEM;
+	} else {
+		IGMP_PRINTF(("%s: %s source\n", __func__, "allow"));
+		error = imf_prune(imf, &ssa->sin);
+	}
+
+	if (error) {
+		IGMP_PRINTF(("%s: merge imf state failed\n", __func__));
+		goto out_imf_rollback;
+	}
+
+	/*
+	 * Begin state merge transaction at IGMP layer.
+	 */
+	INM_LOCK(inm);
+	IGMP_PRINTF(("%s: merge inm state\n", __func__));
+	error = inm_merge(inm, imf);
+	if (error) {
+		IGMP_PRINTF(("%s: failed to merge inm state\n", __func__));
+		INM_UNLOCK(inm);
+		goto out_imf_rollback;
+	}
+
+	IGMP_PRINTF(("%s: doing igmp downcall\n", __func__));
+	error = igmp_change_state(inm);
+	INM_UNLOCK(inm);
+#if IGMP_DEBUG
+	if (error)
+		IGMP_PRINTF(("%s: failed igmp downcall\n", __func__));
+#endif
+
+out_imf_rollback:
+	if (error)
+		imf_rollback(imf);
+	else
+		imf_commit(imf);
+
+	imf_reap(imf);
+
+out_imo_locked:
+	IMO_UNLOCK(imo);
+	IMO_REMREF(imo);	/* from inp_findmoptions() */
+	return (error);
+}
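+
+/*
+ * Illustrative userland sketch of the delta-based API handled above
+ * (exposition only; assumes "s" is a UDP socket already joined to the
+ * group in exclusive mode on interface index "ifidx"):
+ *
+ *	struct group_source_req gsr;
+ *	struct sockaddr_in *grp = (struct sockaddr_in *)&gsr.gsr_group;
+ *	struct sockaddr_in *src = (struct sockaddr_in *)&gsr.gsr_source;
+ *
+ *	memset(&gsr, 0, sizeof(gsr));
+ *	gsr.gsr_interface = ifidx;
+ *	grp->sin_family = AF_INET;
+ *	grp->sin_len = sizeof(*grp);
+ *	grp->sin_addr.s_addr = inet_addr("239.1.1.1");
+ *	src->sin_family = AF_INET;
+ *	src->sin_len = sizeof(*src);
+ *	src->sin_addr.s_addr = inet_addr("10.0.0.8");
+ *	(void) setsockopt(s, IPPROTO_IP, MCAST_BLOCK_SOURCE,
+ *	    &gsr, sizeof(gsr));
+ */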
+
+/*
+ * Given an inpcb, return its multicast options structure pointer.
+ *
+ * Caller is responsible for locking the inpcb, and releasing the
+ * extra reference held on the imo, upon a successful return.
+ */
+static struct ip_moptions *
+inp_findmoptions(struct inpcb *inp)
+{
+	struct ip_moptions	 *imo;
+	struct in_multi		**immp;
+	struct in_mfilter	 *imfp;
+	size_t			  idx;
+
+	if ((imo = inp->inp_moptions) != NULL) {
+		IMO_ADDREF(imo);	/* for caller */
+		return (imo);
+	}
+
+	imo = ip_allocmoptions(M_WAITOK);
+	if (imo == NULL)
+		return (NULL);
+
+	immp = _MALLOC(sizeof (*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS,
+	    M_WAITOK | M_ZERO);
+	if (immp == NULL) {
+		IMO_REMREF(imo);
+		return (NULL);
+	}
+
+	imfp = _MALLOC(sizeof (struct in_mfilter) * IP_MIN_MEMBERSHIPS,
+	    M_INMFILTER, M_WAITOK | M_ZERO);
+	if (imfp == NULL) {
+		_FREE(immp, M_IPMOPTS);
+		IMO_REMREF(imo);
+		return (NULL);
+	}
+
+	imo->imo_multicast_ifp = NULL;
+	imo->imo_multicast_addr.s_addr = INADDR_ANY;
+	imo->imo_multicast_vif = -1;
+	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
+	imo->imo_multicast_loop = in_mcast_loop;
+	imo->imo_num_memberships = 0;
+	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
+	imo->imo_membership = immp;
+
+	/* Initialize per-group source filters. */
+	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++)
+		imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
+
+	imo->imo_mfilters = imfp;
+	inp->inp_moptions = imo; /* keep reference from ip_allocmoptions() */
+	IMO_ADDREF(imo);	/* for caller */
+
+	return (imo);
+}
+
+/*
+ * Atomically get source filters on a socket for an IPv4 multicast group.
+ */
+static int
+inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
+{
+	struct __msfilterreq64	msfr, msfr64;
+	struct __msfilterreq32	msfr32;
+	sockunion_t		*gsa;
+	struct ifnet		*ifp;
+	struct ip_moptions	*imo;
+	struct in_mfilter	*imf;
+	struct ip_msource	*ims;
+	struct in_msource	*lims;
+	struct sockaddr_in	*psin;
+	struct sockaddr_storage	*ptss;
+	struct sockaddr_storage	*tss;
+	int			 error;
+	size_t			 idx, nsrcs, ncsrcs;
+	user_addr_t 		 tmp_ptr;
+
+	imo = inp->inp_moptions;
+	VERIFY(imo != NULL);
+
+	if (IS_64BIT_PROCESS(current_proc())) {
+		error = sooptcopyin(sopt, &msfr64,
+		    sizeof(struct __msfilterreq64),
+		    sizeof(struct __msfilterreq64));
+		if (error)
+			return (error);
+		/* we never use msfr.msfr_srcs; */
+		memcpy(&msfr, &msfr64, sizeof(msfr));
+	} else {
+		error = sooptcopyin(sopt, &msfr32,
+		    sizeof(struct __msfilterreq32),
+		    sizeof(struct __msfilterreq32));
+		if (error)
+			return (error);
+		/* we never use msfr.msfr_srcs; */
+		memcpy(&msfr, &msfr32, sizeof(msfr));
+	}
+
+	ifnet_head_lock_shared();
+	if (msfr.msfr_ifindex == 0 || (u_int)if_index < msfr.msfr_ifindex) {
+		ifnet_head_done();
+		return (EADDRNOTAVAIL);
+	}
+
+	ifp = ifindex2ifnet[msfr.msfr_ifindex];
+	ifnet_head_done();
+
+	if (ifp == NULL)
+		return (EADDRNOTAVAIL);
+
+	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
+		msfr.msfr_nsrcs = in_mcast_maxsocksrc;
+
+	IMO_LOCK(imo);
+	/*
+	 * Lookup group on the socket.
+	 */
+	gsa = (sockunion_t *)&msfr.msfr_group;
+	idx = imo_match_group(imo, ifp, &gsa->sa);
+	if (idx == (size_t)-1 || imo->imo_mfilters == NULL) {
+		IMO_UNLOCK(imo);
+		return (EADDRNOTAVAIL);
+	}
+	imf = &imo->imo_mfilters[idx];
+
+	/*
+	 * Ignore memberships which are in limbo.
+	 */
+	if (imf->imf_st[1] == MCAST_UNDEFINED) {
+		IMO_UNLOCK(imo);
+		return (EAGAIN);
+	}
+	msfr.msfr_fmode = imf->imf_st[1];
+
+	/*
+	 * If the user specified a buffer, copy out the source filter
+	 * entries to userland gracefully.
+	 * We only copy out the number of entries which userland
+	 * has asked for, but we always tell userland how big the
+	 * buffer really needs to be.
+	 */
+
+	if (IS_64BIT_PROCESS(current_proc())) 
+		tmp_ptr = msfr64.msfr_srcs;
+	else
+		tmp_ptr = CAST_USER_ADDR_T(msfr32.msfr_srcs);
+
+	tss = NULL;
+	if (tmp_ptr != USER_ADDR_NULL && msfr.msfr_nsrcs > 0) {
+		tss = _MALLOC(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
+		    M_TEMP, M_WAITOK | M_ZERO);
+		if (tss == NULL) {
+			IMO_UNLOCK(imo);
+			return (ENOBUFS);
+		}
+	}
+
+	/*
+	 * Count number of sources in-mode at t0.
+	 * If buffer space exists and remains, copy out source entries.
+	 */
+	nsrcs = msfr.msfr_nsrcs;
+	ncsrcs = 0;
+	ptss = tss;
+	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
+		lims = (struct in_msource *)ims;
+		if (lims->imsl_st[0] == MCAST_UNDEFINED ||
+		    lims->imsl_st[0] != imf->imf_st[0])
+			continue;
+		++ncsrcs;
+		if (tss != NULL && nsrcs > 0) {
+			psin = (struct sockaddr_in *)ptss;
+			psin->sin_family = AF_INET;
+			psin->sin_len = sizeof(struct sockaddr_in);
+			psin->sin_addr.s_addr = htonl(lims->ims_haddr);
+			psin->sin_port = 0;
+			++ptss;
+			--nsrcs;
+		}
+	}
+
+	IMO_UNLOCK(imo);
+
+	if (tss != NULL) {
+		error = copyout(tss, tmp_ptr,
+		    sizeof(struct sockaddr_storage) *
+		    (msfr.msfr_nsrcs - nsrcs));
+		FREE(tss, M_TEMP);
+		if (error)
+			return (error);
+	}
+
+	msfr.msfr_nsrcs = ncsrcs;
+	if (IS_64BIT_PROCESS(current_proc())) {
+		msfr64.msfr_ifindex = msfr.msfr_ifindex;
+		msfr64.msfr_fmode   = msfr.msfr_fmode;
+		msfr64.msfr_nsrcs   = msfr.msfr_nsrcs;
+		memcpy(&msfr64.msfr_group, &msfr.msfr_group,
+		    sizeof(struct sockaddr_storage));
+		error = sooptcopyout(sopt, &msfr64,
+		    sizeof(struct __msfilterreq64));
+	} else {
+		msfr32.msfr_ifindex = msfr.msfr_ifindex;
+		msfr32.msfr_fmode   = msfr.msfr_fmode;
+		msfr32.msfr_nsrcs   = msfr.msfr_nsrcs;
+		memcpy(&msfr32.msfr_group, &msfr.msfr_group,
+		    sizeof(struct sockaddr_storage));
+		error = sooptcopyout(sopt, &msfr32,
+		    sizeof(struct __msfilterreq32));
+	}
+
+	return (error);
+}
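+
+/*
+ * Illustrative userland sketch (exposition only; assumes the RFC 3678
+ * getipv4sourcefilter() wrapper is available, which packs a struct
+ * __msfilterreq and issues getsockopt(IP_MSFILTER) to reach the
+ * handler above):
+ *
+ *	struct in_addr iface, grp, slist[16];
+ *	uint32_t fmode, nsrc = 16;
+ *
+ *	iface.s_addr = inet_addr("10.0.0.1");	// local interface address
+ *	grp.s_addr = inet_addr("239.1.1.1");
+ *	if (getipv4sourcefilter(s, iface, grp, &fmode, &nsrc, slist) == 0)
+ *		printf("fmode %u, %u sources\n", fmode, nsrc);
+ */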
+
+/*
+ * Return the IP multicast options in response to user getsockopt().
+ */
+int
+inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
+{
+	struct ip_mreqn		 mreqn;
+	struct ip_moptions	*imo;
+	struct ifnet		*ifp;
+	struct in_ifaddr	*ia;
+	int			 error, optval;
+	unsigned int		 ifindex;
+	u_char			 coptval;
+
+	imo = inp->inp_moptions;
+	/*
+	 * If the socket is neither of type SOCK_RAW nor SOCK_DGRAM,
+	 * or is a divert socket, reject it.
+	 */
+	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
+	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
+	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
+		return (EOPNOTSUPP);
+	}
+
+	error = 0;
+	switch (sopt->sopt_name) {
+#ifdef MROUTING
+	case IP_MULTICAST_VIF:
+		if (imo != NULL) {
+			IMO_LOCK(imo);
+			optval = imo->imo_multicast_vif;
+			IMO_UNLOCK(imo);
+		} else
+			optval = -1;
+		error = sooptcopyout(sopt, &optval, sizeof(int));
+		break;
+#endif /* MROUTING */
+
+	case IP_MULTICAST_IF:
+		memset(&mreqn, 0, sizeof(struct ip_mreqn));
+		if (imo != NULL) {
+			IMO_LOCK(imo);
+			ifp = imo->imo_multicast_ifp;
+			if (!in_nullhost(imo->imo_multicast_addr)) {
+				mreqn.imr_address = imo->imo_multicast_addr;
+			} else if (ifp != NULL) {
+				mreqn.imr_ifindex = ifp->if_index;
+				IFP_TO_IA(ifp, ia);
+				if (ia != NULL) {
+					IFA_LOCK_SPIN(&ia->ia_ifa);
+					mreqn.imr_address =
+					    IA_SIN(ia)->sin_addr;
+					IFA_UNLOCK(&ia->ia_ifa);
+					IFA_REMREF(&ia->ia_ifa);
+				}
+			}
+			IMO_UNLOCK(imo);
+		}
+		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
+			error = sooptcopyout(sopt, &mreqn,
+			    sizeof(struct ip_mreqn));
+		} else {
+			error = sooptcopyout(sopt, &mreqn.imr_address,
+			    sizeof(struct in_addr));
+		}
+		break;
+
+	case IP_MULTICAST_IFINDEX:
+		if (imo != NULL)
+			IMO_LOCK(imo);
+		if (imo == NULL || imo->imo_multicast_ifp == NULL) {
+			ifindex = 0;
+		} else {
+			ifindex = imo->imo_multicast_ifp->if_index;
+		}
+		if (imo != NULL)
+			IMO_UNLOCK(imo);
+		error = sooptcopyout(sopt, &ifindex, sizeof (ifindex));
+		break;
+
+	case IP_MULTICAST_TTL:
+		if (imo == NULL) 
+			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
+		else {
+			IMO_LOCK(imo);
+			optval = coptval = imo->imo_multicast_ttl;
+			IMO_UNLOCK(imo);
+		}
+		if (sopt->sopt_valsize == sizeof(u_char))
+			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
+		else
+			error = sooptcopyout(sopt, &optval, sizeof(int));
+		break;
+
+	case IP_MULTICAST_LOOP:
+		if (imo == NULL)
+			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
+		else {
+			IMO_LOCK(imo);
+			optval = coptval = imo->imo_multicast_loop;
+			IMO_UNLOCK(imo);
+		}
+		if (sopt->sopt_valsize == sizeof(u_char))
+			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
+		else
+			error = sooptcopyout(sopt, &optval, sizeof(int));
+		break;
+
+	case IP_MSFILTER:
+		if (imo == NULL) {
+			error = EADDRNOTAVAIL;
+		} else {
+			error = inp_get_source_filters(inp, sopt);
+		}
+		break;
+
+	default:
+		error = ENOPROTOOPT;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Look up the ifnet to use for a multicast group membership,
+ * given the IPv4 address of an interface, and the IPv4 group address.
+ *
+ * This routine exists to support legacy multicast applications
+ * which do not understand that multicast memberships are scoped to
+ * specific physical links in the networking stack, or which need
+ * to join link-scope groups before IPv4 addresses are configured.
+ *
+ * If inp is non-NULL and is bound to an interface, use this socket's
+ * inp_boundif for any required routing table lookup.
+ *
+ * If the route lookup fails, attempt to use the first non-loopback
+ * interface with multicast capability in the system as a
+ * last resort. The legacy IPv4 ASM API requires that we do
+ * this in order to allow groups to be joined when the routing
+ * table has not yet been populated during boot.
+ *
+ * Returns NULL if no ifp could be found.
+ */
+static struct ifnet *
+inp_lookup_mcast_ifp(const struct inpcb *inp,
+    const struct sockaddr_in *gsin, const struct in_addr ina)
+{
+	struct ifnet	*ifp;
+	unsigned int	 ifindex = 0;
+
+	VERIFY(gsin->sin_family == AF_INET);
+	VERIFY(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)));
+
+	ifp = NULL;
+	if (!in_nullhost(ina)) {
+		struct in_addr new_ina;
+		memcpy(&new_ina, &ina, sizeof(struct in_addr));
+		ifp = ip_multicast_if(&new_ina, &ifindex);
+	} else {
+		struct route ro;
+		unsigned int ifscope = IFSCOPE_NONE;
+
+		if (inp != NULL && (inp->inp_flags & INP_BOUND_IF))
+			ifscope = inp->inp_boundif;
+
+		bzero(&ro, sizeof (ro));
+		memcpy(&ro.ro_dst, gsin, sizeof(struct sockaddr_in));
+		rtalloc_scoped_ign(&ro, 0, ifscope);
+		if (ro.ro_rt != NULL) {
+			ifp = ro.ro_rt->rt_ifp;
+			VERIFY(ifp != NULL);
+			rtfree(ro.ro_rt);
+		} else {
+			struct in_ifaddr *ia;
+			struct ifnet *mifp;
+
+			mifp = NULL;
+			lck_rw_lock_shared(in_ifaddr_rwlock);
+			TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
+				IFA_LOCK_SPIN(&ia->ia_ifa);
+				mifp = ia->ia_ifp;
+				IFA_UNLOCK(&ia->ia_ifa);
+				if (!(mifp->if_flags & IFF_LOOPBACK) &&
+				     (mifp->if_flags & IFF_MULTICAST)) {
+					ifp = mifp;
+					break;
+				}
+			}
+			lck_rw_done(in_ifaddr_rwlock);
+		}
+	}
+
+	return (ifp);
+}
+
+/*
+ * Join an IPv4 multicast group, possibly with a source.
+ *
+ * NB: sopt->sopt_val might point to the kernel address space. This means that
+ * we were called by the IPv6 stack due to the presence of an IPv6 v4 mapped
+ * address. In this scenario, sopt_p points to kernproc and sooptcopyin() will
+ * just issue an in-kernel memcpy.
+ */
+int
+inp_join_group(struct inpcb *inp, struct sockopt *sopt)
+{
+	struct group_source_req		 gsr;
+	sockunion_t			*gsa, *ssa;
+	struct ifnet			*ifp;
+	struct in_mfilter		*imf;
+	struct ip_moptions		*imo;
+	struct in_multi			*inm = NULL;
+	struct in_msource		*lims;
+	size_t				 idx;
+	int				 error, is_new;
+
+	ifp = NULL;
+	imf = NULL;
+	error = 0;
+	is_new = 0;
+
+	memset(&gsr, 0, sizeof(struct group_source_req));
+	gsa = (sockunion_t *)&gsr.gsr_group;
+	gsa->ss.ss_family = AF_UNSPEC;
+	ssa = (sockunion_t *)&gsr.gsr_source;
+	ssa->ss.ss_family = AF_UNSPEC;
+
+	switch (sopt->sopt_name) {
+	case IP_ADD_MEMBERSHIP:
+	case IP_ADD_SOURCE_MEMBERSHIP: {
+		struct ip_mreq_source	 mreqs;
+
+		if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
+			error = sooptcopyin(sopt, &mreqs,
+			    sizeof(struct ip_mreq),
+			    sizeof(struct ip_mreq));
+			/*
+			 * Swap interface and sourceaddr arguments, as
+			 * ip_mreq and ip_mreq_source are laid out
+			 * differently; this avoids using two instances.
+			 */
+			mreqs.imr_interface = mreqs.imr_sourceaddr;
+			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
+		} else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
+			error = sooptcopyin(sopt, &mreqs,
+			    sizeof(struct ip_mreq_source),
+			    sizeof(struct ip_mreq_source));
+		}
+		if (error) {
+			IGMP_PRINTF(("%s: error copyin IP_ADD_MEMBERSHIP/"
+			    "IP_ADD_SOURCE_MEMBERSHIP %d err=%d\n",
+			    __func__, sopt->sopt_name, error));
+			return (error);
+		}
+
+		gsa->sin.sin_family = AF_INET;
+		gsa->sin.sin_len = sizeof(struct sockaddr_in);
+		gsa->sin.sin_addr = mreqs.imr_multiaddr;
+
+		if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
+			ssa->sin.sin_family = AF_INET;
+			ssa->sin.sin_len = sizeof(struct sockaddr_in);
+			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
+		}
+
+		if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+			return (EINVAL);
+
+		ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
+		    mreqs.imr_interface);
+		IGMP_PRINTF(("%s: imr_interface = %s, ifp = %p\n",
+		    __func__, inet_ntoa(mreqs.imr_interface), ifp));
+		break;
+	}
+
+	case MCAST_JOIN_GROUP:
+	case MCAST_JOIN_SOURCE_GROUP:
+		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
+			error = sooptcopyin(sopt, &gsr,
+			    sizeof(struct group_req),
+			    sizeof(struct group_req));
+		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
+			error = sooptcopyin(sopt, &gsr,
+			    sizeof(struct group_source_req),
+			    sizeof(struct group_source_req));
+		}
+		if (error)
+			return (error);
+
+		if (gsa->sin.sin_family != AF_INET ||
+		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
+			return (EINVAL);
+
+		/*
+		 * Overwrite the port field if present, as the sockaddr
+		 * being copied in may be matched with a binary comparison.
+		 */
+		gsa->sin.sin_port = 0;
+		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
+			if (ssa->sin.sin_family != AF_INET ||
+			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
+				return (EINVAL);
+			ssa->sin.sin_port = 0;
+		}
+
+		if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+			return (EINVAL);
+
+		ifnet_head_lock_shared();
+		if (gsr.gsr_interface == 0 ||
+		    (u_int)if_index < gsr.gsr_interface) {
+			ifnet_head_done();
+			return (EADDRNOTAVAIL);
+		}
+		ifp = ifindex2ifnet[gsr.gsr_interface];
+		ifnet_head_done();
+
+		break;
+
+	default:
+		IGMP_PRINTF(("%s: unknown sopt_name %d\n",
+		    __func__, sopt->sopt_name));
+		return (EOPNOTSUPP);
+		break;
+	}
+
+	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
+		return (EADDRNOTAVAIL);
+
+	imo = inp_findmoptions(inp);
+	if (imo == NULL)
+		return (ENOMEM);
+
+	IMO_LOCK(imo);
+	idx = imo_match_group(imo, ifp, &gsa->sa);
+	if (idx == (size_t)-1) {
+		is_new = 1;
+	} else {
+		inm = imo->imo_membership[idx];
+		imf = &imo->imo_mfilters[idx];
+		if (ssa->ss.ss_family != AF_UNSPEC) {
+			/*
+			 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership
+			 * is an error. On an existing inclusive membership,
+			 * it just adds the source to the filter list.
+			 */
+			if (imf->imf_st[1] != MCAST_INCLUDE) {
+				error = EINVAL;
+				goto out_imo_locked;
+			}
+			/*
+			 * Throw out duplicates.
+			 *
+			 * XXX FIXME: This makes a naive assumption that
+			 * even if entries exist for *ssa in this imf,
+			 * they will be rejected as dupes, even if they
+			 * are not valid in the current mode (in-mode).
+			 *
+			 * in_msource is transactioned just as for anything
+			 * else in SSM -- but note naive use of inm_graft()
+			 * below for allocating new filter entries.
+			 *
+			 * This is only an issue if someone mixes the
+			 * full-state SSM API with the delta-based API,
+			 * which is discouraged in the relevant RFCs.
+			 */
+			lims = imo_match_source(imo, idx, &ssa->sa);
+			if (lims != NULL /*&&
+			    lims->imsl_st[1] == MCAST_INCLUDE*/) {
+				error = EADDRNOTAVAIL;
+				goto out_imo_locked;
+			}
+		} else {
+			/*
+			 * MCAST_JOIN_GROUP on an existing exclusive
+			 * membership is an error; return EADDRINUSE
+			 * to preserve 4.4BSD API idempotence, and
+			 * avoid tedious detour to code below.
+			 * NOTE: This is bending RFC 3678 a bit.
+			 *
+			 * On an existing inclusive membership, this is also
+			 * an error; if you want to change filter mode,
+			 * you must use the userland API setsourcefilter().
+			 * XXX We don't reject this for imf in UNDEFINED
+			 * state at t1, because allocation of a filter
+			 * is atomic with allocation of a membership.
+			 */
+			error = EINVAL;
+			/* See comments above for EADDRINUSE */
+			if (imf->imf_st[1] == MCAST_EXCLUDE)
+				error = EADDRINUSE;
+			goto out_imo_locked;
+		}
+	}
+
+	/*
+	 * Begin state merge transaction at socket layer.
+	 */
+
+	if (is_new) {
+		if (imo->imo_num_memberships == imo->imo_max_memberships) {
+			error = imo_grow(imo, 0);
+			if (error)
+				goto out_imo_locked;
+		}
+		/*
+		 * Allocate the new slot upfront so we can deal with
+		 * grafting the new source filter in the same code path
+		 * as for join-source on existing membership.
+		 */
+		idx = imo->imo_num_memberships;
+		imo->imo_membership[idx] = NULL;
+		imo->imo_num_memberships++;
+		VERIFY(imo->imo_mfilters != NULL);
+		imf = &imo->imo_mfilters[idx];
+		VERIFY(RB_EMPTY(&imf->imf_sources));
+	}
+
+	/*
+	 * Graft new source into filter list for this inpcb's
+	 * membership of the group. The in_multi may not have
+	 * been allocated yet if this is a new membership, however,
+	 * the in_mfilter slot will be allocated and must be initialized.
+	 */
+	if (ssa->ss.ss_family != AF_UNSPEC) {
+		/* Membership starts in IN mode */
+		if (is_new) {
+			IGMP_PRINTF(("%s: new join w/source\n", __func__));
+			imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE);
+		} else {
+			IGMP_PRINTF(("%s: %s source\n", __func__, "allow"));
+		}
+		lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin);
+		if (lims == NULL) {
+			IGMP_PRINTF(("%s: merge imf state failed\n",
+			    __func__));
+			error = ENOMEM;
+			goto out_imo_free;
+		}
+	} else {
+		/* No address specified; Membership starts in EX mode */
+		if (is_new) {
+			IGMP_PRINTF(("%s: new join w/o source\n", __func__));
+			imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE);
+		}
+	}
+
+	/*
+	 * Begin state merge transaction at IGMP layer.
+	 */
+
+	if (is_new) {
+		VERIFY(inm == NULL);
+		error = in_joingroup(ifp, &gsa->sin.sin_addr, imf, &inm);
+		VERIFY(inm != NULL || error != 0);
+		if (error)
+			goto out_imo_free;
+		imo->imo_membership[idx] = inm;	/* from in_joingroup() */
+	} else {
+		IGMP_PRINTF(("%s: merge inm state\n", __func__));
+		INM_LOCK(inm);
+		error = inm_merge(inm, imf);
+		if (error) {
+			IGMP_PRINTF(("%s: failed to merge inm state\n",
+			    __func__));
+			INM_UNLOCK(inm);
+			goto out_imf_rollback;
+		}
+		IGMP_PRINTF(("%s: doing igmp downcall\n", __func__));
+		error = igmp_change_state(inm);
+		INM_UNLOCK(inm);
+		if (error) {
+			IGMP_PRINTF(("%s: failed igmp downcall\n",
+			    __func__));
+			goto out_imf_rollback;
+		}
+	}
+
+out_imf_rollback:
+	if (error) {
+		imf_rollback(imf);
+		if (is_new)
+			imf_purge(imf);
+		else
+			imf_reap(imf);
+	} else {
+		imf_commit(imf);
+	}
+
+out_imo_free:
+	if (error && is_new) {
+		VERIFY(inm == NULL);
+		imo->imo_membership[idx] = NULL;
+		--imo->imo_num_memberships;
+	}
+
+out_imo_locked:
+	IMO_UNLOCK(imo);
+	IMO_REMREF(imo);	/* from inp_findmoptions() */
+	return (error);
+}
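+
+/*
+ * Illustrative userland sketch (exposition only; "s" is a UDP socket):
+ * an SSM-style join which lands in the IP_ADD_SOURCE_MEMBERSHIP case
+ * above and starts the membership in INCLUDE mode:
+ *
+ *	struct ip_mreq_source mreqs;
+ *
+ *	memset(&mreqs, 0, sizeof(mreqs));
+ *	mreqs.imr_multiaddr.s_addr = inet_addr("232.0.1.1");
+ *	mreqs.imr_sourceaddr.s_addr = inet_addr("10.0.0.8");
+ *	mreqs.imr_interface.s_addr = INADDR_ANY;	// let the kernel pick
+ *	(void) setsockopt(s, IPPROTO_IP, IP_ADD_SOURCE_MEMBERSHIP,
+ *	    &mreqs, sizeof(mreqs));
+ */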
+
+/*
+ * Leave an IPv4 multicast group on an inpcb, possibly with a source.
+ *
+ * NB: sopt->sopt_val might point to the kernel address space. Refer to the
+ * block comment on top of inp_join_group() for more information.
+ */
+int
+inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
+{
+	struct group_source_req		 gsr;
+	struct ip_mreq_source		 mreqs;
+	sockunion_t			*gsa, *ssa;
+	struct ifnet			*ifp;
+	struct in_mfilter		*imf;
+	struct ip_moptions		*imo;
+	struct in_msource		*ims;
+	struct in_multi			*inm = NULL;
+	size_t				 idx;
+	int				 error, is_final;
+	unsigned int			 ifindex = 0;
+
+	ifp = NULL;
+	error = 0;
+	is_final = 1;
+
+	memset(&gsr, 0, sizeof(struct group_source_req));
+	gsa = (sockunion_t *)&gsr.gsr_group;
+	gsa->ss.ss_family = AF_UNSPEC;
+	ssa = (sockunion_t *)&gsr.gsr_source;
+	ssa->ss.ss_family = AF_UNSPEC;
+
+	switch (sopt->sopt_name) {
+	case IP_DROP_MEMBERSHIP:
+	case IP_DROP_SOURCE_MEMBERSHIP:
+		if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
+			error = sooptcopyin(sopt, &mreqs,
+			    sizeof(struct ip_mreq),
+			    sizeof(struct ip_mreq));
+			/*
+			 * Swap interface and sourceaddr arguments,
+			 * as ip_mreq and ip_mreq_source are laid
+			 * out differently.
+			 */
+			mreqs.imr_interface = mreqs.imr_sourceaddr;
+			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
+		} else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
+			error = sooptcopyin(sopt, &mreqs,
+			    sizeof(struct ip_mreq_source),
+			    sizeof(struct ip_mreq_source));
+		}
+		if (error)
+			return (error);
+
+		gsa->sin.sin_family = AF_INET;
+		gsa->sin.sin_len = sizeof(struct sockaddr_in);
+		gsa->sin.sin_addr = mreqs.imr_multiaddr;
+
+		if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
+			ssa->sin.sin_family = AF_INET;
+			ssa->sin.sin_len = sizeof(struct sockaddr_in);
+			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
+		}
+		/*
+		 * Attempt to look up hinted ifp from interface address.
+		 * Fallthrough with null ifp iff lookup fails, to
+		 * preserve 4.4BSD mcast API idempotence.
+		 * XXX NOTE WELL: The RFC 3678 API is preferred because
+		 * using an IPv4 address as a key is racy.
+		 */
+		if (!in_nullhost(mreqs.imr_interface))
+			ifp = ip_multicast_if(&mreqs.imr_interface, &ifindex);
+
+		IGMP_PRINTF(("%s: imr_interface = %s, ifp = %p\n",
+		    __func__, inet_ntoa(mreqs.imr_interface), ifp));
+
+		break;
+
+	case MCAST_LEAVE_GROUP:
+	case MCAST_LEAVE_SOURCE_GROUP:
+		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
+			error = sooptcopyin(sopt, &gsr,
+			    sizeof(struct group_req),
+			    sizeof(struct group_req));
+		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
+			error = sooptcopyin(sopt, &gsr,
+			    sizeof(struct group_source_req),
+			    sizeof(struct group_source_req));
+		}
+		if (error)
+			return (error);
+
+		if (gsa->sin.sin_family != AF_INET ||
+		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
+			return (EINVAL);
+
+		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
+			if (ssa->sin.sin_family != AF_INET ||
+			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
+				return (EINVAL);
+		}
+
+		ifnet_head_lock_shared();
+		if (gsr.gsr_interface == 0 ||
+		    (u_int)if_index < gsr.gsr_interface) {
+			ifnet_head_done();
+			return (EADDRNOTAVAIL);
+		}
+
+		ifp = ifindex2ifnet[gsr.gsr_interface];
+		ifnet_head_done();
+		break;
+
+	default:
+		IGMP_PRINTF(("%s: unknown sopt_name %d\n",
+		    __func__, sopt->sopt_name));
+		return (EOPNOTSUPP);
+		break;
+	}
+
+	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+		return (EINVAL);
+
+	/*
+	 * Find the membership in the membership array.
+	 */
+	imo = inp_findmoptions(inp);
+	if (imo == NULL)
+		return (ENOMEM);
+
+	IMO_LOCK(imo);
+	idx = imo_match_group(imo, ifp, &gsa->sa);
+	if (idx == (size_t)-1) {
+		error = EADDRNOTAVAIL;
+		goto out_locked;
+	}
+	inm = imo->imo_membership[idx];
+	imf = &imo->imo_mfilters[idx];
+
+	if (ssa->ss.ss_family != AF_UNSPEC) {
+		IGMP_PRINTF(("%s: opt=%d is_final=0\n", __func__,
+		    sopt->sopt_name));
+		is_final = 0;
+	}
+
+	/*
+	 * Begin state merge transaction at socket layer.
+	 */
+
+	/*
+	 * If we were instructed only to leave a given source, do so.
+	 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
+	 */
+	if (is_final) {
+		imf_leave(imf);
+	} else {
+		if (imf->imf_st[0] == MCAST_EXCLUDE) {
+			error = EADDRNOTAVAIL;
+			goto out_locked;
+		}
+		ims = imo_match_source(imo, idx, &ssa->sa);
+		if (ims == NULL) {
+			IGMP_PRINTF(("%s: source %s %spresent\n", __func__,
+			    inet_ntoa(ssa->sin.sin_addr), "not "));
+			error = EADDRNOTAVAIL;
+			goto out_locked;
+		}
+		IGMP_PRINTF(("%s: %s source\n", __func__, "block"));
+		error = imf_prune(imf, &ssa->sin);
+		if (error) {
+			IGMP_PRINTF(("%s: merge imf state failed\n",
+			    __func__));
+			goto out_locked;
+		}
+	}
+
+	/*
+	 * Begin state merge transaction at IGMP layer.
+	 */
+
+	if (is_final) {
+		/*
+		 * Give up the multicast address record to which
+		 * the membership points.  Reference held in imo
+		 * will be released below.
+		 */
+		(void) in_leavegroup(inm, imf);
+	} else {
+		IGMP_PRINTF(("%s: merge inm state\n", __func__));
+		INM_LOCK(inm);
+		error = inm_merge(inm, imf);
+		if (error) {
+			IGMP_PRINTF(("%s: failed to merge inm state\n",
+			    __func__));
+			INM_UNLOCK(inm);
+			goto out_imf_rollback;
+		}
+
+		IGMP_PRINTF(("%s: doing igmp downcall\n", __func__));
+		error = igmp_change_state(inm);
+		if (error) {
+			IGMP_PRINTF(("%s: failed igmp downcall\n", __func__));
+		}
+		INM_UNLOCK(inm);
+	}
+
+out_imf_rollback:
+	if (error)
+		imf_rollback(imf);
+	else
+		imf_commit(imf);
+
+	imf_reap(imf);
+
+	if (is_final) {
+		/* Remove the gap in the membership and filter array. */
+		VERIFY(inm == imo->imo_membership[idx]);
+		imo->imo_membership[idx] = NULL;
+		INM_REMREF(inm);
+		for (++idx; idx < imo->imo_num_memberships; ++idx) {
+			imo->imo_membership[idx-1] = imo->imo_membership[idx];
+			imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx];
+		}
+		imo->imo_num_memberships--;
+	}
+
+out_locked:
+	IMO_UNLOCK(imo);
+	IMO_REMREF(imo);	/* from inp_findmoptions() */
+	return (error);
+}
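+
+/*
+ * Illustrative userland sketch (exposition only; "s" and "mreqs" as in
+ * the join sketch above, "mreq" a matching struct ip_mreq): dropping a
+ * single source prunes one filter entry, while dropping the membership
+ * itself takes the is_final path:
+ *
+ *	(void) setsockopt(s, IPPROTO_IP, IP_DROP_SOURCE_MEMBERSHIP,
+ *	    &mreqs, sizeof(mreqs));	// prune a single source
+ *	(void) setsockopt(s, IPPROTO_IP, IP_DROP_MEMBERSHIP,
+ *	    &mreq, sizeof(mreq));	// leave the group entirely
+ */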
+
+/*
+ * Select the interface for transmitting IPv4 multicast datagrams.
+ *
+ * Either an instance of struct in_addr or an instance of struct ip_mreqn
+ * may be passed to this socket option. An address of INADDR_ANY or an
+ * interface index of 0 is used to remove a previous selection.
+ * When no interface is selected, one is chosen for every send.
+ */
+static int
+inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
+{
+	struct in_addr		 addr;
+	struct ip_mreqn		 mreqn;
+	struct ifnet		*ifp;
+	struct ip_moptions	*imo;
+	int			 error = 0;
+	unsigned int		 ifindex = 0;
+
+	if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
+		/*
+		 * An interface index was specified using the
+		 * Linux-derived ip_mreqn structure.
+		 */
+		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
+		    sizeof(struct ip_mreqn));
+		if (error)
+			return (error);
+
+		ifnet_head_lock_shared();
+		if (mreqn.imr_ifindex < 0 || if_index < mreqn.imr_ifindex) {
+			ifnet_head_done();
+			return (EINVAL);
+		}
+
+		if (mreqn.imr_ifindex == 0) {
+			ifp = NULL;
+		} else {
+			ifp = ifindex2ifnet[mreqn.imr_ifindex];
+			if (ifp == NULL) {
+				ifnet_head_done();
+				return (EADDRNOTAVAIL);
+			}
+		}
+		ifnet_head_done();
+	} else {
+		/*
+		 * An interface was specified by IPv4 address.
+		 * This is the traditional BSD usage.
+		 */
+		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
+		    sizeof(struct in_addr));
+		if (error)
+			return (error);
+		if (in_nullhost(addr)) {
+			ifp = NULL;
+		} else {
+			ifp = ip_multicast_if(&addr, &ifindex);
+			if (ifp == NULL) {
+				IGMP_PRINTF(("%s: can't find ifp for addr=%s\n",
+				    __func__, inet_ntoa(addr)));
+				return (EADDRNOTAVAIL);
+			}
+		}
+#ifdef IGMP_DEBUG0
+		IGMP_PRINTF(("%s: ifp = %p, addr = %s\n", __func__, ifp,
+		    inet_ntoa(addr)));
+#endif
+	}
+
+	/* Reject interfaces which do not support multicast. */
+	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
+		return (EOPNOTSUPP);
+
+	imo = inp_findmoptions(inp);
+	if (imo == NULL)
+		return (ENOMEM);
+
+	IMO_LOCK(imo);
+	imo->imo_multicast_ifp = ifp;
+	if (ifindex)
+		imo->imo_multicast_addr = addr;
+	else
+		imo->imo_multicast_addr.s_addr = INADDR_ANY;
+	IMO_UNLOCK(imo);
+	IMO_REMREF(imo);	/* from inp_findmoptions() */
+
+	return (0);
+}
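+
+/*
+ * Illustrative userland sketch (exposition only): selecting the
+ * outgoing interface by index using the Linux-derived ip_mreqn form,
+ * which the handler above distinguishes by its size:
+ *
+ *	struct ip_mreqn mreqn;
+ *
+ *	memset(&mreqn, 0, sizeof(mreqn));
+ *	mreqn.imr_ifindex = if_nametoindex("en0");
+ *	(void) setsockopt(s, IPPROTO_IP, IP_MULTICAST_IF,
+ *	    &mreqn, sizeof(mreqn));
+ */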
+
+/*
+ * Atomically set source filters on a socket for an IPv4 multicast group.
+ */
+static int
+inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
+{
+	struct __msfilterreq64	 msfr, msfr64;
+	struct __msfilterreq32	 msfr32;
+	sockunion_t		*gsa;
+	struct ifnet		*ifp;
+	struct in_mfilter	*imf;
+	struct ip_moptions	*imo;
+	struct in_multi		*inm;
+	size_t		 	 idx;
+	int			 error;
+	user_addr_t 		 tmp_ptr;
+
+	if (IS_64BIT_PROCESS(current_proc())) {
+		error = sooptcopyin(sopt, &msfr64,
+		    sizeof(struct __msfilterreq64),
+		    sizeof(struct __msfilterreq64));
+		if (error)
+			return (error);
+		/* we never use msfr.msfr_srcs; */
+		memcpy(&msfr, &msfr64, sizeof(msfr));
+	} else {
+		error = sooptcopyin(sopt, &msfr32,
+		    sizeof(struct __msfilterreq32),
+		    sizeof(struct __msfilterreq32));
+		if (error)
+			return (error);
+		/* we never use msfr.msfr_srcs; */
+		memcpy(&msfr, &msfr32, sizeof(msfr));
+	}
+
+	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
+		return (ENOBUFS);
+
+	if ((msfr.msfr_fmode != MCAST_EXCLUDE &&
+	     msfr.msfr_fmode != MCAST_INCLUDE))
+		return (EINVAL);
+
+	if (msfr.msfr_group.ss_family != AF_INET ||
+	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
+		return (EINVAL);
+
+	gsa = (sockunion_t *)&msfr.msfr_group;
+	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+		return (EINVAL);
+
+	gsa->sin.sin_port = 0;	/* ignore port */
+
+	ifnet_head_lock_shared();
+	if (msfr.msfr_ifindex == 0 || (u_int)if_index < msfr.msfr_ifindex) {
+		ifnet_head_done();
+		return (EADDRNOTAVAIL);
+	}
+
+	ifp = ifindex2ifnet[msfr.msfr_ifindex];
+	ifnet_head_done();
+	if (ifp == NULL)
+		return (EADDRNOTAVAIL);
+
+	/*
+	 * Check if this socket is a member of this group.
+	 */
+	imo = inp_findmoptions(inp);
+	if (imo == NULL)
+		return (ENOMEM);
+
+	IMO_LOCK(imo);
+	idx = imo_match_group(imo, ifp, &gsa->sa);
+	if (idx == (size_t)-1 || imo->imo_mfilters == NULL) {
+		error = EADDRNOTAVAIL;
+		goto out_imo_locked;
+	}
+	inm = imo->imo_membership[idx];
+	imf = &imo->imo_mfilters[idx];
+
+	/*
+	 * Begin state merge transaction at socket layer.
+	 */
+
+	imf->imf_st[1] = msfr.msfr_fmode;
+
+	/*
+	 * Apply any new source filters, if present.
+	 * Make a copy of the user-space source vector so
+	 * that we may copy them with a single copyin. This
+	 * allows us to deal with page faults up-front.
+	 */
+	if (msfr.msfr_nsrcs > 0) {
+		struct in_msource	*lims;
+		struct sockaddr_in	*psin;
+		struct sockaddr_storage	*kss, *pkss;
+		int			 i;
+
+		if (IS_64BIT_PROCESS(current_proc()))
+			tmp_ptr = msfr64.msfr_srcs;
+		else
+			tmp_ptr = CAST_USER_ADDR_T(msfr32.msfr_srcs);
+
+		IGMP_PRINTF(("%s: loading %lu source list entries\n",
+		    __func__, (unsigned long)msfr.msfr_nsrcs));
+		kss = _MALLOC(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
+		    M_TEMP, M_WAITOK);
+		if (kss == NULL) {
+			error = ENOMEM;
+			goto out_imo_locked;
+		}
+		error = copyin(tmp_ptr, kss,
+		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
+		if (error) {
+			FREE(kss, M_TEMP);
+			goto out_imo_locked;
+		}
+
+		/*
+		 * Mark all source filters as UNDEFINED at t1.
+		 * Restore new group filter mode, as imf_leave()
+		 * will set it to INCLUDE.
+		 */
+		imf_leave(imf);
+		imf->imf_st[1] = msfr.msfr_fmode;
+
+		/*
+		 * Update socket layer filters at t1, lazy-allocating
+		 * new entries. This saves a bunch of memory at the
+		 * cost of one RB_FIND() per source entry; duplicate
+		 * entries in the msfr_nsrcs vector are ignored.
+		 * If we encounter an error, rollback transaction.
+		 *
+		 * XXX This too could be replaced with a set-symmetric
+		 * difference like loop to avoid walking from root
+		 * every time, as the key space is common.
+		 */
+		for (i = 0, pkss = kss; (u_int)i < msfr.msfr_nsrcs;
+		    i++, pkss++) {
+			psin = (struct sockaddr_in *)pkss;
+			if (psin->sin_family != AF_INET) {
+				error = EAFNOSUPPORT;
+				break;
+			}
+			if (psin->sin_len != sizeof(struct sockaddr_in)) {
+				error = EINVAL;
+				break;
+			}
+			error = imf_get_source(imf, psin, &lims);
+			if (error)
+				break;
+			lims->imsl_st[1] = imf->imf_st[1];
+		}
+		FREE(kss, M_TEMP);
+	}
+
+	if (error)
+		goto out_imf_rollback;
+
+	/*
+	 * Begin state merge transaction at IGMP layer.
+	 */
+	INM_LOCK(inm);
+	IGMP_PRINTF(("%s: merge inm state\n", __func__));
+	error = inm_merge(inm, imf);
+	if (error) {
+		IGMP_PRINTF(("%s: failed to merge inm state\n", __func__));
+		INM_UNLOCK(inm);
+		goto out_imf_rollback;
+	}
+
+	IGMP_PRINTF(("%s: doing igmp downcall\n", __func__));
+	error = igmp_change_state(inm);
+	INM_UNLOCK(inm);
+#ifdef IGMP_DEBUG
+	if (error)
+		IGMP_PRINTF(("%s: failed igmp downcall\n", __func__));
+#endif
+
+out_imf_rollback:
+	if (error)
+		imf_rollback(imf);
+	else
+		imf_commit(imf);
+
+	imf_reap(imf);
+
+out_imo_locked:
+	IMO_UNLOCK(imo);
+	IMO_REMREF(imo);	/* from inp_findmoptions() */
+
+	return (error);
+}
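+
+/*
+ * Illustrative userland sketch (exposition only; assumes the RFC 3678
+ * setipv4sourcefilter() wrapper is available, which reaches the
+ * handler above via setsockopt(IP_MSFILTER)): atomically installing
+ * an INCLUDE filter of two sources:
+ *
+ *	struct in_addr iface, grp, slist[2];
+ *
+ *	iface.s_addr = inet_addr("10.0.0.1");	// local interface address
+ *	grp.s_addr = inet_addr("239.1.1.1");
+ *	slist[0].s_addr = inet_addr("10.0.0.8");
+ *	slist[1].s_addr = inet_addr("10.0.0.9");
+ *	(void) setipv4sourcefilter(s, iface, grp, MCAST_INCLUDE, 2, slist);
+ */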
+
+/*
+ * Set the IP multicast options in response to user setsockopt().
+ *
+ * Many of the socket options handled in this function duplicate the
+ * functionality of socket options in the regular unicast API. However,
+ * it is not possible to merge the duplicate code, because the idempotence
+ * of the IPv4 multicast part of the BSD Sockets API must be preserved;
+ * the effects of these options must be treated as separate and distinct.
+ *
+ * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING
+ * is refactored to no longer use vifs.
+ */
+int
+inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
+{
+	struct ip_moptions	*imo;
+	int			 error;
+	unsigned int		 ifindex;
+	struct ifnet		*ifp;
+
+	error = 0;
+
+	/*
+	 * If the socket is neither of type SOCK_RAW nor SOCK_DGRAM,
+	 * or is a divert socket, reject it.
+	 */
+	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
+	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
+	     inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
+		return (EOPNOTSUPP);
+
+	switch (sopt->sopt_name) {
+#if MROUTING
+	case IP_MULTICAST_VIF: {
+		int vifi;
+		/*
+		 * Select a multicast VIF for transmission.
+		 * Only useful if multicast forwarding is active.
+		 */
+		if (legal_vif_num == NULL) {
+			error = EOPNOTSUPP;
+			break;
+		}
+		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
+		if (error)
+			break;
+		if (!legal_vif_num(vifi) && (vifi != -1)) {
+			error = EINVAL;
+			break;
+		}
+		imo = inp_findmoptions(inp);
+		if (imo == NULL) {
+			error = ENOMEM;
+			break;
+		}
+		IMO_LOCK(imo);
+		imo->imo_multicast_vif = vifi;
+		IMO_UNLOCK(imo);
+		IMO_REMREF(imo);	/* from inp_findmoptions() */
+		break;
+	}
+#endif
+	case IP_MULTICAST_IF:
+		error = inp_set_multicast_if(inp, sopt);
+		break;
+
+	case IP_MULTICAST_IFINDEX:
+		/*
+		 * Select the interface for outgoing multicast packets.
+		 */
+		error = sooptcopyin(sopt, &ifindex, sizeof (ifindex),
+		    sizeof (ifindex));
+		if (error)
+			break;
+
+		imo = inp_findmoptions(inp);
+		if (imo == NULL) {
+			error = ENOMEM;
+			break;
+		}
+		/*
+		 * Index 0 is used to remove a previous selection.
+		 * When no interface is selected, a default one is
+		 * chosen every time a multicast packet is sent.
+		 */
+		if (ifindex == 0) {
+			IMO_LOCK(imo);
+			imo->imo_multicast_ifp = NULL;
+			IMO_UNLOCK(imo);
+			IMO_REMREF(imo);	/* from inp_findmoptions() */
+			break;
+		}
+
+		ifnet_head_lock_shared();
+		/* No need to check if ifindex is < 0 since it's unsigned */
+		if ((unsigned int)if_index < ifindex) {
+			ifnet_head_done();
+			IMO_REMREF(imo);	/* from inp_findmoptions() */
+			error = ENXIO;	/* per IPV6_MULTICAST_IF */
+			break;
+		}
+		ifp = ifindex2ifnet[ifindex];
+		ifnet_head_done();
+
+		/* If it's detached or isn't a multicast interface, bail out */
+		if (ifp == NULL || !(ifp->if_flags & IFF_MULTICAST)) {
+			IMO_REMREF(imo);	/* from inp_findmoptions() */
+			error = EADDRNOTAVAIL;
+			break;
+		}
+		IMO_LOCK(imo);
+		imo->imo_multicast_ifp = ifp;
+		/*
+		 * Clear out any remnants of past IP_MULTICAST_IF.  The addr
+		 * isn't really used anywhere in the kernel; we could have
+		 * iterated through the addresses of the interface and picked
+		 * here, but that is redundant since ip_getmoptions() already
+		 * takes care of that for INADDR_ANY.
+		 */
+		imo->imo_multicast_addr.s_addr = INADDR_ANY;
+		IMO_UNLOCK(imo);
+		IMO_REMREF(imo);	/* from inp_findmoptions() */
+		break;
+
+	case IP_MULTICAST_TTL: {
+		u_char ttl;
+
+		/*
+		 * Set the IP time-to-live for outgoing multicast packets.
+		 * The original multicast API required a char argument,
+		 * which is inconsistent with the rest of the socket API.
+		 * We allow either a char or an int.
+		 */
+		if (sopt->sopt_valsize == sizeof(u_char)) {
+			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
+			    sizeof(u_char));
+			if (error)
+				break;
+		} else {
+			u_int ittl;
+
+			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
+			    sizeof(u_int));
+			if (error)
+				break;
+			if (ittl > 255) {
+				error = EINVAL;
+				break;
+			}
+			ttl = (u_char)ittl;
+		}
+		imo = inp_findmoptions(inp);
+		if (imo == NULL) {
+			error = ENOMEM;
+			break;
+		}
+		IMO_LOCK(imo);
+		imo->imo_multicast_ttl = ttl;
+		IMO_UNLOCK(imo);
+		IMO_REMREF(imo);	/* from inp_findmoptions() */
+		break;
+	}
+
+	case IP_MULTICAST_LOOP: {
+		u_char loop;
+
+		/*
+		 * Set the loopback flag for outgoing multicast packets.
+		 * Must be zero or one.  The original multicast API required a
+		 * char argument, which is inconsistent with the rest
+		 * of the socket API.  We allow either a char or an int.
+		 */
+		if (sopt->sopt_valsize == sizeof(u_char)) {
+			error = sooptcopyin(sopt, &loop, sizeof(u_char),
+			    sizeof(u_char));
+			if (error)
+				break;
+		} else {
+			u_int iloop;
+
+			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
+					    sizeof(u_int));
+			if (error)
+				break;
+			loop = (u_char)iloop;
+		}
+		imo = inp_findmoptions(inp);
+		if (imo == NULL) {
+			error = ENOMEM;
+			break;
+		}
+		IMO_LOCK(imo);
+		imo->imo_multicast_loop = !!loop;
+		IMO_UNLOCK(imo);
+		IMO_REMREF(imo);	/* from inp_findmoptions() */
+		break;
+	}
+
+	case IP_ADD_MEMBERSHIP:
+	case IP_ADD_SOURCE_MEMBERSHIP:
+	case MCAST_JOIN_GROUP:
+	case MCAST_JOIN_SOURCE_GROUP:
+		error = inp_join_group(inp, sopt);
+		break;
+
+	case IP_DROP_MEMBERSHIP:
+	case IP_DROP_SOURCE_MEMBERSHIP:
+	case MCAST_LEAVE_GROUP:
+	case MCAST_LEAVE_SOURCE_GROUP:
+		error = inp_leave_group(inp, sopt);
+		break;
+
+	case IP_BLOCK_SOURCE:
+	case IP_UNBLOCK_SOURCE:
+	case MCAST_BLOCK_SOURCE:
+	case MCAST_UNBLOCK_SOURCE:
+		error = inp_block_unblock_source(inp, sopt);
+		break;
+
+	case IP_MSFILTER:
+		error = inp_set_source_filters(inp, sopt);
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Expose IGMP's multicast filter mode and source list(s) to userland,
+ * keyed by (ifindex, group).
+ * The filter mode is written out as a uint32_t, followed by
+ * 0..n of struct in_addr.
+ * For use by ifmcstat(8).
+ */
+static int
+sysctl_ip_mcast_filters SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp)
+
+	struct in_addr			 src, group;
+	struct ifnet			*ifp;
+	struct in_multi			*inm;
+	struct in_multistep		step;
+	struct ip_msource		*ims;
+	int				*name;
+	int				 retval = 0;
+	u_int				 namelen;
+	uint32_t			 fmode, ifindex;
+
+	name = (int *)arg1;
+	namelen = (u_int)arg2;
+
+	if (req->newptr != USER_ADDR_NULL)
+		return (EPERM);
+
+	if (namelen != 2)
+		return (EINVAL);
+
+	ifindex = name[0];
+	ifnet_head_lock_shared();
+	if (ifindex <= 0 || ifindex > (u_int)if_index) {
+		IGMP_PRINTF(("%s: ifindex %u out of range\n",
+		    __func__, ifindex));
+		ifnet_head_done();
+		return (ENOENT);
+	}
+
+	group.s_addr = name[1];
+	if (!IN_MULTICAST(ntohl(group.s_addr))) {
+		IGMP_PRINTF(("%s: group %s is not multicast\n",
+		    __func__, inet_ntoa(group)));
+		ifnet_head_done();
+		return (EINVAL);
+	}
+
+	ifp = ifindex2ifnet[ifindex];
+	ifnet_head_done();
+	if (ifp == NULL) {
+		IGMP_PRINTF(("%s: no ifp for ifindex %u\n", __func__, ifindex));
+		return (ENOENT);
+	}
+
+	in_multihead_lock_shared();
+	IN_FIRST_MULTI(step, inm);
+	while (inm != NULL) {
+		INM_LOCK(inm);
+		if (inm->inm_ifp != ifp)
+			goto next;
+
+		if (!in_hosteq(inm->inm_addr, group))
+			goto next;
+
+		fmode = inm->inm_st[1].iss_fmode;
+		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
+		if (retval != 0) {
+			INM_UNLOCK(inm);
+			break;		/* abort */
+		}
+		RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
+#ifdef IGMP_DEBUG
+			struct in_addr ina;
+			ina.s_addr = htonl(ims->ims_haddr);
+			IGMP_PRINTF(("%s: visit node %s\n", __func__,
+			    inet_ntoa(ina)));
+#endif
+			/*
+			 * Only copy-out sources which are in-mode.
+			 */
+			if (fmode != ims_get_mode(inm, ims, 1)) {
+				IGMP_PRINTF(("%s: skip non-in-mode\n",
+				    __func__));
+				continue; /* process next source */
+			}
+			src.s_addr = htonl(ims->ims_haddr);
+			retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr));
+			if (retval != 0)
+				break;	/* process next inm */
+		}
+next:
+		INM_UNLOCK(inm);
+		IN_NEXT_MULTI(step, inm);
+	}
+	in_multihead_lock_done();
+
+	return (retval);
+}
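+
+/*
+ * Illustrative userland sketch (exposition only; assumes this handler
+ * is registered as "net.inet.ip.mcast.filters" elsewhere, per the BSD
+ * lineage of this code). The two trailing name components carry the
+ * ifindex and the group address, matching the namelen == 2 check above:
+ *
+ *	int mib[7];
+ *	size_t miblen = 5, len = 0;
+ *
+ *	(void) sysctlnametomib("net.inet.ip.mcast.filters", mib, &miblen);
+ *	mib[miblen] = ifindex;
+ *	mib[miblen + 1] = (int)group.s_addr;
+ *	(void) sysctl(mib, (u_int)miblen + 2, NULL, &len, NULL, 0);
+ */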
+
+/*
+ * XXX
+ * The whole multicast option thing needs to be re-thought.
+ * Several of these options are equally applicable to non-multicast
+ * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
+ * standard option (IP_TTL).
+ */
+/*
+ * Following RFC 1724 section 3.3, 0.0.0.0/8 is interpreted as an interface index.
+ */
+static struct ifnet *
+ip_multicast_if(struct in_addr *a, unsigned int *ifindexp)
+{
+	unsigned int ifindex;
+	struct ifnet *ifp;
+
+	if (ifindexp != NULL)
+		*ifindexp = 0;
+	if (ntohl(a->s_addr) >> 24 == 0) {
+		ifindex = ntohl(a->s_addr) & 0xffffff;
+		ifnet_head_lock_shared();
+		/* No need to check if ifindex is < 0 since it's unsigned */
+		if ((unsigned int)if_index < ifindex) {
+			ifnet_head_done();
+			return (NULL);
+		}
+		ifp = ifindex2ifnet[ifindex];
+		ifnet_head_done();
+		if (ifp != NULL && ifindexp != NULL)
+			*ifindexp = ifindex;
+	} else {
+		INADDR_TO_IFP(*a, ifp);
+	}
+	return (ifp);
+}
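+
+/*
+ * Illustrative sketch (exposition only): per RFC 1724, an "address"
+ * with a zero high octet carries an interface index in its low 24
+ * bits, so 0.0.0.9 selects ifindex 9 rather than naming a host:
+ *
+ *	struct in_addr a;
+ *	struct ifnet *ifp;
+ *	unsigned int ifindex;
+ *
+ *	a.s_addr = htonl(9);			// 0.0.0.9
+ *	ifp = ip_multicast_if(&a, &ifindex);	// ifindex == 9 on success
+ */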
+
+void
+in_multi_init(void)
+{
+	PE_parse_boot_argn("ifa_debug", &inm_debug, sizeof (inm_debug));
+
+	/* Setup lock group and attribute for in_multihead */
+	in_multihead_lock_grp_attr = lck_grp_attr_alloc_init();
+	in_multihead_lock_grp = lck_grp_alloc_init("in_multihead",
+	    in_multihead_lock_grp_attr);
+	in_multihead_lock_attr = lck_attr_alloc_init();
+	lck_rw_init(&in_multihead_lock, in_multihead_lock_grp,
+	    in_multihead_lock_attr);
+
+	lck_mtx_init(&inm_trash_lock, in_multihead_lock_grp,
+	    in_multihead_lock_attr);
+	TAILQ_INIT(&inm_trash_head);
+
+	inm_size = (inm_debug == 0) ? sizeof (struct in_multi) :
+	    sizeof (struct in_multi_dbg);
+	inm_zone = zinit(inm_size, INM_ZONE_MAX * inm_size,
+	    0, INM_ZONE_NAME);
+	if (inm_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, INM_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(inm_zone, Z_EXPAND, TRUE);
+
+	ipms_size = sizeof (struct ip_msource);
+	ipms_zone = zinit(ipms_size, IPMS_ZONE_MAX * ipms_size,
+	    0, IPMS_ZONE_NAME);
+	if (ipms_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, IPMS_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(ipms_zone, Z_EXPAND, TRUE);
+
+	inms_size = sizeof (struct in_msource);
+	inms_zone = zinit(inms_size, INMS_ZONE_MAX * inms_size,
+	    0, INMS_ZONE_NAME);
+	if (inms_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, INMS_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(inms_zone, Z_EXPAND, TRUE);
+}
+
+static struct in_multi *
+in_multi_alloc(int how)
+{
+	struct in_multi *inm;
+
+	inm = (how == M_WAITOK) ? zalloc(inm_zone) : zalloc_noblock(inm_zone);
+	if (inm != NULL) {
+		bzero(inm, inm_size);
+		lck_mtx_init(&inm->inm_lock, in_multihead_lock_grp,
+		    in_multihead_lock_attr);
+		inm->inm_debug |= IFD_ALLOC;
+		if (inm_debug != 0) {
+			inm->inm_debug |= IFD_DEBUG;
+			inm->inm_trace = inm_trace;
+		}
+	}
+	return (inm);
+}
+
+static void
+in_multi_free(struct in_multi *inm)
+{
+	INM_LOCK(inm);
+	if (inm->inm_debug & IFD_ATTACHED) {
+		panic("%s: attached inm=%p is being freed", __func__, inm);
+		/* NOTREACHED */
+	} else if (inm->inm_ifma != NULL) {
+		panic("%s: ifma not NULL for inm=%p", __func__, inm);
+		/* NOTREACHED */
+	} else if (!(inm->inm_debug & IFD_ALLOC)) {
+		panic("%s: inm %p cannot be freed", __func__, inm);
+		/* NOTREACHED */
+	} else if (inm->inm_refcount != 0) {
+		panic("%s: non-zero refcount inm=%p", __func__, inm);
+		/* NOTREACHED */
+	} else if (inm->inm_reqcnt != 0) {
+		panic("%s: non-zero reqcnt inm=%p", __func__, inm);
+		/* NOTREACHED */
+	}
+
+	/* Free any pending IGMPv3 state-change records */
+	IF_DRAIN(&inm->inm_scq);
+
+	inm->inm_debug &= ~IFD_ALLOC;
+	if ((inm->inm_debug & (IFD_DEBUG | IFD_TRASHED)) ==
+	    (IFD_DEBUG | IFD_TRASHED)) {
+		lck_mtx_lock(&inm_trash_lock);
+		TAILQ_REMOVE(&inm_trash_head, (struct in_multi_dbg *)inm,
+		    inm_trash_link);
+		lck_mtx_unlock(&inm_trash_lock);
+		inm->inm_debug &= ~IFD_TRASHED;
+	}
+	INM_UNLOCK(inm);
+
+	lck_mtx_destroy(&inm->inm_lock, in_multihead_lock_grp);
+	zfree(inm_zone, inm);
+}
+
+static void
+in_multi_attach(struct in_multi *inm)
+{
+	in_multihead_lock_assert(LCK_RW_ASSERT_EXCLUSIVE);
+	INM_LOCK_ASSERT_HELD(inm);
+
+	if (inm->inm_debug & IFD_ATTACHED) {
+		panic("%s: Attempt to attach an already attached inm=%p",
+		    __func__, inm);
+		/* NOTREACHED */
+	} else if (inm->inm_debug & IFD_TRASHED) {
+		panic("%s: Attempt to reattach a detached inm=%p",
+		    __func__, inm);
+		/* NOTREACHED */
+	}
+
+	inm->inm_reqcnt++;
+	VERIFY(inm->inm_reqcnt == 1);
+	INM_ADDREF_LOCKED(inm);
+	inm->inm_debug |= IFD_ATTACHED;
+	/*
+	 * Reattach case:  If debugging is enabled, take it
+	 * out of the trash list and clear IFD_TRASHED.
+	 */
+	if ((inm->inm_debug & (IFD_DEBUG | IFD_TRASHED)) ==
+	    (IFD_DEBUG | IFD_TRASHED)) {
+		/* Become a regular mutex, just in case */
+		INM_CONVERT_LOCK(inm);
+		lck_mtx_lock(&inm_trash_lock);
+		TAILQ_REMOVE(&inm_trash_head, (struct in_multi_dbg *)inm,
+		    inm_trash_link);
+		lck_mtx_unlock(&inm_trash_lock);
+		inm->inm_debug &= ~IFD_TRASHED;
+	}
+
+	LIST_INSERT_HEAD(&in_multihead, inm, inm_link);
+}
+
+int
+in_multi_detach(struct in_multi *inm)
+{
+	in_multihead_lock_assert(LCK_RW_ASSERT_EXCLUSIVE);
+	INM_LOCK_ASSERT_HELD(inm);
+
+	if (inm->inm_reqcnt == 0) {
+		panic("%s: inm=%p negative reqcnt", __func__, inm);
+		/* NOTREACHED */
+	}
+
+	--inm->inm_reqcnt;
+	if (inm->inm_reqcnt > 0)
+		return (0);
+
+	if (!(inm->inm_debug & IFD_ATTACHED)) {
+		panic("%s: Attempt to detach an unattached record inm=%p",
+		    __func__, inm);
+		/* NOTREACHED */
+	} else if (inm->inm_debug & IFD_TRASHED) {
+		panic("%s: inm %p is already in trash list", __func__, inm);
+		/* NOTREACHED */
+	}
+
+	/*
+	 * NOTE: Caller calls IFMA_REMREF
+	 */
+	inm->inm_debug &= ~IFD_ATTACHED;
+	LIST_REMOVE(inm, inm_link);
+
+	if (inm->inm_debug & IFD_DEBUG) {
+		/* Become a regular mutex, just in case */
+		INM_CONVERT_LOCK(inm);
+		lck_mtx_lock(&inm_trash_lock);
+		TAILQ_INSERT_TAIL(&inm_trash_head,
+		    (struct in_multi_dbg *)inm, inm_trash_link);
+		lck_mtx_unlock(&inm_trash_lock);
+		inm->inm_debug |= IFD_TRASHED;
+	}
+
+	return (1);
+}
+
+void
+inm_addref(struct in_multi *inm, int locked)
+{
+	if (!locked)
+		INM_LOCK_SPIN(inm);
+	else
+		INM_LOCK_ASSERT_HELD(inm);
+
+	if (++inm->inm_refcount == 0) {
+		panic("%s: inm=%p wraparound refcnt", __func__, inm);
+		/* NOTREACHED */
+	} else if (inm->inm_trace != NULL) {
+		(*inm->inm_trace)(inm, TRUE);
+	}
+	if (!locked)
+		INM_UNLOCK(inm);
+}
+
+void
+inm_remref(struct in_multi *inm, int locked)
+{
+	struct ifmultiaddr *ifma;
+	struct igmp_ifinfo *igi;
+
+	if (!locked)
+		INM_LOCK_SPIN(inm);
+	else
+		INM_LOCK_ASSERT_HELD(inm);
+
+	if (inm->inm_refcount == 0 || (inm->inm_refcount == 1 && locked)) {
+		panic("%s: inm=%p negative/missing refcnt", __func__, inm);
+		/* NOTREACHED */
+	} else if (inm->inm_trace != NULL) {
+		(*inm->inm_trace)(inm, FALSE);
+	}
+
+	--inm->inm_refcount;
+	if (inm->inm_refcount > 0) {
+		if (!locked)
+			INM_UNLOCK(inm);
+		return;
+	}
+
+	/*
+	 * Synchronization with in_getmulti().  In the event the inm has been
+	 * detached, the underlying ifma would still be in the if_multiaddrs
+	 * list, and thus can be looked up via if_addmulti().  At that point,
+	 * the only way to find this inm is via ifma_protospec.  To avoid
+	 * race conditions between the last inm_remref() of that inm and its
+	 * use via ifma_protospec, in_multihead lock is used for serialization.
+	 * In order to avoid violating the lock order, we must drop inm_lock
+	 * before acquiring in_multihead lock.  To prevent the inm from being
+	 * freed prematurely, we hold an extra reference.
+	 */
+	++inm->inm_refcount;
+	INM_UNLOCK(inm);
+	in_multihead_lock_shared();
+	INM_LOCK_SPIN(inm);
+	--inm->inm_refcount;
+	if (inm->inm_refcount > 0) {
+		/* We've lost the race, so abort since inm is still in use */
+		INM_UNLOCK(inm);
+		in_multihead_lock_done();
+		/* If it was locked, return it as such */
+		if (locked)
+			INM_LOCK(inm);
+		return;
+	}
+	inm_purge(inm);
+	ifma = inm->inm_ifma;
+	inm->inm_ifma = NULL;
+	inm->inm_ifp = NULL;
+	igi = inm->inm_igi;
+	inm->inm_igi = NULL;
+	INM_UNLOCK(inm);
+	IFMA_LOCK_SPIN(ifma);
+	ifma->ifma_protospec = NULL;
+	IFMA_UNLOCK(ifma);
+	in_multihead_lock_done();
+
+	in_multi_free(inm);
+	if_delmulti_ifma(ifma);
+	/* Release reference held to the underlying ifmultiaddr */
+	IFMA_REMREF(ifma);
+
+	if (igi != NULL)
+		IGI_REMREF(igi);
+}
+
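The comment in inm_remref() above describes a general drop-and-revalidate pattern:
take a temporary reference, drop the object lock, acquire the list lock in the
legal order, then re-check the refcount before freeing. A standalone userland
analogue with POSIX threads (purely illustrative; all names here are invented):

	#include <pthread.h>
	#include <stdlib.h>

	struct obj {
		pthread_mutex_t	o_lock;
		int		o_refcount;
	};

	/* Protects the global list that can hand out new references */
	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

	static void
	obj_destroy(struct obj *o)
	{
		pthread_mutex_destroy(&o->o_lock);
		free(o);
	}

	void
	obj_release(struct obj *o)
	{
		pthread_mutex_lock(&o->o_lock);
		if (--o->o_refcount > 0) {
			pthread_mutex_unlock(&o->o_lock);
			return;
		}
		/*
		 * Possibly the last reference.  Re-acquire locks in the
		 * legal order (list before object), holding a temporary
		 * reference so the object cannot be freed underneath us.
		 */
		o->o_refcount++;
		pthread_mutex_unlock(&o->o_lock);
		pthread_mutex_lock(&list_lock);
		pthread_mutex_lock(&o->o_lock);
		if (--o->o_refcount > 0) {
			/* Lost the race: someone found it via the list */
			pthread_mutex_unlock(&o->o_lock);
			pthread_mutex_unlock(&list_lock);
			return;
		}
		/* Still the last reference; unlink here, then free */
		pthread_mutex_unlock(&o->o_lock);
		pthread_mutex_unlock(&list_lock);
		obj_destroy(o);
	}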
+static void
+inm_trace(struct in_multi *inm, int refhold)
+{
+	struct in_multi_dbg *inm_dbg = (struct in_multi_dbg *)inm;
+	ctrace_t *tr;
+	u_int32_t idx;
+	u_int16_t *cnt;
+
+	if (!(inm->inm_debug & IFD_DEBUG)) {
+		panic("%s: inm %p has no debug structure", __func__, inm);
+		/* NOTREACHED */
+	}
+	if (refhold) {
+		cnt = &inm_dbg->inm_refhold_cnt;
+		tr = inm_dbg->inm_refhold;
+	} else {
+		cnt = &inm_dbg->inm_refrele_cnt;
+		tr = inm_dbg->inm_refrele;
+	}
+
+	idx = atomic_add_16_ov(cnt, 1) % INM_TRACE_HIST_SIZE;
+	ctrace_record(&tr[idx]);
+}
+
+void
+in_multihead_lock_exclusive(void)
+{
+	lck_rw_lock_exclusive(&in_multihead_lock);
+}
+
+void
+in_multihead_lock_shared(void)
+{
+	lck_rw_lock_shared(&in_multihead_lock);
+}
+
+void
+in_multihead_lock_assert(int what)
+{
+	lck_rw_assert(&in_multihead_lock, what);
+}
+
+void
+in_multihead_lock_done(void)
+{
+	lck_rw_done(&in_multihead_lock);
+}
+
+static struct ip_msource *
+ipms_alloc(int how)
+{
+	struct ip_msource *ims;
+
+	ims = (how == M_WAITOK) ? zalloc(ipms_zone) : zalloc_noblock(ipms_zone);
+	if (ims != NULL)
+		bzero(ims, ipms_size);
+
+	return (ims);
+}
+
+static void
+ipms_free(struct ip_msource *ims)
+{
+	zfree(ipms_zone, ims);
+}
+
+static struct in_msource *
+inms_alloc(int how)
+{
+	struct in_msource *inms;
+
+	inms = (how == M_WAITOK) ? zalloc(inms_zone) :
+	    zalloc_noblock(inms_zone);
+	if (inms != NULL)
+		bzero(inms, inms_size);
+
+	return (inms);
+}
+
+static void
+inms_free(struct in_msource *inms)
+{
+	zfree(inms_zone, inms);
+}
+
+#ifdef IGMP_DEBUG
+
+static const char *inm_modestrs[] = { "un", "in", "ex" };
+
+static const char *
+inm_mode_str(const int mode)
+{
+	if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
+		return (inm_modestrs[mode]);
+	return ("??");
+}
+
+static const char *inm_statestrs[] = {
+	"not-member",
+	"silent",
+	"idle",
+	"lazy",
+	"sleeping",
+	"awakening",
+	"query-pending",
+	"sg-query-pending",
+	"leaving"
+};
+
+static const char *
+inm_state_str(const int state)
+{
+	if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER)
+		return (inm_statestrs[state]);
+	return ("??");
+}
+
+/*
+ * Dump an in_multi structure to the console.
+ */
+void
+inm_print(const struct in_multi *inm)
+{
+	int t;
+
+	INM_LOCK_ASSERT_HELD(INM_CAST_TO_NONCONST(inm));
+
+	if (igmp_debug == 0)
+		return;
+
+	printf("%s: --- begin inm %p ---\n", __func__, inm);
+	printf("addr %s ifp %p(%s%d) ifma %p\n",
+	    inet_ntoa(inm->inm_addr),
+	    inm->inm_ifp,
+	    inm->inm_ifp->if_name,
+	    inm->inm_ifp->if_unit,
+	    inm->inm_ifma);
+	printf("timer %u state %s refcount %u scq.len %u\n",
+	    inm->inm_timer,
+	    inm_state_str(inm->inm_state),
+	    inm->inm_refcount,
+	    inm->inm_scq.ifq_len);
+	printf("igi %p nsrc %lu sctimer %u scrv %u\n",
+	    inm->inm_igi,
+	    inm->inm_nsrc,
+	    inm->inm_sctimer,
+	    inm->inm_scrv);
+	for (t = 0; t < 2; t++) {
+		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
+		    inm_mode_str(inm->inm_st[t].iss_fmode),
+		    inm->inm_st[t].iss_asm,
+		    inm->inm_st[t].iss_ex,
+		    inm->inm_st[t].iss_in,
+		    inm->inm_st[t].iss_rec);
+	}
+	printf("%s: --- end inm %p ---\n", __func__, inm);
+}
+
+#else
+
+void
+inm_print(__unused const struct in_multi *inm)
+{
+
+}
+
+#endif /* IGMP_DEBUG */
diff --git a/bsd/netinet/in_pcb.c b/bsd/netinet/in_pcb.c
index 696222176..51eeca0b3 100644
--- a/bsd/netinet/in_pcb.c
+++ b/bsd/netinet/in_pcb.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -75,6 +75,9 @@
 #endif
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
+#include <sys/mcache.h>
+#include <sys/kauth.h>
+#include <sys/priv.h>
 #include <libkern/OSAtomic.h>
 
 #include <machine/limits.h>
@@ -156,17 +159,17 @@ sysctl_net_ipport_check SYSCTL_HANDLER_ARGS
 
 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports");
 
-SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
-SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
-SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
-SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
-SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
-SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
+SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
 
 extern int	udp_use_randomport;
@@ -233,14 +236,17 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, __unused struct proc *
 	}
 	mac_inpcb_label_associate(so, inp);
 #endif
+	/* Make sure inp_stat is always 64-bit aligned */
+	inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store,
+	    sizeof(u_int64_t));
+	if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store)
+		+ sizeof(*inp->inp_stat) > sizeof(inp->inp_stat_store)) {
+		panic("insufficient space to align inp_stat");
+	}
+
 	so->so_pcb = (caddr_t)inp;
 
 	if (so->so_proto->pr_flags & PR_PCBLOCK) {
-		inp->inpcb_mtx = lck_mtx_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr);
-		if (inp->inpcb_mtx == NULL) {
-			printf("in_pcballoc: can't alloc mutex! so=%p\n", so);
-			return(ENOMEM);
-		}
+		lck_mtx_init(&inp->inpcb_mtx, pcbinfo->mtx_grp, pcbinfo->mtx_attr);
 	}
 
 #if IPSEC
@@ -297,7 +303,7 @@ in_pcblookup_local_and_cleanup(
 	if (inp && inp->inp_wantcnt == WNT_STOPUSING) {
 		struct socket *so = inp->inp_socket;
 		
-		lck_mtx_lock(inp->inpcb_mtx);
+		lck_mtx_lock(&inp->inpcb_mtx);
 		
 		if (so->so_usecount == 0) {
 			if (inp->inp_state != INPCB_STATE_DEAD)
@@ -306,7 +312,7 @@ in_pcblookup_local_and_cleanup(
 			inp = NULL;
 		}
 		else {
-			lck_mtx_unlock(inp->inpcb_mtx);
+			lck_mtx_unlock(&inp->inpcb_mtx);
 		}
 	}
 	
@@ -324,6 +330,8 @@ in_pcb_conflict_post_msg(u_int16_t port)
 	struct kev_msg        ev_msg;
 	struct kev_in_portinuse	in_portinuse;
 
+	bzero(&in_portinuse, sizeof(struct kev_in_portinuse));
+	bzero(&ev_msg, sizeof(struct kev_msg));
 	in_portinuse.port = ntohs(port);	/* port in host order */
 	in_portinuse.req_pid = proc_selfpid();
 	ev_msg.vendor_code = KEV_VENDOR_APPLE;
@@ -344,7 +352,7 @@ in_pcb_conflict_post_msg(u_int16_t port)
  *		EACCES			Permission denied
  *		EADDRINUSE		Address in use
  *		EAGAIN			Resource unavailable, try again
- *		proc_suser:EPERM	Operation not permitted
+ *		priv_check_cred:EPERM	Operation not permitted
  */
 int
 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
@@ -356,6 +364,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
 	u_short lport = 0, rand_port = 0;
 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
 	int error, randomport, conflict = 0;
+	kauth_cred_t cred;
 
 	if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
 		return (EADDRNOTAVAIL);
@@ -366,6 +375,8 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
 	socket_unlock(so, 0); /* keep reference on socket */
 	lck_rw_lock_exclusive(pcbinfo->mtx);
 	if (nam) {
+		unsigned int outif = 0;
+
 		sin = (struct sockaddr_in *)nam;
 		if (nam->sa_len != sizeof (*sin)) {
 			lck_rw_done(pcbinfo->mtx);
@@ -403,7 +414,10 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
 				return (EADDRNOTAVAIL);
 			}
 			else {
-				ifafree(ifa);
+				IFA_LOCK(ifa);
+				outif = ifa->ifa_ifp->if_index;
+				IFA_UNLOCK(ifa);
+				IFA_REMREF(ifa);
 			}
 		}
 		if (lport) {
@@ -411,10 +425,15 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
 
 			/* GROSS */
 #if !CONFIG_EMBEDDED
-			if (ntohs(lport) < IPPORT_RESERVED && proc_suser(p)) {
-				lck_rw_done(pcbinfo->mtx);
-				socket_lock(so, 0);
-				return (EACCES);
+			if (ntohs(lport) < IPPORT_RESERVED) {
+				cred = kauth_cred_proc_ref(p);
+				error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
+				kauth_cred_unref(&cred);
+				if (error != 0) {
+					lck_rw_done(pcbinfo->mtx);
+					socket_lock(so, 0);
+					return (EACCES);
+				}
 			}
 #endif
 			if (so->so_uid &&
@@ -487,6 +506,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
 			}
 		}
 		inp->inp_laddr = sin->sin_addr;
+		inp->inp_last_outif = outif;
 	}
 	if (lport == 0) {
 		u_short first, last;
@@ -502,7 +522,10 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
 			last  = ipport_hilastauto;
 			lastport = &pcbinfo->lasthi;
 		} else if (inp->inp_flags & INP_LOWPORT) {
-			if ((error = proc_suser(p)) != 0) {
+			cred = kauth_cred_proc_ref(p);
+			error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
+			kauth_cred_unref(&cred);
+			if (error != 0) {
 				lck_rw_done(pcbinfo->mtx);
 				socket_lock(so, 0);
 				return error;
@@ -541,6 +564,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
 					lck_rw_done(pcbinfo->mtx);
 					socket_lock(so, 0);
 					inp->inp_laddr.s_addr = INADDR_ANY;
+					inp->inp_last_outif = 0;
 					return (EADDRNOTAVAIL);
 				}
 				--*lastport;
@@ -564,6 +588,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
 					lck_rw_done(pcbinfo->mtx);
 					socket_lock(so, 0);
 					inp->inp_laddr.s_addr = INADDR_ANY;
+					inp->inp_last_outif = 0;
 					return (EADDRNOTAVAIL);
 				}
 				++*lastport;
@@ -579,6 +604,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
 	if (in_pcbinshash(inp, 1) != 0) {
 		inp->inp_laddr.s_addr = INADDR_ANY;
 		inp->inp_lport = 0;
+		inp->inp_last_outif = 0;
 		lck_rw_done(pcbinfo->mtx);
 		return (EAGAIN);
 	}
@@ -605,7 +631,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
  */
 int
 in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
-	    struct sockaddr_in **plocal_sin)
+    struct sockaddr_in *plocal_sin, unsigned int *out_ifscope)
 {
 	struct in_ifaddr *ia;
 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
@@ -619,6 +645,7 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
 
 	lck_rw_lock_shared(in_ifaddr_rwlock);
 	if (!TAILQ_EMPTY(&in_ifaddrhead)) {
+		ia = TAILQ_FIRST(&in_ifaddrhead);
 		/*
 		 * If the destination address is INADDR_ANY,
 		 * use the primary local address.
@@ -629,21 +656,34 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
 #define	satosin(sa)	((struct sockaddr_in *)(sa))
 #define sintosa(sin)	((struct sockaddr *)(sin))
 #define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
+		IFA_LOCK_SPIN(&ia->ia_ifa);
 		if (sin->sin_addr.s_addr == INADDR_ANY)
-		    sin->sin_addr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr;
+			sin->sin_addr = IA_SIN(ia)->sin_addr;
 		else if (sin->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST &&
-		  (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & IFF_BROADCAST))
-		    sin->sin_addr = satosin(&TAILQ_FIRST(&in_ifaddrhead)->ia_broadaddr)->sin_addr;
+		    (ia->ia_ifp->if_flags & IFF_BROADCAST))
+			sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr;
+		IFA_UNLOCK(&ia->ia_ifa);
+		ia = NULL;
 	}
 	lck_rw_done(in_ifaddr_rwlock);
 
 	if (inp->inp_laddr.s_addr == INADDR_ANY) {
 		struct route *ro;
-		unsigned int ifscope;
-
+		unsigned int ifscope = IFSCOPE_NONE;
+		unsigned int nocell;
+		/*
+		 * If the socket is bound to a specific interface, the
+		 * optional scope, if set by the caller, takes
+		 * precedence over that binding.
+		 */
 		ia = (struct in_ifaddr *)0;
-		ifscope = (inp->inp_flags & INP_BOUND_IF) ?
-		    inp->inp_boundif : IFSCOPE_NONE;
+
+		if (out_ifscope != NULL && *out_ifscope != IFSCOPE_NONE)
+			ifscope = *out_ifscope;
+		else if (inp->inp_flags & INP_BOUND_IF)
+			ifscope = inp->inp_boundif;
+
+		nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
 		/*
 		 * If route is known or can be allocated now,
 		 * our src addr is taken from the i/f, else punt.
@@ -672,10 +712,23 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
 			ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
 			((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
 				sin->sin_addr;
-			rtalloc_scoped_ign(ro, 0, ifscope);
+			rtalloc_scoped(ro, ifscope);
 			if (ro->ro_rt != NULL)
 				RT_LOCK_SPIN(ro->ro_rt);
 		}
+		/*
+		 * If the route points to a cellular interface and the
+		 * caller forbids our using interfaces of such type,
+		 * pretend that there is no route.
+		 */
+		if (nocell && ro->ro_rt != NULL) {
+			RT_LOCK_ASSERT_HELD(ro->ro_rt);
+			if (ro->ro_rt->rt_ifp->if_type == IFT_CELLULAR) {
+				RT_UNLOCK(ro->ro_rt);
+				rtfree(ro->ro_rt);
+				ro->ro_rt = NULL;
+			}
+		}
 		/*
 		 * If we found a route, use the address
 		 * corresponding to the outgoing interface
@@ -683,11 +736,13 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
 		 * to our address on another net goes to loopback).
 		 */
 		if (ro->ro_rt != NULL) {
-			RT_LOCK_ASSERT_HELD(ro->ro_rt);
+			/* Become a regular mutex */
+			RT_CONVERT_LOCK(ro->ro_rt);
 			if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
 				ia = ifatoia(ro->ro_rt->rt_ifa);
-				if (ia)
-					ifaref(&ia->ia_ifa);
+				if (ia) {
+					IFA_ADDREF(&ia->ia_ifa);
+				}
 			}
 			RT_UNLOCK(ro->ro_rt);
 		}
@@ -705,9 +760,19 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
 				lck_rw_lock_shared(in_ifaddr_rwlock);
 				ia = TAILQ_FIRST(&in_ifaddrhead);
 				if (ia)
-					ifaref(&ia->ia_ifa);
+					IFA_ADDREF(&ia->ia_ifa);
 				lck_rw_done(in_ifaddr_rwlock);
 			}
+			/*
+			 * If the source address belongs to a cellular interface
+			 * and the socket forbids our using interfaces of such
+			 * type, pretend that there is no source address.
+			 */
+			if (nocell && ia != NULL &&
+			    ia->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR) {
+				IFA_REMREF(&ia->ia_ifa);
+				ia = NULL;
+			}
 			if (ia == 0)
 				return (EADDRNOTAVAIL);
 		}
@@ -722,29 +787,37 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
 			struct ifnet *ifp;
 
 			imo = inp->inp_moptions;
+			IMO_LOCK(imo);
 			if (imo->imo_multicast_ifp != NULL && (ia == NULL ||
 				ia->ia_ifp != imo->imo_multicast_ifp)) {
 				ifp = imo->imo_multicast_ifp;
 				if (ia)
-					ifafree(&ia->ia_ifa);
+					IFA_REMREF(&ia->ia_ifa);
 				lck_rw_lock_shared(in_ifaddr_rwlock);
 				TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
 					if (ia->ia_ifp == ifp)
 						break;
 				}
 				if (ia)
-					ifaref(&ia->ia_ifa);
+					IFA_ADDREF(&ia->ia_ifa);
 				lck_rw_done(in_ifaddr_rwlock);
-				if (ia == 0)
+				if (ia == 0) {
+					IMO_UNLOCK(imo);
 					return (EADDRNOTAVAIL);
+				}
 			}
+			IMO_UNLOCK(imo);
 		}
 		/*
 		 * Don't do pcblookup call here; return interface in plocal_sin
 		 * and exit to caller, that will do the lookup.
 		 */
-		*plocal_sin = &ia->ia_addr;
-		ifafree(&ia->ia_ifa);
+		IFA_LOCK_SPIN(&ia->ia_ifa);
+		*plocal_sin = ia->ia_addr;
+		if (out_ifscope != NULL)
+			*out_ifscope = ia->ia_ifp->if_index;
+		IFA_UNLOCK(&ia->ia_ifa);
+		IFA_REMREF(&ia->ia_ifa);
 	}
 	return(0);
 }
@@ -757,9 +830,9 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
  * then pick one.
  */
 int
-in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
+in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, unsigned int *ifscope)
 {
-	struct sockaddr_in *ifaddr;
+	struct sockaddr_in ifaddr;
 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
 	struct inpcb *pcb;
 	int error;
@@ -767,14 +840,23 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
 	/*
 	 *   Call inner routine, to assign local interface address.
 	 */
-	if ((error = in_pcbladdr(inp, nam, &ifaddr)) != 0)
+	if ((error = in_pcbladdr(inp, nam, &ifaddr, ifscope)) != 0)
 		return(error);
 
 	socket_unlock(inp->inp_socket, 0);
 	pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
-	    inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
+	    inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr.sin_addr,
 	    inp->inp_lport, 0, NULL);
 	socket_lock(inp->inp_socket, 0);
+
+	/* Check if the socket is still in a valid state. When we unlock this 
+	 * embryonic socket, it can get aborted if another thread is closing 
+	 * the listener (radar 7947600).
+	 */
+	if ((inp->inp_socket->so_flags & SOF_ABORTED) != 0) {
+		return ECONNREFUSED;
+	}
+
 	if (pcb != NULL) {
 		in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
 		return (EADDRINUSE);
@@ -791,7 +873,8 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
 			lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
 			socket_lock(inp->inp_socket, 0);
 		}
-		inp->inp_laddr = ifaddr->sin_addr;
+		inp->inp_laddr = ifaddr.sin_addr;
+		inp->inp_last_outif = ifscope ? *ifscope : IFSCOPE_NONE;
 		inp->inp_flags |= INP_INADDR_ANY;
 	}
 	 else {
@@ -858,6 +941,7 @@ in_pcbdetach(struct inpcb *inp)
 #endif
 	if ((so->so_flags & SOF_PCBCLEARING) == 0) {
 		struct rtentry *rt;
+		struct ip_moptions *imo;
 
 		inp->inp_vflag = 0;
 		if (inp->inp_options) 
@@ -866,8 +950,10 @@ in_pcbdetach(struct inpcb *inp)
 			inp->inp_route.ro_rt = NULL;
 			rtfree(rt);
 		}
-		ip_freemoptions(inp->inp_moptions);
+		imo = inp->inp_moptions;
 		inp->inp_moptions = NULL;
+		if (imo != NULL)
+			IMO_REMREF(imo);
 		sofreelastref(so, 0);
 		inp->inp_state = INPCB_STATE_DEAD;
 		so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
@@ -886,9 +972,10 @@ in_pcbdispose(struct inpcb *inp)
 		printf("in_pcbdispose: not dead yet? so=%p\n", so);
 	}
 #endif
-
 	if (so && so->so_usecount != 0)
-		panic("in_pcbdispose: use count=%x so=%p\n", so->so_usecount, so);
+		panic("%s: so %p so_usecount %d so_lockhistory %s\n",
+			__func__, so, so->so_usecount,
+			(so != NULL) ? solockhistory_nr(so) : "--");
 
 	lck_rw_assert(ipi->mtx, LCK_RW_ASSERT_EXCLUSIVE);
 
@@ -909,8 +996,8 @@ in_pcbdispose(struct inpcb *inp)
 			}
 			if (so->so_head != NULL)
 				panic("in_pcbdispose, so=%p head still exist\n", so);
-  			lck_mtx_unlock(inp->inpcb_mtx);	
-  			lck_mtx_free(inp->inpcb_mtx, ipi->mtx_grp);	
+  			lck_mtx_unlock(&inp->inpcb_mtx);	
+  			lck_mtx_destroy(&inp->inpcb_mtx, ipi->mtx_grp);	
 		}
 		so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */
 		so->so_saved_pcb = (caddr_t) inp;
@@ -1075,7 +1162,7 @@ in_losing(struct inpcb *inp)
 		if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) {
 			inp->inp_route.ro_rt = NULL;
 			rtfree(rt);
-			ifafree(&ia->ia_ifa);
+			IFA_REMREF(&ia->ia_ifa);
 		}
 		/*
 		 * A new route can be allocated
@@ -1099,7 +1186,7 @@ in_rtchange(struct inpcb *inp, __unused int errno)
 		if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) == NULL) {
 			return; /* we can't remove the route now. not sure if still ok to use src */
 		}
-		ifafree(&ia->ia_ifa);
+		IFA_REMREF(&ia->ia_ifa);
 		rtfree(rt);
 		inp->inp_route.ro_rt = NULL;
 		/*
@@ -1200,6 +1287,131 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
 	}
 }
 
+/*
+ * Check if PCB exists in hash list.
+ */
+int
+in_pcblookup_hash_exists(
+	struct inpcbinfo *pcbinfo,
+	struct in_addr faddr,
+	u_int fport_arg,
+	struct in_addr laddr,
+	u_int lport_arg,
+	int wildcard,
+	uid_t *uid,
+	gid_t *gid,
+	__unused struct ifnet *ifp)
+{
+	struct inpcbhead *head;
+	struct inpcb *inp;
+	u_short fport = fport_arg, lport = lport_arg;
+	int found;
+
+	*uid = UID_MAX;
+	*gid = GID_MAX;
+
+	/*
+	 * We may have found the pcb in the last lookup - check this first.
+	 */
+
+	lck_rw_lock_shared(pcbinfo->mtx);
+
+	/*
+	 * First look for an exact match.
+	 */
+	head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
+	    pcbinfo->hashmask)];
+	LIST_FOREACH(inp, head, inp_hash) {
+#if INET6
+		if ((inp->inp_vflag & INP_IPV4) == 0)
+			continue;
+#endif
+		if (inp->inp_faddr.s_addr == faddr.s_addr &&
+		    inp->inp_laddr.s_addr == laddr.s_addr &&
+		    inp->inp_fport == fport &&
+		    inp->inp_lport == lport) {
+			if ((found = (inp->inp_socket != NULL))) {
+				/*
+				 * Found.
+				 */
+				*uid = inp->inp_socket->so_uid;
+				*gid = inp->inp_socket->so_gid;
+			}
+			lck_rw_done(pcbinfo->mtx);
+			return (found);
+		}
+	}
+	if (wildcard) {
+		struct inpcb *local_wild = NULL;
+#if INET6
+		struct inpcb *local_wild_mapped = NULL;
+#endif
+
+		head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
+		    pcbinfo->hashmask)];
+		LIST_FOREACH(inp, head, inp_hash) {
+#if INET6
+			if ((inp->inp_vflag & INP_IPV4) == 0)
+				continue;
+#endif
+			if (inp->inp_faddr.s_addr == INADDR_ANY &&
+			    inp->inp_lport == lport) {
+#if defined(NFAITH) && NFAITH > 0
+				if (ifp && ifp->if_type == IFT_FAITH &&
+				    (inp->inp_flags & INP_FAITH) == 0)
+					continue;
+#endif
+				if (inp->inp_laddr.s_addr == laddr.s_addr) {
+					if ((found = (inp->inp_socket != NULL))) {
+						*uid = inp->inp_socket->so_uid;
+						*gid = inp->inp_socket->so_gid;
+					}
+					lck_rw_done(pcbinfo->mtx);
+					return (found);
+				}
+				else if (inp->inp_laddr.s_addr == INADDR_ANY) {
+#if INET6
+					if (inp->inp_socket &&
+					    INP_CHECK_SOCKAF(inp->inp_socket,
+					    AF_INET6))
+						local_wild_mapped = inp;
+					else
+#endif /* INET6 */
+					local_wild = inp;
+				}
+			}
+		}
+		if (local_wild == NULL) {
+#if INET6
+			if (local_wild_mapped != NULL) {
+				if ((found = (local_wild_mapped->inp_socket != NULL))) {
+					*uid = local_wild_mapped->inp_socket->so_uid;
+					*gid = local_wild_mapped->inp_socket->so_gid;
+				}
+				lck_rw_done(pcbinfo->mtx);
+				return (found);
+			}
+#endif /* INET6 */
+			lck_rw_done(pcbinfo->mtx);
+			return (0);
+		}
+		if (local_wild != NULL) {
+			if ((found = (local_wild->inp_socket != NULL))) {
+				*uid = local_wild->inp_socket->so_uid;
+				*gid = local_wild->inp_socket->so_gid;
+			}
+			lck_rw_done(pcbinfo->mtx);
+			return (found);
+		}
+	}
+
+	/*
+	 * Not found.
+	 */
+	lck_rw_done(pcbinfo->mtx);
+	return (0);
+}
+
 /*
  * Lookup PCB in hash list.
  */
@@ -1336,10 +1548,15 @@ in_pcbinshash(struct inpcb *inp, int locked)
 
         if (!locked) {
                 if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) {
-                /*lock inversion issue, mostly with udp multicast packets */
+                	/* lock inversion issue, mostly with UDP multicast packets */
                         socket_unlock(inp->inp_socket, 0);
                         lck_rw_lock_exclusive(pcbinfo->mtx);
                         socket_lock(inp->inp_socket, 0);
+			if (inp->inp_state == INPCB_STATE_DEAD) {
+				/* The socket got dropped when it was unlocked */
+				lck_rw_done(pcbinfo->mtx);
+				return(ECONNABORTED);
+			}
                 }
         }
 
@@ -1458,6 +1675,7 @@ in_pcb_checkstate(struct inpcb *pcb, int mode, int locked)
 			if (locked == 0)
 				socket_lock(pcb->inp_socket, 1);
 			pcb->inp_state = INPCB_STATE_DEAD;
+
 stopusing:
 			if (pcb->inp_socket->so_usecount < 0)
 				panic("in_pcb_checkstate STOP pcb=%p so=%p usecount is negative\n", pcb, pcb->inp_socket);
@@ -1569,25 +1787,26 @@ inpcb_to_xinpcb64(
         struct inpcb *inp,
         struct xinpcb64 *xinp)
 {
-        xinp->inp_fport = inp->inp_fport;
-        xinp->inp_lport = inp->inp_lport;
-        xinp->inp_gencnt = inp->inp_gencnt;
-        xinp->inp_flags = inp->inp_flags;
-        xinp->inp_flow = inp->inp_flow;
-        xinp->inp_vflag = inp->inp_vflag;
-        xinp->inp_ip_ttl = inp->inp_ip_ttl;
-        xinp->inp_ip_p = inp->inp_ip_p;
-        xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
-        xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
-        xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
-        xinp->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim;
-        xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
+	xinp->inp_fport = inp->inp_fport;
+	xinp->inp_lport = inp->inp_lport;
+	xinp->inp_gencnt = inp->inp_gencnt;
+	xinp->inp_flags = inp->inp_flags;
+	xinp->inp_flow = inp->inp_flow;
+	xinp->inp_vflag = inp->inp_vflag;
+	xinp->inp_ip_ttl = inp->inp_ip_ttl;
+	xinp->inp_ip_p = inp->inp_ip_p;
+	xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
+	xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
+	xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
+	xinp->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim;
+	xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
 	xinp->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex;
-        xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
+	xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
 }
 
 #endif /* !CONFIG_EMBEDDED */
 
 /*
  * The following routines implement this scheme:
  *
@@ -1619,7 +1838,7 @@ inp_route_copyout(struct inpcb *inp, struct route *dst)
 {
 	struct route *src = &inp->inp_route;
 
-	lck_mtx_assert(inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
+	lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
 
 	/*
 	 * If the route in the PCB is not for IPv4, blow it away;
@@ -1629,13 +1848,8 @@ inp_route_copyout(struct inpcb *inp, struct route *dst)
 		rtfree(src->ro_rt);
 		src->ro_rt = NULL;
 	}
-
-	/* Copy everything (rt, dst, flags) from PCB */
-	bcopy(src, dst, sizeof (*dst));
-
-	/* Hold one reference for the local copy of struct route */
-	if (dst->ro_rt != NULL)
-		RT_ADDREF(dst->ro_rt);
+	
+	route_copyout(dst, src, sizeof(*dst));
 }
 
 void
@@ -1643,33 +1857,61 @@ inp_route_copyin(struct inpcb *inp, struct route *src)
 {
 	struct route *dst = &inp->inp_route;
 
-	lck_mtx_assert(inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
+	lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
 
 	/* Minor sanity check */
 	if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
 		panic("%s: wrong or corrupted route: %p", __func__, src);
 
-	/* No cached route in the PCB? */
-	if (dst->ro_rt == NULL) {
-		/*
-		 * Copy everything (rt, dst, flags) from ip_output();
-		 * the reference to the route was held at the time
-		 * it was allocated and is kept intact.
-		 */
-		bcopy(src, dst, sizeof (*dst));
-	} else if (src->ro_rt != NULL) {
-		/*
-		 * If the same, update just the ro_flags and ditch the one
-		 * in the local copy.  Else ditch the one that is currently
-		 * cached, and cache what we got back from ip_output().
-		 */
-		if (dst->ro_rt == src->ro_rt) {
-			dst->ro_flags = src->ro_flags;
-			rtfree(src->ro_rt);
-			src->ro_rt = NULL;
-		} else {
-			rtfree(dst->ro_rt);
-			bcopy(src, dst, sizeof (*dst));
-		}
+	route_copyin(src, dst, sizeof(*src));
+}
+
+/*
+ * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option.
+ */
+void
+inp_bindif(struct inpcb *inp, unsigned int ifscope)
+{
+	/*
+	 * A zero interface scope value indicates an "unbind".
+	 * Otherwise, take in whatever value the app desires;
+	 * the app may already know the scope (or force itself
+	 * to such a scope) ahead of time before the interface
+	 * gets attached.  It doesn't matter either way; any
+	 * route lookup from this point on will require an
+	 * exact match for the embedded interface scope.
+	 */
+	inp->inp_boundif = ifscope;
+	if (inp->inp_boundif == IFSCOPE_NONE)
+		inp->inp_flags &= ~INP_BOUND_IF;
+	else
+		inp->inp_flags |= INP_BOUND_IF;
+
+	/* Blow away any cached route in the PCB */
+	if (inp->inp_route.ro_rt != NULL) {
+		rtfree(inp->inp_route.ro_rt);
+		inp->inp_route.ro_rt = NULL;
+	}
+}
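A short userland sketch of driving this handler through the IP_BOUND_IF socket
option; the interface name is an example and error handling is minimal:

	#include <net/if.h>
	#include <netinet/in.h>
	#include <sys/socket.h>
	#include <stdio.h>
	#include <unistd.h>

	int
	main(void)
	{
		int s = socket(AF_INET, SOCK_DGRAM, 0);
		unsigned int idx = if_nametoindex("en0");	/* example */

		if (s < 0)
			return (1);
		/* Scope all route lookups for this socket to one interface */
		if (setsockopt(s, IPPROTO_IP, IP_BOUND_IF, &idx,
		    sizeof (idx)) < 0)
			perror("setsockopt(IP_BOUND_IF)");
		/* Passing idx == 0 (IFSCOPE_NONE in the kernel) unbinds */
		close(s);
		return (0);
	}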
+
+/*
+ * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option.
+ */
+int
+inp_nocellular(struct inpcb *inp, unsigned int val)
+{
+	if (val) {
+		inp->inp_flags |= INP_NO_IFT_CELLULAR;
+	} else if (inp->inp_flags & INP_NO_IFT_CELLULAR) {
+		/* once set, it cannot be unset */
+		return (EINVAL);
 	}
+
+	/* Blow away any cached route in the PCB */
+	if (inp->inp_route.ro_rt != NULL) {
+		rtfree(inp->inp_route.ro_rt);
+		inp->inp_route.ro_rt = NULL;
+	}
+
+	return (0);
 }
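The inp_stat alignment done in in_pcballoc() above relies on over-allocating the
in-line storage by one u_int64_t and rounding the pointer up. A self-contained
sketch of the same technique; P2ROUNDUP is defined locally here with the usual
bit trick, since the kernel's own definition (picked up via the new
<sys/mcache.h> include) is not shown in this patch:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Round x up to the next multiple of align (a power of 2) */
	#define P2ROUNDUP(x, align) \
		(-(-((uintptr_t)(x)) & -((uintptr_t)(align))))

	struct stat64ish {
		uint64_t rxpackets, rxbytes, txpackets, txbytes;
	};

	int
	main(void)
	{
		/* One u_int64_t of slack guarantees an aligned window fits */
		uint8_t store[sizeof (struct stat64ish) + sizeof (uint64_t)];
		struct stat64ish *sp;

		sp = (struct stat64ish *)P2ROUNDUP(store, sizeof (uint64_t));
		assert((uintptr_t)sp % sizeof (uint64_t) == 0);
		assert(((uintptr_t)sp - (uintptr_t)store) +
		    sizeof (*sp) <= sizeof (store));
		printf("aligned at offset %lu\n",
		    (unsigned long)((uintptr_t)sp - (uintptr_t)store));
		return (0);
	}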
diff --git a/bsd/netinet/in_pcb.h b/bsd/netinet/in_pcb.h
index a793f3a12..728b93e33 100644
--- a/bsd/netinet/in_pcb.h
+++ b/bsd/netinet/in_pcb.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -100,8 +100,8 @@ typedef	u_quad_t	inp_gen_t;
 
 /*
  * PCB with AF_INET6 null bind'ed laddr can receive AF_INET input packet.
- * So, AF_INET6 null laddr is also used as AF_INET null laddr,
- * by utilize following structure. (At last, same as INRIA)
+ * So, AF_INET6 null laddr is also used as AF_INET null laddr, by utilizing
+ * the following structure.
  */
 struct in_addr_4in6 {
 	u_int32_t	ia46_pad32[3];
@@ -120,6 +120,14 @@ struct	icmp6_filter;
 struct	label;
 #endif
 
+struct inp_stat
+{
+	u_int64_t	rxpackets;
+	u_int64_t	rxbytes;
+	u_int64_t	txpackets;
+	u_int64_t	txbytes;
+};
+
 struct inpcb {
 	LIST_ENTRY(inpcb) inp_hash;	/* hash list */
 	int		inp_wantcnt;		/* pcb wanted count. protected by pcb list lock */
@@ -127,7 +135,7 @@ struct inpcb {
 	u_short	inp_fport;		/* foreign port */
 	u_short	inp_lport;		/* local port */
 	LIST_ENTRY(inpcb) inp_list;	/* list for all PCBs of this proto */
-	caddr_t	inp_ppcb;		/* pointer to per-protocol pcb */
+	void	*inp_ppcb;		/* pointer to per-protocol pcb */
 	struct	inpcbinfo *inp_pcbinfo;	/* PCB list info */
 	struct	socket *inp_socket;	/* back pointer to socket */
 	u_char	nat_owner;		/* Used to NAT TCP/UDP traffic */
@@ -187,16 +195,15 @@ struct inpcb {
 	int	hash_element;           /* Array index of pcb's hash list    */
 	caddr_t inp_saved_ppcb;		/* place to save pointer while cached */
 	struct inpcbpolicy *inp_sp;
-#ifdef _KERN_LOCKS_H_
-	lck_mtx_t *inpcb_mtx;	/* inpcb per-socket mutex */
-#else
-	void	  *inpcb_mtx;
-#endif
+	decl_lck_mtx_data( ,inpcb_mtx);	/* inpcb per-socket mutex */
 	unsigned int inp_boundif;	/* interface scope for INP_BOUND_IF */
-	u_int32_t inp_reserved[3];	/* reserved for future use */
+	unsigned int inp_last_outif;	/* last known outgoing interface */
+	u_int32_t inp_reserved[2];	/* reserved for future use */
 #if CONFIG_MACF_NET
 	struct label *inp_label;	/* MAC label */
 #endif
+	struct inp_stat	*inp_stat;
+	u_int8_t		inp_stat_store[sizeof(struct inp_stat) + sizeof(u_int64_t)];
 };
 
 #endif /* KERNEL_PRIVATE */
@@ -355,29 +362,70 @@ struct	xinpcb64 {
 	u_char			inp_vflag;
 	u_char			inp_ip_ttl;	/* time to live */
 	u_char			inp_ip_p;	/* protocol */
-        union {					/* foreign host table entry */
-                struct  in_addr_4in6	inp46_foreign;
-                struct  in6_addr	inp6_foreign;
-        }			inp_dependfaddr;
-        union {					/* local host table entry */
-                struct  in_addr_4in6	inp46_local;
-                struct  in6_addr	inp6_local;
-        }			inp_dependladdr;
-        struct {
-                u_char		inp4_ip_tos;	/* type of service */
-        }			inp_depend4;
-        struct {
-                u_int8_t        inp6_hlim;
-		int		inp6_cksum;
-                u_short		inp6_ifindex;
-                short   	inp6_hops;
-        }			inp_depend6;
-        struct  xsocket64       xi_socket;
+	union {					/* foreign host table entry */
+			struct  in_addr_4in6	inp46_foreign;
+			struct  in6_addr	inp6_foreign;
+	}			inp_dependfaddr;
+	union {					/* local host table entry */
+			struct  in_addr_4in6	inp46_local;
+			struct  in6_addr	inp6_local;
+	}			inp_dependladdr;
+	struct {
+			u_char		inp4_ip_tos;	/* type of service */
+	}			inp_depend4;
+	struct {
+			u_int8_t        inp6_hlim;
+			int		inp6_cksum;
+			u_short		inp6_ifindex;
+			short   	inp6_hops;
+	}			inp_depend6;
+	struct  xsocket64       xi_socket;
 	u_quad_t		xi_alignment_hack;
 };
 
 #endif /* !CONFIG_EMBEDDED */
 
+#ifdef PRIVATE
+
+struct xinpcb_list_entry {
+	u_int64_t	le_next;
+	u_int64_t	le_prev;
+};
+
+struct	xinpcb_n {
+	u_int32_t		xi_len;		/* length of this structure */
+	u_int32_t		xi_kind;		/* XSO_INPCB */
+	u_int64_t		xi_inpp;
+	u_short 		inp_fport;	/* foreign port */
+	u_short			inp_lport;	/* local port */
+	u_int64_t		inp_ppcb;	/* pointer to per-protocol pcb */
+	inp_gen_t		inp_gencnt;	/* generation count of this instance */
+	int			inp_flags;	/* generic IP/datagram flags */
+	u_int32_t		inp_flow;
+	u_char			inp_vflag;
+	u_char			inp_ip_ttl;	/* time to live */
+	u_char			inp_ip_p;	/* protocol */
+	union {					/* foreign host table entry */
+		struct  in_addr_4in6	inp46_foreign;
+		struct  in6_addr	inp6_foreign;
+	}				inp_dependfaddr;
+	union {					/* local host table entry */
+		struct  in_addr_4in6	inp46_local;
+		struct  in6_addr	inp6_local;
+	}				inp_dependladdr;
+	struct {
+		u_char		inp4_ip_tos;	/* type of service */
+	}				inp_depend4;
+	struct {
+		u_int8_t	inp6_hlim;
+		int			inp6_cksum;
+		u_short		inp6_ifindex;
+		short		inp6_hops;
+	}				inp_depend6;
+};
+
+#endif /* PRIVATE */
+
 struct	xinpgen {
 	u_int32_t xig_len;	/* length of this structure */
 	u_int	xig_count;	/* number of PCBs at this time */
@@ -419,6 +467,7 @@ struct	xinpgen {
 #define	in6p_ppcb	inp_ppcb  /* for KAME src sync over BSD*'s */
 #define	in6p_state	inp_state
 #define	in6p_wantcnt	inp_wantcnt
+#define	in6p_last_outif	inp_last_outif
 
 #ifdef KERNEL_PRIVATE
 struct inpcbport {
@@ -477,31 +526,36 @@ struct inpcbinfo {		/* XXX documentation, prefixes */
 #ifdef __APPLE__
 #define INP_STRIPHDR		0x200	/* Strip headers in raw_ip, for OT support */
 #endif
-#define  INP_FAITH			0x400   /* accept FAITH'ed connections */
+#define  INP_FAITH		0x400   /* accept FAITH'ed connections */
 #define  INP_INADDR_ANY 	0x800   /* local address wasn't specified */
 
 #define INP_RECVTTL		0x1000
 #define	INP_UDP_NOCKSUM		0x2000	/* Turn off outbound UDP checksum */
 #define	INP_BOUND_IF		0x4000	/* bind socket to an ifindex */
 
-#define IN6P_IPV6_V6ONLY	0x008000 /* restrict AF_INET6 socket for v6 */
-
-#define	IN6P_PKTINFO		0x010000 /* receive IP6 dst and I/F */
-#define	IN6P_HOPLIMIT		0x020000 /* receive hoplimit */
-#define	IN6P_HOPOPTS		0x040000 /* receive hop-by-hop options */
-#define	IN6P_DSTOPTS		0x080000 /* receive dst options after rthdr */
-#define	IN6P_RTHDR			0x100000 /* receive routing header */
+#define IN6P_IPV6_V6ONLY	0x8000 /* restrict AF_INET6 socket for v6 */
+#define	IN6P_PKTINFO		0x10000 /* receive IP6 dst and I/F */
+#define	IN6P_HOPLIMIT		0x20000 /* receive hoplimit */
+#define	IN6P_HOPOPTS		0x40000 /* receive hop-by-hop options */
+#define	IN6P_DSTOPTS		0x80000 /* receive dst options after rthdr */
+#define	IN6P_RTHDR		0x100000 /* receive routing header */
 #define	IN6P_RTHDRDSTOPTS	0x200000 /* receive dstoptions before rthdr */
-#define	IN6P_TCLASS			0x400000 /* receive traffic class value */
+#define	IN6P_TCLASS		0x400000 /* receive traffic class value */
 #define	IN6P_AUTOFLOWLABEL	0x800000 /* attach flowlabel automatically */
-#define	IN6P_BINDV6ONLY		0x10000000 /* do not grab IPv4 traffic */
+#define	IN6P_BINDV6ONLY		0x1000000 /* do not grab IPv4 traffic */
+#define	IN6P_RFC2292		0x2000000 /* used RFC2292 API on the socket */
+#define	IN6P_MTU		0x4000000 /* receive path MTU */
+#define	INP_PKTINFO		0x8000000 /* receive and send PKTINFO for IPv4 */
+
+#define	INP_NO_IFT_CELLULAR	0x20000000 /* do not use IFT_CELLULAR route */
 
 #ifdef KERNEL_PRIVATE
 #define	INP_CONTROLOPTS		(INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\
-				 INP_RECVIF|INP_RECVTTL|\
+				 INP_RECVIF|INP_RECVTTL|INP_PKTINFO|\
 				 IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\
 				 IN6P_DSTOPTS|IN6P_RTHDR|IN6P_RTHDRDSTOPTS|\
-				 IN6P_TCLASS|IN6P_AUTOFLOWLABEL)
+				 IN6P_TCLASS|IN6P_RFC2292|IN6P_MTU)
+
 #define	INP_UNMAPPABLEOPTS	(IN6P_HOPOPTS|IN6P_DSTOPTS|IN6P_RTHDR|\
 				 IN6P_TCLASS|IN6P_AUTOFLOWLABEL)
 
@@ -513,6 +567,7 @@ struct inpcbinfo {		/* XXX documentation, prefixes */
 #define	IN6P_MTUDISC		INP_MTUDISC
 #define	IN6P_FAITH		INP_FAITH
 #define	IN6P_CONTROLOPTS INP_CONTROLOPTS
+#define	IN6P_NO_IFT_CELLULAR	INP_NO_IFT_CELLULAR
 	/*
 	 * socket AF version is {newer than,or include}
 	 * actual datagram AF version
@@ -530,6 +585,7 @@ struct inpcbinfo {		/* XXX documentation, prefixes */
 #define	sotoin6pcb(so)	sotoinpcb(so) /* for KAME src sync over BSD*'s */
 
 #define	INP_SOCKAF(so) so->so_proto->pr_domain->dom_family
+#define	INP_SOCKTYPE(so) so->so_proto->pr_type
 
 #define	INP_CHECK_SOCKAF(so, af) 	(INP_SOCKAF(so) == af)
 
@@ -541,6 +597,8 @@ extern int	ipport_lastauto;
 extern int	ipport_hifirstauto;
 extern int	ipport_hilastauto;
 
+struct sysctl_req;
+
 #define INPCB_STATE_INUSE	0x1	/* freshly allocated PCB, it's in use */
 #define INPCB_STATE_CACHED	0x2	/* this pcb is sitting in a a cache */
 #define INPCB_STATE_DEAD	0x3	/* should treat as gone, will be garbage collected and freed */
@@ -553,19 +611,21 @@ extern void	in_losing(struct inpcb *);
 extern void	in_rtchange(struct inpcb *, int);
 extern int	in_pcballoc(struct socket *, struct inpcbinfo *, struct proc *);
 extern int	in_pcbbind(struct inpcb *, struct sockaddr *, struct proc *);
-extern int	in_pcbconnect(struct inpcb *, struct sockaddr *, struct proc *);
+extern int	in_pcbconnect(struct inpcb *, struct sockaddr *, struct proc *, unsigned int *);
 extern void	in_pcbdetach(struct inpcb *);
 extern void	in_pcbdispose (struct inpcb *);
 extern void	in_pcbdisconnect(struct inpcb *);
 extern int	in_pcbinshash(struct inpcb *, int);
 extern int	in_pcbladdr(struct inpcb *, struct sockaddr *,
-		    struct sockaddr_in **);
+		    struct sockaddr_in *, unsigned int *);
 extern struct inpcb *in_pcblookup_local(struct inpcbinfo *, struct in_addr,
 		    u_int, int);
 extern struct inpcb *in_pcblookup_local_and_cleanup(struct inpcbinfo *,
 		    struct in_addr, u_int, int);
 extern struct inpcb *in_pcblookup_hash(struct inpcbinfo *, struct in_addr,
 		    u_int, struct in_addr, u_int, int, struct ifnet *);
+extern int	in_pcblookup_hash_exists(struct inpcbinfo *, struct in_addr,
+		    u_int, struct in_addr, u_int, int, uid_t *, gid_t *, struct ifnet *);
 extern void	in_pcbnotifyall(struct inpcbinfo *, struct in_addr, int,
 		    void (*)(struct inpcb *, int));
 extern void	in_pcbrehash(struct inpcb *);
@@ -580,8 +640,11 @@ extern void	inpcb_to_compat(struct inpcb *inp,
 extern void	inpcb_to_xinpcb64(struct inpcb *inp,
 		        struct xinpcb64 *xinp);
 #endif
+extern int get_pcblist_n(short , struct sysctl_req *, struct inpcbinfo *);
 extern void	inp_route_copyout(struct inpcb *, struct route *);
 extern void	inp_route_copyin(struct inpcb *, struct route *);
+extern void	inp_bindif(struct inpcb *, unsigned int);
+extern int	inp_nocellular(struct inpcb *, unsigned int);
 
 #endif /* KERNEL */
 #endif /* KERNEL_PRIVATE */
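The xi_len/xi_kind framing in struct xinpcb_n above is self-describing: every
record in a get_pcblist_n() buffer leads with its length and kind, so a consumer
can skip kinds it does not understand. A hypothetical userland walker, assuming
records are packed at 64-bit boundaries as in_pcblist.c does with
ROUNDUP64/ADVANCE64 (the rounding here is a portable equivalent):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	#define ROUNDUP64(x) \
		(((x) + sizeof (uint64_t) - 1) & ~(sizeof (uint64_t) - 1))

	/* Common prefix of xinpcb_n, xsocket_n, xsockbuf_n, ... */
	struct rec_hdr {
		uint32_t len;	/* length of this record */
		uint32_t kind;	/* XSO_INPCB, XSO_SOCKET, ... */
	};

	static void
	walk_records(const uint8_t *buf, size_t buflen)
	{
		size_t off = 0;

		while (off + sizeof (struct rec_hdr) <= buflen) {
			const struct rec_hdr *h = (const void *)(buf + off);

			if (h->len < sizeof (*h) || h->len > buflen - off)
				break;	/* malformed or truncated record */
			printf("record kind %u, %u bytes\n",
			    (unsigned)h->kind, (unsigned)h->len);
			off += ROUNDUP64(h->len);
		}
	}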
diff --git a/bsd/netinet/in_pcblist.c b/bsd/netinet/in_pcblist.c
new file mode 100644
index 000000000..9ff8839b5
--- /dev/null
+++ b/bsd/netinet/in_pcblist.c
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * Copyright (c) 1982, 1986, 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/dtrace.h>
+
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+
+#ifndef ROUNDUP64
+#define ROUNDUP64(x) P2ROUNDUP((x), sizeof(u_int64_t))
+#endif
+
+#ifndef ADVANCE64
+#define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n))
+#endif
+
+
+void sotoxsocket_n(struct socket *, struct xsocket_n *);
+void sbtoxsockbuf_n(struct sockbuf *, struct xsockbuf_n *);
+void sbtoxsockstat_n(struct socket *, struct xsockstat_n *);
+void inpcb_to_xinpcb_n(struct inpcb *, struct xinpcb_n *);
+void tcpcb_to_xtcpcb_n(struct tcpcb *, struct xtcpcb_n *);
+
+__private_extern__ void
+sotoxsocket_n(struct socket *so, struct xsocket_n *xso)
+{
+	xso->xso_len = sizeof(struct xsocket_n);
+	xso->xso_kind = XSO_SOCKET;
+
+	if (so != NULL) {
+		xso->xso_so = (u_int64_t)(uintptr_t)so;
+		xso->so_type = so->so_type;
+		xso->so_options = so->so_options;
+		xso->so_linger = so->so_linger;
+		xso->so_state = so->so_state;
+		xso->so_pcb = (u_int64_t)(uintptr_t)so->so_pcb;
+		if (so->so_proto) {
+			xso->xso_protocol = so->so_proto->pr_protocol;
+			xso->xso_family = so->so_proto->pr_domain->dom_family;
+		} else {
+			xso->xso_protocol = xso->xso_family = 0;
+		}
+		xso->so_qlen = so->so_qlen;
+		xso->so_incqlen = so->so_incqlen;
+		xso->so_qlimit = so->so_qlimit;
+		xso->so_timeo = so->so_timeo;
+		xso->so_error = so->so_error;
+		xso->so_pgid = so->so_pgid;
+		xso->so_oobmark = so->so_oobmark;
+		xso->so_uid = so->so_uid;
+	}
+}
+
+__private_extern__ void
+sbtoxsockbuf_n(struct sockbuf *sb, struct xsockbuf_n *xsb)
+{
+	xsb->xsb_len = sizeof(struct xsockbuf_n);
+
+	if (sb != NULL) {
+		/* Don't dereference sb until after the NULL check */
+		xsb->xsb_kind = (sb->sb_flags & SB_RECV) ?
+		    XSO_RCVBUF : XSO_SNDBUF;
+		xsb->sb_cc = sb->sb_cc;
+		xsb->sb_hiwat = sb->sb_hiwat;
+		xsb->sb_mbcnt = sb->sb_mbcnt;
+		xsb->sb_mbmax = sb->sb_mbmax;
+		xsb->sb_lowat = sb->sb_lowat;
+		xsb->sb_flags = sb->sb_flags;
+		xsb->sb_timeo = (short)
+			(sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick;
+		if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0)
+			xsb->sb_timeo = 1;
+	}
+}
+
+__private_extern__ void
+sbtoxsockstat_n(struct socket *so, struct xsockstat_n *xst)
+{
+	int i;
+
+	xst->xst_len = sizeof(struct xsockstat_n);
+	xst->xst_kind = XSO_STATS;
+	
+	for (i = 0; i < SO_TC_STATS_MAX; i++) {
+		xst->xst_tc_stats[i].rxpackets = so->so_tc_stats[i].rxpackets;
+		xst->xst_tc_stats[i].rxbytes = so->so_tc_stats[i].rxbytes;
+		xst->xst_tc_stats[i].txpackets = so->so_tc_stats[i].txpackets;
+		xst->xst_tc_stats[i].txbytes = so->so_tc_stats[i].txbytes;
+	}
+}
+
+__private_extern__ void
+inpcb_to_xinpcb_n(struct inpcb *inp, struct xinpcb_n *xinp)
+{
+	xinp->xi_len = sizeof(struct xinpcb_n);
+	xinp->xi_kind = XSO_INPCB;
+	xinp->xi_inpp = (u_int64_t)(uintptr_t)inp;
+	xinp->inp_fport = inp->inp_fport;
+	xinp->inp_lport = inp->inp_lport;
+	xinp->inp_ppcb = (u_int64_t)(uintptr_t)inp->inp_ppcb;
+	xinp->inp_gencnt = inp->inp_gencnt;
+	xinp->inp_flags = inp->inp_flags;
+	xinp->inp_flow = inp->inp_flow;
+	xinp->inp_vflag = inp->inp_vflag;
+	xinp->inp_ip_ttl = inp->inp_ip_ttl;
+	xinp->inp_ip_p = inp->inp_ip_p;
+	xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign;
+	xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local;
+	xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos;
+	xinp->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim;
+	xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum;
+	xinp->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex;
+	xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops;
+}
+
+__private_extern__ void
+tcpcb_to_xtcpcb_n(struct tcpcb *tp, struct xtcpcb_n *xt)
+{
+	int i;
+
+	xt->xt_len = sizeof(struct xtcpcb_n);
+	xt->xt_kind = XSO_TCPCB;
+
+	xt->t_segq = (u_int32_t)(uintptr_t)tp->t_segq.lh_first;
+	xt->t_dupacks = tp->t_dupacks;
+	for (i = 0; i < TCPT_NTIMERS_EXT; i++)
+		xt->t_timer[i] = tp->t_timer[i];
+	xt->t_state = tp->t_state;
+	xt->t_flags = tp->t_flags;
+	xt->t_force = tp->t_force;
+	xt->snd_una = tp->snd_una;
+	xt->snd_max = tp->snd_max;
+	xt->snd_nxt = tp->snd_nxt;
+	xt->snd_up = tp->snd_up;
+	xt->snd_wl1 = tp->snd_wl1;
+	xt->snd_wl2 = tp->snd_wl2;
+	xt->iss = tp->iss;
+	xt->irs = tp->irs;
+	xt->rcv_nxt = tp->rcv_nxt;
+	xt->rcv_adv = tp->rcv_adv;
+	xt->rcv_wnd = tp->rcv_wnd;
+	xt->rcv_up = tp->rcv_up;
+	xt->snd_wnd = tp->snd_wnd;
+	xt->snd_cwnd = tp->snd_cwnd;
+	xt->snd_ssthresh = tp->snd_ssthresh;
+	xt->t_maxopd = tp->t_maxopd;
+	xt->t_rcvtime = tp->t_rcvtime;
+	xt->t_starttime = tp->t_starttime;
+	xt->t_rtttime = tp->t_rtttime;
+	xt->t_rtseq = tp->t_rtseq;
+	xt->t_rxtcur = tp->t_rxtcur;
+	xt->t_maxseg = tp->t_maxseg;
+	xt->t_srtt = tp->t_srtt;
+	xt->t_rttvar = tp->t_rttvar;
+	xt->t_rxtshift = tp->t_rxtshift;
+	xt->t_rttmin = tp->t_rttmin;
+	xt->t_rttupdated = tp->t_rttupdated;
+	xt->max_sndwnd = tp->max_sndwnd;
+	xt->t_softerror = tp->t_softerror;
+	xt->t_oobflags = tp->t_oobflags;
+	xt->t_iobc = tp->t_iobc;
+	xt->snd_scale = tp->snd_scale;
+	xt->rcv_scale = tp->rcv_scale;
+	xt->request_r_scale = tp->request_r_scale;
+	xt->requested_s_scale = tp->requested_s_scale;
+	xt->ts_recent = tp->ts_recent;
+	xt->ts_recent_age = tp->ts_recent_age;
+	xt->last_ack_sent = tp->last_ack_sent;
+	xt->cc_send = tp->cc_send;
+	xt->cc_recv = tp->cc_recv;
+	xt->snd_recover = tp->snd_recover;
+	xt->snd_cwnd_prev = tp->snd_cwnd_prev;
+	xt->snd_ssthresh_prev = tp->snd_ssthresh_prev;
+	xt->t_badrxtwin = tp->t_badrxtwin;
+}
+
+__private_extern__ int
+get_pcblist_n(short proto, struct sysctl_req *req, struct inpcbinfo *pcbinfo)
+{
+	int error = 0;
+	int i, n;
+	struct inpcb *inp, **inp_list = NULL;
+	inp_gen_t gencnt;
+	struct xinpgen xig;
+	void *buf = NULL;
+	size_t item_size = ROUNDUP64(sizeof(struct xinpcb_n)) +
+	    ROUNDUP64(sizeof(struct xsocket_n)) +
+	    2 * ROUNDUP64(sizeof(struct xsockbuf_n)) +
+	    ROUNDUP64(sizeof(struct xsockstat_n));
+
+	if (proto == IPPROTO_TCP)
+		item_size += ROUNDUP64(sizeof(struct xtcpcb_n));
+
+	/*
+	 * The process of preparing the PCB list is too time-consuming and
+	 * resource-intensive to repeat twice on every request.
+	 */
+	lck_rw_lock_exclusive(pcbinfo->mtx);
+	if (req->oldptr == USER_ADDR_NULL) {
+		n = pcbinfo->ipi_count;
+		req->oldidx = 2 * (sizeof xig) + (n + n/8) * item_size;
+		goto done;
+	}
+
+	if (req->newptr != USER_ADDR_NULL) {
+		error = EPERM;
+		goto done;
+	}
+
+	/*
+	 * OK, now we're committed to doing something.
+	 */
+	gencnt = pcbinfo->ipi_gencnt;
+	n = pcbinfo->ipi_count;
+
+	bzero(&xig, sizeof(xig));
+	xig.xig_len = sizeof xig;
+	xig.xig_count = n;
+	xig.xig_gen = gencnt;
+	xig.xig_sogen = so_gencnt;
+	error = SYSCTL_OUT(req, &xig, sizeof xig);
+	if (error) {
+		goto done;
+	}
+	/*
+	 * We are done if there is no pcb.
+	 */
+	if (n == 0) {
+		goto done;
+	}
+
+	buf = _MALLOC(item_size, M_TEMP, M_WAITOK);
+	if (buf == NULL) {
+		error = ENOMEM;
+		goto done;
+	}
+
+	inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK);
+	if (inp_list == NULL) {
+		error = ENOMEM;
+		goto done;
+	}
+
+	for (inp = pcbinfo->listhead->lh_first, i = 0; inp && i < n;
+	    inp = inp->inp_list.le_next) {
+		if (inp->inp_gencnt <= gencnt &&
+		    inp->inp_state != INPCB_STATE_DEAD)
+			inp_list[i++] = inp;
+	}
+	n = i;
+
+	error = 0;
+	for (i = 0; i < n; i++) {
+		inp = inp_list[i];
+		if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) {
+			struct xinpcb_n *xi = (struct xinpcb_n *)buf;
+			struct xsocket_n *xso = (struct xsocket_n *)ADVANCE64(xi, sizeof(*xi));
+			struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *)ADVANCE64(xso, sizeof(*xso));
+			struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *)ADVANCE64(xsbrcv, sizeof(*xsbrcv));
+			struct xsockstat_n *xsostats = (struct xsockstat_n *)ADVANCE64(xsbsnd, sizeof(*xsbsnd));
+			
+			bzero(buf, item_size);
+
+			inpcb_to_xinpcb_n(inp, xi);
+			sotoxsocket_n(inp->inp_socket, xso);
+			sbtoxsockbuf_n(inp->inp_socket ? &inp->inp_socket->so_rcv : NULL, xsbrcv);
+			sbtoxsockbuf_n(inp->inp_socket ? &inp->inp_socket->so_snd : NULL, xsbsnd);
+			sbtoxsockstat_n(inp->inp_socket, xsostats);
+			if (proto == IPPROTO_TCP) {
+				struct  xtcpcb_n *xt = (struct xtcpcb_n *)ADVANCE64(xsostats, sizeof(*xsostats));
+				
+				/*
+				 * inp->inp_ppcb can only be NULL
+				 * during an initialization race
+				 * window; no need to lock.
+				 */
+				if (inp->inp_ppcb == NULL)
+					continue;
+				
+				tcpcb_to_xtcpcb_n((struct tcpcb *)inp->inp_ppcb, xt);
+			}
+			error = SYSCTL_OUT(req, buf, item_size);
+		}
+	}
+	if (!error) {
+		/*
+		 * Give the user an updated idea of our state.
+		 * If the generation differs from what we told
+		 * her before, she knows that something happened
+		 * while we were processing this request, and it
+		 * might be necessary to retry.
+		 */
+		bzero(&xig, sizeof(xig));
+		xig.xig_len = sizeof xig;
+		xig.xig_gen = pcbinfo->ipi_gencnt;
+		xig.xig_sogen = so_gencnt;
+		xig.xig_count = pcbinfo->ipi_count;
+		error = SYSCTL_OUT(req, &xig, sizeof xig);
+	}
+done:
+	lck_rw_done(pcbinfo->mtx);
+	if (inp_list)
+		FREE(inp_list, M_TEMP);
+	if (buf)
+		FREE(buf, M_TEMP);
+	return error;
+}
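+
+/*
+ * Sketch of the expected userland consumer (illustrative only): given
+ * the raw sysctl reply in "char *buf" of length "len", compare the
+ * leading and trailing xinpgen and retry if the generation moved while
+ * the list was being built:
+ *
+ *	struct xinpgen *beg = (struct xinpgen *)buf;
+ *	struct xinpgen *end =
+ *	    (struct xinpgen *)(buf + len - sizeof (struct xinpgen));
+ *
+ *	if (beg->xig_gen != end->xig_gen)
+ *		goto retry;
+ */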
+
diff --git a/bsd/netinet/in_proto.c b/bsd/netinet/in_proto.c
index f08af184f..7c979683a 100644
--- a/bsd/netinet/in_proto.c
+++ b/bsd/netinet/in_proto.c
@@ -164,7 +164,7 @@ struct protosw inetsw[] = {
 { SOCK_RAW,	&inetdomain,	IPPROTO_IGMP,	PR_ATOMIC|PR_ADDR|PR_LASTHDR,
   igmp_input,	0,		0,		rip_ctloutput,
   0,
-  igmp_init,	igmp_fasttimo,	igmp_slowtimo,	0,
+  igmp_init,	0,		igmp_slowtimo,	0,
   0,
   &rip_usrreqs,
   0,		rip_unlock,	0,	{ 0, 0 },	0,	{ 0 }
diff --git a/bsd/netinet/in_rmx.c b/bsd/netinet/in_rmx.c
index 37ca3d250..2d0c2735d 100644
--- a/bsd/netinet/in_rmx.c
+++ b/bsd/netinet/in_rmx.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -75,13 +75,16 @@
 #include <sys/sysctl.h>
 #include <sys/socket.h>
 #include <sys/mbuf.h>
+#include <sys/protosw.h>
 #include <sys/syslog.h>
+#include <sys/mcache.h>
 #include <kern/lock.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
+#include <netinet/in_arp.h>
 
 extern int tvtohz(struct timeval *);
 extern int	in_inithead(void **head, int off);
@@ -139,11 +142,15 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
 		if (in_broadcast(sin->sin_addr, rt->rt_ifp)) {
 			rt->rt_flags |= RTF_BROADCAST;
 		} else {
+			/* Become a regular mutex */
+			RT_CONVERT_LOCK(rt);
+			IFA_LOCK_SPIN(rt->rt_ifa);
 #define satosin(sa) ((struct sockaddr_in *)sa)
 			if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr
 			    == sin->sin_addr.s_addr)
 				rt->rt_flags |= RTF_LOCAL;
 #undef satosin
+			IFA_UNLOCK(rt->rt_ifa);
 		}
 	}
 
@@ -160,7 +167,7 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
 		 * ARP entry and delete it if so.
 		 */
 		rt2 = rtalloc1_scoped_locked(rt_key(rt), 0,
-		    RTF_CLONING | RTF_PRCLONING, sa_get_ifscope(rt_key(rt)));
+		    RTF_CLONING | RTF_PRCLONING, sin_get_ifscope(rt_key(rt)));
 		if (rt2) {
 			RT_LOCK(rt2);
 			if ((rt2->rt_flags & RTF_LLINFO) &&
@@ -199,9 +206,17 @@ in_validate(struct radix_node *rn)
 	RT_LOCK_ASSERT_HELD(rt);
 
 	/* This is first reference? */
-	if (rt->rt_refcnt == 0 && (rt->rt_flags & RTPRF_OURS)) {
-		rt->rt_flags &= ~RTPRF_OURS;
-		rt->rt_rmx.rmx_expire = 0;
+	if (rt->rt_refcnt == 0) {
+		if (rt->rt_flags & RTPRF_OURS) {
+			/* It's one of ours; unexpire it */
+			rt->rt_flags &= ~RTPRF_OURS;
+			rt_setexpire(rt, 0);
+		} else if ((rt->rt_flags & RTF_LLINFO) &&
+		    (rt->rt_flags & RTF_HOST) && rt->rt_gateway != NULL &&
+		    rt->rt_gateway->sa_family == AF_LINK) {
+			/* It's ARP; let it be handled there */
+			arp_validate(rt);
+		}
 	}
 	return (rn);
 }
@@ -236,19 +251,19 @@ in_matroute_args(void *v_arg, struct radix_node_head *head,
 
 static int rtq_reallyold = 60*60;
 	/* one hour is ``really old'' */
-SYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_RW, 
+SYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_RW | CTLFLAG_LOCKED, 
     &rtq_reallyold , 0, 
     "Default expiration time on dynamically learned routes");
 				   
 static int rtq_minreallyold = 10;
 	/* never automatically crank down to less */
-SYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW, 
+SYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW | CTLFLAG_LOCKED, 
     &rtq_minreallyold , 0, 
     "Minimum time to attempt to hold onto dynamically learned routes");
 				   
 static int rtq_toomany = 128;
 	/* 128 cached routes is ``too many'' */
-SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW, 
+SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW | CTLFLAG_LOCKED, 
     &rtq_toomany , 0, "Upper limit on dynamically learned routes");
 
 #ifdef __APPLE__
@@ -265,12 +280,12 @@ SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW,
  * If for some reason a circular route is needed, turn this sysctl (net.inet.ip.check_route_selfref) to zero.
  */
 int check_routeselfref = 1;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, check_route_selfref, CTLFLAG_RW, 
+SYSCTL_INT(_net_inet_ip, OID_AUTO, check_route_selfref, CTLFLAG_RW | CTLFLAG_LOCKED,
     &check_routeselfref , 0, "");
 #endif
 
 int use_routegenid = 1;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, use_route_genid, CTLFLAG_RW, 
+SYSCTL_INT(_net_inet_ip, OID_AUTO, use_route_genid, CTLFLAG_RW | CTLFLAG_LOCKED,
     &use_routegenid , 0, "");
 
 /*
@@ -319,12 +334,12 @@ in_clsroute(struct radix_node *rn, __unused struct radix_node_head *head)
 			RT_LOCK(rt);
 		}
 	} else {
-		struct timeval timenow;
+		uint64_t timenow;
 
-		getmicrotime(&timenow);
+		timenow = net_uptime();
 		rt->rt_flags |= RTPRF_OURS;
-		rt->rt_rmx.rmx_expire =
-		    rt_expiry(rt, timenow.tv_sec, rtq_reallyold);
+		rt_setexpire(rt,
+		    rt_expiry(rt, timenow, rtq_reallyold));
 	}
 }
 
@@ -334,7 +349,7 @@ struct rtqk_arg {
 	int killed;
 	int found;
 	int updating;
-	time_t nextstop;
+	uint64_t nextstop;
 };
 
 /*
@@ -348,16 +363,18 @@ in_rtqkill(struct radix_node *rn, void *rock)
 	struct rtqk_arg *ap = rock;
 	struct rtentry *rt = (struct rtentry *)rn;
 	int err;
-	struct timeval timenow;
+	uint64_t timenow;
 
-	getmicrotime(&timenow);
+	timenow = net_uptime();
 	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 
 	RT_LOCK(rt);
 	if (rt->rt_flags & RTPRF_OURS) {
 		ap->found++;
 
-		if (ap->draining || rt->rt_rmx.rmx_expire <= timenow.tv_sec) {
+		VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
+		VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
+		if (ap->draining || rt->rt_expire <= timenow) {
 			if (rt->rt_refcnt > 0)
 				panic("rtqkill route really not free");
 
@@ -380,13 +397,13 @@ in_rtqkill(struct radix_node *rn, void *rock)
 			}
 		} else {
 			if (ap->updating &&
-			    (unsigned)(rt->rt_rmx.rmx_expire - timenow.tv_sec) >
+			    (rt->rt_expire - timenow) >
 			    rt_expiry(rt, 0, rtq_reallyold)) {
-				rt->rt_rmx.rmx_expire = rt_expiry(rt,
-				    timenow.tv_sec, rtq_reallyold);
+				rt_setexpire(rt, rt_expiry(rt,
+				    timenow, rtq_reallyold));
 			}
 			ap->nextstop = lmin(ap->nextstop,
-					    rt->rt_rmx.rmx_expire);
+					    rt->rt_expire);
 			RT_UNLOCK(rt);
 		}
 	} else {
@@ -411,16 +428,16 @@ in_rtqtimo(void *rock)
 	struct radix_node_head *rnh = rock;
 	struct rtqk_arg arg;
 	struct timeval atv;
-	static time_t last_adjusted_timeout = 0;
-	struct timeval timenow;
+	static uint64_t last_adjusted_timeout = 0;
+	uint64_t timenow;
 
 	lck_mtx_lock(rnh_lock);
 	/* Get the timestamp after we acquire the lock for better accuracy */
-	getmicrotime(&timenow);
+	timenow = net_uptime();
 
 	arg.found = arg.killed = 0;
 	arg.rnh = rnh;
-	arg.nextstop = timenow.tv_sec + rtq_timeout;
+	arg.nextstop = timenow + rtq_timeout;
 	arg.draining = arg.updating = 0;
 	rnh->rnh_walktree(rnh, in_rtqkill, &arg);
 
@@ -433,14 +450,14 @@ in_rtqtimo(void *rock)
 	 * hard.
 	 */
 	if((arg.found - arg.killed > rtq_toomany)
-	   && (timenow.tv_sec - last_adjusted_timeout >= rtq_timeout)
+	   && ((timenow - last_adjusted_timeout) >= (uint64_t)rtq_timeout)
 	   && rtq_reallyold > rtq_minreallyold) {
 		rtq_reallyold = 2*rtq_reallyold / 3;
 		if(rtq_reallyold < rtq_minreallyold) {
 			rtq_reallyold = rtq_minreallyold;
 		}
 
-		last_adjusted_timeout = timenow.tv_sec;
+		last_adjusted_timeout = timenow;
 #if DIAGNOSTIC
 		log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n",
 		    rtq_reallyold);
@@ -451,7 +468,7 @@ in_rtqtimo(void *rock)
 	}
 
 	atv.tv_usec = 0;
-	atv.tv_sec = arg.nextstop - timenow.tv_sec;
+	atv.tv_sec = arg.nextstop - timenow;
 	lck_mtx_unlock(rnh_lock);
 	timeout(in_rtqtimo_funnel, rock, tvtohz(&atv));
 }
@@ -557,8 +574,13 @@ in_ifadown(struct ifaddr *ifa, int delete)
 
 	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 
+	/*
+	 * Holding rnh_lock here prevents the possibility of
+	 * ifa from changing (e.g. in_ifinit), so it is safe
+	 * to access its ifa_addr without locking.
+	 */
 	if (ifa->ifa_addr->sa_family != AF_INET)
-		return 1;
+		return (1);
 
 	/* trigger route cache reevaluation */
 	if (use_routegenid)
@@ -568,6 +590,8 @@ in_ifadown(struct ifaddr *ifa, int delete)
 	arg.ifa = ifa;
 	arg.del = delete;
 	rnh->rnh_walktree(rnh, in_ifadownkill, &arg);
+	IFA_LOCK_SPIN(ifa);
 	ifa->ifa_flags &= ~IFA_ROUTE;
-	return 0;
+	IFA_UNLOCK(ifa);
+	return (0);
 }
diff --git a/bsd/netinet/in_tclass.c b/bsd/netinet/in_tclass.c
new file mode 100644
index 000000000..54b5fcc1d
--- /dev/null
+++ b/bsd/netinet/in_tclass.c
@@ -0,0 +1,850 @@
+/*
+ * Copyright (c) 2009-2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/types.h>
+#include <sys/filedesc.h>
+#include <sys/file_internal.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_cc.h>
+
+extern char *proc_name_address(void *p);
+
+static int tfp_count = 0;
+
+static TAILQ_HEAD(, tclass_for_proc) tfp_head = TAILQ_HEAD_INITIALIZER(tfp_head);
+
+struct tclass_for_proc {
+	TAILQ_ENTRY(tclass_for_proc)	tfp_link;
+	int				tfp_class;
+	pid_t				tfp_pid;
+	char				tfp_pname[MAXCOMLEN + 1];
+};
+
+extern void tcp_set_background_cc(struct socket *);
+extern void tcp_set_foreground_cc(struct socket *);
+
+int dscp_code_from_mbuf_tclass(int);
+
+static int get_pid_tclass(pid_t, int *);
+static int get_pname_tclass(const char *, int *);
+static int set_pid_tclass(pid_t, int);
+static int set_pname_tclass(const char *, int);
+static int purge_tclass_for_proc(void);
+static int flush_tclass_for_proc(void);
+
+
+static lck_grp_attr_t *tclass_lck_grp_attr = NULL;  /* mutex group attributes */
+static lck_grp_t *tclass_lck_grp = NULL;            /* mutex group definition */
+static lck_attr_t *tclass_lck_attr = NULL;          /* mutex attributes */
+static lck_mtx_t *tclass_lock = NULL;
+
+/*
+ * Must be called with tclass_lock held
+ */
+static struct tclass_for_proc *
+find_tfp_by_pid(pid_t pid)
+{
+	struct tclass_for_proc *tfp;
+	
+	TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
+		if (tfp->tfp_pid == pid)
+			break;
+	}
+	return tfp;
+}
+
+/*
+ * Must be called with tclass_lock held
+ */
+static struct tclass_for_proc *
+find_tfp_by_pname(const char *pname)
+{
+	struct tclass_for_proc *tfp;
+	
+	TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
+		if (strncmp(pname, tfp->tfp_pname, sizeof(tfp->tfp_pname)) == 0)
+			break;
+	}
+	return tfp;
+}
+
+static int
+get_tclass_for_curr_proc(void)
+{
+	struct tclass_for_proc *tfp;
+	int sotc = SO_TC_BE;
+	proc_t p = current_proc();	/* Not ref counted */
+	pid_t pid = proc_pid(p);
+	char *pname = proc_name_address(p);
+	
+	lck_mtx_lock(tclass_lock);
+	
+	TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
+		if ((tfp->tfp_pid == pid) ||
+		    (tfp->tfp_pid == -1 &&
+		    strncmp(pname, tfp->tfp_pname, sizeof(tfp->tfp_pname)) == 0)) {
+			sotc = tfp->tfp_class;
+			break;
+		}
+	}
+
+	lck_mtx_unlock(tclass_lock);
+
+	return sotc;
+}
+
+/*
+ * Purge entries with PIDs of exited processes
+ */
+int
+purge_tclass_for_proc(void)
+{
+	int error = 0;
+	struct tclass_for_proc *tfp, *tvar;
+
+	lck_mtx_lock(tclass_lock);
+
+	TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
+		proc_t p;
+		
+		if (tfp->tfp_pid == -1)
+			continue;
+		if ((p = proc_find(tfp->tfp_pid)) == NULL) {
+			tfp_count--;
+			TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
+
+			_FREE(tfp, M_TEMP);
+		} else {
+			proc_rele(p);
+		}
+	}
+
+	lck_mtx_unlock(tclass_lock);
+	
+	return error;
+}
+
+/*
+ * Remove one entry
+ * Must be called with tclass_lock held
+ */
+static void
+free_tclass_for_proc(struct tclass_for_proc *tfp)
+{
+	if (tfp == NULL)
+		return;
+	tfp_count--;
+	TAILQ_REMOVE(&tfp_head, tfp, tfp_link);
+	_FREE(tfp, M_TEMP);
+}
+
+/*
+ * Remove all entries
+ */
+int
+flush_tclass_for_proc(void)
+{
+	int error = 0;
+	struct tclass_for_proc *tfp, *tvar;
+
+	lck_mtx_lock(tclass_lock);
+
+	TAILQ_FOREACH_SAFE(tfp, &tfp_head, tfp_link, tvar) {
+		free_tclass_for_proc(tfp);
+	}
+
+	lck_mtx_unlock(tclass_lock);
+
+	return error;
+}
+
+/*
+ * Must be called with tclass_lock held
+ */
+static struct tclass_for_proc *
+alloc_tclass_for_proc(pid_t pid, const char *pname, int tclass)
+{
+	struct tclass_for_proc *tfp;
+	
+	if (pid == -1 && pname == NULL)
+		return NULL;
+
+	tfp = _MALLOC(sizeof(struct tclass_for_proc), M_TEMP, M_NOWAIT | M_ZERO);
+	if (tfp == NULL)
+		return NULL;
+	
+	tfp->tfp_pid = pid;
+	tfp->tfp_class = tclass;
+	/*
+	 * Add per pid entries before per proc name so we can find 
+	 * a specific instance of a process before the general name base entry.
+	 */
+	if (pid != -1) {
+		TAILQ_INSERT_HEAD(&tfp_head, tfp, tfp_link);
+	} else {
+		strlcpy(tfp->tfp_pname, pname, sizeof(tfp->tfp_pname));
+		TAILQ_INSERT_TAIL(&tfp_head, tfp, tfp_link);
+	}
+	
+	tfp_count++;
+
+	return tfp;
+}
+
+/*
+ * -1 for tclass means to remove the entry
+ */
+int 
+set_pid_tclass(pid_t pid, int tclass)
+{
+	int error = EINVAL;
+	proc_t p = NULL;
+	struct filedesc *fdp;
+	struct fileproc *fp;
+	struct tclass_for_proc *tfp;
+	int i;
+
+	p = proc_find(pid);
+	if (p == NULL) {
+		printf("set_pid_tclass proc_find(%d) \n", pid);
+		goto done;
+	}
+	
+	/* Need a tfp */
+	lck_mtx_lock(tclass_lock);
+	
+	tfp = find_tfp_by_pid(pid);
+	if (tclass == -1) {
+		if (tfp != NULL) {
+			free_tclass_for_proc(tfp);
+			error = 0;
+		}
+		lck_mtx_unlock(tclass_lock);
+		goto done;
+	} else {
+		if (tfp == NULL) {
+			tfp = alloc_tclass_for_proc(pid, NULL, tclass);
+			if (tfp == NULL) {
+				lck_mtx_unlock(tclass_lock);
+				error = ENOBUFS;
+				goto done;
+			}
+		} else {
+			tfp->tfp_class = tclass;
+		}
+	}
+	lck_mtx_unlock(tclass_lock);
+
+	if (tfp != NULL) {
+		proc_fdlock(p);
+		
+		fdp = p->p_fd;
+		for (i = 0; i < fdp->fd_nfiles; i++) {
+			struct socket *so;
+			
+			fp = fdp->fd_ofiles[i];
+			if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
+				fp->f_fglob->fg_type != DTYPE_SOCKET)
+				continue;
+			
+			so = (struct socket *)fp->f_fglob->fg_data;
+			if (so->so_proto->pr_domain->dom_family != AF_INET && 
+				so->so_proto->pr_domain->dom_family != AF_INET6)
+				continue;
+			socket_lock(so, 1);
+			error = so_set_traffic_class(so, tclass != -1 ? tclass : SO_TC_BE);
+			socket_unlock(so, 1);
+			if (error != 0) {
+				printf("set_pid_tclass so_set_traffic_class(%p, %d) failed %d\n", so, tclass, error);
+				error = 0;
+			}
+		}
+		
+		proc_fdunlock(p);
+	}
+
+	error = 0;
+done:
+	if (p != NULL)
+		proc_rele(p);
+
+	return error;
+}
+
+int 
+set_pname_tclass(const char *pname, int tclass)
+{
+	int error = EINVAL;
+	struct tclass_for_proc *tfp;
+
+	lck_mtx_lock(tclass_lock);
+	
+	tfp = find_tfp_by_pname(pname);
+	if (tclass == -1) {
+		if (tfp != NULL)
+			free_tclass_for_proc(tfp);
+	} else {
+		if (tfp == NULL) {
+			tfp = alloc_tclass_for_proc(-1, pname, tclass);
+			if (tfp == NULL) {
+				lck_mtx_unlock(tclass_lock);
+				error = ENOBUFS;
+				goto done;
+			}
+		} else {
+			tfp->tfp_class = tclass;
+		}
+	}
+	lck_mtx_unlock(tclass_lock);
+	
+	error = 0;
+done:
+	return error;
+}
+
+int 
+get_pid_tclass(pid_t pid, int *tclass)
+{
+	int error = EINVAL;
+	proc_t p = NULL;
+	struct tclass_for_proc *tfp;
+	
+	*tclass = -1; /* Means not set */
+
+	p = proc_find(pid);
+	if (p == NULL) {
+		printf("get_pid_tclass proc_find(%d) \n", pid);
+		goto done;
+	}
+	
+	/* Need a tfp */
+	lck_mtx_lock(tclass_lock);
+	
+	tfp = find_tfp_by_pid(pid);
+	if (tfp != NULL) {
+		*tclass = tfp->tfp_class;
+		error = 0;
+	}
+	lck_mtx_unlock(tclass_lock);
+done:
+	if (p != NULL)
+		proc_rele(p);
+	
+	return error;
+}
+
+int 
+get_pname_tclass(const char *pname, int *tclass)
+{
+	int error = EINVAL;
+	struct tclass_for_proc *tfp;
+	
+	*tclass = -1; /* Means not set */
+
+	/* Need a tfp */
+	lck_mtx_lock(tclass_lock);
+	
+	tfp = find_tfp_by_pname(pname);
+	if (tfp != NULL) {
+		*tclass = tfp->tfp_class;
+		error = 0;
+	}
+	lck_mtx_unlock(tclass_lock);
+	
+	return error;
+}
+
+/*
+ * Setting options requires privileges
+ */
+__private_extern__ int 
+so_set_tcdbg(struct socket *so, struct so_tcdbg *so_tcdbg)
+{
+	int error = 0;
+	
+	if ((so->so_state & SS_PRIV) == 0)
+		return EPERM;
+
+	socket_unlock(so, 0);
+
+	switch (so_tcdbg->so_tcdbg_cmd) {
+		case SO_TCDBG_PID:
+			error = set_pid_tclass(so_tcdbg->so_tcdbg_pid, so_tcdbg->so_tcdbg_tclass);
+			break;
+		
+		case SO_TCDBG_PNAME:
+			error = set_pname_tclass(so_tcdbg->so_tcdbg_pname, so_tcdbg->so_tcdbg_tclass);
+			break;
+		
+		case SO_TCDBG_PURGE:
+			error = purge_tclass_for_proc();
+			break;
+		
+		case SO_TCDBG_FLUSH:
+			error = flush_tclass_for_proc();
+			break;
+		
+		default:
+			error = EINVAL;
+			break;
+		
+	}
+
+	socket_lock(so, 0);
+
+	return error;
+}
+
+/*
+ * Not required to be privileged to get
+ */
+__private_extern__ int 
+sogetopt_tcdbg(struct socket *so, struct sockopt *sopt)
+{
+	int error = 0;
+	struct so_tcdbg so_tcdbg;
+	void *buf = NULL;
+	size_t len = sopt->sopt_valsize;
+
+	error = sooptcopyin(sopt, &so_tcdbg, sizeof(struct so_tcdbg), sizeof(struct so_tcdbg));
+	if (error != 0)
+		return error;
+	
+	sopt->sopt_valsize = len;
+	
+	socket_unlock(so, 0);
+
+	switch (so_tcdbg.so_tcdbg_cmd) {
+		case SO_TCDBG_PID:
+			error = get_pid_tclass(so_tcdbg.so_tcdbg_pid, &so_tcdbg.so_tcdbg_tclass);
+			break;
+		
+		case SO_TCDBG_PNAME:
+			error = get_pname_tclass(so_tcdbg.so_tcdbg_pname, &so_tcdbg.so_tcdbg_tclass);
+			break;
+		
+		case SO_TCDBG_COUNT:
+			lck_mtx_lock(tclass_lock);
+			so_tcdbg.so_tcdbg_count = tfp_count;
+			lck_mtx_unlock(tclass_lock);
+			break;
+
+		case SO_TCDBG_LIST: {
+			struct tclass_for_proc *tfp;
+			int n, alloc_count;
+			struct so_tcdbg *ptr;
+
+			lck_mtx_lock(tclass_lock);
+			if ((alloc_count = tfp_count) == 0) {
+				lck_mtx_unlock(tclass_lock);
+				error = EINVAL;
+				break;
+			}
+			len = alloc_count * sizeof(struct so_tcdbg);
+			lck_mtx_unlock(tclass_lock);
+
+			buf = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
+			if (buf == NULL) {
+				error = ENOBUFS;
+				break;
+			}
+
+			lck_mtx_lock(tclass_lock);
+			n = 0;
+			ptr = (struct so_tcdbg *)buf;
+			TAILQ_FOREACH(tfp, &tfp_head, tfp_link) {
+				if (++n > alloc_count)
+					break;
+				if (tfp->tfp_pid != -1) {
+					ptr->so_tcdbg_cmd = SO_TCDBG_PID;
+					ptr->so_tcdbg_pid = tfp->tfp_pid;
+				} else {
+					ptr->so_tcdbg_cmd = SO_TCDBG_PNAME;
+					ptr->so_tcdbg_pid = -1;
+					strlcpy(ptr->so_tcdbg_pname, tfp->tfp_pname, sizeof(ptr->so_tcdbg_pname));
+				}
+				ptr->so_tcdbg_tclass = tfp->tfp_class;
+				ptr++;
+			}
+			
+			lck_mtx_unlock(tclass_lock);
+			}
+			break;
+		
+		default:
+			error = EINVAL;
+			break;
+		
+	}
+
+	socket_lock(so, 0);
+
+	if (error == 0) {
+		if (buf == NULL) {
+			error = sooptcopyout(sopt, &so_tcdbg, sizeof(struct so_tcdbg));
+		} else {
+			error = sooptcopyout(sopt, buf, len);
+			_FREE(buf, M_TEMP);
+		}
+	}
+	return error;
+}
+
+
+__private_extern__ int
+so_set_traffic_class(struct socket *so, int optval)
+{
+	int error = 0;
+	
+	if (optval < SO_TC_BE || optval > SO_TC_VO) {
+		error = EINVAL;
+	} else {
+		so->so_traffic_class = optval;
+	
+		if ((INP_SOCKAF(so) == AF_INET || INP_SOCKAF(so) == AF_INET6) && 
+			INP_SOCKTYPE(so) == SOCK_STREAM) {
+			set_tcp_stream_priority(so);
+		}
+	}
+	return error;
+}
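+
+/*
+ * Illustrative caller (a sketch; assumes the private SO_TRAFFIC_CLASS
+ * socket option is visible to the caller):
+ *
+ *	int tc = SO_TC_BK;
+ *
+ *	if (setsockopt(s, SOL_SOCKET, SO_TRAFFIC_CLASS,
+ *	    &tc, sizeof (tc)) == -1)
+ *		err(1, "setsockopt SO_TRAFFIC_CLASS");
+ */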
+
+__private_extern__ void
+so_set_default_traffic_class(struct socket *so)
+{
+	int sotc = SO_TC_BE;
+
+	if (tfp_count > 0 && (INP_SOCKAF(so) == AF_INET || INP_SOCKAF(so) == AF_INET6)) {
+		sotc = get_tclass_for_curr_proc();
+	}
+	
+	so->so_traffic_class = sotc;
+	
+	return;
+}
+
+
+__private_extern__ int
+mbuf_traffic_class_from_control(struct mbuf *control)
+{
+	struct cmsghdr *cm;
+	
+	for (cm = M_FIRST_CMSGHDR(control); 
+		 cm != NULL; 
+		 cm = M_NXT_CMSGHDR(control, cm)) {
+		int tc;
+
+		if (cm->cmsg_len < sizeof(struct cmsghdr))
+			break;
+		
+		if (cm->cmsg_level != SOL_SOCKET ||
+			cm->cmsg_type != SO_TRAFFIC_CLASS)
+			continue;
+		if (cm->cmsg_len != CMSG_LEN(sizeof(int)))
+			continue;
+		
+		tc = *(int *)CMSG_DATA(cm);
+		
+		switch (tc) {
+			case SO_TC_BE:
+				return MBUF_TC_BE;
+			case SO_TC_BK:
+				return MBUF_TC_BK;
+			case SO_TC_VI:
+				return MBUF_TC_VI;
+			case SO_TC_VO:
+				return MBUF_TC_VO;
+			default:
+				break;
+		}
+	}
+	
+	return MBUF_TC_UNSPEC;
+}
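+
+/*
+ * Sketch of the matching sender side (hypothetical userland code
+ * passing the traffic class as ancillary data to sendmsg(2)):
+ *
+ *	char cbuf[CMSG_SPACE(sizeof (int))];
+ *	struct cmsghdr *cm = (struct cmsghdr *)cbuf;
+ *
+ *	cm->cmsg_level = SOL_SOCKET;
+ *	cm->cmsg_type = SO_TRAFFIC_CLASS;
+ *	cm->cmsg_len = CMSG_LEN(sizeof (int));
+ *	*(int *)CMSG_DATA(cm) = SO_TC_VI;
+ *	msg.msg_control = cbuf;
+ *	msg.msg_controllen = sizeof (cbuf);
+ */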
+
+__private_extern__  int
+dscp_code_from_mbuf_tclass(int mtc)
+{
+	int dscp_code;
+	
+	switch (mtc) {
+		default:
+		case MBUF_TC_BE:
+			dscp_code = 0;
+			break;
+		case MBUF_TC_BK:
+			dscp_code = 0x08;
+			break;
+		case MBUF_TC_VI:
+			dscp_code = 0x20;
+			break;
+		case MBUF_TC_VO:
+			dscp_code = 0x30;
+			break;
+	}
+	
+	return dscp_code;
+}
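+
+/*
+ * The values above correspond to the DSCP class selectors commonly
+ * mapped onto the WMM access categories (background observation, not
+ * part of this change):
+ *
+ *	MBUF_TC_BE -> 0x00 (CS0, best effort)
+ *	MBUF_TC_BK -> 0x08 (CS1, background)
+ *	MBUF_TC_VI -> 0x20 (CS4, video)
+ *	MBUF_TC_VO -> 0x30 (CS6, voice)
+ */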
+
+__private_extern__ void
+so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off)
+{
+	uint32_t sotc = m->m_pkthdr.prio;
+
+	if (sotc >= SO_TC_STATS_MAX)
+		sotc = SO_TC_BE;
+	
+	so->so_tc_stats[sotc].rxpackets += 1;
+	so->so_tc_stats[sotc].rxbytes += ((m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : 0) + off;
+
+	return;
+}
+
+__private_extern__ void
+set_tcp_stream_priority(struct socket *so)
+{
+	struct tcpcb *tp = intotcpcb(sotoinpcb(so));
+
+	/*
+	 * If the socket was marked as a background socket or if the
+	 * traffic class is set to background with the traffic class
+	 * socket option, then make both the send and receive sides of
+	 * the stream background.  The sotcdb variable, settable via
+	 * sysctl, can be used to disable these settings for testing.
+	 */
+	if (soisbackground(so) || so->so_traffic_class == SO_TC_BK) {
+		if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0) {
+			if (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX)
+				tcp_set_foreground_cc(so);
+		} else {
+			if (tp->tcp_cc_index != TCP_CC_ALGO_BACKGROUND_INDEX)
+				tcp_set_background_cc(so);
+		}
+		
+		/* Set receive side background flags */
+		if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0) {
+			so->so_traffic_mgt_flags &= ~(TRAFFIC_MGT_TCP_RECVBG);
+		} else {
+			so->so_traffic_mgt_flags |= TRAFFIC_MGT_TCP_RECVBG;
+		}
+	} else {
+		so->so_traffic_mgt_flags &= ~(TRAFFIC_MGT_TCP_RECVBG);
+		if (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX)
+			tcp_set_foreground_cc(so);
+	}
+	return;
+}
+
+/*
+ * Set traffic class to an IPv4 or IPv6 packet
+ * - mark the mbuf
+ * - set the DSCP code following the WMM mapping
+ */
+__private_extern__ void
+set_packet_tclass(struct mbuf *m, struct socket *so, int in_mtc, int isipv6)
+{
+	int mtc = MBUF_TC_BE; /* Best effort by default */
+	struct inpcb *inp = sotoinpcb(so);	 /* in6pcb and inpcb are the same */
+	struct ip *ip = mtod(m, struct ip *);
+#if INET6
+	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+#endif /* INET6 */
+	
+	if (!(m->m_flags & M_PKTHDR))
+		return;
+	
+	/*
+	 * Here is the precedence:
+	 * 1) TRAFFIC_MGT_SO_BACKGROUND trumps all
+	 * 2) Traffic class passed via ancillary data to sendmsg(2)
+	 * 3) Traffic class socket option last
+	 */
+	if (soisbackground(so)) {
+		mtc = MBUF_TC_BK;
+	} else if (in_mtc != MBUF_TC_UNSPEC) {
+		if (in_mtc >= MBUF_TC_BE && in_mtc <= MBUF_TC_VO)
+			mtc = in_mtc;
+	} else {
+		switch (so->so_traffic_class) {
+			case SO_TC_BE:
+				mtc = MBUF_TC_BE;
+				break;
+			case SO_TC_BK:
+				mtc = MBUF_TC_BK;
+				break;
+			case SO_TC_VI:
+				mtc = MBUF_TC_VI;
+				break;
+			case SO_TC_VO:
+				mtc = MBUF_TC_VO;
+				break;
+			default:
+				break;
+		}
+	}
+	
+	/*
+	 * Set the traffic class in the mbuf packet header prio field
+	 */
+	if ((sotcdb & SOTCDB_NO_MTC))
+		goto no_mbtc;
+	m->m_pkthdr.prio = mtc;
+	
+no_mbtc:
+	/*
+	 * Quick exit when best effort.
+	 */
+	if (mtc == MBUF_TC_BE)
+		goto no_dscp;
+	/*
+	 * Now set the DSCP code in the IPv4 or IPv6 header.
+	 * By default do this only for local traffic, and only if a code
+	 * is not already set.
+	 */
+	if ((sotcdb & SOTCDB_NO_DSCP))
+		goto no_dscp;
+		
+	/*
+	 * Test if an IP TOS or IPv6 traffic class has already been set
+	 * on the socket or the raw packet.
+	 */
+	if ((sotcdb & SOTCDB_NO_DSCPTST) == 0) {
+#if INET6
+		if (isipv6) 
+		{
+			if ((so->so_type == SOCK_RAW && (ip6->ip6_flow & htonl(0xff << 20)) != 0) ||
+			    (inp->in6p_outputopts && inp->in6p_outputopts->ip6po_tclass != -1))
+				goto no_dscp;
+		} 
+		else 
+#endif /* INET6 */
+		{
+			if ((so->so_type == SOCK_RAW && (inp->inp_flags & INP_HDRINCL)) ||
+				inp->inp_ip_tos != 0)
+				goto no_dscp;
+		}
+	}
+	
+	/*
+	 * Test if destination is local
+	 */
+	if ((sotcdb & SOTCDB_NO_LCLTST) == 0) {
+		int islocal = 0;
+		struct route *ro = &inp->inp_route;
+
+		if (so->so_type == SOCK_STREAM) {
+			struct tcpcb *tp = intotcpcb(inp);
+			
+			if ((tp->t_flags & TF_LOCAL))
+				islocal = 1;
+		}
+		else
+#if INET6
+		if (isipv6) 
+		{
+			if ((ro != NULL && ro->ro_rt != NULL &&
+				 (ro->ro_rt->rt_gateway->sa_family == AF_LINK ||
+				  (ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))) ||
+				 in6addr_local(&ip6->ip6_dst))
+				islocal = 1;
+		} 
+		else
+#endif /* INET6 */
+		{
+			if ((ro != NULL && ro->ro_rt != NULL && 
+				 (ro->ro_rt->rt_gateway->sa_family == AF_LINK ||
+				  (ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))) ||
+				 inaddr_local(ip->ip_dst))
+				islocal = 1;
+		}
+		if (islocal == 0)
+			goto no_dscp;
+	}
+
+#if INET6
+	if (isipv6)
+		ip6->ip6_flow |=
+			htonl(dscp_code_from_mbuf_tclass(m->m_pkthdr.prio) << 20);
+	else
+#endif /* INET6 */
+		ip->ip_tos |= dscp_code_from_mbuf_tclass(m->m_pkthdr.prio) << 2;
+	
+no_dscp:
+	/*
+	 * For TCP with background traffic class switch CC algo based on sysctl
+	 */
+	if (so->so_type == SOCK_STREAM) {
+		set_tcp_stream_priority(so);
+	}
+	
+	/*
+	 * Assume socket and mbuf traffic class values are the same
+	 * Also assume the socket lock is held
+	 */
+	so->so_tc_stats[mtc].txpackets += 1;
+	so->so_tc_stats[mtc].txbytes += m->m_pkthdr.len;
+	
+	return;
+}
+
+__private_extern__ void
+socket_tclass_init(void)
+{
+	tclass_lck_grp_attr = lck_grp_attr_alloc_init();
+	tclass_lck_grp = lck_grp_alloc_init("tclass", tclass_lck_grp_attr);
+	tclass_lck_attr = lck_attr_alloc_init();
+	if ((tclass_lock = lck_mtx_alloc_init(tclass_lck_grp, tclass_lck_attr)) == NULL) {
+		panic("failed to allocate memory for tclass\n");
+	}
+}
diff --git a/bsd/netinet/in_var.h b/bsd/netinet/in_var.h
index df7a968af..0b5d373de 100644
--- a/bsd/netinet/in_var.h
+++ b/bsd/netinet/in_var.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -70,7 +70,7 @@
 #include <sys/kern_event.h>
 #endif
 
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 #include <net/route.h>
 
 /*
@@ -96,7 +96,7 @@ struct in_ifaddr {
 	struct sockaddr_in	ia_sockmask;	/* reserve space for general netmask */
 	TAILQ_ENTRY(in_ifaddr)	ia_hash;	/* hash bucket entry */
 };
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 struct	in_aliasreq {
 	char			ifra_name[IFNAMSIZ];	/* if name, e.g. "en0" */
@@ -155,9 +155,10 @@ struct kev_in_portinuse {
 #define KEV_INET_PORTINUSE    8	/* use ken_in_portinuse */
 #endif
 
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 #include <net/if_var.h>
 #include <kern/locks.h>
+#include <sys/tree.h>
 /*
  * Given a pointer to an in_ifaddr (ifaddr),
  * return a pointer to the addr as a sockaddr_in.
@@ -195,9 +196,14 @@ extern int apple_hwcksum_rx;
 	struct in_ifaddr *ia;						\
 									\
 	lck_rw_lock_shared(in_ifaddr_rwlock);				\
-	TAILQ_FOREACH(ia, INADDR_HASH((addr).s_addr), ia_hash)		\
-		if (IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr)	\
+	TAILQ_FOREACH(ia, INADDR_HASH((addr).s_addr), ia_hash) {	\
+		IFA_LOCK_SPIN(&ia->ia_ifa);				\
+		if (IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr) {	\
+			IFA_UNLOCK(&ia->ia_ifa);			\
 			break;						\
+		}							\
+		IFA_UNLOCK(&ia->ia_ifa);				\
+	}								\
 	(ifp) = (ia == NULL) ? NULL : ia->ia_ifp;			\
 	lck_rw_done(in_ifaddr_rwlock);					\
 }
@@ -217,7 +223,7 @@ extern int apple_hwcksum_rx;
 	    (ia) = TAILQ_NEXT((ia), ia_link))				\
 		continue;						\
 	if ((ia) != NULL)						\
-		ifaref(&(ia)->ia_ifa);					\
+		IFA_ADDREF(&(ia)->ia_ifa);				\
 	lck_rw_done(in_ifaddr_rwlock);					\
 }
 
@@ -226,31 +232,153 @@ extern int apple_hwcksum_rx;
  * to change that - as it might break a number of things
  */
 
+/*
+ * Legacy IPv4 IGMP per-link structure.
+ */
 struct router_info {
 	struct ifnet *rti_ifp;
 	int    rti_type; /* type of router which is querier on this interface */
 	int    rti_time; /* # of slow timeouts since last old query */
-	struct router_info *rti_next;
+	SLIST_ENTRY(router_info) rti_list;
+};
+
+/*
+ * IPv4 multicast IGMP-layer source entry.
+ */
+struct ip_msource {
+	RB_ENTRY(ip_msource)	ims_link;	/* RB tree links */
+	in_addr_t		ims_haddr;	/* host byte order */
+	struct ims_st {
+		uint16_t	ex;		/* # of exclusive members */
+		uint16_t	in;		/* # of inclusive members */
+	}			ims_st[2];	/* state at t0, t1 */
+	uint8_t			ims_stp;	/* pending query */
+};
+
+/*
+ * IPv4 multicast PCB-layer source entry.
+ */
+struct in_msource {
+	RB_ENTRY(ip_msource)	ims_link;	/* RB tree links */
+	in_addr_t		ims_haddr;	/* host byte order */
+	uint8_t			imsl_st[2];	/* state before/at commit */
 };
 
+RB_HEAD(ip_msource_tree, ip_msource);	/* define struct ip_msource_tree */
+
+RB_PROTOTYPE_SC_PREV(__private_extern__, ip_msource_tree, ip_msource,
+    ims_link, ip_msource_cmp);
+
+/*
+ * IPv4 multicast PCB-layer group filter descriptor.
+ */
+struct in_mfilter {
+	struct ip_msource_tree	imf_sources; /* source list for (S,G) */
+	u_long			imf_nsrc;    /* # of source entries */
+	uint8_t			imf_st[2];   /* state before/at commit */
+};
+
+struct igmp_ifinfo;
+
 /*
- * Internet multicast address structure.  There is one of these for each IP
- * multicast group to which this host belongs on a given network interface.
- * For every entry on the interface's if_multiaddrs list which represents
- * an IP multicast group, there is one of these structures.  They are also
- * kept on a system-wide list to make it easier to keep our legacy IGMP code
- * compatible with the rest of the world (see IN_FIRST_MULTI et al, below).
+ * IPv4 group descriptor.
+ *
+ * For every entry on an ifnet's if_multiaddrs list which represents
+ * an IP multicast group, there is one of these structures.
+ *
+ * If any source filters are present, then a node will exist in the RB-tree
+ * to permit fast lookup by source whenever an operation takes place.
+ * This permits pre-order traversal when we issue reports.
+ * Source filter trees are kept separately from the socket layer to
+ * greatly simplify locking.
+ *
+ * When IGMPv3 is active, inm_timer is the group-query response timer.
+ * The state-change timer inm_sctimer is separate; whenever state changes
+ * for the group the state change record is generated and transmitted,
+ * and kept if retransmissions are necessary.
+ *
+ * FUTURE: inm_link is now only used when groups are being purged
+ * on a detaching ifnet. It could be demoted to a SLIST_ENTRY, but
+ * because it is at the very start of the struct, we can't do this
+ * w/o breaking the ABI for ifmcstat.
  */
 struct in_multi {
+	decl_lck_mtx_data(, inm_lock);
+	u_int32_t inm_refcount;		/* reference count */
+	u_int32_t inm_reqcnt;		/* request count for this address */
+	u_int32_t inm_debug;		/* see ifa_debug flags */
 	LIST_ENTRY(in_multi) inm_link;	/* queue macro glue */
 	struct	in_addr inm_addr;	/* IP multicast address, convenience */
 	struct	ifnet *inm_ifp;		/* back pointer to ifnet */
 	struct	ifmultiaddr *inm_ifma;	/* back pointer to ifmultiaddr */
-	u_int	inm_timer;		/* IGMP membership report timer */
+	u_int	inm_timer;		/* IGMPv1/v2 group / v3 query timer  */
 	u_int	inm_state;		/*  state of the membership */
-	struct	router_info *inm_rti;	/* router info*/
+	void *inm_rti;			/* unused, legacy field */
+
+	/* New fields for IGMPv3 follow. */
+	struct igmp_ifinfo	*inm_igi;	/* IGMP info */
+	SLIST_ENTRY(in_multi)	 inm_nrele;	/* to-be-released by IGMP */
+	u_int32_t		 inm_nrelecnt;	/* deferred release count */
+	struct ip_msource_tree	 inm_srcs;	/* tree of sources */
+	u_long			 inm_nsrc;	/* # of tree entries */
+
+	struct ifqueue		 inm_scq;	/* queue of pending
+						 * state-change packets */
+	struct timeval		 inm_lastgsrtv;	/* Time of last G-S-R query */
+	uint16_t		 inm_sctimer;	/* state-change timer */
+	uint16_t		 inm_scrv;	/* state-change rexmit count */
+
+	/*
+	 * SSM state counters which track state at T0 (the time the last
+	 * state-change report's RV timer went to zero) and T1
+	 * (time of pending report, i.e. now).
+	 * Used for computing IGMPv3 state-change reports. Several refcounts
+	 * are maintained here to optimize for common use-cases.
+	 */
+	struct inm_st {
+		uint16_t	iss_fmode;	/* IGMP filter mode */
+		uint16_t	iss_asm;	/* # of ASM listeners */
+		uint16_t	iss_ex;		/* # of exclusive members */
+		uint16_t	iss_in;		/* # of inclusive members */
+		uint16_t	iss_rec;	/* # of recorded sources */
+	}			inm_st[2];	/* state at t0, t1 */
+
+	void (*inm_trace)		/* callback fn for tracing refs */
+	    (struct in_multi *, int);
 };
 
+#define	INM_LOCK_ASSERT_HELD(_inm)					\
+	lck_mtx_assert(&(_inm)->inm_lock, LCK_MTX_ASSERT_OWNED)
+
+#define	INM_LOCK_ASSERT_NOTHELD(_inm)					\
+	lck_mtx_assert(&(_inm)->inm_lock, LCK_MTX_ASSERT_NOTOWNED)
+
+#define	INM_LOCK(_inm)							\
+	lck_mtx_lock(&(_inm)->inm_lock)
+
+#define	INM_LOCK_SPIN(_inm)						\
+	lck_mtx_lock_spin(&(_inm)->inm_lock)
+
+#define	INM_CONVERT_LOCK(_inm) do {					\
+	INM_LOCK_ASSERT_HELD(_inm);					\
+	lck_mtx_convert_spin(&(_inm)->inm_lock);			\
+} while (0)
+
+#define	INM_UNLOCK(_inm)						\
+	lck_mtx_unlock(&(_inm)->inm_lock)
+
+#define	INM_ADDREF(_inm)						\
+	inm_addref(_inm, 0)
+
+#define	INM_ADDREF_LOCKED(_inm)						\
+	inm_addref(_inm, 1)
+
+#define	INM_REMREF(_inm)						\
+	inm_remref(_inm, 0)
+
+#define	INM_REMREF_LOCKED(_inm)						\
+	inm_remref(_inm, 1)
+
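+/*
+ * Sketch of the intended spin-to-full-mutex pattern (mirrors the
+ * lck_mtx_convert_spin usage elsewhere in this change); after the
+ * conversion the holder may block:
+ *
+ *	INM_LOCK_SPIN(inm);
+ *	...
+ *	INM_CONVERT_LOCK(inm);
+ *	...
+ *	INM_UNLOCK(inm);
+ */
+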
 #ifdef SYSCTL_DECL
 SYSCTL_DECL(_net_inet_ip);
 SYSCTL_DECL(_net_inet_raw);
@@ -269,22 +397,36 @@ struct in_multistep {
 /*
  * Macro for looking up the in_multi record for a given IP multicast address
  * on a given interface.  If no matching record is found, "inm" is set null.
+ *
+ * We do this differently compared to other BSD implementations; instead of
+ * walking the if_multiaddrs list at the interface and returning the
+ * ifma_protospec value of a matching entry, we search the global list
+ * of in_multi records and find it that way.  Otherwise either the two
+ * structures (in_multi, ifmultiaddr) need to be ref counted both ways,
+ * which will make things too complicated, or they need to reside in the
+ * same protected domain, which they don't.
+ *
+ * Must be called with in_multihead_lock held.
  */
-#define IN_LOOKUP_MULTI(addr, ifp, inm) \
-	/* struct in_addr addr; */ \
-	/* struct ifnet *ifp; */ \
-	/* struct in_multi *inm; */ \
-do { \
-	struct ifmultiaddr *ifma; \
-\
-	LIST_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { \
-		if (ifma->ifma_addr->sa_family == AF_INET \
-		    && ((struct sockaddr_in *)ifma->ifma_addr)->sin_addr.s_addr == \
-		    (addr).s_addr) \
-			break; \
-	} \
-	(inm) = ifma ? ifma->ifma_protospec : NULL; \
-} while(0)
+#define IN_LOOKUP_MULTI(addr, ifp, inm)					\
+	/* struct in_addr *addr; */					\
+	/* struct ifnet *ifp; */					\
+	/* struct in_multi *inm; */					\
+do {									\
+	struct in_multistep _step;					\
+	IN_FIRST_MULTI(_step, inm);					\
+	while ((inm) != NULL) {						\
+		INM_LOCK_SPIN(inm);					\
+		if ((inm)->inm_ifp == (ifp) &&				\
+		    (inm)->inm_addr.s_addr == (addr)->s_addr) {		\
+			INM_ADDREF_LOCKED(inm);				\
+			INM_UNLOCK(inm);				\
+			break;						\
+		}							\
+		INM_UNLOCK(inm);					\
+		IN_NEXT_MULTI(_step, inm);				\
+	}								\
+} while (0)
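+
+/*
+ * Sketch of a typical caller; on a match the macro takes a reference
+ * which the caller must drop with INM_REMREF():
+ *
+ *	struct in_multi *inm;
+ *
+ *	in_multihead_lock_shared();
+ *	IN_LOOKUP_MULTI(&addr, ifp, inm);
+ *	in_multihead_lock_done();
+ *	if (inm != NULL) {
+ *		...
+ *		INM_REMREF(inm);
+ *	}
+ */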
 
 /*
  * Macro to step through all of the in_multi records, one at a time.
@@ -292,28 +434,57 @@ do { \
  * provide.  IN_FIRST_MULTI(), below, must be called to initialize "step"
  * and get the first record.  Both macros return a NULL "inm" when there
  * are no remaining records.
+ *
+ * Must be called with in_multihead_lock held.
  */
-#define IN_NEXT_MULTI(step, inm) \
-	/* struct in_multistep  step; */ \
-	/* struct in_multi *inm; */ \
-do { \
-	if (((inm) = (step).i_inm) != NULL) \
-		(step).i_inm = LIST_NEXT((step).i_inm, inm_link); \
-} while(0)
-
-#define IN_FIRST_MULTI(step, inm) \
-	/* struct in_multistep step; */ \
-	/* struct in_multi *inm; */ \
-do { \
-	(step).i_inm = LIST_FIRST(&in_multihead); \
-	IN_NEXT_MULTI((step), (inm)); \
-} while(0)
+#define IN_NEXT_MULTI(step, inm)					\
+	/* struct in_multistep  step; */				\
+	/* struct in_multi *inm; */					\
+do {									\
+	in_multihead_lock_assert(LCK_RW_ASSERT_HELD);			\
+	if (((inm) = (step).i_inm) != NULL)				\
+		(step).i_inm = LIST_NEXT((step).i_inm, inm_link);	\
+} while (0)
+
+#define IN_FIRST_MULTI(step, inm)					\
+	/* struct in_multistep step; */					\
+	/* struct in_multi *inm; */					\
+do {									\
+	in_multihead_lock_assert(LCK_RW_ASSERT_HELD);			\
+	(step).i_inm = LIST_FIRST(&in_multihead);			\
+	IN_NEXT_MULTI((step), (inm));					\
+} while (0)
 
 struct	route;
+struct	ip_moptions;
+
+/*
+ * Return values for imo_multi_filter().
+ */
+#define MCAST_PASS		0	/* Pass */
+#define MCAST_NOTGMEMBER	1	/* This host not a member of group */
+#define MCAST_NOTSMEMBER	2	/* This host excluded source */
+#define MCAST_MUTED		3	/* [deprecated] */
 
 extern void in_ifaddr_init(void);
+extern int imo_multi_filter(const struct ip_moptions *, const struct ifnet *,
+    const struct sockaddr *, const struct sockaddr *);
+extern int imo_clone(struct ip_moptions *, struct ip_moptions *);
+extern void inm_commit(struct in_multi *);
+extern void inm_clear_recorded(struct in_multi *);
+extern void inm_print(const struct in_multi *);
+extern int inm_record_source(struct in_multi *inm, const in_addr_t);
+extern void inm_release(struct in_multi *);
+extern void in_multi_init(void);
 extern struct in_multi *in_addmulti(struct in_addr *, struct ifnet *);
-extern void in_delmulti(struct in_multi **);
+extern void in_delmulti(struct in_multi *);
+extern int in_leavegroup(struct in_multi *, /*const*/ struct in_mfilter *);
+extern int in_multi_detach(struct in_multi *);
+extern void inm_addref(struct in_multi *, int);
+extern void inm_remref(struct in_multi *, int);
+extern void inm_purge(struct in_multi *);
+extern uint8_t ims_get_mode(const struct in_multi *,
+    const struct ip_msource *, uint8_t);
 extern int in_control(struct socket *, u_long, caddr_t, struct ifnet *,
     struct proc *);
 extern void in_rtqdrain(void);
@@ -321,14 +492,20 @@ extern struct radix_node *in_validate(struct radix_node *);
 extern void ip_input(struct mbuf *);
 extern int in_ifadown(struct ifaddr *ifa, int);
 extern void in_ifscrub(struct ifnet *, struct in_ifaddr *, int);
-extern int ipflow_fastforward(struct mbuf *);
-#if IPFLOW
-extern void ipflow_create(const struct route *, struct mbuf *);
-extern void ipflow_slowtimo(void);
-#endif /* IPFLOW */
 extern u_int32_t inaddr_hashval(u_int32_t);
-
-#endif /* KERNEL_PRIVATE */
+extern void in_purgeaddrs(struct ifnet *);
+extern void	imf_leave(struct in_mfilter *);
+extern void	imf_purge(struct in_mfilter *);
+
+struct inpcb;
+
+__private_extern__ int inp_join_group(struct inpcb *, struct sockopt *);
+__private_extern__ int inp_leave_group(struct inpcb *, struct sockopt *);
+__private_extern__ void in_multihead_lock_exclusive(void);
+__private_extern__ void in_multihead_lock_shared(void);
+__private_extern__ void in_multihead_lock_assert(int);
+__private_extern__ void in_multihead_lock_done(void);
+#endif /* XNU_KERNEL_PRIVATE */
 
 /* INET6 stuff */
 #include <netinet6/in6_var.h>
diff --git a/bsd/netinet/ip6.h b/bsd/netinet/ip6.h
index 203e86a64..a740ddc49 100644
--- a/bsd/netinet/ip6.h
+++ b/bsd/netinet/ip6.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -170,7 +170,10 @@ struct ip6_dest {
 #define IP6OPT_JUMBO		0xC2	/* 11 0 00010 = 194 */
 #define IP6OPT_NSAP_ADDR	0xC3	/* 11 0 00011 */
 #define IP6OPT_TUNNEL_LIMIT	0x04	/* 00 0 00100 */
+#ifndef KERNEL_PRIVATE
 #define IP6OPT_RTALERT		0x05	/* 00 0 00101 (KAME definition) */
+#endif
+#define IP6OPT_ROUTER_ALERT	0x05	/* 00 0 00101 (RFC3542, recommended) */
 
 #define IP6OPT_RTALERT_LEN	4
 #define IP6OPT_RTALERT_MLD	0	/* Datagram contains an MLD message */
@@ -178,10 +181,6 @@ struct ip6_dest {
 #define IP6OPT_RTALERT_ACTNET	2 	/* contains an Active Networks msg */
 #define IP6OPT_MINLEN		2
 
-#define IP6OPT_BINDING_UPDATE	0xc6	/* 11 0 00110 */
-#define IP6OPT_BINDING_ACK	0x07	/* 00 0 00111 */
-#define IP6OPT_BINDING_REQ	0x08	/* 00 0 01000 */
-#define IP6OPT_HOME_ADDRESS	0xc9	/* 11 0 01001 */
 #define IP6OPT_EID		0x8a	/* 10 0 01010 */
 
 #define IP6OPT_TYPE(o)		((o) & 0xC0)
@@ -192,8 +191,56 @@ struct ip6_dest {
 
 #define IP6OPT_MUTABLE		0x20
 
+/* IPv6 options: common part */
+struct ip6_opt {
+	u_int8_t ip6o_type;
+	u_int8_t ip6o_len;
+} __attribute__((__packed__));
+
+/* Jumbo Payload Option */
+struct ip6_opt_jumbo {
+	u_int8_t ip6oj_type;
+	u_int8_t ip6oj_len;
+	u_int8_t ip6oj_jumbo_len[4];
+} __attribute__((__packed__));
 #define IP6OPT_JUMBO_LEN	6
 
+/* NSAP Address Option */
+struct ip6_opt_nsap {
+	u_int8_t ip6on_type;
+	u_int8_t ip6on_len;
+	u_int8_t ip6on_src_nsap_len;
+	u_int8_t ip6on_dst_nsap_len;
+	/* followed by source NSAP */
+	/* followed by destination NSAP */
+} __attribute__((__packed__));
+
+/* Tunnel Limit Option */
+struct ip6_opt_tunnel {
+	u_int8_t ip6ot_type;
+	u_int8_t ip6ot_len;
+	u_int8_t ip6ot_encap_limit;
+} __attribute__((__packed__));
+
+/* Router Alert Option */
+struct ip6_opt_router {
+	u_int8_t ip6or_type;
+	u_int8_t ip6or_len;
+	u_int8_t ip6or_value[2];
+} __attribute__((__packed__));
+/* Router alert values (in network byte order) */
+#if BYTE_ORDER == BIG_ENDIAN
+#define IP6_ALERT_MLD	0x0000
+#define IP6_ALERT_RSVP	0x0001
+#define IP6_ALERT_AN	0x0002
+#else
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define IP6_ALERT_MLD	0x0000
+#define IP6_ALERT_RSVP	0x0100
+#define IP6_ALERT_AN	0x0200
+#endif /* LITTLE_ENDIAN */
+#endif
+
 /* Routing header */
 struct ip6_rthdr {
 	u_int8_t  ip6r_nxt;	/* next header */
@@ -235,13 +282,14 @@ struct ip6_frag {
 /*
  * Internet implementation parameters.
  */
-#define IPV6_MAXHLIM	255	/* maximun hoplimit */
+#define IPV6_MAXHLIM	255	/* maximum hoplimit */
 #define IPV6_DEFHLIM	64	/* default hlim */
 #define IPV6_FRAGTTL	120	/* ttl for fragment packets, in slowtimo tick */
-#define IPV6_HLIMDEC	1	/* subtracted when forwaeding */
+#define IPV6_HLIMDEC	1	/* subtracted when forwarding */
 
 #define IPV6_MMTU	1280	/* minimal MTU and reassembly. 1024 + 256 */
 #define IPV6_MAXPACKET	65535	/* ip6 max packet size without Jumbo payload*/
+#define IPV6_MAXOPTHDR	2048	/* max option header size, 256 64-bit words */
 
 #ifdef KERNEL_PRIVATE
 /*
@@ -291,45 +339,12 @@ do {									\
  * with type "typ".
  * IP6_EXTHDR_GET0 does the same, except that it aligns the structure at the
  * very top of mbuf.  GET0 is likely to make memory copy than GET.
- *
- * XXX we're now testing this, needs m_pulldown()
  */
-#define IP6_EXTHDR_GET(val, typ, m, off, len) \
-do {									\
-	struct mbuf *t;							\
-	int tmp;							\
-	if ((m)->m_len >= (off) + (len))				\
-		(val) = (typ)(mtod((m), caddr_t) + (off));		\
-	else {								\
-		t = m_pulldown((m), (off), (len), &tmp);		\
-		if (t) {						\
-			if (t->m_len < tmp + (len))			\
-				panic("m_pulldown malfunction");	\
-			(val) = (typ)(mtod(t, caddr_t) + tmp);		\
-		} else {						\
-			(val) = (typ)NULL;				\
-			(m) = NULL;					\
-		}							\
-	}								\
-} while (0)
+#define IP6_EXTHDR_GET(val, typ, m, off, len)				\
+	M_STRUCT_GET(val, typ, m, off, len)
 
-#define IP6_EXTHDR_GET0(val, typ, m, off, len) \
-do {									\
-	struct mbuf *t;							\
-	if ((off) == 0)							\
-		(val) = (typ)mtod(m, caddr_t);				\
-	else {								\
-		t = m_pulldown((m), (off), (len), NULL);		\
-		if (t) {						\
-			if (t->m_len < (len))				\
-				panic("m_pulldown malfunction");	\
-			(val) = (typ)mtod(t, caddr_t);			\
-		} else {						\
-			(val) = (typ)NULL;				\
-			(m) = NULL;					\
-		}							\
-	}								\
-} while (0)
+#define IP6_EXTHDR_GET0(val, typ, m, off, len)				\
+	M_STRUCT_GET0(val, typ, m, off, len)
 
 #endif /* KERNEL_PRIVATE */
 #endif /* !_NETINET_IP6_H_ */
diff --git a/bsd/netinet/ip_divert.c b/bsd/netinet/ip_divert.c
index e3c771e6d..600a796d1 100644
--- a/bsd/netinet/ip_divert.c
+++ b/bsd/netinet/ip_divert.c
@@ -236,12 +236,14 @@ divert_packet(struct mbuf *m, int incoming, int port, int rule)
 		/* Find IP address for receive interface */
 		ifnet_lock_shared(m->m_pkthdr.rcvif);
 		TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
-			if (ifa->ifa_addr == NULL)
-				continue;
-			if (ifa->ifa_addr->sa_family != AF_INET)
+			IFA_LOCK(ifa);
+			if (ifa->ifa_addr->sa_family != AF_INET) {
+				IFA_UNLOCK(ifa);
 				continue;
+			}
 			divsrc.sin_addr =
 			    ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr;
+			IFA_UNLOCK(ifa);
 			break;
 		}
 		ifnet_lock_done(m->m_pkthdr.rcvif);
@@ -314,14 +316,10 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr,
 	struct ip *const ip = mtod(m, struct ip *);
 	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
 	int error = 0;
-#if PKT_PRIORITY
-	mbuf_traffic_class_t mtc = MBUF_TC_NONE;
-#endif /* PKT_PRIORITY */
+	mbuf_traffic_class_t mtc = MBUF_TC_UNSPEC;
 
 	if (control != NULL) {
-#if PKT_PRIORITY
 		mtc = mbuf_traffic_class_from_control(control);
-#endif /* PKT_PRIORITY */
 
 		m_freem(control);		/* XXX */
 	}
@@ -332,8 +330,8 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr,
 		int	len = 0;
 		char	*c = sin->sin_zero;
 
-		mtag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT,
-				sizeof(struct divert_tag), M_NOWAIT);
+		mtag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT,
+				sizeof(struct divert_tag), M_NOWAIT, m);
 		if (mtag == NULL) {
 			error = ENOBUFS;
 			goto cantsend;
@@ -359,8 +357,9 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr,
 
 	/* Reinject packet into the system as incoming or outgoing */
 	if (!sin || sin->sin_addr.s_addr == 0) {
-		struct ip_out_args ipoa = { IFSCOPE_NONE };
+		struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
 		struct route ro;
+		struct ip_moptions *imo;
 
 		/*
 		 * Don't allow both user specified and setsockopt options,
@@ -382,10 +381,11 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr,
 		/* Copy the cached route and take an extra reference */
 		inp_route_copyout(inp, &ro);
 
-#if PKT_PRIORITY
-		set_traffic_class(m, so, mtc);
-#endif /* PKT_PRIORITY */
+		set_packet_tclass(m, so, mtc, 0);
 
+		imo = inp->inp_moptions;
+		if (imo != NULL)
+			IMO_ADDREF(imo);
 		socket_unlock(so, 0);
 #if CONFIG_MACF_NET
 		mac_mbuf_label_associate_inpcb(inp, m);
@@ -394,9 +394,11 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr,
 		error = ip_output(m, inp->inp_options, &ro,
 			(so->so_options & SO_DONTROUTE) |
 			IP_ALLOWBROADCAST | IP_RAWOUTPUT | IP_OUTARGS,
-			inp->inp_moptions, &ipoa);
+			imo, &ipoa);
 
 		socket_lock(so, 0);
+		if (imo != NULL)
+			IMO_REMREF(imo);
 		/* Synchronize cached PCB route */
 		inp_route_copyin(inp, &ro);
 	} else {
@@ -417,7 +419,7 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr,
 				goto cantsend;
 			}
 			m->m_pkthdr.rcvif = ifa->ifa_ifp;
-			ifafree(ifa);
+			IFA_REMREF(ifa);
 		}
 #if CONFIG_MACF_NET
 		mac_mbuf_label_associate_socket(so, m);
@@ -462,7 +464,7 @@ div_attach(struct socket *so, int proto, struct proc *p)
 
 #ifdef MORE_DICVLOCK_DEBUG
 	printf("div_attach: so=%p sopcb=%p lock=%x ref=%x\n",
-			so, so->so_pcb, ((struct inpcb *)so->so_pcb)->inpcb_mtx, so->so_usecount);
+			so, so->so_pcb, &(((struct inpcb *)so->so_pcb)->inpcb_mtx), so->so_usecount);
 #endif
 	return 0;
 }
@@ -474,7 +476,7 @@ div_detach(struct socket *so)
 
 #ifdef MORE_DICVLOCK_DEBUG
 	printf("div_detach: so=%p sopcb=%p lock=%x ref=%x\n",
-			so, so->so_pcb, ((struct inpcb *)so->so_pcb)->inpcb_mtx, so->so_usecount);
+			so, so->so_pcb, &(((struct inpcb *)so->so_pcb)->inpcb_mtx), so->so_usecount);
 #endif
 	inp = sotoinpcb(so);
 	if (inp == 0)
@@ -656,11 +658,11 @@ div_lock(struct socket *so, int refcount, void *lr)
 #ifdef MORE_DICVLOCK_DEBUG
 	printf("div_lock: so=%p sopcb=%p lock=%p ref=%x lr=%p\n",
 	    so, so->so_pcb, so->so_pcb ?
-	    ((struct inpcb *)so->so_pcb)->inpcb_mtx : NULL,
+	    &(((struct inpcb *)so->so_pcb)->inpcb_mtx) : NULL,
 	    so->so_usecount, lr_saved);
 #endif
 	if (so->so_pcb) {
-		lck_mtx_lock(((struct inpcb *)so->so_pcb)->inpcb_mtx);
+		lck_mtx_lock(&((struct inpcb *)so->so_pcb)->inpcb_mtx);
 	} else  {
 		panic("div_lock: so=%p NO PCB! lr=%p lrh= lrh= %s\n", 
 		    so, lr_saved, solockhistory_nr(so));
@@ -697,7 +699,7 @@ div_unlock(struct socket *so, int refcount, void *lr)
 #ifdef MORE_DICVLOCK_DEBUG
 	printf("div_unlock: so=%p sopcb=%p lock=%p ref=%x lr=%p\n",
 	    so, so->so_pcb, so->so_pcb ?
-	    ((struct inpcb *)so->so_pcb)->inpcb_mtx : NULL,
+	    &(((struct inpcb *)so->so_pcb)->inpcb_mtx) : NULL,
 	    so->so_usecount, lr_saved);
 #endif
 	if (refcount)
@@ -713,7 +715,7 @@ div_unlock(struct socket *so, int refcount, void *lr)
 		    so, so->so_usecount, lr_saved, solockhistory_nr(so));
 		/* NOTREACHED */
 	}
-	mutex_held = ((struct inpcb *)so->so_pcb)->inpcb_mtx;
+	mutex_held = &((struct inpcb *)so->so_pcb)->inpcb_mtx;
 
 	if (so->so_usecount == 0 && (inp->inp_wantcnt == WNT_STOPUSING)) {
 		lck_rw_lock_exclusive(divcbinfo.mtx);
@@ -739,7 +741,7 @@ div_getlock(struct socket *so, __unused int locktype)
 		if (so->so_usecount < 0)
 			panic("div_getlock: so=%p usecount=%x lrh= %s\n", 
 			    so, so->so_usecount, solockhistory_nr(so));
-		return(inpcb->inpcb_mtx);
+		return(&inpcb->inpcb_mtx);
 	} else {
 		panic("div_getlock: so=%p NULL NO PCB lrh= %s\n", 
 		    so, solockhistory_nr(so));
diff --git a/bsd/netinet/ip_dummynet.c b/bsd/netinet/ip_dummynet.c
index 54fceaef4..048cff004 100644
--- a/bsd/netinet/ip_dummynet.c
+++ b/bsd/netinet/ip_dummynet.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -89,6 +89,7 @@
 #include <sys/socketvar.h>
 #include <sys/time.h>
 #include <sys/sysctl.h>
 #include <net/if.h>
 #include <net/route.h>
 #include <net/kpi_protocol.h>
@@ -121,6 +122,8 @@ static int red_lookup_depth = 256;	/* RED - default lookup table depth */
 static int red_avg_pkt_size = 512;      /* RED - default medium packet size */
 static int red_max_pkt_size = 1500;     /* RED - default max packet size */
 
+static int serialize = 0;
+
 /*
  * Three heaps contain queues and pipes that the scheduler handles:
  *
@@ -152,9 +155,6 @@ static void	ready_event_wfq(struct dn_pipe *p, struct mbuf **head,
  */
 static void dummynet_send(struct mbuf *m);
 
-/* Flag to signify the existance of a dequeued packet chain */
-static int serialize = 0;
-
 #define	HASHSIZE	16
 #define	HASH(num)	((((num) >> 8) ^ ((num) >> 4) ^ (num)) & 0x0f)
 static struct dn_pipe_head	pipehash[HASHSIZE];	/* all pipes */
@@ -163,36 +163,36 @@ static struct dn_flow_set_head	flowsethash[HASHSIZE];	/* all flowsets */
 
 #ifdef SYSCTL_NODE
 SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet,
-		CTLFLAG_RW, 0, "Dummynet");
+		CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Dummynet");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
-	    CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size");
+	    CTLFLAG_RW | CTLFLAG_LOCKED, &dn_hash_size, 0, "Default hash table size");
 SYSCTL_QUAD(_net_inet_ip_dummynet, OID_AUTO, curr_time,
-	    CTLFLAG_RD, &curr_time, "Current tick");
+	    CTLFLAG_RD | CTLFLAG_LOCKED, &curr_time, "Current tick");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap,
-	    CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap");
+	    CTLFLAG_RD | CTLFLAG_LOCKED, &ready_heap.size, 0, "Size of ready heap");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap,
-	    CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap");
+	    CTLFLAG_RD | CTLFLAG_LOCKED, &extract_heap.size, 0, "Size of extract heap");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, searches,
-	    CTLFLAG_RD, &searches, 0, "Number of queue searches");
+	    CTLFLAG_RD | CTLFLAG_LOCKED, &searches, 0, "Number of queue searches");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, search_steps,
-	    CTLFLAG_RD, &search_steps, 0, "Number of queue search steps");
+	    CTLFLAG_RD | CTLFLAG_LOCKED, &search_steps, 0, "Number of queue search steps");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
-	    CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty");
+	    CTLFLAG_RW | CTLFLAG_LOCKED, &pipe_expire, 0, "Expire queue if empty");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len,
-	    CTLFLAG_RW, &dn_max_ratio, 0, 
+	    CTLFLAG_RW | CTLFLAG_LOCKED, &dn_max_ratio, 0, 
 	"Max ratio between dynamic queues and buckets");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
-	CTLFLAG_RD, &red_lookup_depth, 0, "Depth of RED lookup table");
+	CTLFLAG_RD | CTLFLAG_LOCKED, &red_lookup_depth, 0, "Depth of RED lookup table");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
-	CTLFLAG_RD, &red_avg_pkt_size, 0, "RED Medium packet size");
+	CTLFLAG_RD | CTLFLAG_LOCKED, &red_avg_pkt_size, 0, "RED Medium packet size");
 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
-	CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size");
+	CTLFLAG_RD | CTLFLAG_LOCKED, &red_max_pkt_size, 0, "RED Max packet size");
 #endif
 
 #ifdef DUMMYNET_DEBUG
 int	dummynet_debug = 0;
 #ifdef SYSCTL_NODE
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &dummynet_debug,
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED, &dummynet_debug,
 	    0, "control debugging printfs");
 #endif
 #define	DPRINTF(X)	if (dummynet_debug) printf X
@@ -457,6 +457,7 @@ char *cp_pipe_to_32_user(struct dn_pipe *p, struct dn_pipe_32 *pipe_bp)
 	
 	pipe_bp->pipe_nr = p->pipe_nr;
 	pipe_bp->bandwidth = p->bandwidth;
+	pipe_bp->delay = p->delay;
 	bcopy( &(p->scheduler_heap), &(pipe_bp->scheduler_heap), sizeof(struct dn_heap_32));
 	pipe_bp->scheduler_heap.p = CAST_DOWN_EXPLICIT(user32_addr_t, pipe_bp->scheduler_heap.p);
 	bcopy( &(p->not_eligible_heap), &(pipe_bp->not_eligible_heap), sizeof(struct dn_heap_32));
@@ -497,6 +498,7 @@ char *cp_pipe_to_64_user(struct dn_pipe *p, struct dn_pipe_64 *pipe_bp)
 	
 	pipe_bp->pipe_nr = p->pipe_nr;
 	pipe_bp->bandwidth = p->bandwidth;
+	pipe_bp->delay = p->delay;
 	bcopy( &(p->scheduler_heap), &(pipe_bp->scheduler_heap), sizeof(struct dn_heap_64));
 	pipe_bp->scheduler_heap.p = CAST_DOWN(user64_addr_t, pipe_bp->scheduler_heap.p);
 	bcopy( &(p->not_eligible_heap), &(pipe_bp->not_eligible_heap), sizeof(struct dn_heap_64));
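
Both hunks above add the missing delay field to the 32-bit and 64-bit user copies of struct dn_pipe; before this fix, userland tools read back a zero delay regardless of configuration. A minimal sketch of the invariant, with hypothetical trimmed-down struct layouts:

#include <stdint.h>

/* Trimmed, hypothetical layouts; the real structs carry many more
 * fields plus heap pointers that are CAST_DOWN'ed for 32-bit users. */
struct dn_pipe_k   { int pipe_nr; int bandwidth; int delay; };
struct dn_pipe_u32 { int32_t pipe_nr; int32_t bandwidth; int32_t delay; };

static void
cp_pipe_to_32_sketch(const struct dn_pipe_k *p, struct dn_pipe_u32 *u)
{
	u->pipe_nr   = p->pipe_nr;
	u->bandwidth = p->bandwidth;
	u->delay     = p->delay;	/* the field this patch adds */
}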
@@ -648,47 +650,6 @@ heap_extract(struct dn_heap *h, void *obj)
     }
 }
 
-#if 0
-/*
- * change object position and update references
- * XXX this one is never used!
- */
-static void
-heap_move(struct dn_heap *h, dn_key new_key, void *object)
-{
-    int temp;
-    int i ;
-    int maxelt = h->elements-1 ;
-    struct dn_heap_entry buf ;
-
-    if (h->offset <= 0)
-	panic("cannot move items on this heap");
-
-    i = *((int *)((char *)object + h->offset));
-    if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */
-	h->p[i].key = new_key ;
-	for (; i>0 && DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key) ;
-		 i = temp ) { /* bubble up */
-	    HEAP_SWAP(h->p[i], h->p[temp], buf) ;
-	    SET_OFFSET(h, i);
-	}
-    } else {		/* must move down */
-	h->p[i].key = new_key ;
-	while ( (temp = HEAP_LEFT(i)) <= maxelt ) { /* found left child */
-	    if ((temp != maxelt) && DN_KEY_GT(h->p[temp].key, h->p[temp+1].key))
-		temp++ ; /* select child with min key */
-	    if (DN_KEY_GT(new_key, h->p[temp].key)) { /* go down */
-		HEAP_SWAP(h->p[i], h->p[temp], buf) ;
-		SET_OFFSET(h, i);
-	    } else
-		break ;
-	    i = temp ;
-	}
-    }
-    SET_OFFSET(h, i);
-}
-#endif /* heap_move, unused */
-
 /*
  * heapify() will reorganize data inside an array to maintain the
  * heap property. It is needed when we delete a bunch of entries.
@@ -757,10 +718,10 @@ transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail)
 {
     struct mbuf *m ;
     struct dn_pkt_tag *pkt ;
+    u_int64_t schedule_time;
 
 	lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED);
-
-	/* Extract packets only if no pending chain is being currently processed */
+	ASSERT(serialize >= 0);
 	if (serialize == 0) {
 		while ((m = pipe->head) != NULL) {
 			pkt = dn_tag_get(m);
@@ -774,9 +735,13 @@ transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail)
 				*head = m;
 			*tail = m;
 		}
+		
 		if (*tail != NULL)
 			(*tail)->m_nextpkt = NULL;
-	}
+		}
+
+		schedule_time = DN_KEY_LEQ(pkt->output_time, curr_time) ?
+		    curr_time+1 : pkt->output_time;
 
     /* if there are leftover packets, put the pipe into the heap for next ready event */
     if ((m = pipe->head) != NULL) {
@@ -784,7 +749,7 @@ transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail)
 		/* XXX should check errors on heap_insert, by draining the
 		 * whole pipe p and hoping in the future we are more successful
 		 */
-		heap_insert(&extract_heap, pkt->output_time, pipe);
+		heap_insert(&extract_heap, schedule_time, pipe);
     }
 }
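
The transmit_event() change above re-inserts a pipe with leftover packets at schedule_time rather than the raw pkt->output_time, guaranteeing the next extraction is at least one tick in the future. A minimal sketch of the clamp, assuming a wrap-safe comparison macro in the style of dummynet's DN_KEY_LEQ:

#include <stdint.h>

typedef uint64_t dn_key;
/* assumed wrap-safe ordering, in the spirit of dummynet's key macros */
#define DN_KEY_LEQ(a, b)	((int64_t)((a) - (b)) <= 0)

/* Never reschedule a pipe at a tick that has already passed, or the
 * extract heap could hand back the same entry without progress. */
static dn_key
next_schedule_time(dn_key output_time, dn_key curr_time)
{
	return DN_KEY_LEQ(output_time, curr_time) ? curr_time + 1
						  : output_time;
}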
 
@@ -1105,21 +1070,17 @@ dummynet(__unused void * unused)
 			break;
 		}
 	}
-
-	/* 
-	 * If a packet chain has been dequeued, set serialize=1 so that new 
-	 * packets don't get dispatched out of turn 
-	 */
+ 
 	if (head != NULL)
-		serialize = 1;
-
-    lck_mtx_unlock(dn_mutex);
+		serialize++;
+	
+	lck_mtx_unlock(dn_mutex);
 
 	/* Send out the de-queued list of ready-to-send packets */
 	if (head != NULL) {
 		dummynet_send(head);
 		lck_mtx_lock(dn_mutex);
-		serialize = 0;
+		serialize--;
 		lck_mtx_unlock(dn_mutex);
 	}
 }
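
This hunk turns serialize from a 0/1 flag into a nesting counter, so both dispatch paths (the dummynet timer here and if_tx_rdy below) can independently hold off transmit_event() while a dequeued chain is in flight. A user-space sketch of the intended pattern, with pthread primitives standing in for dn_mutex:

#include <pthread.h>

static pthread_mutex_t dn_lock = PTHREAD_MUTEX_INITIALIZER;
static int serialize;		/* >0 while any dispatch is in flight */

/* Increment under the lock before sending, decrement under the lock
 * after; transmit_event() only extracts fresh packets when
 * serialize == 0, so nothing is dispatched out of order around an
 * active send. */
static void
dispatch_chain(void (*send_fn)(void))
{
	pthread_mutex_lock(&dn_lock);
	serialize++;
	pthread_mutex_unlock(&dn_lock);

	send_fn();			/* dummynet_send() analogue */

	pthread_mutex_lock(&dn_lock);
	serialize--;
	pthread_mutex_unlock(&dn_lock);
}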
@@ -1193,13 +1154,19 @@ if_tx_rdy(struct ifnet *ifp)
 	p->numbytes = 0 ; /* mark ready for I/O */
 	ready_event_wfq(p, &head, &tail);
     }
+	
+	if (head != NULL) {
+		serialize++;
+	}
+	
 	lck_mtx_unlock(dn_mutex);
 
 	
 	/* Send out the de-queued list of ready-to-send packets */
-	if (head != NULL)
+	if (head != NULL) {
 		dummynet_send(head);
-
+		serialize--;
+	}
     return 0;
 }
 
@@ -1214,6 +1181,7 @@ expire_queues(struct dn_flow_set *fs)
     int i, initial_elements = fs->rq_elements ;
 	struct timeval timenow;
 
+	/* reviewed for getmicrotime usage */
 	getmicrotime(&timenow);
 
     if (fs->last_expired == timenow.tv_sec)
@@ -1564,8 +1532,8 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
 	goto dropit ;
 
     /* XXX expensive to zero, see if we can remove it */
-    mtag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET,
-    		sizeof(struct dn_pkt_tag), M_NOWAIT);
+    mtag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET,
+    		sizeof(struct dn_pkt_tag), M_NOWAIT, m);
     if ( mtag == NULL )
 		goto dropit ;		/* cannot allocate packet header	*/
     m_tag_prepend(m, mtag);	/* attach to mbuf chain */
@@ -1591,7 +1559,7 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
 	if (fwa->dst == (struct sockaddr_in *)&fwa->ro->ro_dst) /* dst points into ro */
 	    fwa->dst = (struct sockaddr_in *)&(pkt->ro.ro_dst) ;
 
-	pkt->dn_dst = fwa->dst;
+	bcopy (fwa->dst, &pkt->dn_dst, sizeof(pkt->dn_dst));
 	pkt->flags = fwa->flags;
 	if (fwa->ipoa != NULL)
 		pkt->ipoa = *(fwa->ipoa);
@@ -1619,7 +1587,7 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
 	if (pipe->bandwidth)
 	    t = SET_TICKS(m, q, pipe);
 	q->sched_time = curr_time ;
-	if (t == 0)	/* must process it now */
+	if (t == 0)     /* must process it now */
 	    ready_event( q , &head, &tail );
 	else
 	    heap_insert(&ready_heap, curr_time + t , q );
@@ -1682,9 +1650,10 @@ done:
 		ts.tv_nsec = 1 * 1000000;	// 1ms
 		timer_enabled = 1;
 		bsd_timeout(dummynet, NULL, &ts);
-    }
+	}
 
 	lck_mtx_unlock(dn_mutex);
+
 	if (head != NULL)
 		dummynet_send(head);
 
@@ -1964,9 +1933,9 @@ set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src)
 	    x->qsize = 1024*1024 ;
     } else {
 	if (x->qsize == 0)
-	    x->qsize = 50 ;
+	    x->qsize = 50;
 	if (x->qsize > 100)
-	    x->qsize = 50 ;
+	    x->qsize = 50;
     }
     /* configuring RED */
     if ( x->flags_fs & DN_IS_RED )
diff --git a/bsd/netinet/ip_dummynet.h b/bsd/netinet/ip_dummynet.h
index 83f38d24e..e5dd1f337 100644
--- a/bsd/netinet/ip_dummynet.h
+++ b/bsd/netinet/ip_dummynet.h
@@ -157,7 +157,7 @@ struct dn_pkt_tag {
 
     dn_key output_time;		/* when the pkt is due for delivery	*/
     struct ifnet *ifp;		/* interface, for ip_output		*/
-    struct sockaddr_in *dn_dst ;
+    struct sockaddr_in dn_dst ;
     struct route ro;		/* route, for ip_output. MUST COPY	*/
     int flags ;			/* flags, for ip_output (IPv6 ?)	*/
     struct ip_out_args ipoa;	/* output args, for ip_output. MUST COPY */
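
The header change above, together with the bcopy() in dummynet_io(), converts dn_dst from a pointer into an embedded struct sockaddr_in: a queued packet's tag now owns a stable copy of the destination instead of aliasing caller storage that may be gone by the time the packet is finally sent. A minimal sketch:

#include <string.h>
#include <netinet/in.h>

/* hypothetical, trimmed-down tag; the real dn_pkt_tag holds more */
struct pkt_tag_sketch {
	struct sockaddr_in dn_dst;	/* by value: the tag owns it */
};

static void
tag_set_dst(struct pkt_tag_sketch *t, const struct sockaddr_in *dst)
{
	/* mirrors the bcopy() in this patch; safe against the caller's
	 * sockaddr disappearing while the packet sits in a queue */
	memcpy(&t->dn_dst, dst, sizeof (t->dn_dst));
}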
diff --git a/bsd/netinet/ip_encap.c b/bsd/netinet/ip_encap.c
index 6dd02fe9e..0d487326a 100644
--- a/bsd/netinet/ip_encap.c
+++ b/bsd/netinet/ip_encap.c
@@ -259,9 +259,7 @@ encap4_input(m, off)
 
 #if INET6
 int
-encap6_input(mp, offp)
-	struct mbuf **mp;
-	int *offp;
+encap6_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m = *mp;
 	struct ip6_hdr *ip6;
@@ -269,10 +267,8 @@ encap6_input(mp, offp)
 	const struct ip6protosw *psw;
 	struct encaptab *ep, *match;
 	int prio, matchprio;
-	int proto;
 
 	ip6 = mtod(m, struct ip6_hdr *);
-	proto = ip6->ip6_nxt;
 
 	bzero(&s, sizeof(s));
 	s.sin6_family = AF_INET6;
@@ -315,7 +311,7 @@ encap6_input(mp, offp)
 		psw = (const struct ip6protosw *)match->psw;
 		if (psw && psw->pr_input) {
 			encap_fillarg(m, match);
-			return (*psw->pr_input)(mp, offp);
+			return (*psw->pr_input)(mp, offp, proto);
 		} else {
 			m_freem(m);
 			return IPPROTO_DONE;
@@ -323,7 +319,7 @@ encap6_input(mp, offp)
 	}
 
 	/* last resort: inject to raw socket */
-	return rip6_input(mp, offp);
+	return rip6_input(mp, offp, proto);
 }
 #endif
 
@@ -532,8 +528,8 @@ encap_fillarg(
 	struct m_tag	*tag;
 	struct encaptabtag *et;
 	
-	tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_ENCAP,
-					  sizeof(struct encaptabtag), M_WAITOK);
+	tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_ENCAP,
+					  sizeof(struct encaptabtag), M_WAITOK, m);
 	
 	if (tag != NULL) {
 		et = (struct encaptabtag*)(tag + 1);
diff --git a/bsd/netinet/ip_encap.h b/bsd/netinet/ip_encap.h
index 66dfb2588..1c65ea956 100644
--- a/bsd/netinet/ip_encap.h
+++ b/bsd/netinet/ip_encap.h
@@ -77,7 +77,7 @@ struct encaptab {
 
 void	encap_init(void) __attribute__((section("__TEXT, initcode")));
 void	encap4_input(struct mbuf *, int);
-int	encap6_input(struct mbuf **, int *);
+int	encap6_input(struct mbuf **, int *, int);
 const struct encaptab *encap_attach(int, int, const struct sockaddr *,
 	const struct sockaddr *, const struct sockaddr *,
 	const struct sockaddr *, const struct protosw *, void *);
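
encap6_input() now follows the three-argument IPv6 input convention: it receives the next-header value from the caller, which has already walked any extension headers, rather than re-reading ip6_nxt from the fixed header, and it forwards that value to the matched pr_input and to rip6_input(). A compilable sketch of the convention, with hypothetical names:

#include <stdio.h>

/* new-style handler: (packet, offset, already-resolved next header) */
typedef int (*ip6_pr_input_t)(void **mp, int *offp, int proto);

static int
tunnel_input_sketch(void **mp, int *offp, int proto)
{
	(void)mp;
	printf("payload at offset %d, next header %d\n", *offp, proto);
	return 0;
}

/* the dispatcher threads proto through, as encap6_input() now does */
static int
dispatch_sketch(ip6_pr_input_t fn, void **mp, int *offp, int proto)
{
	return (*fn)(mp, offp, proto);
}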
diff --git a/bsd/netinet/ip_flow.c b/bsd/netinet/ip_flow.c
deleted file mode 100644
index be5aa9495..000000000
--- a/bsd/netinet/ip_flow.c
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*-
- * Copyright (c) 1998 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by the 3am Software Foundry ("3am").  It was developed by Matt Thomas.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the NetBSD
- *	Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/ip_flow.c,v 1.9.2.1 2001/08/08 08:20:35 ru Exp $
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/protosw.h>
-#include <sys/socket.h>
-#include <sys/kernel.h>
-
-#include <sys/sysctl.h>
-#include <libkern/OSAtomic.h>
-
-#include <net/if.h>
-#include <net/route.h>
-
-#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/ip.h>
-#include <netinet/in_var.h>
-#include <netinet/ip_var.h>
-#include <netinet/ip_flow.h>
-#include <net/dlil.h>
-
-#if IPFLOW
-
-#define	IPFLOW_TIMER		(5 * PR_SLOWHZ)
-#define IPFLOW_HASHBITS		6	/* should not be a multiple of 8 */
-#define	IPFLOW_HASHSIZE		(1 << IPFLOW_HASHBITS)
-static LIST_HEAD(ipflowhead, ipflow) ipflows[IPFLOW_HASHSIZE];
-static int ipflow_inuse;
-#define	IPFLOW_MAX		256
-
-#ifdef __APPLE__
-#define M_IPFLOW M_TEMP
-#endif
-
-static int ipflow_active = 0;
-SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW,
-    &ipflow_active, 0, "Enable flow-based IP forwarding");
-
-#ifndef __APPLE__
-static MALLOC_DEFINE(M_IPFLOW, "ip_flow", "IP flow");
-#endif
-
-static unsigned
-ipflow_hash(
-	struct in_addr dst,
-	struct in_addr src,
-	unsigned tos)
-{
-	unsigned hash = tos;
-	int idx;
-	for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS)
-		hash += (dst.s_addr >> (32 - idx)) + (src.s_addr >> idx);
-	return hash & (IPFLOW_HASHSIZE-1);
-}
-
-static struct ipflow *
-ipflow_lookup(
-	const struct ip *ip)
-{
-	unsigned hash;
-	struct ipflow *ipf;
-
-	hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
-
-	ipf = LIST_FIRST(&ipflows[hash]);
-	while (ipf != NULL) {
-		if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr
-		    && ip->ip_src.s_addr == ipf->ipf_src.s_addr
-		    && ip->ip_tos == ipf->ipf_tos)
-			break;
-		ipf = LIST_NEXT(ipf, ipf_next);
-	}
-	return ipf;
-}
-
-int
-ipflow_fastforward(
-	struct mbuf *m)
-{
-	struct ip *ip;
-	struct ipflow *ipf;
-	struct rtentry *rt;
-	struct sockaddr *dst;
-	int error;
-
-	/*
-	 * Are we forwarding packets?  Big enough for an IP packet?
-	 */
-	if (!ipforwarding || !ipflow_active || m->m_len < sizeof(struct ip))
-		return 0;
-	/*
-	 * IP header with no option and valid version and length
-	 */
-	ip = mtod(m, struct ip *);
-	if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2)
-	    || ntohs(ip->ip_len) > m->m_pkthdr.len)
-		return 0;
-	/*
-	 * Find a flow.
-	 */
-	if ((ipf = ipflow_lookup(ip)) == NULL)
-		return 0;
-
-	/*
-	 * Route and interface still up?
-	 */
-	rt = ipf->ipf_ro.ro_rt;
-	if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0)
-		return 0;
-
-	/*
-	 * Packet size OK?  TTL?
-	 */
-	if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC)
-		return 0;
-
-	/*
-	 * Everything checks out and so we can forward this packet.
-	 * Modify the TTL and incrementally change the checksum.
-	 */
-	ip->ip_ttl -= IPTTLDEC;
-	if (ip->ip_sum >= htons(0xffff - (IPTTLDEC << 8))) {
-		ip->ip_sum += htons(IPTTLDEC << 8) + 1;
-	} else {
-		ip->ip_sum += htons(IPTTLDEC << 8);
-	}
-
-	/*
-	 * Send the packet on its way.  All we can get back is ENOBUFS
-	 */
-	ipf->ipf_uses++;
-	ipf->ipf_timer = IPFLOW_TIMER;
-
-	if (rt->rt_flags & RTF_GATEWAY)
-		dst = rt->rt_gateway;
-	else
-		dst = &ipf->ipf_ro.ro_dst;
-#ifdef __APPLE__
-	/* Not sure the rt_dlt is valid here !! XXX */
-	if ((error = dlil_output(rt->rt_ifp, PF_INET, m, (caddr_t) rt, dst, 0)) != 0) {
-
-#else
-	if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, dst, rt)) != 0) {
-#endif
-		if (error == ENOBUFS)
-			ipf->ipf_dropped++;
-		else
-			ipf->ipf_errors++;
-	}
-	return 1;
-}
-
-static void
-ipflow_addstats(
-	struct ipflow *ipf)
-{
-	ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
-	OSAddAtomic(ipf->ipf_errors + ipf->ipf_dropped, &ipstat.ips_cantforward);
-	OSAddAtomic(ipf->ipf_uses, &ipstat.ips_forward);
-	OSAddAtomic(ipf->ipf_uses, &ipstat.ips_fastforward);
-}
-
-static void
-ipflow_free(
-	struct ipflow *ipf)
-{
-	/*
-	 * Remove the flow from the hash table (at elevated IPL).
-	 * Once it's off the list, we can deal with it at normal
-	 * network IPL.
-	 */
-	LIST_REMOVE(ipf, ipf_next);
-	ipflow_addstats(ipf);
-	rtfree(ipf->ipf_ro.ro_rt);
-	ipflow_inuse--;
-	FREE(ipf, M_IPFLOW);
-}
-
-static struct ipflow *
-ipflow_reap(
-	void)
-{
-	struct ipflow *ipf, *maybe_ipf = NULL;
-	int idx;
-
-	for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
-		ipf = LIST_FIRST(&ipflows[idx]);
-		while (ipf != NULL) {
-			/*
-			 * If this no longer points to a valid route
-			 * reclaim it.
-			 */
-			if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0)
-				goto done;
-			/*
-			 * choose the one that's been least recently used
-			 * or has had the least uses in the last 1.5 
-			 * intervals.
-			 */
-			if (maybe_ipf == NULL
-			    || ipf->ipf_timer < maybe_ipf->ipf_timer
-			    || (ipf->ipf_timer == maybe_ipf->ipf_timer
-				&& ipf->ipf_last_uses + ipf->ipf_uses <
-				      maybe_ipf->ipf_last_uses +
-					maybe_ipf->ipf_uses))
-				maybe_ipf = ipf;
-			ipf = LIST_NEXT(ipf, ipf_next);
-		}
-	}
-	ipf = maybe_ipf;
-    done:
-	/*
-	 * Remove the entry from the flow table.
-	 */
-	LIST_REMOVE(ipf, ipf_next);
-	ipflow_addstats(ipf);
-	rtfree(ipf->ipf_ro.ro_rt);
-	return ipf;
-}
-/* note: called under the ip_mutex lock */
-void
-ipflow_slowtimo(
-	void)
-{
-	struct ipflow *ipf;
-	int idx;
-
-	for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
-		ipf = LIST_FIRST(&ipflows[idx]);
-		while (ipf != NULL) {
-			struct ipflow *next_ipf = LIST_NEXT(ipf, ipf_next);
-			if (--ipf->ipf_timer == 0) {
-				ipflow_free(ipf);
-			} else {
-				ipf->ipf_last_uses = ipf->ipf_uses;
-				ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
-				OSAddAtomic(ipf->ipf_uses, &ipstat.ips_forward);
-				OSAddAtomic(ipf->ipf_uses, &ipstat.ips_fastforward);
-				ipstat.ips_forward += ipf->ipf_uses;
-				ipstat.ips_fastforward += ipf->ipf_uses;
-				ipf->ipf_uses = 0;
-			}
-			ipf = next_ipf;
-		}
-	}
-}
-
-void
-ipflow_create(
-	const struct route *ro,
-	struct mbuf *m)
-{
-	const struct ip *const ip = mtod(m, struct ip *);
-	struct ipflow *ipf;
-	unsigned hash;
-
-	/*
-	 * Don't create cache entries for ICMP messages.
-	 */
-	if (!ipflow_active || ip->ip_p == IPPROTO_ICMP)
-		return;
-	/*
-	 * See if an existing flow struct exists.  If so, remove it from its
-	 * list and free the old route.  If not, try to malloc a new one
-	 * (if we aren't at our limit).
-	 */
-	ipf = ipflow_lookup(ip);
-	if (ipf == NULL) {
-		if (ipflow_inuse == IPFLOW_MAX) {
-			ipf = ipflow_reap();
-		} else {
-			ipf = (struct ipflow *) _MALLOC(sizeof(*ipf), M_IPFLOW,
-						       M_NOWAIT);
-			if (ipf == NULL)
-				return;
-			ipflow_inuse++;
-		}
-		bzero((caddr_t) ipf, sizeof(*ipf));
-	} else {
-		LIST_REMOVE(ipf, ipf_next);
-		ipflow_addstats(ipf);
-		rtfree(ipf->ipf_ro.ro_rt);
-		ipf->ipf_uses = ipf->ipf_last_uses = 0;
-		ipf->ipf_errors = ipf->ipf_dropped = 0;
-	}
-
-	/*
-	 * Fill in the updated information.
-	 */
-	ipf->ipf_ro = *ro;
-	RT_ADDREF(ro->ro_rt);
-	ipf->ipf_dst = ip->ip_dst;
-	ipf->ipf_src = ip->ip_src;
-	ipf->ipf_tos = ip->ip_tos;
-	ipf->ipf_timer = IPFLOW_TIMER;
-	/*
-	 * Insert into the appropriate bucket of the flow table.
-	 */
-	hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
-	LIST_INSERT_HEAD(&ipflows[hash], ipf, ipf_next);
-}
-#else /* !IPFLOW */
-int
-ipflow_fastforward(struct mbuf *m)
-{
-#pragma unused(m)
-	/*
-	 * Since this symbol is exported (albeit unsupported), just return
-	 * false to keep things (e.g. PPP) happy, in case ipflow is not
-	 * compiled in.
-	 */
-	return (0);
-}
-#endif /* !IPFLOW */
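
The removed fast-forward path above contains a classic trick worth noting: when only the TTL changes, the IP checksum is patched incrementally (RFC 1141 style) instead of being recomputed, with an explicit end-around carry when the one's-complement sum wraps. A standalone restatement of exactly that logic:

#include <stdint.h>
#include <arpa/inet.h>

#define IPTTLDEC 1	/* assumed decrement, matching the removed code */

static void
ip_decrement_ttl(uint8_t *ttl, uint16_t *sum_be)
{
	*ttl -= IPTTLDEC;
	/* TTL is the high byte of one 16-bit word, so subtracting
	 * IPTTLDEC from it adds (IPTTLDEC << 8) to the complemented
	 * sum; the +1 handles the end-around carry on wrap. */
	if (*sum_be >= htons(0xffff - (IPTTLDEC << 8)))
		*sum_be += htons(IPTTLDEC << 8) + 1;
	else
		*sum_be += htons(IPTTLDEC << 8);
}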
diff --git a/bsd/netinet/ip_flow.h b/bsd/netinet/ip_flow.h
deleted file mode 100644
index 972d96351..000000000
--- a/bsd/netinet/ip_flow.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*-
- * Copyright (c) 1998 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by the 3am Software Foundry ("3am").  It was developed by Matt Thomas.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the NetBSD
- *	Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/ip_flow.h,v 1.2 1999/08/28 00:49:22 peter Exp $
- */
-
-#ifndef _NETINET_IP_FLOW_H
-#define _NETINET_IP_FLOW_H
-#include <sys/appleapiopts.h>
-
-#ifdef KERNEL_PRIVATE
-struct ipflow {
-	LIST_ENTRY(ipflow) ipf_next;	/* next ipflow in bucket */
-	struct in_addr ipf_dst;		/* destination address */
-	struct in_addr ipf_src;		/* source address */
-
-	u_int8_t ipf_tos;		/* type-of-service */
-	struct route ipf_ro;		/* associated route entry */
-	u_int32_t ipf_uses;		/* number of uses in this period */
-
-	int ipf_timer;			/* remaining lifetime of this entry */
-	u_int32_t ipf_dropped;		/* ENOBUFS returned by if_output */
-	u_int32_t ipf_errors;		/* other errors returned by if_output */
-	u_int32_t ipf_last_uses;		/* number of uses in last period */
-};
-#endif /* KERNEL_PRIVATE */
-
-#endif
diff --git a/bsd/netinet/ip_fw.h b/bsd/netinet/ip_fw.h
index 6755fab56..53ead3fa0 100644
--- a/bsd/netinet/ip_fw.h
+++ b/bsd/netinet/ip_fw.h
@@ -42,6 +42,7 @@
 
 #ifndef _IP_FW_H
 #define _IP_FW_H
+#ifdef __APPLE_API_OBSOLETE
 
 #include <sys/appleapiopts.h>
 
@@ -324,4 +325,5 @@ extern struct ipfw_flow_id last_pkt ;
 #endif /* KERNEL_PRIVATE */
 
 #endif /* !IPFW2 */
+#endif /* __APPLE_API_OBSOLETE */
 #endif /* _IP_FW_H */
diff --git a/bsd/netinet/ip_fw2.c b/bsd/netinet/ip_fw2.c
index 9be482912..bb66be5d7 100644
--- a/bsd/netinet/ip_fw2.c
+++ b/bsd/netinet/ip_fw2.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -165,21 +165,21 @@ static int ipfw_sysctl SYSCTL_HANDLER_ARGS;
 
 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Firewall");
 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable,
-    CTLTYPE_INT | CTLFLAG_RW,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &fw_enable, 0, ipfw_sysctl, "I", "Enable ipfw");
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_RW | CTLFLAG_LOCKED,
     &autoinc_step, 0, "Rule number autoincrement step");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
-    CTLFLAG_RW,
+    CTLFLAG_RW | CTLFLAG_LOCKED,
     &fw_one_pass, 0,
     "Only do a single pass through ipfw when using dummynet(4)");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug,
-    CTLFLAG_RW,
+    CTLFLAG_RW | CTLFLAG_LOCKED,
     &fw_debug, 0, "Enable printing of debug ip_fw statements");
 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose,
-    CTLFLAG_RW,
+    CTLFLAG_RW | CTLFLAG_LOCKED,
     &fw_verbose, 0, "Log matches to ipfw rules");
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW | CTLFLAG_LOCKED,
     &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged");
 
 /*
@@ -251,29 +251,29 @@ static u_int32_t static_len_64;	/* size in bytes of static rules for 64 bit clie
 static u_int32_t dyn_count;		/* # of dynamic rules */
 static u_int32_t dyn_max = 4096;	/* max # of dynamic rules */
 
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLFLAG_RW | CTLFLAG_LOCKED,
     &dyn_buckets, 0, "Number of dyn. buckets");
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_RD,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_RD | CTLFLAG_LOCKED,
     &curr_dyn_buckets, 0, "Current Number of dyn. buckets");
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_RD,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_RD | CTLFLAG_LOCKED,
     &dyn_count, 0, "Number of dyn. rules");
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_RW | CTLFLAG_LOCKED,
     &dyn_max, 0, "Max number of dyn. rules");
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD | CTLFLAG_LOCKED,
     &static_count, 0, "Number of static rules");
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW | CTLFLAG_LOCKED,
     &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks");
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW | CTLFLAG_LOCKED,
     &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn");
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW | CTLFLAG_LOCKED,
     &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin");
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW | CTLFLAG_LOCKED,
     &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst");
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW | CTLFLAG_LOCKED,
     &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP");
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW | CTLFLAG_LOCKED,
     &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations");
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW | CTLFLAG_LOCKED,
     &dyn_keepalive, 0, "Enable keepalives for dyn. rules");
 
 
@@ -338,6 +338,8 @@ void    ipfwsyslog( int level, const char *format,...)
     unsigned char       pri;
     int			loglen;
 
+	bzero(msgBuf, msgsize);
+	bzero(&ev_msg, sizeof(struct kev_msg));
 	va_start( ap, format );
         loglen = vsnprintf(msgBuf, msgsize, format, ap);
         va_end( ap );
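
The two bzero() calls added above ensure that neither the log string buffer nor the kernel event message carries stale stack bytes: vsnprintf() writes only up to the terminator, and any kev_msg fields left unset would otherwise leak garbage to event consumers. A user-space sketch of the same hardening:

#include <stdarg.h>
#include <stdio.h>
#include <string.h>

static void
log_event_sketch(char *buf, size_t bufsz, const char *fmt, ...)
{
	va_list ap;

	memset(buf, 0, bufsz);		/* no stale bytes past the NUL */
	va_start(ap, fmt);
	(void)vsnprintf(buf, bufsz, fmt, ap);
	va_end(ap);
}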
@@ -965,15 +967,18 @@ iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
 
 		ifnet_lock_shared(ifp);
 		TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
-			if (ia->ifa_addr == NULL)
-				continue;
-			if (ia->ifa_addr->sa_family != AF_INET)
+			IFA_LOCK(ia);
+			if (ia->ifa_addr->sa_family != AF_INET) {
+				IFA_UNLOCK(ia);
 				continue;
+			}
 			if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
 			    (ia->ifa_addr))->sin_addr.s_addr) {
+				IFA_UNLOCK(ia);
 				ifnet_lock_done(ifp);
 				return(1);	/* match */
 			}
+			IFA_UNLOCK(ia);
 		}
 		ifnet_lock_done(ifp);
 	}
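
The iface_match() hunk above replaces the NULL check with per-address locking: each ifaddr is examined under its own IFA_LOCK, and the lock is released on every exit from the loop body (the continue, the match return, and the fall-through). A user-space sketch of that scan discipline, all names hypothetical:

#include <pthread.h>
#include <stddef.h>

struct node {
	pthread_mutex_t lock;
	int		family;
	unsigned	addr;
	struct node    *next;
};

static int
list_match(struct node *head, int want_family, unsigned want_addr)
{
	struct node *n;

	for (n = head; n != NULL; n = n->next) {
		pthread_mutex_lock(&n->lock);
		if (n->family != want_family) {
			pthread_mutex_unlock(&n->lock);	/* continue path */
			continue;
		}
		if (n->addr == want_addr) {
			pthread_mutex_unlock(&n->lock);	/* match path */
			return 1;
		}
		pthread_mutex_unlock(&n->lock);		/* fall-through */
	}
	return 0;
}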
diff --git a/bsd/netinet/ip_fw2.h b/bsd/netinet/ip_fw2.h
index 0485bcbc2..5e093b170 100644
--- a/bsd/netinet/ip_fw2.h
+++ b/bsd/netinet/ip_fw2.h
@@ -55,6 +55,7 @@
 
 #ifndef _IPFW2_H
 #define _IPFW2_H
+#ifdef __APPLE_API_OBSOLETE
 
 /*
  * Define IP Firewall event subclass, and associated events.
@@ -634,4 +635,5 @@ extern int fw_enable;
 #endif /* IPFIREWALL */
 #endif /* KERNEL */
 
+#endif /* __APPLE_API_OBSOLETE */
 #endif /* _IPFW2_H */
diff --git a/bsd/netinet/ip_fw2_compat.c b/bsd/netinet/ip_fw2_compat.c
index 766fa8fc8..712f49241 100644
--- a/bsd/netinet/ip_fw2_compat.c
+++ b/bsd/netinet/ip_fw2_compat.c
@@ -1592,7 +1592,7 @@ ipfw_version_latest_to_zero(struct ip_fw *curr_rule, struct ip_old_fw *rule_vers
 		rule_vers0->pipe_ptr      = CAST_DOWN_EXPLICIT(void*, rule_vers1.pipe_ptr);
 		rule_vers0->next_rule_ptr = CAST_DOWN_EXPLICIT(void*, rule_vers1.next_rule_ptr);
 
-		if (rule_vers1.fw_ipflg && IP_FW_IF_TCPEST_COMPAT) rule_vers0->fw_tcpf |= IP_OLD_FW_TCPF_ESTAB;
+		if (rule_vers1.fw_ipflg & IP_FW_IF_TCPEST_COMPAT) rule_vers0->fw_tcpf |= IP_OLD_FW_TCPF_ESTAB;
 	}
 	else {
 		struct ip_fw_compat_32	rule_vers1;
@@ -1620,7 +1620,7 @@ ipfw_version_latest_to_zero(struct ip_fw *curr_rule, struct ip_old_fw *rule_vers
 		rule_vers0->pipe_ptr      = CAST_DOWN_EXPLICIT(void*, rule_vers1.pipe_ptr);
 		rule_vers0->next_rule_ptr = CAST_DOWN_EXPLICIT(void*, rule_vers1.next_rule_ptr);
 
-		if (rule_vers1.fw_ipflg && IP_FW_IF_TCPEST_COMPAT) rule_vers0->fw_tcpf |= IP_OLD_FW_TCPF_ESTAB;
+		if (rule_vers1.fw_ipflg & IP_FW_IF_TCPEST_COMPAT) rule_vers0->fw_tcpf |= IP_OLD_FW_TCPF_ESTAB;
 	}
 
 }
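
The two one-character fixes above replace a logical AND with the intended bitwise test: fw_ipflg && IP_FW_IF_TCPEST_COMPAT is true whenever any flag bit is set, so every such rule was wrongly marked TCPF_ESTAB. A small demonstration (the flag value here is illustrative, not the real constant):

#include <assert.h>

#define IP_FW_IF_TCPEST_COMPAT	0x20	/* illustrative value only */

int
main(void)
{
	unsigned fw_ipflg = 0x1;	/* TCPEST bit NOT set */

	/* pre-fix: logical AND fires for any nonzero flag word */
	assert((fw_ipflg && IP_FW_IF_TCPEST_COMPAT) == 1);

	/* post-fix: bitwise AND tests the specific bit */
	assert((fw_ipflg & IP_FW_IF_TCPEST_COMPAT) == 0);
	return 0;
}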
diff --git a/bsd/netinet/ip_icmp.c b/bsd/netinet/ip_icmp.c
index 7f55b2a5f..48ea2f0f5 100644
--- a/bsd/netinet/ip_icmp.c
+++ b/bsd/netinet/ip_icmp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -124,23 +124,23 @@
  */
 
 struct	icmpstat icmpstat;
-SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RD,
+SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
 	&icmpstat, icmpstat, "");
 
 static int	icmpmaskrepl = 0;
-SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&icmpmaskrepl, 0, "");
 
 static int	icmptimestamp = 0;
-SYSCTL_INT(_net_inet_icmp, ICMPCTL_TIMESTAMP, timestamp, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_icmp, ICMPCTL_TIMESTAMP, timestamp, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&icmptimestamp, 0, "");
 
 static int	drop_redirect = 0;
-SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW, 
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&drop_redirect, 0, "");
 
 static int	log_redirect = 0;
-SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW, 
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&log_redirect, 0, "");
 
 #if ICMP_BANDLIM 
@@ -151,12 +151,12 @@ SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW,
  */     
     
 static int      icmplim = 250;
-SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&icmplim, 0, "");
 #else
 
 static int      icmplim = -1;
-SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RD,
+SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RD | CTLFLAG_LOCKED,
 	&icmplim, 0, "");
 	
 #endif 
@@ -166,7 +166,7 @@ SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RD,
  */
 
 static int	icmpbmcastecho = 1;
-SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&icmpbmcastecho, 0, "");
 
 
@@ -537,8 +537,10 @@ icmp_input(struct mbuf *m, int hlen)
 			    (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
 		if (ia == 0)
 			break;
+		IFA_LOCK(&ia->ia_ifa);
 		if (ia->ia_ifp == 0) {
-			ifafree(&ia->ia_ifa);
+			IFA_UNLOCK(&ia->ia_ifa);
+			IFA_REMREF(&ia->ia_ifa);
 			ia = NULL;
 			break;
 		}
@@ -550,7 +552,8 @@ icmp_input(struct mbuf *m, int hlen)
 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
 			    ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr;
 		}
-		ifafree(&ia->ia_ifa);
+		IFA_UNLOCK(&ia->ia_ifa);
+		IFA_REMREF(&ia->ia_ifa);
 reflect:
 		ip->ip_len += hlen;	/* since ip_input deducts this */
 		icmpstat.icps_reflect++;
@@ -662,8 +665,13 @@ icmp_reflect(struct mbuf *m)
 	 */
 	lck_rw_lock_shared(in_ifaddr_rwlock);
 	TAILQ_FOREACH(ia, INADDR_HASH(t.s_addr), ia_hash) {
-		if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr)
+		IFA_LOCK(&ia->ia_ifa);
+		if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr) {
+			IFA_ADDREF_LOCKED(&ia->ia_ifa);
+			IFA_UNLOCK(&ia->ia_ifa);
 			goto match;
+		}
+		IFA_UNLOCK(&ia->ia_ifa);
 	}
 	/*
 	 * Slow path; check for broadcast addresses.  Find a source
@@ -671,13 +679,16 @@ icmp_reflect(struct mbuf *m)
 	 * let IP handle the source interface selection work.
 	 */
 	for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) {
+		IFA_LOCK(&ia->ia_ifa);
 		if (ia->ia_ifp && (ia->ia_ifp->if_flags & IFF_BROADCAST) &&
-		    t.s_addr == satosin(&ia->ia_broadaddr)->sin_addr.s_addr)
+		    t.s_addr == satosin(&ia->ia_broadaddr)->sin_addr.s_addr) {
+			IFA_ADDREF_LOCKED(&ia->ia_ifa);
+			IFA_UNLOCK(&ia->ia_ifa);
 			break;
+		}
+		IFA_UNLOCK(&ia->ia_ifa);
 	}
 match:
-	if (ia)
-		ifaref(&ia->ia_ifa);
 	lck_rw_done(in_ifaddr_rwlock);
 	icmpdst.sin_addr = t;
 	if ((ia == (struct in_ifaddr *)0) && m->m_pkthdr.rcvif)
@@ -695,16 +706,18 @@ match:
 			m_freem(m);
 			goto done;
 		}
-		ifaref(&ia->ia_ifa);
+		IFA_ADDREF(&ia->ia_ifa);
 		lck_rw_done(in_ifaddr_rwlock);
 	}
 #if CONFIG_MACF_NET
 	mac_netinet_icmp_reply(m);
 #endif
+	IFA_LOCK_SPIN(&ia->ia_ifa);
 	t = IA_SIN(ia)->sin_addr;
+	IFA_UNLOCK(&ia->ia_ifa);
 	ip->ip_src = t;
 	ip->ip_ttl = ip_defttl;
-	ifafree(&ia->ia_ifa);
+	IFA_REMREF(&ia->ia_ifa);
 	ia = NULL;
 
 	if (optlen > 0) {
@@ -797,10 +810,10 @@ icmp_send(struct mbuf *m, struct mbuf *opts)
 	int hlen;
 	struct icmp *icp;
 	struct route ro;
-	struct ip_out_args ipoa = { IFSCOPE_NONE };
+	struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
 
 	if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL)
-		ipoa.ipoa_ifscope = m->m_pkthdr.rcvif->if_index;
+		ipoa.ipoa_boundif = m->m_pkthdr.rcvif->if_index;
 
 	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
 	m->m_data += hlen;
@@ -1037,6 +1050,7 @@ icmp_dgram_ctloutput(struct socket *so, struct sockopt *sopt)
 		case IP_RECVDSTADDR:
 		case IP_RETOPTS:
 		case IP_MULTICAST_IF:
+		case IP_MULTICAST_IFINDEX:
 		case IP_MULTICAST_TTL:
 		case IP_MULTICAST_LOOP:
 		case IP_ADD_MEMBERSHIP:
@@ -1054,6 +1068,7 @@ icmp_dgram_ctloutput(struct socket *so, struct sockopt *sopt)
 #if CONFIG_FORCE_OUT_IFP
                 case IP_FORCE_OUT_IFP:
 #endif
+		case IP_NO_IFT_CELLULAR:
 			error = rip_ctloutput(so, sopt);
 			break;
 
@@ -1109,12 +1124,15 @@ icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *n
 			}
 			TAILQ_FOREACH(ia, INADDR_HASH(ip->ip_src.s_addr),
 			    ia_hash) {
+				IFA_LOCK(&ia->ia_ifa);
 				if (IA_SIN(ia)->sin_addr.s_addr ==
 				    ip->ip_src.s_addr) {
+					IFA_UNLOCK(&ia->ia_ifa);
 					lck_rw_done(in_ifaddr_rwlock);
 					socket_lock(so, 0);
 					goto ours;
 				}
+				IFA_UNLOCK(&ia->ia_ifa);
 			}
 			lck_rw_done(in_ifaddr_rwlock);
 			socket_lock(so, 0);
diff --git a/bsd/netinet/ip_id.c b/bsd/netinet/ip_id.c
index 7a6fef876..46c7fecd9 100644
--- a/bsd/netinet/ip_id.c
+++ b/bsd/netinet/ip_id.c
@@ -137,6 +137,7 @@ ip_initid(void)
 	struct timeval timenow;
 	
 	getmicrotime(&timenow);
+	read_random((void *) &tmp, sizeof(tmp));
 	ru_x = (tmp & 0xFFFF) % RU_M;
 
 	/* 15 bits of random seed */
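
The read_random() call added above seeds tmp from the kernel entropy pool before it is folded into ru_x; previously tmp was consumed without ever being assigned. A kernel-side sketch, assuming xnu's read_random signature of void read_random(void *, u_int), with the modulus passed in for illustration:

#include <stdint.h>

extern void read_random(void *buffer, unsigned int numbytes);

static uint32_t
seed_ru_x(uint32_t ru_m)
{
	uint32_t tmp;

	read_random((void *)&tmp, sizeof (tmp));
	return (tmp & 0xFFFF) % ru_m;	/* as in ip_initid() above */
}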
diff --git a/bsd/netinet/ip_input.c b/bsd/netinet/ip_input.c
index 10156a869..761b4b40c 100644
--- a/bsd/netinet/ip_input.c
+++ b/bsd/netinet/ip_input.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -80,6 +80,8 @@
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
+#include <sys/mcache.h>
+#include <mach/mach_time.h>
 
 #include <machine/endian.h>
 
@@ -93,6 +95,7 @@
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/kpi_protocol.h>
+#include <net/ntstat.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
@@ -113,6 +116,7 @@
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #include <netinet/bootp.h>
+#include <mach/sdt.h>
 
 #if CONFIG_MACF_NET
 #include <security/mac_framework.h>
@@ -167,46 +171,46 @@ SYSCTL_PROC(_net_inet_ip, IPCTL_FORWARDING, forwarding,
     sysctl_ipforwarding, "I", "Enable IP forwarding between interfaces");
 
 static int	ipsendredirects = 1; /* XXX */
-SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ipsendredirects, 0, "Enable sending IP redirects");
 
 int	ip_defttl = IPDEFTTL;
-SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ip_defttl, 0, "Maximum TTL on IP packets");
 
 static int	ip_dosourceroute = 0;
-SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
 
 static int	ip_acceptsourceroute = 0;
 SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute, 
-    CTLFLAG_RW, &ip_acceptsourceroute, 0, 
+    CTLFLAG_RW | CTLFLAG_LOCKED, &ip_acceptsourceroute, 0, 
     "Enable accepting source routed IP packets");
 
 static int	ip_keepfaith = 0;
-SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&ip_keepfaith,	0,
 	"Enable packet capture for FAITH IPv4->IPv6 translater daemon");
 
 static int	nipq = 0;	/* total # of reass queues */
 static int	maxnipq;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&maxnipq, 0,
 	"Maximum number of IPv4 fragment reassembly queue entries");
 
 static int    maxfragsperpacket;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&maxfragsperpacket, 0,
 	"Maximum number of IPv4 fragments allowed per packet");
 
 static int    maxfrags;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfrags, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfrags, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&maxfrags, 0, "Maximum number of IPv4 fragments allowed");
 
 static int    currentfrags = 0;
 
 int	ip_doscopedroute = 1;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, scopedroute, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, scopedroute, CTLFLAG_RD | CTLFLAG_LOCKED,
      &ip_doscopedroute, 0, "Enable IPv4 scoped routing");
 
 /*
@@ -223,7 +227,7 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, scopedroute, CTLFLAG_RW,
  * packets for those addresses are received.
  */
 static int	ip_checkinterface = 0;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ip_checkinterface, 0, "Verify packet arrives on correct interface");
 
 
@@ -251,13 +255,13 @@ static u_int32_t inaddr_nhash;			/* hash table size */
 static u_int32_t inaddr_hashp;			/* next largest prime */
 
 struct	ifqueue ipintrq;
-SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
-SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
+SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD | CTLFLAG_LOCKED,
     &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue");
 
 struct ipstat ipstat;
-SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RD,
+SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
     &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)");
 
 /* Packet reassembly stuff */
@@ -279,13 +283,13 @@ lck_mtx_t 		*inet_domain_mutex;
 extern lck_mtx_t 	*domain_proto_mtx;
 
 #if IPCTL_DEFMTU
-SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ip_mtu, 0, "Default MTU");
 #endif
 
 #if IPSTEALTH
 static int	ipstealth = 0;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ipstealth, 0, "");
 #endif
 
@@ -304,17 +308,17 @@ ip_dn_io_t *ip_dn_io_ptr;
 int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **) = NULL;
 #endif /* IPFIREWALL */
 
-SYSCTL_NODE(_net_inet_ip, OID_AUTO, linklocal, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "link local");
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, linklocal, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "link local");
 
 struct ip_linklocal_stat ip_linklocal_stat;
-SYSCTL_STRUCT(_net_inet_ip_linklocal, OID_AUTO, stat, CTLFLAG_RD,
+SYSCTL_STRUCT(_net_inet_ip_linklocal, OID_AUTO, stat, CTLFLAG_RD | CTLFLAG_LOCKED,
         &ip_linklocal_stat, ip_linklocal_stat,
         "Number of link local packets with TTL less than 255");
 
-SYSCTL_NODE(_net_inet_ip_linklocal, OID_AUTO, in, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "link local input");
+SYSCTL_NODE(_net_inet_ip_linklocal, OID_AUTO, in, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "link local input");
 
 int ip_linklocal_in_allowbadttl = 1;
-SYSCTL_INT(_net_inet_ip_linklocal_in, OID_AUTO, allowbadttl, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip_linklocal_in, OID_AUTO, allowbadttl, CTLFLAG_RW | CTLFLAG_LOCKED,
         &ip_linklocal_in_allowbadttl, 0,
         "Allow incoming link local packets with TTL less than 255");
 
@@ -359,7 +363,7 @@ void	in_dinit(void);
 extern u_short ip_id;
 
 int	ip_use_randomid = 1;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ip_use_randomid, 0, "Randomize IP packet IDs");
 #endif
 
@@ -379,6 +383,9 @@ ip_init(void)
 
 	if (!ip_initialized)
 	{
+		PE_parse_boot_argn("net.inet.ip.scopedroute",
+		    &ip_doscopedroute, sizeof (ip_doscopedroute));
+
 		in_ifaddr_init();
 
 		in_ifaddr_rwlock_grp_attr = lck_grp_attr_alloc_init();
@@ -391,6 +398,8 @@ ip_init(void)
 		TAILQ_INIT(&in_ifaddrhead);
 		in_ifaddrhashtbl_init();
 
+		ip_moptions_init();
+
 		pr = pffindproto_locked(PF_INET, IPPROTO_RAW, SOCK_RAW);
 		if (pr == 0)
 			panic("ip_init");
@@ -531,9 +540,7 @@ in_dinit(void)
 
 	if (!inetdomain_initted)
 	{
-#if 0
-		kprintf("Initing %d protosw entries\n", in_proto_count);
-#endif
+		/* kprintf("Initing %d protosw entries\n", in_proto_count); */
 		dp = &inetdomain;
 		dp->dom_flags = DOM_REENTRANT;
 
@@ -637,6 +644,9 @@ ip_input(struct mbuf *m)
 #endif
 	ipfilter_t inject_filter_ref = 0;
 
+	/* Check if the mbuf is still valid after interface filter processing */
+	MBUF_INPUT_CHECK(m, m->m_pkthdr.rcvif);
+
 #if IPFIREWALL
 	args.eh = NULL;
 	args.oif = NULL;
@@ -707,6 +717,11 @@ ipfw_tags_done:
 	if (inject_filter_ref != 0) {
 		ip = mtod(m, struct ip *);
 		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
+
+		DTRACE_IP6(receive, struct mbuf *, m, struct inpcb *, NULL, 
+			struct ip *, ip, struct ifnet *, m->m_pkthdr.rcvif,
+			struct ip *, ip, struct ip6_hdr *, NULL);
+               
 		ip->ip_len = ntohs(ip->ip_len) - hlen;
 		ip->ip_off = ntohs(ip->ip_off);
 		ip_proto_dispatch_in(m, hlen, ip->ip_p, inject_filter_ref);
@@ -801,6 +816,30 @@ ipfw_tags_done:
 		goto bad;
 	}
 
+	DTRACE_IP6(receive, struct mbuf *, m, struct inpcb *, NULL, 
+		struct ip *, ip, struct ifnet *, m->m_pkthdr.rcvif,
+		struct ip *, ip, struct ip6_hdr *, NULL);
+
+	/*
+	 * Naively assume we can attribute inbound data to the route we would
+	 * use to send to this destination. Asymmetric routing breaks this
+	 * assumption, but it still allows us to account for traffic from
+	 * a remote node in the routing table.
+	 * This has a very significant performance impact, so we bypass it
+	 * if nstat_collect is disabled. We may also bypass if the
+	 * protocol is TCP in the future, because TCP will have a route that
+	 * we can use to attribute the data to. That does mean we would not
+	 * account for forwarded TCP traffic.
+	 */
+	if (nstat_collect) {
+		struct rtentry *rt =
+		    ifnet_cached_rtlookup_inet(m->m_pkthdr.rcvif, ip->ip_src);
+		if (rt != NULL) {
+			nstat_route_rx(rt, 1, m->m_pkthdr.len, 0);
+			rtfree(rt);
+		}
+	}
+
 	/*
 	 * Convert fields to host representation.
 	 */
@@ -839,36 +878,29 @@ tooshort:
 			m_adj(m, ip->ip_len - m->m_pkthdr.len);
 	}
 
-#if IPSEC
-	if (ipsec_bypass == 0 && ipsec_gethist(m, NULL))
-		goto pass;
-#endif
-
-	/*
-	 * IpHack's section.
-	 * Right now when no processing on packet has done
-	 * and it is still fresh out of network we do our black
-	 * deals with it.
-	 * - Firewall: deny/allow/divert
-	 * - Xlate: translate packet's addr/port (NAT).
-	 * - Pipe: pass pkt through dummynet.
-	 * - Wrap: fake packet's addr/port <unimpl.>
-	 * - Encapsulate: put it in another IP and send out. <unimp.>
- 	 */
 #if PF
 	/* Invoke inbound packet filter */
-	if (pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET, TRUE) != 0) {
-		if (m != NULL) {
-			panic("%s: unexpected packet %p\n", __func__, m);
-			/* NOTREACHED */
-		}
-		/* Already freed by callee */
-		return;
+	if (PF_IS_ENABLED) { 
+		int error;
+		error = pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET, TRUE);
+		if (error != 0) {
+			if (m != NULL) {
+				panic("%s: unexpected packet %p\n", __func__, m);
+				/* NOTREACHED */
+			}
+			/* Already freed by callee */
+			return;
+		} 
+		ip = mtod(m, struct ip *);
+		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
 	}
-	ip = mtod(m, struct ip *);
-	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
 #endif /* PF */
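
The rework above does two things: it skips the pf hook entirely when the filter is disabled (PF_IS_ENABLED), and it re-derives ip and hlen afterward because the hook may have modified or replaced the mbuf. A user-space sketch of that gate-and-reload pattern, all names hypothetical:

#include <stddef.h>

struct pkt { unsigned char *data; size_t len; };
static int filter_enabled;		/* PF_IS_ENABLED analogue */

/* stub hook: the real filter may free or replace *p */
static int
hook(struct pkt **p)
{
	(void)p;
	return 0;
}

static unsigned char *
run_filter(struct pkt **p)
{
	if (filter_enabled) {
		if (hook(p) != 0)
			return NULL;	/* consumed by callee */
	}
	return (*p)->data;	/* reload, as ip/hlen are re-read above */
}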
 
+#if IPSEC
+	if (ipsec_bypass == 0 && ipsec_gethist(m, NULL))
+		goto pass;
+#endif
+
 #if IPFIREWALL
 #if DUMMYNET
 iphack:
@@ -1015,11 +1047,14 @@ pass:
 		 * arrived via the correct interface if checking is
 		 * enabled.
 		 */
+		IFA_LOCK_SPIN(&ia->ia_ifa);
 		if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr && 
 		    (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif)) {
+			IFA_UNLOCK(&ia->ia_ifa);
 			lck_rw_done(in_ifaddr_rwlock);
 			goto ours;
 		}
+		IFA_UNLOCK(&ia->ia_ifa);
 	}
 	lck_rw_done(in_ifaddr_rwlock);
 
@@ -1037,15 +1072,20 @@ pass:
 
 		ifnet_lock_shared(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
-			if (ifa->ifa_addr->sa_family != AF_INET)
+			IFA_LOCK_SPIN(ifa);
+			if (ifa->ifa_addr->sa_family != AF_INET) {
+				IFA_UNLOCK(ifa);
 				continue;
+			}
 			ia = ifatoia(ifa);
 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
 			    pkt_dst.s_addr || ia->ia_netbroadcast.s_addr ==
 			    pkt_dst.s_addr) {
+				IFA_UNLOCK(ifa);
 				ifnet_lock_done(ifp);
 				goto ours;
 			}
+			IFA_UNLOCK(ifa);
 		}
 		ifnet_lock_done(ifp);
 	}
@@ -1085,14 +1125,15 @@ pass:
 		 * See if we belong to the destination multicast group on the
 		 * arrival interface.
 		 */
-		ifnet_lock_shared(ifp);
-		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
-		ifnet_lock_done(ifp);
+		in_multihead_lock_shared();
+		IN_LOOKUP_MULTI(&ip->ip_dst, ifp, inm);
+		in_multihead_lock_done();
 		if (inm == NULL) {
 			OSAddAtomic(1, &ipstat.ips_notmember);
 			m_freem(m);
 			return;
 		}
+		INM_REMREF(inm);
 		goto ours;
 	}
 	if (ip->ip_dst.s_addr == (u_int32_t)INADDR_BROADCAST)
@@ -1350,9 +1391,9 @@ found:
 			struct m_tag *fwd_tag;
 			struct ip_fwd_tag	*ipfwd_tag;
 			
-			fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID,
+			fwd_tag = m_tag_create(KERNEL_MODULE_TAG_ID,
 			    KERNEL_TAG_TYPE_IPFORWARD, sizeof (*ipfwd_tag),
-			    M_NOWAIT);
+			    M_NOWAIT, m);
 			if (fwd_tag == NULL) {
 				goto bad;
 			}
@@ -1731,9 +1772,6 @@ ip_slowtimo(void)
 			}
 		}
 	}
-#if IPFLOW
-	ipflow_slowtimo();
-#endif
 	lck_mtx_unlock(ip_mutex);
 }
 
@@ -1843,7 +1881,7 @@ ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop)
 				break;
 			}
 			else {
-				ifafree(&ia->ia_ifa);
+				IFA_REMREF(&ia->ia_ifa);
 				ia = NULL;
 			}
 			off--;			/* 0 origin */
@@ -1903,9 +1941,11 @@ nosourcerouting:
 				goto bad;
 			}
 			ip->ip_dst = ipaddr.sin_addr;
+			IFA_LOCK(&ia->ia_ifa);
 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
 			    sizeof(struct in_addr));
-			ifafree(&ia->ia_ifa);
+			IFA_UNLOCK(&ia->ia_ifa);
+			IFA_REMREF(&ia->ia_ifa);
 			ia = NULL;
 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
 			/*
@@ -1942,9 +1982,11 @@ nosourcerouting:
 					goto bad;
 				}
 			}
+			IFA_LOCK(&ia->ia_ifa);
 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
 			    sizeof(struct in_addr));
-			ifafree(&ia->ia_ifa);
+			IFA_UNLOCK(&ia->ia_ifa);
+			IFA_REMREF(&ia->ia_ifa);
 			ia = NULL;
 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
 			break;
@@ -1987,10 +2029,12 @@ nosourcerouting:
 							    m->m_pkthdr.rcvif);
 				if (ia == 0)
 					continue;
+				IFA_LOCK(&ia->ia_ifa);
 				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
 				    sizeof(struct in_addr));
+				IFA_UNLOCK(&ia->ia_ifa);
 				ipt->ipt_ptr += sizeof(struct in_addr);
-				ifafree(&ia->ia_ifa);
+				IFA_REMREF(&ia->ia_ifa);
 				ia = NULL;
 				break;
 
@@ -2005,7 +2049,7 @@ nosourcerouting:
 				    sizeof(struct in_addr));
 				if ((ia = (struct in_ifaddr*)ifa_ifwithaddr((SA)&ipaddr)) == 0)
 					continue;
-				ifafree(&ia->ia_ifa);
+				IFA_REMREF(&ia->ia_ifa);
 				ia = NULL;
 				ipt->ipt_ptr += sizeof(struct in_addr);
 				break;
@@ -2057,7 +2101,7 @@ ip_rtaddr(struct in_addr dst)
 
 	RT_LOCK(ro.ro_rt);
 	if ((rt_ifa = ro.ro_rt->rt_ifa) != NULL)
-		ifaref(rt_ifa);
+		IFA_ADDREF(rt_ifa);
 	RT_UNLOCK(ro.ro_rt);
 	rtfree(ro.ro_rt);
 
@@ -2204,12 +2248,12 @@ sysctl_ipforwarding SYSCTL_HANDLER_ARGS
 		for (i = 0; i <= if_index; i++) {
 			struct ifnet *ifp = ifindex2ifnet[i];
 			if (ifp != NULL) {
-				lck_mtx_lock(ifp->if_fwd_route_lock);
-				if (ifp->if_fwd_route.ro_rt != NULL) {
+				lck_mtx_lock(&ifp->if_cached_route_lock);
+				if (ifp->if_fwd_route.ro_rt != NULL)
 					rtfree(ifp->if_fwd_route.ro_rt);
-					ifp->if_fwd_route.ro_rt = NULL;
-				}
-				lck_mtx_unlock(ifp->if_fwd_route_lock);
+				bzero(&ifp->if_fwd_route,
+				    sizeof (ifp->if_fwd_route));
+				lck_mtx_unlock(&ifp->if_cached_route_lock);
 			}
 		}
 		ifnet_head_done();
@@ -2228,20 +2272,16 @@ ip_fwd_route_copyout(struct ifnet *ifp, struct route *dst)
 {
 	struct route *src = &ifp->if_fwd_route;
 
-	lck_mtx_lock(ifp->if_fwd_route_lock);
+	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
+	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
 
 	/* Minor sanity check */
 	if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
 		panic("%s: wrong or corrupted route: %p", __func__, src);
 
-	/* Copy everything (rt, dst, flags) from ifnet */
-	bcopy(src, dst, sizeof (*dst));
-
-	/* Hold one reference for the local copy of struct route */
-	if (dst->ro_rt != NULL)
-		RT_ADDREF(dst->ro_rt);
+	route_copyout(dst, src, sizeof(*dst));
 
-	lck_mtx_unlock(ifp->if_fwd_route_lock);
+	lck_mtx_unlock(&ifp->if_cached_route_lock);
 }
 
 static void
@@ -2249,37 +2289,17 @@ ip_fwd_route_copyin(struct ifnet *ifp, struct route *src)
 {
 	struct route *dst = &ifp->if_fwd_route;
 
-	lck_mtx_lock(ifp->if_fwd_route_lock);
+	lck_mtx_lock_spin(&ifp->if_cached_route_lock);
+	lck_mtx_convert_spin(&ifp->if_cached_route_lock);
 
 	/* Minor sanity check */
 	if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET)
 		panic("%s: wrong or corrupted route: %p", __func__, src);
 
-	/* No cached route in the ifnet? */
-	if (dst->ro_rt == NULL) {
-		/*
-		 * Copy everything (rt, dst, flags) from ip_forward();
-		 * the reference to the route was held at the time
-		 * it was allocated and is kept intact.
-		 */
-		bcopy(src, dst, sizeof (*dst));
-	} else if (src->ro_rt != NULL) {
-		/*
-		 * If the same, update just the ro_flags and ditch the one
-		 * in the local copy.  Else ditch the one that is currently
-		 * cached, and cache what we got back from ip_output().
-		 */
-		if (dst->ro_rt == src->ro_rt) {
-			dst->ro_flags = src->ro_flags;
-			rtfree(src->ro_rt);
-			src->ro_rt = NULL;
-		} else {
-			rtfree(dst->ro_rt);
-			bcopy(src, dst, sizeof (*dst));
-		}
-	}
+	if (ifp->if_fwd_cacheok)
+		route_copyin(src, dst, sizeof(*src));
 
-	lck_mtx_unlock(ifp->if_fwd_route_lock);
+	lck_mtx_unlock(&ifp->if_cached_route_lock);
 }
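
ip_fwd_route_copyout()/copyin() now delegate to the shared route_copyout()/route_copyin() helpers under the consolidated if_cached_route_lock, taken as a spin lock and converted when real work is needed. Judging from the open-coded logic this patch replaces, copyout duplicates the cached route and takes a reference for the copy; a minimal sketch of that semantic:

#include <string.h>

struct rt_sketch    { int refcnt; };
struct route_sketch { struct rt_sketch *ro_rt; int ro_flags; };

/* copy rt/dst/flags, then hold one reference for the local copy,
 * mirroring the bcopy()+RT_ADDREF() sequence this patch replaces */
static void
route_copyout_sketch(struct route_sketch *dst,
    const struct route_sketch *src, size_t len)
{
	memcpy(dst, src, len);
	if (dst->ro_rt != NULL)
		dst->ro_rt->refcnt++;	/* RT_ADDREF analogue */
}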
 
 /*
@@ -2311,7 +2331,7 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
 	n_long dest;
 	struct in_addr pkt_dst;
 	u_int32_t nextmtu = 0;
-	struct ip_out_args ipoa = { IFSCOPE_NONE };
+	struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 #if PF
 	struct pf_mtag *pf_mtag;
@@ -2355,7 +2375,7 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
 #if PF
 	pf_mtag = pf_find_mtag(m);
 	if (pf_mtag != NULL && pf_mtag->rtableid != IFSCOPE_NONE)
-		ipoa.ipoa_ifscope = pf_mtag->rtableid;
+		ipoa.ipoa_boundif = pf_mtag->rtableid;
 #endif /* PF */
 
 	ip_fwd_route_copyout(ifp, &fwd_rt);
@@ -2372,7 +2392,7 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
 		sin->sin_len = sizeof (*sin);
 		sin->sin_addr = pkt_dst;
 
-		rtalloc_scoped_ign(&fwd_rt, RTF_PRCLONING, ipoa.ipoa_ifscope);
+		rtalloc_scoped_ign(&fwd_rt, RTF_PRCLONING, ipoa.ipoa_boundif);
 		if (fwd_rt.ro_rt == NULL) {
 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
 			goto done;
@@ -2417,24 +2437,27 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
 	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
 	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
 	    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
-	    ipsendredirects && !srcrt) {
-#define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
+	    ipsendredirects && !srcrt && rt->rt_ifa != NULL) {
+		struct in_ifaddr *ia = (struct in_ifaddr *)rt->rt_ifa;
 		u_int32_t src = ntohl(ip->ip_src.s_addr);
 
-		if (RTA(rt) &&
-		    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
-		    if (rt->rt_flags & RTF_GATEWAY)
-			dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
-		    else
-			dest = pkt_dst.s_addr;
-		    /* Router requirements says to only send host redirects */
-		    type = ICMP_REDIRECT;
-		    code = ICMP_REDIRECT_HOST;
+		/* Become a regular mutex */
+		RT_CONVERT_LOCK(rt);
+		IFA_LOCK_SPIN(&ia->ia_ifa);
+		if ((src & ia->ia_subnetmask) == ia->ia_subnet) {
+			if (rt->rt_flags & RTF_GATEWAY)
+				dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
+			else
+				dest = pkt_dst.s_addr;
+			/* Router requirements say to send only host redirects */
+			type = ICMP_REDIRECT;
+			code = ICMP_REDIRECT_HOST;
 #if DIAGNOSTIC
-		    if (ipprintfs)
-		        printf("redirect (%d) to %lx\n", code, (u_int32_t)dest);
+			if (ipprintfs)
+				printf("redirect (%d) to %lx\n", code, (u_int32_t)dest);
 #endif
 		}
+		IFA_UNLOCK(&ia->ia_ifa);
 	}
 	RT_UNLOCK(rt);
 
@@ -2444,9 +2467,9 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
 		struct m_tag *tag;
 		struct ip_fwd_tag	*ipfwd_tag;
 
-		tag = m_tag_alloc(KERNEL_MODULE_TAG_ID,
+		tag = m_tag_create(KERNEL_MODULE_TAG_ID,
 		    KERNEL_TAG_TYPE_IPFORWARD,
-		    sizeof (*ipfwd_tag), M_NOWAIT);
+		    sizeof (*ipfwd_tag), M_NOWAIT, m);
 		if (tag == NULL) {
 			error = ENOBUFS;
 			m_freem(m);
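Aside: unlike m_tag_alloc(), m_tag_create() is passed the mbuf itself,
presumably so the allocator can take tags already attached to m into
account (an assumption; the patch does not say).  Either way, a tag's
payload sits immediately after its m_tag header, which the (tag + 1)
casts in this file rely on.  Hedged sketch of the sequence:

    tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD,
        sizeof (struct ip_fwd_tag), M_NOWAIT, m);
    if (tag == NULL)
        return (ENOBUFS);                   /* caller drops the packet */
    ipfwd_tag = (struct ip_fwd_tag *)(tag + 1); /* payload follows header */
    ipfwd_tag->next_hop = next_hop;
    m_tag_prepend(m, tag);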
@@ -2473,9 +2496,6 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
 			OSAddAtomic(1, &ipstat.ips_redirectsent);
 		else {
 			if (mcopy) {
-#if IPFLOW
-				ipflow_create(&fwd_rt, mcopy);
-#endif
 				/*
 				 * If we didn't have to go thru ipflow and
 				 * the packet was successfully consumed by
@@ -2580,6 +2600,7 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
 						}
 						sav = key_allocsa_policy(&saidx);
 						if (sav != NULL) {
+							lck_mtx_lock(sadb_mutex);
 							if (sav->sah != NULL) {
 								ro = &sav->sah->sa_route;
 								if (ro->ro_rt != NULL) {
@@ -2591,7 +2612,8 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
 									RT_UNLOCK(ro->ro_rt);
 								}
 							}
-							key_freesav(sav, KEY_SADB_UNLOCKED);
+							key_freesav(sav, KEY_SADB_LOCKED);
+							lck_mtx_unlock(sadb_mutex);
 						}
 					}
 				}
@@ -2617,27 +2639,41 @@ done:
 	ip_fwd_route_copyin(ifp, &fwd_rt);
 }
 
-void
+int
 ip_savecontrol(
 	struct inpcb *inp,
 	struct mbuf **mp,
 	struct ip *ip,
 	struct mbuf *m)
 {
+	*mp = NULL;
 	if (inp->inp_socket->so_options & SO_TIMESTAMP) {
 		struct timeval tv;
 
 		microtime(&tv);
-		*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
-			SCM_TIMESTAMP, SOL_SOCKET);
-		if (*mp)
-			mp = &(*mp)->m_next;
+		mp = sbcreatecontrol_mbuf((caddr_t) &tv, sizeof(tv),
+			SCM_TIMESTAMP, SOL_SOCKET, mp);
+		if (*mp == NULL) {
+			goto no_mbufs;
+		}
 	}
+	if ((inp->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
+		uint64_t time;
+
+		time = mach_absolute_time();
+		mp = sbcreatecontrol_mbuf((caddr_t) &time, sizeof(time),
+			SCM_TIMESTAMP_MONOTONIC, SOL_SOCKET, mp);
+
+		if (*mp == NULL) {
+			goto no_mbufs;
+		}
+	}
 	if (inp->inp_flags & INP_RECVDSTADDR) {
-		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
-		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
-		if (*mp)
-			mp = &(*mp)->m_next;
+		mp = sbcreatecontrol_mbuf((caddr_t) &ip->ip_dst,
+			sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP, mp);
+		if (*mp == NULL) {
+			goto no_mbufs;
+		}
 	}
 #ifdef notyet
 	/* XXX
@@ -2646,17 +2682,19 @@ ip_savecontrol(
 	 */
 	/* options were tossed already */
 	if (inp->inp_flags & INP_RECVOPTS) {
-		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
-		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
-		if (*mp)
-			mp = &(*mp)->m_next;
+		mp = sbcreatecontrol_mbuf((caddr_t) opts_deleted_above,
+			sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP, mp);
+		if (*mp == NULL) {
+			goto no_mbufs;
+		}
 	}
 	/* ip_srcroute doesn't do what we want here, need to fix */
 	if (inp->inp_flags & INP_RECVRETOPTS) {
-		*mp = sbcreatecontrol((caddr_t) ip_srcroute(),
-		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
-		if (*mp)
-			mp = &(*mp)->m_next;
+		mp = sbcreatecontrol_mbuf((caddr_t) ip_srcroute(),
+			sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP, mp);
+		if (*mp == NULL) {
+			goto no_mbufs;
+		}
 	}
 #endif
 	if (inp->inp_flags & INP_RECVIF) {
@@ -2669,24 +2707,27 @@ ip_savecontrol(
 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
 
 		ifnet_head_lock_shared();
-		if (((ifp = m->m_pkthdr.rcvif)) 
-		&& ( ifp->if_index && (ifp->if_index <= if_index))) {
+		if ((ifp = m->m_pkthdr.rcvif) != NULL &&
+		    ifp->if_index && (ifp->if_index <= if_index)) {
 			struct ifaddr *ifa = ifnet_addrs[ifp->if_index - 1];
 
 			if (!ifa || !ifa->ifa_addr)
 				goto makedummy;
 
+			IFA_LOCK_SPIN(ifa);
 			sdp = (struct sockaddr_dl *)ifa->ifa_addr;
 			/*
 			 * Change our mind and don't try copy.
 			 */
-			if ((sdp->sdl_family != AF_LINK)
-			|| (sdp->sdl_len > sizeof(sdlbuf))) {
+			if ((sdp->sdl_family != AF_LINK) ||
+			    (sdp->sdl_len > sizeof(sdlbuf))) {
+				IFA_UNLOCK(ifa);
 				goto makedummy;
 			}
 			bcopy(sdp, sdl2, sdp->sdl_len);
+			IFA_UNLOCK(ifa);
 		} else {
-makedummy:	
+makedummy:
 			sdl2->sdl_len
 				= offsetof(struct sockaddr_dl, sdl_data[0]);
 			sdl2->sdl_family = AF_LINK;
@@ -2694,15 +2735,46 @@ makedummy:
 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
 		}
 		ifnet_head_done();
-		*mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
-			IP_RECVIF, IPPROTO_IP);
-		if (*mp)
-			mp = &(*mp)->m_next;
+		mp = sbcreatecontrol_mbuf((caddr_t) sdl2, sdl2->sdl_len,
+			IP_RECVIF, IPPROTO_IP, mp);
+		if (*mp == NULL) {
+			goto no_mbufs;
+		}
 	}
 	if (inp->inp_flags & INP_RECVTTL) {
-		*mp = sbcreatecontrol((caddr_t)&ip->ip_ttl, sizeof(ip->ip_ttl), IP_RECVTTL, IPPROTO_IP);
-		if (*mp) mp = &(*mp)->m_next;
+		mp = sbcreatecontrol_mbuf((caddr_t)&ip->ip_ttl, sizeof(ip->ip_ttl),
+			IP_RECVTTL, IPPROTO_IP, mp);
+		if (*mp == NULL) {
+			goto no_mbufs;
+		}
+	}
+	if ((inp->inp_socket->so_flags & SOF_RECV_TRAFFIC_CLASS) != 0) {
+		int tc = m->m_pkthdr.prio;
+
+		mp = sbcreatecontrol_mbuf((caddr_t) &tc, sizeof(tc),
+			SO_TRAFFIC_CLASS, SOL_SOCKET, mp);
+		if (*mp == NULL) {
+			goto no_mbufs;
+		}
+	}
+	if (inp->inp_flags & INP_PKTINFO) {
+		struct in_pktinfo pi;
+
+		bzero(&pi, sizeof(struct in_pktinfo));
+		bcopy(&ip->ip_dst, &pi.ipi_addr, sizeof(struct in_addr));
+		pi.ipi_ifindex = (m != NULL && m->m_pkthdr.rcvif != NULL) ?
+		    m->m_pkthdr.rcvif->if_index : 0;
+
+		mp = sbcreatecontrol_mbuf((caddr_t)&pi, sizeof(struct in_pktinfo),
+			IP_RECVPKTINFO, IPPROTO_IP, mp);
+		if (*mp == NULL) {
+			goto no_mbufs;
+		}
 	}
+	return 0;
+
+no_mbufs:
+	ipstat.ips_pktdropcntrl++;
+	return ENOBUFS;
 }
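Aside: with ip_savecontrol() now returning int, callers can drop the
packet and count it via ips_pktdropcntrl when control-mbuf allocation
fails.  From userland the new data arrives as recvmsg(2) control
messages; this sketch mirrors the level/type emitted above and assumes
the SDK exposes IP_RECVPKTINFO and struct in_pktinfo:

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <string.h>

    static unsigned int
    recv_ifindex(int s, char *buf, size_t len)
    {
        struct msghdr msg;
        struct iovec iov = { buf, len };
        char cbuf[CMSG_SPACE(sizeof (struct in_pktinfo))];
        struct cmsghdr *cm;
        struct in_pktinfo pi;
        int on = 1;

        (void) setsockopt(s, IPPROTO_IP, IP_RECVPKTINFO, &on, sizeof (on));
        memset(&msg, 0, sizeof (msg));
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = cbuf;
        msg.msg_controllen = sizeof (cbuf);
        if (recvmsg(s, &msg, 0) < 0)
            return (0);
        for (cm = CMSG_FIRSTHDR(&msg); cm != NULL;
            cm = CMSG_NXTHDR(&msg, cm)) {
            if (cm->cmsg_level == IPPROTO_IP &&
                cm->cmsg_type == IP_RECVPKTINFO) {
                memcpy(&pi, CMSG_DATA(cm), sizeof (pi));
                return (pi.ipi_ifindex);    /* receiving interface */
            }
        }
        return (0);
    }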
 
 int
diff --git a/bsd/netinet/ip_mroute.c b/bsd/netinet/ip_mroute.c
index e61d2ed64..f33537ef8 100644
--- a/bsd/netinet/ip_mroute.c
+++ b/bsd/netinet/ip_mroute.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -131,12 +131,12 @@ int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
 		   struct ip_moptions *) = _ip_mforward;
 
 int
-_mrt_ioctl(__unused int req, __unused caddr_t data, __unused struct proc *p)
+_mrt_ioctl(__unused u_long req, __unused caddr_t data, __unused struct proc *p)
 {
 	return EOPNOTSUPP;
 }
 
-int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl;
+int (*mrt_ioctl)(u_long, caddr_t, struct proc *) = _mrt_ioctl;
 
 void
 rsvp_input(struct mbuf *m, int iphlen)		/* XXX must fixup manually */
@@ -293,7 +293,7 @@ static int	X_ip_mrouter_done(void);
 static int	X_ip_mrouter_get(struct socket *so, struct sockopt *m);
 static int	X_ip_mrouter_set(struct socket *so, struct sockopt *m);
 static int	X_legal_vif_num(int vif);
-static int	X_mrt_ioctl(int cmd, caddr_t data);
+static int	X_mrt_ioctl(u_long cmd, caddr_t data);
 
 static int get_sg_cnt(struct sioc_sg_req *);
 static int get_vif_cnt(struct sioc_vif_req *);
@@ -493,7 +493,7 @@ int (*ip_mrouter_get)(struct socket *, struct sockopt *) = X_ip_mrouter_get;
  * Handle ioctl commands to obtain information from the cache
  */
 static int
-X_mrt_ioctl(int cmd, caddr_t data)
+X_mrt_ioctl(u_long cmd, caddr_t data)
 {
     int error = 0;
 
@@ -512,7 +512,7 @@ X_mrt_ioctl(int cmd, caddr_t data)
 }
 
 #if !defined(MROUTE_LKM) || !MROUTE_LKM
-int (*mrt_ioctl)(int, caddr_t) = X_mrt_ioctl;
+int (*mrt_ioctl)(u_long, caddr_t) = X_mrt_ioctl;
 #endif
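Aside on the int -> u_long widening: ioctl(2) passes its command word as
u_long, and _IO*()-style encodings set the high bit (IOC_INOUT and
friends), so round-tripping the command through int sign-extends on LP64
and the value no longer matches any case label.  Illustration with a
made-up encoding:

    u_long cmd = 0xc0106920UL;          /* some _IOWR() value, bit 31 set */
    int narrow = (int)cmd;              /* negative when stored in int */
    u_long widened = (u_long)narrow;    /* 0xffffffffc0106920 on LP64 */
    /* widened != cmd: a switch (cmd) in the handler would never match */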
 
 /*
@@ -695,7 +695,7 @@ add_vif(struct vifctl *vifcp)
     ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
     if (ifa == 0) return EADDRNOTAVAIL;
     ifp = ifa->ifa_ifp;
-    ifafree(ifa);
+    IFA_REMREF(ifa);
     ifa = NULL;
 
     if (vifcp->vifc_flags & VIFF_TUNNEL) {
@@ -1099,7 +1099,10 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
 	return 1;
     }
 
+    if (imo != NULL)
+	IMO_LOCK(imo);
     if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) {
+	IMO_UNLOCK(imo);
 	if (ip->ip_ttl < 255)
 		ip->ip_ttl++;	/* compensate for -1 in *_send routines */
 	if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
@@ -1110,6 +1113,8 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
 		vifp->v_ifp->if_name, vifp->v_ifp->if_unit);
 	}
 	return (ip_mdq(m, ifp, NULL, vifi));
+    } else if (imo != NULL) {
+	IMO_UNLOCK(imo);
     }
     if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
 	printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n",
@@ -1807,7 +1812,6 @@ tbf_dq_sel(struct vif *vifp, struct ip *ip)
 static void
 tbf_send_packet(struct vif *vifp, struct mbuf *m)
 {
-    struct ip_moptions imo;
     int error;
     static struct route ro;
 
@@ -1816,10 +1820,18 @@ tbf_send_packet(struct vif *vifp, struct mbuf *m)
 	ip_output(m, (struct mbuf *)0, &vifp->v_route,
 		  IP_FORWARDING, (struct ip_moptions *)0, NULL);
     } else {
-	imo.imo_multicast_ifp  = vifp->v_ifp;
-	imo.imo_multicast_ttl  = mtod(m, struct ip *)->ip_ttl - 1;
-	imo.imo_multicast_loop = 1;
-	imo.imo_multicast_vif  = -1;
+	struct ip_moptions *imo;
+
+	imo = ip_allocmoptions(M_DONTWAIT);
+	if (imo == NULL) {
+		error = ENOMEM;
+		goto done;
+	}
+
+	imo->imo_multicast_ifp  = vifp->v_ifp;
+	imo->imo_multicast_ttl  = mtod(m, struct ip *)->ip_ttl - 1;
+	imo->imo_multicast_loop = 1;
+	imo->imo_multicast_vif  = -1;
 
 	/*
 	 * Re-entrancy should not be a problem here, because
@@ -1828,8 +1840,10 @@ tbf_send_packet(struct vif *vifp, struct mbuf *m)
 	 * the loopback interface, thus preventing looping.
 	 */
 	error = ip_output(m, (struct mbuf *)0, &ro,
-			  IP_FORWARDING, &imo, NULL);
+			  IP_FORWARDING, imo, NULL);
 
+	IMO_REMREF(imo);
+done:
 	if (mrtdebug & DEBUG_XMIT)
 	    log(LOG_DEBUG, "phyint_send on vif %d err %d\n", 
 		vifp - viftable, error);
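Aside: ip_allocmoptions() (added later in this patch) returns the object
with one reference already held, so every successful allocation pairs
with exactly one IMO_REMREF() once the transmit path is done, as above.
Generic sketch, assuming an ip_output() context with m and ro in scope:

    struct ip_moptions *imo;
    int error;

    if ((imo = ip_allocmoptions(M_DONTWAIT)) == NULL)
        return (ENOMEM);
    imo->imo_multicast_ttl = 1;         /* e.g. keep traffic link-local */
    imo->imo_multicast_loop = 0;
    error = ip_output(m, NULL, &ro, IP_FORWARDING, imo, NULL);
    IMO_REMREF(imo);                    /* balance the alloc reference */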
diff --git a/bsd/netinet/ip_mroute.h b/bsd/netinet/ip_mroute.h
index 71c39440a..f234e20ab 100644
--- a/bsd/netinet/ip_mroute.h
+++ b/bsd/netinet/ip_mroute.h
@@ -298,9 +298,9 @@ extern int	(*ip_mrouter_set)(struct socket *, struct sockopt *);
 extern int	(*ip_mrouter_get)(struct socket *, struct sockopt *);
 extern int	(*ip_mrouter_done)(void);
 #if MROUTING
-extern int	(*mrt_ioctl)(int, caddr_t);
+extern int	(*mrt_ioctl)(u_long, caddr_t);
 #else
-extern int	(*mrt_ioctl)(int, caddr_t, struct proc *);
+extern int	(*mrt_ioctl)(u_long, caddr_t, struct proc *);
 #endif
 
 #endif /* KERNEL_PRIVATE */
diff --git a/bsd/netinet/ip_output.c b/bsd/netinet/ip_output.c
index 07d74f97f..57f522919 100644
--- a/bsd/netinet/ip_output.c
+++ b/bsd/netinet/ip_output.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -79,12 +79,17 @@
 #include <sys/socketvar.h>
 #include <kern/locks.h>
 #include <sys/sysctl.h>
+#include <sys/mcache.h>
 
 #include <machine/endian.h>
+#include <pexpert/pexpert.h>
 
 #include <net/if.h>
 #include <net/if_dl.h>
+#include <net/if_types.h>
 #include <net/route.h>
+#include <net/ntstat.h>
+#include <net/net_osdep.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
@@ -124,6 +129,7 @@
 
 #include <netinet/ip_fw.h>
 #include <netinet/ip_divert.h>
+#include <mach/sdt.h>
 
 #if DUMMYNET
 #include <netinet/ip_dummynet.h>
@@ -144,20 +150,14 @@
 u_short ip_id;
 
 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
-static struct ifnet *ip_multicast_if(struct in_addr *, int *);
 static void	ip_mloopback(struct ifnet *, struct mbuf *,
 	struct sockaddr_in *, int);
-static int	ip_getmoptions(struct sockopt *, struct ip_moptions *);
 static int	ip_pcbopts(int, struct mbuf **, struct mbuf *);
-static int	ip_setmoptions(struct sockopt *, struct ip_moptions **);
+static void	imo_trace(struct ip_moptions *, int);
 
 static void ip_out_cksum_stats(int, u_int32_t);
 static struct ifaddr *in_selectsrcif(struct ip *, struct route *, unsigned int);
-static void ip_bindif(struct inpcb *, unsigned int);
 
-int ip_createmoptions(struct ip_moptions **imop);
-int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
-int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
 int	ip_optcopy(struct ip *, struct ip *);
 void in_delayed_cksum_offset(struct mbuf *, int );
 void in_cksum_offset(struct mbuf* , size_t );
@@ -175,18 +175,50 @@ extern int ipsec_bypass;
 #endif
 
 static int	ip_maxchainsent = 0;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ip_maxchainsent, 0, "use dlil_output_list");
 #if DEBUG
 static int forge_ce = 0;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, CTLFLAG_RW | CTLFLAG_LOCKED,
     &forge_ce, 0, "Forge ECN CE");
 #endif /* DEBUG */
 
 static int ip_select_srcif_debug = 0;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
     &ip_select_srcif_debug, 0, "log source interface selection debug info");
 
+#define	IMO_TRACE_HIST_SIZE	32	/* size of trace history */
+
+/* For gdb */
+__private_extern__ unsigned int imo_trace_hist_size = IMO_TRACE_HIST_SIZE;
+
+struct ip_moptions_dbg {
+	struct ip_moptions	imo;			/* ip_moptions */
+	u_int16_t		imo_refhold_cnt;	/* # of IMO_ADDREF */
+	u_int16_t		imo_refrele_cnt;	/* # of IMO_REMREF */
+	/*
+	 * Alloc and free callers.
+	 */
+	ctrace_t		imo_alloc;
+	ctrace_t		imo_free;
+	/*
+	 * Circular lists of IMO_ADDREF and IMO_REMREF callers.
+	 */
+	ctrace_t		imo_refhold[IMO_TRACE_HIST_SIZE];
+	ctrace_t		imo_refrele[IMO_TRACE_HIST_SIZE];
+};
+
+#if DEBUG
+static unsigned int imo_debug = 1;	/* debugging (enabled) */
+#else
+static unsigned int imo_debug;		/* debugging (disabled) */
+#endif /* !DEBUG */
+static unsigned int imo_size;		/* size of zone element */
+static struct zone *imo_zone;		/* zone for ip_moptions */
+
+#define	IMO_ZONE_MAX		64		/* maximum elements in zone */
+#define	IMO_ZONE_NAME		"ip_moptions"	/* zone name */
+
 /*
  * IP output.  The packet in mbuf chain m contains a skeletal IP
  * header (with len, off, ttl, proto, tos, src, dst).
@@ -244,11 +276,12 @@ ip_output_list(
 	struct ifnet *ifp = NULL;
 	struct mbuf *m = m0, **mppn = NULL;
 	int hlen = sizeof (struct ip);
-	int len = 0, off, error = 0;
+	int len = 0, error = 0;
 	struct sockaddr_in *dst = NULL;
 	struct in_ifaddr *ia = NULL, *src_ia = NULL;
 	int isbroadcast, sw_csum;
 	struct in_addr pkt_dst;
+	struct ipf_pktopts *ippo = NULL, ipf_pktopts;
 #if IPSEC
 	struct route iproute;
 	struct socket *so = NULL;
@@ -258,18 +291,24 @@ ip_output_list(
 	int fwd_rewrite_src = 0;
 #endif
 #if IPFIREWALL
+	int off;
 	struct ip_fw_args args;
+	struct m_tag	*tag;
+	struct sockaddr_in *next_hop_from_ipfwd_tag = NULL;
 #endif
 	int didfilter = 0;
 	ipfilter_t inject_filter_ref = 0;
-	struct m_tag	*tag;
+#if DUMMYNET
 	struct route	saved_route;
 	struct ip_out_args saved_ipoa;
+	struct sockaddr_in dst_buf;
+#endif /* DUMMYNET */
 	struct mbuf * packetlist;
 	int pktcnt = 0, tso = 0;
+	u_int32_t	bytecnt = 0;
 	unsigned int ifscope;
+	unsigned int nocell;
 	boolean_t select_srcif;
-
 	KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
 
 	packetlist = m0;
@@ -296,7 +335,8 @@ ip_output_list(
 		ro = &saved_route;
 
 		imo = NULL;
-		dst = dn_tag->dn_dst;
+		bcopy(&dn_tag->dn_dst, &dst_buf, sizeof(dst_buf));
+		dst = &dst_buf;
 		ifp = dn_tag->ifp;
 		flags = dn_tag->flags;
 		saved_ipoa = dn_tag->ipoa;
@@ -323,8 +363,8 @@ ip_output_list(
 		struct ip_fwd_tag	*ipfwd_tag;
 
 		ipfwd_tag = (struct ip_fwd_tag *)(tag+1);
-		args.next_hop = ipfwd_tag->next_hop;
-
+		next_hop_from_ipfwd_tag = ipfwd_tag->next_hop;
+
 		m_tag_delete(m0, tag);
 	}
 ipfw_tags_done:
@@ -340,6 +380,9 @@ ipfw_tags_done:
 		      mtod(m, struct ip *)->ip_p);
 #endif
 
+	bzero(&ipf_pktopts, sizeof(struct ipf_pktopts));
+	ippo = &ipf_pktopts;
+
 	/*
 	 * At present the IP_OUTARGS flag implies a request for IP to
 	 * perform source interface selection.  In the forwarding case,
@@ -348,12 +391,22 @@ ipfw_tags_done:
 	 */
 	if (ip_doscopedroute && (flags & IP_OUTARGS)) {
 		select_srcif = !(flags & IP_FORWARDING);
-		ifscope = ipoa->ipoa_ifscope;
+		ifscope = ipoa->ipoa_boundif;
+		ipf_pktopts.ippo_flags = IPPOF_BOUND_IF;
+		ipf_pktopts.ippo_flags |= (ifscope << IPPOF_SHIFT_IFSCOPE);
 	} else {
 		select_srcif = FALSE;
 		ifscope = IFSCOPE_NONE;
 	}
 
+	if (flags & IP_OUTARGS) {
+		nocell = ipoa->ipoa_nocell;
+		if (nocell)
+			ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
+	} else {
+		nocell = 0;
+	}
+
 #if IPFIREWALL
 	if (args.rule != NULL) {	/* dummynet already saw us */
 		ip = mtod(m, struct ip *);
@@ -361,8 +414,11 @@ ipfw_tags_done:
 		if (ro->ro_rt != NULL) {
 			RT_LOCK_SPIN(ro->ro_rt);
 			ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
-			if (ia)
-				ifaref(&ia->ia_ifa);
+			if (ia) {
+				/* Become a regular mutex */
+				RT_CONVERT_LOCK(ro->ro_rt);
+				IFA_ADDREF(&ia->ia_ifa);
+			}
 			RT_UNLOCK(ro->ro_rt);
 		}
 #if IPSEC
@@ -397,11 +453,29 @@ loopit:
 	}
 	ip = mtod(m, struct ip *);
 #if IPFIREWALL
+	/*
+	 * rdar://8542331
+	 *
+	 * When dealing with a packet chain, we need to reset "next_hop"
+	 * because "dst" may have been changed to the gateway address below
+	 * for the previous packet of the chain.  Otherwise the route could
+	 * inadvertently be changed to the route to the gateway address
+	 * (instead of the route to the destination).
+	 */
+	args.next_hop = next_hop_from_ipfwd_tag;
 	pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;
 #else
 	pkt_dst = ip->ip_dst;
 #endif
 
+	/*
+	 * We must not send if the packet is destined to network zero.
+	 * RFC1122 3.2.1.3 (a) and (b).
+	 */
+	if (IN_ZERONET(ntohl(pkt_dst.s_addr))) {
+		error = EHOSTUNREACH;
+		goto bad;
+	}
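Aside: IN_ZERONET() is conventionally defined in <netinet/in.h> as
(((u_int32_t)(i) & 0xff000000) == 0), i.e. a destination in 0.0.0.0/8,
which RFC 1122 sections 3.2.1.3 (a) and (b) forbid on the wire
(definition quoted from memory, so treat it as an assumption).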
+
 	/*
 	 * Fill in IP header.
 	 */
@@ -450,7 +524,7 @@ loopit:
 				error = EADDRNOTAVAIL;
 				goto bad;
 			}
-			ifafree(&src_ia->ia_ifa);
+			IFA_REMREF(&src_ia->ia_ifa);
 		}
 		/*
 		 * Test rt_flags without holding rt_lock for performance
@@ -487,7 +561,7 @@ loopit:
 #define sintosa(sin)	((struct sockaddr *)(sin))
 	if (flags & IP_ROUTETOIF) {
 		if (ia)
-			ifafree(&ia->ia_ifa);
+			IFA_REMREF(&ia->ia_ifa);
 		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) {
 			if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
 				OSAddAtomic(1, &ipstat.ips_noroute);
@@ -499,15 +573,14 @@ loopit:
 		ip->ip_ttl = 1;
 		isbroadcast = in_broadcast(dst->sin_addr, ifp);
 	} else if (IN_MULTICAST(ntohl(pkt_dst.s_addr)) &&
-	    imo != NULL && imo->imo_multicast_ifp != NULL) {
+	    imo != NULL && (ifp = imo->imo_multicast_ifp) != NULL) {
 		/*
 		 * Bypass the normal routing lookup for multicast
 		 * packets if the interface is specified.
 		 */
-		ifp = imo->imo_multicast_ifp;
 		isbroadcast = 0;
 		if (ia != NULL)
-			ifafree(&ia->ia_ifa);
+			IFA_REMREF(&ia->ia_ifa);
 
 		/* Macro takes reference on ia */
 		IFP_TO_IA(ifp, ia);
@@ -530,6 +603,18 @@ loopit:
 			/* Find the source interface */
 			ifa = in_selectsrcif(ip, ro, ifscope);
 
+			/*
+			 * If the source address belongs to a cellular interface
+			 * and the caller forbids our using interfaces of such
+			 * type, pretend that there is no source address.
+			 */
+			if (nocell && ifa != NULL &&
+			    ifa->ifa_ifp->if_type == IFT_CELLULAR) {
+				IFA_REMREF(ifa);
+				error = EADDRNOTAVAIL;
+				goto bad;
+			}
+
 			/*
 			 * If the source address is spoofed (in the case
 			 * of IP_RAWOUTPUT), or if this is destined for
@@ -560,7 +645,7 @@ loopit:
 			if (ifa != NULL) {
 				if (ifscope == IFSCOPE_NONE)
 					ifscope = ifa->ifa_ifp->if_index;
-				ifafree(ifa);
+				IFA_REMREF(ifa);
 				cloneok = (!(flags & IP_RAWOUTPUT) &&
 				    !(IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))));
 			}
@@ -611,6 +696,23 @@ loopit:
 				rtalloc_ign(ro, ign);
 			else
 				rtalloc_scoped_ign(ro, ign, ifscope);
+
+			/*
+			 * If the route points to a cellular interface and the
+			 * caller forbids our using interfaces of such type,
+			 * pretend that there is no route.
+			 */
+			if (nocell && ro->ro_rt != NULL) {
+				RT_LOCK_SPIN(ro->ro_rt);
+				if (ro->ro_rt->rt_ifp->if_type ==
+				    IFT_CELLULAR) {
+					RT_UNLOCK(ro->ro_rt);
+					rtfree(ro->ro_rt);
+					ro->ro_rt = NULL;
+				} else {
+					RT_UNLOCK(ro->ro_rt);
+				}
+			}
 		}
 
 		if (ro->ro_rt == NULL) {
@@ -620,11 +722,14 @@ loopit:
 		}
 
 		if (ia)
-			ifafree(&ia->ia_ifa);
+			IFA_REMREF(&ia->ia_ifa);
 		RT_LOCK_SPIN(ro->ro_rt);
 		ia = ifatoia(ro->ro_rt->rt_ifa);
-		if (ia)
-			ifaref(&ia->ia_ifa);
+		if (ia) {
+			/* Become a regular mutex */
+			RT_CONVERT_LOCK(ro->ro_rt);
+			IFA_ADDREF(&ia->ia_ifa);
+		}
 		ifp = ro->ro_rt->rt_ifp;
 		ro->ro_rt->rt_use++;
 		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
@@ -641,6 +746,9 @@ loopit:
 
 	if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
 		struct in_multi *inm;
+		u_int32_t vif;
+		u_int8_t ttl = IP_DEFAULT_MULTICAST_TTL;
+		u_int8_t loop = IP_DEFAULT_MULTICAST_LOOP;
 
 		m->m_flags |= M_MCAST;
 		/*
@@ -653,22 +761,28 @@ loopit:
 		 * See if the caller provided any multicast options
 		 */
 		if (imo != NULL) {
-			if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = imo->imo_multicast_ttl;
-			if (imo->imo_multicast_ifp != NULL) {
+			IMO_LOCK(imo);
+			vif = imo->imo_multicast_vif;
+			ttl = imo->imo_multicast_ttl;
+			loop = imo->imo_multicast_loop;
+			if ((flags & IP_RAWOUTPUT) == 0)
+				ip->ip_ttl = ttl;
+			if (imo->imo_multicast_ifp != NULL)
 				ifp = imo->imo_multicast_ifp;
-			}
+			IMO_UNLOCK(imo);
 #if MROUTING
-			if (imo->imo_multicast_vif != -1 && 
-				((flags & IP_RAWOUTPUT) == 0 || ip->ip_src.s_addr == INADDR_ANY))
-				ip->ip_src.s_addr =
-					ip_mcast_src(imo->imo_multicast_vif);
+			if (vif != -1 && ((flags & IP_RAWOUTPUT) == 0 ||
+			    ip->ip_src.s_addr == INADDR_ANY))
+				ip->ip_src.s_addr = ip_mcast_src(vif);
 #endif /* MROUTING */
-		} else
-			if ((flags & IP_RAWOUTPUT) == 0) ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
+		} else if ((flags & IP_RAWOUTPUT) == 0) {
+			vif = -1;
+			ip->ip_ttl = ttl;
+		}
 		/*
 		 * Confirm that the outgoing interface supports multicast.
 		 */
-		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
+		if (imo == NULL || vif == -1) {
 			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 				OSAddAtomic(1, &ipstat.ips_noroute);
 				error = ENETUNREACH;
@@ -682,11 +796,15 @@ loopit:
 		if (ip->ip_src.s_addr == INADDR_ANY) {
 			struct in_ifaddr *ia1;
 			lck_rw_lock_shared(in_ifaddr_rwlock);
-			TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link)
+			TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link) {
+				IFA_LOCK_SPIN(&ia1->ia_ifa);
 				if (ia1->ia_ifp == ifp) {
 					ip->ip_src = IA_SIN(ia1)->sin_addr;
+					IFA_UNLOCK(&ia1->ia_ifa);
 					break;
 				}
+				IFA_UNLOCK(&ia1->ia_ifa);
+			}
 			lck_rw_done(in_ifaddr_rwlock);
 			if (ip->ip_src.s_addr == INADDR_ANY) {
 				error = ENETUNREACH;
@@ -694,11 +812,10 @@ loopit:
 			}
 		}
 
-		ifnet_lock_shared(ifp);
-		IN_LOOKUP_MULTI(pkt_dst, ifp, inm);
-		ifnet_lock_done(ifp);
-		if (inm != NULL &&
-		   (imo == NULL || imo->imo_multicast_loop)) {
+		in_multihead_lock_shared();
+		IN_LOOKUP_MULTI(&pkt_dst, ifp, inm);
+		in_multihead_lock_done();
+		if (inm != NULL && (imo == NULL || loop)) {
 			/*
 			 * If we belong to the destination multicast group
 			 * on the outgoing interface, and the caller did not
@@ -707,17 +824,16 @@ loopit:
 			if (!TAILQ_EMPTY(&ipv4_filters)) {
 				struct ipfilter	*filter;
 				int seen = (inject_filter_ref == 0);
-				struct ipf_pktopts *ippo = 0, ipf_pktopts;
 
-				if (imo) {						
-					ippo = &ipf_pktopts;
-					ipf_pktopts.ippo_mcast_ifnet = imo->imo_multicast_ifp;
-					ipf_pktopts.ippo_mcast_ttl = imo->imo_multicast_ttl;
-					ipf_pktopts.ippo_mcast_loop = imo->imo_multicast_loop;
+				if (imo != NULL) {
+					ipf_pktopts.ippo_flags |= IPPOF_MCAST_OPTS;
+					ipf_pktopts.ippo_mcast_ifnet = ifp;
+					ipf_pktopts.ippo_mcast_ttl = ttl;
+					ipf_pktopts.ippo_mcast_loop = loop;
 				}
-				
+
 				ipf_ref();
-				
+
 				/* 4135317 - always pass network byte order to filter */
 
 #if BYTE_ORDER != BIG_ENDIAN
@@ -734,15 +850,17 @@ loopit:
 						result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
 						if (result == EJUSTRETURN) {
 							ipf_unref();
+							INM_REMREF(inm);
 							goto done;
 						}
 						if (result != 0) {
 							ipf_unref();
+							INM_REMREF(inm);
 							goto bad;
 						}
 					}
 				}
-				
+
 				/* set back to host byte order */
 				ip = mtod(m, struct ip *);
 
@@ -778,15 +896,18 @@ loopit:
 				 * as prescribed by rsvpd.
 				 */
 				if (!rsvp_on)
-				  imo = NULL;
+					imo = NULL;
 				if (ip_mforward(ip, ifp, m, imo) != 0) {
 					m_freem(m);
+					if (inm != NULL)
+						INM_REMREF(inm);
 					goto done;
 				}
 			}
 		}
 #endif /* MROUTING */
-
+		if (inm != NULL)
+			INM_REMREF(inm);
 		/*
 		 * Multicasts with a time-to-live of zero may be looped-
 		 * back, above, but must not be transmitted on a network.
@@ -808,7 +929,9 @@ loopit:
 	 * of outgoing interface.
 	 */
 	if (ip->ip_src.s_addr == INADDR_ANY) {
+		IFA_LOCK_SPIN(&ia->ia_ifa);
 		ip->ip_src = IA_SIN(ia)->sin_addr;
+		IFA_UNLOCK(&ia->ia_ifa);
 #if IPFIREWALL_FORWARD
 		/* Keep note that we did this - if the firewall changes
 		 * the next-hop, our interface may change, changing the
@@ -847,26 +970,30 @@ loopit:
 sendit:
 #if PF
 	/* Invoke outbound packet filter */
-	if (pf_af_hook(ifp, mppn, &m, AF_INET, FALSE) != 0) {
-		if (packetlist == m0) {
-			packetlist = m;
-			mppn = NULL;
-		}
-		if (m != NULL) {
-			m0 = m;
-			/* Next packet in the chain */
-			goto loopit;
-		} else if (packetlist != NULL) {
-			/* No more packet; send down the chain */
-			goto sendchain;
+	if (PF_IS_ENABLED) {
+		int rc;
+		rc = pf_af_hook(ifp, mppn, &m, AF_INET, FALSE);
+		if (rc != 0) {
+			if (packetlist == m0) {
+				packetlist = m;
+				mppn = NULL;
+			}
+			if (m != NULL) {
+				m0 = m;
+				/* Next packet in the chain */
+				goto loopit;
+			} else if (packetlist != NULL) {
+				/* No more packet; send down the chain */
+				goto sendchain;
+			}
+			/* Nothing left; we're done */
+			goto done;
 		}
-		/* Nothing left; we're done */
-		goto done;
+		m0 = m;
+		ip = mtod(m, struct ip *);
+		pkt_dst = ip->ip_dst;
+		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
 	}
-	m0 = m;
-	ip = mtod(m, struct ip *);
-	pkt_dst = ip->ip_dst;
-	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
 #endif /* PF */
         /*
          * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt
@@ -882,7 +1009,8 @@ sendit:
 	if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) {
 		struct ipfilter	*filter;
 		int seen = (inject_filter_ref == 0);
-	
+		ipf_pktopts.ippo_flags &= ~IPPOF_MCAST_OPTS;
+
 		/* Check that a TSO frame isn't passed to a filter.
 		 * This could happen if a filter is inserted while
 		 * TCP is sending the TSO packet.
@@ -907,7 +1035,7 @@ sendit:
 					seen = 1;
 			} else if (filter->ipf_filter.ipf_output) {
 				errno_t result;
-				result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
+				result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
 				if (result == EJUSTRETURN) {
 					ipf_unref();
 					goto done;
@@ -1011,6 +1139,10 @@ sendit:
 	HTONS(ip->ip_off);
 #endif
 
+	DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
+		struct ip *, ip, struct ifnet *, ifp,
+		struct ip *, ip, struct ip6_hdr *, NULL);
+
 	error = ipsec4_output(&state, sp, flags);
     
 	m0 = m = state.m;
@@ -1071,7 +1203,7 @@ sendit:
 		rtfree(ro->ro_rt);
 		ro->ro_rt = NULL;
 		if (src_ia != NULL)
-			ifafree(&src_ia->ia_ifa);
+			IFA_REMREF(&src_ia->ia_ifa);
 	}
 
 	if (ro->ro_rt == NULL) {
@@ -1085,11 +1217,14 @@ sendit:
 		}
 	} else {
 		if (ia)
-			ifafree(&ia->ia_ifa);
+			IFA_REMREF(&ia->ia_ifa);
 		RT_LOCK_SPIN(ro->ro_rt);
 		ia = ifatoia(ro->ro_rt->rt_ifa);
-		if (ia)
-			ifaref(&ia->ia_ifa);
+		if (ia) {
+			/* Become a regular mutex */
+			RT_CONVERT_LOCK(ro->ro_rt);
+			IFA_ADDREF(&ia->ia_ifa);
+		}
 		ifp = ro->ro_rt->rt_ifp;
 		RT_UNLOCK(ro->ro_rt);
 	}
@@ -1107,6 +1242,8 @@ sendit:
 	if (!TAILQ_EMPTY(&ipv4_filters)) {
 		struct ipfilter	*filter;
 		
+		ipf_pktopts.ippo_flags &= ~IPPOF_MCAST_OPTS;
+
 		/* Check that a TSO frame isn't passed to a filter.
 		 * This could happen if a filter is inserted while
 		 * TCP is sending the TSO packet.
@@ -1128,7 +1265,7 @@ sendit:
 		TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) {
 			if (filter->ipf_filter.ipf_output) {
 				errno_t result;
-				result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0);
+				result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
 				if (result == EJUSTRETURN) {
 					ipf_unref();
 					goto done;
@@ -1310,31 +1447,35 @@ skip_ipsec:
 				 * of ours, we pretend to
 				 * be the destination for this packet.
 				 */
+				IFA_LOCK_SPIN(&ia_fw->ia_ifa);
 				if (IA_SIN(ia_fw)->sin_addr.s_addr ==
-						 dst->sin_addr.s_addr)
+				    dst->sin_addr.s_addr) {
+					IFA_UNLOCK(&ia_fw->ia_ifa);
 					break;
+				}
+				IFA_UNLOCK(&ia_fw->ia_ifa);
 			}
 			lck_rw_done(in_ifaddr_rwlock);
 			if (ia_fw) {
 				/* tell ip_input "dont filter" */
 				struct m_tag 		*fwd_tag;
 				struct ip_fwd_tag	*ipfwd_tag;
-				
-				fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID,
+
+				fwd_tag = m_tag_create(KERNEL_MODULE_TAG_ID,
 				    KERNEL_TAG_TYPE_IPFORWARD,
-				    sizeof (*ipfwd_tag), M_NOWAIT);
+				    sizeof (*ipfwd_tag), M_NOWAIT, m);
 				if (fwd_tag == NULL) {
 					error = ENOBUFS;
 					goto bad;
 				}
-				
+
 				ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);
 				ipfwd_tag->next_hop = args.next_hop;
 
 				m_tag_prepend(m, fwd_tag);
 
 				if (m->m_pkthdr.rcvif == NULL)
-					m->m_pkthdr.rcvif = ifunit("lo0");
+					m->m_pkthdr.rcvif = lo_ifp;
 				if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) & 
 						m->m_pkthdr.csum_flags) == 0) {
 					if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
@@ -1387,8 +1528,11 @@ skip_ipsec:
 
 			RT_LOCK_SPIN(ro_fwd->ro_rt);
 			ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa);
-			if (ia_fw != NULL)
-				ifaref(&ia_fw->ia_ifa);
+			if (ia_fw != NULL) {
+				/* Become a regular mutex */
+				RT_CONVERT_LOCK(ro_fwd->ro_rt);
+				IFA_ADDREF(&ia_fw->ia_ifa);
+			}
 			ifp = ro_fwd->ro_rt->rt_ifp;
 			ro_fwd->ro_rt->rt_use++;
 			if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
@@ -1412,9 +1556,12 @@ skip_ipsec:
 			 * interface, do it again, from the new one.
 			 */
 			if (ia_fw != NULL) {
-				if (fwd_rewrite_src)
+				if (fwd_rewrite_src) {
+					IFA_LOCK_SPIN(&ia_fw->ia_ifa);
 					ip->ip_src = IA_SIN(ia_fw)->sin_addr;
-				ifafree(&ia_fw->ia_ifa);
+					IFA_UNLOCK(&ia_fw->ia_ifa);
+				}
+				IFA_REMREF(&ia_fw->ia_ifa);
 			}
 			goto pass ;
 		}
@@ -1427,9 +1574,9 @@ skip_ipsec:
 		error = EACCES; /* not sure this is the right error msg */
 		goto done;
 	}
-#endif /* IPFIREWALL */
 
 pass:
+#endif /* IPFIREWALL */
 #if __APPLE__
 	/* Do not allow loopback address to wind up on a wire */
 	if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
@@ -1526,11 +1673,14 @@ pass:
 			ipsec_delaux(m);
 #endif
 		if (packetchain == 0) {
+			if (ro->ro_rt && nstat_collect)
+				nstat_route_tx(ro->ro_rt, 1, m->m_pkthdr.len, 0);
 			error = ifnet_output(ifp, PF_INET, m, ro->ro_rt,
 			    (struct sockaddr *)dst);
 			goto done;
 		}
 		else { /* packet chaining allows us to reuse the route for all packets */
+			bytecnt += m->m_pkthdr.len;
 			mppn = &m->m_nextpkt;
 			m = m->m_nextpkt;
 			if (m == NULL) {
@@ -1539,10 +1689,13 @@ sendchain:
 #endif /* PF */
 				if (pktcnt > ip_maxchainsent)
 					ip_maxchainsent = pktcnt;
+				if (ro->ro_rt && nstat_collect)
+					nstat_route_tx(ro->ro_rt, pktcnt, bytecnt, 0);
 				//send
 				error = ifnet_output(ifp, PF_INET, packetlist,
 				    ro->ro_rt, (struct sockaddr *)dst);
 				pktcnt = 0;
+				bytecnt = 0;
 				goto done;
 	
 			}
@@ -1556,23 +1709,28 @@ sendchain:
 	 * Must be able to put at least 8 bytes per fragment.
 	 */
 
-	if (ip->ip_off & IP_DF  || (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) {
+	if (ip->ip_off & IP_DF  || (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) ||
+		pktcnt > 0) {
 		error = EMSGSIZE;
 		/*
 		 * This case can happen if the user changed the MTU
-		 *   
 		 * of an interface after enabling IP on it.  Because
 		 * most netifs don't keep track of routes pointing to
 		 * them, there is no way for one to update all its
 		 * routes when the MTU is changed.
 		 */
-		RT_LOCK_SPIN(ro->ro_rt);
-		if (ro->ro_rt && (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
-		    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
-		    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
-			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
+		if (ro->ro_rt) {
+			RT_LOCK_SPIN(ro->ro_rt);
+			if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
+			    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
+			    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
+				ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
+			}
+			RT_UNLOCK(ro->ro_rt);
+		}
+		if (pktcnt > 0) {
+			m0 = packetlist;
 		}
-		RT_UNLOCK(ro->ro_rt);
 		OSAddAtomic(1, &ipstat.ips_cantfrag);
 		goto bad;
 	}
@@ -1604,6 +1762,8 @@ sendchain:
 #endif
 			if ((packetchain != 0)  && (pktcnt > 0))
 				panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist);
+			if (ro->ro_rt && nstat_collect)
+				nstat_route_tx(ro->ro_rt, 1, m->m_pkthdr.len, 0);
 			error = ifnet_output(ifp, PF_INET, m, ro->ro_rt,
 			    (struct sockaddr *)dst);
 		} else
@@ -1615,7 +1775,7 @@ sendchain:
 
 done:
 	if (ia) {
-		ifafree(&ia->ia_ifa);
+		IFA_REMREF(&ia->ia_ifa);
 		ia = NULL;
 	}
 #if IPSEC
@@ -1781,8 +1941,11 @@ in_delayed_cksum_offset(struct mbuf *m0, int ip_offset)
 	struct ip *ip;
 	unsigned char buf[sizeof(struct ip)];
 	u_short csum, offset, ip_len;
-	struct mbuf *m = m0;
-	
+
+	/* Save the first mbuf pointer and ip_offset before they are modified */
+	struct mbuf *m = m0;
+	int ip_offset_copy = ip_offset;
+
 	while (ip_offset >= m->m_len) {
 		ip_offset -= m->m_len;
 		m = m->m_next;
@@ -1823,12 +1986,12 @@ in_delayed_cksum_offset(struct mbuf *m0, int ip_offset)
 	 * is bogus and we give up.
 	 */
 	ip_len = ip->ip_len;
-	if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
+	if (ip_len != (m0->m_pkthdr.len - ip_offset_copy)) {
 		ip_len = SWAP16(ip_len);
-		if (ip_len != (m0->m_pkthdr.len - ip_offset)) {
+		if (ip_len != (m0->m_pkthdr.len - ip_offset_copy)) {
 			printf("in_delayed_cksum_offset: ip_len %d (%d) "
 			    "doesn't match actual length %d\n", ip->ip_len,
-			    ip_len, (m0->m_pkthdr.len - ip_offset));
+			    ip_len, (m0->m_pkthdr.len - ip_offset_copy));
 			return;
 		}
 	}
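Aside on this fix: only the first mbuf of a chain carries the M_PKTHDR
total length, and the seek loop advances m while consuming ip_offset, so
the pre-fix check compared ip_len against (m->m_pkthdr.len - ip_offset)
using the advanced values (the wrong mbuf's length) and could
mis-classify byte-swapped lengths.  The saved m0/ip_offset_copy restore
the intended invariant: ip_len == m0->m_pkthdr.len - ip_offset_copy.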
@@ -1880,6 +2043,10 @@ in_cksum_offset(struct mbuf* m, size_t ip_offset)
 	int hlen = 0;
 	unsigned char buf[sizeof(struct ip)];
 	int swapped = 0;
+
+	/* Save the first mbuf pointer and ip_offset before they are modified */
+	struct mbuf *m0 = m;
+	size_t ip_offset_copy = ip_offset;
 	
 	while (ip_offset >= m->m_len) {
 		ip_offset -= m->m_len;
@@ -1927,15 +2094,15 @@ in_cksum_offset(struct mbuf* m, size_t ip_offset)
 	 * the length and check again.  If it still fails, then the packet
 	 * is bogus and we give up.
 	 */
-	if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
+	if (ntohs(ip->ip_len) != (m0->m_pkthdr.len - ip_offset_copy)) {
 		ip->ip_len = SWAP16(ip->ip_len);
 		swapped = 1;
-		if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) {
+		if (ntohs(ip->ip_len) != (m0->m_pkthdr.len - ip_offset_copy)) {
 			ip->ip_len = SWAP16(ip->ip_len);
 			printf("in_cksum_offset: ip_len %d (%d) "
 			    "doesn't match actual length %lu\n",
 			    ip->ip_len, SWAP16(ip->ip_len),
-			    (m->m_pkthdr.len - ip_offset));
+			    (m0->m_pkthdr.len - ip_offset_copy));
 			return;
 		}
 	}
@@ -2120,6 +2287,7 @@ ip_ctloutput(so, sopt)
 #if defined(NFAITH) && NFAITH > 0
 		case IP_FAITH:
 #endif
+		case IP_RECVPKTINFO:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
@@ -2164,6 +2332,9 @@ ip_ctloutput(so, sopt)
 				OPTSET(INP_FAITH);
 				break;
 #endif
+			case IP_RECVPKTINFO:
+				OPTSET(INP_PKTINFO);
+				break;
 			}
 			break;
 #undef OPTSET
@@ -2200,14 +2371,14 @@ ip_ctloutput(so, sopt)
 					break;
 			}
 
-			if (sopt->sopt_valsize == 0 || ifname[0] == NULL) {
+			if (sopt->sopt_valsize == 0 || ifname[0] == '\0') {
 				/* Unbind this socket from any interface */
 				ifscope = IFSCOPE_NONE;
 			} else {
 				ifnet_t	ifp;
 
 				/* Verify name is NULL terminated */
-				if (ifname[sopt->sopt_valsize - 1] != NULL) {
+				if (ifname[sopt->sopt_valsize - 1] != '\0') {
 					error = EINVAL;
 					break;
 				}
@@ -2227,17 +2398,33 @@ ip_ctloutput(so, sopt)
 				 */
 				ifnet_release(ifp);
 			}
-			ip_bindif(inp, ifscope);
+			inp_bindif(inp, ifscope);
 		}
 		break;
 #endif
+		/*
+		 * Multicast socket options are processed by the in_mcast
+		 * module.
+		 */
 		case IP_MULTICAST_IF:
+		case IP_MULTICAST_IFINDEX:
 		case IP_MULTICAST_VIF:
 		case IP_MULTICAST_TTL:
 		case IP_MULTICAST_LOOP:
 		case IP_ADD_MEMBERSHIP:
 		case IP_DROP_MEMBERSHIP:
-			error = ip_setmoptions(sopt, &inp->inp_moptions);
+		case IP_ADD_SOURCE_MEMBERSHIP:
+		case IP_DROP_SOURCE_MEMBERSHIP:
+		case IP_BLOCK_SOURCE:
+		case IP_UNBLOCK_SOURCE:
+		case IP_MSFILTER:
+		case MCAST_JOIN_GROUP:
+		case MCAST_LEAVE_GROUP:
+		case MCAST_JOIN_SOURCE_GROUP:
+		case MCAST_LEAVE_SOURCE_GROUP:
+		case MCAST_BLOCK_SOURCE:
+		case MCAST_UNBLOCK_SOURCE:
+			error = inp_setmoptions(inp, sopt);
 			break;
 
 		case IP_PORTRANGE:
@@ -2277,10 +2464,6 @@ ip_ctloutput(so, sopt)
 			struct mbuf *m;
 			int optname;
 
-                        if (sopt->sopt_valsize > MCLBYTES) {
-                                error = EMSGSIZE;
-                                break;
-                        }
 			if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
 				break;
 			if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
@@ -2306,13 +2489,11 @@ ip_ctloutput(so, sopt)
 				break;
 
 			if (background) {
-				socket_set_traffic_mgt_flags(so,
-				    TRAFFIC_MGT_SO_BACKGROUND |
-				    TRAFFIC_MGT_SO_BG_REGULATE);
+				socket_set_traffic_mgt_flags_locked(so,
+				    TRAFFIC_MGT_SO_BACKGROUND);
 			} else {
-				socket_clear_traffic_mgt_flags(so,
-				    TRAFFIC_MGT_SO_BACKGROUND |
-				    TRAFFIC_MGT_SO_BG_REGULATE);
+				socket_clear_traffic_mgt_flags_locked(so,
+				    TRAFFIC_MGT_SO_BACKGROUND);
 			}
 
 			break;
@@ -2331,11 +2512,11 @@ ip_ctloutput(so, sopt)
 		 * on the destination address type (e.g.  unicast, multicast,
 		 * or broadcast if applicable) or whether or not the host is
 		 * directly reachable.  Note that in the multicast transmit
-		 * case, IP_MULTICAST_IF takes precedence over IP_BOUND_IF,
-		 * since the former practically bypasses the routing table;
-		 * in this case, IP_BOUND_IF sets the default interface used
-		 * for sending multicast packets in the absence of an explicit
-		 * transmit interface set via IP_MULTICAST_IF.
+		 * case, IP_MULTICAST_{IF,IFINDEX} takes precedence over
+		 * IP_BOUND_IF, since the former practically bypasses the
+		 * routing table; in this case, IP_BOUND_IF sets the default
+		 * interface used for sending multicast packets in the absence
+		 * of an explicit multicast transmit interface.
 		 */
 		case IP_BOUND_IF:
 			/* This option is settable only for IPv4 */
@@ -2350,7 +2531,28 @@ ip_ctloutput(so, sopt)
 			if (error)
 				break;
 
-			ip_bindif(inp, optval);
+			inp_bindif(inp, optval);
+			break;
+
+		case IP_NO_IFT_CELLULAR:
+			/* This option is settable only for IPv4 */
+			if (!(inp->inp_vflag & INP_IPV4)) {
+				error = EINVAL;
+				break;
+			}
+
+			error = sooptcopyin(sopt, &optval, sizeof (optval),
+			    sizeof (optval));
+
+			if (error)
+				break;
+
+			error = inp_nocellular(inp, optval);
+			break;
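Aside: hedged userland sketch of the new option.  IP_NO_IFT_CELLULAR is
a private Darwin option, so its visibility outside the kernel is an
assumption; per the ip_output() hunks above, a socket opted out this way
sees EADDRNOTAVAIL (or a no-route error) instead of having traffic
routed over an IFT_CELLULAR interface:

    int one = 1;

    if (setsockopt(s, IPPROTO_IP, IP_NO_IFT_CELLULAR,
        &one, sizeof (one)) < 0)
        perror("IP_NO_IFT_CELLULAR");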
+
+		case IP_OUT_IF:
+			/* This option is not settable */
+			error = EINVAL;
 			break;
 
 		default:
@@ -2383,6 +2585,7 @@ ip_ctloutput(so, sopt)
 #if defined(NFAITH) && NFAITH > 0
 		case IP_FAITH:
 #endif
+		case IP_RECVPKTINFO:
 			switch (sopt->sopt_name) {
 
 			case IP_TOS:
@@ -2429,17 +2632,20 @@ ip_ctloutput(so, sopt)
 				optval = OPTBIT(INP_FAITH);
 				break;
 #endif
+			case IP_RECVPKTINFO:
+				optval = OPTBIT(INP_PKTINFO);
+				break;
 			}
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 
 		case IP_MULTICAST_IF:
+		case IP_MULTICAST_IFINDEX:
 		case IP_MULTICAST_VIF:
 		case IP_MULTICAST_TTL:
 		case IP_MULTICAST_LOOP:
-		case IP_ADD_MEMBERSHIP:
-		case IP_DROP_MEMBERSHIP:
-			error = ip_getmoptions(sopt, inp->inp_moptions);
+		case IP_MSFILTER:
+			error = inp_getmoptions(inp, sopt);
 			break;
 
 #if IPSEC
@@ -2465,7 +2671,7 @@ ip_ctloutput(so, sopt)
 #if TRAFFIC_MGT
 		case IP_TRAFFIC_MGT_BACKGROUND:
 		{
-			unsigned	background = so->so_traffic_mgt_flags;
+			unsigned	background = (so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BACKGROUND);
 			return (sooptcopyout(sopt, &background, sizeof(background)));
 			break;
 		}
@@ -2477,6 +2683,16 @@ ip_ctloutput(so, sopt)
 			error = sooptcopyout(sopt, &optval, sizeof (optval));
 			break;
 
+		case IP_NO_IFT_CELLULAR:
+			optval = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
+			error = sooptcopyout(sopt, &optval, sizeof (optval));
+			break;
+
+		case IP_OUT_IF:
+			optval = inp->inp_last_outif;
+			error = sooptcopyout(sopt, &optval, sizeof (optval));
+			break;
+
 		default:
 			error = ENOPROTOOPT;
 			break;
@@ -2591,471 +2807,138 @@ bad:
 	return (EINVAL);
 }
 
-/*
- * XXX
- * The whole multicast option thing needs to be re-thought.
- * Several of these options are equally applicable to non-multicast
- * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
- * standard option (IP_TTL).
- */
-
-/*
- * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
- */
-static struct ifnet *
-ip_multicast_if(a, ifindexp)
-	struct in_addr *a;
-	int *ifindexp;
+void
+ip_moptions_init(void)
 {
-	int ifindex;
-	struct ifnet *ifp;
+	PE_parse_boot_argn("ifa_debug", &imo_debug, sizeof (imo_debug));
 
-	if (ifindexp)
-		*ifindexp = 0;
-	if (ntohl(a->s_addr) >> 24 == 0) {
-		ifindex = ntohl(a->s_addr) & 0xffffff;
-		ifnet_head_lock_shared();
-		if (ifindex < 0 || if_index < ifindex) {
-			ifnet_head_done();
-			return NULL;
-		}
-		ifp = ifindex2ifnet[ifindex];
-		ifnet_head_done();
-		if (ifindexp)
-			*ifindexp = ifindex;
-	} else {
-		INADDR_TO_IFP(*a, ifp);
+	imo_size = (imo_debug == 0) ? sizeof (struct ip_moptions) :
+	    sizeof (struct ip_moptions_dbg);
+
+	imo_zone = zinit(imo_size, IMO_ZONE_MAX * imo_size, 0,
+	    IMO_ZONE_NAME);
+	if (imo_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, IMO_ZONE_NAME);
+		/* NOTREACHED */
 	}
-	return ifp;
+	zone_change(imo_zone, Z_EXPAND, TRUE);
 }
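Aside: imo_debug piggybacks on the existing "ifa_debug" boot-arg, so
booting with ifa_debug=1 sizes the zone for struct ip_moptions_dbg and
lets ip_allocmoptions() (below) arm imo_trace(); the per-object
imo_refhold/imo_refrele rings, with imo_trace_hist_size exported "for
gdb" above, can then be walked from a debugger.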
 
-/*
- * Set the IP multicast options in response to user setsockopt().
- */
-static int
-ip_setmoptions(sopt, imop)
-	struct sockopt *sopt;
-	struct ip_moptions **imop;
+void
+imo_addref(struct ip_moptions *imo, int locked)
 {
-	int error = 0;
-	struct in_addr addr;
-	struct ip_mreq mreq;
-	struct ifnet *ifp = NULL;
-	struct ip_moptions *imo = *imop;
-	int ifindex;
-
-	if (imo == NULL) {
-		/*
-		 * No multicast option buffer attached to the pcb;
-		 * allocate one and initialize to default values.
-		 */
-		error = ip_createmoptions(imop);
-		if (error != 0)
-			return error;
-		imo = *imop;
-	}
-
-	switch (sopt->sopt_name) {
-	/* store an index number for the vif you wanna use in the send */
-#if MROUTING
-	case IP_MULTICAST_VIF: 
-		{
-			int i;
-			if (legal_vif_num == 0) {
-				error = EOPNOTSUPP;
-				break;
-			}
-			error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
-			if (error)
-				break;
-			if (!legal_vif_num(i) && (i != -1)) {
-				error = EINVAL;
-				break;
-			}
-			imo->imo_multicast_vif = i;
-			break;
-		}
-#endif /* MROUTING */
-
-	case IP_MULTICAST_IF:
-		/*
-		 * Select the interface for outgoing multicast packets.
-		 */
-		error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
-		if (error)
-			break;
-		/*
-		 * INADDR_ANY is used to remove a previous selection.
-		 * When no interface is selected, a default one is
-		 * chosen every time a multicast packet is sent.
-		 */
-		if (addr.s_addr == INADDR_ANY) {
-			imo->imo_multicast_ifp = NULL;
-			break;
-		}
-		/*
-		 * The selected interface is identified by its local
-		 * IP address.  Find the interface and confirm that
-		 * it supports multicasting.
-		 */
-		ifp = ip_multicast_if(&addr, &ifindex);
-		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
-			error = EADDRNOTAVAIL;
-			break;
-		}
-		imo->imo_multicast_ifp = ifp;
-		if (ifindex)
-			imo->imo_multicast_addr = addr;
-		else
-			imo->imo_multicast_addr.s_addr = INADDR_ANY;
-		break;
-
-	case IP_MULTICAST_TTL:
-		/*
-		 * Set the IP time-to-live for outgoing multicast packets.
-		 * The original multicast API required a char argument,
-		 * which is inconsistent with the rest of the socket API.
-		 * We allow either a char or an int.
-		 */
-		if (sopt->sopt_valsize == 1) {
-			u_char ttl;
-			error = sooptcopyin(sopt, &ttl, 1, 1);
-			if (error)
-				break;
-			imo->imo_multicast_ttl = ttl;
-		} else {
-			u_int ttl;
-			error = sooptcopyin(sopt, &ttl, sizeof ttl, 
-					    sizeof ttl);
-			if (error)
-				break;
-			if (ttl > 255)
-				error = EINVAL;
-			else
-				imo->imo_multicast_ttl = ttl;
-		}
-		break;
-
-	case IP_MULTICAST_LOOP:
-		/*
-		 * Set the loopback flag for outgoing multicast packets.
-		 * Must be zero or one.  The original multicast API required a
-		 * char argument, which is inconsistent with the rest
-		 * of the socket API.  We allow either a char or an int.
-		 */
-		if (sopt->sopt_valsize == 1) {
-			u_char loop;
-			error = sooptcopyin(sopt, &loop, 1, 1);
-			if (error)
-				break;
-			imo->imo_multicast_loop = !!loop;
-		} else {
-			u_int loop;
-			error = sooptcopyin(sopt, &loop, sizeof loop,
-					    sizeof loop);
-			if (error)
-				break;
-			imo->imo_multicast_loop = !!loop;
-		}
-		break;
-
-	case IP_ADD_MEMBERSHIP:
-		/*
-		 * Add a multicast group membership.
-		 * Group must be a valid IP multicast address.
-		 */
-		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
-		if (error)
-			break;
-		
-		error = ip_addmembership(imo, &mreq);
-		break;
-
-	case IP_DROP_MEMBERSHIP:
-		/*
-		 * Drop a multicast group membership.
-		 * Group must be a valid IP multicast address.
-		 */
-		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
-		if (error)
-			break;
-		
-		error = ip_dropmembership(imo, &mreq);
-		break;
-
-	default:
-		error = EOPNOTSUPP;
-		break;
-	}
+	if (!locked)
+		IMO_LOCK(imo);
+	else
+		IMO_LOCK_ASSERT_HELD(imo);
 
-	/*
-	 * If all options have default values, no need to keep the mbuf.
-	 */
-	if (imo->imo_multicast_ifp == NULL &&
-	    imo->imo_multicast_vif == (u_int32_t)-1 &&
-	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
-	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
-	    imo->imo_num_memberships == 0) {
-		FREE(*imop, M_IPMOPTS);
-		*imop = NULL;
+	if (++imo->imo_refcnt == 0) {
+		panic("%s: imo %p wraparound refcnt\n", __func__, imo);
+		/* NOTREACHED */
+	} else if (imo->imo_trace != NULL) {
+		(*imo->imo_trace)(imo, TRUE);
 	}
 
-	return (error);
+	if (!locked)
+		IMO_UNLOCK(imo);
 }
 
-/*
- * Set the IP multicast options in response to user setsockopt().
- */
-__private_extern__ int
-ip_createmoptions(
-	struct ip_moptions **imop)
-{
-	struct ip_moptions *imo;
-	imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS,
-		M_WAITOK);
-
-	if (imo == NULL)
-		return (ENOBUFS);
-	*imop = imo;
-	imo->imo_multicast_ifp = NULL;
-	imo->imo_multicast_addr.s_addr = INADDR_ANY;
-	imo->imo_multicast_vif = -1;
-	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
-	imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
-	imo->imo_num_memberships = 0;
-	
-	return 0;
-}
-
-/*
- * Add membership to an IPv4 multicast.
- */
-__private_extern__ int
-ip_addmembership(
-	struct ip_moptions *imo,
-	struct ip_mreq *mreq)
+void
+imo_remref(struct ip_moptions *imo)
 {
-	struct route ro;
-	struct sockaddr_in *dst;
-	struct ifnet *ifp = NULL;
-	int error = 0;
 	int i;
 
-	bzero((caddr_t)&ro, sizeof(ro));
-
-	if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
-		error = EINVAL;
-		goto done;
-	}
-	/*
-	 * If no interface address was provided, use the interface of
-	 * the route to the given multicast address.
-	 */
-	if (mreq->imr_interface.s_addr == INADDR_ANY) {
-		dst = (struct sockaddr_in *)&ro.ro_dst;
-		dst->sin_len = sizeof(*dst);
-		dst->sin_family = AF_INET;
-		dst->sin_addr = mreq->imr_multiaddr;
-		rtalloc_ign(&ro, 0);
-		if (ro.ro_rt != NULL) {
-			ifp = ro.ro_rt->rt_ifp;
-		} else {
-			/* If there's no default route, try using loopback */
-			mreq->imr_interface.s_addr = htonl(INADDR_LOOPBACK);
-		}
+	IMO_LOCK(imo);
+	if (imo->imo_refcnt == 0) {
+		panic("%s: imo %p negative refcnt", __func__, imo);
+		/* NOTREACHED */
+	} else if (imo->imo_trace != NULL) {
+		(*imo->imo_trace)(imo, FALSE);
 	}
 
-	if (ifp == NULL) {
-		ifp = ip_multicast_if(&mreq->imr_interface, NULL);
+	--imo->imo_refcnt;
+	if (imo->imo_refcnt > 0) {
+		IMO_UNLOCK(imo);
+		return;
 	}
 
-	/*
-	 * See if we found an interface, and confirm that it
-	 * supports multicast.
-	 */
-	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
-		error = EADDRNOTAVAIL;
-		goto done;
-	}
-	/*
-	 * See if the membership already exists or if all the
-	 * membership slots are full.
-	 */
 	for (i = 0; i < imo->imo_num_memberships; ++i) {
-		if (imo->imo_membership[i]->inm_ifp == ifp &&
-			imo->imo_membership[i]->inm_addr.s_addr
-					== mreq->imr_multiaddr.s_addr)
-			break;
-	}
-	if (i < imo->imo_num_memberships) {
-		error = EADDRINUSE;
-		goto done;
-	}
-	if (i == IP_MAX_MEMBERSHIPS) {
-		error = ETOOMANYREFS;
-		goto done;
-	}
-	/*
-	 * Everything looks good; add a new record to the multicast
-	 * address list for the given interface.
-	 */
-	if ((imo->imo_membership[i] =
-		in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
-		error = ENOBUFS;
-		goto done;
-	}
-	++imo->imo_num_memberships;
+		struct in_mfilter *imf;
 
-done:
-	if (ro.ro_rt != NULL)
-		rtfree(ro.ro_rt);
+		imf = imo->imo_mfilters ? &imo->imo_mfilters[i] : NULL;
+		if (imf != NULL)
+			imf_leave(imf);
 
-	return error;
-}
+		(void) in_leavegroup(imo->imo_membership[i], imf);
 
-/*
- * Drop membership of an IPv4 multicast.
- */
-__private_extern__ int
-ip_dropmembership(
-	struct ip_moptions *imo,
-	struct ip_mreq *mreq)
-{
-	int error = 0;
-	struct ifnet* ifp = NULL;
-	int i;
-	
-	if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
-		error = EINVAL;
-		return error;
-	}
+		if (imf != NULL)
+			imf_purge(imf);
 
-	/*
-	 * If an interface address was specified, get a pointer
-	 * to its ifnet structure.
-	 */
-	if (mreq->imr_interface.s_addr == INADDR_ANY)
-		ifp = NULL;
-	else {
-		ifp = ip_multicast_if(&mreq->imr_interface, NULL);
-		if (ifp == NULL) {
-			error = EADDRNOTAVAIL;
-			return error;
-		}
+		INM_REMREF(imo->imo_membership[i]);
+		imo->imo_membership[i] = NULL;
 	}
-	/*
-	 * Find the membership in the membership array.
-	 */
-	for (i = 0; i < imo->imo_num_memberships; ++i) {
-		if ((ifp == NULL ||
-			 imo->imo_membership[i]->inm_ifp == ifp) &&
-			 imo->imo_membership[i]->inm_addr.s_addr ==
-			 mreq->imr_multiaddr.s_addr)
-			break;
+	imo->imo_num_memberships = 0;
+	if (imo->imo_mfilters != NULL) {
+		FREE(imo->imo_mfilters, M_INMFILTER);
+		imo->imo_mfilters = NULL;
 	}
-	if (i == imo->imo_num_memberships) {
-		error = EADDRNOTAVAIL;
-		return error;
+	if (imo->imo_membership != NULL) {
+		FREE(imo->imo_membership, M_IPMOPTS);
+		imo->imo_membership = NULL;
 	}
-	/*
-	 * Give up the multicast address record to which the
-	 * membership points.
-	 */
-	in_delmulti(&imo->imo_membership[i]);
-	/*
-	 * Remove the gap in the membership array.
-	 */
-	for (++i; i < imo->imo_num_memberships; ++i)
-		imo->imo_membership[i-1] = imo->imo_membership[i];
-	--imo->imo_num_memberships;
-	
-	return error;
-}
-
-/*
- * Return the IP multicast options in response to user getsockopt().
- */
-static int
-ip_getmoptions(sopt, imo)
-	struct sockopt *sopt;
-	register struct ip_moptions *imo;
-{
-	struct in_addr addr;
-	struct in_ifaddr *ia;
-	int error, optval;
-	u_char coptval;
-
-	error = 0;
-	switch (sopt->sopt_name) {
-#if MROUTING
-	case IP_MULTICAST_VIF: 
-		if (imo != NULL)
-			optval = imo->imo_multicast_vif;
-		else
-			optval = -1;
-		error = sooptcopyout(sopt, &optval, sizeof optval);
-		break;
-#endif /* MROUTING */
+	IMO_UNLOCK(imo);
 
-	case IP_MULTICAST_IF:
-		if (imo == NULL || imo->imo_multicast_ifp == NULL)
-			addr.s_addr = INADDR_ANY;
-		else if (imo->imo_multicast_addr.s_addr) {
-			/* return the value user has set */
-			addr = imo->imo_multicast_addr;
-		} else {
-			IFP_TO_IA(imo->imo_multicast_ifp, ia);
-			addr.s_addr = (ia == NULL) ? INADDR_ANY
-				: IA_SIN(ia)->sin_addr.s_addr;
-			if (ia != NULL)
-				ifafree(&ia->ia_ifa);
-		}
-		error = sooptcopyout(sopt, &addr, sizeof addr);
-		break;
-
-	case IP_MULTICAST_TTL:
-		if (imo == 0)
-			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
-		else
-			optval = coptval = imo->imo_multicast_ttl;
-		if (sopt->sopt_valsize == 1)
-			error = sooptcopyout(sopt, &coptval, 1);
-		else
-			error = sooptcopyout(sopt, &optval, sizeof optval);
-		break;
+	lck_mtx_destroy(&imo->imo_lock, ifa_mtx_grp);
 
-	case IP_MULTICAST_LOOP:
-		if (imo == 0)
-			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
-		else
-			optval = coptval = imo->imo_multicast_loop;
-		if (sopt->sopt_valsize == 1)
-			error = sooptcopyout(sopt, &coptval, 1);
-		else
-			error = sooptcopyout(sopt, &optval, sizeof optval);
-		break;
+	if (!(imo->imo_debug & IFD_ALLOC)) {
+		panic("%s: imo %p cannot be freed", __func__, imo);
+		/* NOTREACHED */
+	}
+	zfree(imo_zone, imo);
+}
 
-	default:
-		error = ENOPROTOOPT;
-		break;
+static void
+imo_trace(struct ip_moptions *imo, int refhold)
+{
+	struct ip_moptions_dbg *imo_dbg = (struct ip_moptions_dbg *)imo;
+	ctrace_t *tr;
+	u_int32_t idx;
+	u_int16_t *cnt;
+
+	if (!(imo->imo_debug & IFD_DEBUG)) {
+		panic("%s: imo %p has no debug structure", __func__, imo);
+		/* NOTREACHED */
+	}
+	if (refhold) {
+		cnt = &imo_dbg->imo_refhold_cnt;
+		tr = imo_dbg->imo_refhold;
+	} else {
+		cnt = &imo_dbg->imo_refrele_cnt;
+		tr = imo_dbg->imo_refrele;
 	}
-	return (error);
+
+	idx = atomic_add_16_ov(cnt, 1) % IMO_TRACE_HIST_SIZE;
+	ctrace_record(&tr[idx]);
 }
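Aside: the trace above keeps the most recent IMO_TRACE_HIST_SIZE callers
without locking by taking an atomic counter modulo the ring size
(atomic_add_16_ov is assumed to behave as an atomic fetch-and-add).
The same idiom in portable C11, for illustration only:

    #include <stdatomic.h>

    #define HIST_SIZE 32

    static _Atomic unsigned short hist_cnt;
    static void *hist[HIST_SIZE];

    static void
    record(void *caller)
    {
        /* wraps and overwrites the oldest slot once full */
        unsigned short idx = atomic_fetch_add(&hist_cnt, 1) % HIST_SIZE;
        hist[idx] = caller;             /* stands in for ctrace_record() */
    }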
 
-/*
- * Discard the IP multicast options.
- */
-void
-ip_freemoptions(imo)
-	register struct ip_moptions *imo;
+struct ip_moptions *
+ip_allocmoptions(int how)
 {
-	register int i;
+	struct ip_moptions *imo;
 
+	imo = (how == M_WAITOK) ? zalloc(imo_zone) : zalloc_noblock(imo_zone);
 	if (imo != NULL) {
-		for (i = 0; i < imo->imo_num_memberships; ++i)
-			in_delmulti(&imo->imo_membership[i]);
-		FREE(imo, M_IPMOPTS);
+		bzero(imo, imo_size);
+		lck_mtx_init(&imo->imo_lock, ifa_mtx_grp, ifa_mtx_attr);
+		imo->imo_debug |= IFD_ALLOC;
+		if (imo_debug != 0) {
+			imo->imo_debug |= IFD_DEBUG;
+			imo->imo_trace = imo_trace;
+		}
+		IMO_ADDREF(imo);
 	}
+
+	return (imo);
 }
 
 /*
@@ -3174,6 +3057,8 @@ ip_mloopback(ifp, m, dst, hlen)
  * without any locks based on the assumption that ip_output() is single-
  * threaded per-pcb, i.e. for any given pcb there can only be one thread
  * performing output at the IP layer.
+ *
+ * This routine is analogous to in6_selectroute() for IPv6.
  */
 static struct ifaddr *
 in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
@@ -3215,9 +3100,9 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
 		 */
 		if (scope == IFSCOPE_NONE) {
 			scope = rt_ifp->if_index;
-			if (scope != get_primary_ifscope() &&
+			if (scope != get_primary_ifscope(AF_INET) &&
 			    ro->ro_rt->generation_id != route_generation)
-				scope = get_primary_ifscope();
+				scope = get_primary_ifscope(AF_INET);
 		}
 
 		ifa = (struct ifaddr *)ifa_foraddr_scoped(src.s_addr, scope);
@@ -3232,7 +3117,7 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
 			 */
 			ifa = (struct ifaddr *)ifa_foraddr(src.s_addr);
 			if (ifa != NULL) {
-				ifafree(ifa);
+				IFA_REMREF(ifa);
 				ifa = NULL;
 				ifscope = IFSCOPE_NONE;
 			}
@@ -3240,16 +3125,14 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
 
 		if (ip_select_srcif_debug && ifa != NULL) {
 			if (ro->ro_rt != NULL) {
-				printf("%s->%s ifscope %d->%d ifa_if %s%d "
-				    "ro_if %s%d\n", s_src, s_dst, ifscope,
-				    scope, ifa->ifa_ifp->if_name,
-				    ifa->ifa_ifp->if_unit, rt_ifp->if_name,
-				    rt_ifp->if_unit);
+				printf("%s->%s ifscope %d->%d ifa_if %s "
+				    "ro_if %s\n", s_src, s_dst, ifscope,
+				    scope, if_name(ifa->ifa_ifp),
+				    if_name(rt_ifp));
 			} else {
-				printf("%s->%s ifscope %d->%d ifa_if %s%d\n",
+				printf("%s->%s ifscope %d->%d ifa_if %s\n",
 				    s_src, s_dst, ifscope, scope,
-				    ifa->ifa_ifp->if_name,
-				    ifa->ifa_ifp->if_unit);
+				    if_name(ifa->ifa_ifp));
 			}
 		}
 	}
@@ -3296,7 +3179,7 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
 				if (ifa->ifa_ifp != rt->rt_ifp) {
 					oifa = ifa;
 					ifa = rt->rt_ifa;
-					ifaref(ifa);
+					IFA_ADDREF(ifa);
 					RT_UNLOCK(rt);
 				} else {
 					RT_UNLOCK(rt);
@@ -3322,8 +3205,8 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
 					 * as well as the route interface
 					 * address, and use this instead.
 					 */
-					ifafree(oifa);
-					ifafree(ifa);
+					IFA_REMREF(oifa);
+					IFA_REMREF(ifa);
 					ifa = iifa;
 				} else if (!ipforwarding ||
 				    (rt->rt_flags & RTF_GATEWAY)) {
@@ -3334,7 +3217,7 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
 					 * original one, and let the caller
 					 * do a scoped route lookup.
 					 */
-					ifafree(ifa);
+					IFA_REMREF(ifa);
 					ifa = oifa;
 				} else {
 					/*
@@ -3347,7 +3230,7 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
 					 * the original one and use the route
 					 * interface address instead.
 					 */
-					ifafree(oifa);
+					IFA_REMREF(oifa);
 				}
 			}
 		} else if (ifa != NULL && ro->ro_rt != NULL &&
@@ -3359,15 +3242,14 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
 			 * as the interface used by the known route; drop the
 			 * original one and use the route interface address.
 			 */
-			ifafree(ifa);
+			IFA_REMREF(ifa);
 			ifa = ro->ro_rt->rt_ifa;
-			ifaref(ifa);
+			IFA_ADDREF(ifa);
 		}
 
 		if (ip_select_srcif_debug && ifa != NULL) {
-			printf("%s->%s ifscope %d ifa_if %s%d\n",
-			    s_src, s_dst, ifscope, ifa->ifa_ifp->if_name,
-			    ifa->ifa_ifp->if_unit);
+			printf("%s->%s ifscope %d ifa_if %s\n",
+			    s_src, s_dst, ifscope, if_name(ifa->ifa_ifp));
 		}
 	}
 
@@ -3384,16 +3266,14 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
 	    !(ro->ro_rt->rt_flags & RTF_UP))) {
 		if (ip_select_srcif_debug) {
 			if (ifa != NULL) {
-				printf("%s->%s ifscope %d ro_if %s%d != "
-				    "ifa_if %s%d (cached route cleared)\n",
-				    s_src, s_dst, ifscope, rt_ifp->if_name,
-				    rt_ifp->if_unit, ifa->ifa_ifp->if_name,
-				    ifa->ifa_ifp->if_unit);
+				printf("%s->%s ifscope %d ro_if %s != "
+				    "ifa_if %s (cached route cleared)\n",
+				    s_src, s_dst, ifscope, if_name(rt_ifp),
+				    if_name(ifa->ifa_ifp));
 			} else {
-				printf("%s->%s ifscope %d ro_if %s%d "
+				printf("%s->%s ifscope %d ro_if %s "
 				    "(no ifa_if found)\n",
-				    s_src, s_dst, ifscope, rt_ifp->if_name,
-				    rt_ifp->if_unit);
+				    s_src, s_dst, ifscope, if_name(rt_ifp));
 			}
 		}
 
@@ -3414,7 +3294,7 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
 		 */
 		if (IN_LINKLOCAL(ntohl(dst.s_addr)) &&
 		    !IN_LINKLOCAL(ntohl(src.s_addr)) && ifa != NULL) {
-			ifafree(ifa);
+			IFA_REMREF(ifa);
 			ifa = NULL;
 		}
 	}
@@ -3444,31 +3324,3 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope)
 
 	return (ifa);
 }
-
-/*
- * Handler for setting IP_FORCE_OUT_IFP or IP_BOUND_IF socket option.
- */
-static void
-ip_bindif(struct inpcb *inp, unsigned int ifscope)
-{
-	/*
-	 * A zero interface scope value indicates an "unbind".
-	 * Otherwise, take in whatever value the app desires;
-	 * the app may already know the scope (or force itself
-	 * to such a scope) ahead of time before the interface
-	 * gets attached.  It doesn't matter either way; any
-	 * route lookup from this point on will require an
-	 * exact match for the embedded interface scope.
-	 */
-	inp->inp_boundif = ifscope;
-	if (inp->inp_boundif == IFSCOPE_NONE)
-		inp->inp_flags &= ~INP_BOUND_IF;
-	else
-		inp->inp_flags |= INP_BOUND_IF;
-
-	/* Blow away any cached route in the PCB */
-	if (inp->inp_route.ro_rt != NULL) {
-		rtfree(inp->inp_route.ro_rt);
-		inp->inp_route.ro_rt = NULL;
-	}
-}
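
The reference-counted ip_moptions above replaces the old free-on-pointer model:
ip_allocmoptions() hands back an object already holding one reference, and the
last IMO_REMREF() (a macro added to ip_var.h below) frees it and destroys its
mutex. A minimal caller-side sketch, assuming a kernel-private context where
those symbols are visible; the option values are illustrative only:

	struct ip_moptions *imo;

	imo = ip_allocmoptions(M_WAITOK);	/* returns with one reference */
	if (imo == NULL)
		return (ENOMEM);

	IMO_LOCK(imo);
	imo->imo_multicast_ttl = 1;		/* keep sends link-local */
	imo->imo_multicast_loop = 1;		/* hear our own sends */
	IMO_UNLOCK(imo);

	/* ... pass imo to ip_output() and friends ... */

	IMO_REMREF(imo);			/* last reference frees imo */
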
diff --git a/bsd/netinet/ip_var.h b/bsd/netinet/ip_var.h
index 9d4044d4c..971a88126 100644
--- a/bsd/netinet/ip_var.h
+++ b/bsd/netinet/ip_var.h
@@ -121,7 +121,7 @@ struct ipq {
  */
 #endif /* KERNEL_PRIVATE */
 #define MAX_IPOPTLEN	40
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 
 struct ipoption {
 	struct	in_addr ipopt_dst;	/* first-hop dst if source routed */
@@ -133,21 +133,57 @@ struct ipoption {
  * passed to ip_output when IP multicast options are in use.
  */
 struct ip_moptions {
+	decl_lck_mtx_data(, imo_lock);
+	uint32_t imo_refcnt;		/* ref count */
+	uint32_t imo_debug;		/* see ifa_debug flags */
 	struct	ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */
 	u_char	imo_multicast_ttl;	/* TTL for outgoing multicasts */
 	u_char	imo_multicast_loop;	/* 1 => hear sends if a member */
 	u_short	imo_num_memberships;	/* no. memberships this socket */
-	struct	in_multi *imo_membership[IP_MAX_MEMBERSHIPS];
-	u_int32_t	imo_multicast_vif;	/* vif num outgoing multicasts */
+	u_short	imo_max_memberships;	/* max memberships this socket */
+	struct	in_multi **imo_membership;	/* group memberships */
+	struct	in_mfilter *imo_mfilters;	/* source filters */
+	u_int32_t imo_multicast_vif;	/* vif num outgoing multicasts */
 	struct	in_addr imo_multicast_addr; /* ifindex/addr on MULTICAST_IF */
+	void (*imo_trace)		/* callback fn for tracing refs */
+	    (struct ip_moptions *, int);
 };
 
+#define	IMO_LOCK_ASSERT_HELD(_imo)					\
+	lck_mtx_assert(&(_imo)->imo_lock, LCK_MTX_ASSERT_OWNED)
+
+#define	IMO_LOCK_ASSERT_NOTHELD(_imo)					\
+	lck_mtx_assert(&(_imo)->imo_lock, LCK_MTX_ASSERT_NOTOWNED)
+
+#define	IMO_LOCK(_imo)							\
+	lck_mtx_lock(&(_imo)->imo_lock)
+
+#define	IMO_LOCK_SPIN(_imo)						\
+	lck_mtx_lock_spin(&(_imo)->imo_lock)
+
+#define	IMO_CONVERT_LOCK(_imo) do {					\
+	IMO_LOCK_ASSERT_HELD(_imo);					\
+	lck_mtx_convert_spin(&(_imo)->imo_lock);			\
+} while (0)
+
+#define	IMO_UNLOCK(_imo)						\
+	lck_mtx_unlock(&(_imo)->imo_lock)
+
+#define	IMO_ADDREF(_imo)						\
+	imo_addref(_imo, 0)
+
+#define	IMO_ADDREF_LOCKED(_imo)						\
+	imo_addref(_imo, 1)
+
+#define	IMO_REMREF(_imo)						\
+	imo_remref(_imo)
+
 /* mbuf tag for ip_forwarding info */
 struct ip_fwd_tag {
 	struct sockaddr_in *next_hop;	/* next_hop */
 };
 
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 struct	ipstat {
 	u_int32_t	ips_total;		/* total packets received */
@@ -179,6 +215,9 @@ struct	ipstat {
 	u_int32_t	ips_notmember;		/* multicasts for unregistered grps */
 	u_int32_t	ips_nogif;		/* no match gif found */
 	u_int32_t	ips_badaddr;		/* invalid address on header */
+#ifdef PRIVATE
+	u_int32_t	ips_pktdropcntrl;		/* pkt dropped, no mbufs for control data */
+#endif /* PRIVATE */
 };
 
 struct ip_linklocal_stat {
@@ -206,7 +245,8 @@ struct sockopt;
  * Extra information passed to ip_output when IP_OUTARGS is set.
  */
 struct ip_out_args {
-	unsigned int	ipoa_ifscope;	/* interface scope */
+	unsigned int	ipoa_boundif;	/* bound outgoing interface */
+	unsigned int	ipoa_nocell;	/* don't use IFT_CELLULAR */
 };
 
 extern struct	ipstat	ipstat;
@@ -224,9 +264,15 @@ extern int rsvp_on;
 extern struct	pr_usrreqs rip_usrreqs;
 extern int	ip_doscopedroute;
 
+extern void ip_moptions_init(void);
+extern struct ip_moptions *ip_allocmoptions(int);
+extern int inp_getmoptions(struct inpcb *, struct sockopt *);
+extern int inp_setmoptions(struct inpcb *, struct sockopt *);
+extern void imo_addref(struct ip_moptions *, int);
+extern void imo_remref(struct ip_moptions *);
+
 int	 ip_ctloutput(struct socket *, struct sockopt *sopt);
 void	 ip_drain(void);
-void	 ip_freemoptions(struct ip_moptions *);
 void	 ip_init(void) __attribute__((section("__TEXT, initcode")));
 extern int	 (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
 			  struct ip_moptions *);
@@ -235,7 +281,7 @@ extern int ip_output(struct mbuf *, struct mbuf *, struct route *, int,
 extern int ip_output_list(struct mbuf *, int, struct mbuf *, struct route *,
     int, struct ip_moptions *, struct ip_out_args *);
 struct in_ifaddr *ip_rtaddr(struct in_addr);
-void	 ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
+int	 ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
 		struct mbuf *);
 void	 ip_slowtimo(void);
 struct mbuf *
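
The IMO_LOCK_SPIN()/IMO_CONVERT_LOCK() pair above follows the usual xnu
discipline of taking the mutex in spin mode for short sections and converting
it to a full mutex before doing anything that can block. A hedged sketch of
that pattern; the work inside is hypothetical:

	IMO_LOCK_SPIN(imo);		/* cheap; no blocking while spinning */
	if (imo->imo_num_memberships == 0) {
		IMO_UNLOCK(imo);
		return;
	}
	IMO_CONVERT_LOCK(imo);		/* now a full mutex; may block */
	/* ... potentially blocking work on imo ... */
	IMO_UNLOCK(imo);
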
diff --git a/bsd/netinet/kpi_ipfilter.c b/bsd/netinet/kpi_ipfilter.c
index 6aea8ccf2..b03f56cd1 100644
--- a/bsd/netinet/kpi_ipfilter.c
+++ b/bsd/netinet/kpi_ipfilter.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -235,7 +235,7 @@ ipf_inject_input(
 	}
 	
 	if (filter_ref == 0 && m->m_pkthdr.rcvif == 0) {
-		m->m_pkthdr.rcvif = ifunit("lo0");
+		m->m_pkthdr.rcvif = lo_ifp;
 		m->m_pkthdr.csum_data = 0;
 		m->m_pkthdr.csum_flags = 0;
 		if (vers == 4) {
@@ -245,8 +245,8 @@ ipf_inject_input(
 		}
 	}
 	if (filter_ref != 0) {
-		mtag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFILT,
-					 	   sizeof (ipfilter_t), M_NOWAIT);
+		mtag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFILT,
+					 	   sizeof (ipfilter_t), M_NOWAIT, m);
 		if (mtag == NULL) {
 			error = ENOMEM;
 			goto done;
@@ -262,58 +262,54 @@ done:
 }
 
 static errno_t
-ipf_injectv4_out(
-	mbuf_t data,
-	ipfilter_t filter_ref,
-	ipf_pktopts_t options)
+ipf_injectv4_out(mbuf_t data, ipfilter_t filter_ref, ipf_pktopts_t options)
 {
 	struct route ro;
-	struct sockaddr_in	*sin = (struct sockaddr_in*)&ro.ro_dst;
 	struct ip	*ip;
 	struct mbuf	*m = (struct mbuf*)data;
 	errno_t error = 0;
-	struct m_tag *mtag = 0;
-	struct ip_moptions *imo = 0, ip_moptions;
-	
+	struct m_tag *mtag = NULL;
+	struct ip_moptions *imo = NULL;
+	struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
+
 	/* Make the IP header contiguous in the mbuf */
-	if ((size_t)m->m_len < sizeof(struct ip)) {
-		m = m_pullup(m, sizeof(struct ip));
-		if (m == NULL) return ENOMEM;
+	if ((size_t)m->m_len < sizeof (struct ip)) {
+		m = m_pullup(m, sizeof (struct ip));
+		if (m == NULL)
+			return (ENOMEM);
 	}
-	ip = (struct ip*)m_mtod(m);
-	
+	ip = (struct ip *)m_mtod(m);
+
 	if (filter_ref != 0) {
-		mtag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFILT,
-					 	   sizeof (ipfilter_t), M_NOWAIT);
+		mtag = m_tag_create(KERNEL_MODULE_TAG_ID,
+		    KERNEL_TAG_TYPE_IPFILT, sizeof (ipfilter_t), M_NOWAIT, m);
 		if (mtag == NULL) {
 			m_freem(m);
-			return ENOMEM;
+			return (ENOMEM);
 		}
-		*(ipfilter_t*)(mtag+1) = filter_ref;
+		*(ipfilter_t *)(mtag + 1) = filter_ref;
 		m_tag_prepend(m, mtag);
 	}
-	
-	if (options && (options->ippo_flags & IPPOF_MCAST_OPTS)) {
-		imo = &ip_moptions;
-		
-		bzero(imo, sizeof(struct ip6_moptions));
+
+	if (options != NULL && (options->ippo_flags & IPPOF_MCAST_OPTS) &&
+	    (imo = ip_allocmoptions(M_DONTWAIT)) != NULL) {
 		imo->imo_multicast_ifp = options->ippo_mcast_ifnet;
 		imo->imo_multicast_ttl = options->ippo_mcast_ttl;
 		imo->imo_multicast_loop = options->ippo_mcast_loop;
 	}
-	
-	/* Fill out a route structure and get a route */
-	bzero(&ro, sizeof(struct route));
-	sin->sin_len = sizeof(struct sockaddr_in);
-	sin->sin_family = AF_INET;
-	sin->sin_port = 0;
-	sin->sin_addr = ip->ip_dst;
-	rtalloc(&ro);
-	if (ro.ro_rt == NULL) {
-		m_freem(m);
-		return ENETUNREACH;
+
+	if (options != NULL &&
+	    (options->ippo_flags & (IPPOF_BOUND_IF | IPPOF_NO_IFT_CELLULAR))) {
+		if (options->ippo_flags & IPPOF_BOUND_IF) {
+			ipoa.ipoa_boundif = options->ippo_flags >>
+			    IPPOF_SHIFT_IFSCOPE;
+		}
+		if (options->ippo_flags & IPPOF_NO_IFT_CELLULAR)
+			ipoa.ipoa_nocell = 1;
 	}
-	
+
+	bzero(&ro, sizeof(struct route));
+
 	/* Put ip_len and ip_off in host byte order, ip_output expects that */
 
 #if BYTE_ORDER != BIG_ENDIAN
@@ -321,88 +317,85 @@ ipf_injectv4_out(
 	NTOHS(ip->ip_off);
 #endif
 
-	/* Send  */
-	error = ip_output(m, NULL, &ro, IP_ALLOWBROADCAST | IP_RAWOUTPUT, imo, NULL);
-	
+	/* Send; enforce source interface selection via IP_OUTARGS flag */
+	error = ip_output(m, NULL, &ro,
+	    IP_ALLOWBROADCAST | IP_RAWOUTPUT | IP_OUTARGS, imo, &ipoa);
+
 	/* Release the route */
 	if (ro.ro_rt)
 		rtfree(ro.ro_rt);
-	
-	return error;
+
+	if (imo != NULL)
+		IMO_REMREF(imo);
+
+	return (error);
 }
 
 #if INET6
 static errno_t
-ipf_injectv6_out(
-	mbuf_t data,
-	ipfilter_t filter_ref,
-	ipf_pktopts_t options)
+ipf_injectv6_out(mbuf_t data, ipfilter_t filter_ref, ipf_pktopts_t options)
 {
 	struct route_in6 ro;
-	struct sockaddr_in6	*sin6 = &ro.ro_dst;
 	struct ip6_hdr	*ip6;
 	struct mbuf	*m = (struct mbuf*)data;
 	errno_t error = 0;
-	struct m_tag *mtag = 0;
-	struct ip6_moptions *im6o = 0, ip6_moptions;
-	
+	struct m_tag *mtag = NULL;
+	struct ip6_moptions *im6o = NULL;
+	struct ip6_out_args ip6oa = { IFSCOPE_NONE, 0 };
+
 	/* Make the IP header contiguous in the mbuf */
 	if ((size_t)m->m_len < sizeof(struct ip6_hdr)) {
 		m = m_pullup(m, sizeof(struct ip6_hdr));
-		if (m == NULL) return ENOMEM;
+		if (m == NULL)
+			return (ENOMEM);
 	}
 	ip6 = (struct ip6_hdr*)m_mtod(m);
 
 	if (filter_ref != 0) {
-		mtag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFILT,
-					 	   sizeof (ipfilter_t), M_NOWAIT);
+		mtag = m_tag_create(KERNEL_MODULE_TAG_ID,
+		    KERNEL_TAG_TYPE_IPFILT, sizeof (ipfilter_t), M_NOWAIT, m);
 		if (mtag == NULL) {
 			m_freem(m);
-			return ENOMEM;
+			return (ENOMEM);
 		}
-		*(ipfilter_t*)(mtag+1) = filter_ref;
+		*(ipfilter_t *)(mtag + 1) = filter_ref;
 		m_tag_prepend(m, mtag);
 	}
-	
-	if (options && (options->ippo_flags & IPPOF_MCAST_OPTS)) {
-		im6o = &ip6_moptions;
-		
-		bzero(im6o, sizeof(struct ip6_moptions));
+
+	if (options != NULL && (options->ippo_flags & IPPOF_MCAST_OPTS) &&
+	    (im6o = ip6_allocmoptions(M_DONTWAIT)) != NULL) {
 		im6o->im6o_multicast_ifp = options->ippo_mcast_ifnet;
 		im6o->im6o_multicast_hlim = options->ippo_mcast_ttl;
 		im6o->im6o_multicast_loop = options->ippo_mcast_loop;
 	}
-	
-	
-	/* Fill out a route structure and get a route */
-	bzero(&ro, sizeof(struct route_in6));
-	sin6->sin6_len = sizeof(struct sockaddr_in6);
-	sin6->sin6_family = AF_INET6;
-	sin6->sin6_addr = ip6->ip6_dst;
-#if 0
-	/* This is breaks loopback multicast! */
-	/* The scope ID should already at s6_addr16[1] */
-	if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) {
-		/* Hack, pull the scope_id out of the dest addr */
-		sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]);
-		ip6->ip6_dst.s6_addr16[1] = 0;
-	} else
-		sin6->sin6_scope_id = 0;
-#endif
-	rtalloc((struct route*)&ro);
-	if (ro.ro_rt == NULL) {
-		m_freem(m);
-		return ENETUNREACH;
+
+	if (options != NULL &&
+	    (options->ippo_flags & (IPPOF_BOUND_IF | IPPOF_NO_IFT_CELLULAR))) {
+		if (options->ippo_flags & IPPOF_BOUND_IF) {
+			ip6oa.ip6oa_boundif = options->ippo_flags >>
+			    IPPOF_SHIFT_IFSCOPE;
+		}
+		if (options->ippo_flags & IPPOF_NO_IFT_CELLULAR)
+			ip6oa.ip6oa_nocell = 1;
 	}
-	
-	/* Send  */
-	error = ip6_output(m, NULL, &ro, 0, im6o, NULL, 0);
-	
+
+	bzero(&ro, sizeof(struct route_in6));
+
+	/*
+	 * Send the mbuf along with the ifscope information.  The
+	 * correctness of the ifscope is validated while searching
+	 * for a route in ip6_output.
+	 */
+	error = ip6_output(m, NULL, &ro, IPV6_OUTARGS, im6o, NULL, &ip6oa);
+
 	/* Release the route */
 	if (ro.ro_rt)
 		rtfree(ro.ro_rt);
-	
-	return error;
+
+	if (im6o != NULL)
+		IM6O_REMREF(im6o);
+
+	return (error);
 }
 #endif /* INET6 */
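
Both inject-out paths now defer route and source-interface selection to
ip_output()/ip6_output() via the *_OUTARGS flag rather than calling rtalloc()
themselves; the out-args structure carries the scoping constraints. The v4
shape of the pattern, condensed from the code above (bound_ifindex is a
hypothetical stand-in for a real interface scope):

	struct route ro;
	struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };

	ipoa.ipoa_boundif = bound_ifindex;	/* or IFSCOPE_NONE */
	ipoa.ipoa_nocell = 1;			/* refuse IFT_CELLULAR */

	bzero(&ro, sizeof (ro));
	error = ip_output(m, NULL, &ro, IP_RAWOUTPUT | IP_OUTARGS, NULL, &ipoa);
	if (ro.ro_rt != NULL)
		rtfree(ro.ro_rt);
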
 
diff --git a/bsd/netinet/kpi_ipfilter.h b/bsd/netinet/kpi_ipfilter.h
index 3d2aaaac9..1f7fae6f0 100644
--- a/bsd/netinet/kpi_ipfilter.h
+++ b/bsd/netinet/kpi_ipfilter.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -50,7 +50,12 @@ struct ipf_pktopts {
 	int				ippo_mcast_loop;
 	u_int8_t			ippo_mcast_ttl;
 };
-#define IPPOF_MCAST_OPTS 0x1
+#define IPPOF_MCAST_OPTS	0x1
+#ifdef PRIVATE
+#define IPPOF_BOUND_IF		0x2
+#define IPPOF_NO_IFT_CELLULAR	0x4
+#define IPPOF_SHIFT_IFSCOPE	16
+#endif /* PRIVATE */
 
 typedef struct ipf_pktopts *ipf_pktopts_t;
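
Since IPPOF_SHIFT_IFSCOPE is 16, the bound interface index shares the 32-bit
ippo_flags word with the flag bits: the scope occupies the upper 16 bits. A
sketch of how a private client would encode it, with ifscope a hypothetical
interface index:

	struct ipf_pktopts opts;

	bzero(&opts, sizeof (opts));
	opts.ippo_flags = IPPOF_BOUND_IF | (ifscope << IPPOF_SHIFT_IFSCOPE);

The inject paths in kpi_ipfilter.c recover the index with
ippo_flags >> IPPOF_SHIFT_IFSCOPE, as shown above.
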
 
diff --git a/bsd/netinet/raw_ip.c b/bsd/netinet/raw_ip.c
index c03fde7d0..0b63a3c0d 100644
--- a/bsd/netinet/raw_ip.c
+++ b/bsd/netinet/raw_ip.c
@@ -200,7 +200,7 @@ rip_input(m, iphlen)
 	register struct inpcb *inp;
 	struct inpcb *last = 0;
 	struct mbuf *opts = 0;
-	int skipit;
+	int skipit = 0, ret = 0;
 
 	ripsrc.sin_addr = ip->ip_src;
 	lck_rw_lock_shared(ripcbinfo.mtx);
@@ -220,9 +220,9 @@ rip_input(m, iphlen)
 		if (last) {
 			struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
 		
+			skipit = 0;
 #if IPSEC
 			/* check AH/ESP integrity. */
-			skipit = 0;
 			if (ipsec_bypass == 0 && n) {
 				if (ipsec4_in_reject_so(n, last->inp_socket)) {
 					m_freem(n);
@@ -235,27 +235,36 @@ rip_input(m, iphlen)
 #if CONFIG_MACF_NET
 			if (n && skipit == 0) {
 				if (mac_inpcb_check_deliver(last, n, AF_INET,
-				    SOCK_RAW) != 0)
+				    SOCK_RAW) != 0) {
+					m_freem(n);
 					skipit = 1;
+				}
 			}
 #endif
 			if (n && skipit == 0) {
 				int error = 0;
-				if (last->inp_flags & INP_CONTROLOPTS ||
-				    last->inp_socket->so_options & SO_TIMESTAMP)
-				    ip_savecontrol(last, &opts, ip, n);
+				if ((last->inp_flags & INP_CONTROLOPTS) != 0 ||
+				    (last->inp_socket->so_options & SO_TIMESTAMP) != 0 ||
+				    (last->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
+					ret = ip_savecontrol(last, &opts, ip, n);
+					if (ret != 0) {
+						m_freem(n);
+						m_freem(opts);
+						last = inp;
+						continue;
+					}
+				}
 				if (last->inp_flags & INP_STRIPHDR) {
 					n->m_len -= iphlen;
 					n->m_pkthdr.len -= iphlen;
 					n->m_data += iphlen;
 				}
-// ###LOCK need to lock that socket?
+				so_recv_data_stat(last->inp_socket, m, 0);
 				if (sbappendaddr(&last->inp_socket->so_rcv,
 				    (struct sockaddr *)&ripsrc, n,
 				    opts, &error) != 0) {
 					sorwakeup(last->inp_socket);
-				}
-				else {
+				} else {
 					if (error) {
 						/* should notify about lost packet */
 						kprintf("rip_input can't append to socket\n");
@@ -266,10 +275,10 @@ rip_input(m, iphlen)
 		}
 		last = inp;
 	}
-	lck_rw_done(ripcbinfo.mtx);
+
+	skipit = 0;
 #if IPSEC
 	/* check AH/ESP integrity. */
-	skipit = 0;
 	if (ipsec_bypass == 0 && last) {
 		if (ipsec4_in_reject_so(m, last->inp_socket)) {
 			m_freem(m);
@@ -282,20 +291,30 @@ rip_input(m, iphlen)
 #endif /*IPSEC*/
 #if CONFIG_MACF_NET
 	if (last && skipit == 0) {
-		if (mac_inpcb_check_deliver(last, m, AF_INET, SOCK_RAW) != 0)
+		if (mac_inpcb_check_deliver(last, m, AF_INET, SOCK_RAW) != 0) {
 			skipit = 1;
+			m_freem(m);
+		}
 	}
 #endif
 	if (skipit == 0) {
 		if (last) {
-			if (last->inp_flags & INP_CONTROLOPTS ||
-				last->inp_socket->so_options & SO_TIMESTAMP)
-				ip_savecontrol(last, &opts, ip, m);
+			if ((last->inp_flags & INP_CONTROLOPTS) != 0 ||
+				(last->inp_socket->so_options & SO_TIMESTAMP) != 0 ||
+				(last->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
+				ret = ip_savecontrol(last, &opts, ip, m);
+				if (ret != 0) {
+					m_freem(m);
+					m_freem(opts);
+					goto unlock;
+				}
+			}
 			if (last->inp_flags & INP_STRIPHDR) {
 				m->m_len -= iphlen;
 				m->m_pkthdr.len -= iphlen;
 				m->m_data += iphlen;
 			}
+			so_recv_data_stat(last->inp_socket, m, 0);
 			if (sbappendaddr(&last->inp_socket->so_rcv,
 				(struct sockaddr *)&ripsrc, m, opts, NULL) != 0) {
 				sorwakeup(last->inp_socket);
@@ -308,6 +327,12 @@ rip_input(m, iphlen)
 			OSAddAtomic(-1, &ipstat.ips_delivered);
 		}
 	}
+unlock:
+	/*
+	 * Keep the list locked because a socket filter may force the
+	 * socket lock to be released when calling sbappendaddr() --
+	 * see rdar://7627704
+	 */
+	lck_rw_done(ripcbinfo.mtx);
 }
 
 /*
@@ -325,21 +350,19 @@ rip_output(
 	register struct inpcb *inp = sotoinpcb(so);
 	int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
 	struct ip_out_args ipoa;
+	struct ip_moptions *imo;
 	int error = 0;
-#if PKT_PRIORITY
-	mbuf_traffic_class_t mtc = MBUF_TC_NONE;
-#endif /* PKT_PRIORITY */
+	mbuf_traffic_class_t mtc = MBUF_TC_UNSPEC;
 
 	if (control != NULL) {
-#if PKT_PRIORITY
 		mtc = mbuf_traffic_class_from_control(control);
-#endif /* PKT_PRIORITY */
 
 		m_freem(control);
 	}
 	/* If socket was bound to an ifindex, tell ip_output about it */
-	ipoa.ipoa_ifscope = (inp->inp_flags & INP_BOUND_IF) ?
+	ipoa.ipoa_boundif = (inp->inp_flags & INP_BOUND_IF) ?
 	    inp->inp_boundif : IFSCOPE_NONE;
+	ipoa.ipoa_nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
 	flags |= IP_OUTARGS;
 
 	/*
@@ -401,35 +424,52 @@ rip_output(
 		inp->inp_route.ro_rt = NULL;
 	}
 
-#if PKT_PRIORITY
-	set_traffic_class(m, so, mtc);
-#endif /* PKT_PRIORITY */
+	set_packet_tclass(m, so, mtc, 0);
 
 #if CONFIG_MACF_NET
 	mac_mbuf_label_associate_inpcb(inp, m);
 #endif
 
+	imo = inp->inp_moptions;
+	if (imo != NULL)
+		IMO_ADDREF(imo);
 	/*
 	 * The domain lock is held across ip_output, so it is okay
 	 * to pass the PCB cached route pointer directly to IP and
 	 * the modules beneath it.
 	 */
 	error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
-	    inp->inp_moptions, &ipoa);
+	    imo, &ipoa);
 
-#if IFNET_ROUTE_REFCNT
-	/*
-	 * Always discard the cached route for unconnected socket
-	 * or if it is a non-unicast route.
-	 */
-	if (inp->inp_route.ro_rt != NULL &&
-	    ((inp->inp_route.ro_rt->rt_flags & (RTF_MULTICAST|RTF_BROADCAST)) ||
-	    inp->inp_socket == NULL ||
-	    inp->inp_socket->so_state != SS_ISCONNECTED)) {
-		rtfree(inp->inp_route.ro_rt);
-		inp->inp_route.ro_rt = NULL;
+	if (imo != NULL)
+		IMO_REMREF(imo);
+
+	if (inp->inp_route.ro_rt != NULL) {
+		struct rtentry *rt = inp->inp_route.ro_rt;
+		unsigned int outif;
+
+		if ((rt->rt_flags & (RTF_MULTICAST|RTF_BROADCAST)) ||
+		    inp->inp_socket == NULL ||
+		    !(inp->inp_socket->so_state & SS_ISCONNECTED)) {
+			rt = NULL;	/* unusable */
+		}
+		/*
+		 * Always discard the cached route for an unconnected
+		 * socket, or if it is a multicast or broadcast route.
+		 */
+		if (rt == NULL) {
+			rtfree(inp->inp_route.ro_rt);
+			inp->inp_route.ro_rt = NULL;
+		}
+		/*
+		 * If this is a connected socket and the destination
+		 * route is unicast, update inp_last_outif with the
+		 * index of the interface used by IP.
+		 */
+		if (rt != NULL &&
+		    (outif = rt->rt_ifp->if_index) != inp->inp_last_outif)
+			inp->inp_last_outif = outif;
 	}
-#endif /* IFNET_ROUTE_REFCNT */
 
 	return (error);
 }
@@ -642,10 +682,12 @@ rip_ctlinput(
 		lck_rw_lock_shared(in_ifaddr_rwlock);
 		for (ia = in_ifaddrhead.tqh_first; ia;
 		     ia = ia->ia_link.tqe_next) {
-			if (ia->ia_ifa.ifa_addr == sa
-			    && (ia->ia_flags & IFA_ROUTE)) {
+			IFA_LOCK(&ia->ia_ifa);
+			if (ia->ia_ifa.ifa_addr == sa &&
+			    (ia->ia_flags & IFA_ROUTE)) {
 				done = 1;
-				ifaref(&ia->ia_ifa);
+				IFA_ADDREF_LOCKED(&ia->ia_ifa);
+				IFA_UNLOCK(&ia->ia_ifa);
 				lck_rw_done(in_ifaddr_rwlock);
 				lck_mtx_lock(rnh_lock);
 				/*
@@ -660,9 +702,10 @@ rip_ctlinput(
 				 */
 				in_ifadown(&ia->ia_ifa, 1);
 				lck_mtx_unlock(rnh_lock);
-				ifafree(&ia->ia_ifa);
+				IFA_REMREF(&ia->ia_ifa);
 				break;
 			}
+			IFA_UNLOCK(&ia->ia_ifa);
 		}
 		if (!done)
 			lck_rw_done(in_ifaddr_rwlock);
@@ -672,14 +715,22 @@ rip_ctlinput(
 		lck_rw_lock_shared(in_ifaddr_rwlock);
 		for (ia = in_ifaddrhead.tqh_first; ia;
 		     ia = ia->ia_link.tqe_next) {
-			if (ia->ia_ifa.ifa_addr == sa)
+			IFA_LOCK(&ia->ia_ifa);
+			if (ia->ia_ifa.ifa_addr == sa) {
+				/* keep it locked */
 				break;
+			}
+			IFA_UNLOCK(&ia->ia_ifa);
 		}
-		if (ia == 0 || (ia->ia_flags & IFA_ROUTE)) {
+		if (ia == NULL || (ia->ia_flags & IFA_ROUTE) ||
+		    (ia->ia_ifa.ifa_debug & IFD_NOTREADY)) {
+			if (ia != NULL)
+				IFA_UNLOCK(&ia->ia_ifa);
 			lck_rw_done(in_ifaddr_rwlock);
 			return;
 		}
-		ifaref(&ia->ia_ifa);
+		IFA_ADDREF_LOCKED(&ia->ia_ifa);
+		IFA_UNLOCK(&ia->ia_ifa);
 		lck_rw_done(in_ifaddr_rwlock);
 
 		flags = RTF_UP;
@@ -690,9 +741,12 @@ rip_ctlinput(
 			flags |= RTF_HOST;
 
 		err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
-		if (err == 0)
+		if (err == 0) {
+			IFA_LOCK_SPIN(&ia->ia_ifa);
 			ia->ia_flags |= IFA_ROUTE;
-		ifafree(&ia->ia_ifa);
+			IFA_UNLOCK(&ia->ia_ifa);
+		}
+		IFA_REMREF(&ia->ia_ifa);
 		break;
 	}
 }
@@ -700,9 +754,9 @@ rip_ctlinput(
 u_int32_t	rip_sendspace = RIPSNDQ;
 u_int32_t	rip_recvspace = RIPRCVQ;
 
-SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,
     &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
-SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,
     &rip_recvspace, 0, "Maximum incoming raw IP datagram size");
 
 static int
@@ -770,6 +824,7 @@ rip_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
 	struct inpcb *inp = sotoinpcb(so);
 	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
 	struct ifaddr *ifa = NULL;
+	unsigned int outif = 0;
 
 	if (nam->sa_len != sizeof(*addr))
 		return EINVAL;
@@ -781,10 +836,13 @@ rip_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
 		return EADDRNOTAVAIL;
 	}
 	else if (ifa) {
-		ifafree(ifa);
-		ifa = NULL;
+		IFA_LOCK(ifa);
+		outif = ifa->ifa_ifp->if_index;
+		IFA_UNLOCK(ifa);
+		IFA_REMREF(ifa);
 	}
 	inp->inp_laddr = addr->sin_addr;
+	inp->inp_last_outif = outif;
 	return 0;
 }
 
@@ -815,7 +873,7 @@ rip_shutdown(struct socket *so)
 
 __private_extern__ int
 rip_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr *nam,
-	 __unused struct mbuf *control, __unused struct proc *p)
+	struct mbuf *control, __unused struct proc *p)
 {
 	struct inpcb *inp = sotoinpcb(so);
 	register u_int32_t dst;
@@ -979,7 +1037,7 @@ rip_pcblist SYSCTL_HANDLER_ARGS
 	return error;
 }
 
-SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0,
+SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
 	    rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
 
 #if !CONFIG_EMBEDDED
@@ -1082,11 +1140,26 @@ rip_pcblist64 SYSCTL_HANDLER_ARGS
         return error;
 }
 
-SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist64, CTLFLAG_RD, 0, 0,
+SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
             rip_pcblist64, "S,xinpcb64", "List of active raw IP sockets");
 
 #endif /* !CONFIG_EMBEDDED */
 
+
+static int
+rip_pcblist_n SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+	int error = 0;
+	
+	error = get_pcblist_n(IPPROTO_IP, req, &ripcbinfo);
+	
+	return error;
+}
+
+SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist_n, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
+            rip_pcblist_n, "S,xinpcb_n", "List of active raw IP sockets");
+
 struct pr_usrreqs rip_usrreqs = {
 	rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect,
 	pru_connect2_notsupp, in_control, rip_detach, rip_disconnect,
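
ip_savecontrol() now returns an int so callers can detect a control-mbuf
allocation failure (counted by the new ips_pktdropcntrl statistic) and drop
the packet rather than deliver it with missing ancillary data. The
caller-side shape, abstracted from the rip_input() changes above; the names
follow that code:

	if ((inp->inp_flags & INP_CONTROLOPTS) != 0 ||
	    (so->so_options & (SO_TIMESTAMP | SO_TIMESTAMP_MONOTONIC)) != 0) {
		if (ip_savecontrol(inp, &opts, ip, m) != 0) {
			m_freem(m);
			m_freem(opts);
			return;		/* drop instead of delivering */
		}
	}
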
diff --git a/bsd/netinet/tcp.h b/bsd/netinet/tcp.h
index 3b4d8f92f..a3a183bfe 100644
--- a/bsd/netinet/tcp.h
+++ b/bsd/netinet/tcp.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -185,8 +185,6 @@ struct tcphdr {
 
 #define TCP_MAX_WINSHIFT	14	/* maximum window shift */
 
-#define TCP_MAXBURST		4 	/* maximum segments in a burst */
-
 #define TCP_MAXHLEN	(0xf<<2)	/* max length of header in bytes */
 #define TCP_MAXOLEN	(TCP_MAXHLEN - sizeof(struct tcphdr))
 					/* max space left for options */
@@ -202,6 +200,90 @@ struct tcphdr {
 #define TCP_NOOPT               0x08    /* don't use TCP options */
 #define TCP_KEEPALIVE           0x10    /* idle time used when SO_KEEPALIVE is enabled */
 #define TCP_CONNECTIONTIMEOUT   0x20    /* connection timeout */
+#define PERSIST_TIMEOUT		0x40	/* time after which a connection in
+					 *  persist timeout will terminate.
+					 *  see draft-ananth-tcpm-persist-02.txt
+					 */
+#define TCP_RXT_CONNDROPTIME	0x80	/* time after which tcp retransmissions will be 
+					 * stopped and the connection will be dropped
+					 */
+#define TCP_RXT_FINDROP	0x100	/* when this option is set, drop a connection 
+					 * after retransmitting the FIN 3 times. It will
+					 * prevent holding too many mbufs in socket 
+					 * buffer queues.
+					 */
+#ifdef PRIVATE
+#define	TCP_INFO				0x200	/* retrieve tcp_info structure */
+
+/*
+ * The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits
+ * the caller to query certain information about the state of a TCP
+ * connection.  We provide an overlapping set of fields with the Linux
+ * implementation, but since this is a fixed size structure, room has been
+ * left for growth.  In order to maximize potential future compatibility with
+ * the Linux API, the same variable names and order have been adopted, and
+ * padding left to make room for omitted fields in case they are added later.
+ *
+ * XXX: This is currently an unstable ABI/API, in that it is expected to
+ * change.
+ */
+#pragma pack(4)
+
+#define	TCPI_OPT_TIMESTAMPS	0x01
+#define	TCPI_OPT_SACK		0x02
+#define	TCPI_OPT_WSCALE		0x04
+#define	TCPI_OPT_ECN		0x08
+
+struct tcp_info {
+	u_int8_t	tcpi_state;			/* TCP FSM state. */
+	u_int8_t	tcpi_options;		/* Options enabled on conn. */
+	u_int8_t	tcpi_snd_wscale;	/* RFC1323 send shift value. */
+	u_int8_t	tcpi_rcv_wscale;	/* RFC1323 recv shift value. */
+
+	u_int32_t	tcpi_snd_mss;		/* Max segment size for send. */
+	u_int32_t	tcpi_rcv_mss;		/* Max segment size for receive. */
+
+	u_int32_t	tcpi_snd_ssthresh;	/* Slow start threshold. */
+	u_int32_t	tcpi_snd_cwnd;		/* Send congestion window. */
+
+	u_int32_t	tcpi_rcv_space;		/* Advertised recv window. */
+
+	u_int32_t	tcpi_snd_wnd;		/* Advertised send window. */
+	u_int32_t	tcpi_snd_bwnd;		/* Bandwidth send window. */
+	u_int32_t	tcpi_snd_nxt;		/* Next egress seqno */
+	u_int32_t	tcpi_rcv_nxt;		/* Next ingress seqno */
+	
+	int32_t		tcpi_last_outif;	/* if_index of interface used to send last */
+};
+
+/*
+ * Note that IPv6 link local addresses should have the appropriate scope ID
+ */
+
+struct info_tuple {
+	u_int8_t	itpl_proto;
+	union {
+		struct sockaddr		_itpl_sa;
+		struct sockaddr_in	_itpl_sin;
+		struct sockaddr_in6	_itpl_sin6;
+	} itpl_localaddr;
+	union {
+		struct sockaddr		_itpl_sa;
+		struct sockaddr_in	_itpl_sin;
+		struct sockaddr_in6	_itpl_sin6;
+	} itpl_remoteaddr;
+};
+
+#define itpl_local_sa		itpl_localaddr._itpl_sa
+#define itpl_local_sin 		itpl_localaddr._itpl_sin
+#define itpl_local_sin6		itpl_localaddr._itpl_sin6
+#define itpl_remote_sa 		itpl_remoteaddr._itpl_sa
+#define itpl_remote_sin		itpl_remoteaddr._itpl_sin
+#define itpl_remote_sin6	itpl_remoteaddr._itpl_sin6
+
+#pragma pack()
+
+#endif /* PRIVATE */
 #endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
 
 #endif
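
As with the Linux API it mirrors, TCP_INFO is read with getsockopt(2) on a
TCP socket. A userland sketch, assuming access to the PRIVATE definitions
above; the option is explicitly an unstable ABI and may change:

	#include <sys/socket.h>
	#include <netinet/tcp.h>

	struct tcp_info ti;
	socklen_t len = sizeof (ti);

	if (getsockopt(sock, IPPROTO_TCP, TCP_INFO, &ti, &len) == 0) {
		/* e.g. inspect ti.tcpi_snd_cwnd and ti.tcpi_last_outif */
	}
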
diff --git a/bsd/netinet/tcp_cc.h b/bsd/netinet/tcp_cc.h
new file mode 100644
index 000000000..c78ba3531
--- /dev/null
+++ b/bsd/netinet/tcp_cc.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*-
+ * Copyright (c) 2008 Swinburne University of Technology, Melbourne, Australia
+ * All rights reserved.
+ *
+ * This software was developed at the Centre for Advanced Internet
+ * Architectures, Swinburne University, by Lawrence Stewart and James Healy,
+ * made possible in part by a grant from the Cisco University Research Program
+ * Fund at Community Foundation Silicon Valley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_CC_H_
+#define _NETINET_CC_H_
+
+#ifdef KERNEL
+
+#include <netinet/tcp_var.h>
+
+#define TCP_CC_ALGO_NEWRENO_INDEX 0	/* default congestion control algorithm */
+#define TCP_CC_ALGO_BACKGROUND_INDEX 1	/* congestion control for background transport */
+#define TCP_CC_ALGO_COUNT 2		/* Count of CC algorithms defined */
+
+#define TCP_CA_NAME_MAX 16		/* Maximum characters in the name of a CC algorithm */
+
+/*
+ * Structure holding the definitions of the various actions defined by a
+ * congestion control algorithm for TCP. It can be used to change the
+ * congestion control of a connection based on the user-set priority of
+ * that connection.
+ */
+struct tcp_cc_algo {
+	char name[TCP_CA_NAME_MAX];
+	uint32_t num_sockets;
+	uint32_t flags;
+
+	/* init the congestion algorithm for the specified control block */
+	int (*init) (struct tcpcb *tp);
+
+	/* clean up any algorithm-related state stored in the connection */
+	int (*cleanup) (struct tcpcb *tp); 
+
+	/* initialize cwnd at the start of a connection */
+	void (*cwnd_init) (struct tcpcb *tp);
+
+	/* called on the receipt of in-sequence ack during congestion avoidance phase */
+	void (*inseq_ack_rcvd) (struct tcpcb *tp, struct tcphdr *th);
+
+	/* called on the receipt of a valid ack */
+	void (*ack_rcvd) (struct tcpcb *tp, struct tcphdr *th);
+
+	/* called before entering FR */
+	void (*pre_fr) (struct tcpcb *tp, struct tcphdr *th);
+
+	/* called after exiting FR */
+	void (*post_fr) (struct tcpcb *tp, struct tcphdr *th);
+
+	/* perform tasks when data transfer resumes after an idle period */
+	void (*after_idle) (struct tcpcb *tp);
+
+	/* perform tasks when the connection's retransmit timer expires */
+	void (*after_timeout) (struct tcpcb *tp);
+
+	/* Whether or not to delay the ack */
+	int (*delay_ack)(struct tcpcb *tp, struct tcphdr *th);
+
+	/* Switch a connection to this CC algorithm after sending some packets */
+	void (*switch_to)(struct tcpcb *tp, uint16_t old_cc_index); 
+
+} __attribute__((aligned(4)));
+
+extern struct tcp_cc_algo* tcp_cc_algo_list[TCP_CC_ALGO_COUNT];
+
+#define CC_ALGO(tp) (tcp_cc_algo_list[tp->tcp_cc_index])
+
+#endif /* KERNEL */
+#endif /* _NETINET_CC_H_ */
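
A congestion-control module is a populated struct tcp_cc_algo whose address
is installed at its index in tcp_cc_algo_list[]; CC_ALGO(tp) then dispatches
through the connection's tcp_cc_index. One plausible registration sketch,
where the newreno_* functions are hypothetical stand-ins for a real
implementation:

	static struct tcp_cc_algo tcp_cc_newreno = {
		.name = "newreno",
		.init = newreno_init,
		.cleanup = newreno_cleanup,
		.cwnd_init = newreno_cwnd_init,
		.ack_rcvd = newreno_ack_rcvd,
		.pre_fr = newreno_pre_fr,
		.post_fr = newreno_post_fr,
		.after_idle = newreno_after_idle,
		.after_timeout = newreno_after_timeout,
		.delay_ack = newreno_delay_ack,
	};

	tcp_cc_algo_list[TCP_CC_ALGO_NEWRENO_INDEX] = &tcp_cc_newreno;

	/* a caller, e.g. tcp_input(), then asks: */
	if (CC_ALGO(tp)->delay_ack != NULL && CC_ALGO(tp)->delay_ack(tp, th))
		tp->t_flags |= TF_DELACK;	/* illustrative action */
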
diff --git a/bsd/netinet/tcp_debug.c b/bsd/netinet/tcp_debug.c
index 58b1141b2..8ba9eb6af 100644
--- a/bsd/netinet/tcp_debug.c
+++ b/bsd/netinet/tcp_debug.c
@@ -96,7 +96,7 @@
 
 #if TCPDEBUG
 __private_extern__ int	tcpconsdebug = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcpconsdebug, CTLFLAG_RW, 
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcpconsdebug, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcpconsdebug, 0, "Turn tcp debugging on or off");
 #endif
 
diff --git a/bsd/netinet/tcp_input.c b/bsd/netinet/tcp_input.c
index b65e9d5c6..6f06b2b14 100644
--- a/bsd/netinet/tcp_input.c
+++ b/bsd/netinet/tcp_input.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -86,6 +86,7 @@
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/route.h>
+#include <net/ntstat.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
@@ -95,6 +96,7 @@
 #include <netinet/icmp_var.h>	/* for ICMP_BANDLIM	*/
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
+#include <mach/sdt.h>
 #if INET6
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
@@ -107,6 +109,8 @@
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
+#include <netinet/tcp_cc.h>
+#include <kern/zalloc.h>
 #if INET6
 #include <netinet6/tcp6_var.h>
 #endif
@@ -131,10 +135,6 @@ struct tcphdr tcp_savetcp;
 
 #include <sys/kdebug.h>
 
-#ifndef __APPLE__
-MALLOC_DEFINE(M_TSEGQ, "tseg_qent", "TCP segment queue entry");
-#endif
-
 #define DBG_LAYER_BEG		NETDBG_CODE(DBG_NETTCP, 0)
 #define DBG_LAYER_END		NETDBG_CODE(DBG_NETTCP, 2)
 #define DBG_FNC_TCP_INPUT       NETDBG_CODE(DBG_NETTCP, (3 << 8))
@@ -150,26 +150,31 @@ extern int ipsec_bypass;
 struct	tcpstat tcpstat;
 
 static int log_in_vain = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW, 
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW | CTLFLAG_LOCKED,
     &log_in_vain, 0, "Log all incoming TCP connections");
 
 static int blackhole = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&blackhole, 0, "Do not send RST when dropping refused connections");
 
 int tcp_delack_enabled = 3;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_RW, 
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_delack_enabled, 0, 
     "Delay ACK to try and piggyback it onto a data packet");
 
 int tcp_lq_overflow = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_lq_overflow, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_lq_overflow, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_lq_overflow, 0, 
     "Listen Queue Overflow");
 
+int tcp_recv_bg = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbg, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &tcp_recv_bg, 0, 
+    "Receive background");
+
 #if TCP_DROP_SYNFIN
 static int drop_synfin = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_RW | CTLFLAG_LOCKED,
     &drop_synfin, 0, "Drop TCP packets with SYN+FIN set");
 #endif
 
@@ -177,59 +182,85 @@ SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
     "TCP Segment Reassembly Queue");
 
 __private_extern__ int tcp_reass_maxseg = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_reass_maxseg, 0,
     "Global maximum number of TCP Segments in Reassembly Queue");
 
 __private_extern__ int tcp_reass_qsize = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD | CTLFLAG_LOCKED,
     &tcp_reass_qsize, 0,
     "Global number of TCP Segments currently in Reassembly Queue");
 
 static int tcp_reass_overflows = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD | CTLFLAG_LOCKED,
     &tcp_reass_overflows, 0,
     "Global number of TCP Segment Reassembly Queue Overflows");
 
 
 __private_extern__ int slowlink_wsize = 8192;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, slowlink_wsize, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, slowlink_wsize, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&slowlink_wsize, 0, "Maximum advertised window size for slowlink");
 
-static int maxseg_unacked = 8;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, maxseg_unacked, CTLFLAG_RW,
+int maxseg_unacked = 8;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, maxseg_unacked, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&maxseg_unacked, 0, "Maximum number of outstanding segments left unacked");
 
-static int	tcp_do_rfc3465 = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW,
+int	tcp_do_rfc3465 = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&tcp_do_rfc3465, 0, "");
 
-static int	tcp_do_rfc3465_lim2 = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465_lim2, CTLFLAG_RW,
+int	tcp_do_rfc3465_lim2 = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465_lim2, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&tcp_do_rfc3465_lim2, 0, "Appropriate bytes counting w/ L=2*SMSS");
 
+int	rtt_samples_per_slot = 20;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, rtt_samples_per_slot, CTLFLAG_RW | CTLFLAG_LOCKED,
+	&rtt_samples_per_slot, 0, "Number of RTT samples stored for rtt history");
+
+int	tcp_allowed_iaj = ALLOWED_IAJ;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, recv_allowed_iaj, CTLFLAG_RW | CTLFLAG_LOCKED,
+        &tcp_allowed_iaj, 0, "Allowed inter-packet arrival jitter");
+
+int	tcp_acc_iaj_high_thresh = ACC_IAJ_HIGH_THRESH;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, acc_iaj_high_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
+        &tcp_acc_iaj_high_thresh, 0, "Used in calculating maximum accumulated IAJ");
+
 #if CONFIG_IFEF_NOWINDOWSCALE
 int tcp_obey_ifef_nowindowscale = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, obey_ifef_nowindowscale, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, obey_ifef_nowindowscale, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&tcp_obey_ifef_nowindowscale, 0, "");
 #endif
 
 extern int tcp_TCPTV_MIN;
+extern int tcp_acc_iaj_high;
+extern int tcp_acc_iaj_react_limit;
+extern struct zone *tcp_reass_zone;
+
 
 u_int32_t tcp_now;
+struct timeval tcp_uptime;	/* uptime when tcp_now was last updated */
+lck_spin_t *tcp_uptime_lock;	/* Used to synchronize updates to tcp_now */
 
 struct inpcbhead tcb;
 #define	tcb6	tcb  /* for KAME src sync over BSD*'s */
 struct inpcbinfo tcbinfo;
 
-static void	 tcp_dooptions(struct tcpcb *,
-	    u_char *, int, struct tcphdr *, struct tcpopt *, unsigned int);
+static void tcp_dooptions(struct tcpcb *, u_char *, int, struct tcphdr *,
+    struct tcpopt *, unsigned int);
 static void	 tcp_pulloutofband(struct socket *,
 	    struct tcphdr *, struct mbuf *, int);
 static int	 tcp_reass(struct tcpcb *, struct tcphdr *, int *,
 				struct mbuf *);
 static void	tcp_xmit_timer(struct tcpcb *, int);
 static inline unsigned int tcp_maxmtu(struct rtentry *);
+static inline int tcp_stretch_ack_enable(struct tcpcb *tp);
+
+#if TRAFFIC_MGT
+static inline void update_iaj_state(struct tcpcb *tp, uint32_t tlen, int reset_size);
+void compute_iaj(struct tcpcb *tp);
+static inline void clear_iaj_state(struct tcpcb *tp);
+#endif /* TRAFFIC_MGT */
+
 #if INET6
 static inline unsigned int tcp_maxmtu6(struct rtentry *);
 #endif
@@ -247,9 +278,7 @@ do { \
 #define ND6_HINT(tp)
 #endif
 
-extern u_int32_t	*delack_bitmask;
-
-extern void	add_to_time_wait(struct tcpcb *);
+extern void	add_to_time_wait(struct tcpcb *, uint32_t delay);
 extern void postevent(struct socket *, struct sockbuf *, int);
 
 extern  void    ipfwsyslog( int level, const char *format,...);
@@ -269,37 +298,140 @@ __private_extern__ int tcp_win_scale;
 #define log_in_vain_log( a ) { log a; }
 #endif
 
+int tcp_rcvunackwin = TCPTV_UNACKWIN;
+int tcp_maxrcvidle = TCPTV_MAXRCVIDLE;
+int tcp_rcvsspktcnt = TCP_RCV_SS_PKTCOUNT;
 
-/*
- * Indicate whether this ack should be delayed.  
- * We can delay the ack if:
- *  - delayed acks are enabled (set to 1) and
- *      - our last ack wasn't a 0-sized window.  We never want to delay
- *	  the ack that opens up a 0-sized window.
- *  - delayed acks are enabled (set to 2, "more compatible") and
- *      - our last ack wasn't a 0-sized window.
- *      - if the peer hasn't sent us a TH_PUSH data packet (this solves 3649245)
- *      - the peer hasn't sent us a TH_PUSH data packet, if he did, take this as a clue that we
- *        need to ACK with no delay. This helps higher level protocols who won't send
- *        us more data even if the window is open because their last "segment" hasn't been ACKed
- *  - delayed acks are enabled (set to 3, "streaming detection") and
- *  	- if we receive more than "maxseg_unacked"  full packets per second on this socket
- *		- if we don't have more than "maxseg_unacked" delayed so far
- *  	- if those criteria aren't met, acts like "2". Allowing faster acking while browsing for example.
- *
- */
-#define DELAY_ACK(tp) \
-	(((tcp_delack_enabled == 1) && ((tp->t_flags & TF_RXWIN0SENT) == 0)) || \
-	 (((tcp_delack_enabled == 2) && (tp->t_flags & TF_RXWIN0SENT) == 0) && \
-	   ((thflags & TH_PUSH) == 0) && ((tp->t_flags & TF_DELACK) == 0)) || \
-	 (((tcp_delack_enabled == 3) && (tp->t_flags & TF_RXWIN0SENT) == 0) && \
-	   (tp->t_rcvtime == 0) && ((thflags & TH_PUSH) == 0) && \
-	   (((tp->t_unacksegs == 0)) || \
-	   ((tp->rcv_byps > (maxseg_unacked * tp->t_maxseg))  && (tp->t_unacksegs < maxseg_unacked)))))
+#define DELAY_ACK(tp, th) (CC_ALGO(tp)->delay_ack != NULL && CC_ALGO(tp)->delay_ack(tp, th))
 
 static int tcp_dropdropablreq(struct socket *head);
 static void tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th);
 
+static void update_base_rtt(struct tcpcb *tp, uint32_t rtt);
+uint32_t get_base_rtt(struct tcpcb *tp);
+void tcp_set_background_cc(struct socket *so);
+void tcp_set_foreground_cc(struct socket *so);
+static void tcp_set_new_cc(struct socket *so, uint16_t cc_index);
+
+#if TRAFFIC_MGT
+void
+reset_acc_iaj(struct tcpcb *tp)
+{
+	tp->acc_iaj = 0;
+	tp->iaj_rwintop = 0;
+	clear_iaj_state(tp);
+}
+
+static inline void
+update_iaj_state(struct tcpcb *tp, uint32_t size, int rst_size)
+{
+	if (rst_size > 0)
+		tp->iaj_size = 0;
+	if (tp->iaj_size == 0 || size >= tp->iaj_size) {
+		tp->iaj_size = size;
+		tp->iaj_rcv_ts = tcp_now;
+		tp->iaj_small_pkt = 0;
+	}
+}
+
+static inline void
+clear_iaj_state(struct tcpcb *tp)
+{
+	tp->iaj_rcv_ts = 0;
+}
+
+/* For every 32-bit unsigned integer v, this function finds the largest
+ * integer n such that (n*n <= v). It takes at most 16 iterations
+ * irrespective of the value of v and does not involve multiplications.
+ */
+static inline int
+isqrt(unsigned int val)
+{
+	unsigned int sqrt_cache[11] = {0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100};
+	unsigned int temp, g=0, b=0x8000, bshft=15;
+	if ( val <= 100) {
+		for (g = 0; g <= 10; ++g) {
+			if (sqrt_cache[g] > val) {
+				g--;
+				break;
+			} else if (sqrt_cache[g] == val) {
+				break;
+			}
+		}
+	} else {
+		do {
+			temp = (((g << 1) + b) << (bshft--));
+			if (val >= temp) {
+				g += b;
+				val -= temp;
+			}
+			b >>= 1;
+		} while ( b > 0 && val > 0);
+	}
+	return(g);
+} 
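+
+/* Worked example: isqrt(27).  All probes down to b = 8 exceed 27 and are
+ * skipped.  At b = 4, bshft = 2: temp = ((0 << 1) + 4) << 2 = 16 <= 27,
+ * so g = 4, val = 11.  At b = 2: ((4 << 1) + 2) << 1 = 20 > 11, skipped.
+ * At b = 1, bshft = 0: (4 << 1) + 1 = 9 <= 11, so g = 5, val = 2.  The
+ * loop then exits with g = 5, and indeed 5*5 = 25 <= 27 < 36 = 6*6.
+ */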
+
+void
+compute_iaj(struct tcpcb *tp)
+{
+	/* When the accumulated IAJ (in milliseconds) reaches MAX_ACC_IAJ,
+	 * throttle the receive window to a minimum of MIN_IAJ_WIN packets
+	 */
+#define MAX_ACC_IAJ (tcp_acc_iaj_high_thresh + tcp_acc_iaj_react_limit)
+
+	uint32_t allowed_iaj, acc_iaj = 0;
+	uint32_t cur_iaj = tcp_now - tp->iaj_rcv_ts;
+
+	uint32_t mean, temp;
+	int32_t cur_iaj_dev;
+	cur_iaj_dev = (cur_iaj - tp->avg_iaj);	
+	
+	/* Allow a jitter of "allowed_iaj" milliseconds. Some connections
+	 * may have a constant jitter larger than that; we detect this by
+	 * using the standard deviation.
+	 */
+	allowed_iaj = tp->avg_iaj + tp->std_dev_iaj;
+	if (allowed_iaj < tcp_allowed_iaj)
+		allowed_iaj = tcp_allowed_iaj;
+
+	/* Initially, when the connection starts, the sender's congestion window
+	 * is small. During this period we avoid throttling a connection because
+	 * we do not have a good starting point for allowed_iaj. IAJ_IGNORE_PKTCNT
+	 * is used to quietly gloss over the first few packets.
+	 */
+	if (tp->iaj_pktcnt > IAJ_IGNORE_PKTCNT) {
+		if ( cur_iaj <= allowed_iaj ) {
+			if (tp->acc_iaj >= 2)
+				acc_iaj = tp->acc_iaj - 2;
+			else
+				acc_iaj = 0;
+		} else {
+			acc_iaj = tp->acc_iaj + (cur_iaj - allowed_iaj);
+		}
+
+		if (acc_iaj > MAX_ACC_IAJ)
+			acc_iaj = MAX_ACC_IAJ;
+		tp->acc_iaj = acc_iaj;
+	}
+
+	/* Compute a weighted average where the history has a weight of
+	 * 15 out of 16 and the current value has a weight of 1 out of 16,
+	 * damping the effect of short-term fluctuations on the average.
+	 */
+	tp->avg_iaj = (((tp->avg_iaj << 4) - tp->avg_iaj) + cur_iaj) >> 4;
+
+	/* Compute the root-mean-square of the deviation, where the mean
+	 * is the weighted average described above
+	 */
+	temp = tp->std_dev_iaj * tp->std_dev_iaj;
+	mean = (((temp << 4) - temp) + (cur_iaj_dev * cur_iaj_dev)) >> 4;
+	
+	tp->std_dev_iaj = isqrt(mean);
+
+	DTRACE_TCP3(iaj, struct tcpcb *, tp, uint32_t, cur_iaj, uint32_t, allowed_iaj);
+
+	return;
+}
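+
+/* In closed form, the two updates above are avg' = (15*avg + cur) / 16 and
+ * std_dev' = isqrt((15*std_dev^2 + cur_dev^2) / 16): an exponentially
+ * weighted mean and an RMS deviation about it.  The shift-and-subtract
+ * form computes the 15/16 weighting without a multiply.
+ */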
+#endif /* TRAFFIC_MGT */
 
 static int
 tcp_reass(tp, th, tlenp, m)
@@ -322,6 +454,27 @@ tcp_reass(tp, th, tlenp, m)
 	 */
 	if (th == NULL)
 		goto present;
+	
+	/* If the reassembly queue already has entries or if we are going to add 
+	 * a new one, then the connection has reached a loss state. 
+	 * Reset the stretch-ack algorithm at this point.
+	 */
+	if ((tp->t_flags & TF_STRETCHACK) != 0)
+		tcp_reset_stretch_ack(tp);
+
+	/* When the connection reaches a loss state, we need to send more acks
+	 * for a period of time so that the sender's congestion window will
+	 * open. Wait until we see some packets on the connection before 
+	 * stretching acks again.
+	 */
+	tp->t_flagsext |= TF_RCVUNACK_WAITSS;
+	tp->rcv_waitforss = 0;
+
+
+#if TRAFFIC_MGT
+	if (tp->acc_iaj > 0)
+		reset_acc_iaj(tp);
+#endif /* TRAFFIC_MGT */	
 
 	/*
 	 * Limit the number of segments in the reassembly queue to prevent
@@ -340,8 +493,7 @@ tcp_reass(tp, th, tlenp, m)
 	}
 
 	/* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
-	MALLOC(te, struct tseg_qent *, sizeof (struct tseg_qent), M_TSEGQ,
-	       M_NOWAIT);
+	te = (struct tseg_qent *) zalloc_noblock(tcp_reass_zone);
 	if (te == NULL) {
 		tcpstat.tcps_rcvmemdrop++;
 		m_freem(m);
@@ -371,8 +523,14 @@ tcp_reass(tp, th, tlenp, m)
 			if (i >= *tlenp) {
 				tcpstat.tcps_rcvduppack++;
 				tcpstat.tcps_rcvdupbyte += *tlenp;
+				if (nstat_collect) {
+					nstat_route_rx(tp->t_inpcb->inp_route.ro_rt, 1, *tlenp, NSTAT_RX_FLAG_DUPLICATE);
+					locked_add_64(&tp->t_inpcb->inp_stat->rxpackets, 1);
+					locked_add_64(&tp->t_inpcb->inp_stat->rxbytes, *tlenp);
+					tp->t_stat.rxduplicatebytes += *tlenp;
+				}
 				m_freem(m);
-				FREE(te, M_TSEGQ);
+				zfree(tcp_reass_zone, te);
 				tcp_reass_qsize--;
 				/*
 				 * Try to present any queued data
@@ -389,6 +547,12 @@ tcp_reass(tp, th, tlenp, m)
 	}
 	tcpstat.tcps_rcvoopack++;
 	tcpstat.tcps_rcvoobyte += *tlenp;
+	if (nstat_collect) {
+		nstat_route_rx(tp->t_inpcb->inp_route.ro_rt, 1, *tlenp, NSTAT_RX_FLAG_OUT_OF_ORDER);
+		locked_add_64(&tp->t_inpcb->inp_stat->rxpackets, 1);
+		locked_add_64(&tp->t_inpcb->inp_stat->rxbytes, *tlenp);
+		tp->t_stat.rxoutoforderbytes += *tlenp;
+	}
 
 	/*
 	 * While we overlap succeeding segments trim them or,
@@ -408,7 +572,7 @@ tcp_reass(tp, th, tlenp, m)
 		nq = LIST_NEXT(q, tqe_q);
 		LIST_REMOVE(q, tqe_q);
 		m_freem(q->tqe_m);
-		FREE(q, M_TSEGQ);
+		zfree(tcp_reass_zone, q);
 		tcp_reass_qsize--;
 		q = nq;
 	}
@@ -442,10 +606,11 @@ present:
 		if (so->so_state & SS_CANTRCVMORE)
 			m_freem(q->tqe_m);
 		else {
+			so_recv_data_stat(so, q->tqe_m, 0); /* XXXX */
 			if (sbappendstream(&so->so_rcv, q->tqe_m))
 				dowakeup = 1;
 		}
-		FREE(q, M_TSEGQ);
+		zfree(tcp_reass_zone, q);
 		tcp_reass_qsize--;
 		q = nq;
 	} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
@@ -480,15 +645,15 @@ present:
  */
 static void
 tcp_reduce_congestion_window(
-	struct tcpcb	*tp)
+	struct tcpcb	*tp, struct tcphdr *th)
 {
-	u_int win;
-	
-	win = min(tp->snd_wnd, tp->snd_cwnd) /
-		2 / tp->t_maxseg;
-	if (win < 2)
-		win = 2;
-	tp->snd_ssthresh = win * tp->t_maxseg;
+	/*
+	 * If the current tcp cc module has
+	 * defined a hook for tasks to run
+	 * before entering FR, call it
+	 */
+	if (CC_ALGO(tp)->pre_fr != NULL)
+		CC_ALGO(tp)->pre_fr(tp, th);
 	ENTER_FASTRECOVERY(tp);
 	tp->snd_recover = tp->snd_max;
 	tp->t_timer[TCPT_REXMT] = 0;
@@ -505,10 +670,9 @@ tcp_reduce_congestion_window(
  */
 #if INET6
 int
-tcp6_input(mp, offp)
-	struct mbuf **mp;
-	int *offp;
+tcp6_input(struct mbuf **mp, int *offp, int proto)
 {
+#pragma unused(proto)
 	register struct mbuf *m = *mp;
 	struct in6_ifaddr *ia6;
 
@@ -519,20 +683,71 @@ tcp6_input(mp, offp)
 	 * better place to put this in?
 	 */
 	ia6 = ip6_getdstifaddr(m);
-	if (ia6 && (ia6->ia6_flags & IN6_IFF_ANYCAST)) {		
-		struct ip6_hdr *ip6;
+	if (ia6 != NULL) {
+		IFA_LOCK_SPIN(&ia6->ia_ifa);
+		if (ia6->ia6_flags & IN6_IFF_ANYCAST) {
+			struct ip6_hdr *ip6;
 
-		ip6 = mtod(m, struct ip6_hdr *);
-		icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR,
+			IFA_UNLOCK(&ia6->ia_ifa);
+			IFA_REMREF(&ia6->ia_ifa);
+			ip6 = mtod(m, struct ip6_hdr *);
+			icmp6_error(m, ICMP6_DST_UNREACH,
+			    ICMP6_DST_UNREACH_ADDR,
 			    (caddr_t)&ip6->ip6_dst - (caddr_t)ip6);
-		return IPPROTO_DONE;
+			return (IPPROTO_DONE);
+		}
+		IFA_UNLOCK(&ia6->ia_ifa);
+		IFA_REMREF(&ia6->ia_ifa);
 	}
 
 	tcp_input(m, *offp);
-	return IPPROTO_DONE;
+	return (IPPROTO_DONE);
 }
 #endif
 
+/* A receiver will evaluate the flow of packets on a connection 
+ * to see if it can reduce ack traffic. The receiver will start 
+ * stretching acks if all of the following conditions are met:
+ * 1. tcp_delack_enabled is set to 3
+ * 2. If the bytes received in the last 100ms are greater than a threshold
+ *      defined by maxseg_unacked
+ * 3. If the connection has not been idle for the tcp_maxrcvidle period.
+ * 4. If the connection has seen enough packets to let slow-start finish
+ *      after connection establishment or after some packet loss.
+ *
+ * The receiver will stop stretching acks if there is congestion/reordering
+ * as indicated by packets on the reassembly queue or by ECN. If the delayed-ack
+ * timer fires while acks are being stretched, it means that the packet flow has gone
+ * below the threshold defined by maxseg_unacked and the receiver will stop
+ * stretching acks. The receiver gets no indication when slow-start is completed 
+ * or when the connection reaches an idle state. That is why we use 
+ * tcp_rcvsspktcnt to cover slow-start and tcp_maxrcvidle to identify idle 
+ * state.
+ */
+static inline int
+tcp_stretch_ack_enable(struct tcpcb *tp)
+{
+	if (tp->rcv_by_unackwin >= (maxseg_unacked * tp->t_maxseg) &&
+	    TSTMP_GT(tp->rcv_unackwin + tcp_maxrcvidle, tcp_now) &&
+	    (((tp->t_flagsext & TF_RCVUNACK_WAITSS) == 0) ||
+	    (tp->rcv_waitforss >= tcp_rcvsspktcnt))) {
+		return (1);
+	}
+	return (0);
+}
+
+/* Reset the state related to stretch-ack algorithm. This will make
+ * the receiver generate an ack every other packet. The receiver
+ * will start re-evaluating the rate at which packets come to decide 
+ * if it can benefit by lowering the ack traffic.
+ */
+void
+tcp_reset_stretch_ack(struct tcpcb *tp)
+{
+	tp->t_flags &= ~(TF_STRETCHACK);
+	tp->rcv_by_unackwin = 0;
+	tp->rcv_unackwin = tcp_now + tcp_rcvunackwin;
+}
+
 void
 tcp_input(m, off0)
 	struct mbuf *m;
@@ -565,7 +780,8 @@ tcp_input(m, off0)
 #endif
 	struct m_tag *fwd_tag;
 	u_char ip_ecn = IPTOS_ECN_NOTECT;
-	unsigned int ifscope;
+	unsigned int ifscope, nocell = 0;
+	uint8_t isconnected, isdisconnected;
 
 	/*
 	 * Record the interface where this segment arrived on; this does not
@@ -579,6 +795,11 @@ tcp_input(m, off0)
 	else
 		ifscope = IFSCOPE_NONE;
 
+	/* Since this is an entry point for input processing of TCP packets,
+	 * we can update the TCP clock here.
+	 */
+	calculate_tcp_clock();
+
 	/* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */
 	if (!SLIST_EMPTY(&m->m_pkthdr.tags)) {
 		fwd_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID,
@@ -616,12 +837,29 @@ tcp_input(m, off0)
 		/* IP6_EXTHDR_CHECK() is already done at tcp6_input() */
 		ip6 = mtod(m, struct ip6_hdr *);
 		tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
-		if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) {
-			tcpstat.tcps_rcvbadsum++;
-			goto dropnosock;
-		}
 		th = (struct tcphdr *)((caddr_t)ip6 + off0);
 
+		if ((apple_hwcksum_rx != 0) && (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) {
+			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
+				th->th_sum = m->m_pkthdr.csum_data;
+			else
+				th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
+				    &ip6->ip6_dst, htonl(sizeof(struct tcphdr)),
+				    htonl(IPPROTO_TCP));
+
+			th->th_sum ^= 0xffff;
+			if (th->th_sum) {
+				tcpstat.tcps_rcvbadsum++;
+				goto dropnosock;
+			}
+		} else {
+			if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) {
+				tcpstat.tcps_rcvbadsum++;
+				goto dropnosock;
+			}
+		}
+
 		KERNEL_DEBUG(DBG_LAYER_BEG, ((th->th_dport << 16) | th->th_sport),
 		     (((ip6->ip6_src.s6_addr16[0]) << 16) | (ip6->ip6_dst.s6_addr16[0])),
 		     th->th_seq, th->th_ack, th->th_win);
@@ -637,6 +875,11 @@ tcp_input(m, off0)
 			/* XXX stat */
 			goto dropnosock;
 		}
+		DTRACE_TCP5(receive, struct mbuf *, m, struct inpcb *, NULL,
+			struct ip6_hdr *, ip6, struct tcpcb *, NULL,
+			struct tcphdr *, th);
+
+		ip_ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
 	} else
 #endif /* INET6 */
 	{
@@ -662,6 +905,9 @@ tcp_input(m, off0)
 	th = (struct tcphdr *)((caddr_t)ip + off0);
 	tlen = ip->ip_len;
 
+	DTRACE_TCP5(receive, struct mbuf *, m, struct inpcb *, NULL,
+		struct ip *, ip, struct tcpcb *, NULL, struct tcphdr *, th);
+
 	KERNEL_DEBUG(DBG_LAYER_BEG, ((th->th_dport << 16) | th->th_sport),
 		     (((ip->ip_src.s_addr & 0xffff) << 16) | (ip->ip_dst.s_addr & 0xffff)),
 		     th->th_seq, th->th_ack, th->th_win);
@@ -821,6 +1067,10 @@ tcp_input(m, off0)
 	 * Locate pcb for segment.
 	 */
 findpcb:
+
+	isconnected = FALSE;
+	isdisconnected = FALSE;
+
 #if IPFIREWALL_FORWARD
 	if (next_hop != NULL
 #if INET6
@@ -871,6 +1121,13 @@ findpcb:
 	 */
 	if (inp != NULL && (inp->inp_flags & INP_BOUND_IF))
 		ifscope = inp->inp_boundif;
+	/*
+	 * If the PCB is present and the socket isn't allowed to use
+	 * the cellular interface, indicate it as such for tcp_respond.
+	 */
+	if (inp != NULL && (inp->inp_flags & INP_NO_IFT_CELLULAR))
+		nocell = 1;
+
 #if IPSEC
 	if (ipsec_bypass == 0)  {
 #if INET6
@@ -981,7 +1238,7 @@ findpcb:
 		goto dropnosock;
 	}
 
-	tcp_lock(so, 1, (void *)2);
+	tcp_lock(so, 1, 0);
 	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
 		tcp_unlock(so, 1, (void *)2);
 		inp = NULL;	// pretend we didn't find it 
@@ -1034,10 +1291,13 @@ findpcb:
 			struct inpcb *oinp = sotoinpcb(so);
 #endif /* INET6 */
 			unsigned int head_ifscope;
+			unsigned int head_nocell;
 
 			/* Get listener's bound-to-interface, if any */
 			head_ifscope = (inp->inp_flags & INP_BOUND_IF) ?
 			    inp->inp_boundif : IFSCOPE_NONE;
+			/* Get listener's no-cellular information, if any */
+			head_nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
 
 			/*
 			 * If the state is LISTEN then ignore segment if it contains an RST.
@@ -1130,11 +1390,18 @@ findpcb:
 			if (isipv6 && !ip6_use_deprecated) {
 				struct in6_ifaddr *ia6;
 
-				if ((ia6 = ip6_getdstifaddr(m)) &&
-				    (ia6->ia6_flags & IN6_IFF_DEPRECATED)) {
-					tp = NULL;
-					rstreason = BANDLIM_RST_OPENPORT;
-					goto dropwithreset;
+				ia6 = ip6_getdstifaddr(m);
+				if (ia6 != NULL) {
+					IFA_LOCK_SPIN(&ia6->ia_ifa);
+					if (ia6->ia6_flags & IN6_IFF_DEPRECATED) {
+						IFA_UNLOCK(&ia6->ia_ifa);
+						IFA_REMREF(&ia6->ia_ifa);
+						tp = NULL;
+						rstreason = BANDLIM_RST_OPENPORT;
+						goto dropwithreset;
+					}
+					IFA_UNLOCK(&ia6->ia_ifa);
+					IFA_REMREF(&ia6->ia_ifa);
 				}
 			}
 #endif
@@ -1203,14 +1470,16 @@ findpcb:
 			 * can only be set to a non-zero value earlier if
 			 * the listener has such a flag set.
 			 */
-#if INET6
-			if (head_ifscope != IFSCOPE_NONE && !isipv6) {
-#else
 			if (head_ifscope != IFSCOPE_NONE) {
-#endif /* INET6 */
 				inp->inp_flags |= INP_BOUND_IF;
 				inp->inp_boundif = head_ifscope;
 			}
+			/*
+			 * Inherit INP_NO_IFT_CELLULAR from listener.
+			 */
+			if (head_nocell) {
+				inp->inp_flags |= INP_NO_IFT_CELLULAR;
+			}
 #if INET6
 			if (isipv6)
 				inp->in6p_laddr = ip6->ip6_dst;
@@ -1277,8 +1546,11 @@ findpcb:
 			}
 #endif
 			/* inherit states from the listener */
+			DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
+				struct tcpcb *, tp, int32_t, TCPS_LISTEN);
 			tp->t_state = TCPS_LISTEN;
 			tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT|TF_NODELAY);
+			tp->t_flagsext |= (tp0->t_flagsext & TF_RXTFINDROP);
 			tp->t_keepinit = tp0->t_keepinit;
 			tp->t_inpcb->inp_ip_ttl = tp0->t_inpcb->inp_ip_ttl;
 
@@ -1300,9 +1572,8 @@ findpcb:
 			KERNEL_DEBUG(DBG_FNC_TCP_NEWCONN | DBG_FUNC_END,0,0,0,0,0);
 		}
 	}
-#if 1
-	lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
-#endif
+	lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
+
  	/*
  	 * Radar 3529618
 	 * This is the second part of the MSS DoS prevention code (after
@@ -1329,13 +1600,9 @@ findpcb:
 	 * this check.
 	 *
 	 * Account for packet if payload packet, skip over ACK, etc.
-	 *
-	 * The packet per second count is done all the time and is also used
-	 * by "DELAY_ACK" to detect streaming situations.
-	 *
 	 */
 	if (tp->t_state == TCPS_ESTABLISHED && tlen > 0) {
-		if (tp->rcv_reset > tcp_now) {
+		if (TSTMP_GT(tp->rcv_reset, tcp_now)) {
 			tp->rcv_pps++;
 			tp->rcv_byps += tlen + off;
 			if (tp->rcv_byps > tp->rcv_maxbyps)
@@ -1369,25 +1636,30 @@ findpcb:
 			tp->rcv_pps = 1;
 			tp->rcv_byps = tlen + off;
 		}
+
+		/* Evaluate the rate of arrival of packets to see if the
+		 * receiver can reduce the ack traffic. The algorithm to
+		 * stretch acks will be enabled if the connection meets
+		 * certain criteria defined in the tcp_stretch_ack_enable()
+		 * function.
+		 */
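+		/* Example (hypothetical values, for illustration only): with
+		 * maxseg_unacked == 8 and t_maxseg == 1448,
+		 * tcp_stretch_ack_enable() requires at least
+		 * 8 * 1448 = 11584 bytes to arrive within the current unack
+		 * window before acks are stretched.
+		 */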
+		if ((tp->t_flagsext & TF_RCVUNACK_WAITSS) != 0) {
+			tp->rcv_waitforss++;
+		}
+		if (tcp_stretch_ack_enable(tp)) {
+			tp->t_flags |= TF_STRETCHACK;
+			tp->t_flagsext &= ~(TF_RCVUNACK_WAITSS);
+			tp->rcv_waitforss = 0;
+		} else {
+			tp->t_flags &= ~(TF_STRETCHACK);
+		}
+		if (TSTMP_GT(tp->rcv_unackwin, tcp_now)) {
+			tp->rcv_by_unackwin += (tlen + off);
+		} else {
+			tp->rcv_unackwin = tcp_now + tcp_rcvunackwin;
+			tp->rcv_by_unackwin = tlen + off;
+		}
 	}
 	
-#if TRAFFIC_MGT
-	if (so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BG_REGULATE) {
-		tcpstat.tcps_bg_rcvtotal++;
-
-		 /* Take snapshots of pkts recv;
-		  * tcpcb should have been initialized to 0 when allocated, 
-		  * so if 0 then this is the first time we're doing this
-		  */
-		if (!tp->tot_recv_snapshot) {
-			 tp->tot_recv_snapshot = tcpstat.tcps_rcvtotal;
-		 }
-		if (!tp->bg_recv_snapshot) {
-			 tp->bg_recv_snapshot = tcpstat.tcps_bg_rcvtotal;
-		 }
-	}
-#endif /* TRAFFIC_MGT */
-
 	/*
 	   Explicit Congestion Notification - Flag that we need to send ECT if
 		+ The IP Congestion experienced flag was set.
@@ -1413,14 +1685,22 @@ findpcb:
 	if ((thflags & TH_CWR) == TH_CWR) {
 		tp->ecn_flags &= ~TE_SENDECE;
 	}
+
+	/* If we received an explicit notification of congestion, either in
+	 * the IP TOS ECN bits or via the CWR bit in the TCP header flags,
+	 * reset the ack-stretching state.
+	 */
+	if (tp->t_state == TCPS_ESTABLISHED && (tp->t_flags & TF_STRETCHACK) != 0 &&
+		((ip_ecn == IPTOS_ECN_CE) || ((thflags & TH_CWR) == TH_CWR)))
+		tcp_reset_stretch_ack(tp);
 	
 	/*
 	 * Segment received on connection.
 	 * Reset idle time and keep-alive timer.
 	 */
-	tp->t_rcvtime = 0;
+	tp->t_rcvtime = tcp_now;
 	if (TCPS_HAVEESTABLISHED(tp->t_state))
-		tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp);
+		tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, TCP_KEEPIDLE(tp));
 
 	/*
 	 * Process options if not in LISTEN state,
@@ -1451,6 +1731,64 @@ findpcb:
 		}
 	}
 
+#if TRAFFIC_MGT
+	/* Compute inter-packet arrival jitter. According to RFC 3550, inter-packet 
+	 * arrival jitter is defined as the difference in packet spacing at the 
+	 * receiver compared to the sender for a pair of packets. When two packets 
+	 * of maximum segment size come one after the other with consecutive 
+	 * sequence numbers, we consider them as packets sent together at the 
+	 * sender and use them as a pair to compute inter-packet arrival jitter.
+	 * This metric indicates the delay induced by the network components due
+	 * to queuing in edge/access routers.
+	 */
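+	/* For reference, RFC 3550 (section 6.4.1) smooths the jitter
+	 * estimate for a packet pair as
+	 *     J(i) = J(i-1) + (|D(i-1,i)| - J(i-1)) / 16
+	 * where D(i-1,i) is the difference in packet spacing at the
+	 * receiver relative to the sender; the smoothing details here are
+	 * left to compute_iaj().
+	 */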
+	if (tp->t_state == TCPS_ESTABLISHED &&
+	    (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK|TH_ECE|TH_PUSH)) == TH_ACK &&
+	    ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
+	    ((to.to_flags & TOF_TS) == 0 ||
+	    TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&
+	    th->th_seq == tp->rcv_nxt &&
+	    LIST_EMPTY(&tp->t_segq)) {
+		if (tp->iaj_pktcnt <= IAJ_IGNORE_PKTCNT) {
+			tp->iaj_pktcnt++;
+		}
+
+		if (tp->iaj_size == 0 || tlen > tp->iaj_size ||
+		    (tlen == tp->iaj_size && tp->iaj_rcv_ts == 0)) {
+			/* State related to inter-arrival jitter is uninitialized 
+			 * or we are trying to find a good first packet to start 
+			 * computing the metric
+			 */
+			update_iaj_state(tp, tlen, 0);
+		} else {
+			if (tlen == tp->iaj_size) {
+				/* Compute inter-arrival jitter taking this packet 
+				 * as the second packet
+				 */
+				compute_iaj(tp);
+			} 
+			if (tlen < tp->iaj_size) {
+				/* There is a smaller packet in the stream.
+				 * Sometimes the maximum size supported on a path
+				 * can change if there is a new link with a smaller
+				 * MTU. The receiver will not know about this change.
+				 * If there are too many packets smaller than
+				 * iaj_size, we try to learn iaj_size again.
+				 */
+				tp->iaj_small_pkt++;
+				if (tp->iaj_small_pkt > RESET_IAJ_SIZE_THRESH) {
+					update_iaj_state(tp, tlen, 1);
+				} else {
+					clear_iaj_state(tp);
+				}
+			} else {
+				update_iaj_state(tp, tlen, 0);
+			}
+		}
+	} else {
+		clear_iaj_state(tp);
+	}
+#endif /* TRAFFIC_MGT */
+
 	/*
 	 * Header prediction: check for the two common cases
 	 * of a uni-directional data xfer.  If the packet has
@@ -1498,11 +1836,10 @@ findpcb:
 			if (SEQ_GT(th->th_ack, tp->snd_una) &&
 			    SEQ_LEQ(th->th_ack, tp->snd_max) &&
 			    tp->snd_cwnd >= tp->snd_ssthresh &&
-			    ((!tcp_do_newreno && !tp->sack_enable &&
-			      tp->t_dupacks < tcprexmtthresh) ||
-			     ((tcp_do_newreno || tp->sack_enable) &&
-			      !IN_FASTRECOVERY(tp) && to.to_nsacks == 0 &&
-			      TAILQ_EMPTY(&tp->snd_holes)))) {
+			    (!IN_FASTRECOVERY(tp) && 
+			    ((!tp->sack_enable && tp->t_dupacks < tcprexmtthresh) ||
+			     (tp->sack_enable && to.to_nsacks == 0 &&
+			      TAILQ_EMPTY(&tp->snd_holes))))) {
 				/*
 				 * this is a pure ack for outstanding data.
 				 */
@@ -1511,7 +1848,7 @@ findpcb:
 				 * "bad retransmit" recovery
 				 */
 				if (tp->t_rxtshift == 1 &&
-				    tcp_now < tp->t_badrxtwin) {
+				    TSTMP_LT(tcp_now, tp->t_badrxtwin)) {
 					++tcpstat.tcps_sndrexmitbad;
 					tp->snd_cwnd = tp->snd_cwnd_prev;
 					tp->snd_ssthresh =
@@ -1521,6 +1858,11 @@ findpcb:
 					    ENTER_FASTRECOVERY(tp);
 					tp->snd_nxt = tp->snd_max;
 					tp->t_badrxtwin = 0;
+					tp->t_rxtshift = 0;
+					tp->rxt_start = 0;
+					DTRACE_TCP5(cc, void, NULL, struct inpcb *, tp->t_inpcb,
+						struct tcpcb *, tp, struct tcphdr *, th,
+						int32_t, TCP_CC_BAD_REXMT_RECOVERY);
 				}
 				/*
 				 * Recalculate the transmit timer / rtt.
@@ -1530,33 +1872,29 @@ findpcb:
 				 * timestamps of 0 or we could calculate a
 				 * huge RTT and blow up the retransmit timer.
 				 */
-				if (((to.to_flags & TOF_TS) != 0) && (to.to_tsecr != 0)) { /* Makes sure we already have a TS */
-					if (!tp->t_rttlow ||
-					    tp->t_rttlow > tcp_now - to.to_tsecr)
-						tp->t_rttlow = tcp_now - to.to_tsecr;
+				if (((to.to_flags & TOF_TS) != 0) && (to.to_tsecr != 0) &&
+					TSTMP_GEQ(tcp_now, to.to_tsecr)) { 
 					tcp_xmit_timer(tp,
 					    tcp_now - to.to_tsecr);
 				} else if (tp->t_rtttime &&
 					    SEQ_GT(th->th_ack, tp->t_rtseq)) {
-					if (!tp->t_rttlow ||
-					    tp->t_rttlow > tcp_now - tp->t_rtttime)
-						tp->t_rttlow = tcp_now - tp->t_rtttime;
-					tcp_xmit_timer(tp, tp->t_rtttime);
+					tcp_xmit_timer(tp, tcp_now - tp->t_rtttime);
 				}
 				acked = th->th_ack - tp->snd_una;
 				tcpstat.tcps_rcvackpack++;
 				tcpstat.tcps_rcvackbyte += acked;
-				/*
-				 * Grow the congestion window, if the
-				 * connection is cwnd bound.
+
+				/* Handle an ack that is in sequence during congestion
+				 * avoidance phase. The calculations in this function 
+				 * assume that snd_una is not updated yet. 
 				 */
-			    	if (tp->snd_cwnd < tp->snd_wnd) {
-					tp->t_bytes_acked += acked;
-					if (tp->t_bytes_acked > tp->snd_cwnd) {
-						tp->t_bytes_acked -= tp->snd_cwnd;
-						tp->snd_cwnd += tp->t_maxseg;
-					}
-				}
+				if (CC_ALGO(tp)->inseq_ack_rcvd != NULL)
+					CC_ALGO(tp)->inseq_ack_rcvd(tp, th);
+
+				DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
+					struct tcpcb *, tp, struct tcphdr *, th,
+					int32_t, TCP_CC_INSEQ_ACK_RCVD);
+
 				sbdrop(&so->so_snd, acked);
 				if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
 				    SEQ_LEQ(th->th_ack, tp->snd_recover))
@@ -1583,13 +1921,14 @@ findpcb:
 				if (tp->snd_una == tp->snd_max)
 					tp->t_timer[TCPT_REXMT] = 0;
 				else if (tp->t_timer[TCPT_PERSIST] == 0)
-					tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+					tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur);
 
 				sowwakeup(so); /* has to be done with socket lock held */
 				if ((so->so_snd.sb_cc) || (tp->t_flags & TF_ACKNOW)) {
-			    		tp->t_unacksegs = 0;
 					(void) tcp_output(tp);
 				}
+
+				tcp_check_timer_state(tp);
 				tcp_unlock(so, 1, 0);
 				KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0);
 				return;
@@ -1619,10 +1958,15 @@ findpcb:
 			tp->rcv_up = tp->rcv_nxt;
 			tcpstat.tcps_rcvpack++;
 			tcpstat.tcps_rcvbyte += tlen;
+			if (nstat_collect) {
+				locked_add_64(&inp->inp_stat->rxpackets, 1);
+				locked_add_64(&inp->inp_stat->rxbytes, tlen);
+			}
 			ND6_HINT(tp);	/* some progress has been done */
 			/*
 			 * Add data to socket buffer.
 			 */
+			so_recv_data_stat(so, m, 0);
 			m_adj(m, drop_hdrlen);	/* delayed header drop */
 			if (sbappendstream(&so->so_rcv, m))
 				sorwakeup(so);
@@ -1639,14 +1983,17 @@ findpcb:
 		     			(((ip->ip_src.s_addr & 0xffff) << 16) | (ip->ip_dst.s_addr & 0xffff)),
 			     		th->th_seq, th->th_ack, th->th_win); 
 			}
-			if (DELAY_ACK(tp))  {
-			    	tp->t_flags |= TF_DELACK;
+			if (DELAY_ACK(tp, th))  {
+				if ((tp->t_flags & TF_DELACK) == 0) {
+			    		tp->t_flags |= TF_DELACK;
+					tp->t_timer[TCPT_DELACK] = OFFSET_FROM_START(tp, tcp_delack);
+				}
 			    	tp->t_unacksegs++;
 			} else {
-			    	tp->t_unacksegs = 0;
 				tp->t_flags |= TF_ACKNOW;
 				tcp_output(tp);
 			}
+			tcp_check_timer_state(tp);
 			tcp_unlock(so, 1, 0);
 			KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0);
 			return;
@@ -1659,9 +2006,8 @@ findpcb:
 	 * Receive window is amount of space in rcv queue,
 	 * but not less than advertised window.
 	 */
-#if 1
-	lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
-#endif
+	lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
+
 	{ int win;
 
 	win = tcp_sbspace(tp);
@@ -1692,9 +2038,7 @@ findpcb:
 		register struct sockaddr_in6 *sin6;
 #endif
 
-#if 1
-		lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
-#endif
+		lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
 #if INET6
 		if (isipv6) {
 			MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6,
@@ -1719,9 +2063,7 @@ findpcb:
 		} else
 #endif
 	    {
-#if 0
-			lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
-#endif
+			lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
 			MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
 		       M_NOWAIT);
 			if (sin == NULL)
@@ -1734,7 +2076,7 @@ findpcb:
 			laddr = inp->inp_laddr;
 			if (inp->inp_laddr.s_addr == INADDR_ANY)
 				inp->inp_laddr = ip->ip_dst;
-			if (in_pcbconnect(inp, (struct sockaddr *)sin, proc0)) {
+			if (in_pcbconnect(inp, (struct sockaddr *)sin, proc0, NULL)) {
 				inp->inp_laddr = laddr;
 				FREE(sin, M_SONAME);
 				goto drop;
@@ -1768,9 +2110,15 @@ findpcb:
 		tp->snd_wnd = tiwin;	/* initial send-window */
 		tp->t_flags |= TF_ACKNOW;
 		tp->t_unacksegs = 0;
+		DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
+			struct tcpcb *, tp, int32_t, TCPS_SYN_RECEIVED);
 		tp->t_state = TCPS_SYN_RECEIVED;
-		tp->t_timer[TCPT_KEEP] = tp->t_keepinit ? tp->t_keepinit : tcp_keepinit;
+		tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, 
+			tp->t_keepinit ? tp->t_keepinit : tcp_keepinit);
 		dropsocket = 0;		/* committed to socket */
+
+		/* reset the incomp processing flag */
+		so->so_flags &= ~(SOF_INCOMP_INPROGRESS);
 		tcpstat.tcps_accepts++;
 		if ((thflags & (TH_ECE | TH_CWR)) == (TH_ECE | TH_CWR)) {
 			/* ECN-setup SYN */
@@ -1860,13 +2208,15 @@ findpcb:
 			 * If there's data, delay ACK; if there's also a FIN
 			 * ACKNOW will be turned on later.
 			 */
-			if (DELAY_ACK(tp) && tlen != 0) {
-				tp->t_flags |= TF_DELACK;
+			if (DELAY_ACK(tp, th) && tlen != 0) {
+				if ((tp->t_flags & TF_DELACK) == 0) {
+					tp->t_flags |= TF_DELACK;
+					tp->t_timer[TCPT_DELACK] = OFFSET_FROM_START(tp, tcp_delack);
+				}
 			    	tp->t_unacksegs++;
 			}
 			else {
 				tp->t_flags |= TF_ACKNOW;
-			    	tp->t_unacksegs = 0;
 			}
 			/*
 			 * Received <SYN,ACK> in SYN_SENT[*] state.
@@ -1874,30 +2224,35 @@ findpcb:
 			 *	SYN_SENT  --> ESTABLISHED
 			 *	SYN_SENT* --> FIN_WAIT_1
 			 */
-			tp->t_starttime = 0;
+			tp->t_starttime = tcp_now;
 			if (tp->t_flags & TF_NEEDFIN) {
+				DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
+					struct tcpcb *, tp, int32_t, TCPS_FIN_WAIT_1);
 				tp->t_state = TCPS_FIN_WAIT_1;
 				tp->t_flags &= ~TF_NEEDFIN;
 				thflags &= ~TH_SYN;
 			} else {
+				DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
+					struct tcpcb *, tp, int32_t, TCPS_ESTABLISHED);
 				tp->t_state = TCPS_ESTABLISHED;
-				tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp);
+				tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, TCP_KEEPIDLE(tp));
+				if (nstat_collect)
+					nstat_route_connect_success(tp->t_inpcb->inp_route.ro_rt);
 			}
-			/* soisconnected may lead to socket_unlock in case of upcalls,
-			 * make sure this is done when everything is setup.
-			 */
-			soisconnected(so);
+			isconnected = TRUE;
 		} else {
-		/*
-		 *  Received initial SYN in SYN-SENT[*] state => simul-
-		 *  taneous open.  If segment contains CC option and there is
-		 *  a cached CC, apply TAO test; if it succeeds, connection is
-		 *  half-synchronized.  Otherwise, do 3-way handshake:
-		 *        SYN-SENT -> SYN-RECEIVED
-		 *        SYN-SENT* -> SYN-RECEIVED*
-		 */
+			/*
+			 *  Received initial SYN in SYN-SENT[*] state => simul-
+			 *  taneous open.  If segment contains CC option and there is
+			 *  a cached CC, apply TAO test; if it succeeds, connection is
+			 *  half-synchronized.  Otherwise, do 3-way handshake:
+			 *        SYN-SENT -> SYN-RECEIVED
+			 *        SYN-SENT* -> SYN-RECEIVED*
+			 */
 			tp->t_flags |= TF_ACKNOW;
 			tp->t_timer[TCPT_REXMT] = 0;
+			DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
+				struct tcpcb *, tp, int32_t, TCPS_SYN_RECEIVED);
 			tp->t_state = TCPS_SYN_RECEIVED;
 
 		}
@@ -2044,6 +2399,8 @@ trimthenstep6:
 				so->so_error = ECONNRESET;
 			close:
 				postevent(so, 0, EV_RESET);
+				DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
+					struct tcpcb *, tp, int32_t, TCPS_CLOSED);
 				tp->t_state = TCPS_CLOSED;
 				tcpstat.tcps_drops++;
 				tp = tcp_close(tp);
@@ -2061,10 +2418,6 @@ trimthenstep6:
 		goto drop;
 	}
 
-#if 0
-	lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
-#endif
-
 	/*
 	 * RFC 1323 PAWS: If we have a timestamp reply on this segment
 	 * and it's less than ts_recent, drop it.
@@ -2090,6 +2443,12 @@ trimthenstep6:
 			tcpstat.tcps_rcvduppack++;
 			tcpstat.tcps_rcvdupbyte += tlen;
 			tcpstat.tcps_pawsdrop++;
+			if (nstat_collect) {
+				nstat_route_rx(tp->t_inpcb->inp_route.ro_rt, 1, tlen, NSTAT_RX_FLAG_DUPLICATE);
+				locked_add_64(&inp->inp_stat->rxpackets, 1);
+				locked_add_64(&inp->inp_stat->rxbytes, tlen);
+				tp->t_stat.rxduplicatebytes += tlen;
+			}
 			if (tlen)
 				goto dropafterack;
 			goto drop;
@@ -2136,7 +2495,6 @@ trimthenstep6:
 			 * But keep on processing for RST or ACK.
 			 */
 			tp->t_flags |= TF_ACKNOW;
-			tp->t_unacksegs = 0;
 			todrop = tlen;
 			tcpstat.tcps_rcvduppack++;
 			tcpstat.tcps_rcvdupbyte += todrop;
@@ -2144,6 +2502,12 @@ trimthenstep6:
 			tcpstat.tcps_rcvpartduppack++;
 			tcpstat.tcps_rcvpartdupbyte += todrop;
 		}
+		if (nstat_collect) {
+			nstat_route_rx(tp->t_inpcb->inp_route.ro_rt, 1, todrop, NSTAT_RX_FLAG_DUPLICATE);
+			locked_add_64(&inp->inp_stat->rxpackets, 1);
+			locked_add_64(&inp->inp_stat->rxbytes, todrop);
+			tp->t_stat.rxduplicatebytes += todrop;
+		}
 		drop_hdrlen += todrop;	/* drop from the top afterwards */
 		th->th_seq += todrop;
 		tlen -= todrop;
@@ -2199,7 +2563,6 @@ trimthenstep6:
 			 */
 			if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
 				tp->t_flags |= TF_ACKNOW;
-			    	tp->t_unacksegs = 0;
 				tcpstat.tcps_rcvwinprobe++;
 			} else
 				goto dropafterack;
@@ -2281,19 +2644,27 @@ trimthenstep6:
 			(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 			tp->snd_scale = tp->requested_s_scale;
 			tp->rcv_scale = tp->request_r_scale;
+			tp->snd_wnd = th->th_win << tp->snd_scale;
+			tiwin = tp->snd_wnd;
 		}
 		/*
 		 * Make transitions:
 		 *      SYN-RECEIVED  -> ESTABLISHED
 		 *      SYN-RECEIVED* -> FIN-WAIT-1
 		 */
-		tp->t_starttime = 0;
+		tp->t_starttime = tcp_now;
 		if (tp->t_flags & TF_NEEDFIN) {
+			DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
+				struct tcpcb *, tp, int32_t, TCPS_FIN_WAIT_1);
 			tp->t_state = TCPS_FIN_WAIT_1;
 			tp->t_flags &= ~TF_NEEDFIN;
 		} else {
+			DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
+				struct tcpcb *, tp, int32_t, TCPS_ESTABLISHED);
 			tp->t_state = TCPS_ESTABLISHED;
-			tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp);
+			tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, TCP_KEEPIDLE(tp));
+			if (nstat_collect)
+				nstat_route_connect_success(tp->t_inpcb->inp_route.ro_rt);
 		}
 		/*
 		 * If segment contains data or ACK, will call tcp_reass()
@@ -2306,10 +2677,7 @@ trimthenstep6:
 
 		/* FALLTHROUGH */
 
-		/* soisconnected may lead to socket_unlock in case of upcalls,
-		 * make sure this is done when everything is setup.
-		 */
-		soisconnected(so);
+		isconnected = TRUE;
 
 	/*
 	 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
@@ -2364,8 +2732,7 @@ trimthenstep6:
 				    th->th_ack != tp->snd_una)
 					tp->t_dupacks = 0;
 				else if (++tp->t_dupacks > tcprexmtthresh ||
-					 ((tcp_do_newreno || tp->sack_enable) &&
-					  IN_FASTRECOVERY(tp))) {
+					  IN_FASTRECOVERY(tp)) {
 					if (tp->sack_enable && IN_FASTRECOVERY(tp)) {
 						int awnd;
 						
@@ -2384,12 +2751,15 @@ trimthenstep6:
 						}
 					} else
 						tp->snd_cwnd += tp->t_maxseg;
-					tp->t_unacksegs = 0;
+
+					DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
+						struct tcpcb *, tp, struct tcphdr *, th,
+						int32_t, TCP_CC_IN_FASTRECOVERY);
+
 					(void) tcp_output(tp);
 					goto drop;
 				} else if (tp->t_dupacks == tcprexmtthresh) {
 					tcp_seq onxt = tp->snd_nxt;
-					u_int win;
 
 					/*
 					 * If we're doing sack, check to
@@ -2403,18 +2773,21 @@ trimthenstep6:
 							tp->t_dupacks = 0;
 							break;
 						}
-					} else if (tcp_do_newreno) {
+					} else {
 						if (SEQ_LEQ(th->th_ack,
 						    tp->snd_recover)) {
 							tp->t_dupacks = 0;
 							break;
 						}
 					}
-					win = min(tp->snd_wnd, tp->snd_cwnd) /
-					    2 / tp->t_maxseg;
-					if (win < 2)
-						win = 2;
-					tp->snd_ssthresh = win * tp->t_maxseg;
+					
+					/*
+					 * If the current tcp cc module has 
+					 * defined a hook for tasks to run
+					 * before entering FR, call it
+					 */
+					if (CC_ALGO(tp)->pre_fr != NULL)
+						CC_ALGO(tp)->pre_fr(tp, th);
 					ENTER_FASTRECOVERY(tp);
 					tp->snd_recover = tp->snd_max;
 					tp->t_timer[TCPT_REXMT] = 0;
@@ -2424,18 +2797,24 @@ trimthenstep6:
 						tcpstat.tcps_sack_recovery_episode++;
 						tp->sack_newdata = tp->snd_nxt;
 						tp->snd_cwnd = tp->t_maxseg;
-						tp->t_unacksegs = 0;
+
+						DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
+							struct tcpcb *, tp, struct tcphdr *, th,
+							int32_t, TCP_CC_ENTER_FASTRECOVERY);
+
 						(void) tcp_output(tp);
 						goto drop;
 					}
 					tp->snd_nxt = th->th_ack;
 					tp->snd_cwnd = tp->t_maxseg;
-					tp->t_unacksegs = 0;
 					(void) tcp_output(tp);
 					tp->snd_cwnd = tp->snd_ssthresh +
 					     tp->t_maxseg * tp->t_dupacks;
 					if (SEQ_GT(onxt, tp->snd_nxt))
 						tp->snd_nxt = onxt;
+					DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
+						struct tcpcb *, tp, struct tcphdr *, th,
+						int32_t, TCP_CC_ENTER_FASTRECOVERY);
 					goto drop;
 				}
 			} else
@@ -2446,69 +2825,33 @@ trimthenstep6:
 		 * If the congestion window was inflated to account
 		 * for the other side's cached packets, retract it.
 		 */
-		if (!IN_FASTRECOVERY(tp)) {
+		if (IN_FASTRECOVERY(tp)) {
+			if (SEQ_LT(th->th_ack, tp->snd_recover)) {
+				if (tp->sack_enable)
+					tcp_sack_partialack(tp, th);
+				else
+					tcp_newreno_partial_ack(tp, th);			
+				
+				DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
+					struct tcpcb *, tp, struct tcphdr *, th,
+					int32_t, TCP_CC_PARTIAL_ACK);
+			} else {
+				EXIT_FASTRECOVERY(tp);
+				if (CC_ALGO(tp)->post_fr != NULL)
+					CC_ALGO(tp)->post_fr(tp, th);
+				tp->t_dupacks = 0;
+
+				DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
+					struct tcpcb *, tp, struct tcphdr *, th,
+					int32_t, TCP_CC_EXIT_FASTRECOVERY);
+			}
+		} else {
 			/*
-			 * We were not in fast recovery.  Reset the duplicate ack
+			 * We were not in fast recovery. Reset the duplicate ack
 			 * counter.
 			 */
 			tp->t_dupacks = 0;
 		}
-		/*
-		 * If the congestion window was inflated to account
-		 * for the other side's cached packets, retract it.
-		 */
-		else {
-			if (tcp_do_newreno || tp->sack_enable) {
-				if (SEQ_LT(th->th_ack, tp->snd_recover)) {
-					if (tp->sack_enable)
-						tcp_sack_partialack(tp, th);
-					else
-						tcp_newreno_partial_ack(tp, th);			
-				}
-				else {
-					if (tcp_do_newreno) {
-						int32_t ss = tp->snd_max - th->th_ack;
-	
-						/*
-						 * Complete ack.  Inflate the congestion window to
-						 * ssthresh and exit fast recovery.
-						 *
-						 * Window inflation should have left us with approx.
-						 * snd_ssthresh outstanding data.  But in case we
-						 * would be inclined to send a burst, better to do
-						 * it via the slow start mechanism.
-						 */
-						if (ss < tp->snd_ssthresh)
-							tp->snd_cwnd = ss + tp->t_maxseg;
-						else
-							tp->snd_cwnd = tp->snd_ssthresh;
-					}
-					else {
-						/*
-						 * Clamp the congestion window to the crossover point
-						 * and exit fast recovery.
-						 */
-						if (tp->snd_cwnd > tp->snd_ssthresh)
-							tp->snd_cwnd = tp->snd_ssthresh;					
-					}
-	
-					EXIT_FASTRECOVERY(tp);
-					tp->t_dupacks = 0;
-					tp->t_bytes_acked = 0;
-				}
-			}
-			else {
-				/*
-				 * Clamp the congestion window to the crossover point
-				 * and exit fast recovery in non-newreno and non-SACK case.
-				 */
-				if (tp->snd_cwnd > tp->snd_ssthresh)
-					tp->snd_cwnd = tp->snd_ssthresh;					
-				EXIT_FASTRECOVERY(tp);
-				tp->t_dupacks = 0;
-				tp->t_bytes_acked = 0;
-			}
-		}
 
 
 		/*
@@ -2545,7 +2888,8 @@ process_ACK:
 		 * original cwnd and ssthresh, and proceed to transmit where
 		 * we left off.
 		 */
-		if (tp->t_rxtshift == 1 && tcp_now < tp->t_badrxtwin) {
+		if (tp->t_rxtshift == 1 && 
+			TSTMP_LT(tcp_now, tp->t_badrxtwin)) {
 			++tcpstat.tcps_sndrexmitbad;
 			tp->snd_cwnd = tp->snd_cwnd_prev;
 			tp->snd_ssthresh = tp->snd_ssthresh_prev;
@@ -2554,6 +2898,12 @@ process_ACK:
 				ENTER_FASTRECOVERY(tp);
 			tp->snd_nxt = tp->snd_max;
 			tp->t_badrxtwin = 0;	/* XXX probably not required */ 
+			tp->t_rxtshift = 0;
+			tp->rxt_start = 0;
+
+			DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
+				struct tcpcb *, tp, struct tcphdr *, th,
+				int32_t, TCP_CC_BAD_REXMT_RECOVERY);
 		}
 
 		/*
@@ -2571,14 +2921,11 @@ process_ACK:
 		 * timestamps of 0 or we could calculate a
 		 * huge RTT and blow up the retransmit timer.
 		 */
-		if (((to.to_flags & TOF_TS) != 0) && (to.to_tsecr != 0)) {
-			if (!tp->t_rttlow || tp->t_rttlow > tcp_now - to.to_tsecr)
-				tp->t_rttlow = tcp_now - to.to_tsecr;
+		if (((to.to_flags & TOF_TS) != 0) && (to.to_tsecr != 0) &&
+			TSTMP_GEQ(tcp_now, to.to_tsecr)) {
 			tcp_xmit_timer(tp, tcp_now - to.to_tsecr);
 		} else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) {
-			if (!tp->t_rttlow || tp->t_rttlow > tcp_now - tp->t_rtttime)
-				tp->t_rttlow = tcp_now - tp->t_rtttime;
-			tcp_xmit_timer(tp, tp->t_rtttime);
+			tcp_xmit_timer(tp, tcp_now - tp->t_rtttime);
 		}
 
 		/*
@@ -2591,7 +2938,7 @@ process_ACK:
 			tp->t_timer[TCPT_REXMT] = 0;
 			needoutput = 1;
 		} else if (tp->t_timer[TCPT_PERSIST] == 0)
-			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+			tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur);
 
 		/*
 		 * If no data (only SYN) was ACK'd,
@@ -2600,78 +2947,35 @@ process_ACK:
 		if (acked == 0)
 			goto step6;
 
-		/*
-		 * When new data is acked, open the congestion window.
-		 */
 		if ((thflags & TH_ECE) != 0 &&
 			(tp->ecn_flags & TE_SETUPSENT) != 0) {
 			/*
 			 * Reduce the congestion window if we haven't done so.
 			 */
-			if (!(tp->sack_enable && IN_FASTRECOVERY(tp)) &&
-				!(tcp_do_newreno && SEQ_LEQ(th->th_ack, tp->snd_recover))) {
-				tcp_reduce_congestion_window(tp);
+			if (!tp->sack_enable && !IN_FASTRECOVERY(tp) &&
+				SEQ_GEQ(th->th_ack, tp->snd_recover)) {
+				tcp_reduce_congestion_window(tp, th);
+				DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
+					struct tcpcb *, tp, struct tcphdr *, th, 
+					int32_t, TCP_CC_ECN_RCVD);
 			}
-		} else if ((!tcp_do_newreno && !tp->sack_enable) ||
-		    !IN_FASTRECOVERY(tp)) {
-			/*
-			 * RFC 3465 - Appropriate Byte Counting.
-			 *
-			 * If the window is currently less than ssthresh,
-			 * open the window by the number of bytes ACKed by
-			 * the last ACK, however clamp the window increase
-			 * to an upper limit "L".
-			 *
-			 * In congestion avoidance phase, open the window by
-			 * one segment each time "bytes_acked" grows to be
-			 * greater than or equal to the congestion window.
-			 */
-
-			register u_int cw = tp->snd_cwnd;
-			register u_int incr = tp->t_maxseg;
-
-			if (tcp_do_rfc3465) {
-
-				if (cw >= tp->snd_ssthresh) {
-					tp->t_bytes_acked += acked;
-					if (tp->t_bytes_acked >= cw) {
-					/* Time to increase the window. */
-						tp->t_bytes_acked -= cw;
-					} else {
-					/* No need to increase yet. */
-						incr = 0;
-					}
-				} else {
-					/*
-					 * If the user explicitly enables RFC3465
-					 * use 2*SMSS for the "L" param.  Otherwise
-					 * use the more conservative 1*SMSS.
-					 *
-					 * (See RFC 3465 2.3 Choosing the Limit)
-					 */
-					u_int abc_lim;
-
-					abc_lim = (tcp_do_rfc3465_lim2 &&
-						tp->snd_nxt == tp->snd_max) ? incr * 2 : incr;
-
-					incr = lmin(acked, abc_lim);
-				}
-			}
-			else {
-				/*
-  				 * If the window gives us less than ssthresh packets
-			   	 * in flight, open exponentially (segsz per packet).
-				 * Otherwise open linearly: segsz per window
-				 * (segsz^2 / cwnd per packet).
-				 */
-		
-					if (cw >= tp->snd_ssthresh) {
-						incr = max((incr * incr / cw), 1);
-					}
-			}
-
+		}
 
-			tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
+		/*
+		 * When new data is acked, open the congestion window.
+		 * The specifics of how this is achieved are up to the
+		 * congestion control algorithm in use for this connection.
+		 *
+		 * The calculations in this function assume that snd_una is
+		 * not updated yet.
+		 */
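+		/* (The RFC 3465 appropriate byte counting logic that was
+		 * removed above is expected to live in the per-algorithm
+		 * ack_rcvd handlers, e.g. the NewReno module selected by
+		 * TCP_CC_ALGO_NEWRENO_INDEX elsewhere in this patch.)
+		 */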
+		if (!IN_FASTRECOVERY(tp)) {
+			if (CC_ALGO(tp)->ack_rcvd != NULL)
+				CC_ALGO(tp)->ack_rcvd(tp, th);
+
+			DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
+				struct tcpcb *, tp, struct tcphdr *, th,
+				int32_t, TCP_CC_ACK_RCVD);
 		}
 		if (acked > so->so_snd.sb_cc) {
 			tp->snd_wnd -= so->so_snd.sb_cc;
@@ -2683,15 +2987,15 @@ process_ACK:
 			ourfinisacked = 0;
 		}
 		/* detect una wraparound */
-		if ((tcp_do_newreno || tp->sack_enable) &&
-		    !IN_FASTRECOVERY(tp) &&
+		if (!IN_FASTRECOVERY(tp) &&
 		    SEQ_GT(tp->snd_una, tp->snd_recover) &&
 		    SEQ_LEQ(th->th_ack, tp->snd_recover))
 			tp->snd_recover = th->th_ack - 1;
-		if ((tcp_do_newreno || tp->sack_enable) &&
-		    IN_FASTRECOVERY(tp) &&
+
+		if (IN_FASTRECOVERY(tp) &&
 		    SEQ_GEQ(th->th_ack, tp->snd_recover))
 			EXIT_FASTRECOVERY(tp);
+
 		tp->snd_una = th->th_ack;
 		if (tp->sack_enable) {
 			if (SEQ_GT(tp->snd_una, tp->snd_recover))
@@ -2723,10 +3027,12 @@ process_ACK:
 				 * we'll hang forever.
 				 */
 				if (so->so_state & SS_CANTRCVMORE) {
-					tp->t_timer[TCPT_2MSL] = tcp_maxidle;
-					add_to_time_wait(tp);
-					soisdisconnected(so);
+					add_to_time_wait(tp, tcp_maxidle);
+					isconnected = FALSE;
+					isdisconnected = TRUE;
 				}
+				DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
+					struct tcpcb *, tp, int32_t, TCPS_FIN_WAIT_2);
 				tp->t_state = TCPS_FIN_WAIT_2;
 				/* fall through and make sure we also recognize data ACKed with the FIN */
 			}
@@ -2741,17 +3047,18 @@ process_ACK:
 		 */
 		case TCPS_CLOSING:
 			if (ourfinisacked) {
+				DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
+					struct tcpcb *, tp, int32_t, TCPS_TIME_WAIT);
 				tp->t_state = TCPS_TIME_WAIT;
 				tcp_canceltimers(tp);
 				/* Shorten TIME_WAIT [RFC-1644, p.28] */
 				if (tp->cc_recv != 0 &&
-				    tp->t_starttime < (u_int32_t)tcp_msl)
-					tp->t_timer[TCPT_2MSL] =
-					    tp->t_rxtcur * TCPTV_TWTRUNC;
+				    ((int)(tcp_now - tp->t_starttime)) < tcp_msl)
+					add_to_time_wait(tp, tp->t_rxtcur * TCPTV_TWTRUNC);
 				else
-					tp->t_timer[TCPT_2MSL] = 2 * tcp_msl;
-				add_to_time_wait(tp);
-				soisdisconnected(so);
+					add_to_time_wait(tp, 2 * tcp_msl);
+				isconnected = FALSE;
+				isdisconnected = TRUE;
 			}
 			tp->t_flags |= TF_ACKNOW;
 			break;
@@ -2775,8 +3082,7 @@ process_ACK:
 		 * it and restart the finack timer.
 		 */
 		case TCPS_TIME_WAIT:
-			tp->t_timer[TCPT_2MSL] = 2 * tcp_msl;
-			add_to_time_wait(tp);
+			add_to_time_wait(tp, 2 * tcp_msl);
 			goto dropafterack;
 		}
 	}
@@ -2856,7 +3162,7 @@ step6:
 		     )
 			tcp_pulloutofband(so, th, m,
 				drop_hdrlen);	/* hdr drop is delayed */
-	} else
+	} else {
 		/*
 		 * If no out of band data is expected,
 		 * pull receive urgent pointer along
@@ -2864,8 +3170,27 @@ step6:
 		 */
 		if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
 			tp->rcv_up = tp->rcv_nxt;
-dodata:							/* XXX */
+	}
+dodata:
 
+	/* Set the socket's connect or disconnect state correctly before
+	 * processing data. The following might unlock the socket if there
+	 * is an upcall or a socket filter.
+	 */
+	if (isconnected) {
+		soisconnected(so);
+	} else if (isdisconnected) {
+		soisdisconnected(so);
+	}
+
+	/* Check the state of the pcb to make sure that it did not get closed
+	 * while we unlocked above.
+	 */
+	if (inp->inp_state == INPCB_STATE_DEAD) {
+		/* Just drop the packet that we are processing and return */
+		goto drop;
+	}
+
 	/*
 	 * Process the segment text, merging it into the TCP sequencing queue,
 	 * and arranging for acknowledgment of receipt if necessary.
@@ -2894,25 +3219,31 @@ dodata:							/* XXX */
 		if (th->th_seq == tp->rcv_nxt &&
 		    LIST_EMPTY(&tp->t_segq) &&
 		    TCPS_HAVEESTABLISHED(tp->t_state)) {
-			if (DELAY_ACK(tp) && ((tp->t_flags & TF_ACKNOW) == 0)) {
-				tp->t_flags |= TF_DELACK;
+			if (DELAY_ACK(tp, th) && ((tp->t_flags & TF_ACKNOW) == 0)) {
+				if ((tp->t_flags & TF_DELACK) == 0) {
+					tp->t_flags |= TF_DELACK;
+					tp->t_timer[TCPT_DELACK] = OFFSET_FROM_START(tp, tcp_delack);
+				}
 			    	tp->t_unacksegs++;
 			}         
 			else {
-			    	tp->t_unacksegs = 0;
 				tp->t_flags |= TF_ACKNOW;
 			}
 			tp->rcv_nxt += tlen;
 			thflags = th->th_flags & TH_FIN;
 			tcpstat.tcps_rcvpack++;
 			tcpstat.tcps_rcvbyte += tlen;
+			if (nstat_collect) {
+				locked_add_64(&inp->inp_stat->rxpackets, 1);
+				locked_add_64(&inp->inp_stat->rxbytes, tlen);
+			}
 			ND6_HINT(tp);
+			so_recv_data_stat(so, m, drop_hdrlen);
 			if (sbappendstream(&so->so_rcv, m))
 				sorwakeup(so);
 		} else {
 			thflags = tcp_reass(tp, th, &tlen, m);
 			tp->t_flags |= TF_ACKNOW;
-			tp->t_unacksegs = 0;
 		}
 
 		if (tlen > 0 && tp->sack_enable)
@@ -2965,13 +3296,15 @@ dodata:							/* XXX */
 			 * Otherwise, since we received a FIN then no
 			 * more input can be expected, send ACK now.
 			 */
-			if (DELAY_ACK(tp) && (tp->t_flags & TF_NEEDSYN)) {
-				tp->t_flags |= TF_DELACK;
+			if (DELAY_ACK(tp, th) && (tp->t_flags & TF_NEEDSYN)) {
+				if ((tp->t_flags & TF_DELACK) == 0) {
+					tp->t_flags |= TF_DELACK;
+					tp->t_timer[TCPT_DELACK] = OFFSET_FROM_START(tp, tcp_delack);
+				}
 			    	tp->t_unacksegs++;
 			}
 			else {
 				tp->t_flags |= TF_ACKNOW;
-			    	tp->t_unacksegs = 0;
 			}
 			tp->rcv_nxt++;
 		}
@@ -2982,8 +3315,10 @@ dodata:							/* XXX */
 		 * enter the CLOSE_WAIT state.
 		 */
 		case TCPS_SYN_RECEIVED:
-			tp->t_starttime = 0;
+			tp->t_starttime = tcp_now;
 		case TCPS_ESTABLISHED:
+			DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
+				struct tcpcb *, tp, int32_t, TCPS_CLOSE_WAIT);
 			tp->t_state = TCPS_CLOSE_WAIT;
 			break;
 
@@ -2992,6 +3327,8 @@ dodata:							/* XXX */
 		 * enter the CLOSING state.
 		 */
 		case TCPS_FIN_WAIT_1:
+			DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
+				struct tcpcb *, tp, int32_t, TCPS_CLOSING);
 			tp->t_state = TCPS_CLOSING;
 			break;
 
@@ -3001,21 +3338,20 @@ dodata:							/* XXX */
 		 * standard timers.
 		 */
 		case TCPS_FIN_WAIT_2:
+			DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
+				struct tcpcb *, tp, int32_t, TCPS_TIME_WAIT);
 			tp->t_state = TCPS_TIME_WAIT;
 			tcp_canceltimers(tp);
 			/* Shorten TIME_WAIT [RFC-1644, p.28] */
 			if (tp->cc_recv != 0 &&
-			    tp->t_starttime < (u_int32_t)tcp_msl) {
-				tp->t_timer[TCPT_2MSL] =
-				    tp->t_rxtcur * TCPTV_TWTRUNC;
+				((int)(tcp_now - tp->t_starttime)) < tcp_msl) {
+				add_to_time_wait(tp, tp->t_rxtcur * TCPTV_TWTRUNC);
 				/* For transaction client, force ACK now. */
 				tp->t_flags |= TF_ACKNOW;
 			    	tp->t_unacksegs = 0;
 			}
 			else
-				tp->t_timer[TCPT_2MSL] = 2 * tcp_msl;
-
-			add_to_time_wait(tp);
+				add_to_time_wait(tp, 2 * tcp_msl);
 			soisdisconnected(so);
 			break;
 
@@ -3023,8 +3359,7 @@ dodata:							/* XXX */
 		 * In TIME_WAIT state restart the 2 MSL time_wait timer.
 		 */
 		case TCPS_TIME_WAIT:
-			tp->t_timer[TCPT_2MSL] = 2 * tcp_msl;
-			add_to_time_wait(tp);
+			add_to_time_wait(tp, 2 * tcp_msl);
 			break;
 		}
 	}
@@ -3038,9 +3373,12 @@ dodata:							/* XXX */
 	 * Return any desired output.
 	 */
 	if (needoutput || (tp->t_flags & TF_ACKNOW)) {
-		tp->t_unacksegs = 0;
 		(void) tcp_output(tp);
 	}
+
+	tcp_check_timer_state(tp);
+
 	tcp_unlock(so, 1, 0);
 	KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0);
 	return;
@@ -3074,8 +3412,9 @@ dropafterack:
 #endif
 	m_freem(m);
 	tp->t_flags |= TF_ACKNOW;
-    	tp->t_unacksegs = 0;
 	(void) tcp_output(tp);
+
+	/* No need to check timer state; it should have been done in tcp_output */
 	tcp_unlock(so, 1, 0);
 	KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0);
 	return;
@@ -3119,22 +3458,22 @@ dropwithreset:
 	if (thflags & TH_ACK)
 		/* mtod() below is safe as long as hdr dropping is delayed */
 		tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0, th->th_ack,
-		    TH_RST, ifscope);
+		    TH_RST, ifscope, nocell);
 	else {
 		if (thflags & TH_SYN)
 			tlen++;
 		/* mtod() below is safe as long as hdr dropping is delayed */
 		tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen,
-		    (tcp_seq)0, TH_RST|TH_ACK, ifscope);
+		    (tcp_seq)0, TH_RST|TH_ACK, ifscope, nocell);
 	}
 	/* destroy temporarily created socket */
 	if (dropsocket) {
 		(void) soabort(so); 
 		tcp_unlock(so, 1, 0);
 	}
-	else
-		if ((inp != NULL) && (nosock == 0))
-			tcp_unlock(so, 1, 0);
+	else if ((inp != NULL) && (nosock == 0)) {
+		tcp_unlock(so, 1, 0);
+	}
 	KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0);
 	return;
 dropnosock:
@@ -3154,9 +3493,9 @@ drop:
 		(void) soabort(so); 
 		tcp_unlock(so, 1, 0);
 	}
-	else
-		if (nosock == 0)
-			tcp_unlock(so, 1, 0);
+	else if (nosock == 0) {
+		tcp_unlock(so, 1, 0);
+	}
 	KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0);
 	return;
 }
@@ -3301,6 +3640,36 @@ tcp_pulloutofband(so, th, m, off)
 	panic("tcp_pulloutofband");
 }
 
+/* Return the lowest RTT observed in the last N_RTT_BASE measurement
+ * slots, or 0 if no RTT sample has been recorded yet.
+ */
+uint32_t
+get_base_rtt(struct tcpcb *tp)
+{
+	uint32_t base_rtt = 0, i;
+	for (i = 0; i < N_RTT_BASE; ++i) {
+		if (tp->rtt_hist[i] != 0 &&
+			(base_rtt == 0 || tp->rtt_hist[i] < base_rtt))
+			base_rtt = tp->rtt_hist[i];
+	}
+	return base_rtt;
+}
+
+/* Each slot in the RTT history holds the minimum RTT seen during one
+ * minute. We keep up to N_RTT_BASE minutes' worth of history.
+ */
+void
+update_base_rtt(struct tcpcb *tp, uint32_t rtt)
+{
+	if (++tp->rtt_count >= rtt_samples_per_slot) {
+		int i = 0;
+		for (i = (N_RTT_BASE-1); i > 0; --i) {
+			tp->rtt_hist[i] = tp->rtt_hist[i-1];
+		}
+		tp->rtt_hist[0] = rtt;
+		tp->rtt_count = 0;
+	} else {
+		tp->rtt_hist[0] = min(tp->rtt_hist[0], rtt);
+	}
+}
+
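+/* Illustration (hypothetical numbers): with N_RTT_BASE == 3 and per-slot
+ * minima of {12, 15, 9} ms in rtt_hist (newest first), get_base_rtt()
+ * returns 9 ms; once the current slot accumulates rtt_samples_per_slot
+ * samples, the history shifts right and the oldest slot minimum is
+ * dropped.
+ */
+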
 /*
  * Collect new round-trip time estimate
  * and update averages and current timeout.
@@ -3314,15 +3683,26 @@ tcp_xmit_timer(tp, rtt)
 
 	tcpstat.tcps_rttupdated++;
 	tp->t_rttupdated++;
+
+	if (rtt > 0) {
+		tp->t_rttcur = rtt;
+		update_base_rtt(tp, rtt);
+	}
+
 	if (tp->t_srtt != 0) {
 		/*
 		 * srtt is stored as fixed point with 5 bits after the
-		 * binary point (i.e., scaled by 8).  The following magic
+		 * binary point (i.e., scaled by 32).  The following magic
 		 * is equivalent to the smoothing algorithm in rfc793 with
 		 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
-		 * point).  Adjust rtt to origin 0.
+		 * point).
+		 *
+		 * FreeBSD adjusts rtt to origin 0 by subtracting 1 from the
+		 * provided rtt value. This was required because of the way
+		 * t_rtttime was initialized to 1 before. Since we changed
+		 * t_rtttime to be based on tcp_now, this extra adjustment is
+		 * not needed.
 		 */
-		delta = ((rtt - 1) << TCP_DELTA_SHIFT)
+		delta = (rtt << TCP_DELTA_SHIFT)
 			- (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
 
 		if ((tp->t_srtt += delta) <= 0)
@@ -3355,8 +3735,10 @@ tcp_xmit_timer(tp, rtt)
 		tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
 		tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
 	}
+	nstat_route_rtt(tp->t_inpcb->inp_route.ro_rt, tp->t_srtt, tp->t_rttvar);
 	tp->t_rtttime = 0;
 	tp->t_rxtshift = 0;
+	tp->rxt_start = 0;
 
 	/*
 	 * the retransmit should happen at rtt + 4 * rttvar.
@@ -3370,7 +3752,8 @@ tcp_xmit_timer(tp, rtt)
 	 * the minimum feasible timer (which is 2 ticks).
 	 */
 	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
-		      max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
+		max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX, 
+		TCP_ADD_REXMTSLOP(tp));
 
 	/*
 	 * We received an ack for a packet that wasn't retransmitted;
@@ -3471,12 +3854,14 @@ tcp_mss(tp, offer, input_ifscope)
 
 #if INET6
 	if (isipv6) {
-		rt = tcp_rtlookup6(inp);
+		rt = tcp_rtlookup6(inp, input_ifscope);
 		if (rt != NULL &&
 		    (IN6_IS_ADDR_LOOPBACK(&inp->in6p_faddr) ||
 		    IN6_IS_ADDR_LINKLOCAL(&inp->in6p_faddr) ||
-		    rt->rt_gateway->sa_family == AF_LINK))
-		         isnetlocal = TRUE;
+		    rt->rt_gateway->sa_family == AF_LINK ||
+		    in6_localaddr(&inp->in6p_faddr))) {
+			tp->t_flags |= TF_LOCAL;
+		}
 	}
 	else
 #endif /* INET6 */
@@ -3484,9 +3869,13 @@ tcp_mss(tp, offer, input_ifscope)
 		rt = tcp_rtlookup(inp, input_ifscope);
 		if (rt != NULL &&
 		    (rt->rt_gateway->sa_family == AF_LINK ||
-		    rt->rt_ifp->if_flags & IFF_LOOPBACK))
-		         isnetlocal = TRUE;
+		    rt->rt_ifp->if_flags & IFF_LOOPBACK ||
+		    in_localaddr(inp->inp_faddr))) {
+			tp->t_flags |= TF_LOCAL;
+		}
 	}
+	isnetlocal = (tp->t_flags & TF_LOCAL);
+
 	if (rt == NULL) {
 		tp->t_maxopd = tp->t_maxseg =
 #if INET6
@@ -3554,7 +3943,7 @@ tcp_mss(tp, offer, input_ifscope)
 		if (rt->rt_rmx.rmx_locks & RTV_RTT)
 			tp->t_rttmin = rtt / (RTM_RTTUNIT / TCP_RETRANSHZ);
 		else
-			tp->t_rttmin = isnetlocal ? tcp_TCPTV_MIN : TCP_RETRANSHZ;
+			tp->t_rttmin = isnetlocal ? tcp_TCPTV_MIN : TCPTV_REXMTMIN;
 		tp->t_srtt = rtt / (RTM_RTTUNIT / (TCP_RETRANSHZ * TCP_RTT_SCALE));
 		tcpstat.tcps_usedrtt++;
 		if (rt->rt_rmx.rmx_rttvar) {
@@ -3568,10 +3957,11 @@ tcp_mss(tp, offer, input_ifscope)
 		}
 		TCPT_RANGESET(tp->t_rxtcur,
 			      ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
-			      tp->t_rttmin, TCPTV_REXMTMAX);
+			      tp->t_rttmin, TCPTV_REXMTMAX, 
+			      TCP_ADD_REXMTSLOP(tp));
 	}
 	else
-		tp->t_rttmin = isnetlocal ? tcp_TCPTV_MIN : TCP_RETRANSHZ;
+		tp->t_rttmin = isnetlocal ? tcp_TCPTV_MIN : TCPTV_REXMTMIN;
 
 #if INET6
 	mss = (isipv6 ? tcp_maxmtu6(rt) : tcp_maxmtu(rt));
@@ -3651,14 +4041,7 @@ tcp_mss(tp, offer, input_ifscope)
 		(void)sbreserve(&so->so_rcv, bufsize);
 	}
 
-	/*
-	 * Set the slow-start flight size depending on whether this
-	 * is a local network or not.
-	 */
-	if (isnetlocal)
-		tp->snd_cwnd = mss * ss_fltsz_local;
-	else 
-		tp->snd_cwnd = mss * ss_fltsz;
+	set_tcp_stream_priority(so);
 
 	if (rt->rt_rmx.rmx_ssthresh) {
 		/*
@@ -3673,6 +4056,17 @@ tcp_mss(tp, offer, input_ifscope)
 		tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	}
 
+
+	/*
+	 * Set the slow-start flight size depending on whether this
+	 * is a local network or not.
+	 */
+	if (CC_ALGO(tp)->cwnd_init != NULL)
+		CC_ALGO(tp)->cwnd_init(tp);
+
+	DTRACE_TCP5(cc, void, NULL, struct inpcb *, tp->t_inpcb, struct tcpcb *, tp,
+		struct tcphdr *, NULL, int32_t, TCP_CC_CWND_INIT);
+
 	/* Route locked during lookup above */
 	RT_UNLOCK(rt);
 }
@@ -3701,7 +4095,7 @@ tcp_mssopt(tp)
 
 #if INET6
 	if (isipv6)
-		rt = tcp_rtlookup6(tp->t_inpcb);
+		rt = tcp_rtlookup6(tp->t_inpcb, IFSCOPE_NONE);
 	else
 #endif /* INET6 */
 	rt = tcp_rtlookup(tp->t_inpcb, IFSCOPE_NONE);
@@ -3736,7 +4130,7 @@ tcp_mssopt(tp)
 /*
  * When a partial ack arrives, force the retransmission of the
  * next unacknowledged segment.  Do not clear tp->t_dupacks.
- * By setting snd_nxt to ti_ack, this forces retransmission timer to
+ * By setting snd_nxt to th_ack, this forces retransmission timer to
  * be started again.
  */
 static void
@@ -3756,7 +4150,6 @@ tcp_newreno_partial_ack(tp, th)
 		 */
 		tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una);
 		tp->t_flags |= TF_ACKNOW;
-	    	tp->t_unacksegs = 0;
 		(void) tcp_output(tp);
 		tp->snd_cwnd = ocwnd;
 		if (SEQ_GT(onxt, tp->snd_nxt))
@@ -3828,11 +4221,13 @@ tcp_dropdropablreq(struct socket *head)
 			 * and being dropped by another input thread.
 			 * If we can't get a hold on this mutex, then grab the next socket in line.
 			 */
-			if (lck_mtx_try_lock(inp->inpcb_mtx)) {
+			if (lck_mtx_try_lock(&inp->inpcb_mtx)) {
 				so->so_usecount++;
-				if ((so->so_usecount == 2) && so->so_state & SS_INCOMP) 
+				if ((so->so_usecount == 2) && 
+				    (so->so_state & SS_INCOMP) != 0 &&
+				    (so->so_flags & SOF_INCOMP_INPROGRESS) == 0) 
 					break;
-				else {/* don't use if beeing accepted or used in any other way */
+				else {/* don't use if being accepted or used in any other way */
 					in_pcb_checkstate(inp, WNT_RELEASE, 1);
 					tcp_unlock(so, 1, 0);
 				}
@@ -3851,44 +4246,120 @@ tcp_dropdropablreq(struct socket *head)
 	if (!so)
 		return 0;
 
-	TAILQ_REMOVE(&head->so_incomp, so, so_list);
-	tcp_unlock(head, 0, 0);
-
 	/* Makes sure socket is still in the right state to be discarded */
 
 	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
 		tcp_unlock(so, 1, 0);
-		tcp_lock(head, 0, 0);
 		return 0;
 	}
 
 	if (so->so_usecount != 2 || !(so->so_state & SS_INCOMP)) {
-		/* do not discard: that socket is beeing accepted */
+		/* do not discard: that socket is being accepted */
 		tcp_unlock(so, 1, 0);
-		tcp_lock(head, 0, 0);
 		return 0;
 	}
 
-	so->so_head = NULL;
+	TAILQ_REMOVE(&head->so_incomp, so, so_list);
+	tcp_unlock(head, 0, 0);
 
-	/* 
-	 * We do not want to lose track of the PCB right away in case we receive 
-	 * more segments from the peer
-	 */
+	lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
 	tp = sototcpcb(so);
 	so->so_flags |= SOF_OVERFLOW;
-	tp->t_state = TCPS_TIME_WAIT;
-	(void) tcp_close(tp);
+	so->so_head = NULL;
+
+	tcp_close(tp);
 	tp->t_unacksegs = 0;
+
+	if (inp->inp_wantcnt > 0 && inp->inp_wantcnt != WNT_STOPUSING) {
+		/* Someone has a wantcnt on this pcb. Since WNT_ACQUIRE
+		 * doesn't require a lock, it could have happened while
+		 * we were holding the lock. This pcb will have to be
+		 * garbage collected later.
+		 * Release the reference held for the so_incomp queue.
+		 */
+		so->so_usecount--;
+
+		tcp_unlock(so, 1, 0);
+	} else {
+		/* Unlock this socket and leave the reference on. We need to
+		 * acquire the pcbinfo lock in order to fully dispose it off 
+		 */
+		tcp_unlock(so, 0, 0);
+
+		lck_rw_lock_exclusive(tcbinfo.mtx);
+
+		tcp_lock(so, 0, 0);
+
+		/* Release the reference held for the so_incomp queue */
+		so->so_usecount--;
+
+		if (so->so_usecount != 1 || 
+		    (inp->inp_wantcnt > 0 && inp->inp_wantcnt != WNT_STOPUSING)) {
+			/* There is an extra wantcount or usecount that must
+			 * have been added when the socket was unlocked. This
+			 * socket will have to be garbage collected later
+			 */
+			tcp_unlock(so, 1, 0);
+		} else {
+
+			/* Drop the reference held for this function */
+			so->so_usecount--;
+
+			in_pcbdispose(inp);
+		}
+		lck_rw_done(tcbinfo.mtx);
+	}
 	tcpstat.tcps_drops++;
-	tcp_canceltimers(tp);
-	add_to_time_wait(tp);
-	
-	tcp_unlock(so, 1, 0);
+
 	tcp_lock(head, 0, 0);
 	head->so_incqlen--;
 	head->so_qlen--;
-	return 1;
+	return(1);
+}
+
+/* Set background congestion control on a socket */
+void
+tcp_set_background_cc(struct socket *so)
+{
+	tcp_set_new_cc(so, TCP_CC_ALGO_BACKGROUND_INDEX);
+}
+
+/* Set foreground congestion control on a socket */
+void
+tcp_set_foreground_cc(struct socket *so)
+{
+	tcp_set_new_cc(so, TCP_CC_ALGO_NEWRENO_INDEX);
+}
+
+static void
+tcp_set_new_cc(struct socket *so, uint16_t cc_index)
+{
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp = intotcpcb(inp);
+	uint16_t old_cc_index = 0;
+
+	if (tp->tcp_cc_index != cc_index) {
+		old_cc_index = tp->tcp_cc_index;
+
+		if (CC_ALGO(tp)->cleanup != NULL)
+			CC_ALGO(tp)->cleanup(tp);
+		tp->tcp_cc_index = cc_index;
+
+		/* Decide if the connection is just starting or if
+		 * we have sent some packets on it.
+		 */
+		if (tp->snd_nxt > tp->iss) {
+			/* Already sent some packets */
+			if (CC_ALGO(tp)->switch_to != NULL)
+				CC_ALGO(tp)->switch_to(tp, old_cc_index);
+		} else {	
+			if (CC_ALGO(tp)->init != NULL)
+				CC_ALGO(tp)->init(tp);
+		}
+		DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp,
+			struct tcpcb *, tp, struct tcphdr *, NULL,
+			int32_t, TCP_CC_CHANGE_ALGO);
+	}
 }
 
 static int
@@ -3908,7 +4379,7 @@ tcp_getstat SYSCTL_HANDLER_ARGS
 
 }
 
-SYSCTL_PROC(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RD, 0, 0,
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
     tcp_getstat, "S,tcpstat", "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
 
 static int
@@ -3936,5 +4407,5 @@ sysctl_rexmtthresh SYSCTL_HANDLER_ARGS
 	return (0);
 }
 
-SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmt_thresh, CTLTYPE_INT|CTLFLAG_RW,
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmt_thresh, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
 	&tcprexmtthresh, 0, &sysctl_rexmtthresh, "I", "Duplicate ACK Threshold for Fast Retransmit");
diff --git a/bsd/netinet/tcp_ledbat.c b/bsd/netinet/tcp_ledbat.c
new file mode 100644
index 000000000..5baf28bea
--- /dev/null
+++ b/bsd/netinet/tcp_ledbat.c
@@ -0,0 +1,434 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/protosw.h>
+#include <sys/mcache.h>
+#include <sys/sysctl.h>
+
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+
+#if INET6
+#include <netinet/ip6.h>
+#endif
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_cc.h>
+
+#include <libkern/OSAtomic.h>
+
+/* This file implements an alternate TCP congestion control algorithm
+ * for background transport developed by the LEDBAT working group at the
+ * IETF and described in the draft draft-ietf-ledbat-congestion-02.
+ */
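+
+/* For orientation (paraphrasing the draft, not code in this file): the
+ * draft's nominal controller scales the window change by how far the
+ * measured queuing delay is from the target, roughly
+ *     cwnd += GAIN * off_target * bytes_newly_acked * MSS / cwnd;
+ * the implementation below uses a simplified variant: an RFC 3465 style
+ * additive increase while off_target >= 0 and a one-eighth multiplicative
+ * decrease otherwise (see update_cwnd()).
+ */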
+
+int tcp_ledbat_init(struct tcpcb *tp);
+int tcp_ledbat_cleanup(struct tcpcb *tp);
+void tcp_ledbat_cwnd_init(struct tcpcb *tp);
+void tcp_ledbat_inseq_ack_rcvd(struct tcpcb *tp, struct tcphdr *th);
+void tcp_ledbat_ack_rcvd(struct tcpcb *tp, struct tcphdr *th);
+void tcp_ledbat_pre_fr(struct tcpcb *tp, struct tcphdr *th);
+void tcp_ledbat_post_fr(struct tcpcb *tp, struct tcphdr *th);
+void tcp_ledbat_after_idle(struct tcpcb *tp);
+void tcp_ledbat_after_timeout(struct tcpcb *tp);
+int tcp_ledbat_delay_ack(struct tcpcb *tp, struct tcphdr *th);
+void tcp_ledbat_switch_cc(struct tcpcb *tp, uint16_t old_cc_index);
+
+struct tcp_cc_algo tcp_cc_ledbat = {
+	.name = "ledbat",
+	.init = tcp_ledbat_init,
+	.cleanup = tcp_ledbat_cleanup,
+	.cwnd_init = tcp_ledbat_cwnd_init,
+	.inseq_ack_rcvd = tcp_ledbat_inseq_ack_rcvd,
+	.ack_rcvd = tcp_ledbat_ack_rcvd,
+	.pre_fr = tcp_ledbat_pre_fr,
+	.post_fr = tcp_ledbat_post_fr,
+	.after_idle = tcp_ledbat_after_idle,
+	.after_timeout = tcp_ledbat_after_timeout,
+	.delay_ack = tcp_ledbat_delay_ack,
+	.switch_to = tcp_ledbat_switch_cc
+};
+
+extern int tcp_do_rfc3465;
+extern int tcp_do_rfc3465_lim2;
+extern uint32_t get_base_rtt(struct tcpcb *tp);
+
+/* Target queuing delay in milliseconds. This includes the processing 
+ * and scheduling delay on both of the end-hosts. A LEDBAT sender tries 
+ * to keep queuing delay below this limit. When the queuing delay
+ * goes above this limit, a LEDBAT sender will start reducing the 
+ * congestion window.
+ *
+ * The LEDBAT draft says that the target queuing delay MUST be 100 ms
+ * for interoperability.
+ */
+int target_qdelay = 100;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, bg_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKED, 
+	&target_qdelay, 100, "Target queuing delay");
+
+/* Allowed increase and tether are used to place an upper bound on the
+ * congestion window based on the amount of data that is outstanding.
+ * This will limit the congestion window when the amount of data in
+ * flight is small because the application is writing to the socket
+ * intermittently and is preventing the connection from becoming idle.
+ *
+ * max_allowed_cwnd = allowed_increase + (tether * flight_size)
+ * cwnd = min(cwnd, max_allowed_cwnd)
+ *
+ * The 'allowed_increase' parameter is set to 2. If the flight size is
+ * zero, we want the congestion window to be at least 2 packets to reduce
+ * the delay induced by delayed acks. This helps when the receiver is
+ * acking every other packet.
+ *
+ * 'Tether' is also set to 2. We do not want this to limit the growth of
+ * cwnd during slow-start.
+ */
+int allowed_increase = 2;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, bg_allowed_increase, CTLFLAG_RW | CTLFLAG_LOCKED, 
+	&allowed_increase, 1, "Additive constant used to calculate max allowed congestion window");
+
+/* Left shift applied to the flight size to yield a tether value of 2 */
+int tether_shift = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, bg_tether_shift, CTLFLAG_RW | CTLFLAG_LOCKED, 
+	&tether_shift, 1, "Tether shift for max allowed congestion window");
+
+/* Start with an initial window of 2 packets. This helps to get a more
+ * accurate minimum RTT measurement in the beginning, probes the path
+ * slowly, and does not add to the existing delay if the path is already
+ * congested. Using 2 packets also reduces the delay induced by delayed
+ * acks.
+ */
+uint32_t bg_ss_fltsz = 2;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, bg_ss_fltsz, CTLFLAG_RW | CTLFLAG_LOCKED,
+	&bg_ss_fltsz, 2, "Initial congestion window for background transport");
+
+extern int rtt_samples_per_slot;
+
+static void update_cwnd(struct tcpcb *tp, uint32_t incr) {
+	uint32_t max_allowed_cwnd = 0, flight_size = 0;
+	uint32_t qdelay, base_rtt;
+	int32_t off_target;
+
+	base_rtt = get_base_rtt(tp);
+
+	/* If we do not have a good RTT measurement yet, increment
+	 * congestion window by the default value.  
+	 */
+	if (base_rtt == 0 || tp->t_rttcur == 0) {
+		tp->snd_cwnd += incr;
+		goto check_max;
+	}
+		
+	qdelay = tp->t_rttcur - base_rtt;
+	off_target = (int32_t)(target_qdelay - qdelay);
+
+	if (off_target >= 0) {
+		/* Delay decreased or remained the same, so we can increase
+		 * the congestion window according to RFC 3465.
+		 *
+		 * Move the background slow-start threshold up to the current
+		 * congestion window so that the next time (after some idle
+		 * period) we can attempt to slow-start back up to this point
+		 * if there is no increase in RTT.
+		 */
+		if (tp->bg_ssthresh < tp->snd_cwnd)
+			tp->bg_ssthresh = tp->snd_cwnd;
+		tp->snd_cwnd += incr;	
+
+	} else {
+		/* In response to an increase in rtt, reduce the congestion 
+		 * window by one-eighth. This will help to yield immediately 
+		 * to a competing stream.
+		 */
+		uint32_t redwin;
+
+		redwin = tp->snd_cwnd >> 3;  
+		tp->snd_cwnd -= redwin;
+		if (tp->snd_cwnd < bg_ss_fltsz * tp->t_maxseg)
+			tp->snd_cwnd = bg_ss_fltsz * tp->t_maxseg;
+
+		/* Lower background slow-start threshold so that the connection 
+		 * will go into congestion avoidance phase
+		 */
+		if (tp->bg_ssthresh > tp->snd_cwnd)
+			tp->bg_ssthresh = tp->snd_cwnd;
+	}
+check_max:
+	/* Calculate the outstanding flight size and restrict the
+	 * congestion window to a factor of flight size.
+	 */
+	flight_size = tp->snd_max - tp->snd_una;
+
+	max_allowed_cwnd = (allowed_increase * tp->t_maxseg) 
+		+ (flight_size << tether_shift);
+	tp->snd_cwnd = min(tp->snd_cwnd, max_allowed_cwnd);
+	return;
+}
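+/* Worked example of the clamp above, with hypothetical values: for a
+ * 1448-byte MSS, allowed_increase = 2 and tether_shift = 1, a flight size
+ * of 10000 bytes gives max_allowed_cwnd = (2 * 1448) + (10000 << 1) =
+ * 22896 bytes. With nothing in flight, cwnd is clamped to 2 * 1448 = 2896
+ * bytes, the two-packet minimum described above.
+ */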
+
+int tcp_ledbat_init(struct tcpcb *tp) {
+#pragma unused(tp)
+	OSIncrementAtomic((volatile SInt32 *)&tcp_cc_ledbat.num_sockets);
+	return 0;
+}
+
+int tcp_ledbat_cleanup(struct tcpcb *tp) {
+#pragma unused(tp)
+	OSDecrementAtomic((volatile SInt32 *)&tcp_cc_ledbat.num_sockets);
+	return 0;
+}
+
+/* Initialize the congestion window for a connection. */
+
+void
+tcp_ledbat_cwnd_init(struct tcpcb *tp) {
+	tp->snd_cwnd = tp->t_maxseg * bg_ss_fltsz;
+	tp->bg_ssthresh = tp->snd_ssthresh;
+}
+
+/* Handle an in-sequence ack during fast-path processing in tcp_input
+ * (header prediction). This gets called only during the congestion
+ * avoidance phase.
+ */
+void
+tcp_ledbat_inseq_ack_rcvd(struct tcpcb *tp, struct tcphdr *th) {
+	int acked = 0;
+	u_int32_t incr = 0;
+
+	acked = th->th_ack - tp->snd_una;
+	tp->t_bytes_acked += acked;
+	if (tp->t_bytes_acked > tp->snd_cwnd) {
+		tp->t_bytes_acked -= tp->snd_cwnd;
+		incr = tp->t_maxseg;
+	}
+
+	if (tp->snd_cwnd < tp->snd_wnd && incr > 0) {
+		update_cwnd(tp, incr);
+	}
+}
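+/* Example of the byte counting above, with hypothetical values: with cwnd
+ * at 14480 bytes, ten successive 1448-byte ACKs bring t_bytes_acked to
+ * 14480; the eleventh pushes it past cwnd, so t_bytes_acked is reduced by
+ * cwnd and the window grows by one t_maxseg -- roughly one MSS per
+ * window's worth of acknowledged data.
+ */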
+/* Function to process an ack.
+ */
+void
+tcp_ledbat_ack_rcvd(struct tcpcb *tp, struct tcphdr *th) {
+	/*
+	 * RFC 3465 - Appropriate Byte Counting.
+	 *
+	 * If the window is currently less than ssthresh,
+	 * open the window by the number of bytes ACKed by
+	 * the last ACK, however clamp the window increase
+	 * to an upper limit "L".
+	 *
+	 * In congestion avoidance phase, open the window by
+	 * one segment each time "bytes_acked" grows to be
+	 * greater than or equal to the congestion window.
+	 */
+
+	register u_int cw = tp->snd_cwnd;
+	register u_int incr = tp->t_maxseg;
+	int acked = 0;
+
+	acked = th->th_ack - tp->snd_una;
+	tp->t_bytes_acked += acked;
+	if (cw >= tp->bg_ssthresh) {
+		/* congestion-avoidance */
+		if (tp->t_bytes_acked < cw) {
+			/* No need to increase yet. */
+			incr = 0;
+		}
+	} else {
+		/*
+		 * If the user explicitly enables RFC3465
+		 * use 2*SMSS for the "L" param.  Otherwise
+		 * use the more conservative 1*SMSS.
+		 *
+		 * (See RFC 3465 2.3 Choosing the Limit)
+		 */
+		u_int abc_lim;
+
+		abc_lim = (tcp_do_rfc3465_lim2 &&
+			tp->snd_nxt == tp->snd_max) ? incr * 2 : incr;
+
+		incr = lmin(acked, abc_lim);
+	}
+	if (tp->t_bytes_acked >= cw)
+		tp->t_bytes_acked -= cw;
+	if (incr > 0) 
+		update_cwnd(tp, incr);
+}
+
+void
+tcp_ledbat_pre_fr(struct tcpcb *tp, struct tcphdr *th) {
+#pragma unused(th)
+
+	uint32_t win;
+
+	win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
+	if (win < 2)
+		win = 2;
+	tp->snd_ssthresh = win * tp->t_maxseg; 
+	if (tp->bg_ssthresh > tp->snd_ssthresh)
+		tp->bg_ssthresh = tp->snd_ssthresh;
+}
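+/* Example of the halving above, with hypothetical values: for a 1448-byte
+ * MSS and min(snd_wnd, snd_cwnd) = 20000 bytes, win = 20000 / 2 / 1448 = 6
+ * segments, so snd_ssthresh becomes 8688 bytes. The floor of 2 segments
+ * keeps ssthresh from collapsing below 2896 bytes on very small windows.
+ */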
+
+void
+tcp_ledbat_post_fr(struct tcpcb *tp, struct tcphdr *th) {
+	int32_t ss;
+
+	ss = tp->snd_max - th->th_ack;
+
+	/*
+	 * Complete ack.  Inflate the congestion window to
+	 * ssthresh and exit fast recovery.
+	 *
+	 * Window inflation should have left us with approx.
+	 * snd_ssthresh outstanding data.  But in case we
+	 * would be inclined to send a burst, better to do
+	 * it via the slow start mechanism.
+	 */
+	if (ss < (int32_t)tp->snd_ssthresh)
+		tp->snd_cwnd = ss + tp->t_maxseg;
+	else
+		tp->snd_cwnd = tp->snd_ssthresh;
+	tp->t_bytes_acked = 0;
+}
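+/* Example of the post-recovery deflation above, with hypothetical values:
+ * if snd_ssthresh is 8688 bytes and only 4000 bytes remain outstanding
+ * when the recovery ACK arrives, cwnd is set to 4000 + t_maxseg rather
+ * than the full ssthresh, so any remaining backlog drains through slow
+ * start instead of going out as a burst.
+ */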
+
+/*
+ * Function to handle connections that have been idle for
+ * some time. Slow start to get ack "clock" running again.
+ * Clear base history after idle time.
+ */
+void
+tcp_ledbat_after_idle(struct tcpcb *tp) {
+	int32_t n = N_RTT_BASE, i = (N_RTT_BASE - 1);
+
+	/* Decide how many base history entries have to be cleared 
+	 * based on how long the connection has been idle.
+	 */
+	
+	if (tp->t_rttcur > 0) {
+		int32_t nrtt, idle_time;
+
+		idle_time = tcp_now - tp->t_rcvtime;
+		nrtt = idle_time / tp->t_rttcur; 
+		n = nrtt / rtt_samples_per_slot;
+		if (n > N_RTT_BASE)
+			n = N_RTT_BASE;
+	}
+	for (i = (N_RTT_BASE - 1); n > 0; --i, --n) {
+		tp->rtt_hist[i] = 0;
+	}
+	for (n = (N_RTT_BASE - 1); i >= 0; --i, --n) {
+		tp->rtt_hist[n] = tp->rtt_hist[i];
+		tp->rtt_hist[i] = 0;
+	}
+	
+	/* Reset the congestion window */
+	tp->snd_cwnd = tp->t_maxseg * bg_ss_fltsz;
+}
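+/* Illustrative trace of the shift above, assuming a history of 4 slots
+ * (the actual depth is N_RTT_BASE from tcp_var.h): if the idle time spans
+ * two slots, the first loop zeroes rtt_hist[3] and rtt_hist[2]; the second
+ * loop then moves rtt_hist[1] into rtt_hist[3] and rtt_hist[0] into
+ * rtt_hist[2] and zeroes the vacated low slots, freeing two slots for
+ * fresh measurements.
+ */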
+
+/* Function to change the congestion window when the retransmit
+ * timer fires. The behavior is the same as that for best-effort
+ * TCP: reduce the congestion window to one segment and start probing
+ * the link using "slow start". The slow-start threshold is set
+ * to half of the current window. Lower the background slow-start
+ * threshold as well.
+ */
+void
+tcp_ledbat_after_timeout(struct tcpcb *tp) {
+	if (tp->t_state >=  TCPS_ESTABLISHED) {
+		u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
+		if (win < 2)
+			win = 2;
+		tp->snd_cwnd = tp->t_maxseg;
+		tp->snd_ssthresh = win * tp->t_maxseg;
+		tp->t_bytes_acked = 0;
+		tp->t_dupacks = 0;
+
+		if (tp->bg_ssthresh > tp->snd_ssthresh)
+			tp->bg_ssthresh = tp->snd_ssthresh;
+	}
+}
+
+/*
+ * Indicate whether this ack should be delayed.
+ * We can delay the ack if:
+ *      - our last ack wasn't a 0-sized window.
+ *      - the peer hasn't sent us a TH_PUSH data packet: if it did, take
+ *	  this as a clue that we need to ACK without any delay. This helps
+ *	  higher-level protocols that won't send us more data even if the
+ *	  window is open, because their last "segment" hasn't been ACKed.
+ * Otherwise the receiver will ack every other full-sized segment or when
+ * the delayed ack timer fires. This helps to generate better RTT estimates
+ * for the other end if it is a LEDBAT sender.
+ */
+
+int
+tcp_ledbat_delay_ack(struct tcpcb *tp, struct tcphdr *th) {
+	if ((tp->t_flags & TF_RXWIN0SENT) == 0 &&
+		(th->th_flags & TH_PUSH) == 0 &&
+		(tp->t_flags & TF_DELACK) == 0)
+		return(1);
+	return(0);
+}
+
+/* Change a connection to use LEDBAT. First, lower the bg_ssthresh value
+ * if necessary.
+ */
+void
+tcp_ledbat_switch_cc(struct tcpcb *tp, uint16_t old_cc_index) {
+#pragma unused(old_cc_index)
+	uint32_t cwnd;
+
+	if (tp->bg_ssthresh == 0 || tp->bg_ssthresh > tp->snd_ssthresh)
+		tp->bg_ssthresh = tp->snd_ssthresh;
+
+	cwnd = min(tp->snd_wnd, tp->snd_cwnd);
+
+	if (tp->snd_cwnd > tp->bg_ssthresh)
+		cwnd = cwnd / tp->t_maxseg;
+	else
+		cwnd = cwnd / 2 / tp->t_maxseg;
+
+	if (cwnd < bg_ss_fltsz)
+		cwnd = bg_ss_fltsz;
+
+	tp->snd_cwnd = cwnd * tp->t_maxseg;
+	tp->t_bytes_acked = 0;
+
+	OSIncrementAtomic((volatile SInt32 *)&tcp_cc_ledbat.num_sockets);
+}
diff --git a/bsd/netinet/tcp_newreno.c b/bsd/netinet/tcp_newreno.c
new file mode 100644
index 000000000..5c9db2de9
--- /dev/null
+++ b/bsd/netinet/tcp_newreno.c
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/protosw.h>
+
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+
+#if INET6
+#include <netinet/ip6.h>
+#endif
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_cc.h>
+#include <libkern/OSAtomic.h>
+
+int tcp_newreno_init(struct tcpcb *tp);
+int tcp_newreno_cleanup(struct tcpcb *tp);
+void tcp_newreno_cwnd_init_or_reset(struct tcpcb *tp);
+void tcp_newreno_inseq_ack_rcvd(struct tcpcb *tp, struct tcphdr *th);
+void tcp_newreno_ack_rcvd(struct tcpcb *tp, struct tcphdr *th);
+void tcp_newreno_pre_fr(struct tcpcb *tp, struct tcphdr *th);
+void tcp_newreno_post_fr(struct tcpcb *tp, struct tcphdr *th);
+void tcp_newreno_after_idle(struct tcpcb *tp);
+void tcp_newreno_after_timeout(struct tcpcb *tp);
+int tcp_newreno_delay_ack(struct tcpcb *tp, struct tcphdr *th);
+void tcp_newreno_switch_cc(struct tcpcb *tp, uint16_t old_index);
+
+struct tcp_cc_algo tcp_cc_newreno = {
+	.name = "newreno",
+	.init = tcp_newreno_init,
+	.cleanup = tcp_newreno_cleanup,
+	.cwnd_init = tcp_newreno_cwnd_init_or_reset,
+	.inseq_ack_rcvd = tcp_newreno_inseq_ack_rcvd,
+	.ack_rcvd = tcp_newreno_ack_rcvd,
+	.pre_fr = tcp_newreno_pre_fr,
+	.post_fr = tcp_newreno_post_fr,
+	.after_idle = tcp_newreno_cwnd_init_or_reset,
+	.after_timeout = tcp_newreno_after_timeout,
+	.delay_ack = tcp_newreno_delay_ack,
+	.switch_to = tcp_newreno_switch_cc
+};
+
+extern int tcp_do_rfc3465;
+extern int tcp_do_rfc3465_lim2;
+extern int maxseg_unacked;
+
+int tcp_newreno_init(struct tcpcb *tp) {
+#pragma unused(tp)
+	OSIncrementAtomic((volatile SInt32 *)&tcp_cc_newreno.num_sockets);
+	return 0;
+}
+
+int tcp_newreno_cleanup(struct tcpcb *tp) {
+#pragma unused(tp)
+	OSDecrementAtomic((volatile SInt32 *)&tcp_cc_newreno.num_sockets);
+	return 0;
+}
+
+/* Initialize the congestion window for a connection, or
+ * handle connections that have been idle for
+ * some time. In that state, no acks are
+ * expected to clock out any data we send --
+ * slow start to get the ack "clock" running again.
+ *
+ * Set the slow-start flight size depending on whether
+ * this is a local network or not.
+ */
+void
+tcp_newreno_cwnd_init_or_reset(struct tcpcb *tp) {
+	if (tp->t_flags & TF_LOCAL)
+		tp->snd_cwnd = tp->t_maxseg * ss_fltsz_local;
+	else {
+		/* Calculate the initial cwnd according to RFC 3390:
+		 * - On a standard link, this will result in a higher cwnd
+		 *   and improve the initial transfer rate.
+		 * - Keep the old ss_fltsz sysctl for ABI compatibility,
+		 *   but it will be overridden if the tcp_do_rfc3390 sysctl is set.
+		 */
+
+		if (tcp_do_rfc3390)
+			tp->snd_cwnd = min(4 * tp->t_maxseg, max(2 * tp->t_maxseg, 4380));
+		else
+			tp->snd_cwnd = tp->t_maxseg * ss_fltsz;
+	}
+}
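+/* Worked example of the RFC 3390 formula above, with hypothetical MSS
+ * values: for a 1460-byte MSS, min(4 * 1460, max(2 * 1460, 4380)) = 4380
+ * bytes, i.e. three segments; for a 536-byte MSS, min(2144, max(1072,
+ * 4380)) = 2144 bytes, i.e. four segments.
+ */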
+
+
+/* Function to handle an in-sequence ack during the congestion avoidance
+ * phase. This will get called from the header prediction code.
+ */
+void
+tcp_newreno_inseq_ack_rcvd(struct tcpcb *tp, struct tcphdr *th) {
+	int acked = 0;
+	acked = th->th_ack - tp->snd_una;
+	/*
+	 * Grow the congestion window, if the
+	 * connection is cwnd bound.
+	 */
+	if (tp->snd_cwnd < tp->snd_wnd) {
+		tp->t_bytes_acked += acked;
+		if (tp->t_bytes_acked > tp->snd_cwnd) {
+			tp->t_bytes_acked -= tp->snd_cwnd;
+			tp->snd_cwnd += tp->t_maxseg;
+		}
+	}
+}
+/* Function to process an ack.
+ */
+void
+tcp_newreno_ack_rcvd(struct tcpcb *tp, struct tcphdr *th) {
+	/*
+	 * RFC 3465 - Appropriate Byte Counting.
+	 *
+	 * If the window is currently less than ssthresh,
+	 * open the window by the number of bytes ACKed by
+	 * the last ACK, however clamp the window increase
+	 * to an upper limit "L".
+	 *
+	 * In congestion avoidance phase, open the window by
+	 * one segment each time "bytes_acked" grows to be
+	 * greater than or equal to the congestion window.
+	 */
+
+	register u_int cw = tp->snd_cwnd;
+	register u_int incr = tp->t_maxseg;
+	int acked = 0;
+
+	acked = th->th_ack - tp->snd_una;
+	if (tcp_do_rfc3465) {
+
+		if (cw >= tp->snd_ssthresh) {
+			tp->t_bytes_acked += acked;
+			if (tp->t_bytes_acked >= cw) {
+				/* Time to increase the window. */
+				tp->t_bytes_acked -= cw;
+			} else {
+				/* No need to increase yet. */
+				incr = 0;
+			}
+		} else {
+			/*
+			 * If the user explicitly enables RFC3465
+			 * use 2*SMSS for the "L" param.  Otherwise
+			 * use the more conservative 1*SMSS.
+			 *
+			 * (See RFC 3465 2.3 Choosing the Limit)
+			 */
+			u_int abc_lim;
+
+			abc_lim = (tcp_do_rfc3465_lim2 &&
+				tp->snd_nxt == tp->snd_max) ? incr * 2 : incr;
+
+			incr = lmin(acked, abc_lim);
+		}
+	} else {
+		/*
+		 * If the window gives us less than ssthresh packets
+		 * in flight, open exponentially (segsz per packet).
+		 * Otherwise open linearly: segsz per window
+		 * (segsz^2 / cwnd per packet).
+		 */
+
+		if (cw >= tp->snd_ssthresh)
+			incr = max((incr * incr / cw), 1);
+	}
+	tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
+}
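+/* Example of the non-ABC congestion avoidance increment above, with
+ * hypothetical values: for a 1460-byte MSS and cwnd = 14600 bytes, each
+ * ACK adds incr = max(1460 * 1460 / 14600, 1) = 146 bytes, so the ten
+ * ACKs of a full window grow cwnd by roughly one MSS per round trip.
+ */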
+
+void
+tcp_newreno_pre_fr(struct tcpcb *tp, struct tcphdr *th) {
+#pragma unused(th)
+
+	uint32_t win;
+
+	win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
+	if (win < 2)
+		win = 2;
+	tp->snd_ssthresh = win * tp->t_maxseg; 
+}
+
+void
+tcp_newreno_post_fr(struct tcpcb *tp, struct tcphdr *th) {
+	int32_t ss;
+
+	ss = tp->snd_max - th->th_ack;
+
+	/*
+	 * Complete ack.  Inflate the congestion window to
+	 * ssthresh and exit fast recovery.
+	 *
+	 * Window inflation should have left us with approx.
+	 * snd_ssthresh outstanding data.  But in case we
+	 * would be inclined to send a burst, better to do
+	 * it via the slow start mechanism.
+	 */
+	if (ss < (int32_t)tp->snd_ssthresh)
+		tp->snd_cwnd = ss + tp->t_maxseg;
+	else
+		tp->snd_cwnd = tp->snd_ssthresh;
+	tp->t_bytes_acked = 0;
+}
+
+/* Function to change the congestion window when the retransmit 
+ * timer fires.
+ */
+void
+tcp_newreno_after_timeout(struct tcpcb *tp) {
+	/*
+	 * Close the congestion window down to one segment
+	 * (we'll open it by one segment for each ack we get).
+	 * Since we probably have a window's worth of unacked
+	 * data accumulated, this "slow start" keeps us from
+	 * dumping all that data as back-to-back packets (which
+	 * might overwhelm an intermediate gateway).
+	 *
+	 * There are two phases to the opening: Initially we
+	 * open by one mss on each ack.  This makes the window
+	 * size increase exponentially with time.  If the
+	 * window is larger than the path can handle, this
+	 * exponential growth results in dropped packet(s)
+	 * almost immediately.  To get more time between
+	 * drops but still "push" the network to take advantage
+	 * of improving conditions, we switch from exponential
+	 * to linear window opening at some threshold size.
+	 * For a threshold, we use half the current window
+	 * size, truncated to a multiple of the mss.
+	 *
+	 * (the minimum cwnd that will give us exponential
+	 * growth is 2 mss.  We don't allow the threshold
+	 * to go below this.)
+	 */
+	if (tp->t_state >=  TCPS_ESTABLISHED) {
+		u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
+		if (win < 2)
+			win = 2;
+		tp->snd_cwnd = tp->t_maxseg;
+		tp->snd_ssthresh = win * tp->t_maxseg;
+		tp->t_bytes_acked = 0;
+		tp->t_dupacks = 0;
+	}
+}
+
+/*
+ * Indicate whether this ack should be delayed.
+ * We can delay the ack if:
+ *  - delayed acks are enabled and set to 1: behaves the same as when the
+ *    value is set to 2; kept for binary compatibility.
+ *  - delayed acks are enabled and set to 2: "ack every other packet"
+ *      - if our last ack wasn't a 0-sized window.
+ *      - if the peer hasn't sent us a TH_PUSH data packet (this solves 3649245).
+ *	  If TH_PUSH is set, take this as a clue that we need to ACK
+ *	  with no delay. This helps higher-level protocols that won't send
+ *	  us more data even if the window is open, because their
+ *	  last "segment" hasn't been ACKed.
+ *  - delayed acks are enabled and set to 3: do "streaming detection"
+ *    (see the comment in tcp_input.c) and
+ *      - if we receive more than "maxseg_unacked" full packets in the last 100ms
+ *	- if the connection is not in slow-start, idle, or loss/recovery states
+ *      - if those criteria aren't met, it will ack every other packet.
+ */
+
+int
+tcp_newreno_delay_ack(struct tcpcb *tp, struct tcphdr *th) {
+	switch (tcp_delack_enabled) {
+	case 1:
+	case 2:
+		if ((tp->t_flags & TF_RXWIN0SENT) == 0 &&
+			(th->th_flags & TH_PUSH) == 0 &&
+			(tp->t_flags & TF_DELACK) == 0)
+			return(1);
+		break;
+	case 3:
+		if ((tp->t_flags & TF_RXWIN0SENT) == 0 &&
+			(th->th_flags & TH_PUSH) == 0 &&
+			((tp->t_unacksegs == 0) ||
+			((tp->t_flags & TF_STRETCHACK) != 0 &&
+			tp->t_unacksegs < (maxseg_unacked - 1))))
+			return(1);
+		break;
+	}
+	return(0);
+}
+
+/* Switch to newreno from a different CC. If the connection is in
+ * congestion avoidance state, it can continue to use the current
+ * congestion window because it is going to be conservative. But
+ * if the connection is in slow-start, we will halve the congestion
+ * window and let newreno work from there. 
+ */
+void
+tcp_newreno_switch_cc(struct tcpcb *tp, uint16_t old_index) {
+#pragma unused(old_index)
+
+	uint32_t cwnd = min(tp->snd_wnd, tp->snd_cwnd);
+	if (tp->snd_cwnd >= tp->snd_ssthresh) {
+		cwnd = cwnd / tp->t_maxseg;
+	} else { 
+		cwnd = cwnd / 2 / tp->t_maxseg;
+	}
+	if (cwnd < 1)
+		cwnd = 1;
+	tp->snd_cwnd = cwnd * tp->t_maxseg;
+
+	/* Start counting bytes for RFC 3465 again */
+	tp->t_bytes_acked = 0;
+
+	OSIncrementAtomic((volatile SInt32 *)&tcp_cc_newreno.num_sockets);
+}
diff --git a/bsd/netinet/tcp_output.c b/bsd/netinet/tcp_output.c
index 69a2c2aed..5c310770d 100644
--- a/bsd/netinet/tcp_output.c
+++ b/bsd/netinet/tcp_output.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -81,6 +81,7 @@
 #include <sys/socketvar.h>
 
 #include <net/route.h>
+#include <net/ntstat.h>
 #include <net/if_var.h>
 
 #include <netinet/in.h>
@@ -89,6 +90,7 @@
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
+#include <mach/sdt.h>
 #if INET6
 #include <netinet6/in6_pcb.h>
 #include <netinet/ip6.h>
@@ -101,10 +103,12 @@
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcpip.h>
+#include <netinet/tcp_cc.h>
 #if TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
 #include <sys/kdebug.h>
+#include <mach/sdt.h>
 
 #if IPSEC
 #include <netinet6/ipsec.h>
@@ -118,48 +122,55 @@
 #define DBG_LAYER_END		NETDBG_CODE(DBG_NETTCP, 3)
 #define DBG_FNC_TCP_OUTPUT	NETDBG_CODE(DBG_NETTCP, (4 << 8) | 1)
 
-
 #ifdef notyet
 extern struct mbuf *m_copypack();
 #endif
 
 int path_mtu_discovery = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&path_mtu_discovery, 1, "Enable Path MTU Discovery");
 
 int ss_fltsz = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, slowstart_flightsize, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, slowstart_flightsize, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&ss_fltsz, 1, "Slow start flight size");
 
 int ss_fltsz_local = 8; /* starts with eight segments max */
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, local_slowstart_flightsize, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, local_slowstart_flightsize, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&ss_fltsz_local, 1, "Slow start flight size for local networks");
 
-int     tcp_do_newreno = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, &tcp_do_newreno,
-        0, "Enable NewReno Algorithms");
-
 int	tcp_do_tso = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&tcp_do_tso, 0, "Enable TCP Segmentation Offload");
 
 
 int     tcp_ecn_outbound = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_initiate_out, CTLFLAG_RW, &tcp_ecn_outbound,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_initiate_out, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_ecn_outbound,
         0, "Initiate ECN for outbound connections");
 
 int     tcp_ecn_inbound = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_negotiate_in, CTLFLAG_RW, &tcp_ecn_inbound,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_negotiate_in, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_ecn_inbound,
         0, "Allow ECN negotiation for inbound connections");
 
 int	tcp_packet_chaining = 50;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, packetchain, CTLFLAG_RW, &tcp_packet_chaining,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, packetchain, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_packet_chaining,
         0, "Enable TCP output packet chaining");
 
 int	tcp_output_unlocked = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, socket_unlocked_on_output, CTLFLAG_RW, &tcp_output_unlocked,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, socket_unlocked_on_output, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_output_unlocked,
         0, "Unlock TCP when sending packets down to IP");
 
+int tcp_do_rfc3390 = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW | CTLFLAG_LOCKED,
+	&tcp_do_rfc3390, 1, "Calculate initial slowstart cwnd depending on MSS");
+
+int tcp_min_iaj_win = MIN_IAJ_WIN;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, min_iaj_win, CTLFLAG_RW | CTLFLAG_LOCKED,
+	&tcp_min_iaj_win, 1, "Minimum recv win based on inter-packet arrival jitter");
+
+int tcp_acc_iaj_react_limit = ACC_IAJ_REACT_LIMIT;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, acc_iaj_react_limit, CTLFLAG_RW | CTLFLAG_LOCKED,
+        &tcp_acc_iaj_react_limit, 1, "Accumulated IAJ when receiver starts to react");
+
 static int32_t packchain_newlist = 0;
 static int32_t packchain_looped = 0;
 static int32_t packchain_sent = 0;
@@ -181,10 +192,13 @@ extern int 		ip_use_randomid;
 #endif /* RANDOM_IP_ID */
 extern u_int32_t dlil_filter_count;
 extern u_int32_t kipf_count;
+extern int tcp_recv_bg;
 
 static int tcp_ip_output(struct socket *, struct tcpcb *, struct mbuf *, int,
     struct mbuf *, int, int, int32_t);
 
+static inline int is_tcp_recv_bg(struct socket *so);
+
 static __inline__ u_int16_t
 get_socket_id(struct socket * s)
 {
@@ -200,6 +214,12 @@ get_socket_id(struct socket * s)
 	return (val);
 }
 
+static inline int
+is_tcp_recv_bg(struct socket *so)
+{
+	return (so->so_traffic_mgt_flags & TRAFFIC_MGT_TCP_RECVBG);
+}
+
 /*
  * Tcp output routine: figure out what should be sent and send it.
  *
@@ -242,10 +262,10 @@ tcp_output(struct tcpcb *tp)
 #ifdef IPSEC
 	unsigned ipsec_optlen = 0;
 #endif
-	int maxburst = TCP_MAXBURST;
 	int    last_off = 0;
 	int    m_off;
-	struct mbuf *m_last = NULL;
+	int    idle_time = 0;
+	struct mbuf *m_lastm = NULL;
 	struct mbuf *m_head = NULL;
 	struct mbuf *packetlist = NULL;
 	struct mbuf *tp_inp_options = tp->t_inpcb->inp_depend4.inp4_options;
@@ -265,28 +285,17 @@ tcp_output(struct tcpcb *tp)
 	 * to send, then transmit; otherwise, investigate further.
 	 */
 	idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una);
-	if (idle && tp->t_rcvtime >= tp->t_rxtcur) {
-		/*
-		 * We have been idle for "a while" and no acks are
-		 * expected to clock out any data we send --
-		 * slow start to get ack "clock" running again.
-		 *
-		 * Set the slow-start flight size depending on whether
-		 * this is a local network or not.
-		 */
-		if (
-#if INET6
-		    (isipv6 && in6_localaddr(&tp->t_inpcb->in6p_faddr)) ||
-		    (!isipv6 &&
-#endif
-		     in_localaddr(tp->t_inpcb->inp_faddr)
-#if INET6
-		     )
-#endif
-		    )
-			tp->snd_cwnd = tp->t_maxseg * ss_fltsz_local;
-		else     
-			tp->snd_cwnd = tp->t_maxseg * ss_fltsz;
+
+	/* Since idle_time is a signed integer, the following integer
+	 * subtraction will take care of the wrap-around of tcp_now.
+	 */
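+	/* For instance, if tcp_now has wrapped around to 5 while t_rcvtime
+	 * still holds 0xfffffffb (5 ticks before the wrap), the modular
+	 * subtraction yields 10, the correct elapsed time, once it is
+	 * assigned to the signed idle_time.
+	 */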
+	idle_time = tcp_now - tp->t_rcvtime;
+	if (idle && idle_time >= tp->t_rxtcur) {
+		if (CC_ALGO(tp)->after_idle != NULL) 
+			CC_ALGO(tp)->after_idle(tp);
+		DTRACE_TCP5(cc, void, NULL, struct inpcb *, tp->t_inpcb,
+			struct tcpcb *, tp, struct tcphdr *, NULL,
+			int32_t, TCP_CC_IDLE_TIMEOUT);
 	}
 	tp->t_flags &= ~TF_LASTIDLE;
 	if (idle) {
@@ -344,14 +353,16 @@ again:
 
 			/* set Retransmit  timer if it wasn't set
 			 * reset Persist timer and shift register as the
-			 * adversed peer window may not be valid anymore
+			 * advertised peer window may not be valid anymore
 			 */
 
                         if (!tp->t_timer[TCPT_REXMT]) {
-                                tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+                                tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur);
 				if (tp->t_timer[TCPT_PERSIST]) {
 					tp->t_timer[TCPT_PERSIST] = 0;
 					tp->t_rxtshift = 0;
+					tp->t_persist_stop = 0;
+					tp->rxt_start = 0;
 				}
 			}
 
@@ -364,10 +375,12 @@ again:
 				tcp_drop(tp, EADDRNOTAVAIL);
 				return(EADDRNOTAVAIL);
 			}
-			else
+			else {
+				tcp_check_timer_state(tp);
 				return(0); /* silently ignore, keep data in socket: address may be back */
+			}
 		}
-		ifafree(&ia->ia_ifa);
+		IFA_REMREF(&ia->ia_ifa);
 
 		/*
 		 * Address is still valid; check for multipages capability
@@ -463,6 +476,12 @@ again:
 			tcpstat.tcps_sack_rexmits++;
 			tcpstat.tcps_sack_rexmit_bytes +=
 			    min(len, tp->t_maxseg);
+			if (nstat_collect) {
+				nstat_route_tx(tp->t_inpcb->inp_route.ro_rt, 1, min(len, tp->t_maxseg), NSTAT_TX_FLAG_RETRANSMIT);
+				locked_add_64(&tp->t_inpcb->inp_stat->txpackets, 1);
+				locked_add_64(&tp->t_inpcb->inp_stat->txbytes, min(len, tp->t_maxseg));
+				tp->t_stat.txretransmitbytes += min(len, tp->t_maxseg);
+			}
 		}
 		else 
 			len = 0;
@@ -507,6 +526,8 @@ after_sack_rexmit:
 		} else {
 			tp->t_timer[TCPT_PERSIST] = 0;
 			tp->t_rxtshift = 0;
+			tp->rxt_start = 0;
+			tp->t_persist_stop = 0;
 		}
 	}
 
@@ -587,6 +608,8 @@ after_sack_rexmit:
 			    (TF_CLOSING|TF_SENDINPROG)) == TF_CLOSING) {
 				tp->t_flags &= ~TF_CLOSING;
 				(void) tcp_close(tp);
+			} else {
+				tcp_check_timer_state(tp);
 			}
 			KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END,
 			    0,0,0,0,0);
@@ -604,7 +627,12 @@ after_sack_rexmit:
 		flags &= ~TH_FIN;
 	}
 
-	if (len < 0) {
+	/* The check here used to be (len < 0). Sometimes len is zero when
+	 * the congestion window is closed and we need to check if the persist
+	 * timer has to be set in that case. But don't set persist until the
+	 * connection is established.
+	 */
+	if (len <= 0 && !(flags & TH_SYN)) {
 		/*
 		 * If FIN has been sent but not acked,
 		 * but we haven't been called to retransmit,
@@ -619,6 +647,7 @@ after_sack_rexmit:
 		if (sendwin == 0) {
 			tp->t_timer[TCPT_REXMT] = 0;
 			tp->t_rxtshift = 0;
+			tp->rxt_start = 0;
 			tp->snd_nxt = tp->snd_una;
 			if (tp->t_timer[TCPT_PERSIST] == 0)
 				tcp_setpersist(tp);
@@ -782,7 +811,7 @@ after_sack_rexmit:
 	if (tp->sack_enable && (tp->t_state >= TCPS_ESTABLISHED) && SEQ_GT(tp->snd_max, tp->snd_una) &&
 		tp->t_timer[TCPT_REXMT] == 0 &&
 	    tp->t_timer[TCPT_PERSIST] == 0) {
-			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+			tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur);
 			goto just_return;
 	} 
 	/*
@@ -810,6 +839,7 @@ after_sack_rexmit:
 	if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 &&
 	    tp->t_timer[TCPT_PERSIST] == 0) {
 		tp->t_rxtshift = 0;
+		tp->rxt_start = 0;
 		tcp_setpersist(tp);
 	}
 just_return:
@@ -833,6 +863,8 @@ just_return:
 	if ((tp->t_flags & (TF_CLOSING|TF_SENDINPROG)) == TF_CLOSING) {
 		tp->t_flags &= ~TF_CLOSING;
 		(void) tcp_close(tp);
+	} else {
+		tcp_check_timer_state(tp);
 	}
 	KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
 	return (0);
@@ -1136,9 +1168,19 @@ send:
 		else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
 			tcpstat.tcps_sndrexmitpack++;
 			tcpstat.tcps_sndrexmitbyte += len;
+			if (nstat_collect) {
+				nstat_route_tx(tp->t_inpcb->inp_route.ro_rt, 1, len, NSTAT_TX_FLAG_RETRANSMIT);
+				locked_add_64(&tp->t_inpcb->inp_stat->txpackets, 1);
+				locked_add_64(&tp->t_inpcb->inp_stat->txbytes, len);
+				tp->t_stat.txretransmitbytes += len;
+			}
 		} else {
 			tcpstat.tcps_sndpack++;
 			tcpstat.tcps_sndbyte += len;
+			if (nstat_collect) {
+				locked_add_64(&tp->t_inpcb->inp_stat->txpackets, 1);
+				locked_add_64(&tp->t_inpcb->inp_stat->txbytes, len);
+			}
 		}
 #ifdef notyet
 		if ((m = m_copypack(so->so_snd.sb_mb, off,
@@ -1221,7 +1263,7 @@ send:
 				 * setting the mbuf pointer to NULL is sufficient to disable the hint mechanism.
 				 */
 			        if (m_head != so->so_snd.sb_mb || sack_rxmit || last_off != off)
-				        m_last = NULL;
+				        m_lastm = NULL;
 				last_off = off + len;
 				m_head = so->so_snd.sb_mb;
 	
@@ -1235,7 +1277,7 @@ send:
 				 * m_copym_with_hdrs will always return the last mbuf pointer and the offset into it that
 				 * it acted on to fullfill the current request, whether a valid 'hint' was passed in or not
 				 */
-			        if ((m = m_copym_with_hdrs(so->so_snd.sb_mb, off, len, M_DONTWAIT, &m_last, &m_off)) == NULL) {
+			        if ((m = m_copym_with_hdrs(so->so_snd.sb_mb, off, len, M_DONTWAIT, &m_lastm, &m_off)) == NULL) {
 				        error = ENOBUFS;
 					goto out;
 				}
@@ -1285,6 +1327,10 @@ send:
 		ip6 = mtod(m, struct ip6_hdr *);
 		th = (struct tcphdr *)(ip6 + 1);
 		tcp_fillheaders(tp, ip6, th);
+		if ((tp->ecn_flags & TE_SENDIPECT) != 0 && len &&
+			!SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+			ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
+		}
 	} else
 #endif /* INET6 */
 	{
@@ -1349,13 +1395,25 @@ send:
 	if (tp->t_flags & TF_SLOWLINK && slowlink_wsize > 0) {
 		if (recwin > (int32_t)slowlink_wsize) 
 			recwin = slowlink_wsize;
-		th->th_win = htons((u_short) (recwin>>tp->rcv_scale));
 	}
-	else {
-		if (recwin > (int32_t)(TCP_MAXWIN << tp->rcv_scale))
-			recwin = (int32_t)(TCP_MAXWIN << tp->rcv_scale);
-		th->th_win = htons((u_short) (recwin>>tp->rcv_scale));
+
+#if TRAFFIC_MGT
+	if (tcp_recv_bg == 1  || is_tcp_recv_bg(so)) {
+		if (tp->acc_iaj > tcp_acc_iaj_react_limit) {
+			uint32_t min_iaj_win = tcp_min_iaj_win * tp->t_maxseg;
+			if (tp->iaj_rwintop == 0 ||
+				SEQ_LT(tp->iaj_rwintop, tp->rcv_adv))
+				tp->iaj_rwintop = tp->rcv_adv; 
+			if (SEQ_LT(tp->iaj_rwintop, tp->rcv_nxt + min_iaj_win))
+				tp->iaj_rwintop =  tp->rcv_nxt + min_iaj_win;
+			recwin = min(tp->iaj_rwintop - tp->rcv_nxt, recwin);
+		}
 	}
+#endif /* TRAFFIC_MGT */
+
+	if (recwin > (int32_t)(TCP_MAXWIN << tp->rcv_scale))
+		recwin = (int32_t)(TCP_MAXWIN << tp->rcv_scale);
+	th->th_win = htons((u_short) (recwin>>tp->rcv_scale));
 
 	/*
 	 * Adjust the RXWIN0SENT flag - indicate that we have advertised
@@ -1365,7 +1423,7 @@ send:
 	 * to read more data then can be buffered prior to transmitting on
 	 * the connection.
 	 */
-	if (recwin == 0)
+	if (th->th_win == 0)
 		tp->t_flags |= TF_RXWIN0SENT;
 	else
 		tp->t_flags &= ~TF_RXWIN0SENT;
@@ -1387,13 +1445,17 @@ send:
 	 */
 	m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
 #if INET6
-	if (isipv6)
+	if (isipv6) {
 		/*
 		 * ip6_plen is not need to be filled now, and will be filled
 		 * in ip6_output.
 		 */
-		th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
-				       sizeof(struct tcphdr) + optlen + len);
+		m->m_pkthdr.csum_flags = CSUM_TCPIPV6;
+		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+		if (len + optlen)
+			th->th_sum = in_addword(th->th_sum, 
+				htons((u_short)(optlen + len)));
+	}
 	else
 #endif /* INET6 */
 	{
@@ -1407,7 +1469,6 @@ send:
 	/*
 	 * Enable TSO and specify the size of the segments.
 	 * The TCP pseudo header checksum is always provided.
-	 * XXX: Fixme: This is currently not the case for IPv6.
 	 */
 	if (tso) {
 #if INET6
@@ -1450,7 +1511,7 @@ send:
 			 * not currently timing anything.
 			 */
 			if (tp->t_rtttime == 0) {
-				tp->t_rtttime = 1;
+				tp->t_rtttime = tcp_now;
 				tp->t_rtseq = startseq;
 				tcpstat.tcps_segstimed++;
 			}
@@ -1471,8 +1532,10 @@ timer:
 			if (tp->t_timer[TCPT_PERSIST]) {
 				tp->t_timer[TCPT_PERSIST] = 0;
 				tp->t_rxtshift = 0;
+				tp->rxt_start = 0;
+				tp->t_persist_stop = 0;
 			}
-			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+			tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur);
 		}
 	} else {
 		/*
@@ -1510,6 +1573,15 @@ timer:
 	 */
 #if INET6
 	if (isipv6) {
+		struct rtentry *rt6;
+		struct ip6_out_args ip6oa = { IFSCOPE_NONE, 0 };
+		unsigned int outif;
+
+		KERNEL_DEBUG(DBG_LAYER_BEG,
+		     ((tp->t_inpcb->inp_fport << 16) | tp->t_inpcb->inp_lport),
+		     (((tp->t_inpcb->in6p_laddr.s6_addr16[0] & 0xffff) << 16) |
+		      (tp->t_inpcb->in6p_faddr.s6_addr16[0] & 0xffff)),
+		     0,0,0);
 		/*
 		 * we separately set hoplimit for every segment, since the
 		 * user might want to change the value via setsockopt.
@@ -1531,46 +1603,40 @@ timer:
 #endif /*IPSEC*/
 		m->m_pkthdr.socket_id = socket_id;
 
-#if PKT_PRIORITY
-		set_traffic_class(m, so, MBUF_TC_NONE);
-#endif /* PKT_PRIORITY */
-		error = ip6_output(m,
-			    inp6_pktopts,
-			    &tp->t_inpcb->in6p_route,
-			    (so_options & SO_DONTROUTE), NULL, NULL, 0);
+		rt6 = tp->t_inpcb->in6p_route.ro_rt;
+		if (rt6 != NULL && rt6->rt_ifp != NULL 
+			&& rt6->rt_ifp != lo_ifp)
+			set_packet_tclass(m, so, MBUF_TC_UNSPEC, 1);
+
+		DTRACE_TCP5(send, struct mbuf *, m, struct inpcb *, tp->t_inpcb, struct ip6_hdr *, ip6,
+			struct tcpcb *, tp, struct tcphdr *, th);
+
+		if (tp->t_inpcb->inp_flags & INP_BOUND_IF)
+			ip6oa.ip6oa_boundif = tp->t_inpcb->inp_boundif;
+
+		ip6oa.ip6oa_nocell = (tp->t_inpcb->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
+
+		error = ip6_output(m, inp6_pktopts, &tp->t_inpcb->in6p_route,
+		    (so_options & SO_DONTROUTE) | IPV6_OUTARGS, NULL, NULL,
+		    &ip6oa);
+
+		/* Refresh rt6 as we may have lost the route while in ip6_output() */
+		if ((rt6 = tp->t_inpcb->in6p_route.ro_rt) != NULL && 
+		    (outif = rt6->rt_ifp->if_index) != tp->t_inpcb->in6p_last_outif)
+			tp->t_inpcb->in6p_last_outif = outif;
 	} else
 #endif /* INET6 */
     {
 	ip->ip_len = m->m_pkthdr.len;
-#if INET6
- 	if (isipv6)
- 		ip->ip_ttl = in6_selecthlim(tp->t_inpcb,
- 					    tp->t_inpcb->in6p_route.ro_rt ?
- 					    tp->t_inpcb->in6p_route.ro_rt->rt_ifp
- 					    : NULL);
- 	else
-#endif /* INET6 */
 	ip->ip_ttl = tp->t_inpcb->inp_ip_ttl;	/* XXX */
 	ip->ip_tos |= (tp->t_inpcb->inp_ip_tos & ~IPTOS_ECN_MASK);	/* XXX */
 
 
-#if INET6
-	if (isipv6) {
-		KERNEL_DEBUG(DBG_LAYER_BEG,
-		     ((tp->t_inpcb->inp_fport << 16) | tp->t_inpcb->inp_lport),
-		     (((tp->t_inpcb->in6p_laddr.s6_addr16[0] & 0xffff) << 16) |
-		      (tp->t_inpcb->in6p_faddr.s6_addr16[0] & 0xffff)),
-		     0,0,0);
-	}
-        else 
-#endif
-	{
-		KERNEL_DEBUG(DBG_LAYER_BEG,
-		     ((tp->t_inpcb->inp_fport << 16) | tp->t_inpcb->inp_lport),
-		     (((tp->t_inpcb->inp_laddr.s_addr & 0xffff) << 16) |
-		      (tp->t_inpcb->inp_faddr.s_addr & 0xffff)),
-		     0,0,0);
-	}
+	KERNEL_DEBUG(DBG_LAYER_BEG,
+	     ((tp->t_inpcb->inp_fport << 16) | tp->t_inpcb->inp_lport),
+	     (((tp->t_inpcb->inp_laddr.s_addr & 0xffff) << 16) |
+	      (tp->t_inpcb->inp_faddr.s_addr & 0xffff)),
+	     0,0,0);
 
 	/*
 	 * See if we should do MTU discovery.
@@ -1596,11 +1662,18 @@ timer:
 	lost = 0;
 	m->m_pkthdr.socket_id = socket_id;
 	m->m_nextpkt = NULL;
-#if PKT_PRIORITY
-	set_traffic_class(m, so, MBUF_TC_NONE);
-#endif /* PKT_PRIORITY */
+
+	if (tp->t_inpcb->inp_route.ro_rt != NULL && 
+		tp->t_inpcb->inp_route.ro_rt->rt_ifp != NULL &&
+		tp->t_inpcb->inp_route.ro_rt->rt_ifp != lo_ifp)
+		set_packet_tclass(m, so, MBUF_TC_UNSPEC, 0);
+
 	tp->t_pktlist_sentlen += len;
 	tp->t_lastchain++;
+
+	DTRACE_TCP5(send, struct mbuf *, m, struct inpcb *, tp->t_inpcb,
+		struct ip *, ip, struct tcpcb *, tp, struct tcphdr *, th);
+
 	if (tp->t_pktlist_head != NULL) {
 		tp->t_pktlist_tail->m_nextpkt = m;
 		tp->t_pktlist_tail = m;
@@ -1685,12 +1758,17 @@ out:
 		if (error == ENOBUFS) {
 			if (!tp->t_timer[TCPT_REXMT] &&
 				 !tp->t_timer[TCPT_PERSIST])
-					tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+					tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur);
 
 			tp->snd_cwnd = tp->t_maxseg;
 			tp->t_bytes_acked = 0;
 
+			tcp_check_timer_state(tp);
 			KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
+
+			DTRACE_TCP5(cc, void, NULL, struct inpcb *, tp->t_inpcb,
+				struct tcpcb *, tp, struct tcphdr *, NULL,
+				int32_t, TCP_CC_OUTPUT_ERROR);
 			return (0);
 		}
 		if (error == EMSGSIZE) {
@@ -1710,21 +1788,26 @@ out:
 				tp->t_flags &= ~TF_TSO;
 
 			tcp_mtudisc(tp->t_inpcb, 0);
+			tcp_check_timer_state(tp);
+
 			KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
 			return 0;
 		}
 		if ((error == EHOSTUNREACH || error == ENETDOWN)
 		    && TCPS_HAVERCVDSYN(tp->t_state)) {
 			tp->t_softerror = error;
+			tcp_check_timer_state(tp);
 			KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
 			return (0);
 		}
+		tcp_check_timer_state(tp);
 		KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
 		return (error);
 	}
 
 	tcpstat.tcps_sndtotal++;
 
+#if INET6
 	/*
 	 * Data sent (as far as we can tell).
 	 * If this advertises a larger window than any other segment,
@@ -1733,18 +1816,21 @@ out:
 	 * we unlock the socket.
 	 *  NOTE: for now, this is done in tcp_ip_output for IPv4
 	 */
-#if INET6
 	if (isipv6) {
 		if (recwin > 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv))
 			tp->rcv_adv = tp->rcv_nxt + recwin;
 		tp->last_ack_sent = tp->rcv_nxt;
 		tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
+		tp->t_timer[TCPT_DELACK] = 0;
+		tp->t_unacksegs = 0;
 	}
 #endif
 
 	KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END,0,0,0,0,0);
-	if (sendalot && (!tcp_do_newreno || --maxburst))
+	if (sendalot)
 		goto again;
+
+	tcp_check_timer_state(tp);
 	return (0);
 }
 
@@ -1758,13 +1844,12 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt,
 	struct inpcb *inp = tp->t_inpcb;
 	struct ip_out_args ipoa;
 	struct route ro;
-#if CONFIG_OUT_IF
 	unsigned int outif;
-#endif /* CONFIG_OUT_IF */
 
 	/* If socket was bound to an ifindex, tell ip_output about it */
-	ipoa.ipoa_ifscope = (inp->inp_flags & INP_BOUND_IF) ?
+	ipoa.ipoa_boundif = (inp->inp_flags & INP_BOUND_IF) ?
 	    inp->inp_boundif : IFSCOPE_NONE;
+	ipoa.ipoa_nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
 	flags |= IP_OUTARGS;
 
 	/* Copy the cached route and take an extra reference */
@@ -1781,14 +1866,21 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt,
 		tp->rcv_adv = tp->rcv_nxt + recwin;
 	tp->last_ack_sent = tp->rcv_nxt;
 	tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
+	tp->t_timer[TCPT_DELACK] = 0;
+	tp->t_unacksegs = 0;
 
 	/*
 	 * If allowed, unlock TCP socket while in IP
 	 * but only if the connection is established and
-	 * if we're not sending from an upcall.
+	 * in a normal mode where reentrancy on the tcpcb won't be
+	 * an issue:
+	 * - there is no SACK episode
+	 * - we're not in Fast Recovery mode
+	 * - if we're not sending from an upcall.
 	 */
 	if (tcp_output_unlocked && ((so->so_flags & SOF_UPCALLINUSE) == 0) &&
-	    (tp->t_state == TCPS_ESTABLISHED) && (sack_in_progress == 0)) {
+	    (tp->t_state == TCPS_ESTABLISHED) && (sack_in_progress == 0) &&
+	    ((tp->t_flags & TF_FASTRECOVERY) == 0)) {
 		unlocked = TRUE;
 		socket_unlock(so, 0);
 	}
@@ -1828,7 +1920,6 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt,
 			 */
 			cnt = 0;
 		}
-	
 		error = ip_output_list(pkt, cnt, opt, &ro, flags, 0, &ipoa);
 		if (chain || error) {
 			/*
@@ -1846,6 +1937,10 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt,
 	if (unlocked)
 		socket_lock(so, 0);
 
+	if (ro.ro_rt != NULL &&
+	    (outif = ro.ro_rt->rt_ifp->if_index) != inp->inp_last_outif)
+		inp->inp_last_outif = outif;
+
 	/* Synchronize cached PCB route */
 	inp_route_copyin(inp, &ro);
 
@@ -1858,14 +1953,27 @@ tcp_setpersist(tp)
 {
 	int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
 
-	if (tp->t_timer[TCPT_REXMT])
-		panic("tcp_setpersist: retransmit pending");
+	/* If a PERSIST_TIMER option was set, we will limit the
+	 * time the persist timer will be active for that connection
+	 * in order to avoid a DoS via zero window probes.
+	 * See rdar://5805356.
+	 */
+
+	if ((tp->t_persist_timeout != 0) &&
+       	    (tp->t_timer[TCPT_PERSIST] == 0) &&
+       	    (tp->t_persist_stop == 0)) {
+		tp->t_persist_stop = tcp_now + tp->t_persist_timeout;
+	}
+
 	/*
 	 * Start/restart persistance timer.
 	 */
 	TCPT_RANGESET(tp->t_timer[TCPT_PERSIST],
 	    t * tcp_backoff[tp->t_rxtshift],
-	    TCPTV_PERSMIN, TCPTV_PERSMAX);
+	    TCPTV_PERSMIN, TCPTV_PERSMAX,
+	    TCP_ADD_REXMTSLOP(tp));
+	tp->t_timer[TCPT_PERSIST] = OFFSET_FROM_START(tp, tp->t_timer[TCPT_PERSIST]);
+
 	if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
 		tp->t_rxtshift++;
 }
diff --git a/bsd/netinet/tcp_sack.c b/bsd/netinet/tcp_sack.c
index 5842ad2b8..69fb8a7d0 100644
--- a/bsd/netinet/tcp_sack.c
+++ b/bsd/netinet/tcp_sack.c
@@ -103,20 +103,20 @@
 #endif /*IPSEC*/
 
 int	tcp_do_sack = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, &tcp_do_sack, 0,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_do_sack, 0,
 	"Enable/Disable TCP SACK support");
 static int tcp_sack_maxholes = 128;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_maxholes, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_maxholes, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&tcp_sack_maxholes, 0, 
     "Maximum number of TCP SACK holes allowed per connection");
 
 static int tcp_sack_globalmaxholes = 65536;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_globalmaxholes, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_globalmaxholes, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&tcp_sack_globalmaxholes, 0, 
     "Global maximum number of TCP SACK holes");
 
 static int tcp_sack_globalholes = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_globalholes, CTLFLAG_RD,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_globalholes, CTLFLAG_RD | CTLFLAG_LOCKED,
     &tcp_sack_globalholes, 0,
     "Global number of TCP SACK holes currently allocated");
 
@@ -203,6 +203,18 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
 
 	/* Save the number of SACK blocks. */
 	tp->rcv_numsacks = num_head + num_saved;
+
+	/* If we are requesting SACK recovery, reset the stretch-ack state
+	 * so that connection will generate more acks after recovery and
+	 * sender's cwnd will open.
+	 */
+	if ((tp->t_flags & TF_STRETCHACK) != 0 && tp->rcv_numsacks > 0)
+		tcp_reset_stretch_ack(tp);
+
+#if TRAFFIC_MGT
+	if (tp->acc_iaj > 0 && tp->rcv_numsacks > 0) 
+		reset_acc_iaj(tp);
+#endif /* TRAFFIC_MGT */
 }
 
 /*
diff --git a/bsd/netinet/tcp_seq.h b/bsd/netinet/tcp_seq.h
index 89a16ef79..df7bfa4e9 100644
--- a/bsd/netinet/tcp_seq.h
+++ b/bsd/netinet/tcp_seq.h
@@ -79,6 +79,8 @@
 
 /* for modulo comparisons of timestamps */
 #define TSTMP_LT(a,b)	((int)((a)-(b)) < 0)
+#define TSTMP_GT(a,b)	((int)((a)-(b)) > 0)
+#define TSTMP_LEQ(a,b)	((int)((a)-(b)) <= 0)
 #define TSTMP_GEQ(a,b)	((int)((a)-(b)) >= 0)
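+/* These macros compare timestamps modulo 2^32, so they remain correct
+ * across wrap-around: for example, TSTMP_GT(0x00000002, 0xfffffffe) is
+ * true because (int)(0x00000002 - 0xfffffffe) == 4 > 0.
+ */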
 
 /*
diff --git a/bsd/netinet/tcp_subr.c b/bsd/netinet/tcp_subr.c
index f1b220bc2..8cf658482 100644
--- a/bsd/netinet/tcp_subr.c
+++ b/bsd/netinet/tcp_subr.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -82,6 +82,7 @@
 #include <sys/protosw.h>
 #include <sys/random.h>
 #include <sys/syslog.h>
+#include <sys/mcache.h>
 #include <kern/locks.h>
 #include <kern/zalloc.h>
 
@@ -112,6 +113,9 @@
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
+#include <netinet/tcp_cc.h>
+#include <kern/thread_call.h>
+
 #if INET6
 #include <netinet6/tcp6_var.h>
 #endif
@@ -136,6 +140,7 @@
 
 #include <libkern/crypto/md5.h>
 #include <sys/kdebug.h>
+#include <mach/sdt.h>
 
 #define DBG_FNC_TCP_CLOSE	NETDBG_CODE(DBG_NETTCP, ((5 << 8) | 2))
 
@@ -147,13 +152,13 @@ extern int ipsec_bypass;
 #endif
 
 int 	tcp_mssdflt = TCP_MSS;
-SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW, 
+SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_mssdflt , 0, "Default TCP Maximum Segment Size");
 
 #if INET6
 int	tcp_v6mssdflt = TCP6_MSS;
 SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
-	CTLFLAG_RW, &tcp_v6mssdflt , 0,
+	CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_v6mssdflt , 0,
 	"Default TCP Maximum Segment Size for IPv6");
 #endif
 
@@ -166,7 +171,7 @@ SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
  * checking. This setting prevents us from sending too small packets.
  */
 int	tcp_minmss = TCP_MINMSS;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_minmss , 0, "Minmum TCP Maximum Segment Size");
 
 /*
@@ -182,51 +187,70 @@ __private_extern__ int     tcp_minmssoverload = TCP_MINMSSOVERLOAD;
 #else
 __private_extern__ int     tcp_minmssoverload = 0;
 #endif
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmssoverload, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmssoverload, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_minmssoverload , 0, "Number of TCP Segments per Second allowed to"
     "be under the MINMSS Size");
 
 static int	tcp_do_rfc1323 = 1;
-SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW, 
+SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_do_rfc1323 , 0, "Enable rfc1323 (high performance TCP) extensions");
 
+// Not used
 static int	tcp_do_rfc1644 = 0;
-SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644, CTLFLAG_RW, 
+SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_do_rfc1644 , 0, "Enable rfc1644 (TTCP) extensions");
 
 static int	do_tcpdrain = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW | CTLFLAG_LOCKED, &do_tcpdrain, 0,
      "Enable tcp_drain routine for extra help when low on mbufs");
 
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, 
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD | CTLFLAG_LOCKED, 
     &tcbinfo.ipi_count, 0, "Number of active PCBs");
 
 static int	icmp_may_rst = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW, &icmp_may_rst, 0, 
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW | CTLFLAG_LOCKED, &icmp_may_rst, 0, 
     "Certain ICMP unreachable messages may abort connections in SYN_SENT");
 
 static int	tcp_strict_rfc1948 = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, strict_rfc1948, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, strict_rfc1948, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_strict_rfc1948, 0, "Determines if RFC1948 is followed exactly");
 
 static int	tcp_isn_reseed_interval = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_isn_reseed_interval, 0, "Seconds between reseeding of ISN secret");
 static int 	tcp_background_io_enabled = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, background_io_enabled, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, background_io_enabled, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_background_io_enabled, 0, "Background IO Enabled");
 
-int 	tcp_TCPTV_MIN = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, rtt_min, CTLFLAG_RW,
+int 	tcp_TCPTV_MIN = 100;	/* 100ms minimum RTT */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, rtt_min, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_TCPTV_MIN, 0, "min rtt value allowed");
 
+int tcp_rexmt_slop = TCPTV_REXMTSLOP;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmt_slop, CTLFLAG_RW,
+	&tcp_rexmt_slop, 0, "Slop added to retransmit timeout");
+
 __private_extern__ int tcp_use_randomport = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, randomize_ports, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, randomize_ports, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_use_randomport, 0, "Randomize TCP port numbers");
 
+extern struct tcp_cc_algo tcp_cc_newreno;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno_sockets, CTLFLAG_RD | CTLFLAG_LOCKED,
+	&tcp_cc_newreno.num_sockets, 0, "Number of sockets using newreno");
+
+extern struct tcp_cc_algo tcp_cc_ledbat;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, background_sockets, CTLFLAG_RD | CTLFLAG_LOCKED,
+	&tcp_cc_ledbat.num_sockets, 0, "Number of sockets using background transport");
+
 static void	tcp_cleartaocache(void);
 static void	tcp_notify(struct inpcb *, int);
+static void	tcp_cc_init(void);
+
 struct zone	*sack_hole_zone;
+struct zone	*tcp_reass_zone;
+
+/* The array containing pointers to currently implemented TCP CC algorithms */
+struct tcp_cc_algo* tcp_cc_algo_list[TCP_CC_ALGO_COUNT];
 
 extern unsigned int total_mb_cnt;
 extern unsigned int total_cl_cnt;
@@ -247,7 +271,7 @@ extern int path_mtu_discovery;
 #endif
 
 __private_extern__ int	tcp_tcbhashsize = TCBHASHSIZE;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RD,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RD | CTLFLAG_LOCKED,
      &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
 
 /*
@@ -259,25 +283,25 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RD,
  * to be changed, eventually, for greater efficiency).
  */
 #define	ALIGNMENT	32
-#define	ALIGNM1		(ALIGNMENT - 1)
 struct	inp_tp {
-	union {
-		struct	inpcb inp;
-		char	align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1];
-	} inp_tp_u;
-	struct	tcpcb tcb;
+	struct	inpcb	inp;
+	struct	tcpcb	tcb __attribute__((aligned(ALIGNMENT)));
 };
 #undef ALIGNMENT
-#undef ALIGNM1
 
 extern struct	inpcbhead	time_wait_slots[];
-extern u_int32_t	*delack_bitmask;
+extern struct tcptimerlist tcp_timer_list;
 
 int  get_inpcb_str_size(void);
 int  get_tcp_str_size(void);
 
 static void tcpcb_to_otcpcb(struct tcpcb *, struct otcpcb *);
 
+static lck_attr_t *tcp_uptime_mtx_attr = NULL;		/* mutex attributes */
+static lck_grp_t *tcp_uptime_mtx_grp = NULL;		/* mutex group definition */
+static lck_grp_attr_t *tcp_uptime_mtx_grp_attr = NULL;	/* mutex group attributes */
+
+
 int  get_inpcb_str_size(void)
 {
 	return sizeof(struct inpcb);
@@ -291,6 +315,17 @@ int  get_tcp_str_size(void)
 
 int	tcp_freeq(struct tcpcb *tp);
 
+/*
+ * Initialize TCP congestion control algorithms.
+ */
+
+void
+tcp_cc_init(void)
+{
+	bzero(&tcp_cc_algo_list, sizeof(tcp_cc_algo_list));
+	tcp_cc_algo_list[TCP_CC_ALGO_NEWRENO_INDEX] = &tcp_cc_newreno;
+	tcp_cc_algo_list[TCP_CC_ALGO_BACKGROUND_INDEX] = &tcp_cc_ledbat;
+}
 
 /*
  * Tcp initialization
@@ -310,9 +345,10 @@ tcp_init()
 	tcp_keepintvl = TCPTV_KEEPINTVL;
 	tcp_maxpersistidle = TCPTV_KEEP_IDLE;
 	tcp_msl = TCPTV_MSL;
-	read_random(&tcp_now, sizeof(tcp_now));
-	tcp_now = tcp_now & 0x3fffffff; /* Starts tcp internal 100ms clock at a random value */
 
+	microuptime(&tcp_uptime);
+	read_random(&tcp_now, sizeof(tcp_now));
+	tcp_now = tcp_now & 0x3fffffff; /* Starts tcp internal clock at a random value */
 
 	LIST_INIT(&tcb);
 	tcbinfo.listhead = &tcb;
@@ -325,10 +361,26 @@ tcp_init()
 	tcbinfo.hashbase = hashinit(tcp_tcbhashsize, M_PCB, &tcbinfo.hashmask);
 	tcbinfo.porthashbase = hashinit(tcp_tcbhashsize, M_PCB,
 					&tcbinfo.porthashmask);
-	str_size = (vm_size_t) sizeof(struct inp_tp);
+	str_size = P2ROUNDUP(sizeof(struct inp_tp), sizeof(u_int64_t));
 	tcbinfo.ipi_zone = (void *) zinit(str_size, 120000*str_size, 8192, "tcpcb");
+	zone_change(tcbinfo.ipi_zone, Z_CALLERACCT, FALSE);
+	zone_change(tcbinfo.ipi_zone, Z_EXPAND, TRUE);
+
+	str_size = P2ROUNDUP(sizeof(struct sackhole), sizeof(u_int64_t));
 	sack_hole_zone = zinit(str_size, 120000*str_size, 8192, "sack_hole zone");
+	zone_change(sack_hole_zone, Z_CALLERACCT, FALSE);
+	zone_change(sack_hole_zone, Z_EXPAND, TRUE);
+
 	tcp_reass_maxseg = nmbclusters / 16;
+	str_size = P2ROUNDUP(sizeof(struct tseg_qent), sizeof(u_int64_t));
+	tcp_reass_zone = zinit(str_size, (tcp_reass_maxseg + 1) * str_size,
+		0, "tcp_reass_zone");
+	if (tcp_reass_zone == NULL) {
+		panic("%s: failed allocating tcp_reass_zone", __func__);
+		/* NOTREACHED */
+	}
+	zone_change(tcp_reass_zone, Z_CALLERACCT, FALSE);
+	zone_change(tcp_reass_zone, Z_EXPAND, TRUE);
 
 #if INET6
 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
@@ -341,10 +393,10 @@ tcp_init()
 		panic("tcp_init");
 #undef TCP_MINPROTOHDR
 
-        /*
+	/*
 	 * allocate lock group attribute and group for tcp pcb mutexes
 	 */
-     pcbinfo->mtx_grp_attr = lck_grp_attr_alloc_init();
+	pcbinfo->mtx_grp_attr = lck_grp_attr_alloc_init();
 	pcbinfo->mtx_grp = lck_grp_alloc_init("tcppcb", pcbinfo->mtx_grp_attr);
 		
 	/*
@@ -357,18 +409,37 @@ tcp_init()
 		return;	/* pretty much dead if this fails... */
 	}
 
-	delack_bitmask = _MALLOC((4 * tcp_tcbhashsize)/32, M_PCB, M_WAITOK);
-	if (delack_bitmask == 0) 
-	     panic("Delack Memory");
-
-	for (i=0; i < (tcbinfo.hashsize / 32); i++)
-	         delack_bitmask[i] = 0;
-
 	for (i=0; i < N_TIME_WAIT_SLOTS; i++) {
 	     LIST_INIT(&time_wait_slots[i]);
 	}
 
-	timeout(tcp_fasttimo, NULL, hz/TCP_RETRANSHZ);
+	bzero(&tcp_timer_list, sizeof(tcp_timer_list));
+	LIST_INIT(&tcp_timer_list.lhead);
+	/*
+	 * allocate lock group attribute, group and attribute for the tcp timer list
+	 */
+	tcp_timer_list.mtx_grp_attr = lck_grp_attr_alloc_init();
+	tcp_timer_list.mtx_grp = lck_grp_alloc_init("tcptimerlist", tcp_timer_list.mtx_grp_attr);
+	tcp_timer_list.mtx_attr = lck_attr_alloc_init();
+	if ((tcp_timer_list.mtx = lck_mtx_alloc_init(tcp_timer_list.mtx_grp, tcp_timer_list.mtx_attr)) == NULL) {
+		panic("failed to allocate memory for tcp_timer_list.mtx\n");
+	}
+	tcp_timer_list.fast_quantum = TCP_FASTTIMER_QUANTUM;
+	tcp_timer_list.slow_quantum = TCP_SLOWTIMER_QUANTUM;
+	if ((tcp_timer_list.call = thread_call_allocate(tcp_run_timerlist, NULL)) == NULL) {
+		panic("failed to allocate call entry 1 in tcp_init\n");
+	}
+
+	/*
+	 * allocate lock group attribute, group and attribute for tcp_uptime_lock
+	 */
+	tcp_uptime_mtx_grp_attr = lck_grp_attr_alloc_init();
+	tcp_uptime_mtx_grp = lck_grp_alloc_init("tcpuptime", tcp_uptime_mtx_grp_attr);
+	tcp_uptime_mtx_attr = lck_attr_alloc_init();
+	tcp_uptime_lock = lck_spin_alloc_init(tcp_uptime_mtx_grp, tcp_uptime_mtx_attr);
+
+	/* Initialize TCP congestion control algorithms list */
+	tcp_cc_init();
 }
 
 /*
@@ -398,7 +469,9 @@ tcp_fillheaders(tp, ip_ptr, tcp_ptr)
 		ip6->ip6_plen = sizeof(struct tcphdr);
 		ip6->ip6_src = inp->in6p_laddr;
 		ip6->ip6_dst = inp->in6p_faddr;
-		tcp_hdr->th_sum = 0;
+		tcp_hdr->th_sum = in6_cksum_phdr(&inp->in6p_laddr,
+		    &inp->in6p_faddr, htonl(sizeof(struct tcphdr)),
+		    htonl(IPPROTO_TCP));
 	} else
 #endif
 	{
@@ -474,7 +547,8 @@ tcp_respond(
 	tcp_seq ack,
 	tcp_seq seq,
 	int flags,
-	unsigned int ifscope
+	unsigned int ifscope,
+	unsigned int nocell
 	)
 {
 	register int tlen;
@@ -489,6 +563,7 @@ tcp_respond(
 	struct ip6_hdr *ip6;
 	int isipv6;
 #endif /* INET6 */
+	unsigned int outif;
 
 #if INET6
 	isipv6 = IP_VHL_V(((struct ip *)ipgen)->ip_vhl) == 6;
@@ -614,9 +689,11 @@ tcp_respond(
 #if INET6
 	if (isipv6) {
 		nth->th_sum = 0;
-		nth->th_sum = in6_cksum(m, IPPROTO_TCP,
-					sizeof(struct ip6_hdr),
-					tlen - sizeof(struct ip6_hdr));
+		nth->th_sum = in6_cksum_phdr(&ip6->ip6_src,
+		    &ip6->ip6_dst, htons((u_short)(tlen - sizeof(struct ip6_hdr))),
+		    htonl(IPPROTO_TCP));
+		m->m_pkthdr.csum_flags = CSUM_TCPIPV6;
+		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 		ip6->ip6_hlim = in6_selecthlim(tp ? tp->t_inpcb : NULL,
 					       ro6 && ro6->ro_rt ?
 					       ro6->ro_rt->rt_ifp :
@@ -639,21 +716,29 @@ tcp_respond(
 		return;
 	}
 #endif
-#if PKT_PRIORITY
-	if (tp != NULL) 
-		set_traffic_class(m, tp->t_inpcb->inp_socket, MBUF_TC_NONE);
-#endif /* PKT_PRIORITY */
+
+	if (tp != NULL)
+		set_packet_tclass(m, tp->t_inpcb->inp_socket, MBUF_TC_UNSPEC, isipv6);
+
 #if INET6
 	if (isipv6) {
-		(void)ip6_output(m, NULL, ro6, 0, NULL, NULL, 0);
-		if (ro6 == &sro6 && ro6->ro_rt) {
-			rtfree(ro6->ro_rt);
-			ro6->ro_rt = NULL;
+		struct ip6_out_args ip6oa = { ifscope, nocell };
+
+		(void) ip6_output(m, NULL, ro6, IPV6_OUTARGS, NULL,
+		    NULL, &ip6oa);
+		if (ro6->ro_rt != NULL) {
+			if (ro6 == &sro6) {
+				rtfree(ro6->ro_rt);
+				ro6->ro_rt = NULL;
+			} else if ((outif = ro6->ro_rt->rt_ifp->if_index) !=
+			    tp->t_inpcb->in6p_last_outif) {
+				tp->t_inpcb->in6p_last_outif = outif;
+			}
 		}
 	} else
 #endif /* INET6 */
 	{
-		struct ip_out_args ipoa = { ifscope };
+		struct ip_out_args ipoa = { ifscope, nocell };
 
 		if (ro != &sro) {
 			/* Copy the cached route and take an extra reference */
@@ -665,6 +750,10 @@ tcp_respond(
 		(void) ip_output(m, NULL, &sro, IP_OUTARGS, NULL, &ipoa);
 
 		if (ro != &sro) {
+			if (sro.ro_rt != NULL &&
+			    (outif = sro.ro_rt->rt_ifp->if_index) !=
+			    tp->t_inpcb->inp_last_outif)
+				tp->t_inpcb->inp_last_outif = outif;
 			/* Synchronize cached PCB route */
 			inp_route_copyin(tp->t_inpcb, &sro);
 		} else if (sro.ro_rt != NULL) {
@@ -690,13 +779,15 @@ tcp_newtcpcb(inp)
 	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
 
+	calculate_tcp_clock();
+
 	if (so->cached_in_sock_layer == 0) {
 	     it = (struct inp_tp *)inp;
 	     tp = &it->tcb;
 	}
 	else
 	     tp = (struct tcpcb *) inp->inp_saved_ppcb;
-
+	
 	bzero((char *) tp, sizeof(struct tcpcb));
 	LIST_INIT(&tp->t_segq);
 	tp->t_maxseg = tp->t_maxopd =
@@ -719,12 +810,25 @@ tcp_newtcpcb(inp)
 	tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
 	tp->t_rttmin = tcp_TCPTV_MIN;
 	tp->t_rxtcur = TCPTV_RTOBASE;
+
+	/* Initialize congestion control algorithm for this connection 
+	 * to newreno by default
+	 */
+	tp->tcp_cc_index = TCP_CC_ALGO_NEWRENO_INDEX;
+	if (CC_ALGO(tp)->init != NULL) {
+		CC_ALGO(tp)->init(tp);
+	}
+
 	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->snd_ssthresh_prev = TCP_MAXWIN << TCP_MAX_WINSHIFT;
-	tp->t_rcvtime = 0;
+	tp->t_rcvtime = tcp_now;
 	tp->t_bw_rtttime = 0;
+	tp->tentry.timer_start = tcp_now;
+	tp->t_persist_timeout = tcp_max_persist_timeout;
+	tp->t_persist_stop = 0;
+	tp->t_flagsext |= TF_RCVUNACK_WAITSS;
 	/*
 	 * IPv4 TTL initialization is necessary for an IPv6 socket as well,
 	 * because the socket may be bound to an IPv6 wildcard address,
@@ -746,8 +850,13 @@ tcp_drop(tp, errno)
 	int errno;
 {
 	struct socket *so = tp->t_inpcb->inp_socket;
-     
+#if CONFIG_DTRACE
+	struct inpcb *inp = tp->t_inpcb;
+#endif /* CONFIG_DTRACE */
+
 	if (TCPS_HAVERCVDSYN(tp->t_state)) {
+		DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
+			struct tcpcb *, tp, int32_t, TCPS_CLOSED);
 		tp->t_state = TCPS_CLOSED;
 		(void) tcp_output(tp);
 		tcpstat.tcps_drops++;
@@ -778,26 +887,10 @@ tcp_close(tp)
 	int dosavessthresh;
 
 	if ( inp->inp_ppcb == NULL) /* tcp_close was called previously, bail */
-		return NULL;
-
-	/* Clear the timers before we delete the PCB. */
-	{
-		int i;
-		for (i = 0; i < TCPT_NTIMERS; i++) {
-			tp->t_timer[i] = 0;
-		}
-	}
+		return(NULL);
 
+	tcp_canceltimers(tp);
 	KERNEL_DEBUG(DBG_FNC_TCP_CLOSE | DBG_FUNC_START, tp,0,0,0,0);
-	switch (tp->t_state) 
-	{
-	case TCPS_ESTABLISHED:
-	case TCPS_FIN_WAIT_1:
-	case TCPS_CLOSING:
-	case TCPS_CLOSE_WAIT:
-	case TCPS_LAST_ACK:
-	     break;
-	}
 
 	/*
 	 * If another thread for this tcp is currently in ip (indicated by
@@ -816,6 +909,10 @@ tcp_close(tp)
 		return (NULL);
 	}
 
+	if (CC_ALGO(tp)->cleanup != NULL) {
+		CC_ALGO(tp)->cleanup(tp);
+	}
+
 #if INET6
 	rt = isipv6 ? inp->in6p_route.ro_rt : inp->inp_route.ro_rt;
 #else
@@ -853,8 +950,11 @@ tcp_close(tp)
 		if (rt == NULL || !(rt->rt_flags & RTF_UP) ||
 		    ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr ==
 		    INADDR_ANY || rt->generation_id != route_generation) {
-			if (tp->t_state >= TCPS_CLOSE_WAIT)
+			if (tp->t_state >= TCPS_CLOSE_WAIT) {
+				DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
+					struct tcpcb *, tp, int32_t, TCPS_CLOSING);
 				tp->t_state = TCPS_CLOSING;
+			}
 			goto no_valid_rt;
 		}
 
@@ -961,17 +1061,23 @@ no_valid_rt:
 	if (so->cached_in_sock_layer)
 	    inp->inp_saved_ppcb = (caddr_t) tp;
 #endif
+	/* Issue a wakeup before detach so that we don't miss
+	 * any pending wakeup on the socket
+	 */
+	sodisconnectwakeup(so);
 
-	soisdisconnected(so);
 #if INET6
 	if (INP_CHECK_SOCKAF(so, AF_INET6))
 		in6_pcbdetach(inp);
 	else
 #endif /* INET6 */
 	in_pcbdetach(inp);
+
+	/* Call soisdisconnected after detach because it might unlock the socket */
+	soisdisconnected(so);
 	tcpstat.tcps_closed++;
 	KERNEL_DEBUG(DBG_FNC_TCP_CLOSE | DBG_FUNC_END, tcpstat.tcps_closed,0,0,0,0);
-	return ((struct tcpcb *)0);
+	return(NULL);
 }
 
 int
@@ -985,7 +1091,7 @@ tcp_freeq(tp)
 	while((q = LIST_FIRST(&tp->t_segq)) != NULL) {
 		LIST_REMOVE(q, tqe_q);
 		m_freem(q->tqe_m);
-		FREE(q, M_TSEGQ);
+		zfree(tcp_reass_zone, q);
 		tcp_reass_qsize--;
 		rv = 1;
 	}
@@ -1019,7 +1125,7 @@ tcp_drain()
 					       != NULL) {
 					LIST_REMOVE(te, tqe_q);
 					m_freem(te->tqe_m);
-					FREE(te, M_TSEGQ);
+					zfree(tcp_reass_zone, te);
 					tcp_reass_qsize--;
 				}
 			}
@@ -1083,7 +1189,7 @@ tcpcb_to_otcpcb(struct tcpcb *tp, struct otcpcb *otp)
 
 	otp->t_segq = (u_int32_t)(uintptr_t)tp->t_segq.lh_first;
 	otp->t_dupacks = tp->t_dupacks;
-	for (i = 0; i < TCPT_NTIMERS; i++)
+	for (i = 0; i < TCPT_NTIMERS_EXT; i++)
 		otp->t_timer[i] = tp->t_timer[i];
 	otp->t_inpcb = (_TCPCB_PTR(struct inpcb *))(uintptr_t)tp->t_inpcb;
 	otp->t_state = tp->t_state;
@@ -1258,7 +1364,7 @@ tcp_pcblist SYSCTL_HANDLER_ARGS
 	return error;
 }
 
-SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
 	    tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
 
 #if !CONFIG_EMBEDDED
@@ -1270,7 +1376,7 @@ tcpcb_to_xtcpcb64(struct tcpcb *tp, struct xtcpcb64 *otp)
 
         otp->t_segq = (u_int32_t)(uintptr_t)tp->t_segq.lh_first;
         otp->t_dupacks = tp->t_dupacks;
-        for (i = 0; i < TCPT_NTIMERS; i++)
+        for (i = 0; i < TCPT_NTIMERS_EXT; i++)
                 otp->t_timer[i] = tp->t_timer[i];
         otp->t_state = tp->t_state;
         otp->t_flags = tp->t_flags;
@@ -1406,44 +1512,60 @@ tcp_pcblist64 SYSCTL_HANDLER_ARGS
         for (i = 0; i < n; i++) {
                 inp = inp_list[i];
                 if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) {
-                        struct xtcpcb64 xt;
-
-                        bzero(&xt, sizeof(xt));
-                        xt.xt_len = sizeof xt;
-			inpcb_to_xinpcb64(inp, &xt.xt_inpcb);
-			xt.xt_inpcb.inp_ppcb = (u_int64_t)(uintptr_t)inp->inp_ppcb;
-                        if (inp->inp_ppcb != NULL)
-                                tcpcb_to_xtcpcb64((struct tcpcb *)inp->inp_ppcb, &xt);
-                        if (inp->inp_socket)
-                                sotoxsocket64(inp->inp_socket, &xt.xt_inpcb.xi_socket);
-                        error = SYSCTL_OUT(req, &xt, sizeof xt);
+			struct xtcpcb64 xt;
+
+			bzero(&xt, sizeof(xt));
+			xt.xt_len = sizeof xt;
+			inpcb_to_xinpcb64(inp, &xt.xt_inpcb);
+			xt.xt_inpcb.inp_ppcb = (u_int64_t)(uintptr_t)inp->inp_ppcb;
+			if (inp->inp_ppcb != NULL)
+				tcpcb_to_xtcpcb64((struct tcpcb *)inp->inp_ppcb, &xt);
+			if (inp->inp_socket)
+				sotoxsocket64(inp->inp_socket, &xt.xt_inpcb.xi_socket);
+			error = SYSCTL_OUT(req, &xt, sizeof xt);
                 }
         }
         if (!error) {
-                /*
-                 * Give the user an updated idea of our state.
-                 * If the generation differs from what we told
-                 * her before, she knows that something happened
-                 * while we were processing this request, and it
-                 * might be necessary to retry.
-                 */
-                bzero(&xig, sizeof(xig));
-                xig.xig_len = sizeof xig;
-                xig.xig_gen = tcbinfo.ipi_gencnt;
-                xig.xig_sogen = so_gencnt;
-                xig.xig_count = tcbinfo.ipi_count;
-                error = SYSCTL_OUT(req, &xig, sizeof xig);
+		/*
+		 * Give the user an updated idea of our state.
+		 * If the generation differs from what we told
+		 * her before, she knows that something happened
+		 * while we were processing this request, and it
+		 * might be necessary to retry.
+		 */
+		bzero(&xig, sizeof(xig));
+		xig.xig_len = sizeof xig;
+		xig.xig_gen = tcbinfo.ipi_gencnt;
+		xig.xig_sogen = so_gencnt;
+		xig.xig_count = tcbinfo.ipi_count;
+		error = SYSCTL_OUT(req, &xig, sizeof xig);
         }
         FREE(inp_list, M_TEMP);
         lck_rw_done(tcbinfo.mtx);
         return error;
 }
 
-SYSCTL_PROC(_net_inet_tcp, OID_AUTO, pcblist64, CTLFLAG_RD, 0, 0,
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
             tcp_pcblist64, "S,xtcpcb64", "List of active TCP connections");
 
 #endif /* !CONFIG_EMBEDDED */
 
+static int
+tcp_pcblist_n SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+	int error = 0;
+
+	error = get_pcblist_n(IPPROTO_TCP, req, &tcbinfo);
+
+	return error;
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, pcblist_n, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
+            tcp_pcblist_n, "S,xtcpcb_n", "List of active TCP connections");
+
 void
 tcp_ctlinput(cmd, sa, vip)
 	int cmd;
@@ -1618,10 +1740,10 @@ tcp6_ctlinput(cmd, sa, d)
 
 		in6_pcbnotify(&tcbinfo, sa, th.th_dport,
 		    (struct sockaddr *)ip6cp->ip6c_src,
-		    th.th_sport, cmd, notify);
+		    th.th_sport, cmd, NULL, notify);
 	} else {
 		in6_pcbnotify(&tcbinfo, sa, 0,
-		    (struct sockaddr *)(size_t)sa6_src, 0, cmd, notify);
+		    (struct sockaddr *)(size_t)sa6_src, 0, cmd, NULL, notify);
 	}
 }
 #endif /* INET6 */
@@ -1773,7 +1895,7 @@ tcp_mtudisc(
 	if (tp) {
 #if INET6
 		if (isipv6)
-			rt = tcp_rtlookup6(inp);
+			rt = tcp_rtlookup6(inp, IFSCOPE_NONE);
 		else
 #endif /* INET6 */
 		rt = tcp_rtlookup(inp, IFSCOPE_NONE);
@@ -1837,6 +1959,11 @@ tcp_mtudisc(
 
 		tp->t_maxseg = mss;
 
+		/*
+		 * Reset the slow-start flight size as it may depend on the new MSS
+		 */
+		if (CC_ALGO(tp)->cwnd_init != NULL)
+			CC_ALGO(tp)->cwnd_init(tp);
 		tcpstat.tcps_mturesent++;
 		tp->t_rtttime = 0;
 		tp->snd_nxt = tp->snd_una;
@@ -1889,7 +2016,7 @@ tcp_rtlookup(inp, input_ifscope)
 
 			if (rt != NULL)
 				RT_UNLOCK(rt);
-			rtalloc_scoped_ign(ro, 0, ifscope);
+			rtalloc_scoped(ro, ifscope);
 			if ((rt = ro->ro_rt) != NULL)
 				RT_LOCK(rt);
 		}
@@ -1934,8 +2061,9 @@ tcp_rtlookup(inp, input_ifscope)
 
 #if INET6
 struct rtentry *
-tcp_rtlookup6(inp)
+tcp_rtlookup6(inp, input_ifscope)
 	struct inpcb *inp;
+	unsigned int input_ifscope;
 {
 	struct route_in6 *ro6;
 	struct rtentry *rt;
@@ -1952,14 +2080,26 @@ tcp_rtlookup6(inp)
 		/* No route yet, so try to acquire one */
 		if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 			struct sockaddr_in6 *dst6;
+			unsigned int ifscope;
 
 			dst6 = (struct sockaddr_in6 *)&ro6->ro_dst;
 			dst6->sin6_family = AF_INET6;
 			dst6->sin6_len = sizeof(*dst6);
 			dst6->sin6_addr = inp->in6p_faddr;
+
+			/*
+			 * If the socket was bound to an interface, then
+			 * the bound-to-interface takes precedence over
+			 * the inbound interface passed in by the caller
+			 * (if we get here as part of the output path then
+			 * input_ifscope is IFSCOPE_NONE).
+			 */
+			ifscope = (inp->inp_flags & INP_BOUND_IF) ?
+			    inp->inp_boundif : input_ifscope;
+
 			if (rt != NULL)
 				RT_UNLOCK(rt);
-			rtalloc_ign((struct route *)ro6, 0);
+			rtalloc_scoped((struct route *)ro6, ifscope);
 			if ((rt = ro6->ro_rt) != NULL)
 				RT_LOCK(rt);
 		}
@@ -2068,7 +2208,7 @@ tcp_gettaocache(inp)
 
 #if INET6
 	if ((inp->inp_vflag & INP_IPV6) != 0)
-		rt = tcp_rtlookup6(inp);
+		rt = tcp_rtlookup6(inp, IFSCOPE_NONE);
 	else
 #endif /* INET6 */
 	rt = tcp_rtlookup(inp, IFSCOPE_NONE);
@@ -2112,7 +2252,7 @@ tcp_lock(struct socket *so, int refcount, void *lr)
 		lr_saved = lr;
 
 	if (so->so_pcb != NULL) {
-		lck_mtx_lock(((struct inpcb *)so->so_pcb)->inpcb_mtx);
+		lck_mtx_lock(&((struct inpcb *)so->so_pcb)->inpcb_mtx);
 	} else  {
 		panic("tcp_lock: so=%p NO PCB! lr=%p lrh= %s\n", 
 		    so, lr_saved, solockhistory_nr(so));
@@ -2143,7 +2283,7 @@ tcp_unlock(struct socket *so, int refcount, void *lr)
 
 #ifdef MORE_TCPLOCK_DEBUG
 	printf("tcp_unlock: so=%p sopcb=%p lock=%p ref=%x lr=%p\n",
-	    so, so->so_pcb, ((struct inpcb *)so->so_pcb)->inpcb_mtx,
+	    so, so->so_pcb, &((struct inpcb *)so->so_pcb)->inpcb_mtx,
 	    so->so_usecount, lr_saved);
 #endif
 	if (refcount)
@@ -2159,11 +2299,11 @@ tcp_unlock(struct socket *so, int refcount, void *lr)
 		    so, so->so_usecount, lr_saved, solockhistory_nr(so));
 		/* NOTREACHED */
 	} else {
-		lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx,
+		lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx,
 		    LCK_MTX_ASSERT_OWNED);
 		so->unlock_lr[so->next_unlock_lr] = lr_saved;
 		so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
-		lck_mtx_unlock(((struct inpcb *)so->so_pcb)->inpcb_mtx);
+		lck_mtx_unlock(&((struct inpcb *)so->so_pcb)->inpcb_mtx);
 	}
 	return (0);
 }
@@ -2179,7 +2319,7 @@ tcp_getlock(
 		if (so->so_usecount < 0)
 			panic("tcp_getlock: so=%p usecount=%x lrh= %s\n", 
 			    so, so->so_usecount, solockhistory_nr(so));	
-		return(inp->inpcb_mtx);
+		return(&inp->inpcb_mtx);
 	}
 	else {
 		panic("tcp_getlock: so=%p NULL so_pcb %s\n", 
@@ -2199,17 +2339,7 @@ tcp_sbspace(struct tcpcb *tp)
 	if (space < 0) 
 		space = 0;
 
-#if TRAFFIC_MGT
-	if (tp->t_inpcb->inp_socket->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BG_REGULATE) {
-		if (tcp_background_io_enabled &&
-			tp->t_inpcb->inp_socket->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BG_SUPPRESSED) {
-			tp->t_flags |= TF_RXWIN0SENT;
-			return 0; /* Triggers TCP window closing by responding there is no space */
-		}
-	}
-#endif /* TRAFFIC_MGT */
-
-	/* Avoid inscreasing window size if the current window
+	/* Avoid increasing window size if the current window
 	 * is already very low, we could be in "persist" mode and
 	 * we could break some apps (see rdar://5409343)
 	 */
@@ -2252,11 +2382,6 @@ tcp_set_tso(tp, ifp)
 	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
 
 	if (isipv6) {
-		/*
-		 * Radar 6921834: Disable TSO IPv6 because there is no support
-		 * for TSO & HW checksum in ip6_output yet
-		 */
-#if 0
 		if (ifp && ifp->if_hwassist & IFNET_TSO_IPV6) {
 			tp->t_flags |= TF_TSO;
 			if (ifp->if_tso_v6_mtu != 0) 
@@ -2266,7 +2391,6 @@ tcp_set_tso(tp, ifp)
 		} else
 				tp->t_flags &= ~TF_TSO;
 
-#endif
 	} else 
 #endif /* INET6 */
 
@@ -2281,4 +2405,50 @@ tcp_set_tso(tp, ifp)
 				tp->t_flags &= ~TF_TSO;
 	}
 }
+
+#define TIMEVAL_TO_TCPHZ(_tv_) ((_tv_).tv_sec * TCP_RETRANSHZ + (_tv_).tv_usec / TCP_RETRANSHZ_TO_USEC)
+
+/* Function to calculate the tcp clock. The tcp clock gets updated at the
+ * boundaries of the tcp layer. This is done at 3 places:
+ * 1. Right before processing an input tcp packet
+ * 2. Whenever a connection wants to access the network using tcp_usrreqs
+ * 3. When a tcp timer fires or before tcp slow timeout
+ */
+
+void
+calculate_tcp_clock()
+{
+	struct timeval tv = tcp_uptime;
+	struct timeval interval = {0, TCP_RETRANSHZ_TO_USEC};
+	struct timeval now, hold_now;
+	uint32_t incr = 0;
+
+	timevaladd(&tv, &interval);
+	microuptime(&now);
+	if (timevalcmp(&now, &tv, >)) {
+		/* time to update the clock */
+		lck_spin_lock(tcp_uptime_lock);
+		if (timevalcmp(&tcp_uptime, &now, >=)) {
+			/* clock got updated while we were waiting for the lock */
+			lck_spin_unlock(tcp_uptime_lock);
+			return;
+		}
+
+		microuptime(&now);
+		hold_now = now;
+		tv = tcp_uptime;
+		timevalsub(&now, &tv);
+
+		incr = TIMEVAL_TO_TCPHZ(now);
+		if (incr > 0) {
+			tcp_uptime = hold_now;
+			tcp_now += incr;
+		}
+
+		lck_spin_unlock(tcp_uptime_lock);
+	}
+	return;
+}
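
As a worked example of the TIMEVAL_TO_TCPHZ() conversion driving this clock: assuming TCP_RETRANSHZ ticks per second and TCP_RETRANSHZ_TO_USEC microseconds per tick (the constants below are illustrative assumptions, e.g. a 10 ms tick), an elapsed struct timeval maps to a tick increment like this:

    #include <stdio.h>
    #include <stdint.h>
    #include <sys/time.h>

    /* Assumed for illustration: 100 ticks per second, i.e. a 10 ms tick. */
    #define TCP_RETRANSHZ         100
    #define TCP_RETRANSHZ_TO_USEC 10000

    #define TIMEVAL_TO_TCPHZ(_tv_) \
        ((_tv_).tv_sec * TCP_RETRANSHZ + (_tv_).tv_usec / TCP_RETRANSHZ_TO_USEC)

    int main(void)
    {
        /* 1.234567 s elapsed since tcp_uptime was last advanced */
        struct timeval elapsed = { 1, 234567 };
        uint32_t tcp_now = 1000;    /* current tick count */

        uint32_t incr = TIMEVAL_TO_TCPHZ(elapsed);
        if (incr > 0)
            tcp_now += incr;        /* 1*100 + 234567/10000 = 123 ticks */

        printf("incr=%u tcp_now=%u\n", incr, tcp_now); /* incr=123 tcp_now=1123 */
        return 0;
    }
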
+
 /* DSEP Review Done pl-20051213-v02 @3253,@3391,@3400 */
diff --git a/bsd/netinet/tcp_timer.c b/bsd/netinet/tcp_timer.c
index fd66419d0..706ec823c 100644
--- a/bsd/netinet/tcp_timer.c
+++ b/bsd/netinet/tcp_timer.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -71,6 +71,8 @@
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <sys/domain.h>
+#include <sys/mcache.h>
+#include <sys/queue.h>
 #include <kern/locks.h>
 
 #include <kern/cpu_number.h>	/* before tcp_seq.h, for tcp_random18() */
@@ -89,6 +91,7 @@
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
+#include <netinet/tcp_cc.h>
 #if INET6
 #include <netinet6/tcp6_var.h>
 #endif
@@ -97,23 +100,31 @@
 #include <netinet/tcp_debug.h>
 #endif
 #include <sys/kdebug.h>
+#include <mach/sdt.h>
 
 extern void postevent(struct socket *, struct sockbuf *,
                                                int);
 #define DBG_FNC_TCP_FAST	NETDBG_CODE(DBG_NETTCP, (5 << 8))
 #define DBG_FNC_TCP_SLOW	NETDBG_CODE(DBG_NETTCP, (5 << 8) | 1)
 
+#define TIMERENTRY_TO_TP(te) ((struct tcpcb *)((uintptr_t)te - offsetof(struct tcpcb, tentry.le.le_next))) 
+
+#define VERIFY_NEXT_LINK(elm,field) do {	\
+	if (LIST_NEXT((elm),field) != NULL && 	\
+	    LIST_NEXT((elm),field)->field.le_prev !=	\
+		&((elm)->field.le_next))	\
+		panic("Bad link elm %p next->prev != elm", (elm));	\
+} while(0)
+
+#define VERIFY_PREV_LINK(elm,field) do {	\
+	if (*(elm)->field.le_prev != (elm))	\
+		panic("Bad link elm %p prev->next != elm", (elm));	\
+} while(0)
+
 static int 	background_io_trigger = 5;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, background_io_trigger, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, background_io_trigger, CTLFLAG_RW | CTLFLAG_LOCKED,
     &background_io_trigger, 0, "Background IO Trigger Setting");
 
-/*
- * NOTE - WARNING
- *
- *
- * 
- *
- */
 static int
 sysctl_msec_to_ticks SYSCTL_HANDLER_ARGS
 {
@@ -136,25 +147,42 @@ sysctl_msec_to_ticks SYSCTL_HANDLER_ARGS
 }
 
 int	tcp_keepinit;
-SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "");
 
 int	tcp_keepidle;
-SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "");
 
 int	tcp_keepintvl;
-SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "");
 
 int	tcp_msl;
-SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
 
+/* 
+ * Avoid DoS via TCP Robustness in Persist Condition (see http://www.ietf.org/id/draft-ananth-tcpm-persist-02.txt)
+ * by allowing a system wide maximum persistence timeout value when in Zero Window Probe mode.
+ * Expressed in milliseconds to be consistent with other timeout-related values; the TCP socket option is in seconds.
+ */
+u_int32_t tcp_max_persist_timeout = 0;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, max_persist_timeout, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+    &tcp_max_persist_timeout, 0, sysctl_msec_to_ticks, "I", "Maximum persistence timeout for ZWP");
+
 static int	always_keepalive = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW | CTLFLAG_LOCKED,
     &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
 
+/* This parameter determines how long the timer list will stay in fast mode even
+ * when all connections are idle. In fast mode, the timer will fire more frequently
+ * anticipating new data.
+ */
+int timer_fastmode_idlemax = TCP_FASTMODE_IDLEGEN_MAX;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_fastmode_idlemax, CTLFLAG_RW | CTLFLAG_LOCKED,
+	&timer_fastmode_idlemax, 0, "Maximum idle generations in fast mode");
+
 /*
  * See tcp_syn_backoff[] for interval values between SYN retransmits;
  * the value set below defines the number of retransmits, before we
@@ -163,16 +191,25 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
  * two options.
  */
 static int tcp_broken_peer_syn_rxmit_thres = 7;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, broken_peer_syn_rxmit_thres, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, broken_peer_syn_rxmit_thres, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_broken_peer_syn_rxmit_thres, 0, "Number of retransmitted SYNs before "
     "TCP disables rfc1323 and rfc1644 during the rest of attempts");
 
+static int tcp_timer_advanced = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_timer_advanced, CTLFLAG_RD | CTLFLAG_LOCKED,
+    &tcp_timer_advanced, 0, "Number of times one of the timers was advanced");
+
+static int tcp_resched_timerlist = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_resched_timerlist, CTLFLAG_RD | CTLFLAG_LOCKED,
+    &tcp_resched_timerlist, 0, 
+    "Number of times timer list was rescheduled as part of processing a packet");
+
 int	tcp_pmtud_black_hole_detect = 1 ;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_pmtud_black_hole_detect, 0, "Path MTU Discovery Black Hole Detection");
 
 int	tcp_pmtud_black_hole_mss = 1200 ;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_pmtud_black_hole_mss, 0, "Path MTU Discovery Black Hole Detection lowered MSS");
 
 static int	tcp_keepcnt = TCPTV_KEEPCNT;
@@ -182,28 +219,68 @@ int	tcp_maxpersistidle;
 	/* max idle time in persist */
 int	tcp_maxidle;
 
+/* The TCP delack timer is set to 100 ms. Since the processing of the timer list in
+ * fast mode will happen no faster than every 100 ms, the delayed ack timer will fire
+ * somewhere between 100 and 200 ms.
+ */
+int	tcp_delack = TCP_RETRANSHZ / 10;
+
 struct	inpcbhead	time_wait_slots[N_TIME_WAIT_SLOTS];
 int		cur_tw_slot = 0;
 
-u_int32_t		*delack_bitmask;
+/* tcp timer list */
+struct tcptimerlist tcp_timer_list;
 
-void	add_to_time_wait_locked(struct tcpcb *tp);
-void	add_to_time_wait(struct tcpcb *tp) ;
+/* The frequency of running through the TCP timer list in 
+ * fast and slow mode can be configured.
+ */
+SYSCTL_UINT(_net_inet_tcp, OID_AUTO, timer_fastquantum, CTLFLAG_RW | CTLFLAG_LOCKED,
+	&tcp_timer_list.fast_quantum, TCP_FASTTIMER_QUANTUM, 
+	"Frequency of running timer list in fast mode");
+
+SYSCTL_UINT(_net_inet_tcp, OID_AUTO, timer_slowquantum, CTLFLAG_RW | CTLFLAG_LOCKED,
+	&tcp_timer_list.slow_quantum, TCP_SLOWTIMER_QUANTUM, 
+	"Frequency of running timer list in slow mode");
+
+static void tcp_remove_timer(struct tcpcb *tp);
+static void tcp_sched_timerlist(uint32_t offset);
+static uint32_t tcp_run_conn_timer(struct tcpcb *tp, uint16_t *next_index);
+static void tcp_sched_timers(struct tcpcb *tp);
+static inline void tcp_set_lotimer_index(struct tcpcb *);
+
+/* Function to compare two timers. If the difference flips the sign bit, it is
+ * safe to assume that the timer has wrapped around. By doing a signed
+ * comparison, we handle wraparound so that a value that has wrapped past zero
+ * is still treated as being ahead of the other.
+ */
+
+static inline int32_t
+timer_diff(uint32_t t1, uint32_t toff1, uint32_t t2, uint32_t toff2) {
+	return (int32_t)((t1 + toff1) - (t2 + toff2));
+}
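
A small user-space demonstration of why the signed cast makes this comparison wrap-safe: when one timestamp has crossed the 32-bit boundary, the unsigned subtraction followed by a signed interpretation still orders the two correctly, provided they are within 2^31 ticks of each other:

    #include <stdio.h>
    #include <stdint.h>

    static inline int32_t
    timer_diff(uint32_t t1, uint32_t toff1, uint32_t t2, uint32_t toff2)
    {
        return (int32_t)((t1 + toff1) - (t2 + toff2));
    }

    int main(void)
    {
        uint32_t before_wrap = 0xfffffff0u;  /* shortly before 2^32 */
        uint32_t after_wrap  = 0x00000010u;  /* shortly after wrapping */

        /* after_wrap is 0x20 ticks ahead of before_wrap even though it is
         * numerically smaller; a naive unsigned compare would invert this. */
        printf("%d\n", timer_diff(after_wrap, 0, before_wrap, 0));  /* 32 */
        printf("%d\n", timer_diff(before_wrap, 0, after_wrap, 0));  /* -32 */
        return 0;
    }
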
+
+/* Returns true if the timer is on the timer list */
+#define TIMER_IS_ON_LIST(tp) ((tp)->t_flags & TF_TIMER_ONLIST)
+
+void	add_to_time_wait_locked(struct tcpcb *tp, uint32_t delay);
+void	add_to_time_wait(struct tcpcb *tp, uint32_t delay);
 
 static void tcp_garbage_collect(struct inpcb *, int);
 
-void	add_to_time_wait_locked(struct tcpcb *tp) 
+void	add_to_time_wait_locked(struct tcpcb *tp, uint32_t delay) 
 {
 	int		tw_slot;
-    struct inpcbinfo *pcbinfo	= &tcbinfo;
+	struct inpcbinfo *pcbinfo	= &tcbinfo;
+	uint32_t timer;
 
 	/* pcb list should be locked when we get here */	
 	lck_rw_assert(pcbinfo->mtx, LCK_RW_ASSERT_EXCLUSIVE);
 
 	LIST_REMOVE(tp->t_inpcb, inp_list);
 
-	if (tp->t_timer[TCPT_2MSL] <= 0) 
-	    tp->t_timer[TCPT_2MSL] = 1;
 
 	/*
 	 * Because we're pulling this pcb out of the main TCP pcb list,
@@ -211,19 +288,19 @@ void	add_to_time_wait_locked(struct tcpcb *tp)
 	 * higher timer granularity.
 	 */
 
-	tp->t_timer[TCPT_2MSL] = (tp->t_timer[TCPT_2MSL] / TCP_RETRANSHZ) * PR_SLOWHZ;
+	timer = (delay / TCP_RETRANSHZ) * PR_SLOWHZ;
 	tp->t_rcvtime = (tp->t_rcvtime / TCP_RETRANSHZ) * PR_SLOWHZ;
 
-	tp->t_rcvtime += tp->t_timer[TCPT_2MSL] & (N_TIME_WAIT_SLOTS - 1); 
+	tp->t_rcvtime += timer & (N_TIME_WAIT_SLOTS - 1); 
 
-	tw_slot = (tp->t_timer[TCPT_2MSL] & (N_TIME_WAIT_SLOTS - 1)) + cur_tw_slot; 
+	tw_slot = (timer & (N_TIME_WAIT_SLOTS - 1)) + cur_tw_slot; 
 	if (tw_slot >= N_TIME_WAIT_SLOTS)
 	    tw_slot -= N_TIME_WAIT_SLOTS;
 
 	LIST_INSERT_HEAD(&time_wait_slots[tw_slot], tp->t_inpcb, inp_list);
 }
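
The slot arithmetic above rescales the tick-granularity delay to PR_SLOWHZ granularity and hashes it into a circular array of buckets relative to cur_tw_slot. A user-space sketch of the same mapping (the constants are assumptions for illustration; N_TIME_WAIT_SLOTS must be a power of two for the mask to work):

    #include <stdio.h>
    #include <stdint.h>

    /* Assumed values for illustration only. */
    #define TCP_RETRANSHZ      100  /* tcp clock ticks per second */
    #define PR_SLOWHZ          2    /* slow-timeout ticks per second */
    #define N_TIME_WAIT_SLOTS  128  /* must be a power of two */

    static int cur_tw_slot = 5;

    /* Mirrors the slot computation in add_to_time_wait_locked(). */
    static int tw_slot_for(uint32_t delay)
    {
        uint32_t timer = (delay / TCP_RETRANSHZ) * PR_SLOWHZ; /* rescale */
        int tw_slot = (timer & (N_TIME_WAIT_SLOTS - 1)) + cur_tw_slot;

        if (tw_slot >= N_TIME_WAIT_SLOTS)
            tw_slot -= N_TIME_WAIT_SLOTS;
        return tw_slot;
    }

    int main(void)
    {
        /* e.g. a 30 s (2*MSL) delay = 3000 tcp-clock ticks */
        printf("slot=%d\n", tw_slot_for(3000)); /* (60 & 127) + 5 = 65 */
        return 0;
    }
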
 
-void	add_to_time_wait(struct tcpcb *tp) 
+void	add_to_time_wait(struct tcpcb *tp, uint32_t delay) 
 {
     	struct inpcbinfo *pcbinfo		= &tcbinfo;
 	
@@ -232,97 +309,10 @@ void	add_to_time_wait(struct tcpcb *tp)
 		lck_rw_lock_exclusive(pcbinfo->mtx);
 		tcp_lock(tp->t_inpcb->inp_socket, 0, 0);
 	}
-	add_to_time_wait_locked(tp);
+	add_to_time_wait_locked(tp, delay);
 	lck_rw_done(pcbinfo->mtx);
 }
 
-
-
-
-/*
- * Fast timeout routine for processing delayed acks
- */
-void
-tcp_fasttimo(void *arg)
-{
-#pragma unused(arg)
-    struct inpcb *inp;
-    register struct tcpcb *tp;
-    struct socket *so;
-#if TCPDEBUG
-    int ostate;
-#endif
-
-
-    struct inpcbinfo *pcbinfo	= &tcbinfo;
-
-    int delack_done = 0;
-
-    KERNEL_DEBUG(DBG_FNC_TCP_FAST | DBG_FUNC_START, 0,0,0,0,0);
-
-
-    lck_rw_lock_shared(pcbinfo->mtx);
-
-    /* Walk the list of valid tcpcbs and send ACKS on the ones with DELACK bit set */
-
-    LIST_FOREACH(inp, &tcb, inp_list) {
-
-	so = inp->inp_socket;
-
-	if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) 
-		continue;
-
-	tcp_lock(so, 1, 0);
-
-	if ((in_pcb_checkstate(inp, WNT_RELEASE,1) == WNT_STOPUSING)  && so->so_usecount == 1) {
-		tcp_unlock(so, 1, 0);
-		continue;
-	}
-
-	tp = intotcpcb(inp);
-
-	if (tp == 0 || tp->t_state == TCPS_LISTEN) {
-		tcp_unlock(so, 1, 0);
-		continue; 
-	}
-
-
-	/* Only run the retransmit timer in that case */
-	if (tp->t_timer[0] && --tp->t_timer[0] == 0) {
-		tp = tcp_timers(tp, 0);
-		if (tp == NULL)
-			goto tpgone;
-	}
-
-	/* TCP pcb  timers following the tcp_now clock rate */
-
-	tp->t_rcvtime++;
-	tp->t_starttime++;
-	if (tp->t_rtttime)
-		tp->t_rtttime++;	
-
-	/*
-	 * Process delayed acks (if enabled) according to PR_FASTHZ, not the retrans timer
-	 */
-
-	if (tcp_delack_enabled && (tcp_now % (TCP_RETRANSHZ/PR_FASTHZ)) && tp->t_flags & TF_DELACK) {
-		delack_done++;
-		tp->t_flags &= ~TF_DELACK;
-		tp->t_flags |= TF_ACKNOW;
-		tcpstat.tcps_delack++;
-		tp->t_unacksegs = 0;
-		(void) tcp_output(tp);
-	}
-tpgone:
-	tcp_unlock(so, 1, 0);
-    }
-    KERNEL_DEBUG(DBG_FNC_TCP_FAST | DBG_FUNC_END, delack_done, 0, tcpstat.tcps_delack,0,0);
-    lck_rw_done(pcbinfo->mtx);
-
-    tcp_now++;
-    timeout(tcp_fasttimo, 0, hz/TCP_RETRANSHZ);
-}
-
 static void
 tcp_garbage_collect(struct inpcb *inp, int istimewait)
 {
@@ -339,12 +329,12 @@ tcp_garbage_collect(struct inpcb *inp, int istimewait)
 	 * overflow sockets that are eligible for garbage collection have
 	 * their usecounts set to 1.
 	 */
-	if (so->so_usecount > 1 || !lck_mtx_try_lock_spin(inp->inpcb_mtx))
+	if (so->so_usecount > 1 || !lck_mtx_try_lock_spin(&inp->inpcb_mtx))
 		return;
 
 	/* Check again under the lock */
 	if (so->so_usecount > 1) {
-		lck_mtx_unlock(inp->inpcb_mtx);
+		lck_mtx_unlock(&inp->inpcb_mtx);
 		return;
 	}
 
@@ -365,7 +355,7 @@ tcp_garbage_collect(struct inpcb *inp, int istimewait)
 
 		if (inp->inp_state != INPCB_STATE_DEAD) {
 			/* Become a regular mutex */
-			lck_mtx_convert_spin(inp->inpcb_mtx);
+			lck_mtx_convert_spin(&inp->inpcb_mtx);
 #if INET6
 			if (INP_CHECK_SOCKAF(so, AF_INET6))
 				in6_pcbdetach(inp);
@@ -374,10 +364,10 @@ tcp_garbage_collect(struct inpcb *inp, int istimewait)
 			in_pcbdetach(inp);
 		}
 		so->so_usecount--;
-		lck_mtx_unlock(inp->inpcb_mtx);
+		lck_mtx_unlock(&inp->inpcb_mtx);
 		return;
 	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
-		lck_mtx_unlock(inp->inpcb_mtx);
+		lck_mtx_unlock(&inp->inpcb_mtx);
 		return;
 	}
 
@@ -392,8 +382,10 @@ tcp_garbage_collect(struct inpcb *inp, int istimewait)
 	 * socket is dropped at the end of tcp_input().
 	 */
 	if (so->so_usecount == 0) {
+		DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
+			struct tcpcb *, tp, int32_t, TCPS_CLOSED);
 		/* Become a regular mutex */
-		lck_mtx_convert_spin(inp->inpcb_mtx);
+		lck_mtx_convert_spin(&inp->inpcb_mtx);
 		if (inp->inp_state != INPCB_STATE_DEAD) {
 #if INET6
 			if (INP_CHECK_SOCKAF(so, AF_INET6))
@@ -404,20 +396,15 @@ tcp_garbage_collect(struct inpcb *inp, int istimewait)
 		}
 		in_pcbdispose(inp);
 	} else {
-		lck_mtx_unlock(inp->inpcb_mtx);
+		lck_mtx_unlock(&inp->inpcb_mtx);
 	}
 }
 
-static int bg_cnt = 0;
-#define BG_COUNTER_MAX 3
-
 void
 tcp_slowtimo(void)
 {
 	struct inpcb *inp, *nxt;
 	struct tcpcb *tp;
-	struct socket *so;
-	int i;
 #if TCPDEBUG
 	int ostate;
 #endif
@@ -432,114 +419,14 @@ tcp_slowtimo(void)
 
 	tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
 
-	lck_rw_lock_shared(pcbinfo->mtx);
-
-	bg_cnt++;
-
-    	LIST_FOREACH(inp, &tcb, inp_list) {
-
-		so = inp->inp_socket;
+	/* Update tcp_now here as it may get used while processing the slow timer */
+	calculate_tcp_clock();
 
-		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) 
-			continue;
-
-		tcp_lock(so, 1, 0);
-
-		if ((in_pcb_checkstate(inp, WNT_RELEASE,1) == WNT_STOPUSING)  && so->so_usecount == 1) {
-			tcp_unlock(so, 1, 0);
-			continue;
-		}
-		tp = intotcpcb(inp);
-		if (tp == 0 || tp->t_state == TCPS_LISTEN) {
-			tcp_unlock(so, 1, 0);
-			continue; 
-		}
-
-		tp = intotcpcb(inp);
-
-		if (tp == 0 || tp->t_state == TCPS_LISTEN) 
-			goto tpgone;
-
-#if TRAFFIC_MGT
-	        if (so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BG_REGULATE && 
-	        	bg_cnt > BG_COUNTER_MAX) {
-			u_int32_t	curr_recvtotal = tcpstat.tcps_rcvtotal;
-			u_int32_t	curr_bg_recvtotal = tcpstat.tcps_bg_rcvtotal;
-			u_int32_t	bg_recvdiff = curr_bg_recvtotal - tp->bg_recv_snapshot;
-			u_int32_t	tot_recvdiff = curr_recvtotal - tp->tot_recv_snapshot;
-			u_int32_t	fg_recv_change = tot_recvdiff - bg_recvdiff;
-			u_int32_t	recv_change;
-			
-			if (!(so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BG_SUPPRESSED)) {
-				if (tot_recvdiff) 
-					recv_change = (fg_recv_change * 100) / tot_recvdiff;
-				else 
-					recv_change = 0;
-
-				if (recv_change > background_io_trigger) {
-					socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BG_SUPPRESSED);
-				}
-				
-				tp->tot_recv_snapshot = curr_recvtotal;
-				tp->bg_recv_snapshot = curr_bg_recvtotal;
-			}
-			else {	// SUPPRESSED
-				// this allows for bg traffic to subside before we start measuring total traffic change
-				if (tot_recvdiff)
-					recv_change = (bg_recvdiff * 100) / tot_recvdiff;
-				else
-					recv_change = 0;
-					
-				if (recv_change < background_io_trigger) {
-					// Draconian for now: if there is any change at all, keep suppressed
-					if (!tot_recvdiff) {
-						socket_clear_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BG_SUPPRESSED);
-						tp->t_unacksegs = 0;
-						(void) tcp_output(tp);	// open window
-					}
-				}
-
-				tp->tot_recv_snapshot = curr_recvtotal;
-				tp->bg_recv_snapshot = curr_bg_recvtotal;
-			}
-		}
-#endif /* TRAFFIC_MGT */
-
-		for (i = 1; i < TCPT_NTIMERS; i++) {
-			if (tp->t_timer[i] != 0) {
-				tp->t_timer[i] -= TCP_RETRANSHZ/PR_SLOWHZ;
-			       	if (tp->t_timer[i] <=  0) {
-#if TCPDEBUG
-					ostate = tp->t_state;
-#endif
-
-					tp->t_timer[i] = 0; /* account for granularity change between tcp_now and slowtimo */
-					tp = tcp_timers(tp, i);
-					if (tp == NULL)
-						goto tpgone;
-#if TCPDEBUG
-					if (tp->t_inpcb->inp_socket->so_options
-					    & SO_DEBUG)
-						tcp_trace(TA_USER, ostate, tp,
-							  (void *)0,
-							  (struct tcphdr *)0,
-							  PRU_SLOWTIMO);
-#endif
-				}
-			}
-		}
-tpgone:
-		tcp_unlock(so, 1, 0);
-	}
-	
-	if (bg_cnt > 3) 
-		bg_cnt = 0;
-
-	/* Second part of tcp_slowtimo: garbage collect socket/tcpcb
-	 * We need to acquire the list lock exclusively to do this
+	/* Garbage collect socket/tcpcb: we need to acquire the list lock
+	 * exclusively to do this.
 	 */
 
-	if (lck_rw_lock_shared_to_exclusive(pcbinfo->mtx) == FALSE) {
+	if (lck_rw_try_lock_exclusive(pcbinfo->mtx) == FALSE) {
 		if (tcp_gc_done == TRUE) {	/* don't sweat it this time. cleanup was done last time */
 			tcp_gc_done = FALSE;
 			KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_END, tws_checked, cur_tw_slot,0,0,0);
@@ -617,8 +504,11 @@ tcp_canceltimers(tp)
 {
 	register int i;
 
+	tcp_remove_timer(tp);
 	for (i = 0; i < TCPT_NTIMERS; i++)
 		tp->t_timer[i] = 0;
+	tp->tentry.timer_start = tcp_now;
+	tp->tentry.index = TCPT_NONE;
 }
 
 int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
@@ -641,6 +531,7 @@ tcp_timers(tp, timer)
 	struct socket *so_tmp;
 	struct tcptemp *t_template;
 	int optlen = 0;
+	int idle_time = 0;
 
 #if TCPDEBUG
 	int ostate;
@@ -651,6 +542,7 @@ tcp_timers(tp, timer)
 #endif /* INET6 */
 
 	so_tmp = tp->t_inpcb->inp_socket;
+	idle_time = tcp_now - tp->t_rcvtime;
 
 	switch (timer) {
 
@@ -666,8 +558,8 @@ tcp_timers(tp, timer)
 		tcp_free_sackholes(tp);
 		if (tp->t_state != TCPS_TIME_WAIT &&
 		    tp->t_state != TCPS_FIN_WAIT_2 &&
-		    tp->t_rcvtime < tcp_maxidle) {
-			tp->t_timer[TCPT_2MSL] = (u_int32_t)tcp_keepintvl;
+		    ((idle_time > 0) && (idle_time < tcp_maxidle))) {
+			tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp, (u_int32_t)tcp_keepintvl);
 		}
 		else {
 			tp = tcp_close(tp);
@@ -682,9 +574,26 @@ tcp_timers(tp, timer)
 	 */
 	case TCPT_REXMT:
 		tcp_free_sackholes(tp);
-		if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
+		/* Drop a connection in the retransmit timer
+		 * 1. If we have retransmitted more than TCP_MAXRXTSHIFT times
+		 * 2. If the time spent in this retransmission episode is more than
+		 *    the time limit set with TCP_RXT_CONNDROPTIME socket option
+		 * 3. If TCP_RXT_FINDROP socket option was set and we have already
+		 *    retransmitted the FIN 3 times without receiving an ack
+		 */
+		if (++tp->t_rxtshift > TCP_MAXRXTSHIFT ||
+			(tp->rxt_conndroptime > 0 && tp->rxt_start > 0 && 
+			(tcp_now - tp->rxt_start) >= tp->rxt_conndroptime) ||
+			((tp->t_flagsext & TF_RXTFINDROP) != 0 &&
+			(tp->t_flags & TF_SENTFIN) != 0 &&
+			tp->t_rxtshift >= 4)) {
+
+			if ((tp->t_flagsext & TF_RXTFINDROP) != 0) {
+				tcpstat.tcps_rxtfindrop++;
+			} else {
+				tcpstat.tcps_timeoutdrop++;
+			}
 			tp->t_rxtshift = TCP_MAXRXTSHIFT;
-			tcpstat.tcps_timeoutdrop++;
 			tp = tcp_drop(tp, tp->t_softerror ?
 			    tp->t_softerror : ETIMEDOUT);
 			postevent(so_tmp, 0, EV_TIMEOUT);			
@@ -709,6 +618,11 @@ tcp_timers(tp, timer)
 			else
 				  tp->t_flags &= ~TF_WASFRECOVERY;
 			tp->t_badrxtwin = tcp_now  + (tp->t_srtt >> (TCP_RTT_SHIFT)); 
+
+			/* Set the time at which retransmission on this 
+			 * connection started
+			 */
+			tp->rxt_start = tcp_now;
 		}
 		tcpstat.tcps_rexmttimeo++;
 		if (tp->t_state == TCPS_SYN_SENT)
@@ -716,8 +630,9 @@ tcp_timers(tp, timer)
 		else
 			rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
 		TCPT_RANGESET(tp->t_rxtcur, rexmt,
-			tp->t_rttmin, TCPTV_REXMTMAX);
-		tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+			tp->t_rttmin, TCPTV_REXMTMAX, 
+			TCP_ADD_REXMTSLOP(tp));
+		tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur);
 
 		/*
 		 * Check for potential Path MTU Discovery Black Hole 
@@ -745,6 +660,12 @@ tcp_timers(tp, timer)
 							tcp_mssdflt;
 				}
 				tp->t_maxseg = tp->t_maxopd - optlen;
+
+				/*
+				 * Reset the slow-start flight size as it may depend on the new MSS
+				 */
+				if (CC_ALGO(tp)->cwnd_init != NULL)
+					CC_ALGO(tp)->cwnd_init(tp);
 			}
 			/*
 			 * If further retransmissions are still unsuccessful with a lowered MTU,
@@ -759,6 +680,11 @@ tcp_timers(tp, timer)
 					optlen = tp->t_maxopd - tp->t_maxseg;
 					tp->t_maxopd = tp->t_pmtud_saved_maxopd;
 					tp->t_maxseg = tp->t_maxopd - optlen;
+					/*
+					 * Reset the slow-start flight size as it may depend on the new MSS
+					 */
+					if (CC_ALGO(tp)->cwnd_init != NULL)
+						CC_ALGO(tp)->cwnd_init(tp);
 				}
 			}
 		}
@@ -806,41 +732,17 @@ tcp_timers(tp, timer)
 		 * If timing a segment in this window, stop the timer.
 		 */
 		tp->t_rtttime = 0;
-		/*
-		 * Close the congestion window down to one segment
-		 * (we'll open it by one segment for each ack we get).
-		 * Since we probably have a window's worth of unacked
-		 * data accumulated, this "slow start" keeps us from
-		 * dumping all that data as back-to-back packets (which
-		 * might overwhelm an intermediate gateway).
-		 *
-		 * There are two phases to the opening: Initially we
-		 * open by one mss on each ack.  This makes the window
-		 * size increase exponentially with time.  If the
-		 * window is larger than the path can handle, this
-		 * exponential growth results in dropped packet(s)
-		 * almost immediately.  To get more time between
-		 * drops but still "push" the network to take advantage
-		 * of improving conditions, we switch from exponential
-		 * to linear window opening at some threshhold size.
-		 * For a threshhold, we use half the current window
-		 * size, truncated to a multiple of the mss.
-		 *
-		 * (the minimum cwnd that will give us exponential
-		 * growth is 2 mss.  We don't allow the threshhold
-		 * to go below this.)
-		 */
-		if (tp->t_state >=  TCPS_ESTABLISHED) {
-			u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
-			if (win < 2)
-				win = 2;
-			tp->snd_cwnd = tp->t_maxseg;
-			tp->snd_ssthresh = win * tp->t_maxseg;
-			tp->t_bytes_acked = 0;
-			tp->t_dupacks = 0;
-			tp->t_unacksegs = 0;
-		}
+
+		if (CC_ALGO(tp)->after_timeout != NULL)
+			CC_ALGO(tp)->after_timeout(tp);
+
+		tp->t_dupacks = 0;
 		EXIT_FASTRECOVERY(tp);
+
+		DTRACE_TCP5(cc, void, NULL, struct inpcb *, tp->t_inpcb,
+			struct tcpcb *, tp, struct tcphdr *, NULL,
+			int32_t, TCP_CC_REXMT_TIMEOUT);
+
 		(void) tcp_output(tp);
 		break;
 
@@ -856,10 +758,15 @@ tcp_timers(tp, timer)
 		 * backoff, drop the connection if the idle time
 		 * (no responses to probes) reaches the maximum
 		 * backoff that we would use if retransmitting.
+		 *
+		 * Drop the connection if we have reached the maximum allowed time for
+		 * Zero Window Probes without a non-zero window update from the peer.
+		 * See rdar://5805356
 		 */
-		if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
-		    (tp->t_rcvtime >= tcp_maxpersistidle ||
-		    tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
+		if ((tp->t_rxtshift == TCP_MAXRXTSHIFT &&
+		    (idle_time >= tcp_maxpersistidle ||
+		    idle_time >= TCP_REXMTVAL(tp) * tcp_totbackoff)) || 
+		    ((tp->t_persist_stop != 0) && (tp->t_persist_stop <= tcp_now))) {
 			tcpstat.tcps_persistdrop++;
 			so_tmp = tp->t_inpcb->inp_socket;
 			tp = tcp_drop(tp, ETIMEDOUT);
@@ -868,7 +775,6 @@ tcp_timers(tp, timer)
 		}
 		tcp_setpersist(tp);
 		tp->t_force = 1;
-		tp->t_unacksegs = 0;
 		(void) tcp_output(tp);
 		tp->t_force = 0;
 		break;
@@ -884,7 +790,7 @@ tcp_timers(tp, timer)
 		if ((always_keepalive ||
 		    tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) &&
 		    (tp->t_state <= TCPS_CLOSING || tp->t_state == TCPS_FIN_WAIT_2)) {
-		    	if (tp->t_rcvtime >= TCP_KEEPIDLE(tp) + (u_int32_t)tcp_maxidle)
+		    	if (idle_time >= TCP_KEEPIDLE(tp) + (u_int32_t)tcp_maxidle)
 				goto dropit;
 			/*
 			 * Send a packet designed to force a response
@@ -901,21 +807,45 @@ tcp_timers(tp, timer)
 			tcpstat.tcps_keepprobe++;
 			t_template = tcp_maketemplate(tp);
 			if (t_template) {
-				unsigned int ifscope;
+				unsigned int ifscope, nocell = 0;
 
 				if (tp->t_inpcb->inp_flags & INP_BOUND_IF)
 					ifscope = tp->t_inpcb->inp_boundif;
 				else
 					ifscope = IFSCOPE_NONE;
 
+				/*
+				 * If the socket isn't allowed to use the
+				 * cellular interface, indicate it as such.
+				 */
+				if (tp->t_inpcb->inp_flags & INP_NO_IFT_CELLULAR)
+					nocell = 1;
+
 				tcp_respond(tp, t_template->tt_ipgen,
 				    &t_template->tt_t, (struct mbuf *)NULL,
-				    tp->rcv_nxt, tp->snd_una - 1, 0, ifscope);
+				    tp->rcv_nxt, tp->snd_una - 1, 0, ifscope,
+				    nocell);
 				(void) m_free(dtom(t_template));
 			}
-			tp->t_timer[TCPT_KEEP] = tcp_keepintvl;
+			tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, tcp_keepintvl);
 		} else
-			tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp);
+			tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, TCP_KEEPIDLE(tp));
+		break;
+	case TCPT_DELACK:
+		if (tcp_delack_enabled && (tp->t_flags & TF_DELACK)) {
+			tp->t_flags &= ~TF_DELACK;
+			tp->t_timer[TCPT_DELACK] = 0;
+			tp->t_flags |= TF_ACKNOW;
+
+			/* If the delayed ack timer fired while we were stretching
+			 * acks, go back to acking every other packet
+			 */
+			if ((tp->t_flags & TF_STRETCHACK) != 0)
+				tcp_reset_stretch_ack(tp);
+
+			tcpstat.tcps_delack++;
+			(void) tcp_output(tp);
+		}
 		break;
 
 #if TCPDEBUG
@@ -931,3 +861,462 @@ tcp_timers(tp, timer)
 	}
 	return (tp);
 }
+
+/* Remove a timer entry from timer list */
+void
+tcp_remove_timer(struct tcpcb *tp)
+{
+	struct tcptimerlist *listp = &tcp_timer_list;
+
+	lck_mtx_assert(&tp->t_inpcb->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
+	if (!(TIMER_IS_ON_LIST(tp))) {
+		return;
+	}
+	lck_mtx_lock(listp->mtx);
+	
+	/* Check if pcb is on timer list again after acquiring the lock */
+	if (!(TIMER_IS_ON_LIST(tp))) {
+		lck_mtx_unlock(listp->mtx);
+		return;
+	}
+	
+	if (listp->next_te != NULL && listp->next_te == &tp->tentry)
+		listp->next_te = LIST_NEXT(&tp->tentry, le);
+
+	LIST_REMOVE(&tp->tentry, le);
+	tp->t_flags &= ~(TF_TIMER_ONLIST);
+
+	listp->entries--;
+	lck_mtx_unlock(listp->mtx);
+
+	tp->tentry.le.le_next = NULL;
+	tp->tentry.le.le_prev = NULL;
+}
+
+/* Function to check if the timer list needs to be rescheduled to run
+ * this timer entry on time. The point of the check is to avoid taking
+ * the list lock when possible.
+ */
+
+static boolean_t
+need_to_resched_timerlist(uint32_t runtime, uint16_t index) {
+	struct tcptimerlist *listp = &tcp_timer_list;
+	int32_t diff;
+	boolean_t is_fast;
+
+	if (runtime == 0 || index == TCPT_NONE)
+		return FALSE;
+	is_fast = !(IS_TIMER_SLOW(index));
+
+	/* If the list is being processed then the state of the list is in flux.
+	 * In this case always acquire the lock and set the state correctly.
+	 */
+	if (listp->running) {
+		return TRUE;
+	}
+
+	diff = timer_diff(listp->runtime, 0, runtime, 0);
+	if (diff <= 0) {
+		/* The list is going to run before this timer */
+		return FALSE;
+	} else {
+		if (is_fast) {
+			if (diff <= listp->fast_quantum)
+				return FALSE;
+		} else {
+			if (diff <= listp->slow_quantum)
+				return FALSE;
+		}
+	}
+	return TRUE;
+}
+
+void
+tcp_sched_timerlist(uint32_t offset)
+{
+	uint64_t deadline = 0;
+	struct tcptimerlist *listp = &tcp_timer_list;
+
+	lck_mtx_assert(listp->mtx, LCK_MTX_ASSERT_OWNED);
+
+	listp->runtime = tcp_now + offset;
+
+	clock_interval_to_deadline(offset, NSEC_PER_SEC / TCP_RETRANSHZ,
+		&deadline);
+
+	thread_call_enter_delayed(listp->call, deadline);
+}
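
The clock_interval_to_deadline() call converts the tick offset into an absolute deadline by scaling it with the nanoseconds-per-tick resolution (NSEC_PER_SEC / TCP_RETRANSHZ). Ignoring the conversion to absolute mach time that the kernel call also performs, the interval arithmetic amounts to this (TCP_RETRANSHZ value assumed for illustration):

    #include <stdio.h>
    #include <stdint.h>

    #define TCP_RETRANSHZ 100          /* assumed: ticks per second */
    #define NSEC_PER_SEC  1000000000ULL

    int main(void)
    {
        uint32_t offset = 25;          /* ticks until the list must run */

        /* Equivalent of the interval part of
         * clock_interval_to_deadline(offset, NSEC_PER_SEC / TCP_RETRANSHZ, ...):
         * scale the tick count by the nanoseconds in one tick. */
        uint64_t interval_ns = (uint64_t)offset * (NSEC_PER_SEC / TCP_RETRANSHZ);

        printf("%llu ns\n", (unsigned long long)interval_ns); /* 250000000 ns */
        return 0;
    }
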
+
+/* Function to run the timers for a connection.
+ *
+ * Returns the offset of the next timer to be run for this connection, which
+ * can be used to reschedule the timer list.
+ */
+uint32_t
+tcp_run_conn_timer(struct tcpcb *tp, uint16_t *next_index) {
+
+	struct socket *so;
+	uint16_t i = 0, index = TCPT_NONE, lo_index = TCPT_NONE;
+	uint32_t timer_val, offset = 0, lo_timer = 0;
+	int32_t diff;
+	boolean_t needtorun[TCPT_NTIMERS];
+	int count = 0;
+
+	VERIFY(tp != NULL);
+	bzero(needtorun, sizeof(needtorun));
+
+	tcp_lock(tp->t_inpcb->inp_socket, 1, 0);
+
+	so = tp->t_inpcb->inp_socket;
+	/* Release the want count on inp */ 
+	if (in_pcb_checkstate(tp->t_inpcb, WNT_RELEASE, 1) == WNT_STOPUSING) {
+		if (TIMER_IS_ON_LIST(tp)) {
+			tcp_remove_timer(tp);
+		}
+
+		/* Looks like the TCP connection got closed while we
+		 * were waiting for the lock. Done.
+		 */
+		goto done;
+	}
+
+	/* Since the timer thread needs to wait for tcp lock, it may race
+	 * with another thread that can cancel or reschedule the timer that is
+	 * about to run. Check if we need to run anything.
+	 */
+	index = tp->tentry.index;
+	if (index == TCPT_NONE || tp->tentry.runtime == 0)
+		goto done;
+
+	timer_val = tp->t_timer[index];
+
+	diff = timer_diff(tp->tentry.runtime, 0, tcp_now, 0);
+	if (diff > 0) {
+		if (tp->tentry.index != TCPT_NONE) {
+			offset = diff;
+			*(next_index) = tp->tentry.index;
+		}
+		goto done;
+	}
+
+	tp->t_timer[index] = 0;
+	if (timer_val > 0) {
+		tp = tcp_timers(tp, index);
+		if (tp == NULL) 
+			goto done;
+	}
+	
+	/* Check if there are any other timers that need to be run. While doing it,
+	 * adjust the timer values wrt tcp_now.
+	 */
+	for (i = 0; i < TCPT_NTIMERS; ++i) {
+		if (tp->t_timer[i] != 0) {
+			diff = timer_diff(tp->tentry.timer_start, tp->t_timer[i], tcp_now, 0);
+			if (diff <= 0) {
+				tp->t_timer[i] = 0;
+				needtorun[i] = TRUE;
+				count++;
+			} else {
+				tp->t_timer[i] = diff;
+				needtorun[i] = FALSE;
+				if (lo_timer == 0 || diff < lo_timer) {
+					lo_timer = diff;
+					lo_index = i;
+				}
+			}
+		}
+	}
+	
+	tp->tentry.timer_start = tcp_now;
+	tp->tentry.index = lo_index;
+	if (lo_index != TCPT_NONE) {
+		tp->tentry.runtime = tp->tentry.timer_start + tp->t_timer[lo_index];
+	} else {
+		tp->tentry.runtime = 0;
+	}
+
+	if (count > 0) {
+		/* run any other timers that are also outstanding at this time. */
+		for (i = 0; i < TCPT_NTIMERS; ++i) {
+			if (needtorun[i]) {
+				tp->t_timer[i] = 0;
+				tp = tcp_timers(tp, i);
+				if (tp == NULL) 
+					goto done;
+			}
+		}
+		tcp_set_lotimer_index(tp);
+	}
+
+	if (tp->tentry.index < TCPT_NONE) {
+		offset = tp->t_timer[tp->tentry.index];
+		*(next_index) = tp->tentry.index;
+	}
+
+done:
+	if (tp != NULL && tp->tentry.index == TCPT_NONE) {
+		tcp_remove_timer(tp);
+	}
+	tcp_unlock(so, 1, 0);
+	return offset;
+}
+
+void
+tcp_run_timerlist(void * arg1, void * arg2) {
+
+#pragma unused(arg1, arg2)
+	
+	struct tcptimerentry *te, *next_te;
+	struct tcptimerlist *listp = &tcp_timer_list;
+	struct tcpcb *tp;
+	uint32_t next_timer = 0;
+	uint16_t index = TCPT_NONE;
+	boolean_t need_fast = FALSE;
+	uint32_t active_count = 0;
+	uint32_t mode = TCP_TIMERLIST_FASTMODE;
+
+	calculate_tcp_clock();
+
+	lck_mtx_lock(listp->mtx);
+
+	listp->running = TRUE;
+	
+	LIST_FOREACH_SAFE(te, &listp->lhead, le, next_te) {
+		uint32_t offset = 0;
+		uint32_t runtime = te->runtime;
+		if (TSTMP_GT(runtime, tcp_now)) {
+			offset = timer_diff(runtime, 0, tcp_now, 0);
+			if (next_timer == 0 || offset < next_timer) {
+				next_timer = offset;
+			}
+			continue;
+		}
+		active_count++;
+
+		tp = TIMERENTRY_TO_TP(te);
+
+		/* Acquire an inp wantcnt on the inpcb so that the socket won't get
+		 * detached even if tcp_close is called
+		 */
+		if (in_pcb_checkstate(tp->t_inpcb, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
+			/* Somehow this pcb went into dead state while on the timer list;
+			 * just take it off the list. Since the timer list entry pointers
+			 * are protected by the timer list lock, we can do it here.
+			 */
+			if (TIMER_IS_ON_LIST(tp)) {
+				tp->t_flags &= ~(TF_TIMER_ONLIST);
+				LIST_REMOVE(&tp->tentry, le);
+				listp->entries--;
+
+				tp->tentry.le.le_next = NULL;
+				tp->tentry.le.le_prev = NULL;
+			}
+			continue;
+		}
+
+		/* Store the next timerentry pointer before releasing the list lock.
+		 * If that entry has to be removed when we release the lock, this
+		 * pointer will be updated to the element after that.
+		 */
+		listp->next_te = next_te; 
+
+		VERIFY_NEXT_LINK(&tp->tentry, le);
+		VERIFY_PREV_LINK(&tp->tentry, le);
+
+		lck_mtx_unlock(listp->mtx);
+
+		index = TCPT_NONE;
+		offset = tcp_run_conn_timer(tp, &index);
+		
+		lck_mtx_lock(listp->mtx);
+
+		next_te = listp->next_te;
+		listp->next_te = NULL;
+
+		if (offset > 0) {
+			if (index < TCPT_NONE) {
+				/* Check if this is a fast_timer. */
+				if (!need_fast && !(IS_TIMER_SLOW(index))) {
+					need_fast = TRUE;
+				}
+
+				if (next_timer == 0 || offset < next_timer) {
+					next_timer = offset;
+				}
+			}
+		}
+	}
+
+	if (!LIST_EMPTY(&listp->lhead)) {
+		if (listp->mode == TCP_TIMERLIST_FASTMODE) {
+			if (need_fast || active_count > 0 || 
+				listp->pref_mode == TCP_TIMERLIST_FASTMODE) {
+				listp->idlegen = 0;
+			} else {
+				listp->idlegen++;
+				if (listp->idlegen > timer_fastmode_idlemax) {
+					mode = TCP_TIMERLIST_SLOWMODE;
+					listp->idlegen = 0;
+				}
+			}
+		} else {
+			if (!need_fast) {
+				mode = TCP_TIMERLIST_SLOWMODE;
+			}
+		}
+
+		if (mode == TCP_TIMERLIST_FASTMODE || 
+			listp->pref_mode == TCP_TIMERLIST_FASTMODE) {
+			next_timer = listp->fast_quantum;
+		} else {
+			if (listp->pref_offset != 0 && 
+				listp->pref_offset < next_timer)
+				next_timer = listp->pref_offset;
+			if (next_timer < listp->slow_quantum)
+				next_timer = listp->slow_quantum;
+		}
+
+		listp->mode = mode;
+
+		tcp_sched_timerlist(next_timer);
+	} else {
+		/* No need to reschedule this timer */
+		listp->runtime = 0;
+	}
+
+	listp->running = FALSE;
+	listp->pref_mode = 0;
+	listp->pref_offset = 0;
+
+	lck_mtx_unlock(listp->mtx);
+}
+
+/* Function to verify if a change in timer state is required for a connection */
+void 
+tcp_sched_timers(struct tcpcb *tp) 
+{
+	struct tcptimerentry *te = &tp->tentry;
+	uint16_t index = te->index;
+	struct tcptimerlist *listp = &tcp_timer_list;
+	uint32_t offset = 0;
+	boolean_t is_fast;
+	int list_locked = 0;
+
+	if (tp->t_inpcb->inp_state == INPCB_STATE_DEAD) {
+		/* Just return without adding the dead pcb to the list */
+		if (TIMER_IS_ON_LIST(tp)) {
+			tcp_remove_timer(tp);
+		}
+		return;
+	}
+
+	if (index == TCPT_NONE) {
+		tcp_remove_timer(tp);
+		return;
+	}
+
+	is_fast = !(IS_TIMER_SLOW(index));
+	offset = te->runtime - tcp_now;
+	if (offset == 0) {
+		offset = 1;
+		tcp_timer_advanced++;
+	}
+	if (is_fast)
+		offset = listp->fast_quantum;
+
+	if (!TIMER_IS_ON_LIST(tp)) {
+		if (!list_locked) {
+			lck_mtx_lock(listp->mtx);
+			list_locked = 1;
+		}
+
+		LIST_INSERT_HEAD(&listp->lhead, te, le);
+		tp->t_flags |= TF_TIMER_ONLIST;
+
+		listp->entries++;
+		if (listp->entries > listp->maxentries)
+			listp->maxentries = listp->entries;
+
+		/* if the list is not scheduled, just schedule it */
+		if (listp->runtime == 0)
+			goto schedule;
+
+	}
+
+	/* timer entry is currently on the list */
+	if (need_to_resched_timerlist(te->runtime, index)) {
+		tcp_resched_timerlist++;
+	
+		if (!list_locked) {
+			lck_mtx_lock(listp->mtx);
+			list_locked = 1;
+		}
+
+		VERIFY_NEXT_LINK(te, le);
+		VERIFY_PREV_LINK(te, le);
+
+		if (listp->running) {
+			if (is_fast) {
+				listp->pref_mode = TCP_TIMERLIST_FASTMODE;
+			} else if (listp->pref_offset == 0 ||
+				((int)offset) < listp->pref_offset) {
+				listp->pref_offset = offset;
+			}
+		} else {
+			int32_t diff;
+			diff = timer_diff(listp->runtime, 0, tcp_now, offset);
+			if (diff <= 0) {
+				/* The list is going to run before this timer */
+				goto done;
+			} else {
+				goto schedule;
+			}
+		}
+	}
+	goto done;
+
+schedule:
+	if (is_fast) {
+		listp->mode = TCP_TIMERLIST_FASTMODE;
+		listp->idlegen = 0;
+	}
+	tcp_sched_timerlist(offset);
+
+done:
+	if (list_locked)
+		lck_mtx_unlock(listp->mtx);
+
+	return;
+}
+
+void
+tcp_set_lotimer_index(struct tcpcb *tp) {
+	uint16_t i, lo_index = TCPT_NONE;
+	uint32_t lo_timer = 0;
+	for (i = 0; i < TCPT_NTIMERS; ++i) {
+		if (tp->t_timer[i] != 0 &&
+			(lo_timer == 0 || tp->t_timer[i] < lo_timer)) {
+			lo_timer = tp->t_timer[i];
+			lo_index = i;
+		}
+	}
+	tp->tentry.index = lo_index;
+	if (lo_index != TCPT_NONE) {
+		tp->tentry.runtime = tp->tentry.timer_start + tp->t_timer[lo_index];
+	} else {
+		tp->tentry.runtime = 0;
+	}
+}
+
+void
+tcp_check_timer_state(struct tcpcb *tp) {
+
+	lck_mtx_assert(&tp->t_inpcb->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
+
+	tcp_set_lotimer_index(tp);
+
+	tcp_sched_timers(tp);
+	return;
+}
diff --git a/bsd/netinet/tcp_timer.h b/bsd/netinet/tcp_timer.h
index c4ea59c6b..df1162053 100644
--- a/bsd/netinet/tcp_timer.h
+++ b/bsd/netinet/tcp_timer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -65,16 +65,25 @@
 #define _NETINET_TCP_TIMER_H_
 #include <sys/appleapiopts.h>
 
+#ifdef KERNEL
+#include <kern/thread_call.h>
+#endif /* KERNEL */
+
 /*
- * Definitions of the TCP timers.  These timers are counted
- * down PR_SLOWHZ times a second.
+ * Definitions of the TCP timers.
  */
-#define	TCPT_NTIMERS	4
+#define	TCPT_NTIMERS	5
+
+/* Keep the external definition the same for binary compatibility */
+#define TCPT_NTIMERS_EXT	4
 
 #define	TCPT_REXMT	0		/* retransmit */
 #define	TCPT_PERSIST	1		/* retransmit persistence */
 #define	TCPT_KEEP	2		/* keep alive */
 #define	TCPT_2MSL	3		/* 2*msl quiet time timer */
+#define	TCPT_DELACK	4		/* delayed ack timer */
+#define	TCPT_MAX	4
+#define	TCPT_NONE	(TCPT_MAX + 1)	
 
 /*
  * The TCPT_REXMT timer is used to force retransmissions.
@@ -119,7 +128,7 @@
  */
 #define	TCPTV_MSL	( 15*TCP_RETRANSHZ)		/* max seg lifetime (hah!) */
 #define	TCPTV_SRTTBASE	0			/* base roundtrip time;
-						   if 0, no idea yet */
+						   if  0, no idea yet */
 #define	TCPTV_RTOBASE	(  1*TCP_RETRANSHZ)		/* assumed RTO if no info */
 #define	TCPTV_SRTTDFLT	(  1*TCP_RETRANSHZ)		/* assumed RTT if no info */
 
@@ -131,9 +140,18 @@
 #define	TCPTV_KEEPINTVL	( 75*TCP_RETRANSHZ)		/* default probe interval */
 #define	TCPTV_KEEPCNT	8			/* max probes before drop */
 
-//#define	TCPTV_MIN	(  3*TCP_RETRANSHZ)		/* minimum allowable value */
-#define	TCPTV_MIN	(1) 	/* minimum allowable value */
-#define	TCPTV_REXMTMAX	( 64*TCP_RETRANSHZ)		/* max allowable REXMT value */
+#define	TCPTV_REXMTMAX	( 64*TCP_RETRANSHZ )	/* max allowable REXMT value */
+#define	TCPTV_REXMTMIN	( TCP_RETRANSHZ/33 )	/* min REXMT for non-local connections */
+#define TCPTV_UNACKWIN	( TCP_RETRANSHZ/10 )	/* Window for counting rcv bytes to see if 
+						   ack-stretching can start (default 100 ms) */
+#define TCPTV_MAXRCVIDLE (TCP_RETRANSHZ/5 )	/* Receiver idle time; avoid ack-stretching after that */
+
+/* No ack stretching during slow-start, until we see some packets.
+ * By the time the receiver gets 512 packets, the sender's cwnd
+ * should have opened by a few hundred packets, considering the
+ * progression during slow-start.
+ */
+#define TCP_RCV_SS_PKTCOUNT     512
 
 #define TCPTV_TWTRUNC	8			/* RTO factor to truncate TW */
 
@@ -143,15 +161,81 @@
 
 #ifdef	TCPTIMERS
 static char *tcptimers[] =
-    { "REXMT", "PERSIST", "KEEP", "2MSL" };
+    { "REXMT", "PERSIST", "KEEP", "2MSL" , "DELACK"};
 #endif
 
 #ifdef KERNEL
+
+/* We consider persist, keep and 2msl to be slow timers which can be
+ * coalesced at a coarser granularity (500 ms). Rexmt and delayed ack are
+ * considered fast timers which fire on the order of 100 ms.
+ *
+ * The following conditional checks whether a timer is one of the slow
+ * timers. It is fast and works well for now; if more slow timers are
+ * added for any reason, it may need to change.
+ */
+#define IS_TIMER_SLOW(ind) ((ind & 0x3) != 0)
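Spelled out, the bit test classifies the five indices like this (a sanity-check sketch, not part of the header):

    /* index & 0x3: zero means fast, nonzero means slow */
    IS_TIMER_SLOW(TCPT_REXMT)    /* 0 & 0x3 == 0 -> fast */
    IS_TIMER_SLOW(TCPT_PERSIST)  /* 1 & 0x3 == 1 -> slow */
    IS_TIMER_SLOW(TCPT_KEEP)     /* 2 & 0x3 == 2 -> slow */
    IS_TIMER_SLOW(TCPT_2MSL)     /* 3 & 0x3 == 3 -> slow */
    IS_TIMER_SLOW(TCPT_DELACK)   /* 4 & 0x3 == 0 -> fast */

This is also why the comment warns about adding timers: a sixth timer at index 5 would test as slow whether or not that was intended.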
+
+struct tcptimerlist;
+
+struct tcptimerentry {
+        LIST_ENTRY(tcptimerentry) le;	/* links for timer list */ 
+        uint32_t timer_start;		/* tcp clock when the timer was started */
+	uint16_t index;			/* index of lowest timer that needs to run first */
+	uint32_t runtime;		/* deadline at which the first timer has to fire */
+};
+
+LIST_HEAD(timerlisthead, tcptimerentry);
+
+struct tcptimerlist {
+	struct timerlisthead lhead;	/* head of the list of timer entries */
+	lck_mtx_t *mtx;			/* lock to protect the list */
+	lck_attr_t *mtx_attr;		/* mutex attributes */
+	lck_grp_t *mtx_grp;		/* mutex group definition */
+	lck_grp_attr_t *mtx_grp_attr;	/* mutex group attributes */
+	uint32_t fast_quantum;		/* minimum time quantum to coalesce fast timers */
+	uint32_t slow_quantum;		/* minimum time quantum to coalesce slow timers */
+	thread_call_t call;		/* call entry */
+	uint32_t runtime;		/* time at which this list is going to run */
+	uint32_t entries;		/* Number of entries on the list */
+	uint32_t maxentries;		/* Max number of entries at any time */
+
+	/* Set desired mode when timer list running */
+	boolean_t running;		/* Set when timer list is being processed */
+#define TCP_TIMERLIST_FASTMODE 0x1
+#define TCP_TIMERLIST_SLOWMODE 0x2
+	uint32_t mode;			/* Current mode, fast or slow */
+	uint32_t pref_mode;		/* Preferred mode set by a connection, fast or slow */
+	uint32_t pref_offset;		/* Preferred offset set by a connection */
+	uint32_t idlegen;		/* Number of times the list has been idle in fast mode */
+	struct tcptimerentry *next_te;	/* Store the next timer entry pointer to process */
+
+};
+
+#define TCP_FASTMODE_IDLEGEN_MAX 20	/* Approximately 2 seconds */
+
 /*
- * Force a time value to be in a certain range.
+ * The minimum retransmit timeout is set to 30 ms. We add a slop of
+ * 200 ms to the retransmit value to account for processing
+ * variance and delayed acks. This extra 200 ms helps avoid
+ * spurious retransmits caused by receivers that wait for the
+ * delayed-ack timer instead of generating an ack for every two
+ * packets.
+ *
+ * On a local link, the minimum retransmit timeout is 100 ms and
+ * the variance is set to 0. This makes the sender a little more
+ * aggressive on the local link. When the connection is not yet
+ * established, there is no need to add the extra 200 ms to the
+ * retransmit timeout, because the initial value is high (1 s) and
+ * delayed acks are not a problem in that case.
  */
-#define	TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
-	(tv) = (value); \
+#define TCPTV_REXMTSLOP ( TCP_RETRANSHZ/5 )	/* rexmt slop allowed (200 ms) */
+
+/* macro to decide when retransmit slop (described above) should be added */
+#define TCP_ADD_REXMTSLOP(tp) ((tp->t_flags & TF_LOCAL) != 0 || tp->t_state >= TCPS_ESTABLISHED) 
+
+#define	TCPT_RANGESET(tv, value, tvmin, tvmax, addslop) do { \
+	(tv) = ((addslop) ? tcp_rexmt_slop : 0) + (value); \
 	if ((uint32_t)(tv) < (uint32_t)(tvmin)) \
 		(tv) = (tvmin); \
 	else if ((uint32_t)(tv) > (uint32_t)(tvmax)) \
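A worked example of the new addslop argument, with illustrative numbers: at TCP_RETRANSHZ == 1000 the ticks are milliseconds, and assuming tcp_rexmt_slop keeps its TCPTV_REXMTSLOP default of 200 ticks, an established connection computing a 120 ms RTO would see:

    TCPT_RANGESET(tp->t_rxtcur, 120, tp->t_rttmin, TCPTV_REXMTMAX,
        TCP_ADD_REXMTSLOP(tp));
    /* TCP_ADD_REXMTSLOP(tp) is true (state >= ESTABLISHED), so
     * tv = 200 + 120 = 320, then clamped into [tvmin, tvmax].
     * A non-local connection still in the handshake takes the
     * un-slopped 120 instead.
     */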
@@ -166,16 +250,15 @@ extern int tcp_keepinit;		/* time to establish connection */
 extern int tcp_keepidle;		/* time before keepalive probes begin */
 extern int tcp_keepintvl;		/* time between keepalive probes */
 extern int tcp_maxidle;			/* time to drop after starting probes */
+extern int tcp_delack;			/* delayed ack timer */
 extern int tcp_maxpersistidle;
 extern int tcp_msl;
 extern int tcp_ttl;			/* time to live for TCP segs */
 extern int tcp_backoff[];
+extern int tcp_rexmt_slop;
+extern u_int32_t tcp_max_persist_timeout;	/* Maximum persistence for Zero Window Probes */
 
-void	tcp_timer_2msl(void *xtp);
-void	tcp_timer_keep(void *xtp);
-void	tcp_timer_persist(void *xtp);
-void	tcp_timer_rexmt(void *xtp);
-void	tcp_timer_delack(void *xtp);
+#define OFFSET_FROM_START(tp, off) ((tcp_now + (off)) - (tp)->tentry.timer_start)
 
 #endif /* KERNEL */
 #endif /* PRIVATE */
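OFFSET_FROM_START() is the bridge between the old style of arming timers ("fire off ticks from now") and the new timer_start-relative storage. A small worked case with hypothetical values:

    /* tentry.timer_start == 4000, tcp_now == 4150, desired offset 300 */
    tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, 300);
    /* = (4150 + 300) - 4000 = 450, so that
     * timer_start + t_timer[TCPT_KEEP] == 4450 == tcp_now + 300,
     * i.e. tcp_set_lotimer_index() recovers the intended deadline.
     */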
diff --git a/bsd/netinet/tcp_usrreq.c b/bsd/netinet/tcp_usrreq.c
index d477b60d5..d4fddb517 100644
--- a/bsd/netinet/tcp_usrreq.c
+++ b/bsd/netinet/tcp_usrreq.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -76,6 +76,7 @@
 
 #include <net/if.h>
 #include <net/route.h>
+#include <net/ntstat.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
@@ -105,6 +106,11 @@
 #include <netinet6/ipsec.h>
 #endif /*IPSEC*/
 
+void	tcp_fill_info(struct tcpcb *, struct tcp_info *);
+errno_t tcp_fill_info_for_info_tuple(struct info_tuple *, struct tcp_info *);
+
+int tcp_sysctl_info(struct sysctl_oid *, void *, int , struct sysctl_req *);
+
 /*
  * TCP protocol interface to socket abstraction.
  */
@@ -121,26 +127,26 @@ static struct tcpcb *
 		tcp_usrclosed(struct tcpcb *);
 
 __private_extern__ int	tcp_win_scale = 3;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, win_scale_factor, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, win_scale_factor, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_win_scale, 0, "Window scaling factor");
 
 static u_int32_t tcps_in_sw_cksum;
-SYSCTL_UINT(_net_inet_tcp, OID_AUTO, in_sw_cksum, CTLFLAG_RD,
+SYSCTL_UINT(_net_inet_tcp, OID_AUTO, in_sw_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
     &tcps_in_sw_cksum, 0,
     "Number of received packets checksummed in software");
 
 static u_int64_t tcps_in_sw_cksum_bytes;
-SYSCTL_QUAD(_net_inet_tcp, OID_AUTO, in_sw_cksum_bytes, CTLFLAG_RD,
+SYSCTL_QUAD(_net_inet_tcp, OID_AUTO, in_sw_cksum_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
     &tcps_in_sw_cksum_bytes,
     "Amount of received data checksummed in software");
 
 static u_int32_t tcps_out_sw_cksum;
-SYSCTL_UINT(_net_inet_tcp, OID_AUTO, out_sw_cksum, CTLFLAG_RD,
+SYSCTL_UINT(_net_inet_tcp, OID_AUTO, out_sw_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
     &tcps_out_sw_cksum, 0,
     "Number of transmitted packets checksummed in software");
 
 static u_int64_t tcps_out_sw_cksum_bytes;
-SYSCTL_QUAD(_net_inet_tcp, OID_AUTO, out_sw_cksum_bytes, CTLFLAG_RD,
+SYSCTL_QUAD(_net_inet_tcp, OID_AUTO, out_sw_cksum_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
     &tcps_out_sw_cksum_bytes,
     "Amount of transmitted data checksummed in software");
 
@@ -160,9 +166,13 @@ __private_extern__ unsigned int	tcp_sockthreshold = 64;
 #else
 __private_extern__ unsigned int	tcp_sockthreshold = 0;
 #endif
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, sockthreshold, CTLFLAG_RW, 
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sockthreshold, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_sockthreshold , 0, "TCP Socket size increased if less than threshold");
 
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, info, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
+    0 , 0, tcp_sysctl_info, "S", "TCP info per tuple");
+
 /*
  * TCP attaches to socket via pru_attach(), reserving space,
  * and an internet control block.
@@ -186,7 +196,7 @@ tcp_usr_attach(struct socket *so, __unused int proto, struct proc *p)
 		error = EISCONN;
 		goto out;
 	}
-
+	
 	error = tcp_attach(so, p);
 	if (error)
 		goto out;
@@ -217,14 +227,15 @@ tcp_usr_detach(struct socket *so)
 	if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) {
 		return EINVAL;	/* XXX */
 	}
-#if 1
-	lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
-#endif
+	lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
 	tp = intotcpcb(inp);
 	/* In case we got disconnected from the peer */
         if (tp == 0) 
 	    goto out;
 	TCPDEBUG1();
+
+	calculate_tcp_clock();
+
 	tp = tcp_disconnect(tp);
 out:
 	TCPDEBUG2(PRU_DETACH);
@@ -238,6 +249,7 @@ out:
 				     } \
 				     tp = intotcpcb(inp); \
 				     TCPDEBUG1(); \
+				     calculate_tcp_clock(); \
 		     } while(0)
 			     
 #define COMMON_END(req)	out: TCPDEBUG2(req); return error; goto out
@@ -415,6 +427,8 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
 
+	calculate_tcp_clock();
+
 	if (nam->sa_family != 0 && nam->sa_family != AF_INET) {
 		error = EAFNOSUPPORT;
 		goto out;
@@ -505,9 +519,7 @@ tcp_usr_disconnect(struct socket *so)
 	struct inpcb *inp = sotoinpcb(so);
 	struct tcpcb *tp;
 	
-#if 1
-	lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
-#endif
+	lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
 	COMMON_START();
         /* In case we got disconnected from the peer */
         if (tp == 0)
@@ -529,6 +541,8 @@ tcp_usr_accept(struct socket *so, struct sockaddr **nam)
 	struct tcpcb *tp = NULL;
 	TCPDEBUG0;
 
+	in_setpeeraddr(so, nam);
+		
 	if (so->so_state & SS_ISDISCONNECTED) {
 		error = ECONNABORTED;
 		goto out;
@@ -538,7 +552,9 @@ tcp_usr_accept(struct socket *so, struct sockaddr **nam)
 	}
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
-	in_setpeeraddr(so, nam);
+
+	calculate_tcp_clock();
+
 	COMMON_END(PRU_ACCEPT);
 }
 
@@ -560,6 +576,9 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
 	}
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
+
+	calculate_tcp_clock();
+
 	in6_mapped_peeraddr(so, nam);
 	COMMON_END(PRU_ACCEPT);
 }
@@ -681,6 +700,9 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
 #endif /* INET6 */
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
+
+	calculate_tcp_clock();
+
 	if (control) {
 		/* TCP doesn't do control messages (rights, creds, etc) */
 		if (control->m_len) {
@@ -878,10 +900,11 @@ tcp_connect(tp, nam, p)
 	struct socket *so = inp->inp_socket;
 	struct tcpcb *otp;
 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
-	struct sockaddr_in *ifaddr;
+	struct sockaddr_in ifaddr;
 	struct rmxp_tao *taop;
 	struct rmxp_tao tao_noncached;
 	int error;
+	unsigned int outif = 0;
 
 	if (inp->inp_lport == 0) {
 		error = in_pcbbind(inp, (struct sockaddr *)0, p);
@@ -894,7 +917,7 @@ tcp_connect(tp, nam, p)
 	 * earlier incarnation of this same connection still in
 	 * TIME_WAIT state, creating an ADDRINUSE error.
 	 */
-	error = in_pcbladdr(inp, nam, &ifaddr);
+	error = in_pcbladdr(inp, nam, &ifaddr, &outif);
 	if (error)
 		return error;
 
@@ -902,7 +925,7 @@ tcp_connect(tp, nam, p)
 	oinp = in_pcblookup_hash(inp->inp_pcbinfo,
 	    sin->sin_addr, sin->sin_port,
 	    inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr
-						: ifaddr->sin_addr,
+						: ifaddr.sin_addr,
 	    inp->inp_lport,  0, NULL);
 
 	tcp_lock(inp->inp_socket, 0, 0);
@@ -917,7 +940,7 @@ tcp_connect(tp, nam, p)
 
 		if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
 		otp->t_state == TCPS_TIME_WAIT &&
-		    otp->t_starttime < (u_int32_t)tcp_msl &&
+		    ((int)(tcp_now - otp->t_starttime)) < tcp_msl &&
 		    (otp->t_flags & TF_RCVD_CC))
 			otp = tcp_close(otp);
 		else {
@@ -930,7 +953,7 @@ tcp_connect(tp, nam, p)
 			tcp_unlock(oinp->inp_socket, 1, 0);
 	}
 skip_oinp:
-	if ((inp->inp_laddr.s_addr == INADDR_ANY ? ifaddr->sin_addr.s_addr :
+	if ((inp->inp_laddr.s_addr == INADDR_ANY ? ifaddr.sin_addr.s_addr :
 		 inp->inp_laddr.s_addr) == sin->sin_addr.s_addr &&
 	    inp->inp_lport == sin->sin_port)
 			return EINVAL;
@@ -940,8 +963,10 @@ skip_oinp:
 		lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
 		socket_lock(inp->inp_socket, 0);
 	}
-	if (inp->inp_laddr.s_addr == INADDR_ANY)
-		inp->inp_laddr = ifaddr->sin_addr;
+	if (inp->inp_laddr.s_addr == INADDR_ANY) {
+		inp->inp_laddr = ifaddr.sin_addr;
+		inp->inp_last_outif = outif;
+	}
 	inp->inp_faddr = sin->sin_addr;
 	inp->inp_fport = sin->sin_port;
 	in_pcbrehash(inp);
@@ -968,9 +993,12 @@ skip_oinp:
 	soisconnecting(so);
 	tcpstat.tcps_connattempt++;
 	tp->t_state = TCPS_SYN_SENT;
-	tp->t_timer[TCPT_KEEP] = tp->t_keepinit ? tp->t_keepinit : tcp_keepinit;
+	tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, 
+		tp->t_keepinit ? tp->t_keepinit : tcp_keepinit);
 	tp->iss = tcp_new_isn(tp);
 	tcp_sendseqinit(tp);
+	if (nstat_collect)
+		nstat_route_connect_attempt(inp->inp_route.ro_rt);
 
 	/*
 	 * Generate a CC value for this connection and
@@ -1008,6 +1036,7 @@ tcp6_connect(tp, nam, p)
 	struct rmxp_tao *taop;
 	struct rmxp_tao tao_noncached;
 	int error;
+	unsigned int outif = 0;
 
 	if (inp->inp_lport == 0) {
 		error = in6_pcbbind(inp, (struct sockaddr *)0, p);
@@ -1020,7 +1049,7 @@ tcp6_connect(tp, nam, p)
 	 * earlier incarnation of this same connection still in
 	 * TIME_WAIT state, creating an ADDRINUSE error.
 	 */
-	error = in6_pcbladdr(inp, nam, &addr6);
+	error = in6_pcbladdr(inp, nam, &addr6, &outif);
 	if (error)
 		return error;
 	tcp_unlock(inp->inp_socket, 0, 0);
@@ -1034,7 +1063,7 @@ tcp6_connect(tp, nam, p)
 	if (oinp) {
 		if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
 		    otp->t_state == TCPS_TIME_WAIT &&
-		    otp->t_starttime < (u_int32_t)tcp_msl &&
+		    ((int)(tcp_now - otp->t_starttime)) < tcp_msl &&
 		    (otp->t_flags & TF_RCVD_CC))
 			otp = tcp_close(otp);
 		else
@@ -1046,8 +1075,10 @@ tcp6_connect(tp, nam, p)
 		lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
 		socket_lock(inp->inp_socket, 0);
 	}
-	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
+	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
 		inp->in6p_laddr = addr6;
+		inp->in6p_last_outif = outif;
+	}
 	inp->in6p_faddr = sin6->sin6_addr;
 	inp->inp_fport = sin6->sin6_port;
 	if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0)
@@ -1063,9 +1094,12 @@ tcp6_connect(tp, nam, p)
 	soisconnecting(so);
 	tcpstat.tcps_connattempt++;
 	tp->t_state = TCPS_SYN_SENT;
-	tp->t_timer[TCPT_KEEP] = tp->t_keepinit ? tp->t_keepinit : tcp_keepinit;
+	tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, 
+		tp->t_keepinit ? tp->t_keepinit : tcp_keepinit);
 	tp->iss = tcp_new_isn(tp);
 	tcp_sendseqinit(tp);
+	if (nstat_collect)
+		nstat_route_connect_attempt(inp->inp_route.ro_rt);
 
 	/*
 	 * Generate a CC value for this connection and
@@ -1089,6 +1123,132 @@ tcp6_connect(tp, nam, p)
 }
 #endif /* INET6 */
 
+/*
+ * Export TCP internal state information via a struct tcp_info
+ */
+__private_extern__ void
+tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
+{
+	bzero(ti, sizeof(*ti));
+
+	ti->tcpi_state = tp->t_state;
+	
+	if (tp->t_state > TCPS_LISTEN) {
+		if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
+			ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
+		if (tp->t_flags & TF_SACK_PERMIT)
+			ti->tcpi_options |= TCPI_OPT_SACK;
+		if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
+			ti->tcpi_options |= TCPI_OPT_WSCALE;
+			ti->tcpi_snd_wscale = tp->snd_scale;
+			ti->tcpi_rcv_wscale = tp->rcv_scale;
+		}
+		
+		ti->tcpi_snd_mss = tp->t_maxseg;
+		ti->tcpi_rcv_mss = tp->t_maxseg;
+
+		ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
+		ti->tcpi_snd_cwnd = tp->snd_cwnd;
+	
+		ti->tcpi_rcv_space = tp->rcv_wnd;
+
+		ti->tcpi_snd_wnd = tp->snd_wnd;
+		ti->tcpi_snd_bwnd = tp->snd_bwnd;
+		ti->tcpi_snd_nxt = tp->snd_nxt;
+		ti->tcpi_rcv_nxt = tp->rcv_nxt;
+		
+		ti->tcpi_last_outif = tp->t_inpcb->inp_last_outif;
+	}
+}
+
+__private_extern__ errno_t
+tcp_fill_info_for_info_tuple(struct info_tuple *itpl, struct tcp_info *ti)
+{
+	struct inpcbinfo *pcbinfo = NULL;
+	struct inpcb *inp = NULL;
+	struct socket *so;
+	struct tcpcb *tp;
+	
+	if (itpl->itpl_proto == IPPROTO_TCP)
+		pcbinfo = &tcbinfo;
+	else
+		return EINVAL;
+	
+	if (itpl->itpl_local_sa.sa_family == AF_INET &&
+		itpl->itpl_remote_sa.sa_family == AF_INET) {
+		inp = in_pcblookup_hash(pcbinfo, 
+								itpl->itpl_remote_sin.sin_addr,
+								itpl->itpl_remote_sin.sin_port,
+								itpl->itpl_local_sin.sin_addr,
+								itpl->itpl_local_sin.sin_port,
+								0, NULL);
+	} else if (itpl->itpl_local_sa.sa_family == AF_INET6 &&
+		itpl->itpl_remote_sa.sa_family == AF_INET6) {
+		struct in6_addr ina6_local;
+		struct in6_addr ina6_remote;
+		
+		ina6_local = itpl->itpl_local_sin6.sin6_addr;
+		if (IN6_IS_SCOPE_LINKLOCAL(&ina6_local) && itpl->itpl_local_sin6.sin6_scope_id)
+			ina6_local.s6_addr16[1] = htons(itpl->itpl_local_sin6.sin6_scope_id);
+
+		ina6_remote = itpl->itpl_remote_sin6.sin6_addr;
+		if (IN6_IS_SCOPE_LINKLOCAL(&ina6_remote) && itpl->itpl_remote_sin6.sin6_scope_id)
+			ina6_remote.s6_addr16[1] = htons(itpl->itpl_remote_sin6.sin6_scope_id);
+		
+		inp = in6_pcblookup_hash(pcbinfo, 
+								&ina6_remote,
+								itpl->itpl_remote_sin6.sin6_port,
+								&ina6_local,
+								itpl->itpl_local_sin6.sin6_port,
+								0, NULL);
+	} else
+		return EINVAL;
+	if (inp == NULL || (so = inp->inp_socket) == NULL)
+		return ENOENT;
+
+	socket_lock(so, 0);
+	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
+		socket_unlock(so, 0);
+		return ENOENT;
+	}
+	tp = intotcpcb(inp);
+
+	tcp_fill_info(tp, ti);
+	socket_unlock(so, 0);
+
+	return 0;
+}
+
+__private_extern__ int
+tcp_sysctl_info(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+	int error;
+	struct tcp_info ti;
+	struct info_tuple itpl;
+	
+	if (req->newptr == USER_ADDR_NULL) {
+		return EINVAL;
+	}
+	if (req->newlen < sizeof(struct info_tuple)) {
+		return EINVAL;
+	}
+	error = SYSCTL_IN(req, &itpl, sizeof(struct info_tuple));
+	if (error != 0) {
+		return error;
+	}
+	error = tcp_fill_info_for_info_tuple(&itpl, &ti);
+	if (error != 0) {
+		return error;
+	}
+	error = SYSCTL_OUT(req, &ti, sizeof(struct tcp_info));
+	if (error != 0) {
+		return error;
+	}
+	
+	return 0;
+}
+
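From userland, the handler above is reached through sysctlbyname(). A hedged sketch of a caller; struct info_tuple comes from a private header, and local_sin/remote_sin below are placeholder sockaddr_in variables:

    #include <netinet/in.h>
    #include <sys/sysctl.h>
    #include <string.h>

    struct info_tuple itpl;
    struct tcp_info ti;
    size_t len = sizeof(ti);

    memset(&itpl, 0, sizeof(itpl));
    itpl.itpl_proto = IPPROTO_TCP;
    itpl.itpl_local_sin = local_sin;    /* placeholder local endpoint */
    itpl.itpl_remote_sin = remote_sin;  /* placeholder remote endpoint */

    if (sysctlbyname("net.inet.tcp.info", &ti, &len,
        &itpl, sizeof(itpl)) == 0) {
        /* ti.tcpi_state, ti.tcpi_snd_cwnd, ... describe the connection */
    }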
 /*
  * The new sockopt interface makes it possible for us to block in the
  * copyin/out step (if we take a page fault).  Taking a page fault at
@@ -1124,6 +1284,8 @@ tcp_ctloutput(so, sopt)
                 return (ECONNRESET);
         }
 
+	calculate_tcp_clock();
+
 	switch (sopt->sopt_dir) {
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
@@ -1155,7 +1317,17 @@ tcp_ctloutput(so, sopt)
 			else
 				tp->t_flags &= ~opt;
 			break;
-
+		case TCP_RXT_FINDROP:
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+				sizeof optval);
+			if (error)
+				break;
+			opt = TF_RXTFINDROP;
+			if (optval)
+				tp->t_flagsext |= opt;
+			else
+				tp->t_flagsext &= ~opt;
+			break;
 		case TCP_MAXSEG:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
@@ -1178,7 +1350,9 @@ tcp_ctloutput(so, sopt)
 				error = EINVAL;
 			else {
 				tp->t_keepidle = optval * TCP_RETRANSHZ;
-				tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp); /* reset the timer to new value */
+				tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, 
+					TCP_KEEPIDLE(tp)); /* reset the timer to new value */
+				tcp_check_timer_state(tp);
 			}
                         break;
 
@@ -1193,6 +1367,26 @@ tcp_ctloutput(so, sopt)
 				tp->t_keepinit = optval * TCP_RETRANSHZ;
 			break;
 		
+		case PERSIST_TIMEOUT:
+			error = sooptcopyin(sopt, &optval, sizeof optval,
+						sizeof optval);
+			if (error)
+				break;
+			if (optval < 0)
+				error = EINVAL;
+			else 
+				tp->t_persist_timeout = optval * TCP_RETRANSHZ;
+			break;
+		case TCP_RXT_CONNDROPTIME:
+			error = sooptcopyin(sopt, &optval, sizeof(optval),
+					sizeof(optval));
+			if (error)
+				break;
+			if (optval < 0)
+				error = EINVAL;
+			else
+				tp->rxt_conndroptime = optval * TCP_RETRANSHZ;
+			break;
 		default:
 			error = ENOPROTOOPT;
 			break;
@@ -1219,6 +1413,22 @@ tcp_ctloutput(so, sopt)
 		case TCP_CONNECTIONTIMEOUT:
 			optval = tp->t_keepinit / TCP_RETRANSHZ;
 			break;
+		case PERSIST_TIMEOUT:
+			optval = tp->t_persist_timeout / TCP_RETRANSHZ;
+			break;
+		case TCP_RXT_CONNDROPTIME:
+			optval = tp->rxt_conndroptime / TCP_RETRANSHZ;
+			break;
+		case TCP_RXT_FINDROP:
+			optval = tp->t_flagsext & TF_RXTFINDROP;
+			break; 
+		case TCP_INFO: {
+			struct tcp_info ti;
+
+			tcp_fill_info(tp, &ti);
+			error = sooptcopyout(sopt, &ti, sizeof(struct tcp_info));
+			goto done;
+		}
 		default:
 			error = ENOPROTOOPT;
 			break;
@@ -1227,6 +1437,7 @@ tcp_ctloutput(so, sopt)
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 		break;
 	}
+done:
 	return (error);
 }
 
@@ -1272,9 +1483,9 @@ sysctl_tcp_sospace(struct sysctl_oid *oidp, __unused void *arg1,
 	return error;
 }
 
-SYSCTL_PROC(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLTYPE_INT | CTLFLAG_RW, 
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_sendspace , 0, &sysctl_tcp_sospace, "IU", "Maximum outgoing TCP datagram size");
-SYSCTL_PROC(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLTYPE_INT | CTLFLAG_RW, 
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_recvspace , 0, &sysctl_tcp_sospace, "IU", "Maximum incoming TCP datagram size");
 
 
@@ -1353,6 +1564,9 @@ tcp_attach(so, p)
 		so->so_state |= nofd;
 		return (ENOBUFS);
 	}
+	if (nstat_collect) {
+		nstat_tcp_new_pcb(inp);
+	}
 	tp->t_state = TCPS_CLOSED;
 	return (0);
 }
@@ -1425,7 +1639,7 @@ tcp_usrclosed(tp)
 		soisdisconnected(tp->t_inpcb->inp_socket);
 		/* To prevent the connection hanging in FIN_WAIT_2 forever. */
 		if (tp->t_state == TCPS_FIN_WAIT_2)
-			tp->t_timer[TCPT_2MSL] = tcp_maxidle;
+			tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp, tcp_maxidle);
 	}
 	return (tp);
 }
diff --git a/bsd/netinet/tcp_var.h b/bsd/netinet/tcp_var.h
index 0fa518d78..4066829cb 100644
--- a/bsd/netinet/tcp_var.h
+++ b/bsd/netinet/tcp_var.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -79,11 +79,57 @@ struct name {				\
 #define _TCPCB_LIST_HEAD(name, type)	LIST_HEAD(name, type)
 #endif
 
-#define TCP_RETRANSHZ	    10		/* tcp retrans timer (100ms) per hz */		
+#define TCP_RETRANSHZ	1000	/* granularity of TCP timestamps, 1 ms per tick */
+#define TCP_TIMERHZ	100		/* period of the TCP fast timer, 100 ms */
+
+/* Minimum time quantum within which the timers are coalesced */
+#define TCP_FASTTIMER_QUANTUM   TCP_TIMERHZ	/* fast mode, once every 100ms */
+#define TCP_SLOWTIMER_QUANTUM   (TCP_RETRANSHZ / PR_SLOWHZ)	/* slow mode, once every 500ms */
+
+#define TCP_RETRANSHZ_TO_USEC 1000
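With the new clock rate, one tick is one millisecond, so the quanta above work out as follows (assuming PR_SLOWHZ keeps its traditional BSD value of 2, which matches the "once every 500ms" comment):

    TCP_FASTTIMER_QUANTUM = TCP_TIMERHZ               = 100 ticks = 100 ms
    TCP_SLOWTIMER_QUANTUM = TCP_RETRANSHZ / PR_SLOWHZ
                          = 1000 / 2                  = 500 ticks = 500 ms

    /* and ticks convert to microseconds by: */
    usec = ticks * TCP_RETRANSHZ_TO_USEC;    /* 100 ticks -> 100000 us */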
 
 #ifdef KERNEL_PRIVATE
 #define N_TIME_WAIT_SLOTS   128     	/* must be power of 2 */
 
+/* Base RTT is stored for N_RTT_BASE slots. This is used to
+ * estimate the expected minimum RTT for delay-based congestion
+ * control algorithms.
+ */
+#define N_RTT_BASE	5
+
+/* Always allow at least 4 packets worth of recv window when adjusting
+ * recv window using inter-packet arrival jitter.
+ */
+#define MIN_IAJ_WIN 4
+
+/* A variation in delay of this many milliseconds is tolerable. This limit
+ * has to be low but greater than zero. We also use the standard deviation
+ * of the jitter to adjust this limit for different link and connection
+ * types.
+ */
+#define ALLOWED_IAJ 5
+
+/* Ignore the first few packets on a connection until the ACK clock gets going
+ */
+#define IAJ_IGNORE_PKTCNT 40
+
+/* Cap growth of the accumulated IAJ value at this threshold. This limit
+ * controls how many ALLOWED_IAJ measurements a receiver has to see
+ * before opening the receive window.
+ */
+#define ACC_IAJ_HIGH_THRESH 100
+
+/* When accumulated IAJ reaches this value, the receiver starts to react by 
+ * closing the window
+ */
+#define ACC_IAJ_REACT_LIMIT 200
+
+/* If the number of small packets (smaller than IAJ packet size) seen on a 
+ * connection is more than this threshold, reset the size and learn it again.
+ * This is needed because the sender might send smaller segments after PMTU
+ * discovery and the receiver has to learn the new size.
+ */
+#define RESET_IAJ_SIZE_THRESH 20
+
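The constants above gate a simple accumulate-and-react scheme. A rough sketch of the intended flow, assuming a per-packet receiver update along these lines (rcv_ts is a hypothetical arrival timestamp; the real logic lives in tcp_input.c and also folds avg_iaj/std_dev_iaj into the allowed limit):

    uint32_t iaj = rcv_ts - tp->iaj_rcv_ts;    /* inter-arrival gap, ms */

    if (tp->iaj_pktcnt <= IAJ_IGNORE_PKTCNT)
        return;                                /* too early to judge */
    if (iaj > ALLOWED_IAJ)
        tp->acc_iaj += iaj - ALLOWED_IAJ;      /* accumulate the excess */

    if (tp->acc_iaj > ACC_IAJ_REACT_LIMIT)
        ;    /* react: close the advertised receive window */
    else if (tp->acc_iaj > ACC_IAJ_HIGH_THRESH)
        ;    /* hold: stop opening the window any further */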
 /*
  * Kernel variables for tcp.
  */
@@ -133,9 +179,8 @@ struct tcptemp {
 struct tcpcb {
 	struct	tsegqe_head t_segq;
 	int	t_dupacks;		/* consecutive dup acks recd */
-	struct	tcptemp	*unused;	/* unused now: was t_template */
-
-	int	t_timer[TCPT_NTIMERS];	/* tcp timers */
+	uint32_t t_timer[TCPT_NTIMERS];	/* tcp timers */
+	struct tcptimerentry tentry;	/* entry in timer list */
 
 	struct	inpcb *t_inpcb;		/* back pointer to internet pcb */
 	int	t_state;		/* state of this connection */
@@ -157,11 +202,9 @@ struct tcpcb {
 #define	TF_RCVD_CC	0x04000		/* a CC was received in SYN */
 #define	TF_SENDCCNEW	0x08000		/* send CCnew instead of CC in SYN */
 #define	TF_MORETOCOME	0x10000		/* More data to be appended to sock */
-#define	TF_LQ_OVERFLOW	0x20000		/* UNUSED listen queue overflow */
+#define	TF_LOCAL	0x20000		/* connection to a host on local link */
 #define	TF_RXWIN0SENT	0x40000		/* sent a receiver win 0 in response */
 #define	TF_SLOWLINK	0x80000		/* route is a on a modem speed link */
-
-
 #define	TF_LASTIDLE	0x100000	/* connection was previously idle */
 #define	TF_FASTRECOVERY	0x200000	/* in NewReno Fast Recovery */
 #define	TF_WASFRECOVERY	0x400000	/* was in NewReno Fast Recovery */
@@ -172,6 +215,8 @@ struct tcpcb {
 #define	TF_CLOSING	0x8000000	/* pending tcp close */
 #define TF_TSO		0x10000000	/* TCP Segment Offloading is enabled on this connection */
 #define TF_BLACKHOLE	0x20000000	/* Path MTU Discovery Black Hole detection */
+#define TF_TIMER_ONLIST 0x40000000	/* pcb is on tcp_timer_list */
+#define TF_STRETCHACK	0x80000000	/* receiver is going to delay acks */
 
 	int	t_force;		/* 1 if forcing out a byte */
 
@@ -199,14 +244,14 @@ struct tcpcb {
 					 * for slow start exponential to
 					 * linear switch
 					 */
-	u_int32_t	snd_bandwidth;		/* calculated bandwidth or 0 */
+	u_int32_t	snd_bandwidth;	/* calculated bandwidth or 0 */
 	tcp_seq	snd_recover;		/* for use in NewReno Fast Recovery */
 
 	u_int	t_maxopd;		/* mss plus options */
 
-	u_int32_t	t_rcvtime;		/* inactivity time */
-	u_int32_t	t_starttime;		/* time connection was established */
-	int	t_rtttime;		/* round trip time */
+	u_int32_t	t_rcvtime;	/* time at which a packet was received */
+	u_int32_t	t_starttime;	/* time connection was established */
+	int	t_rtttime;		/* tcp clock when rtt calculation was started */
 	tcp_seq	t_rtseq;		/* sequence number being timed */
 
 	int	t_bw_rtttime;		/* used for bandwidth calculation */
@@ -220,7 +265,10 @@ struct tcpcb {
 	int	t_rxtshift;		/* log(2) of rexmt exp. backoff */
 	u_int	t_rttmin;		/* minimum rtt allowed */
 	u_int	t_rttbest;		/* best rtt we've seen */
+	u_int	t_rttcur;		/* most recent value of rtt */
 	u_int32_t	t_rttupdated;		/* number of times rtt sampled */
+	u_int32_t	rxt_conndroptime;	/* when set, drop the connection after retransmitting for this long */
+	u_int32_t	rxt_start;		/* time at which a connection starts retransmitting */
 	u_int32_t	max_sndwnd;		/* largest window peer has offered */
 
 	int	t_softerror;		/* possible error not yet reported */
@@ -234,6 +282,7 @@ struct tcpcb {
 	u_char	rcv_scale;		/* window scaling for recv window */
 	u_char	request_r_scale;	/* pending window scaling */
 	u_char	requested_s_scale;
+	u_int16_t	tcp_cc_index;	/* index of congestion control algorithm */
 	u_int32_t	ts_recent;		/* timestamp echo data */
 
 	u_int32_t	ts_recent_age;		/* when last updated */
@@ -251,24 +300,36 @@ struct tcpcb {
 	int     t_keepidle;		/* keepalive idle timer (override global if > 0) */
 	int	t_lastchain;		/* amount of packets chained last time around */
 	int	t_unacksegs;		/* received but unacked segments: used for delaying acks */
+	u_int32_t	t_persist_timeout;	/* ZWP persistence limit as set by PERSIST_TIMEOUT */
+	u_int32_t	t_persist_stop;		/* persistence limit deadline if triggered by ZWP */
 
 
 /* 3529618 MSS overload prevention */
 	u_int32_t	rcv_reset;
 	u_int32_t	rcv_pps;
 	u_int32_t	rcv_byps;
-	u_int32_t  rcv_maxbyps;
+	u_int32_t	rcv_maxbyps;
+
+/* Receiver state for stretch-ack algorithm */
+	u_int32_t	rcv_unackwin;	/* to measure win for stretching acks */
+	u_int32_t	rcv_by_unackwin; /* bytes seen during the last ack-stretching win */
+	u_int16_t	rcv_waitforss;	/* wait for packets during slow-start */
+	u_int16_t		ecn_flags;
+#define TE_SETUPSENT		0x01	/* Indicate we have sent ECN-SETUP SYN or SYN-ACK */
+#define TE_SETUPRECEIVED	0x02	/* Indicate we have received ECN-SETUP SYN or SYN-ACK */
+#define TE_SENDIPECT		0x04	/* Indicate we haven't sent or received non-ECN-setup SYN or SYN-ACK */
+#define TE_SENDCWR		0x08	/* Indicate that the next non-retransmit should have the TCP CWR flag set */
+#define TE_SENDECE		0x10	/* Indicate that the next packet should have the TCP ECE flag set */
 	tcp_seq snd_high;		/* for use in NewReno Fast Recovery */
 	tcp_seq snd_high_prev;	/* snd_high prior to retransmit */
-
 	tcp_seq	snd_recover_prev;	/* snd_recover prior to retransmit */
 	u_char	snd_limited;		/* segments limited transmitted */
 /* anti DoS counters */
 	u_int32_t	rcv_second;		/* start of interval second */
+
 /* SACK related state */
 	int	sack_enable;		/* enable SACK for this connection */
 	int	snd_numholes;		/* number of holes seen by sender */
-
 	TAILQ_HEAD(sackhole_head, sackhole) snd_holes;
 						/* SACK scoreboard (sorted) */
 	tcp_seq	snd_fack;		/* last seq number(+1) sack'd by rcv'r*/
@@ -277,18 +338,7 @@ struct tcpcb {
 	tcp_seq sack_newdata;		/* New data xmitted in this recovery
    					   episode starts at this seq number */
 	struct sackhint	sackhint;	/* SACK scoreboard hint */
-	int	t_rttlow;		/* smallest observerved RTT */
-	u_long		ecn_flags;
-#define TE_SETUPSENT		0x01	/* Indicate we have sent ECN-SETUP SYN or SYN-ACK */
-#define TE_SETUPRECEIVED	0x02	/* Indicate we have received ECN-SETUP SYN or SYN-ACK */
-#define TE_SENDIPECT		0x04	/* Indicate we haven't sent or received non-ECN-setup SYN or SYN-ACK */
-#define TE_SENDCWR			0x08	/* Indicate that the next non-retransmit should have the TCP CWR flag set */
-#define TE_SENDECE			0x10	/* Indicate that the next packet should have the TCP ECE flag set */
 	
-#if TRAFFIC_MGT
-	u_int32_t		tot_recv_snapshot;	/* snapshot of global total pkts received */
-	u_int32_t		bg_recv_snapshot;	/* snapshot of global background pkts received */
-#endif /* TRAFFIC_MGT */
 	u_int32_t	t_pktlist_sentlen; /* total bytes in transmit chain */
 	struct mbuf	*t_pktlist_head; /* First packet in transmit chain */
 	struct mbuf	*t_pktlist_tail; /* Last packet in transmit chain */
@@ -296,12 +346,57 @@ struct tcpcb {
 	int		t_keepinit; /* connection timeout, i.e. idle time in SYN_SENT or SYN_RECV state */
 	u_int32_t	tso_max_segment_size;	/* TCP Segment Offloading maximum segment unit for NIC */
 	u_int 		t_pmtud_saved_maxopd;	/* MSS saved before performing PMTU-D BlackHole detection */
+	
+	struct {
+		u_int32_t	rxduplicatebytes;
+		u_int32_t	rxoutoforderbytes;
+		u_int32_t	txretransmitbytes;
+		u_int32_t	unused_pad_to_8;
+	} t_stat;
+	
+	/* Background congestion related state */
+	uint32_t	rtt_hist[N_RTT_BASE];	/* history of minimum RTT */
+	uint32_t	rtt_count;		/* Number of RTT samples in recent base history */
+	uint32_t	bg_ssthresh;		/* Slow start threshold until delay increases */
+	uint32_t	t_flagsext;		/* Another field to accommodate more flags */
+#define TF_RXTFINDROP	0x1			/* Drop conn after retransmitting FIN 3 times */
+#define TF_RCVUNACK_WAITSS	0x2		/* set when the receiver should not stretch acks */
+
+#if TRAFFIC_MGT
+	/* Inter-arrival jitter related state */
+	uint32_t 	iaj_rcv_ts;		/* tcp clock when the first packet was received */
+	uint16_t	iaj_size;		/* Size of packet for iaj measurement */
+	uint16_t	iaj_small_pkt;		/* Count of packets smaller than iaj_size */
+	uint16_t	iaj_pktcnt;		/* packet count, to avoid throttling initially */
+	uint16_t	acc_iaj;		/* Accumulated iaj */
+	tcp_seq 	iaj_rwintop;		/* recent max advertised window */
+	uint32_t	avg_iaj;		/* Mean */
+	uint32_t	std_dev_iaj;		/* Standard deviation */
+#endif /* TRAFFIC_MGT */
 };
 
 #define IN_FASTRECOVERY(tp)	(tp->t_flags & TF_FASTRECOVERY)
 #define ENTER_FASTRECOVERY(tp)	tp->t_flags |= TF_FASTRECOVERY
 #define EXIT_FASTRECOVERY(tp)	tp->t_flags &= ~TF_FASTRECOVERY
 
+#if CONFIG_DTRACE
+enum tcp_cc_event {
+	TCP_CC_CWND_INIT,
+	TCP_CC_INSEQ_ACK_RCVD,
+	TCP_CC_ACK_RCVD,
+	TCP_CC_ENTER_FASTRECOVERY,
+	TCP_CC_IN_FASTRECOVERY,
+	TCP_CC_EXIT_FASTRECOVERY,
+	TCP_CC_PARTIAL_ACK,
+	TCP_CC_IDLE_TIMEOUT,
+	TCP_CC_REXMT_TIMEOUT,
+	TCP_CC_ECN_RCVD,
+	TCP_CC_BAD_REXMT_RECOVERY,
+	TCP_CC_OUTPUT_ERROR,
+	TCP_CC_CHANGE_ALGO
+};
+#endif /* CONFIG_DTRACE */
 
 /*
  * Structure to hold TCP options that are only used during segment
@@ -346,18 +441,19 @@ struct rmxp_tao {
 #define	sototcpcb(so)	(intotcpcb(sotoinpcb(so)))
 
 /*
- * The smoothed round-trip time and estimated variance
+ * The measured RTT is in milliseconds, as the timestamp granularity is
+ * one millisecond. The smoothed round-trip time and estimated variance
  * are stored as fixed point numbers scaled by the values below.
  * For convenience, these scales are also used in smoothing the average
  * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed).
- * With these scales, srtt has 3 bits to the right of the binary point,
- * and thus an "ALPHA" of 0.875.  rttvar has 2 bits to the right of the
+ * With these scales, srtt has 5 bits to the right of the binary point,
+ * and thus an "ALPHA" of 0.875.  rttvar has 4 bits to the right of the
  * binary point, and is smoothed with an ALPHA of 0.75.
  */
 #define	TCP_RTT_SCALE		32	/* multiplier for srtt; 5 bits frac. */
-#define	TCP_RTT_SHIFT		5	/* shift for srtt; 3 bits frac. */
-#define	TCP_RTTVAR_SCALE	16	/* multiplier for rttvar; 2 bits */
-#define	TCP_RTTVAR_SHIFT	4	/* shift for rttvar; 2 bits */
+#define	TCP_RTT_SHIFT		5	/* shift for srtt; 5 bits frac. */
+#define	TCP_RTTVAR_SCALE	16	/* multiplier for rttvar; 4 bits */
+#define	TCP_RTTVAR_SHIFT	4	/* shift for rttvar; 4 bits */
 #define	TCP_DELTA_SHIFT		2	/* see tcp_input.c */
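Concretely, srtt is now kept in milliseconds with 5 fractional bits. A worked conversion with hypothetical values:

    int srtt_ms = tp->t_srtt >> TCP_RTT_SHIFT;    /* drop 5 frac. bits */
    /* A fully converged 48 ms RTT is stored as 48 * TCP_RTT_SCALE
     * == 1536, and 1536 >> 5 == 48; rttvar works the same way with
     * 4 fractional bits.
     */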
 
 /*
@@ -399,7 +495,7 @@ struct tcpcb {
 	int	t_dupacks;		/* consecutive dup acks recd */
 	u_int32_t unused;		/* unused now: was t_template */
 
-	int	t_timer[TCPT_NTIMERS];	/* tcp timers */
+	int	t_timer[TCPT_NTIMERS_EXT];	/* tcp timers */
 
 	_TCPCB_PTR(struct inpcb *) t_inpcb;	/* back pointer to internet pcb */
 	int	t_state;		/* state of this connection */
@@ -452,7 +548,7 @@ struct tcpcb {
 					 */
 	u_int	t_maxopd;		/* mss plus options */
 
-	u_int32_t t_rcvtime;		/* inactivity time */
+	u_int32_t t_rcvtime;		/* time at which a packet was received */
 	u_int32_t t_starttime;		/* time connection was established */
 	int	t_rtttime;		/* round trip time */
 	tcp_seq	t_rtseq;		/* sequence number being timed */
@@ -595,9 +691,8 @@ struct	tcpstat {
 	u_int32_t  tcps_sack_send_blocks;	    /* SACK blocks (options) sent     */
 	u_int32_t  tcps_sack_sboverflow;	    /* SACK sendblock overflow   */
 
-#if TRAFFIC_MGT
 	u_int32_t	tcps_bg_rcvtotal;	/* total background packets received */
-#endif /* TRAFFIC_MGT */
+	u_int32_t	tcps_rxtfindrop;	/* drop conn after retransmitting FIN */
 };
 
 #pragma pack(4)
@@ -633,7 +728,7 @@ struct  xtcpcb64 {
         u_int64_t t_segq;
         int     t_dupacks;              /* consecutive dup acks recd */
 
-        int     t_timer[TCPT_NTIMERS];  /* tcp timers */
+        int t_timer[TCPT_NTIMERS_EXT];  /* tcp timers */
 
         int     t_state;                /* state of this connection */
         u_int   t_flags;
@@ -665,7 +760,7 @@ struct  xtcpcb64 {
                                          */
         u_int   t_maxopd;               /* mss plus options */
 
-        u_int32_t t_rcvtime;            /* inactivity time */
+        u_int32_t t_rcvtime;            /* time at which a packet was received */
         u_int32_t t_starttime;          /* time connection was established */
         int     t_rtttime;              /* round trip time */
         tcp_seq t_rtseq;                /* sequence number being timed */
@@ -707,6 +802,87 @@ struct  xtcpcb64 {
 
 #endif /* !CONFIG_EMBEDDED */
 
+#ifdef PRIVATE
+
+struct  xtcpcb_n {
+	u_int32_t	xt_len;
+	u_int32_t	xt_kind;		/* XSO_TCPCB */
+
+	u_int64_t t_segq;
+	int     t_dupacks;              /* consecutive dup acks recd */
+	
+	int t_timer[TCPT_NTIMERS_EXT];  /* tcp timers */
+	
+	int     t_state;                /* state of this connection */
+	u_int   t_flags;
+	
+	int     t_force;                /* 1 if forcing out a byte */
+	
+	tcp_seq snd_una;                /* send unacknowledged */
+	tcp_seq snd_max;                /* highest sequence number sent;
+									 * used to recognize retransmits
+									 */
+	tcp_seq snd_nxt;                /* send next */
+	tcp_seq snd_up;                 /* send urgent pointer */
+	
+	tcp_seq snd_wl1;                /* window update seg seq number */
+	tcp_seq snd_wl2;                /* window update seg ack number */
+	tcp_seq iss;                    /* initial send sequence number */
+	tcp_seq irs;                    /* initial receive sequence number */
+	
+	tcp_seq rcv_nxt;                /* receive next */
+	tcp_seq rcv_adv;                /* advertised window */
+	u_int32_t rcv_wnd;              /* receive window */
+	tcp_seq rcv_up;                 /* receive urgent pointer */
+	
+	u_int32_t snd_wnd;              /* send window */
+	u_int32_t snd_cwnd;             /* congestion-controlled window */
+	u_int32_t snd_ssthresh;         /* snd_cwnd size threshold for
+									 * for slow start exponential to
+									 * linear switch
+									 */
+	u_int   t_maxopd;               /* mss plus options */
+	
+	u_int32_t t_rcvtime;            /* time at which a packet was received */
+	u_int32_t t_starttime;          /* time connection was established */
+	int     t_rtttime;              /* round trip time */
+	tcp_seq t_rtseq;                /* sequence number being timed */
+	
+	int     t_rxtcur;               /* current retransmit value (ticks) */
+	u_int   t_maxseg;               /* maximum segment size */
+	int     t_srtt;                 /* smoothed round-trip time */
+	int     t_rttvar;               /* variance in round-trip time */
+	
+	int     t_rxtshift;             /* log(2) of rexmt exp. backoff */
+	u_int   t_rttmin;               /* minimum rtt allowed */
+	u_int32_t t_rttupdated;         /* number of times rtt sampled */
+	u_int32_t max_sndwnd;           /* largest window peer has offered */
+	
+	int     t_softerror;            /* possible error not yet reported */
+	/* out-of-band data */
+	char    t_oobflags;             /* have some */
+	char    t_iobc;                 /* input character */
+	/* RFC 1323 variables */
+	u_char  snd_scale;              /* window scaling for send window */
+	u_char  rcv_scale;              /* window scaling for recv window */
+	u_char  request_r_scale;        /* pending window scaling */
+	u_char  requested_s_scale;
+	u_int32_t ts_recent;            /* timestamp echo data */
+	
+	u_int32_t ts_recent_age;        /* when last updated */
+	tcp_seq last_ack_sent;
+	/* RFC 1644 variables */
+	tcp_cc  cc_send;                /* send connection count */
+	tcp_cc  cc_recv;                /* receive connection count */
+	tcp_seq snd_recover;            /* for use in fast recovery */
+	/* experimental */
+	u_int32_t snd_cwnd_prev;        /* cwnd prior to retransmit */
+	u_int32_t snd_ssthresh_prev;    /* ssthresh prior to retransmit */
+	u_int32_t t_badrxtwin;          /* window for retransmit recovery */
+};
+
+#endif /* PRIVATE */
+
 #pragma pack()
 
 /*
@@ -760,11 +936,14 @@ extern	struct tcpstat tcpstat;	/* tcp statistics */
 extern	int tcp_mssdflt;	/* XXX */
 extern	int tcp_minmss;
 extern	int tcp_minmssoverload;
-extern	int tcp_do_newreno;
 extern	int ss_fltsz;
 extern	int ss_fltsz_local;
+extern 	int tcp_do_rfc3390;		/* Calculate ss_fltsz according to RFC 3390 */
 #ifdef __APPLE__
 extern	u_int32_t tcp_now;		/* for RFC 1323 timestamps */ 
+extern struct timeval tcp_uptime;
+extern lck_spin_t *tcp_uptime_lock;
+
 extern	int tcp_delack_enabled;
 #endif /* __APPLE__ */
 
@@ -782,7 +961,6 @@ int	 tcp_ctloutput(struct socket *, struct sockopt *);
 struct tcpcb *
 	 tcp_drop(struct tcpcb *, int);
 void	 tcp_drain(void);
-void	 tcp_fasttimo(void *);
 struct rmxp_tao *
 	 tcp_gettaocache(struct inpcb *);
 void	 tcp_init(void) __attribute__((section("__TEXT, initcode")));
@@ -796,10 +974,13 @@ struct tcpcb *
 int	 tcp_output(struct tcpcb *);
 void	 tcp_respond(struct tcpcb *, void *,
 	    struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int,
-	    unsigned int);
+	    unsigned int, unsigned int);
 struct rtentry *tcp_rtlookup(struct inpcb *, unsigned int);
 void	 tcp_setpersist(struct tcpcb *);
 void	 tcp_slowtimo(void);
+void 	 tcp_check_timer_state(struct tcpcb *tp);
+void	 tcp_run_timerlist(void *arg1, void *arg2);
+
 struct tcptemp *
 	 tcp_maketemplate(struct tcpcb *);
 void	 tcp_fillheaders(struct tcpcb *, void *, void *);
@@ -816,10 +997,16 @@ void	 tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
 void	 tcp_free_sackholes(struct tcpcb *tp);
 int32_t	 tcp_sbspace(struct tcpcb *tp);
 void	 tcp_set_tso(struct tcpcb *tp, struct ifnet *ifp);
+void	 tcp_reset_stretch_ack(struct tcpcb *tp);
 
+#if TRAFFIC_MGT
+void	 reset_acc_iaj(struct tcpcb *tp);
+#endif /* TRAFFIC_MGT */
 
 int	 tcp_lock (struct socket *, int, void *);
 int	 tcp_unlock (struct socket *, int, void *);
+void	 calculate_tcp_clock(void);
+
 #ifdef _KERN_LOCKS_H_
 lck_mtx_t *	 tcp_getlock (struct socket *, int);
 #else
diff --git a/bsd/netinet/udp_usrreq.c b/bsd/netinet/udp_usrreq.c
index 500cdcc90..37cc4153c 100644
--- a/bsd/netinet/udp_usrreq.c
+++ b/bsd/netinet/udp_usrreq.c
@@ -72,6 +72,8 @@
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
+#include <sys/mcache.h>
+#include <net/ntstat.h>
 
 #include <kern/zalloc.h>
 
@@ -121,35 +123,35 @@ static int	udpcksum = 1;
 #else
 static int	udpcksum = 0;		/* XXX */
 #endif
-SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW | CTLFLAG_LOCKED,
 		&udpcksum, 0, "");
 
 static u_int32_t udps_in_sw_cksum;
-SYSCTL_UINT(_net_inet_udp, OID_AUTO, in_sw_cksum, CTLFLAG_RD,
+SYSCTL_UINT(_net_inet_udp, OID_AUTO, in_sw_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
     &udps_in_sw_cksum, 0,
     "Number of received packets checksummed in software");
 
 static u_int64_t udps_in_sw_cksum_bytes;
-SYSCTL_QUAD(_net_inet_udp, OID_AUTO, in_sw_cksum_bytes, CTLFLAG_RD,
+SYSCTL_QUAD(_net_inet_udp, OID_AUTO, in_sw_cksum_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
     &udps_in_sw_cksum_bytes,
     "Amount of received data checksummed in software");
 
 static u_int32_t udps_out_sw_cksum;
-SYSCTL_UINT(_net_inet_udp, OID_AUTO, out_sw_cksum, CTLFLAG_RD,
+SYSCTL_UINT(_net_inet_udp, OID_AUTO, out_sw_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
     &udps_out_sw_cksum, 0,
     "Number of transmitted packets checksummed in software");
 
 static u_int64_t udps_out_sw_cksum_bytes;
-SYSCTL_QUAD(_net_inet_udp, OID_AUTO, out_sw_cksum_bytes, CTLFLAG_RD,
+SYSCTL_QUAD(_net_inet_udp, OID_AUTO, out_sw_cksum_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
     &udps_out_sw_cksum_bytes,
     "Amount of transmitted data checksummed in software");
 
 int	log_in_vain = 0;
-SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW, 
+SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW | CTLFLAG_LOCKED,
     &log_in_vain, 0, "Log all incoming UDP packets");
 
 static int	blackhole = 0;
-SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&blackhole, 0, "Do not send port unreachables for refused connects");
 
 struct	inpcbhead udb;		/* from udp_var.h */
@@ -179,13 +181,13 @@ static int udp_gc_done = FALSE; /* Garbage collection performed last slowtimo */
 #endif
 
 struct	udpstat udpstat;	/* from udp_var.h */
-SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RD,
+SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
     &udpstat, udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
-SYSCTL_INT(_net_inet_udp, OID_AUTO, pcbcount, CTLFLAG_RD, 
+SYSCTL_INT(_net_inet_udp, OID_AUTO, pcbcount, CTLFLAG_RD | CTLFLAG_LOCKED, 
     &udbinfo.ipi_count, 0, "Number of active PCBs");
 
 __private_extern__ int udp_use_randomport = 1;
-SYSCTL_INT(_net_inet_udp, OID_AUTO, randomize_ports, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_udp, OID_AUTO, randomize_ports, CTLFLAG_RW | CTLFLAG_LOCKED,
     &udp_use_randomport, 0, "Randomize UDP port numbers");
 
 #if INET6
@@ -254,13 +256,15 @@ udp_input(m, iphlen)
 	register struct udphdr *uh;
 	register struct inpcb *inp;
 	struct mbuf *opts = 0;
-	int len;
+	int len, isbroadcast;
 	struct ip save_ip;
 	struct sockaddr *append_sa;
 	struct inpcbinfo *pcbinfo = &udbinfo;
 	struct sockaddr_in udp_in = {
 		sizeof (udp_in), AF_INET, 0, { 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 }
 	};
+	struct ip_moptions *imo = NULL;
+	int foundmembership = 0, ret = 0;
 #if INET6
 	struct udp_in6 udp_in6 = {
 		{ sizeof (udp_in6.uin6_sin), AF_INET6, 0, 0,
@@ -365,8 +369,9 @@ doudpcksum:
 		udpstat.udps_nosum++;
 #endif
 
-	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
-	    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
+	isbroadcast = in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif);
+
+	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || isbroadcast) {
 
 		int reuse_sock = 0, mcast_delivered = 0;
 
@@ -409,6 +414,11 @@ doudpcksum:
                         if ((inp->inp_vflag & INP_IPV4) == 0)
                                 continue;
 #endif
+			if ((inp->inp_moptions == NULL) &&
+				(ntohl(ip->ip_dst.s_addr) != INADDR_ALLHOSTS_GROUP) &&
+				(isbroadcast == 0))
+				continue;
 
 			if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
 				continue;
@@ -441,6 +451,35 @@ doudpcksum:
 				}
 			}
 
+			if (isbroadcast == 0 && (ntohl(ip->ip_dst.s_addr) != INADDR_ALLHOSTS_GROUP)) {	
+				if ((imo = inp->inp_moptions) == NULL) {
+					udp_unlock(inp->inp_socket, 1, 0);
+					continue;
+				} else {
+					struct sockaddr_in	 group;
+					int			 blocked;
+
+					IMO_LOCK(imo);
+
+					bzero(&group, sizeof(struct sockaddr_in));
+					group.sin_len = sizeof(struct sockaddr_in);
+					group.sin_family = AF_INET;
+					group.sin_addr = ip->ip_dst;
+
+					blocked = imo_multi_filter(imo, m->m_pkthdr.rcvif,
+						(struct sockaddr *)&group,
+						(struct sockaddr *)&udp_in);
+					if (blocked == MCAST_PASS) 
+						foundmembership = 1;
+				
+					IMO_UNLOCK(imo);
+					if (!foundmembership) {
+						udp_unlock(inp->inp_socket, 1, 0);
+						continue;
+					}
+					foundmembership = 0;
+				}
+			}
 			reuse_sock = inp->inp_socket->so_options& (SO_REUSEPORT|SO_REUSEADDR);
 			{
 #if IPSEC
@@ -537,21 +576,9 @@ doudpcksum:
 		}
 		else if (payload_len == 4 && *(u_int32_t*)((caddr_t)uh + sizeof(struct udphdr)) != 0) {
 			/* UDP encapsulated IPSec packet to pass through NAT */
-			size_t stripsiz;
-
-			stripsiz = sizeof(struct udphdr);
-
-			ip = mtod(m, struct ip *);
-			ovbcopy((caddr_t)ip, (caddr_t)(((u_char *)ip) + stripsiz), iphlen);
-			m->m_data += stripsiz;
-			m->m_len -= stripsiz;
-			m->m_pkthdr.len -= stripsiz;
-			ip = mtod(m, struct ip *);
-			ip->ip_len = ip->ip_len - stripsiz;
-			ip->ip_p = IPPROTO_ESP;
-
 			KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0);
-			esp4_input(m, iphlen);
+			/* preserve the udp header */
+			esp4_input(m, iphlen + sizeof(struct udphdr));
 			return;
 		}
 	}
@@ -624,8 +651,9 @@ doudpcksum:
 	 */
 	udp_in.sin_port = uh->uh_sport;
 	udp_in.sin_addr = ip->ip_src;
-	if (inp->inp_flags & INP_CONTROLOPTS
-	    || inp->inp_socket->so_options & SO_TIMESTAMP) {
+	if ((inp->inp_flags & INP_CONTROLOPTS) != 0
+	    || (inp->inp_socket->so_options & SO_TIMESTAMP) != 0
+	    || (inp->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
 #if INET6
 		if (inp->inp_vflag & INP_IPV6) {
 			int savedflags;
@@ -633,11 +661,17 @@ doudpcksum:
 			ip_2_ip6_hdr(&udp_ip6.uip6_ip6, ip);
 			savedflags = inp->inp_flags;
 			inp->inp_flags &= ~INP_UNMAPPABLEOPTS;
-			ip6_savecontrol(inp, &opts, &udp_ip6.uip6_ip6, m);
+			ret = ip6_savecontrol(inp, m, &opts);
 			inp->inp_flags = savedflags;
 		} else
 #endif
-		ip_savecontrol(inp, &opts, ip, m);
+		{
+			ret = ip_savecontrol(inp, &opts, ip, m);
+		}
+		if (ret != 0) {
+			udp_unlock(inp->inp_socket, 1, 0);
+			goto bad;
+		}
 	}
  	m_adj(m, iphlen + sizeof(struct udphdr));
 
@@ -651,10 +685,14 @@ doudpcksum:
 	} else
 #endif
 	append_sa = (struct sockaddr *)&udp_in;
+	if (nstat_collect) {
+		locked_add_64(&inp->inp_stat->rxpackets, 1);
+		locked_add_64(&inp->inp_stat->rxbytes, m->m_pkthdr.len);
+	}
+	so_recv_data_stat(inp->inp_socket, m, 0);
 	if (sbappendaddr(&inp->inp_socket->so_rcv, append_sa, m, opts, NULL) == 0) {
 		udpstat.udps_fullsock++;
-	}
-	else {
+	} else {
 		sorwakeup(inp->inp_socket);
 	}
 	udp_unlock(inp->inp_socket, 1, 0);
@@ -702,6 +740,7 @@ udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off,
 {
 	struct sockaddr *append_sa;
 	struct mbuf *opts = 0;
+	int ret = 0;
 
 #if CONFIG_MACF_NET
 	if (mac_inpcb_check_deliver(last, n, AF_INET, SOCK_DGRAM) != 0) {
@@ -709,8 +748,9 @@ udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off,
 		return;
 	}
 #endif
-	if (last->inp_flags & INP_CONTROLOPTS ||
-	    last->inp_socket->so_options & SO_TIMESTAMP) {
+	if ((last->inp_flags & INP_CONTROLOPTS) != 0 ||
+	    (last->inp_socket->so_options & SO_TIMESTAMP) != 0 ||
+	    (last->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
 #if INET6
 		if (last->inp_vflag & INP_IPV6) {
 			int savedflags;
@@ -721,11 +761,20 @@ udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off,
 			}
 			savedflags = last->inp_flags;
 			last->inp_flags &= ~INP_UNMAPPABLEOPTS;
-			ip6_savecontrol(last, &opts, &pudp_ip6->uip6_ip6, n);
+			ret = ip6_savecontrol(last, n, &opts);
+			if (ret != 0) {
+				last->inp_flags = savedflags;
+				goto error;
+			}
 			last->inp_flags = savedflags;
 		} else
 #endif
-		ip_savecontrol(last, &opts, ip, n);
+		{
+			ret = ip_savecontrol(last, &opts, ip, n);
+			if (ret != 0) {
+				goto error;
+			}
+		}
 	}
 #if INET6
 	if (last->inp_vflag & INP_IPV6) {
@@ -737,11 +786,22 @@ udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off,
 	} else
 #endif
 	append_sa = (struct sockaddr *)pudp_in;
+	if (nstat_collect) {
+		locked_add_64(&last->inp_stat->rxpackets, 1);
+		locked_add_64(&last->inp_stat->rxbytes, n->m_pkthdr.len);
+	}
+	so_recv_data_stat(last->inp_socket, n, 0);
 	m_adj(n, off);
 	if (sbappendaddr(&last->inp_socket->so_rcv, append_sa, n, opts, NULL) == 0) {
 		udpstat.udps_fullsock++;
-	} else
+	} else {
 		sorwakeup(last->inp_socket);
+	}
+	return;
+error:
+	m_freem(n);
+	m_freem(opts);
+	return;
 }
 
 /*
@@ -952,7 +1012,7 @@ udp_pcblist SYSCTL_HANDLER_ARGS
 	return error;
 }
 
-SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
+SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
 	    udp_pcblist, "S,xinpcb", "List of active UDP sockets");
 
 #if !CONFIG_EMBEDDED
@@ -1055,11 +1115,27 @@ udp_pcblist64 SYSCTL_HANDLER_ARGS
         return error;
 }
 
-SYSCTL_PROC(_net_inet_udp, OID_AUTO, pcblist64, CTLFLAG_RD, 0, 0,
+SYSCTL_PROC(_net_inet_udp, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
             udp_pcblist64, "S,xinpcb64", "List of active UDP sockets");
 
 #endif /* !CONFIG_EMBEDDED */
 
+static int
+udp_pcblist_n SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+	int error = 0;
+
+	error = get_pcblist_n(IPPROTO_UDP, req, &udbinfo);
+
+	return error;
+}
+
+SYSCTL_PROC(_net_inet_udp, OID_AUTO, pcblist_n, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
+            udp_pcblist_n, "S,xinpcb_n", "List of active UDP sockets");
+
 static __inline__ u_int16_t
 get_socket_id(struct socket * s)
 {
@@ -1075,6 +1151,69 @@ get_socket_id(struct socket * s)
 	return (val);
 }
 
+static int
+udp_check_pktinfo(struct mbuf *control, unsigned int *ifindex, struct in_addr *laddr)
+{
+	struct cmsghdr *cm = 0;
+	struct in_pktinfo *pktinfo;	
+	struct ifnet *ifp;
+
+	/*
+	 * XXX: Currently, we assume all the optional information is stored
+	 * in a single mbuf.
+	 */
+	if (control->m_next)
+		return (EINVAL);
+
+	if (control->m_len < CMSG_LEN(0))
+		return (EINVAL);
+
+	for (cm = M_FIRST_CMSGHDR(control); cm; cm = M_NXT_CMSGHDR(control, cm)) {
+		if (cm->cmsg_len < sizeof(struct cmsghdr) || cm->cmsg_len > control->m_len)
+			return (EINVAL);
+	
+		if (cm->cmsg_level != IPPROTO_IP || cm->cmsg_type != IP_PKTINFO)
+			continue;
+
+		if (cm->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo))) 
+			return (EINVAL);
+
+		pktinfo =  (struct in_pktinfo *)CMSG_DATA(cm);
+
+		/* Check for a valid ifindex in pktinfo */
+		ifnet_head_lock_shared();
+
+		if (pktinfo->ipi_ifindex > if_index) {
+			ifnet_head_done();
+			return (ENXIO);
+		}
+
+		/* If ipi_ifindex is specified, it takes precedence over ipi_spec_dst */
+
+		if (pktinfo->ipi_ifindex) {
+			ifp = ifindex2ifnet[pktinfo->ipi_ifindex];
+			if (ifp == NULL) {
+				ifnet_head_done();
+				return (ENXIO);
+			}
+
+			ifnet_head_done();
+
+			*ifindex = pktinfo->ipi_ifindex;
+			laddr->s_addr = INADDR_ANY;
+			break;
+		}
+
+		ifnet_head_done();
+
+		/* Use the provided ipi_spec_dst address for temp source address */
+		*ifindex = 0;
+		*laddr = pktinfo->ipi_spec_dst;
+		break;
+	}
+	return (0);
+}
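A sender exercises the checks above with a standard IP_PKTINFO control message. A minimal userland sketch; s, dst, iov and src_addr are placeholders for a UDP socket, destination sockaddr_in, payload iovec and temporary source address:

    #include <netinet/in.h>
    #include <sys/socket.h>
    #include <string.h>

    char cbuf[CMSG_SPACE(sizeof(struct in_pktinfo))];
    struct msghdr msg;
    struct cmsghdr *cm;
    struct in_pktinfo *pi;

    memset(&msg, 0, sizeof(msg));
    memset(cbuf, 0, sizeof(cbuf));
    msg.msg_name = (void *)&dst;
    msg.msg_namelen = sizeof(dst);
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    msg.msg_control = cbuf;
    msg.msg_controllen = sizeof(cbuf);

    cm = CMSG_FIRSTHDR(&msg);
    cm->cmsg_level = IPPROTO_IP;
    cm->cmsg_type = IP_PKTINFO;
    cm->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
    pi = (struct in_pktinfo *)CMSG_DATA(cm);
    pi->ipi_ifindex = 0;             /* nonzero would take precedence */
    pi->ipi_spec_dst = src_addr;     /* temporary source for this send */

    sendmsg(s, &msg, 0);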
+
 static int
 udp_output(inp, m, addr, control, p)
 	register struct inpcb *inp;
@@ -1086,28 +1225,34 @@ udp_output(inp, m, addr, control, p)
 	register struct udpiphdr *ui;
 	register int len = m->m_pkthdr.len;
 	struct sockaddr_in *sin;
-	struct in_addr origladdr, laddr, faddr;
+	struct in_addr origladdr, laddr, faddr, pi_laddr;
 	u_short lport, fport;
-	struct sockaddr_in *ifaddr;
-	int error = 0, udp_dodisconnect = 0;
+	struct sockaddr_in ifaddr;
+	int error = 0, udp_dodisconnect = 0, pktinfo = 0;
 	struct socket *so = inp->inp_socket;
 	int soopts = 0;
 	struct mbuf *inpopts;
 	struct ip_moptions *mopts;
 	struct route ro;
-	struct ip_out_args ipoa;
-#if PKT_PRIORITY
-	mbuf_traffic_class_t mtc = MBUF_TC_NONE;
-#endif /* PKT_PRIORITY */
+	struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
+	mbuf_traffic_class_t mtc = MBUF_TC_UNSPEC;
+	unsigned int origoutif;
+
+	pi_laddr.s_addr = INADDR_ANY;
 
 	KERNEL_DEBUG(DBG_FNC_UDP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0);
 
 	if (control != NULL) {
-#if PKT_PRIORITY
 		mtc = mbuf_traffic_class_from_control(control);
-#endif /* PKT_PRIORITY */
+
+		error = udp_check_pktinfo(control, &ipoa.ipoa_boundif, &pi_laddr);
+
 		m_freem(control);
+		if (error)
+			goto release;
+		pktinfo++;
 	}
+
 	KERNEL_DEBUG(DBG_LAYER_OUT_BEG, inp->inp_fport, inp->inp_lport,
 		     inp->inp_laddr.s_addr, inp->inp_faddr.s_addr,
 		     (htons((u_short)len + sizeof (struct udphdr))));
@@ -1117,11 +1262,16 @@ udp_output(inp, m, addr, control, p)
 		goto release;
 	}
 
-        lck_mtx_assert(inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
+        lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
 
-	/* If socket was bound to an ifindex, tell ip_output about it */
-	ipoa.ipoa_ifscope = (inp->inp_flags & INP_BOUND_IF) ?
-	    inp->inp_boundif : IFSCOPE_NONE;
+	/*
+	 * If socket was bound to an ifindex, tell ip_output about it.
+	 * If the ancillary IP_PKTINFO option contains an interface index,
+	 * it takes precedence over the one specified by IP_BOUND_IF.
+	 */
+	if (ipoa.ipoa_boundif == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF))
+		ipoa.ipoa_boundif = inp->inp_boundif;
+	ipoa.ipoa_nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
 	soopts |= IP_OUTARGS;
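
The precedence rule in the comment above can be exercised from userspace: a socket pinned to one interface with the IP_BOUND_IF socket option still sends a given datagram through another interface when that datagram carries an IP_PKTINFO with a nonzero ipi_ifindex. A small sketch of the socket-wide binding, with a hypothetical helper name:

#include <net/if.h>
#include <netinet/in.h>
#include <sys/socket.h>

/* Pin a UDP socket to an interface by name via IP_BOUND_IF.  A later
 * per-datagram IP_PKTINFO with a nonzero ipi_ifindex still overrides
 * this for that one sendmsg(2) call. */
static int
bind_to_ifname(int s, const char *name)
{
	unsigned int idx = if_nametoindex(name);

	if (idx == 0)
		return (-1);
	return (setsockopt(s, IPPROTO_IP, IP_BOUND_IF, &idx, sizeof(idx)));
}
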
 
 	/* If there was a routing change, discard cached route and check
@@ -1134,22 +1284,45 @@ udp_output(inp, m, addr, control, p)
 
 		/* src address is gone? */
 		if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) == NULL) {
-			if (inp->inp_flags & INP_INADDR_ANY) {
-				/* new src will be set later */
-				inp->inp_laddr.s_addr = INADDR_ANY;
-			} else {
+			if (((inp->inp_flags & INP_INADDR_ANY) == 0) || (so->so_state & SS_ISCONNECTED)) {
+				/* Rdar://5448998
+				 * If the source address is gone, return an error if either:
+				 * - the source address was explicitly specified, or
+				 * - the socket was already connected.
+				 */
 				error = EADDRNOTAVAIL;
 				goto release;
+			} else {
+				/* new src will be set later */
+				inp->inp_laddr.s_addr = INADDR_ANY;
+				inp->inp_last_outif = 0;
 			}
 		}
 		if (ia != NULL)
-			ifafree(&ia->ia_ifa);
+			IFA_REMREF(&ia->ia_ifa);
 		if (inp->inp_route.ro_rt != NULL)
 			rtfree(inp->inp_route.ro_rt);
 		inp->inp_route.ro_rt = NULL;
 	}
 
-	origladdr= laddr = inp->inp_laddr;
+	origoutif = inp->inp_last_outif;
+
+	/* IP_PKTINFO option check.
+	 * If a temporary scope or src address is provided, use it for this packet only
+	 * and make sure we forget it after sending this datagram.
+	 */
+
+	if (pi_laddr.s_addr != INADDR_ANY ||
+	    (ipoa.ipoa_boundif != IFSCOPE_NONE && pktinfo)) {
+		laddr = pi_laddr; /* temp src address for this datagram only */
+		origladdr.s_addr = INADDR_ANY;
+		udp_dodisconnect = 1; /* we don't want to keep the laddr or route */
+		inp->inp_flags |= INP_INADDR_ANY; /* remember we don't care about src addr.*/
+	} else {
+		origladdr = laddr = inp->inp_laddr;
+	}
+
 	faddr = inp->inp_faddr;
 	lport = inp->inp_lport;
 	fport = inp->inp_fport;
@@ -1165,8 +1338,11 @@ udp_output(inp, m, addr, control, p)
 			 * In case we don't have a local port set, go through the full connect.
 			 * We don't have a local port yet (ie, we can't be looked up),
 			 * so it's not an issue if the input runs at the same time we do this.
-		 	 */
-			error = in_pcbconnect(inp, addr, p);
+			 */
+
+			if (pi_laddr.s_addr != INADDR_ANY)	/* if we have a source address specified, use that */
+				inp->inp_laddr = pi_laddr;
+			error = in_pcbconnect(inp, addr, p, &ipoa.ipoa_boundif); /* if a scope is specified, use it */
 			if (error) {
 				goto release;
 			}
@@ -1176,19 +1352,21 @@ udp_output(inp, m, addr, control, p)
 			fport = inp->inp_fport;
 			udp_dodisconnect = 1;
 		}
-		else {	
+		else {
 			/* Fast path case
 			 * we have a full address and a local port.
 			 * use those info to build the packet without changing the pcb
 			 * and interfering with the input path. See 3851370
+			 * Note: we may have a scope from IP_PKTINFO, but
+			 * priority is always given to the scope provided by INP_BOUND_IF.
 			 */
 			if (laddr.s_addr == INADDR_ANY) {
-			   if ((error = in_pcbladdr(inp, addr, &ifaddr)) != 0)
+			   if ((error = in_pcbladdr(inp, addr, &ifaddr, &ipoa.ipoa_boundif)) != 0)
 				   goto release;
-			   laddr = ifaddr->sin_addr;
+			   laddr = ifaddr.sin_addr;
 			   inp->inp_flags |= INP_INADDR_ANY; /* from pcbconnect: remember we don't care about src addr.*/
 			}
-				   
+
 			faddr = sin->sin_addr;
 			fport = sin->sin_port;
 		}
@@ -1256,68 +1434,63 @@ udp_output(inp, m, addr, control, p)
 	inpopts = inp->inp_options;
 	soopts |= (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST));
 	mopts = inp->inp_moptions;
+	if (mopts != NULL)
+		IMO_ADDREF(mopts);
 
 	/* Copy the cached route and take an extra reference */
 	inp_route_copyout(inp, &ro);
 
-#if PKT_PRIORITY
-	set_traffic_class(m, so, mtc);
-#endif /* PKT_PRIORITY */
+	set_packet_tclass(m, so, mtc, 0);
 
 	socket_unlock(so, 0);
-	/* XXX jgraessley please look at XXX */
 	error = ip_output_list(m, 0, inpopts, &ro, soopts, mopts, &ipoa);
+	m = NULL;
 	socket_lock(so, 0);
+	if (mopts != NULL)
+		IMO_REMREF(mopts);
 
+	if (error == 0 && nstat_collect) {
+		locked_add_64(&inp->inp_stat->txpackets, 1);
+		locked_add_64(&inp->inp_stat->txbytes, len);
+	}
 	/* Synchronize PCB cached route */
 	inp_route_copyin(inp, &ro);
 
+abort:
 	if (udp_dodisconnect) {
-#if IFNET_ROUTE_REFCNT
 		/* Always discard the cached route for unconnected socket */
 		if (inp->inp_route.ro_rt != NULL) {
 			rtfree(inp->inp_route.ro_rt);
 			inp->inp_route.ro_rt = NULL;
 		}
-#endif /* IFNET_ROUTE_REFCNT */
 		in_pcbdisconnect(inp);
 		inp->inp_laddr = origladdr;	/* XXX rehash? */
-	}
-#if IFNET_ROUTE_REFCNT
-	else if (inp->inp_route.ro_rt != NULL &&
-	    (inp->inp_route.ro_rt->rt_flags & (RTF_MULTICAST|RTF_BROADCAST))) {
-		/* Always discard non-unicast cached route */
-		rtfree(inp->inp_route.ro_rt);
-		inp->inp_route.ro_rt = NULL;
-	}
-#endif /* IFNET_ROUTE_REFCNT */
+		inp->inp_last_outif = origoutif;
+	} else if (inp->inp_route.ro_rt != NULL) {
+		struct rtentry *rt = inp->inp_route.ro_rt;
+		unsigned int outif;
 
-	KERNEL_DEBUG(DBG_FNC_UDP_OUTPUT | DBG_FUNC_END, error, 0,0,0,0);
-	return (error);
-
-abort:
-        if (udp_dodisconnect) {
-#if IFNET_ROUTE_REFCNT
-		/* Always discard the cached route for unconnected socket */
-		if (inp->inp_route.ro_rt != NULL) {
+		if (rt->rt_flags & (RTF_MULTICAST|RTF_BROADCAST))
+			rt = NULL;	/* unusable */
+		/*
+		 * Always discard if it is a multicast or broadcast route.
+		 */
+		if (rt == NULL) {
 			rtfree(inp->inp_route.ro_rt);
 			inp->inp_route.ro_rt = NULL;
 		}
-#endif /* IFNET_ROUTE_REFCNT */
-		in_pcbdisconnect(inp);
-		inp->inp_laddr = origladdr; /* XXX rehash? */
-        }
-#if IFNET_ROUTE_REFCNT
-	else if (inp->inp_route.ro_rt != NULL &&
-	    (inp->inp_route.ro_rt->rt_flags & (RTF_MULTICAST|RTF_BROADCAST))) {
-		/* Always discard non-unicast cached route */
-		rtfree(inp->inp_route.ro_rt);
-		inp->inp_route.ro_rt = NULL;
+		/*
+		 * If the destination route is unicast, update outif with
+		 * that of the route interface index used by IP.
+		 */
+		if (rt != NULL &&
+		    (outif = rt->rt_ifp->if_index) != inp->inp_last_outif)
+			inp->inp_last_outif = outif;
 	}
-#endif /* IFNET_ROUTE_REFCNT */
 
 release:
-	m_freem(m);
+	if (m != NULL)
+		m_freem(m);
 	KERNEL_DEBUG(DBG_FNC_UDP_OUTPUT | DBG_FUNC_END, error, 0,0,0,0);
 	return (error);
 }
@@ -1362,10 +1535,10 @@ sysctl_udp_sospace(struct sysctl_oid *oidp, __unused void *arg1,
         return error;
 }
 
-SYSCTL_PROC(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &udp_recvspace, 0, &sysctl_udp_sospace, "IU", "Maximum incoming UDP datagram size");
 
-SYSCTL_PROC(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &udp_sendspace, 0, &sysctl_udp_sospace, "IU", "Maximum outgoing UDP datagram size");
 
 static int
@@ -1400,6 +1573,7 @@ udp_attach(struct socket *so, __unused int proto, struct proc *p)
 	inp = (struct inpcb *)so->so_pcb;
 	inp->inp_vflag |= INP_IPV4;
 	inp->inp_ip_ttl = ip_defttl;
+	nstat_udp_new_pcb(inp);
 	return 0;
 }
 
@@ -1431,7 +1605,7 @@ udp_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
 		return EINVAL;
 	if (inp->inp_faddr.s_addr != INADDR_ANY)
 		return EISCONN;
-	error = in_pcbconnect(inp, nam, p);
+	error = in_pcbconnect(inp, nam, p, NULL);
 	if (error == 0) 
 		soisconnected(so);
 	return error;
@@ -1464,6 +1638,7 @@ udp_disconnect(struct socket *so)
 	in_pcbdisconnect(inp);
 	inp->inp_laddr.s_addr = INADDR_ANY;
 	so->so_state &= ~SS_ISCONNECTED;		/* XXX */
+	inp->inp_last_outif = 0;
 	return 0;
 }
 
@@ -1514,9 +1689,9 @@ udp_lock(struct socket *so, int refcount, void *debug)
 		lr_saved = debug;
 
 	if (so->so_pcb) {
-		lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx,
+		lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx,
 		    LCK_MTX_ASSERT_NOTOWNED);
-		lck_mtx_lock(((struct inpcb *)so->so_pcb)->inpcb_mtx);
+		lck_mtx_lock(&((struct inpcb *)so->so_pcb)->inpcb_mtx);
 	} else {
 		panic("udp_lock: so=%p NO PCB! lr=%p lrh= %s\n", 
 		    so, lr_saved, solockhistory_nr(so));
@@ -1548,11 +1723,11 @@ udp_unlock(struct socket *so, int refcount, void *debug)
 		    so, lr_saved, solockhistory_nr(so));
 		/* NOTREACHED */
 	} else {
-		lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx,
+		lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx,
 		    LCK_MTX_ASSERT_OWNED);
 		so->unlock_lr[so->next_unlock_lr] = lr_saved;
 		so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
-		lck_mtx_unlock(((struct inpcb *)so->so_pcb)->inpcb_mtx);
+		lck_mtx_unlock(&((struct inpcb *)so->so_pcb)->inpcb_mtx);
 	}
 
 
@@ -1566,7 +1741,7 @@ udp_getlock(struct socket *so, __unused int locktype)
 
 
 	if (so->so_pcb)
-		return(inp->inpcb_mtx);
+		return(&inp->inpcb_mtx);
 	else {
 		panic("udp_getlock: so=%p NULL so_pcb lrh= %s\n", 
 			so, solockhistory_nr(so));
@@ -1598,7 +1773,7 @@ udp_slowtimo()
 			continue;
 
 		so = inp->inp_socket;
-		if (!lck_mtx_try_lock(inp->inpcb_mtx))	/* skip if busy, no hurry for cleanup... */
+		if (!lck_mtx_try_lock(&inp->inpcb_mtx))	/* skip if busy, no hurry for cleanup... */
 			continue;
 
 		if (so->so_usecount == 0) {
@@ -1612,7 +1787,7 @@ udp_slowtimo()
 			}
 			in_pcbdispose(inp);
 		} else {
-			lck_mtx_unlock(inp->inpcb_mtx);
+			lck_mtx_unlock(&inp->inpcb_mtx);
 		}
 	}
 	lck_rw_done(pcbinfo->mtx);
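
One recurring mechanical change throughout the file above: the inpcb mutex appears to have moved from a separately allocated lck_mtx_t pointer to a lck_mtx_t embedded in struct inpcb, so every lck_mtx_assert/lock/unlock call now takes the member's address. Schematically (struct and field names here are illustrative, not the real inpcb layout):

#include <kern/locks.h>

struct pcb_v1 { lck_mtx_t *mtx; };	/* before: lck_mtx_lock(pcb->mtx);  */
struct pcb_v2 { lck_mtx_t  mtx; };	/* after:  lck_mtx_lock(&pcb->mtx); */
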
diff --git a/bsd/netinet6/Makefile b/bsd/netinet6/Makefile
index fc12c8bef..f765bace4 100644
--- a/bsd/netinet6/Makefile
+++ b/bsd/netinet6/Makefile
@@ -9,14 +9,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 DATAFILES = \
@@ -25,12 +21,12 @@ DATAFILES = \
 	in6_var.h ip6_mroute.h nd6.h ip6_fw.h
 
 PRIVATE_DATAFILES = \
-	in6_pcb.h ip6_var.h pim6_var.h
+	in6_pcb.h ip6_var.h pim6_var.h mld6_var.h
 
 PRIVATE_KERNELFILES = \
 	ah6.h esp6.h esp_rijndael.h in6_gif.h in6_ifattach.h \
 	 in6_prefix.h ip6_ecn.h ip6_fw.h  \
-	ip6protosw.h ipcomp6.h ipsec6.h mld6_var.h  \
+	ip6protosw.h ipcomp6.h ipsec6.h \
 	raw_ip6.h scope6_var.h tcp6_var.h udp6_var.h
 
 INSTALL_MI_LIST	= ${DATAFILES}
diff --git a/bsd/netinet6/ah.h b/bsd/netinet6/ah.h
index f77826a0d..cf9ddad2a 100644
--- a/bsd/netinet6/ah.h
+++ b/bsd/netinet6/ah.h
@@ -71,7 +71,7 @@ struct ah_algorithm {
 	const char *name;
 	int (*init)(struct ah_algorithm_state *, struct secasvar *);
 	void (*update)(struct ah_algorithm_state *, caddr_t, size_t);
-	void (*result)(struct ah_algorithm_state *, caddr_t);
+	void (*result)(struct ah_algorithm_state *, caddr_t, size_t);
 };
 
 #define	AH_MAXSUMSIZE	64 // sha2-512's output size
diff --git a/bsd/netinet6/ah6.h b/bsd/netinet6/ah6.h
index 22cda6f12..688f946d5 100644
--- a/bsd/netinet6/ah6.h
+++ b/bsd/netinet6/ah6.h
@@ -41,7 +41,7 @@
 #ifdef KERNEL_PRIVATE
 struct secasvar;
 
-extern int ah6_input(struct mbuf **, int *);
+extern int ah6_input(struct mbuf **, int *, int);
 extern int ah6_output(struct mbuf *, u_char *, struct mbuf *,
 	struct secasvar *);
 extern int ah6_calccksum(struct mbuf *, caddr_t, size_t,
diff --git a/bsd/netinet6/ah_core.c b/bsd/netinet6/ah_core.c
index 042550b78..27098a76f 100644
--- a/bsd/netinet6/ah_core.c
+++ b/bsd/netinet6/ah_core.c
@@ -119,42 +119,42 @@ static int ah_sumsiz_zero(struct secasvar *);
 static int ah_none_mature(struct secasvar *);
 static int ah_none_init(struct ah_algorithm_state *, struct secasvar *);
 static void ah_none_loop(struct ah_algorithm_state *, caddr_t, size_t);
-static void ah_none_result(struct ah_algorithm_state *, caddr_t);
+static void ah_none_result(struct ah_algorithm_state *, caddr_t, size_t);
 static int ah_keyed_md5_mature(struct secasvar *);
 static int ah_keyed_md5_init(struct ah_algorithm_state *, struct secasvar *);
 static void ah_keyed_md5_loop(struct ah_algorithm_state *, caddr_t, size_t);
-static void ah_keyed_md5_result(struct ah_algorithm_state *, caddr_t);
+static void ah_keyed_md5_result(struct ah_algorithm_state *, caddr_t, size_t);
 static int ah_keyed_sha1_mature(struct secasvar *);
 static int ah_keyed_sha1_init(struct ah_algorithm_state *, struct secasvar *);
 static void ah_keyed_sha1_loop(struct ah_algorithm_state *, caddr_t, size_t);
-static void ah_keyed_sha1_result(struct ah_algorithm_state *, caddr_t);
+static void ah_keyed_sha1_result(struct ah_algorithm_state *, caddr_t, size_t);
 static int ah_hmac_md5_mature(struct secasvar *);
 static int ah_hmac_md5_init(struct ah_algorithm_state *, struct secasvar *);
 static void ah_hmac_md5_loop(struct ah_algorithm_state *, caddr_t, size_t);
-static void ah_hmac_md5_result(struct ah_algorithm_state *, caddr_t);
+static void ah_hmac_md5_result(struct ah_algorithm_state *, caddr_t, size_t);
 static int ah_hmac_sha1_mature(struct secasvar *);
 static int ah_hmac_sha1_init(struct ah_algorithm_state *, struct secasvar *);
 static void ah_hmac_sha1_loop(struct ah_algorithm_state *, caddr_t, size_t);
-static void ah_hmac_sha1_result(struct ah_algorithm_state *, caddr_t);
+static void ah_hmac_sha1_result(struct ah_algorithm_state *, caddr_t, size_t);
 #if ALLCRYPTO
 static int ah_sumsiz_sha2_256(struct secasvar *);
 static int ah_hmac_sha2_256_mature(struct secasvar *);
 static int ah_hmac_sha2_256_init(struct ah_algorithm_state *,
 	struct secasvar *);
 static void ah_hmac_sha2_256_loop(struct ah_algorithm_state *, caddr_t, size_t);
-static void ah_hmac_sha2_256_result(struct ah_algorithm_state *, caddr_t);
+static void ah_hmac_sha2_256_result(struct ah_algorithm_state *, caddr_t, size_t);
 static int ah_sumsiz_sha2_384(struct secasvar *);
 static int ah_hmac_sha2_384_mature(struct secasvar *);
 static int ah_hmac_sha2_384_init(struct ah_algorithm_state *,
 	struct secasvar *);
 static void ah_hmac_sha2_384_loop(struct ah_algorithm_state *, caddr_t, size_t);
-static void ah_hmac_sha2_384_result(struct ah_algorithm_state *, caddr_t);
+static void ah_hmac_sha2_384_result(struct ah_algorithm_state *, caddr_t, size_t);
 static int ah_sumsiz_sha2_512(struct secasvar *);
 static int ah_hmac_sha2_512_mature(struct secasvar *);
 static int ah_hmac_sha2_512_init(struct ah_algorithm_state *,
 	struct secasvar *);
 static void ah_hmac_sha2_512_loop(struct ah_algorithm_state *, caddr_t, size_t);
-static void ah_hmac_sha2_512_result(struct ah_algorithm_state *, caddr_t);
+static void ah_hmac_sha2_512_result(struct ah_algorithm_state *, caddr_t, size_t);
 #endif /* ALLCRYPTO */
 
 static void ah_update_mbuf(struct mbuf *, int, int,
@@ -280,7 +280,8 @@ ah_none_loop(
 static void
 ah_none_result(
 	__unused struct ah_algorithm_state *state,
-	__unused caddr_t addr)
+	__unused caddr_t addr,
+	__unused size_t l)
 {
 }
 
@@ -363,9 +364,10 @@ ah_keyed_md5_loop(state, addr, len)
 }
 
 static void
-ah_keyed_md5_result(state, addr)
+ah_keyed_md5_result(state, addr, l)
 	struct ah_algorithm_state *state;
 	caddr_t addr;
+	size_t l;
 {
 	u_char digest[16];
 
@@ -379,7 +381,7 @@ ah_keyed_md5_result(state, addr)
 	}
 	MD5Final(&digest[0], (MD5_CTX *)state->foo);
 	FREE(state->foo, M_TEMP);
-	bcopy(&digest[0], (void *)addr, sizeof(digest));
+	bcopy(&digest[0], (void *)addr, sizeof(digest) > l ? l : sizeof(digest));
 }
 
 static int
@@ -484,9 +486,10 @@ ah_keyed_sha1_loop(state, addr, len)
 }
 
 static void
-ah_keyed_sha1_result(state, addr)
+ah_keyed_sha1_result(state, addr, l)
 	struct ah_algorithm_state *state;
 	caddr_t addr;
+	size_t l;
 {
 	u_char digest[SHA1_RESULTLEN];	/* SHA-1 generates 160 bits */
 	SHA1_CTX *ctxt;
@@ -500,7 +503,7 @@ ah_keyed_sha1_result(state, addr)
 			(u_int)_KEYLEN(state->sav->key_auth));
 	}
 	SHA1Final((caddr_t)&digest[0], ctxt);
-	bcopy(&digest[0], (void *)addr, HMACSIZE);
+	bcopy(&digest[0], (void *)addr, sizeof(digest) > l ? l : sizeof(digest));
 
 	FREE(state->foo, M_TEMP);
 }
@@ -601,9 +604,10 @@ ah_hmac_md5_loop(state, addr, len)
 }
 
 static void
-ah_hmac_md5_result(state, addr)
+ah_hmac_md5_result(state, addr, l)
 	struct ah_algorithm_state *state;
 	caddr_t addr;
+	size_t l;
 {
 	u_char digest[16];
 	u_char *ipad;
@@ -624,7 +628,7 @@ ah_hmac_md5_result(state, addr)
 	MD5Update(ctxt, &digest[0], sizeof(digest));
 	MD5Final(&digest[0], ctxt);
 
-	bcopy(&digest[0], (void *)addr, HMACSIZE);
+	bcopy(&digest[0], (void *)addr, sizeof(digest) > l ? l : sizeof(digest));
 
 	FREE(state->foo, M_TEMP);
 }
@@ -727,9 +731,10 @@ ah_hmac_sha1_loop(state, addr, len)
 }
 
 static void
-ah_hmac_sha1_result(state, addr)
+ah_hmac_sha1_result(state, addr, l)
 	struct ah_algorithm_state *state;
 	caddr_t addr;
+	size_t l;
 {
 	u_char digest[SHA1_RESULTLEN];	/* SHA-1 generates 160 bits */
 	u_char *ipad;
@@ -750,7 +755,7 @@ ah_hmac_sha1_result(state, addr)
 	SHA1Update(ctxt, (caddr_t)&digest[0], sizeof(digest));
 	SHA1Final((caddr_t)&digest[0], ctxt);
 
-	bcopy(&digest[0], (void *)addr, HMACSIZE);
+	bcopy(&digest[0], (void *)addr, sizeof(digest) > l ? l : sizeof(digest));
 
 	FREE(state->foo, M_TEMP);
 }
@@ -869,10 +874,12 @@ ah_hmac_sha2_256_loop(state, addr, len)
 }
 
 static void
-ah_hmac_sha2_256_result(state, addr)
+ah_hmac_sha2_256_result(state, addr, l)
 	struct ah_algorithm_state *state;
 	caddr_t addr;
+	size_t l;
 {
+	u_char digest[SHA256_DIGEST_LENGTH];
 	u_char *ipad;
 	u_char *opad;
 	SHA256_CTX *ctxt;
@@ -884,13 +891,14 @@ ah_hmac_sha2_256_result(state, addr)
 	opad = (u_char *)(ipad + 64);
 	ctxt = (SHA256_CTX *)(opad + 64);
 
-	SHA256_Final((u_int8_t *)addr, ctxt);
+	SHA256_Final((u_int8_t *)digest, ctxt);
 
-	bzero(ctxt, sizeof(*ctxt));
 	SHA256_Init(ctxt);
 	SHA256_Update(ctxt, opad, 64);
-	SHA256_Update(ctxt, (const u_int8_t *)addr, SHA256_DIGEST_LENGTH);
-	SHA256_Final((u_int8_t *)addr, ctxt);
+	SHA256_Update(ctxt, (const u_int8_t *)digest, sizeof(digest));
+	SHA256_Final((u_int8_t *)digest, ctxt);
+
+	bcopy(&digest[0], (void *)addr, sizeof(digest) > l ? l : sizeof(digest));
 
 	FREE(state->foo, M_TEMP);
 }
@@ -1009,10 +1017,12 @@ ah_hmac_sha2_384_loop(state, addr, len)
 }
 
 static void
-ah_hmac_sha2_384_result(state, addr)
+ah_hmac_sha2_384_result(state, addr, l)
 	struct ah_algorithm_state *state;
 	caddr_t addr;
+	size_t l;
 {
+	u_char digest[SHA384_DIGEST_LENGTH];
 	u_char *ipad;
 	u_char *opad;
 	SHA384_CTX *ctxt;
@@ -1024,13 +1034,14 @@ ah_hmac_sha2_384_result(state, addr)
 	opad = (u_char *)(ipad + 128);
 	ctxt = (SHA384_CTX *)(opad + 128);
 
-	SHA384_Final((u_int8_t *)addr, ctxt);
+	SHA384_Final((u_int8_t *)digest, ctxt);
 
-	bzero(ctxt, sizeof(*ctxt));
 	SHA384_Init(ctxt);
 	SHA384_Update(ctxt, opad, 128);
-	SHA384_Update(ctxt, (const u_int8_t *)addr, SHA384_DIGEST_LENGTH);
-	SHA384_Final((u_int8_t *)addr, ctxt);
+	SHA384_Update(ctxt, (const u_int8_t *)digest, sizeof(digest));
+	SHA384_Final((u_int8_t *)digest, ctxt);
+
+	bcopy(&digest[0], (void *)addr, sizeof(digest) > l ? l : sizeof(digest));
 
 	FREE(state->foo, M_TEMP);
 }
@@ -1149,10 +1160,12 @@ ah_hmac_sha2_512_loop(state, addr, len)
 }
 
 static void
-ah_hmac_sha2_512_result(state, addr)
+ah_hmac_sha2_512_result(state, addr, l)
 	struct ah_algorithm_state *state;
 	caddr_t addr;
+	size_t l;
 {
+	u_char digest[SHA512_DIGEST_LENGTH];
 	u_char *ipad;
 	u_char *opad;
 	SHA512_CTX *ctxt;
@@ -1164,13 +1177,14 @@ ah_hmac_sha2_512_result(state, addr)
 	opad = (u_char *)(ipad + 128);
 	ctxt = (SHA512_CTX *)(opad + 128);
 
-	SHA512_Final((u_int8_t *)addr, ctxt);
+	SHA512_Final((u_int8_t *)digest, ctxt);
 
-	bzero(ctxt, sizeof(*ctxt));
 	SHA512_Init(ctxt);
 	SHA512_Update(ctxt, opad, 128);
-	SHA512_Update(ctxt, (const u_int8_t *)addr, SHA512_DIGEST_LENGTH);
-	SHA512_Final((u_int8_t *)addr, ctxt);
+	SHA512_Update(ctxt, (const u_int8_t *)digest, sizeof(digest));
+	SHA512_Final((u_int8_t *)digest, ctxt);
+
+	bcopy(&digest[0], (void *)addr, sizeof(digest) > l ? l : sizeof(digest));
 
 	FREE(state->foo, M_TEMP);
 }
@@ -1453,7 +1467,7 @@ again:
 		goto fail;
 	}
 
-	(algo->result)(&algos, (caddr_t) &sumbuf[0]);
+	(algo->result)(&algos, (caddr_t) &sumbuf[0], sizeof(sumbuf));
 	bcopy(&sumbuf[0], ahdat, (*algo->sumsiz)(sav));
 
 	if (n)
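
All of the widened (*algo->result)() callbacks above converge on the same bounded-copy idiom: the caller passes the destination size (here sizeof(sumbuf)) so a callback can never write past the authenticator buffer. A standalone sketch of the idiom, with a hypothetical helper name:

#include <string.h>
#include <sys/types.h>

/* Write min(digestlen, l) bytes, exactly what the bcopy() calls above
 * do with "sizeof(digest) > l ? l : sizeof(digest)". */
static void
digest_copyout(caddr_t addr, size_t l, const unsigned char *digest,
    size_t digestlen)
{
	memcpy(addr, digest, digestlen > l ? l : digestlen);
}
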
@@ -1680,7 +1694,7 @@ ah6_calccksum(m, ahdat, len, algo, sav)
 		goto fail;
 	}
 
-	(algo->result)(&algos, (caddr_t) &sumbuf[0]);
+	(algo->result)(&algos, (caddr_t) &sumbuf[0], sizeof(sumbuf));
 	bcopy(&sumbuf[0], ahdat, (*algo->sumsiz)(sav));
 
 	/* just in case */
diff --git a/bsd/netinet6/ah_input.c b/bsd/netinet6/ah_input.c
index fcffc1ded..a448295b7 100644
--- a/bsd/netinet6/ah_input.c
+++ b/bsd/netinet6/ah_input.c
@@ -116,6 +116,7 @@
 
 #include <net/kpi_protocol.h>
 #include <netinet/kpi_ipfilter_var.h>
+#include <mach/sdt.h>
 
 #include <net/net_osdep.h>
 
@@ -416,6 +417,9 @@ ah4_input(struct mbuf *m, int off)
 		stripsiz = sizeof(struct newah) + siz1;
 	}
 	if (ipsec4_tunnel_validate(m, off + stripsiz, nxt, sav, &ifamily)) {
+		ifaddr_t ifa;
+		struct sockaddr_storage addr;
+
 		/*
 		 * strip off all the headers that precedes AH.
 		 *	IP xx AH IP' payload -> IP' payload
@@ -481,7 +485,25 @@ ah4_input(struct mbuf *m, int off)
 			IPSEC_STAT_INCREMENT(ipsecstat.in_nomem);
 			goto fail;
 		}
-		proto_input(PF_INET, m);
+
+		if (ip_doscopedroute) {
+			struct sockaddr_in *ipaddr;
+
+			bzero(&addr, sizeof(addr));
+			ipaddr = (__typeof__(ipaddr))&addr;
+			ipaddr->sin_family = AF_INET;
+			ipaddr->sin_len = sizeof(*ipaddr);
+			ipaddr->sin_addr = ip->ip_dst;
+
+			// update the receiving interface address based on the inner address
+			ifa = ifa_ifwithaddr((struct sockaddr *)&addr);
+			if (ifa) {
+				m->m_pkthdr.rcvif = ifa->ifa_ifp;
+				IFA_REMREF(ifa);
+			}
+		}
+		if (proto_input(PF_INET, m) != 0)
+			goto fail;
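
A word on the block above, since the same pattern recurs in ah6_input() and both ESP input paths: with scoped routing enabled, the decapsulated packet is attributed to the interface that owns the inner destination address, so later per-interface lookups key off the logical interface rather than the one that carried the tunnel. Condensed into a hypothetical kernel-side helper (sketch only):

/* Point m_pkthdr.rcvif at the interface owning the inner IPv4
 * destination, if any.  ifa_ifwithaddr() returns a held reference,
 * hence the IFA_REMREF(). */
static void
rcvif_from_inner_dst(struct mbuf *m, struct in_addr dst)
{
	struct sockaddr_in sin;
	ifaddr_t ifa;

	bzero(&sin, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(sin);
	sin.sin_addr = dst;

	ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
	if (ifa != NULL) {
		m->m_pkthdr.rcvif = ifa->ifa_ifp;
		IFA_REMREF(ifa);
	}
}
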
 		nxt = IPPROTO_DONE;
 	} else {
 		/*
@@ -549,6 +571,10 @@ ah4_input(struct mbuf *m, int off)
 			goto fail;
 		}
 
+		DTRACE_IP6(receive, struct mbuf *, m, struct inpcb *, NULL,
+		    struct ip *, ip, struct ifnet *, m->m_pkthdr.rcvif,
+		    struct ip *, ip, struct ip6_hdr *, NULL);
+
 		if (nxt != IPPROTO_DONE) {
 			if ((ip_protox[nxt]->pr_flags & PR_LASTHDR) != 0 &&
 			    ipsec4_in_reject(m, NULL)) {
@@ -583,10 +609,9 @@ fail:
 
 #if INET6
 int
-ah6_input(mp, offp)
-	struct mbuf **mp;
-	int *offp;
+ah6_input(struct mbuf **mp, int *offp, int proto)
 {
+#pragma unused(proto)
 	struct mbuf *m = *mp;
 	int off = *offp;
 	struct ip6_hdr *ip6;
@@ -825,6 +850,9 @@ ah6_input(mp, offp)
 		stripsiz = sizeof(struct newah) + siz1;
 	}
 	if (ipsec6_tunnel_validate(m, off + stripsiz, nxt, sav)) {
+		ifaddr_t ifa;
+		struct sockaddr_storage addr;
+
 		/*
 		 * strip off all the headers that precedes AH.
 		 *	IP6 xx AH IP6' payload -> IP6' payload
@@ -875,7 +903,26 @@ ah6_input(mp, offp)
 			IPSEC_STAT_INCREMENT(ipsec6stat.in_nomem);
 			goto fail;
 		}
-		proto_input(PF_INET6, m);
+
+		if (ip6_doscopedroute) {
+			struct sockaddr_in6 *ip6addr;
+
+			bzero(&addr, sizeof(addr));
+			ip6addr = (__typeof__(ip6addr))&addr;
+			ip6addr->sin6_family = AF_INET6;
+			ip6addr->sin6_len = sizeof(*ip6addr);
+			ip6addr->sin6_addr = ip6->ip6_dst;
+
+			// update the receiving interface address based on the inner address
+			ifa = ifa_ifwithaddr((struct sockaddr *)&addr);
+			if (ifa) {
+				m->m_pkthdr.rcvif = ifa->ifa_ifp;
+				IFA_REMREF(ifa);
+			}
+		}
+
+		if (proto_input(PF_INET6, m) != 0)
+			goto fail;
 		nxt = IPPROTO_DONE;
 	} else {
 		/*
diff --git a/bsd/netinet6/dest6.c b/bsd/netinet6/dest6.c
index ae7a18b8a..993ee1a91 100644
--- a/bsd/netinet6/dest6.c
+++ b/bsd/netinet6/dest6.c
@@ -54,17 +54,13 @@
  * Destination options header processing.
  */
 int
-dest6_input(mp, offp)
-	struct mbuf **mp;
-	int *offp;
+dest6_input(struct mbuf **mp, int *offp, int proto)
 {
+#pragma unused(proto)
 	struct mbuf *m = *mp;
 	int off = *offp, dstoptlen, optlen;
 	struct ip6_dest *dstopts;
 	u_int8_t *opt;
-	struct ip6_hdr *ip6;
-
-	ip6 = mtod(m, struct ip6_hdr *);
 
 	/* validation of the length of the header */
 #ifndef PULLDOWN_TEST
@@ -107,7 +103,7 @@ dest6_input(mp, offp)
 
 		default:		/* unknown option */
 			optlen = ip6_unknown_opt(opt, m,
-			    opt - mtod(m, u_int8_t *), 0);
+			    opt - mtod(m, u_int8_t *));
 			if (optlen == -1)
 				return (IPPROTO_DONE);
 			optlen += 2;
diff --git a/bsd/netinet6/esp6.h b/bsd/netinet6/esp6.h
index e0c40b37f..7b054cd50 100644
--- a/bsd/netinet6/esp6.h
+++ b/bsd/netinet6/esp6.h
@@ -69,7 +69,7 @@
 #ifdef KERNEL_PRIVATE
 extern int esp6_output(struct mbuf *, u_char *, struct mbuf *,
 	struct secasvar *);
-extern int esp6_input(struct mbuf **, int *);
+extern int esp6_input(struct mbuf **, int *, int);
 
 extern void esp6_ctlinput(int, struct sockaddr *, void *);
 #endif /* KERNEL_PRIVATE */
diff --git a/bsd/netinet6/esp_core.c b/bsd/netinet6/esp_core.c
index 3bae5bd18..905de9ba2 100644
--- a/bsd/netinet6/esp_core.c
+++ b/bsd/netinet6/esp_core.c
@@ -1203,7 +1203,7 @@ esp_auth(m0, skip, length, sav, sum)
 			break;
 		}
 	}
-	(*algo->result)(&s, (caddr_t) sumbuf);
+	(*algo->result)(&s, (caddr_t) sumbuf, sizeof(sumbuf));
 	bcopy(sumbuf, sum, siz);	/*XXX*/
 	KERNEL_DEBUG(DBG_FNC_ESPAUTH | DBG_FUNC_END, 6,0,0,0,0);
 	return 0;
diff --git a/bsd/netinet6/esp_input.c b/bsd/netinet6/esp_input.c
index b228fb035..c64150319 100644
--- a/bsd/netinet6/esp_input.c
+++ b/bsd/netinet6/esp_input.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -119,6 +119,7 @@
 #include <netinet/kpi_ipfilter_var.h>
 
 #include <net/net_osdep.h>
+#include <mach/sdt.h>
 
 #include <sys/kdebug.h>
 #define DBG_LAYER_BEG		NETDBG_CODE(DBG_NETIPSEC, 1)
@@ -136,16 +137,37 @@ extern struct protosw inetsw[];
 	(sizeof(struct esp) < sizeof(struct newesp) \
 		? sizeof(struct newesp) : sizeof(struct esp))
 
+static struct ip *
+esp4_input_strip_UDP_encap(struct mbuf *m, int iphlen)
+{
+	// strip the udp header that's encapsulating ESP
+	struct ip *ip;
+	size_t     stripsiz = sizeof(struct udphdr);
+
+	ip = mtod(m, __typeof__(ip));
+	ovbcopy((caddr_t)ip, (caddr_t)(((u_char *)ip) + stripsiz), iphlen);
+	m->m_data += stripsiz;
+	m->m_len -= stripsiz;
+	m->m_pkthdr.len -= stripsiz;
+	ip = mtod(m, __typeof__(ip));
+	ip->ip_len = ip->ip_len - stripsiz;
+	ip->ip_p = IPPROTO_ESP;
+	return ip;
+}
+
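
The arithmetic in esp4_input_strip_UDP_encap() is easy to get backwards, so a layout sketch (RFC 3948 style UDP-encapsulated ESP; byte counts assume the usual 8-byte struct udphdr):

/*
 *  before:  | IP (p=UDP, len=L)  | UDP(8) | ESP ... |   off = iphlen + 8
 *  after:   | IP (p=ESP, len=L-8)         | ESP ... |   off = iphlen
 *
 * ovbcopy() slides the iphlen-byte IP header forward over the UDP
 * header; m_data, m_len, m_pkthdr.len and ip_len all shrink by 8.
 */
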
 void
 esp4_input(m, off)
 	struct mbuf *m;
 	int off;
 {
 	struct ip *ip;
+#if INET6
 	struct ip6_hdr *ip6;
+#endif /* INET6 */
 	struct esp *esp;
 	struct esptail esptail;
 	u_int32_t spi;
+	u_int32_t seq;
 	struct secasvar *sav = NULL;
 	size_t taillen;
 	u_int16_t nxt;
@@ -175,6 +197,14 @@ esp4_input(m, off)
 	}
 
 	ip = mtod(m, struct ip *);
+	// expect udp-encap and esp packets only
+	if (ip->ip_p != IPPROTO_ESP &&
+	    !(ip->ip_p == IPPROTO_UDP && off >= sizeof(struct udphdr))) {
+		ipseclog((LOG_DEBUG,
+			  "IPv4 ESP input: invalid protocol type\n"));
+		IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
+		goto bad;
+	}
 	esp = (struct esp *)(((u_int8_t *)ip) + off);
 #ifdef _IP_VHL
 	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
@@ -222,6 +252,7 @@ esp4_input(m, off)
 		goto bad;
 	}
 
+	seq = ntohl(((struct newesp *)esp)->esp_seq);
 	if (!((sav->flags & SADB_X_EXT_OLD) == 0 && sav->replay
 	 && (sav->alg_auth && sav->key_auth)))
 		goto noreplaycheck;
@@ -233,7 +264,7 @@ esp4_input(m, off)
 	/*
 	 * check for sequence number.
 	 */
-	if (ipsec_chkreplay(ntohl(((struct newesp *)esp)->esp_seq), sav))
+	if (ipsec_chkreplay(seq, sav))
 		; /*okey*/
 	else {
 		IPSEC_STAT_INCREMENT(ipsecstat.in_espreplay);
@@ -298,7 +329,7 @@ esp4_input(m, off)
 	 * update sequence number.
 	 */
 	if ((sav->flags & SADB_X_EXT_OLD) == 0 && sav->replay) {
-		if (ipsec_updatereplay(ntohl(((struct newesp *)esp)->esp_seq), sav)) {
+		if (ipsec_updatereplay(seq, sav)) {
 			IPSEC_STAT_INCREMENT(ipsecstat.in_espreplay);
 			goto bad;
 		}
@@ -388,9 +419,40 @@ noreplaycheck:
 #else
 	ip->ip_len = htons(ntohs(ip->ip_len) - taillen);
 #endif
+	if (ip->ip_p == IPPROTO_UDP) {
+		// offset includes the outer ip and udp header lengths.
+		if (m->m_len < off) {
+			m = m_pullup(m, off);
+			if (!m) {
+				ipseclog((LOG_DEBUG,
+					  "IPv4 ESP input: invalid UDP-encapsulated ESP packet length\n"));
+				IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
+				goto bad;
+			}
+		}
+
+		// check the UDP encap header to detect changes in the source port, and then strip the header
+		off -= sizeof(struct udphdr); // off no longer includes the udphdr's size
+		// if peer is behind nat and this is the latest esp packet
+		if ((sav->flags & SADB_X_EXT_NATT_DETECTED_PEER) != 0 &&
+		    (sav->flags & SADB_X_EXT_OLD) == 0 &&
+		    seq && sav->replay &&
+		    seq >= sav->replay->lastseq)  {
+			struct udphdr *encap_uh = (__typeof__(encap_uh))((caddr_t)ip + off);
+			if (encap_uh->uh_sport &&
+			    encap_uh->uh_sport != sav->remote_ike_port) {
+				sav->remote_ike_port = encap_uh->uh_sport;
+			}
+		}
+		ip = esp4_input_strip_UDP_encap(m, off);
+		esp = (struct esp *)(((u_int8_t *)ip) + off);
+	}
 
 	/* was it transmitted over the IPsec tunnel SA? */
 	if (ipsec4_tunnel_validate(m, off + esplen + ivlen, nxt, sav, &ifamily)) {
+		ifaddr_t ifa;
+		struct sockaddr_storage addr;
+
 		/*
 		 * strip off all the headers that precedes ESP header.
 		 *	IP4 xx ESP IP4' payload -> IP4' payload
@@ -403,6 +465,8 @@ noreplaycheck:
 		tos = ip->ip_tos;
 		m_adj(m, off + esplen + ivlen);
 		if (ifamily == AF_INET) {
+			struct sockaddr_in *ipaddr;
+
 			if (m->m_len < sizeof(*ip)) {
 				m = m_pullup(m, sizeof(*ip));
 				if (!m) {
@@ -421,8 +485,18 @@ noreplaycheck:
 				IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
 				goto bad;
 			}
+
+			if (ip_doscopedroute) {
+				bzero(&addr, sizeof(addr));
+				ipaddr = (__typeof__(ipaddr))&addr;
+				ipaddr->sin_family = AF_INET;
+				ipaddr->sin_len = sizeof(*ipaddr);
+				ipaddr->sin_addr = ip->ip_dst;
+			}
 #if INET6
 		} else if (ifamily == AF_INET6) {
+			struct sockaddr_in6 *ip6addr;
+
 #ifndef PULLDOWN_TEST
 			/*
 			 * m_pullup is prohibited in KAME IPv6 input processing
@@ -452,7 +526,15 @@ noreplaycheck:
 			    ipsec6_logpacketstr(ip6, spi), ipsec_logsastr(sav)));
 				IPSEC_STAT_INCREMENT(ipsecstat.in_inval);
 				goto bad;
-			}		
+			}
+
+			if (ip6_doscopedroute) {
+				bzero(&addr, sizeof(addr));
+				ip6addr = (__typeof__(ip6addr))&addr;
+				ip6addr->sin6_family = AF_INET6;
+				ip6addr->sin6_len = sizeof(*ip6addr);
+				ip6addr->sin6_addr = ip6->ip6_dst;
+			}
 #endif /* INET6 */
 		} else {
 			ipseclog((LOG_ERR, "ipsec tunnel unsupported address family "
@@ -466,10 +548,21 @@ noreplaycheck:
 			IPSEC_STAT_INCREMENT(ipsecstat.in_nomem);
 			goto bad;
 		}
-		
+
+		if (ip_doscopedroute || ip6_doscopedroute) {
+			// update the receiving interface address based on the inner address
+			ifa = ifa_ifwithaddr((struct sockaddr *)&addr);
+			if (ifa) {
+				m->m_pkthdr.rcvif = ifa->ifa_ifp;
+				IFA_REMREF(ifa);
+			}
+		}
+
 		/* Clear the csum flags, they can't be valid for the inner headers */
 		m->m_pkthdr.csum_flags = 0;
-		proto_input(ifamily == AF_INET ? PF_INET : PF_INET6, m);
+		if (proto_input(ifamily == AF_INET ? PF_INET : PF_INET6, m) != 0)
+			goto bad;
+
 		nxt = IPPROTO_DONE;
 		KERNEL_DEBUG(DBG_FNC_ESPIN | DBG_FUNC_END, 2,0,0,0,0);
 	} else {
@@ -554,6 +647,11 @@ noreplaycheck:
 				udp->uh_sport = htons(sav->remote_ike_port);
 				udp->uh_sum = 0;
 			}
+
+			DTRACE_IP6(receive, struct mbuf *, m, struct inpcb *, NULL,
+			    struct ip *, ip, struct ifnet *, m->m_pkthdr.rcvif,
+			    struct ip *, ip, struct ip6_hdr *, NULL);
+
 			ip_proto_dispatch_in(m, off, nxt, 0);
 		} else
 			m_freem(m);
@@ -583,16 +681,16 @@ bad:
 
 #if INET6
 int
-esp6_input(mp, offp)
-	struct mbuf **mp;
-	int *offp;
+esp6_input(struct mbuf **mp, int *offp, int proto)
 {
+#pragma unused(proto)
 	struct mbuf *m = *mp;
 	int off = *offp;
 	struct ip6_hdr *ip6;
 	struct esp *esp;
 	struct esptail esptail;
 	u_int32_t spi;
+	u_int32_t seq;
 	struct secasvar *sav = NULL;
 	size_t taillen;
 	u_int16_t nxt;
@@ -667,6 +765,8 @@ esp6_input(mp, offp)
 		goto bad;
 	}
 
+	seq = ntohl(((struct newesp *)esp)->esp_seq);
+
 	if (!((sav->flags & SADB_X_EXT_OLD) == 0 && sav->replay
 	 && (sav->alg_auth && sav->key_auth)))
 		goto noreplaycheck;
@@ -678,7 +778,7 @@ esp6_input(mp, offp)
 	/*
 	 * check for sequence number.
 	 */
-	if (ipsec_chkreplay(ntohl(((struct newesp *)esp)->esp_seq), sav))
+	if (ipsec_chkreplay(seq, sav))
 		; /*okey*/
 	else {
 		IPSEC_STAT_INCREMENT(ipsec6stat.in_espreplay);
@@ -740,7 +840,7 @@ esp6_input(mp, offp)
 	 * update sequence number.
 	 */
 	if ((sav->flags & SADB_X_EXT_OLD) == 0 && sav->replay) {
-		if (ipsec_updatereplay(ntohl(((struct newesp *)esp)->esp_seq), sav)) {
+		if (ipsec_updatereplay(seq, sav)) {
 			IPSEC_STAT_INCREMENT(ipsec6stat.in_espreplay);
 			goto bad;
 		}
@@ -828,6 +928,9 @@ noreplaycheck:
 
 	/* was it transmitted over the IPsec tunnel SA? */
 	if (ipsec6_tunnel_validate(m, off + esplen + ivlen, nxt, sav)) {
+		ifaddr_t ifa;
+		struct sockaddr_storage addr;
+
 		/*
 		 * strip off all the headers that precedes ESP header.
 		 *	IP6 xx ESP IP6' payload -> IP6' payload
@@ -872,7 +975,26 @@ noreplaycheck:
 			IPSEC_STAT_INCREMENT(ipsec6stat.in_nomem);
 			goto bad;
 		}
-		proto_input(PF_INET6, m);
+
+		if (ip6_doscopedroute) {
+			struct sockaddr_in6 *ip6addr;
+
+			bzero(&addr, sizeof(addr));
+			ip6addr = (__typeof__(ip6addr))&addr;
+			ip6addr->sin6_family = AF_INET6;
+			ip6addr->sin6_len = sizeof(*ip6addr);
+			ip6addr->sin6_addr = ip6->ip6_dst;
+
+			// update the receiving interface address based on the inner address
+			ifa = ifa_ifwithaddr((struct sockaddr *)&addr);
+			if (ifa) {
+				m->m_pkthdr.rcvif = ifa->ifa_ifp;
+				IFA_REMREF(ifa);
+			}
+		}
+
+		if (proto_input(PF_INET6, m) != 0)
+			goto bad;
 		nxt = IPPROTO_DONE;
 	} else {
 		/*
diff --git a/bsd/netinet6/frag6.c b/bsd/netinet6/frag6.c
index ea75e5acb..b6b68b920 100644
--- a/bsd/netinet6/frag6.c
+++ b/bsd/netinet6/frag6.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -77,6 +77,7 @@
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
+#include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet/icmp6.h>
@@ -102,9 +103,6 @@ u_int frag6_nfragpackets;
 static u_int frag6_nfrags;
 struct	ip6q ip6q;	/* ip6 reassemble queue */
 
-#ifndef __APPLE__
-MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header");
-#endif
 
 extern lck_mtx_t *inet6_domain_mutex;
 /*
@@ -162,10 +160,9 @@ frag6_init()
  * 	 inet6_domain_mutex is protecting he frag6 queue manipulation.
  */
 int
-frag6_input(mp, offp)
-	struct mbuf **mp;
-	int *offp;
+frag6_input(struct mbuf **mp, int *offp, int proto)
 {
+#pragma unused(proto)
 	struct mbuf *m = *mp, *t;
 	struct ip6_hdr *ip6;
 	struct ip6_frag *ip6f;
@@ -176,6 +173,8 @@ frag6_input(mp, offp)
 	int fragoff, frgpartlen;	/* must be larger than u_int16_t */
 	struct ifnet *dstifp;
 	struct ifaddr *ifa = NULL;
+	u_int8_t ecn, ecn0;
+
 #ifdef IN6_IFSTAT_STRICT
 	struct route_in6 ro;
 	struct sockaddr_in6 *dst;
@@ -204,7 +203,7 @@ frag6_input(mp, offp)
 	if (ro.ro_rt != NULL) {
 		RT_LOCK(ro.ro_rt);
 		if ((ifa = ro.ro_rt->rt_ifa) != NULL) {
-			ifaref(ifa);
+			IFA_ADDREF(ifa);
 			dstifp = ((struct in6_ifaddr *)ro.ro_rt->rt_ifa)->ia_ifp;
 		}
 		RT_UNLOCK(ro.ro_rt);
@@ -222,7 +221,7 @@ frag6_input(mp, offp)
 		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
 		in6_ifstat_inc(dstifp, ifs6_reass_fail);
 		if (ifa != NULL)
-			ifafree(ifa);
+			IFA_REMREF(ifa);
 		return IPPROTO_DONE;
 	}
 
@@ -239,7 +238,7 @@ frag6_input(mp, offp)
 			    offsetof(struct ip6_hdr, ip6_plen));
 		in6_ifstat_inc(dstifp, ifs6_reass_fail);
 		if (ifa != NULL)
-			ifafree(ifa);
+			IFA_REMREF(ifa);
 		return IPPROTO_DONE;
 	}
 
@@ -298,10 +297,11 @@ frag6_input(mp, offp)
 		q6->ip6q_nxtp	= (u_char *)nxtp;
 #endif
 		q6->ip6q_ident	= ip6f->ip6f_ident;
-		q6->ip6q_arrive = 0; /* Is it used anywhere? */
 		q6->ip6q_ttl 	= IPV6_FRAGTTL;
 		q6->ip6q_src	= ip6->ip6_src;
 		q6->ip6q_dst	= ip6->ip6_dst;
+		q6->ip6q_ecn	=
+		    (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
 		q6->ip6q_unfrglen = -1;	/* The 1st fragment has not arrived. */
 
 		q6->ip6q_nfrag	= 0;
@@ -332,7 +332,7 @@ frag6_input(mp, offp)
 					offsetof(struct ip6_frag, ip6f_offlg));
 			frag6_doing_reass = 0;
 			if (ifa != NULL)
-				ifafree(ifa);
+				IFA_REMREF(ifa);
 			return(IPPROTO_DONE);
 		}
 	}
@@ -342,7 +342,7 @@ frag6_input(mp, offp)
 				offsetof(struct ip6_frag, ip6f_offlg));
 		frag6_doing_reass = 0;
 		if (ifa != NULL)
-			ifafree(ifa);
+			IFA_REMREF(ifa);
 		return(IPPROTO_DONE);
 	}
 	/*
@@ -387,10 +387,6 @@ frag6_input(mp, offp)
 	if (ip6af == NULL)
 		goto dropfrag;
 	bzero(ip6af, sizeof(*ip6af));
-	ip6af->ip6af_head = ip6->ip6_flow;
-	ip6af->ip6af_len = ip6->ip6_plen;
-	ip6af->ip6af_nxt = ip6->ip6_nxt;
-	ip6af->ip6af_hlim = ip6->ip6_hlim;
 	ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
 	ip6af->ip6af_off = fragoff;
 	ip6af->ip6af_frglen = frgpartlen;
@@ -402,6 +398,26 @@ frag6_input(mp, offp)
 		goto insert;
 	}
 
+	/*
+	 * Handle ECN by comparing this segment with the first one;
+	 * if CE is set, do not lose CE.
+	 * drop if CE and not-ECT are mixed for the same packet.
+	 */
+	ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
+	ecn0 = q6->ip6q_ecn;
+	if (ecn == IPTOS_ECN_CE) {
+		if (ecn0 == IPTOS_ECN_NOTECT) {
+			FREE(ip6af, M_FTABLE);
+			goto dropfrag;
+		}
+		if (ecn0 != IPTOS_ECN_CE)
+			q6->ip6q_ecn = IPTOS_ECN_CE;
+	}
+	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
+		FREE(ip6af, M_FTABLE);
+		goto dropfrag;
+	}
+
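
The block above is the ECN reassembly rule from RFC 3168: CE is sticky once seen on any fragment, and mixing CE with Not-ECT invalidates the whole packet. The same decision table as a standalone sketch (helper name hypothetical; the IPTOS_ECN_* codepoints come from the <netinet/ip.h> include this patch adds):

#include <sys/types.h>
#include <netinet/ip.h>

/* Merge the ECN codepoint of a new fragment (ecn) into that of the
 * reassembly queue (ecn0).  Returns -1 when the combination is
 * invalid and the fragment must be dropped. */
static int
ecn_reass_merge(u_int8_t ecn0, u_int8_t ecn)
{
	if (ecn == IPTOS_ECN_CE)
		return ((ecn0 == IPTOS_ECN_NOTECT) ? -1 : IPTOS_ECN_CE);
	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
		return (-1);
	return ((int)ecn0);
}
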
 	/*
 	 * Find a segment which begins after this one does.
 	 */
@@ -450,6 +466,11 @@ frag6_input(mp, offp)
 	 * If the incoming fragment overlaps some existing fragments in
 	 * the reassembly queue, drop it, since it is dangerous to override
 	 * existing fragments from a security point of view.
+	 * We don't know which fragment is the bad guy; here we simply trust
+	 * the fragment that arrived earlier, with no real justification.
+	 *
+	 * Note: due to changes after disabling this part, the mbuf passed to
+	 * m_adj() below no longer meets the requirement.
 	 */
 	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
 		i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
@@ -501,7 +522,7 @@ insert:
 		if (af6->ip6af_off != next) {
 			frag6_doing_reass = 0;
 			if (ifa != NULL)
-				ifafree(ifa);
+				IFA_REMREF(ifa);
 			return IPPROTO_DONE;
 		}
 		next += af6->ip6af_frglen;
@@ -509,7 +530,7 @@ insert:
 	if (af6->ip6af_up->ip6af_mff) {
 		frag6_doing_reass = 0;
 		if (ifa != NULL)
-			ifafree(ifa);
+			IFA_REMREF(ifa);
 		return IPPROTO_DONE;
 	}
 
@@ -538,15 +559,17 @@ insert:
 	ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
 	ip6->ip6_src = q6->ip6q_src;
 	ip6->ip6_dst = q6->ip6q_dst;
+	if (q6->ip6q_ecn == IPTOS_ECN_CE)
+		ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
+
 	nxt = q6->ip6q_nxt;
 #if notyet
 	*q6->ip6q_nxtp = (u_char)(nxt & 0xff);
 #endif
 
-	/*
-	 * Delete frag6 header with as a few cost as possible.
-	 */
-	if (offset < m->m_len) {
+	/* Delete frag6 header */
+	if (m->m_len >= offset + sizeof(struct ip6_frag)) {
+		/* This is the only possible case with !PULLDOWN_TEST */
 		ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
 			offset);
 		m->m_data += sizeof(struct ip6_frag);
@@ -596,7 +619,7 @@ insert:
 
 	frag6_doing_reass = 0;
 	if (ifa != NULL)
-		ifafree(ifa);
+		IFA_REMREF(ifa);
 	return nxt;
 
  dropfrag:
@@ -605,7 +628,7 @@ insert:
 	m_freem(m);
 	frag6_doing_reass = 0;
 	if (ifa != NULL)
-		ifafree(ifa);
+		IFA_REMREF(ifa);
 	return IPPROTO_DONE;
 }
 
@@ -636,7 +659,7 @@ frag6_freef(q6)
 			/* adjust pointer */
 			ip6 = mtod(m, struct ip6_hdr *);
 
-			/* restoure source and destination addresses */
+			/* restore source and destination addresses */
 			ip6->ip6_src = q6->ip6q_src;
 			ip6->ip6_dst = q6->ip6q_dst;
 			icmp6_error(m, ICMP6_TIME_EXCEEDED,
diff --git a/bsd/netinet6/icmp6.c b/bsd/netinet6/icmp6.c
index 02d19734f..43a61a6d2 100644
--- a/bsd/netinet6/icmp6.c
+++ b/bsd/netinet6/icmp6.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -123,6 +123,7 @@
 #include <netinet6/nd6.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet6/ip6protosw.h>
+#include <netinet6/scope6_var.h>
 
 #if IPSEC
 #include <netinet6/ipsec.h>
@@ -148,8 +149,6 @@ static int icmp6errpps_count = 0;
 static struct timeval icmp6errppslim_last;
 extern int icmp6_nodeinfo;
 extern struct inpcbinfo ripcbinfo;
-extern lck_mtx_t *ip6_mutex; 
-extern lck_mtx_t *nd6_mutex;
 extern lck_mtx_t *inet6_domain_mutex;
 
 static void icmp6_errcount(struct icmp6errstat *, int, int);
@@ -169,19 +168,12 @@ static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *,
 				struct ifnet *, int);
 static int icmp6_notify_error(struct mbuf *, int, int, int);
 
-#ifdef COMPAT_RFC1885
-/*
- * XXX: Compiled out for now, but if enabled we must use a lock for accesses,
- *	or easier, define it locally inside icmp6_reflect() and don't cache.
- */
-static struct route_in6 icmp6_reflect_rt;
-#endif
 
 
 void
 icmp6_init()
 {
-	mld6_init();
+	mld_init();
 }
 
 static void
@@ -242,13 +234,44 @@ icmp6_errcount(stat, type, code)
 	stat->icp6errs_unknown++;
 }
 
+/*
+ * A wrapper function for icmp6_error() necessary when the erroneous packet
+ * may not contain enough scope zone information.
+ */
+void
+icmp6_error2(struct mbuf *m, int type, int code, int param,
+    struct ifnet *ifp)
+{
+	struct ip6_hdr *ip6;
+
+	if (ifp == NULL)
+		return;
+
+#ifndef PULLDOWN_TEST
+	IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), return);
+#else
+	if (m->m_len < sizeof(struct ip6_hdr)) {
+		m = m_pullup(m, sizeof(struct ip6_hdr));
+		if (m == NULL)
+			return;
+	}
+#endif
+
+	ip6 = mtod(m, struct ip6_hdr *);
+
+	if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0)
+		return;
+	if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
+		return;
+
+	icmp6_error(m, type, code, param);
+}
+
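
icmp6_error2() is for callers that hold only the receive interface and possibly scope-free addresses; it embeds the zone IDs before deferring to icmp6_error(). An illustrative (not actual) call-site shape, assuming a caller that reports a reassembly timeout:

/* Report a reassembly timeout when only the receive interface is at
 * hand; icmp6_error2() sets the scope zones on src/dst first. */
static void
report_reass_timeout(struct mbuf *m)
{
	icmp6_error2(m, ICMP6_TIME_EXCEEDED, ICMP6_TIME_EXCEED_REASSEMBLY,
	    0, m->m_pkthdr.rcvif);
}
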
 /*
  * Generate an error packet of type error in response to bad IP6 packet.
  */
 void
-icmp6_error(m, type, code, param)
-	struct mbuf *m;
-	int type, code, param;
+icmp6_error(struct mbuf *m, int type, int code, int param)
 {
 	struct ip6_hdr *oip6, *nip6;
 	struct icmp6_hdr *icmp6;
@@ -258,7 +281,6 @@ icmp6_error(m, type, code, param)
 
 	icmp6stat.icp6s_error++;
 
-	lck_mtx_assert(ip6_mutex, LCK_MTX_ASSERT_NOTOWNED);
 	/* count per-type-code statistics */
 	icmp6_errcount(&icmp6stat.icp6s_outerrhist, type, code);
 
@@ -281,9 +303,15 @@ icmp6_error(m, type, code, param)
 	oip6 = mtod(m, struct ip6_hdr *);
 
 	/*
-	 * Multicast destination check. For unrecognized option errors,
-	 * this check has already done in ip6_unknown_opt(), so we can
-	 * check only for other errors.
+	 * If the destination address of the erroneous packet is a multicast
+	 * address, or the packet was sent using link-layer multicast,
+	 * we should basically suppress sending an error (RFC 2463, Section
+	 * 2.4).
+	 * We have two exceptions (the item e.2 in that section):
+	 * - the Packet Too Big message can be sent for path MTU discovery.
+	 * - a Parameter Problem Message can be allowed for an unrecognized
+	 *   option in the option type field.  This check has been done in
+	 *   ip6_unknown_opt(), so we can just check the type and code.
 	 */
 	if ((m->m_flags & (M_BCAST|M_MCAST) ||
 	     IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) &&
@@ -292,7 +320,10 @@ icmp6_error(m, type, code, param)
 	      code != ICMP6_PARAMPROB_OPTION)))
 		goto freeit;
 
-	/* Source address check. XXX: the case of anycast source? */
+	/*
+	 * RFC 2463, 2.4 (e.5): source address check.
+	 * XXX: the case of anycast source?
+	 */
 	if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) ||
 	    IN6_IS_ADDR_MULTICAST(&oip6->ip6_src))
 		goto freeit;
@@ -361,10 +392,8 @@ icmp6_error(m, type, code, param)
 	nip6->ip6_src  = oip6->ip6_src;
 	nip6->ip6_dst  = oip6->ip6_dst;
 
-	if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_src))
-		oip6->ip6_src.s6_addr16[1] = 0;
-	if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_dst))
-		oip6->ip6_dst.s6_addr16[1] = 0;
+	in6_clearscope(&oip6->ip6_src);
+	in6_clearscope(&oip6->ip6_dst);
 
 	icmp6 = (struct icmp6_hdr *)(nip6 + 1);
 	icmp6->icmp6_type = type;
@@ -373,7 +402,7 @@ icmp6_error(m, type, code, param)
 
 	/*
 	 * icmp6_reflect() is designed to be in the input path.
-	 * icmp6_error() can be called from both input and outut path,
+	 * icmp6_error() can be called from both input and output path,
 	 * and if we are in output path rcvif could contain bogus value.
 	 * clear m->m_pkthdr.rcvif for safety, we should have enough scope
 	 * information in ip header (nip6).
@@ -387,7 +416,7 @@ icmp6_error(m, type, code, param)
 
   freeit:
 	/*
-	 * If we can't tell wheter or not we can generate ICMP6, free it.
+	 * If we can't tell whether or not we can generate ICMP6, free it.
 	 */
 	m_freem(m);
 }
@@ -396,17 +425,19 @@ icmp6_error(m, type, code, param)
  * Process a received ICMP6 message.
  */
 int
-icmp6_input(mp, offp)
-	struct mbuf **mp;
-	int *offp;
+icmp6_input(struct mbuf **mp, int *offp, int proto)
 {
+#pragma unused(proto)
 	struct mbuf *m = *mp, *n;
+	struct ifnet *ifp;
 	struct ip6_hdr *ip6, *nip6;
 	struct icmp6_hdr *icmp6, *nicmp6;
 	int off = *offp;
 	int icmp6len = m->m_pkthdr.len - *offp;
 	int code, sum, noff;
 
+	ifp = m->m_pkthdr.rcvif;
+
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), return IPPROTO_DONE);
 	/* m might change if M_LOOP.  So, call mtod after this */
@@ -423,6 +454,26 @@ icmp6_input(mp, offp)
 		goto freeit;
 	}
 
+	/*
+	 * Check multicast group membership.
+	 * Note: SSM filters are not applied for ICMPv6 traffic.
+	 */
+	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
+		struct in6_multi	*inm;
+
+		in6_multihead_lock_shared();
+		IN6_LOOKUP_MULTI(&ip6->ip6_dst, ifp, inm);
+		in6_multihead_lock_done();
+
+		if (inm == NULL) {
+			ip6stat.ip6s_notmember++;
+			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
+			goto freeit;
+		} else {
+			IN6M_REMREF(inm);
+		}
+	}
+
 	/*
 	 * calculate the checksum
 	 */
@@ -449,7 +500,7 @@ icmp6_input(mp, offp)
 	if (faithprefix(&ip6->ip6_dst)) {
 		/*
 		 * Deliver very specific ICMP6 type only.
-		 * This is important to deilver TOOBIG.  Otherwise PMTUD
+		 * This is important to deliver TOOBIG.  Otherwise PMTUD
 		 * will not work.
 		 */
 		switch (icmp6->icmp6_type) {
@@ -468,7 +519,6 @@ icmp6_input(mp, offp)
 	if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK)
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error);
 
-
 	switch (icmp6->icmp6_type) {
 	case ICMP6_DST_UNREACH:
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_dstunreach);
@@ -483,30 +533,21 @@ icmp6_input(mp, offp)
 		case ICMP6_DST_UNREACH_ADDR:
 			code = PRC_HOSTDEAD;
 			break;
-#ifdef COMPAT_RFC1885
-		case ICMP6_DST_UNREACH_NOTNEIGHBOR:
-			code = PRC_UNREACH_SRCFAIL;
-			break;
-#else
 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
 			/* I mean "source address was incorrect." */
 			code = PRC_PARAMPROB;
 			break;
-#endif
 		case ICMP6_DST_UNREACH_NOPORT:
 			code = PRC_UNREACH_PORT;
 			break;
 		default:
 			goto badcode;
 		}
-
 		goto deliver;
 		break;
 
 	case ICMP6_PACKET_TOO_BIG:
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_pkttoobig);
-		if (code != 0)
-			goto badcode;
 
 		code = PRC_MSGSIZE;
 
@@ -521,8 +562,10 @@ icmp6_input(mp, offp)
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_timeexceed);
 		switch (code) {
 		case ICMP6_TIME_EXCEED_TRANSIT:
+			code = PRC_TIMXCEED_INTRANS;
+			break;
 		case ICMP6_TIME_EXCEED_REASSEMBLY:
-			code += PRC_TIMXCEED_INTRANS;
+			code = PRC_TIMXCEED_REASS;
 			break;
 		default:
 			goto badcode;
@@ -631,11 +674,12 @@ icmp6_input(mp, offp)
 			goto badcode;
 		break;
 
-	case MLD6_LISTENER_QUERY:
-	case MLD6_LISTENER_REPORT:
-		if (icmp6len < sizeof(struct mld6_hdr))
+	case MLD_LISTENER_QUERY:
+	case MLD_LISTENER_REPORT:
+
+		if (icmp6len < sizeof(struct mld_hdr))
 			goto badlen;
-		if (icmp6->icmp6_type == MLD6_LISTENER_QUERY) /* XXX: ugly... */
+		if (icmp6->icmp6_type == MLD_LISTENER_QUERY) /* XXX: ugly... */
 			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldquery);
 		else
 			icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldreport);
@@ -647,31 +691,32 @@ icmp6_input(mp, offp)
 
 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
 			/* give up local */
-			mld6_input(m, off);
-			m = NULL;
+			if (mld_input(m, off, icmp6len) == IPPROTO_DONE)
+				m = NULL;
 			goto freeit;
 		}
-		mld6_input(n, off);
+		if (mld_input(n, off, icmp6len) != IPPROTO_DONE)
+			m_freem(n);
 		/* m stays. */
 		goto rate_limit_checked;
 		break;
 
-	case MLD6_LISTENER_DONE:
+	case MLD_LISTENER_DONE:
 		icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mlddone);
-		if (icmp6len < sizeof(struct mld6_hdr))	/* necessary? */
+		if (icmp6len < sizeof(struct mld_hdr))	/* necessary? */
 			goto badlen;
 		break;		/* nothing to be done in kernel */
 
-	case MLD6_MTRACE_RESP:
-	case MLD6_MTRACE:
-		/* XXX: these two are experimental. not officially defind. */
+	case MLD_MTRACE_RESP:
+	case MLD_MTRACE:
+		/* XXX: these two are experimental.  not officially defined. */
 		/* XXX: per-interface statistics? */
 		break;		/* just pass it to applications */
 
 	case ICMP6_NI_QUERY:
 		if (!icmp6_nodeinfo)
 			break;
-
+		/* ### LD 10/20: check fbsd differences here; not sure whether we're ahead or behind. */
 		/* By RFC 4620 refuse to answer queries from global scope addresses */ 
 		if ((icmp6_nodeinfo & 8) != 8 && in6_addrscope(&ip6->ip6_src) == IPV6_ADDR_SCOPE_GLOBAL)
 			break;
@@ -948,7 +993,7 @@ icmp6_notify_error(m, off, icmp6len, code)
 					return(-1);
 				}
 #endif
-				
+
 				if (nxt == IPPROTO_AH)
 					eoff += (eh->ip6e_len + 2) << 2;
 				else
@@ -1023,7 +1068,7 @@ icmp6_notify_error(m, off, icmp6len, code)
 					       eoff, sizeof(*fh));
 				if (fh == NULL) {
 					icmp6stat.icp6s_tooshort++;
-					return(-1);
+					return (-1);
 				}
 #endif
 				/*
@@ -1055,14 +1100,23 @@ icmp6_notify_error(m, off, icmp6len, code)
 		icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
 #else
 		IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
-			       sizeof(*icmp6) + sizeof(struct ip6_hdr));
+		    sizeof(*icmp6) + sizeof(struct ip6_hdr));
 		if (icmp6 == NULL) {
 			icmp6stat.icp6s_tooshort++;
-			return(-1);
+			return (-1);
 		}
 #endif
 
+		/*
+		 * retrieve parameters from the inner IPv6 header, and convert
+		 * them into sockaddr structures.
+		 * XXX: there is no guarantee that the source or destination
+		 * addresses of the inner packet are in the same scope as
+		 * the addresses of the icmp packet.  But there is no other
+		 * way to determine the zone.
+		 */
 		eip6 = (struct ip6_hdr *)(icmp6 + 1);
+
 		bzero(&icmp6dst, sizeof(icmp6dst));
 		icmp6dst.sin6_len = sizeof(struct sockaddr_in6);
 		icmp6dst.sin6_family = AF_INET6;
@@ -1070,39 +1124,16 @@ icmp6_notify_error(m, off, icmp6len, code)
 			icmp6dst.sin6_addr = eip6->ip6_dst;
 		else
 			icmp6dst.sin6_addr = *finaldst;
-		icmp6dst.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif,
-							  &icmp6dst.sin6_addr);
-#ifndef SCOPEDROUTING
-		if (in6_embedscope(&icmp6dst.sin6_addr, &icmp6dst,
-				   NULL, NULL)) {
-			/* should be impossbile */
-			nd6log((LOG_DEBUG,
-			    "icmp6_notify_error: in6_embedscope failed\n"));
+		if (in6_setscope(&icmp6dst.sin6_addr, m->m_pkthdr.rcvif, NULL))
 			goto freeit;
-		}
-#endif
-
-		/*
-		 * retrieve parameters from the inner IPv6 header, and convert
-		 * them into sockaddr structures.
-		 */
 		bzero(&icmp6src, sizeof(icmp6src));
 		icmp6src.sin6_len = sizeof(struct sockaddr_in6);
 		icmp6src.sin6_family = AF_INET6;
 		icmp6src.sin6_addr = eip6->ip6_src;
-		icmp6src.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif,
-							  &icmp6src.sin6_addr);
-#ifndef SCOPEDROUTING
-		if (in6_embedscope(&icmp6src.sin6_addr, &icmp6src,
-				   NULL, NULL)) {
-			/* should be impossbile */
-			nd6log((LOG_DEBUG,
-			    "icmp6_notify_error: in6_embedscope failed\n"));
+		if (in6_setscope(&icmp6src.sin6_addr, m->m_pkthdr.rcvif, NULL))
 			goto freeit;
-		}
-#endif
 		icmp6src.sin6_flowinfo =
-			(eip6->ip6_flow & IPV6_FLOWLABEL_MASK);
+		    (eip6->ip6_flow & IPV6_FLOWLABEL_MASK);
 
 		if (finaldst == NULL)
 			finaldst = &eip6->ip6_dst;
@@ -1145,9 +1176,16 @@ icmp6_mtudisc_update(ip6cp, validated)
 	u_int mtu = ntohl(icmp6->icmp6_mtu);
 	struct rtentry *rt = NULL;
 	struct sockaddr_in6 sin6;
+	/*
+	 * We reject an ICMPv6 Packet Too Big carrying an abnormally small
+	 * value: less than a 40-byte IPv6 header plus an 8-byte fragment
+	 * header plus 8 bytes of payload, i.e. less than 56 bytes.
+	 * XXX what is the good definition of "abnormally small"?
+	 */
+	if (mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8)
+		return;
 
 	if (!validated)
 		return;
+
 	/*
 	 * In case the suggested mtu is less than IPV6_MMTU, we
 	 * only need to remember that it was for above mentioned
@@ -1167,19 +1205,16 @@ icmp6_mtudisc_update(ip6cp, validated)
 		    htons(m->m_pkthdr.rcvif->if_index);
 	}
 	/* sin6.sin6_scope_id = XXX: should be set if DST is a scoped addr */
-	rt = rtalloc1((struct sockaddr *)&sin6, 0, RTF_CLONING | RTF_PRCLONING);
+	rt = rtalloc1_scoped((struct sockaddr *)&sin6, 0,
+	    RTF_CLONING | RTF_PRCLONING, m->m_pkthdr.rcvif->if_index);
 	if (rt != NULL) {
 		RT_LOCK(rt);
 		if ((rt->rt_flags & RTF_HOST) &&
-		    !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
-			if (mtu < IPV6_MMTU) {
-					/* xxx */
-				rt->rt_rmx.rmx_locks |= RTV_MTU;
-			} else if (mtu < rt->rt_ifp->if_mtu &&
-			    rt->rt_rmx.rmx_mtu > mtu) {
-				icmp6stat.icp6s_pmtuchg++;
-				rt->rt_rmx.rmx_mtu = mtu;
-			}
+		    !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
+		    mtu < IN6_LINKMTU(rt->rt_ifp) &&
+		    rt->rt_rmx.rmx_mtu > mtu) {
+			icmp6stat.icp6s_pmtuchg++;
+			rt->rt_rmx.rmx_mtu = mtu;
 		}
 		RT_UNLOCK(rt);
 		rtfree(rt);
@@ -1189,7 +1224,7 @@ icmp6_mtudisc_update(ip6cp, validated)
 /*
  * Process a Node Information Query packet, based on
  * draft-ietf-ipngwg-icmp-name-lookups-07.
- * 
+ *
  * Spec incompatibilities:
  * - IPv6 Subject address handling
  * - IPv4 Subject address handling support missing
@@ -1216,7 +1251,6 @@ ni6_input(m, off)
 	struct ip6_hdr *ip6;
 	int oldfqdn = 0;	/* if 1, return pascal string (03 draft) */
 	char *subj = NULL;
-	struct in6_ifaddr *ia6 = NULL;
 
 	ip6 = mtod(m, struct ip6_hdr *);
 #ifndef PULLDOWN_TEST
@@ -1225,40 +1259,59 @@ ni6_input(m, off)
 	IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6));
 	if (ni6 == NULL) {
 		/* m is already reclaimed */
-		return NULL;
+		return (NULL);
 	}
 #endif
 
+	/*
+	 * Validate IPv6 source address.
+	 * The default configuration MUST be to refuse answering queries from
+	 * global-scope addresses according to RFC4602.
+	 * Notes:
+	 *  - it's not very clear what "refuse" means; this implementation
+	 *    simply drops it.
+	 *  - it's not very easy to identify global-scope (unicast) addresses
+	 *    since there are many prefixes for them.  It should be safer
+	 *    and in practice sufficient to check "all" but loopback and
+	 *    link-local (note that site-local unicast was deprecated and
+	 *    ULA is defined as global scope-wise)
+	 */
+	if ((icmp6_nodeinfo & ICMP6_NODEINFO_GLOBALOK) == 0 &&
+	    !IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) &&
+	    !IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
+		goto bad;
+
 	/*
 	 * Validate IPv6 destination address.
 	 *
 	 * The Responder must discard the Query without further processing
 	 * unless it is one of the Responder's unicast or anycast addresses, or
 	 * a link-local scope multicast address which the Responder has joined.
-	 * [icmp-name-lookups-07, Section 4.]
+	 * [RFC4602, Section 5.]
 	 */
-	bzero(&sin6, sizeof(sin6));
-	sin6.sin6_family = AF_INET6;
-	sin6.sin6_len = sizeof(struct sockaddr_in6);
-	bcopy(&ip6->ip6_dst, &sin6.sin6_addr, sizeof(sin6.sin6_addr));
-	/* XXX scopeid */
-	if ((ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)&sin6)) != NULL) {
-		/* unicast/anycast, fine */
-		if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
-		    (icmp6_nodeinfo & 4) == 0) {
-		    ifafree(&ia6->ia_ifa);
-		    ia6 = NULL;
+	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
+		if (!IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst))
+			goto bad;
+		/* else it's a link-local multicast, fine */
+	} else {		/* unicast or anycast */
+		struct in6_ifaddr *ia6;
+
+		if ((ia6 = ip6_getdstifaddr(m)) == NULL)
+			goto bad; /* XXX impossible */
+
+		IFA_LOCK(&ia6->ia_ifa);
+		if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) &&
+		    !(icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK)) {
 			nd6log((LOG_DEBUG, "ni6_input: ignore node info to "
 				"a temporary address in %s:%d",
 			       __FILE__, __LINE__));
+			IFA_UNLOCK(&ia6->ia_ifa);
+			IFA_REMREF(&ia6->ia_ifa);
 			goto bad;
 		}
-		ifafree(&ia6->ia_ifa);
-		ia6 = NULL;
-	} else if (IN6_IS_ADDR_MC_LINKLOCAL(&sin6.sin6_addr))
-		; /* link-local multicast, fine */
-	else
-		goto bad;
+		IFA_UNLOCK(&ia6->ia_ifa);
+		IFA_REMREF(&ia6->ia_ifa);
+	}
 
 	/* validate query Subject field. */
 	qtype = ntohs(ni6->ni_qtype);
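The source-address gate added at the top of ni6_input() implements the RFC 4602 default of refusing global-scope queriers. A minimal sketch of the same predicate, reusing the ICMP6_NODEINFO_GLOBALOK flag name from the patch (illustrative, not the kernel code path):

#include <netinet/in.h>
#include <netinet/icmp6.h>

/*
 * Accept a Node Information query source only if global queriers are
 * explicitly allowed, or if the source is loopback or link-local;
 * everything else is treated as global scope and dropped.
 */
static int
ni6_src_acceptable(struct in6_addr *src, int nodeinfo)
{
	if (nodeinfo & ICMP6_NODEINFO_GLOBALOK)
		return (1);
	return (IN6_IS_ADDR_LOOPBACK(src) || IN6_IS_ADDR_LINKLOCAL(src));
}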
@@ -1272,6 +1325,7 @@ ni6_input(m, off)
 		/* FALLTHROUGH */
 	case NI_QTYPE_FQDN:
 	case NI_QTYPE_NODEADDR:
+	case NI_QTYPE_IPV4ADDR:
 		switch (ni6->ni_code) {
 		case ICMP6_NI_SUBJ_IPV6:
 #if ICMP6_NI_SUBJ_IPV6 != 0
@@ -1291,7 +1345,7 @@ ni6_input(m, off)
 				goto bad;
 #endif
 
-			if (subjlen != sizeof(sin6.sin6_addr))
+			if (subjlen != sizeof(struct in6_addr))
 				goto bad;
 
 			/*
@@ -1313,18 +1367,16 @@ ni6_input(m, off)
 			    subjlen, (caddr_t)&sin6.sin6_addr);
 			sin6.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif,
 							      &sin6.sin6_addr);
-#ifndef SCOPEDROUTING
-			in6_embedscope(&sin6.sin6_addr, &sin6, NULL, NULL);
-#endif
+			in6_embedscope(&sin6.sin6_addr, &sin6, NULL, NULL,
+			    NULL);
 			bzero(&sin6_d, sizeof(sin6_d));
 			sin6_d.sin6_family = AF_INET6; /* not used, actually */
 			sin6_d.sin6_len = sizeof(sin6_d); /* ditto */
 			sin6_d.sin6_addr = ip6->ip6_dst;
 			sin6_d.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif,
 								&ip6->ip6_dst);
-#ifndef SCOPEDROUTING
-			in6_embedscope(&sin6_d.sin6_addr, &sin6_d, NULL, NULL);
-#endif
+			in6_embedscope(&sin6_d.sin6_addr, &sin6_d, NULL, NULL,
+			    NULL);
 			subj = (char *)&sin6;
 			if (SA6_ARE_ADDR_EQUAL(&sin6, &sin6_d))
 				break;
@@ -1333,7 +1385,8 @@ ni6_input(m, off)
 			 * XXX if we are to allow other cases, we should really
 			 * be careful about scope here.
 			 * basically, we should disallow queries toward IPv6
-			 * destination X with subject Y, if scope(X) > scope(Y).
+			 * destination X with subject Y,
+			 * if scope(X) > scope(Y).
 			 * if we allow scope(X) > scope(Y), it will result in
 			 * information leakage across scope boundary.
 			 */
@@ -1376,11 +1429,12 @@ ni6_input(m, off)
 	/* refuse based on configuration.  XXX ICMP6_NI_REFUSED? */
 	switch (qtype) {
 	case NI_QTYPE_FQDN:
-		if ((icmp6_nodeinfo & 1) == 0)
+		if ((icmp6_nodeinfo & ICMP6_NODEINFO_FQDNOK) == 0)
 			goto bad;
 		break;
 	case NI_QTYPE_NODEADDR:
-		if ((icmp6_nodeinfo & 2) == 0)
+	case NI_QTYPE_IPV4ADDR:
+		if ((icmp6_nodeinfo & ICMP6_NODEINFO_NODEADDROK) == 0)
 			goto bad;
 		break;
 	}
@@ -1399,13 +1453,16 @@ ni6_input(m, off)
 	case NI_QTYPE_NODEADDR:
 		addrs = ni6_addrs(ni6, &ifp, subj);
 		if ((replylen += addrs * (sizeof(struct in6_addr) +
-					  sizeof(u_int32_t))) > MCLBYTES)
+		    sizeof(u_int32_t))) > MCLBYTES)
 			replylen = MCLBYTES; /* XXX: will truncate pkt later */
 		break;
+	case NI_QTYPE_IPV4ADDR:
+		/* unsupported - should respond with unknown Qtype? */
+		break;
 	default:
 		/*
 		 * XXX: We must return a reply with the ICMP6 code
-		 * `unknown Qtype' in this case. However we regard the case
+		 * `unknown Qtype' in this case.  However we regard the case
 		 * as an FQDN query for backward compatibility.
 		 * Older versions set a random value to this field,
 		 * so it rarely varies in the defined qtypes.
@@ -1423,7 +1480,9 @@ ni6_input(m, off)
 	MGETHDR(n, M_DONTWAIT, m->m_type);	/* MAC-OK */
 	if (n == NULL) {
 		m_freem(m);
-		return(NULL);
+		if (ifp != NULL)
+			ifnet_release(ifp);
+		return (NULL);
 	}
 	M_COPY_PKTHDR(n, m); /* just for recvif */
 	if (replylen > MHLEN) {
@@ -1500,13 +1559,17 @@ ni6_input(m, off)
 
 	nni6->ni_type = ICMP6_NI_REPLY;
 	m_freem(m);
-	return(n);
+	if (ifp != NULL)
+		ifnet_release(ifp);
+	return (n);
 
-  bad:
+bad:
 	m_freem(m);
 	if (n)
 		m_freem(n);
-	return(NULL);
+	if (ifp != NULL)
+		ifnet_release(ifp);
+	return (NULL);
 }
 #undef hostnamelen
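The reply and error exits patched above now release the interface reference that ni6_addrs() takes on the caller's behalf (see the ifnet_reference() hunk below). A minimal sketch of that ownership contract, with hypothetical iface_ref()/iface_unref() helpers standing in for the real ifnet_reference()/ifnet_release() KPIs:

struct iface;
void iface_ref(struct iface *);		/* hypothetical stand-in */
void iface_unref(struct iface *);	/* hypothetical stand-in */
struct iface *lookup_iface(void);	/* NULL, or an iface with a reference held */

static int
consume(void)
{
	struct iface *ifp;
	int error = 0;

	if ((ifp = lookup_iface()) == NULL)
		return (-1);
	/* ... work that may fail and set error ... */
	if (error != 0)
		goto bad;
	iface_unref(ifp);	/* success path drops the reference */
	return (0);
bad:
	iface_unref(ifp);	/* every error path drops it too */
	return (-1);
}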
 
@@ -1693,6 +1756,9 @@ ni6_addrs(ni6, ifpp, subj)
 	int addrs = 0, addrsofif, iffound = 0;
 	int niflags = ni6->ni_flags;
 
+	if (ifpp != NULL)
+		*ifpp = NULL;
+
 	if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) {
 		switch (ni6->ni_code) {
 		case ICMP6_NI_SUBJ_IPV6:
@@ -1705,7 +1771,7 @@ ni6_addrs(ni6, ifpp, subj)
 			 * XXX: we only support IPv6 subject address for
 			 * this Qtype.
 			 */
-			return(0);
+			return (0);
 		}
 	}
 
@@ -1715,8 +1781,11 @@ ni6_addrs(ni6, ifpp, subj)
 		ifnet_lock_shared(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 		{
-			if (ifa->ifa_addr->sa_family != AF_INET6)
+			IFA_LOCK(ifa);
+			if (ifa->ifa_addr->sa_family != AF_INET6) {
+				IFA_UNLOCK(ifa);
 				continue;
+			}
 			ifa6 = (struct in6_ifaddr *)ifa;
 
 			if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 &&
@@ -1737,18 +1806,25 @@ ni6_addrs(ni6, ifpp, subj)
 			/* What do we have to do about ::1? */
 			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
 			case IPV6_ADDR_SCOPE_LINKLOCAL:
-				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
+				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) {
+					IFA_UNLOCK(ifa);
 					continue;
+				}
 				break;
 			case IPV6_ADDR_SCOPE_SITELOCAL:
-				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
+				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) {
+					IFA_UNLOCK(ifa);
 					continue;
+				}
 				break;
 			case IPV6_ADDR_SCOPE_GLOBAL:
-				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
+				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) {
+					IFA_UNLOCK(ifa);
 					continue;
+				}
 				break;
 			default:
+				IFA_UNLOCK(ifa);
 				continue;
 			}
 
@@ -1757,17 +1833,24 @@ ni6_addrs(ni6, ifpp, subj)
 			 * XXX: just experimental.  not in the spec.
 			 */
 			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
-			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
+			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) {
+				IFA_UNLOCK(ifa);
 				continue; /* we need only unicast addresses */
+			}
 			if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
-			    (icmp6_nodeinfo & 4) == 0) {
+			    (icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) {
+				IFA_UNLOCK(ifa);
 				continue;
 			}
 			addrsofif++; /* count the address */
+			IFA_UNLOCK(ifa);
 		}
 		ifnet_lock_done(ifp);
 		if (iffound) {
-			*ifpp = ifp;
+			if (ifpp != NULL) {
+				*ifpp = ifp;
+				ifnet_reference(ifp);
+			}
 			ifnet_head_done();
 			return(addrsofif);
 		}
@@ -1776,7 +1859,7 @@ ni6_addrs(ni6, ifpp, subj)
 	}
 	ifnet_head_done();
 
-	return(addrs);
+	return (addrs);
 }
 
 static int
@@ -1798,20 +1881,23 @@ ni6_store_addrs(ni6, nni6, ifp0, resid)
 	getmicrotime(&timenow);
 
 	if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL))
-		return(0);	/* needless to copy */
+		return (0);	/* needless to copy */
 
   again:
 
 	ifnet_head_lock_shared();
-	if (ifp == NULL) ifp = TAILQ_FIRST(&ifnet_head);
-	
+	if (ifp == NULL)
+		ifp = TAILQ_FIRST(&ifnet_head);
+
 	for (; ifp; ifp = TAILQ_NEXT(ifp, if_list)) {
 		ifnet_lock_shared(ifp);
 		for (ifa = ifp->if_addrlist.tqh_first; ifa;
-		     ifa = ifa->ifa_list.tqe_next)
-		{
-			if (ifa->ifa_addr->sa_family != AF_INET6)
+		     ifa = ifa->ifa_list.tqe_next) {
+			IFA_LOCK(ifa);
+			if (ifa->ifa_addr->sa_family != AF_INET6) {
+				IFA_UNLOCK(ifa);
 				continue;
+			}
 			ifa6 = (struct in6_ifaddr *)ifa;
 
 			if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 &&
@@ -1825,45 +1911,57 @@ ni6_store_addrs(ni6, nni6, ifp0, resid)
 				if (ifp_dep == NULL)
 					ifp_dep = ifp;
 
+				IFA_UNLOCK(ifa);
 				continue;
-			}
-			else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 &&
-				 allow_deprecated != 0)
+			} else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 &&
+			    allow_deprecated != 0) {
+				IFA_UNLOCK(ifa);
 				continue; /* we now collect deprecated addrs */
-
+			}
 			/* What do we have to do about ::1? */
 			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
 			case IPV6_ADDR_SCOPE_LINKLOCAL:
-				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
+				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) {
+					IFA_UNLOCK(ifa);
 					continue;
+				}
 				break;
 			case IPV6_ADDR_SCOPE_SITELOCAL:
-				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
+				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) {
+					IFA_UNLOCK(ifa);
 					continue;
+				}
 				break;
 			case IPV6_ADDR_SCOPE_GLOBAL:
-				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
+				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) {
+					IFA_UNLOCK(ifa);
 					continue;
+				}
 				break;
 			default:
+				IFA_UNLOCK(ifa);
 				continue;
 			}
 
 			/*
 			 * check if anycast is okay.
-			 * XXX: just experimental. not in the spec.
+			 * XXX: just experimental.  not in the spec.
 			 */
 			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
-			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
+			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) {
+				IFA_UNLOCK(ifa);
 				continue;
+			}
 			if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
-			    (icmp6_nodeinfo & 4) == 0) {
+			    (icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) {
+				IFA_UNLOCK(ifa);
 				continue;
 			}
 
 			/* now we can copy the address */
 			if (resid < sizeof(struct in6_addr) +
 			    sizeof(u_int32_t)) {
+				IFA_UNLOCK(ifa);
 				/*
 				 * We give up much more copy.
 				 * Set the truncate flag and return.
@@ -1890,7 +1988,8 @@ ni6_store_addrs(ni6, nni6, ifp0, resid)
 			 * address configuration by DHCPv6, so the former
 			 * case can't happen.
 			 */
-			if (ifa6->ia6_lifetime.ia6t_expire == 0)
+			if (ifa6->ia6_lifetime.ia6t_expire == 0 &&
+			    (ifa6->ia6_flags & IN6_IFF_TEMPORARY) == 0)
 				ltime = ND6_INFINITE_LIFETIME;
 			else {
 				if (ifa6->ia6_lifetime.ia6t_expire >
@@ -1899,7 +1998,7 @@ ni6_store_addrs(ni6, nni6, ifp0, resid)
 				else
 					ltime = 0;
 			}
-			
+
 			bcopy(&ltime, cp, sizeof(u_int32_t));
 			cp += sizeof(u_int32_t);
 
@@ -1910,10 +2009,11 @@ ni6_store_addrs(ni6, nni6, ifp0, resid)
 			if (IN6_IS_ADDR_LINKLOCAL(&ifa6->ia_addr.sin6_addr))
 				((struct in6_addr *)cp)->s6_addr16[1] = 0;
 			cp += sizeof(struct in6_addr);
-			
+
 			resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t));
 			copied += (sizeof(struct in6_addr) +
 				   sizeof(u_int32_t));
+			IFA_UNLOCK(ifa);
 		}
 		ifnet_lock_done(ifp);
 		if (ifp0)	/* we need search only on the specified IF */
@@ -1946,6 +2046,7 @@ icmp6_rip6_input(mp, off)
 	struct sockaddr_in6 rip6src;
 	struct icmp6_hdr *icmp6;
 	struct mbuf *opts = NULL;
+	int ret = 0;
 
 #ifndef PULLDOWN_TEST
 	/* this is assumed to be safe. */
@@ -1958,21 +2059,22 @@ icmp6_rip6_input(mp, off)
 	}
 #endif
 
+	/*
+	 * XXX: the address may have embedded scope zone ID, which should be
+	 * hidden from applications.
+	 */
 	bzero(&rip6src, sizeof(rip6src));
-	rip6src.sin6_len = sizeof(struct sockaddr_in6);
 	rip6src.sin6_family = AF_INET6;
-	/* KAME hack: recover scopeid */
-	(void)in6_recoverscope(&rip6src, &ip6->ip6_src, m->m_pkthdr.rcvif);
-
+	rip6src.sin6_len = sizeof(struct sockaddr_in6);
+	rip6src.sin6_addr = ip6->ip6_src;
+	if (sa6_recoverscope(&rip6src))
+		return (IPPROTO_DONE);
+
 	lck_rw_lock_shared(ripcbinfo.mtx);
 	LIST_FOREACH(in6p, &ripcb, inp_list)
 	{
 		if ((in6p->inp_vflag & INP_IPV6) == 0)
 			continue;
-#if HAVE_NRL_INPCB
-		if (!(in6p->in6p_flags & INP_IPV6))
-			continue;
-#endif
 		if (in6p->in6p_ip6_nxt != IPPROTO_ICMPV6)
 			continue;
 		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
@@ -1988,10 +2090,20 @@ icmp6_rip6_input(mp, off)
 		if (last) {
 			struct	mbuf *n;
 			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
-				if (last->in6p_flags & IN6P_CONTROLOPTS)
-					ip6_savecontrol(last, &opts, ip6, n);
+				if ((last->in6p_flags & IN6P_CONTROLOPTS) != 0 ||
+				    (last->in6p_socket->so_options & SO_TIMESTAMP) != 0 ||
+				    (last->in6p_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
+					ret = ip6_savecontrol(last, n, &opts);
+					if (ret != 0) {
+						m_freem(n);
+						m_freem(opts);
+						last = in6p;
+						continue;
+					}
+				}
 				/* strip intermediate headers */
 				m_adj(n, off);
+				so_recv_data_stat(last->in6p_socket, m, 0);
 				if (sbappendaddr(&last->in6p_socket->so_rcv,
 						 (struct sockaddr *)&rip6src,
 						 n, opts, NULL) != 0) {
@@ -2002,21 +2114,35 @@ icmp6_rip6_input(mp, off)
 		}
 		last = in6p;
 	}
-	lck_rw_done(ripcbinfo.mtx);
 	if (last) {
-		if (last->in6p_flags & IN6P_CONTROLOPTS)
-			ip6_savecontrol(last, &opts, ip6, m);
+		if ((last->in6p_flags & INP_CONTROLOPTS) != 0 ||
+		    (last->in6p_socket->so_options & SO_TIMESTAMP) != 0 ||
+		    (last->in6p_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
+			ret = ip6_savecontrol(last, m, &opts);
+			if (ret != 0) {
+				goto error;
+			}
+		}
 		/* strip intermediate headers */
 		m_adj(m, off);
+		so_recv_data_stat(last->in6p_socket, m, 0);
 		if (sbappendaddr(&last->in6p_socket->so_rcv,
 				 (struct sockaddr *)&rip6src, m, opts, NULL) != 0) {
 			sorwakeup(last->in6p_socket);
 		}
 	} else {
-		m_freem(m);
-		ip6stat.ip6s_delivered--;
+		goto error;
 	}
+	lck_rw_done(ripcbinfo.mtx);
 	return IPPROTO_DONE;
+
+error:
+	lck_rw_done(ripcbinfo.mtx);
+	m_freem(m);
+	m_freem(opts);
+	ip6stat.ip6s_delivered--;
+	return IPPROTO_DONE;
+
 }
 
 /*
@@ -2036,11 +2162,11 @@ icmp6_reflect(m, off)
 	int type, code;
 	struct ifnet *outif = NULL;
 	struct sockaddr_in6 sa6_src, sa6_dst;
-#ifdef COMPAT_RFC1885
-	int mtu = IPV6_MMTU;
-	struct sockaddr_in6 *sin6 = &icmp6_reflect_rt.ro_dst;
-#endif
 	u_int32_t oflow;
+	struct ip6_out_args ip6oa = { IFSCOPE_NONE, 0 };
+
+	if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL)
+		ip6oa.ip6oa_boundif = m->m_pkthdr.rcvif->if_index;
 
 	/* too short to reflect */
 	if (off < sizeof(struct ip6_hdr)) {
@@ -2098,74 +2224,42 @@ icmp6_reflect(m, off)
 	 * XXX: make sure to embed scope zone information, using
 	 * already embedded IDs or the received interface (if any).
 	 * Note that rcvif may be NULL.
-	 * TODO: scoped routing case (XXX).
 	 */
 	bzero(&sa6_src, sizeof(sa6_src));
 	sa6_src.sin6_family = AF_INET6;
 	sa6_src.sin6_len = sizeof(sa6_src);
 	sa6_src.sin6_addr = ip6->ip6_dst;
 	in6_recoverscope(&sa6_src, &ip6->ip6_dst, m->m_pkthdr.rcvif);
-	in6_embedscope(&ip6->ip6_dst, &sa6_src, NULL, NULL);
+	in6_embedscope(&ip6->ip6_dst, &sa6_src, NULL, NULL, NULL);
 	bzero(&sa6_dst, sizeof(sa6_dst));
 	sa6_dst.sin6_family = AF_INET6;
 	sa6_dst.sin6_len = sizeof(sa6_dst);
 	sa6_dst.sin6_addr = t;
 	in6_recoverscope(&sa6_dst, &t, m->m_pkthdr.rcvif);
-	in6_embedscope(&t, &sa6_dst, NULL, NULL);
+	in6_embedscope(&t, &sa6_dst, NULL, NULL, NULL);
 
-#ifdef COMPAT_RFC1885
-	/*
-	 * xxx guess MTU
-	 * RFC 1885 requires that echo reply should be truncated if it
-	 * does not fit in with (return) path MTU, but the description was
-	 * removed in the new spec.
-	 */
-	if (icmp6_reflect_rt.ro_rt == NULL ||
-	    !(icmp6_reflect_rt.ro_rt->rt_flags & RTF_UP) ||
-	    icmp6_reflect_rt.ro_rt->generation_id != route_generation ||
-	    ! (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &ip6->ip6_dst))) {
-		if (icmp6_reflect_rt.ro_rt) {
-			rtfree(icmp6_reflect_rt.ro_rt);
-			icmp6_reflect_rt.ro_rt = 0;
-		}
-		bzero(sin6, sizeof(*sin6));
-		sin6->sin6_family = PF_INET6;
-		sin6->sin6_len = sizeof(struct sockaddr_in6);
-		sin6->sin6_addr = ip6->ip6_dst;
-
-		rtalloc_ign((struct route *)&icmp6_reflect_rt.ro_rt,
-			    RTF_PRCLONING);
-	}
-
-	if (icmp6_reflect_rt.ro_rt == 0)
-		goto bad;
-
-	RT_LOCK(icmp6_reflect_rt.ro_rt);
-	if ((icmp6_reflect_rt.ro_rt->rt_flags & RTF_HOST)
-	    && mtu < icmp6_reflect_rt.ro_rt->rt_ifp->if_mtu)
-		mtu = icmp6_reflect_rt.ro_rt->rt_rmx.rmx_mtu;
-	RT_UNLOCK(icmp6_reflect_rt.ro_rt);
-
-	if (mtu < m->m_pkthdr.len) {
-		plen -= (m->m_pkthdr.len - mtu);
-		m_adj(m, mtu - m->m_pkthdr.len);
-	}
-#endif
 	/*
 	 * If the incoming packet was addressed directly to us(i.e. unicast),
 	 * use dst as the src for the reply.
-	 * The IN6_IFF_NOTREADY case would be VERY rare, but is possible
+	 * The IN6_IFF_NOTREADY case should be VERY rare, but is possible
 	 * (for example) when we encounter an error while forwarding procedure
 	 * destined to a duplicated address of ours.
+	 * Note that ip6_getdstifaddr() may fail if we are in an error handling
+	 * procedure of an outgoing packet of our own, in which case we need
+	 * to search in the ifaddr list.
 	 */
-	lck_mtx_lock(nd6_mutex);
-	for (ia = in6_ifaddrs; ia; ia = ia->ia_next)
+	lck_rw_lock_shared(&in6_ifaddr_rwlock);
+	for (ia = in6_ifaddrs; ia; ia = ia->ia_next) {
+		IFA_LOCK(&ia->ia_ifa);
 		if (IN6_ARE_ADDR_EQUAL(&t, &ia->ia_addr.sin6_addr) &&
 		    (ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) == 0) {
+			IFA_UNLOCK(&ia->ia_ifa);
 			src = &t;
 			break;
 		}
-	lck_mtx_unlock(nd6_mutex);
+		IFA_UNLOCK(&ia->ia_ifa);
+	}
+	lck_rw_done(&in6_ifaddr_rwlock);
 	if (ia == NULL && IN6_IS_ADDR_LINKLOCAL(&t) && (m->m_flags & M_LOOP)) {
 		/*
 		 * This is the case if the dst is our link-local address
@@ -2174,8 +2268,9 @@ icmp6_reflect(m, off)
 		src = &t;
 	}
 
-	if (src == 0) {
+	if (src == NULL) {
 		int e;
+		struct sockaddr_in6 sin6;
 		struct route_in6 ro;
 
 		/*
@@ -2183,8 +2278,14 @@ icmp6_reflect(m, off)
 		 * that we do not own.  Select a source address based on the
 		 * source address of the erroneous packet.
 		 */
+		bzero(&sin6, sizeof(sin6));
+		sin6.sin6_family = AF_INET6;
+		sin6.sin6_len = sizeof(sin6);
+		sin6.sin6_addr = ip6->ip6_dst; /* zone ID should be embedded */
+
 		bzero(&ro, sizeof(ro));
-		src = in6_selectsrc(&sa6_src, NULL, NULL, &ro, NULL, &src_storage, &e);
+		src = in6_selectsrc(&sin6, NULL, NULL, &ro, &outif,
+		    &src_storage, ip6oa.ip6oa_boundif, &e);
 		if (ro.ro_rt)
 			rtfree(ro.ro_rt); /* XXX: we could use this */
 		if (src == NULL) {
@@ -2195,10 +2296,8 @@ icmp6_reflect(m, off)
 			goto bad;
 		}
 	}
-
-	ip6->ip6_src = *src;
-
 	oflow = ip6->ip6_flow; /* Save for later */
+	ip6->ip6_src = *src;
 	ip6->ip6_flow = 0;
 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
 	ip6->ip6_vfc |= IPV6_VERSION;
@@ -2206,14 +2305,16 @@ icmp6_reflect(m, off)
 		ip6->ip6_flow |= (oflow & htonl(0x0ff00000));
 	}
 	ip6->ip6_nxt = IPPROTO_ICMPV6;
+	lck_rw_lock_shared(nd_if_rwlock);
+	if (outif)
+		ip6->ip6_hlim = ND_IFINFO(outif)->chlim;
 	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_index < nd_ifinfo_indexlim) {
 		/* XXX: This may not be the outgoing interface */
-		lck_rw_lock_shared(nd_if_rwlock);
 		ip6->ip6_hlim = nd_ifinfo[m->m_pkthdr.rcvif->if_index].chlim;
-		lck_rw_done(nd_if_rwlock);
 	} else {
 		ip6->ip6_hlim = ip6_defhlim;
 	}
+	lck_rw_done(nd_if_rwlock);
 	/* Use the same traffic class as in the request to match IPv4 */
 	icmp6->icmp6_cksum = 0;
 	icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
@@ -2230,28 +2331,24 @@ icmp6_reflect(m, off)
 		(void)ipsec_setsocket(m, NULL);
 #endif /*IPSEC*/
 
-#ifdef COMPAT_RFC1885
-	ip6_output(m, NULL, &icmp6_reflect_rt, 0, NULL, &outif, 0);
-#else
-	ip6_output(m, NULL, NULL, 0, NULL, &outif, 0);
-#endif
-	if (outif)
+	if (outif != NULL) {
+		ifnet_release(outif);
+		outif = NULL;
+	}
+	ip6_output(m, NULL, NULL, IPV6_OUTARGS, NULL, &outif, &ip6oa);
+	if (outif != NULL) {
 		icmp6_ifoutstat_inc(outif, type, code);
-
+		ifnet_release(outif);
+	}
 	return;
 
- bad:
+bad:
 	m_freem(m);
+	if (outif != NULL)
+		ifnet_release(outif);
 	return;
 }
 
-void
-icmp6_fasttimo()
-{
-
-	mld6_fasttimeo();
-}
-
 static const char *
 icmp6_redirect_diag(src6, dst6, tgt6)
 	struct in6_addr *src6;
@@ -2307,10 +2404,10 @@ icmp6_redirect_input(m, off)
 	redtgt6 = nd_rd->nd_rd_target;
 	reddst6 = nd_rd->nd_rd_dst;
 
-	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
-		redtgt6.s6_addr16[1] = htons(ifp->if_index);
-	if (IN6_IS_ADDR_LINKLOCAL(&reddst6))
-		reddst6.s6_addr16[1] = htons(ifp->if_index);
+	if (in6_setscope(&redtgt6, m->m_pkthdr.rcvif, NULL) ||
+	    in6_setscope(&reddst6, m->m_pkthdr.rcvif, NULL)) {
+		goto freeit;
+	}
 
 	/* validation */
 	if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
@@ -2335,7 +2432,7 @@ icmp6_redirect_input(m, off)
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6));
-	rt = rtalloc1((struct sockaddr *)&sin6, 0, 0);
+	rt = rtalloc1_scoped((struct sockaddr *)&sin6, 0, 0, ifp->if_index);
 	if (rt) {
 		RT_LOCK(rt);
 		if (rt->rt_gateway == NULL ||
@@ -2494,6 +2591,7 @@ icmp6_redirect_output(m0, rt)
 	u_char *p;
 	struct ifnet *outif = NULL;
 	struct sockaddr_in6 src_sa;
+	struct ip6_out_args ip6oa = { IFSCOPE_NONE, 0 };
 
 	icmp6_errcount(&icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0);
 
@@ -2565,8 +2663,10 @@ icmp6_redirect_output(m0, rt)
 						 IN6_IFF_NOTREADY|
 						 IN6_IFF_ANYCAST)) == NULL)
 			goto fail;
+		IFA_LOCK(&ia->ia_ifa);
 		ifp_ll6 = ia->ia_addr.sin6_addr;
-		ifafree(&ia->ia_ifa);
+		IFA_UNLOCK(&ia->ia_ifa);
+		IFA_REMREF(&ia->ia_ifa);
 	}
 
 	/* get ip6 linklocal address for the router. */
@@ -2622,42 +2722,44 @@ icmp6_redirect_output(m0, rt)
 	if (!router_ll6)
 		goto nolladdropt;
 
-    {
-	/* target lladdr option */
-	struct rtentry *rt_router = NULL;
-	int len;
-	struct sockaddr_dl *sdl;
-	struct nd_opt_hdr *nd_opt;
-	char *lladdr;
-
-	/* Callee returns a locked route upon success */
-	rt_router = nd6_lookup(router_ll6, 0, ifp, 0);
-	if (!rt_router)
-		goto nolladdropt;
-	RT_LOCK_ASSERT_HELD(rt_router);
-	len = sizeof(*nd_opt) + ifp->if_addrlen;
-	len = (len + 7) & ~7;	/* round by 8 */
-	/* safety check */
-	if (len + (p - (u_char *)ip6) > maxlen) {
+	{
+		/* target lladdr option */
+		struct rtentry *rt_router = NULL;
+		int len;
+		struct sockaddr_dl *sdl;
+		struct nd_opt_hdr *nd_opt;
+		char *lladdr;
+
+		/* Callee returns a locked route upon success */
+		rt_router = nd6_lookup(router_ll6, 0, ifp, 0);
+		if (!rt_router)
+			goto nolladdropt;
+		RT_LOCK_ASSERT_HELD(rt_router);
+		len = sizeof(*nd_opt) + ifp->if_addrlen;
+		len = (len + 7) & ~7;	/* round by 8 */
+		/* safety check */
+		if (len + (p - (u_char *)ip6) > maxlen) {
+			RT_REMREF_LOCKED(rt_router);
+			RT_UNLOCK(rt_router);
+			goto nolladdropt;
+		}
+
+		if (!(rt_router->rt_flags & RTF_GATEWAY) &&
+			(rt_router->rt_flags & RTF_LLINFO) &&
+			(rt_router->rt_gateway->sa_family == AF_LINK) &&
+			(sdl = (struct sockaddr_dl *)rt_router->rt_gateway) &&
+			sdl->sdl_alen) {
+				nd_opt = (struct nd_opt_hdr *)p;
+				nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
+				nd_opt->nd_opt_len = len >> 3;
+				lladdr = (char *)(nd_opt + 1);
+				bcopy(LLADDR(sdl), lladdr, ifp->if_addrlen);
+				p += len;
+		}
 		RT_REMREF_LOCKED(rt_router);
 		RT_UNLOCK(rt_router);
-		goto nolladdropt;
-	}
-	if (!(rt_router->rt_flags & RTF_GATEWAY) &&
-	    (rt_router->rt_flags & RTF_LLINFO) &&
-	    (rt_router->rt_gateway->sa_family == AF_LINK) &&
-	    (sdl = (struct sockaddr_dl *)rt_router->rt_gateway) &&
-	    sdl->sdl_alen) {
-		nd_opt = (struct nd_opt_hdr *)p;
-		nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
-		nd_opt->nd_opt_len = len >> 3;
-		lladdr = (char *)(nd_opt + 1);
-		bcopy(LLADDR(sdl), lladdr, ifp->if_addrlen);
-		p += len;
-	}
-	RT_REMREF_LOCKED(rt_router);
-	RT_UNLOCK(rt_router);
-    }
+	}
+
 nolladdropt:;
 
 	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
@@ -2741,20 +2843,11 @@ nolladdropt:;
     }
 noredhdropt:;
 
-	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_src))
-		sip6->ip6_src.s6_addr16[1] = 0;
-	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_dst))
-		sip6->ip6_dst.s6_addr16[1] = 0;
-#if 0
-	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
-		ip6->ip6_src.s6_addr16[1] = 0;
-	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst))
-		ip6->ip6_dst.s6_addr16[1] = 0;
-#endif
-	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_target))
-		nd_rd->nd_rd_target.s6_addr16[1] = 0;
-	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_dst))
-		nd_rd->nd_rd_dst.s6_addr16[1] = 0;
+	/* XXX: clear embedded link IDs in the inner header */
+	in6_clearscope(&sip6->ip6_src);
+	in6_clearscope(&sip6->ip6_dst);
+	in6_clearscope(&nd_rd->nd_rd_target);
+	in6_clearscope(&nd_rd->nd_rd_dst);
 
 	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
 
@@ -2768,10 +2861,14 @@ noredhdropt:;
 	if (ipsec_bypass == 0)
 		(void)ipsec_setsocket(m, NULL);
 #endif /*IPSEC*/
-	ip6_output(m, NULL, NULL, 0, NULL, &outif, 0);
+
+	ip6oa.ip6oa_boundif = ifp->if_index;
+
+	ip6_output(m, NULL, NULL, IPV6_OUTARGS, NULL, &outif, &ip6oa);
 	if (outif) {
 		icmp6_ifstat_inc(outif, ifs6_out_msg);
 		icmp6_ifstat_inc(outif, ifs6_out_redirect);
+		ifnet_release(outif);
 	}
 	icmp6stat.icp6s_outhist[ND_REDIRECT]++;
 
@@ -2786,11 +2883,6 @@ fail:
 		m_freem(m0);
 }
 
-#if HAVE_NRL_INPCB
-#define sotoin6pcb	sotoinpcb
-#define in6pcb		inpcb
-#define in6p_icmp6filt	inp_icmp6filt
-#endif
 /*
  * ICMPv6 socket option processing.
  */
@@ -2823,7 +2915,7 @@ icmp6_ctloutput(so, sopt)
 		    {
 			struct icmp6_filter *p;
 
-			if (optlen != sizeof(*p)) {
+			if (optlen != 0 && optlen != sizeof(*p)) {
 				error = EMSGSIZE;
 				break;
 			}
@@ -2831,8 +2923,17 @@ icmp6_ctloutput(so, sopt)
 				error = EINVAL;
 				break;
 			}
-			error = sooptcopyin(sopt, inp->in6p_icmp6filt, optlen,
-				optlen);
+
+			if (optlen == 0) {
+				/* According to RFC 3542, an installed filter can be
+				 * cleared by issuing a setsockopt for ICMP6_FILTER
+				 * with a zero length.
+				 */
+				ICMP6_FILTER_SETPASSALL(inp->in6p_icmp6filt);
+			} else {
+				error = sooptcopyin(sopt, inp->in6p_icmp6filt, optlen,
+					optlen);
+			}
 			break;
 		    }
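The zero-length case follows RFC 3542, which defines ICMP6_FILTER and allows an installed filter to be removed with a zero-length setsockopt. A hedged userland sketch of both operations, assuming a raw ICMPv6 socket s:

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>

static int
demo_icmp6_filter(int s)
{
	struct icmp6_filter filt;

	/* Pass only Echo Replies. */
	ICMP6_FILTER_SETBLOCKALL(&filt);
	ICMP6_FILTER_SETPASS(ICMP6_ECHO_REPLY, &filt);
	if (setsockopt(s, IPPROTO_ICMPV6, ICMP6_FILTER,
	    &filt, sizeof (filt)) == -1)
		return (-1);

	/* Clear the filter (back to pass-all) per RFC 3542: zero length. */
	return (setsockopt(s, IPPROTO_ICMPV6, ICMP6_FILTER, NULL, 0));
}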
 
@@ -2851,7 +2952,7 @@ icmp6_ctloutput(so, sopt)
 				break;
 			}
 			error = sooptcopyout(sopt, inp->in6p_icmp6filt,
-				sizeof(struct icmp6_filter));
+					min(sizeof(struct icmp6_filter), optlen));
 			break;
 		    }
 
@@ -2864,11 +2965,6 @@ icmp6_ctloutput(so, sopt)
 
 	return(error);
 }
-#if HAVE_NRL_INPCB
-#undef sotoin6pcb
-#undef in6pcb
-#undef in6p_icmp6filt
-#endif
 
 /*
  * ICMPv6 socket datagram option processing.
@@ -2892,16 +2988,19 @@ icmp6_dgram_ctloutput(struct socket *so, struct sockopt *sopt)
 		return EINVAL;
 		
 	switch (sopt->sopt_name) {
-		case IPV6_PKTOPTIONS:
 		case IPV6_UNICAST_HOPS:
 		case IPV6_CHECKSUM:
 		case IPV6_FAITH:
 		case IPV6_V6ONLY:
+		case IPV6_USE_MIN_MTU:
+		case IPV6_RECVRTHDR:
+		case IPV6_RECVPKTINFO:
+		case IPV6_RECVHOPLIMIT:
+		case IPV6_PATHMTU:
 		case IPV6_PKTINFO:
 		case IPV6_HOPLIMIT:
 		case IPV6_HOPOPTS:
 		case IPV6_DSTOPTS:
-		case IPV6_RTHDR:
 		case IPV6_MULTICAST_IF:
 		case IPV6_MULTICAST_HOPS:
 		case IPV6_MULTICAST_LOOP:
@@ -2911,6 +3010,15 @@ icmp6_dgram_ctloutput(struct socket *so, struct sockopt *sopt)
 		case IPV6_IPSEC_POLICY:
 		case IPV6_RECVTCLASS:
 		case IPV6_TCLASS:
+		case IPV6_2292PKTOPTIONS:
+		case IPV6_2292PKTINFO:
+		case IPV6_2292HOPLIMIT:
+		case IPV6_2292HOPOPTS:
+		case IPV6_2292DSTOPTS:
+		case IPV6_2292RTHDR:
+		case IPV6_BOUND_IF:
+		case IPV6_NO_IFT_CELLULAR:
+
 			return ip6_ctloutput(so, sopt);
 		
 		default:
@@ -2921,23 +3029,24 @@ icmp6_dgram_ctloutput(struct socket *so, struct sockopt *sopt)
 }
 
 __private_extern__ int
-icmp6_dgram_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr *nam,
-         struct mbuf *control, __unused struct proc *p)
+icmp6_dgram_send(struct socket *so, int flags, struct mbuf *m,
+    struct sockaddr *nam, struct mbuf *control, struct proc *p)
 {
+#pragma unused(flags, p)
 	int error = 0;
 	struct inpcb *inp = sotoinpcb(so);
 	struct sockaddr_in6 tmp;
-	struct sockaddr_in6 *dst;
+	struct sockaddr_in6 *dst = (struct sockaddr_in6 *)nam;
 	struct icmp6_hdr *icmp6;
 
 	if (so->so_uid == 0)
-		return rip6_output(m, so, (struct sockaddr_in6 *) nam, control);
+		return rip6_output(m, so, (struct sockaddr_in6 *) nam, control, 0);
 
 	/* always copy sockaddr to avoid overwrites */
 	if (so->so_state & SS_ISCONNECTED) {
 		if (nam) {
-				m_freem(m);
-				return EISCONN;
+			m_freem(m);
+			return EISCONN;
 		}
 		/* XXX */
 		bzero(&tmp, sizeof(tmp));
@@ -2948,8 +3057,8 @@ icmp6_dgram_send(struct socket *so, __unused int flags, struct mbuf *m, struct s
 		dst = &tmp;
 	} else {
 		if (nam == NULL) {
-				m_freem(m);
-				return ENOTCONN;
+			m_freem(m);
+			return ENOTCONN;
 		}
 		tmp = *(struct sockaddr_in6 *)nam;
 		dst = &tmp;
@@ -2988,7 +3097,7 @@ icmp6_dgram_send(struct socket *so, __unused int flags, struct mbuf *m, struct s
 	}
 #endif
 
-	return rip6_output(m, so, (struct sockaddr_in6 *) nam, control);
+	return rip6_output(m, so, (struct sockaddr_in6 *) nam, control, 0);
 bad:
 	m_freem(m);
 	return error;
@@ -3124,4 +3233,3 @@ icmp6_ratelimit(
 
 	return ret;
 }
-
diff --git a/bsd/netinet6/in6.c b/bsd/netinet6/in6.c
index a9fd82b98..f11a99041 100644
--- a/bsd/netinet6/in6.c
+++ b/bsd/netinet6/in6.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -106,6 +106,8 @@
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 #include <sys/kern_event.h>
+#include <sys/mcache.h>
+#include <sys/protosw.h>
 
 #include <kern/locks.h>
 #include <kern/zalloc.h>
@@ -122,11 +124,10 @@
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
-#ifndef SCOPEDROUTING
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
-#endif
+#include <netinet/icmp6.h>
 
 #include <netinet6/nd6.h>
 #include <netinet/ip6.h>
@@ -135,9 +136,8 @@
 #include <netinet6/ip6_mroute.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet6/scope6_var.h>
-#ifndef SCOPEDROUTING
+#include <netinet6/in6_var.h>
 #include <netinet6/in6_pcb.h>
-#endif
 
 #include <net/net_osdep.h>
 
@@ -145,9 +145,6 @@
 #include <net/pfvar.h>
 #endif /* PF */
 
-#ifndef __APPLE__
-MALLOC_DEFINE(M_IPMADDR, "in6_multi", "internet multicast address");
-#endif
  /*
  * Definitions of some costant IP6 addresses.
  */
@@ -159,8 +156,12 @@ const struct in6_addr in6addr_linklocal_allnodes =
 	IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 const struct in6_addr in6addr_linklocal_allrouters =
 	IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_linklocal_allv2routers =
+	IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT;
 
 const struct in6_addr in6mask0 = IN6MASK0;
+const struct in6_addr in6mask7 = IN6MASK7;
+const struct in6_addr in6mask16 = IN6MASK16;
 const struct in6_addr in6mask32 = IN6MASK32;
 const struct in6_addr in6mask64 = IN6MASK64;
 const struct in6_addr in6mask96 = IN6MASK96;
@@ -173,36 +174,53 @@ static int in6_lifaddr_ioctl(struct socket *, u_long, caddr_t,
 	struct ifnet *, struct proc *);
 static int in6_ifinit(struct ifnet *, struct in6_ifaddr *,
 			   struct sockaddr_in6 *, int);
-static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *, int);
+static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *);
 static struct in6_ifaddr *in6_ifaddr_alloc(int);
+static void in6_ifaddr_attached(struct ifaddr *);
+static void in6_ifaddr_detached(struct ifaddr *);
 static void in6_ifaddr_free(struct ifaddr *);
 static void in6_ifaddr_trace(struct ifaddr *, int);
 static struct in6_aliasreq *in6_aliasreq_to_native(void *, int,
     struct in6_aliasreq *);
 
-struct in6_multihead in6_multihead;	/* XXX BSS initialization */
 extern lck_mtx_t *nd6_mutex;
-extern lck_mtx_t *ip6_mutex;
 extern int in6_init2done;
 
+#define	IN6IFA_TRACE_HIST_SIZE	32	/* size of trace history */
+
+/* For gdb */
+__private_extern__ unsigned int in6ifa_trace_hist_size = IN6IFA_TRACE_HIST_SIZE;
+
 struct in6_ifaddr_dbg {
 	struct in6_ifaddr	in6ifa;			/* in6_ifaddr */
 	struct in6_ifaddr	in6ifa_old;		/* saved in6_ifaddr */
-	u_int16_t		in6ifa_refhold_cnt;	/* # of ifaref */
-	u_int16_t		in6ifa_refrele_cnt;	/* # of ifafree */
+	u_int16_t		in6ifa_refhold_cnt;	/* # of IFA_ADDREF */
+	u_int16_t		in6ifa_refrele_cnt;	/* # of IFA_REMREF */
 	/*
 	 * Alloc and free callers.
 	 */
 	ctrace_t		in6ifa_alloc;
 	ctrace_t		in6ifa_free;
 	/*
-	 * Circular lists of ifaref and ifafree callers.
+	 * Circular lists of IFA_ADDREF and IFA_REMREF callers.
+	 */
+	ctrace_t		in6ifa_refhold[IN6IFA_TRACE_HIST_SIZE];
+	ctrace_t		in6ifa_refrele[IN6IFA_TRACE_HIST_SIZE];
+	/*
+	 * Trash list linkage
 	 */
-	ctrace_t		in6ifa_refhold[CTRACE_HIST_SIZE];
-	ctrace_t		in6ifa_refrele[CTRACE_HIST_SIZE];
+	TAILQ_ENTRY(in6_ifaddr_dbg) in6ifa_trash_link;
 };
 
-static unsigned int in6ifa_debug;		/* debug flags */
+/* List of trash in6_ifaddr entries protected by in6ifa_trash_lock */
+static TAILQ_HEAD(, in6_ifaddr_dbg) in6ifa_trash_head;
+static decl_lck_mtx_data(, in6ifa_trash_lock);
+
+#if DEBUG
+static unsigned int in6ifa_debug = 1;		/* debugging (enabled) */
+#else
+static unsigned int in6ifa_debug;		/* debugging (disabled) */
+#endif /* !DEBUG */
 static unsigned int in6ifa_size;		/* size of zone element */
 static struct zone *in6ifa_zone;		/* zone for in6_ifaddr */
 
@@ -232,6 +250,7 @@ in6_ifloop_request(int cmd, struct ifaddr *ifa)
 	 * would be happy.  Note that we assume the caller of the function
 	 * (probably implicitly) set nd6_rtrequest() to ifa->ifa_rtrequest,
 	 * which changes the outgoing interface to the loopback interface.
+	 * ifa_addr for INET6 is set once during init; no need to hold lock.
 	 */
 	lck_mtx_lock(rnh_lock);
 	e = rtrequest_locked(cmd, ifa->ifa_addr, ifa->ifa_addr,
@@ -290,7 +309,10 @@ in6_ifaddloop(struct ifaddr *ifa)
 {
 	struct rtentry *rt;
 
-	/* If there is no loopback entry, allocate one. */
+	/*
+	 * If there is no loopback entry, allocate one.  ifa_addr for
+	 * INET6 is set once during init; no need to hold lock.
+	 */
 	rt = rtalloc1(ifa->ifa_addr, 0, 0);
 	if (rt != NULL)
 		RT_LOCK(rt);
@@ -312,7 +334,7 @@ in6_ifaddloop(struct ifaddr *ifa)
  * if it exists.
  */
 static void
-in6_ifremloop(struct ifaddr *ifa, int locked)
+in6_ifremloop(struct ifaddr *ifa)
 {
 	struct in6_ifaddr *ia;
 	struct rtentry *rt;
@@ -334,26 +356,29 @@ in6_ifremloop(struct ifaddr *ifa, int locked)
 	 * (probably p2p) interfaces.
 	 * XXX: we should avoid such a configuration in IPv6...
 	 */
-	if (!locked)
-		lck_mtx_lock(nd6_mutex);
+	lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
 	for (ia = in6_ifaddrs; ia; ia = ia->ia_next) {
+		IFA_LOCK(&ia->ia_ifa);
 		if (IN6_ARE_ADDR_EQUAL(IFA_IN6(ifa), &ia->ia_addr.sin6_addr)) {
 			ia_count++;
-			if (ia_count > 1)
+			if (ia_count > 1) {
+				IFA_UNLOCK(&ia->ia_ifa);
 				break;
+			}
 		}
+		IFA_UNLOCK(&ia->ia_ifa);
 	}
-	if (!locked)
-		lck_mtx_unlock(nd6_mutex);
+	lck_rw_done(&in6_ifaddr_rwlock);
 
 	if (ia_count == 1) {
 		/*
 		 * Before deleting, check if a corresponding loopbacked host
 		 * route surely exists.  With this check, we can avoid to
 		 * delete an interface direct route whose destination is same
-		 * as the address being removed.  This can happen when remofing
+		 * as the address being removed.  This can happen when removing
 		 * a subnet-router anycast address on an interface attahced
-		 * to a shared medium.
+		 * to a shared medium.  ifa_addr for INET6 is set once during
+		 * init; no need to hold lock.
 		 */
 		rt = rtalloc1(ifa->ifa_addr, 0, 0);
 		if (rt != NULL) {
@@ -370,43 +395,6 @@ in6_ifremloop(struct ifaddr *ifa, int locked)
 	}
 }
 
-#if 0
-/* Not used */
-int
-in6_ifindex2scopeid(idx)
-	int idx;
-{
-	struct ifnet *ifp;
-	struct ifaddr *ifa;
-	struct sockaddr_in6 *sin6;
-
-	ifnet_head_lock_shared();
-	if (idx <= 0 || if_index < idx) {
-		ifnet_head_done();
-		return -1;
-	}
-	
-	ifp = ifindex2ifnet[idx];
-	ifnet_head_done();
-
-	ifnet_lock_shared(ifp);
-	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
-	{
-		if (ifa->ifa_addr->sa_family != AF_INET6)
-			continue;
-		sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
-		if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
-			int scopeid = sin6->sin6_scope_id & 0xffff;
-			ifnet_lock_done(ifp);
-			return scopeid;
-		}
-	}
-	ifnet_lock_done(ifp);
-
-	return -1;
-}
-#endif
-
 
 int
 in6_mask2len(mask, lim0)
@@ -416,8 +404,8 @@ in6_mask2len(mask, lim0)
 	int x = 0, y;
 	u_char *lim = lim0, *p;
 
-	if (lim0 == NULL ||
-	    lim0 - (u_char *)mask > sizeof(*mask)) /* ignore the scope_id part */
+	/* ignore the scope_id part */
+	if (lim0 == NULL || lim0 - (u_char *)mask > sizeof(*mask))
 		lim = (u_char *)mask + sizeof(*mask);
 	for (p = (u_char *)mask; p < lim; x++, p++) {
 		if (*p != 0xff)
@@ -437,12 +425,12 @@ in6_mask2len(mask, lim0)
 	 */
 	if (p < lim) {
 		if (y != 0 && (*p & (0x00ff >> y)) != 0)
-			return(-1);
+			return (-1);
 		for (p = p + 1; p < lim; p++)
 			if (*p != 0)
-				return(-1);
+				return (-1);
 	}
-	
+
 	return x * 8 + y;
 }
 
@@ -536,6 +524,25 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 		return (mrt6_ioctl(cmd, data));
 	}
 #endif
+
+	switch (cmd) {
+	case SIOCAADDRCTL_POLICY:
+	case SIOCDADDRCTL_POLICY:
+		if (!privileged)
+			return (EPERM);
+		return (in6_src_ioctl(cmd, data));
+	}
+
+	switch (cmd) {
+	case SIOCDRADD_IN6_32:
+	case SIOCDRADD_IN6_64:
+	case SIOCDRDEL_IN6_32:
+	case SIOCDRDEL_IN6_64:
+		if (!privileged)
+			return (EPERM);
+		return (defrtrlist_ioctl(cmd, data));
+	}
+
 	if (ifp == NULL)
 		return (EOPNOTSUPP);
 
@@ -612,7 +619,16 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 	}
 
 	/*
-	 * Point ifra and sa6 to the right places depending on the command.
+	 * Find address for this interface, if it exists.
+	 *
+	 * In the netinet code, we checked ifra_addr only in the SIOCSIF*ADDR
+	 * operations and used the first interface address as the target of
+	 * the other operations (without checking ifra_addr), because the
+	 * netinet code/API assumed at most one interface address per
+	 * interface.  Since IPv6 allows a node to assign multiple addresses
+	 * on a single interface, we almost always check for the presence of
+	 * ifra_addr and reject invalid ones here.  This also reduces code
+	 * duplication among the SIOC*_IN6 operations.
 	 */
 	switch (cmd) {
 	case SIOCLL_START_32:
@@ -643,6 +659,9 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 	case SIOCGIFPSRCADDR_IN6:
 	case SIOCGIFPDSTADDR_IN6:
 	case SIOCGIFAFLAG_IN6:
+	case SIOCSNDFLUSH_IN6:
+	case SIOCSPFXFLUSH_IN6:
+	case SIOCSRTRFLUSH_IN6:
 	case SIOCGIFALIFETIME_IN6:
 	case SIOCSIFALIFETIME_IN6:
 	case SIOCGIFSTAT_IN6:
@@ -665,25 +684,39 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 		/* NOTREACHED */
 
 	case SIOCAUTOCONF_STOP: {
-		struct in6_ifaddr *nia = NULL;
-
 		ifnet_lock_exclusive(ifp);
 		ifp->if_eflags &= ~IFEF_ACCEPT_RTADVD;
 		ifnet_lock_done(ifp);
 
-		/* nuke prefix list.  this may try to remove some ifaddrs as well */
-		in6_purgeprefix(ifp);
-
-		/* removed autoconfigured address from interface */
-		lck_mtx_lock(nd6_mutex);
-		for (ia = in6_ifaddrs; ia != NULL; ia = nia) {
-			nia = ia->ia_next;
-			if (ia->ia_ifa.ifa_ifp != ifp)
+		/* Remove autoconfigured address from interface */
+		lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
+		ia = in6_ifaddrs;
+		while (ia != NULL) {
+			if (ia->ia_ifa.ifa_ifp != ifp) {
+				ia = ia->ia_next;
 				continue;
-			if (ia->ia6_flags & IN6_IFF_AUTOCONF)
-				in6_purgeaddr(&ia->ia_ifa, 1);
+			}
+			IFA_LOCK(&ia->ia_ifa);
+			if (ia->ia6_flags & IN6_IFF_AUTOCONF) {
+				IFA_ADDREF_LOCKED(&ia->ia_ifa);	/* for us */
+				IFA_UNLOCK(&ia->ia_ifa);
+				lck_rw_done(&in6_ifaddr_rwlock);
+				in6_purgeaddr(&ia->ia_ifa);
+				IFA_REMREF(&ia->ia_ifa);	/* for us */
+				lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
+				/*
+				 * Purging the address caused in6_ifaddr_rwlock
+				 * to be dropped and reacquired;
+				 * therefore search again from the beginning
+				 * of the in6_ifaddrs list.
+				 */
+				ia = in6_ifaddrs;
+				continue;
+			}
+			IFA_UNLOCK(&ia->ia_ifa);
+			ia = ia->ia_next;
 		}
-		lck_mtx_unlock(nd6_mutex);
+		lck_rw_done(&in6_ifaddr_rwlock);
 		return (0);
 	}
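The purge loop above (and the SIOCLL_STOP loop below) uses a restart-scan shape: take a reference under the lock, drop the list lock so in6_purgeaddr() can run unlocked, then rescan from the head because the list may have mutated meanwhile. A generic sketch with hypothetical list and lock primitives:

struct node {
	struct node *next;
	int doomed;
};

/*
 * purge() is assumed to unlink and free n, so restarting from the head
 * makes forward progress; lock()/unlock() guard the list itself.
 */
static void
purge_all_doomed(struct node **head, void (*purge)(struct node *),
    void (*lock)(void), void (*unlock)(void))
{
	struct node *n;

	lock();
	for (n = *head; n != NULL; ) {
		if (!n->doomed) {
			n = n->next;
			continue;
		}
		unlock();	/* purge may block or take other locks */
		purge(n);
		lock();
		n = *head;	/* list changed while unlocked; restart */
	}
	unlock();
}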
 
@@ -694,7 +727,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 		 * be done here.  They are currently done in in6_ifattach()
 		 * for the interfaces that need it.
 		 */
-		if (((ifp->if_type == IFT_PPP) || ((ifp->if_eflags & IFEF_NOAUTOIPV6LL) != 0))  &&
+		if ((ifp->if_eflags & IFEF_NOAUTOIPV6LL) != 0 &&
 		    ifra->ifra_addr.sin6_family == AF_INET6 &&
 		    ifra->ifra_dstaddr.sin6_family == AF_INET6) {
 			/* some interfaces may provide LinkLocal addresses */
@@ -706,28 +739,41 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 		/* NOTREACHED */
 
 	case SIOCLL_STOP: {
-		struct in6_ifaddr *nia = NULL;
-
-		/* removed link local addresses from interface */
-
-		lck_mtx_lock(nd6_mutex);
-		for (ia = in6_ifaddrs; ia != NULL; ia = nia) {
-			nia = ia->ia_next;
-			if (ia->ia_ifa.ifa_ifp != ifp)
+		/* Remove link local addresses from interface */
+		lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
+		ia = in6_ifaddrs;
+		while (ia != NULL) {
+			if (ia->ia_ifa.ifa_ifp != ifp) {
+				ia = ia->ia_next;
+				continue;
+			}
+			IFA_LOCK(&ia->ia_ifa);
+			if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) {
+				IFA_ADDREF_LOCKED(&ia->ia_ifa);	/* for us */
+				IFA_UNLOCK(&ia->ia_ifa);
+				lck_rw_done(&in6_ifaddr_rwlock);
+				in6_purgeaddr(&ia->ia_ifa);
+				IFA_REMREF(&ia->ia_ifa);	/* for us */
+				lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
+				/*
+				 * Purging the address caused in6_ifaddr_rwlock
+				 * to be dropped and reacquired;
+				 * therefore search again from the beginning
+				 * of the in6_ifaddrs list.
+				 */
+				ia = in6_ifaddrs;
 				continue;
-			if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr))
-				in6_purgeaddr(&ia->ia_ifa, 1);
+			}
+			IFA_UNLOCK(&ia->ia_ifa);
+			ia = ia->ia_next;
 		}
-		lck_mtx_unlock(nd6_mutex);
+		lck_rw_done(&in6_ifaddr_rwlock);
 		return (0);
 	}
 
 	case SIOCPROTOATTACH_IN6_32:
 	case SIOCPROTOATTACH_IN6_64:
-		if ((error = proto_plumb(PF_INET6, ifp)))
-			printf("SIOCPROTOATTACH_IN6: %s "
-				   "error=%d\n", if_name(ifp), error);
-		return (error);
+		return (in6_domifattach(ifp));
 		/* NOTREACHED */
 
 	case SIOCPROTODETACH_IN6:
@@ -772,8 +818,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 	case SIOCSIFNETMASK_IN6:
 		/*
 		 * Since IPv6 allows a node to assign multiple addresses
-		 * on a single interface, SIOCSIFxxx ioctls are not suitable
-		 * and should be unused.
+		 * on a single interface, SIOCSIFxxx ioctls are deprecated.
 		 */
 		/* we decided to obsolete this command (20000704) */
 		error = EINVAL;
@@ -782,10 +827,10 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 	case SIOCDIFADDR_IN6:
 		/*
 		 * for IPv4, we look for existing in_ifaddr here to allow
-		 * "ifconfig if0 delete" to remove first IPv4 address on the
-		 * interface.  For IPv6, as the spec allow multiple interface
-		 * address from the day one, we consider "remove the first one"
-		 * semantics to be not preferable.
+		 * "ifconfig if0 delete" to remove the first IPv4 address on
+		 * the interface.  For IPv6, as the spec has allowed multiple
+		 * interface addresses from day one, we consider "remove the
+		 * first one" semantics undesirable.
 		 */
 		if (ia == NULL) {
 			error = EADDRNOTAVAIL;
@@ -840,13 +885,17 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 
 			lt = (struct in6_addrlifetime_64 *)
 			    &ifr->ifr_ifru.ifru_lifetime;
-			if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME
-			 && lt->ia6t_vltime + timenow.tv_sec < timenow.tv_sec) {
+			if (((ia->ia6_flags & IN6_IFF_TEMPORARY) != 0
+			      || lt->ia6t_vltime != ND6_INFINITE_LIFETIME)
+			    && lt->ia6t_vltime + timenow.tv_sec <
+			       timenow.tv_sec) {
 				error = EINVAL;
 				goto ioctl_cleanup;
 			}
-			if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME
-			 && lt->ia6t_pltime + timenow.tv_sec < timenow.tv_sec) {
+			if (((ia->ia6_flags & IN6_IFF_TEMPORARY) != 0
+			      || lt->ia6t_pltime != ND6_INFINITE_LIFETIME)
+			    && lt->ia6t_pltime + timenow.tv_sec <
+			       timenow.tv_sec) {
 				error = EINVAL;
 				goto ioctl_cleanup;
 			}
@@ -855,13 +904,17 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 
 			lt = (struct in6_addrlifetime_32 *)
 			    &ifr->ifr_ifru.ifru_lifetime;
-			if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME
-			 && lt->ia6t_vltime + timenow.tv_sec < timenow.tv_sec) {
+			if (((ia->ia6_flags & IN6_IFF_TEMPORARY) != 0
+			      || lt->ia6t_vltime != ND6_INFINITE_LIFETIME)
+			    && lt->ia6t_vltime + timenow.tv_sec <
+			       timenow.tv_sec) {
 				error = EINVAL;
 				goto ioctl_cleanup;
 			}
-			if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME
-			 && lt->ia6t_pltime + timenow.tv_sec < timenow.tv_sec) {
+			if (((ia->ia6_flags & IN6_IFF_TEMPORARY) != 0
+			      || lt->ia6t_pltime != ND6_INFINITE_LIFETIME)
+			    && lt->ia6t_pltime + timenow.tv_sec <
+			       timenow.tv_sec) {
 				error = EINVAL;
 				goto ioctl_cleanup;
 			}
@@ -870,8 +923,15 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 	}
 
 	switch (cmd) {
+
 	case SIOCGIFADDR_IN6:
+		IFA_LOCK(&ia->ia_ifa);
 		ifr->ifr_addr = ia->ia_addr;
+		IFA_UNLOCK(&ia->ia_ifa);
+		if ((error = sa6_recoverscope(&ifr->ifr_addr)) != 0) {
+			IFA_REMREF(&ia->ia_ifa);
+			return (error);
+		}
 		break;
 
 	case SIOCGIFDSTADDR_IN6:
@@ -883,15 +943,25 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 		 * XXX: should we check if ifa_dstaddr is NULL and return
 		 * an error?
 		 */
+		IFA_LOCK(&ia->ia_ifa);
 		ifr->ifr_dstaddr = ia->ia_dstaddr;
+		IFA_UNLOCK(&ia->ia_ifa);
+		if ((error = sa6_recoverscope(&ifr->ifr_dstaddr)) != 0) {
+			IFA_REMREF(&ia->ia_ifa);
+			return (error);
+		}
 		break;
 
 	case SIOCGIFNETMASK_IN6:
+		IFA_LOCK(&ia->ia_ifa);
 		ifr->ifr_addr = ia->ia_prefixmask;
+		IFA_UNLOCK(&ia->ia_ifa);
 		break;
 
 	case SIOCGIFAFLAG_IN6:
+		IFA_LOCK(&ia->ia_ifa);
 		ifr->ifr_ifru.ifru_flags6 = ia->ia6_flags;
+		IFA_UNLOCK(&ia->ia_ifa);
 		break;
 
 	case SIOCGIFSTAT_IN6:
@@ -900,7 +970,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 			goto ioctl_cleanup;
 		}
 		index = ifp->if_index;
-		lck_mtx_lock(ip6_mutex);
+		lck_rw_lock_shared(&in6_ifs_rwlock);
 		if (in6_ifstat == NULL || index >= in6_ifstatmax
 		 || in6_ifstat[index] == NULL) {
 			/* return EAFNOSUPPORT? */
@@ -909,7 +979,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 		} else {
 			ifr->ifr_ifru.ifru_stat = *in6_ifstat[index];
 		}
-		lck_mtx_unlock(ip6_mutex);
+		lck_rw_done(&in6_ifs_rwlock);
 		break;
 
 	case SIOCGIFSTAT_ICMP6:
@@ -918,7 +988,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 			goto ioctl_cleanup;
 		}
 		index = ifp->if_index;
-		lck_mtx_lock(ip6_mutex);
+		lck_rw_lock_shared(&icmp6_ifs_rwlock);
 		if (icmp6_ifstat == NULL || index >= icmp6_ifstatmax ||
 		    icmp6_ifstat[index] == NULL) {
 			/* return EAFNOSUPPORT? */
@@ -927,10 +997,11 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 		} else {
 			ifr->ifr_ifru.ifru_icmp6stat = *icmp6_ifstat[index];
 		}
-		lck_mtx_unlock(ip6_mutex);
+		lck_rw_done(&icmp6_ifs_rwlock);
 		break;
 
 	case SIOCGIFALIFETIME_IN6:
+		IFA_LOCK(&ia->ia_ifa);
 		if (p64) {
 			struct in6_addrlifetime_64 *lt;
 
@@ -954,9 +1025,11 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 			lt->ia6t_pltime =
 			    (uint32_t)ia->ia6_lifetime.ia6t_pltime;
 		}
+		IFA_UNLOCK(&ia->ia_ifa);
 		break;
 
 	case SIOCSIFALIFETIME_IN6:
+		IFA_LOCK(&ia->ia_ifa);
 		if (p64) {
 			struct in6_addrlifetime_64 *lt;
 
@@ -979,16 +1052,19 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 			ia->ia6_lifetime.ia6t_pltime = lt->ia6t_pltime;
 		}
 		/* for sanity */
-		if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
+		if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME ||
+		    (ia->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
 			ia->ia6_lifetime.ia6t_expire =
 				timenow.tv_sec + ia->ia6_lifetime.ia6t_vltime;
 		} else
 			ia->ia6_lifetime.ia6t_expire = 0;
-		if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
+		if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME ||
+		    (ia->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
 			ia->ia6_lifetime.ia6t_preferred =
 				timenow.tv_sec + ia->ia6_lifetime.ia6t_pltime;
 		} else
 			ia->ia6_lifetime.ia6t_preferred = 0;
+		IFA_UNLOCK(&ia->ia_ifa);
 		break;
 
 	case SIOCAIFADDR_IN6_32:
@@ -996,17 +1072,13 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 		int i;
 		struct nd_prefix pr0, *pr;
 
-		/* Attempt to attache the protocol, in case it isn't attached */
-		error = proto_plumb(PF_INET6, ifp);
+		/* Attempt to attach the protocol, in case it isn't attached */
+		error = in6_domifattach(ifp);
 		if (error) {
-			if (error != EEXIST) {
-				printf("SIOCAIFADDR_IN6: %s can't plumb "
-				    "protocol error=%d\n", if_name(ifp), error);
+			if (error == EEXIST)
+				error = 0;
+			else
 				goto ioctl_cleanup;
-			}
-
-			/* Ignore, EEXIST */
-			error = 0;
 		} else {
 			/* PF_INET6 wasn't previously attached */
 			if ((error = in6_if_up(ifp, NULL)) != 0)
@@ -1017,7 +1089,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 		 * first, make or update the interface address structure,
 		 * and link it to the list.
 		 */
-		if ((error = in6_update_ifa(ifp, ifra, ia, M_WAITOK)) != 0)
+		if ((error = in6_update_ifa(ifp, ifra, ia, 0, M_WAITOK)) != 0)
 			goto ioctl_cleanup;
 
 		/*
@@ -1056,6 +1128,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 			((ifra->ifra_flags & IN6_IFF_AUTOCONF) != 0);
 		pr0.ndpr_vltime = ifra->ifra_lifetime.ia6t_vltime;
 		pr0.ndpr_pltime = ifra->ifra_lifetime.ia6t_pltime;
+		pr0.ndpr_stateflags |= NDPRF_STATIC;
 
 		/* add the prefix if there's one. */
 		if ((pr = nd6_prefix_lookup(&pr0)) == NULL) {
@@ -1063,7 +1136,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 			 * nd6_prelist_add will install the corresponding
 			 * interface route.
 			 */
-			if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0)
+			if ((error = nd6_prelist_add(&pr0, NULL, &pr, FALSE)) != 0)
 				goto ioctl_cleanup;
 			if (pr == NULL) {
 				log(LOG_ERR, "nd6_prelist_add succedded but "
@@ -1073,19 +1146,21 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 			}
 		}
 		if (ia != NULL)
-			ifafree(&ia->ia_ifa);
+			IFA_REMREF(&ia->ia_ifa);
 		if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr))
 		    == NULL) {
 			/* XXX: this should not happen! */
 			log(LOG_ERR, "in6_control: addition succeeded, but"
 			    " no ifaddr\n");
 		} else {
+			IFA_LOCK(&ia->ia_ifa);
 			if ((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0 &&
 			    ia->ia6_ndpr == NULL) { /* new autoconfed addr */
-				lck_mtx_lock(nd6_mutex);
-				pr->ndpr_refcnt++;
-				lck_mtx_unlock(nd6_mutex);
+				NDPR_LOCK(pr);
+				pr->ndpr_addrcnt++;
+				VERIFY(pr->ndpr_addrcnt != 0);
 				ia->ia6_ndpr = pr;
+				NDPR_ADDREF_LOCKED(pr);	/* for addr reference */
 
 				/*
 				 * If this is the first autoconf address from
@@ -1093,8 +1168,12 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 				 * as well (when specified).
 				 */
 				if (ip6_use_tempaddr &&
-				    pr->ndpr_refcnt == 1) {
+				    pr->ndpr_addrcnt == 1) {
 					int e;
+
+					NDPR_UNLOCK(pr);
+					IFA_UNLOCK(&ia->ia_ifa);
+
 					if ((e = in6_tmpifadd(ia, 1,
 					    M_WAITOK)) != 0) {
 						log(LOG_NOTICE, "in6_control: "
@@ -1103,19 +1182,25 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 						    "errno=%d\n",
 						    e);
 					}
+				} else {
+					NDPR_UNLOCK(pr);
+					IFA_UNLOCK(&ia->ia_ifa);
 				}
+			} else {
+				IFA_UNLOCK(&ia->ia_ifa);
 			}
-
 			/*
 			 * this might affect the status of autoconfigured
 			 * addresses, that is, this address might make
 			 * other addresses detached.
 			 */
-			pfxlist_onlink_check(0);
+			lck_mtx_lock(nd6_mutex);
+			pfxlist_onlink_check();
+			lck_mtx_unlock(nd6_mutex);
 		}
 
 		/* Drop use count held above during lookup/add */
-		ndpr_rele(pr, FALSE);
+		NDPR_REMREF(pr);
 #if PF
 		pf_ifaddr_hook(ifp, cmd);
 #endif /* PF */
@@ -1129,24 +1214,29 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 		/*
 		 * If the address being deleted is the only one that owns
 		 * the corresponding prefix, expire the prefix as well.
-		 * XXX: theoretically, we don't have to warry about such
+		 * XXX: theoretically, we don't have to worry about such
 		 * relationship, since we separate the address management
 		 * and the prefix management.  We do this, however, to provide
 		 * as much backward compatibility as possible in terms of
 		 * the ioctl operation.
+		 * Note that in6_purgeaddr() will decrement ndpr_addrcnt.
 		 */
+		IFA_LOCK(&ia->ia_ifa);
 		bzero(&pr0, sizeof(pr0));
 		pr0.ndpr_ifp = ifp;
 		pr0.ndpr_plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr,
 					     NULL);
-		if (pr0.ndpr_plen == 128)
+		if (pr0.ndpr_plen == 128) {
+			IFA_UNLOCK(&ia->ia_ifa);
 			goto purgeaddr;
+		}
 		pr0.ndpr_prefix = ia->ia_addr;
 		pr0.ndpr_mask = ia->ia_prefixmask.sin6_addr;
 		for (i = 0; i < 4; i++) {
 			pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &=
 				ia->ia_prefixmask.sin6_addr.s6_addr32[i];
 		}
+		IFA_UNLOCK(&ia->ia_ifa);
 		/*
 		 * The logic of the following condition is a bit complicated.
 		 * We expire the prefix when
@@ -1155,20 +1245,24 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
 		 * 2. the address does not obey autoconf and there is no
 		 *    other owner of the prefix.
 		 */
-		if ((pr = nd6_prefix_lookup(&pr0)) != NULL &&
-		    (((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0 &&
-		      pr->ndpr_refcnt == 1) ||
-		     ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0 &&
-		      pr->ndpr_refcnt == 0))) {
-			pr->ndpr_expire = 1; /* XXX: just for expiration */
-		}
+		if ((pr = nd6_prefix_lookup(&pr0)) != NULL) {
+			IFA_LOCK(&ia->ia_ifa);
+			NDPR_LOCK(pr);
+			if (((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0 &&
+			    pr->ndpr_addrcnt == 1) ||
+			    ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0 &&
+			    pr->ndpr_addrcnt == 0)) {
+				pr->ndpr_expire = 1; /* XXX: just for expiration */
+			}
+			NDPR_UNLOCK(pr);
+			IFA_UNLOCK(&ia->ia_ifa);
 
-		/* Drop use count held above during lookup */
-		if (pr != NULL)
-			ndpr_rele(pr, FALSE);
+			/* Drop use count held above during lookup */
+			NDPR_REMREF(pr);
+		}
 
 purgeaddr:
-		in6_purgeaddr(&ia->ia_ifa, 0);
+		in6_purgeaddr(&ia->ia_ifa);
 #if PF
 		pf_ifaddr_hook(ifp, cmd);
 #endif /* PF */
@@ -1181,7 +1275,7 @@ purgeaddr:
 	}
 ioctl_cleanup:
 	if (ia != NULL)
-		ifafree(&ia->ia_ifa);
+		IFA_REMREF(&ia->ia_ifa);
 	return (error);
 }
 
@@ -1189,23 +1283,23 @@ ioctl_cleanup:
  * Update parameters of an IPv6 interface address.
  * If necessary, a new entry is created and linked into address chains.
  * This function is separated from in6_control().
- * XXX: should this be performed under splnet()?
  */
 int
-in6_update_ifa(ifp, ifra, ia, how)
-	struct ifnet *ifp;
-	struct in6_aliasreq *ifra;
-	struct in6_ifaddr *ia;
-	int how;
+in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
+    struct in6_ifaddr *ia, int flags, int how)
 {
 	int error = 0, hostIsNew = 0, plen = -1;
 	struct in6_ifaddr *oia;
 	struct sockaddr_in6 dst6;
 	struct in6_addrlifetime *lt;
+	struct in6_multi *in6m_sol = NULL;
+	struct in6_multi_mship *imm;
 	struct timeval timenow;
+	struct rtentry *rt;
+	struct ifaddr *ifa = NULL;
+	int delay;
 
 
-	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
 	/* Validate parameters */
 	if (ifp == NULL || ifra == NULL) /* this maybe redundant */
 		return(EINVAL);
@@ -1245,14 +1339,15 @@ in6_update_ifa(ifp, ifra, ia, how)
 				    (u_char *)&ifra->ifra_prefixmask +
 				    ifra->ifra_prefixmask.sin6_len);
 		if (plen <= 0)
-			return(EINVAL);
-	}
-	else {
+			return (EINVAL);
+	} else {
 		/*
 		 * In this case, ia must not be NULL.  We just use its prefix
 		 * length.
 		 */
+		IFA_LOCK(&ia->ia_ifa);
 		plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL);
+		IFA_UNLOCK(&ia->ia_ifa);
 	}
 	/*
 	 * If the destination address on a p2p interface is specified,
@@ -1260,27 +1355,25 @@ in6_update_ifa(ifp, ifra, ia, how)
 	 * zone identifier.
 	 */
 	dst6 = ifra->ifra_dstaddr;
-	if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) &&
+	if (((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) != 0) &&
 	    (dst6.sin6_family == AF_INET6)) {
 		int scopeid;
 
-#ifndef SCOPEDROUTING
 		if ((error = in6_recoverscope(&dst6,
 					      &ifra->ifra_dstaddr.sin6_addr,
 					      ifp)) != 0)
 			return(error);
-#endif
+
 		scopeid = in6_addr2scopeid(ifp, &dst6.sin6_addr);
 		if (dst6.sin6_scope_id == 0) /* user omit to specify the ID. */
 			dst6.sin6_scope_id = scopeid;
 		else if (dst6.sin6_scope_id != scopeid)
 			return(EINVAL); /* scope ID mismatch. */
-#ifndef SCOPEDROUTING
-		if ((error = in6_embedscope(&dst6.sin6_addr, &dst6, NULL, NULL))
-		    != 0)
+
+		if ((error = in6_embedscope(&dst6.sin6_addr, &dst6, NULL, NULL,
+		    NULL)) != 0)
 			return(error);
 		dst6.sin6_scope_id = 0; /* XXX */
-#endif
 	}
 	/*
 	 * The destination address can be specified only for a p2p or a
@@ -1308,7 +1401,8 @@ in6_update_ifa(ifp, ifra, ia, how)
 
 	getmicrotime(&timenow);
 	lt = &ifra->ifra_lifetime;
-	if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME
+	if ((lt->ia6t_vltime != ND6_INFINITE_LIFETIME
+	     || (ifra->ifra_flags & IN6_IFF_TEMPORARY) != 0)
 	    && lt->ia6t_vltime + timenow.tv_sec < timenow.tv_sec) {
 		return EINVAL;
 	}
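The `vltime + now < now` test above is an unsigned-overflow guard: if adding the lifetime to the current time wraps around, the absolute expiry would land in the past. The same check in isolation, assuming the u_int32_t lifetime representation (helper name illustrative; temporary addresses are validated even with an "infinite" lifetime, as in the hunk):

	/* Sketch: reject a lifetime whose absolute expiry would wrap. */
	static int
	lifetime_invalid(u_int32_t lifetime, u_int32_t now, int is_temporary)
	{
		return ((lifetime != ND6_INFINITE_LIFETIME || is_temporary) &&
		    lifetime + now < now);
	}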
@@ -1321,7 +1415,8 @@ in6_update_ifa(ifp, ifra, ia, how)
 		    "in6_update_ifa: valid lifetime is 0 for %s\n",
 		    ip6_sprintf(&ifra->ifra_addr.sin6_addr));
 	}
-	if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME
+	if ((lt->ia6t_pltime != ND6_INFINITE_LIFETIME
+	     || (ifra->ifra_flags & IN6_IFF_TEMPORARY) != 0)
 	    && lt->ia6t_pltime + timenow.tv_sec < timenow.tv_sec) {
 		return EINVAL;
 	}
@@ -1340,11 +1435,15 @@ in6_update_ifa(ifp, ifra, ia, how)
 		 */
 		ia = in6_ifaddr_alloc(how);
 		if (ia == NULL)
-			return ENOBUFS;
-		/* Initialize the address and masks */
+			return (ENOBUFS);
+		ifnet_lock_exclusive(ifp);
+		IFA_LOCK(&ia->ia_ifa);
+		LIST_INIT(&ia->ia6_memberships);
+		/* Initialize the address and masks, and put time stamp */
 		ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
 		ia->ia_addr.sin6_family = AF_INET6;
 		ia->ia_addr.sin6_len = sizeof(ia->ia_addr);
+		ia->ia6_createtime = timenow.tv_sec;
 		if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) {
 			/*
 			 * XXX: some functions expect that ifa_dstaddr is not
@@ -1359,21 +1458,34 @@ in6_update_ifa(ifp, ifra, ia, how)
 			= (struct sockaddr *)&ia->ia_prefixmask;
 
 		ia->ia_ifp = ifp;
-		ifaref(&ia->ia_ifa);
-		lck_mtx_lock(nd6_mutex);
+		/* if_attach_ifa() holds a reference for ifa_link */
+		if_attach_ifa(ifp, &ia->ia_ifa);
+		/* hold a reference for this routine */
+		IFA_ADDREF_LOCKED(&ia->ia_ifa);
+		IFA_UNLOCK(&ia->ia_ifa);
+		ifnet_lock_done(ifp);
+		lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
+		/* Hold a reference for in6_ifaddrs link */
+		IFA_ADDREF(&ia->ia_ifa);
 		if ((oia = in6_ifaddrs) != NULL) {
 			for ( ; oia->ia_next; oia = oia->ia_next)
 				continue;
 			oia->ia_next = ia;
-		} else
+		} else {
 			in6_ifaddrs = ia;
-		lck_mtx_unlock(nd6_mutex);
-
-		ifnet_lock_exclusive(ifp);
-		if_attach_ifa(ifp, &ia->ia_ifa);
-		ifnet_lock_done(ifp);
+		}
+		lck_rw_done(&in6_ifaddr_rwlock);
+	} else {
+		/* hold a reference for this routine */
+		IFA_ADDREF(&ia->ia_ifa);
 	}
 
+	ifa = &ia->ia_ifa;
+	IFA_LOCK(ifa);
+
+	/* update timestamp */
+	ia->ia6_updatetime = timenow.tv_sec;
+
 	/* set prefix mask */
 	if (ifra->ifra_prefixmask.sin6_len) {
 		/*
@@ -1388,6 +1500,7 @@ in6_update_ifa(ifp, ifra, ia, how)
 			    " existing (%s) address should not be changed\n",
 			    ip6_sprintf(&ia->ia_addr.sin6_addr));
 			error = EINVAL;
+			IFA_UNLOCK(ifa);
 			goto unlink;
 		}
 		ia->ia_prefixmask = ifra->ifra_prefixmask;
@@ -1396,82 +1509,145 @@ in6_update_ifa(ifp, ifra, ia, how)
 	/*
 	 * If a new destination address is specified, scrub the old one and
 	 * install the new destination.  Note that the interface must be
-	 * p2p or loopback (see the check above.) 
+	 * p2p or loopback (see the check above.)
 	 */
 	if (dst6.sin6_family == AF_INET6 &&
-	    !IN6_ARE_ADDR_EQUAL(&dst6.sin6_addr,
-				&ia->ia_dstaddr.sin6_addr)) {
-		int e;
-
-		if ((ia->ia_flags & IFA_ROUTE) != 0 &&
-		    (e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST))
-		    != 0) {
-			log(LOG_ERR, "in6_update_ifa: failed to remove "
-			    "a route to the old destination: %s\n",
-			    ip6_sprintf(&ia->ia_addr.sin6_addr));
-			/* proceed anyway... */
-		}
-		else
+	    !IN6_ARE_ADDR_EQUAL(&dst6.sin6_addr, &ia->ia_dstaddr.sin6_addr)) {
+		if ((ia->ia_flags & IFA_ROUTE)) {
+			int e;
+
+			IFA_UNLOCK(ifa);
+			if ((e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE,
+			    RTF_HOST)) != 0) {
+				log(LOG_ERR, "in6_update_ifa: failed to remove "
+				    "a route to the old destination: %s\n",
+				    ip6_sprintf(&ia->ia_addr.sin6_addr));
+				/* proceed anyway... */
+			}
+			IFA_LOCK(ifa);
+		} else {
 			ia->ia_flags &= ~IFA_ROUTE;
+		}
+		IFA_LOCK_ASSERT_HELD(ifa);
 		ia->ia_dstaddr = dst6;
 	}
 
+	/*
+	 * Set lifetimes.  We do not refer to ia6t_expire and ia6t_preferred
+	 * to see if the address is deprecated or invalidated, but initialize
+	 * these members for applications.
+	 */
+	ia->ia6_lifetime = ifra->ifra_lifetime;
+	if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME
+	    || (ifra->ifra_flags & IN6_IFF_TEMPORARY) != 0) {
+		ia->ia6_lifetime.ia6t_expire =
+		    timenow.tv_sec + ia->ia6_lifetime.ia6t_vltime;
+	} else
+		ia->ia6_lifetime.ia6t_expire = 0;
+	if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME
+	    || (ifra->ifra_flags & IN6_IFF_TEMPORARY) != 0) {
+		ia->ia6_lifetime.ia6t_preferred =
+		    timenow.tv_sec + ia->ia6_lifetime.ia6t_pltime;
+	} else
+		ia->ia6_lifetime.ia6t_preferred = 0;
+
+	IFA_UNLOCK(ifa);
 	/* reset the interface and routing table appropriately. */
 	if ((error = in6_ifinit(ifp, ia, &ifra->ifra_addr, hostIsNew)) != 0)
 		goto unlink;
 
+	IFA_LOCK(ifa);
 	/*
-	 * Beyond this point, we should call in6_purgeaddr upon an error,
-	 * not just go to unlink. 
+	 * Configure address flags.
 	 */
-
-#if 0				/* disable this mechanism for now */
-	/* update prefix list */
-	if (hostIsNew &&
-	    (ifra->ifra_flags & IN6_IFF_NOPFX) == 0) { /* XXX */
-		int iilen;
-
-		iilen = (sizeof(ia->ia_prefixmask.sin6_addr) << 3) - plen;
-		if ((error = in6_prefix_add_ifid(iilen, ia)) != 0) {
-			in6_purgeaddr((struct ifaddr *)ia, 0);
-			return(error);
-		}
+	ia->ia6_flags = ifra->ifra_flags;
+	/*
+	 * backward compatibility - if IN6_IFF_DEPRECATED is set from the
+	 * userland, make it deprecated.
+	 */
+	if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) {
+		ia->ia6_lifetime.ia6t_pltime = 0;
+		ia->ia6_lifetime.ia6t_preferred = timenow.tv_sec;
 	}
-#endif
+	/*
+	 * Make the address tentative before joining multicast addresses,
+	 * so that corresponding MLD responses would not have a tentative
+	 * source address.
+	 */
+	ia->ia6_flags &= ~IN6_IFF_DUPLICATED;	/* safety */
+	if (hostIsNew && in6if_do_dad(ifp))
+		ia->ia6_flags |= IN6_IFF_TENTATIVE;
 
+	/*
+	 * We are done if we have simply modified an existing address.
+	 */
+	if (!hostIsNew) {
+		IFA_UNLOCK(ifa);
+		/* release reference held for this routine */
+		IFA_REMREF(ifa);
+		return (error);
+	}
+	/*
+	 * Beyond this point, we should call in6_purgeaddr upon an error,
+	 * not just go to unlink.
+	 */
+	IFA_LOCK_ASSERT_HELD(ifa);
+	/* Join necessary multicast groups */
 	if ((ifp->if_flags & IFF_MULTICAST) != 0) {
 		struct sockaddr_in6 mltaddr, mltmask;
-		struct in6_multi *in6m;
+		struct in6_addr llsol;
 
-		if (hostIsNew) {
+		IFA_UNLOCK(ifa);
+		/* join solicited multicast addr for new host id */
+		bzero(&llsol, sizeof(struct in6_addr));
+		llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
+		llsol.s6_addr32[1] = 0;
+		llsol.s6_addr32[2] = htonl(1);
+		llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3];
+		llsol.s6_addr8[12] = 0xff;
+		if ((error = in6_setscope(&llsol, ifp, NULL)) != 0) {
+			/* XXX: should not happen */
+			log(LOG_ERR, "in6_update_ifa: "
+			    "in6_setscope failed\n");
+			goto cleanup;
+		}
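The llsol block above builds the RFC 4291 solicited-node multicast address, ff02::1:ffXX:XXXX: the low 24 bits of the configured unicast address are appended to the fixed ff02::1:ff00:0/104 prefix. The derivation as a standalone sketch (helper name illustrative):

	/* Sketch: solicited-node multicast address (RFC 4291, sec. 2.7.1). */
	static void
	make_solicited_node(struct in6_addr *llsol, const struct in6_addr *uni)
	{
		bzero(llsol, sizeof (*llsol));
		llsol->s6_addr32[0] = IPV6_ADDR_INT32_MLL;	/* ff02:: */
		llsol->s6_addr32[2] = htonl(1);
		llsol->s6_addr32[3] = uni->s6_addr32[3];	/* low 32 bits */
		llsol->s6_addr8[12] = 0xff;	/* keeps only the low 24 bits */
	}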
+		delay = 0;
+		if ((flags & IN6_IFAUPDATE_DADDELAY)) {
 			/*
-			 * join solicited multicast addr for new host id
+			 * We need a random delay for DAD on the address
+			 * being configured.  It also means delaying
+			 * transmission of the corresponding MLD report to
+			 * avoid report collision.
+			 * [draft-ietf-ipv6-rfc2462bis-02.txt]
 			 */
-			struct in6_addr llsol;
-			bzero(&llsol, sizeof(struct in6_addr));
-			llsol.s6_addr16[0] = htons(0xff02);
-			llsol.s6_addr16[1] = htons(ifp->if_index);
-			llsol.s6_addr32[1] = 0;
-			llsol.s6_addr32[2] = htonl(1);
-			llsol.s6_addr32[3] =
-				ifra->ifra_addr.sin6_addr.s6_addr32[3];
-			llsol.s6_addr8[12] = 0xff;
-			(void)in6_addmulti(&llsol, ifp, &error, 0);
-			if (error != 0) {
-				log(LOG_WARNING,
-				    "in6_update_ifa: addmulti failed for "
-				    "%s on %s (errno=%d)\n",
-				    ip6_sprintf(&llsol), if_name(ifp),
-				    error);
-				in6_purgeaddr((struct ifaddr *)ia, 0);
-				return(error);
-			}
+			delay = random() %
+			    (MAX_RTR_SOLICITATION_DELAY * PR_SLOWHZ);
+		}
+		imm = in6_joingroup(ifp, &llsol, &error, delay);
+		if (imm == NULL) {
+			nd6log((LOG_WARNING,
+			    "in6_update_ifa: addmulti failed for "
+			    "%s on %s (errno=%d)\n",
+			    ip6_sprintf(&llsol), if_name(ifp),
+			    error));
+			in6_purgeaddr((struct ifaddr *)ia);
+			/* release reference held for this routine */
+			IFA_REMREF(ifa);
+			return (error);
 		}
+		in6m_sol = imm->i6mm_maddr;
+		/* take a refcount for this routine */
+		IN6M_ADDREF(in6m_sol);
+
+		IFA_LOCK_SPIN(ifa);
+		LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
+		IFA_UNLOCK(ifa);
 
 		bzero(&mltmask, sizeof(mltmask));
 		mltmask.sin6_len = sizeof(struct sockaddr_in6);
 		mltmask.sin6_family = AF_INET6;
 		mltmask.sin6_addr = in6mask32;
+#define	MLTMASK_LEN  4	/* mltmask's masklen (=32bit=4octet) */
 
 		/*
 		 * join link-local all-nodes address
@@ -1480,111 +1656,134 @@ in6_update_ifa(ifp, ifra, ia, how)
 		mltaddr.sin6_len = sizeof(struct sockaddr_in6);
 		mltaddr.sin6_family = AF_INET6;
 		mltaddr.sin6_addr = in6addr_linklocal_allnodes;
-		mltaddr.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
+		if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) !=
+		    0)
+			goto cleanup; /* XXX: should not fail */
 
-		ifnet_lock_shared(ifp);
-		IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m);
-		ifnet_lock_done(ifp);
-		if (in6m == NULL) {
-			rtrequest(RTM_ADD,
-				  (struct sockaddr *)&mltaddr,
-				  (struct sockaddr *)&ia->ia_addr,
-				  (struct sockaddr *)&mltmask,
-				  RTF_UP|RTF_CLONING,  /* xxx */
-				  (struct rtentry **)0);
-			(void)in6_addmulti(&mltaddr.sin6_addr, ifp, &error, 0);
-			if (error != 0) {
-				log(LOG_WARNING,
-				    "in6_update_ifa: addmulti failed for "
-				    "%s on %s (errno=%d)\n",
-				    ip6_sprintf(&mltaddr.sin6_addr), 
-				    if_name(ifp), error);
+		 * XXX: do we really need these automatic routes?
+		 * XXX: do we really need this automatic routes?
+		 * We should probably reconsider this stuff.  Most applications
+		 * actually do not need the routes, since they usually specify
+		 * the outgoing interface.
+		 */
+		rt = rtalloc1_scoped((struct sockaddr *)&mltaddr, 0, 0UL,
+		    ia->ia_ifp->if_index);
+		if (rt) {
+			if (memcmp(&mltaddr.sin6_addr,
+			    &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
+			    MLTMASK_LEN)) {
+				rtfree(rt);
+				rt = NULL;
 			}
 		}
+		if (!rt) {
+			error = rtrequest_scoped(RTM_ADD,
+			    (struct sockaddr *)&mltaddr,
+			    (struct sockaddr *)&ia->ia_addr,
+			    (struct sockaddr *)&mltmask, RTF_UP | RTF_CLONING,
+			    NULL, ia->ia_ifp->if_index);
+			if (error)
+				goto cleanup;
+		} else {
+			rtfree(rt);
+		}
+
+		imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0);
+		if (!imm) {
+			nd6log((LOG_WARNING,
+			    "in6_update_ifa: addmulti failed for "
+			    "%s on %s (errno=%d)\n",
+			    ip6_sprintf(&mltaddr.sin6_addr),
+			    if_name(ifp), error));
+			goto cleanup;
+		}
+		IFA_LOCK_SPIN(ifa);
+		LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
+		IFA_UNLOCK(ifa);
 
 		/*
 		 * join node information group address
 		 */
 #define hostnamelen	strlen(hostname)
+		delay = 0;
+		if ((flags & IN6_IFAUPDATE_DADDELAY)) {
+			/*
+			 * The spec doesn't say anything about delay for this
+			 * group, but the same logic should apply.
+			 */
+			delay = random() %
+			    (MAX_RTR_SOLICITATION_DELAY * PR_SLOWHZ);
+		}
 		if (in6_nigroup(ifp, hostname, hostnamelen, &mltaddr.sin6_addr)
 		    == 0) {
-		    ifnet_lock_shared(ifp);
-			IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m);
-			ifnet_lock_done(ifp);
-			if (in6m == NULL && ia != NULL) {
-				(void)in6_addmulti(&mltaddr.sin6_addr,
-				    ifp, &error, 0);
-				if (error != 0) {
-					log(LOG_WARNING, "in6_update_ifa: "
-					    "addmulti failed for "
-					    "%s on %s (errno=%d)\n",
-					    ip6_sprintf(&mltaddr.sin6_addr), 
-					    if_name(ifp), error);
-				}
+			imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error,
+			    delay); /* XXX jinmei */
+			if (!imm) {
+				nd6log((LOG_WARNING, "in6_update_ifa: "
+				    "addmulti failed for %s on %s "
+				    "(errno=%d)\n",
+				    ip6_sprintf(&mltaddr.sin6_addr),
+				    if_name(ifp), error));
+				/* XXX not very fatal, go on... */
+			} else {
+				IFA_LOCK_SPIN(ifa);
+				LIST_INSERT_HEAD(&ia->ia6_memberships,
+				    imm, i6mm_chain);
+				IFA_UNLOCK(ifa);
 			}
 		}
 #undef hostnamelen
 
 		/*
-		 * join node-local all-nodes address, on loopback.
-		 * XXX: since "node-local" is obsoleted by interface-local,
-		 *      we have to join the group on every interface with
-		 *      some interface-boundary restriction.
+		 * join interface-local all-nodes address.
+		 * (ff01::1%ifN, and ff01::%ifN/32)
 		 */
-		if (ifp->if_flags & IFF_LOOPBACK) {
-			struct in6_ifaddr *ia_loop;
-
-			struct in6_addr loop6 = in6addr_loopback;
-			ia_loop = in6ifa_ifpwithaddr(ifp, &loop6);
-
-			mltaddr.sin6_addr = in6addr_nodelocal_allnodes;
-
-			ifnet_lock_shared(ifp);
-			IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m);
-			ifnet_lock_done(ifp);
-			if (in6m == NULL && ia_loop != NULL) {
-				rtrequest(RTM_ADD,
-					  (struct sockaddr *)&mltaddr,
-					  (struct sockaddr *)&ia_loop->ia_addr,
-					  (struct sockaddr *)&mltmask,
-					  RTF_UP,
-					  (struct rtentry **)0);
-				(void)in6_addmulti(&mltaddr.sin6_addr, ifp,
-						   &error, 0);
-				if (error != 0) {
-					log(LOG_WARNING, "in6_update_ifa: "
-					    "addmulti failed for %s on %s "
-					    "(errno=%d)\n",
-					    ip6_sprintf(&mltaddr.sin6_addr), 
-					    if_name(ifp), error);
-				}
+		mltaddr.sin6_addr = in6addr_nodelocal_allnodes;
+		if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL))
+		    != 0)
+			goto cleanup; /* XXX: should not fail */
+		/* XXX: again, do we really need the route? */
+		rt = rtalloc1_scoped((struct sockaddr *)&mltaddr, 0, 0UL,
+		    ia->ia_ifp->if_index);
+		if (rt) {
+			if (memcmp(&mltaddr.sin6_addr,
+			    &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
+			    MLTMASK_LEN)) {
+				rtfree(rt);
+				rt = NULL;
 			}
-			if (ia_loop != NULL)
-				ifafree(&ia_loop->ia_ifa);
 		}
+		if (!rt) {
+			error = rtrequest_scoped(RTM_ADD,
+			    (struct sockaddr *)&mltaddr,
+			    (struct sockaddr *)&ia->ia_addr,
+			    (struct sockaddr *)&mltmask, RTF_UP | RTF_CLONING,
+			    NULL, ia->ia_ifp->if_index);
+			if (error)
+				goto cleanup;
+		} else
+			rtfree(rt);
+
+		imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0);
+		if (!imm) {
+			nd6log((LOG_WARNING, "in6_update_ifa: "
+			    "addmulti failed for %s on %s "
+			    "(errno=%d)\n",
+			    ip6_sprintf(&mltaddr.sin6_addr),
+			    if_name(ifp), error));
+			goto cleanup;
+		}
+		IFA_LOCK(ifa);
+		LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
+		/* keep it locked */
+#undef	MLTMASK_LEN
 	}
-
-	ia->ia6_flags = ifra->ifra_flags;
-	ia->ia6_flags &= ~IN6_IFF_DUPLICATED;	/*safety*/
-	ia->ia6_flags &= ~IN6_IFF_NODAD;	/* Mobile IPv6 */
-
-	ia->ia6_lifetime = ifra->ifra_lifetime;
-	/* for sanity */
-	if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
-		ia->ia6_lifetime.ia6t_expire =
-			timenow.tv_sec + ia->ia6_lifetime.ia6t_vltime;
-	} else
-		ia->ia6_lifetime.ia6t_expire = 0;
-	if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
-		ia->ia6_lifetime.ia6t_preferred =
-			timenow.tv_sec + ia->ia6_lifetime.ia6t_pltime;
-	} else
-		ia->ia6_lifetime.ia6t_preferred = 0;
-
+	IFA_LOCK_ASSERT_HELD(ifa);
 	/*
-	 * make sure to initialize ND6 information.  this is to workaround
+	 * Make sure to initialize ND6 information.  This is to work around
 	 * issues with interfaces with IPv6 addresses that have never been
 	 * brought up.  We assume it is safe to call nd6_ifattach() multiple
 	 * times.
+	 * NOTE: this is how stf0 gets initialized
 	 */
 	if ((error = nd6_ifattach(ifp)) != 0)
 		return error;
@@ -1594,29 +1793,74 @@ in6_update_ifa(ifp, ifra, ia, how)
 	 * XXX It may be of use, if we can administratively
 	 * disable DAD.
 	 */
-	if (in6if_do_dad(ifp) && (ifra->ifra_flags & IN6_IFF_NODAD) == 0) {
-		ia->ia6_flags |= IN6_IFF_TENTATIVE;
-		nd6_dad_start((struct ifaddr *)ia, NULL);
-	}
+	if (hostIsNew && in6if_do_dad(ifp) &&
+	    ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) &&
+	    (ia->ia6_flags & IN6_IFF_TENTATIVE)) {
+		int mindelay, maxdelay;
 
-	return(error);
+		IFA_UNLOCK(ifa);
+		delay = 0;
+		if ((flags & IN6_IFAUPDATE_DADDELAY)) {
+			/*
+			 * We need to impose a delay before sending an NS
+			 * for DAD.  Check if we also needed a delay for the
+			 * corresponding MLD message.  If we did, the delay
+			 * should be larger than the MLD delay (this could be
+			 * relaxed a bit, but this simple logic is at least
+			 * safe).
+			 */
+			mindelay = 0;
+			if (in6m_sol != NULL) {
+				IN6M_LOCK(in6m_sol);
+				if (in6m_sol->in6m_state == MLD_REPORTING_MEMBER)
+					mindelay = in6m_sol->in6m_timer;
+				IN6M_UNLOCK(in6m_sol);
+			}
+			maxdelay = MAX_RTR_SOLICITATION_DELAY * hz;
+			if (maxdelay - mindelay == 0)
+				delay = 0;
+			else {
+				delay =
+				    (random() % (maxdelay - mindelay)) +
+				    mindelay;
+			}
+		}
+		nd6_dad_start((struct ifaddr *)ia, &delay);
+	} else {
+		IFA_UNLOCK(ifa);
+	}
+done:
+	/* release reference held for this routine */
+	if (ifa != NULL)
+		IFA_REMREF(ifa);
+	if (in6m_sol != NULL)
+		IN6M_REMREF(in6m_sol);
+	return (error);
 
-  unlink:
+unlink:
 	/*
 	 * XXX: if a change of an existing address failed, keep the entry
 	 * anyway.
 	 */
-	if (hostIsNew)
-		in6_unlink_ifa(ia, ifp, 0);
-	return(error);
+	if (hostIsNew) {
+		in6_unlink_ifa(ia, ifp);
+	}
+	goto done;
+
+cleanup:
+	in6_purgeaddr(&ia->ia_ifa);
+	goto done;
 }
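The DAD delay near the end of in6_update_ifa() is chosen uniformly between the pending MLD report timer and MAX_RTR_SOLICITATION_DELAY, so the neighbor solicitation never precedes the MLD report for the solicited-node group. The bounding logic, consolidated from the hunk above (units are hz ticks; helper name illustrative):

	/* Sketch: random DAD delay, bounded below by a pending MLD timer. */
	static int
	dad_delay_ticks(int mld_timer)
	{
		int mindelay = mld_timer;	/* 0 if no report is pending */
		int maxdelay = MAX_RTR_SOLICITATION_DELAY * hz;

		if (maxdelay - mindelay == 0)
			return (0);
		return ((random() % (maxdelay - mindelay)) + mindelay);
	}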
 
 void
-in6_purgeaddr(
-	struct ifaddr *ifa, int nd6_locked)
+in6_purgeaddr(struct ifaddr *ifa)
 {
 	struct ifnet *ifp = ifa->ifa_ifp;
 	struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa;
+	struct in6_multi_mship *imm;
+
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
 
 	/* stop DAD processing */
 	nd6_dad_stop(ifa);
@@ -1625,9 +1869,11 @@ in6_purgeaddr(
 	 * delete route to the destination of the address being purged.
 	 * The interface must be p2p or loopback in this case.
 	 */
+	IFA_LOCK(ifa);
 	if ((ia->ia_flags & IFA_ROUTE) != 0 && ia->ia_dstaddr.sin6_len != 0) {
 		int e;
 
+		IFA_UNLOCK(ifa);
 		if ((e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST))
 		    != 0) {
 			log(LOG_ERR, "in6_purgeaddr: failed to remove "
@@ -1636,73 +1882,71 @@ in6_purgeaddr(
 			    ip6_sprintf(&ia->ia_addr.sin6_addr), if_name(ifp),
 			    e);
 			/* proceed anyway... */
-		}
-		else
+		} else {
+			IFA_LOCK_SPIN(ifa);
 			ia->ia_flags &= ~IFA_ROUTE;
+			IFA_UNLOCK(ifa);
+		}
+	} else {
+		IFA_UNLOCK(ifa);
 	}
+	IFA_LOCK_ASSERT_NOTHELD(ifa);
 
 	/* Remove ownaddr's loopback rtentry, if it exists. */
-	in6_ifremloop(&(ia->ia_ifa), nd6_locked);
-
-	if (ifp->if_flags & IFF_MULTICAST) {
-		/*
-		 * delete solicited multicast addr for deleting host id
-		 */
-		struct in6_multi *in6m;
-		struct in6_addr llsol;
-		bzero(&llsol, sizeof(struct in6_addr));
-		llsol.s6_addr16[0] = htons(0xff02);
-		llsol.s6_addr16[1] = htons(ifp->if_index);
-		llsol.s6_addr32[1] = 0;
-		llsol.s6_addr32[2] = htonl(1);
-		llsol.s6_addr32[3] =
-			ia->ia_addr.sin6_addr.s6_addr32[3];
-		llsol.s6_addr8[12] = 0xff;
+	in6_ifremloop(&(ia->ia_ifa));
 
-		ifnet_lock_shared(ifp);
-		IN6_LOOKUP_MULTI(llsol, ifp, in6m);
-		ifnet_lock_done(ifp);
-		if (in6m)
-			in6_delmulti(in6m, nd6_locked);
+	/*
+	 * leave from multicast groups we have joined for the interface
+	 */
+	IFA_LOCK(ifa);
+	while ((imm = ia->ia6_memberships.lh_first) != NULL) {
+		LIST_REMOVE(imm, i6mm_chain);
+		IFA_UNLOCK(ifa);
+		in6_leavegroup(imm);
+		IFA_LOCK(ifa);
 	}
+	IFA_UNLOCK(ifa);
 
-	in6_unlink_ifa(ia, ifp, nd6_locked);
+	/* in6_unlink_ifa() will need exclusive access */
+	in6_unlink_ifa(ia, ifp);
 	in6_post_msg(ifp, KEV_INET6_ADDR_DELETED, ia);
 }
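The membership teardown in in6_purgeaddr() uses a pop-and-relock loop: each in6_multi_mship is unlinked under IFA_LOCK, the lock is dropped so in6_leavegroup() may block, and the lock is retaken before examining the next head. Isolated, the shape is:

	/* The drain idiom from above: never hold the lock across blocking work. */
	IFA_LOCK(ifa);
	while ((imm = ia->ia6_memberships.lh_first) != NULL) {
		LIST_REMOVE(imm, i6mm_chain);	/* unlink while locked */
		IFA_UNLOCK(ifa);
		in6_leavegroup(imm);		/* may block */
		IFA_LOCK(ifa);
	}
	IFA_UNLOCK(ifa);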
 
 static void
-in6_unlink_ifa(ia, ifp, nd6_locked)
-	struct in6_ifaddr *ia;
-	struct ifnet *ifp;
-	int nd6_locked;
+in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
 {
-	int plen, iilen;
 	struct in6_ifaddr *oia;
+	struct ifaddr *ifa;
+	int unlinked;
+
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
+
+	ifa = &ia->ia_ifa;
+	IFA_ADDREF(ifa);
 
 	ifnet_lock_exclusive(ifp);
-	if_detach_ifa(ifp, &ia->ia_ifa);
+	IFA_LOCK(ifa);
+	if (ifa->ifa_debug & IFD_ATTACHED)
+		if_detach_ifa(ifp, ifa);
+	IFA_UNLOCK(ifa);
 	ifnet_lock_done(ifp);
 
-	if (!nd6_locked)
-		lck_mtx_lock(nd6_mutex);
+	unlinked = 1;
+	lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
 	oia = ia;
-	if (oia == (ia = in6_ifaddrs)) 
+	if (oia == (ia = in6_ifaddrs)) {
 		in6_ifaddrs = ia->ia_next;
-	else {
+	} else {
 		while (ia->ia_next && (ia->ia_next != oia))
 			ia = ia->ia_next;
-		if (ia->ia_next)
+		if (ia->ia_next) {
 			ia->ia_next = oia->ia_next;
-		else {
+		} else {
 			/* search failed */
 			printf("Couldn't unlink in6_ifaddr from in6_ifaddr\n");
+			unlinked = 0;
 		}
 	}
-	if (oia->ia6_ifpr) {	/* check for safety */
-		plen = in6_mask2len(&oia->ia_prefixmask.sin6_addr, NULL);
-		iilen = (sizeof(oia->ia_prefixmask.sin6_addr) << 3) - plen;
-		in6_prefix_remove_ifid(iilen, oia);
-	}
 
 	/*
 	 * When an autoconfigured address is being removed, release the
@@ -1710,48 +1954,76 @@ in6_unlink_ifa(ia, ifp, nd6_locked)
 	 * affect the status of other (detached) addresses, call
 	 * pfxlist_onlink_check().
 	 */
+	ifa = &oia->ia_ifa;
+	IFA_LOCK(ifa);
 	if ((oia->ia6_flags & IN6_IFF_AUTOCONF) != 0) {
 		if (oia->ia6_ndpr == NULL) {
 			log(LOG_NOTICE, "in6_unlink_ifa: autoconf'ed address "
 			    "%p has no prefix\n", oia);
 		} else {
-			oia->ia6_ndpr->ndpr_refcnt--;
+			struct nd_prefix *pr = oia->ia6_ndpr;
+
 			oia->ia6_flags &= ~IN6_IFF_AUTOCONF;
 			oia->ia6_ndpr = NULL;
+			NDPR_LOCK(pr);
+			VERIFY(pr->ndpr_addrcnt != 0);
+			pr->ndpr_addrcnt--;
+			NDPR_UNLOCK(pr);
+			NDPR_REMREF(pr);	/* release addr reference */
 		}
-
-		pfxlist_onlink_check(1);
-	}
-	if (!nd6_locked)
+		IFA_UNLOCK(ifa);
+		lck_rw_done(&in6_ifaddr_rwlock);
+		lck_mtx_lock(nd6_mutex);
+		pfxlist_onlink_check();
 		lck_mtx_unlock(nd6_mutex);
-
+	} else {
+		IFA_UNLOCK(ifa);
+		lck_rw_done(&in6_ifaddr_rwlock);
+	}
 
 	/*
 	 * release another refcnt for the link from in6_ifaddrs.
-	 * Note that we should decrement the refcnt at least once for all *BSD.
+	 * Do this only if we actually unlinked it above; we may have lost
+	 * the race, since in6_ifaddr_rwlock was momentarily dropped.
 	 */
-	ifafree(&oia->ia_ifa);
+	if (unlinked)
+		IFA_REMREF(ifa);
 
+	/* release reference held for this routine */
+	IFA_REMREF(ifa);
 }
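in6_unlink_ifa() records in `unlinked` whether it actually removed the entry, and drops the in6_ifaddrs reference only in that case; another thread may have unlinked the address while in6_ifaddr_rwlock was dropped. The reference accounting, reduced to a skeleton (the list search itself is elided):

	/* Skeleton: one conditional release, one unconditional release. */
	if (unlinked)
		IFA_REMREF(ifa);	/* reference held by the in6_ifaddrs list */
	IFA_REMREF(ifa);		/* reference held by this routine */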
 
 void
-in6_purgeif(ifp)
-	struct ifnet *ifp;
+in6_purgeif(struct ifnet *ifp)
 {
-	struct in6_ifaddr *ia, *nia = NULL;
+	struct in6_ifaddr *ia;
 
-	if (ifp == NULL || &ifp->if_addrlist == NULL)
+	if (ifp == NULL)
 		return;
-	
-	lck_mtx_lock(nd6_mutex);
-	for (ia = in6_ifaddrs; ia != NULL; ia = nia)
-	{
-		nia = ia->ia_next;
-		if (ia->ia_ifa.ifa_ifp != ifp)
+
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
+
+	lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
+	ia = in6_ifaddrs;
+	while (ia != NULL) {
+		if (ia->ia_ifa.ifa_ifp != ifp) {
+			ia = ia->ia_next;
 			continue;
-		in6_purgeaddr(&ia->ia_ifa, 1);
+		}
+		IFA_ADDREF(&ia->ia_ifa);	/* for us */
+		lck_rw_done(&in6_ifaddr_rwlock);
+		in6_purgeaddr(&ia->ia_ifa);
+		lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
+		IFA_REMREF(&ia->ia_ifa);	/* for us */
+		/*
+		 * Purging the address would have caused
+		 * in6_ifaddr_rwlock to be dropped and reacquired;
+		 * therefore search again from the beginning
+		 * of in6_ifaddrs list.
+		 */
+		ia = in6_ifaddrs;
 	}
-	lck_mtx_unlock(nd6_mutex);
+	lck_rw_done(&in6_ifaddr_rwlock);
 
 	in6_ifdetach(ifp);
 }
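Since in6_purgeaddr() drops and reacquires in6_ifaddr_rwlock internally, the loop in in6_purgeif() cannot follow ia_next across the call: any neighbor may have been unlinked in that window. The rewritten loop therefore references the entry, purges it unlocked, and restarts from the list head. Consolidated from the hunk above:

	/* Restart-scan: rescan from the head whenever the lock was dropped. */
	lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
	ia = in6_ifaddrs;
	while (ia != NULL) {
		if (ia->ia_ifa.ifa_ifp != ifp) {
			ia = ia->ia_next;	/* safe: lock still held */
			continue;
		}
		IFA_ADDREF(&ia->ia_ifa);	/* keep it alive while unlocked */
		lck_rw_done(&in6_ifaddr_rwlock);
		in6_purgeaddr(&ia->ia_ifa);
		lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
		IFA_REMREF(&ia->ia_ifa);
		ia = in6_ifaddrs;		/* list may have changed; restart */
	}
	lck_rw_done(&in6_ifaddr_rwlock);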
@@ -1791,7 +2063,7 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
 	/* sanity checks */
 	if (!data || !ifp) {
 		panic("invalid argument to in6_lifaddr_ioctl");
-		/*NOTRECHED*/
+		/*NOTREACHED*/
 	}
 
 	switch (cmd) {
@@ -1845,9 +2117,11 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
 			ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0);
 			if (!ifa)
 				return EADDRNOTAVAIL;
+			IFA_LOCK_SPIN(ifa);
 			hostaddr = *IFA_IN6(ifa);
+			IFA_UNLOCK(ifa);
 			hostid_found = 1;
-			ifafree(ifa);
+			IFA_REMREF(ifa);
 			ifa = NULL;
 
 		 	/* prefixlen must be <= 64. */
@@ -1855,10 +2129,10 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
 				return EINVAL;
 			prefixlen = iflr->prefixlen;
 
-			/* hostaddr part must be zero. */
+			/* hostid part must be zero. */
 			sin6 = (struct sockaddr_in6 *)&iflr->addr;
-			if (sin6->sin6_addr.s6_addr32[2] != 0
-			 || sin6->sin6_addr.s6_addr32[3] != 0) {
+			if (sin6->sin6_addr.s6_addr32[2] != 0 ||
+			    sin6->sin6_addr.s6_addr32[3] != 0) {
 				return EINVAL;
 			}
 		} else
@@ -1890,7 +2164,7 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
 		}
 
 		ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
-		in6_len2mask(&ifra.ifra_prefixmask.sin6_addr, prefixlen);
+		in6_prefixlen2mask(&ifra.ifra_prefixmask.sin6_addr, prefixlen);
 
 		ifra.ifra_flags = iflr->flags & ~IFLR_PREFIX;
 		if (!p64) {
@@ -1935,7 +2209,7 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
 		bzero(&mask, sizeof(mask));
 		if (iflr->flags & IFLR_PREFIX) {
 			/* lookup a prefix rather than address. */
-			in6_len2mask(&mask, iflr->prefixlen);
+			in6_prefixlen2mask(&mask, iflr->prefixlen);
 
 			sin6 = (struct sockaddr_in6 *)&iflr->addr;
 			bcopy(&sin6->sin6_addr, &match, sizeof(match));
@@ -1955,7 +2229,7 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
 				cmp = 0;	/* XXX */
 			} else {
 				/* on deleting an address, do exact match */
-				in6_len2mask(&mask, 128);
+				in6_prefixlen2mask(&mask, 128);
 				sin6 = (struct sockaddr_in6 *)&iflr->addr;
 				bcopy(&sin6->sin6_addr, &match, sizeof(match));
 
@@ -1966,13 +2240,18 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
 		ifnet_lock_shared(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 		{
-			if (ifa->ifa_addr->sa_family != AF_INET6)
+			IFA_LOCK(ifa);
+			if (ifa->ifa_addr->sa_family != AF_INET6) {
+				IFA_UNLOCK(ifa);
 				continue;
-			if (!cmp)
+			}
+			if (!cmp) {
+				IFA_UNLOCK(ifa);
 				break;
+			}
 
 			bcopy(IFA_IN6(ifa), &candidate, sizeof(candidate));
-#ifndef SCOPEDROUTING
+			IFA_UNLOCK(ifa);
 			/*
 			 * XXX: this is adhoc, but is necessary to allow
 			 * a user to specify fe80::/64 (not /10) for a
@@ -1980,7 +2259,6 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
 			 */
 			if (IN6_IS_ADDR_LINKLOCAL(&candidate))
 				candidate.s6_addr16[1] = 0;
-#endif
 			candidate.s6_addr32[0] &= mask.s6_addr32[0];
 			candidate.s6_addr32[1] &= mask.s6_addr32[1];
 			candidate.s6_addr32[2] &= mask.s6_addr32[2];
@@ -1988,30 +2266,28 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
 			if (IN6_ARE_ADDR_EQUAL(&candidate, &match))
 				break;
 		}
+		if (ifa != NULL)
+			IFA_ADDREF(ifa);
 		ifnet_lock_done(ifp);
 		if (!ifa)
 			return EADDRNOTAVAIL;
 		ia = ifa2ia6(ifa);
 
 		if (cmd == SIOCGLIFADDR) {
-#ifndef SCOPEDROUTING
 			struct sockaddr_in6 *s6;
-#endif
 
+			IFA_LOCK(ifa);
 			/* fill in the if_laddrreq structure */
 			bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin6_len);
-#ifndef SCOPEDROUTING		/* XXX see above */
 			s6 = (struct sockaddr_in6 *)&iflr->addr;
 			if (IN6_IS_ADDR_LINKLOCAL(&s6->sin6_addr)) {
 				s6->sin6_addr.s6_addr16[1] = 0;
 				s6->sin6_scope_id =
 					in6_addr2scopeid(ifp, &s6->sin6_addr);
 			}
-#endif
 			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
 				bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
 					ia->ia_dstaddr.sin6_len);
-#ifndef SCOPEDROUTING		/* XXX see above */
 				s6 = (struct sockaddr_in6 *)&iflr->dstaddr;
 				if (IN6_IS_ADDR_LINKLOCAL(&s6->sin6_addr)) {
 					s6->sin6_addr.s6_addr16[1] = 0;
@@ -2019,7 +2295,6 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
 						in6_addr2scopeid(ifp,
 								 &s6->sin6_addr);
 				}
-#endif
 			} else
 				bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));
 
@@ -2028,7 +2303,8 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
 					     NULL);
 
 			iflr->flags = ia->ia6_flags;	/* XXX */
-
+			IFA_UNLOCK(ifa);
+			IFA_REMREF(ifa);
 			return 0;
 		} else {
 			struct in6_aliasreq ifra;
@@ -2038,6 +2314,7 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
 			bcopy(iflr->iflr_name, ifra.ifra_name,
 				sizeof(ifra.ifra_name));
 
+			IFA_LOCK(ifa);
 			bcopy(&ia->ia_addr, &ifra.ifra_addr,
 				ia->ia_addr.sin6_len);
 			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
@@ -2051,6 +2328,8 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
 				ia->ia_prefixmask.sin6_len);
 
 			ifra.ifra_flags = ia->ia6_flags;
+			IFA_UNLOCK(ifa);
+			IFA_REMREF(ifa);
 			if (!p64) {
 #if defined(__LP64__)
 				struct in6_aliasreq_32 ifra_32;
@@ -2120,24 +2399,30 @@ in6_ifinit(ifp, ia, sin6, newhost)
 	ifnet_lock_shared(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 	{
-		if (ifa->ifa_addr == NULL)
-			continue;	/* just for safety */
-		if (ifa->ifa_addr->sa_family != AF_INET6)
+		IFA_LOCK_SPIN(ifa);
+		if (ifa->ifa_addr->sa_family != AF_INET6) {
+			IFA_UNLOCK(ifa);
 			continue;
+		}
 		ifacount++;
+		IFA_UNLOCK(ifa);
 	}
 	ifnet_lock_done(ifp);
 
+	ifa = &ia->ia_ifa;
+	IFA_LOCK_SPIN(ifa);
 	ia->ia_addr = *sin6;
-
+	IFA_UNLOCK(ifa);
 
 	if (ifacount <= 1 && 
 	    (error = ifnet_ioctl(ifp, PF_INET6, SIOCSIFADDR, ia))) {
-		if (error) {
+		if (error == EOPNOTSUPP)
+			error = 0;
+		else if (error)
 			return(error);
-		}
 	}
 
+	IFA_LOCK(ifa);
 	ia->ia_ifa.ifa_metric = ifp->if_metric;
 
 	/* we could do in(6)_socktrim here, but just omit it at this moment. */
@@ -2150,11 +2435,14 @@ in6_ifinit(ifp, ia, sin6, newhost)
 	 */
 	plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
 	if (plen == 128 && ia->ia_dstaddr.sin6_family == AF_INET6) {
+		IFA_UNLOCK(ifa);
 		if ((error = rtinit(&(ia->ia_ifa), (int)RTM_ADD,
-				    RTF_UP | RTF_HOST)) != 0)
+		    RTF_UP | RTF_HOST)) != 0)
 			return(error);
+		IFA_LOCK(ifa);
 		ia->ia_flags |= IFA_ROUTE;
 	}
+	IFA_LOCK_ASSERT_HELD(ifa);
 	if (plen < 128) {
 		/*
 		 * The RTF_CLONING flag is necessary for in6_is_ifloop_auto().
@@ -2166,104 +2454,19 @@ in6_ifinit(ifp, ia, sin6, newhost)
 	if (newhost) {
 		/* set the rtrequest function to create llinfo */
 		ia->ia_ifa.ifa_rtrequest = nd6_rtrequest;
+		IFA_UNLOCK(ifa);
 		in6_ifaddloop(&(ia->ia_ifa));
+	} else {
+		IFA_UNLOCK(ifa);
 	}
 
 	return(error);
 }
 
-/*
- * Add an address to the list of IP6 multicast addresses for a
- * given interface.
- */
-struct	in6_multi *
-in6_addmulti(maddr6, ifp, errorp, nd6_locked)
-	struct in6_addr *maddr6;
-	struct ifnet *ifp;
-	int *errorp;
-	int nd6_locked;
-{
-	struct	in6_multi *in6m;
-	struct sockaddr_in6 sin6;
-	struct ifmultiaddr *ifma;
-
-	*errorp = 0;
-
-	/*
-	 * Call generic routine to add membership or increment
-	 * refcount.  It wants addresses in the form of a sockaddr,
-	 * so we build one here (being careful to zero the unused bytes).
-	 */
-	bzero(&sin6, sizeof sin6);
-	sin6.sin6_family = AF_INET6;
-	sin6.sin6_len = sizeof sin6;
-	sin6.sin6_addr = *maddr6;
-	*errorp = if_addmulti(ifp, (struct sockaddr *)&sin6, &ifma);
-	if (*errorp) {
-		return 0;
-	}
-
-	/*
-	 * If ifma->ifma_protospec is null, then if_addmulti() created
-	 * a new record.  Otherwise, we are done.
-	 */
-	if (ifma->ifma_protospec != 0)
-		return ifma->ifma_protospec;
-
-	/* XXX - if_addmulti uses M_WAITOK.  Can this really be called
-	   at interrupt time?  If so, need to fix if_addmulti. XXX */
-	in6m = (struct in6_multi *)_MALLOC(sizeof(*in6m), M_IPMADDR, M_NOWAIT);
-	if (in6m == NULL) {
-		return (NULL);
-	}
-
-	bzero(in6m, sizeof *in6m);
-	in6m->in6m_addr = *maddr6;
-	in6m->in6m_ifp = ifp;
-	in6m->in6m_ifma = ifma;
-	ifma->ifma_protospec = in6m;
-	if (nd6_locked == 0)
-		lck_mtx_lock(nd6_mutex);
-	LIST_INSERT_HEAD(&in6_multihead, in6m, in6m_entry);
-	if (nd6_locked == 0)
-		lck_mtx_unlock(nd6_mutex);
-
-	/*
-	 * Let MLD6 know that we have joined a new IP6 multicast
-	 * group.
-	 */
-	mld6_start_listening(in6m);
-	return(in6m);
-}
-
-/*
- * Delete a multicast address record.
- */
 void
-in6_delmulti(
-	struct in6_multi *in6m, int nd6locked)
+in6_purgeaddrs(struct ifnet *ifp)
 {
-	struct ifmultiaddr *ifma = in6m->in6m_ifma;
-
-	if (ifma && ifma->ifma_usecount == 1) {
-		/*
-		 * No remaining claims to this record; let MLD6 know
-		 * that we are leaving the multicast group.
-		 */
-		mld6_stop_listening(in6m);
-		ifma->ifma_protospec = 0;
-		if (nd6locked == 0)
-			lck_mtx_lock(nd6_mutex);
-		LIST_REMOVE(in6m, in6m_entry);
-		if (nd6locked == 0)
-			lck_mtx_unlock(nd6_mutex);
-		FREE(in6m, M_IPMADDR);
-	}
-	/* XXX - should be separate API for when we have an ifma? */
-	if (ifma) {
-		if_delmultiaddr(ifma, 0);
-		ifma_release(ifma);
-	}
+	in6_purgeif(ifp);
 }
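With in6_addmulti()/in6_delmulti() removed, memberships held on behalf of an address are tracked as in6_multi_mship records: in6_joingroup() returns one, it is chained on ia->ia6_memberships, and teardown hands each back to in6_leavegroup(). A hedged usage sketch following the calls in this patch (`group` is an assumed local; error handling elided):

	/* Sketch: per-address multicast membership with the mship API. */
	struct in6_multi_mship *imm;
	int error = 0;

	imm = in6_joingroup(ifp, &group, &error, 0 /* no MLD delay */);
	if (imm != NULL) {
		IFA_LOCK_SPIN(&ia->ia_ifa);
		LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
		IFA_UNLOCK(&ia->ia_ifa);
	}
	/* ... later, on teardown ... */
	LIST_REMOVE(imm, i6mm_chain);
	in6_leavegroup(imm);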
 
 /*
@@ -2279,19 +2482,23 @@ in6ifa_ifpforlinklocal(ifp, ignoreflags)
 	ifnet_lock_shared(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 	{
-		if (ifa->ifa_addr == NULL)
-			continue;	/* just for safety */
-		if (ifa->ifa_addr->sa_family != AF_INET6)
+		IFA_LOCK_SPIN(ifa);
+		if (ifa->ifa_addr->sa_family != AF_INET6) {
+			IFA_UNLOCK(ifa);
 			continue;
+		}
 		if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa))) {
 			if ((((struct in6_ifaddr *)ifa)->ia6_flags &
-			     ignoreflags) != 0)
+			     ignoreflags) != 0) {
+				IFA_UNLOCK(ifa);
 				continue;
+			}
+			IFA_ADDREF_LOCKED(ifa);	/* for caller */
+			IFA_UNLOCK(ifa);
 			break;
 		}
+		IFA_UNLOCK(ifa);
 	}
-	if (ifa != NULL)
-		ifaref(ifa);
 	ifnet_lock_done(ifp);
 
 	return((struct in6_ifaddr *)ifa);
@@ -2310,15 +2517,18 @@ in6ifa_ifpwithaddr(ifp, addr)
 	ifnet_lock_shared(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 	{
-		if (ifa->ifa_addr == NULL)
-			continue;	/* just for safety */
-		if (ifa->ifa_addr->sa_family != AF_INET6)
+		IFA_LOCK_SPIN(ifa);
+		if (ifa->ifa_addr->sa_family != AF_INET6) {
+			IFA_UNLOCK(ifa);
 			continue;
-		if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa)))
+		}
+		if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa))) {
+			IFA_ADDREF_LOCKED(ifa);	/* for caller */
+			IFA_UNLOCK(ifa);
 			break;
+		}
+		IFA_UNLOCK(ifa);
 	}
-	if (ifa != NULL)
-		ifaref(ifa);
 	ifnet_lock_done(ifp);
 
 	return((struct in6_ifaddr *)ifa);
@@ -2385,7 +2595,7 @@ in6addr_local(struct in6_addr *in6)
 	struct sockaddr_in6 sin6;
 	int local = 0;
 
-	if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6))
+	if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_SCOPE_LINKLOCAL(in6))
 		return (1);
 
 	sin6.sin6_family = AF_INET6;
@@ -2406,48 +2616,48 @@ in6addr_local(struct in6_addr *in6)
 }
 
 int
-in6_localaddr(in6)
-	struct in6_addr *in6;
+in6_localaddr(struct in6_addr *in6)
 {
 	struct in6_ifaddr *ia;
 
 	if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6))
-		return 1;
+		return (1);
 
-	lck_mtx_lock(nd6_mutex);
-	for (ia = in6_ifaddrs; ia; ia = ia->ia_next)
+	lck_rw_lock_shared(&in6_ifaddr_rwlock);
+	for (ia = in6_ifaddrs; ia; ia = ia->ia_next) {
+		IFA_LOCK_SPIN(&ia->ia_ifa);
 		if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr,
-					      &ia->ia_prefixmask.sin6_addr)) {
-			lck_mtx_unlock(nd6_mutex);
-			return 1;
+		    &ia->ia_prefixmask.sin6_addr)) {
+			IFA_UNLOCK(&ia->ia_ifa);
+			lck_rw_done(&in6_ifaddr_rwlock);
+			return (1);
 		}
-
-	lck_mtx_unlock(nd6_mutex);
+		IFA_UNLOCK(&ia->ia_ifa);
+	}
+	lck_rw_done(&in6_ifaddr_rwlock);
 	return (0);
 }
 
 int
-in6_is_addr_deprecated(sa6)
-	struct sockaddr_in6 *sa6;
+in6_is_addr_deprecated(struct sockaddr_in6 *sa6)
 {
 	struct in6_ifaddr *ia;
 
-	lck_mtx_lock(nd6_mutex);
+	lck_rw_lock_shared(&in6_ifaddr_rwlock);
 	for (ia = in6_ifaddrs; ia; ia = ia->ia_next) {
+		IFA_LOCK_SPIN(&ia->ia_ifa);
 		if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
-				       &sa6->sin6_addr) &&
-#if SCOPEDROUTING
-		    ia->ia_addr.sin6_scope_id == sa6->sin6_scope_id &&
-#endif
+		    &sa6->sin6_addr) &&
 		    (ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) {
-			lck_mtx_unlock(nd6_mutex);
+			IFA_UNLOCK(&ia->ia_ifa);
+			lck_rw_done(&in6_ifaddr_rwlock);
 			return(1); /* true */
 		}
-
 		/* XXX: do we still have to go thru the rest of the list? */
+		IFA_UNLOCK(&ia->ia_ifa);
 	}
 
-	lck_mtx_unlock(nd6_mutex);
+	lck_rw_done(&in6_ifaddr_rwlock);
 	return(0);		/* false */
 }
 
@@ -2542,9 +2752,7 @@ in6_ifawithscope(
 	struct in6_ifaddr *ifa_best = NULL;
 	
 	if (oifp == NULL) {
-#if 0
-		printf("in6_ifawithscope: output interface is not specified\n");
-#endif
+		/* output interface is not specified */
 		return(NULL);
 	}
 
@@ -2567,9 +2775,11 @@ in6_ifawithscope(
 		{
 			int tlen = -1, dscopecmp, bscopecmp, matchcmp;
 
-			if (ifa->ifa_addr->sa_family != AF_INET6)
+			IFA_LOCK(ifa);
+			if (ifa->ifa_addr->sa_family != AF_INET6) {
+				IFA_UNLOCK(ifa);
 				continue;
-
+			}
 			src_scope = in6_addrscope(IFA_IN6(ifa));
 
 			/*
@@ -2577,18 +2787,21 @@ in6_ifawithscope(
 			 * nor a duplicated address.
 			 */
 			if (((struct in6_ifaddr *)ifa)->ia6_flags &
-			    IN6_IFF_NOTREADY)
+			    IN6_IFF_NOTREADY) {
+				IFA_UNLOCK(ifa);
 				continue;
-
+			}
 			/* XXX: is there any case to allow anycasts? */
 			if (((struct in6_ifaddr *)ifa)->ia6_flags &
-			    IN6_IFF_ANYCAST)
+			    IN6_IFF_ANYCAST) {
+				IFA_UNLOCK(ifa);
 				continue;
-
+			}
 			if (((struct in6_ifaddr *)ifa)->ia6_flags &
-			    IN6_IFF_DETACHED)
+			    IN6_IFF_DETACHED) {
+				IFA_UNLOCK(ifa);
 				continue;
-
+			}
 			/*
 			 * If this is the first address we find,
 			 * keep it anyway.
@@ -2620,9 +2833,10 @@ in6_ifawithscope(
 			    IN6_ARE_SCOPE_CMP(src_scope, dst_scope) >= 0)
 				goto replace; /* (A) */
 			if (IN6_ARE_SCOPE_CMP(src_scope, dst_scope) < 0 &&
-			    IN6_ARE_SCOPE_CMP(best_scope, dst_scope) >= 0)
+			    IN6_ARE_SCOPE_CMP(best_scope, dst_scope) >= 0) {
+				IFA_UNLOCK(ifa);
 				continue; /* (B) */
-
+			}
 			/*
 			 * A deprecated address SHOULD NOT be used in new
 			 * communications if an alternate (non-deprecated)
@@ -2635,16 +2849,19 @@ in6_ifawithscope(
 				 * Ignore any deprecated addresses if
 				 * specified by configuration.
 				 */
-				if (!ip6_use_deprecated)
+				if (!ip6_use_deprecated) {
+					IFA_UNLOCK(ifa);
 					continue;
-
+				}
 				/*
 				 * If we have already found a non-deprecated
 				 * candidate, just ignore deprecated addresses.
 				 */
 				if ((ifa_best->ia6_flags & IN6_IFF_DEPRECATED)
-				    == 0)
+				    == 0) {
+					IFA_UNLOCK(ifa);
 					continue;
+				}
 			}
 
 			/*
@@ -2660,7 +2877,7 @@ in6_ifawithscope(
 
 			/*
 			 * When we use temporary addresses described in
-			 * RFC 3041, we prefer temporary addresses to
+			 * RFC 4941, we prefer temporary addresses to
 			 * public autoconf addresses.  Again, note the
 			 * invariants from (A) and (B).  Also note that we
 			 * don't have any preference between static addresses
@@ -2685,6 +2902,7 @@ in6_ifawithscope(
 				    (ifat->ia6_flags &
 				     (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY))
 				     == IN6_IFF_AUTOCONF) {
+					IFA_UNLOCK(ifa);
 					continue;
 				}
 			}
@@ -2745,8 +2963,10 @@ in6_ifawithscope(
 			if (bscopecmp == 0) {
 				struct ifnet *bifp = ifa_best->ia_ifp;
 
-				if (bifp == oifp && ifp != oifp) /* (1) */
+				if (bifp == oifp && ifp != oifp) { /* (1) */
+					IFA_UNLOCK(ifa);
 					continue;
+				}
 				if (bifp != oifp && ifp == oifp) /* (2) */
 					goto replace;
 
@@ -2761,16 +2981,20 @@ in6_ifawithscope(
 				matchcmp = tlen - blen;
 				if (matchcmp > 0) /* (3) */
 					goto replace;
+				IFA_UNLOCK(ifa);
 				continue; /* (4) */
 			}
 			if (dscopecmp > 0) {
-				if (bscopecmp > 0) /* (5) */
+				if (bscopecmp > 0) { /* (5) */
+					IFA_UNLOCK(ifa);
 					continue;
+				}
 				goto replace; /* (6) */
 			}
 			if (dscopecmp < 0) {
 				if (bscopecmp > 0) /* (7) */
 					goto replace;
+				IFA_UNLOCK(ifa);
 				continue; /* (8) */
 			}
 
@@ -2778,14 +3002,15 @@ in6_ifawithscope(
 			if (bscopecmp < 0)
 				goto replace; /* (9) */
 
-		  replace:
-		  	ifaref(ifa);
-		  	if (ifa_best)
-		  		ifafree(&ifa_best->ia_ifa);
-			ifa_best = (struct in6_ifaddr *)ifa;
+replace:
+			IFA_ADDREF_LOCKED(ifa);	/* for ifa_best */
 			blen = tlen >= 0 ? tlen :
 				in6_matchlen(IFA_IN6(ifa), dst);
-			best_scope = in6_addrscope(&ifa_best->ia_addr.sin6_addr);
+			best_scope = in6_addrscope(&ifa2ia6(ifa)->ia_addr.sin6_addr);
+			IFA_UNLOCK(ifa);
+			if (ifa_best)
+				IFA_REMREF(&ifa_best->ia_ifa);
+			ifa_best = (struct in6_ifaddr *)ifa;
 		}
 		ifnet_lock_done(ifp);
 	}
@@ -2795,6 +3020,7 @@ in6_ifawithscope(
 	if (ifa_best == NULL)
 		ip6stat.ip6s_sources_none++;
 	else {
+		IFA_LOCK_SPIN(&ifa_best->ia_ifa);
 		if (oifp == ifa_best->ia_ifp)
 			ip6stat.ip6s_sources_sameif[best_scope]++;
 		else
@@ -2807,6 +3033,7 @@ in6_ifawithscope(
 
 		if ((ifa_best->ia6_flags & IN6_IFF_DEPRECATED) != 0)
 			ip6stat.ip6s_sources_deprecated[best_scope]++;
+		IFA_UNLOCK(&ifa_best->ia_ifa);
 	}
 
 	return(ifa_best);
@@ -2823,7 +3050,7 @@ in6_ifawithifp(
 {
 	int dst_scope =	in6_addrscope(dst), blen = -1, tlen;
 	struct ifaddr *ifa;
-	struct in6_ifaddr *besta = 0;
+	struct in6_ifaddr *besta = NULL;
 	struct in6_ifaddr *dep[2];	/* last-resort: deprecated */
 
 	dep[0] = dep[1] = NULL;
@@ -2837,20 +3064,32 @@ in6_ifawithifp(
 	ifnet_lock_shared(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 	{
-		if (ifa->ifa_addr->sa_family != AF_INET6)
+		IFA_LOCK(ifa);
+		if (ifa->ifa_addr->sa_family != AF_INET6) {
+			IFA_UNLOCK(ifa);
 			continue;
-		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST)
+		}
+		if (ifa2ia6(ifa)->ia6_flags & IN6_IFF_ANYCAST) {
+			IFA_UNLOCK(ifa);
 			continue; /* XXX: is there any case to allow anycast? */
-		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY)
+		}
+		if (ifa2ia6(ifa)->ia6_flags & IN6_IFF_NOTREADY) {
+			IFA_UNLOCK(ifa);
 			continue; /* don't use this interface */
-		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED)
+		}
+		if (ifa2ia6(ifa)->ia6_flags & IN6_IFF_DETACHED) {
+			IFA_UNLOCK(ifa);
 			continue;
-		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) {
+		}
+		if (ifa2ia6(ifa)->ia6_flags & IN6_IFF_DEPRECATED) {
 			if (ip6_use_deprecated) {
+				IFA_ADDREF_LOCKED(ifa);	/* for dep[0] */
+				IFA_UNLOCK(ifa);
 				if (dep[0] != NULL)
-					ifafree(&dep[0]->ia_ifa);
+					IFA_REMREF(&dep[0]->ia_ifa);
 				dep[0] = (struct in6_ifaddr *)ifa;
-				ifaref(ifa);
+			} else {
+				IFA_UNLOCK(ifa);
 			}
 			continue;
 		}
@@ -2860,51 +3099,77 @@ in6_ifawithifp(
 			 * call in6_matchlen() as few as possible
 			 */
 			if (besta) {
-				if (blen == -1)
+				if (blen == -1) {
+					IFA_UNLOCK(ifa);
+					IFA_LOCK(&besta->ia_ifa);
 					blen = in6_matchlen(&besta->ia_addr.sin6_addr, dst);
+					IFA_UNLOCK(&besta->ia_ifa);
+					IFA_LOCK(ifa);
+				}
 				tlen = in6_matchlen(IFA_IN6(ifa), dst);
 				if (tlen > blen) {
 					blen = tlen;
+					IFA_ADDREF_LOCKED(ifa);	/* for besta */
+					IFA_UNLOCK(ifa);
+					IFA_REMREF(&besta->ia_ifa);
 					besta = (struct in6_ifaddr *)ifa;
+				} else {
+					IFA_UNLOCK(ifa);
 				}
-			} else
+			} else {
 				besta = (struct in6_ifaddr *)ifa;
+				IFA_ADDREF_LOCKED(ifa);	/* for besta */
+				IFA_UNLOCK(ifa);
+			}
+		} else {
+			IFA_UNLOCK(ifa);
 		}
 	}
 	if (besta) {
-		ifaref(&besta->ia_ifa);
 		ifnet_lock_done(ifp);
 		if (dep[0] != NULL)
-			ifafree(&dep[0]->ia_ifa);
+			IFA_REMREF(&dep[0]->ia_ifa);
 		return(besta);
 	}
 
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 	{
-		if (ifa->ifa_addr->sa_family != AF_INET6)
+		IFA_LOCK(ifa);
+		if (ifa->ifa_addr->sa_family != AF_INET6) {
+			IFA_UNLOCK(ifa);
 			continue;
-		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST)
+		}
+		if (ifa2ia6(ifa)->ia6_flags & IN6_IFF_ANYCAST) {
+			IFA_UNLOCK(ifa);
 			continue; /* XXX: is there any case to allow anycast? */
-		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY)
+		}
+		if (ifa2ia6(ifa)->ia6_flags & IN6_IFF_NOTREADY) {
+			IFA_UNLOCK(ifa);
 			continue; /* don't use this interface */
-		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED)
+		}
+		if (ifa2ia6(ifa)->ia6_flags & IN6_IFF_DETACHED) {
+			IFA_UNLOCK(ifa);
 			continue;
-		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) {
+		}
+		if (ifa2ia6(ifa)->ia6_flags & IN6_IFF_DEPRECATED) {
 			if (ip6_use_deprecated) {
+				IFA_ADDREF_LOCKED(ifa);	/* for dep[1] */
+				IFA_UNLOCK(ifa);
 				if (dep[1] != NULL)
-					ifafree(&dep[1]->ia_ifa);
+					IFA_REMREF(&dep[1]->ia_ifa);
 				dep[1] = (struct in6_ifaddr *)ifa;
-				ifaref(ifa);
+			} else {
+				IFA_UNLOCK(ifa);
 			}
 			continue;
 		}
-		if (ifa != NULL)
-			ifaref(ifa);
+		IFA_ADDREF_LOCKED(ifa);	/* for caller */
+		IFA_UNLOCK(ifa);
 		ifnet_lock_done(ifp);
 		if (dep[0] != NULL)
-			ifafree(&dep[0]->ia_ifa);
+			IFA_REMREF(&dep[0]->ia_ifa);
 		if (dep[1] != NULL)
-			ifafree(&dep[1]->ia_ifa);
+			IFA_REMREF(&dep[1]->ia_ifa);
 		return (struct in6_ifaddr *)ifa;
 	}
 	ifnet_lock_done(ifp);
@@ -2912,7 +3177,7 @@ in6_ifawithifp(
 	/* use the last-resort values, that are, deprecated addresses */
 	if (dep[0]) {
 		if (dep[1] != NULL)
-			ifafree(&dep[1]->ia_ifa);
+			IFA_REMREF(&dep[1]->ia_ifa);
 		return dep[0];
 	}
 	if (dep[1])
@@ -2945,14 +3210,22 @@ in6_if_up(
 		return error;
 
 	dad_delay = 0;
+
 	ifnet_lock_exclusive(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 	{
-		if (ifa->ifa_addr->sa_family != AF_INET6)
+		IFA_LOCK_SPIN(ifa);
+		if (ifa->ifa_addr->sa_family != AF_INET6) {
+			IFA_UNLOCK(ifa);
 			continue;
+		}
 		ia = (struct in6_ifaddr *)ifa;
-		if (ia->ia6_flags & IN6_IFF_TENTATIVE)
+		if (ia->ia6_flags & IN6_IFF_TENTATIVE) {
+			IFA_UNLOCK(ifa);
 			nd6_dad_start(ifa, &dad_delay);
+		} else {
+			IFA_UNLOCK(ifa);
+		}
 	}
 	ifnet_lock_done(ifp);
 
@@ -2966,6 +3239,15 @@ in6if_do_dad(
 	if ((ifp->if_flags & IFF_LOOPBACK) != 0)
 		return(0);
 
+	/*
+	 * Skip DAD on service triggered interfaces, for now,
+	 * until we have support for Opportunistic Duplicate
+	 * Address Detection [RFC 4429] and we can then back
+	 * this out.
+	 */
+	if (ifp->if_eflags & IFEF_SERVICE_TRIGGERED)
+		return (0);
+
 	switch (ifp->if_type) {
 #if IFT_DUMMY
 	case IFT_DUMMY:
@@ -3018,7 +3300,69 @@ in6_setmaxmtu()
 	if (maxmtu)	/* update only when maxmtu is positive */
 		in6_maxmtu = maxmtu;
 }
-
+/*
+ * Provide the length of interface identifiers to be used for the link attached
+ * to the given interface.  The length should be defined in the "IPv6 over
+ * xxx-link" document.  Note that the address architecture might also define
+ * the length for a particular set of address prefixes, regardless of the
+ * link type.  As clarified in rfc2462bis, those two definitions should be
+ * consistent, and they really are as of August 2004.
+ */
+int
+in6_if2idlen(struct ifnet *ifp)
+{
+	switch (ifp->if_type) {
+	case IFT_ETHER:		/* RFC2464 */
+	case IFT_IEEE8023ADLAG:	/* IEEE802.3ad Link Aggregate */
+#ifdef IFT_PROPVIRTUAL
+	case IFT_PROPVIRTUAL:	/* XXX: no RFC. treat it as ether */
+#endif
+#ifdef IFT_L2VLAN
+	case IFT_L2VLAN:	/* ditto */
+#endif
+#ifdef IFT_IEEE80211
+	case IFT_IEEE80211:	/* ditto */
+#endif
+#ifdef IFT_MIP
+	case IFT_MIP:	/* ditto */
+#endif
+		return (64);
+	case IFT_FDDI:		/* RFC2467 */
+		return (64);
+	case IFT_ISO88025:	/* RFC2470 (IPv6 over Token Ring) */
+		return (64);
+	case IFT_PPP:		/* RFC2472 */
+		return (64);
+	case IFT_ARCNET:	/* RFC2497 */
+		return (64);
+	case IFT_FRELAY:	/* RFC2590 */
+		return (64);
+	case IFT_IEEE1394:	/* RFC3146 */
+		return (64);
+	case IFT_GIF:
+		return (64);	/* draft-ietf-v6ops-mech-v2-07 */
+	case IFT_LOOP:
+		return (64);	/* XXX: is this really correct? */
+	case IFT_OTHER:
+		return (64);	/* for utun interfaces */
+	case IFT_CELLULAR:
+		return (64);	/* Packet Data over Cellular */
+	default:
+		/*
+		 * Unknown link type:
+		 * It might be controversial to use today's common constant
+		 * of 64 for these cases unconditionally.  For full compliance,
+		 * we should return an error in this case.  On the other hand,
+		 * if we simply lack the standard for the link type, or a new
+		 * standard is defined for a new link type, the IFID length
+		 * is very likely to be the common constant.  As a compromise,
+		 * we always use the constant, but print an explicit notice
+		 * about the "unknown" case.
+		 */
+		printf("in6_if2idlen: unknown link type (%d)\n", ifp->if_type);
+		return (64);
+	}
+}
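A caller would typically turn the returned identifier length into an on-link prefix length (128 - idlen, i.e. /64 for every link type listed above). A hypothetical fragment using in6_prefixlen2mask(), which this patch adopts elsewhere:

	/* Hypothetical: derive the on-link prefix mask for an interface. */
	struct in6_addr mask;
	int plen = 128 - in6_if2idlen(ifp);	/* e.g. 64 -> /64 */

	in6_prefixlen2mask(&mask, plen);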
 /*
  * Convert sockaddr_in6 to sockaddr_in.  Original sockaddr_in6 must be
  * v4 mapped addr or v4 compat addr
@@ -3030,7 +3374,7 @@ in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6)
 	sin->sin_len = sizeof(struct sockaddr_in);
 	sin->sin_family = AF_INET;
 	sin->sin_port = sin6->sin6_port;
-	sin->sin_addr.s_addr = sin6->sin6_addr.s6_addr32[3];	
+	sin->sin_addr.s_addr = sin6->sin6_addr.s6_addr32[3];
 }
 
 /* Convert sockaddr_in to sockaddr_in6 in v4 mapped addr format. */
@@ -3096,11 +3440,14 @@ in6_post_msg(struct ifnet *ifp, u_int32_t event_code, struct in6_ifaddr *ifa)
 	struct kev_msg        ev_msg;
 	struct kev_in6_data   in6_event_data;
 
+	bzero(&in6_event_data, sizeof(struct kev_in6_data));
+	bzero(&ev_msg, sizeof(struct kev_msg));
 	ev_msg.vendor_code    = KEV_VENDOR_APPLE;
 	ev_msg.kev_class      = KEV_NETWORK_CLASS;
 	ev_msg.kev_subclass   = KEV_INET6_SUBCLASS;
 	ev_msg.event_code     = event_code;
 
+	IFA_LOCK(&ifa->ia_ifa);
 	in6_event_data.ia_addr         = ifa->ia_addr;
 	in6_event_data.ia_net          = ifa->ia_net;
 	in6_event_data.ia_dstaddr      = ifa->ia_dstaddr;
@@ -3116,6 +3463,7 @@ in6_post_msg(struct ifnet *ifp, u_int32_t event_code, struct in6_ifaddr *ifa)
 	    ifa->ia6_lifetime.ia6t_vltime;
 	in6_event_data.ia_lifetime.ia6t_pltime =
 	    ifa->ia6_lifetime.ia6t_pltime;
+	IFA_UNLOCK(&ifa->ia_ifa);
 
 	if (ifp != NULL) {
 		strncpy(&in6_event_data.link_data.if_name[0],
@@ -3137,6 +3485,8 @@ in6_post_msg(struct ifnet *ifp, u_int32_t event_code, struct in6_ifaddr *ifa)
 void
 in6_ifaddr_init(void)
 {
+	in6_multi_init();
+
 	PE_parse_boot_argn("ifa_debug", &in6ifa_debug, sizeof (in6ifa_debug));
 
 	in6ifa_size = (in6ifa_debug == 0) ? sizeof (struct in6_ifaddr) :
@@ -3144,10 +3494,15 @@ in6_ifaddr_init(void)
 
 	in6ifa_zone = zinit(in6ifa_size, IN6IFA_ZONE_MAX * in6ifa_size,
 	    0, IN6IFA_ZONE_NAME);
-	if (in6ifa_zone == NULL)
+	if (in6ifa_zone == NULL) {
 		panic("%s: failed allocating %s", __func__, IN6IFA_ZONE_NAME);
-
+		/* NOTREACHED */
+	}
 	zone_change(in6ifa_zone, Z_EXPAND, TRUE);
+	zone_change(in6ifa_zone, Z_CALLERACCT, FALSE);
+
+	lck_mtx_init(&in6ifa_trash_lock, ifa_mtx_grp, ifa_mtx_attr);
+	TAILQ_INIT(&in6ifa_trash_head);
 }
 
 static struct in6_ifaddr *
@@ -3161,11 +3516,14 @@ in6_ifaddr_alloc(int how)
 		bzero(in6ifa, in6ifa_size);
 		in6ifa->ia_ifa.ifa_free = in6_ifaddr_free;
 		in6ifa->ia_ifa.ifa_debug |= IFD_ALLOC;
+		ifa_lock_init(&in6ifa->ia_ifa);
 		if (in6ifa_debug != 0) {
 			struct in6_ifaddr_dbg *in6ifa_dbg =
 			    (struct in6_ifaddr_dbg *)in6ifa;
 			in6ifa->ia_ifa.ifa_debug |= IFD_DEBUG;
 			in6ifa->ia_ifa.ifa_trace = in6_ifaddr_trace;
+			in6ifa->ia_ifa.ifa_attached = in6_ifaddr_attached;
+			in6ifa->ia_ifa.ifa_detached = in6_ifaddr_detached;
 			ctrace_record(&in6ifa_dbg->in6ifa_alloc);
 		}
 	}
@@ -3175,22 +3533,80 @@ in6_ifaddr_alloc(int how)
 static void
 in6_ifaddr_free(struct ifaddr *ifa)
 {
-	if (ifa->ifa_refcnt != 0)
+	IFA_LOCK_ASSERT_HELD(ifa);
+
+	if (ifa->ifa_refcnt != 0) {
 		panic("%s: ifa %p bad ref cnt", __func__, ifa);
-	if (!(ifa->ifa_debug & IFD_ALLOC))
+		/* NOTREACHED */
+	} else if (!(ifa->ifa_debug & IFD_ALLOC)) {
 		panic("%s: ifa %p cannot be freed", __func__, ifa);
-
+		/* NOTREACHED */
+	}
 	if (ifa->ifa_debug & IFD_DEBUG) {
 		struct in6_ifaddr_dbg *in6ifa_dbg =
 		    (struct in6_ifaddr_dbg *)ifa;
 		ctrace_record(&in6ifa_dbg->in6ifa_free);
 		bcopy(&in6ifa_dbg->in6ifa, &in6ifa_dbg->in6ifa_old,
 		    sizeof (struct in6_ifaddr));
+		if (ifa->ifa_debug & IFD_TRASHED) {
+			/* Become a regular mutex, just in case */
+			IFA_CONVERT_LOCK(ifa);
+			lck_mtx_lock(&in6ifa_trash_lock);
+			TAILQ_REMOVE(&in6ifa_trash_head, in6ifa_dbg,
+			    in6ifa_trash_link);
+			lck_mtx_unlock(&in6ifa_trash_lock);
+			ifa->ifa_debug &= ~IFD_TRASHED;
+		}
 	}
+	IFA_UNLOCK(ifa);
+	ifa_lock_destroy(ifa);
 	bzero(ifa, sizeof (struct in6_ifaddr));
 	zfree(in6ifa_zone, ifa);
 }
 
+static void
+in6_ifaddr_attached(struct ifaddr *ifa)
+{
+	struct in6_ifaddr_dbg *in6ifa_dbg = (struct in6_ifaddr_dbg *)ifa;
+
+	IFA_LOCK_ASSERT_HELD(ifa);
+
+	if (!(ifa->ifa_debug & IFD_DEBUG)) {
+		panic("%s: ifa %p has no debug structure", __func__, ifa);
+		/* NOTREACHED */
+	}
+	if (ifa->ifa_debug & IFD_TRASHED) {
+		/* Become a regular mutex, just in case */
+		IFA_CONVERT_LOCK(ifa);
+		lck_mtx_lock(&in6ifa_trash_lock);
+		TAILQ_REMOVE(&in6ifa_trash_head, in6ifa_dbg, in6ifa_trash_link);
+		lck_mtx_unlock(&in6ifa_trash_lock);
+		ifa->ifa_debug &= ~IFD_TRASHED;
+	}
+}
+
+static void
+in6_ifaddr_detached(struct ifaddr *ifa)
+{
+	struct in6_ifaddr_dbg *in6ifa_dbg = (struct in6_ifaddr_dbg *)ifa;
+
+	IFA_LOCK_ASSERT_HELD(ifa);
+
+	if (!(ifa->ifa_debug & IFD_DEBUG)) {
+		panic("%s: ifa %p has no debug structure", __func__, ifa);
+		/* NOTREACHED */
+	} else if (ifa->ifa_debug & IFD_TRASHED) {
+		panic("%s: ifa %p is already in trash list", __func__, ifa);
+		/* NOTREACHED */
+	}
+	ifa->ifa_debug |= IFD_TRASHED;
+	/* Become a regular mutex, just in case */
+	IFA_CONVERT_LOCK(ifa);
+	lck_mtx_lock(&in6ifa_trash_lock);
+	TAILQ_INSERT_TAIL(&in6ifa_trash_head, in6ifa_dbg, in6ifa_trash_link);
+	lck_mtx_unlock(&in6ifa_trash_lock);
+}
+
 static void
 in6_ifaddr_trace(struct ifaddr *ifa, int refhold)
 {
@@ -3199,9 +3615,10 @@ in6_ifaddr_trace(struct ifaddr *ifa, int refhold)
 	u_int32_t idx;
 	u_int16_t *cnt;
 
-	if (!(ifa->ifa_debug & IFD_DEBUG))
+	if (!(ifa->ifa_debug & IFD_DEBUG)) {
 		panic("%s: ifa %p has no debug structure", __func__, ifa);
-
+		/* NOTREACHED */
+	}
 	if (refhold) {
 		cnt = &in6ifa_dbg->in6ifa_refhold_cnt;
 		tr = in6ifa_dbg->in6ifa_refhold;
@@ -3210,6 +3627,6 @@ in6_ifaddr_trace(struct ifaddr *ifa, int refhold)
 		tr = in6ifa_dbg->in6ifa_refrele;
 	}
 
-	idx = OSAddAtomic16(1, (volatile SInt16 *)cnt) % CTRACE_HIST_SIZE;
+	idx = atomic_add_16_ov(cnt, 1) % IN6IFA_TRACE_HIST_SIZE;
 	ctrace_record(&tr[idx]);
 }
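in6_ifaddr_trace() keeps the most recent hold/release backtraces in a fixed ring: the atomic counter makes slot selection race-free, and new entries simply overwrite the oldest. The indexing idiom in isolation:

	/* Ring-buffer slot from an atomic counter; oldest entry is overwritten. */
	u_int32_t idx = atomic_add_16_ov(cnt, 1) % IN6IFA_TRACE_HIST_SIZE;
	ctrace_record(&tr[idx]);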
diff --git a/bsd/netinet6/in6.h b/bsd/netinet6/in6.h
index fb0479fde..c0838ec43 100644
--- a/bsd/netinet6/in6.h
+++ b/bsd/netinet6/in6.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -114,7 +114,7 @@ typedef __uint8_t		sa_family_t;
  * has the table of implementation/integration differences.
  */
 #define __KAME__
-#define __KAME_VERSION		"20010528/apple-darwin"
+#define __KAME_VERSION		"2009/apple-darwin"
 
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
 /*
@@ -192,6 +192,10 @@ struct sockaddr_in6 {
  * Local definition for masks
  */
 #define IN6MASK0	{{{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }}}
+#define IN6MASK7	{{{ 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}}
+#define IN6MASK16	{{{ 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}}
 #define IN6MASK32	{{{ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, \
 			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}}
 #define IN6MASK64	{{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \
@@ -206,6 +210,8 @@ struct sockaddr_in6 {
 extern const struct sockaddr_in6 sa6_any;
 
 extern const struct in6_addr in6mask0;
+extern const struct in6_addr in6mask7;
+extern const struct in6_addr in6mask16;
 extern const struct in6_addr in6mask32;
 extern const struct in6_addr in6mask64;
 extern const struct in6_addr in6mask96;
@@ -250,12 +256,21 @@ extern const struct in6_addr in6mask128;
 #define IN6ADDR_NODELOCAL_ALLNODES_INIT \
 	{{{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
 	    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}}
+#define IN6ADDR_INTFACELOCAL_ALLNODES_INIT \
+	{{{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+	    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}}
 #define IN6ADDR_LINKLOCAL_ALLNODES_INIT \
 	{{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
 	    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}}
 #define IN6ADDR_LINKLOCAL_ALLROUTERS_INIT \
 	{{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
 	    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }}}
+#define IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT \
+	{{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+	    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16 }}}
+#define IN6ADDR_V4MAPPED_INIT \
+	{{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+	    0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }}}
 #endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
 
 extern const struct in6_addr in6addr_any;
@@ -264,6 +279,7 @@ extern const struct in6_addr in6addr_loopback;
 extern const struct in6_addr in6addr_nodelocal_allnodes;
 extern const struct in6_addr in6addr_linklocal_allnodes;
 extern const struct in6_addr in6addr_linklocal_allrouters;
+extern const struct in6_addr in6addr_linklocal_allv2routers;
 #endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
 
 /*
@@ -326,6 +342,11 @@ extern const struct in6_addr in6addr_linklocal_allrouters;
 	 (*(const __uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \
 	 (*(const __uint32_t *)(const void *)(&(a)->s6_addr[8]) == ntohl(0x0000ffff)))
 
+/*
+ * 6to4
+ */
+#define	IN6_IS_ADDR_6TO4(x)	(ntohs((x)->s6_addr16[0]) == 0x2002)
+
 /*
  * KAME Scope Values
  */
@@ -339,6 +360,7 @@ extern const struct in6_addr in6addr_linklocal_allrouters;
 #define IPV6_ADDR_SCOPE_GLOBAL		0x0e
 #else
 #define __IPV6_ADDR_SCOPE_NODELOCAL	0x01
+#define __IPV6_ADDR_SCOPE_INTFACELOCAL	0x01
 #define __IPV6_ADDR_SCOPE_LINKLOCAL	0x02
 #define __IPV6_ADDR_SCOPE_SITELOCAL	0x05
 #define __IPV6_ADDR_SCOPE_ORGLOCAL	0x08	/* just used in this file */
@@ -359,6 +381,11 @@ extern const struct in6_addr in6addr_linklocal_allrouters;
  */
 #define IN6_IS_ADDR_MULTICAST(a)	((a)->s6_addr[0] == 0xff)
 
+/*
+ * Unique Local IPv6 Unicast Addresses (per RFC 4193)
+ */
+#define IN6_IS_ADDR_UNIQUE_LOCAL(a)	(((a)->s6_addr[0] == 0xfc) || ((a)->s6_addr[0] == 0xfd))
+
 #ifdef KERNEL	/*XXX nonstandard*/
 #define IPV6_ADDR_MC_SCOPE(a)		((a)->s6_addr[1] & 0x0f)
 #else
@@ -450,6 +477,35 @@ struct route_in6 {
  */
 /* no hdrincl */
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
+/*
+ * RFC 3542 defines the following socket options in a manner incompatible
+ * with RFC 2292:
+ *   IPV6_PKTINFO
+ *   IPV6_HOPLIMIT
+ *   IPV6_NEXTHOP
+ *   IPV6_HOPOPTS
+ *   IPV6_DSTOPTS
+ *   IPV6_RTHDR
+ * 
+ * To use the new IPv6 socket options introduced by RFC 3542,
+ * the constant __APPLE_USE_RFC_3542 must be defined before
+ * including <netinet/in.h>.
+ *
+ * To use the old IPv6 socket options from RFC 2292,
+ * the constant __APPLE_USE_RFC_2292 must be defined before
+ * including <netinet/in.h>.
+ *
+ * Note that eventually RFC 3542 will become the default and
+ * RFC 2292 will be obsolete.
+ */
+#ifdef XNU_KERNEL_PRIVATE
+#define __APPLE_USE_RFC_3542 1
+#endif /* XNU_KERNEL_PRIVATE */
+
+#if defined(__APPLE_USE_RFC_3542) && defined(__APPLE_USE_RFC_2292)
+#error "__APPLE_USE_RFC_3542 and __APPLE_USE_RFC_2292 cannot be both defined"
+#endif
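
For illustration (this sketch is not part of the patch itself), a minimal user-space program showing the opt-in mechanism the comment above describes: __APPLE_USE_RFC_3542 is defined before the includes, IPV6_RECVPKTINFO is enabled, and the resulting IPV6_PKTINFO ancillary data is read back. The port number and buffer sizes are arbitrary illustration values.

	#define __APPLE_USE_RFC_3542 1	/* must precede any include of <netinet/in.h> */
	#include <sys/socket.h>
	#include <sys/uio.h>
	#include <netinet/in.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int
	main(void)
	{
		int s = socket(AF_INET6, SOCK_DGRAM, 0);
		int on = 1;
		struct sockaddr_in6 sin6;
		char buf[1500];
		char cbuf[CMSG_SPACE(sizeof (struct in6_pktinfo))];
		struct iovec iov = { buf, sizeof (buf) };
		struct msghdr msg;
		struct cmsghdr *cm;

		/* RFC 3542 name; resolves to IPV6_RECVPKTINFO (61) on Darwin */
		setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof (on));

		memset(&sin6, 0, sizeof (sin6));
		sin6.sin6_family = AF_INET6;
		sin6.sin6_len = sizeof (sin6);
		sin6.sin6_port = htons(5000);		/* arbitrary port */
		bind(s, (struct sockaddr *)&sin6, sizeof (sin6));

		memset(&msg, 0, sizeof (msg));
		msg.msg_iov = &iov;
		msg.msg_iovlen = 1;
		msg.msg_control = cbuf;
		msg.msg_controllen = sizeof (cbuf);
		if (recvmsg(s, &msg, 0) >= 0) {
			for (cm = CMSG_FIRSTHDR(&msg); cm != NULL;
			    cm = CMSG_NXTHDR(&msg, cm)) {
				if (cm->cmsg_level == IPPROTO_IPV6 &&
				    cm->cmsg_type == IPV6_PKTINFO) {
					struct in6_pktinfo pi;
					memcpy(&pi, CMSG_DATA(cm), sizeof (pi));
					printf("received on ifindex %u\n",
					    pi.ipi6_ifindex);
				}
			}
		}
		close(s);
		return (0);
	}
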
+
 #if 0 /* the following are relics from IPv4 and hence are disabled */
 #define IPV6_OPTIONS		1  /* buf/ip6_opts; set/get IP6 options */
 #define IPV6_RECVOPTS		5  /* bool; receive all IP6 opts w/dgram */
@@ -469,14 +525,24 @@ struct route_in6 {
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
 #define IPV6_PORTRANGE		14 /* int; range to choose for unspec port */
 #define ICMP6_FILTER		18 /* icmp6_filter; icmp6 filter */
-/* RFC2292 options */
-#define IPV6_PKTINFO		19 /* bool; send/recv if, src/dst addr */
-#define IPV6_HOPLIMIT		20 /* bool; hop limit */
-#define IPV6_NEXTHOP		21 /* bool; next hop addr */
-#define IPV6_HOPOPTS		22 /* bool; hop-by-hop option */
-#define IPV6_DSTOPTS		23 /* bool; destination option */
-#define IPV6_RTHDR		24 /* bool; routing header */
-#define IPV6_PKTOPTIONS		25 /* buf/cmsghdr; set/get IPv6 options */
+#define IPV6_2292PKTINFO	19 /* bool; send/recv if, src/dst addr */
+#define IPV6_2292HOPLIMIT	20 /* bool; hop limit */
+#define IPV6_2292NEXTHOP	21 /* bool; next hop addr */
+#define IPV6_2292HOPOPTS	22 /* bool; hop-by-hop option */
+#define IPV6_2292DSTOPTS	23 /* bool; destination option */
+#define IPV6_2292RTHDR		24 /* ip6_rthdr: routing header */
+#define IPV6_2292PKTOPTIONS	25 /* buf/cmsghdr; set/get IPv6 options */
+				   /* obsoleted by RFC3542 */
+
+#ifdef __APPLE_USE_RFC_2292
+#define IPV6_PKTINFO        IPV6_2292PKTINFO
+#define IPV6_HOPLIMIT       IPV6_2292HOPLIMIT
+#define IPV6_NEXTHOP        IPV6_2292NEXTHOP
+#define IPV6_HOPOPTS        IPV6_2292HOPOPTS
+#define IPV6_DSTOPTS        IPV6_2292DSTOPTS
+#define IPV6_RTHDR          IPV6_2292RTHDR
+#define IPV6_PKTOPTIONS     IPV6_2292PKTOPTIONS
+#endif /* __APPLE_USE_RFC_2292 */
 
 #define IPV6_CHECKSUM		26 /* int; checksum offset for raw socket */
 #endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
@@ -500,8 +566,80 @@ struct route_in6 {
 #define IPV6_FW_GET		34 /* get entire firewall rule chain */
 #endif /* 1 */
 
-#define IPV6_RECVTCLASS         35 /* bool; recv traffic class values */
-#define IPV6_TCLASS             36 /* int; send traffic class value */
+/* APPLE: NOTE the values of these two options are kept unchanged from
+ * 	  previous versions of Darwin/OS X for binary compatibility reasons
+ * 	  and differ from FreeBSD (values 57 and 61). See below.
+ */
+#define IPV6_RECVTCLASS		35 /* bool; recv traffic class values */
+#define IPV6_TCLASS		36 /* int; send traffic class value */
+
+#ifdef __APPLE_USE_RFC_3542
+/* new socket options introduced in RFC3542 */
+#define IPV6_RTHDRDSTOPTS	57 /* ip6_dest; send dst option before rthdr
+				    * APPLE: Value purposely different from FreeBSD (35) to avoid
+				    * collision with the definition of IPV6_RECVTCLASS in previous
+				    * Darwin implementations */
+
+#define IPV6_RECVPKTINFO	61 /* bool; recv if, dst addr
+				    * APPLE: Value purposely different from FreeBSD (36) to avoid
+				    * collision with the definition of IPV6_TCLASS in previous
+				    * Darwin implementations */
+
+#define IPV6_RECVHOPLIMIT	37 /* bool; recv hop limit */
+#define IPV6_RECVRTHDR		38 /* bool; recv routing header */
+#define IPV6_RECVHOPOPTS	39 /* bool; recv hop-by-hop option */
+#define IPV6_RECVDSTOPTS	40 /* bool; recv dst option after rthdr */
+#ifdef KERNEL
+#define IPV6_RECVRTHDRDSTOPTS	41 /* bool; recv dst option before rthdr */
+#endif
+
+#define IPV6_USE_MIN_MTU	42 /* bool; send packets at the minimum MTU */
+#define IPV6_RECVPATHMTU	43 /* bool; notify an according MTU */
+
+#define IPV6_PATHMTU		44 /* mtuinfo; get the current path MTU (sopt),
+				      4 bytes int; MTU notification (cmsg) */
+#if 0 /*obsoleted during 2292bis -> 3542*/
+#define IPV6_REACHCONF		45 /* no data; ND reachability confirm
+				      (cmsg only; not in RFC 3542) */
+#endif
+/* more new socket options introduced in RFC3542 */
+#define IPV6_3542PKTINFO	46 /* in6_pktinfo; send if, src addr */
+#define IPV6_3542HOPLIMIT	47 /* int; send hop limit */
+#define IPV6_3542NEXTHOP	48 /* sockaddr; next hop addr */
+#define IPV6_3542HOPOPTS	49 /* ip6_hbh; send hop-by-hop option */
+#define IPV6_3542DSTOPTS	50 /* ip6_dest; send dst option before rthdr */
+#define IPV6_3542RTHDR		51 /* ip6_rthdr; send routing header */
+
+#define IPV6_PKTINFO        IPV6_3542PKTINFO
+#define IPV6_HOPLIMIT       IPV6_3542HOPLIMIT
+#define IPV6_NEXTHOP        IPV6_3542NEXTHOP
+#define IPV6_HOPOPTS        IPV6_3542HOPOPTS
+#define IPV6_DSTOPTS        IPV6_3542DSTOPTS
+#define IPV6_RTHDR          IPV6_3542RTHDR
+
+#define IPV6_AUTOFLOWLABEL	59 /* bool; attach flowlabel automagically */
+
+#define IPV6_DONTFRAG		62 /* bool; disable IPv6 fragmentation */
+
+#define IPV6_PREFER_TEMPADDR	63 /* int; prefer temporary addresses as
+				    * the source address.
+				    */
+
+/*
+ * The following option is private; do not use it from user applications.
+ * It is deliberately defined to the same value as IP_MSFILTER.
+ */
+#define	IPV6_MSFILTER		74 /* struct __msfilterreq;
+				    * set/get multicast source filter list.
+				    */
+#endif /* __APPLE_USE_RFC_3542 */
+
+#define	IPV6_BOUND_IF		125 /* int; set/get bound interface */
+
+#ifdef PRIVATE
+#define	IPV6_NO_IFT_CELLULAR	6969 /* for internal use only */
+#define	IPV6_OUT_IF		9696 /* for internal use only */
+#endif /* PRIVATE */
 
 /* to define items, should talk with KAME guys first, for *BSD compatibility */
 
@@ -515,6 +653,21 @@ struct route_in6 {
 #define IPV6_DEFAULT_MULTICAST_HOPS 1	/* normally limit m'casts to 1 hop  */
 #define IPV6_DEFAULT_MULTICAST_LOOP 1	/* normally hear sends if a member  */
 
+/*
+ * The im6o_membership vector for each socket is now dynamically allocated at
+ * run-time, bounded by USHRT_MAX, and is reallocated when needed, sized
+ * according to a power-of-two increment.
+ */
+#define	IPV6_MIN_MEMBERSHIPS	31
+#define	IPV6_MAX_MEMBERSHIPS	4095
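
For illustration (not part of the patch), the power-of-two growth schedule these limits imply matches the im6o_grow() routine added later in this patch, which resizes the vector to the next power of two minus one. A tiny user-space sketch of the resulting sequence (31, 63, 127, 255, 511, 1023, 2047, 4095):

	#include <stdio.h>

	int
	main(void)
	{
		unsigned long max = 31;			/* IPV6_MIN_MEMBERSHIPS */

		while (max <= 4095) {			/* IPV6_MAX_MEMBERSHIPS */
			printf("%lu\n", max);
			max = ((max + 1) * 2) - 1;	/* formula used by im6o_grow() */
		}
		return (0);
	}
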
+
+/*
+ * Default resource limits for IPv6 multicast source filtering.
+ * These may be modified by sysctl.
+ */
+#define	IPV6_MAX_GROUP_SRC_FILTER	512	/* sources per group */
+#define	IPV6_MAX_SOCK_SRC_FILTER	128	/* sources per socket/group */
+
 /*
  * Argument structure for IPV6_JOIN_GROUP and IPV6_LEAVE_GROUP.
  */
@@ -524,13 +677,21 @@ struct ipv6_mreq {
 };
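
As a hedged user-space sketch (not part of the patch) of joining a group through this structure: the group address ff02::1:3 and the interface name en0 below are illustration values only.

	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <arpa/inet.h>
	#include <net/if.h>
	#include <string.h>

	int
	join_sample(int s)
	{
		struct ipv6_mreq mreq;

		memset(&mreq, 0, sizeof (mreq));
		inet_pton(AF_INET6, "ff02::1:3", &mreq.ipv6mr_multiaddr);
		mreq.ipv6mr_interface = if_nametoindex("en0");	/* assumed name */
		return (setsockopt(s, IPPROTO_IPV6, IPV6_JOIN_GROUP,
		    &mreq, sizeof (mreq)));
	}
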
 
 /*
- * IPV6_PKTINFO: Packet information(RFC2292 sec 5)
+ * IPV6_2292PKTINFO: Packet information(RFC2292 sec 5)
  */
 struct in6_pktinfo {
 	struct in6_addr	ipi6_addr;	/* src/dst IPv6 address */
 	unsigned int	ipi6_ifindex;	/* send/recv interface index */
 };
 
+/*
+ * Control structure for IPV6_RECVPATHMTU socket option.
+ */
+struct ip6_mtuinfo {
+	struct sockaddr_in6 ip6m_addr;	/* or sockaddr_storage? */
+	uint32_t ip6m_mtu;
+};
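
A hedged sketch (not part of the patch) of querying the current path MTU on a connected IPv6 socket via getsockopt(IPV6_PATHMTU), which fills this structure; error handling is reduced to a sentinel return value.

	#define __APPLE_USE_RFC_3542 1	/* IPV6_PATHMTU is an RFC 3542 option */
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <stdint.h>

	static uint32_t
	query_path_mtu(int s)
	{
		struct ip6_mtuinfo mtuinfo;
		socklen_t len = sizeof (mtuinfo);

		if (getsockopt(s, IPPROTO_IPV6, IPV6_PATHMTU,
		    &mtuinfo, &len) != 0)
			return (0);	/* e.g. socket not connected */
		return (mtuinfo.ip6m_mtu);
	}
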
+
 /*
  * Argument for IPV6_PORTRANGE:
  * - which range to search when port is unspecified at bind() or connect()
@@ -582,22 +743,27 @@ struct in6_pktinfo {
 #define IPV6CTL_RTMINEXPIRE	26	/* min value for expiration time */
 #define IPV6CTL_RTMAXCACHE	27	/* trigger level for dynamic expire */
 
-#define IPV6CTL_USETEMPADDR	32	/* use temporary addresses (RFC3041) */
+#define IPV6CTL_USETEMPADDR	32	/* use temporary addresses [RFC 4941] */
 #define IPV6CTL_TEMPPLTIME	33	/* preferred lifetime for tmpaddrs */
 #define IPV6CTL_TEMPVLTIME	34	/* valid lifetime for tmpaddrs */
 #define IPV6CTL_AUTO_LINKLOCAL	35	/* automatic link-local addr assign */
 #define IPV6CTL_RIP6STATS	36	/* raw_ip6 stats */
+#define IPV6CTL_PREFER_TEMPADDR	37	/* prefer temporary addr as src */
+#define IPV6CTL_ADDRCTLPOLICY	38	/* get/set address selection policy */
+#define IPV6CTL_USE_DEFAULTZONE	39	/* use default scope zone */
 
-#define IPV6CTL_MAXFRAGS        41      /* max fragments */
+#define IPV6CTL_MAXFRAGS	41	/* max fragments */
+#define IPV6CTL_MCAST_PMTU	44	/* enable pMTU discovery for multicast? */
 
 #define IPV6CTL_NEIGHBORGCTHRESH 46
 #define IPV6CTL_MAXIFPREFIXES	47
 #define IPV6CTL_MAXIFDEFROUTERS 48
 #define IPV6CTL_MAXDYNROUTES	49
+#define	ICMPV6CTL_ND6_ONLINKNSRFC4861	50
 
 /* New entries should be added here from current IPV6CTL_MAXID value. */
 /* to define items, should talk with KAME guys first, for *BSD compatibility */
-#define IPV6CTL_MAXID		50
+#define IPV6CTL_MAXID		51
 
 #ifdef KERNEL_PRIVATE
 #define CTL_IPV6PROTO_NAMES { \
@@ -651,7 +817,6 @@ struct in6_pktinfo {
  */
 #define	M_AUTHIPHDR	M_PROTO2
 #define	M_DECRYPTED	M_PROTO3
-#define	M_LOOP		M_PROTO4
 #define	M_AUTHIPDGM	M_PROTO5
 
 struct cmsghdr;
@@ -676,12 +841,65 @@ extern void in6_sin_2_v4mapsin6(struct sockaddr_in *sin,
     struct sockaddr_in6 *sin6);
 extern void in6_sin6_2_sin_in_sock(struct sockaddr *nam);
 extern int in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam);
+extern void in6_delayed_cksum(struct mbuf *, u_int16_t);
 
 #define	satosin6(sa)	((struct sockaddr_in6 *)(sa))
 #define	sin6tosa(sin6)	((struct sockaddr *)(sin6))
 #define	ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
 
 extern int in6addr_local(struct in6_addr *);
+
+#define DEBUG_HWCKSUM 1 /* IPv6 Hardware checksum on/off */
+/*
+ * in6_cksum_phdr:
+ *
+ *	Compute significant parts of the IPv6 checksum pseudo-header
+ *	for use in a delayed TCP/UDP checksum calculation.
+ *
+ *	Args:
+ *
+ *		src		Source IPv6 address
+ *		dst		Destination IPv6 address
+ *		len		htonl(proto-hdr-len)
+ *		nxt		htonl(next-proto-number)
+ *
+ *	NOTE: We expect the src and dst addresses to be 16-bit
+ *	aligned!
+ */
+static __inline u_int16_t __unused
+in6_cksum_phdr(const struct in6_addr *src, const struct in6_addr *dst,
+    u_int32_t len, u_int32_t nxt)
+{
+	u_int32_t sum = 0;
+	const u_int16_t *w;
+
+	/*LINTED*/
+	w = (const u_int16_t *) src;
+	sum += w[0];
+	if (!IN6_IS_SCOPE_LINKLOCAL(src))
+		sum += w[1];
+	sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5];
+	sum += w[6]; sum += w[7];
+
+	/*LINTED*/
+	w = (const u_int16_t *) dst;
+	sum += w[0];
+	if (!IN6_IS_SCOPE_LINKLOCAL(dst))
+		sum += w[1];
+	sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5];
+	sum += w[6]; sum += w[7];
+
+	sum += (u_int16_t)(len >> 16) + (u_int16_t)(len /*& 0xffff*/);
+
+	sum += (u_int16_t)(nxt >> 16) + (u_int16_t)(nxt /*& 0xffff*/);
+
+	sum = (u_int16_t)(sum >> 16) + (u_int16_t)(sum /*& 0xffff*/);
+
+	if (sum > 0xffff)
+		sum -= 0xffff;
+
+	return (sum);
+}
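
A hedged sketch of how a kernel caller might seed a delayed UDP checksum with this routine. It assumes the usual kernel headers (<netinet/ip6.h> and friends) are already in scope, and the returned partial sum must still be folded together with the one's complement sum of the transport header and payload.

	static u_int16_t
	udp6_phdr_sum(const struct ip6_hdr *ip6, u_int32_t payload_len)
	{
		/* len and nxt are passed pre-byteswapped, per the contract above */
		return (in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst,
		    htonl(payload_len), htonl(IPPROTO_UDP)));
	}
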
 #endif /* KERNEL_PRIVATE */
 
 #ifndef KERNEL
@@ -708,23 +926,24 @@ extern int inet6_rthdr_segments(const struct cmsghdr *);
 extern struct in6_addr *inet6_rthdr_getaddr(struct cmsghdr *, int);
 extern int inet6_rthdr_getflags(const struct cmsghdr *, int);
 
-extern int inet6_opt_init(void *, size_t);
-extern int inet6_opt_append(void *, size_t, int, __uint8_t,
-				 size_t, __uint8_t, void **);
-extern int inet6_opt_finish(void *, size_t, int);
-extern int inet6_opt_set_val(void *, size_t, void *, int);
-
-extern int inet6_opt_next(void *, size_t, int, __uint8_t *,
-			       size_t *, void **);
-extern int inet6_opt_find(void *, size_t, int, __uint8_t,
-			  size_t *, void **);
-extern int inet6_opt_get_val(void *, size_t, void *, int);
-extern size_t inet6_rth_space(int, int);
-extern void *inet6_rth_init(void *, int, int, int);
+extern int inet6_opt_init(void *, socklen_t);
+extern int inet6_opt_append(void *, socklen_t, int, __uint8_t,
+				 socklen_t, __uint8_t, void **);
+extern int inet6_opt_finish(void *, socklen_t, int);
+extern int inet6_opt_set_val(void *, int, void *, socklen_t);
+
+extern int inet6_opt_next(void *, socklen_t, int, __uint8_t *,
+			       socklen_t *, void **);
+extern int inet6_opt_find(void *, socklen_t, int, __uint8_t,
+			  socklen_t *, void **);
+extern int inet6_opt_get_val(void *, int, void *, socklen_t);
+extern socklen_t inet6_rth_space(int, int);
+extern void *inet6_rth_init(void *, socklen_t, int, int);
 extern int inet6_rth_add(void *, const struct in6_addr *);
 extern int inet6_rth_reverse(const void *, void *);
 extern int inet6_rth_segments(const void *);
 extern struct in6_addr *inet6_rth_getaddr(const void *, int);
+extern void addrsel_policy_init(void);
 __END_DECLS
 #endif /* !KERNEL */
 #endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
diff --git a/bsd/netinet6/in6_cksum.c b/bsd/netinet6/in6_cksum.c
index d964493dc..f0352eb72 100644
--- a/bsd/netinet6/in6_cksum.c
+++ b/bsd/netinet6/in6_cksum.c
@@ -224,7 +224,7 @@ inet6_cksum(struct mbuf *m, unsigned int nxt, unsigned int off,
  * code and should be modified for each CPU to be as fast as possible.
  */
 
-#define ADDCARRY(x)  (x > 65535 ? x -= 65535 : x)
+#define ADDCARRY(x)  do { if (x > 65535) { x -= 65535; } } while (0)
 #define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);}
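
The rewrite above turns ADDCARRY() from an expression into a statement macro; the do { ... } while (0) wrapper is what keeps it safe in unbraced if/else contexts. A small illustration, not part of the patch:

	/*
	 * With a bare if-statement macro, the "else" below would bind to
	 * the macro's hidden "if" instead of the outer one (or fail to
	 * compile because of the stray semicolon).
	 */
	#define ADDCARRY_BAD(x)  if (x > 65535) { x -= 65535; }
	#define ADDCARRY_OK(x)   do { if (x > 65535) { x -= 65535; } } while (0)

	void
	fold(int cond, unsigned int *sum)
	{
		if (cond)
			ADDCARRY_OK(*sum);	/* parses as a single statement */
		else
			*sum = 0;		/* else binds to the outer if */
	}
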
 
 /*
diff --git a/bsd/netinet6/in6_gif.c b/bsd/netinet6/in6_gif.c
index 332271e88..d620db95e 100644
--- a/bsd/netinet6/in6_gif.c
+++ b/bsd/netinet6/in6_gif.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2009-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -192,11 +192,9 @@ in6_gif_output(
 		m_freem(m);
 		return ENETUNREACH;
 	}
-	if (ifp->if_flags & IFF_LINK1)
-		ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
-	else
-		ip_ecn_ingress(ECN_NOCARE, &otos, &itos);
-	ip6->ip6_flow &= ~ntohl(0xff00000);
+	ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE,
+		       &otos, &itos);
+	ip6->ip6_flow &= ~htonl(0xff << 20);
 	ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
 
 	if (dst->sin6_family != sin6_dst->sin6_family ||
@@ -244,22 +242,19 @@ in6_gif_output(
 	 * it is too painful to ask for resend of inner packet, to achieve
 	 * path MTU discovery for encapsulated packets.
 	 */
-	return(ip6_output(m, 0, &sc->gif_ro6, IPV6_MINMTU, 0, NULL, 0));
+	return(ip6_output(m, 0, &sc->gif_ro6, IPV6_MINMTU, 0, NULL, NULL));
 #else
-	return(ip6_output(m, 0, &sc->gif_ro6, 0, 0, NULL, 0));
+	return(ip6_output(m, 0, &sc->gif_ro6, 0, 0, NULL, NULL));
 #endif
 }
 
-int in6_gif_input(mp, offp)
-	struct mbuf **mp;
-	int *offp;
+int in6_gif_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m = *mp;
 	struct ifnet *gifp = NULL;
 	struct ip6_hdr *ip6;
 	int af = 0;
 	u_int32_t otos;
-	u_int8_t proto;
 
 	ip6 = mtod(m, struct ip6_hdr *);
 
@@ -271,7 +266,6 @@ int in6_gif_input(mp, offp)
 		return IPPROTO_DONE;
 	}
 
-	proto = ip6->ip6_nxt;
 	otos = ip6->ip6_flow;
 	m_adj(m, *offp);
 
@@ -360,9 +354,6 @@ gif_validate6(
 		sin6.sin6_family = AF_INET6;
 		sin6.sin6_len = sizeof(struct sockaddr_in6);
 		sin6.sin6_addr = ip6->ip6_src;
-#ifndef SCOPEDROUTING
-		sin6.sin6_scope_id = 0; /* XXX */
-#endif
 
 		rt = rtalloc1((struct sockaddr *)&sin6, 0, 0);
 		if (rt != NULL)
diff --git a/bsd/netinet6/in6_gif.h b/bsd/netinet6/in6_gif.h
index 8383c6b4e..8baafdd43 100644
--- a/bsd/netinet6/in6_gif.h
+++ b/bsd/netinet6/in6_gif.h
@@ -37,7 +37,7 @@
 #ifdef KERNEL_PRIVATE
 #define GIF_HLIM	30
 
-int in6_gif_input(struct mbuf **, int *);
+int in6_gif_input(struct mbuf **, int *, int);
 int in6_gif_output(struct ifnet *, int, struct mbuf *, struct rtentry *);
 int gif_encapcheck6(const struct mbuf *, int, int, void *);
 #endif /* KERNEL_PRIVATE */
diff --git a/bsd/netinet6/in6_ifattach.c b/bsd/netinet6/in6_ifattach.c
index 5995b212d..10bf295f4 100644
--- a/bsd/netinet6/in6_ifattach.c
+++ b/bsd/netinet6/in6_ifattach.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -79,6 +79,7 @@
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #include <netinet/in_pcb.h>
+#include <netinet/icmp6.h>
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
@@ -97,7 +98,6 @@ size_t in6_ifstatmax = 0;
 size_t icmp6_ifstatmax = 0;
 u_int32_t in6_maxmtu = 0;
 extern lck_mtx_t *nd6_mutex;
-extern lck_mtx_t *inet6_domain_mutex;
 
 #if IP6_AUTO_LINKLOCAL
 int ip6_auto_linklocal = IP6_AUTO_LINKLOCAL;
@@ -105,13 +105,14 @@ int ip6_auto_linklocal = IP6_AUTO_LINKLOCAL;
 int ip6_auto_linklocal = 1;	/* enable by default */
 #endif
 
+int loopattach6_done = 0;
+
 extern struct inpcbinfo udbinfo;
 extern struct inpcbinfo ripcbinfo;
-extern lck_mtx_t *ip6_mutex;
 
 static int get_rand_ifid(struct ifnet *, struct in6_addr *);
 static int generate_tmp_ifid(u_int8_t *, const u_int8_t *, u_int8_t *);
-static int get_hw_ifid(struct ifnet *, struct in6_addr *);
+int in6_get_hw_ifid(struct ifnet *, struct in6_addr *);
 static int get_ifid(struct ifnet *, struct ifnet *, struct in6_addr *);
 static int in6_ifattach_linklocal(struct ifnet *, struct ifnet *, struct in6_aliasreq *);
 static int in6_ifattach_loopback(struct ifnet *);
@@ -133,6 +134,8 @@ static int in6_ifattach_loopback(struct ifnet *);
  * The goal here is to get an interface identifier that is
  * (1) random enough and (2) does not change across reboot.
  * We currently use MD5(hostname) for it.
+ *
+ * in6 - upper 64bits are preserved
  */
 static int
 get_rand_ifid(
@@ -141,7 +144,7 @@ get_rand_ifid(
 {
 	MD5_CTX ctxt;
 	u_int8_t digest[16];
-	int len	= strlen(hostname);
+	int hostnlen	= strlen(hostname);
 
 #if 0
 	/* we need at least several letters as seed for ifid */
@@ -152,7 +155,7 @@ get_rand_ifid(
 	/* generate 8 bytes of pseudo-random value. */
 	bzero(&ctxt, sizeof(ctxt));
 	MD5Init(&ctxt);
-	MD5Update(&ctxt, hostname, len);
+	MD5Update(&ctxt, hostname, hostnlen);
 	MD5Final(digest, &ctxt);
 
 	/* assumes sizeof(digest) > sizeof(ifid) */
@@ -179,7 +182,7 @@ generate_tmp_ifid(
 	u_int32_t val32;
 	struct timeval tv;
 
-	/* If there's no hisotry, start with a random seed. */
+	/* If there's no history, start with a random seed. */
 	bzero(nullbuf, sizeof(nullbuf));
 	if (bcmp(nullbuf, seed0, sizeof(nullbuf)) == 0) {
 		int i;
@@ -213,7 +216,7 @@ generate_tmp_ifid(
 	MD5Final(digest, &ctxt);
 
 	/*
-	 * RFC 3041 3.2.1. (3)
+	 * RFC 4941 3.2.1. (3)
 	 * Take the left-most 64-bits of the MD5 digest and set bit 6 (the
 	 * left-most bit is numbered 0) to zero.
 	 */
@@ -226,8 +229,8 @@ generate_tmp_ifid(
 	 * use a random non-zero value as the last resort.
 	 */
 	if (bcmp(nullbuf, ret, sizeof(nullbuf)) == 0) {
-		log(LOG_INFO,
-		    "generate_tmp_ifid: computed MD5 value is zero.\n");
+		nd6log((LOG_INFO,
+		    "generate_tmp_ifid: computed MD5 value is zero.\n"));
 
 		microtime(&tv);
 		val32 = random() ^ tv.tv_usec;
@@ -235,10 +238,10 @@ generate_tmp_ifid(
 	}
 
 	/*
-	 * RFC 3041 3.2.1. (4)
+	 * RFC 4941 3.2.1. (4)
 	 * Take the rightmost 64-bits of the MD5 digest and save them in
 	 * stable storage as the history value to be used in the next
-	 * iteration of the algorithm. 
+	 * iteration of the algorithm.
 	 */
 	bcopy(&digest[8], seed0, 8);
 
@@ -257,42 +260,35 @@ generate_tmp_ifid(
 /*
  * Get interface identifier for the specified interface.
  * XXX assumes single sockaddr_dl (AF_LINK address) per an interface
+ *
+ * in6 - upper 64bits are preserved
  */
-static int
-get_hw_ifid(
+int
+in6_get_hw_ifid(
 	struct ifnet *ifp,
 	struct in6_addr *in6)	/* upper 64bits are preserved */
 {
-	struct ifaddr *ifa;
+	struct ifaddr *ifa = NULL;
 	struct sockaddr_dl *sdl;
 	u_int8_t *addr;
 	size_t addrlen;
 	static u_int8_t allzero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
 	static u_int8_t allone[8] =
 		{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+	int err = -1;
 
 	/* Why doesn't this code use ifnet_addrs? */
 	ifnet_lock_shared(ifp);
-	for (ifa = ifp->if_addrlist.tqh_first;
-	     ifa;
-	     ifa = ifa->ifa_list.tqe_next)
-	{
-		if (ifa->ifa_addr->sa_family != AF_LINK)
-			continue;
-		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
-		if (sdl == NULL)
-			continue;
-		if (sdl->sdl_alen == 0)
-			continue;
-
-		goto found;
+	ifa = ifp->if_lladdr;
+	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+	if (sdl->sdl_alen == 0) {
+		ifnet_lock_done(ifp);
+		return (-1);
 	}
+	IFA_ADDREF(ifa);	/* for this routine */
 	ifnet_lock_done(ifp);
 
-	return -1;
-
-found:
-	ifnet_lock_done(ifp);
+	IFA_LOCK(ifa);
 	addr = (u_int8_t *) LLADDR(sdl);
 	addrlen = sdl->sdl_alen;
 
@@ -300,6 +296,7 @@ found:
 	switch (ifp->if_type) {
 	case IFT_ETHER:
 	case IFT_FDDI:
+	case IFT_ISO88025:
 	case IFT_ATM:
 	case IFT_IEEE1394:
 	case IFT_L2VLAN:
@@ -315,7 +312,7 @@ found:
 
 		/* look at IEEE802/EUI64 only */
 		if (addrlen != 8 && addrlen != 6)
-			return -1;
+			goto done;
 
 		/*
 		 * check for invalid MAC address - on bsdi, we see it a lot
@@ -323,9 +320,9 @@ found:
 		 * card insertion.
 		 */
 		if (bcmp(addr, allzero, addrlen) == 0)
-			return -1;
+			goto done;
 		if (bcmp(addr, allone, addrlen) == 0)
-			return -1;
+			goto done;
 
 		/* make EUI64 address */
 		if (addrlen == 8)
@@ -344,9 +341,9 @@ found:
 
 	case IFT_ARCNET:
 		if (addrlen != 1)
-			return -1;
+			goto done;
 		if (!addr[0])
-			return -1;
+			goto done;
 
 		bzero(&in6->s6_addr[8], 8);
 		in6->s6_addr[15] = addr[0];
@@ -368,15 +365,18 @@ found:
 		 * identifier source (can be renumbered).
 		 * we don't do this.
 		 */
-		return -1;
+		goto done;
+
+	case IFT_CELLULAR:
+		goto done;
 
 	default:
-		return -1;
+		goto done;
 	}
 
 	/* sanity check: g bit must not indicate "group" */
 	if (EUI64_GROUP(in6))
-		return -1;
+		goto done;
 
 	/* convert EUI64 into IPv6 interface identifier */
 	EUI64_TO_IFID(in6);
@@ -387,16 +387,27 @@ found:
 	 */
 	if ((in6->s6_addr[8] & ~(EUI64_GBIT | EUI64_UBIT)) == 0x00 &&
 	    bcmp(&in6->s6_addr[9], allzero, 7) == 0) {
-		return -1;
+		goto done;
 	}
 
-	return 0;
+	err = 0;	/* found */
+
+done:
+	/* This must not be the last reference to the lladdr */
+	if (IFA_REMREF_LOCKED(ifa) == NULL) {
+		panic("%s: unexpected (missing) refcnt ifa=%p", __func__, ifa);
+		/* NOTREACHED */
+	}
+	IFA_UNLOCK(ifa);
+	return (err);
 }
 
 /*
  * Get interface identifier for the specified interface.  If it is not
  * available on ifp0, borrow interface identifier from other information
  * sources.
+ *
+ * altifp - secondary EUI64 source
  */
 static int
 get_ifid(
@@ -407,14 +418,14 @@ get_ifid(
 	struct ifnet *ifp;
 
 	/* first, try to get it from the interface itself */
-	if (get_hw_ifid(ifp0, in6) == 0) {
+	if (in6_get_hw_ifid(ifp0, in6) == 0) {
 		nd6log((LOG_DEBUG, "%s: got interface identifier from itself\n",
 		    if_name(ifp0)));
 		goto success;
 	}
 
 	/* try secondary EUI64 source. this basically is for ATM PVC */
-	if (altifp && get_hw_ifid(altifp, in6) == 0) {
+	if (altifp && in6_get_hw_ifid(altifp, in6) == 0) {
 		nd6log((LOG_DEBUG, "%s: got interface identifier from %s\n",
 		    if_name(ifp0), if_name(altifp)));
 		goto success;
@@ -425,7 +436,7 @@ get_ifid(
 	TAILQ_FOREACH(ifp, &ifnet_head, if_list) {
 		if (ifp == ifp0)
 			continue;
-		if (get_hw_ifid(ifp, in6) != 0)
+		if (in6_get_hw_ifid(ifp, in6) != 0)
 			continue;
 
 		/*
@@ -488,18 +499,14 @@ in6_ifattach_linklocal(
 	 */
 	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
 
-	if (((ifp->if_type == IFT_PPP) ||  ((ifp->if_eflags & IFEF_NOAUTOIPV6LL) != 0)) &&
-			 ifra_passed != NULL)  /* PPP provided both addresses for us */
+	if ((ifp->if_eflags & IFEF_NOAUTOIPV6LL) != 0 &&
+			 ifra_passed != NULL)  /* interface provided both addresses for us */
 		bcopy(&ifra_passed->ifra_addr, &(ifra.ifra_addr), sizeof(struct sockaddr_in6));
 	else {
 		ifra.ifra_addr.sin6_family = AF_INET6;
 		ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
 		ifra.ifra_addr.sin6_addr.s6_addr16[0] = htons(0xfe80);
-#if SCOPEDROUTING
-		ifra.ifra_addr.sin6_addr.s6_addr16[1] = 0
-#else
-		ifra.ifra_addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); /* XXX */
-#endif
+		ifra.ifra_addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
 		ifra.ifra_addr.sin6_addr.s6_addr32[1] = 0;
 		if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
 			ifra.ifra_addr.sin6_addr.s6_addr32[2] = 0;
@@ -508,58 +515,42 @@ in6_ifattach_linklocal(
 			if (get_ifid(ifp, altifp, &ifra.ifra_addr.sin6_addr) != 0) {
 				nd6log((LOG_ERR,
 			    "	%s: no ifid available\n", if_name(ifp)));
-				return -1;
+				return EADDRNOTAVAIL;
 			}
 		}
-#if SCOPEDROUTING
-		ifra.ifra_addr.sin6_scope_id =
-			in6_addr2scopeid(ifp,  &ifra.ifra_addr.sin6_addr);
-#endif
 	}
+	if (in6_setscope(&ifra.ifra_addr.sin6_addr, ifp, NULL))
+		return (EADDRNOTAVAIL);
+
 	ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
 	ifra.ifra_prefixmask.sin6_family = AF_INET6;
 	ifra.ifra_prefixmask.sin6_addr = in6mask64;
-#if SCOPEDROUTING
-	/* take into accound the sin6_scope_id field for routing */
-	ifra.ifra_prefixmask.sin6_scope_id = 0xffffffff;
-#endif
 	/* link-local addresses should NEVER expire. */
 	ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;
 	ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME;
 
-	/*
-	 * Do not let in6_update_ifa() do DAD, since we need a random delay
-	 * before sending an NS at the first time the interface becomes up.
-	 * Instead, in6_if_up() will start DAD with a proper random delay.
-	 */
-	ifra.ifra_flags |= IN6_IFF_NODAD;
-
 	/*
 	 * Now call in6_update_ifa() to do a bunch of procedures to configure
-	 * a link-local address. We can set NULL to the 3rd argument, because
+	 * a link-local address. We can set the 3rd argument to NULL, because
 	 * we know there's no other link-local address on the interface
 	 * and therefore we are adding one (instead of updating one).
 	 */
-	if ((error = in6_update_ifa(ifp, &ifra, NULL, M_WAITOK)) != 0) {
+	if ((error = in6_update_ifa(ifp, &ifra, NULL,
+				    IN6_IFAUPDATE_DADDELAY, M_WAITOK)) != 0) {
 		/*
 		 * XXX: When the interface does not support IPv6, this call
 		 * would fail in the SIOCSIFADDR ioctl.  I believe the
 		 * notification is rather confusing in this case, so just
-		 * supress it.  (jinmei@kame.net 20010130)
+		 * suppress it.  (jinmei@kame.net 20010130)
 		 */
 		if (error != EAFNOSUPPORT)
-			log(LOG_NOTICE, "in6_ifattach_linklocal: failed to "
+			nd6log((LOG_NOTICE, "in6_ifattach_linklocal: failed to "
 			    "configure a link-local address on %s "
 			    "(errno=%d)\n",
-			    if_name(ifp), error);
-		return(-1);
+			    if_name(ifp), error));
+		return (EADDRNOTAVAIL);
 	}
 
-	/*
-	 * Adjust ia6_flags so that in6_if_up will perform DAD.
-	 * XXX: Some P2P interfaces seem not to send packets just after
-	 * becoming up, so we skip p2p interfaces for safety.
-	 */
 	ia = in6ifa_ifpforlinklocal(ifp, 0); /* ia must not be NULL */
 #if DIAGNOSTIC
 	if (!ia) {
@@ -567,19 +558,15 @@ in6_ifattach_linklocal(
 		/*NOTREACHED*/
 	}
 #endif
-	if (in6if_do_dad(ifp) && (ifp->if_flags & IFF_POINTOPOINT) == 0) {
-		ia->ia6_flags &= ~IN6_IFF_NODAD;
-		ia->ia6_flags |= IN6_IFF_TENTATIVE;
-	}
-
 	/*
-	 * Make the link-local prefix (fe80::/64%link) as on-link.
+	 * Make the link-local prefix (fe80::%link/64) as on-link.
 	 * Since we'd like to manage prefixes separately from addresses,
 	 * we make an ND6 prefix structure for the link-local prefix,
 	 * and add it to the prefix list as a never-expire prefix.
 	 * XXX: this change might affect some existing code base...
 	 */
 	bzero(&pr0, sizeof(pr0));
+	lck_mtx_init(&pr0.ndpr_lock, ifa_mtx_grp, ifa_mtx_attr);
 	pr0.ndpr_ifp = ifp;
 	/* this should be 64 at this moment. */
 	pr0.ndpr_plen = in6_mask2len(&ifra.ifra_prefixmask.sin6_addr, NULL);
@@ -598,6 +585,7 @@ in6_ifattach_linklocal(
 	pr0.ndpr_raf_auto = 1;	/* probably meaningless */
 	pr0.ndpr_vltime = ND6_INFINITE_LIFETIME;
 	pr0.ndpr_pltime = ND6_INFINITE_LIFETIME;
+	pr0.ndpr_stateflags |= NDPRF_STATIC;
 	/*
 	 * Since there is no other link-local addresses, nd6_prefix_lookup()
 	 * probably returns NULL.  However, we cannot always expect the result.
@@ -606,21 +594,23 @@ in6_ifattach_linklocal(
 	 * valid with referring to the old link-local address.
 	 */
 	if ((pr = nd6_prefix_lookup(&pr0)) == NULL) {
-		if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) {
-			printf("in6_ifattach_linklocal: nd6_prelist_add failed %d\n", error);
-			ifafree(&ia->ia_ifa);
+		if ((error = nd6_prelist_add(&pr0, NULL, &pr, TRUE)) != 0) {
+			IFA_REMREF(&ia->ia_ifa);
+			lck_mtx_destroy(&pr0.ndpr_lock, ifa_mtx_grp);
 			return(error);
 		}
 	}
 
 	if (ia != NULL) {
 		in6_post_msg(ifp, KEV_INET6_NEW_LL_ADDR, ia);
-		ifafree(&ia->ia_ifa);
+		IFA_REMREF(&ia->ia_ifa);
 	}
 
 	/* Drop use count held above during lookup/add */
 	if (pr != NULL)
-		ndpr_rele(pr, FALSE);
+		NDPR_REMREF(pr);
+
+	lck_mtx_destroy(&pr0.ndpr_lock, ifa_mtx_grp);
 
 	return 0;
 }
@@ -670,11 +660,11 @@ in6_ifattach_loopback(
 	 * We are sure that this is a newly assigned address, so we can set
 	 * NULL to the 3rd arg.
 	 */
-	if ((error = in6_update_ifa(ifp, &ifra, NULL, M_WAITOK)) != 0) {
-		log(LOG_ERR, "in6_ifattach_loopback: failed to configure "
+	if ((error = in6_update_ifa(ifp, &ifra, NULL, 0, M_WAITOK)) != 0) {
+		nd6log((LOG_ERR, "in6_ifattach_loopback: failed to configure "
 		    "the loopback address on %s (errno=%d)\n",
-		    if_name(ifp), error);
-		return(-1);
+		    if_name(ifp), error));
+		return (EADDRNOTAVAIL);
 	}
 
 	return 0;
@@ -724,76 +714,33 @@ in6_nigroup(
 	MD5Final(digest, &ctxt);
 
 	bzero(in6, sizeof(*in6));
-	in6->s6_addr16[0] = htons(0xff02);
-	if (ifp)
-		in6->s6_addr16[1] = htons(ifp->if_index);
+	in6->s6_addr16[0] = IPV6_ADDR_INT16_MLL;
 	in6->s6_addr8[11] = 2;
 	bcopy(digest, &in6->s6_addr32[3], sizeof(in6->s6_addr32[3]));
+	if (in6_setscope(in6, ifp, NULL))
+		return (-1); /* XXX: should not fail */
 
 	return 0;
 }
 
-void
-in6_nigroup_attach(
-	const char *name,
-	int namelen)
+int
+in6_domifattach(struct ifnet *ifp)
 {
-	struct ifnet *ifp;
-	struct sockaddr_in6 mltaddr;
-	struct in6_multi *in6m;
-	int error;
-
-	bzero(&mltaddr, sizeof(mltaddr));
-	mltaddr.sin6_family = AF_INET6;
-	mltaddr.sin6_len = sizeof(struct sockaddr_in6);
-	if (in6_nigroup(NULL, name, namelen, &mltaddr.sin6_addr) != 0)
-		return;
+	int error = 0;
 
-	ifnet_head_lock_shared();
-	TAILQ_FOREACH(ifp, &ifnet_head, if_list) {
-		mltaddr.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
-		ifnet_lock_shared(ifp);
-		IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m);
-		ifnet_lock_done(ifp);
-		if (!in6m) {
-			if (!in6_addmulti(&mltaddr.sin6_addr, ifp, &error, 0)) {
-				nd6log((LOG_ERR, "%s: failed to join %s "
-				    "(errno=%d)\n", if_name(ifp),
-				    ip6_sprintf(&mltaddr.sin6_addr), 
-				    error));
-			}
-		}
+	if ((error = proto_plumb(PF_INET6, ifp))) {
+		if (error != EEXIST)
+			log(LOG_ERR, "%s: proto_plumb returned %d if=%s%d\n",
+			    __func__, error, ifp->if_name, ifp->if_unit);
+	} else {
+		nd6_ifattach(ifp);
+		scope6_ifattach(ifp);
 	}
-	ifnet_head_done();
-}
-
-void
-in6_nigroup_detach(
-	const char *name,
-	int namelen)
-{
-	struct ifnet *ifp;
-	struct sockaddr_in6 mltaddr;
-	struct in6_multi *in6m;
-
-	bzero(&mltaddr, sizeof(mltaddr));
-	mltaddr.sin6_family = AF_INET6;
-	mltaddr.sin6_len = sizeof(struct sockaddr_in6);
-	if (in6_nigroup(NULL, name, namelen, &mltaddr.sin6_addr) != 0)
-		return;
 
-	ifnet_head_lock_shared();
-	TAILQ_FOREACH(ifp, &ifnet_head, if_list) {
-		mltaddr.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
-		ifnet_lock_shared(ifp);
-		IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m);
-		ifnet_lock_done(ifp);
-		if (in6m)
-			in6_delmulti(in6m, 0);
-	}
-	ifnet_head_done();
+	return (error);
 }
 
+
 /*
  * XXX multiple loopback interface needs more care.  for instance,
  * nodelocal address needs to be configured onto only one of them.
@@ -807,8 +754,10 @@ in6_ifattach(
 {
 	static size_t if_indexlim = 8;
 	struct in6_ifaddr *ia;
+	struct in6_addr in6;
 	int error;
 
+	lck_rw_lock_exclusive(&in6_ifs_rwlock);
 	/*
 	 * We have some arrays that should be indexed by if_index.
 	 * since if_index will grow dynamically, they should grow too.
@@ -821,7 +770,6 @@ in6_ifattach(
 			if_indexlim <<= 1;
 	}
     
-	lck_mtx_lock(ip6_mutex);
 	/* grow in6_ifstat */
 	if (in6_ifstatmax < if_indexlim) {
 		size_t n;
@@ -830,7 +778,7 @@ in6_ifattach(
 		n = if_indexlim * sizeof(struct in6_ifstat *);
 		q = (caddr_t)_MALLOC(n, M_IFADDR, M_WAITOK);
 		if (q == NULL) {
-			lck_mtx_unlock(ip6_mutex);
+			lck_rw_done(&in6_ifs_rwlock);
 			return ENOBUFS;
 		}
 		bzero(q, n);
@@ -847,17 +795,14 @@ in6_ifattach(
 		in6_ifstat[ifp->if_index] = (struct in6_ifstat *)
 			_MALLOC(sizeof(struct in6_ifstat), M_IFADDR, M_WAITOK);
 		if (in6_ifstat[ifp->if_index] == NULL) {
-			lck_mtx_unlock(ip6_mutex);
+			lck_rw_done(&in6_ifs_rwlock);
 			return ENOBUFS;
 		}
 		bzero(in6_ifstat[ifp->if_index], sizeof(struct in6_ifstat));
 	}
-	lck_mtx_unlock(ip6_mutex);
+	lck_rw_done(&in6_ifs_rwlock);
 
-	/* grow icmp6_ifstat, use inet6_domain_mutex as that is used in 
-         * icmp6 routines 
-         */
-	lck_mtx_lock(inet6_domain_mutex);
+	lck_rw_lock_exclusive(&icmp6_ifs_rwlock);
 	if (icmp6_ifstatmax < if_indexlim) {
 		size_t n;
 		caddr_t q;
@@ -865,7 +810,7 @@ in6_ifattach(
 		n = if_indexlim * sizeof(struct icmp6_ifstat *);
 		q = (caddr_t)_MALLOC(n, M_IFADDR, M_WAITOK);
 		if (q == NULL) {
-			lck_mtx_unlock(inet6_domain_mutex);
+			lck_rw_done(&icmp6_ifs_rwlock);
 			return ENOBUFS;
 		}
 		bzero(q, n);
@@ -882,12 +827,12 @@ in6_ifattach(
 		icmp6_ifstat[ifp->if_index] = (struct icmp6_ifstat *)
 			_MALLOC(sizeof(struct icmp6_ifstat), M_IFADDR, M_WAITOK);
 		if (icmp6_ifstat[ifp->if_index] == NULL) {
-			lck_mtx_unlock(inet6_domain_mutex);
+			lck_rw_done(&icmp6_ifs_rwlock);
 			return ENOBUFS;
 		}
 		bzero(icmp6_ifstat[ifp->if_index], sizeof(struct icmp6_ifstat));
 	}
-	lck_mtx_unlock(inet6_domain_mutex);
+	lck_rw_done(&icmp6_ifs_rwlock);
 
 	/* initialize NDP variables */
 	if ((error = nd6_ifattach(ifp)) != 0)
@@ -919,9 +864,9 @@ in6_ifattach(
 	 * usually, we require multicast capability to the interface
 	 */
 	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
-		log(LOG_INFO, "in6_ifattach: "
-		    "%s is not multicast capable, IPv6 not enabled\n",
-		    if_name(ifp));
+		nd6log((LOG_INFO, "in6_ifattach: "
+		    "%s is not multicast capable, IPv6 not enabled\n",
+		    if_name(ifp)));
 		return EINVAL;
 	}
 
@@ -930,12 +875,23 @@ in6_ifattach(
 	 * XXX multiple loopback interface case.
 	 */
 	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
-		if (in6_ifattach_loopback(ifp) != 0)
-			printf("in6_ifattach: in6_ifattach_loopback failed\n");
+		struct in6_ifaddr *ia6 = NULL;
+		if (!OSCompareAndSwap(0, 1, (UInt32 *)&loopattach6_done)) {
+			in6 = in6addr_loopback;
+			if ((ia6 = in6ifa_ifpwithaddr(ifp, &in6)) == NULL) {
+				if (in6_ifattach_loopback(ifp) != 0) {
+					OSCompareAndSwap(1, 0, (UInt32 *)&loopattach6_done);
+					return EINVAL;
+				}
+			}
+			else {
+				IFA_REMREF(&ia6->ia_ifa);
+			}
+		}
 	}
 
 	/*
-	 * assign a link-local address, if there's none. 
+	 * assign a link-local address, if there's none.
 	 */
 	if (ip6_auto_linklocal) {
 		ia = in6ifa_ifpforlinklocal(ifp, 0);
@@ -943,13 +899,13 @@ in6_ifattach(
 			if (in6_ifattach_linklocal(ifp, altifp, ifra) == 0) {
 				/* linklocal address assigned */
 			} else {
-				log(LOG_INFO, "in6_ifattach: %s failed to "
+				nd6log((LOG_INFO, "in6_ifattach: %s failed to "
 				    "attach a linklocal address.\n",
-				    if_name(ifp));
+				    if_name(ifp)));
 				/* failed to assign linklocal address. bark? */
 			}
 		} else {
-			ifafree(&ia->ia_ifa);
+			IFA_REMREF(&ia->ia_ifa);
 		}
 	}
 
@@ -966,83 +922,147 @@ statinit:
 
 /*
  * NOTE: in6_ifdetach() does not support loopback if at this moment.
- * We don't need this function in bsdi, because interfaces are never removed
- * from the ifnet list in bsdi.
  */
 void
-in6_ifdetach(
-	struct ifnet *ifp)
+in6_ifdetach(struct ifnet *ifp)
 {
-	struct in6_ifaddr *ia, *oia, *nia;
-	struct ifaddr *ifa, *next;
+	struct in6_ifaddr *ia, *oia;
+	struct ifaddr *ifa;
 	struct rtentry *rt;
 	struct sockaddr_in6 sin6;
+	struct in6_multi_mship *imm;
+	int unlinked;
 
-	/* nuke prefix list.  this may try to remove some of ifaddrs as well */
-	in6_purgeprefix(ifp);
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
 
 	/* remove neighbor management table */
 	nd6_purge(ifp);
 
 	/* nuke any of IPv6 addresses we have */
-	
-	lck_mtx_lock(nd6_mutex);
-	for (ia = in6_ifaddrs; ia != NULL; ia = nia) {
-		nia = ia->ia_next;
-		if (ia->ia_ifa.ifa_ifp != ifp)
+	lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
+	ia = in6_ifaddrs;
+	while (ia != NULL) {
+		if (ia->ia_ifa.ifa_ifp != ifp) {
+			ia = ia->ia_next;
 			continue;
-		in6_purgeaddr(&ia->ia_ifa, 1);
+		}
+		IFA_ADDREF(&ia->ia_ifa);	/* for us */
+		lck_rw_done(&in6_ifaddr_rwlock);
+		in6_purgeaddr(&ia->ia_ifa);
+		IFA_REMREF(&ia->ia_ifa);	/* for us */
+		lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
+		/*
+		 * Purging the address caused in6_ifaddr_rwlock
+		 * to be dropped and reacquired;
+		 * therefore search again from the beginning
+		 * of in6_ifaddrs list.
+		 */
+		ia = in6_ifaddrs;
 	}
-	lck_mtx_unlock(nd6_mutex);
+	lck_rw_done(&in6_ifaddr_rwlock);
 
 	ifnet_lock_exclusive(ifp);
 
 	/* undo everything done by in6_ifattach(), just in case */
-	for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = next)
-	{
-		next = ifa->ifa_list.tqe_next;
-
-
-		if (ifa->ifa_addr->sa_family != AF_INET6
-		 || !IN6_IS_ADDR_LINKLOCAL(&satosin6(&ifa->ifa_addr)->sin6_addr)) {
+	ifa = TAILQ_FIRST(&ifp->if_addrlist);
+	while (ifa != NULL) {
+		IFA_LOCK(ifa);
+		if (ifa->ifa_addr->sa_family != AF_INET6 ||
+		    !IN6_IS_ADDR_LINKLOCAL(&satosin6(&ifa->ifa_addr)->
+		    sin6_addr)) {
+			IFA_UNLOCK(ifa);
+			ifa = TAILQ_NEXT(ifa, ifa_list);
 			continue;
 		}
 
 		ia = (struct in6_ifaddr *)ifa;
 
-		/* remove from the routing table */
-		if ((ia->ia_flags & IFA_ROUTE) &&
-		    (rt = rtalloc1((struct sockaddr *)&ia->ia_addr, 0, 0))) {
-			(void) rtrequest(RTM_DELETE,
-				(struct sockaddr *)&ia->ia_addr,
-				(struct sockaddr *)&ia->ia_addr,
-				(struct sockaddr *)&ia->ia_prefixmask,
-				rt->rt_flags, (struct rtentry **)0);
-			rtfree(rt);
+		/* hold a reference for this routine */
+		IFA_ADDREF_LOCKED(ifa);
+		/* remove from the linked list */
+		if_detach_ifa(ifp, ifa);
+		IFA_UNLOCK(ifa);
+
+		/*
+		 * Leaving the multicast group(s) may involve freeing the
+		 * link address multicast structure(s) for the interface,
+		 * which is protected by ifnet lock.  To avoid violating
+		 * lock ordering, we must drop ifnet lock before doing so.
+		 * The ifa won't go away since we held a refcnt above.
+		 */
+		ifnet_lock_done(ifp);
+
+		/*
+		 * We have to do this work manually here instead of calling
+		 * in6_purgeaddr() since in6_purgeaddr() uses the RTM_HOST flag.
+		 */
+
+		/*
+		 * leave from multicast groups we have joined for the interface
+		 */
+		IFA_LOCK(ifa);
+		while ((imm = ia->ia6_memberships.lh_first) != NULL) {
+			LIST_REMOVE(imm, i6mm_chain);
+			IFA_UNLOCK(ifa);
+			in6_leavegroup(imm);
+			IFA_LOCK(ifa);
 		}
 
-		/* remove from the linked list */
-		if_detach_ifa(ifp, &ia->ia_ifa);
+		/* remove from the routing table */
+		if (ia->ia_flags & IFA_ROUTE) {
+			IFA_UNLOCK(ifa);
+			rt = rtalloc1((struct sockaddr *)&ia->ia_addr, 0, 0);
+			if (rt != NULL) {
+				(void) rtrequest(RTM_DELETE,
+					(struct sockaddr *)&ia->ia_addr,
+					(struct sockaddr *)&ia->ia_addr,
+					(struct sockaddr *)&ia->ia_prefixmask,
+					rt->rt_flags, (struct rtentry **)0);
+				rtfree(rt);
+			}
+		} else {
+			IFA_UNLOCK(ifa);
+		}
 
 		/* also remove from the IPv6 address chain(itojun&jinmei) */
+		unlinked = 1;
 		oia = ia;
-		lck_mtx_lock(nd6_mutex);
-		if (oia == (ia = in6_ifaddrs))
+		lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
+		if (oia == (ia = in6_ifaddrs)) {
 			in6_ifaddrs = ia->ia_next;
-		else {
+		} else {
 			while (ia->ia_next && (ia->ia_next != oia))
 				ia = ia->ia_next;
-			if (ia->ia_next)
+			if (ia->ia_next) {
 				ia->ia_next = oia->ia_next;
-			else {
-				nd6log((LOG_ERR, 
+			} else {
+				nd6log((LOG_ERR,
 				    "%s: didn't unlink in6ifaddr from "
 				    "list\n", if_name(ifp)));
+				unlinked = 0;
 			}
 		}
-		lck_mtx_unlock(nd6_mutex);
+		lck_rw_done(&in6_ifaddr_rwlock);
+
+		ifa = &oia->ia_ifa;
+		/*
+		 * release another refcnt for the link from in6_ifaddrs.
+		 * Do this only if it's not already unlinked in the event
+		 * that we lost the race, since in6_ifaddr_rwlock was momentarily
+		 * dropped above.
+		 */
+		if (unlinked)
+			IFA_REMREF(ifa);
+		/* release reference held for this routine */
+		IFA_REMREF(ifa);
 
-		ifafree(&oia->ia_ifa);
+		/*
+		 * This is suboptimal, but since we dropped ifnet lock above
+		 * the list might have changed.  Repeat the search from the
+		 * beginning until we find the first eligible IPv6 address.
+		 */
+		ifnet_lock_exclusive(ifp);
+		ifa = TAILQ_FIRST(&ifp->if_addrlist);
 	}
 	ifnet_lock_done(ifp);
 
@@ -1128,7 +1148,7 @@ in6_tmpaddrtimer(
 		bzero(nullbuf, sizeof(nullbuf));
 		for (i = 1; i < nd_ifinfo_indexlim + 1; i++) {
 			ndi = &nd_ifinfo[i];
-			if (ndi->flags != ND6_IFF_PERFORMNUD)
+			if ((ndi->flags | ND6_IFF_PERFORMNUD) != ND6_IFF_PERFORMNUD)
 				continue;
 			if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) != 0) {
 				/*
diff --git a/bsd/netinet6/in6_ifattach.h b/bsd/netinet6/in6_ifattach.h
index 7fa627f2d..40ffa0379 100644
--- a/bsd/netinet6/in6_ifattach.h
+++ b/bsd/netinet6/in6_ifattach.h
@@ -1,3 +1,30 @@
+/*
+ * Copyright (c) 2003-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
 /*	$KAME: in6_ifattach.h,v 1.4 2000/02/22 14:04:18 itojun Exp $	*/
 
 /*
@@ -34,13 +61,12 @@
 #include <sys/appleapiopts.h>
 
 #ifdef KERNEL_PRIVATE
-void in6_nigroup_attach(const char *, int);
-void in6_nigroup_detach(const char *, int);
-int in6_ifattach(struct ifnet *, struct ifnet *, struct in6_aliasreq *);
-void in6_ifdetach(struct ifnet *);
-void in6_get_tmpifid(struct ifnet *, u_int8_t *, const u_int8_t *, int);
-void in6_tmpaddrtimer(void *);
-int in6_nigroup(struct ifnet *, const char *, int, struct in6_addr *);
+extern int in6_domifattach(struct ifnet *);
+extern int in6_ifattach(struct ifnet *, struct ifnet *, struct in6_aliasreq *);
+extern void in6_ifdetach(struct ifnet *);
+extern void in6_get_tmpifid(struct ifnet *, u_int8_t *, const u_int8_t *, int);
+extern void in6_tmpaddrtimer(void *);
+extern int in6_nigroup(struct ifnet *, const char *, int, struct in6_addr *);
 #endif /* KERNEL_PRIVATE */
 
 #endif /* _NETINET6_IN6_IFATTACH_H_ */
diff --git a/bsd/netinet6/in6_mcast.c b/bsd/netinet6/in6_mcast.c
new file mode 100644
index 000000000..05670d211
--- /dev/null
+++ b/bsd/netinet6/in6_mcast.c
@@ -0,0 +1,3490 @@
+/*
+ * Copyright (c) 2010-2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * Copyright (c) 2009 Bruce Simpson.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * IPv6 multicast socket, group, and socket option processing module.
+ * Normative references: RFC 2292, RFC 3493, RFC 3542, RFC 3678, RFC 3810.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/sysctl.h>
+#include <sys/tree.h>
+#include <sys/mcache.h>
+
+#include <kern/zalloc.h>
+
+#include <pexpert/pexpert.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet6/in6_var.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet/in_pcb.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_var.h>
+#include <netinet6/nd6.h>
+#include <netinet6/mld6_var.h>
+#include <netinet6/scope6_var.h>
+
+#ifndef __SOCKUNION_DECLARED
+union sockunion {
+	struct sockaddr_storage	ss;
+	struct sockaddr		sa;
+	struct sockaddr_dl	sdl;
+	struct sockaddr_in6	sin6;
+};
+typedef union sockunion sockunion_t;
+#define __SOCKUNION_DECLARED
+#endif /* __SOCKUNION_DECLARED */
+
+static void	im6f_commit(struct in6_mfilter *);
+static int	im6f_get_source(struct in6_mfilter *imf,
+		    const struct sockaddr_in6 *psin,
+		    struct in6_msource **);
+static struct in6_msource *
+		im6f_graft(struct in6_mfilter *, const uint8_t,
+		    const struct sockaddr_in6 *);
+static int	im6f_prune(struct in6_mfilter *, const struct sockaddr_in6 *);
+static void	im6f_rollback(struct in6_mfilter *);
+static void	im6f_reap(struct in6_mfilter *);
+static int	im6o_grow(struct ip6_moptions *, size_t);
+static size_t	im6o_match_group(const struct ip6_moptions *,
+		    const struct ifnet *, const struct sockaddr *);
+static struct in6_msource *
+		im6o_match_source(const struct ip6_moptions *, const size_t,
+		    const struct sockaddr *);
+static void	im6s_merge(struct ip6_msource *ims,
+		    const struct in6_msource *lims, const int rollback);
+static int	in6_mc_get(struct ifnet *, const struct in6_addr *,
+		    struct in6_multi **);
+static int	in6m_get_source(struct in6_multi *inm,
+		    const struct in6_addr *addr, const int noalloc,
+		    struct ip6_msource **pims);
+static int	in6m_is_ifp_detached(const struct in6_multi *);
+static int	in6m_merge(struct in6_multi *, /*const*/ struct in6_mfilter *);
+static void	in6m_reap(struct in6_multi *);
+static struct ip6_moptions *
+		in6p_findmoptions(struct inpcb *);
+static int	in6p_get_source_filters(struct inpcb *, struct sockopt *);
+static int	in6p_lookup_v4addr(struct ipv6_mreq *, struct ip_mreq *);
+static int	in6p_join_group(struct inpcb *, struct sockopt *);
+static int	in6p_leave_group(struct inpcb *, struct sockopt *);
+static struct ifnet *
+		in6p_lookup_mcast_ifp(const struct inpcb *,
+		    const struct sockaddr_in6 *);
+static int	in6p_block_unblock_source(struct inpcb *, struct sockopt *);
+static int	in6p_set_multicast_if(struct inpcb *, struct sockopt *);
+static int	in6p_set_source_filters(struct inpcb *, struct sockopt *);
+static int	sysctl_ip6_mcast_filters SYSCTL_HANDLER_ARGS;
+static __inline__ int ip6_msource_cmp(const struct ip6_msource *,
+		    const struct ip6_msource *);
+
+SYSCTL_DECL(_net_inet6_ip6);	/* XXX Not in any common header. */
+
+SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, mcast, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPv6 multicast");
+
+static unsigned long in6_mcast_maxgrpsrc = IPV6_MAX_GROUP_SRC_FILTER;
+SYSCTL_LONG(_net_inet6_ip6_mcast, OID_AUTO, maxgrpsrc,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &in6_mcast_maxgrpsrc, 
+    "Max source filters per group");
+
+static unsigned long in6_mcast_maxsocksrc = IPV6_MAX_SOCK_SRC_FILTER;
+SYSCTL_LONG(_net_inet6_ip6_mcast, OID_AUTO, maxsocksrc,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &in6_mcast_maxsocksrc, 
+    "Max source filters per socket");
+
+int in6_mcast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
+SYSCTL_INT(_net_inet6_ip6_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &in6_mcast_loop, 0, "Loopback multicast datagrams by default");
+
+SYSCTL_NODE(_net_inet6_ip6_mcast, OID_AUTO, filters,
+    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_ip6_mcast_filters,
+    "Per-interface stack-wide source filters");
+
+RB_GENERATE_PREV(ip6_msource_tree, ip6_msource, im6s_link, ip6_msource_cmp);
+
+#define	IN6M_TRACE_HIST_SIZE	32	/* size of trace history */
+
+/* For gdb */
+__private_extern__ unsigned int in6m_trace_hist_size = IN6M_TRACE_HIST_SIZE;
+
+struct in6_multi_dbg {
+	struct in6_multi	in6m;			/* in6_multi */
+	u_int16_t		in6m_refhold_cnt;	/* # of ref */
+	u_int16_t		in6m_refrele_cnt;	/* # of rele */
+	/*
+	 * Circular lists of in6m_addref and in6m_remref callers.
+	 */
+	ctrace_t		in6m_refhold[IN6M_TRACE_HIST_SIZE];
+	ctrace_t		in6m_refrele[IN6M_TRACE_HIST_SIZE];
+	/*
+	 * Trash list linkage
+	 */
+	TAILQ_ENTRY(in6_multi_dbg) in6m_trash_link;
+};
+
+/* List of trash in6_multi entries protected by in6m_trash_lock */
+static TAILQ_HEAD(, in6_multi_dbg) in6m_trash_head;
+static decl_lck_mtx_data(, in6m_trash_lock);
+
+#if DEBUG
+static unsigned int in6m_debug = 1;		/* debugging (enabled) */
+#else
+static unsigned int in6m_debug;			/* debugging (disabled) */
+#endif /* !DEBUG */
+static unsigned int in6m_size;			/* size of zone element */
+static struct zone *in6m_zone;			/* zone for in6_multi */
+
+#define	IN6M_ZONE_MAX		64		/* maximum elements in zone */
+#define	IN6M_ZONE_NAME		"in6_multi"	/* zone name */
+
+static unsigned int imm_size;			/* size of zone element */
+static struct zone *imm_zone;			/* zone for in6_multi_mship */
+
+#define	IMM_ZONE_MAX		64		/* maximum elements in zone */
+#define	IMM_ZONE_NAME		"in6_multi_mship" /* zone name */
+
+#define	IP6MS_ZONE_MAX		64		/* maximum elements in zone */
+#define	IP6MS_ZONE_NAME		"ip6_msource"	/* zone name */
+
+static unsigned int ip6ms_size;			/* size of zone element */
+static struct zone *ip6ms_zone;			/* zone for ip6_msource */
+
+#define	IN6MS_ZONE_MAX		64		/* maximum elements in zone */
+#define	IN6MS_ZONE_NAME		"in6_msource"	/* zone name */
+
+static unsigned int in6ms_size;			/* size of zone element */
+static struct zone *in6ms_zone;			/* zone for in6_msource */
+
+/* Lock group and attribute for in6_multihead_lock lock */
+static lck_attr_t	*in6_multihead_lock_attr;
+static lck_grp_t	*in6_multihead_lock_grp;
+static lck_grp_attr_t	*in6_multihead_lock_grp_attr;
+
+static decl_lck_rw_data(, in6_multihead_lock);
+struct in6_multihead in6_multihead;
+
+static struct in6_multi *in6_multi_alloc(int);
+static void in6_multi_free(struct in6_multi *);
+static void in6_multi_attach(struct in6_multi *);
+static struct in6_multi_mship *in6_multi_mship_alloc(int);
+static void in6_multi_mship_free(struct in6_multi_mship *);
+static void in6m_trace(struct in6_multi *, int);
+
+static struct ip6_msource *ip6ms_alloc(int);
+static void ip6ms_free(struct ip6_msource *);
+static struct in6_msource *in6ms_alloc(int);
+static void in6ms_free(struct in6_msource *);
+
+#define	IM6O_CAST_TO_NONCONST(x) ((struct ip6_moptions *)(void *)(uintptr_t)x)
+#define	IN6M_CAST_TO_NONCONST(x) ((struct in6_multi *)(void *)(uintptr_t)x)
+
+/*
+ * IPv6 source tree comparison function.
+ *
+ * An ordered predicate is necessary; bcmp() is not documented to return
+ * an indication of order, while memcmp() is, as required by ISO C99.
+ */
+static __inline int
+ip6_msource_cmp(const struct ip6_msource *a, const struct ip6_msource *b)
+{
+	return (memcmp(&a->im6s_addr, &b->im6s_addr, sizeof(struct in6_addr)));
+}
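+
+/*
+ * Illustrative note: memcmp() compares the 16 address bytes in network
+ * order, yielding a big-endian lexicographic total order, e.g. ff02::1
+ * sorts before ff02::2, which is all the RB-tree above requires.
+ */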
+
+/*
+ * Inline function which wraps assertions for a valid ifp.
+ */
+static __inline__ int
+in6m_is_ifp_detached(const struct in6_multi *inm)
+{
+	VERIFY(inm->in6m_ifma != NULL);
+	VERIFY(inm->in6m_ifp == inm->in6m_ifma->ifma_ifp);
+
+	return (!ifnet_is_attached(inm->in6m_ifp, 0));
+}
+
+/*
+ * Initialize an in6_mfilter structure to a known state at t0, t1
+ * with an empty source filter list.
+ */
+static __inline__ void
+im6f_init(struct in6_mfilter *imf, const int st0, const int st1)
+{
+	memset(imf, 0, sizeof(struct in6_mfilter));
+	RB_INIT(&imf->im6f_sources);
+	imf->im6f_st[0] = st0;
+	imf->im6f_st[1] = st1;
+}
+
+/*
+ * Resize the ip6_moptions vector to the next power-of-two minus 1.
+ */
+static int
+im6o_grow(struct ip6_moptions *imo, size_t newmax)
+{
+	struct in6_multi	**nmships;
+	struct in6_multi	**omships;
+	struct in6_mfilter	 *nmfilters;
+	struct in6_mfilter	 *omfilters;
+	size_t			  idx;
+	size_t			  oldmax;
+
+	IM6O_LOCK_ASSERT_HELD(imo);
+
+	nmships = NULL;
+	nmfilters = NULL;
+	omships = imo->im6o_membership;
+	omfilters = imo->im6o_mfilters;
+	oldmax = imo->im6o_max_memberships;
+	if (newmax == 0)
+		newmax = ((oldmax + 1) * 2) - 1;
+
+	if (newmax > IPV6_MAX_MEMBERSHIPS)
+		return (ETOOMANYREFS);
+
+	if ((nmships = (struct in6_multi **)_REALLOC(omships,
+	    sizeof (struct in6_multi *) * newmax, M_IP6MOPTS,
+	    M_WAITOK | M_ZERO)) == NULL)
+		return (ENOMEM);
+
+	imo->im6o_membership = nmships;
+
+	if ((nmfilters = (struct in6_mfilter *)_REALLOC(omfilters,
+	    sizeof (struct in6_mfilter) * newmax, M_IN6MFILTER,
+	    M_WAITOK | M_ZERO)) == NULL)
+		return (ENOMEM);
+
+	imo->im6o_mfilters = nmfilters;
+
+	/* Initialize newly allocated source filter heads. */
+	for (idx = oldmax; idx < newmax; idx++)
+		im6f_init(&nmfilters[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
+
+	imo->im6o_max_memberships = newmax;
+
+	return (0);
+}
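+
+/*
+ * Illustrative growth sequence for im6o_grow() when newmax == 0 is
+ * passed: oldmax slots become ((oldmax + 1) * 2) - 1, e.g.
+ * 31 -> 63 -> 127 -> ..., until the request would exceed
+ * IPV6_MAX_MEMBERSHIPS, at which point ETOOMANYREFS is returned.
+ */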
+
+/*
+ * Find an IPv6 multicast group entry for this ip6_moptions instance
+ * which matches the specified group, and optionally an interface.
+ * Return its index into the array, or -1 if not found.
+ */
+static size_t
+im6o_match_group(const struct ip6_moptions *imo, const struct ifnet *ifp,
+    const struct sockaddr *group)
+{
+	const struct sockaddr_in6 *gsin6;
+	struct in6_multi *pinm;
+	int		  idx;
+	int		  nmships;
+
+	IM6O_LOCK_ASSERT_HELD(IM6O_CAST_TO_NONCONST(imo));
+
+	gsin6 = (const struct sockaddr_in6 *)group;
+
+	/* The im6o_membership array may be lazy allocated. */
+	if (imo->im6o_membership == NULL || imo->im6o_num_memberships == 0)
+		return (-1);
+
+	nmships = imo->im6o_num_memberships;
+	for (idx = 0; idx < nmships; idx++) {
+		pinm = imo->im6o_membership[idx];
+		if (pinm == NULL)
+			continue;
+		IN6M_LOCK(pinm);
+		if ((ifp == NULL || (pinm->in6m_ifp == ifp)) &&
+		    IN6_ARE_ADDR_EQUAL(&pinm->in6m_addr,
+		    &gsin6->sin6_addr)) {
+			IN6M_UNLOCK(pinm);
+			break;
+		}
+		IN6M_UNLOCK(pinm);
+	}
+	if (idx >= nmships)
+		idx = -1;
+
+	return (idx);
+}
+
+/*
+ * Find an IPv6 multicast source entry for this imo which matches
+ * the given group index for this socket, and source address.
+ *
+ * XXX TODO: The scope ID, if present in src, is stripped before
+ * any comparison. We SHOULD enforce scope/zone checks where the source
+ * filter entry has a link scope.
+ *
+ * NOTE: This does not check if the entry is in-mode, merely if
+ * it exists, which may not be the desired behaviour.
+ */
+static struct in6_msource *
+im6o_match_source(const struct ip6_moptions *imo, const size_t gidx,
+    const struct sockaddr *src)
+{
+	struct ip6_msource	 find;
+	struct in6_mfilter	*imf;
+	struct ip6_msource	*ims;
+	const sockunion_t	*psa;
+
+	IM6O_LOCK_ASSERT_HELD(IM6O_CAST_TO_NONCONST(imo));
+
+	VERIFY(src->sa_family == AF_INET6);
+	VERIFY(gidx != (size_t)-1 && gidx < imo->im6o_num_memberships);
+
+	/* The im6o_mfilters array may be lazy allocated. */
+	if (imo->im6o_mfilters == NULL)
+		return (NULL);
+	imf = &imo->im6o_mfilters[gidx];
+
+	psa = (const sockunion_t *)src;
+	find.im6s_addr = psa->sin6.sin6_addr;
+	in6_clearscope(&find.im6s_addr);		/* XXX */
+	ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
+
+	return ((struct in6_msource *)ims);
+}
+
+/*
+ * Perform filtering for multicast datagrams on a socket by group and source.
+ *
+ * Returns 0 if a datagram should be allowed through, or various error codes
+ * if the socket was not a member of the group, or the source was muted, etc.
+ */
+int
+im6o_mc_filter(const struct ip6_moptions *imo, const struct ifnet *ifp,
+    const struct sockaddr *group, const struct sockaddr *src)
+{
+	size_t gidx;
+	struct in6_msource *ims;
+	int mode;
+
+	IM6O_LOCK_ASSERT_HELD(IM6O_CAST_TO_NONCONST(imo));
+	VERIFY(ifp != NULL);
+
+	gidx = im6o_match_group(imo, ifp, group);
+	if (gidx == (size_t)-1)
+		return (MCAST_NOTGMEMBER);
+
+	/*
+	 * Check if the source was included in an (S,G) join.
+	 * Allow reception on exclusive memberships by default,
+	 * reject reception on inclusive memberships by default.
+	 * Exclude source only if an in-mode exclude filter exists.
+	 * Include source only if an in-mode include filter exists.
+	 * NOTE: We are comparing group state here at MLD t1 (now)
+	 * with socket-layer t0 (since last downcall).
+	 */
+	mode = imo->im6o_mfilters[gidx].im6f_st[1];
+	ims = im6o_match_source(imo, gidx, src);
+
+	if ((ims == NULL && mode == MCAST_INCLUDE) ||
+	    (ims != NULL && ims->im6sl_st[0] != mode))
+		return (MCAST_NOTSMEMBER);
+
+	return (MCAST_PASS);
+}
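+
+/*
+ * Truth table for the checks above (illustrative; st0 is the matched
+ * source entry's state at socket-layer t0, mode is im6f_st[1]):
+ *
+ *	group matched	source entry		result
+ *	no		-			MCAST_NOTGMEMBER
+ *	yes		none, mode INCLUDE	MCAST_NOTSMEMBER
+ *	yes		found, st0 != mode	MCAST_NOTSMEMBER
+ *	yes		anything else		MCAST_PASS
+ */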
+
+/*
+ * Find and return a reference to an in6_multi record for (ifp, group),
+ * and bump its reference count.
+ * If one does not exist, try to allocate it, and update link-layer multicast
+ * filters on ifp to listen for group.
+ * Assumes the IN6_MULTI lock is held across the call.
+ * Return 0 if successful, otherwise return an appropriate error code.
+ */
+static int
+in6_mc_get(struct ifnet *ifp, const struct in6_addr *group,
+    struct in6_multi **pinm)
+{
+	struct sockaddr_in6	 gsin6;
+	struct ifmultiaddr	*ifma;
+	struct in6_multi	*inm;
+	int			 error;
+
+	*pinm = NULL;
+
+	in6_multihead_lock_shared();
+	IN6_LOOKUP_MULTI(group, ifp, inm);
+	if (inm != NULL) {
+		IN6M_LOCK(inm);
+		VERIFY(inm->in6m_reqcnt >= 1);
+		inm->in6m_reqcnt++;
+		VERIFY(inm->in6m_reqcnt != 0);
+		*pinm = inm;
+		IN6M_UNLOCK(inm);
+		in6_multihead_lock_done();
+		/*
+		 * We already joined this group; return the in6m
+		 * with a refcount held (via lookup) for caller.
+		 */
+		return (0);
+	}
+	in6_multihead_lock_done();
+
+	memset(&gsin6, 0, sizeof(gsin6));
+	gsin6.sin6_family = AF_INET6;
+	gsin6.sin6_len = sizeof(struct sockaddr_in6);
+	gsin6.sin6_addr = *group;
+
+	/*
+	 * Check if a link-layer group is already associated
+	 * with this network-layer group on the given ifnet.
+	 */
+	error = if_addmulti(ifp, (struct sockaddr *)&gsin6, &ifma);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * See comments in in6m_remref() for access to ifma_protospec.
+	 */
+	in6_multihead_lock_exclusive();
+	IFMA_LOCK(ifma);
+	if ((inm = ifma->ifma_protospec) != NULL) {
+		VERIFY(ifma->ifma_addr != NULL);
+		VERIFY(ifma->ifma_addr->sa_family == AF_INET6);
+		IN6M_ADDREF(inm);	/* for caller */
+		IFMA_UNLOCK(ifma);
+		IN6M_LOCK(inm);
+		VERIFY(inm->in6m_ifma == ifma);
+		VERIFY(inm->in6m_ifp == ifp);
+		VERIFY(IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, group));
+		if (inm->in6m_debug & IFD_ATTACHED) {
+			VERIFY(inm->in6m_reqcnt >= 1);
+			inm->in6m_reqcnt++;
+			VERIFY(inm->in6m_reqcnt != 0);
+			*pinm = inm;
+			IN6M_UNLOCK(inm);
+			in6_multihead_lock_done();
+			IFMA_REMREF(ifma);
+			/*
+			 * We lost the race with another thread doing
+			 * in6_mc_get(); this group has already been
+			 * joined, so return the inm with a refcount
+			 * held for the caller.
+			 */
+			return (0);
+		}
+		/*
+		 * We lost the race with another thread doing in6_delmulti();
+		 * the inm referring to the ifma has been detached, thus we
+		 * reattach it back to the in6_multihead list, and return the
+		 * inm with a refcount held for the caller.
+		 */
+		in6_multi_attach(inm);
+		VERIFY((inm->in6m_debug &
+		    (IFD_ATTACHED | IFD_TRASHED)) == IFD_ATTACHED);
+		*pinm = inm;
+		IN6M_UNLOCK(inm);
+		in6_multihead_lock_done();
+		IFMA_REMREF(ifma);
+		return (0);
+	}
+	IFMA_UNLOCK(ifma);
+
+	/*
+	 * A new in6_multi record is needed; allocate and initialize it.
+	 * We DO NOT perform an MLD join as the in6_ layer may need to
+	 * push an initial source list down to MLD to support SSM.
+	 *
+	 * The initial source filter state is INCLUDE, {} as per the RFC.
+	 * Pending state-changes per group are subject to a bounds check.
+	 */
+	inm = in6_multi_alloc(M_WAITOK);
+	if (inm == NULL) {
+		in6_multihead_lock_done();
+		IFMA_REMREF(ifma);
+		return (ENOMEM);
+	}
+	IN6M_LOCK(inm);
+	inm->in6m_addr = *group;
+	inm->in6m_ifp = ifp;
+	inm->in6m_mli = MLD_IFINFO(ifp);
+	VERIFY(inm->in6m_mli != NULL);
+	MLI_ADDREF(inm->in6m_mli);
+	inm->in6m_ifma = ifma;		/* keep refcount from if_addmulti() */
+	inm->in6m_state = MLD_NOT_MEMBER;
+	/*
+	 * Pending state-changes per group are subject to a bounds check.
+	 */
+	inm->in6m_scq.ifq_maxlen = MLD_MAX_STATE_CHANGES;
+	inm->in6m_st[0].iss_fmode = MCAST_UNDEFINED;
+	inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
+	RB_INIT(&inm->in6m_srcs);
+	*pinm = inm;
+	in6_multi_attach(inm);
+	VERIFY((inm->in6m_debug &
+	    (IFD_ATTACHED | IFD_TRASHED)) == IFD_ATTACHED);
+	IN6M_ADDREF_LOCKED(inm);	/* for caller */
+	IN6M_UNLOCK(inm);
+
+	IFMA_LOCK(ifma);
+	VERIFY(ifma->ifma_protospec == NULL);
+	ifma->ifma_protospec = inm;
+	IFMA_UNLOCK(ifma);
+	in6_multihead_lock_done();
+
+	return (0);
+}
+
+/*
+ * Clear recorded source entries for a group.
+ * Used by the MLD code. Caller must hold the IN6_MULTI lock.
+ * FIXME: Should reap.
+ */
+void
+in6m_clear_recorded(struct in6_multi *inm)
+{
+	struct ip6_msource	*ims;
+
+	IN6M_LOCK_ASSERT_HELD(inm);
+
+	RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) {
+		if (ims->im6s_stp) {
+			ims->im6s_stp = 0;
+			--inm->in6m_st[1].iss_rec;
+		}
+	}
+	VERIFY(inm->in6m_st[1].iss_rec == 0);
+}
+
+/*
+ * Record a source as pending for a Source-Group MLDv2 query.
+ * This lives here as it modifies the shared tree.
+ *
+ * inm is the group descriptor.
+ * naddr is the address of the source to record in network-byte order.
+ *
+ * If the net.inet6.mld.sgalloc sysctl is non-zero, we will
+ * lazy-allocate a source node in response to an SG query.
+ * Otherwise, no allocation is performed. This saves some memory
+ * with the trade-off that the source will not be reported to the
+ * router if joined in the window between the query response and
+ * the group actually being joined on the local host.
+ *
+ * VIMAGE: XXX: Currently the mld_sgalloc feature has been removed.
+ * This turns off the allocation of a recorded source entry if
+ * the group has not been joined.
+ *
+ * Return 0 if the source didn't exist or was already marked as recorded.
+ * Return 1 if the source was marked as recorded by this function.
+ * Return <0 if any error occurred (negated errno code).
+ */
+int
+in6m_record_source(struct in6_multi *inm, const struct in6_addr *addr)
+{
+	struct ip6_msource	 find;
+	struct ip6_msource	*ims, *nims;
+
+	IN6M_LOCK_ASSERT_HELD(inm);
+
+	find.im6s_addr = *addr;
+	ims = RB_FIND(ip6_msource_tree, &inm->in6m_srcs, &find);
+	if (ims && ims->im6s_stp)
+		return (0);
+	if (ims == NULL) {
+		if (inm->in6m_nsrc == in6_mcast_maxgrpsrc)
+			return (-ENOSPC);
+		nims = ip6ms_alloc(M_WAITOK);
+		if (nims == NULL)
+			return (-ENOMEM);
+		nims->im6s_addr = find.im6s_addr;
+		RB_INSERT(ip6_msource_tree, &inm->in6m_srcs, nims);
+		++inm->in6m_nsrc;
+		ims = nims;
+	}
+
+	/*
+	 * Mark the source as recorded and update the recorded
+	 * source count.
+	 */
+	++ims->im6s_stp;
+	++inm->in6m_st[1].iss_rec;
+
+	return (1);
+}
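+
+/*
+ * Example (illustrative): for an MLDv2 group-and-source query naming
+ * (G, {S1, S2}), MLD calls in6m_record_source() once per source; the
+ * first call for S1 allocates a node and returns 1, a repeated call
+ * for S1 returns 0, and in6m_clear_recorded() later resets the
+ * recorded set once the pending report has been handled.
+ */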
+
+/*
+ * Return a pointer to an in6_msource owned by an in6_mfilter,
+ * given its source address.
+ * Lazy-allocate if needed. If this is a new entry its filter state is
+ * undefined at t0.
+ *
+ * imf is the filter set being modified.
+ * addr is the source address.
+ *
+ * Caller is expected to be holding im6o_lock.
+ */
+static int
+im6f_get_source(struct in6_mfilter *imf, const struct sockaddr_in6 *psin,
+    struct in6_msource **plims)
+{
+	struct ip6_msource	 find;
+	struct ip6_msource	*ims;
+	struct in6_msource	*lims;
+	int			 error;
+
+	error = 0;
+	ims = NULL;
+	lims = NULL;
+
+	find.im6s_addr = psin->sin6_addr;
+	ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
+	lims = (struct in6_msource *)ims;
+	if (lims == NULL) {
+		if (imf->im6f_nsrc == in6_mcast_maxsocksrc)
+			return (ENOSPC);
+		lims = in6ms_alloc(M_WAITOK);
+		if (lims == NULL)
+			return (ENOMEM);
+		lims->im6s_addr = find.im6s_addr;
+		lims->im6sl_st[0] = MCAST_UNDEFINED;
+		RB_INSERT(ip6_msource_tree, &imf->im6f_sources,
+		    (struct ip6_msource *)lims);
+		++imf->im6f_nsrc;
+	}
+
+	*plims = lims;
+
+	return (error);
+}
+
+/*
+ * Graft a source entry into an existing socket-layer filter set,
+ * maintaining any required invariants and checking allocations.
+ *
+ * The source is marked as being in the new filter mode at t1.
+ *
+ * Return the pointer to the new node, otherwise return NULL.
+ *
+ * Caller is expected to be holding im6o_lock.
+ */
+static struct in6_msource *
+im6f_graft(struct in6_mfilter *imf, const uint8_t st1,
+    const struct sockaddr_in6 *psin)
+{
+	struct in6_msource	*lims;
+
+	lims = in6ms_alloc(M_WAITOK);
+	if (lims == NULL)
+		return (NULL);
+	lims->im6s_addr = psin->sin6_addr;
+	lims->im6sl_st[0] = MCAST_UNDEFINED;
+	lims->im6sl_st[1] = st1;
+	RB_INSERT(ip6_msource_tree, &imf->im6f_sources,
+	    (struct ip6_msource *)lims);
+	++imf->im6f_nsrc;
+
+	return (lims);
+}
+
+/*
+ * Prune a source entry from an existing socket-layer filter set,
+ * maintaining any required invariants and checking allocations.
+ *
+ * The source is marked as being left at t1, it is not freed.
+ *
+ * Return 0 if no error occurred, otherwise return an errno value.
+ *
+ * Caller is expected to be holding im6o_lock.
+ */
+static int
+im6f_prune(struct in6_mfilter *imf, const struct sockaddr_in6 *psin)
+{
+	struct ip6_msource	 find;
+	struct ip6_msource	*ims;
+	struct in6_msource	*lims;
+
+	find.im6s_addr = psin->sin6_addr;
+	ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find);
+	if (ims == NULL)
+		return (ENOENT);
+	lims = (struct in6_msource *)ims;
+	lims->im6sl_st[1] = MCAST_UNDEFINED;
+	return (0);
+}
+
+/*
+ * Revert socket-layer filter set deltas at t1 to t0 state.
+ *
+ * Caller is expected to be holding im6o_lock.
+ */
+static void
+im6f_rollback(struct in6_mfilter *imf)
+{
+	struct ip6_msource	*ims, *tims;
+	struct in6_msource	*lims;
+
+	RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) {
+		lims = (struct in6_msource *)ims;
+		if (lims->im6sl_st[0] == lims->im6sl_st[1]) {
+			/* no change at t1 */
+			continue;
+		} else if (lims->im6sl_st[0] != MCAST_UNDEFINED) {
+			/* revert change to existing source at t1 */
+			lims->im6sl_st[1] = lims->im6sl_st[0];
+		} else {
+			/* revert source added at t1 */
+			MLD_PRINTF(("%s: free in6ms %p\n", __func__, lims));
+			RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims);
+			in6ms_free(lims);
+			imf->im6f_nsrc--;
+		}
+	}
+	imf->im6f_st[1] = imf->im6f_st[0];
+}
+
+/*
+ * Mark socket-layer filter set as INCLUDE {} at t1.
+ *
+ * Caller is expected to be holding im6o_lock.
+ */
+void
+im6f_leave(struct in6_mfilter *imf)
+{
+	struct ip6_msource	*ims;
+	struct in6_msource	*lims;
+
+	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
+		lims = (struct in6_msource *)ims;
+		lims->im6sl_st[1] = MCAST_UNDEFINED;
+	}
+	imf->im6f_st[1] = MCAST_INCLUDE;
+}
+
+/*
+ * Mark socket-layer filter set deltas as committed.
+ *
+ * Caller is expected to be holding im6o_lock.
+ */
+static void
+im6f_commit(struct in6_mfilter *imf)
+{
+	struct ip6_msource	*ims;
+	struct in6_msource	*lims;
+
+	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
+		lims = (struct in6_msource *)ims;
+		lims->im6sl_st[0] = lims->im6sl_st[1];
+	}
+	imf->im6f_st[0] = imf->im6f_st[1];
+}
+
+/*
+ * Reap unreferenced sources from socket-layer filter set.
+ *
+ * Caller is expected to be holding im6o_lock.
+ */
+static void
+im6f_reap(struct in6_mfilter *imf)
+{
+	struct ip6_msource	*ims, *tims;
+	struct in6_msource	*lims;
+
+	RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) {
+		lims = (struct in6_msource *)ims;
+		if ((lims->im6sl_st[0] == MCAST_UNDEFINED) &&
+		    (lims->im6sl_st[1] == MCAST_UNDEFINED)) {
+			MLD_PRINTF(("%s: free in6ms %p\n", __func__, lims));
+			RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims);
+			in6ms_free(lims);
+			imf->im6f_nsrc--;
+		}
+	}
+}
+
+/*
+ * Purge socket-layer filter set.
+ *
+ * Caller is expected to be holding im6o_lock.
+ */
+void
+im6f_purge(struct in6_mfilter *imf)
+{
+	struct ip6_msource	*ims, *tims;
+	struct in6_msource	*lims;
+
+	RB_FOREACH_SAFE(ims, ip6_msource_tree, &imf->im6f_sources, tims) {
+		lims = (struct in6_msource *)ims;
+		MLD_PRINTF(("%s: free in6ms %p\n", __func__, lims));
+		RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims);
+		in6ms_free(lims);
+		imf->im6f_nsrc--;
+	}
+	imf->im6f_st[0] = imf->im6f_st[1] = MCAST_UNDEFINED;
+	VERIFY(RB_EMPTY(&imf->im6f_sources));
+}
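+
+/*
+ * Summary of the im6f_* transaction protocol used by the setsockopt
+ * handlers below (illustrative): stage deltas at t1 via im6f_graft()
+ * or im6f_prune(), attempt in6m_merge() and the MLD downcall, then
+ * im6f_commit() t1 into t0 on success or im6f_rollback() t1 back to
+ * t0 on failure, and finally im6f_reap() any nodes left at
+ * MCAST_UNDEFINED in both epochs.
+ */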
+
+/*
+ * Look up a source filter entry for a multicast group.
+ *
+ * inm is the group descriptor to work with.
+ * addr is the IPv6 address to look up.
+ * noalloc may be non-zero to suppress allocation of sources.
+ * *pims will be set to the address of the retrieved or allocated source.
+ *
+ * Return 0 if successful, otherwise return a non-zero error code.
+ */
+static int
+in6m_get_source(struct in6_multi *inm, const struct in6_addr *addr,
+    const int noalloc, struct ip6_msource **pims)
+{
+	struct ip6_msource	 find;
+	struct ip6_msource	*ims, *nims;
+
+	IN6M_LOCK_ASSERT_HELD(inm);
+
+	find.im6s_addr = *addr;
+	ims = RB_FIND(ip6_msource_tree, &inm->in6m_srcs, &find);
+	if (ims == NULL && !noalloc) {
+		if (inm->in6m_nsrc == in6_mcast_maxgrpsrc)
+			return (ENOSPC);
+		nims = ip6ms_alloc(M_WAITOK);
+		if (nims == NULL)
+			return (ENOMEM);
+		nims->im6s_addr = *addr;
+		RB_INSERT(ip6_msource_tree, &inm->in6m_srcs, nims);
+		++inm->in6m_nsrc;
+		ims = nims;
+		MLD_PRINTF(("%s: allocated %s as %p\n", __func__,
+		    ip6_sprintf(addr), ims));
+	}
+
+	*pims = ims;
+	return (0);
+}
+
+/*
+ * Helper function to derive the filter mode on a source entry
+ * from its internal counters. Predicates are:
+ *  A source is only excluded if all listeners exclude it.
+ *  A source is only included if no listeners exclude it,
+ *  and at least one listener includes it.
+ * May be used by ifmcstat(8).
+ */
+uint8_t
+im6s_get_mode(const struct in6_multi *inm, const struct ip6_msource *ims,
+    uint8_t t)
+{
+	IN6M_LOCK_ASSERT_HELD(IN6M_CAST_TO_NONCONST(inm));
+
+	t = !!t;
+	if (inm->in6m_st[t].iss_ex > 0 &&
+	    inm->in6m_st[t].iss_ex == ims->im6s_st[t].ex)
+		return (MCAST_EXCLUDE);
+	else if (ims->im6s_st[t].in > 0 && ims->im6s_st[t].ex == 0)
+		return (MCAST_INCLUDE);
+	return (MCAST_UNDEFINED);
+}
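+
+/*
+ * Worked example (illustrative): with two exclusive listeners on a
+ * group (iss_ex == 2), a source excluded by both (im6s_st[t].ex == 2)
+ * reports MCAST_EXCLUDE; if only one listener excludes it, neither
+ * predicate holds and MCAST_UNDEFINED is returned.
+ */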
+
+/*
+ * Merge socket-layer source into MLD-layer source.
+ * If rollback is non-zero, perform the inverse of the merge.
+ */
+static void
+im6s_merge(struct ip6_msource *ims, const struct in6_msource *lims,
+    const int rollback)
+{
+	int n = rollback ? -1 : 1;
+
+	if (lims->im6sl_st[0] == MCAST_EXCLUDE) {
+		MLD_PRINTF(("%s: t1 ex -= %d on %s\n", __func__, n,
+		    ip6_sprintf(&lims->im6s_addr)));
+		ims->im6s_st[1].ex -= n;
+	} else if (lims->im6sl_st[0] == MCAST_INCLUDE) {
+		MLD_PRINTF(("%s: t1 in -= %d on %s\n", __func__, n,
+		    ip6_sprintf(&lims->im6s_addr)));
+		ims->im6s_st[1].in -= n;
+	}
+
+	if (lims->im6sl_st[1] == MCAST_EXCLUDE) {
+		MLD_PRINTF(("%s: t1 ex += %d on %s\n", __func__, n,
+		    ip6_sprintf(&lims->im6s_addr)));
+		ims->im6s_st[1].ex += n;
+	} else if (lims->im6sl_st[1] == MCAST_INCLUDE) {
+		MLD_PRINTF(("%s: t1 in += %d on %s\n", __func__, n,
+		    ip6_sprintf(&lims->im6s_addr)));
+		ims->im6s_st[1].in += n;
+	}
+}
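+
+/*
+ * Example (illustrative): a socket whose filter for a source moves
+ * from INCLUDE at t0 to EXCLUDE at t1 yields in -= 1 followed by
+ * ex += 1 on the MLD-layer t1 counters; invoking with rollback
+ * non-zero negates n and undoes exactly that delta.
+ */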
+
+/*
+ * Atomically update the global in6_multi state, when a membership's
+ * filter list is being updated in any way.
+ *
+ * imf is the per-inpcb-membership group filter pointer.
+ * A fake imf may be passed for in-kernel consumers.
+ *
+ * XXX This is a candidate for a set-symmetric-difference style loop
+ * which would eliminate the repeated lookup from root of ims nodes,
+ * as they share the same key space.
+ *
+ * If any error occurred this function will back out of refcounts
+ * and return a non-zero value.
+ */
+static int
+in6m_merge(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
+{
+	struct ip6_msource	*ims, *nims;
+	struct in6_msource	*lims;
+	int			 schanged, error;
+	int			 nsrc0, nsrc1;
+
+	IN6M_LOCK_ASSERT_HELD(inm);
+
+	schanged = 0;
+	error = 0;
+	nsrc1 = nsrc0 = 0;
+
+	/*
+	 * Update the source filters first, as this may fail.
+	 * Maintain count of in-mode filters at t0, t1. These are
+	 * used to work out if we transition into ASM mode or not.
+	 * Maintain a count of source filters whose state was
+	 * actually modified by this operation.
+	 */
+	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
+		lims = (struct in6_msource *)ims;
+		if (lims->im6sl_st[0] == imf->im6f_st[0]) nsrc0++;
+		if (lims->im6sl_st[1] == imf->im6f_st[1]) nsrc1++;
+		if (lims->im6sl_st[0] == lims->im6sl_st[1]) continue;
+		error = in6m_get_source(inm, &lims->im6s_addr, 0, &nims);
+		++schanged;
+		if (error)
+			break;
+		im6s_merge(nims, lims, 0);
+	}
+	if (error) {
+		struct ip6_msource *bims;
+
+		RB_FOREACH_REVERSE_FROM(ims, ip6_msource_tree, nims) {
+			lims = (struct in6_msource *)ims;
+			if (lims->im6sl_st[0] == lims->im6sl_st[1])
+				continue;
+			(void) in6m_get_source(inm, &lims->im6s_addr, 1, &bims);
+			if (bims == NULL)
+				continue;
+			im6s_merge(bims, lims, 1);
+		}
+		goto out_reap;
+	}
+
+	MLD_PRINTF(("%s: imf filters in-mode: %d at t0, %d at t1\n",
+	    __func__, nsrc0, nsrc1));
+
+	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
+	if (imf->im6f_st[0] == imf->im6f_st[1] &&
+	    imf->im6f_st[1] == MCAST_INCLUDE) {
+		if (nsrc1 == 0) {
+			MLD_PRINTF(("%s: --in on inm at t1\n", __func__));
+			--inm->in6m_st[1].iss_in;
+		}
+	}
+
+	/* Handle filter mode transition on socket. */
+	if (imf->im6f_st[0] != imf->im6f_st[1]) {
+		MLD_PRINTF(("%s: imf transition %d to %d\n",
+		    __func__, imf->im6f_st[0], imf->im6f_st[1]));
+
+		if (imf->im6f_st[0] == MCAST_EXCLUDE) {
+			MLD_PRINTF(("%s: --ex on inm at t1\n", __func__));
+			--inm->in6m_st[1].iss_ex;
+		} else if (imf->im6f_st[0] == MCAST_INCLUDE) {
+			MLD_PRINTF(("%s: --in on inm at t1\n", __func__));
+			--inm->in6m_st[1].iss_in;
+		}
+
+		if (imf->im6f_st[1] == MCAST_EXCLUDE) {
+			MLD_PRINTF(("%s: ex++ on inm at t1\n", __func__));
+			inm->in6m_st[1].iss_ex++;
+		} else if (imf->im6f_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
+			MLD_PRINTF(("%s: in++ on inm at t1\n", __func__));
+			inm->in6m_st[1].iss_in++;
+		}
+	}
+
+	/*
+	 * Track inm filter state in terms of listener counts.
+	 * If there are any exclusive listeners, stack-wide
+	 * membership is exclusive.
+	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
+	 * If no listeners remain, state is undefined at t1,
+	 * and the MLD lifecycle for this group should finish.
+	 */
+	if (inm->in6m_st[1].iss_ex > 0) {
+		MLD_PRINTF(("%s: transition to EX\n", __func__));
+		inm->in6m_st[1].iss_fmode = MCAST_EXCLUDE;
+	} else if (inm->in6m_st[1].iss_in > 0) {
+		MLD_PRINTF(("%s: transition to IN\n", __func__));
+		inm->in6m_st[1].iss_fmode = MCAST_INCLUDE;
+	} else {
+		MLD_PRINTF(("%s: transition to UNDEF\n", __func__));
+		inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
+	}
+
+	/* Decrement ASM listener count on transition out of ASM mode. */
+	if (imf->im6f_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
+		if ((imf->im6f_st[1] != MCAST_EXCLUDE) ||
+		    (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) {
+			MLD_PRINTF(("%s: --asm on inm at t1\n", __func__));
+			--inm->in6m_st[1].iss_asm;
+		}
+	}
+
+	/* Increment ASM listener count on transition to ASM mode. */
+	if (imf->im6f_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
+		MLD_PRINTF(("%s: asm++ on inm at t1\n", __func__));
+		inm->in6m_st[1].iss_asm++;
+	}
+
+	MLD_PRINTF(("%s: merged imf %p to inm %p\n", __func__, imf, inm));
+	in6m_print(inm);
+
+out_reap:
+	if (schanged > 0) {
+		MLD_PRINTF(("%s: sources changed; reaping\n", __func__));
+		in6m_reap(inm);
+	}
+	return (error);
+}
+
+/*
+ * Mark an in6_multi's filter set deltas as committed.
+ * Called by MLD after a state change has been enqueued.
+ */
+void
+in6m_commit(struct in6_multi *inm)
+{
+	struct ip6_msource	*ims;
+
+	IN6M_LOCK_ASSERT_HELD(inm);
+
+	MLD_PRINTF(("%s: commit inm %p\n", __func__, inm));
+	MLD_PRINTF(("%s: pre commit:\n", __func__));
+	in6m_print(inm);
+
+	RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) {
+		ims->im6s_st[0] = ims->im6s_st[1];
+	}
+	inm->in6m_st[0] = inm->in6m_st[1];
+}
+
+/*
+ * Reap unreferenced nodes from an in6_multi's filter set.
+ */
+static void
+in6m_reap(struct in6_multi *inm)
+{
+	struct ip6_msource	*ims, *tims;
+
+	IN6M_LOCK_ASSERT_HELD(inm);
+
+	RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, tims) {
+		if (ims->im6s_st[0].ex > 0 || ims->im6s_st[0].in > 0 ||
+		    ims->im6s_st[1].ex > 0 || ims->im6s_st[1].in > 0 ||
+		    ims->im6s_stp != 0)
+			continue;
+		MLD_PRINTF(("%s: free ims %p\n", __func__, ims));
+		RB_REMOVE(ip6_msource_tree, &inm->in6m_srcs, ims);
+		ip6ms_free(ims);
+		inm->in6m_nsrc--;
+	}
+}
+
+/*
+ * Purge all source nodes from an in6_multi's filter set.
+ */
+void
+in6m_purge(struct in6_multi *inm)
+{
+	struct ip6_msource	*ims, *tims;
+
+	IN6M_LOCK_ASSERT_HELD(inm);
+
+	RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, tims) {
+		MLD_PRINTF(("%s: free ims %p\n", __func__, ims));
+		RB_REMOVE(ip6_msource_tree, &inm->in6m_srcs, ims);
+		ip6ms_free(ims);
+		inm->in6m_nsrc--;
+	}
+}
+
+/*
+ * Join a multicast address w/o sources.
+ * KAME compatibility entry point.
+ */
+struct in6_multi_mship *
+in6_joingroup(struct ifnet *ifp, struct in6_addr *mcaddr,
+    int *errorp, int delay)
+{
+	struct in6_multi_mship *imm;
+	int error;
+
+	*errorp = 0;
+
+	imm = in6_multi_mship_alloc(M_WAITOK);
+	if (imm == NULL) {
+		*errorp = ENOBUFS;
+		return (NULL);
+	}
+
+	delay = (delay * PR_SLOWHZ) / hz;
+
+	error = in6_mc_join(ifp, mcaddr, NULL, &imm->i6mm_maddr, delay);
+	if (error) {
+		*errorp = error;
+		in6_multi_mship_free(imm);
+		return (NULL);
+	}
+
+	return (imm);
+}
+
+/*
+ * Leave a multicast address w/o sources.
+ * KAME compatibility entry point.
+ */
+int
+in6_leavegroup(struct in6_multi_mship *imm)
+{
+	if (imm->i6mm_maddr != NULL) {
+		in6_mc_leave(imm->i6mm_maddr, NULL);
+		IN6M_REMREF(imm->i6mm_maddr);
+		imm->i6mm_maddr = NULL;
+	}
+	in6_multi_mship_free(imm);
+	return (0);
+}
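+
+/*
+ * Sketch of typical kernel-internal use of the KAME entry points
+ * above (illustrative; `mltaddr' and the error handling shown are
+ * hypothetical):
+ *
+ *	int error;
+ *	struct in6_multi_mship *imm;
+ *
+ *	imm = in6_joingroup(ifp, &mltaddr, &error, 0);
+ *	if (imm == NULL)
+ *		return (error);
+ *	...
+ *	(void) in6_leavegroup(imm);
+ */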
+
+/*
+ * Join a multicast group; real entry point.
+ *
+ * Only preserves atomicity at inm level.
+ * NOTE: imf argument cannot be const due to sys/tree.h limitations.
+ *
+ * If the MLD downcall fails, the group is not joined, and an error
+ * code is returned.
+ */
+int
+in6_mc_join(struct ifnet *ifp, const struct in6_addr *mcaddr,
+    /*const*/ struct in6_mfilter *imf, struct in6_multi **pinm,
+    const int delay)
+{
+	struct in6_mfilter	 timf;
+	struct in6_multi	*inm = NULL;
+	int			 error = 0;
+
+	/*
+	 * Sanity: Check scope zone ID was set for ifp, if and
+	 * only if group is scoped to an interface.
+	 */
+	VERIFY(IN6_IS_ADDR_MULTICAST(mcaddr));
+	if (IN6_IS_ADDR_MC_LINKLOCAL(mcaddr) ||
+	    IN6_IS_ADDR_MC_INTFACELOCAL(mcaddr)) {
+		VERIFY(mcaddr->s6_addr16[1] != 0);
+	}
+
+	MLD_PRINTF(("%s: join %s on %p(%s%d))\n", __func__,
+	    ip6_sprintf(mcaddr), ifp, ifp->if_name, ifp->if_unit));
+
+	*pinm = NULL;
+
+	/*
+	 * If no imf was specified (i.e. kernel consumer),
+	 * fake one up and assume it is an ASM join.
+	 */
+	if (imf == NULL) {
+		im6f_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
+		imf = &timf;
+	}
+
+	error = in6_mc_get(ifp, mcaddr, &inm);
+	if (error) {
+		MLD_PRINTF(("%s: in6_mc_get() failure\n", __func__));
+		return (error);
+	}
+
+	MLD_PRINTF(("%s: merge inm state\n", __func__));
+
+	IN6M_LOCK(inm);
+	error = in6m_merge(inm, imf);
+	if (error) {
+		MLD_PRINTF(("%s: failed to merge inm state\n", __func__));
+		goto out_in6m_release;
+	}
+
+	MLD_PRINTF(("%s: doing mld downcall\n", __func__));
+	error = mld_change_state(inm, delay);
+	if (error) {
+		MLD_PRINTF(("%s: failed to update source\n", __func__));
+		goto out_in6m_release;
+	}
+
+out_in6m_release:
+	if (error) {
+		MLD_PRINTF(("%s: dropping ref on %p\n", __func__, inm));
+		IN6M_UNLOCK(inm);
+		IN6M_REMREF(inm);
+	} else {
+		IN6M_UNLOCK(inm);
+		*pinm = inm;	/* keep refcount from in6_mc_get() */
+	}
+
+	return (error);
+}
+
+/*
+ * Leave a multicast group; real entry point.
+ * All source filters will be expunged.
+ *
+ * Only preserves atomicity at inm level.
+ *
+ * Holding the write lock for the INP which contains imf
+ * is highly advisable. We can't assert for it as imf does not
+ * contain a back-pointer to the owning inp.
+ *
+ * Note: This is not the same as in6m_release(*) as this function also
+ * makes a state change downcall into MLD.
+ */
+int
+in6_mc_leave(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf)
+{
+	struct in6_mfilter	 timf;
+	int			 error, lastref;
+
+	error = 0;
+
+	IN6M_LOCK_ASSERT_NOTHELD(inm);
+
+	in6_multihead_lock_exclusive();
+	IN6M_LOCK(inm);
+
+	MLD_PRINTF(("%s: leave inm %p, %s/%s%d, imf %p\n", __func__,
+	    inm, ip6_sprintf(&inm->in6m_addr),
+	    (in6m_is_ifp_detached(inm) ? "null" : inm->in6m_ifp->if_name),
+	    inm->in6m_ifp->if_unit, imf));
+
+	/*
+	 * If no imf was specified (i.e. kernel consumer),
+	 * fake one up and assume it is an ASM join.
+	 */
+	if (imf == NULL) {
+		im6f_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
+		imf = &timf;
+	}
+
+	/*
+	 * Begin state merge transaction at MLD layer.
+	 *
+	 * As this particular invocation should not cause any memory
+	 * to be allocated, and there is no opportunity to roll back
+	 * the transaction, it MUST NOT fail.
+	 */
+	MLD_PRINTF(("%s: merge inm state\n", __func__));
+
+	error = in6m_merge(inm, imf);
+	KASSERT(error == 0, ("%s: failed to merge inm state\n", __func__));
+
+	MLD_PRINTF(("%s: doing mld downcall\n", __func__));
+	error = mld_change_state(inm, 0);
+#if MLD_DEBUG
+	if (error)
+		MLD_PRINTF(("%s: failed mld downcall\n", __func__));
+#endif
+	lastref = in6_multi_detach(inm);
+	VERIFY(!lastref || (!(inm->in6m_debug & IFD_ATTACHED) &&
+	    inm->in6m_reqcnt == 0));
+	IN6M_UNLOCK(inm);
+	in6_multihead_lock_done();
+
+	if (lastref)
+		IN6M_REMREF(inm);	/* for in6_multihead list */
+
+	return (error);
+}
+
+/*
+ * Block or unblock an ASM multicast source on an inpcb.
+ * This implements the delta-based API described in RFC 3678.
+ *
+ * The delta-based API applies only to exclusive-mode memberships.
+ * An MLD downcall will be performed.
+ *
+ * Return 0 if successful, otherwise return an appropriate error code.
+ */
+static int
+in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
+{
+	struct group_source_req		 gsr;
+	sockunion_t			*gsa, *ssa;
+	struct ifnet			*ifp;
+	struct in6_mfilter		*imf;
+	struct ip6_moptions		*imo;
+	struct in6_msource		*ims;
+	struct in6_multi		*inm;
+	size_t				 idx;
+	uint16_t			 fmode;
+	int				 error, doblock;
+
+	ifp = NULL;
+	error = 0;
+	doblock = 0;
+
+	memset(&gsr, 0, sizeof(struct group_source_req));
+	gsa = (sockunion_t *)&gsr.gsr_group;
+	ssa = (sockunion_t *)&gsr.gsr_source;
+
+	switch (sopt->sopt_name) {
+	case MCAST_BLOCK_SOURCE:
+	case MCAST_UNBLOCK_SOURCE:
+		error = sooptcopyin(sopt, &gsr,
+		    sizeof(struct group_source_req),
+		    sizeof(struct group_source_req));
+		if (error)
+			return (error);
+
+		if (gsa->sin6.sin6_family != AF_INET6 ||
+		    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
+			return (EINVAL);
+
+		if (ssa->sin6.sin6_family != AF_INET6 ||
+		    ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
+			return (EINVAL);
+
+		ifnet_head_lock_shared();
+		if (gsr.gsr_interface == 0 ||
+		    (u_int)if_index < gsr.gsr_interface) {
+			ifnet_head_done();
+			return (EADDRNOTAVAIL);
+		}
+
+		ifp = ifindex2ifnet[gsr.gsr_interface];
+		ifnet_head_done();
+
+		if (ifp == NULL)
+			return (EADDRNOTAVAIL);
+
+		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
+			doblock = 1;
+		break;
+
+	default:
+		MLD_PRINTF(("%s: unknown sopt_name %d\n",
+		    __func__, sopt->sopt_name));
+		return (EOPNOTSUPP);
+		break;
+	}
+
+	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
+		return (EINVAL);
+
+	(void) in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
+
+	/*
+	 * Check if we are actually a member of this group.
+	 */
+	imo = in6p_findmoptions(inp);
+	if (imo == NULL)
+		return (ENOMEM);
+
+	IM6O_LOCK(imo);
+	idx = im6o_match_group(imo, ifp, &gsa->sa);
+	if (idx == (size_t)-1 || imo->im6o_mfilters == NULL) {
+		error = EADDRNOTAVAIL;
+		goto out_imo_locked;
+	}
+
+	VERIFY(imo->im6o_mfilters != NULL);
+	imf = &imo->im6o_mfilters[idx];
+	inm = imo->im6o_membership[idx];
+
+	/*
+	 * Attempting to use the delta-based API on an
+	 * non exclusive-mode membership is an error.
+	 */
+	fmode = imf->im6f_st[0];
+	if (fmode != MCAST_EXCLUDE) {
+		error = EINVAL;
+		goto out_imo_locked;
+	}
+
+	/*
+	 * Deal with error cases up-front:
+	 *  Asked to block, but already blocked; or
+	 *  Asked to unblock, but nothing to unblock.
+	 * If adding a new block entry, allocate it.
+	 */
+	ims = im6o_match_source(imo, idx, &ssa->sa);
+	if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
+		MLD_PRINTF(("%s: source %s %spresent\n", __func__,
+		    ip6_sprintf(&ssa->sin6.sin6_addr),
+		    doblock ? "" : "not "));
+		error = EADDRNOTAVAIL;
+		goto out_imo_locked;
+	}
+
+	/*
+	 * Begin state merge transaction at socket layer.
+	 */
+	if (doblock) {
+		MLD_PRINTF(("%s: %s source\n", __func__, "block"));
+		ims = im6f_graft(imf, fmode, &ssa->sin6);
+		if (ims == NULL)
+			error = ENOMEM;
+	} else {
+		MLD_PRINTF(("%s: %s source\n", __func__, "allow"));
+		error = im6f_prune(imf, &ssa->sin6);
+	}
+
+	if (error) {
+		MLD_PRINTF(("%s: merge imf state failed\n", __func__));
+		goto out_im6f_rollback;
+	}
+
+	/*
+	 * Begin state merge transaction at MLD layer.
+	 */
+	IN6M_LOCK(inm);
+	MLD_PRINTF(("%s: merge inm state\n", __func__));
+	error = in6m_merge(inm, imf);
+	if (error) {
+		MLD_PRINTF(("%s: failed to merge inm state\n", __func__));
+		IN6M_UNLOCK(inm);
+		goto out_im6f_rollback;
+	}
+
+	MLD_PRINTF(("%s: doing mld downcall\n", __func__));
+	error = mld_change_state(inm, 0);
+	IN6M_UNLOCK(inm);
+#if MLD_DEBUG
+	if (error)
+		MLD_PRINTF(("%s: failed mld downcall\n", __func__));
+#endif
+
+out_im6f_rollback:
+	if (error)
+		im6f_rollback(imf);
+	else
+		im6f_commit(imf);
+
+	im6f_reap(imf);
+
+out_imo_locked:
+	IM6O_UNLOCK(imo);
+	IM6O_REMREF(imo);	/* from in6p_findmoptions() */
+	return (error);
+}
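+
+/*
+ * Userland view of the delta-based API handled above (illustrative
+ * sketch; `s' is an AF_INET6 socket already joined to the group in
+ * exclusive mode, and `ifidx', `grp' and `src' are hypothetical):
+ *
+ *	struct group_source_req gsr;
+ *
+ *	memset(&gsr, 0, sizeof (gsr));
+ *	gsr.gsr_interface = ifidx;
+ *	memcpy(&gsr.gsr_group, &grp, sizeof (grp));
+ *	memcpy(&gsr.gsr_source, &src, sizeof (src));
+ *	(void) setsockopt(s, IPPROTO_IPV6, MCAST_BLOCK_SOURCE,
+ *	    &gsr, sizeof (gsr));
+ */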
+
+/*
+ * Given an inpcb, return its multicast options structure pointer,
+ * allocating it if it does not yet exist.  Returns with an extra
+ * reference held on the ip6_moptions for the caller.  May sleep.
+ */
+static struct ip6_moptions *
+in6p_findmoptions(struct inpcb *inp)
+{
+	struct ip6_moptions	 *imo;
+	struct in6_multi	**immp;
+	struct in6_mfilter	 *imfp;
+	size_t			  idx;
+
+	if ((imo = inp->in6p_moptions) != NULL) {
+		IM6O_ADDREF(imo);	/* for caller */
+		return (imo);
+	}
+
+	imo = ip6_allocmoptions(M_WAITOK);
+	if (imo == NULL)
+		return (NULL);
+
+	immp = _MALLOC(sizeof (*immp) * IPV6_MIN_MEMBERSHIPS, M_IP6MOPTS,
+	    M_WAITOK | M_ZERO);
+	if (immp == NULL) {
+		IM6O_REMREF(imo);
+		return (NULL);
+	}
+
+	imfp = _MALLOC(sizeof (struct in6_mfilter) * IPV6_MIN_MEMBERSHIPS,
+	    M_IN6MFILTER, M_WAITOK | M_ZERO);
+	if (imfp == NULL) {
+		_FREE(immp, M_IP6MOPTS);
+		IM6O_REMREF(imo);
+		return (NULL);
+	}
+
+	imo->im6o_multicast_ifp = NULL;
+	imo->im6o_multicast_hlim = ip6_defmcasthlim;
+	imo->im6o_multicast_loop = in6_mcast_loop;
+	imo->im6o_num_memberships = 0;
+	imo->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
+	imo->im6o_membership = immp;
+
+	/* Initialize per-group source filters. */
+	for (idx = 0; idx < IPV6_MIN_MEMBERSHIPS; idx++)
+		im6f_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
+
+	imo->im6o_mfilters = imfp;
+	inp->in6p_moptions = imo; /* keep reference from ip6_allocmoptions() */
+	IM6O_ADDREF(imo);	/* for caller */
+
+	return (imo);
+}
+
+/*
+ * Atomically get source filters on a socket for an IPv6 multicast group.
+ * Called with INP lock held; returns with lock released.
+ */
+static int
+in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
+{
+	struct __msfilterreq64	msfr, msfr64;
+	struct __msfilterreq32	msfr32;
+	sockunion_t		*gsa;
+	struct ifnet		*ifp;
+	struct ip6_moptions	*imo;
+	struct in6_mfilter	*imf;
+	struct ip6_msource	*ims;
+	struct in6_msource	*lims;
+	struct sockaddr_in6	*psin;
+	struct sockaddr_storage	*ptss;
+	struct sockaddr_storage	*tss;
+	int	 		 error;
+	size_t		 	 idx, nsrcs, ncsrcs;
+	user_addr_t 		 tmp_ptr;
+
+	imo = inp->in6p_moptions;
+	VERIFY(imo != NULL);
+
+	if (IS_64BIT_PROCESS(current_proc())) {
+		error = sooptcopyin(sopt, &msfr64,
+		    sizeof(struct __msfilterreq64),
+		    sizeof(struct __msfilterreq64));
+		if (error)
+			return (error);
+		/* we never use msfr.msfr_srcs; */
+		memcpy(&msfr, &msfr64, sizeof(msfr));
+	} else {
+		error = sooptcopyin(sopt, &msfr32,
+		    sizeof(struct __msfilterreq32),
+		    sizeof(struct __msfilterreq32));
+		if (error)
+			return (error);
+		/* we never use msfr.msfr_srcs; */
+		memcpy(&msfr, &msfr32, sizeof(msfr));
+	}
+
+	if (msfr.msfr_group.ss_family != AF_INET6 ||
+	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in6))
+		return (EINVAL);
+
+	gsa = (sockunion_t *)&msfr.msfr_group;
+	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
+		return (EINVAL);
+
+	ifnet_head_lock_shared();
+	if (msfr.msfr_ifindex == 0 || (u_int)if_index < msfr.msfr_ifindex) {
+		ifnet_head_done();
+		return (EADDRNOTAVAIL);
+	}
+	ifp = ifindex2ifnet[msfr.msfr_ifindex];
+	ifnet_head_done();
+
+	if (ifp == NULL)
+		return (EADDRNOTAVAIL);
+		
+	if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc)
+		msfr.msfr_nsrcs = in6_mcast_maxsocksrc;
+
+	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
+
+	IM6O_LOCK(imo);
+	/*
+	 * Lookup group on the socket.
+	 */
+	idx = im6o_match_group(imo, ifp, &gsa->sa);
+	if (idx == (size_t)-1 || imo->im6o_mfilters == NULL) {
+		IM6O_UNLOCK(imo);
+		return (EADDRNOTAVAIL);
+	}
+	imf = &imo->im6o_mfilters[idx];
+
+	/*
+	 * Ignore memberships which are in limbo.
+	 */
+	if (imf->im6f_st[1] == MCAST_UNDEFINED) {
+		IM6O_UNLOCK(imo);
+		return (EAGAIN);
+	}
+	msfr.msfr_fmode = imf->im6f_st[1];
+
+	/*
+	 * If the user specified a buffer, copy out the source filter
+	 * entries to userland gracefully.
+	 * We only copy out the number of entries which userland
+	 * has asked for, but we always tell userland how big the
+	 * buffer really needs to be.
+	 */
+	tss = NULL;
+
+	if (IS_64BIT_PROCESS(current_proc())) 
+		tmp_ptr = msfr64.msfr_srcs;
+	else
+		tmp_ptr = CAST_USER_ADDR_T(msfr32.msfr_srcs);
+
+	if (tmp_ptr != USER_ADDR_NULL && msfr.msfr_nsrcs > 0) {
+		tss = _MALLOC(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
+		    M_TEMP, M_WAITOK | M_ZERO);
+		if (tss == NULL) {
+			IM6O_UNLOCK(imo);
+			return (ENOBUFS);
+		}
+	}
+
+	/*
+	 * Count number of sources in-mode at t0.
+	 * If buffer space exists and remains, copy out source entries.
+	 */
+	nsrcs = msfr.msfr_nsrcs;
+	ncsrcs = 0;
+	ptss = tss;
+	RB_FOREACH(ims, ip6_msource_tree, &imf->im6f_sources) {
+		lims = (struct in6_msource *)ims;
+		if (lims->im6sl_st[0] == MCAST_UNDEFINED ||
+		    lims->im6sl_st[0] != imf->im6f_st[0])
+			continue;
+		++ncsrcs;
+		if (tss != NULL && nsrcs > 0) {
+			psin = (struct sockaddr_in6 *)ptss;
+			psin->sin6_family = AF_INET6;
+			psin->sin6_len = sizeof(struct sockaddr_in6);
+			psin->sin6_addr = lims->im6s_addr;
+			psin->sin6_port = 0;
+			--nsrcs;
+			++ptss;
+		}
+	}
+
+	IM6O_UNLOCK(imo);
+
+	if (tss != NULL) {
+		error = copyout(tss, tmp_ptr,
+		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
+		FREE(tss, M_TEMP);
+		if (error)
+			return (error);
+	}
+
+	msfr.msfr_nsrcs = ncsrcs;
+	if (IS_64BIT_PROCESS(current_proc())) {
+		msfr64.msfr_ifindex = msfr.msfr_ifindex;
+		msfr64.msfr_fmode   = msfr.msfr_fmode;
+		msfr64.msfr_nsrcs   = msfr.msfr_nsrcs;
+		memcpy(&msfr64.msfr_group, &msfr.msfr_group,
+		    sizeof(struct sockaddr_storage));
+		error = sooptcopyout(sopt, &msfr64,
+		    sizeof(struct __msfilterreq64));
+	} else {
+		msfr32.msfr_ifindex = msfr.msfr_ifindex;
+		msfr32.msfr_fmode   = msfr.msfr_fmode;
+		msfr32.msfr_nsrcs   = msfr.msfr_nsrcs;
+		memcpy(&msfr32.msfr_group, &msfr.msfr_group,
+		    sizeof(struct sockaddr_storage));
+		error = sooptcopyout(sopt, &msfr32,
+		    sizeof(struct __msfilterreq32));
+	}
+
+	return (error);
+}
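+
+/*
+ * Illustrative two-call userland pattern for IPV6_MSFILTER (variable
+ * names hypothetical): query with msfr_nsrcs == 0 to learn the number
+ * of in-mode sources, size a buffer, then query again:
+ *
+ *	struct __msfilterreq mr;
+ *	socklen_t len = sizeof (mr);
+ *
+ *	mr.msfr_nsrcs = 0;
+ *	mr.msfr_srcs = NULL;
+ *	(void) getsockopt(s, IPPROTO_IPV6, IPV6_MSFILTER, &mr, &len);
+ *	mr.msfr_srcs = calloc(mr.msfr_nsrcs,
+ *	    sizeof (struct sockaddr_storage));
+ *	(void) getsockopt(s, IPPROTO_IPV6, IPV6_MSFILTER, &mr, &len);
+ */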
+
+/*
+ * Return the IP multicast options in response to user getsockopt().
+ */
+int
+ip6_getmoptions(struct inpcb *inp, struct sockopt *sopt)
+{
+	struct ip6_moptions	*im6o;
+	int			 error;
+	u_int			 optval;
+
+	im6o = inp->in6p_moptions;
+	/*
+	 * If socket is neither of type SOCK_RAW nor SOCK_DGRAM,
+	 * or is a divert socket, reject it.
+	 */
+	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
+	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
+	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
+		return (EOPNOTSUPP);
+	}
+
+	error = 0;
+	switch (sopt->sopt_name) {
+	case IPV6_MULTICAST_IF:
+		if (im6o != NULL)
+			IM6O_LOCK(im6o);
+		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL) {
+			optval = 0;
+		} else {
+			optval = im6o->im6o_multicast_ifp->if_index;
+		}
+		if (im6o != NULL)
+			IM6O_UNLOCK(im6o);
+		error = sooptcopyout(sopt, &optval, sizeof(u_int));
+		break;
+
+	case IPV6_MULTICAST_HOPS:
+		if (im6o == NULL) {
+			optval = ip6_defmcasthlim;
+		} else {
+			IM6O_LOCK(im6o);
+			optval = im6o->im6o_multicast_hlim;
+			IM6O_UNLOCK(im6o);
+		}
+		error = sooptcopyout(sopt, &optval, sizeof(u_int));
+		break;
+
+	case IPV6_MULTICAST_LOOP:
+		if (im6o == NULL) {
+			optval = in6_mcast_loop; /* XXX VIMAGE */
+		} else {
+			IM6O_LOCK(im6o);
+			optval = im6o->im6o_multicast_loop;
+			IM6O_UNLOCK(im6o);
+		}
+		error = sooptcopyout(sopt, &optval, sizeof(u_int));
+		break;
+
+	case IPV6_MSFILTER:
+		if (im6o == NULL) {
+			error = EADDRNOTAVAIL;
+		} else {
+			error = in6p_get_source_filters(inp, sopt);
+		}
+		break;
+
+	default:
+		error = ENOPROTOOPT;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Look up the ifnet to use for a multicast group membership,
+ * given the address of an IPv6 group.
+ *
+ * This routine exists to support legacy IPv6 multicast applications.
+ *
+ * If inp is non-NULL and is bound to an interface, use this socket's
+ * inp_boundif for any required routing table lookup.
+ *
+ * If the route lookup fails, return NULL.
+ *
+ * FUTURE: Support multiple forwarding tables for IPv6.
+ */
+static struct ifnet *
+in6p_lookup_mcast_ifp(const struct inpcb *in6p,
+    const struct sockaddr_in6 *gsin6)
+{
+	struct route_in6	 ro6;
+	struct ifnet		*ifp;
+	unsigned int		ifscope = IFSCOPE_NONE;
+
+	VERIFY(in6p == NULL || (in6p->inp_vflag & INP_IPV6));
+	VERIFY(gsin6->sin6_family == AF_INET6);
+	if (IN6_IS_ADDR_MULTICAST(&gsin6->sin6_addr) == 0)
+		return (NULL);
+
+	if (in6p != NULL && (in6p->inp_flags & INP_BOUND_IF))
+		ifscope = in6p->inp_boundif;
+
+	ifp = NULL;
+	memset(&ro6, 0, sizeof(struct route_in6));
+	memcpy(&ro6.ro_dst, gsin6, sizeof(struct sockaddr_in6));
+	rtalloc_scoped_ign((struct route *)&ro6, 0, ifscope);
+	if (ro6.ro_rt != NULL) {
+		ifp = ro6.ro_rt->rt_ifp;
+		VERIFY(ifp != NULL);
+		rtfree(ro6.ro_rt);
+	}
+
+	return (ifp);
+}
+
+/*
+ * Since ipv6_mreq contains an ifindex and ip_mreq contains an AF_INET
+ * address, we need to lookup the AF_INET address when translating an
+ * ipv6_mreq structure into an ipmreq structure.
+ * This is used when userland performs multicast setsockopt() on AF_INET6
+ * sockets with AF_INET multicast addresses (IPv6 v4 mapped addresses).
+ */
+static int
+in6p_lookup_v4addr(struct ipv6_mreq *mreq, struct ip_mreq *v4mreq)
+{
+	struct ifnet *ifp;
+	struct ifaddr *ifa;
+	struct sockaddr_in *sin;
+
+	ifnet_head_lock_shared();
+	if (mreq->ipv6mr_interface > (unsigned int)if_index) {
+		ifnet_head_done();
+		return (EADDRNOTAVAIL);
+	} else
+		ifp = ifindex2ifnet[mreq->ipv6mr_interface];
+	ifnet_head_done();
+	if (ifp == NULL)
+		return (EADDRNOTAVAIL);
+	ifa = ifa_ifpgetprimary(ifp, AF_INET);
+	if (ifa == NULL)
+		return (EADDRNOTAVAIL);
+	sin = (struct sockaddr_in *)ifa->ifa_addr;
+	v4mreq->imr_interface.s_addr = sin->sin_addr.s_addr;
+	IFA_REMREF(ifa);
+
+	return (0);
+}
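+
+/*
+ * Example of the translation above (illustrative): joining the
+ * v4-mapped group ::ffff:239.1.2.3 on an AF_INET6 socket is recast
+ * by in6p_join_group() below as an AF_INET IP_ADD_MEMBERSHIP of
+ * 239.1.2.3, with imr_interface taken from the primary AF_INET
+ * address of the interface named by ipv6mr_interface.
+ */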
+
+/*
+ * Join an IPv6 multicast group, possibly with a source.
+ *
+ * FIXME: The KAME use of the unspecified address (::)
+ * to join *all* multicast groups is currently unsupported.
+ */
+static int
+in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
+{
+	struct group_source_req		 gsr;
+	sockunion_t			*gsa, *ssa;
+	struct ifnet			*ifp;
+	struct in6_mfilter		*imf;
+	struct ip6_moptions		*imo;
+	struct in6_multi		*inm = NULL;
+	struct in6_msource		*lims = NULL;
+	size_t				 idx;
+	int				 error, is_new;
+	uint32_t			scopeid = 0;
+
+	ifp = NULL;
+	imf = NULL;
+	error = 0;
+	is_new = 0;
+
+	memset(&gsr, 0, sizeof(struct group_source_req));
+	gsa = (sockunion_t *)&gsr.gsr_group;
+	gsa->ss.ss_family = AF_UNSPEC;
+	ssa = (sockunion_t *)&gsr.gsr_source;
+	ssa->ss.ss_family = AF_UNSPEC;
+
+	/*
+	 * Chew everything into struct group_source_req.
+	 * Overwrite the port field if present, as the sockaddr
+	 * being copied in may be matched with a binary comparison.
+	 * Ignore passed-in scope ID.
+	 */
+	switch (sopt->sopt_name) {
+	case IPV6_JOIN_GROUP: {
+		struct ipv6_mreq mreq;
+    		struct sockaddr_in6 *gsin6;
+
+		error = sooptcopyin(sopt, &mreq, sizeof(struct ipv6_mreq),
+		    sizeof(struct ipv6_mreq));
+		if (error)
+			return (error);
+		if (IN6_IS_ADDR_V4MAPPED(&mreq.ipv6mr_multiaddr)) {
+			struct ip_mreq v4mreq;
+			struct sockopt v4sopt;
+
+			v4mreq.imr_multiaddr.s_addr =
+			    mreq.ipv6mr_multiaddr.s6_addr32[3];
+			if (mreq.ipv6mr_interface == 0) 
+				v4mreq.imr_interface.s_addr = INADDR_ANY;
+			else
+				error = in6p_lookup_v4addr(&mreq, &v4mreq);
+			if (error)
+				return (error);
+			v4sopt.sopt_dir     = SOPT_SET;
+			v4sopt.sopt_level   = sopt->sopt_level; 
+			v4sopt.sopt_name    = IP_ADD_MEMBERSHIP;
+			v4sopt.sopt_val     = CAST_USER_ADDR_T(&v4mreq);
+			v4sopt.sopt_valsize = sizeof(v4mreq);
+			v4sopt.sopt_p       = kernproc;
+
+			return (inp_join_group(inp, &v4sopt));
+		}
+		gsa->sin6.sin6_family = AF_INET6;
+		gsa->sin6.sin6_len = sizeof(struct sockaddr_in6);
+		gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr;
+
+		gsin6 = &gsa->sin6;
+
+		/* Only allow IPv6 multicast addresses */	
+		if (IN6_IS_ADDR_MULTICAST(&gsin6->sin6_addr) == 0) {  
+			return (EINVAL);
+		}
+
+		if (mreq.ipv6mr_interface == 0) {
+			ifp = in6p_lookup_mcast_ifp(inp, gsin6);
+		} else {
+			ifnet_head_lock_shared();
+			if ((u_int)if_index < mreq.ipv6mr_interface) {
+				ifnet_head_done();
+				return (EADDRNOTAVAIL);
+			}
+			ifp = ifindex2ifnet[mreq.ipv6mr_interface];
+			ifnet_head_done();
+		}
+		MLD_PRINTF(("%s: ipv6mr_interface = %d, ifp = %p\n",
+		    __func__, mreq.ipv6mr_interface, ifp));
+		break;
+	}
+
+	case MCAST_JOIN_GROUP:
+	case MCAST_JOIN_SOURCE_GROUP:
+		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
+			error = sooptcopyin(sopt, &gsr,
+			    sizeof(struct group_req),
+			    sizeof(struct group_req));
+		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
+			error = sooptcopyin(sopt, &gsr,
+			    sizeof(struct group_source_req),
+			    sizeof(struct group_source_req));
+		}
+		if (error)
+			return (error);
+
+		if (gsa->sin6.sin6_family != AF_INET6 ||
+		    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
+			return (EINVAL);
+
+		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
+			if (ssa->sin6.sin6_family != AF_INET6 ||
+			    ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
+				return (EINVAL);
+			if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr))
+				return (EINVAL);
+			/*
+			 * TODO: Validate embedded scope ID in source
+			 * list entry against passed-in ifp, if and only
+			 * if source list filter entry is iface or node local.
+			 */
+			in6_clearscope(&ssa->sin6.sin6_addr);
+			ssa->sin6.sin6_port = 0;
+			ssa->sin6.sin6_scope_id = 0;
+		}
+
+		ifnet_head_lock_shared();
+		if (gsr.gsr_interface == 0 ||
+		    (u_int)if_index < gsr.gsr_interface) {
+			ifnet_head_done();
+			return (EADDRNOTAVAIL);
+		}
+		ifp = ifindex2ifnet[gsr.gsr_interface];
+		ifnet_head_done();
+		break;
+
+	default:
+		MLD_PRINTF(("%s: unknown sopt_name %d\n",
+		    __func__, sopt->sopt_name));
+		return (EOPNOTSUPP);
+		break;
+	}
+
+	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
+		return (EINVAL);
+
+	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
+		return (EADDRNOTAVAIL);
+
+	gsa->sin6.sin6_port = 0;
+	gsa->sin6.sin6_scope_id = 0;
+
+	/*
+	 * Always set the scope zone ID on memberships created from userland.
+	 * Use the passed-in ifp to do this.
+	 */
+	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, &scopeid);
+	/*
+	 * Some addresses are not valid without an embedded scopeid.
+	 * This check must be present because otherwise we will later hit
+	 * a VERIFY() in in6_mc_join().
+	 */
+	if ((IN6_IS_ADDR_MC_LINKLOCAL(&gsa->sin6.sin6_addr) ||
+	    IN6_IS_ADDR_MC_INTFACELOCAL(&gsa->sin6.sin6_addr)) && scopeid == 0)
+		return (EINVAL);
+
+	imo = in6p_findmoptions(inp);
+	if (imo == NULL)
+		return (ENOMEM);
+
+	IM6O_LOCK(imo);
+	idx = im6o_match_group(imo, ifp, &gsa->sa);
+	if (idx == (size_t)-1) {
+		is_new = 1;
+	} else {
+		inm = imo->im6o_membership[idx];
+		imf = &imo->im6o_mfilters[idx];
+		if (ssa->ss.ss_family != AF_UNSPEC) {
+			/*
+			 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership
+			 * is an error. On an existing inclusive membership,
+			 * it just adds the source to the filter list.
+			 */
+			if (imf->im6f_st[1] != MCAST_INCLUDE) {
+				error = EINVAL;
+				goto out_imo_locked;
+			}
+			/*
+			 * Throw out duplicates.
+			 *
+			 * XXX FIXME: This makes a naive assumption that
+			 * even if entries exist for *ssa in this imf,
+			 * they will be rejected as dupes, even if they
+			 * are not valid in the current mode (in-mode).
+			 *
+			 * in6_msource is transactioned just as for anything
+			 * else in SSM -- but note naive use of in6m_graft()
+			 * below for allocating new filter entries.
+			 *
+			 * This is only an issue if someone mixes the
+			 * full-state SSM API with the delta-based API,
+			 * which is discouraged in the relevant RFCs.
+			 */
+			lims = im6o_match_source(imo, idx, &ssa->sa);
+			if (lims != NULL /*&&
+			    lims->im6sl_st[1] == MCAST_INCLUDE*/) {
+				error = EADDRNOTAVAIL;
+				goto out_imo_locked;
+			}
+		} else {
+			/*
+			 * MCAST_JOIN_GROUP on an existing exclusive
+			 * membership is an error; return EADDRINUSE
+			 * to preserve 4.4BSD API idempotence, and
+			 * avoid tedious detour to code below.
+			 * NOTE: This is bending RFC 3678 a bit.
+			 *
+			 * On an existing inclusive membership, this is also
+			 * an error; if you want to change filter mode,
+			 * you must use the userland API setsourcefilter().
+			 * XXX We don't reject this for imf in UNDEFINED
+			 * state at t1, because allocation of a filter
+			 * is atomic with allocation of a membership.
+			 */
+			error = EINVAL;
+			/* See comments above for EADDRINUSE */
+			if (imf->im6f_st[1] == MCAST_EXCLUDE)
+				error = EADDRINUSE;
+			goto out_imo_locked;
+		}
+	}
+
+	/*
+	 * Begin state merge transaction at socket layer.
+	 */
+
+	if (is_new) {
+		if (imo->im6o_num_memberships == imo->im6o_max_memberships) {
+			error = im6o_grow(imo, 0);
+			if (error)
+				goto out_imo_locked;
+		}
+		/*
+		 * Allocate the new slot upfront so we can deal with
+		 * grafting the new source filter in same code path
+		 * as for join-source on existing membership.
+		 */
+		idx = imo->im6o_num_memberships;
+		imo->im6o_membership[idx] = NULL;
+		imo->im6o_num_memberships++;
+		VERIFY(imo->im6o_mfilters != NULL);
+		imf = &imo->im6o_mfilters[idx];
+		VERIFY(RB_EMPTY(&imf->im6f_sources));
+	}
+
+	/*
+	 * Graft new source into filter list for this inpcb's
+	 * membership of the group. The in6_multi may not have
+	 * been allocated yet if this is a new membership, however,
+	 * the in_mfilter slot will be allocated and must be initialized.
+	 *
+	 * Note: Grafting of exclusive mode filters doesn't happen
+	 * in this path.
+	 * XXX: Should check for non-NULL lims (node exists but may
+	 * not be in-mode) for interop with full-state API.
+	 */
+	if (ssa->ss.ss_family != AF_UNSPEC) {
+		/* Membership starts in IN mode */
+		if (is_new) {
+			MLD_PRINTF(("%s: new join w/source\n", __func__);
+			im6f_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE));
+		} else {
+			MLD_PRINTF(("%s: %s source\n", __func__, "allow"));
+		}
+		lims = im6f_graft(imf, MCAST_INCLUDE, &ssa->sin6);
+		if (lims == NULL) {
+			MLD_PRINTF(("%s: merge imf state failed\n",
+			    __func__));
+			error = ENOMEM;
+			goto out_im6o_free;
+		}
+	} else {
+		/* No address specified; Membership starts in EX mode */
+		if (is_new) {
+			MLD_PRINTF(("%s: new join w/o source", __func__));
+			im6f_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE);
+		}
+	}
+
+	/*
+	 * Begin state merge transaction at MLD layer.
+	 */
+
+	if (is_new) {
+		VERIFY(inm == NULL);
+		error = in6_mc_join(ifp, &gsa->sin6.sin6_addr, imf, &inm, 0);
+		VERIFY(inm != NULL || error != 0);
+		if (error)
+			goto out_im6o_free;
+		imo->im6o_membership[idx] = inm; /* from in6_mc_join() */
+	} else {
+		MLD_PRINTF(("%s: merge inm state\n", __func__));
+		IN6M_LOCK(inm);
+		error = in6m_merge(inm, imf);
+		if (error) {
+			MLD_PRINTF(("%s: failed to merge inm state\n",
+			    __func__));
+			IN6M_UNLOCK(inm);
+			goto out_im6f_rollback;
+		}
+		MLD_PRINTF(("%s: doing mld downcall\n", __func__));
+		error = mld_change_state(inm, 0);
+		IN6M_UNLOCK(inm);
+		if (error) {
+			MLD_PRINTF(("%s: failed mld downcall\n",
+			    __func__));
+			goto out_im6f_rollback;
+		}
+	}
+
+out_im6f_rollback:
+	if (error) {
+		im6f_rollback(imf);
+		if (is_new)
+			im6f_purge(imf);
+		else
+			im6f_reap(imf);
+	} else {
+		im6f_commit(imf);
+	}
+
+out_im6o_free:
+	if (error && is_new) {
+		VERIFY(inm == NULL);
+		imo->im6o_membership[idx] = NULL;
+		--imo->im6o_num_memberships;
+	}
+
+out_imo_locked:
+	IM6O_UNLOCK(imo);
+	IM6O_REMREF(imo);	/* from in6p_findmoptions() */
+	return (error);
+}
+
+/*
+ * Leave an IPv6 multicast group on an inpcb, possibly with a source.
+ */
+static int
+in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
+{
+	struct ipv6_mreq		 mreq;
+	struct group_source_req		 gsr;
+	sockunion_t			*gsa, *ssa;
+	struct ifnet			*ifp;
+	struct in6_mfilter		*imf;
+	struct ip6_moptions		*imo;
+	struct in6_msource		*ims;
+	struct in6_multi		*inm = NULL;
+	uint32_t			 ifindex = 0;
+	size_t				 idx;
+	int				 error, is_final;
+
+	ifp = NULL;
+	error = 0;
+	is_final = 1;
+
+	memset(&gsr, 0, sizeof(struct group_source_req));
+	gsa = (sockunion_t *)&gsr.gsr_group;
+	gsa->ss.ss_family = AF_UNSPEC;
+	ssa = (sockunion_t *)&gsr.gsr_source;
+	ssa->ss.ss_family = AF_UNSPEC;
+
+	/*
+	 * Chew everything passed in up into a struct group_source_req
+	 * as that is easier to process.
+	 * Note: Any embedded scope ID in the multicast group passed
+	 * in by userland is ignored, the interface index is the recommended
+	 * mechanism to specify an interface; see below.
+	 */
+	switch (sopt->sopt_name) {
+	case IPV6_LEAVE_GROUP: {
+    		struct sockaddr_in6 *gsin6;
+
+		error = sooptcopyin(sopt, &mreq, sizeof(struct ipv6_mreq),
+		    sizeof(struct ipv6_mreq));
+		if (error)
+			return (error);
+		if (IN6_IS_ADDR_V4MAPPED(&mreq.ipv6mr_multiaddr)) {
+			struct ip_mreq v4mreq;
+			struct sockopt v4sopt;
+
+			v4mreq.imr_multiaddr.s_addr =
+			    mreq.ipv6mr_multiaddr.s6_addr32[3];
+			if (mreq.ipv6mr_interface == 0) 
+				v4mreq.imr_interface.s_addr = INADDR_ANY;
+			else
+				error = in6p_lookup_v4addr(&mreq, &v4mreq);
+			if (error)
+				return (error);
+			v4sopt.sopt_dir     = SOPT_SET;
+			v4sopt.sopt_level   = sopt->sopt_level; 
+			v4sopt.sopt_name    = IP_DROP_MEMBERSHIP;
+			v4sopt.sopt_val     = CAST_USER_ADDR_T(&v4mreq);
+			v4sopt.sopt_valsize = sizeof(v4mreq);
+			v4sopt.sopt_p       = kernproc;
+
+			return (inp_leave_group(inp, &v4sopt));
+		}
+		gsa->sin6.sin6_family = AF_INET6;
+		gsa->sin6.sin6_len = sizeof(struct sockaddr_in6);
+		gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr;
+		gsa->sin6.sin6_port = 0;
+		gsa->sin6.sin6_scope_id = 0;
+		ifindex = mreq.ipv6mr_interface;
+		gsin6 = &gsa->sin6;
+		/* Only allow IPv6 multicast addresses */	
+		if (IN6_IS_ADDR_MULTICAST(&gsin6->sin6_addr) == 0) {  
+			return (EINVAL);
+		}
+		break;
+	}
+
+	case MCAST_LEAVE_GROUP:
+	case MCAST_LEAVE_SOURCE_GROUP:
+		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
+			error = sooptcopyin(sopt, &gsr,
+			    sizeof(struct group_req),
+			    sizeof(struct group_req));
+		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
+			error = sooptcopyin(sopt, &gsr,
+			    sizeof(struct group_source_req),
+			    sizeof(struct group_source_req));
+		}
+		if (error)
+			return (error);
+
+		if (gsa->sin6.sin6_family != AF_INET6 ||
+		    gsa->sin6.sin6_len != sizeof(struct sockaddr_in6))
+			return (EINVAL);
+		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
+			if (ssa->sin6.sin6_family != AF_INET6 ||
+			    ssa->sin6.sin6_len != sizeof(struct sockaddr_in6))
+				return (EINVAL);
+			if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr))
+				return (EINVAL);
+			/*
+			 * TODO: Validate embedded scope ID in source
+			 * list entry against passed-in ifp, if and only
+			 * if source list filter entry is iface or node local.
+			 */
+			in6_clearscope(&ssa->sin6.sin6_addr);
+		}
+		gsa->sin6.sin6_port = 0;
+		gsa->sin6.sin6_scope_id = 0;
+		ifindex = gsr.gsr_interface;
+		break;
+
+	default:
+		MLD_PRINTF(("%s: unknown sopt_name %d\n",
+		    __func__, sopt->sopt_name));
+		return (EOPNOTSUPP);
+		break;
+	}
+
+	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
+		return (EINVAL);
+
+	/*
+	 * Validate interface index if provided. If no interface index
+	 * was provided separately, attempt to look the membership up
+	 * from the default scope as a last resort to disambiguate
+	 * the membership we are being asked to leave.
+	 * XXX SCOPE6 lock potentially taken here.
+	 */
+	if (ifindex != 0) {
+		ifnet_head_lock_shared();
+		if ((u_int)if_index < ifindex) {
+			ifnet_head_done();
+			return (EADDRNOTAVAIL);
+		}
+		ifp = ifindex2ifnet[ifindex];
+		ifnet_head_done();
+		if (ifp == NULL)
+			return (EADDRNOTAVAIL);
+		(void) in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
+	} else {
+		error = sa6_embedscope(&gsa->sin6, ip6_use_defzone);
+		if (error)
+			return (EADDRNOTAVAIL);
+		/*
+		 * Some badly behaved applications don't pass an ifindex
+		 * or a scope ID, which is an API violation. In this case,
+		 * perform a lookup as per a v6 join.
+		 *
+		 * XXX For now, stomp on zone ID for the corner case.
+		 * This is not the 'KAME way', but we need to see the ifp
+		 * directly until such time as this implementation is
+		 * refactored, assuming the scope IDs are the way to go.
+		 */
+		ifindex = ntohs(gsa->sin6.sin6_addr.s6_addr16[1]);
+		if (ifindex == 0) {
+			MLD_PRINTF(("%s: warning: no ifindex, looking up "
+			    "ifp for group %s.\n", __func__,
+			    ip6_sprintf(&gsa->sin6.sin6_addr)));
+			ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6);
+		} else {
+			ifnet_head_lock_shared();
+			ifp = ifindex2ifnet[ifindex];
+			ifnet_head_done();
+		}
+		if (ifp == NULL)
+			return (EADDRNOTAVAIL);
+	}
+
+	VERIFY(ifp != NULL);
+	MLD_PRINTF(("%s: ifp = %p\n", __func__, ifp));
+
+	/*
+	 * Find the membership in the membership array.
+	 */
+	imo = in6p_findmoptions(inp);
+	if (imo == NULL)
+		return (ENOMEM);
+
+	IM6O_LOCK(imo);
+	idx = im6o_match_group(imo, ifp, &gsa->sa);
+	if (idx == (size_t)-1) {
+		error = EADDRNOTAVAIL;
+		goto out_locked;
+	}
+	inm = imo->im6o_membership[idx];
+	imf = &imo->im6o_mfilters[idx];
+
+	if (ssa->ss.ss_family != AF_UNSPEC)
+		is_final = 0;
+
+	/*
+	 * Begin state merge transaction at socket layer.
+	 */
+
+	/*
+	 * If we were instructed only to leave a given source, do so.
+	 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
+	 */
+	if (is_final) {
+		im6f_leave(imf);
+	} else {
+		if (imf->im6f_st[0] == MCAST_EXCLUDE) {
+			error = EADDRNOTAVAIL;
+			goto out_locked;
+		}
+		ims = im6o_match_source(imo, idx, &ssa->sa);
+		if (ims == NULL) {
+			MLD_PRINTF(("%s: source %p %spresent\n", __func__,
+			    ip6_sprintf(&ssa->sin6.sin6_addr),
+			    "not "));
+			error = EADDRNOTAVAIL;
+			goto out_locked;
+		}
+		MLD_PRINTF(("%s: %s source\n", __func__, "block"));
+		error = im6f_prune(imf, &ssa->sin6);
+		if (error) {
+			MLD_PRINTF(("%s: merge imf state failed\n",
+			    __func__));
+			goto out_locked;
+		}
+	}
+
+	/*
+	 * Begin state merge transaction at MLD layer.
+	 */
+
+	if (is_final) {
+		/*
+		 * Give up the multicast address record to which
+		 * the membership points.  Reference held in im6o
+		 * will be released below.
+		 */
+		(void) in6_mc_leave(inm, imf);
+	} else {
+		MLD_PRINTF(("%s: merge inm state\n", __func__));
+		IN6M_LOCK(inm);
+		error = in6m_merge(inm, imf);
+		if (error) {
+			MLD_PRINTF(("%s: failed to merge inm state\n",
+			    __func__));
+			IN6M_UNLOCK(inm);
+			goto out_im6f_rollback;
+		}
+
+		MLD_PRINTF(("%s: doing mld downcall\n", __func__));
+		error = mld_change_state(inm, 0);
+		if (error) {
+			MLD_PRINTF(("%s: failed mld downcall\n", __func__));
+		}
+		IN6M_UNLOCK(inm);
+	}
+
+out_im6f_rollback:
+	if (error)
+		im6f_rollback(imf);
+	else
+		im6f_commit(imf);
+
+	im6f_reap(imf);
+
+	if (is_final) {
+		/* Remove the gap in the membership array. */
+		VERIFY(inm == imo->im6o_membership[idx]);
+		imo->im6o_membership[idx] = NULL;
+		IN6M_REMREF(inm);
+		for (++idx; idx < imo->im6o_num_memberships; ++idx) {
+			imo->im6o_membership[idx-1] = imo->im6o_membership[idx];
+			imo->im6o_mfilters[idx-1] = imo->im6o_mfilters[idx];
+		}
+		imo->im6o_num_memberships--;
+	}
+
+out_locked:
+	IM6O_UNLOCK(imo);
+	IM6O_REMREF(imo);	/* from in6p_findmoptions() */
+	return (error);
+}
+
+/*
+ * Select the interface for transmitting IPv6 multicast datagrams.
+ *
+ * The option value is an interface index (u_int). An interface
+ * index of 0 is used to remove a previous selection.
+ * When no interface is selected, one is chosen for every send.
+ */
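+/*
+ * A minimal userland sketch of selecting the transmit interface
+ * (illustrative only; the socket s and the interface name are
+ * assumptions of the example):
+ *
+ *	u_int idx = if_nametoindex("en0");
+ *	setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_IF, &idx, sizeof (idx));
+ */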
+static int
+in6p_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
+{
+	struct ifnet		*ifp;
+	struct ip6_moptions	*imo;
+	u_int			 ifindex;
+	int			 error;
+
+	if (sopt->sopt_valsize != sizeof(u_int))
+		return (EINVAL);
+
+	error = sooptcopyin(sopt, &ifindex, sizeof(u_int), sizeof(u_int));
+	if (error)
+		return (error);
+
+	ifnet_head_lock_shared();
+	if ((u_int)if_index < ifindex) {
+		ifnet_head_done();
+		return (EINVAL);
+	}
+
+	ifp = ifindex2ifnet[ifindex];
+	ifnet_head_done();
+	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
+		return (EADDRNOTAVAIL);
+
+	imo = in6p_findmoptions(inp);
+	if (imo == NULL)
+		return (ENOMEM);
+
+	IM6O_LOCK(imo);
+	imo->im6o_multicast_ifp = ifp;
+	IM6O_UNLOCK(imo);
+	IM6O_REMREF(imo);	/* from in6p_findmoptions() */
+
+	return (0);
+}
+
+/*
+ * Atomically set source filters on a socket for an IPv6 multicast group.
+ */
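+/*
+ * Userland reaches this handler through IPV6_MSFILTER. A sketch of the
+ * equivalent RFC 3678 library call, assuming the setsourcefilter(3)
+ * wrapper is available in libc (the socket s, group sockaddr, source
+ * count, and source list are assumptions of the example):
+ *
+ *	setsourcefilter(s, if_nametoindex("en0"),
+ *	    (struct sockaddr *)&grp_sa6, sizeof (grp_sa6),
+ *	    MCAST_INCLUDE, nsrcs, srclist);
+ */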
+static int
+in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
+{
+	struct __msfilterreq64	 msfr, msfr64;
+	struct __msfilterreq32	 msfr32;
+	sockunion_t		*gsa;
+	struct ifnet		*ifp;
+	struct in6_mfilter	*imf;
+	struct ip6_moptions	*imo;
+	struct in6_multi	*inm;
+	size_t			 idx;
+	int			 error;
+	user_addr_t 		 tmp_ptr;
+
+	if (IS_64BIT_PROCESS(current_proc())) {
+		error = sooptcopyin(sopt, &msfr64,
+		    sizeof(struct __msfilterreq64),
+		    sizeof(struct __msfilterreq64));
+		if (error)
+			return (error);
+		/* we never use msfr.msfr_srcs; */
+		memcpy(&msfr, &msfr64, sizeof(msfr));
+	} else {
+		error = sooptcopyin(sopt, &msfr32,
+		    sizeof(struct __msfilterreq32),
+		    sizeof(struct __msfilterreq32));
+		if (error)
+			return (error);
+		/* we never use msfr.msfr_srcs; */
+		memcpy(&msfr, &msfr32, sizeof(msfr));
+	}
+
+	if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc)
+		return (ENOBUFS);
+
+	if (msfr.msfr_fmode != MCAST_EXCLUDE &&
+	     msfr.msfr_fmode != MCAST_INCLUDE)
+		return (EINVAL);
+
+	if (msfr.msfr_group.ss_family != AF_INET6 ||
+	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in6))
+		return (EINVAL);
+
+	gsa = (sockunion_t *)&msfr.msfr_group;
+	if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr))
+		return (EINVAL);
+
+	gsa->sin6.sin6_port = 0;	/* ignore port */
+
+	ifnet_head_lock_shared();
+	if (msfr.msfr_ifindex == 0 || (u_int)if_index < msfr.msfr_ifindex) {
+		ifnet_head_done();
+		return (EADDRNOTAVAIL);
+	}
+	ifp = ifindex2ifnet[msfr.msfr_ifindex];
+	ifnet_head_done();
+	if (ifp == NULL)
+		return (EADDRNOTAVAIL);
+
+	(void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL);
+
+	/*
+	 * Take the INP write lock.
+	 * Check if this socket is a member of this group.
+	 */
+	imo = in6p_findmoptions(inp);
+	if (imo == NULL)
+		return (ENOMEM);
+
+	IM6O_LOCK(imo);
+	idx = im6o_match_group(imo, ifp, &gsa->sa);
+	if (idx == (size_t)-1 || imo->im6o_mfilters == NULL) {
+		error = EADDRNOTAVAIL;
+		goto out_imo_locked;
+	}
+	inm = imo->im6o_membership[idx];
+	imf = &imo->im6o_mfilters[idx];
+
+	/*
+	 * Begin state merge transaction at socket layer.
+	 */
+
+	imf->im6f_st[1] = msfr.msfr_fmode;
+
+	/*
+	 * Apply any new source filters, if present.
+	 * Make a copy of the user-space source vector so
+	 * that we may fetch it with a single copyin. This
+	 * allows us to deal with page faults up-front.
+	 */
+	if (msfr.msfr_nsrcs > 0) {
+		struct in6_msource	*lims;
+		struct sockaddr_in6	*psin;
+		struct sockaddr_storage	*kss, *pkss;
+		unsigned int		 i;
+
+		if (IS_64BIT_PROCESS(current_proc())) 
+			tmp_ptr = msfr64.msfr_srcs;
+		else
+			tmp_ptr = CAST_USER_ADDR_T(msfr32.msfr_srcs);
+
+		MLD_PRINTF(("%s: loading %lu source list entries\n",
+		    __func__, (unsigned long)msfr.msfr_nsrcs));
+		kss = _MALLOC(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
+		    M_TEMP, M_WAITOK);
+		if (kss == NULL) {
+			error = ENOMEM;
+			goto out_imo_locked;
+		}
+
+		error = copyin(tmp_ptr, kss,
+		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
+		if (error) {
+			FREE(kss, M_TEMP);
+			goto out_imo_locked;
+		}
+
+		/*
+		 * Mark all source filters as UNDEFINED at t1.
+		 * Restore new group filter mode, as im6f_leave()
+		 * will set it to INCLUDE.
+		 */
+		im6f_leave(imf);
+		imf->im6f_st[1] = msfr.msfr_fmode;
+
+		/*
+		 * Update socket layer filters at t1, lazy-allocating
+		 * new entries. This saves a bunch of memory at the
+		 * cost of one RB_FIND() per source entry; duplicate
+		 * entries in the msfr_nsrcs vector are ignored.
+		 * If we encounter an error, rollback transaction.
+		 *
+		 * XXX This too could be replaced with a set-symmetric-
+		 * difference style loop to avoid walking from the root
+		 * every time, as the key space is common.
+		 */
+		for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
+			psin = (struct sockaddr_in6 *)pkss;
+			if (psin->sin6_family != AF_INET6) {
+				error = EAFNOSUPPORT;
+				break;
+			}
+			if (psin->sin6_len != sizeof(struct sockaddr_in6)) {
+				error = EINVAL;
+				break;
+			}
+			if (IN6_IS_ADDR_MULTICAST(&psin->sin6_addr)) {
+				error = EINVAL;
+				break;
+			}
+			/*
+			 * TODO: Validate embedded scope ID in source
+			 * list entry against passed-in ifp, if and only
+			 * if source list filter entry is iface or node local.
+			 */
+			in6_clearscope(&psin->sin6_addr);
+			error = im6f_get_source(imf, psin, &lims);
+			if (error)
+				break;
+			lims->im6sl_st[1] = imf->im6f_st[1];
+		}
+		FREE(kss, M_TEMP);
+	}
+
+	if (error)
+		goto out_im6f_rollback;
+
+	/*
+	 * Begin state merge transaction at MLD layer.
+	 */
+	IN6M_LOCK(inm);
+	MLD_PRINTF(("%s: merge inm state\n", __func__));
+	error = in6m_merge(inm, imf);
+	if (error) {
+		MLD_PRINTF(("%s: failed to merge inm state\n", __func__));
+		IN6M_UNLOCK(inm);
+		goto out_im6f_rollback;
+	}
+
+	MLD_PRINTF(("%s: doing mld downcall\n", __func__));
+	error = mld_change_state(inm, 0);
+	IN6M_UNLOCK(inm);
+#if MLD_DEBUG
+	if (error)
+		MLD_PRINTF(("%s: failed mld downcall\n", __func__));
+#endif
+
+out_im6f_rollback:
+	if (error)
+		im6f_rollback(imf);
+	else
+		im6f_commit(imf);
+
+	im6f_reap(imf);
+
+out_imo_locked:
+	IM6O_UNLOCK(imo);
+	IM6O_REMREF(imo);	/* from in6p_findmoptions() */
+
+	return (error);
+}
+
+/*
+ * Set the IPv6 multicast options in response to user setsockopt().
+ *
+ * Many of the socket options handled in this function duplicate the
+ * functionality of socket options in the regular unicast API. However,
+ * it is not possible to merge the duplicate code, because the idempotence
+ * of the IPv6 multicast part of the BSD Sockets API must be preserved;
+ * the effects of these options must be treated as separate and distinct.
+ */
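+/*
+ * A minimal userland sketch of the protocol-independent join handled by
+ * the MCAST_JOIN_GROUP case below (illustrative only; the socket s, the
+ * group sockaddr_in6, and the interface name are assumptions):
+ *
+ *	struct group_req greq;
+ *	memset(&greq, 0, sizeof (greq));
+ *	greq.gr_interface = if_nametoindex("en0");
+ *	memcpy(&greq.gr_group, &grp_sa6, sizeof (struct sockaddr_in6));
+ *	setsockopt(s, IPPROTO_IPV6, MCAST_JOIN_GROUP, &greq, sizeof (greq));
+ */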
+int
+ip6_setmoptions(struct inpcb *inp, struct sockopt *sopt)
+{
+	struct ip6_moptions	*im6o;
+	int			 error;
+
+	error = 0;
+
+	/*
+	 * If the socket is neither of type SOCK_RAW nor SOCK_DGRAM,
+	 * or is a divert socket, reject it.
+	 */
+	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
+	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
+	     inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
+		return (EOPNOTSUPP);
+
+	switch (sopt->sopt_name) {
+	case IPV6_MULTICAST_IF:
+		error = in6p_set_multicast_if(inp, sopt);
+		break;
+
+	case IPV6_MULTICAST_HOPS: {
+		int hlim;
+
+		if (sopt->sopt_valsize != sizeof(int)) {
+			error = EINVAL;
+			break;
+		}
+		error = sooptcopyin(sopt, &hlim, sizeof(hlim), sizeof(int));
+		if (error)
+			break;
+		if (hlim < -1 || hlim > 255) {
+			error = EINVAL;
+			break;
+		} else if (hlim == -1) {
+			hlim = ip6_defmcasthlim;
+		}
+		im6o = in6p_findmoptions(inp);
+		if (im6o == NULL) {
+			error = ENOMEM;
+			break;
+		}
+		IM6O_LOCK(im6o);
+		im6o->im6o_multicast_hlim = hlim;
+		IM6O_UNLOCK(im6o);
+		IM6O_REMREF(im6o);	/* from in6p_findmoptions() */
+		break;
+	}
+
+	case IPV6_MULTICAST_LOOP: {
+		u_int loop;
+
+		/*
+		 * Set the loopback flag for outgoing multicast packets.
+		 * Must be zero or one.
+		 */
+		if (sopt->sopt_valsize != sizeof(u_int)) {
+			error = EINVAL;
+			break;
+		}
+		error = sooptcopyin(sopt, &loop, sizeof(u_int), sizeof(u_int));
+		if (error)
+			break;
+		if (loop > 1) {
+			error = EINVAL;
+			break;
+		}
+		im6o = in6p_findmoptions(inp);
+		if (im6o == NULL) {
+			error = ENOMEM;
+			break;
+		}
+		IM6O_LOCK(im6o);
+		im6o->im6o_multicast_loop = loop;
+		IM6O_UNLOCK(im6o);
+		IM6O_REMREF(im6o);	/* from in6p_findmoptions() */
+		break;
+	}
+
+	case IPV6_JOIN_GROUP:
+	case MCAST_JOIN_GROUP:
+	case MCAST_JOIN_SOURCE_GROUP:
+		error = in6p_join_group(inp, sopt);
+		break;
+
+	case IPV6_LEAVE_GROUP:
+	case MCAST_LEAVE_GROUP:
+	case MCAST_LEAVE_SOURCE_GROUP:
+		error = in6p_leave_group(inp, sopt);
+		break;
+
+	case MCAST_BLOCK_SOURCE:
+	case MCAST_UNBLOCK_SOURCE:
+		error = in6p_block_unblock_source(inp, sopt);
+		break;
+
+	case IPV6_MSFILTER:
+		error = in6p_set_source_filters(inp, sopt);
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Expose MLD's multicast filter mode and source list(s) to userland,
+ * keyed by (ifindex, group).
+ * The filter mode is written out as a uint32_t, followed by
+ * 0..n instances of struct in6_addr.
+ * For use by ifmcstat(8).
+ */
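+/*
+ * A sketch of how userland (e.g. ifmcstat) would read this node,
+ * assuming it is registered as "net.inet6.ip6.mcast.filters" as in the
+ * BSD implementations; buf, ifindex, and grp are assumptions:
+ *
+ *	int mib[16];
+ *	size_t miblen = 10, len = sizeof (buf);
+ *	sysctlnametomib("net.inet6.ip6.mcast.filters", mib, &miblen);
+ *	mib[miblen] = ifindex;
+ *	memcpy(&mib[miblen + 1], &grp.sin6_addr, sizeof (struct in6_addr));
+ *	sysctl(mib, miblen + 5, buf, &len, NULL, 0);
+ *
+ * On success, buf holds a uint32_t filter mode followed by the in-mode
+ * sources as struct in6_addr entries.
+ */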
+static int
+sysctl_ip6_mcast_filters SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp)
+
+	struct in6_addr			 mcaddr;
+	struct in6_addr			 src;
+	struct ifnet			*ifp;
+	struct in6_multi		*inm;
+	struct in6_multistep		step;
+	struct ip6_msource		*ims;
+	int				*name;
+	int				 retval = 0;
+	u_int				 namelen;
+	uint32_t			 fmode, ifindex;
+
+	name = (int *)arg1;
+	namelen = arg2;
+
+	if (req->newptr != USER_ADDR_NULL)
+		return (EPERM);
+
+	/* int: ifindex + 4 * 32 bits of IPv6 address */
+	if (namelen != 5)
+		return (EINVAL);
+
+	ifindex = name[0];
+	ifnet_head_lock_shared();
+	if (ifindex <= 0 || ifindex > (u_int)if_index) {
+		MLD_PRINTF(("%s: ifindex %u out of range\n",
+		    __func__, ifindex));
+		ifnet_head_done();
+		return (ENOENT);
+	}
+
+	memcpy(&mcaddr, &name[1], sizeof(struct in6_addr));
+	if (!IN6_IS_ADDR_MULTICAST(&mcaddr)) {
+		MLD_PRINTF(("%s: group %s is not multicast\n",
+		    __func__, ip6_sprintf(&mcaddr)));
+		ifnet_head_done();
+		return (EINVAL);
+	}
+
+	ifp = ifindex2ifnet[ifindex];
+	ifnet_head_done();
+	if (ifp == NULL) {
+		MLD_PRINTF(("%s: no ifp for ifindex %u\n", __func__, ifindex));
+		return (ENOENT);
+	}
+	/*
+	 * Internal MLD lookups require that scope/zone ID is set.
+	 */
+	(void)in6_setscope(&mcaddr, ifp, NULL);
+
+	in6_multihead_lock_shared();
+	IN6_FIRST_MULTI(step, inm);
+	while (inm != NULL) {
+		IN6M_LOCK(inm);
+		if (inm->in6m_ifp != ifp)
+			goto next;
+
+		if (!IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, &mcaddr))
+			goto next;
+
+		fmode = inm->in6m_st[1].iss_fmode;
+		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
+		if (retval != 0) {
+			IN6M_UNLOCK(inm);
+			break;		/* abort */
+		}
+		RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) {
+			MLD_PRINTF(("%s: visit node %p\n", __func__, ims));
+			/*
+			 * Only copy-out sources which are in-mode.
+			 */
+			if (fmode != im6s_get_mode(inm, ims, 1)) {
+				MLD_PRINTF(("%s: skip non-in-mode\n",
+				    __func__));
+				continue; /* process next source */
+			}
+			src = ims->im6s_addr;
+			retval = SYSCTL_OUT(req, &src, sizeof(struct in6_addr));
+			if (retval != 0)
+				break;	/* process next inm */
+		}
+next:
+		IN6M_UNLOCK(inm);
+		IN6_NEXT_MULTI(step, inm);
+	}
+	in6_multihead_lock_done();
+
+	return (retval);
+}
+
+void
+in6_multi_init(void)
+{
+	PE_parse_boot_argn("ifa_debug", &in6m_debug, sizeof (in6m_debug));
+
+	/* Setup lock group and attribute for in6_multihead */
+	in6_multihead_lock_grp_attr = lck_grp_attr_alloc_init();
+	in6_multihead_lock_grp = lck_grp_alloc_init("in6_multihead",
+	    in6_multihead_lock_grp_attr);
+	in6_multihead_lock_attr = lck_attr_alloc_init();
+	lck_rw_init(&in6_multihead_lock, in6_multihead_lock_grp,
+	    in6_multihead_lock_attr);
+
+	lck_mtx_init(&in6m_trash_lock, in6_multihead_lock_grp,
+	    in6_multihead_lock_attr);
+	TAILQ_INIT(&in6m_trash_head);
+
+	in6m_size = (in6m_debug == 0) ? sizeof (struct in6_multi) :
+	    sizeof (struct in6_multi_dbg);
+	in6m_zone = zinit(in6m_size, IN6M_ZONE_MAX * in6m_size,
+	    0, IN6M_ZONE_NAME);
+	if (in6m_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, IN6M_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(in6m_zone, Z_EXPAND, TRUE);
+
+	imm_size = sizeof (struct in6_multi_mship);
+	imm_zone = zinit(imm_size, IMM_ZONE_MAX * imm_size, 0, IMM_ZONE_NAME);
+	if (imm_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, IMM_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(imm_zone, Z_EXPAND, TRUE);
+
+	ip6ms_size = sizeof (struct ip6_msource);
+	ip6ms_zone = zinit(ip6ms_size, IP6MS_ZONE_MAX * ip6ms_size,
+	    0, IP6MS_ZONE_NAME);
+	if (ip6ms_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, IP6MS_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(ip6ms_zone, Z_EXPAND, TRUE);
+
+	in6ms_size = sizeof (struct in6_msource);
+	in6ms_zone = zinit(in6ms_size, IN6MS_ZONE_MAX * in6ms_size,
+	    0, IN6MS_ZONE_NAME);
+	if (in6ms_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, IN6MS_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(in6ms_zone, Z_EXPAND, TRUE);
+}
+
+static struct in6_multi *
+in6_multi_alloc(int how)
+{
+	struct in6_multi *in6m;
+
+	in6m = (how == M_WAITOK) ? zalloc(in6m_zone) :
+	    zalloc_noblock(in6m_zone);
+	if (in6m != NULL) {
+		bzero(in6m, in6m_size);
+		lck_mtx_init(&in6m->in6m_lock, in6_multihead_lock_grp,
+		    in6_multihead_lock_attr);
+		in6m->in6m_debug |= IFD_ALLOC;
+		if (in6m_debug != 0) {
+			in6m->in6m_debug |= IFD_DEBUG;
+			in6m->in6m_trace = in6m_trace;
+		}
+	}
+	return (in6m);
+}
+
+static void
+in6_multi_free(struct in6_multi *in6m)
+{
+	IN6M_LOCK(in6m);
+	if (in6m->in6m_debug & IFD_ATTACHED) {
+		panic("%s: attached in6m=%p is being freed", __func__, in6m);
+		/* NOTREACHED */
+	} else if (in6m->in6m_ifma != NULL) {
+		panic("%s: ifma not NULL for in6m=%p", __func__, in6m);
+		/* NOTREACHED */
+	} else if (!(in6m->in6m_debug & IFD_ALLOC)) {
+		panic("%s: in6m %p cannot be freed", __func__, in6m);
+		/* NOTREACHED */
+	} else if (in6m->in6m_refcount != 0) {
+		panic("%s: non-zero refcount in6m=%p", __func__, in6m);
+		/* NOTREACHED */
+	} else if (in6m->in6m_reqcnt != 0) {
+		panic("%s: non-zero reqcnt in6m=%p", __func__, in6m);
+		/* NOTREACHED */
+	}
+
+	/* Free any pending MLDv2 state-change records */
+	IF_DRAIN(&in6m->in6m_scq);
+
+	in6m->in6m_debug &= ~IFD_ALLOC;
+	if ((in6m->in6m_debug & (IFD_DEBUG | IFD_TRASHED)) ==
+	    (IFD_DEBUG | IFD_TRASHED)) {
+		lck_mtx_lock(&in6m_trash_lock);
+		TAILQ_REMOVE(&in6m_trash_head, (struct in6_multi_dbg *)in6m,
+		    in6m_trash_link);
+		lck_mtx_unlock(&in6m_trash_lock);
+		in6m->in6m_debug &= ~IFD_TRASHED;
+	}
+	IN6M_UNLOCK(in6m);
+
+	lck_mtx_destroy(&in6m->in6m_lock, in6_multihead_lock_grp);
+	zfree(in6m_zone, in6m);
+}
+
+static void
+in6_multi_attach(struct in6_multi *in6m)
+{
+	in6_multihead_lock_assert(LCK_RW_ASSERT_EXCLUSIVE);
+	IN6M_LOCK_ASSERT_HELD(in6m);
+
+	if (in6m->in6m_debug & IFD_ATTACHED) {
+		panic("%s: Attempt to attach an already attached in6m=%p",
+		    __func__, in6m);
+		/* NOTREACHED */
+	} else if (in6m->in6m_debug & IFD_TRASHED) {
+		panic("%s: Attempt to reattach a detached in6m=%p",
+		    __func__, in6m);
+		/* NOTREACHED */
+	}
+
+	in6m->in6m_reqcnt++;
+	VERIFY(in6m->in6m_reqcnt == 1);
+	IN6M_ADDREF_LOCKED(in6m);
+	in6m->in6m_debug |= IFD_ATTACHED;
+	/*
+	 * Reattach case:  If debugging is enabled, take it
+	 * out of the trash list and clear IFD_TRASHED.
+	 */
+	if ((in6m->in6m_debug & (IFD_DEBUG | IFD_TRASHED)) ==
+	    (IFD_DEBUG | IFD_TRASHED)) {
+		/* Become a regular mutex, just in case */
+		IN6M_CONVERT_LOCK(in6m);
+		lck_mtx_lock(&in6m_trash_lock);
+		TAILQ_REMOVE(&in6m_trash_head, (struct in6_multi_dbg *)in6m,
+		    in6m_trash_link);
+		lck_mtx_unlock(&in6m_trash_lock);
+		in6m->in6m_debug &= ~IFD_TRASHED;
+	}
+
+	LIST_INSERT_HEAD(&in6_multihead, in6m, in6m_entry);
+}
+
+int
+in6_multi_detach(struct in6_multi *in6m)
+{
+	in6_multihead_lock_assert(LCK_RW_ASSERT_EXCLUSIVE);
+	IN6M_LOCK_ASSERT_HELD(in6m);
+
+	if (in6m->in6m_reqcnt == 0) {
+		panic("%s: in6m=%p negative reqcnt", __func__, in6m);
+		/* NOTREACHED */
+	}
+
+	--in6m->in6m_reqcnt;
+	if (in6m->in6m_reqcnt > 0)
+		return (0);
+
+	if (!(in6m->in6m_debug & IFD_ATTACHED)) {
+		panic("%s: Attempt to detach an unattached record in6m=%p",
+		    __func__, in6m);
+		/* NOTREACHED */
+	} else if (in6m->in6m_debug & IFD_TRASHED) {
+		panic("%s: in6m %p is already in trash list", __func__, in6m);
+		/* NOTREACHED */
+	}
+
+	/*
+	 * NOTE: Caller calls IFMA_REMREF
+	 */
+	in6m->in6m_debug &= ~IFD_ATTACHED;
+	LIST_REMOVE(in6m, in6m_entry);
+
+	if (in6m->in6m_debug & IFD_DEBUG) {
+		/* Become a regular mutex, just in case */
+		IN6M_CONVERT_LOCK(in6m);
+		lck_mtx_lock(&in6m_trash_lock);
+		TAILQ_INSERT_TAIL(&in6m_trash_head,
+		    (struct in6_multi_dbg *)in6m, in6m_trash_link);
+		lck_mtx_unlock(&in6m_trash_lock);
+		in6m->in6m_debug |= IFD_TRASHED;
+	}
+
+	return (1);
+}
+
+void
+in6m_addref(struct in6_multi *in6m, int locked)
+{
+	if (!locked)
+		IN6M_LOCK_SPIN(in6m);
+	else
+		IN6M_LOCK_ASSERT_HELD(in6m);
+
+	if (++in6m->in6m_refcount == 0) {
+		panic("%s: in6m=%p wraparound refcnt", __func__, in6m);
+		/* NOTREACHED */
+	} else if (in6m->in6m_trace != NULL) {
+		(*in6m->in6m_trace)(in6m, TRUE);
+	}
+	if (!locked)
+		IN6M_UNLOCK(in6m);
+}
+
+void
+in6m_remref(struct in6_multi *in6m, int locked)
+{
+	struct ifmultiaddr *ifma;
+	struct mld_ifinfo *mli;
+
+	if (!locked)
+		IN6M_LOCK_SPIN(in6m);
+	else
+		IN6M_LOCK_ASSERT_HELD(in6m);
+
+	if (in6m->in6m_refcount == 0 || (in6m->in6m_refcount == 1 && locked)) {
+		panic("%s: in6m=%p negative refcnt", __func__, in6m);
+		/* NOTREACHED */
+	} else if (in6m->in6m_trace != NULL) {
+		(*in6m->in6m_trace)(in6m, FALSE);
+	}
+
+	--in6m->in6m_refcount;
+	if (in6m->in6m_refcount > 0) {
+		if (!locked)
+			IN6M_UNLOCK(in6m);
+		return;
+	}
+
+	/*
+	 * Synchronization with in6_mc_get().  In the event the in6m has been
+	 * detached, the underlying ifma would still be in the if_multiaddrs
+	 * list, and thus can be looked up via if_addmulti().  At that point,
+	 * the only way to find this in6m is via ifma_protospec.  To avoid
+	 * race conditions between the last in6m_remref() of that in6m and its
+	 * use via ifma_protospec, in6_multihead lock is used for serialization.
+	 * In order to avoid violating the lock order, we must drop in6m_lock
+	 * before acquiring in6_multihead lock.  To prevent the in6m from being
+	 * freed prematurely, we hold an extra reference.
+	 */
+	++in6m->in6m_refcount;
+	IN6M_UNLOCK(in6m);
+	in6_multihead_lock_shared();
+	IN6M_LOCK_SPIN(in6m);
+	--in6m->in6m_refcount;
+	if (in6m->in6m_refcount > 0) {
+		/* We've lost the race, so abort since in6m is still in use */
+		IN6M_UNLOCK(in6m);
+		in6_multihead_lock_done();
+		/* If it was locked, return it as such */
+		if (locked)
+			IN6M_LOCK(in6m);
+		return;
+	}
+	in6m_purge(in6m);
+	ifma = in6m->in6m_ifma;
+	in6m->in6m_ifma = NULL;
+	in6m->in6m_ifp = NULL;
+	mli = in6m->in6m_mli;
+	in6m->in6m_mli = NULL;
+	IN6M_UNLOCK(in6m);
+	IFMA_LOCK_SPIN(ifma);
+	ifma->ifma_protospec = NULL;
+	IFMA_UNLOCK(ifma);
+	in6_multihead_lock_done();
+
+	in6_multi_free(in6m);
+	if_delmulti_ifma(ifma);
+	/* Release reference held to the underlying ifmultiaddr */
+	IFMA_REMREF(ifma);
+
+	if (mli != NULL)
+		MLI_REMREF(mli);
+}
+
+static void
+in6m_trace(struct in6_multi *in6m, int refhold)
+{
+	struct in6_multi_dbg *in6m_dbg = (struct in6_multi_dbg *)in6m;
+	ctrace_t *tr;
+	u_int32_t idx;
+	u_int16_t *cnt;
+
+	if (!(in6m->in6m_debug & IFD_DEBUG)) {
+		panic("%s: in6m %p has no debug structure", __func__, in6m);
+		/* NOTREACHED */
+	}
+	if (refhold) {
+		cnt = &in6m_dbg->in6m_refhold_cnt;
+		tr = in6m_dbg->in6m_refhold;
+	} else {
+		cnt = &in6m_dbg->in6m_refrele_cnt;
+		tr = in6m_dbg->in6m_refrele;
+	}
+
+	idx = atomic_add_16_ov(cnt, 1) % IN6M_TRACE_HIST_SIZE;
+	ctrace_record(&tr[idx]);
+}
+
+static struct in6_multi_mship *
+in6_multi_mship_alloc(int how)
+{
+	struct in6_multi_mship *imm;
+
+	imm = (how == M_WAITOK) ? zalloc(imm_zone) : zalloc_noblock(imm_zone);
+	if (imm != NULL)
+		bzero(imm, imm_size);
+
+	return (imm);
+}
+
+static void
+in6_multi_mship_free(struct in6_multi_mship *imm)
+{
+	if (imm->i6mm_maddr != NULL) {
+		panic("%s: i6mm_maddr not NULL for imm=%p", __func__, imm);
+		/* NOTREACHED */
+	}
+	zfree(imm_zone, imm);
+}
+
+void
+in6_multihead_lock_exclusive(void)
+{
+	lck_rw_lock_exclusive(&in6_multihead_lock);
+}
+
+void
+in6_multihead_lock_shared(void)
+{
+	lck_rw_lock_shared(&in6_multihead_lock);
+}
+
+void
+in6_multihead_lock_assert(int what)
+{
+	lck_rw_assert(&in6_multihead_lock, what);
+}
+
+void
+in6_multihead_lock_done(void)
+{
+	lck_rw_done(&in6_multihead_lock);
+}
+
+static struct ip6_msource *
+ip6ms_alloc(int how)
+{
+	struct ip6_msource *i6ms;
+
+	i6ms = (how == M_WAITOK) ? zalloc(ip6ms_zone) :
+	    zalloc_noblock(ip6ms_zone);
+	if (i6ms != NULL)
+		bzero(i6ms, ip6ms_size);
+
+	return (i6ms);
+}
+
+static void
+ip6ms_free(struct ip6_msource *i6ms)
+{
+	zfree(ip6ms_zone, i6ms);
+}
+
+static struct in6_msource *
+in6ms_alloc(int how)
+{
+	struct in6_msource *in6ms;
+
+	in6ms = (how == M_WAITOK) ? zalloc(in6ms_zone) :
+	    zalloc_noblock(in6ms_zone);
+	if (in6ms != NULL)
+		bzero(in6ms, in6ms_size);
+
+	return (in6ms);
+}
+
+static void
+in6ms_free(struct in6_msource *in6ms)
+{
+	zfree(in6ms_zone, in6ms);
+}
+
+#ifdef MLD_DEBUG
+
+static const char *in6m_modestrs[] = { "un", "in", "ex" };
+
+static const char *
+in6m_mode_str(const int mode)
+{
+	if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
+		return (in6m_modestrs[mode]);
+	return ("??");
+}
+
+static const char *in6m_statestrs[] = {
+	"not-member",
+	"silent",
+	"idle",
+	"lazy",
+	"sleeping",
+	"awakening",
+	"query-pending",
+	"sg-query-pending",
+	"leaving"
+};
+
+static const char *
+in6m_state_str(const int state)
+{
+	if (state >= MLD_NOT_MEMBER && state <= MLD_LEAVING_MEMBER)
+		return (in6m_statestrs[state]);
+	return ("??");
+}
+
+/*
+ * Dump an in6_multi structure to the console.
+ */
+void
+in6m_print(const struct in6_multi *inm)
+{
+	int t;
+
+	IN6M_LOCK_ASSERT_HELD(IN6M_CAST_TO_NONCONST(inm));
+
+	if (mld_debug == 0)
+		return;
+
+	printf("%s: --- begin in6m %p ---\n", __func__, inm);
+	printf("addr %s ifp %p(%s%d) ifma %p\n",
+	    ip6_sprintf(&inm->in6m_addr),
+	    inm->in6m_ifp,
+	    inm->in6m_ifp->if_name,
+	    inm->in6m_ifp->if_unit,
+	    inm->in6m_ifma);
+	printf("timer %u state %s refcount %u scq.len %u\n",
+	    inm->in6m_timer,
+	    in6m_state_str(inm->in6m_state),
+	    inm->in6m_refcount,
+	    inm->in6m_scq.ifq_len);
+	printf("mli %p nsrc %lu sctimer %u scrv %u\n",
+	    inm->in6m_mli,
+	    inm->in6m_nsrc,
+	    inm->in6m_sctimer,
+	    inm->in6m_scrv);
+	for (t = 0; t < 2; t++) {
+		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
+		    in6m_mode_str(inm->in6m_st[t].iss_fmode),
+		    inm->in6m_st[t].iss_asm,
+		    inm->in6m_st[t].iss_ex,
+		    inm->in6m_st[t].iss_in,
+		    inm->in6m_st[t].iss_rec);
+	}
+	printf("%s: --- end in6m %p ---\n", __func__, inm);
+}
+
+#else
+
+void
+in6m_print(__unused const struct in6_multi *inm)
+{
+
+}
+
+#endif
diff --git a/bsd/netinet6/in6_pcb.c b/bsd/netinet6/in6_pcb.c
index 20f39a34d..2ea4d7a5d 100644
--- a/bsd/netinet6/in6_pcb.c
+++ b/bsd/netinet6/in6_pcb.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -102,6 +102,8 @@
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/proc.h>
+#include <sys/kauth.h>
+#include <sys/priv.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
@@ -160,7 +162,7 @@ in6_pcblookup_local_and_cleanup(
 	if (inp && inp->inp_wantcnt == WNT_STOPUSING) {
 		struct socket *so = inp->inp_socket;
 		
-		lck_mtx_lock(inp->inpcb_mtx);
+		lck_mtx_lock(&inp->inpcb_mtx);
 		
 		if (so->so_usecount == 0) {
 			if (inp->inp_state != INPCB_STATE_DEAD)
@@ -169,23 +171,23 @@ in6_pcblookup_local_and_cleanup(
 			inp = NULL;
 		}
 		else {
-			lck_mtx_unlock(inp->inpcb_mtx);
+			lck_mtx_unlock(&inp->inpcb_mtx);
 		}
 	}
 	
 	return inp;
 }
+
 int
-in6_pcbbind(
-	struct inpcb *inp,
-	struct sockaddr *nam,
-	struct proc *p)
+in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
 {
 	struct socket *so = inp->inp_socket;
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL;
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	u_short	lport = 0;
 	int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
+	int error;
+	kauth_cred_t cred;
 
 	if (!in6_ifaddrs) /* XXX broken! */
 		return (EADDRNOTAVAIL);
@@ -196,6 +198,8 @@ in6_pcbbind(
 	socket_unlock(so, 0); /* keep reference */
 	lck_rw_lock_exclusive(pcbinfo->mtx);
 	if (nam) {
+		unsigned int outif = 0;
+
 		sin6 = (struct sockaddr_in6 *)nam;
 		if (nam->sa_len != sizeof(*sin6)) {
 			lck_rw_done(pcbinfo->mtx);
@@ -212,7 +216,8 @@ in6_pcbbind(
 		}
 
 		/* KAME hack: embed scopeid */
-		if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL) != 0) {
+		if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL,
+		    NULL) != 0) {
 			lck_rw_done(pcbinfo->mtx);
 			socket_lock(so, 0);
 			return EINVAL;
@@ -232,10 +237,10 @@ in6_pcbbind(
 			if (so->so_options & SO_REUSEADDR)
 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
-			struct ifaddr *ia = NULL;
+			struct ifaddr *ifa;
 
 			sin6->sin6_port = 0;		/* yech... */
-			if ((ia = ifa_ifwithaddr((struct sockaddr *)sin6)) == 0) {
+			if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin6)) == 0) {
 				lck_rw_done(pcbinfo->mtx);
 				socket_lock(so, 0);
 				return(EADDRNOTAVAIL);
@@ -247,26 +252,34 @@ in6_pcbbind(
 			 * We should allow to bind to a deprecated address, since
 			 * the application dare to use it.
 			 */
-			if (ia &&
-			    ((struct in6_ifaddr *)ia)->ia6_flags &
-			    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) {
-			    	ifafree(ia);
-				lck_rw_done(pcbinfo->mtx);
-				socket_lock(so, 0);
-				return(EADDRNOTAVAIL);
+			if (ifa != NULL) {
+				IFA_LOCK_SPIN(ifa);
+				if (((struct in6_ifaddr *)ifa)->ia6_flags &
+				    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) {
+					IFA_UNLOCK(ifa);
+					IFA_REMREF(ifa);
+					lck_rw_done(pcbinfo->mtx);
+					socket_lock(so, 0);
+					return(EADDRNOTAVAIL);
+				}
+				outif = ifa->ifa_ifp->if_index;
+				IFA_UNLOCK(ifa);
+				IFA_REMREF(ifa);
 			}
-			ifafree(ia);
-			ia = NULL;
 		}
 		if (lport) {
 			struct inpcb *t;
 
 			/* GROSS */
-			if (ntohs(lport) < IPV6PORT_RESERVED &&
-                            ((so->so_state & SS_PRIV) == 0)) {
-				lck_rw_done(pcbinfo->mtx);
-				socket_lock(so, 0);
-				return(EACCES);
+			if (ntohs(lport) < IPV6PORT_RESERVED) {
+				cred = kauth_cred_proc_ref(p);
+				error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
+				kauth_cred_unref(&cred);
+				if (error != 0) {
+					lck_rw_done(pcbinfo->mtx);
+					socket_lock(so, 0);
+					return(EACCES);
+				}
 			}
 
 			if (so->so_uid &&
@@ -335,6 +348,7 @@ in6_pcbbind(
 			}
 		}
 		inp->in6p_laddr = sin6->sin6_addr;
+		inp->in6p_last_outif = outif;
 	}
 	socket_lock(so, 0);
 	if (lport == 0) {
@@ -349,10 +363,11 @@ in6_pcbbind(
 		if (in_pcbinshash(inp, 1) != 0) {
 			inp->in6p_laddr = in6addr_any;
 			inp->inp_lport = 0;
+			inp->in6p_last_outif = 0;
 			lck_rw_done(pcbinfo->mtx);
 			return (EAGAIN);
 		}
-	}	
+	}
 	lck_rw_done(pcbinfo->mtx);
 	sflt_notify(so, sock_evt_bound, NULL);
 	return(0);
@@ -371,17 +386,14 @@ in6_pcbbind(
  */
 
 int
-in6_pcbladdr(
-	struct inpcb *inp,
-	struct sockaddr *nam,
-	struct in6_addr *plocal_addr6)
+in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
+    struct in6_addr *plocal_addr6, unsigned int *poutif)
 {
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
 	struct in6_addr *addr6 = NULL;
 	struct in6_addr src_storage;
-	
-	struct ifnet *ifp = NULL;
 	int error = 0;
+	unsigned int ifscope;
 
 	if (nam->sa_len != sizeof (*sin6))
 		return (EINVAL);
@@ -391,7 +403,7 @@ in6_pcbladdr(
 		return (EADDRNOTAVAIL);
 
 	/* KAME hack: embed scopeid */
-	if (in6_embedscope(&sin6->sin6_addr, sin6, inp, &ifp) != 0)
+	if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL, NULL) != 0)
 		return EINVAL;
 
 	if (in6_ifaddrs) {
@@ -402,33 +414,37 @@ in6_pcbladdr(
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
 			sin6->sin6_addr = in6addr_loopback;
 	}
-	{
-		/*
-		 * XXX: in6_selectsrc might replace the bound local address
-		 * with the address specified by setsockopt(IPV6_PKTINFO).
-		 * Is it the intended behavior?
-		 */
-		addr6 = in6_selectsrc(sin6, inp->in6p_outputopts,
-					      inp->in6p_moptions,
-					      &inp->in6p_route,
-					      &inp->in6p_laddr, &src_storage, &error);
-		if (addr6 == 0) {
-			if (error == 0)
-				error = EADDRNOTAVAIL;
-			return(error);
-		}
-		*plocal_addr6 = *addr6;
-		/*
-		 * Don't do pcblookup call here; return interface in
-		 * plocal_addr6
-		 * and exit to caller, that will do the lookup.
-		 */
+
+	ifscope = (inp->inp_flags & INP_BOUND_IF) ?
+	   inp->inp_boundif : IFSCOPE_NONE;
+
+	/*
+	 * XXX: in6_selectsrc might replace the bound local address
+	 * with the address specified by setsockopt(IPV6_PKTINFO).
+	 * Is it the intended behavior?
+	 */
+	addr6 = in6_selectsrc(sin6, inp->in6p_outputopts, inp,
+	    &inp->in6p_route, NULL, &src_storage, ifscope, &error);
+	if (addr6 == 0) {
+		if (error == 0)
+			error = EADDRNOTAVAIL;
+		return(error);
 	}
 
-	/* XXX: what is the point in doing this? */
-	if (inp->in6p_route.ro_rt)
-		ifp = inp->in6p_route.ro_rt->rt_ifp;
+	if (poutif != NULL) {
+		struct rtentry *rt;
+		if ((rt = inp->in6p_route.ro_rt) != NULL)
+			*poutif = rt->rt_ifp->if_index;
+		else
+			*poutif = 0;
+	}
 
+	*plocal_addr6 = *addr6;
+	/*
+	 * Don't do pcblookup call here; return interface in
+	 * plocal_addr6
+	 * and exit to caller, that will do the lookup.
+	 */
 	return(0);
 }
 
@@ -440,21 +456,22 @@ in6_pcbladdr(
  * then pick one.
  */
 int
-in6_pcbconnect(inp, nam, p)
-	struct inpcb *inp;
-	struct sockaddr *nam;
-	struct proc *p;
+in6_pcbconnect(
+	struct inpcb *inp,
+	struct sockaddr *nam,
+	struct proc *p)
 {
 	struct in6_addr addr6;
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
 	struct inpcb *pcb;
 	int error;
+	unsigned int outif = 0;
 
 	/*
 	 * Call inner routine, to assign local interface address.
 	 * in6_pcbladdr() may automatically fill in sin6_scope_id.
 	 */
-	if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0)
+	if ((error = in6_pcbladdr(inp, nam, &addr6, &outif)) != 0)
 		return(error);
 	socket_unlock(inp->inp_socket, 0);
 	pcb = in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr,
@@ -474,6 +491,7 @@ in6_pcbconnect(inp, nam, p)
 				return (error);
 		}
 		inp->in6p_laddr = addr6;
+		inp->in6p_last_outif = outif;
 	}
 	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
 		/*lock inversion issue, mostly with udp multicast packets */
@@ -495,8 +513,8 @@ in6_pcbconnect(inp, nam, p)
 }
 
 void
-in6_pcbdisconnect(inp)
-	struct inpcb *inp;
+in6_pcbdisconnect(
+	struct inpcb *inp)
 {
 	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
 		/*lock inversion issue, mostly with udp multicast packets */
@@ -515,8 +533,8 @@ in6_pcbdisconnect(inp)
 }
 
 void
-in6_pcbdetach(inp)
-	struct inpcb *inp;
+in6_pcbdetach(
+	struct inpcb *inp)
 {
 	struct socket *so = inp->inp_socket;
 	struct inpcbinfo *ipi = inp->inp_pcbinfo;
@@ -533,13 +551,15 @@ in6_pcbdetach(inp)
 	inp->inp_state = INPCB_STATE_DEAD;
 
 	if ((so->so_flags & SOF_PCBCLEARING) == 0) {
+		struct ip_moptions *imo;
+		struct ip6_moptions *im6o;
+
 		inp->inp_vflag = 0;
 		so->so_flags |= SOF_PCBCLEARING;
 		inp->inp_gencnt = ++ipi->ipi_gencnt;
 		if (inp->in6p_options)
 			m_freem(inp->in6p_options);
  		ip6_freepcbopts(inp->in6p_outputopts);
- 		ip6_freemoptions(inp->in6p_moptions);
 		if (inp->in6p_route.ro_rt) {
 			rtfree(inp->in6p_route.ro_rt);
 			inp->in6p_route.ro_rt = NULL;
@@ -547,16 +567,23 @@ in6_pcbdetach(inp)
 		/* Check and free IPv4 related resources in case of mapped addr */
 		if (inp->inp_options)
 			(void)m_free(inp->inp_options);
- 		ip_freemoptions(inp->inp_moptions);
+
+		im6o = inp->in6p_moptions;
+		inp->in6p_moptions = NULL;
+		if (im6o != NULL)
+			IM6O_REMREF(im6o);
+
+		imo = inp->inp_moptions;
 		inp->inp_moptions = NULL;
-	
+		if (imo != NULL)
+			IMO_REMREF(imo);
 	}
 }
 
 struct sockaddr *
-in6_sockaddr(port, addr_p)
-	in_port_t port;
-	struct in6_addr *addr_p;
+in6_sockaddr(
+	in_port_t port,
+	struct in6_addr *addr_p)
 {
 	struct sockaddr_in6 *sin6;
 
@@ -579,9 +606,9 @@ in6_sockaddr(port, addr_p)
 }
 
 struct sockaddr *
-in6_v4mapsin6_sockaddr(port, addr_p)
-	in_port_t port;
-	struct in_addr *addr_p;
+in6_v4mapsin6_sockaddr(
+	in_port_t port,
+	struct in_addr *addr_p)
 {
 	struct sockaddr_in sin;
 	struct sockaddr_in6 *sin6_p;
@@ -612,9 +639,9 @@ in6_v4mapsin6_sockaddr(port, addr_p)
  * because there actually /is/ a programming error somewhere... XXX)
  */
 int
-in6_setsockaddr(so, nam)
-	struct socket *so;
-	struct sockaddr **nam;
+in6_setsockaddr(
+	struct socket *so,
+	struct sockaddr **nam)
 {
 	struct inpcb *inp;
 	struct in6_addr addr;
@@ -634,9 +661,9 @@ in6_setsockaddr(so, nam)
 }
 
 int
-in6_setpeeraddr(so, nam)
-	struct socket *so;
-	struct sockaddr **nam;
+in6_setpeeraddr(
+	struct socket *so,
+	struct sockaddr **nam)
 {
 	struct inpcb *inp;
 	struct in6_addr addr;
@@ -701,17 +728,15 @@ in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam)
  * cmds that are uninteresting (e.g., no error in the map).
  * Call the protocol specific routine (if any) to report
  * any errors for each matching socket.
- *
- * Must be called at splnet.
  */
 void
-in6_pcbnotify(pcbinfo, dst, fport_arg, src, lport_arg, cmd, notify)
+in6_pcbnotify(pcbinfo, dst, fport_arg, src, lport_arg, cmd, cmdarg, notify)
 	struct inpcbinfo *pcbinfo;
 	struct sockaddr *dst;
 	const struct sockaddr *src;
 	u_int fport_arg, lport_arg;
 	int cmd;
-//	struct inpcb *(*notify)(struct inpcb *, int);
+	void *cmdarg;
 	void (*notify)(struct inpcb *, int);
 {
 	struct inpcb *inp, *ninp;
@@ -758,6 +783,22 @@ in6_pcbnotify(pcbinfo, dst, fport_arg, src, lport_arg, cmd, notify)
  		if ((inp->inp_vflag & INP_IPV6) == 0)
 			continue;
 
+		/*
+		 * If the error designates a new path MTU for a destination
+		 * and the application (associated with this socket) wanted to
+		 * know the value, notify. Note that we notify for all
+		 * disconnected sockets if the corresponding application
+		 * wanted it. This is because some UDP applications keep
+		 * their sending sockets disconnected.
+		 * XXX: should we avoid to notify the value to TCP sockets?
+		 */
+		if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 &&
+		    (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
+		     IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr))) {
+			ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst,
+					(u_int32_t *)cmdarg);
+		}
+
 		/*
 		 * Detect if we should notify the error. If no source and
 		 * destination ports are specifed, but non-zero flowinfo and
@@ -799,11 +840,11 @@ in6_pcbnotify(pcbinfo, dst, fport_arg, src, lport_arg, cmd, notify)
  * Lookup a PCB based on the local address and port.
  */
 struct inpcb *
-in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
-	struct inpcbinfo *pcbinfo;
-	struct in6_addr *laddr;
-	u_int lport_arg;
-	int wild_okay;
+in6_pcblookup_local(
+	struct inpcbinfo *pcbinfo,
+	struct in6_addr *laddr,
+	u_int lport_arg,
+	int wild_okay)
 {
 	struct inpcb *inp;
 	int matchwild = 3, wildcard;
@@ -883,47 +924,6 @@ in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
 		return (match);
 	}
 }
-#ifndef APPLE
-/* this is not used in Darwin */
-void
-in6_pcbpurgeif0(
-	struct in6pcb *head,
-	struct ifnet *ifp)
-{
-	struct in6pcb *in6p;
-	struct ip6_moptions *im6o;
-	struct in6_multi_mship *imm, *nimm;
-
-	for (in6p = head; in6p != NULL; in6p = LIST_NEXT(in6p, inp_list)) {
-		im6o = in6p->in6p_moptions;
-		if ((in6p->inp_vflag & INP_IPV6) &&
-		    im6o) {
-			/*
-			 * Unselect the outgoing interface if it is being
-			 * detached.
-			 */
-			if (im6o->im6o_multicast_ifp == ifp)
-				im6o->im6o_multicast_ifp = NULL;
-
-			/*
-			 * Drop multicast group membership if we joined
-			 * through the interface being detached.
-			 * XXX controversial - is it really legal for kernel
-			 * to force this?
-			 */
-			for (imm = im6o->im6o_memberships.lh_first;
-			     imm != NULL; imm = nimm) {
-				nimm = imm->i6mm_chain.le_next;
-				if (imm->i6mm_maddr->in6m_ifp == ifp) {
-					LIST_REMOVE(imm, i6mm_chain);
-					in6_delmulti(imm->i6mm_maddr);
-					FREE(imm, M_IPMADDR);
-				}
-			}
-		}
-	}
-}
-#endif
 
 /*
  * Check for alternatives when higher level complains
@@ -932,8 +932,8 @@ in6_pcbpurgeif0(
  * (by a redirect), time to try a default gateway again.
  */
 void
-in6_losing(in6p)
-	struct inpcb *in6p;
+in6_losing(
+	struct inpcb *in6p)
 {
 	struct rtentry *rt;
 	struct rt_addrinfo info;
@@ -987,6 +987,104 @@ in6_rtchange(
 	}
 }
 
+/*
+ * Check if a PCB exists in the hash list. Also returns the uid and
+ * gid of the socket.
+ */
+int
+in6_pcblookup_hash_exists(
+	struct inpcbinfo *pcbinfo,
+	struct in6_addr *faddr,
+	u_int fport_arg,
+	struct in6_addr *laddr,
+	u_int lport_arg,
+	int wildcard,
+	uid_t *uid,
+	gid_t *gid,
+	__unused struct ifnet *ifp)
+{
+	struct inpcbhead *head;
+	struct inpcb *inp;
+	u_short fport = fport_arg, lport = lport_arg;
+	int faith;
+	int found;
+
+#if defined(NFAITH) && NFAITH > 0
+	faith = faithprefix(laddr);
+#else
+	faith = 0;
+#endif
+
+	*uid = UID_MAX;
+	*gid = GID_MAX;
+
+	lck_rw_lock_shared(pcbinfo->mtx);
+
+	/*
+	 * First look for an exact match.
+	 */
+	head = &pcbinfo->hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */,
+					      lport, fport,
+					      pcbinfo->hashmask)];
+	LIST_FOREACH(inp, head, inp_hash) {
+		if ((inp->inp_vflag & INP_IPV6) == 0)
+			continue;
+		if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
+		    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
+		    inp->inp_fport == fport &&
+		    inp->inp_lport == lport) {
+			if ((found = (inp->inp_socket != NULL))) {
+				/*
+				 * Found. Check if pcb is still valid
+				 */
+				*uid = inp->inp_socket->so_uid;
+				*gid = inp->inp_socket->so_gid;
+			}
+			lck_rw_done(pcbinfo->mtx);
+			return (found);
+		}
+	}
+	if (wildcard) {
+		struct inpcb *local_wild = NULL;
+
+		head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
+						      pcbinfo->hashmask)];
+		LIST_FOREACH(inp, head, inp_hash) {
+			if ((inp->inp_vflag & INP_IPV6) == 0)
+				continue;
+			if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
+			    inp->inp_lport == lport) {
+				if (faith && (inp->inp_flags & INP_FAITH) == 0)
+					continue;
+				if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
+						       laddr)) {
+					if ((found = (inp->inp_socket != NULL))) {
+						*uid = inp->inp_socket->so_uid;
+						*gid = inp->inp_socket->so_gid;
+					}
+					lck_rw_done(pcbinfo->mtx);
+					return (found);
+				}
+				else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
+					local_wild = inp;
+			}
+		}
+		if (local_wild) {
+			if ((found = (local_wild->inp_socket != NULL))) {
+				*uid = local_wild->inp_socket->so_uid;
+				*gid = local_wild->inp_socket->so_gid;
+			}
+			lck_rw_done(pcbinfo->mtx);
+			return (found);
+		}
+	}
+
+	/*
+	 * Not found.
+	 */
+	lck_rw_done(pcbinfo->mtx);
+	return (0);
+}
+
 /*
  * Lookup PCB in hash list.
  */
diff --git a/bsd/netinet6/in6_pcb.h b/bsd/netinet6/in6_pcb.h
index 58476cc5e..d83836bbc 100644
--- a/bsd/netinet6/in6_pcb.h
+++ b/bsd/netinet6/in6_pcb.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -104,13 +104,16 @@ extern int in6_pcbbind(struct inpcb *, struct sockaddr *, struct proc *);
 extern int in6_pcbconnect(struct inpcb *, struct sockaddr *, struct proc *);
 extern void in6_pcbdetach(struct inpcb *);
 extern void in6_pcbdisconnect(struct inpcb *);
-extern int in6_pcbladdr(struct inpcb *, struct sockaddr *, struct in6_addr *);
+extern int in6_pcbladdr(struct inpcb *, struct sockaddr *,
+    struct in6_addr *, unsigned int *);
 extern struct inpcb *in6_pcblookup_local(struct inpcbinfo *, struct in6_addr *,
     u_int, int);
 extern struct inpcb *in6_pcblookup_hash(struct inpcbinfo *, struct in6_addr *,
     u_int, struct in6_addr *, u_int, int, struct ifnet *);
+extern int in6_pcblookup_hash_exists(struct inpcbinfo *, struct in6_addr *,
+    u_int, struct in6_addr *, u_int, int, uid_t *, gid_t *, struct ifnet *);
 extern void in6_pcbnotify(struct inpcbinfo *, struct sockaddr *, u_int,
-    const struct sockaddr *, u_int, int, void (*)(struct inpcb *, int));
+    const struct sockaddr *, u_int, int, void *, void (*)(struct inpcb *, int));
 extern void in6_rtchange(struct inpcb *, int);
 extern struct sockaddr *in6_sockaddr(in_port_t port, struct in6_addr *addr_p);
 extern struct sockaddr *in6_v4mapsin6_sockaddr(in_port_t port,
@@ -119,9 +122,6 @@ extern int in6_setpeeraddr(struct socket *so, struct sockaddr **nam);
 extern int in6_setsockaddr(struct socket *so, struct sockaddr **nam);
 extern int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam);
 extern int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam);
-extern struct in6_addr *in6_selectsrc(struct sockaddr_in6 *,
-    struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *,
-    struct in6_addr *, struct in6_addr *, int *);
 extern int in6_selecthlim(struct in6pcb *, struct ifnet *);
 extern int in6_pcbsetport(struct in6_addr *, struct inpcb *,
     struct proc *, int);
diff --git a/bsd/netinet6/in6_prefix.c b/bsd/netinet6/in6_prefix.c
index 891917965..da85f486d 100644
--- a/bsd/netinet6/in6_prefix.c
+++ b/bsd/netinet6/in6_prefix.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -227,18 +227,25 @@ search_matched_prefix(struct ifnet *ifp, struct in6_prefixreq *ipr)
 	ifnet_lock_shared(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 	{
-		if (ifa->ifa_addr->sa_family != AF_INET6)
+		IFA_LOCK(ifa);
+		if (ifa->ifa_addr->sa_family != AF_INET6) {
+			IFA_UNLOCK(ifa);
 			continue;
+		}
 		if (ipr->ipr_plen <=
-		    in6_matchlen(&ipr->ipr_prefix.sin6_addr, IFA_IN6(ifa)))
+		    in6_matchlen(&ipr->ipr_prefix.sin6_addr, IFA_IN6(ifa))) {
+			/* keep it locked */
 			break;
+		}
+		IFA_UNLOCK(ifa);
 	}
 	if (ifa == NULL) {
 		ifnet_lock_done(ifp);
 		return NULL;
 	}
-
+	IFA_LOCK_ASSERT_HELD(ifa);
 	rpp = ifpr2rp(((struct in6_ifaddr *)ifa)->ia6_ifpr);
+	IFA_UNLOCK(ifa);
 	if (rpp != 0) {
 		ifnet_lock_done(ifp);
 		return rpp;
@@ -302,24 +309,31 @@ mark_matched_prefixes(u_int32_t cmd, struct ifnet *ifp, struct in6_rrenumreq *ir
 	{
 		struct rr_prefix *rpp;
 
-		if (ifa->ifa_addr->sa_family != AF_INET6)
+		IFA_LOCK(ifa);
+		if (ifa->ifa_addr->sa_family != AF_INET6) {
+			IFA_UNLOCK(ifa);
 			continue;
+		}
 		matchlen = in6_matchlen(&irr->irr_matchprefix.sin6_addr,
 					IFA_IN6(ifa));
 		if (irr->irr_m_minlen > matchlen ||
-		    irr->irr_m_maxlen < matchlen || irr->irr_m_len > matchlen)
- 			continue;
+		    irr->irr_m_maxlen < matchlen || irr->irr_m_len > matchlen) {
+			IFA_UNLOCK(ifa);
+			continue;
+		}
 		rpp = ifpr2rp(((struct in6_ifaddr *)ifa)->ia6_ifpr);
 		if (rpp != 0) {
 			matched = 1;
 			rpp->rp_statef_addmark = 1;
 			if (cmd == SIOCCIFPREFIX_IN6)
 				rpp->rp_statef_delmark = 1;
-		} else
+		} else {
 			log(LOG_WARNING, "in6_prefix.c: mark_matched_prefixes:"
 			    "no back pointer to ifprefix for %s. "
 			    "ND autoconfigured addr?\n",
 			    ip6_sprintf(IFA_IN6(ifa)));
+		}
+		IFA_UNLOCK(ifa);
 	}
 	ifnet_lock_done(ifp);
 	return matched;
@@ -447,15 +461,17 @@ assign_ra_entry(struct rr_prefix *rpp, int iilen, struct in6_ifaddr *ia)
 		return error;
 
 	/* copy interface id part */
+	IFA_LOCK(&ia->ia_ifa);
 	bit_copy((caddr_t)&rap->ra_ifid, sizeof(rap->ra_ifid) << 3,
-		 (caddr_t)IA6_IN6(ia),
-		 sizeof(*IA6_IN6(ia)) << 3, rpp->rp_plen, iilen);
+	    (caddr_t)IA6_IN6(ia), sizeof(*IA6_IN6(ia)) << 3,
+	    rpp->rp_plen, iilen);
 	/* link to ia, and put into list */
 	rap->ra_addr = ia;
-	ifaref(&rap->ra_addr->ia_ifa);
+	IFA_ADDREF_LOCKED(&rap->ra_addr->ia_ifa);
 #if 0 /* Can't do this now, because rpp may be on th stack. should fix it? */
 	ia->ia6_ifpr = rp2ifpr(rpp);
 #endif
+	IFA_UNLOCK(&ia->ia_ifa);
 	lck_mtx_lock(prefix6_mutex);
 	LIST_INSERT_HEAD(&rpp->rp_addrhead, rap, ra_entry);
 	lck_mtx_unlock(prefix6_mutex);
@@ -478,9 +494,11 @@ in6_prefix_add_llifid(__unused int iilen, struct in6_ifaddr *ia)
 	if ((error = create_ra_entry(&rap)) != 0)
 		return(error);
 	/* copy interface id part */
+	IFA_LOCK(&ia->ia_ifa);
 	bit_copy((caddr_t)&rap->ra_ifid, sizeof(rap->ra_ifid) << 3,
 		 (caddr_t)IA6_IN6(ia), sizeof(*IA6_IN6(ia)) << 3,
 		 64, (sizeof(rap->ra_ifid) << 3) - 64);
+	IFA_UNLOCK(&ia->ia_ifa);
 	/* XXX: init dummy so */
 	bzero(&so, sizeof(so));
 	/* insert into list */
@@ -500,6 +518,7 @@ in6_prefix_add_llifid(__unused int iilen, struct in6_ifaddr *ia)
 	return 0;
 }
 
+#if 0
 /*
  * add an address to an interface.  if the interface id portion is new,
  * we will add new interface address (prefix database + new interface id).
@@ -507,17 +526,24 @@ in6_prefix_add_llifid(__unused int iilen, struct in6_ifaddr *ia)
 int
 in6_prefix_add_ifid(int iilen, struct in6_ifaddr *ia)
 {
-	int plen = (sizeof(*IA6_IN6(ia)) << 3) - iilen;
+	struct in6_addr addr;
+	int plen;
 	struct ifprefix *ifpr;
 	struct rp_addr *rap;
 	int error = 0;
 
-	if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia)))
+	IFA_LOCK_SPIN(&ia->ia_ifa);
+	addr = *IA6_IN6(ia);
+	plen = (sizeof(*IA6_IN6(ia)) << 3) - iilen;
+	IFA_UNLOCK(&ia->ia_ifa);
+
+	if (IN6_IS_ADDR_LINKLOCAL(&addr))
 		return(in6_prefix_add_llifid(iilen, ia));
-	ifpr = in6_prefixwithifp(ia->ia_ifp, plen, IA6_IN6(ia));
+	ifpr = in6_prefixwithifp(ia->ia_ifp, plen, &addr);
 	if (ifpr == NULL) {
 		struct rr_prefix rp;
 		struct socket so;
+		struct ifnet *ifp;
 		int pplen = (plen == 128) ? 64 : plen; /* XXX hardcoded 64 is bad */
 
 		/* allocate a prefix for ia, with default properties */
@@ -525,14 +551,12 @@ in6_prefix_add_ifid(int iilen, struct in6_ifaddr *ia)
 		/* init rp */
 		bzero(&rp, sizeof(rp));
 		rp.rp_type = IN6_PREFIX_RR;
-		rp.rp_ifp = ia->ia_ifp;
+		rp.rp_ifp = ifp = ia->ia_ifp;
 		rp.rp_plen = pplen;
 		rp.rp_prefix.sin6_len = sizeof(rp.rp_prefix);
 		rp.rp_prefix.sin6_family = AF_INET6;
 		bit_copy((char *)RP_IN6(&rp), sizeof(*RP_IN6(&rp)) << 3,
-			 (char *)&ia->ia_addr.sin6_addr,
-			 sizeof(ia->ia_addr.sin6_addr) << 3,
-			 0, pplen);
+			 (char *)&addr, sizeof (addr) << 3, 0, pplen);
 		rp.rp_vltime = rp.rp_pltime = RR_INFINITE_LIFETIME;
 		rp.rp_raf_onlink = 1;
 		rp.rp_raf_auto = 1;
@@ -541,7 +565,9 @@ in6_prefix_add_ifid(int iilen, struct in6_ifaddr *ia)
 		rp.rp_origin = PR_ORIG_RR; /* can be renumbered */
 
 		/* create ra_entry */
+		ifnet_lock_shared(ifp);
 		error = link_stray_ia6s(&rp);
+		ifnet_lock_done(ifp);
 		if (error != 0) {
 			free_rp_entries(&rp);
 			return error;
@@ -559,53 +585,69 @@ in6_prefix_add_ifid(int iilen, struct in6_ifaddr *ia)
 			return error;
 
 		/* search again */
-		ifpr = in6_prefixwithifp(ia->ia_ifp, pplen, IA6_IN6(ia));
+		ifpr = in6_prefixwithifp(ia->ia_ifp, pplen, &addr);
 		if (ifpr == NULL)
 			return 0;
 	}
-	rap = search_ifidwithprefix(ifpr2rp(ifpr), IA6_IN6(ia));
+	rap = search_ifidwithprefix(ifpr2rp(ifpr), &addr);
 	if (rap != NULL) {
 		if (rap->ra_addr == NULL) {
 			rap->ra_addr = ia;
-			ifaref(&rap->ra_addr->ia_ifa);
+			IFA_ADDREF(&rap->ra_addr->ia_ifa);
 		} else if (rap->ra_addr != ia) {
 			/* There may be some inconsistencies between addrs. */
 			log(LOG_ERR, "ip6_prefix.c: addr %s/%d matched prefix"
 			    " already has another ia %p(%s) on its ifid list\n",
-			    ip6_sprintf(IA6_IN6(ia)), plen,
-			    rap->ra_addr,
+			    ip6_sprintf(&addr), plen, rap->ra_addr,
 			    ip6_sprintf(IA6_IN6(rap->ra_addr)));
 			return EADDRINUSE /* XXX */;
 		}
+		IFA_LOCK_SPIN(&ia->ia_ifa);
 		ia->ia6_ifpr = ifpr;
+		IFA_UNLOCK(&ia->ia_ifa);
 		return 0;
 	}
 	error = assign_ra_entry(ifpr2rp(ifpr), iilen, ia);
-	if (error == 0)
+	if (error == 0) {
+		IFA_LOCK_SPIN(&ia->ia_ifa);
 		ia->ia6_ifpr = ifpr;
+		IFA_UNLOCK(&ia->ia_ifa);
+	}
 	return (error);
 }
+#endif
 
+#if 0
 void
 in6_prefix_remove_ifid(__unused int iilen, struct in6_ifaddr *ia)
 {
 	struct rp_addr *rap;
+	struct in6_addr addr;
+	struct ifprefix *ifpr;
 
-	if (ia->ia6_ifpr == NULL)
+	IFA_LOCK_SPIN(&ia->ia_ifa);
+	if ((ifpr = ia->ia6_ifpr) == NULL) {
+		IFA_UNLOCK(&ia->ia_ifa);
 		return;
-	rap = search_ifidwithprefix(ifpr2rp(ia->ia6_ifpr), IA6_IN6(ia));
+	}
+	addr = *IA6_IN6(ia);
+	IFA_UNLOCK(&ia->ia_ifa);
+	rap = search_ifidwithprefix(ifpr2rp(ifpr), &addr);
 	if (rap != NULL) {
 		lck_mtx_lock(prefix6_mutex);
 		LIST_REMOVE(rap, ra_entry);
 		lck_mtx_unlock(prefix6_mutex);
-		if (rap->ra_addr)
-			ifafree(&rap->ra_addr->ia_ifa);
+		if (rap->ra_addr) {
+			IFA_REMREF(&rap->ra_addr->ia_ifa);
+			rap->ra_addr = NULL;
+		}
 		FREE(rap, M_RR_ADDR);
 	}
 
-	if (LIST_EMPTY(&ifpr2rp(ia->ia6_ifpr)->rp_addrhead))
-		rp_remove(ifpr2rp(ia->ia6_ifpr));
+	if (LIST_EMPTY(&ifpr2rp(ifpr)->rp_addrhead))
+		rp_remove(ifpr2rp(ifpr));
 }
+#endif
 
 void
 in6_purgeprefix(
@@ -665,20 +707,29 @@ add_each_addr(struct socket *so, struct rr_prefix *rpp, struct rp_addr *rap)
 
 	ia6 = in6ifa_ifpwithaddr(rpp->rp_ifp, &ifra.ifra_addr.sin6_addr);
 	if (ia6 != NULL) {
+		struct in6_ifaddr *ria6 = NULL;
+
+		IFA_LOCK(&ia6->ia_ifa);
 		if (ia6->ia6_ifpr == NULL) {
 			/* link this addr and the prefix each other */
-			if (rap->ra_addr)
-				ifafree(&rap->ra_addr->ia_ifa);
+			if (rap->ra_addr != NULL)
+				ria6 = rap->ra_addr;
 			/* Reference held in in6ifa_ifpwithaddr() */
 			rap->ra_addr = ia6;
 			ia6->ia6_ifpr = rp2ifpr(rpp);
+			IFA_UNLOCK(&ia6->ia_ifa);
+			if (ria6 != NULL)
+				IFA_REMREF(&ria6->ia_ifa);
 			return;
 		}
 		if (ia6->ia6_ifpr == rp2ifpr(rpp)) {
-			if (rap->ra_addr)
-				ifafree(&rap->ra_addr->ia_ifa);
+			if (rap->ra_addr != NULL)
+				ria6 = rap->ra_addr;
 			/* Reference held in in6ifa_ifpwithaddr() */
 			rap->ra_addr = ia6;
+			IFA_UNLOCK(&ia6->ia_ifa);
+			if (ria6 != NULL)
+				IFA_REMREF(&ria6->ia_ifa);
 			return;
 		}
 		/*
@@ -697,7 +748,8 @@ add_each_addr(struct socket *so, struct rr_prefix *rpp, struct rp_addr *rap)
 		    ip6_sprintf(&ifra.ifra_addr.sin6_addr), rpp->rp_plen,
 		    ip6_sprintf(IA6_IN6(ia6)),
 		    in6_mask2len(&ia6->ia_prefixmask.sin6_addr, NULL));
-		ifafree(&ia6->ia_ifa);
+		IFA_UNLOCK(&ia6->ia_ifa);
+		IFA_REMREF(&ia6->ia_ifa);
 		return;
 	}
 	/* propagate ANYCAST flag if it is set for ancestor addr */
@@ -803,8 +855,10 @@ rrpr_update(struct socket *so, struct rr_prefix *new)
 			LIST_REMOVE(rap, ra_entry);
 			if (search_ifidwithprefix(rpp, &rap->ra_ifid)
 			    != NULL) {
-				if (rap->ra_addr)
-					ifafree(&rap->ra_addr->ia_ifa);
+				if (rap->ra_addr) {
+					IFA_REMREF(&rap->ra_addr->ia_ifa);
+					rap->ra_addr = NULL;
+				}
 				FREE(rap, M_RR_ADDR);
 				continue;
 			}
@@ -870,11 +924,14 @@ rrpr_update(struct socket *so, struct rr_prefix *new)
 	 * init the prefix pointer.
 	 */
 	lck_mtx_lock(prefix6_mutex);
-	LIST_FOREACH(rap, &rpp->rp_addrhead, ra_entry)
-	{
-		if (rap->ra_addr != NULL) {
-			if (rap->ra_addr->ia6_ifpr == NULL)
-				rap->ra_addr->ia6_ifpr = rp2ifpr(rpp);
+	LIST_FOREACH(rap, &rpp->rp_addrhead, ra_entry) {
+		struct in6_ifaddr *ia6;
+
+		if ((ia6 = rap->ra_addr) != NULL) {
+			IFA_LOCK(&ia6->ia_ifa);
+			if (ia6->ia6_ifpr == NULL)
+				ia6->ia6_ifpr = rp2ifpr(rpp);
+			IFA_UNLOCK(&ia6->ia_ifa);
 			continue;
 		}
 		add_each_addr(so, rpp, rap);
@@ -967,13 +1024,20 @@ init_newprefix(struct in6_rrenumreq *irr, struct ifprefix *ifpr,
 	{
 		struct rp_addr *rap;
 		int error = 0;
+		struct in6_ifaddr *ia6;
 
 		if ((error = create_ra_entry(&rap)) != 0)
 			return error;
 		rap->ra_ifid = orap->ra_ifid;
-		rap->ra_flags.anycast = (orap->ra_addr != NULL &&
-					 (orap->ra_addr->ia6_flags &
-					  IN6_IFF_ANYCAST) != 0) ? 1 : 0;
+		ia6 = orap->ra_addr->ia_ifa;
+		if (ia6 != NULL) {
+			IFA_LOCK(&ia6->ia_ifa);
+			rap->ra_flags.anycast =
+			    ((ia6->ia6_flags & IN6_IFF_ANYCAST) != 0) ? 1 : 0;
+			IFA_UNLOCK(&ia6->ia_ifa);
+		} else {
+			rap->ra_flags.anycast = 0;
+		}
 		LIST_INSERT_HEAD(&rpp->rp_addrhead, rap, ra_entry);
 	}
 	rpp->rp_vltime = irr->irr_vltime;
@@ -1005,8 +1069,10 @@ free_rp_entries(struct rr_prefix *rpp)
 
 		rap = LIST_FIRST(&rpp->rp_addrhead);
 		LIST_REMOVE(rap, ra_entry);
-		if (rap->ra_addr)
-			ifafree(&rap->ra_addr->ia_ifa);
+		if (rap->ra_addr) {
+			IFA_REMREF(&rap->ra_addr->ia_ifa);
+			rap->ra_addr = NULL;
+		}
 		FREE(rap, M_RR_ADDR);
 	}
 	lck_mtx_unlock(prefix6_mutex);
@@ -1054,10 +1120,14 @@ unprefer_prefix(struct rr_prefix *rpp)
 	lck_mtx_lock(prefix6_mutex);
 	for (rap = rpp->rp_addrhead.lh_first; rap != NULL;
 	     rap = rap->ra_entry.le_next) {
-		if (rap->ra_addr == NULL)
+		struct in6_ifaddr *ia6;
+
+		if ((ia6 = rap->ra_addr) == NULL)
 			continue;
-		rap->ra_addr->ia6_lifetime.ia6t_preferred = timenow.tv_sec;
-		rap->ra_addr->ia6_lifetime.ia6t_pltime = 0;
+		IFA_LOCK(&ia6->ia_ifa);
+		ia6->ia6_lifetime.ia6t_preferred = timenow.tv_sec;
+		ia6->ia6_lifetime.ia6t_pltime = 0;
+		IFA_UNLOCK(&ia6->ia_ifa);
 	}
 	lck_mtx_unlock(prefix6_mutex);
 
@@ -1074,20 +1144,24 @@ delete_each_prefix(struct rr_prefix *rpp, u_char origin)
 	lck_mtx_lock(prefix6_mutex);
 	while (rpp->rp_addrhead.lh_first != NULL) {
 		struct rp_addr *rap;
+		struct in6_ifaddr *ia6;
 
 		rap = LIST_FIRST(&rpp->rp_addrhead);
 		if (rap == NULL) {
 			break;
 		}
 		LIST_REMOVE(rap, ra_entry);
-		if (rap->ra_addr == NULL) {
+		if ((ia6 = rap->ra_addr) == NULL) {
 			FREE(rap, M_RR_ADDR);
 			continue;
 		}
-		rap->ra_addr->ia6_ifpr = NULL;
+		rap->ra_addr = NULL;
+		IFA_LOCK(&ia6->ia_ifa);
+		ia6->ia6_ifpr = NULL;
+		IFA_UNLOCK(&ia6->ia_ifa);
 
-		in6_purgeaddr(&rap->ra_addr->ia_ifa, 0);
-		ifafree(&rap->ra_addr->ia_ifa);
+		in6_purgeaddr(&ia6->ia_ifa, 0);
+		IFA_REMREF(&ia6->ia_ifa);
 		FREE(rap, M_RR_ADDR);
 	}
 	rp_remove(rpp);
@@ -1122,6 +1196,8 @@ link_stray_ia6s(struct rr_prefix *rpp)
 {
 	struct ifaddr *ifa;
 
+	ifnet_lock_assert(rpp->rp_ifp, IFNET_LCK_ASSERT_OWNED);
+
 	for (ifa = rpp->rp_ifp->if_addrlist.tqh_first; ifa;
 	     ifa = ifa->ifa_list.tqe_next)
 	{
@@ -1129,11 +1205,15 @@ link_stray_ia6s(struct rr_prefix *rpp)
 		struct rr_prefix *orpp;
 		int error = 0;
 
-		if (ifa->ifa_addr->sa_family != AF_INET6)
+		IFA_LOCK(ifa);
+		if (ifa->ifa_addr->sa_family != AF_INET6) {
+			IFA_UNLOCK(ifa);
 			continue;
-		if (rpp->rp_plen > in6_matchlen(RP_IN6(rpp), IFA_IN6(ifa)))
+		}
+		if (rpp->rp_plen > in6_matchlen(RP_IN6(rpp), IFA_IN6(ifa))) {
+			IFA_UNLOCK(ifa);
 			continue;
-
+		}
 		orpp = ifpr2rp(((struct in6_ifaddr *)ifa)->ia6_ifpr);
 		if (orpp != NULL) {
 			if (!in6_are_prefix_equal(RP_IN6(orpp), RP_IN6(rpp),
@@ -1144,8 +1224,10 @@ link_stray_ia6s(struct rr_prefix *rpp)
 				    ip6_sprintf(IFA_IN6(ifa)), orpp->rp_plen,
 				    ip6_sprintf(RP_IN6(rpp)),
 				    rpp->rp_plen);
+			IFA_UNLOCK(ifa);
 			continue;
 		}
+		IFA_UNLOCK(ifa);
 		if ((error = assign_ra_entry(rpp,
 					      (sizeof(rap->ra_ifid) << 3) -
 					      rpp->rp_plen,
@@ -1237,23 +1319,28 @@ in6_prefix_ioctl(struct socket *so, u_long cmd, caddr_t data,
 		rp_tmp.rp_origin = ipr->ipr_origin;
 
 		/* create rp_addr entries, usually at least for lladdr */
+		ifnet_lock_shared(ifp);
 		if ((error = link_stray_ia6s(&rp_tmp)) != 0) {
+			ifnet_lock_done(ifp);
 			free_rp_entries(&rp_tmp);
 			break;
 		}
-		ifnet_lock_exclusive(ifp);
 		for (ifa = ifp->if_addrlist.tqh_first;
 		     ifa;
 		     ifa = ifa->ifa_list.tqe_next)
 		{
-			if (ifa->ifa_addr == NULL)
-				continue;	/* just for safety */
-			if (ifa->ifa_addr->sa_family != AF_INET6)
+			IFA_LOCK(ifa);
+			if (ifa->ifa_addr->sa_family != AF_INET6) {
+				IFA_UNLOCK(ifa);
 				continue;
-			if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa)) == 0)
+			}
+			if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa)) == 0) {
+				IFA_UNLOCK(ifa);
 				continue;
-
+			}
 			if ((error = create_ra_entry(&rap)) != 0) {
+				IFA_UNLOCK(ifa);
+				ifnet_lock_done(ifp);
 				free_rp_entries(&rp_tmp);
 				goto bad;
 			}
@@ -1264,6 +1351,7 @@ in6_prefix_ioctl(struct socket *so, u_long cmd, caddr_t data,
 				 sizeof(*IFA_IN6(ifa)) << 3,
 				 rp_tmp.rp_plen,
 				 (sizeof(rap->ra_ifid) << 3) - rp_tmp.rp_plen);
+			IFA_UNLOCK(ifa);
 			/* insert into list */
 			lck_mtx_lock(prefix6_mutex);
 			LIST_INSERT_HEAD(&rp_tmp.rp_addrhead, rap, ra_entry);
@@ -1292,30 +1380,3 @@ in6_prefix_ioctl(struct socket *so, u_long cmd, caddr_t data,
 }
 #endif
 
-void
-in6_rr_timer(__unused void *ignored_arg)
-{
-	struct rr_prefix *rpp;
-	struct timeval timenow;
-
-	getmicrotime(&timenow);
-
-	/* expire */
-	lck_mtx_lock(prefix6_mutex);
-	rpp = LIST_FIRST(&rr_prefix);
-	while (rpp) {
-		if (rpp->rp_expire && rpp->rp_expire < timenow.tv_sec) {
-			struct rr_prefix *next_rpp;
-
-			next_rpp = LIST_NEXT(rpp, rp_entry);
-			delete_each_prefix(rpp, PR_ORIG_KERNEL);
-			rpp = next_rpp;
-			continue;
-		}
-		if (rpp->rp_preferred && rpp->rp_preferred < timenow.tv_sec)
-			unprefer_prefix(rpp);
-		rpp = LIST_NEXT(rpp, rp_entry);
-	}
-	lck_mtx_unlock(prefix6_mutex);
-	timeout(in6_rr_timer, (caddr_t)0, ip6_rr_prune * hz);
-}
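
The in6_prefix.c changes above all follow one locking discipline: per-address state is touched only under IFA_LOCK/IFA_LOCK_SPIN, anything needed after the unlock is copied into a local first, and reference counting moves from ifaref()/ifafree() to IFA_ADDREF()/IFA_REMREF() (IFA_ADDREF_LOCKED() when the lock is already held). A minimal user-space sketch of that copy-under-lock pattern, using stand-in types rather than the real xnu structures:

	#include <pthread.h>

	struct in6_addr_s { unsigned char s6_addr[16]; };  /* stand-in */

	struct ifaddr_s {                 /* stand-in for struct in6_ifaddr */
		pthread_mutex_t   ifa_lock;    /* IFA_LOCK / IFA_UNLOCK */
		int               ifa_refcnt;  /* IFA_ADDREF / IFA_REMREF */
		struct in6_addr_s ia_addr;
	};

	/*
	 * Mirrors in6_prefix_remove_ifid() above: lock, snapshot the
	 * address, take a reference while still locked, unlock, and only
	 * then call anything that may block or take other locks.
	 */
	static void
	snapshot_and_use(struct ifaddr_s *ifa,
	    void (*use)(const struct in6_addr_s *))
	{
		struct in6_addr_s addr;

		pthread_mutex_lock(&ifa->ifa_lock);    /* IFA_LOCK_SPIN() */
		addr = ifa->ia_addr;                   /* copy, don't alias */
		ifa->ifa_refcnt++;                     /* IFA_ADDREF_LOCKED() */
		pthread_mutex_unlock(&ifa->ifa_lock);  /* IFA_UNLOCK() */

		use(&addr);            /* runs with no ifaddr lock held */
	}
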
diff --git a/bsd/netinet6/in6_prefix.h b/bsd/netinet6/in6_prefix.h
index f69562ae4..fa3567676 100644
--- a/bsd/netinet6/in6_prefix.h
+++ b/bsd/netinet6/in6_prefix.h
@@ -85,7 +85,6 @@ LIST_HEAD(rr_prhead, rr_prefix);
 
 extern struct rr_prhead rr_prefix;
 
-void in6_rr_timer(void *);
 int delete_each_prefix (struct rr_prefix *rpp, u_char origin);
 
 #endif /* KERNEL_PRIVATE */
diff --git a/bsd/netinet6/in6_proto.c b/bsd/netinet6/in6_proto.c
index b7dbae799..c0228feeb 100644
--- a/bsd/netinet6/in6_proto.c
+++ b/bsd/netinet6/in6_proto.c
@@ -129,6 +129,7 @@
 #include <netinet6/pim6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/in6_prefix.h>
+#include <netinet6/mld6_var.h>
 
 #include <netinet6/ip6_mroute.h>
 
@@ -217,7 +218,7 @@ struct ip6protosw inet6sw[] = {
 { SOCK_RAW,	&inet6domain,	IPPROTO_ICMPV6,	PR_ATOMIC|PR_ADDR|PR_LASTHDR,
   icmp6_input,	rip6_pr_output,	rip6_ctlinput,	rip6_ctloutput,
   0,
-  icmp6_init,	icmp6_fasttimo,	0,		0,
+  icmp6_init,	0,		mld_slowtimo,		0,
   0,
   &rip6_usrreqs,
   0,		rip_unlock,		0,
@@ -226,7 +227,7 @@ struct ip6protosw inet6sw[] = {
 { SOCK_DGRAM,     &inet6domain,   IPPROTO_ICMPV6, PR_ATOMIC|PR_ADDR|PR_LASTHDR,
   icmp6_input,  rip6_pr_output, rip6_ctlinput,  icmp6_dgram_ctloutput,
   0,
-  icmp6_init,   icmp6_fasttimo, 0,              0,
+  icmp6_init,   0,              mld_slowtimo,   0,
   0,
   &icmp6_dgram_usrreqs,
   0,            rip_unlock,             0,
@@ -398,7 +399,7 @@ int	ip6_accept_rtadv = 0;	/* "IPV6FORWARDING ? 0 : 1" is dangerous */
 int	ip6_maxfragpackets;	/* initialized in frag6.c:frag6_init() */
 int	ip6_maxfrags;
 int	ip6_log_interval = 5;
-int	ip6_hdrnestlimit = 50;	/* appropriate? */
+int	ip6_hdrnestlimit = 15;	/* How many header options will we process? */
 int	ip6_dad_count = 1;	/* DupAddrDetectionTransmits */
 u_int32_t ip6_flow_seq;
 int	ip6_auto_flowlabel = 1;
@@ -406,16 +407,19 @@ int	ip6_gif_hlim = 0;
 int	ip6_use_deprecated = 1;	/* allow deprecated addr (RFC2462 5.5.4) */
 int	ip6_rr_prune = 5;	/* router renumbering prefix
 				 * walk list every 5 sec.    */
-int	ip6_v6only = 0;		/* Mapped addresses on by default -  Radar 3347718 */
+int	ip6_mcast_pmtu = 0;	/* enable pMTU discovery for multicast? */
+int	ip6_v6only = 0;		/* Mapped addresses on by default (v6only off) - Radar 3347718 -- REVISITING FOR 10.7 -- TESTING WITH MAPPED@ OFF */
 
 int	ip6_neighborgcthresh = 1024;	/* Threshold # of NDP entries for GC */
 int	ip6_maxifprefixes = 16;		/* Max acceptable prefixes via RA per IF */
 int	ip6_maxifdefrouters = 16;	/* Max acceptable def routers via RA */
 int	ip6_maxdynroutes = 1024;	/* Max # of routes created via redirect */
+int	ip6_only_allow_rfc4193_prefix = 0;	/* Only allow RFC4193 style Unique Local IPv6 Unicast prefixes */
 
 u_int32_t ip6_id = 0UL;
 int	ip6_keepfaith = 0;
 time_t	ip6_log_time = (time_t)0L;
+int	nd6_onlink_ns_rfc4861 = 0; /* allow 'on-link' nd6 NS (as in RFC 4861) */
 
 /* icmp6 */
 /*
@@ -450,7 +454,7 @@ int	udp6_recvspace = 40 * (1024 + sizeof(struct sockaddr_in6));
 /*
  * sysctl related items.
  */
-SYSCTL_NODE(_net,	PF_INET6,	inet6,	CTLFLAG_RW,	0,
+SYSCTL_NODE(_net,	PF_INET6,	inet6,	CTLFLAG_RW | CTLFLAG_LOCKED,	0,
 	"Internet6 Family");
 
 /* net.inet6 */
@@ -475,7 +479,8 @@ sysctl_ip6_temppltime SYSCTL_HANDLER_ARGS
 		return (error);
 	old = ip6_temp_preferred_lifetime;
 	error = SYSCTL_IN(req, arg1, sizeof(int));
-	if (ip6_temp_preferred_lifetime <
+	if (ip6_temp_preferred_lifetime > ND6_MAX_LIFETIME ||
+	    ip6_temp_preferred_lifetime <
 	    ip6_desync_factor + ip6_temp_regen_advance) {
 		ip6_temp_preferred_lifetime = old;
 		return(EINVAL);
@@ -495,7 +500,8 @@ sysctl_ip6_tempvltime SYSCTL_HANDLER_ARGS
 		return (error);
 	old = ip6_temp_valid_lifetime;
 	error = SYSCTL_IN(req, arg1, sizeof(int));
-	if (ip6_temp_valid_lifetime < ip6_temp_preferred_lifetime) {
+	if (ip6_temp_valid_lifetime > ND6_MAX_LIFETIME ||
+	    ip6_temp_valid_lifetime < ip6_temp_preferred_lifetime) {
 		ip6_temp_preferred_lifetime = old;
 		return(EINVAL);
 	}
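
Both temporary-address handlers now reject values above ND6_MAX_LIFETIME as well as values below the existing floor, restoring the saved value on failure. The validate-or-roll-back shape those handlers share, condensed into a sketch (not the exact xnu handler):

	#include <errno.h>

	/*
	 * SYSCTL_IN() has already written the new value into *var;
	 * range-check it and restore the saved old value if it is out
	 * of bounds (hi corresponds to ND6_MAX_LIFETIME above).
	 */
	static int
	bounded_int_handler(int *var, int old, int lo, int hi)
	{
		if (*var > hi || *var < lo) {
			*var = old;
			return (EINVAL);
		}
		return (0);
	}
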
@@ -503,90 +509,103 @@ sysctl_ip6_tempvltime SYSCTL_HANDLER_ARGS
 }
 
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING,
-	forwarding, CTLFLAG_RW, 	&ip6_forwarding,	0, "");
+	forwarding, CTLFLAG_RW | CTLFLAG_LOCKED, 	&ip6_forwarding,	0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS,
-	redirect, CTLFLAG_RW,		&ip6_sendredirects,	0, "");
+	redirect, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_sendredirects,	0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM,
-	hlim, CTLFLAG_RW,		&ip6_defhlim,	0, "");
-SYSCTL_STRUCT(_net_inet6_ip6, IPV6CTL_STATS, stats, CTLFLAG_RD,
+	hlim, CTLFLAG_RW | CTLFLAG_LOCKED,		&ip6_defhlim,	0, "");
+SYSCTL_STRUCT(_net_inet6_ip6, IPV6CTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
 	&ip6stat, ip6stat, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS,
-	maxfragpackets, CTLFLAG_RW,	&ip6_maxfragpackets,	0, "");
+	maxfragpackets, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_maxfragpackets,	0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS,
-        maxfrags, CTLFLAG_RW,           &ip6_maxfrags,  0, "");
+        maxfrags, CTLFLAG_RW | CTLFLAG_LOCKED,           &ip6_maxfrags,  0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV,
-	accept_rtadv, CTLFLAG_RW,	&ip6_accept_rtadv,	0, "");
+	accept_rtadv, CTLFLAG_RW | CTLFLAG_LOCKED,
+	&ip6_accept_rtadv,	0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_KEEPFAITH,
-	keepfaith, CTLFLAG_RW,		&ip6_keepfaith,	0, "");
+	keepfaith, CTLFLAG_RW | CTLFLAG_LOCKED,		&ip6_keepfaith,	0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL,
-	log_interval, CTLFLAG_RW,	&ip6_log_interval,	0, "");
+	log_interval, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_log_interval,	0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_HDRNESTLIMIT,
-	hdrnestlimit, CTLFLAG_RW,	&ip6_hdrnestlimit,	0, "");
+	hdrnestlimit, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_hdrnestlimit,	0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DAD_COUNT,
-	dad_count, CTLFLAG_RW,	&ip6_dad_count,	0, "");
+	dad_count, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_dad_count,	0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_FLOWLABEL,
-	auto_flowlabel, CTLFLAG_RW,	&ip6_auto_flowlabel,	0, "");
+	auto_flowlabel, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_auto_flowlabel,	0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFMCASTHLIM,
-	defmcasthlim, CTLFLAG_RW,	&ip6_defmcasthlim,	0, "");
+	defmcasthlim, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_defmcasthlim,	0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM,
-	gifhlim, CTLFLAG_RW,	&ip6_gif_hlim,			0, "");
+	gifhlim, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_gif_hlim,			0, "");
 SYSCTL_STRING(_net_inet6_ip6, IPV6CTL_KAME_VERSION,
-	kame_version, CTLFLAG_RD, (void *)((uintptr_t)(__KAME_VERSION)),		0, "");
+	kame_version, CTLFLAG_RD | CTLFLAG_LOCKED, (void *)((uintptr_t)(__KAME_VERSION)),		0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEPRECATED,
-	use_deprecated, CTLFLAG_RW,	&ip6_use_deprecated,	0, "");
+	use_deprecated, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_use_deprecated,	0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RR_PRUNE,
-	rr_prune, CTLFLAG_RW,	&ip6_rr_prune,			0, "");
+	rr_prune, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_rr_prune,			0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USETEMPADDR,
-	use_tempaddr, CTLFLAG_RW, &ip6_use_tempaddr,		0, "");
+	use_tempaddr, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_use_tempaddr,		0, "");
 SYSCTL_OID(_net_inet6_ip6, IPV6CTL_TEMPPLTIME, temppltime,
-	   CTLTYPE_INT|CTLFLAG_RW, &ip6_temp_preferred_lifetime, 0,
+	   CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_temp_preferred_lifetime, 0,
 	   sysctl_ip6_temppltime, "I", "");
 SYSCTL_OID(_net_inet6_ip6, IPV6CTL_TEMPVLTIME, tempvltime,
-	   CTLTYPE_INT|CTLFLAG_RW, &ip6_temp_valid_lifetime, 0,
+	   CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_temp_valid_lifetime, 0,
 	   sysctl_ip6_tempvltime, "I", "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_V6ONLY,
-	v6only,	CTLFLAG_RW,	&ip6_v6only,			0, "");
+	v6only,	CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_v6only,		0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL,
-	auto_linklocal, CTLFLAG_RW, &ip6_auto_linklocal,	0, "");
-SYSCTL_STRUCT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, CTLFLAG_RD,
+	auto_linklocal, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_auto_linklocal,	0, "");
+SYSCTL_STRUCT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, CTLFLAG_RD | CTLFLAG_LOCKED,
 	&rip6stat, rip6stat, "");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR,
+	prefer_tempaddr, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_prefer_tempaddr,	0, "");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE,
+	use_defaultzone, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_use_defzone,		0,"");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU,
+	mcast_pmtu, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_mcast_pmtu,	0, "");
 #if MROUTING
-SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RD,
+SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RD | CTLFLAG_LOCKED,
         &mrt6stat, mrt6stat, "");
 #endif
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NEIGHBORGCTHRESH,
-	neighborgcthresh, CTLFLAG_RW,	&ip6_neighborgcthresh,	0, "");
+	neighborgcthresh, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_neighborgcthresh,	0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXIFPREFIXES,
-	maxifprefixes, CTLFLAG_RW,	&ip6_maxifprefixes,	0, "");
+	maxifprefixes, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_maxifprefixes,	0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXIFDEFROUTERS,
-	maxifdefrouters, CTLFLAG_RW,	&ip6_maxifdefrouters,	0, "");
+	maxifdefrouters, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_maxifdefrouters,	0, "");
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXDYNROUTES,
-	maxdynroutes, CTLFLAG_RW,	&ip6_maxdynroutes,	0, "");
-
+	maxdynroutes, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_maxdynroutes,	0, "");
+SYSCTL_INT(_net_inet6_ip6, OID_AUTO,
+	only_allow_rfc4193_prefixes, CTLFLAG_RW | CTLFLAG_LOCKED,
+	&ip6_only_allow_rfc4193_prefix,	0, "");
 
 /* net.inet6.icmp6 */
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT,
-	rediraccept, CTLFLAG_RW,	&icmp6_rediraccept,	0, "");
+	rediraccept, CTLFLAG_RW | CTLFLAG_LOCKED,	&icmp6_rediraccept,	0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT,
-	redirtimeout, CTLFLAG_RW,	&icmp6_redirtimeout,	0, "");
-SYSCTL_STRUCT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats, CTLFLAG_RD,
+	redirtimeout, CTLFLAG_RW | CTLFLAG_LOCKED,	&icmp6_redirtimeout,	0, "");
+SYSCTL_STRUCT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
 	&icmp6stat, icmp6stat, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE,
-	nd6_prune, CTLFLAG_RW,		&nd6_prune,	0, "");
+	nd6_prune, CTLFLAG_RW | CTLFLAG_LOCKED,		&nd6_prune,	0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DELAY,
-	nd6_delay, CTLFLAG_RW,		&nd6_delay,	0, "");
+	nd6_delay, CTLFLAG_RW | CTLFLAG_LOCKED,		&nd6_delay,	0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_UMAXTRIES,
-	nd6_umaxtries, CTLFLAG_RW,	&nd6_umaxtries,	0, "");
+	nd6_umaxtries, CTLFLAG_RW | CTLFLAG_LOCKED,	&nd6_umaxtries,	0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MMAXTRIES,
-	nd6_mmaxtries, CTLFLAG_RW,	&nd6_mmaxtries,	0, "");
+	nd6_mmaxtries, CTLFLAG_RW | CTLFLAG_LOCKED,	&nd6_mmaxtries,	0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_USELOOPBACK,
-	nd6_useloopback, CTLFLAG_RW,	&nd6_useloopback, 0, "");
+	nd6_useloopback, CTLFLAG_RW | CTLFLAG_LOCKED,	&nd6_useloopback, 0, "");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ACCEPT_6TO4,
+	nd6_accept_6to4, CTLFLAG_RW | CTLFLAG_LOCKED,	&nd6_accept_6to4, 0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO,
-	nodeinfo, CTLFLAG_RW,	&icmp6_nodeinfo,	0, "");
+	nodeinfo, CTLFLAG_RW | CTLFLAG_LOCKED,	&icmp6_nodeinfo,	0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT,
-	errppslimit, CTLFLAG_RW,	&icmp6errppslim,	0, "");
+	errppslimit, CTLFLAG_RW | CTLFLAG_LOCKED,	&icmp6errppslim,	0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXNUDHINT,
-	nd6_maxnudhint, CTLFLAG_RW,	&nd6_maxnudhint, 0, "");
+	nd6_maxnudhint, CTLFLAG_RW | CTLFLAG_LOCKED,	&nd6_maxnudhint, 0, "");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG,
-	nd6_debug, CTLFLAG_RW,	&nd6_debug,		0, "");
-
+	nd6_debug, CTLFLAG_RW | CTLFLAG_LOCKED,	&nd6_debug,		0, "");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861,
+	nd6_onlink_ns_rfc4861, CTLFLAG_RW | CTLFLAG_LOCKED, &nd6_onlink_ns_rfc4861, 0,
+	"Accept 'on-link' nd6 NS in compliance with RFC 4861.");
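
Every OID in this file gains CTLFLAG_LOCKED, which marks the node as safe to service without the legacy global networking lock because the backing variable needs no extra serialization from the sysctl framework. The new knobs added here (only_allow_rfc4193_prefixes, nd6_accept_6to4, nd6_onlink_ns_rfc4861) follow the same shape; a hypothetical knob in that pattern, illustrative only:

	/* Hypothetical example -- not a real xnu sysctl. */
	static int ip6_example_knob = 0;
	SYSCTL_INT(_net_inet6_ip6, OID_AUTO, example_knob,
	    CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_example_knob, 0,
	    "example of the CTLFLAG_LOCKED declaration style used above");
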
diff --git a/bsd/netinet6/in6_rmx.c b/bsd/netinet6/in6_rmx.c
index d0ad6f2b1..63a66121d 100644
--- a/bsd/netinet6/in6_rmx.c
+++ b/bsd/netinet6/in6_rmx.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -108,8 +108,10 @@
 #include <kern/queue.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
+#include <sys/protosw.h>
 #include <sys/mbuf.h>
 #include <sys/syslog.h>
+#include <sys/mcache.h>
 #include <kern/lock.h>
 
 #include <net/if.h>
@@ -130,8 +132,8 @@
 
 extern int	in6_inithead(void **head, int off);
 static void	in6_rtqtimo(void *rock);
-static void in6_mtutimo(void *rock);
-extern int tvtohz(struct timeval *);
+static void	in6_mtutimo(void *rock);
+extern int	tvtohz(struct timeval *);
 
 static struct radix_node *in6_matroute_args(void *, struct radix_node_head *,
     rn_matchf_t *, void *);
@@ -195,11 +197,13 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
 	 * should elaborate the code.
 	 */
 	if (rt->rt_flags & RTF_HOST) {
+		IFA_LOCK_SPIN(rt->rt_ifa);
 		if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr)
 					->sin6_addr,
 				       &sin6->sin6_addr)) {
 			rt->rt_flags |= RTF_LOCAL;
 		}
+		IFA_UNLOCK(rt->rt_ifa);
 	}
 
 	if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU)
@@ -214,8 +218,8 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
 		 * Find out if it is because of an
 		 * ARP entry and delete it if so.
 		 */
-		rt2 = rtalloc1_locked((struct sockaddr *)sin6, 0,
-				RTF_CLONING | RTF_PRCLONING);
+		rt2 = rtalloc1_scoped_locked((struct sockaddr *)sin6, 0,
+		    RTF_CLONING | RTF_PRCLONING, sin6_get_ifscope(rt_key(rt)));
 		if (rt2) {
 			RT_LOCK(rt2);
 			if ((rt2->rt_flags & RTF_LLINFO) &&
@@ -253,8 +257,8 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
 		 *	net route entry, 3ffe:0501:: -> if0.
 		 *	This case should not raise an error.
 		 */
-		rt2 = rtalloc1_locked((struct sockaddr *)sin6, 0,
-				RTF_CLONING | RTF_PRCLONING);
+		rt2 = rtalloc1_scoped_locked((struct sockaddr *)sin6, 0,
+		    RTF_CLONING | RTF_PRCLONING, sin6_get_ifscope(rt_key(rt)));
 		if (rt2) {
 			RT_LOCK(rt2);
 			if ((rt2->rt_flags & (RTF_CLONING|RTF_HOST|RTF_GATEWAY))
@@ -294,6 +298,24 @@ in6_deleteroute(void * v_arg, void *netmask_arg, struct radix_node_head *head)
 	return (rn);
 }
 
+/*
+ * Validate (unexpire) an expiring AF_INET6 route.
+ */
+struct radix_node *
+in6_validate(struct radix_node *rn)
+{
+	struct rtentry *rt = (struct rtentry *)rn;
+
+	RT_LOCK_ASSERT_HELD(rt);
+
+	/* This is first reference? */
+	if (rt->rt_refcnt == 0 && (rt->rt_flags & RTPRF_OURS)) {
+		rt->rt_flags &= ~RTPRF_OURS;
+		rt_setexpire(rt, 0);
+	}
+	return (rn);
+}
+
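
in6_validate() factors the unexpire step out of the match path so any lookup that already holds the rtentry lock can revalidate a route; the rewritten in6_matroute_args() below is the canonical caller. A hypothetical caller following the same shape:

	/*
	 * Clear RTPRF_OURS/expiry on a freshly matched node.
	 * in6_validate() asserts the rtentry lock, so take it first.
	 */
	static struct radix_node *
	revalidate_node(struct radix_node *rn)
	{
		if (rn != NULL) {
			RT_LOCK_SPIN((struct rtentry *)rn);
			(void) in6_validate(rn);
			RT_UNLOCK((struct rtentry *)rn);
		}
		return (rn);
	}
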
 /*
  * Similar to in6_matroute_args except without the leaf-matching parameters.
  */
@@ -313,16 +335,11 @@ in6_matroute_args(void *v_arg, struct radix_node_head *head,
     rn_matchf_t *f, void *w)
 {
 	struct radix_node *rn = rn_match_args(v_arg, head, f, w);
-	struct rtentry *rt = (struct rtentry *)rn;
 
-	/* This is first reference? */
-	if (rt != NULL) {
-		RT_LOCK_SPIN(rt);
-		if (rt->rt_refcnt == 0 && (rt->rt_flags & RTPRF_OURS)) {
-			rt->rt_flags &= ~RTPRF_OURS;
-			rt->rt_rmx.rmx_expire = 0;
-		}
-		RT_UNLOCK(rt);
+	if (rn != NULL) {
+		RT_LOCK_SPIN((struct rtentry *)rn);
+		in6_validate(rn);
+		RT_UNLOCK((struct rtentry *)rn);
 	}
 	return (rn);
 }
@@ -332,17 +349,17 @@ SYSCTL_DECL(_net_inet6_ip6);
 static int rtq_reallyold = 60*60;
 	/* one hour is ``really old'' */
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire,
-	CTLFLAG_RW, &rtq_reallyold , 0, "");
+	CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_reallyold , 0, "");
 
 static int rtq_minreallyold = 10;
 	/* never automatically crank down to less */
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire,
-	CTLFLAG_RW, &rtq_minreallyold , 0, "");
+	CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_minreallyold , 0, "");
 
 static int rtq_toomany = 128;
 	/* 128 cached routes is ``too many'' */
 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache,
-	CTLFLAG_RW, &rtq_toomany , 0, "");
+	CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_toomany , 0, "");
 
 
 /*
@@ -394,12 +411,12 @@ in6_clsroute(struct radix_node *rn, __unused struct radix_node_head *head)
 			RT_LOCK(rt);
 		}
 	} else {
-		struct timeval timenow;
+		uint64_t timenow;
 
-		getmicrotime(&timenow);
+		timenow = net_uptime();
 		rt->rt_flags |= RTPRF_OURS;
-		rt->rt_rmx.rmx_expire =
-		    rt_expiry(rt, timenow.tv_sec, rtq_reallyold);
+		rt_setexpire(rt,
+		    rt_expiry(rt, timenow, rtq_reallyold));
 	}
 }
 
@@ -410,7 +427,7 @@ struct rtqk_arg {
 	int draining;
 	int killed;
 	int found;
-	time_t nextstop;
+	uint64_t nextstop;
 };
 
 /*
@@ -426,16 +443,17 @@ in6_rtqkill(struct radix_node *rn, void *rock)
 	struct rtqk_arg *ap = rock;
 	struct rtentry *rt = (struct rtentry *)rn;
 	int err;
-	struct timeval timenow;
+	uint64_t timenow;
 
-	getmicrotime(&timenow);
+	timenow = net_uptime();
 	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 
 	RT_LOCK(rt);
 	if (rt->rt_flags & RTPRF_OURS) {
 		ap->found++;
-
-		if (ap->draining || rt->rt_rmx.rmx_expire <= timenow.tv_sec ||
+		VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
+		VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
+		if (ap->draining || rt->rt_expire <= timenow ||
 		    ((rt->rt_flags & RTF_DYNAMIC) != 0 &&
 		    ip6_maxdynroutes >= 0 &&
 		    in6dynroutes > ip6_maxdynroutes / 2)) {
@@ -461,13 +479,13 @@ in6_rtqkill(struct radix_node *rn, void *rock)
 			}
 		} else {
 			if (ap->updating &&
-			    (unsigned)(rt->rt_rmx.rmx_expire - timenow.tv_sec) >
+			    (rt->rt_expire - timenow) >
 			    rt_expiry(rt, 0, rtq_reallyold)) {
-				rt->rt_rmx.rmx_expire = rt_expiry(rt,
-				    timenow.tv_sec, rtq_reallyold);
+				rt_setexpire(rt, rt_expiry(rt,
+				    timenow, rtq_reallyold));
 			}
 			ap->nextstop = lmin(ap->nextstop,
-					    rt->rt_rmx.rmx_expire);
+					    rt->rt_expire);
 			RT_UNLOCK(rt);
 		}
 	} else {
@@ -486,16 +504,16 @@ in6_rtqtimo(void *rock)
 	struct radix_node_head *rnh = rock;
 	struct rtqk_arg arg;
 	struct timeval atv;
-	static time_t last_adjusted_timeout = 0;
-	struct timeval timenow;
+	static uint64_t last_adjusted_timeout = 0;
+	uint64_t timenow;
 
 	lck_mtx_lock(rnh_lock);
 	/* Get the timestamp after we acquire the lock for better accuracy */
-	getmicrotime(&timenow);
+	timenow = net_uptime();
 
 	arg.found = arg.killed = 0;
 	arg.rnh = rnh;
-	arg.nextstop = timenow.tv_sec + rtq_timeout;
+	arg.nextstop = timenow + rtq_timeout;
 	arg.draining = arg.updating = 0;
 	rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
 
@@ -508,14 +526,14 @@ in6_rtqtimo(void *rock)
 	 * hard.
 	 */
 	if ((arg.found - arg.killed > rtq_toomany)
-	   && (timenow.tv_sec - last_adjusted_timeout >= rtq_timeout)
+	   && ((timenow - last_adjusted_timeout) >= (uint64_t)rtq_timeout)
 	   && rtq_reallyold > rtq_minreallyold) {
 		rtq_reallyold = 2*rtq_reallyold / 3;
 		if (rtq_reallyold < rtq_minreallyold) {
 			rtq_reallyold = rtq_minreallyold;
 		}
 
-		last_adjusted_timeout = timenow.tv_sec;
+		last_adjusted_timeout = timenow;
 #if DIAGNOSTIC
 		log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold to %d",
 		    rtq_reallyold);
@@ -526,7 +544,7 @@ in6_rtqtimo(void *rock)
 	}
 
 	atv.tv_usec = 0;
-	atv.tv_sec = arg.nextstop - timenow.tv_sec;
+	atv.tv_sec = arg.nextstop - timenow;
 	lck_mtx_unlock(rnh_lock);
 	timeout(in6_rtqtimo, rock, tvtohz(&atv));
 }
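
The expiry plumbing in this file switches from getmicrotime() wall-clock seconds to net_uptime(), a monotonic uint64_t uptime in seconds, so route expiry no longer jumps when the calendar clock is stepped, and timeouts become plain unsigned differences. A self-contained sketch of the arithmetic, with clock_gettime(CLOCK_MONOTONIC) standing in for the kernel's net_uptime():

	#include <stdint.h>
	#include <time.h>

	static uint64_t
	net_uptime_sketch(void)   /* user-space stand-in for net_uptime() */
	{
		struct timespec ts;

		(void) clock_gettime(CLOCK_MONOTONIC, &ts);
		return ((uint64_t)ts.tv_sec);
	}

	/* Expired iff a deadline is set and has passed; cf. in6_rtqkill(). */
	static int
	route_expired(uint64_t rt_expire, uint64_t now)
	{
		return (rt_expire != 0 && rt_expire <= now);
	}

	/* Seconds until the next walk, never negative; cf. in6_mtutimo(). */
	static uint64_t
	rearm_delay(uint64_t nextstop, uint64_t now)
	{
		return (nextstop > now ? nextstop - now : 30); /* last resort */
	}
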
@@ -536,7 +554,7 @@ in6_rtqtimo(void *rock)
  */
 struct mtuex_arg {
 	struct radix_node_head *rnh;
-	time_t nextstop;
+	uint64_t nextstop;
 };
 
 static int
@@ -544,21 +562,23 @@ in6_mtuexpire(struct radix_node *rn, void *rock)
 {
 	struct rtentry *rt = (struct rtentry *)rn;
 	struct mtuex_arg *ap = rock;
-	struct timeval timenow;
+	uint64_t timenow;
 
-	getmicrotime(&timenow);
+	timenow = net_uptime();
 
 	/* sanity */
 	if (!rt)
 		panic("rt == NULL in in6_mtuexpire");
 
 	RT_LOCK(rt);
-	if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
-		if (rt->rt_rmx.rmx_expire <= timenow.tv_sec) {
+	VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
+	VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
+	if (rt->rt_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
+		if (rt->rt_expire <= timenow) {
 			rt->rt_flags |= RTF_PROBEMTU;
 		} else {
 			ap->nextstop = lmin(ap->nextstop,
-					rt->rt_rmx.rmx_expire);
+					rt->rt_expire);
 		}
 	}
 	RT_UNLOCK(rt);
@@ -574,24 +594,24 @@ in6_mtutimo(void *rock)
 	struct radix_node_head *rnh = rock;
 	struct mtuex_arg arg;
 	struct timeval atv;
-	struct timeval timenow;
+	uint64_t timenow, timo;
 
-	getmicrotime(&timenow);
+	timenow = net_uptime();
 
 	arg.rnh = rnh;
-	arg.nextstop = timenow.tv_sec + MTUTIMO_DEFAULT;
+	arg.nextstop = timenow + MTUTIMO_DEFAULT;
 	lck_mtx_lock(rnh_lock);
 	rnh->rnh_walktree(rnh, in6_mtuexpire, &arg);
 
 	atv.tv_usec = 0;
-	atv.tv_sec = arg.nextstop;
-	if (atv.tv_sec < timenow.tv_sec) {
+	timo = arg.nextstop;
+	if (timo < timenow) {
 #if DIAGNOSTIC
 		log(LOG_DEBUG, "IPv6: invalid mtu expiration time on routing table\n");
 #endif
-		arg.nextstop = timenow.tv_sec + 30;	/*last resort*/
+		arg.nextstop = timenow + 30;	/*last resort*/
 	}
-	atv.tv_sec -= timenow.tv_sec;
+	atv.tv_sec = timo - timenow;
 	lck_mtx_unlock(rnh_lock);
 	timeout(in6_mtutimo, rock, tvtohz(&atv));
 }
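
The VERIFY() pairs added to in6_rtqkill() and in6_mtuexpire() pin down the invariant behind rt_setexpire(): the new rt_expire field and the legacy rt_rmx.rmx_expire metric are zero together or nonzero together. A hedged sketch of a setter that preserves that invariant (the real rt_setexpire() lives in the common routing code and also converts between the uptime and calendar time bases):

	#include <assert.h>
	#include <stdint.h>

	struct rt_sketch {             /* stand-in for struct rtentry */
		uint64_t rt_expire;    /* monotonic deadline, 0 = none */
		int32_t  rmx_expire;   /* legacy metric, kept in lock-step */
	};

	static void
	rt_setexpire_sketch(struct rt_sketch *rt, uint64_t expiry)
	{
		rt->rt_expire = expiry;
		/* assumption: real code converts time bases here */
		rt->rmx_expire = (int32_t)expiry;

		/* the invariant the diff's VERIFY() pairs assert */
		assert(rt->rt_expire == 0 || rt->rmx_expire != 0);
		assert(rt->rt_expire != 0 || rt->rmx_expire == 0);
	}
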
diff --git a/bsd/netinet6/in6_src.c b/bsd/netinet6/in6_src.c
index 71441847b..1eb5cd60f 100644
--- a/bsd/netinet6/in6_src.c
+++ b/bsd/netinet6/in6_src.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -104,9 +104,13 @@
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <sys/kauth.h>
+#include <sys/priv.h>
 #include <kern/lock.h>
 
 #include <net/if.h>
+#include <net/if_types.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
@@ -118,273 +122,950 @@
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
+#include <netinet6/scope6_var.h>
 #include <netinet6/nd6.h>
-#if ENABLE_DEFAULT_SCOPE
-#include <netinet6/scope6_var.h> 
-#endif
 
 #include <net/net_osdep.h>
 
 #include "loop.h"
 
+SYSCTL_DECL(_net_inet6_ip6);
+
+static int ip6_select_srcif_debug = 0;
+SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_srcif_debug,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_srcif_debug, 0,
+    "log source interface selection debug info");
+
+#define ADDR_LABEL_NOTAPP (-1)
+struct in6_addrpolicy defaultaddrpolicy;
+
+int ip6_prefer_tempaddr = 1;
+#ifdef ENABLE_ADDRSEL
+extern lck_mtx_t *addrsel_mutex;
+#define	ADDRSEL_LOCK()		lck_mtx_lock(addrsel_mutex)
+#define	ADDRSEL_UNLOCK()	lck_mtx_unlock(addrsel_mutex)
+#else
+#define	ADDRSEL_LOCK()
+#define	ADDRSEL_UNLOCK()
+#endif
+
+static int selectroute(struct sockaddr_in6 *, struct sockaddr_in6 *,
+	struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *,
+	struct ifnet **, struct rtentry **, int, int, unsigned int,
+	unsigned int);
+static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *,
+	struct ip6_moptions *, struct route_in6 *ro, unsigned int,
+	unsigned int, struct ifnet **);
+static void init_policy_queue(void);
+static int add_addrsel_policyent(const struct in6_addrpolicy *);
+#ifdef ENABLE_ADDRSEL
+static int delete_addrsel_policyent(const struct in6_addrpolicy *);
+#endif
+static int walk_addrsel_policy(int (*)(const struct in6_addrpolicy *, void *),
+	void *);
+static int dump_addrsel_policyent(const struct in6_addrpolicy *, void *);
+static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
+void addrsel_policy_init(void);
+
 /*
  * Return an IPv6 address, which is the most appropriate for a given
  * destination and user specified options.
  * If necessary, this function lookups the routing table and returns
  * an entry to the caller for later use.
  */
+#define REPLACE(r) do {\
+	if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
+		sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
+		ip6stat.ip6s_sources_rule[(r)]++; \
+	goto replace; \
+} while(0)
+#define NEXTSRC(r) do {\
+	if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
+		sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
+		ip6stat.ip6s_sources_rule[(r)]++; \
+	goto next;		/* XXX: we can't use 'continue' here */ \
+} while(0)
+#define BREAK(r) do { \
+	if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
+		sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
+		ip6stat.ip6s_sources_rule[(r)]++; \
+	goto out;		/* XXX: we can't use 'break' here */ \
+} while(0)
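
REPLACE(), NEXTSRC() and BREAK() tie every decision of the selection loop below to a per-rule counter: each bumps ip6stat.ip6s_sources_rule[r] (bounds-checked) before jumping, which a bare continue/break inside the candidate loop could not do. Each rule then reduces to a symmetric pair of tests; schematically, for a hypothetical rule N preferring property P:

	/* Rule N: prefer candidates with property P (schematic only) */
	if (P(ia_best) && !P(ia))
		NEXTSRC(N);    /* keep current best, tally rule N */
	if (!P(ia_best) && P(ia))
		REPLACE(N);    /* adopt the candidate, tally rule N */
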
+
 struct in6_addr *
-in6_selectsrc(
-	struct sockaddr_in6 *dstsock,
-	struct ip6_pktopts *opts,
-	struct ip6_moptions *mopts,
-	struct route_in6 *ro,
-	struct in6_addr *laddr,
-	struct in6_addr *src_storage,
-	int *errorp)
+in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
+    struct inpcb *inp, struct route_in6 *ro,
+    struct ifnet **ifpp, struct in6_addr *src_storage, unsigned int ifscope,
+    int *errorp)
 {
-	struct in6_addr *dst;
-	struct in6_ifaddr *ia6 = 0;
+	struct in6_addr dst;
+	struct ifnet *ifp = NULL;
+	struct in6_ifaddr *ia = NULL, *ia_best = NULL;
 	struct in6_pktinfo *pi = NULL;
+	int dst_scope = -1, best_scope = -1, best_matchlen = -1;
+	struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL;
+	u_int32_t odstzone;
+	int prefer_tempaddr;
+	struct ip6_moptions *mopts;
+	struct timeval timenow;
+	unsigned int nocell;
+	boolean_t islocal = FALSE;
 
-	dst = &dstsock->sin6_addr;
+	getmicrotime(&timenow);
+
+	dst = dstsock->sin6_addr; /* make a copy for local operation */
 	*errorp = 0;
+	if (ifpp != NULL)
+		*ifpp = NULL;
+
+	if (inp != NULL) {
+		mopts = inp->in6p_moptions;
+		nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
+	} else {
+		mopts = NULL;
+		nocell = 0;
+	}
 
 	/*
 	 * If the source address is explicitly specified by the caller,
-	 * use it.
+	 * check if the requested source address is indeed a unicast address
+	 * assigned to the node, and can be used as the packet's source
+	 * address.  If everything is okay, use the address as source.
 	 */
 	if (opts && (pi = opts->ip6po_pktinfo) &&
-	    !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr))
-		return(&pi->ipi6_addr);
+	    !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
+		struct sockaddr_in6 srcsock;
+		struct in6_ifaddr *ia6;
+
+		/* get the outgoing interface */
+		if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, ifscope,
+		    nocell, &ifp)) != 0) {
+			return (NULL);
+		}
+
+		/*
+		 * determine the appropriate zone id of the source based on
+		 * the zone of the destination and the outgoing interface.
+		 * If the specified address is ambiguous wrt the scope zone,
+		 * the interface must be specified; otherwise, ifa_ifwithaddr()
+		 * will fail matching the address.
+		 */
+		bzero(&srcsock, sizeof(srcsock));
+		srcsock.sin6_family = AF_INET6;
+		srcsock.sin6_len = sizeof(srcsock);
+		srcsock.sin6_addr = pi->ipi6_addr;
+		if (ifp) {
+			*errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL);
+			if (*errorp != 0) {
+				ifnet_release(ifp);
+				return (NULL);
+			}
+		}
+		ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock));
+		if (ia6 == NULL) {
+			*errorp = EADDRNOTAVAIL;
+			if (ifp != NULL)
+				ifnet_release(ifp);
+			return (NULL);
+		}
+		IFA_LOCK_SPIN(&ia6->ia_ifa);
+		if ((ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY)) ||
+		    (nocell && (ia6->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR))) {
+			IFA_UNLOCK(&ia6->ia_ifa);
+			IFA_REMREF(&ia6->ia_ifa);
+			*errorp = EADDRNOTAVAIL;
+			if (ifp != NULL)
+				ifnet_release(ifp);
+			return (NULL);
+		}
+
+		*src_storage = satosin6(&ia6->ia_addr)->sin6_addr;
+		IFA_UNLOCK(&ia6->ia_ifa);
+		IFA_REMREF(&ia6->ia_ifa);
+		if (ifpp != NULL) {
+			/* if ifp is non-NULL, refcnt held in in6_selectif() */
+			*ifpp = ifp;
+		} else if (ifp != NULL) {
+			ifnet_release(ifp);
+		}
+		return (src_storage);
+	}
 
 	/*
-	 * If the source address is not specified but the socket(if any)
-	 * is already bound, use the bound address.
+	 * Otherwise, if the socket has already bound the source, just use it.
 	 */
-	if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr))
-		return(laddr);
+	if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 
+		return (&inp->in6p_laddr);
 
 	/*
-	 * If the caller doesn't specify the source address but
-	 * the outgoing interface, use an address associated with
-	 * the interface.
+	 * If the address is not specified, choose the best one based on
+	 * the outgoing interface and the destination address.
 	 */
-	if (pi && pi->ipi6_ifindex) {
-		ifnet_t out_ifp = NULL;
-		ifnet_head_lock_shared();
-		if (pi->ipi6_ifindex > if_index) {
-			ifnet_head_done();
-                        *errorp = EADDRNOTAVAIL;
-			return(0);
-		} else {
-			out_ifp = ifindex2ifnet[pi->ipi6_ifindex];
+
+	/* get the outgoing interface */
+	if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, ifscope, nocell,
+	    &ifp)) != 0)
+		return (NULL);
+
+#ifdef DIAGNOSTIC
+	if (ifp == NULL)	/* this should not happen */
+		ia6 = orap->ra_addr;
+#endif
+	*errorp = in6_setscope(&dst, ifp, &odstzone);
+	if (*errorp != 0) {
+		if (ifp != NULL)
+			ifnet_release(ifp);
+		return (NULL);
+	}
+	lck_rw_lock_shared(&in6_ifaddr_rwlock);
+
+	for (ia = in6_ifaddrs; ia; ia = ia->ia_next) {
+		int new_scope = -1, new_matchlen = -1;
+		struct in6_addrpolicy *new_policy = NULL;
+		u_int32_t srczone, osrczone, dstzone;
+		struct in6_addr src;
+		struct ifnet *ifp1 = ia->ia_ifp;
+
+		IFA_LOCK(&ia->ia_ifa);
+		/*
+		 * We'll never take an address that breaks the scope zone
+		 * of the destination.  We also skip an address if its zone
+		 * does not contain the outgoing interface.
+		 * XXX: we should probably use sin6_scope_id here.
+		 */
+		if (in6_setscope(&dst, ifp1, &dstzone) ||
+		    odstzone != dstzone)
+			goto next;
+
+		src = ia->ia_addr.sin6_addr;
+		if (in6_setscope(&src, ifp, &osrczone) ||
+		    in6_setscope(&src, ifp1, &srczone) ||
+		    osrczone != srczone)
+			goto next;
+
+		/* avoid unusable addresses */
+		if ((ia->ia6_flags &
+		     (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED)))
+			goto next;
+
+		if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia))
+			goto next;
+
+		/* Rule 1: Prefer same address */
+		if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr))
+			BREAK(1); /* there should be no better candidate */
+
+		if (ia_best == NULL)
+			REPLACE(0);
+
+		/* Rule 2: Prefer appropriate scope */
+		if (dst_scope < 0)
+			dst_scope = in6_addrscope(&dst);
+		new_scope = in6_addrscope(&ia->ia_addr.sin6_addr);
+		if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) {
+			if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0)
+				REPLACE(2);
+			NEXTSRC(2);
+		} else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) {
+			if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0)
+				NEXTSRC(2);
+			REPLACE(2);
 		}
-		ifnet_head_done();
-		
-		/* XXX boundary check is assumed to be already done. */
-		ia6 = in6_ifawithscope(out_ifp, dst);
-		if (ia6 == 0) {
-			*errorp = EADDRNOTAVAIL;
-			return(0);
+
+		/*
+		 * Rule 3: Avoid deprecated addresses.  Note that the case of
+		 * !ip6_use_deprecated is already rejected above.
+		 */
+		if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia))
+			NEXTSRC(3);
+		if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia))
+			REPLACE(3);
+
+		/* Rule 4: Prefer home addresses */
+		/*
+		 * XXX: This is a TODO.  We should probably merge the MIP6
+		 * case above.
+		 */
+
+		/* Rule 5: Prefer outgoing interface */
+		if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
+			NEXTSRC(5);
+		if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
+			REPLACE(5);
+
+		/*
+		 * Rule 6: Prefer matching label
+		 * Note that best_policy should be non-NULL here.
+		 */
+		if (dst_policy == NULL)
+			dst_policy = in6_addrsel_lookup_policy(dstsock);
+		if (dst_policy->label != ADDR_LABEL_NOTAPP) {
+			new_policy = in6_addrsel_lookup_policy(&ia->ia_addr);
+			if (dst_policy->label == best_policy->label &&
+			    dst_policy->label != new_policy->label)
+				NEXTSRC(6);
+			if (dst_policy->label != best_policy->label &&
+			    dst_policy->label == new_policy->label)
+				REPLACE(6);
 		}
-		*src_storage = satosin6(&ia6->ia_addr)->sin6_addr;
-		ifafree(&ia6->ia_ifa);
-		return src_storage;
+
+		/*
+		 * Rule 7: Prefer public addresses.
+		 * We allow users to reverse the logic by configuring
+		 * a sysctl variable, so that privacy conscious users can
+		 * always prefer temporary addresses.
+		 * Don't use temporary addresses for local destinations or
+		 * for multicast addresses unless we were passed in an option.
+		 */
+		if (IN6_IS_ADDR_MULTICAST(&dst) ||
+		    in6_matchlen(&ia_best->ia_addr.sin6_addr, &dst) >=
+		    in6_mask2len(&ia_best->ia_prefixmask.sin6_addr, NULL))
+			islocal = TRUE;
+		if (opts == NULL ||
+		    opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) {
+			prefer_tempaddr = islocal ? 0 : ip6_prefer_tempaddr;
+		} else if (opts->ip6po_prefer_tempaddr ==
+		    IP6PO_TEMPADDR_NOTPREFER) {
+			prefer_tempaddr = 0;
+		} else
+			prefer_tempaddr = 1;
+		if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
+		    (ia->ia6_flags & IN6_IFF_TEMPORARY)) {
+			if (prefer_tempaddr)
+				REPLACE(7);
+			else
+				NEXTSRC(7);
+		}
+		if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
+		    !(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
+			if (prefer_tempaddr)
+				NEXTSRC(7);
+			else
+				REPLACE(7);
+		}
+
+		/*
+		 * Rule 8: prefer addresses on alive interfaces.
+		 * This is a KAME specific rule.
+		 */
+		if ((ia_best->ia_ifp->if_flags & IFF_UP) &&
+		    !(ia->ia_ifp->if_flags & IFF_UP))
+			NEXTSRC(8);
+		if (!(ia_best->ia_ifp->if_flags & IFF_UP) &&
+		    (ia->ia_ifp->if_flags & IFF_UP))
+			REPLACE(8);
+
+		/*
+		 * Rule 14: Use longest matching prefix.
+		 * Note: in the address selection draft, this rule is
+		 * documented as "Rule 8".  However, since it is also
+		 * documented that this rule can be overridden, we assign
+		 * a large number so that it is easy to assign smaller numbers
+		 * to more preferred rules.
+		 */
+		new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst);
+		if (best_matchlen < new_matchlen)
+			REPLACE(14);
+		if (new_matchlen < best_matchlen)
+			NEXTSRC(14);
+
+		/* Rule 15 is reserved. */
+
+		/*
+		 * Last resort: just keep the current candidate.
+		 * Or, do we need more rules?
+		 */
+		IFA_UNLOCK(&ia->ia_ifa);
+		continue;
+
+replace:
+		best_scope = (new_scope >= 0 ? new_scope :
+			      in6_addrscope(&ia->ia_addr.sin6_addr));
+		best_policy = (new_policy ? new_policy :
+			       in6_addrsel_lookup_policy(&ia->ia_addr));
+		best_matchlen = (new_matchlen >= 0 ? new_matchlen :
+				 in6_matchlen(&ia->ia_addr.sin6_addr, &dst));
+		IFA_ADDREF_LOCKED(&ia->ia_ifa);	/* for ia_best */
+		IFA_UNLOCK(&ia->ia_ifa);
+		if (ia_best != NULL)
+			IFA_REMREF(&ia_best->ia_ifa);
+		ia_best = ia;
+		continue;
+
+next:
+		IFA_UNLOCK(&ia->ia_ifa);
+		continue;
+
+out:
+		IFA_ADDREF_LOCKED(&ia->ia_ifa);	/* for ia_best */
+		IFA_UNLOCK(&ia->ia_ifa);
+		if (ia_best != NULL)
+			IFA_REMREF(&ia_best->ia_ifa);
+		ia_best = ia;
+		break;
+	}
+
+	lck_rw_done(&in6_ifaddr_rwlock);
+
+	if (nocell && ia_best != NULL &&
+	    (ia_best->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR)) {
+		IFA_REMREF(&ia_best->ia_ifa);
+		ia_best = NULL;
+	}
+
+	if ( (ia = ia_best) == NULL) {
+		*errorp = EADDRNOTAVAIL;
+		if (ifp != NULL)
+			ifnet_release(ifp);
+		return (NULL);
 	}
 
+	IFA_LOCK_SPIN(&ia->ia_ifa);
+	*src_storage = satosin6(&ia->ia_addr)->sin6_addr;
+	IFA_UNLOCK(&ia->ia_ifa);
+	IFA_REMREF(&ia->ia_ifa);
+	if (ifpp != NULL) {
+		/* if ifp is non-NULL, refcnt held in in6_selectif() */
+		*ifpp = ifp;
+	} else if (ifp != NULL) {
+		ifnet_release(ifp);
+	}
+	return (src_storage);
+}
+
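
The candidate loop above is an RFC 3484-style comparison: the rules run in order and each either settles the choice or falls through, so longest-prefix matching (numbered Rule 14 here) only breaks ties left by the scope, deprecation, outgoing-interface and label rules. A worked example of Rule 2, using the KAME scope ordering in which smaller values mean narrower scope:

	#include <stdio.h>

	/* KAME scope levels (values per the scoped-address rules) */
	#define SCOPE_LINKLOCAL 0x02
	#define SCOPE_GLOBAL    0x0e

	int
	main(void)
	{
		int dst_scope  = SCOPE_LINKLOCAL; /* destination fe80::1 */
		int best_scope = SCOPE_GLOBAL;    /* current best: global */
		int new_scope  = SCOPE_LINKLOCAL; /* candidate: link-local */

		/*
		 * Rule 2: a candidate whose scope is narrower than the
		 * current best's, yet no narrower than the destination's,
		 * replaces the best -- even though the global address
		 * might later win Rule 14 on prefix length.
		 */
		if (new_scope < best_scope && new_scope >= dst_scope)
			printf("REPLACE(2): link-local candidate wins\n");
		return (0);
	}
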
+/*
+ * Given a source IPv6 address (and route, if available), determine the best
+ * interface to send the packet from.  Checking for (and updating) the
+ * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done
+ * without any locks, based on the assumption that in the event this is
+ * called from ip6_output(), the output operation is single-threaded per-pcb,
+ * i.e. for any given pcb there can only be one thread performing output at
+ * the IPv6 layer.
+ *
+ * This routine is analogous to in_selectsrcif() for IPv4.
+ *
+ * clone - meaningful only for bsdi and freebsd
+ */
+static int
+selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock,
+    struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro,
+    struct ifnet **retifp, struct rtentry **retrt, int clone,
+    int norouteok, unsigned int ifscope, unsigned int nocell)
+{
+	int error = 0;
+	struct ifnet *ifp = NULL;
+	struct route_in6 *route = NULL;
+	struct sockaddr_in6 *sin6_next;
+	struct in6_pktinfo *pi = NULL;
+	struct in6_addr *dst = &dstsock->sin6_addr;
+	struct ifaddr *ifa = NULL;
+	char s_src[MAX_IPv6_STR_LEN], s_dst[MAX_IPv6_STR_LEN];
+	boolean_t select_srcif;
+
+#if 0
+	char ip6buf[INET6_ADDRSTRLEN];
+
+	if (dstsock->sin6_addr.s6_addr32[0] == 0 &&
+	    dstsock->sin6_addr.s6_addr32[1] == 0 &&
+	    !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) {
+		printf("in6_selectroute: strange destination %s\n",
+		       ip6_sprintf(ip6buf, &dstsock->sin6_addr));
+	} else {
+		printf("in6_selectroute: destination = %s%%%d\n",
+		       ip6_sprintf(ip6buf, &dstsock->sin6_addr),
+		       dstsock->sin6_scope_id); /* for debug */
+	}
+#endif
+
+	if (retifp != NULL)
+		*retifp = NULL;
+
+	if (retrt != NULL)
+		*retrt = NULL;
+
+	if (ip6_select_srcif_debug) {
+		struct in6_addr src;
+		src = (srcsock != NULL) ? srcsock->sin6_addr : in6addr_any;
+		(void) inet_ntop(AF_INET6, &src, s_src, sizeof (s_src));
+		(void) inet_ntop(AF_INET6, dst, s_dst, sizeof (s_dst));
+	}
+
+	/*
+	 * If the destination address is UNSPECIFIED addr, bail out.
+	 */
+	if (IN6_IS_ADDR_UNSPECIFIED(dst)) {
+		error = EHOSTUNREACH;
+		goto done;
+	}
+
+	/*
+	 * Perform source interface selection only if Scoped Routing
+	 * is enabled and the source address isn't unspecified.
+	 */
+	select_srcif = (ip6_doscopedroute && srcsock != NULL &&
+	    !IN6_IS_ADDR_UNSPECIFIED(&srcsock->sin6_addr));
+
 	/*
-	 * If the destination address is a link-local unicast address or
-	 * a multicast address, and if the outgoing interface is specified
-	 * by the sin6_scope_id filed, use an address associated with the
-	 * interface.
-	 * XXX: We're now trying to define more specific semantics of
-	 *      sin6_scope_id field, so this part will be rewritten in
-	 *      the near future.
+	 * If Scoped Routing is disabled, ignore the given ifscope.
+	 * Otherwise even if source selection won't be performed,
+	 * we still obey IPV6_BOUND_IF.
 	 */
-	if ((IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst)) &&
-	    dstsock->sin6_scope_id) {
+	if (!ip6_doscopedroute && ifscope != IFSCOPE_NONE)
+		ifscope = IFSCOPE_NONE;
+
+	/* If the caller specified the outgoing interface explicitly, use it */
+	if (opts != NULL && (pi = opts->ip6po_pktinfo) != NULL &&
+	    pi->ipi6_ifindex != 0) {
 		/*
-		 * I'm not sure if boundary check for scope_id is done
-		 * somewhere...
-		 *
-		 * Since sin6_scope_id is unsigned, we only need to check against if_index.
+		 * IPV6_PKTINFO takes precedence over IPV6_BOUND_IF.
 		 */
-		ifnet_t out_ifp = NULL;
+		ifscope = pi->ipi6_ifindex;
 		ifnet_head_lock_shared();
-		if (if_index < dstsock->sin6_scope_id) {
-			*errorp = ENXIO; /* XXX: better error? */
-			ifnet_head_done();
-			return(0);
+		/* ifp may be NULL if detached or out of range */
+		ifp = (ifscope <= if_index) ? ifindex2ifnet[ifscope] : NULL;
+		ifnet_head_done();
+		if (norouteok || retrt == NULL || IN6_IS_ADDR_MULTICAST(dst)) {
+			/*
+			 * We do not have to check or get the route for
+			 * multicast.  If the caller didn't ask/care for
+			 * the route and we have no interface to use,
+			 * it's an error.
+			 */
+			if (ifp == NULL)
+				error = EHOSTUNREACH;
+			goto done;
 		} else {
-			out_ifp = ifindex2ifnet[dstsock->sin6_scope_id];
+			goto getsrcif;
 		}
-		ifnet_head_done();
+	}
 
-		ia6 = in6_ifawithscope(out_ifp, dst);
-		if (ia6 == 0) {
-			*errorp = EADDRNOTAVAIL;
-			return(0);
+	/*
+	 * If the destination address is a multicast address and the outgoing
+	 * interface for the address is specified by the caller, use it.
+	 */
+	if (IN6_IS_ADDR_MULTICAST(dst) && mopts != NULL) {
+		IM6O_LOCK(mopts);
+		if ((ifp = mopts->im6o_multicast_ifp) != NULL) {
+			IM6O_UNLOCK(mopts);
+			goto done; /* we do not need a route for multicast. */
 		}
-		*src_storage = satosin6(&ia6->ia_addr)->sin6_addr;
-		ifafree(&ia6->ia_ifa);
-		return src_storage;
+		IM6O_UNLOCK(mopts);
+	}
+
+getsrcif:
+	/*
+	 * If the outgoing interface was not set via IPV6_BOUND_IF or
+	 * IPV6_PKTINFO, use the scope ID in the destination address.
+	 */
+	if (ip6_doscopedroute && ifscope == IFSCOPE_NONE)
+		ifscope = dstsock->sin6_scope_id;
+
+	/*
+	 * Perform source interface selection; the source IPv6 address
+	 * must belong to one of the addresses of the interface used
+	 * by the route.  For performance reasons, do this only if
+	 * there is no route, or if the routing table has changed,
+	 * or if we haven't done source interface selection on this
+	 * route (for this PCB instance) before.
+	 */
+	if (!select_srcif || (ro != NULL && ro->ro_rt != NULL &&
+	    (ro->ro_rt->rt_flags & RTF_UP) &&
+	    ro->ro_rt->generation_id == route_generation &&
+	    (ro->ro_flags & ROF_SRCIF_SELECTED))) {
+		if (ro != NULL && ro->ro_rt != NULL) {
+			ifa = ro->ro_rt->rt_ifa;
+			IFA_ADDREF(ifa);
+		}
+		goto getroute;
 	}
 
 	/*
-	 * If the destination address is a multicast address and
-	 * the outgoing interface for the address is specified
-	 * by the caller, use an address associated with the interface.
-	 * There is a sanity check here; if the destination has node-local
-	 * scope, the outgoing interfacde should be a loopback address.
-	 * Even if the outgoing interface is not specified, we also
-	 * choose a loopback interface as the outgoing interface.
+	 * Given the source IPv6 address, find a suitable source interface
+	 * to use for transmission; if a scope ID has been specified,
+	 * optimize the search by looking at the addresses only for that
+	 * interface.  This is still suboptimal, however, as we need to
+	 * traverse the per-interface list.
 	 */
-	if (IN6_IS_ADDR_MULTICAST(dst)) {
-		struct ifnet *ifp = mopts ? mopts->im6o_multicast_ifp : NULL;
+	if (ifscope != IFSCOPE_NONE || (ro != NULL && ro->ro_rt != NULL)) {
+		unsigned int scope = ifscope;
+		struct ifnet *rt_ifp;
+
+		rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL;
 
-		if (ifp == NULL && IN6_IS_ADDR_MC_NODELOCAL(dst)) {
-			ifp = lo_ifp;
+		/*
+		 * If no scope is specified and the route is stale (pointing
+		 * to a defunct interface) use the current primary interface;
+		 * this happens when switching between interfaces configured
+		 * with the same IPv6 address.  Otherwise pick up the scope
+		 * information from the route; the ULP may have looked up a
+		 * correct route and we just need to verify it here and mark
+		 * it with the ROF_SRCIF_SELECTED flag below.
+		 */
+		if (scope == IFSCOPE_NONE) {
+			scope = rt_ifp->if_index;
+			if (scope != get_primary_ifscope(AF_INET6) &&
+			    ro->ro_rt->generation_id != route_generation)
+				scope = get_primary_ifscope(AF_INET6);
 		}
 
-		if (ifp) {
-			ia6 = in6_ifawithscope(ifp, dst);
-			if (ia6 == 0) {
-				*errorp = EADDRNOTAVAIL;
-				return(0);
+		ifa = (struct ifaddr *)
+		    ifa_foraddr6_scoped(&srcsock->sin6_addr, scope);
+
+		if (ip6_select_srcif_debug && ifa != NULL) {
+			if (ro->ro_rt != NULL) {
+				printf("%s->%s ifscope %d->%d ifa_if %s "
+				    "ro_if %s\n", s_src, s_dst, ifscope,
+				    scope, if_name(ifa->ifa_ifp),
+				    if_name(rt_ifp));
+			} else {
+				printf("%s->%s ifscope %d->%d ifa_if %s\n",
+				    s_src, s_dst, ifscope, scope,
+				    if_name(ifa->ifa_ifp));
 			}
-			*src_storage = satosin6(&ia6->ia_addr)->sin6_addr;
-			ifafree(&ia6->ia_ifa);
-			return src_storage;
 		}
 	}
 
 	/*
-	 * If the next hop address for the packet is specified
-	 * by caller, use an address associated with the route
-	 * to the next hop.
+	 * Slow path; search for an interface having the corresponding source
+	 * IPv6 address if the scope was not specified by the caller, and:
+	 *
+	 *   1) There currently isn't any route, or,
+	 *   2) The interface used by the route does not own that source
+	 *	IPv6 address; in this case, the route will get blown away
+	 *	and we'll do a more specific scoped search using the newly
+	 *	found interface.
 	 */
-	{
-		struct sockaddr_in6 *sin6_next;
-		struct rtentry *rt;
-
-		if (opts && opts->ip6po_nexthop) {
-			sin6_next = satosin6(opts->ip6po_nexthop);
-			rt = nd6_lookup(&sin6_next->sin6_addr, 1, NULL, 0);
-			if (rt != NULL) {
-				RT_LOCK_ASSERT_HELD(rt);
-				ia6 = in6_ifawithscope(rt->rt_ifp, dst);
-				if (ia6 == 0) {
-					ia6 = ifatoia6(rt->rt_ifa);
-					if (ia6 != NULL)
-						ifaref(&ia6->ia_ifa);
-				}
+	if (ifa == NULL && ifscope == IFSCOPE_NONE) {
+		ifa = (struct ifaddr *)ifa_foraddr6(&srcsock->sin6_addr);
+
+		if (ip6_select_srcif_debug && ifa != NULL) {
+			printf("%s->%s ifscope %d ifa_if %s\n",
+			    s_src, s_dst, ifscope, if_name(ifa->ifa_ifp));
+		}
+
+	}
+
+getroute:
+	if (ifa != NULL)
+		ifscope = ifa->ifa_ifp->if_index;
+
+	/*
+	 * If the next hop address for the packet is specified by the caller,
+	 * use it as the gateway.
+	 */
+	if (opts != NULL && opts->ip6po_nexthop != NULL) {
+		struct route_in6 *ron;
+
+		sin6_next = satosin6(opts->ip6po_nexthop);
+
+		/* at this moment, we only support AF_INET6 next hops */
+		if (sin6_next->sin6_family != AF_INET6) {
+			error = EAFNOSUPPORT; /* or should we proceed? */
+			goto done;
+		}
+
+		/*
+		 * If the next hop is an IPv6 address, then the node identified
+		 * by that address must be a neighbor of the sending host.
+		 */
+		ron = &opts->ip6po_nextroute;
+		if (ron->ro_rt != NULL)
+			RT_LOCK(ron->ro_rt);
+		if ((ron->ro_rt != NULL &&
+		    ((ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) !=
+		    (RTF_UP | RTF_LLINFO) ||
+		    ron->ro_rt->generation_id != route_generation ||
+		    (select_srcif && (ifa == NULL ||
+		    ifa->ifa_ifp != ron->ro_rt->rt_ifp)))) ||
+		    !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr,
+		    &sin6_next->sin6_addr)) {
+			if (ron->ro_rt != NULL) {
+				RT_UNLOCK(ron->ro_rt);
+				rtfree(ron->ro_rt);
+				ron->ro_rt = NULL;
 			}
-			if (ia6 == 0) {
-				*errorp = EADDRNOTAVAIL;
-				if (rt != NULL) {
-					RT_REMREF_LOCKED(rt);
-					RT_UNLOCK(rt);
+			*satosin6(&ron->ro_dst) = *sin6_next;
+		}
+		if (ron->ro_rt == NULL) {
+			rtalloc_scoped((struct route *)ron, ifscope);
+			if (ron->ro_rt != NULL)
+				RT_LOCK(ron->ro_rt);
+			if (ron->ro_rt == NULL ||
+			    !(ron->ro_rt->rt_flags & RTF_LLINFO) ||
+			    !IN6_ARE_ADDR_EQUAL(&satosin6(rt_key(ron->ro_rt))->
+			    sin6_addr, &sin6_next->sin6_addr)) {
+				if (ron->ro_rt != NULL) {
+					RT_UNLOCK(ron->ro_rt);
+					rtfree(ron->ro_rt);
+					ron->ro_rt = NULL;
 				}
-				return(0);
+				error = EHOSTUNREACH;
+				goto done;
+			}
+		}
+		route = ron;
+		ifp = ron->ro_rt->rt_ifp;
+
+		/*
+		 * When cloning is required, try to allocate a route to the
+		 * destination so that the caller can store path MTU
+		 * information.
+		 */
+		if (!clone) {
+			if (select_srcif) {
+				/* Keep the route locked */
+				goto validateroute;
 			}
-			*src_storage = satosin6(&ia6->ia_addr)->sin6_addr;
-			ifafree(&ia6->ia_ifa);
-			RT_REMREF_LOCKED(rt);
-			RT_UNLOCK(rt);
-			return src_storage;
+			RT_UNLOCK(ron->ro_rt);
+			goto done;
 		}
+		RT_UNLOCK(ron->ro_rt);
 	}
 
 	/*
-	 * If route is known or can be allocated now,
-	 * our src addr is taken from the i/f, else punt.
+	 * Use a cached route if it exists and is valid, else try to allocate
+	 * a new one.  Note that we should check the address family of the
+	 * cached destination, in case of sharing the cache with IPv4.
 	 */
-	if (ro) {
+	if (ro == NULL)
+		goto done;
+	if (ro->ro_rt != NULL)
+		RT_LOCK(ro->ro_rt);
+	if (ro->ro_rt != NULL && (!(ro->ro_rt->rt_flags & RTF_UP) ||
+	    satosin6(&ro->ro_dst)->sin6_family != AF_INET6 ||
+	    ro->ro_rt->generation_id != route_generation ||
+	    !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst) ||
+	    (select_srcif && (ifa == NULL ||
+	    ifa->ifa_ifp != ro->ro_rt->rt_ifp)))) {
+		RT_UNLOCK(ro->ro_rt);
+		rtfree(ro->ro_rt);
+		ro->ro_rt = NULL;
+	}
+	if (ro->ro_rt == NULL) {
+		struct sockaddr_in6 *sa6;
+
 		if (ro->ro_rt != NULL)
-			RT_LOCK(ro->ro_rt);
-		if (ro->ro_rt != NULL &&
-		    (!(ro->ro_rt->rt_flags & RTF_UP) ||
-		     satosin6(&ro->ro_dst)->sin6_family != AF_INET6 ||
-		     ro->ro_rt->generation_id != route_generation ||
-		     !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr,
-					 dst))) {
 			RT_UNLOCK(ro->ro_rt);
-			rtfree(ro->ro_rt);
-			ro->ro_rt = NULL;
-		}
-		if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL) {
-			struct sockaddr_in6 *sa6;
-
-			if (ro->ro_rt != NULL)
-				RT_UNLOCK(ro->ro_rt);
-			/* No route yet, so try to acquire one */
-			bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
-			sa6 = (struct sockaddr_in6 *)&ro->ro_dst;
-			sa6->sin6_family = AF_INET6;
-			sa6->sin6_len = sizeof(struct sockaddr_in6);
-			sa6->sin6_addr = *dst;
-#if SCOPEDROUTING
-			sa6->sin6_scope_id = dstsock->sin6_scope_id;
-#endif
-			if (IN6_IS_ADDR_MULTICAST(dst)) {
-				ro->ro_rt = rtalloc1(
-				    &((struct route *)ro)->ro_dst, 0, 0);
-			} else {
-				rtalloc_ign((struct route *)ro, 0);
-			}
-			if (ro->ro_rt != NULL)
-				RT_LOCK(ro->ro_rt);
+		/* No route yet, so try to acquire one */
+		bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
+		sa6 = (struct sockaddr_in6 *)&ro->ro_dst;
+		sa6->sin6_family = AF_INET6;
+		sa6->sin6_len = sizeof(struct sockaddr_in6);
+		sa6->sin6_addr = *dst;
+		if (IN6_IS_ADDR_MULTICAST(dst)) {
+			ro->ro_rt = rtalloc1_scoped(
+			    &((struct route *)ro)->ro_dst, 0, 0, ifscope);
+		} else {
+			rtalloc_scoped((struct route *)ro, ifscope);
 		}
+		if (ro->ro_rt != NULL)
+			RT_LOCK(ro->ro_rt);
+	}
 
+	/*
+	 * Ignore the result if the nexthop was explicitly specified
+	 * (in case we're asked to clone).
+	 */
+	if (opts != NULL && opts->ip6po_nexthop != NULL) {
+		if (ro->ro_rt != NULL)
+			RT_UNLOCK(ro->ro_rt);
+		goto done;
+	}
+
+	if (ro->ro_rt != NULL) {
+		RT_LOCK_ASSERT_HELD(ro->ro_rt);
+		ifp = ro->ro_rt->rt_ifp;
+	} else {
+		error = EHOSTUNREACH;
+	}
+	route = ro;
+
+validateroute:
+	if (select_srcif) {
+		boolean_t has_route = (route != NULL && route->ro_rt != NULL);
+
+		if (has_route)
+			RT_LOCK_ASSERT_HELD(route->ro_rt);
 		/*
-		 * in_pcbconnect() checks out IFF_LOOPBACK to skip using
-		 * the address. But we don't know why it does so.
-		 * It is necessary to ensure the scope even for lo0
-		 * so doesn't check out IFF_LOOPBACK.
+		 * If there is a non-loopback route with the wrong interface,
+		 * or if there is no interface configured with such an address,
+		 * blow it away.  Except for local/loopback, we look for one
+		 * with a matching interface scope/index.
 		 */
-		if (ro->ro_rt != NULL) {
-			RT_LOCK_ASSERT_HELD(ro->ro_rt);
-			ia6 = in6_ifawithscope(ro->ro_rt->rt_ifa->ifa_ifp, dst);
-			if (ia6 == 0) {
-				ia6 = ifatoia6(ro->ro_rt->rt_ifa);
-				if (ia6)
-					ifaref(&ia6->ia_ifa);
+		if (has_route && (ifa == NULL ||
+		    (ifa->ifa_ifp != ifp && ifp != lo_ifp) ||
+		    !(route->ro_rt->rt_flags & RTF_UP))) {
+			if (ip6_select_srcif_debug) {
+				if (ifa != NULL) {
+					printf("%s->%s ifscope %d ro_if %s "
+					    "!= ifa_if %s (cached route "
+					    "cleared)\n", s_src, s_dst,
+					    ifscope, if_name(ifp),
+					    if_name(ifa->ifa_ifp));
+				} else {
+					printf("%s->%s ifscope %d ro_if %s "
+					    "(no ifa_if found)\n", s_src,
+					    s_dst, ifscope, if_name(ifp));
+				}
 			}
+			RT_UNLOCK(route->ro_rt);
+			rtfree(route->ro_rt);
+			route->ro_rt = NULL;
+			route->ro_flags &= ~ROF_SRCIF_SELECTED;
+			error = EHOSTUNREACH;
+			/* Undo the settings done above */
+			route = NULL;
+			ifp = NULL;
+		} else if (has_route) {
+			route->ro_flags |= ROF_SRCIF_SELECTED;
+			route->ro_rt->generation_id = route_generation;
+			RT_UNLOCK(route->ro_rt);
+		}
+	} else {
+		if (ro->ro_rt != NULL)
 			RT_UNLOCK(ro->ro_rt);
+		if (ifp != NULL && opts != NULL &&
+		    opts->ip6po_pktinfo != NULL &&
+		    opts->ip6po_pktinfo->ipi6_ifindex != 0) {
+			/*
+			 * Check if the outgoing interface conflicts with the
+			 * interface specified by ipi6_ifindex (if specified).
+			 * Note that the loopback interface is always okay
+			 * (this may happen when we are sending a packet to
+			 * one of our own addresses).
+			 */
+			if (!(ifp->if_flags & IFF_LOOPBACK) && ifp->if_index !=
+			    opts->ip6po_pktinfo->ipi6_ifindex) {
+				error = EHOSTUNREACH;
+				goto done;
+			}
 		}
-#if 0
+	}
+
+done:
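+	/*
+	 * If the caller asked us to avoid cellular (nocell), reject an
+	 * otherwise-successful result that resolves to an IFT_CELLULAR
+	 * interface.
+	 */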
+	if (nocell && error == 0) {
+		if ((ifp != NULL && ifp->if_type == IFT_CELLULAR) ||
+		    (route != NULL && route->ro_rt != NULL &&
+		    route->ro_rt->rt_ifp->if_type == IFT_CELLULAR)) {
+			if (route != NULL && route->ro_rt != NULL) {
+				rtfree(route->ro_rt);
+				route->ro_rt = NULL;
+				route->ro_flags &= ~ROF_SRCIF_SELECTED;
+				route = NULL;
+			}
+			ifp = NULL;
+			error = EHOSTUNREACH;
+		}
+	}
+
+	if (ifp == NULL && (route == NULL || route->ro_rt == NULL)) {
 		/*
-		 * xxx The followings are necessary? (kazu)
-		 * I don't think so.
-		 * It's for SO_DONTROUTE option in IPv4.(jinmei)
+		 * This can happen if the caller passed neither a cached route
+		 * nor any other hints.  We treat this case as an error.
 		 */
-		if (ia6 == 0) {
-			struct sockaddr_in6 sin6 = {sizeof(sin6), AF_INET6, 0};
-
-			sin6->sin6_addr = *dst;
+		error = EHOSTUNREACH;
+	}
+	if (error == EHOSTUNREACH)
+		ip6stat.ip6s_noroute++;
 
-			ia6 = ifatoia6(ifa_ifwithdstaddr(sin6tosa(&sin6)));
-			if (ia6 == 0)
-				ia6 = ifatoia6(ifa_ifwithnet(sin6tosa(&sin6)));
-			if (ia6 == 0)
-				return(0);
-			*src_storage = satosin6(&ia6->ia_addr)->sin6_addr;
-			ifafree(&ia6->ia_ifa);
-			return src_storage;
+	if (error == 0) {
+		if (retifp != NULL) {
+			if (ifp != NULL)
+				ifnet_reference(ifp);	/* for caller */
+			*retifp = ifp;
 		}
-#endif /* 0 */
-		if (ia6 == 0) {
-			*errorp = EHOSTUNREACH;	/* no route */
-			return(0);
-		}
-		*src_storage = satosin6(&ia6->ia_addr)->sin6_addr;
-		ifafree(&ia6->ia_ifa);
-		return src_storage;
+		if (retrt != NULL && route != NULL)
+			*retrt = route->ro_rt;	/* ro_rt may be NULL */
+	} else if (select_srcif && ip6_select_srcif_debug) {
+		printf("%s->%s ifscope %d ifa_if %s ro_if %s (error=%d)\n",
+		    s_src, s_dst, ifscope,
+		    (ifa != NULL) ? if_name(ifa->ifa_ifp) : "NONE",
+		    (ifp != NULL) ? if_name(ifp) : "NONE", error);
 	}
 
-	*errorp = EADDRNOTAVAIL;
-	return(0);
+	if (ifa != NULL)
+		IFA_REMREF(ifa);
+
+	return (error);
+}
+
+static int
+in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
+    struct ip6_moptions *mopts, struct route_in6 *ro, unsigned int ifscope,
+    unsigned int nocell, struct ifnet **retifp)
+{
+	int error;
+	struct route_in6 sro;
+	struct rtentry *rt = NULL;
+
+	if (ro == NULL) {
+		bzero(&sro, sizeof(sro));
+		ro = &sro;
+	}
+
+	if ((error = selectroute(NULL, dstsock, opts, mopts, ro, retifp,
+	    &rt, 0, 1, ifscope, nocell)) != 0) {
+		if (ro == &sro && rt && rt == sro.ro_rt)
+			rtfree(rt);
+		return (error);
+	}
+
+	/*
+	 * do not use a rejected or black hole route.
+	 * XXX: this check should be done in the L2 output routine.
+	 * However, if we skipped this check here, we'd see the following
+	 * scenario:
+	 * - install a rejected route for a scoped address prefix
+	 *   (like fe80::/10)
+	 * - send a packet to a destination that matches the scoped prefix,
+	 *   with ambiguity about the scope zone.
+	 * - pick the outgoing interface from the route, and disambiguate the
+	 *   scope zone with the interface.
+	 * - ip6_output() would try to get another route with the "new"
+	 *   destination, which may be valid.
+	 * - we'd see no error on output.
+	 * Although this may not be very harmful, it can still be confusing.
+	 * We thus reject the case here.
+	 */
+	if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
+		error = (rt->rt_flags & RTF_HOST) ?
+		    EHOSTUNREACH : ENETUNREACH;
+
+		if (ro == &sro && rt && rt == sro.ro_rt)
+			rtfree(rt);
+		return (error);
+	}
+
+	/*
+	 * Adjust the "outgoing" interface.  If we're going to loop the packet
+	 * back to ourselves, the ifp would be the loopback interface.
+	 * However, we'd rather know the interface associated with the
+	 * destination address (which should probably be one of our own
+	 * addresses).
+	 */
+	if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp) {
+		if (*retifp != NULL)
+			ifnet_release(*retifp);
+		*retifp = rt->rt_ifa->ifa_ifp;
+		ifnet_reference(*retifp);
+	}
+
+	if (ro == &sro && rt && rt == sro.ro_rt)
+		rtfree(rt);
+	return (0);
+}
+
+/*
+ * The "clone" argument is meaningful only for bsdi and freebsd.
+ */
+int
+in6_selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock,
+    struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro,
+    struct ifnet **retifp, struct rtentry **retrt, int clone,
+    unsigned int ifscope, unsigned int nocell)
+{
+
+	return (selectroute(srcsock, dstsock, opts, mopts, ro, retifp,
+	    retrt, clone, 0, ifscope, nocell));
 }
 
 /*
@@ -429,6 +1110,7 @@ in6_pcbsetport(
 	u_int16_t lport = 0, first, last, *lastport;
 	int count, error = 0, wild = 0;
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+	kauth_cred_t cred;
+
 	if (!locked) { /* Make sure we don't run into a deadlock: 4052373 */
 		if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) {
 			socket_unlock(inp->inp_socket, 0);
@@ -448,7 +1130,10 @@ in6_pcbsetport(
 		last  = ipport_hilastauto;
 		lastport = &pcbinfo->lasthi;
 	} else if (inp->inp_flags & INP_LOWPORT) {
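+		/*
+		 * Binding to a low (reserved) port now requires the
+		 * PRIV_NETINET_RESERVEDPORT privilege, checked against the
+		 * process credential rather than a plain superuser check.
+		 */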
-                if ((error = proc_suser(p)) != 0) {
+		cred = kauth_cred_proc_ref(p);
+		error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
+		kauth_cred_unref(&cred);
+		if (error != 0) {
 			if (!locked)
 				lck_rw_done(pcbinfo->mtx);
 			return error;
@@ -481,6 +1166,7 @@ in6_pcbsetport(
 				 * occurred above.
 				 */
 				inp->in6p_laddr = in6addr_any;
+				inp->in6p_last_outif = 0;
 				if (!locked)
 					lck_rw_done(pcbinfo->mtx);
 				return (EAGAIN);
@@ -504,6 +1190,7 @@ in6_pcbsetport(
 				 * occurred above.
 				 */
 				inp->in6p_laddr = in6addr_any;
+				inp->in6p_last_outif = 0;
 				if (!locked)
 					lck_rw_done(pcbinfo->mtx);
 				return (EAGAIN);
@@ -520,6 +1207,7 @@ in6_pcbsetport(
 	if (in_pcbinshash(inp, 1) != 0) {
 		inp->in6p_laddr = in6addr_any;
 		inp->inp_lport = 0;
+		inp->in6p_last_outif = 0;
 		if (!locked)
 			lck_rw_done(pcbinfo->mtx);
 		return (EAGAIN);
@@ -530,6 +1218,350 @@ in6_pcbsetport(
 	return(0);
 }
 
+/*
+ * The following is an implementation of the policy table using a
+ * simple tail queue.
+ * XXX such details should be hidden.
+ * XXX an implementation using a binary tree should be more efficient.
+ */
+struct addrsel_policyent {
+	TAILQ_ENTRY(addrsel_policyent) ape_entry;
+	struct in6_addrpolicy ape_policy;
+};
+
+TAILQ_HEAD(addrsel_policyhead, addrsel_policyent);
+
+struct addrsel_policyhead addrsel_policytab;
+
+static void
+init_policy_queue(void)
+{
+	TAILQ_INIT(&addrsel_policytab);
+}
+
+void
+addrsel_policy_init(void)
+{
+	/*
+	 * Default address selection policy based on RFC 3484 and
+	 * draft-arifumi-6man-rfc3484-revise-03.
+	 */
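+	/*
+	 * A higher "preced" value is preferred when selecting a destination;
+	 * "label" values are used to prefer source/destination pairs whose
+	 * prefixes carry the same label (cf. RFC 3484, Section 2.1).
+	 */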
+	static const struct in6_addrpolicy defaddrsel[] = {
+		/* localhost */
+		{ .addr     = { .sin6_family = AF_INET6,
+				.sin6_addr   = IN6ADDR_LOOPBACK_INIT,
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .addrmask = { .sin6_family = AF_INET6,
+			        .sin6_addr   = IN6MASK128,
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .preced   = 60,
+		  .label    = 0 },
+		/* ULA */
+		{ .addr	    = { .sin6_family = AF_INET6,
+				.sin6_addr   = {{{ 0xfc }}},
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .addrmask = { .sin6_family = AF_INET6,
+			        .sin6_addr   = IN6MASK7,
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .preced   = 50,
+		  .label    = 1 },
+		/* any IPv6 src */
+		{ .addr	    = { .sin6_family = AF_INET6,
+				.sin6_addr   = IN6ADDR_ANY_INIT,
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .addrmask = { .sin6_family = AF_INET6,
+			        .sin6_addr   = IN6MASK0,
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .preced   = 40,
+		  .label    = 2 },
+		/* any IPv4 src */
+		{ .addr	    = { .sin6_family = AF_INET6,
+				.sin6_addr   = IN6ADDR_V4MAPPED_INIT,
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .addrmask = { .sin6_family = AF_INET6,
+			        .sin6_addr   = IN6MASK96,
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .preced   = 30,
+		  .label    = 3 },
+		/* 6to4 */
+		{ .addr	    = { .sin6_family = AF_INET6,
+				.sin6_addr   = {{{ 0x20, 0x02 }}},
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .addrmask = { .sin6_family = AF_INET6,
+			        .sin6_addr   = IN6MASK16,
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .preced   = 20,
+		  .label    = 4 },
+		/* Teredo */
+		{ .addr	    = { .sin6_family = AF_INET6,
+				.sin6_addr   = {{{ 0x20, 0x01 }}},
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .addrmask = { .sin6_family = AF_INET6,
+			        .sin6_addr   = IN6MASK32,
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .preced   = 10,
+		  .label    = 5 },
+		/* v4 compat addresses */
+		{ .addr	    = { .sin6_family = AF_INET6,
+				.sin6_addr = IN6ADDR_ANY_INIT,
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .addrmask = { .sin6_family = AF_INET6,
+			        .sin6_addr = IN6MASK96,
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .preced   = 1,
+		  .label    = 10 },
+		/* site-local (deprecated) */
+		{ .addr	    = { .sin6_family = AF_INET6,
+				.sin6_addr = {{{ 0xfe, 0xc0 }}},
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .addrmask = { .sin6_family = AF_INET6,
+			        .sin6_addr = IN6MASK16,
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .preced   = 1,
+		  .label    = 11 },
+		/* 6bone (deprecated) */
+		{ .addr	    = { .sin6_family = AF_INET6,
+				.sin6_addr = {{{ 0x3f, 0xfe }}},
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .addrmask = { .sin6_family = AF_INET6,
+			        .sin6_addr = IN6MASK16,
+				.sin6_len    = sizeof(struct sockaddr_in6) },
+		  .preced   = 1,
+		  .label    = 12 },
+	};
+	int i;
+
+	init_policy_queue();
+
+	/* initialize the "last resort" policy */
+	bzero(&defaultaddrpolicy, sizeof(defaultaddrpolicy));
+	defaultaddrpolicy.label = ADDR_LABEL_NOTAPP;
+
+	for (i = 0; i < sizeof(defaddrsel) / sizeof(defaddrsel[0]); i++)
+		add_addrsel_policyent(&defaddrsel[i]);
+}
+
+struct in6_addrpolicy *
+in6_addrsel_lookup_policy(struct sockaddr_in6 *key)
+{
+	struct in6_addrpolicy *match = NULL;
+
+	ADDRSEL_LOCK();
+	match = match_addrsel_policy(key);
+
+	if (match == NULL)
+		match = &defaultaddrpolicy;
+	else
+		match->use++;
+	ADDRSEL_UNLOCK();
+
+	return (match);
+}
+
+static struct in6_addrpolicy *
+match_addrsel_policy(struct sockaddr_in6 *key)
+{
+	struct addrsel_policyent *pent;
+	struct in6_addrpolicy *bestpol = NULL, *pol;
+	int matchlen, bestmatchlen = -1;
+	u_char *mp, *ep, *k, *p, m;
+
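+	/*
+	 * Longest-prefix match: a policy matches when the key equals the
+	 * policy prefix under the policy mask; the match length is the
+	 * number of leading one-bits in the mask (e.g. a mask byte of
+	 * 0xe0 contributes 3 bits).
+	 */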
+	TAILQ_FOREACH(pent, &addrsel_policytab, ape_entry) {
+		matchlen = 0;
+
+		pol = &pent->ape_policy;
+		mp = (u_char *)&pol->addrmask.sin6_addr;
+		ep = mp + 16;	/* XXX: scope field? */
+		k = (u_char *)&key->sin6_addr;
+		p = (u_char *)&pol->addr.sin6_addr;
+		for (; mp < ep && *mp; mp++, k++, p++) {
+			m = *mp;
+			if ((*k & m) != *p)
+				goto next; /* no match */
+			if (m == 0xff) /* short cut for a typical case */
+				matchlen += 8;
+			else {
+				while (m >= 0x80) {
+					matchlen++;
+					m <<= 1;
+				}
+			}
+		}
+
+		/* matched.  check if this is better than the current best. */
+		if (bestpol == NULL ||
+		    matchlen > bestmatchlen) {
+			bestpol = pol;
+			bestmatchlen = matchlen;
+		}
+
+	  next:
+		continue;
+	}
+
+	return (bestpol);
+}
+
+static int
+add_addrsel_policyent(const struct in6_addrpolicy *newpolicy)
+{
+	struct addrsel_policyent *new, *pol;
+
+	MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR,
+	       M_WAITOK);
+
+	ADDRSEL_LOCK();
+
+	/* duplication check */
+	TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) {
+		if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr,
+				       &pol->ape_policy.addr.sin6_addr) &&
+		    IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr,
+				       &pol->ape_policy.addrmask.sin6_addr)) {
+			ADDRSEL_UNLOCK();
+			FREE(new, M_IFADDR);
+			return (EEXIST);	/* or override it? */
+		}
+	}
+
+	bzero(new, sizeof(*new));
+
+	/* XXX: should validate entry */
+	new->ape_policy = *newpolicy;
+
+	TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry);
+	ADDRSEL_UNLOCK();
+
+	return (0);
+}
+
+#ifdef ENABLE_ADDRSEL
+static int
+delete_addrsel_policyent(const struct in6_addrpolicy *key)
+{
+	struct addrsel_policyent *pol;
+
+	ADDRSEL_LOCK();
+
+	/* search for the entry in the table */
+	TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) {
+		if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr,
+		    &pol->ape_policy.addr.sin6_addr) &&
+		    IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr,
+		    &pol->ape_policy.addrmask.sin6_addr)) {
+			break;
+		}
+	}
+	if (pol == NULL) {
+		ADDRSEL_UNLOCK();
+		return (ESRCH);
+	}
+
+	TAILQ_REMOVE(&addrsel_policytab, pol, ape_entry);
+	FREE(pol, M_IFADDR);
+	pol = NULL;
+	ADDRSEL_UNLOCK();
+
+	return (0);
+}
+#endif /* ENABLE_ADDRSEL */
+
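+/*
+ * Walk the policy table with ADDRSEL_LOCK held, invoking the callback on
+ * each entry; a nonzero return from the callback aborts the walk and is
+ * propagated to the caller.
+ */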
+int
+walk_addrsel_policy(int (*callback)(const struct in6_addrpolicy *, void *),
+    void *w)
+{
+	struct addrsel_policyent *pol;
+	int error = 0;
+
+	ADDRSEL_LOCK();
+	TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) {
+		if ((error = (*callback)(&pol->ape_policy, w)) != 0) {
+			ADDRSEL_UNLOCK();
+			return (error);
+		}
+	}
+	ADDRSEL_UNLOCK();
+	return (error);
+}
+
+/*
+ * Subroutines to manage the address selection policy table via sysctl.
+ */
+struct walkarg {
+	struct sysctl_req *w_req;
+};
+
+static int
+dump_addrsel_policyent(const struct in6_addrpolicy *pol, void *arg)
+{
+	int error = 0;
+	struct walkarg *w = arg;
+
+	error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol));
+
+	return (error);
+}
+
+static int
+in6_src_sysctl SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg1, arg2)
+	struct walkarg w;
+
+	if (req->newptr)
+		return (EPERM);
+	bzero(&w, sizeof(w));
+	w.w_req = req;
+
+	return (walk_addrsel_policy(dump_addrsel_policyent, &w));
+}
+
+SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy,
+	CTLFLAG_RD | CTLFLAG_LOCKED, in6_src_sysctl, "");
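+
+/*
+ * Add or delete an address selection policy entry via the
+ * SIOCAADDRCTL_POLICY / SIOCDADDRCTL_POLICY ioctls.  The prefix address
+ * is normalized against its mask before the table is modified.
+ */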
+int
+in6_src_ioctl(u_long cmd, caddr_t data)
+{
+	int i;
+	struct in6_addrpolicy ent0;
+
+	if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY)
+		return (EOPNOTSUPP); /* sanity check */
+
+	ent0 = *(struct in6_addrpolicy *)data;
+
+	if (ent0.label == ADDR_LABEL_NOTAPP)
+		return (EINVAL);
+	/* check that the prefix mask is contiguous. */
+	if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0)
+		return (EINVAL);
+	/* clear trailing garbage (if any) in the prefix address. */
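+	/*
+	 * For example (a hypothetical entry): fec0::1 given with a /16
+	 * mask is stored as fec0::.
+	 */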
+	for (i = 0; i < 4; i++) {
+		ent0.addr.sin6_addr.s6_addr32[i] &=
+			ent0.addrmask.sin6_addr.s6_addr32[i];
+	}
+	ent0.use = 0;
+
+	switch (cmd) {
+	case SIOCAADDRCTL_POLICY:
+#ifdef ENABLE_ADDRSEL
+		return (add_addrsel_policyent(&ent0));
+#else
+		return (ENOTSUP);
+#endif
+	case SIOCDADDRCTL_POLICY:
+#ifdef ENABLE_ADDRSEL
+		return (delete_addrsel_policyent(&ent0));
+#else
+		return (ENOTSUP);
+#endif
+	}
+
+	return (0);		/* XXX: pacify compilers */
+}
+
 /*
  * generate kernel-internal form (scopeid embedded into s6_addr16[1]).
  * If the scope of the address is link-local, embed the interface index in the
@@ -549,21 +1581,17 @@ int
 in6_embedscope(
 	struct in6_addr *in6,
 	const struct sockaddr_in6 *sin6,
-#ifdef HAVE_NRL_INPCB
-	struct inpcb *in6p,
-#define in6p_outputopts	inp_outputopts6
-#define in6p_moptions	inp_moptions6
-#else
 	struct in6pcb *in6p,
-#endif
-	struct ifnet **ifpp)
+	struct ifnet **ifpp,
+	struct ip6_pktopts *opt)
 {
 	struct ifnet *ifp = NULL;
 	u_int32_t scopeid;
+	struct ip6_pktopts *optp = NULL;
 
 	*in6 = sin6->sin6_addr;
 	scopeid = sin6->sin6_scope_id;
-	if (ifpp)
+	if (ifpp != NULL)
 		*ifpp = NULL;
 
 	/*
@@ -578,21 +1606,31 @@ in6_embedscope(
 
 	if (IN6_IS_SCOPE_LINKLOCAL(in6)) {
 		struct in6_pktinfo *pi;
+		struct ifnet *im6o_multicast_ifp = NULL;
+
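+		/*
+		 * Snapshot im6o_multicast_ifp while holding the IM6O lock;
+		 * the pointer is consulted below without the lock held.
+		 */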
+		if (in6p != NULL && IN6_IS_ADDR_MULTICAST(in6) &&
+		    in6p->in6p_moptions != NULL) {
+			IM6O_LOCK(in6p->in6p_moptions);
+			im6o_multicast_ifp =
+			    in6p->in6p_moptions->im6o_multicast_ifp;
+			IM6O_UNLOCK(in6p->in6p_moptions);
+		}
 
+		if (opt)
+			optp = opt;
+		else if (in6p)
+			optp = in6p->in6p_outputopts;
 		/*
 		 * KAME assumption: link id == interface id
 		 */
-
 		ifnet_head_lock_shared();
-		if (in6p && in6p->in6p_outputopts &&
-		    (pi = in6p->in6p_outputopts->ip6po_pktinfo) &&
+		if (in6p && optp && (pi = optp->ip6po_pktinfo) &&
 		    pi->ipi6_ifindex) {
 			ifp = ifindex2ifnet[pi->ipi6_ifindex];
 			in6->s6_addr16[1] = htons(pi->ipi6_ifindex);
 		} else if (in6p && IN6_IS_ADDR_MULTICAST(in6) &&
-			   in6p->in6p_moptions &&
-			   in6p->in6p_moptions->im6o_multicast_ifp) {
-			ifp = in6p->in6p_moptions->im6o_multicast_ifp;
+		    in6p->in6p_moptions != NULL && im6o_multicast_ifp != NULL) {
+			ifp = im6o_multicast_ifp;
 			in6->s6_addr16[1] = htons(ifp->if_index);
 		} else if (scopeid) {
 			/* 
@@ -610,16 +1648,15 @@ in6_embedscope(
 		}
 		ifnet_head_done();
 
-		if (ifpp)
+		if (ifpp != NULL) {
+			if (ifp != NULL)
+				ifnet_reference(ifp);	/* for caller */
 			*ifpp = ifp;
+		}
 	}
 
 	return 0;
 }
-#if HAVE_NRL_INPCB
-#undef in6p_outputopts
-#undef in6p_moptions
-#endif
 
 /*
  * generate standard sockaddr_in6 from embedded form.
@@ -667,15 +1704,3 @@ in6_recoverscope(
 
 	return 0;
 }
-
-/*
- * just clear the embedded scope identifer.
- * XXX: currently used for bsdi4 only as a supplement function.
- */
-void
-in6_clearscope(addr)
-	struct in6_addr *addr;
-{
-	if (IN6_IS_SCOPE_LINKLOCAL(addr))
-		addr->s6_addr16[1] = 0;
-}
diff --git a/bsd/netinet6/in6_var.h b/bsd/netinet6/in6_var.h
index e423627db..67f8fa429 100644
--- a/bsd/netinet6/in6_var.h
+++ b/bsd/netinet6/in6_var.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -97,6 +97,11 @@
 #define _NETINET6_IN6_VAR_H_
 #include <sys/appleapiopts.h>
 
+#ifdef XNU_KERNEL_PRIVATE
+#include <sys/tree.h>
+#include <sys/mcache.h>
+#endif
+
 #ifdef __APPLE__
 #include <sys/kern_event.h>
 #endif
@@ -116,7 +121,7 @@ struct in6_addrlifetime {
 	u_int32_t ia6t_pltime;	/* prefix lifetime */
 };
 
-#if defined(KERNEL_PRIVATE)
+#ifdef XNU_KERNEL_PRIVATE
 struct in6_addrlifetime_32 {
 	u_int32_t ia6t_expire;
 	u_int32_t ia6t_preferred;
@@ -125,9 +130,9 @@ struct in6_addrlifetime_32 {
 };
 
 struct in6_addrlifetime_64 {
-	time_t	ia6t_expire;
-	time_t	ia6t_preferred	__attribute__((aligned(8)));
-	u_int32_t ia6t_vltime	__attribute__((aligned(8)));
+	u_int64_t ia6t_expire;
+	u_int64_t ia6t_preferred;
+	u_int32_t ia6t_vltime;
 	u_int32_t ia6t_pltime;
 };
 
@@ -150,13 +155,29 @@ struct	in6_ifaddr {
 	int	ia6_flags;
 
 	struct in6_addrlifetime ia6_lifetime;
+	time_t	ia6_createtime; /* the creation time of this address, which is
+				 * currently used for temporary addresses only.
+				 */
+	time_t	ia6_updatetime;
+
 	struct ifprefix *ia6_ifpr; /* back pointer to ifprefix */
 
-	struct nd_prefix *ia6_ndpr; /* back pointer to the ND prefix
-				     * (for autoconfigured addresses only)
-				     */
+	/* back pointer to the ND prefix (for autoconfigured addresses only) */
+	struct nd_prefix *ia6_ndpr;
+
+	/* multicast addresses joined from the kernel */
+	LIST_HEAD(, in6_multi_mship) ia6_memberships;
+};
+#endif /* XNU_KERNEL_PRIVATE */
+
+/* control structure to manage address selection policy */
+struct in6_addrpolicy {
+	struct sockaddr_in6 addr; /* prefix address */
+	struct sockaddr_in6 addrmask; /* prefix mask */
+	int preced;		/* precedence */
+	int label;		/* matching label */
+	u_quad_t use;		/* statistics */
 };
-#endif /* KERNEL_PRIVATE */
 
 /*
  * IPv6 interface statistics, as defined in RFC2465 Ipv6IfStatsEntry (p12).
@@ -282,7 +303,7 @@ struct	in6_ifreq {
 	union {
 		struct	sockaddr_in6 ifru_addr;
 		struct	sockaddr_in6 ifru_dstaddr;
-		short	ifru_flags;
+		int	ifru_flags;
 		int	ifru_flags6;
 		int	ifru_metric;
 		caddr_t	ifru_data;
@@ -302,7 +323,7 @@ struct	in6_aliasreq {
 	struct in6_addrlifetime ifra_lifetime;
 };
 
-#if defined(KERNEL_PRIVATE)
+#ifdef XNU_KERNEL_PRIVATE
 struct	in6_aliasreq_32 {
 	char	ifra_name[IFNAMSIZ];
 	struct	sockaddr_in6 ifra_addr;
@@ -320,7 +341,7 @@ struct	in6_aliasreq_64 {
 	int	ifra_flags;
 	struct in6_addrlifetime_64 ifra_lifetime;
 };
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 /* prefix type macro */
 #define IN6_PREFIX_ND	1
@@ -404,7 +425,7 @@ struct	in6_rrenumreq {
 #define irr_rrf_decrvalid	irr_flags.prf_rr.decrvalid
 #define irr_rrf_decrprefd	irr_flags.prf_rr.decrprefd
 
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 /*
  * Given a pointer to an in6_ifaddr (ifaddr),
  * return a pointer to the addr as a sockaddr_in6
@@ -418,7 +439,7 @@ struct	in6_rrenumreq {
 #define IFA_DSTIN6(x)	(&((struct sockaddr_in6 *)((x)->ifa_dstaddr))->sin6_addr)
 
 #define IFPR_IN6(x)	(&((struct sockaddr_in6 *)((x)->ifpr_prefix))->sin6_addr)
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 /*
  * Event data, internet6 style.
@@ -455,10 +476,10 @@ struct kev_in6_data {
 #define KEV_INET6_NEW_RTADV_ADDR 	5	/* Autoconf router advertised address has appeared */
 #define KEV_INET6_DEFROUTER 		6	/* Default router detected by kernel */
 
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 /* Utility function used inside netinet6 kernel code for generating events */
 void in6_post_msg(struct ifnet *, u_int32_t, struct in6_ifaddr *);
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 #define IN6_ARE_MASKED_ADDR_EQUAL(d, a, m)	(	\
 	(((d)->s6_addr32[0] ^ (a)->s6_addr32[0]) & (m)->s6_addr32[0]) == 0 && \
@@ -481,37 +502,37 @@ void in6_post_msg(struct ifnet *, u_int32_t, struct in6_ifaddr *);
 
 #define SIOCDIFADDR_IN6		 _IOW('i', 25, struct in6_ifreq)
 #define SIOCAIFADDR_IN6		 _IOW('i', 26, struct in6_aliasreq)
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 #define SIOCAIFADDR_IN6_32	 _IOW('i', 26, struct in6_aliasreq_32)
 #define SIOCAIFADDR_IN6_64	 _IOW('i', 26, struct in6_aliasreq_64)
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 #define SIOCSIFPHYADDR_IN6	_IOW('i', 62, struct in6_aliasreq)
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 #define SIOCSIFPHYADDR_IN6_32	_IOW('i', 62, struct in6_aliasreq_32)
 #define SIOCSIFPHYADDR_IN6_64	_IOW('i', 62, struct in6_aliasreq_64)
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 #define	SIOCGIFPSRCADDR_IN6	_IOWR('i', 63, struct in6_ifreq)
 #define	SIOCGIFPDSTADDR_IN6	_IOWR('i', 64, struct in6_ifreq)
 #define SIOCGIFAFLAG_IN6	_IOWR('i', 73, struct in6_ifreq)
 #define SIOCGDRLST_IN6		_IOWR('i', 74, struct in6_drlist)
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 #define SIOCGDRLST_IN6_32	_IOWR('i', 74, struct in6_drlist_32)
 #define SIOCGDRLST_IN6_64	_IOWR('i', 74, struct in6_drlist_64)
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 #define SIOCGPRLST_IN6		_IOWR('i', 75, struct in6_prlist)
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 #define SIOCGPRLST_IN6_32	_IOWR('i', 75, struct in6_prlist_32)
 #define SIOCGPRLST_IN6_64	_IOWR('i', 75, struct in6_prlist_64)
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 #define OSIOCGIFINFO_IN6	_IOWR('i', 108, struct in6_ondireq)
 #define SIOCGIFINFO_IN6		_IOWR('i', 76, struct in6_ondireq)
 #define SIOCSNDFLUSH_IN6	_IOWR('i', 77, struct in6_ifreq)
 #define SIOCGNBRINFO_IN6	_IOWR('i', 78, struct in6_nbrinfo)
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 #define SIOCGNBRINFO_IN6_32	_IOWR('i', 78, struct in6_nbrinfo_32)
 #define SIOCGNBRINFO_IN6_64	_IOWR('i', 78, struct in6_nbrinfo_64)
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 #define SIOCSPFXFLUSH_IN6	_IOWR('i', 79, struct in6_ifreq)
 #define SIOCSRTRFLUSH_IN6	_IOWR('i', 80, struct in6_ifreq)
 
@@ -522,12 +543,12 @@ void in6_post_msg(struct ifnet *, u_int32_t, struct in6_ifaddr *);
 
 #define SIOCSDEFIFACE_IN6	_IOWR('i', 85, struct in6_ndifreq)
 #define SIOCGDEFIFACE_IN6	_IOWR('i', 86, struct in6_ndifreq)
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 #define SIOCSDEFIFACE_IN6_32	_IOWR('i', 85, struct in6_ndifreq_32)
 #define SIOCSDEFIFACE_IN6_64	_IOWR('i', 85, struct in6_ndifreq_64)
 #define SIOCGDEFIFACE_IN6_32	_IOWR('i', 86, struct in6_ndifreq_32)
 #define SIOCGDEFIFACE_IN6_64	_IOWR('i', 86, struct in6_ndifreq_64)
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 #define SIOCSIFINFO_FLAGS	_IOWR('i', 87, struct in6_ndireq) /* XXX */
 
@@ -548,30 +569,44 @@ void in6_post_msg(struct ifnet *, u_int32_t, struct in6_ifaddr *);
 				      struct sioc_sg_req6) /* get s,g pkt cnt */
 #define SIOCGETMIFCNT_IN6	_IOWR('u', 107, \
 				      struct sioc_mif_req6) /* get pkt cnt per if */
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 #define SIOCGETMIFCNT_IN6_32	_IOWR('u', 107, struct sioc_mif_req6_32)
 #define SIOCGETMIFCNT_IN6_64	_IOWR('u', 107, struct sioc_mif_req6_64)
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
+
+#define SIOCAADDRCTL_POLICY	_IOW('u', 108, struct in6_addrpolicy)
+#define SIOCDADDRCTL_POLICY	_IOW('u', 109, struct in6_addrpolicy)
 
 #ifdef PRIVATE
 /*
  * temporary control calls to attach/detach IP to/from an ethernet interface 
  */
 #define SIOCPROTOATTACH_IN6 _IOWR('i', 110, struct in6_aliasreq)    /* attach proto to interface */
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 #define SIOCPROTOATTACH_IN6_32	_IOWR('i', 110, struct in6_aliasreq_32)
 #define SIOCPROTOATTACH_IN6_64	_IOWR('i', 110, struct in6_aliasreq_64)
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 #define SIOCPROTODETACH_IN6 _IOWR('i', 111, struct in6_ifreq)    /* detach proto from interface */
 
 #define SIOCLL_START _IOWR('i', 130, struct in6_aliasreq)    /* start aquiring linklocal on interface */
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 #define SIOCLL_START_32		_IOWR('i', 130, struct in6_aliasreq_32)
 #define SIOCLL_START_64		_IOWR('i', 130, struct in6_aliasreq_64)
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 #define SIOCLL_STOP _IOWR('i', 131, struct in6_ifreq)    /* deconfigure linklocal from interface */
 #define SIOCAUTOCONF_START _IOWR('i', 132, struct in6_ifreq)    /* accept rtadvd on this interface */
 #define SIOCAUTOCONF_STOP _IOWR('i', 133, struct in6_ifreq)    /* stop accepting rtadv for this interface */
+
+#define SIOCDRADD_IN6 _IOWR('u', 134, struct in6_defrouter)
+#ifdef XNU_KERNEL_PRIVATE
+#define SIOCDRADD_IN6_32 _IOWR('u', 134, struct in6_defrouter_32)
+#define SIOCDRADD_IN6_64 _IOWR('u', 134, struct in6_defrouter_64)
+#endif /* XNU_KERNEL_PRIVATE */
+#define SIOCDRDEL_IN6 _IOWR('u', 135, struct in6_defrouter)
+#ifdef XNU_KERNEL_PRIVATE
+#define SIOCDRDEL_IN6_32 _IOWR('u', 135, struct in6_defrouter_32)
+#define SIOCDRDEL_IN6_64 _IOWR('u', 135, struct in6_defrouter_64)
+#endif /* XNU_KERNEL_PRIVATE */
 #endif /* PRIVATE */
 
 #define IN6_IFF_ANYCAST		0x01	/* anycast address */
@@ -596,7 +631,7 @@ void in6_post_msg(struct ifnet *, u_int32_t, struct in6_ifaddr *);
 #define IN6_ARE_SCOPE_EQUAL(a,b) ((a)==(b))
 #endif /* KERNEL */
 
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 extern struct in6_ifaddr *in6_ifaddrs;
 
 extern struct in6_ifstat **in6_ifstat;
@@ -604,23 +639,25 @@ extern size_t in6_ifstatmax;
 extern struct icmp6stat icmp6stat;
 extern struct icmp6_ifstat **icmp6_ifstat;
 extern size_t icmp6_ifstatmax;
+extern lck_rw_t in6_ifs_rwlock;
 #define in6_ifstat_inc(ifp, tag) \
-do {							\
-	int _z_index = ifp ? ifp->if_index : 0;		\
-	if ((_z_index) && _z_index <= if_index		\
-	 && _z_index < (signed)in6_ifstatmax		\
-	 && in6_ifstat && in6_ifstat[_z_index]) {	\
-		in6_ifstat[_z_index]->tag++;		\
-	}						\
+do {								\
+	lck_rw_lock_shared(&in6_ifs_rwlock);			\
+	int _z_index = ifp ? ifp->if_index : 0;			\
+	if ((_z_index) && _z_index <= if_index			\
+	 && _z_index < (signed)in6_ifstatmax			\
+	 && in6_ifstat && in6_ifstat[_z_index]) {		\
+		atomic_add_64(&in6_ifstat[_z_index]->tag, 1);	\
+	}							\
+	lck_rw_done(&in6_ifs_rwlock);				\
 } while (0)
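+
+/*
+ * Note: the shared in6_ifs_rwlock above guards the in6_ifstat array
+ * itself (presumably against reallocation as interfaces attach), while
+ * the counter update is atomic, so a shared hold is sufficient.
+ */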
 
+__private_extern__ lck_rw_t in6_ifaddr_rwlock;
+
 extern struct ifqueue ip6intrq;		/* IP6 packet input queue */
 extern struct in6_addr zeroin6_addr;
 extern u_char inet6ctlerrmap[];
 extern u_int32_t in6_maxmtu;
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_IPMADDR);
-#endif /* MALLOC_DECLARE */
 
 /*
  * Macro for finding the internet address structure (in6_ifaddr) corresponding
@@ -631,35 +668,156 @@ MALLOC_DECLARE(M_IPMADDR);
 /* struct ifnet *ifp; */				\
 /* struct in6_ifaddr *ia; */				\
 do {									\
-	struct ifaddr *ifa;						\
-	for (ifa = (ifp)->if_addrlist.tqh_first; ifa; ifa = ifa->ifa_list.tqe_next) {	\
-		if (!ifa->ifa_addr)					\
-			continue;					\
-		if (ifa->ifa_addr->sa_family == AF_INET6)		\
+	struct ifaddr *_ifa;						\
+	ifnet_lock_assert(ifp, LCK_RW_ASSERT_HELD);			\
+	for (_ifa = (ifp)->if_addrlist.tqh_first; _ifa != NULL;		\
+	    _ifa = _ifa->ifa_list.tqe_next) {				\
+		IFA_LOCK(_ifa);						\
+		if (_ifa->ifa_addr->sa_family == AF_INET6) {		\
+			IFA_ADDREF_LOCKED(_ifa);			\
+			IFA_UNLOCK(_ifa);				\
 			break;						\
+		}							\
+		IFA_UNLOCK(_ifa);					\
 	}								\
-	(ia) = (struct in6_ifaddr *)ifa;				\
+	(ia) = (struct in6_ifaddr *)_ifa;				\
 } while (0)
 
 /*
- * Multi-cast membership entry.  One for each group/ifp that a PCB
- * belongs to.
+ * IPv6 multicast MLD-layer source entry.
+ */
+struct ip6_msource {
+	RB_ENTRY(ip6_msource)	im6s_link;	/* RB tree links */
+	struct in6_addr		im6s_addr;
+	struct im6s_st {
+		uint16_t	ex;		/* # of exclusive members */
+		uint16_t	in;		/* # of inclusive members */
+	}			im6s_st[2];	/* state at t0, t1 */
+	uint8_t			im6s_stp;	/* pending query */
+};
+
+RB_HEAD(ip6_msource_tree, ip6_msource);
+
+RB_PROTOTYPE_SC_PREV(__private_extern__, ip6_msource_tree, ip6_msource,
+    im6s_link, ip6_msource_cmp);
+
+/*
+ * IPv6 multicast PCB-layer source entry.
+ *
+ * NOTE: overlapping use of struct ip6_msource fields at start.
+ */
+struct in6_msource {
+	RB_ENTRY(ip6_msource)	im6s_link;	/* Common field */
+	struct in6_addr		im6s_addr;	/* Common field */
+	uint8_t			im6sl_st[2];	/* state before/at commit */
+};
+
+/*
+ * IPv6 multicast PCB-layer group filter descriptor.
+ */
+struct in6_mfilter {
+	struct ip6_msource_tree	im6f_sources; /* source list for (S,G) */
+	u_long			im6f_nsrc;    /* # of source entries */
+	uint8_t			im6f_st[2];   /* state before/at commit */
+};
+
+/*
+ * Legacy KAME IPv6 multicast membership descriptor.
  */
 struct in6_multi_mship {
 	struct	in6_multi *i6mm_maddr;	/* Multicast address pointer */
 	LIST_ENTRY(in6_multi_mship) i6mm_chain;  /* multicast options chain */
 };
 
+struct mld_ifinfo;
+
+/*
+ * The request count here is a count of requests for this address, not a
+ * count of pointers to this structure.
+ */
 struct	in6_multi {
+	decl_lck_mtx_data(, in6m_lock);
+	u_int32_t in6m_refcount;	/* reference count */
+	u_int32_t in6m_reqcnt;		/* request count for this address */
+	u_int32_t in6m_debug;		/* see ifa_debug flags */
 	LIST_ENTRY(in6_multi) in6m_entry; /* list glue */
 	struct	in6_addr in6m_addr;	/* IP6 multicast address */
 	struct	ifnet *in6m_ifp;	/* back pointer to ifnet */
 	struct	ifmultiaddr *in6m_ifma;	/* back pointer to ifmultiaddr */
-	u_int	in6m_refcount;		/* # membership claims by sockets */
 	u_int	in6m_state;		/* state of the membership */
 	u_int	in6m_timer;		/* MLD6 listener report timer */
+	/* New fields for MLDv2 follow. */
+	struct mld_ifinfo	*in6m_mli;	/* MLD info */
+	SLIST_ENTRY(in6_multi)	 in6m_nrele;	/* to-be-released by MLD */
+	u_int32_t		 in6m_nrelecnt;	/* deferred release count */
+	struct ip6_msource_tree	 in6m_srcs;	/* tree of sources */
+	u_long			 in6m_nsrc;	/* # of tree entries */
+
+	struct ifqueue		 in6m_scq;	/* queue of pending
+						 * state-change packets */
+	struct timeval		 in6m_lastgsrtv;	/* last G-S-R query */
+	uint16_t		 in6m_sctimer;	/* state-change timer */
+	uint16_t		 in6m_scrv;	/* state-change rexmit count */
+	/*
+	 * SSM state counters which track state at T0 (the time the last
+	 * state-change report's RV timer went to zero) and T1
+	 * (time of pending report, i.e. now).
+	 * Used for computing MLDv2 state-change reports. Several refcounts
+	 * are maintained here to optimize for common use-cases.
+	 */
+	struct in6m_st {
+		uint16_t	iss_fmode;	/* MLD filter mode */
+		uint16_t	iss_asm;	/* # of ASM listeners */
+		uint16_t	iss_ex;		/* # of exclusive members */
+		uint16_t	iss_in;		/* # of inclusive members */
+		uint16_t	iss_rec;	/* # of recorded sources */
+	}			in6m_st[2];	/* state at t0, t1 */
+
+	void (*in6m_trace)		/* callback fn for tracing refs */
+	    (struct in6_multi *, int);
 };
 
+#define	IN6M_LOCK_ASSERT_HELD(_in6m)					\
+	lck_mtx_assert(&(_in6m)->in6m_lock, LCK_MTX_ASSERT_OWNED)
+
+#define	IN6M_LOCK_ASSERT_NOTHELD(_in6m)					\
+	lck_mtx_assert(&(_in6m)->in6m_lock, LCK_MTX_ASSERT_NOTOWNED)
+
+#define	IN6M_LOCK(_in6m)							\
+	lck_mtx_lock(&(_in6m)->in6m_lock)
+
+#define	IN6M_LOCK_SPIN(_in6m)						\
+	lck_mtx_lock_spin(&(_in6m)->in6m_lock)
+
+#define	IN6M_CONVERT_LOCK(_in6m) do {					\
+	IN6M_LOCK_ASSERT_HELD(_in6m);					\
+	lck_mtx_convert_spin(&(_in6m)->in6m_lock);			\
+} while (0)
+
+#define	IN6M_UNLOCK(_in6m)						\
+	lck_mtx_unlock(&(_in6m)->in6m_lock)
+
+#define	IN6M_ADDREF(_in6m)						\
+	in6m_addref(_in6m, 0)
+
+#define	IN6M_ADDREF_LOCKED(_in6m)					\
+	in6m_addref(_in6m, 1)
+
+#define	IN6M_REMREF(_in6m)						\
+	in6m_remref(_in6m, 0)
+
+#define	IN6M_REMREF_LOCKED(_in6m)					\
+	in6m_remref(_in6m, 1)
+
+#define IN6M_TIMER_UNDEF -1
+
+/* flags to in6_update_ifa */
+#define IN6_IFAUPDATE_DADDELAY	0x1 /* first time to configure an address */
+
+struct ip6_moptions;
+struct sockopt;
+struct inpcb;
+
 extern LIST_HEAD(in6_multihead, in6_multi) in6_multihead;
 
 /*
@@ -674,23 +832,36 @@ struct	in6_multistep {
 /*
  * Macros for looking up the in6_multi record for a given IP6 multicast
  * address on a given interface. If no matching record is found, "in6m"
- * returns NLL.
+ * returns NULL.
+ *
+ * We do this differently compared to other BSD implementations; instead
+ * of walking the if_multiaddrs list at the interface and returning the
+ * ifma_protospec value of a matching entry, we search the global list
+ * of in6_multi records and find it that way.  Otherwise either the two
+ * structures (in6_multi, ifmultiaddr) would need to be reference-counted
+ * both ways, which would make things too complicated, or they would need
+ * to reside in the same protected domain, which they don't.
+ *
+ * Must be called with in6_multihead_lock held.
  */
-
-#define IN6_LOOKUP_MULTI(addr, ifp, in6m)			\
-/* struct in6_addr addr; */					\
-/* struct ifnet *ifp; */					\
-/* struct in6_multi *in6m; */					\
-do { \
-	struct ifmultiaddr *_ifma; \
-	for (_ifma = (ifp)->if_multiaddrs.lh_first; _ifma; \
-	     _ifma = _ifma->ifma_link.le_next) { \
-		if (_ifma->ifma_addr->sa_family == AF_INET6 \
-		    && IN6_ARE_ADDR_EQUAL(&((struct sockaddr_in6 *)_ifma->ifma_addr)->sin6_addr, \
-					  &(addr))) \
-			break; \
-	} \
-	(in6m) = (struct in6_multi *)(_ifma ? _ifma->ifma_protospec : 0); \
+#define IN6_LOOKUP_MULTI(addr, ifp, in6m)				\
+	/* struct in6_addr *addr; */					\
+	/* struct ifnet *ifp; */					\
+	/* struct in6_multi *in6m; */					\
+do {									\
+	struct in6_multistep _step;					\
+	IN6_FIRST_MULTI(_step, in6m);					\
+	while ((in6m) != NULL) {					\
+		IN6M_LOCK_SPIN(in6m);					\
+		if ((in6m)->in6m_ifp == (ifp) &&			\
+		    IN6_ARE_ADDR_EQUAL(&(in6m)->in6m_addr, (addr))) {	\
+			IN6M_ADDREF_LOCKED(in6m);			\
+			IN6M_UNLOCK(in6m);				\
+			break;						\
+		}							\
+		IN6M_UNLOCK(in6m);					\
+		IN6_NEXT_MULTI(_step, in6m);				\
+	}								\
 } while(0)
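+
+/*
+ * A sketch of the expected usage pattern (illustrative only):
+ *
+ *	struct in6_multi *in6m = NULL;
+ *
+ *	in6_multihead_lock_shared();
+ *	IN6_LOOKUP_MULTI(&addr, ifp, in6m);
+ *	in6_multihead_lock_done();
+ *	if (in6m != NULL) {
+ *		...use in6m, then drop the reference taken by the lookup...
+ *		IN6M_REMREF(in6m);
+ *	}
+ */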
 
 /*
@@ -699,34 +870,58 @@ do { \
  * provide.  IN6_FIRST_MULTI(), below, must be called to initialize "step"
  * and get the first record.  Both macros return a NULL "in6m" when there
  * are no remaining records.
+ *
+ * Must be called with in6_multihead_lock held.
  */
 #define IN6_NEXT_MULTI(step, in6m)					\
-/* struct in6_multistep step; */					\
-/* struct in6_multi *in6m; */						\
-do { \
-	if (((in6m) = (step).i_in6m) != NULL) \
-		(step).i_in6m = (step).i_in6m->in6m_entry.le_next; \
-} while(0)
+	/* struct in6_multistep step; */				\
+	/* struct in6_multi *in6m; */					\
+do {									\
+	in6_multihead_lock_assert(LCK_RW_ASSERT_HELD);			\
+	if (((in6m) = (step).i_in6m) != NULL)				\
+		(step).i_in6m = (step).i_in6m->in6m_entry.le_next;	\
+} while (0)
 
-#define IN6_FIRST_MULTI(step, in6m)		\
-/* struct in6_multistep step; */		\
-/* struct in6_multi *in6m */			\
-do { \
-	(step).i_in6m = in6_multihead.lh_first; \
-		IN6_NEXT_MULTI((step), (in6m)); \
-} while(0)
+#define IN6_FIRST_MULTI(step, in6m)					\
+	/* struct in6_multistep step; */				\
+	/* struct in6_multi *in6m */					\
+do {									\
+	in6_multihead_lock_assert(LCK_RW_ASSERT_HELD);			\
+	(step).i_in6m = in6_multihead.lh_first;				\
+	IN6_NEXT_MULTI((step), (in6m));					\
+} while (0)
 
-extern struct in6_multi *in6_addmulti(struct in6_addr *, struct ifnet *,
-    int *, int);
-extern void in6_delmulti(struct in6_multi *, int);
+/* Multicast private KPIs. */
+extern int im6o_mc_filter(const struct ip6_moptions *, const struct ifnet *,
+    const struct sockaddr *, const struct sockaddr *);
+extern int in6_mc_join(struct ifnet *, const struct in6_addr *,
+    struct in6_mfilter *, struct in6_multi **, int);
+extern int in6_mc_leave(struct in6_multi *, struct in6_mfilter *);
+extern void in6m_clear_recorded(struct in6_multi *);
+extern void in6m_commit(struct in6_multi *);
+extern void in6m_purge(struct in6_multi *);
+extern void in6m_print(const struct in6_multi *);
+extern int in6m_record_source(struct in6_multi *, const struct in6_addr *);
+extern int ip6_getmoptions(struct inpcb *, struct sockopt *);
+extern int ip6_setmoptions(struct inpcb *, struct sockopt *);
+
+/* Legacy KAME multicast private KPIs. */
+extern struct in6_multi_mship *in6_joingroup(struct ifnet *,
+    struct in6_addr *, int *, int);
+extern int in6_leavegroup(struct in6_multi_mship *);
+
+extern void in6_multi_init(void);
+extern void in6m_addref(struct in6_multi *, int);
+extern void in6m_remref(struct in6_multi *, int);
+extern int in6_multi_detach(struct in6_multi *);
 extern int in6_ifindex2scopeid(int);
 extern int in6_mask2len(struct in6_addr *, u_char *);
 extern void in6_len2mask(struct in6_addr *, int);
 extern int in6_control(struct socket *, u_long, caddr_t, struct ifnet *,
     struct proc *);
 extern int in6_update_ifa(struct ifnet *, struct in6_aliasreq *,
-    struct in6_ifaddr *, int);
-extern void in6_purgeaddr(struct ifaddr *, int);
+    struct in6_ifaddr *, int, int);
+extern void in6_purgeaddr(struct ifaddr *);
 extern int in6if_do_dad(struct ifnet *);
 extern void in6_purgeif(struct ifnet *);
 extern void in6_savemkludge(struct in6_ifaddr *);
@@ -744,21 +939,35 @@ extern void in6_prefixlen2mask(struct in6_addr *maskp, int len);
 extern int in6_prefix_add_ifid(int iilen, struct in6_ifaddr *ia);
 extern void in6_prefix_remove_ifid(int iilen, struct in6_ifaddr *ia);
 extern void in6_purgeprefix(struct ifnet *);
+extern void in6_purgeaddrs(struct ifnet *);
 
 extern int in6_is_addr_deprecated(struct sockaddr_in6 *);
+extern uint8_t im6s_get_mode(const struct in6_multi *,
+    const struct ip6_msource *, uint8_t);
+
+extern void im6f_leave(struct in6_mfilter *);
+extern void im6f_purge(struct in6_mfilter *);
 
 struct inpcb;
+struct ip6_pktopts;
 
 extern int in6_embedscope(struct in6_addr *, const struct sockaddr_in6 *,
-    struct inpcb *, struct ifnet **);
+    struct inpcb *, struct ifnet **, struct ip6_pktopts *);
 extern int in6_recoverscope(struct sockaddr_in6 *, const struct in6_addr *,
     struct ifnet *);
-extern void in6_clearscope(struct in6_addr *);
 extern void in6_aliasreq_64_to_32(struct in6_aliasreq_64 *,
     struct in6_aliasreq_32 *);
 extern void in6_aliasreq_32_to_64(struct in6_aliasreq_32 *,
     struct in6_aliasreq_64 *);
 extern void in6_ifaddr_init(void);
 extern void in6_rtqdrain(void);
-#endif /* KERNEL_PRIVATE */
+extern struct radix_node *in6_validate(struct radix_node *);
+extern int  in6_if2idlen(struct ifnet *);
+extern int in6_src_ioctl(u_long, caddr_t);
+
+__private_extern__ void in6_multihead_lock_exclusive(void);
+__private_extern__ void in6_multihead_lock_shared(void);
+__private_extern__ void in6_multihead_lock_assert(int);
+__private_extern__ void in6_multihead_lock_done(void);
+#endif /* XNU_KERNEL_PRIVATE */
 #endif /* _NETINET6_IN6_VAR_H_ */
diff --git a/bsd/netinet6/ip6_forward.c b/bsd/netinet6/ip6_forward.c
index 202d8ccb1..f2d6e3bd6 100644
--- a/bsd/netinet6/ip6_forward.c
+++ b/bsd/netinet6/ip6_forward.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -84,6 +84,7 @@
 #include <netinet6/ip6_var.h>
 #include <netinet/icmp6.h>
 #include <netinet6/nd6.h>
+#include <netinet6/scope6_var.h>
 
 #include <netinet/in_pcb.h>
 
@@ -95,7 +96,6 @@
 #include <netkey/key.h>
 extern int ipsec_bypass;
 #endif /* IPSEC */
-extern lck_mtx_t *ip6_mutex;
 
 #include <netinet6/ip6_fw.h>
 
@@ -120,7 +120,7 @@ extern lck_mtx_t *ip6_mutex;
 
 void
 ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
-    int srcrt, int locked)
+    int srcrt)
 {
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct sockaddr_in6 *dst;
@@ -128,14 +128,24 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 	int error, type = 0, code = 0;
 	struct mbuf *mcopy = NULL;
 	struct ifnet *ifp, *origifp;	/* maybe unnecessary */
+	u_int32_t inzone, outzone;
+	struct in6_addr src_in6, dst_in6;
 #if IPSEC
 	struct secpolicy *sp = NULL;
 #endif
 	struct timeval timenow;
 	int	tunneledv4 = 0;
+	unsigned int ifscope = IFSCOPE_NONE;
+#if PF
+	struct pf_mtag *pf_mtag;
+#endif /* PF */
 
 	getmicrotime(&timenow);
-
+#if PF
+	pf_mtag = pf_find_mtag(m);
+	if (pf_mtag != NULL && pf_mtag->rtableid != IFSCOPE_NONE)
+		ifscope = pf_mtag->rtableid;
+#endif /* PF */
 
 #if IPSEC
 	/*
@@ -181,12 +191,8 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 
 	if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
 		/* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */
-		if (locked)
-			lck_mtx_unlock(ip6_mutex);
 		icmp6_error(m, ICMP6_TIME_EXCEEDED,
 				ICMP6_TIME_EXCEED_TRANSIT, 0);
-		if (locked)
-			lck_mtx_lock(ip6_mutex);
 		return;
 	}
 	ip6->ip6_hlim -= IPV6_HLIMDEC;
@@ -293,11 +299,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 	state.ro = NULL;	/* update at ipsec6_output_tunnel() */
 	state.dst = NULL;	/* update at ipsec6_output_tunnel() */
 
-	if (locked)
-		lck_mtx_unlock(ip6_mutex);
 	error = ipsec6_output_tunnel(&state, sp, 0, &tunneledv4);
-	if (locked)
-		lck_mtx_lock(ip6_mutex);
 	key_freesp(sp, KEY_SADB_UNLOCKED);
 	if (tunneledv4)
 		return;  /* packet is gone - sent over IPv4 */
@@ -334,15 +336,6 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
     skip_ipsec:
 #endif /* IPSEC */
 
-	/*
-	 * If "locked", ip6forward_rt points to the globally defined
-	 * struct route cache which requires ip6_mutex, e.g. when this
-	 * is called from ip6_input().  Else the caller is responsible
-	 * for the struct route and its serialization (if needed), e.g.
-	 * when this is called from ip6_rthdr0().
-	 */
-	if (locked)
-		lck_mtx_assert(ip6_mutex, LCK_MTX_ASSERT_OWNED);
 	dst = (struct sockaddr_in6 *)&ip6forward_rt->ro_dst;
 	if ((rt = ip6forward_rt->ro_rt) != NULL) {
 		RT_LOCK(rt);
@@ -364,8 +357,8 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 				ip6forward_rt->ro_rt = NULL;
 			}
 			/* this probably fails but give it a try again */
-			rtalloc_ign((struct route *)ip6forward_rt,
-			    RTF_PRCLONING);
+			rtalloc_scoped_ign((struct route *)ip6forward_rt,
+			    RTF_PRCLONING, ifscope);
 			if ((rt = ip6forward_rt->ro_rt) != NULL) {
 				RT_LOCK(rt);
 				/* Take an extra ref for ourselves */
@@ -376,14 +369,9 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 		if (rt == NULL) {
 			ip6stat.ip6s_noroute++;
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute);
-			if (mcopy) {
-				if (locked)
-					lck_mtx_unlock(ip6_mutex);
+			if (mcopy)
 				icmp6_error(mcopy, ICMP6_DST_UNREACH,
 					    ICMP6_DST_UNREACH_NOROUTE, 0);
-				if (locked)
-					lck_mtx_lock(ip6_mutex);
-			}
 			m_freem(m);
 			return;
 		}
@@ -403,18 +391,14 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 		dst->sin6_family = AF_INET6;
 		dst->sin6_addr = ip6->ip6_dst;
 
-		rtalloc_ign((struct route *)ip6forward_rt, RTF_PRCLONING);
+		rtalloc_scoped_ign((struct route *)ip6forward_rt,
+		    RTF_PRCLONING, ifscope);
 		if ((rt = ip6forward_rt->ro_rt) == NULL) {
 			ip6stat.ip6s_noroute++;
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute);
-			if (mcopy) {
-				if (locked)
-					lck_mtx_unlock(ip6_mutex);
+			if (mcopy)
 				icmp6_error(mcopy, ICMP6_DST_UNREACH,
 				    ICMP6_DST_UNREACH_NOROUTE, 0);
-				if (locked)
-					lck_mtx_lock(ip6_mutex);
-			}
 			m_freem(m);
 			return;
 		}
@@ -424,14 +408,29 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 	}
 
 	/*
-	 * Scope check: if a packet can't be delivered to its destination
-	 * for the reason that the destination is beyond the scope of the
-	 * source address, discard the packet and return an icmp6 destination
-	 * unreachable error with Code 2 (beyond scope of source address).
-	 * [draft-ietf-ipngwg-icmp-v3-02.txt, Section 3.1]
+	 * Source scope check: if a packet can't be delivered to its
+	 * destination because the destination is beyond the scope of the
+	 * source address, discard the packet and return an icmp6
+	 * destination unreachable error with Code 2 (beyond scope of source
+	 * address).  We use a local copy of ip6_src, since in6_setscope()
+	 * will possibly modify its first argument.
+	 * [draft-ietf-ipngwg-icmp-v3-04.txt, Section 3.1]
 	 */
-	if (in6_addr2scopeid(m->m_pkthdr.rcvif, &ip6->ip6_src) !=
-	    in6_addr2scopeid(rt->rt_ifp, &ip6->ip6_src)) {
+	src_in6 = ip6->ip6_src;
+	if (in6_setscope(&src_in6, rt->rt_ifp, &outzone)) {
+		/* XXX: this should not happen */
+		ip6stat.ip6s_cantforward++;
+		ip6stat.ip6s_badscope++;
+		m_freem(m);
+		return;
+	}
+	if (in6_setscope(&src_in6, m->m_pkthdr.rcvif, &inzone)) {
+		ip6stat.ip6s_cantforward++;
+		ip6stat.ip6s_badscope++;
+		m_freem(m);
+		return;
+	}
+	if (inzone != outzone) {
 		ip6stat.ip6s_cantforward++;
 		ip6stat.ip6s_badscope++;
 		in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard);
@@ -450,17 +449,30 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 		RT_REMREF_LOCKED(rt);
 		RT_UNLOCK(rt);
 		if (mcopy) {
-			if (locked)
-				lck_mtx_unlock(ip6_mutex);
 			icmp6_error(mcopy, ICMP6_DST_UNREACH,
 				    ICMP6_DST_UNREACH_BEYONDSCOPE, 0);
-			if (locked)
-				lck_mtx_lock(ip6_mutex);
 		}
 		m_freem(m);
 		return;
 	}
 
+	/*
+	 * Destination scope check: if a packet is going to break the scope
+	 * zone of the packet's destination address, discard it.  This case
+	 * should usually be prevented by an appropriately configured routing
+	 * table, but we need an explicit check because we may mistakenly
+	 * forward the packet to a different zone via (e.g.) a default route.
+	 */
+	dst_in6 = ip6->ip6_dst;
+	if (in6_setscope(&dst_in6, m->m_pkthdr.rcvif, &inzone) != 0 ||
+	    in6_setscope(&dst_in6, rt->rt_ifp, &outzone) != 0 ||
+	    inzone != outzone) {
+		ip6stat.ip6s_cantforward++;
+		ip6stat.ip6s_badscope++;
+		m_freem(m);
+		return;
+	}
+
 	if (m->m_pkthdr.len > rt->rt_ifp->if_mtu) {
 		in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig);
 		if (mcopy) {
@@ -475,7 +487,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 #if IPSEC
 			/*
 			 * When we do IPsec tunnel ingress, we need to play
-			 * with if_mtu value (decrement IPsec header size
+			 * with the link MTU value (decrement IPsec header size
 			 * from mtu value).  The code is much simpler than v4
 			 * case, as we have the outgoing interface for
 			 * encapsulated packet as "rt->rt_ifp".
@@ -499,11 +511,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 			/* Release extra ref */
 			RT_REMREF_LOCKED(rt);
 			RT_UNLOCK(rt);
-			if (locked)
-				lck_mtx_unlock(ip6_mutex);
 			icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu);
-			if (locked)
-				lck_mtx_lock(ip6_mutex);
 		} else {
 			/* Release extra ref */
 			RT_REMREF_LOCKED(rt);
@@ -525,7 +533,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 	 * Also, don't send redirect if forwarding using a route
 	 * modified by a redirect.
 	 */
-	if (rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt &&
+	if (ip6_sendredirects && rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt &&
 	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) {
 		if ((rt->rt_ifp->if_flags & IFF_POINTOPOINT) != 0) {
 			/*
@@ -540,12 +548,8 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 			 */
 			RT_REMREF_LOCKED(rt);	/* Release extra ref */
 			RT_UNLOCK(rt);
-			if (locked)
-				lck_mtx_unlock(ip6_mutex);
 			icmp6_error(mcopy, ICMP6_DST_UNREACH,
 				    ICMP6_DST_UNREACH_ADDR, 0);
-			if (locked)
-				lck_mtx_lock(ip6_mutex);
 			m_freem(m);
 			return;
 		}
@@ -611,29 +615,21 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 	}
 	else
 		origifp = rt->rt_ifp;
-#ifndef SCOPEDROUTING
 	/*
 	 * clear embedded scope identifiers if necessary.
 	 * in6_clearscope will touch the addresses only when necessary.
 	 */
 	in6_clearscope(&ip6->ip6_src);
 	in6_clearscope(&ip6->ip6_dst);
-#endif
 
 	ifp = rt->rt_ifp;
 	/* Drop the lock but retain the extra ref */
 	RT_UNLOCK(rt);
 
 #if PF
-	if (locked)
-		lck_mtx_unlock(ip6_mutex);
-
 	/* Invoke outbound packet filter */
 	error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE);
 
-	if (locked)
-		lck_mtx_lock(ip6_mutex);
-
 	if (error) {
 		if (m != NULL) {
 			panic("%s: unexpected packet %p\n", __func__, m);
@@ -645,7 +641,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt,
 	ip6 = mtod(m, struct ip6_hdr *);
 #endif /* PF */
 
-	error = nd6_output(ifp, origifp, m, dst, rt, locked);
+	error = nd6_output(ifp, origifp, m, dst, rt);
 	if (error) {
 		in6_ifstat_inc(ifp, ifs6_out_discard);
 		ip6stat.ip6s_cantforward++;
@@ -697,11 +693,7 @@ senderr:
 		code = ICMP6_DST_UNREACH_ADDR;
 		break;
 	}
-	if (locked)
-		lck_mtx_unlock(ip6_mutex);
 	icmp6_error(mcopy, type, code, 0);
-	if (locked)
-		lck_mtx_lock(ip6_mutex);
 	/* Release extra ref */
 	RT_REMREF(rt);
 	return;
diff --git a/bsd/netinet6/ip6_fw.c b/bsd/netinet6/ip6_fw.c
index f1b9f0508..ae221caad 100644
--- a/bsd/netinet6/ip6_fw.c
+++ b/bsd/netinet6/ip6_fw.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -150,11 +150,11 @@ static int ip6fw_sysctl SYSCTL_HANDLER_ARGS;
 SYSCTL_DECL(_net_inet6_ip6);
 SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Firewall");
 SYSCTL_PROC(_net_inet6_ip6_fw, OID_AUTO, enable, 
-	CTLTYPE_INT | CTLFLAG_RW,
+	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
 	&ip6_fw_enable, 0, ip6fw_sysctl, "I", "Enable ip6fw");
-SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, debug, CTLFLAG_RW, &fw6_debug, 0, "");
-SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, verbose, CTLFLAG_RW, &fw6_verbose, 0, "");
-SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, &fw6_verbose_limit, 0, "");
+SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fw6_debug, 0, "");
+SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED, &fw6_verbose, 0, "");
+SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, verbose_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &fw6_verbose_limit, 0, "");
 
 static int
 ip6fw_sysctl SYSCTL_HANDLER_ARGS
@@ -202,7 +202,6 @@ static void cp_to_user_32( struct ip6_fw_32 *userrule_32, struct ip6_fw *rule);
 static void cp_from_user_32( struct ip6_fw_32 *userrule_32, struct ip6_fw *rule);
 
 static char err_prefix[] = "ip6_fw_ctl:";
-extern lck_mtx_t *ip6_mutex;
 
 /*
  * Returns 1 if the port is matched by the vector, 0 otherwise
@@ -390,17 +389,21 @@ iface_match(struct ifnet *ifp, union ip6_fw_if *ifu, int byname)
 		struct ifaddr *ia;
 
 		ifnet_lock_shared(ifp);
-		for (ia = ifp->if_addrlist.tqh_first; ia; ia = ia->ifa_list.tqe_next)
+		for (ia = ifp->if_addrlist.tqh_first; ia;
+		    ia = ia->ifa_list.tqe_next)
 		{
-
-			if (ia->ifa_addr == NULL)
-				continue;
-			if (ia->ifa_addr->sa_family != AF_INET6)
+			IFA_LOCK_SPIN(ia);
+			if (ia->ifa_addr->sa_family != AF_INET6) {
+				IFA_UNLOCK(ia);
 				continue;
+			}
 			if (!IN6_ARE_ADDR_EQUAL(&ifu->fu_via_ip6,
 			    &(((struct sockaddr_in6 *)
-			    (ia->ifa_addr))->sin6_addr))) 
+			    (ia->ifa_addr))->sin6_addr))) {
+				IFA_UNLOCK(ia);
 				continue;
+			}
+			IFA_UNLOCK(ia);
 			ifnet_lock_done(ifp);
 			return(1);
 		}
@@ -558,7 +561,7 @@ ip6_fw_chk(struct ip6_hdr **pip6,
 	struct ip6_fw_chain *chain;
 	struct ip6_fw *rule = NULL;
 	struct ip6_hdr *ip6 = *pip6;
-	struct ifnet *const rif = ((*m)->m_flags & M_LOOP) ? ifunit("lo0") : (*m)->m_pkthdr.rcvif;
+	struct ifnet *const rif = ((*m)->m_flags & M_LOOP) ? lo_ifp : (*m)->m_pkthdr.rcvif;
 	u_short offset = 0;
 	int off = sizeof(struct ip6_hdr), nxt = ip6->ip6_nxt;
 	u_short src_port, dst_port;
@@ -870,18 +873,15 @@ got_match:
 			}
 			bcopy(&ti, ip6, sizeof(ti));
 			tcp_respond(NULL, ip6, (struct tcphdr *)(ip6 + 1),
-				*m, ack, seq, flags, IFSCOPE_NONE);
+				*m, ack, seq, flags, IFSCOPE_NONE, 0);
 			*m = NULL;
 			break;
 		  }
 		default:	/* Send an ICMP unreachable using code */
 			if (oif)
 				(*m)->m_pkthdr.rcvif = oif;
-			lck_mtx_assert(ip6_mutex, LCK_MTX_ASSERT_OWNED);
-			lck_mtx_unlock(ip6_mutex);
 			icmp6_error(*m, ICMP6_DST_UNREACH,
 			    rule->fw_reject_code, 0);
-			lck_mtx_lock(ip6_mutex);
 			*m = NULL;
 			break;
 		}
@@ -962,6 +962,7 @@ add_entry6(struct ip6_fw_head *chainptr, struct ip6_fw *frwl)
 		}
 	}
 
+	bcopy(ftmp, frwl, sizeof(struct ip6_fw));
 	splx(s);
 	return (0);
 }
@@ -1400,6 +1401,17 @@ ip6_fw_ctl(struct sockopt *sopt)
 				ip6fw_kev_post_msg(KEV_IP6FW_ADD);
 			} else
 				error = EINVAL;
+
+			if (is64user) {
+				struct ip6_fw_64 userrule_64;
+				cp_to_user_64( &userrule_64, &rule);
+				error = sooptcopyout(sopt, &userrule_64, userrulesize);
+			}
+			else {
+				struct ip6_fw_32 userrule_32;
+				cp_to_user_32( &userrule_32, &rule);
+				error = sooptcopyout(sopt, &userrule_32, userrulesize);
+			}
 			break;
 
 		case IPV6_FW_DEL:
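
The IPV6_FW_ADD path now copies the installed rule back to the caller so user space learns the kernel-assigned rule number, picking the 32-bit or 64-bit layout to match the requesting process. A sketch of that echo-back pattern; the rule layouts, is64 flag, and echo_rule() are illustrative inventions, with memcpy() standing in for sooptcopyout():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct rule_k { uint64_t handle; uint32_t number; };	/* kernel layout */
struct rule_32 { uint32_t handle; uint32_t number; };	/* ILP32 user layout */
struct rule_64 { uint64_t handle; uint32_t number; uint32_t pad; };

static int
echo_rule(const struct rule_k *r, void *ubuf, size_t ulen, int is64)
{
	if (is64) {
		struct rule_64 u64 = { r->handle, r->number, 0 };

		if (ulen < sizeof(u64))
			return (-1);
		memcpy(ubuf, &u64, sizeof(u64));	/* sooptcopyout analogue */
	} else {
		struct rule_32 u32 = { (uint32_t)r->handle, r->number };

		if (ulen < sizeof(u32))		/* pointer-width field narrows */
			return (-1);
		memcpy(ubuf, &u32, sizeof(u32));
	}
	return (0);
}

int
main(void)
{
	struct rule_k r = { 0xfeedfacecafebeefULL, 100 };
	struct rule_64 out;

	echo_rule(&r, &out, sizeof(out), 1);
	printf("rule number seen by 64-bit caller: %u\n", out.number);
	return (0);
}
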
diff --git a/bsd/netinet6/ip6_fw.h b/bsd/netinet6/ip6_fw.h
index 32f4f280b..92f913f29 100644
--- a/bsd/netinet6/ip6_fw.h
+++ b/bsd/netinet6/ip6_fw.h
@@ -42,6 +42,7 @@
 
 #ifndef _IP6_FW_H
 #define _IP6_FW_H
+#ifdef __APPLE_API_OBSOLETE
 
 #include <sys/appleapiopts.h>
 
@@ -343,4 +344,5 @@ extern	int ip6_fw_enable;
 
 #endif /* KERNEL_PRIVATE */
 
+#endif /* __APPLE_API_OBSOLETE */
 #endif /* _IP6_FW_H */
diff --git a/bsd/netinet6/ip6_id.c b/bsd/netinet6/ip6_id.c
new file mode 100644
index 000000000..26fffd286
--- /dev/null
+++ b/bsd/netinet6/ip6_id.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*-
+ * Copyright (C) 2003 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	$KAME: ip6_id.c,v 1.13 2003/09/16 09:11:19 itojun Exp $
+ */
+
+/*-
+ * Copyright 1998 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Theo de Raadt <deraadt@openbsd.org> came up with the idea of using
+ * such a mathematical system to generate more random (yet non-repeating)
+ * ids to solve the resolver/named problem.  But Niels designed the
+ * actual system based on the constraints.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Niels Provos.
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $OpenBSD: ip_id.c,v 1.6 2002/03/15 18:19:52 millert Exp $
+ */
+
+#include <sys/cdefs.h>
+
+/*
+ * seed = random (bits - 1) bit
+ * n = prime, g0 = generator to n,
+ * j = random so that gcd(j,n-1) == 1
+ * g = g0^j mod n will be a generator again.
+ *
+ * X[0] = random seed.
+ * X[n] = a*X[n-1]+b mod m is a Linear Congruential Generator
+ * with a = 7^(even random) mod m,
+ *      b = random with gcd(b,m) == 1
+ *      m = constant and a maximal period of m-1.
+ *
+ * The transaction id is determined by:
+ * id[n] = seed xor (g^X[n] mod n)
+ *
+ * Effectively the id is restricted to the lower (bits - 1) bits, thus
+ * yielding two different cycles by toggling the msb on and off.
+ * This avoids reuse issues caused by reseeding.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/random.h>
+#include <libkern/libkern.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+
+#ifndef INT32_MAX
+#define INT32_MAX	0x7fffffffU
+#endif
+
+struct randomtab {
+	const int	ru_bits; /* resulting bits */
+	const long	ru_out;	/* Time after which reseeding occurs */
+	const u_int32_t ru_max;	/* Unique cycle, avoids blackjack prediction */
+	const u_int32_t ru_gen;	/* Starting generator */
+	const u_int32_t ru_n;	/* ru_n: prime, ru_n - 1: product of pfacts[] */
+	const u_int32_t ru_agen; /* determine ru_a as ru_agen^(2*rand) */
+	const u_int32_t ru_m;	/* ru_m = 2^x*3^y */
+	const u_int32_t pfacts[4];	/* factors of ru_n */
+
+	u_int32_t ru_counter;
+	u_int32_t ru_msb;
+
+	u_int32_t ru_x;
+	u_int32_t ru_seed, ru_seed2;
+	u_int32_t ru_a, ru_b;
+	u_int32_t ru_g;
+	long ru_reseed;
+};
+
+static struct randomtab randomtab_32 = {
+	32,			/* resulting bits */
+	180,			/* Time after which reseeding occurs */
+	1000000000,		/* Unique cycle, avoids blackjack prediction */
+	2,			/* Starting generator */
+	2147483629,		/* RU_N-1 = 2^2*3^2*59652323 */
+	7,			/* determine ru_a as RU_AGEN^(2*rand) */
+	1836660096,		/* RU_M = 2^7*3^15 - don't change */
+	{ 2, 3, 59652323, 0 },	/* factors of ru_n */
+	0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+static struct randomtab randomtab_20 = {
+	20,			/* resulting bits */
+	180,			/* Time after which reseeding occurs */
+	200000,			/* Unique cycle, avoids blackjack prediction */
+	2,			/* Starting generator */
+	524269,			/* RU_N-1 = 2^2*3^2*14563 */
+	7,			/* determine ru_a as RU_AGEN^(2*rand) */
+	279936,			/* RU_M = 2^7*3^7 - don't change */
+	{ 2, 3, 14563, 0 },	/* factors of ru_n */
+	0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+static u_int32_t pmod(u_int32_t, u_int32_t, u_int32_t);
+static void initid(struct randomtab *);
+static u_int32_t randomid(struct randomtab *);
+
+/*
+ * Do a fast modular exponentiation; the returned value will be in the range
+ * of 0 - (mod-1)
+ */
+static u_int32_t
+pmod(u_int32_t gen, u_int32_t expo, u_int32_t mod)
+{
+	u_int64_t s, t, u;
+
+	s = 1;
+	t = gen;
+	u = expo;
+
+	while (u) {
+		if (u & 1)
+			s = (s * t) % mod;
+		u >>= 1;
+		t = (t * t) % mod;
+	}
+	return (s);
+}
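
pmod() above is textbook square-and-multiply: the exponent is halved each round, so the loop runs O(log expo) times, and the 64-bit intermediates keep t * t from overflowing for 32-bit moduli. The same routine can be sanity-checked stand-alone in user space:

#include <stdint.h>
#include <stdio.h>

static uint32_t
pmod(uint32_t gen, uint32_t expo, uint32_t mod)
{
	uint64_t s = 1, t = gen, u = expo;

	while (u) {
		if (u & 1)
			s = (s * t) % mod;	/* fold in this bit's power */
		u >>= 1;
		t = (t * t) % mod;		/* square for the next bit */
	}
	return ((uint32_t)s);
}

int
main(void)
{
	/* 2^10 = 1024, and 1024 mod 1000 = 24 */
	printf("%u\n", pmod(2, 10, 1000));	/* prints 24 */
	/* 7^13 mod 61 = 55 */
	printf("%u\n", pmod(7, 13, 61));	/* prints 55 */
	return (0);
}
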
+
+/*
+ * Initializes the seed and chooses a suitable generator. Also toggles
+ * the msb flag. The msb flag is used to generate two distinct
+ * cycles of random numbers and thus avoids reuse of ids.
+ *
+ * This function is called from randomid() when needed; an
+ * application does not have to worry about it.
+ */
+static void
+initid(struct randomtab *p)
+{
+	u_int32_t j, i;
+	int noprime = 1;
+	struct timeval timenow;
+	
+	getmicrotime(&timenow);
+
+	p->ru_x = random() % p->ru_m;
+
+	/* (bits - 1) bits of random seed */
+	p->ru_seed = random() & (~0U >> (32 - p->ru_bits + 1));
+	p->ru_seed2 = random() & (~0U >> (32 - p->ru_bits + 1));
+
+	/* Determine the LCG we use */
+	p->ru_b = (random() & (~0U >> (32 - p->ru_bits))) | 1;
+	p->ru_a = pmod(p->ru_agen,
+	    (random() & (~0U >> (32 - p->ru_bits))) & (~1U), p->ru_m);
+	while (p->ru_b % 3 == 0)
+		p->ru_b += 2;
+
+	j = random() % p->ru_n;
+
+	/*
+	 * Do a fast gcd(j, RU_N - 1), so we can find a j with
+	 * gcd(j, RU_N - 1) == 1, giving a new generator for
+	 * RU_GEN^j mod RU_N
+	 */
+	while (noprime) {
+		for (i = 0; p->pfacts[i] > 0; i++)
+			if (j % p->pfacts[i] == 0)
+				break;
+
+		if (p->pfacts[i] == 0)
+			noprime = 0;
+		else
+			j = (j + 1) % p->ru_n;
+	}
+
+	p->ru_g = pmod(p->ru_gen, j, p->ru_n);
+	p->ru_counter = 0;
+
+	p->ru_reseed = timenow.tv_sec + p->ru_out;
+	p->ru_msb = p->ru_msb ? 0 : (1U << (p->ru_bits - 1));
+}
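
The parameter choices in initid() follow the Hull-Dobell conditions for a full-period LCG: with ru_m = 2^x * 3^y, the increment ru_b only needs to be odd and not divisible by 3 to be coprime to the modulus. A quick stand-alone check of that reasoning, assuming the RU_M value from randomtab_32 (gcd() here is a plain Euclid helper, not a kernel routine):

#include <stdint.h>
#include <stdio.h>

static uint32_t
gcd(uint32_t a, uint32_t b)
{
	while (b != 0) {
		uint32_t t = a % b;

		a = b;
		b = t;
	}
	return (a);
}

int
main(void)
{
	uint32_t m = 1836660096;	/* RU_M = 2^7 * 3^15 from randomtab_32 */
	uint32_t b;

	/* mimic initid(): force odd, then bump past multiples of 3 */
	for (b = 12345679 | 1; b % 3 == 0; b += 2)
		;
	printf("gcd(b, m) = %u\n", gcd(b, m));	/* prints 1 */
	return (0);
}
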
+
+static u_int32_t
+randomid(struct randomtab *p)
+{
+	int i, n;
+	u_int32_t tmp;
+	struct timeval timenow;
+
+	getmicrotime(&timenow);
+
+	if (p->ru_counter >= p->ru_max || timenow.tv_sec > p->ru_reseed)
+		initid(p);
+
+	tmp = random();
+
+	/* Skip a random number of ids */
+	n = tmp & 0x3; tmp = tmp >> 2;
+	if (p->ru_counter + n >= p->ru_max)
+		initid(p);
+
+	for (i = 0; i <= n; i++) {
+		/* Linear Congruential Generator */
+		p->ru_x = (u_int32_t)((u_int64_t)p->ru_a * p->ru_x + p->ru_b) % p->ru_m;
+	}
+
+	p->ru_counter += i;
+
+	return (p->ru_seed ^ pmod(p->ru_g, p->ru_seed2 ^ p->ru_x, p->ru_n)) |
+	    p->ru_msb;
+}
+
+u_int32_t
+ip6_randomid(void)
+{
+
+	return randomid(&randomtab_32);
+}
+
+u_int32_t
+ip6_randomflowlabel(void)
+{
+
+	return randomid(&randomtab_20) & 0xfffff;
+}
diff --git a/bsd/netinet6/ip6_input.c b/bsd/netinet6/ip6_input.c
index 0a8320298..ae8fecd68 100644
--- a/bsd/netinet6/ip6_input.c
+++ b/bsd/netinet6/ip6_input.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -105,8 +105,13 @@
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
+#include <sys/sysctl.h>
 #include <sys/proc.h>
 #include <sys/kauth.h>
+#include <sys/mcache.h>
+#include <mach/mach_time.h>
+
+#include <pexpert/pexpert.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
@@ -114,6 +119,7 @@
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/kpi_protocol.h>
+#include <net/ntstat.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
@@ -128,7 +134,8 @@
 #include <netinet/icmp6.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet6/nd6.h>
-#include <netinet6/in6_prefix.h>
+#include <netinet6/scope6_var.h>
+#include <mach/sdt.h>
 
 #if IPSEC
 #include <netinet6/ipsec.h>
@@ -159,7 +166,14 @@ extern struct ip6protosw inet6sw[];
 
 struct ip6protosw *  ip6_protox[IPPROTO_MAX];
 static int ip6qmaxlen = IFQ_MAXLEN;
-struct in6_ifaddr *in6_ifaddrs;
+
+static lck_grp_attr_t	*in6_ifaddr_rwlock_grp_attr;
+static lck_grp_t	*in6_ifaddr_rwlock_grp;
+static lck_attr_t	*in6_ifaddr_rwlock_attr;
+decl_lck_rw_data(, in6_ifaddr_rwlock);
+
+/* Protected by in6_ifaddr_rwlock */
+struct in6_ifaddr *in6_ifaddrs = NULL;
 
 int ip6_forward_srcrt;			/* XXX */
 int ip6_sourcecheck;			/* XXX */
@@ -168,7 +182,14 @@ const int int6intrq_present = 1;
 
 int ip6_ours_check_algorithm;
 int in6_init2done = 0;
+int in6_init_done = 0;
 
+#define _CASSERT(x)	\
+	switch (0) { case 0: case (x): ; }
+#define IN6_IFSTAT_REQUIRE_ALIGNED_64(f)	\
+	_CASSERT(!(offsetof(struct in6_ifstat, f) % sizeof (uint64_t)))
+#define ICMP6_IFSTAT_REQUIRE_ALIGNED_64(f)	\
+	_CASSERT(!(offsetof(struct icmp6_ifstat, f) % sizeof (uint64_t)))
 
 #if IPFW2
 /* firewall hooks */
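
_CASSERT() above is a classic compile-time assertion: when x evaluates to 0 the switch acquires two case 0: labels and the file fails to compile, so the alignment macros reject any in6_ifstat/icmp6_ifstat field whose offset is not 64-bit aligned before the kernel is ever built. A stand-alone sketch of the same trick:

#include <stddef.h>
#include <stdio.h>

#define MY_CASSERT(x)	\
	switch (0) { case 0: case (x): ; }

struct stats {
	char tag[8];			/* keeps the counter 8-byte aligned */
	unsigned long long in_packets;
};

int
main(void)
{
	/* compiles: offsetof(...) % 8 == 0, so (x) is nonzero */
	MY_CASSERT(!(offsetof(struct stats, in_packets) % 8));

	/*
	 * Changing tag[8] to tag[4] would make (x) == 0, producing a
	 * duplicate 'case 0:' label and a compile-time error.
	 */
	printf("alignment check passed\n");
	return (0);
}
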
@@ -181,17 +202,23 @@ struct ip6stat ip6stat;
 
 #ifdef __APPLE__
 struct ifqueue ip6intrq;
-lck_mtx_t 		*ip6_mutex;
+decl_lck_mtx_data(, ip6_init_mutex);
 lck_mtx_t 		*dad6_mutex;
 lck_mtx_t 		*nd6_mutex;
 lck_mtx_t		*prefix6_mutex;
 lck_mtx_t		*scope6_mutex;
+#ifdef ENABLE_ADDRSEL
+lck_mtx_t		*addrsel_mutex;
+#endif
+decl_lck_rw_data(, in6_ifs_rwlock);
+decl_lck_rw_data(, icmp6_ifs_rwlock);
 lck_attr_t		*ip6_mutex_attr;
 lck_grp_t		*ip6_mutex_grp;
 lck_grp_attr_t		*ip6_mutex_grp_attr;
 extern lck_mtx_t	*inet6_domain_mutex;
 #endif
 extern int loopattach_done;
+extern void addrsel_policy_init(void);
 
 static void ip6_init2(void *);
 static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *);
@@ -209,6 +236,11 @@ void stfattach(void);
 
 extern lck_mtx_t *domain_proto_mtx;
 
+SYSCTL_DECL(_net_inet6_ip6);
+
+int	ip6_doscopedroute = 1;
+SYSCTL_INT(_net_inet6_ip6, OID_AUTO, scopedroute, CTLFLAG_RD | CTLFLAG_LOCKED,
+     &ip6_doscopedroute, 0, "Enable IPv6 scoped routing");
 
 static void
 ip6_proto_input(
@@ -229,6 +261,9 @@ ip6_init()
 	int i;
 	struct timeval tv;
 
+	PE_parse_boot_argn("net.inet6.ip6.scopedroute", &ip6_doscopedroute,
+	    sizeof (ip6_doscopedroute));
+
 #if DIAGNOSTIC
 	if (sizeof(struct protosw) != sizeof(struct ip6protosw))
 		panic("sizeof(protosw) != sizeof(ip6protosw)");
@@ -251,9 +286,6 @@ ip6_init()
 	ip6_mutex_grp = lck_grp_alloc_init("ip6", ip6_mutex_grp_attr);
 	ip6_mutex_attr = lck_attr_alloc_init();
 
-	if ((ip6_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) {
-		panic("ip6_init: can't alloc ip6_mutex\n");
-	}
 	if ((dad6_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) {
 		panic("ip6_init: can't alloc dad6_mutex\n");
 	}
@@ -269,14 +301,90 @@ ip6_init()
 		panic("ip6_init: can't alloc scope6_mutex\n");
 	}
 
+#ifdef ENABLE_ADDRSEL
+	if ((addrsel_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) {
+		panic("ip6_init: can't alloc addrsel_mutex\n");
+	}
+#endif
+
+	lck_rw_init(&in6_ifs_rwlock, ip6_mutex_grp, ip6_mutex_attr);
+	lck_rw_init(&icmp6_ifs_rwlock, ip6_mutex_grp, ip6_mutex_attr);
+	lck_mtx_init(&ip6_init_mutex, ip6_mutex_grp, ip6_mutex_attr);
 
 	inet6domain.dom_flags = DOM_REENTRANT;	
 
 	ip6intrq.ifq_maxlen = ip6qmaxlen;
+
+	in6_ifaddr_rwlock_grp_attr = lck_grp_attr_alloc_init();
+	in6_ifaddr_rwlock_grp = lck_grp_alloc_init("in6_ifaddr_rwlock",
+	    in6_ifaddr_rwlock_grp_attr);
+	in6_ifaddr_rwlock_attr = lck_attr_alloc_init();
+	lck_rw_init(&in6_ifaddr_rwlock, in6_ifaddr_rwlock_grp,
+	    in6_ifaddr_rwlock_attr);
+
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_receive);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_hdrerr);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_toobig);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_noroute);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_addrerr);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_protounknown);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_truncated);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_discard);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_deliver);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_forward);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_request);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_discard);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_fragok);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_fragfail);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_fragcreat);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_reass_reqd);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_reass_ok);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_reass_fail);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_mcast);
+	IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_mcast); 
+
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_msg);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_error);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_dstunreach);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_adminprohib);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_timeexceed);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_paramprob);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_pkttoobig);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_echo);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_echoreply);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_routersolicit);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_routeradvert);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_neighborsolicit);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_neighboradvert);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_redirect);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_mldquery);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_mldreport);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_mlddone);
+
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_msg);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_error);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_dstunreach);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_adminprohib);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_timeexceed);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_paramprob);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_pkttoobig);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_echo);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_echoreply);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_routersolicit);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_routeradvert);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_neighborsolicit);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_neighboradvert);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_redirect);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_mldquery);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_mldreport);
+	ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_mlddone);
+
 	in6_ifaddr_init();
+	ip6_moptions_init();
 	nd6_init();
 	frag6_init();
 	icmp6_init();
+	addrsel_policy_init();
 	/*
 	 * in many cases, random() here does NOT return random number
 	 * as initialization during bootstrap time occur in fixed order.
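
With ip6_mutex gone, the global in6_ifaddrs chain gets its own reader-writer lock, created through the group-attr/group/attr/init chain so contention is attributed to a named group in lock statistics. The access pattern this enables is the usual one below, sketched with POSIX rwlocks since the lck_rw_* API exists only in the kernel:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t addr_lock;	/* analogue of in6_ifaddr_rwlock */
static int shared_count;		/* "protected by addr_lock" */

int
main(void)
{
	pthread_rwlock_init(&addr_lock, NULL);

	pthread_rwlock_rdlock(&addr_lock);	/* readers walk the list */
	printf("count = %d\n", shared_count);
	pthread_rwlock_unlock(&addr_lock);

	pthread_rwlock_wrlock(&addr_lock);	/* writers mutate it */
	shared_count++;
	pthread_rwlock_unlock(&addr_lock);

	pthread_rwlock_destroy(&addr_lock);
	return (0);
}
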
@@ -310,9 +418,6 @@ ip6_init2(
 	/* nd6_timer_init */
 	timeout(nd6_timer, (caddr_t)0, hz);
 
-	/* router renumbering prefix list maintenance */
-	timeout(in6_rr_timer, (caddr_t)0, hz);
-
 	/* timer for regeneranation of temporary addresses randomize ID */
 	timeout(in6_tmpaddrtimer, (caddr_t)0,
 		(ip6_temp_preferred_lifetime - ip6_desync_factor -
@@ -327,42 +432,27 @@ ip6_init2(
 #if NSTF
 	stfattach();
 #endif
-#else
-	/* nd6_timer_init */
-
-	callout_init(&nd6_timer_ch);
-	callout_reset(&nd6_timer_ch, hz, nd6_timer, NULL);
-
-	/* router renumbering prefix list maintenance */
-	callout_init(&in6_rr_timer_ch);
-	callout_reset(&in6_rr_timer_ch, hz, in6_rr_timer, NULL);
-
-	/* timer for regeneranation of temporary addresses randomize ID */
-	callout_reset(&in6_tmpaddrtimer_ch,
-		      (ip6_temp_preferred_lifetime - ip6_desync_factor -
-		       ip6_temp_regen_advance) * hz,
-		      in6_tmpaddrtimer, NULL);
 #endif
-
 	in6_init2done = 1;
-}
 
-#if __FreeBSD__
-/* cheat */
-/* This must be after route_init(), which is now SI_ORDER_THIRD */
-SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL);
-#endif
+	lck_mtx_lock(&ip6_init_mutex);
+	in6_init_done = 1;
+	wakeup(&in6_init_done);
+	lck_mtx_unlock(&ip6_init_mutex);
+}
 
-/*
- * ip6_forward_rt contains the route entry that was recently used during
- * the forwarding of an IPv6 packet and thus acts as a route cache.  Access
- * to this variable is protected by the global lock ip6_mutex.
- */
-static struct route_in6 ip6_forward_rt;
+void
+ip6_fin()
+{
+	lck_mtx_lock(&ip6_init_mutex);
+	while (in6_init_done == 0) {
+		(void) msleep(&in6_init_done, &ip6_init_mutex, 0, "ip6_fin()", NULL);
+	}
+	lck_mtx_unlock(&ip6_init_mutex);
+}
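
ip6_fin() above is an init-done rendezvous: waiters re-test in6_init_done under ip6_init_mutex before sleeping, and ip6_init2() sets the flag and calls wakeup() under the same mutex, so no wakeup can slip between the test and the sleep. A user-space rendition of the handshake (pthread_cond_wait stands in for msleep/wakeup):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t init_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t init_cv = PTHREAD_COND_INITIALIZER;
static int init_done;

static void *
initializer(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&init_mtx);
	init_done = 1;				/* like in6_init_done = 1 */
	pthread_cond_broadcast(&init_cv);	/* like wakeup() */
	pthread_mutex_unlock(&init_mtx);
	return (NULL);
}

static void
wait_for_init(void)				/* like ip6_fin() */
{
	pthread_mutex_lock(&init_mtx);
	while (init_done == 0)			/* re-test: spurious wakeups */
		pthread_cond_wait(&init_cv, &init_mtx);
	pthread_mutex_unlock(&init_mtx);
}

int
main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, initializer, NULL);
	wait_for_init();
	printf("init complete\n");
	pthread_join(t, NULL);
	return (0);
}
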
 
 void
-ip6_input(m)
-	struct mbuf *m;
+ip6_input(struct mbuf *m)
 {
 	struct ip6_hdr *ip6;
 	int off = sizeof(struct ip6_hdr), nest;
@@ -372,6 +462,16 @@ ip6_input(m)
 	struct ifnet *deliverifp = NULL;
 	ipfilter_t inject_ipfref = 0;
 	int seen;
+	struct in6_ifaddr *ia6 = NULL;
+	struct route_in6 ip6_forward_rt;
+	struct sockaddr_in6 *dst6;
+
+	bzero(&ip6_forward_rt, sizeof(ip6_forward_rt));
+
+	/* Check if the packet we received is valid after interface filter
+	 * processing
+	 */
+	MBUF_INPUT_CHECK(m, m->m_pkthdr.rcvif);
 
 	/*
 	 * No need to proccess packet twice if we've 
@@ -402,7 +502,6 @@ ip6_input(m)
 	 */
 	ip6_delaux(m);
 
-	lck_mtx_lock(ip6_mutex);
 	/*
 	 * mbuf statistics
 	 */
@@ -425,6 +524,15 @@ ip6_input(m)
 #undef M2MMAX
 	}
 
+	/* drop the packet if IPv6 operation is disabled on the IF */
+	lck_rw_lock_shared(nd_if_rwlock);
+	if (m->m_pkthdr.rcvif->if_index < nd_ifinfo_indexlim &&
+	    (nd_ifinfo[m->m_pkthdr.rcvif->if_index].flags & ND6_IFF_IFDISABLED)) {
+		lck_rw_done(nd_if_rwlock);
+		goto bad;
+	}
+	lck_rw_done(nd_if_rwlock);
+
 	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive);
 	ip6stat.ip6s_total++;
 
@@ -447,11 +555,8 @@ ip6_input(m)
 				n = NULL;
 			}
 		}
-		if (n == NULL) {
-			m_freem(m);
-			lck_mtx_unlock(ip6_mutex);
-			return;	/*ENOBUFS*/
-		}
+		if (n == NULL)
+			goto bad;
 
 		m_copydata(m, 0, m->m_pkthdr.len, mtod(n, caddr_t));
 		n->m_len = m->m_pkthdr.len;
@@ -459,7 +564,7 @@ ip6_input(m)
 		m = n;
 	}
 	IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr),
-		{lck_mtx_unlock(ip6_mutex); return;}); 
+		{goto done;}); 
 #endif
 
 	if (m->m_len < sizeof(struct ip6_hdr)) {
@@ -468,8 +573,7 @@ ip6_input(m)
 		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == 0) {
 			ip6stat.ip6s_toosmall++;
 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
-			lck_mtx_unlock(ip6_mutex);
-			return;
+			goto done;
 		}
 	}
 
@@ -495,10 +599,8 @@ ip6_input(m)
 			m_freem(m);
 			m = NULL;
 		}
-		if (!m) {
-			lck_mtx_unlock(ip6_mutex);
-			return;
-		}
+		if (!m)
+			goto done;
 	}
 #endif
 
@@ -514,9 +616,14 @@ ip6_input(m)
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
 		goto bad;
 	}
-	if ((IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) ||
-	     IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst)) &&
-	    (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
+	if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) &&
+	    !(m->m_flags & M_LOOP)) {
+		/*
+		 * In this case, the packet should come from the loopback
+		 * interface.  However, we cannot just check the if_flags,
+		 * because ip6_mloopback() passes the "actual" interface
+		 * as the outgoing/incoming interface.
+		 */
 		ip6stat.ip6s_badscope++;
 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
 		goto bad;
@@ -526,13 +633,13 @@ ip6_input(m)
 	 * The following check is not documented in specs.  A malicious
 	 * party may be able to use IPv4 mapped addr to confuse tcp/udp stack
 	 * and bypass security checks (act as if it was from 127.0.0.1 by using
-	 * IPv6 src ::ffff:127.0.0.1).	Be cautious.
+	 * IPv6 src ::ffff:127.0.0.1).  Be cautious.
 	 *
 	 * This check chokes if we are in an SIIT cloud.  As none of BSDs
 	 * support IPv4-less kernel compilation, we cannot support SIIT
 	 * environment at all.  So, it makes more sense for us to reject any
 	 * malicious packets for non-SIIT environment, than try to do a
-	 * partical support for SIIT environment.
+	 * partial support for SIIT environment.
 	 */
 	if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
 	    IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
@@ -556,19 +663,42 @@ ip6_input(m)
 	}
 #endif
 
+	/*
+	 * Naively assume we can attribute inbound data to the route we would
+	 * use to send to this destination. Asymmetric routing breaks this
+	 * assumption, but it still allows us to account for traffic from
+	 * a remote node in the routing table.
+	 * This has a very significant performance impact, so we bypass
+	 * it if nstat_collect is disabled. We may also bypass for TCP in
+	 * the future, because TCP will have a route that we can use to
+	 * attribute the data to; that does mean we would not account for
+	 * forwarded TCP traffic.
+	 */
+	if (nstat_collect) {
+		struct rtentry *rte =
+		    ifnet_cached_rtlookup_inet6(m->m_pkthdr.rcvif,
+		    &ip6->ip6_src);
+		if (rte != NULL) {
+			nstat_route_rx(rte, 1, m->m_pkthdr.len, 0);
+			rtfree(rte);
+		}
+	}
+
 #if PF
 	/* Invoke inbound packet filter */
-	lck_mtx_unlock(ip6_mutex);
-	if (pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET6, TRUE) != 0) {
-		if (m != NULL) {
-			panic("%s: unexpected packet %p\n", __func__, m);
-			/* NOTREACHED */
+	if (PF_IS_ENABLED) {
+		int error;
+		error = pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET6, TRUE);
+		if (error != 0) {
+			if (m != NULL) {
+				panic("%s: unexpected packet %p\n", __func__, m);
+				/* NOTREACHED */
+			}
+			/* Already freed by callee */
+			goto done;
 		}
-		/* Already freed by callee */
-		return;
+		ip6 = mtod(m, struct ip6_hdr *);
 	}
-	ip6 = mtod(m, struct ip6_hdr *);
-	lck_mtx_lock(ip6_mutex);
 #endif /* PF */
 
 	/* drop packets if interface ID portion is already filled */
@@ -592,39 +722,11 @@ ip6_input(m)
 		ip6->ip6_dst.s6_addr16[1]
 			= htons(m->m_pkthdr.rcvif->if_index);
 
-#if 0 /* this case seems to be unnecessary. (jinmei, 20010401) */
-	/*
-	 * We use rt->rt_ifp to determine if the address is ours or not.
-	 * If rt_ifp is lo0, the address is ours.
-	 * The problem here is, rt->rt_ifp for fe80::%lo0/64 is set to lo0,
-	 * so any address under fe80::%lo0/64 will be mistakenly considered
-	 * local.  The special case is supplied to handle the case properly
-	 * by actually looking at interface addresses
-	 * (using in6ifa_ifpwithaddr).
-	 */
-	if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) != 0 &&
-	    IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) {
-	    struct in6_ifaddr *ia6;
-		if (!(ia6 = in6ifa_ifpwithaddr(m->m_pkthdr.rcvif, &ip6->ip6_dst))) {
-			lck_mtx_unlock(ip6_mutex);
-			icmp6_error(m, ICMP6_DST_UNREACH,
-			    ICMP6_DST_UNREACH_ADDR, 0);
-			/* m is already freed */
-			return;
-		}
-		ifafree(&ia6->ia_ifa);
-
-		ours = 1;
-		deliverifp = m->m_pkthdr.rcvif;
-		goto hbhcheck;
-	}
-#endif
-
 	/*
 	 * Multicast check
 	 */
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
-		struct	in6_multi *in6m = 0;
+		struct	in6_multi *in6m = NULL;
 		struct ifnet *ifp = m->m_pkthdr.rcvif;
 
 		in6_ifstat_inc(ifp, ifs6_in_mcast);
@@ -632,16 +734,18 @@ ip6_input(m)
 		 * See if we belong to the destination multicast group on the
 		 * arrival interface.
 		 */
-		ifnet_lock_shared(ifp);
-		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
-		ifnet_lock_done(ifp);
-		if (in6m)
+		in6_multihead_lock_shared();
+		IN6_LOOKUP_MULTI(&ip6->ip6_dst, ifp, in6m);
+		in6_multihead_lock_done();
+		if (in6m != NULL) {
+			IN6M_REMREF(in6m);
 			ours = 1;
+		}
+		else 
 #if MROUTING
-		else if (!ip6_mrouter) {
-#else
-		else {
+		if (!ip6_mrouter)
 #endif
+		{
 			ip6stat.ip6s_notmember++;
 			ip6stat.ip6s_cantforward++;
 			in6_ifstat_inc(ifp, ifs6_in_discard);
@@ -651,42 +755,18 @@ ip6_input(m)
 		goto hbhcheck;
 	}
 
-	if (ip6_forward_rt.ro_rt != NULL)
-		RT_LOCK(ip6_forward_rt.ro_rt);
 	/*
 	 *  Unicast check
 	 */
-	if (ip6_forward_rt.ro_rt != NULL &&
-	    (ip6_forward_rt.ro_rt->rt_flags & RTF_UP) &&
-	    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
-	    &((struct sockaddr_in6 *)(&ip6_forward_rt.ro_dst))->sin6_addr) &&
-	    ip6_forward_rt.ro_rt->generation_id == route_generation) {
-		ip6stat.ip6s_forward_cachehit++;
-	} else {
-		struct sockaddr_in6 *dst6;
-
-		if (ip6_forward_rt.ro_rt != NULL) {
-			/* route is down/stale or destination is different */
-			ip6stat.ip6s_forward_cachemiss++;
-			RT_UNLOCK(ip6_forward_rt.ro_rt);
-			rtfree(ip6_forward_rt.ro_rt);
-			ip6_forward_rt.ro_rt = NULL;
-		}
-
-		bzero(&ip6_forward_rt.ro_dst, sizeof(struct sockaddr_in6));
-		dst6 = (struct sockaddr_in6 *)&ip6_forward_rt.ro_dst;
-		dst6->sin6_len = sizeof(struct sockaddr_in6);
-		dst6->sin6_family = AF_INET6;
-		dst6->sin6_addr = ip6->ip6_dst;
-#if SCOPEDROUTING
-		ip6_forward_rt.ro_dst.sin6_scope_id =
-			in6_addr2scopeid(m->m_pkthdr.rcvif, &ip6->ip6_dst);
-#endif
+	dst6 = (struct sockaddr_in6 *)&ip6_forward_rt.ro_dst;
+	dst6->sin6_len = sizeof(struct sockaddr_in6);
+	dst6->sin6_family = AF_INET6;
+	dst6->sin6_addr = ip6->ip6_dst;
 
-		rtalloc_ign((struct route *)&ip6_forward_rt, RTF_PRCLONING);
-		if (ip6_forward_rt.ro_rt != NULL)
-			RT_LOCK(ip6_forward_rt.ro_rt);
-	}
+	rtalloc_scoped_ign((struct route *)&ip6_forward_rt,
+	    RTF_PRCLONING, IFSCOPE_NONE);
+	if (ip6_forward_rt.ro_rt != NULL)
+		RT_LOCK(ip6_forward_rt.ro_rt);
 
 #define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key))
 
@@ -726,8 +806,7 @@ ip6_input(m)
 				&rt6_key(ip6_forward_rt.ro_rt)->sin6_addr)
 #endif
 	    ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_LOOP) {
-		struct in6_ifaddr *ia6 =
-			(struct in6_ifaddr *)ip6_forward_rt.ro_rt->rt_ifa;
+		ia6 = (struct in6_ifaddr *)ip6_forward_rt.ro_rt->rt_ifa;
 
 		/*
 		 * record address information into m_aux.
@@ -738,31 +817,32 @@ ip6_input(m)
 		 * packets to a tentative, duplicated, or somehow invalid
 		 * address must not be accepted.
 		 */
+		RT_CONVERT_LOCK(ip6_forward_rt.ro_rt);	/* just in case */
+		IFA_LOCK_SPIN(&ia6->ia_ifa);
 		if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) {
+			IFA_UNLOCK(&ia6->ia_ifa);
 			/* this address is ready */
 			ours = 1;
 			deliverifp = ia6->ia_ifp;	/* correct? */
 			/* Count the packet in the ip address stats */
-#ifndef __APPLE__
 
-			ia6->ia_ifa.if_ipackets++;
-			ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
-#endif
 			RT_UNLOCK(ip6_forward_rt.ro_rt);
+			ia6 = NULL;
 			goto hbhcheck;
-		} else {
-			RT_UNLOCK(ip6_forward_rt.ro_rt);
-			/* address is not ready, so discard the packet. */
-			nd6log((LOG_INFO,
-			    "ip6_input: packet to an unready address %s->%s\n",
-			    ip6_sprintf(&ip6->ip6_src),
-			    ip6_sprintf(&ip6->ip6_dst)));
-			goto bad;
 		}
+		IFA_UNLOCK(&ia6->ia_ifa);
+		RT_UNLOCK(ip6_forward_rt.ro_rt);
+		/* address is not ready, so discard the packet. */
+		nd6log((LOG_INFO,
+		    "ip6_input: packet to an unready address %s->%s\n",
+		    ip6_sprintf(&ip6->ip6_src),
+		    ip6_sprintf(&ip6->ip6_dst)));
+		ia6 = NULL;
+		goto bad;
 	}
 
 	/*
-	 * FAITH(Firewall Aided Internet Translator)
+	 * FAITH (Firewall Aided Internet Translator)
 	 */
 #if defined(NFAITH) && 0 < NFAITH
 	if (ip6_keepfaith) {
@@ -796,9 +876,7 @@ ip6_input(m)
 	 * as our interface address (e.g. multicast addresses, addresses
 	 * within FAITH prefixes and such).
 	 */
-	if (deliverifp && !ip6_getdstifaddr(m)) {
-		struct in6_ifaddr *ia6;
-
+	if (deliverifp && (ia6 = ip6_getdstifaddr(m)) == NULL) {
 		ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst);
 		if (ia6) {
 			if (!ip6_setdstifaddr(m, ia6)) {
@@ -808,10 +886,16 @@ ip6_input(m)
 				 * to the upper layers.
 				 */
 			}
-			ifafree(&ia6->ia_ifa);
+			IFA_REMREF(&ia6->ia_ifa);
+			ia6 = NULL;
 		}
 	}
 
+	if (ia6 != NULL) {
+		IFA_REMREF(&ia6->ia_ifa);
+		ia6 = NULL;
+	}
+
 	/*
 	 * Process Hop-by-Hop options header if it's contained.
 	 * m may be modified in ip6_hopopts_input().
@@ -825,8 +909,7 @@ ip6_input(m)
 #if 0	/*touches NULL pointer*/
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
 #endif
-			lck_mtx_unlock(ip6_mutex);
-			return;	/* m have already been freed */
+			goto done;	/* m has already been freed */
 		}
 
 		/* adjust pointer */
@@ -840,17 +923,16 @@ ip6_input(m)
 		if (ip6->ip6_plen == 0 && plen == 0) {
 			/*
 			 * Note that if a valid jumbo payload option is
-			 * contained, ip6_hoptops_input() must set a valid
-			 * (non-zero) payload length to the variable plen. 
+			 * contained, ip6_hopopts_input() must set a valid
+			 * (non-zero) payload length to the variable plen.
 			 */
 			ip6stat.ip6s_badoptions++;
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
-			lck_mtx_unlock(ip6_mutex);
 			icmp6_error(m, ICMP6_PARAM_PROB,
 				    ICMP6_PARAMPROB_HEADER,
 				    (caddr_t)&ip6->ip6_plen - (caddr_t)ip6);
-			return;
+			goto done;
 		}
 #ifndef PULLDOWN_TEST
 		/* ip6_hopopts_input() ensures that mbuf is contiguous */
@@ -860,18 +942,31 @@ ip6_input(m)
 			sizeof(struct ip6_hbh));
 		if (hbh == NULL) {
 			ip6stat.ip6s_tooshort++;
-			lck_mtx_unlock(ip6_mutex);
-			return;
+			goto done;
 		}
 #endif
 		nxt = hbh->ip6h_nxt;
 
 		/*
-		 * accept the packet if a router alert option is included
-		 * and we act as an IPv6 router.
+		 * If we are acting as a router and the packet contains a
+		 * router alert option, see if we know the option value.
+		 * Currently, we only support the option value for MLD, in which
+		 * case we should pass the packet to the multicast routing
+		 * daemon.
 		 */
-		if (rtalert != ~0 && ip6_forwarding)
-			ours = 1;
+		if (rtalert != ~0 && ip6_forwarding) {
+			switch (rtalert) {
+			case IP6OPT_RTALERT_MLD:
+				ours = 1;
+				break;
+			default:
+				/*
+				 * RFC2711 requires unrecognized values must be
+				 * silently ignored.
+				 */
+				break;
+			}
+		}
 	} else
 		nxt = ip6->ip6_nxt;
 
@@ -909,20 +1004,14 @@ ip6_input(m)
 #if MROUTING
 		if (ip6_mrouter && ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
 			ip6stat.ip6s_cantforward++;
-			m_freem(m);
-			lck_mtx_unlock(ip6_mutex);
-			return;
+			goto bad;
 		}
 #endif
-		if (!ours) {
-			m_freem(m);
-			lck_mtx_unlock(ip6_mutex);
-			return;
-		}
+		if (!ours)
+			goto bad;
 	} else if (!ours) {
-		ip6_forward(m, &ip6_forward_rt, 0, 1);
-		lck_mtx_unlock(ip6_mutex);
-		return;
+		ip6_forward(m, &ip6_forward_rt, 0);
+		goto done;
 	}	
 
 	ip6 = mtod(m, struct ip6_hdr *);
@@ -949,17 +1038,16 @@ ip6_input(m)
 	ip6stat.ip6s_delivered++;
 	in6_ifstat_inc(deliverifp, ifs6_in_deliver);
 
-	lck_mtx_unlock(ip6_mutex);
 injectit:
 	nest = 0;
 
 	while (nxt != IPPROTO_DONE) {
 		struct ipfilter *filter;
-		int (*pr_input)(struct mbuf **, int *);
+		int (*pr_input)(struct mbuf **, int *, int);
 
 		if (ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) {
 			ip6stat.ip6s_toomanyhdr++;
-			goto badunlocked;
+			goto bad;
 		}
 
 		/*
@@ -969,24 +1057,9 @@ injectit:
 		if (m->m_pkthdr.len < off) {
 			ip6stat.ip6s_tooshort++;
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
-			goto badunlocked;
-		}
-
-#if 0
-		/*
-		 * do we need to do it for every header?  yeah, other
-		 * functions can play with it (like re-allocate and copy).
-		 */
-		mhist = ip6_addaux(m);
-		if (mhist && M_TRAILINGSPACE(mhist) >= sizeof(nxt)) {
-			hist = mtod(mhist, caddr_t) + mhist->m_len;
-			bcopy(&nxt, hist, sizeof(nxt));
-			mhist->m_len += sizeof(nxt);
-		} else {
-			ip6stat.ip6s_toomanyhdr++;
 			goto bad;
 		}
-#endif
+
 
 #if IPSEC
 		/*
@@ -997,7 +1070,7 @@ injectit:
 		if ((ipsec_bypass == 0) && (ip6_protox[nxt]->pr_flags & PR_LASTHDR) != 0) {
 			if (ipsec6_in_reject(m, NULL)) {
 				IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio);
-				goto badunlocked;
+				goto bad;
 		    }
 		}
 #endif
@@ -1018,36 +1091,40 @@ injectit:
 						filter->ipf_filter.cookie, (mbuf_t*)&m, off, nxt);
 					if (result == EJUSTRETURN) {
 						ipf_unref();
-						return;
+						goto done;
 					}
 					if (result != 0) {
 						ipf_unref();
-						m_freem(m);
-						return;
+						goto bad;
 					}
 				}
 			}
 			ipf_unref();
 		}
 
+		DTRACE_IP6(receive, struct mbuf *, m, struct inpcb *, NULL,
+			struct ip6_hdr *, ip6, struct ifnet *, m->m_pkthdr.rcvif,
+			struct ip *, NULL, struct ip6_hdr *, ip6);
+
 		if ((pr_input = ip6_protox[nxt]->pr_input) == NULL) {
 			m_freem(m);
 			m = NULL;
 			nxt = IPPROTO_DONE;
 		} else if (!(ip6_protox[nxt]->pr_flags & PR_PROTOLOCK)) {
 			lck_mtx_lock(inet6_domain_mutex);
-			nxt = pr_input(&m, &off);
+			nxt = pr_input(&m, &off, nxt);
 			lck_mtx_unlock(inet6_domain_mutex);
 		} else {
-			nxt = pr_input(&m, &off);
+			nxt = pr_input(&m, &off, nxt);
 		}
 	}
+done:
+	if (ip6_forward_rt.ro_rt != NULL)
+		rtfree(ip6_forward_rt.ro_rt);
 	return;
  bad:
-	lck_mtx_unlock(ip6_mutex);
- badunlocked:
 	m_freem(m);
-	return;
+	goto done;
 }
 
 /*
@@ -1060,8 +1137,13 @@ ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6)
 	struct ip6aux *n;
 
 	n = ip6_addaux(m);
-	if (n)
+	if (n != NULL) {
+		if (ia6 != NULL)
+			IFA_ADDREF(&ia6->ia_ifa);
+		if (n->ip6a_dstia6 != NULL)
+			IFA_REMREF(&n->ip6a_dstia6->ia_ifa);
 		n->ip6a_dstia6 = ia6;
+	}
 	return (struct ip6aux *)n;	/* NULL if failed to set */
 }
 
@@ -1072,10 +1154,12 @@ ip6_getdstifaddr(m)
 	struct ip6aux *n;
 
 	n = ip6_findaux(m);
-	if (n)
-		return n->ip6a_dstia6;
-	else
-		return NULL;
+	if (n != NULL) {
+		if (n->ip6a_dstia6 != NULL)
+			IFA_ADDREF(&n->ip6a_dstia6->ia_ifa);
+		return (n->ip6a_dstia6);
+	}
+	return (NULL);
 }
 
 /*
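
ip6_setdstifaddr() now takes a reference on the new address before releasing the one it replaces. The order matters: if a caller stores the address that is already cached, remref-first could drop the last reference and free the object before the addref. A minimal sketch of the safe ordering with a hypothetical refcounted object:

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	int refcnt;
};

static void
obj_addref(struct obj *o)
{
	o->refcnt++;
}

static void
obj_remref(struct obj *o)
{
	assert(o->refcnt > 0);
	if (--o->refcnt == 0)
		free(o);
}

/* safe even when *slot == o: addref first, remref second */
static void
slot_set(struct obj **slot, struct obj *o)
{
	if (o != NULL)
		obj_addref(o);
	if (*slot != NULL)
		obj_remref(*slot);
	*slot = o;
}

int
main(void)
{
	struct obj *o = calloc(1, sizeof(*o));
	struct obj *slot = NULL;

	o->refcnt = 1;				/* caller's reference */
	slot_set(&slot, o);			/* slot holds its own ref */
	slot_set(&slot, o);			/* self-assignment: still alive */
	printf("refcnt = %d\n", o->refcnt);	/* prints 2 */
	slot_set(&slot, NULL);
	obj_remref(o);				/* drop caller's reference */
	return (0);
}
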
@@ -1083,11 +1167,8 @@ ip6_getdstifaddr(m)
  * included, the real payload length will be stored in plenp.
  */
 static int
-ip6_hopopts_input(plenp, rtalertp, mp, offp)
-	u_int32_t *plenp;
-	u_int32_t *rtalertp;	/* XXX: should be stored more smart way */
-	struct mbuf **mp;
-	int *offp;
+ip6_hopopts_input(uint32_t *plenp, uint32_t *rtalertp, struct mbuf **mp,
+    int *offp)
 {
 	struct mbuf *m = *mp;
 	int off = *offp, hbhlen;
@@ -1123,11 +1204,11 @@ ip6_hopopts_input(plenp, rtalertp, mp, offp)
 
 	if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh),
 				hbhlen, rtalertp, plenp) < 0)
-		return(-1);
+		return (-1);
 
 	*offp = off;
 	*mp = m;
-	return(0);
+	return (0);
 }
 
 /*
@@ -1167,7 +1248,7 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp)
 			}
 			optlen = *(opt + 1) + 2;
 			break;
-		case IP6OPT_RTALERT:
+		case IP6OPT_ROUTER_ALERT:
 			/* XXX may need check for alignment */
 			if (hbhlen < IP6OPT_RTALERT_LEN) {
 				ip6stat.ip6s_toosmall++;
@@ -1175,11 +1256,9 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp)
 			}
 			if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) {
 				/* XXX stat */
-				lck_mtx_unlock(ip6_mutex);
 				icmp6_error(m, ICMP6_PARAM_PROB,
 					    ICMP6_PARAMPROB_HEADER,
 					    erroff + opt + 1 - opthead);
-				lck_mtx_lock(ip6_mutex);
 				return(-1);
 			}
 			optlen = IP6OPT_RTALERT_LEN;
@@ -1194,11 +1273,9 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp)
 			}
 			if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) {
 				/* XXX stat */
-				lck_mtx_unlock(ip6_mutex);
 				icmp6_error(m, ICMP6_PARAM_PROB,
 					    ICMP6_PARAMPROB_HEADER,
 					    erroff + opt + 1 - opthead);
-				lck_mtx_lock(ip6_mutex);
 				return(-1);
 			}
 			optlen = IP6OPT_JUMBO_LEN;
@@ -1210,11 +1287,9 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp)
 			ip6 = mtod(m, struct ip6_hdr *);
 			if (ip6->ip6_plen) {
 				ip6stat.ip6s_badoptions++;
-				lck_mtx_unlock(ip6_mutex);
 				icmp6_error(m, ICMP6_PARAM_PROB,
 					    ICMP6_PARAMPROB_HEADER,
 					    erroff + opt - opthead);
-				lck_mtx_lock(ip6_mutex);
 				return(-1);
 			}
 
@@ -1236,11 +1311,9 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp)
 			 */
 			if (*plenp != 0) {
 				ip6stat.ip6s_badoptions++;
-				lck_mtx_unlock(ip6_mutex);
 				icmp6_error(m, ICMP6_PARAM_PROB,
 					    ICMP6_PARAMPROB_HEADER,
 					    erroff + opt + 2 - opthead);
-				lck_mtx_lock(ip6_mutex);
 				return(-1);
 			}
 #endif
@@ -1250,11 +1323,9 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp)
 			 */
 			if (jumboplen <= IPV6_MAXPACKET) {
 				ip6stat.ip6s_badoptions++;
-				lck_mtx_unlock(ip6_mutex);
 				icmp6_error(m, ICMP6_PARAM_PROB,
 					    ICMP6_PARAMPROB_HEADER,
 					    erroff + opt + 2 - opthead);
-				lck_mtx_lock(ip6_mutex);
 				return(-1);
 			}
 			*plenp = jumboplen;
@@ -1266,9 +1337,8 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp)
 				goto bad;
 			}
 			optlen = ip6_unknown_opt(opt, m,
-			    erroff + opt - opthead, 1);
+			    erroff + opt - opthead);
 			if (optlen == -1) {
-				/* ip6_unknown opt unlocked ip6_mutex */
 				return(-1);
 			}
 			optlen += 2;
@@ -1290,11 +1360,7 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp)
  * is not continuous in order to return an ICMPv6 error.
  */
 int
-ip6_unknown_opt(optp, m, off, locked)
-	u_int8_t *optp;
-	struct mbuf *m;
-	int off;
-	int locked;
+ip6_unknown_opt(uint8_t *optp, struct mbuf *m, int off)
 {
 	struct ip6_hdr *ip6;
 
@@ -1306,11 +1372,7 @@ ip6_unknown_opt(optp, m, off, locked)
 		return(-1);
 	case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */
 		ip6stat.ip6s_badoptions++;
-		if (locked)
-			lck_mtx_unlock(ip6_mutex);
 		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off);
-		if (locked)
-			lck_mtx_lock(ip6_mutex);
 		return(-1);
 	case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */
 		ip6stat.ip6s_badoptions++;
@@ -1318,109 +1380,147 @@ ip6_unknown_opt(optp, m, off, locked)
 		if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
 		    (m->m_flags & (M_BCAST|M_MCAST)))
 			m_freem(m);
-		else {
-			if (locked)
-				lck_mtx_unlock(ip6_mutex);
+		else
 			icmp6_error(m, ICMP6_PARAM_PROB,
 				    ICMP6_PARAMPROB_OPTION, off);
-			if (locked)
-				lck_mtx_lock(ip6_mutex);
-		}
 		return(-1);
 	}
 
 	m_freem(m);		/* XXX: NOTREACHED */
-	return(-1);
+	return (-1);
 }
 
 /*
  * Create the "control" list for this pcb.
- * The function will not modify mbuf chain at all.
+ * These functions will not modify the mbuf chain at all.
  *
- * with KAME mbuf chain restriction:
+ * With KAME mbuf chain restriction:
  * The routine will be called from upper layer handlers like tcp6_input().
  * Thus the routine assumes that the caller (tcp6_input) have already
  * called IP6_EXTHDR_CHECK() and all the extension headers are located in the
  * very first mbuf on the mbuf chain.
+ *
+ * ip6_savecontrol_v4 will handle those options that can be set on a
+ * v4-mapped socket.
+ * ip6_savecontrol will directly call ip6_savecontrol_v4 to handle those
+ * options and handle the v6-only ones itself.
  */
-void
-ip6_savecontrol(in6p, mp, ip6, m)
-	struct inpcb *in6p;
-	struct mbuf **mp;
-	struct ip6_hdr *ip6;
-	struct mbuf *m;
+struct mbuf **
+ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp,
+    int *v4only)
 {
-	int rthdr_exist = 0;
+	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 
-#if SO_TIMESTAMP
-	if ((in6p->in6p_socket->so_options & SO_TIMESTAMP) != 0) {
+	if ((inp->inp_socket->so_options & SO_TIMESTAMP) != 0) {
 		struct timeval tv;
 
 		microtime(&tv);
-		*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
-				      SCM_TIMESTAMP, SOL_SOCKET);
-		if (*mp) {
-			mp = &(*mp)->m_next;
-		}
+		mp = sbcreatecontrol_mbuf((caddr_t) &tv, sizeof(tv),
+		    SCM_TIMESTAMP, SOL_SOCKET, mp);
+		if (*mp == NULL) 
+			return NULL;
 	}
-#endif
+	if ((inp->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
+		uint64_t time;
 
-	/* some OSes call this logic with IPv4 packet, for SO_TIMESTAMP */
-	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
-		return;
+		time = mach_absolute_time();
+		mp = sbcreatecontrol_mbuf((caddr_t) &time, sizeof(time),
+		    SCM_TIMESTAMP_MONOTONIC, SOL_SOCKET, mp);
 
+		if (*mp == NULL)
+			return NULL;
+	}
+	if ((inp->inp_socket->so_flags & SOF_RECV_TRAFFIC_CLASS) != 0) {
+		int tc = m->m_pkthdr.prio;
+		
+		mp = sbcreatecontrol_mbuf((caddr_t) &tc, sizeof(tc),
+			SO_TRAFFIC_CLASS, SOL_SOCKET, mp);
+		if (*mp == NULL) 
+			return NULL;
+	}
+
+	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
+		if (v4only != NULL)
+			*v4only = 1;
+		return (mp);
+	}
+
+#define IS2292(inp, x, y)	(((inp)->inp_flags & IN6P_RFC2292) ? (x) : (y))
 	/* RFC 2292 sec. 5 */
-	if ((in6p->in6p_flags & IN6P_PKTINFO) != 0) {
+	if ((inp->inp_flags & IN6P_PKTINFO) != 0) {
 		struct in6_pktinfo pi6;
+
 		bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr));
-		if (IN6_IS_SCOPE_LINKLOCAL(&pi6.ipi6_addr))
-			pi6.ipi6_addr.s6_addr16[1] = 0;
-		pi6.ipi6_ifindex = (m && m->m_pkthdr.rcvif)
-					? m->m_pkthdr.rcvif->if_index
-					: 0;
-		*mp = sbcreatecontrol((caddr_t) &pi6,
-			sizeof(struct in6_pktinfo), IPV6_PKTINFO,
-			IPPROTO_IPV6);
-		if (*mp)
-			mp = &(*mp)->m_next;
+		in6_clearscope(&pi6.ipi6_addr);	/* XXX */
+		pi6.ipi6_ifindex =
+		    (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0;
+
+		mp = sbcreatecontrol_mbuf((caddr_t) &pi6,
+		    sizeof(struct in6_pktinfo),
+		    IS2292(inp, IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6, mp);
+		if (*mp == NULL) 
+			return NULL;
 	}
 
-	if ((in6p->in6p_flags & IN6P_HOPLIMIT) != 0) {
+	if ((inp->inp_flags & IN6P_HOPLIMIT) != 0) {
 		int hlim = ip6->ip6_hlim & 0xff;
-		*mp = sbcreatecontrol((caddr_t) &hlim,
-			sizeof(int), IPV6_HOPLIMIT, IPPROTO_IPV6);
-		if (*mp)
-			mp = &(*mp)->m_next;
+
+		mp = sbcreatecontrol_mbuf((caddr_t) &hlim, sizeof(int),
+		    IS2292(inp, IPV6_2292HOPLIMIT, IPV6_HOPLIMIT),
+		    IPPROTO_IPV6, mp);
+		if (*mp == NULL) 
+			return NULL;
 	}
 
-        if ((in6p->in6p_flags & IN6P_TCLASS) != 0) {
-                u_int32_t flowinfo;
-                int tclass;
+	if (v4only != NULL)
+		*v4only = 0;
+	return (mp);
+}
+
+int
+ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
+{
+	struct mbuf **np;
+	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+	int v4only = 0;
+
+	*mp = NULL;
+	np = ip6_savecontrol_v4(in6p, m, mp, &v4only);
+	if (np == NULL)
+		goto no_mbufs;
 
-                flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK);
-                flowinfo >>= 20;
+	mp = np;
+	if (v4only)
+		return(0);
 
-                tclass = flowinfo & 0xff;
-                *mp = sbcreatecontrol((caddr_t) &tclass, sizeof(tclass),
-                    IPV6_TCLASS, IPPROTO_IPV6);
-                if (*mp)
-                        mp = &(*mp)->m_next;
-        }
+	if ((in6p->inp_flags & IN6P_TCLASS) != 0) {
+		u_int32_t flowinfo;
+		int tclass;
+
+		flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK);
+		flowinfo >>= 20;
+
+		tclass = flowinfo & 0xff;
+		mp = sbcreatecontrol_mbuf((caddr_t) &tclass, sizeof(tclass),
+		    IPV6_TCLASS, IPPROTO_IPV6, mp);
+		if (*mp == NULL) 
+			goto no_mbufs;
+	}
 
 	/*
 	 * IPV6_HOPOPTS socket option.  Recall that we required super-user
 	 * privilege for the option (see ip6_ctloutput), but it might be too
 	 * strict, since there might be some hop-by-hop options which can be
 	 * returned to normal user.
-	 * See RFC 2292 section 6.
+	 * See also RFC 2292 section 6 (or RFC 3542 section 8).
 	 */
-	if ((in6p->in6p_flags & IN6P_HOPOPTS) != 0) {
+	if ((in6p->inp_flags & IN6P_HOPOPTS) != 0) {
 		/*
 		 * Check if a hop-by-hop options header is contatined in the
 		 * received packet, and if so, store the options as ancillary
 		 * data. Note that a hop-by-hop options header must be
-		 * just after the IPv6 header, which fact is assured through
-		 * the IPv6 input processing.
+		 * just after the IPv6 header, which is assured through the
+		 * IPv6 input processing.
 		 */
 		ip6 = mtod(m, struct ip6_hdr *);
 		if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
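
The reworked savecontrol path threads a tail pointer through every sbcreatecontrol_mbuf() call: each call appends one control record, returns the slot for the next one, and leaves *mp NULL on allocation failure so the caller can unwind with ENOBUFS instead of silently dropping options. The shape of that chaining API, sketched with a hypothetical record type:

#include <stdio.h>
#include <stdlib.h>

struct rec {
	int type;
	struct rec *next;
};

/*
 * Append one record and return the slot for the next; on failure *tailp
 * is left NULL so the caller can tell that nothing was appended.
 */
static struct rec **
append_rec(int type, struct rec **tailp)
{
	struct rec *r = calloc(1, sizeof(*r));

	if (r == NULL) {
		*tailp = NULL;
		return (NULL);
	}
	r->type = type;
	*tailp = r;
	return (&r->next);
}

int
main(void)
{
	struct rec *head = NULL, **mp = &head;

	if ((mp = append_rec(1, mp)) == NULL)	/* e.g. SCM_TIMESTAMP */
		return (1);			/* the ENOBUFS path */
	if ((mp = append_rec(2, mp)) == NULL)	/* e.g. IPV6_PKTINFO */
		return (1);
	for (struct rec *r = head; r != NULL; r = r->next)
		printf("record type %d\n", r->type);
	return (0);
}
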
@@ -1438,67 +1538,38 @@ ip6_savecontrol(in6p, mp, ip6, m)
 			    ip6->ip6_nxt);
 			if (ext == NULL) {
 				ip6stat.ip6s_tooshort++;
-				return;
+				return(0);
 			}
 			hbh = mtod(ext, struct ip6_hbh *);
 			hbhlen = (hbh->ip6h_len + 1) << 3;
 			if (hbhlen != ext->m_len) {
 				m_freem(ext);
 				ip6stat.ip6s_tooshort++;
-				return;
+				return(0);
 			}
 #endif
 
 			/*
-			 * XXX: We copy whole the header even if a jumbo
-			 * payload option is included, which option is to
-			 * be removed before returning in the RFC 2292.
-			 * Note: this constraint is removed in 2292bis.
+			 * XXX: We copy the whole header even if a jumbo
+			 * payload option is included, though that option is
+			 * to be removed before returning according to
+			 * RFC 2292.
+			 * Note: this constraint is removed in RFC 3542.
 			 */
-			*mp = sbcreatecontrol((caddr_t)hbh, hbhlen,
-					      IPV6_HOPOPTS, IPPROTO_IPV6);
-			if (*mp)
-				mp = &(*mp)->m_next;
+			mp = sbcreatecontrol_mbuf((caddr_t)hbh, hbhlen,
+			    IS2292(in6p, IPV6_2292HOPOPTS, IPV6_HOPOPTS),
+			    IPPROTO_IPV6, mp);
+
 #if PULLDOWN_TEST
 			m_freem(ext);
 #endif
-		}
-	}
-
-	/* IPV6_DSTOPTS and IPV6_RTHDR socket options */
-	if ((in6p->in6p_flags & (IN6P_DSTOPTS | IN6P_RTHDRDSTOPTS)) != 0) {
-		int proto, off, nxt;
-
-		/*
-		 * go through the header chain to see if a routing header is
-		 * contained in the packet. We need this information to store
-		 * destination options headers (if any) properly.
-		 * XXX: performance issue. We should record this info when
-		 * processing extension headers in incoming routine.
-		 * (todo) use m_aux? 
-		 */
-		proto = IPPROTO_IPV6;
-		off = 0;
-		nxt = -1;
-		while (1) {
-			int newoff;
-
-			newoff = ip6_nexthdr(m, off, proto, &nxt);
-			if (newoff < 0)
-				break;
-			if (newoff < off) /* invalid, check for safety */
-				break;
-			if ((proto = nxt) == IPPROTO_ROUTING) {
-				rthdr_exist = 1;
-				break;
+			if (*mp == NULL) {
+				goto no_mbufs;
 			}
-			off = newoff;
 		}
 	}
 
-	if ((in6p->in6p_flags &
-	     (IN6P_RTHDR | IN6P_DSTOPTS | IN6P_RTHDRDSTOPTS)) != 0) {
-		ip6 = mtod(m, struct ip6_hdr *);
+	if ((in6p->inp_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) {
 		int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr);
 
 		/*
@@ -1543,7 +1614,7 @@ ip6_savecontrol(in6p, mp, ip6, m)
 			ext = ip6_pullexthdr(m, off, nxt);
 			if (ext == NULL) {
 				ip6stat.ip6s_tooshort++;
-				return;
+				return(0);
 			}
 			ip6e = mtod(ext, struct ip6_ext *);
 			if (nxt == IPPROTO_AH)
@@ -1553,30 +1624,39 @@ ip6_savecontrol(in6p, mp, ip6, m)
 			if (elen != ext->m_len) {
 				m_freem(ext);
 				ip6stat.ip6s_tooshort++;
-				return;
+				return(0);
 			}
 #endif
 
 			switch (nxt) {
 			case IPPROTO_DSTOPTS:
-				if ((in6p->in6p_flags & IN6P_DSTOPTS) == 0)
+				if (!(in6p->inp_flags & IN6P_DSTOPTS))
 					break;
 
-				*mp = sbcreatecontrol((caddr_t)ip6e, elen,
-						      IPV6_DSTOPTS,
-						      IPPROTO_IPV6);
-				if (*mp)
-					mp = &(*mp)->m_next;
+				mp = sbcreatecontrol_mbuf((caddr_t)ip6e, elen,
+				    IS2292(in6p,
+					IPV6_2292DSTOPTS, IPV6_DSTOPTS),
+				    IPPROTO_IPV6, mp);
+				if (*mp == NULL) {
+#if PULLDOWN_TEST
+					m_freem(ext);
+#endif
+					goto no_mbufs;
+				}
 				break;
 			case IPPROTO_ROUTING:
-				if (!in6p->in6p_flags & IN6P_RTHDR)
+				if (!(in6p->inp_flags & IN6P_RTHDR))
 					break;
 
-				*mp = sbcreatecontrol((caddr_t)ip6e, elen,
-						      IPV6_RTHDR,
-						      IPPROTO_IPV6);
-				if (*mp)
-					mp = &(*mp)->m_next;
+				mp = sbcreatecontrol_mbuf((caddr_t)ip6e, elen,
+				    IS2292(in6p, IPV6_2292RTHDR, IPV6_RTHDR),
+				    IPPROTO_IPV6, mp);
+				if (*mp == NULL) {
+#if PULLDOWN_TEST
+					m_freem(ext);
+#endif
+					goto no_mbufs;
+				}
 				break;
 			case IPPROTO_HOPOPTS:
 			case IPPROTO_AH: /* is it possible? */
@@ -1584,7 +1664,7 @@ ip6_savecontrol(in6p, mp, ip6, m)
 
 			default:
 				/*
-			 	 * other cases have been filtered in the above.
+				 * other cases have been filtered in the above.
 				 * none will visit this case.  here we supply
 				 * the code just in case (nxt overwritten or
 				 * other cases).
@@ -1608,7 +1688,49 @@ ip6_savecontrol(in6p, mp, ip6, m)
 	  loopend:
 		;
 	}
+	return(0);
+no_mbufs:
+	ip6stat.ip6s_pktdropcntrl++;
+	/* XXX increment a stat to show the failure */
+	return(ENOBUFS);
+}
+#undef IS2292
+
+void
+ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu)
+{
+	struct socket *so;
+	struct mbuf *m_mtu;
+	struct ip6_mtuinfo mtuctl;
+
+	so =  in6p->inp_socket;
 
+	if (mtu == NULL)
+		return;
+
+#ifdef DIAGNOSTIC
+	if (so == NULL)		/* I believe this is impossible */
+		panic("ip6_notify_pmtu: socket is NULL");
+#endif
+
+	bzero(&mtuctl, sizeof(mtuctl));	/* zero-clear for safety */
+	mtuctl.ip6m_mtu = *mtu;
+	mtuctl.ip6m_addr = *dst;
+	if (sa6_recoverscope(&mtuctl.ip6m_addr))
+		return;
+
+	if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof(mtuctl),
+	    IPV6_PATHMTU, IPPROTO_IPV6)) == NULL)
+		return;
+
+	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu, NULL)
+	    == 0) {
+		m_freem(m_mtu);
+		/* XXX: should count statistics */
+	} else
+		sorwakeup(so);
+
+	return;
 }
 
 #if PULLDOWN_TEST
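
ip6_notify_pmtu() is the kernel half of the RFC 3542 path-MTU notification: it queues a struct ip6_mtuinfo as ancillary data on sockets that opted in. User space reads it roughly as below; this sketch assumes a platform exposing IPV6_RECVPATHMTU/IPV6_PATHMTU per RFC 3542, and the recvmsg() simply blocks until a notification or datagram arrives:

#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

int
main(void)
{
	int s = socket(AF_INET6, SOCK_DGRAM, 0);
	int on = 1;
	char data[1], cbuf[1024];
	struct iovec iov = { data, sizeof(data) };
	struct sockaddr_in6 from;
	struct msghdr msg;
	struct cmsghdr *cm;

	setsockopt(s, IPPROTO_IPV6, IPV6_RECVPATHMTU, &on, sizeof(on));

	memset(&msg, 0, sizeof(msg));
	msg.msg_name = &from;
	msg.msg_namelen = sizeof(from);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	if (recvmsg(s, &msg, 0) < 0)
		return (1);
	for (cm = CMSG_FIRSTHDR(&msg); cm != NULL;
	    cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level == IPPROTO_IPV6 &&
		    cm->cmsg_type == IPV6_PATHMTU) {
			struct ip6_mtuinfo mtuinfo;

			memcpy(&mtuinfo, CMSG_DATA(cm), sizeof(mtuinfo));
			printf("new path MTU: %u\n", mtuinfo.ip6m_mtu);
		}
	}
	close(s);
	return (0);
}
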
@@ -1837,8 +1959,8 @@ ip6_addaux(
 	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_INET6, NULL);
 	if (tag == NULL) {
 		/* Allocate a tag */
-		tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_INET6,
-		    sizeof (struct ip6aux), M_DONTWAIT);
+		tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_INET6,
+		    sizeof (struct ip6aux), M_DONTWAIT, m);
 
 		/* Attach it to the mbuf */
 		if (tag) {
@@ -1855,7 +1977,7 @@ ip6_findaux(
 {
 	struct m_tag	*tag;
 	
-	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_ENCAP, NULL);
+	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_INET6, NULL);
 	
 	return tag ? (struct ip6aux*)(tag + 1) : NULL;
 }
@@ -1866,12 +1988,35 @@ ip6_delaux(
 {
 	struct m_tag	*tag;
 
-	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_ENCAP, NULL);
+	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_INET6, NULL);
 	if (tag) {
 		m_tag_delete(m, tag);
 	}
 }
 
+/*
+ * Called by m_tag_free().
+ */
+void
+ip6_destroyaux(struct ip6aux *n)
+{
+	if (n->ip6a_dstia6 != NULL) {
+		IFA_REMREF(&n->ip6a_dstia6->ia_ifa);
+		n->ip6a_dstia6 = NULL;
+	}
+}
+
+/*
+ * Called by m_tag_copy()
+ */
+void
+ip6_copyaux(struct ip6aux *src, struct ip6aux *dst)
+{
+	bcopy(src, dst, sizeof (*dst));
+	if (dst->ip6a_dstia6 != NULL)
+		IFA_ADDREF(&dst->ip6a_dstia6->ia_ifa);
+}
+
 /*
  * System control for IP6
  */
diff --git a/bsd/netinet6/ip6_mroute.c b/bsd/netinet6/ip6_mroute.c
index da8c4fc96..39f146284 100644
--- a/bsd/netinet6/ip6_mroute.c
+++ b/bsd/netinet6/ip6_mroute.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -104,7 +104,9 @@
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
+#include <netinet6/scope6_var.h>
 #include <netinet6/ip6_mroute.h>
+#include <netinet/icmp6.h>
 #include <netinet6/pim6.h>
 #include <netinet6/pim6_var.h>
 
@@ -127,7 +129,6 @@ static int socket_send(struct socket *, struct mbuf *,
 static int register_send(struct ip6_hdr *, struct mif6 *,
 			      struct mbuf *);
 
-extern lck_mtx_t *ip6_mutex;
 /*
  * Globals.  All but ip6_mrouter, ip6_mrtproto and mrt6stat could be static,
  * except for netstat or debugging purposes.
@@ -258,9 +259,6 @@ static int del_m6if(mifi_t *);
 static int add_m6fc(struct mf6cctl *);
 static int del_m6fc(struct mf6cctl *);
 
-#ifndef __APPLE__
-static struct callout expire_upcalls_ch;
-#endif
 /*
  * Handle MRT setsockopt commands to modify the multicast routing tables.
  */
@@ -478,12 +476,7 @@ ip6_mrouter_init(so, v, cmd)
 
 	pim6 = 0;/* used for stubbing out/in pim stuff */
 
-#ifndef __APPLE__
-	callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
-	    expire_upcalls, NULL);
-#else
 	timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
-#endif
 
 #if MRT6DEBUG
 	if (mrt6debug)
@@ -540,20 +533,12 @@ ip6_mrouter_done()
 			}
 		}
 	}
-#if notyet
-	bzero((caddr_t)qtable, sizeof(qtable));
-	bzero((caddr_t)tbftable, sizeof(tbftable));
-#endif
 	bzero((caddr_t)mif6table, sizeof(mif6table));
 	nummifs = 0;
 
 	pim6 = 0; /* used to stub out/in pim specific code */
 
-#ifndef __APPLE__
-	callout_stop(&expire_upcalls_ch);
-#else
 	untimeout(expire_upcalls, (caddr_t)NULL);
-#endif
 
 	/*
 	 * Free all multicast forwarding cache entries.
@@ -617,7 +602,9 @@ add_m6if(mifcp)
 		return EINVAL;
 	mifp = mif6table + mifcp->mif6c_mifi;
 	if (mifp->m6_ifp)
-		return EADDRINUSE; /* XXX: is it appropriate? */
+		return (EADDRINUSE); /* XXX: is it appropriate? */
+	if (mifcp->mif6c_pifi == 0 || mifcp->mif6c_pifi > if_index)
+		return (ENXIO);
 
 	ifnet_head_lock_shared();
 	if (mifcp->mif6c_pifi == 0 || mifcp->mif6c_pifi > if_index) {
@@ -653,10 +640,7 @@ add_m6if(mifcp)
 
 	mifp->m6_flags     = mifcp->mif6c_flags;
 	mifp->m6_ifp       = ifp;
-#if notyet
-	/* scaling up here allows division by 1024 in critical code */
-	mifp->m6_rate_limit = mifcp->mif6c_rate_limit * 1024 / 1000;
-#endif
+
 	/* initialize per mif pkt counters */
 	mifp->m6_pkt_in    = 0;
 	mifp->m6_pkt_out   = 0;
@@ -705,10 +689,6 @@ del_m6if(mifip)
 		if_allmulti(ifp, 0);
 	}
 
-#if notyet
-	bzero((caddr_t)qtable[*mifip], sizeof(qtable[*mifip]));
-	bzero((caddr_t)mifp->m6_tbf, sizeof(*(mifp->m6_tbf)));
-#endif
 	bzero((caddr_t)mifp, sizeof(*mifp));
 
 	/* Adjust nummifs down */
@@ -1285,12 +1265,7 @@ expire_upcalls(
 		}
 	}
 
-#ifndef __APPLE__
-	callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
-	    expire_upcalls, NULL);
-#else
 	timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
-#endif
 }
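Editor's note: with the #ifndef __APPLE__ callout path gone, expire_upcalls() is driven solely by the classic timeout()/untimeout() pair, and since timeout() is one-shot, the handler must re-arm itself on every run. A hedged sketch of that shape (the queue scan is elided, and the function name is illustrative):

	static void
	expire_upcalls_sketch(void *arg)
	{
	#pragma unused(arg)
		/* ... walk mf6ctable and drop stale upcall entries ... */

		/* timeout() is one-shot: re-arm for the next period */
		timeout(expire_upcalls_sketch, (caddr_t)NULL, EXPIRE_TIMEOUT);
	}

ip6_mrouter_init() primes the first shot with the same timeout() call, and ip6_mrouter_done() cancels any pending one with untimeout(), exactly as in the hunks above.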
 
 /*
@@ -1306,6 +1281,9 @@ ip6_mdq(m, ifp, rt)
 	mifi_t mifi, iif;
 	struct mif6 *mifp;
 	int plen = m->m_pkthdr.len;
+	struct in6_addr src0, dst0; /* copies for local work */
+	u_int32_t iszone, idzone, oszone, odzone;
+	int error = 0;
 
 /*
  * Macro to send packet on mif.  Since RSVP packets don't get counted on
@@ -1437,7 +1415,14 @@ ip6_mdq(m, ifp, rt)
 	 * For each mif, forward a copy of the packet if there are group
 	 * members downstream on the interface.
 	 */
-	for (mifp = mif6table, mifi = 0; mifi < nummifs; mifp++, mifi++)
+	src0 = ip6->ip6_src;
+	dst0 = ip6->ip6_dst;
+	if ((error = in6_setscope(&src0, ifp, &iszone)) != 0 ||
+	    (error = in6_setscope(&dst0, ifp, &idzone)) != 0) {
+		ip6stat.ip6s_badscope++;
+		return (error);
+	}
+	for (mifp = mif6table, mifi = 0; mifi < nummifs; mifp++, mifi++) {
 		if (IF_ISSET(mifi, &rt->mf6c_ifset)) {
 			/*
 			 * check if the outgoing packet is going to break
@@ -1445,23 +1430,25 @@ ip6_mdq(m, ifp, rt)
 			 * XXX For packets through PIM register tunnel
 			 * interface, we believe a routing daemon.
 			 */
-			if ((mif6table[rt->mf6c_parent].m6_flags &
-			     MIFF_REGISTER) == 0 &&
-			    (mif6table[mifi].m6_flags & MIFF_REGISTER) == 0 &&
-			    (in6_addr2scopeid(ifp, &ip6->ip6_dst) !=
-			     in6_addr2scopeid(mif6table[mifi].m6_ifp,
-					      &ip6->ip6_dst) ||
-			     in6_addr2scopeid(ifp, &ip6->ip6_src) !=
-			     in6_addr2scopeid(mif6table[mifi].m6_ifp,
-					      &ip6->ip6_src))) {
-				ip6stat.ip6s_badscope++;
-				continue;
+			if (!(mif6table[rt->mf6c_parent].m6_flags &
+			      MIFF_REGISTER) &&
+			    !(mif6table[mifi].m6_flags & MIFF_REGISTER)) {
+				if (in6_setscope(&src0, mif6table[mifi].m6_ifp,
+				    &oszone) ||
+				    in6_setscope(&dst0, mif6table[mifi].m6_ifp,
+				    &odzone) ||
+				    iszone != oszone ||
+				    idzone != odzone) {
+					ip6stat.ip6s_badscope++;
+					continue;
+				}
 			}
 
 			mifp->m6_pkt_out++;
 			mifp->m6_bytes_out += plen;
 			MC6_SEND(ip6, mifp, m);
 		}
+	}
 	return 0;
 }
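Editor's note: the rewritten loop computes the scope zones of the copied source and destination once for the receiving interface, then recomputes them for each candidate outgoing mif; an in6_setscope() failure or zone mismatch means the copy would leak across a scope boundary, so it is dropped and counted in ip6s_badscope. The same check condensed into a helper (a sketch, not a function in the patch):

	/*
	 * Sketch: would forwarding from ifp to oifp cross a scope-zone
	 * boundary?  Nonzero means the copy must be suppressed.
	 */
	static int
	crosses_scope_zone(struct ifnet *ifp, struct ifnet *oifp,
	    const struct in6_addr *src, const struct in6_addr *dst)
	{
		struct in6_addr s = *src, d = *dst;	/* local work copies */
		u_int32_t isz, idz, osz, odz;

		if (in6_setscope(&s, ifp, &isz) || in6_setscope(&d, ifp, &idz))
			return (1);		/* input zones undeterminable */
		s = *src; d = *dst;
		if (in6_setscope(&s, oifp, &osz) || in6_setscope(&d, oifp, &odz))
			return (1);
		return (isz != osz || idz != odz);
	}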
 
@@ -1501,16 +1488,22 @@ phyint_send(ip6, mifp, m)
 	 * sending queue.
 	 */
 	if (m->m_pkthdr.rcvif == NULL) {
-		struct ip6_moptions im6o;
+		struct ip6_moptions *im6o;
 
-		im6o.im6o_multicast_ifp = ifp;
-		/* XXX: ip6_output will override ip6->ip6_hlim */
-		im6o.im6o_multicast_hlim = ip6->ip6_hlim;
-		im6o.im6o_multicast_loop = 1;
-		error = ip6_output(mb_copy, NULL, &ro,
-				   IPV6_FORWARDING, &im6o, NULL, 0);
+		im6o = ip6_allocmoptions(M_DONTWAIT);
+		if (im6o == NULL) {
+			m_freem(mb_copy);
+			return;
+		}
 
+		im6o->im6o_multicast_ifp = ifp;
+		/* XXX: ip6_output will override ip6->ip6_hlim */
+		im6o->im6o_multicast_hlim = ip6->ip6_hlim;
+		im6o->im6o_multicast_loop = 1;
+		error = ip6_output(mb_copy, NULL, &ro, IPV6_FORWARDING,
+		    im6o, NULL, NULL);
 
+		IM6O_REMREF(im6o);
 #if MRT6DEBUG
 		if (mrt6debug & DEBUG_XMIT)
 			log(LOG_DEBUG, "phyint_send on mif %d err %d\n",
@@ -1524,10 +1517,11 @@ phyint_send(ip6, mifp, m)
 	 * on the outgoing interface, loop back a copy.
 	 */
 	dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
-	ifnet_lock_shared(ifp);
+	in6_multihead_lock_shared();
 	IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
-	ifnet_lock_done(ifp);
+	in6_multihead_lock_done();
 	if (in6m != NULL) {
+		IN6M_REMREF(in6m);
 		dst6->sin6_len = sizeof(struct sockaddr_in6);
 		dst6->sin6_family = AF_INET6;
 		dst6->sin6_addr = ip6->ip6_dst;
@@ -1552,10 +1546,8 @@ phyint_send(ip6, mifp, m)
 		mb_copy->m_pkthdr.csum_data = 0;
 		mb_copy->m_pkthdr.csum_flags = 0;
 
-		lck_mtx_unlock(ip6_mutex);
 		error = dlil_output(ifp, PF_INET6, mb_copy,
 				NULL, (struct sockaddr *)&ro.ro_dst, 0);
-		lck_mtx_lock(ip6_mutex);
 #else
 		error = (*ifp->if_output)(ifp, mb_copy,
 					  (struct sockaddr *)&ro.ro_dst,
@@ -1567,21 +1559,28 @@ phyint_send(ip6, mifp, m)
 			    mifp - mif6table, error);
 #endif
 	} else {
-#if MULTICAST_PMTUD
-		icmp6_error(mb_copy, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
-#else
+		/*
+		 * pMTU discovery is intentionally disabled by default, since
+		 * various routers may advertise a pMTU via multicast and that
+		 * could be abused to mount a DoS attack against a router.
+		 */
+		if (ip6_mcast_pmtu)
+			icmp6_error(mb_copy, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
 #if MRT6DEBUG
-		if (mrt6debug & DEBUG_XMIT)
-			log(LOG_DEBUG,
-			    "phyint_send: packet too big on %s o %s g %s"
-			    " size %d(discarded)\n",
-			    if_name(ifp),
-			    ip6_sprintf(&ip6->ip6_src),
-			    ip6_sprintf(&ip6->ip6_dst),
-			    mb_copy->m_pkthdr.len);
+		else {
+			if (mrt6debug & DEBUG_XMIT) {
+				log(LOG_DEBUG,
+				    "phyint_send: packet too big on %s o %s "
+				    "g %s size %d(discarded)\n",
+				    if_name(ifp),
+				    ip6_sprintf(&ip6->ip6_src),
+				    ip6_sprintf(&ip6->ip6_dst),
+				    mb_copy->m_pkthdr.len);
+			}
+		}
 #endif /* MRT6DEBUG */
 		m_freem(mb_copy); /* simply discard the packet */
-#endif
+
 	}
 }
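Editor's note: two reference-counting changes land in phyint_send() here. The multicast options are now heap-allocated through ip6_allocmoptions() and released with IM6O_REMREF() after ip6_output() returns, and the IN6_LOOKUP_MULTI() result, protected by in6_multihead_lock instead of the ifnet lock, must likewise be dropped with IN6M_REMREF(). The allocate/use/release discipline, reduced to its skeleton (error paths trimmed):

	struct ip6_moptions *im6o;

	im6o = ip6_allocmoptions(M_DONTWAIT);
	if (im6o == NULL) {
		m_freem(mb_copy);	/* we still own the copy on failure */
		return;
	}
	im6o->im6o_multicast_ifp = ifp;
	im6o->im6o_multicast_hlim = ip6->ip6_hlim;
	im6o->im6o_multicast_loop = 1;

	error = ip6_output(mb_copy, NULL, &ro, IPV6_FORWARDING, im6o,
	    NULL, NULL);
	IM6O_REMREF(im6o);		/* drop our reference, maybe free */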
 
@@ -1666,9 +1665,7 @@ register_send(ip6, mif, m)
  * is stripped off, and the inner packet is passed to register_mforward.
  */
 int
-pim6_input(mp, offp)
-	struct mbuf **mp;
-	int *offp;
+pim6_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct pim *pim; /* pointer to a pim struct */
 	struct ip6_hdr *ip6;
@@ -1676,13 +1673,11 @@ pim6_input(mp, offp)
 	struct mbuf *m = *mp;
 	int minlen;
 	int off = *offp;
-	int proto;
 
 	++pim6stat.pim6s_rcv_total;
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	pimlen = m->m_pkthdr.len - *offp;
-	proto = ip6->ip6_nxt;
 
 	/*
 	 * Validate lengths
@@ -1881,9 +1876,7 @@ pim6_input(mp, offp)
 #ifdef __APPLE__
 
                 if (lo_ifp) {
-		    lck_mtx_unlock(ip6_mutex);
                     dlil_output(lo_ifp, PF_INET6, m, 0, (struct sockaddr *)&dst, 0);
-		    lck_mtx_lock(ip6_mutex);
 		}
                 else {
                     printf("Warning: pim6_input call to dlil_find_dltag failed!\n");
diff --git a/bsd/netinet6/ip6_mroute.h b/bsd/netinet6/ip6_mroute.h
index 5eef448db..193efea2a 100644
--- a/bsd/netinet6/ip6_mroute.h
+++ b/bsd/netinet6/ip6_mroute.h
@@ -79,9 +79,9 @@
 /*
  * Multicast Routing set/getsockopt commands.
  */
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 #define MRT6_OINIT		100	/* initialize forwarder (omrt6msg) */
-#endif
+#endif /* XNU_KERNEL_PRIVATE */
 #define MRT6_DONE		101	/* shut down forwarder */
 #define MRT6_ADD_MIF		102	/* add multicast interface */
 #define MRT6_DEL_MIF		103	/* delete multicast interface */
@@ -164,7 +164,7 @@ struct mrt6stat {
 	u_quad_t mrt6s_upq_sockfull;	/* upcalls dropped - socket full   */
 };
 
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 #if MRT6_OINIT
 /*
  * Struct used to communicate from kernel to multicast router
@@ -185,7 +185,7 @@ struct omrt6msg {
 	struct in6_addr  im6_src, im6_dst;
 };
 #endif
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 /*
  * Structure used to communicate from kernel to multicast router.
@@ -229,7 +229,7 @@ struct sioc_mif_req6 {
 	u_quad_t obytes;	/* Output byte count on mif		*/
 };
 
-#if defined(KERNEL_PRIVATE)
+#if defined(XNU_KERNEL_PRIVATE)
 struct sioc_mif_req6_32 {
 	mifi_t mifi;
 	u_quad_t icount;
@@ -245,7 +245,7 @@ struct sioc_mif_req6_64 {
 	u_quad_t ibytes;
 	u_quad_t obytes;
 } __attribute__((aligned(8)));
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 #ifdef PRIVATE
 /*
@@ -314,14 +314,14 @@ struct rtdetq {		/* XXX: rtdetq is also defined in ip_mroute.h */
 #endif /* _NETINET_IP_MROUTE_H_ */
 
 #if MROUTING
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 extern struct mrt6stat mrt6stat;
 
 extern int ip6_mrouter_set(struct socket *, struct sockopt *);
 extern int ip6_mrouter_get(struct socket *, struct sockopt *);
 extern int ip6_mrouter_done(void);
 extern int mrt6_ioctl(u_long, caddr_t);
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 #endif /* PRIVATE */
 #endif
 
diff --git a/bsd/netinet6/ip6_output.c b/bsd/netinet6/ip6_output.c
index 309686f7f..7abf54ca7 100644
--- a/bsd/netinet6/ip6_output.c
+++ b/bsd/netinet6/ip6_output.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -110,6 +110,11 @@
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/kauth.h>
+#include <sys/mcache.h>
+#include <sys/sysctl.h>
+#include <kern/zalloc.h>
+
+#include <pexpert/pexpert.h>
 
 #include <net/if.h>
 #include <net/route.h>
@@ -120,10 +125,13 @@
 #include <netinet/ip_var.h>
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
+#include <netinet6/ip6protosw.h>
 #include <netinet/icmp6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet6/nd6.h>
+#include <netinet6/scope6_var.h>
+#include <mach/sdt.h>
 
 #if IPSEC
 #include <netinet6/ipsec.h>
@@ -133,7 +141,6 @@
 #include <netkey/key.h>
 extern int ipsec_bypass;
 #endif /* IPSEC */
-extern lck_mtx_t *nd6_mutex;
 
 #if CONFIG_MACF_NET
 #include <security/mac.h>
@@ -161,23 +168,54 @@ struct ip6_exthdrs {
 	struct mbuf *ip6e_dest2;
 };
 
+int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt);
 static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *,
 			    struct socket *, struct sockopt *sopt);
-static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt);
+static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, int uproto);
 static int ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt);
-static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt);
-static int ip6_setmoptions(int, struct inpcb *, struct mbuf *);
-static int ip6_getmoptions(int, struct ip6_moptions *, struct mbuf **);
+static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, int sticky, int cmsg, int uproto);
+static void im6o_trace(struct ip6_moptions *, int);
 static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
 				  struct ip6_frag **);
 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
+static int ip6_getpmtu (struct route_in6 *, struct route_in6 *,
+	struct ifnet *, struct in6_addr *, u_int32_t *, int *);
+
+#define	IM6O_TRACE_HIST_SIZE	32	/* size of trace history */
+
+/* For gdb */
+__private_extern__ unsigned int im6o_trace_hist_size = IM6O_TRACE_HIST_SIZE;
+
+struct ip6_moptions_dbg {
+	struct ip6_moptions	im6o;			/* ip6_moptions */
+	u_int16_t		im6o_refhold_cnt;	/* # of IM6O_ADDREF */
+	u_int16_t		im6o_refrele_cnt;	/* # of IM6O_REMREF */
+	/*
+	 * Alloc and free callers.
+	 */
+	ctrace_t		im6o_alloc;
+	ctrace_t		im6o_free;
+	/*
+	 * Circular lists of IM6O_ADDREF and IM6O_REMREF callers.
+	 */
+	ctrace_t		im6o_refhold[IM6O_TRACE_HIST_SIZE];
+	ctrace_t		im6o_refrele[IM6O_TRACE_HIST_SIZE];
+};
+
+#if DEBUG
+static unsigned int im6o_debug = 1;	/* debugging (enabled) */
+#else
+static unsigned int im6o_debug;		/* debugging (disabled) */
+#endif /* !DEBUG */
+
+static unsigned int im6o_size;		/* size of zone element */
+static struct zone *im6o_zone;		/* zone for ip6_moptions */
+
+#define	IM6O_ZONE_MAX		64		/* maximum elements in zone */
+#define	IM6O_ZONE_NAME		"ip6_moptions"	/* zone name */
 
-extern int ip_createmoptions(struct ip_moptions **imop);
-extern int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
-extern int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq);
-extern lck_mtx_t *ip6_mutex;
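Editor's note: the block above wires ip6_moptions into a dedicated zalloc zone with optional refcount tracing: under im6o_debug every element is really a struct ip6_moptions_dbg whose ctrace rings remember the last IM6O_TRACE_HIST_SIZE refhold and refrele callers. A sketch of how such a zone is typically brought up in xnu; the function name is illustrative, while zinit() and zone_change() are the standard zalloc calls:

	void
	im6o_zone_init_sketch(void)
	{
		/* debug builds pay for the larger element with trace rings */
		im6o_size = (im6o_debug == 0) ? sizeof (struct ip6_moptions) :
		    sizeof (struct ip6_moptions_dbg);

		im6o_zone = zinit(im6o_size, IM6O_ZONE_MAX * im6o_size, 0,
		    IM6O_ZONE_NAME);
		if (im6o_zone == NULL)
			panic("%s: failed allocating %s", __func__,
			    IM6O_ZONE_NAME);
		zone_change(im6o_zone, Z_EXPAND, TRUE);
	}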
 
 /*
  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
@@ -198,32 +236,39 @@ ip6_output(
 	int flags,
 	struct ip6_moptions *im6o,
 	struct ifnet **ifpp,	/* XXX: just for statistics */
-	int    locked)		
+	struct ip6_out_args *ip6oa)
 {
 	struct ip6_hdr *ip6, *mhip6;
-	struct ifnet *ifp, *origifp;
+	struct ifnet *ifp = NULL, *origifp = NULL;
 	struct mbuf *m = m0;
 	int hlen, tlen, len, off;
 	struct route_in6 ip6route;
-	struct sockaddr_in6 *dst;
+	struct rtentry *rt = NULL;
+	struct sockaddr_in6 *dst, src_sa, dst_sa;
 	int error = 0;
 	struct in6_ifaddr *ia = NULL;
 	u_int32_t mtu;
+	int alwaysfrag = 0, dontfrag = 0;
 	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
 	struct ip6_exthdrs exthdrs;
-	struct in6_addr finaldst;
+	struct in6_addr finaldst, src0, dst0;
+	u_int32_t zone;
 	struct route_in6 *ro_pmtu = NULL;
 	int hdrsplit = 0;
 	int needipsec = 0;
 	ipfilter_t inject_filter_ref;
-	
+	int tso;
+	unsigned int ifscope;
+	unsigned int nocell;
+	boolean_t select_srcif;
+	struct ipf_pktopts *ippo = NULL, ipf_pktopts;
+	u_int32_t ifmtu;
+
 #if IPSEC
 	int needipsectun = 0;
 	struct socket *so = NULL;
 	struct secpolicy *sp = NULL;
 
-	if (!locked)
-		lck_mtx_lock(ip6_mutex);
 	/* for AH processing. stupid to have "socket" variable in IP layer... */
 	if (ipsec_bypass == 0)
 	{
@@ -232,10 +277,32 @@ ip6_output(
 	}
 #endif /* IPSEC */
 
+	bzero(&ipf_pktopts, sizeof(struct ipf_pktopts));
+	ippo = &ipf_pktopts;
+
 	ip6 = mtod(m, struct ip6_hdr *);
 	inject_filter_ref = ipf_get_inject_filter(m);
+
 	finaldst = ip6->ip6_dst;
 
+	if (ip6_doscopedroute && (flags & IPV6_OUTARGS)) {
+		select_srcif = !(flags & (IPV6_FORWARDING | IPV6_UNSPECSRC | IPV6_FLAG_NOSRCIFSEL));
+		ifscope = ip6oa->ip6oa_boundif;
+		ipf_pktopts.ippo_flags = IPPOF_BOUND_IF;
+		ipf_pktopts.ippo_flags |= (ifscope << IPPOF_SHIFT_IFSCOPE);
+	} else {
+		select_srcif = FALSE;
+		ifscope = IFSCOPE_NONE;
+	}
+
+	if (flags & IPV6_OUTARGS) {
+		nocell = ip6oa->ip6oa_nocell;
+		if (nocell)
+			ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
+	} else {
+		nocell = 0;
+	}
+
 #define MAKE_EXTHDR(hp, mp)						\
     do {								\
 	if (hp) {							\
@@ -253,7 +320,19 @@ ip6_output(
 		/* Hop-by-Hop options header */
 		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
 		/* Destination options header(1st part) */
-		MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
+		if (opt->ip6po_rthdr) {
+			/*
+			 * Destination options header(1st part)
+			 * This only makes sense with a routing header.
+			 * See Section 9.2 of RFC 3542.
+			 * Disabling this part just for MIP6 convenience is
+			 * a bad idea.  We need to think carefully about a
+			 * way to make the advanced API coexist with MIP6
+			 * options, which might automatically be inserted in
+			 * the kernel.
+			 */
+			MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
+		}
 		/* Routing header */
 		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
 		/* Destination options header(2nd part) */
@@ -314,12 +393,24 @@ ip6_output(
 	 * Keep the length of the unfragmentable part for fragmentation.
 	 */
 	optlen = 0;
-	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
-	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
-	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
+	if (exthdrs.ip6e_hbh)
+		optlen += exthdrs.ip6e_hbh->m_len;
+	if (exthdrs.ip6e_dest1)
+		optlen += exthdrs.ip6e_dest1->m_len;
+	if (exthdrs.ip6e_rthdr)
+		optlen += exthdrs.ip6e_rthdr->m_len;
 	unfragpartlen = optlen + sizeof(struct ip6_hdr);
+
 	/* NOTE: we don't add AH/ESP length here. do that later. */
-	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
+	if (exthdrs.ip6e_dest2)
+		optlen += exthdrs.ip6e_dest2->m_len;
+
+	if (needipsec &&
+	    (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) {
+		in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen);
+		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
+	}
 
 	/*
 	 * If we need IPsec, or there is at least one extension header,
@@ -419,14 +510,14 @@ ip6_output(
 			struct ipfilter	*filter;
 			int seen = (inject_filter_ref == 0);
 			int	fixscope = 0;
-			struct ipf_pktopts *ippo = 0, ipf_pktopts;
-						
+
 			if (im6o != NULL && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
-				ippo = &ipf_pktopts;
-				ippo->ippo_flags = IPPOF_MCAST_OPTS;
+				ippo->ippo_flags |= IPPOF_MCAST_OPTS;
+				IM6O_LOCK(im6o);
 				ippo->ippo_mcast_ifnet = im6o->im6o_multicast_ifp;
 				ippo->ippo_mcast_ttl = im6o->im6o_multicast_hlim;
 				ippo->ippo_mcast_loop = im6o->im6o_multicast_loop;
+				IM6O_UNLOCK(im6o);
 			}
 
 			/* Hack: embed the scope_id in the destination */
@@ -436,7 +527,6 @@ ip6_output(
 				ip6->ip6_dst.s6_addr16[1] = htons(ro->ro_dst.sin6_scope_id);
 			}
 			{
-				lck_mtx_unlock(ip6_mutex);
 				ipf_ref();
 				TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) {
 					/*
@@ -452,18 +542,15 @@ ip6_output(
 						result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
 						if (result == EJUSTRETURN) {
 							ipf_unref();
-							locked = 1; /* Don't want to take lock to unlock it right away */
 							goto done;
 						}
 						if (result != 0) {
 							ipf_unref();
-							locked = 1; /* Don't want to take lock to unlock it right away */
 							goto bad;
 						}
 					}
 				}
 				ipf_unref();
-				lck_mtx_lock(ip6_mutex);
 			}
 			ip6 = mtod(m, struct ip6_hdr *);
 			/* Hack: cleanup embedded scope_id if we put it there */
@@ -495,10 +582,8 @@ ip6_output(
 
 		bzero(&state, sizeof(state));
 		state.m = m;
-		lck_mtx_unlock(ip6_mutex);
 		error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
 			&needipsectun);
-		lck_mtx_lock(ip6_mutex);
 		m = state.m;
 		if (error) {
 			/* mbuf is already reclaimed in ipsec6_output_trans. */
@@ -524,13 +609,13 @@ ip6_output(
 			/* ah6_output doesn't modify mbuf chain */
 			rh->ip6r_segleft = segleft_org;
 		}
-	    }
-skip_ipsec2:;
-#endif
+	  }
 	}
+skip_ipsec2:
+#endif
 
 	/*
-	 * If there is a routing header, replace destination address field
+	 * If there is a routing header, replace the destination address field
 	 * with the first hop of the routing header.
 	 */
 	if (exthdrs.ip6e_rthdr) {
@@ -538,17 +623,38 @@ skip_ipsec2:;
 			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
 						  struct ip6_rthdr *));
 		struct ip6_rthdr0 *rh0;
+		struct in6_addr *addr;
+		struct sockaddr_in6 sa;
 
-		finaldst = ip6->ip6_dst;
 		switch (rh->ip6r_type) {
 		case IPV6_RTHDR_TYPE_0:
 			 rh0 = (struct ip6_rthdr0 *)rh;
-			 ip6->ip6_dst = rh0->ip6r0_addr[0];
-			 bcopy((caddr_t)&rh0->ip6r0_addr[1],
-			       (caddr_t)&rh0->ip6r0_addr[0],
-			       sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1)
-				 );
-			 rh0->ip6r0_addr[rh0->ip6r0_segleft - 1] = finaldst;
+			 addr = (struct in6_addr *)(rh0 + 1);
+
+			 /*
+			  * construct a sockaddr_in6 form of
+			  * the first hop.
+			  *
+			  * XXX: we may not have enough
+			  * information about its scope zone;
+			  * there is no standard API to pass
+			  * the information from the
+			  * application.
+			  */
+			 bzero(&sa, sizeof(sa));
+			 sa.sin6_family = AF_INET6;
+			 sa.sin6_len = sizeof(sa);
+			 sa.sin6_addr = addr[0];
+			 if ((error = sa6_embedscope(&sa,
+			     ip6_use_defzone)) != 0) {
+				 goto bad;
+			 }
+			 ip6->ip6_dst = sa.sin6_addr;
+			 bcopy(&addr[1], &addr[0], sizeof(struct in6_addr)
+			     * (rh0->ip6r0_segleft - 1));
+			 addr[rh0->ip6r0_segleft - 1] = finaldst;
+			 /* XXX */
+			 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
 			 break;
 		default:	/* is it possible? */
 			 error = EINVAL;
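Editor's note: for a type-0 routing header, the wire format wants the first listed hop in the IPv6 destination field, the remaining hops shifted down one slot, and the final destination parked in the last slot; the new code additionally passes the first hop through sa6_embedscope() to pick up a zone id and clears any embedded scope from the slot that now holds the final destination. With n = rh0->ip6r0_segleft, the rotation amounts to:

	/*
	 * Before:  ip6_dst = D,  addr[] = { A1, A2, ..., An }
	 * After:   ip6_dst = A1, addr[] = { A2, ..., An, D }
	 */
	ip6->ip6_dst = addr[0];			/* first hop becomes dst */
	bcopy(&addr[1], &addr[0],
	    sizeof (struct in6_addr) * (n - 1));	/* shift hops down */
	addr[n - 1] = finaldst;			/* final dst goes last */
	in6_clearscope(&addr[n - 1]);		/* no zone id on the wire */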
@@ -558,7 +664,7 @@ skip_ipsec2:;
 
 	/* Source address validation */
 	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
-	    (flags & IPV6_DADOUTPUT) == 0) {
+	    (flags & IPV6_UNSPECSRC) == 0) {
 		error = EOPNOTSUPP;
 		ip6stat.ip6s_badscope++;
 		goto bad;
@@ -582,6 +688,38 @@ skip_ipsec2:;
 	if (opt && opt->ip6po_rthdr)
 		ro = &opt->ip6po_route;
 	dst = (struct sockaddr_in6 *)&ro->ro_dst;
+
+	if (ro && ro->ro_rt)
+		RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
+	/*
+	 * if specified, try to fill in the traffic class field.
+	 * do not override if a non-zero value is already set.
+	 * we check the diffserv field and the ecn field separately.
+	 */
+	if (opt && opt->ip6po_tclass >= 0) {
+		int mask = 0;
+
+		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
+			mask |= 0xfc;
+		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
+			mask |= 0x03;
+		if (mask != 0)
+			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
+	}
+
+	/* fill in or override the hop limit field, if necessary. */
+	if (opt && opt->ip6po_hlim != -1)
+		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
+	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
+		if (im6o != NULL) {
+			IM6O_LOCK(im6o);
+			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
+			IM6O_UNLOCK(im6o);
+		} else {
+			ip6->ip6_hlim = ip6_defmcasthlim;
+		}
+	}
+
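Editor's note: the traffic-class merge treats the 8-bit field at bits 20-27 of ip6_flow as two independent sub-fields, DSCP (0xfc) and ECN (0x03), and only fills whichever is still zero. Worked example: if the packet already carries ECN CE (0x03) and the socket asks for tclass 0x2e (DSCP EF), only 0xfc makes it into the mask, so the result is DSCP EF with the earlier ECN codepoint intact. The logic above, isolated (a sketch, not a function in the patch):

	/* Merge a requested traffic class without clobbering set bits. */
	static void
	ip6_merge_tclass(struct ip6_hdr *ip6, int tclass)
	{
		int mask = 0;

		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
			mask |= 0xfc;		/* DSCP still unset */
		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
			mask |= 0x03;		/* ECN still unset */
		if (mask != 0)
			ip6->ip6_flow |= htonl((tclass & mask) << 20);
	}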
 	/*
 	 * If there is a cached route, check that it is to the same
 	 * destination and is still up. If not, free it and try again.
@@ -602,17 +740,14 @@ skip_ipsec2:;
 		dst->sin6_family = AF_INET6;
 		dst->sin6_len = sizeof(struct sockaddr_in6);
 		dst->sin6_addr = ip6->ip6_dst;
-#if SCOPEDROUTING
-		/* XXX: sin6_scope_id should already be fixed at this point */
-		if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
-			dst->sin6_scope_id = ntohs(dst->sin6_addr.s6_addr16[1]);
-#endif
 	}
 #if IPSEC
 	if (needipsec && needipsectun) {
 		struct ipsec_output_state state;
 		int tunneledv4 = 0;
-
+#if CONFIG_DTRACE
+		struct ifnet *trace_ifp = (ifpp != NULL) ? (*ifpp) : NULL;
+#endif /* CONFIG_DTRACE */
 		/*
 		 * All the extension headers will become inaccessible
 		 * (since they can be encrypted).
@@ -628,9 +763,13 @@ skip_ipsec2:;
 		state.m = m;
 		state.ro = (struct route *)ro;
 		state.dst = (struct sockaddr *)dst;
-		lck_mtx_unlock(ip6_mutex);
+
+		/* Added a trace here so that we can see packets inside a tunnel */
+		DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
+			struct ip6_hdr *, ip6, struct ifnet *, trace_ifp,
+			struct ip *, NULL, struct ip6_hdr *, ip6); 
+
 		error = ipsec6_output_tunnel(&state, sp, flags, &tunneledv4);
-		lck_mtx_lock(ip6_mutex);
 		if (tunneledv4)	/* tunneled in IPv4 - packet is gone */
 			goto done;
 		m = state.m;
@@ -657,182 +796,147 @@ skip_ipsec2:;
 			}
 			goto bad;
 		}
-
+		/*
+		 * The packet has been encapsulated, so the ifscope is no
+		 * longer valid since it does not apply to the outer address:
+		 * ignore the ifscope.
+		 */
+		ifscope = IFSCOPE_NONE;
+		if (opt != NULL && opt->ip6po_pktinfo != NULL) {
+			if (opt->ip6po_pktinfo->ipi6_ifindex != IFSCOPE_NONE)
+				opt->ip6po_pktinfo->ipi6_ifindex = IFSCOPE_NONE;
+		}
 		exthdrs.ip6e_ip6 = m;
 	}
 #endif /* IPSEC */
 
-	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
-		/* Unicast */
-
-#define ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
-#define sin6tosa(sin6)	((struct sockaddr *)(sin6))
-		/* xxx
-		 * interface selection comes here
-		 * if an interface is specified from an upper layer,
-		 * ifp must point it.
-		 */
-		if (ro->ro_rt == NULL) {
-			/*
-			 * non-bsdi always clone routes, if parent is
-			 * PRF_CLONING.
-			 */
-			rtalloc_ign((struct route *)ro, 0);
-		}
-		if (ro->ro_rt == NULL) {
-			ip6stat.ip6s_noroute++;
-			error = EHOSTUNREACH;
-			/* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */
-			goto bad;
-		}
-		RT_LOCK_SPIN(ro->ro_rt);
-		ia = ifatoia6(ro->ro_rt->rt_ifa);
-		if (ia != NULL)
-			ifaref(&ia->ia_ifa);
-		ifp = ro->ro_rt->rt_ifp;
-		ro->ro_rt->rt_use++;
-		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
-			dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway;
-		RT_UNLOCK(ro->ro_rt);
-		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
+	/* for safety */
+	if (ifp != NULL) {
+		ifnet_release(ifp);
+		ifp = NULL;
+	}
 
-		in6_ifstat_inc(ifp, ifs6_out_request);
+	/* adjust pointer */
+	ip6 = mtod(m, struct ip6_hdr *);
 
-		/*
-		 * Check if the outgoing interface conflicts with
-		 * the interface specified by ifi6_ifindex (if specified).
-		 * Note that loopback interface is always okay.
-		 * (this may happen when we are sending a packet to one of
-		 *  our own addresses.)
-		 */
-		if (opt && opt->ip6po_pktinfo
-		 && opt->ip6po_pktinfo->ipi6_ifindex) {
-			if (!(ifp->if_flags & IFF_LOOPBACK)
-			 && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) {
-				ip6stat.ip6s_noroute++;
-				in6_ifstat_inc(ifp, ifs6_out_discard);
-				error = EHOSTUNREACH;
-				goto bad;
-			}
+	if (select_srcif) {
+		bzero(&src_sa, sizeof(src_sa));
+		src_sa.sin6_family = AF_INET6;
+		src_sa.sin6_len = sizeof(src_sa);
+		src_sa.sin6_addr = ip6->ip6_src;
+	}
+	bzero(&dst_sa, sizeof(dst_sa));
+	dst_sa.sin6_family = AF_INET6;
+	dst_sa.sin6_len = sizeof(dst_sa);
+	dst_sa.sin6_addr = ip6->ip6_dst;
+
+	if ((error = in6_selectroute(select_srcif ? &src_sa : NULL,
+	    &dst_sa, opt, im6o, ro, &ifp, &rt, 0, ifscope, nocell)) != 0) {
+		switch (error) {
+		case EHOSTUNREACH:
+			ip6stat.ip6s_noroute++;
+			break;
+		case EADDRNOTAVAIL:
+		default:
+			break; /* XXX statistics? */
 		}
-
+		if (ifp != NULL)
+			in6_ifstat_inc(ifp, ifs6_out_discard);
+		goto bad;
+	}
+	if (rt == NULL) {
 		/*
-		 * if specified, try to fill in the traffic class field.
-		 * do not override if a non-zero value is already set.
-		 * we check the diffserv field and the ecn field separately.
+		 * If in6_selectroute() does not return a route entry,
+		 * dst may not have been updated.
 		 */
-		if (opt && opt->ip6po_tclass >= 0) {
-			int mask = 0;
-	
-			if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
-				mask |= 0xfc;
-			if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
-				mask |= 0x03;
-			if (mask != 0)
-				ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
-		}
+		*dst = dst_sa;	/* XXX */
+	}
 
-		if (opt && opt->ip6po_hlim != -1)
-			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
-	} else {
-		/* Multicast */
-		struct	in6_multi *in6m;
+	/*
+	 * At this point, rt (for unicast) and ifp must be non-NULL, valid values.
+	 */
+	if ((flags & IPV6_FORWARDING) == 0) {
+		/* XXX: the FORWARDING flag can be set for mrouting. */
+		in6_ifstat_inc(ifp, ifs6_out_request);
+	}
+	if (rt != NULL) {
+		RT_LOCK(rt);
+		ia = (struct in6_ifaddr *)(rt->rt_ifa);
+		if (ia != NULL)
+			IFA_ADDREF(&ia->ia_ifa);
+		rt->rt_use++;
+		RT_UNLOCK(rt);
+	}
 
-		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
+	/*
+	 * The outgoing interface must be in the zone of source and
+	 * destination addresses.  We should use ia_ifp to support the
+	 * case of sending packets to an address of our own.
+	 */
+	if (ia != NULL && ia->ia_ifp) {
+		ifnet_reference(ia->ia_ifp);
+		if (origifp != NULL)
+			ifnet_release(origifp);
+		origifp = ia->ia_ifp;
+	} else {
+		if (ifp != NULL)
+			ifnet_reference(ifp);
+		if (origifp != NULL)
+			ifnet_release(origifp);
+		origifp = ifp;
+	}
+	src0 = ip6->ip6_src;
+	if (in6_setscope(&src0, origifp, &zone))
+		goto badscope;
+	bzero(&src_sa, sizeof(src_sa));
+	src_sa.sin6_family = AF_INET6;
+	src_sa.sin6_len = sizeof(src_sa);
+	src_sa.sin6_addr = ip6->ip6_src;
+	if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
+		goto badscope;
+
+	dst0 = ip6->ip6_dst;
+	if (in6_setscope(&dst0, origifp, &zone))
+		goto badscope;
+	/* re-initialize to be sure */
+	bzero(&dst_sa, sizeof(dst_sa));
+	dst_sa.sin6_family = AF_INET6;
+	dst_sa.sin6_len = sizeof(dst_sa);
+	dst_sa.sin6_addr = ip6->ip6_dst;
+	if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
+		goto badscope;
+	}
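Editor's note: each address is validated two ways: in6_setscope() derives the zone the address belongs to on origifp, while sa6_recoverscope() pulls out whatever zone is already embedded in the address, and the packet only proceeds if the two agree for both source and destination. The invariant as a standalone helper (a sketch under the signatures used above):

	/* Sketch: 0 iff addr carries a scope zone consistent with ifp. */
	static int
	scope_is_consistent(const struct in6_addr *addr, struct ifnet *ifp)
	{
		struct in6_addr tmp = *addr;
		struct sockaddr_in6 sa;
		u_int32_t zone;

		if (in6_setscope(&tmp, ifp, &zone) != 0)
			return (EHOSTUNREACH);

		bzero(&sa, sizeof (sa));
		sa.sin6_family = AF_INET6;
		sa.sin6_len = sizeof (sa);
		sa.sin6_addr = *addr;
		if (sa6_recoverscope(&sa) != 0 || zone != sa.sin6_scope_id)
			return (EHOSTUNREACH);

		return (0);
	}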
 
-		/*
-		 * See if the caller provided any multicast options
-		 */
-		ifp = NULL;
-		if (im6o != NULL) {
-			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
-			if (im6o->im6o_multicast_ifp != NULL)
-				ifp = im6o->im6o_multicast_ifp;
-		} else
-			ip6->ip6_hlim = ip6_defmcasthlim;
+	/* scope check is done. */
+	goto routefound;
 
-		/*
-		 * See if the caller provided the outgoing interface
-		 * as an ancillary data.
-		 * Boundary check for ifindex is assumed to be already done.
-		 */
-		if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex) {
-			unsigned int index = opt->ip6po_pktinfo->ipi6_ifindex;
-			ifnet_head_lock_shared();
-			if (index > 0 && index <= if_index) {
-				ifp = ifindex2ifnet[index];
-			}
-			ifnet_head_done();
-		}
+  badscope:
+	ip6stat.ip6s_badscope++;
+	in6_ifstat_inc(origifp, ifs6_out_discard);
+	if (error == 0)
+		error = EHOSTUNREACH; /* XXX */
+	goto bad;
 
-		/*
-		 * If the destination is a node-local scope multicast,
-		 * the packet should be loop-backed only.
-		 */
-		if (IN6_IS_ADDR_MC_NODELOCAL(&ip6->ip6_dst)) {
+  routefound:
+	if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
+		if (opt && opt->ip6po_nextroute.ro_rt) {
 			/*
-			 * If the outgoing interface is already specified,
-			 * it should be a loopback interface.
+			 * The nexthop is explicitly specified by the
+			 * application.  We assume the next hop is an IPv6
+			 * address.
 			 */
-			if (ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) {
-				ip6stat.ip6s_badscope++;
-				error = ENETUNREACH; /* XXX: better error? */
-				/* XXX correct ifp? */
-				in6_ifstat_inc(ifp, ifs6_out_discard);
-				goto bad;
-			} else {
-				ifp = lo_ifp;
-			}
-		}
-
-		/*
-		 * if specified, try to fill in the traffic class field.
-		 * do not override if a non-zero value is already set.
-		 * we check the diffserv field and the ecn field separately.
-		 */
-		if (opt && opt->ip6po_tclass >= 0) {
-			int mask = 0;
-	
-			if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
-				mask |= 0xfc;
-			if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
-				mask |= 0x03;
-			if (mask != 0)
-				ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
+			dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
 		}
+		else if ((rt->rt_flags & RTF_GATEWAY))
+			dst = (struct sockaddr_in6 *)rt->rt_gateway;
+	}
 
-		if (opt && opt->ip6po_hlim != -1)
-			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
+	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
+		m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
+	} else {
+		struct	in6_multi *in6m;
 
-		/*
-		 * If caller did not provide an interface lookup a
-		 * default in the routing table.  This is either a
-		 * default for the speicfied group (i.e. a host
-		 * route), or a multicast default (a route for the
-		 * ``net'' ff00::/8).
-		 */
-		if (ifp == NULL) {
-			if (ro->ro_rt == NULL) {
-				ro->ro_rt = rtalloc1(
-				    (struct sockaddr *)&ro->ro_dst, 0, 0);
-			}
-			if (ro->ro_rt == NULL) {
-				ip6stat.ip6s_noroute++;
-				error = EHOSTUNREACH;
-				/* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */
-				goto bad;
-			}
-			RT_LOCK_SPIN(ro->ro_rt);
-			ia = ifatoia6(ro->ro_rt->rt_ifa);
-			if (ia != NULL)
-				ifaref(&ia->ia_ifa);
-			ifp = ro->ro_rt->rt_ifp;
-			ro->ro_rt->rt_use++;
-			RT_UNLOCK(ro->ro_rt);
-		}
+		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
 
-		if ((flags & IPV6_FORWARDING) == 0)
-			in6_ifstat_inc(ifp, ifs6_out_request);
 		in6_ifstat_inc(ifp, ifs6_out_mcast);
 
 		/*
@@ -844,11 +948,15 @@ skip_ipsec2:;
 			error = ENETUNREACH;
 			goto bad;
 		}
-		ifnet_lock_shared(ifp);
-		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
-		ifnet_lock_done(ifp);
+		in6_multihead_lock_shared();
+		IN6_LOOKUP_MULTI(&ip6->ip6_dst, ifp, in6m);
+		in6_multihead_lock_done();
+		if (im6o != NULL)
+			IM6O_LOCK(im6o);
 		if (in6m != NULL &&
 		   (im6o == NULL || im6o->im6o_multicast_loop)) {
+			if (im6o != NULL)
+				IM6O_UNLOCK(im6o);
 			/*
 			 * If we belong to the destination multicast group
 			 * on the outgoing interface, and the caller did not
@@ -856,6 +964,8 @@ skip_ipsec2:;
 			 */
 			ip6_mloopback(ifp, m, dst);
 		} else {
+			if (im6o != NULL)
+				IM6O_UNLOCK(im6o);
 			/*
 			 * If we are acting as a multicast router, perform
 			 * multicast forwarding as if the packet had just
@@ -870,13 +980,25 @@ skip_ipsec2:;
 			 */
 #if MROUTING
 			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
+				/*
+				 * XXX: ip6_mforward expects that rcvif is NULL
+				 * when it is called from the originating path.
+				 * However, that is not always the case, since
+				 * some versions of MGETHDR() do not
+				 * initialize the field.
+				 */
+				m->m_pkthdr.rcvif = NULL;
 				if (ip6_mforward(ip6, ifp, m) != 0) {
 					m_freem(m);
+					if (in6m != NULL)
+						IN6M_REMREF(in6m);
 					goto done;
 				}
 			}
 #endif
 		}
+		if (in6m != NULL)
+			IN6M_REMREF(in6m);
 		/*
 		 * Multicasts with a hoplimit of zero may be looped back,
 		 * above, but must not be transmitted on a network.
@@ -885,7 +1007,8 @@ skip_ipsec2:;
 		 * loop back a copy if this host actually belongs to the
 		 * destination group on the loopback interface.
 		 */
-		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK)) {
+		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
+		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
 			m_freem(m);
 			goto done;
 		}
@@ -895,122 +1018,48 @@ skip_ipsec2:;
 	 * Fill in the outgoing interface to tell the upper layer
 	 * to increment per-interface statistics.
 	 */
-	if (ifpp)
+	if (ifpp != NULL) {
+		ifnet_reference(ifp);	/* for caller */
+		if (*ifpp != NULL)
+			ifnet_release(*ifpp);
 		*ifpp = ifp;
-
-	/*
-	 * Determine path MTU.
-	 */
-	if (ro_pmtu != ro) {
-		/* The first hop and the final destination may differ. */
-		struct sockaddr_in6 *sin6_fin =
-			(struct sockaddr_in6 *)&ro_pmtu->ro_dst;
-		if (ro_pmtu->ro_rt != NULL &&
-		    (!(ro_pmtu->ro_rt->rt_flags & RTF_UP) ||
-		    ro_pmtu->ro_rt->generation_id != route_generation ||
-		    !IN6_ARE_ADDR_EQUAL(&sin6_fin->sin6_addr, &finaldst))) {
-			rtfree(ro_pmtu->ro_rt);
-			ro_pmtu->ro_rt = NULL;
-		}
-		if (ro_pmtu->ro_rt == NULL) {
-			bzero(sin6_fin, sizeof(*sin6_fin));
-			sin6_fin->sin6_family = AF_INET6;
-			sin6_fin->sin6_len = sizeof(struct sockaddr_in6);
-			sin6_fin->sin6_addr = finaldst;
-
-			rtalloc((struct route *)ro_pmtu);
-		}
 	}
-	if (ro_pmtu->ro_rt != NULL) {
-		u_int32_t ifmtu;
-
-		lck_rw_lock_shared(nd_if_rwlock);
-		ifmtu = IN6_LINKMTU(ifp);
-		lck_rw_done(nd_if_rwlock);
 
-		RT_LOCK_SPIN(ro_pmtu->ro_rt);
-		mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
-		if (mtu > ifmtu || mtu == 0) {
-			/*
-			 * The MTU on the route is larger than the MTU on
-			 * the interface!  This shouldn't happen, unless the
-			 * MTU of the interface has been changed after the
-			 * interface was brought up.  Change the MTU in the
-			 * route to match the interface MTU (as long as the
-			 * field isn't locked).
-			 *
-			 * if MTU on the route is 0, we need to fix the MTU.
-			 * this case happens with path MTU discovery timeouts.
-			 */
-			 mtu = ifmtu;
-			 if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
-				 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
-		}
-		RT_UNLOCK(ro_pmtu->ro_rt);
-	} else {
-		lck_rw_lock_shared(nd_if_rwlock);
-		mtu = IN6_LINKMTU(ifp);
-		lck_rw_done(nd_if_rwlock);
-	}
+	/* Determine path MTU. */
+	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
+	    &alwaysfrag)) != 0)
+		goto bad;
 
 	/*
-	 * advanced API (IPV6_USE_MIN_MTU) overrides mtu setting
+	 * The caller of this function may specify to use the minimum MTU
+	 * in some cases.
+	 * An advanced API option (IPV6_USE_MIN_MTU) can also override the MTU
+	 * setting.  The logic is a bit complicated; by default, unicast
+	 * packets will follow path MTU while multicast packets will be sent at
+	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
+	 * including unicast ones will be sent at the minimum MTU.  Multicast
+	 * packets will always be sent at the minimum MTU unless
+	 * IP6PO_MINMTU_DISABLE is explicitly specified.
+	 * See RFC 3542 for more details.
 	 */
-	if ((flags & IPV6_MINMTU) != 0 && mtu > IPV6_MMTU)
-		mtu = IPV6_MMTU;
-
-	/* Fake scoped addresses */
-	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
-		/*
-		 * If source or destination address is a scoped address, and
-		 * the packet is going to be sent to a loopback interface,
-		 * we should keep the original interface.
-		 */
-
-		/*
-		 * XXX: this is a very experimental and temporary solution.
-		 * We eventually have sockaddr_in6 and use the sin6_scope_id
-		 * field of the structure here.
-		 * We rely on the consistency between two scope zone ids
-		 * of source and destination, which should already be assured.
-		 * Larger scopes than link will be supported in the future. 
-		 */
-		u_short index = 0;
-		origifp = NULL;
-		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
-			index = ntohs(ip6->ip6_src.s6_addr16[1]);
-		else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
-			index = ntohs(ip6->ip6_dst.s6_addr16[1]);
-		ifnet_head_lock_shared();
-		if (index > 0 && index <= if_index) {
-			origifp = ifindex2ifnet[index]; 
+	if (mtu > IPV6_MMTU) {
+		if ((flags & IPV6_MINMTU))
+			mtu = IPV6_MMTU;
+		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
+			mtu = IPV6_MMTU;
+		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
+			 (opt == NULL ||
+			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
+			mtu = IPV6_MMTU;
 		}
-		ifnet_head_done();
-		/*
-		 * XXX: origifp can be NULL even in those two cases above.
-		 * For example, if we remove the (only) link-local address
-		 * from the loopback interface, and try to send a link-local
-		 * address without link-id information.  Then the source
-		 * address is ::1, and the destination address is the
-		 * link-local address with its s6_addr16[1] being zero.
-		 * What is worse, if the packet goes to the loopback interface
-		 * by a default rejected route, the null pointer would be
-		 * passed to looutput, and the kernel would hang.
-		 * The following last resort would prevent such disaster.
-		 */
-		if (origifp == NULL)
-			origifp = ifp;
 	}
-	else
-		origifp = ifp;
-#ifndef SCOPEDROUTING
+
 	/*
 	 * clear embedded scope identifiers if necessary.
 	 * in6_clearscope will touch the addresses only when necessary.
 	 */
 	in6_clearscope(&ip6->ip6_src);
 	in6_clearscope(&ip6->ip6_dst);
-#endif
 
 #if IPFW2
 	/*
@@ -1038,8 +1087,7 @@ skip_ipsec2:;
 	 */
 	if (exthdrs.ip6e_hbh) {
 		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
-		u_int32_t dummy1; /* XXX unused */
-		u_int32_t dummy2; /* XXX unused */
+		u_int32_t dummy; /* XXX unused */
 
 #if DIAGNOSTIC
 		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
@@ -1053,11 +1101,9 @@ skip_ipsec2:;
 		 */
 		m->m_flags |= M_LOOP;
 		m->m_pkthdr.rcvif = ifp;
-		if (ip6_process_hopopts(m,
-					(u_int8_t *)(hbh + 1),
-					((hbh->ip6h_len + 1) << 3) -
-					sizeof(struct ip6_hbh),
-					&dummy1, &dummy2) < 0) {
+		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
+		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
+		    &dummy, &plen) < 0) {
 			/* m was already freed at this point */
 			error = EINVAL;/* better error? */
 			goto done;
@@ -1067,75 +1113,127 @@ skip_ipsec2:;
 	}
 
 #if PF
-	lck_mtx_unlock(ip6_mutex);
-
-	/* Invoke outbound packet filter */
-	error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE);
-
-	lck_mtx_lock(ip6_mutex);
+	if (PF_IS_ENABLED) {
+		/* Invoke outbound packet filter */
+		error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE);
 
-	if (error) {
-		if (m != NULL) {
-			panic("%s: unexpected packet %p\n", __func__, m);
-			/* NOTREACHED */
+		if (error) {
+			if (m != NULL) {
+				panic("%s: unexpected packet %p\n", __func__, m);
+				/* NOTREACHED */
+			}
+			/* Already freed by callee */
+			goto done;
 		}
-		/* Already freed by callee */
-		goto done;
+		ip6 = mtod(m, struct ip6_hdr *);
 	}
-	ip6 = mtod(m, struct ip6_hdr *);
 #endif /* PF */
 
 	/*
 	 * Send the packet to the outgoing interface.
 	 * If necessary, do IPv6 fragmentation before sending.
+	 *
+	 * the logic here is rather complex:
+	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
+	 * 1-a:	send as is if tlen <= path mtu
+	 * 1-b:	fragment if tlen > path mtu
+	 *
+	 * 2: if user asks us not to fragment (dontfrag == 1)
+	 * 2-a:	send as is if tlen <= interface mtu
+	 * 2-b:	error if tlen > interface mtu
+	 *
+	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
+	 *	always fragment
+	 *
+	 * 4: if dontfrag == 1 && alwaysfrag == 1
+	 *	error, as we cannot handle this conflicting request
 	 */
 	tlen = m->m_pkthdr.len;
-	if (tlen <= mtu
-#if notyet
-	    /*
-	     * On any link that cannot convey a 1280-octet packet in one piece,
-	     * link-specific fragmentation and reassembly must be provided at
-	     * a layer below IPv6. [RFC 2460, sec.5]
-	     * Thus if the interface has ability of link-level fragmentation,
-	     * we can just send the packet even if the packet size is
-	     * larger than the link's MTU.
-	     * XXX: IFF_FRAGMENTABLE (or such) flag has not been defined yet...
-	     */
-	
-	    || ifp->if_flags & IFF_FRAGMENTABLE
-#endif
-	    )
-	{
- 		/* Record statistics for this interface address. */
- 		if (ia && !(flags & IPV6_FORWARDING)) {
-#ifndef __APPLE__
- 			ia->ia_ifa.if_opackets++;
- 			ia->ia_ifa.if_obytes += m->m_pkthdr.len;
-#endif
- 		}
+
+	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
+		dontfrag = 1;
+	else
+		dontfrag = 0;
+	if (dontfrag && alwaysfrag) {	/* case 4 */
+		/* conflicting request - can't transmit */
+		error = EMSGSIZE;
+		goto bad;
+	}
+
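Editor's note: the four cases enumerated in the comment collapse into a small decision: dontfrag comes from the IP6PO_DONTFRAG sticky option, alwaysfrag from ip6_getpmtu() when a too-big report below IPV6_MMTU was recorded. Ignoring the TSO and hardware-fragmentation shortcuts, the decision reads (a sketch, not code from the patch):

	/* 0 = send as is, 1 = fragment, EMSGSIZE = refuse to transmit. */
	static int
	frag_decision(int tlen, u_int32_t mtu, u_int32_t ifmtu,
	    int dontfrag, int alwaysfrag)
	{
		if (dontfrag && alwaysfrag)		/* case 4: conflict */
			return (EMSGSIZE);
		if (dontfrag)				/* cases 2-a, 2-b */
			return (tlen <= (int)ifmtu ? 0 : EMSGSIZE);
		if (alwaysfrag)				/* case 3 */
			return (1);
		return (tlen <= (int)mtu ? 0 : 1);	/* cases 1-a, 1-b */
	}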
+	lck_rw_lock_shared(nd_if_rwlock);
+	ifmtu = IN6_LINKMTU(ifp);
+	lck_rw_done(nd_if_rwlock);
+
+	if (dontfrag && tlen > ifmtu) {	/* case 2-b */
+		/*
+		 * Even if the DONTFRAG option is specified, we cannot send the
+		 * packet when the data length is larger than the MTU of the
+		 * outgoing interface.
+		 * Notify the error by sending IPV6_PATHMTU ancillary data as
+		 * well as returning an error code (the latter is not described
+		 * in the API spec.)
+		 */
+		u_int32_t mtu32;
+		struct ip6ctlparam ip6cp;
+
+		mtu32 = (u_int32_t)mtu;
+		bzero(&ip6cp, sizeof(ip6cp));
+		ip6cp.ip6c_cmdarg = (void *)&mtu32;
+		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
+		    (void *)&ip6cp);
+
+		error = EMSGSIZE;
+		goto bad;
+	}
+
+	/*
+	 * transmit packet without fragmentation
+	 */
+	tso = (ifp->if_hwassist & IFNET_TSO_IPV6) &&
+	    (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6);
+	if (dontfrag || (!alwaysfrag &&		/* case 1-a and 2-a */
+	    (tlen <= mtu || tso || (ifp->if_hwassist & CSUM_FRAGMENT_IPV6)))) {
+		int sw_csum;
+
+		ip6 = mtod(m, struct ip6_hdr *);
 #ifdef IPSEC
 		/* clean ipsec history once it goes out of the node */
 		ipsec_delaux(m);
 #endif
 
-		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt, 1);
+		if (apple_hwcksum_tx == 0) /* Do not let HW handle cksum */
+			sw_csum = m->m_pkthdr.csum_flags;
+		else
+			sw_csum = m->m_pkthdr.csum_flags &
+			    ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
+
+		if ((sw_csum & CSUM_DELAY_IPV6_DATA) != 0) {
+			in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen);
+			m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
+		}
+		if (ro->ro_rt)
+			RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
+		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
 		goto done;
-	} else if (mtu < IPV6_MMTU) {
-		/*
-		 * note that path MTU is never less than IPV6_MMTU
-		 * (see icmp6_input).
-		 */
+	}
+
+	/*
+	 * Try to fragment the packet.  Cases 1-b and 3.
+	 */
+	if (mtu < IPV6_MMTU) {
+		/* path MTU cannot be less than IPV6_MMTU */
 		error = EMSGSIZE;
 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
 		goto bad;
-	} else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */
+	} else if (ip6->ip6_plen == 0) {
+		/* jumbo payload cannot be fragmented */
 		error = EMSGSIZE;
 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
 		goto bad;
 	} else {
 		struct mbuf **mnext, *m_frgpart;
-		struct ip6_frag *ip6f = NULL;
-		u_int32_t id = htonl(ip6_id++);
+		struct ip6_frag *ip6f;
+		u_int32_t id = htonl(ip6_randomid());
 		u_char nextproto;
 
 		/*
@@ -1174,6 +1272,11 @@ skip_ipsec2:;
 			ip6->ip6_nxt = IPPROTO_FRAGMENT;
 		}
 
+		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) {
+			in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen);
+			m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
+		}
+
 		/*
 		 * Loop through length of segment after first fragment,
 		 * make new header and copy data of each part and link onto
@@ -1254,7 +1357,7 @@ sendorfree:
 			/* clean ipsec history once it goes out of the node */
 			ipsec_delaux(m);
 #endif
-			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt, 1);
+			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
 
 		} else
 			m_freem(m);
@@ -1264,8 +1367,6 @@ sendorfree:
 		ip6stat.ip6s_fragmented++;
 
 done:
-	if (!locked)
-		lck_mtx_unlock(ip6_mutex);
 	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for rtfree */
 		rtfree(ro->ro_rt);
 	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
@@ -1278,8 +1379,12 @@ done:
 #endif /* IPSEC */
 
 	if (ia != NULL)
-		ifafree(&ia->ia_ifa);
-	return(error);
+		IFA_REMREF(&ia->ia_ifa);
+	if (ifp != NULL)
+		ifnet_release(ifp);
+	if (origifp != NULL)
+		ifnet_release(origifp);
+	return (error);
 
 freehdrs:
 	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
@@ -1311,7 +1416,7 @@ ip6_copyexthdr(mp, hdr, hlen)
 		MCLGET(m, M_DONTWAIT);
 		if ((m->m_flags & M_EXT) == 0) {
 			m_free(m);
-			return(ENOBUFS);
+			return (ENOBUFS);
 		}
 	}
 	m->m_len = hlen;
@@ -1319,9 +1424,29 @@ ip6_copyexthdr(mp, hdr, hlen)
 		bcopy(hdr, mtod(m, caddr_t), hlen);
 
 	*mp = m;
-	return(0);
+	return (0);
 }
 
+/*
+ * Process a delayed payload checksum calculation.
+ */
+void
+in6_delayed_cksum(struct mbuf *m, uint16_t offset)
+{
+	uint16_t csum;
+
+	csum = in6_cksum(m, 0, offset, m->m_pkthdr.len - offset);
+	if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDPIPV6) != 0) {
+		csum = 0xffff;
+	}
+
+	offset += (m->m_pkthdr.csum_data & 0xffff);
+	if ((offset + sizeof(csum)) > m->m_len) {
+		m_copyback(m, offset, sizeof(csum), &csum);
+	} else {
+		*(uint16_t *)(mtod(m, char *) + offset) = csum;
+	}
+}
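Editor's note: callers defer the transport checksum by leaving CSUM_DELAY_IPV6_DATA set, with csum_data holding the offset of the checksum field inside the transport header; in6_delayed_cksum() then sums from the given offset to the end of the packet and stores the result, folding 0 to 0xffff for UDP since a zero checksum is not transmittable over IPv6. The caller's side, as used at the transmit and fragmentation points above:

	/* Finalize the deferred transport checksum over the whole packet;
	 * the payload begins after the IPv6 header and any unfragmentable
	 * extension headers. */
	if ((m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) {
		in6_delayed_cksum(m, sizeof (struct ip6_hdr) + optlen);
		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
	}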
 /*
  * Insert jumbo payload option.
  */
@@ -1345,7 +1470,7 @@ ip6_insert_jumboopt(exthdrs, plen)
 	if (exthdrs->ip6e_hbh == 0) {
 		MGET(mopt, M_DONTWAIT, MT_DATA);
 		if (mopt == 0)
-			return(ENOBUFS);
+			return (ENOBUFS);
 		mopt->m_len = JUMBOOPTLEN;
 		optbuf = mtod(mopt, u_char *);
 		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
@@ -1361,7 +1486,7 @@ ip6_insert_jumboopt(exthdrs, plen)
 			 *   other than exthdrs.
 			 * - exthdrs->ip6e_hbh is not an mbuf chain.
 			 */
-			int oldoptlen = mopt->m_len;
+			u_int32_t oldoptlen = mopt->m_len;
 			struct mbuf *n;
 
 			/*
@@ -1369,7 +1494,7 @@ ip6_insert_jumboopt(exthdrs, plen)
 			 * not fit even in an mbuf cluster.
 			 */
 			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
-				return(ENOBUFS);
+				return (ENOBUFS);
 
 			/*
 			 * As a consequence, we must always prepare a cluster
@@ -1384,11 +1509,11 @@ ip6_insert_jumboopt(exthdrs, plen)
 				}
 			}
 			if (!n)
-				return(ENOBUFS);
+				return (ENOBUFS);
 			n->m_len = oldoptlen + JUMBOOPTLEN;
 			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
-			      oldoptlen);
-			optbuf = (u_char *) (mtod(n, caddr_t) + oldoptlen);
+			    oldoptlen);
+			optbuf = mtod(n, u_char *) + oldoptlen;
 			m_freem(mopt);
 			mopt = exthdrs->ip6e_hbh = n;
 		} else {
@@ -1415,7 +1540,7 @@ ip6_insert_jumboopt(exthdrs, plen)
 	/* finally, adjust the packet header length */
 	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
 
-	return(0);
+	return (0);
 #undef JUMBOOPTLEN
 }
 
@@ -1432,9 +1557,9 @@ ip6_insertfraghdr(m0, m, hlen, frghdrp)
 
 	if (hlen > sizeof(struct ip6_hdr)) {
 		n = m_copym(m0, sizeof(struct ip6_hdr),
-			    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
+		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
 		if (n == 0)
-			return(ENOBUFS);
+			return (ENOBUFS);
 		m->m_next = n;
 	} else
 		n = m;
@@ -1446,8 +1571,8 @@ ip6_insertfraghdr(m0, m, hlen, frghdrp)
 	if ((mlast->m_flags & M_EXT) == 0 &&
 	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
 		/* use the trailing space of the last mbuf for the fragment hdr */
-		*frghdrp =
-			(struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len);
+		*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
+		    mlast->m_len);
 		mlast->m_len += sizeof(struct ip6_frag);
 		m->m_pkthdr.len += sizeof(struct ip6_frag);
 	} else {
@@ -1456,18 +1581,102 @@ ip6_insertfraghdr(m0, m, hlen, frghdrp)
 
 		MGET(mfrg, M_DONTWAIT, MT_DATA);
 		if (mfrg == 0)
-			return(ENOBUFS);
+			return (ENOBUFS);
 		mfrg->m_len = sizeof(struct ip6_frag);
 		*frghdrp = mtod(mfrg, struct ip6_frag *);
 		mlast->m_next = mfrg;
 	}
 
-	return(0);
+	return (0);
 }
 
 extern int load_ipfw(void);
+static int
+ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
+    struct ifnet *ifp, struct in6_addr *dst, u_int32_t *mtup,
+    int *alwaysfragp)
+{
+	u_int32_t mtu = 0;
+	int alwaysfrag = 0;
+	int error = 0;
 
-/*
+	if (ro_pmtu != ro) {
+		/* The first hop and the final destination may differ. */
+		struct sockaddr_in6 *sa6_dst =
+		    (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
+		if (ro_pmtu->ro_rt &&
+		    ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
+		     ro_pmtu->ro_rt->generation_id != route_generation ||
+		     !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
+			rtfree(ro_pmtu->ro_rt);
+			ro_pmtu->ro_rt = (struct rtentry *)NULL;
+		}
+		if (ro_pmtu->ro_rt == NULL) {
+			bzero(sa6_dst, sizeof(*sa6_dst));
+			sa6_dst->sin6_family = AF_INET6;
+			sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
+			sa6_dst->sin6_addr = *dst;
+
+			rtalloc_scoped((struct route *)ro_pmtu,
+			    ifp != NULL ? ifp->if_index : IFSCOPE_NONE);
+		}
+	}
+
+	if (ro_pmtu->ro_rt != NULL) {
+		u_int32_t ifmtu;
+
+		lck_rw_lock_shared(nd_if_rwlock);
+		ifmtu = IN6_LINKMTU(ifp);
+		lck_rw_done(nd_if_rwlock);
+
+		RT_LOCK_SPIN(ro_pmtu->ro_rt);
+		mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
+		if (mtu > ifmtu || mtu == 0) {
+			/*
+			 * The MTU on the route is larger than the MTU on
+			 * the interface!  This shouldn't happen, unless the
+			 * MTU of the interface has been changed after the
+			 * interface was brought up.  Change the MTU in the
+			 * route to match the interface MTU (as long as the
+			 * field isn't locked).
+			 *
+			 * if MTU on the route is 0, we need to fix the MTU.
+			 * this case happens with path MTU discovery timeouts.
+			 */
+			 mtu = ifmtu;
+			 if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
+				 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
+		} else if (mtu < IPV6_MMTU) {
+			/*
+			 * RFC2460 section 5, last paragraph:
+			 * if we record an ICMPv6 too-big message with
+			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
+			 * or smaller, with a fragment header attached.
+			 * (A fragment header is needed regardless of the
+			 * packet size, for translators to identify packets.)
+			 */
+			alwaysfrag = 1;
+			mtu = IPV6_MMTU;
+		}
+		RT_UNLOCK(ro_pmtu->ro_rt);
+	} else {
+		if (ifp) {
+			lck_rw_lock_shared(nd_if_rwlock);
+			mtu = IN6_LINKMTU(ifp);
+			lck_rw_done(nd_if_rwlock);
+		} else
+			error = EHOSTUNREACH; /* XXX */
+	}
+
+	*mtup = mtu;
+	if (alwaysfragp)
+		*alwaysfragp = alwaysfrag;
+	return (error);
+}
+
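Editor's note: ip6_getpmtu() centralizes what used to be inline in ip6_output(): prefer the cached route's rmx_mtu, clamp it to the link MTU when the route value is stale or zero, substitute IPV6_MMTU and raise *alwaysfragp when a smaller value was learned from an ICMPv6 too-big, and fall back to the bare link MTU (or EHOSTUNREACH) when there is no route. Its contract from the caller's side, as exercised above:

	u_int32_t mtu;
	int alwaysfrag;

	/* On success: mtu <= link MTU, mtu >= IPV6_MMTU, and a set
	 * alwaysfrag means a fragment header is required even for packets
	 * that fit (RFC 2460 section 5, for translators). */
	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
	    &alwaysfrag)) != 0)
		goto bad;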
+/*
  * IP6 socket option processing.
  */
 int
@@ -1475,6 +1684,8 @@ ip6_ctloutput(so, sopt)
 	struct socket *so;
 	struct sockopt *sopt;
 {
+	int optdatalen, uproto;
+	void *optdata;
 	int privileged;
 	struct inpcb *in6p = sotoinpcb(so);
 	int error = 0, optval = 0;
@@ -1491,6 +1702,7 @@ ip6_ctloutput(so, sopt)
 	optname = sopt->sopt_name;
 	optlen = sopt->sopt_valsize;
 	p = sopt->sopt_p;
+	uproto = (int)so->so_proto->pr_protocol;
 
 	privileged = (proc_suser(p) == 0);
 
@@ -1499,14 +1711,10 @@ ip6_ctloutput(so, sopt)
 
 		case SOPT_SET:
 			switch (optname) {
-			case IPV6_PKTOPTIONS:
+			case IPV6_2292PKTOPTIONS:
 			{
 				struct mbuf *m;
 
-				if (sopt->sopt_valsize > MCLBYTES) {
-					error = EMSGSIZE;
-					break;
-				}
 				error = soopt_getm(sopt, &m); /* XXX */
 				if (error != 0)
 					break;
@@ -1532,12 +1740,23 @@ ip6_ctloutput(so, sopt)
 			 *  receiving ANY hbh/dst options in order to avoid
 			 *  overhead of parsing options in the kernel.
 			 */
+			case IPV6_RECVHOPOPTS:
+			case IPV6_RECVDSTOPTS:
+			case IPV6_RECVRTHDRDSTOPTS:
+				if (!privileged)
+					break;
+				/* FALLTHROUGH */
 			case IPV6_UNICAST_HOPS:
-			case IPV6_CHECKSUM:
+			case IPV6_HOPLIMIT:
 			case IPV6_FAITH:
 
+			case IPV6_RECVPKTINFO:
+			case IPV6_RECVHOPLIMIT:
+			case IPV6_RECVRTHDR:
+			case IPV6_RECVPATHMTU:
 			case IPV6_RECVTCLASS:
 			case IPV6_V6ONLY:
+			case IPV6_AUTOFLOWLABEL:
 				if (optlen != sizeof(int)) {
 					error = EINVAL;
 					break;
@@ -1554,8 +1773,7 @@ ip6_ctloutput(so, sopt)
 					else {
 						/* -1 = kernel default */
 						in6p->in6p_hops = optval;
-
-						if ((in6p->in6p_vflag &
+						if ((in6p->inp_vflag &
 						     INP_IPV4) != 0)
 							in6p->inp_ip_ttl = optval;
 					}
@@ -1563,18 +1781,103 @@ ip6_ctloutput(so, sopt)
 #define OPTSET(bit) \
 do { \
 	if (optval) \
-		in6p->in6p_flags |= (bit); \
+		in6p->inp_flags |= (bit); \
 	else \
-		in6p->in6p_flags &= ~(bit); \
-} while (0)
-#define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
+		in6p->inp_flags &= ~(bit); \
+} while (/*CONSTCOND*/ 0)
+#define OPTSET2292(bit) \
+do { \
+	in6p->inp_flags |= IN6P_RFC2292; \
+	if (optval) \
+		in6p->inp_flags |= (bit); \
+	else \
+		in6p->inp_flags &= ~(bit); \
+} while (/*CONSTCOND*/ 0)
+#define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
+
+				case IPV6_RECVPKTINFO:
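Editor's note: OPTSET2292 differs from OPTSET in exactly one respect: it also latches IN6P_RFC2292 on the pcb, which is the breadcrumb the RFC 3542 handlers below test via OPTBIT(IN6P_RFC2292) to refuse mixing the two APIs. Expanded by hand for a nonzero optval (illustration only):

	/* OPTSET2292(IN6P_PKTINFO) with a nonzero optval behaves as: */
	in6p->inp_flags |= IN6P_RFC2292;	/* remember: a 2292 app */
	in6p->inp_flags |= IN6P_PKTINFO;

	/* ...after which every RFC 3542 setter bails out early: */
	if (OPTBIT(IN6P_RFC2292)) {		/* cannot mix the APIs */
		error = EINVAL;
		break;
	}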
+					/* cannot mix with RFC2292 */
+					if (OPTBIT(IN6P_RFC2292)) {
+						error = EINVAL;
+						break;
+					}
+					OPTSET(IN6P_PKTINFO);
+					break;
+
+				case IPV6_HOPLIMIT:
+				{
+					struct ip6_pktopts **optp;
+
+					/* cannot mix with RFC2292 */
+					if (OPTBIT(IN6P_RFC2292)) {
+						error = EINVAL;
+						break;
+					}
+					optp = &in6p->in6p_outputopts;
+					error = ip6_pcbopt(IPV6_HOPLIMIT,
+					    (u_char *)&optval, sizeof(optval),
+					    optp, uproto);
+					break;
+				}
+
+				case IPV6_RECVHOPLIMIT:
+					/* cannot mix with RFC2292 */
+					if (OPTBIT(IN6P_RFC2292)) {
+						error = EINVAL;
+						break;
+					}
+					OPTSET(IN6P_HOPLIMIT);
+					break;
+
+				case IPV6_RECVHOPOPTS:
+					/* cannot mix with RFC2292 */
+					if (OPTBIT(IN6P_RFC2292)) {
+						error = EINVAL;
+						break;
+					}
+					OPTSET(IN6P_HOPOPTS);
+					break;
+
+				case IPV6_RECVDSTOPTS:
+					/* cannot mix with RFC2292 */
+					if (OPTBIT(IN6P_RFC2292)) {
+						error = EINVAL;
+						break;
+					}
+					OPTSET(IN6P_DSTOPTS);
+					break;
 
-				case IPV6_CHECKSUM:
-					in6p->in6p_cksum = optval;
+				case IPV6_RECVRTHDRDSTOPTS:
+					/* cannot mix with RFC2292 */
+					if (OPTBIT(IN6P_RFC2292)) {
+						error = EINVAL;
+						break;
+					}
+					OPTSET(IN6P_RTHDRDSTOPTS);
+					break;
+
+				case IPV6_RECVRTHDR:
+					/* cannot mix with RFC2292 */
+					if (OPTBIT(IN6P_RFC2292)) {
+						error = EINVAL;
+						break;
+					}
+					OPTSET(IN6P_RTHDR);
 					break;
 
 				case IPV6_FAITH:
-					OPTSET(IN6P_FAITH);
+					OPTSET(INP_FAITH);
+					break;
+
+				case IPV6_RECVPATHMTU:
+					/*
+					 * We ignore this option for TCP
+					 * sockets.
+					 * (RFC3542 leaves this case
+					 * unspecified.)
+					 */
+					if (uproto != IPPROTO_TCP)
+						OPTSET(IN6P_MTU);
 					break;
 
 				case IPV6_V6ONLY:
@@ -1583,30 +1886,54 @@ do { \
 					 * available only prior to bind(2).
 					 * see ipng mailing list, Jun 22 2001.
 					 */
-					if (in6p->in6p_lport ||
-					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr))
-					{
+					if (in6p->inp_lport ||
+					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_IPV6_V6ONLY);
 					if (optval)
-						in6p->in6p_vflag &= ~INP_IPV4;
+						in6p->inp_vflag &= ~INP_IPV4;
 					else
-						in6p->in6p_vflag |= INP_IPV4;
+						in6p->inp_vflag |= INP_IPV4;
 					break;
 				case IPV6_RECVTCLASS:
-					/* cannot mix with RFC2292 XXX */
+					/* we can mix with RFC2292 */
 					OPTSET(IN6P_TCLASS);
 					break;
+				case IPV6_AUTOFLOWLABEL:
+					OPTSET(IN6P_AUTOFLOWLABEL);
+					break;
+
 				}
 				break;
 
-			case IPV6_PKTINFO:
-			case IPV6_HOPLIMIT:
-			case IPV6_HOPOPTS:
-			case IPV6_DSTOPTS:
-			case IPV6_RTHDR:
+			case IPV6_TCLASS:
+			case IPV6_DONTFRAG:
+			case IPV6_USE_MIN_MTU:
+			case IPV6_PREFER_TEMPADDR:
+				if (optlen != sizeof(optval)) {
+					error = EINVAL;
+					break;
+				}
+				error = sooptcopyin(sopt, &optval,
+					sizeof optval, sizeof optval);
+				if (error)
+					break;
+				{
+					struct ip6_pktopts **optp;
+					optp = &in6p->in6p_outputopts;
+					error = ip6_pcbopt(optname,
+					    (u_char *)&optval, sizeof(optval),
+					    optp, uproto);
+					break;
+				}
+
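Editor's note: the int-valued sticky options (IPV6_TCLASS, IPV6_DONTFRAG, IPV6_USE_MIN_MTU, IPV6_PREFER_TEMPADDR) are copied in and handed to ip6_pcbopt(), which installs them in the pcb's ip6_pktopts so they apply to every subsequent send. From user space this is the standard RFC 3542 pattern; a hedged user-level example (ordinary sockets API, not part of this patch):

	#include <netinet/in.h>
	#include <sys/socket.h>

	static int
	open_ef_socket(void)
	{
		int s, tclass = 0x2e /* DSCP EF */, on = 1;

		if ((s = socket(AF_INET6, SOCK_DGRAM, 0)) == -1)
			return (-1);
		/* sticky traffic class for every packet sent on s */
		(void) setsockopt(s, IPPROTO_IPV6, IPV6_TCLASS,
		    &tclass, sizeof (tclass));
		/* refuse fragmentation; oversized sends fail with EMSGSIZE */
		(void) setsockopt(s, IPPROTO_IPV6, IPV6_DONTFRAG,
		    &on, sizeof (on));
		return (s);
	}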
+			case IPV6_2292PKTINFO:
+			case IPV6_2292HOPLIMIT:
+			case IPV6_2292HOPOPTS:
+			case IPV6_2292DSTOPTS:
+			case IPV6_2292RTHDR:
 				/* RFC 2292 */
 				if (optlen != sizeof(int)) {
 					error = EINVAL;
@@ -1617,68 +1944,74 @@ do { \
 				if (error)
 					break;
 				switch (optname) {
-				case IPV6_PKTINFO:
-					OPTSET(IN6P_PKTINFO);
+				case IPV6_2292PKTINFO:
+					OPTSET2292(IN6P_PKTINFO);
 					break;
-				case IPV6_HOPLIMIT:
-					OPTSET(IN6P_HOPLIMIT);
+				case IPV6_2292HOPLIMIT:
+					OPTSET2292(IN6P_HOPLIMIT);
 					break;
-				case IPV6_HOPOPTS:
+				case IPV6_2292HOPOPTS:
 					/*
 					 * Check super-user privilege.
 					 * See comments for IPV6_RECVHOPOPTS.
 					 */
 					if (!privileged)
 						return(EPERM);
-					OPTSET(IN6P_HOPOPTS);
+					OPTSET2292(IN6P_HOPOPTS);
 					break;
-				case IPV6_DSTOPTS:
+				case IPV6_2292DSTOPTS:
 					if (!privileged)
 						return(EPERM);
-					OPTSET(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
+					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
 					break;
-				case IPV6_RTHDR:
-					OPTSET(IN6P_RTHDR);
+				case IPV6_2292RTHDR:
+					OPTSET2292(IN6P_RTHDR);
 					break;
 				}
 				break;
-#undef OPTSET
+			case IPV6_3542PKTINFO:
+			case IPV6_3542HOPOPTS:
+			case IPV6_3542RTHDR:
+			case IPV6_3542DSTOPTS:
+			case IPV6_RTHDRDSTOPTS:
+			case IPV6_3542NEXTHOP:
+			{
+				/* new advanced API (RFC3542) */
+				struct mbuf *m;
 
-			case IPV6_TCLASS:
-				if (optlen != sizeof(optval)) {
+				/* cannot mix with RFC2292 */
+				if (OPTBIT(IN6P_RFC2292)) {
 					error = EINVAL;
 					break;
 				}
-				error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
-				if (error)
+				error = soopt_getm(sopt, &m);
+				if (error != 0)
+					break;
+				error = soopt_mcopyin(sopt, m);
+				if (error) {
+					m_freem(m);
 					break;
-				error = ip6_pcbopt(optname, (u_char *)&optval, sizeof(optval), &in6p->in6p_outputopts);
+				}
+				error = ip6_pcbopt(optname, mtod(m, u_char *),
+					m->m_len, &in6p->in6p_outputopts, uproto);
+				m_freem(m);
 				break;
+			}
+#undef OPTSET
 
 			case IPV6_MULTICAST_IF:
 			case IPV6_MULTICAST_HOPS:
 			case IPV6_MULTICAST_LOOP:
 			case IPV6_JOIN_GROUP:
 			case IPV6_LEAVE_GROUP:
-			    {
-				struct mbuf *m;
-				if (sopt->sopt_valsize > MLEN) {
-					error = EMSGSIZE;
-					break;
-				}
-				/* XXX */
-				MGET(m, sopt->sopt_p != kernproc ?
-				    M_WAIT : M_DONTWAIT, MT_HEADER);
-				if (m == 0) {
-					error = ENOBUFS;
-					break;
-				}
-				m->m_len = sopt->sopt_valsize;
-				error = sooptcopyin(sopt, mtod(m, char *),
-						    m->m_len, m->m_len);
-				error =	ip6_setmoptions(sopt->sopt_name, in6p, m);
-				(void)m_free(m);
-			    }
+			case IPV6_MSFILTER:
+			case MCAST_BLOCK_SOURCE:
+			case MCAST_UNBLOCK_SOURCE:
+			case MCAST_JOIN_GROUP:
+			case MCAST_LEAVE_GROUP:
+			case MCAST_JOIN_SOURCE_GROUP:
+			case MCAST_LEAVE_SOURCE_GROUP:
+				error = ip6_setmoptions(in6p, sopt);
 				break;
 
 			case IPV6_PORTRANGE:
@@ -1689,18 +2022,18 @@ do { \
 
 				switch (optval) {
 				case IPV6_PORTRANGE_DEFAULT:
-					in6p->in6p_flags &= ~(IN6P_LOWPORT);
-					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
+					in6p->inp_flags &= ~(INP_LOWPORT);
+					in6p->inp_flags &= ~(INP_HIGHPORT);
 					break;
 
 				case IPV6_PORTRANGE_HIGH:
-					in6p->in6p_flags &= ~(IN6P_LOWPORT);
-					in6p->in6p_flags |= IN6P_HIGHPORT;
+					in6p->inp_flags &= ~(INP_LOWPORT);
+					in6p->inp_flags |= INP_HIGHPORT;
 					break;
 
 				case IPV6_PORTRANGE_LOW:
-					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
-					in6p->in6p_flags |= IN6P_LOWPORT;
+					in6p->inp_flags &= ~(INP_HIGHPORT);
+					in6p->inp_flags |= INP_LOWPORT;
 					break;
 
 				default:
@@ -1716,10 +2049,6 @@ do { \
 				size_t len = 0;
 				struct mbuf *m;
 
-                                if (sopt->sopt_valsize > MCLBYTES) {
-                                        error = EMSGSIZE;
-                                        break;
-                                }
 				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
 					break;
 				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
@@ -1751,6 +2080,47 @@ do { \
 				break;
 #endif /* IPFIREWALL */
 
+			/*
+			 * IPv6 variant of IP_BOUND_IF; for details see
+			 * comments on IP_BOUND_IF in ip_ctloutput().
+			 */
+			case IPV6_BOUND_IF:
+				/* This option is settable only on IPv6 */
+				if (!(in6p->inp_vflag & INP_IPV6)) {
+					error = EINVAL;
+					break;
+				}
+
+				error = sooptcopyin(sopt, &optval,
+				    sizeof (optval), sizeof (optval));
+
+				if (error)
+					break;
+
+				inp_bindif(in6p, optval);
+				break;
+
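As a rough userland sketch of the setter above (IPV6_BOUND_IF is a private option, so the constant is assumed to be available from a matching source tree; "en0" is only an example interface name):

#include <sys/socket.h>
#include <netinet/in.h>
#include <net/if.h>

static int
bind_socket_to_en0(int s)
{
	int idx = (int)if_nametoindex("en0");	/* 0 if no such interface */

	if (idx == 0)
		return (-1);
	return (setsockopt(s, IPPROTO_IPV6, IPV6_BOUND_IF,
	    &idx, sizeof (idx)));
}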
+			case IPV6_NO_IFT_CELLULAR:
+				/* This option is settable only for IPv6 */
+				if (!(in6p->inp_vflag & INP_IPV6)) {
+					error = EINVAL;
+					break;
+				}
+
+				error = sooptcopyin(sopt, &optval,
+				    sizeof (optval), sizeof (optval));
+
+				if (error)
+					break;
+
+				error = inp_nocellular(in6p, optval);
+				break;
+
+			case IPV6_OUT_IF:
+				/* This option is not settable */
+				error = EINVAL;
+				break;
+
 			default:
 				error = ENOPROTOOPT;
 				break;
@@ -1760,41 +2130,69 @@ do { \
 		case SOPT_GET:
 			switch (optname) {
 
-			case IPV6_PKTOPTIONS:
-				if (in6p->in6p_options) {
-					struct mbuf *m;
-					m = m_copym(in6p->in6p_options,
-					    0, M_COPYALL, M_WAIT);
-					if (m == NULL) {
-						error = ENOBUFS;
-						break;
-					}
-					error = soopt_mcopyout(sopt, m);
-					if (error == 0)
-						m_freem(m);
-				} else
-					sopt->sopt_valsize = 0;
+			case IPV6_2292PKTOPTIONS:
+				/*
+				 * RFC3542 (effectively) deprecated the
+				 * semantics of the 2292-style pktoptions.
+				 * Since it was not reliable in nature (i.e.,
+				 * Since they were unreliable by nature
+				 * (applications had to cope with missing
+				 * information anyway), we simplify this part
+				 * by always returning empty data.
+				sopt->sopt_valsize = 0;
 				break;
 
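The visible effect for a caller of the deprecated 2292 API is a successful getsockopt() that returns zero bytes; a minimal sketch, assuming IPV6_2292PKTOPTIONS is exposed by the headers on a matching system:

#include <sys/socket.h>
#include <netinet/in.h>
#include <assert.h>

static void
check_2292_pktoptions(int s)
{
	unsigned char buf[64];
	socklen_t len = sizeof (buf);

	if (getsockopt(s, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, buf, &len) == 0)
		assert(len == 0);	/* always empty, per the comment above */
}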
+			case IPV6_RECVHOPOPTS:
+			case IPV6_RECVDSTOPTS:
+			case IPV6_RECVRTHDRDSTOPTS:
 			case IPV6_UNICAST_HOPS:
-			case IPV6_CHECKSUM:
+			case IPV6_RECVPKTINFO:
+			case IPV6_RECVHOPLIMIT:
+			case IPV6_RECVRTHDR:
+			case IPV6_RECVPATHMTU:
 
 			case IPV6_FAITH:
 			case IPV6_V6ONLY:
 			case IPV6_PORTRANGE:
 			case IPV6_RECVTCLASS:
+			case IPV6_AUTOFLOWLABEL:
 				switch (optname) {
 
+				case IPV6_RECVHOPOPTS:
+					optval = OPTBIT(IN6P_HOPOPTS);
+					break;
+
+				case IPV6_RECVDSTOPTS:
+					optval = OPTBIT(IN6P_DSTOPTS);
+					break;
+
+				case IPV6_RECVRTHDRDSTOPTS:
+					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
+					break;
+
 				case IPV6_UNICAST_HOPS:
 					optval = in6p->in6p_hops;
 					break;
 
-				case IPV6_CHECKSUM:
-					optval = in6p->in6p_cksum;
+				case IPV6_RECVPKTINFO:
+					optval = OPTBIT(IN6P_PKTINFO);
+					break;
+
+				case IPV6_RECVHOPLIMIT:
+					optval = OPTBIT(IN6P_HOPLIMIT);
+					break;
+
+				case IPV6_RECVRTHDR:
+					optval = OPTBIT(IN6P_RTHDR);
+					break;
+
+				case IPV6_RECVPATHMTU:
+					optval = OPTBIT(IN6P_MTU);
 					break;
 
 				case IPV6_FAITH:
-					optval = OPTBIT(IN6P_FAITH);
+					optval = OPTBIT(INP_FAITH);
 					break;
 
 				case IPV6_V6ONLY:
@@ -1804,10 +2202,10 @@ do { \
 				case IPV6_PORTRANGE:
 				    {
 					int flags;
-					flags = in6p->in6p_flags;
-					if (flags & IN6P_HIGHPORT)
+					flags = in6p->inp_flags;
+					if (flags & INP_HIGHPORT)
 						optval = IPV6_PORTRANGE_HIGH;
-					else if (flags & IN6P_LOWPORT)
+					else if (flags & INP_LOWPORT)
 						optval = IPV6_PORTRANGE_LOW;
 					else
 						optval = 0;
@@ -1817,64 +2215,93 @@ do { \
 					optval = OPTBIT(IN6P_TCLASS);
 					break;
 
+				case IPV6_AUTOFLOWLABEL:
+					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
+					break;
 				}
+				if (error)
+					break;
 				error = sooptcopyout(sopt, &optval,
 					sizeof optval);
 				break;
 
-			case IPV6_PKTINFO:
-			case IPV6_HOPLIMIT:
-			case IPV6_HOPOPTS:
-			case IPV6_RTHDR:
-			case IPV6_DSTOPTS:
-				if ((optname == IPV6_HOPOPTS ||
-				    optname == IPV6_DSTOPTS) &&
-				    !privileged)
-					return(EPERM);
+			case IPV6_PATHMTU:
+			{
+				u_int32_t pmtu = 0;
+				struct ip6_mtuinfo mtuinfo;
+				struct route_in6 sro;
+
+				bzero(&sro, sizeof(sro));
+
+				if (!(so->so_state & SS_ISCONNECTED))
+					return (ENOTCONN);
+				/*
+				 * XXX: we do not consider the case of source
+				 * routing, or optional information to specify
+				 * the outgoing interface.
+				 */
+				error = ip6_getpmtu(&sro, NULL, NULL,
+				    &in6p->in6p_faddr, &pmtu, NULL);
+				if (sro.ro_rt)
+					rtfree(sro.ro_rt);
+				if (error)
+					break;
+				if (pmtu > IPV6_MAXPACKET)
+					pmtu = IPV6_MAXPACKET;
+
+				bzero(&mtuinfo, sizeof(mtuinfo));
+				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
+				optdata = (void *)&mtuinfo;
+				optdatalen = sizeof(mtuinfo);
+				error = sooptcopyout(sopt, optdata,
+				    optdatalen);
+				break;
+			}
+
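From an application's point of view this implements the RFC3542 path-MTU query, which only works on a connected socket (hence the ENOTCONN check above). A hedged sketch, assuming struct ip6_mtuinfo is visible via <netinet/in.h>:

#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>

static int
print_path_mtu(int s)	/* s must already be connect(2)ed */
{
	struct ip6_mtuinfo mtuinfo;
	socklen_t len = sizeof (mtuinfo);

	if (getsockopt(s, IPPROTO_IPV6, IPV6_PATHMTU, &mtuinfo, &len) == -1)
		return (-1);
	printf("path MTU: %u\n", (unsigned int)mtuinfo.ip6m_mtu);
	return (0);
}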
+			case IPV6_2292PKTINFO:
+			case IPV6_2292HOPLIMIT:
+			case IPV6_2292HOPOPTS:
+			case IPV6_2292RTHDR:
+			case IPV6_2292DSTOPTS:
 				switch (optname) {
-				case IPV6_PKTINFO:
+				case IPV6_2292PKTINFO:
 					optval = OPTBIT(IN6P_PKTINFO);
 					break;
-				case IPV6_HOPLIMIT:
+				case IPV6_2292HOPLIMIT:
 					optval = OPTBIT(IN6P_HOPLIMIT);
 					break;
-				case IPV6_HOPOPTS:
-					if (!privileged)
-						return(EPERM);
+				case IPV6_2292HOPOPTS:
 					optval = OPTBIT(IN6P_HOPOPTS);
 					break;
-				case IPV6_RTHDR:
+				case IPV6_2292RTHDR:
 					optval = OPTBIT(IN6P_RTHDR);
 					break;
-				case IPV6_DSTOPTS:
-					if (!privileged)
-						return(EPERM);
+				case IPV6_2292DSTOPTS:
 					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
 					break;
 				}
 				error = sooptcopyout(sopt, &optval,
-					sizeof optval);
+				    sizeof optval);
 				break;
-
+			case IPV6_PKTINFO:
+			case IPV6_HOPOPTS:
+			case IPV6_RTHDR:
+			case IPV6_DSTOPTS:
+			case IPV6_RTHDRDSTOPTS:
+			case IPV6_NEXTHOP:
 			case IPV6_TCLASS:
-				error = ip6_getpcbopt(in6p->in6p_outputopts, optname, sopt);
+			case IPV6_DONTFRAG:
+			case IPV6_USE_MIN_MTU:
+			case IPV6_PREFER_TEMPADDR:
+				error = ip6_getpcbopt(in6p->in6p_outputopts,
+				    optname, sopt);
 				break;
 
 			case IPV6_MULTICAST_IF:
 			case IPV6_MULTICAST_HOPS:
 			case IPV6_MULTICAST_LOOP:
-			case IPV6_JOIN_GROUP:
-			case IPV6_LEAVE_GROUP:
-			    {
-				struct mbuf *m;
-				error = ip6_getmoptions(sopt->sopt_name,
-						in6p->in6p_moptions, &m);
-				if (error == 0)
-					error = sooptcopyout(sopt,
-						mtod(m, char *), m->m_len);
-				if (m != NULL)
-					m_freem(m);
-			    }
+			case IPV6_MSFILTER:
+				error = ip6_getmoptions(in6p, sopt);
 				break;
 
 #if IPSEC
@@ -1885,10 +2312,6 @@ do { \
 				struct mbuf *m = NULL;
 				struct mbuf **mp = &m;
 
-                                if (sopt->sopt_valsize > MCLBYTES) {
-                                        error = EMSGSIZE;
-                                        break;
-                                }
 				error = soopt_getm(sopt, &m); /* XXX */
 				if (error != 0)
 					break;
@@ -1921,6 +2344,26 @@ do { \
 				break;
 #endif /* IPFIREWALL */
 
+			case IPV6_BOUND_IF:
+				if (in6p->inp_flags & INP_BOUND_IF)
+					optval = in6p->inp_boundif;
+				error = sooptcopyout(sopt, &optval,
+				    sizeof (optval));
+				break;
+
+			case IPV6_NO_IFT_CELLULAR:
+				optval = (in6p->inp_flags & INP_NO_IFT_CELLULAR)
+				    ? 1 : 0;
+				error = sooptcopyout(sopt, &optval,
+				    sizeof (optval));
+				break;
+
+			case IPV6_OUT_IF:
+				optval = in6p->in6p_last_outif;
+				error = sooptcopyout(sopt, &optval,
+				    sizeof (optval));
+				break;
+
 			default:
 				error = ENOPROTOOPT;
 				break;
@@ -1933,35 +2376,105 @@ do { \
 	return(error);
 }
 
-/*
- * Set up IP6 options in pcb for insertion in output packets or
- * specifying behavior of outgoing packets.
- */
-static int
-ip6_pcbopts(
-	struct ip6_pktopts **pktopt,
-	struct mbuf *m,
-	__unused struct socket *so,
-	struct sockopt *sopt)
+int
+ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
 {
-	struct ip6_pktopts *opt = *pktopt;
-	int error = 0, priv;
-	struct proc *p = sopt->sopt_p;
+	int error = 0, optval, optlen;
+	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
+	struct inpcb *in6p = sotoinpcb(so);
+	int level, op, optname;
 
-	/* turn off any old options. */
-	if (opt) {
-#if DIAGNOSTIC
-		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
-		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
-		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
-			printf("ip6_pcbopts: all specified options are cleared.\n");
-#endif
-		ip6_clearpktopts(opt, 1, -1);
-	} else {
-		opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK);
-		if (opt == NULL)
-			return ENOBUFS;
-	}
+	level = sopt->sopt_level;
+	op = sopt->sopt_dir;
+	optname = sopt->sopt_name;
+	optlen = sopt->sopt_valsize;
+
+	if (level != IPPROTO_IPV6) {
+		return (EINVAL);
+	}
+
+	switch (optname) {
+	case IPV6_CHECKSUM:
+		/*
+		 * For ICMPv6 sockets, no modification of the checksum
+		 * offset is allowed; permit "no change" values to help
+		 * existing apps.
+		 *
+		 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
+		 * for an ICMPv6 socket will fail."
+		 * The current behavior does not conform to RFC3542.
+		 */
+		switch (op) {
+		case SOPT_SET:
+			if (optlen != sizeof(int)) {
+				error = EINVAL;
+				break;
+			}
+			error = sooptcopyin(sopt, &optval, sizeof(optval),
+					    sizeof(optval));
+			if (error)
+				break;
+			if ((optval % 2) != 0) {
+				/* the API assumes even offset values */
+				error = EINVAL;
+			} else if (so->so_proto->pr_protocol ==
+			    IPPROTO_ICMPV6) {
+				if (optval != icmp6off)
+					error = EINVAL;
+			} else
+				in6p->in6p_cksum = optval;
+			break;
+
+		case SOPT_GET:
+			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
+				optval = icmp6off;
+			else
+				optval = in6p->in6p_cksum;
+
+			error = sooptcopyout(sopt, &optval, sizeof(optval));
+			break;
+
+		default:
+			error = EINVAL;
+			break;
+		}
+		break;
+
+	default:
+		error = ENOPROTOOPT;
+		break;
+	}
+
+	return (error);
+}
+
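The practical upshot of ip6_raw_ctloutput(): on an ICMPv6 raw socket the checksum offset is pinned to offsetof(struct icmp6_hdr, icmp6_cksum) (i.e., 2), while other raw sockets may set any even offset. A hedged userland sketch:

#include <sys/socket.h>
#include <netinet/in.h>

static int
set_cksum_offset(int raw_s, int offset)
{
	/*
	 * `offset` must be even (the API assumes even offsets); on an
	 * ICMPv6 socket only the value 2 is accepted, per the code above.
	 */
	return (setsockopt(raw_s, IPPROTO_IPV6, IPV6_CHECKSUM,
	    &offset, sizeof (offset)));
}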
+/*
+ * Set up IP6 options in pcb for insertion in output packets or
+ * specifying behavior of outgoing packets.
+ */
+static int
+ip6_pcbopts(
+	struct ip6_pktopts **pktopt,
+	struct mbuf *m,
+	__unused struct socket *so,
+	__unused struct sockopt *sopt)
+{
+	struct ip6_pktopts *opt = *pktopt;
+	int error = 0;
+
+	/* turn off any old options. */
+	if (opt) {
+#if DIAGNOSTIC
+		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
+		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
+		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
+			printf("ip6_pcbopts: all specified options are cleared.\n");
+#endif
+		ip6_clearpktopts(opt, -1);
+	} else {
+		opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK);
+		if (opt == NULL)
+			return ENOBUFS;
+	}
 	*pktopt = NULL;
 
 	if (!m || m->m_len == 0) {
@@ -1974,11 +2487,9 @@ ip6_pcbopts(
 		return(0);
 	}
 
-	priv = (proc_suser(p) == 0);
-
 	/*  set options specified by user. */
-	if ((error = ip6_setpktoptions(m, opt, priv, 1)) != 0) {
-		ip6_clearpktopts(opt, 1, -1); /* XXX: discard all options */
+	if ((error = ip6_setpktopts(m, opt, NULL, so->so_proto->pr_protocol)) != 0) {
+		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
 		FREE(opt, M_IP6OPT);
 		return(error);
 	}
@@ -1986,19 +2497,36 @@ ip6_pcbopts(
 	return(0);
 }
 
+/*
+ * initialize ip6_pktopts.  beware that there are non-zero default values in
+ * the struct.
+ */
+void
+ip6_initpktopts(struct ip6_pktopts *opt)
+{
+
+	bzero(opt, sizeof(*opt));
+	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
+	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
+	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
+	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
+}
+
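The warning in the comment is worth spelling out: several fields use -1 or a nonzero policy constant as their "unset" value, so a plain bzero() would silently request hop limit 0 and traffic class 0. A standalone analog of the same sentinel-default pattern (the struct and names below are illustrative, not kernel API):

#include <string.h>
#include <assert.h>

struct pktopts_like {
	int hlim;	/* -1 means "use the system default hop limit" */
	int tclass;	/* -1 means "use the default traffic class" */
};

static void
init_pktopts_like(struct pktopts_like *opt)
{
	memset(opt, 0, sizeof (*opt));	/* zero first, as bzero() above */
	opt->hlim = -1;			/* then apply the nonzero defaults */
	opt->tclass = -1;
}

int
main(void)
{
	struct pktopts_like opt;

	init_pktopts_like(&opt);
	assert(opt.hlim == -1 && opt.tclass == -1);
	return (0);
}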
 static int
-ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt)
+ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, int uproto)
 {
 	struct ip6_pktopts *opt;
 
 	opt = *pktopt;
 	if (opt == NULL) {
 		opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK);
+		if (opt == NULL)
+			return(ENOBUFS);
 		ip6_initpktopts(opt);
 		*pktopt = opt;
 	}
 
-	return (ip6_setpktopt(optname, buf, len, opt));
+	return (ip6_setpktopt(optname, buf, len, opt, 1, 0, uproto));
 }
 
 static int
@@ -2006,15 +2534,85 @@ ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
 {
 	void *optdata = NULL;
 	int optdatalen = 0;
-	int deftclass = 0;
+	struct ip6_ext *ip6e;
 	int error = 0;
+	struct in6_pktinfo null_pktinfo;
+	int deftclass = 0, on;
+	int defminmtu = IP6PO_MINMTU_MCASTONLY;
+	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
 
 	switch (optname) {
+	case IPV6_PKTINFO:
+		if (pktopt && pktopt->ip6po_pktinfo)
+			optdata = (void *)pktopt->ip6po_pktinfo;
+		else {
+			/* XXX: we don't have to do this every time... */
+			bzero(&null_pktinfo, sizeof(null_pktinfo));
+			optdata = (void *)&null_pktinfo;
+		}
+		optdatalen = sizeof(struct in6_pktinfo);
+		break;
 	case IPV6_TCLASS:
 		if (pktopt && pktopt->ip6po_tclass >= 0)
-			optdata = &pktopt->ip6po_tclass;
+			optdata = (void *)&pktopt->ip6po_tclass;
+		else
+			optdata = (void *)&deftclass;
+		optdatalen = sizeof(int);
+		break;
+	case IPV6_HOPOPTS:
+		if (pktopt && pktopt->ip6po_hbh) {
+			optdata = (void *)pktopt->ip6po_hbh;
+			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
+			optdatalen = (ip6e->ip6e_len + 1) << 3;
+		}
+		break;
+	case IPV6_RTHDR:
+		if (pktopt && pktopt->ip6po_rthdr) {
+			optdata = (void *)pktopt->ip6po_rthdr;
+			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
+			optdatalen = (ip6e->ip6e_len + 1) << 3;
+		}
+		break;
+	case IPV6_RTHDRDSTOPTS:
+		if (pktopt && pktopt->ip6po_dest1) {
+			optdata = (void *)pktopt->ip6po_dest1;
+			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
+			optdatalen = (ip6e->ip6e_len + 1) << 3;
+		}
+		break;
+	case IPV6_DSTOPTS:
+		if (pktopt && pktopt->ip6po_dest2) {
+			optdata = (void *)pktopt->ip6po_dest2;
+			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
+			optdatalen = (ip6e->ip6e_len + 1) << 3;
+		}
+		break;
+	case IPV6_NEXTHOP:
+		if (pktopt && pktopt->ip6po_nexthop) {
+			optdata = (void *)pktopt->ip6po_nexthop;
+			optdatalen = pktopt->ip6po_nexthop->sa_len;
+		}
+		break;
+	case IPV6_USE_MIN_MTU:
+		if (pktopt)
+			optdata = (void *)&pktopt->ip6po_minmtu;
 		else
-			optdata = &deftclass;
+			optdata = (void *)&defminmtu;
+		optdatalen = sizeof(int);
+		break;
+	case IPV6_DONTFRAG:
+		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
+			on = 1;
+		else
+			on = 0;
+		optdata = (void *)&on;
+		optdatalen = sizeof(on);
+		break;
+	case IPV6_PREFER_TEMPADDR:
+		if (pktopt)
+			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
+		else
+			optdata = (void *)&defpreftemp;
 		optdatalen = sizeof(int);
 		break;
 	default:		/* should not happen */
@@ -2025,81 +2623,48 @@ ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
 	}
 
 	error = sooptcopyout(sopt, optdata, optdatalen);
-	return (error);
-}
-
-static int
-ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt)
-{
-	switch (optname) {
-	case IPV6_TCLASS:
-	{
-		int tclass;
 
-		if (len != sizeof(int))
-			return (EINVAL);
-		tclass = *(int *)buf;
-		if (tclass < -1 || tclass > 255)
-			return (EINVAL);
-
-		opt->ip6po_tclass = tclass;
-		break;
-	}
-
-	default:
-		return (ENOPROTOOPT);
-	} /* end of switch */
-
-	return (0);
-}
-
-/*
- * initialize ip6_pktopts.  beware that there are non-zero default values in
- * the struct.
- */
-void
-ip6_initpktopts(opt)
-	struct ip6_pktopts *opt;
-{
-	bzero(opt, sizeof(*opt));
-	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
-	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
+	return (error);
 }
 
 void
-ip6_clearpktopts(pktopt, needfree, optname)
+ip6_clearpktopts(pktopt, optname)
 	struct ip6_pktopts *pktopt;
-	int needfree, optname;
+	int optname;
 {
 	if (pktopt == NULL)
 		return;
 
-	if (optname == -1) {
-		if (needfree && pktopt->ip6po_pktinfo)
+	if (optname == -1 || optname == IPV6_PKTINFO) {
+		if (pktopt->ip6po_pktinfo)
 			FREE(pktopt->ip6po_pktinfo, M_IP6OPT);
 		pktopt->ip6po_pktinfo = NULL;
 	}
-	if (optname == -1)
+	if (optname == -1 || optname == IPV6_HOPLIMIT)
 		pktopt->ip6po_hlim = -1;
-	if (optname == -1)
+	if (optname == -1 || optname == IPV6_TCLASS)
 		pktopt->ip6po_tclass = -1;
-	if (optname == -1) {
-		if (needfree && pktopt->ip6po_nexthop)
+	if (optname == -1 || optname == IPV6_NEXTHOP) {
+		if (pktopt->ip6po_nextroute.ro_rt) {
+			rtfree(pktopt->ip6po_nextroute.ro_rt);
+			pktopt->ip6po_nextroute.ro_rt = NULL;
+		}
+		if (pktopt->ip6po_nexthop)
 			FREE(pktopt->ip6po_nexthop, M_IP6OPT);
 		pktopt->ip6po_nexthop = NULL;
 	}
-	if (optname == -1) {
-		if (needfree && pktopt->ip6po_hbh)
+	if (optname == -1 || optname == IPV6_HOPOPTS) {
+		if (pktopt->ip6po_hbh)
 			FREE(pktopt->ip6po_hbh, M_IP6OPT);
 		pktopt->ip6po_hbh = NULL;
 	}
-	if (optname == -1) {
-		if (needfree && pktopt->ip6po_dest1)
+	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
+		if (pktopt->ip6po_dest1)
 			FREE(pktopt->ip6po_dest1, M_IP6OPT);
 		pktopt->ip6po_dest1 = NULL;
 	}
-	if (optname == -1) {
-		if (needfree && pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
+	if (optname == -1 || optname == IPV6_RTHDR) {
+		if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
 			FREE(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
 		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
 		if (pktopt->ip6po_route.ro_rt) {
@@ -2107,8 +2672,8 @@ ip6_clearpktopts(pktopt, needfree, optname)
 			pktopt->ip6po_route.ro_rt = NULL;
 		}
 	}
-	if (optname == -1) {
-		if (needfree && pktopt->ip6po_dest2)
+	if (optname == -1 || optname == IPV6_DSTOPTS) {
+		if (pktopt->ip6po_dest2)
 			FREE(pktopt->ip6po_dest2, M_IP6OPT);
 		pktopt->ip6po_dest2 = NULL;
 	}
@@ -2126,25 +2691,17 @@ do {\
 	}\
 } while (0)
 
-struct ip6_pktopts *
-ip6_copypktopts(src, canwait)
-	struct ip6_pktopts *src;
-	int canwait;
+static int
+copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
 {
-	struct ip6_pktopts *dst;
-
-	if (src == NULL) {
+	if (dst == NULL || src == NULL) {
 		printf("ip6_clearpktopts: invalid argument\n");
-		return(NULL);
+		return (EINVAL);
 	}
 
-	dst = _MALLOC(sizeof(*dst), M_IP6OPT, canwait);
-	if (dst == NULL && canwait == M_NOWAIT)
-		return (NULL);
-	bzero(dst, sizeof(*dst));
-
 	dst->ip6po_hlim = src->ip6po_hlim;
 	dst->ip6po_tclass = src->ip6po_tclass;
+	dst->ip6po_flags = src->ip6po_flags;
 	if (src->ip6po_pktinfo) {
 		dst->ip6po_pktinfo = _MALLOC(sizeof(*dst->ip6po_pktinfo),
 					    M_IP6OPT, canwait);
@@ -2164,20 +2721,33 @@ ip6_copypktopts(src, canwait)
 	PKTOPT_EXTHDRCPY(ip6po_dest1);
 	PKTOPT_EXTHDRCPY(ip6po_dest2);
-	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
+	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* does not copy the cached route */
-	return(dst);
+	return (0);
 
   bad:
-	if (dst->ip6po_pktinfo) FREE(dst->ip6po_pktinfo, M_IP6OPT);
-	if (dst->ip6po_nexthop) FREE(dst->ip6po_nexthop, M_IP6OPT);
-	if (dst->ip6po_hbh) FREE(dst->ip6po_hbh, M_IP6OPT);
-	if (dst->ip6po_dest1) FREE(dst->ip6po_dest1, M_IP6OPT);
-	if (dst->ip6po_dest2) FREE(dst->ip6po_dest2, M_IP6OPT);
-	if (dst->ip6po_rthdr) FREE(dst->ip6po_rthdr, M_IP6OPT);
-	FREE(dst, M_IP6OPT);
-	return(NULL);
+	ip6_clearpktopts(dst, -1);
+	return (ENOBUFS);
 }
 #undef PKTOPT_EXTHDRCPY
 
+struct ip6_pktopts *
+ip6_copypktopts(struct ip6_pktopts *src, int canwait)
+{
+	int error;
+	struct ip6_pktopts *dst;
+
+	dst = _MALLOC(sizeof(*dst), M_IP6OPT, canwait);
+	if (dst == NULL)
+		return (NULL);
+	ip6_initpktopts(dst);
+
+	if ((error = copypktopts(dst, src, canwait)) != 0) {
+		FREE(dst, M_IP6OPT);
+		return (NULL);
+	}
+
+	return (dst);
+}
+
 void
 ip6_freepcbopts(pktopt)
 	struct ip6_pktopts *pktopt;
@@ -2185,707 +2755,596 @@ ip6_freepcbopts(pktopt)
 	if (pktopt == NULL)
 		return;
 
-	ip6_clearpktopts(pktopt, 1, -1);
+	ip6_clearpktopts(pktopt, -1);
 
 	FREE(pktopt, M_IP6OPT);
 }
 
-/*
- * Set the IP6 multicast options in response to user setsockopt().
- */
-static int
-ip6_setmoptions(
-	int optname,
-	struct inpcb* in6p,
-	struct mbuf *m)
+void
+ip6_moptions_init(void)
 {
-	int error = 0;
-	u_int loop, ifindex;
-	struct ipv6_mreq *mreq;
-	struct ifnet *ifp;
-	struct ip6_moptions **im6op = &in6p->in6p_moptions;
-	struct ip6_moptions *im6o = *im6op;
-	struct ip_moptions *imo;
-	struct route_in6 ro;
-	struct sockaddr_in6 *dst;
-	struct in6_multi_mship *imm;
-
-	if (im6o == NULL) {
-		/*
-		 * No multicast option buffer attached to the pcb;
-		 * allocate one and initialize to default values.
-		 */
-		im6o = (struct ip6_moptions *)
-			_MALLOC(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
-
-		if (im6o == NULL)
-			return(ENOBUFS);
-		*im6op = im6o;
-		im6o->im6o_multicast_ifp = NULL;
-		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
-		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
-		LIST_INIT(&im6o->im6o_memberships);
-	}
-	
-	if (in6p->inp_moptions == NULL) {
-		/*
-		 * No IPv4 multicast option buffer attached to the pcb;
-		 * call ip_createmoptions to allocate one and initialize
-		 * to default values.
-		 */
-		error = ip_createmoptions(&in6p->inp_moptions);
-		if (error != 0)
-			return error;
-	}
-	imo = in6p->inp_moptions;
-
-	switch (optname) {
-
-	case IPV6_MULTICAST_IF:
-		/*
-		 * Select the interface for outgoing multicast packets.
-		 */
-		if (m == NULL || m->m_len != sizeof(u_int)) {
-			error = EINVAL;
-			break;
-		}
-		bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
-
-		ifnet_head_lock_shared();
-		/* Don't need to check is ifindex is < 0 since it's unsigned */
-		if (if_index < ifindex) {
-			error = ENXIO;	/* XXX EINVAL? */
-			ifnet_head_done();
-			break;
-		}
-		ifp = ifindex2ifnet[ifindex];
-		ifnet_head_done();
-		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
-			error = EADDRNOTAVAIL;
-			break;
-		}
-		im6o->im6o_multicast_ifp = ifp;
-		imo->imo_multicast_ifp = ifp;
-		break;
-
-	case IPV6_MULTICAST_HOPS:
-	    {
-		/*
-		 * Set the IP6 hoplimit for outgoing multicast packets.
-		 */
-		int optval;
-		if (m == NULL || m->m_len != sizeof(int)) {
-			error = EINVAL;
-			break;
-		}
-		bcopy(mtod(m, u_int *), &optval, sizeof(optval));
-		if (optval < -1 || optval >= 256)
-			error = EINVAL;
-		else if (optval == -1) {
-			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
-			imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
-		} else {
-			im6o->im6o_multicast_hlim = optval;
-			imo->imo_multicast_ttl = optval;
-		}
-		break;
-	    }
-
-	case IPV6_MULTICAST_LOOP:
-		/*
-		 * Set the loopback flag for outgoing multicast packets.
-		 * Must be zero or one.
-		 */
-		if (m == NULL || m->m_len != sizeof(u_int)) {
-			error = EINVAL;
-			break;
-		}
-		bcopy(mtod(m, u_int *), &loop, sizeof(loop));
-		if (loop > 1) {
-			error = EINVAL;
-			break;
-		}
-		im6o->im6o_multicast_loop = loop;
-		imo->imo_multicast_loop = loop;
-		break;
-
-	case IPV6_JOIN_GROUP:
-		/*
-		 * Add a multicast group membership.
-		 * Group must be a valid IP6 multicast address.
-		 */
-		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
-			error = EINVAL;
-			break;
-		}
-		mreq = mtod(m, struct ipv6_mreq *);
-		/*
-		 * If the interface is specified, validate it.
-		 *
-		 * Don't need to check if it's < 0, since it's unsigned
-		 */
-		ifnet_head_lock_shared();
-		if (if_index < mreq->ipv6mr_interface) {
-			ifnet_head_done();
-			error = ENXIO;	/* XXX EINVAL? */
-			break;
-		}
-		ifp = ifindex2ifnet[mreq->ipv6mr_interface];
-		ifnet_head_done();
-
-		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
-			/*
-			 * We use the unspecified address to specify to accept
-			 * all multicast addresses. Only super user is allowed
-			 * to do this.
-			 */
-			if (suser(kauth_cred_get(), 0))
-			{
-				error = EACCES;
-				break;
-			}
-		} else if (IN6_IS_ADDR_V4MAPPED(&mreq->ipv6mr_multiaddr)) {
-			struct ip_mreq v4req;
-			
-			v4req.imr_multiaddr.s_addr = mreq->ipv6mr_multiaddr.s6_addr32[3];
-			v4req.imr_interface.s_addr = INADDR_ANY;
-			
-			/* Find an IPv4 address on the specified interface. */
-			if (mreq->ipv6mr_interface != 0) {
-				struct in_ifaddr *ifa;
-
-				lck_rw_lock_shared(in_ifaddr_rwlock);
-				TAILQ_FOREACH(ifa, &in_ifaddrhead, ia_link) {
-					if (ifa->ia_ifp == ifp) {
-						v4req.imr_interface = IA_SIN(ifa)->sin_addr;
-						break;
-					}
-				}
-				lck_rw_done(in_ifaddr_rwlock);
-				
-				if (v4req.imr_multiaddr.s_addr == 0) {
-					/* Interface has no IPv4 address. */
-					error = EINVAL;
-					break;
-				}
-			}
-			
-			error = ip_addmembership(imo, &v4req);
-			break;
-		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
-			error = EINVAL;
-			break;
-		}
-		/*
-		 * If no interface was explicitly specified, choose an
-		 * appropriate one according to the given multicast address.
-		 */
-		if (mreq->ipv6mr_interface == 0) {
-			/*
-			 * If the multicast address is in node-local scope,
-			 * the interface should be a loopback interface.
-			 * Otherwise, look up the routing table for the
-			 * address, and choose the outgoing interface.
-			 *   XXX: is it a good approach?
-			 */
-			if (IN6_IS_ADDR_MC_NODELOCAL(&mreq->ipv6mr_multiaddr)) {
-				ifp = lo_ifp;
-			} else {
-				ro.ro_rt = NULL;
-				dst = (struct sockaddr_in6 *)&ro.ro_dst;
-				bzero(dst, sizeof(*dst));
-				dst->sin6_len = sizeof(struct sockaddr_in6);
-				dst->sin6_family = AF_INET6;
-				dst->sin6_addr = mreq->ipv6mr_multiaddr;
-				rtalloc((struct route *)&ro);
-				if (ro.ro_rt == NULL) {
-					error = EADDRNOTAVAIL;
-					break;
-				}
-				ifp = ro.ro_rt->rt_ifp;
-				rtfree(ro.ro_rt);
-				ro.ro_rt = NULL;
-			}
-		}
+	PE_parse_boot_argn("ifa_debug", &im6o_debug, sizeof (im6o_debug));
 
-		/*
-		 * See if we found an interface, and confirm that it
-		 * supports multicast
-		 */
-		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
-			error = EADDRNOTAVAIL;
-			break;
-		}
-		/*
-		 * Put interface index into the multicast address,
-		 * if the address has link-local scope.
-		 */
-		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
-			mreq->ipv6mr_multiaddr.s6_addr16[1]
-				= htons(mreq->ipv6mr_interface);
-		}
-		/*
-		 * See if the membership already exists.
-		 */
-		lck_mtx_lock(nd6_mutex);
-		for (imm = im6o->im6o_memberships.lh_first;
-		     imm != NULL; imm = imm->i6mm_chain.le_next)
-			if (imm->i6mm_maddr->in6m_ifp == ifp &&
-			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
-					       &mreq->ipv6mr_multiaddr))
-				break;
-		if (imm != NULL) {
-			error = EADDRINUSE;
-			lck_mtx_unlock(nd6_mutex);
-			break;
-		}
-		/*
-		 * Everything looks good; add a new record to the multicast
-		 * address list for the given interface.
-		 */
-		imm = _MALLOC(sizeof(*imm), M_IPMADDR, M_WAITOK);
-		if (imm == NULL) {
-			error = ENOBUFS;
-			lck_mtx_unlock(nd6_mutex);
-			break;
-		}
-		if ((imm->i6mm_maddr =
-		     in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error, 1)) == NULL) {
-			FREE(imm, M_IPMADDR);
-			lck_mtx_unlock(nd6_mutex);
-			break;
-		}
-		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
-		lck_mtx_unlock(nd6_mutex);
-		break;
-
-	case IPV6_LEAVE_GROUP:
-		/*
-		 * Drop a multicast group membership.
-		 * Group must be a valid IP6 multicast address.
-		 */
-		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
-			error = EINVAL;
-			break;
-		}
-		mreq = mtod(m, struct ipv6_mreq *);
-		/*
-		 * If an interface address was specified, get a pointer
-		 * to its ifnet structure.
-		 *
-		 * Don't need to check if it's < 0, since it's unsigned.
-		 */
-		ifnet_head_lock_shared();
-		if (if_index < mreq->ipv6mr_interface) {
-			ifnet_head_done();
-			error = ENXIO;	/* XXX EINVAL? */
-			break;
-		}
-		ifp = ifindex2ifnet[mreq->ipv6mr_interface];
-		ifnet_head_done();
-		
-		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
-			if (suser(kauth_cred_get(), 0)) {
-				error = EACCES;
-				break;
-			}
-		} else if (IN6_IS_ADDR_V4MAPPED(&mreq->ipv6mr_multiaddr)) {
-			struct ip_mreq v4req;
-			
-			v4req.imr_multiaddr.s_addr = mreq->ipv6mr_multiaddr.s6_addr32[3];
-			v4req.imr_interface.s_addr = INADDR_ANY;
-			
-			if (ifp != NULL) {
-				struct in_ifaddr *ifa;
-				
-				lck_rw_lock_shared(in_ifaddr_rwlock);
-				TAILQ_FOREACH(ifa, &in_ifaddrhead, ia_link) {
-					if (ifa->ia_ifp == ifp) {
-						v4req.imr_interface = IA_SIN(ifa)->sin_addr;
-						break;
-					}
-				}
-				lck_rw_done(in_ifaddr_rwlock);
-			}
-			
-			error = ip_dropmembership(imo, &v4req);
-			break;
-		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
-			error = EINVAL;
-			break;
-		}
-		/*
-		 * Put interface index into the multicast address,
-		 * if the address has link-local scope.
-		 */
-		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
-			mreq->ipv6mr_multiaddr.s6_addr16[1]
-				= htons(mreq->ipv6mr_interface);
-		}
-		/*
-		 * Find the membership in the membership list.
-		 */
-		lck_mtx_lock(nd6_mutex);
-		for (imm = im6o->im6o_memberships.lh_first;
-		     imm != NULL; imm = imm->i6mm_chain.le_next) {
-			if ((ifp == NULL ||
-			     imm->i6mm_maddr->in6m_ifp == ifp) &&
-			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
-					       &mreq->ipv6mr_multiaddr))
-				break;
-		}
-		if (imm == NULL) {
-			/* Unable to resolve interface */
-			error = EADDRNOTAVAIL;
-			lck_mtx_unlock(nd6_mutex);
-			break;
-		}
-		/*
-		 * Give up the multicast address record to which the
-		 * membership points.
-		 */
-		LIST_REMOVE(imm, i6mm_chain);
-		in6_delmulti(imm->i6mm_maddr, 1);
-		lck_mtx_unlock(nd6_mutex);
-		FREE(imm, M_IPMADDR);
-		break;
-
-	default:
-		error = EOPNOTSUPP;
-		break;
+	im6o_size = (im6o_debug == 0) ? sizeof (struct ip6_moptions) :
+	    sizeof (struct ip6_moptions_dbg);
+
+	im6o_zone = zinit(im6o_size, IM6O_ZONE_MAX * im6o_size, 0,
+	    IM6O_ZONE_NAME);
+	if (im6o_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, IM6O_ZONE_NAME);
+		/* NOTREACHED */
 	}
+	zone_change(im6o_zone, Z_EXPAND, TRUE);
+}
 
-	/*
-	 * If all options have default values, no need to keep the mbuf.
-	 */
-	lck_mtx_lock(nd6_mutex);
-	if (im6o->im6o_multicast_ifp == NULL &&
-	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
-	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
-	    im6o->im6o_memberships.lh_first == NULL) {
-		FREE(*im6op, M_IPMOPTS);
-		*im6op = NULL;
-	}
-	if (imo->imo_multicast_ifp == NULL &&
-	    imo->imo_multicast_vif == -1 &&
-	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
-	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
-	    imo->imo_num_memberships == 0) {
-		ip_freemoptions(imo);
-		in6p->inp_moptions = 0;
-	}
-	lck_mtx_unlock(nd6_mutex);
+void
+im6o_addref(struct ip6_moptions *im6o, int locked)
+{
+	if (!locked)
+		IM6O_LOCK(im6o);
+	else
+		IM6O_LOCK_ASSERT_HELD(im6o);
 
-	return(error);
+	if (++im6o->im6o_refcnt == 0) {
+		panic("%s: im6o %p wraparound refcnt\n", __func__, im6o);
+		/* NOTREACHED */
+	} else if (im6o->im6o_trace != NULL) {
+		(*im6o->im6o_trace)(im6o, TRUE);
+	}
+
+	if (!locked)
+		IM6O_UNLOCK(im6o);
 }
 
-/*
- * Return the IP6 multicast options in response to user getsockopt().
- */
-static int
-ip6_getmoptions(optname, im6o, mp)
-	int optname;
-	struct ip6_moptions *im6o;
-	struct mbuf **mp;
+void
+im6o_remref(struct ip6_moptions *im6o)
 {
-	u_int *hlim, *loop, *ifindex;
+	int i;
 
-	*mp = m_get(M_WAIT, MT_HEADER);		/*XXX*/
-	if (*mp == NULL)
-		return ENOBUFS;
+	IM6O_LOCK(im6o);
+	if (im6o->im6o_refcnt == 0) {
+		panic("%s: im6o %p negative refcnt", __func__, im6o);
+		/* NOTREACHED */
+	} else if (im6o->im6o_trace != NULL) {
+		(*im6o->im6o_trace)(im6o, FALSE);
+	}
 
-	switch (optname) {
+	--im6o->im6o_refcnt;
+	if (im6o->im6o_refcnt > 0) {
+		IM6O_UNLOCK(im6o);
+		return;
+	}
 
-	case IPV6_MULTICAST_IF:
-		ifindex = mtod(*mp, u_int *);
-		(*mp)->m_len = sizeof(u_int);
-		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
-			*ifindex = 0;
-		else
-			*ifindex = im6o->im6o_multicast_ifp->if_index;
-		return(0);
+	for (i = 0; i < im6o->im6o_num_memberships; ++i) {
+		struct in6_mfilter *imf;
 
-	case IPV6_MULTICAST_HOPS:
-		hlim = mtod(*mp, u_int *);
-		(*mp)->m_len = sizeof(u_int);
-		if (im6o == NULL)
-			*hlim = ip6_defmcasthlim;
-		else
-			*hlim = im6o->im6o_multicast_hlim;
-		return(0);
+		imf = im6o->im6o_mfilters ? &im6o->im6o_mfilters[i] : NULL;
+		if (imf != NULL)
+			im6f_leave(imf);
 
-	case IPV6_MULTICAST_LOOP:
-		loop = mtod(*mp, u_int *);
-		(*mp)->m_len = sizeof(u_int);
-		if (im6o == NULL)
-			*loop = ip6_defmcasthlim;
-		else
-			*loop = im6o->im6o_multicast_loop;
-		return(0);
+		(void) in6_mc_leave(im6o->im6o_membership[i], imf);
 
-	default:
-		return(EOPNOTSUPP);
+		if (imf != NULL)
+			im6f_purge(imf);
+
+		IN6M_REMREF(im6o->im6o_membership[i]);
+		im6o->im6o_membership[i] = NULL;
+	}
+	im6o->im6o_num_memberships = 0;
+	if (im6o->im6o_mfilters != NULL) {
+		FREE(im6o->im6o_mfilters, M_IN6MFILTER);
+		im6o->im6o_mfilters = NULL;
+	}
+	if (im6o->im6o_membership != NULL) {
+		FREE(im6o->im6o_membership, M_IP6MOPTS);
+		im6o->im6o_membership = NULL;
+	}
+	IM6O_UNLOCK(im6o);
+
+	lck_mtx_destroy(&im6o->im6o_lock, ifa_mtx_grp);
+
+	if (!(im6o->im6o_debug & IFD_ALLOC)) {
+		panic("%s: im6o %p cannot be freed", __func__, im6o);
+		/* NOTREACHED */
 	}
+	zfree(im6o_zone, im6o);
 }
 
-/*
- * Discard the IP6 multicast options.
- */
-void
-ip6_freemoptions(im6o)
-	struct ip6_moptions *im6o;
+static void
+im6o_trace(struct ip6_moptions *im6o, int refhold)
 {
-	struct in6_multi_mship *imm;
+	struct ip6_moptions_dbg *im6o_dbg = (struct ip6_moptions_dbg *)im6o;
+	ctrace_t *tr;
+	u_int32_t idx;
+	u_int16_t *cnt;
 
-	if (im6o == NULL)
-		return;
-	
-	lck_mtx_lock(nd6_mutex);
-	while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
-		LIST_REMOVE(imm, i6mm_chain);
-		if (imm->i6mm_maddr)
-			in6_delmulti(imm->i6mm_maddr, 1);
-		FREE(imm, M_IPMADDR);
-	}
-	lck_mtx_unlock(nd6_mutex);
-	FREE(im6o, M_IPMOPTS);
+	if (!(im6o->im6o_debug & IFD_DEBUG)) {
+		panic("%s: im6o %p has no debug structure", __func__, im6o);
+		/* NOTREACHED */
+	}
+	if (refhold) {
+		cnt = &im6o_dbg->im6o_refhold_cnt;
+		tr = im6o_dbg->im6o_refhold;
+	} else {
+		cnt = &im6o_dbg->im6o_refrele_cnt;
+		tr = im6o_dbg->im6o_refrele;
+	}
+
+	idx = atomic_add_16_ov(cnt, 1) % IM6O_TRACE_HIST_SIZE;
+	ctrace_record(&tr[idx]);
+}
+
+struct ip6_moptions *
+ip6_allocmoptions(int how)
+{
+	struct ip6_moptions *im6o;
+
+	im6o = (how == M_WAITOK) ?
+	    zalloc(im6o_zone) : zalloc_noblock(im6o_zone);
+	if (im6o != NULL) {
+		bzero(im6o, im6o_size);
+		lck_mtx_init(&im6o->im6o_lock, ifa_mtx_grp, ifa_mtx_attr);
+		im6o->im6o_debug |= IFD_ALLOC;
+		if (im6o_debug != 0) {
+			im6o->im6o_debug |= IFD_DEBUG;
+			im6o->im6o_trace = im6o_trace;
+		}
+		IM6O_ADDREF(im6o);
+	}
+
+	return (im6o);
 }
 
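A sketch of the lifecycle these routines imply (kernel-internal, so not compilable on its own): ip6_allocmoptions() hands back the object already holding one reference; every additional holder takes its own with IM6O_ADDREF(); the final IM6O_REMREF() leaves the memberships, destroys the lock, and frees the zone element.

struct ip6_moptions *im6o;

im6o = ip6_allocmoptions(M_WAITOK);	/* returns with refcnt == 1 */
if (im6o != NULL) {
	IM6O_ADDREF(im6o);		/* hypothetical second holder */
	/* ... use im6o; fields are accessed under IM6O_LOCK() ... */
	IM6O_REMREF(im6o);		/* second holder drops its ref */
	IM6O_REMREF(im6o);		/* last ref: teardown and zfree */
}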
 /*
  * Set IPv6 outgoing packet options based on advanced API.
  */
 int
-ip6_setpktoptions(control, opt, priv, needcopy)
-	struct mbuf *control;
-	struct ip6_pktopts *opt;
-	int priv, needcopy;
+ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
+    struct ip6_pktopts *stickyopt, int uproto)
 {
 	struct cmsghdr *cm = 0;
 
-	if (control == 0 || opt == 0)
-		return(EINVAL);
+	if (control == NULL || opt == NULL)
+		return (EINVAL);
 
 	ip6_initpktopts(opt);
+	if (stickyopt) {
+		int error;
+
+		/*
+		 * If stickyopt is provided, make a local copy of the options
+		 * for this particular packet, then override them with any
+		 * ancillary objects.
+		 * XXX: copypktopts() does not copy the cached route to a next
+		 * hop (if any).  This is not very good in terms of efficiency,
+		 * but we can tolerate it since this option is rarely used.
+		 */
+		if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
+			return (error);
+	}
 
 	/*
 	 * XXX: Currently, we assume all the optional information is stored
 	 * in a single mbuf.
 	 */
 	if (control->m_next)
-		return(EINVAL);
+		return (EINVAL);
 
-	for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
-		     control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
-		cm = mtod(control, struct cmsghdr *);
-		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
-			return(EINVAL);
+	if (control->m_len < CMSG_LEN(0))
+		return (EINVAL);
+
+	for (cm = M_FIRST_CMSGHDR(control); cm; cm = M_NXT_CMSGHDR(control, cm)) {
+		int error;
+
+		if (cm->cmsg_len < sizeof(struct cmsghdr) || cm->cmsg_len > control->m_len)
+			return (EINVAL);
 		if (cm->cmsg_level != IPPROTO_IPV6)
 			continue;
 
+		error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
+		    cm->cmsg_len - CMSG_LEN(0), opt, 0, 1, uproto);
+		if (error)
+			return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * Set a particular packet option, as a sticky option or an ancillary data
+ * item.  "len" can be 0 only when it's a sticky option.
+ * We have 4 cases of combination of "sticky" and "cmsg":
+ * "sticky=0, cmsg=0": impossible
+ * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
+ * "sticky=1, cmsg=0": RFC3542 socket option
+ * "sticky=1, cmsg=1": RFC2292 socket option
+ */
+static int
+ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
+    int sticky, int cmsg, int uproto)
+{
+	int minmtupolicy, preftemp;
+	int error;
+
+	if (!sticky && !cmsg) {
+#ifdef DIAGNOSTIC
+		printf("ip6_setpktopt: impossible case\n");
+#endif
+		return (EINVAL);
+	}
+
+	/*
+	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
+	 * not be specified in the context of RFC3542.  Conversely,
+	 * RFC3542 types should not be specified in the context of RFC2292.
+	 */
+	if (!cmsg) {
+		switch (optname) {
+		case IPV6_2292PKTINFO:
+		case IPV6_2292HOPLIMIT:
+		case IPV6_2292NEXTHOP:
+		case IPV6_2292HOPOPTS:
+		case IPV6_2292DSTOPTS:
+		case IPV6_2292RTHDR:
+		case IPV6_2292PKTOPTIONS:
+			return (ENOPROTOOPT);
+		}
+	}
+	if (sticky && cmsg) {
+		switch (optname) {
+		case IPV6_PKTINFO:
+		case IPV6_HOPLIMIT:
+		case IPV6_NEXTHOP:
+		case IPV6_HOPOPTS:
+		case IPV6_DSTOPTS:
+		case IPV6_RTHDRDSTOPTS:
+		case IPV6_RTHDR:
+		case IPV6_USE_MIN_MTU:
+		case IPV6_DONTFRAG:
+		case IPV6_TCLASS:
+		case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
+			return (ENOPROTOOPT);
+		}
+	}
+
+	switch (optname) {
+	case IPV6_2292PKTINFO:
+	case IPV6_PKTINFO:
+	{
+		struct ifnet *ifp = NULL;
+		struct in6_pktinfo *pktinfo;
+
+		if (len != sizeof(struct in6_pktinfo))
+			return (EINVAL);
+
+		pktinfo = (struct in6_pktinfo *)buf;
+
 		/*
-		 * XXX should check if RFC2292 API is mixed with 2292bis API
+		 * An application can clear any sticky IPV6_PKTINFO option by
+		 * doing a "regular" setsockopt with ipi6_addr being
+		 * in6addr_any and ipi6_ifindex being zero.
+		 * [RFC 3542, Section 6]
 		 */
-		switch (cm->cmsg_type) {
-		case IPV6_PKTINFO:
-			if (cm->cmsg_len != CMSG_LEN(sizeof(struct in6_pktinfo)))
-				return(EINVAL);
-			if (needcopy) {
-				/* XXX: Is it really WAITOK? */
-				opt->ip6po_pktinfo =
-					_MALLOC(sizeof(struct in6_pktinfo),
-					       M_IP6OPT, M_WAITOK);
-				if (opt->ip6po_pktinfo == NULL)
-					return ENOBUFS;
-				bcopy(CMSG_DATA(cm), opt->ip6po_pktinfo,
-				    sizeof(struct in6_pktinfo));
-			} else
-				opt->ip6po_pktinfo =
-					(struct in6_pktinfo *)CMSG_DATA(cm);
-			if (opt->ip6po_pktinfo->ipi6_ifindex &&
-			    IN6_IS_ADDR_LINKLOCAL(&opt->ip6po_pktinfo->ipi6_addr))
-				opt->ip6po_pktinfo->ipi6_addr.s6_addr16[1] =
-					htons(opt->ip6po_pktinfo->ipi6_ifindex);
-
-			if (opt->ip6po_pktinfo->ipi6_ifindex > if_index) {
-				return(ENXIO);
-			}
+		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
+		    pktinfo->ipi6_ifindex == 0 &&
+		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
+			ip6_clearpktopts(opt, optname);
+			break;
+		}
 
-			/*
-			 * Check if the requested source address is indeed a
-			 * unicast address assigned to the node, and can be
-			 * used as the packet's source address.
-			 */
-			if (!IN6_IS_ADDR_UNSPECIFIED(&opt->ip6po_pktinfo->ipi6_addr)) {
-				struct in6_ifaddr *ia6;
-				struct sockaddr_in6 sin6;
-
-				bzero(&sin6, sizeof(sin6));
-				sin6.sin6_len = sizeof(sin6);
-				sin6.sin6_family = AF_INET6;
-				sin6.sin6_addr =
-					opt->ip6po_pktinfo->ipi6_addr;
-				ia6 = (struct in6_ifaddr *)ifa_ifwithaddr(sin6tosa(&sin6));
-				if (ia6 == NULL ||
-				    (ia6->ia6_flags & (IN6_IFF_ANYCAST |
-						       IN6_IFF_NOTREADY)) != 0) {
-					if (ia6) ifafree(&ia6->ia_ifa);
-					return(EADDRNOTAVAIL);
-				}
-				ifafree(&ia6->ia_ifa);
-				ia6 = NULL;
+		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
+		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
+			return (EINVAL);
+		}
+
+		/* validate the interface index if specified. */
+		ifnet_head_lock_shared();
+
+		if (pktinfo->ipi6_ifindex > if_index) {
+			ifnet_head_done();
+			return (ENXIO);
+		}
+
+		if (pktinfo->ipi6_ifindex) {
+			ifp = ifindex2ifnet[pktinfo->ipi6_ifindex];
+			if (ifp == NULL) {
+				ifnet_head_done();
+				return (ENXIO);
 			}
-			break;
+		}
+
+		ifnet_head_done();
 
-		case IPV6_HOPLIMIT:
-			if (cm->cmsg_len != CMSG_LEN(sizeof(int)))
-				return(EINVAL);
+		/*
+		 * We store the address anyway, and let in6_selectsrc()
+		 * validate the specified address.  This is because ipi6_addr
+		 * may not have enough information about its scope zone, and
+		 * we may need additional information (such as outgoing
+		 * interface or the scope zone of a destination address) to
+		 * disambiguate the scope.
+		 * XXX: the delay of the validation may confuse the
+		 * application when it is used as a sticky option.
+		 */
+		if (opt->ip6po_pktinfo == NULL) {
+			opt->ip6po_pktinfo = _MALLOC(sizeof(*pktinfo),
+			    M_IP6OPT, M_NOWAIT);
+			if (opt->ip6po_pktinfo == NULL)
+				return (ENOBUFS);
+		}
+		bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
+		break;
+	}
 
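From userland, the clearing branch above corresponds to the RFC 3542 (Section 6) idiom of writing an all-zero in6_pktinfo; a minimal sketch, assuming the RFC3542 definitions are visible via <netinet/in.h>:

#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>

static int
clear_sticky_pktinfo(int s)
{
	struct in6_pktinfo pi;

	memset(&pi, 0, sizeof (pi));	/* ipi6_addr = ::, ipi6_ifindex = 0 */
	return (setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
	    &pi, sizeof (pi)));
}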
-			opt->ip6po_hlim = *(int *)CMSG_DATA(cm);
-			if (opt->ip6po_hlim < -1 || opt->ip6po_hlim > 255)
-				return(EINVAL);
-			break;
+	case IPV6_2292HOPLIMIT:
+	case IPV6_HOPLIMIT:
+	{
+		int *hlimp;
 
-		case IPV6_TCLASS:
-			if (cm->cmsg_len != CMSG_LEN(sizeof(int)))
-				return(EINVAL);
+		/*
+		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
+		 * to simplify the ordering among hoplimit options.
+		 */
+		if (optname == IPV6_HOPLIMIT && sticky)
+			return (ENOPROTOOPT);
 
-			opt->ip6po_tclass = *(int *)CMSG_DATA(cm);
-			if (opt->ip6po_tclass < -1 || opt->ip6po_tclass > 255)
-				return (EINVAL);
-			break;
+		if (len != sizeof(int))
+			return (EINVAL);
+		hlimp = (int *)buf;
+		if (*hlimp < -1 || *hlimp > 255)
+			return (EINVAL);
 
-		case IPV6_NEXTHOP:
-			if (!priv)
-				return(EPERM);
-
-			if (cm->cmsg_len < sizeof(u_char) ||
-			    /* check if cmsg_len is large enough for sa_len */
-			    cm->cmsg_len < CMSG_LEN(*CMSG_DATA(cm)))
-				return(EINVAL);
-
-			if (needcopy) {
-				opt->ip6po_nexthop =
-					_MALLOC(*CMSG_DATA(cm),
-					       M_IP6OPT, M_WAITOK);
-				if (opt->ip6po_nexthop == NULL)
-					return ENOBUFS;
-				bcopy(CMSG_DATA(cm),
-				      opt->ip6po_nexthop,
-				      *CMSG_DATA(cm));
-			} else
-				opt->ip6po_nexthop =
-					(struct sockaddr *)CMSG_DATA(cm);
+		opt->ip6po_hlim = *hlimp;
+		break;
+	}
+
+	case IPV6_TCLASS:
+	{
+		int tclass;
+
+		if (len != sizeof(int))
+			return (EINVAL);
+		tclass = *(int *)buf;
+		if (tclass < -1 || tclass > 255)
+			return (EINVAL);
+
+		opt->ip6po_tclass = tclass;
+		break;
+	}
+
+	case IPV6_2292NEXTHOP:
+	case IPV6_NEXTHOP:
+		error = suser(kauth_cred_get(), 0);
+		if (error)
+			return (EACCES);
+
+		if (len == 0) {	/* just remove the option */
+			ip6_clearpktopts(opt, IPV6_NEXTHOP);
 			break;
+		}
 
-		case IPV6_HOPOPTS:
+		/* check if cmsg_len is large enough for sa_len */
+		if (len < sizeof(struct sockaddr) || len < *buf)
+			return (EINVAL);
+
+		switch (((struct sockaddr *)buf)->sa_family) {
+		case AF_INET6:
 		{
-			struct ip6_hbh *hbh;
-			int hbhlen;
-
-			if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_hbh)))
-				return(EINVAL);
-			hbh = (struct ip6_hbh *)CMSG_DATA(cm);
-			hbhlen = (hbh->ip6h_len + 1) << 3;
-			if (cm->cmsg_len != CMSG_LEN(hbhlen))
-				return(EINVAL);
-
-			if (needcopy) {
-				opt->ip6po_hbh =
-					_MALLOC(hbhlen, M_IP6OPT, M_WAITOK);
-				if (opt->ip6po_hbh == NULL)
-					return ENOBUFS;
-				bcopy(hbh, opt->ip6po_hbh, hbhlen);
-			} else
-				opt->ip6po_hbh = hbh;
+			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
+
+			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
+				return (EINVAL);
+
+			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
+			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
+				return (EINVAL);
+			}
+			if ((error = sa6_embedscope(sa6, ip6_use_defzone))
+			    != 0) {
+				return (error);
+			}
 			break;
 		}
+		case AF_LINK:	/* should eventually be supported */
+		default:
+			return (EAFNOSUPPORT);
+		}
 
-		case IPV6_DSTOPTS:
-		{
-			struct ip6_dest *dest, **newdest;
-			int destlen;
-
-			if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_dest)))
-				return(EINVAL);
-			dest = (struct ip6_dest *)CMSG_DATA(cm);
-			destlen = (dest->ip6d_len + 1) << 3;
-			if (cm->cmsg_len != CMSG_LEN(destlen))
-				return(EINVAL);
-
-			/* 
-			 * The old advacned API is ambiguous on this
-			 * point. Our approach is to determine the
-			 * position based according to the existence
-			 * of a routing header. Note, however, that
-			 * this depends on the order of the extension
-			 * headers in the ancillary data; the 1st part
-			 * of the destination options header must
-			 * appear before the routing header in the
-			 * ancillary data, too.
-			 * RFC2292bis solved the ambiguity by
-			 * introducing separate cmsg types.
+		/* turn off the previous option, then set the new option. */
+		ip6_clearpktopts(opt, IPV6_NEXTHOP);
+		opt->ip6po_nexthop = _MALLOC(*buf, M_IP6OPT, M_NOWAIT);
+		if (opt->ip6po_nexthop == NULL)
+			return (ENOBUFS);
+		bcopy(buf, opt->ip6po_nexthop, *buf);
+		break;
+
+	case IPV6_2292HOPOPTS:
+	case IPV6_HOPOPTS:
+	{
+		struct ip6_hbh *hbh;
+		int hbhlen;
+
+		/*
+		 * XXX: We don't allow a non-privileged user to set ANY HbH
+		 * options, since per-option restriction has too much
+		 * overhead.
+		 */
+		error = suser(kauth_cred_get(), 0);
+		if (error)
+			return (EACCES);
+
+		if (len == 0) {
+			ip6_clearpktopts(opt, IPV6_HOPOPTS);
+			break;	/* just remove the option */
+		}
+
+		/* message length validation */
+		if (len < sizeof(struct ip6_hbh))
+			return (EINVAL);
+		hbh = (struct ip6_hbh *)buf;
+		hbhlen = (hbh->ip6h_len + 1) << 3;
+		if (len != hbhlen)
+			return (EINVAL);
+
+		/* turn off the previous option, then set the new option. */
+		ip6_clearpktopts(opt, IPV6_HOPOPTS);
+		opt->ip6po_hbh = _MALLOC(hbhlen, M_IP6OPT, M_NOWAIT);
+		if (opt->ip6po_hbh == NULL)
+			return (ENOBUFS);
+		bcopy(hbh, opt->ip6po_hbh, hbhlen);
+
+		break;
+	}
+
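The hbhlen computation above follows the IPv6 extension-header encoding: ip6h_len counts 8-octet units excluding the first eight bytes, so the total size is (ip6h_len + 1) << 3. A one-assert illustration:

#include <netinet/ip6.h>
#include <assert.h>

int
main(void)
{
	struct ip6_hbh h = { .ip6h_nxt = 0, .ip6h_len = 0 };

	/* ip6h_len counts 8-octet units beyond the first, hence "+ 1" */
	assert(((h.ip6h_len + 1) << 3) == 8);	/* minimal HbH header */
	return (0);
}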
+	case IPV6_2292DSTOPTS:
+	case IPV6_DSTOPTS:
+	case IPV6_RTHDRDSTOPTS:
+	{
+		struct ip6_dest *dest, **newdest = NULL;
+		int destlen;
+
+		error = suser(kauth_cred_get(), 0);
+		if (error)
+			return (EACCES);
+
+		if (len == 0) {
+			ip6_clearpktopts(opt, optname);
+			break;	/* just remove the option */
+		}
+
+		/* message length validation */
+		if (len < sizeof(struct ip6_dest))
+			return (EINVAL);
+		dest = (struct ip6_dest *)buf;
+		destlen = (dest->ip6d_len + 1) << 3;
+		if (len != destlen)
+			return (EINVAL);
+
+		/*
+		 * Determine the position that the destination options header
+		 * should be inserted; before or after the routing header.
+		 */
+		switch (optname) {
+		case IPV6_2292DSTOPTS:
+			/*
+			 * The old advanced API is ambiguous on this point.
+			 * Our approach is to determine the position
+			 * according to the existence of a routing header.
+			 * Note, however, that this depends on the order of the
+			 * extension headers in the ancillary data; the 1st
+			 * part of the destination options header must appear
+			 * before the routing header in the ancillary data,
+			 * too.
+			 * RFC3542 solved the ambiguity by introducing
+			 * separate ancillary data or option types.
 			 */
 			if (opt->ip6po_rthdr == NULL)
 				newdest = &opt->ip6po_dest1;
 			else
 				newdest = &opt->ip6po_dest2;
-
-			if (needcopy) {
-				*newdest = _MALLOC(destlen, M_IP6OPT, M_WAITOK);
-				if (*newdest == NULL)
-					return ENOBUFS;
-				bcopy(dest, *newdest, destlen);
-			} else
-				*newdest = dest;
-
+			break;
+		case IPV6_RTHDRDSTOPTS:
+			newdest = &opt->ip6po_dest1;
+			break;
+		case IPV6_DSTOPTS:
+			newdest = &opt->ip6po_dest2;
 			break;
 		}
 
-		case IPV6_RTHDR:
-		{
-			struct ip6_rthdr *rth;
-			int rthlen;
-
-			if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_rthdr)))
-				return(EINVAL);
-			rth = (struct ip6_rthdr *)CMSG_DATA(cm);
-			rthlen = (rth->ip6r_len + 1) << 3;
-			if (cm->cmsg_len != CMSG_LEN(rthlen))
-				return(EINVAL);
-
-			switch (rth->ip6r_type) {
-			case IPV6_RTHDR_TYPE_0:
-				/* must contain one addr */
-				if (rth->ip6r_len == 0)
-					return(EINVAL);
-				/* length must be even */
-				if (rth->ip6r_len % 2)
-					return(EINVAL);
-				if (rth->ip6r_len / 2 != rth->ip6r_segleft)
-					return(EINVAL);
-				break;
-			default:
-				return(EINVAL);	/* not supported */
-			}
+		/* turn off the previous option, then set the new option. */
+		ip6_clearpktopts(opt, optname);
+		*newdest = _MALLOC(destlen, M_IP6OPT, M_NOWAIT);
+		if (*newdest == NULL)
+			return (ENOBUFS);
+		bcopy(dest, *newdest, destlen);
 
-			if (needcopy) {
-				opt->ip6po_rthdr = _MALLOC(rthlen, M_IP6OPT,
-							  M_WAITOK);
-				if (opt->ip6po_rthdr == NULL)
-					return ENOBUFS;
-				bcopy(rth, opt->ip6po_rthdr, rthlen);
-			} else
-				opt->ip6po_rthdr = rth;
+		break;
+	}
 
-			break;
+	case IPV6_2292RTHDR:
+	case IPV6_RTHDR:
+	{
+		struct ip6_rthdr *rth;
+		int rthlen;
+
+		if (len == 0) {
+			ip6_clearpktopts(opt, IPV6_RTHDR);
+			break;	/* just remove the option */
 		}
 
+		/* message length validation */
+		if (len < sizeof(struct ip6_rthdr))
+			return (EINVAL);
+		rth = (struct ip6_rthdr *)buf;
+		rthlen = (rth->ip6r_len + 1) << 3;
+		if (len != rthlen)
+			return (EINVAL);
+
+		switch (rth->ip6r_type) {
+		case IPV6_RTHDR_TYPE_0:
+			if (rth->ip6r_len == 0)	/* must contain one addr */
+				return (EINVAL);
+			if (rth->ip6r_len % 2) /* length must be even */
+				return (EINVAL);
+			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
+				return (EINVAL);
+			break;
 		default:
-			return(ENOPROTOOPT);
+			return (EINVAL);	/* not supported */
 		}
+
+		/* turn off the previous option */
+		ip6_clearpktopts(opt, IPV6_RTHDR);
+		opt->ip6po_rthdr = _MALLOC(rthlen, M_IP6OPT, M_NOWAIT);
+		if (opt->ip6po_rthdr == NULL)
+			return (ENOBUFS);
+		bcopy(rth, opt->ip6po_rthdr, rthlen);
+
+		break;
 	}
 
-	return(0);
+	case IPV6_USE_MIN_MTU:
+		if (len != sizeof(int))
+			return (EINVAL);
+		minmtupolicy = *(int *)buf;
+		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
+		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
+		    minmtupolicy != IP6PO_MINMTU_ALL) {
+			return (EINVAL);
+		}
+		opt->ip6po_minmtu = minmtupolicy;
+		break;
+
+	case IPV6_DONTFRAG:
+		if (len != sizeof(int))
+			return (EINVAL);
+
+		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
+			/*
+			 * we ignore this option for TCP sockets.
+			 * (RFC3542 leaves this case unspecified.)
+			 */
+			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
+		} else
+			opt->ip6po_flags |= IP6PO_DONTFRAG;
+		break;
+
+	case IPV6_PREFER_TEMPADDR:
+		if (len != sizeof(int))
+			return (EINVAL);
+		preftemp = *(int *)buf;
+		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
+		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
+		    preftemp != IP6PO_TEMPADDR_PREFER) {
+			return (EINVAL);
+		}
+		opt->ip6po_prefer_tempaddr = preftemp;
+		break;
+
+	default:
+		return (ENOPROTOOPT);
+	} /* end of switch */
+
+	return (0);
 }
 
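To make the sticky/cmsg matrix concrete, these are the two call shapes that actually appear in this file (a sketch, not compilable on its own; sticky=1/cmsg=1 is reserved for the 2292-era socket options, and sticky=0/cmsg=0 is rejected at the top of the function):

struct ip6_pktopts *opt;	/* assumed set up */
struct cmsghdr *cm;		/* assumed set up */
u_char *buf;
int error, optname, len, uproto;

/* RFC3542 sticky setsockopt(), via ip6_pcbopt(): sticky=1, cmsg=0 */
error = ip6_setpktopt(optname, buf, len, opt, 1, 0, uproto);

/* per-packet ancillary item, via ip6_setpktopts(): sticky=0, cmsg=1 */
error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
    cm->cmsg_len - CMSG_LEN(0), opt, 0, 1, uproto);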
 /*
@@ -2927,28 +3386,28 @@ ip6_mloopback(
 #endif
 
 	ip6 = mtod(copym, struct ip6_hdr *);
-#ifndef SCOPEDROUTING
 	/*
 	 * clear embedded scope identifiers if necessary.
 	 * in6_clearscope will touch the addresses only when necessary.
 	 */
 	in6_clearscope(&ip6->ip6_src);
 	in6_clearscope(&ip6->ip6_dst);
-#endif
 
 #ifdef __APPLE__
 
 	/* Makes sure the HW checksum flags are cleaned before sending the packet */
 
+	if ((copym->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) {
+		in6_delayed_cksum(copym, sizeof(struct ip6_hdr));
+		copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
+	}
 	copym->m_pkthdr.rcvif = 0;
 	copym->m_pkthdr.csum_data = 0;
 	copym->m_pkthdr.csum_flags = 0;
 
 	if (lo_ifp) {
 		copym->m_pkthdr.rcvif = ifp;
-		lck_mtx_unlock(ip6_mutex);
 		dlil_output(lo_ifp, PF_INET6, copym, 0, (struct sockaddr *)dst, 0);
-		lck_mtx_lock(ip6_mutex);
 	} else
 		m_free(copym);
 #else
@@ -3002,7 +3461,7 @@ ip6_optlen(in6p)
 
 	len = 0;
 #define elen(x) \
-    (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
+	(((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
 
 	len += elen(in6p->in6p_outputopts->ip6po_hbh);
 	if (in6p->in6p_outputopts->ip6po_rthdr)
@@ -3013,4 +3472,3 @@ ip6_optlen(in6p)
 	return len;
 #undef elen
 }
-
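
The elen() macro in ip6_optlen() uses the same length encoding for every
extension header: ip6e_len is in 8-octet units, excluding the first 8 octets.
A standalone version with a worked value (illustrative only):

	#include <netinet/ip6.h>

	/* Byte length of an extension header, or 0 when absent. */
	static int
	ext_hdr_len(const struct ip6_ext *ext)
	{
		return ((ext != NULL) ? (ext->ip6e_len + 1) << 3 : 0);
	}

	/* e.g. ip6e_len == 1  =>  (1 + 1) << 3 == 16 bytes on the wire */
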
diff --git a/bsd/netinet6/ip6_var.h b/bsd/netinet6/ip6_var.h
index 9aa8e0e3f..acb9c3857 100644
--- a/bsd/netinet6/ip6_var.h
+++ b/bsd/netinet6/ip6_var.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -97,37 +97,29 @@
 #define _NETINET6_IP6_VAR_H_
 #include <sys/appleapiopts.h>
 
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 /*
  * IP6 reassembly queue structure.  Each fragment
  * being reassembled is attached to one of these structures.
  */
 struct	ip6q {
-	u_int32_t	ip6q_head;
-	u_int16_t	ip6q_len;
-	u_int8_t	ip6q_nxt;	/* ip6f_nxt in first fragment */
-	u_int8_t	ip6q_hlim;
 	struct ip6asfrag *ip6q_down;
 	struct ip6asfrag *ip6q_up;
 	u_int32_t	ip6q_ident;
-	u_int8_t	ip6q_arrive;
+	u_int8_t	ip6q_nxt;
+	u_int8_t	ip6q_ecn;
 	u_int8_t	ip6q_ttl;
-	struct in6_addr	ip6q_src, ip6q_dst;
+	struct in6_addr ip6q_src, ip6q_dst;
 	struct ip6q	*ip6q_next;
 	struct ip6q	*ip6q_prev;
 	int		ip6q_unfrglen;	/* len of unfragmentable part */
 #if notyet
 	u_char	*ip6q_nxtp;
 #endif
-	int			ip6q_nfrag;     /* number of fragments */
+	int		ip6q_nfrag;	/* # of fragments */
 };
 
 struct	ip6asfrag {
-	u_int32_t	ip6af_head;
-	u_int16_t	ip6af_len;
-	u_int8_t	ip6af_nxt;
-	u_int8_t	ip6af_hlim;
-	/* must not override the above members during reassembling */
 	struct ip6asfrag *ip6af_down;
 	struct ip6asfrag *ip6af_up;
 	struct mbuf	*ip6af_m;
@@ -140,12 +132,49 @@ struct	ip6asfrag {
 #define IP6_REASS_MBUF(ip6af) (*(struct mbuf **)&((ip6af)->ip6af_m))
 
 struct	ip6_moptions {
+	decl_lck_mtx_data(, im6o_lock);
+	uint32_t im6o_refcnt;		/* ref count */
+	uint32_t im6o_debug;		/* see ifa_debug flags */
 	struct	ifnet *im6o_multicast_ifp; /* ifp for outgoing multicasts */
 	u_char	im6o_multicast_hlim;	/* hoplimit for outgoing multicasts */
 	u_char	im6o_multicast_loop;	/* 1 => hear sends if a member */
-	LIST_HEAD(, in6_multi_mship) im6o_memberships;
+	u_short	im6o_num_memberships;	/* no. memberships this socket */
+	u_short	im6o_max_memberships;	/* max memberships this socket */
+	struct	in6_multi **im6o_membership;	/* group memberships */
+	struct	in6_mfilter *im6o_mfilters;	/* source filters */
+	void (*im6o_trace)		/* callback fn for tracing refs */
+	    (struct ip6_moptions *, int);
 };
 
+#define	IM6O_LOCK_ASSERT_HELD(_im6o)					\
+	lck_mtx_assert(&(_im6o)->im6o_lock, LCK_MTX_ASSERT_OWNED)
+
+#define	IM6O_LOCK_ASSERT_NOTHELD(_im6o)					\
+	lck_mtx_assert(&(_im6o)->im6o_lock, LCK_MTX_ASSERT_NOTOWNED)
+
+#define	IM6O_LOCK(_im6o)						\
+	lck_mtx_lock(&(_im6o)->im6o_lock)
+
+#define	IM6O_LOCK_SPIN(_im6o)						\
+	lck_mtx_lock_spin(&(_im6o)->im6o_lock)
+
+#define	IM6O_CONVERT_LOCK(_im6o) do {					\
+	IM6O_LOCK_ASSERT_HELD(_im6o);					\
+	lck_mtx_convert_spin(&(_im6o)->im6o_lock);			\
+} while (0)
+
+#define	IM6O_UNLOCK(_im6o)						\
+	lck_mtx_unlock(&(_im6o)->im6o_lock)
+
+#define	IM6O_ADDREF(_im6o)						\
+	im6o_addref(_im6o, 0)
+
+#define	IM6O_ADDREF_LOCKED(_im6o)					\
+	im6o_addref(_im6o, 1)
+
+#define	IM6O_REMREF(_im6o)						\
+	im6o_remref(_im6o)
+
 /*
  * Control options for outgoing packets
  */
@@ -158,6 +187,14 @@ struct	ip6po_rhinfo {
 #define ip6po_rthdr	ip6po_rhinfo.ip6po_rhi_rthdr
 #define ip6po_route	ip6po_rhinfo.ip6po_rhi_route
 
+/* Nexthop related info */
+struct	ip6po_nhinfo {
+	struct	sockaddr *ip6po_nhi_nexthop;
+	struct	route_in6 ip6po_nhi_route; /* Route to the nexthop */
+};
+#define ip6po_nexthop	ip6po_nhinfo.ip6po_nhi_nexthop
+#define ip6po_nextroute	ip6po_nhinfo.ip6po_nhi_route
+
 struct	ip6_pktopts {
 	struct	mbuf *ip6po_m;	/* Pointer to mbuf storing the data */
 	int	ip6po_hlim;	/* Hoplimit for outgoing packets */
@@ -165,8 +202,9 @@ struct	ip6_pktopts {
 	/* Outgoing IF/address information */
 	struct	in6_pktinfo *ip6po_pktinfo;
 
-	struct	sockaddr *ip6po_nexthop; /* Next-hop address */
-	
+	/* Next-hop address information */
+	struct	ip6po_nhinfo ip6po_nhinfo;
+
 	struct	ip6_hbh *ip6po_hbh; /* Hop-by-Hop options header */
 
 	/* Destination options header (before a routing header) */
@@ -178,13 +216,32 @@ struct	ip6_pktopts {
 	/* Destination options header (after a routing header) */
 	struct	ip6_dest *ip6po_dest2;
 
-	int     ip6po_tclass;   /* traffic class */
+	int	ip6po_tclass;	/* traffic class */
+
+	int	ip6po_minmtu;  /* fragment vs PMTU discovery policy */
+#define IP6PO_MINMTU_MCASTONLY	-1 /* default; send at min MTU for multicast */
+#define IP6PO_MINMTU_DISABLE	 0 /* always perform pmtu disc */
+#define IP6PO_MINMTU_ALL	 1 /* always send at min MTU */
+
+	int	ip6po_prefer_tempaddr;  /* whether temporary addresses are
+					   preferred as source address */
+#define IP6PO_TEMPADDR_SYSTEM	-1 /* follow the system default */
+#define IP6PO_TEMPADDR_NOTPREFER 0 /* not prefer temporary address */
+#define IP6PO_TEMPADDR_PREFER	 1 /* prefer temporary address */
+
+	int ip6po_flags;
+#if 0	/* parameters in this block are obsolete; do not reuse the values. */
+#define IP6PO_REACHCONF	0x01	/* upper-layer reachability confirmation. */
+#define IP6PO_MINMTU	0x02	/* use minimum MTU (IPV6_USE_MIN_MTU) */
+#endif
+#define IP6PO_DONTFRAG	0x04	/* disable fragmentation (IPV6_DONTFRAG) */
+#define IP6PO_USECOA	0x08	/* use care of address */
 };
 
 /*
  * Control options for incoming packets
  */
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 struct	ip6stat {
 	u_quad_t ip6s_total;		/* total packets received */
@@ -201,7 +258,7 @@ struct	ip6stat {
 	u_quad_t ip6s_localout;		/* total ip packets generated here */
 	u_quad_t ip6s_odropped;		/* lost packets due to nobufs, etc. */
 	u_quad_t ip6s_reassembled;	/* total packets reassembled ok */
-	u_quad_t ip6s_fragmented;	/* datagrams sucessfully fragmented */
+	u_quad_t ip6s_fragmented;	/* datagrams successfully fragmented */
 	u_quad_t ip6s_ofragments;	/* output fragments created */
 	u_quad_t ip6s_cantfrag;		/* don't fragment flag was set, etc. */
 	u_quad_t ip6s_badoptions;	/* error in option processing */
@@ -240,11 +297,17 @@ struct	ip6stat {
 	 * from the destination is chosen.
 	 */
 	u_quad_t ip6s_sources_otherscope[16];
-	/* number of times that an deprecated address is chosen */
+	/* number of times that a deprecated address is chosen */
 	u_quad_t ip6s_sources_deprecated[16];
 
 	u_quad_t ip6s_forward_cachehit;
 	u_quad_t ip6s_forward_cachemiss;
+
+	/* number of times that each rule of source selection is applied. */
+	u_quad_t ip6s_sources_rule[16];
+#ifdef PRIVATE
+	u_quad_t ip6s_pktdropcntrl;	/* pkt dropped, no mbufs for control data */
+#endif /* PRIVATE */
 };
 
 #ifdef KERNEL_PRIVATE
@@ -279,12 +342,28 @@ struct ip6aux {
 };
 
 /* flags passed to ip6_output as last parameter */
-#define	IPV6_DADOUTPUT		0x01	/* DAD */
+#define	IPV6_UNSPECSRC		0x01	/* allow :: as the source address */
 #define	IPV6_FORWARDING		0x02	/* most of IPv6 header exists */
 #define	IPV6_MINMTU		0x04	/* use minimum MTU (IPV6_USE_MIN_MTU) */
+#define	IPV6_FLAG_NOSRCIFSEL	0x80	/* bypass source address selection */
+#define	IPV6_OUTARGS		0x100	/* has ancillary output info */
+
+#ifdef __NO_STRICT_ALIGNMENT
+#define IP6_HDR_ALIGNED_P(ip)	1
+#else
+#define IP6_HDR_ALIGNED_P(ip)	((((intptr_t) (ip)) & 3) == 0)
+#endif
+
+/*
+ * Extra information passed to ip6_output when IP6_OUTARGS is set.
+ */
+struct ip6_out_args {
+	unsigned int	ip6oa_boundif;	/* bound outgoing interface */
+	unsigned int	ip6oa_nocell;	/* don't use IFT_CELLULAR */
+};
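+
+/*
+ * Sketch (illustrative, not part of this header): a caller that wants to
+ * scope a send to one interface fills in the args and passes IPV6_OUTARGS:
+ *
+ *	struct ip6_out_args ip6oa = { ifscope, 0 };
+ *	error = ip6_output(m, opt, ro, flags | IPV6_OUTARGS, im6o, &ifp,
+ *	    &ip6oa);
+ */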
 
 extern struct	ip6stat ip6stat;	/* statistics */
-extern u_int32_t ip6_id;		/* fragment identifier */
+extern u_int32_t ip6_id; 		/* fragment identifier */
 extern int	ip6_defhlim;		/* default hop limit */
 extern int	ip6_defmcasthlim;	/* default multicast hop limit */
 extern int	ip6_forwarding;		/* act as router? */
@@ -293,7 +372,8 @@ extern int	ip6_gif_hlim;		/* Hop limit for gif encap packet */
 extern int	ip6_use_deprecated;	/* allow deprecated addr as source */
 extern int	ip6_rr_prune;		/* router renumbering prefix
 					 * walk list every 5 sec.    */
-#define ip6_mapped_addr_on	(!ip6_v6only)
+extern int	ip6_mcast_pmtu;		/* enable pMTU discovery for multicast? */
+#define ip6_mapped_addr_on      (!ip6_v6only)
 extern int	ip6_v6only;
 
 extern int	ip6_neighborgcthresh;	/* Threshold # of NDP entries for GC */
@@ -304,8 +384,8 @@ extern int	ip6_maxdynroutes;	/* Max # of routes created via redirect */
 extern struct socket *ip6_mrouter; 	/* multicast routing daemon */
 #endif
 extern int	ip6_sendredirects;	/* send IP redirects when forwarding? */
-extern int	ip6_maxfragpackets; 	/* Maximum packets in reassembly queue */
-extern int      ip6_maxfrags;   	/* Maximum fragments in reassembly queue */
+extern int	ip6_maxfragpackets; /* Maximum packets in reassembly queue */
+extern int	ip6_maxfrags;	/* Maximum fragments in reassembly queue */
 extern int	ip6_sourcecheck;	/* Verify source interface */
 extern int	ip6_sourcecheck_interval; /* Interval between log messages */
 extern int	ip6_accept_rtadv;	/* Acts as a host not a router */
@@ -314,6 +394,7 @@ extern int	ip6_log_interval;
 extern time_t	ip6_log_time;
 extern int	ip6_hdrnestlimit; /* upper limit of # of extension headers */
 extern int	ip6_dad_count;		/* DupAddrDetectionTransmits */
+extern int	ip6_only_allow_rfc4193_prefix;	/* RFC4193 Unique Local Unicast Prefixes only */
 
 extern u_int32_t ip6_flow_seq;
 extern int ip6_auto_flowlabel;
@@ -325,9 +406,16 @@ extern int   ip6_lowportmin;		/* minimum reserved port */
 extern int   ip6_lowportmax;		/* maximum reserved port */
 
 extern int	ip6_use_tempaddr; /* whether to use temporary addresses. */
+extern int	ip6_prefer_tempaddr; /* whether to prefer temporary addresses
+					in the source address selection */
+extern int	ip6_use_defzone; /* whether to use the default scope zone
+				    when unspecified */
 
 extern struct	pr_usrreqs rip6_usrreqs;
 extern struct   pr_usrreqs icmp6_dgram_usrreqs;
+
+extern int	ip6_doscopedroute;
+
 struct sockopt;
 
 struct inpcb;
@@ -340,51 +428,71 @@ int 	icmp6_dgram_attach(struct socket *, int , struct proc *);
 
 struct in6_ifaddr;
 void	ip6_init(void);
+void ip6_fin(void);
 void	ip6_input(struct mbuf *);
 struct in6_ifaddr *ip6_getdstifaddr(struct mbuf *);
 void	ip6_freepcbopts(struct ip6_pktopts *);
-void	ip6_freemoptions(struct ip6_moptions *);
-int	ip6_unknown_opt(u_int8_t *, struct mbuf *, int, int);
+int	ip6_unknown_opt(u_int8_t *, struct mbuf *, int);
 char *	ip6_get_prevhdr(struct mbuf *, int);
 int	ip6_nexthdr(struct mbuf *, int, int, int *);
 int	ip6_lasthdr(struct mbuf *, int, int, int *);
 
+extern void ip6_moptions_init(void);
+extern struct ip6_moptions *ip6_allocmoptions(int);
+extern void im6o_addref(struct ip6_moptions *, int);
+extern void im6o_remref(struct ip6_moptions *);
+
 struct ip6aux *ip6_addaux(struct mbuf *);
 struct ip6aux *ip6_findaux(struct mbuf *);
 void	ip6_delaux(struct mbuf *);
+extern void ip6_destroyaux(struct ip6aux *);
+extern void ip6_copyaux(struct ip6aux *, struct ip6aux *);
 
 int	ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *);
 int	ip6_process_hopopts(struct mbuf *, u_int8_t *, int, u_int32_t *,
 				 u_int32_t *);
-void	ip6_savecontrol(struct inpcb *, struct mbuf **, struct ip6_hdr *,
-			     struct mbuf *);
-void	ip6_forward(struct mbuf *, struct route_in6 *, int, int);
-
+struct mbuf	**ip6_savecontrol_v4(struct inpcb *, struct mbuf *,
+	    struct mbuf **, int *);
+int	ip6_savecontrol(struct inpcb *, struct mbuf *, struct mbuf **);
+void	ip6_forward(struct mbuf *, struct route_in6 *, int);
+void	ip6_notify_pmtu __P((struct inpcb *, struct sockaddr_in6 *,
+			     u_int32_t *));
 void	ip6_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in6 *);
-int	ip6_output(struct mbuf *, struct ip6_pktopts *,
-			struct route_in6 *,
-			int,
-			struct ip6_moptions *, struct ifnet **, int locked);
+int	ip6_output(struct mbuf *, struct ip6_pktopts *, struct route_in6 *,
+	    int, struct ip6_moptions *, struct ifnet **,
+	    struct ip6_out_args *);
 int	ip6_ctloutput(struct socket *, struct sockopt *sopt);
 void	ip6_initpktopts(struct ip6_pktopts *);
 int	ip6_setpktoptions(struct mbuf *, struct ip6_pktopts *, int, int);
-void	ip6_clearpktopts(struct ip6_pktopts *, int, int);
+void	ip6_clearpktopts(struct ip6_pktopts *, int);
 struct ip6_pktopts *ip6_copypktopts(struct ip6_pktopts *, int);
 int	ip6_optlen(struct inpcb *);
 
-int	route6_input(struct mbuf **, int *);
+int	route6_input(struct mbuf **, int *, int);
 
 void	frag6_init(void);
-int	frag6_input(struct mbuf **, int *);
+int	frag6_input(struct mbuf **, int *, int);
 void	frag6_slowtimo(void);
 void	frag6_drain(void);
 
-int	rip6_input(struct mbuf **mp, int *offset);
+int	rip6_input(struct mbuf **, int *, int);
 void	rip6_ctlinput(int, struct sockaddr *, void *);
 int	rip6_ctloutput(struct socket *so, struct sockopt *sopt);
-int	rip6_output(struct mbuf *, struct socket *, struct sockaddr_in6 *, struct mbuf *);
+int	rip6_output(struct mbuf *, struct socket *, struct sockaddr_in6 *, struct mbuf *, int);
+
+int	dest6_input(struct mbuf **, int *, int);
+extern struct in6_addr *in6_selectsrc(struct sockaddr_in6 *,
+	    struct ip6_pktopts *, struct inpcb *, struct route_in6 *,
+	    struct ifnet **, struct in6_addr *, unsigned int, int *);
+extern struct in6_addrpolicy *
+	in6_addrsel_lookup_policy(struct sockaddr_in6 *);
+int in6_selectroute(struct sockaddr_in6 *, struct sockaddr_in6 *,
+	struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *,
+	struct ifnet **, struct rtentry **, int, unsigned int, unsigned int);
+int ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
+	struct ip6_pktopts *stickyopt, int uproto);
+u_int32_t ip6_randomid(void);
+u_int32_t ip6_randomflowlabel(void);
 
-int	dest6_input(struct mbuf **, int *);
 #endif /* KERNEL */
 #endif /* KERNEL_PRIVATE */
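
The IM6O_* macros added in this header pair an embedded mutex with a
reference count on struct ip6_moptions.  A hedged sketch of the calling
pattern they imply for code that borrows a socket's multicast options
(where the options come from is hypothetical here):

	struct ip6_moptions *im6o = in6p->in6p_moptions; /* hypothetical */
	struct ifnet *ifp = NULL;

	if (im6o != NULL) {
		IM6O_ADDREF(im6o);		/* hold it across the unlock */
		IM6O_LOCK(im6o);
		ifp = im6o->im6o_multicast_ifp;	/* fields read under lock */
		IM6O_UNLOCK(im6o);
		/* ... use ifp ... */
		IM6O_REMREF(im6o);		/* may free on last release */
	}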
 
diff --git a/bsd/netinet6/ip6protosw.h b/bsd/netinet6/ip6protosw.h
index 303f61964..dbadffc81 100644
--- a/bsd/netinet6/ip6protosw.h
+++ b/bsd/netinet6/ip6protosw.h
@@ -1,6 +1,6 @@
 /*	$FreeBSD: src/sys/netinet6/ip6protosw.h,v 1.2.2.3 2001/07/03 11:01:54 ume Exp $	*/
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -122,7 +122,7 @@ struct pr_usrreqs;
  * argument type for the last arg of pr_ctlinput().
  * should be consulted only with AF_INET6 family.
  *
- * IPv6 ICMP IPv6 [exthdrs] finalhdr paylaod
+ * IPv6 ICMP IPv6 [exthdrs] finalhdr payload
  * ^    ^    ^              ^
  * |    |    ip6c_ip6       ip6c_off
  * |    ip6c_icmp6
@@ -157,7 +157,7 @@ struct ip6protosw {
 	short	pr_protocol;		/* protocol number */
         unsigned int pr_flags;          /* see below */
 /* protocol-protocol hooks */
-	int	(*pr_input)(struct mbuf **, int *);
+	int	(*pr_input)(struct mbuf **, int *, int);
 					/* input to protocol (from below) */
 	int	(*pr_output)(struct mbuf *m, struct socket *so,
 				     struct sockaddr_in6 *, struct mbuf *);
@@ -173,8 +173,12 @@ struct ip6protosw {
 
 /* utility hooks */
 	void	(*pr_init)(void);	/* initialization hook */
+#if __APPLE__
+	void	(*pr_unused)(void);	/* placeholder - fasttimo is removed */
+#else
 	void	(*pr_fasttimo)(void);
 					/* fast timeout (200ms) */
+#endif
 	void	(*pr_slowtimo)(void);
 					/* slow timeout (500ms) */
 	void	(*pr_drain)(void);
diff --git a/bsd/netinet6/ipcomp6.h b/bsd/netinet6/ipcomp6.h
index 8fd6fdba9..2bd7b6678 100644
--- a/bsd/netinet6/ipcomp6.h
+++ b/bsd/netinet6/ipcomp6.h
@@ -40,7 +40,7 @@
 #include <netinet6/ipsec.h>
 
 #ifdef KERNEL_PRIVATE
-extern int ipcomp6_input(struct mbuf **, int *);
+extern int ipcomp6_input(struct mbuf **, int *, int);
 extern int ipcomp6_output(struct mbuf *, u_char *, struct mbuf *,
 	struct secasvar *);
 #endif /* KERNEL_PRIVATE */
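
This prototype change is one instance of a convention applied to every IPv6
protocol input hook in the patch: handlers now receive the protocol number
as a third argument, going from (struct mbuf **, int *) to
(struct mbuf **, int *, int).  A minimal skeleton under the new signature
(the handler name is illustrative; ipcomp6_input below shows a real
conversion):

	int
	proto6_input(struct mbuf **mp, int *offp, int proto)
	{
	#pragma unused(proto)		/* quiet the warning when unused */
		struct mbuf *m = *mp;
		int off = *offp;

		/* ... parse the header at offset 'off' within 'm' ... */
		return (IPPROTO_DONE);
	}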
diff --git a/bsd/netinet6/ipcomp_input.c b/bsd/netinet6/ipcomp_input.c
index c9473dd8b..3c6a9a43d 100644
--- a/bsd/netinet6/ipcomp_input.c
+++ b/bsd/netinet6/ipcomp_input.c
@@ -78,6 +78,7 @@
 #include <netkey/keydb.h>
 
 #include <net/net_osdep.h>
+#include <mach/sdt.h>
 
 #define IPLEN_FLIPPED
 
@@ -214,6 +215,11 @@ ipcomp4_input(struct mbuf *m, int off)
 			IPSEC_STAT_INCREMENT(ipsecstat.in_polvio);
 			goto fail;
 		}
+
+		DTRACE_IP6(receive, struct mbuf *, m, struct inpcb *, NULL,
+                        struct ip *, ip, struct ifnet *, m->m_pkthdr.rcvif,
+                        struct ip *, ip, struct ip6_hdr *, NULL);
+
 		ip_proto_dispatch_in(m, off, nxt, 0);
 	} else
 		m_freem(m);
@@ -233,10 +239,9 @@ fail:
 
 #if INET6
 int
-ipcomp6_input(mp, offp)
-	struct mbuf **mp;
-	int *offp;
+ipcomp6_input(struct mbuf **mp, int *offp, int proto)
 {
+#pragma unused(proto)
 	struct mbuf *m, *md;
 	int off;
 	struct ip6_hdr *ip6;
diff --git a/bsd/netinet6/ipsec.c b/bsd/netinet6/ipsec.c
index 6a7da3d2b..91fd6db6d 100644
--- a/bsd/netinet6/ipsec.c
+++ b/bsd/netinet6/ipsec.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -143,7 +143,6 @@ int ipsec_debug = 0;
 #define DBG_FNC_IPSEC_OUT		NETDBG_CODE(DBG_NETIPSEC, (3 << 8))
 
 extern lck_mtx_t *sadb_mutex;
-extern lck_mtx_t *ip6_mutex;
 
 struct ipsecstat ipsecstat;
 int ip4_ah_cleartos = 1;
@@ -169,33 +168,33 @@ SYSCTL_DECL(_net_inet6_ipsec6);
 #endif
 /* net.inet.ipsec */
 SYSCTL_STRUCT(_net_inet_ipsec, IPSECCTL_STATS,
-	stats, CTLFLAG_RD,	&ipsecstat,	ipsecstat, "");
-SYSCTL_PROC(_net_inet_ipsec, IPSECCTL_DEF_POLICY, def_policy, CTLTYPE_INT|CTLFLAG_RW,
+	stats, CTLFLAG_RD | CTLFLAG_LOCKED,	&ipsecstat,	ipsecstat, "");
+SYSCTL_PROC(_net_inet_ipsec, IPSECCTL_DEF_POLICY, def_policy, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED,
 	&ip4_def_policy.policy,	0, &sysctl_def_policy, "I", "");
 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev,
-	CTLFLAG_RW, &ip4_esp_trans_deflev,	0, "");
+	CTLFLAG_RW | CTLFLAG_LOCKED, &ip4_esp_trans_deflev,	0, "");
 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_NETLEV, esp_net_deflev,
-	CTLFLAG_RW, &ip4_esp_net_deflev,	0, "");
+	CTLFLAG_RW | CTLFLAG_LOCKED, &ip4_esp_net_deflev,	0, "");
 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_TRANSLEV, ah_trans_deflev,
-	CTLFLAG_RW, &ip4_ah_trans_deflev,	0, "");
+	CTLFLAG_RW | CTLFLAG_LOCKED, &ip4_ah_trans_deflev,	0, "");
 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev,
-	CTLFLAG_RW, &ip4_ah_net_deflev,	0, "");
+	CTLFLAG_RW | CTLFLAG_LOCKED, &ip4_ah_net_deflev,	0, "");
 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_CLEARTOS,
-	ah_cleartos, CTLFLAG_RW,	&ip4_ah_cleartos,	0, "");
+	ah_cleartos, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip4_ah_cleartos,	0, "");
 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_OFFSETMASK,
-	ah_offsetmask, CTLFLAG_RW,	&ip4_ah_offsetmask,	0, "");
+	ah_offsetmask, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip4_ah_offsetmask,	0, "");
 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DFBIT,
-	dfbit, CTLFLAG_RW,	&ip4_ipsec_dfbit,	0, "");
+	dfbit, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip4_ipsec_dfbit,	0, "");
 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_ECN,
-	ecn, CTLFLAG_RW,	&ip4_ipsec_ecn,	0, "");
+	ecn, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip4_ipsec_ecn,	0, "");
 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEBUG,
-	debug, CTLFLAG_RW,	&ipsec_debug,	0, "");
+	debug, CTLFLAG_RW | CTLFLAG_LOCKED,	&ipsec_debug,	0, "");
 SYSCTL_INT(_net_inet_ipsec, IPSECCTL_ESP_RANDPAD,
-	esp_randpad, CTLFLAG_RW,	&ip4_esp_randpad,	0, "");
+	esp_randpad, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip4_esp_randpad,	0, "");
 
 /* for performance, we bypass ipsec until a security policy is set */
 int ipsec_bypass = 1;
-SYSCTL_INT(_net_inet_ipsec, OID_AUTO, bypass, CTLFLAG_RD, &ipsec_bypass,0, "");
+SYSCTL_INT(_net_inet_ipsec, OID_AUTO, bypass, CTLFLAG_RD | CTLFLAG_LOCKED, &ipsec_bypass,0, "");
 
 /*
  * NAT Traversal requires a UDP port for encapsulation,
@@ -204,7 +203,7 @@ SYSCTL_INT(_net_inet_ipsec, OID_AUTO, bypass, CTLFLAG_RD, &ipsec_bypass,0, "");
  * for nat traversal.
  */
 SYSCTL_INT(_net_inet_ipsec, OID_AUTO, esp_port,
-		   CTLFLAG_RW, &esp_udp_encap_port, 0, "");
+		   CTLFLAG_RW | CTLFLAG_LOCKED, &esp_udp_encap_port, 0, "");
 
 #if INET6
 struct ipsecstat ipsec6stat;
@@ -218,23 +217,23 @@ int ip6_esp_randpad = -1;
 
 /* net.inet6.ipsec6 */
 SYSCTL_STRUCT(_net_inet6_ipsec6, IPSECCTL_STATS,
-	stats, CTLFLAG_RD, &ipsec6stat, ipsecstat, "");
+	stats, CTLFLAG_RD | CTLFLAG_LOCKED, &ipsec6stat, ipsecstat, "");
 SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_POLICY,
-	def_policy, CTLFLAG_RW,	&ip6_def_policy.policy,	0, "");
+	def_policy, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_def_policy.policy,	0, "");
 SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev,
-	CTLFLAG_RW, &ip6_esp_trans_deflev,	0, "");
+	CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_esp_trans_deflev,	0, "");
 SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_NETLEV, esp_net_deflev,
-	CTLFLAG_RW, &ip6_esp_net_deflev,	0, "");
+	CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_esp_net_deflev,	0, "");
 SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_TRANSLEV, ah_trans_deflev,
-	CTLFLAG_RW, &ip6_ah_trans_deflev,	0, "");
+	CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_ah_trans_deflev,	0, "");
 SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev,
-	CTLFLAG_RW, &ip6_ah_net_deflev,	0, "");
+	CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_ah_net_deflev,	0, "");
 SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_ECN,
-	ecn, CTLFLAG_RW,	&ip6_ipsec_ecn,	0, "");
+	ecn, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_ipsec_ecn,	0, "");
 SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEBUG,
-	debug, CTLFLAG_RW,	&ipsec_debug,	0, "");
+	debug, CTLFLAG_RW | CTLFLAG_LOCKED,	&ipsec_debug,	0, "");
 SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_ESP_RANDPAD,
-	esp_randpad, CTLFLAG_RW,	&ip6_esp_randpad,	0, "");
+	esp_randpad, CTLFLAG_RW | CTLFLAG_LOCKED,	&ip6_esp_randpad,	0, "");
 #endif /* INET6 */
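
Every sysctl hunk above ORs CTLFLAG_LOCKED into the flags, marking the entry
as safe to service without the legacy global lock because the handler (or a
plain aligned-integer access) supplies its own synchronization.  A new
tunable declared under the same convention would look like this (sketch; the
knob itself is made up):

	static int ipsec_example_knob = 0;	/* hypothetical tunable */
	SYSCTL_INT(_net_inet_ipsec, OID_AUTO, example_knob,
	    CTLFLAG_RW | CTLFLAG_LOCKED, &ipsec_example_knob, 0, "");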
 
 static int ipsec_setspidx_mbuf(struct secpolicyindex *, u_int, u_int,
@@ -1717,7 +1716,7 @@ ipsec_get_reqlevel(isr)
 		? (ipsec_debug						      \
 			? log(LOG_INFO, "fixed system default level " #lev ":%d->%d\n",\
 				(lev), IPSEC_LEVEL_REQUIRE)		      \
-			: 0),						      \
+			: (void)0),									  \
 			(lev) = IPSEC_LEVEL_REQUIRE,			      \
 			(lev)						      \
 		: (lev))
@@ -2961,13 +2960,19 @@ ipsec4_output(
 			}
 			ip = mtod(state->m, struct ip *);
 
+			// grab sadb_mutex, before updating sah's route cache
+			lck_mtx_lock(sadb_mutex);
 			state->ro = &sav->sah->sa_route;
 			state->dst = (struct sockaddr *)&state->ro->ro_dst;
 			dst4 = (struct sockaddr_in *)state->dst;
+			if (state->ro->ro_rt != NULL) {
+				RT_LOCK(state->ro->ro_rt);
+			}
 			if (state->ro->ro_rt != NULL &&
 			    (state->ro->ro_rt->generation_id != route_generation ||
 			    !(state->ro->ro_rt->rt_flags & RTF_UP) ||
 			    dst4->sin_addr.s_addr != ip->ip_dst.s_addr)) {
+				RT_UNLOCK(state->ro->ro_rt);
 				rtfree(state->ro->ro_rt);
 				state->ro->ro_rt = NULL;
 			}
@@ -2976,11 +2981,14 @@ ipsec4_output(
 				dst4->sin_len = sizeof(*dst4);
 				dst4->sin_addr = ip->ip_dst;
 				rtalloc(state->ro);
-			}
-			if (state->ro->ro_rt == 0) {
-				OSAddAtomic(1, &ipstat.ips_noroute);
-				error = EHOSTUNREACH;
-				goto bad;
+				if (state->ro->ro_rt == 0) {
+					OSAddAtomic(1, &ipstat.ips_noroute);
+					error = EHOSTUNREACH;
+					// release sadb_mutex, after updating sah's route cache
+					lck_mtx_unlock(sadb_mutex);
+					goto bad;
+				}
+				RT_LOCK(state->ro->ro_rt);
 			}
 
 			/*
@@ -2996,6 +3004,9 @@ ipsec4_output(
 				state->dst = (struct sockaddr *)state->ro->ro_rt->rt_gateway;
 				dst4 = (struct sockaddr_in *)state->dst;
 			}
+			RT_UNLOCK(state->ro->ro_rt);
+			// release sadb_mutex, after updating sah's route cache
+			lck_mtx_unlock(sadb_mutex);
 		}
 
 		state->m = ipsec4_splithdr(state->m);
@@ -3384,7 +3395,8 @@ ipsec6_output_tunnel(
 				struct ip *ip;
 				struct sockaddr_in* dst4;
 				struct route *ro4 = NULL;
-				struct ip_out_args ipoa = { IFSCOPE_NONE };
+				struct route  ro4_copy;
+				struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
 
 				/*
 				 * must be last isr because encapsulated IPv6 packet
@@ -3406,12 +3418,18 @@ ipsec6_output_tunnel(
 				/* Now we have an IPv4 packet */
 				ip = mtod(state->m, struct ip *);
 
+				// grab sadb_mutex, to update sah's route cache and get a local copy of it
+				lck_mtx_lock(sadb_mutex);
 				ro4 = &sav->sah->sa_route;
 				dst4 = (struct sockaddr_in *)&ro4->ro_dst;
+				if (ro4->ro_rt) {
+					RT_LOCK(ro4->ro_rt);
+				}
 				if (ro4->ro_rt != NULL &&
 				    (ro4->ro_rt->generation_id != route_generation ||
 				    !(ro4->ro_rt->rt_flags & RTF_UP) ||
 				    dst4->sin_addr.s_addr != ip->ip_dst.s_addr)) {
+					RT_UNLOCK(ro4->ro_rt);
 					rtfree(ro4->ro_rt);
 					ro4->ro_rt = NULL;
 				}
@@ -3419,10 +3437,18 @@ ipsec6_output_tunnel(
 					dst4->sin_family = AF_INET;
 					dst4->sin_len = sizeof(*dst4);
 					dst4->sin_addr = ip->ip_dst;
+				} else {
+					RT_UNLOCK(ro4->ro_rt);
 				}
+				route_copyout(&ro4_copy, ro4, sizeof(ro4_copy));
+				// release sadb_mutex, after updating sah's route cache and getting a local copy
+				lck_mtx_unlock(sadb_mutex);
 				state->m = ipsec4_splithdr(state->m);
 				if (!state->m) {
 					error = ENOMEM;
+					if (ro4_copy.ro_rt != NULL) {
+						rtfree(ro4_copy.ro_rt);
+					}
 					goto bad;
 				}
 				switch (isr->saidx.proto) {
@@ -3430,6 +3456,9 @@ ipsec6_output_tunnel(
 #if IPSEC_ESP
 					if ((error = esp4_output(state->m, sav)) != 0) {
 						state->m = NULL;
+						if (ro4_copy.ro_rt != NULL) {
+							rtfree(ro4_copy.ro_rt);
+						}
 						goto bad;
 					}
 					break;
@@ -3438,17 +3467,26 @@ ipsec6_output_tunnel(
 					m_freem(state->m);
 					state->m = NULL;
 					error = EINVAL;
+					if (ro4_copy.ro_rt != NULL) {
+						rtfree(ro4_copy.ro_rt);
+					}
 					goto bad;
 #endif
 				case IPPROTO_AH:
 					if ((error = ah4_output(state->m, sav)) != 0) {
 						state->m = NULL;
+						if (ro4_copy.ro_rt != NULL) {
+							rtfree(ro4_copy.ro_rt);
+						}
 						goto bad;
 					}
 					break;
 				case IPPROTO_IPCOMP:
 					if ((error = ipcomp4_output(state->m, sav)) != 0) {
 						state->m = NULL;
+						if (ro4_copy.ro_rt != NULL) {
+							rtfree(ro4_copy.ro_rt);
+						}
 						goto bad;
 					}
 					break;
@@ -3459,17 +3497,27 @@ ipsec6_output_tunnel(
 					m_freem(state->m);
 					state->m = NULL;
 					error = EINVAL;
+					if (ro4_copy.ro_rt != NULL) {
+						rtfree(ro4_copy.ro_rt);
+					}
 					goto bad;
 				}
 		
 				if (state->m == 0) {
 					error = ENOMEM;
+					if (ro4_copy.ro_rt != NULL) {
+						rtfree(ro4_copy.ro_rt);
+					}
 					goto bad;
 				}
 				ip = mtod(state->m, struct ip *);
 				ip->ip_len = ntohs(ip->ip_len);  /* flip len field before calling ip_output */
-				error = ip_output(state->m, NULL, ro4, IP_OUTARGS, NULL, &ipoa);
+				error = ip_output(state->m, NULL, &ro4_copy, IP_OUTARGS, NULL, &ipoa);
 				state->m = NULL;
+				// grab sadb_mutex, to synchronize the sah's route cache with the local copy
+				lck_mtx_lock(sadb_mutex);
+				route_copyin(&ro4_copy, ro4, sizeof(ro4_copy));
+				lck_mtx_unlock(sadb_mutex);
 				if (error != 0)
 					goto bad;
 				goto done;
@@ -3481,14 +3529,20 @@ ipsec6_output_tunnel(
 				error = EAFNOSUPPORT;
 				goto bad;
 			}
-			
+
+			// grab sadb_mutex, before updating sah's route cache
+			lck_mtx_lock(sadb_mutex);
 			state->ro = &sav->sah->sa_route;
 			state->dst = (struct sockaddr *)&state->ro->ro_dst;
 			dst6 = (struct sockaddr_in6 *)state->dst;
+			if (state->ro->ro_rt) {
+				RT_LOCK(state->ro->ro_rt);
+			}
 			if (state->ro->ro_rt != NULL &&
 			    (state->ro->ro_rt->generation_id != route_generation ||
 			    !(state->ro->ro_rt->rt_flags & RTF_UP) ||
 			    !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst))) {
+				RT_UNLOCK(state->ro->ro_rt);
 				rtfree(state->ro->ro_rt);
 				state->ro->ro_rt = NULL;
 			}
@@ -3498,11 +3552,16 @@ ipsec6_output_tunnel(
 				dst6->sin6_len = sizeof(*dst6);
 				dst6->sin6_addr = ip6->ip6_dst;
 				rtalloc(state->ro);
+				if (state->ro->ro_rt) {
+					RT_LOCK(state->ro->ro_rt);
+				}
 			}
 			if (state->ro->ro_rt == 0) {
 				ip6stat.ip6s_noroute++;
 				IPSEC_STAT_INCREMENT(ipsec6stat.out_noroute);
 				error = EHOSTUNREACH;
+				// release sadb_mutex, after updating sah's route cache
+				lck_mtx_unlock(sadb_mutex);
 				goto bad;
 			}
 
@@ -3519,6 +3578,9 @@ ipsec6_output_tunnel(
 				state->dst = (struct sockaddr *)state->ro->ro_rt->rt_gateway;
 				dst6 = (struct sockaddr_in6 *)state->dst;
 			}
+			RT_UNLOCK(state->ro->ro_rt);
+			// release sadb_mutex, after updating sah's route cache
+			lck_mtx_unlock(sadb_mutex);
 		}
 
 		state->m = ipsec6_splithdr(state->m);
@@ -3982,8 +4044,8 @@ ipsec_addaux(
 		struct ipsec_tag	*itag;
 		
 		/* Allocate a tag */
-		tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPSEC,
-						  IPSEC_TAG_SIZE, M_DONTWAIT);
+		tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPSEC,
+						  IPSEC_TAG_SIZE, M_DONTWAIT, m);
 		
 		if (tag) {
 			itag = (struct ipsec_tag*)(tag + 1);
@@ -4128,7 +4190,8 @@ ipsec_send_natt_keepalive(
 	struct udphdr *uh;
 	struct ip *ip;
 	int error;
-	struct ip_out_args ipoa = { IFSCOPE_NONE };
+	struct ip_out_args ipoa = { IFSCOPE_NONE, 0 };
+	struct route ro;
 
 	lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED);
 	
@@ -4168,8 +4231,23 @@ ipsec_send_natt_keepalive(
 	uh->uh_ulen = htons(1 + sizeof(struct udphdr));
 	uh->uh_sum = 0;
 	*(u_int8_t*)((char*)m_mtod(m) + sizeof(struct ip) + sizeof(struct udphdr)) = 0xFF;
-	
-	error = ip_output(m, NULL, &sav->sah->sa_route, IP_OUTARGS | IP_NOIPSEC, NULL, &ipoa);
+
+	// grab sadb_mutex, to get a local copy of sah's route cache
+	lck_mtx_lock(sadb_mutex);
+	if (sav->sah->sa_route.ro_rt != NULL &&
+	    rt_key(sav->sah->sa_route.ro_rt)->sa_family != AF_INET) {
+		rtfree(sav->sah->sa_route.ro_rt);
+		sav->sah->sa_route.ro_rt = NULL;
+	}
+	route_copyout(&ro, &sav->sah->sa_route, sizeof(ro));
+	lck_mtx_unlock(sadb_mutex);
+
+	error = ip_output(m, NULL, &ro, IP_OUTARGS | IP_NOIPSEC, NULL, &ipoa);
+
+	// grab sadb_mutex, to synchronize the sah's route cache with the local copy
+	lck_mtx_lock(sadb_mutex);
+	route_copyin(&ro, &sav->sah->sa_route, sizeof(ro));
+	lck_mtx_unlock(sadb_mutex);
 	if (error == 0) {
 		sav->natt_last_activity = natt_now;
 		return TRUE;
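
The keepalive change above is the clearest statement of the route-cache
discipline this patch applies throughout ipsec.c: the shared cache in
sav->sah->sa_route is only touched while sadb_mutex is held, a private copy
is handed to ip_output() so the mutex is never held across the send, and the
possibly-updated copy is synchronized back afterwards.  The pattern in
outline (condensed from the code above, not new logic):

	struct route ro;

	lck_mtx_lock(sadb_mutex);
	route_copyout(&ro, &sav->sah->sa_route, sizeof (ro)); /* local copy */
	lck_mtx_unlock(sadb_mutex);

	error = ip_output(m, NULL, &ro, IP_OUTARGS | IP_NOIPSEC, NULL, &ipoa);

	lck_mtx_lock(sadb_mutex);
	route_copyin(&ro, &sav->sah->sa_route, sizeof (ro)); /* write back */
	lck_mtx_unlock(sadb_mutex);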
diff --git a/bsd/netinet6/mld6.c b/bsd/netinet6/mld6.c
index 7e9a882e8..90bcb94b4 100644
--- a/bsd/netinet6/mld6.c
+++ b/bsd/netinet6/mld6.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,13 +25,8 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-
-/*	$FreeBSD: src/sys/netinet6/mld6.c,v 1.4.2.2 2001/07/03 11:01:54 ume Exp $	*/
-/*	$KAME: mld6.c,v 1.27 2001/04/04 05:17:30 itojun Exp $	*/
-
-/*
- * Copyright (C) 1998 WIDE Project.
- * All rights reserved.
+/*-
+ * Copyright (c) 2009 Bruce Simpson.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -41,14 +36,14 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the project nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
+ * 3. The name of the author may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
  *
- * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
@@ -103,426 +98,3312 @@
  * Version 2.0.
  */
 
+#include <sys/cdefs.h>
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/protosw.h>
-#include <sys/syslog.h>
+#include <sys/sysctl.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mcache.h>
+
+#include <kern/zalloc.h>
 
 #include <net/if.h>
+#include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
+#include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
+#include <netinet6/scope6_var.h>
 #include <netinet/icmp6.h>
+#include <netinet6/mld6.h>
 #include <netinet6/mld6_var.h>
 
-#include <net/net_osdep.h>
+/* Lock group and attribute for mld6_mtx */
+static lck_attr_t       *mld_mtx_attr;
+static lck_grp_t        *mld_mtx_grp;
+static lck_grp_attr_t   *mld_mtx_grp_attr;
+
+/*
+ * Locking and reference counting:
+ *
+ * mld_mtx mainly protects mli_head.  In cases where both mld_mtx and
+ * in6_multihead_lock must be held, the former must be acquired first in order
+ * to maintain lock ordering.  mld_mtx need not be held merely to acquire
+ * in6_multihead_lock, but whenever both are acquired in succession, the
+ * above ordering must be followed.
+ *
+ * Instead of walking the if_multiaddrs list at the interface and returning
+ * the ifma_protospec value of a matching entry, we search the global list
+ * of in6_multi records and find it that way; this is done with in6_multihead
+ * lock held.  Doing so avoids the race condition issues that many other BSDs
+ * suffer from (therefore in our implementation, ifma_protospec will never be
+ * NULL for as long as the in6_multi is valid.)
+ *
+ * The above creates a requirement for the in6_multi to stay in in6_multihead
+ * list even after the final MLD leave (in MLDv2 mode) until it no longer needs
+ * to be retransmitted (this is not required for MLDv1.)  In order to handle
+ * this, the request and reference counts of the in6_multi are bumped up when
+ * the state changes to MLD_LEAVING_MEMBER, and later dropped in the timeout
+ * handler.  Each in6_multi holds a reference to the underlying mld_ifinfo.
+ *
+ * Thus, the permitted lock order is:
+ *
+ *	mld_mtx, in6_multihead_lock, inm6_lock, mli_lock
+ *
+ * Any may be taken independently, but if any are held at the same time,
+ * the above lock order must be followed.
+ */
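+/*
+ * Illustrative sketch (not part of the change itself): acquiring several of
+ * these locks together, in the order documented above:
+ *
+ *	MLD_LOCK();			(mld_mtx)
+ *	in6_multihead_lock_shared();
+ *	IN6M_LOCK(inm);			(inm6_lock)
+ *	MLI_LOCK(mli);			(mli_lock)
+ *	...
+ *	MLI_UNLOCK(mli);
+ *	IN6M_UNLOCK(inm);
+ *	in6_multihead_lock_done();
+ *	MLD_UNLOCK();
+ */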
+static decl_lck_mtx_data(, mld_mtx);
+
+static void	mli_initvar(struct mld_ifinfo *, struct ifnet *, int);
+static struct mld_ifinfo *mli_alloc(int);
+static void	mli_free(struct mld_ifinfo *);
+static void	mli_delete(const struct ifnet *);
+static void	mld_dispatch_packet(struct mbuf *);
+static void	mld_final_leave(struct in6_multi *, struct mld_ifinfo *);
+static int	mld_handle_state_change(struct in6_multi *,
+		    struct mld_ifinfo *);
+static int	mld_initial_join(struct in6_multi *, struct mld_ifinfo *,
+		    const int);
+#ifdef MLD_DEBUG
+static const char *	mld_rec_type_to_str(const int);
+#endif
+static void	mld_set_version(struct mld_ifinfo *, const int);
+static void	mld_flush_relq(struct mld_ifinfo *);
+static void	mld_dispatch_queue(struct mld_ifinfo *, struct ifqueue *, int);
+static int	mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
+		    /*const*/ struct mld_hdr *);
+static int	mld_v1_input_report(struct ifnet *, const struct ip6_hdr *,
+		    /*const*/ struct mld_hdr *);
+static void	mld_v1_process_group_timer(struct in6_multi *, const int);
+static void	mld_v1_process_querier_timers(struct mld_ifinfo *);
+static int	mld_v1_transmit_report(struct in6_multi *, const int);
+static void	mld_v1_update_group(struct in6_multi *, const int);
+static void	mld_v2_cancel_link_timers(struct mld_ifinfo *);
+static void	mld_v2_dispatch_general_query(struct mld_ifinfo *);
+static struct mbuf *
+		mld_v2_encap_report(struct ifnet *, struct mbuf *);
+static int	mld_v2_enqueue_filter_change(struct ifqueue *,
+		    struct in6_multi *);
+static int	mld_v2_enqueue_group_record(struct ifqueue *,
+		    struct in6_multi *, const int, const int, const int,
+		    const int);
+static int	mld_v2_input_query(struct ifnet *, const struct ip6_hdr *,
+		    struct mbuf *, const int, const int);
+static int	mld_v2_merge_state_changes(struct in6_multi *,
+		    struct ifqueue *);
+static void	mld_v2_process_group_timers(struct mld_ifinfo *,
+		    struct ifqueue *, struct ifqueue *,
+		    struct in6_multi *, const int);
+static int	mld_v2_process_group_query(struct in6_multi *,
+		    int, struct mbuf *, const int);
+static int	sysctl_mld_gsr SYSCTL_HANDLER_ARGS;
+static int	sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS;
+
+/*
+ * Normative references: RFC 2710, RFC 3590, RFC 3810.
+ *
+ *  XXX LOR PREVENTION
+ *  A special case for IPv6 is the in6_setscope() routine. ip6_output()
+ *  will not accept an ifp; it wants an embedded scope ID, unlike
+ *  ip_output(), which happily takes the ifp given to it. The embedded
+ *  scope ID is only used by MLD to select the outgoing interface.
+ *
+ *  As such, we exploit the fact that the scope ID is just the interface
+ *  index, and embed it in the IPv6 destination address accordingly.
+ *  This is potentially NOT VALID for MLDv1 reports, as they
+ *  are always sent to the multicast group itself; as MLDv2
+ *  reports are always sent to ff02::16, this is not an issue
+ *  when MLDv2 is in use.
+ */
+
+#define	MLD_EMBEDSCOPE(pin6, zoneid) \
+	(pin6)->s6_addr16[1] = htons((zoneid) & 0xFFFF)
+
+static struct timeval mld_gsrdelay = {10, 0};
+static LIST_HEAD(, mld_ifinfo) mli_head;
+
+static int interface_timers_running6;
+static int state_change_timers_running6;
+static int current_state_timers_running6;
+
+static decl_lck_mtx_data(, mld6_mtx);
+
+#define	MLD_LOCK()			\
+	lck_mtx_lock(&mld6_mtx)
+#define	MLD_LOCK_ASSERT_HELD()		\
+	lck_mtx_assert(&mld6_mtx, LCK_MTX_ASSERT_OWNED)
+#define	MLD_LOCK_ASSERT_NOTHELD()	\
+	lck_mtx_assert(&mld6_mtx, LCK_MTX_ASSERT_NOTOWNED)
+#define	MLD_UNLOCK()			\
+	lck_mtx_unlock(&mld6_mtx)
+
+#define	MLI_ZONE_MAX		64		/* maximum elements in zone */
+#define	MLI_ZONE_NAME		"mld_ifinfo"	/* zone name */
+
+static unsigned int mli_size;			/* size of zone element */
+static struct zone *mli_zone;			/* zone for mld_ifinfo */
+
+SYSCTL_DECL(_net_inet6);	/* Note: Not in any common header. */
+
+SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
+    "IPv6 Multicast Listener Discovery");
+SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+    &mld_gsrdelay.tv_sec, 0, sysctl_mld_gsr, "I",
+    "Rate limit for MLDv2 Group-and-Source queries in seconds");
+
+SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
+   sysctl_mld_ifinfo, "Per-interface MLDv2 state");
+
+static int	mld_v1enable = 1;
+SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &mld_v1enable, 0, "Enable fallback to MLDv1");
+
+static int	mld_use_allow = 1;
+SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");
+
+#ifdef MLD_DEBUG
+int mld_debug = 0;
+SYSCTL_INT(_net_inet6_mld, OID_AUTO,
+	debug, CTLFLAG_RW | CTLFLAG_LOCKED,	&mld_debug, 0, "");
+#endif
+/*
+ * Packed Router Alert option structure declaration.
+ */
+struct mld_raopt {
+	struct ip6_hbh		hbh;
+	struct ip6_opt		pad;
+	struct ip6_opt_router	ra;
+} __packed;
+
+/*
+ * Router Alert hop-by-hop option header.
+ */
+static struct mld_raopt mld_ra = {
+	.hbh = { 0, 0 },
+	.pad = { .ip6o_type = IP6OPT_PADN, 0 },
+	.ra = {
+	    .ip6or_type = (u_int8_t)IP6OPT_ROUTER_ALERT,
+	    .ip6or_len = (u_int8_t)(IP6OPT_RTALERT_LEN - 2),
+	    .ip6or_value =  {((IP6OPT_RTALERT_MLD >> 8) & 0xFF),
+	        (IP6OPT_RTALERT_MLD & 0xFF) }
+	}
+};
+static struct ip6_pktopts mld_po;
+
+/*
+ * Retrieve or set threshold between group-source queries in seconds.
+ */
+static int
+sysctl_mld_gsr SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+	int error;
+	int i;
+
+	MLD_LOCK();
+
+	i = mld_gsrdelay.tv_sec;
+
+	error = sysctl_handle_int(oidp, &i, 0, req);
+	if (error || !req->newptr)
+		goto out_locked;
+
+	if (i < -1 || i >= 60) {
+		error = EINVAL;
+		goto out_locked;
+	}
+
+	mld_gsrdelay.tv_sec = i;
+
+out_locked:
+	MLD_UNLOCK();
+	return (error);
+}
+/*
+ * Expose struct mld_ifinfo to userland, keyed by ifindex.
+ * For use by ifmcstat(8).
+ */
+static int
+sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp)
+	int			*name;
+	int			 error;
+	u_int			 namelen;
+	struct ifnet		*ifp;
+	struct mld_ifinfo	*mli;
+	struct mld_ifinfo_u	mli_u;
+
+	name = (int *)arg1;
+	namelen = arg2;
+
+	if (req->newptr != USER_ADDR_NULL)
+		return (EPERM);
+
+	if (namelen != 1)
+		return (EINVAL);
+
+	MLD_LOCK();
+
+	if (name[0] <= 0 || name[0] > (u_int)if_index) {
+		error = ENOENT;
+		goto out_locked;
+	}
+
+	error = ENOENT;
+
+	ifnet_head_lock_shared();
+	ifp = ifindex2ifnet[name[0]];
+	ifnet_head_done();
+	if (ifp == NULL)
+		goto out_locked;
+
+	bzero(&mli_u, sizeof (mli_u));
+
+	LIST_FOREACH(mli, &mli_head, mli_link) {
+		MLI_LOCK(mli);
+		if (ifp != mli->mli_ifp) {
+			MLI_UNLOCK(mli);
+			continue;
+		}
+
+		mli_u.mli_ifindex = mli->mli_ifp->if_index;
+		mli_u.mli_version = mli->mli_version;
+		mli_u.mli_v1_timer = mli->mli_v1_timer;
+		mli_u.mli_v2_timer = mli->mli_v2_timer;
+		mli_u.mli_flags = mli->mli_flags;
+		mli_u.mli_rv = mli->mli_rv;
+		mli_u.mli_qi = mli->mli_qi;
+		mli_u.mli_qri = mli->mli_qri;
+		mli_u.mli_uri = mli->mli_uri;
+		MLI_UNLOCK(mli);
+
+		error = SYSCTL_OUT(req, &mli_u, sizeof (mli_u));
+		break;
+	}
+
+out_locked:
+	MLD_UNLOCK();
+	return (error);
+}
+
+/*
+ * Dispatch an entire queue of pending packet chains.
+ *
+ * Must not be called with in6m_lock held.
+ */
+static void
+mld_dispatch_queue(struct mld_ifinfo *mli, struct ifqueue *ifq, int limit)
+{
+	struct mbuf *m;
+
+	if (mli != NULL)
+		MLI_LOCK_ASSERT_HELD(mli);
+
+	for (;;) {
+		IF_DEQUEUE(ifq, m);
+		if (m == NULL)
+			break;
+		MLD_PRINTF(("%s: dispatch %p from %p\n", __func__, m, ifq));
+		if (mli != NULL)
+			MLI_UNLOCK(mli);
+		mld_dispatch_packet(m);
+		if (mli != NULL)
+			MLI_LOCK(mli);
+		if (--limit == 0)
+			break;
+	}
 
-#if CONFIG_MACF_NET
-#include <security/mac.h>
-#endif /* MAC_NET */
+	if (mli != NULL)
+		MLI_LOCK_ASSERT_HELD(mli);
+}
 
 /*
- * Protocol constants
+ * Filter outgoing MLD report state by group.
+ *
+ * Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1)
+ * and node-local addresses. However, kernel and socket consumers
+ * always embed the KAME scope ID in the address provided, so strip it
+ * when performing comparison.
+ * Note: This is not the same as the *multicast* scope.
+ *
+ * Return zero if the given group is one for which MLD reports
+ * should be suppressed, or non-zero if reports should be issued.
  */
+static __inline__ int
+mld_is_addr_reported(const struct in6_addr *addr)
+{
+
+	VERIFY(IN6_IS_ADDR_MULTICAST(addr));
+
+	if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL)
+		return (0);
+
+	if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL) {
+		struct in6_addr tmp = *addr;
+		in6_clearscope(&tmp);
+		if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes))
+			return (0);
+	}
+
+	return (1);
+}
 
-/* denotes that the MLD max response delay field specifies time in milliseconds */
-#define MLD6_TIMER_SCALE	1000
 /*
- * time between repetitions of a node's initial report of interest in a
- * multicast address(in seconds)
+ * Attach MLD when PF_INET6 is attached to an interface.
  */
-#define MLD6_UNSOLICITED_REPORT_INTERVAL	10
+struct mld_ifinfo *
+mld_domifattach(struct ifnet *ifp, int how)
+{
+	struct mld_ifinfo *mli;
+
+	MLD_PRINTF(("%s: called for ifp %p(%s%d)\n",
+	    __func__, ifp, ifp->if_name, ifp->if_unit));
+
+	mli = mli_alloc(how);
+	if (mli == NULL)
+		return (NULL);
+
+	MLD_LOCK();
+
+	MLI_LOCK(mli);
+	mli_initvar(mli, ifp, 0);
+	mli->mli_debug |= IFD_ATTACHED;
+	MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
+	MLI_ADDREF_LOCKED(mli); /* hold a reference for caller */
+	MLI_UNLOCK(mli);
+
+	LIST_INSERT_HEAD(&mli_head, mli, mli_link);
+
+	MLD_UNLOCK();
 
-extern lck_mtx_t *nd6_mutex;
-static struct ip6_pktopts ip6_opts;
-static int mld6_timers_are_running;
-static int mld6_init_done = 0 ;
-/* XXX: These are necessary for KAME's link-local hack */
-static struct in6_addr mld6_all_nodes_linklocal = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
-static struct in6_addr mld6_all_routers_linklocal = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
+	MLD_PRINTF(("allocate mld_ifinfo for ifp %p(%s%d)\n",
+	     ifp, ifp->if_name, ifp->if_unit));
 
-static void mld6_sendpkt(struct in6_multi *, int, const struct in6_addr *);
+	return (mli);
+}
 
+/*
+ * Attach MLD when PF_INET6 is reattached to an interface.  Caller is
+ * expected to have an outstanding reference to the mli.
+ */
 void
-mld6_init()
+mld_domifreattach(struct mld_ifinfo *mli)
 {
-	static u_int8_t hbh_buf[8];
-	struct ip6_hbh *hbh = (struct ip6_hbh *)hbh_buf;
-	u_int16_t rtalert_code = htons((u_int16_t)IP6OPT_RTALERT_MLD);
+	struct ifnet *ifp;
 
-	if (mld6_init_done)
-		return;
+	MLD_LOCK();
 
-	mld6_init_done = 1;
-	mld6_timers_are_running = 0;
+	MLI_LOCK(mli);
+	VERIFY(!(mli->mli_debug & IFD_ATTACHED));
+	ifp = mli->mli_ifp;
+	VERIFY(ifp != NULL);
+	mli_initvar(mli, ifp, 1);
+	mli->mli_debug |= IFD_ATTACHED;
+	MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
+	MLI_UNLOCK(mli);
 
-	/* ip6h_nxt will be fill in later */
-	hbh->ip6h_len = 0;	/* (8 >> 3) - 1 */
+	LIST_INSERT_HEAD(&mli_head, mli, mli_link);
 
-	/* XXX: grotty hard coding... */
-	hbh_buf[2] = IP6OPT_PADN;	/* 2 byte padding */
-	hbh_buf[3] = 0;
-	hbh_buf[4] = IP6OPT_RTALERT;
-	hbh_buf[5] = IP6OPT_RTALERT_LEN - 2;
-	bcopy((caddr_t)&rtalert_code, &hbh_buf[6], sizeof(u_int16_t));
+	MLD_UNLOCK();
 
-	ip6_initpktopts(&ip6_opts);
-	ip6_opts.ip6po_hbh = hbh;
+	MLD_PRINTF(("reattached mld_ifinfo for ifp %p(%s%d)\n",
+	     ifp, ifp->if_name, ifp->if_unit));
 }
 
+/*
+ * Hook for domifdetach.
+ */
 void
-mld6_start_listening(
-	struct in6_multi *in6m)
-{
-	/*
-	 * RFC2710 page 10:
-	 * The node never sends a Report or Done for the link-scope all-nodes
-	 * address.
-	 * MLD messages are never sent for multicast addresses whose scope is 0
-	 * (reserved) or 1 (node-local).
-	 */
-	mld6_all_nodes_linklocal.s6_addr16[1] =
-		htons(in6m->in6m_ifp->if_index); /* XXX */
-	if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &mld6_all_nodes_linklocal) ||
-	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) {
-		in6m->in6m_timer = 0;
-		in6m->in6m_state = MLD6_OTHERLISTENER;
-	} else {
-		mld6_sendpkt(in6m, MLD6_LISTENER_REPORT, NULL);
-		in6m->in6m_timer = MLD6_RANDOM_DELAY(
-			MLD6_UNSOLICITED_REPORT_INTERVAL * PR_FASTHZ);
-		in6m->in6m_state = MLD6_IREPORTEDLAST;
-		mld6_timers_are_running = 1;
+mld_domifdetach(struct ifnet *ifp)
+{
+
+	MLD_PRINTF(("%s: called for ifp %p(%s%d)\n",
+	    __func__, ifp, ifp->if_name, ifp->if_unit));
+
+	MLD_LOCK();
+	mli_delete(ifp);
+	MLD_UNLOCK();
+}
+
+/*
+ * Called at interface detach time.  Note that we only flush all deferred
+ * responses and record releases; all remaining inm records and their source
+ * entries related to this interface are left intact, in order to handle
+ * the reattach case.
+ */
+static void
+mli_delete(const struct ifnet *ifp)
+{
+	struct mld_ifinfo *mli, *tmli;
+
+	MLD_LOCK_ASSERT_HELD();
+
+	LIST_FOREACH_SAFE(mli, &mli_head, mli_link, tmli) {
+		MLI_LOCK(mli);
+		if (mli->mli_ifp == ifp) {
+			/*
+			 * Free deferred General Query responses.
+			 */
+			IF_DRAIN(&mli->mli_gq);
+			IF_DRAIN(&mli->mli_v1q);
+			mld_flush_relq(mli);
+			VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
+			mli->mli_debug &= ~IFD_ATTACHED;
+			MLI_UNLOCK(mli);
+
+			LIST_REMOVE(mli, mli_link);
+			MLI_REMREF(mli); /* release mli_head reference */
+			return;
+		}
+		MLI_UNLOCK(mli);
+	}
+	panic("%s: mld_ifinfo not found for ifp %p\n", __func__,  ifp);
+}
+
+static void
+mli_initvar(struct mld_ifinfo *mli, struct ifnet *ifp, int reattach)
+{
+	MLI_LOCK_ASSERT_HELD(mli);
+
+	mli->mli_ifp = ifp;
+	mli->mli_version = MLD_VERSION_2;
+	mli->mli_flags = 0;
+	mli->mli_rv = MLD_RV_INIT;
+	mli->mli_qi = MLD_QI_INIT;
+	mli->mli_qri = MLD_QRI_INIT;
+	mli->mli_uri = MLD_URI_INIT;
+
+	/* ifnet is not yet attached; no need to hold ifnet lock */
+	if (!(ifp->if_flags & IFF_MULTICAST))
+		mli->mli_flags |= MLIF_SILENT;
+	if (mld_use_allow)
+		mli->mli_flags |= MLIF_USEALLOW;
+	if (!reattach)
+		SLIST_INIT(&mli->mli_relinmhead);
+
+	/*
+	 * Responses to general queries are subject to bounds.
+	 */
+	mli->mli_gq.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
+	mli->mli_v1q.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
+}
+
+static struct mld_ifinfo *
+mli_alloc(int how)
+{
+	struct mld_ifinfo *mli;
+
+	mli = (how == M_WAITOK) ? zalloc(mli_zone) : zalloc_noblock(mli_zone);
+	if (mli != NULL) {
+		bzero(mli, mli_size);
+		lck_mtx_init(&mli->mli_lock, mld_mtx_grp, mld_mtx_attr);
+		mli->mli_debug |= IFD_ALLOC;
+	}
+	return (mli);
+}
+
+static void
+mli_free(struct mld_ifinfo *mli)
+{
+	MLI_LOCK(mli);
+	if (mli->mli_debug & IFD_ATTACHED) {
+		panic("%s: attached mli=%p is being freed", __func__, mli);
+		/* NOTREACHED */
+	} else if (mli->mli_ifp != NULL) {
+		panic("%s: ifp not NULL for mli=%p", __func__, mli);
+		/* NOTREACHED */
+	} else if (!(mli->mli_debug & IFD_ALLOC)) {
+		panic("%s: mli %p cannot be freed", __func__, mli);
+		/* NOTREACHED */
+	} else if (mli->mli_refcnt != 0) {
+		panic("%s: non-zero refcnt mli=%p", __func__, mli);
+		/* NOTREACHED */
 	}
+	mli->mli_debug &= ~IFD_ALLOC;
+	MLI_UNLOCK(mli);
+
+	lck_mtx_destroy(&mli->mli_lock, mld_mtx_grp);
+	zfree(mli_zone, mli);
 }
 
 void
-mld6_stop_listening(
-	struct in6_multi *in6m)
+mli_addref(struct mld_ifinfo *mli, int locked)
 {
-	mld6_all_nodes_linklocal.s6_addr16[1] =
-		htons(in6m->in6m_ifp->if_index); /* XXX */
-	mld6_all_routers_linklocal.s6_addr16[1] =
-		htons(in6m->in6m_ifp->if_index); /* XXX: necessary when mrouting */
+	if (!locked)
+		MLI_LOCK_SPIN(mli);
+	else
+		MLI_LOCK_ASSERT_HELD(mli);
 
-	if (in6m->in6m_state == MLD6_IREPORTEDLAST &&
-	    (!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &mld6_all_nodes_linklocal)) &&
-	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) > IPV6_ADDR_SCOPE_NODELOCAL)
-		mld6_sendpkt(in6m, MLD6_LISTENER_DONE,
-			     &mld6_all_routers_linklocal);
+	if (++mli->mli_refcnt == 0) {
+		panic("%s: mli=%p wraparound refcnt", __func__, mli);
+		/* NOTREACHED */
+	}
+	if (!locked)
+		MLI_UNLOCK(mli);
 }
 
 void
-mld6_input(
-	struct mbuf *m,
-	int off)
-{
-	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
-	struct mld6_hdr *mldh;
-	struct ifnet *ifp = m->m_pkthdr.rcvif;
-	struct in6_multi *in6m;
-	struct in6_ifaddr *ia;
-	struct ifmultiaddr *ifma;
-	int timer;		/* timer value in the MLD query header */
-
-#ifndef PULLDOWN_TEST
-	IP6_EXTHDR_CHECK(m, off, sizeof(*mldh), return);
-	mldh = (struct mld6_hdr *)(mtod(m, caddr_t) + off);
-#else
-	IP6_EXTHDR_GET(mldh, struct mld6_hdr *, m, off, sizeof(*mldh));
-	if (mldh == NULL) {
-		icmp6stat.icp6s_tooshort++;
-		return;
+mli_remref(struct mld_ifinfo *mli)
+{
+	struct ifnet *ifp;
+
+	MLI_LOCK_SPIN(mli);
+
+	if (mli->mli_refcnt == 0) {
+		panic("%s: mli=%p negative refcnt", __func__, mli);
+		/* NOTREACHED */
 	}
-#endif
 
-	/* source address validation */
-	ip6 = mtod(m, struct ip6_hdr *);/* in case mpullup */
-	if (!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) {
-		log(LOG_ERR,
-		    "mld6_input: src %s is not link-local (grp=%s)\n",
-		    ip6_sprintf(&ip6->ip6_src),
-		    ip6_sprintf(&mldh->mld6_addr));
-		/*
-		 * spec (RFC2710) does not explicitly
-		 * specify to discard the packet from a non link-local
-		 * source address. But we believe it's expected to do so.
-		 * XXX: do we have to allow :: as source?
-		 */
-		m_freem(m);
+	--mli->mli_refcnt;
+	if (mli->mli_refcnt > 0) {
+		MLI_UNLOCK(mli);
 		return;
 	}
 
+	ifp = mli->mli_ifp;
+	mli->mli_ifp = NULL;
+	IF_DRAIN(&mli->mli_gq);
+	IF_DRAIN(&mli->mli_v1q);
+	mld_flush_relq(mli);
+	VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
+	MLI_UNLOCK(mli);
+
+	MLD_PRINTF(("%s: freeing mld_ifinfo for ifp %p(%s%d)\n",
+	    __func__, ifp, ifp->if_name, ifp->if_unit));
+
+	mli_free(mli);
+}
+
+/*
+ * Process a received MLDv1 general or address-specific query.
+ * Assumes that the query header has been pulled up to sizeof(mld_hdr).
+ *
+ * NOTE: Can't be fully const correct as we temporarily embed scope ID in
+ * mld_addr. This is OK as we own the mbuf chain.
+ */
+static int
+mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
+    /*const*/ struct mld_hdr *mld)
+{
+	struct mld_ifinfo	*mli;
+	struct in6_multi	*inm;
+	int			 is_general_query;
+	uint16_t		 timer;
+
+	is_general_query = 0;
+
+	if (!mld_v1enable) {
+		MLD_PRINTF(("ignore v1 query %s on ifp %p(%s%d)\n",
+		    ip6_sprintf(&mld->mld_addr),
+		    ifp, ifp->if_name, ifp->if_unit));
+		return (0);
+	}
+
 	/*
-	 * In the MLD6 specification, there are 3 states and a flag.
-	 *
-	 * In Non-Listener state, we simply don't have a membership record.
-	 * In Delaying Listener state, our timer is running (in6m->in6m_timer)
-	 * In Idle Listener state, our timer is not running (in6m->in6m_timer==0)
-	 *
-	 * The flag is in6m->in6m_state, it is set to MLD6_OTHERLISTENER if
-	 * we have heard a report from another member, or MLD6_IREPORTEDLAST
-	 * if we sent the last report.
+	 * RFC3810 Section 6.2: MLD queries must originate from
+	 * a router's link-local address.
 	 */
-	switch(mldh->mld6_type) {
-	case MLD6_LISTENER_QUERY:
-		if (ifp->if_flags & IFF_LOOPBACK)
-			break;
-
-		if (!IN6_IS_ADDR_UNSPECIFIED(&mldh->mld6_addr) &&
-		    !IN6_IS_ADDR_MULTICAST(&mldh->mld6_addr))
-			break;	/* print error or log stat? */
-		if (IN6_IS_ADDR_MC_LINKLOCAL(&mldh->mld6_addr))
-			mldh->mld6_addr.s6_addr16[1] =
-				htons(ifp->if_index); /* XXX */
+	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
+		MLD_PRINTF(("ignore v1 query src %s on ifp %p(%s%d)\n",
+		    ip6_sprintf(&ip6->ip6_src),
+		    ifp, ifp->if_name, ifp->if_unit));
+		return (0);
+	}
 
+	/*
+	 * Do address field validation upfront before we accept
+	 * the query.
+	 */
+	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
 		/*
-		 * - Start the timers in all of our membership records
-		 *   that the query applies to for the interface on
-		 *   which the query arrived excl. those that belong
-		 *   to the "all-nodes" group (ff02::1).
-		 * - Restart any timer that is already running but has
-		 *   A value longer than the requested timeout.
-		 * - Use the value specified in the query message as
-		 *   the maximum timeout.
+		 * MLDv1 General Query.
+		 * If this was not sent to the all-nodes group, ignore it.
 		 */
-		ifnet_lock_exclusive(ifp);
-		IFP_TO_IA6(ifp, ia);
-		if (ia == NULL)
-			break;
+		struct in6_addr		 dst;
 
+		dst = ip6->ip6_dst;
+		in6_clearscope(&dst);
+		if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes))
+			return (EINVAL);
+		is_general_query = 1;
+	} else {
 		/*
-		 * XXX: System timer resolution is too low to handle Max
-		 * Response Delay, so set 1 to the internal timer even if
-		 * the calculated value equals to zero when Max Response
-		 * Delay is positive.
+		 * Embed scope ID of receiving interface in MLD query for
+		 * lookup whilst we don't hold other locks.
 		 */
-		timer = ntohs(mldh->mld6_maxdelay)*PR_FASTHZ/MLD6_TIMER_SCALE;
-		if (timer == 0 && mldh->mld6_maxdelay)
-			timer = 1;
-		mld6_all_nodes_linklocal.s6_addr16[1] =
-			htons(ifp->if_index); /* XXX */
-		
-		LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
-		{
-			if (ifma->ifma_addr->sa_family != AF_INET6)
-				continue;
-			in6m = (struct in6_multi *)ifma->ifma_protospec;
-			if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr,
-					&mld6_all_nodes_linklocal) ||
-			    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) <
-			    IPV6_ADDR_SCOPE_LINKLOCAL)
-				continue;
+		in6_setscope(&mld->mld_addr, ifp, NULL);
+	}
 
-			if (IN6_IS_ADDR_UNSPECIFIED(&mldh->mld6_addr) ||
-			    IN6_ARE_ADDR_EQUAL(&mldh->mld6_addr,
-						&in6m->in6m_addr))
-			{
-				if (timer == 0) {
-					/* send a report immediately */
-					mld6_sendpkt(in6m, MLD6_LISTENER_REPORT,
-						NULL);
-					in6m->in6m_timer = 0; /* reset timer */
-					in6m->in6m_state = MLD6_IREPORTEDLAST;
-				}
-				else if (in6m->in6m_timer == 0 || /*idle state*/
-					in6m->in6m_timer > timer) {
-					in6m->in6m_timer =
-						MLD6_RANDOM_DELAY(timer);
-					mld6_timers_are_running = 1;
-				}
-			}
-		}
-		ifnet_lock_done(ifp);
+	/*
+	 * Switch to MLDv1 host compatibility mode.
+	 */
+	mli = MLD_IFINFO(ifp);
+	VERIFY(mli != NULL);
 
-		if (IN6_IS_ADDR_MC_LINKLOCAL(&mldh->mld6_addr))
-			mldh->mld6_addr.s6_addr16[1] = 0; /* XXX */
-		break;
-	case MLD6_LISTENER_REPORT:
-		/*
-		 * For fast leave to work, we have to know that we are the
-		 * last person to send a report for this group.  Reports
-		 * can potentially get looped back if we are a multicast
-		 * router, so discard reports sourced by me.
-		 * Note that it is impossible to check IFF_LOOPBACK flag of
-		 * ifp for this purpose, since ip6_mloopback pass the physical
-		 * interface to looutput.
-		 */
-		if (m->m_flags & M_LOOP) /* XXX: grotty flag, but efficient */
-			break;
+	MLI_LOCK(mli);
+	mld_set_version(mli, MLD_VERSION_1);
+	MLI_UNLOCK(mli);
 
-		if (!IN6_IS_ADDR_MULTICAST(&mldh->mld6_addr))
-			break;
+	timer = (ntohs(mld->mld_maxdelay) * PR_SLOWHZ) / MLD_TIMER_SCALE;
+	if (timer == 0)
+		timer = 1;
+
+	if (is_general_query) {
+		struct in6_multistep step;
 
-		if (IN6_IS_ADDR_MC_LINKLOCAL(&mldh->mld6_addr))
-			mldh->mld6_addr.s6_addr16[1] =
-				htons(ifp->if_index); /* XXX */
+		MLD_PRINTF(("process v1 general query on ifp %p(%s%d)\n",
+		    ifp, ifp->if_name, ifp->if_unit));
 		/*
-		 * If we belong to the group being reported, stop
-		 * our timer for that group.
+		 * For each reporting group joined on this
+		 * interface, kick the report timer.
 		 */
-		ifnet_lock_shared(ifp);
-		IN6_LOOKUP_MULTI(mldh->mld6_addr, ifp, in6m);
-		if (in6m) {
-			in6m->in6m_timer = 0; /* transit to idle state */
-			in6m->in6m_state = MLD6_OTHERLISTENER; /* clear flag */
+		in6_multihead_lock_shared();
+		IN6_FIRST_MULTI(step, inm);
+		while (inm != NULL) {
+			IN6M_LOCK(inm);
+			if (inm->in6m_ifp == ifp)
+				mld_v1_update_group(inm, timer);
+			IN6M_UNLOCK(inm);
+			IN6_NEXT_MULTI(step, inm);
 		}
-		ifnet_lock_done(ifp);
+		in6_multihead_lock_done();
+	} else {
+		/*
+		 * MLDv1 Group-Specific Query.
+		 * If this is a group-specific MLDv1 query, we need only
+		 * look up the single group to process it.
+		 */
+		in6_multihead_lock_shared();
+		IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
+		in6_multihead_lock_done();
 
-		if (IN6_IS_ADDR_MC_LINKLOCAL(&mldh->mld6_addr))
-			mldh->mld6_addr.s6_addr16[1] = 0; /* XXX */
-		break;
-	default:		/* this is impossible */
-		log(LOG_ERR, "mld6_input: illegal type(%d)", mldh->mld6_type);
-		break;
+		if (inm != NULL) {
+			IN6M_LOCK(inm);
+			MLD_PRINTF(("process v1 query %s on ifp %p(%s%d)\n",
+			    ip6_sprintf(&mld->mld_addr),
+			    ifp, ifp->if_name, ifp->if_unit));
+			mld_v1_update_group(inm, timer);
+			IN6M_UNLOCK(inm);
+			IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
+		}
+		/* XXX Clear embedded scope ID as userland won't expect it. */
+		in6_clearscope(&mld->mld_addr);
 	}
 
-	m_freem(m);
+	return (0);
 }
 
-void
-mld6_fasttimeo()
+/*
+ * Update the report timer on a group in response to an MLDv1 query.
+ *
+ * If we are becoming the reporting member for this group, start the timer.
+ * If we already are the reporting member for this group, and timer is
+ * below the threshold, reset it.
+ *
+ * We may be updating the group for the first time since we switched
+ * to MLDv2. If we are, then we must clear any recorded source lists,
+ * and transition to REPORTING state; the group timer is overloaded
+ * for group and group-source query responses. 
+ *
+ * Unlike MLDv2, the delay per group should be jittered
+ * to avoid bursts of MLDv1 reports.
+ */
+static void
+mld_v1_update_group(struct in6_multi *inm, const int timer)
 {
-	struct in6_multi *in6m;
-	struct in6_multistep step;
+	IN6M_LOCK_ASSERT_HELD(inm);
 
-	/*
-	 * Quick check to see if any work needs to be done, in order
-	 * to minimize the overhead of fasttimo processing.
-	 */
-	if (!mld6_timers_are_running)
-		return;
+	MLD_PRINTF(("%s: %s/%s%d timer=%d\n", __func__,
+	    ip6_sprintf(&inm->in6m_addr),
+	    inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit, timer));
 
-	lck_mtx_lock(nd6_mutex);
-	mld6_timers_are_running = 0;
-	IN6_FIRST_MULTI(step, in6m);
-	while (in6m != NULL) {
-		if (in6m->in6m_timer == 0) {
-			/* do nothing */
-		} else if (--in6m->in6m_timer == 0) {
-			mld6_sendpkt(in6m, MLD6_LISTENER_REPORT, NULL);
-			in6m->in6m_state = MLD6_IREPORTEDLAST;
-		} else {
-			mld6_timers_are_running = 1;
+	switch (inm->in6m_state) {
+	case MLD_NOT_MEMBER:
+	case MLD_SILENT_MEMBER:
+		break;
+	case MLD_REPORTING_MEMBER:
+		if (inm->in6m_timer != 0 &&
+		    inm->in6m_timer <= timer) {
+			MLD_PRINTF(("%s: REPORTING and timer running, "
+			    "skipping.\n", __func__));
+			break;
 		}
-		IN6_NEXT_MULTI(step, in6m);
+		/* FALLTHROUGH */
+	case MLD_SG_QUERY_PENDING_MEMBER:
+	case MLD_G_QUERY_PENDING_MEMBER:
+	case MLD_IDLE_MEMBER:
+	case MLD_LAZY_MEMBER:
+	case MLD_AWAKENING_MEMBER:
+		MLD_PRINTF(("%s: ->REPORTING\n", __func__));
+		inm->in6m_state = MLD_REPORTING_MEMBER;
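+		/*
+		 * Jitter the delay over [0, timer] so that hosts on the
+		 * link do not burst their MLDv1 reports in lock-step.
+		 */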
+		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
+		current_state_timers_running6 = 1;
+		break;
+	case MLD_SLEEPING_MEMBER:
+		MLD_PRINTF(("%s: ->AWAKENING\n", __func__));
+		inm->in6m_state = MLD_AWAKENING_MEMBER;
+		break;
+	case MLD_LEAVING_MEMBER:
+		break;
 	}
-	lck_mtx_unlock(nd6_mutex);
 }
 
-static void
-mld6_sendpkt(
-	struct in6_multi *in6m,
-	int type,
-	const struct in6_addr *dst)
+/*
+ * Process a received MLDv2 general, group-specific or
+ * group-and-source-specific query.
+ *
+ * Assumes that the query header has been pulled up to sizeof(mldv2_query).
+ *
+ * Return 0 if successful, otherwise an appropriate error code is returned.
+ */
+static int
+mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
+    struct mbuf *m, const int off, const int icmp6len)
 {
-	struct mbuf *mh, *md;
-	struct mld6_hdr *mldh;
-	struct ip6_hdr *ip6;
-	struct ip6_moptions im6o;
-	struct in6_ifaddr *ia;
-	struct ifnet *ifp = in6m->in6m_ifp;
-	struct ifnet *outif = NULL;
+	struct mld_ifinfo	*mli;
+	struct mldv2_query	*mld;
+	struct in6_multi	*inm;
+	uint32_t		 maxdelay, nsrc, qqi;
+	int			 is_general_query;
+	uint16_t		 timer;
+	uint8_t			 qrv;
 
-	/*
-	 * At first, find a link local address on the outgoing interface
-	 * to use as the source address of the MLD packet.
-	 */
-	if ((ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST))
-	    == NULL)
-		return;
+	is_general_query = 0;
 
 	/*
-	 * Allocate mbufs to store ip6 header and MLD header.
-	 * We allocate 2 mbufs and make chain in advance because
-	 * it is more convenient when inserting the hop-by-hop option later.
+	 * RFC3810 Section 6.2: MLD queries must originate from
+	 * a router's link-local address.
 	 */
-	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
-	if (mh == NULL) {
-		ifafree(&ia->ia_ifa);
-		return;
-	}
-	MGET(md, M_DONTWAIT, MT_DATA);
-	if (md == NULL) {
-		m_free(mh);
-		ifafree(&ia->ia_ifa);
-		return;
+	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
+		MLD_PRINTF(("ignore v1 query src %s on ifp %p(%s%d)\n",
+		    ip6_sprintf(&ip6->ip6_src),
+		    ifp, ifp->if_name, ifp->if_unit));
+		return (0);
 	}
-	mh->m_next = md;
 
-	mh->m_pkthdr.rcvif = NULL;
-#ifdef __darwin8_notyet
-#if CONFIG_MACF_NET
-	mac_create_mbuf_linklayer(in6m->in6m_ifp, m);
-#endif
-#endif
-	mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld6_hdr);
-	mh->m_len = sizeof(struct ip6_hdr);
-	MH_ALIGN(mh, sizeof(struct ip6_hdr));
+	MLD_PRINTF(("input v2 query on ifp %p(%s%d)\n", ifp, ifp->if_name,
+	    ifp->if_unit));
 
-	/* fill in the ip6 header */
-	ip6 = mtod(mh, struct ip6_hdr *);
-	ip6->ip6_flow = 0;
-	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
-	ip6->ip6_vfc |= IPV6_VERSION;
-	/* ip6_plen will be set later */
-	ip6->ip6_nxt = IPPROTO_ICMPV6;
-	/* ip6_hlim will be set by im6o.im6o_multicast_hlim */
-	ip6->ip6_src = ia->ia_addr.sin6_addr;
-	ip6->ip6_dst = dst ? *dst : in6m->in6m_addr;
-
-	/* fill in the MLD header */
-	md->m_len = sizeof(struct mld6_hdr);
-	mldh = mtod(md, struct mld6_hdr *);
-	mldh->mld6_type = type;
-	mldh->mld6_code = 0;
-	mldh->mld6_cksum = 0;
-	/* XXX: we assume the function will not be called for query messages */
-	mldh->mld6_maxdelay = 0;
-	mldh->mld6_reserved = 0;
-	mldh->mld6_addr = in6m->in6m_addr;
-	if (IN6_IS_ADDR_MC_LINKLOCAL(&mldh->mld6_addr))
-		mldh->mld6_addr.s6_addr16[1] = 0; /* XXX */
-	mldh->mld6_cksum = in6_cksum(mh, IPPROTO_ICMPV6, sizeof(struct ip6_hdr),
-				     sizeof(struct mld6_hdr));
-
-	/* construct multicast option */
-	bzero(&im6o, sizeof(im6o));
-	im6o.im6o_multicast_ifp = ifp;
-	im6o.im6o_multicast_hlim = 1;
-
-	/*
-	 * Request loopback of the report if we are acting as a multicast
-	 * router, so that the process-level routing daemon can hear it.
-	 */
-#if MROUTING
-	im6o.im6o_multicast_loop = (ip6_mrouter != NULL);
-#else
-	im6o.im6o_multicast_loop = 0;
-#endif
+	mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off);
 
-	/* increment output statictics */
-	icmp6stat.icp6s_outhist[type]++;
+	maxdelay = ntohs(mld->mld_maxdelay);	/* in 1/10ths of a second */
+	if (maxdelay >= 32768) {
+		maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) <<
+			   (MLD_MRC_EXP(maxdelay) + 3);
+	}
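+	/*
+	 * Worked example (illustrative values, following the RFC 3810
+	 * section 5.1.3 encoding): an on-wire Maximum Response Code of
+	 * 0x8000 has the high bit set, so it decodes as mantissa 0 and
+	 * exponent 0, i.e. (0x0 | 0x1000) << (0 + 3) = 32768 tenths of
+	 * a second. The conversion below then scales tenths of a
+	 * second into slow-timeout ticks.
+	 */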
+	timer = (maxdelay * PR_SLOWHZ) / MLD_TIMER_SCALE;
+	if (timer == 0)
+		timer = 1;
 
-	ip6_output(mh, &ip6_opts, NULL, 0, &im6o, &outif, 0);
-	if (outif) {
-		icmp6_ifstat_inc(outif, ifs6_out_msg);
-		switch (type) {
-		case MLD6_LISTENER_QUERY:
-			icmp6_ifstat_inc(outif, ifs6_out_mldquery);
-			break;
-		case MLD6_LISTENER_REPORT:
-			icmp6_ifstat_inc(outif, ifs6_out_mldreport);
-			break;
-		case MLD6_LISTENER_DONE:
-			icmp6_ifstat_inc(outif, ifs6_out_mlddone);
-			break;
-		}
+	qrv = MLD_QRV(mld->mld_misc);
+	if (qrv < 2) {
+		MLD_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
+		    qrv, MLD_RV_INIT));
+		qrv = MLD_RV_INIT;
 	}
-	ifafree(&ia->ia_ifa);
-}
 
+	qqi = mld->mld_qqi;
+	if (qqi >= 128) {
+		qqi = MLD_QQIC_MANT(mld->mld_qqi) <<
+		     (MLD_QQIC_EXP(mld->mld_qqi) + 3);
+	}
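+	/*
+	 * Illustrative example, assuming MLD_QQIC_MANT()/MLD_QQIC_EXP()
+	 * implement the RFC 3810 section 5.1.9 encoding with the
+	 * implicit leading bit restored: a QQIC of 0x80 decodes to
+	 * 0x10 << 3 = 128 seconds. Values below 128 are literal.
+	 */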
+
+	nsrc = ntohs(mld->mld_numsrc);
+	if (nsrc > MLD_MAX_GS_SOURCES)
+		return (EMSGSIZE);
+	if (icmp6len < sizeof(struct mldv2_query) +
+	    (nsrc * sizeof(struct in6_addr)))
+		return (EMSGSIZE);
+
+	/*
+	 * Do further input validation upfront to avoid resetting timers
+	 * should we need to discard this query.
+	 */
+	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
+		/*
+		 * General Queries SHOULD be directed to ff02::1.
+		 * A general query with a source list has undefined
+		 * behaviour; discard it.
+		 */
+		struct in6_addr		 dst;
+
+		dst = ip6->ip6_dst;
+		in6_clearscope(&dst);
+		if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes) ||
+		    nsrc > 0)
+			return (EINVAL);
+		is_general_query = 1;
+	} else {
+		/*
+		 * Embed scope ID of receiving interface in MLD query for
+		 * lookup whilst we don't hold other locks (due to KAME
+		 * locking lameness). We own this mbuf chain just now.
+		 */
+		in6_setscope(&mld->mld_addr, ifp, NULL);
+	}
+
+	mli = MLD_IFINFO(ifp);
+	VERIFY(mli != NULL);
+
+	MLI_LOCK(mli);
+	/*
+	 * Discard the v2 query if we're in Compatibility Mode.
+	 * The RFC is pretty clear that hosts need to stay in MLDv1 mode
+	 * until the Old Version Querier Present timer expires.
+	 */
+	if (mli->mli_version != MLD_VERSION_2) {
+		MLI_UNLOCK(mli);
+		return (0);
+	}
+
+	mld_set_version(mli, MLD_VERSION_2);
+	mli->mli_rv = qrv;
+	mli->mli_qi = qqi;
+	mli->mli_qri = maxdelay;
+
+	MLD_PRINTF(("%s: qrv %d qi %d maxdelay %d\n", __func__, qrv, qqi,
+	    maxdelay));
+
+	if (is_general_query) {
+		/*
+		 * MLDv2 General Query.
+		 *
+		 * Schedule a current-state report on this ifp for
+		 * all groups, possibly containing source lists.
+		 *
+		 * If there is a pending General Query response
+		 * scheduled earlier than the selected delay, do
+		 * not schedule any other reports.
+		 * Otherwise, reset the interface timer.
+		 */
+		MLD_PRINTF(("process v2 general query on ifp %p(%s%d)\n",
+		    ifp, ifp->if_name, ifp->if_unit));
+		if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
+			mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
+			interface_timers_running6 = 1;
+		}
+		MLI_UNLOCK(mli);
+	} else {
+		MLI_UNLOCK(mli);
+		/*
+		 * MLDv2 Group-specific or Group-and-source-specific Query.
+		 *
+		 * Group-source-specific queries are throttled on
+		 * a per-group basis to defeat denial-of-service attempts.
+		 * Queries for groups we are not a member of on this
+		 * link are simply ignored.
+		 */
+		in6_multihead_lock_shared();
+		IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
+		in6_multihead_lock_done();
+		if (inm == NULL)
+			return (0);
+
+		IN6M_LOCK(inm);
+#ifndef __APPLE__
+		/* TODO: need ratecheck equivalent */
+		if (nsrc > 0) {
+			if (!ratecheck(&inm->in6m_lastgsrtv,
+			    &mld_gsrdelay)) {
+				MLD_PRINTF(("%s: GS query throttled.\n",
+				    __func__));
+				IN6M_UNLOCK(inm);
+				IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
+				return (0);
+			}
+		}
+#endif
+		MLD_PRINTF(("process v2 group query on ifp %p(%s%d)\n",
+		     ifp, ifp->if_name, ifp->if_unit));
+		/*
+		 * If there is a pending General Query response
+		 * scheduled sooner than the selected delay, no
+		 * further report need be scheduled.
+		 * Otherwise, prepare to respond to the
+		 * group-specific or group-and-source query.
+		 */
+		MLI_LOCK(mli);
+		if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
+			MLI_UNLOCK(mli);
+			mld_v2_process_group_query(inm, timer, m, off);
+		} else {
+			MLI_UNLOCK(mli);
+		}
+		IN6M_UNLOCK(inm);
+		IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
+		/* XXX Clear embedded scope ID as userland won't expect it. */
+		in6_clearscope(&mld->mld_addr);
+	}
+
+	return (0);
+}
+
+/*
+ * Process a received MLDv2 group-specific or group-and-source-specific
+ * query.
+ * Return <0 if any error occurred. Currently this is ignored.
+ */
+static int
+mld_v2_process_group_query(struct in6_multi *inm, int timer, struct mbuf *m0,
+    const int off)
+{
+	struct mldv2_query	*mld;
+	int			 retval;
+	uint16_t		 nsrc;
+
+	IN6M_LOCK_ASSERT_HELD(inm);
+
+	retval = 0;
+	mld = (struct mldv2_query *)(mtod(m0, uint8_t *) + off);
+
+	switch (inm->in6m_state) {
+	case MLD_NOT_MEMBER:
+	case MLD_SILENT_MEMBER:
+	case MLD_SLEEPING_MEMBER:
+	case MLD_LAZY_MEMBER:
+	case MLD_AWAKENING_MEMBER:
+	case MLD_IDLE_MEMBER:
+	case MLD_LEAVING_MEMBER:
+		return (retval);
+	case MLD_REPORTING_MEMBER:
+	case MLD_G_QUERY_PENDING_MEMBER:
+	case MLD_SG_QUERY_PENDING_MEMBER:
+		break;
+	}
+
+	nsrc = ntohs(mld->mld_numsrc);
+
+	/*
+	 * Deal with group-specific queries upfront.
+	 * If any group query is already pending, purge any recorded
+	 * source-list state if it exists, and schedule a query response
+	 * for this group-specific query.
+	 */
+	if (nsrc == 0) {
+		if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
+		    inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
+			in6m_clear_recorded(inm);
+			timer = min(inm->in6m_timer, timer);
+		}
+		inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER;
+		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
+		current_state_timers_running6 = 1;
+		return (retval);
+	}
+
+	/*
+	 * Deal with the case where a group-and-source-specific query has
+	 * been received but a group-specific query is already pending.
+	 */
+	if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) {
+		timer = min(inm->in6m_timer, timer);
+		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
+		current_state_timers_running6 = 1;
+		return (retval);
+	}
+
+	/*
+	 * Finally, deal with the case where a group-and-source-specific
+	 * query has been received, where a response to a previous g-s-r
+	 * query exists, or none exists.
+	 * In this case, we need to parse the source-list which the Querier
+	 * has provided us with and check if we have any source list filter
+	 * entries at T1 for these sources. If we do not, there is no need
+	 * to schedule a report and the query may be dropped.
+	 * If we do, we must record them and schedule a current-state
+	 * report for those sources.
+	 */
+	if (inm->in6m_nsrc > 0) {
+		struct mbuf		*m;
+		uint8_t			*sp;
+		int			 i, nrecorded;
+		int			 soff;
+
+		m = m0;
+		soff = off + sizeof(struct mldv2_query);
+		nrecorded = 0;
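+		/*
+		 * Walk the source list in place; soff tracks the byte
+		 * offset within the current mbuf and is rewound whenever
+		 * the walk crosses into the next mbuf of the chain.
+		 */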
+		for (i = 0; i < nsrc; i++) {
+			sp = mtod(m, uint8_t *) + soff;
+			retval = in6m_record_source(inm,
+			    (const struct in6_addr *)sp);
+			if (retval < 0)
+				break;
+			nrecorded += retval;
+			soff += sizeof(struct in6_addr);
+			if (soff >= m->m_len) {
+				soff = soff - m->m_len;
+				m = m->m_next;
+				if (m == NULL)
+					break;
+			}
+		}
+		if (nrecorded > 0) {
+			MLD_PRINTF(( "%s: schedule response to SG query\n",
+			    __func__));
+			inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER;
+			inm->in6m_timer = MLD_RANDOM_DELAY(timer);
+			current_state_timers_running6 = 1;
+		}
+	}
+
+	return (retval);
+}
+
+/*
+ * Process a received MLDv1 host membership report.
+ * Assumes mld points to mld_hdr in pulled up mbuf chain.
+ *
+ * NOTE: Can't be fully const correct as we temporarily embed scope ID in
+ * mld_addr. This is OK as we own the mbuf chain.
+ */
+static int
+mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
+    /*const*/ struct mld_hdr *mld)
+{
+	struct in6_addr		 src, dst;
+	struct in6_ifaddr	*ia;
+	struct in6_multi	*inm;
+
+	if (!mld_v1enable) {
+		MLD_PRINTF(("ignore v1 report %s on ifp %p(%s%d)\n",
+		    ip6_sprintf(&mld->mld_addr),
+		    ifp, ifp->if_name, ifp->if_unit));
+		return (0);
+	}
+
+	if (ifp->if_flags & IFF_LOOPBACK)
+		return (0);
+
+	/*
+	 * MLDv1 reports must originate from a host's link-local address,
+	 * or the unspecified address (when booting).
+	 */
+	src = ip6->ip6_src;
+	in6_clearscope(&src);
+	if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) {
+		MLD_PRINTF(("ignore v1 query src %s on ifp %p(%s%d)\n",
+		    ip6_sprintf(&ip6->ip6_src),
+		    ifp, ifp->if_name, ifp->if_unit));
+		return (EINVAL);
+	}
+
+	/*
+	 * RFC2710 Section 4: MLDv1 reports must pertain to a multicast
+	 * group, and must be directed to the group itself.
+	 */
+	dst = ip6->ip6_dst;
+	in6_clearscope(&dst);
+	if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) ||
+	    !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) {
+		MLD_PRINTF(("ignore v1 query dst %s on ifp %p(%s%d)\n",
+		    ip6_sprintf(&ip6->ip6_dst),
+		    ifp, ifp->if_name, ifp->if_unit));
+		return (EINVAL);
+	}
+
+	/*
+	 * Make sure we don't hear our own membership report, as fast
+	 * leave requires knowing that we are the only member of a
+	 * group. Assume we used the link-local address if available,
+	 * otherwise look for ::.
+	 *
+	 * XXX Note that scope ID comparison is needed for the address
+	 * returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be
+	 * performed for the on-wire address.
+	 */
+	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
+	if (ia != NULL) {
+		IFA_LOCK(&ia->ia_ifa);
+		if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia))) {
+			IFA_UNLOCK(&ia->ia_ifa);
+			IFA_REMREF(&ia->ia_ifa);
+			return (0);
+		}
+		IFA_UNLOCK(&ia->ia_ifa);
+		IFA_REMREF(&ia->ia_ifa);
+	} else if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
+		return (0);
+	}
+
+	MLD_PRINTF(("process v1 report %s on ifp %p(%s%d)\n",
+	    ip6_sprintf(&mld->mld_addr), ifp, ifp->if_name, ifp->if_unit));
+
+	/*
+	 * Embed scope ID of receiving interface in MLD query for lookup
+	 * whilst we don't hold other locks (due to KAME locking lameness).
+	 */
+	if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr))
+		in6_setscope(&mld->mld_addr, ifp, NULL);
+
+	/*
+	 * MLDv1 report suppression.
+	 * If we are a member of this group, and our membership should be
+	 * reported, and our group timer is pending or about to be reset,
+	 * stop our group timer by transitioning to the 'lazy' state.
+	 */
+	in6_multihead_lock_shared();
+	IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
+	in6_multihead_lock_done();
+
+	if (inm != NULL) {
+		struct mld_ifinfo *mli;
+
+		IN6M_LOCK(inm);
+		mli = inm->in6m_mli;
+		VERIFY(mli != NULL);
+
+		MLI_LOCK(mli);
+		/*
+		 * If we are in MLDv2 host mode, do not allow the
+		 * other host's MLDv1 report to suppress our reports.
+		 */
+		if (mli->mli_version == MLD_VERSION_2) {
+			MLI_UNLOCK(mli);
+			IN6M_UNLOCK(inm);
+			IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
+			goto out;
+		}
+		MLI_UNLOCK(mli);
+
+		inm->in6m_timer = 0;
+
+		switch (inm->in6m_state) {
+		case MLD_NOT_MEMBER:
+		case MLD_SILENT_MEMBER:
+		case MLD_SLEEPING_MEMBER:
+			break;
+		case MLD_REPORTING_MEMBER:
+		case MLD_IDLE_MEMBER:
+		case MLD_AWAKENING_MEMBER:
+			MLD_PRINTF(("report suppressed for %s on ifp %p(%s%d)\n",
+			    ip6_sprintf(&mld->mld_addr),
+			    ifp, ifp->if_name, ifp->if_unit));
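+			/* FALLTHROUGH */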
+		case MLD_LAZY_MEMBER:
+			inm->in6m_state = MLD_LAZY_MEMBER;
+			break;
+		case MLD_G_QUERY_PENDING_MEMBER:
+		case MLD_SG_QUERY_PENDING_MEMBER:
+		case MLD_LEAVING_MEMBER:
+			break;
+		}
+		IN6M_UNLOCK(inm);
+		IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
+	}
+
+out:
+	/* XXX Clear embedded scope ID as userland won't expect it. */
+	in6_clearscope(&mld->mld_addr);
+
+	return (0);
+}
+
+/*
+ * MLD input path.
+ *
+ * Assume query messages which fit in a single ICMPv6 message header
+ * have been pulled up.
+ * Assume that userland will want to see the message, even if it
+ * otherwise fails kernel input validation; do not free it.
+ * Pullup may however free the mbuf chain m if it fails.
+ *
+ * Return IPPROTO_DONE if we freed m. Otherwise, return 0.
+ */
+int
+mld_input(struct mbuf *m, int off, int icmp6len)
+{
+	struct ifnet	*ifp;
+	struct ip6_hdr	*ip6;
+	struct mld_hdr	*mld;
+	int		 mldlen;
+
+	MLD_PRINTF(("%s: called w/mbuf (%p,%d)\n", __func__, m, off));
+
+	ifp = m->m_pkthdr.rcvif;
+
+	ip6 = mtod(m, struct ip6_hdr *);
+
+	/* Pullup to appropriate size. */
+	mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
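+	/*
+	 * Per RFC 3810 section 8.1, an MLDv1 query is exactly 24
+	 * octets while an MLDv2 query is 28 octets or larger, so the
+	 * ICMPv6 payload length distinguishes the two versions.
+	 */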
+	if (mld->mld_type == MLD_LISTENER_QUERY &&
+	    icmp6len >= sizeof(struct mldv2_query)) {
+		mldlen = sizeof(struct mldv2_query);
+	} else {
+		mldlen = sizeof(struct mld_hdr);
+	}
+	IP6_EXTHDR_GET(mld, struct mld_hdr *, m, off, mldlen);
+	if (mld == NULL) {
+		icmp6stat.icp6s_badlen++;
+		return (IPPROTO_DONE);
+	}
+
+	/*
+	 * Userland needs to see all of this traffic for implementing
+	 * the endpoint discovery portion of multicast routing.
+	 */
+	switch (mld->mld_type) {
+	case MLD_LISTENER_QUERY:
+		icmp6_ifstat_inc(ifp, ifs6_in_mldquery);
+		if (icmp6len == sizeof(struct mld_hdr)) {
+			if (mld_v1_input_query(ifp, ip6, mld) != 0)
+				return (0);
+		} else if (icmp6len >= sizeof(struct mldv2_query)) {
+			if (mld_v2_input_query(ifp, ip6, m, off,
+			    icmp6len) != 0)
+				return (0);
+		}
+		break;
+	case MLD_LISTENER_REPORT:
+		icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
+		if (mld_v1_input_report(ifp, ip6, mld) != 0)
+			return (0);
+		break;
+	case MLDV2_LISTENER_REPORT:
+		icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
+		break;
+	case MLD_LISTENER_DONE:
+		icmp6_ifstat_inc(ifp, ifs6_in_mlddone);
+		break;
+	default:
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * MLD6 slowtimo handler.
+ * Combines both the slow and fast timers into one. We lose some
+ * responsiveness but allow the system to avoid having a pr_fasttimo,
+ * thus allowing for power savings.
+ */
+void
+mld_slowtimo(void)
+{
+	struct ifqueue		 scq;	/* State-change packets */
+	struct ifqueue		 qrq;	/* Query response packets */
+	struct ifnet		*ifp;
+	struct mld_ifinfo	*mli;
+	struct in6_multi	*inm;
+	int			 uri_fasthz = 0;
+
+	MLD_LOCK();
+
+	LIST_FOREACH(mli, &mli_head, mli_link) {
+		MLI_LOCK(mli);
+		mld_v1_process_querier_timers(mli);
+		MLI_UNLOCK(mli);
+	}
+
+	/*
+	 * Quick check to see if any work needs to be done, in order to
+	 * minimize the overhead of timeout processing.
+	 */
+	if (!current_state_timers_running6 &&
+	    !interface_timers_running6 &&
+	    !state_change_timers_running6) {
+		MLD_UNLOCK();
+		return;
+	}
+
+	/*
+	 * MLDv2 General Query response timer processing.
+	 */
+	if (interface_timers_running6) {
+#if 0
+		MLD_PRINTF(("%s: interface timers running\n", __func__));
+#endif
+		interface_timers_running6 = 0;
+		LIST_FOREACH(mli, &mli_head, mli_link) {
+			MLI_LOCK(mli);
+			if (mli->mli_v2_timer == 0) {
+				/* Do nothing. */
+			} else if (--mli->mli_v2_timer == 0) {
+				mld_v2_dispatch_general_query(mli);
+			} else {
+				interface_timers_running6 = 1;
+			}
+			MLI_UNLOCK(mli);
+		}
+	}
+
+	if (!current_state_timers_running6 &&
+	    !state_change_timers_running6)
+		goto out_locked;
+
+	current_state_timers_running6 = 0;
+	state_change_timers_running6 = 0;
+#if 0
+	MLD_PRINTF(("%s: state change timers running\n", __func__));
+#endif
+
+	memset(&qrq, 0, sizeof(struct ifqueue));
+	qrq.ifq_maxlen = MLD_MAX_G_GS_PACKETS;
+
+	memset(&scq, 0, sizeof(struct ifqueue));
+	scq.ifq_maxlen = MLD_MAX_STATE_CHANGE_PACKETS;
+
+	/*
+	 * MLD host report and state-change timer processing.
+	 * Note: Processing a v2 group timer may remove a node.
+	 */
+	LIST_FOREACH(mli, &mli_head, mli_link) {
+		struct in6_multistep step;
+
+		MLI_LOCK(mli);
+		ifp = mli->mli_ifp;
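+		/*
+		 * mli_uri holds the Unsolicited Report Interval; convert
+		 * it to slow-timeout ticks and jitter it so retransmitted
+		 * state-change reports are spread over the interval.
+		 */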
+		uri_fasthz = MLD_RANDOM_DELAY(mli->mli_uri * PR_SLOWHZ);
+		MLI_UNLOCK(mli);
+
+		in6_multihead_lock_shared();
+		IN6_FIRST_MULTI(step, inm);
+		while (inm != NULL) {
+			IN6M_LOCK(inm);
+			if (inm->in6m_ifp != ifp)
+				goto next;
+
+			MLI_LOCK(mli);
+			switch (mli->mli_version) {
+			case MLD_VERSION_1:
+				mld_v1_process_group_timer(inm,
+				    mli->mli_version);
+				break;
+			case MLD_VERSION_2:
+				mld_v2_process_group_timers(mli, &qrq,
+				    &scq, inm, uri_fasthz);
+				break;
+			}
+			MLI_UNLOCK(mli);
+next:
+			IN6M_UNLOCK(inm);
+			IN6_NEXT_MULTI(step, inm);
+		}
+		in6_multihead_lock_done();
+
+		MLI_LOCK(mli);
+		if (mli->mli_version == MLD_VERSION_1) {
+			mld_dispatch_queue(mli, &mli->mli_v1q, 0);
+		} else if (mli->mli_version == MLD_VERSION_2) {
+			MLI_UNLOCK(mli);
+			mld_dispatch_queue(NULL, &qrq, 0);
+			mld_dispatch_queue(NULL, &scq, 0);
+			VERIFY(qrq.ifq_len == 0);
+			VERIFY(scq.ifq_len == 0);
+			MLI_LOCK(mli);
+		}
+		/*
+		 * In case there are still any pending membership reports
+		 * which didn't get drained at version change time.
+		 */
+		IF_DRAIN(&mli->mli_v1q);
+		/*
+		 * Release all deferred inm records, and drain any locally
+		 * enqueued packets; do it even if the current MLD version
+		 * for the link is no longer MLDv2, in order to handle the
+		 * version change case.
+		 */
+		mld_flush_relq(mli);
+		VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
+		MLI_UNLOCK(mli);
+
+		IF_DRAIN(&qrq);
+		IF_DRAIN(&scq);
+	}
+
+out_locked:
+	MLD_UNLOCK();
+}
+
+/*
+ * Free the in6_multi reference(s) for this MLD lifecycle.
+ *
+ * Caller must be holding mli_lock.
+ */
+static void
+mld_flush_relq(struct mld_ifinfo *mli)
+{
+	struct in6_multi *inm;
+
+again:
+	MLI_LOCK_ASSERT_HELD(mli);
+	inm = SLIST_FIRST(&mli->mli_relinmhead);
+	if (inm != NULL) {
+		int lastref;
+
+		SLIST_REMOVE_HEAD(&mli->mli_relinmhead, in6m_nrele);
+		MLI_UNLOCK(mli);
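+		/*
+		 * The mli lock is dropped before taking the in6_multihead
+		 * and in6_multi locks, and re-taken afterwards, so the
+		 * locks are never held in a nested acquisition here.
+		 */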
+
+		in6_multihead_lock_exclusive();
+		IN6M_LOCK(inm);
+		VERIFY(inm->in6m_nrelecnt != 0);
+		inm->in6m_nrelecnt--;
+		lastref = in6_multi_detach(inm);
+		VERIFY(!lastref || (!(inm->in6m_debug & IFD_ATTACHED) &&
+		    inm->in6m_reqcnt == 0));
+		IN6M_UNLOCK(inm);
+		in6_multihead_lock_done();
+		/* from mli_relinmhead */
+		IN6M_REMREF(inm);
+		/* from in6_multihead_list */
+		if (lastref)
+			IN6M_REMREF(inm);
+
+		MLI_LOCK(mli);
+		goto again;
+	}
+}
+
+/*
+ * Update host report group timer.
+ * Will update the global pending timer flags.
+ */
+static void
+mld_v1_process_group_timer(struct in6_multi *inm, const int mld_version)
+{
+#pragma unused(mld_version)
+	int report_timer_expired;
+
+	IN6M_LOCK_ASSERT_HELD(inm);
+	MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
+
+	if (inm->in6m_timer == 0) {
+		report_timer_expired = 0;
+	} else if (--inm->in6m_timer == 0) {
+		report_timer_expired = 1;
+	} else {
+		current_state_timers_running6 = 1;
+		return;
+	}
+
+	switch (inm->in6m_state) {
+	case MLD_NOT_MEMBER:
+	case MLD_SILENT_MEMBER:
+	case MLD_IDLE_MEMBER:
+	case MLD_LAZY_MEMBER:
+	case MLD_SLEEPING_MEMBER:
+	case MLD_AWAKENING_MEMBER:
+		break;
+	case MLD_REPORTING_MEMBER:
+		if (report_timer_expired) {
+			inm->in6m_state = MLD_IDLE_MEMBER;
+			(void) mld_v1_transmit_report(inm,
+			     MLD_LISTENER_REPORT);
+			IN6M_LOCK_ASSERT_HELD(inm);
+			MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
+		}
+		break;
+	case MLD_G_QUERY_PENDING_MEMBER:
+	case MLD_SG_QUERY_PENDING_MEMBER:
+	case MLD_LEAVING_MEMBER:
+		break;
+	}
+}
+
+/*
+ * Update a group's timers for MLDv2.
+ * Will update the global pending timer flags.
+ * Note: Unlocked read from mli.
+ */
+static void
+mld_v2_process_group_timers(struct mld_ifinfo *mli,
+    struct ifqueue *qrq, struct ifqueue *scq,
+    struct in6_multi *inm, const int uri_fasthz)
+{
+	int query_response_timer_expired;
+	int state_change_retransmit_timer_expired;
+
+	IN6M_LOCK_ASSERT_HELD(inm);
+	MLI_LOCK_ASSERT_HELD(mli);
+	VERIFY(mli == inm->in6m_mli);
+
+	query_response_timer_expired = 0;
+	state_change_retransmit_timer_expired = 0;
+
+	/*
+	 * During a transition from compatibility mode back to MLDv2,
+	 * a group record in REPORTING state may still have its group
+	 * timer active. This is a no-op in this function; it is easier
+	 * to deal with it here than to complicate the slow-timeout path.
+	 */
+	if (inm->in6m_timer == 0) {
+		query_response_timer_expired = 0;
+	} else if (--inm->in6m_timer == 0) {
+		query_response_timer_expired = 1;
+	} else {
+		current_state_timers_running6 = 1;
+	}
+
+	if (inm->in6m_sctimer == 0) {
+		state_change_retransmit_timer_expired = 0;
+	} else if (--inm->in6m_sctimer == 0) {
+		state_change_retransmit_timer_expired = 1;
+	} else {
+		state_change_timers_running6 = 1;
+	}
+
+	/* We are in timeout processing, so be quick about it. */
+	if (!state_change_retransmit_timer_expired &&
+	    !query_response_timer_expired)
+		return;
+
+	switch (inm->in6m_state) {
+	case MLD_NOT_MEMBER:
+	case MLD_SILENT_MEMBER:
+	case MLD_SLEEPING_MEMBER:
+	case MLD_LAZY_MEMBER:
+	case MLD_AWAKENING_MEMBER:
+	case MLD_IDLE_MEMBER:
+		break;
+	case MLD_G_QUERY_PENDING_MEMBER:
+	case MLD_SG_QUERY_PENDING_MEMBER:
+		/*
+		 * Respond to a previously pending Group-Specific
+		 * or Group-and-Source-Specific query by enqueueing
+		 * the appropriate Current-State report for
+		 * immediate transmission.
+		 */
+		if (query_response_timer_expired) {
+			int retval;
+
+			retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1,
+			    (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER),
+			    0);
+			MLD_PRINTF(("%s: enqueue record = %d\n",
+			    __func__, retval));
+			inm->in6m_state = MLD_REPORTING_MEMBER;
+			in6m_clear_recorded(inm);
+		}
+		/* FALLTHROUGH */
+	case MLD_REPORTING_MEMBER:
+	case MLD_LEAVING_MEMBER:
+		if (state_change_retransmit_timer_expired) {
+			/*
+			 * State-change retransmission timer fired.
+			 * If there are any further pending retransmissions,
+			 * set the global pending state-change flag, and
+			 * reset the timer.
+			 */
+			if (--inm->in6m_scrv > 0) {
+				inm->in6m_sctimer = uri_fasthz;
+				state_change_timers_running6 = 1;
+			}
+			/*
+			 * Retransmit the previously computed state-change
+			 * report. If there are no further pending
+			 * retransmissions, the mbuf queue will be consumed.
+			 * Update T0 state to T1 as we have now sent
+			 * a state-change.
+			 */
+			(void) mld_v2_merge_state_changes(inm, scq);
+
+			in6m_commit(inm);
+			MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
+			    ip6_sprintf(&inm->in6m_addr),
+			    inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
+
+			/*
+			 * If we are leaving the group for good, make sure
+			 * we release MLD's reference to it.
+			 * This release must be deferred using a SLIST,
+			 * as we are called from a loop which traverses
+			 * the in_ifmultiaddr TAILQ.
+			 */
+			if (inm->in6m_state == MLD_LEAVING_MEMBER &&
+			    inm->in6m_scrv == 0) {
+				inm->in6m_state = MLD_NOT_MEMBER;
+				/*
+				 * A reference has already been held in
+				 * mld_final_leave() for this inm, so
+				 * no need to hold another one.  We also
+				 * bumped up its request count then, so
+				 * that it stays in in6_multihead.  Both
+				 * of them will be released when it is
+				 * dequeued later on.
+				 */
+				VERIFY(inm->in6m_nrelecnt != 0);
+				SLIST_INSERT_HEAD(&mli->mli_relinmhead,
+				    inm, in6m_nrele);
+			}
+		}
+		break;
+	}
+}
+
+/*
+ * Switch to a different version on the given interface,
+ * as per Section 9.12.
+ */
+static void
+mld_set_version(struct mld_ifinfo *mli, const int mld_version)
+{
+	int old_version_timer;
+
+	MLI_LOCK_ASSERT_HELD(mli);
+
+	MLD_PRINTF(("%s: switching to v%d on ifp %p(%s%d)\n", __func__,
+	    mld_version, mli->mli_ifp, mli->mli_ifp->if_name,
+	    mli->mli_ifp->if_unit));
+
+	if (mld_version == MLD_VERSION_1) {
+		/*
+		 * Compute the "Older Version Querier Present" timer as per
+		 * Section 9.12.
+		 */
+		old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri;
+		old_version_timer *= PR_SLOWHZ;
+		mli->mli_v1_timer = old_version_timer;
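+		/*
+		 * Illustrative example, assuming the RFC 3810 defaults
+		 * and consistent units: robustness 2, query interval 125
+		 * and query response interval 10 give an Older Version
+		 * Querier Present timeout of (2 * 125 + 10) * PR_SLOWHZ
+		 * ticks.
+		 */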
+	}
+
+	if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) {
+		mli->mli_version = MLD_VERSION_1;
+		mld_v2_cancel_link_timers(mli);
+	}
+
+	MLI_LOCK_ASSERT_HELD(mli);
+}
+
+/*
+ * Cancel pending MLDv2 timers for the given link and all groups
+ * joined on it; state-change, general-query, and group-query timers.
+ */
+static void
+mld_v2_cancel_link_timers(struct mld_ifinfo *mli)
+{
+	struct ifnet		*ifp;
+	struct in6_multi	*inm;
+	struct in6_multistep	step;
+
+	MLI_LOCK_ASSERT_HELD(mli);
+
+	MLD_PRINTF(("%s: cancel v2 timers on ifp %p(%s%d)\n", __func__,
+	    mli->mli_ifp, mli->mli_ifp->if_name, mli->mli_ifp->if_unit));
+
+	/*
+	 * Fast-track this potentially expensive operation
+	 * by checking all the global 'timer pending' flags.
+	 */
+	if (!interface_timers_running6 &&
+	    !state_change_timers_running6 &&
+	    !current_state_timers_running6)
+		return;
+
+	mli->mli_v2_timer = 0;
+	ifp = mli->mli_ifp;
+	MLI_UNLOCK(mli);
+
+	in6_multihead_lock_shared();
+	IN6_FIRST_MULTI(step, inm);
+	while (inm != NULL) {
+		IN6M_LOCK(inm);
+		if (inm->in6m_ifp != ifp)
+			goto next;
+
+		switch (inm->in6m_state) {
+		case MLD_NOT_MEMBER:
+		case MLD_SILENT_MEMBER:
+		case MLD_IDLE_MEMBER:
+		case MLD_LAZY_MEMBER:
+		case MLD_SLEEPING_MEMBER:
+		case MLD_AWAKENING_MEMBER:
+			break;
+		case MLD_LEAVING_MEMBER:
+			/*
+			 * If we are leaving the group and switching
+			 * version, we need to release the final
+			 * reference held for issuing the INCLUDE {}.
+			 * During mld_final_leave(), we bumped up both the
+			 * request and reference counts.  Since we cannot
+			 * call in6_multi_detach() here, defer this task to
+			 * the timer routine.
+			 */
+			VERIFY(inm->in6m_nrelecnt != 0);
+			MLI_LOCK(mli);
+			SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
+			    in6m_nrele);
+			MLI_UNLOCK(mli);
+			/* FALLTHROUGH */
+		case MLD_G_QUERY_PENDING_MEMBER:
+		case MLD_SG_QUERY_PENDING_MEMBER:
+			in6m_clear_recorded(inm);
+			/* FALLTHROUGH */
+		case MLD_REPORTING_MEMBER:
+			inm->in6m_sctimer = 0;
+			inm->in6m_timer = 0;
+			inm->in6m_state = MLD_REPORTING_MEMBER;
+			/*
+			 * Free any pending MLDv2 state-change records.
+			 */
+			IF_DRAIN(&inm->in6m_scq);
+			break;
+		}
+next:
+		IN6M_UNLOCK(inm);
+		IN6_NEXT_MULTI(step, inm);
+	}
+	in6_multihead_lock_done();
+
+	MLI_LOCK(mli);
+}
+
+/*
+ * Update the Older Version Querier Present timers for a link.
+ * See Section 9.12 of RFC 3810.
+ */
+static void
+mld_v1_process_querier_timers(struct mld_ifinfo *mli)
+{
+	MLI_LOCK_ASSERT_HELD(mli);
+
+	if (mli->mli_version != MLD_VERSION_2 && --mli->mli_v1_timer == 0) {
+		/*
+		 * MLDv1 Querier Present timer expired; revert to MLDv2.
+		 */
+		MLD_PRINTF(("%s: transition from v%d -> v%d on %p(%s%d)\n",
+		    __func__, mli->mli_version, MLD_VERSION_2,
+		    mli->mli_ifp, mli->mli_ifp->if_name, mli->mli_ifp->if_unit));
+		mli->mli_version = MLD_VERSION_2;
+	}
+}
+
+/*
+ * Transmit an MLDv1 report immediately.
+ */
+static int
+mld_v1_transmit_report(struct in6_multi *in6m, const int type)
+{
+	struct ifnet		*ifp;
+	struct in6_ifaddr	*ia;
+	struct ip6_hdr		*ip6;
+	struct mbuf		*mh, *md;
+	struct mld_hdr		*mld;
+	int			error = 0;
+
+	IN6M_LOCK_ASSERT_HELD(in6m);
+	MLI_LOCK_ASSERT_HELD(in6m->in6m_mli);
+
+	ifp = in6m->in6m_ifp;
+	/* ia may be NULL if link-local address is tentative. */
+	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
+
+	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
+	if (mh == NULL) {
+		if (ia != NULL)
+			IFA_REMREF(&ia->ia_ifa);
+		return (ENOMEM);
+	}
+	MGET(md, M_DONTWAIT, MT_DATA);
+	if (md == NULL) {
+		m_free(mh);
+		if (ia != NULL)
+			IFA_REMREF(&ia->ia_ifa);
+		return (ENOMEM);
+	}
+	mh->m_next = md;
+
+	/*
+	 * FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
+	 * that ether_output() does not need to allocate another mbuf
+	 * for the header in the most common case.
+	 */
+	MH_ALIGN(mh, sizeof(struct ip6_hdr));
+	mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
+	mh->m_len = sizeof(struct ip6_hdr);
+
+	ip6 = mtod(mh, struct ip6_hdr *);
+	ip6->ip6_flow = 0;
+	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
+	ip6->ip6_vfc |= IPV6_VERSION;
+	ip6->ip6_nxt = IPPROTO_ICMPV6;
+	if (ia != NULL)
+		IFA_LOCK(&ia->ia_ifa);
+	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
+	if (ia != NULL) {
+		IFA_UNLOCK(&ia->ia_ifa);
+		IFA_REMREF(&ia->ia_ifa);
+		ia = NULL;
+	}
+	ip6->ip6_dst = in6m->in6m_addr;
+
+	md->m_len = sizeof(struct mld_hdr);
+	mld = mtod(md, struct mld_hdr *);
+	mld->mld_type = type;
+	mld->mld_code = 0;
+	mld->mld_cksum = 0;
+	mld->mld_maxdelay = 0;
+	mld->mld_reserved = 0;
+	mld->mld_addr = in6m->in6m_addr;
+	in6_clearscope(&mld->mld_addr);
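+	/*
+	 * The ICMPv6 checksum covers the IPv6 pseudo-header plus the
+	 * MLD message itself (RFC 2710 section 3).
+	 */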
+	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
+	    sizeof(struct ip6_hdr), sizeof(struct mld_hdr));
+
+	mh->m_flags |= M_MLDV1;
+
+	/*
+	 * Due to the fact that at this point we are possibly holding
+	 * in6_multihead_lock in shared or exclusive mode, we can't call
+	 * mld_dispatch_packet() here since that will eventually call
+	 * ip6_output(), which will try to lock in6_multihead_lock and cause
+	 * a deadlock.
+	 * Instead we defer the work to the mld_slowtimo() thread, thus
+	 * avoiding unlocking in_multihead_lock here.
+	 */
+	if (IF_QFULL(&in6m->in6m_mli->mli_v1q)) {
+		MLD_PRINTF(("%s: v1 outbound queue full\n", __func__));
+		error = ENOMEM;
+		m_freem(mh);
+	} else
+		IF_ENQUEUE(&in6m->in6m_mli->mli_v1q, mh);
+
+	return (error);
+}
+
+/*
+ * Process a state change from the upper layer for the given IPv6 group.
+ *
+ * Each socket holds a reference on the in6_multi in its own ip_moptions.
+ * The socket layer will have made the necessary updates to the group
+ * state; it is now up to MLD to issue a state change report if there
+ * has been any change between T0 (when the last state-change was issued)
+ * and T1 (now).
+ *
+ * We use the MLDv2 state machine at group level. The MLD module,
+ * however, makes the decision as to which MLD protocol version to speak.
+ * A state change *from* INCLUDE {} always means an initial join.
+ * A state change *to* INCLUDE {} always means a final leave.
+ *
+ * If delay is non-zero, and the state change is an initial multicast
+ * join, the state change report will be delayed by 'delay' ticks
+ * in units of PR_FASTHZ if MLDv1 is active on the link; otherwise
+ * the initial MLDv2 state change report will be delayed by whichever
+ * is sooner, a pending state-change timer or delay itself.
+ */
+int
+mld_change_state(struct in6_multi *inm, const int delay)
+{
+	struct mld_ifinfo *mli;
+	struct ifnet *ifp;
+	int error = 0;
+
+	IN6M_LOCK_ASSERT_HELD(inm);
+	VERIFY(inm->in6m_mli != NULL);
+	MLI_LOCK_ASSERT_NOTHELD(inm->in6m_mli);
+
+	/*
+	 * Try to detect if the upper layer just asked us to change state
+	 * for an interface which has now gone away.
+	 */
+	VERIFY(inm->in6m_ifma != NULL);
+	ifp = inm->in6m_ifma->ifma_ifp;
+	/*
+	 * Sanity check that netinet6's notion of ifp is the same as net's.
+	 */
+	VERIFY(inm->in6m_ifp == ifp);
+
+	mli = MLD_IFINFO(ifp);
+	VERIFY(mli != NULL);
+
+	/*
+	 * If we detect a state transition to or from MCAST_UNDEFINED
+	 * for this group, then we are starting or finishing an MLD
+	 * life cycle for this group.
+	 */
+	if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) {
+		MLD_PRINTF(("%s: inm transition %d -> %d\n", __func__,
+		    inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode));
+		if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) {
+			MLD_PRINTF(("%s: initial join\n", __func__));
+			error = mld_initial_join(inm, mli, delay);
+			goto out;
+		} else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) {
+			MLD_PRINTF(("%s: final leave\n", __func__));
+			mld_final_leave(inm, mli);
+			goto out;
+		}
+	} else {
+		MLD_PRINTF(("%s: filter set change\n", __func__));
+	}
+
+	error = mld_handle_state_change(inm, mli);
+
+out:
+	return (error);
+}
+
+/*
+ * Perform the initial join for an MLD group.
+ *
+ * When joining a group:
+ *  If the group should have its MLD traffic suppressed, do nothing.
+ *  MLDv1 starts sending MLDv1 host membership reports.
+ *  MLDv2 will schedule an MLDv2 state-change report containing the
+ *  initial state of the membership.
+ *
+ * If the delay argument is non-zero, then we must delay sending the
+ * initial state change for delay ticks (in units of PR_FASTHZ).
+ */
+static int
+mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
+    const int delay)
+{
+	struct ifnet		*ifp;
+	struct ifqueue		*ifq;
+	int			 error, retval, syncstates;
+	int			 odelay;
+
+	IN6M_LOCK_ASSERT_HELD(inm);
+	MLI_LOCK_ASSERT_NOTHELD(mli);
+
+	MLD_PRINTF(("%s: initial join %s on ifp %p(%s%d)\n",
+	    __func__, ip6_sprintf(&inm->in6m_addr),
+	    inm->in6m_ifp, inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
+
+	error = 0;
+	syncstates = 1;
+
+	ifp = inm->in6m_ifp;
+
+	MLI_LOCK(mli);
+	VERIFY(mli->mli_ifp == ifp);
+
+	/*
+	 * Groups joined on loopback or marked as 'not reported',
+	 * enter the MLD_SILENT_MEMBER state and
+	 * are never reported in any protocol exchanges.
+	 * All other groups enter the appropriate state machine
+	 * for the version in use on this link.
+	 * A link marked as MLIF_SILENT causes MLD to be completely
+	 * disabled for the link.
+	 */
+	if ((ifp->if_flags & IFF_LOOPBACK) ||
+	    (mli->mli_flags & MLIF_SILENT) ||
+	    !mld_is_addr_reported(&inm->in6m_addr)) {
+		MLD_PRINTF(("%s: not kicking state machine for silent group\n",
+		    __func__));
+		inm->in6m_state = MLD_SILENT_MEMBER;
+		inm->in6m_timer = 0;
+	} else {
+		/*
+		 * Deal with overlapping in6_multi lifecycle.
+		 * If this group was LEAVING, then make sure
+		 * we drop the reference we picked up to keep the
+		 * group around for the final INCLUDE {} enqueue.
+		 * Since we cannot call in6_multi_detach() here,
+		 * defer this task to the timer routine.
+		 */
+		if (mli->mli_version == MLD_VERSION_2 &&
+		    inm->in6m_state == MLD_LEAVING_MEMBER) {
+			VERIFY(inm->in6m_nrelecnt != 0);
+			SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
+			    in6m_nrele);
+		}
+
+		inm->in6m_state = MLD_REPORTING_MEMBER;
+
+		switch (mli->mli_version) {
+		case MLD_VERSION_1:
+			/*
+			 * If a delay was provided, only use it if
+			 * it is greater than the delay normally
+			 * used for an MLDv1 state change report,
+			 * and delay sending the initial MLDv1 report
+			 * by not transitioning to the IDLE state.
+			 */
+			odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI * PR_SLOWHZ);
+			if (delay) {
+				inm->in6m_timer = max(delay, odelay);
+				current_state_timers_running6 = 1;
+			} else {
+				inm->in6m_state = MLD_IDLE_MEMBER;
+				error = mld_v1_transmit_report(inm,
+				     MLD_LISTENER_REPORT);
+
+				IN6M_LOCK_ASSERT_HELD(inm);
+				MLI_LOCK_ASSERT_HELD(mli);
+
+				if (error == 0) {
+					inm->in6m_timer = odelay;
+					current_state_timers_running6 = 1;
+				}
+			}
+			break;
+
+		case MLD_VERSION_2:
+			/*
+			 * Defer update of T0 to T1, until the first copy
+			 * of the state change has been transmitted.
+			 */
+			syncstates = 0;
+
+			/*
+			 * Immediately enqueue a State-Change Report for
+			 * this interface, freeing any previous reports.
+			 * Don't kick the timers if there is nothing to do,
+			 * or if an error occurred.
+			 */
+			ifq = &inm->in6m_scq;
+			IF_DRAIN(ifq);
+			retval = mld_v2_enqueue_group_record(ifq, inm, 1,
+			    0, 0, (mli->mli_flags & MLIF_USEALLOW));
+			MLD_PRINTF(("%s: enqueue record = %d\n",
+			    __func__, retval));
+			if (retval <= 0) {
+				error = retval * -1;
+				break;
+			}
+
+			/*
+			 * Schedule transmission of pending state-change
+			 * report up to RV times for this link. The timer
+			 * will fire on the next timeout pass (folded into
+			 * mld_slowtimo on this port), giving us an
+			 * opportunity to merge the reports.
+			 *
+			 * If a delay was provided to this function, only
+			 * use this delay if sooner than the existing one.
+			 */
+			VERIFY(mli->mli_rv > 1);
+			inm->in6m_scrv = mli->mli_rv;
+			if (delay) {
+				if (inm->in6m_sctimer > 1) {
+					inm->in6m_sctimer =
+					    min(inm->in6m_sctimer, delay);
+				} else
+					inm->in6m_sctimer = delay;
+			} else
+				inm->in6m_sctimer = 1;
+			state_change_timers_running6 = 1;
+
+			error = 0;
+			break;
+		}
+	}
+	MLI_UNLOCK(mli);
+
+	/*
+	 * Only update the T0 state if state change is atomic,
+	 * i.e. we don't need to wait for a timer to fire before we
+	 * can consider the state change to have been communicated.
+	 */
+	if (syncstates) {
+		in6m_commit(inm);
+		MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
+		    ip6_sprintf(&inm->in6m_addr),
+		    inm->in6m_ifp->if_name, ifp->if_unit));
+	}
+
+	return (error);
+}
+
+/*
+ * Issue an intermediate state change during the life-cycle.
+ */
+static int
+mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli)
+{
+	struct ifnet		*ifp;
+	int			 retval;
+
+	IN6M_LOCK_ASSERT_HELD(inm);
+	MLI_LOCK_ASSERT_NOTHELD(mli);
+
+	MLD_PRINTF(("%s: state change for %s on ifp %p(%s%d)\n",
+	    __func__, ip6_sprintf(&inm->in6m_addr),
+	    inm->in6m_ifp, inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
+
+	ifp = inm->in6m_ifp;
+
+	MLI_LOCK(mli);
+	VERIFY(mli->mli_ifp == ifp);
+
+	if ((ifp->if_flags & IFF_LOOPBACK) ||
+	    (mli->mli_flags & MLIF_SILENT) ||
+	    !mld_is_addr_reported(&inm->in6m_addr) ||
+	    (mli->mli_version != MLD_VERSION_2)) {
+		MLI_UNLOCK(mli);
+		if (!mld_is_addr_reported(&inm->in6m_addr)) {
+			MLD_PRINTF(("%s: not kicking state machine for silent "
+			    "group\n", __func__));
+		}
+		MLD_PRINTF(("%s: nothing to do\n", __func__));
+		in6m_commit(inm);
+		MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
+		    ip6_sprintf(&inm->in6m_addr),
+		    inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
+		return (0);
+	}
+
+	IF_DRAIN(&inm->in6m_scq);
+
+	retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0,
+	    (mli->mli_flags & MLIF_USEALLOW));
+	MLD_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
+	if (retval <= 0) {
+		MLI_UNLOCK(mli);
+		return (-retval);
+	}
+	/*
+	 * If record(s) were enqueued, start the state-change
+	 * report timer for this group.
+	 */
+	inm->in6m_scrv = mli->mli_rv;
+	inm->in6m_sctimer = 1;
+	state_change_timers_running6 = 1;
+	MLI_UNLOCK(mli);
+
+	return (0);
+}
+
+/*
+ * Perform the final leave for a multicast address.
+ *
+ * When leaving a group:
+ *  MLDv1 sends a DONE message, if and only if we are the reporter.
+ *  MLDv2 enqueues a state-change report containing a transition
+ *  to INCLUDE {} for immediate transmission.
+ */
+static void
+mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli)
+{
+	int syncstates = 1;
+
+	IN6M_LOCK_ASSERT_HELD(inm);
+	MLI_LOCK_ASSERT_NOTHELD(mli);
+
+	MLD_PRINTF(("%s: final leave %s on ifp %p(%s%d)\n",
+	    __func__, ip6_sprintf(&inm->in6m_addr),
+	    inm->in6m_ifp, inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
+
+	switch (inm->in6m_state) {
+	case MLD_NOT_MEMBER:
+	case MLD_SILENT_MEMBER:
+	case MLD_LEAVING_MEMBER:
+		/* Already leaving or left; do nothing. */
+		MLD_PRINTF(("%s: not kicking state machine for silent group\n",
+		    __func__));
+		break;
+	case MLD_REPORTING_MEMBER:
+	case MLD_IDLE_MEMBER:
+	case MLD_G_QUERY_PENDING_MEMBER:
+	case MLD_SG_QUERY_PENDING_MEMBER:
+		MLI_LOCK(mli);
+		if (mli->mli_version == MLD_VERSION_1) {
+			if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
+			    inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
+				panic("%s: MLDv2 state reached, not MLDv2 "
+				    "mode\n", __func__);
+				/* NOTREACHED */
+			}
+			mld_v1_transmit_report(inm, MLD_LISTENER_DONE);
+
+			IN6M_LOCK_ASSERT_HELD(inm);
+			MLI_LOCK_ASSERT_HELD(mli);
+
+			inm->in6m_state = MLD_NOT_MEMBER;
+		} else if (mli->mli_version == MLD_VERSION_2) {
+			/*
+			 * Stop group timer and all pending reports.
+			 * Immediately enqueue a state-change report
+			 * TO_IN {} to be sent on the next fast timeout,
+			 * giving us an opportunity to merge reports.
+			 */
+			IF_DRAIN(&inm->in6m_scq);
+			inm->in6m_timer = 0;
+			inm->in6m_scrv = mli->mli_rv;
+			MLD_PRINTF(("%s: Leaving %s/%s%d with %d "
+			    "pending retransmissions.\n", __func__,
+			    ip6_sprintf(&inm->in6m_addr),
+			    inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit,
+			    inm->in6m_scrv));
+			if (inm->in6m_scrv == 0) {
+				inm->in6m_state = MLD_NOT_MEMBER;
+				inm->in6m_sctimer = 0;
+			} else {
+				int retval;
+				/*
+				 * Stick around in the in6_multihead list;
+				 * the final detach will be issued by
+				 * mld_v2_process_group_timers() when
+				 * the retransmit timer expires.
+				 */
+				IN6M_ADDREF_LOCKED(inm);
+				VERIFY(inm->in6m_debug & IFD_ATTACHED);
+				inm->in6m_reqcnt++;
+				VERIFY(inm->in6m_reqcnt >= 1);
+				inm->in6m_nrelecnt++;
+				VERIFY(inm->in6m_nrelecnt != 0);
+
+				retval = mld_v2_enqueue_group_record(
+				    &inm->in6m_scq, inm, 1, 0, 0,
+				    (mli->mli_flags & MLIF_USEALLOW));
+				KASSERT(retval != 0,
+				    ("%s: enqueue record = %d\n", __func__,
+				     retval));
+
+				inm->in6m_state = MLD_LEAVING_MEMBER;
+				inm->in6m_sctimer = 1;
+				state_change_timers_running6 = 1;
+				syncstates = 0;
+			}
+		}
+		MLI_UNLOCK(mli);
+		break;
+	case MLD_LAZY_MEMBER:
+	case MLD_SLEEPING_MEMBER:
+	case MLD_AWAKENING_MEMBER:
+		/* Our reports are suppressed; do nothing. */
+		break;
+	}
+
+	if (syncstates) {
+		in6m_commit(inm);
+		MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
+		    ip6_sprintf(&inm->in6m_addr),
+		    inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
+		inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
+		MLD_PRINTF(("%s: T1 now MCAST_UNDEFINED for %p/%s%d\n",
+		    __func__, &inm->in6m_addr, inm->in6m_ifp->if_name,
+		    inm->in6m_ifp->if_unit));
+	}
+}
+
+/*
+ * Enqueue an MLDv2 group record to the given output queue.
+ *
+ * If is_state_change is zero, a current-state record is appended.
+ * If is_state_change is non-zero, a state-change report is appended.
+ *
+ * If is_group_query is non-zero, an mbuf packet chain is allocated.
+ * If is_group_query is zero, and if there is a packet with free space
+ * at the tail of the queue, it will be appended to providing there
+ * is enough free space.
+ * Otherwise a new mbuf packet chain is allocated.
+ *
+ * If is_source_query is non-zero, each source is checked to see if
+ * it was recorded for a Group-Source query, and will be omitted if
+ * it is not both in-mode and recorded.
+ *
+ * If use_block_allow is non-zero, state change reports for initial join
+ * and final leave, on an inclusive mode group with a source list, will be
+ * rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively.
+ *
+ * The function will attempt to allocate leading space in the packet
+ * for the IPv6+ICMP headers to be prepended without fragmenting the chain.
+ *
+ * If successful the size of all data appended to the queue is returned,
+ * otherwise an error code less than zero is returned, or zero if
+ * no record(s) were appended.
+ */
+static int
+mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
+    const int is_state_change, const int is_group_query,
+    const int is_source_query, const int use_block_allow)
+{
+	struct mldv2_record	 mr;
+	struct mldv2_record	*pmr;
+	struct ifnet		*ifp;
+	struct ip6_msource	*ims, *nims;
+	struct mbuf		*m0, *m, *md;
+	int			 error, is_filter_list_change;
+	int			 minrec0len, m0srcs, msrcs, nbytes, off;
+	int			 record_has_sources;
+	int			 now;
+	int			 type;
+	uint8_t			 mode;
+
+	IN6M_LOCK_ASSERT_HELD(inm);
+	MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
+
+	error = 0;
+	ifp = inm->in6m_ifp;
+	is_filter_list_change = 0;
+	m = NULL;
+	m0 = NULL;
+	m0srcs = 0;
+	msrcs = 0;
+	nbytes = 0;
+	nims = NULL;
+	record_has_sources = 1;
+	pmr = NULL;
+	type = MLD_DO_NOTHING;
+	mode = inm->in6m_st[1].iss_fmode;
+
+	/*
+	 * If we did not transition out of ASM mode during t0->t1,
+	 * and there are no source nodes to process, we can skip
+	 * the generation of source records.
+	 */
+	if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 &&
+	    inm->in6m_nsrc == 0)
+		record_has_sources = 0;
+
+	if (is_state_change) {
+		/*
+		 * Queue a state change record.
+		 * If the mode did not change, and there are non-ASM
+		 * listeners or source filters present,
+		 * we potentially need to issue two records for the group.
+		 * If there are ASM listeners, and there was no filter
+		 * mode transition of any kind, do nothing.
+		 *
+		 * If we are transitioning to MCAST_UNDEFINED, we need
+		 * not send any sources. A transition to/from this state is
+		 * considered inclusive with some special treatment.
+		 *
+		 * If we are rewriting initial joins/leaves to use
+		 * ALLOW/BLOCK, and the group's membership is inclusive,
+		 * we need to send sources in all cases.
+		 */
+		if (mode != inm->in6m_st[0].iss_fmode) {
+			if (mode == MCAST_EXCLUDE) {
+				MLD_PRINTF(("%s: change to EXCLUDE\n",
+				    __func__));
+				type = MLD_CHANGE_TO_EXCLUDE_MODE;
+			} else {
+				MLD_PRINTF(("%s: change to INCLUDE\n",
+				    __func__));
+				if (use_block_allow) {
+					/*
+					 * XXX
+					 * Here we're interested in state
+					 * edges either direction between
+					 * MCAST_UNDEFINED and MCAST_INCLUDE.
+					 * Perhaps we should just check
+					 * the group state, rather than
+					 * the filter mode.
+					 */
+					if (mode == MCAST_UNDEFINED) {
+						type = MLD_BLOCK_OLD_SOURCES;
+					} else {
+						type = MLD_ALLOW_NEW_SOURCES;
+					}
+				} else {
+					type = MLD_CHANGE_TO_INCLUDE_MODE;
+					if (mode == MCAST_UNDEFINED)
+						record_has_sources = 0;
+				}
+			}
+		} else {
+			if (record_has_sources) {
+				is_filter_list_change = 1;
+			} else {
+				type = MLD_DO_NOTHING;
+			}
+		}
+	} else {
+		/*
+		 * Queue a current state record.
+		 */
+		if (mode == MCAST_EXCLUDE) {
+			type = MLD_MODE_IS_EXCLUDE;
+		} else if (mode == MCAST_INCLUDE) {
+			type = MLD_MODE_IS_INCLUDE;
+			VERIFY(inm->in6m_st[1].iss_asm == 0);
+		}
+	}
+
+	/*
+	 * Generate the filter list changes using a separate function.
+	 */
+	if (is_filter_list_change)
+		return (mld_v2_enqueue_filter_change(ifq, inm));
+
+	if (type == MLD_DO_NOTHING) {
+		MLD_PRINTF(("%s: nothing to do for %s/%s%d\n",
+		    __func__, ip6_sprintf(&inm->in6m_addr),
+		    inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
+		return (0);
+	}
+
+	/*
+	 * If any sources are present, we must be able to fit at least
+	 * one in the trailing space of the tail packet's mbuf,
+	 * ideally more.
+	 */
+	minrec0len = sizeof(struct mldv2_record);
+	if (record_has_sources)
+		minrec0len += sizeof(struct in6_addr);
+	MLD_PRINTF(("%s: queueing %s for %s/%s%d\n", __func__,
+	    mld_rec_type_to_str(type),
+	    ip6_sprintf(&inm->in6m_addr),
+	    inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
+
+	/*
+	 * Check if we have a packet in the tail of the queue for this
+	 * group into which the first group record for this group will fit.
+	 * Otherwise allocate a new packet.
+	 * Always allocate leading space for IP6+RA+ICMPV6+REPORT.
+	 * Note: Group records for G/GSR query responses MUST be sent
+	 * in their own packet.
+	 */
+	m0 = ifq->ifq_tail;
+	if (!is_group_query &&
+	    m0 != NULL &&
+	    (m0->m_pkthdr.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) &&
+	    (m0->m_pkthdr.len + minrec0len) <
+	     (ifp->if_mtu - MLD_MTUSPACE)) {
+		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
+			    sizeof(struct mldv2_record)) /
+			    sizeof(struct in6_addr);
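+		/*
+		 * m0srcs: how many more 16-byte source addresses still
+		 * fit in the tail packet without exceeding the link MTU.
+		 */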
+		m = m0;
+		MLD_PRINTF(("%s: use existing packet\n", __func__));
+	} else {
+		if (IF_QFULL(ifq)) {
+			MLD_PRINTF(("%s: outbound queue full\n", __func__));
+			return (-ENOMEM);
+		}
+		m = NULL;
+		m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
+		    sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
+		if (!is_state_change && !is_group_query)
+			m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+		if (m == NULL)
+			m = m_gethdr(M_DONTWAIT, MT_DATA);
+		if (m == NULL)
+			return (-ENOMEM);
+
+		MLD_PRINTF(("%s: allocated first packet\n", __func__));
+	}
+
+	/*
+	 * Append group record.
+	 * If we have sources, we don't know how many yet.
+	 */
+	mr.mr_type = type;
+	mr.mr_datalen = 0;
+	mr.mr_numsrc = 0;
+	mr.mr_addr = inm->in6m_addr;
+	in6_clearscope(&mr.mr_addr);
+	if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
+		if (m != m0)
+			m_freem(m);
+		MLD_PRINTF(("%s: m_append() failed.\n", __func__));
+		return (-ENOMEM);
+	}
+	nbytes += sizeof(struct mldv2_record);
+
+	/*
+	 * Append as many sources as will fit in the first packet.
+	 * If we are appending to a new packet, the chain allocation
+	 * may potentially use clusters; use m_getptr() in this case.
+	 * If we are appending to an existing packet, we need to obtain
+	 * a pointer to the group record after m_append(), in case a new
+	 * mbuf was allocated.
+	 *
+	 * Only append sources which are in-mode at t1. If we are
+	 * transitioning to MCAST_UNDEFINED state on the group, and
+	 * use_block_allow is zero, do not include source entries.
+	 * Otherwise, we need to include this source in the report.
+	 *
+	 * Only report recorded sources in our filter set when responding
+	 * to a group-source query.
+	 */
+	if (record_has_sources) {
+		if (m == m0) {
+			md = m_last(m);
+			pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
+			    md->m_len - nbytes);
+		} else {
+			md = m_getptr(m, 0, &off);
+			pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
+			    off);
+		}
+		msrcs = 0;
+		RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs,
+		    nims) {
+			MLD_PRINTF(("%s: visit node %s\n", __func__,
+			    ip6_sprintf(&ims->im6s_addr)));
+			now = im6s_get_mode(inm, ims, 1);
+			MLD_PRINTF(("%s: node is %d\n", __func__, now));
+			if ((now != mode) ||
+			    (now == mode &&
+			     (!use_block_allow && mode == MCAST_UNDEFINED))) {
+				MLD_PRINTF(("%s: skip node\n", __func__));
+				continue;
+			}
+			if (is_source_query && ims->im6s_stp == 0) {
+				MLD_PRINTF(("%s: skip unrecorded node\n",
+				    __func__));
+				continue;
+			}
+			MLD_PRINTF(("%s: append node\n", __func__));
+			if (!m_append(m, sizeof(struct in6_addr),
+			    (void *)&ims->im6s_addr)) {
+				if (m != m0)
+					m_freem(m);
+				MLD_PRINTF(("%s: m_append() failed.\n",
+				    __func__));
+				return (-ENOMEM);
+			}
+			nbytes += sizeof(struct in6_addr);
+			++msrcs;
+			if (msrcs == m0srcs)
+				break;
+		}
+		MLD_PRINTF(("%s: msrcs is %d this packet\n", __func__,
+		    msrcs));
+		pmr->mr_numsrc = htons(msrcs);
+		nbytes += (msrcs * sizeof(struct in6_addr));
+	}
+
+	if (is_source_query && msrcs == 0) {
+		MLD_PRINTF(("%s: no recorded sources to report\n", __func__));
+		if (m != m0)
+			m_freem(m);
+		return (0);
+	}
+
+	/*
+	 * We are good to go with first packet.
+	 */
+	if (m != m0) {
+		MLD_PRINTF(("%s: enqueueing first packet\n", __func__));
+		m->m_pkthdr.vt_nrecs = 1;
+		m->m_pkthdr.rcvif = ifp;
+		IF_ENQUEUE(ifq, m);
+	} else {
+		m->m_pkthdr.vt_nrecs++;
+	}
+	/*
+	 * No further work needed if no source list in packet(s).
+	 */
+	if (!record_has_sources)
+		return (nbytes);
+
+	/*
+	 * Whilst sources remain to be announced, we need to allocate
+	 * a new packet and fill out as many sources as will fit.
+	 * Always try for a cluster first.
+	 */
+	while (nims != NULL) {
+		if (IF_QFULL(ifq)) {
+			MLD_PRINTF(("%s: outbound queue full\n", __func__));
+			return (-ENOMEM);
+		}
+		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+		if (m == NULL)
+			m = m_gethdr(M_DONTWAIT, MT_DATA);
+		if (m == NULL)
+			return (-ENOMEM);
+		md = m_getptr(m, 0, &off);
+		pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off);
+		MLD_PRINTF(("%s: allocated next packet\n", __func__));
+
+		if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
+			if (m != m0)
+				m_freem(m);
+			MLD_PRINTF(("%s: m_append() failed.\n", __func__));
+			return (-ENOMEM);
+		}
+		m->m_pkthdr.vt_nrecs = 1;
+		nbytes += sizeof(struct mldv2_record);
+
+		m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
+		    sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
+
+		msrcs = 0;
+		RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
+			MLD_PRINTF(("%s: visit node %s\n",
+			    __func__, ip6_sprintf(&ims->im6s_addr)));
+			now = im6s_get_mode(inm, ims, 1);
+			if ((now != mode) ||
+			    (now == mode &&
+			     (!use_block_allow && mode == MCAST_UNDEFINED))) {
+				MLD_PRINTF(("%s: skip node\n", __func__));
+				continue;
+			}
+			if (is_source_query && ims->im6s_stp == 0) {
+				MLD_PRINTF(("%s: skip unrecorded node\n",
+				    __func__));
+				continue;
+			}
+			MLD_PRINTF(("%s: append node\n", __func__));
+			if (!m_append(m, sizeof(struct in6_addr),
+			    (void *)&ims->im6s_addr)) {
+				if (m != m0)
+					m_freem(m);
+				MLD_PRINTF(("%s: m_append() failed.\n",
+				    __func__));
+				return (-ENOMEM);
+			}
+			++msrcs;
+			if (msrcs == m0srcs)
+				break;
+		}
+		pmr->mr_numsrc = htons(msrcs);
+		nbytes += (msrcs * sizeof(struct in6_addr));
+
+		MLD_PRINTF(("%s: enqueueing next packet\n", __func__));
+		m->m_pkthdr.rcvif = ifp;
+		IF_ENQUEUE(ifq, m);
+	}
+
+	return (nbytes);
+}
+
+/*
+ * Type used to mark record pass completion.
+ * We exploit the fact we can cast to this easily from the
+ * current filter modes on each ip6_msource node.
+ */
+typedef enum {
+	REC_NONE = 0x00,	/* MCAST_UNDEFINED */
+	REC_ALLOW = 0x01,	/* MCAST_INCLUDE */
+	REC_BLOCK = 0x02,	/* MCAST_EXCLUDE */
+	REC_FULL = REC_ALLOW | REC_BLOCK
+} rectype_t;
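+
+/*
+ * Editorial sketch (not in the original source): because
+ * MCAST_UNDEFINED, MCAST_INCLUDE and MCAST_EXCLUDE are defined as
+ * 0, 1 and 2 respectively, a node's filter mode at t1 casts directly
+ * to the record type to emit; a node that became UNDEFINED is
+ * reported with the inverse of the group's filter mode:
+ *
+ *	nrt = (rectype_t)im6s_get_mode(inm, ims, 1);
+ *	if (nrt == REC_NONE)
+ *		nrt = (rectype_t)(~mode & REC_FULL);
+ */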
+
+/*
+ * Enqueue an MLDv2 filter list change to the given output queue.
+ *
+ * Source list filter state is held in an RB-tree. When the filter list
+ * for a group is changed without changing its mode, we need to compute
+ * the deltas between T0 and T1 for each source in the filter set,
+ * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
+ *
+ * As we may potentially queue two record types, and the entire RB-tree
+ * needs to be walked at once, we break this out into its own function
+ * so we can generate a tightly packed queue of packets.
+ *
+ * XXX This could be written to only use one tree walk, although that makes
+ * serializing into the mbuf chains a bit harder. For now we do two walks
+ * which makes things easier on us, and it may or may not be harder on
+ * the L2 cache.
+ *
+ * On success, returns the size of all data appended to the queue;
+ * on failure, returns an error code less than zero.  Returns zero
+ * if no record(s) needed to be appended.
+ */
+static int
+mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
+{
+	static const int MINRECLEN =
+	    sizeof(struct mldv2_record) + sizeof(struct in6_addr);
+	struct ifnet		*ifp;
+	struct mldv2_record	 mr;
+	struct mldv2_record	*pmr;
+	struct ip6_msource	*ims, *nims;
+	struct mbuf		*m, *m0, *md;
+	int			 m0srcs, nbytes, npbytes, off, rsrcs, schanged;
+	int			 nallow, nblock;
+	uint8_t			 mode, now, then;
+	rectype_t		 crt, drt, nrt;
+
+	IN6M_LOCK_ASSERT_HELD(inm);
+
+	if (inm->in6m_nsrc == 0 ||
+	    (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0))
+		return (0);
+
+	ifp = inm->in6m_ifp;			/* interface */
+	mode = inm->in6m_st[1].iss_fmode;	/* filter mode at t1 */
+	crt = REC_NONE;	/* current group record type */
+	drt = REC_NONE;	/* mask of completed group record types */
+	nrt = REC_NONE;	/* record type for current node */
+	m0srcs = 0;	/* # sources which will fit in current mbuf chain */
+	npbytes = 0;	/* # of bytes appended this packet */
+	nbytes = 0;	/* # of bytes appended to group's state-change queue */
+	rsrcs = 0;	/* # sources encoded in current record */
+	schanged = 0;	/* # nodes encoded in overall filter change */
+	nallow = 0;	/* # of source entries in ALLOW_NEW */
+	nblock = 0;	/* # of source entries in BLOCK_OLD */
+	nims = NULL;	/* next tree node pointer */
+
+	/*
+	 * For each possible filter record mode.
+	 * The first kind of source we encounter tells us which
+	 * is the first kind of record we start appending.
+	 * If a node transitioned to UNDEFINED at t1, its mode is treated
+	 * as the inverse of the group's filter mode.
+	 */
+	while (drt != REC_FULL) {
+		do {
+			m0 = ifq->ifq_tail;
+			if (m0 != NULL &&
+			    (m0->m_pkthdr.vt_nrecs + 1 <=
+			     MLD_V2_REPORT_MAXRECS) &&
+			    (m0->m_pkthdr.len + MINRECLEN) <
+			     (ifp->if_mtu - MLD_MTUSPACE)) {
+				m = m0;
+				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
+					    sizeof(struct mldv2_record)) /
+					    sizeof(struct in6_addr);
+				MLD_PRINTF(("%s: use previous packet\n",
+				    __func__));
+			} else {
+				m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+				if (m == NULL)
+					m = m_gethdr(M_DONTWAIT, MT_DATA);
+				if (m == NULL) {
+					MLD_PRINTF(("%s: m_get*() failed\n",
+					    __func__));
+					return (-ENOMEM);
+				}
+				m->m_pkthdr.vt_nrecs = 0;
+				m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
+				    sizeof(struct mldv2_record)) /
+				    sizeof(struct in6_addr);
+				npbytes = 0;
+				MLD_PRINTF(("%s: allocated new packet\n",
+				    __func__));
+			}
+			/*
+			 * Append the MLD group record header to the
+			 * current packet's data area.
+			 * Recalculate pointer to free space for next
+			 * group record, in case m_append() allocated
+			 * a new mbuf or cluster.
+			 */
+			memset(&mr, 0, sizeof(mr));
+			mr.mr_addr = inm->in6m_addr;
+			in6_clearscope(&mr.mr_addr);
+			if (!m_append(m, sizeof(mr), (void *)&mr)) {
+				if (m != m0)
+					m_freem(m);
+				MLD_PRINTF(("%s: m_append() failed\n",
+				    __func__));
+				return (-ENOMEM);
+			}
+			npbytes += sizeof(struct mldv2_record);
+			if (m != m0) {
+				/* new packet; offset in chain */
+				md = m_getptr(m, npbytes -
+				    sizeof(struct mldv2_record), &off);
+				pmr = (struct mldv2_record *)(mtod(md,
+				    uint8_t *) + off);
+			} else {
+				/* current packet; offset from last append */
+				md = m_last(m);
+				pmr = (struct mldv2_record *)(mtod(md,
+				    uint8_t *) + md->m_len -
+				    sizeof(struct mldv2_record));
+			}
+			/*
+			 * Begin walking the tree for this record type
+			 * pass, or continue from where we left off
+			 * previously if we had to allocate a new packet.
+			 * Only report deltas in-mode at t1.
+			 * We need not report included sources as allowed
+			 * if we are in inclusive mode on the group,
+			 * however the converse is not true.
+			 */
+			rsrcs = 0;
+			if (nims == NULL) {
+				nims = RB_MIN(ip6_msource_tree,
+				    &inm->in6m_srcs);
+			}
+			RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
+				MLD_PRINTF(("%s: visit node %s\n", __func__,
+				    ip6_sprintf(&ims->im6s_addr)));
+				now = im6s_get_mode(inm, ims, 1);
+				then = im6s_get_mode(inm, ims, 0);
+				MLD_PRINTF(("%s: mode: t0 %d, t1 %d\n",
+				    __func__, then, now));
+				if (now == then) {
+					MLD_PRINTF(("%s: skip unchanged\n",
+					    __func__));
+					continue;
+				}
+				if (mode == MCAST_EXCLUDE &&
+				    now == MCAST_INCLUDE) {
+					MLD_PRINTF(("%s: skip IN src on EX "
+					    "group\n", __func__));
+					continue;
+				}
+				nrt = (rectype_t)now;
+				if (nrt == REC_NONE)
+					nrt = (rectype_t)(~mode & REC_FULL);
+				if (schanged++ == 0) {
+					crt = nrt;
+				} else if (crt != nrt)
+					continue;
+				if (!m_append(m, sizeof(struct in6_addr),
+				    (void *)&ims->im6s_addr)) {
+					if (m != m0)
+						m_freem(m);
+					MLD_PRINTF(("%s: m_append() failed\n",
+					    __func__));
+					return (-ENOMEM);
+				}
+				nallow += !!(crt == REC_ALLOW);
+				nblock += !!(crt == REC_BLOCK);
+				if (++rsrcs == m0srcs)
+					break;
+			}
+			/*
+			 * If we did not append any tree nodes on this
+			 * pass, back out of allocations.
+			 */
+			if (rsrcs == 0) {
+				npbytes -= sizeof(struct mldv2_record);
+				if (m != m0) {
+					MLD_PRINTF(("%s: m_free(m)\n",
+					    __func__));
+					m_freem(m);
+				} else {
+					MLD_PRINTF(("%s: m_adj(m, -mr)\n",
+					    __func__));
+					m_adj(m, -((int)sizeof(
+					    struct mldv2_record)));
+				}
+				continue;
+			}
+			npbytes += (rsrcs * sizeof(struct in6_addr));
+			if (crt == REC_ALLOW)
+				pmr->mr_type = MLD_ALLOW_NEW_SOURCES;
+			else if (crt == REC_BLOCK)
+				pmr->mr_type = MLD_BLOCK_OLD_SOURCES;
+			pmr->mr_numsrc = htons(rsrcs);
+			/*
+			 * Count the new group record, and enqueue this
+			 * packet if it wasn't already queued.
+			 */
+			m->m_pkthdr.vt_nrecs++;
+			m->m_pkthdr.rcvif = ifp;
+			if (m != m0)
+				IF_ENQUEUE(ifq, m);
+			nbytes += npbytes;
+		} while (nims != NULL);
+		drt |= crt;
+		crt = (~crt & REC_FULL);
+	}
+
+	MLD_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
+	    nallow, nblock));
+
+	return (nbytes);
+}
+
+static int
+mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
+{
+	struct ifqueue	*gq;
+	struct mbuf	*m;		/* pending state-change */
+	struct mbuf	*m0;		/* copy of pending state-change */
+	struct mbuf	*mt;		/* last state-change in packet */
+	struct mbuf	*n;
+	int		 docopy, domerge;
+	u_int		 recslen;
+
+	IN6M_LOCK_ASSERT_HELD(inm);
+
+	docopy = 0;
+	domerge = 0;
+	recslen = 0;
+
+	/*
+	 * If there are further pending retransmissions, make a writable
+	 * copy of each queued state-change message before merging.
+	 */
+	if (inm->in6m_scrv > 0)
+		docopy = 1;
+
+	gq = &inm->in6m_scq;
+#ifdef MLD_DEBUG
+	if (gq->ifq_head == NULL) {
+		MLD_PRINTF(("%s: WARNING: queue for inm %p is empty\n",
+		    __func__, inm));
+	}
+#endif
+
+	/*
+	 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
+	 * packet might not always be at the head of the ifqueue.
+	 */
+	m = gq->ifq_head;
+	while (m != NULL) {
+		/*
+		 * Only merge the report into the current packet if
+		 * there is sufficient space to do so; an MLDv2 report
+		 * packet may only contain 65,535 group records.
+		 * Always use a simple mbuf chain concatenation to do this,
+		 * as large state changes for single groups may have
+		 * allocated clusters.
+		 */
+		domerge = 0;
+		mt = ifscq->ifq_tail;
+		if (mt != NULL) {
+			recslen = m_length(m);
+
+			if ((mt->m_pkthdr.vt_nrecs +
+			    m->m_pkthdr.vt_nrecs <=
+			    MLD_V2_REPORT_MAXRECS) &&
+			    (mt->m_pkthdr.len + recslen <=
+			    (inm->in6m_ifp->if_mtu - MLD_MTUSPACE)))
+				domerge = 1;
+		}
+
+		if (!domerge && IF_QFULL(gq)) {
+			MLD_PRINTF(("%s: outbound queue full, skipping whole "
+			    "packet %p\n", __func__, m));
+			n = m->m_nextpkt;
+			if (!docopy) {
+				IF_REMQUEUE(gq, m);
+				m_freem(m);
+			}
+			m = n;
+			continue;
+		}
+
+		if (!docopy) {
+			MLD_PRINTF(("%s: dequeueing %p\n", __func__, m));
+			n = m->m_nextpkt;
+			IF_REMQUEUE(gq, m);
+			m0 = m;
+			m = n;
+		} else {
+			MLD_PRINTF(("%s: copying %p\n", __func__, m));
+			m0 = m_dup(m, M_NOWAIT);
+			if (m0 == NULL)
+				return (ENOMEM);
+			m0->m_nextpkt = NULL;
+			m = m->m_nextpkt;
+		}
+
+		if (!domerge) {
+			MLD_PRINTF(("%s: queueing %p to ifscq %p)\n",
+			    __func__, m0, ifscq));
+			m0->m_pkthdr.rcvif = inm->in6m_ifp;
+			IF_ENQUEUE(ifscq, m0);
+		} else {
+			struct mbuf *mtl;	/* last mbuf of packet mt */
+
+			MLD_PRINTF(("%s: merging %p with ifscq tail %p)\n",
+			    __func__, m0, mt));
+
+			mtl = m_last(mt);
+			m0->m_flags &= ~M_PKTHDR;
+			mt->m_pkthdr.len += recslen;
+			mt->m_pkthdr.vt_nrecs +=
+			    m0->m_pkthdr.vt_nrecs;
+
+			mtl->m_next = m0;
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Respond to a pending MLDv2 General Query.
+ */
+static void
+mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
+{
+	struct ifnet		*ifp;
+	struct in6_multi	*inm;
+	struct in6_multistep	step;
+	int			 retval;
+
+	MLI_LOCK_ASSERT_HELD(mli);
+
+	VERIFY(mli->mli_version == MLD_VERSION_2);
+
+	ifp = mli->mli_ifp;
+	MLI_UNLOCK(mli);
+
+	in6_multihead_lock_shared();
+	IN6_FIRST_MULTI(step, inm);
+	while (inm != NULL) {
+		IN6M_LOCK(inm);
+		if (inm->in6m_ifp != ifp)
+			goto next;
+
+		switch (inm->in6m_state) {
+		case MLD_NOT_MEMBER:
+		case MLD_SILENT_MEMBER:
+			break;
+		case MLD_REPORTING_MEMBER:
+		case MLD_IDLE_MEMBER:
+		case MLD_LAZY_MEMBER:
+		case MLD_SLEEPING_MEMBER:
+		case MLD_AWAKENING_MEMBER:
+			inm->in6m_state = MLD_REPORTING_MEMBER;
+			MLI_LOCK(mli);
+			retval = mld_v2_enqueue_group_record(&mli->mli_gq,
+			    inm, 0, 0, 0, 0);
+			MLI_UNLOCK(mli);
+			MLD_PRINTF(("%s: enqueue record = %d\n",
+			    __func__, retval));
+			break;
+		case MLD_G_QUERY_PENDING_MEMBER:
+		case MLD_SG_QUERY_PENDING_MEMBER:
+		case MLD_LEAVING_MEMBER:
+			break;
+		}
+next:
+		IN6M_UNLOCK(inm);
+		IN6_NEXT_MULTI(step, inm);
+	}
+	in6_multihead_lock_done();
+
+	MLI_LOCK(mli);
+	mld_dispatch_queue(mli, &mli->mli_gq, MLD_MAX_RESPONSE_BURST);
+	MLI_LOCK_ASSERT_HELD(mli);
+
+	/*
+	 * Slew transmission of bursts over 500ms intervals.
+	 */
+	if (mli->mli_gq.ifq_head != NULL) {
+		mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(
+		    MLD_RESPONSE_BURST_INTERVAL);
+		interface_timers_running6 = 1;
+	}
+}
+
+/*
+ * Transmit the next pending message in the output queue.
+ *
+ * Must not be called with in6m_lock or mli_lock held.
+ */
+static void
+mld_dispatch_packet(struct mbuf *m)
+{
+	struct ip6_moptions	*im6o;
+	struct ifnet		*ifp;
+	struct ifnet		*oifp = NULL;
+	struct mbuf		*m0;
+	struct mbuf		*md;
+	struct ip6_hdr		*ip6;
+	struct mld_hdr		*mld;
+	int			 error;
+	int			 off;
+	int			 type;
+
+	MLD_PRINTF(("%s: transmit %p\n", __func__, m));
+
+	/*
+	 * Check if the ifnet is still attached.
+	 */
+	ifp = m->m_pkthdr.rcvif;
+	if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
+		MLD_PRINTF(("%s: dropped %p as ifindex %u went away.\n",
+		    __func__, m, (u_int)if_index));
+		m_freem(m);
+		ip6stat.ip6s_noroute++;
+		return;
+	}
+
+	im6o = ip6_allocmoptions(M_WAITOK);
+	if (im6o == NULL) {
+		m_freem(m);
+		return;
+	}
+
+	im6o->im6o_multicast_hlim  = 1;
+#if MROUTING
+	im6o->im6o_multicast_loop = (ip6_mrouter != NULL);
+#else
+	im6o->im6o_multicast_loop = 0;
+#endif
+	im6o->im6o_multicast_ifp = ifp;
+
+	if (m->m_flags & M_MLDV1) {
+		m0 = m;
+	} else {
+		m0 = mld_v2_encap_report(ifp, m);
+		if (m0 == NULL) {
+			MLD_PRINTF(("%s: dropped %p\n", __func__, m));
+			/*
+			 * mld_v2_encap_report() has already freed our mbuf.
+			 */
+			IM6O_REMREF(im6o);
+			ip6stat.ip6s_odropped++;
+			return;
+		}
+	}
+
+	m->m_flags &= ~(M_PROTOFLAGS);
+	m0->m_pkthdr.rcvif = lo_ifp;
+
+	ip6 = mtod(m0, struct ip6_hdr *);
+#if 0
+	(void) in6_setscope(&ip6->ip6_dst, ifp, NULL);	/* XXX LOR */
+#else
+	/*
+	 * XXX XXX Break some KPI rules to prevent an LOR which would
+	 * occur if we called in6_setscope() at transmission.
+	 * See comments at top of file.
+	 */
+	MLD_EMBEDSCOPE(&ip6->ip6_dst, ifp->if_index);
+#endif
+
+	/*
+	 * Retrieve the ICMPv6 type before handoff to ip6_output(),
+	 * so we can bump the stats.
+	 */
+	md = m_getptr(m0, sizeof(struct ip6_hdr), &off);
+	mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off);
+	type = mld->mld_type;
+
+	error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, im6o,
+	    &oifp, NULL);
+
+	IM6O_REMREF(im6o);
+
+	if (error) {
+		MLD_PRINTF(("%s: ip6_output(%p) = %d\n", __func__, m0, error));
+		if (oifp != NULL)
+			ifnet_release(oifp);
+		return;
+	}
+
+	icmp6stat.icp6s_outhist[type]++;
+	if (oifp != NULL) {
+		icmp6_ifstat_inc(oifp, ifs6_out_msg);
+		switch (type) {
+		case MLD_LISTENER_REPORT:
+		case MLDV2_LISTENER_REPORT:
+			icmp6_ifstat_inc(oifp, ifs6_out_mldreport);
+			break;
+		case MLD_LISTENER_DONE:
+			icmp6_ifstat_inc(oifp, ifs6_out_mlddone);
+			break;
+		}
+		ifnet_release(oifp);
+	}
+}
+
+/*
+ * Encapsulate an MLDv2 report.
+ *
+ * KAME IPv6 requires that hop-by-hop options be passed separately,
+ * and that the IPv6 header be prepended in a separate mbuf.
+ *
+ * Returns a pointer to the new mbuf chain head, or NULL if the
+ * allocation failed.
+ */
+static struct mbuf *
+mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
+{
+	struct mbuf		*mh;
+	struct mldv2_report	*mld;
+	struct ip6_hdr		*ip6;
+	struct in6_ifaddr	*ia;
+	int			 mldreclen;
+
+	VERIFY(m->m_flags & M_PKTHDR);
+
+	/*
+	 * RFC3590: OK to send as :: or tentative during DAD.
+	 */
+	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
+	if (ia == NULL)
+		MLD_PRINTF(("%s: warning: ia is NULL\n", __func__));
+
+	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
+	if (mh == NULL) {
+		if (ia != NULL)
+			IFA_REMREF(&ia->ia_ifa);
+		m_freem(m);
+		return (NULL);
+	}
+	MH_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));
+
+	mldreclen = m_length(m);
+	MLD_PRINTF(("%s: mldreclen is %d\n", __func__, mldreclen));
+
+	mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report);
+	mh->m_pkthdr.len = sizeof(struct ip6_hdr) +
+	    sizeof(struct mldv2_report) + mldreclen;
+
+	ip6 = mtod(mh, struct ip6_hdr *);
+	ip6->ip6_flow = 0;
+	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
+	ip6->ip6_vfc |= IPV6_VERSION;
+	ip6->ip6_nxt = IPPROTO_ICMPV6;
+	if (ia != NULL)
+		IFA_LOCK(&ia->ia_ifa);
+	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
+	if (ia != NULL) {
+		IFA_UNLOCK(&ia->ia_ifa);
+		IFA_REMREF(&ia->ia_ifa);
+		ia = NULL;
+	}
+	ip6->ip6_dst = in6addr_linklocal_allv2routers;
+	/* scope ID will be set in netisr */
+
+	mld = (struct mldv2_report *)(ip6 + 1);
+	mld->mld_type = MLDV2_LISTENER_REPORT;
+	mld->mld_code = 0;
+	mld->mld_cksum = 0;
+	mld->mld_v2_reserved = 0;
+	mld->mld_v2_numrecs = htons(m->m_pkthdr.vt_nrecs);
+	m->m_pkthdr.vt_nrecs = 0;
+	m->m_flags &= ~M_PKTHDR;
+
+	mh->m_next = m;
+	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
+	    sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen);
+	return (mh);
+}
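+
+/*
+ * Editorial sketch of the chain built above: mh carries the IPv6
+ * header immediately followed by the MLDv2 report header, and the
+ * original chain m (stripped of M_PKTHDR) is linked after it with
+ * the group records.  in6_cksum() is then run over the report
+ * header plus all records, skipping the IPv6 header.
+ */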
+
+#ifdef MLD_DEBUG
+static const char *
+mld_rec_type_to_str(const int type)
+{
+	switch (type) {
+	case MLD_CHANGE_TO_EXCLUDE_MODE:
+		return "TO_EX";
+	case MLD_CHANGE_TO_INCLUDE_MODE:
+		return "TO_IN";
+	case MLD_MODE_IS_EXCLUDE:
+		return "MODE_EX";
+	case MLD_MODE_IS_INCLUDE:
+		return "MODE_IN";
+	case MLD_ALLOW_NEW_SOURCES:
+		return "ALLOW_NEW";
+	case MLD_BLOCK_OLD_SOURCES:
+		return "BLOCK_OLD";
+	default:
+		break;
+	}
+	return "unknown";
+}
+#endif
+
+void
+mld_init(void)
+{
+
+	MLD_PRINTF(("%s: initializing\n", __func__));
+
+	/* Setup lock group and attribute for mld_mtx */
+	mld_mtx_grp_attr = lck_grp_attr_alloc_init();
+	mld_mtx_grp = lck_grp_alloc_init("mld_mtx", mld_mtx_grp_attr);
+	mld_mtx_attr = lck_attr_alloc_init();
+	lck_mtx_init(&mld_mtx, mld_mtx_grp, mld_mtx_attr);
+
+	ip6_initpktopts(&mld_po);
+	mld_po.ip6po_hlim = 1;
+	mld_po.ip6po_hbh = &mld_ra.hbh;
+	mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
+	mld_po.ip6po_flags = IP6PO_DONTFRAG;
+	LIST_INIT(&mli_head);
+
+	mli_size = sizeof (struct mld_ifinfo);
+	mli_zone = zinit(mli_size, MLI_ZONE_MAX * mli_size,
+	    0, MLI_ZONE_NAME);
+	if (mli_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, MLI_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(mli_zone, Z_EXPAND, TRUE);
+	zone_change(mli_zone, Z_CALLERACCT, FALSE);
+}
diff --git a/bsd/netinet6/mld6.h b/bsd/netinet6/mld6.h
new file mode 100644
index 000000000..ceb41365c
--- /dev/null
+++ b/bsd/netinet6/mld6.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2010 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*-
+ * Copyright (c) 2009 Bruce Simpson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET6_MLD6_H_
+#define _NETINET6_MLD6_H_
+
+/*
+ * Multicast Listener Discovery (MLD) definitions.
+ */
+
+/* Minimum length of any MLD protocol message. */
+#define MLD_MINLEN	sizeof(struct icmp6_hdr)
+
+/*
+ * MLD v2 query format.
+ * See <netinet/icmp6.h> for struct mld_hdr
+ * (MLDv1 query and host report format).
+ */
+struct mldv2_query {
+	struct icmp6_hdr	mld_icmp6_hdr;	/* ICMPv6 header */
+	struct in6_addr		mld_addr;	/* address being queried */
+	uint8_t		mld_misc;	/* reserved/suppress/robustness   */
+	uint8_t		mld_qqi;	/* querier's query interval       */
+	uint16_t	mld_numsrc;	/* number of sources              */
+	/* followed by 1..numsrc source addresses */
+} __attribute__((__packed__));
+#define MLD_V2_QUERY_MINLEN		sizeof(struct mldv2_query)
+#define MLD_MRC_EXP(x)			((ntohs((x)) >> 12) & 0x0007)
+#define MLD_MRC_MANT(x)			(ntohs((x)) & 0x0fff)
+#define MLD_QQIC_EXP(x)			(((x) >> 4) & 0x07)
+#define MLD_QQIC_MANT(x)		((x) & 0x0f)
+#define MLD_QRESV(x)			(((x) >> 4) & 0x0f)
+#define MLD_SFLAG(x)			(((x) >> 3) & 0x01)
+#define MLD_QRV(x)			((x) & 0x07)
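+
+/*
+ * Editorial example (not in the original header): RFC 3810 encodes
+ * a Maximum Response Code of 32768 or more, and a QQIC of 128 or
+ * more, in floating-point form.  Given the 16-bit code `mrc' in
+ * network byte order and the 8-bit `qqic', hedged decoders using
+ * the macros above might read:
+ *
+ *	mrd = (ntohs(mrc) < 32768) ? ntohs(mrc) :
+ *	    (MLD_MRC_MANT(mrc) | 0x1000) << (MLD_MRC_EXP(mrc) + 3);
+ *	qqi = (qqic < 128) ? qqic :
+ *	    (MLD_QQIC_MANT(qqic) | 0x10) << (MLD_QQIC_EXP(qqic) + 3);
+ */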
+
+/*
+ * MLDv2 host membership report header.
+ * mld_type: MLDV2_LISTENER_REPORT
+ */
+struct mldv2_report {
+	struct icmp6_hdr	mld_icmp6_hdr;
+	/* followed by 1..numgrps records */
+} __attribute__((__packed__));
+/* overlaid on struct icmp6_hdr. */
+#define mld_numrecs	mld_icmp6_hdr.icmp6_data16[1]
+
+struct mldv2_record {
+	uint8_t			mr_type;	/* record type */
+	uint8_t			mr_datalen;	/* length of auxiliary data */
+	uint16_t		mr_numsrc;	/* number of sources */
+	struct in6_addr		mr_addr;	/* address being reported */
+	/* followed by 1..numsrc source addresses */
+} __attribute__((__packed__));
+#define MLD_V2_REPORT_MAXRECS		65535
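+
+/*
+ * Editorial example: a group record with two sources and no
+ * auxiliary data occupies sizeof(struct mldv2_record) +
+ * 2 * sizeof(struct in6_addr) = 20 + 32 = 52 bytes on the wire.
+ */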
+
+/*
+ * MLDv2 report modes.
+ */
+#define MLD_DO_NOTHING			0	/* don't send a record */
+#define MLD_MODE_IS_INCLUDE		1	/* MODE_IN */
+#define MLD_MODE_IS_EXCLUDE		2	/* MODE_EX */
+#define MLD_CHANGE_TO_INCLUDE_MODE	3	/* TO_IN */
+#define MLD_CHANGE_TO_EXCLUDE_MODE	4	/* TO_EX */
+#define MLD_ALLOW_NEW_SOURCES		5	/* ALLOW_NEW */
+#define MLD_BLOCK_OLD_SOURCES		6	/* BLOCK_OLD */
+
+/*
+ * MLDv2 query types.
+ */
+#define MLD_V2_GENERAL_QUERY		1
+#define MLD_V2_GROUP_QUERY		2
+#define MLD_V2_GROUP_SOURCE_QUERY	3
+
+/*
+ * Maximum report interval for MLDv1 host membership reports.
+ */
+#define MLD_V1_MAX_RI			10
+
+/*
+ * MLD_TIMER_SCALE denotes that the MLD code field specifies
+ * time in milliseconds.
+ */
+#define MLD_TIMER_SCALE			1000
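+
+/*
+ * Editorial example: a code value of 10000 therefore denotes
+ * 10000 / MLD_TIMER_SCALE = 10 seconds.
+ */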
+
+#endif /* _NETINET6_MLD6_H_ */
diff --git a/bsd/netinet6/mld6_var.h b/bsd/netinet6/mld6_var.h
index bbeda1ff9..7652cdca9 100644
--- a/bsd/netinet6/mld6_var.h
+++ b/bsd/netinet6/mld6_var.h
@@ -1,3 +1,31 @@
+/*
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
 /*	$FreeBSD: src/sys/netinet6/mld6_var.h,v 1.1.2.1 2000/07/15 07:14:36 kris Exp $	*/
 /*	$KAME: mld6_var.h,v 1.4 2000/03/25 07:23:54 sumikawa Exp $	*/
 
@@ -34,21 +62,186 @@
 #define _NETINET6_MLD6_VAR_H_
 #include <sys/appleapiopts.h>
 
-#ifdef KERNEL_PRIVATE
+/*
+ * Multicast Listener Discovery (MLD)
+ * implementation-specific definitions.
+ */
+
+#ifdef PRIVATE
+/*
+ * Per-link MLD state.
+ */
+#ifndef XNU_KERNEL_PRIVATE
+struct mld_ifinfo {
+#else
+struct mld_ifinfo_u {
+#endif /* XNU_KERNEL_PRIVATE */
+	uint32_t mli_ifindex;	/* interface this instance belongs to */
+	uint32_t mli_version;	/* MLDv1 Host Compatibility Mode */
+	uint32_t mli_v1_timer;	/* MLDv1 Querier Present timer (s) */
+	uint32_t mli_v2_timer;	/* MLDv2 General Query (interface) timer (s)*/
+	uint32_t mli_flags;	/* MLD per-interface flags */
+	uint32_t mli_rv;	/* MLDv2 Robustness Variable */
+	uint32_t mli_qi;	/* MLDv2 Query Interval (s) */
+	uint32_t mli_qri;	/* MLDv2 Query Response Interval (s) */
+	uint32_t mli_uri;	/* MLDv2 Unsolicited Report Interval (s) */
+	uint32_t _pad;
+};
+
+#define MLIF_SILENT	0x00000001	/* Do not use MLD on this ifp */
+#define MLIF_USEALLOW	0x00000002	/* Use ALLOW/BLOCK for joins/leaves */
+
+/*
+ * MLD version tag.
+ */
+#define MLD_VERSION_NONE		0 /* Invalid */
+#define MLD_VERSION_1			1
+#define MLD_VERSION_2			2 /* Default */
+#endif /* PRIVATE */
+
+#ifdef XNU_KERNEL_PRIVATE
+#include <sys/syslog.h>
+
+#define MLD_DEBUG 1
+#ifdef MLD_DEBUG
+extern int mld_debug;
+#define MLD_PRINTF(x)	do { if (mld_debug) printf x; } while (0)
+#else
+#define	MLD_PRINTF(x)
+#endif
+
+#define MLD_RANDOM_DELAY(X)		(random() % (X) + 1)
+#define MLD_MAX_STATE_CHANGES		24 /* Max pending changes per group */
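+
+/*
+ * Editorial note: MLD_RANDOM_DELAY(X) evaluates to a value uniformly
+ * distributed in [1, X], providing the randomized report jitter that
+ * RFC 3810 calls for, e.g.:
+ *
+ *	timer = 1 + MLD_RANDOM_DELAY(MLD_RESPONSE_BURST_INTERVAL);
+ */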
+
+/*
+ * MLD per-group states.
+ */
+#define MLD_NOT_MEMBER			0 /* Can garbage collect group */
+#define MLD_SILENT_MEMBER		1 /* Do not perform MLD for group */
+#define MLD_REPORTING_MEMBER		2 /* MLDv1 we are reporter */
+#define MLD_IDLE_MEMBER			3 /* MLDv1 we reported last */
+#define MLD_LAZY_MEMBER			4 /* MLDv1 other member reporting */
+#define MLD_SLEEPING_MEMBER		5 /* MLDv1 start query response */
+#define MLD_AWAKENING_MEMBER		6 /* MLDv1 group timer will start */
+#define MLD_G_QUERY_PENDING_MEMBER	7 /* MLDv2 group query pending */
+#define MLD_SG_QUERY_PENDING_MEMBER	8 /* MLDv2 source query pending */
+#define MLD_LEAVING_MEMBER		9 /* MLDv2 dying gasp (pending last */
+					  /* retransmission of INCLUDE {}) */
+/*
+ * MLDv2 protocol control variables.
+ */
+#define MLD_RV_INIT		2	/* Robustness Variable */
+#define MLD_RV_MIN		1
+#define MLD_RV_MAX		7
+
+#define MLD_QI_INIT		125	/* Query Interval (s) */
+#define MLD_QI_MIN		1
+#define MLD_QI_MAX		255
 
-#define MLD6_RANDOM_DELAY(X) (random() % (X) + 1)
+#define MLD_QRI_INIT		10	/* Query Response Interval (s) */
+#define MLD_QRI_MIN		1
+#define MLD_QRI_MAX		255
+
+#define MLD_URI_INIT		3	/* Unsolicited Report Interval (s) */
+#define MLD_URI_MIN		0
+#define MLD_URI_MAX		10
+
+#define MLD_MAX_GS_SOURCES		256 /* # of sources in rx GS query */
+#define MLD_MAX_G_GS_PACKETS		8 /* # of packets to answer G/GS */
+#define MLD_MAX_STATE_CHANGE_PACKETS	8 /* # of packets per state change */
+#define MLD_MAX_RESPONSE_PACKETS	16 /* # of packets for general query */
+#define MLD_MAX_RESPONSE_BURST		4 /* # of responses to send at once */
+#define MLD_RESPONSE_BURST_INTERVAL	(PR_SLOWHZ)	/* 500ms */
+
+/*
+ * MLD-specific mbuf flags.
+ */
+#define M_MLDV1		M_PROTO1	/* Packet is MLDv1 */
+#define M_GROUPREC	M_PROTO3	/* mbuf chain is a group record */
+
+/*
+ * Leading space for MLDv2 reports inside MTU.
+ *
+ * NOTE: This differs from IGMPv3 significantly. KAME IPv6 requires
+ * that a fully formed mbuf chain *without* the Router Alert option
+ * is passed to ip6_output(), however we must account for it in the
+ * MTU if we need to split an MLDv2 report into several packets.
+ *
+ * We now put the MLDv2 report header in the initial mbuf containing
+ * the IPv6 header.
+ */
+#define	MLD_MTUSPACE	(sizeof(struct ip6_hdr) + sizeof(struct mld_raopt) + \
+			 sizeof(struct icmp6_hdr))
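+
+/*
+ * Editorial worked example: assuming a 1500-byte MTU, a 40-byte IPv6
+ * header, an 8-byte Router Alert hop-by-hop option and an 8-byte
+ * ICMPv6 header, MLD_MTUSPACE comes to 56 bytes, leaving room for
+ * (1500 - 56 - 20) / 16 = 89 sources in a packet's first group record.
+ */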
+
+struct mld_ifinfo {
+	decl_lck_mtx_data(, mli_lock);
+	uint32_t mli_refcnt;	/* reference count */
+	uint32_t mli_debug;	/* see ifa_debug flags */
+	LIST_ENTRY(mld_ifinfo) mli_link;
+	struct ifnet *mli_ifp;	/* interface this instance belongs to */
+	uint32_t mli_version;	/* MLDv1 Host Compatibility Mode */
+	uint32_t mli_v1_timer;	/* MLDv1 Querier Present timer (s) */
+	uint32_t mli_v2_timer;	/* MLDv2 General Query (interface) timer (s)*/
+	uint32_t mli_flags;	/* MLD per-interface flags */
+	uint32_t mli_rv;	/* MLDv2 Robustness Variable */
+	uint32_t mli_qi;	/* MLDv2 Query Interval (s) */
+	uint32_t mli_qri;	/* MLDv2 Query Response Interval (s) */
+	uint32_t mli_uri;	/* MLDv2 Unsolicited Report Interval (s) */
+	SLIST_HEAD(,in6_multi)	mli_relinmhead; /* released groups */
+	struct ifqueue	 mli_gq;	/* queue of general query responses */
+	struct ifqueue   mli_v1q; /* MLDv1 message queue */
+};
+
+#define	MLI_LOCK_ASSERT_HELD(_mli)					\
+	lck_mtx_assert(&(_mli)->mli_lock, LCK_MTX_ASSERT_OWNED)
+
+#define	MLI_LOCK_ASSERT_NOTHELD(_mli)					\
+	lck_mtx_assert(&(_mli)->mli_lock, LCK_MTX_ASSERT_NOTOWNED)
+
+#define	MLI_LOCK(_mli)							\
+	lck_mtx_lock(&(_mli)->mli_lock)
+
+#define	MLI_LOCK_SPIN(_mli)						\
+	lck_mtx_lock_spin(&(_mli)->mli_lock)
+
+#define	MLI_CONVERT_LOCK(_mli) do {					\
+	MLI_LOCK_ASSERT_HELD(_mli);					\
+	lck_mtx_convert_spin(&(_mli)->mli_lock);			\
+} while (0)
+
+#define	MLI_UNLOCK(_mli)						\
+	lck_mtx_unlock(&(_mli)->mli_lock)
+
+#define	MLI_ADDREF(_mli)						\
+	mli_addref(_mli, 0)
+
+#define	MLI_ADDREF_LOCKED(_mli)						\
+	mli_addref(_mli, 1)
+
+#define	MLI_REMREF(_mli)						\
+	mli_remref(_mli)
 
 /*
- * States for MLD stop-listening processing
+ * Per-link MLD context.
  */
-#define MLD6_OTHERLISTENER			0
-#define MLD6_IREPORTEDLAST			1
+#define MLD_IFINFO(ifp)	((ifp)->if_mli)
+
+extern int mld_change_state(struct in6_multi *, const int);
+extern struct mld_ifinfo *mld_domifattach(struct ifnet *, int);
+extern void mld_domifreattach(struct mld_ifinfo *);
+extern void mld_domifdetach(struct ifnet *);
+extern void mld_fasttimo(void);
+extern void mld_ifdetach(struct ifnet *);
+extern int mld_input(struct mbuf *, int, int);
+extern void mld_slowtimo(void);
+extern void mld_init(void);
+extern void mli_addref(struct mld_ifinfo *, int);
+extern void mli_remref(struct mld_ifinfo *);
+
+#ifdef SYSCTL_DECL
+SYSCTL_DECL(_net_inet6_mld);
+#endif
 
-void	mld6_init(void);
-void	mld6_input(struct mbuf *, int);
-void	mld6_start_listening(struct in6_multi *);
-void	mld6_stop_listening(struct in6_multi *);
-void	mld6_fasttimeo(void);
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 #endif /* _NETINET6_MLD6_VAR_H_ */
diff --git a/bsd/netinet6/nd6.c b/bsd/netinet6/nd6.c
index d71746042..77ab7630a 100644
--- a/bsd/netinet6/nd6.c
+++ b/bsd/netinet6/nd6.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -79,6 +79,8 @@
 #include <sys/syslog.h>
 #include <sys/protosw.h>
 #include <sys/proc.h>
+#include <sys/mcache.h>
+
 #include <kern/queue.h>
 #include <kern/zalloc.h>
 
@@ -86,19 +88,19 @@
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
-#include <net/if_atm.h>
+#include <net/if_llreach.h>
 #include <net/route.h>
 #include <net/dlil.h>
+#include <net/ntstat.h>
 
 #include <netinet/in.h>
 #include <netinet/in_arp.h>
 #include <netinet/if_ether.h>
-#include <netinet/if_fddi.h>
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
-#include <netinet6/in6_prefix.h>
+#include <netinet6/scope6_var.h>
 #include <netinet/icmp6.h>
 
 #include "loop.h"
@@ -125,6 +127,7 @@ int	nd6_gctimer	= (60 * 60 * 24); /* 1 day: garbage collection timer */
 int nd6_maxndopt = 10;	/* max # of ND options allowed */
 
 int nd6_maxnudhint = 0;	/* max # of subsequent upper layer hints */
+int nd6_maxqueuelen = 1; /* max # of packets cached in unresolved ND entries */
 
 #if ND6_DEBUG
 int nd6_debug = 1;
@@ -132,6 +135,8 @@ int nd6_debug = 1;
 int nd6_debug = 0;
 #endif
 
+static int nd6_is_new_addr_neighbor(struct sockaddr_in6 *, struct ifnet *);
+
 /* for debugging? */
 static int nd6_inuse, nd6_allocated;
 
@@ -151,7 +156,8 @@ static int nd6_inuse, nd6_allocated;
  *
  *	- Routing lock (rnh_lock)
  *
- * ln_hold, ln_asked, ln_expire, ln_state, ln_router, ln_byhint, ln_flags
+ * ln_hold, ln_asked, ln_expire, ln_state, ln_router, ln_byhint, ln_flags,
+ * ln_llreach, ln_lastused
  *
  *	- Routing entry lock (rt_lock)
  *
@@ -161,7 +167,7 @@ static int nd6_inuse, nd6_allocated;
  * freed until the route itself is freed.
  */
 struct llinfo_nd6 llinfo_nd6 = {
-	&llinfo_nd6, &llinfo_nd6, NULL, NULL, 0, 0, 0, 0, 0, 0
+	&llinfo_nd6, &llinfo_nd6, NULL, NULL, 0, 0, 0, 0, 0, 0, NULL, 0
 };
 
 /* Protected by nd_if_rwlock */
@@ -177,16 +183,22 @@ lck_rw_t		*nd_if_rwlock;
 struct nd_drhead nd_defrouter;
 struct nd_prhead nd_prefix = { 0 };
 
+/* Serialization variables for nd6_drain() */
+static boolean_t nd6_drain_busy;
+static void *nd6_drain_waitchan = &nd6_drain_busy;
+static int nd6_drain_waiters = 0;
+
 int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
 static struct sockaddr_in6 all1_sa;
 
 static int regen_tmpaddr(struct in6_ifaddr *);
-extern lck_mtx_t *ip6_mutex;
 extern lck_mtx_t *nd6_mutex;
 
 static void nd6_slowtimo(void *ignored_arg);
 static struct llinfo_nd6 *nd6_llinfo_alloc(void);
 static void nd6_llinfo_free(void *);
+static void nd6_llinfo_purge(struct rtentry *);
+static void nd6_llinfo_get_ri(struct rtentry *, struct rt_reach_info *);
 
 static void nd6_siocgdrlst(void *, int);
 static void nd6_siocgprlst(void *, int);
@@ -249,6 +261,10 @@ nd6_init()
 		panic("%s: failed allocating llinfo_nd6_zone", __func__);
 
 	zone_change(llinfo_nd6_zone, Z_EXPAND, TRUE);
+	zone_change(llinfo_nd6_zone, Z_CALLERACCT, FALSE);
+
+	nd6_nbr_init();
+	nd6_rtr_init();
 
 	nd6_init_done = 1;
 
@@ -278,9 +294,48 @@ nd6_llinfo_free(void *arg)
 		ln->ln_hold = NULL;
 	}
 
+	/* Purge any link-layer info caching */
+	VERIFY(ln->ln_rt->rt_llinfo == ln);
+	if (ln->ln_rt->rt_llinfo_purge != NULL)
+		ln->ln_rt->rt_llinfo_purge(ln->ln_rt);
+
 	zfree(llinfo_nd6_zone, ln);
 }
 
+static void
+nd6_llinfo_purge(struct rtentry *rt)
+{
+	struct llinfo_nd6 *ln = rt->rt_llinfo;
+
+	RT_LOCK_ASSERT_HELD(rt);
+	VERIFY(rt->rt_llinfo_purge == nd6_llinfo_purge && ln != NULL);
+
+	if (ln->ln_llreach != NULL) {
+		RT_CONVERT_LOCK(rt);
+		ifnet_llreach_free(ln->ln_llreach);
+		ln->ln_llreach = NULL;
+	}
+	ln->ln_lastused = 0;
+}
+
+static void
+nd6_llinfo_get_ri(struct rtentry *rt, struct rt_reach_info *ri)
+{
+	struct llinfo_nd6 *ln = rt->rt_llinfo;
+	struct if_llreach *lr = ln->ln_llreach;
+
+	if (lr == NULL) {
+		bzero(ri, sizeof (*ri));
+	} else {
+		IFLR_LOCK(lr);
+		/* Export to rt_reach_info structure */
+		ifnet_lr2ri(lr, ri);
+		/* Export ND6 send expiration time */
+		ri->ri_snd_expire = ifnet_llreach_up2cal(lr, ln->ln_lastused);
+		IFLR_UNLOCK(lr);
+	}
+}
+
 int
 nd6_ifattach(struct ifnet *ifp)
 {
@@ -338,7 +393,6 @@ nd6_ifattach(struct ifnet *ifp)
 	ND.basereachable = REACHABLE_TIME;
 	ND.reachable = ND_COMPUTE_RTIME(ND.basereachable);
 	ND.retrans = RETRANS_TIMER;
-	ND.receivedra = 0;
 	ND.flags = ND6_IFF_PERFORMNUD;
 	lck_rw_done(nd_if_rwlock);
 	nd6_setmtu(ifp);
@@ -378,8 +432,8 @@ nd6_setmtu(struct ifnet *ifp)
 	 * the sanity checks related to the maximum MTU allowed for the
 	 * interface (a value that is known only by the interface layer),
 	 * by sending the request down via ifnet_ioctl().  The use of the
-	 * ND level maxmtu and linkmtu (the latter obtained via RA) are done
-	 * via IN6_LINKMTU() which does further checking against if_mtu.
+	 * ND level maxmtu and linkmtu are done via IN6_LINKMTU() which
+	 * does further checking against if_mtu.
 	 */
 	maxmtu = ndi->maxmtu = ifp->if_mtu;
 
@@ -394,6 +448,7 @@ nd6_setmtu(struct ifnet *ifp)
 		    "new link MTU on %s%d (%u) is too small for IPv6\n",
 		    ifp->if_name, ifp->if_unit, (uint32_t)ndi->maxmtu);
 	}
+	ndi->linkmtu = ifp->if_mtu;
 	lck_rw_done(nd_if_rwlock);
 
 	/* also adjust in6_maxmtu if necessary. */
@@ -480,16 +535,16 @@ nd6_options(
 	struct nd_opt_hdr *nd_opt;
 	int i = 0;
 
-	if (!ndopts)
-		panic("ndopts == NULL in nd6_options\n");
-	if (!ndopts->nd_opts_last)
-		panic("uninitialized ndopts in nd6_options\n");
-	if (!ndopts->nd_opts_search)
+	if (ndopts == NULL)
+		panic("ndopts == NULL in nd6_options");
+	if (ndopts->nd_opts_last == NULL)
+		panic("uninitialized ndopts in nd6_options");
+	if (ndopts->nd_opts_search == NULL)
 		return 0;
 
 	while (1) {
 		nd_opt = nd6_option(ndopts);
-		if (!nd_opt && !ndopts->nd_opts_last) {
+		if (nd_opt == NULL && ndopts->nd_opts_last == NULL) {
 			/*
 			 * Message validation requires that all included
 			 * options have a length that is greater than zero.
@@ -499,7 +554,7 @@ nd6_options(
 			return -1;
 		}
 
-		if (!nd_opt)
+		if (nd_opt == NULL)
 			goto skip1;
 
 		switch (nd_opt->nd_opt_type) {
@@ -525,6 +580,9 @@ nd6_options(
 			ndopts->nd_opts_pi_end =
 				(struct nd_opt_prefix_info *)nd_opt;
 			break;
+		case ND_OPT_RDNSS:
+			/* ignore */
+			break;
 		default:
 			/*
 			 * Unknown options must be silently ignored,
@@ -581,7 +639,6 @@ again:
 		struct rtentry *rt;
 		struct sockaddr_in6 *dst;
 		struct llinfo_nd6 *next;
-		struct nd_ifinfo ndi;
 
 		/* ln_next/prev/rt is protected by rnh_lock */
 		next = ln->ln_next;
@@ -634,7 +691,6 @@ again:
 			ln = next;
 			continue;
 		}
-		ndi = nd_ifinfo[ifp->if_index];
 		lck_rw_done(nd_if_rwlock);
 
 		RT_LOCK_ASSERT_HELD(rt);
@@ -643,13 +699,15 @@ again:
 		case ND6_LLINFO_INCOMPLETE:
 			if (ln->ln_asked < nd6_mmaxtries) {
 				ln->ln_asked++;
+				lck_rw_lock_shared(nd_if_rwlock);
 				ln->ln_expire = timenow.tv_sec +
-				    ndi.retrans / 1000;
+				    nd_ifinfo[ifp->if_index].retrans / 1000;
+				lck_rw_done(nd_if_rwlock);
 				RT_ADDREF_LOCKED(rt);
 				RT_UNLOCK(rt);
 				lck_mtx_unlock(rnh_lock);
 				nd6_ns_output(ifp, NULL, &dst->sin6_addr,
-					ln, 0, 0);
+					ln, 0);
 				RT_REMREF(rt);
 			} else {
 				struct mbuf *m = ln->ln_hold;
@@ -701,22 +759,26 @@ again:
 			break;
 
 		case ND6_LLINFO_DELAY:
-			if ((ndi.flags & ND6_IFF_PERFORMNUD) != 0) {
+			lck_rw_lock_shared(nd_if_rwlock);
+			if ((nd_ifinfo[ifp->if_index].flags &
+			    ND6_IFF_PERFORMNUD) != 0) {
 				/* We need NUD */
 				ln->ln_asked = 1;
 				ln->ln_state = ND6_LLINFO_PROBE;
 				ln->ln_expire = timenow.tv_sec +
-					ndi.retrans / 1000;
+				    nd_ifinfo[ifp->if_index].retrans / 1000;
+				lck_rw_done(nd_if_rwlock);
 				RT_ADDREF_LOCKED(rt);
 				RT_UNLOCK(rt);
 				lck_mtx_unlock(rnh_lock);
 				nd6_ns_output(ifp, &dst->sin6_addr,
-				    &dst->sin6_addr, ln, 0, 0);
+				    &dst->sin6_addr, ln, 0);
 				lck_mtx_assert(rnh_lock,
 				    LCK_MTX_ASSERT_NOTOWNED);
 				RT_REMREF(rt);
 				goto again;
 			}
+			lck_rw_done(nd_if_rwlock);
 			ln->ln_state = ND6_LLINFO_STALE; /* XXX */
 			ln->ln_expire = rt_expiry(rt, timenow.tv_sec,
 			    nd6_gctimer);
@@ -726,13 +788,15 @@ again:
 		case ND6_LLINFO_PROBE:
 			if (ln->ln_asked < nd6_umaxtries) {
 				ln->ln_asked++;
+				lck_rw_lock_shared(nd_if_rwlock);
 				ln->ln_expire = timenow.tv_sec +
-				    ndi.retrans / 1000;
+				    nd_ifinfo[ifp->if_index].retrans / 1000;
+				lck_rw_done(nd_if_rwlock);
 				RT_ADDREF_LOCKED(rt);
 				RT_UNLOCK(rt);
 				lck_mtx_unlock(rnh_lock);
 				nd6_ns_output(ifp, &dst->sin6_addr,
-				    &dst->sin6_addr, ln, 0, 0);
+				    &dst->sin6_addr, ln, 0);
 				RT_REMREF(rt);
 			} else {
 				RT_UNLOCK(rt);
@@ -771,12 +835,13 @@ again:
 		if (dr->expire && dr->expire < timenow.tv_sec) {
 			struct nd_defrouter *t;
 			t = TAILQ_NEXT(dr, dr_entry);
-			defrtrlist_del(dr, 1);
+			defrtrlist_del(dr);
 			dr = t;
 		} else {
 			dr = TAILQ_NEXT(dr, dr_entry);
 		}
 	}
+	lck_mtx_unlock(nd6_mutex);
 
 	/*
 	 * expire interface addresses.
@@ -784,22 +849,21 @@ again:
 	 * However, from a stricter spec-conformance standpoint, we should
 	 * rather separate address lifetimes and prefix lifetimes.
 	 */
-  addrloop:
+addrloop:
+	lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
 	for (ia6 = in6_ifaddrs; ia6; ia6 = nia6) {
 		nia6 = ia6->ia_next;
+		IFA_LOCK(&ia6->ia_ifa);
+		/*
+		 * Extra reference for ourselves; it's a no-op if
+		 * we don't have to regenerate a temporary address,
+		 * otherwise it protects the address from going
+		 * away since we drop in6_ifaddr_rwlock below.
+		 */
+		IFA_ADDREF_LOCKED(&ia6->ia_ifa);
 		/* check address lifetime */
 		lt6 = &ia6->ia6_lifetime;
 		if (IFA6_IS_INVALID(ia6)) {
-			int regen = 0;
-
-			/*
-			 * Extra reference for ourselves; it's no-op if
-			 * we don't have to regenerate temporary address,
-			 * otherwise it protects the address from going
-			 * away since we drop nd6_mutex below.
-			 */
-			ifaref(&ia6->ia_ifa);
-
 			/*
 			 * If the expiring address is temporary, try
 			 * regenerating a new one.  This would be useful when
@@ -818,20 +882,27 @@ again:
 				 * hang. This is safe because the goto addrloop 
 				 * leads to a reevaluation of the in6_ifaddrs list
 				 */
-				lck_mtx_unlock(nd6_mutex);
-				if (regen_tmpaddr(ia6) == 0) 
-					regen = 1;
-				lck_mtx_lock(nd6_mutex);
+				IFA_UNLOCK(&ia6->ia_ifa);
+				lck_rw_done(&in6_ifaddr_rwlock);
+				(void) regen_tmpaddr(ia6);
+			} else {
+				IFA_UNLOCK(&ia6->ia_ifa);
+				lck_rw_done(&in6_ifaddr_rwlock);
 			}
 
-			in6_purgeaddr(&ia6->ia_ifa, 1);
+			/*
+			 * Purging the address would have caused
+			 * in6_ifaddr_rwlock to be dropped and reacquired;
+			 * therefore search again from the beginning
+			 * of the in6_ifaddrs list.
+			 */
+			in6_purgeaddr(&ia6->ia_ifa);
 
 			/* Release extra reference taken above */
-			ifafree(&ia6->ia_ifa);
-
-			if (regen)
-				goto addrloop; /* XXX: see below */
+			IFA_REMREF(&ia6->ia_ifa);
+			goto addrloop;
 		}
+		IFA_LOCK_ASSERT_HELD(&ia6->ia_ifa);
 		if (IFA6_IS_DEPRECATED(ia6)) {
 			int oldflags = ia6->ia6_flags;
 
@@ -846,7 +917,8 @@ again:
 			    (oldflags & IN6_IFF_DEPRECATED) == 0) {
 
 				/* see NOTE above */
-				lck_mtx_unlock(nd6_mutex);
+				IFA_UNLOCK(&ia6->ia_ifa);
+				lck_rw_done(&in6_ifaddr_rwlock);
 				if (regen_tmpaddr(ia6) == 0) {
 					/*
 					 * A new temporary address is
@@ -860,10 +932,13 @@ again:
 					 * loop just for safety.  Or does this 
 					 * significantly reduce performance??
 					 */
-					lck_mtx_lock(nd6_mutex);
+					/* Release extra reference */
+					IFA_REMREF(&ia6->ia_ifa);
 					goto addrloop;
 				}
-				lck_mtx_lock(nd6_mutex);
+				lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
+			} else {
+				IFA_UNLOCK(&ia6->ia_ifa);
 			}
 		} else {
 			/*
@@ -871,8 +946,26 @@ again:
 			 * preferred.
 			 */
 			ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
+			IFA_UNLOCK(&ia6->ia_ifa);
 		}
+		lck_rw_assert(&in6_ifaddr_rwlock, LCK_RW_ASSERT_EXCLUSIVE);
+		/* Release extra reference taken above */
+		IFA_REMREF(&ia6->ia_ifa);
 	}
+	lck_rw_done(&in6_ifaddr_rwlock);
+
+	lck_mtx_lock(nd6_mutex);
+	/*
+	 * Since we drop the nd6_mutex in prelist_remove, we want to run this
+	 * section single threaded.
+	 */
+	while (nd6_drain_busy) {
+		nd6_drain_waiters++;
+		msleep(nd6_drain_waitchan, nd6_mutex, (PZERO-1),
+		    __func__, NULL);
+		lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
+	}
+	nd6_drain_busy = TRUE;
 
 	/* expire prefix list */
 	pr = nd_prefix.lh_first;
@@ -882,19 +975,38 @@ again:
 		 * since pltime is just for autoconf, pltime processing for
 		 * prefix is not necessary.
 		 */
+		NDPR_LOCK(pr);
+		if (pr->ndpr_stateflags & NDPRF_PROCESSED) {
+			NDPR_UNLOCK(pr);
+			pr = pr->ndpr_next;
+			continue;
+		}
 		if (pr->ndpr_expire && pr->ndpr_expire < timenow.tv_sec) {
-			struct nd_prefix *t;
-			t = pr->ndpr_next;
-
 			/*
 			 * address expiration and prefix expiration are
 			 * separate.  NEVER perform in6_purgeaddr here.
 			 */
-
-			prelist_remove(pr, 1);
-			pr = t;
-		} else
+			pr->ndpr_stateflags |= NDPRF_PROCESSED;
+			NDPR_ADDREF_LOCKED(pr);
+			prelist_remove(pr);
+			NDPR_UNLOCK(pr);
+			NDPR_REMREF(pr);
+			pr = nd_prefix.lh_first;
+		} else {
+			pr->ndpr_stateflags |= NDPRF_PROCESSED;
+			NDPR_UNLOCK(pr);
 			pr = pr->ndpr_next;
+		}
+	}
+	LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
+		NDPR_LOCK(pr);
+		pr->ndpr_stateflags &= ~NDPRF_PROCESSED;
+		NDPR_UNLOCK(pr);
+	}
+	nd6_drain_busy = FALSE;
+	if (nd6_drain_waiters > 0) {
+		nd6_drain_waiters = 0;
+		wakeup(nd6_drain_waitchan);
 	}
 	lck_mtx_unlock(nd6_mutex);
 }
@@ -921,25 +1033,29 @@ regen_tmpaddr(
 	getmicrotime(&timenow);
 
 	ifp = ia6->ia_ifa.ifa_ifp;
-	ifnet_lock_exclusive(ifp);
+	ifnet_lock_shared(ifp);
 	for (ifa = ifp->if_addrlist.tqh_first; ifa;
 	     ifa = ifa->ifa_list.tqe_next)
 	{
 		struct in6_ifaddr *it6;
 
-		if (ifa->ifa_addr->sa_family != AF_INET6)
+		IFA_LOCK(ifa);
+		if (ifa->ifa_addr->sa_family != AF_INET6) {
+			IFA_UNLOCK(ifa);
 			continue;
-
+		}
 		it6 = (struct in6_ifaddr *)ifa;
 
 		/* ignore no autoconf addresses. */
-		if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
+		if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
+			IFA_UNLOCK(ifa);
 			continue;
-
+		}
 		/* ignore autoconf addresses with different prefixes. */
-		if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr)
+		if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr) {
+			IFA_UNLOCK(ifa);
 			continue;
-
+		}
 		/*
 		 * Now we are looking at an autoconf address with the same
 		 * prefix as ours.  If the address is temporary and is still
@@ -949,6 +1065,9 @@ regen_tmpaddr(
 		 */
 		if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
 		    !IFA6_IS_DEPRECATED(it6)) {
+			IFA_UNLOCK(ifa);
+			if (public_ifa6 != NULL)
+				IFA_REMREF(&public_ifa6->ia_ifa);
 			public_ifa6 = NULL;
 			break;
 		}
@@ -959,8 +1078,15 @@ regen_tmpaddr(
 		 * loop here, because there may be a still-preferred temporary
 		 * address with the prefix.
 		 */
-		if (!IFA6_IS_DEPRECATED(it6))
-		    public_ifa6 = it6;
+		if (!IFA6_IS_DEPRECATED(it6)) {
+			IFA_ADDREF_LOCKED(ifa);	/* for public_ifa6 */
+			IFA_UNLOCK(ifa);
+			if (public_ifa6 != NULL)
+				IFA_REMREF(&public_ifa6->ia_ifa);
+			public_ifa6 = it6;
+		} else {
+			IFA_UNLOCK(ifa);
+		}
 	}
 	ifnet_lock_done(ifp);
 
@@ -970,8 +1096,10 @@ regen_tmpaddr(
 		if ((e = in6_tmpifadd(public_ifa6, 0, M_WAITOK)) != 0) {
 			log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
 			    " tmp addr,errno=%d\n", e);
+			IFA_REMREF(&public_ifa6->ia_ifa);
 			return(-1);
 		}
+		IFA_REMREF(&public_ifa6->ia_ifa);
 		return(0);
 	}
 
@@ -987,7 +1115,7 @@ nd6_purge(
 	struct ifnet *ifp)
 {
 	struct llinfo_nd6 *ln;
-	struct nd_defrouter *dr, *ndr, drany;
+	struct nd_defrouter *dr, *ndr;
 	struct nd_prefix *pr, *npr;
 
 	/* Nuke default router list entries toward ifp */
@@ -999,18 +1127,38 @@ nd6_purge(
 		 */
 		for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = ndr) {
 			ndr = TAILQ_NEXT(dr, dr_entry);
+			if (dr->stateflags & NDDRF_INSTALLED)
+				continue;
 			if (dr->ifp == ifp)
-				defrtrlist_del(dr, 1);
+				defrtrlist_del(dr);
 		}
 		dr = TAILQ_FIRST(&nd_defrouter);
 		if (dr->ifp == ifp)
-			defrtrlist_del(dr, 1);
+			defrtrlist_del(dr);
+	}
+
+	for (dr = TAILQ_FIRST(&nd_defrouter); dr; dr = ndr) {
+		ndr = TAILQ_NEXT(dr, dr_entry);
+		if (!(dr->stateflags & NDDRF_INSTALLED))
+			continue;
+
+		if (dr->ifp == ifp)
+			defrtrlist_del(dr);
 	}
 
 	/* Nuke prefix list entries toward ifp */
 	for (pr = nd_prefix.lh_first; pr; pr = npr) {
 		npr = pr->ndpr_next;
+		NDPR_LOCK(pr);
 		if (pr->ndpr_ifp == ifp) {
+			/*
+			 * Because if_detach() does *not* release prefixes
+			 * while purging addresses, the reference count will
+			 * still be above zero. We therefore reset it to
+			 * make sure that the prefix really gets purged.
+			 */
+			pr->ndpr_addrcnt = 0;
+
 			/*
 			 * Previously, pr->ndpr_addr is removed as well,
 			 * but I strongly believe we don't have to do it.
@@ -1019,27 +1167,28 @@ nd6_purge(
 			 * by itself.
 			 * (jinmei@kame.net 20010129)
 			 */
-			prelist_remove(pr, 1);
+			NDPR_ADDREF_LOCKED(pr);
+			prelist_remove(pr);
+			NDPR_UNLOCK(pr);
+			NDPR_REMREF(pr);
+		} else {
+			NDPR_UNLOCK(pr);
 		}
 	}
+	lck_mtx_unlock(nd6_mutex);
 
 	/* cancel default outgoing interface setting */
 	if (nd6_defifindex == ifp->if_index) {
-		/* Release nd6_mutex as it will be acquired 
-		 * during nd6_setdefaultiface again
-		 */ 
-		lck_mtx_unlock(nd6_mutex);
 		nd6_setdefaultiface(0);
-		lck_mtx_lock(nd6_mutex);
 	}
 
 	if (!ip6_forwarding && (ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD))) { 
+		lck_mtx_lock(nd6_mutex);
 		/* refresh default router list */
-		bzero(&drany, sizeof(drany));
-		defrouter_delreq(&drany, 0);
-		defrouter_select();
+		defrouter_reset();
+		defrouter_select(ifp);
+		lck_mtx_unlock(nd6_mutex);
 	}
-	lck_mtx_unlock(nd6_mutex);
 
 	/*
 	 * Nuke neighbor cache entries for the ifp.
@@ -1098,28 +1247,31 @@ nd6_lookup(
 {
 	struct rtentry *rt;
 	struct sockaddr_in6 sin6;
+	unsigned int ifscope;
 
 	bzero(&sin6, sizeof(sin6));
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_addr = *addr6;
-#if SCOPEDROUTING
-	sin6.sin6_scope_id = in6_addr2scopeid(ifp, addr6);
-#endif
-	if (rt_locked)
-		lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 
-	rt = rt_locked ? rtalloc1_locked((struct sockaddr *)&sin6, create, 0) :
-	    rtalloc1((struct sockaddr *)&sin6, create, 0);
+	ifscope = (ifp != NULL) ? ifp->if_index : IFSCOPE_NONE;
+	if (rt_locked) {
+		lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
+		rt = rtalloc1_scoped_locked((struct sockaddr *)&sin6,
+		    create, 0, ifscope);
+	} else {
+		rt = rtalloc1_scoped((struct sockaddr *)&sin6,
+		    create, 0, ifscope);
+	}
 
 	if (rt != NULL) {
 		RT_LOCK(rt);
 		if ((rt->rt_flags & RTF_LLINFO) == 0) {
 			/*
-			 * This is the case for the default route.  If we
-			 * want to create a neighbor cache for the address,
-			 * we should free the route for the destination and
-			 * allocate an interface route.
+			 * This is the case for the default route.
+			 * If we want to create a neighbor cache for the
+			 * address, we should free the route for the
+			 * destination and allocate an interface route.
 			 */
 			if (create) {
 				RT_UNLOCK(rt);
@@ -1134,6 +1286,7 @@ nd6_lookup(
 	if (rt == NULL) {
 		if (create && ifp) {
 			struct ifaddr *ifa;
+			u_int32_t ifa_flags;
 			int e;
 
 			/*
@@ -1155,11 +1308,14 @@ nd6_lookup(
 			 */
 			if (!rt_locked)
 				lck_mtx_lock(rnh_lock);
-			if ((e = rtrequest_locked(RTM_ADD,
+			IFA_LOCK_SPIN(ifa);
+			ifa_flags = ifa->ifa_flags;
+			IFA_UNLOCK(ifa);
+			if ((e = rtrequest_scoped_locked(RTM_ADD,
 			    (struct sockaddr *)&sin6, ifa->ifa_addr,
 			    (struct sockaddr *)&all1_sa,
-			    (ifa->ifa_flags | RTF_HOST | RTF_LLINFO) &
-			    ~RTF_CLONING, &rt)) != 0) {
+			    (ifa_flags | RTF_HOST | RTF_LLINFO) &
+			    ~RTF_CLONING, &rt, ifscope)) != 0) {
 				if (e != EEXIST)
 					log(LOG_ERR, "%s: failed to add route "
 					    "for a neighbor(%s), errno=%d\n",
@@ -1167,7 +1323,7 @@ nd6_lookup(
 			}
 			if (!rt_locked)
 				lck_mtx_unlock(rnh_lock);
-			ifafree(ifa);
+			IFA_REMREF(ifa);
 			if (rt == NULL)
 				return(NULL);
 
@@ -1191,10 +1347,13 @@ nd6_lookup(
 	 *      it might be the loopback interface if the entry is for our
 	 *      own address on a non-loopback interface. Instead, we should
 	 *      use rt->rt_ifa->ifa_ifp, which would specify the REAL
-	 *      interface.
+	 *	interface.
+	 * Note also that ifa_ifp and ifp may differ when we connect two
+	 * interfaces to the same link, install a link prefix to an interface,
+	 * and try to install a neighbor cache on an interface that does not
+	 * have a route to the prefix.
 	 */
-	if (ifp == NULL || (ifp->if_type == IFT_PPP) ||
-	    (ifp->if_eflags & IFEF_NOAUTOIPV6LL) ||
+	if (ifp == NULL || 
 	    (rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
 	    rt->rt_gateway->sa_family != AF_LINK ||  rt->rt_llinfo == NULL ||
 	    (ifp && rt->rt_ifa->ifa_ifp != ifp)) {
@@ -1215,73 +1374,132 @@ nd6_lookup(
 }
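
On success, nd6_lookup() hands the route back locked and with an extra reference, and every caller must drop both; nd6_is_addr_neighbor() below does exactly that. A condensed caller-side sketch (addr6 and ifp assumed to be in scope):

    struct rtentry *rt;

    if ((rt = nd6_lookup(&addr6, 0, ifp, 0)) != NULL) {
    	RT_LOCK_ASSERT_HELD(rt);
    	/* ... inspect rt->rt_llinfo, rt->rt_gateway, etc. ... */
    	RT_REMREF_LOCKED(rt);	/* drop the reference nd6_lookup took */
    	RT_UNLOCK(rt);
    }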
 
 /*
- * Detect if a given IPv6 address identifies a neighbor on a given link.
- * XXX: should take care of the destination of a p2p link?
+ * Test whether a given IPv6 address is a neighbor or not, ignoring
+ * the actual neighbor cache.  The neighbor cache is ignored in order
+ * to not reenter the routing code from within itself.
  */
-int
-nd6_is_addr_neighbor(
+static int
+nd6_is_new_addr_neighbor(
 	struct sockaddr_in6 *addr,
-	struct ifnet *ifp,
-	int    rt_locked)
+	struct ifnet *ifp)
 {
-	struct ifaddr *ifa;
-	struct rtentry *rt;
-	int i;
+	struct nd_prefix *pr;
+	struct ifaddr *dstaddr;
 
-#define IFADDR6(a) ((((struct in6_ifaddr *)(a))->ia_addr).sin6_addr)
-#define IFMASK6(a) ((((struct in6_ifaddr *)(a))->ia_prefixmask).sin6_addr)
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
 
 	/*
 	 * A link-local address is always a neighbor.
-	 * XXX: we should use the sin6_scope_id field rather than the embedded
-	 * interface index.
+	 * XXX: a link does not necessarily specify a single interface.
 	 */
-	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr) &&
-	    ntohs(*(u_int16_t *)&addr->sin6_addr.s6_addr[2]) == ifp->if_index)
-		return(1);
+	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
+		struct sockaddr_in6 sin6_copy;
+		u_int32_t zone;
+
+		/*
+		 * We need sin6_copy since sa6_recoverscope() may modify the
+		 * content (XXX).
+		 */
+		sin6_copy = *addr;
+		if (sa6_recoverscope(&sin6_copy))
+			return (0); /* XXX: should be impossible */
+		if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
+			return (0);
+		if (sin6_copy.sin6_scope_id == zone)
+			return (1);
+		else
+			return (0);
+	}
 
 	/*
 	 * If the address matches one of our addresses,
 	 * it should be a neighbor.
+	 * If the address matches one of our on-link prefixes, it should be a
+	 * neighbor.
 	 */
-	ifnet_lock_shared(ifp);
-	for (ifa = ifp->if_addrlist.tqh_first;
-	     ifa;
-	     ifa = ifa->ifa_list.tqe_next)
-	{
-		if (ifa->ifa_addr->sa_family != AF_INET6)
+	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
+		NDPR_LOCK(pr);
+		if (pr->ndpr_ifp != ifp) {
+			NDPR_UNLOCK(pr);
+			continue;
+		}
+		if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) {
+			NDPR_UNLOCK(pr);
 			continue;
+		}
+		if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
+		    &addr->sin6_addr, &pr->ndpr_mask)) {
+			NDPR_UNLOCK(pr);
+			return (1);
+		}
+		NDPR_UNLOCK(pr);
+	}
 
-		for (i = 0; i < 4; i++) {
-			if ((IFADDR6(ifa).s6_addr32[i] ^
-			     addr->sin6_addr.s6_addr32[i]) &
-				IFMASK6(ifa).s6_addr32[i])
-				continue;
+	/*
+	 * If the address is assigned to the node on the other side of
+	 * a p2p interface, the address should be a neighbor.
+	 */
+	dstaddr = ifa_ifwithdstaddr((struct sockaddr *)addr);
+	if (dstaddr != NULL) {
+		if (dstaddr->ifa_ifp == ifp) {
+			IFA_REMREF(dstaddr);
+			return (1);
 		}
-		ifnet_lock_done(ifp);
-		return(1);
+		IFA_REMREF(dstaddr);
+		dstaddr = NULL;
 	}
-	ifnet_lock_done(ifp);
+
+	/*
+	 * If the default router list is empty, all addresses are regarded
+	 * as on-link, and thus as neighbors.
+	 * XXX: we restrict the condition to hosts, because routers usually do
+	 * not have the "default router list".
+	 */
+	if (!ip6_forwarding && TAILQ_FIRST(&nd_defrouter) == NULL &&
+	    nd6_defifindex == ifp->if_index) {
+		return (1);
+	}
+
+	return (0);
+}
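
The on-link prefix test above uses IN6_ARE_MASKED_ADDR_EQUAL(), which compares two addresses under a prefix mask; it is the same XOR-and-mask word loop that the removed IFADDR6/IFMASK6 code spelled out by hand. A plausible shape of the comparison (the real macro is defined in netinet6/in6.h):

    static int
    in6_masked_eq(const struct in6_addr *a, const struct in6_addr *b,
        const struct in6_addr *mask)
    {
    	int i;

    	/* equal iff the two addresses agree on every masked bit */
    	for (i = 0; i < 4; i++) {
    		if ((a->s6_addr32[i] ^ b->s6_addr32[i]) & mask->s6_addr32[i])
    			return (0);
    	}
    	return (1);
    }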
+
+
+/*
+ * Detect if a given IPv6 address identifies a neighbor on a given link.
+ * XXX: should take care of the destination of a p2p link?
+ */
+int
+nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp, int rt_locked)
+{
+	struct rtentry *rt;
+
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
+	lck_mtx_lock(nd6_mutex);
+	if (nd6_is_new_addr_neighbor(addr, ifp)) {
+		lck_mtx_unlock(nd6_mutex);
+		return (1);
+	}
+	lck_mtx_unlock(nd6_mutex);
 
 	/*
 	 * Even if the address matches none of our addresses, it might be
-	 * in the neighbor cache.  Callee returns a locked route upon
-	 * success.
+	 * in the neighbor cache.
 	 */
 	if ((rt = nd6_lookup(&addr->sin6_addr, 0, ifp, rt_locked)) != NULL) {
 		RT_LOCK_ASSERT_HELD(rt);
 		RT_REMREF_LOCKED(rt);
 		RT_UNLOCK(rt);
-		return(1);
+		return (1);
 	}
 
-	return(0);
-#undef IFADDR6
-#undef IFMASK6
+	return (0);
 }
 
 /*
  * Free an nd6 llinfo entry.
+ * Since making this function global would cause significant changes elsewhere
+ * in the kernel, DO NOT make it global unless you have a strong reason for the
+ * change and are sure that the change is safe.
  */
 void
 nd6_free(
@@ -1324,12 +1542,15 @@ nd6_free(
 			 * See a corresponding comment in nd6_na_input().
 			 */
 			RT_UNLOCK(rt);
+			lck_mtx_unlock(nd6_mutex);
 			rt6_flush(&in6, rt->rt_ifp);
+			lck_mtx_lock(nd6_mutex);
 		} else {
 			RT_UNLOCK(rt);
 		}
 
 		if (dr) {
+			NDDR_REMREF(dr);
 			/*
 			 * Unreachablity of a router might affect the default
 			 * router selection and on-link detection of advertised
@@ -1353,21 +1574,12 @@ nd6_free(
 			 * the check now.
 			 */
 			RT_UNLOCK(rt);
-			pfxlist_onlink_check(1);
+			pfxlist_onlink_check();
 
-			if (dr == TAILQ_FIRST(&nd_defrouter)) {
-				/*
-				 * It is used as the current default router,
-				 * so we have to move it to the end of the
-				 * list and choose a new one.
-				 * XXX: it is not very efficient if this is
-				 *      the only router.
-				 */
-				TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
-				TAILQ_INSERT_TAIL(&nd_defrouter, dr, dr_entry);
-
-				defrouter_select();
-			}
+			/*
+			 * refresh default router list
+			 */
+			defrouter_select(rt->rt_ifp);
 		}
 		RT_LOCK_ASSERT_NOTHELD(rt);
 	} else {
@@ -1390,7 +1602,7 @@ nd6_free(
 /*
  * Upper-layer reachability hint for Neighbor Unreachability Detection.
  *
- * XXX cost-effective metods?
+ * XXX cost-effective methods?
  */
 void
 nd6_nud_hint(
@@ -1444,8 +1656,8 @@ nd6_nud_hint(
 	ln->ln_state = ND6_LLINFO_REACHABLE;
 	if (ln->ln_expire) {
 		lck_rw_lock_shared(nd_if_rwlock);
-		ln->ln_expire = rt_expiry(rt, timenow.tv_sec,
-			nd_ifinfo[rt->rt_ifp->if_index].reachable);
+		ln->ln_expire = timenow.tv_sec +
+			nd_ifinfo[rt->rt_ifp->if_index].reachable;
 		lck_rw_done(nd_if_rwlock);
 	}
 done:
@@ -1490,17 +1702,17 @@ nd6_rtrequest(
 		if (!nd6_need_cache(ifp)) {	/* stf case */
 			no_nd_cache = 1;
 		} else {
+			struct sockaddr_in6 sin6;
+
+			rtkey_to_sa6(rt, &sin6);
 			/*
 			 * nd6_is_addr_neighbor() may call nd6_lookup(),
 			 * therefore we drop rt_lock to avoid deadlock
-			 * during the lookup.  Using rt_key(rt) is still
-			 * safe because it won't change while rnh_lock
-			 * is held.
+			 * during the lookup.
 			 */
 			RT_ADDREF_LOCKED(rt);
 			RT_UNLOCK(rt);
-			no_nd_cache = !nd6_is_addr_neighbor(
-			    (struct sockaddr_in6 *)rt_key(rt), ifp, 1);
+			no_nd_cache = !nd6_is_addr_neighbor(&sin6, ifp, 1);
 			RT_LOCK(rt);
 			RT_REMREF_LOCKED(rt);
 		}
@@ -1535,12 +1747,13 @@ nd6_rtrequest(
 		 *     SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
 		 *	   rt->rt_flags |= RTF_CLONING;
 		 */
-		if (rt->rt_flags & (RTF_CLONING | RTF_LLINFO)) {
+		if ((rt->rt_flags & RTF_CLONING) ||
+		    ((rt->rt_flags & RTF_LLINFO) && ln == NULL)) {
 			/*
-			 * Case 1: This route should come from
-			 * a route to interface.  RTF_LLINFO flag is set
-			 * for a host route whose destination should be
-			 * treated as on-link.
+			 * Case 1: This route should come from a route to
+			 * an interface (RTF_CLONING case) or the route should be
+			 * treated as on-link but is currently not
+			 * (RTF_LLINFO && ln == NULL case).
 			 */
 			if (rt_setgate(rt, rt_key(rt),
 			    (struct sockaddr *)&null_sdl) == 0) {
@@ -1575,15 +1788,6 @@ nd6_rtrequest(
 		 *   (or should we allow proxy ND configuration only for
 		 *   routers?  there's no mention about proxy ND from hosts)
 		 */
-#if 0
-		/* XXX it does not work */
-		if (rt->rt_flags & RTF_ANNOUNCE)
-			nd6_na_output(ifp,
-			      &SIN6(rt_key(rt))->sin6_addr,
-			      &SIN6(rt_key(rt))->sin6_addr,
-			      ip6_forwarding ? ND_NA_FLAG_ROUTER : 0,
-			      1, NULL);
-#endif
 		/* FALLTHROUGH */
 	case RTM_RESOLVE:
 		if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) == 0) {
@@ -1593,9 +1797,12 @@ nd6_rtrequest(
 			 */
 			if (gate->sa_family != AF_LINK ||
 			    gate->sa_len < sizeof(null_sdl)) {
-				log(LOG_DEBUG,
-				    "nd6_rtrequest: bad gateway value: %s\n",
-				    if_name(ifp));
+				/* Don't complain in case of RTM_ADD */
+				if (req == RTM_RESOLVE) {
+					log(LOG_DEBUG,
+					    "nd6_rtrequest: bad gateway "
+					    "value: %s\n", if_name(ifp));
+				}
 				break;
 			}
 			SDL(gate)->sdl_type = ifp->if_type;
@@ -1612,6 +1819,8 @@ nd6_rtrequest(
 			log(LOG_DEBUG, "nd6_rtrequest: malloc failed\n");
 			break;
 		}
+		rt->rt_llinfo_get_ri = nd6_llinfo_get_ri;
+		rt->rt_llinfo_purge = nd6_llinfo_purge;
 		rt->rt_llinfo_free = nd6_llinfo_free;
 
 		nd6_inuse++;
@@ -1688,14 +1897,22 @@ nd6_rtrequest(
 				SDL(gate)->sdl_alen = ifp->if_addrlen;
 			}
 			if (nd6_useloopback) {
-#if IFNET_ROUTE_REFCNT
-				/* Adjust route ref count for the interfaces */
-				if (rt->rt_if_ref_fn != NULL &&
-				    rt->rt_ifp != lo_ifp) {
-					rt->rt_if_ref_fn(lo_ifp, 1);
-					rt->rt_if_ref_fn(rt->rt_ifp, -1);
+				if (rt->rt_ifp != lo_ifp) {
+					/*
+					 * Purge any link-layer info caching.
+					 */
+					if (rt->rt_llinfo_purge != NULL)
+						rt->rt_llinfo_purge(rt);
+
+					/*
+					 * Adjust route ref count for the
+					 * interfaces.
+					 */
+					if (rt->rt_if_ref_fn != NULL) {
+						rt->rt_if_ref_fn(lo_ifp, 1);
+						rt->rt_if_ref_fn(rt->rt_ifp, -1);
+					}
 				}
-#endif /* IFNET_ROUTE_REFCNT */
 				rt->rt_ifp = lo_ifp;	/* XXX */
 				/*
 				 * Make sure rt_ifa be equal to the ifaddr
@@ -1709,7 +1926,7 @@ nd6_rtrequest(
 					rtsetifa(rt, ifa);
 				}
 			}
-			ifafree(ifa);
+			IFA_REMREF(ifa);
 		} else if (rt->rt_flags & RTF_ANNOUNCE) {
 			ln->ln_expire = 0;
 			ln->ln_state = ND6_LLINFO_REACHABLE;
@@ -1718,26 +1935,30 @@ nd6_rtrequest(
 			/* join solicited node multicast for proxy ND */
 			if (ifp->if_flags & IFF_MULTICAST) {
 				struct in6_addr llsol;
+				struct in6_multi *in6m;
 				int error;
 
 				llsol = SIN6(rt_key(rt))->sin6_addr;
-				llsol.s6_addr16[0] = htons(0xff02);
-				llsol.s6_addr16[1] = htons(ifp->if_index);
+				llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
 				llsol.s6_addr32[1] = 0;
 				llsol.s6_addr32[2] = htonl(1);
 				llsol.s6_addr8[12] = 0xff;
-
-				if (!in6_addmulti(&llsol, ifp, &error, 0)) {
+				if (in6_setscope(&llsol, ifp, NULL))
+					break;
+				error = in6_mc_join(ifp, &llsol, NULL, &in6m, 0);
+				if (error) {
 					nd6log((LOG_ERR, "%s: failed to join "
 					    "%s (errno=%d)\n", if_name(ifp),
 					    ip6_sprintf(&llsol), error));
+				} else {
+					IN6M_REMREF(in6m);
 				}
 			}
 		}
 		break;
 
 	case RTM_DELETE:
-		if (!ln)
+		if (ln == NULL)
 			break;
 		/* leave from solicited node multicast for proxy ND */
 		if ((rt->rt_flags & RTF_ANNOUNCE) != 0 &&
@@ -1746,17 +1967,19 @@ nd6_rtrequest(
 			struct in6_multi *in6m;
 
 			llsol = SIN6(rt_key(rt))->sin6_addr;
-			llsol.s6_addr16[0] = htons(0xff02);
-			llsol.s6_addr16[1] = htons(ifp->if_index);
+			llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
 			llsol.s6_addr32[1] = 0;
 			llsol.s6_addr32[2] = htonl(1);
 			llsol.s6_addr8[12] = 0xff;
-
-			ifnet_lock_shared(ifp);
-			IN6_LOOKUP_MULTI(llsol, ifp, in6m);
-			ifnet_lock_done(ifp);
-			if (in6m)
-				in6_delmulti(in6m, 0);
+			if (in6_setscope(&llsol, ifp, NULL) == 0) {
+				in6_multihead_lock_shared();
+				IN6_LOOKUP_MULTI(&llsol, ifp, in6m);
+				in6_multihead_lock_done();
+				if (in6m != NULL) {
+					in6_mc_leave(in6m, NULL);
+					IN6M_REMREF(in6m);
+				}
+			}
 		}
 		nd6_inuse--;
 		/*
@@ -1767,10 +1990,18 @@ nd6_rtrequest(
 		 */
 		if (ln->ln_flags & ND6_LNF_IN_USE)
 			LN_DEQUEUE(ln);
+
+		/*
+		 * Purge any link-layer info caching.
+		 */
+		if (rt->rt_llinfo_purge != NULL)
+			rt->rt_llinfo_purge(rt);
+
 		rt->rt_flags &= ~RTF_LLINFO;
-		if (ln->ln_hold != NULL)
+		if (ln->ln_hold != NULL) {
 			m_freem(ln->ln_hold);
-		ln->ln_hold = NULL;
+			ln->ln_hold = NULL;
+		}
 	}
 }
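
Both the RTM_ADD and RTM_DELETE cases above derive the solicited-node multicast group (RFC 4291: ff02::1:ffXX:XXXX, where XX:XXXX are the low 24 bits of the unicast address) before joining or leaving it. A standalone sketch of that derivation; the kernel version additionally embeds the interface scope via in6_setscope() before the join/leave:

    #include <string.h>
    #include <netinet/in.h>

    static struct in6_addr
    solicited_node_group(const struct in6_addr *unicast)
    {
    	struct in6_addr llsol;

    	memset(&llsol, 0, sizeof (llsol));
    	llsol.s6_addr[0] = 0xff;	/* ff02::/16, link-local scope */
    	llsol.s6_addr[1] = 0x02;
    	llsol.s6_addr[11] = 0x01;	/* ::1:ff00:0/104 prefix */
    	llsol.s6_addr[12] = 0xff;
    	llsol.s6_addr[13] = unicast->s6_addr[13];	/* low 24 bits */
    	llsol.s6_addr[14] = unicast->s6_addr[14];
    	llsol.s6_addr[15] = unicast->s6_addr[15];
    	return (llsol);
    }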
 
@@ -1835,7 +2066,6 @@ nd6_siocgprlst(void *data, int data_is_64)
 	struct in6_prlist_64 *prl_64 = (struct in6_prlist_64 *)data;
 	struct in6_prlist_32 *prl_32 = (struct in6_prlist_32 *)data;
 	struct nd_prefix *pr;
-	struct rr_prefix *rpp;
 	int i = 0;
 
 	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
@@ -1852,8 +2082,9 @@ nd6_siocgprlst(void *data, int data_is_64)
 			struct nd_pfxrouter *pfr;
 			int j;
 
+			NDPR_LOCK(pr);
 			(void) in6_embedscope(&prl_64->prefix[i].prefix,
-			    &pr->ndpr_prefix, NULL, NULL);
+			    &pr->ndpr_prefix, NULL, NULL, NULL);
 			prl_64->prefix[i].raflags = pr->ndpr_raf;
 			prl_64->prefix[i].prefixlen = pr->ndpr_plen;
 			prl_64->prefix[i].vltime = pr->ndpr_vltime;
@@ -1884,27 +2115,12 @@ nd6_siocgprlst(void *data, int data_is_64)
 			}
 			prl_64->prefix[i].advrtrs = j;
 			prl_64->prefix[i].origin = PR_ORIG_RA;
+			NDPR_UNLOCK(pr);
 
 			i++;
 			pr = pr->ndpr_next;
 		}
 
-		for (rpp = LIST_FIRST(&rr_prefix); rpp;
-		     rpp = LIST_NEXT(rpp, rp_entry)) {
-			if (i >= PRLSTSIZ)
-				break;
-			(void) in6_embedscope(&prl_64->prefix[i].prefix,
-			    &pr->ndpr_prefix, NULL, NULL);
-			prl_64->prefix[i].raflags = rpp->rp_raf;
-			prl_64->prefix[i].prefixlen = rpp->rp_plen;
-			prl_64->prefix[i].vltime = rpp->rp_vltime;
-			prl_64->prefix[i].pltime = rpp->rp_pltime;
-			prl_64->prefix[i].if_index = rpp->rp_ifp->if_index;
-			prl_64->prefix[i].expire = rpp->rp_expire;
-			prl_64->prefix[i].advrtrs = 0;
-			prl_64->prefix[i].origin = rpp->rp_origin;
-			i++;
-		}
 		return;
 	}
 	/* For 32-bit process */
@@ -1912,8 +2128,9 @@ nd6_siocgprlst(void *data, int data_is_64)
 		struct nd_pfxrouter *pfr;
 		int j;
 
+		NDPR_LOCK(pr);
 		(void) in6_embedscope(&prl_32->prefix[i].prefix,
-		    &pr->ndpr_prefix, NULL, NULL);
+		    &pr->ndpr_prefix, NULL, NULL, NULL);
 		prl_32->prefix[i].raflags = pr->ndpr_raf;
 		prl_32->prefix[i].prefixlen = pr->ndpr_plen;
 		prl_32->prefix[i].vltime = pr->ndpr_vltime;
@@ -1944,27 +2161,11 @@ nd6_siocgprlst(void *data, int data_is_64)
 		}
 		prl_32->prefix[i].advrtrs = j;
 		prl_32->prefix[i].origin = PR_ORIG_RA;
+		NDPR_UNLOCK(pr);
 
 		i++;
 		pr = pr->ndpr_next;
 	}
-
-	for (rpp = LIST_FIRST(&rr_prefix); rpp;
-	     rpp = LIST_NEXT(rpp, rp_entry)) {
-		if (i >= PRLSTSIZ)
-			break;
-		(void) in6_embedscope(&prl_32->prefix[i].prefix,
-		    &pr->ndpr_prefix, NULL, NULL);
-		prl_32->prefix[i].raflags = rpp->rp_raf;
-		prl_32->prefix[i].prefixlen = rpp->rp_plen;
-		prl_32->prefix[i].vltime = rpp->rp_vltime;
-		prl_32->prefix[i].pltime = rpp->rp_pltime;
-		prl_32->prefix[i].if_index = rpp->rp_ifp->if_index;
-		prl_32->prefix[i].expire = rpp->rp_expire;
-		prl_32->prefix[i].advrtrs = 0;
-		prl_32->prefix[i].origin = rpp->rp_origin;
-		i++;
-	}
 }
 
 int
@@ -1972,7 +2173,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 {
 	struct in6_ndireq *ndi = (struct in6_ndireq *)data;
 	struct in6_ondireq *ondi = (struct in6_ondireq *)data;
-	struct nd_defrouter *dr, any;
+	struct nd_defrouter *dr;
 	struct nd_prefix *pr;
 	struct rtentry *rt;
 	int i = ifp->if_index, error = 0;
@@ -2018,7 +2219,6 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 		ondi->ndi.flags = nd_ifinfo[i].flags;
 		ondi->ndi.recalctm = nd_ifinfo[i].recalctm;
 		ondi->ndi.chlim = nd_ifinfo[i].chlim;
-		ondi->ndi.receivedra = nd_ifinfo[i].receivedra;
 		lck_rw_done(nd_if_rwlock);
 		break;
 
@@ -2040,10 +2240,9 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 		 * xxx sumikawa: should not delete route if default
 		 * route equals the top of the default router list
 		 */
-		bzero(&any, sizeof(any));
 		lck_mtx_lock(nd6_mutex);
-		defrouter_delreq(&any, 1);
-		defrouter_select();
+		defrouter_reset();
+		defrouter_select(ifp);
 		lck_mtx_unlock(nd6_mutex);
 		/* xxx sumikawa: flush prefix list */
 		break;
@@ -2051,28 +2250,75 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 	case SIOCSPFXFLUSH_IN6: {
 		/* flush all the prefix advertised by routers */
 		struct nd_prefix *next;
-		lck_mtx_lock(nd6_mutex);
 
+		lck_mtx_lock(nd6_mutex);
 		for (pr = nd_prefix.lh_first; pr; pr = next) {
-			struct in6_ifaddr *ia, *ia_next;
+			struct in6_ifaddr *ia;
 
 			next = pr->ndpr_next;
 
-			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
+			NDPR_LOCK(pr);
+			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) {
+				NDPR_UNLOCK(pr);
 				continue; /* XXX */
-
+			}
+			if (ifp != lo_ifp && pr->ndpr_ifp != ifp) {
+				NDPR_UNLOCK(pr);
+				continue;
+			}
 			/* do we really have to remove addresses as well? */
-			for (ia = in6_ifaddrs; ia; ia = ia_next) {
-				/* ia might be removed.  keep the next ptr. */
-				ia_next = ia->ia_next;
+			NDPR_ADDREF_LOCKED(pr);
+			NDPR_UNLOCK(pr);
+			lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
+			ia = in6_ifaddrs;
+			while (ia != NULL) {
+				IFA_LOCK(&ia->ia_ifa);
+				if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
+					IFA_UNLOCK(&ia->ia_ifa);
+					ia = ia->ia_next;
+					continue;
+				}
 
-				if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0)
+				if (ia->ia6_ndpr == pr) {
+					IFA_ADDREF_LOCKED(&ia->ia_ifa);
+					IFA_UNLOCK(&ia->ia_ifa);
+					lck_rw_done(&in6_ifaddr_rwlock);
+					lck_mtx_unlock(nd6_mutex);
+					in6_purgeaddr(&ia->ia_ifa);
+					lck_mtx_lock(nd6_mutex);
+					lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
+					IFA_REMREF(&ia->ia_ifa);
+					/*
+					 * Purging the address caused
+					 * in6_ifaddr_rwlock to be dropped
+					 * and reacquired; therefore search
+					 * again from the beginning of
+					 * in6_ifaddrs.  The same applies to
+					 * the prefix list.
+					 */
+					ia = in6_ifaddrs;
+					next = nd_prefix.lh_first;
 					continue;
 
-				if (ia->ia6_ndpr == pr)
-					in6_purgeaddr(&ia->ia_ifa, 1);
+				}
+				IFA_UNLOCK(&ia->ia_ifa);
+				ia = ia->ia_next;
 			}
-			prelist_remove(pr, 1);
+			lck_rw_done(&in6_ifaddr_rwlock);
+			NDPR_LOCK(pr);
+			prelist_remove(pr);
+			NDPR_UNLOCK(pr);
+			/*
+			 * If we were trying to restart this loop
+			 * above by changing the value of 'next', we might
+			 * end up freeing the only element on the list
+			 * when we call NDPR_REMREF().
+			 * When this happens, we also have to get out of this
+			 * loop because we have nothing else to do.
+			 */
+			if (pr == next)
+				next = NULL;
+			NDPR_REMREF(pr);
 		}
 		lck_mtx_unlock(nd6_mutex);
 		break;
@@ -2090,9 +2336,12 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 			 */
 			for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = next) {
 				next = TAILQ_NEXT(dr, dr_entry);
-				defrtrlist_del(dr, 1);
+				if (ifp == lo_ifp || dr->ifp == ifp)
+					defrtrlist_del(dr);
 			}
-			defrtrlist_del(TAILQ_FIRST(&nd_defrouter), 1);
+			if (ifp == lo_ifp ||
+			    TAILQ_FIRST(&nd_defrouter)->ifp == ifp)
+				defrtrlist_del(TAILQ_FIRST(&nd_defrouter));
 		}
 		lck_mtx_unlock(nd6_mutex);
 		break;
@@ -2183,8 +2432,9 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 		struct in6_ndifreq_64 *ndif_64 = (struct in6_ndifreq_64 *)data;
 		struct in6_ndifreq_32 *ndif_32 = (struct in6_ndifreq_32 *)data;
 
-		return (nd6_setdefaultiface(cmd == SIOCSDEFIFACE_IN6_64 ?
-		    ndif_64->ifindex : ndif_32->ifindex));
+		error = nd6_setdefaultiface(cmd == SIOCSDEFIFACE_IN6_64 ?
+		    ndif_64->ifindex : ndif_32->ifindex);
+		return (error);
 		/* NOTREACHED */
 	}
 	}
@@ -2214,9 +2464,9 @@ nd6_cache_lladdr(
 	int newstate = 0;
 	struct timeval timenow;
 
-	if (!ifp)
+	if (ifp == NULL)
 		panic("ifp == NULL in nd6_cache_lladdr");
-	if (!from)
+	if (from == NULL)
 		panic("from == NULL in nd6_cache_lladdr");
 
 	/* nothing must be updated for unspecified address */
@@ -2236,12 +2486,6 @@ nd6_cache_lladdr(
 
 	rt = nd6_lookup(from, 0, ifp, 0);
 	if (rt == NULL) {
-#if 0
-		/* nothing must be done if there's no lladdr */
-		if (!lladdr || !lladdrlen)
-			return;
-#endif
-
 		if ((rt = nd6_lookup(from, 1, ifp, 0)) == NULL)
 			return;
 		RT_LOCK_ASSERT_HELD(rt);
@@ -2257,6 +2501,8 @@ nd6_cache_lladdr(
 		is_newentry = 0;
 	}
 
+	if (rt == NULL)
+		return;
 	if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) {
 fail:
 		RT_UNLOCK(rt);
@@ -2264,10 +2510,10 @@ fail:
 		rtfree(rt);
 		return;
 	}
-	ln = rt->rt_llinfo;
-	if (!ln)
+	ln = (struct llinfo_nd6 *)rt->rt_llinfo;
+	if (ln == NULL)
 		goto fail;
-	if (!rt->rt_gateway)
+	if (rt->rt_gateway == NULL)
 		goto fail;
 	if (rt->rt_gateway->sa_family != AF_LINK)
 		goto fail;
@@ -2300,18 +2546,21 @@ fail:
 		 */
 		sdl->sdl_alen = ifp->if_addrlen;
 		bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen);
+
+		/* cache the gateway (sender HW) address */
+		nd6_llreach_alloc(rt, ifp, LLADDR(sdl), sdl->sdl_alen, FALSE);
 	}
 
 	if (!is_newentry) {
-		if ((!olladdr && lladdr)		/* (3) */
-		 || (olladdr && lladdr && llchange)) {	/* (5) */
+		if ((!olladdr && lladdr != NULL) ||	/* (3) */
+		    (olladdr && lladdr != NULL && llchange)) {	/* (5) */
 			do_update = 1;
 			newstate = ND6_LLINFO_STALE;
 		} else					/* (1-2,4) */
 			do_update = 0;
 	} else {
 		do_update = 1;
-		if (!lladdr)				/* (6) */
+		if (lladdr == NULL)			/* (6) */
 			newstate = ND6_LLINFO_NOSTATE;
 		else					/* (7) */
 			newstate = ND6_LLINFO_STALE;
@@ -2331,18 +2580,19 @@ fail:
 			 * we must set the timer now, although it is actually
 			 * meaningless.
 			 */
-			ln->ln_expire = rt_expiry(rt, timenow.tv_sec,
-			    nd6_gctimer);
+			ln->ln_expire = timenow.tv_sec + nd6_gctimer;
 			ln->ln_hold = NULL;
 
 			if (m != NULL) {
+				struct sockaddr_in6 sin6;
+
+				rtkey_to_sa6(rt, &sin6);
 				/*
 				 * we assume ifp is not a p2p interface here, so
 				 * just set the 2nd argument as the 1st one.
 				 */
 				RT_UNLOCK(rt);
-				nd6_output(ifp, ifp, m,
-				    (struct sockaddr_in6 *)rt_key(rt), rt, 0);
+				nd6_output(ifp, ifp, m, &sin6, rt);
 				RT_LOCK(rt);
 			}
 		} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
@@ -2375,7 +2625,7 @@ fail:
 	 *	0	n	y	--	(3)	c   s     s
 	 *	0	y	y	n	(4)	c   s     s
 	 *	0	y	y	y	(5)	c   s     s
-	 *	1	--	n	--	(6) c	c 	c s
+	 *	1	--	n	--	(6) c	c	c s
 	 *	1	--	y	--	(7) c	c   s	c s
 	 *
 	 *					(c=clear s=set)
@@ -2391,8 +2641,8 @@ fail:
 	case ND_REDIRECT:
 		/*
 		 * If the icmp is a redirect to a better router, always set the
-		 * is_router flag. Otherwise, if the entry is newly created,
-		 * clear the flag. [RFC 2461, sec 8.3]
+		 * is_router flag.  Otherwise, if the entry is newly created,
+		 * clear the flag.  [RFC 2461, sec 8.3]
 		 */
 		if (code == ND_REDIRECT_ROUTER)
 			ln->ln_router = 1;
@@ -2409,8 +2659,8 @@ fail:
 		/*
 		 * Mark an entry with lladdr as a router.
 		 */
-		if ((!is_newentry && (olladdr || lladdr))	/* (2-5) */
-		 || (is_newentry && lladdr)) {			/* (7) */
+		if ((!is_newentry && (olladdr || lladdr)) ||	/* (2-5) */
+		    (is_newentry && lladdr)) {			/* (7) */
 			ln->ln_router = 1;
 		}
 		break;
@@ -2436,7 +2686,7 @@ fail:
 		RT_REMREF_LOCKED(rt);
 		RT_UNLOCK(rt);
 		lck_mtx_lock(nd6_mutex);
-		defrouter_select();
+		defrouter_select(ifp);
 		lck_mtx_unlock(nd6_mutex);
 	} else {
 		RT_REMREF_LOCKED(rt);
@@ -2475,7 +2725,7 @@ nd6_slowtimo(
 #define senderr(e) { error = (e); goto bad;}
 int
 nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
-    struct sockaddr_in6 *dst, struct rtentry *hint0, int locked)
+    struct sockaddr_in6 *dst, struct rtentry *hint0)
 {
 	struct mbuf *m = m0;
 	struct rtentry *rt = hint0, *hint = hint0;
@@ -2520,14 +2770,14 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
 		if (!(rt->rt_flags & RTF_UP)) {
 			RT_REMREF_LOCKED(rt);
 			RT_UNLOCK(rt);
-			if ((hint = rt = rtalloc1((struct sockaddr *)dst,
-			    1, 0)) != NULL) {
+			if ((hint = rt = rtalloc1_scoped((struct sockaddr *)dst,
+			    1, 0, ifp->if_index)) != NULL) {
 				RT_LOCK_SPIN(rt);
 				if (rt->rt_ifp != ifp) {
 					/* XXX: loop care? */
 					RT_UNLOCK(rt);
 					error = nd6_output(ifp, origifp, m0,
-					    dst, rt, locked);
+					    dst, rt);
 					rtfree(rt);
 					return (error);
 				}
@@ -2541,7 +2791,7 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
 			struct in6_ifaddr *ia6 = NULL;
 			struct sockaddr_in6 gw6;
 
-			gw6 = *((struct sockaddr_in6 *)rt->rt_gateway);
+			rtgw_to_sa6(rt, &gw6);
 			/*
 			 * Must drop rt_lock since nd6_is_addr_neighbor()
 			 * calls nd6_lookup() and acquires rnh_lock.
@@ -2564,7 +2814,7 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
 				 * XXX: we may need a more generic rule here.
 				 */
 				if (ia6 != NULL)
-					ifafree(&ia6->ia_ifa);
+					IFA_REMREF(&ia6->ia_ifa);
 				if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
 					senderr(EHOSTUNREACH);
 				goto sendpkt;
@@ -2601,7 +2851,8 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
 				RT_UNLOCK(rt);
 				rtfree(gwrt);
 lookup:
-				gwrt = rtalloc1((struct sockaddr *)&gw6, 1, 0);
+				gwrt = rtalloc1_scoped((struct sockaddr *)&gw6,
+				    1, 0, ifp->if_index);
 
 				RT_LOCK(rt);
 				/*
@@ -2680,6 +2931,12 @@ lookup:
 	if (rt && (rt->rt_flags & RTF_LLINFO) != 0) {
 		ln = rt->rt_llinfo;
 	} else {
+		struct sockaddr_in6 sin6;
+		/*
+		 * Clear out Scope ID field in case it is set.
+		 */
+		sin6 = *dst;
+		sin6.sin6_scope_id = 0;
 		/*
 		 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
 		 * the condition below is not very efficient.  But we believe
@@ -2689,7 +2946,7 @@ lookup:
 		 */
 		if (rt != NULL)
 			RT_UNLOCK(rt);
-		if (nd6_is_addr_neighbor(dst, ifp, 0)) {
+		if (nd6_is_addr_neighbor(&sin6, ifp, 0)) {
 			/* "rtrele" may have been used, so clean up "rt" now */
 			if (rt != NULL) {
 				/* Don't free "hint0" */
@@ -2795,7 +3052,7 @@ lookup:
 		lck_rw_done(nd_if_rwlock);
 		RT_UNLOCK(rt);
 		/* We still have a reference on rt (for ln) */
-		nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0, locked);
+		nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
 	} else {
 		RT_UNLOCK(rt);
 	}
@@ -2828,19 +3085,20 @@ sendpkt:
 	if (rt != NULL)
 		RT_LOCK_ASSERT_NOTHELD(rt);
 
-	/* Clean up HW checksum flags before sending the packet */
-	m->m_pkthdr.csum_data = 0;
-	m->m_pkthdr.csum_flags = 0;
+	/* discard the packet if IPv6 operation is disabled on the interface */
+	lck_rw_lock_shared(nd_if_rwlock);
+	if ((nd_ifinfo[ifp->if_index].flags & ND6_IFF_IFDISABLED)) {
+		lck_rw_done(nd_if_rwlock);
+		error = ENETDOWN; /* better error? */
+		goto bad;
+	}
+	lck_rw_done(nd_if_rwlock);
 
 	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
 		/* forwarding rules require the original scope_id */
 		m->m_pkthdr.rcvif = origifp;
-		if (locked)
-			lck_mtx_unlock(ip6_mutex);
 		error = dlil_output(origifp, PF_INET6, m, (caddr_t)rt,
 		    (struct sockaddr *)dst, 0);
-		if (locked)
-			lck_mtx_lock(ip6_mutex);
 		goto release;
 	} else {
 		/* Do not allow loopback address to wind up on a wire */
@@ -2862,13 +3120,20 @@ sendpkt:
 		}
 	}
 
+	if (rt != NULL) {
+		RT_LOCK_SPIN(rt);
+		/* Mark use timestamp */
+		if (rt->rt_llinfo != NULL)
+			nd6_llreach_use(rt->rt_llinfo);
+		RT_UNLOCK(rt);
+	}
+
+	if (hint && nstat_collect)
+		nstat_route_tx(hint, 1, m->m_pkthdr.len, 0);
+
 	m->m_pkthdr.rcvif = NULL;
-	if (locked)
-		lck_mtx_unlock(ip6_mutex);
 	error = dlil_output(ifp, PF_INET6, m, (caddr_t)rt,
 	    (struct sockaddr *)dst, 0);
-	if (locked)
-		lck_mtx_lock(ip6_mutex);
 	goto release;
 
 bad:
@@ -2923,8 +3188,13 @@ nd6_need_cache(
 #if IFT_IEEE80211
 	case IFT_IEEE80211:
 #endif
-	case IFT_BRIDGE:
 	case IFT_GIF:		/* XXX need more cases? */
+	case IFT_PPP:
+#if IFT_TUNNEL
+	case IFT_TUNNEL:
+#endif
+	case IFT_BRIDGE:
+	case IFT_CELLULAR:
 		return(1);
 	default:
 		return(0);
@@ -3110,6 +3380,8 @@ nd6_sysctl_drlist SYSCTL_HANDLER_ARGS
 					    "default router list (%s)\n",
 					    ip6_sprintf(&dr->rtaddr));
 				d->flags = dr->flags;
+				d->stateflags = dr->stateflags;
+				d->stateflags &= ~NDDRF_PROCESSED;
 				d->rtlifetime = dr->rtlifetime;
 				d->expire = dr->expire;
 				d->if_index = dr->ifp->if_index;
@@ -3140,6 +3412,8 @@ nd6_sysctl_drlist SYSCTL_HANDLER_ARGS
 					    "default router list (%s)\n",
 					    ip6_sprintf(&dr->rtaddr));
 				d_32->flags = dr->flags;
+				d_32->stateflags = dr->stateflags;
+				d_32->stateflags &= ~NDDRF_PROCESSED;
 				d_32->rtlifetime = dr->rtlifetime;
 				d_32->expire = dr->expire;
 				d_32->if_index = dr->ifp->if_index;
@@ -3184,6 +3458,7 @@ nd6_sysctl_prlist SYSCTL_HANDLER_ARGS
 				bzero(p, sizeof (*p));
 				sin6 = (struct sockaddr_in6 *)(p + 1);
 
+				NDPR_LOCK(pr);
 				p->prefix = pr->ndpr_prefix;
 				if (in6_recoverscope(&p->prefix,
 				    &p->prefix.sin6_addr, pr->ndpr_ifp) != 0)
@@ -3196,7 +3471,7 @@ nd6_sysctl_prlist SYSCTL_HANDLER_ARGS
 				p->pltime = pr->ndpr_pltime;
 				p->if_index = pr->ndpr_ifp->if_index;
 				p->expire = pr->ndpr_expire;
-				p->refcnt = pr->ndpr_refcnt;
+				p->refcnt = pr->ndpr_addrcnt;
 				p->flags = pr->ndpr_stateflags;
 				p->origin = PR_ORIG_RA;
 				advrtrs = 0;
@@ -3222,6 +3497,7 @@ nd6_sysctl_prlist SYSCTL_HANDLER_ARGS
 					advrtrs++;
 				}
 				p->advrtrs = advrtrs;
+				NDPR_UNLOCK(pr);
 			} else {
 				panic("buffer too short");
 			}
@@ -3246,6 +3522,7 @@ nd6_sysctl_prlist SYSCTL_HANDLER_ARGS
 				bzero(p_32, sizeof (*p_32));
 				sin6 = (struct sockaddr_in6 *)(p_32 + 1);
 
+				NDPR_LOCK(pr);
 				p_32->prefix = pr->ndpr_prefix;
 				if (in6_recoverscope(&p_32->prefix,
 				    &p_32->prefix.sin6_addr, pr->ndpr_ifp) != 0)
@@ -3258,7 +3535,7 @@ nd6_sysctl_prlist SYSCTL_HANDLER_ARGS
 				p_32->pltime = pr->ndpr_pltime;
 				p_32->if_index = pr->ndpr_ifp->if_index;
 				p_32->expire = pr->ndpr_expire;
-				p_32->refcnt = pr->ndpr_refcnt;
+				p_32->refcnt = pr->ndpr_addrcnt;
 				p_32->flags = pr->ndpr_stateflags;
 				p_32->origin = PR_ORIG_RA;
 				advrtrs = 0;
@@ -3284,6 +3561,7 @@ nd6_sysctl_prlist SYSCTL_HANDLER_ARGS
 					advrtrs++;
 				}
 				p_32->advrtrs = advrtrs;
+				NDPR_UNLOCK(pr);
 			} else {
 				panic("buffer too short");
 			}
@@ -3297,7 +3575,7 @@ nd6_sysctl_prlist SYSCTL_HANDLER_ARGS
 	return (error);
 }
 SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
-	CTLFLAG_RD, 0, 0, nd6_sysctl_drlist, "S,in6_defrouter","");
+	CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, nd6_sysctl_drlist, "S,in6_defrouter","");
 SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
-	CTLFLAG_RD, 0, 0, nd6_sysctl_prlist, "S,in6_defrouter","");
+	CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, nd6_sysctl_prlist, "S,in6_defrouter","");
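
For reference, a hypothetical userland consumer of the nd6_drlist handler above; the reply is a packed array of struct in6_defrouter records, fetched with the usual size-then-read sysctl sequence:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>
    #include <stdlib.h>

    int
    main(void)
    {
    	size_t len = 0;
    	char *buf;

    	/* first call sizes the buffer, second call fills it */
    	if (sysctlbyname("net.inet6.icmp6.nd6_drlist", NULL, &len,
    	    NULL, 0) == -1 || len == 0)
    		return (1);
    	if ((buf = malloc(len)) == NULL)
    		return (1);
    	if (sysctlbyname("net.inet6.icmp6.nd6_drlist", buf, &len,
    	    NULL, 0) == -1)
    		return (1);
    	printf("%zu bytes of in6_defrouter records\n", len);
    	free(buf);
    	return (0);
    }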
 
diff --git a/bsd/netinet6/nd6.h b/bsd/netinet6/nd6.h
index 26fca3c11..601e075aa 100644
--- a/bsd/netinet6/nd6.h
+++ b/bsd/netinet6/nd6.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -69,7 +69,7 @@
 
 #include <sys/queue.h>
 
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 #include <kern/locks.h>
 
 struct	llinfo_nd6 {
@@ -89,12 +89,14 @@ struct	llinfo_nd6 {
 	short	ln_router;	/* 2^0: ND6 router bit */
 	int	ln_byhint;	/* # of times we made it reachable by UL hint */
 	u_int32_t ln_flags;	/* flags; see below */
+	struct	if_llreach *ln_llreach;	/* link-layer reachability record */
+	u_int64_t ln_lastused;	/* last used timestamp */
 };
 
 /* Values for ln_flags */
 #define	ND6_LNF_TIMER_SKIP	0x1	/* modified by nd6_timer() */
 #define	ND6_LNF_IN_USE		0x2	/* currently in llinfo_nd6 list */
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 #define ND6_LLINFO_PURGE	-3
 #define ND6_LLINFO_NOSTATE	-2
@@ -112,16 +114,27 @@ struct	llinfo_nd6 {
 #define ND6_LLINFO_DELAY	3
 #define ND6_LLINFO_PROBE	4
 
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 #define ND6_IS_LLINFO_PROBREACH(n) ((n)->ln_state > ND6_LLINFO_INCOMPLETE)
-#endif /* KERNEL_PRIVATE */
-
-#if !defined(KERNEL_PRIVATE)
+#define ND6_LLINFO_PERMANENT(n) (((n)->ln_expire == 0) && ((n)->ln_state > ND6_LLINFO_INCOMPLETE))
+#define ND6_IFF_PERFORMNUD	0x1
+#define ND6_IFF_ACCEPT_RTADV	0x2 /* APPLE: not used.  Interface-specific router
+				     * advertisements are handled with a specific
+				     * ifnet flag: IFEF_ACCEPT_RTADVD
+				     */
+#define ND6_IFF_PREFER_SOURCE	0x4 /* APPLE: NOT USED; not related to ND. */
+#define ND6_IFF_IFDISABLED	0x8 /* IPv6 operation is disabled due to
+				     * DAD failure.  (XXX: not ND-specific)
+				     */
+#define ND6_IFF_DONT_SET_IFROUTE	0x10 /* NOT USED */
+
+#endif /* XNU_KERNEL_PRIVATE */
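
ND6_IFF_IFDISABLED is consulted on the output path; the nd6_output() hunk earlier in this patch drops packets destined for a disabled interface with this pattern:

    /* discard the packet if IPv6 operation is disabled on the interface */
    lck_rw_lock_shared(nd_if_rwlock);
    if (nd_ifinfo[ifp->if_index].flags & ND6_IFF_IFDISABLED) {
    	lck_rw_done(nd_if_rwlock);
    	error = ENETDOWN;	/* better error? */
    	goto bad;
    }
    lck_rw_done(nd_if_rwlock);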
+
+#if !defined(XNU_KERNEL_PRIVATE)
 struct nd_ifinfo {
 #else
 /* For binary compatibility, this structure must not change */
 struct nd_ifinfo_compat {
-#endif /* !KERNEL_PRIVATE */
+#endif /* !XNU_KERNEL_PRIVATE */
 	u_int32_t linkmtu;		/* LinkMTU */
 	u_int32_t maxmtu;		/* Upper bound of LinkMTU */
 	u_int32_t basereachable;	/* BaseReachableTime */
@@ -137,7 +150,7 @@ struct nd_ifinfo_compat {
 	u_int8_t randomid[8];	/* current random ID */
 };
 
-#if defined(KERNEL_PRIVATE)
+#if defined(XNU_KERNEL_PRIVATE)
 struct nd_ifinfo {
 	u_int32_t linkmtu;		/* LinkMTU */
 	u_int32_t maxmtu;		/* Upper bound of LinkMTU */
@@ -147,7 +160,7 @@ struct nd_ifinfo {
 	u_int32_t flags;		/* Flags */
 	int recalctm;			/* BaseReachable re-calculation timer */
 	u_int8_t chlim;			/* CurHopLimit */
-	u_int8_t receivedra;
+	u_int8_t initialized; /* Flag to see whether the entry is initialized */
 	/* the following 3 members are for privacy extension for addrconf */
 	u_int8_t randomseed0[8]; /* upper 64 bits of MD5 digest */
 	u_int8_t randomseed1[8]; /* lower 64 bits (usually the EUI64 IFID) */
@@ -156,7 +169,7 @@ struct nd_ifinfo {
 	int32_t nprefixes;
 	int32_t ndefrouters;
 };
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 #define ND6_IFF_PERFORMNUD	0x1
 
@@ -169,7 +182,7 @@ struct in6_nbrinfo {
 	int	expire;		/* lifetime for NDP state transition */
 };
 
-#if defined(KERNEL_PRIVATE)
+#if defined(XNU_KERNEL_PRIVATE)
 struct in6_nbrinfo_32 {
 	char ifname[IFNAMSIZ];
 	struct in6_addr addr;
@@ -187,7 +200,7 @@ struct in6_nbrinfo_64 {
 	int	state;
 	int	expire;
 } __attribute__((aligned(8)));
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 #define DRLSTSIZ 10
 #define PRLSTSIZ 10
@@ -203,7 +216,7 @@ struct	in6_drlist {
 	} defrouter[DRLSTSIZ];
 };
 
-#if defined(KERNEL_PRIVATE)
+#if defined(XNU_KERNEL_PRIVATE)
 struct	in6_drlist_32 {
 	char ifname[IFNAMSIZ];
 	struct {
@@ -225,20 +238,30 @@ struct	in6_drlist_64 {
 		u_short if_index	__attribute__((aligned(8)));
 	} defrouter[DRLSTSIZ] __attribute__((aligned(8)));
 };
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
+
+/* valid values for stateflags */
+#define	NDDRF_INSTALLED	0x1	/* installed in the routing table */
+#define	NDDRF_IFSCOPE	0x2	/* installed as a scoped route */
+#define	NDDRF_STATIC	0x4	/* for internal use only */
+#ifdef XNU_KERNEL_PRIVATE
+#define	NDDRF_PROCESSED	0x10
+#endif
 
 struct	in6_defrouter {
 	struct	sockaddr_in6 rtaddr;
 	u_char	flags;
+	u_char	stateflags;
 	u_short	rtlifetime;
 	u_long	expire;
 	u_short if_index;
 };
 
-#if defined(KERNEL_PRIVATE)
+#if defined(XNU_KERNEL_PRIVATE)
 struct	in6_defrouter_32 {
 	struct	sockaddr_in6 rtaddr;
 	u_char	flags;
+	u_char	stateflags;
 	u_short	rtlifetime;
 	u_int32_t expire;
 	u_short if_index;
@@ -247,11 +270,12 @@ struct	in6_defrouter_32 {
 struct	in6_defrouter_64 {
 	struct	sockaddr_in6 rtaddr;
 	u_char	flags;
+	u_char	stateflags;
 	u_short	rtlifetime;
 	u_long	expire		__attribute__((aligned(8)));
 	u_short if_index	__attribute__((aligned(8)));
 } __attribute__((aligned(8)));
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 struct	in6_prlist {
 	char ifname[IFNAMSIZ];
@@ -269,7 +293,7 @@ struct	in6_prlist {
 	} prefix[PRLSTSIZ];
 };
 
-#if defined(KERNEL_PRIVATE)
+#if defined(XNU_KERNEL_PRIVATE)
 struct	in6_prlist_32 {
 	char ifname[IFNAMSIZ];
 	struct {
@@ -302,7 +326,7 @@ struct	in6_prlist_64 {
 		struct	in6_addr advrtr[DRLSTSIZ];
 	} prefix[PRLSTSIZ];
 };
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 struct in6_prefix {
 	struct	sockaddr_in6 prefix;
@@ -319,7 +343,7 @@ struct in6_prefix {
 	/* struct sockaddr_in6 advrtr[] */
 };
 
-#if defined(KERNEL_PRIVATE)
+#if defined(XNU_KERNEL_PRIVATE)
 struct in6_prefix_32 {
 	struct	sockaddr_in6 prefix;
 	struct prf_ra raflags;
@@ -331,7 +355,7 @@ struct in6_prefix_32 {
 	u_int32_t flags;
 	int refcnt;
 	u_short if_index;
-	u_short advrtrs;
+	u_short advrtrs; /* number of advertising routers */
 	/* struct sockaddr_in6 advrtr[] */
 };
 
@@ -349,7 +373,7 @@ struct in6_prefix_64 {
 	u_short advrtrs;
 	/* struct sockaddr_in6 advrtr[] */
 };
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 struct	in6_ondireq {
 	char ifname[IFNAMSIZ];
@@ -366,7 +390,7 @@ struct	in6_ondireq {
 	} ndi;
 };
 
-#if !defined(KERNEL_PRIVATE)
+#if !defined(XNU_KERNEL_PRIVATE)
 struct	in6_ndireq {
 	char ifname[IFNAMSIZ];
 	struct nd_ifinfo ndi;
@@ -376,14 +400,17 @@ struct	in6_ndireq {
 	char ifname[IFNAMSIZ];
 	struct nd_ifinfo_compat ndi;
 };
-#endif /* !KERNEL_PRIVATE */
+#endif /* !XNU_KERNEL_PRIVATE */
 
 struct	in6_ndifreq {
 	char ifname[IFNAMSIZ];
 	u_long ifindex;
 };
 
-#if defined(KERNEL_PRIVATE)
+#define MAX_RTR_SOLICITATION_DELAY	1	/* 1sec */
+#define RTR_SOLICITATION_INTERVAL	4	/* 4sec */
+
+#if defined(XNU_KERNEL_PRIVATE)
 struct	in6_ndifreq_32 {
 	char ifname[IFNAMSIZ];
 	u_int32_t ifindex;
@@ -393,11 +420,16 @@ struct	in6_ndifreq_64 {
 	char ifname[IFNAMSIZ];
 	u_long ifindex	__attribute__((aligned(8)));
 };
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 /* Prefix status */
 #define NDPRF_ONLINK		0x1
 #define NDPRF_DETACHED		0x2
+#define	NDPRF_STATIC		0x100
+#define	NDPRF_IFSCOPE		0x1000
+#ifdef XNU_KERNEL_PRIVATE
+#define	NDPRF_PROCESSED		0x08000
+#endif
 
 /* protocol constants */
 #define MAX_RTR_SOLICITATION_DELAY	1	/*1sec*/
@@ -405,8 +437,9 @@ struct	in6_ndifreq_64 {
 #define MAX_RTR_SOLICITATIONS		3
 
 #define ND6_INFINITE_LIFETIME		0xffffffff
+#define ND6_MAX_LIFETIME		0x7fffffff
 
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 /*
  * Protects nd_ifinfo[]
  */
@@ -448,18 +481,59 @@ __private_extern__ lck_rw_t *nd_if_rwlock;
 
 TAILQ_HEAD(nd_drhead, nd_defrouter);
 struct	nd_defrouter {
+	decl_lck_mtx_data(, nddr_lock);
+	uint32_t	nddr_refcount;
+	uint32_t	nddr_debug;
 	TAILQ_ENTRY(nd_defrouter) dr_entry;
-	struct	in6_addr rtaddr;
-	u_char	flags;		/* flags on RA message */
-	u_short	rtlifetime;
+	struct		in6_addr rtaddr;
+	u_char		flags;		/* flags on RA message */
+	u_char		stateflags;
+	u_short		rtlifetime;
 	u_int32_t	expire;
-	u_int32_t	advint;		/* Mobile IPv6 addition (milliseconds) */
-	u_int32_t	advint_expire;	/* Mobile IPv6 addition */
-	int	advints_lost;	/* Mobile IPv6 addition */
-	struct  ifnet *ifp;
+	struct  ifnet	*ifp;
+	unsigned int	genid;
+	int		err;
+	void (*nddr_trace)		/* callback fn for tracing refs */
+	    (struct nd_defrouter *, int);
 };
 
+#define	NDDR_LOCK_ASSERT_HELD(_nddr)					\
+	lck_mtx_assert(&(_nddr)->nddr_lock, LCK_MTX_ASSERT_OWNED)
+
+#define	NDDR_LOCK_ASSERT_NOTHELD(_nddr)					\
+	lck_mtx_assert(&(_nddr)->nddr_lock, LCK_MTX_ASSERT_NOTOWNED)
+
+#define	NDDR_LOCK(_nddr)						\
+	lck_mtx_lock(&(_nddr)->nddr_lock)
+
+#define	NDDR_LOCK_SPIN(_nddr)						\
+	lck_mtx_lock_spin(&(_nddr)->nddr_lock)
+
+#define	NDDR_CONVERT_LOCK(_nddr) do {					\
+	NDDR_LOCK_ASSERT_HELD(_nddr);					\
+	lck_mtx_convert_spin(&(_nddr)->nddr_lock);			\
+} while (0)
+
+#define	NDDR_UNLOCK(_nddr)						\
+	lck_mtx_unlock(&(_nddr)->nddr_lock)
+
+#define	NDDR_ADDREF(_nddr)						\
+	nddr_addref(_nddr, 0)
+
+#define	NDDR_ADDREF_LOCKED(_nddr)					\
+	nddr_addref(_nddr, 1)
+
+#define	NDDR_REMREF(_nddr) do {						\
+	(void) nddr_remref(_nddr, 0);					\
+} while (0)
+
+#define	NDDR_REMREF_LOCKED(_nddr)					\
+	nddr_remref(_nddr, 1)
+
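
A plausible hold/use/release sequence under these macros (a sketch only; it assumes nd6_mutex protects the list, as elsewhere in this patch):

    struct nd_defrouter *dr;

    lck_mtx_lock(nd6_mutex);
    dr = TAILQ_FIRST(&nd_defrouter);
    if (dr != NULL) {
    	NDDR_ADDREF(dr);	/* pin the entry across the unlock */
    	lck_mtx_unlock(nd6_mutex);
    	/* ... use dr without nd6_mutex held ... */
    	NDDR_REMREF(dr);	/* may free dr on the last release */
    } else {
    	lck_mtx_unlock(nd6_mutex);
    }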
 struct nd_prefix {
+	decl_lck_mtx_data(, ndpr_lock);
+	u_int32_t	ndpr_refcount;	/* reference count */
+	u_int32_t	ndpr_debug;	/* see ifa_debug flags */
 	struct ifnet *ndpr_ifp;
 	LIST_ENTRY(nd_prefix) ndpr_entry;
 	struct sockaddr_in6 ndpr_prefix;	/* prefix */
@@ -467,15 +541,17 @@ struct nd_prefix {
 	struct in6_addr ndpr_addr; /* address that is derived from the prefix */
 	u_int32_t ndpr_vltime;	/* advertised valid lifetime */
 	u_int32_t ndpr_pltime;	/* advertised preferred lifetime */
-	time_t ndpr_expire;	/* expiration time of the prefix */
 	time_t ndpr_preferred;	/* preferred time of the prefix */
+	time_t ndpr_expire;	/* expiration time of the prefix */
+	time_t ndpr_lastupdate; /* reception time of last advertisement */
 	struct prf_ra ndpr_flags;
 	u_int32_t ndpr_stateflags; /* actual state flags */
 	/* list of routers that advertise the prefix: */
 	LIST_HEAD(pr_rtrhead, nd_pfxrouter) ndpr_advrtrs;
 	u_char	ndpr_plen;
-	int	ndpr_refcnt;	/* reference counter from addresses */
-	int	ndpr_usecnt;	/* actual use count; prevents free */
+	int	ndpr_addrcnt;	/* reference counter from addresses */
+	void (*ndpr_trace)		/* callback fn for tracing refs */
+	    (struct nd_prefix *, int);
 };
 
 #define ndpr_next		ndpr_entry.le_next
@@ -483,13 +559,46 @@ struct nd_prefix {
 #define ndpr_raf		ndpr_flags
 #define ndpr_raf_onlink		ndpr_flags.onlink
 #define ndpr_raf_auto		ndpr_flags.autonomous
-
+#define ndpr_raf_router		ndpr_flags.router
 /*
  * We keep expired prefix for certain amount of time, for validation purposes.
  * 1800s = MaxRtrAdvInterval
  */
 #define NDPR_KEEP_EXPIRED	(1800 * 2)
 
+#define	NDPR_LOCK_ASSERT_HELD(_ndpr)					\
+	lck_mtx_assert(&(_ndpr)->ndpr_lock, LCK_MTX_ASSERT_OWNED)
+
+#define	NDPR_LOCK_ASSERT_NOTHELD(_ndpr)					\
+	lck_mtx_assert(&(_ndpr)->ndpr_lock, LCK_MTX_ASSERT_NOTOWNED)
+
+#define	NDPR_LOCK(_ndpr)						\
+	lck_mtx_lock(&(_ndpr)->ndpr_lock)
+
+#define	NDPR_LOCK_SPIN(_ndpr)						\
+	lck_mtx_lock_spin(&(_ndpr)->ndpr_lock)
+
+#define	NDPR_CONVERT_LOCK(_ndpr) do {					\
+	NDPR_LOCK_ASSERT_HELD(_ndpr);					\
+	lck_mtx_convert_spin(&(_ndpr)->ndpr_lock);			\
+} while (0)
+
+#define	NDPR_UNLOCK(_ndpr)						\
+	lck_mtx_unlock(&(_ndpr)->ndpr_lock)
+
+#define	NDPR_ADDREF(_ndpr)						\
+	ndpr_addref(_ndpr, 0)
+
+#define	NDPR_ADDREF_LOCKED(_ndpr)					\
+	ndpr_addref(_ndpr, 1)
+
+#define	NDPR_REMREF(_ndpr) do {						\
+	(void) ndpr_remref(_ndpr, 0);					\
+} while (0)
+
+#define	NDPR_REMREF_LOCKED(_ndpr)					\
+	ndpr_remref(_ndpr, 1)
+
 /*
  * Message format for use in obtaining information about prefixes
  * from inet6 sysctl function
@@ -533,6 +642,7 @@ extern int nd6_delay;
 extern int nd6_umaxtries;
 extern int nd6_mmaxtries;
 extern int nd6_useloopback;
+extern int nd6_accept_6to4;
 extern int nd6_maxnudhint;
 extern int nd6_gctimer;
 extern struct llinfo_nd6 llinfo_nd6;
@@ -541,20 +651,21 @@ extern struct nd_drhead nd_defrouter;
 extern struct nd_prhead nd_prefix;
 extern int nd6_debug;
 extern size_t nd_ifinfo_indexlim;
+extern int nd6_onlink_ns_rfc4861;
 
-#define nd6log(x)	do { if (nd6_debug) log x; } while (0)
-
-extern struct callout nd6_timer_ch;
+#define nd6log(x)	do { if (nd6_debug >= 1) log x; } while (0)
+#define nd6log2(x)	do { if (nd6_debug >= 2) log x; } while (0)
 
 /* nd6_rtr.c */
 extern int nd6_defifindex;
 extern int ip6_desync_factor;	/* seconds */
+/* ND6_INFINITE_LIFETIME does not apply to temporary addresses */
 extern u_int32_t ip6_temp_preferred_lifetime; /* seconds */
 extern u_int32_t ip6_temp_valid_lifetime; /* seconds */
 extern int ip6_temp_regen_advance; /* seconds */
 
 union nd_opts {
-	struct nd_opt_hdr *nd_opt_array[9];	/*max = home agent info*/
+	struct nd_opt_hdr *nd_opt_array[8];	/* max = target address list */
 	struct {
 		struct nd_opt_hdr *zero;
 		struct nd_opt_hdr *src_lladdr;
@@ -562,9 +673,6 @@ union nd_opts {
 		struct nd_opt_prefix_info *pi_beg; /* multiple opts, start */
 		struct nd_opt_rd_hdr *rh;
 		struct nd_opt_mtu *mtu;
-		struct nd_opt_hdr *six;
-		struct nd_opt_advint *adv;
-		struct nd_opt_hai *hai;
 		struct nd_opt_hdr *search;	/* multiple opts */
 		struct nd_opt_hdr *last;	/* multiple opts */
 		int done;
@@ -577,8 +685,6 @@ union nd_opts {
 #define nd_opts_pi_end		nd_opt_each.pi_end
 #define nd_opts_rh		nd_opt_each.rh
 #define nd_opts_mtu		nd_opt_each.mtu
-#define nd_opts_adv		nd_opt_each.adv
-#define nd_opts_hai		nd_opt_each.hai
 #define nd_opts_search		nd_opt_each.search
 #define nd_opts_last		nd_opt_each.last
 #define nd_opts_done		nd_opt_each.done
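
Typical consumption of union nd_opts, condensed from nd6_ns_input() later in this patch (nd_ns and icmp6len assumed to be in scope): initialize the parser over the option block that trails the fixed ND header, validate it, then pick out the option of interest.

    union nd_opts ndopts;
    char *lladdr = NULL;
    int lladdrlen = 0;

    nd6_option_init(nd_ns + 1, icmp6len, &ndopts);
    if (nd6_options(&ndopts) < 0)
    	goto bad;	/* malformed options; drop the packet */
    if (ndopts.nd_opts_src_lladdr != NULL) {
    	lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
    	lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
    }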
@@ -604,49 +710,62 @@ extern int nd6_ioctl(u_long, caddr_t, struct ifnet *);
 extern void nd6_cache_lladdr(struct ifnet *, struct in6_addr *,
     char *, int, int, int);
 extern int nd6_output(struct ifnet *, struct ifnet *, struct mbuf *,
-    struct sockaddr_in6 *, struct rtentry *, int);
+    struct sockaddr_in6 *, struct rtentry *);
 extern int nd6_storelladdr(struct ifnet *, struct rtentry *, struct mbuf *,
     struct sockaddr *, u_char *);
 extern int nd6_need_cache(struct ifnet *);
 extern void nd6_drain(void *);
 
 /* nd6_nbr.c */
+extern void nd6_nbr_init(void);
 extern void nd6_na_input(struct mbuf *, int, int);
 extern void nd6_na_output(struct ifnet *, const struct in6_addr *,
     const struct in6_addr *, u_int32_t, int, struct sockaddr *);
 extern void nd6_ns_input(struct mbuf *, int, int);
 extern void nd6_ns_output(struct ifnet *, const struct in6_addr *,
-    const struct in6_addr *, struct llinfo_nd6 *, int, int);
+    const struct in6_addr *, struct llinfo_nd6 *, int);
 extern caddr_t nd6_ifptomac(struct ifnet *);
 extern void nd6_dad_start(struct ifaddr *, int *);
 extern void nd6_dad_stop(struct ifaddr *);
-extern void nd6_dad_duplicated(struct ifaddr *);
+extern void nd6_dad_duplicated(struct ifaddr *, boolean_t);
+extern void nd6_llreach_alloc(struct rtentry *, struct ifnet *, void *,
+    unsigned int, boolean_t);
+extern void nd6_llreach_set_reachable(struct ifnet *, void *, unsigned int);
+extern void nd6_llreach_use(struct llinfo_nd6 *);
 
 /* nd6_rtr.c */
+extern void nd6_rtr_init(void);
 extern void nd6_rs_input(struct mbuf *, int, int);
 extern void nd6_ra_input(struct mbuf *, int, int);
 extern void prelist_del(struct nd_prefix *);
-extern void defrouter_addreq(struct nd_defrouter *);
-extern void defrouter_delreq(struct nd_defrouter *, int);
-extern void defrouter_select(void);
-extern void defrtrlist_del(struct nd_defrouter *, int);
-extern void prelist_remove(struct nd_prefix *, int);
+extern void defrouter_addreq(struct nd_defrouter *, boolean_t);
+extern void defrouter_delreq(struct nd_defrouter *);
+extern void defrouter_select(struct ifnet *);
+extern void defrouter_reset(void);
+extern int defrtrlist_ioctl(u_long, caddr_t);
+extern void defrtrlist_del(struct nd_defrouter *);
+extern int defrtrlist_add_static(struct nd_defrouter *);
+extern int defrtrlist_del_static(struct nd_defrouter *);
+extern void prelist_remove(struct nd_prefix *);
 extern int prelist_update(struct nd_prefix *, struct nd_defrouter *,
-    struct mbuf *);
+    struct mbuf *, int);
 extern int nd6_prelist_add(struct nd_prefix *, struct nd_defrouter *,
-    struct nd_prefix **);
-extern int nd6_prefix_onlink(struct nd_prefix *, int, int);
+    struct nd_prefix **, boolean_t);
+extern int nd6_prefix_onlink(struct nd_prefix *);
+extern int nd6_prefix_onlink_scoped(struct nd_prefix *, unsigned int);
 extern int nd6_prefix_offlink(struct nd_prefix *);
-extern void pfxlist_onlink_check(int);
+extern void pfxlist_onlink_check(void);
 extern struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *);
 extern struct nd_prefix *nd6_prefix_lookup(struct nd_prefix *);
 extern int in6_init_prefix_ltimes(struct nd_prefix *ndpr);
 extern void rt6_flush(struct in6_addr *, struct ifnet *);
 extern int nd6_setdefaultiface(int);
 extern int in6_tmpifadd(const struct in6_ifaddr *, int, int);
-extern void ndpr_hold(struct nd_prefix *, boolean_t);
-extern void ndpr_rele(struct nd_prefix *, boolean_t);
-#endif /* KERNEL_PRIVATE */
+extern void nddr_addref(struct nd_defrouter *, int);
+extern struct nd_defrouter *nddr_remref(struct nd_defrouter *, int);
+extern void ndpr_addref(struct nd_prefix *, int);
+extern struct nd_prefix *ndpr_remref(struct nd_prefix *, int);
+#endif /* XNU_KERNEL_PRIVATE */
 
 #ifdef KERNEL
 
diff --git a/bsd/netinet6/nd6_nbr.c b/bsd/netinet6/nd6_nbr.c
index 5b0d744a6..b2abd8169 100644
--- a/bsd/netinet6/nd6_nbr.c
+++ b/bsd/netinet6/nd6_nbr.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -67,11 +67,19 @@
 #include <sys/kernel.h>
 #include <sys/errno.h>
 #include <sys/syslog.h>
+#include <sys/sysctl.h>
+#include <sys/mcache.h>
+#include <sys/protosw.h>
 #include <kern/queue.h>
 
+#include <kern/locks.h>
+#include <kern/zalloc.h>
+
 #include <net/if.h>
+#include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/if_dl.h>
+#include <net/if_llreach.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
@@ -80,6 +88,7 @@
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
+#include <netinet6/scope6_var.h>
 #include <netinet/icmp6.h>
 
 #if IPSEC
@@ -96,27 +105,147 @@ extern int ipsec_bypass;
 
 struct dadq;
 static struct dadq *nd6_dad_find(struct ifaddr *);
-#ifndef __APPLE__
-static void nd6_dad_starttimer(struct dadq *, int);
-static void nd6_dad_stoptimer(struct dadq *);
-#else
 void nd6_dad_stoptimer(struct ifaddr *);
-#endif
 static void nd6_dad_timer(struct ifaddr *);
 static void nd6_dad_ns_output(struct dadq *, struct ifaddr *);
 static void nd6_dad_ns_input(struct ifaddr *);
-static void nd6_dad_na_input(struct ifaddr *);
+static void nd6_dad_na_input(struct ifaddr *, caddr_t, int);
+static void dad_addref(struct dadq *, int);
+static void dad_remref(struct dadq *);
+static struct dadq *nd6_dad_attach(struct dadq *, struct ifaddr *);
+static void nd6_dad_detach(struct dadq *, struct ifaddr *);
 
 static int dad_ignore_ns = 0;	/* ignore NS in DAD - spec-wise incorrect */
 static int dad_maxtry = 15;	/* max # of *tries* to transmit DAD packet */
 
+static unsigned int dad_size;			/* size of zone element */
+static struct zone *dad_zone;			/* zone for dadq */
+
+#define	DAD_ZONE_MAX	64			/* maximum elements in zone */
+#define	DAD_ZONE_NAME	"nd6_dad"		/* zone name */
+
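
The zone parameters above are presumably consumed at init time roughly as follows (a sketch; the actual nd6_nbr_init() body is outside this hunk, and the panic message is illustrative):

    dad_size = sizeof (struct dadq);
    dad_zone = zinit(dad_size, DAD_ZONE_MAX * dad_size, 0, DAD_ZONE_NAME);
    if (dad_zone == NULL)
    	panic("nd6_nbr_init: failed allocating %s", DAD_ZONE_NAME);
    zone_change(dad_zone, Z_EXPAND, TRUE);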
+#define	DAD_LOCK_ASSERT_HELD(_dp)					\
+	lck_mtx_assert(&(_dp)->dad_lock, LCK_MTX_ASSERT_OWNED)
+
+#define	DAD_LOCK_ASSERT_NOTHELD(_dp)					\
+	lck_mtx_assert(&(_dp)->dad_lock, LCK_MTX_ASSERT_NOTOWNED)
+
+#define	DAD_LOCK(_dp)							\
+	lck_mtx_lock(&(_dp)->dad_lock)
+
+#define	DAD_LOCK_SPIN(_dp)						\
+	lck_mtx_lock_spin(&(_dp)->dad_lock)
+
+#define	DAD_CONVERT_LOCK(_dp) do {					\
+	DAD_LOCK_ASSERT_HELD(_dp);					\
+	lck_mtx_convert_spin(&(_dp)->dad_lock);				\
+} while (0)
+
+#define	DAD_UNLOCK(_dp)							\
+	lck_mtx_unlock(&(_dp)->dad_lock)
+
+#define	DAD_ADDREF(_dp)							\
+	dad_addref(_dp, 0)
+
+#define	DAD_ADDREF_LOCKED(_dp)						\
+	dad_addref(_dp, 1)
+
+#define	DAD_REMREF(_dp)							\
+	dad_remref(_dp)
+
 extern lck_mtx_t *dad6_mutex;
 extern lck_mtx_t *nd6_mutex;
+extern int in6_get_hw_ifid(struct ifnet *, struct in6_addr *);
+
+static int nd6_llreach_base = (LL_BASE_REACHABLE / 1000); /* seconds */
+
+SYSCTL_DECL(_net_inet6_icmp6);
+
+SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_llreach_base,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &nd6_llreach_base, LL_BASE_REACHABLE,
+    "default ND6 link-layer reachability max lifetime (in seconds)");
+
+#define SIN6(s)	((struct sockaddr_in6 *)s)
+
 /*
- * Input an Neighbor Solicitation Message.
+ * Obtain a link-layer source cache entry for the sender.
+ *
+ * NOTE: This is currently only for ND6/Ethernet.
+ */
+void
+nd6_llreach_alloc(struct rtentry *rt, struct ifnet *ifp, void *addr,
+    unsigned int alen, boolean_t solicited)
+{
+	struct llinfo_nd6 *ln = rt->rt_llinfo;
+
+	if (nd6_llreach_base != 0 &&
+	    ln->ln_expire != 0 && rt->rt_ifp != lo_ifp &&
+	    ifp->if_addrlen == IF_LLREACH_MAXLEN &&	/* Ethernet */
+	    alen == ifp->if_addrlen) {
+		struct if_llreach *lr;
+		const char *why = NULL, *type = "";
+
+		/* Become a regular mutex, just in case */
+		RT_CONVERT_LOCK(rt);
+
+		if ((lr = ln->ln_llreach) != NULL) {
+			type = (solicited ? "ND6 advertisement" :
+			    "ND6 unsolicited announcement");
+			/*
+			 * If target has changed, create a new record;
+			 * otherwise keep existing record.
+			 */
+			IFLR_LOCK(lr);
+			if (bcmp(addr, lr->lr_key.addr, alen) != 0) {
+				IFLR_UNLOCK(lr);
+				/* Purge any link-layer info caching */
+				VERIFY(rt->rt_llinfo_purge != NULL);
+				rt->rt_llinfo_purge(rt);
+				lr = NULL;
+				why = " for different target HW address; "
+				    "using new llreach record";
+			} else {
+				lr->lr_probes = 0;	/* reset probe count */
+				IFLR_UNLOCK(lr);
+				if (solicited) {
+					why = " for same target HW address; "
+					    "keeping existing llreach record";
+				}
+			}
+		}
+
+		if (lr == NULL) {
+			lr = ln->ln_llreach = ifnet_llreach_alloc(ifp,
+			    ETHERTYPE_IPV6, addr, alen, nd6_llreach_base);
+			if (lr != NULL) {
+				lr->lr_probes = 0;	/* reset probe count */
+				if (why == NULL)
+					why = "creating new llreach record";
+			}
+		}
+
+		if (nd6_debug && lr != NULL && why != NULL) {
+			char tmp[MAX_IPv6_STR_LEN];
+
+			nd6log((LOG_DEBUG, "%s%d: %s%s for %s\n", ifp->if_name,
+			    ifp->if_unit, type, why, inet_ntop(AF_INET6,
+			    &SIN6(rt_key(rt))->sin6_addr, tmp, sizeof (tmp))));
+		}
+	}
+}
+
+void
+nd6_llreach_use(struct llinfo_nd6 *ln)
+{
+	if (ln->ln_llreach != NULL)
+		ln->ln_lastused = net_uptime();
+}
+
+/*
+ * Input a Neighbor Solicitation Message.
  *
  * Based on RFC 2461
- * Based on RFC 2462 (duplicated address detection)
+ * Based on RFC 2462 (duplicate address detection)
  */
 void
 nd6_ns_input(
@@ -151,6 +280,8 @@ nd6_ns_input(
 #endif
 	ip6 = mtod(m, struct ip6_hdr *); /* adjust pointer for safety */
 	taddr6 = nd_ns->nd_ns_target;
+	if (in6_setscope(&taddr6, ifp, NULL) != 0)
+		goto bad;
 
 	if (ip6->ip6_hlim != 255) {
 		nd6log((LOG_ERR,
@@ -161,18 +292,36 @@ nd6_ns_input(
 	}
 
 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) {
-		/* dst has to be solicited node multicast address. */
-		if (daddr6.s6_addr16[0] == IPV6_ADDR_INT16_MLL
+		/* dst has to be a solicited node multicast address. */
+		if (daddr6.s6_addr16[0] == IPV6_ADDR_INT16_MLL &&
 		    /* don't check ifindex portion */
-		    && daddr6.s6_addr32[1] == 0
-		    && daddr6.s6_addr32[2] == IPV6_ADDR_INT32_ONE
-		    && daddr6.s6_addr8[12] == 0xff) {
+		    daddr6.s6_addr32[1] == 0 &&
+		    daddr6.s6_addr32[2] == IPV6_ADDR_INT32_ONE &&
+		    daddr6.s6_addr8[12] == 0xff) {
 			; /* good */
 		} else {
 			nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet "
 				"(wrong ip6 dst)\n"));
 			goto bad;
 		}
+	} else if (!nd6_onlink_ns_rfc4861) {
+		struct sockaddr_in6 src_sa6;
+
+		/*
+		 * According to recent IETF discussions, it is not a good idea
+		 * to accept a NS from an address which would not be deemed
+		 * to be a neighbor otherwise.  This point is expected to be
+		 * clarified in future revisions of the specification.
+		 */
+		bzero(&src_sa6, sizeof(src_sa6));
+		src_sa6.sin6_family = AF_INET6;
+		src_sa6.sin6_len = sizeof(src_sa6);
+		src_sa6.sin6_addr = saddr6;
+		if (!nd6_is_addr_neighbor(&src_sa6, ifp, 0)) {
+			nd6log((LOG_INFO, "nd6_ns_input: "
+				"NS packet from non-neighbor\n"));
+			goto bad;
+		}
 	}
 
 	if (IN6_IS_ADDR_MULTICAST(&taddr6)) {
@@ -180,9 +329,6 @@ nd6_ns_input(
 		goto bad;
 	}
 
-	if (IN6_IS_SCOPE_LINKLOCAL(&taddr6))
-		taddr6.s6_addr16[1] = htons(ifp->if_index);
-
 	icmp6len -= sizeof(*nd_ns);
 	nd6_option_init(nd_ns + 1, icmp6len, &ndopts);
 	if (nd6_options(&ndopts) < 0) {
@@ -196,7 +342,7 @@ nd6_ns_input(
 		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
 		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
 	}
-	
+
 	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && lladdr) {
 		nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet "
 		    "(link-layer address option)\n"));
@@ -213,12 +359,6 @@ nd6_ns_input(
 	 * In this implementation, we add the target link-layer address by
 	 * default.  We do not add one in MUST NOT cases.
 	 */
-#if 0 /* too much! */
-	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &daddr6);
-	if (ifa && (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST))
-		tlladdr = 0;
-	else
-#endif
 	if (!IN6_IS_ADDR_MULTICAST(&daddr6))
 		tlladdr = 0;
 	else
@@ -234,16 +374,18 @@ nd6_ns_input(
 	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
 
 	/* (2) check. */
-	if (!ifa) {
+	if (ifa == NULL) {
 		struct rtentry *rt;
 		struct sockaddr_in6 tsin6;
 
-		bzero(&tsin6, sizeof tsin6);		
+		bzero(&tsin6, sizeof tsin6);
 		tsin6.sin6_len = sizeof(struct sockaddr_in6);
 		tsin6.sin6_family = AF_INET6;
 		tsin6.sin6_addr = taddr6;
 
-		rt = rtalloc1((struct sockaddr *)&tsin6, 0, 0);
+		rt = rtalloc1_scoped((struct sockaddr *)&tsin6, 0, 0,
+		    ifp->if_index);
+
 		if (rt != NULL) {
 			RT_LOCK(rt);
 			if ((rt->rt_flags & RTF_ANNOUNCE) != 0 &&
@@ -262,7 +404,7 @@ nd6_ns_input(
 			rtfree(rt);
 		}
 	}
-	if (!ifa) {
+	if (ifa == NULL) {
 		/*
 		 * We've got an NS packet, and we don't have that address
 		 * assigned to us.  We MUST silently ignore it.
@@ -270,11 +412,15 @@ nd6_ns_input(
 		 */
 		goto freeit;
 	}
+	IFA_LOCK(ifa);
 	myaddr6 = *IFA_IN6(ifa);
 	anycast = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST;
 	tentative = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE;
-	if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DUPLICATED)
+	if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DUPLICATED) {
+		IFA_UNLOCK(ifa);
 		goto freeit;
+	}
+	IFA_UNLOCK(ifa);
 
 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
 		nd6log((LOG_INFO,
@@ -306,7 +452,7 @@ nd6_ns_input(
 	if (tentative) {
 		/*
 		 * If source address is unspecified address, it is for
-		 * duplicated address detection.
+		 * duplicate address detection.
 		 *
 		 * If not, the packet is for address resolution;
 		 * silently ignore it.
@@ -327,7 +473,8 @@ nd6_ns_input(
 	 */
 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) {
 		saddr6 = in6addr_linklocal_allnodes;
-		saddr6.s6_addr16[1] = htons(ifp->if_index);
+		if (in6_setscope(&saddr6, ifp, NULL) != 0)
+			goto bad;
 		nd6_na_output(ifp, &saddr6, &taddr6,
 			      ((anycast || proxy || !tlladdr)
 				      ? 0 : ND_NA_FLAG_OVERRIDE)
@@ -346,7 +493,7 @@ nd6_ns_input(
  freeit:
 	m_freem(m);
 	if (ifa != NULL)
-		ifafree(ifa);
+		IFA_REMREF(ifa);
 	return;
 
  bad:
@@ -356,17 +503,17 @@ nd6_ns_input(
 	icmp6stat.icp6s_badns++;
 	m_freem(m);
 	if (ifa != NULL)
-		ifafree(ifa);
+		IFA_REMREF(ifa);
 }
 
 /*
- * Output an Neighbor Solicitation Message. Caller specifies:
+ * Output a Neighbor Solicitation Message. Caller specifies:
  *	- ICMP6 header source IP6 address
  *	- ND6 header target IP6 address
  *	- ND6 header source datalink address
  *
  * Based on RFC 2461
- * Based on RFC 2462 (duplicated address detection)
+ * Based on RFC 2462 (duplicate address detection)
  *
  * Caller must bump up ln->ln_rt refcnt to make sure 'ln' doesn't go
  * away if there is a llinfo_nd6 passed in.
@@ -377,22 +524,29 @@ nd6_ns_output(
 	const struct in6_addr *daddr6,
 	const struct in6_addr *taddr6,
 	struct llinfo_nd6 *ln,	/* for source address determination */
-	int dad,	/* duplicated address detection */
-	int locked)
+	int dad)	/* duplicate address detection */
 {
 	struct mbuf *m;
 	struct ip6_hdr *ip6;
 	struct nd_neighbor_solicit *nd_ns;
 	struct in6_ifaddr *ia = NULL;
-	struct ip6_moptions im6o;
+	struct in6_addr *src, src_in, src_storage;
+	struct ip6_moptions *im6o = NULL;
+	struct ifnet *outif = NULL;
 	int icmp6len;
 	int maxlen;
+	int flags;
 	caddr_t mac;
-	struct ifnet *outif = NULL;
-	
+	struct route_in6 ro;
+	struct ip6_out_args ip6oa = { IFSCOPE_NONE, 0 };
+
+	bzero(&ro, sizeof(ro));
+
 	if (IN6_IS_ADDR_MULTICAST(taddr6))
 		return;
 
+	ip6oa.ip6oa_boundif = ifp->if_index;
+
 	/* estimate the size of message */
 	maxlen = sizeof(*ip6) + sizeof(*nd_ns);
 	maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
@@ -418,9 +572,16 @@ nd6_ns_output(
 
 	if (daddr6 == NULL || IN6_IS_ADDR_MULTICAST(daddr6)) {
 		m->m_flags |= M_MCAST;
-		im6o.im6o_multicast_ifp = ifp;
-		im6o.im6o_multicast_hlim = 255;
-		im6o.im6o_multicast_loop = 0;
+
+		im6o = ip6_allocmoptions(M_DONTWAIT);
+		if (im6o == NULL) {
+			m_freem(m);
+			return;
+		}
+
+		im6o->im6o_multicast_ifp = ifp;
+		im6o->im6o_multicast_hlim = 255;
+		im6o->im6o_multicast_loop = 0;
 	}
 
 	icmp6len = sizeof(*nd_ns);
@@ -439,27 +600,15 @@ nd6_ns_output(
 		ip6->ip6_dst = *daddr6;
 	else {
 		ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
-		ip6->ip6_dst.s6_addr16[1] = htons(ifp->if_index);
+		ip6->ip6_dst.s6_addr16[1] = 0;
 		ip6->ip6_dst.s6_addr32[1] = 0;
 		ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_ONE;
 		ip6->ip6_dst.s6_addr32[3] = taddr6->s6_addr32[3];
 		ip6->ip6_dst.s6_addr8[12] = 0xff;
+		if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
+			goto bad;
 	}
 	if (!dad) {
-#if 0	/* KAME way, exact address scope match */
-		/*
-		 * Select a source whose scope is the same as that of the dest.
-		 * Typically, the dest is link-local solicitation multicast
-		 * (i.e. neighbor discovery) or link-local/global unicast
-		 * (i.e. neighbor un-reachability detection).
-		 */
-		ia = in6_ifawithifp(ifp, &ip6->ip6_dst);
-		if (ia == NULL) {
-			m_freem(m);
-			return;
-		}
-		ip6->ip6_src = ia->ia_addr.sin6_addr;
-#else	/* spec-wise correct */
 		/*
 		 * RFC2461 7.2.2:
 		 * "If the source address of the packet prompting the
@@ -473,72 +622,82 @@ nd6_ns_output(
 		 * (saddr6), if:
 		 * - saddr6 is given from the caller (by giving "ln"), and
 		 * - saddr6 belongs to the outgoing interface.
-		 * Otherwise, we perform a scope-wise match.
+		 * Otherwise, we perform the source address selection as usual.
 		 */
-		struct ip6_hdr *hip6 = NULL;	/* hold ip6 */
-		struct in6_addr saddr6;
+		struct ip6_hdr *hip6;		/* hold ip6 */
+		struct in6_addr *hsrc = NULL;
 
 		/* Caller holds ref on this route */
 		if (ln != NULL) {
 			RT_LOCK(ln->ln_rt);
+			/*
+			 * Assume every packet in ln_hold has the same
+			 * IP header.
+			 */
 			if (ln->ln_hold != NULL) {
 				hip6 = mtod(ln->ln_hold, struct ip6_hdr *);
 				/* XXX pullup? */
 				if (sizeof (*hip6) < ln->ln_hold->m_len)
-					saddr6 = hip6->ip6_src;
+					hsrc = &hip6->ip6_src;
 				else
-					hip6 = NULL;
+					hsrc = NULL;
+			}
+			/* Update probe count, if applicable */
+			if (ln->ln_llreach != NULL) {
+				IFLR_LOCK_SPIN(ln->ln_llreach);
+				ln->ln_llreach->lr_probes++;
+				IFLR_UNLOCK(ln->ln_llreach);
 			}
-			/*
-			 * hip6 is used only to indicate whether or
-			 * not there is a valid source address from
-			 * the held packet in ln_hold.  For obvious
-			 * reasons we should not dereference it after
-			 * releasing the lock though we can simply
-			 * test if it's non-NULL.
-			 */
 			RT_UNLOCK(ln->ln_rt);
-		}
 
-		if (ia != NULL)
-			ifafree(&ia->ia_ifa);
-		if (hip6 != NULL && (ia = in6ifa_ifpwithaddr(ifp, &saddr6))) {
-			bcopy(&saddr6, &ip6->ip6_src, sizeof (saddr6));
-		} else {
-			ia = in6_ifawithifp(ifp, &ip6->ip6_dst);
-			if (ia == NULL) {
-				if (ln != NULL) {
-					RT_LOCK(ln->ln_rt);
-					if (ln->ln_hold != NULL)
-						m_freem(ln->ln_hold);
-					ln->ln_hold = NULL;
-					RT_UNLOCK(ln->ln_rt);
-				}
-				m_freem(m);
-				return;
-			}
-			ip6->ip6_src = ia->ia_addr.sin6_addr;
 		}
 		if (ia != NULL) {
-			ifafree(&ia->ia_ifa);
+			IFA_REMREF(&ia->ia_ifa);
 			ia = NULL;
 		}
-#endif
+		if (hsrc != NULL && (ia = in6ifa_ifpwithaddr(ifp, hsrc))) {
+			src = hsrc;
+			IFA_REMREF(&ia->ia_ifa);
+			ia = NULL;
+		} else {
+			int error;
+			struct sockaddr_in6 dst_sa;
+
+			bzero(&dst_sa, sizeof(dst_sa));
+			dst_sa.sin6_family = AF_INET6;
+			dst_sa.sin6_len = sizeof(dst_sa);
+			dst_sa.sin6_addr = ip6->ip6_dst;
+
+			src = in6_selectsrc(&dst_sa, NULL,
+			    NULL, &ro, NULL, &src_storage, ip6oa.ip6oa_boundif,
+			    &error);
+			if (src == NULL) {
+				nd6log((LOG_DEBUG,
+				    "nd6_ns_output: source can't be "
+				    "determined: dst=%s, error=%d\n",
+				    ip6_sprintf(&dst_sa.sin6_addr),
+				    error));
+				goto bad;
+			}
+		}
 	} else {
 		/*
 		 * Source address for DAD packet must always be IPv6
 		 * unspecified address. (0::0)
+		 * We actually don't have to 0-clear the address (we did it
+		 * above), but we do so here explicitly to make the intention
+		 * clearer.
 		 */
-		bzero(&ip6->ip6_src, sizeof(ip6->ip6_src));
+		bzero(&src_in, sizeof(src_in));
+		src = &src_in;
 	}
+	ip6->ip6_src = *src;
 	nd_ns = (struct nd_neighbor_solicit *)(ip6 + 1);
 	nd_ns->nd_ns_type = ND_NEIGHBOR_SOLICIT;
 	nd_ns->nd_ns_code = 0;
 	nd_ns->nd_ns_reserved = 0;
 	nd_ns->nd_ns_target = *taddr6;
-
-	if (IN6_IS_SCOPE_LINKLOCAL(&nd_ns->nd_ns_target))
-		nd_ns->nd_ns_target.s6_addr16[1] = 0;
+	in6_clearscope(&nd_ns->nd_ns_target); /* XXX */
 
 	/*
 	 * Add source link-layer address option.
@@ -577,19 +736,43 @@ nd6_ns_output(
 	if (ipsec_bypass == 0)
 		(void)ipsec_setsocket(m, NULL);
 #endif
-	ip6_output(m, NULL, NULL, dad ? IPV6_DADOUTPUT : 0, &im6o, &outif, locked);
+	flags = dad ? IPV6_UNSPECSRC : 0;
+	flags |= IPV6_OUTARGS;
+
+	ip6_output(m, NULL, NULL, flags, im6o, &outif, &ip6oa);
 	if (outif) {
 		icmp6_ifstat_inc(outif, ifs6_out_msg);
 		icmp6_ifstat_inc(outif, ifs6_out_neighborsolicit);
+		ifnet_release(outif);
 	}
 	icmp6stat.icp6s_outhist[ND_NEIGHBOR_SOLICIT]++;
+
+	if (im6o != NULL)
+		IM6O_REMREF(im6o);
+	if (ro.ro_rt) {		/* we don't cache this route. */
+		rtfree(ro.ro_rt);
+	}
+	if (ia != NULL)
+		IFA_REMREF(&ia->ia_ifa);
+	return;
+
+bad:
+	if (im6o != NULL)
+		IM6O_REMREF(im6o);
+	if (ro.ro_rt) {
+		rtfree(ro.ro_rt);
+	}
+	m_freem(m);
+	if (ia != NULL)
+		IFA_REMREF(&ia->ia_ifa);
+	return;
 }
 
 /*
  * Neighbor advertisement input handling.
  *
  * Based on RFC 2461
- * Based on RFC 2462 (duplicated address detection)
+ * Based on RFC 2462 (duplicate address detection)
  *
  * the following items are not implemented yet:
  * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD)
@@ -604,9 +787,6 @@ nd6_na_input(
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct nd_neighbor_advert *nd_na;
-#if 0
-	struct in6_addr saddr6 = ip6->ip6_src;
-#endif
 	struct in6_addr daddr6 = ip6->ip6_dst;
 	struct in6_addr taddr6;
 	int flags;
@@ -640,14 +820,15 @@ nd6_na_input(
 		return;
 	}
 #endif
-	taddr6 = nd_na->nd_na_target;
+
 	flags = nd_na->nd_na_flags_reserved;
 	is_router = ((flags & ND_NA_FLAG_ROUTER) != 0);
 	is_solicited = ((flags & ND_NA_FLAG_SOLICITED) != 0);
 	is_override = ((flags & ND_NA_FLAG_OVERRIDE) != 0);
 
-	if (IN6_IS_SCOPE_LINKLOCAL(&taddr6))
-		taddr6.s6_addr16[1] = htons(ifp->if_index);
+	taddr6 = nd_na->nd_na_target;
+	if (in6_setscope(&taddr6, ifp, NULL))
+		goto bad;	/* XXX: impossible */
 
 	if (IN6_IS_ADDR_MULTICAST(&taddr6)) {
 		nd6log((LOG_ERR,
@@ -687,10 +868,14 @@ nd6_na_input(
 	 *
 	 * Otherwise, process as defined in RFC 2461.
 	 */
-	if (ifa
-	 && (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE)) {
-		nd6_dad_na_input(ifa);
-		goto freeit;
+	if (ifa != NULL) {
+		IFA_LOCK(ifa);
+		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE) {
+			IFA_UNLOCK(ifa);
+			nd6_dad_na_input(ifa, lladdr, lladdrlen);
+			goto freeit;
+		}
+		IFA_UNLOCK(ifa);
 	}
 
 	/* Just for safety, maybe unnecessary. */
@@ -710,8 +895,8 @@ nd6_na_input(
 	}
 
 	/*
-	 * If no neighbor cache entry is found, NA SHOULD silently be discarded.
-	 * Callee returns a locked route upon success.
+	 * If no neighbor cache entry is found, NA SHOULD silently be
+	 * discarded.
 	 */
 	if ((rt = nd6_lookup(&taddr6, 0, ifp, 0)) == NULL)
 		goto freeit;
@@ -762,7 +947,9 @@ nd6_na_input(
 			 * affect the status of associated prefixes..
 			 */
 			RT_UNLOCK(rt);
-			pfxlist_onlink_check(0);
+			lck_mtx_lock(nd6_mutex);
+			pfxlist_onlink_check();
+			lck_mtx_unlock(nd6_mutex);
 			RT_LOCK(rt);
 		}
 	} else {
@@ -802,7 +989,7 @@ nd6_na_input(
 		 *	1	1	y	n	(2a) L *->REACHABLE
 		 *	1	1	y	y	(2a) L *->REACHABLE
 		 */
-		if (!is_override && (lladdr && llchange)) {	   /* (1) */
+		if (!is_override && (lladdr != NULL && llchange)) {  /* (1) */
 			/*
 			 * If state is REACHABLE, make it STALE.
 			 * no other updates should be done.
@@ -866,13 +1053,14 @@ nd6_na_input(
 			 * Lock to protect the default router list.
 			 * XXX: this might be unnecessary, since this function
 			 * is only called under the network software interrupt
-			 * context.  However, we keep it just for safety.  
+			 * context.  However, we keep it just for safety.
 			 */
 			RT_UNLOCK(rt);
 			lck_mtx_lock(nd6_mutex);
 			dr = defrouter_lookup(in6, rt_ifp);
 			if (dr) {
-				defrtrlist_del(dr, 1);
+				defrtrlist_del(dr);
+				NDDR_REMREF(dr);
 				lck_mtx_unlock(nd6_mutex);
 			}
 			else {
@@ -894,33 +1082,51 @@ nd6_na_input(
 	}
 	RT_LOCK_ASSERT_HELD(rt);
 	rt->rt_flags &= ~RTF_REJECT;
+
+	/* cache the gateway (sender HW) address */
+	nd6_llreach_alloc(rt, ifp, LLADDR(sdl), sdl->sdl_alen, TRUE);
+
+	/* update the llinfo, send a queued packet if there is one */
 	ln->ln_asked = 0;
 	if (ln->ln_hold != NULL) {
-		struct mbuf *n = ln->ln_hold;
-		ln->ln_hold = NULL;
+		struct mbuf *m_hold, *m_hold_next;
+		struct sockaddr_in6 sin6;
+
+		rtkey_to_sa6(rt, &sin6);
 		/*
-		 * we assume ifp is not a loopback here, so just set the 2nd
-		 * argument as the 1st one.
+		 * Walk the chain of held packets, transmitting each
+		 * one; ln_hold itself is cleared only after the whole
+		 * chain has been drained.
 		 */
-		RT_UNLOCK(rt);
-		nd6_output(ifp, ifp, n, (struct sockaddr_in6 *)rt_key(rt),
-		    rt, 0);
-		RT_LOCK_SPIN(rt);
+		for (m_hold = ln->ln_hold;
+		    m_hold; m_hold = m_hold_next) {
+			m_hold_next = m_hold->m_nextpkt;
+			m_hold->m_nextpkt = NULL;
+			/*
+			 * we assume ifp is not a loopback here, so just set
+			 * the 2nd argument as the 1st one.
+			 */
+			RT_UNLOCK(rt);
+			nd6_output(ifp, ifp, m_hold, &sin6, rt);
+			RT_LOCK_SPIN(rt);
+		}
+		ln->ln_hold = NULL;
 	}
 	RT_REMREF_LOCKED(rt);
 	RT_UNLOCK(rt);
 
- freeit:
+freeit:
 	m_freem(m);
 	if (ifa != NULL)
-		ifafree(ifa);
+		IFA_REMREF(ifa);
 	return;
 
- bad:
+bad:
 	icmp6stat.icp6s_badna++;
 	m_freem(m);
 	if (ifa != NULL)
-		ifafree(ifa);
+		IFA_REMREF(ifa);
 }
 
 /*
@@ -931,11 +1137,14 @@ nd6_na_input(
  * the following items are not implemented yet:
  * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD)
  * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD)
+ *
+ * tlladdr - 1 to include the target link-layer address
+ * sdl0 - sockaddr_dl (= proxy NA) or NULL
  */
 void
 nd6_na_output(
 	struct ifnet *ifp,
-	const struct in6_addr *daddr6,
+	const struct in6_addr *daddr6_0,
 	const struct in6_addr *taddr6,
 	uint32_t flags,
 	int tlladdr,		/* 1 to include target link-layer address */
@@ -944,12 +1153,20 @@ nd6_na_output(
 	struct mbuf *m;
 	struct ip6_hdr *ip6;
 	struct nd_neighbor_advert *nd_na;
-	struct in6_ifaddr *ia = NULL;
-	struct ip6_moptions im6o;
-	int icmp6len;
-	int maxlen;
+	struct ip6_moptions *im6o = NULL;
 	caddr_t mac = NULL;
-	struct ifnet *outif = NULL;
+	struct route_in6 ro;
+	struct in6_addr *src, src_storage, daddr6;
+	struct sockaddr_in6 dst_sa;
+	int icmp6len, maxlen, error;
+	struct ifnet *outif = NULL;
+	struct ip6_out_args ip6oa = { IFSCOPE_NONE, 0 };
+
+	bzero(&ro, sizeof(ro));
+
+	daddr6 = *daddr6_0;	/* make a local copy for modification */
+
+	ip6oa.ip6oa_boundif = ifp->if_index;
 
 	/* estimate the size of message */
 	maxlen = sizeof(*ip6) + sizeof(*nd_na);
@@ -974,11 +1191,18 @@ nd6_na_output(
 		return;
 	m->m_pkthdr.rcvif = NULL;
 
-	if (IN6_IS_ADDR_MULTICAST(daddr6)) {
+	if (IN6_IS_ADDR_MULTICAST(&daddr6)) {
 		m->m_flags |= M_MCAST;
-		im6o.im6o_multicast_ifp = ifp;
-		im6o.im6o_multicast_hlim = 255;
-		im6o.im6o_multicast_loop = 0;
+
+		im6o = ip6_allocmoptions(M_DONTWAIT);
+		if (im6o == NULL) {
+			m_freem(m);
+			return;
+		}
+
+		im6o->im6o_multicast_ifp = ifp;
+		im6o->im6o_multicast_hlim = 255;
+		im6o->im6o_multicast_loop = 0;
 	}
 
 	icmp6len = sizeof(*nd_na);
@@ -992,35 +1216,44 @@ nd6_na_output(
 	ip6->ip6_vfc |= IPV6_VERSION;
 	ip6->ip6_nxt = IPPROTO_ICMPV6;
 	ip6->ip6_hlim = 255;
-	if (IN6_IS_ADDR_UNSPECIFIED(daddr6)) {
+	if (IN6_IS_ADDR_UNSPECIFIED(&daddr6)) {
 		/* reply to DAD */
-		ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
-		ip6->ip6_dst.s6_addr16[1] = htons(ifp->if_index);
-		ip6->ip6_dst.s6_addr32[1] = 0;
-		ip6->ip6_dst.s6_addr32[2] = 0;
-		ip6->ip6_dst.s6_addr32[3] = IPV6_ADDR_INT32_ONE;
+		daddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
+		daddr6.s6_addr16[1] = 0;
+		daddr6.s6_addr32[1] = 0;
+		daddr6.s6_addr32[2] = 0;
+		daddr6.s6_addr32[3] = IPV6_ADDR_INT32_ONE;
+		if (in6_setscope(&daddr6, ifp, NULL))
+			goto bad;
+
 		flags &= ~ND_NA_FLAG_SOLICITED;
 	} else
-		ip6->ip6_dst = *daddr6;
+		ip6->ip6_dst = daddr6;
+
+	bzero(&dst_sa, sizeof(struct sockaddr_in6));
+	dst_sa.sin6_family = AF_INET6;
+	dst_sa.sin6_len = sizeof(struct sockaddr_in6);
+	dst_sa.sin6_addr = daddr6;
 
 	/*
 	 * Select a source whose scope is the same as that of the dest.
 	 */
-	ia = in6_ifawithifp(ifp, &ip6->ip6_dst);
-	if (ia == NULL) {
-		m_freem(m);
-		return;
+	bcopy(&dst_sa, &ro.ro_dst, sizeof(dst_sa));
+	src = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, &src_storage,
+	    ip6oa.ip6oa_boundif, &error);
+	if (src == NULL) {
+		nd6log((LOG_DEBUG, "nd6_na_output: source can't be "
+		    "determined: dst=%s, error=%d\n",
+		    ip6_sprintf(&dst_sa.sin6_addr), error));
+		goto bad;
 	}
-	ip6->ip6_src = ia->ia_addr.sin6_addr;
-	ifafree(&ia->ia_ifa);
-	ia = NULL;
+	ip6->ip6_src = *src;
 
 	nd_na = (struct nd_neighbor_advert *)(ip6 + 1);
 	nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
 	nd_na->nd_na_code = 0;
 	nd_na->nd_na_target = *taddr6;
-	if (IN6_IS_SCOPE_LINKLOCAL(&nd_na->nd_na_target))
-		nd_na->nd_na_target.s6_addr16[1] = 0;
+	in6_clearscope(&nd_na->nd_na_target); /* XXX */
 
 	/*
 	 * "tlladdr" indicates NS's condition for adding tlladdr or not.
@@ -1046,7 +1279,7 @@ nd6_na_output(
 	if (tlladdr && mac) {
 		int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen;
 		struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_na + 1);
-		
+
 		/* roundup to 8 bytes alignment! */
 		optlen = (optlen + 7) & ~7;
 
@@ -1071,23 +1304,63 @@ nd6_na_output(
 	if (ipsec_bypass == 0)
 		(void)ipsec_setsocket(m, NULL);
 #endif
-	ip6_output(m, NULL, NULL, 0, &im6o, &outif, 0);
+	ip6_output(m, NULL, NULL, IPV6_OUTARGS, im6o, &outif, &ip6oa);
 	if (outif) {
 		icmp6_ifstat_inc(outif, ifs6_out_msg);
 		icmp6_ifstat_inc(outif, ifs6_out_neighboradvert);
+		ifnet_release(outif);
 	}
 	icmp6stat.icp6s_outhist[ND_NEIGHBOR_ADVERT]++;
+
+	if (im6o != NULL)
+		IM6O_REMREF(im6o);
+	if (ro.ro_rt) {
+		rtfree(ro.ro_rt);
+	}
+	return;
+
+bad:
+	if (im6o != NULL)
+		IM6O_REMREF(im6o);
+	if (ro.ro_rt) {
+		rtfree(ro.ro_rt);
+	}
+	m_freem(m);
+	return;
 }
 
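+/*
+ * Map an interface to its link-layer (MAC) address.  Only interface
+ * types known to carry a hardware address usable by ND are handled;
+ * everything else gets NULL.
+ */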
 caddr_t
 nd6_ifptomac(
 	struct ifnet *ifp)
 {
-	return ((caddr_t)ifnet_lladdr(ifp));
+	switch (ifp->if_type) {
+	case IFT_ARCNET:
+	case IFT_ETHER:
+	case IFT_IEEE8023ADLAG:
+	case IFT_FDDI:
+	case IFT_IEEE1394:
+#ifdef IFT_L2VLAN
+	case IFT_L2VLAN:
+#endif
+#ifdef IFT_IEEE80211
+	case IFT_IEEE80211:
+#endif
+#ifdef IFT_CARP
+	case IFT_CARP:
+#endif
+	case IFT_BRIDGE:
+	case IFT_ISO88025:
+		return ((caddr_t)ifnet_lladdr(ifp));
+	default:
+		return NULL;
+	}
 }
 
 TAILQ_HEAD(dadq_head, dadq);
 struct dadq {
+	decl_lck_mtx_data(, dad_lock);
+	u_int32_t dad_refcount;	/* reference count */
+	int dad_attached;	/* on dadq list */
 	TAILQ_ENTRY(dadq) dad_list;
 	struct ifaddr *dad_ifa;
 	int dad_count;		/* max NS to send */
@@ -1095,28 +1368,46 @@ struct dadq {
 	int dad_ns_ocount;	/* NS sent so far */
 	int dad_ns_icount;	/* NS received so far */
 	int dad_na_icount;	/* NA received so far */
+	int dad_na_ixcount;	/* Count of IFDISABLED eligible NA rx'd */
 };
 
 static struct dadq_head dadq;
-static int dad_init = 0;
+
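+/*
+ * Called once at initialization to set up the DAD queue and the zone
+ * (dad_zone) out of which dadq entries are allocated.
+ */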
+void
+nd6_nbr_init(void)
+{
+	TAILQ_INIT(&dadq);
+
+	dad_size = sizeof (struct dadq);
+	dad_zone = zinit(dad_size, DAD_ZONE_MAX * dad_size, 0, DAD_ZONE_NAME);
+	if (dad_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, DAD_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(dad_zone, Z_EXPAND, TRUE);
+	zone_change(dad_zone, Z_CALLERACCT, FALSE);
+}
 
 static struct dadq *
-nd6_dad_find(
-	struct ifaddr *ifa)
+nd6_dad_find(struct ifaddr *ifa)
 {
 	struct dadq *dp;
+
 	lck_mtx_lock(dad6_mutex);
 	for (dp = dadq.tqh_first; dp; dp = dp->dad_list.tqe_next) {
+		DAD_LOCK_SPIN(dp);
 		if (dp->dad_ifa == ifa) {
+			DAD_ADDREF_LOCKED(dp);
+			DAD_UNLOCK(dp);
 			lck_mtx_unlock(dad6_mutex);
-			return dp;
+			return (dp);
 		}
+		DAD_UNLOCK(dp);
 	}
 	lck_mtx_unlock(dad6_mutex);
-	return NULL;
+	return (NULL);
 }
 
-#ifdef  __APPLE__
 void
 nd6_dad_stoptimer(
 	struct ifaddr *ifa)
@@ -1124,28 +1415,9 @@ nd6_dad_stoptimer(
 
 	untimeout((void (*)(void *))nd6_dad_timer, (void *)ifa);
 }
-#else
-static void
-nd6_dad_starttimer(
-	struct dadq *dp,
-	int ticks)
-{
-
-	callout_reset(&dp->dad_timer_ch, ticks,
-	    (void (*)(void *))nd6_dad_timer, (void *)dp->dad_ifa);
-}
-
-static void
-nd6_dad_stoptimer(
-	struct dadq *dp)
-{
-
-	callout_stop(&dp->dad_timer_ch);
-}
-#endif
 
 /*
- * Start Duplicated Address Detection (DAD) for specified interface address.
+ * Start Duplicate Address Detection (DAD) for specified interface address.
  */
 void
 nd6_dad_start(
@@ -1155,43 +1427,45 @@ nd6_dad_start(
 	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
 	struct dadq *dp;
 
-	if (!dad_init) {
-		TAILQ_INIT(&dadq);
-		dad_init++;
-	}
-
 	/*
 	 * If we don't need DAD, don't do it.
 	 * There are several cases:
 	 * - DAD is disabled (ip6_dad_count == 0)
 	 * - the interface address is anycast
 	 */
+	IFA_LOCK(&ia->ia_ifa);
 	if (!(ia->ia6_flags & IN6_IFF_TENTATIVE)) {
 		log(LOG_DEBUG,
 			"nd6_dad_start: called with non-tentative address "
 			"%s(%s)\n",
 			ip6_sprintf(&ia->ia_addr.sin6_addr),
 			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
+		IFA_UNLOCK(&ia->ia_ifa);
 		return;
 	}
 	if (ia->ia6_flags & IN6_IFF_ANYCAST) {
 		ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
+		IFA_UNLOCK(&ia->ia_ifa);
 		return;
 	}
 	if (!ip6_dad_count) {
 		ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
+		IFA_UNLOCK(&ia->ia_ifa);
 		return;
 	}
-	if (!ifa->ifa_ifp)
+	IFA_UNLOCK(&ia->ia_ifa);
+	if (ifa->ifa_ifp == NULL)
 		panic("nd6_dad_start: ifa->ifa_ifp == NULL");
-	if (!(ifa->ifa_ifp->if_flags & IFF_UP))
+	if (!(ifa->ifa_ifp->if_flags & IFF_UP)) {
 		return;
-	if (nd6_dad_find(ifa) != NULL) {
+	}
+	if ((dp = nd6_dad_find(ifa)) != NULL) {
+		DAD_REMREF(dp);
 		/* DAD already in progress */
 		return;
 	}
 
-	dp = _MALLOC(sizeof(*dp), M_IP6NDP, M_NOWAIT);
+	dp = zalloc(dad_zone);
 	if (dp == NULL) {
 		log(LOG_ERR, "nd6_dad_start: memory allocation failed for "
 			"%s(%s)\n",
@@ -1199,10 +1473,11 @@ nd6_dad_start(
 			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
 		return;
 	}
-	bzero(dp, sizeof(*dp));
-	lck_mtx_lock(dad6_mutex);
-	TAILQ_INSERT_TAIL(&dadq, (struct dadq *)dp, dad_list);
-	lck_mtx_unlock(dad6_mutex);
+	bzero(dp, dad_size);
+	lck_mtx_init(&dp->dad_lock, ifa_mtx_grp, ifa_mtx_attr);
+
+	/* Callee adds one reference for us */
+	dp = nd6_dad_attach(dp, ifa);
 
 	nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp),
 	    ip6_sprintf(&ia->ia_addr.sin6_addr)));
@@ -1213,11 +1488,6 @@ nd6_dad_start(
 	 * first packet to be sent from the interface after interface
 	 * (re)initialization.
 	 */
-	dp->dad_ifa = ifa;
-	ifaref(ifa);	/*just for safety*/
-	dp->dad_count = ip6_dad_count;
-	dp->dad_ns_icount = dp->dad_na_icount = 0;
-	dp->dad_ns_ocount = dp->dad_ns_tcount = 0;
 	if (tick_delay == NULL) {
 		u_int32_t retrans;
 		nd6_dad_ns_output(dp, ifa);
@@ -1236,19 +1506,61 @@ nd6_dad_start(
 		timeout((void (*)(void *))nd6_dad_timer, (void *)ifa,
 			ntick);
 	}
+
+	DAD_REMREF(dp);		/* drop our reference */
+}
+
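+/*
+ * Insert a dadq entry into the global DAD queue.  The entry is
+ * returned with two new references held: one for the caller and one
+ * for the dadq list itself; the latter is dropped by nd6_dad_detach().
+ */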
+static struct dadq *
+nd6_dad_attach(struct dadq *dp, struct ifaddr *ifa)
+{
+	lck_mtx_lock(dad6_mutex);
+	DAD_LOCK(dp);
+	dp->dad_ifa = ifa;
+	IFA_ADDREF(ifa);	/* for dad_ifa */
+	dp->dad_count = ip6_dad_count;
+	dp->dad_ns_icount = dp->dad_na_icount = 0;
+	dp->dad_ns_ocount = dp->dad_ns_tcount = 0;
+	dp->dad_na_ixcount = 0;
+	VERIFY(!dp->dad_attached);
+	dp->dad_attached = 1;
+	DAD_ADDREF_LOCKED(dp);	/* for caller */
+	DAD_ADDREF_LOCKED(dp);	/* for dadq_head list */
+	TAILQ_INSERT_TAIL(&dadq, (struct dadq *)dp, dad_list);
+	DAD_UNLOCK(dp);
+	lck_mtx_unlock(dad6_mutex);
+
+	return (dp);
+}
+
+static void
+nd6_dad_detach(struct dadq *dp, struct ifaddr *ifa)
+{
+	int detached;
+
+	lck_mtx_lock(dad6_mutex);
+	DAD_LOCK(dp);
+	if ((detached = dp->dad_attached)) {
+		VERIFY(dp->dad_ifa == ifa);
+		TAILQ_REMOVE(&dadq, (struct dadq *)dp, dad_list);
+		dp->dad_list.tqe_next = NULL;
+		dp->dad_list.tqe_prev = NULL;
+		dp->dad_attached = 0;
+	}
+	DAD_UNLOCK(dp);
+	lck_mtx_unlock(dad6_mutex);
+	if (detached) {
+		DAD_REMREF(dp);		/* drop dadq_head reference */
+	}
 }
 
 /*
  * terminate DAD unconditionally.  used for address removals.
  */
 void
-nd6_dad_stop(
-	struct ifaddr *ifa)
+nd6_dad_stop(struct ifaddr *ifa)
 {
 	struct dadq *dp;
 
-	if (!dad_init)
-		return;
 	dp = nd6_dad_find(ifa);
 	if (!dp) {
 		/* DAD wasn't started yet */
@@ -1257,21 +1569,42 @@ nd6_dad_stop(
 
 	untimeout((void (*)(void *))nd6_dad_timer, (void *)ifa);
 
-	lck_mtx_lock(dad6_mutex);
-	TAILQ_REMOVE(&dadq, (struct dadq *)dp, dad_list);
-	lck_mtx_unlock(dad6_mutex);
-	FREE(dp, M_IP6NDP);
-	dp = NULL;
-	ifafree(ifa);
+	nd6_dad_detach(dp, ifa);
+	DAD_REMREF(dp);		/* drop our reference */
 }
 
 
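+/*
+ * Send an unsolicited Neighbor Advertisement for this address to the
+ * link-local all-nodes multicast group, with the Override flag set
+ * and the target link-layer address included, so that neighbors
+ * refresh their caches for this address.
+ */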
 static void
-nd6_dad_timer(
-	struct ifaddr *ifa)
+nd6_unsol_na_output(struct ifaddr *ifa)
 {
 	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
-	struct dadq *dp;
+	struct ifnet *ifp = ifa->ifa_ifp;
+	struct in6_addr daddr6, taddr6;
+
+	if ((ifp->if_flags & IFF_UP) == 0 ||
+	    (ifp->if_flags & IFF_RUNNING) == 0)
+		return;
+
+	IFA_LOCK_SPIN(&ia->ia_ifa);
+	taddr6 = ia->ia_addr.sin6_addr;
+	IFA_UNLOCK(&ia->ia_ifa);
+	if (in6_setscope(&taddr6, ifp, NULL) != 0)
+		return;
+	daddr6 = in6addr_linklocal_allnodes;
+	if (in6_setscope(&daddr6, ifp, NULL) != 0)
+		return;
+
+	nd6log((LOG_INFO, "%s: sending unsolicited NA\n",
+	    if_name(ifa->ifa_ifp)));
+
+	nd6_na_output(ifp, &daddr6, &taddr6, ND_NA_FLAG_OVERRIDE, 1, NULL);
+}
+
+static void
+nd6_dad_timer(struct ifaddr *ifa)
+{
+	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
+	struct dadq *dp = NULL;
 
 	/* Sanity check */
 	if (ia == NULL) {
@@ -1283,11 +1616,13 @@ nd6_dad_timer(
 		log(LOG_ERR, "nd6_dad_timer: DAD structure not found\n");
 		goto done;
 	}
+	IFA_LOCK(&ia->ia_ifa);
 	if (ia->ia6_flags & IN6_IFF_DUPLICATED) {
 		log(LOG_ERR, "nd6_dad_timer: called with duplicated address "
 			"%s(%s)\n",
 			ip6_sprintf(&ia->ia_addr.sin6_addr),
 			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
+		IFA_UNLOCK(&ia->ia_ifa);
 		goto done;
 	}
 	if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) {
@@ -1295,26 +1630,26 @@ nd6_dad_timer(
 			"%s(%s)\n",
 			ip6_sprintf(&ia->ia_addr.sin6_addr),
 			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
+		IFA_UNLOCK(&ia->ia_ifa);
 		goto done;
 	}
+	IFA_UNLOCK(&ia->ia_ifa);
 
 	/* timed out with IFF_{RUNNING,UP} check */
+	DAD_LOCK(dp);
 	if (dp->dad_ns_tcount > dad_maxtry) {
+		DAD_UNLOCK(dp);
 		nd6log((LOG_INFO, "%s: could not run DAD, driver problem?\n",
 			if_name(ifa->ifa_ifp)));
 
-		lck_mtx_lock(dad6_mutex);
-		TAILQ_REMOVE(&dadq, (struct dadq *)dp, dad_list);
-		lck_mtx_unlock(dad6_mutex);
-		FREE(dp, M_IP6NDP);
-		dp = NULL;
-		ifafree(ifa);
+		nd6_dad_detach(dp, ifa);
 		goto done;
 	}
 
 	/* Need more checks? */
 	if (dp->dad_ns_ocount < dp->dad_count) {
 		u_int32_t retrans;
+		DAD_UNLOCK(dp);
 		/*
 		 * We have more NS to go.  Send NS packet for DAD.
 		 */
@@ -1341,71 +1676,50 @@ nd6_dad_timer(
 		}
 
 		if (dp->dad_ns_icount) {
-#if 0 /* heuristics */
-			/*
-			 * if
-			 * - we have sent many(?) DAD NS, and
-			 * - the number of NS we sent equals to the
-			 *   number of NS we've got, and
-			 * - we've got no NA
-			 * we may have a faulty network card/driver which
-			 * loops back multicasts to myself.
-			 */
-			if (3 < dp->dad_count
-			 && dp->dad_ns_icount == dp->dad_count
-			 && dp->dad_na_icount == 0) {
-				log(LOG_INFO, "DAD questionable for %s(%s): "
-					"network card loops back multicast?\n",
-					ip6_sprintf(&ia->ia_addr.sin6_addr),
-					if_name(ifa->ifa_ifp));
-				/* XXX consider it a duplicate or not? */
-				/* duplicate++; */
-			} else {
-				/* We've seen NS, means DAD has failed. */
-				duplicate++;
-			}
-#else
 			/* We've seen NS, means DAD has failed. */
 			duplicate++;
-#endif
 		}
+		DAD_UNLOCK(dp);
 
 		if (duplicate) {
 			/* (*dp) will be freed in nd6_dad_duplicated() */
-			dp = NULL;
-			nd6_dad_duplicated(ifa);
+			nd6_dad_duplicated(ifa, TRUE);
 		} else {
 			/*
 			 * We are done with DAD.  No NA came, no NS came.
-			 * duplicated address found.
+			 * No duplicate address found.
 			 */
+			IFA_LOCK_SPIN(&ia->ia_ifa);
 			ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
+			IFA_UNLOCK(&ia->ia_ifa);
 
 			nd6log((LOG_DEBUG,
 			    "%s: DAD complete for %s - no duplicates found\n",
 			    if_name(ifa->ifa_ifp),
 			    ip6_sprintf(&ia->ia_addr.sin6_addr)));
-
-			lck_mtx_lock(dad6_mutex);
-			TAILQ_REMOVE(&dadq, (struct dadq *)dp, dad_list);
-			lck_mtx_unlock(dad6_mutex);
+			/*
+			 * Send an Unsolicited Neighbor Advertisement so that
+			 * other machines on the network are aware of us
+			 * (important when we are waking from sleep).
+			 */
+			nd6_unsol_na_output(ifa);
 			in6_post_msg(ia->ia_ifp, KEV_INET6_NEW_USER_ADDR, ia);
-			FREE(dp, M_IP6NDP);
-			dp = NULL;
-			ifafree(ifa);
+			nd6_dad_detach(dp, ifa);
 		}
 	}
 
 done:
-	return;
+	if (dp != NULL)
+		DAD_REMREF(dp);		/* drop our reference */
 }
 
 void
-nd6_dad_duplicated(
-	struct ifaddr *ifa)
+nd6_dad_duplicated(struct ifaddr *ifa, boolean_t dontignhwdup)
 {
 	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
 	struct dadq *dp;
+	struct ifnet *ifp = ifa->ifa_ifp;
+	int hwdupposs;
 
 	dp = nd6_dad_find(ifa);
 	if (dp == NULL) {
@@ -1413,80 +1727,99 @@ nd6_dad_duplicated(
 		return;
 	}
 
+	hwdupposs = 0;
+	IFA_LOCK(&ia->ia_ifa);
+	DAD_LOCK(dp);
 	log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: "
-	    "NS in/out=%d/%d, NA in=%d\n",
-	    if_name(ifa->ifa_ifp), ip6_sprintf(&ia->ia_addr.sin6_addr),
-	    dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_na_icount);
-
+	    "NS in/out=%d/%d, NA in=%d inx=%d\n",
+	    if_name(ifp), ip6_sprintf(&ia->ia_addr.sin6_addr),
+	    dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_na_icount,
+	    dp->dad_na_ixcount);
+	hwdupposs = dp->dad_na_ixcount;
+	DAD_UNLOCK(dp);
 	ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
 	ia->ia6_flags |= IN6_IFF_DUPLICATED;
+	IFA_UNLOCK(&ia->ia_ifa);
 
 	/* We are done with DAD; a duplicate address was found (failure). */
 	untimeout((void (*)(void *))nd6_dad_timer, (void *)ifa);
 
-
+	IFA_LOCK(&ia->ia_ifa);
 	log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n",
-	    if_name(ifa->ifa_ifp), ip6_sprintf(&ia->ia_addr.sin6_addr));
+	    if_name(ifp), ip6_sprintf(&ia->ia_addr.sin6_addr));
 	log(LOG_ERR, "%s: manual intervention required\n",
-	    if_name(ifa->ifa_ifp));
-
-	lck_mtx_lock(dad6_mutex);
-	TAILQ_REMOVE(&dadq, (struct dadq *)dp, dad_list);
-	lck_mtx_unlock(dad6_mutex);
-	FREE(dp, M_IP6NDP);
-	dp = NULL;
-	ifafree(ifa);
+	    if_name(ifp));
+	IFA_UNLOCK(&ia->ia_ifa);
+
+	if (hwdupposs ||
+	    (dontignhwdup && IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr))) {
+		log(LOG_ERR, "%s: possible hardware address duplication "
+		    "detected, disable IPv6\n", if_name(ifp));
+
+		lck_rw_lock_shared(nd_if_rwlock);
+		nd_ifinfo[ifp->if_index].flags |=
+		    ND6_IFF_IFDISABLED;
+		lck_rw_done(nd_if_rwlock);
+	}
+
+	/*
+	 * Notify the configuration agent so that the user learns of the
+	 * duplicate address and it can be removed.
+	 */
+	in6_post_msg(ifp, KEV_INET6_NEW_USER_ADDR, ia);
+	nd6_dad_detach(dp, ifa);
+	DAD_REMREF(dp);		/* drop our reference */
 }
 
 static void
-nd6_dad_ns_output(
-	struct dadq *dp,
-	struct ifaddr *ifa)
+nd6_dad_ns_output(struct dadq *dp, struct ifaddr *ifa)
 {
 	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
 	struct ifnet *ifp = ifa->ifa_ifp;
+	struct in6_addr taddr6;
 
+	DAD_LOCK(dp);
 	dp->dad_ns_tcount++;
 	if ((ifp->if_flags & IFF_UP) == 0) {
-#if 0
-		printf("%s: interface down?\n", if_name(ifp));
-#endif
+		DAD_UNLOCK(dp);
 		return;
 	}
 	if ((ifp->if_flags & IFF_RUNNING) == 0) {
-#if 0
-		printf("%s: interface not running?\n", if_name(ifp));
-#endif
+		DAD_UNLOCK(dp);
 		return;
 	}
 
 	dp->dad_ns_ocount++;
-	nd6_ns_output(ifp, NULL, &ia->ia_addr.sin6_addr, NULL, 1, 0);
+	DAD_UNLOCK(dp);
+	IFA_LOCK_SPIN(&ia->ia_ifa);
+	taddr6 = ia->ia_addr.sin6_addr;
+	IFA_UNLOCK(&ia->ia_ifa);
+	nd6_ns_output(ifp, NULL, &taddr6, NULL, 1);
 }
 
 static void
-nd6_dad_ns_input(
-	struct ifaddr *ifa)
+nd6_dad_ns_input(struct ifaddr *ifa)
 {
-	struct in6_ifaddr *ia;
-	const struct in6_addr *taddr6;
 	struct dadq *dp;
 	int duplicate;
+	struct ifnet *ifp;
 
-	if (!ifa)
+	if (ifa == NULL)
 		panic("ifa == NULL in nd6_dad_ns_input");
 
-	ia = (struct in6_ifaddr *)ifa;
-	taddr6 = &ia->ia_addr.sin6_addr;
+	ifp = ifa->ifa_ifp;
 	duplicate = 0;
 	dp = nd6_dad_find(ifa);
 
 	/* Quickhack - completely ignore DAD NS packets */
 	if (dad_ignore_ns) {
+		struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
+		IFA_LOCK(&ia->ia_ifa);
 		nd6log((LOG_INFO,
 		    "nd6_dad_ns_input: ignoring DAD NS packet for "
-		    "address %s(%s)\n", ip6_sprintf(taddr6),
+		    "address %s(%s)\n", ip6_sprintf(&ia->ia_addr.sin6_addr),
 		    if_name(ifa->ifa_ifp)));
+		IFA_UNLOCK(&ia->ia_ifa);
 		return;
 	}
 
@@ -1494,37 +1827,184 @@ nd6_dad_ns_input(
 	 * if I'm yet to start DAD, someone else started using this address
 	 * first.  I have a duplicate and you win.
 	 */
-	if (!dp || dp->dad_ns_ocount == 0)
+	if (dp != NULL)
+		DAD_LOCK(dp);
+	if (dp == NULL || dp->dad_ns_ocount == 0)
 		duplicate++;
 
 	/* XXX more checks for loopback situation - see nd6_dad_timer too */
 
 	if (duplicate) {
-		dp = NULL;	/* will be freed in nd6_dad_duplicated() */
-		nd6_dad_duplicated(ifa);
-	} else {
+		if (dp != NULL) {
+			DAD_UNLOCK(dp);
+			DAD_REMREF(dp);
+			dp = NULL;
+		}
+		nd6_dad_duplicated(ifa, TRUE);
+	} else if (dp != NULL) {
 		/*
 		 * not sure if I got a duplicate.
 		 * increment ns count and see what happens.
 		 */
-		if (dp)
-			dp->dad_ns_icount++;
+		dp->dad_ns_icount++;
+		DAD_UNLOCK(dp);
+		DAD_REMREF(dp);
 	}
 }
 
 static void
-nd6_dad_na_input(
-	struct ifaddr *ifa)
+nd6_dad_na_input(struct ifaddr *ifa, caddr_t lladdr, int lladdrlen)
 {
+	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
 	struct dadq *dp;
+	int hwdupposs;
 
-	if (!ifa)
+	if (ifa == NULL)
 		panic("ifa == NULL in nd6_dad_na_input");
 
 	dp = nd6_dad_find(ifa);
-	if (dp)
-		dp->dad_na_icount++;
-
+	if (dp == NULL) {
+		log(LOG_ERR, "nd6_dad_na_input: DAD structure not found\n");
+		return;
+	}
+	
+	/*
+	 * If the address is a link-local address formed from an interface
+	 * identifier based on the hardware address which is supposed to be
+	 * uniquely assigned (e.g., EUI-64 for an Ethernet interface), IP
+	 * operation on the interface SHOULD be disabled according to RFC 4862,
+	 * section 5.4.5, but here we decide not to disable if the target
+	 * hardware address is not also ours, which is a transitory possibility
+	 * in the presence of network-resident sleep proxies on the local link.
+	 */
+	hwdupposs = 0;
+	IFA_LOCK(ifa);
+	if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) {
+		struct ifnet *ifp;
+		struct in6_addr in6;
+		
+		IFA_UNLOCK(ifa);
+		ifp = ifa->ifa_ifp;
+		
+		/*
+		 * To avoid over-reaction, we only apply this logic when we are
+		 * very sure that hardware addresses are supposed to be unique.
+		 */
+		switch (ifp->if_type) {
+		case IFT_BRIDGE:
+		case IFT_ETHER:
+		case IFT_FDDI:
+		case IFT_ATM:
+		case IFT_IEEE1394:
+#ifdef IFT_IEEE80211
+		case IFT_IEEE80211:
+#endif
+			/* Check if our hardware address matches the target */
+			if (lladdr != NULL && lladdrlen > 0) {
+				struct ifaddr *llifa;
+				struct sockaddr_dl *sdl;
+				
+				llifa = ifp->if_lladdr;
+				IFA_LOCK(llifa);
+				sdl = (struct sockaddr_dl *)llifa->ifa_addr;
+				if (lladdrlen == sdl->sdl_alen &&
+				    bcmp(lladdr, LLADDR(sdl), lladdrlen) == 0)
+					hwdupposs = 1;
+				IFA_UNLOCK(llifa);
+			}
+			in6 = ia->ia_addr.sin6_addr;
+			if (in6_get_hw_ifid(ifp, &in6) != 0)
+				break;
+			/*
+			 * Apply this logic only to the EUI-64 form of
+			 * link-local interface identifiers.
+			 */
+			IFA_LOCK(ifa);
+			if (hwdupposs &&
+			    !IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &in6)) {
+				hwdupposs = 0;
+			} else if (lladdr == NULL &&
+			    IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &in6)) {
+				/*
+				 * We received a NA with no target link-layer
+				 * address option. This means that someone else
+				 * has our address. Mark it as a hardware
+				 * duplicate so we disable IPv6 later on.
+				 */
+				hwdupposs = 1;
+			}
+			IFA_UNLOCK(ifa);
+			break;
+		default:
+			break;
+		}
+	} else {
+		IFA_UNLOCK(ifa);
+	}
+	
+	DAD_LOCK_SPIN(dp);
+	dp->dad_na_icount++;
+	if (hwdupposs)
+		dp->dad_na_ixcount++;
+	DAD_UNLOCK(dp);
+	DAD_REMREF(dp);
+	
 	/* remove the address. */
-	nd6_dad_duplicated(ifa);
+	nd6_dad_duplicated(ifa, FALSE);
+}
+
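+/*
+ * Reference counting for dadq entries: nd6_dad_attach() grants one
+ * reference to the caller and one to the dadq list, and each
+ * nd6_dad_find() lookup takes its own.  When the final reference is
+ * dropped in dad_remref(), the entry must already have been detached
+ * from the list; its dad_ifa reference is then released and the entry
+ * returned to dad_zone.
+ */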
+static void
+dad_addref(struct dadq *dp, int locked)
+{
+	if (!locked)
+		DAD_LOCK_SPIN(dp);
+	else
+		DAD_LOCK_ASSERT_HELD(dp);
+
+	if (++dp->dad_refcount == 0) {
+		panic("%s: dad %p wraparound refcnt\n", __func__, dp);
+		/* NOTREACHED */
+	}
+	if (!locked)
+		DAD_UNLOCK(dp);
+}
+
+static void
+dad_remref(struct dadq *dp)
+{
+	struct ifaddr *ifa;
+
+	DAD_LOCK_SPIN(dp);
+	if (dp->dad_refcount == 0)
+		panic("%s: dad %p negative refcnt\n", __func__, dp);
+	--dp->dad_refcount;
+	if (dp->dad_refcount > 0) {
+		DAD_UNLOCK(dp);
+		return;
+	}
+	DAD_UNLOCK(dp);
+
+	if (dp->dad_attached ||
+	    dp->dad_list.tqe_next != NULL || dp->dad_list.tqe_prev != NULL) {
+		panic("%s: attached dad=%p is being freed", __func__, dp);
+		/* NOTREACHED */
+	}
+
+	if ((ifa = dp->dad_ifa) != NULL) {
+		IFA_REMREF(ifa);	/* drop dad_ifa reference */
+		dp->dad_ifa = NULL;
+	}
+
+	lck_mtx_destroy(&dp->dad_lock, ifa_mtx_grp);
+	zfree(dad_zone, dp);
+}
+
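+/*
+ * Mark the link-layer address as reachable in the per-interface
+ * llreach records; this is a no-op when nd6_llreach_base is 0,
+ * i.e. the mechanism is disabled.
+ */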
+void
+nd6_llreach_set_reachable(struct ifnet *ifp, void *addr, unsigned int alen)
+{
+	/* Nothing more to do if it's disabled */
+	if (nd6_llreach_base == 0)
+		return;
+
+	ifnet_llreach_set_reachable(ifp, ETHERTYPE_IPV6, addr, alen);
 }
diff --git a/bsd/netinet6/nd6_rtr.c b/bsd/netinet6/nd6_rtr.c
index 10e965185..2e5c5eae5 100644
--- a/bsd/netinet6/nd6_rtr.c
+++ b/bsd/netinet6/nd6_rtr.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -70,7 +70,11 @@
 #include <sys/errno.h>
 #include <sys/syslog.h>
 #include <sys/queue.h>
+#include <sys/mcache.h>
+
 #include <kern/lock.h>
+#include <kern/zalloc.h>
+#include <machine/machine_routines.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
@@ -91,41 +95,170 @@
 
 #define SDL(s)	((struct sockaddr_dl *)s)
 
+static struct nd_defrouter *defrtrlist_update_common(struct nd_defrouter *,
+    boolean_t);
 static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *);
-static struct in6_ifaddr *in6_ifadd(struct nd_prefix *,
-	struct in6_addr *);
+
+static struct in6_ifaddr *in6_ifadd(struct nd_prefix *, int);
+static void defrtrlist_sync(struct ifnet *);
+
+static void defrouter_select_common(struct ifnet *, int);
+
 static struct nd_pfxrouter *pfxrtr_lookup(struct nd_prefix *,
 	struct nd_defrouter *);
 static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *);
 static void pfxrtr_del(struct nd_pfxrouter *);
 static struct nd_pfxrouter *find_pfxlist_reachable_router(struct nd_prefix *);
-static void defrouter_addifreq(struct ifnet *);
 static void nd6_rtmsg(int, struct rtentry *);
 
-static void in6_init_address_ltimes(struct nd_prefix *ndpr,
-					 struct in6_addrlifetime *lt6);
+static int nd6_prefix_onlink_common(struct nd_prefix *, boolean_t,
+    unsigned int);
+static struct nd_prefix *nd6_prefix_equal_lookup(struct nd_prefix *, boolean_t);
+static void nd6_prefix_sync(struct ifnet *);
+
+static void in6_init_address_ltimes(struct nd_prefix *,
+    struct in6_addrlifetime *, boolean_t);
 
 static int rt6_deleteroute(struct radix_node *, void *);
 
+static struct nd_defrouter *nddr_alloc(int);
+static void nddr_free(struct nd_defrouter *);
+static void nddr_trace(struct nd_defrouter *, int);
+
+static struct nd_prefix *ndpr_alloc(int);
+static void ndpr_free(struct nd_prefix *);
+static void ndpr_trace(struct nd_prefix *, int);
+
 extern int nd6_recalc_reachtm_interval;
 
 static struct ifnet *nd6_defifp;
 int nd6_defifindex;
+static unsigned int nd6_defrouter_genid;
+
+int ip6_use_tempaddr = 1; /* use temp addr by default for testing now */
 
-int ip6_use_tempaddr = 0;
+int nd6_accept_6to4 = 1;
 
 int ip6_desync_factor;
 u_int32_t ip6_temp_preferred_lifetime = DEF_TEMP_PREFERRED_LIFETIME;
 u_int32_t ip6_temp_valid_lifetime = DEF_TEMP_VALID_LIFETIME;
 /*
  * shorter lifetimes for debugging purposes.
-int ip6_temp_preferred_lifetime = 800;
-static int ip6_temp_valid_lifetime = 1800;
+u_int32_t ip6_temp_preferred_lifetime = 800;
+static u_int32_t ip6_temp_valid_lifetime = 1800;
 */
 int ip6_temp_regen_advance = TEMPADDR_REGEN_ADVANCE;
 
 extern lck_mtx_t *nd6_mutex;
 
+/* Serialization variables for single thread access to nd_prefix */
+static boolean_t nd_prefix_busy;
+static void *nd_prefix_waitchan = &nd_prefix_busy;
+static int nd_prefix_waiters = 0;
+
+/* Serialization variables for single thread access to nd_defrouter */
+static boolean_t nd_defrouter_busy;
+static void *nd_defrouter_waitchan = &nd_defrouter_busy;
+static int nd_defrouter_waiters = 0;
+
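+/* Router preference values used to rank default routers (cf. RFC 4191) */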
+/* RTPREF_MEDIUM has to be 0! */
+#define RTPREF_HIGH	1
+#define RTPREF_MEDIUM	0
+#define RTPREF_LOW	(-1)
+#define RTPREF_RESERVED	(-2)
+#define RTPREF_INVALID	(-3)	/* internal */
+
+#define	NDPR_TRACE_HIST_SIZE	32		/* size of trace history */
+
+/* For gdb */
+__private_extern__ unsigned int ndpr_trace_hist_size = NDPR_TRACE_HIST_SIZE;
+
+struct nd_prefix_dbg {
+	struct nd_prefix	ndpr_pr;		/* nd_prefix */
+	u_int16_t		ndpr_refhold_cnt;	/* # of ref */
+	u_int16_t		ndpr_refrele_cnt;	/* # of rele */
+	/*
+	 * Circular lists of ndpr_addref and ndpr_remref callers.
+	 */
+	ctrace_t		ndpr_refhold[NDPR_TRACE_HIST_SIZE];
+	ctrace_t		ndpr_refrele[NDPR_TRACE_HIST_SIZE];
+};
+
+static unsigned int ndpr_debug;			/* debug flags */
+static unsigned int ndpr_size;			/* size of zone element */
+static struct zone *ndpr_zone;			/* zone for nd_prefix */
+
+#define	NDPR_ZONE_MAX	64			/* maximum elements in zone */
+#define	NDPR_ZONE_NAME	"nd6_prefix"		/* zone name */
+
+#define	NDDR_TRACE_HIST_SIZE	32              /* size of trace history */
+
+/* For gdb */
+__private_extern__ unsigned int nddr_trace_hist_size = NDDR_TRACE_HIST_SIZE;
+
+struct nd_defrouter_dbg {
+	struct nd_defrouter	nddr_dr;		/* nd_defrouter */
+	uint16_t		nddr_refhold_cnt;	/* # of ref */
+	uint16_t		nddr_refrele_cnt;	/* # of rele */
+	/*
+	 * Circular lists of nddr_addref and nddr_remref callers.
+	 */
+	ctrace_t		nddr_refhold[NDDR_TRACE_HIST_SIZE];
+	ctrace_t		nddr_refrele[NDDR_TRACE_HIST_SIZE];
+};
+
+static unsigned int nddr_debug;			/* debug flags */
+static unsigned int nddr_size;			/* size of zone element */
+static struct zone *nddr_zone;			/* zone for nd_defrouter */
+
+#define	NDDR_ZONE_MAX	64			/* maximum elements in zone */
+#define	NDDR_ZONE_NAME	"nd6_defrouter"		/* zone name */
+
+static unsigned int ndprtr_size;		/* size of zone element */
+static struct zone *ndprtr_zone;		/* zone for nd_pfxrouter */
+
+#define	NDPRTR_ZONE_MAX	64			/* maximum elements in zone */
+#define	NDPRTR_ZONE_NAME "nd6_pfxrouter"	/* zone name */
+
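+/*
+ * Called once at initialization to create the zones backing nd_prefix,
+ * nd_defrouter and nd_pfxrouter entries.  The first two use debug-sized
+ * elements when "ifa_debug" is set via boot-args, so that refhold and
+ * refrele call traces can be recorded.
+ */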
+void
+nd6_rtr_init(void)
+{
+	PE_parse_boot_argn("ifa_debug", &ndpr_debug, sizeof (ndpr_debug));
+	PE_parse_boot_argn("ifa_debug", &nddr_debug, sizeof (nddr_debug));
+
+	ndpr_size = (ndpr_debug == 0) ? sizeof (struct nd_prefix) :
+	    sizeof (struct nd_prefix_dbg);
+	ndpr_zone = zinit(ndpr_size, NDPR_ZONE_MAX * ndpr_size, 0,
+	    NDPR_ZONE_NAME);
+	if (ndpr_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, NDPR_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(ndpr_zone, Z_EXPAND, TRUE);
+	zone_change(ndpr_zone, Z_CALLERACCT, FALSE);
+
+	nddr_size = (nddr_debug == 0) ? sizeof (struct nd_defrouter) :
+	    sizeof (struct nd_defrouter_dbg);
+	nddr_zone = zinit(nddr_size, NDDR_ZONE_MAX * nddr_size, 0,
+	    NDDR_ZONE_NAME);
+	if (nddr_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, NDDR_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(nddr_zone, Z_EXPAND, TRUE);
+	zone_change(nddr_zone, Z_CALLERACCT, FALSE);
+
+	ndprtr_size = sizeof (struct nd_pfxrouter);
+	ndprtr_zone = zinit(ndprtr_size, NDPRTR_ZONE_MAX * ndprtr_size, 0,
+	    NDPRTR_ZONE_NAME);
+	if (ndprtr_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, NDPRTR_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(ndprtr_zone, Z_EXPAND, TRUE);
+	zone_change(ndprtr_zone, Z_CALLERACCT, FALSE);
+}
+
 /*
  * Receive Router Solicitation Message - just for routers.
  * Router solicitation/advertisement is mostly managed by userland program
@@ -143,17 +276,8 @@ nd6_rs_input(
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct nd_router_solicit *nd_rs;
 	struct in6_addr saddr6 = ip6->ip6_src;
-#if 0
-	struct in6_addr daddr6 = ip6->ip6_dst;
-#endif
 	char *lladdr = NULL;
 	int lladdrlen = 0;
-#if 0
-	struct sockaddr_dl *sdl = (struct sockaddr_dl *)NULL;
-	struct llinfo_nd6 *ln = (struct llinfo_nd6 *)NULL;
-	struct rtentry *rt = NULL;
-	int is_newentry;
-#endif
 	union nd_opts ndopts;
 
 	/* If I'm not a router, ignore it. */
@@ -170,11 +294,25 @@ nd6_rs_input(
 	}
 
 	/*
-	 * Don't update the neighbor cache, if src = ::.
-	 * This indicates that the src has no IP address assigned yet.
-	 */
-	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
+	 * Don't update the neighbor cache if src = :: or a non-neighbor.
+	 * The former case indicates that the src has no IP address assigned
+	 * yet.  See nd6_ns_input() for the latter case.
+	 */
+	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src))
 		goto freeit;
+	else {
+		struct sockaddr_in6 src_sa6;
+
+		bzero(&src_sa6, sizeof(src_sa6));
+		src_sa6.sin6_family = AF_INET6;
+		src_sa6.sin6_len = sizeof(src_sa6);
+		src_sa6.sin6_addr = ip6->ip6_src;
+		if (!nd6_is_addr_neighbor(&src_sa6, ifp, 0)) {
+			nd6log((LOG_INFO, "nd6_rs_input: "
+				"RS packet from non-neighbor\n"));
+			goto freeit;
+		}
+	}
 
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, icmp6len, return);
@@ -238,14 +376,9 @@ nd6_ra_input(
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct nd_router_advert *nd_ra;
 	struct in6_addr saddr6 = ip6->ip6_src;
-#if 0
-	struct in6_addr daddr6 = ip6->ip6_dst;
-	int flags; /* = nd_ra->nd_ra_flags_reserved; */
-	int is_managed = ((flags & ND_RA_FLAG_MANAGED) != 0);
-	int is_other = ((flags & ND_RA_FLAG_OTHER) != 0);
-#endif
+	int mcast = 0;
 	union nd_opts ndopts;
-	struct nd_defrouter *dr;
+	struct nd_defrouter *dr = NULL;
 	struct timeval timenow;
 
 	getmicrotime(&timenow);
@@ -292,20 +425,22 @@ nd6_ra_input(
 	struct nd_defrouter dr0;
 	u_int32_t advreachable = nd_ra->nd_ra_reachable;
 
+	/* remember whether this is a multicast advertisement */
+	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
+		mcast = 1;
+
 	lck_rw_lock_shared(nd_if_rwlock);
 	if (ifp->if_index >= nd_ifinfo_indexlim) {
 		lck_rw_done(nd_if_rwlock);
 		goto freeit;
 	}
 	ndi = &nd_ifinfo[ifp->if_index];
+	bzero(&dr0, sizeof (dr0));
 	dr0.rtaddr = saddr6;
 	dr0.flags  = nd_ra->nd_ra_flags_reserved;
 	dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
 	dr0.expire = timenow.tv_sec + dr0.rtlifetime;
 	dr0.ifp = ifp;
-	dr0.advint = 0;		/* Mobile IPv6 */
-	dr0.advint_expire = 0;	/* Mobile IPv6 */
-	dr0.advints_lost = 0;	/* Mobile IPv6 */
 	/* unspecified or not? (RFC 2461 6.3.4) */
 	if (advreachable) {
 		advreachable = ntohl(advreachable);
@@ -322,7 +457,9 @@ nd6_ra_input(
 		ndi->chlim = nd_ra->nd_ra_curhoplimit;
 	lck_rw_done(nd_if_rwlock);
 	ndi = NULL;
+	lck_mtx_lock(nd6_mutex);
 	dr = defrtrlist_update(&dr0);
+	lck_mtx_unlock(nd6_mutex);
     }
 
 	/*
@@ -366,18 +503,9 @@ nd6_ra_input(
 				continue;
 			}
 
-			/* aggregatable unicast address, rfc2374 */
-			if ((pi->nd_opt_pi_prefix.s6_addr8[0] & 0xe0) == 0x20
-			 && pi->nd_opt_pi_prefix_len != 64) {
-				nd6log((LOG_INFO,
-				    "nd6_ra_input: invalid prefixlen "
-				    "%d for rfc2374 prefix %s, ignored\n",
-				    pi->nd_opt_pi_prefix_len,
-				    ip6_sprintf(&pi->nd_opt_pi_prefix)));
-				continue;
-			}
-
 			bzero(&pr, sizeof(pr));
+			lck_mtx_init(&pr.ndpr_lock, ifa_mtx_grp, ifa_mtx_attr);
+			NDPR_LOCK(&pr);
 			pr.ndpr_prefix.sin6_family = AF_INET6;
 			pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix);
 			pr.ndpr_prefix.sin6_addr = pi->nd_opt_pi_prefix;
@@ -392,10 +520,35 @@ nd6_ra_input(
 			pr.ndpr_pltime =
 				ntohl(pi->nd_opt_pi_preferred_time);
 
-			if (in6_init_prefix_ltimes(&pr))
-				continue; /* prefix lifetime init failed */
+			/*
+			 * Exceptions to stateless autoconfiguration processing:
+			 * + nd6_accept_6to4 == 0 && address has 6to4 prefix
+			 * + ip6_only_allow_rfc4193_prefix != 0 && address not RFC 4193
+			 */
+			if (ip6_only_allow_rfc4193_prefix &&
+			    !IN6_IS_ADDR_UNIQUE_LOCAL(&pi->nd_opt_pi_prefix)) {
+				nd6log((LOG_INFO,
+				    "nd6_ra_input: no SLAAC on prefix %s [not RFC 4193]\n",
+				    ip6_sprintf(&pi->nd_opt_pi_prefix)));
+				pr.ndpr_raf_auto = 0;
+			} else if (!nd6_accept_6to4 &&
+				     IN6_IS_ADDR_6TO4(&pi->nd_opt_pi_prefix)) {
+				nd6log((LOG_INFO,
+				    "nd6_ra_input: no SLAAC on prefix %s [6to4]\n",
+				    ip6_sprintf(&pi->nd_opt_pi_prefix)));
+				pr.ndpr_raf_auto = 0;
+			}
 
-			(void)prelist_update(&pr, dr, m);
+			if (in6_init_prefix_ltimes(&pr)) {
+				NDPR_UNLOCK(&pr);
+				lck_mtx_destroy(&pr.ndpr_lock, ifa_mtx_grp);
+				continue; /* prefix lifetime init failed */
+			} else {
+				NDPR_UNLOCK(&pr);
+			}
+			(void)prelist_update(&pr, dr, m, mcast);
+			lck_mtx_destroy(&pr.ndpr_lock, ifa_mtx_grp);
 		}
 	}
 
@@ -475,16 +628,20 @@ nd6_ra_input(
 	 * router's neighbor cache, which might also affect our on-link
 	 * detection of advertised prefixes.
 	 */
-	pfxlist_onlink_check(0);
+	lck_mtx_lock(nd6_mutex);
+	pfxlist_onlink_check();
+	lck_mtx_unlock(nd6_mutex);
     }
 
  freeit:
 	m_freem(m);
+	if (dr)
+		NDDR_REMREF(dr);
 	return;
 
  bad:
 	icmp6stat.icp6s_badra++;
-	m_freem(m);
+	goto freeit;
 }
 
 /*
@@ -503,13 +660,16 @@ nd6_rtmsg(cmd, rt)
 	RT_LOCK_ASSERT_HELD(rt);
 
 	bzero((caddr_t)&info, sizeof(info));
-	/* Lock ifp for if_addrlist */
+	/* Lock ifp for if_lladdr */
 	ifnet_lock_shared(ifp);
 	info.rti_info[RTAX_DST] = rt_key(rt);
 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
-	info.rti_info[RTAX_IFP] =
-		TAILQ_FIRST(&ifp->if_addrlist)->ifa_addr;
+	/*
+	 * ifa_addr pointers for both should always be valid
+	 * in this context; no need to hold locks.
+	 */
+	info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr;
 	info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
 
 	rt_missmsg(cmd, &info, rt->rt_flags, 0);
@@ -517,11 +677,21 @@ nd6_rtmsg(cmd, rt)
 }
 
 void
-defrouter_addreq(
-	struct nd_defrouter *new)
+defrouter_addreq(struct nd_defrouter *new, boolean_t scoped)
 {
 	struct sockaddr_in6 def, mask, gate;
 	struct rtentry *newrt = NULL;
+	unsigned int ifscope;
+	int err;
+
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
+
+	if (new->stateflags & NDDRF_INSTALLED)
+		return;
+
+	nd6log2((LOG_INFO, "%s: adding default router %s, scoped=%d, "
+	    "static=%d\n", if_name(new->ifp), ip6_sprintf(&new->rtaddr),
+	    scoped, (new->stateflags & NDDRF_STATIC) ? 1 : 0));
 
 	Bzero(&def, sizeof(def));
 	Bzero(&mask, sizeof(mask));
@@ -532,65 +702,28 @@ defrouter_addreq(
 	def.sin6_family = mask.sin6_family = gate.sin6_family = AF_INET6;
 	gate.sin6_addr = new->rtaddr;
 
-	(void) rtrequest(RTM_ADD, (struct sockaddr *)&def,
+	ifscope = scoped ? new->ifp->if_index : IFSCOPE_NONE;
+
+	err = rtrequest_scoped(RTM_ADD, (struct sockaddr *)&def,
 	    (struct sockaddr *)&gate, (struct sockaddr *)&mask,
-	    RTF_GATEWAY, &newrt);
+	    RTF_GATEWAY, &newrt, ifscope);
+
 	if (newrt) {
 		RT_LOCK(newrt);
 		nd6_rtmsg(RTM_ADD, newrt); /* tell user process */
 		RT_REMREF_LOCKED(newrt);
 		RT_UNLOCK(newrt);
-	}
-	return;
-}
-
-/* Add a route to a given interface as default */
-void
-defrouter_addifreq(
-	struct ifnet *ifp)
-{
-	struct sockaddr_in6 def, mask;
-	struct ifaddr *ifa = NULL;
-	struct rtentry *newrt = NULL;
-	int error;
-	u_int32_t flags;
-
-	bzero(&def, sizeof(def));
-	bzero(&mask, sizeof(mask));
-
-	def.sin6_len = mask.sin6_len = sizeof(struct sockaddr_in6);
-	def.sin6_family = mask.sin6_family = AF_INET6;
-
-	/*
-	 * Search for an ifaddr beloging to the specified interface.
-	 * XXX: An IPv6 address are required to be assigned on the interface.
-	 */
-	if ((ifa = ifaof_ifpforaddr((struct sockaddr *)&def, ifp)) == NULL) {
-		nd6log((LOG_ERR,	/* better error? */
-		    "defrouter_addifreq: failed to find an ifaddr "
-		    "to install a route to interface %s\n",
-		    if_name(ifp)));
-		return;
-	}
-
-	flags = ifa->ifa_flags;
-	error = rtrequest(RTM_ADD, (struct sockaddr *)&def, ifa->ifa_addr,
-	    (struct sockaddr *)&mask, flags, &newrt);
-	if (error != 0) {
-		nd6log((LOG_ERR,
-		    "defrouter_addifreq: failed to install a route to "
-		    "interface %s (errno = %d)\n",
-		    if_name(ifp), error));
+		new->stateflags |= NDDRF_INSTALLED;
+		if (ifscope != IFSCOPE_NONE)
+			new->stateflags |= NDDRF_IFSCOPE;
+		new->genid = nd6_defrouter_genid;
 	} else {
-		if (newrt) {
-			RT_LOCK(newrt);
-			nd6_rtmsg(RTM_ADD, newrt);
-			RT_REMREF_LOCKED(newrt);
-			RT_UNLOCK(newrt);
-		}
-		in6_post_msg(ifp, KEV_INET6_DEFROUTER, (struct in6_ifaddr *)ifa);
+		nd6log((LOG_ERR, "%s: failed to add default router "
+		    "%s on %s scoped %d (errno = %d)\n", __func__,
+		    ip6_sprintf(&gate.sin6_addr), if_name(new->ifp),
+		    (ifscope != IFSCOPE_NONE), err));
 	}
-	ifafree(ifa);
+	new->err = err;
 }
 
 struct nd_defrouter *
@@ -600,25 +733,47 @@ defrouter_lookup(
 {
 	struct nd_defrouter *dr;
 
-
 	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
 
 	for (dr = TAILQ_FIRST(&nd_defrouter); dr;
 	     dr = TAILQ_NEXT(dr, dr_entry)) {
-		if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr))
+		NDDR_LOCK(dr);
+		if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) {
+			NDDR_ADDREF_LOCKED(dr);
+			NDDR_UNLOCK(dr);
 			return(dr);
+		}
+		NDDR_UNLOCK(dr);
 	}
 
-	return(NULL);		/* search failed */
+	return (NULL);		/* search failed */
 }
 
+/*
+ * Remove the default route for a given router.
+ * This is just a subroutine function for defrouter_select(), and should
+ * not be called from anywhere else.
+ */
 void
-defrouter_delreq(
-	struct nd_defrouter *dr,
-	int dofree)
+defrouter_delreq(struct nd_defrouter *dr)
 {
 	struct sockaddr_in6 def, mask, gate;
 	struct rtentry *oldrt = NULL;
+	unsigned int ifscope;
+	int err;
+
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
+
+	/* ifp would be NULL for the "drany" case */
+	if (dr->ifp != NULL && !(dr->stateflags & NDDRF_INSTALLED))
+		return;
+
+	NDDR_LOCK_ASSERT_HELD(dr);
+
+	nd6log2((LOG_INFO, "%s: removing default router %s, scoped=%d, "
+	    "static=%d\n", dr->ifp != NULL ? if_name(dr->ifp) : "ANY",
+	    ip6_sprintf(&dr->rtaddr), (dr->stateflags & NDDRF_IFSCOPE) ? 1 : 0,
+	    (dr->stateflags & NDDRF_STATIC) ? 1 : 0));
 
 	Bzero(&def, sizeof(def));
 	Bzero(&mask, sizeof(mask));
@@ -629,28 +784,155 @@ defrouter_delreq(
 	def.sin6_family = mask.sin6_family = gate.sin6_family = AF_INET6;
 	gate.sin6_addr = dr->rtaddr;
 
-	(void) rtrequest(RTM_DELETE, (struct sockaddr *)&def,
-	    (struct sockaddr *)&gate, (struct sockaddr *)&mask,
-	    RTF_GATEWAY, &oldrt);
+	if (dr->ifp != NULL) {
+		ifscope = (dr->stateflags & NDDRF_IFSCOPE) ?
+		    dr->ifp->if_index : IFSCOPE_NONE;
+	} else {
+		ifscope = IFSCOPE_NONE;
+	}
+	err = rtrequest_scoped(RTM_DELETE,
+	    (struct sockaddr *)&def, (struct sockaddr *)&gate,
+	    (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, ifscope);
+
 	if (oldrt) {
 		RT_LOCK(oldrt);
 		nd6_rtmsg(RTM_DELETE, oldrt);
 		RT_UNLOCK(oldrt);
 		rtfree(oldrt);
+	} else if (err != ESRCH) {
+		nd6log((LOG_ERR, "%s: failed to delete default router "
+		    "%s on %s scoped %d (errno = %d)\n", __func__,
+		    ip6_sprintf(&gate.sin6_addr), dr->ifp != NULL ?
+		    if_name(dr->ifp) : "ANY", (ifscope != IFSCOPE_NONE), err));
+	}
+	/* ESRCH means it's no longer in the routing table; ignore it */
+	if (oldrt != NULL || err == ESRCH) {
+		dr->stateflags &= ~NDDRF_INSTALLED;
+		if (ifscope != IFSCOPE_NONE)
+			dr->stateflags &= ~NDDRF_IFSCOPE;
+	}
+	dr->err = 0;
+}
+
+
+/*
+ * remove all default routes from default router list
+ */
+void
+defrouter_reset(void)
+{
+	struct nd_defrouter *dr, drany;
+
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
+
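+	/*
+	 * Each uninstall drops nd6_mutex, so the list may change while
+	 * we sleep; restart the scan from the head after every removal.
+	 */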
+	dr = TAILQ_FIRST(&nd_defrouter);
+	while (dr) {
+		NDDR_LOCK(dr);
+		if (dr->stateflags & NDDRF_INSTALLED) {
+			NDDR_ADDREF_LOCKED(dr);
+			NDDR_UNLOCK(dr);
+			lck_mtx_unlock(nd6_mutex);
+			NDDR_LOCK(dr);
+			defrouter_delreq(dr);
+			NDDR_UNLOCK(dr);
+			lck_mtx_lock(nd6_mutex);
+			NDDR_REMREF(dr);
+			dr = TAILQ_FIRST(&nd_defrouter);
+		} else {
+			NDDR_UNLOCK(dr);
+			dr = TAILQ_NEXT(dr, dr_entry);
+		}
+	}
+
+	/* Nuke primary (non-scoped) default router */
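+	/*
+	 * The zeroed "drany" entry below has a NULL ifp, which
+	 * defrouter_delreq() treats as a wildcard: the non-scoped
+	 * default entry is removed regardless of interface.
+	 */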
+	if (ip6_doscopedroute) {
+		bzero(&drany, sizeof (drany));
+		lck_mtx_init(&drany.nddr_lock, ifa_mtx_grp, ifa_mtx_attr);
+		lck_mtx_unlock(nd6_mutex);
+		NDDR_LOCK(&drany);
+		defrouter_delreq(&drany);
+		NDDR_UNLOCK(&drany);
+		lck_mtx_destroy(&drany.nddr_lock, ifa_mtx_grp);
+		lck_mtx_lock(nd6_mutex);
+	}
+
+}
+
+int
+defrtrlist_ioctl(u_long cmd, caddr_t data)
+{
+	struct in6_defrouter_32 *r_32 = (struct in6_defrouter_32 *)data;
+	struct in6_defrouter_64 *r_64 = (struct in6_defrouter_64 *)data;
+	struct nd_defrouter dr0;
+	unsigned int ifindex;
+	struct ifnet *dr_ifp;
+	int error = 0, add = 0;
+
+	switch (cmd) {
+	case SIOCDRADD_IN6_32:
+	case SIOCDRADD_IN6_64:
+		++add;
+		/* FALLTHRU */
+	case SIOCDRDEL_IN6_32:
+	case SIOCDRDEL_IN6_64:
+		bzero(&dr0, sizeof (dr0));
+		if (cmd == SIOCDRADD_IN6_64 || cmd == SIOCDRDEL_IN6_64) {
+			dr0.rtaddr = r_64->rtaddr.sin6_addr;
+			dr0.flags = r_64->flags;
+			ifindex = r_64->if_index;
+		} else {
+			dr0.rtaddr = r_32->rtaddr.sin6_addr;
+			dr0.flags = r_32->flags;
+			ifindex = r_32->if_index;
+		}
+		ifnet_head_lock_shared();
+		/* No need to check if ifindex is < 0 since it's unsigned */
+		if (if_index < ifindex ||
+		    (dr_ifp = ifindex2ifnet[ifindex]) == NULL) {
+			ifnet_head_done();
+			error = EINVAL;
+			break;
+		}
+		dr0.ifp = dr_ifp;
+		ifnet_head_done();
+
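+		/*
+		 * For scoped addresses (e.g. link-local), the scope zone id
+		 * is embedded KAME-style in the second 16-bit word of the
+		 * address: fill it in from the interface index when absent,
+		 * and reject a mismatch against the chosen interface.
+		 */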
+		if (IN6_IS_SCOPE_EMBED(&dr0.rtaddr)) {
+			uint16_t *scope = &dr0.rtaddr.s6_addr16[1];
+
+			if (*scope == 0) {
+				*scope = htons(dr_ifp->if_index);
+			} else if (*scope != htons(dr_ifp->if_index)) {
+				error = EINVAL;
+				break;
+			}
+		}
+
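+		/*
+		 * A failed add falls through to the delete path so that any
+		 * stale static entry is cleaned up, while the add's error is
+		 * kept for the caller; a plain delete reports the result of
+		 * the deletion itself.
+		 */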
+		if (add)
+			error = defrtrlist_add_static(&dr0);
+		if (!add || error != 0) {
+			int err = defrtrlist_del_static(&dr0);
+			if (!add)
+				error = err;
+		}
+		break;
+
+	default:
+		error = EOPNOTSUPP; /* check for safety */
+		break;
 	}
 
-	if (dofree)		/* XXX: necessary? */
-		FREE(dr, M_IP6NDP);
+	return (error);
 }
 
 void
-defrtrlist_del(
-	struct nd_defrouter *dr, int nd6locked)
+defrtrlist_del(struct nd_defrouter *dr)
 {
 	struct nd_defrouter *deldr = NULL;
 	struct nd_prefix *pr;
 	struct ifnet *ifp = dr->ifp;
 
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
+
 	/*
 	 * Flush all the routing table entries that use the router
 	 * as a next hop.
@@ -658,38 +940,60 @@ defrtrlist_del(
 	if (!ip6_forwarding &&
 	    (ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD))) {
 		/* above is a good condition? */
+		NDDR_ADDREF(dr);
+		lck_mtx_unlock(nd6_mutex);
 		rt6_flush(&dr->rtaddr, ifp);
+		lck_mtx_lock(nd6_mutex);
+		NDDR_REMREF(dr);
 	}
 
-	if (nd6locked == 0)
-		lck_mtx_lock(nd6_mutex);
 	if (dr == TAILQ_FIRST(&nd_defrouter))
 		deldr = dr;	/* The router is primary. */
 
 	TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
+	++nd6_defrouter_genid;
+
+	nd6log2((LOG_INFO, "%s: freeing defrouter %s\n", if_name(dr->ifp),
+	    ip6_sprintf(&dr->rtaddr)));
+
+	/*
+	 * Delete it from the routing table.
+	 */
+	NDDR_ADDREF(dr);
+	lck_mtx_unlock(nd6_mutex);
+	NDDR_LOCK(dr);
+	defrouter_delreq(dr);
+	NDDR_UNLOCK(dr);
+	lck_mtx_lock(nd6_mutex);
+	NDDR_REMREF(dr);
 
 	/*
 	 * Also delete all the pointers to the router in each prefix lists.
 	 */
 	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
 		struct nd_pfxrouter *pfxrtr;
+
+		NDPR_LOCK(pr);
 		if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL)
 			pfxrtr_del(pfxrtr);
+		NDPR_UNLOCK(pr);
 	}
-	pfxlist_onlink_check(1);
+
+	pfxlist_onlink_check();
 
 	/*
-	 * If the router is the primary one, choose a new one.
-	 * Note that defrouter_select() will remove the current gateway
-	 * from the routing table.
+	 * If the router is the primary one, choose a new one.  If Scoped
+	 * Routing is enabled, always try to pick another eligible router
+	 * on this interface.
 	 */
-	if (deldr)
-		defrouter_select();
+	if ((deldr || ip6_doscopedroute) && !ip6_forwarding &&
+	    (ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD)))
+		defrouter_select(ifp);
 
 	lck_rw_lock_shared(nd_if_rwlock);
 	if (ifp->if_index < nd_ifinfo_indexlim) {
 		struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index];
-		ndi->ndefrouters--;
+		atomic_add_32(&ndi->ndefrouters, -1);
 		if (ndi->ndefrouters < 0) {
 			log(LOG_WARNING, "defrtrlist_del: negative "
 			    "count on %s\n", if_name(ifp));
@@ -697,177 +1001,822 @@ defrtrlist_del(
 	}
 	lck_rw_done(nd_if_rwlock);
 
-	if (nd6locked == 0)
-		lck_mtx_unlock(nd6_mutex);
+	NDDR_REMREF(dr);	/* remove list reference */
+}
+
+int
+defrtrlist_add_static(struct nd_defrouter *new)
+{
+	struct nd_defrouter *dr;
+	int err = 0;
+
+	new->rtlifetime = -1;
+	new->stateflags |= NDDRF_STATIC;
+
+	/* we only want the preference level */
+	new->flags &= ND_RA_FLAG_RTPREF_MASK;
+
+	lck_mtx_lock(nd6_mutex);
+	dr = defrouter_lookup(&new->rtaddr, new->ifp);
+	if (dr != NULL && !(dr->stateflags & NDDRF_STATIC)) {
+		err = EINVAL;
+	} else {
+		if (dr != NULL)
+			NDDR_REMREF(dr);
+		dr = defrtrlist_update(new);
+		if (dr != NULL)
+			err = dr->err;
+		else
+			err = ENOMEM;
+	}
+	if (dr != NULL)
+		NDDR_REMREF(dr);
+	lck_mtx_unlock(nd6_mutex);
+
+	return (err);
+}
+
+int
+defrtrlist_del_static(struct nd_defrouter *new)
+{
+	struct nd_defrouter *dr;
+
+	lck_mtx_lock(nd6_mutex);
+	dr = defrouter_lookup(&new->rtaddr, new->ifp);
+	if (dr == NULL || !(dr->stateflags & NDDRF_STATIC)) {
+		if (dr != NULL)
+			NDDR_REMREF(dr);
+		dr = NULL;
+	} else {
+		defrtrlist_del(dr);
+		NDDR_REMREF(dr);
+	}
+	lck_mtx_unlock(nd6_mutex);
 
-	FREE(dr, M_IP6NDP);
+	return (dr != NULL ? 0 : EINVAL);
+}
+
+/*
+ * for default router selection
+ * regards router-preference field as a 2-bit signed integer
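+ * (RFC 4191 encoding: 01 = high, 00 = medium, 11 = low, with the
+ * reserved value 10 treated as medium)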
+ */
+static int
+rtpref(struct nd_defrouter *dr)
+{
+	switch (dr->flags & ND_RA_FLAG_RTPREF_MASK) {
+	case ND_RA_FLAG_RTPREF_HIGH:
+		return (RTPREF_HIGH);
+	case ND_RA_FLAG_RTPREF_MEDIUM:
+	case ND_RA_FLAG_RTPREF_RSV:
+		return (RTPREF_MEDIUM);
+	case ND_RA_FLAG_RTPREF_LOW:
+		return (RTPREF_LOW);
+	default:
+		/*
+		 * This case should never happen.  If it did, it would mean a
+		 * serious bug in the kernel internals.  We thus always bark
+		 * here.  Or should we even panic?
+		 */
+		log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->flags);
+		return (RTPREF_INVALID);
+	}
+	/* NOTREACHED */
 }
 
 /*
- * Default Router Selection according to Section 6.3.6 of RFC 2461:
- * 1) Routers that are reachable or probably reachable should be
- *    preferred.
+ * Default Router Selection according to Section 6.3.6 of RFC 2461 and
+ * draft-ietf-ipngwg-router-selection:
+ *
+ * 1) Routers that are reachable or probably reachable should be preferred.
+ *    If we have more than one (probably) reachable router, prefer ones
+ *    with the highest router preference.
  * 2) When no routers on the list are known to be reachable or
  *    probably reachable, routers SHOULD be selected in a round-robin
- *    fashion.
+ *    fashion, regardless of router preference values.
  * 3) If the Default Router List is empty, assume that all
  *    destinations are on-link.
+ *
+ * When Scoped Routing is enabled, the selection logic is amended as follows:
+ *
+ * a) When a default interface is specified, the primary/non-scoped default
+ *    router will be set to the reachable router on that link (if any) with
+ *    the highest router preference.
+ * b) When there is more than one router on the same link, the one with
+ *    the highest router preference will be installed, either as a scoped
+ *    or a non-scoped route entry.  If they all share the same preference
+ *    value, the one installed will be the static one or the first
+ *    reachable router encountered, i.e. a static entry wins over a
+ *    dynamic one.
+ * c) When no routers on the list are known to be reachable, or probably
+ *    reachable, no round-robin selection will take place when the default
+ *    interface is set.
+ *
+ * We assume nd_defrouter is sorted by router preference value.
+ * Since the code below covers both with and without router preference cases,
+ * we do not need to classify the cases by ifdef.
  */
-void
-defrouter_select()
+static void
+defrouter_select_common(struct ifnet *ifp, int ignore)
 {
-	struct nd_defrouter *dr, anydr;
+	struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL;
+	struct nd_defrouter *installed_dr0 = NULL;
 	struct rtentry *rt = NULL;
 	struct llinfo_nd6 *ln = NULL;
+	int  update = 0;
+	boolean_t found_installedrt = FALSE;
+
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
 
 	/*
-	 * Search for a (probably) reachable router from the list.
+	 * This function should be called only when acting as an autoconfigured
+	 * host.  Although the remaining part of this function is not effective
+	 * if the node is not an autoconfigured host, we explicitly exclude
+	 * such cases here for safety.
 	 */
-	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
+	if (ip6_forwarding || (!ignore && !ip6_accept_rtadv &&
+	    !(ifp->if_eflags & IFEF_ACCEPT_RTADVD))) {
+		nd6log((LOG_WARNING,
+		    "defrouter_select: called unexpectedly (forwarding=%d, "
+		    "accept_rtadv=%d)\n", ip6_forwarding, ip6_accept_rtadv));
+		return;
+	}
+
+	/*
+	 * Let's handle easy case (3) first:
+	 * If default router list is empty, there's nothing to be done.
+	 */
+	if (!TAILQ_FIRST(&nd_defrouter))
+		return;
+
+	/*
+	 * Due to the number of times we drop nd6_mutex, we need to
+	 * serialize this function.
+	 */
+	while (nd_defrouter_busy) {
+		nd_defrouter_waiters++;
+		msleep(nd_defrouter_waitchan, nd6_mutex, (PZERO-1),
+		    __func__, NULL);
+		lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
+	}
+	nd_defrouter_busy = TRUE;
 
+	/*
+	 * Search for a (probably) reachable router from the list.
+	 * We just pick up the first reachable one (if any), assuming the
+	 * list follows the ordering rule described in defrtrlist_update().
+	 *
+	 * For all intents and purposes of Scoped Routing:
+	 *	selected_dr	= candidate for primary router
+	 *	installed_dr	= currently installed primary router
+	 */
 	for (dr = TAILQ_FIRST(&nd_defrouter); dr;
 	     dr = TAILQ_NEXT(dr, dr_entry)) {
+		boolean_t reachable;
+
 		/* Callee returns a locked route upon success */
+		reachable = FALSE;
+		NDDR_ADDREF(dr);	/* for this for loop */
+		lck_mtx_unlock(nd6_mutex);
 		if ((rt = nd6_lookup(&dr->rtaddr, 0, dr->ifp, 0)) != NULL) {
 			RT_LOCK_ASSERT_HELD(rt);
 			if ((ln = rt->rt_llinfo) != NULL &&
 			    ND6_IS_LLINFO_PROBREACH(ln)) {
-				RT_REMREF_LOCKED(rt);
-				RT_UNLOCK(rt);
-				/* Got it, and move it to the head */
-				TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
-				TAILQ_INSERT_HEAD(&nd_defrouter, dr, dr_entry);
-				break;
+				reachable = TRUE;
+				if (selected_dr == NULL &&
+				    (!ip6_doscopedroute ||
+				    dr->ifp == nd6_defifp)) {
+					selected_dr = dr;
+					NDDR_ADDREF(selected_dr);
+				}
 			}
 			RT_REMREF_LOCKED(rt);
 			RT_UNLOCK(rt);
+			rt = NULL;
 		}
-	}
+		lck_mtx_lock(nd6_mutex);
 
-	if ((dr = TAILQ_FIRST(&nd_defrouter))) {
-		/*
-		 * De-install the previous default gateway and install
-		 * a new one.
-		 * Note that if there is no reachable router in the list,
-		 * the head entry will be used anyway.
-		 * XXX: do we have to check the current routing table entry?
-		 */
-		bzero(&anydr, sizeof(anydr));
-		defrouter_delreq(&anydr, 0);
-		defrouter_addreq(dr);
-	}
-	else {
-		/*
-		 * The Default Router List is empty, so install the default
-		 * route to an inteface.
-		 * XXX: The specification does not say this mechanism should
-		 * be restricted to hosts, but this would be not useful
-		 * (even harmful) for routers.
-		 */
-		if (!ip6_forwarding) {
+		/* Handle case (b) */
+		if (ip6_doscopedroute && dr->ifp == nd6_defifp &&
+		    (selected_dr == NULL || rtpref(dr) > rtpref(selected_dr) ||
+		    (rtpref(dr) == rtpref(selected_dr) &&
+		    (dr->stateflags & NDDRF_STATIC) &&
+		    !(selected_dr->stateflags & NDDRF_STATIC)))) {
+			if (selected_dr)
+				NDDR_REMREF(selected_dr);
+			selected_dr = dr;
+			NDDR_ADDREF(selected_dr);
+		}
+
+		if (!(dr->stateflags & NDDRF_INSTALLED)) {
 			/*
-			 * De-install the current default route
-			 * in advance.
+			 * If the router hasn't been installed and it is
+			 * reachable, try to install it below.
+			 * If it's static, try to install it anyway.
 			 */
-			bzero(&anydr, sizeof(anydr));
-			defrouter_delreq(&anydr, 0);
-			if (nd6_defifp) {
-				/*
-				 * Install a route to the default interface
-				 * as default route.
-				 * XXX: we enable this for host only, because
-				 * this may override a default route installed
-				 * a user process (e.g. routing daemon) in a
-				 * router case.
-				 */
-				defrouter_addifreq(nd6_defifp);
+			if (reachable || (dr->stateflags & NDDRF_STATIC)) {
+				dr->genid = -1;
+				++update;
+				nd6log2((LOG_INFO, "%s: possible router %s, "
+				    "scoped=%d, static=%d\n", if_name(dr->ifp),
+				    ip6_sprintf(&dr->rtaddr),
+				    (dr->stateflags & NDDRF_IFSCOPE) ? 1 : 0,
+				    (dr->stateflags & NDDRF_STATIC) ? 1 : 0));
+			}
+			NDDR_REMREF(dr);	/* for this for loop */
+			continue;
+		}
+
+		/* Record the currently installed primary/non-scoped router */
+		if (!ip6_doscopedroute || !(dr->stateflags & NDDRF_IFSCOPE)) {
+			if (installed_dr == NULL) {
+				installed_dr = dr;
+				NDDR_ADDREF(installed_dr);
 			} else {
-				nd6log((LOG_INFO, "defrouter_select: "
-				    "there's no default router and no default"
-				    " interface\n"));
+				/* this should not happen; warn for diagnosis */
+				log(LOG_ERR, "defrouter_select: more than one "
+				    "%s default router is installed\n",
+				    ip6_doscopedroute ? "non-scoped" : "");
 			}
 		}
+		NDDR_REMREF(dr);	/* for this for loop */
 	}
 
-	return;
-}
+	/* If none was selected, use the currently installed one */
+	if (ip6_doscopedroute && selected_dr == NULL && installed_dr != NULL) {
+		selected_dr = installed_dr;
+		NDDR_ADDREF(selected_dr);
+	}
 
-static struct nd_defrouter *
-defrtrlist_update(
-	struct nd_defrouter *new)
-{
-	struct nd_defrouter *dr, *n;
-	struct ifnet *ifp = new->ifp;
-	struct nd_ifinfo *ndi;
+	/*
+	 * Install the unreachable one(s) if necessary.
+	 */
+	for (dr = TAILQ_FIRST(&nd_defrouter); dr;
+	     dr = TAILQ_NEXT(dr, dr_entry)) {
+		struct nd_defrouter *_dr;
 
-	lck_mtx_lock(nd6_mutex);
-	if ((dr = defrouter_lookup(&new->rtaddr, ifp)) != NULL) {
-		/* entry exists */
-		if (new->rtlifetime == 0) {
-			defrtrlist_del(dr, 1);
-			dr = NULL;
-		} else {
-			/* override */
-			dr->flags = new->flags; /* xxx flag check */
+		if (!ip6_doscopedroute)
+			break;
+
+		NDDR_LOCK(dr);
+
+		/* If already (or will be) installed, skip */
+		if ((dr->stateflags & NDDRF_INSTALLED) || dr->genid == -1) {
+			NDDR_UNLOCK(dr);
+			continue;
+		}
+
+		/* See if there is already a default router for the link */
+		for (_dr = TAILQ_FIRST(&nd_defrouter); _dr;
+		     _dr = TAILQ_NEXT(_dr, dr_entry)) {
+			if (_dr != dr)
+				NDDR_LOCK(_dr);
+			if (_dr == dr || _dr->ifp != dr->ifp) {
+				if (_dr != dr)
+					NDDR_UNLOCK(_dr);
+				continue;
+			}
+
+			if ((_dr->stateflags & NDDRF_INSTALLED) ||
+			    _dr->genid == -1) {
+				if (_dr != dr)
+					NDDR_UNLOCK(_dr);
+				break;
+			}
+			if (_dr != dr)
+				NDDR_UNLOCK(_dr);
+		}
+
+		/* If none so far, schedule it to be installed below */
+		if (_dr == NULL) {
+			dr->genid = -1;
+			++update;
+			nd6log2((LOG_INFO, "%s: possible router %s, "
+			    "static=%d (unreachable)\n", if_name(dr->ifp),
+			    ip6_sprintf(&dr->rtaddr),
+			    (dr->stateflags & NDDRF_STATIC) ? 1 : 0));
+		}
+		NDDR_UNLOCK(dr);
+	}
+
+	dr = selected_dr;
+	if (dr != NULL) {
+		nd6log2((LOG_INFO, "%s: considering primary default router %s, "
+		    "static=%d [round 1]\n", if_name(dr->ifp),
+		    ip6_sprintf(&dr->rtaddr),
+		    (dr->stateflags & NDDRF_STATIC) ? 1 : 0));
+	}
+
+	/*
+	 * If none of the default routers was found to be reachable,
+	 * round-robin the list regardless of preference, except when
+	 * Scoped Routing is enabled per case (c).
+	 *
+	 * Otherwise, if we have an installed router, check if the selected
+	 * (reachable) router should really be preferred to the installed one.
+	 * We only prefer the new router when the old one is not reachable
+	 * or when the new one has a strictly higher preference value.
+	 */
+	if (!ip6_doscopedroute && selected_dr == NULL) {
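+		/*
+		 * Round-robin: pick the entry after the installed router,
+		 * wrapping back to the head of the list when the installed
+		 * one is last or missing.
+		 */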
+		if (installed_dr == NULL ||
+		    !TAILQ_NEXT(installed_dr, dr_entry)) {
+			selected_dr = TAILQ_FIRST(&nd_defrouter);
+			if (selected_dr)
+				NDDR_ADDREF(selected_dr);
+		} else {
+			selected_dr = TAILQ_NEXT(installed_dr, dr_entry);
+			if (selected_dr)
+				NDDR_ADDREF(selected_dr);
+		}
+	} else if (selected_dr != NULL && installed_dr != NULL) {
+		lck_mtx_unlock(nd6_mutex);
+		rt = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp, 0);
+		if (rt) {
+			RT_LOCK_ASSERT_HELD(rt);
+			if ((ln = (struct llinfo_nd6 *)rt->rt_llinfo) &&
+			    ND6_IS_LLINFO_PROBREACH(ln) &&
+			    (!ip6_doscopedroute ||
+				installed_dr->ifp == nd6_defifp) &&
+			    rtpref(selected_dr) <= rtpref(installed_dr)) {
+				NDDR_REMREF(selected_dr);
+				selected_dr = installed_dr;
+				NDDR_ADDREF(selected_dr);
+			}
+			RT_REMREF_LOCKED(rt);
+			RT_UNLOCK(rt);
+			rt = NULL;
+			found_installedrt = TRUE;
+		}
+		lck_mtx_lock(nd6_mutex);
+	}
+
+	if (ip6_doscopedroute) {
+		/*
+		 * If the installed primary router is not on the current
+		 * IPv6 default interface, demote it to a scoped entry.
+		 */
+		if (installed_dr != NULL && installed_dr->ifp != nd6_defifp &&
+		    !(installed_dr->stateflags & NDDRF_IFSCOPE)) {
+			if (selected_dr != NULL &&
+			    selected_dr->ifp != nd6_defifp) {
+				NDDR_REMREF(selected_dr);
+				selected_dr = NULL;
+			}
+			++update;
+		}
+
+		/*
+		 * If the selected router is currently scoped, make sure
+		 * we update (it needs to be promoted to primary).
+		 */
+		if (selected_dr != NULL &&
+		    (selected_dr->stateflags & NDDRF_IFSCOPE))
+			++update;
+
+		/*
+		 * If the installed router is no longer reachable, remove
+		 * it and install the selected router instead.
+		 */
+		if (installed_dr != NULL && selected_dr != NULL &&
+		    installed_dr != selected_dr && found_installedrt == FALSE) {
+			installed_dr0 = installed_dr;	/* skip it below */
+			/* NB: we previously referenced installed_dr */
+			installed_dr = NULL;
+			selected_dr->genid = -1;
+			++update;
+		}
+	}
+
+	/*
+	 * If Scoped Routing is enabled and there's nothing to update,
+	 * just return.  Otherwise, if Scoped Routing is disabled and if
+	 * the selected router is different from the installed one,
+	 * remove the installed router and install the selected one.
+	 */
+	dr = selected_dr;
+	VERIFY(dr != NULL || ip6_doscopedroute);
+	if (!ip6_doscopedroute || !update) {
+		if (dr == NULL)
+			goto out;
+
+		if (dr != installed_dr) {
+			nd6log2((LOG_INFO, "%s: no update, selected router %s, "
+			    "installed router %s\n", if_name(dr->ifp),
+			    ip6_sprintf(&dr->rtaddr), installed_dr != NULL ?
+			    ip6_sprintf(&installed_dr->rtaddr) : "NONE"));
+		} else {
+			nd6log2((LOG_INFO, "%s: no update, router is %s\n",
+			    if_name(dr->ifp), ip6_sprintf(&dr->rtaddr)));
+		}
+		if (!ip6_doscopedroute && installed_dr != dr) {
+			/*
+			 * No need to ADDREF dr because at this point
+			 * dr points to selected_dr, which already holds
+			 * a reference.
+			 */
+			lck_mtx_unlock(nd6_mutex);
+			if (installed_dr) {
+				NDDR_LOCK(installed_dr);
+				defrouter_delreq(installed_dr);
+				NDDR_UNLOCK(installed_dr);
+			}
+			NDDR_LOCK(dr);
+			defrouter_addreq(dr, FALSE);
+			NDDR_UNLOCK(dr);
+			lck_mtx_lock(nd6_mutex);
+		}
+		goto out;
+	}
+
+	/*
+	 * Scoped Routing is enabled and we need to update.  The selected
+	 * router needs to be installed as the primary/non-scoped entry.  If
+	 * there is any existing entry that is non-scoped, remove it from
+	 * the routing table and reinstall it as a scoped entry.
+	 */
+	if (dr != NULL) {
+		nd6log2((LOG_INFO, "%s: considering primary default router %s, "
+		    "static=%d [round 2]\n", if_name(dr->ifp),
+		    ip6_sprintf(&dr->rtaddr),
+		    (dr->stateflags & NDDRF_STATIC) ? 1 : 0));
+	}
+
+	/*
+	 * In the following while loops we use two flags:
+	 *   dr->genid
+	 *   NDDRF_PROCESSED
+	 *
+	 * genid is used to skip entries that are not to be added/removed
+	 * in the second while loop.
+	 * NDDRF_PROCESSED is used to skip entries that were already processed.
+	 * This is necessary because we drop the nd6_mutex and start the while
+	 * loop again.
+	 */
+	TAILQ_FOREACH(dr, &nd_defrouter, dr_entry) {
+		NDDR_LOCK(dr);
+		VERIFY((dr->stateflags & NDDRF_PROCESSED) == 0);
+		NDDR_UNLOCK(dr);
+	}
+	/* Remove conflicting entries */
+	dr = TAILQ_FIRST(&nd_defrouter);
+	while (dr) {
+		NDDR_LOCK(dr);
+		if (!(dr->stateflags & NDDRF_INSTALLED) ||
+		    dr->stateflags & NDDRF_PROCESSED) {
+			NDDR_UNLOCK(dr);
+			dr = TAILQ_NEXT(dr, dr_entry);
+			continue;
+		}
+		dr->stateflags |= NDDRF_PROCESSED;
+
+		/* A NULL selected_dr will remove the primary default route */
+		if ((dr == selected_dr && (dr->stateflags & NDDRF_IFSCOPE)) ||
+		    (dr != selected_dr && !(dr->stateflags & NDDRF_IFSCOPE))) {
+			NDDR_ADDREF_LOCKED(dr);
+			NDDR_UNLOCK(dr);
+			lck_mtx_unlock(nd6_mutex);
+			NDDR_LOCK(dr);
+			defrouter_delreq(dr);
+			NDDR_UNLOCK(dr);
+			lck_mtx_lock(nd6_mutex);
+			NDDR_LOCK(dr);
+			if (dr && dr != installed_dr0)
+				dr->genid = -1;
+			NDDR_UNLOCK(dr);
+			NDDR_REMREF(dr);
+			/*
+			 * Since we lost nd6_mutex, we have to start over.
+			 */
+			dr = TAILQ_FIRST(&nd_defrouter);
+			continue;
+		}
+		NDDR_UNLOCK(dr);
+		dr = TAILQ_NEXT(dr, dr_entry);
+	}
+
+	/* -1 is a special number, make sure we don't use it for genid */
+	if (++nd6_defrouter_genid == -1)
+		nd6_defrouter_genid = 1;
+
+	TAILQ_FOREACH(dr, &nd_defrouter, dr_entry) {
+		NDDR_LOCK(dr);
+		dr->stateflags &= ~NDDRF_PROCESSED;
+		NDDR_UNLOCK(dr);
+	}
+	/* Add the entries back */
+	dr = TAILQ_FIRST(&nd_defrouter);
+	while (dr) {
+		struct nd_defrouter *_dr;
+
+		NDDR_LOCK(dr);
+		if (dr->stateflags & NDDRF_PROCESSED ||
+		    dr->genid != -1) {
+			NDDR_UNLOCK(dr);
+			dr = TAILQ_NEXT(dr, dr_entry);
+			continue;
+		}
+		dr->stateflags |= NDDRF_PROCESSED;
+
+		/* Handle case (b) */
+		for (_dr = TAILQ_FIRST(&nd_defrouter); _dr;
+		     _dr = TAILQ_NEXT(_dr, dr_entry)) {
+			if (_dr == dr)
+				continue;
+			/*
+			 * This is safe because we previously checked if
+			 * _dr == dr.
+			 */
+			NDDR_LOCK(_dr);
+			if (_dr->ifp == dr->ifp && rtpref(_dr) >= rtpref(dr) &&
+			    (_dr->stateflags & NDDRF_INSTALLED)) {
+				NDDR_ADDREF_LOCKED(_dr);
+				NDDR_UNLOCK(_dr);
+				break;
+			}
+			NDDR_UNLOCK(_dr);
+		}
+
+		/* If same preference and i/f, static entry takes precedence */
+		if (_dr != NULL && rtpref(_dr) == rtpref(dr) &&
+		    !(_dr->stateflags & NDDRF_STATIC) &&
+		    (dr->stateflags & NDDRF_STATIC)) {
+			lck_mtx_unlock(nd6_mutex);
+			NDDR_LOCK(_dr);
+			defrouter_delreq(_dr);
+			NDDR_UNLOCK(_dr);
+			lck_mtx_lock(nd6_mutex);
+			NDDR_REMREF(_dr);
+			_dr = NULL;
+		}
+
+		if (_dr == NULL && !(dr->stateflags & NDDRF_INSTALLED)) {
+			NDDR_ADDREF_LOCKED(dr);
+			NDDR_UNLOCK(dr);
+			lck_mtx_unlock(nd6_mutex);
+			NDDR_LOCK(dr);
+			defrouter_addreq(dr, (selected_dr == NULL ||
+			    dr->ifp != selected_dr->ifp));
+			dr->genid = nd6_defrouter_genid;
+			NDDR_UNLOCK(dr);
+			lck_mtx_lock(nd6_mutex);
+			NDDR_REMREF(dr);
+			/*
+			 * Since we lost nd6_mutex, we have to start over.
+			 */
+			dr = TAILQ_FIRST(&nd_defrouter);
+			continue;
+		}
+		NDDR_UNLOCK(dr);
+		dr = TAILQ_NEXT(dr, dr_entry);
+	}
+out:
+	TAILQ_FOREACH(dr, &nd_defrouter, dr_entry) {
+		NDDR_LOCK(dr);
+		dr->stateflags &= ~NDDRF_PROCESSED;
+		NDDR_UNLOCK(dr);
+	}
+	if (selected_dr)
+		NDDR_REMREF(selected_dr);
+	if (installed_dr)
+		NDDR_REMREF(installed_dr);
+	if (installed_dr0)
+		NDDR_REMREF(installed_dr0);
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
+	VERIFY(nd_defrouter_busy);
+	nd_defrouter_busy = FALSE;
+	if (nd_defrouter_waiters > 0) {
+		nd_defrouter_waiters = 0;
+		wakeup(nd_defrouter_waitchan);
+	}
+}
+
+void
+defrouter_select(struct ifnet *ifp)
+{
+	return (defrouter_select_common(ifp, 0));
+}
+
+static struct nd_defrouter *
+defrtrlist_update_common(struct nd_defrouter *new, boolean_t scoped)
+{
+	struct nd_defrouter *dr, *n;
+	struct ifnet *ifp = new->ifp;
+	struct nd_ifinfo *ndi;
+
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
+
+	if ((dr = defrouter_lookup(&new->rtaddr, ifp)) != NULL) {
+		/* entry exists */
+		if (new->rtlifetime == 0) {
+			defrtrlist_del(dr);
+			NDDR_REMREF(dr);
+			dr = NULL;
+		} else {
+			int oldpref = rtpref(dr);
+
+			/* override */
+			dr->flags = new->flags; /* xxx flag check */
 			dr->rtlifetime = new->rtlifetime;
 			dr->expire = new->expire;
+
+			/*
+			 * If the preference does not change, there's no need
+			 * to sort the entries.  If Scoped Routing is enabled,
+			 * put the primary/non-scoped router at the top of the
+			 * list of routers in the same preference band, unless
+			 * it's already at that position.
+			 */
+			if (ip6_doscopedroute) {
+				struct nd_defrouter *p = NULL;
+
+				/* same preference and scoped; just return */
+				if (rtpref(new) == oldpref && scoped)
+					return (dr);
+
+				n = TAILQ_FIRST(&nd_defrouter);
+				while (n != NULL) {
+					/* preference changed; sort it */
+					if (rtpref(new) != oldpref)
+						break;
+
+					/* not at the top of band; sort it */
+					if (n != dr && rtpref(n) == oldpref &&
+					    (!p || rtpref(p) > rtpref(n)))
+						break;
+
+					p = n;
+					n = TAILQ_NEXT(n, dr_entry);
+				}
+
+				/* nothing has changed, just return */
+				if (n == NULL && (scoped ||
+				    !(dr->stateflags & NDDRF_IFSCOPE)))
+					return (dr);
+			} else if (rtpref(new) == oldpref) {
+				return (dr);
+			}
+
+			/*
+			 * preferred router may be changed, so relocate
+			 * this router.
+			 * XXX: calling TAILQ_REMOVE directly is bad practice.
+			 * However, since defrtrlist_del() has many side
+			 * effects, we intentionally do so here.
+			 * defrouter_select() below will handle routing
+			 * changes later.
+			 */
+			TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
+			new->stateflags = dr->stateflags;
+			new->stateflags &= ~NDDRF_PROCESSED;
+
+			lck_rw_lock_shared(nd_if_rwlock);
+			VERIFY(ifp->if_index < nd_ifinfo_indexlim);
+			ndi = &nd_ifinfo[ifp->if_index];
+			lck_rw_done(nd_if_rwlock);
+			n = dr;
+			goto insert;
 		}
-		lck_mtx_unlock(nd6_mutex);
-		return(dr);
+		return (dr);
 	}
 
+	VERIFY(dr == NULL);
+
 	/* entry does not exist */
 	if (new->rtlifetime == 0) {
-		lck_mtx_unlock(nd6_mutex);
 		return(NULL);
 	}
 
-	n = (struct nd_defrouter *)_MALLOC(sizeof(*n), M_IP6NDP, M_NOWAIT);
+	n = nddr_alloc(M_WAITOK);
 	if (n == NULL) {
-		lck_mtx_unlock(nd6_mutex);
 		return(NULL);
 	}
 
 	lck_rw_lock_shared(nd_if_rwlock);
+	ndi = &nd_ifinfo[ifp->if_index];
 	if (ifp->if_index >= nd_ifinfo_indexlim)
 		goto freeit;
-	ndi = &nd_ifinfo[ifp->if_index];
 	if (ip6_maxifdefrouters >= 0 &&
 	    ndi->ndefrouters >= ip6_maxifdefrouters) {
 freeit:
 		lck_rw_done(nd_if_rwlock);
-		lck_mtx_unlock(nd6_mutex);
-		FREE(n, M_IP6NDP);
+		nddr_free(n);
 		return (NULL);
 	}
-	ndi->ndefrouters++;
+
+	NDDR_ADDREF(n);	/* for the nd_defrouter list */
+	NDDR_ADDREF(n);	/* for the caller */
+
+	++nd6_defrouter_genid;
+	atomic_add_32(&ndi->ndefrouters, 1);
 	lck_rw_done(nd_if_rwlock);
 
-	bzero(n, sizeof(*n));
-	*n = *new;
+	nd6log2((LOG_INFO, "%s: allocating defrouter %s\n", if_name(ifp),
+	    ip6_sprintf(&new->rtaddr)));
+
+	NDDR_LOCK(n);
+	memcpy(&n->rtaddr, &new->rtaddr, sizeof(n->rtaddr));
+	n->flags = new->flags;
+	n->stateflags = new->stateflags;
+	n->stateflags &= ~NDDRF_PROCESSED;
+	n->rtlifetime = new->rtlifetime;
+	n->expire = new->expire;
+	n->ifp = new->ifp;
+	n->genid = new->genid;
+	n->err = new->err;
+	NDDR_UNLOCK(n);
+insert:
 
 	/*
-	 * Insert the new router at the end of the Default Router List.
-	 * If there is no other router, install it anyway. Otherwise,
-	 * just continue to use the current default router.
+	 * Insert the new router in the Default Router List;
+	 * the list should be kept in descending order of router
+	 * preference.  When Scoped Routing is disabled, routers with the
+	 * same preference are sorted in order of arrival; otherwise, the
+	 * first entry among routers of the same preference is the primary
+	 * default router, provided the interface used by the entry is the
+	 * default interface.
 	 */
-	TAILQ_INSERT_TAIL(&nd_defrouter, n, dr_entry);
-	if (TAILQ_FIRST(&nd_defrouter) == n)
-		defrouter_select();
 
-	lck_mtx_unlock(nd6_mutex);
-	return(n);
+	/* insert at the end of the group */
+	for (dr = TAILQ_FIRST(&nd_defrouter); dr;
+	     dr = TAILQ_NEXT(dr, dr_entry)) {
+		if (rtpref(n) > rtpref(dr) ||
+		    (ip6_doscopedroute && !scoped && rtpref(n) == rtpref(dr)))
+			break;
+	}
+	if (dr)
+		TAILQ_INSERT_BEFORE(dr, n, dr_entry);
+	else
+		TAILQ_INSERT_TAIL(&nd_defrouter, n, dr_entry);
+
+	/* Ignore auto-configuration checks for static route entries */
+	defrouter_select_common(ifp, (n->stateflags & NDDRF_STATIC));
+
+	return (n);
+}
+
+static struct nd_defrouter *
+defrtrlist_update(struct nd_defrouter *new)
+{
+	struct nd_defrouter *dr;
+
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
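+	/*
+	 * Treat the entry as scoped unless it arrived on the current
+	 * default interface (or no default interface is set);
+	 * defrtrlist_update_common() then installs it as a scoped or
+	 * primary route accordingly.
+	 */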
+	dr = defrtrlist_update_common(new,
+	    (nd6_defifp != NULL && new->ifp != nd6_defifp));
+
+	return (dr);
+}
+
+static void
+defrtrlist_sync(struct ifnet *ifp)
+{
+	struct nd_defrouter *dr, new;
+
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
+
+	if (!ip6_doscopedroute) {
+		defrouter_select(ifp);
+		return;
+	}
+
+	for (dr = TAILQ_FIRST(&nd_defrouter); dr;
+	     dr = TAILQ_NEXT(dr, dr_entry)) {
+		NDDR_LOCK(dr);
+		if (dr->ifp == ifp && (dr->stateflags & NDDRF_INSTALLED))
+			break;
+		NDDR_UNLOCK(dr);
+	}
+
+	if (dr == NULL) {
+		/*
+		 * Set ignore flag; the chosen default interface might
+		 * not be configured to accept RAs.
+		 */
+		defrouter_select_common(ifp, 1);
+	} else {
+		memcpy(&new.rtaddr, &dr->rtaddr, sizeof(new.rtaddr));
+		new.flags = dr->flags;
+		new.stateflags = dr->stateflags;
+		new.stateflags &= ~NDDRF_PROCESSED;
+		new.rtlifetime = dr->rtlifetime;
+		new.expire = dr->expire;
+		new.ifp = dr->ifp;
+		new.genid = dr->genid;
+		new.err = dr->err;
+		NDDR_UNLOCK(dr);
+		dr = defrtrlist_update_common(&new, FALSE);
+		if (dr)
+			NDDR_REMREF(dr);
+	}
 }
 
 static struct nd_pfxrouter *
-pfxrtr_lookup(
-	struct nd_prefix *pr,
-	struct nd_defrouter *dr)
+pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr)
 {
 	struct nd_pfxrouter *search;
-	
+
 	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
-	for (search = pr->ndpr_advrtrs.lh_first; search; search = search->pfr_next) {
+	NDPR_LOCK_ASSERT_HELD(pr);
+
+	for (search = pr->ndpr_advrtrs.lh_first; search;
+	    search = search->pfr_next) {
 		if (search->router == dr)
 			break;
 	}
@@ -876,23 +1825,24 @@ pfxrtr_lookup(
 }
 
 static void
-pfxrtr_add(
-	struct nd_prefix *pr,
-	struct nd_defrouter *dr)
+pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
 {
 	struct nd_pfxrouter *new;
 
 	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
+	NDPR_LOCK_ASSERT_NOTHELD(pr);
 
-	new = (struct nd_pfxrouter *)_MALLOC(sizeof(*new), M_IP6NDP, M_NOWAIT);
+	new = zalloc(ndprtr_zone);
 	if (new == NULL)
 		return;
 	bzero(new, sizeof(*new));
 	new->router = dr;
 
+	NDPR_LOCK(pr);
 	LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry);
-
-	pfxlist_onlink_check(1);
+	NDPR_UNLOCK(pr);
+
+	pfxlist_onlink_check();
 }
 
 static void
@@ -901,65 +1851,32 @@ pfxrtr_del(
 {
 	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
 	LIST_REMOVE(pfr, pfr_entry);
-	FREE(pfr, M_IP6NDP);
+	zfree(ndprtr_zone, pfr);
 }
 
 struct nd_prefix *
-nd6_prefix_lookup(
-	struct nd_prefix *pr)
+nd6_prefix_lookup(struct nd_prefix *pr)
 {
 	struct nd_prefix *search;
 
 	lck_mtx_lock(nd6_mutex);
 	for (search = nd_prefix.lh_first; search; search = search->ndpr_next) {
+		NDPR_LOCK(search);
 		if (pr->ndpr_ifp == search->ndpr_ifp &&
 		    pr->ndpr_plen == search->ndpr_plen &&
 		    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
-					 &search->ndpr_prefix.sin6_addr,
-					 pr->ndpr_plen)
-		    ) {
+		    &search->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
+			NDPR_ADDREF_LOCKED(search);
+			NDPR_UNLOCK(search);
 			break;
 		}
+		NDPR_UNLOCK(search);
 	}
-	if (search != NULL)
-		ndpr_hold(search, TRUE);
 	lck_mtx_unlock(nd6_mutex);
 
 	return(search);
 }
 
-void
-ndpr_hold(struct nd_prefix *pr, boolean_t locked)
-{
-	if (!locked)
-		lck_mtx_lock(nd6_mutex);
-
-	if (pr->ndpr_usecnt < 0)
-		panic("%s: bad usecnt %d for pr %p\n", __func__,
-		    pr->ndpr_usecnt, pr);
-
-	pr->ndpr_usecnt++;
-
-	if (!locked)
-		lck_mtx_unlock(nd6_mutex);
-}
-
-void
-ndpr_rele(struct nd_prefix *pr, boolean_t locked)
-{
-	if (!locked)
-		lck_mtx_lock(nd6_mutex);
-
-	if (pr->ndpr_usecnt <= 0)
-		panic("%s: bad usecnt %d for pr %p\n", __func__,
-		    pr->ndpr_usecnt, pr);
-
-	pr->ndpr_usecnt--;
-
-	if (!locked)
-		lck_mtx_unlock(nd6_mutex);
-}
-
 static void
 purge_detached(struct ifnet *ifp)
 {
@@ -969,52 +1886,75 @@ purge_detached(struct ifnet *ifp)
 
 	lck_mtx_lock(nd6_mutex);
 
-	for (pr = nd_prefix.lh_first; pr; pr = pr_next) {
+	pr = nd_prefix.lh_first;
+repeat:
+	while (pr) {
 		pr_next = pr->ndpr_next;
+		NDPR_LOCK(pr);
 		if (pr->ndpr_ifp != ifp ||
 		    IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr) ||
 		    ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
-		    !LIST_EMPTY(&pr->ndpr_advrtrs)))
+		    !LIST_EMPTY(&pr->ndpr_advrtrs))) {
+			NDPR_UNLOCK(pr);
+			pr = pr_next;
 			continue;
-repeat:
+		}
+		NDPR_UNLOCK(pr);
 		ifnet_lock_shared(ifp);
 		for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa_next) {
 			ifa_next = ifa->ifa_list.tqe_next;
-			if (ifa->ifa_addr->sa_family != AF_INET6)
+			IFA_LOCK(ifa);
+			if (ifa->ifa_addr->sa_family != AF_INET6) {
+				IFA_UNLOCK(ifa);
 				continue;
+			}
 			ia = (struct in6_ifaddr *)ifa;
 			if ((ia->ia6_flags & IN6_IFF_AUTOCONF) ==
 			    IN6_IFF_AUTOCONF && ia->ia6_ndpr == pr) {
-				ifaref(ifa);
+				IFA_ADDREF_LOCKED(ifa);	/* for us */
+				IFA_UNLOCK(ifa);
 				/*
 				 * Purging the address requires writer access
 				 * to the address list, so drop the ifnet lock
 				 * now and repeat from beginning.
 				 */
 				ifnet_lock_done(ifp);
-				in6_purgeaddr(ifa, 1);
-				ifafree(ifa);
+				lck_mtx_unlock(nd6_mutex);
+				in6_purgeaddr(ifa);
+				lck_mtx_lock(nd6_mutex);
+				IFA_REMREF(ifa); /* drop ours */
+				pr = nd_prefix.lh_first;
 				goto repeat;
 			}
+			IFA_UNLOCK(ifa);
 		}
 		ifnet_lock_done(ifp);
-		if (pr->ndpr_refcnt == 0)
-			prelist_remove(pr, 1);
+		NDPR_LOCK(pr);
+		if (pr->ndpr_addrcnt == 0) {
+			NDPR_ADDREF_LOCKED(pr);
+			prelist_remove(pr);
+			NDPR_UNLOCK(pr);
+			NDPR_REMREF(pr);
+		} else {
+			NDPR_UNLOCK(pr);
+		}
+		pr = pr_next;
 	}
 
 	lck_mtx_unlock(nd6_mutex);
 }
 
 int
-nd6_prelist_add(
-	struct nd_prefix *pr,
-	struct nd_defrouter *dr,
-	struct nd_prefix **newp)
+nd6_prelist_add(struct nd_prefix *pr, struct nd_defrouter *dr,
+    struct nd_prefix **newp, boolean_t force_scoped)
 {
 	struct nd_prefix *new = NULL;
 	struct ifnet *ifp = pr->ndpr_ifp;
 	struct nd_ifinfo *ndi = NULL;
-	int i;
+	int i, error;
+	struct timeval timenow;
+
+	getmicrotime(&timenow);
 
 	if (ip6_maxifprefixes >= 0) {
 		lck_rw_lock_shared(nd_if_rwlock);
@@ -1041,15 +1981,32 @@ nd6_prelist_add(
 		lck_rw_done(nd_if_rwlock);
 	}
 
-	new = (struct nd_prefix *)_MALLOC(sizeof(*new), M_IP6NDP, M_NOWAIT);
+	new = ndpr_alloc(M_WAITOK);
 	if (new == NULL)
 		return ENOMEM;
-	bzero(new, sizeof(*new));
-	*new = *pr;
-	if (newp != NULL)
-		*newp = new;
 
-	/* initilization */
+	NDPR_LOCK(new);
+	NDPR_LOCK(pr);
+	new->ndpr_ifp = pr->ndpr_ifp;
+	new->ndpr_prefix = pr->ndpr_prefix;
+	new->ndpr_plen = pr->ndpr_plen;
+	new->ndpr_vltime = pr->ndpr_vltime;
+	new->ndpr_pltime = pr->ndpr_pltime;
+	new->ndpr_flags = pr->ndpr_flags;
+	if (pr->ndpr_stateflags & NDPRF_STATIC)
+		new->ndpr_stateflags |= NDPRF_STATIC;
+	NDPR_UNLOCK(pr);
+	if ((error = in6_init_prefix_ltimes(new)) != 0) {
+		NDPR_UNLOCK(new);
+		ndpr_free(new);
+		return(error);
+	}
+	new->ndpr_lastupdate = timenow.tv_sec;
+	if (newp != NULL) {
+		*newp = new;
+		NDPR_ADDREF_LOCKED(new);	/* for caller */
+	}
+	/* initialization */
 	LIST_INIT(&new->ndpr_advrtrs);
 	in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen);
 	/* make prefix in the canonical form */
@@ -1057,22 +2014,25 @@ nd6_prelist_add(
 		new->ndpr_prefix.sin6_addr.s6_addr32[i] &=
 			new->ndpr_mask.s6_addr32[i];
 
-	/* link ndpr_entry to nd_prefix list */
+	NDPR_UNLOCK(new);
+
 	lck_mtx_lock(nd6_mutex);
+	/* link ndpr_entry to nd_prefix list */
 	LIST_INSERT_HEAD(&nd_prefix, new, ndpr_entry);
-
-	new->ndpr_usecnt = 0;
-	ndpr_hold(new, TRUE);
+	new->ndpr_debug |= IFD_ATTACHED;
+	NDPR_ADDREF(new);	/* for nd_prefix list */
 
 	/* ND_OPT_PI_FLAG_ONLINK processing */
 	if (new->ndpr_raf_onlink) {
 		int e;
 
-		if ((e = nd6_prefix_onlink(new, 0, 1)) != 0) {
+		if ((e = nd6_prefix_onlink_common(new, force_scoped,
+		    new->ndpr_ifp->if_index)) != 0) {
 			nd6log((LOG_ERR, "nd6_prelist_add: failed to make "
-			    "the prefix %s/%d on-link on %s (errno=%d)\n",
-			    ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
-			    pr->ndpr_plen, if_name(ifp), e));
+			    "the prefix %s/%d on-link %s on %s (errno=%d)\n",
+			    ip6_sprintf(&new->ndpr_prefix.sin6_addr),
+			    new->ndpr_plen, force_scoped ? "scoped" :
+			    "non-scoped", if_name(ifp), e));
 			/* proceed anyway. XXX: is it correct? */
 		}
 	}
@@ -1088,7 +2048,7 @@ nd6_prelist_add(
 	 * isn't necessary since the array never shrinks.
 	 */
 	ndi = &nd_ifinfo[ifp->if_index];
-	ndi->nprefixes++;
+	atomic_add_32(&ndi->nprefixes, 1);
 	lck_rw_done(nd_if_rwlock);
 
 	lck_mtx_unlock(nd6_mutex);
@@ -1096,54 +2056,63 @@ nd6_prelist_add(
 	return 0;
 }
 
+/*
+ * Caller must have held an extra reference on nd_prefix.
+ */
 void
-prelist_remove(
-	struct nd_prefix *pr, int nd6locked)
+prelist_remove(struct nd_prefix *pr)
 {
 	struct nd_pfxrouter *pfr, *next;
 	struct ifnet *ifp = pr->ndpr_ifp;
 	int e;
 
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
+	NDPR_LOCK_ASSERT_HELD(pr);
+
 	/* make sure to invalidate the prefix until it is really freed. */
 	pr->ndpr_vltime = 0;
 	pr->ndpr_pltime = 0;
-#if 0
+
 	/*
 	 * Though these flags are now meaningless, we'd rather keep the value
-	 * not to confuse users when executing "ndp -p".
+	 * of pr->ndpr_raf_onlink and pr->ndpr_raf_auto so as not to
+	 * confuse users when executing "ndp -p".
 	 */
-	pr->ndpr_raf_onlink = 0;
-	pr->ndpr_raf_auto = 0;
-#endif
-	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 &&
-	    (e = nd6_prefix_offlink(pr)) != 0) {
-		nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink "
-		    "on %s, errno=%d\n",
-		    ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
-		    pr->ndpr_plen, if_name(ifp), e));
-		/* what should we do? */
-	}
 
-	if (nd6locked == 0)
+	if ((pr->ndpr_stateflags & NDPRF_ONLINK)) {
+		NDPR_ADDREF_LOCKED(pr);
+		NDPR_UNLOCK(pr);
+		lck_mtx_unlock(nd6_mutex);
+		if ((e = nd6_prefix_offlink(pr)) != 0) {
+			nd6log((LOG_ERR, "prelist_remove: failed to make "
+			    "%s/%d offlink on %s, errno=%d\n",
+			    ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
+			    pr->ndpr_plen, if_name(ifp), e));
+			/* what should we do? */
+		}
 		lck_mtx_lock(nd6_mutex);
+		NDPR_LOCK(pr);
+		if (NDPR_REMREF_LOCKED(pr) == NULL)
+			return;
+	}
 
-	if (pr->ndpr_usecnt > 0 || pr->ndpr_refcnt > 0)
-		goto done;	/* notice here? */
+	if (pr->ndpr_addrcnt > 0)
+		return;	/* notice here? */
 
 	/* unlink ndpr_entry from nd_prefix list */
 	LIST_REMOVE(pr, ndpr_entry);
+	pr->ndpr_debug &= ~IFD_ATTACHED;
 
 	/* free list of routers that adversed the prefix */
 	for (pfr = pr->ndpr_advrtrs.lh_first; pfr; pfr = next) {
 		next = pfr->pfr_next;
-
-		FREE(pfr, M_IP6NDP);
+		pfxrtr_del(pfr);
 	}
 
 	lck_rw_lock_shared(nd_if_rwlock);
 	if (ifp->if_index < nd_ifinfo_indexlim) {
 		struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index];
-		ndi->nprefixes--;
+		atomic_add_32(&ndi->nprefixes, -1);
 		if (ndi->nprefixes < 0) {
 			log(LOG_WARNING, "prelist_remove: negative "
 			    "count on %s\n", if_name(ifp));
@@ -1151,19 +2120,21 @@ prelist_remove(
 	}
 	lck_rw_done(nd_if_rwlock);
 
-	FREE(pr, M_IP6NDP);
+	/* This must not be the last reference to the nd_prefix */
+	if (NDPR_REMREF_LOCKED(pr) == NULL) {
+		panic("%s: unexpected (missing) refcnt ndpr=%p", __func__, pr);
+		/* NOTREACHED */
+	}
 
-	pfxlist_onlink_check(1);
-done:
-	if (nd6locked == 0)
-		lck_mtx_unlock(nd6_mutex);
+	pfxlist_onlink_check();
 }
 
 int
 prelist_update(
 	struct nd_prefix *new,
 	struct nd_defrouter *dr, /* may be NULL */
-	struct mbuf *m)
+	struct mbuf *m,
+	int mcast)
 {
 	struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL;
 	struct ifaddr *ifa;
@@ -1175,6 +2146,9 @@ prelist_update(
 	struct in6_addrlifetime lt6_tmp;
 	struct timeval timenow;
 
+	/* no need to lock "new" here, as it is local to the caller */
+	NDPR_LOCK_ASSERT_NOTHELD(new);
+
 	auth = 0;
 	if (m) {
 		/*
@@ -1199,6 +2173,8 @@ prelist_update(
 		 * and the autonomous (A) bit should NOT be changed from 1
 		 * to 0.
 		 */
+		lck_mtx_lock(nd6_mutex);
+		NDPR_LOCK(pr);
 		if (new->ndpr_raf_onlink == 1)
 			pr->ndpr_raf_onlink = 1;
 		if (new->ndpr_raf_auto == 1)
@@ -1214,7 +2190,8 @@ prelist_update(
 		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
 			int e;
 
-			if ((e = nd6_prefix_onlink(pr, 0, 0)) != 0) {
+			NDPR_UNLOCK(pr);
+			if ((e = nd6_prefix_onlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "prelist_update: failed to make "
 				    "the prefix %s/%d on-link on %s "
@@ -1223,11 +2200,15 @@ prelist_update(
 				    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
 				/* proceed anyway. XXX: is it correct? */
 			}
+			NDPR_LOCK(pr);
 		}
-		
-		lck_mtx_lock(nd6_mutex);
-		if (dr && pfxrtr_lookup(pr, dr) == NULL)
+
+		if (dr && pfxrtr_lookup(pr, dr) == NULL) {
+			NDPR_UNLOCK(pr);
 			pfxrtr_add(pr, dr);
+		} else {
+			NDPR_UNLOCK(pr);
+		}
 		lck_mtx_unlock(nd6_mutex);
 	} else {
 		struct nd_prefix *newpr = NULL;
@@ -1241,7 +2222,7 @@ prelist_update(
 
 		bzero(&new->ndpr_addr, sizeof(struct in6_addr));
 
-		error = nd6_prelist_add(new, dr, &newpr);
+		error = nd6_prelist_add(new, dr, &newpr, FALSE);
 		if (error != 0 || newpr == NULL) {
 			nd6log((LOG_NOTICE, "prelist_update: "
 			    "nd6_prelist_add failed for %s/%d on %s "
@@ -1256,9 +2237,10 @@ prelist_update(
 		 * XXX: from the ND point of view, we can ignore a prefix
 		 * with the on-link bit being zero.  However, we need a
 		 * prefix structure for references from autoconfigured
-		 * addresses.  Thus, we explicitly make suret that the prefix
+		 * addresses.  Thus, we explicitly make sure that the prefix
 		 * itself expires now.
 		 */
+		NDPR_LOCK(newpr);
 		if (newpr->ndpr_raf_onlink == 0) {
 			newpr->ndpr_vltime = 0;
 			newpr->ndpr_pltime = 0;
@@ -1266,6 +2248,7 @@ prelist_update(
 		}
 
 		pr = newpr;
+		NDPR_UNLOCK(newpr);
 	}
 
 	/*
@@ -1282,84 +2265,109 @@ prelist_update(
 	 * nd6_ra_input.
 	 */
 
+	/* 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime. */
+	if (new->ndpr_pltime > new->ndpr_vltime) {
+		error = EINVAL;	/* XXX: won't be used */
+		goto end;
+	}
+
 	/*
-	 * 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime.
-	 * This should have been done in nd6_ra_input.
+	 * 5.5.3 (d).  If the prefix advertised is not equal to the prefix of
+	 * an address configured by stateless autoconfiguration already in the
+	 * list of addresses associated with the interface, and the Valid
+	 * Lifetime is not 0, form an address.  We first check if we have
+	 * a matching prefix.
+	 * Note: we apply a clarification in rfc2462bis-02 here.  We only
+	 * consider autoconfigured addresses while RFC2462 simply said
+	 * "address".
 	 */
 
- 	/*
-	 * 5.5.3 (d). If the prefix advertised does not match the prefix of an
-	 * address already in the list, and the Valid Lifetime is not 0,
-	 * form an address.  Note that even a manually configured address
-	 * should reject autoconfiguration of a new address.
-	 */
-	getmicrotime(&timenow);
+	getmicrotime(&timenow);
 
-	ifnet_lock_exclusive(ifp);
+	ifnet_lock_shared(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
 	{
 		struct in6_ifaddr *ifa6;
-		int ifa_plen;
-		u_int32_t storedlifetime;
+		u_int32_t remaininglifetime;
 
-		if (ifa->ifa_addr->sa_family != AF_INET6)
+		IFA_LOCK(ifa);
+		if (ifa->ifa_addr->sa_family != AF_INET6) {
+			IFA_UNLOCK(ifa);
 			continue;
-
+		}
 		ifa6 = (struct in6_ifaddr *)ifa;
 
+		/*
+		 * We only consider autoconfigured addresses as per rfc2462bis.
+		 */
+		if (!(ifa6->ia6_flags & IN6_IFF_AUTOCONF)) {
+			IFA_UNLOCK(ifa);
+			continue;
+		}
 		/*
 		 * Spec is not clear here, but I believe we should concentrate
 		 * on unicast (i.e. not anycast) addresses.
 		 * XXX: other ia6_flags? detached or duplicated?
 		 */
-		if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0)
+		if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0) {
+			IFA_UNLOCK(ifa);
 			continue;
-		
-		ifa_plen = in6_mask2len(&ifa6->ia_prefixmask.sin6_addr, NULL);
-		if (ifa_plen != new->ndpr_plen ||
-		    !in6_are_prefix_equal(&ifa6->ia_addr.sin6_addr,
-					  &new->ndpr_prefix.sin6_addr,
-					  ifa_plen))
+		}
+		/*
+		 * Ignore the address if it is not associated with a prefix
+		 * or is associated with a prefix that is different from this
+		 * one.  (pr is never NULL here)
+		 */
+		if (ifa6->ia6_ndpr != pr) {
+			IFA_UNLOCK(ifa);
 			continue;
+		}
 
-		if (ia6_match == NULL) /* remember the first one */
+		if (ia6_match == NULL) { /* remember the first one */
 			ia6_match = ifa6;
-
-		if ((ifa6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
-			continue;
+			IFA_ADDREF_LOCKED(ifa);	/* for ia6_match */
+		}
 
 		/*
 		 * An already autoconfigured address matched.  Now that we
 		 * are sure there is at least one matched address, we can
 		 * proceed to 5.5.3. (e): update the lifetimes according to the
 		 * "two hours" rule and the privacy extension.
+		 * We apply some clarifications in rfc2462bis:
+		 * - use remaininglifetime instead of storedlifetime as a
+		 *   variable name
+		 * - remove the dead code in the "two-hour" rule
 		 */
 #define TWOHOUR		(120*60)
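+		/*
+		 * Two-hour rule per 5.5.3 (e): accept the advertised valid
+		 * lifetime if it exceeds two hours or the time remaining on
+		 * the address; otherwise take it only from an authenticated
+		 * RA, and clamp the remaining lifetime to two hours instead.
+		 */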
 		lt6_tmp = ifa6->ia6_lifetime;
 
-		storedlifetime = IFA6_IS_INVALID(ifa6) ? 0 :
-			(lt6_tmp.ia6t_expire - timenow.tv_sec);
+		if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME)
+			remaininglifetime = ND6_INFINITE_LIFETIME;
+		else if (timenow.tv_sec - ifa6->ia6_updatetime >
+			 lt6_tmp.ia6t_vltime) {
+			/*
+			 * The case of "invalid" address.  We should usually
+			 * not see this case.
+			 */
+			remaininglifetime = 0;
+		} else
+			remaininglifetime = lt6_tmp.ia6t_vltime -
+			    (timenow.tv_sec - ifa6->ia6_updatetime);
+
+		/* when not updating, keep the current stored lifetime. */
+		lt6_tmp.ia6t_vltime = remaininglifetime;
 
 		if (TWOHOUR < new->ndpr_vltime ||
-		    storedlifetime < new->ndpr_vltime) {
+		    remaininglifetime < new->ndpr_vltime) {
 			lt6_tmp.ia6t_vltime = new->ndpr_vltime;
-		} else if (storedlifetime <= TWOHOUR
-#if 0
-			   /*
-			    * This condition is logically redundant, so we just
-			    * omit it.
-			    * See IPng 6712, 6717, and 6721.
-			    */
-			   && new->ndpr_vltime <= storedlifetime
-#endif
-			) {
+		} else if (remaininglifetime <= TWOHOUR) {
 			if (auth) {
 				lt6_tmp.ia6t_vltime = new->ndpr_vltime;
 			}
 		} else {
 			/*
 			 * new->ndpr_vltime <= TWOHOUR &&
-			 * TWOHOUR < storedlifetime
+			 * TWOHOUR < remaininglifetime
 			 */
 			lt6_tmp.ia6t_vltime = TWOHOUR;
 		}
@@ -1367,57 +2375,108 @@ prelist_update(
 		/* The 2 hour rule is not imposed for preferred lifetime. */
 		lt6_tmp.ia6t_pltime = new->ndpr_pltime;
 
-		in6_init_address_ltimes(pr, &lt6_tmp);
-
-		/*
-		 * When adjusting the lifetimes of an existing temporary
-		 * address, only lower the lifetimes.
-		 * RFC 3041 3.3. (1).
-		 * XXX: how should we modify ia6t_[pv]ltime?
-		 */
+		/* Special handling for lifetimes of temporary addresses. */
 		if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
-			if (lt6_tmp.ia6t_expire == 0 || /* no expire */
-			    lt6_tmp.ia6t_expire >
-			    ifa6->ia6_lifetime.ia6t_expire) {
-				lt6_tmp.ia6t_expire =
-					ifa6->ia6_lifetime.ia6t_expire;
-			}
-			if (lt6_tmp.ia6t_preferred == 0 || /* no expire */
-			    lt6_tmp.ia6t_preferred >
-			    ifa6->ia6_lifetime.ia6t_preferred) {
-				lt6_tmp.ia6t_preferred =
-					ifa6->ia6_lifetime.ia6t_preferred;
-			}
+			u_int32_t maxvltime, maxpltime;
+
+			/* Constrain lifetimes to system limits. */
+			if (lt6_tmp.ia6t_vltime > ip6_temp_valid_lifetime)
+				lt6_tmp.ia6t_vltime = ip6_temp_valid_lifetime;
+			if (lt6_tmp.ia6t_pltime > ip6_temp_preferred_lifetime)
+				lt6_tmp.ia6t_pltime =
+				    ip6_temp_preferred_lifetime -
+				    ip6_desync_factor;
+
+			/*
+			 * According to RFC 4941, section 3.3 (1), we only
+			 * update the lifetimes when they are in the maximum
+			 * intervals.
+			 */
+			if (ip6_temp_valid_lifetime >
+			    (u_int32_t)((timenow.tv_sec - ifa6->ia6_createtime) +
+			    ip6_desync_factor)) {
+				maxvltime = ip6_temp_valid_lifetime -
+				    (timenow.tv_sec - ifa6->ia6_createtime) -
+				    ip6_desync_factor;
+			} else
+				maxvltime = 0;
+			if (ip6_temp_preferred_lifetime >
+			    (u_int32_t)((timenow.tv_sec - ifa6->ia6_createtime) +
+			    ip6_desync_factor)) {
+				maxpltime = ip6_temp_preferred_lifetime -
+				    (timenow.tv_sec - ifa6->ia6_createtime) -
+				    ip6_desync_factor;
+			} else
+				maxpltime = 0;
+
+			if (lt6_tmp.ia6t_vltime > maxvltime)
+				lt6_tmp.ia6t_vltime = maxvltime;
+			if (lt6_tmp.ia6t_pltime > maxpltime)
+				lt6_tmp.ia6t_pltime = maxpltime;
 		}
 
+		in6_init_address_ltimes(pr, &lt6_tmp,
+		    !!(ifa6->ia6_flags & IN6_IFF_TEMPORARY));
+
 		ifa6->ia6_lifetime = lt6_tmp;
+		ifa6->ia6_updatetime = timenow.tv_sec;
+		IFA_UNLOCK(ifa);
 	}
 	ifnet_lock_done(ifp);
 	if (ia6_match == NULL && new->ndpr_vltime) {
+		int ifidlen;
+
 		/*
+		 * 5.5.3 (d) (continued)
 		 * No address matched and the valid lifetime is non-zero.
 		 * Create a new address.
 		 */
-		if ((ia6 = in6_ifadd(new, NULL)) != NULL) {
+
+		/*
+		 * Prefix Length check:
+		 * If the sum of the prefix length and interface identifier
+		 * length does not equal 128 bits, the Prefix Information
+		 * option MUST be ignored.  The length of the interface
+		 * identifier is defined in a separate link-type specific
+		 * document.
+		 */
+		ifidlen = in6_if2idlen(ifp);
+		if (ifidlen < 0) {
+			/* this should not happen, so we always log it. */
+			log(LOG_ERR, "prelist_update: IFID undefined (%s)\n",
+			    if_name(ifp));
+			goto end;
+		}
+		NDPR_LOCK(pr);
+		if (ifidlen + pr->ndpr_plen != 128) {
+			nd6log((LOG_INFO,
+			    "prelist_update: invalid prefixlen "
+			    "%d for %s, ignored\n",
+			    pr->ndpr_plen, if_name(ifp)));
+			NDPR_UNLOCK(pr);
+			goto end;
+		}
+		NDPR_UNLOCK(pr);
+
+		if ((ia6 = in6_ifadd(new, mcast)) != NULL) {
 			/*
 			 * note that we should use pr (not new) for reference.
 			 */
-			lck_mtx_lock(nd6_mutex);
-			pr->ndpr_refcnt++;
-			lck_mtx_unlock(nd6_mutex);
+			IFA_LOCK(&ia6->ia_ifa);
+			NDPR_LOCK(pr);
 			ia6->ia6_ndpr = pr;
-
-#if 0
-			/* XXXYYY Don't do this, according to Jinmei. */
-			pr->ndpr_addr = new->ndpr_addr;
-#endif
+			NDPR_ADDREF_LOCKED(pr);	/* for addr reference */
+			pr->ndpr_addrcnt++;
+			VERIFY(pr->ndpr_addrcnt != 0);
+			NDPR_UNLOCK(pr);
+			IFA_UNLOCK(&ia6->ia_ifa);
 
 			/*
-			 * RFC 3041 3.3 (2).
+			 * RFC 4941 3.3 (2).
 			 * When a new public address is created as described
 			 * in RFC2462, also create a new temporary address.
 			 *
-			 * RFC 3041 3.5.
+			 * RFC 4941 3.5.
 			 * When an interface connects to a new link, a new
 			 * randomized interface identifier should be generated
 			 * immediately together with a new set of temporary
@@ -1426,35 +2485,264 @@ prelist_update(
 			 */
 			if (ip6_use_tempaddr) {
 				int e;
-				if ((e = in6_tmpifadd(ia6, 1, M_NOWAIT)) != 0) {
+				if ((e = in6_tmpifadd(ia6, 1, M_WAITOK)) != 0) {
 					nd6log((LOG_NOTICE, "prelist_update: "
 					    "failed to create a temporary "
 					    "address, errno=%d\n",
 					    e));
 				}
 			}
-			ifafree(&ia6->ia_ifa);
+			IFA_REMREF(&ia6->ia_ifa);
 			ia6 = NULL;
 
-			/*
-			 * A newly added address might affect the status
-			 * of other addresses, so we check and update it.
-			 * XXX: what if address duplication happens?
-			 */
-			pfxlist_onlink_check(0);
-		} else {
-			/* just set an error. do not bark here. */
-			error = EADDRNOTAVAIL; /* XXX: might be unused. */
-		}
+			/*
+			 * A newly added address might affect the status
+			 * of other addresses, so we check and update it.
+			 * XXX: what if address duplication happens?
+			 */
+			lck_mtx_lock(nd6_mutex);
+			pfxlist_onlink_check();
+			lck_mtx_unlock(nd6_mutex);
+		} else {
+			/* just set an error. do not bark here. */
+			error = EADDRNOTAVAIL; /* XXX: might be unused. */
+		}
+	}
+
+afteraddrconf:
+
+end:
+	if (pr != NULL)
+		NDPR_REMREF(pr);
+	if (ia6_match != NULL)
+		IFA_REMREF(&ia6_match->ia_ifa);
+	return error;
+}
+
+/*
+ * Neighbor Discovery Default Router structure reference counting routines.
+ */
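+/*
+ * The NDDR_ADDREF()/NDDR_REMREF() macros (and their _LOCKED variants)
+ * used throughout this file are assumed to resolve to nddr_addref() and
+ * nddr_remref() below, with the locked flag set accordingly.
+ */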
+static struct nd_defrouter *
+nddr_alloc(int how)
+{
+	struct nd_defrouter *dr;
+
+	dr = (how == M_WAITOK) ? zalloc(nddr_zone) : zalloc_noblock(nddr_zone);
+	if (dr != NULL) {
+		bzero(dr, nddr_size);
+		lck_mtx_init(&dr->nddr_lock, ifa_mtx_grp, ifa_mtx_attr);
+		dr->nddr_debug |= IFD_ALLOC;
+		if (nddr_debug != 0) {
+			dr->nddr_debug |= IFD_DEBUG;
+			dr->nddr_trace = nddr_trace;
+		}
+	}
+	return (dr);
+}
+
+static void
+nddr_free(struct nd_defrouter *dr)
+{
+	NDDR_LOCK(dr);
+	if (dr->nddr_debug & IFD_ATTACHED) {
+		panic("%s: attached nddr %p is being freed", __func__, dr);
+		/* NOTREACHED */
+	} else if (!(dr->nddr_debug & IFD_ALLOC)) {
+		panic("%s: nddr %p cannot be freed", __func__, dr);
+		/* NOTREACHED */
+	}
+	dr->nddr_debug &= ~IFD_ALLOC;
+	NDDR_UNLOCK(dr);
+
+	lck_mtx_destroy(&dr->nddr_lock, ifa_mtx_grp);
+	zfree(nddr_zone, dr);
+}
+
+static void
+nddr_trace(struct nd_defrouter *dr, int refhold)
+{
+	struct nd_defrouter_dbg *dr_dbg = (struct nd_defrouter_dbg *)dr;
+	ctrace_t *tr;
+	uint32_t idx;
+	uint16_t *cnt;
+
+	if (!(dr->nddr_debug & IFD_DEBUG)) {
+		panic("%s: nddr %p has no debug structure", __func__, dr);
+		/* NOTREACHED */
+	}
+	if (refhold) {
+		cnt = &dr_dbg->nddr_refhold_cnt;
+		tr = dr_dbg->nddr_refhold;
+	} else {
+		cnt = &dr_dbg->nddr_refrele_cnt;
+		tr = dr_dbg->nddr_refrele;
+	}
+
+	idx = atomic_add_16_ov(cnt, 1) % NDDR_TRACE_HIST_SIZE;
+	ctrace_record(&tr[idx]);
+}
+
+void
+nddr_addref(struct nd_defrouter *nddr, int locked)
+{
+
+	if (!locked)
+		NDDR_LOCK_SPIN(nddr);
+	else
+		NDDR_LOCK_ASSERT_HELD(nddr);
+
+	if (++nddr->nddr_refcount == 0) {
+		panic("%s: nddr %p wraparound refcnt\n", __func__, nddr);
+		/* NOTREACHED */
+	} else if (nddr->nddr_trace != NULL) {
+		(*nddr->nddr_trace)(nddr, TRUE);
+	}
+
+	if (!locked)
+		NDDR_UNLOCK(nddr);
+}
+
+struct nd_defrouter *
+nddr_remref(struct nd_defrouter *nddr, int locked)
+{
+
+	if (!locked)
+		NDDR_LOCK_SPIN(nddr);
+	else
+		NDDR_LOCK_ASSERT_HELD(nddr);
+
+	if (nddr->nddr_refcount == 0) {
+		panic("%s: nddr %p negative refcnt\n", __func__, nddr);
+		/* NOTREACHED */
+	} else if (nddr->nddr_trace != NULL) {
+		(*nddr->nddr_trace)(nddr, FALSE);
+	}
+
+	if (--nddr->nddr_refcount == 0) {
+		NDDR_UNLOCK(nddr);
+		nddr_free(nddr);
+		nddr = NULL;
+	}
+
+	if (!locked && nddr != NULL)
+		NDDR_UNLOCK(nddr);
+
+	return (nddr);
+}
+
+/*
+ * Neighbor Discovery Prefix structure reference counting routines.
+ */
+static struct nd_prefix *
+ndpr_alloc(int how)
+{
+	struct nd_prefix *pr;
+
+	pr = (how == M_WAITOK) ? zalloc(ndpr_zone) : zalloc_noblock(ndpr_zone);
+	if (pr != NULL) {
+		bzero(pr, ndpr_size);
+		lck_mtx_init(&pr->ndpr_lock, ifa_mtx_grp, ifa_mtx_attr);
+		pr->ndpr_debug |= IFD_ALLOC;
+		if (ndpr_debug != 0) {
+			pr->ndpr_debug |= IFD_DEBUG;
+			pr->ndpr_trace = ndpr_trace;
+		}
+	}
+	return (pr);
+}
+
+static void
+ndpr_free(struct nd_prefix *pr)
+{
+	NDPR_LOCK(pr);
+	if (pr->ndpr_debug & IFD_ATTACHED) {
+		panic("%s: attached ndpr %p is being freed", __func__, pr);
+		/* NOTREACHED */
+	} else if (!(pr->ndpr_debug & IFD_ALLOC)) {
+		panic("%s: ndpr %p cannot be freed", __func__, pr);
+		/* NOTREACHED */
+	}
+	pr->ndpr_debug &= ~IFD_ALLOC;
+	NDPR_UNLOCK(pr);
+
+	lck_mtx_destroy(&pr->ndpr_lock, ifa_mtx_grp);
+	zfree(ndpr_zone, pr);
+}
+
+static void
+ndpr_trace(struct nd_prefix *pr, int refhold)
+{
+	struct nd_prefix_dbg *pr_dbg = (struct nd_prefix_dbg *)pr;
+	ctrace_t *tr;
+	u_int32_t idx;
+	u_int16_t *cnt;
+
+	if (!(pr->ndpr_debug & IFD_DEBUG)) {
+		panic("%s: ndpr %p has no debug structure", __func__, pr);
+		/* NOTREACHED */
+	}
+	if (refhold) {
+		cnt = &pr_dbg->ndpr_refhold_cnt;
+		tr = pr_dbg->ndpr_refhold;
+	} else {
+		cnt = &pr_dbg->ndpr_refrele_cnt;
+		tr = pr_dbg->ndpr_refrele;
+	}
+
+	idx = atomic_add_16_ov(cnt, 1) % NDPR_TRACE_HIST_SIZE;
+	ctrace_record(&tr[idx]);
+}
+
+void
+ndpr_addref(struct nd_prefix *ndpr, int locked)
+{
+	if (!locked)
+		NDPR_LOCK_SPIN(ndpr);
+	else
+		NDPR_LOCK_ASSERT_HELD(ndpr);
+
+	if (++ndpr->ndpr_refcount == 0) {
+		panic("%s: ndpr %p wraparound refcnt\n", __func__, ndpr);
+		/* NOTREACHED */
+	} else if (ndpr->ndpr_trace != NULL) {
+		(*ndpr->ndpr_trace)(ndpr, TRUE);
+	}
+
+	if (!locked)
+		NDPR_UNLOCK(ndpr);
+}
+
+struct nd_prefix *
+ndpr_remref(struct nd_prefix *ndpr, int locked)
+{
+	if (!locked)
+		NDPR_LOCK_SPIN(ndpr);
+	else
+		NDPR_LOCK_ASSERT_HELD(ndpr);
+
+	if (ndpr->ndpr_refcount == 0) {
+		panic("%s: ndpr %p negative refcnt\n", __func__, ndpr);
+		/* NOTREACHED */
+	} else if (ndpr->ndpr_trace != NULL) {
+		(*ndpr->ndpr_trace)(ndpr, FALSE);
 	}
 
-afteraddrconf:
+	if (--ndpr->ndpr_refcount == 0) {
+		if (ndpr->ndpr_addrcnt != 0) {
+			panic("%s: freeing ndpr %p with outstanding address "
+			    "reference (%d)", __func__, ndpr,
+			    ndpr->ndpr_addrcnt);
+			/* NOTREACHED */
+		}
+		NDPR_UNLOCK(ndpr);
+		ndpr_free(ndpr);
+		ndpr = NULL;
+	}
 
-end:
-	if (pr != NULL)
-		ndpr_rele(pr, FALSE);
+	if (!locked && ndpr != NULL)
+		NDPR_UNLOCK(ndpr);
 
-	return error;
+	return (ndpr);
 }
 
 /*
@@ -1463,17 +2751,19 @@ end:
  * XXX: lengthy function name...
  */
 static struct nd_pfxrouter *
-find_pfxlist_reachable_router(
-	struct nd_prefix *pr)
+find_pfxlist_reachable_router(struct nd_prefix *pr)
 {
 	struct nd_pfxrouter *pfxrtr;
 	struct rtentry *rt;
 	struct llinfo_nd6 *ln;
 
 	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
+	NDPR_LOCK_ASSERT_HELD(pr);
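+	/*
+	 * nd6_lookup() may block, so both locks are dropped around it below;
+	 * callers compensate for the window this opens by restarting their
+	 * prefix list scans (see the NDPRF_PROCESSED logic in
+	 * pfxlist_onlink_check()).
+	 */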
 
 	for (pfxrtr = LIST_FIRST(&pr->ndpr_advrtrs); pfxrtr;
 	     pfxrtr = LIST_NEXT(pfxrtr, pfr_entry)) {
+		NDPR_UNLOCK(pr);
+		lck_mtx_unlock(nd6_mutex);
 		/* Callee returns a locked route upon success */
 		if ((rt = nd6_lookup(&pfxrtr->router->rtaddr, 0,
 		    pfxrtr->router->ifp, 0)) != NULL) {
@@ -1482,14 +2772,19 @@ find_pfxlist_reachable_router(
 			    ND6_IS_LLINFO_PROBREACH(ln)) {
 				RT_REMREF_LOCKED(rt);
 				RT_UNLOCK(rt);
+				lck_mtx_lock(nd6_mutex);
+				NDPR_LOCK(pr);
 				break;	/* found */
 			}
 			RT_REMREF_LOCKED(rt);
 			RT_UNLOCK(rt);
 		}
+		lck_mtx_lock(nd6_mutex);
+		NDPR_LOCK(pr);
 	}
+	NDPR_LOCK_ASSERT_HELD(pr);
 
-	return(pfxrtr);
+	return (pfxrtr);
 
 }
 
@@ -1507,61 +2802,150 @@ find_pfxlist_reachable_router(
  * is no router around us.
  */
 void
-pfxlist_onlink_check(int nd6locked)
+pfxlist_onlink_check(void)
 {
-	struct nd_prefix *pr;
+	struct nd_prefix *pr, *prclear;
 	struct in6_ifaddr *ifa;
+	struct nd_defrouter *dr;
+	struct nd_pfxrouter *pfxrtr = NULL;
+
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
+
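+	/*
+	 * Only one thread may walk the prefix list at a time; nd_prefix_busy
+	 * is a simple sleep lock built on top of nd6_mutex.  A thread that
+	 * finds it held sleeps on nd_prefix_waitchan and is woken up once
+	 * the current walker clears the flag (see the wakeup below).
+	 */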
+	while (nd_prefix_busy) {
+		nd_prefix_waiters++;
+		msleep(nd_prefix_waitchan, nd6_mutex, (PZERO-1),
+		    __func__, NULL);
+		lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
+	}
+	nd_prefix_busy = TRUE;
 
 	/*
 	 * Check if there is a prefix that has a reachable advertising
 	 * router.
 	 */
-	if (nd6locked == 0)
-		lck_mtx_lock(nd6_mutex);
-	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
-	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
-		if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr))
+	pr = nd_prefix.lh_first;
+	while (pr) {
+		NDPR_LOCK(pr);
+		if (pr->ndpr_stateflags & NDPRF_PROCESSED) {
+			NDPR_UNLOCK(pr);
+			pr = pr->ndpr_next;
+			continue;
+		}
+		NDPR_ADDREF_LOCKED(pr);
+		if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr) &&
+		    (pr->ndpr_debug & IFD_ATTACHED)) {
+			NDPR_UNLOCK(pr);
+			NDPR_REMREF(pr);
 			break;
+		}
+		pr->ndpr_stateflags |= NDPRF_PROCESSED;
+		NDPR_UNLOCK(pr);
+		NDPR_REMREF(pr);
+		/*
+		 * Since find_pfxlist_reachable_router() drops the nd6_mutex, we
+		 * have to start over, but the NDPRF_PROCESSED flag will stop
+		 * us from checking the same prefix twice.
+		 */
+		pr = nd_prefix.lh_first;
+	}
+	LIST_FOREACH(prclear, &nd_prefix, ndpr_entry) {
+		NDPR_LOCK(prclear);
+		prclear->ndpr_stateflags &= ~NDPRF_PROCESSED;
+		NDPR_UNLOCK(prclear);
 	}
 
-	if (pr) {
+	/*
+	 * If we have no such prefix, check whether we still have a router
+	 * that does not advertise any prefixes.
+	 */
+	if (pr == NULL) {
+		for (dr = TAILQ_FIRST(&nd_defrouter); dr;
+		    dr = TAILQ_NEXT(dr, dr_entry)) {
+			struct nd_prefix *pr0;
+
+			for (pr0 = nd_prefix.lh_first; pr0;
+			    pr0 = pr0->ndpr_next) {
+				NDPR_LOCK(pr0);
+				if ((pfxrtr = pfxrtr_lookup(pr0, dr)) != NULL) {
+					NDPR_UNLOCK(pr0);
+					break;
+				}
+				NDPR_UNLOCK(pr0);
+			}
+			if (pfxrtr != NULL)
+				break;
+		}
+	}
+	if (pr != NULL || (TAILQ_FIRST(&nd_defrouter) && pfxrtr == NULL)) {
 		/*
-		 * There is at least one prefix that has a reachable router.
+		 * There is at least one prefix that has a reachable router,
+		 * or at least a router which probably does not advertise
+		 * any prefixes.  The latter would be the case when we move
+		 * to a new link where we have a router that does not provide
+		 * prefixes and we configure an address by hand.
 		 * Detach prefixes which have no reachable advertising
 		 * router, and attach other prefixes.
 		 */
-		for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
-			/* XXX: a link-local prefix should never be detached */
-			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
-				continue;
-
+		pr = nd_prefix.lh_first;
+		while (pr) {
+			NDPR_LOCK(pr);
 			/*
-			 * we aren't interested in prefixes without the L bit
-			 * set.
+			 * We aren't interested in prefixes already processed,
+			 * nor in prefixes without the L bit set,
+			 * nor in static prefixes.
 			 */
-			if (pr->ndpr_raf_onlink == 0)
+			if (pr->ndpr_raf_onlink == 0 ||
+			    pr->ndpr_stateflags & NDPRF_PROCESSED ||
+			    pr->ndpr_stateflags & NDPRF_STATIC) {
+				NDPR_UNLOCK(pr);
+				pr = pr->ndpr_next;
 				continue;
-
+			}
+			NDPR_ADDREF_LOCKED(pr);
 			if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
-			    find_pfxlist_reachable_router(pr) == NULL)
+			    find_pfxlist_reachable_router(pr) == NULL &&
+			    (pr->ndpr_debug & IFD_ATTACHED))
 				pr->ndpr_stateflags |= NDPRF_DETACHED;
 			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
-			    find_pfxlist_reachable_router(pr) != 0)
+			    find_pfxlist_reachable_router(pr) != NULL &&
+			    (pr->ndpr_debug & IFD_ATTACHED))
 				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
+			pr->ndpr_stateflags |= NDPRF_PROCESSED;
+			NDPR_UNLOCK(pr);
+			NDPR_REMREF(pr);
+			/*
+			 * Since find_pfxlist_reachable_router() drops the
+			 * nd6_mutex, we have to start over, but the
+			 * NDPRF_PROCESSED flag will stop us from checking
+			 * the same prefix twice.
+			 */
+			pr = nd_prefix.lh_first;
 		}
 	} else {
 		/* there is no prefix that has a reachable router */
 		for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
-			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
+			NDPR_LOCK(pr);
+			if (pr->ndpr_raf_onlink == 0 ||
+			    pr->ndpr_stateflags & NDPRF_STATIC) {
+				NDPR_UNLOCK(pr);
 				continue;
-
-			if (pr->ndpr_raf_onlink == 0)
-				continue;
-
+			}
 			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0)
 				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
+			NDPR_UNLOCK(pr);
 		}
 	}
+	LIST_FOREACH(prclear, &nd_prefix, ndpr_entry) {
+		NDPR_LOCK(prclear);
+		prclear->ndpr_stateflags &= ~NDPRF_PROCESSED;
+		NDPR_UNLOCK(prclear);
+	}
+	VERIFY(nd_prefix_busy);
+	nd_prefix_busy = FALSE;
+	if (nd_prefix_waiters > 0) {
+		nd_prefix_waiters = 0;
+		wakeup(nd_prefix_waitchan);
+	}
 
 	/*
 	 * Remove each interface route associated with a (just) detached
@@ -1571,17 +2955,21 @@ pfxlist_onlink_check(int nd6locked)
 	 * interfaces.  Such cases will be handled in nd6_prefix_onlink,
 	 * so we don't have to care about them.
 	 */
-	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
+	pr = nd_prefix.lh_first;
+	while (pr) {
 		int e;
 
-		if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
-			continue;
-
-		if (pr->ndpr_raf_onlink == 0)
+		NDPR_LOCK(pr);
+		if (pr->ndpr_raf_onlink == 0 ||
+		    pr->ndpr_stateflags & NDPRF_STATIC) {
+			NDPR_UNLOCK(pr);
+			pr = pr->ndpr_next;
 			continue;
-
+		}
 		if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
 		    (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
+			NDPR_UNLOCK(pr);
+			lck_mtx_unlock(nd6_mutex);
 			if ((e = nd6_prefix_offlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "pfxlist_onlink_check: failed to "
@@ -1589,18 +2977,25 @@ pfxlist_onlink_check(int nd6locked)
 				    ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
 				    pr->ndpr_plen, e));
 			}
+			lck_mtx_lock(nd6_mutex);
+			pr = nd_prefix.lh_first;
+			continue;
 		}
 		if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
 		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 &&
 		    pr->ndpr_raf_onlink) {
-			if ((e = nd6_prefix_onlink(pr, 0, 1)) != 0) {
+			NDPR_UNLOCK(pr);
+			if ((e = nd6_prefix_onlink(pr)) != 0) {
 				nd6log((LOG_ERR,
 				    "pfxlist_onlink_check: failed to "
 				    "make %s/%d offlink, errno=%d\n",
 				    ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
 				    pr->ndpr_plen, e));
 			}
+		} else {
+			NDPR_UNLOCK(pr);
 		}
+		pr = pr->ndpr_next;
 	}
 
 	/*
@@ -1611,118 +3006,298 @@ pfxlist_onlink_check(int nd6locked)
 	 * always be attached.
 	 * The precise detection logic is the same as the one for prefixes.
 	 */
+	lck_rw_lock_shared(&in6_ifaddr_rwlock);
 	for (ifa = in6_ifaddrs; ifa; ifa = ifa->ia_next) {
-		if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
-			continue;
+		struct nd_prefix *ndpr;
 
-		if (ifa->ia6_ndpr == NULL) {
+		IFA_LOCK(&ifa->ia_ifa);
+		if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
+			IFA_UNLOCK(&ifa->ia_ifa);
+			continue;
+		}
+		if ((ndpr = ifa->ia6_ndpr) == NULL) {
 			/*
 			 * This can happen when we first configure the address
 			 * (i.e. the address exists, but the prefix does not).
 			 * XXX: complicated relationships...
 			 */
+			IFA_UNLOCK(&ifa->ia_ifa);
 			continue;
 		}
+		NDPR_ADDREF(ndpr);
+		IFA_UNLOCK(&ifa->ia_ifa);
 
-		if (find_pfxlist_reachable_router(ifa->ia6_ndpr))
+		NDPR_LOCK(ndpr);
+		if (find_pfxlist_reachable_router(ndpr)) {
+			NDPR_UNLOCK(ndpr);
+			NDPR_REMREF(ndpr);
 			break;
+		}
+		NDPR_UNLOCK(ndpr);
+		NDPR_REMREF(ndpr);
 	}
 	if (ifa) {
 		for (ifa = in6_ifaddrs; ifa; ifa = ifa->ia_next) {
-			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
-				continue;
+			struct nd_prefix *ndpr;
 
-			if (ifa->ia6_ndpr == NULL) /* XXX: see above. */
+			IFA_LOCK(&ifa->ia_ifa);
+			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
+				IFA_UNLOCK(&ifa->ia_ifa);
 				continue;
-
-			if (find_pfxlist_reachable_router(ifa->ia6_ndpr))
-				ifa->ia6_flags &= ~IN6_IFF_DETACHED;
-			else
+			}
+			if ((ndpr = ifa->ia6_ndpr) == NULL) {
+				/* XXX: see above. */
+				IFA_UNLOCK(&ifa->ia_ifa);
+				continue;
+			}
+			NDPR_ADDREF(ndpr);
+			IFA_UNLOCK(&ifa->ia_ifa);
+			NDPR_LOCK(ndpr);
+			if (find_pfxlist_reachable_router(ndpr)) {
+				NDPR_UNLOCK(ndpr);
+				IFA_LOCK(&ifa->ia_ifa);
+				if (ifa->ia6_flags & IN6_IFF_DETACHED) {
+					ifa->ia6_flags &= ~IN6_IFF_DETACHED;
+					ifa->ia6_flags |= IN6_IFF_TENTATIVE;
+					IFA_UNLOCK(&ifa->ia_ifa);
+					nd6_dad_start((struct ifaddr *)ifa, 0);
+				} else {
+					IFA_UNLOCK(&ifa->ia_ifa);
+				}
+			} else {
+				NDPR_UNLOCK(ndpr);
+				IFA_LOCK(&ifa->ia_ifa);
 				ifa->ia6_flags |= IN6_IFF_DETACHED;
+				IFA_UNLOCK(&ifa->ia_ifa);
+			}
+			NDPR_REMREF(ndpr);
 		}
 	}
 	else {
 		for (ifa = in6_ifaddrs; ifa; ifa = ifa->ia_next) {
-			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
+			IFA_LOCK(&ifa->ia_ifa);
+			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
+				IFA_UNLOCK(&ifa->ia_ifa);
 				continue;
+			}
+			if (ifa->ia6_flags & IN6_IFF_DETACHED) {
+				ifa->ia6_flags &= ~IN6_IFF_DETACHED;
+				ifa->ia6_flags |= IN6_IFF_TENTATIVE;
+				IFA_UNLOCK(&ifa->ia_ifa);
+				/* Do we need a delay in this case? */
+				nd6_dad_start((struct ifaddr *)ifa, 0);
+			} else {
+				IFA_UNLOCK(&ifa->ia_ifa);
+			}
+		}
+	}
+	lck_rw_done(&in6_ifaddr_rwlock);
+}
+
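+/*
+ * Look up a prefix entry, other than pr itself, covering the same prefix;
+ * with primary_only set, only a non-scoped (primary) entry qualifies.  On
+ * success the entry is returned with a reference held, which the caller
+ * must drop with NDPR_REMREF().
+ */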
+static struct nd_prefix *
+nd6_prefix_equal_lookup(struct nd_prefix *pr, boolean_t primary_only)
+{
+	struct nd_prefix *opr;
+
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
+
+	for (opr = nd_prefix.lh_first; opr; opr = opr->ndpr_next) {
+		if (opr == pr)
+			continue;
+
+		NDPR_LOCK(opr);
+		if ((opr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
+			NDPR_UNLOCK(opr);
+			continue;
+		}
+		if (opr->ndpr_plen == pr->ndpr_plen &&
+		    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
+		    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen) &&
+		    (!primary_only ||
+		    !(opr->ndpr_stateflags & NDPRF_IFSCOPE))) {
+			NDPR_ADDREF_LOCKED(opr);
+			NDPR_UNLOCK(opr);
+			return (opr);
+		}
+		NDPR_UNLOCK(opr);
+	}
+	return (NULL);
+}
+
+/*
+ * Synchronize the interface routes of similar prefixes on different
+ * interfaces; the one using the default interface would be (re)installed
+ * as a primary/non-scoped entry, and the rest as scoped entries.
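+ *
+ * For example (hypothetical interface names, for illustration only): if
+ * 2001:db8::/64 is on-link via both en0 and en1 and en1 becomes the new
+ * default interface, en1's route is reinstalled as the primary entry and
+ * en0's as an entry scoped to en0.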
+ */
+static void
+nd6_prefix_sync(struct ifnet *ifp)
+{
+	struct nd_prefix *pr, *opr;
+	int err = 0;
+
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
+
+	if (!ip6_doscopedroute || ifp == NULL)
+		return;
 
-			ifa->ia6_flags &= ~IN6_IFF_DETACHED;
+	for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
+		NDPR_LOCK(pr);
+		if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) {
+			NDPR_UNLOCK(pr);
+			continue;
+		}
+		if (pr->ndpr_ifp == ifp &&
+		    (pr->ndpr_stateflags & NDPRF_IFSCOPE) &&
+		    !IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) {
+			NDPR_UNLOCK(pr);
+			break;
 		}
+		NDPR_UNLOCK(pr);
 	}
-	if (nd6locked == 0)
+
+	if (pr == NULL)
+		return;
+
+	/* Remove conflicting entries */
+	opr = nd6_prefix_equal_lookup(pr, TRUE);
+	if (opr != NULL) {
 		lck_mtx_unlock(nd6_mutex);
+		err = nd6_prefix_offlink(opr);
+		lck_mtx_lock(nd6_mutex);
+		if (err != 0) {
+			nd6log((LOG_ERR,
+			    "%s: failed to make %s/%d offlink on %s, "
+			    "errno=%d\n", __func__,
+			    ip6_sprintf(&opr->ndpr_prefix.sin6_addr),
+			    opr->ndpr_plen, if_name(opr->ndpr_ifp), err));
+		}
+	} else {
+		nd6log((LOG_ERR,
+		    "%s: scoped %s/%d on %s has no matching unscoped prefix\n",
+		    __func__, ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
+		    pr->ndpr_plen, if_name(pr->ndpr_ifp)));
+	}
+
+	lck_mtx_unlock(nd6_mutex);
+	err = nd6_prefix_offlink(pr);
+	lck_mtx_lock(nd6_mutex);
+	if (err != 0) {
+		nd6log((LOG_ERR,
+		    "%s: failed to make %s/%d offlink on %s, errno=%d\n",
+		    __func__, ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
+		    pr->ndpr_plen, if_name(pr->ndpr_ifp), err));
+	}
+
+	/* Add the entries back */
+	if (opr != NULL) {
+		err = nd6_prefix_onlink_scoped(opr, opr->ndpr_ifp->if_index);
+		if (err != 0) {
+			nd6log((LOG_ERR,
+			    "%s: failed to make %s/%d scoped onlink on %s, "
+			    "errno=%d\n", __func__,
+			    ip6_sprintf(&opr->ndpr_prefix.sin6_addr),
+			    opr->ndpr_plen, if_name(opr->ndpr_ifp), err));
+		}
+	}
+
+	err = nd6_prefix_onlink_scoped(pr, IFSCOPE_NONE);
+	if (err != 0) {
+		nd6log((LOG_ERR,
+		    "%s: failed to make %s/%d onlink on %s, errno=%d\n",
+		    __func__, ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
+		    pr->ndpr_plen, if_name(pr->ndpr_ifp), err));
+	}
+
+	if (err != 0) {
+		nd6log((LOG_ERR,
+		    "%s: error promoting %s/%d to %s from %s\n",
+		    __func__, ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
+		    pr->ndpr_plen, if_name(pr->ndpr_ifp),
+		    (opr != NULL) ? if_name(opr->ndpr_ifp) : "NONE"));
+	} else {
+		nd6log2((LOG_INFO,
+		    "%s: %s/%d promoted, previously on %s\n",
+		    if_name(pr->ndpr_ifp),
+		    ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen,
+		    (opr != NULL) ? if_name(opr->ndpr_ifp) : "NONE"));
+	}
+
+	if (opr != NULL)
+		NDPR_REMREF(opr);
 }
 
-int
-nd6_prefix_onlink(
-	struct nd_prefix *pr, int rtlocked, int nd6locked)
+static int
+nd6_prefix_onlink_common(struct nd_prefix *pr, boolean_t force_scoped,
+    unsigned int ifscope)
 {
 	struct ifaddr *ifa;
 	struct ifnet *ifp = pr->ndpr_ifp;
-	struct sockaddr_in6 mask6;
+	struct sockaddr_in6 mask6, prefix;
 	struct nd_prefix *opr;
 	u_int32_t rtflags;
 	int error = 0;
 	struct rtentry *rt = NULL;
 
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
+
 	/* sanity check */
+	NDPR_LOCK(pr);
 	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
 		nd6log((LOG_ERR,
-		    "nd6_prefix_onlink: %s/%d is already on-link\n",
-		    ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen);
-		return(EEXIST));
+		    "nd6_prefix_onlink: %s/%d on %s scoped=%d is already "
+		     "on-link\n", ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
+		     pr->ndpr_plen, if_name(pr->ndpr_ifp),
+		     (pr->ndpr_stateflags & NDPRF_IFSCOPE) ? 1 : 0);
+		NDPR_UNLOCK(pr);
+		return (EEXIST));
 	}
+	NDPR_UNLOCK(pr);
 
 	/*
 	 * Add the interface route associated with the prefix.  Before
 	 * installing the route, check if there's the same prefix on another
 	 * interface, and the prefix has already installed the interface route.
-	 * Although such a configuration is expected to be rare, we explicitly
-	 * allow it.
 	 */
-	if (nd6locked == 0)
-		lck_mtx_lock(nd6_mutex);
-	else
-		lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
-	for (opr = nd_prefix.lh_first; opr; opr = opr->ndpr_next) {
-		if (opr == pr)
-			continue;
-
-		if ((opr->ndpr_stateflags & NDPRF_ONLINK) == 0)
-			continue;
-
-		if (opr->ndpr_plen == pr->ndpr_plen &&
-		    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
-					 &opr->ndpr_prefix.sin6_addr,
-					 pr->ndpr_plen)) {
-			if (nd6locked == 0)
-				lck_mtx_unlock(nd6_mutex);
-			return(0);
-		}
+	opr = nd6_prefix_equal_lookup(pr, FALSE);
+	if (opr != NULL)
+		NDPR_REMREF(opr);
+
+	if (!ip6_doscopedroute) {
+		/* if an interface route already exists, just return */
+		if (opr != NULL)
+			return (0);
+		ifscope = IFSCOPE_NONE;
+	} else if (!force_scoped) {
+		/*
+		 * If a primary/non-scoped interface route already exists,
+		 * install the new one as a scoped entry.  If the existing
+		 * interface route is scoped, install new as non-scoped.
+		 */
+		ifscope = (opr != NULL) ? ifp->if_index : IFSCOPE_NONE;
+		opr = nd6_prefix_equal_lookup(pr, TRUE);
+		if (opr != NULL)
+			NDPR_REMREF(opr);
+		else if (ifscope != IFSCOPE_NONE)
+			ifscope = IFSCOPE_NONE;
 	}
 
-	if (nd6locked == 0)
-		lck_mtx_unlock(nd6_mutex);
 	/*
-	 * We prefer link-local addresses as the associated interface address. 
+	 * We prefer link-local addresses as the associated interface address.
 	 */
 	/* search for a link-local addr */
 	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
 						      IN6_IFF_NOTREADY|
 						      IN6_IFF_ANYCAST);
 	if (ifa == NULL) {
-		/* XXX: freebsd does not have ifa_ifwithaf */
-		ifnet_lock_exclusive(ifp);
-		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
-		{
-			if (ifa->ifa_addr->sa_family == AF_INET6)
-				break;
-		}
-		if (ifa != NULL)
-			ifaref(ifa);
+		struct in6_ifaddr *ia6;
+		ifnet_lock_shared(ifp);
+		IFP_TO_IA6(ifp, ia6);
 		ifnet_lock_done(ifp);
+		if (ia6 != NULL)
+			ifa = &ia6->ia_ifa;
 		/* should we care about ia6_flags? */
 	}
+	NDPR_LOCK(pr);
 	if (ifa == NULL) {
 		/*
 		 * This can still happen, when, for example, we receive an RA
@@ -1735,7 +3310,8 @@ nd6_prefix_onlink(
 		    " to add route for a prefix(%s/%d) on %s\n",
 		    ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
 		    pr->ndpr_plen, if_name(ifp)));
-		return(0);
+		NDPR_UNLOCK(pr);
+		return (0);
 	}
 
 	/*
@@ -1745,11 +3321,12 @@ nd6_prefix_onlink(
 	bzero(&mask6, sizeof(mask6));
 	mask6.sin6_len = sizeof(mask6);
 	mask6.sin6_addr = pr->ndpr_mask;
+	prefix = pr->ndpr_prefix;
+	NDPR_UNLOCK(pr);
 
-	if (rtlocked == 0)
-		lck_mtx_lock(rnh_lock);
-
+	IFA_LOCK_SPIN(ifa);
 	rtflags = ifa->ifa_flags | RTF_CLONING | RTF_UP;
+	IFA_UNLOCK(ifa);
 	if (nd6_need_cache(ifp)) {
 		/* explicitly set in case ifa_flags does not set the flag. */
 		rtflags |= RTF_CLONING;
@@ -1759,54 +3336,81 @@ nd6_prefix_onlink(
 		 */
 		rtflags &= ~RTF_CLONING;
 	}
-	error = rtrequest_locked(RTM_ADD, (struct sockaddr *)&pr->ndpr_prefix,
-			  ifa->ifa_addr, (struct sockaddr *)&mask6,
-			  rtflags, &rt);
-	if (error == 0) {
-		if (rt != NULL) { /* this should be non NULL, though */
-			RT_LOCK(rt);
-			nd6_rtmsg(RTM_ADD, rt);
-			RT_UNLOCK(rt);
-		}
-		pr->ndpr_stateflags |= NDPRF_ONLINK;
-	}
-	else {
+
+	lck_mtx_unlock(nd6_mutex);
+
+	error = rtrequest_scoped(RTM_ADD, (struct sockaddr *)&prefix,
+	    ifa->ifa_addr, (struct sockaddr *)&mask6, rtflags, &rt,
+	    ifscope);
+
+	if (rt != NULL) {
+		RT_LOCK(rt);
+		nd6_rtmsg(RTM_ADD, rt);
+		RT_UNLOCK(rt);
+		RT_REMREF(rt);
+	} else {
+		NDPR_LOCK(pr);
 		nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add route for a"
-		    " prefix (%s/%d) on %s, gw=%s, mask=%s, flags=%lx "
-		    "errno = %d\n",
+		    " prefix (%s/%d) on %s, gw=%s, mask=%s, flags=%lx,"
+		    " scoped=%d, errno = %d\n",
 		    ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
 		    pr->ndpr_plen, if_name(ifp),
 		    ip6_sprintf(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr),
-		    ip6_sprintf(&mask6.sin6_addr), rtflags, error));
+		    ip6_sprintf(&mask6.sin6_addr), rtflags,
+		    (ifscope != IFSCOPE_NONE), error));
+		NDPR_UNLOCK(pr);
 	}
 
-	if (rt != NULL)
-		RT_REMREF(rt);
+	lck_mtx_lock(nd6_mutex);
+
+	NDPR_LOCK(pr);
+	pr->ndpr_stateflags &= ~NDPRF_IFSCOPE;
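+	/*
+	 * EEXIST is treated like success: the interface route is already
+	 * present in the routing table, so the prefix is on-link either way.
+	 */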
+	if (rt != NULL || error == EEXIST) {
+		pr->ndpr_stateflags |= NDPRF_ONLINK;
+		if (ifscope != IFSCOPE_NONE)
+			pr->ndpr_stateflags |= NDPRF_IFSCOPE;
+	}
+	NDPR_UNLOCK(pr);
 
-	if (rtlocked == 0)
-		lck_mtx_unlock(rnh_lock);
+	IFA_REMREF(ifa);
 
-	ifafree(ifa);
+	return (error);
+}
 
-	return(error);
+int
+nd6_prefix_onlink(struct nd_prefix *pr)
+{
+	return (nd6_prefix_onlink_common(pr, FALSE, IFSCOPE_NONE));
 }
 
 int
-nd6_prefix_offlink(
-	struct nd_prefix *pr)
+nd6_prefix_onlink_scoped(struct nd_prefix *pr, unsigned int ifscope)
 {
-	int error = 0;
+	return (nd6_prefix_onlink_common(pr, TRUE, ifscope));
+}
+
+int
+nd6_prefix_offlink(struct nd_prefix *pr)
+{
+	int plen, error = 0;
 	struct ifnet *ifp = pr->ndpr_ifp;
 	struct nd_prefix *opr;
-	struct sockaddr_in6 sa6, mask6;
+	struct sockaddr_in6 sa6, mask6, prefix;
 	struct rtentry *rt = NULL;
+	unsigned int ifscope;
+
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
 
 	/* sanity check */
+	NDPR_LOCK(pr);
 	if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
 		nd6log((LOG_ERR,
-		    "nd6_prefix_offlink: %s/%d is already off-link\n",
-		    ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen));
-		return(EEXIST);
+		    "nd6_prefix_offlink: %s/%d on %s scoped=%d is already "
+		    "off-link\n", ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
+		    pr->ndpr_plen, if_name(pr->ndpr_ifp),
+		    (pr->ndpr_stateflags & NDPRF_IFSCOPE) ? 1 : 0));
+		NDPR_UNLOCK(pr);
+		return (EEXIST);
 	}
 
 	bzero(&sa6, sizeof(sa6));
@@ -1818,48 +3422,66 @@ nd6_prefix_offlink(
 	mask6.sin6_family = AF_INET6;
 	mask6.sin6_len = sizeof(sa6);
 	bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr));
-	lck_mtx_lock(rnh_lock);
-	error = rtrequest_locked(RTM_DELETE, (struct sockaddr *)&sa6, NULL,
-			  (struct sockaddr *)&mask6, 0, &rt);
-	if (error == 0) {
-		pr->ndpr_stateflags &= ~NDPRF_ONLINK;
+	prefix = pr->ndpr_prefix;
+	plen = pr->ndpr_plen;
+	NDPR_UNLOCK(pr);
 
+	ifscope = (pr->ndpr_stateflags & NDPRF_IFSCOPE) ?
+	    ifp->if_index : IFSCOPE_NONE;
+
+	error = rtrequest_scoped(RTM_DELETE, (struct sockaddr *)&sa6,
+	    NULL, (struct sockaddr *)&mask6, 0, &rt, ifscope);
+
+	if (rt != NULL) {
 		/* report the route deletion to the routing socket. */
-		if (rt != NULL) {
-			RT_LOCK(rt);
-			nd6_rtmsg(RTM_DELETE, rt);
-			RT_UNLOCK(rt);
-		}
+		RT_LOCK(rt);
+		nd6_rtmsg(RTM_DELETE, rt);
+		RT_UNLOCK(rt);
+		rtfree(rt);
 
 		/*
-		 * There might be the same prefix on another interface,
-		 * the prefix which could not be on-link just because we have
-		 * the interface route (see comments in nd6_prefix_onlink).
-		 * If there's one, try to make the prefix on-link on the
-		 * interface.
+		 * The following check takes place only when Scoped Routing
+		 * is not enabled.  There might be the same prefix on another
+		 * interface, the prefix which could not be on-link just
+		 * because we have the interface route (see comments in
+		 * nd6_prefix_onlink).  If there's one, try to make the prefix
+		 * on-link on the interface.
 		 */
-		lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
-		for (opr = nd_prefix.lh_first; opr; opr = opr->ndpr_next) {
-			if (opr == pr)
-				continue;
+		lck_mtx_lock(nd6_mutex);
+		opr = nd_prefix.lh_first;
+		while (opr) {
+			/* does not apply in the Scoped Routing case */
+			if (ip6_doscopedroute)
+				break;
 
-			if ((opr->ndpr_stateflags & NDPRF_ONLINK) != 0)
+			if (opr == pr) {
+				opr = opr->ndpr_next;
 				continue;
+			}
 
+			NDPR_LOCK(opr);
+			if ((opr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
+				NDPR_UNLOCK(opr);
+				opr = opr->ndpr_next;
+				continue;
+			}
 			/*
 			 * KAME specific: detached prefixes should not be
 			 * on-link.
 			 */
-			if ((opr->ndpr_stateflags & NDPRF_DETACHED) != 0)
+			if ((opr->ndpr_stateflags & NDPRF_DETACHED) != 0) {
+				NDPR_UNLOCK(opr);
+				opr = opr->ndpr_next;
 				continue;
-
-			if (opr->ndpr_plen == pr->ndpr_plen &&
-			    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
-						 &opr->ndpr_prefix.sin6_addr,
-						 pr->ndpr_plen)) {
+			}
+			if (opr->ndpr_plen == plen &&
+			    in6_are_prefix_equal(&prefix.sin6_addr,
+			    &opr->ndpr_prefix.sin6_addr, plen)) {
 				int e;
 
-				if ((e = nd6_prefix_onlink(opr, 1, 1)) != 0) {
+				NDPR_UNLOCK(opr);
+				lck_mtx_unlock(nd6_mutex);
+				if ((e = nd6_prefix_onlink(opr)) != 0) {
 					nd6log((LOG_ERR,
 					    "nd6_prefix_offlink: failed to "
 					    "recover a prefix %s/%d from %s "
@@ -1868,40 +3490,41 @@ nd6_prefix_offlink(
 					    opr->ndpr_plen, if_name(ifp),
 					    if_name(opr->ndpr_ifp), e));
 				}
+				lck_mtx_lock(nd6_mutex);
+				opr = nd_prefix.lh_first;
+			} else {
+				NDPR_UNLOCK(opr);
+				opr = opr->ndpr_next;
 			}
 		}
-	}
-	else {
-		/* XXX: can we still set the NDPRF_ONLINK flag? */
+		lck_mtx_unlock(nd6_mutex);
+	} else {
 		nd6log((LOG_ERR,
 		    "nd6_prefix_offlink: failed to delete route: "
-		    "%s/%d on %s (errno = %d)\n",
-		    ip6_sprintf(&sa6.sin6_addr), pr->ndpr_plen, if_name(ifp),
-		    error));
+		    "%s/%d on %s, scoped %d, (errno = %d)\n",
+		    ip6_sprintf(&sa6.sin6_addr), plen, if_name(ifp),
+		    (ifscope != IFSCOPE_NONE), error));
 	}
 
-	if (rt != NULL)
-		rtfree_locked(rt);
-
-	lck_mtx_unlock(rnh_lock);
+	NDPR_LOCK(pr);
+	pr->ndpr_stateflags &= ~(NDPRF_ONLINK | NDPRF_IFSCOPE);
+	NDPR_UNLOCK(pr);
 
-	return(error);
+	return (error);
 }
 
 static struct in6_ifaddr *
 in6_ifadd(
 	struct nd_prefix *pr,
-	struct in6_addr  *ifid)   /* Mobile IPv6 addition */
+	int mcast)
 {
 	struct ifnet *ifp = pr->ndpr_ifp;
-	struct ifaddr *ifa;
 	struct in6_aliasreq ifra;
 	struct in6_ifaddr *ia, *ib;
 	int error, plen0;
+	int updateflags;
 	struct in6_addr mask;
-	int prefixlen = pr->ndpr_plen;
-
-	in6_len2mask(&mask, prefixlen);
+	int prefixlen;
 
 	/*
 	 * find a link-local address (will be interface ID).
@@ -1915,41 +3538,32 @@ in6_ifadd(
 	 * (2) RFC2462 5.4 suggesting the use of the same interface identifier
 	 * for multiple addresses on a single interface, and possible shortcut
 	 * of DAD.  we omitted DAD for this reason in the past.
-	 * (3) a user can prevent autoconfiguration of global address 
+	 * (3) a user can prevent autoconfiguration of global address
 	 * by removing link-local address by hand (this is partly because we
-	 * don't have other way to control the use of IPv6 on a interface.
+	 * don't have any other way to control the use of IPv6 on an interface.
 	 * this has been our design choice - cf. NRL's "ifconfig auto").
 	 * (4) it is easier to manage when an interface has addresses
 	 * with the same interface identifier, than to have multiple addresses
 	 * with different interface identifiers.
-	 *
-	 * Mobile IPv6 addition: allow for caller to specify a wished interface
-	 * ID. This is to not break connections when moving addresses between
-	 * interfaces.
 	 */
-	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0);/* 0 is OK? */
-	if (ifa)
-		ib = (struct in6_ifaddr *)ifa;
-	else
-		return NULL;
-
-#if 0 /* don't care link local addr state, and always do DAD */
-	/* if link-local address is not eligible, do not autoconfigure. */
-	if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY) {
-		printf("in6_ifadd: link-local address not ready\n");
-		ifafree(ifa);
-		return NULL;
-	}
-#endif
+	ib = in6ifa_ifpforlinklocal(ifp, 0);/* 0 is OK? */
+	if (ib == NULL)
+		return (NULL);
 
-	/* prefixlen + ifidlen must be equal to 128 */
+	IFA_LOCK(&ib->ia_ifa);
+	NDPR_LOCK(pr);
+	prefixlen = pr->ndpr_plen;
+	in6_len2mask(&mask, prefixlen);
 	plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
+	/* prefixlen + ifidlen must be equal to 128 */
 	if (prefixlen != plen0) {
 		nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s "
 		    "(prefix=%d ifid=%d)\n",
 		    if_name(ifp), prefixlen, 128 - plen0));
-		ifafree(ifa);
-		return NULL;
+		NDPR_UNLOCK(pr);
+		IFA_UNLOCK(&ib->ia_ifa);
+		IFA_REMREF(&ib->ia_ifa);
+		return (NULL);
 	}
 
 	/* make ifaddr */
@@ -1971,53 +3585,57 @@ in6_ifadd(
 	ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
 
 	/* interface ID */
-	if (ifid == NULL || IN6_IS_ADDR_UNSPECIFIED(ifid))
-		ifid = &ib->ia_addr.sin6_addr;
-	ifra.ifra_addr.sin6_addr.s6_addr32[0]
-		|= (ifid->s6_addr32[0] & ~mask.s6_addr32[0]);
-	ifra.ifra_addr.sin6_addr.s6_addr32[1]
-		|= (ifid->s6_addr32[1] & ~mask.s6_addr32[1]);
-	ifra.ifra_addr.sin6_addr.s6_addr32[2]
-		|= (ifid->s6_addr32[2] & ~mask.s6_addr32[2]);
-	ifra.ifra_addr.sin6_addr.s6_addr32[3]
-		|= (ifid->s6_addr32[3] & ~mask.s6_addr32[3]);
-	    
+	ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
+	    (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
+	ifra.ifra_addr.sin6_addr.s6_addr32[1] |=
+	    (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
+	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
+	    (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
+	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
+	    (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
+
 	/* new prefix mask. */
 	ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
 	ifra.ifra_prefixmask.sin6_family = AF_INET6;
 	bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr,
 	      sizeof(ifra.ifra_prefixmask.sin6_addr));
 
-	/*
-	 * lifetime.
-	 * XXX: in6_init_address_ltimes would override these values later.
-	 * We should reconsider this logic. 
-	 */
+	/* lifetimes. */
 	ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
 	ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;
 
 	/* XXX: scope zone ID? */
 
 	ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */
+
+	NDPR_UNLOCK(pr);
+	IFA_UNLOCK(&ib->ia_ifa);
+	IFA_REMREF(&ib->ia_ifa);
+
 	/*
-	 * temporarily set the nopfx flag to avoid conflict.
-	 * XXX: we should reconsider the entire mechanism about prefix
-	 * manipulation.
+	 * Make sure that we do not have this address already.  This should
+	 * usually not happen, but we can still see this case, e.g., if we
+	 * have manually configured the exact address that is now being
+	 * autoconfigured.
 	 */
-	ifra.ifra_flags |= IN6_IFF_NOPFX;
+	if ((ib = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr)) != NULL) {
+		IFA_REMREF(&ib->ia_ifa);
+		/* this should be rare enough to make an explicit log */
+		log(LOG_INFO, "in6_ifadd: %s is already configured\n",
+		    ip6_sprintf(&ifra.ifra_addr.sin6_addr));
+		return (NULL);
+	}
 
 	/*
-	 * keep the new address, regardless of the result of in6_update_ifa.
-	 * XXX: this address is now meaningless.
-	 * We should reconsider its role.
+	 * Allocate ifaddr structure, link into chain, etc.
+	 * If we are going to create a new address upon receiving a multicasted
+	 * RA, we need to impose a random delay before starting DAD.
+	 * [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2]
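+	 * (That section calls for a delay chosen randomly between 0 and
+	 * MAX_RTR_SOLICITATION_DELAY, so that hosts receiving the same
+	 * multicast RA do not all start DAD at the same instant.)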
 	 */
-	pr->ndpr_addr = ifra.ifra_addr.sin6_addr;
-
-	ifafree(ifa);
-	ifa = NULL;
-
-	/* allocate ifaddr structure, link into chain, etc. */
-	if ((error = in6_update_ifa(ifp, &ifra, NULL, M_NOWAIT)) != 0) {
+	updateflags = 0;
+	if (mcast)
+		updateflags |= IN6_IFAUPDATE_DADDELAY;
+	error = in6_update_ifa(ifp, &ifra, NULL, updateflags, M_WAITOK);
+	if (error != 0) {
 		nd6log((LOG_ERR,
 		    "in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n",
 		    ip6_sprintf(&ifra.ifra_addr.sin6_addr), if_name(ifp),
@@ -2032,6 +3650,8 @@ in6_ifadd(
 	return(ia);		/* this must NOT be NULL. */
 }
 
+#define	IA6_NONCONST(i) ((struct in6_ifaddr *)(uintptr_t)(i))
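+/*
+ * in6_tmpifadd() receives its public address as a const pointer, but the
+ * IFA_LOCK()/IFA_UNLOCK() macros need a mutable one to take the address
+ * lock; IA6_NONCONST() strips the const for that purpose only.
+ */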
+
 int
 in6_tmpifadd(
 	const struct in6_ifaddr *ia0, /* corresponding public address */
@@ -2043,14 +3663,18 @@ in6_tmpifadd(
 	struct in6_aliasreq ifra;
 	int i, error;
 	int trylimit = 3;	/* XXX: adhoc value */
+	int updateflags;
 	u_int32_t randid[2];
 	time_t vltime0, pltime0;
 	struct timeval timenow;
+	struct in6_addr addr;
+	struct nd_prefix *ndpr;
 
 	getmicrotime(&timenow);
 
 	bzero(&ifra, sizeof(ifra));
 	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
+	IFA_LOCK(&IA6_NONCONST(ia0)->ia_ifa);
 	ifra.ifra_addr = ia0->ia_addr;
 	/* copy prefix mask */
 	ifra.ifra_prefixmask = ia0->ia_prefixmask;
@@ -2059,24 +3683,26 @@ in6_tmpifadd(
 		ifra.ifra_addr.sin6_addr.s6_addr32[i]
 			&= ifra.ifra_prefixmask.sin6_addr.s6_addr32[i];
 	}
+	addr = ia0->ia_addr.sin6_addr;
+	IFA_UNLOCK(&IA6_NONCONST(ia0)->ia_ifa);
 
-  again:
+again:
 	in6_get_tmpifid(ifp, (u_int8_t *)randid,
-			(const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8],
-			forcegen);
-	ifra.ifra_addr.sin6_addr.s6_addr32[2]
-		|= (randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2]));
-	ifra.ifra_addr.sin6_addr.s6_addr32[3]
-		|= (randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));
+	    (const u_int8_t *)&addr.s6_addr[8], forcegen);
+
+	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
+	    (randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2]));
+	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
+	    (randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));
 
 	/*
-	 * If by chance the new temporary address is the same as an address
-	 * already assigned to the interface, generate a new randomized
-	 * interface identifier and repeat this step.
-	 * RFC 3041 3.3 (4).
+	 * in6_get_tmpifid() quite likely provided a unique interface ID.
+	 * However, we may still have a chance to see collision, because
+	 * there may be a time lag between generation of the ID and generation
+	 * of the address.  So, we'll do one more sanity check.
 	 */
 	if ((ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr)) != NULL) {
-		ifafree(&ia->ia_ifa);
+		IFA_REMREF(&ia->ia_ifa);
 		if (trylimit-- == 0) {
 			nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find "
 			    "a unique random IFID\n"));
@@ -2093,25 +3719,22 @@ in6_tmpifadd(
          * of the public address or TEMP_PREFERRED_LIFETIME -
          * DESYNC_FACTOR.
 	 */
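+	/*
+	 * Worked example, using the RFC 3041 defaults and an illustrative
+	 * desync value: with ip6_temp_preferred_lifetime = 86400s (one day),
+	 * ip6_desync_factor = 300s, and a public address whose pltime of
+	 * 14400s was last updated 3600s ago, the remaining pltime is
+	 * 14400 - 3600 = 10800s; that is below the 86400 - 300 = 86100s
+	 * cap, so pltime0 becomes 10800s.
+	 */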
-	if (ia0->ia6_lifetime.ia6t_expire != 0) {
-		vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
-			(ia0->ia6_lifetime.ia6t_expire - timenow.tv_sec);
-		if (vltime0 > ip6_temp_valid_lifetime)
-			vltime0 = ip6_temp_valid_lifetime;
-	} else
+	IFA_LOCK(&IA6_NONCONST(ia0)->ia_ifa);
+	vltime0 = IFA6_IS_INVALID(ia0)
+	    ? 0
+	    : (ia0->ia6_lifetime.ia6t_vltime -
+	      (timenow.tv_sec - ia0->ia6_updatetime));
+	if (vltime0 > ip6_temp_valid_lifetime)
 		vltime0 = ip6_temp_valid_lifetime;
-	if (ia0->ia6_lifetime.ia6t_preferred != 0) {
-		pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
-			(ia0->ia6_lifetime.ia6t_preferred - timenow.tv_sec);
-		if (pltime0 > ip6_temp_preferred_lifetime - ip6_desync_factor){
-			pltime0 = ip6_temp_preferred_lifetime -
-				ip6_desync_factor;
-		}
-	} else
+	pltime0 = IFA6_IS_DEPRECATED(ia0)
+	    ? 0
+	    : (ia0->ia6_lifetime.ia6t_pltime -
+	      (timenow.tv_sec - ia0->ia6_updatetime));
+	if (pltime0 > ip6_temp_preferred_lifetime - ip6_desync_factor)
 		pltime0 = ip6_temp_preferred_lifetime - ip6_desync_factor;
 	ifra.ifra_lifetime.ia6t_vltime = vltime0;
 	ifra.ifra_lifetime.ia6t_pltime = pltime0;
-
+	IFA_UNLOCK(&IA6_NONCONST(ia0)->ia_ifa);
 	/*
 	 * A temporary address is created only if this calculated Preferred
 	 * Lifetime is greater than REGEN_ADVANCE time units.
@@ -2124,8 +3747,13 @@ in6_tmpifadd(
 	ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY);
 
 	/* allocate ifaddr structure, link into chain, etc. */
-	if ((error = in6_update_ifa(ifp, &ifra, NULL, how)) != 0)
-		return(error);
+	updateflags = 0;
+
+	if (how)
+		updateflags |= IN6_IFAUPDATE_DADDELAY;
+
+	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags, how)) != 0)
+		return (error);
 
 	newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
 	if (newia == NULL) {	/* XXX: can it happen? */
@@ -2134,9 +3762,37 @@ in6_tmpifadd(
 		    "no ifaddr\n"));
 		return(EINVAL); /* XXX */
 	}
-	lck_mtx_lock(nd6_mutex);
-	newia->ia6_ndpr = ia0->ia6_ndpr;
-	newia->ia6_ndpr->ndpr_refcnt++;
+	IFA_LOCK(&IA6_NONCONST(ia0)->ia_ifa);
+	ndpr = ia0->ia6_ndpr;
+	if (ndpr == NULL) {
+		/*
+		 * We lost the race with another thread that has purged
+		 * ia0 address; in this case, purge the tmp addr as well.
+		 */
+		nd6log((LOG_ERR, "in6_tmpifadd: no public address\n"));
+		VERIFY(!(ia0->ia6_flags & IN6_IFF_AUTOCONF));
+		IFA_UNLOCK(&IA6_NONCONST(ia0)->ia_ifa);
+		in6_purgeaddr(&newia->ia_ifa);
+		IFA_REMREF(&newia->ia_ifa);
+		return (EADDRNOTAVAIL);
+	}
+	NDPR_ADDREF(ndpr);	/* for us */
+	IFA_UNLOCK(&IA6_NONCONST(ia0)->ia_ifa);
+	IFA_LOCK(&newia->ia_ifa);
+	if (newia->ia6_ndpr != NULL) {
+		NDPR_LOCK(newia->ia6_ndpr);
+		VERIFY(newia->ia6_ndpr->ndpr_addrcnt != 0);
+		newia->ia6_ndpr->ndpr_addrcnt--;
+		NDPR_UNLOCK(newia->ia6_ndpr);
+		NDPR_REMREF(newia->ia6_ndpr);	/* release addr reference */
+	}
+	newia->ia6_ndpr = ndpr;
+	NDPR_LOCK(newia->ia6_ndpr);
+	newia->ia6_ndpr->ndpr_addrcnt++;
+	VERIFY(newia->ia6_ndpr->ndpr_addrcnt != 0);
+	NDPR_ADDREF_LOCKED(newia->ia6_ndpr);	/* for addr reference */
+	NDPR_UNLOCK(newia->ia6_ndpr);
+	IFA_UNLOCK(&newia->ia_ifa);
 	/*
 	 * A newly added address might affect the status of other addresses.
 	 * XXX: when the temporary address is generated with a new public
@@ -2145,18 +3801,25 @@ in6_tmpifadd(
 	 * and, in fact, we surely need the check when we create a new
 	 * temporary address due to deprecation of an old temporary address.
 	 */
-	pfxlist_onlink_check(1);
+	lck_mtx_lock(nd6_mutex);
+	pfxlist_onlink_check();
 	lck_mtx_unlock(nd6_mutex);
-	ifafree(&newia->ia_ifa);
+	IFA_REMREF(&newia->ia_ifa);
+
+	/* remove our reference */
+	NDPR_REMREF(ndpr);
 
 	return(0);
-}	    
+}
+#undef IA6_NONCONST
 
 int
 in6_init_prefix_ltimes(struct nd_prefix *ndpr)
 {
 	struct timeval timenow;
 
+	NDPR_LOCK_ASSERT_HELD(ndpr);
+
 	getmicrotime(&timenow);
 	/* check if preferred lifetime > valid lifetime.  RFC2462 5.5.3 (c) */
 	if (ndpr->ndpr_pltime > ndpr->ndpr_vltime) {
@@ -2178,14 +3841,15 @@ in6_init_prefix_ltimes(struct nd_prefix *ndpr)
 }
 
 static void
-in6_init_address_ltimes(__unused struct nd_prefix *new, struct in6_addrlifetime *lt6)
+in6_init_address_ltimes(__unused struct nd_prefix *new,
+    struct in6_addrlifetime *lt6, boolean_t is_temporary)
 {
 	struct timeval timenow;
 
 	getmicrotime(&timenow);
 	/* Valid lifetime must not be updated unless explicitly specified. */
 	/* init ia6t_expire */
-	if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
+	if (!is_temporary && lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
 		lt6->ia6t_expire = 0;
 	else {
 		lt6->ia6t_expire = timenow.tv_sec;
@@ -2193,7 +3857,7 @@ in6_init_address_ltimes(__unused struct nd_prefix *new, struct in6_addrlifetime
 	}
 
 	/* init ia6t_preferred */
-	if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
+	if (!is_temporary && lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
 		lt6->ia6t_preferred = 0;
 	else {
 		lt6->ia6t_preferred = timenow.tv_sec;
@@ -2281,6 +3945,8 @@ nd6_setdefaultiface(
 {
 	int error = 0;
 	ifnet_t def_ifp = NULL;
+
+	lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
 
 	ifnet_head_lock_shared();
 	if (ifindex < 0 || if_index < ifindex) {
@@ -2292,12 +3958,21 @@ nd6_setdefaultiface(
 
 	lck_mtx_lock(nd6_mutex);
 	if (nd6_defifindex != ifindex) {
+		struct ifnet *odef_ifp = nd6_defifp;
+
 		nd6_defifindex = ifindex;
 		if (nd6_defifindex > 0)
 			nd6_defifp = def_ifp;
 		else
 			nd6_defifp = NULL;
 
+		if (nd6_defifp != NULL)
+			nd6log((LOG_INFO, "%s: is now the default "
+			    "interface (was %s)\n", if_name(nd6_defifp),
+			    odef_ifp != NULL ? if_name(odef_ifp) : "NONE"));
+		else
+			nd6log((LOG_INFO, "No default interface set\n"));
+
 		/*
 		 * If the Default Router List is empty, install a route
 		 * to the specified interface as default or remove the default
@@ -2306,8 +3981,10 @@ nd6_setdefaultiface(
 		 * we do this here to avoid re-install the default route
 		 * if the list is NOT empty.
 		 */
-		if (TAILQ_FIRST(&nd_defrouter) == NULL)
-			defrouter_select();
+		if (ip6_doscopedroute || TAILQ_FIRST(&nd_defrouter) == NULL) {
+			defrtrlist_sync(nd6_defifp);
+			nd6_prefix_sync(nd6_defifp);
+		}
 
 		/*
 		 * Our current implementation assumes one-to-one mapping between
@@ -2316,7 +3993,7 @@ nd6_setdefaultiface(
 		 */
 		scope6_setdefault(nd6_defifp);
 	}
-
 	lck_mtx_unlock(nd6_mutex);
+
 	return(error);
 }
diff --git a/bsd/netinet6/raw_ip6.c b/bsd/netinet6/raw_ip6.c
index 169b7992d..f5c48648e 100644
--- a/bsd/netinet6/raw_ip6.c
+++ b/bsd/netinet6/raw_ip6.c
@@ -138,6 +138,7 @@ extern struct	inpcbhead ripcb;
 extern struct	inpcbinfo ripcbinfo;
 extern u_int32_t	rip_sendspace;
 extern u_int32_t	rip_recvspace;
+extern int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt);
 
 struct rip6stat rip6stat;
 
@@ -149,7 +150,8 @@ struct rip6stat rip6stat;
 int
 rip6_input(
 	struct	mbuf **mp,
-	int	*offp)
+	int	*offp,
+	int	proto)
 {
 	struct mbuf *m = *mp;
 	register struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
@@ -157,7 +159,7 @@ rip6_input(
 	struct inpcb *last = 0;
 	struct mbuf *opts = NULL;
 	struct sockaddr_in6 rip6src;
-	int proto = ip6->ip6_nxt;
+	int ret;
 
 	rip6stat.rip6s_ipackets++;
 
@@ -206,11 +208,20 @@ rip6_input(
 			} else
 #endif /*IPSEC*/
 			if (n) {
-				if (last->in6p_flags & IN6P_CONTROLOPTS ||
-				    last->in6p_socket->so_options & SO_TIMESTAMP)
-					ip6_savecontrol(last, &opts, ip6, n);
+				if ((last->in6p_flags & IN6P_CONTROLOPTS) != 0 ||
+				    (last->in6p_socket->so_options & SO_TIMESTAMP) != 0 ||
+				    (last->in6p_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
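+					/*
+					 * If ip6_savecontrol() fails (e.g.
+					 * an mbuf shortage), toss this copy
+					 * and move on to the next PCB.
+					 */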
+					ret = ip6_savecontrol(last, n, &opts);
+					if (ret != 0) {
+						m_freem(n);
+						m_freem(opts);
+						last = in6p;
+						continue;
+					}
+				}
 				/* strip intermediate headers */
 				m_adj(n, *offp);
+				so_recv_data_stat(last->in6p_socket, m, 0);
 				if (sbappendaddr(&last->in6p_socket->so_rcv,
 						(struct sockaddr *)&rip6src,
 						 n, opts, NULL) == 0) {
@@ -222,7 +233,7 @@ rip6_input(
 		}
 		last = in6p;
 	}
-	lck_rw_done(ripcbinfo.mtx);
+
 #if IPSEC
 	/*
 	 * Check AH/ESP integrity.
@@ -235,11 +246,21 @@ rip6_input(
 	} else
 #endif /*IPSEC*/
 	if (last) {
-		if (last->in6p_flags & IN6P_CONTROLOPTS ||
-		    last->in6p_socket->so_options & SO_TIMESTAMP)
-			ip6_savecontrol(last, &opts, ip6, m);
+		if ((last->in6p_flags & IN6P_CONTROLOPTS) != 0 ||
+		    (last->in6p_socket->so_options & SO_TIMESTAMP) != 0 ||
+		    (last->in6p_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
+			ret = ip6_savecontrol(last, m, &opts);
+			if (ret != 0) {
+				m_freem(m);
+				m_freem(opts);
+				ip6stat.ip6s_delivered--;
+				goto unlock;
+			}
+
+		}
 		/* strip intermediate headers */
 		m_adj(m, *offp);
+		so_recv_data_stat(last->in6p_socket, m, 0);
 		if (sbappendaddr(&last->in6p_socket->so_rcv,
 				(struct sockaddr *)&rip6src, m, opts, NULL) == 0) {
 			rip6stat.rip6s_fullsock++;
@@ -259,6 +280,10 @@ rip6_input(
 		}
 		ip6stat.ip6s_delivered--;
 	}
+
+unlock:
+	lck_rw_done(ripcbinfo.mtx);
+
 	return IPPROTO_DONE;
 }
 
@@ -270,6 +295,7 @@ rip6_ctlinput(
 {
 	struct ip6_hdr *ip6;
 	struct mbuf *m;
+	void *cmdarg = NULL;
 	int off = 0;
 	struct ip6ctlparam *ip6cp = NULL;
 	const struct sockaddr_in6 *sa6_src = NULL;
@@ -294,6 +320,7 @@ rip6_ctlinput(
 		m = ip6cp->ip6c_m;
 		ip6 = ip6cp->ip6c_ip6;
 		off = ip6cp->ip6c_off;
+		cmdarg = ip6cp->ip6c_cmdarg;
 		sa6_src = ip6cp->ip6c_src;
 	} else {
 		m = NULL;
@@ -302,7 +329,7 @@ rip6_ctlinput(
 	}
 
 	(void) in6_pcbnotify(&ripcbinfo, sa, 0, (const struct sockaddr *)sa6_src,
-			     0, cmd, notify);
+			     0, cmd, cmdarg, notify);
 }
 
 /*
@@ -314,7 +341,8 @@ rip6_output(
 	register struct mbuf *m,
 	struct socket *so,
 	struct sockaddr_in6 *dstsock,
-	struct mbuf *control)
+	struct mbuf *control,
+	int israw)
 {
 	struct in6_addr *dst;
 	struct ip6_hdr *ip6;
@@ -322,25 +350,29 @@ rip6_output(
 	u_int	plen = m->m_pkthdr.len;
 	int error = 0;
 	struct ip6_pktopts opt, *optp = 0;
+	struct ip6_moptions *im6o = NULL;
 	struct ifnet *oifp = NULL;
 	int type = 0, code = 0;		/* for ICMPv6 output statistics only */
-	int priv = 0;
-#if PKT_PRIORITY
-	mbuf_traffic_class_t mtc = MBUF_TC_NONE;
-#endif /* PKT_PRIORITY */
+	mbuf_traffic_class_t mtc = MBUF_TC_UNSPEC;
+	struct ip6_out_args ip6oa = { IFSCOPE_NONE, 0 };
+	int flags = IPV6_OUTARGS;
+
+	if (dstsock && IN6_IS_ADDR_V4MAPPED(&dstsock->sin6_addr)) {
+		m_freem(m);
+		return (EINVAL);
+	}
 
 	in6p = sotoin6pcb(so);
 
-	priv = 0;
-	if (so->so_uid == 0)
-		priv = 1;
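+	/*
+	 * ip6oa carries this socket's constraints down to ip6_output():
+	 * the interface it is bound to, if any, and whether cellular
+	 * interfaces may be used for this flow.
+	 */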
+	ip6oa.ip6oa_boundif = (in6p->inp_flags & INP_BOUND_IF) ?
+	    in6p->inp_boundif : IFSCOPE_NONE;
+	ip6oa.ip6oa_nocell = (in6p->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
+
 	dst = &dstsock->sin6_addr;
 	if (control) {
-#if PKT_PRIORITY
 		mtc = mbuf_traffic_class_from_control(control);
-#endif /* PKT_PRIORITY */
 
-		if ((error = ip6_setpktoptions(control, &opt, priv, 0)) != 0)
+		if ((error = ip6_setpktopts(control, &opt, NULL,
+		    so->so_proto->pr_protocol)) != 0)
 			goto bad;
 		optp = &opt;
 	} else
@@ -374,6 +406,8 @@ rip6_output(
 	 */
 	ip6->ip6_dst = *dst;
 
+	im6o = in6p->in6p_moptions;
+
 	/*
 	 * If the scope of the destination is link-local, embed the interface
 	 * index in the address.
@@ -382,7 +416,13 @@ rip6_output(
 	 */
 	if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) {
 		struct in6_pktinfo *pi;
+		struct ifnet *im6o_multicast_ifp = NULL;
 
+		if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && im6o != NULL) {
+			IM6O_LOCK(im6o);
+			im6o_multicast_ifp = im6o->im6o_multicast_ifp;
+			IM6O_UNLOCK(im6o);
+		}
 		/*
 		 * XXX Boundary check is assumed to be already done in
 		 * ip6_setpktoptions().
@@ -391,10 +431,12 @@ rip6_output(
 		if (optp && (pi = optp->ip6po_pktinfo) && pi->ipi6_ifindex) {
 			ip6->ip6_dst.s6_addr16[1] = htons(pi->ipi6_ifindex);
 			oifp = ifindex2ifnet[pi->ipi6_ifindex];
+			if (oifp != NULL)
+				ifnet_reference(oifp);
 		} else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
-			 in6p->in6p_moptions &&
-			 in6p->in6p_moptions->im6o_multicast_ifp) {
-			oifp = in6p->in6p_moptions->im6o_multicast_ifp;
+		    im6o != NULL && im6o_multicast_ifp != NULL) {
+			oifp = im6o_multicast_ifp;
+			ifnet_reference(oifp);
 			ip6->ip6_dst.s6_addr16[1] = htons(oifp->if_index);
 		} else if (dstsock->sin6_scope_id) {
 			/* 
@@ -421,11 +463,13 @@ rip6_output(
 		struct in6_addr *in6a;
 		struct in6_addr	storage;
 		u_short index = 0;
-		if ((in6a = in6_selectsrc(dstsock, optp,
-					  in6p->in6p_moptions,
-					  &in6p->in6p_route,
-					  &in6p->in6p_laddr,
-					  &storage, &error)) == 0) {
+
+		if (israw != 0 && optp && optp->ip6po_pktinfo &&
+		    !IN6_IS_ADDR_UNSPECIFIED(&optp->ip6po_pktinfo->ipi6_addr)) {
+			in6a = &optp->ip6po_pktinfo->ipi6_addr;
+			flags |= IPV6_FLAG_NOSRCIFSEL;
+		} else if ((in6a = in6_selectsrc(dstsock, optp, in6p,
+		    &in6p->in6p_route, NULL, &storage, ip6oa.ip6oa_boundif,
+		    &error)) == 0) {
 			if (error == 0)
 				error = EADDRNOTAVAIL;
 			goto bad;
@@ -436,11 +480,15 @@ rip6_output(
 			if (in6p->in6p_route.ro_rt->rt_ifp != NULL)
 				index = in6p->in6p_route.ro_rt->rt_ifp->if_index;
 			RT_UNLOCK(in6p->in6p_route.ro_rt);
+			if (oifp != NULL)
+				ifnet_release(oifp);
 			ifnet_head_lock_shared();
 			if (index == 0 || if_index < index) {
 				panic("bad if_index on interface from route");
 			}
 			oifp = ifindex2ifnet[index];
+			if (oifp != NULL)
+				ifnet_reference(oifp);
 			ifnet_head_done();
 		}
 	}
@@ -463,7 +511,7 @@ rip6_output(
 			off = offsetof(struct icmp6_hdr, icmp6_cksum);
 		else
 			off = in6p->in6p_cksum;
-		if (plen < off + 1) {
+		if (plen < (unsigned int)(off + 1)) {
 			error = EINVAL;
 			goto bad;
 		}
@@ -494,26 +542,48 @@ rip6_output(
 		in6p->in6p_route.ro_rt = NULL;
 	}
 
-#if PKT_PRIORITY
-	set_traffic_class(m, so, mtc);
-#endif /* PKT_PRIORITY */
+	if (oifp != NULL) {
+		ifnet_release(oifp);
+		oifp = NULL;
+	}
 
-	error = ip6_output(m, optp, &in6p->in6p_route, 0,
-			   in6p->in6p_moptions, &oifp, 0);
+	set_packet_tclass(m, so, mtc, 1);
+	
+	if (im6o != NULL)
+		IM6O_ADDREF(im6o);
 
-#if IFNET_ROUTE_REFCNT
-	/*
-	 * Always discard the cached route for unconnected socket
-	 * or if it is a multicast route.
-	 */
-	if (in6p->in6p_route.ro_rt != NULL &&
-	    ((in6p->in6p_route.ro_rt->rt_flags & RTF_MULTICAST) ||
-	    in6p->in6p_socket == NULL ||
-	    in6p->in6p_socket->so_state != SS_ISCONNECTED)) {
-		rtfree(in6p->in6p_route.ro_rt);
-		in6p->in6p_route.ro_rt = NULL;
+	error = ip6_output(m, optp, &in6p->in6p_route, flags, im6o,
+	    &oifp, &ip6oa);
+
+	if (im6o != NULL)
+		IM6O_REMREF(im6o);
+
+	if (in6p->in6p_route.ro_rt != NULL) {
+		struct rtentry *rt = in6p->in6p_route.ro_rt;
+		unsigned int outif;
+
+		if ((rt->rt_flags & RTF_MULTICAST) ||
+		    in6p->in6p_socket == NULL ||
+		    !(in6p->in6p_socket->so_state & SS_ISCONNECTED)) {
+			rt = NULL;	/* unusable */
+		}
+		/*
+		 * Always discard the cached route for unconnected
+		 * socket or if it is a multicast route.
+		 */
+		if (rt == NULL) {
+			rtfree(in6p->in6p_route.ro_rt);
+			in6p->in6p_route.ro_rt = NULL;
+		}
+		/*
+		 * If this is a connected socket and the destination
+		 * route is not multicast, update outif with that of
+		 * the route interface index used by IP.
+		 */
+		if (rt != NULL &&
+		    (outif = rt->rt_ifp->if_index) != in6p->in6p_last_outif)
+			in6p->in6p_last_outif = outif;
 	}
-#endif /* IFNET_ROUTE_REFCNT */
 
 	if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
 		if (oifp)
@@ -535,9 +605,11 @@ rip6_output(
 	}
 	if (control) {
 		if (optp == &opt)
-			ip6_clearpktopts(optp, 0, -1);
+			ip6_clearpktopts(optp, -1);
 		m_freem(control);
 	}
+	if (oifp != NULL)
+		ifnet_release(oifp);
 	return(error);
 }
 
@@ -594,8 +666,13 @@ rip6_ctloutput(
 		case MRT6_PIM:
 #if MROUTING
 			error = ip6_mrouter_get(so, sopt);
+#else
+			error = ENOPROTOOPT;
+#endif /* MROUTING */
+			break;
+		case IPV6_CHECKSUM:
+			error = ip6_raw_ctloutput(so, sopt);
 			break;
-#endif
 		default:
 			error = ip6_ctloutput(so, sopt);
 			break;
@@ -627,8 +704,13 @@ rip6_ctloutput(
 		case MRT6_PIM:
 #if MROUTING
 			error = ip6_mrouter_set(so, sopt);
-			break;
+#else
+			error = ENOPROTOOPT;
 #endif
+			break;
+		case IPV6_CHECKSUM:
+			error = ip6_raw_ctloutput(so, sopt);
+			break;
 		default:
 			error = ip6_ctloutput(so, sopt);
 			break;
@@ -714,7 +796,8 @@ rip6_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
 {
 	struct inpcb *inp = sotoinpcb(so);
 	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam;
-	struct ifaddr *ia = NULL;
+	struct ifaddr *ifa = NULL;
+	unsigned int outif = 0;
 
 	if (nam->sa_len != sizeof(*addr))
 		return EINVAL;
@@ -727,18 +810,23 @@ rip6_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
 	}
 #endif
 	if (!IN6_IS_ADDR_UNSPECIFIED(&addr->sin6_addr) &&
-	    (ia = ifa_ifwithaddr((struct sockaddr *)addr)) == 0)
+	    (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == 0)
 		return EADDRNOTAVAIL;
-	if (ia &&
-	    ((struct in6_ifaddr *)ia)->ia6_flags &
-	    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|
-	     IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) {
-	    if (ia) ifafree(ia);
-		return(EADDRNOTAVAIL);
+	if (ifa != NULL) {
+		IFA_LOCK(ifa);
+		if (((struct in6_ifaddr *)ifa)->ia6_flags &
+		    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|
+		     IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) {
+			IFA_UNLOCK(ifa);
+			IFA_REMREF(ifa);
+			return(EADDRNOTAVAIL);
+		}
+		outif = ifa->ifa_ifp->if_index;
+		IFA_UNLOCK(ifa);
+		IFA_REMREF(ifa);
 	}
-	if (ia != NULL)
-		ifafree(ia);
 	inp->in6p_laddr = addr->sin6_addr;
+	inp->in6p_last_outif = outif;
 	return 0;
 }
 
@@ -753,6 +841,7 @@ rip6_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
 #if ENABLE_DEFAULT_SCOPE
 	struct sockaddr_in6 tmp;
 #endif
+	unsigned int outif = 0, ifscope;
 
 	if (nam->sa_len != sizeof(*addr))
 		return EINVAL;
@@ -768,14 +857,20 @@ rip6_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p)
 		addr->sin6_scope_id = scope6_addr2default(&addr->sin6_addr);
 	}
 #endif
+
+	ifscope = (inp->inp_flags & INP_BOUND_IF) ?
+	    inp->inp_boundif : IFSCOPE_NONE;
+
 	/* Source address selection. XXX: need pcblookup? */
-	in6a = in6_selectsrc(addr, inp->in6p_outputopts,
-			     inp->in6p_moptions, &inp->in6p_route,
-			     &inp->in6p_laddr, &storage, &error);
+	in6a = in6_selectsrc(addr, inp->in6p_outputopts, inp, &inp->in6p_route,
+	    NULL, &storage, ifscope, &error);
 	if (in6a == NULL)
 		return (error ? error : EADDRNOTAVAIL);
 	inp->in6p_laddr = *in6a;
 	inp->in6p_faddr = addr->sin6_addr;
+	if (inp->in6p_route.ro_rt != NULL)
+		outif = inp->in6p_route.ro_rt->rt_ifp->if_index;
+	inp->in6p_last_outif = outif;
 	soisconnected(so);
 	return 0;
 }
@@ -788,12 +883,13 @@ rip6_shutdown(struct socket *so)
 }
 
 static int
-rip6_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr *nam,
-	 struct mbuf *control, __unused struct proc *p)
+rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
+	 struct mbuf *control, struct proc *p)
 {
+#pragma unused(flags, p)
 	struct inpcb *inp = sotoinpcb(so);
 	struct sockaddr_in6 tmp;
-	struct sockaddr_in6 *dst;
+	struct sockaddr_in6 *dst = (struct sockaddr_in6 *)nam;
 
 	/* always copy sockaddr to avoid overwrites */
 	if (so->so_state & SS_ISCONNECTED) {
@@ -821,7 +917,7 @@ rip6_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr
 		dst->sin6_scope_id = scope6_addr2default(&dst->sin6_addr);
 	}
 #endif
-	return rip6_output(m, so, dst, control);
+	return rip6_output(m, so, dst, control, 1);
 }
 
 struct pr_usrreqs rip6_usrreqs = {
diff --git a/bsd/netinet6/route6.c b/bsd/netinet6/route6.c
index 36617f2d0..a0dc6c6a6 100644
--- a/bsd/netinet6/route6.c
+++ b/bsd/netinet6/route6.c
@@ -1,3 +1,31 @@
+/*
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
 /*	$FreeBSD: src/sys/netinet6/route6.c,v 1.1.2.3 2001/07/03 11:01:55 ume Exp $	*/
 /*	$KAME: route6.c,v 1.24 2001/03/14 03:07:05 itojun Exp $	*/
 
@@ -52,8 +80,9 @@ static int ip6_rthdr0(struct mbuf *, struct ip6_hdr *,
 #endif /* IP6_RTHDR0_ALLOWED */
 
 int
-route6_input(struct mbuf **mp, int *offp)
+route6_input(struct mbuf **mp, int *offp, int proto)
 {
+#pragma unused(proto)
 	struct ip6_hdr *ip6;
 	struct mbuf *m = *mp;
 	struct ip6_rthdr *rh;
@@ -143,7 +172,7 @@ ip6_rthdr0(m, ip6, rh0)
 	struct ip6_rthdr0 *rh0;
 {
 	int addrs, index;
-	struct in6_addr *nextaddr, tmpaddr;
+	struct in6_addr *nextaddr, tmpaddr;
+	struct in6_ifaddr *ia6 = NULL;
 	struct route_in6 ip6forward_rt;
 
 	if (rh0->ip6r0_segleft == 0)
@@ -156,20 +185,20 @@ ip6_rthdr0(m, ip6, rh0)
 		) {
 		/*
 		 * Type 0 routing header can't contain more than 23 addresses.
-		 * RFC 2462: this limitation was removed since stict/loose
+		 * RFC 2462: this limitation was removed since strict/loose
 		 * bitmap field was deleted.
 		 */
 		ip6stat.ip6s_badoptions++;
 		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
 			    (caddr_t)&rh0->ip6r0_len - (caddr_t)ip6);
-		return(-1);
+		return (-1);
 	}
 
 	if ((addrs = rh0->ip6r0_len / 2) < rh0->ip6r0_segleft) {
 		ip6stat.ip6s_badoptions++;
 		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
 			    (caddr_t)&rh0->ip6r0_segleft - (caddr_t)ip6);
-		return(-1);
+		return (-1);
 	}
 
 	index = addrs - rh0->ip6r0_segleft;
@@ -188,7 +217,7 @@ ip6_rthdr0(m, ip6, rh0)
 	    IN6_IS_ADDR_V4COMPAT(nextaddr)) {
 		ip6stat.ip6s_badoptions++;
 		m_freem(m);
-		return(-1);
+		return (-1);
 	}
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
 	    IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst) ||
@@ -196,16 +225,31 @@ ip6_rthdr0(m, ip6, rh0)
 	    IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) {
 		ip6stat.ip6s_badoptions++;
 		m_freem(m);
-		return(-1);
+		return (-1);
+	}
+
+	/*
+	 * Determine the scope zone of the next hop, based on the interface
+	 * of the current hop. [RFC4007, Section 9]
+	 * Then disambiguate the scope zone for the next hop (if necessary).
+	 */
+	if ((ia6 = ip6_getdstifaddr(m)) == NULL)
+		goto bad;
+	if (in6_setscope(nextaddr, ia6->ia_ifp, NULL) != 0) {
+		ip6stat.ip6s_badscope++;
+		IFA_REMREF(&ia6->ia_ifa);
+		ia6 = NULL;
+		goto bad;
 	}
+	IFA_REMREF(&ia6->ia_ifa);
+	ia6 = NULL;
 
 	/*
 	 * Swap the IPv6 destination address and nextaddr. Forward the packet.
 	 */
 	tmpaddr = *nextaddr;
 	*nextaddr = ip6->ip6_dst;
-	if (IN6_IS_ADDR_LINKLOCAL(nextaddr))
-		nextaddr->s6_addr16[1] = 0;
+	in6_clearscope(nextaddr); /* XXX */
 	ip6->ip6_dst = tmpaddr;
 	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst))
 		ip6->ip6_dst.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index);
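
Worked arithmetic for the length checks above (illustrative only): ip6r0_len
counts 8-octet units beyond the first 8 bytes of the routing header, and one
IPv6 address is 16 octets, i.e. 2 units.  A type 0 header carrying the maximum
23 addresses therefore has ip6r0_len = 23 * 2 = 46 and addrs = 46 / 2 = 23;
any ip6r0_segleft greater than addrs is rejected as malformed.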
diff --git a/bsd/netinet6/scope6.c b/bsd/netinet6/scope6.c
index 70e90dfa9..2d4eedf76 100644
--- a/bsd/netinet6/scope6.c
+++ b/bsd/netinet6/scope6.c
@@ -1,3 +1,31 @@
+/*
+ * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
 /*	$FreeBSD: src/sys/netinet6/scope6.c,v 1.3 2002/03/25 10:12:51 ume Exp $	*/
 /*	$KAME: scope6.c,v 1.10 2000/07/24 13:29:31 itojun Exp $	*/
 
@@ -36,6 +64,8 @@
 #include <sys/socket.h>
 #include <sys/systm.h>
 #include <sys/queue.h>
+#include <sys/syslog.h>
+#include <sys/mcache.h>
 
 #include <net/route.h>
 #include <net/if.h>
@@ -47,13 +77,12 @@
 
 extern lck_mtx_t *scope6_mutex;
 
-struct scope6_id {
-	/*
-	 * 16 is correspondent to 4bit multicast scope field.
-	 * i.e. from node-local to global with some reserved/unassigned types.
-	 */
-	u_int32_t s6id_list[16];
-};
+#ifdef ENABLE_DEFAULT_SCOPE
+int ip6_use_defzone = 1;
+#else
+int ip6_use_defzone = 0;
+#endif
+
 static size_t if_scope_indexlim = 8;
 struct scope6_id *scope6_ids = NULL;
 
@@ -103,6 +132,7 @@ scope6_ifattach(
 	 * XXX: IPV6_ADDR_SCOPE_xxx macros are not standard.
 	 * Should we rather hardcode here?
 	 */
+	SID.s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL] = ifp->if_index;
 	SID.s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] = ifp->if_index;
 #if MULTI_SCOPE
 	/* by default, we don't care about scope boundary for these scopes. */
@@ -133,14 +163,14 @@ scope6_set(
 
 	/*
 	 * TODO(XXX): after setting, we should reflect the changes to
-	 * interface addresses, routing table entries, PCB entries... 
+	 * interface addresses, routing table entries, PCB entries...
 	 */
 
 	lck_mtx_lock(scope6_mutex);
 	for (i = 0; i < 16; i++) {
 		if (idlist[i] &&
 		    idlist[i] != scope6_ids[ifp->if_index].s6id_list[i]) {
-			if (i == IPV6_ADDR_SCOPE_LINKLOCAL &&
+			if (i == IPV6_ADDR_SCOPE_INTFACELOCAL &&
 			    idlist[i] > if_index) {
 				/*
 				 * XXX: theoretically, there should be no
@@ -216,8 +246,8 @@ struct in6_addr *addr;
 		 * return scope doesn't work.
 		 */
 		switch (scope) {
-		case IPV6_ADDR_SCOPE_NODELOCAL:
-			return IPV6_ADDR_SCOPE_NODELOCAL;
+		case IPV6_ADDR_SCOPE_INTFACELOCAL:
+			return IPV6_ADDR_SCOPE_INTFACELOCAL;
 			break;
 		case IPV6_ADDR_SCOPE_LINKLOCAL:
 			return IPV6_ADDR_SCOPE_LINKLOCAL;
@@ -231,11 +261,15 @@ struct in6_addr *addr;
 		}
 	}
 
+	/*
+	 * Regard loopback and unspecified addresses as global, since
+	 * they have no ambiguity.
+	 */
 	if (bcmp(&in6addr_loopback, addr, sizeof(*addr) - 1) == 0) {
 		if (addr->s6_addr8[15] == 1) /* loopback */
-			return IPV6_ADDR_SCOPE_NODELOCAL;
-		if (addr->s6_addr8[15] == 0) /* unspecified */
 			return IPV6_ADDR_SCOPE_LINKLOCAL;
+		if (addr->s6_addr8[15] == 0) /* unspecified */
+			return IPV6_ADDR_SCOPE_GLOBAL; /* XXX: correct? */
 	}
 
 	return IPV6_ADDR_SCOPE_GLOBAL;
@@ -282,6 +316,106 @@ in6_addr2scopeid(
 	return retid;
 }
 
+/*
+ * Validate the specified scope zone ID in the sin6_scope_id field.  If the ID
+ * is unspecified (=0) but required, and the default zone ID may be
+ * used, the default value is substituted.
+ * This routine then generates the kernel-internal form: if the address scope
+ * is interface-local or link-local, embed the interface index in the
+ * address.
+ */
+int
+sa6_embedscope(struct sockaddr_in6 *sin6, int defaultok)
+{
+	struct ifnet *ifp;
+	u_int32_t zoneid;
+
+	if ((zoneid = sin6->sin6_scope_id) == 0 && defaultok)
+		zoneid = scope6_addr2default(&sin6->sin6_addr);
+
+	if (zoneid != 0 &&
+	    (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) ||
+	    IN6_IS_ADDR_MC_INTFACELOCAL(&sin6->sin6_addr))) {
+		/*
+		 * At this moment, we only check interface-local and
+		 * link-local scope IDs, and use interface indices as the
+		 * zone IDs assuming a one-to-one mapping between interfaces
+		 * and links.
+		 */
+		if (if_index < zoneid)
+			return (ENXIO);
+		ifnet_head_lock_shared();
+		ifp = ifindex2ifnet[zoneid];
+		if (ifp == NULL) { /* XXX: this can happen for some OS */
+			ifnet_head_done();
+			return (ENXIO);
+		}
+		ifnet_head_done();
+		/* XXX assignment to 16bit from 32bit variable */
+		sin6->sin6_addr.s6_addr16[1] = htons(zoneid & 0xffff);
+
+		sin6->sin6_scope_id = 0;
+	}
+
+	return 0;
+}
+
+void
+rtkey_to_sa6(struct rtentry *rt, struct sockaddr_in6 *sin6)
+{
+	VERIFY(rt_key(rt)->sa_family == AF_INET6);
+
+	*sin6 = *((struct sockaddr_in6 *)rt_key(rt));
+	sin6->sin6_scope_id = 0;
+}
+
+void
+rtgw_to_sa6(struct rtentry *rt, struct sockaddr_in6 *sin6)
+{
+	VERIFY(rt->rt_flags & RTF_GATEWAY);
+
+	*sin6 = *((struct sockaddr_in6 *)rt->rt_gateway);
+	sin6->sin6_scope_id = 0;
+}
+
+/*
+ * generate standard sockaddr_in6 from embedded form.
+ */
+int
+sa6_recoverscope(struct sockaddr_in6 *sin6)
+{
+	u_int32_t zoneid;
+
+	if (sin6->sin6_scope_id != 0) {
+		log(LOG_NOTICE,
+		    "sa6_recoverscope: assumption failure (non 0 ID): %s%%%d\n",
+		    ip6_sprintf(&sin6->sin6_addr), sin6->sin6_scope_id);
+		/* XXX: proceed anyway... */
+	}
+	if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) ||
+	    IN6_IS_ADDR_MC_INTFACELOCAL(&sin6->sin6_addr)) {
+		/*
+		 * KAME assumption: link id == interface id
+		 */
+		zoneid = ntohs(sin6->sin6_addr.s6_addr16[1]);
+		if (zoneid) {
+			/* sanity check */
+			if (if_index < zoneid)
+				return (ENXIO);
+			ifnet_head_lock_shared();
+			if (ifindex2ifnet[zoneid] == NULL) {
+				ifnet_head_done();
+				return (ENXIO);
+			}
+			ifnet_head_done();
+			sin6->sin6_addr.s6_addr16[1] = 0;
+			sin6->sin6_scope_id = zoneid;
+		}
+	}
+
+	return 0;
+}
+
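
Illustration, not part of the patch: the embedded form that sa6_embedscope()
writes and sa6_recoverscope() undoes stores the zone ID in the second 16-bit
word of the address, which is always zero on the wire for link-local
addresses.  A portable round-trip sketch:

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <stdio.h>

    int main(void)
    {
        struct in6_addr a;
        char buf[INET6_ADDRSTRLEN];
        unsigned int zone;

        inet_pton(AF_INET6, "fe80::1", &a);
        /* embed, as in sa6_embedscope(): zone (interface) index 4 */
        a.s6_addr[2] = 0;
        a.s6_addr[3] = 4;
        /* recover and clear, as in sa6_recoverscope()/in6_clearscope() */
        zone = ((unsigned int)a.s6_addr[2] << 8) | a.s6_addr[3];
        a.s6_addr[2] = a.s6_addr[3] = 0;
        inet_ntop(AF_INET6, &a, buf, sizeof(buf));
        printf("%s%%%u\n", buf, zone);  /* prints fe80::1%4 */
        return 0;
    }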
 void
 scope6_setdefault(
 	struct ifnet *ifp)	/* note that this might be NULL */
@@ -294,11 +428,14 @@ scope6_setdefault(
 	 */
 	lck_mtx_lock(scope6_mutex);
 	if (ifp) {
+		scope6_ids[0].s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL] =
+			ifp->if_index;
 		scope6_ids[0].s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] =
 			ifp->if_index;
-	}
-	else
+	} else {
+		scope6_ids[0].s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL] = 0;
 		scope6_ids[0].s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] = 0;
+	}
 	lck_mtx_unlock(scope6_mutex);
 }
 
@@ -328,3 +465,93 @@ scope6_addr2default(
 	lck_mtx_unlock(scope6_mutex);
 	return (id);
 }
+
+/*
+ * Determine the appropriate scope zone ID for in6 and ifp.  If ret_id is
+ * non-NULL, it is set to the zone ID.  If the zone ID needs to be embedded
+ * in the in6_addr structure, in6 will be modified.
+ *
+ * ret_id - unnecessary?
+ */
+int
+in6_setscope(struct in6_addr *in6, struct ifnet *ifp, u_int32_t *ret_id)
+{
+	int scope;
+	u_int32_t zoneid = 0;
+	int index = ifp->if_index;
+
+#ifdef DIAGNOSTIC
+	if (scope6_ids == NULL) { /* should not happen */
+		panic("in6_setscope: scope array is NULL");
+		/* NOTREACHED */
+	}
+#endif
+
+	/*
+	 * special case: the loopback address can only belong to a loopback
+	 * interface.
+	 */
+	if (IN6_IS_ADDR_LOOPBACK(in6)) {
+		if (!(ifp->if_flags & IFF_LOOPBACK)) {
+			return (EINVAL);
+		} else {
+			if (ret_id != NULL)
+				*ret_id = 0; /* there's no ambiguity */
+			return (0);
+		}
+	}
+
+	scope = in6_addrscope(in6);
+
+#define SID scope6_ids[index]
+	lck_mtx_lock(scope6_mutex);
+	switch (scope) {
+	case IPV6_ADDR_SCOPE_INTFACELOCAL: /* should be interface index */
+		zoneid = SID.s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL];
+		break;
+
+	case IPV6_ADDR_SCOPE_LINKLOCAL:
+		zoneid = SID.s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL];
+		break;
+
+	case IPV6_ADDR_SCOPE_SITELOCAL:
+		zoneid = SID.s6id_list[IPV6_ADDR_SCOPE_SITELOCAL];
+		break;
+
+	case IPV6_ADDR_SCOPE_ORGLOCAL:
+		zoneid = SID.s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL];
+		break;
+#undef SID
+	default:
+		zoneid = 0;	/* XXX: treat as global. */
+		break;
+	}
+	lck_mtx_unlock(scope6_mutex);
+
+	if (ret_id != NULL)
+		*ret_id = zoneid;
+
+	if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6))
+		in6->s6_addr16[1] = htons(zoneid & 0xffff); /* XXX */
+
+	return (0);
+}
+
+/*
+ * Just clear the embedded scope identifier.  Return 0 if the original address
+ * is intact; return non-zero if the address is modified.
+ */
+int
+in6_clearscope(struct in6_addr *in6)
+{
+	int modified = 0;
+
+	if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) {
+		if (in6->s6_addr16[1] != 0)
+			modified = 1;
+		in6->s6_addr16[1] = 0;
+	}
+
+	return (modified);
+}
+
diff --git a/bsd/netinet6/scope6_var.h b/bsd/netinet6/scope6_var.h
index 2b3a9954a..d028aefb8 100644
--- a/bsd/netinet6/scope6_var.h
+++ b/bsd/netinet6/scope6_var.h
@@ -1,3 +1,31 @@
+/*
+ * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
 /*	$FreeBSD: src/sys/netinet6/scope6_var.h,v 1.1.2.1 2000/07/15 07:14:38 kris Exp $	*/
 /*	$KAME: scope6_var.h,v 1.4 2000/05/18 15:03:27 jinmei Exp $	*/
 
@@ -35,13 +63,31 @@
 #include <sys/appleapiopts.h>
 
 #ifdef KERNEL_PRIVATE
+
+struct scope6_id {
+	/*
+	 * 16 corresponds to the 4-bit multicast scope field,
+	 * i.e. from node-local to global with some reserved/unassigned types.
+	 */
+	u_int32_t s6id_list[16];
+};
+
+void	scope6_init(void);
 int	scope6_ifattach(struct ifnet *);
+void	scope6_ifdetach(struct scope6_id *);
 int	scope6_set(struct ifnet *, u_int32_t *);
 int	scope6_get(struct ifnet *, u_int32_t *);
 void	scope6_setdefault(struct ifnet *);
 int	scope6_get_default(u_int32_t *);
 u_int32_t scope6_in6_addrscope(struct in6_addr *);
 u_int32_t scope6_addr2default(struct in6_addr *);
+int	sa6_embedscope(struct sockaddr_in6 *, int);
+int	sa6_recoverscope(struct sockaddr_in6 *);
+int	in6_setscope(struct in6_addr *, struct ifnet *, u_int32_t *);
+int	in6_clearscope(struct in6_addr *);
+extern void rtkey_to_sa6(struct rtentry *, struct sockaddr_in6 *);
+extern void rtgw_to_sa6(struct rtentry *, struct sockaddr_in6 *);
+
 #endif /* KERNEL_PRIVATE */
 
 #endif /* _NETINET6_SCOPE6_VAR_H_ */
diff --git a/bsd/netinet6/tcp6_var.h b/bsd/netinet6/tcp6_var.h
index 9d7c44968..5fded19e6 100644
--- a/bsd/netinet6/tcp6_var.h
+++ b/bsd/netinet6/tcp6_var.h
@@ -1,3 +1,31 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
@@ -79,8 +107,8 @@ extern	int tcp_v6mssdflt;	/* XXX */
 struct	ip6_hdr;
 void	tcp6_ctlinput(int, struct sockaddr *, void *);
 void	tcp6_init(void);
-int	tcp6_input(struct mbuf **, int *);
-struct	rtentry *tcp_rtlookup6(struct inpcb *);
+int	tcp6_input(struct mbuf **, int *, int);
+struct	rtentry *tcp_rtlookup6(struct inpcb *, unsigned int);
 
 extern struct	pr_usrreqs tcp6_usrreqs;
 
diff --git a/bsd/netinet6/udp6_output.c b/bsd/netinet6/udp6_output.c
index e3d3198f4..0fb9a6993 100644
--- a/bsd/netinet6/udp6_output.c
+++ b/bsd/netinet6/udp6_output.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -107,9 +107,12 @@
 #include <sys/proc.h>
 #include <sys/syslog.h>
 
+#include <machine/endian.h>
+
 #include <net/if.h>
 #include <net/route.h>
 #include <net/if_types.h>
+#include <net/ntstat.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
@@ -177,27 +180,28 @@ udp6_output(in6p, m, addr6, control, p)
 	struct in6_addr *laddr, *faddr;
 	u_short fport;
 	int error = 0;
-	struct ip6_pktopts opt, *stickyopt = in6p->in6p_outputopts;
-	int priv;
+	struct ip6_pktopts opt, *optp = NULL;
+	struct ip6_moptions *im6o;
 	int af = AF_INET6, hlen = sizeof(struct ip6_hdr);
 	int flags;
 	struct sockaddr_in6 tmp;
 	struct	in6_addr storage;
-#if PKT_PRIORITY
-	mbuf_traffic_class_t mtc = MBUF_TC_NONE;
-#endif /* PKT_PRIORITY */
+	mbuf_traffic_class_t mtc = MBUF_TC_UNSPEC;
+	struct ip6_out_args ip6oa = { IFSCOPE_NONE, 0 };
+
+	if (in6p->inp_flags & INP_BOUND_IF)
+		ip6oa.ip6oa_boundif = in6p->inp_boundif;
 
-	priv = (proc_suser(p) == 0);
+	ip6oa.ip6oa_nocell = (in6p->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
 
 	if (control) {
-#if PKT_PRIORITY
 		mtc = mbuf_traffic_class_from_control(control);
-#endif /* PKT_PRIORITY */
 
-		if ((error = ip6_setpktoptions(control, &opt, priv, 0)) != 0)
+		if ((error = ip6_setpktopts(control, &opt, NULL, IPPROTO_UDP)) != 0)
 			goto release;
-		in6p->in6p_outputopts = &opt;
-	}
+		optp = &opt;
+	} else
+		optp = in6p->in6p_outputopts;
 
 	if (addr6) {
 		/*
@@ -246,16 +250,16 @@ udp6_output(in6p, m, addr6, control, p)
 		}
 
 		/* KAME hack: embed scopeid */
-		if (in6_embedscope(&sin6->sin6_addr, sin6, in6p, NULL) != 0) {
+		if (in6_embedscope(&sin6->sin6_addr, sin6, in6p, NULL,
+		    optp) != 0) {
 			error = EINVAL;
 			goto release;
 		}
 
 		if (!IN6_IS_ADDR_V4MAPPED(faddr)) {
-			laddr = in6_selectsrc(sin6, in6p->in6p_outputopts,
-					      in6p->in6p_moptions,
-					      &in6p->in6p_route,
-					      &in6p->in6p_laddr, &storage, &error);
+			laddr = in6_selectsrc(sin6, optp,
+			    in6p, &in6p->in6p_route, NULL, &storage,
+			    ip6oa.ip6oa_boundif, &error);
 		} else
 			laddr = &in6p->in6p_laddr;	/* XXX */
 		if (laddr == NULL) {
@@ -333,12 +337,12 @@ udp6_output(in6p, m, addr6, control, p)
 		ip6->ip6_src	= *laddr;
 		ip6->ip6_dst	= *faddr;
 
-		if ((udp6->uh_sum = in6_cksum(m, IPPROTO_UDP,
-				sizeof(struct ip6_hdr), plen)) == 0) {
-			udp6->uh_sum = 0xffff;
-		}
+		udp6->uh_sum = in6_cksum_phdr(laddr, faddr,
+		    htonl(plen), htonl(IPPROTO_UDP));
+		m->m_pkthdr.csum_flags = CSUM_UDPIPV6;
+		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 
-		flags = 0;
+		flags = IPV6_OUTARGS;
 
 		udp6stat.udp6s_opackets++;
 #ifdef IPSEC
@@ -348,26 +352,50 @@ udp6_output(in6p, m, addr6, control, p)
 		}
 #endif /*IPSEC*/
 		m->m_pkthdr.socket_id = get_socket_id(in6p->in6p_socket);
+
+		set_packet_tclass(m, in6p->in6p_socket, mtc, 1);
+
+		im6o = in6p->in6p_moptions;
+		if (im6o != NULL)
+			IM6O_ADDREF(im6o);
+
+		error = ip6_output(m, optp, &in6p->in6p_route,
+		    flags, im6o, NULL, &ip6oa);
+
+		if (im6o != NULL)
+			IM6O_REMREF(im6o);
 		
-#if PKT_PRIORITY
-		set_traffic_class(m, in6p->in6p_socket, mtc);
-#endif /* PKT_PRIORITY */
-		error = ip6_output(m, in6p->in6p_outputopts, &in6p->in6p_route,
-		    flags, in6p->in6p_moptions, NULL, 0);
+		if (error == 0 && nstat_collect) {
+			locked_add_64(&in6p->inp_stat->txpackets, 1);
+			locked_add_64(&in6p->inp_stat->txbytes, ulen);
+		}
 
-#if IFNET_ROUTE_REFCNT
-		/*
-		 * Always discard the cached route for unconnected socket
-		 * or if it is a multicast route.
-		 */
-		if (in6p->in6p_route.ro_rt != NULL &&
-		    ((in6p->in6p_route.ro_rt->rt_flags & RTF_MULTICAST) ||
-		    in6p->in6p_socket == NULL ||
-		    in6p->in6p_socket->so_state != SS_ISCONNECTED)) {
-			rtfree(in6p->in6p_route.ro_rt);
-			in6p->in6p_route.ro_rt = NULL;
+		if (in6p->in6p_route.ro_rt != NULL) {
+			struct rtentry *rt = in6p->in6p_route.ro_rt;
+			unsigned int outif;
+
+			if ((rt->rt_flags & RTF_MULTICAST) ||
+			    in6p->in6p_socket == NULL ||
+			    !(in6p->in6p_socket->so_state & SS_ISCONNECTED)) {
+				rt = NULL;	/* unusable */
+			}
+			/*
+			 * Always discard the cached route for unconnected
+			 * socket or if it is a multicast route.
+			 */
+			if (rt == NULL) {
+				rtfree(in6p->in6p_route.ro_rt);
+				in6p->in6p_route.ro_rt = NULL;
+			}
+			/*
+			 * If this is a connected socket and the destination
+			 * route is not multicast, update outif with that of
+			 * the route interface index used by IP.
+			 */
+			if (rt != NULL && (outif = rt->rt_ifp->if_index) !=
+			    in6p->in6p_last_outif)
+				in6p->in6p_last_outif = outif;
 		}
-#endif /* IFNET_ROUTE_REFCNT */
 		break;
 	case AF_INET:
 		error = EAFNOSUPPORT;
@@ -380,8 +408,8 @@ release:
 
 releaseopt:
 	if (control) {
-		ip6_clearpktopts(in6p->in6p_outputopts, 0, -1);
-		in6p->in6p_outputopts = stickyopt;
+		if (optp == &opt)
+			ip6_clearpktopts(optp, -1);
 		m_freem(control);
 	}
 	return(error);
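
For reference (illustrative only): the hunk above seeds uh_sum with just the
pseudo-header sum (in6_cksum_phdr) and sets CSUM_UDPIPV6/csum_data so the
payload portion can be folded in later by the driver or hardware.  The full
RFC 2460 section 8.1 computation being deferred looks like the following
sketch (host-order value; byte-swap before storing, and note UDP transmits an
all-zero result as 0xffff):

    #include <netinet/in.h>
    #include <stddef.h>
    #include <stdint.h>

    static uint16_t udp6_cksum(const struct in6_addr *src,
        const struct in6_addr *dst, const uint8_t *udp, size_t len)
    {
        const uint8_t *s = (const uint8_t *)src;
        const uint8_t *d = (const uint8_t *)dst;
        uint64_t sum = 0;
        uint16_t folded;
        size_t i;

        for (i = 0; i < 16; i += 2) {            /* pseudo-header addresses */
            sum += ((uint32_t)s[i] << 8) | s[i + 1];
            sum += ((uint32_t)d[i] << 8) | d[i + 1];
        }
        sum += len;                              /* upper-layer packet length */
        sum += IPPROTO_UDP;                      /* next header */
        for (i = 0; i + 1 < len; i += 2)         /* UDP header plus payload */
            sum += ((uint32_t)udp[i] << 8) | udp[i + 1];
        if (len & 1)
            sum += (uint32_t)udp[len - 1] << 8;  /* zero-pad the odd byte */
        while (sum >> 16)                        /* fold carries */
            sum = (sum & 0xffff) + (sum >> 16);
        folded = (uint16_t)~sum;
        return folded ? folded : 0xffff;
    }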
diff --git a/bsd/netinet6/udp6_usrreq.c b/bsd/netinet6/udp6_usrreq.c
index fed294b90..c88c0d169 100644
--- a/bsd/netinet6/udp6_usrreq.c
+++ b/bsd/netinet6/udp6_usrreq.c
@@ -1,3 +1,31 @@
+/*
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
 /*	$FreeBSD: src/sys/netinet6/udp6_usrreq.c,v 1.6.2.6 2001/07/29 19:32:40 ume Exp $	*/
 /*	$KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $	*/
 
@@ -83,6 +111,7 @@
 #include <net/if.h>
 #include <net/route.h>
 #include <net/if_types.h>
+#include <net/ntstat.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
@@ -104,7 +133,6 @@
 #include <netinet6/ipsec6.h>
 extern int ipsec_bypass;
 #endif /*IPSEC*/
-extern lck_mtx_t *nd6_mutex;
 
 /*
 * UDP protocol implementation.
@@ -112,7 +140,6 @@ extern lck_mtx_t *nd6_mutex;
  */
 
 extern	struct protosw inetsw[];
-static	int in6_mcmatch(struct inpcb *, struct in6_addr *, struct ifnet *);
 static	int udp6_detach(struct socket *so);
 static void udp6_append(struct inpcb *, struct ip6_hdr *,
     struct sockaddr_in6 *, struct mbuf *, int);
@@ -131,53 +158,37 @@ extern int fw_verbose;
 #define log_in_vain_log( a ) { log a; }
 #endif
 
-static int
-in6_mcmatch(
-	struct inpcb *in6p,
-	register struct in6_addr *ia6,
-	struct ifnet *ifp)
-{
-	struct ip6_moptions *im6o = in6p->in6p_moptions;
-	struct in6_multi_mship *imm;
-
-	if (im6o == NULL)
-		return 0;
-
-	lck_mtx_lock(nd6_mutex);
-	for (imm = im6o->im6o_memberships.lh_first; imm != NULL;
-	     imm = imm->i6mm_chain.le_next) {
-		if ((ifp == NULL ||
-		     imm->i6mm_maddr->in6m_ifp == ifp) &&
-		    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
-				       ia6)) {
-			lck_mtx_unlock(nd6_mutex);
-			return 1;
-		}
-	}
-	lck_mtx_unlock(nd6_mutex);
-	return 0;
-}
-
 /*
  * subroutine of udp6_input(), mainly for source code readability.
  */
 static void
-udp6_append(struct inpcb *last, struct ip6_hdr *ip6,
+udp6_append(struct inpcb *last, __unused struct ip6_hdr *ip6,
     struct sockaddr_in6 *udp_in6, struct mbuf *n, int off)
 {
 	struct  mbuf *opts = NULL;
-
+	int ret = 0;
 #if CONFIG_MACF_NET
 	if (mac_inpcb_check_deliver(last, n, AF_INET6, SOCK_DGRAM) != 0) {
 		m_freem(n);
 		return;
 	}
 #endif
-	if (last->in6p_flags & IN6P_CONTROLOPTS ||
-	    last->in6p_socket->so_options & SO_TIMESTAMP)
-		ip6_savecontrol(last, &opts, ip6, n);
-
+	if ((last->in6p_flags & IN6P_CONTROLOPTS) != 0 ||
+	    (last->in6p_socket->so_options & SO_TIMESTAMP) != 0 ||
+	    (last->in6p_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
+		ret = ip6_savecontrol(last, n, &opts);
+		if (ret != 0) {
+			m_freem(n);
+			m_freem(opts);
+			return;
+		}
+	}
 	m_adj(n, off);
+	if (nstat_collect) {
+		locked_add_64(&last->inp_stat->rxpackets, 1);
+		locked_add_64(&last->inp_stat->rxbytes, n->m_pkthdr.len);
+	}
+	so_recv_data_stat(last->in6p_socket, n, 0);
 	if (sbappendaddr(&last->in6p_socket->so_rcv,
 	    (struct sockaddr *)udp_in6, n, opts, NULL) == 0)
 		udpstat.udps_fullsock++;
@@ -188,20 +199,25 @@ udp6_append(struct inpcb *last, struct ip6_hdr *ip6,
 int
 udp6_input(
 	struct mbuf **mp,
-	int *offp)
+	int *offp,
+	int proto)
 {
+#pragma unused(proto)
 	struct mbuf *m = *mp;
+	struct ifnet *ifp;
 	register struct ip6_hdr *ip6;
 	register struct udphdr *uh;
 	register struct inpcb *in6p;
 	struct  mbuf *opts = NULL;
 	int off = *offp;
-	int plen, ulen;
+	int plen, ulen, ret = 0;
 	struct sockaddr_in6 udp_in6;
 	struct inpcbinfo *pcbinfo = &udbinfo;
+	struct sockaddr_in6 fromsa;
 
 	IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), return IPPROTO_DONE);
 
+	ifp = m->m_pkthdr.rcvif;
 	ip6 = mtod(m, struct ip6_hdr *);
 
 #if defined(NFAITH) && 0 < NFAITH
@@ -223,20 +239,40 @@ udp6_input(
 		goto bad;
 	}
 
+	/* destination port of 0 is illegal, based on RFC 768. */
+	if (uh->uh_dport == 0)
+		goto bad;
+
 	/*
 	 * Checksum extended UDP header and data.
 	 */
+	if (uh->uh_sum) {
+		if ((apple_hwcksum_rx != 0) && (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) {
+			uh->uh_sum = m->m_pkthdr.csum_data;
+			uh->uh_sum ^= 0xffff;
+		}
+		else {
+			if (in6_cksum(m, IPPROTO_UDP, off, ulen) != 0) {
+				udpstat.udps_badsum++;
+				goto bad;
+			}
+		}
+	}
 #ifndef __APPLE__
-	if (uh->uh_sum == 0)
+	else
 		udpstat.udps_nosum++;
 #endif
-	else if (in6_cksum(m, IPPROTO_UDP, off, ulen) != 0) {
-		udpstat.udps_badsum++;
-		goto bad;
-	}
+
+	/*
+	 * Construct sockaddr format source address.
+	 */
+	init_sin6(&fromsa, m);
+	fromsa.sin6_port = uh->uh_sport;
+
 
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		int reuse_sock = 0, mcast_delivered = 0;
+		struct ip6_moptions *imo;
 		struct mbuf *n = NULL;
 
 		/*
@@ -299,11 +335,27 @@ udp6_input(
 				udp_unlock(in6p->in6p_socket, 1, 0);
 				continue;
 			}
-			if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
-				if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr,
-							&ip6->ip6_dst) &&
-				    !in6_mcmatch(in6p, &ip6->ip6_dst,
-						 m->m_pkthdr.rcvif)) {
+
+			/*
+			 * Handle socket delivery policy for any-source
+			 * and source-specific multicast. [RFC3678]
+			 */
+			imo = in6p->in6p_moptions;
+			if (imo && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
+				struct sockaddr_in6	 mcaddr;
+				int			 blocked;
+
+				IM6O_LOCK(imo);	
+				bzero(&mcaddr, sizeof(struct sockaddr_in6));
+				mcaddr.sin6_len = sizeof(struct sockaddr_in6);
+				mcaddr.sin6_family = AF_INET6;
+				mcaddr.sin6_addr = ip6->ip6_dst;
+
+				blocked = im6o_mc_filter(imo, ifp,
+					(struct sockaddr *)&mcaddr,
+					(struct sockaddr *)&fromsa);
+				IM6O_UNLOCK(imo);	
+				if (blocked != MCAST_PASS) {
 					udp_unlock(in6p->in6p_socket, 1, 0);
 					continue;
 				}
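
Illustration, not part of the patch: the im6o_mc_filter() check above enforces
per-socket source filters installed through the RFC 3678 API.  Assuming the
protocol-independent form of that API (struct group_source_req and
MCAST_JOIN_SOURCE_GROUP), a source-specific join looks like:

    #include <netinet/in.h>
    #include <string.h>
    #include <sys/socket.h>

    /* Accept the group only from one source; others fail the filter above. */
    static int join_ssm(int s, unsigned int ifindex,
        const struct sockaddr_in6 *group, const struct sockaddr_in6 *source)
    {
        struct group_source_req gsr;

        memset(&gsr, 0, sizeof(gsr));
        gsr.gsr_interface = ifindex;
        memcpy(&gsr.gsr_group, group, sizeof(*group));
        memcpy(&gsr.gsr_source, source, sizeof(*source));
        return setsockopt(s, IPPROTO_IPV6, MCAST_JOIN_SOURCE_GROUP,
            &gsr, sizeof(gsr));
    }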
@@ -444,10 +496,21 @@ udp6_input(
 		
 	init_sin6(&udp_in6, m); /* general init */
 	udp_in6.sin6_port = uh->uh_sport;
-	if (in6p->in6p_flags & IN6P_CONTROLOPTS
-	    || in6p->in6p_socket->so_options & SO_TIMESTAMP)
-		ip6_savecontrol(in6p, &opts, ip6, m);
+	if ((in6p->in6p_flags & IN6P_CONTROLOPTS) != 0 || 
+		(in6p->in6p_socket->so_options & SO_TIMESTAMP) != 0 ||
+		(in6p->in6p_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) {
+		ret = ip6_savecontrol(in6p, m, &opts);
+		if (ret != 0) {
+			udp_unlock(in6p->in6p_socket, 1, 0);
+			goto bad;
+		}
+	}
 	m_adj(m, off + sizeof(struct udphdr));
+	if (nstat_collect) {
+		locked_add_64(&in6p->inp_stat->rxpackets, 1);
+		locked_add_64(&in6p->inp_stat->rxbytes, m->m_pkthdr.len);
+	}
+	so_recv_data_stat(in6p->in6p_socket, m, 0);
 	if (sbappendaddr(&in6p->in6p_socket->so_rcv,
 			(struct sockaddr *)&udp_in6,
 			m, opts, NULL) == 0) {
@@ -527,10 +590,10 @@ udp6_ctlinput(
 
 		(void) in6_pcbnotify(&udbinfo, sa, uh.uh_dport,
 					(struct sockaddr*)ip6cp->ip6c_src,
-					uh.uh_sport, cmd, notify);
+					uh.uh_sport, cmd, NULL, notify);
 	} else
 		(void) in6_pcbnotify(&udbinfo, sa, 0, (struct sockaddr *)&sa6_src,
-				     0, cmd, notify);
+				     0, cmd, NULL, notify);
 }
 
 #ifndef __APPLE__
@@ -561,6 +624,12 @@ udp6_getcred SYSCTL_HANDLER_ARGS
 		error = ENOENT;
 		goto out;
 	}
+	/*
+	 * XXX This should not be copying out a credential!!!!  This
+	 * XXX is an opaque type, and is not intended to be introspected,
+	 * XXX and the size of this structure *WILL* change as planned MACF
+	 * XXX and kauth changes go forward.
+	 */
 	error = SYSCTL_OUT(req, inp->inp_socket->so_cred->pc_ucred,
 			   sizeof(*(kauth_cred_t)0));
 
@@ -619,6 +688,7 @@ udp6_attach(struct socket *so, __unused int proto, struct proc *p)
 	 * which may match an IPv4-mapped IPv6 address.
 	 */
 	inp->inp_ip_ttl = ip_defttl;
+	nstat_udp_new_pcb(inp);
 	return 0;
 }
 
@@ -676,7 +746,7 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
 			if (inp->inp_faddr.s_addr != INADDR_ANY)
 				return EISCONN;
 			in6_sin6_2_sin(&sin, sin6_p);
-			error = in_pcbconnect(inp, (struct sockaddr *)&sin, p);
+			error = in_pcbconnect(inp, (struct sockaddr *)&sin, p, NULL);
 			if (error == 0) {
 				inp->inp_vflag |= INP_IPV4;
 				inp->inp_vflag &= ~INP_IPV6;
@@ -732,6 +802,7 @@ udp6_disconnect(struct socket *so)
 
 	in6_pcbdisconnect(inp);
 	inp->in6p_laddr = in6addr_any;
+	inp->in6p_last_outif = 0;
 	so->so_state &= ~SS_ISCONNECTED;		/* XXX */
 	return 0;
 }
diff --git a/bsd/netinet6/udp6_var.h b/bsd/netinet6/udp6_var.h
index 18274d10f..bd6916e4e 100644
--- a/bsd/netinet6/udp6_var.h
+++ b/bsd/netinet6/udp6_var.h
@@ -72,7 +72,7 @@ SYSCTL_DECL(_net_inet6_udp6);
 extern struct	pr_usrreqs udp6_usrreqs;
 
 void	udp6_ctlinput(int, struct sockaddr *, void *);
-int	udp6_input(struct mbuf **, int *);
+int	udp6_input(struct mbuf **, int *, int);
 int	udp6_output(struct inpcb *inp, struct mbuf *m,
 			struct sockaddr *addr, struct mbuf *control,
 			struct proc *p);
diff --git a/bsd/netkey/Makefile b/bsd/netkey/Makefile
index def3c0629..1a68c8a44 100644
--- a/bsd/netkey/Makefile
+++ b/bsd/netkey/Makefile
@@ -9,14 +9,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 DATAFILES = \
diff --git a/bsd/netkey/key.c b/bsd/netkey/key.c
index 73a605869..457f772ec 100644
--- a/bsd/netkey/key.c
+++ b/bsd/netkey/key.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -78,6 +78,7 @@
 #include <sys/proc.h>
 #include <sys/queue.h>
 #include <sys/syslog.h>
+#include <sys/mcache.h>
 
 #include <kern/locks.h>
 
@@ -152,9 +153,6 @@ lck_grp_attr_t    *pfkey_stat_mutex_grp_attr;
 lck_attr_t        *pfkey_stat_mutex_attr;
 lck_mtx_t         *pfkey_stat_mutex;
 
-
-extern lck_mtx_t  *nd6_mutex;
-
 /*
  * Note on SA reference counting:
  * - SAs that are not in DEAD state will have (total external reference + 1)
@@ -270,61 +268,61 @@ static int ipsec_esp_auth = 0;
 static int ipsec_ah_keymin = 128;
 
 SYSCTL_DECL(_net_key);
-
-SYSCTL_INT(_net_key, KEYCTL_DEBUG_LEVEL,	debug,	CTLFLAG_RW, \
+/* Thread safe: no accumulated state */
+SYSCTL_INT(_net_key, KEYCTL_DEBUG_LEVEL,	debug,	CTLFLAG_RW | CTLFLAG_LOCKED, \
 	&key_debug_level,	0,	"");
 
 
 /* max count of trial for the decision of spi value */
-SYSCTL_INT(_net_key, KEYCTL_SPI_TRY,		spi_trycnt,	CTLFLAG_RW, \
+SYSCTL_INT(_net_key, KEYCTL_SPI_TRY,		spi_trycnt,	CTLFLAG_RW | CTLFLAG_LOCKED, \
 	&key_spi_trycnt,	0,	"");
 
 /* minimum spi value to allocate automatically. */
-SYSCTL_INT(_net_key, KEYCTL_SPI_MIN_VALUE,	spi_minval,	CTLFLAG_RW, \
+SYSCTL_INT(_net_key, KEYCTL_SPI_MIN_VALUE,	spi_minval,	CTLFLAG_RW | CTLFLAG_LOCKED, \
 	&key_spi_minval,	0,	"");
 
 /* maximum spi value to allocate automatically. */
-SYSCTL_INT(_net_key, KEYCTL_SPI_MAX_VALUE,	spi_maxval,	CTLFLAG_RW, \
+SYSCTL_INT(_net_key, KEYCTL_SPI_MAX_VALUE,	spi_maxval,	CTLFLAG_RW | CTLFLAG_LOCKED, \
 	&key_spi_maxval,	0,	"");
 
 /* interval to initialize randseed */
-SYSCTL_INT(_net_key, KEYCTL_RANDOM_INT,	int_random,	CTLFLAG_RW, \
+SYSCTL_INT(_net_key, KEYCTL_RANDOM_INT,	int_random,	CTLFLAG_RW | CTLFLAG_LOCKED, \
 	&key_int_random,	0,	"");
 
-/* lifetime for larval SA */
-SYSCTL_INT(_net_key, KEYCTL_LARVAL_LIFETIME,	larval_lifetime, CTLFLAG_RW, \
+/* lifetime for larval SA; thread safe due to > compare */
+SYSCTL_INT(_net_key, KEYCTL_LARVAL_LIFETIME,	larval_lifetime, CTLFLAG_RW | CTLFLAG_LOCKED, \
 	&key_larval_lifetime,	0,	"");
 
 /* counter for blocking to send SADB_ACQUIRE to IKEd */
-SYSCTL_INT(_net_key, KEYCTL_BLOCKACQ_COUNT,	blockacq_count,	CTLFLAG_RW, \
+SYSCTL_INT(_net_key, KEYCTL_BLOCKACQ_COUNT,	blockacq_count,	CTLFLAG_RW | CTLFLAG_LOCKED, \
 	&key_blockacq_count,	0,	"");
 
-/* lifetime for blocking to send SADB_ACQUIRE to IKEd */
-SYSCTL_INT(_net_key, KEYCTL_BLOCKACQ_LIFETIME,	blockacq_lifetime, CTLFLAG_RW, \
+/* lifetime for blocking to send SADB_ACQUIRE to IKEd: Thread safe, > compare */
+SYSCTL_INT(_net_key, KEYCTL_BLOCKACQ_LIFETIME,	blockacq_lifetime, CTLFLAG_RW | CTLFLAG_LOCKED, \
 	&key_blockacq_lifetime,	0,	"");
 
 /* ESP auth */
-SYSCTL_INT(_net_key, KEYCTL_ESP_AUTH,	esp_auth, CTLFLAG_RW, \
+SYSCTL_INT(_net_key, KEYCTL_ESP_AUTH,	esp_auth, CTLFLAG_RW | CTLFLAG_LOCKED, \
 	&ipsec_esp_auth,	0,	"");
 
 /* minimum ESP key length */
-SYSCTL_INT(_net_key, KEYCTL_ESP_KEYMIN,	esp_keymin, CTLFLAG_RW, \
+SYSCTL_INT(_net_key, KEYCTL_ESP_KEYMIN,	esp_keymin, CTLFLAG_RW | CTLFLAG_LOCKED, \
 	&ipsec_esp_keymin,	0,	"");
 
 /* minimum AH key length */
-SYSCTL_INT(_net_key, KEYCTL_AH_KEYMIN,	ah_keymin, CTLFLAG_RW, \
+SYSCTL_INT(_net_key, KEYCTL_AH_KEYMIN,	ah_keymin, CTLFLAG_RW | CTLFLAG_LOCKED, \
 	&ipsec_ah_keymin,	0,	"");
 
 /* prefer old SA rather than new SA */
-SYSCTL_INT(_net_key, KEYCTL_PREFERED_OLDSA,	prefered_oldsa, CTLFLAG_RW,\
+SYSCTL_INT(_net_key, KEYCTL_PREFERED_OLDSA,	prefered_oldsa, CTLFLAG_RW | CTLFLAG_LOCKED,\
 	&key_preferred_oldsa,	0,	"");
 
 /* time between NATT keepalives in seconds, 0 disabled  */
-SYSCTL_INT(_net_key, KEYCTL_NATT_KEEPALIVE_INTERVAL, natt_keepalive_interval, CTLFLAG_RW,\
+SYSCTL_INT(_net_key, KEYCTL_NATT_KEEPALIVE_INTERVAL, natt_keepalive_interval, CTLFLAG_RW | CTLFLAG_LOCKED,\
 	&natt_keepalive_interval,	0,	"");
 
 /* PF_KEY statistics */
-SYSCTL_STRUCT(_net_key, KEYCTL_PFKEYSTAT, pfkeystat, CTLFLAG_RD,\
+SYSCTL_STRUCT(_net_key, KEYCTL_PFKEYSTAT, pfkeystat, CTLFLAG_RD | CTLFLAG_LOCKED,\
 	&pfkeystat, pfkeystat, "");
 
 #ifndef LIST_FOREACH
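
Illustration, not part of the patch: with CTLFLAG_LOCKED these remain ordinary
integer knobs readable and settable through sysctl(3).  The OID string below
is an assumption derived from the _net_key parent and the node names above:

    #include <stdio.h>
    #include <sys/sysctl.h>

    int main(void)
    {
        int dbg = 0;
        size_t len = sizeof(dbg);

        if (sysctlbyname("net.key.debug", &dbg, &len, NULL, 0) == 0)
            printf("net.key.debug = %d\n", dbg);
        return 0;
    }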
@@ -566,7 +564,7 @@ void key_init(void);
 
 /*
  * PF_KEY init
- * setup locks and call raw_init()
+ * set up locks, call raw_init(), and then initialize the timer and associated data
  *
  */
 void
@@ -597,7 +595,46 @@ key_init(void)
 		LIST_INIT(&spihash[i]);
 
 	raw_init();
+
+	bzero((caddr_t)&key_cb, sizeof(key_cb));
 	
+	for (i = 0; i < IPSEC_DIR_MAX; i++) {
+		LIST_INIT(&sptree[i]);
+	}
+	ipsec_policy_count = 0;
+
+	LIST_INIT(&sahtree);
+
+	for (i = 0; i <= SADB_SATYPE_MAX; i++) {
+		LIST_INIT(&regtree[i]);
+	}
+	ipsec_sav_count = 0;
+
+#ifndef IPSEC_NONBLOCK_ACQUIRE
+	LIST_INIT(&acqtree);
+#endif
+	LIST_INIT(&spacqtree);
+
+	/* system default */
+#if INET
+	ip4_def_policy.policy = IPSEC_POLICY_NONE;
+	ip4_def_policy.refcnt++;	/*never reclaim this*/
+#endif
+#if INET6
+	ip6_def_policy.policy = IPSEC_POLICY_NONE;
+	ip6_def_policy.refcnt++;	/*never reclaim this*/
+#endif
+
+#ifndef IPSEC_DEBUG2
+	timeout((void *)key_timehandler, (void *)0, hz);
+#endif /*IPSEC_DEBUG2*/
+
+	/* initialize key statistics */
+	keystat.getspi_count = 1;
+
+#ifndef __APPLE__
+	printf("IPsec: Initialized Security Association Processing.\n");
+#endif
 }
 
 
@@ -609,9 +646,9 @@ key_init(void)
  *	others:	found and return the pointer.
  */
 struct secpolicy *
-key_allocsp(spidx, dir)
-	struct secpolicyindex *spidx;
-	u_int dir;
+key_allocsp(
+	struct secpolicyindex *spidx,
+	u_int dir)
 {
 	struct secpolicy *sp;
 	struct timeval tv;
@@ -670,8 +707,11 @@ found:
  * XXX slow
  */
 struct secpolicy *
-key_gettunnel(osrc, odst, isrc, idst)
-	struct sockaddr *osrc, *odst, *isrc, *idst;
+key_gettunnel(
+	struct sockaddr *osrc,
+	struct sockaddr *odst,
+	struct sockaddr *isrc,
+	struct sockaddr *idst)
 {
 	struct secpolicy *sp;
 	const int dir = IPSEC_DIR_INBOUND;
@@ -744,10 +784,10 @@ found:
  *	ENOENT: policy may be valid, but SA with REQUIRE is on acquiring.
  */
 int
-key_checkrequest(isr, saidx, sav)
-	struct ipsecrequest *isr;
-	struct secasindex *saidx;	
-	struct secasvar **sav;
+key_checkrequest(
+	struct ipsecrequest *isr,
+	struct secasindex *saidx,	
+	struct secasvar **sav)
 {
 	u_int level;
 	int error;
@@ -814,8 +854,8 @@ key_checkrequest(isr, saidx, sav)
 u_int32_t sah_search_calls = 0;
 u_int32_t sah_search_count = 0;
 struct secasvar *
-key_allocsa_policy(saidx)
-	struct secasindex *saidx;
+key_allocsa_policy(
+	struct secasindex *saidx)
 {
 	struct secashead *sah;
 	struct secasvar *sav;
@@ -879,10 +919,10 @@ key_allocsa_policy(saidx)
  *	others	: found, pointer to a SA.
  */
 static struct secasvar *
-key_do_allocsa_policy(sah, state, dstport)
-	struct secashead *sah;
-	u_int state;
-	u_int16_t dstport;
+key_do_allocsa_policy(
+	struct secashead *sah,
+	u_int state,
+	u_int16_t dstport)
 {
 	struct secasvar *sav, *nextsav, *candidate, *natt_candidate, *no_natt_candidate, *d;
 
@@ -1060,10 +1100,12 @@ key_do_allocsa_policy(sah, state, dstport)
  * keep source address in IPsec SA.  We see a tricky situation here.
  */
 struct secasvar *
-key_allocsa(family, src, dst, proto, spi)
-	u_int family, proto;
-	caddr_t src, dst;
-	u_int32_t spi;
+key_allocsa(
+	u_int family,
+	caddr_t src,
+	caddr_t dst,
+	u_int proto,
+	u_int32_t spi)
 {
 	struct secasvar *sav, *match;
 	u_int stateidx, state, tmpidx, matchidx;
@@ -1214,8 +1256,8 @@ found:
 }
 
 u_int16_t
-key_natt_get_translated_port(outsav)
-	struct secasvar *outsav;
+key_natt_get_translated_port(
+	struct secasvar *outsav)
 {
 
 	struct secasindex saidx;
@@ -1271,10 +1313,10 @@ found:
 }
 
 static int
-key_do_get_translated_port(sah, outsav, state)
-	struct secashead *sah;
-	struct secasvar *outsav; 
-	u_int state;
+key_do_get_translated_port(
+	struct secashead *sah,
+	struct secasvar *outsav,
+	u_int state)
 {
 	struct secasvar *currsav, *nextsav, *candidate;
 
@@ -1338,9 +1380,9 @@ key_do_get_translated_port(sah, outsav, state)
  * For both the packet without socket and key_freeso().
  */
 void
-key_freesp(sp, locked)
-	struct secpolicy *sp;
-	int locked;
+key_freesp(
+	struct secpolicy *sp,
+	int locked)
 {
 
 	/* sanity check */
@@ -1371,8 +1413,8 @@ static void key_freesp_so(struct secpolicy **);
  * For the packet with socket.
  */
 void
-key_freeso(so)
-	struct socket *so;
+key_freeso(
+	struct socket *so)
 {
 	
 	/* sanity check */
@@ -1429,8 +1471,8 @@ done:
 }
 
 static void
-key_freesp_so(sp)
-	struct secpolicy **sp;
+key_freesp_so(
+	struct secpolicy **sp)
 {
 
 	/* sanity check */
@@ -1464,9 +1506,9 @@ key_freesp_so(sp)
  * for a policy.
  */
 void
-key_freesav(sav, locked)
-	struct secasvar *sav;
-	int locked;
+key_freesav(
+	struct secasvar *sav,
+	int locked)
 {
 
 	/* sanity check */
@@ -1494,8 +1536,8 @@ key_freesav(sav, locked)
  * free security policy entry.
  */
 static void
-key_delsp(sp)
-	struct secpolicy *sp;
+key_delsp(
+	struct secpolicy *sp)
 {
 
 	/* sanity check */
@@ -1534,8 +1576,8 @@ key_delsp(sp)
  *	others	: found, pointer to a SP.
  */
 static struct secpolicy *
-key_getsp(spidx)
-	struct secpolicyindex *spidx;
+key_getsp(
+	struct secpolicyindex *spidx)
 {
 	struct secpolicy *sp;
 
@@ -1563,8 +1605,8 @@ key_getsp(spidx)
  *	others	: found, pointer to a SP.
  */
 static struct secpolicy *
-key_getspbyid(id)
-	u_int32_t id;
+key_getspbyid(
+	u_int32_t id)
 {
 	struct secpolicy *sp;
 
@@ -1592,7 +1634,7 @@ key_getspbyid(id)
 }
 
 struct secpolicy *
-key_newsp()
+key_newsp(void)
 {
 	struct secpolicy *newsp = NULL;
 	
@@ -1613,10 +1655,10 @@ key_newsp()
  * so must be set properly later.
  */
 struct secpolicy *
-key_msg2sp(xpl0, len, error)
-	struct sadb_x_policy *xpl0;
-	size_t len;
-	int *error;
+key_msg2sp(
+	struct sadb_x_policy *xpl0,
+	size_t len,
+	int *error)
 {
 	struct secpolicy *newsp;
 
@@ -1835,7 +1877,7 @@ key_msg2sp(xpl0, len, error)
 }
 
 static u_int32_t
-key_newreqid()
+key_newreqid(void)
 {
 	lck_mtx_lock(sadb_mutex);
 	static u_int32_t auto_reqid = IPSEC_MANUAL_REQID_MAX + 1;
@@ -1853,8 +1895,8 @@ key_newreqid()
  * copy secpolicy struct to sadb_x_policy structure indicated.
  */
 struct mbuf *
-key_sp2msg(sp)
-	struct secpolicy *sp;
+key_sp2msg(
+	struct secpolicy *sp)
 {
 	struct sadb_x_policy *xpl;
 	int tlen;
@@ -2006,10 +2048,10 @@ fail:
  * m will always be freed.
  */
 static int
-key_spdadd(so, m, mhp)
-	struct socket *so;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_spdadd(
+	struct socket *so,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	struct sadb_address *src0, *dst0;
 	struct sadb_x_policy *xpl0, *xpl;
@@ -2266,7 +2308,7 @@ key_spdadd(so, m, mhp)
  *	others: success.
  */
 static u_int32_t
-key_getnewspid()
+key_getnewspid(void)
 {
 	u_int32_t newid = 0;
 	int count = key_spi_trycnt;	/* XXX */
@@ -2304,10 +2346,10 @@ key_getnewspid()
  * m will always be freed.
  */
 static int
-key_spddelete(so, m, mhp)
-	struct socket *so;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_spddelete(
+	struct socket *so,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	struct sadb_address *src0, *dst0;
 	struct sadb_x_policy *xpl0;
@@ -2406,10 +2448,10 @@ key_spddelete(so, m, mhp)
  * m will always be freed.
  */
 static int
-key_spddelete2(so, m, mhp)
-	struct socket *so;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_spddelete2(
+	struct socket *so,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	u_int32_t id;
 	struct secpolicy *sp;
@@ -2507,10 +2549,10 @@ key_spddelete2(so, m, mhp)
  * m will always be freed.
  */
 static int
-key_spdget(so, m, mhp)
-	struct socket *so;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_spdget(
+	struct socket *so,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	u_int32_t id;
 	struct secpolicy *sp;
@@ -2562,8 +2604,8 @@ key_spdget(so, m, mhp)
  *    others: error number
  */
 int
-key_spdacquire(sp)
-	struct secpolicy *sp;
+key_spdacquire(
+	struct secpolicy *sp)
 {
 	struct mbuf *result = NULL, *m;
 	struct secspacq *newspacq;
@@ -2637,10 +2679,10 @@ fail:
  * m will always be freed.
  */
 static int
-key_spdflush(so, m, mhp)
-	struct socket *so;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_spdflush(
+	struct socket *so,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	struct sadb_msg *newmsg;
 	struct secpolicy *sp;
@@ -2690,10 +2732,10 @@ key_spdflush(so, m, mhp)
  */
  	
 static int
-key_spddump(so, m, mhp)
-	struct socket *so;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_spddump(
+	struct socket *so,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	struct secpolicy *sp, **spbuf = NULL, **sp_ptr;
 	int cnt = 0, bufcount;
@@ -2762,10 +2804,11 @@ end:
 }
 
 static struct mbuf *
-key_setdumpsp(sp, type, seq, pid)
-	struct secpolicy *sp;
-	u_int8_t type;
-	u_int32_t seq, pid;
+key_setdumpsp(
+	struct secpolicy *sp,
+	u_int8_t type,
+	u_int32_t seq,
+	u_int32_t pid)
 {
 	struct mbuf *result = NULL, *m;
 
@@ -2820,8 +2863,8 @@ fail:
  * get PFKEY message length for security policy and request.
  */
 static u_int
-key_getspreqmsglen(sp)
-	struct secpolicy *sp;
+key_getspreqmsglen(
+	struct secpolicy *sp)
 {
 	u_int tlen;
 
@@ -2858,12 +2901,12 @@ key_getspreqmsglen(sp)
  *	others	: error number
  */
 static int
-key_spdexpire(sp)
-	struct secpolicy *sp;
+key_spdexpire(
+	struct secpolicy *sp)
 {
 	struct mbuf *result = NULL, *m;
 	int len;
-	int error = -1;
+	int error = EINVAL;
 	struct sadb_lifetime *lt;
 
 	lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED);
@@ -2969,9 +3012,9 @@ key_spdexpire(sp)
  *	others	: pointer to new SA head.
  */
 static struct secashead *
-key_newsah(saidx, dir)
-	struct secasindex *saidx;
-	u_int8_t           dir;
+key_newsah(
+	struct secasindex *saidx,
+	u_int8_t           dir)
 {
 	struct secashead *newsah;
 
@@ -3019,8 +3062,8 @@ key_newsah(saidx, dir)
 * delete SA index and all SAs registered.
  */
 static void
-key_delsah(sah)
-	struct secashead *sah;
+key_delsah(
+	struct secashead *sah)
 {
 	struct secasvar *sav, *nextsav;
 	u_int stateidx, state;
@@ -3092,11 +3135,11 @@ key_delsah(sah)
  * does not modify mbuf.  does not free mbuf on error.
  */
 static struct secasvar *
-key_newsav(m, mhp, sah, errp)
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
-	struct secashead *sah;
-	int *errp;
+key_newsav(
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp,
+	struct secashead *sah,
+	int *errp)
 {
 	struct secasvar *newsav;
 	const struct sadb_sa *xsa;
@@ -3187,8 +3230,8 @@ key_newsav(m, mhp, sah, errp)
  * free() SA variable entry.
  */
 static void
-key_delsav(sav)
-	struct secasvar *sav;
+key_delsav(
+	struct secasvar *sav)
 {
 
 	lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED);
@@ -3256,8 +3299,8 @@ key_delsav(sav)
  *	others	: found, pointer to a SA.
  */
 static struct secashead *
-key_getsah(saidx)
-	struct secasindex *saidx;
+key_getsah(
+	struct secasindex *saidx)
 {
 	struct secashead *sah;
 
@@ -3281,9 +3324,9 @@ key_getsah(saidx)
  *	others	: found, pointer to a SA.
  */
 static struct secasvar *
-key_checkspidup(saidx, spi)
-	struct secasindex *saidx;
-	u_int32_t spi;
+key_checkspidup(
+	struct secasindex *saidx,
+	u_int32_t spi)
 {
 	struct secasvar *sav;
 	u_int stateidx, state;
@@ -3314,9 +3357,9 @@ key_checkspidup(saidx, spi)
 }
 
 static void
-key_setspi(sav, spi)
-	struct secasvar *sav;
-	u_int32_t spi;
+key_setspi(
+	struct secasvar *sav,
+	u_int32_t spi)
 {
 	lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED);
 	sav->spi = spi;
@@ -3333,9 +3376,9 @@ key_setspi(sav, spi)
  *	others	: found, pointer to a SA.
  */
 static struct secasvar *
-key_getsavbyspi(sah, spi)
-	struct secashead *sah;
-	u_int32_t spi;
+key_getsavbyspi(
+	struct secashead *sah,
+	u_int32_t spi)
 {
 	struct secasvar *sav, *match;
 	u_int stateidx, state, matchidx;
@@ -3370,10 +3413,10 @@ key_getsavbyspi(sah, spi)
  * does not modify mbuf.  does not free mbuf on error.
  */
 static int
-key_setsaval(sav, m, mhp)
-	struct secasvar *sav;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_setsaval(
+	struct secasvar *sav,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 #if IPSEC_ESP
 	const struct esp_algorithm *algo;
@@ -3688,8 +3731,8 @@ key_setsaval(sav, m, mhp)
  *	other:	errno
  */
 static int
-key_mature(sav)
-	struct secasvar *sav;
+key_mature(
+	struct secasvar *sav)
 {
 	int mature;
 	int checkmask = 0;	/* 2^0: ealg  2^1: aalg  2^2: calg */
@@ -3867,10 +3910,12 @@ key_mature(sav)
  * subroutine for SADB_GET and SADB_DUMP.
  */
 static struct mbuf *
-key_setdumpsa(sav, type, satype, seq, pid)
-	struct secasvar *sav;
-	u_int8_t type, satype;
-	u_int32_t seq, pid;
+key_setdumpsa(
+	struct secasvar *sav,
+	u_int8_t type,
+	u_int8_t satype,
+	u_int32_t seq,
+	u_int32_t pid)
 {
 	struct mbuf *result = NULL, *tres = NULL, *m;
 	int l = 0;
@@ -4016,12 +4061,13 @@ fail:
  * set data into sadb_msg.
  */
 static struct mbuf *
-key_setsadbmsg(type, tlen, satype, seq, pid, reserved)
-	u_int8_t type, satype;
-	u_int16_t tlen;
-	u_int32_t seq;
-	pid_t pid;
-	u_int16_t reserved;
+key_setsadbmsg(
+	u_int8_t type,
+	u_int16_t tlen,
+	u_int8_t satype,
+	u_int32_t seq,
+	pid_t pid,
+	u_int16_t reserved)
 {
 	struct mbuf *m;
 	struct sadb_msg *p;
@@ -4062,8 +4108,8 @@ key_setsadbmsg(type, tlen, satype, seq, pid, reserved)
  * copy secasvar data into sadb_address.
  */
 static struct mbuf *
-key_setsadbsa(sav)
-	struct secasvar *sav;
+key_setsadbsa(
+	struct secasvar *sav)
 {
 	struct mbuf *m;
 	struct sadb_sa *p;
@@ -4096,11 +4142,11 @@ key_setsadbsa(sav)
  * set data into sadb_address.
  */
 static struct mbuf *
-key_setsadbaddr(exttype, saddr, prefixlen, ul_proto)
-	u_int16_t exttype;
-	struct sockaddr *saddr;
-	u_int8_t prefixlen;
-	u_int16_t ul_proto;
+key_setsadbaddr(
+	u_int16_t exttype,
+	struct sockaddr *saddr,
+	u_int8_t prefixlen,
+	u_int16_t ul_proto)
 {
 	struct mbuf *m;
 	struct sadb_address *p;
@@ -4218,11 +4264,12 @@ key_setsadbsastat (u_int32_t      dir,
  * set data into sadb_ident.
  */
 static struct mbuf *
-key_setsadbident(exttype, idtype, string, stringlen, id)
-	u_int16_t exttype, idtype;
-	caddr_t string;
-	int stringlen;
-	u_int64_t id;
+key_setsadbident(
+	u_int16_t exttype,
+	u_int16_t idtype,
+	caddr_t string,
+	int stringlen,
+	u_int64_t id)
 {
 	struct mbuf *m;
 	struct sadb_ident *p;
@@ -4257,9 +4304,10 @@ key_setsadbident(exttype, idtype, string, stringlen, id)
  * set data into sadb_x_sa2.
  */
 static struct mbuf *
-key_setsadbxsa2(mode, seq, reqid)
-	u_int8_t mode;
-	u_int32_t seq, reqid;
+key_setsadbxsa2(
+	u_int8_t mode,
+	u_int32_t seq,
+	u_int32_t reqid)
 {
 	struct mbuf *m;
 	struct sadb_x_sa2 *p;
@@ -4291,10 +4339,10 @@ key_setsadbxsa2(mode, seq, reqid)
  * set data into sadb_x_policy
  */
 static struct mbuf *
-key_setsadbxpolicy(type, dir, id)
-	u_int16_t type;
-	u_int8_t dir;
-	u_int32_t id;
+key_setsadbxpolicy(
+	u_int16_t type,
+	u_int8_t dir,
+	u_int32_t id)
 {
 	struct mbuf *m;
 	struct sadb_x_policy *p;
@@ -4325,9 +4373,9 @@ key_setsadbxpolicy(type, dir, id)
  * copy a buffer into the new buffer allocated.
  */
 static void *
-key_newbuf(src, len)
-	const void *src;
-	u_int len;
+key_newbuf(
+	const void *src,
+	u_int len)
 {
 	caddr_t new;
 
@@ -4352,8 +4400,8 @@ key_newbuf(src, len)
  *	0: false
  */
 int
-key_ismyaddr(sa)
-	struct sockaddr *sa;
+key_ismyaddr(
+	struct sockaddr *sa)
 {
 #if INET
 	struct sockaddr_in *sin;
@@ -4370,15 +4418,17 @@ key_ismyaddr(sa)
 		lck_rw_lock_shared(in_ifaddr_rwlock);
 		sin = (struct sockaddr_in *)sa;
 		for (ia = in_ifaddrhead.tqh_first; ia;
-		     ia = ia->ia_link.tqe_next)
-		{
+		     ia = ia->ia_link.tqe_next) {
+			IFA_LOCK_SPIN(&ia->ia_ifa);
 			if (sin->sin_family == ia->ia_addr.sin_family &&
 			    sin->sin_len == ia->ia_addr.sin_len &&
 			    sin->sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)
 			{
+				IFA_UNLOCK(&ia->ia_ifa);
 				lck_rw_done(in_ifaddr_rwlock);
 				return 1;
 			}
+			IFA_UNLOCK(&ia->ia_ifa);
 		}
 		lck_rw_done(in_ifaddr_rwlock);
 		break;
@@ -4402,19 +4452,22 @@ key_ismyaddr(sa)
 #include <netinet6/in6_var.h>
 
 static int
-key_ismyaddr6(sin6)
-	struct sockaddr_in6 *sin6;
+key_ismyaddr6(
+	struct sockaddr_in6 *sin6)
 {
 	struct in6_ifaddr *ia;
 	struct in6_multi *in6m;
 
-	lck_mtx_lock(nd6_mutex);
+	lck_rw_lock_shared(&in6_ifaddr_rwlock);
 	for (ia = in6_ifaddrs; ia; ia = ia->ia_next) {
+		IFA_LOCK(&ia->ia_ifa);
 		if (key_sockaddrcmp((struct sockaddr *)sin6,
 		    (struct sockaddr *)&ia->ia_addr, 0) == 0) {
-			lck_mtx_unlock(nd6_mutex);
+			IFA_UNLOCK(&ia->ia_ifa);
+			lck_rw_done(&in6_ifaddr_rwlock);
 			return 1;
 		}
+		IFA_UNLOCK(&ia->ia_ifa);
 
 		/*
 		 * XXX Multicast
@@ -4423,15 +4476,16 @@ key_ismyaddr6(sin6)
 		 * XXX scope
 		 */
 		in6m = NULL;
-		ifnet_lock_shared(ia->ia_ifp);
-		IN6_LOOKUP_MULTI(sin6->sin6_addr, ia->ia_ifp, in6m);
-		ifnet_lock_done(ia->ia_ifp);
-		if (in6m) {
-			lck_mtx_unlock(nd6_mutex);
+		in6_multihead_lock_shared();
+		IN6_LOOKUP_MULTI(&sin6->sin6_addr, ia->ia_ifp, in6m);
+		in6_multihead_lock_done();
+		if (in6m != NULL) {
+			lck_rw_done(&in6_ifaddr_rwlock);
+			IN6M_REMREF(in6m);
 			return 1;
 		}
 	}
-	lck_mtx_unlock(nd6_mutex);
+	lck_rw_done(&in6_ifaddr_rwlock);
 
 	/* loopback, just for safety */
 	if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))
@@ -4454,9 +4508,10 @@ key_ismyaddr6(sin6)
  *      0 : not equal
  */
 static int
-key_cmpsaidx(saidx0, saidx1, flag)
-	struct secasindex *saidx0, *saidx1;
-	int flag;
+key_cmpsaidx(
+	struct secasindex *saidx0,
+	struct secasindex *saidx1,
+	int flag)
 {
 	/* sanity */
 	if (saidx0 == NULL && saidx1 == NULL)
@@ -4517,8 +4572,9 @@ key_cmpsaidx(saidx0, saidx1, flag)
  *	0 : not equal
  */
 static int
-key_cmpspidx_exactly(spidx0, spidx1)
-	struct secpolicyindex *spidx0, *spidx1;
+key_cmpspidx_exactly(
+	struct secpolicyindex *spidx0,
+	struct secpolicyindex *spidx1)
 {
 	/* sanity */
 	if (spidx0 == NULL && spidx1 == NULL)
@@ -4554,8 +4610,9 @@ key_cmpspidx_exactly(spidx0, spidx1)
  *	0 : not equal
  */
 static int
-key_cmpspidx_withmask(spidx0, spidx1)
-	struct secpolicyindex *spidx0, *spidx1;
+key_cmpspidx_withmask(
+	struct secpolicyindex *spidx0,
+	struct secpolicyindex *spidx1)
 {
 	/* sanity */
 	if (spidx0 == NULL && spidx1 == NULL)
@@ -4652,10 +4709,10 @@ key_cmpspidx_withmask(spidx0, spidx1)
 
 /* returns 0 on match */
 static int
-key_sockaddrcmp(sa1, sa2, port)
-	struct sockaddr *sa1;
-	struct sockaddr *sa2;
-	int port;
+key_sockaddrcmp(
+	struct sockaddr *sa1,
+	struct sockaddr *sa2,
+	int port)
 {
 	if (sa1->sa_family != sa2->sa_family || sa1->sa_len != sa2->sa_len)
 		return 1;
@@ -4707,9 +4764,10 @@ key_sockaddrcmp(sa1, sa2, port)
  *	0 : not equal
  */
 static int
-key_bbcmp(p1, p2, bits)
-	caddr_t p1, p2;
-	u_int bits;
+key_bbcmp(
+	caddr_t p1,
+	caddr_t p2,
+	u_int bits)
 {
 	u_int8_t mask;
 
@@ -5154,7 +5212,7 @@ key_timehandler(void)
  * to initialize a seed for random()
  */
 static void
-key_srandom()
+key_srandom(void)
 {
 #ifdef __APPLE__
 	/* Our PRNG is based on Yarrow and doesn't need to be seeded */
@@ -5171,7 +5229,7 @@ key_srandom()
 }
 
 u_int32_t
-key_random()
+key_random(void)
 {
 	u_int32_t value;
 
@@ -5180,9 +5238,9 @@ key_random()
 }
 
 void
-key_randomfill(p, l)
-	void *p;
-	size_t l;
+key_randomfill(
+	void *p,
+	size_t l)
 {
 #ifdef __APPLE__
 
@@ -5217,8 +5275,8 @@ key_randomfill(p, l)
  *	0: invalid satype.
  */
 static u_int16_t
-key_satype2proto(satype)
-	u_int8_t satype;
+key_satype2proto(
+	u_int8_t satype)
 {
 	switch (satype) {
 	case SADB_SATYPE_UNSPEC:
@@ -5242,8 +5300,8 @@ key_satype2proto(satype)
  *	0: invalid protocol type.
  */
 static u_int8_t
-key_proto2satype(proto)
-	u_int16_t proto;
+key_proto2satype(
+	u_int16_t proto)
 {
 	switch (proto) {
 	case IPPROTO_AH:
@@ -5273,10 +5331,10 @@ key_proto2satype(proto)
  *	other if success, return pointer to the message to send.
  */
 static int
-key_getspi(so, m, mhp)
-	struct socket *so;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_getspi(
+	struct socket *so,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	struct sadb_address *src0, *dst0;
 	struct secasindex saidx;
@@ -5483,9 +5541,9 @@ key_getspi(so, m, mhp)
  *	others: success.
  */
 static u_int32_t
-key_do_getnewspi(spirange, saidx)
-	struct sadb_spirange *spirange;
-	struct secasindex *saidx;
+key_do_getnewspi(
+	struct sadb_spirange *spirange,
+	struct secasindex *saidx)
 {
 	u_int32_t newspi;
 	u_int32_t keymin, keymax;
@@ -5567,10 +5625,10 @@ key_do_getnewspi(spirange, saidx)
  * m will always be freed.
  */
 static int
-key_update(so, m, mhp)
-	struct socket *so;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_update(
+	struct socket *so,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	struct sadb_sa *sa0;
 	struct sadb_address *src0, *dst0;
@@ -5743,9 +5801,9 @@ key_update(so, m, mhp)
  */
 #if IPSEC_DOSEQCHECK
 static struct secasvar *
-key_getsavbyseq(sah, seq)
-	struct secashead *sah;
-	u_int32_t seq;
+key_getsavbyseq(
+	struct secashead *sah,
+	u_int32_t seq)
 {
 	struct secasvar *sav;
 	u_int state;
@@ -5789,10 +5847,10 @@ key_getsavbyseq(sah, seq)
  * m will always be freed.
  */
 static int
-key_add(so, m, mhp)
-	struct socket *so;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_add(
+	struct socket *so,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	struct sadb_sa *sa0;
 	struct sadb_address *src0, *dst0;
@@ -5925,10 +5983,10 @@ key_add(so, m, mhp)
 
 /* m is retained */
 static int
-key_setident(sah, m, mhp)
-	struct secashead *sah;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_setident(
+	struct secashead *sah,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	const struct sadb_ident *idsrc, *iddst;
 	int idsrclen, iddstlen;
@@ -6009,9 +6067,9 @@ key_setident(sah, m, mhp)
  * it is the caller's responsibility to free the result.
  */
 static struct mbuf *
-key_getmsgbuf_x1(m, mhp)
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_getmsgbuf_x1(
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	struct mbuf *n;
 	int mbufItems[] = {SADB_EXT_RESERVED, SADB_EXT_SA,
@@ -6056,10 +6114,10 @@ static int key_delete_all(struct socket *, struct mbuf *,
  * m will always be freed.
  */
 static int
-key_delete(so, m, mhp)
-	struct socket *so;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_delete(
+	struct socket *so,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	struct sadb_sa *sa0;
 	struct sadb_address *src0, *dst0;
@@ -6169,11 +6227,11 @@ key_delete(so, m, mhp)
  * delete all SAs for src/dst.  Called from key_delete().
  */
 static int
-key_delete_all(so, m, mhp, proto)
-	struct socket *so;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
-	u_int16_t proto;
+key_delete_all(
+	struct socket *so,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp,
+	u_int16_t proto)
 {
 	struct sadb_address *src0, *dst0;
 	struct secasindex saidx;
@@ -6259,10 +6317,10 @@ key_delete_all(so, m, mhp, proto)
  * m will always be freed.
  */
 static int
-key_get(so, m, mhp)
-	struct socket *so;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_get(
+	struct socket *so,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	struct sadb_sa *sa0;
 	struct sadb_address *src0, *dst0;
@@ -6427,8 +6485,8 @@ key_getsastatbyspi (struct sastat *stat_arg,
 
 /* XXX make it sysctl-configurable? */
 static void
-key_getcomb_setlifetime(comb)
-	struct sadb_comb *comb;
+key_getcomb_setlifetime(
+	struct sadb_comb *comb)
 {
 
 	comb->sadb_comb_soft_allocations = 1;
@@ -6447,7 +6505,7 @@ key_getcomb_setlifetime(comb)
  * XXX no idea if the user wants ESP authentication or not
  */
 static struct mbuf *
-key_getcomb_esp()
+key_getcomb_esp(void)
 {
 	struct sadb_comb *comb;
 	const struct esp_algorithm *algo;
@@ -6529,7 +6587,7 @@ key_getcomb_esp()
  * XXX reorder combinations by preference
  */
 static struct mbuf *
-key_getcomb_ah()
+key_getcomb_ah(void)
 {
 	struct sadb_comb *comb;
 	const struct ah_algorithm *algo;
@@ -6588,7 +6646,7 @@ key_getcomb_ah()
  * XXX reorder combinations by preference
  */
 static struct mbuf *
-key_getcomb_ipcomp()
+key_getcomb_ipcomp(void)
 {
 	struct sadb_comb *comb;
 	const struct ipcomp_algorithm *algo;
@@ -6634,8 +6692,8 @@ key_getcomb_ipcomp()
  * XXX sysctl interface to ipsec_{ah,esp}_keymin
  */
 static struct mbuf *
-key_getprop(saidx)
-	const struct secasindex *saidx;
+key_getprop(
+	const struct secasindex *saidx)
 {
 	struct sadb_prop *prop;
 	struct mbuf *m, *n;
@@ -6698,9 +6756,9 @@ key_getprop(saidx)
  *    others: error number
  */
 static int
-key_acquire(saidx, sp)
-	struct secasindex *saidx;
-	struct secpolicy *sp;
+key_acquire(
+	struct secasindex *saidx,
+	struct secpolicy *sp)
 {
 	struct mbuf *result = NULL, *m;
 #ifndef IPSEC_NONBLOCK_ACQUIRE
@@ -6883,8 +6941,8 @@ key_acquire(saidx, sp)
 
 #ifndef IPSEC_NONBLOCK_ACQUIRE
 static struct secacq *
-key_newacq(saidx)
-	struct secasindex *saidx;
+key_newacq(
+	struct secasindex *saidx)
 {
 	struct secacq *newacq;
 	struct timeval tv;
@@ -6913,8 +6971,8 @@ key_newacq(saidx)
 }
 
 static struct secacq *
-key_getacq(saidx)
-	struct secasindex *saidx;
+key_getacq(
+	struct secasindex *saidx)
 {
 	struct secacq *acq;
 
@@ -6929,8 +6987,8 @@ key_getacq(saidx)
 }
 
 static struct secacq *
-key_getacqbyseq(seq)
-	u_int32_t seq;
+key_getacqbyseq(
+	u_int32_t seq)
 {
 	struct secacq *acq;
 
@@ -6946,8 +7004,8 @@ key_getacqbyseq(seq)
 #endif
 
 static struct secspacq *
-key_newspacq(spidx)
-	struct secpolicyindex *spidx;
+key_newspacq(
+	struct secpolicyindex *spidx)
 {
 	struct secspacq *acq;
 	struct timeval tv;
@@ -6975,8 +7033,8 @@ key_newspacq(spidx)
 }
 
 static struct secspacq *
-key_getspacq(spidx)
-	struct secpolicyindex *spidx;
+key_getspacq(
+	struct secpolicyindex *spidx)
 {
 	struct secspacq *acq;
 
@@ -7005,10 +7063,10 @@ key_getspacq(spidx)
  * m will always be freed.
  */
 static int
-key_acquire2(so, m, mhp)
-	struct socket *so;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_acquire2(
+	struct socket *so,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	const struct sadb_address *src0, *dst0;
 	struct secasindex saidx;
@@ -7134,10 +7192,10 @@ key_acquire2(so, m, mhp)
  * m will always be freed.
  */
 static int
-key_register(so, m, mhp)
-	struct socket *so;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_register(
+	struct socket *so,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	struct secreg *reg, *newreg = 0;
 	
@@ -7303,8 +7361,8 @@ key_register(so, m, mhp)
  * XXX: I want to free a socket that has been marked done via SADB_REGISTER.
  */
 void
-key_freereg(so)
-	struct socket *so;
+key_freereg(
+	struct socket *so)
 {
 	struct secreg *reg;
 	int i;
@@ -7344,8 +7402,8 @@ key_freereg(so)
  *	others	: error number
  */
 static int
-key_expire(sav)
-	struct secasvar *sav;
+key_expire(
+	struct secasvar *sav)
 {
 	int satype;
 	struct mbuf *result = NULL, *m;
@@ -7471,10 +7529,10 @@ key_expire(sav)
  * m will always be freed.
  */
 static int
-key_flush(so, m, mhp)
-	struct socket *so;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_flush(
+	struct socket *so,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	struct sadb_msg *newmsg;
 	struct secashead *sah, *nextsah;
@@ -7560,10 +7618,10 @@ struct sav_dump_elem {
 };
 
 static int
-key_dump(so, m, mhp)
-	struct socket *so;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_dump(
+	struct socket *so,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	struct secashead *sah;
 	struct secasvar *sav;
@@ -7679,10 +7737,10 @@ end:
  * m will always be freed.
  */
 static int
-key_promisc(so, m, mhp)
-	struct socket *so;
-	struct mbuf *m;
-	const struct sadb_msghdr *mhp;
+key_promisc(
+	struct socket *so,
+	struct mbuf *m,
+	const struct sadb_msghdr *mhp)
 {
 	int olen;
 	
@@ -7771,9 +7829,9 @@ static int (*key_typesw[])(struct socket *, struct mbuf *,
  *    length for buffer to send to user process.
  */
 int
-key_parse(m, so)
-	struct mbuf *m;
-	struct socket *so;
+key_parse(
+	struct mbuf *m,
+	struct socket *so)
 {
 	struct sadb_msg *msg;
 	struct sadb_msghdr mh;
@@ -8026,10 +8084,10 @@ senderror:
 }
 
 static int
-key_senderror(so, m, code)
-	struct socket *so;
-	struct mbuf *m;
-	int code;
+key_senderror(
+	struct socket *so,
+	struct mbuf *m,
+	int code)
 {
 	struct sadb_msg *msg;
 
@@ -8049,9 +8107,9 @@ key_senderror(so, m, code)
  * XXX larger-than-MCLBYTES extension?
  */
 static int
-key_align(m, mhp)
-	struct mbuf *m;
-	struct sadb_msghdr *mhp;
+key_align(
+	struct mbuf *m,
+	struct sadb_msghdr *mhp)
 {
 	struct mbuf *n;
 	struct sadb_ext *ext;
@@ -8156,9 +8214,9 @@ key_align(m, mhp)
 }
 
 static int
-key_validate_ext(ext, len)
-	const struct sadb_ext *ext;
-	int len;
+key_validate_ext(
+	const struct sadb_ext *ext,
+	int len)
 {
 	struct sockaddr *sa;
 	enum { NONE, ADDR } checktype = NONE;
@@ -8216,50 +8274,8 @@ key_validate_ext(ext, len)
 }
 
 void
-key_domain_init()
+key_domain_init(void)
 {
-	int i;
-
-	bzero((caddr_t)&key_cb, sizeof(key_cb));
-	
-	for (i = 0; i < IPSEC_DIR_MAX; i++) {
-		LIST_INIT(&sptree[i]);
-	}
-	ipsec_policy_count = 0;
-
-	LIST_INIT(&sahtree);
-
-	for (i = 0; i <= SADB_SATYPE_MAX; i++) {
-		LIST_INIT(&regtree[i]);
-	}
-	ipsec_sav_count = 0;
-
-#ifndef IPSEC_NONBLOCK_ACQUIRE
-	LIST_INIT(&acqtree);
-#endif
-	LIST_INIT(&spacqtree);
-
-	/* system default */
-#if INET
-	ip4_def_policy.policy = IPSEC_POLICY_NONE;
-	ip4_def_policy.refcnt++;	/*never reclaim this*/
-#endif
-#if INET6
-	ip6_def_policy.policy = IPSEC_POLICY_NONE;
-	ip6_def_policy.refcnt++;	/*never reclaim this*/
-#endif
-
-#ifndef IPSEC_DEBUG2
-	timeout((void *)key_timehandler, (void *)0, hz);
-#endif /*IPSEC_DEBUG2*/
-
-	/* initialize key statistics */
-	keystat.getspi_count = 1;
-
-#ifndef __APPLE__
-	printf("IPsec: Initialized Security Association Processing.\n");
-#endif
-
 	return;
 }
 
@@ -8290,9 +8306,9 @@ key_checktunnelsanity(
 
 /* record data transfer on SA, and update timestamps */
 void
-key_sa_recordxfer(sav, m)
-	struct secasvar *sav;
-	struct mbuf *m;
+key_sa_recordxfer(
+	struct secasvar *sav,
+	struct mbuf *m)
 {
 
 	
@@ -8343,8 +8359,8 @@ key_sa_recordxfer(sav, m)
 
 /* dumb version */
 void
-key_sa_routechange(dst)
-	struct sockaddr *dst;
+key_sa_routechange(
+	struct sockaddr *dst)
 {
 	struct secashead *sah;
 	struct route *ro;
@@ -8364,9 +8380,9 @@ key_sa_routechange(dst)
 }
 
 static void
-key_sa_chgstate(sav, state)
-	struct secasvar *sav;
-	u_int8_t state;
+key_sa_chgstate(
+	struct secasvar *sav,
+	u_int8_t state)
 {
 
 	if (sav == NULL)
@@ -8386,8 +8402,8 @@ key_sa_chgstate(sav, state)
 }
 
 void
-key_sa_stir_iv(sav)
-	struct secasvar *sav;
+key_sa_stir_iv(
+	struct secasvar *sav)
 {
 	lck_mtx_lock(sadb_mutex);
 	if (!sav->iv)
@@ -8398,8 +8414,8 @@ key_sa_stir_iv(sav)
 
 /* XXX too much? */
 static struct mbuf *
-key_alloc_mbuf(l)
-	int l;
+key_alloc_mbuf(
+	int l)
 {
 	struct mbuf *m = NULL, *n;
 	int len, t;
diff --git a/bsd/nfs/Makefile b/bsd/nfs/Makefile
index 10e246402..d4c4ce3cb 100644
--- a/bsd/nfs/Makefile
+++ b/bsd/nfs/Makefile
@@ -9,14 +9,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 DATAFILES = \
diff --git a/bsd/nfs/krpc.h b/bsd/nfs/krpc.h
index 16fde5248..5f3b87677 100644
--- a/bsd/nfs/krpc.h
+++ b/bsd/nfs/krpc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -44,7 +44,7 @@ int krpc_portmap(struct sockaddr_in *sin,
 
 
 /*
- * RPC definitions for the portmapper
+ * RPC definitions for the portmapper (portmap and rpcbind)
  */
 #define	PMAPPORT		111
 #define	PMAPPROG		100000
@@ -56,6 +56,24 @@ int krpc_portmap(struct sockaddr_in *sin,
 #define	PMAPPROC_DUMP		4
 #define	PMAPPROC_CALLIT		5
 
+#define RPCBPROG		PMAPPROG
+#define RPCBVERS3		3
+#define RPCBVERS4		4
+#define RPCBPROC_NULL		0
+#define RPCBPROC_SET		1
+#define RPCBPROC_UNSET		2
+#define RPCBPROC_GETADDR	3
+#define RPCBPROC_DUMP		4
+#define RPCBPROC_CALLIT		5
+#define RPCBPROC_BCAST		RPCBPROC_CALLIT
+#define RPCBPROC_GETTIME	6
+#define RPCBPROC_UADDR2TADDR	7
+#define RPCBPROC_TADDR2UADDR	8
+#define RPCBPROC_GETVERSADDR	9
+#define RPCBPROC_INDIRECT	10
+#define RPCBPROC_GETADDRLIST	11
+#define RPCBPROC_GETSTAT	12
+
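(The RPCBPROC_* procedures added above extend the classic portmap program: rpcbind reuses the same program number under versions 3 and 4, per RFC 1833. For reference, the body of a v2 PMAPPROC_GETPORT call is just four XDR words; the struct below is an illustrative rendering, and its field names are not from this header:)

#include <stdint.h>

struct pmap_getport_args {
	uint32_t prog;	/* target RPC program, e.g. 100003 for NFS */
	uint32_t vers;	/* target program version, e.g. 3 */
	uint32_t prot;	/* IPPROTO_TCP (6) or IPPROTO_UDP (17) */
	uint32_t port;	/* ignored in the call; the reply carries the port */
};

int
main(void)
{
	struct pmap_getport_args a = { 100003, 3, 6, 0 };

	return (sizeof(a) == 16) ? 0 : 1;	/* four XDR words */
}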
 
 /*
  * RPC definitions for bootparamd
diff --git a/bsd/nfs/nfs.h b/bsd/nfs/nfs.h
index 821af8e5b..41b025389 100644
--- a/bsd/nfs/nfs.h
+++ b/bsd/nfs/nfs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc.  All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -146,14 +146,71 @@ __private_extern__ int nfs_ticks;
  * (These sizes are always a power of 2. If the kernel malloc() changes
  *  to one that does not allocate space in powers of 2 size, then this all
  *  becomes bunk!).
- * Note that some of these structures come out of there own nfs zones.
-*/
+ * Note that some of these structures come out of their own nfs zones.
+ */
 #define NFS_NODEALLOC	1024
 #define NFS_MNTALLOC	1024
 #define NFS_SVCALLOC	512
 
+#define NFS_ARGSVERSION_XDR	88	/* NFS mount args are in XDR format */
+
+#define NFS_XDRARGS_VERSION_0	0
+#define NFS_MATTR_BITMAP_LEN	1		/* length of mount attributes bitmap */
+#define NFS_MFLAG_BITMAP_LEN	1		/* length of mount flags bitmap */
+
+/* NFS mount attributes */
+#define NFS_MATTR_FLAGS			0	/* mount flags (NFS_MATTR_*) */
+#define NFS_MATTR_NFS_VERSION		1	/* NFS protocol version */
+#define NFS_MATTR_NFS_MINOR_VERSION	2	/* NFS protocol minor version */
+#define NFS_MATTR_READ_SIZE		3	/* READ RPC size */
+#define NFS_MATTR_WRITE_SIZE		4	/* WRITE RPC size */
+#define NFS_MATTR_READDIR_SIZE		5	/* READDIR RPC size */
+#define NFS_MATTR_READAHEAD		6	/* block readahead count */
+#define NFS_MATTR_ATTRCACHE_REG_MIN	7	/* minimum attribute cache time */
+#define NFS_MATTR_ATTRCACHE_REG_MAX	8	/* maximum attribute cache time */
+#define NFS_MATTR_ATTRCACHE_DIR_MIN	9	/* minimum attribute cache time for dirs */
+#define NFS_MATTR_ATTRCACHE_DIR_MAX	10	/* maximum attribute cache time for dirs */
+#define NFS_MATTR_LOCK_MODE		11	/* advisory file locking mode (NFS_LOCK_MODE_*) */
+#define NFS_MATTR_SECURITY		12	/* RPC security flavors to use */
+#define NFS_MATTR_MAX_GROUP_LIST	13	/* max # of RPC AUTH_SYS groups */
+#define NFS_MATTR_SOCKET_TYPE		14	/* socket transport type as a netid-like string */
+#define NFS_MATTR_NFS_PORT		15	/* port # to use for NFS protocol */
+#define NFS_MATTR_MOUNT_PORT		16	/* port # to use for MOUNT protocol */
+#define NFS_MATTR_REQUEST_TIMEOUT	17	/* initial RPC request timeout value */
+#define NFS_MATTR_SOFT_RETRY_COUNT	18	/* max RPC retransmissions for soft mounts */
+#define NFS_MATTR_DEAD_TIMEOUT		19	/* how long until unresponsive mount is considered dead */
+#define NFS_MATTR_FH			20	/* file handle for mount directory */
+#define NFS_MATTR_FS_LOCATIONS		21	/* list of locations for the file system */
+#define NFS_MATTR_MNTFLAGS		22	/* VFS mount flags (MNT_*) */
+#define NFS_MATTR_MNTFROM		23	/* fixed string to use for "f_mntfromname" */
+
+/* NFS mount flags */
+#define NFS_MFLAG_SOFT			0	/* soft mount (requests fail if unresponsive) */
+#define NFS_MFLAG_INTR			1	/* allow operations to be interrupted */
+#define NFS_MFLAG_RESVPORT		2	/* use a reserved port */
+#define NFS_MFLAG_NOCONNECT		3	/* don't connect the socket (UDP) */
+#define NFS_MFLAG_DUMBTIMER		4	/* don't estimate RTT dynamically */
+#define NFS_MFLAG_CALLUMNT		5	/* call MOUNTPROC_UMNT on unmount */
+#define NFS_MFLAG_RDIRPLUS		6	/* request additional info when reading directories */
+#define NFS_MFLAG_NONEGNAMECACHE	7	/* don't do negative name caching */
+#define NFS_MFLAG_MUTEJUKEBOX		8	/* don't treat jukebox errors as unresponsive */
+#define NFS_MFLAG_EPHEMERAL		9	/* ephemeral (mirror) mount */
+#define NFS_MFLAG_NOCALLBACK		10	/* don't provide callback RPC service */
+#define NFS_MFLAG_NONAMEDATTR		11	/* don't use named attributes */
+#define NFS_MFLAG_NOACL			12	/* don't support ACLs */
+#define NFS_MFLAG_ACLONLY		13	/* only support ACLs - not mode */
+#define NFS_MFLAG_NFC			14	/* send NFC strings */
+#define NFS_MFLAG_NOQUOTA		15	/* don't support QUOTA requests */
+#define NFS_MFLAG_MNTUDP		16	/* MOUNT protocol should use UDP */
+#define NFS_MFLAG_MNTQUICK		17	/* use short timeouts while mounting */
+
+/* NFS advisory file locking modes */
+#define NFS_LOCK_MODE_ENABLED		0	/* advisory file locking enabled */
+#define NFS_LOCK_MODE_DISABLED		1	/* do not support advisory file locking */
+#define NFS_LOCK_MODE_LOCAL		2	/* perform advisory file locking locally */
+
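(The XDR-format mount args (NFS_ARGSVERSION_XDR) carry a bitmap recording which NFS_MATTR_* attributes are present, NFS_MATTR_BITMAP_LEN words long. A minimal sketch of the bit arithmetic involved; xnu has its own NFS_BITMAP_* macros for this, so the local macros here are purely illustrative:)

#include <stdint.h>

#define BITMAP_SET(b, i)	((b)[(i)/32] |= (1U << ((i) % 32)))
#define BITMAP_ISSET(b, i)	((b)[(i)/32] &  (1U << ((i) % 32)))

int
main(void)
{
	uint32_t attrs[1] = { 0 };	/* NFS_MATTR_BITMAP_LEN == 1 */

	BITMAP_SET(attrs, 1);		/* NFS_MATTR_NFS_VERSION present */
	BITMAP_SET(attrs, 3);		/* NFS_MATTR_READ_SIZE present */
	return BITMAP_ISSET(attrs, 1) ? 0 : 1;
}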
 /*
- * Arguments to mount NFS
+ * Old-style arguments to mount NFS
  */
 #define NFS_ARGSVERSION	6		/* change when nfs_args changes */
 struct nfs_args {
@@ -197,115 +254,11 @@ struct nfs_args {
 	/* NFS_ARGSVERSION 5 ends here */
 	uint32_t	deadtimeout;	/* secs until unresponsive mount considered dead */
 };
-struct nfs_args5 {
-	int		version;	/* args structure version number */
-#ifdef KERNEL
-	user32_addr_t addr;		/* file server address */
-#else
-	struct sockaddr	*addr;		/* file server address */
-#endif
-	int		addrlen;	/* length of address */
-	int		sotype;		/* Socket type */
-	int		proto;		/* and Protocol */
-#ifdef KERNEL
-	user32_addr_t fh;		/* File handle to be mounted */
-#else
-	u_char		*fh;		/* File handle to be mounted */
-#endif
-	int		fhsize;		/* Size, in bytes, of fh */
-	int		flags;		/* flags */
-	int		wsize;		/* write size in bytes */
-	int		rsize;		/* read size in bytes */
-	int		readdirsize;	/* readdir size in bytes */
-	int		timeo;		/* initial timeout in .1 secs */
-	int		retrans;	/* times to retry send */
-	int		maxgrouplist;	/* Max. size of group list */
-	int		readahead;	/* # of blocks to readahead */
-	int		leaseterm;	/* obsolete: Term (sec) of lease */
-	int		deadthresh;	/* obsolete: Retrans threshold */
-#ifdef KERNEL
-	user32_addr_t hostname;	/* server's name */
-#else
-	char		*hostname;	/* server's name */
-#endif
-	/* NFS_ARGSVERSION 3 ends here */
-	int		acregmin;	/* reg file min attr cache timeout */
-	int		acregmax;	/* reg file max attr cache timeout */
-	int		acdirmin;	/* dir min attr cache timeout */
-	int		acdirmax;	/* dir max attr cache timeout */
-	/* NFS_ARGSVERSION 4 ends here */
-	uint32_t	auth;		/* security mechanism flavor */
-};
-struct nfs_args4 {
-	int		version;	/* args structure version number */
-#ifdef KERNEL
-	user32_addr_t addr;		/* file server address */
-#else
-	struct sockaddr	*addr;		/* file server address */
-#endif
-	int		addrlen;	/* length of address */
-	int		sotype;		/* Socket type */
-	int		proto;		/* and Protocol */
-#ifdef KERNEL
-	user32_addr_t fh;		/* File handle to be mounted */
-#else
-	u_char		*fh;		/* File handle to be mounted */
-#endif
-	int		fhsize;		/* Size, in bytes, of fh */
-	int		flags;		/* flags */
-	int		wsize;		/* write size in bytes */
-	int		rsize;		/* read size in bytes */
-	int		readdirsize;	/* readdir size in bytes */
-	int		timeo;		/* initial timeout in .1 secs */
-	int		retrans;	/* times to retry send */
-	int		maxgrouplist;	/* Max. size of group list */
-	int		readahead;	/* # of blocks to readahead */
-	int		leaseterm;	/* obsolete: Term (sec) of lease */
-	int		deadthresh;	/* obsolete: Retrans threshold */
-#ifdef KERNEL
-	user32_addr_t hostname;	/* server's name */
-#else
-	char		*hostname;	/* server's name */
-#endif
-	/* NFS_ARGSVERSION 3 ends here */
-	int		acregmin;	/* reg file min attr cache timeout */
-	int		acregmax;	/* reg file max attr cache timeout */
-	int		acdirmin;	/* dir min attr cache timeout */
-	int		acdirmax;	/* dir max attr cache timeout */
-};
 
-struct nfs_args3 {
-	int		version;	/* args structure version number */
-#ifdef KERNEL
-	user32_addr_t addr;		/* file server address */
-#else
-	struct sockaddr	*addr;		/* file server address */
-#endif
-	int		addrlen;	/* length of address */
-	int		sotype;		/* Socket type */
-	int		proto;		/* and Protocol */
-#ifdef KERNEL
-	user32_addr_t fh;		/* File handle to be mounted */
-#else
-	u_char		*fh;		/* File handle to be mounted */
-#endif
-	int		fhsize;		/* Size, in bytes, of fh */
-	int		flags;		/* flags */
-	int		wsize;		/* write size in bytes */
-	int		rsize;		/* read size in bytes */
-	int		readdirsize;	/* readdir size in bytes */
-	int		timeo;		/* initial timeout in .1 secs */
-	int		retrans;	/* times to retry send */
-	int		maxgrouplist;	/* Max. size of group list */
-	int		readahead;	/* # of blocks to readahead */
-	int		leaseterm;	/* obsolete: Term (sec) of lease */
-	int		deadthresh;	/* obsolete: Retrans threshold */
-#ifdef KERNEL
-	user32_addr_t hostname;	/* server's name */
-#else
-	char		*hostname;	/* server's name */
-#endif
-};
+/* incremental size additions in each version of nfs_args */
+#define NFS_ARGSVERSION4_INCSIZE	(4 * sizeof(int))
+#define NFS_ARGSVERSION5_INCSIZE	(sizeof(uint32_t))
+#define NFS_ARGSVERSION6_INCSIZE	(sizeof(uint32_t))
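(With the per-version structs removed, older argument layouts are recovered arithmetically: each INCSIZE constant is the number of bytes that version appended to its predecessor. A hypothetical helper showing the subtraction; this is not the kernel's actual code:)

#include <stddef.h>

#define INCSIZE_V4	(4 * sizeof(int))	/* acregmin..acdirmax */
#define INCSIZE_V5	(sizeof(unsigned int))	/* auth */
#define INCSIZE_V6	(sizeof(unsigned int))	/* deadtimeout */

static size_t
args_size_for_version(size_t v6_size, int version)
{
	size_t sz = v6_size;	/* size of the current (version 6) struct */

	if (version < 6) sz -= INCSIZE_V6;
	if (version < 5) sz -= INCSIZE_V5;
	if (version < 4) sz -= INCSIZE_V4;
	return sz;
}

int
main(void)
{
	/* a version-4 layout lacks the v5 and v6 additions */
	return args_size_for_version(100, 4) ==
	    100 - INCSIZE_V6 - INCSIZE_V5 ? 0 : 1;
}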
 
 #ifdef KERNEL
 /* LP64 version of nfs_args.  all pointers and longs
@@ -341,83 +294,10 @@ struct user_nfs_args {
 	/* NFS_ARGSVERSION 5 ends here */
 	uint32_t	deadtimeout;	/* secs until unresponsive mount considered dead */
 };
-struct user_nfs_args5 {
-	int		version;	/* args structure version number */
-	user_addr_t	addr __attribute((aligned(8)));		/* file server address */
-	int		addrlen;	/* length of address */
-	int		sotype;		/* Socket type */
-	int		proto;		/* and Protocol */
-	user_addr_t	fh __attribute((aligned(8)));		/* File handle to be mounted */
-	int		fhsize;		/* Size, in bytes, of fh */
-	int		flags;		/* flags */
-	int		wsize;		/* write size in bytes */
-	int		rsize;		/* read size in bytes */
-	int		readdirsize;	/* readdir size in bytes */
-	int		timeo;		/* initial timeout in .1 secs */
-	int		retrans;	/* times to retry send */
-	int		maxgrouplist;	/* Max. size of group list */
-	int		readahead;	/* # of blocks to readahead */
-	int		leaseterm;	/* obsolete: Term (sec) of lease */
-	int		deadthresh;	/* obsolete: Retrans threshold */
-	user_addr_t	hostname __attribute((aligned(8)));	/* server's name */
-	/* NFS_ARGSVERSION 3 ends here */
-	int		acregmin;	/* reg file min attr cache timeout */
-	int		acregmax;	/* reg file max attr cache timeout */
-	int		acdirmin;	/* dir min attr cache timeout */
-	int		acdirmax;	/* dir max attr cache timeout */
-	/* NFS_ARGSVERSION 4 ends here */
-	uint32_t	auth;		/* security mechanism flavor */
-};
-struct user_nfs_args4 {
-	int		version;	/* args structure version number */
-	user_addr_t	addr __attribute((aligned(8)));		/* file server address */
-	int		addrlen;	/* length of address */
-	int		sotype;		/* Socket type */
-	int		proto;		/* and Protocol */
-	user_addr_t	fh __attribute((aligned(8)));		/* File handle to be mounted */
-	int		fhsize;		/* Size, in bytes, of fh */
-	int		flags;		/* flags */
-	int		wsize;		/* write size in bytes */
-	int		rsize;		/* read size in bytes */
-	int		readdirsize;	/* readdir size in bytes */
-	int		timeo;		/* initial timeout in .1 secs */
-	int		retrans;	/* times to retry send */
-	int		maxgrouplist;	/* Max. size of group list */
-	int		readahead;	/* # of blocks to readahead */
-	int		leaseterm;	/* obsolete: Term (sec) of lease */
-	int		deadthresh;	/* obsolete: Retrans threshold */
-	user_addr_t	hostname __attribute((aligned(8)));	/* server's name */
-	/* NFS_ARGSVERSION 3 ends here */
-	int		acregmin;	/* reg file min attr cache timeout */
-	int		acregmax;	/* reg file max attr cache timeout */
-	int		acdirmin;	/* dir min attr cache timeout */
-	int		acdirmax;	/* dir max attr cache timeout */
-};
-struct user_nfs_args3 {
-	int		version;	/* args structure version number */
-	user_addr_t	addr __attribute((aligned(8)));		/* file server address */
-	int		addrlen;	/* length of address */
-	int		sotype;		/* Socket type */
-	int		proto;		/* and Protocol */
-	user_addr_t	fh __attribute((aligned(8)));		/* File handle to be mounted */
-	int		fhsize;		/* Size, in bytes, of fh */
-	int		flags;		/* flags */
-	int		wsize;		/* write size in bytes */
-	int		rsize;		/* read size in bytes */
-	int		readdirsize;	/* readdir size in bytes */
-	int		timeo;		/* initial timeout in .1 secs */
-	int		retrans;	/* times to retry send */
-	int		maxgrouplist;	/* Max. size of group list */
-	int		readahead;	/* # of blocks to readahead */
-	int		leaseterm;	/* obsolete: Term (sec) of lease */
-	int		deadthresh;	/* obsolete: Retrans threshold */
-	user_addr_t	hostname __attribute((aligned(8)));	/* server's name */
-};
-
 #endif // KERNEL
 
 /*
- * NFS mount option flags
+ * Old-style NFS mount option flags
  */
 #define	NFSMNT_SOFT		0x00000001  /* soft mount (hard is default) */
 #define	NFSMNT_WSIZE		0x00000002  /* set write size */
@@ -446,6 +326,27 @@ struct user_nfs_args3 {
 #define	NFSMNT_SECFLAVOR	0x01000000  /* Use security flavor */
 #define	NFSMNT_SECSYSOK		0x02000000  /* Server can support auth sys */
 #define	NFSMNT_MUTEJUKEBOX	0x04000000  /* don't treat jukebox errors as unresponsive */
+#define	NFSMNT_NOQUOTA		0x08000000  /* don't support QUOTA requests */
+
+
+/*
+ * fs.nfs sysctl(3) NFS_MOUNTINFO defines
+ */
+#define NFS_MOUNT_INFO_VERSION	0	/* nfsstat mount information version */
+#define NFS_MIATTR_BITMAP_LEN	1	/* length of mount info attributes bitmap */
+#define NFS_MIFLAG_BITMAP_LEN	1	/* length of mount info flags bitmap */
+
+/* NFS mount info attributes */
+#define NFS_MIATTR_FLAGS		0	/* mount info flags bitmap (MIFLAG_*) */
+#define NFS_MIATTR_ORIG_ARGS		1	/* original mount args passed into mount call */
+#define NFS_MIATTR_CUR_ARGS		2	/* current mount args values */
+#define NFS_MIATTR_CUR_LOC_INDEX	3	/* current fs location index */
+
+/* NFS mount info flags */
+#define NFS_MIFLAG_DEAD		0	/* mount is dead */
+#define NFS_MIFLAG_NOTRESP	1	/* server is unresponsive */
+#define NFS_MIFLAG_RECOVERY	2	/* mount in recovery */
+
 
 /*
  * Structures for the nfssvc(2) syscall. Not that anyone but nfsd
@@ -831,6 +732,7 @@ struct nfsstats {
  * Flags for nfsclnt() system call.
  */
 #define NFSCLNT_LOCKDANS	0x200
+#define NFSCLNT_LOCKDNOTIFY	0x400
 
 /*
  * fs.nfs sysctl(3) identifiers
@@ -839,6 +741,7 @@ struct nfsstats {
 #define NFS_EXPORTSTATS 3	/* gets exported directory stats */
 #define NFS_USERSTATS	4	/* gets exported directory active user stats */
 #define NFS_USERCOUNT	5	/* gets current count of active nfs users */
+#define NFS_MOUNTINFO	6	/* gets information about an NFS mount */
 
 #ifndef NFS_WDELAYHASHSIZ
 #define	NFS_WDELAYHASHSIZ 16	/* and with this */
@@ -882,6 +785,11 @@ struct nfs_open_file;
 struct nfs_lock_owner;
 struct nfs_file_lock;
 struct nfsreq;
+struct nfs_rpc_record_state;
+struct nfs_fs_locations;
+struct nfs_location_index;
+struct nfs_socket;
+struct nfs_socket_search;
 
 /*
  * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
@@ -926,6 +834,28 @@ struct nfsreq_cbinfo {
 	uint32_t		rcb_args[3];			/* additional callback args */
 };
 
+/*
+ * Arguments to use if a request needs to call SECINFO to handle a WRONGSEC error
+ *
+ * If only node is set, use the parent file handle and this node's name; otherwise,
+ * use any file handle and name provided.
+ */
+struct nfsreq_secinfo_args {
+	nfsnode_t		rsia_np;		/* the node */
+	const char		*rsia_name;		/* alternate name string */
+	u_char			*rsia_fh;		/* alternate file handle */
+	uint32_t		rsia_namelen;		/* length of string */
+	uint32_t		rsia_fhsize;		/* length of fh */
+};
+#define NFSREQ_SECINFO_SET(SI, NP, FH, FHSIZE, NAME, NAMELEN) \
+	do { \
+		(SI)->rsia_np = (NP); \
+		(SI)->rsia_fh = (FH); \
+		(SI)->rsia_fhsize = (FHSIZE); \
+		(SI)->rsia_name = (NAME); \
+		(SI)->rsia_namelen = (NAMELEN); \
+	} while (0)
+
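(A trimmed, standalone rendering of the struct and macro above, showing the "only node is set" case; the kernel node type is opaqued here, so this is illustrative rather than kernel code:)

typedef struct node *nfsnode_t;	/* opaque stand-in for the kernel type */

struct nfsreq_secinfo_args {
	nfsnode_t	rsia_np;
	const char	*rsia_name;
	unsigned char	*rsia_fh;
	unsigned int	rsia_namelen;
	unsigned int	rsia_fhsize;
};

#define NFSREQ_SECINFO_SET(SI, NP, FH, FHSIZE, NAME, NAMELEN) \
	do { \
		(SI)->rsia_np = (NP); \
		(SI)->rsia_fh = (FH); \
		(SI)->rsia_fhsize = (FHSIZE); \
		(SI)->rsia_name = (NAME); \
		(SI)->rsia_namelen = (NAMELEN); \
	} while (0)

int
main(void)
{
	struct nfsreq_secinfo_args si;

	/* only the node: SECINFO will use the parent FH + this node's name */
	NFSREQ_SECINFO_SET(&si, (nfsnode_t)0, (unsigned char *)0, 0,
	    (const char *)0, 0);
	return (int)si.rsia_fhsize;
}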
 /*
  * NFS outstanding request list element
  */
@@ -959,8 +889,11 @@ struct nfsreq {
 	SLIST_HEAD(, gss_seq)	r_gss_seqlist;	/* RPCSEC_GSS sequence numbers */
 	uint32_t		r_gss_argoff;	/* RPCSEC_GSS offset to args */
 	uint32_t		r_gss_arglen;	/* RPCSEC_GSS arg length */
+	uint32_t		r_auth;		/* security flavor request sent with */
+	uint32_t		*r_wrongsec;	/* wrongsec: other flavors to try */
 	int			r_error;	/* request error */
 	struct nfsreq_cbinfo	r_callback;	/* callback info */
+	struct nfsreq_secinfo_args r_secinfo;	/* secinfo args */
 };
 
 /*
@@ -992,9 +925,10 @@ __private_extern__ lck_grp_t *nfs_request_grp;
 #define R_RESENDQ	0x00004000	/* async request currently on resendq */
 #define R_SENDING	0x00008000	/* request currently being sent */
 
+#define R_NOINTR	0x20000000	/* request should not be interrupted by a signal */
 #define R_RECOVER	0x40000000	/* a state recovery RPC - during NFSSTA_RECOVER */
 #define R_SETUP		0x80000000	/* a setup RPC - during (re)connection */
-#define R_OPTMASK	0xc0000000	/* mask of all RPC option flags */
+#define R_OPTMASK	0xe0000000	/* mask of all RPC option flags */
 
 /* Flag values for r_lflags */
 #define RL_BUSY		0x0001		/* Locked. */
@@ -1002,10 +936,21 @@ __private_extern__ lck_grp_t *nfs_request_grp;
 #define RL_QUEUED	0x0004		/* request is on the queue */
 
 __private_extern__ u_int32_t nfs_xid, nfs_xidwrap;
-__private_extern__ int nfs_iosize, nfs_access_cache_timeout, nfs_access_delete, nfs_allow_async, nfs_statfs_rate_limit;
+__private_extern__ int nfs_iosize, nfs_allow_async, nfs_statfs_rate_limit;
+__private_extern__ int nfs_access_cache_timeout, nfs_access_delete, nfs_access_dotzfs, nfs_access_for_getattr;
 __private_extern__ int nfs_lockd_mounts, nfs_lockd_request_sent, nfs_single_des;
 __private_extern__ int nfs_tprintf_initial_delay, nfs_tprintf_delay;
 __private_extern__ int nfsiod_thread_count, nfsiod_thread_max, nfs_max_async_writes;
+__private_extern__ int nfs_idmap_ctrl, nfs_callback_port;
+
+/* bits for nfs_idmap_ctrl: */
+#define NFS_IDMAP_CTRL_USE_IDMAP_SERVICE		0x00000001 /* use the ID mapping service */
+#define NFS_IDMAP_CTRL_FALLBACK_NO_COMMON_IDS		0x00000002 /* fallback should NOT handle common IDs like "root" and "nobody" */
+#define NFS_IDMAP_CTRL_FALLBACK_NO_WELLKNOWN_IDS	0x00000004 /* fallback should NOT handle the well known "XXX@" IDs */
+#define NFS_IDMAP_CTRL_UNKNOWN_IS_99			0x00000008 /* for unknown IDs use uid/gid 99 instead of -2/nobody */
+#define NFS_IDMAP_CTRL_COMPARE_RESULTS			0x00000010 /* compare results of ID mapping service and fallback */
+#define NFS_IDMAP_CTRL_LOG_FAILED_MAPPINGS		0x00000020 /* log failed ID mapping attempts */
+#define NFS_IDMAP_CTRL_LOG_SUCCESSFUL_MAPPINGS		0x00000040 /* log successful ID mapping attempts */
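(A small illustration of how these control bits compose; in the kernel nfs_idmap_ctrl is a sysctl-settable global, while here it is just a local with values copied from the defines above:)

#define USE_IDMAP_SERVICE	0x00000001
#define LOG_FAILED_MAPPINGS	0x00000020

int
main(void)
{
	int idmap_ctrl = USE_IDMAP_SERVICE | LOG_FAILED_MAPPINGS;

	/* e.g. decide whether a failed ID mapping should be logged */
	return (idmap_ctrl & LOG_FAILED_MAPPINGS) ? 0 : 1;
}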
 
 #define NFSIOD_MAX	(MIN(nfsiod_thread_max, NFS_MAXASYNCTHREAD))
 
@@ -1018,14 +963,6 @@ struct nfs_dulookup {
 	char du_smallname[48];		/* buffer for small names */
 };
 
-/*
- * Network address hash list element
- */
-union nethostaddr {
-	u_int32_t had_inetaddr;
-	mbuf_t had_nam;
-};
-
 /*
  * One nfsrv_sock structure is maintained for each socket the
  * server is servicing requests on.
@@ -1071,7 +1008,7 @@ struct nfsrv_sock {
 
 #define SLPNOLIST ((struct nfsrv_sock *)0xdeadbeef)	/* sentinel value for sockets not in the nfsrv_sockwg list */
 
-__private_extern__ struct nfsrv_sock *nfsrv_udpsock;
+__private_extern__ struct nfsrv_sock *nfsrv_udpsock, *nfsrv_udp6sock;
 
 /*
  * global NFS server socket lists:
@@ -1148,7 +1085,7 @@ __private_extern__ lck_mtx_t *nfs_global_mutex;
 
 /* NFSv4 callback globals */
 __private_extern__ int nfs4_callback_timer_on;
-__private_extern__ in_port_t nfs4_cb_port;
+__private_extern__ in_port_t nfs4_cb_port, nfs4_cb_port6;
 
 /* nfs timer call structures */
 __private_extern__ thread_call_t	nfs_request_timer_call;
@@ -1180,15 +1117,23 @@ void	nfs4_mount_callback_shutdown(struct nfsmount *);
 void	nfs4_cb_accept(socket_t, void *, int);
 void	nfs4_cb_rcv(socket_t, void *, int);
 void	nfs4_callback_timer(void *, void *);
+int	nfs4_secinfo_rpc(struct nfsmount *, struct nfsreq_secinfo_args *, kauth_cred_t, uint32_t *, int *);
+int	nfs4_get_fs_locations(struct nfsmount *, nfsnode_t, u_char *, int, const char *, vfs_context_t, struct nfs_fs_locations *);
+void	nfs_fs_locations_cleanup(struct nfs_fs_locations *);
+void	nfs4_default_attrs_for_referral_trigger(nfsnode_t, char *, int, struct nfs_vattr *, fhandle_t *);
 
-int	nfs_connect(struct nfsmount *, int);
+int	nfs_sockaddr_cmp(struct sockaddr *, struct sockaddr *);
+int	nfs_connect(struct nfsmount *, int, int);
 void	nfs_disconnect(struct nfsmount *);
 void	nfs_need_reconnect(struct nfsmount *);
 void	nfs_mount_sock_thread_wake(struct nfsmount *);
 void	nfs_mount_check_dead_timeout(struct nfsmount *);
+void	nfs_rpc_record_state_init(struct nfs_rpc_record_state *);
+void	nfs_rpc_record_state_cleanup(struct nfs_rpc_record_state *);
+int	nfs_rpc_record_read(socket_t, struct nfs_rpc_record_state *, int, int *, mbuf_t *);
 
 int	nfs_getattr(nfsnode_t, struct nfs_vattr *, vfs_context_t, int);
-int	nfs_getattrcache(nfsnode_t, struct nfs_vattr *);
+int	nfs_getattrcache(nfsnode_t, struct nfs_vattr *, int);
 int	nfs_loadattrcache(nfsnode_t, struct nfs_vattr *, u_int64_t *, int);
 int	nfs_attrcachetimeout(nfsnode_t);
 
@@ -1196,6 +1141,7 @@ int	nfs_buf_page_inval(vnode_t vp, off_t offset);
 int	nfs_vinvalbuf(vnode_t, int, vfs_context_t, int);
 int	nfs_vinvalbuf2(vnode_t, int, thread_t, kauth_cred_t, int);
 int	nfs_vinvalbuf_internal(nfsnode_t, int, thread_t, kauth_cred_t, int, int);
+void	nfs_wait_bufs(nfsnode_t);
 
 int	nfs_request_create(nfsnode_t, mount_t, struct nfsm_chain *, int, thread_t, kauth_cred_t, struct nfsreq **);
 void	nfs_request_destroy(struct nfsreq *);
@@ -1205,14 +1151,14 @@ int	nfs_request_add_header(struct nfsreq *);
 int	nfs_request_send(struct nfsreq *, int);
 void	nfs_request_wait(struct nfsreq *);
 int	nfs_request_finish(struct nfsreq *, struct nfsm_chain *, int *);
-int	nfs_request(nfsnode_t, mount_t, struct nfsm_chain *, int, vfs_context_t, struct nfsm_chain *, u_int64_t *, int *);
-int	nfs_request2(nfsnode_t, mount_t, struct nfsm_chain *, int, thread_t, kauth_cred_t, int, struct nfsm_chain *, u_int64_t *, int *);
+int	nfs_request(nfsnode_t, mount_t, struct nfsm_chain *, int, vfs_context_t, struct nfsreq_secinfo_args *, struct nfsm_chain *, u_int64_t *, int *);
+int	nfs_request2(nfsnode_t, mount_t, struct nfsm_chain *, int, thread_t, kauth_cred_t, struct nfsreq_secinfo_args *, int, struct nfsm_chain *, u_int64_t *, int *);
 int	nfs_request_gss(mount_t, struct nfsm_chain *, thread_t,	kauth_cred_t, int, struct nfs_gss_clnt_ctx *, struct nfsm_chain *, int *);
-int	nfs_request_async(nfsnode_t, mount_t, struct nfsm_chain *, int, thread_t, kauth_cred_t, struct nfsreq_cbinfo *cb, struct nfsreq **);
+int	nfs_request_async(nfsnode_t, mount_t, struct nfsm_chain *, int, thread_t, kauth_cred_t, struct nfsreq_secinfo_args *, int, struct nfsreq_cbinfo *, struct nfsreq **);
 int	nfs_request_async_finish(struct nfsreq *, struct nfsm_chain *, u_int64_t *, int *);
 void	nfs_request_async_cancel(struct nfsreq *);
 void	nfs_request_timer(void *, void *);
-int	nfs_aux_request(struct nfsmount *, thread_t, struct sockaddr_in *, mbuf_t, uint32_t, int, int, struct nfsm_chain *);
+int	nfs_request_using_gss(struct nfsreq *);
 void	nfs_get_xid(uint64_t *);
 int	nfs_sigintr(struct nfsmount *, struct nfsreq *, thread_t, int);
 int	nfs_noremotehang(thread_t);
@@ -1221,6 +1167,24 @@ int	nfs_send(struct nfsreq *, int);
 int	nfs_sndlock(struct nfsreq *);
 void	nfs_sndunlock(struct nfsreq *);
 
+int	nfs_uaddr2sockaddr(const char *, struct sockaddr *);
+
+int	nfs_aux_request(struct nfsmount *, thread_t, struct sockaddr *, socket_t, int, mbuf_t, uint32_t, int, int, struct nfsm_chain *);
+int	nfs_portmap_lookup(struct nfsmount *, vfs_context_t, struct sockaddr *, socket_t, uint32_t, uint32_t, uint32_t, int);
+
+void	nfs_location_next(struct nfs_fs_locations *, struct nfs_location_index *);
+int	nfs_location_index_cmp(struct nfs_location_index *, struct nfs_location_index *);
+void	nfs_location_mntfromname(struct nfs_fs_locations *, struct nfs_location_index, char *, int, int);
+int	nfs_socket_create(struct nfsmount *, struct sockaddr *, int, in_port_t, uint32_t, uint32_t, int, struct nfs_socket **);
+void	nfs_socket_destroy(struct nfs_socket *);
+void	nfs_socket_options(struct nfsmount *, struct nfs_socket *);
+void	nfs_connect_upcall(socket_t, void *, int);
+int	nfs_connect_error_class(int);
+int	nfs_connect_search_loop(struct nfsmount *, struct nfs_socket_search *);
+void	nfs_socket_search_update_error(struct nfs_socket_search *, int);
+void	nfs_socket_search_cleanup(struct nfs_socket_search *);
+void	nfs_mount_connect_thread(void *, __unused wait_result_t);
+
 int	nfs_lookitup(nfsnode_t, char *, int, vfs_context_t, nfsnode_t *);
 void	nfs_dulookup_init(struct nfs_dulookup *, nfsnode_t, const char *, int, vfs_context_t);
 void	nfs_dulookup_start(struct nfs_dulookup *, nfsnode_t, vfs_context_t);
@@ -1229,16 +1193,28 @@ int	nfs_dir_buf_cache_lookup(nfsnode_t, nfsnode_t *, struct componentname *, vfs
 int	nfs_dir_buf_search(struct nfsbuf *, struct componentname *, fhandle_t *, struct nfs_vattr *, uint64_t *, time_t *, daddr64_t *, int);
 void	nfs_name_cache_purge(nfsnode_t, nfsnode_t, struct componentname *, vfs_context_t);
 
+uint32_t nfs4_ace_nfstype_to_vfstype(uint32_t, int *);
+uint32_t nfs4_ace_vfstype_to_nfstype(uint32_t, int *);
+uint32_t nfs4_ace_nfsflags_to_vfsflags(uint32_t);
+uint32_t nfs4_ace_vfsflags_to_nfsflags(uint32_t);
+uint32_t nfs4_ace_nfsmask_to_vfsrights(uint32_t);
+uint32_t nfs4_ace_vfsrights_to_nfsmask(uint32_t);
+int nfs4_id2guid(char *, guid_t *, int);
+int nfs4_guid2id(guid_t *, char *, int *, int);
+
 int	nfs_parsefattr(struct nfsm_chain *, int, struct nfs_vattr *);
-int	nfs4_parsefattr(struct nfsm_chain *, struct nfs_fsattr *, struct nfs_vattr *, fhandle_t *, struct dqblk *);
+int	nfs4_parsefattr(struct nfsm_chain *, struct nfs_fsattr *, struct nfs_vattr *, fhandle_t *, struct dqblk *, struct nfs_fs_locations *);
 void	nfs_vattr_set_supported(uint32_t *, struct vnode_attr *);
+void	nfs_vattr_set_bitmap(struct nfsmount *, uint32_t *, struct vnode_attr *);
 void	nfs3_pathconf_cache(struct nfsmount *, struct nfs_fsattr *);
+int	nfs3_mount_rpc(struct nfsmount *, struct sockaddr *, int, int, char *, vfs_context_t, int, fhandle_t *, struct nfs_sec *);
 void	nfs3_umount_rpc(struct nfsmount *, vfs_context_t, int);
-int	nfs_node_mode_slot(nfsnode_t, uid_t, int);
+int	nfs_node_access_slot(nfsnode_t, uid_t, int);
+void	nfs_vnode_notify(nfsnode_t, uint32_t);
 
 void	nfs_avoid_needless_id_setting_on_create(nfsnode_t, struct vnode_attr *, vfs_context_t);
 int	nfs4_create_rpc(vfs_context_t, nfsnode_t, struct componentname *, struct vnode_attr *, int, char *, nfsnode_t *);
-int	nfs_open_state_set_busy(nfsnode_t, vfs_context_t);
+int	nfs_open_state_set_busy(nfsnode_t, thread_t);
 void	nfs_open_state_clear_busy(nfsnode_t);
 struct nfs_open_owner *nfs_open_owner_find(struct nfsmount *, kauth_cred_t, int);
 void	nfs_open_owner_destroy(struct nfs_open_owner *);
@@ -1248,21 +1224,34 @@ int	nfs_open_owner_set_busy(struct nfs_open_owner *, thread_t);
 void	nfs_open_owner_clear_busy(struct nfs_open_owner *);
 void	nfs_owner_seqid_increment(struct nfs_open_owner *, struct nfs_lock_owner *, int);
 int	nfs_open_file_find(nfsnode_t, struct nfs_open_owner *, struct nfs_open_file **, uint32_t, uint32_t, int);
+int	nfs_open_file_find_internal(nfsnode_t, struct nfs_open_owner *, struct nfs_open_file **, uint32_t, uint32_t, int);
 void	nfs_open_file_destroy(struct nfs_open_file *);
 int	nfs_open_file_set_busy(struct nfs_open_file *, thread_t);
 void	nfs_open_file_clear_busy(struct nfs_open_file *);
+void	nfs_open_file_add_open(struct nfs_open_file *, uint32_t, uint32_t, int);
+void	nfs_open_file_remove_open_find(struct nfs_open_file *, uint32_t, uint32_t, uint32_t *, uint32_t *, int*);
+void	nfs_open_file_remove_open(struct nfs_open_file *, uint32_t, uint32_t);
 void	nfs_get_stateid(nfsnode_t, thread_t, kauth_cred_t, nfs_stateid *);
 int	nfs4_open(nfsnode_t, struct nfs_open_file *, uint32_t, uint32_t, vfs_context_t);
-int	nfs4_close(nfsnode_t, struct nfs_open_file *, uint32_t, uint32_t, vfs_context_t);
-int	nfs4_check_for_locks(struct nfs_open_owner *, struct nfs_open_file *);
-void	nfs4_reopen(struct nfs_open_file *, thread_t);
+int	nfs4_open_delegated(nfsnode_t, struct nfs_open_file *, uint32_t, uint32_t, vfs_context_t);
+int	nfs_close(nfsnode_t, struct nfs_open_file *, uint32_t, uint32_t, vfs_context_t);
+int	nfs_check_for_locks(struct nfs_open_owner *, struct nfs_open_file *);
+int	nfs4_reopen(struct nfs_open_file *, thread_t);
 int	nfs4_open_rpc(struct nfs_open_file *, vfs_context_t, struct componentname *, struct vnode_attr *, vnode_t, vnode_t *, int, int, int);
 int	nfs4_open_rpc_internal(struct nfs_open_file *, vfs_context_t, thread_t, kauth_cred_t, struct componentname *, struct vnode_attr *, vnode_t, vnode_t *, int, int, int);
+int	nfs4_open_confirm_rpc(struct nfsmount *, nfsnode_t, u_char *, int, struct nfs_open_owner *, nfs_stateid *, thread_t, kauth_cred_t, struct nfs_vattr *, uint64_t *);
 int	nfs4_open_reopen_rpc(struct nfs_open_file *, thread_t, kauth_cred_t, struct componentname *, vnode_t, vnode_t *, int, int);
 int	nfs4_open_reclaim_rpc(struct nfs_open_file *, int, int);
+int	nfs4_claim_delegated_open_rpc(struct nfs_open_file *, int, int, int);
+int	nfs4_claim_delegated_state_for_open_file(struct nfs_open_file *, int);
+int	nfs4_claim_delegated_state_for_node(nfsnode_t, int);
 int	nfs4_open_downgrade_rpc(nfsnode_t, struct nfs_open_file *, vfs_context_t);
 int	nfs4_close_rpc(nfsnode_t, struct nfs_open_file *, thread_t, kauth_cred_t, int);
-int	nfs4_delegreturn_rpc(struct nfsmount *, u_char *, int, struct nfs_stateid *, thread_t, kauth_cred_t);
+void	nfs4_delegation_return_enqueue(nfsnode_t);
+int	nfs4_delegation_return(nfsnode_t, int, thread_t, kauth_cred_t);
+int	nfs4_delegreturn_rpc(struct nfsmount *, u_char *, int, struct nfs_stateid *, int, thread_t, kauth_cred_t);
+void	nfs_release_open_state_for_node(nfsnode_t, int);
+void	nfs_revoke_open_state_for_node(nfsnode_t);
 struct nfs_lock_owner *nfs_lock_owner_find(nfsnode_t, proc_t, int);
 void	nfs_lock_owner_destroy(struct nfs_lock_owner *);
 void	nfs_lock_owner_ref(struct nfs_lock_owner *);
@@ -1273,37 +1262,52 @@ void	nfs_lock_owner_insert_held_lock(struct nfs_lock_owner *, struct nfs_file_lo
 struct nfs_file_lock *nfs_file_lock_alloc(struct nfs_lock_owner *);
 void	nfs_file_lock_destroy(struct nfs_file_lock *);
 int	nfs_file_lock_conflict(struct nfs_file_lock *, struct nfs_file_lock *, int *);
-int	nfs4_lock_rpc(nfsnode_t, struct nfs_open_file *, struct nfs_file_lock *, int, thread_t, kauth_cred_t);
-int	nfs4_unlock_rpc(nfsnode_t, struct nfs_lock_owner *, int, uint64_t, uint64_t, vfs_context_t);
-int	nfs4_getlock(nfsnode_t, struct nfs_lock_owner *, struct flock *, uint64_t, uint64_t, vfs_context_t);
-int	nfs4_setlock(nfsnode_t, struct nfs_open_file *, struct nfs_lock_owner *, int, uint64_t, uint64_t, int, short, vfs_context_t);
-int	nfs4_unlock(nfsnode_t, struct nfs_open_file *, struct nfs_lock_owner *, uint64_t, uint64_t, int, vfs_context_t);
+int	nfs4_lock_rpc(nfsnode_t, struct nfs_open_file *, struct nfs_file_lock *, int, int, thread_t, kauth_cred_t);
+int	nfs_unlock_rpc(nfsnode_t, struct nfs_lock_owner *, int, uint64_t, uint64_t, thread_t, kauth_cred_t, int);
+int	nfs_advlock_getlock(nfsnode_t, struct nfs_lock_owner *, struct flock *, uint64_t, uint64_t, vfs_context_t);
+int	nfs_advlock_setlock(nfsnode_t, struct nfs_open_file *, struct nfs_lock_owner *, int, uint64_t, uint64_t, int, short, vfs_context_t);
+int	nfs_advlock_unlock(nfsnode_t, struct nfs_open_file *, struct nfs_lock_owner *, uint64_t, uint64_t, int, vfs_context_t);
+
+nfsnode_t nfs4_named_attr_dir_get(nfsnode_t, int, vfs_context_t);
+int	nfs4_named_attr_get(nfsnode_t, struct componentname *, uint32_t, int, vfs_context_t, nfsnode_t *, struct nfs_open_file **);
+int	nfs4_named_attr_remove(nfsnode_t, nfsnode_t, const char *, vfs_context_t);
 
-int	nfs_mount_state_in_use_start(struct nfsmount *);
+int	nfs_mount_state_in_use_start(struct nfsmount *, thread_t);
 int	nfs_mount_state_in_use_end(struct nfsmount *, int);
 int	nfs_mount_state_error_should_restart(int);
+int	nfs_mount_state_error_delegation_lost(int);
 uint	nfs_mount_state_max_restarts(struct nfsmount *);
 int	nfs_mount_state_wait_for_recovery(struct nfsmount *);
-void	nfs4_recover(struct nfsmount *);
+void	nfs_need_recover(struct nfsmount *nmp, int error);
+void	nfs_recover(struct nfsmount *);
 
 int	nfs_vnop_access(struct vnop_access_args *);
-
-int	nfs3_vnop_open(struct vnop_open_args *);
-int	nfs3_vnop_close(struct vnop_close_args *);
+int	nfs_vnop_remove(struct vnop_remove_args *);
+int	nfs_vnop_read(struct vnop_read_args *);
+int	nfs_vnop_write(struct vnop_write_args *);
+int	nfs_vnop_open(struct vnop_open_args *);
+int	nfs_vnop_close(struct vnop_close_args *);
+int	nfs_vnop_advlock(struct vnop_advlock_args *);
+int	nfs_vnop_mmap(struct vnop_mmap_args *);
+int	nfs_vnop_mnomap(struct vnop_mnomap_args *);
 
 int	nfs4_vnop_create(struct vnop_create_args *);
 int	nfs4_vnop_mknod(struct vnop_mknod_args *);
-int	nfs4_vnop_open(struct vnop_open_args *);
 int	nfs4_vnop_close(struct vnop_close_args *);
-int	nfs4_vnop_mmap(struct vnop_mmap_args *);
-int	nfs4_vnop_mnomap(struct vnop_mnomap_args *);
 int	nfs4_vnop_getattr(struct vnop_getattr_args *);
-int	nfs4_vnop_read(struct vnop_read_args *);
 int	nfs4_vnop_link(struct vnop_link_args *);
 int	nfs4_vnop_mkdir(struct vnop_mkdir_args *);
 int	nfs4_vnop_rmdir(struct vnop_rmdir_args *);
 int	nfs4_vnop_symlink(struct vnop_symlink_args *);
-int	nfs4_vnop_advlock(struct vnop_advlock_args *ap);
+int	nfs4_vnop_getxattr(struct vnop_getxattr_args *);
+int	nfs4_vnop_setxattr(struct vnop_setxattr_args *);
+int	nfs4_vnop_removexattr(struct vnop_removexattr_args *);
+int	nfs4_vnop_listxattr(struct vnop_listxattr_args *);
+#if NAMEDSTREAMS
+int	nfs4_vnop_getnamedstream(struct vnop_getnamedstream_args *);
+int	nfs4_vnop_makenamedstream(struct vnop_makenamedstream_args *);
+int	nfs4_vnop_removenamedstream(struct vnop_removenamedstream_args *);
+#endif
 
 int	nfs_read_rpc(nfsnode_t, uio_t, vfs_context_t);
 int	nfs_write_rpc(nfsnode_t, uio_t, vfs_context_t, int *, uint64_t *);
@@ -1311,8 +1315,8 @@ int	nfs_write_rpc2(nfsnode_t, uio_t, thread_t, kauth_cred_t, int *, uint64_t *);
 
 int	nfs3_access_rpc(nfsnode_t, u_int32_t *, vfs_context_t);
 int	nfs4_access_rpc(nfsnode_t, u_int32_t *, vfs_context_t);
-int	nfs3_getattr_rpc(nfsnode_t, mount_t, u_char *, size_t, vfs_context_t, struct nfs_vattr *, u_int64_t *);
-int	nfs4_getattr_rpc(nfsnode_t, mount_t, u_char *, size_t, vfs_context_t, struct nfs_vattr *, u_int64_t *);
+int	nfs3_getattr_rpc(nfsnode_t, mount_t, u_char *, size_t, int, vfs_context_t, struct nfs_vattr *, u_int64_t *);
+int	nfs4_getattr_rpc(nfsnode_t, mount_t, u_char *, size_t, int, vfs_context_t, struct nfs_vattr *, u_int64_t *);
 int	nfs3_setattr_rpc(nfsnode_t, struct vnode_attr *, vfs_context_t);
 int	nfs4_setattr_rpc(nfsnode_t, struct vnode_attr *, vfs_context_t);
 int	nfs3_read_rpc_async(nfsnode_t, off_t, size_t, thread_t, kauth_cred_t, struct nfsreq_cbinfo *, struct nfsreq **);
@@ -1327,18 +1331,24 @@ int	nfs3_readdir_rpc(nfsnode_t, struct nfsbuf *, vfs_context_t);
 int	nfs4_readdir_rpc(nfsnode_t, struct nfsbuf *, vfs_context_t);
 int	nfs3_readlink_rpc(nfsnode_t, char *, uint32_t *, vfs_context_t);
 int	nfs4_readlink_rpc(nfsnode_t, char *, uint32_t *, vfs_context_t);
-int	nfs3_commit_rpc(nfsnode_t, u_int64_t, u_int64_t, kauth_cred_t);
-int	nfs4_commit_rpc(nfsnode_t, u_int64_t, u_int64_t, kauth_cred_t);
+int	nfs3_commit_rpc(nfsnode_t, uint64_t, uint64_t, kauth_cred_t, uint64_t);
+int	nfs4_commit_rpc(nfsnode_t, uint64_t, uint64_t, kauth_cred_t, uint64_t);
 int	nfs3_lookup_rpc_async(nfsnode_t, char *, int, vfs_context_t, struct nfsreq **);
 int	nfs4_lookup_rpc_async(nfsnode_t, char *, int, vfs_context_t, struct nfsreq **);
-int	nfs3_lookup_rpc_async_finish(nfsnode_t, vfs_context_t, struct nfsreq *, u_int64_t *, fhandle_t *, struct nfs_vattr *);
-int	nfs4_lookup_rpc_async_finish(nfsnode_t, vfs_context_t, struct nfsreq *, u_int64_t *, fhandle_t *, struct nfs_vattr *);
+int	nfs3_lookup_rpc_async_finish(nfsnode_t, char *, int, vfs_context_t, struct nfsreq *, u_int64_t *, fhandle_t *, struct nfs_vattr *);
+int	nfs4_lookup_rpc_async_finish(nfsnode_t, char *, int, vfs_context_t, struct nfsreq *, u_int64_t *, fhandle_t *, struct nfs_vattr *);
 int	nfs3_remove_rpc(nfsnode_t, char *, int, thread_t, kauth_cred_t);
 int	nfs4_remove_rpc(nfsnode_t, char *, int, thread_t, kauth_cred_t);
 int	nfs3_rename_rpc(nfsnode_t, char *, int, nfsnode_t, char *, int, vfs_context_t);
 int	nfs4_rename_rpc(nfsnode_t, char *, int, nfsnode_t, char *, int, vfs_context_t);
 int	nfs3_pathconf_rpc(nfsnode_t, struct nfs_fsattr *, vfs_context_t);
 int	nfs4_pathconf_rpc(nfsnode_t, struct nfs_fsattr *, vfs_context_t);
+int	nfs3_setlock_rpc(nfsnode_t, struct nfs_open_file *, struct nfs_file_lock *, int, int, thread_t, kauth_cred_t);
+int	nfs4_setlock_rpc(nfsnode_t, struct nfs_open_file *, struct nfs_file_lock *, int, int, thread_t, kauth_cred_t);
+int	nfs3_unlock_rpc(nfsnode_t, struct nfs_lock_owner *, int, uint64_t, uint64_t, int, thread_t, kauth_cred_t);
+int	nfs4_unlock_rpc(nfsnode_t, struct nfs_lock_owner *, int, uint64_t, uint64_t, int, thread_t, kauth_cred_t);
+int	nfs3_getlock_rpc(nfsnode_t, struct nfs_lock_owner *, struct flock *, uint64_t, uint64_t, vfs_context_t);
+int	nfs4_getlock_rpc(nfsnode_t, struct nfs_lock_owner *, struct flock *, uint64_t, uint64_t, vfs_context_t);
 
 void	nfsrv_active_user_list_reclaim(void);
 void	nfsrv_cleancache(void);
@@ -1363,7 +1373,7 @@ int	nfsrv_is_initialized(void);
 int	nfsrv_namei(struct nfsrv_descript *, vfs_context_t, struct nameidata *,
 			struct nfs_filehandle *, vnode_t *,
 			struct nfs_export **, struct nfs_export_options **);
-void	nfsrv_rcv(socket_t, caddr_t, int);
+void	nfsrv_rcv(socket_t, void *, int);
 void	nfsrv_rcv_locked(socket_t, struct nfsrv_sock *, int);
 int	nfsrv_rephead(struct nfsrv_descript *, struct nfsrv_sock *, struct nfsm_chain *, size_t);
 int	nfsrv_send(struct nfsrv_sock *, mbuf_t, mbuf_t);
@@ -1410,6 +1420,15 @@ struct nfs_diskless;
 int	nfs_boot_init(struct nfs_diskless *);
 int	nfs_boot_getfh(struct nfs_diskless *, int, int);
 
+#if CONFIG_TRIGGERS
+resolver_result_t nfs_mirror_mount_trigger_resolve(vnode_t, const struct componentname *, enum path_operation, int, void *, vfs_context_t);
+resolver_result_t nfs_mirror_mount_trigger_unresolve(vnode_t, int, void *, vfs_context_t);
+resolver_result_t nfs_mirror_mount_trigger_rearm(vnode_t, int, void *, vfs_context_t);
+int	nfs_mirror_mount_domount(vnode_t, vnode_t, vfs_context_t);
+void	nfs_ephemeral_mount_harvester_start(void);
+void	nfs_ephemeral_mount_harvester(__unused void *arg, __unused wait_result_t wr);
+#endif
+
 __END_DECLS
 
 #endif	/* KERNEL */
diff --git a/bsd/nfs/nfs4_subs.c b/bsd/nfs/nfs4_subs.c
index 6b1786cda..3d65f7985 100644
--- a/bsd/nfs/nfs4_subs.c
+++ b/bsd/nfs/nfs4_subs.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2006-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -87,7 +87,11 @@
  *
  * In an attempt to differentiate mounts we include the mntfromname and mntonname
  * strings in the client ID (as long as they fit).  We also make sure that the
- * value does not conflict with any existing values in use.
+ * value does not conflict with any existing values in use (by changing the unique ID).
+ *
+ * Note that info such as the server's address may change over the lifetime of the
+ * mount.  But the client ID will not be updated because we don't want it changing
+ * simply because we switched to a different server address.
  */
 int
 nfs4_init_clientid(struct nfsmount *nmp)
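(The nci_idlen computation in the function below spells out the layout of the long client ID: a 32-bit uniquifier, the en0 hardware address, the server sockaddr, then the two mount-name strings with their NUL terminators. An illustrative userland rendering of that packing, following the length formula; this is not the exact kernel encoder, and the 6-byte en0 width is an assumption:)

#include <stdint.h>
#include <string.h>

static size_t
build_clientid(char *buf, size_t buflen, uint32_t unique,
    const unsigned char en0[6], const void *saddr, size_t salen,
    const char *mntfrom, const char *mnton)
{
	size_t off = 0;
	size_t need = sizeof(unique) + 6 + salen +
	    strlen(mntfrom) + 1 + strlen(mnton) + 1;

	if (need > buflen)
		return 0;	/* the kernel caps this at NFS4_OPAQUE_LIMIT */
	memcpy(buf + off, &unique, sizeof(unique)); off += sizeof(unique);
	memcpy(buf + off, en0, 6);                  off += 6;
	memcpy(buf + off, saddr, salen);            off += salen;
	memcpy(buf + off, mntfrom, strlen(mntfrom) + 1);
	off += strlen(mntfrom) + 1;
	memcpy(buf + off, mnton, strlen(mnton) + 1);
	return need;
}

int
main(void)
{
	char id[256];
	unsigned char en0[6] = { 0, 1, 2, 3, 4, 5 };
	unsigned char sa[16] = { 0 };

	return build_clientid(id, sizeof(id), 1, en0, sa, sizeof(sa),
	    "server:/export", "/mnt") ? 0 : 1;
}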
@@ -120,7 +124,7 @@ nfs4_init_clientid(struct nfsmount *nmp)
 		return (ENOMEM);
 
 	vsfs = vfs_statfs(nmp->nm_mountp);
-	saddr = mbuf_data(nmp->nm_nam);
+	saddr = nmp->nm_saddr;
 	ncip->nci_idlen = sizeof(uint32_t) + sizeof(en0addr) + saddr->sa_len +
 		strlen(vsfs->f_mntfromname) + 1 + strlen(vsfs->f_mntonname) + 1;
 	if (ncip->nci_idlen > NFS4_OPAQUE_LIMIT)
@@ -199,10 +203,12 @@ nfs4_setclientid(struct nfsmount *nmp)
 	thread_t thd;
 	kauth_cred_t cred;
 	struct nfsm_chain nmreq, nmrep;
-	struct sockaddr_in sin;
-	uint8_t *addr;
-	char raddr[32];
-	int ralen = 0;
+	struct sockaddr_storage ss;
+	void *sinaddr = NULL;
+	char raddr[MAX_IPv6_STR_LEN];
+	char uaddr[MAX_IPv6_STR_LEN+16];
+	int ualen = 0;
+	in_port_t port;
 
 	thd = current_thread();
 	cred = IS_VALID_CRED(nmp->nm_mcred) ? nmp->nm_mcred : vfs_context_ucred(vfs_context_kernel());
@@ -224,26 +230,35 @@ nfs4_setclientid(struct nfsmount *nmp)
 	nfsm_chain_add_64(error, &nmreq, nmp->nm_mounttime);
 	nfsm_chain_add_32(error, &nmreq, nmp->nm_longid->nci_idlen);
 	nfsm_chain_add_opaque(error, &nmreq, nmp->nm_longid->nci_id, nmp->nm_longid->nci_idlen);
+	nfsmout_if(error);
 	/* cb_client4      callback; */
-	if (nmp->nm_cbid && nfs4_cb_port &&
-	    !(error = sock_getsockname(nmp->nm_so, (struct sockaddr*)&sin, sizeof(sin)))) {
-		/* assemble r_addr = h1.h2.h3.h4.p1.p2 */
-		/* h = source address of nmp->nm_so */
-		/* p = nfs4_cb_port */
-		addr = (uint8_t*)&sin.sin_addr.s_addr;
-		ralen = snprintf(raddr, sizeof(raddr), "%d.%d.%d.%d.%d.%d", 
-				addr[0], addr[1], addr[2], addr[3],
-				((nfs4_cb_port >> 8) & 0xff),
-				(nfs4_cb_port & 0xff));
-		/* make sure it fit, give up if it didn't */
-		if (ralen >= (int)sizeof(raddr))
-			ralen = 0;
-	}
-	if (ralen > 0) {
+	if (!NMFLAG(nmp, NOCALLBACK) && nmp->nm_cbid && nfs4_cb_port &&
+	    !sock_getsockname(nmp->nm_nso->nso_so, (struct sockaddr*)&ss, sizeof(ss))) {
+		if (ss.ss_family == AF_INET) {
+			sinaddr = &((struct sockaddr_in*)&ss)->sin_addr;
+			port = nfs4_cb_port;
+		} else if (ss.ss_family == AF_INET6) {
+			sinaddr = &((struct sockaddr_in6*)&ss)->sin6_addr;
+			port = nfs4_cb_port6;
+		}
+		if (sinaddr && port && (inet_ntop(ss.ss_family, sinaddr, raddr, sizeof(raddr)) == raddr)) {
+			/* assemble r_addr = universal address (nmp->nm_nso->nso_so source IP addr + port) */
+			ualen = snprintf(uaddr, sizeof(uaddr), "%s.%d.%d", raddr,
+					((port >> 8) & 0xff),
+					(port & 0xff));
+			/* make sure it fit, give up if it didn't */
+			if (ualen >= (int)sizeof(uaddr))
+				ualen = 0;
+		}
+	}
+	if (ualen > 0) {
 		/* add callback info */
 		nfsm_chain_add_32(error, &nmreq, NFS4_CALLBACK_PROG); /* callback program */
-		nfsm_chain_add_string(error, &nmreq, "tcp", 3); /* callback r_netid */
-		nfsm_chain_add_string(error, &nmreq, raddr, ralen); /* callback r_addr */
+		if (ss.ss_family == AF_INET)
+			nfsm_chain_add_string(error, &nmreq, "tcp", 3); /* callback r_netid */
+		else if (ss.ss_family == AF_INET6)
+			nfsm_chain_add_string(error, &nmreq, "tcp6", 4); /* callback r_netid */
+		nfsm_chain_add_string(error, &nmreq, uaddr, ualen); /* callback r_addr */
 		nfsm_chain_add_32(error, &nmreq, nmp->nm_cbid); /* callback_ident */
 	} else {
 		/* don't provide valid callback info */
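(The uaddr assembled above is the ONC RPC "universal address" form (RFC 5665): the printable IP address followed by the port's high and low bytes as two further dotted decimal octets. A standalone demonstration with a hypothetical callback port:)

#include <arpa/inet.h>
#include <stdio.h>

int
main(void)
{
	struct in_addr a;
	char ip[INET_ADDRSTRLEN], uaddr[INET_ADDRSTRLEN + 16];
	unsigned short port = 987;	/* hypothetical callback port */

	inet_pton(AF_INET, "192.0.2.10", &a);
	inet_ntop(AF_INET, &a, ip, sizeof(ip));
	snprintf(uaddr, sizeof(uaddr), "%s.%d.%d", ip,
	    (port >> 8) & 0xff, port & 0xff);
	printf("%s\n", uaddr);	/* -> 192.0.2.10.3.219 */
	return 0;
}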
@@ -255,9 +270,11 @@ nfs4_setclientid(struct nfsmount *nmp)
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
-	error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, thd, cred, R_SETUP, &nmrep, &xid, &status);
+	error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, thd, cred, NULL, R_SETUP, &nmrep, &xid, &status);
 	nfsm_chain_skip_tag(error, &nmrep);
 	nfsm_chain_get_32(error, &nmrep, numops);
+	if (!error && (numops != 1) && status)
+		error = status;
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_SETCLIENTID);
 	if (error == NFSERR_CLID_INUSE)
 		printf("nfs4_setclientid: client ID in use?\n");
@@ -267,43 +284,57 @@ nfs4_setclientid(struct nfsmount *nmp)
 	nfsm_chain_cleanup(&nmreq);
 	nfsm_chain_cleanup(&nmrep);
 
-	// SETCLIENTID_CONFIRM, PUTFH, GETATTR(FS)
-	numops = nmp->nm_dnp ? 3 : 1;
-	nfsm_chain_build_alloc_init(error, &nmreq, 28 * NFSX_UNSIGNED);
+	// SETCLIENTID_CONFIRM
+	numops = 1;
+	nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED);
 	nfsm_chain_add_compound_header(error, &nmreq, "setclid_conf", numops);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_SETCLIENTID_CONFIRM);
 	nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid);
 	nfsm_chain_add_64(error, &nmreq, verifier);
-	if (nmp->nm_dnp) {
-		/* refresh fs attributes too */
-		numops--;
-		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
-		nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, nmp->nm_dnp->n_fhp, nmp->nm_dnp->n_fhsize);
-		numops--;
-		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-		NFS_CLEAR_ATTRIBUTES(bitmap);
-		NFS4_PER_FS_ATTRIBUTES(bitmap);
-		nfsm_chain_add_bitmap(error, &nmreq, bitmap, NFS_ATTR_BITMAP_LEN);
-	}
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
-	error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, thd, cred, R_SETUP, &nmrep, &xid, &status);
+	error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, thd, cred, NULL, R_SETUP, &nmrep, &xid, &status);
 	nfsm_chain_skip_tag(error, &nmrep);
 	nfsm_chain_get_32(error, &nmrep, numops);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_SETCLIENTID_CONFIRM);
 	if (error)
 		printf("nfs4_setclientid: confirm error %d\n", error);
-	if (nmp->nm_dnp) {
-		nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
-		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-		nfsmout_if(error);
-		lck_mtx_lock(&nmp->nm_lock);
-		error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, NULL, NULL, NULL);
-		lck_mtx_unlock(&nmp->nm_lock);
-	}
+	lck_mtx_lock(&nmp->nm_lock);
+	if (!error)
+		nmp->nm_state |= NFSSTA_CLIENTID;
+	lck_mtx_unlock(&nmp->nm_lock);
 
+	nfsmout_if(error || !nmp->nm_dnp);
+
+	/* take the opportunity to refresh fs attributes too */
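+	/* (a separate request, so any getattr failure won't affect the now-confirmed client ID) */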
+	// PUTFH, GETATTR(FS)
+	numops = 2;
+	nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED);
+	nfsm_chain_add_compound_header(error, &nmreq, "setclid_attr", numops);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
+	nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, nmp->nm_dnp->n_fhp, nmp->nm_dnp->n_fhsize);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
+	NFS_CLEAR_ATTRIBUTES(bitmap);
+	NFS4_PER_FS_ATTRIBUTES(bitmap);
+	nfsm_chain_add_bitmap(error, &nmreq, bitmap, NFS_ATTR_BITMAP_LEN);
+	nfsm_chain_build_done(error, &nmreq);
+	nfsm_assert(error, (numops == 0), EPROTO);
+	nfsmout_if(error);
+	error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, thd, cred, NULL, R_SETUP, &nmrep, &xid, &status);
+	nfsm_chain_skip_tag(error, &nmrep);
+	nfsm_chain_get_32(error, &nmrep, numops);
+	lck_mtx_lock(&nmp->nm_lock);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
+	if (!error)
+		error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, NULL, NULL, NULL, NULL);
+	lck_mtx_unlock(&nmp->nm_lock);
+	if (error)  /* ignore any error from the getattr */
+		error = 0;
 nfsmout:
 	nfsm_chain_cleanup(&nmreq);
 	nfsm_chain_cleanup(&nmrep);
@@ -341,7 +372,7 @@ nfs4_renew(struct nfsmount *nmp, int rpcflag)
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
 	error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
-			current_thread(), cred, rpcflag, &nmrep, &xid, &status);
+			current_thread(), cred, NULL, rpcflag, &nmrep, &xid, &status);
 	nfsm_chain_skip_tag(error, &nmrep);
 	nfsm_chain_get_32(error, &nmrep, numops);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_RENEW);
@@ -381,8 +412,7 @@ out:
 	if (error && (error != ETIMEDOUT) &&
 	    (nmp->nm_clientid == clientid) && !(nmp->nm_state & NFSSTA_RECOVER)) {
 		printf("nfs4_renew_timer: error %d, initiating recovery\n", error);
-		nmp->nm_state |= NFSSTA_RECOVER;
-		nfs_mount_sock_thread_wake(nmp);
+		nfs_need_recover(nmp, error);
 	}
 
 	interval = nmp->nm_fsattr.nfsa_lease / (error ? 4 : 2);
@@ -392,6 +422,1034 @@ out:
 	nfs_interval_timer_start(nmp->nm_renew_timer, interval * 1000);
 }
 
+/*
+ * get the list of supported security flavors
+ *
+ * How we get them depends on what args we are given:
+ * 
+ * FH?   Name?  Action
+ * ----- -----  ------
+ * YES   YES    Use the fh and name provided
+ * YES   NO     4.1-only: just use the fh provided
+ * NO    YES    Use the node's (or root) fh and the name provided
+ * NO    NO     Use the node's parent and the node's name (4.1 will just use node's fh)
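+ *
+ * A minimal caller-side sketch for the "NO fh / NO name" case (a
+ * hypothetical example; the buffer size is the caller's choice and
+ * NX_MAX_SEC_FLAVORS from nfs.h is assumed here):
+ *
+ *	struct nfsreq_secinfo_args si;
+ *	uint32_t sec[NX_MAX_SEC_FLAVORS];
+ *	int seccount = NX_MAX_SEC_FLAVORS;
+ *	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
+ *	error = nfs4_secinfo_rpc(nmp, &si, cred, sec, &seccount);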
+ */
+int
+nfs4_secinfo_rpc(struct nfsmount *nmp, struct nfsreq_secinfo_args *siap, kauth_cred_t cred, uint32_t *sec, int *seccountp)
+{
+	int error = 0, status, nfsvers, numops, namelen, fhsize;
+	vnode_t dvp = NULLVP;
+	nfsnode_t np, dnp;
+	u_char *fhp;
+	const char *vname = NULL, *name;
+	uint64_t xid;
+	struct nfsm_chain nmreq, nmrep;
+
+	*seccountp = 0;
+	if (!nmp)
+		return (ENXIO);
+	nfsvers = nmp->nm_vers;
+	np = siap->rsia_np;
+
+	nfsm_chain_null(&nmreq);
+	nfsm_chain_null(&nmrep);
+
+	fhp = siap->rsia_fh;
+	fhsize = fhp ? siap->rsia_fhsize : 0;
+	name = siap->rsia_name;
+	namelen = name ? siap->rsia_namelen : 0;
+	if (name && !namelen)
+		namelen = strlen(name);
+	if (!fhp && name) {
+		if (!np)  /* use PUTROOTFH */
+			goto gotargs;
+		fhp = np->n_fhp;
+		fhsize = np->n_fhsize;
+	}
+	if (fhp && name)
+		goto gotargs;
+
+	if (!np)
+		return (EIO);
+	nfs_node_lock_force(np);
+	if ((vnode_vtype(NFSTOV(np)) != VDIR) && np->n_sillyrename) {
+		/*
+		 * The node's been sillyrenamed, so we need to use
+		 * the sillyrename directory/name to do the open.
+		 */
+		struct nfs_sillyrename *nsp = np->n_sillyrename;
+		dnp = nsp->nsr_dnp;
+		dvp = NFSTOV(dnp);
+		if ((error = vnode_get(dvp))) {
+			nfs_node_unlock(np);
+			goto nfsmout;
+		}
+		fhp = dnp->n_fhp;
+		fhsize = dnp->n_fhsize;
+		name = nsp->nsr_name;
+		namelen = nsp->nsr_namlen;
+	} else {
+		/*
+		 * [sigh] We can't trust VFS to get the parent right for named
+		 * attribute nodes.  (It likes to reparent the nodes after we've
+		 * created them.)  Luckily we can probably get the right parent
+		 * from the n_parent we have stashed away.
+		 */
+		if ((np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR) &&
+		    (((dvp = np->n_parent)) && (error = vnode_get(dvp))))
+			dvp = NULL;
+		if (!dvp)
+			dvp = vnode_getparent(NFSTOV(np));
+		vname = vnode_getname(NFSTOV(np));
+		if (!dvp || !vname) {
+			if (!error)
+				error = EIO;
+			nfs_node_unlock(np);
+			goto nfsmout;
+		}
+		dnp = VTONFS(dvp);
+		fhp = dnp->n_fhp;
+		fhsize = dnp->n_fhsize;
+		name = vname;
+		namelen = strnlen(vname, MAXPATHLEN);
+	}
+	nfs_node_unlock(np);
+
+gotargs:
+	// PUT(ROOT)FH + SECINFO
+	numops = 2;
+	nfsm_chain_build_alloc_init(error, &nmreq,
+		4 * NFSX_UNSIGNED + NFSX_FH(nfsvers) + nfsm_rndup(namelen));
+	nfsm_chain_add_compound_header(error, &nmreq, "secinfo", numops);
+	numops--;
+	if (fhp) {
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
+		nfsm_chain_add_fh(error, &nmreq, nfsvers, fhp, fhsize);
+	} else {
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTROOTFH);
+	}
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_SECINFO);
+	nfsm_chain_add_name(error, &nmreq, name, namelen, nmp);
+	nfsm_chain_build_done(error, &nmreq);
+	nfsm_assert(error, (numops == 0), EPROTO);
+	nfsmout_if(error);
+	error = nfs_request2(np, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
+			current_thread(), cred, NULL, 0, &nmrep, &xid, &status);
+	nfsm_chain_skip_tag(error, &nmrep);
+	nfsm_chain_get_32(error, &nmrep, numops);
+	nfsm_chain_op_check(error, &nmrep, fhp ? NFS_OP_PUTFH : NFS_OP_PUTROOTFH);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_SECINFO);
+	nfsmout_if(error);
+	error = nfsm_chain_get_secinfo(&nmrep, sec, seccountp);
+nfsmout:
+	nfsm_chain_cleanup(&nmreq);
+	nfsm_chain_cleanup(&nmrep);
+	if (vname)
+		vnode_putname(vname);
+	if (dvp != NULLVP)
+		vnode_put(dvp);
+	return (error);
+}
+
+/*
+ * Parse an NFSv4 SECINFO array to an array of pseudo flavors.
+ * (Note: also works for MOUNTv3 security arrays.)
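+ *
+ * Each array entry on the wire is, roughly:
+ *	uint32 flavor
+ *	if flavor == RPCSEC_GSS:
+ *		opaque oid<>	(GSS mechanism OID; only Kerberos is matched)
+ *		uint32 qop
+ *		uint32 service	(none/integrity/privacy -> krb5/krb5i/krb5p)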
+ */
+int
+nfsm_chain_get_secinfo(struct nfsm_chain *nmc, uint32_t *sec, int *seccountp)
+{
+	int error = 0, secmax, seccount, srvcount;
+	uint32_t flavor, val;
+	u_char oid[12];
+
+	seccount = srvcount = 0;
+	secmax = *seccountp;
+	*seccountp = 0;
+
+	nfsm_chain_get_32(error, nmc, srvcount);
+	while (!error && (srvcount > 0) && (seccount < secmax)) {
+		nfsm_chain_get_32(error, nmc, flavor);
+		nfsmout_if(error);
+		switch (flavor) {
+		case RPCAUTH_NONE:
+		case RPCAUTH_SYS:
+		case RPCAUTH_KRB5:
+		case RPCAUTH_KRB5I:
+		case RPCAUTH_KRB5P:
+			sec[seccount++] = flavor;
+			break;
+		case RPCSEC_GSS:
+			/* we only recognize KRB5, KRB5I, KRB5P */
+			nfsm_chain_get_32(error, nmc, val); /* OID length */
+			nfsmout_if(error);
+			if (val != sizeof(krb5_mech)) {
+				nfsm_chain_adv(error, nmc, nfsm_rndup(val));
+				nfsm_chain_adv(error, nmc, 2*NFSX_UNSIGNED);
+				break;
+			}
+			nfsm_chain_get_opaque(error, nmc, val, oid); /* OID bytes */
+			nfsmout_if(error);
+			if (bcmp(oid, krb5_mech, sizeof(krb5_mech))) {
+				nfsm_chain_adv(error, nmc, 2*NFSX_UNSIGNED);
+				break;
+			}
+			nfsm_chain_get_32(error, nmc, val); /* QOP */
+			nfsm_chain_get_32(error, nmc, val); /* SERVICE */
+			nfsmout_if(error);
+			switch (val) {
+			case RPCSEC_GSS_SVC_NONE:
+				sec[seccount++] = RPCAUTH_KRB5;
+				break;
+			case RPCSEC_GSS_SVC_INTEGRITY:
+				sec[seccount++] = RPCAUTH_KRB5I;
+				break;
+			case RPCSEC_GSS_SVC_PRIVACY:
+				sec[seccount++] = RPCAUTH_KRB5P;
+				break;
+			}
+			break;
+		}
+		srvcount--;
+	}
+nfsmout:
+	if (!error)
+		*seccountp = seccount;
+	return (error);
+}
+
+
+/*
+ * Fetch the FS_LOCATIONS attribute for the node found at directory/name.
+ */
+int
+nfs4_get_fs_locations(
+	struct nfsmount *nmp,
+	nfsnode_t dnp,
+	u_char *fhp,
+	int fhsize,
+	const char *name,
+	vfs_context_t ctx,
+	struct nfs_fs_locations *nfslsp)
+{
+	int error = 0, numops, status;
+	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
+	struct nfsreq rq, *req = &rq;
+	struct nfsreq_secinfo_args si;
+	struct nfsm_chain nmreq, nmrep;
+	uint64_t xid;
+
+	if (!fhp && dnp) {
+		fhp = dnp->n_fhp;
+		fhsize = dnp->n_fhsize;
+	}
+	if (!fhp)
+		return (EINVAL);
+
+	nfsm_chain_null(&nmreq);
+	nfsm_chain_null(&nmrep);
+
+	NFSREQ_SECINFO_SET(&si, NULL, fhp, fhsize, name, 0);
+	numops = 3;
+	nfsm_chain_build_alloc_init(error, &nmreq, 18 * NFSX_UNSIGNED);
+	nfsm_chain_add_compound_header(error, &nmreq, "fs_locations", numops);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
+	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, fhp, fhsize);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUP);
+	nfsm_chain_add_name(error, &nmreq, name, strlen(name), nmp);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
+	NFS_CLEAR_ATTRIBUTES(bitmap);
+	NFS_BITMAP_SET(bitmap, NFS_FATTR_FS_LOCATIONS);
+	nfsm_chain_add_bitmap(error, &nmreq, bitmap, NFS_ATTR_BITMAP_LEN);
+	nfsm_chain_build_done(error, &nmreq);
+	nfsm_assert(error, (numops == 0), EPROTO);
+	nfsmout_if(error);
+	error = nfs_request_async(dnp, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
+			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
+	if (!error)
+		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
+	nfsm_chain_skip_tag(error, &nmrep);
+	nfsm_chain_get_32(error, &nmrep, numops);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_LOOKUP);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
+	nfsmout_if(error);
+	error = nfs4_parsefattr(&nmrep, NULL, NULL, NULL, NULL, nfslsp);
+nfsmout:
+	nfsm_chain_cleanup(&nmrep);
+	nfsm_chain_cleanup(&nmreq);
+	return (error);
+}
+
+/*
+ * Referral trigger nodes may not have many attributes provided by the
+ * server, so put some default values in place.
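+ * That way VFS can instantiate a usable trigger vnode before the referral
+ * is actually followed.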
+ */
+void
+nfs4_default_attrs_for_referral_trigger(
+	nfsnode_t dnp,
+	char *name,
+	int namelen,
+	struct nfs_vattr *nvap,
+	fhandle_t *fhp)
+{
+	struct timeval now;
+	int len;
+
+	microtime(&now);
+
+	nvap->nva_flags = NFS_FFLAG_TRIGGER | NFS_FFLAG_TRIGGER_REFERRAL;
+	if (!NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_TYPE)) {
+		NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TYPE);
+		nvap->nva_type = VDIR;
+	}
+	if (!NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_FSID)) {
+		NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_FSID);
+		nvap->nva_fsid.major = 0;
+		nvap->nva_fsid.minor = 0;
+	}
+	if (!NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER) && dnp) {
+		NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_OWNER);
+		nvap->nva_uid = dnp->n_vattr.nva_uid;
+		nvap->nva_uuuid = dnp->n_vattr.nva_uuuid;
+	}
+	if (!NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER_GROUP) && dnp) {
+		NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_OWNER_GROUP);
+		nvap->nva_gid = dnp->n_vattr.nva_gid;
+		nvap->nva_guuid = dnp->n_vattr.nva_guuid;
+	}
+	if (!NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_MODE)) {
+		NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_MODE);
+		nvap->nva_mode = 0777;
+	}
+	if (!NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_SIZE)) {
+		NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_SIZE);
+		nvap->nva_size = 0;
+	}
+	if (!NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_SPACE_USED)) {
+		NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_SPACE_USED);
+		nvap->nva_bytes = 0;
+	}
+	if (!NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_NUMLINKS)) {
+		NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_NUMLINKS);
+		nvap->nva_nlink = 2;
+	}
+	if (!NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_TIME_ACCESS)) {
+		NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TIME_ACCESS);
+		nvap->nva_timesec[NFSTIME_ACCESS] = now.tv_sec;
+		nvap->nva_timensec[NFSTIME_ACCESS] = now.tv_usec * 1000;
+	}
+	if (!NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_TIME_MODIFY)) {
+		NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TIME_MODIFY);
+		nvap->nva_timesec[NFSTIME_MODIFY] = now.tv_sec;
+		nvap->nva_timensec[NFSTIME_MODIFY] = now.tv_usec * 1000;
+	}
+	if (!NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_TIME_METADATA)) {
+		NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TIME_METADATA);
+		nvap->nva_timesec[NFSTIME_CHANGE] = now.tv_sec;
+		nvap->nva_timensec[NFSTIME_CHANGE] = now.tv_usec * 1000;
+	}
+	if (!NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_FILEID)) {
+		NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_FILEID);
+		nvap->nva_fileid = 42;
+	}
+	if (!NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_FILEHANDLE) && dnp && name && fhp) {
+		/* Build a fake filehandle made up of parent node pointer and name */
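+		/* (a local identity for the node only; it is not a server-issued handle) */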
+		NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_FILEHANDLE);
+		bcopy(&dnp, &fhp->fh_data[0], sizeof(dnp));
+		len = sizeof(fhp->fh_data) - sizeof(dnp);
+		bcopy(name, &fhp->fh_data[0] + sizeof(dnp), MIN(len, namelen));
+		fhp->fh_len = sizeof(dnp) + namelen;
+		if (fhp->fh_len > (int)sizeof(fhp->fh_data))
+			fhp->fh_len = sizeof(fhp->fh_data);
+	}
+}
+
+/*
+ * Set NFS bitmap according to what's set in vnode_attr (and supported by the server).
+ */
+void
+nfs_vattr_set_bitmap(struct nfsmount *nmp, uint32_t *bitmap, struct vnode_attr *vap)
+{
+	int i;
+
+	NFS_CLEAR_ATTRIBUTES(bitmap);
+	if (VATTR_IS_ACTIVE(vap, va_data_size))
+		NFS_BITMAP_SET(bitmap, NFS_FATTR_SIZE);
+	if (VATTR_IS_ACTIVE(vap, va_acl) && (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL))
+		NFS_BITMAP_SET(bitmap, NFS_FATTR_ACL);
+	if (VATTR_IS_ACTIVE(vap, va_flags)) {
+		NFS_BITMAP_SET(bitmap, NFS_FATTR_ARCHIVE);
+		NFS_BITMAP_SET(bitmap, NFS_FATTR_HIDDEN);
+	}
+	// NFS_BITMAP_SET(bitmap, NFS_FATTR_MIMETYPE)
+	if (VATTR_IS_ACTIVE(vap, va_mode) && !NMFLAG(nmp, ACLONLY))
+		NFS_BITMAP_SET(bitmap, NFS_FATTR_MODE);
+	if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_uuuid))
+		NFS_BITMAP_SET(bitmap, NFS_FATTR_OWNER);
+	if (VATTR_IS_ACTIVE(vap, va_gid) || VATTR_IS_ACTIVE(vap, va_guuid))
+		NFS_BITMAP_SET(bitmap, NFS_FATTR_OWNER_GROUP);
+	// NFS_BITMAP_SET(bitmap, NFS_FATTR_SYSTEM)
+	if (vap->va_vaflags & VA_UTIMES_NULL) {
+		NFS_BITMAP_SET(bitmap, NFS_FATTR_TIME_ACCESS_SET);
+		NFS_BITMAP_SET(bitmap, NFS_FATTR_TIME_MODIFY_SET);
+	} else {
+		if (VATTR_IS_ACTIVE(vap, va_access_time))
+			NFS_BITMAP_SET(bitmap, NFS_FATTR_TIME_ACCESS_SET);
+		if (VATTR_IS_ACTIVE(vap, va_modify_time))
+			NFS_BITMAP_SET(bitmap, NFS_FATTR_TIME_MODIFY_SET);
+	}
+	if (VATTR_IS_ACTIVE(vap, va_backup_time))
+		NFS_BITMAP_SET(bitmap, NFS_FATTR_TIME_BACKUP);
+	if (VATTR_IS_ACTIVE(vap, va_create_time))
+		NFS_BITMAP_SET(bitmap, NFS_FATTR_TIME_CREATE);
+	/* and limit to what is supported by server */
+	for (i=0; i < NFS_ATTR_BITMAP_LEN; i++)
+		bitmap[i] &= nmp->nm_fsattr.nfsa_supp_attr[i];
+}
+
+/*
+ * Convert between NFSv4 and VFS ACE types
+ */
+uint32_t
+nfs4_ace_nfstype_to_vfstype(uint32_t nfsacetype, int *errorp)
+{
+	switch (nfsacetype) {
+	case NFS_ACE_ACCESS_ALLOWED_ACE_TYPE:
+		return KAUTH_ACE_PERMIT;
+	case NFS_ACE_ACCESS_DENIED_ACE_TYPE:
+		return KAUTH_ACE_DENY;
+	case NFS_ACE_SYSTEM_AUDIT_ACE_TYPE:
+		return KAUTH_ACE_AUDIT;
+	case NFS_ACE_SYSTEM_ALARM_ACE_TYPE:
+		return KAUTH_ACE_ALARM;
+	}
+	*errorp = EBADRPC;
+	return 0;
+}
+
+uint32_t
+nfs4_ace_vfstype_to_nfstype(uint32_t vfstype, int *errorp)
+{
+	switch (vfstype) {
+	case KAUTH_ACE_PERMIT:
+		return NFS_ACE_ACCESS_ALLOWED_ACE_TYPE;
+	case KAUTH_ACE_DENY:
+		return NFS_ACE_ACCESS_DENIED_ACE_TYPE;
+	case KAUTH_ACE_AUDIT:
+		return NFS_ACE_SYSTEM_AUDIT_ACE_TYPE;
+	case KAUTH_ACE_ALARM:
+		return NFS_ACE_SYSTEM_ALARM_ACE_TYPE;
+	}
+	*errorp = EINVAL;
+	return 0;
+}
+
+/*
+ * Convert between NFSv4 and VFS ACE flags
+ */
+uint32_t
+nfs4_ace_nfsflags_to_vfsflags(uint32_t nfsflags)
+{
+	uint32_t vfsflags = 0;
+
+	if (nfsflags & NFS_ACE_FILE_INHERIT_ACE)
+		vfsflags |= KAUTH_ACE_FILE_INHERIT;
+	if (nfsflags & NFS_ACE_DIRECTORY_INHERIT_ACE)
+		vfsflags |= KAUTH_ACE_DIRECTORY_INHERIT;
+	if (nfsflags & NFS_ACE_NO_PROPAGATE_INHERIT_ACE)
+		vfsflags |= KAUTH_ACE_LIMIT_INHERIT;
+	if (nfsflags & NFS_ACE_INHERIT_ONLY_ACE)
+		vfsflags |= KAUTH_ACE_ONLY_INHERIT;
+	if (nfsflags & NFS_ACE_SUCCESSFUL_ACCESS_ACE_FLAG)
+		vfsflags |= KAUTH_ACE_SUCCESS;
+	if (nfsflags & NFS_ACE_FAILED_ACCESS_ACE_FLAG)
+		vfsflags |= KAUTH_ACE_FAILURE;
+	if (nfsflags & NFS_ACE_INHERITED_ACE)
+		vfsflags |= KAUTH_ACE_INHERITED;
+
+	return (vfsflags);
+}
+
+uint32_t
+nfs4_ace_vfsflags_to_nfsflags(uint32_t vfsflags)
+{
+	uint32_t nfsflags = 0;
+
+	if (vfsflags & KAUTH_ACE_FILE_INHERIT)
+		nfsflags |= NFS_ACE_FILE_INHERIT_ACE;
+	if (vfsflags & KAUTH_ACE_DIRECTORY_INHERIT)
+		nfsflags |= NFS_ACE_DIRECTORY_INHERIT_ACE;
+	if (vfsflags & KAUTH_ACE_LIMIT_INHERIT)
+		nfsflags |= NFS_ACE_NO_PROPAGATE_INHERIT_ACE;
+	if (vfsflags & KAUTH_ACE_ONLY_INHERIT)
+		nfsflags |= NFS_ACE_INHERIT_ONLY_ACE;
+	if (vfsflags & KAUTH_ACE_SUCCESS)
+		nfsflags |= NFS_ACE_SUCCESSFUL_ACCESS_ACE_FLAG;
+	if (vfsflags & KAUTH_ACE_FAILURE)
+		nfsflags |= NFS_ACE_FAILED_ACCESS_ACE_FLAG;
+	if (vfsflags & KAUTH_ACE_INHERITED)
+		nfsflags |= NFS_ACE_INHERITED_ACE;
+
+	return (nfsflags);
+}
+
+/*
+ * Convert between NFSv4 ACE access masks and VFS access rights
+ */
+uint32_t
+nfs4_ace_nfsmask_to_vfsrights(uint32_t nfsmask)
+{
+	uint32_t vfsrights = 0;
+
+	if (nfsmask & NFS_ACE_READ_DATA)
+		vfsrights |= KAUTH_VNODE_READ_DATA;
+	if (nfsmask & NFS_ACE_LIST_DIRECTORY)
+		vfsrights |= KAUTH_VNODE_LIST_DIRECTORY;
+	if (nfsmask & NFS_ACE_WRITE_DATA)
+		vfsrights |= KAUTH_VNODE_WRITE_DATA;
+	if (nfsmask & NFS_ACE_ADD_FILE)
+		vfsrights |= KAUTH_VNODE_ADD_FILE;
+	if (nfsmask & NFS_ACE_APPEND_DATA)
+		vfsrights |= KAUTH_VNODE_APPEND_DATA;
+	if (nfsmask & NFS_ACE_ADD_SUBDIRECTORY)
+		vfsrights |= KAUTH_VNODE_ADD_SUBDIRECTORY;
+	if (nfsmask & NFS_ACE_READ_NAMED_ATTRS)
+		vfsrights |= KAUTH_VNODE_READ_EXTATTRIBUTES;
+	if (nfsmask & NFS_ACE_WRITE_NAMED_ATTRS)
+		vfsrights |= KAUTH_VNODE_WRITE_EXTATTRIBUTES;
+	if (nfsmask & NFS_ACE_EXECUTE)
+		vfsrights |= KAUTH_VNODE_EXECUTE;
+	if (nfsmask & NFS_ACE_DELETE_CHILD)
+		vfsrights |= KAUTH_VNODE_DELETE_CHILD;
+	if (nfsmask & NFS_ACE_READ_ATTRIBUTES)
+		vfsrights |= KAUTH_VNODE_READ_ATTRIBUTES;
+	if (nfsmask & NFS_ACE_WRITE_ATTRIBUTES)
+		vfsrights |= KAUTH_VNODE_WRITE_ATTRIBUTES;
+	if (nfsmask & NFS_ACE_DELETE)
+		vfsrights |= KAUTH_VNODE_DELETE;
+	if (nfsmask & NFS_ACE_READ_ACL)
+		vfsrights |= KAUTH_VNODE_READ_SECURITY;
+	if (nfsmask & NFS_ACE_WRITE_ACL)
+		vfsrights |= KAUTH_VNODE_WRITE_SECURITY;
+	if (nfsmask & NFS_ACE_WRITE_OWNER)
+		vfsrights |= KAUTH_VNODE_CHANGE_OWNER;
+	if (nfsmask & NFS_ACE_SYNCHRONIZE)
+		vfsrights |= KAUTH_VNODE_SYNCHRONIZE;
+	if ((nfsmask & NFS_ACE_GENERIC_READ) == NFS_ACE_GENERIC_READ)
+		vfsrights |= KAUTH_ACE_GENERIC_READ;
+	if ((nfsmask & NFS_ACE_GENERIC_WRITE) == NFS_ACE_GENERIC_WRITE)
+		vfsrights |= KAUTH_ACE_GENERIC_WRITE;
+	if ((nfsmask & NFS_ACE_GENERIC_EXECUTE) == NFS_ACE_GENERIC_EXECUTE)
+		vfsrights |= KAUTH_ACE_GENERIC_EXECUTE;
+
+	return (vfsrights);
+}
+
+uint32_t
+nfs4_ace_vfsrights_to_nfsmask(uint32_t vfsrights)
+{
+	uint32_t nfsmask = 0;
+
+	if (vfsrights & KAUTH_VNODE_READ_DATA)
+		nfsmask |= NFS_ACE_READ_DATA;
+	if (vfsrights & KAUTH_VNODE_LIST_DIRECTORY)
+		nfsmask |= NFS_ACE_LIST_DIRECTORY;
+	if (vfsrights & KAUTH_VNODE_WRITE_DATA)
+		nfsmask |= NFS_ACE_WRITE_DATA;
+	if (vfsrights & KAUTH_VNODE_ADD_FILE)
+		nfsmask |= NFS_ACE_ADD_FILE;
+	if (vfsrights & KAUTH_VNODE_APPEND_DATA)
+		nfsmask |= NFS_ACE_APPEND_DATA;
+	if (vfsrights & KAUTH_VNODE_ADD_SUBDIRECTORY)
+		nfsmask |= NFS_ACE_ADD_SUBDIRECTORY;
+	if (vfsrights & KAUTH_VNODE_READ_EXTATTRIBUTES)
+		nfsmask |= NFS_ACE_READ_NAMED_ATTRS;
+	if (vfsrights & KAUTH_VNODE_WRITE_EXTATTRIBUTES)
+		nfsmask |= NFS_ACE_WRITE_NAMED_ATTRS;
+	if (vfsrights & KAUTH_VNODE_EXECUTE)
+		nfsmask |= NFS_ACE_EXECUTE;
+	if (vfsrights & KAUTH_VNODE_DELETE_CHILD)
+		nfsmask |= NFS_ACE_DELETE_CHILD;
+	if (vfsrights & KAUTH_VNODE_READ_ATTRIBUTES)
+		nfsmask |= NFS_ACE_READ_ATTRIBUTES;
+	if (vfsrights & KAUTH_VNODE_WRITE_ATTRIBUTES)
+		nfsmask |= NFS_ACE_WRITE_ATTRIBUTES;
+	if (vfsrights & KAUTH_VNODE_DELETE)
+		nfsmask |= NFS_ACE_DELETE;
+	if (vfsrights & KAUTH_VNODE_READ_SECURITY)
+		nfsmask |= NFS_ACE_READ_ACL;
+	if (vfsrights & KAUTH_VNODE_WRITE_SECURITY)
+		nfsmask |= NFS_ACE_WRITE_ACL;
+	if (vfsrights & KAUTH_VNODE_CHANGE_OWNER)
+		nfsmask |= NFS_ACE_WRITE_OWNER;
+	if (vfsrights & KAUTH_VNODE_SYNCHRONIZE)
+		nfsmask |= NFS_ACE_SYNCHRONIZE;
+	if (vfsrights & KAUTH_ACE_GENERIC_READ)
+		nfsmask |= NFS_ACE_GENERIC_READ;
+	if (vfsrights & KAUTH_ACE_GENERIC_WRITE)
+		nfsmask |= NFS_ACE_GENERIC_WRITE;
+	if (vfsrights & KAUTH_ACE_GENERIC_EXECUTE)
+		nfsmask |= NFS_ACE_GENERIC_EXECUTE;
+	if (vfsrights & KAUTH_ACE_GENERIC_ALL)
+		nfsmask |= (NFS_ACE_GENERIC_READ|NFS_ACE_GENERIC_WRITE|NFS_ACE_GENERIC_EXECUTE);
+
+	return (nfsmask);
+}
+
+/*
+ * Map an NFSv4 ID string to a VFS guid.
+ *
+ * Try to use the ID mapping service... but we may fall back to doing it ourselves.
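+ *
+ * IDs seen here come in a few shapes: plain numeric strings ("501"),
+ * user@domain names ("root@localdomain"), and the special ACE "who"
+ * names ending in '@' ("OWNER@", "EVERYONE@", ...).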
+ */
+int
+nfs4_id2guid(/*const*/ char *id, guid_t *guidp, int isgroup)
+{
+	int error1 = 0, error = 0, compare;
+	guid_t guid1, guid2, *gp;
+	ntsid_t sid;
+	long num, unknown;
+	const char *p, *at;
+
+	*guidp = kauth_null_guid;
+	compare = ((nfs_idmap_ctrl & NFS_IDMAP_CTRL_USE_IDMAP_SERVICE) &&
+		   (nfs_idmap_ctrl & NFS_IDMAP_CTRL_COMPARE_RESULTS));
+	unknown = (nfs_idmap_ctrl & NFS_IDMAP_CTRL_UNKNOWN_IS_99) ? 99 : -2;
+
+	/*
+	 * First check if it is just a simple numeric ID string or a special "XXX@" name.
+	 * If it's a number, there's no need to ask the IDMAP service to map it.
+	 * If it's a special "XXX@" name, we want to make sure to treat it as a group.
+	 */
+	num = 1;
+	at = NULL;
+	p = id;
+	while (*p) {
+		if ((*p < '0') || (*p > '9'))
+			num = 0;
+		if (*p == '@')
+			at = p;
+		p++;
+	}
+	if (at && !at[1] && !isgroup)
+		isgroup = 1;  /* special "XXX@" names should always be treated as groups */
+	if (num) {
+		/* must be numeric ID (or empty) */
+		num = *id ? strtol(id, NULL, 10) : unknown;
+		gp = guidp;
+		goto gotnumid;
+	}
+
+	if (nfs_idmap_ctrl & NFS_IDMAP_CTRL_USE_IDMAP_SERVICE) {
+		/*
+		 * Ask the ID mapping service to map the ID string to a GUID.
+		 *
+		 * [sigh] this isn't a "pwnam/grnam", it's an NFS ID string!
+		 */
+		gp = compare ? &guid1 : guidp;
+		if (isgroup)
+			error = kauth_cred_grnam2guid(id, gp);
+		else
+			error = kauth_cred_pwnam2guid(id, gp);
+		if (error && (nfs_idmap_ctrl & NFS_IDMAP_CTRL_LOG_FAILED_MAPPINGS))
+			printf("nfs4_id2guid: idmap failed for %s %s error %d\n", id, isgroup ? "G" : " ", error);
+		if (!error && (nfs_idmap_ctrl & NFS_IDMAP_CTRL_LOG_SUCCESSFUL_MAPPINGS))
+			printf("nfs4_id2guid: idmap for %s %s got guid "
+				"%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x\n",
+				id, isgroup ? "G" : " ",
+				gp->g_guid[0], gp->g_guid[1], gp->g_guid[2], gp->g_guid[3],
+				gp->g_guid[4], gp->g_guid[5], gp->g_guid[6], gp->g_guid[7],
+				gp->g_guid[8], gp->g_guid[9], gp->g_guid[10], gp->g_guid[11],
+				gp->g_guid[12], gp->g_guid[13], gp->g_guid[14], gp->g_guid[15]);
+		error1 = error;
+	}
+	if (error || compare || !(nfs_idmap_ctrl & NFS_IDMAP_CTRL_USE_IDMAP_SERVICE)) {
+		/*
+		 * fallback path... see if we can come up with an answer ourselves.
+		 */
+		gp = compare ? &guid2 : guidp;
+
+		if (!(nfs_idmap_ctrl & NFS_IDMAP_CTRL_FALLBACK_NO_WELLKNOWN_IDS) && at && !at[1]) {
+			/* must be a special ACE "who" ID */
+			bzero(&sid, sizeof(sid));
+			sid.sid_kind = 1;
+			sid.sid_authcount = 1;
+			if (!strcmp(id, "OWNER@")) {
+				// S-1-3-0
+				sid.sid_authority[5] = 3;
+				sid.sid_authorities[0] = 0;
+			} else if (!strcmp(id, "GROUP@")) {
+				// S-1-3-1
+				sid.sid_authority[5] = 3;
+				sid.sid_authorities[0] = 1;
+			} else if (!strcmp(id, "EVERYONE@")) {
+				// S-1-1-0
+				sid.sid_authority[5] = 1;
+				sid.sid_authorities[0] = 0;
+			} else if (!strcmp(id, "INTERACTIVE@")) {
+				// S-1-5-4
+				sid.sid_authority[5] = 5;
+				sid.sid_authorities[0] = 4;
+			} else if (!strcmp(id, "NETWORK@")) {
+				// S-1-5-2
+				sid.sid_authority[5] = 5;
+				sid.sid_authorities[0] = 2;
+			} else if (!strcmp(id, "DIALUP@")) {
+				// S-1-5-1
+				sid.sid_authority[5] = 5;
+				sid.sid_authorities[0] = 1;
+			} else if (!strcmp(id, "BATCH@")) {
+				// S-1-5-3
+				sid.sid_authority[5] = 5;
+				sid.sid_authorities[0] = 3;
+			} else if (!strcmp(id, "ANONYMOUS@")) {
+				// S-1-5-7
+				sid.sid_authority[5] = 5;
+				sid.sid_authorities[0] = 7;
+			} else if (!strcmp(id, "AUTHENTICATED@")) {
+				// S-1-5-11
+				sid.sid_authority[5] = 5;
+				sid.sid_authorities[0] = 11;
+			} else if (!strcmp(id, "SERVICE@")) {
+				// S-1-5-6
+				sid.sid_authority[5] = 5;
+				sid.sid_authorities[0] = 6;
+			} else {
+				// S-1-0-0 "NOBODY"
+				sid.sid_authority[5] = 0;
+				sid.sid_authorities[0] = 0;
+			}
+			error = kauth_cred_ntsid2guid(&sid, gp);
+		} else {
+			if (!(nfs_idmap_ctrl & NFS_IDMAP_CTRL_FALLBACK_NO_COMMON_IDS) && at) {
+				/* must be user@domain */
+				/* try to identify some well-known IDs */
+				if (!strncmp(id, "root@", 5))
+					num = 0;
+				else if (!strncmp(id, "wheel@", 6))
+					num = 0;
+				else if (!strncmp(id, "nobody@", 7))
+					num = -2;
+				else if (!strncmp(id, "nfsnobody@", 10))
+					num = -2;
+				else
+					num = unknown;
+			} else if (!(nfs_idmap_ctrl & NFS_IDMAP_CTRL_FALLBACK_NO_COMMON_IDS) && !strcmp(id, "nobody")) {
+				num = -2;
+			} else {
+				num = unknown;
+			}
+gotnumid:
+			if (isgroup)
+				error = kauth_cred_gid2guid((gid_t)num, gp);
+			else
+				error = kauth_cred_uid2guid((uid_t)num, gp);
+		}
+		if (error && (nfs_idmap_ctrl & NFS_IDMAP_CTRL_LOG_FAILED_MAPPINGS))
+			printf("nfs4_id2guid: fallback map failed for %s %s error %d\n", id, isgroup ? "G" : " ", error);
+		if (!error && (nfs_idmap_ctrl & NFS_IDMAP_CTRL_LOG_SUCCESSFUL_MAPPINGS))
+			printf("nfs4_id2guid: fallback map for %s %s got guid "
+				"%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x\n",
+				id, isgroup ? "G" : " ",
+				gp->g_guid[0], gp->g_guid[1], gp->g_guid[2], gp->g_guid[3],
+				gp->g_guid[4], gp->g_guid[5], gp->g_guid[6], gp->g_guid[7],
+				gp->g_guid[8], gp->g_guid[9], gp->g_guid[10], gp->g_guid[11],
+				gp->g_guid[12], gp->g_guid[13], gp->g_guid[14], gp->g_guid[15]);
+	}
+
+	if (compare) {
+		/* compare the results, log if different */
+		if (!error1 && !error) {
+			if (!kauth_guid_equal(&guid1, &guid2))
+				printf("nfs4_id2guid: idmap/fallback results differ for %s %s - "
+					"idmap %02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x "
+					"fallback %02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x\n",
+					id, isgroup ? "G" : " ",
+					guid1.g_guid[0], guid1.g_guid[1], guid1.g_guid[2], guid1.g_guid[3],
+					guid1.g_guid[4], guid1.g_guid[5], guid1.g_guid[6], guid1.g_guid[7],
+					guid1.g_guid[8], guid1.g_guid[9], guid1.g_guid[10], guid1.g_guid[11],
+					guid1.g_guid[12], guid1.g_guid[13], guid1.g_guid[14], guid1.g_guid[15],
+					guid2.g_guid[0], guid2.g_guid[1], guid2.g_guid[2], guid2.g_guid[3],
+					guid2.g_guid[4], guid2.g_guid[5], guid2.g_guid[6], guid2.g_guid[7],
+					guid2.g_guid[8], guid2.g_guid[9], guid2.g_guid[10], guid2.g_guid[11],
+					guid2.g_guid[12], guid2.g_guid[13], guid2.g_guid[14], guid2.g_guid[15]);
+			/* copy idmap result to output guid */
+			*guidp = guid1;
+		} else if (error1 && !error) {
+			printf("nfs4_id2guid: idmap/fallback results differ for %s %s - "
+				"idmap error %d "
+				"fallback %02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x\n",
+				id, isgroup ? "G" : " ",
+				error1,
+				guid2.g_guid[0], guid2.g_guid[1], guid2.g_guid[2], guid2.g_guid[3],
+				guid2.g_guid[4], guid2.g_guid[5], guid2.g_guid[6], guid2.g_guid[7],
+				guid2.g_guid[8], guid2.g_guid[9], guid2.g_guid[10], guid2.g_guid[11],
+				guid2.g_guid[12], guid2.g_guid[13], guid2.g_guid[14], guid2.g_guid[15]);
+			/* copy fallback result to output guid */
+			*guidp = guid2;
+		} else if (!error1 && error) {
+			printf("nfs4_id2guid: idmap/fallback results differ for %s %s - "
+				"idmap %02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x "
+				"fallback error %d\n",
+				id, isgroup ? "G" : " ",
+				guid1.g_guid[0], guid1.g_guid[1], guid1.g_guid[2], guid1.g_guid[3],
+				guid1.g_guid[4], guid1.g_guid[5], guid1.g_guid[6], guid1.g_guid[7],
+				guid1.g_guid[8], guid1.g_guid[9], guid1.g_guid[10], guid1.g_guid[11],
+				guid1.g_guid[12], guid1.g_guid[13], guid1.g_guid[14], guid1.g_guid[15],
+				error);
+			/* copy idmap result to output guid */
+			*guidp = guid1;
+			error = 0;
+		} else {
+			if (error1 != error)
+				printf("nfs4_id2guid: idmap/fallback results differ for %s %s - "
+					"idmap error %d fallback error %d\n",
+					id, isgroup ? "G" : " ", error1, error);
+		}
+	}
+
+	return (error);
+}
+
+/*
+ * Map a VFS guid to an NFSv4 ID string.
+ *
+ * Try to use the ID mapping service... but we may fall back to doing it ourselves.
+ */
+int
+nfs4_guid2id(guid_t *guidp, char *id, int *idlen, int isgroup)
+{
+	int error1 = 0, error = 0, compare;
+	int id1len, id2len, len;
+	char *id1buf, *id1;
+	char numbuf[32];
+	const char *id2 = NULL;
+
+	id1buf = id1 = NULL;
+	id1len = id2len = 0;
+	compare = ((nfs_idmap_ctrl & NFS_IDMAP_CTRL_USE_IDMAP_SERVICE) &&
+		   (nfs_idmap_ctrl & NFS_IDMAP_CTRL_COMPARE_RESULTS));
+
+	if (nfs_idmap_ctrl & NFS_IDMAP_CTRL_USE_IDMAP_SERVICE) {
+		/*
+		 * Ask the ID mapping service to map the GUID to an ID string.
+		 *
+		 * [sigh] this isn't a "pwnam", it's an NFS ID string!
+		 */
+
+		/*
+		 * Stupid kauth_cred_guid2pwnam() function requires that the buffer
+		 * be at least MAXPATHLEN bytes long even though most if not all ID
+		 * strings will be much shorter than that.
+		 */
+		if (compare || (*idlen < MAXPATHLEN)) {
+			MALLOC_ZONE(id1buf, char*, MAXPATHLEN, M_NAMEI, M_WAITOK);
+			if (!id1buf)
+				return (ENOMEM);
+			id1 = id1buf;
+			id1len = MAXPATHLEN;
+		} else {
+			id1 = id;
+			id1len = *idlen;
+		}
+
+		if (isgroup)
+			error = kauth_cred_guid2grnam(guidp, id1);
+		else
+			error = kauth_cred_guid2pwnam(guidp, id1);
+		if (error && (nfs_idmap_ctrl & NFS_IDMAP_CTRL_LOG_FAILED_MAPPINGS))
+			printf("nfs4_guid2id: idmap failed for "
+				"%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x %s "
+				"error %d\n",
+				guidp->g_guid[0], guidp->g_guid[1], guidp->g_guid[2], guidp->g_guid[3],
+				guidp->g_guid[4], guidp->g_guid[5], guidp->g_guid[6], guidp->g_guid[7],
+				guidp->g_guid[8], guidp->g_guid[9], guidp->g_guid[10], guidp->g_guid[11],
+				guidp->g_guid[12], guidp->g_guid[13], guidp->g_guid[14], guidp->g_guid[15],
+				isgroup ? "G" : " ", error);
+		if (!error && (nfs_idmap_ctrl & NFS_IDMAP_CTRL_LOG_SUCCESSFUL_MAPPINGS))
+			printf("nfs4_guid2id: idmap for "
+				"%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x %s "
+				"got ID %s\n",
+				guidp->g_guid[0], guidp->g_guid[1], guidp->g_guid[2], guidp->g_guid[3],
+				guidp->g_guid[4], guidp->g_guid[5], guidp->g_guid[6], guidp->g_guid[7],
+				guidp->g_guid[8], guidp->g_guid[9], guidp->g_guid[10], guidp->g_guid[11],
+				guidp->g_guid[12], guidp->g_guid[13], guidp->g_guid[14], guidp->g_guid[15],
+				isgroup ? "G" : " ", id1);
+		error1 = error;
+		if (!error) {
+			if (compare) {
+				id1len = strnlen(id1, id1len);
+			} else if (id1 == id1buf) {
+				/* copy idmap result to output buffer */
+				len = strlcpy(id, id1, *idlen);
+				if (len >= *idlen)
+					error = ENOSPC;
+				else
+					*idlen = len;
+			}
+		}
+	}
+	if (error || compare || !(nfs_idmap_ctrl & NFS_IDMAP_CTRL_USE_IDMAP_SERVICE)) {
+		/*
+		 * fallback path... see if we can come up with an answer ourselves.
+		 */
+		ntsid_t sid;
+		uid_t uid;
+
+		if (!(nfs_idmap_ctrl & NFS_IDMAP_CTRL_FALLBACK_NO_WELLKNOWN_IDS)) {
+			error = kauth_cred_guid2ntsid(guidp, &sid);
+			if (!error && (sid.sid_kind == 1) && (sid.sid_authcount == 1)) {
+				/* check if it's one of our well-known ACE WHO names */
+				if (sid.sid_authority[5] == 0) {
+					if (sid.sid_authorities[0] == 0) // S-1-0-0
+						id2 = "nobody@localdomain";
+				} else if (sid.sid_authority[5] == 1) {
+					if (sid.sid_authorities[0] == 0) // S-1-1-0
+						id2 = "EVERYONE@";
+				} else if (sid.sid_authority[5] == 3) {
+					if (sid.sid_authorities[0] == 0) // S-1-3-0
+						id2 = "OWNER@";
+					else if (sid.sid_authorities[0] == 1) // S-1-3-1
+						id2 = "GROUP@";
+				} else if (sid.sid_authority[5] == 5) {
+					if (sid.sid_authorities[0] == 1) // S-1-5-1
+						id2 = "DIALUP@";
+					else if (sid.sid_authorities[0] == 2) // S-1-5-2
+						id2 = "NETWORK@";
+					else if (sid.sid_authorities[0] == 3) // S-1-5-3
+						id2 = "BATCH@";
+					else if (sid.sid_authorities[0] == 4) // S-1-5-4
+						id2 = "INTERACTIVE@";
+					else if (sid.sid_authorities[0] == 6) // S-1-5-6
+						id2 = "SERVICE@";
+					else if (sid.sid_authorities[0] == 7) // S-1-5-7
+						id2 = "ANONYMOUS@";
+					else if (sid.sid_authorities[0] == 11) // S-1-5-11
+						id2 = "AUTHENTICATED@";
+						id2 = "AUTHENTICATED@";
+				}
+			}
+		}
+		if (!id2) {
+			/* OK, let's just try mapping it to a UID/GID */
+			if (isgroup)
+				error = kauth_cred_guid2gid(guidp, (gid_t*)&uid);
+			else
+				error = kauth_cred_guid2uid(guidp, &uid);
+			if (!error) {
+				if (!(nfs_idmap_ctrl & NFS_IDMAP_CTRL_FALLBACK_NO_COMMON_IDS)) {
+					/* map well-known UIDs to strings */
+					if (uid == 0)
+						id2 = isgroup ? "wheel@localdomain" : "root@localdomain";
+					else if (uid == (uid_t)-2)
+						id2 = "nobody@localdomain";
+				}
+				if (!id2) {
+					/* or just use a decimal number string. */
+					snprintf(numbuf, sizeof(numbuf), "%d", uid);
+					id2 = numbuf;
+				}
+			}
+		}
+		if (error && (nfs_idmap_ctrl & NFS_IDMAP_CTRL_LOG_FAILED_MAPPINGS))
+			printf("nfs4_guid2id: fallback map failed for "
+				"%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x %s "
+				"error %d\n",
+				guidp->g_guid[0], guidp->g_guid[1], guidp->g_guid[2], guidp->g_guid[3],
+				guidp->g_guid[4], guidp->g_guid[5], guidp->g_guid[6], guidp->g_guid[7],
+				guidp->g_guid[8], guidp->g_guid[9], guidp->g_guid[10], guidp->g_guid[11],
+				guidp->g_guid[12], guidp->g_guid[13], guidp->g_guid[14], guidp->g_guid[15],
+				isgroup ? "G" : " ", error);
+		if (!error && (nfs_idmap_ctrl & NFS_IDMAP_CTRL_LOG_SUCCESSFUL_MAPPINGS))
+			printf("nfs4_guid2id: fallback map for "
+				"%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x %s "
+				"got ID %s\n",
+				guidp->g_guid[0], guidp->g_guid[1], guidp->g_guid[2], guidp->g_guid[3],
+				guidp->g_guid[4], guidp->g_guid[5], guidp->g_guid[6], guidp->g_guid[7],
+				guidp->g_guid[8], guidp->g_guid[9], guidp->g_guid[10], guidp->g_guid[11],
+				guidp->g_guid[12], guidp->g_guid[13], guidp->g_guid[14], guidp->g_guid[15],
+				isgroup ? "G" : " ", id2);
+		if (!error && id2) {
+			if (compare) {
+				id2len = strnlen(id2, MAXPATHLEN);
+			} else {
+				/* copy fallback result to output buffer */
+				len = strlcpy(id, id2, *idlen);
+				if (len >= *idlen)
+					error = ENOSPC;
+				else
+					*idlen = len;
+			}
+		}
+	}
+
+	if (compare) {
+		/* compare the results, log if different */
+		if (!error1 && !error) {
+			if ((id1len != id2len) || strncmp(id1, id2, id1len))
+				printf("nfs4_guid2id: idmap/fallback results differ for "
+					"%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x %s "
+					"idmap %s fallback %s\n",
+					guidp->g_guid[0], guidp->g_guid[1], guidp->g_guid[2], guidp->g_guid[3],
+					guidp->g_guid[4], guidp->g_guid[5], guidp->g_guid[6], guidp->g_guid[7],
+					guidp->g_guid[8], guidp->g_guid[9], guidp->g_guid[10], guidp->g_guid[11],
+					guidp->g_guid[12], guidp->g_guid[13], guidp->g_guid[14], guidp->g_guid[15],
+					isgroup ? "G" : " ", id1, id2);
+			if (id1 == id1buf) {
+				/* copy idmap result to output buffer */
+				len = strlcpy(id, id1, *idlen);
+				if (len >= *idlen)
+					error = ENOSPC;
+				else
+					*idlen = len;
+			}
+		} else if (error1 && !error) {
+			printf("nfs4_guid2id: idmap/fallback results differ for "
+				"%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x %s "
+				"idmap error %d fallback %s\n",
+				guidp->g_guid[0], guidp->g_guid[1], guidp->g_guid[2], guidp->g_guid[3],
+				guidp->g_guid[4], guidp->g_guid[5], guidp->g_guid[6], guidp->g_guid[7],
+				guidp->g_guid[8], guidp->g_guid[9], guidp->g_guid[10], guidp->g_guid[11],
+				guidp->g_guid[12], guidp->g_guid[13], guidp->g_guid[14], guidp->g_guid[15],
+				isgroup ? "G" : " ", error1, id2);
+			/* copy fallback result to output buffer */
+			len = strlcpy(id, id2, *idlen);
+			if (len >= *idlen)
+				error = ENOSPC;
+			else
+				*idlen = len;
+		} else if (!error1 && error) {
+			printf("nfs4_guid2id: idmap/fallback results differ for "
+				"%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x_%02x%02x%02x%02x %s "
+				"idmap %s fallback error %d\n",
+				guidp->g_guid[0], guidp->g_guid[1], guidp->g_guid[2], guidp->g_guid[3],
+				guidp->g_guid[4], guidp->g_guid[5], guidp->g_guid[6], guidp->g_guid[7],
+				guidp->g_guid[8], guidp->g_guid[9], guidp->g_guid[10], guidp->g_guid[11],
+				guidp->g_guid[12], guidp->g_guid[13], guidp->g_guid[14], guidp->g_guid[15],
+				isgroup ? "G" : " ", id1, error);
+			if (id1 == id1buf) {
+				/* copy idmap result to output buffer */
+				len = strlcpy(id, id1, *idlen);
+				if (len >= *idlen)
+					error = ENOSPC;
+				else
+					*idlen = len;
+			}
+			error = 0;
+		} else {
+			if (error1 != error)
+				printf("nfs4_guid2id: idmap/fallback results differ for %s %s - "
+					"idmap error %d fallback error %d\n",
+					id, isgroup ? "G" : " ", error1, error);
+		}
+	}
+	if (id1buf)
+		FREE_ZONE(id1buf, MAXPATHLEN, M_NAMEI);
+	return (error);
+}
+
+
 /*
  * Set a vnode attr's supported bits according to the given bitmap
  */
@@ -403,11 +1461,10 @@ nfs_vattr_set_supported(uint32_t *bitmap, struct vnode_attr *vap)
 	// if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_CHANGE))
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_SIZE))
 		VATTR_SET_SUPPORTED(vap, va_data_size);
-	// if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_NAMED_ATTR))
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_FSID))
 		VATTR_SET_SUPPORTED(vap, va_fsid);
-//	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_ACL))
-//		VATTR_SET_SUPPORTED(vap, va_acl);
+	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_ACL))
+		VATTR_SET_SUPPORTED(vap, va_acl);
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_ARCHIVE))
 		VATTR_SET_SUPPORTED(vap, va_flags);
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_FILEID))
@@ -419,10 +1476,14 @@ nfs_vattr_set_supported(uint32_t *bitmap, struct vnode_attr *vap)
 		VATTR_SET_SUPPORTED(vap, va_mode);
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_NUMLINKS))
 		VATTR_SET_SUPPORTED(vap, va_nlink);
-	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_OWNER))
+	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_OWNER)) {
 		VATTR_SET_SUPPORTED(vap, va_uid);
-	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_OWNER_GROUP))
+		VATTR_SET_SUPPORTED(vap, va_uuuid);
+	}
+	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_OWNER_GROUP)) {
 		VATTR_SET_SUPPORTED(vap, va_gid);
+		VATTR_SET_SUPPORTED(vap, va_guuid);
+	}
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_RAWDEV))
 		VATTR_SET_SUPPORTED(vap, va_rdev);
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_SPACE_USED))
@@ -450,15 +1511,20 @@ nfs4_parsefattr(
 	struct nfs_fsattr *nfsap,
 	struct nfs_vattr *nvap,
 	fhandle_t *fhp,
-	struct dqblk *dqbp)
+	struct dqblk *dqbp,
+	struct nfs_fs_locations *nfslsp)
 {
-	int error = 0, attrbytes;
-	uint32_t val, val2, val3, i, j;
-	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], len;
-	char *s;
+	int error = 0, error2, rderror = 0, attrbytes;
+	uint32_t val, val2, val3, i;
+	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], len, slen;
+	char sbuf[64], *s;
 	struct nfs_fsattr nfsa_dummy;
 	struct nfs_vattr nva_dummy;
 	struct dqblk dqb_dummy;
+	kauth_acl_t acl = NULL;
+	uint32_t ace_type, ace_flags, ace_mask;
+	struct nfs_fs_locations nfsls_dummy;
+	struct sockaddr_storage ss;
 
 	/* if not interested in some values... throw 'em into a local dummy variable */
 	if (!nfsap)
@@ -467,8 +1533,14 @@ nfs4_parsefattr(
 		nvap = &nva_dummy;
 	if (!dqbp)
 		dqbp = &dqb_dummy;
+	if (!nfslsp)
+		nfslsp = &nfsls_dummy;
+	bzero(nfslsp, sizeof(*nfslsp));
 
 	attrbytes = val = val2 = val3 = 0;
+	s = sbuf;
+	slen = sizeof(sbuf);
+	NVATTR_INIT(nvap);
 
 	len = NFS_ATTR_BITMAP_LEN;
 	nfsm_chain_get_bitmap(error, nmc, bitmap, len);
@@ -489,17 +1561,19 @@ nfs4_parsefattr(
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_TYPE)) {
 		nfsm_chain_get_32(error, nmc, val);
 		nvap->nva_type = nfstov_type(val, NFS_VER4);
+		if ((val == NFATTRDIR) || (val == NFNAMEDATTR))
+			nvap->nva_flags |= NFS_FFLAG_IS_ATTR;
+		else
+			nvap->nva_flags &= ~NFS_FFLAG_IS_ATTR;
 		attrbytes -= NFSX_UNSIGNED;
 	}
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_FH_EXPIRE_TYPE)) {
 		nfsm_chain_get_32(error, nmc, val);
 		nfsmout_if(error);
-		if (val != NFS_FH_PERSISTENT)
-			printf("nfs: warning: non-persistent file handles!\n");
-		if (val & ~0xff)
-			printf("nfs: warning unknown fh type: 0x%x\n", val);
 		nfsap->nfsa_flags &= ~NFS_FSFLAG_FHTYPE_MASK;
 		nfsap->nfsa_flags |= val << NFS_FSFLAG_FHTYPE_SHIFT;
+		if (val & ~0xff)
+			printf("nfs: warning unknown fh type: 0x%x\n", val);
 		attrbytes -= NFSX_UNSIGNED;
 	}
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_CHANGE)) {
@@ -529,9 +1603,9 @@ nfs4_parsefattr(
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_NAMED_ATTR)) {
 		nfsm_chain_get_32(error, nmc, val);
 		if (val)
-			nvap->nva_flags |= NFS_FFLAG_NAMED_ATTR;
+			nvap->nva_flags |= NFS_FFLAG_HAS_NAMED_ATTRS;
 		else
-			nvap->nva_flags &= ~NFS_FFLAG_NAMED_ATTR;
+			nvap->nva_flags &= ~NFS_FFLAG_HAS_NAMED_ATTRS;
 		attrbytes -= NFSX_UNSIGNED;
 	}
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_FSID)) {
@@ -552,26 +1626,79 @@ nfs4_parsefattr(
 		attrbytes -= NFSX_UNSIGNED;
 	}
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_RDATTR_ERROR)) {
-		nfsm_chain_get_32(error, nmc, error);
+		nfsm_chain_get_32(error, nmc, rderror);
 		attrbytes -= NFSX_UNSIGNED;
-		nfsmout_if(error);
+		if (!rderror) { /* no error */
+			NFS_BITMAP_CLR(bitmap, NFS_FATTR_RDATTR_ERROR);
+			NFS_BITMAP_CLR(nvap->nva_bitmap, NFS_FATTR_RDATTR_ERROR);
+		}
 	}
-	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_ACL)) { /* skip for now */
+	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_ACL)) {
+		error2 = 0;
+		ace_type = ace_flags = ace_mask = 0;
 		nfsm_chain_get_32(error, nmc, val); /* ACE count */
+		if (!error && (val > KAUTH_ACL_MAX_ENTRIES))
+			error = EOVERFLOW;
+		if (!error && !((acl = kauth_acl_alloc(val))))
+			error = ENOMEM;
+		if (!error && acl) {
+			acl->acl_entrycount = val;
+			acl->acl_flags = 0;
+		}
+		attrbytes -= NFSX_UNSIGNED;
+		nfsm_assert(error, (attrbytes >= 0), EBADRPC);
 		for (i=0; !error && (i < val); i++) {
-			nfsm_chain_adv(error, nmc, 3 * NFSX_UNSIGNED);
-			nfsm_chain_get_32(error, nmc, val2); /* string length */
-			nfsm_chain_adv(error, nmc, nfsm_rndup(val2));
-			attrbytes -= 4*NFSX_UNSIGNED + nfsm_rndup(val2);
+			nfsm_chain_get_32(error, nmc, ace_type);
+			nfsm_chain_get_32(error, nmc, ace_flags);
+			nfsm_chain_get_32(error, nmc, ace_mask);
+			nfsm_chain_get_32(error, nmc, len);
+			acl->acl_ace[i].ace_flags = nfs4_ace_nfstype_to_vfstype(ace_type, &error);
+			acl->acl_ace[i].ace_flags |= nfs4_ace_nfsflags_to_vfsflags(ace_flags);
+			acl->acl_ace[i].ace_rights = nfs4_ace_nfsmask_to_vfsrights(ace_mask);
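+			/* grow the temporary WHO-string buffer if this name won't fit (keep some slack) */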
+			if (!error && !error2 && (len >= slen)) {
+				if (s != sbuf) {
+					FREE(s, M_TEMP);
+					s = sbuf;
+					slen = sizeof(sbuf);
+				}
+				MALLOC(s, char*, len+16, M_TEMP, M_WAITOK);
+				if (s)
+					slen = len+16;
+				else
+					error2 = ENOMEM;
+			}
+			if (error2)
+				nfsm_chain_adv(error, nmc, nfsm_rndup(len));
+			else
+				nfsm_chain_get_opaque(error, nmc, len, s);
+			if (!error && !error2) {
+				s[len] = '\0';
+				error2 = nfs4_id2guid(s, &acl->acl_ace[i].ace_applicable,
+						(ace_flags & NFS_ACE_IDENTIFIER_GROUP));
+				if (error2 && (nfs_idmap_ctrl & NFS_IDMAP_CTRL_LOG_FAILED_MAPPINGS))
+					printf("nfs4_parsefattr: ACE WHO %s is no one, no guid?, error %d\n", s, error2);
+			}
+			attrbytes -= 4*NFSX_UNSIGNED + nfsm_rndup(len);
 			nfsm_assert(error, (attrbytes >= 0), EBADRPC);
 		}
+		nfsmout_if(error);
+		if ((nvap != &nva_dummy) && !error2) {
+			nvap->nva_acl = acl;
+			acl = NULL;
+		}
 	}
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_ACLSUPPORT)) {
+		/*
+		 * Support ACLs if: the server supports DENY/ALLOW ACEs and
+		 * (just to be safe) FATTR_ACL is in the supported list too.
+		 */
 		nfsm_chain_get_32(error, nmc, val);
-		if (val)
+		if ((val & (NFS_ACL_SUPPORT_ALLOW_ACL|NFS_ACL_SUPPORT_DENY_ACL)) &&
+		    NFS_BITMAP_ISSET(nfsap->nfsa_supp_attr, NFS_FATTR_ACL)) {
 			nfsap->nfsa_flags |= NFS_FSFLAG_ACL;
-		else
+		} else {
 			nfsap->nfsa_flags &= ~NFS_FSFLAG_ACL;
+		}
 		attrbytes -= NFSX_UNSIGNED;
 	}
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_ARCHIVE)) { /* SF_ARCHIVED */
@@ -640,23 +1767,151 @@ nfs4_parsefattr(
 		nfsm_chain_get_64(error, nmc, nfsap->nfsa_files_total);
 		attrbytes -= 2 * NFSX_UNSIGNED;
 	}
-	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_FS_LOCATIONS)) { /* skip for now */
-		nfsm_chain_get_32(error, nmc, val); /* root path length */
-		nfsm_chain_adv(error, nmc, nfsm_rndup(val)); /* root path */
-		attrbytes -= (2 * NFSX_UNSIGNED) + nfsm_rndup(val);
-		nfsm_chain_get_32(error, nmc, val); /* location count */
-		for (i=0; !error && (i < val); i++) {
-			nfsm_chain_get_32(error, nmc, val2); /* server string length */
-			nfsm_chain_adv(error, nmc, nfsm_rndup(val2)); /* server string */
-			attrbytes -= (2 * NFSX_UNSIGNED) + nfsm_rndup(val2);
-			nfsm_chain_get_32(error, nmc, val2); /* pathname component count */
-			for (j=0; !error && (j < val2); j++) {
-				nfsm_chain_get_32(error, nmc, val3); /* component length */
-				nfsm_chain_adv(error, nmc, nfsm_rndup(val3)); /* component */
-				attrbytes -= NFSX_UNSIGNED + nfsm_rndup(val3);
-				nfsm_assert(error, (attrbytes >= 0), EBADRPC);
+	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_FS_LOCATIONS)) {
+		uint32_t loc, serv, comp;
+		struct nfs_fs_location *fsl;
+		struct nfs_fs_server *fss;
+		struct nfs_fs_path *fsp;
+
+		/* get root pathname */
+		fsp = &nfslsp->nl_root;
+		nfsm_chain_get_32(error, nmc, fsp->np_compcount); /* component count */
+		attrbytes -= NFSX_UNSIGNED;
+		/* sanity check component count */
+		if (!error && (fsp->np_compcount > MAXPATHLEN))
+			error = EBADRPC;
+		nfsmout_if(error);
+		if (fsp->np_compcount) {
+			MALLOC(fsp->np_components, char **, fsp->np_compcount * sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
+			if (!fsp->np_components)
+				error = ENOMEM;
+		}
+		for (comp = 0; comp < fsp->np_compcount; comp++) {
+			nfsm_chain_get_32(error, nmc, val); /* component length */
+			/* sanity check component length */
+			if (!error && (val == 0)) {
+				/*
+				 * Apparently some people think a path with zero components should
+				 * be encoded with one zero-length component.  So, just ignore any
+				 * zero length components.
+				 */
+				comp--;
+				fsp->np_compcount--;
+				if (fsp->np_compcount == 0) {
+					FREE(fsp->np_components, M_TEMP);
+					fsp->np_components = NULL;
+				}
+				attrbytes -= NFSX_UNSIGNED;
+				continue;
+			}
+			if (!error && ((val < 1) || (val > MAXPATHLEN)))
+				error = EBADRPC;
+			nfsmout_if(error);
+			MALLOC(fsp->np_components[comp], char *, val+1, M_TEMP, M_WAITOK|M_ZERO);
+			if (!fsp->np_components[comp])
+				error = ENOMEM;
+			nfsmout_if(error);
+			nfsm_chain_get_opaque(error, nmc, val, fsp->np_components[comp]); /* component */
+			attrbytes -= NFSX_UNSIGNED + nfsm_rndup(val);
+		}
+		nfsm_chain_get_32(error, nmc, nfslsp->nl_numlocs); /* fs location count */
+		attrbytes -= NFSX_UNSIGNED;
+		/* sanity check location count */
+		if (!error && (nfslsp->nl_numlocs > 256))
+			error = EBADRPC;
+		nfsmout_if(error);
+		if (nfslsp->nl_numlocs > 0) {
+			MALLOC(nfslsp->nl_locations, struct nfs_fs_location **, nfslsp->nl_numlocs * sizeof(struct nfs_fs_location*), M_TEMP, M_WAITOK|M_ZERO);
+			if (!nfslsp->nl_locations)
+				error = ENOMEM;
+		}
+		nfsmout_if(error);
+		for (loc = 0; loc < nfslsp->nl_numlocs; loc++) {
+			nfsmout_if(error);
+			MALLOC(fsl, struct nfs_fs_location *, sizeof(struct nfs_fs_location), M_TEMP, M_WAITOK|M_ZERO);
+			if (!fsl)
+				error = ENOMEM;
+			nfslsp->nl_locations[loc] = fsl;
+			nfsm_chain_get_32(error, nmc, fsl->nl_servcount); /* server count */
+			attrbytes -= NFSX_UNSIGNED;
+			/* sanity check server count */
+			if (!error && ((fsl->nl_servcount < 1) || (fsl->nl_servcount > 256)))
+				error = EBADRPC;
+			nfsmout_if(error);
+			MALLOC(fsl->nl_servers, struct nfs_fs_server **, fsl->nl_servcount * sizeof(struct nfs_fs_server*), M_TEMP, M_WAITOK|M_ZERO);
+			if (!fsl->nl_servers)
+				error = ENOMEM;
+			for (serv = 0; serv < fsl->nl_servcount; serv++) {
+				nfsmout_if(error);
+				MALLOC(fss, struct nfs_fs_server *, sizeof(struct nfs_fs_server), M_TEMP, M_WAITOK|M_ZERO);
+				if (!fss)
+					error = ENOMEM;
+				fsl->nl_servers[serv] = fss;
+				nfsm_chain_get_32(error, nmc, val); /* server name length */
+				/* sanity check server name length */
+				if (!error && ((val < 1) || (val > MAXPATHLEN)))
+					error = EINVAL;
+				nfsmout_if(error);
+				MALLOC(fss->ns_name, char *, val+1, M_TEMP, M_WAITOK|M_ZERO);
+				if (!fss->ns_name)
+					error = ENOMEM;
+				nfsm_chain_get_opaque(error, nmc, val, fss->ns_name); /* server name */
+				attrbytes -= NFSX_UNSIGNED + nfsm_rndup(val);
+				nfsmout_if(error);
+				/* copy name to address if it converts to a sockaddr */
+				if (nfs_uaddr2sockaddr(fss->ns_name, (struct sockaddr*)&ss)) {
+					fss->ns_addrcount = 1;
+					MALLOC(fss->ns_addresses, char **, sizeof(char *), M_TEMP, M_WAITOK|M_ZERO);
+					if (!fss->ns_addresses)
+						error = ENOMEM;
+					nfsmout_if(error);
+					MALLOC(fss->ns_addresses[0], char *, val+1, M_TEMP, M_WAITOK|M_ZERO);
+					if (!fss->ns_addresses[0])
+						error = ENOMEM;
+					nfsmout_if(error);
+					strlcpy(fss->ns_addresses[0], fss->ns_name, val+1);
+				}
+			}
+			/* get pathname */
+			fsp = &fsl->nl_path;
+			nfsm_chain_get_32(error, nmc, fsp->np_compcount); /* component count */
+			attrbytes -= NFSX_UNSIGNED;
+			/* sanity check component count */
+			if (!error && (fsp->np_compcount > MAXPATHLEN))
+				error = EINVAL;
+			nfsmout_if(error);
+			if (fsp->np_compcount) {
+				MALLOC(fsp->np_components, char **, fsp->np_compcount * sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
+				if (!fsp->np_components)
+					error = ENOMEM;
+			}
+			for (comp = 0; comp < fsp->np_compcount; comp++) {
+				nfsm_chain_get_32(error, nmc, val); /* component length */
+				/* sanity check component length */
+				if (!error && (val == 0)) {
+					/*
+					 * Apparently some people think a path with zero components should
+					 * be encoded with one zero-length component.  So, just ignore any
+					 * zero length components.
+					 */
+					comp--;
+					fsp->np_compcount--;
+					if (fsp->np_compcount == 0) {
+						FREE(fsp->np_components, M_TEMP);
+						fsp->np_components = NULL;
+					}
+					attrbytes -= NFSX_UNSIGNED;
+					continue;
+				}
+				if (!error && ((val < 1) || (val > MAXPATHLEN)))
+					error = EINVAL;
+				nfsmout_if(error);
+				MALLOC(fsp->np_components[comp], char *, val+1, M_TEMP, M_WAITOK|M_ZERO);
+				if (!fsp->np_components[comp])
+					error = ENOMEM;
+				nfsm_chain_get_opaque(error, nmc, val, fsp->np_components[comp]); /* component */
+				attrbytes -= NFSX_UNSIGNED + nfsm_rndup(val);
 			}
-			nfsm_assert(error, (attrbytes >= 0), EBADRPC);
 		}
 		nfsm_assert(error, (attrbytes >= 0), EBADRPC);
 	}
@@ -724,34 +1979,68 @@ nfs4_parsefattr(
 		attrbytes -= NFSX_UNSIGNED;
 	}
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_OWNER)) {
-		/* XXX Need ID mapping infrastructure - use ugly hack for now */
 		nfsm_chain_get_32(error, nmc, len);
-		nfsm_chain_get_opaque_pointer(error, nmc, len, s);
+		if (!error && (len >= slen)) {
+			if (s != sbuf) {
+				FREE(s, M_TEMP);
+				s = sbuf;
+				slen = sizeof(sbuf);
+			}
+			MALLOC(s, char*, len+16, M_TEMP, M_WAITOK);
+			if (s)
+				slen = len+16;
+			else
+				error = ENOMEM;
+		}
+		nfsm_chain_get_opaque(error, nmc, len, s);
+		if (!error) {
+			s[len] = '\0';
+			error = nfs4_id2guid(s, &nvap->nva_uuuid, 0);
+			if (!error)
+				error = kauth_cred_guid2uid(&nvap->nva_uuuid, &nvap->nva_uid);
+			if (error) {
+				/* unable to get either GUID or UID, set to default */
+				nvap->nva_uid = (uid_t)((nfs_idmap_ctrl & NFS_IDMAP_CTRL_UNKNOWN_IS_99) ? 99 : -2);
+				if (nfs_idmap_ctrl & NFS_IDMAP_CTRL_LOG_FAILED_MAPPINGS)
+					printf("nfs4_parsefattr: owner %s is no one, no %s?, error %d\n", s,
+						kauth_guid_equal(&nvap->nva_uuuid, &kauth_null_guid) ? "guid" : "uid",
+						error);
+				error = 0;
+			}
+		}
 		attrbytes -= NFSX_UNSIGNED + nfsm_rndup(len);
-		nfsmout_if(error);
-		if ((*s >= '0') && (*s <= '9'))
-			nvap->nva_uid = strtol(s, NULL, 10);
-		else if (!strncmp(s, "nobody@", 7))
-			nvap->nva_uid = -2;
-		else if (!strncmp(s, "root@", 5))
-			nvap->nva_uid = 0;
-		else
-			nvap->nva_uid = 99; /* unknown */
 	}
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_OWNER_GROUP)) {
-		/* XXX Need ID mapping infrastructure - use ugly hack for now */
 		nfsm_chain_get_32(error, nmc, len);
-		nfsm_chain_get_opaque_pointer(error, nmc, len, s);
+		if (!error && (len >= slen)) {
+			if (s != sbuf) {
+				FREE(s, M_TEMP);
+				s = sbuf;
+				slen = sizeof(sbuf);
+			}
+			MALLOC(s, char*, len+16, M_TEMP, M_WAITOK);
+			if (s)
+				slen = len+16;
+			else
+				error = ENOMEM;
+		}
+		nfsm_chain_get_opaque(error, nmc, len, s);
+		if (!error) {
+			s[len] = '\0';
+			error = nfs4_id2guid(s, &nvap->nva_guuid, 1);
+			if (!error)
+				error = kauth_cred_guid2gid(&nvap->nva_guuid, &nvap->nva_gid);
+			if (error) {
+				/* unable to get either GUID or GID, set to default */
+				nvap->nva_gid = (gid_t)((nfs_idmap_ctrl & NFS_IDMAP_CTRL_UNKNOWN_IS_99) ? 99 : -2);
+				if (nfs_idmap_ctrl & NFS_IDMAP_CTRL_LOG_FAILED_MAPPINGS)
+					printf("nfs4_parsefattr: group %s is no one, no %s?, error %d\n", s,
+						kauth_guid_equal(&nvap->nva_guuid, &kauth_null_guid) ? "guid" : "gid",
+						error);
+				error = 0;
+			}
+		}
 		attrbytes -= NFSX_UNSIGNED + nfsm_rndup(len);
-		nfsmout_if(error);
-		if ((*s >= '0') && (*s <= '9'))
-			nvap->nva_gid = strtol(s, NULL, 10);
-		else if (!strncmp(s, "nobody@", 7))
-			nvap->nva_gid = -2;
-		else if (!strncmp(s, "root@", 5))
-			nvap->nva_uid = 0;
-		else
-			nvap->nva_gid = 99; /* unknown */
 	}
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_QUOTA_AVAIL_HARD)) {
 		nfsm_chain_get_64(error, nmc, dqbp->dqb_bhardlimit);
@@ -828,14 +2117,32 @@ nfs4_parsefattr(
 		nfsm_chain_adv(error, nmc, 4*NFSX_UNSIGNED); /* just skip it */
 		attrbytes -= 4 * NFSX_UNSIGNED;
 	}
-	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_MOUNTED_ON_FILEID)) { /* skip for now */
+	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_MOUNTED_ON_FILEID)) {
+#if CONFIG_TRIGGERS
+		/* we prefer the mounted on file ID, so just replace the fileid */
+		nfsm_chain_get_64(error, nmc, nvap->nva_fileid);
+#else
 		nfsm_chain_adv(error, nmc, 2*NFSX_UNSIGNED);
+#endif
 		attrbytes -= 2 * NFSX_UNSIGNED;
 	}
 	/* advance over any leftover attrbytes */
 	nfsm_assert(error, (attrbytes >= 0), EBADRPC);
 	nfsm_chain_adv(error, nmc, nfsm_rndup(attrbytes));
 nfsmout:
+	if (error)
+		nfs_fs_locations_cleanup(nfslsp);
+	if (!error && rderror)
+		error = rderror;
+	/* free up temporary resources */
+	if (s && (s != sbuf))
+		FREE(s, M_TEMP);
+	if (acl)
+		kauth_acl_free(acl);
+	if (error && nvap->nva_acl) {
+		kauth_acl_free(nvap->nva_acl);
+		nvap->nva_acl = NULL;
+	}
 	return (error);
 }
 
@@ -845,51 +2152,18 @@ nfsmout:
 int
 nfsm_chain_add_fattr4_f(struct nfsm_chain *nmc, struct vnode_attr *vap, struct nfsmount *nmp)
 {
-	int error = 0, attrbytes, slen, i;
-	uint32_t *pattrbytes;
+	int error = 0, attrbytes, slen, len, i, isgroup;
+	uint32_t *pattrbytes, val, acecount;
 	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
-	char s[32];
+	char sbuf[64], *s;
+	kauth_acl_t acl;
+	gid_t gid;
 
-	/*
-	 * Do this in two passes.
-	 * First calculate the bitmap, then pack
-	 * everything together and set the size.
-	 */
+	s = sbuf;
+	slen = sizeof(sbuf);
 
-	NFS_CLEAR_ATTRIBUTES(bitmap);
-	if (VATTR_IS_ACTIVE(vap, va_data_size))
-		NFS_BITMAP_SET(bitmap, NFS_FATTR_SIZE);
-	if (VATTR_IS_ACTIVE(vap, va_acl)) {
-		// NFS_BITMAP_SET(bitmap, NFS_FATTR_ACL)
-	}
-	if (VATTR_IS_ACTIVE(vap, va_flags)) {
-		NFS_BITMAP_SET(bitmap, NFS_FATTR_ARCHIVE);
-		NFS_BITMAP_SET(bitmap, NFS_FATTR_HIDDEN);
-	}
-	// NFS_BITMAP_SET(bitmap, NFS_FATTR_MIMETYPE)
-	if (VATTR_IS_ACTIVE(vap, va_mode))
-		NFS_BITMAP_SET(bitmap, NFS_FATTR_MODE);
-	if (VATTR_IS_ACTIVE(vap, va_uid))
-		NFS_BITMAP_SET(bitmap, NFS_FATTR_OWNER);
-	if (VATTR_IS_ACTIVE(vap, va_gid))
-		NFS_BITMAP_SET(bitmap, NFS_FATTR_OWNER_GROUP);
-	// NFS_BITMAP_SET(bitmap, NFS_FATTR_SYSTEM)
-	if (vap->va_vaflags & VA_UTIMES_NULL) {
-		NFS_BITMAP_SET(bitmap, NFS_FATTR_TIME_ACCESS_SET);
-		NFS_BITMAP_SET(bitmap, NFS_FATTR_TIME_MODIFY_SET);
-	} else {
-		if (VATTR_IS_ACTIVE(vap, va_access_time))
-			NFS_BITMAP_SET(bitmap, NFS_FATTR_TIME_ACCESS_SET);
-		if (VATTR_IS_ACTIVE(vap, va_modify_time))
-			NFS_BITMAP_SET(bitmap, NFS_FATTR_TIME_MODIFY_SET);
-	}
-	if (VATTR_IS_ACTIVE(vap, va_backup_time))
-		NFS_BITMAP_SET(bitmap, NFS_FATTR_TIME_BACKUP);
-	if (VATTR_IS_ACTIVE(vap, va_create_time))
-		NFS_BITMAP_SET(bitmap, NFS_FATTR_TIME_CREATE);
-	/* and limit to what is supported by server */
-	for (i=0; i < NFS_ATTR_BITMAP_LEN; i++)
-		bitmap[i] &= nmp->nm_fsattr.nfsa_supp_attr[i];
+	/* First calculate the bitmap... */
+	nfs_vattr_set_bitmap(nmp, bitmap, vap);
 
 	/*
 	 * Now pack it all together:
@@ -905,7 +2179,43 @@ nfsm_chain_add_fattr4_f(struct nfsm_chain *nmc, struct vnode_attr *vap, struct n
 		nfsm_chain_add_64(error, nmc, vap->va_data_size);
 		attrbytes += 2*NFSX_UNSIGNED;
 	}
-	// NFS_BITMAP_ISSET(bitmap, NFS_FATTR_ACL)
+	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_ACL)) {
+		acl = vap->va_acl;
+		if (!acl || (acl->acl_entrycount == KAUTH_FILESEC_NOACL))
+			acecount = 0;
+		else
+			acecount = acl->acl_entrycount;
+		nfsm_chain_add_32(error, nmc, acecount);
+		attrbytes += NFSX_UNSIGNED;
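+		/* each ACE is packed as: type, flags, access mask, then the "who" name derived from its GUID */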
+		for (i=0; !error && (i < (int)acecount); i++) {
+			val = (acl->acl_ace[i].ace_flags & KAUTH_ACE_KINDMASK);
+			val = nfs4_ace_vfstype_to_nfstype(val, &error);
+			nfsm_chain_add_32(error, nmc, val);
+			val = nfs4_ace_vfsflags_to_nfsflags(acl->acl_ace[i].ace_flags);
+			nfsm_chain_add_32(error, nmc, val);
+			val = nfs4_ace_vfsrights_to_nfsmask(acl->acl_ace[i].ace_rights);
+			nfsm_chain_add_32(error, nmc, val);
+			len = slen;
+			isgroup = (kauth_cred_guid2gid(&acl->acl_ace[i].ace_applicable, &gid) == 0);
+			error = nfs4_guid2id(&acl->acl_ace[i].ace_applicable, s, &len, isgroup);
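+			/* ENOSPC means the name didn't fit in slen bytes; retry once with a larger buffer */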
+			if (error == ENOSPC) {
+				if (s != sbuf) {
+					FREE(s, M_TEMP);
+					s = sbuf;
+				}
+				len += 8;
+				MALLOC(s, char*, len, M_TEMP, M_WAITOK);
+				if (s) {
+					slen = len;
+					error = nfs4_guid2id(&acl->acl_ace[i].ace_applicable, s, &len, isgroup);
+				} else {
+					error = ENOMEM;
+				}
+			}
+			nfsm_chain_add_name(error, nmc, s, len, nmp);
+			attrbytes += 4*NFSX_UNSIGNED + nfsm_rndup(len);
+		}
+	}
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_ARCHIVE)) {
 		nfsm_chain_add_32(error, nmc, (vap->va_flags & SF_ARCHIVED) ? 1 : 0);
 		attrbytes += NFSX_UNSIGNED;
@@ -920,26 +2230,56 @@ nfsm_chain_add_fattr4_f(struct nfsm_chain *nmc, struct vnode_attr *vap, struct n
 		attrbytes += NFSX_UNSIGNED;
 	}
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_OWNER)) {
-		/* XXX Need ID mapping infrastructure - use ugly hack for now */
-		if (vap->va_uid == 0)
-			slen = snprintf(s, sizeof(s), "root@localdomain");
-		else if (vap->va_uid == (uid_t)-2)
-			slen = snprintf(s, sizeof(s), "nobody@localdomain");
-		else
-			slen = snprintf(s, sizeof(s), "%d", vap->va_uid);
-		nfsm_chain_add_string(error, nmc, s, slen);
-		attrbytes += NFSX_UNSIGNED + nfsm_rndup(slen);
+		nfsmout_if(error);
+		/* if we have va_uuuid use it, otherwise derive the GUID from va_uid */
+		if (!VATTR_IS_ACTIVE(vap, va_uuuid)) {
+			error = kauth_cred_uid2guid(vap->va_uid, &vap->va_uuuid);
+			nfsmout_if(error);
+		}
+		len = slen;
+		error = nfs4_guid2id(&vap->va_uuuid, s, &len, 0);
+		if (error == ENOSPC) {
+			if (s != sbuf) {
+				FREE(s, M_TEMP);
+				s = sbuf;
+			}
+			len += 8;
+			MALLOC(s, char*, len, M_TEMP, M_WAITOK);
+			if (s) {
+				slen = len;
+				error = nfs4_guid2id(&vap->va_uuuid, s, &len, 0);
+			} else {
+				error = ENOMEM;
+			}
+		}
+		nfsm_chain_add_name(error, nmc, s, len, nmp);
+		attrbytes += NFSX_UNSIGNED + nfsm_rndup(len);
 	}
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_OWNER_GROUP)) {
-		/* XXX Need ID mapping infrastructure - use ugly hack for now */
-		if (vap->va_gid == 0)
-			slen = snprintf(s, sizeof(s), "root@localdomain");
-		else if (vap->va_gid == (gid_t)-2)
-			slen = snprintf(s, sizeof(s), "nobody@localdomain");
-		else
-			slen = snprintf(s, sizeof(s), "%d", vap->va_gid);
-		nfsm_chain_add_string(error, nmc, s, slen);
-		attrbytes += NFSX_UNSIGNED + nfsm_rndup(slen);
+		nfsmout_if(error);
+		/* if we have va_guuid use it, otherwise derive the GUID from va_gid */
+		if (!VATTR_IS_ACTIVE(vap, va_guuid)) {
+			error = kauth_cred_gid2guid(vap->va_gid, &vap->va_guuid);
+			nfsmout_if(error);
+		}
+		len = slen;
+		error = nfs4_guid2id(&vap->va_guuid, s, &len, 1);
+		if (error == ENOSPC) {
+			if (s != sbuf) {
+				FREE(s, M_TEMP);
+				s = sbuf;
+			}
+			len += 8;
+			MALLOC(s, char*, len, M_TEMP, M_WAITOK);
+			if (s) {
+				slen = len;
+				error = nfs4_guid2id(&vap->va_guuid, s, &len, 1);
+			} else {
+				error = ENOMEM;
+			}
+		}
+		nfsm_chain_add_name(error, nmc, s, len, nmp);
+		attrbytes += NFSX_UNSIGNED + nfsm_rndup(len);
 	}
 	// NFS_BITMAP_SET(bitmap, NFS_FATTR_SYSTEM)
 	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_TIME_ACCESS_SET)) {
@@ -978,16 +2318,100 @@ nfsm_chain_add_fattr4_f(struct nfsm_chain *nmc, struct vnode_attr *vap, struct n
 	/* Now, set the attribute data length */
 	*pattrbytes = txdr_unsigned(attrbytes);
 nfsmout:
+	if (s && (s != sbuf))
+		FREE(s, M_TEMP);
 	return (error);
 }
 
+/*
+ * Got the given error and need to start recovery (if not already started).
+ * Note: nmp must be locked!
+ */
+void
+nfs_need_recover(struct nfsmount *nmp, int error)
+{
+	int wake = !(nmp->nm_state & NFSSTA_RECOVER);
+
+	nmp->nm_state |= NFSSTA_RECOVER;
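+	/* these errors mean the server has discarded our state (expired/revoked), so flag an expiry-style recovery */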
+	if ((error == NFSERR_ADMIN_REVOKED) ||
+	    (error == NFSERR_EXPIRED) ||
+	    (error == NFSERR_STALE_CLIENTID))
+		nmp->nm_state |= NFSSTA_RECOVER_EXPIRED;
+	if (wake)
+		nfs_mount_sock_thread_wake(nmp);
+}
+
+/*
+ * After recovery due to state expiry, check each node and
+ * drop any lingering delegation we thought we had.
+ *
+ * If a node has an open that is not lost and is not marked
+ * for reopen, then we hold onto any delegation because it is
+ * likely newly-granted.
+ */
+static void
+nfs4_expired_check_delegation(nfsnode_t np, vfs_context_t ctx)
+{
+	struct nfsmount *nmp = NFSTONMP(np);
+	struct nfs_open_file *nofp;
+	int drop = 1;
+
+	if ((np->n_flag & NREVOKE) || !(np->n_openflags & N_DELEG_MASK))
+		return;
+
+	lck_mtx_lock(&np->n_openlock);
+
+	TAILQ_FOREACH(nofp, &np->n_opens, nof_link) {
+		if (!nofp->nof_opencnt)
+			continue;
+		if (nofp->nof_flags & NFS_OPEN_FILE_LOST)
+			continue;
+		if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)
+			continue;
+		/* we have an open that is not lost and not marked for reopen */
+		/* note what's keeping this node from dropping the delegation */
+		NP(nofp->nof_np, "nfs4_expired_check_delegation: !drop: opencnt %d flags 0x%x access %d %d mmap %d %d",
+			nofp->nof_opencnt, nofp->nof_flags,
+			nofp->nof_access, nofp->nof_deny,
+			nofp->nof_mmap_access, nofp->nof_mmap_deny);
+		drop = 0;
+		break;
+	}
+
+	if (drop) {
+		/* need to drop a delegation */
+		if (np->n_dreturn.tqe_next != NFSNOLIST) {
+			/* remove this node from the delegation return list */
+			lck_mtx_lock(&nmp->nm_lock);
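+			/* recheck under nm_lock; another thread may have already dequeued this node */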
+			if (np->n_dreturn.tqe_next != NFSNOLIST) {
+				TAILQ_REMOVE(&nmp->nm_dreturnq, np, n_dreturn);
+				np->n_dreturn.tqe_next = NFSNOLIST;
+			}
+			lck_mtx_unlock(&nmp->nm_lock);
+		}
+		if (np->n_openflags & N_DELEG_MASK) {
+			np->n_openflags &= ~N_DELEG_MASK;
+			lck_mtx_lock(&nmp->nm_lock);
+			if (np->n_dlink.tqe_next != NFSNOLIST) {
+				TAILQ_REMOVE(&nmp->nm_delegations, np, n_dlink);
+				np->n_dlink.tqe_next = NFSNOLIST;
+			}
+			lck_mtx_unlock(&nmp->nm_lock);
+			nfs4_delegreturn_rpc(nmp, np->n_fhp, np->n_fhsize, &np->n_dstateid,
+				0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
+		}
+	}
+
+	lck_mtx_unlock(&np->n_openlock);
+}
+
 /*
  * Recover state for an NFS mount.
  *
  * Iterates over all open files, reclaiming opens and lock state.
  */
 void
-nfs4_recover(struct nfsmount *nmp)
+nfs_recover(struct nfsmount *nmp)
 {
 	struct timespec ts = { 1, 0 };
 	int error, lost, reopen;
@@ -996,6 +2420,8 @@ nfs4_recover(struct nfsmount *nmp)
 	struct nfs_file_lock *nflp, *nextnflp;
 	struct nfs_lock_owner *nlop;
 	thread_t thd = current_thread();
+	nfsnode_t np, nextnp;
+	struct timeval now;
 
 restart:
 	error = 0;
@@ -1020,25 +2446,36 @@ restart:
 	} while (nmp->nm_stateinuse);
 	if (error) {
 		if (error == EPIPE)
-			printf("nfs recovery reconnecting\n");
+			printf("nfs recovery reconnecting for %s, 0x%x\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid);
 		else
-			printf("nfs recovery aborted\n");
+			printf("nfs recovery aborted for %s, 0x%x\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid);
 		lck_mtx_unlock(&nmp->nm_lock);
 		return;
 	}
 
-	printf("nfs recovery started\n");
+	microuptime(&now);
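+	/* throttle recovery to at most one pass per second */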
+	if (now.tv_sec == nmp->nm_recover_start) {
+		printf("nfs recovery throttled for %s, 0x%x\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid);
+		lck_mtx_unlock(&nmp->nm_lock);
+		tsleep(&lbolt, (PZERO-1), "nfsrecoverrestart", hz);
+		goto restart;
+	}
+	nmp->nm_recover_start = now.tv_sec;
 	if (++nmp->nm_stategenid == 0)
 		++nmp->nm_stategenid;
+	printf("nfs recovery started for %s, 0x%x\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid);
 	lck_mtx_unlock(&nmp->nm_lock);
 
 	/* for each open owner... */
 	TAILQ_FOREACH(noop, &nmp->nm_open_owners, noo_link) {
 		/* for each of its opens... */
 		TAILQ_FOREACH(nofp, &noop->noo_opens, nof_oolink) {
-			if (!nofp->nof_access || (nofp->nof_flags & NFS_OPEN_FILE_LOST))
+			if (!nofp->nof_access || (nofp->nof_flags & NFS_OPEN_FILE_LOST) || (nofp->nof_np->n_flag & NREVOKE))
 				continue;
 			lost = reopen = 0;
+			/* for NFSv2/v3, just skip straight to lock reclaim */
+			if (nmp->nm_vers < NFS_VER4)
+				goto reclaim_locks;
 			if (nofp->nof_rw_drw)
 				error = nfs4_open_reclaim_rpc(nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_BOTH);
 			if (!error && nofp->nof_w_drw)
@@ -1056,45 +2493,80 @@ restart:
 			 */
 			if (!error && nofp->nof_rw) {
 				error = nfs4_open_reclaim_rpc(nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE);
-				if ((error == NFSERR_ADMIN_REVOKED) || (error == NFSERR_EXPIRED) || (error == NFSERR_NO_GRACE))
-					reopen = 1;
+				if ((error == NFSERR_ADMIN_REVOKED) || (error == NFSERR_EXPIRED) || (error == NFSERR_NO_GRACE)) {
+					reopen = error;
+					error = 0;
+				}
 			}
-			if (!error && nofp->nof_w) {
+			if (!error && !reopen && nofp->nof_w) {
 				error = nfs4_open_reclaim_rpc(nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE);
-				if ((error == NFSERR_ADMIN_REVOKED) || (error == NFSERR_EXPIRED) || (error == NFSERR_NO_GRACE))
-					reopen = 1;
+				if ((error == NFSERR_ADMIN_REVOKED) || (error == NFSERR_EXPIRED) || (error == NFSERR_NO_GRACE)) {
+					reopen = error;
+					error = 0;
+				}
 			}
-			if (!error && nofp->nof_r) {
+			if (!error && !reopen && nofp->nof_r) {
 				error = nfs4_open_reclaim_rpc(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE);
-				if ((error == NFSERR_ADMIN_REVOKED) || (error == NFSERR_EXPIRED) || (error == NFSERR_NO_GRACE))
-					reopen = 1;
+				if ((error == NFSERR_ADMIN_REVOKED) || (error == NFSERR_EXPIRED) || (error == NFSERR_NO_GRACE)) {
+					reopen = error;
+					error = 0;
+				}
 			}
 
-			if (error) {
+			/*
+			 * If we hold delegated state but we don't have any non-delegated opens,
+			 * then we should attempt to claim that state now (but don't return the
+			 * delegation unless asked to).
+			 */
+			if ((nofp->nof_d_rw_drw || nofp->nof_d_w_drw || nofp->nof_d_r_drw ||
+				    nofp->nof_d_rw_dw || nofp->nof_d_w_dw || nofp->nof_d_r_dw ||
+				    nofp->nof_d_rw || nofp->nof_d_w || nofp->nof_d_r) &&
+				    (!nofp->nof_rw_drw && !nofp->nof_w_drw && !nofp->nof_r_drw &&
+				     !nofp->nof_rw_dw && !nofp->nof_w_dw && !nofp->nof_r_dw &&
+				     !nofp->nof_rw && !nofp->nof_w && !nofp->nof_r)) {
+				if (!error && !nfs_open_state_set_busy(nofp->nof_np, NULL)) {
+					error = nfs4_claim_delegated_state_for_node(nofp->nof_np, R_RECOVER);
+					if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN))
+						reopen = EAGAIN;
+					nfs_open_state_clear_busy(nofp->nof_np);
+					/* if claim didn't go well, we may need to return delegation now */
+					if (nofp->nof_np->n_openflags & N_DELEG_RETURN) {
+						nfs4_delegation_return(nofp->nof_np, R_RECOVER, thd, noop->noo_cred);
+						if (!(nmp->nm_sockflags & NMSOCK_READY))
+							error = ETIMEDOUT;  /* looks like we need a reconnect */
+					}
+				}
+			}
+
+			/*
+			 * Handle any issue claiming open state.
+			 * Potential reopens need to first confirm that there are no locks.
+			 */
+			if (error || reopen) {
 				/* restart recovery? */
 				if ((error == ETIMEDOUT) || nfs_mount_state_error_should_restart(error)) {
 					if (error == ETIMEDOUT)
 						nfs_need_reconnect(nmp);
 					tsleep(&lbolt, (PZERO-1), "nfsrecoverrestart", 0);
-					printf("nfs recovery restarting %d\n", error);
+					printf("nfs recovery restarting for %s, 0x%x, error %d\n",
+						vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid, error);
 					goto restart;
 				}
-				if (reopen && (nfs4_check_for_locks(noop, nofp) == 0)) {
+				if (reopen && (nfs_check_for_locks(noop, nofp) == 0)) {
 					/* just reopen the file on next access */
-					const char *vname = vnode_getname(NFSTOV(nofp->nof_np));
-					printf("nfs4_recover: %d, need reopen for %s\n", error, vname ? vname : "???");
-					vnode_putname(vname);
+					NP(nofp->nof_np, "nfs_recover: %d, need reopen for %d %p 0x%x", reopen,
+						kauth_cred_getuid(noop->noo_cred), nofp->nof_np, nofp->nof_np->n_flag);
 					lck_mtx_lock(&nofp->nof_lock);
 					nofp->nof_flags |= NFS_OPEN_FILE_REOPEN;
 					lck_mtx_unlock(&nofp->nof_lock);
-					error = 0;
 				} else {
 					/* open file state lost */
+					if (reopen)
+						NP(nofp->nof_np, "nfs_recover: %d, can't reopen because of locks %d %p", reopen,
+							kauth_cred_getuid(noop->noo_cred), nofp->nof_np);
 					lost = 1;
 					error = 0;
-					lck_mtx_lock(&nofp->nof_lock);
-					nofp->nof_flags &= ~NFS_OPEN_FILE_REOPEN;
-					lck_mtx_unlock(&nofp->nof_lock);
+					reopen = 0;
 				}
 			} else {
 				/* no error, so make sure the reopen flag isn't set */
@@ -1102,83 +2574,97 @@ restart:
 				nofp->nof_flags &= ~NFS_OPEN_FILE_REOPEN;
 				lck_mtx_unlock(&nofp->nof_lock);
 			}
+
 			/*
 			 * Scan this node's lock owner list for entries with this open owner,
 			 * then walk the lock owner's held lock list recovering each lock.
 			 */
-rescanlocks:
+reclaim_locks:
 			TAILQ_FOREACH(nlop, &nofp->nof_np->n_lock_owners, nlo_link) {
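+				/* no point reclaiming locks if the open state is already lost or headed for reopen */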
+				if (lost || reopen)
+					break;
 				if (nlop->nlo_open_owner != noop)
 					continue;
 				TAILQ_FOREACH_SAFE(nflp, &nlop->nlo_locks, nfl_lolink, nextnflp) {
+					/* skip dead & blocked lock requests (shouldn't be any in the held lock list) */
 					if (nflp->nfl_flags & (NFS_FILE_LOCK_DEAD|NFS_FILE_LOCK_BLOCKED))
 						continue;
-					if (!lost) {
-						error = nfs4_lock_rpc(nofp->nof_np, nofp, nflp, 1, thd, noop->noo_cred);
-						if (!error)
-							continue;
-						/* restart recovery? */
-						if ((error == ETIMEDOUT) || nfs_mount_state_error_should_restart(error)) {
-							if (error == ETIMEDOUT)
-								nfs_need_reconnect(nmp);
-							tsleep(&lbolt, (PZERO-1), "nfsrecoverrestart", 0);
-							printf("nfs recovery restarting %d\n", error);
-							goto restart;
-						}
-						/* lock state lost - attempt to close file */ 
-						lost = 1;
-						error = nfs4_close_rpc(nofp->nof_np, nofp, NULL, noop->noo_cred, R_RECOVER);
-						if ((error == ETIMEDOUT) || nfs_mount_state_error_should_restart(error)) {
-							if (error == ETIMEDOUT)
-								nfs_need_reconnect(nmp);
-							tsleep(&lbolt, (PZERO-1), "nfsrecoverrestart", 0);
-							printf("nfs recovery restarting %d\n", error);
-							goto restart;
-						}
-						error = 0;
-						/* rescan locks so we can drop them all */
-						goto rescanlocks;
-					}
-					if (lost) {
-						/* kill/remove the lock */
-						lck_mtx_lock(&nofp->nof_np->n_openlock);
-						nflp->nfl_flags |= NFS_FILE_LOCK_DEAD;
-						lck_mtx_lock(&nlop->nlo_lock);
-						nextnflp = TAILQ_NEXT(nflp, nfl_lolink);
-						TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink);
-						lck_mtx_unlock(&nlop->nlo_lock);
-						if (nflp->nfl_blockcnt) {
-							/* wake up anyone blocked on this lock */
-							wakeup(nflp);
-						} else {
-							/* remove nflp from lock list and destroy */
-							TAILQ_REMOVE(&nofp->nof_np->n_locks, nflp, nfl_link);
-							nfs_file_lock_destroy(nflp);
-						}
-						lck_mtx_unlock(&nofp->nof_np->n_openlock);
+					/* skip delegated locks */
+					if (nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED)
+						continue;
+					error = nmp->nm_funcs->nf_setlock_rpc(nofp->nof_np, nofp, nflp, 1, R_RECOVER, thd, noop->noo_cred);
+					if (error)
+						NP(nofp->nof_np, "nfs: lock reclaim (0x%llx, 0x%llx) failed %d",
+							nflp->nfl_start, nflp->nfl_end, error);
+					if (!error)
+						continue;
+					/* restart recovery? */
+					if ((error == ETIMEDOUT) || nfs_mount_state_error_should_restart(error)) {
+						if (error == ETIMEDOUT)
+							nfs_need_reconnect(nmp);
+						tsleep(&lbolt, (PZERO-1), "nfsrecoverrestart", 0);
+						printf("nfs recovery restarting for %s, 0x%x, error %d\n",
+							vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid, error);
+						goto restart;
 					}
+					/* lock state lost - mark the open lost so it gets revoked below */
+					lost = 1;
+					error = 0;
+					break;
 				}
 			}
+
+			/*
+			 * If we've determined that we need to reopen the file, then we probably
+			 * weren't granted the delegation we think we hold.  We should attempt to
+			 * return that delegation (and claim any delegated state).
+			 *
+			 * If we hold a delegation that is marked for return, then we should
+			 * return it now.
+			 */
+			if ((nofp->nof_np->n_openflags & N_DELEG_RETURN) ||
+			    (reopen && (nofp->nof_np->n_openflags & N_DELEG_MASK))) {
+				nfs4_delegation_return(nofp->nof_np, R_RECOVER, thd, noop->noo_cred);
+				if (!(nmp->nm_sockflags & NMSOCK_READY)) {
+					/* looks like we need a reconnect */
+					tsleep(&lbolt, (PZERO-1), "nfsrecoverrestart", 0);
+					printf("nfs recovery restarting for %s, 0x%x, error %d\n",
+						vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid, error);
+					goto restart;
+				}
+			}
+
 			if (lost) {
 				/* revoke open file state */
-				lck_mtx_lock(&nofp->nof_lock);
-				nofp->nof_flags |= NFS_OPEN_FILE_LOST;
-				lck_mtx_unlock(&nofp->nof_lock);
-				const char *vname = vnode_getname(NFSTOV(nofp->nof_np));
-				printf("nfs4_recover: state lost for %s\n", vname ? vname : "???");
-				vnode_putname(vname);
+				NP(nofp->nof_np, "nfs_recover: state lost for %d %p 0x%x",
+					kauth_cred_getuid(noop->noo_cred), nofp->nof_np, nofp->nof_np->n_flag);
+				nfs_revoke_open_state_for_node(nofp->nof_np);
 			}
 		}
 	}
 
 	if (!error) {
+		/* If state expired, make sure we're not holding onto any stale delegations */
 		lck_mtx_lock(&nmp->nm_lock);
-		nmp->nm_state &= ~NFSSTA_RECOVER;
+		if ((nmp->nm_vers >= NFS_VER4) && (nmp->nm_state & NFSSTA_RECOVER_EXPIRED)) {
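+			/* nm_lock is dropped inside the loop, so the list can change; rescan if our saved next pointer was unlinked */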
+recheckdeleg:
+			TAILQ_FOREACH_SAFE(np, &nmp->nm_delegations, n_dlink, nextnp) {
+				lck_mtx_unlock(&nmp->nm_lock);
+				nfs4_expired_check_delegation(np, vfs_context_kernel());
+				lck_mtx_lock(&nmp->nm_lock);
+				if (nextnp == NFSNOLIST)
+					goto recheckdeleg;
+			}
+		}
+		nmp->nm_state &= ~(NFSSTA_RECOVER|NFSSTA_RECOVER_EXPIRED);
 		wakeup(&nmp->nm_state);
-		printf("nfs recovery completed\n");
+		printf("nfs recovery completed for %s, 0x%x\n",
+			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid);
 		lck_mtx_unlock(&nmp->nm_lock);
 	} else {
-		printf("nfs recovery failed %d\n", error);
+		printf("nfs recovery failed for %s, 0x%x, error %d\n",
+			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid, error);
 	}
 }
 
diff --git a/bsd/nfs/nfs4_vnops.c b/bsd/nfs/nfs4_vnops.c
index ffd12d88f..ca874aa7c 100644
--- a/bsd/nfs/nfs4_vnops.c
+++ b/bsd/nfs/nfs4_vnops.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2006-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -46,7 +46,9 @@
 #include <sys/ubc_internal.h>
 #include <sys/attr.h>
 #include <sys/signalvar.h>
-#include <sys/uio.h>
+#include <sys/uio_internal.h>
+#include <sys/xattr.h>
+#include <sys/paths.h>
 
 #include <vfs/vfs_support.h>
 
@@ -78,17 +80,22 @@
 #include <kern/sched_prim.h>
 
 int
-nfs4_access_rpc(nfsnode_t np, u_int32_t *mode, vfs_context_t ctx)
+nfs4_access_rpc(nfsnode_t np, u_int32_t *access, vfs_context_t ctx)
 {
 	int error = 0, lockerror = ENOENT, status, numops, slot;
 	u_int64_t xid;
 	struct nfsm_chain nmreq, nmrep;
 	struct timeval now;
-	uint32_t access = 0, supported = 0, missing;
+	uint32_t access_result = 0, supported = 0, missing;
 	struct nfsmount *nmp = NFSTONMP(np);
 	int nfsvers = nmp->nm_vers;
 	uid_t uid;
+	struct nfsreq_secinfo_args si;
 
+	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (0);
+
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
 
@@ -101,15 +108,14 @@ nfs4_access_rpc(nfsnode_t np, u_int32_t *mode, vfs_context_t ctx)
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_ACCESS);
-	nfsm_chain_add_32(error, &nmreq, *mode);
+	nfsm_chain_add_32(error, &nmreq, *access);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
-	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status);
+	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);
 
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
@@ -118,9 +124,9 @@ nfs4_access_rpc(nfsnode_t np, u_int32_t *mode, vfs_context_t ctx)
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_ACCESS);
 	nfsm_chain_get_32(error, &nmrep, supported);
-	nfsm_chain_get_32(error, &nmrep, access);
+	nfsm_chain_get_32(error, &nmrep, access_result);
 	nfsmout_if(error);
-	if ((missing = (*mode & ~supported))) {
+	if ((missing = (*access & ~supported))) {
 		/* missing support for something(s) we wanted */
 		if (missing & NFS_ACCESS_DELETE) {
 			/*
@@ -129,25 +135,35 @@ nfs4_access_rpc(nfsnode_t np, u_int32_t *mode, vfs_context_t ctx)
 			 * and just let any subsequent delete action fail
 			 * if it really isn't deletable.
 			 */
-			access |= NFS_ACCESS_DELETE;
+			access_result |= NFS_ACCESS_DELETE;
 		}
 	}
+	/* ".zfs" subdirectories may erroneously give a denied answer for modify/delete */
+	if (nfs_access_dotzfs) {
+		vnode_t dvp = NULLVP;
+		if (np->n_flag & NISDOTZFSCHILD) /* may be able to create/delete snapshot dirs */
+			access_result |= (NFS_ACCESS_MODIFY|NFS_ACCESS_EXTEND|NFS_ACCESS_DELETE);
+		else if (((dvp = vnode_getparent(NFSTOV(np))) != NULLVP) && (VTONFS(dvp)->n_flag & NISDOTZFSCHILD))
+			access_result |= NFS_ACCESS_DELETE; /* may be able to delete snapshot dirs */
+		if (dvp != NULLVP)
+			vnode_put(dvp);
+	}
 	/* Some servers report DELETE support but erroneously give a denied answer. */
-	if ((*mode & NFS_ACCESS_DELETE) && nfs_access_delete && !(access & NFS_ACCESS_DELETE))
-		access |= NFS_ACCESS_DELETE;
+	if (nfs_access_delete && (*access & NFS_ACCESS_DELETE) && !(access_result & NFS_ACCESS_DELETE))
+		access_result |= NFS_ACCESS_DELETE;
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
+	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
 	nfsmout_if(error);
 
 	uid = kauth_cred_getuid(vfs_context_ucred(ctx));
-	slot = nfs_node_mode_slot(np, uid, 1);
-	np->n_modeuid[slot] = uid;
+	slot = nfs_node_access_slot(np, uid, 1);
+	np->n_accessuid[slot] = uid;
 	microuptime(&now);
-	np->n_modestamp[slot] = now.tv_sec;
-	np->n_mode[slot] = access;
+	np->n_accessstamp[slot] = now.tv_sec;
+	np->n_access[slot] = access_result;
 
-	/* pass back the mode returned with this request */
-	*mode = np->n_mode[slot];
+	/* pass back the access returned with this request */
+	*access = np->n_access[slot];
 nfsmout:
 	if (!lockerror)
 		nfs_node_unlock(np);
@@ -162,18 +178,31 @@ nfs4_getattr_rpc(
 	mount_t mp,
 	u_char *fhp,
 	size_t fhsize,
+	int flags,
 	vfs_context_t ctx,
 	struct nfs_vattr *nvap,
 	u_int64_t *xidp)
 {
 	struct nfsmount *nmp = mp ? VFSTONFS(mp) : NFSTONMP(np);
-	int error = 0, status, nfsvers, numops;
+	int error = 0, status, nfsvers, numops, rpcflags = 0, acls;
+	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
 	struct nfsm_chain nmreq, nmrep;
+	struct nfsreq_secinfo_args si;
 
 	if (!nmp)
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
+	acls = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL);
+
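+	/* a referral trigger has no attributes of its own on this server - synthesize defaults instead of asking */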
+	if (np && (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)) {
+		nfs4_default_attrs_for_referral_trigger(VTONFS(np->n_parent), NULL, 0, nvap, NULL);
+		return (0);
+	}
+
+	if (flags & NGA_MONITOR) /* vnode monitor requests should be soft */
+		rpcflags = R_RECOVER;
 
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
 
@@ -186,20 +215,29 @@ nfs4_getattr_rpc(
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, fhp, fhsize);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
+	if ((flags & NGA_ACL) && acls)
+		NFS_BITMAP_SET(bitmap, NFS_FATTR_ACL);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
-	error = nfs_request(np, mp, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, xidp, &status);
+	error = nfs_request2(np, mp, &nmreq, NFSPROC4_COMPOUND, 
+			vfs_context_thread(ctx), vfs_context_ucred(ctx),
+			NULL, rpcflags, &nmrep, xidp, &status);
 
 	nfsm_chain_skip_tag(error, &nmrep);
 	nfsm_chain_get_32(error, &nmrep, numops);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
 	nfsmout_if(error);
-	NFS_CLEAR_ATTRIBUTES(nvap->nva_bitmap);
-	error = nfs4_parsefattr(&nmrep, NULL, nvap, NULL, NULL);
+	error = nfs4_parsefattr(&nmrep, NULL, nvap, NULL, NULL, NULL);
+	nfsmout_if(error);
+	if ((flags & NGA_ACL) && acls && !NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_ACL)) {
+		/* we asked for the ACL but didn't get one... assume there isn't one */
+		NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_ACL);
+		nvap->nva_acl = NULL;
+	}
 nfsmout:
 	nfsm_chain_cleanup(&nmreq);
 	nfsm_chain_cleanup(&nmrep);
@@ -214,10 +252,14 @@ nfs4_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx)
 	uint32_t len = 0;
 	u_int64_t xid;
 	struct nfsm_chain nmreq, nmrep;
+	struct nfsreq_secinfo_args si;
 
 	nmp = NFSTONMP(np);
 	if (!nmp)
 		return (ENXIO);
+	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
 
@@ -230,14 +272,13 @@ nfs4_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx)
 	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_READLINK);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
-	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status);
+	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);
 
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
@@ -245,7 +286,7 @@ nfs4_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx)
 	nfsm_chain_get_32(error, &nmrep, numops);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, NULL, &xid);
+	nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, &xid);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_READLINK);
 	nfsm_chain_get_32(error, &nmrep, len);
 	nfsmout_if(error);
@@ -280,12 +321,16 @@ nfs4_read_rpc_async(
 	int error = 0, nfsvers, numops;
 	nfs_stateid stateid;
 	struct nfsm_chain nmreq;
+	struct nfsreq_secinfo_args si;
 
 	nmp = NFSTONMP(np);
 	if (!nmp)
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
+	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
 
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
 	nfsm_chain_null(&nmreq);
 
 	// PUTFH, READ, GETATTR
@@ -303,12 +348,11 @@ nfs4_read_rpc_async(
 	nfsm_chain_add_32(error, &nmreq, len);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
-	error = nfs_request_async(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, cb, reqp);
+	error = nfs_request_async(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, 0, cb, reqp);
 nfsmout:
 	nfsm_chain_cleanup(&nmreq);
 	return (error);
@@ -354,7 +398,7 @@ nfs4_read_rpc_async_finish(
 		error = nfsm_chain_get_uio(&nmrep, *lenp, uio);
 	}
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
+	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
 	if (!lockerror)
 		nfs_node_unlock(np);
 	if (eofp) {
@@ -363,6 +407,8 @@ nfs4_read_rpc_async_finish(
 		*eofp = eof;
 	}
 	nfsm_chain_cleanup(&nmrep);
+	if (np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR)
+		microuptime(&np->n_lastio);
 	return (error);
 }
 
@@ -378,15 +424,25 @@ nfs4_write_rpc_async(
 	struct nfsreq **reqp)
 {
 	struct nfsmount *nmp;
+	mount_t mp;
 	int error = 0, nfsvers, numops;
 	nfs_stateid stateid;
 	struct nfsm_chain nmreq;
+	struct nfsreq_secinfo_args si;
 
 	nmp = NFSTONMP(np);
 	if (!nmp)
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
+	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
+
+	/* for async mounts, don't bother sending sync write requests */
+	if ((iomode != NFS_WRITE_UNSTABLE) && nfs_allow_async &&
+	    ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC))
+		iomode = NFS_WRITE_UNSTABLE;
 
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
 	nfsm_chain_null(&nmreq);
 
 	// PUTFH, WRITE, GETATTR
@@ -407,13 +463,12 @@ nfs4_write_rpc_async(
 		error = nfsm_chain_add_uio(&nmreq, uio, len);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
 
-	error = nfs_request_async(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, cb, reqp);
+	error = nfs_request_async(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, 0, cb, reqp);
 nfsmout:
 	nfsm_chain_cleanup(&nmreq);
 	return (error);
@@ -475,7 +530,7 @@ nfs4_write_rpc_async_finish(
 	}
 	lck_mtx_unlock(&nmp->nm_lock);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
+	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
 nfsmout:
 	if (!lockerror)
 		nfs_node_unlock(np);
@@ -484,6 +539,8 @@ nfsmout:
 	    ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC))
 		committed = NFS_WRITE_FILESYNC;
 	*iomodep = committed;
+	if (np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR)
+		microuptime(&np->n_lastio);
 	return (error);
 }
 
@@ -500,11 +557,15 @@ nfs4_remove_rpc(
 	int nfsvers, numops;
 	u_int64_t xid;
 	struct nfsm_chain nmreq, nmrep;
+	struct nfsreq_secinfo_args si;
 
 	nmp = NFSTONMP(dnp);
 	if (!nmp)
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
+	if (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
+	NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, NULL, 0);
 restart:
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
@@ -518,16 +579,15 @@ restart:
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_REMOVE);
-	nfsm_chain_add_string(error, &nmreq, name, namelen);
+	nfsm_chain_add_name(error, &nmreq, name, namelen, nmp);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
 
-	error = nfs_request2(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, 0, &nmrep, &xid, &status);
+	error = nfs_request2(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, 0, &nmrep, &xid, &status);
 
 	if ((lockerror = nfs_node_lock(dnp)))
 		error = lockerror;
@@ -538,7 +598,7 @@ restart:
 	remove_error = error;
 	nfsm_chain_check_change_info(error, &nmrep, dnp);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, NULL, &xid);
+	nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, &xid);
 	if (error && !lockerror)
 		NATTRINVALIDATE(dnp);
 nfsmout:
@@ -571,12 +631,18 @@ nfs4_rename_rpc(
 	struct nfsmount *nmp;
 	u_int64_t xid, savedxid;
 	struct nfsm_chain nmreq, nmrep;
+	struct nfsreq_secinfo_args si;
 
 	nmp = NFSTONMP(fdnp);
 	if (!nmp)
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
+	if (fdnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
+	if (tdnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
 
+	NFSREQ_SECINFO_SET(&si, fdnp, NULL, 0, NULL, 0);
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
 
@@ -594,23 +660,21 @@ nfs4_rename_rpc(
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_RENAME);
-	nfsm_chain_add_string(error, &nmreq, fnameptr, fnamelen);
-	nfsm_chain_add_string(error, &nmreq, tnameptr, tnamelen);
+	nfsm_chain_add_name(error, &nmreq, fnameptr, fnamelen, nmp);
+	nfsm_chain_add_name(error, &nmreq, tnameptr, tnamelen, nmp);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, tdnp);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, fdnp);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
 
-	error = nfs_request(fdnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status);
+	error = nfs_request(fdnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);
 
 	if ((lockerror = nfs_node_lock2(fdnp, tdnp)))
 		error = lockerror;
@@ -625,13 +689,13 @@ nfs4_rename_rpc(
 	/* directory attributes: if we don't get them, make sure to invalidate */
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
 	savedxid = xid;
-	nfsm_chain_loadattr(error, &nmrep, tdnp, nfsvers, NULL, &xid);
+	nfsm_chain_loadattr(error, &nmrep, tdnp, nfsvers, &xid);
 	if (error && !lockerror)
 		NATTRINVALIDATE(tdnp);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
 	xid = savedxid;
-	nfsm_chain_loadattr(error, &nmrep, fdnp, nfsvers, NULL, &xid);
+	nfsm_chain_loadattr(error, &nmrep, fdnp, nfsvers, &xid);
 	if (error && !lockerror)
 		NATTRINVALIDATE(fdnp);
 nfsmout:
@@ -642,9 +706,6 @@ nfsmout:
 		tdnp->n_flag |= NMODIFIED;
 		nfs_node_unlock2(fdnp, tdnp);
 	}
-	/* Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. */
-	if (error == EEXIST)
-		error = 0;
 	return (error);
 }
 
@@ -655,7 +716,7 @@ int
 nfs4_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx)
 {
 	struct nfsmount *nmp;
-	int error = 0, lockerror, nfsvers, rdirplus, bigcookies, numops;
+	int error = 0, lockerror, nfsvers, namedattr, rdirplus, bigcookies, numops;
 	int i, status, more_entries = 1, eof, bp_dropped = 0;
 	uint32_t nmreaddirsize, nmrsize;
 	uint32_t namlen, skiplen, fhlen, xlen, attrlen, reclen, space_free, space_needed;
@@ -669,6 +730,7 @@ nfs4_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx)
 	const char *tag;
 	uint32_t entry_attrs[NFS_ATTR_BITMAP_LEN];
 	struct timeval now;
+	struct nfsreq_secinfo_args si;
 
 	nmp = NFSTONMP(dnp);
 	if (!nmp)
@@ -677,7 +739,11 @@ nfs4_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx)
 	nmreaddirsize = nmp->nm_readdirsize;
 	nmrsize = nmp->nm_rsize;
 	bigcookies = nmp->nm_state & NFSSTA_BIGCOOKIES;
-	rdirplus = ((nfsvers > NFS_VER2) && (nmp->nm_flag & NFSMNT_RDIRPLUS)) ? 1 : 0;
+	namedattr = (dnp->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR) ? 1 : 0;
+	rdirplus = (NMFLAG(nmp, RDIRPLUS) || namedattr) ? 1 : 0;
+	if (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
+	NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, NULL, 0);
 
 	/*
 	 * Set up attribute request for entries.
@@ -686,18 +752,15 @@ nfs4_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx)
 	 */
 	if (rdirplus) {
 		tag = "readdirplus";
-		for (i=0; i < NFS_ATTR_BITMAP_LEN; i++)
-			entry_attrs[i] =
-				nfs_getattr_bitmap[i] &
-				nmp->nm_fsattr.nfsa_supp_attr[i];
+		NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, entry_attrs);
 		NFS_BITMAP_SET(entry_attrs, NFS_FATTR_FILEHANDLE);
 	} else {
 		tag = "readdir";
 		NFS_CLEAR_ATTRIBUTES(entry_attrs);
 		NFS_BITMAP_SET(entry_attrs, NFS_FATTR_TYPE);
 		NFS_BITMAP_SET(entry_attrs, NFS_FATTR_FILEID);
+		NFS_BITMAP_SET(entry_attrs, NFS_FATTR_MOUNTED_ON_FILEID);
 	}
-	/* XXX NFS_BITMAP_SET(entry_attrs, NFS_FATTR_MOUNTED_ON_FILEID); */
 	NFS_BITMAP_SET(entry_attrs, NFS_FATTR_RDATTR_ERROR);
 
 	/* lock to protect access to cookie verifier */
@@ -722,8 +785,10 @@ nfs4_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx)
 	/*
 	 * The NFS client is responsible for the "." and ".." entries in the
 	 * directory.  So, we put them at the start of the first buffer.
+	 * Don't bother for attribute directories.
 	 */
-	if ((bp->nb_lblkno == 0) && (ndbhp->ndbh_count == 0)) {
+	if (((bp->nb_lblkno == 0) && (ndbhp->ndbh_count == 0)) &&
+	    !(dnp->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR)) {
 		fh.fh_len = 0;
 		fhlen = rdirplus ? fh.fh_len + 1 : 0;
 		xlen = rdirplus ? (fhlen + sizeof(time_t)) : 0;
@@ -790,20 +855,19 @@ nfs4_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx)
 		nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
 		numops--;
 		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-		nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-			NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+		nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
 		numops--;
 		nfsm_chain_add_32(error, &nmreq, NFS_OP_READDIR);
 		nfsm_chain_add_64(error, &nmreq, (cookie <= 2) ? 0 : cookie);
 		nfsm_chain_add_64(error, &nmreq, dnp->n_cookieverf);
 		nfsm_chain_add_32(error, &nmreq, nmreaddirsize);
 		nfsm_chain_add_32(error, &nmreq, nmrsize);
-		nfsm_chain_add_bitmap(error, &nmreq, entry_attrs, NFS_ATTR_BITMAP_LEN);
+		nfsm_chain_add_bitmap_supported(error, &nmreq, entry_attrs, nmp, dnp);
 		nfsm_chain_build_done(error, &nmreq);
 		nfsm_assert(error, (numops == 0), EPROTO);
 		nfs_node_unlock(dnp);
 		nfsmout_if(error);
-		error = nfs_request(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status);
+		error = nfs_request(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);
 
 		if ((lockerror = nfs_node_lock(dnp)))
 			error = lockerror;
@@ -813,7 +877,7 @@ nfs4_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx)
 		nfsm_chain_get_32(error, &nmrep, numops);
 		nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
 		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-		nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, NULL, &xid);
+		nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, &xid);
 		nfsm_chain_op_check(error, &nmrep, NFS_OP_READDIR);
 		nfsm_chain_get_64(error, &nmrep, dnp->n_cookieverf);
 		nfsm_chain_get_32(error, &nmrep, more_entries);
@@ -898,14 +962,21 @@ nextbuffer:
 					nfsm_rndup(namlen + skiplen) - nfsm_rndup(namlen));
 			nfsmout_if(error);
 			nvattrp = rdirplus ? NFS_DIR_BUF_NVATTR(bp, ndbhp->ndbh_count) : &nvattr;
-			NFS_CLEAR_ATTRIBUTES(nvattrp->nva_bitmap);
-			error = nfs4_parsefattr(&nmrep, NULL, nvattrp, &fh, NULL);
+			error = nfs4_parsefattr(&nmrep, NULL, nvattrp, &fh, NULL, NULL);
+			if (!error && NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_ACL)) {
+				/* we do NOT want ACLs returned to us here */
+				NFS_BITMAP_CLR(nvattrp->nva_bitmap, NFS_FATTR_ACL);
+				if (nvattrp->nva_acl) {
+					kauth_acl_free(nvattrp->nva_acl);
+					nvattrp->nva_acl = NULL;
+				}
+			}
 			if (error && NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_RDATTR_ERROR)) {
-				/* OK, we didn't get attributes, whatever... */
-				if (rdirplus) /* mark the attributes invalid */
-					bzero(nvattrp, sizeof(struct nfs_vattr));
-				else
-					NFS_CLEAR_ATTRIBUTES(nvattrp->nva_bitmap);
+				/* OK, we may not have gotten all of the attributes but we will use what we can. */
+				if ((error == NFSERR_MOVED) || (error == NFSERR_INVAL)) {
+					/* set this up to look like a referral trigger */
+					nfs4_default_attrs_for_referral_trigger(dnp, dp->d_name, namlen, nvattrp, &fh);
+				}
 				error = 0;
 			}
 			/* check for more entries after this one */
@@ -913,7 +984,9 @@ nextbuffer:
 			nfsmout_if(error);
 
 			/* Skip any "." and ".." entries returned from server. */
-			if ((dp->d_name[0] == '.') && ((namlen == 1) || ((namlen == 2) && (dp->d_name[1] == '.')))) {
+			/* Also skip any bothersome named attribute entries. */
+			if (((dp->d_name[0] == '.') && ((namlen == 1) || ((namlen == 2) && (dp->d_name[1] == '.')))) ||
+			    (namedattr && (namlen == 11) && (!strcmp(dp->d_name, "SUNWattr_ro") || !strcmp(dp->d_name, "SUNWattr_rw")))) {
 				lastcookie = cookie;
 				continue;
 			}
@@ -1001,23 +1074,30 @@ nfs4_lookup_rpc_async(
 	vfs_context_t ctx,
 	struct nfsreq **reqp)
 {
-	int error = 0, isdotdot = 0, getattrs = 1, nfsvers, numops;
+	int error = 0, isdotdot = 0, nfsvers, numops;
 	struct nfsm_chain nmreq;
 	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
 	struct nfsmount *nmp;
+	struct nfsreq_secinfo_args si;
 
 	nmp = NFSTONMP(dnp);
 	if (!nmp)
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
+	if (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
 
-	if ((name[0] == '.') && (name[1] == '.') && (namelen == 2))
+	if ((name[0] == '.') && (name[1] == '.') && (namelen == 2)) {
 		isdotdot = 1;
+		NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, NULL, 0);
+	} else {
+		NFSREQ_SECINFO_SET(&si, dnp, dnp->n_fhp, dnp->n_fhsize, name, namelen);
+	}
 
 	nfsm_chain_null(&nmreq);
 
-	// PUTFH, GETATTR, LOOKUP(P), GETATTR (FH)
-	numops = getattrs ? 4 : 3;
+	// PUTFH, GETATTR, LOOKUP(P), GETFH, GETATTR (FH)
+	numops = 5;
 	nfsm_chain_build_alloc_init(error, &nmreq, 20 * NFSX_UNSIGNED + namelen);
 	nfsm_chain_add_compound_header(error, &nmreq, "lookup", numops);
 	numops--;
@@ -1025,50 +1105,59 @@ nfs4_lookup_rpc_async(
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
 	numops--;
 	if (isdotdot) {
 		nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUPP);
 	} else {
 		nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUP);
-		nfsm_chain_add_string(error, &nmreq, name, namelen);
-	}
-	if (getattrs) {
-		numops--;
-		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-		NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
-		NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
-		nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap,
-			NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+		nfsm_chain_add_name(error, &nmreq, name, namelen, nmp);
 	}
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETFH);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
+	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
+	/* some ".zfs" directories can't handle being asked for some attributes */
+	if ((dnp->n_flag & NISDOTZFS) && !isdotdot)
+		NFS_BITMAP_CLR(bitmap, NFS_FATTR_NAMED_ATTR);
+	if ((dnp->n_flag & NISDOTZFSCHILD) && isdotdot)
+		NFS_BITMAP_CLR(bitmap, NFS_FATTR_NAMED_ATTR);
+	if (((namelen == 4) && (name[0] == '.') && (name[1] == 'z') && (name[2] == 'f') && (name[3] == 's')))
+		NFS_BITMAP_CLR(bitmap, NFS_FATTR_NAMED_ATTR);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, NULL);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
 	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC4_COMPOUND,
-			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, reqp);
+			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, reqp);
 nfsmout:
 	nfsm_chain_cleanup(&nmreq);
 	return (error);
 }
 
+
 int
 nfs4_lookup_rpc_async_finish(
 	nfsnode_t dnp,
-	__unused vfs_context_t ctx,
+	char *name,
+	int namelen,
+	vfs_context_t ctx,
 	struct nfsreq *req,
 	u_int64_t *xidp,
 	fhandle_t *fhp,
 	struct nfs_vattr *nvap)
 {
-	int error = 0, lockerror = ENOENT, status, nfsvers, numops;
-	uint32_t val = 0;
+	int error = 0, lockerror = ENOENT, status, nfsvers, numops, isdotdot = 0;
+	uint32_t op = NFS_OP_LOOKUP;
 	u_int64_t xid;
 	struct nfsmount *nmp;
 	struct nfsm_chain nmrep;
 
 	nmp = NFSTONMP(dnp);
 	nfsvers = nmp->nm_vers;
+	if ((name[0] == '.') && (name[1] == '.') && (namelen == 2))
+		isdotdot = 1;
 
 	nfsm_chain_null(&nmrep);
 
@@ -1082,47 +1171,69 @@ nfs4_lookup_rpc_async_finish(
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
 	if (xidp)
 		*xidp = xid;
-	nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, NULL, &xid);
-
-	// nfsm_chain_op_check(error, &nmrep, (isdotdot ? NFS_OP_LOOKUPP : NFS_OP_LOOKUP));
-	nfsm_chain_get_32(error, &nmrep, val);
-	nfsm_assert(error, (val == NFS_OP_LOOKUPP) || (val == NFS_OP_LOOKUP), EBADRPC);
-	nfsm_chain_get_32(error, &nmrep, val);
-	nfsm_assert(error, (val == NFS_OK), val);
+	nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, &xid);
 
+	nfsm_chain_op_check(error, &nmrep, (isdotdot ? NFS_OP_LOOKUPP : NFS_OP_LOOKUP));
 	nfsmout_if(error || !fhp || !nvap);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETFH);
+	nfsm_chain_get_32(error, &nmrep, fhp->fh_len);
+	nfsm_chain_get_opaque(error, &nmrep, fhp->fh_len, fhp->fh_data);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	nfsmout_if(error);
-	NFS_CLEAR_ATTRIBUTES(nvap->nva_bitmap);
-	error = nfs4_parsefattr(&nmrep, NULL, nvap, fhp, NULL);
-	if (!NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_FILEHANDLE)) {
-		error = EBADRPC;
-		goto nfsmout;
+	if ((error == NFSERR_MOVED) || (error == NFSERR_INVAL)) {
+		/* set this up to look like a referral trigger */
+		nfs4_default_attrs_for_referral_trigger(dnp, name, namelen, nvap, fhp);
+		error = 0;
+	} else {
+		nfsmout_if(error);
+		error = nfs4_parsefattr(&nmrep, NULL, nvap, NULL, NULL, NULL);
 	}
 nfsmout:
 	if (!lockerror)
 		nfs_node_unlock(dnp);
 	nfsm_chain_cleanup(&nmrep);
+	if (!error && (op == NFS_OP_LOOKUP) && (nmp->nm_state & NFSSTA_NEEDSECINFO)) {
+		/* We still need to get SECINFO to set default for mount. */
+		/* Do so for the first LOOKUP that returns successfully. */
+		struct nfs_sec sec;
+
+		sec.count = NX_MAX_SEC_FLAVORS;
+		error = nfs4_secinfo_rpc(nmp, &req->r_secinfo, vfs_context_ucred(ctx), sec.flavors, &sec.count);
+		/* [sigh] some implementations return "illegal" error for unsupported ops */
+		if (error == NFSERR_OP_ILLEGAL)
+			error = 0;
+		if (!error) {
+			/* set our default security flavor to the first in the list */
+			lck_mtx_lock(&nmp->nm_lock);
+			if (sec.count)
+				nmp->nm_auth = sec.flavors[0];
+			nmp->nm_state &= ~NFSSTA_NEEDSECINFO;
+			lck_mtx_unlock(&nmp->nm_lock);
+		}
+	}
 	return (error);
 }
 
 int
 nfs4_commit_rpc(
 	nfsnode_t np,
-	u_int64_t offset,
-	u_int64_t count,
-	kauth_cred_t cred)
+	uint64_t offset,
+	uint64_t count,
+	kauth_cred_t cred,
+	uint64_t wverf)
 {
 	struct nfsmount *nmp;
 	int error = 0, lockerror, status, nfsvers, numops;
-	u_int64_t xid, wverf;
+	u_int64_t xid, newwverf;
 	uint32_t count32;
 	struct nfsm_chain nmreq, nmrep;
+	struct nfsreq_secinfo_args si;
 
 	nmp = NFSTONMP(np);
 	FSDBG(521, np, offset, count, nmp ? nmp->nm_state : 0);
 	if (!nmp)
 		return (ENXIO);
+	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
 	if (!(nmp->nm_state & NFSSTA_HASWRITEVERF))
 		return (0);
 	nfsvers = nmp->nm_vers;
@@ -1132,6 +1243,7 @@ nfs4_commit_rpc(
 	else
 		count32 = count;
 
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
 
@@ -1148,13 +1260,12 @@ nfs4_commit_rpc(
 	nfsm_chain_add_32(error, &nmreq, count32);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
 	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND,
-			current_thread(), cred, 0, &nmrep, &xid, &status);
+			current_thread(), cred, &si, 0, &nmrep, &xid, &status);
 
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
@@ -1162,17 +1273,17 @@ nfs4_commit_rpc(
 	nfsm_chain_get_32(error, &nmrep, numops);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_COMMIT);
-	nfsm_chain_get_64(error, &nmrep, wverf);
+	nfsm_chain_get_64(error, &nmrep, newwverf);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
+	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
 	if (!lockerror)
 		nfs_node_unlock(np);
 	nfsmout_if(error);
 	lck_mtx_lock(&nmp->nm_lock);
-	if (nmp->nm_verf != wverf) {
-		nmp->nm_verf = wverf;
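+	/* a new write verifier means the server may have lost our unstable writes; tell the caller to rewrite them */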
+	if (nmp->nm_verf != newwverf)
+		nmp->nm_verf = newwverf;
+	if (wverf != newwverf)
 		error = NFSERR_STALEWRITEVERF;
-	}
 	lck_mtx_unlock(&nmp->nm_lock);
 nfsmout:
 	nfsm_chain_cleanup(&nmreq);
@@ -1192,11 +1303,16 @@ nfs4_pathconf_rpc(
 	struct nfsmount *nmp = NFSTONMP(np);
 	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
 	struct nfs_vattr nvattr;
+	struct nfsreq_secinfo_args si;
 
 	if (!nmp)
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
+	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
 
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
+	NVATTR_INIT(&nvattr);
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
 
@@ -1217,20 +1333,18 @@ nfs4_pathconf_rpc(
 	NFS_BITMAP_SET(bitmap, NFS_FATTR_CHOWN_RESTRICTED);
 	NFS_BITMAP_SET(bitmap, NFS_FATTR_CASE_INSENSITIVE);
 	NFS_BITMAP_SET(bitmap, NFS_FATTR_CASE_PRESERVING);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
-	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status);
+	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);
 
 	nfsm_chain_skip_tag(error, &nmrep);
 	nfsm_chain_get_32(error, &nmrep, numops);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
 	nfsmout_if(error);
-	NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap);
-	error = nfs4_parsefattr(&nmrep, nfsap, &nvattr, NULL, NULL);
+	error = nfs4_parsefattr(&nmrep, nfsap, &nvattr, NULL, NULL, NULL);
 	nfsmout_if(error);
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
@@ -1239,6 +1353,7 @@ nfs4_pathconf_rpc(
 	if (!lockerror)
 		nfs_node_unlock(np);
 nfsmout:
+	NVATTR_CLEANUP(&nvattr);
 	nfsm_chain_cleanup(&nmreq);
 	nfsm_chain_cleanup(&nmrep);
 	return (error);
@@ -1254,79 +1369,102 @@ nfs4_vnop_getattr(
 	} */ *ap)
 {
 	struct vnode_attr *vap = ap->a_vap;
+	struct nfsmount *nmp;
 	struct nfs_vattr nva;
-	int error;
+	int error, acls, ngaflags;
+
+	if (!(nmp = VTONMP(ap->a_vp)))
+		return (ENXIO);
+	acls = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL);
 
-	error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, NGA_CACHED);
+	ngaflags = NGA_CACHED;
+	if (VATTR_IS_ACTIVE(vap, va_acl) && acls)
+		ngaflags |= NGA_ACL;
+	error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, ngaflags);
 	if (error)
 		return (error);
 
 	/* copy what we have in nva to *a_vap */
-	if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_RAWDEV)) {
+	if (VATTR_IS_ACTIVE(vap, va_rdev) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_RAWDEV)) {
 		dev_t rdev = makedev(nva.nva_rawdev.specdata1, nva.nva_rawdev.specdata2);
 		VATTR_RETURN(vap, va_rdev, rdev);
 	}
-	if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_NUMLINKS))
+	if (VATTR_IS_ACTIVE(vap, va_nlink) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_NUMLINKS))
 		VATTR_RETURN(vap, va_nlink, nva.nva_nlink);
-	if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_SIZE))
+	if (VATTR_IS_ACTIVE(vap, va_data_size) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_SIZE))
 		VATTR_RETURN(vap, va_data_size, nva.nva_size);
 	// VATTR_RETURN(vap, va_data_alloc, ???);
 	// VATTR_RETURN(vap, va_total_size, ???);
-	if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_SPACE_USED))
+	if (VATTR_IS_ACTIVE(vap, va_total_alloc) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_SPACE_USED))
 		VATTR_RETURN(vap, va_total_alloc, nva.nva_bytes);
-	if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_OWNER))
+	if (VATTR_IS_ACTIVE(vap, va_uid) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_OWNER))
 		VATTR_RETURN(vap, va_uid, nva.nva_uid);
-	if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_OWNER_GROUP))
+	if (VATTR_IS_ACTIVE(vap, va_uuuid) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_OWNER))
+		VATTR_RETURN(vap, va_uuuid, nva.nva_uuuid);
+	if (VATTR_IS_ACTIVE(vap, va_gid) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_OWNER_GROUP))
 		VATTR_RETURN(vap, va_gid, nva.nva_gid);
-	if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_MODE))
-		VATTR_RETURN(vap, va_mode, nva.nva_mode);
-	if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_ARCHIVE) ||
-	    NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_HIDDEN)) {
+	if (VATTR_IS_ACTIVE(vap, va_guuid) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_OWNER_GROUP))
+		VATTR_RETURN(vap, va_guuid, nva.nva_guuid);
+	if (VATTR_IS_ACTIVE(vap, va_mode)) {
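+		/* ACL-only mounts (and servers that don't supply a mode) get a permissive fake mode; the ACL governs real access */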
+		if (NMFLAG(nmp, ACLONLY) || !NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_MODE))
+			VATTR_RETURN(vap, va_mode, 0777);
+		else
+			VATTR_RETURN(vap, va_mode, nva.nva_mode);
+	}
+	if (VATTR_IS_ACTIVE(vap, va_flags) &&
+	    (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_ARCHIVE) ||
+	     NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_HIDDEN) ||
+	     (nva.nva_flags & NFS_FFLAG_TRIGGER))) {
 		uint32_t flags = 0;
-		if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_ARCHIVE))
+		if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_ARCHIVE) &&
+		    (nva.nva_flags & NFS_FFLAG_ARCHIVED))
 			flags |= SF_ARCHIVED;
-		if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_HIDDEN))
+		if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_HIDDEN) &&
+		    (nva.nva_flags & NFS_FFLAG_HIDDEN))
 			flags |= UF_HIDDEN;
 		VATTR_RETURN(vap, va_flags, flags);
 	}
-	if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_CREATE)) {
+	if (VATTR_IS_ACTIVE(vap, va_create_time) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_CREATE)) {
 		vap->va_create_time.tv_sec = nva.nva_timesec[NFSTIME_CREATE];
 		vap->va_create_time.tv_nsec = nva.nva_timensec[NFSTIME_CREATE];
 		VATTR_SET_SUPPORTED(vap, va_create_time);
 	}
-	if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_ACCESS)) {
+	if (VATTR_IS_ACTIVE(vap, va_access_time) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_ACCESS)) {
 		vap->va_access_time.tv_sec = nva.nva_timesec[NFSTIME_ACCESS];
 		vap->va_access_time.tv_nsec = nva.nva_timensec[NFSTIME_ACCESS];
 		VATTR_SET_SUPPORTED(vap, va_access_time);
 	}
-	if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_MODIFY)) {
+	if (VATTR_IS_ACTIVE(vap, va_modify_time) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_MODIFY)) {
 		vap->va_modify_time.tv_sec = nva.nva_timesec[NFSTIME_MODIFY];
 		vap->va_modify_time.tv_nsec = nva.nva_timensec[NFSTIME_MODIFY];
 		VATTR_SET_SUPPORTED(vap, va_modify_time);
 	}
-	if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_METADATA)) {
+	if (VATTR_IS_ACTIVE(vap, va_change_time) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_METADATA)) {
 		vap->va_change_time.tv_sec = nva.nva_timesec[NFSTIME_CHANGE];
 		vap->va_change_time.tv_nsec = nva.nva_timensec[NFSTIME_CHANGE];
 		VATTR_SET_SUPPORTED(vap, va_change_time);
 	}
-	if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_BACKUP)) {
+	if (VATTR_IS_ACTIVE(vap, va_backup_time) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TIME_BACKUP)) {
 		vap->va_backup_time.tv_sec = nva.nva_timesec[NFSTIME_BACKUP];
 		vap->va_backup_time.tv_nsec = nva.nva_timensec[NFSTIME_BACKUP];
 		VATTR_SET_SUPPORTED(vap, va_backup_time);
 	}
-	if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_FILEID))
+	if (VATTR_IS_ACTIVE(vap, va_fileid) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_FILEID))
 		VATTR_RETURN(vap, va_fileid, nva.nva_fileid);
-	if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TYPE))
+	if (VATTR_IS_ACTIVE(vap, va_type) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_TYPE))
 		VATTR_RETURN(vap, va_type, nva.nva_type);
-	if (NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_CHANGE))
+	if (VATTR_IS_ACTIVE(vap, va_filerev) && NFS_BITMAP_ISSET(nva.nva_bitmap, NFS_FATTR_CHANGE))
 		VATTR_RETURN(vap, va_filerev, nva.nva_change);
 
+	if (VATTR_IS_ACTIVE(vap, va_acl) && acls) {
+		VATTR_RETURN(vap, va_acl, nva.nva_acl);
+		nva.nva_acl = NULL;
+	}
+
 	// other attrs we might support someday:
 	// VATTR_RETURN(vap, va_encoding, ??? /* potentially unnormalized UTF-8? */);
-	// struct kauth_acl *va_acl;	/* access control list */
-	// guid_t	va_uuuid;	/* file owner UUID */
-	// guid_t	va_guuid;	/* file group UUID */
 
+	NVATTR_CLEANUP(&nva);
 	return (error);
 }
 
@@ -1337,15 +1475,20 @@ nfs4_setattr_rpc(
 	vfs_context_t ctx)
 {
 	struct nfsmount *nmp = NFSTONMP(np);
-	int error = 0, lockerror = ENOENT, status, nfsvers, numops;
+	int error = 0, setattr_error = 0, lockerror = ENOENT, status, nfsvers, numops;
 	u_int64_t xid, nextxid;
 	struct nfsm_chain nmreq, nmrep;
 	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
+	uint32_t getbitmap[NFS_ATTR_BITMAP_LEN];
+	uint32_t setbitmap[NFS_ATTR_BITMAP_LEN];
 	nfs_stateid stateid;
+	struct nfsreq_secinfo_args si;
 
 	if (!nmp)
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
+	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
 
 	if (VATTR_IS_ACTIVE(vap, va_flags) && (vap->va_flags & ~(SF_ARCHIVED|UF_HIDDEN))) {
 		/* we don't support setting unsupported flags (duh!) */
@@ -1355,9 +1498,39 @@ nfs4_setattr_rpc(
 			return (ENOTSUP);	/* return ENOTSUP for chflags(2) */
 	}
 
+	/* don't bother requesting changes to attributes that already match the cached values */
+	if (VATTR_IS_ACTIVE(vap, va_uid) && (vap->va_uid == np->n_vattr.nva_uid))
+		VATTR_CLEAR_ACTIVE(vap, va_uid);
+	if (VATTR_IS_ACTIVE(vap, va_gid) && (vap->va_gid == np->n_vattr.nva_gid))
+		VATTR_CLEAR_ACTIVE(vap, va_gid);
+	if (VATTR_IS_ACTIVE(vap, va_uuuid) && kauth_guid_equal(&vap->va_uuuid, &np->n_vattr.nva_uuuid))
+		VATTR_CLEAR_ACTIVE(vap, va_uuuid);
+	if (VATTR_IS_ACTIVE(vap, va_guuid) && kauth_guid_equal(&vap->va_guuid, &np->n_vattr.nva_guuid))
+		VATTR_CLEAR_ACTIVE(vap, va_guuid);
+
+tryagain:
+	/* do nothing if no attributes will be sent */
+	nfs_vattr_set_bitmap(nmp, bitmap, vap);
+	if (!bitmap[0] && !bitmap[1])
+		return (0);
+
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
 
+	/*
+	 * Prepare GETATTR bitmap: if we are setting the ACL or mode, we
+	 * need to invalidate any cached ACL.  And if we had an ACL cached,
+	 * we might as well also fetch the new value.
+	 */
+	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, getbitmap);
+	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_ACL) ||
+	    NFS_BITMAP_ISSET(bitmap, NFS_FATTR_MODE)) {
+		if (NACLVALID(np))
+			NFS_BITMAP_SET(getbitmap, NFS_FATTR_ACL);
+		NACLINVALIDATE(np);
+	}
+
 	// PUTFH, SETATTR, GETATTR
 	numops = 3;
 	nfsm_chain_build_alloc_init(error, &nmreq, 40 * NFSX_UNSIGNED);
@@ -1375,25 +1548,32 @@ nfs4_setattr_rpc(
 	nfsm_chain_add_fattr4(error, &nmreq, vap, nmp);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, getbitmap, nmp, np);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
-	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status);
+	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);
 
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
 	nfsm_chain_skip_tag(error, &nmrep);
 	nfsm_chain_get_32(error, &nmrep, numops);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
+	nfsmout_if(error);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_SETATTR);
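+	/*
+	 * An RPC-level decode error aborts processing; otherwise stash the
+	 * SETATTR status and keep parsing the reply so we can still record
+	 * which attributes the server managed to set.
+	 */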
+	nfsmout_if(error == EBADRPC);
+	setattr_error = error;
+	error = 0;
 	bmlen = NFS_ATTR_BITMAP_LEN;
-	nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen);
-	nfsmout_if(error);
-	nfs_vattr_set_supported(bitmap, vap);
+	nfsm_chain_get_bitmap(error, &nmrep, setbitmap, bmlen);
+	if (!error) {
+		if (VATTR_IS_ACTIVE(vap, va_data_size) && (np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
+			microuptime(&np->n_lastio);
+		nfs_vattr_set_supported(setbitmap, vap);
+		error = setattr_error;
+	}
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
+	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
 	if (error)
 		NATTRINVALIDATE(np);
 	/*
@@ -1416,6 +1596,20 @@ nfsmout:
 		nfs_node_unlock(np);
 	nfsm_chain_cleanup(&nmreq);
 	nfsm_chain_cleanup(&nmrep);
+	if ((setattr_error == EINVAL) && VATTR_IS_ACTIVE(vap, va_acl) && VATTR_IS_ACTIVE(vap, va_mode) && !NMFLAG(nmp, ACLONLY)) {
+		/*
+	 * Some servers may not like the ACL/mode combos that get sent.
+		 * If it looks like that's what the server choked on, try setting
+		 * just the ACL and not the mode (unless it looks like everything
+		 * but mode was already successfully set).
+		 */
+		if (((bitmap[0] & setbitmap[0]) != bitmap[0]) ||
+		    ((bitmap[1] & (setbitmap[1] | (1 << (NFS_FATTR_MODE - 32)))) != bitmap[1])) {
+			VATTR_CLEAR_ACTIVE(vap, va_mode);
+			error = 0;
+			goto tryagain;
+		}
+	}
 	return (error);
 }
 
@@ -1426,7 +1620,7 @@ int
 nfs_mount_state_wait_for_recovery(struct nfsmount *nmp)
 {
 	struct timespec ts = { 1, 0 };
-	int error = 0, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
+	int error = 0, slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0;
 
 	lck_mtx_lock(&nmp->nm_lock);
 	while (nmp->nm_state & NFSSTA_RECOVER) {
@@ -1434,6 +1628,7 @@ nfs_mount_state_wait_for_recovery(struct nfsmount *nmp)
 			break;
 		nfs_mount_sock_thread_wake(nmp);
 		msleep(&nmp->nm_state, &nmp->nm_lock, slpflag|(PZERO-1), "nfsrecoverwait", &ts);
+		slpflag = 0;
 	}
 	lck_mtx_unlock(&nmp->nm_lock);
 
@@ -1447,19 +1642,24 @@ nfs_mount_state_wait_for_recovery(struct nfsmount *nmp)
  * the recovery thread until we're done).
  */
 int
-nfs_mount_state_in_use_start(struct nfsmount *nmp)
+nfs_mount_state_in_use_start(struct nfsmount *nmp, thread_t thd)
 {
 	struct timespec ts = { 1, 0 };
-	int error = 0, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
+	int error = 0, slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0;
 
 	if (!nmp)
 		return (ENXIO);
 	lck_mtx_lock(&nmp->nm_lock);
+	if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD)) {
+		lck_mtx_unlock(&nmp->nm_lock);
+		return (ENXIO);
+	}
 	while (nmp->nm_state & NFSSTA_RECOVER) {
-		if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
+		if ((error = nfs_sigintr(nmp, NULL, thd, 1)))
 			break;
 		nfs_mount_sock_thread_wake(nmp);
 		msleep(&nmp->nm_state, &nmp->nm_lock, slpflag|(PZERO-1), "nfsrecoverwait", &ts);
+		slpflag = 0;
 	}
 	if (!error)
 		nmp->nm_stateinuse++;
@@ -1482,11 +1682,9 @@ nfs_mount_state_in_use_end(struct nfsmount *nmp, int error)
 		return (restart);
 	lck_mtx_lock(&nmp->nm_lock);
 	if (restart && (error != NFSERR_OLD_STATEID) && (error != NFSERR_GRACE)) {
-		if (!(nmp->nm_state & NFSSTA_RECOVER)) {
-			printf("nfs_mount_state_in_use_end: error %d, initiating recovery\n", error);
-			nmp->nm_state |= NFSSTA_RECOVER;
-			nfs_mount_sock_thread_wake(nmp);
-		}
+		printf("nfs_mount_state_in_use_end: error %d, initiating recovery for %s, 0x%x\n",
+			error, vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid);
+		nfs_need_recover(nmp, error);
 	}
 	if (nmp->nm_stateinuse > 0)
 		nmp->nm_stateinuse--;
@@ -1531,22 +1729,39 @@ nfs_mount_state_max_restarts(struct nfsmount *nmp)
 	return (MAX(nmp->nm_fsattr.nfsa_lease, 60));
 }
 
+/*
+ * Does the error mean we probably lost a delegation?
+ */
+int
+nfs_mount_state_error_delegation_lost(int error)
+{
+	switch (error) {
+	case NFSERR_STALE_STATEID:
+	case NFSERR_ADMIN_REVOKED:
+	case NFSERR_EXPIRED:
+	case NFSERR_OLD_STATEID:
+	case NFSERR_BAD_STATEID:
+	case NFSERR_GRACE: /* ugh! RFC 3530 specifically disallows CLAIM_DELEGATE_CUR during the grace period */
+		return (1);
+	}
+	return (0);
+}
+
 
 /*
  * Mark an NFS node's open state as busy.
  */
 int
-nfs_open_state_set_busy(nfsnode_t np, vfs_context_t ctx)
+nfs_open_state_set_busy(nfsnode_t np, thread_t thd)
 {
 	struct nfsmount *nmp;
-	thread_t thd = vfs_context_thread(ctx);
 	struct timespec ts = {2, 0};
 	int error = 0, slpflag;
 
 	nmp = NFSTONMP(np);
 	if (!nmp)
 		return (ENXIO);
-	slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
+	slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0;
 
 	lck_mtx_lock(&np->n_openlock);
 	while (np->n_openflags & N_OPENBUSY) {
@@ -1554,6 +1769,7 @@ nfs_open_state_set_busy(nfsnode_t np, vfs_context_t ctx)
 			break;
 		np->n_openflags |= N_OPENWANT;
 		msleep(&np->n_openflags, &np->n_openlock, slpflag, "nfs_open_state_set_busy", &ts);
+		slpflag = 0;
 	}
 	if (!error)
 		np->n_openflags |= N_OPENBUSY;
@@ -1688,7 +1904,7 @@ nfs_open_owner_set_busy(struct nfs_open_owner *noop, thread_t thd)
 	nmp = noop->noo_mount;
 	if (!nmp)
 		return (ENXIO);
-	slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
+	slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0;
 
 	lck_mtx_lock(&noop->noo_lock);
 	while (noop->noo_flags & NFS_OPEN_OWNER_BUSY) {
@@ -1696,6 +1912,7 @@ nfs_open_owner_set_busy(struct nfs_open_owner *noop, thread_t thd)
 			break;
 		noop->noo_flags |= NFS_OPEN_OWNER_WANT;
 		msleep(noop, &noop->noo_lock, slpflag, "nfs_open_owner_set_busy", &ts);
+		slpflag = 0;
 	}
 	if (!error)
 		noop->noo_flags |= NFS_OPEN_OWNER_BUSY;
@@ -1761,6 +1978,24 @@ nfs_open_file_find(
 	uint32_t accessMode,
 	uint32_t denyMode,
 	int alloc)
+{
+	*nofpp = NULL;
+	return (nfs_open_file_find_internal(np, noop, nofpp, accessMode, denyMode, alloc));
+}
+
+/*
+ * Internally, allow using a provisional nodeless nofp (passed in via *nofpp)
+ * if an existing one is not found.  This is used in "create" scenarios to
+ * officially add the provisional nofp to the node once the node is created.
+ */
+int
+nfs_open_file_find_internal(
+	nfsnode_t np,
+	struct nfs_open_owner *noop,
+	struct nfs_open_file **nofpp,
+	uint32_t accessMode,
+	uint32_t denyMode,
+	int alloc)
 {
 	struct nfs_open_file *nofp = NULL, *nofp2, *newnofp = NULL;
 
@@ -1777,7 +2012,6 @@ tryagain:
 		if ((accessMode & nofp2->nof_deny) || (denyMode & nofp2->nof_access)) {
 			/* This request conflicts with an existing open on this client. */
 			lck_mtx_unlock(&np->n_openlock);
-			*nofpp = NULL;
 			return (EACCES);
 		}
 	}
@@ -1786,14 +2020,12 @@ tryagain:
 	 * If this open owner doesn't have an open
 	 * file structure yet, we create one for it.
 	 */
-	if (!nofp && !newnofp && alloc) {
+	if (!nofp && !*nofpp && !newnofp && alloc) {
 		lck_mtx_unlock(&np->n_openlock);
 alloc:
 		MALLOC(newnofp, struct nfs_open_file *, sizeof(struct nfs_open_file), M_TEMP, M_WAITOK);
-		if (!newnofp) {
-			*nofpp = NULL;
+		if (!newnofp)
 			return (ENOMEM);
-		}
 		bzero(newnofp, sizeof(*newnofp));
 		lck_mtx_init(&newnofp->nof_lock, nfs_open_grp, LCK_ATTR_NULL);
 		newnofp->nof_owner = noop;
@@ -1805,15 +2037,20 @@ alloc:
 		if (np)
 			goto tryagain;
 	}
-	if (!nofp && newnofp) {
-		if (np)
-			TAILQ_INSERT_HEAD(&np->n_opens, newnofp, nof_link);
-		nofp = newnofp;
+	if (!nofp) {
+		if (*nofpp) {
+			(*nofpp)->nof_np = np;
+			nofp = *nofpp;
+		} else {
+			nofp = newnofp;
+		}
+		if (nofp && np)
+			TAILQ_INSERT_HEAD(&np->n_opens, nofp, nof_link);
 	}
 	if (np)
 		lck_mtx_unlock(&np->n_openlock);
 
-	if (newnofp && (nofp != newnofp))
+	if (alloc && newnofp && (nofp != newnofp))
 		nfs_open_file_destroy(newnofp);
 
 	*nofpp = nofp;
@@ -1848,7 +2085,7 @@ nfs_open_file_set_busy(struct nfs_open_file *nofp, thread_t thd)
 	nmp = nofp->nof_owner->noo_mount;
 	if (!nmp)
 		return (ENXIO);
-	slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
+	slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0;
 
 	lck_mtx_lock(&nofp->nof_lock);
 	while (nofp->nof_flags & NFS_OPEN_FILE_BUSY) {
@@ -1856,6 +2093,7 @@ nfs_open_file_set_busy(struct nfs_open_file *nofp, thread_t thd)
 			break;
 		nofp->nof_flags |= NFS_OPEN_FILE_WANT;
 		msleep(nofp, &nofp->nof_lock, slpflag, "nfs_open_file_set_busy", &ts);
+		slpflag = 0;
 	}
 	if (!error)
 		nofp->nof_flags |= NFS_OPEN_FILE_BUSY;
@@ -1884,147 +2122,525 @@ nfs_open_file_clear_busy(struct nfs_open_file *nofp)
 }
 
 /*
- * Get the current (delegation, lock, open, default) stateid for this node.
- * If node has a delegation, use that stateid.
- * If pid has a lock, use the lockowner's stateid.
- * Or use the open file's stateid.
- * If no open file, use a default stateid of all ones.
+ * Add the open state for the given access/deny modes to this open file.
  */
 void
-nfs_get_stateid(nfsnode_t np, thread_t thd, kauth_cred_t cred, nfs_stateid *sid)
+nfs_open_file_add_open(struct nfs_open_file *nofp, uint32_t accessMode, uint32_t denyMode, int delegated)
 {
-	struct nfsmount *nmp = NFSTONMP(np);
-	proc_t p = thd ? get_bsdthreadtask_info(thd) : current_thread();  // XXX async I/O requests don't have a thread
-	struct nfs_open_owner *noop = NULL;
-	struct nfs_open_file *nofp = NULL;
-	struct nfs_lock_owner *nlop = NULL;
-	nfs_stateid *s = NULL;
-
-	if (np->n_openflags & N_DELEG_MASK)
-		s = &np->n_dstateid;
-	else if (p)
-		nlop = nfs_lock_owner_find(np, p, 0);
-	if (nlop && !TAILQ_EMPTY(&nlop->nlo_locks)) {
-		/* we hold locks, use lock stateid */
-		s = &nlop->nlo_stateid;
-	} else if (((noop = nfs_open_owner_find(nmp, cred, 0))) &&
-		 (nfs_open_file_find(np, noop, &nofp, 0, 0, 0) == 0) &&
-		 !(nofp->nof_flags & NFS_OPEN_FILE_LOST) &&
-		 nofp->nof_access) {
-		/* we (should) have the file open, use open stateid */
-		if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)
-			nfs4_reopen(nofp, thd);
-		if (!(nofp->nof_flags & NFS_OPEN_FILE_LOST))
-			s = &nofp->nof_stateid;
-	}
+	lck_mtx_lock(&nofp->nof_lock);
+	nofp->nof_access |= accessMode;
+	nofp->nof_deny |= denyMode;
 
-	if (s) {
-		sid->seqid = s->seqid;
-		sid->other[0] = s->other[0];
-		sid->other[1] = s->other[1];
-		sid->other[2] = s->other[2];
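+	/*
+	 * Bump the counter matching this exact access/deny combination.
+	 * Delegated opens (the nof_d_* counters) are tracked separately
+	 * from server-confirmed opens so they can be claimed with the
+	 * server if the delegation is returned.
+	 */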
+	if (delegated) {
+		if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
+			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
+				nofp->nof_d_r++;
+			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
+				nofp->nof_d_w++;
+			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
+				nofp->nof_d_rw++;
+		} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
+			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
+				nofp->nof_d_r_dw++;
+			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
+				nofp->nof_d_w_dw++;
+			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
+				nofp->nof_d_rw_dw++;
+		} else { /* NFS_OPEN_SHARE_DENY_BOTH */
+			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
+				nofp->nof_d_r_drw++;
+			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
+				nofp->nof_d_w_drw++;
+			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
+				nofp->nof_d_rw_drw++;
+		}
 	} else {
-		const char *vname = vnode_getname(NFSTOV(np));
-		printf("nfs_get_stateid: no stateid for %s\n", vname ? vname : "???");
-		vnode_putname(vname);
-		sid->seqid = sid->other[0] = sid->other[1] = sid->other[2] = 0xffffffff;
+		if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
+			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
+				nofp->nof_r++;
+			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
+				nofp->nof_w++;
+			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
+				nofp->nof_rw++;
+		} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
+			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
+				nofp->nof_r_dw++;
+			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
+				nofp->nof_w_dw++;
+			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
+				nofp->nof_rw_dw++;
+		} else { /* NFS_OPEN_SHARE_DENY_BOTH */
+			if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
+				nofp->nof_r_drw++;
+			else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
+				nofp->nof_w_drw++;
+			else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
+				nofp->nof_rw_drw++;
+		}
 	}
-	if (nlop)
-		nfs_lock_owner_rele(nlop);
-	if (noop)
-		nfs_open_owner_rele(noop);
+
+	nofp->nof_opencnt++;
+	lck_mtx_unlock(&nofp->nof_lock);
 }
 
 /*
- * We always send the open RPC even if this open's mode is a subset of all
- * the existing opens.  This makes sure that we will always be able to do a
- * downgrade to any of the open modes.
- *
- * Note: local conflicts should have already been checked. (nfs_open_file_find)
+ * Find which particular open combo will be closed and report what
+ * the new modes will be and whether the open was delegated.
  */
-int
-nfs4_open(
-	nfsnode_t np,
+void
+nfs_open_file_remove_open_find(
 	struct nfs_open_file *nofp,
 	uint32_t accessMode,
 	uint32_t denyMode,
-	vfs_context_t ctx)
+	uint32_t *newAccessMode,
+	uint32_t *newDenyMode,
+	int *delegated)
 {
-	vnode_t vp = NFSTOV(np);
-	vnode_t dvp = NULL;
-	struct componentname cn;
-	const char *vname = NULL;
-	size_t namelen;
-	char smallname[128];
-	char *filename = NULL;
-	int error = 0, readtoo = 0;
-
-	dvp = vnode_getparent(vp);
-	vname = vnode_getname(vp);
-	if (!dvp || !vname) {
-		error = EIO;
-		goto out;
-	}
-	filename = &smallname[0];
-	namelen = snprintf(filename, sizeof(smallname), "%s", vname);
-	if (namelen >= sizeof(smallname)) {
-		namelen++;  /* snprintf result doesn't include '\0' */
-		MALLOC(filename, char *, namelen, M_TEMP, M_WAITOK);
-		if (!filename) {
-			error = ENOMEM;
-			goto out;
-		}
-		snprintf(filename, namelen, "%s", vname);
-	}
-	bzero(&cn, sizeof(cn));
-	cn.cn_nameptr = filename;
-	cn.cn_namelen = namelen;
-
-	if (!(accessMode & NFS_OPEN_SHARE_ACCESS_READ)) {
-		/*
-		 * Try to open it for read access too,
-		 * so the buffer cache can read data.
-		 */
-		readtoo = 1;
-		accessMode |= NFS_OPEN_SHARE_ACCESS_READ;
-	}
-tryagain:
-	error = nfs4_open_rpc(nofp, ctx, &cn, NULL, dvp, &vp, NFS_OPEN_NOCREATE, accessMode, denyMode);
-	if (error) {
-		if (!nfs_mount_state_error_should_restart(error) && readtoo) {
-			/* try again without the extra read access */
-			accessMode &= ~NFS_OPEN_SHARE_ACCESS_READ;
-			readtoo = 0;
-			goto tryagain;
-		}
-		goto out;
-	}
-	nofp->nof_access |= accessMode;
-	nofp->nof_deny |= denyMode;
+	/*
+	 * Calculate the new modes: a mode bit gets removed when the sum of
+	 * all the open counts that include it is down to one, i.e. when this
+	 * close removes the last open using that bit.
+	 */
+	*newAccessMode = nofp->nof_access;
+	*newDenyMode = nofp->nof_deny;
 
+	if ((accessMode & NFS_OPEN_SHARE_ACCESS_READ) &&
+	    (nofp->nof_access & NFS_OPEN_SHARE_ACCESS_READ) &&
+	    ((nofp->nof_r + nofp->nof_d_r +
+	      nofp->nof_rw + nofp->nof_d_rw +
+	      nofp->nof_r_dw + nofp->nof_d_r_dw +
+	      nofp->nof_rw_dw + nofp->nof_d_rw_dw +
+	      nofp->nof_r_drw + nofp->nof_d_r_drw +
+	      nofp->nof_rw_drw + nofp->nof_d_rw_drw) == 1))
+		*newAccessMode &= ~NFS_OPEN_SHARE_ACCESS_READ;
+	if ((accessMode & NFS_OPEN_SHARE_ACCESS_WRITE) &&
+	    (nofp->nof_access & NFS_OPEN_SHARE_ACCESS_WRITE) &&
+	    ((nofp->nof_w + nofp->nof_d_w +
+	      nofp->nof_rw + nofp->nof_d_rw +
+	      nofp->nof_w_dw + nofp->nof_d_w_dw +
+	      nofp->nof_rw_dw + nofp->nof_d_rw_dw +
+	      nofp->nof_w_drw + nofp->nof_d_w_drw +
+	      nofp->nof_rw_drw + nofp->nof_d_rw_drw) == 1))
+		*newAccessMode &= ~NFS_OPEN_SHARE_ACCESS_WRITE;
+	if ((denyMode & NFS_OPEN_SHARE_DENY_READ) &&
+	    (nofp->nof_deny & NFS_OPEN_SHARE_DENY_READ) &&
+	    ((nofp->nof_r_drw + nofp->nof_d_r_drw +
+	      nofp->nof_w_drw + nofp->nof_d_w_drw +
+	      nofp->nof_rw_drw + nofp->nof_d_rw_drw) == 1))
+		*newDenyMode &= ~NFS_OPEN_SHARE_DENY_READ;
+	if ((denyMode & NFS_OPEN_SHARE_DENY_WRITE) &&
+	    (nofp->nof_deny & NFS_OPEN_SHARE_DENY_WRITE) &&
+	    ((nofp->nof_r_drw + nofp->nof_d_r_drw +
+	      nofp->nof_w_drw + nofp->nof_d_w_drw +
+	      nofp->nof_rw_drw + nofp->nof_d_rw_drw +
+	      nofp->nof_r_dw + nofp->nof_d_r_dw +
+	      nofp->nof_w_dw + nofp->nof_d_w_dw +
+	      nofp->nof_rw_dw + nofp->nof_d_rw_dw) == 1))
+		*newDenyMode &= ~NFS_OPEN_SHARE_DENY_WRITE;
+
+	/* Find the corresponding open access/deny mode counter. */
 	if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
 		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
-			nofp->nof_r++;
+			*delegated = (nofp->nof_d_r != 0);
 		else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
-			nofp->nof_w++;
+			*delegated = (nofp->nof_d_w != 0);
 		else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
-			nofp->nof_rw++;
+			*delegated = (nofp->nof_d_rw != 0);
+		else
+			*delegated = 0;
 	} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
 		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
-			nofp->nof_r_dw++;
+			*delegated = (nofp->nof_d_r_dw != 0);
 		else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
-			nofp->nof_w_dw++;
+			*delegated = (nofp->nof_d_w_dw != 0);
 		else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
-			nofp->nof_rw_dw++;
+			*delegated = (nofp->nof_d_rw_dw != 0);
+		else
+			*delegated = 0;
 	} else { /* NFS_OPEN_SHARE_DENY_BOTH */
 		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ)
-			nofp->nof_r_drw++;
+			*delegated = (nofp->nof_d_r_drw != 0);
 		else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE)
-			nofp->nof_w_drw++;
+			*delegated = (nofp->nof_d_w_drw != 0);
 		else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH)
-			nofp->nof_rw_drw++;
+			*delegated = (nofp->nof_d_rw_drw != 0);
+		else
+			*delegated = 0;
 	}
-	nofp->nof_opencnt++;
+}
+
+/*
+ * Remove the open state for the given access/deny modes to this open file.
+ */
+void
+nfs_open_file_remove_open(struct nfs_open_file *nofp, uint32_t accessMode, uint32_t denyMode)
+{
+	uint32_t newAccessMode, newDenyMode;
+	int delegated = 0;
+
+	lck_mtx_lock(&nofp->nof_lock);
+	nfs_open_file_remove_open_find(nofp, accessMode, denyMode, &newAccessMode, &newDenyMode, &delegated);
+
+	/* Decrement the corresponding open access/deny mode counter. */
+	if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
+		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) {
+			if (delegated) {
+				if (nofp->nof_d_r == 0)
+					NP(nofp->nof_np, "nfs: open(R) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_d_r--;
+			} else {
+				if (nofp->nof_r == 0)
+					NP(nofp->nof_np, "nfs: open(R) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_r--;
+			}
+		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) {
+			if (delegated) {
+				if (nofp->nof_d_w == 0)
+					NP(nofp->nof_np, "nfs: open(W) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_d_w--;
+			} else {
+				if (nofp->nof_w == 0)
+					NP(nofp->nof_np, "nfs: open(W) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_w--;
+			}
+		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) {
+			if (delegated) {
+				if (nofp->nof_d_rw == 0)
+					NP(nofp->nof_np, "nfs: open(RW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_d_rw--;
+			} else {
+				if (nofp->nof_rw == 0)
+					NP(nofp->nof_np, "nfs: open(RW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_rw--;
+			}
+		}
+	} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
+		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) {
+			if (delegated) {
+				if (nofp->nof_d_r_dw == 0)
+					NP(nofp->nof_np, "nfs: open(R,DW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_d_r_dw--;
+			} else {
+				if (nofp->nof_r_dw == 0)
+					NP(nofp->nof_np, "nfs: open(R,DW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_r_dw--;
+			}
+		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) {
+			if (delegated) {
+				if (nofp->nof_d_w_dw == 0)
+					NP(nofp->nof_np, "nfs: open(W,DW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_d_w_dw--;
+			} else {
+				if (nofp->nof_w_dw == 0)
+					NP(nofp->nof_np, "nfs: open(W,DW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_w_dw--;
+			}
+		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) {
+			if (delegated) {
+				if (nofp->nof_d_rw_dw == 0)
+					NP(nofp->nof_np, "nfs: open(RW,DW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_d_rw_dw--;
+			} else {
+				if (nofp->nof_rw_dw == 0)
+					NP(nofp->nof_np, "nfs: open(RW,DW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_rw_dw--;
+			}
+		}
+	} else { /* NFS_OPEN_SHARE_DENY_BOTH */
+		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) {
+			if (delegated) {
+				if (nofp->nof_d_r_drw == 0)
+					NP(nofp->nof_np, "nfs: open(R,DRW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_d_r_drw--;
+			} else {
+				if (nofp->nof_r_drw == 0)
+					NP(nofp->nof_np, "nfs: open(R,DRW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_r_drw--;
+			}
+		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) {
+			if (delegated) {
+				if (nofp->nof_d_w_drw == 0)
+					NP(nofp->nof_np, "nfs: open(W,DRW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_d_w_drw--;
+			} else {
+				if (nofp->nof_w_drw == 0)
+					NP(nofp->nof_np, "nfs: open(W,DRW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_w_drw--;
+			}
+		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) {
+			if (delegated) {
+				if (nofp->nof_d_rw_drw == 0)
+					NP(nofp->nof_np, "nfs: open(RW,DRW) delegated count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_d_rw_drw--;
+			} else {
+				if (nofp->nof_rw_drw == 0)
+					NP(nofp->nof_np, "nfs: open(RW,DRW) count underrun, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				else
+					nofp->nof_rw_drw--;
+			}
+		}
+	}
+
+	/* update the modes */
+	nofp->nof_access = newAccessMode;
+	nofp->nof_deny = newDenyMode;
+	nofp->nof_opencnt--;
+	lck_mtx_unlock(&nofp->nof_lock);
+}
+
+
+/*
+ * Get the current (delegation, lock, open, default) stateid for this node.
+ * If node has a delegation, use that stateid.
+ * If pid has a lock, use the lockowner's stateid.
+ * Or use the open file's stateid.
+ * If no open file, use a default stateid of all ones.
+ */
+void
+nfs_get_stateid(nfsnode_t np, thread_t thd, kauth_cred_t cred, nfs_stateid *sid)
+{
+	struct nfsmount *nmp = NFSTONMP(np);
+	proc_t p = thd ? get_bsdthreadtask_info(thd) : current_proc();  // XXX async I/O requests don't have a thread
+	struct nfs_open_owner *noop = NULL;
+	struct nfs_open_file *nofp = NULL;
+	struct nfs_lock_owner *nlop = NULL;
+	nfs_stateid *s = NULL;
+
+	if (np->n_openflags & N_DELEG_MASK) {
+		s = &np->n_dstateid;
+	} else {
+		if (p)
+			nlop = nfs_lock_owner_find(np, p, 0);
+		if (nlop && !TAILQ_EMPTY(&nlop->nlo_locks)) {
+			/* we hold locks, use lock stateid */
+			s = &nlop->nlo_stateid;
+		} else if (((noop = nfs_open_owner_find(nmp, cred, 0))) &&
+			 (nfs_open_file_find(np, noop, &nofp, 0, 0, 0) == 0) &&
+			 !(nofp->nof_flags & NFS_OPEN_FILE_LOST) &&
+			 nofp->nof_access) {
+			/* we (should) have the file open, use open stateid */
+			if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)
+				nfs4_reopen(nofp, thd);
+			if (!(nofp->nof_flags & NFS_OPEN_FILE_LOST))
+				s = &nofp->nof_stateid;
+		}
+	}
+
+	if (s) {
+		sid->seqid = s->seqid;
+		sid->other[0] = s->other[0];
+		sid->other[1] = s->other[1];
+		sid->other[2] = s->other[2];
+	} else {
+		/* named attributes may not have a stateid for reads, so don't complain for them */
+		if (!(np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
+			NP(np, "nfs_get_stateid: no stateid");
+		sid->seqid = sid->other[0] = sid->other[1] = sid->other[2] = 0xffffffff;
+	}
+	if (nlop)
+		nfs_lock_owner_rele(nlop);
+	if (noop)
+		nfs_open_owner_rele(noop);
+}
+
+
+/*
+ * When we have a delegation, we may be able to perform the OPEN locally.
+ * Perform the OPEN by checking the delegation ACE and/or checking via ACCESS.
+ */
+int
+nfs4_open_delegated(
+	nfsnode_t np,
+	struct nfs_open_file *nofp,
+	uint32_t accessMode,
+	uint32_t denyMode,
+	vfs_context_t ctx)
+{
+	int error = 0, ismember, readtoo = 0, authorized = 0;
+	uint32_t action;
+	struct kauth_acl_eval eval;
+	kauth_cred_t cred = vfs_context_ucred(ctx);
+
+	if (!(accessMode & NFS_OPEN_SHARE_ACCESS_READ)) {
+		/*
+		 * Try to open it for read access too,
+		 * so the buffer cache can read data.
+		 */
+		readtoo = 1;
+		accessMode |= NFS_OPEN_SHARE_ACCESS_READ;
+	}
+
+tryagain:
+	action = 0;
+	if (accessMode & NFS_OPEN_SHARE_ACCESS_READ)
+		action |= KAUTH_VNODE_READ_DATA;
+	if (accessMode & NFS_OPEN_SHARE_ACCESS_WRITE)
+		action |= KAUTH_VNODE_WRITE_DATA;
+
+	/* evaluate ACE (if we have one) */
+	if (np->n_dace.ace_flags) {
+		eval.ae_requested = action;
+		eval.ae_acl = &np->n_dace;
+		eval.ae_count = 1;
+		eval.ae_options = 0;
+		if (np->n_vattr.nva_uid == kauth_cred_getuid(cred))
+			eval.ae_options |= KAUTH_AEVAL_IS_OWNER;
+		error = kauth_cred_ismember_gid(cred, np->n_vattr.nva_gid, &ismember);
+		if (!error && ismember)
+			eval.ae_options |= KAUTH_AEVAL_IN_GROUP;
+
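+		/* tell kauth how to expand generic rights into the specific vnode bits */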
+		eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS;
+		eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS;
+		eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS;
+		eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS;
+
+		error = kauth_acl_evaluate(cred, &eval);
+
+		if (!error && (eval.ae_result == KAUTH_RESULT_ALLOW))
+			authorized = 1;
+	}
+
+	if (!authorized) {
+		/* need to ask the server via ACCESS */
+		struct vnop_access_args naa;
+		naa.a_desc = &vnop_access_desc;
+		naa.a_vp = NFSTOV(np);
+		naa.a_action = action;
+		naa.a_context = ctx;
+		if (!(error = nfs_vnop_access(&naa)))
+			authorized = 1;
+	}
+
+	if (!authorized) {
+		if (readtoo) {
+			/* try again without the extra read access */
+			accessMode &= ~NFS_OPEN_SHARE_ACCESS_READ;
+			readtoo = 0;
+			goto tryagain;
+		}
+		return (error ? error : EACCES);
+	}
+
+	nfs_open_file_add_open(nofp, accessMode, denyMode, 1);
+
+	return (0);
+}
+
+
+/*
+ * Open a file with the given access/deny modes.
+ *
+ * If we have a delegation, we may be able to handle the open locally.
+ * Otherwise, we will always send the open RPC even if this open's mode is
+ * a subset of all the existing opens.  This makes sure that we will always
+ * be able to do a downgrade to any of the open modes.
+ *
+ * Note: local conflicts should have already been checked in nfs_open_file_find().
+ */
+int
+nfs4_open(
+	nfsnode_t np,
+	struct nfs_open_file *nofp,
+	uint32_t accessMode,
+	uint32_t denyMode,
+	vfs_context_t ctx)
+{
+	vnode_t vp = NFSTOV(np);
+	vnode_t dvp = NULL;
+	struct componentname cn;
+	const char *vname = NULL;
+	size_t namelen;
+	char smallname[128];
+	char *filename = NULL;
+	int error = 0, readtoo = 0;
+
+	/*
+	 * We can handle the OPEN ourselves if we have a delegation,
+	 * unless it's a read delegation and the open is asking for
+	 * either write access or deny read.  We also don't bother to
+	 * use the delegation if it's being returned.
+	 */
+	if (np->n_openflags & N_DELEG_MASK) {
+		if ((error = nfs_open_state_set_busy(np, vfs_context_thread(ctx))))
+			return (error);
+		if ((np->n_openflags & N_DELEG_MASK) && !(np->n_openflags & N_DELEG_RETURN) &&
+		    (((np->n_openflags & N_DELEG_MASK) == N_DELEG_WRITE) ||
+		     (!(accessMode & NFS_OPEN_SHARE_ACCESS_WRITE) && !(denyMode & NFS_OPEN_SHARE_DENY_READ)))) {
+			error = nfs4_open_delegated(np, nofp, accessMode, denyMode, ctx);
+			nfs_open_state_clear_busy(np);
+			return (error);
+		}
+		nfs_open_state_clear_busy(np);
+	}
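+	/* no usable delegation, so fall through and send a real OPEN RPC */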
+
+	/*
+	 * [sigh] We can't trust VFS to get the parent right for named
+	 * attribute nodes.  (It likes to reparent the nodes after we've
+	 * created them.)  Luckily we can probably get the right parent
+	 * from the n_parent we have stashed away.
+	 */
+	if ((np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR) &&
+	    (((dvp = np->n_parent)) && (error = vnode_get(dvp))))
+		dvp = NULL;
+	if (!dvp)
+		dvp = vnode_getparent(vp);
+	vname = vnode_getname(vp);
+	if (!dvp || !vname) {
+		if (!error)
+			error = EIO;
+		goto out;
+	}
+	filename = &smallname[0];
+	namelen = snprintf(filename, sizeof(smallname), "%s", vname);
+	if (namelen >= sizeof(smallname)) {
+		MALLOC(filename, char *, namelen+1, M_TEMP, M_WAITOK);
+		if (!filename) {
+			error = ENOMEM;
+			goto out;
+		}
+		snprintf(filename, namelen+1, "%s", vname);
+	}
+	bzero(&cn, sizeof(cn));
+	cn.cn_nameptr = filename;
+	cn.cn_namelen = namelen;
+
+	if (!(accessMode & NFS_OPEN_SHARE_ACCESS_READ)) {
+		/*
+		 * Try to open it for read access too,
+		 * so the buffer cache can read data.
+		 */
+		readtoo = 1;
+		accessMode |= NFS_OPEN_SHARE_ACCESS_READ;
+	}
+tryagain:
+	error = nfs4_open_rpc(nofp, ctx, &cn, NULL, dvp, &vp, NFS_OPEN_NOCREATE, accessMode, denyMode);
+	if (error) {
+		if (!nfs_mount_state_error_should_restart(error) &&
+		    (error != EINTR) && (error != ERESTART) && readtoo) {
+			/* try again without the extra read access */
+			accessMode &= ~NFS_OPEN_SHARE_ACCESS_READ;
+			readtoo = 0;
+			goto tryagain;
+		}
+		goto out;
+	}
+	nfs_open_file_add_open(nofp, accessMode, denyMode, 0);
 out:
 	if (filename && (filename != &smallname[0]))
 		FREE(filename, M_TEMP);
@@ -2035,142 +2651,176 @@ out:
 	return (error);
 }
 
-
 int
-nfs4_vnop_open(
-	struct vnop_open_args /* {
+nfs_vnop_mmap(
+	struct vnop_mmap_args /* {
 		struct vnodeop_desc *a_desc;
 		vnode_t a_vp;
-		int a_mode;
+		int a_fflags;
 		vfs_context_t a_context;
 	} */ *ap)
 {
 	vfs_context_t ctx = ap->a_context;
 	vnode_t vp = ap->a_vp;
 	nfsnode_t np = VTONFS(vp);
+	int error = 0, accessMode, denyMode, delegated;
 	struct nfsmount *nmp;
-	int error, accessMode, denyMode, opened = 0;
 	struct nfs_open_owner *noop = NULL;
 	struct nfs_open_file *nofp = NULL;
 
-	if (!(ap->a_mode & (FREAD|FWRITE)))
-		return (EINVAL);
-
 	nmp = VTONMP(vp);
 	if (!nmp)
 		return (ENXIO);
 
-	/* First, call the common code */
-	if ((error = nfs3_vnop_open(ap)))
-		return (error);
-
-	if (!vnode_isreg(vp)) {
-		/* Just mark that it was opened */
-		lck_mtx_lock(&np->n_openlock);
-		np->n_openrefcnt++;
-		lck_mtx_unlock(&np->n_openlock);
-		return (0);
-	}
+	if (!vnode_isreg(vp) || !(ap->a_fflags & (PROT_READ|PROT_WRITE)))
+		return (EINVAL);
+	if (np->n_flag & NREVOKE)
+		return (EIO);
 
-	/* mode contains some combination of: FREAD, FWRITE, O_SHLOCK, O_EXLOCK */
-	accessMode = 0;
-	if (ap->a_mode & FREAD)
-		accessMode |= NFS_OPEN_SHARE_ACCESS_READ;
-	if (ap->a_mode & FWRITE)
+	/*
+	 * fflags contains some combination of: PROT_READ, PROT_WRITE
+	 * Since it's not possible to mmap() without having the file open for reading,
+	 * read access is always implied (regardless of whether PROT_READ is set).
+	 */
+	accessMode = NFS_OPEN_SHARE_ACCESS_READ;
+	if (ap->a_fflags & PROT_WRITE)
 		accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE;
-	if (ap->a_mode & O_EXLOCK)
-		denyMode = NFS_OPEN_SHARE_DENY_BOTH;
-	else if (ap->a_mode & O_SHLOCK)
-		denyMode = NFS_OPEN_SHARE_DENY_WRITE;
-	else
-		denyMode = NFS_OPEN_SHARE_DENY_NONE;
+	denyMode = NFS_OPEN_SHARE_DENY_NONE;
 
 	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
 	if (!noop)
 		return (ENOMEM);
 
 restart:
-	error = nfs_mount_state_in_use_start(nmp);
+	error = nfs_mount_state_in_use_start(nmp, NULL);
 	if (error) {
 		nfs_open_owner_rele(noop);
 		return (error);
 	}
-
-	error = nfs_open_file_find(np, noop, &nofp, accessMode, denyMode, 1);
-	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
-		const char *vname = vnode_getname(NFSTOV(np));
-		printf("nfs_vnop_open: LOST %s\n", vname);
-		vnode_putname(vname);
+	if (np->n_flag & NREVOKE) {
 		error = EIO;
+		nfs_mount_state_in_use_end(nmp, 0);
+		nfs_open_owner_rele(noop);
+		return (error);
+	}
+
+	error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1);
+	if (error || (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
+		NP(np, "nfs_vnop_mmap: no open file for owner, error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
+		error = EPERM;
 	}
 	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
 		nfs_mount_state_in_use_end(nmp, 0);
-		nfs4_reopen(nofp, vfs_context_thread(ctx));
+		error = nfs4_reopen(nofp, NULL);
 		nofp = NULL;
-		goto restart;
+		if (!error)
+			goto restart;
 	}
 	if (!error)
-		error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
+		error = nfs_open_file_set_busy(nofp, NULL);
 	if (error) {
 		nofp = NULL;
 		goto out;
 	}
 
 	/*
-	 * If we just created the file and the modes match, then we simply use
-	 * the open performed in the create.  Otherwise, send the request.
+	 * The open reference for mmap must mirror an existing open because
+	 * we may need to reclaim it after the file is closed.
+	 * So grab another open count matching the accessMode passed in.
+	 * If we already had an mmap open, prefer read/write without deny mode.
+	 * This means we may have to drop the current mmap open first.
 	 */
-	if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) &&
-	    (nofp->nof_creator == current_thread()) &&
-	    (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) &&
-	    (denyMode == NFS_OPEN_SHARE_DENY_NONE)) {
-		nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
-		nofp->nof_creator = NULL;
-	} else {
-		if (!opened)
-			error = nfs4_open(np, nofp, accessMode, denyMode, ctx);
-		if ((error == EACCES) && (nofp->nof_flags & NFS_OPEN_FILE_CREATE) &&
-		    (nofp->nof_creator == current_thread())) {
-			/*
-			 * Ugh.  This can happen if we just created the file with read-only
-			 * perms and we're trying to open it for real with different modes
-			 * (e.g. write-only or with a deny mode) and the server decides to
-			 * not allow the second open because of the read-only perms.
-			 * The best we can do is to just use the create's open.
-			 * We may have access we don't need or we may not have a requested
-			 * deny mode.  We may log complaints later, but we'll try to avoid it.
-			 */
-			if (denyMode != NFS_OPEN_SHARE_DENY_NONE) {
-				const char *vname = vnode_getname(NFSTOV(np));
-				printf("nfs4_vnop_open: deny mode foregone on create, %s\n", vname);
-				vnode_putname(vname);
-			}
-			nofp->nof_creator = NULL;
+
+	if (!nofp->nof_access) {
+		if (accessMode != NFS_OPEN_SHARE_ACCESS_READ) {
+			/* not asking for just read access -> fail */
+			error = EPERM;
+			goto out;
+		}
+		/* we don't have the file open, so open it for read access */
+		if (nmp->nm_vers < NFS_VER4) {
+			/* NFS v2/v3 opens are always allowed - so just add it. */
+			nfs_open_file_add_open(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, 0);
 			error = 0;
+		} else {
+			error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx);
 		}
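+		/* remember that this open was done on mmap's behalf so it can be closed when no longer needed */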
+		if (!error)
+			nofp->nof_flags |= NFS_OPEN_FILE_NEEDCLOSE;
 		if (error)
 			goto out;
-		opened = 1;
-		/*
-		 * If we had just created the file, we already had it open.
-		 * If the actual open mode is less than what we grabbed at
-		 * create time, then we'll downgrade the open here.
-		 */
-		if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) &&
-		    (nofp->nof_creator == current_thread())) {
-			error = nfs4_close(np, nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, ctx);
-			if (error) {
-				const char *vname = vnode_getname(NFSTOV(np));
-				printf("nfs_vnop_open: create close error %d, %s\n", error, vname);
-				vnode_putname(vname);
-			}
-			if (!nfs_mount_state_error_should_restart(error)) {
-				error = 0;
-				nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
-			}
+	}
+
+	/*
+	 * Determine the deny mode for the open: mirror an existing open,
+	 * preferring delegated opens and the least restrictive deny mode
+	 * currently held for the requested access.
+	 */
+	if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) {
+		if (nofp->nof_d_rw || nofp->nof_d_rw_dw || nofp->nof_d_rw_drw) {
+			delegated = 1;
+			if (nofp->nof_d_rw)
+				denyMode = NFS_OPEN_SHARE_DENY_NONE;
+			else if (nofp->nof_d_rw_dw)
+				denyMode = NFS_OPEN_SHARE_DENY_WRITE;
+			else if (nofp->nof_d_rw_drw)
+				denyMode = NFS_OPEN_SHARE_DENY_BOTH;
+		} else if (nofp->nof_rw || nofp->nof_rw_dw || nofp->nof_rw_drw) {
+			delegated = 0;
+			if (nofp->nof_rw)
+				denyMode = NFS_OPEN_SHARE_DENY_NONE;
+			else if (nofp->nof_rw_dw)
+				denyMode = NFS_OPEN_SHARE_DENY_WRITE;
+			else if (nofp->nof_rw_drw)
+				denyMode = NFS_OPEN_SHARE_DENY_BOTH;
+		} else {
+			error = EPERM;
+		}
+	} else { /* NFS_OPEN_SHARE_ACCESS_READ */
+		if (nofp->nof_d_r || nofp->nof_d_r_dw || nofp->nof_d_r_drw) {
+			delegated = 1;
+			if (nofp->nof_d_r)
+				denyMode = NFS_OPEN_SHARE_DENY_NONE;
+			else if (nofp->nof_d_r_dw)
+				denyMode = NFS_OPEN_SHARE_DENY_WRITE;
+			else if (nofp->nof_d_r_drw)
+				denyMode = NFS_OPEN_SHARE_DENY_BOTH;
+		} else if (nofp->nof_r || nofp->nof_r_dw || nofp->nof_r_drw) {
+			delegated = 0;
+			if (nofp->nof_r)
+				denyMode = NFS_OPEN_SHARE_DENY_NONE;
+			else if (nofp->nof_r_dw)
+				denyMode = NFS_OPEN_SHARE_DENY_WRITE;
+			else if (nofp->nof_r_drw)
+				denyMode = NFS_OPEN_SHARE_DENY_BOTH;
+		} else {
+			error = EPERM;
+		}
+	}
+	if (error) /* mmap mode without proper open mode */
+		goto out;
+
+	/*
+	 * If the existing mmap access is more than the new access OR the
+	 * existing access is the same and the existing deny mode is less,
+	 * then we'll stick with the existing mmap open mode.
+	 */
+	if ((nofp->nof_mmap_access > accessMode) ||
+	    ((nofp->nof_mmap_access == accessMode) && (nofp->nof_mmap_deny <= denyMode)))
+		goto out;
+
+	/* update mmap open mode */
+	if (nofp->nof_mmap_access) {
+		error = nfs_close(np, nofp, nofp->nof_mmap_access, nofp->nof_mmap_deny, ctx);
+		if (error) {
+			if (!nfs_mount_state_error_should_restart(error))
+				NP(np, "nfs_vnop_mmap: close of previous mmap mode failed: %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
+			NP(np, "nfs_vnop_mmap: update, close error %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
+			goto out;
 		}
+		nofp->nof_mmap_access = nofp->nof_mmap_deny = 0;
 	}
 
+	nfs_open_file_add_open(nofp, accessMode, denyMode, delegated);
+	nofp->nof_mmap_access = accessMode;
+	nofp->nof_mmap_deny = denyMode;
+
 out:
 	if (nofp)
 		nfs_open_file_clear_busy(nofp);
@@ -2180,601 +2830,134 @@ out:
 	}
 	if (noop)
 		nfs_open_owner_rele(noop);
-	if (error) {
-		const char *vname = vnode_getname(NFSTOV(np));
-		printf("nfs_vnop_open: error %d, %s\n", error, vname);
-		vnode_putname(vname);
-	}
 	return (error);
 }
 
-int
-nfs4_close(
-	nfsnode_t np,
-	struct nfs_open_file *nofp,
-	uint32_t accessMode,
-	uint32_t denyMode,
-	vfs_context_t ctx)
-{
-	struct nfs_lock_owner *nlop;
-	int error = 0, changed = 0, closed = 0;
-	uint32_t newAccessMode, newDenyMode;
-
-	/* warn if modes don't match current state */
-	if (((accessMode & nofp->nof_access) != accessMode) || ((denyMode & nofp->nof_deny) != denyMode)) {
-		const char *vname = vnode_getname(NFSTOV(np));
-		printf("nfs4_close: mode mismatch %d %d, current %d %d, %s\n",
-			accessMode, denyMode, nofp->nof_access, nofp->nof_deny, vname);
-		vnode_putname(vname);
-	}
-
-	/*
-	 * If we're closing a write-only open, we may not have a write-only count
-	 * if we also grabbed read access.  So, check the read-write count.
-	 */
-	if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
-		if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
-		    (nofp->nof_w == 0) && nofp->nof_rw)
-			accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
-	} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
-		if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
-		    (nofp->nof_w_dw == 0) && nofp->nof_rw_dw)
-			accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
-	} else { /* NFS_OPEN_SHARE_DENY_BOTH */
-		if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
-		    (nofp->nof_w_drw == 0) && nofp->nof_rw_drw)
-			accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
-	}
-
-	/*
-	 * Calculate new modes: a mode bit gets removed when there's only
-	 * one count in all the corresponding counts
-	 */
-	newAccessMode = nofp->nof_access;
-	newDenyMode = nofp->nof_deny;
-	if ((accessMode & NFS_OPEN_SHARE_ACCESS_READ) &&
-	    (newAccessMode & NFS_OPEN_SHARE_ACCESS_READ) &&
-	    ((nofp->nof_r + nofp->nof_rw + nofp->nof_r_dw +
-	      nofp->nof_rw_dw + nofp->nof_r_drw + nofp->nof_rw_dw) == 1)) {
-		newAccessMode &= ~NFS_OPEN_SHARE_ACCESS_READ;
-		changed = 1;
-	}
-	if ((accessMode & NFS_OPEN_SHARE_ACCESS_WRITE) &&
-	    (newAccessMode & NFS_OPEN_SHARE_ACCESS_WRITE) &&
-	    ((nofp->nof_w + nofp->nof_rw + nofp->nof_w_dw +
-	      nofp->nof_rw_dw + nofp->nof_w_drw + nofp->nof_rw_dw) == 1)) {
-		newAccessMode &= ~NFS_OPEN_SHARE_ACCESS_WRITE;
-		changed = 1;
-	}
-	if ((denyMode & NFS_OPEN_SHARE_DENY_READ) &&
-	    (newDenyMode & NFS_OPEN_SHARE_DENY_READ) &&
-	    ((nofp->nof_r_drw + nofp->nof_w_drw + nofp->nof_rw_drw) == 1)) {
-		newDenyMode &= ~NFS_OPEN_SHARE_DENY_READ;
-		changed = 1;
-	}
-	if ((denyMode & NFS_OPEN_SHARE_DENY_WRITE) &&
-	    (newDenyMode & NFS_OPEN_SHARE_DENY_WRITE) &&
-	    ((nofp->nof_r_drw + nofp->nof_w_drw + nofp->nof_rw_drw +
-	      nofp->nof_r_dw + nofp->nof_w_dw + nofp->nof_rw_dw) == 1)) {
-		newDenyMode &= ~NFS_OPEN_SHARE_DENY_WRITE;
-		changed = 1;
-	}
-
-
-	if ((newAccessMode == 0) || (nofp->nof_opencnt == 1)) {
-		/*
-		 * No more access after this close, so clean up and close it.
-		 */
-		closed = 1;
-		if (!(nofp->nof_flags & NFS_OPEN_FILE_LOST))
-			error = nfs4_close_rpc(np, nofp, vfs_context_thread(ctx), vfs_context_ucred(ctx), 0);
-		if (error == NFSERR_LOCKS_HELD) {
-			/*
-			 * Hmm... the server says we have locks we need to release first
-			 * Find the lock owner and try to unlock everything.
-			 */
-			nlop = nfs_lock_owner_find(np, vfs_context_proc(ctx), 0);
-			if (nlop) {
-				nfs4_unlock_rpc(np, nlop, F_WRLCK, 0, UINT64_MAX, ctx);
-				nfs_lock_owner_rele(nlop);
-			}
-			error = nfs4_close_rpc(np, nofp, vfs_context_thread(ctx), vfs_context_ucred(ctx), 0);
-		}
-	} else if (changed) {
-		/*
-		 * File is still open but with less access, so downgrade the open.
-		 */
-		if (!(nofp->nof_flags & NFS_OPEN_FILE_LOST))
-			error = nfs4_open_downgrade_rpc(np, nofp, ctx);
-	}
-
-	if (error) {
-		const char *vname = vnode_getname(NFSTOV(np));
-		printf("nfs4_close: error %d, %s\n", error, vname);
-		vnode_putname(vname);
-		return (error);
-	}
-
-	/* Decrement the corresponding open access/deny mode counter. */
-	if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
-		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) {
-			if (nofp->nof_r == 0)
-				printf("nfs4_close: open(R) count underrun\n");
-			else
-				nofp->nof_r--;
-		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) {
-			if (nofp->nof_w == 0)
-				printf("nfs4_close: open(W) count underrun\n");
-			else
-				nofp->nof_w--;
-		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) {
-			if (nofp->nof_rw == 0)
-				printf("nfs4_close: open(RW) count underrun\n");
-			else
-				nofp->nof_rw--;
-		}
-	} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
-		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) {
-			if (nofp->nof_r_dw == 0)
-				printf("nfs4_close: open(R,DW) count underrun\n");
-			else
-				nofp->nof_r_dw--;
-		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) {
-			if (nofp->nof_w_dw == 0)
-				printf("nfs4_close: open(W,DW) count underrun\n");
-			else
-				nofp->nof_w_dw--;
-		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) {
-			if (nofp->nof_rw_dw == 0)
-				printf("nfs4_close: open(RW,DW) count underrun\n");
-			else
-				nofp->nof_rw_dw--;
-		}
-	} else { /* NFS_OPEN_SHARE_DENY_BOTH */
-		if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) {
-			if (nofp->nof_r_drw == 0)
-				printf("nfs4_close: open(R,DRW) count underrun\n");
-			else
-				nofp->nof_r_drw--;
-		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) {
-			if (nofp->nof_w_drw == 0)
-				printf("nfs4_close: open(W,DRW) count underrun\n");
-			else
-				nofp->nof_w_drw--;
-		} else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) {
-			if (nofp->nof_rw_drw == 0)
-				printf("nfs4_close: open(RW,DRW) count underrun\n");
-			else
-				nofp->nof_rw_drw--;
-		}
-	}
-	/* update the modes */
-	nofp->nof_access = newAccessMode;
-	nofp->nof_deny = newDenyMode;
-	if (closed) {
-		if (nofp->nof_r || nofp->nof_w ||
-		    (nofp->nof_rw && !((nofp->nof_flags & NFS_OPEN_FILE_CREATE) && !nofp->nof_creator && (nofp->nof_rw == 1))) ||
-		    nofp->nof_r_dw || nofp->nof_w_dw || nofp->nof_rw_dw ||
-		    nofp->nof_r_drw || nofp->nof_w_drw || nofp->nof_rw_drw)
-			printf("nfs4_close: unexpected count: %u %u %u dw %u %u %u drw %u %u %u flags 0x%x\n",
-				nofp->nof_r, nofp->nof_w, nofp->nof_rw,
-				nofp->nof_r_dw, nofp->nof_w_dw, nofp->nof_rw_dw,
-				nofp->nof_r_drw, nofp->nof_w_drw, nofp->nof_rw_drw,
-				nofp->nof_flags);
-		/* clear out all open info, just to be safe */
-		nofp->nof_access = nofp->nof_deny = 0;
-		nofp->nof_mmap_access = nofp->nof_mmap_deny = 0;
-		nofp->nof_r = nofp->nof_w = nofp->nof_rw = 0;
-		nofp->nof_r_dw = nofp->nof_w_dw = nofp->nof_rw_dw = 0;
-		nofp->nof_r_drw = nofp->nof_w_drw = nofp->nof_rw_drw = 0;
-		nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
-		/* XXX we may potentially want to clean up idle/unused open file structures */
-	}
-	nofp->nof_opencnt--;
-	if (nofp->nof_flags & NFS_OPEN_FILE_LOST) {
-		error = EIO;
-		if (!nofp->nof_opencnt)
-			nofp->nof_flags &= ~NFS_OPEN_FILE_LOST;
-		const char *vname = vnode_getname(NFSTOV(np));
-		printf("nfs_close: LOST%s, %s\n", !(nofp->nof_flags & NFS_OPEN_FILE_LOST) ? " (last)" : "", vname);
-		vnode_putname(vname);
-	}
-	return (error);
-}
 
 int
-nfs4_vnop_close(
-	struct vnop_close_args /* {
+nfs_vnop_mnomap(
+	struct vnop_mnomap_args /* {
 		struct vnodeop_desc *a_desc;
 		vnode_t a_vp;
-		int a_fflag;
 		vfs_context_t a_context;
 	} */ *ap)
 {
 	vfs_context_t ctx = ap->a_context;
 	vnode_t vp = ap->a_vp;
-	int fflag = ap->a_fflag;
-	int error, common_error, accessMode, denyMode;
 	nfsnode_t np = VTONFS(vp);
 	struct nfsmount *nmp;
-	struct nfs_open_owner *noop = NULL;
 	struct nfs_open_file *nofp = NULL;
+	off_t size;
+	int error;
 
 	nmp = VTONMP(vp);
 	if (!nmp)
 		return (ENXIO);
 
-	/* First, call the common code */
-	common_error = nfs3_vnop_close(ap);
+	/* flush buffers/ubc before we drop the open (in case it's our last open) */
+	nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), V_IGNORE_WRITEERR);
+	if (UBCINFOEXISTS(vp) && (size = ubc_getsize(vp)))
+		ubc_msync(vp, 0, size, NULL, UBC_PUSHALL | UBC_SYNC);
 
-	if (!vnode_isreg(vp)) {
-		/* Just mark that it was closed */
-		lck_mtx_lock(&np->n_openlock);
-		np->n_openrefcnt--;
+	/* walk all open files and close all mmap opens */
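+	/* (n_openlock is dropped for each close, so the walk restarts from the top each time) */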
+loop:
+	error = nfs_mount_state_in_use_start(nmp, NULL);
+	if (error)
+		return (error);
+	lck_mtx_lock(&np->n_openlock);
+	TAILQ_FOREACH(nofp, &np->n_opens, nof_link) {
+		if (!nofp->nof_mmap_access)
+			continue;
 		lck_mtx_unlock(&np->n_openlock);
-		return (common_error);
+		if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) {
+			nfs_mount_state_in_use_end(nmp, 0);
+			error = nfs4_reopen(nofp, NULL);
+			if (!error)
+				goto loop;
+		}
+		if (!error)
+			error = nfs_open_file_set_busy(nofp, NULL);
+		if (error) {
+			lck_mtx_lock(&np->n_openlock);
+			break;
+		}
+		if (nofp->nof_mmap_access) {
+			error = nfs_close(np, nofp, nofp->nof_mmap_access, nofp->nof_mmap_deny, ctx);
+			if (!nfs_mount_state_error_should_restart(error)) {
+				if (error) /* not a state-operation-restarting error, so just clear the access */
+					NP(np, "nfs_vnop_mnomap: close of mmap mode failed: %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				nofp->nof_mmap_access = nofp->nof_mmap_deny = 0;
+			}
+			if (error)
+				NP(np, "nfs_vnop_mnomap: error %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
+		}
+		nfs_open_file_clear_busy(nofp);
+		nfs_mount_state_in_use_end(nmp, error);
+		goto loop;
 	}
+	lck_mtx_unlock(&np->n_openlock);
+	nfs_mount_state_in_use_end(nmp, error);
+	return (error);
+}
 
-	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 0);
-	if (!noop) {
-		printf("nfs4_vnop_close: can't get open owner!\n");
-		return (EIO);
-	}
+/*
+ * Search a node's lock owner list for the owner for this process.
+ * If not found and "alloc" is set, then allocate a new one.
+ */
+struct nfs_lock_owner *
+nfs_lock_owner_find(nfsnode_t np, proc_t p, int alloc)
+{
+	pid_t pid = proc_pid(p);
+	struct nfs_lock_owner *nlop, *newnlop = NULL;
 
-restart:
-	error = nfs_mount_state_in_use_start(nmp);
-	if (error) {
-		nfs_open_owner_rele(noop);
-		return (error);
+tryagain:
+	lck_mtx_lock(&np->n_openlock);
+	TAILQ_FOREACH(nlop, &np->n_lock_owners, nlo_link) {
+		if (nlop->nlo_pid != pid)
+			continue;
+		if (timevalcmp(&nlop->nlo_pid_start, &p->p_start, ==))
+			break;
+		/* stale lock owner... reuse it if we can */
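+		/* (same pid but a different process start time means the pid was recycled) */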
+		if (nlop->nlo_refcnt) {
+			TAILQ_REMOVE(&np->n_lock_owners, nlop, nlo_link);
+			nlop->nlo_flags &= ~NFS_LOCK_OWNER_LINK;
+			lck_mtx_unlock(&np->n_openlock);
+			goto tryagain;
+		}
+		nlop->nlo_pid_start = p->p_start;
+		nlop->nlo_seqid = 0;
+		nlop->nlo_stategenid = 0;
+		break;
 	}
 
-	error = nfs_open_file_find(np, noop, &nofp, 0, 0, 0);
-	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
-		nfs_mount_state_in_use_end(nmp, 0);
-		nfs4_reopen(nofp, vfs_context_thread(ctx));
-		nofp = NULL;
-		goto restart;
-	}
-	if (error) {
-		const char *vname = vnode_getname(NFSTOV(np));
-		printf("nfs4_vnop_close: no open file for owner %d, %s\n", error, vname);
-		vnode_putname(vname);
-		error = EBADF;
-		goto out;
+	if (!nlop && !newnlop && alloc) {
+		lck_mtx_unlock(&np->n_openlock);
+		MALLOC(newnlop, struct nfs_lock_owner *, sizeof(struct nfs_lock_owner), M_TEMP, M_WAITOK);
+		if (!newnlop)
+			return (NULL);
+		bzero(newnlop, sizeof(*newnlop));
+		lck_mtx_init(&newnlop->nlo_lock, nfs_open_grp, LCK_ATTR_NULL);
+		newnlop->nlo_pid = pid;
+		newnlop->nlo_pid_start = p->p_start;
+		newnlop->nlo_name = OSAddAtomic(1, &nfs_lock_owner_seqnum);
+		TAILQ_INIT(&newnlop->nlo_locks);
+		goto tryagain;
 	}
-	error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
-	if (error) {
-		nofp = NULL;
-		goto out;
+	if (!nlop && newnlop) {
+		newnlop->nlo_flags |= NFS_LOCK_OWNER_LINK;
+		TAILQ_INSERT_HEAD(&np->n_lock_owners, newnlop, nlo_link);
+		nlop = newnlop;
 	}
+	lck_mtx_unlock(&np->n_openlock);
 
-	/* fflag contains some combination of: FREAD, FWRITE, FHASLOCK */
-	accessMode = 0;
-	if (fflag & FREAD)
-		accessMode |= NFS_OPEN_SHARE_ACCESS_READ;
-	if (fflag & FWRITE)
-		accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE;
-// XXX It would be nice if we still had the O_EXLOCK/O_SHLOCK flags that were on the open
-//	if (fflag & O_EXLOCK)
-//		denyMode = NFS_OPEN_SHARE_DENY_BOTH;
-//	else if (fflag & O_SHLOCK)
-//		denyMode = NFS_OPEN_SHARE_DENY_WRITE;
-//	else
-//		denyMode = NFS_OPEN_SHARE_DENY_NONE;
-	if (fflag & FHASLOCK) {
-		/* XXX assume FHASLOCK is for the deny mode and not flock */
-		/* FHASLOCK flock will be unlocked in the close path, but the flag is not cleared. */
-		if (nofp->nof_deny & NFS_OPEN_SHARE_DENY_READ)
-			denyMode = NFS_OPEN_SHARE_DENY_BOTH;
-		else if (nofp->nof_deny & NFS_OPEN_SHARE_DENY_WRITE)
-			denyMode = NFS_OPEN_SHARE_DENY_WRITE;
-		else
-			denyMode = NFS_OPEN_SHARE_DENY_NONE;
-	} else {
-			denyMode = NFS_OPEN_SHARE_DENY_NONE;
-	}
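+	/* if we allocated a new owner but found an existing one, discard ours */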
+	if (newnlop && (nlop != newnlop))
+		nfs_lock_owner_destroy(newnlop);
 
-	if (!accessMode) {
-		error = EINVAL;
-		goto out;
-	}
+	if (nlop)
+		nfs_lock_owner_ref(nlop);
 
-	error = nfs4_close(np, nofp, accessMode, denyMode, ctx);
-	if (error) {
-		const char *vname = vnode_getname(NFSTOV(np));
-		printf("nfs_vnop_close: close error %d, %s\n", error, vname);
-		vnode_putname(vname);
-	}
-
-out:
-	if (nofp)
-		nfs_open_file_clear_busy(nofp);
-	if (nfs_mount_state_in_use_end(nmp, error)) {
-		nofp = NULL;
-		goto restart;
-	}
-	if (noop)
-		nfs_open_owner_rele(noop);
-	if (error) {
-		const char *vname = vnode_getname(NFSTOV(np));
-		printf("nfs_vnop_close: error %d, %s\n", error, vname);
-		vnode_putname(vname);
-	}
-	if (!error)
-		error = common_error;
-	return (error);
-}
-
-int
-nfs4_vnop_mmap(
-	struct vnop_mmap_args /* {
-		struct vnodeop_desc *a_desc;
-		vnode_t a_vp;
-		int a_fflags;
-		vfs_context_t a_context;
-	} */ *ap)
-{
-	vfs_context_t ctx = ap->a_context;
-	vnode_t vp = ap->a_vp;
-	nfsnode_t np = VTONFS(vp);
-	int error = 0, accessMode, denyMode;
-	struct nfsmount *nmp;
-	struct nfs_open_owner *noop = NULL;
-	struct nfs_open_file *nofp = NULL;
-
-	nmp = VTONMP(vp);
-	if (!nmp)
-		return (ENXIO);
-
-	if (!vnode_isreg(vp) || !(ap->a_fflags & (PROT_READ|PROT_WRITE)))
-		return (EINVAL);
-
-	/*
-	 * fflags contains some combination of: PROT_READ, PROT_WRITE
-	 * Since it's not possible to mmap() without having the file open for reading,
-	 * read access is always there (regardless if PROT_READ is not set).
-	 */
-	accessMode = NFS_OPEN_SHARE_ACCESS_READ;
-	if (ap->a_fflags & PROT_WRITE)
-		accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE;
-	denyMode = NFS_OPEN_SHARE_DENY_NONE;
-
-	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 0);
-	if (!noop) {
-		printf("nfs4_vnop_mmap: no open owner\n");
-		return (EPERM);
-	}
-
-restart:
-	error = nfs_mount_state_in_use_start(nmp);
-	if (error) {
-		nfs_open_owner_rele(noop);
-		return (error);
-	}
-
-	error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1);
-	if (error || (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST))) {
-		printf("nfs4_vnop_mmap: no open file for owner %d\n", error);
-		error = EPERM;
-	}
-	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
-		nfs_mount_state_in_use_end(nmp, 0);
-		nfs4_reopen(nofp, vfs_context_thread(ctx));
-		nofp = NULL;
-		goto restart;
-	}
-	if (!error)
-		error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
-	if (error) {
-		nofp = NULL;
-		goto out;
-	}
-
-	/*
-	 * The open reference for mmap must mirror an existing open because
-	 * we may need to reclaim it after the file is closed.
-	 * So grab another open count matching the accessMode passed in.
-	 * If we already had an mmap open, prefer read/write without deny mode.
-	 * This means we may have to drop the current mmap open first.
-	 */
-
-	/* determine deny mode for open */
-	if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) {
-		if (nofp->nof_rw)
-			denyMode = NFS_OPEN_SHARE_DENY_NONE;
-		else if (nofp->nof_rw_dw)
-			denyMode = NFS_OPEN_SHARE_DENY_WRITE;
-		else if (nofp->nof_rw_drw)
-			denyMode = NFS_OPEN_SHARE_DENY_BOTH;
-		else
-			error = EPERM;
-	} else { /* NFS_OPEN_SHARE_ACCESS_READ */
-		if (nofp->nof_r)
-			denyMode = NFS_OPEN_SHARE_DENY_NONE;
-		else if (nofp->nof_r_dw)
-			denyMode = NFS_OPEN_SHARE_DENY_WRITE;
-		else if (nofp->nof_r_drw)
-			denyMode = NFS_OPEN_SHARE_DENY_BOTH;
-		else
-			error = EPERM;
-	}
-	if (error) /* mmap mode without proper open mode */
-		goto out;
-
-	/*
-	 * If the existing mmap access is more than the new access OR the
-	 * existing access is the same and the existing deny mode is less,
-	 * then we'll stick with the existing mmap open mode.
-	 */
-	if ((nofp->nof_mmap_access > accessMode) ||
-	    ((nofp->nof_mmap_access == accessMode) && (nofp->nof_mmap_deny <= denyMode)))
-		goto out;
-
-	/* update mmap open mode */
-	if (nofp->nof_mmap_access) {
-		error = nfs4_close(np, nofp, nofp->nof_mmap_access, nofp->nof_mmap_deny, ctx);
-		if (error) {
-			if (!nfs_mount_state_error_should_restart(error))
-				printf("nfs_vnop_mmap: close of previous mmap mode failed: %d\n", error);
-			const char *vname = vnode_getname(NFSTOV(np));
-			printf("nfs_vnop_mmap: update, close error %d, %s\n", error, vname);
-			vnode_putname(vname);
-			goto out;
-		}
-		nofp->nof_mmap_access = nofp->nof_mmap_deny = 0;
-	}
-
-	if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) {
-		if (denyMode == NFS_OPEN_SHARE_DENY_NONE)
-			nofp->nof_rw++;
-		else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE)
-			nofp->nof_rw_dw++;
-		else /* NFS_OPEN_SHARE_DENY_BOTH */
-			nofp->nof_rw_drw++;
-	} else if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) {
-		if (denyMode == NFS_OPEN_SHARE_DENY_NONE)
-			nofp->nof_r++;
-		else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE)
-			nofp->nof_r_dw++;
-		else /* NFS_OPEN_SHARE_DENY_BOTH */
-			nofp->nof_r_drw++;
-	}
-	nofp->nof_mmap_access = accessMode;
-	nofp->nof_mmap_deny = denyMode;
-	nofp->nof_opencnt++;
-
-out:
-	if (nofp)
-		nfs_open_file_clear_busy(nofp);
-	if (nfs_mount_state_in_use_end(nmp, error)) {
-		nofp = NULL;
-		goto restart;
-	}
-	if (noop)
-		nfs_open_owner_rele(noop);
-	return (error);
-}
-
-
-int
-nfs4_vnop_mnomap(
-	struct vnop_mnomap_args /* {
-		struct vnodeop_desc *a_desc;
-		vnode_t a_vp;
-		vfs_context_t a_context;
-	} */ *ap)
-{
-	vfs_context_t ctx = ap->a_context;
-	vnode_t vp = ap->a_vp;
-	nfsnode_t np = VTONFS(vp);
-	struct nfsmount *nmp;
-	struct nfs_open_file *nofp = NULL;
-	int error;
-
-	nmp = VTONMP(vp);
-	if (!nmp)
-		return (ENXIO);
-
-	/* walk all open files and close all mmap opens */
-loop:
-	error = nfs_mount_state_in_use_start(nmp);
-	if (error)
-		return (error);
-	lck_mtx_lock(&np->n_openlock);
-	TAILQ_FOREACH(nofp, &np->n_opens, nof_link) {
-		if (!nofp->nof_mmap_access)
-			continue;
-		lck_mtx_unlock(&np->n_openlock);
-		if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) {
-			nfs_mount_state_in_use_end(nmp, 0);
-			nfs4_reopen(nofp, vfs_context_thread(ctx));
-			goto loop;
-		}
-		error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
-		if (error) {
-			lck_mtx_lock(&np->n_openlock);
-			break;
-		}
-		if (nofp->nof_mmap_access) {
-			error = nfs4_close(np, nofp, nofp->nof_mmap_access, nofp->nof_mmap_deny, ctx);
-			if (!nfs_mount_state_error_should_restart(error)) {
-				if (error) /* not a state-operation-restarting error, so just clear the access */
-					printf("nfs_vnop_mnomap: close of mmap mode failed: %d\n", error);
-				nofp->nof_mmap_access = nofp->nof_mmap_deny = 0;
-			}
-			if (error) {
-				const char *vname = vnode_getname(NFSTOV(np));
-				printf("nfs_vnop_mnomap: error %d, %s\n", error, vname);
-				vnode_putname(vname);
-			}
-		}
-		nfs_open_file_clear_busy(nofp);
-		nfs_mount_state_in_use_end(nmp, error);
-		goto loop;
-	}
-	lck_mtx_unlock(&np->n_openlock);
-	nfs_mount_state_in_use_end(nmp, error);
-	return (error);
-}
-
-/*
- * Search a node's lock owner list for the owner for this process.
- * If not found and "alloc" is set, then allocate a new one.
- */
-struct nfs_lock_owner *
-nfs_lock_owner_find(nfsnode_t np, proc_t p, int alloc)
-{
-	pid_t pid = proc_pid(p);
-	struct nfs_lock_owner *nlop, *newnlop = NULL;
-
-tryagain:
-	lck_mtx_lock(&np->n_openlock);
-	TAILQ_FOREACH(nlop, &np->n_lock_owners, nlo_link) {
-		if (nlop->nlo_pid != pid)
-			continue;
-		if (timevalcmp(&nlop->nlo_pid_start, &p->p_start, ==))
-			break;
-		/* stale lock owner... reuse it if we can */
-		if (nlop->nlo_refcnt) {
-			TAILQ_REMOVE(&np->n_lock_owners, nlop, nlo_link);
-			nlop->nlo_flags &= ~NFS_LOCK_OWNER_LINK;
-			lck_mtx_unlock(&np->n_openlock);
-			goto tryagain;
-		}
-		nlop->nlo_pid_start = p->p_start;
-		nlop->nlo_seqid = 0;
-		nlop->nlo_stategenid = 0;
-		break;
-	}
-
-	if (!nlop && !newnlop && alloc) {
-		lck_mtx_unlock(&np->n_openlock);
-		MALLOC(newnlop, struct nfs_lock_owner *, sizeof(struct nfs_lock_owner), M_TEMP, M_WAITOK);
-		if (!newnlop)
-			return (NULL);
-		bzero(newnlop, sizeof(*newnlop));
-		lck_mtx_init(&newnlop->nlo_lock, nfs_open_grp, LCK_ATTR_NULL);
-		newnlop->nlo_pid = pid;
-		newnlop->nlo_pid_start = p->p_start;
-		newnlop->nlo_name = OSAddAtomic(1, &nfs_lock_owner_seqnum);
-		TAILQ_INIT(&newnlop->nlo_locks);
-		goto tryagain;
-	}
-	if (!nlop && newnlop) {
-		newnlop->nlo_flags |= NFS_LOCK_OWNER_LINK;
-		TAILQ_INSERT_HEAD(&np->n_lock_owners, newnlop, nlo_link);
-		nlop = newnlop;
-	}
-	lck_mtx_unlock(&np->n_openlock);
-
-	if (newnlop && (nlop != newnlop))
-		nfs_lock_owner_destroy(newnlop);
-
-	if (nlop)
-		nfs_lock_owner_ref(nlop);
-
-	return (nlop);
-}
+	return (nlop);
+}
 
 /*
  * destroy a lock owner that's no longer needed
@@ -2838,7 +3021,7 @@ nfs_lock_owner_set_busy(struct nfs_lock_owner *nlop, thread_t thd)
 	nmp = nlop->nlo_open_owner->noo_mount;
 	if (!nmp)
 		return (ENXIO);
-	slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
+	slpflag = (NMFLAG(nmp, INTR) && thd) ? PCATCH : 0;
 
 	lck_mtx_lock(&nlop->nlo_lock);
 	while (nlop->nlo_flags & NFS_LOCK_OWNER_BUSY) {
@@ -2846,6 +3029,7 @@ nfs_lock_owner_set_busy(struct nfs_lock_owner *nlop, thread_t thd)
 			break;
 		nlop->nlo_flags |= NFS_LOCK_OWNER_WANT;
 		msleep(nlop, &nlop->nlo_lock, slpflag, "nfs_lock_owner_set_busy", &ts);
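+		/* clear PCATCH after the first sleep so a pending signal can't turn the wait into a busy loop */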
+		slpflag = 0;
 	}
 	if (!error)
 		nlop->nlo_flags |= NFS_LOCK_OWNER_BUSY;
@@ -2977,11 +3161,12 @@ nfs_file_lock_conflict(struct nfs_file_lock *nflp1, struct nfs_file_lock *nflp2,
  * Send an NFSv4 LOCK RPC to the server.
  */
 int
-nfs4_lock_rpc(
+nfs4_setlock_rpc(
 	nfsnode_t np,
 	struct nfs_open_file *nofp,
 	struct nfs_file_lock *nflp,
 	int reclaim,
+	int flags,
 	thread_t thd,
 	kauth_cred_t cred)
 {
@@ -2991,10 +3176,13 @@ nfs4_lock_rpc(
 	uint64_t xid;
 	uint32_t locktype;
 	int error = 0, lockerror = ENOENT, newlocker, numops, status;
+	struct nfsreq_secinfo_args si;
 
 	nmp = NFSTONMP(np);
 	if (!nmp)
 		return (ENXIO);
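+	/* lock RPCs aren't valid on referral trigger nodes */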
+	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
 
 	newlocker = (nlop->nlo_stategenid != nmp->nm_stategenid);
 	locktype = (nflp->nfl_flags & NFS_FILE_LOCK_WAIT) ?
@@ -3027,6 +3215,7 @@ nfs4_lock_rpc(
 		return (error);
 	}
 
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
 
@@ -3039,8 +3228,7 @@ nfs4_lock_rpc(
 	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_LOCK);
 	nfsm_chain_add_32(error, &nmreq, locktype);
@@ -3061,7 +3249,7 @@ nfs4_lock_rpc(
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
 
-	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, (reclaim ? R_RECOVER : 0), &nmrep, &xid, &status);
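+	/* send the request uninterruptibly (R_NOINTR) so the lock owner's seqid can't get out of sync with the server */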
+	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, flags|R_NOINTR, &nmrep, &xid, &status);
 
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
@@ -3070,7 +3258,7 @@ nfs4_lock_rpc(
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
 	nfsmout_if(error);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, NULL, &xid);
+	nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, &xid);
 	nfsmout_if(error);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_LOCK);
 	nfs_owner_seqid_increment(newlocker ? nofp->nof_owner : NULL, nlop, error);
@@ -3103,21 +3291,27 @@ nfs4_unlock_rpc(
 	int type,
 	uint64_t start,
 	uint64_t end,
-	vfs_context_t ctx)
+	int flags,
+	thread_t thd,
+	kauth_cred_t cred)
 {
 	struct nfsmount *nmp;
 	struct nfsm_chain nmreq, nmrep;
 	uint64_t xid;
 	int error = 0, lockerror = ENOENT, numops, status;
+	struct nfsreq_secinfo_args si;
 
 	nmp = NFSTONMP(np);
 	if (!nmp)
 		return (ENXIO);
+	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
 
-	error = nfs_lock_owner_set_busy(nlop, vfs_context_thread(ctx));
+	error = nfs_lock_owner_set_busy(nlop, NULL);
 	if (error)
 		return (error);
 
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
 
@@ -3130,8 +3324,7 @@ nfs4_unlock_rpc(
 	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_LOCKU);
 	nfsm_chain_add_32(error, &nmreq, (type == F_WRLCK) ? NFS_LOCK_TYPE_WRITE : NFS_LOCK_TYPE_READ);
@@ -3143,7 +3336,7 @@ nfs4_unlock_rpc(
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
 
-	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status);
+	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, flags|R_NOINTR, &nmrep, &xid, &status);
 
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
@@ -3152,7 +3345,7 @@ nfs4_unlock_rpc(
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
 	nfsmout_if(error);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, NULL, &xid);
+	nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, &xid);
 	nfsmout_if(error);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_LOCKU);
 	nfs_owner_seqid_increment(NULL, nlop, error);
@@ -3167,14 +3360,10 @@ nfsmout:
 }
 
 /*
- * Check for any conflicts with the given lock.
- *
- * Checking for a lock doesn't require the file to be opened.
- * So we skip all the open owner, open file, lock owner work
- * and just check for a conflicting lock.
+ * Send an NFSv4 LOCKT RPC to the server.
  */
 int
-nfs4_getlock(
+nfs4_getlock_rpc(
 	nfsnode_t np,
 	struct nfs_lock_owner *nlop,
 	struct flock *fl,
@@ -3183,39 +3372,20 @@ nfs4_getlock(
 	vfs_context_t ctx)
 {
 	struct nfsmount *nmp;
-	struct nfs_file_lock *nflp;
 	struct nfsm_chain nmreq, nmrep;
 	uint64_t xid, val64 = 0;
 	uint32_t val = 0;
-	int error = 0, lockerror = ENOENT, numops, status;
+	int error = 0, lockerror, numops, status;
+	struct nfsreq_secinfo_args si;
 
 	nmp = NFSTONMP(np);
 	if (!nmp)
 		return (ENXIO);
+	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
 
-	lck_mtx_lock(&np->n_openlock);
-	/* scan currently held locks for conflict */
-	TAILQ_FOREACH(nflp, &np->n_locks, nfl_link) {
-		if (nflp->nfl_flags & NFS_FILE_LOCK_BLOCKED)
-			continue;
-		if ((start <= nflp->nfl_end) && (end >= nflp->nfl_start) &&
-		    ((fl->l_type == F_WRLCK) || (nflp->nfl_type == F_WRLCK)))
-			break;
-	}
-	if (nflp) {
-		/* found a conflicting lock */
-		fl->l_type = nflp->nfl_type;
-		fl->l_pid = (nflp->nfl_flags & NFS_FILE_LOCK_STYLE_FLOCK) ? -1 : nflp->nfl_owner->nlo_pid;
-		fl->l_start = nflp->nfl_start;
-		fl->l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end);
-		fl->l_whence = SEEK_SET;
-	}
-	lck_mtx_unlock(&np->n_openlock);
-	if (nflp)
-		return (0);
-
-	/* no conflict found locally, so ask the server */
-
+	lockerror = ENOENT;
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
 
@@ -3228,8 +3398,7 @@ nfs4_getlock(
 	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_LOCKT);
 	nfsm_chain_add_32(error, &nmreq, (fl->l_type == F_WRLCK) ? NFS_LOCK_TYPE_WRITE : NFS_LOCK_TYPE_READ);
@@ -3240,7 +3409,7 @@ nfs4_getlock(
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
 
-	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status);
+	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);
 
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
@@ -3249,7 +3418,7 @@ nfs4_getlock(
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
 	nfsmout_if(error);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, NULL, &xid);
+	nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, &xid);
 	nfsmout_if(error);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_LOCKT);
 	if (error == NFSERR_DENIED) {
@@ -3272,6 +3441,74 @@ nfsmout:
 	return (error);
 }
 
+
+/*
+ * Check for any conflicts with the given lock.
+ *
+ * Checking for a lock doesn't require the file to be opened.
+ * So we skip all the open owner, open file, lock owner work
+ * and just check for a conflicting lock.
+ */
+int
+nfs_advlock_getlock(
+	nfsnode_t np,
+	struct nfs_lock_owner *nlop,
+	struct flock *fl,
+	uint64_t start,
+	uint64_t end,
+	vfs_context_t ctx)
+{
+	struct nfsmount *nmp;
+	struct nfs_file_lock *nflp;
+	int error = 0, answered = 0;
+
+	nmp = NFSTONMP(np);
+	if (!nmp)
+		return (ENXIO);
+
+restart:
+	if ((error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx))))
+		return (error);
+
+	lck_mtx_lock(&np->n_openlock);
+	/* scan currently held locks for conflict */
+	TAILQ_FOREACH(nflp, &np->n_locks, nfl_link) {
+		if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD))
+			continue;
+		if ((start <= nflp->nfl_end) && (end >= nflp->nfl_start) &&
+		    ((fl->l_type == F_WRLCK) || (nflp->nfl_type == F_WRLCK)))
+			break;
+	}
+	if (nflp) {
+		/* found a conflicting lock */
+		fl->l_type = nflp->nfl_type;
+		fl->l_pid = (nflp->nfl_flags & NFS_FILE_LOCK_STYLE_FLOCK) ? -1 : nflp->nfl_owner->nlo_pid;
+		fl->l_start = nflp->nfl_start;
+		fl->l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end);
+		fl->l_whence = SEEK_SET;
+		answered = 1;
+	} else if ((np->n_openflags & N_DELEG_WRITE) && !(np->n_openflags & N_DELEG_RETURN)) {
+		/*
+		 * If we have a write delegation, we know there can't be other
+		 * locks on the server.  So the answer is no conflicting lock found.
+		 */
+		fl->l_type = F_UNLCK;
+		answered = 1;
+	}
+	lck_mtx_unlock(&np->n_openlock);
+	if (answered) {
+		nfs_mount_state_in_use_end(nmp, 0);
+		return (0);
+	}
+
+	/* no conflict found locally, so ask the server */
+	error = nmp->nm_funcs->nf_getlock_rpc(np, nlop, fl, start, end, ctx);
+
+	if (nfs_mount_state_in_use_end(nmp, error))
+		goto restart;
+	return (error);
+}
+
 /*
  * Acquire a file lock for the given range.
  *
@@ -3284,7 +3521,7 @@ nfsmout:
  * queue again to coalesce any locks adjacent to the new one.
  */
 int
-nfs4_setlock(
+nfs_advlock_setlock(
 	nfsnode_t np,
 	struct nfs_open_file *nofp,
 	struct nfs_lock_owner *nlop,
@@ -3304,7 +3541,10 @@ nfs4_setlock(
 	nmp = NFSTONMP(np);
 	if (!nmp)
 		return (ENXIO);
-	slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
+	slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0;
+
+	if ((type != F_RDLCK) && (type != F_WRLCK))
+		return (EINVAL);
 
 	/* allocate a new lock */
 	newnflp = nfs_file_lock_alloc(nlop);
@@ -3335,14 +3575,22 @@ nfs4_setlock(
 
 restart:
 	restart = 0;
-	error = nfs_mount_state_in_use_start(nmp);
+	error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
 	if (error)
 		goto error_out;
 	inuse = 1;
+	if (np->n_flag & NREVOKE) {
+		error = EIO;
+		nfs_mount_state_in_use_end(nmp, 0);
+		inuse = 0;
+		goto error_out;
+	}
 	if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) {
 		nfs_mount_state_in_use_end(nmp, 0);
 		inuse = 0;
-		nfs4_reopen(nofp, vfs_context_thread(ctx));
+		error = nfs4_reopen(nofp, vfs_context_thread(ctx));
+		if (error)
+			goto error_out;
 		goto restart;
 	}
 
@@ -3354,7 +3602,8 @@ restart:
 	}
 
 	/* scan current list of locks (held and pending) for conflicts */
-	for (nflp = TAILQ_NEXT(newnflp, nfl_link); nflp; nflp = TAILQ_NEXT(nflp, nfl_link)) {
+	for (nflp = TAILQ_NEXT(newnflp, nfl_link); nflp; nflp = nextnflp) {
+		nextnflp = TAILQ_NEXT(nflp, nfl_link);
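+		/* capture the next entry now; waiting below can drop n_openlock and unlink nflp */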
 		if (!nfs_file_lock_conflict(newnflp, nflp, &willsplit))
 			continue;
 		/* Conflict */
@@ -3374,10 +3623,10 @@ restart:
 				lck_mtx_unlock(&np->n_openlock);
 				nfs_mount_state_in_use_end(nmp, 0);
 				inuse = 0;
-				error = nfs4_unlock(np, nofp, nlop, 0, UINT64_MAX, NFS_FILE_LOCK_STYLE_FLOCK, ctx);
+				error = nfs_advlock_unlock(np, nofp, nlop, 0, UINT64_MAX, NFS_FILE_LOCK_STYLE_FLOCK, ctx);
 				flocknflp = NULL;
 				if (!error)
-					error = nfs_mount_state_in_use_start(nmp);
+					error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
 				if (error) {
 					lck_mtx_lock(&np->n_openlock);
 					break;
@@ -3388,7 +3637,8 @@ restart:
 				if (!nfs_file_lock_conflict(newnflp, nflp, NULL))
 					break;
 			}
-			msleep(nflp, &np->n_openlock, slpflag, "nfs4_setlock_blocked", &ts);
+			msleep(nflp, &np->n_openlock, slpflag, "nfs_advlock_setlock_blocked", &ts);
+			slpflag = 0;
 			error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0);
 			if (!error && (nmp->nm_state & NFSSTA_RECOVER)) {
 				/* looks like we have a recover pending... restart */
@@ -3399,6 +3649,8 @@ restart:
 				lck_mtx_lock(&np->n_openlock);
 				break;
 			}
+			if (!error && (np->n_flag & NREVOKE))
+				error = EIO;
 		} while (!error && nfs_file_lock_conflict(newnflp, nflp, NULL));
 		nflp->nfl_blockcnt--;
 		if ((nflp->nfl_flags & NFS_FILE_LOCK_DEAD) && !nflp->nfl_blockcnt) {
@@ -3407,6 +3659,9 @@ restart:
 		}
 		if (error || restart)
 			break;
+		/* We released n_openlock while blocked, so nextnflp may no longer be valid. */
+		/* Restart this lock-scanning loop from where it began. */
+		nextnflp = TAILQ_NEXT(newnflp, nfl_link);
 	}
 	lck_mtx_unlock(&np->n_openlock);
 	if (restart)
@@ -3428,16 +3683,50 @@ restart:
 	}
 
 	/* once scan for local conflicts is clear, send request to server */
-	if ((error = nfs_open_state_set_busy(np, ctx)))
+	if ((error = nfs_open_state_set_busy(np, vfs_context_thread(ctx))))
 		goto error_out;
 	busy = 1;
 	delay = 0;
 	do {
-		error = nfs4_lock_rpc(np, nofp, newnflp, 0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
+		/* do we have a delegation? (that we're not returning?) */
+		if ((np->n_openflags & N_DELEG_MASK) && !(np->n_openflags & N_DELEG_RETURN)) {
+			if (np->n_openflags & N_DELEG_WRITE) {
+				/* with a write delegation, just take the lock delegated */
+				newnflp->nfl_flags |= NFS_FILE_LOCK_DELEGATED;
+				error = 0;
+				/* make sure the lock owner knows its open owner */
+				if (!nlop->nlo_open_owner) {
+					nfs_open_owner_ref(nofp->nof_owner);
+					nlop->nlo_open_owner = nofp->nof_owner;
+				}
+				break;
+			} else {
+				/*
+				 * If we don't have any non-delegated opens but we do have
+				 * delegated opens, then we need to first claim the delegated
+				 * opens so that the lock request on the server can be associated
+				 * with an open it knows about.
+				 */
+				if ((!nofp->nof_rw_drw && !nofp->nof_w_drw && !nofp->nof_r_drw &&
+				     !nofp->nof_rw_dw && !nofp->nof_w_dw && !nofp->nof_r_dw &&
+				     !nofp->nof_rw && !nofp->nof_w && !nofp->nof_r) &&
+				    (nofp->nof_d_rw_drw || nofp->nof_d_w_drw || nofp->nof_d_r_drw ||
+				     nofp->nof_d_rw_dw || nofp->nof_d_w_dw || nofp->nof_d_r_dw ||
+				     nofp->nof_d_rw || nofp->nof_d_w || nofp->nof_d_r)) {
+					error = nfs4_claim_delegated_state_for_open_file(nofp, 0);
+					if (error)
+						break;
+				}
+			}
+		}
+		if (np->n_flag & NREVOKE)
+			error = EIO;
+		if (!error)
+			error = nmp->nm_funcs->nf_setlock_rpc(np, nofp, newnflp, 0, 0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
 		if (!error || ((error != NFSERR_DENIED) && (error != NFSERR_GRACE)))
 			break;
 		/* request was denied due to either conflict or grace period */
-		if ((error != NFSERR_GRACE) && !(newnflp->nfl_flags & NFS_FILE_LOCK_WAIT)) {
+		if ((error == NFSERR_DENIED) && !(newnflp->nfl_flags & NFS_FILE_LOCK_WAIT)) {
 			error = EAGAIN;
 			break;
 		}
@@ -3447,13 +3736,13 @@ restart:
 			busy = 0;
 			nfs_mount_state_in_use_end(nmp, 0);
 			inuse = 0;
-			error2 = nfs4_unlock(np, nofp, nlop, 0, UINT64_MAX, NFS_FILE_LOCK_STYLE_FLOCK, ctx);
+			error2 = nfs_advlock_unlock(np, nofp, nlop, 0, UINT64_MAX, NFS_FILE_LOCK_STYLE_FLOCK, ctx);
 			flocknflp = NULL;
 			if (!error2)
-				error2 = nfs_mount_state_in_use_start(nmp);
+				error2 = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
 			if (!error2) {
 				inuse = 1;
-				error2 = nfs_open_state_set_busy(np, ctx);
+				error2 = nfs_open_state_set_busy(np, vfs_context_thread(ctx));
 			}
 			if (error2) {
 				error = error2;
@@ -3461,12 +3750,18 @@ restart:
 			}
 			busy = 1;
 		}
-		/* wait a little bit and send the request again */
-		if (error == NFSERR_GRACE)
-			delay = 4;
-		if (delay < 4)
-			delay++;
-		tsleep(newnflp, slpflag, "nfs4_setlock_delay", delay * (hz/2));
+		/*
+		 * Wait a little bit and send the request again.
+		 * Except for retries of blocked v2/v3 requests, where we've already waited a bit.
+		 */
+		if ((nmp->nm_vers >= NFS_VER4) || (error == NFSERR_GRACE)) {
+			if (error == NFSERR_GRACE)
+				delay = 4;
+			if (delay < 4)
+				delay++;
+			tsleep(newnflp, slpflag, "nfs_advlock_setlock_delay", delay * (hz/2));
+			slpflag = 0;
+		}
 		error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0);
 		if (!error && (nmp->nm_state & NFSSTA_RECOVER)) {
 			/* looks like we have a recover pending... restart */
@@ -3476,6 +3771,8 @@ restart:
 			inuse = 0;
 			goto restart;
 		}
+		if (!error && (np->n_flag & NREVOKE))
+			error = EIO;
 	} while (!error);
 
 error_out:
@@ -3545,7 +3842,7 @@ error_out:
 			/* We're replacing a range in the middle of a lock. */
 			/* The current lock will be split into two locks. */
 			/* Update locks and insert new lock after current lock. */
-			nflp2->nfl_flags |= (nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK);
+			nflp2->nfl_flags |= (nflp->nfl_flags & (NFS_FILE_LOCK_STYLE_MASK|NFS_FILE_LOCK_DELEGATED));
 			nflp2->nfl_type = nflp->nfl_type;
 			nflp2->nfl_start = newnflp->nfl_end + 1;
 			nflp2->nfl_end = nflp->nfl_end;
@@ -3635,8 +3932,11 @@ error_out:
 	return (error);
 }
 
+/*
+ * Release all (same style) locks within the given range.
+ */
 int
-nfs4_unlock(
+nfs_advlock_unlock(
 	nfsnode_t np,
 	struct nfs_open_file *nofp,
 	struct nfs_lock_owner *nlop,
@@ -3654,14 +3954,16 @@ nfs4_unlock(
 		return (ENXIO);
 
 restart:
-	if ((error = nfs_mount_state_in_use_start(nmp)))
+	if ((error = nfs_mount_state_in_use_start(nmp, NULL)))
 		return (error);
 	if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) {
 		nfs_mount_state_in_use_end(nmp, 0);
-		nfs4_reopen(nofp, vfs_context_thread(ctx));
+		error = nfs4_reopen(nofp, NULL);
+		if (error)
+			return (error);
 		goto restart;
 	}
-	if ((error = nfs_open_state_set_busy(np, ctx))) {
+	if ((error = nfs_open_state_set_busy(np, NULL))) {
 		nfs_mount_state_in_use_end(nmp, error);
 		return (error);
 	}
@@ -3725,11 +4027,13 @@ restart:
 	    ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX)) {
 		uint64_t s = 0;
 		int type = TAILQ_FIRST(&nlop->nlo_locks)->nfl_type;
-		while (nflp) {
+		int delegated = (TAILQ_FIRST(&nlop->nlo_locks)->nfl_flags & NFS_FILE_LOCK_DELEGATED);
+		while (!delegated && nflp) {
 			if ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX) {
 				/* unlock the range preceding this lock */
 				lck_mtx_unlock(&np->n_openlock);
-				error = nfs4_unlock_rpc(np, nlop, type, s, nflp->nfl_start-1, ctx);
+				error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, type, s, nflp->nfl_start-1, 0,
+						vfs_context_thread(ctx), vfs_context_ucred(ctx));
 				if (nfs_mount_state_error_should_restart(error)) {
 					nfs_open_state_clear_busy(np);
 					nfs_mount_state_in_use_end(nmp, error);
@@ -3742,16 +4046,19 @@ restart:
 			}
 			nflp = TAILQ_NEXT(nflp, nfl_lolink);
 		}
-		lck_mtx_unlock(&np->n_openlock);
-		error = nfs4_unlock_rpc(np, nlop, type, s, end, ctx);
-		if (nfs_mount_state_error_should_restart(error)) {
-			nfs_open_state_clear_busy(np);
-			nfs_mount_state_in_use_end(nmp, error);
-			goto restart;
+		if (!delegated) {
+			lck_mtx_unlock(&np->n_openlock);
+			error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, type, s, end, 0,
+					vfs_context_thread(ctx), vfs_context_ucred(ctx));
+			if (nfs_mount_state_error_should_restart(error)) {
+				nfs_open_state_clear_busy(np);
+				nfs_mount_state_in_use_end(nmp, error);
+				goto restart;
+			}
+			lck_mtx_lock(&np->n_openlock);
+			if (error)
+				goto out;
 		}
-		lck_mtx_lock(&np->n_openlock);
-		if (error)
-			goto out;
 		send_unlock_rpcs = 0;
 	}
 
@@ -3767,9 +4074,10 @@ restart:
 		/* here's one to unlock */
 		if ((start <= nflp->nfl_start) && (end >= nflp->nfl_end)) {
 			/* The entire lock is being unlocked. */
-			if (send_unlock_rpcs) {
+			if (send_unlock_rpcs && !(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED)) {
 				lck_mtx_unlock(&np->n_openlock);
-				error = nfs4_unlock_rpc(np, nlop, nflp->nfl_type, nflp->nfl_start, nflp->nfl_end, ctx);
+				error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, nflp->nfl_type, nflp->nfl_start, nflp->nfl_end, 0,
+						vfs_context_thread(ctx), vfs_context_ucred(ctx));
 				if (nfs_mount_state_error_should_restart(error)) {
 					nfs_open_state_clear_busy(np);
 					nfs_mount_state_in_use_end(nmp, error);
@@ -3788,9 +4096,10 @@ restart:
 		} else if ((start > nflp->nfl_start) && (end < nflp->nfl_end)) {
 			/* We're unlocking a range in the middle of a lock. */
 			/* The current lock will be split into two locks. */
-			if (send_unlock_rpcs) {
+			if (send_unlock_rpcs && !(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED)) {
 				lck_mtx_unlock(&np->n_openlock);
-				error = nfs4_unlock_rpc(np, nlop, nflp->nfl_type, start, end, ctx);
+				error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, nflp->nfl_type, start, end, 0,
+						vfs_context_thread(ctx), vfs_context_ucred(ctx));
 				if (nfs_mount_state_error_should_restart(error)) {
 					nfs_open_state_clear_busy(np);
 					nfs_mount_state_in_use_end(nmp, error);
@@ -3801,7 +4110,7 @@ restart:
 			if (error)
 				break;
 			/* update locks and insert new lock after current lock */
-			newnflp->nfl_flags |= (nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK);
+			newnflp->nfl_flags |= (nflp->nfl_flags & (NFS_FILE_LOCK_STYLE_MASK|NFS_FILE_LOCK_DELEGATED));
 			newnflp->nfl_type = nflp->nfl_type;
 			newnflp->nfl_start = end + 1;
 			newnflp->nfl_end = nflp->nfl_end;
@@ -3812,9 +4121,10 @@ restart:
 			newnflp = NULL;
 		} else if (start > nflp->nfl_start) {
 			/* We're unlocking the end of a lock. */
-			if (send_unlock_rpcs) {
+			if (send_unlock_rpcs && !(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED)) {
 				lck_mtx_unlock(&np->n_openlock);
-				error = nfs4_unlock_rpc(np, nlop, nflp->nfl_type, start, nflp->nfl_end, ctx);
+				error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, nflp->nfl_type, start, nflp->nfl_end, 0,
+						vfs_context_thread(ctx), vfs_context_ucred(ctx));
 				if (nfs_mount_state_error_should_restart(error)) {
 					nfs_open_state_clear_busy(np);
 					nfs_mount_state_in_use_end(nmp, error);
@@ -3828,9 +4138,10 @@ restart:
 			nflp->nfl_end = start - 1;
 		} else if (end < nflp->nfl_end) {
 			/* We're unlocking the start of a lock. */
-			if (send_unlock_rpcs) {
+			if (send_unlock_rpcs && !(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED)) {
 				lck_mtx_unlock(&np->n_openlock);
-				error = nfs4_unlock_rpc(np, nlop, nflp->nfl_type, nflp->nfl_start, end, ctx);
+				error = nmp->nm_funcs->nf_unlock_rpc(np, nlop, nflp->nfl_type, nflp->nfl_start, end, 0,
+						vfs_context_thread(ctx), vfs_context_ucred(ctx));
 				if (nfs_mount_state_error_should_restart(error)) {
 					nfs_open_state_clear_busy(np);
 					nfs_mount_state_in_use_end(nmp, error);
@@ -3866,7 +4177,7 @@ out:
- * NFSv4 advisory file locking
+ * NFS advisory file locking
  */
 int
-nfs4_vnop_advlock(
+nfs_vnop_advlock(
 	struct vnop_advlock_args /* {
 		struct vnodeop_desc *a_desc;
 		vnode_t a_vp;
@@ -3884,19 +4195,34 @@ nfs4_vnop_advlock(
 	int flags = ap->a_flags;
 	vfs_context_t ctx = ap->a_context;
 	struct nfsmount *nmp;
-	struct nfs_vattr nvattr;
 	struct nfs_open_owner *noop = NULL;
 	struct nfs_open_file *nofp = NULL;
 	struct nfs_lock_owner *nlop = NULL;
 	off_t lstart;
 	uint64_t start, end;
 	int error = 0, modified, style;
+	enum vtype vtype;
 #define OFF_MAX QUAD_MAX
 
 	nmp = VTONMP(ap->a_vp);
 	if (!nmp)
 		return (ENXIO);
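+	/* locking on v2/v3 requires that the mount have locking enabled */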
+	lck_mtx_lock(&nmp->nm_lock);
+	if ((nmp->nm_vers <= NFS_VER3) && (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED)) {
+		lck_mtx_unlock(&nmp->nm_lock);
+		return (ENOTSUP);
+	}
+	lck_mtx_unlock(&nmp->nm_lock);
 
+	if (np->n_flag & NREVOKE)
+		return (EIO);
+	vtype = vnode_vtype(ap->a_vp);
+	if (vtype == VDIR) /* ignore lock requests on directories */
+		return (0);
+	if (vtype != VREG) /* anything other than regular files is invalid */
+		return (EINVAL);
+
+	/* Convert the flock structure into a start and end. */
 	switch (fl->l_whence) {
 	case SEEK_SET:
 	case SEEK_CUR:
@@ -3915,7 +4241,7 @@ nfs4_vnop_advlock(
 		nfs_node_unlock(np);
 		if (modified && ((error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1))))
 			return (error);
-		if ((error = nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED)))
+		if ((error = nfs_getattr(np, NULL, ctx, NGA_UNCACHED)))
 			return (error);
 		nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
 		if ((np->n_size > OFF_MAX) ||
@@ -3944,8 +4270,8 @@ nfs4_vnop_advlock(
 		end = start - 1;
 		start += fl->l_len;
 	}
-	if (error)
-		return (error);
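+	/* NFSv2 can only handle 32-bit file offsets */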
+	if ((nmp->nm_vers == NFS_VER2) && ((start > INT32_MAX) || (fl->l_len && (end > INT32_MAX))))
+		return (EINVAL);
 
 	style = (flags & F_FLOCK) ? NFS_FILE_LOCK_STYLE_FLOCK : NFS_FILE_LOCK_STYLE_POSIX;
 	if ((style == NFS_FILE_LOCK_STYLE_FLOCK) && ((start != 0) || (end != UINT64_MAX)))
@@ -3956,17 +4282,17 @@ nfs4_vnop_advlock(
 	if (!nlop) {
 		error = (op == F_UNLCK) ? 0 : ENOMEM;
 		if (error)
-			printf("nfs4_vnop_advlock: no lock owner %d\n", error);
+			NP(np, "nfs_vnop_advlock: no lock owner, error %d", error);
 		goto out;
 	}
 
 	if (op == F_GETLK) {
-		error = nfs4_getlock(np, nlop, fl, start, end, ctx);
+		error = nfs_advlock_getlock(np, nlop, fl, start, end, ctx);
 	} else {
 		/* find the open owner */
 		noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 0);
 		if (!noop) {
-			printf("nfs4_vnop_advlock: no open owner\n");
+			NP(np, "nfs_vnop_advlock: no open owner %d", kauth_cred_getuid(vfs_context_ucred(ctx)));
 			error = EPERM;
 			goto out;
 		}
@@ -3976,24 +4302,25 @@ restart:
 		if (error)
 			error = EBADF;
 		if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
-			printf("nfs_vnop_advlock: LOST\n");
+			NP(np, "nfs_vnop_advlock: LOST %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
 			error = EIO;
 		}
 		if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
-			nfs4_reopen(nofp, vfs_context_thread(ctx));
+			error = nfs4_reopen(nofp, ((op == F_UNLCK) ? NULL : vfs_context_thread(ctx)));
 			nofp = NULL;
-			goto restart;
+			if (!error)
+				goto restart;
 		}
 		if (error) {
-			printf("nfs4_vnop_advlock: no open file %d\n", error);
+			NP(np, "nfs_vnop_advlock: no open file %d, %d", error, kauth_cred_getuid(noop->noo_cred));
 			goto out;
 		}
 		if (op == F_UNLCK) {
-			error = nfs4_unlock(np, nofp, nlop, start, end, style, ctx);
+			error = nfs_advlock_unlock(np, nofp, nlop, start, end, style, ctx);
 		} else if ((op == F_SETLK) || (op == F_SETLKW)) {
 			if ((op == F_SETLK) && (flags & F_WAIT))
 				op = F_SETLKW;
-			error = nfs4_setlock(np, nofp, nlop, op, start, end, style, fl->l_type, ctx);
+			error = nfs_advlock_setlock(np, nofp, nlop, op, start, end, style, fl->l_type, ctx);
 		} else {
 			/* not getlk, unlock or lock? */
 			error = EINVAL;
@@ -4012,7 +4339,7 @@ out:
  * Check if an open owner holds any locks on a file.
  */
 int
-nfs4_check_for_locks(struct nfs_open_owner *noop, struct nfs_open_file *nofp)
+nfs_check_for_locks(struct nfs_open_owner *noop, struct nfs_open_file *nofp)
 {
 	struct nfs_lock_owner *nlop;
 
@@ -4028,19 +4355,21 @@ nfs4_check_for_locks(struct nfs_open_owner *noop, struct nfs_open_file *nofp)
 /*
  * Reopen simple (no deny, no locks) open state that was lost.
  */
-void
+int
 nfs4_reopen(struct nfs_open_file *nofp, thread_t thd)
 {
 	struct nfs_open_owner *noop = nofp->nof_owner;
 	struct nfsmount *nmp = NFSTONMP(nofp->nof_np);
-	vnode_t vp = NFSTOV(nofp->nof_np);
+	nfsnode_t np = nofp->nof_np;
+	vnode_t vp = NFSTOV(np);
 	vnode_t dvp = NULL;
 	struct componentname cn;
 	const char *vname = NULL;
+	const char *name = NULL;
 	size_t namelen;
 	char smallname[128];
 	char *filename = NULL;
-	int error = 0, done = 0, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
+	int error = 0, done = 0, slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0;
 	struct timespec ts = { 1, 0 };
 
 	lck_mtx_lock(&nofp->nof_lock);
@@ -4048,38 +4377,67 @@ nfs4_reopen(struct nfs_open_file *nofp, thread_t thd)
 		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
 			break;
 		msleep(&nofp->nof_flags, &nofp->nof_lock, slpflag|(PZERO-1), "nfsreopenwait", &ts);
+		slpflag = 0;
 	}
-	if (!(nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
+	if (error || !(nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
 		lck_mtx_unlock(&nofp->nof_lock);
-		return;
+		return (error);
 	}
 	nofp->nof_flags |= NFS_OPEN_FILE_REOPENING;
 	lck_mtx_unlock(&nofp->nof_lock);
 
-	dvp = vnode_getparent(vp);
-	vname = vnode_getname(vp);
-	if (!dvp || !vname) {
-		error = EIO;
-		goto out;
+	nfs_node_lock_force(np);
+	if ((vnode_vtype(vp) != VDIR) && np->n_sillyrename) {
+		/*
+		 * The node's been sillyrenamed, so we need to use
+		 * the sillyrename directory/name to do the open.
+		 */
+		struct nfs_sillyrename *nsp = np->n_sillyrename;
+		dvp = NFSTOV(nsp->nsr_dnp);
+		if ((error = vnode_get(dvp))) {
+			nfs_node_unlock(np);
+			goto out;
+		}
+		name = nsp->nsr_name;
+	} else {
+		/*
+		 * [sigh] We can't trust VFS to get the parent right for named
+		 * attribute nodes.  (It likes to reparent the nodes after we've
+		 * created them.)  Luckily we can probably get the right parent
+		 * from the n_parent we have stashed away.
+		 */
+		if ((np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR) &&
+		    (((dvp = np->n_parent)) && (error = vnode_get(dvp))))
+			dvp = NULL;
+		if (!dvp)
+			dvp = vnode_getparent(vp);
+		vname = vnode_getname(vp);
+		if (!dvp || !vname) {
+			if (!error)
+				error = EIO;
+			nfs_node_unlock(np);
+			goto out;
+		}
+		name = vname;
 	}
 	filename = &smallname[0];
-	namelen = snprintf(filename, sizeof(smallname), "%s", vname);
+	namelen = snprintf(filename, sizeof(smallname), "%s", name);
 	if (namelen >= sizeof(smallname)) {
-		namelen++;  /* snprintf result doesn't include '\0' */
-		MALLOC(filename, char *, namelen, M_TEMP, M_WAITOK);
+		MALLOC(filename, char *, namelen+1, M_TEMP, M_WAITOK);
 		if (!filename) {
 			error = ENOMEM;
 			goto out;
 		}
-		snprintf(filename, namelen, "%s", vname);
+		snprintf(filename, namelen+1, "%s", name);
 	}
+	nfs_node_unlock(np);
 	bzero(&cn, sizeof(cn));
 	cn.cn_nameptr = filename;
 	cn.cn_namelen = namelen;
 
 restart:
 	done = 0;
-	if ((error = nfs_mount_state_in_use_start(nmp)))
+	if ((error = nfs_mount_state_in_use_start(nmp, thd)))
 		goto out;
 
 	if (nofp->nof_rw)
@@ -4092,19 +4450,22 @@ restart:
 	if (nfs_mount_state_in_use_end(nmp, error)) {
 		if (error == NFSERR_GRACE)
 			goto restart;
+		printf("nfs4_reopen: RPC failed, error %d, lost %d, %s\n", error,
+			(nofp->nof_flags & NFS_OPEN_FILE_LOST) ? 1 : 0, name ? name : "???");
 		error = 0;
 		goto out;
 	}
 	done = 1;
 out:
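+	/* a reopen failure we can't retry means the open state is gone; revoke it */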
+	if (error && (error != EINTR) && (error != ERESTART))
+		nfs_revoke_open_state_for_node(np);
 	lck_mtx_lock(&nofp->nof_lock);
 	nofp->nof_flags &= ~NFS_OPEN_FILE_REOPENING;
-	if (error)
-		nofp->nof_flags |= NFS_OPEN_FILE_LOST;
 	if (done)
 		nofp->nof_flags &= ~NFS_OPEN_FILE_REOPEN;
-	else
-		printf("nfs4_reopen: failed, error %d, lost %d\n", error, (nofp->nof_flags & NFS_OPEN_FILE_LOST) ? 1 : 0);
+	else if (error)
+		printf("nfs4_reopen: failed, error %d, lost %d, %s\n", error,
+			(nofp->nof_flags & NFS_OPEN_FILE_LOST) ? 1 : 0, name ? name : "???");
 	lck_mtx_unlock(&nofp->nof_lock);
 	if (filename && (filename != &smallname[0]))
 		FREE(filename, M_TEMP);
@@ -4112,6 +4473,7 @@ out:
 		vnode_putname(vname);
 	if (dvp != NULLVP)
 		vnode_put(dvp);
+	return (error);
 }
 
 /*
@@ -4147,13 +4509,73 @@ nfs4_open_reopen_rpc(
 	int share_access,
 	int share_deny)
 {
-	return (nfs4_open_rpc_internal(nofp, NULL, thd, cred, cnp, NULL, dvp, vpp, 0, share_access, share_deny));
+	return (nfs4_open_rpc_internal(nofp, NULL, thd, cred, cnp, NULL, dvp, vpp, NFS_OPEN_NOCREATE, share_access, share_deny));
+}
+
+/*
+ * Send an OPEN_CONFIRM RPC to confirm an OPEN.
+ */
+int
+nfs4_open_confirm_rpc(
+	struct nfsmount *nmp,
+	nfsnode_t dnp,
+	u_char *fhp,
+	int fhlen,
+	struct nfs_open_owner *noop,
+	nfs_stateid *sid,
+	thread_t thd,
+	kauth_cred_t cred,
+	struct nfs_vattr *nvap,
+	uint64_t *xidp)
+{
+	struct nfsm_chain nmreq, nmrep;
+	int error = 0, status, numops;
+	struct nfsreq_secinfo_args si;
+
+	NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, NULL, 0);
+	nfsm_chain_null(&nmreq);
+	nfsm_chain_null(&nmrep);
+
+	// PUTFH, OPEN_CONFIRM, GETATTR
+	numops = 3;
+	nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED);
+	nfsm_chain_add_compound_header(error, &nmreq, "open_confirm", numops);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
+	nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, fhp, fhlen);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN_CONFIRM);
+	nfsm_chain_add_stateid(error, &nmreq, sid);
+	nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
+	nfsm_chain_build_done(error, &nmreq);
+	nfsm_assert(error, (numops == 0), EPROTO);
+	nfsmout_if(error);
+	error = nfs_request2(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, R_NOINTR, &nmrep, xidp, &status);
+
+	nfsm_chain_skip_tag(error, &nmrep);
+	nfsm_chain_get_32(error, &nmrep, numops);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
+	nfsmout_if(error);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN_CONFIRM);
+	nfs_owner_seqid_increment(noop, NULL, error);
+	nfsm_chain_get_stateid(error, &nmrep, sid);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
+	nfsmout_if(error);
+	error = nfs4_parsefattr(&nmrep, NULL, nvap, NULL, NULL, NULL);
+nfsmout:
+	nfsm_chain_cleanup(&nmreq);
+	nfsm_chain_cleanup(&nmrep);
+	return (error);
 }
 
 /*
  * common OPEN RPC code
  *
  * If create is set, ctx must be passed in.
+ * Returns a node on success if no node was passed in.
  */
 int
 nfs4_open_rpc_internal(
@@ -4171,20 +4593,24 @@ nfs4_open_rpc_internal(
 {
 	struct nfsmount *nmp;
 	struct nfs_open_owner *noop = nofp->nof_owner;
-	struct nfs_vattr nvattr, dnvattr;
+	struct nfs_vattr nvattr;
 	int error = 0, open_error = EIO, lockerror = ENOENT, busyerror = ENOENT, status;
-	int nfsvers, numops, exclusive = 0, gotuid, gotgid;
+	int nfsvers, namedattrs, numops, exclusive = 0, gotuid, gotgid;
 	u_int64_t xid, savedxid = 0;
 	nfsnode_t dnp = VTONFS(dvp);
 	nfsnode_t np, newnp = NULL;
 	vnode_t newvp = NULL;
 	struct nfsm_chain nmreq, nmrep;
 	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
-	uint32_t rflags, delegation = 0, recall = 0, val;
+	uint32_t rflags, delegation, recall;
 	struct nfs_stateid stateid, dstateid, *sid;
 	fhandle_t fh;
-	struct nfsreq *req = NULL;
+	struct nfsreq rq, *req = &rq;
 	struct nfs_dulookup dul;
+	char sbuf[64], *s;
+	uint32_t ace_type, ace_flags, ace_mask, len, slen;
+	struct kauth_ace ace;
+	struct nfsreq_secinfo_args si;
 
 	if (create && !ctx)
 		return (EINVAL);
@@ -4193,6 +4619,9 @@ nfs4_open_rpc_internal(
 	if (!nmp)
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
+	namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
+	if (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
 
 	np = *vpp ? VTONFS(*vpp) : NULL;
 	if (create && vap) {
@@ -4200,6 +4629,8 @@ nfs4_open_rpc_internal(
 		nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
 		gotuid = VATTR_IS_ACTIVE(vap, va_uid);
 		gotgid = VATTR_IS_ACTIVE(vap, va_gid);
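+		/* exclusive create ignores the attributes, so note that times should be set in the follow-up attribute update */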
+		if (exclusive && (!VATTR_IS_ACTIVE(vap, va_access_time) || !VATTR_IS_ACTIVE(vap, va_modify_time)))
+			vap->va_vaflags |= VA_UTIMES_NULL;
 	} else {
 		exclusive = gotuid = gotgid = 0;
 	}
@@ -4213,7 +4644,12 @@ nfs4_open_rpc_internal(
 	if ((error = nfs_open_owner_set_busy(noop, thd)))
 		return (error);
 again:
-	rflags = 0;
+	rflags = delegation = recall = 0;
+	ace.ace_flags = 0;
+	s = sbuf;
+	slen = sizeof(sbuf);
+	NVATTR_INIT(&nvattr);
+	NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, cnp->cn_nameptr, cnp->cn_namelen);
 
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
@@ -4232,13 +4668,9 @@ again:
 	nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
 	nfsm_chain_add_32(error, &nmreq, share_access);
 	nfsm_chain_add_32(error, &nmreq, share_deny);
-
-	// open owner: clientid + uid
-	nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid); // open_owner4.clientid
+	nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid);
 	nfsm_chain_add_32(error, &nmreq, NFSX_UNSIGNED);
-	nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(noop->noo_cred)); // open_owner4.owner
-
-	// openflag4
+	nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(noop->noo_cred));
 	nfsm_chain_add_32(error, &nmreq, create);
 	if (create) {
 		if (exclusive) {
@@ -4253,40 +4685,36 @@ again:
 			nfsm_chain_add_fattr4(error, &nmreq, vap, nmp);
 		}
 	}
-
-	// open_claim4
 	nfsm_chain_add_32(error, &nmreq, NFS_CLAIM_NULL);
-	nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
+	nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
 	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
 	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	if (!error)
 		error = busyerror = nfs_node_set_busy(dnp, thd);
 	nfsmout_if(error);
 
-	if (create)
+	if (create && !namedattrs)
 		nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
 
-	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, NULL, &req);
+	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, R_NOINTR, NULL, &req);
 	if (!error) {
-		if (create)
+		if (create && !namedattrs)
 			nfs_dulookup_start(&dul, dnp, ctx);
 		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
 		savedxid = xid;
 	}
 
-	if (create)
+	if (create && !namedattrs)
 		nfs_dulookup_finish(&dul, dnp, ctx);
 
 	if ((lockerror = nfs_node_lock(dnp)))
@@ -4309,51 +4737,69 @@ again:
 		case NFS_OPEN_DELEGATE_NONE:
 			break;
 		case NFS_OPEN_DELEGATE_READ:
-			nfsm_chain_get_stateid(error, &nmrep, &dstateid);
-			nfsm_chain_get_32(error, &nmrep, recall);
-			// ACE: (skip) XXX
-			nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
-			nfsm_chain_get_32(error, &nmrep, val); /* string length */
-			nfsm_chain_adv(error, &nmrep, nfsm_rndup(val));
-			break;
 		case NFS_OPEN_DELEGATE_WRITE:
 			nfsm_chain_get_stateid(error, &nmrep, &dstateid);
 			nfsm_chain_get_32(error, &nmrep, recall);
-			// space (skip) XXX
-			nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
-			// ACE: (skip) XXX
-			nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
-			nfsm_chain_get_32(error, &nmrep, val); /* string length */
-			nfsm_chain_adv(error, &nmrep, nfsm_rndup(val));
+			if (delegation == NFS_OPEN_DELEGATE_WRITE) // space (skip) XXX
+				nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
+			/* if we have any trouble accepting the ACE, just invalidate it */
+			ace_type = ace_flags = ace_mask = len = 0;
+			nfsm_chain_get_32(error, &nmrep, ace_type);
+			nfsm_chain_get_32(error, &nmrep, ace_flags);
+			nfsm_chain_get_32(error, &nmrep, ace_mask);
+			nfsm_chain_get_32(error, &nmrep, len);
+			ace.ace_flags = nfs4_ace_nfstype_to_vfstype(ace_type, &error);
+			ace.ace_flags |= nfs4_ace_nfsflags_to_vfsflags(ace_flags);
+			ace.ace_rights = nfs4_ace_nfsmask_to_vfsrights(ace_mask);
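+			/* fall back to a heap buffer if the "who" string won't fit in the stack buffer */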
+			if (!error && (len >= slen)) {
+				MALLOC(s, char*, len+1, M_TEMP, M_WAITOK);
+				if (s)
+					slen = len+1;
+				else
+					ace.ace_flags = 0;
+			}
+			if (s)
+				nfsm_chain_get_opaque(error, &nmrep, len, s);
+			else
+				nfsm_chain_adv(error, &nmrep, nfsm_rndup(len));
+			if (!error && s) {
+				s[len] = '\0';
+				if (nfs4_id2guid(s, &ace.ace_applicable, (ace_flags & NFS_ACE_IDENTIFIER_GROUP)))
+					ace.ace_flags = 0;
+			}
+			if (error || !s)
+				ace.ace_flags = 0;
+			if (s && (s != sbuf))
+				FREE(s, M_TEMP);
 			break;
 		default:
 			error = EBADRPC;
 			break;
 		}
 	/* At this point if we have no error, the object was created/opened. */
-	/* if we don't get attributes, then we should lookitup. */
 	open_error = error;
 	nfsmout_if(error);
-	if (create && !exclusive)
+	if (create && vap && !exclusive)
 		nfs_vattr_set_supported(bitmap, vap);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
 	nfsmout_if(error);
-	NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap);
-	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL);
+	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
 	nfsmout_if(error);
 	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
-		printf("nfs: open/create didn't return filehandle?\n");
+		printf("nfs: open/create didn't return filehandle? %s\n", cnp->cn_nameptr);
 		error = EBADRPC;
 		goto nfsmout;
 	}
 	if (!create && np && !NFS_CMPFH(np, fh.fh_data, fh.fh_len)) {
 		// XXX for the open case, what if fh doesn't match the vnode we think we're opening?
-		printf("nfs4_open_rpc: warning: file handle mismatch\n");
+		// Solaris Named Attributes may do this due to a bug... so don't warn for named attributes.
+		if (!(np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
+			NP(np, "nfs4_open_rpc: warning: file handle mismatch");
 	}
 	/* directory attributes: if we don't get them, make sure to invalidate */
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, NULL, &xid);
+	nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, &xid);
 	if (error)
 		NATTRINVALIDATE(dnp);
 	nfsmout_if(error);
@@ -4364,39 +4810,8 @@ again:
 	if (rflags & NFS_OPEN_RESULT_CONFIRM) {
 		nfs_node_unlock(dnp);
 		lockerror = ENOENT;
-		nfsm_chain_cleanup(&nmreq);
-		nfsm_chain_cleanup(&nmrep);
-		// PUTFH, OPEN_CONFIRM, GETATTR
-		numops = 3;
-		nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED);
-		nfsm_chain_add_compound_header(error, &nmreq, "open_confirm", numops);
-		numops--;
-		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
-		nfsm_chain_add_fh(error, &nmreq, nfsvers, fh.fh_data, fh.fh_len);
-		numops--;
-		nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN_CONFIRM);
-		nfsm_chain_add_stateid(error, &nmreq, sid);
-		nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
-		numops--;
-		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-		nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-			NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
-		nfsm_chain_build_done(error, &nmreq);
-		nfsm_assert(error, (numops == 0), EPROTO);
-		nfsmout_if(error);
-		error = nfs_request2(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, 0, &nmrep, &xid, &status);
-
-		nfsm_chain_skip_tag(error, &nmrep);
-		nfsm_chain_get_32(error, &nmrep, numops);
-		nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
-		nfsmout_if(error);
-		nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN_CONFIRM);
-		nfs_owner_seqid_increment(noop, NULL, error);
-		nfsm_chain_get_stateid(error, &nmrep, sid);
-		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-		nfsmout_if(error);
-		NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap);
-		error = nfs4_parsefattr(&nmrep, NULL, &nvattr, NULL, NULL);
+		NVATTR_CLEANUP(&nvattr);
+		error = nfs4_open_confirm_rpc(nmp, dnp, fh.fh_data, fh.fh_len, noop, sid, thd, cred, &nvattr, &xid);
 		nfsmout_if(error);
 		savedxid = xid;
 		if ((lockerror = nfs_node_lock(dnp)))
@@ -4415,17 +4830,18 @@ nfsmout:
 		dnp->n_flag |= NMODIFIED;
 		nfs_node_unlock(dnp);
 		lockerror = ENOENT;
-		nfs_getattr(dnp, &dnvattr, ctx, NGA_CACHED);
+		nfs_getattr(dnp, NULL, ctx, NGA_CACHED);
 	}
 	if (!lockerror)
 		nfs_node_unlock(dnp);
-	if (!error && create && fh.fh_len) {
+	if (!error && !np && fh.fh_len) {
 		/* create the vnode with the filehandle and attributes */
 		xid = savedxid;
-		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &newnp);
+		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &newnp);
 		if (!error)
 			newvp = NFSTOV(newnp);
 	}
+	NVATTR_CLEANUP(&nvattr);
 	if (!busyerror)
 		nfs_node_clear_busy(dnp);
 	if ((delegation == NFS_OPEN_DELEGATE_READ) || (delegation == NFS_OPEN_DELEGATE_WRITE)) {
@@ -4437,15 +4853,39 @@ nfsmout:
 			np->n_openflags &= ~N_DELEG_MASK;
 			np->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
 			np->n_dstateid = dstateid;
+			np->n_dace = ace;
+			if (np->n_dlink.tqe_next == NFSNOLIST) {
+				lck_mtx_lock(&nmp->nm_lock);
+				if (np->n_dlink.tqe_next == NFSNOLIST)
+					TAILQ_INSERT_TAIL(&nmp->nm_delegations, np, n_dlink);
+				lck_mtx_unlock(&nmp->nm_lock);
+			}
 			lck_mtx_unlock(&np->n_openlock);
-		}
-		if (recall) {
-			nfs4_delegreturn_rpc(nmp, fh.fh_data, fh.fh_len, &dstateid, thd, cred);
+		} else {
+			/* give the delegation back */
 			if (np) {
-				lck_mtx_lock(&np->n_openlock);
-				np->n_openflags &= ~N_DELEG_MASK;
-				lck_mtx_unlock(&np->n_openlock);
+				if (NFS_CMPFH(np, fh.fh_data, fh.fh_len)) {
+					/* update delegation state and return it */
+					lck_mtx_lock(&np->n_openlock);
+					np->n_openflags &= ~N_DELEG_MASK;
+					np->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
+					np->n_dstateid = dstateid;
+					np->n_dace = ace;
+					if (np->n_dlink.tqe_next == NFSNOLIST) {
+						lck_mtx_lock(&nmp->nm_lock);
+						if (np->n_dlink.tqe_next == NFSNOLIST)
+							TAILQ_INSERT_TAIL(&nmp->nm_delegations, np, n_dlink);
+						lck_mtx_unlock(&nmp->nm_lock);
+					}
+					lck_mtx_unlock(&np->n_openlock);
+					/* don't need to send a separate delegreturn for fh */
+					fh.fh_len = 0;
+				}
+				/* return np's current delegation */
+				nfs4_delegation_return(np, 0, thd, cred);
 			}
+			if (fh.fh_len) /* return fh's delegation if it wasn't for np */
+				nfs4_delegreturn_rpc(nmp, fh.fh_data, fh.fh_len, &dstateid, 0, thd, cred);
 		}
 	}
 	if (error) {
@@ -4478,6 +4918,266 @@ nfsmout:
 	return (error);
 }
 
+
+/*
+ * Send an OPEN RPC to claim a delegated open for a file.
+ */
+int
+nfs4_claim_delegated_open_rpc(
+	struct nfs_open_file *nofp,
+	int share_access,
+	int share_deny,
+	int flags)
+{
+	struct nfsmount *nmp;
+	struct nfs_open_owner *noop = nofp->nof_owner;
+	struct nfs_vattr nvattr;
+	int error = 0, lockerror = ENOENT, status;
+	int nfsvers, numops;
+	u_int64_t xid;
+	nfsnode_t np = nofp->nof_np;
+	struct nfsm_chain nmreq, nmrep;
+	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
+	uint32_t rflags = 0, delegation, recall = 0;
+	fhandle_t fh;
+	struct nfs_stateid dstateid;
+	char sbuf[64], *s = sbuf;
+	uint32_t ace_type, ace_flags, ace_mask, len, slen = sizeof(sbuf);
+	struct kauth_ace ace;
+	vnode_t dvp = NULL;
+	const char *vname = NULL;
+	const char *name = NULL;
+	size_t namelen;
+	char smallname[128];
+	char *filename = NULL;
+	struct nfsreq_secinfo_args si;
+
+	nmp = NFSTONMP(np);
+	if (!nmp)
+		return (ENXIO);
+	nfsvers = nmp->nm_vers;
+
+	nfs_node_lock_force(np);
+	if ((vnode_vtype(NFSTOV(np)) != VDIR) && np->n_sillyrename) {
+		/*
+		 * The node's been sillyrenamed, so we need to use
+		 * the sillyrename directory/name to do the open.
+		 */
+		struct nfs_sillyrename *nsp = np->n_sillyrename;
+		dvp = NFSTOV(nsp->nsr_dnp);
+		if ((error = vnode_get(dvp))) {
+			nfs_node_unlock(np);
+			goto out;
+		}
+		name = nsp->nsr_name;
+	} else {
+		/*
+		 * [sigh] We can't trust VFS to get the parent right for named
+		 * attribute nodes.  (It likes to reparent the nodes after we've
+		 * created them.)  Luckily we can probably get the right parent
+		 * from the n_parent we have stashed away.
+		 */
+		if ((np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR) &&
+		    (((dvp = np->n_parent)) && (error = vnode_get(dvp))))
+			dvp = NULL;
+		if (!dvp)
+			dvp = vnode_getparent(NFSTOV(np));
+		vname = vnode_getname(NFSTOV(np));
+		if (!dvp || !vname) {
+			if (!error)
+				error = EIO;
+			nfs_node_unlock(np);
+			goto out;
+		}
+		name = vname;
+	}
+	filename = &smallname[0];
+	namelen = snprintf(filename, sizeof(smallname), "%s", name);
+	if (namelen >= sizeof(smallname)) {
+		MALLOC(filename, char *, namelen+1, M_TEMP, M_WAITOK);
+		if (!filename) {
+			error = ENOMEM;
+			goto out;
+		}
+		snprintf(filename, namelen+1, "%s", name);
+	}
+	nfs_node_unlock(np);
+
+	if ((error = nfs_open_owner_set_busy(noop, NULL)))
+		return (error);
+
+	NVATTR_INIT(&nvattr);
+	delegation = NFS_OPEN_DELEGATE_NONE;
+	dstateid = np->n_dstateid;
+	NFSREQ_SECINFO_SET(&si, VTONFS(dvp), NULL, 0, filename, namelen);
+
+	nfsm_chain_null(&nmreq);
+	nfsm_chain_null(&nmrep);
+
+	// PUTFH, OPEN, GETATTR(FH)
+	numops = 3;
+	nfsm_chain_build_alloc_init(error, &nmreq, 48 * NFSX_UNSIGNED);
+	nfsm_chain_add_compound_header(error, &nmreq, "open_claim_d", numops);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
+	nfsm_chain_add_fh(error, &nmreq, nfsvers, VTONFS(dvp)->n_fhp, VTONFS(dvp)->n_fhsize);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN);
+	nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
+	nfsm_chain_add_32(error, &nmreq, share_access);
+	nfsm_chain_add_32(error, &nmreq, share_deny);
+	// open owner: clientid + uid
+	nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid); // open_owner4.clientid
+	nfsm_chain_add_32(error, &nmreq, NFSX_UNSIGNED);
+	nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(noop->noo_cred)); // open_owner4.owner
+	// openflag4
+	nfsm_chain_add_32(error, &nmreq, NFS_OPEN_NOCREATE);
+	// open_claim4
+	nfsm_chain_add_32(error, &nmreq, NFS_CLAIM_DELEGATE_CUR);
+	nfsm_chain_add_stateid(error, &nmreq, &np->n_dstateid);
+	nfsm_chain_add_name(error, &nmreq, filename, namelen, nmp);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
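+	/* also request the filehandle so we can verify it against the node below */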
+	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
+	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
+	nfsm_chain_build_done(error, &nmreq);
+	nfsm_assert(error, (numops == 0), EPROTO);
+	nfsmout_if(error);
+
+	error = nfs_request2(np, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, current_thread(),
+			noop->noo_cred, &si, flags|R_NOINTR, &nmrep, &xid, &status);
+
+	if ((lockerror = nfs_node_lock(np)))
+		error = lockerror;
+	nfsm_chain_skip_tag(error, &nmrep);
+	nfsm_chain_get_32(error, &nmrep, numops);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
+	nfsmout_if(error);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN);
+	nfs_owner_seqid_increment(noop, NULL, error);
+	nfsm_chain_get_stateid(error, &nmrep, &nofp->nof_stateid);
+	nfsm_chain_check_change_info(error, &nmrep, np);
+	nfsm_chain_get_32(error, &nmrep, rflags);
+	bmlen = NFS_ATTR_BITMAP_LEN;
+	nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen);
+	nfsm_chain_get_32(error, &nmrep, delegation);
+	if (!error)
+		switch (delegation) {
+		case NFS_OPEN_DELEGATE_NONE:
+			// if (!(np->n_openflags & N_DELEG_RETURN)) /* don't warn if delegation is being returned */
+			// 	printf("nfs: open delegated claim didn't return a delegation %s\n", filename ? filename : "???");
+			break;
+		case NFS_OPEN_DELEGATE_READ:
+		case NFS_OPEN_DELEGATE_WRITE:
+			if ((((np->n_openflags & N_DELEG_MASK) == N_DELEG_READ) &&
+			     (delegation == NFS_OPEN_DELEGATE_WRITE)) ||
+			    (((np->n_openflags & N_DELEG_MASK) == N_DELEG_WRITE) &&
+			     (delegation == NFS_OPEN_DELEGATE_READ)))
+				printf("nfs: open delegated claim returned a different delegation type! have %s got %s %s\n",
+				     ((np->n_openflags & N_DELEG_MASK) == N_DELEG_WRITE) ? "W" : "R",
+				     (delegation == NFS_OPEN_DELEGATE_WRITE) ? "W" : "R", filename ? filename : "???");
+			nfsm_chain_get_stateid(error, &nmrep, &dstateid);
+			nfsm_chain_get_32(error, &nmrep, recall);
+			if (delegation == NFS_OPEN_DELEGATE_WRITE) // skip the write delegation's space_limit4 (3 words) XXX
+				nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
+			/* if we have any trouble accepting the ACE, just invalidate it */
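+			/* wire format (per RFC 3530): nfsace4 = type, flag, access mask, then the "who" string as length + opaque bytes */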
+			ace_type = ace_flags = ace_mask = len = 0;
+			nfsm_chain_get_32(error, &nmrep, ace_type);
+			nfsm_chain_get_32(error, &nmrep, ace_flags);
+			nfsm_chain_get_32(error, &nmrep, ace_mask);
+			nfsm_chain_get_32(error, &nmrep, len);
+			ace.ace_flags = nfs4_ace_nfstype_to_vfstype(ace_type, &error);
+			ace.ace_flags |= nfs4_ace_nfsflags_to_vfsflags(ace_flags);
+			ace.ace_rights = nfs4_ace_nfsmask_to_vfsrights(ace_mask);
+			if (!error && (len >= slen)) {
+				MALLOC(s, char*, len+1, M_TEMP, M_WAITOK);
+				if (s)
+					slen = len+1;
+				else
+					ace.ace_flags = 0;
+			}
+			if (s)
+				nfsm_chain_get_opaque(error, &nmrep, len, s);
+			else
+				nfsm_chain_adv(error, &nmrep, nfsm_rndup(len));
+			if (!error && s) {
+				s[len] = '\0';
+				if (nfs4_id2guid(s, &ace.ace_applicable, (ace_flags & NFS_ACE_IDENTIFIER_GROUP)))
+					ace.ace_flags = 0;
+			}
+			if (error || !s)
+				ace.ace_flags = 0;
+			if (s && (s != sbuf))
+				FREE(s, M_TEMP);
+			if (!error) {
+				/* stuff the latest delegation state in the node */
+				lck_mtx_lock(&np->n_openlock);
+				np->n_openflags &= ~N_DELEG_MASK;
+				np->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
+				np->n_dstateid = dstateid;
+				np->n_dace = ace;
+				if (np->n_dlink.tqe_next == NFSNOLIST) {
+					lck_mtx_lock(&nmp->nm_lock);
+					if (np->n_dlink.tqe_next == NFSNOLIST)
+						TAILQ_INSERT_TAIL(&nmp->nm_delegations, np, n_dlink);
+					lck_mtx_unlock(&nmp->nm_lock);
+				}
+				lck_mtx_unlock(&np->n_openlock);
+			}
+			break;
+		default:
+			error = EBADRPC;
+			break;
+		}
+	nfsmout_if(error);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
+	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
+	nfsmout_if(error);
+	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
+		printf("nfs: open reclaim didn't return filehandle? %s\n", filename ? filename : "???");
+		error = EBADRPC;
+		goto nfsmout;
+	}
+	if (!NFS_CMPFH(np, fh.fh_data, fh.fh_len)) {
+		// XXX what if fh doesn't match the vnode we think we're re-opening?
+		// Solaris Named Attributes may do this due to a bug.... so don't warn for named attributes.
+		if (!(np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
+			printf("nfs4_claim_delegated_open_rpc: warning: file handle mismatch %s\n", filename ? filename : "???");
+	}
+	error = nfs_loadattrcache(np, &nvattr, &xid, 1);
+	nfsmout_if(error);
+	if (rflags & NFS_OPEN_RESULT_LOCKTYPE_POSIX)
+		nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK;
+nfsmout:
+	NVATTR_CLEANUP(&nvattr);
+	nfsm_chain_cleanup(&nmreq);
+	nfsm_chain_cleanup(&nmrep);
+	if (!lockerror)
+		nfs_node_unlock(np);
+	nfs_open_owner_clear_busy(noop);
+	if ((delegation == NFS_OPEN_DELEGATE_READ) || (delegation == NFS_OPEN_DELEGATE_WRITE)) {
+		if (recall) {
+			/*
+			 * We're making a delegated claim.
+			 * Don't return the delegation here in case we have more to claim.
+			 * Just make sure it's queued up to be returned.
+			 */
+			nfs4_delegation_return_enqueue(np);
+		}
+	}
+out:
+	// if (!error)
+	// 	printf("nfs: open claim delegated (%d, %d) succeeded for %s\n", share_access, share_deny, filename ? filename : "???");
+	if (filename && (filename != &smallname[0]))
+		FREE(filename, M_TEMP);
+	if (vname)
+		vnode_putname(vname);
+	if (dvp != NULLVP)
+		vnode_put(dvp);
+	return (error);
+}
+
 /*
  * Send an OPEN RPC to reclaim an open file.
  */
@@ -4496,19 +5196,26 @@ nfs4_open_reclaim_rpc(
 	nfsnode_t np = nofp->nof_np;
 	struct nfsm_chain nmreq, nmrep;
 	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
-	uint32_t rflags = 0, delegation, recall = 0, val;
+	uint32_t rflags = 0, delegation, recall = 0;
 	fhandle_t fh;
 	struct nfs_stateid dstateid;
+	char sbuf[64], *s = sbuf;
+	uint32_t ace_type, ace_flags, ace_mask, len, slen = sizeof(sbuf);
+	struct kauth_ace ace;
+	struct nfsreq_secinfo_args si;
 
 	nmp = NFSTONMP(np);
 	if (!nmp)
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
 
-	if ((error = nfs_open_owner_set_busy(noop, current_thread())))
+	if ((error = nfs_open_owner_set_busy(noop, NULL)))
 		return (error);
 
+	NVATTR_INIT(&nvattr);
 	delegation = NFS_OPEN_DELEGATE_NONE;
+	dstateid = np->n_dstateid;
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
 
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
@@ -4542,13 +5249,13 @@ nfs4_open_reclaim_rpc(
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
 	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
 	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
 
-	error = nfs_request2(np, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, current_thread(), noop->noo_cred, R_RECOVER, &nmrep, &xid, &status);
+	error = nfs_request2(np, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, current_thread(),
+			noop->noo_cred, &si, R_RECOVER|R_NOINTR, &nmrep, &xid, &status);
 
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
@@ -4567,38 +5274,66 @@ nfs4_open_reclaim_rpc(
 	if (!error)
 		switch (delegation) {
 		case NFS_OPEN_DELEGATE_NONE:
-			break;
-		case NFS_OPEN_DELEGATE_READ:
-			nfsm_chain_get_stateid(error, &nmrep, &dstateid);
-			nfsm_chain_get_32(error, &nmrep, recall);
-			// ACE: (skip) XXX
-			nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
-			nfsm_chain_get_32(error, &nmrep, val); /* string length */
-			nfsm_chain_adv(error, &nmrep, nfsm_rndup(val));
-			if (!error) {
-				/* stuff the delegation state in the node */
-				lck_mtx_lock(&np->n_openlock);
-				np->n_openflags &= ~N_DELEG_MASK;
-				np->n_openflags |= N_DELEG_READ;
-				np->n_dstateid = dstateid;
-				lck_mtx_unlock(&np->n_openlock);
+			if (np->n_openflags & N_DELEG_MASK) {
+				/*
+				 * Hey!  We were supposed to get our delegation back even
+				 * if it was getting immediately recalled.  Bad server!
+				 *
+				 * Just try to return the existing delegation.
+				 */
+				// NP(np, "nfs: open reclaim didn't return delegation?");
+				delegation = (np->n_openflags & N_DELEG_WRITE) ? NFS_OPEN_DELEGATE_WRITE : NFS_OPEN_DELEGATE_READ;
+				recall = 1;
 			}
 			break;
+		case NFS_OPEN_DELEGATE_READ:
 		case NFS_OPEN_DELEGATE_WRITE:
 			nfsm_chain_get_stateid(error, &nmrep, &dstateid);
 			nfsm_chain_get_32(error, &nmrep, recall);
-			// space (skip) XXX
-			nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
-			// ACE: (skip) XXX
-			nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
-			nfsm_chain_get_32(error, &nmrep, val); /* string length */
-			nfsm_chain_adv(error, &nmrep, nfsm_rndup(val));
+			if (delegation == NFS_OPEN_DELEGATE_WRITE) // skip the write delegation's space_limit4 (3 words) XXX
+				nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
+			/* if we have any trouble accepting the ACE, just invalidate it */
+			ace_type = ace_flags = ace_mask = len = 0;
+			nfsm_chain_get_32(error, &nmrep, ace_type);
+			nfsm_chain_get_32(error, &nmrep, ace_flags);
+			nfsm_chain_get_32(error, &nmrep, ace_mask);
+			nfsm_chain_get_32(error, &nmrep, len);
+			ace.ace_flags = nfs4_ace_nfstype_to_vfstype(ace_type, &error);
+			ace.ace_flags |= nfs4_ace_nfsflags_to_vfsflags(ace_flags);
+			ace.ace_rights = nfs4_ace_nfsmask_to_vfsrights(ace_mask);
+			if (!error && (len >= slen)) {
+				MALLOC(s, char*, len+1, M_TEMP, M_WAITOK);
+				if (s)
+					slen = len+1;
+				else
+					ace.ace_flags = 0;
+			}
+			if (s)
+				nfsm_chain_get_opaque(error, &nmrep, len, s);
+			else
+				nfsm_chain_adv(error, &nmrep, nfsm_rndup(len));
+			if (!error && s) {
+				s[len] = '\0';
+				if (nfs4_id2guid(s, &ace.ace_applicable, (ace_flags & NFS_ACE_IDENTIFIER_GROUP)))
+					ace.ace_flags = 0;
+			}
+			if (error || !s)
+				ace.ace_flags = 0;
+			if (s && (s != sbuf))
+				FREE(s, M_TEMP);
 			if (!error) {
 				/* stuff the delegation state in the node */
 				lck_mtx_lock(&np->n_openlock);
 				np->n_openflags &= ~N_DELEG_MASK;
-				np->n_openflags |= N_DELEG_WRITE;
+				np->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
 				np->n_dstateid = dstateid;
+				np->n_dace = ace;
+				if (np->n_dlink.tqe_next == NFSNOLIST) {
+					lck_mtx_lock(&nmp->nm_lock);
+					if (np->n_dlink.tqe_next == NFSNOLIST)
+						TAILQ_INSERT_TAIL(&nmp->nm_delegations, np, n_dlink);
+					lck_mtx_unlock(&nmp->nm_lock);
+				}
 				lck_mtx_unlock(&np->n_openlock);
 			}
 			break;
@@ -4608,35 +5343,37 @@ nfs4_open_reclaim_rpc(
 		}
 	nfsmout_if(error);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap);
-	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL);
+	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
 	nfsmout_if(error);
 	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
-		printf("nfs: open reclaim didn't return filehandle?\n");
+		NP(np, "nfs: open reclaim didn't return filehandle?");
 		error = EBADRPC;
 		goto nfsmout;
 	}
 	if (!NFS_CMPFH(np, fh.fh_data, fh.fh_len)) {
 		// XXX what if fh doesn't match the vnode we think we're re-opening?
-		printf("nfs4_open_reclaim_rpc: warning: file handle mismatch\n");
+		// That should be pretty hard in this case, given that we are doing
+		// the open reclaim using the file handle (and not a dir/name pair).
+		// Solaris Named Attributes may do this due to a bug.... so don't warn for named attributes.
+		if (!(np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
+			NP(np, "nfs4_open_reclaim_rpc: warning: file handle mismatch");
 	}
 	error = nfs_loadattrcache(np, &nvattr, &xid, 1);
 	nfsmout_if(error);
 	if (rflags & NFS_OPEN_RESULT_LOCKTYPE_POSIX)
 		nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK;
 nfsmout:
+	// if (!error)
+	// 	NP(np, "nfs: open reclaim (%d, %d) succeeded", share_access, share_deny);
+	NVATTR_CLEANUP(&nvattr);
 	nfsm_chain_cleanup(&nmreq);
 	nfsm_chain_cleanup(&nmrep);
 	if (!lockerror)
 		nfs_node_unlock(np);
 	nfs_open_owner_clear_busy(noop);
 	if ((delegation == NFS_OPEN_DELEGATE_READ) || (delegation == NFS_OPEN_DELEGATE_WRITE)) {
-		if (recall) {
-			nfs4_delegreturn_rpc(nmp, fh.fh_data, fh.fh_len, &dstateid, current_thread(), noop->noo_cred);
-			lck_mtx_lock(&np->n_openlock);
-			np->n_openflags &= ~N_DELEG_MASK;
-			lck_mtx_unlock(&np->n_openlock);
-		}
+		if (recall)
+			nfs4_delegation_return_enqueue(np);
 	}
 	return (error);
 }
@@ -4652,15 +5389,17 @@ nfs4_open_downgrade_rpc(
 	int error, lockerror = ENOENT, status, nfsvers, numops;
 	struct nfsm_chain nmreq, nmrep;
 	u_int64_t xid;
+	struct nfsreq_secinfo_args si;
 
 	nmp = NFSTONMP(np);
 	if (!nmp)
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
 
-	if ((error = nfs_open_owner_set_busy(noop, vfs_context_thread(ctx))))
+	if ((error = nfs_open_owner_set_busy(noop, NULL)))
 		return (error);
 
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
 
@@ -4679,12 +5418,13 @@ nfs4_open_downgrade_rpc(
 	nfsm_chain_add_32(error, &nmreq, nofp->nof_deny);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
-	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status);
+	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND,
+			vfs_context_thread(ctx), vfs_context_ucred(ctx),
+			&si, R_NOINTR, &nmrep, &xid, &status);
 
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
@@ -4696,7 +5436,7 @@ nfs4_open_downgrade_rpc(
 	nfs_owner_seqid_increment(noop, NULL, error);
 	nfsm_chain_get_stateid(error, &nmrep, &nofp->nof_stateid);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
+	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
 nfsmout:
 	if (!lockerror)
 		nfs_node_unlock(np);
@@ -4712,26 +5452,28 @@ nfs4_close_rpc(
 	struct nfs_open_file *nofp,
 	thread_t thd,
 	kauth_cred_t cred,
-	int flag)
+	int flags)
 {
 	struct nfs_open_owner *noop = nofp->nof_owner;
 	struct nfsmount *nmp;
 	int error, lockerror = ENOENT, status, nfsvers, numops;
 	struct nfsm_chain nmreq, nmrep;
 	u_int64_t xid;
+	struct nfsreq_secinfo_args si;
 
 	nmp = NFSTONMP(np);
 	if (!nmp)
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
 
-	if ((error = nfs_open_owner_set_busy(noop, thd)))
+	if ((error = nfs_open_owner_set_busy(noop, NULL)))
 		return (error);
 
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
 
-	// PUTFH, CLOSE, GETFH
+	// PUTFH, CLOSE, GETATTR
 	numops = 3;
 	nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED);
 	nfsm_chain_add_compound_header(error, &nmreq, "close", numops);
@@ -4744,12 +5486,11 @@ nfs4_close_rpc(
 	nfsm_chain_add_stateid(error, &nmreq, &nofp->nof_stateid);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
-	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, flag, &nmrep, &xid, &status);
+	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, flags|R_NOINTR, &nmrep, &xid, &status);
 
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
@@ -4761,7 +5502,7 @@ nfs4_close_rpc(
 	nfs_owner_seqid_increment(noop, NULL, error);
 	nfsm_chain_get_stateid(error, &nmrep, &nofp->nof_stateid);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
+	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
 nfsmout:
 	if (!lockerror)
 		nfs_node_unlock(np);
@@ -4772,685 +5513,2630 @@ nfsmout:
 }
 
 
-int
-nfs4_delegreturn_rpc(struct nfsmount *nmp, u_char *fhp, int fhlen, struct nfs_stateid *sid, thread_t thd, kauth_cred_t cred)
-{
-	int error = 0, status, numops;
-	uint64_t xid;
-	struct nfsm_chain nmreq, nmrep;
-
-	nfsm_chain_null(&nmreq);
-	nfsm_chain_null(&nmrep);
-
-	// PUTFH, DELEGRETURN
-	numops = 2;
-	nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED);
-	nfsm_chain_add_compound_header(error, &nmreq, "delegreturn", numops);
-	numops--;
-	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
-	nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, fhp, fhlen);
-	numops--;
-	nfsm_chain_add_32(error, &nmreq, NFS_OP_DELEGRETURN);
-	nfsm_chain_add_stateid(error, &nmreq, sid);
-	nfsm_chain_build_done(error, &nmreq);
-	nfsm_assert(error, (numops == 0), EPROTO);
-	nfsmout_if(error);
-	error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, thd, cred, R_RECOVER, &nmrep, &xid, &status);
-	nfsm_chain_skip_tag(error, &nmrep);
-	nfsm_chain_get_32(error, &nmrep, numops);
-	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
-	nfsm_chain_op_check(error, &nmrep, NFS_OP_DELEGRETURN);
-nfsmout:
-	nfsm_chain_cleanup(&nmreq);
-	nfsm_chain_cleanup(&nmrep);
-	return (error);
-}
-
-
 /*
- * NFSv4 read call.
- * Just call nfs_bioread() to do the work.
- *
- * Note: the exec code paths have a tendency to call VNOP_READ (and VNOP_MMAP)
- * without first calling VNOP_OPEN, so we make sure the file is open here.
+ * Claim the delegated open combinations this open file holds.
  */
 int
-nfs4_vnop_read(
-	struct vnop_read_args /* {
-		struct vnodeop_desc *a_desc;
-		vnode_t a_vp;
-		struct uio *a_uio;
-		int a_ioflag;
-		vfs_context_t a_context;
-	} */ *ap)
+nfs4_claim_delegated_state_for_open_file(struct nfs_open_file *nofp, int flags)
 {
-	vnode_t vp = ap->a_vp;
-	vfs_context_t ctx = ap->a_context;
-	nfsnode_t np;
+	struct nfs_open_owner *noop = nofp->nof_owner;
+	struct nfs_lock_owner *nlop;
+	struct nfs_file_lock *nflp, *nextnflp;
 	struct nfsmount *nmp;
-	struct nfs_open_owner *noop;
-	struct nfs_open_file *nofp;
-	int error;
-
-	if (vnode_vtype(ap->a_vp) != VREG)
-		return (EPERM);
-
-	np = VTONFS(vp);
-	nmp = NFSTONMP(np);
-	if (!nmp)
-		return (ENXIO);
+	int error = 0, reopen = 0;
 
-	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
-	if (!noop)
-		return (ENOMEM);
-restart:
-	error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1);
-	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
-		printf("nfs_vnop_read: LOST\n");
-		error = EIO;
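+	/*
+	 * Each delegated open combination (access mode x deny mode) is
+	 * reclaimed with its own OPEN/CLAIM_DELEGATE_CUR; on success the
+	 * delegated open counts (nof_d_*) are folded into the confirmed
+	 * open counts (nof_*).
+	 */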
+	if (nofp->nof_d_rw_drw) {
+		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_BOTH, flags);
+		if (!error) {
+			lck_mtx_lock(&nofp->nof_lock);
+			nofp->nof_rw_drw += nofp->nof_d_rw_drw;
+			nofp->nof_d_rw_drw = 0;
+			lck_mtx_unlock(&nofp->nof_lock);
+		}
 	}
-	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
-		nfs4_reopen(nofp, vfs_context_thread(ctx));
-		nofp = NULL;
-		goto restart;
+	if (!error && nofp->nof_d_w_drw) {
+		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_BOTH, flags);
+		if (!error) {
+			lck_mtx_lock(&nofp->nof_lock);
+			nofp->nof_w_drw += nofp->nof_d_w_drw;
+			nofp->nof_d_w_drw = 0;
+			lck_mtx_unlock(&nofp->nof_lock);
+		}
 	}
-	if (error) {
-		nfs_open_owner_rele(noop);
+	if (!error && nofp->nof_d_r_drw) {
+		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_BOTH, flags);
+		if (!error) {
+			lck_mtx_lock(&nofp->nof_lock);
+			nofp->nof_r_drw += nofp->nof_d_r_drw;
+			nofp->nof_d_r_drw = 0;
+			lck_mtx_unlock(&nofp->nof_lock);
+		}
+	}
+	if (!error && nofp->nof_d_rw_dw) {
+		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_WRITE, flags);
+		if (!error) {
+			lck_mtx_lock(&nofp->nof_lock);
+			nofp->nof_rw_dw += nofp->nof_d_rw_dw;
+			nofp->nof_d_rw_dw = 0;
+			lck_mtx_unlock(&nofp->nof_lock);
+		}
+	}
+	if (!error && nofp->nof_d_w_dw) {
+		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_WRITE, flags);
+		if (!error) {
+			lck_mtx_lock(&nofp->nof_lock);
+			nofp->nof_w_dw += nofp->nof_d_w_dw;
+			nofp->nof_d_w_dw = 0;
+			lck_mtx_unlock(&nofp->nof_lock);
+		}
+	}
+	if (!error && nofp->nof_d_r_dw) {
+		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_WRITE, flags);
+		if (!error) {
+			lck_mtx_lock(&nofp->nof_lock);
+			nofp->nof_r_dw += nofp->nof_d_r_dw;
+			nofp->nof_d_r_dw = 0;
+			lck_mtx_unlock(&nofp->nof_lock);
+		}
+	}
+	/* non-deny-mode opens may be reopened if no locks are held */
+	if (!error && nofp->nof_d_rw) {
+		error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, flags);
+		/* for some errors, we should just try reopening the file */
+		if (nfs_mount_state_error_delegation_lost(error))
+			reopen = error;
+		if (!error || reopen) {
+			lck_mtx_lock(&nofp->nof_lock);
+			nofp->nof_rw += nofp->nof_d_rw;
+			nofp->nof_d_rw = 0;
+			lck_mtx_unlock(&nofp->nof_lock);
+		}
+	}
+	/* if we've already set reopen, we should move these other two opens from delegated to not delegated */
+	if ((!error || reopen) && nofp->nof_d_w) {
+		if (!error) {
+			error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, flags);
+			/* for some errors, we should just try reopening the file */
+			if (nfs_mount_state_error_delegation_lost(error))
+				reopen = error;
+		}
+		if (!error || reopen) {
+			lck_mtx_lock(&nofp->nof_lock);
+			nofp->nof_w += nofp->nof_d_w;
+			nofp->nof_d_w = 0;
+			lck_mtx_unlock(&nofp->nof_lock);
+		}
+	}
+	if ((!error || reopen) && nofp->nof_d_r) {
+		if (!error) {
+			error = nfs4_claim_delegated_open_rpc(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, flags);
+			/* for some errors, we should just try reopening the file */
+			if (nfs_mount_state_error_delegation_lost(error))
+				reopen = error;
+		}
+		if (!error || reopen) {
+			lck_mtx_lock(&nofp->nof_lock);
+			nofp->nof_r += nofp->nof_d_r;
+			nofp->nof_d_r = 0;
+			lck_mtx_unlock(&nofp->nof_lock);
+		}
+	}
+
+	if (reopen) {
+		/*
+		 * Any problems with the delegation probably indicate that we
+		 * should review/return all of our current delegation state.
+		 */
+		if ((nmp = NFSTONMP(nofp->nof_np))) {
+			nfs4_delegation_return_enqueue(nofp->nof_np);
+			lck_mtx_lock(&nmp->nm_lock);
+			nfs_need_recover(nmp, NFSERR_EXPIRED);
+			lck_mtx_unlock(&nmp->nm_lock);
+		}
+		if (reopen && (nfs_check_for_locks(noop, nofp) == 0)) {
+			/* just reopen the file on next access */
+			NP(nofp->nof_np, "nfs4_claim_delegated_state_for_open_file: %d, need reopen, %d",
+				reopen, kauth_cred_getuid(nofp->nof_owner->noo_cred));
+			lck_mtx_lock(&nofp->nof_lock);
+			nofp->nof_flags |= NFS_OPEN_FILE_REOPEN;
+			lck_mtx_unlock(&nofp->nof_lock);
+			return (0);
+		}
+		if (reopen)
+			NP(nofp->nof_np, "nfs4_claim_delegated_state_for_open_file: %d, locks prevent reopen, %d",
+				reopen, kauth_cred_getuid(nofp->nof_owner->noo_cred));
+	}
+
+	if (!error && ((nmp = NFSTONMP(nofp->nof_np)))) {
+		/* claim delegated locks */
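+		/*
+		 * Locks marked NFS_FILE_LOCK_DELEGATED were granted locally,
+		 * without a server round trip, while we held the delegation;
+		 * replay each one to the server with a real LOCK request now.
+		 */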
+		TAILQ_FOREACH(nlop, &nofp->nof_np->n_lock_owners, nlo_link) {
+			if (nlop->nlo_open_owner != noop)
+				continue;
+			TAILQ_FOREACH_SAFE(nflp, &nlop->nlo_locks, nfl_lolink, nextnflp) {
+				/* skip dead & blocked lock requests (shouldn't be any in the held lock list) */
+				if (nflp->nfl_flags & (NFS_FILE_LOCK_DEAD|NFS_FILE_LOCK_BLOCKED))
+					continue;
+				/* skip non-delegated locks */
+				if (!(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED))
+					continue;
+				error = nmp->nm_funcs->nf_setlock_rpc(nofp->nof_np, nofp, nflp, 0, flags, current_thread(), noop->noo_cred);
+				if (error) {
+					NP(nofp->nof_np, "nfs: delegated lock claim (0x%llx, 0x%llx) failed %d, %d",
+						nflp->nfl_start, nflp->nfl_end, error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
+					break;
+				}
+				// else {
+				// 	NP(nofp->nof_np, "nfs: delegated lock claim (0x%llx, 0x%llx) succeeded, %d",
+				// 		nflp->nfl_start, nflp->nfl_end, kauth_cred_getuid(nofp->nof_owner->noo_cred));
+				// }
+			}
+			if (error)
+				break;
+		}
+	}
+
+	if (!error)  /* all state claimed successfully! */
+		return (0);
+
+	/* restart if it looks like a problem more than just losing the delegation */
+	if (!nfs_mount_state_error_delegation_lost(error) &&
+	    ((error == ETIMEDOUT) || nfs_mount_state_error_should_restart(error))) {
+		NP(nofp->nof_np, "nfs delegated lock claim error %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
+		if ((error == ETIMEDOUT) && ((nmp = NFSTONMP(nofp->nof_np))))
+			nfs_need_reconnect(nmp);
 		return (error);
 	}
-	if (!nofp->nof_access) {
-		/* we don't have the file open, so open it for read access */
-		error = nfs_mount_state_in_use_start(nmp);
-		if (error) {
-			nfs_open_owner_rele(noop);
-			return (error);
+
+	/* delegated state lost (once held but now not claimable) */ 
+	NP(nofp->nof_np, "nfs delegated state claim error %d, state lost, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
+
+	/*
+	 * Any problems with the delegation probably indicate that we
+	 * should review/return all of our current delegation state.
+	 */
+	if ((nmp = NFSTONMP(nofp->nof_np))) {
+		nfs4_delegation_return_enqueue(nofp->nof_np);
+		lck_mtx_lock(&nmp->nm_lock);
+		nfs_need_recover(nmp, NFSERR_EXPIRED);
+		lck_mtx_unlock(&nmp->nm_lock);
+	}
+
+	/* revoke all open file state */
+	nfs_revoke_open_state_for_node(nofp->nof_np);
+
+	return (error);
+}
+
+/*
+ * Release all open state for the given node.
+ */
+void
+nfs_release_open_state_for_node(nfsnode_t np, int force)
+{
+	struct nfsmount *nmp = NFSTONMP(np);
+	struct nfs_open_file *nofp;
+	struct nfs_file_lock *nflp, *nextnflp;
+
+	/* drop held locks */
+	TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) {
+		/* skip dead & blocked lock requests */
+		if (nflp->nfl_flags & (NFS_FILE_LOCK_DEAD|NFS_FILE_LOCK_BLOCKED))
+			continue;
+		/* send an unlock if not a delegated lock */
+		if (!force && nmp && !(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED))
+			nmp->nm_funcs->nf_unlock_rpc(np, nflp->nfl_owner, F_WRLCK, nflp->nfl_start, nflp->nfl_end, R_RECOVER,
+				NULL, nflp->nfl_owner->nlo_open_owner->noo_cred);
+		/* kill/remove the lock */
+		lck_mtx_lock(&np->n_openlock);
+		nflp->nfl_flags |= NFS_FILE_LOCK_DEAD;
+		lck_mtx_lock(&nflp->nfl_owner->nlo_lock);
+		TAILQ_REMOVE(&nflp->nfl_owner->nlo_locks, nflp, nfl_lolink);
+		lck_mtx_unlock(&nflp->nfl_owner->nlo_lock);
+		if (nflp->nfl_blockcnt) {
+			/* wake up anyone blocked on this lock */
+			wakeup(nflp);
+		} else {
+			/* remove nflp from lock list and destroy */
+			TAILQ_REMOVE(&np->n_locks, nflp, nfl_link);
+			nfs_file_lock_destroy(nflp);
 		}
-		error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
+		lck_mtx_unlock(&np->n_openlock);
+	}
+
+	lck_mtx_lock(&np->n_openlock);
+
+	/* drop all opens */
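+	/*
+	 * Marking an open NFS_OPEN_FILE_LOST makes later attempts to use it
+	 * fail with EIO; unless forced, we still send a best-effort CLOSE
+	 * (NFSv4) so the server can drop its state too.
+	 */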
+	TAILQ_FOREACH(nofp, &np->n_opens, nof_link) {
+		if (nofp->nof_flags & NFS_OPEN_FILE_LOST)
+			continue;
+		/* mark open state as lost */
+		lck_mtx_lock(&nofp->nof_lock);
+		nofp->nof_flags &= ~NFS_OPEN_FILE_REOPEN;
+		nofp->nof_flags |= NFS_OPEN_FILE_LOST;
+		lck_mtx_unlock(&nofp->nof_lock);
+		if (!force && nmp && (nmp->nm_vers >= NFS_VER4))
+			nfs4_close_rpc(np, nofp, NULL, nofp->nof_owner->noo_cred, R_RECOVER);
+	}
+
+	lck_mtx_unlock(&np->n_openlock);
+}
+
+/*
+ * State for a node has been lost, drop it, and revoke the node.
+ * Attempt to return any state if possible in case the server
+ * might somehow think we hold it.
+ */
+void
+nfs_revoke_open_state_for_node(nfsnode_t np)
+{
+	struct nfsmount *nmp;
+
+	/* mark node as needing to be revoked */
+	nfs_node_lock_force(np);
+	if (np->n_flag & NREVOKE) {	/* already revoked? */
+		NP(np, "nfs_revoke_open_state_for_node(): already revoked");
+		nfs_node_unlock(np);
+		return;
+	}
+	np->n_flag |= NREVOKE;
+	nfs_node_unlock(np);
+
+	nfs_release_open_state_for_node(np, 0);
+	NP(np, "nfs: state lost for %p 0x%x", np, np->n_flag);
+
+	/* mark mount as needing a revoke scan and have the socket thread do it. */
+	if ((nmp = NFSTONMP(np))) {
+		lck_mtx_lock(&nmp->nm_lock);
+		nmp->nm_state |= NFSSTA_REVOKE;
+		nfs_mount_sock_thread_wake(nmp);
+		lck_mtx_unlock(&nmp->nm_lock);
+	}
+}
+
+/*
+ * Claim the delegated open combinations that each of this node's open files hold.
+ */
+int
+nfs4_claim_delegated_state_for_node(nfsnode_t np, int flags)
+{
+	struct nfs_open_file *nofp;
+	int error = 0;
+
+	lck_mtx_lock(&np->n_openlock);
+
+	/* walk the open file list looking for opens with delegated state to claim */
+restart:
+	TAILQ_FOREACH(nofp, &np->n_opens, nof_link) {
+		if (!nofp->nof_d_rw_drw && !nofp->nof_d_w_drw && !nofp->nof_d_r_drw &&
+		    !nofp->nof_d_rw_dw && !nofp->nof_d_w_dw && !nofp->nof_d_r_dw &&
+		    !nofp->nof_d_rw && !nofp->nof_d_w && !nofp->nof_d_r)
+			continue;
+		lck_mtx_unlock(&np->n_openlock);
+		error = nfs4_claim_delegated_state_for_open_file(nofp, flags);
+		lck_mtx_lock(&np->n_openlock);
 		if (error)
-			nofp = NULL;
-		if (!error)
-			error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx);
-		if (!error)
-			nofp->nof_flags |= NFS_OPEN_FILE_NEEDCLOSE;
-		if (nofp)
-			nfs_open_file_clear_busy(nofp);
-		if (nfs_mount_state_in_use_end(nmp, error)) {
-			nofp = NULL;
-			goto restart;
+			break;
+		goto restart;
+	}
+
+	lck_mtx_unlock(&np->n_openlock);
+
+	return (error);
+}
+
+/*
+ * Mark a node as needed to have its delegation returned.
+ * Queue it up on the delegation return queue.
+ * Make sure the thread is running.
+ */
+void
+nfs4_delegation_return_enqueue(nfsnode_t np)
+{
+	struct nfsmount *nmp;
+
+	nmp = NFSTONMP(np);
+	if (!nmp)
+		return;
+
+	lck_mtx_lock(&np->n_openlock);
+	np->n_openflags |= N_DELEG_RETURN;
+	lck_mtx_unlock(&np->n_openlock);
+
+	lck_mtx_lock(&nmp->nm_lock);
+	if (np->n_dreturn.tqe_next == NFSNOLIST)
+		TAILQ_INSERT_TAIL(&nmp->nm_dreturnq, np, n_dreturn);
+	nfs_mount_sock_thread_wake(nmp);
+	lck_mtx_unlock(&nmp->nm_lock);
+}
+
+/*
+ * return any delegation we may have for the given node
+ */
+int
+nfs4_delegation_return(nfsnode_t np, int flags, thread_t thd, kauth_cred_t cred)
+{
+	struct nfsmount *nmp;
+	fhandle_t fh;
+	nfs_stateid dstateid;
+	int error;
+
+	nmp = NFSTONMP(np);
+	if (!nmp)
+		return (ENXIO);
+
+	/* first, make sure the node's marked for delegation return */
+	lck_mtx_lock(&np->n_openlock);
+	np->n_openflags |= (N_DELEG_RETURN|N_DELEG_RETURNING);
+	lck_mtx_unlock(&np->n_openlock);
+
+	/* make sure nobody else is using the delegation state */
+	if ((error = nfs_open_state_set_busy(np, NULL)))
+		goto out;
+
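+	/*
+	 * Before the delegation can go back, any opens and locks that were
+	 * granted locally under it must first be pushed to the server (via
+	 * CLAIM_DELEGATE_CUR opens and LOCK requests).
+	 */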
+	/* claim any delegated state */
+	if ((error = nfs4_claim_delegated_state_for_node(np, flags)))
+		goto out;
+
+	/* return the delegation */
+	lck_mtx_lock(&np->n_openlock);
+	dstateid = np->n_dstateid;
+	fh.fh_len = np->n_fhsize;
+	bcopy(np->n_fhp, &fh.fh_data, fh.fh_len);
+	lck_mtx_unlock(&np->n_openlock);
+	error = nfs4_delegreturn_rpc(NFSTONMP(np), fh.fh_data, fh.fh_len, &dstateid, flags, thd, cred);
+	/* assume delegation is gone for all errors except ETIMEDOUT, NFSERR_*MOVED */
+	if ((error != ETIMEDOUT) && (error != NFSERR_MOVED) && (error != NFSERR_LEASE_MOVED)) {
+		lck_mtx_lock(&np->n_openlock);
+		np->n_openflags &= ~N_DELEG_MASK;
+		lck_mtx_lock(&nmp->nm_lock);
+		if (np->n_dlink.tqe_next != NFSNOLIST) {
+			TAILQ_REMOVE(&nmp->nm_delegations, np, n_dlink);
+			np->n_dlink.tqe_next = NFSNOLIST;
+		}
+		lck_mtx_unlock(&nmp->nm_lock);
+		lck_mtx_unlock(&np->n_openlock);
+	}
+
+out:
+	/* make sure it's no longer on the return queue and clear the return flags */
+	lck_mtx_lock(&nmp->nm_lock);
+	if (np->n_dreturn.tqe_next != NFSNOLIST) {
+		TAILQ_REMOVE(&nmp->nm_dreturnq, np, n_dreturn);
+		np->n_dreturn.tqe_next = NFSNOLIST;
+	}
+	lck_mtx_unlock(&nmp->nm_lock);
+	lck_mtx_lock(&np->n_openlock);
+	np->n_openflags &= ~(N_DELEG_RETURN|N_DELEG_RETURNING);
+	lck_mtx_unlock(&np->n_openlock);
+
+	if (error) {
+		NP(np, "nfs4_delegation_return, error %d", error);
+		if (error == ETIMEDOUT)
+			nfs_need_reconnect(nmp);
+		if (nfs_mount_state_error_should_restart(error)) {
+			/* make sure recovery happens */
+			lck_mtx_lock(&nmp->nm_lock);
+			nfs_need_recover(nmp, nfs_mount_state_error_delegation_lost(error) ? NFSERR_EXPIRED : 0);
+			lck_mtx_unlock(&nmp->nm_lock);
 		}
 	}
-	nfs_open_owner_rele(noop);
-	if (error)
-		return (error);
-	return (nfs_bioread(VTONFS(ap->a_vp), ap->a_uio, ap->a_ioflag, ap->a_context));
+
+	nfs_open_state_clear_busy(np);
+
+	return (error);
 }
 
 /*
- * Note: the NFSv4 CREATE RPC is for everything EXCEPT regular files.
- * Files are created using the NFSv4 OPEN RPC.  So we must open the
- * file to create it and then close it.
+ * RPC to return a delegation for a file handle
+ */
+int
+nfs4_delegreturn_rpc(struct nfsmount *nmp, u_char *fhp, int fhlen, struct nfs_stateid *sid, int flags, thread_t thd, kauth_cred_t cred)
+{
+	int error = 0, status, numops;
+	uint64_t xid;
+	struct nfsm_chain nmreq, nmrep;
+	struct nfsreq_secinfo_args si;
+
+	NFSREQ_SECINFO_SET(&si, NULL, fhp, fhlen, NULL, 0);
+	nfsm_chain_null(&nmreq);
+	nfsm_chain_null(&nmrep);
+
+	// PUTFH, DELEGRETURN
+	numops = 2;
+	nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED);
+	nfsm_chain_add_compound_header(error, &nmreq, "delegreturn", numops);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
+	nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, fhp, fhlen);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_DELEGRETURN);
+	nfsm_chain_add_stateid(error, &nmreq, sid);
+	nfsm_chain_build_done(error, &nmreq);
+	nfsm_assert(error, (numops == 0), EPROTO);
+	nfsmout_if(error);
+	error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, flags, &nmrep, &xid, &status);
+	nfsm_chain_skip_tag(error, &nmrep);
+	nfsm_chain_get_32(error, &nmrep, numops);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_DELEGRETURN);
+nfsmout:
+	nfsm_chain_cleanup(&nmreq);
+	nfsm_chain_cleanup(&nmrep);
+	return (error);
+}
+
+
+/*
+ * NFS read call.
+ * Just call nfs_bioread() to do the work.
+ *
+ * Note: the exec code paths have a tendency to call VNOP_READ (and VNOP_MMAP)
+ * without first calling VNOP_OPEN, so we make sure the file is open here.
+ */
+int
+nfs_vnop_read(
+	struct vnop_read_args /* {
+		struct vnodeop_desc *a_desc;
+		vnode_t a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		vfs_context_t a_context;
+	} */ *ap)
+{
+	vnode_t vp = ap->a_vp;
+	vfs_context_t ctx = ap->a_context;
+	nfsnode_t np;
+	struct nfsmount *nmp;
+	struct nfs_open_owner *noop;
+	struct nfs_open_file *nofp;
+	int error;
+
+	if (vnode_vtype(ap->a_vp) != VREG)
+		return (EPERM);
+
+	np = VTONFS(vp);
+	nmp = NFSTONMP(np);
+	if (!nmp)
+		return (ENXIO);
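+	/* a revoked node's open state is gone; fail I/O on it immediately */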
+	if (np->n_flag & NREVOKE)
+		return (EIO);
+
+	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
+	if (!noop)
+		return (ENOMEM);
+restart:
+	error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1);
+	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
+		NP(np, "nfs_vnop_read: LOST %d", kauth_cred_getuid(noop->noo_cred));
+		error = EIO;
+	}
+	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
+		error = nfs4_reopen(nofp, vfs_context_thread(ctx));
+		nofp = NULL;
+		if (!error)
+			goto restart;
+	}
+	if (error) {
+		nfs_open_owner_rele(noop);
+		return (error);
+	}
+	if (!nofp->nof_access) {
+		/* we don't have the file open, so open it for read access */
+		error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
+		if (error) {
+			nfs_open_owner_rele(noop);
+			return (error);
+		}
+		if (np->n_flag & NREVOKE) {
+			error = EIO;
+			nfs_mount_state_in_use_end(nmp, 0);
+			nfs_open_owner_rele(noop);
+			return (error);
+		}
+		error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
+		if (error)
+			nofp = NULL;
+		if (!error) {
+			if (nmp->nm_vers < NFS_VER4) {
+				/* NFS v2/v3 opens are always allowed - so just add it. */
+				nfs_open_file_add_open(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, 0);
+			} else {
+				error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx);
+			}
+		}
+		if (!error)
+			nofp->nof_flags |= NFS_OPEN_FILE_NEEDCLOSE;
+		if (nofp)
+			nfs_open_file_clear_busy(nofp);
+		if (nfs_mount_state_in_use_end(nmp, error)) {
+			nofp = NULL;
+			goto restart;
+		}
+	}
+	nfs_open_owner_rele(noop);
+	if (error)
+		return (error);
+	return (nfs_bioread(VTONFS(ap->a_vp), ap->a_uio, ap->a_ioflag, ap->a_context));
+}
+
+/*
+ * Note: the NFSv4 CREATE RPC is for everything EXCEPT regular files.
+ * Files are created using the NFSv4 OPEN RPC.  So we must open the
+ * file to create it and then close it.
+ */
+int
+nfs4_vnop_create(
+	struct vnop_create_args /* {
+		struct vnodeop_desc *a_desc;
+		vnode_t a_dvp;
+		vnode_t *a_vpp;
+		struct componentname *a_cnp;
+		struct vnode_attr *a_vap;
+		vfs_context_t a_context;
+	} */ *ap)
+{
+	vfs_context_t ctx = ap->a_context;
+	struct componentname *cnp = ap->a_cnp;
+	struct vnode_attr *vap = ap->a_vap;
+	vnode_t dvp = ap->a_dvp;
+	vnode_t *vpp = ap->a_vpp;
+	struct nfsmount *nmp;
+	nfsnode_t np;
+	int error = 0, busyerror = 0, accessMode, denyMode;
+	struct nfs_open_owner *noop = NULL;
+	struct nfs_open_file *newnofp = NULL, *nofp = NULL;
+
+	nmp = VTONMP(dvp);
+	if (!nmp)
+		return (ENXIO);
+
+	if (vap)
+		nfs_avoid_needless_id_setting_on_create(VTONFS(dvp), vap, ctx);
+
+	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
+	if (!noop)
+		return (ENOMEM);
+
+restart:
+	error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
+	if (error) {
+		nfs_open_owner_rele(noop);
+		return (error);
+	}
+
+	/* grab a provisional, nodeless open file */
+	error = nfs_open_file_find(NULL, noop, &newnofp, 0, 0, 1);
+	if (!error && (newnofp->nof_flags & NFS_OPEN_FILE_LOST)) {
+		printf("nfs_vnop_create: LOST\n");
+		error = EIO;
+	}
+	if (!error && (newnofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
+		/* This shouldn't happen given that this is a new, nodeless nofp */
+		nfs_mount_state_in_use_end(nmp, 0);
+		error = nfs4_reopen(newnofp, vfs_context_thread(ctx));
+		nfs_open_file_destroy(newnofp);
+		newnofp = NULL;
+		if (!error)
+			goto restart;
+	}
+	if (!error)
+		error = nfs_open_file_set_busy(newnofp, vfs_context_thread(ctx));
+	if (error) {
+		if (newnofp)
+			nfs_open_file_destroy(newnofp);
+		newnofp = NULL;
+		goto out;
+	}
+
+	/*
+	 * We're just trying to create the file.
+	 * We'll create/open it RW, and set NFS_OPEN_FILE_CREATE.
+	 */
+	accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
+	denyMode = NFS_OPEN_SHARE_DENY_NONE;
+
+	/* Do the open/create */
+	error = nfs4_open_rpc(newnofp, ctx, cnp, vap, dvp, vpp, NFS_OPEN_CREATE, accessMode, denyMode);
+	if ((error == EACCES) && vap && !(vap->va_vaflags & VA_EXCLUSIVE) &&
+	    VATTR_IS_ACTIVE(vap, va_mode) && !(vap->va_mode & S_IWUSR)) {
+		/*
+		 * Hmm... it looks like the request may have been retransmitted:
+		 * we never saw the first reply (which successfully created and
+		 * opened the file), and the retry was then denied because the
+		 * mode the file was created with doesn't allow write access.
+		 *
+		 * We'll try to work around this by temporarily updating the mode and
+		 * retrying the open.
+		 */
+		struct vnode_attr vattr;
+
+		/* first make sure it's there */
+		int error2 = nfs_lookitup(VTONFS(dvp), cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
+		if (!error2 && np) {
+			nfs_node_unlock(np);
+			*vpp = NFSTOV(np);
+			if (vnode_vtype(NFSTOV(np)) == VREG) {
+				VATTR_INIT(&vattr);
+				VATTR_SET(&vattr, va_mode, (vap->va_mode | S_IWUSR));
+				if (!nfs4_setattr_rpc(np, &vattr, ctx)) {
+					error2 = nfs4_open_rpc(newnofp, ctx, cnp, NULL, dvp, vpp, NFS_OPEN_NOCREATE, accessMode, denyMode);
+					VATTR_INIT(&vattr);
+					VATTR_SET(&vattr, va_mode, vap->va_mode);
+					nfs4_setattr_rpc(np, &vattr, ctx);
+					if (!error2)
+						error = 0;
+				}
+			}
+			if (error) {
+				vnode_put(*vpp);
+				*vpp = NULL;
+			}
+		}
+	}
+	if (!error && !*vpp) {
+		printf("nfs4_open_rpc returned without a node?\n");
+		/* Hmmm... with no node, we have no filehandle and can't close it */
+		error = EIO;
+	}
+	if (error) {
+		/* need to cleanup our temporary nofp */
+		nfs_open_file_clear_busy(newnofp);
+		nfs_open_file_destroy(newnofp);
+		newnofp = NULL;
+		goto out;
+	}
+	/* After we have a node, add our open file struct to the node */
+	np = VTONFS(*vpp);
+	nfs_open_file_add_open(newnofp, accessMode, denyMode, 0);
+	nofp = newnofp;
+	error = nfs_open_file_find_internal(np, noop, &nofp, 0, 0, 0);
+	if (error) {
+		/* This shouldn't happen, because we passed in a new nofp to use. */
+		printf("nfs_open_file_find_internal failed! %d\n", error);
+		goto out;
+	} else if (nofp != newnofp) {
+		/*
+		 * Hmm... an open file struct already exists.
+		 * Mark the existing one busy and merge our open into it.
+		 * Then destroy the one we created.
+		 * Note: there's no chance of an open conflict because the
+		 * open has already been granted.
+		 */
+		busyerror = nfs_open_file_set_busy(nofp, NULL);
+		nfs_open_file_add_open(nofp, accessMode, denyMode, 0);
+		nofp->nof_stateid = newnofp->nof_stateid;
+		if (newnofp->nof_flags & NFS_OPEN_FILE_POSIXLOCK)
+			nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK;
+		nfs_open_file_clear_busy(newnofp);
+		nfs_open_file_destroy(newnofp);
+	}
+	newnofp = NULL;
+	/* mark the node as holding a create-initiated open */
+	nofp->nof_flags |= NFS_OPEN_FILE_CREATE;
+	nofp->nof_creator = current_thread();
+out:
+	if (nofp && !busyerror)
+		nfs_open_file_clear_busy(nofp);
+	if (nfs_mount_state_in_use_end(nmp, error)) {
+		nofp = newnofp = NULL;
+		busyerror = 0;
+		goto restart;
+	}
+	if (noop)
+		nfs_open_owner_rele(noop);
+	return (error);
+}
+
+/*
+ * Note: the NFSv4 CREATE RPC is for everything EXCEPT regular files.
+ */
+int
+nfs4_create_rpc(
+	vfs_context_t ctx,
+	nfsnode_t dnp,
+	struct componentname *cnp,
+	struct vnode_attr *vap,
+	int type,
+	char *link,
+	nfsnode_t *npp)
+{
+	struct nfsmount *nmp;
+	struct nfs_vattr nvattr;
+	int error = 0, create_error = EIO, lockerror = ENOENT, busyerror = ENOENT, status;
+	int nfsvers, namedattrs, numops;
+	u_int64_t xid, savedxid = 0;
+	nfsnode_t np = NULL;
+	vnode_t newvp = NULL;
+	struct nfsm_chain nmreq, nmrep;
+	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
+	const char *tag;
+	nfs_specdata sd;
+	fhandle_t fh;
+	struct nfsreq rq, *req = &rq;
+	struct nfs_dulookup dul;
+	struct nfsreq_secinfo_args si;
+
+	nmp = NFSTONMP(dnp);
+	if (!nmp)
+		return (ENXIO);
+	nfsvers = nmp->nm_vers;
+	namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
+	if (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
+
+	sd.specdata1 = sd.specdata2 = 0;
+
+	switch (type) {
+	case NFLNK:
+		tag = "symlink";
+		break;
+	case NFBLK:
+	case NFCHR:
+		tag = "mknod";
+		if (!VATTR_IS_ACTIVE(vap, va_rdev))
+			return (EINVAL);
+		sd.specdata1 = major(vap->va_rdev);
+		sd.specdata2 = minor(vap->va_rdev);
+		break;
+	case NFSOCK:
+	case NFFIFO:
+		tag = "mknod";
+		break;
+	case NFDIR:
+		tag = "mkdir";
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
+
+	error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
+	if (!namedattrs)
+		nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
+
+	NFSREQ_SECINFO_SET(&si, dnp, NULL, 0, NULL, 0);
+	NVATTR_INIT(&nvattr);
+	nfsm_chain_null(&nmreq);
+	nfsm_chain_null(&nmrep);
+
+	// PUTFH, SAVEFH, CREATE, GETATTR(FH), RESTOREFH, GETATTR
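+	// SAVEFH stashes the directory filehandle; CREATE leaves the new object
+	// as the current filehandle for its GETATTR, and RESTOREFH brings the
+	// directory back for the final GETATTR of directory attributes.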
+	numops = 6;
+	nfsm_chain_build_alloc_init(error, &nmreq, 66 * NFSX_UNSIGNED);
+	nfsm_chain_add_compound_header(error, &nmreq, tag, numops);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
+	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_CREATE);
+	nfsm_chain_add_32(error, &nmreq, type);
+	if (type == NFLNK) {
+		nfsm_chain_add_name(error, &nmreq, link, strlen(link), nmp);
+	} else if ((type == NFBLK) || (type == NFCHR)) {
+		nfsm_chain_add_32(error, &nmreq, sd.specdata1);
+		nfsm_chain_add_32(error, &nmreq, sd.specdata2);
+	}
+	nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
+	nfsm_chain_add_fattr4(error, &nmreq, vap, nmp);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
+	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
+	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, NULL);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, dnp);
+	nfsm_chain_build_done(error, &nmreq);
+	nfsm_assert(error, (numops == 0), EPROTO);
+	nfsmout_if(error);
+
+	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC4_COMPOUND,
+			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
+	if (!error) {
+		if (!namedattrs)
+			nfs_dulookup_start(&dul, dnp, ctx);
+		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
+	}
+
+	if ((lockerror = nfs_node_lock(dnp)))
+		error = lockerror;
+	nfsm_chain_skip_tag(error, &nmrep);
+	nfsm_chain_get_32(error, &nmrep, numops);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH);
+	nfsmout_if(error);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_CREATE);
+	nfsm_chain_check_change_info(error, &nmrep, dnp);
+	bmlen = NFS_ATTR_BITMAP_LEN;
+	nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen);
+	/* At this point if we have no error, the object was created. */
+	/* If we didn't get attributes, we'll have to look the object up below (nfs_lookitup). */
+	create_error = error;
+	nfsmout_if(error);
+	nfs_vattr_set_supported(bitmap, vap);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
+	nfsmout_if(error);
+	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
+	nfsmout_if(error);
+	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
+		printf("nfs: create/%s didn't return filehandle? %s\n", tag, cnp->cn_nameptr);
+		error = EBADRPC;
+		goto nfsmout;
+	}
+	/* directory attributes: if we don't get them, make sure to invalidate */
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
+	savedxid = xid;
+	nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, &xid);
+	if (error)
+		NATTRINVALIDATE(dnp);
+
+nfsmout:
+	nfsm_chain_cleanup(&nmreq);
+	nfsm_chain_cleanup(&nmrep);
+
+	if (!lockerror) {
+		if (!create_error && (dnp->n_flag & NNEGNCENTRIES)) {
+			dnp->n_flag &= ~NNEGNCENTRIES;
+			cache_purge_negatives(NFSTOV(dnp));
+		}
+		dnp->n_flag |= NMODIFIED;
+		nfs_node_unlock(dnp);
+		/* nfs_getattr() will check changed and purge caches */
+		nfs_getattr(dnp, NULL, ctx, NGA_CACHED);
+	}
+
+	if (!error && fh.fh_len) {
+		/* create the vnode with the filehandle and attributes */
+		xid = savedxid;
+		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
+		if (!error)
+			newvp = NFSTOV(np);
+	}
+	NVATTR_CLEANUP(&nvattr);
+
+	if (!namedattrs)
+		nfs_dulookup_finish(&dul, dnp, ctx);
+
+	/*
+	 * Kludge: Map EEXIST => 0, assuming the EEXIST is the reply to a
+	 * retransmitted request, provided we can successfully look up the object.
+	 */
+	if ((create_error == EEXIST) || (!create_error && !newvp)) {
+		error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
+		if (!error) {
+			newvp = NFSTOV(np);
+			if (vnode_vtype(newvp) != nfstov_type(type, nfsvers))
+				error = EEXIST;
+		}
+	}
+	if (!busyerror)
+		nfs_node_clear_busy(dnp);
+	if (error) {
+		if (newvp) {
+			nfs_node_unlock(np);
+			vnode_put(newvp);
+		}
+	} else {
+		nfs_node_unlock(np);
+		*npp = np;
+	}
+	return (error);
+}
+
+int
+nfs4_vnop_mknod(
+	struct vnop_mknod_args /* {
+		struct vnodeop_desc *a_desc;
+		vnode_t a_dvp;
+		vnode_t *a_vpp;
+		struct componentname *a_cnp;
+		struct vnode_attr *a_vap;
+		vfs_context_t a_context;
+	} */ *ap)
+{
+	nfsnode_t np = NULL;
+	struct nfsmount *nmp;
+	int error;
+
+	nmp = VTONMP(ap->a_dvp);
+	if (!nmp)
+		return (ENXIO);
+
+	if (!VATTR_IS_ACTIVE(ap->a_vap, va_type))
+		return (EINVAL);
+	switch (ap->a_vap->va_type) {
+	case VBLK:
+	case VCHR:
+	case VFIFO:
+	case VSOCK:
+		break;
+	default:
+		return (ENOTSUP);
+	}
+
+	error = nfs4_create_rpc(ap->a_context, VTONFS(ap->a_dvp), ap->a_cnp, ap->a_vap,
+			vtonfs_type(ap->a_vap->va_type, nmp->nm_vers), NULL, &np);
+	if (!error)
+		*ap->a_vpp = NFSTOV(np);
+	return (error);
+}
+
+int
+nfs4_vnop_mkdir(
+	struct vnop_mkdir_args /* {
+		struct vnodeop_desc *a_desc;
+		vnode_t a_dvp;
+		vnode_t *a_vpp;
+		struct componentname *a_cnp;
+		struct vnode_attr *a_vap;
+		vfs_context_t a_context;
+	} */ *ap)
+{
+	nfsnode_t np = NULL;
+	int error;
+
+	error = nfs4_create_rpc(ap->a_context, VTONFS(ap->a_dvp), ap->a_cnp, ap->a_vap,
+			NFDIR, NULL, &np);
+	if (!error)
+		*ap->a_vpp = NFSTOV(np);
+	return (error);
+}
+
+int
+nfs4_vnop_symlink(
+	struct vnop_symlink_args /* {
+		struct vnodeop_desc *a_desc;
+		vnode_t a_dvp;
+		vnode_t *a_vpp;
+		struct componentname *a_cnp;
+		struct vnode_attr *a_vap;
+		char *a_target;
+		vfs_context_t a_context;
+	} */ *ap)
+{
+	nfsnode_t np = NULL;
+	int error;
+
+	error = nfs4_create_rpc(ap->a_context, VTONFS(ap->a_dvp), ap->a_cnp, ap->a_vap,
+			NFLNK, ap->a_target, &np);
+	if (!error)
+		*ap->a_vpp = NFSTOV(np);
+	return (error);
+}
+
+int
+nfs4_vnop_link(
+	struct vnop_link_args /* {
+		struct vnodeop_desc *a_desc;
+		vnode_t a_vp;
+		vnode_t a_tdvp;
+		struct componentname *a_cnp;
+		vfs_context_t a_context;
+	} */ *ap)
+{
+	vfs_context_t ctx = ap->a_context;
+	vnode_t vp = ap->a_vp;
+	vnode_t tdvp = ap->a_tdvp;
+	struct componentname *cnp = ap->a_cnp;
+	int error = 0, lockerror = ENOENT, status;
+	struct nfsmount *nmp;
+	nfsnode_t np = VTONFS(vp);
+	nfsnode_t tdnp = VTONFS(tdvp);
+	int nfsvers, numops;
+	u_int64_t xid, savedxid;
+	struct nfsm_chain nmreq, nmrep;
+	struct nfsreq_secinfo_args si;
+
+	if (vnode_mount(vp) != vnode_mount(tdvp))
+		return (EXDEV);
+
+	nmp = VTONMP(vp);
+	if (!nmp)
+		return (ENXIO);
+	nfsvers = nmp->nm_vers;
+	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
+	if (tdnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (EINVAL);
+
+	/*
+	 * Push all writes to the server, so that the attribute cache
+	 * doesn't get "out of sync" with the server.
+	 * XXX There should be a better way!
+	 */
+	nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), V_IGNORE_WRITEERR);
+
+	if ((error = nfs_node_set_busy2(tdnp, np, vfs_context_thread(ctx))))
+		return (error);
+
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
+	nfsm_chain_null(&nmreq);
+	nfsm_chain_null(&nmrep);
+
+	// PUTFH(SOURCE), SAVEFH, PUTFH(DIR), LINK, GETATTR(DIR), RESTOREFH, GETATTR
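+	// LINK creates a link, in the directory given by the current filehandle,
+	// to the object given by the saved filehandle (hence PUTFH(SOURCE)+SAVEFH
+	// before PUTFH(DIR)); RESTOREFH then recovers the source for its GETATTR.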
+	numops = 7;
+	nfsm_chain_build_alloc_init(error, &nmreq, 29 * NFSX_UNSIGNED + cnp->cn_namelen);
+	nfsm_chain_add_compound_header(error, &nmreq, "link", numops);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
+	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
+	nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_LINK);
+	nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, tdnp);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, nfs_getattr_bitmap, nmp, np);
+	nfsm_chain_build_done(error, &nmreq);
+	nfsm_assert(error, (numops == 0), EPROTO);
+	nfsmout_if(error);
+	error = nfs_request(tdnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);
+
+	if ((lockerror = nfs_node_lock2(tdnp, np))) {
+		error = lockerror;
+		goto nfsmout;
+	}
+	nfsm_chain_skip_tag(error, &nmrep);
+	nfsm_chain_get_32(error, &nmrep, numops);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_LINK);
+	nfsm_chain_check_change_info(error, &nmrep, tdnp);
+	/* directory attributes: if we don't get them, make sure to invalidate */
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
+	savedxid = xid;
+	nfsm_chain_loadattr(error, &nmrep, tdnp, nfsvers, &xid);
+	if (error)
+		NATTRINVALIDATE(tdnp);
+	/* link attributes: if we don't get them, make sure to invalidate */
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
+	xid = savedxid;
+	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
+	if (error)
+		NATTRINVALIDATE(np);
+nfsmout:
+	nfsm_chain_cleanup(&nmreq);
+	nfsm_chain_cleanup(&nmrep);
+	if (!lockerror)
+		tdnp->n_flag |= NMODIFIED;
+	/* Kludge: Map EEXIST => 0, assuming it is the reply to a retransmitted request. */
+	if (error == EEXIST)
+		error = 0;
+	if (!error && (tdnp->n_flag & NNEGNCENTRIES)) {
+		tdnp->n_flag &= ~NNEGNCENTRIES;
+		cache_purge_negatives(tdvp);
+	}
+	if (!lockerror)
+		nfs_node_unlock2(tdnp, np);
+	nfs_node_clear_busy2(tdnp, np);
+	return (error);
+}
+
+int
+nfs4_vnop_rmdir(
+	struct vnop_rmdir_args /* {
+		struct vnodeop_desc *a_desc;
+		vnode_t a_dvp;
+		vnode_t a_vp;
+		struct componentname *a_cnp;
+		vfs_context_t a_context;
+	} */ *ap)
+{
+	vfs_context_t ctx = ap->a_context;
+	vnode_t vp = ap->a_vp;
+	vnode_t dvp = ap->a_dvp;
+	struct componentname *cnp = ap->a_cnp;
+	struct nfsmount *nmp;
+	int error = 0, namedattrs;
+	nfsnode_t np = VTONFS(vp);
+	nfsnode_t dnp = VTONFS(dvp);
+	struct nfs_dulookup dul;
+
+	if (vnode_vtype(vp) != VDIR)
+		return (EINVAL);
+
+	nmp = NFSTONMP(dnp);
+	if (!nmp)
+		return (ENXIO);
+	namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
+
+	if ((error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx))))
+		return (error);
+
+	if (!namedattrs) {
+		nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
+		nfs_dulookup_start(&dul, dnp, ctx);
+	}
+
+	error = nfs4_remove_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
+			vfs_context_thread(ctx), vfs_context_ucred(ctx));
+
+	nfs_name_cache_purge(dnp, np, cnp, ctx);
+	/* nfs_getattr() will check changed and purge caches */
+	nfs_getattr(dnp, NULL, ctx, NGA_CACHED);
+	if (!namedattrs)
+		nfs_dulookup_finish(&dul, dnp, ctx);
+	nfs_node_clear_busy2(dnp, np);
+
+	/*
+	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
+	 */
+	if (error == ENOENT)
+		error = 0;
+	if (!error) {
+		/*
+		 * remove nfsnode from hash now so we can't accidentally find it
+		 * again if another object gets created with the same filehandle
+		 * before this vnode gets reclaimed
+		 */
+		lck_mtx_lock(nfs_node_hash_mutex);
+		if (np->n_hflag & NHHASHED) {
+			LIST_REMOVE(np, n_hash);
+			np->n_hflag &= ~NHHASHED;
+			FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
+		}
+		lck_mtx_unlock(nfs_node_hash_mutex);
+	}
+	return (error);
+}
+
+/*
+ * NFSv4 Named Attributes
+ *
+ * Both the extended attributes interface and the named streams interface
+ * are backed by NFSv4 named attributes.  The implementations for both use
+ * a common set of routines in an attempt to reduce code duplication, to
+ * increase efficiency, to increase caching of both names and data, and to
+ * confine the complexity.
+ *
+ * Each NFS node caches its named attribute directory's file handle.
+ * The directory nodes for the named attribute directories are handled
+ * exactly like regular directories (with a couple of minor exceptions).
+ * Named attribute nodes are also treated as much like regular files as
+ * possible.
+ *
+ * Most of the heavy lifting is done by nfs4_named_attr_get().
+ */
+
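+/*
+ * The cached attribute directory file handle (n_attrdirfh) is stored as a
+ * length-prefixed buffer: the first byte holds the handle's length and the
+ * handle data follows immediately.  A minimal sketch of reading the cached
+ * handle back out (hypothetical helper, shown only to illustrate the
+ * buffer layout):
+ *
+ *	static int
+ *	nfs4_attrdirfh_copyout(nfsnode_t np, u_char *buf, size_t buflen)
+ *	{
+ *		if (!np->n_attrdirfh)
+ *			return (ENOENT);	// nothing cached yet
+ *		if (*np->n_attrdirfh > buflen)
+ *			return (ERANGE);	// caller's buffer too small
+ *		bcopy(np->n_attrdirfh + 1, buf, *np->n_attrdirfh);
+ *		return (0);
+ *	}
+ */
+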
+/*
+ * Get the given node's attribute directory node.
+ * If !fetch, then only return a cached node.
+ * Otherwise, we will attempt to fetch the node from the server.
+ * (Note: the node should be marked busy.)
  */
-int
-nfs4_vnop_create(
-	struct vnop_create_args /* {
-		struct vnodeop_desc *a_desc;
-		vnode_t a_dvp;
-		vnode_t *a_vpp;
-		struct componentname *a_cnp;
-		struct vnode_attr *a_vap;
-		vfs_context_t a_context;
-	} */ *ap)
+nfsnode_t
+nfs4_named_attr_dir_get(nfsnode_t np, int fetch, vfs_context_t ctx)
 {
-	vfs_context_t ctx = ap->a_context;
-	struct componentname *cnp = ap->a_cnp;
-	struct vnode_attr *vap = ap->a_vap;
-	vnode_t dvp = ap->a_dvp;
-	vnode_t *vpp = ap->a_vpp;
+	nfsnode_t adnp = NULL;
 	struct nfsmount *nmp;
-	nfsnode_t np;
-	int error = 0;
-	struct nfs_open_owner *noop = NULL;
-	struct nfs_open_file *nofp = NULL;
+	int error = 0, status, numops;
+	struct nfsm_chain nmreq, nmrep;
+	u_int64_t xid;
+	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
+	fhandle_t fh;
+	struct nfs_vattr nvattr;
+	struct componentname cn;
+	struct nfsreq rq, *req = &rq;
+	struct nfsreq_secinfo_args si;
 
-	nmp = VTONMP(dvp);
+	nmp = NFSTONMP(np);
 	if (!nmp)
-		return (ENXIO);
-
-	nfs_avoid_needless_id_setting_on_create(VTONFS(dvp), vap, ctx);
+		return (NULL);
+	if (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL)
+		return (NULL);
 
-	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
-	if (!noop)
-		return (ENOMEM);
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
+	NVATTR_INIT(&nvattr);
+	nfsm_chain_null(&nmreq);
+	nfsm_chain_null(&nmrep);
 
-restart:
-	error = nfs_mount_state_in_use_start(nmp);
-	if (error) {
-		nfs_open_owner_rele(noop);
-		return (error);
+	bzero(&cn, sizeof(cn));
+	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(_PATH_FORKSPECIFIER, const, char *); /* "/..namedfork/" */
+	cn.cn_namelen = strlen(_PATH_FORKSPECIFIER);
+	cn.cn_nameiop = LOOKUP;
+
+	if (np->n_attrdirfh) {
+		// XXX can't set parent correctly (to np) yet
+		error = nfs_nget(nmp->nm_mountp, NULL, &cn, np->n_attrdirfh+1, *np->n_attrdirfh,
+				NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &adnp);
+		if (adnp)
+			goto nfsmout;
+	}
+	if (!fetch) {
+		error = ENOENT;
+		goto nfsmout;
 	}
 
-	error = nfs_open_file_find(NULL, noop, &nofp, 0, 0, 1);
-	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
-		printf("nfs_vnop_create: LOST\n");
-		error = EIO;
-	}
-	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
-		nfs_mount_state_in_use_end(nmp, 0);
-		nfs4_reopen(nofp, vfs_context_thread(ctx));
-		nofp = NULL;
-		goto restart;
-	}
+	// PUTFH, OPENATTR, GETATTR
+	numops = 3;
+	nfsm_chain_build_alloc_init(error, &nmreq, 22 * NFSX_UNSIGNED);
+	nfsm_chain_add_compound_header(error, &nmreq, "openattr", numops);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
+	nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, np->n_fhp, np->n_fhsize);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_OPENATTR);
+	nfsm_chain_add_32(error, &nmreq, 0);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
+	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
+	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
+	nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap,
+		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_build_done(error, &nmreq);
+	nfsm_assert(error, (numops == 0), EPROTO);
+	nfsmout_if(error);
+	error = nfs_request_async(np, NULL, &nmreq, NFSPROC4_COMPOUND,
+			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
 	if (!error)
-		error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
-	if (error) {
-		nofp = NULL;
-		goto out;
-	}
-
-	nofp->nof_opencnt++;
-	nofp->nof_access = NFS_OPEN_SHARE_ACCESS_BOTH;
-	nofp->nof_deny = NFS_OPEN_SHARE_DENY_NONE;
-	nofp->nof_rw++;
+		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
 
-	error = nfs4_open_rpc(nofp, ctx, cnp, vap, dvp, vpp, NFS_OPEN_CREATE,
-			NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE);
-	if (!error && !*vpp) {
-		printf("nfs4_open_rpc returned without a node?\n");
-		/* Hmmm... with no node, we have no filehandle and can't close it */
-		error = EIO;
+	nfsm_chain_skip_tag(error, &nmrep);
+	nfsm_chain_get_32(error, &nmrep, numops);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_OPENATTR);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
+	nfsmout_if(error);
+	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
+	nfsmout_if(error);
+	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE) || !fh.fh_len) {
+		error = ENOENT;
+		goto nfsmout;
 	}
-	if (error) {
-		nofp->nof_rw--;
-		nofp->nof_access = 0;
-		nofp->nof_deny = 0;
-		nofp->nof_opencnt--;
-	}
-	if (*vpp) {
-		nofp->nof_np = np = VTONFS(*vpp);
-		/* insert nofp onto np's open list */
-		TAILQ_INSERT_HEAD(&np->n_opens, nofp, nof_link);
-		if (!error) {
-			nofp->nof_flags |= NFS_OPEN_FILE_CREATE;
-			nofp->nof_creator = current_thread();
-		}
+	if (!np->n_attrdirfh || (*np->n_attrdirfh != fh.fh_len)) {
+		/* (re)allocate attrdir fh buffer */
+		if (np->n_attrdirfh)
+			FREE(np->n_attrdirfh, M_TEMP);
+		MALLOC(np->n_attrdirfh, u_char*, fh.fh_len+1, M_TEMP, M_WAITOK);
 	}
-out:
-	if (nofp)
-		nfs_open_file_clear_busy(nofp);
-	if (nfs_mount_state_in_use_end(nmp, error)) {
-		nofp = NULL;
-		goto restart;
+	if (!np->n_attrdirfh) {
+		error = ENOMEM;
+		goto nfsmout;
 	}
-	if (noop)
-		nfs_open_owner_rele(noop);
-	return (error);
-}
+	/* cache the attrdir fh in the node */
+	*np->n_attrdirfh = fh.fh_len;
+	bcopy(fh.fh_data, np->n_attrdirfh+1, fh.fh_len);
+	/* create node for attrdir */
+	// XXX can't set parent correctly (to np) yet
+	error = nfs_nget(NFSTOMP(np), NULL, &cn, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, 0, &adnp);
+nfsmout:
+	NVATTR_CLEANUP(&nvattr);
+	nfsm_chain_cleanup(&nmreq);
+	nfsm_chain_cleanup(&nmrep);
 
-void
-nfs_avoid_needless_id_setting_on_create(nfsnode_t dnp, struct vnode_attr *vap, vfs_context_t ctx)
-{
-	/*
-	 * Don't bother setting UID if it's the same as the credential performing the create.
-	 * Don't bother setting GID if it's the same as the directory or credential.
-	 */
-	if (VATTR_IS_ACTIVE(vap, va_uid)) {
-		if (kauth_cred_getuid(vfs_context_ucred(ctx)) == vap->va_uid)
-			VATTR_CLEAR_ACTIVE(vap, va_uid);
-	}
-	if (VATTR_IS_ACTIVE(vap, va_gid)) {
-		if ((vap->va_gid == dnp->n_vattr.nva_gid) ||
-		    (kauth_cred_getgid(vfs_context_ucred(ctx)) == vap->va_gid))
-			VATTR_CLEAR_ACTIVE(vap, va_gid);
+	if (adnp) {
+		/* sanity check that this node is an attribute directory */
+		if (adnp->n_vattr.nva_type != VDIR)
+			error = EINVAL;
+		if (!(adnp->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR))
+			error = EINVAL;
+		nfs_node_unlock(adnp);
+		if (error)
+			vnode_put(NFSTOV(adnp));
 	}
+	return (error ? NULL : adnp);
 }
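+
+/*
+ * Typical caller pattern (see nfs4_named_attr_remove() and
+ * nfs4_vnop_listxattr() below), with the node marked busy around the
+ * call as the note above requires:
+ *
+ *	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx))))
+ *		return (error);
+ *	adnp = nfs4_named_attr_dir_get(np, 1, ctx);	// fetch if not cached
+ *	nfs_node_clear_busy(np);
+ *	if (!adnp)
+ *		error = ENOENT;
+ */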
 
 /*
- * Note: the NFSv4 CREATE RPC is for everything EXCEPT regular files.
+ * Get the given node's named attribute node for the name given.
+ *
+ * In an effort to increase the performance of named attribute access, we try
+ * to reduce server requests by doing the following:
+ *
+ * - cache the node's named attribute directory file handle in the node
+ * - maintain a directory vnode for the attribute directory
+ * - use name cache entries (positive and negative) to speed up lookups
+ * - optionally open the named attribute (with the given accessMode) in the same RPC
+ * - combine attribute directory retrieval with the lookup/open RPC
+ * - optionally prefetch the named attribute's first block of data in the same RPC
+ *
+ * Also, in an attempt to reduce the number of copies/variations of this code,
+ * parts of the RPC building/processing code are conditionalized on what is
+ * needed for any particular request (openattr, lookup vs. open, read).
+ *
+ * Note that because we may not have the attribute directory node when we start
+ * the lookup/open, we lock both the node and the attribute directory node.
  */
+
+#define NFS_GET_NAMED_ATTR_CREATE		0x1
+#define NFS_GET_NAMED_ATTR_CREATE_GUARDED	0x2
+#define NFS_GET_NAMED_ATTR_TRUNCATE		0x4
+#define NFS_GET_NAMED_ATTR_PREFETCH		0x8
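+
+/*
+ * For example, nfs4_vnop_setxattr() below maps the XATTR_* options onto
+ * these flags roughly as follows (a sketch of the mapping only):
+ *
+ *	flags = 0;
+ *	if (!(options & XATTR_REPLACE))
+ *		flags |= NFS_GET_NAMED_ATTR_CREATE;		// may create it
+ *	if (options & XATTR_CREATE)
+ *		flags |= NFS_GET_NAMED_ATTR_CREATE_GUARDED;	// must not already exist
+ *	if (!isrsrcfork)
+ *		flags |= NFS_GET_NAMED_ATTR_TRUNCATE;		// discard any old data
+ */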
+
 int
-nfs4_create_rpc(
-	vfs_context_t ctx,
-	nfsnode_t dnp,
+nfs4_named_attr_get(
+	nfsnode_t np,
 	struct componentname *cnp,
-	struct vnode_attr *vap,
-	int type,
-	char *link,
-	nfsnode_t *npp)
+	uint32_t accessMode,
+	int flags,
+	vfs_context_t ctx,
+	nfsnode_t *anpp,
+	struct nfs_open_file **nofpp)
 {
 	struct nfsmount *nmp;
-	struct nfs_vattr nvattr, dnvattr;
-	int error = 0, create_error = EIO, lockerror = ENOENT, busyerror = ENOENT, status;
-	int nfsvers, numops;
+	int error = 0, open_error = EIO;
+	int inuse = 0, adlockerror = ENOENT, busyerror = ENOENT, adbusyerror = ENOENT, nofpbusyerror = ENOENT;
+	int create, guarded, prefetch, truncate, noopbusy = 0;
+	int open, status, numops, hadattrdir, negnamecache;
+	struct nfs_vattr nvattr;
+	struct vnode_attr vattr;
+	nfsnode_t adnp = NULL, anp = NULL;
+	vnode_t avp = NULL;
 	u_int64_t xid, savedxid = 0;
-	nfsnode_t np = NULL;
-	vnode_t newvp = NULL;
 	struct nfsm_chain nmreq, nmrep;
 	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen;
-	const char *tag;
-	nfs_specdata sd;
+	uint32_t denyMode, rflags, delegation, recall, eof, rlen, retlen;
+	nfs_stateid stateid, dstateid;
 	fhandle_t fh;
-	struct nfsreq *req = NULL;
-	struct nfs_dulookup dul;
+	struct nfs_open_owner *noop = NULL;
+	struct nfs_open_file *newnofp = NULL, *nofp = NULL;
+	struct vnop_access_args naa;
+	thread_t thd;
+	kauth_cred_t cred;
+	struct timeval now;
+	char sbuf[64], *s;
+	uint32_t ace_type, ace_flags, ace_mask, len, slen;
+	struct kauth_ace ace;
+	struct nfsreq rq, *req = &rq;
+	struct nfsreq_secinfo_args si;
+
+	*anpp = NULL;
+	fh.fh_len = 0;
+	rflags = delegation = recall = eof = rlen = retlen = 0;
+	ace.ace_flags = 0;
+	s = sbuf;
+	slen = sizeof(sbuf);
 
-	nmp = NFSTONMP(dnp);
+	nmp = NFSTONMP(np);
 	if (!nmp)
 		return (ENXIO);
-	nfsvers = nmp->nm_vers;
-
-	sd.specdata1 = sd.specdata2 = 0;
+	NVATTR_INIT(&nvattr);
+	negnamecache = !NMFLAG(nmp, NONEGNAMECACHE);
+	thd = vfs_context_thread(ctx);
+	cred = vfs_context_ucred(ctx);
+	create = (flags & NFS_GET_NAMED_ATTR_CREATE) ? NFS_OPEN_CREATE : NFS_OPEN_NOCREATE;
+	guarded = (flags & NFS_GET_NAMED_ATTR_CREATE_GUARDED) ? NFS_CREATE_GUARDED : NFS_CREATE_UNCHECKED;
+	truncate = (flags & NFS_GET_NAMED_ATTR_TRUNCATE);
+	prefetch = (flags & NFS_GET_NAMED_ATTR_PREFETCH);
+
+	if (!create) {
+		error = nfs_getattr(np, &nvattr, ctx, NGA_CACHED);
+		if (error)
+			return (error);
+		if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_NAMED_ATTR) &&
+		    !(nvattr.nva_flags & NFS_FFLAG_HAS_NAMED_ATTRS))
+			return (ENOATTR);
+	} else if (accessMode == NFS_OPEN_SHARE_ACCESS_NONE) {
+		/* shouldn't happen... but just be safe */
+		printf("nfs4_named_attr_get: create with no access %s\n", cnp->cn_nameptr);
+		accessMode = NFS_OPEN_SHARE_ACCESS_READ;
+	}
+	open = (accessMode != NFS_OPEN_SHARE_ACCESS_NONE);
+	if (open) {
+		/*
+		 * We're trying to open the file.
+		 * We'll create/open it with the given access mode,
+		 * and set NFS_OPEN_FILE_CREATE.
+		 */
+		denyMode = NFS_OPEN_SHARE_DENY_NONE;
+		if (prefetch && guarded)
+			prefetch = 0;  /* no sense prefetching data that can't be there */
 
-	switch (type) {
-	case NFLNK:
-		tag = "symlink";
-		break;
-	case NFBLK:
-	case NFCHR:
-		tag = "mknod";
-		if (!VATTR_IS_ACTIVE(vap, va_rdev))
-			return (EINVAL);
-		sd.specdata1 = major(vap->va_rdev);
-		sd.specdata2 = minor(vap->va_rdev);
-		break;
-	case NFSOCK:
-	case NFFIFO:
-		tag = "mknod";
-		break;
-	case NFDIR:
-		tag = "mkdir";
-		break;
-	default:
-		return (EINVAL);
+		noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
+		if (!noop)
+			return (ENOMEM);
 	}
 
-	nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
-
-	error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
-	nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
+	if ((error = busyerror = nfs_node_set_busy(np, vfs_context_thread(ctx))))
+		return (error);
 
+	adnp = nfs4_named_attr_dir_get(np, 0, ctx);
+	hadattrdir = (adnp != NULL);
+	if (prefetch) {
+		microuptime(&now);
+		/* use the special state ID because we don't have a real one to send */
+		stateid.seqid = stateid.other[0] = stateid.other[1] = stateid.other[2] = 0;
+		rlen = MIN(nmp->nm_rsize, nmp->nm_biosize);
+	}
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
 
-	// PUTFH, SAVEFH, CREATE, GETATTR(FH), RESTOREFH, GETATTR
-	numops = 6;
-	nfsm_chain_build_alloc_init(error, &nmreq, 66 * NFSX_UNSIGNED);
-	nfsm_chain_add_compound_header(error, &nmreq, tag, numops);
-	numops--;
-	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
-	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
-	numops--;
-	nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH);
-	numops--;
-	nfsm_chain_add_32(error, &nmreq, NFS_OP_CREATE);
-	nfsm_chain_add_32(error, &nmreq, type);
-	if (type == NFLNK) {
-		nfsm_chain_add_string(error, &nmreq, link, strlen(link));
-	} else if ((type == NFBLK) || (type == NFCHR)) {
-		nfsm_chain_add_32(error, &nmreq, sd.specdata1);
-		nfsm_chain_add_32(error, &nmreq, sd.specdata2);
+	if (hadattrdir) {
+		if ((error = adbusyerror = nfs_node_set_busy(adnp, vfs_context_thread(ctx))))
+			goto nfsmout;
+		/* nfs_getattr() will check changed and purge caches */
+		error = nfs_getattr(adnp, NULL, ctx, NGA_CACHED);
+		nfsmout_if(error);
+		error = cache_lookup(NFSTOV(adnp), &avp, cnp);
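+		/* cache_lookup() returns -1 for a hit, 0 for a miss, and ENOENT for a negative entry */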
+		switch (error) {
+		case ENOENT:
+			/* negative cache entry */
+			goto nfsmout;
+		case 0:
+			/* cache miss */
+			/* try dir buf cache lookup */
+			error = nfs_dir_buf_cache_lookup(adnp, &anp, cnp, ctx, 0);
+			if (!error && anp) {
+				/* dir buf cache hit */
+				*anpp = anp;
+				error = -1;
+			}
+			if (error != -1) /* cache miss */
+				break;
+			/* FALLTHROUGH */
+		case -1:
+			/* cache hit, not really an error */
+			OSAddAtomic(1, &nfsstats.lookupcache_hits);
+			if (!anp && avp)
+				*anpp = anp = VTONFS(avp);
+
+			nfs_node_clear_busy(adnp);
+			adbusyerror = ENOENT;
+
+			/* check for directory access */
+			naa.a_desc = &vnop_access_desc;
+			naa.a_vp = NFSTOV(adnp);
+			naa.a_action = KAUTH_VNODE_SEARCH;
+			naa.a_context = ctx;
+
+			/* compute actual success/failure based on accessibility */
+			error = nfs_vnop_access(&naa);
+			/* FALLTHROUGH */
+		default:
+			/* we either found it, or hit an error */
+			if (!error && guarded) {
+				/* found cached entry but told not to use it */
+				error = EEXIST;
+				vnode_put(NFSTOV(anp));
+				*anpp = anp = NULL;
+			}
+			/* we're done if error or we don't need to open */
+			if (error || !open)
+				goto nfsmout;
+			/* no error and we need to open... */
+		}
+	}
+
+	if (open) {
+restart:
+		error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
+		if (error) {
+			nfs_open_owner_rele(noop);
+			noop = NULL;
+			goto nfsmout;
+		}
+		inuse = 1;
+
+		/* grab an open file - possibly provisional/nodeless if cache_lookup() failed */
+		error = nfs_open_file_find(anp, noop, &newnofp, 0, 0, 1);
+		if (!error && (newnofp->nof_flags & NFS_OPEN_FILE_LOST)) {
+			printf("nfs4_named_attr_get: LOST %d %s\n", kauth_cred_getuid(noop->noo_cred), cnp->cn_nameptr);
+			error = EIO;
+		}
+		if (!error && (newnofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
+			nfs_mount_state_in_use_end(nmp, 0);
+			error = nfs4_reopen(newnofp, vfs_context_thread(ctx));
+			nfs_open_file_destroy(newnofp);
+			newnofp = NULL;
+			if (!error)
+				goto restart;
+		}
+		if (!error)
+			error = nfs_open_file_set_busy(newnofp, vfs_context_thread(ctx));
+		if (error) {
+			if (newnofp)
+				nfs_open_file_destroy(newnofp);
+			newnofp = NULL;
+			goto nfsmout;
+		}
+		if (anp) {
+			/*
+			 * We already have the node.  So we just need to open
+			 * it - which we may be able to do with a delegation.
+			 */
+			open_error = error = nfs4_open(anp, newnofp, accessMode, denyMode, ctx);
+			if (!error) {
+				/* open succeeded, so our open file is no longer temporary */
+				nofp = newnofp;
+				nofpbusyerror = 0;
+				newnofp = NULL;
+				if (nofpp)
+					*nofpp = nofp;
+			}
+			goto nfsmout;
+		}
+	}
+
+	/*
+	 * We either don't have the attrdir or we didn't find the attribute
+	 * in the name cache, so we need to talk to the server.
+	 *
+	 * If we don't have the attrdir, we'll need to ask the server for that too.
+	 * If the caller is requesting that the attribute be created, we need to
+	 * make sure the attrdir is created.
+	 * The caller may also request that the first block of an existing attribute
+	 * be retrieved at the same time.
+	 */
+
+	if (open) {
+		/* need to mark the open owner busy during the RPC */
+		if ((error = nfs_open_owner_set_busy(noop, thd)))
+			goto nfsmout;
+		noopbusy = 1;
+	}
+
+	/*
+	 * We'd like to get updated post-open/lookup attributes for the
+	 * directory and we may also want to prefetch some data via READ.
+	 * We'd like the READ results to be last so that we can leave the
+	 * data in the mbufs until the end.
+	 *
+	 * At a minimum we're sending: PUTFH, LOOKUP/OPEN, GETATTR, PUTFH, GETATTR
+	 */
+	numops = 5;
+	if (!hadattrdir)
+		numops += 3;	// also sending: OPENATTR, GETATTR, OPENATTR
+	if (prefetch)
+		numops += 4;	// also sending: SAVEFH, RESTOREFH, NVERIFY, READ
+	nfsm_chain_build_alloc_init(error, &nmreq, 64 * NFSX_UNSIGNED + cnp->cn_namelen);
+	nfsm_chain_add_compound_header(error, &nmreq, "getnamedattr", numops);
+	if (hadattrdir) {
+		numops--;
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
+		nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, adnp->n_fhp, adnp->n_fhsize);
+	} else {
+		numops--;
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
+		nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, np->n_fhp, np->n_fhsize);
+		numops--;
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_OPENATTR);
+		nfsm_chain_add_32(error, &nmreq, create ? 1 : 0);
+		numops--;
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
+		NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
+		NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
+		nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap,
+			NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	}
+	if (open) {
+		numops--;
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN);
+		nfsm_chain_add_32(error, &nmreq, noop->noo_seqid);
+		nfsm_chain_add_32(error, &nmreq, accessMode);
+		nfsm_chain_add_32(error, &nmreq, denyMode);
+		nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid);
+		nfsm_chain_add_32(error, &nmreq, NFSX_UNSIGNED);
+		nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(noop->noo_cred));
+		nfsm_chain_add_32(error, &nmreq, create);
+		if (create) {
+			nfsm_chain_add_32(error, &nmreq, guarded);
+			VATTR_INIT(&vattr);
+			if (truncate)
+				VATTR_SET(&vattr, va_data_size, 0);
+			nfsm_chain_add_fattr4(error, &nmreq, &vattr, nmp);
+		}
+		nfsm_chain_add_32(error, &nmreq, NFS_CLAIM_NULL);
+		nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
+	} else {
+		numops--;
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUP);
+		nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
 	}
-	nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
-	nfsm_chain_add_fattr4(error, &nmreq, vap, nmp);
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
 	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
 	NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
 	nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap,
 		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
-	numops--;
-	nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH);
+	if (prefetch) {
+		numops--;
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH);
+	}
+	if (hadattrdir) {
+		numops--;
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
+		nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, adnp->n_fhp, adnp->n_fhsize);
+	} else {
+		numops--;
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
+		nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, np->n_fhp, np->n_fhsize);
+		numops--;
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_OPENATTR);
+		nfsm_chain_add_32(error, &nmreq, 0);
+	}
 	numops--;
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
 	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
 		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	if (prefetch) {
+		numops--;
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH);
+		numops--;
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_NVERIFY);
+		VATTR_INIT(&vattr);
+		VATTR_SET(&vattr, va_data_size, 0);
+		nfsm_chain_add_fattr4(error, &nmreq, &vattr, nmp);
+		numops--;
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_READ);
+		nfsm_chain_add_stateid(error, &nmreq, &stateid);
+		nfsm_chain_add_64(error, &nmreq, 0);
+		nfsm_chain_add_32(error, &nmreq, rlen);
+	}
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
-
-	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC4_COMPOUND,
-			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
-	if (!error) {
-		nfs_dulookup_start(&dul, dnp, ctx);
+	error = nfs_request_async(hadattrdir ? adnp : np, NULL, &nmreq, NFSPROC4_COMPOUND,
+			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, open ? R_NOINTR: 0, NULL, &req);
+	if (!error)
 		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
-	}
 
-	if ((lockerror = nfs_node_lock(dnp)))
-		error = lockerror;
+	if (hadattrdir && ((adlockerror = nfs_node_lock(adnp))))
+		error = adlockerror;
+	savedxid = xid;
 	nfsm_chain_skip_tag(error, &nmrep);
 	nfsm_chain_get_32(error, &nmrep, numops);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
-	nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH);
-	nfsmout_if(error);
-	nfsm_chain_op_check(error, &nmrep, NFS_OP_CREATE);
-	nfsm_chain_check_change_info(error, &nmrep, dnp);
-	bmlen = NFS_ATTR_BITMAP_LEN;
-	nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen);
-	/* At this point if we have no error, the object was created. */
-	/* if we don't get attributes, then we should lookitup. */
-	create_error = error;
-	nfsmout_if(error);
-	nfs_vattr_set_supported(bitmap, vap);
+	if (!hadattrdir) {
+		nfsm_chain_op_check(error, &nmrep, NFS_OP_OPENATTR);
+		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
+		nfsmout_if(error);
+		error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
+		nfsmout_if(error);
+		if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE) && fh.fh_len) {
+			if (!np->n_attrdirfh || (*np->n_attrdirfh != fh.fh_len)) {
+				/* (re)allocate attrdir fh buffer */
+				if (np->n_attrdirfh)
+					FREE(np->n_attrdirfh, M_TEMP);
+				MALLOC(np->n_attrdirfh, u_char*, fh.fh_len+1, M_TEMP, M_WAITOK);
+			}
+			if (np->n_attrdirfh) {
+				/* remember the attrdir fh in the node */
+				*np->n_attrdirfh = fh.fh_len;
+				bcopy(fh.fh_data, np->n_attrdirfh+1, fh.fh_len);
+				/* create busied node for attrdir */
+				struct componentname cn;
+				bzero(&cn, sizeof(cn));
+				cn.cn_nameptr = __CAST_AWAY_QUALIFIER(_PATH_FORKSPECIFIER, const, char *); /* "/..namedfork/" */
+				cn.cn_namelen = strlen(_PATH_FORKSPECIFIER);
+				cn.cn_nameiop = LOOKUP;
+				// XXX can't set parent correctly (to np) yet
+				error = nfs_nget(NFSTOMP(np), NULL, &cn, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, 0, &adnp);
+				if (!error) {
+					adlockerror = 0;
+					/* set the node busy */
+					SET(adnp->n_flag, NBUSY);
+					adbusyerror = 0;
+				}
+				/* if no adnp, oh well... */
+				error = 0;
+			}
+		}
+		NVATTR_CLEANUP(&nvattr);
+		fh.fh_len = 0;
+	}
+	if (open) {
+		nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN);
+		nfs_owner_seqid_increment(noop, NULL, error);
+		nfsm_chain_get_stateid(error, &nmrep, &newnofp->nof_stateid);
+		nfsm_chain_check_change_info(error, &nmrep, adnp);
+		nfsm_chain_get_32(error, &nmrep, rflags);
+		bmlen = NFS_ATTR_BITMAP_LEN;
+		nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen);
+		nfsm_chain_get_32(error, &nmrep, delegation);
+		if (!error)
+			switch (delegation) {
+			case NFS_OPEN_DELEGATE_NONE:
+				break;
+			case NFS_OPEN_DELEGATE_READ:
+			case NFS_OPEN_DELEGATE_WRITE:
+				nfsm_chain_get_stateid(error, &nmrep, &dstateid);
+				nfsm_chain_get_32(error, &nmrep, recall);
+				if (delegation == NFS_OPEN_DELEGATE_WRITE) /* skip the write delegation's space limit */
+					nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED);
+				/* if we have any trouble accepting the ACE, just invalidate it */
+				ace_type = ace_flags = ace_mask = len = 0;
+				nfsm_chain_get_32(error, &nmrep, ace_type);
+				nfsm_chain_get_32(error, &nmrep, ace_flags);
+				nfsm_chain_get_32(error, &nmrep, ace_mask);
+				nfsm_chain_get_32(error, &nmrep, len);
+				ace.ace_flags = nfs4_ace_nfstype_to_vfstype(ace_type, &error);
+				ace.ace_flags |= nfs4_ace_nfsflags_to_vfsflags(ace_flags);
+				ace.ace_rights = nfs4_ace_nfsmask_to_vfsrights(ace_mask);
+				if (!error && (len >= slen)) {
+					MALLOC(s, char*, len+1, M_TEMP, M_WAITOK);
+					if (s)
+						slen = len+1;
+					else
+						ace.ace_flags = 0;
+				}
+				if (s)
+					nfsm_chain_get_opaque(error, &nmrep, len, s);
+				else
+					nfsm_chain_adv(error, &nmrep, nfsm_rndup(len));
+				if (!error && s) {
+					s[len] = '\0';
+					if (nfs4_id2guid(s, &ace.ace_applicable, (ace_flags & NFS_ACE_IDENTIFIER_GROUP)))
+						ace.ace_flags = 0;
+				}
+				if (error || !s)
+					ace.ace_flags = 0;
+				if (s && (s != sbuf))
+					FREE(s, M_TEMP);
+				break;
+			default:
+				error = EBADRPC;
+				break;
+			}
+		/* At this point if we have no error, the object was created/opened. */
+		open_error = error;
+	} else {
+		nfsm_chain_op_check(error, &nmrep, NFS_OP_LOOKUP);
+	}
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
 	nfsmout_if(error);
-	NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap);
-	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL);
+	error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL, NULL);
 	nfsmout_if(error);
-	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
-		printf("nfs: create/%s didn't return filehandle?\n", tag);
-		error = EBADRPC;
+	if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE) || !fh.fh_len) {
+		error = EIO;
 		goto nfsmout;
 	}
-	/* directory attributes: if we don't get them, make sure to invalidate */
-	nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH);
+	if (prefetch)
+		nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
+	if (!hadattrdir)
+		nfsm_chain_op_check(error, &nmrep, NFS_OP_OPENATTR);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	savedxid = xid;
-	nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, NULL, &xid);
-	if (error)
-		NATTRINVALIDATE(dnp);
-
-nfsmout:
-	nfsm_chain_cleanup(&nmreq);
-	nfsm_chain_cleanup(&nmrep);
+	nfsmout_if(error);
+	xid = savedxid;
+	nfsm_chain_loadattr(error, &nmrep, adnp, nmp->nm_vers, &xid);
+	nfsmout_if(error);
 
-	if (!lockerror) {
-		if (!create_error && (dnp->n_flag & NNEGNCENTRIES)) {
-			dnp->n_flag &= ~NNEGNCENTRIES;
-			cache_purge_negatives(NFSTOV(dnp));
+	if (open) {
+		if (rflags & NFS_OPEN_RESULT_LOCKTYPE_POSIX)
+			newnofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK;
+		if (rflags & NFS_OPEN_RESULT_CONFIRM) {
+			if (adnp) {
+				nfs_node_unlock(adnp);
+				adlockerror = ENOENT;
+			}
+			NVATTR_CLEANUP(&nvattr);
+			error = nfs4_open_confirm_rpc(nmp, adnp ? adnp : np, fh.fh_data, fh.fh_len, noop, &newnofp->nof_stateid, thd, cred, &nvattr, &xid);
+			nfsmout_if(error);
+			savedxid = xid;
+			if ((adlockerror = nfs_node_lock(adnp)))
+				error = adlockerror;
 		}
-		dnp->n_flag |= NMODIFIED;
-		nfs_node_unlock(dnp);
-		/* nfs_getattr() will check changed and purge caches */
-		nfs_getattr(dnp, &dnvattr, ctx, NGA_CACHED);
 	}
 
-	if (!error && fh.fh_len) {
+nfsmout:
+	if (open && adnp && !adlockerror) {
+		if (!open_error && (adnp->n_flag & NNEGNCENTRIES)) {
+			adnp->n_flag &= ~NNEGNCENTRIES;
+			cache_purge_negatives(NFSTOV(adnp));
+		}
+		adnp->n_flag |= NMODIFIED;
+		nfs_node_unlock(adnp);
+		adlockerror = ENOENT;
+		nfs_getattr(adnp, NULL, ctx, NGA_CACHED);
+	}
+	if (adnp && !adlockerror && (error == ENOENT) &&
+	    (cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != CREATE) && negnamecache) {
+		/* add a negative entry in the name cache */
+		cache_enter(NFSTOV(adnp), NULL, cnp);
+		adnp->n_flag |= NNEGNCENTRIES;
+	}
+	if (adnp && !adlockerror) {
+		nfs_node_unlock(adnp);
+		adlockerror = ENOENT;
+	}
+	if (!error && !anp && fh.fh_len) {
 		/* create the vnode with the filehandle and attributes */
 		xid = savedxid;
-		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np);
-		if (!error)
-			newvp = NFSTOV(np);
+		error = nfs_nget(NFSTOMP(np), adnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &anp);
+		if (!error) {
+			*anpp = anp;
+			nfs_node_unlock(anp);
+		}
+		if (!error && open) {
+			nfs_open_file_add_open(newnofp, accessMode, denyMode, 0);
+			/* After we have a node, add our open file struct to the node */
+			nofp = newnofp;
+			error = nfs_open_file_find_internal(anp, noop, &nofp, 0, 0, 0);
+			if (error) {
+				/* This shouldn't happen, because we passed in a new nofp to use. */
+				printf("nfs_open_file_find_internal failed! %d\n", error);
+				nofp = NULL;
+			} else if (nofp != newnofp) {
+				/*
+				 * Hmm... an open file struct already exists.
+				 * Mark the existing one busy and merge our open into it.
+				 * Then destroy the one we created.
+				 * Note: there's no chance of an open conflict because the
+				 * open has already been granted.
+				 */
+				nofpbusyerror = nfs_open_file_set_busy(nofp, NULL);
+				nfs_open_file_add_open(nofp, accessMode, denyMode, 0);
+				nofp->nof_stateid = newnofp->nof_stateid;
+				if (newnofp->nof_flags & NFS_OPEN_FILE_POSIXLOCK)
+					nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK;
+				nfs_open_file_clear_busy(newnofp);
+				nfs_open_file_destroy(newnofp);
+				newnofp = NULL;
+			}
+			if (!error) {
+				newnofp = NULL;
+				nofpbusyerror = 0;
+				/* mark the node as holding a create-initiated open */
+				nofp->nof_flags |= NFS_OPEN_FILE_CREATE;
+				nofp->nof_creator = current_thread();
+				if (nofpp)
+					*nofpp = nofp;
+			}
+		}
 	}
+	NVATTR_CLEANUP(&nvattr);
+	if (open && ((delegation == NFS_OPEN_DELEGATE_READ) || (delegation == NFS_OPEN_DELEGATE_WRITE))) {
+		if (!error && anp && !recall) {
+			/* stuff the delegation state in the node */
+			lck_mtx_lock(&anp->n_openlock);
+			anp->n_openflags &= ~N_DELEG_MASK;
+			anp->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
+			anp->n_dstateid = dstateid;
+			anp->n_dace = ace;
+			if (anp->n_dlink.tqe_next == NFSNOLIST) {
+				lck_mtx_lock(&nmp->nm_lock);
+				if (anp->n_dlink.tqe_next == NFSNOLIST)
+					TAILQ_INSERT_TAIL(&nmp->nm_delegations, anp, n_dlink);
+				lck_mtx_unlock(&nmp->nm_lock);
+			}
+			lck_mtx_unlock(&anp->n_openlock);
+		} else {
+			/* give the delegation back */
+			if (anp) {
+				if (NFS_CMPFH(anp, fh.fh_data, fh.fh_len)) {
+					/* update delegation state and return it */
+					lck_mtx_lock(&anp->n_openlock);
+					anp->n_openflags &= ~N_DELEG_MASK;
+					anp->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? N_DELEG_READ : N_DELEG_WRITE);
+					anp->n_dstateid = dstateid;
+					anp->n_dace = ace;
+					if (anp->n_dlink.tqe_next == NFSNOLIST) {
+						lck_mtx_lock(&nmp->nm_lock);
+						if (anp->n_dlink.tqe_next == NFSNOLIST)
+							TAILQ_INSERT_TAIL(&nmp->nm_delegations, anp, n_dlink);
+						lck_mtx_unlock(&nmp->nm_lock);
+					}
+					lck_mtx_unlock(&anp->n_openlock);
+					/* don't need to send a separate delegreturn for fh */
+					fh.fh_len = 0;
+				}
+				/* return anp's current delegation */
+				nfs4_delegation_return(anp, 0, thd, cred);
+			}
+			if (fh.fh_len) /* return fh's delegation if it wasn't for anp */
+				nfs4_delegreturn_rpc(nmp, fh.fh_data, fh.fh_len, &dstateid, 0, thd, cred);
+		}
+	}
+	if (open) {
+		if (newnofp) {
+			/* need to cleanup our temporary nofp */
+			nfs_open_file_clear_busy(newnofp);
+			nfs_open_file_destroy(newnofp);
+			newnofp = NULL;
+		} else if (nofp && !nofpbusyerror) {
+			nfs_open_file_clear_busy(nofp);
+			nofpbusyerror = ENOENT;
+		}
+		if (inuse && nfs_mount_state_in_use_end(nmp, error)) {
+			inuse = 0;
+			nofp = newnofp = NULL;
+			rflags = delegation = recall = eof = rlen = retlen = 0;
+			ace.ace_flags = 0;
+			s = sbuf;
+			slen = sizeof(sbuf);
+			nfsm_chain_cleanup(&nmreq);
+			nfsm_chain_cleanup(&nmrep);
+			if (anp) {
+				vnode_put(NFSTOV(anp));
+				*anpp = anp = NULL;
+			}
+			hadattrdir = (adnp != NULL);
+			if (noopbusy) {
+				nfs_open_owner_clear_busy(noop);
+				noopbusy = 0;
+			}
+			goto restart;
+		}
+		if (noop) {
+			if (noopbusy) {
+				nfs_open_owner_clear_busy(noop);
+				noopbusy = 0;
+			}
+			nfs_open_owner_rele(noop);
+		}
+	}
+	if (!error && prefetch && nmrep.nmc_mhead) {
+		nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH);
+		nfsm_chain_op_check(error, &nmrep, NFS_OP_NVERIFY);
+		nfsm_chain_op_check(error, &nmrep, NFS_OP_READ);
+		nfsm_chain_get_32(error, &nmrep, eof);
+		nfsm_chain_get_32(error, &nmrep, retlen);
+		if (!error && anp) {
+			/*
+			 * There can be one problem with doing the prefetch.
+			 * Because we don't have the node before we start the RPC, we
+			 * can't have the buffer busy while the READ is performed.
+			 * So there is a chance that other I/O occurred on the same
+			 * range of data while we were performing this RPC.  If that
+			 * happens, then it's possible the data we have in the READ
+			 * response is no longer up to date.
+			 * Once we have the node and the buffer, we need to make sure
+			 * that there's no chance we could be putting stale data in
+			 * the buffer.
+			 * So, we check if the range read is dirty or if any I/O may
+			 * have occurred on it while we were performing our RPC.
+			 */
+			struct nfsbuf *bp = NULL;
+			int lastpg;
+			uint32_t pagemask;
+
+			retlen = MIN(retlen, rlen);
+
+			/* check if node needs size update or invalidation */
+			if (ISSET(anp->n_flag, NUPDATESIZE))
+				nfs_data_update_size(anp, 0);
+			if (!(error = nfs_node_lock(anp))) {
+				if (anp->n_flag & NNEEDINVALIDATE) {
+					anp->n_flag &= ~NNEEDINVALIDATE;
+					nfs_node_unlock(anp);
+					error = nfs_vinvalbuf(NFSTOV(anp), V_SAVE|V_IGNORE_WRITEERR, ctx, 1);
+					if (!error) /* let's play it safe and just drop the data */
+						error = EIO;
+				} else {
+					nfs_node_unlock(anp);
+				}
+			}
 
-	nfs_dulookup_finish(&dul, dnp, ctx);
-
-	/*
-	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
-	 * if we can succeed in looking up the object.
-	 */
-	if ((create_error == EEXIST) || (!create_error && !newvp)) {
-		error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
-		if (!error) {
-			newvp = NFSTOV(np);
-			if (vnode_vtype(newvp) != VLNK)
-				error = EEXIST;
+			/* calculate page mask for the range of data read */
+			lastpg = (trunc_page_32(retlen) - 1) / PAGE_SIZE;
+			pagemask = ((1 << (lastpg + 1)) - 1);
+
+			if (!error)
+				error = nfs_buf_get(anp, 0, nmp->nm_biosize, thd, NBLK_READ|NBLK_NOWAIT, &bp);
+			/* don't save the data if dirty or potential I/O conflict */
+			if (!error && bp && !bp->nb_dirtyoff && !(bp->nb_dirty & pagemask) &&
+			    timevalcmp(&anp->n_lastio, &now, <)) {
+				OSAddAtomic(1, &nfsstats.read_bios);
+				CLR(bp->nb_flags, (NB_DONE|NB_ASYNC));
+				SET(bp->nb_flags, NB_READ);
+				NFS_BUF_MAP(bp);
+				nfsm_chain_get_opaque(error, &nmrep, retlen, bp->nb_data);
+				if (error) {
+					bp->nb_error = error;
+					SET(bp->nb_flags, NB_ERROR);
+				} else {
+					bp->nb_offio = 0;
+					bp->nb_endio = rlen;
+					if ((retlen > 0) && (bp->nb_endio < (int)retlen))
+						bp->nb_endio = retlen;
+					if (eof || (retlen == 0)) {
+						/* zero out the remaining data (up to EOF) */
+						off_t rpcrem, eofrem, rem;
+						rpcrem = (rlen - retlen);
+						eofrem = anp->n_size - (NBOFF(bp) + retlen);
+						rem = (rpcrem < eofrem) ? rpcrem : eofrem;
+						if (rem > 0)
+							bzero(bp->nb_data + retlen, rem);
+					} else if ((retlen < rlen) && !ISSET(bp->nb_flags, NB_ERROR)) {
+						/* ugh... short read ... just invalidate for now... */
+						SET(bp->nb_flags, NB_INVAL);
+					}
+				}
+				nfs_buf_read_finish(bp);
+				microuptime(&anp->n_lastio);
+			}
+			if (bp)
+				nfs_buf_release(bp, 1);
 		}
+		error = 0; /* ignore any transient error in processing the prefetch */
 	}
-	if (!busyerror)
-		nfs_node_clear_busy(dnp);
-	if (error) {
-		if (newvp) {
-			nfs_node_unlock(np);
-			vnode_put(newvp);
+	if (adnp && !adbusyerror) {
+		nfs_node_clear_busy(adnp);
+		adbusyerror = ENOENT;
+	}
+	if (!busyerror) {
+		nfs_node_clear_busy(np);
+		busyerror = ENOENT;
+	}
+	if (adnp)
+		vnode_put(NFSTOV(adnp));
+	if (error && *anpp) {
+		vnode_put(NFSTOV(*anpp));
+		*anpp = NULL;
+	}
+	nfsm_chain_cleanup(&nmreq);
+	nfsm_chain_cleanup(&nmrep);
+	return (error);
+}
+
+/*
+ * Remove a named attribute.
+ */
+int
+nfs4_named_attr_remove(nfsnode_t np, nfsnode_t anp, const char *name, vfs_context_t ctx)
+{
+	nfsnode_t adnp = NULL;
+	struct nfsmount *nmp;
+	struct componentname cn;
+	struct vnop_remove_args vra;
+	int error, putanp = 0;
+
+	nmp = NFSTONMP(np);
+	if (!nmp)
+		return (ENXIO);
+
+	bzero(&cn, sizeof(cn));
+	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(name, const, char *);
+	cn.cn_namelen = strlen(name);
+	cn.cn_nameiop = DELETE;
+	cn.cn_flags = 0;
+
+	if (!anp) {
+		error = nfs4_named_attr_get(np, &cn, NFS_OPEN_SHARE_ACCESS_NONE,
+				0, ctx, &anp, NULL);
+		if ((!error && !anp) || (error == ENOATTR))
+			error = ENOENT;
+		if (error) {
+			if (anp) {
+				vnode_put(NFSTOV(anp));
+				anp = NULL;
+			}
+			goto out;
 		}
-	} else {
-		nfs_node_unlock(np);
-		*npp = np;
+		putanp = 1;
+	}
+
+	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx))))
+		goto out;
+	adnp = nfs4_named_attr_dir_get(np, 1, ctx);
+	nfs_node_clear_busy(np);
+	if (!adnp) {
+		error = ENOENT;
+		goto out;
 	}
+
+	vra.a_desc = &vnop_remove_desc;
+	vra.a_dvp = NFSTOV(adnp);
+	vra.a_vp = NFSTOV(anp);
+	vra.a_cnp = &cn;
+	vra.a_flags = 0;
+	vra.a_context = ctx;
+	error = nfs_vnop_remove(&vra);
+out:
+	if (adnp)
+		vnode_put(NFSTOV(adnp));
+	if (putanp)
+		vnode_put(NFSTOV(anp));
 	return (error);
 }
 
 int
-nfs4_vnop_mknod(
-	struct vnop_mknod_args /* {
+nfs4_vnop_getxattr(
+	struct vnop_getxattr_args /* {
 		struct vnodeop_desc *a_desc;
-		vnode_t a_dvp;
-		vnode_t *a_vpp;
-		struct componentname *a_cnp;
-		struct vnode_attr *a_vap;
+		vnode_t a_vp;
+		const char * a_name;
+		uio_t a_uio;
+		size_t *a_size;
+		int a_options;
 		vfs_context_t a_context;
 	} */ *ap)
 {
-	nfsnode_t np = NULL;
+	vfs_context_t ctx = ap->a_context;
 	struct nfsmount *nmp;
-	int error;
+	struct nfs_vattr nvattr;
+	struct componentname cn;
+	nfsnode_t anp;
+	int error = 0, isrsrcfork;
 
-	nmp = VTONMP(ap->a_dvp);
+	nmp = VTONMP(ap->a_vp);
 	if (!nmp)
 		return (ENXIO);
 
-	if (!VATTR_IS_ACTIVE(ap->a_vap, va_type))
-		return (EINVAL);
-	switch (ap->a_vap->va_type) {
-	case VBLK:
-	case VCHR:
-	case VFIFO:
-	case VSOCK:
-		break;
-	default:
+	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
 		return (ENOTSUP);
+	error = nfs_getattr(VTONFS(ap->a_vp), &nvattr, ctx, NGA_CACHED);
+	if (error)
+		return (error);
+	if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_NAMED_ATTR) &&
+	    !(nvattr.nva_flags & NFS_FFLAG_HAS_NAMED_ATTRS))
+		return (ENOATTR);
+
+	bzero(&cn, sizeof(cn));
+	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(ap->a_name, const, char *);
+	cn.cn_namelen = strlen(ap->a_name);
+	cn.cn_nameiop = LOOKUP;
+	cn.cn_flags = MAKEENTRY;
+
+	/* we'll normally try to prefetch data for xattrs... the resource fork is really a stream */
+	isrsrcfork = (bcmp(ap->a_name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) == 0);
+
+	error = nfs4_named_attr_get(VTONFS(ap->a_vp), &cn, NFS_OPEN_SHARE_ACCESS_NONE,
+			!isrsrcfork ? NFS_GET_NAMED_ATTR_PREFETCH : 0, ctx, &anp, NULL);
+	if ((!error && !anp) || (error == ENOENT))
+		error = ENOATTR;
+	if (!error) {
+		if (ap->a_uio)
+			error = nfs_bioread(anp, ap->a_uio, 0, ctx);
+		else
+			*ap->a_size = anp->n_size;
 	}
+	if (anp)
+		vnode_put(NFSTOV(anp));
+	return (error);
+}
 
-	error = nfs4_create_rpc(ap->a_context, VTONFS(ap->a_dvp), ap->a_cnp, ap->a_vap,
-			vtonfs_type(ap->a_vap->va_type, nmp->nm_vers), NULL, &np);
+int
+nfs4_vnop_setxattr(
+	struct vnop_setxattr_args /* {
+		struct vnodeop_desc *a_desc;
+		vnode_t a_vp;
+		const char * a_name;
+		uio_t a_uio;
+		int a_options;
+		vfs_context_t a_context;
+	} */ *ap)
+{
+	vfs_context_t ctx = ap->a_context;
+	int options = ap->a_options;
+	uio_t uio = ap->a_uio;
+	const char *name = ap->a_name;
+	struct nfsmount *nmp;
+	struct componentname cn;
+	nfsnode_t anp = NULL;
+	int error = 0, closeerror = 0, flags, isrsrcfork, isfinderinfo, empty = 0, i;
+#define FINDERINFOSIZE 32
+	uint8_t finfo[FINDERINFOSIZE];
+	uint32_t *finfop;
+	struct nfs_open_file *nofp = NULL;
+	char uio_buf [ UIO_SIZEOF(1) ];
+	uio_t auio;
+	struct vnop_write_args vwa;
+
+	nmp = VTONMP(ap->a_vp);
+	if (!nmp)
+		return (ENXIO);
+
+	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
+		return (ENOTSUP);
+
+	if ((options & XATTR_CREATE) && (options & XATTR_REPLACE))
+		return (EINVAL);
+
+	/* XXX limitation based on need to back up uio on short write */
+	if (uio_iovcnt(uio) > 1) {
+		printf("nfs4_vnop_setxattr: iovcnt > 1\n");
+		return (EINVAL);
+	}
+
+	bzero(&cn, sizeof(cn));
+	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(name, const, char *);
+	cn.cn_namelen = strlen(name);
+	cn.cn_nameiop = CREATE;
+	cn.cn_flags = MAKEENTRY;
+
+	isfinderinfo = (bcmp(name, XATTR_FINDERINFO_NAME, sizeof(XATTR_FINDERINFO_NAME)) == 0);
+	isrsrcfork = isfinderinfo ? 0 : (bcmp(name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) == 0);
+	if (!isrsrcfork)
+		uio_setoffset(uio, 0);
+	if (isfinderinfo) {
+		if (uio_resid(uio) != sizeof(finfo))
+			return (ERANGE);
+		error = uiomove((char*)&finfo, sizeof(finfo), uio);
+		if (error)
+			return (error);
+		/* setting a FinderInfo of all zeroes means remove the FinderInfo */
+		empty = 1;
+		for (i=0, finfop=(uint32_t*)&finfo; i < (int)(sizeof(finfo)/sizeof(uint32_t)); i++)
+			if (finfop[i]) {
+				empty = 0;
+				break;
+			}
+		if (empty && !(options & (XATTR_CREATE|XATTR_REPLACE))) {
+			error = nfs4_named_attr_remove(VTONFS(ap->a_vp), anp, name, ctx);
+			if (error == ENOENT)
+				error = 0;
+			return (error);
+		}
+		/* first, let's see if we get a create/replace error */
+	}
+
+	/*
+	 * create/open the xattr
+	 *
+	 * We need to make sure not to create it if XATTR_REPLACE.
+	 * For all xattrs except the resource fork, we also want to
+	 * truncate the xattr to remove any current data.  We'll do
+	 * that by setting the size to 0 on create/open.
+	 */
+	flags = 0;
+	if (!(options & XATTR_REPLACE))
+		flags |= NFS_GET_NAMED_ATTR_CREATE;
+	if (options & XATTR_CREATE)
+		flags |= NFS_GET_NAMED_ATTR_CREATE_GUARDED;
+	if (!isrsrcfork)
+		flags |= NFS_GET_NAMED_ATTR_TRUNCATE;
+
+	error = nfs4_named_attr_get(VTONFS(ap->a_vp), &cn, NFS_OPEN_SHARE_ACCESS_BOTH,
+			flags, ctx, &anp, &nofp);
+	if (!error && !anp)
+		error = ENOATTR;
+	if (error)
+		goto out;
+	/* grab the open state from the get/create/open */
+	if (nofp && !(error = nfs_open_file_set_busy(nofp, NULL))) {
+		nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
+		nofp->nof_creator = NULL;
+		nfs_open_file_clear_busy(nofp);
+	}
+
+	/* Setting an empty FinderInfo really means remove it, skip to the close/remove */
+	if (isfinderinfo && empty)
+		goto doclose;
+
+	/*
+	 * Write the data out and flush.
+	 *
+	 * For FinderInfo, we've already copied the data to finfo, so do I/O from there.
+	 */
+	vwa.a_desc = &vnop_write_desc;
+	vwa.a_vp = NFSTOV(anp);
+	vwa.a_uio = NULL;
+	vwa.a_ioflag = 0;
+	vwa.a_context = ctx;
+	if (isfinderinfo) {
+		auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_WRITE, &uio_buf, sizeof(uio_buf));
+		uio_addiov(auio, (uintptr_t)&finfo, sizeof(finfo));
+		vwa.a_uio = auio;
+	} else if (uio_resid(uio) > 0) {
+		vwa.a_uio = uio;
+	}
+	if (vwa.a_uio) {
+		error = nfs_vnop_write(&vwa);
+		if (!error)
+			error = nfs_flush(anp, MNT_WAIT, vfs_context_thread(ctx), 0);
+	}
+doclose:
+	/* Close the xattr. */
+	if (nofp) {
+		int busyerror = nfs_open_file_set_busy(nofp, NULL);
+		closeerror = nfs_close(anp, nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, ctx);
+		if (!busyerror)
+			nfs_open_file_clear_busy(nofp);
+	}
+	if (!error && isfinderinfo && empty) { /* Setting an empty FinderInfo really means remove it */
+		error = nfs4_named_attr_remove(VTONFS(ap->a_vp), anp, name, ctx);
+		if (error == ENOENT)
+			error = 0;
+	}
 	if (!error)
-		*ap->a_vpp = NFSTOV(np);
+		error = closeerror;
+out:
+	if (anp)
+		vnode_put(NFSTOV(anp));
+	if (error == ENOENT)
+		error = ENOATTR;
 	return (error);
 }
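+
+/*
+ * From user space, writing 32 bytes of zeroes to com.apple.FinderInfo thus
+ * removes the attribute rather than storing it (hypothetical example using
+ * the setxattr(2) syscall):
+ *
+ *	char zeroes[32] = { 0 };
+ *	if (setxattr(path, XATTR_FINDERINFO_NAME, zeroes, sizeof(zeroes), 0, 0))
+ *		err(1, "setxattr");
+ */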
 
 int
-nfs4_vnop_mkdir(
-	struct vnop_mkdir_args /* {
+nfs4_vnop_removexattr(
+	struct vnop_removexattr_args /* {
 		struct vnodeop_desc *a_desc;
-		vnode_t a_dvp;
-		vnode_t *a_vpp;
-		struct componentname *a_cnp;
-		struct vnode_attr *a_vap;
+		vnode_t a_vp;
+		const char * a_name;
+		int a_options;
 		vfs_context_t a_context;
 	} */ *ap)
 {
-	nfsnode_t np = NULL;
+	struct nfsmount *nmp = VTONMP(ap->a_vp);
 	int error;
 
-	error = nfs4_create_rpc(ap->a_context, VTONFS(ap->a_dvp), ap->a_cnp, ap->a_vap,
-			NFDIR, NULL, &np);
-	if (!error)
-		*ap->a_vpp = NFSTOV(np);
+	if (!nmp)
+		return (ENXIO);
+	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
+		return (ENOTSUP);
+
+	error = nfs4_named_attr_remove(VTONFS(ap->a_vp), NULL, ap->a_name, ap->a_context);
+	if (error == ENOENT)
+		error = ENOATTR;
 	return (error);
 }
 
 int
-nfs4_vnop_symlink(
-	struct vnop_symlink_args /* {
+nfs4_vnop_listxattr(
+	struct vnop_listxattr_args /* {
 		struct vnodeop_desc *a_desc;
-		vnode_t a_dvp;
-		vnode_t *a_vpp;
-		struct componentname *a_cnp;
-		struct vnode_attr *a_vap;
-		char *a_target;
+		vnode_t a_vp;
+		uio_t a_uio;
+		size_t *a_size;
+		int a_options;
 		vfs_context_t a_context;
 	} */ *ap)
 {
-	nfsnode_t np = NULL;
-	int error;
+	vfs_context_t ctx = ap->a_context;
+	nfsnode_t np = VTONFS(ap->a_vp);
+	uio_t uio = ap->a_uio;
+	nfsnode_t adnp = NULL;
+	struct nfsmount *nmp;
+	int error, done, i;
+	struct nfs_vattr nvattr;
+	uint64_t cookie, nextcookie, lbn = 0;
+	struct nfsbuf *bp = NULL;
+	struct nfs_dir_buf_header *ndbhp;
+	struct direntry *dp;
 
-	error = nfs4_create_rpc(ap->a_context, VTONFS(ap->a_dvp), ap->a_cnp, ap->a_vap,
-			NFLNK, ap->a_target, &np);
-	if (!error)
-		*ap->a_vpp = NFSTOV(np);
+	nmp = VTONMP(ap->a_vp);
+	if (!nmp)
+		return (ENXIO);
+
+	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
+		return (ENOTSUP);
+
+	error = nfs_getattr(np, &nvattr, ctx, NGA_CACHED);
+	if (error)
+		return (error);
+	if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_NAMED_ATTR) &&
+	    !(nvattr.nva_flags & NFS_FFLAG_HAS_NAMED_ATTRS))
+		return (0);
+
+	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx))))
+		return (error);
+	adnp = nfs4_named_attr_dir_get(np, 1, ctx);
+	nfs_node_clear_busy(np);
+	if (!adnp)
+		goto out;
+
+	if ((error = nfs_node_lock(adnp)))
+		goto out;
+
+	if (adnp->n_flag & NNEEDINVALIDATE) {
+		adnp->n_flag &= ~NNEEDINVALIDATE;
+		nfs_invaldir(adnp);
+		nfs_node_unlock(adnp);
+		error = nfs_vinvalbuf(NFSTOV(adnp), 0, ctx, 1);
+		if (!error)
+			error = nfs_node_lock(adnp);
+		if (error)
+			goto out;
+	}
+
+	/*
+	 * check for need to invalidate when (re)starting at beginning
+	 */
+	if (adnp->n_flag & NMODIFIED) {
+		nfs_invaldir(adnp);
+		nfs_node_unlock(adnp);
+		if ((error = nfs_vinvalbuf(NFSTOV(adnp), 0, ctx, 1)))
+			goto out;
+	} else {
+		nfs_node_unlock(adnp);
+	}
+	/* nfs_getattr() will check changed and purge caches */
+	if ((error = nfs_getattr(adnp, &nvattr, ctx, NGA_UNCACHED)))
+		goto out;
+
+	if (uio && (uio_resid(uio) == 0))
+		goto out;
+
+	done = 0;
+	nextcookie = lbn = 0;
+
+	while (!error && !done) {
+		OSAddAtomic(1, &nfsstats.biocache_readdirs);
+		cookie = nextcookie;
+getbuffer:
+		error = nfs_buf_get(adnp, lbn, NFS_DIRBLKSIZ, vfs_context_thread(ctx), NBLK_READ, &bp);
+		if (error)
+			goto out;
+		ndbhp = (struct nfs_dir_buf_header*)bp->nb_data;
+		if (!ISSET(bp->nb_flags, NB_CACHE) || !ISSET(ndbhp->ndbh_flags, NDB_FULL)) {
+			if (!ISSET(bp->nb_flags, NB_CACHE)) { /* initialize the buffer */
+				ndbhp->ndbh_flags = 0;
+				ndbhp->ndbh_count = 0;
+				ndbhp->ndbh_entry_end = sizeof(*ndbhp);
+				ndbhp->ndbh_ncgen = adnp->n_ncgen;
+			}
+			error = nfs_buf_readdir(bp, ctx);
+			if (error == NFSERR_DIRBUFDROPPED)
+				goto getbuffer;
+			if (error)
+				nfs_buf_release(bp, 1);
+			if (error && (error != ENXIO) && (error != ETIMEDOUT) && (error != EINTR) && (error != ERESTART)) {
+				if (!nfs_node_lock(adnp)) {
+					nfs_invaldir(adnp);
+					nfs_node_unlock(adnp);
+				}
+				nfs_vinvalbuf(NFSTOV(adnp), 0, ctx, 1);
+				if (error == NFSERR_BAD_COOKIE)
+					error = ENOENT;
+			}
+			if (error)
+				goto out;
+		}
+
+		/* go through all the entries copying/counting */
+		dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp);
+		for (i=0; i < ndbhp->ndbh_count; i++) {
+			if (!xattr_protected(dp->d_name)) {
+				if (uio == NULL) {
+					*ap->a_size += dp->d_namlen + 1;
+				} else if (uio_resid(uio) < (dp->d_namlen + 1)) {
+					error = ERANGE;
+				} else {
+					error = uiomove(dp->d_name, dp->d_namlen+1, uio);
+					if (error && (error != EFAULT))
+						error = ERANGE;
+				}
+			}
+			nextcookie = dp->d_seekoff;
+			dp = NFS_DIRENTRY_NEXT(dp);
+		}
+
+		if (i == ndbhp->ndbh_count) {
+			/* hit end of buffer, move to next buffer */
+			lbn = nextcookie;
+			/* if we also hit EOF, we're done */
+			if (ISSET(ndbhp->ndbh_flags, NDB_EOF))
+				done = 1;
+		}
+		if (!error && !done && (nextcookie == cookie)) {
+			printf("nfs readdir cookie didn't change 0x%llx, %d/%d\n", cookie, i, ndbhp->ndbh_count);
+			error = EIO;
+		}
+		nfs_buf_release(bp, 1);
+	}
+out:
+	if (adnp)
+		vnode_put(NFSTOV(adnp));
 	return (error);
 }
 
+#if NAMEDSTREAMS
 int
-nfs4_vnop_link(
-	struct vnop_link_args /* {
+nfs4_vnop_getnamedstream(
+	struct vnop_getnamedstream_args /* {
 		struct vnodeop_desc *a_desc;
 		vnode_t a_vp;
-		vnode_t a_tdvp;
-		struct componentname *a_cnp;
+		vnode_t *a_svpp;
+		const char *a_name;
+		enum nsoperation a_operation;
+		int a_flags;
 		vfs_context_t a_context;
 	} */ *ap)
 {
 	vfs_context_t ctx = ap->a_context;
-	vnode_t vp = ap->a_vp;
-	vnode_t tdvp = ap->a_tdvp;
-	struct componentname *cnp = ap->a_cnp;
-	int error = 0, lockerror = ENOENT, status;
 	struct nfsmount *nmp;
-	nfsnode_t np = VTONFS(vp);
-	nfsnode_t tdnp = VTONFS(tdvp);
-	int nfsvers, numops;
-	u_int64_t xid, savedxid;
-	struct nfsm_chain nmreq, nmrep;
-
-	if (vnode_mount(vp) != vnode_mount(tdvp))
-		return (EXDEV);
+	struct nfs_vattr nvattr;
+	struct componentname cn;
+	nfsnode_t anp;
+	int error = 0;
 
-	nmp = VTONMP(vp);
+	nmp = VTONMP(ap->a_vp);
 	if (!nmp)
 		return (ENXIO);
-	nfsvers = nmp->nm_vers;
-
-	/*
-	 * Push all writes to the server, so that the attribute cache
-	 * doesn't get "out of sync" with the server.
-	 * XXX There should be a better way!
-	 */
-	nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), V_IGNORE_WRITEERR);
 
-	if ((error = nfs_node_set_busy2(tdnp, np, vfs_context_thread(ctx))))
+	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
+		return (ENOTSUP);
+	error = nfs_getattr(VTONFS(ap->a_vp), &nvattr, ctx, NGA_CACHED);
+	if (error)
 		return (error);
+	if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_NAMED_ATTR) &&
+	    !(nvattr.nva_flags & NFS_FFLAG_HAS_NAMED_ATTRS))
+		return (ENOATTR);
 
-	nfsm_chain_null(&nmreq);
-	nfsm_chain_null(&nmrep);
-
-	// PUTFH(SOURCE), SAVEFH, PUTFH(DIR), LINK, GETATTR(DIR), RESTOREFH, GETATTR
-	numops = 7;
-	nfsm_chain_build_alloc_init(error, &nmreq, 29 * NFSX_UNSIGNED + cnp->cn_namelen);
-	nfsm_chain_add_compound_header(error, &nmreq, "link", numops);
-	numops--;
-	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
-	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
-	numops--;
-	nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH);
-	numops--;
-	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
-	nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
-	numops--;
-	nfsm_chain_add_32(error, &nmreq, NFS_OP_LINK);
-	nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
-	numops--;
-	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
-	numops--;
-	nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH);
-	numops--;
-	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
-	nfsm_chain_build_done(error, &nmreq);
-	nfsm_assert(error, (numops == 0), EPROTO);
-	nfsmout_if(error);
-	error = nfs_request(tdnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status);
-
-	if ((lockerror = nfs_node_lock2(tdnp, np))) {
-		error = lockerror;
-		goto nfsmout;
-	}
-	nfsm_chain_skip_tag(error, &nmrep);
-	nfsm_chain_get_32(error, &nmrep, numops);
-	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
-	nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH);
-	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
-	nfsm_chain_op_check(error, &nmrep, NFS_OP_LINK);
-	nfsm_chain_check_change_info(error, &nmrep, tdnp);
-	/* directory attributes: if we don't get them, make sure to invalidate */
-	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	savedxid = xid;
-	nfsm_chain_loadattr(error, &nmrep, tdnp, nfsvers, NULL, &xid);
-	if (error)
-		NATTRINVALIDATE(tdnp);
-	/* link attributes: if we don't get them, make sure to invalidate */
-	nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH);
-	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	xid = savedxid;
-	nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
-	if (error)
-		NATTRINVALIDATE(np);
-nfsmout:
-	nfsm_chain_cleanup(&nmreq);
-	nfsm_chain_cleanup(&nmrep);
-	if (!lockerror)
-		tdnp->n_flag |= NMODIFIED;
-	/* Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. */
-	if (error == EEXIST)
-		error = 0;
-	if (!error && (tdnp->n_flag & NNEGNCENTRIES)) {
-		tdnp->n_flag &= ~NNEGNCENTRIES;
-		cache_purge_negatives(tdvp);
-	}
-	if (!lockerror)
-		nfs_node_unlock2(tdnp, np);
-	nfs_node_clear_busy2(tdnp, np);
+	bzero(&cn, sizeof(cn));
+	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(ap->a_name, const, char *);
+	cn.cn_namelen = strlen(ap->a_name);
+	cn.cn_nameiop = LOOKUP;
+	cn.cn_flags = MAKEENTRY;
+
+	error = nfs4_named_attr_get(VTONFS(ap->a_vp), &cn, NFS_OPEN_SHARE_ACCESS_NONE,
+			0, ctx, &anp, NULL);
+	if ((!error && !anp) || (error == ENOENT))
+		error = ENOATTR;
+	if (!error && anp)
+		*ap->a_svpp = NFSTOV(anp);
+	else if (anp)
+		vnode_put(NFSTOV(anp));
 	return (error);
 }
 
 int
-nfs4_vnop_rmdir(
-	struct vnop_rmdir_args /* {
+nfs4_vnop_makenamedstream(
+	struct vnop_makenamedstream_args /* {
 		struct vnodeop_desc *a_desc;
-		vnode_t a_dvp;
+		vnode_t *a_svpp;
 		vnode_t a_vp;
-		struct componentname *a_cnp;
+		const char *a_name;
+		int a_flags;
 		vfs_context_t a_context;
 	} */ *ap)
 {
 	vfs_context_t ctx = ap->a_context;
-	vnode_t vp = ap->a_vp;
-	vnode_t dvp = ap->a_dvp;
-	struct componentname *cnp = ap->a_cnp;
+	struct nfsmount *nmp;
+	struct componentname cn;
+	nfsnode_t anp;
 	int error = 0;
-	nfsnode_t np = VTONFS(vp);
-	nfsnode_t dnp = VTONFS(dvp);
-	struct nfs_vattr dnvattr;
-	struct nfs_dulookup dul;
 
-	if (vnode_vtype(vp) != VDIR)
-		return (EINVAL);
+	nmp = VTONMP(ap->a_vp);
+	if (!nmp)
+		return (ENXIO);
 
-	if ((error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx))))
-		return (error);
+	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
+		return (ENOTSUP);
 
-	nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
-	nfs_dulookup_start(&dul, dnp, ctx);
+	bzero(&cn, sizeof(cn));
+	cn.cn_nameptr = __CAST_AWAY_QUALIFIER(ap->a_name, const, char *);
+	cn.cn_namelen = strlen(ap->a_name);
+	cn.cn_nameiop = CREATE;
+	cn.cn_flags = MAKEENTRY;
+
+	error = nfs4_named_attr_get(VTONFS(ap->a_vp), &cn, NFS_OPEN_SHARE_ACCESS_BOTH,
+			NFS_GET_NAMED_ATTR_CREATE, ctx, &anp, NULL);
+	if ((!error && !anp) || (error == ENOENT))
+		error = ENOATTR;
+	if (!error && anp)
+		*ap->a_svpp = NFSTOV(anp);
+	else if (anp)
+		vnode_put(NFSTOV(anp));
+	return (error);
+}
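
The two named-stream entry points above (getnamedstream and makenamedstream) differ only in the nameiop and open-access arguments; both end with the same normalization: a lookup that returns neither a node nor an error, or that returns ENOENT, surfaces as ENOATTR, and any node returned alongside an error is released rather than leaked. A minimal sketch of that tail logic with stand-in types (vnode_put() here is a placeholder, and the ENOATTR value is only assumed for platforms that lack the definition):

    #include <errno.h>
    #include <stddef.h>
    #include <stdio.h>

    #ifndef ENOATTR
    #define ENOATTR 93                  /* assumed value; not from the patch */
    #endif

    struct vnode { int refs; };

    static void vnode_put(struct vnode *vp) { vp->refs--; }   /* stand-in */

    /* Normalize a named-attribute lookup result, as both vnops above do. */
    static int
    normalize_named_attr(int error, struct vnode *anp, struct vnode **svpp)
    {
    	if ((!error && !anp) || (error == ENOENT))
    		error = ENOATTR;        /* "not found" becomes ENOATTR */
    	if (!error && anp)
    		*svpp = anp;            /* success: hand the node back */
    	else if (anp)
    		vnode_put(anp);         /* error but node returned: release it */
    	return (error);
    }

    int main(void)
    {
    	struct vnode *svp = NULL;
    	printf("%d\n", normalize_named_attr(ENOENT, NULL, &svp));  /* ENOATTR */
    	return (0);
    }
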
 
-	error = nfs4_remove_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
-			vfs_context_thread(ctx), vfs_context_ucred(ctx));
+int
+nfs4_vnop_removenamedstream(
+	struct vnop_removenamedstream_args /* {
+		struct vnodeop_desc *a_desc;
+		vnode_t a_vp;
+		vnode_t a_svp;
+		const char *a_name;
+		int a_flags;
+		vfs_context_t a_context;
+	} */ *ap)
+{
+	struct nfsmount *nmp = VTONMP(ap->a_vp);
+	nfsnode_t np = ap->a_vp ? VTONFS(ap->a_vp) : NULL;
+	nfsnode_t anp = ap->a_svp ? VTONFS(ap->a_svp) : NULL;
 
-	nfs_name_cache_purge(dnp, np, cnp, ctx);
-	/* nfs_getattr() will check changed and purge caches */
-	nfs_getattr(dnp, &dnvattr, ctx, NGA_CACHED);
-	nfs_dulookup_finish(&dul, dnp, ctx);
-	nfs_node_clear_busy2(dnp, np);
+	if (!nmp)
+		return (ENXIO);
 
 	/*
-	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
+	 * Given that a_svp is a named stream, checking for
+	 * named attribute support here is largely redundant.
 	 */
-	if (error == ENOENT)
-		error = 0;
-	if (!error) {
-		/*
-		 * remove nfsnode from hash now so we can't accidentally find it
-		 * again if another object gets created with the same filehandle
-		 * before this vnode gets reclaimed
-		 */
-		lck_mtx_lock(nfs_node_hash_mutex);
-		if (np->n_hflag & NHHASHED) {
-			LIST_REMOVE(np, n_hash);
-			np->n_hflag &= ~NHHASHED;
-			FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
-		}
-		lck_mtx_unlock(nfs_node_hash_mutex);
-	}
-	return (error);
+	if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR))
+		return (ENOTSUP);
+
+	return (nfs4_named_attr_remove(np, anp, ap->a_name, ap->a_context));
 }
 
+#endif
diff --git a/bsd/nfs/nfs_bio.c b/bsd/nfs/nfs_bio.c
index 1c1c19123..4bd1bff61 100644
--- a/bsd/nfs/nfs_bio.c
+++ b/bsd/nfs/nfs_bio.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -77,6 +77,7 @@
 #include <sys/kernel.h>
 #include <sys/ubc_internal.h>
 #include <sys/uio_internal.h>
+#include <sys/kpi_mbuf.h>
 
 #include <sys/vm.h>
 #include <sys/vmparam.h>
@@ -684,6 +685,21 @@ nfs_buf_get(
 loop:
 	lck_mtx_lock(nfs_buf_mutex);
 
+	/* wait for any buffer invalidation/flushing to complete */
+	while (np->n_bflag & NBINVALINPROG) {
+		np->n_bflag |= NBINVALWANT;
+		ts.tv_sec = 2;
+		ts.tv_nsec = 0;
+		msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_buf_get_invalwait", &ts);
+		if ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0))) {
+			lck_mtx_unlock(nfs_buf_mutex);
+			FSDBG_BOT(541, np, blkno, 0, error);
+			return (error);
+		}
+		if (np->n_bflag & NBINVALINPROG)
+			slpflag = 0;
+	}
+
 	/* check for existence of nfsbuf in cache */
 	if ((bp = nfs_buf_incore(np, blkno))) {
 		/* if busy, set wanted and wait */
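
The NBINVALINPROG/NBINVALWANT loop added at the top of nfs_buf_get() above is the kernel's flag-plus-msleep idiom: waiters advertise interest with a "want" bit, sleep on the flag's address with a two-second timeout, recheck for signals, and clear the catchable-sleep flag once they have been woken at least once. A rough user-space analogue with pthreads (all names are illustrative; msleep's timeout and signal handling have no direct condvar equivalent):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t buf_mutex = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  buf_cond  = PTHREAD_COND_INITIALIZER;
    static bool inval_in_progress;      /* cf. NBINVALINPROG */
    static bool inval_wanted;           /* cf. NBINVALWANT */

    /* waiter: block until no invalidation is in progress */
    static void
    wait_for_invalidation(void)
    {
    	pthread_mutex_lock(&buf_mutex);
    	while (inval_in_progress) {
    		inval_wanted = true;
    		/* the kernel version rechecks nfs_sigintr() after waking */
    		pthread_cond_wait(&buf_cond, &buf_mutex);
    	}
    	pthread_mutex_unlock(&buf_mutex);
    }

    /* invalidator: clear the flag and wake anyone who set the want bit */
    static void
    invalidation_done(void)
    {
    	pthread_mutex_lock(&buf_mutex);
    	inval_in_progress = false;
    	if (inval_wanted) {
    		inval_wanted = false;
    		pthread_cond_broadcast(&buf_cond);
    	}
    	pthread_mutex_unlock(&buf_mutex);
    }

    int main(void) { invalidation_done(); wait_for_invalidation(); return (0); }
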
@@ -1041,8 +1057,8 @@ pagelist_cleanup_done:
 			if (start < NBOFF(bp))
 				start = NBOFF(bp);
 			if (end > start) {
-				if (!(rv = ubc_sync_range(vp, start, end, UBC_INVALIDATE)))
-					printf("nfs_buf_release(): ubc_sync_range failed!\n");
+				if ((rv = ubc_msync(vp, start, end, NULL, UBC_INVALIDATE)))
+					printf("nfs_buf_release(): ubc_msync failed! error %d\n", rv);
 			}
 		}
 		CLR(bp->nb_flags, NB_PAGELIST);
@@ -1508,7 +1524,7 @@ nfs_buf_read_finish(struct nfsbuf *bp)
 		bp->nb_valid = (1 << (round_page_32(bp->nb_validend) / PAGE_SIZE)) - 1;
 		if (bp->nb_validend & PAGE_MASK) {
 			/* zero-fill remainder of last page */
-			bzero(bp->nb_data + bp->nb_validend, bp->nb_bufsize - bp->nb_validend);
+			bzero(bp->nb_data + bp->nb_validend, PAGE_SIZE - (bp->nb_validend & PAGE_MASK));
 		}
 	}
 	nfs_buf_iodone(bp);
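
The bzero change above narrows the zero-fill to the remainder of the last valid page: the old length ran from nb_validend all the way to the end of the buffer, touching pages beyond the last one the nb_valid bitmap just marked valid. With 4 KiB pages the difference is easy to see (a worked example, not code from the patch):

    #include <stdio.h>

    #define PAGE_SIZE 4096
    #define PAGE_MASK (PAGE_SIZE - 1)

    int main(void)
    {
    	int nb_validend = 6000;     /* valid data ends partway into page 1 */
    	int nb_bufsize  = 32768;    /* an 8-page buffer */

    	int old_len = nb_bufsize - nb_validend;               /* 26768 bytes */
    	int new_len = PAGE_SIZE - (nb_validend & PAGE_MASK);  /*  2192 bytes */

    	printf("old zero-fill: %d bytes, new: %d bytes\n", old_len, new_len);
    	return (0);
    }
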
@@ -1649,6 +1665,8 @@ finish:
 		kauth_cred_ref(cred);
 	cb = req->r_callback;
 	bp = cb.rcb_bp;
+	if (cb.rcb_func) /* take an extra reference on the nfsreq in case we want to resend it later due to grace error */
+		nfs_request_ref(req, 0);
 
 	nmp = NFSTONMP(np);
 	if (!nmp) {
@@ -1673,23 +1691,55 @@ finish:
 	error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req, auio, &rlen, &eof);
 	if ((error == EINPROGRESS) && cb.rcb_func) {
 		/* async request restarted */
+		if (cb.rcb_func)
+			nfs_request_rele(req);
 		if (IS_VALID_CRED(cred))
 			kauth_cred_unref(&cred);
 		return;
 	}
 	if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) && !ISSET(bp->nb_flags, NB_ERROR)) {
 		lck_mtx_lock(&nmp->nm_lock);
-		if ((error != NFSERR_GRACE) && (cb.rcb_args[2] == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) {
-			printf("nfs_buf_read_rpc_finish: error %d, initiating recovery\n", error);
-			nmp->nm_state |= NFSSTA_RECOVER;
-			nfs_mount_sock_thread_wake(nmp);
+		if ((error != NFSERR_OLD_STATEID) && (error != NFSERR_GRACE) && (cb.rcb_args[2] == nmp->nm_stategenid)) {
+			NP(np, "nfs_buf_read_rpc_finish: error %d @ 0x%llx, 0x%x 0x%x, initiating recovery",
+				error, NBOFF(bp)+offset, cb.rcb_args[2], nmp->nm_stategenid);
+			nfs_need_recover(nmp, error);
 		}
 		lck_mtx_unlock(&nmp->nm_lock);
-		if (error == NFSERR_GRACE)
-			tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
-		if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
-			rlen = 0;
-			goto readagain;
+		if (np->n_flag & NREVOKE) {
+			error = EIO;
+		} else {
+			if (error == NFSERR_GRACE) {
+				if (cb.rcb_func) {
+					/*
+					 * For an async I/O request, handle a grace delay just like
+					 * jukebox errors.  Set the resend time and queue it up.
+					 */
+					struct timeval now;
+					if (req->r_nmrep.nmc_mhead) {
+						mbuf_freem(req->r_nmrep.nmc_mhead);
+						req->r_nmrep.nmc_mhead = NULL;
+					}
+					req->r_error = 0;
+					microuptime(&now);
+					lck_mtx_lock(&req->r_mtx);
+					req->r_resendtime = now.tv_sec + 2;
+					req->r_xid = 0;                 // get a new XID
+					req->r_flags |= R_RESTART;
+					req->r_start = 0;
+					nfs_asyncio_resend(req);
+					lck_mtx_unlock(&req->r_mtx);
+					if (IS_VALID_CRED(cred))
+						kauth_cred_unref(&cred);
+					/* Note: the nfsreq reference taken above will be dropped later when the request is finished */
+					return;
+				}
+				/* otherwise, just pause a couple seconds and retry */
+				tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
+			}
+			if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
+				rlen = 0;
+				goto readagain;
+			}
 		}
 	}
 	if (error) {
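
The new grace-period branch above treats NFSERR_GRACE on an async read like a jukebox delay rather than a hard failure: the stale reply is freed, the error cleared, and the request requeued with a resend time two seconds out and a zeroed XID so the retry goes out as a fresh transaction. The requeue step boiled down to its essentials (the queue itself is elided; struct request and schedule_resend are stand-ins for the nfsreq machinery):

    #include <stdio.h>
    #include <time.h>

    #define R_RESTART 0x1

    struct request {
    	time_t   resendtime;   /* cf. r_resendtime */
    	unsigned xid;          /* cf. r_xid; 0 means "assign a fresh one" */
    	int      flags;        /* cf. r_flags */
    };

    /* defer a request instead of failing it, as the grace path does */
    static void
    schedule_resend(struct request *req)
    {
    	req->resendtime = time(NULL) + 2;  /* try again in a couple seconds */
    	req->xid = 0;                      /* force a new transaction ID */
    	req->flags |= R_RESTART;
    	/* a real implementation would hand req to the async resend queue here */
    }

    int main(void)
    {
    	struct request r = { 0, 42, 0 };
    	schedule_resend(&r);
    	printf("xid %u, flags 0x%x\n", r.xid, r.flags);
    	return (0);
    }
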
@@ -1734,6 +1784,7 @@ readagain:
 				rreq = NULL;
 				goto finish;
 			}
+			nfs_request_rele(req);
 			/*
 			 * We're done here.
 			 * Outstanding RPC count is unchanged.
@@ -1746,6 +1797,8 @@ readagain:
 	}
 
 out:
+	if (cb.rcb_func)
+		nfs_request_rele(req);
 	if (IS_VALID_CRED(cred))
 		kauth_cred_unref(&cred);
 
@@ -1786,7 +1839,8 @@ nfs_buf_readahead(nfsnode_t np, int ioflag, daddr64_t *rabnp, daddr64_t lastrabn
 {
 	struct nfsmount *nmp = NFSTONMP(np);
 	struct nfsbuf *bp;
-	int error = 0, nra;
+	int error = 0;
+	uint32_t nra;
 
 	if (!nmp)
 		return (ENXIO);
@@ -1842,7 +1896,6 @@ nfs_bioread(nfsnode_t np, uio_t uio, int ioflag, vfs_context_t ctx)
 {
 	vnode_t vp = NFSTOV(np);
 	struct nfsbuf *bp = NULL;
-	struct nfs_vattr nvattr;
 	struct nfsmount *nmp = VTONMP(vp);
 	daddr64_t lbn, rabn = 0, lastrabn, maxrabn = -1;
 	off_t diff;
@@ -1903,7 +1956,7 @@ nfs_bioread(nfsnode_t np, uio_t uio, int ioflag, vfs_context_t ctx)
 	modified = (np->n_flag & NMODIFIED);
 	nfs_node_unlock(np);
 	/* nfs_getattr() will check changed and purge caches */
-	error = nfs_getattr(np, &nvattr, ctx, modified ? NGA_UNCACHED : NGA_CACHED);
+	error = nfs_getattr(np, NULL, ctx, modified ? NGA_UNCACHED : NGA_CACHED);
 	if (error) {
 		FSDBG_BOT(514, np, 0xd1e0004, 0, error);
 		return (error);
@@ -1986,6 +2039,12 @@ nfs_bioread(nfsnode_t np, uio_t uio, int ioflag, vfs_context_t ctx)
 		np->n_lastread = (uio_offset(uio) - 1) / biosize;
 		nfs_node_unlock(np);
 
+		if ((uio_resid(uio) <= 0) || (uio_offset(uio) >= (off_t)np->n_size)) {
+			nfs_data_unlock(np);
+			FSDBG_BOT(514, np, uio_offset(uio), uio_resid(uio), 0xaaaaaaaa);
+			return (0);
+		}
+
 		/* adjust readahead block number, if necessary */
 		if (rabn < lbn)
 			rabn = lbn;
@@ -2000,12 +2059,6 @@ nfs_bioread(nfsnode_t np, uio_t uio, int ioflag, vfs_context_t ctx)
 			readaheads = 1;
 		}
 
-		if ((uio_resid(uio) <= 0) || (uio_offset(uio) >= (off_t)np->n_size)) {
-			nfs_data_unlock(np);
-			FSDBG_BOT(514, np, uio_offset(uio), uio_resid(uio), 0xaaaaaaaa);
-			return (0);
-		}
-
 		OSAddAtomic(1, &nfsstats.biocache_reads);
 
 		/*
@@ -2182,7 +2235,7 @@ buffer_ready:
 int
 nfs_async_write_start(struct nfsmount *nmp)
 {
-	int error = 0, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
+	int error = 0, slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0;
 	struct timespec ts = {1, 0};
 
 	if (nfs_max_async_writes <= 0)
@@ -2301,7 +2354,7 @@ nfs_buf_write(struct nfsbuf *bp)
 		}
 		SET(bp->nb_flags, NB_WRITEINPROG);
 		error = nmp->nm_funcs->nf_commit_rpc(np, NBOFF(bp) + bp->nb_dirtyoff,
-				bp->nb_dirtyend - bp->nb_dirtyoff, bp->nb_wcred);
+				bp->nb_dirtyend - bp->nb_dirtyoff, bp->nb_wcred, bp->nb_verf);
 		CLR(bp->nb_flags, NB_WRITEINPROG);
 		if (error) {
 			if (error != NFSERR_STALEWRITEVERF) {
@@ -2610,7 +2663,7 @@ again:
 	CLR(bp->nb_flags, NB_WRITEINPROG);
 
 	if (!error && (commit != NFS_WRITE_FILESYNC)) {
-		error = nmp->nm_funcs->nf_commit_rpc(np, NBOFF(bp), bp->nb_bufsize, cred);
+		error = nmp->nm_funcs->nf_commit_rpc(np, NBOFF(bp), bp->nb_bufsize, cred, wverf);
 		if (error == NFSERR_STALEWRITEVERF) {
 			/* verifier changed, so we need to restart all the writes */
 			iomode = NFS_WRITE_FILESYNC;
@@ -2731,6 +2784,9 @@ nfs_buf_write_rpc(struct nfsbuf *bp, int iomode, thread_t thd, kauth_cred_t cred
 		} else {
 			nfs_buf_write_finish(bp, thd, cred);
 		}
+		/* It may have just been an interrupt... that's OK */
+		if (!ISSET(bp->nb_flags, NB_ERROR))
+			error = 0;
 	}
 
 	return (error);
@@ -2765,6 +2821,8 @@ finish:
 		kauth_cred_ref(cred);
 	cb = req->r_callback;
 	bp = cb.rcb_bp;
+	if (cb.rcb_func) /* take an extra reference on the nfsreq in case we want to resend it later due to grace error */
+		nfs_request_ref(req, 0);
 
 	nmp = NFSTONMP(np);
 	if (!nmp) {
@@ -2785,23 +2843,55 @@ finish:
 	error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req, &committed, &rlen, &wverf);
 	if ((error == EINPROGRESS) && cb.rcb_func) {
 		/* async request restarted */
+		if (cb.rcb_func)
+			nfs_request_rele(req);
 		if (IS_VALID_CRED(cred))
 			kauth_cred_unref(&cred);
 		return;
 	}
 	if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) && !ISSET(bp->nb_flags, NB_ERROR)) {
 		lck_mtx_lock(&nmp->nm_lock);
-		if ((error != NFSERR_GRACE) && (cb.rcb_args[2] == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) {
-			printf("nfs_buf_write_rpc_finish: error %d, initiating recovery\n", error);
-			nmp->nm_state |= NFSSTA_RECOVER;
-			nfs_mount_sock_thread_wake(nmp);
+		if ((error != NFSERR_OLD_STATEID) && (error != NFSERR_GRACE) && (cb.rcb_args[2] == nmp->nm_stategenid)) {
+			NP(np, "nfs_buf_write_rpc_finish: error %d @ 0x%llx, 0x%x 0x%x, initiating recovery",
+				error, NBOFF(bp)+offset, cb.rcb_args[2], nmp->nm_stategenid);
+			nfs_need_recover(nmp, error);
 		}
 		lck_mtx_unlock(&nmp->nm_lock);
-		if (error == NFSERR_GRACE)
-			tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
-		if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
-			rlen = 0;
-			goto writeagain;
+		if (np->n_flag & NREVOKE) {
+			error = EIO;
+		} else {
+			if (error == NFSERR_GRACE) {
+				if (cb.rcb_func) {
+					/*
+					 * For an async I/O request, handle a grace delay just like
+					 * jukebox errors.  Set the resend time and queue it up.
+					 */
+					struct timeval now;
+					if (req->r_nmrep.nmc_mhead) {
+						mbuf_freem(req->r_nmrep.nmc_mhead);
+						req->r_nmrep.nmc_mhead = NULL;
+					}
+					req->r_error = 0;
+					microuptime(&now);
+					lck_mtx_lock(&req->r_mtx);
+					req->r_resendtime = now.tv_sec + 2;
+					req->r_xid = 0;                 // get a new XID
+					req->r_flags |= R_RESTART;
+					req->r_start = 0;
+					nfs_asyncio_resend(req);
+					lck_mtx_unlock(&req->r_mtx);
+					if (IS_VALID_CRED(cred))
+						kauth_cred_unref(&cred);
+					/* Note: the nfsreq reference taken above will be dropped later when the request is finished */
+					return;
+				}
+				/* otherwise, just pause a couple seconds and retry */
+				tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
+			}
+			if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
+				rlen = 0;
+				goto writeagain;
+			}
 		}
 	}
 	if (error) {
@@ -2863,6 +2953,7 @@ writeagain:
 				wreq = NULL;
 				goto finish;
 			}
+			nfs_request_rele(req);
 			/*
 			 * We're done here.
 			 * Outstanding RPC count is unchanged.
@@ -2875,8 +2966,10 @@ writeagain:
 	}
 
 out:
-	if (cb.rcb_func)
+	if (cb.rcb_func) {
 		nfs_async_write_done(nmp);
+		nfs_request_rele(req);
+	}
 	/*
 	 * Decrement outstanding RPC count on buffer
 	 * and call nfs_buf_write_finish on last RPC.
@@ -2918,6 +3011,7 @@ nfs_flushcommits(nfsnode_t np, int nowait)
 	struct nfsbuflists blist, commitlist;
 	int error = 0, retv, wcred_set, flags, dirty;
 	u_quad_t off, endoff, toff;
+	uint64_t wverf;
 	u_int32_t count;
 	kauth_cred_t wcred = NULL;
 
@@ -2956,6 +3050,7 @@ nfs_flushcommits(nfsnode_t np, int nowait)
 	if (nowait)
 		flags |= NBI_NOWAIT;
 	lck_mtx_lock(nfs_buf_mutex);
+	wverf = nmp->nm_verf;
 	if (!nfs_buf_iterprepare(np, &blist, flags)) {
 		while ((bp = LIST_FIRST(&blist))) {
 			LIST_REMOVE(bp, nb_vnbufs);
@@ -2965,8 +3060,8 @@ nfs_flushcommits(nfsnode_t np, int nowait)
 				continue;
 			if (ISSET(bp->nb_flags, NB_NEEDCOMMIT))
 				nfs_buf_check_write_verifier(np, bp);
-			if (((bp->nb_flags & (NB_DELWRI | NB_NEEDCOMMIT))
-				!= (NB_DELWRI | NB_NEEDCOMMIT))) {
+			if (((bp->nb_flags & (NB_DELWRI | NB_NEEDCOMMIT)) != (NB_DELWRI | NB_NEEDCOMMIT)) ||
+			    (bp->nb_verf != wverf)) {
 				nfs_buf_drop(bp);
 				continue;
 			}
@@ -3066,13 +3161,13 @@ nfs_flushcommits(nfsnode_t np, int nowait)
 			count = 0;
 		else
 			count = (endoff - off);
-		retv = nmp->nm_funcs->nf_commit_rpc(np, off, count, wcred);
+		retv = nmp->nm_funcs->nf_commit_rpc(np, off, count, wcred, wverf);
 	} else {
 		retv = 0;
 		LIST_FOREACH(bp, &commitlist, nb_vnbufs) {
 			toff = NBOFF(bp) + bp->nb_dirtyoff;
 			count = bp->nb_dirtyend - bp->nb_dirtyoff;
-			retv = nmp->nm_funcs->nf_commit_rpc(np, toff, count, bp->nb_wcred);
+			retv = nmp->nm_funcs->nf_commit_rpc(np, toff, count, bp->nb_wcred, wverf);
 			if (retv)
 				break;
 		}
@@ -3161,7 +3256,7 @@ nfs_flush(nfsnode_t np, int waitfor, thread_t thd, int ignore_writeerr)
 		goto out;
 	}
 	nfsvers = nmp->nm_vers;
-	if (nmp->nm_flag & NFSMNT_INT)
+	if (NMFLAG(nmp, INTR))
 		slpflag = PCATCH;
 
 	if (!LIST_EMPTY(&np->n_dirtyblkhd)) {
@@ -3173,8 +3268,9 @@ nfs_flush(nfsnode_t np, int waitfor, thread_t thd, int ignore_writeerr)
 	lck_mtx_lock(nfs_buf_mutex);
 	while (np->n_bflag & NBFLUSHINPROG) {
 		np->n_bflag |= NBFLUSHWANT;
-		msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_flush", NULL);
-		if ((error = nfs_sigintr(NFSTONMP(np), NULL, thd, 0))) {
+		error = msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_flush", NULL);
+		if ((error && (error != EWOULDBLOCK)) ||
+		    ((error = nfs_sigintr(NFSTONMP(np), NULL, thd, 0)))) {
 			lck_mtx_unlock(nfs_buf_mutex);
 			goto out;
 		}
@@ -3458,8 +3554,10 @@ nfs_vinvalbuf_internal(
 					if (error) {
 						FSDBG(554, bp, 0xd00dee, 0xbad, error);
 						nfs_node_lock_force(np);
-						np->n_error = error;
-						np->n_flag |= NWRITEERR;
+						if ((error != EINTR) && (error != ERESTART)) {
+							np->n_error = error;
+							np->n_flag |= NWRITEERR;
+						}
 						/*
 						 * There was a write error and we need to
 						 * invalidate attrs to sync with server.
@@ -3468,7 +3566,7 @@ nfs_vinvalbuf_internal(
 						 */
 						NATTRINVALIDATE(np);
 						nfs_node_unlock(np);
-						if (error == EINTR) {
+						if ((error == EINTR) || (error == ERESTART)) {
 							/*
 							 * Abort on EINTR.  If we don't, we could
 							 * be stuck in this loop forever because
@@ -3521,12 +3619,13 @@ nfs_vinvalbuf2(vnode_t vp, int flags, thread_t thd, kauth_cred_t cred, int intrf
 {
 	nfsnode_t np = VTONFS(vp);
 	struct nfsmount *nmp = VTONMP(vp);
-	int error, rv, slpflag, slptimeo, nflags;
+	int error, slpflag, slptimeo, nflags, retry = 0;
+	struct timespec ts = { 2, 0 };
 	off_t size;
 
 	FSDBG_TOP(554, np, flags, intrflg, 0);
 
-	if (nmp && !(nmp->nm_flag & NFSMNT_INT))
+	if (nmp && !NMFLAG(nmp, INTR))
 		intrflg = 0;
 	if (intrflg) {
 		slpflag = PCATCH;
@@ -3540,16 +3639,19 @@ nfs_vinvalbuf2(vnode_t vp, int flags, thread_t thd, kauth_cred_t cred, int intrf
 	lck_mtx_lock(nfs_buf_mutex);
 	while (np->n_bflag & NBINVALINPROG) {
 		np->n_bflag |= NBINVALWANT;
-		msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_vinvalbuf", NULL);
+		msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_vinvalbuf", &ts);
 		if ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0))) {
 			lck_mtx_unlock(nfs_buf_mutex);
 			return (error);
 		}
+		if (np->n_bflag & NBINVALINPROG)
+			slpflag = 0;
 	}
 	np->n_bflag |= NBINVALINPROG;
 	lck_mtx_unlock(nfs_buf_mutex);
 
 	/* Now, flush as required.  */
+again:
 	error = nfs_vinvalbuf_internal(np, flags, thd, cred, slpflag, 0);
 	while (error) {
 		FSDBG(554, np, 0, 0, error);
@@ -3560,8 +3662,15 @@ nfs_vinvalbuf2(vnode_t vp, int flags, thread_t thd, kauth_cred_t cred, int intrf
 
 	/* get the pages out of vm also */
 	if (UBCINFOEXISTS(vp) && (size = ubc_getsize(vp)))
-		if (!(rv = ubc_sync_range(vp, 0, size, UBC_PUSHALL | UBC_SYNC | UBC_INVALIDATE)))
-			panic("nfs_vinvalbuf(): ubc_sync_range failed!");
+		if ((error = ubc_msync(vp, 0, size, NULL, UBC_PUSHALL | UBC_SYNC | UBC_INVALIDATE))) {
+			if (error == EINVAL)
+				panic("nfs_vinvalbuf(): ubc_msync failed! error %d", error);
+			if (retry++ < 10) /* retry invalidating a few times */
+				goto again;
+			/* give up */
+			printf("nfs_vinvalbuf(): ubc_msync failed! error %d\n", error);
+		}
 done:
 	lck_mtx_lock(nfs_buf_mutex);
 	nflags = np->n_bflag;
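
The reworked ubc_msync error handling above encodes a three-tier policy: EINVAL is treated as a caller bug and panics, anything else is retried up to ten times, and a persistent failure is logged and abandoned rather than looping forever (the old code effectively panicked on any failure). The same tiering in a self-contained form (flush_pages() and its error are stand-ins):

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* stand-in for ubc_msync(); returns 0 or an errno value */
    static int flush_pages(void) { return (EBUSY); }

    static int
    flush_with_retries(void)
    {
    	int error, retry = 0;
    again:
    	if ((error = flush_pages())) {
    		if (error == EINVAL)
    			abort();        /* caller bug: fail hard, like panic() */
    		if (retry++ < 10)       /* transient: retry a few times */
    			goto again;
    		fprintf(stderr, "flush failed, giving up: error %d\n", error);
    	}
    	return (error);
    }

    int main(void) { return (flush_with_retries() ? 1 : 0); }
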
@@ -3574,6 +3683,57 @@ done:
 	return (error);
 }
 
+/*
+ * Wait for any busy buffers to complete.
+ */
+void
+nfs_wait_bufs(nfsnode_t np)
+{
+	struct nfsbuf *bp;
+	struct nfsbuflists blist;
+	int error = 0;
+
+	lck_mtx_lock(nfs_buf_mutex);
+	if (!nfs_buf_iterprepare(np, &blist, NBI_CLEAN)) {
+		while ((bp = LIST_FIRST(&blist))) {
+			LIST_REMOVE(bp, nb_vnbufs);
+			LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs);
+			nfs_buf_refget(bp);
+			while ((error = nfs_buf_acquire(bp, 0, 0, 0))) {
+				if (error != EAGAIN) {
+					nfs_buf_refrele(bp);
+					nfs_buf_itercomplete(np, &blist, NBI_CLEAN);
+					lck_mtx_unlock(nfs_buf_mutex);
+					return;
+				}
+			}
+			nfs_buf_refrele(bp);
+			nfs_buf_drop(bp);
+		}
+		nfs_buf_itercomplete(np, &blist, NBI_CLEAN);
+	}
+	if (!nfs_buf_iterprepare(np, &blist, NBI_DIRTY)) {
+		while ((bp = LIST_FIRST(&blist))) {
+			LIST_REMOVE(bp, nb_vnbufs);
+			LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
+			nfs_buf_refget(bp);
+			while ((error = nfs_buf_acquire(bp, 0, 0, 0))) {
+				if (error != EAGAIN) {
+					nfs_buf_refrele(bp);
+					nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
+					lck_mtx_unlock(nfs_buf_mutex);
+					return;
+				}
+			}
+			nfs_buf_refrele(bp);
+			nfs_buf_drop(bp);
+		}
+		nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
+	}
+	lck_mtx_unlock(nfs_buf_mutex);
+}
+
+
 /*
  * Add an async I/O request to the mount's async I/O queue and make
  * sure that an nfsiod will service it.
diff --git a/bsd/nfs/nfs_boot.c b/bsd/nfs/nfs_boot.c
index 33bc25128..7fcd73bee 100644
--- a/bsd/nfs/nfs_boot.c
+++ b/bsd/nfs/nfs_boot.c
@@ -177,13 +177,7 @@ static int get_file_handle(struct nfs_dlmount *ndmntp);
 #define IP_CH(ip)	((u_char *)ip)
 #define IP_LIST(ip)	IP_CH(ip)[0],IP_CH(ip)[1],IP_CH(ip)[2],IP_CH(ip)[3]
 
-extern boolean_t
-netboot_iaddr(struct in_addr * iaddr_p);
-
-extern boolean_t
-netboot_rootpath(struct in_addr * server_ip,
-		 char * name, int name_len, 
-		 char * path, int path_len);
+#include <sys/netboot.h>
 
 /*
  * Called with an empty nfs_diskless struct to be filled in.
diff --git a/bsd/nfs/nfs_gss.c b/bsd/nfs/nfs_gss.c
index b8dbbb4a2..c848bfae6 100644
--- a/bsd/nfs/nfs_gss.c
+++ b/bsd/nfs/nfs_gss.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -147,8 +147,8 @@ int nfs_single_des;
  * These octet strings are used to encode/decode ASN.1 tokens
  * in the RPCSEC_GSS verifiers.
  */
-static u_char krb5_tokhead[] = { 0x60, 0x23 };
-static u_char krb5_mech[] = { 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x12, 0x01, 0x02, 0x02 };
+static u_char krb5_tokhead[] __attribute__((unused)) = { 0x60, 0x23 };
+       u_char krb5_mech[11] = { 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x12, 0x01, 0x02, 0x02 };
 static u_char krb5_mic[]  = { 0x01, 0x01, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff };
 static u_char krb5_mic3[]  = { 0x01, 0x01, 0x04, 0x00, 0xff, 0xff, 0xff, 0xff };
 static u_char krb5_wrap[] = { 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff };
@@ -184,11 +184,11 @@ static u_char iv0[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; // DES
 static int	nfs_gss_clnt_ctx_find(struct nfsreq *);
 static int	nfs_gss_clnt_ctx_failover(struct nfsreq *);
 static int	nfs_gss_clnt_ctx_init(struct nfsreq *, struct nfs_gss_clnt_ctx *);
+static int	nfs_gss_clnt_ctx_init_retry(struct nfsreq *, struct nfs_gss_clnt_ctx *);
 static int	nfs_gss_clnt_ctx_callserver(struct nfsreq *, struct nfs_gss_clnt_ctx *);
 static char	*nfs_gss_clnt_svcname(struct nfsmount *);
 static int	nfs_gss_clnt_gssd_upcall(struct nfsreq *, struct nfs_gss_clnt_ctx *);
 static void	nfs_gss_clnt_ctx_remove(struct nfsmount *, struct nfs_gss_clnt_ctx *);
-static int	nfs_gss_clnt_ctx_delay(struct nfsreq *, int *);
 #endif /* NFSCLIENT */
 
 #if NFSSERVER
@@ -253,6 +253,25 @@ nfs_gss_init(void)
 
 #if NFSCLIENT
 
+/*
+ * Is it OK to fall back to using AUTH_SYS?
+ */
+static int
+nfs_gss_sysok(struct nfsreq *req)
+{
+	struct nfsmount *nmp = req->r_nmp;
+	int i;
+
+	if (req->r_wrongsec) /* Not OK if we're trying to handle a wrongsec error */
+		return (0);
+	if (!nmp->nm_sec.count) /* assume it's OK if we don't have a set of flavors */
+		return (1);
+	for (i=0; i < nmp->nm_sec.count; i++)
+		if (nmp->nm_sec.flavors[i] == RPCAUTH_SYS)
+			return (1);
+	return (0);
+}
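
nfs_gss_sysok() above replaces the old blanket NFSMNT_SECSYSOK mount-flag test: falling back to AUTH_SYS is now allowed only when no wrongsec error is being handled and either no security-flavor list was negotiated or the list explicitly includes RPCAUTH_SYS. The flavor scan itself is a plain linear search, e.g. (the constant value and struct layout are simplified stand-ins):

    #include <stdio.h>

    #define RPCAUTH_SYS 1                   /* stand-in value */

    struct sec_flavors {
    	int count;
    	int flavors[8];
    };

    /* may we fall back to sys? mirrors the list check in nfs_gss_sysok() */
    static int
    sys_flavor_listed(const struct sec_flavors *sec)
    {
    	int i;

    	if (!sec->count)                /* no list negotiated: assume OK */
    		return (1);
    	for (i = 0; i < sec->count; i++)
    		if (sec->flavors[i] == RPCAUTH_SYS)
    			return (1);
    	return (0);
    }

    int main(void)
    {
    	struct sec_flavors s = { 2, { 3, RPCAUTH_SYS } };
    	printf("%s\n", sys_flavor_listed(&s) ? "ok" : "not ok");
    	return (0);
    }
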
+
 /*
  * Find the context for a particular user.
  *
@@ -269,15 +288,14 @@ nfs_gss_clnt_ctx_find(struct nfsreq *req)
 	struct nfs_gss_clnt_ctx *cp;
 	uid_t uid = kauth_cred_getuid(req->r_cred);
 	int error = 0;
-	int retrycnt = 0;
 
 	lck_mtx_lock(&nmp->nm_lock);
 	TAILQ_FOREACH(cp, &nmp->nm_gsscl, gss_clnt_entries) {
 		if (cp->gss_clnt_uid == uid) {
 			if (cp->gss_clnt_flags & GSS_CTX_INVAL)
 				continue;
-			lck_mtx_unlock(&nmp->nm_lock);
 			nfs_gss_clnt_ctx_ref(req, cp);
+			lck_mtx_unlock(&nmp->nm_lock);
 			return (0);
 		}
 	}
@@ -292,8 +310,8 @@ nfs_gss_clnt_ctx_find(struct nfsreq *req)
 		 */
 		TAILQ_FOREACH(cp, &nmp->nm_gsscl, gss_clnt_entries) {
 			if (!(cp->gss_clnt_flags & GSS_CTX_INVAL)) {
-				lck_mtx_unlock(&nmp->nm_lock);
 				nfs_gss_clnt_ctx_ref(req, cp);
+				lck_mtx_unlock(&nmp->nm_lock);
 				return (0);
 			}
 		}
@@ -310,7 +328,7 @@ nfs_gss_clnt_ctx_find(struct nfsreq *req)
 	 * to failover to sec=sys.
 	 */
 	if (req->r_thread == NULL) {
-		if (nmp->nm_flag & NFSMNT_SECSYSOK) {
+		if (nfs_gss_sysok(req)) {
 			error = nfs_gss_clnt_ctx_failover(req);
 		} else {
 			printf("nfs_gss_clnt_ctx_find: no context for async\n");
@@ -334,29 +352,7 @@ nfs_gss_clnt_ctx_find(struct nfsreq *req)
 	TAILQ_INSERT_TAIL(&nmp->nm_gsscl, cp, gss_clnt_entries);
 	lck_mtx_unlock(&nmp->nm_lock);
 
-retry:
-	error = nfs_gss_clnt_ctx_init(req, cp);
-	if (error == ENEEDAUTH) {
-		error = nfs_gss_clnt_ctx_delay(req, &retrycnt);
-		if (!error)
-			goto retry;
-
-		/* Giving up on this context */
-		cp->gss_clnt_flags |= GSS_CTX_INVAL;
-
-		/*
-		 * Wake any threads waiting to use the context
-		 */
-		lck_mtx_lock(cp->gss_clnt_mtx);
-		cp->gss_clnt_thread = NULL;
-		if (cp->gss_clnt_flags & GSS_NEEDCTX) {
-			cp->gss_clnt_flags &= ~GSS_NEEDCTX;
-			wakeup(cp);
-		}
-		lck_mtx_unlock(cp->gss_clnt_mtx);				
-
-	}
-
+	error = nfs_gss_clnt_ctx_init_retry(req, cp); // Initialize new context
 	if (error)
 		nfs_gss_clnt_ctx_unref(req);
 
@@ -367,7 +363,7 @@ retry:
 	 * up a dummy context that allows this user to attempt
 	 * sec=sys calls.
 	 */
-	if (error && (nmp->nm_flag & NFSMNT_SECSYSOK) &&
+	if (error && nfs_gss_sysok(req) &&
 	    (error != ENXIO) && (error != ETIMEDOUT)) {
 		lck_mtx_lock(&nmp->nm_lock);
 		error = nfs_gss_clnt_ctx_failover(req);
@@ -433,7 +429,7 @@ nfs_gss_clnt_cred_put(struct nfsreq *req, struct nfsm_chain *nmc, mbuf_t args)
 	
 	slpflag = (PZERO-1);
 	if (req->r_nmp) {
-		slpflag |= ((req->r_nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
+		slpflag |= (NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) ? PCATCH : 0;
 		recordmark = (req->r_nmp->nm_sotype == SOCK_STREAM);
 	}
 retry:
@@ -483,6 +479,7 @@ retry:
 	if (cp->gss_clnt_thread && cp->gss_clnt_thread != current_thread()) {
 		cp->gss_clnt_flags |= GSS_NEEDCTX;
 		msleep(cp, cp->gss_clnt_mtx, slpflag | PDROP, "ctxwait", NULL);
+		slpflag &= ~PCATCH;
 		if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
 			return (error);
 		nfs_gss_clnt_ctx_unref(req);
@@ -504,6 +501,7 @@ retry:
 			((cp->gss_clnt_seqnum - cp->gss_clnt_seqwin) + 1) % cp->gss_clnt_seqwin)) {
 			cp->gss_clnt_flags |= GSS_NEEDSEQ;
 			msleep(cp, cp->gss_clnt_mtx, slpflag, "seqwin", NULL);
+			slpflag &= ~PCATCH;
 			if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0))) {
 				lck_mtx_unlock(cp->gss_clnt_mtx);
 				return (error);
@@ -995,9 +993,9 @@ nfs_gss_clnt_ctx_init(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp)
 	cp->gss_clnt_proc = RPCSEC_GSS_INIT;
 
 	cp->gss_clnt_service =
-		nmp->nm_auth == RPCAUTH_KRB5  ? RPCSEC_GSS_SVC_NONE :
-		nmp->nm_auth == RPCAUTH_KRB5I ? RPCSEC_GSS_SVC_INTEGRITY :
-		nmp->nm_auth == RPCAUTH_KRB5P ? RPCSEC_GSS_SVC_PRIVACY : 0;
+		req->r_auth == RPCAUTH_KRB5  ? RPCSEC_GSS_SVC_NONE :
+		req->r_auth == RPCAUTH_KRB5I ? RPCSEC_GSS_SVC_INTEGRITY :
+		req->r_auth == RPCAUTH_KRB5P ? RPCSEC_GSS_SVC_PRIVACY : 0;
 
 	cp->gss_clnt_gssd_flags = (nfs_single_des ? GSSD_NFS_1DES : 0);
 	/*
@@ -1055,7 +1053,9 @@ retry:
 	/*
 	 * The context is apparently established successfully
 	 */
+	lck_mtx_lock(cp->gss_clnt_mtx);
 	cp->gss_clnt_flags |= GSS_CTX_COMPLETE;
+	lck_mtx_unlock(cp->gss_clnt_mtx);
 	cp->gss_clnt_proc = RPCSEC_GSS_DATA;
 	microuptime(&now);
 	cp->gss_clnt_ctime = now.tv_sec;	// time stamp
@@ -1110,13 +1110,13 @@ nfsmout:
 	 * It will be removed when the reference count
 	 * drops to zero.
 	 */
+	lck_mtx_lock(cp->gss_clnt_mtx);
 	if (error)
 		cp->gss_clnt_flags |= GSS_CTX_INVAL;
 
 	/*
 	 * Wake any threads waiting to use the context
 	 */
-	lck_mtx_lock(cp->gss_clnt_mtx);
 	cp->gss_clnt_thread = NULL;
 	if (cp->gss_clnt_flags & GSS_NEEDCTX) {
 		cp->gss_clnt_flags &= ~GSS_NEEDCTX;
@@ -1127,6 +1127,77 @@ nfsmout:
 	return (error);
 }
 
+/*
+ * This function calls nfs_gss_clnt_ctx_init() to set up a new context.
+ * But if there's a failure in trying to establish the context, it keeps
+ * retrying at progressively longer intervals in case the failure is
+ * due to some transient condition.  For instance, the server might be
+ * failing the context setup because directory services is not coming
+ * up in a timely fashion.
+ */
+static int
+nfs_gss_clnt_ctx_init_retry(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp)
+{
+	struct nfsmount *nmp = req->r_nmp;
+	struct timeval now;
+	time_t waituntil;
+	int error, slpflag;
+	int retries = 0;
+	int timeo = NFS_TRYLATERDEL;
+
+	if (nmp == NULL) {
+		error = ENXIO;
+		goto bad;
+	}
+
+	/* For an "intr" mount allow a signal to interrupt the retries */
+	slpflag = (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR)) ? PCATCH : 0;
+
+	while ((error = nfs_gss_clnt_ctx_init(req, cp)) == ENEEDAUTH) {
+		microuptime(&now);
+		waituntil = now.tv_sec + timeo;
+		while (now.tv_sec < waituntil) {
+			tsleep(&lbolt, PSOCK | slpflag, "nfs_gss_clnt_ctx_init_retry", 0);
+			slpflag = 0;
+			error = nfs_sigintr(req->r_nmp, req, current_thread(), 0);
+			if (error)
+				goto bad;
+			microuptime(&now);
+		}
+
+		retries++;
+		/* If it's a soft mount just give up after a while */
+		if (NMFLAG(nmp, SOFT) && (retries > nmp->nm_retry)) {
+			error = ETIMEDOUT;
+			goto bad;
+		}
+		timeo *= 2;
+		if (timeo > 60)
+			timeo = 60;
+	}
+
+	if (error == 0)
+		return (0);	// success
+bad:
+	/*
+	 * Give up on this context
+	 */
+	lck_mtx_lock(cp->gss_clnt_mtx);
+	cp->gss_clnt_flags |= GSS_CTX_INVAL;
+
+	/*
+	 * Wake any threads waiting to use the context
+	 */
+	cp->gss_clnt_thread = NULL;
+	if (cp->gss_clnt_flags & GSS_NEEDCTX) {
+		cp->gss_clnt_flags &= ~GSS_NEEDCTX;
+		wakeup(cp);
+	}
+	lck_mtx_unlock(cp->gss_clnt_mtx);
+
+	return (error);
+}
+
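
nfs_gss_clnt_ctx_init_retry() above consolidates what used to be duplicated retry loops at both call sites: on ENEEDAUTH it sleeps, doubles the delay up to a 60-second ceiling, and for soft mounts gives up with ETIMEDOUT once the mount's retry budget is spent. The backoff schedule in isolation (the initial delay and retry cap are illustrative values; the real code also bails out early on nfs_sigintr()):

    #include <stdio.h>
    #include <unistd.h>

    #define INITIAL_DELAY 4     /* illustrative; cf. NFS_TRYLATERDEL */
    #define MAX_RETRIES   6     /* illustrative; cf. a soft mount's nm_retry */

    /* stand-in for nfs_gss_clnt_ctx_init(); nonzero means "not ready yet" */
    static int try_init(int attempt) { return (attempt < 3); }

    int main(void)
    {
    	int timeo = INITIAL_DELAY, retries;

    	for (retries = 0; ; retries++) {
    		if (!try_init(retries)) {
    			puts("context established");
    			return (0);
    		}
    		if (retries >= MAX_RETRIES) {
    			puts("soft mount: giving up (ETIMEDOUT)");
    			return (1);
    		}
    		printf("attempt %d failed, sleeping %d s\n", retries, timeo);
    		sleep(timeo);
    		timeo *= 2;         /* progressively longer intervals... */
    		if (timeo > 60)
    			timeo = 60; /* ...capped at one minute */
    	}
    }
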
 /*
  * Call the NFS server using a null procedure for context setup.
  * Even though it's a null procedure and nominally has no arguments
@@ -1260,11 +1331,11 @@ static int
 nfs_gss_clnt_gssd_upcall(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp)
 {
 	kern_return_t kr;
-	byte_buffer okey = NULL;
+	gssd_byte_buffer okey = NULL;
 	uint32_t skeylen = 0;
 	int retry_cnt = 0;
 	vm_map_copy_t itoken = NULL;
-	byte_buffer otoken = NULL;
+	gssd_byte_buffer otoken = NULL;
 	mach_msg_type_number_t otokenlen;
 	int error = 0;
 	char uprinc[1];
@@ -1279,7 +1350,7 @@ nfs_gss_clnt_gssd_upcall(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp)
 	 * the kernel is being compiled with -Wwrite-strings.
 	 */
 	uprinc[0] = '\0';
-	if (cp->gss_clnt_mport == NULL) {
+	if (!IPC_PORT_VALID(cp->gss_clnt_mport)) {
 		kr = task_get_gssd_port(get_threadtask(req->r_thread), &cp->gss_clnt_mport);
 		if (kr != KERN_SUCCESS) {
 			printf("nfs_gss_clnt_gssd_upcall: can't get gssd port, status %x (%d)\n", kr, kr);
@@ -1298,8 +1369,8 @@ nfs_gss_clnt_gssd_upcall(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp)
 retry:
 	kr = mach_gss_init_sec_context(
 		cp->gss_clnt_mport,
-		KRB5_MECH,
-		(byte_buffer) itoken, (mach_msg_type_number_t) cp->gss_clnt_tokenlen,
+		GSSD_KRB5_MECH,
+		(gssd_byte_buffer) itoken, (mach_msg_type_number_t) cp->gss_clnt_tokenlen,
 		cp->gss_clnt_uid,
 		uprinc,
 		cp->gss_clnt_svcname,
@@ -1512,8 +1583,8 @@ nfs_gss_clnt_ctx_remove(struct nfsmount *nmp, struct nfs_gss_clnt_ctx *cp)
 	if (nmp != NULL)
 		TAILQ_REMOVE(&nmp->nm_gsscl, cp, gss_clnt_entries);
 
-	if (cp->gss_clnt_mport)
-		task_release_special_port(cp->gss_clnt_mport);
+	task_release_special_port(cp->gss_clnt_mport);
+
 	if (cp->gss_clnt_mtx)
 		lck_mtx_destroy(cp->gss_clnt_mtx, nfs_gss_clnt_grp);
 	if (cp->gss_clnt_handle)
@@ -1541,7 +1612,6 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req)
 	int error = 0;
 	uid_t saved_uid;
 	mach_port_t saved_mport;
-	int retrycnt = 0;
 
 	if (cp == NULL)
 		return (0);
@@ -1590,13 +1660,7 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req)
 	nfs_gss_clnt_ctx_unref(req);
 	nfs_gss_clnt_ctx_ref(req, ncp);
 
-retry:
-	error = nfs_gss_clnt_ctx_init(req, ncp); // Initialize new context
-	if (error == ENEEDAUTH) {
-		error = nfs_gss_clnt_ctx_delay(req, &retrycnt);
-		if (!error)
-			goto retry;
-	}
+	error = nfs_gss_clnt_ctx_init_retry(req, ncp); // Initialize new context
 out:
 	task_release_special_port(saved_mport);
 	if (error)
@@ -1610,17 +1674,13 @@ out:
  * The contexts are also destroyed by the server.
  */
 void
-nfs_gss_clnt_ctx_unmount(struct nfsmount *nmp, int mntflags)
+nfs_gss_clnt_ctx_unmount(struct nfsmount *nmp)
 {
 	struct nfs_gss_clnt_ctx *cp;
-	struct ucred temp_cred;
-	kauth_cred_t cred;
 	struct nfsm_chain nmreq, nmrep;
 	int error, status;
 	struct nfsreq req;
 
-	bzero((caddr_t) &temp_cred, sizeof(temp_cred));
-	temp_cred.cr_ngroups = 1;
 	req.r_nmp = nmp;
 
 	for (;;) {
@@ -1637,9 +1697,14 @@ nfs_gss_clnt_ctx_unmount(struct nfsmount *nmp, int mntflags)
 		 * But don't bother if it's a forced unmount
 		 * or if it's a dummy sec=sys context.
 		 */
-		if (!(mntflags & MNT_FORCE) && cp->gss_clnt_service != RPCSEC_GSS_SVC_SYS) {
-			temp_cred.cr_uid = cp->gss_clnt_uid;
-			cred = kauth_cred_create(&temp_cred);
+		if (!(nmp->nm_state & NFSSTA_FORCE) && (cp->gss_clnt_service != RPCSEC_GSS_SVC_SYS)) {
+			kauth_cred_t cred;
+			struct posix_cred temp_pcred;
+
+			bzero((caddr_t) &temp_pcred, sizeof(temp_pcred));
+			temp_pcred.cr_ngroups = 1;
+			temp_pcred.cr_uid = cp->gss_clnt_uid;
+			cred = posix_cred_create(&temp_pcred);
 			cp->gss_clnt_proc = RPCSEC_GSS_DESTROY;
 
 			error = 0;
@@ -1660,48 +1725,13 @@ nfs_gss_clnt_ctx_unmount(struct nfsmount *nmp, int mntflags)
 		 * the reference to remove it if its
 		 * refcount is zero.
 		 */
+		lck_mtx_lock(cp->gss_clnt_mtx);
 		cp->gss_clnt_flags |= GSS_CTX_INVAL;
+		lck_mtx_unlock(cp->gss_clnt_mtx);
 		nfs_gss_clnt_ctx_unref(&req);
 	}
 }
 
-/*
- * If we get a failure in trying to establish a context we need to wait a 
- * little while to see if the server is feeling better. In our case this is
- * probably a failure in directory services not coming up in a timely fashion.
- * This routine sort of mimics receiving a jukebox error.
- */
-static int
-nfs_gss_clnt_ctx_delay(struct nfsreq *req, int *retry)
-{
-	int timeo = (1 << *retry) * NFS_TRYLATERDEL;
-	int error = 0;
-	struct nfsmount *nmp = req->r_nmp;
-	struct timeval now;
-	time_t waituntil;
-
-	if (!nmp)
-		return (ENXIO);
-	if ((nmp->nm_flag & NFSMNT_SOFT) && *retry > nmp->nm_retry)
-		return (ETIMEDOUT);
-	if (timeo > 60)
-		timeo = 60;
-
-	microuptime(&now);
-	waituntil = now.tv_sec + timeo;
-	while (now.tv_sec < waituntil) {
-		tsleep(&lbolt, PSOCK, "nfs_gss_clnt_ctx_delay", 0);
-		error = nfs_sigintr(req->r_nmp, req, current_thread(), 0);
-		if (error)
-			break;
-		microuptime(&now);
-	}
-	*retry += 1;
-
-	return (error);
-}
-
-
 #endif /* NFSCLIENT */
 
 /*************
@@ -1733,7 +1763,7 @@ nfs_gss_svc_ctx_find(uint32_t handle)
 
 	lck_mtx_lock(nfs_gss_svc_ctx_mutex);
 
-	LIST_FOREACH(cp, head, gss_svc_entries)
+	LIST_FOREACH(cp, head, gss_svc_entries) {
 		if (cp->gss_svc_handle == handle) {
 			if (timenow > cp->gss_svc_incarnation + GSS_SVC_CTX_TTL) {
 				/* 
@@ -1743,14 +1773,20 @@ nfs_gss_svc_ctx_find(uint32_t handle)
 				 */
 				cp->gss_svc_handle = 0;
 				/*
-				 * Make sure though that we stay around for GSS_CTC_PEND seconds 
+				 * Make sure though that we stay around for GSS_CTX_PEND seconds 
 				 * for other threads that might be using the context.
 				 */
 				cp->gss_svc_incarnation = timenow;
+
 				cp = NULL;
+				break;
 			}
+			lck_mtx_lock(cp->gss_svc_mtx);
+			cp->gss_svc_refcnt++;
+			lck_mtx_unlock(cp->gss_svc_mtx);
 			break;
 		}
+	}
 
 	lck_mtx_unlock(nfs_gss_svc_ctx_mutex);
 
@@ -1765,10 +1801,26 @@ static void
 nfs_gss_svc_ctx_insert(struct nfs_gss_svc_ctx *cp)
 {
 	struct nfs_gss_svc_ctx_hashhead *head;
+	struct nfs_gss_svc_ctx *p;
 	
+	lck_mtx_lock(nfs_gss_svc_ctx_mutex);
+
+	/*
+	 * Give the client a random handle so that if we reboot
+	 * it's unlikely the client will get a bad context match.
+	 * Make sure it's not zero or already assigned.
+	 */
+retry:
+	cp->gss_svc_handle = random();
+	if (cp->gss_svc_handle == 0)
+		goto retry;
 	head = &nfs_gss_svc_ctx_hashtbl[SVC_CTX_HASH(cp->gss_svc_handle)];
+	LIST_FOREACH(p, head, gss_svc_entries)
+		if (p->gss_svc_handle == cp->gss_svc_handle)
+			goto retry;
 
-	lck_mtx_lock(nfs_gss_svc_ctx_mutex);
+	clock_interval_to_deadline(GSS_CTX_PEND, NSEC_PER_SEC,
+		&cp->gss_svc_incarnation);
 	LIST_INSERT_HEAD(head, cp, gss_svc_entries);
 	nfs_gss_ctx_count++;
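
Handle allocation for new server contexts now happens inside nfs_gss_svc_ctx_insert() with nfs_gss_svc_ctx_mutex held, so the random pick and the duplicate check above are atomic; the old RPCSEC_GSS_INIT path chose the handle before taking the lock. The pick-and-retry idiom on its own, using a flat array where the kernel walks a hash chain:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define NCTX 64

    static uint32_t handles[NCTX];          /* 0 marks an unused slot */

    static int
    handle_in_use(uint32_t h)
    {
    	int i;

    	for (i = 0; i < NCTX; i++)
    		if (handles[i] == h)
    			return (1);
    	return (0);
    }

    /* pick a random, nonzero, currently-unassigned handle */
    static uint32_t
    pick_handle(void)
    {
    	uint32_t h;
    retry:
    	h = (uint32_t)random();
    	if ((h == 0) || handle_in_use(h))
    		goto retry;
    	return (h);
    }

    int main(void)
    {
    	handles[0] = pick_handle();
    	printf("handle 0x%x\n", handles[0]);
    	return (0);
    }
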
 
@@ -1776,7 +1828,7 @@ nfs_gss_svc_ctx_insert(struct nfs_gss_svc_ctx *cp)
 		nfs_gss_timer_on = 1;
 
 		nfs_interval_timer_start(nfs_gss_svc_ctx_timer_call,
-			min(GSS_TIMER_PERIOD, max(GSS_CTX_TTL_MIN, GSS_SVC_CTX_TTL)) * MSECS_PER_SEC);
+			min(GSS_TIMER_PERIOD, max(GSS_CTX_TTL_MIN, nfsrv_gss_context_ttl)) * MSECS_PER_SEC);
 	}
 
 	lck_mtx_unlock(nfs_gss_svc_ctx_mutex);
@@ -1790,7 +1842,6 @@ nfs_gss_svc_ctx_insert(struct nfs_gss_svc_ctx *cp)
 void
 nfs_gss_svc_ctx_timer(__unused void *param1, __unused void *param2)
 {
-	struct nfs_gss_svc_ctx_hashhead *head;
 	struct nfs_gss_svc_ctx *cp, *next;
 	uint64_t timenow;
 	int contexts = 0;
@@ -1801,19 +1852,17 @@ nfs_gss_svc_ctx_timer(__unused void *param1, __unused void *param2)
 
 	/*
 	 * Scan all the hash chains
-	 * Assume nfs_gss_svc_ctx_mutex is held
 	 */
 	for (i = 0; i < SVC_CTX_HASHSZ; i++) {
 		/*
 		 * For each hash chain, look for entries
 		 * that haven't been used in a while.
 		 */
-		head = &nfs_gss_svc_ctx_hashtbl[i];
-		for (cp = LIST_FIRST(head); cp; cp = next) {
+		LIST_FOREACH_SAFE(cp, &nfs_gss_svc_ctx_hashtbl[i], gss_svc_entries, next) {
 			contexts++;
-			next = LIST_NEXT(cp, gss_svc_entries);
-			if (timenow  > cp->gss_svc_incarnation + 
-				(cp->gss_svc_handle ? GSS_SVC_CTX_TTL : 0)) {
+			if (timenow > cp->gss_svc_incarnation + 
+				(cp->gss_svc_handle ? GSS_SVC_CTX_TTL : 0)
+				&& cp->gss_svc_refcnt == 0) {
 				/*
 				 * A stale context - remove it
 				 */
@@ -1836,7 +1885,7 @@ nfs_gss_svc_ctx_timer(__unused void *param1, __unused void *param2)
 	nfs_gss_timer_on = nfs_gss_ctx_count > 0;
 	if (nfs_gss_timer_on)
 		nfs_interval_timer_start(nfs_gss_svc_ctx_timer_call,
-			min(GSS_TIMER_PERIOD, max(GSS_CTX_TTL_MIN, GSS_SVC_CTX_TTL)) * MSECS_PER_SEC);
+			min(GSS_TIMER_PERIOD, max(GSS_CTX_TTL_MIN, nfsrv_gss_context_ttl)) * MSECS_PER_SEC);
 
 	lck_mtx_unlock(nfs_gss_svc_ctx_mutex);
 }
@@ -1921,6 +1970,8 @@ nfs_gss_svc_cred_get(struct nfsrv_descript *nd, struct nfsm_chain *nmc)
 			error = ENOMEM;
 			goto nfsmout;
 		}
+		cp->gss_svc_mtx = lck_mtx_alloc_init(nfs_gss_svc_grp, LCK_ATTR_NULL);
+		cp->gss_svc_refcnt = 1;
 	} else {
 
 		/*
@@ -1944,7 +1995,7 @@ nfs_gss_svc_cred_get(struct nfsrv_descript *nd, struct nfsm_chain *nmc)
 	ki = &cp->gss_svc_kinfo;
 
 	if (proc == RPCSEC_GSS_DATA || proc == RPCSEC_GSS_DESTROY) {
-		struct ucred temp_cred;
+		struct posix_cred temp_pcred;
 
 		if (cp->gss_svc_seqwin == 0) {
 			/*
@@ -1975,6 +2026,8 @@ nfs_gss_svc_cred_get(struct nfsrv_descript *nd, struct nfsm_chain *nmc)
 		 */
 		nfsm_chain_get_32(error, nmc, flavor);
 		nfsm_chain_get_32(error, nmc, verflen);
+		if (error)
+			goto nfsmout;
 		if (flavor != RPCSEC_GSS || verflen != KRB5_SZ_TOKEN(ki->hash_len))
 			error = NFSERR_AUTHERR | AUTH_BADVERF;
 		nfsm_chain_get_opaque(error, nmc, verflen, tokbuf);
@@ -1997,13 +2050,13 @@ nfs_gss_svc_cred_get(struct nfsrv_descript *nd, struct nfsm_chain *nmc)
 		/*
 		 * Set up the user's cred
 		 */
-		bzero(&temp_cred, sizeof(temp_cred));
-		temp_cred.cr_uid = cp->gss_svc_uid;
-		bcopy(cp->gss_svc_gids, temp_cred.cr_groups,
+		bzero(&temp_pcred, sizeof(temp_pcred));
+		temp_pcred.cr_uid = cp->gss_svc_uid;
+		bcopy(cp->gss_svc_gids, temp_pcred.cr_groups,
 				sizeof(gid_t) * cp->gss_svc_ngroups);
-		temp_cred.cr_ngroups = cp->gss_svc_ngroups;
+		temp_pcred.cr_ngroups = cp->gss_svc_ngroups;
 
-		nd->nd_cr = kauth_cred_create(&temp_cred);
+		nd->nd_cr = posix_cred_create(&temp_pcred);
 		if (nd->nd_cr == NULL) {
 			error = ENOMEM;
 			goto nfsmout;
@@ -2135,12 +2188,21 @@ nfs_gss_svc_cred_get(struct nfsrv_descript *nd, struct nfsm_chain *nmc)
 		nfsm_chain_get_32(error, nmc, verflen);
 		if (error || flavor != RPCAUTH_NULL || verflen > 0)
 			error = NFSERR_AUTHERR | RPCSEC_GSS_CREDPROBLEM;
-		if (error)
+		if (error) {
+			if (proc == RPCSEC_GSS_INIT) {
+				lck_mtx_destroy(cp->gss_svc_mtx, nfs_gss_svc_grp);
+				FREE(cp, M_TEMP);
+				cp = NULL;
+			}
 			goto nfsmout;
+		}
 	}
 
 	nd->nd_gss_context = cp;
+	return (0);
 nfsmout:
+	if (cp)
+		nfs_gss_svc_ctx_deref(cp);
 	return (error);
 }
 
@@ -2341,7 +2403,6 @@ int
 nfs_gss_svc_ctx_init(struct nfsrv_descript *nd, struct nfsrv_sock *slp, mbuf_t *mrepp)
 {
 	struct nfs_gss_svc_ctx *cp = NULL;
-	uint32_t handle = 0;
 	int error = 0;
 	int autherr = 0;
 	struct nfsm_chain *nmreq, nmrep;
@@ -2355,22 +2416,7 @@ nfs_gss_svc_ctx_init(struct nfsrv_descript *nd, struct nfsrv_sock *slp, mbuf_t *
 
 	switch (cp->gss_svc_proc) {
 	case RPCSEC_GSS_INIT:
-		/*
-		 * Give the client a random handle so that
-		 * if we reboot it's unlikely the client
-		 * will get a bad context match.
-		 * Make sure it's not zero, or already assigned.
-		 */
-		do {
-			handle = random();
-		} while (nfs_gss_svc_ctx_find(handle) != NULL || handle == 0);
-		cp->gss_svc_handle = handle;
-		cp->gss_svc_mtx = lck_mtx_alloc_init(nfs_gss_svc_grp, LCK_ATTR_NULL);
-		clock_interval_to_deadline(GSS_CTX_PEND, NSEC_PER_SEC,
-			&cp->gss_svc_incarnation);
-
 		nfs_gss_svc_ctx_insert(cp);
-
 		/* FALLTHRU */
 
 	case RPCSEC_GSS_CONTINUE_INIT:
@@ -2502,11 +2548,11 @@ nfs_gss_svc_gssd_upcall(struct nfs_gss_svc_ctx *cp)
 	kern_return_t kr;
 	mach_port_t mp;
 	int retry_cnt = 0;
-	byte_buffer okey = NULL;
+	gssd_byte_buffer okey = NULL;
 	uint32_t skeylen = 0;
 	uint32_t ret_flags;
 	vm_map_copy_t itoken = NULL;
-	byte_buffer otoken = NULL;
+	gssd_byte_buffer otoken = NULL;
 	mach_msg_type_number_t otokenlen;
 	int error = 0;
 	char svcname[] = "nfs";
@@ -2527,7 +2573,7 @@ nfs_gss_svc_gssd_upcall(struct nfs_gss_svc_ctx *cp)
 retry:
 	kr = mach_gss_accept_sec_context(
 		mp,
-		(byte_buffer) itoken, (mach_msg_type_number_t) cp->gss_svc_tokenlen,
+		(gssd_byte_buffer) itoken, (mach_msg_type_number_t) cp->gss_svc_tokenlen,
 		svcname,
 		0,
 		&cp->gss_svc_context,
@@ -2661,6 +2707,24 @@ nfs_gss_svc_seqnum_valid(struct nfs_gss_svc_ctx *cp, uint32_t seq)
 	return (1);
 }
 
+/*
+ * Drop a reference to a context
+ *
+ * Note that it's OK for the context to exist
+ * with a refcount of zero.  The refcount isn't
+ * checked until we're about to reap an expired one.
+ */
+void
+nfs_gss_svc_ctx_deref(struct nfs_gss_svc_ctx *cp)
+{
+	lck_mtx_lock(cp->gss_svc_mtx);
+	if (cp->gss_svc_refcnt > 0)
+		cp->gss_svc_refcnt--;
+	else
+		printf("nfs_gss_svc_ctx_deref: zero refcount\n");
+	lck_mtx_unlock(cp->gss_svc_mtx);
+}
+
 /*
  * Called at NFS server shutdown - destroy all contexts
  */
@@ -2713,8 +2777,8 @@ extern ipc_port_t ipc_port_copy_send(ipc_port_t);
 static void
 task_release_special_port(mach_port_t mp)
 {
-
-	ipc_port_release_send(mp);
+	if (IPC_PORT_VALID(mp))
+		ipc_port_release_send(mp);
 }
 
 static mach_port_t
diff --git a/bsd/nfs/nfs_gss.h b/bsd/nfs/nfs_gss.h
index aa6d55e96..ad056e7f2 100644
--- a/bsd/nfs/nfs_gss.h
+++ b/bsd/nfs/nfs_gss.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -50,6 +50,9 @@ enum rpcsec_gss_service {
 	RPCSEC_GSS_SVC_SYS		= 4	// sec=sys (fallback)
 };
 
+/* encoded krb5 OID */
+extern u_char krb5_mech[11];
+
 /*
  * GSS-API things
  */
@@ -111,8 +114,8 @@ struct nfs_gss_clnt_ctx {
 	mach_port_t		gss_clnt_mport;		// Mach port for gssd upcall
 	u_char			*gss_clnt_verf;		// RPC verifier from server
 	char			*gss_clnt_svcname;	// Service name e.g. "nfs/big.apple.com"
-	gss_cred		gss_clnt_cred_handle;	// Opaque cred handle from gssd
-	gss_ctx			gss_clnt_context;	// Opaque context handle from gssd
+	gssd_cred		gss_clnt_cred_handle;	// Opaque cred handle from gssd
+	gssd_ctx		gss_clnt_context;	// Opaque context handle from gssd
 	u_char			*gss_clnt_token;	// GSS token exchanged via gssd & server
 	uint32_t		gss_clnt_tokenlen;	// Length of token
 	gss_key_info		gss_clnt_kinfo;		// GSS key info
@@ -136,6 +139,7 @@ struct nfs_gss_svc_ctx {
 	lck_mtx_t		*gss_svc_mtx;
 	LIST_ENTRY(nfs_gss_svc_ctx)	gss_svc_entries;
 	uint32_t		gss_svc_handle;		// Identifies server context to client
+	uint32_t		gss_svc_refcnt;		// Reference count
 	uint32_t		gss_svc_proc;		// Current GSS proc from cred
 	uid_t			gss_svc_uid;		// UID of this user
 	gid_t			gss_svc_gids[NGROUPS];	// GIDs of this user
@@ -144,8 +148,8 @@ struct nfs_gss_svc_ctx {
 	uint32_t		gss_svc_seqmax;		// Current max GSS sequence number
 	uint32_t		gss_svc_seqwin;		// GSS sequence number window
 	uint32_t		*gss_svc_seqbits;	// Bitmap to track seq numbers
-	gss_cred		gss_svc_cred_handle;	// Opaque cred handle from gssd
-	gss_ctx			gss_svc_context;	// Opaque context handle from gssd
+	gssd_cred		gss_svc_cred_handle;	// Opaque cred handle from gssd
+	gssd_ctx			gss_svc_context;	// Opaque context handle from gssd
 	u_char			*gss_svc_token;		// GSS token exchanged via gssd & client
 	uint32_t		gss_svc_tokenlen;	// Length of token
 	gss_key_info		gss_svc_kinfo;		// Session key info
@@ -184,12 +188,13 @@ int	nfs_gss_clnt_args_restore(struct nfsreq *);
 int	nfs_gss_clnt_ctx_renew(struct nfsreq *);
 void	nfs_gss_clnt_ctx_ref(struct nfsreq *, struct nfs_gss_clnt_ctx *);
 void	nfs_gss_clnt_ctx_unref(struct nfsreq *);
-void	nfs_gss_clnt_ctx_unmount(struct nfsmount *, int);
+void	nfs_gss_clnt_ctx_unmount(struct nfsmount *);
 int	nfs_gss_svc_cred_get(struct nfsrv_descript *, struct nfsm_chain *);
 int	nfs_gss_svc_verf_put(struct nfsrv_descript *, struct nfsm_chain *);
 int	nfs_gss_svc_ctx_init(struct nfsrv_descript *, struct nfsrv_sock *, mbuf_t *);
 int	nfs_gss_svc_prepare_reply(struct nfsrv_descript *, struct nfsm_chain *);
 int	nfs_gss_svc_protect_reply(struct nfsrv_descript *, mbuf_t);
+void	nfs_gss_svc_ctx_deref(struct nfs_gss_svc_ctx *);
 void	nfs_gss_svc_cleanup(void);
 
 __END_DECLS
diff --git a/bsd/nfs/nfs_lock.c b/bsd/nfs/nfs_lock.c
index 590a70619..f76a9b6d0 100644
--- a/bsd/nfs/nfs_lock.c
+++ b/bsd/nfs/nfs_lock.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002-2008 Apple Inc.  All rights reserved.
+ * Copyright (c) 2002-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -95,8 +95,6 @@
 
 extern void ipc_port_release_send(ipc_port_t);
 
-#define OFF_MAX QUAD_MAX
-
 /*
  * pending lock request messages are kept in this queue which is
  * kept sorted by transaction ID (xid).
@@ -104,28 +102,8 @@ extern void ipc_port_release_send(ipc_port_t);
 static uint64_t nfs_lockxid = 0;
 static LOCKD_MSG_QUEUE nfs_pendlockq;
 
-/*
- * This structure is used to identify processes which have acquired NFS locks.
- * Knowing which processes have ever acquired locks allows us to short-circuit
- * unlock requests for processes that have never had an NFS file lock.  Thus
- * avoiding a costly and unnecessary lockd request.
- */
-struct nfs_lock_pid {
-	TAILQ_ENTRY(nfs_lock_pid)	lp_lru;		/* LRU list */
-	LIST_ENTRY(nfs_lock_pid)	lp_hash;	/* hash chain */
-	int				lp_valid;	/* valid entry? */
-	int				lp_time;	/* last time seen valid */
-	pid_t				lp_pid;		/* The process ID. */
-	struct timeval			lp_pid_start;	/* Start time of process id */
-};
-
-#define NFS_LOCK_PID_HASH_SIZE		64	// XXX tune me
-#define	NFS_LOCK_PID_HASH(pid)	\
-	(&nfs_lock_pid_hash_tbl[(pid) & nfs_lock_pid_hash])
-static LIST_HEAD(, nfs_lock_pid) *nfs_lock_pid_hash_tbl;
-static TAILQ_HEAD(, nfs_lock_pid) nfs_lock_pid_lru;
-static u_long nfs_lock_pid_hash;
-static uint32_t nfs_lock_pid_hash_trusted;
+/* list of mounts that are (potentially) making lockd requests */
+TAILQ_HEAD(nfs_lockd_mount_list, nfsmount) nfs_lockd_mount_list;
 
 static lck_grp_t *nfs_lock_lck_grp;
 static lck_mtx_t *nfs_lock_mutex;
@@ -136,7 +114,6 @@ int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *, struct lockd_ans *);
 LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_answer(struct lockd_ans *);
 LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_xid(uint64_t);
 uint64_t nfs_lockxid_get(void);
-int nfs_lock_pid_check(proc_t, int);
 int nfs_lockd_send_request(LOCKD_MSG *, int);
 
 /*
@@ -146,31 +123,40 @@ void
 nfs_lockinit(void)
 {
 	TAILQ_INIT(&nfs_pendlockq);
-	nfs_lock_pid_hash_trusted = 1;
-	nfs_lock_pid_hash_tbl = hashinit(NFS_LOCK_PID_HASH_SIZE,
-					 M_TEMP, &nfs_lock_pid_hash);
-	TAILQ_INIT(&nfs_lock_pid_lru);
+	TAILQ_INIT(&nfs_lockd_mount_list);
 
 	nfs_lock_lck_grp = lck_grp_alloc_init("nfs_lock", LCK_GRP_ATTR_NULL);
 	nfs_lock_mutex = lck_mtx_alloc_init(nfs_lock_lck_grp, LCK_ATTR_NULL);
 }
 
 /*
- * change the count of NFS mounts that may need to make lockd requests
+ * Register a mount as (potentially) making lockd requests.
+ */
+void
+nfs_lockd_mount_register(struct nfsmount *nmp)
+{
+	lck_mtx_lock(nfs_lock_mutex);
+	TAILQ_INSERT_HEAD(&nfs_lockd_mount_list, nmp, nm_ldlink);
+	nfs_lockd_mounts++;
+	lck_mtx_unlock(nfs_lock_mutex);
+}
+
+/*
+ * Unregister a mount as (potentially) making lockd requests.
  *
- * If the mount count drops to zero, then send a shutdown request to
+ * When the lockd mount count drops to zero, send a shutdown request to
  * lockd if we've sent any requests to it.
  */
 void
-nfs_lockd_mount_change(int i)
+nfs_lockd_mount_unregister(struct nfsmount *nmp)
 {
+	int send_shutdown;
 	mach_port_t lockd_port = IPC_PORT_NULL;
 	kern_return_t kr;
-	int send_shutdown;
 
 	lck_mtx_lock(nfs_lock_mutex);
-
-	nfs_lockd_mounts += i;
+	TAILQ_REMOVE(&nfs_lockd_mount_list, nmp, nm_ldlink);
+	nfs_lockd_mounts--;
 
 	/* send a shutdown request if there are no more lockd mounts */
 	send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent);
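
The register/unregister pair above replaces the old nfs_lockd_mount_change(±1) counter with a real list, so the lockd code has the registered mounts themselves available rather than just a count; the count is kept alongside for the shutdown decision. The same structure in user space with <sys/queue.h> (locking, done with nfs_lock_mutex in the kernel, is omitted here):

    #include <sys/queue.h>
    #include <stdio.h>

    struct mount_entry {
    	const char *name;
    	TAILQ_ENTRY(mount_entry) ld_link;   /* cf. nm_ldlink */
    };

    static TAILQ_HEAD(, mount_entry) mount_list =
    	TAILQ_HEAD_INITIALIZER(mount_list);
    static int mount_count;                     /* cf. nfs_lockd_mounts */

    static void
    mount_register(struct mount_entry *m)
    {
    	TAILQ_INSERT_HEAD(&mount_list, m, ld_link);
    	mount_count++;
    }

    /* returns 1 when the last mount goes away, i.e. time to shut lockd down */
    static int
    mount_unregister(struct mount_entry *m)
    {
    	TAILQ_REMOVE(&mount_list, m, ld_link);
    	return (--mount_count == 0);
    }

    int main(void)
    {
    	struct mount_entry m = { "srv:/export", { NULL, NULL } };
    	mount_register(&m);
    	printf("shutdown lockd: %s\n", mount_unregister(&m) ? "yes" : "no");
    	return (0);
    }
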
@@ -183,7 +169,7 @@ nfs_lockd_mount_change(int i)
 		return;
 
 	/*
-	 * Let lockd know that it is no longer need for any NFS mounts
+	 * Let lockd know that it is no longer needed for any NFS mounts
 	 */
 	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
 	if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(lockd_port)) {
@@ -204,7 +190,7 @@ nfs_lockd_mount_change(int i)
  * insert a lock request message into the pending queue
  * (nfs_lock_mutex must be held)
  */
-inline void
+void
 nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
 {
 	LOCKD_MSG_REQUEST *mr;
@@ -230,7 +216,7 @@ nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
  * remove a lock request message from the pending queue
  * (nfs_lock_mutex must be held)
  */
-inline void
+void
 nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
 {
 	TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
@@ -248,7 +234,7 @@ nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
  *
  * (nfs_lock_mutex must be held)
  */
-inline LOCKD_MSG_REQUEST *
+LOCKD_MSG_REQUEST *
 nfs_lockdmsg_find_by_xid(uint64_t lockxid)
 {
 	LOCKD_MSG_REQUEST *mr;
@@ -264,8 +250,8 @@ nfs_lockdmsg_find_by_xid(uint64_t lockxid)
 
 /*
  * Because we can't depend on nlm_granted messages containing the same
- * cookie we sent with the original lock request, we need code test if
- * an nlm_granted answer matches the lock request.  We also need code
+ * cookie we sent with the original lock request, we need code to test
+ * if an nlm_granted answer matches the lock request.  We also need code
  * that can find a lockd message based solely on the nlm_granted answer.
  */
 
@@ -274,7 +260,7 @@ nfs_lockdmsg_find_by_xid(uint64_t lockxid)
  *
  * returns 0 on equality and 1 if different
  */
-inline int
+int
 nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp)
 {
 	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
@@ -307,7 +293,7 @@ nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp
  *
  * (nfs_lock_mutex must be held)
  */
-inline LOCKD_MSG_REQUEST *
+LOCKD_MSG_REQUEST *
 nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
 {
 	LOCKD_MSG_REQUEST *mr;
@@ -325,7 +311,7 @@ nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
  * return the next unique lock request transaction ID
  * (nfs_lock_mutex must be held)
  */
-inline uint64_t
+uint64_t
 nfs_lockxid_get(void)
 {
 	LOCKD_MSG_REQUEST *mr;
@@ -359,143 +345,6 @@ nfs_lockxid_get(void)
 	return nfs_lockxid;
 }
 
-
-/*
- * Check the nfs_lock_pid hash table for an entry and, if requested,
- * add the entry if it is not found.
- *
- * (Also, if adding, try to clean up some stale entries.)
- * (nfs_lock_mutex must be held)
- */
-int
-nfs_lock_pid_check(proc_t p, int addflag)
-{
-	struct nfs_lock_pid *lp, *lplru, *lplru_next, *mlp;
-	TAILQ_HEAD(, nfs_lock_pid) nfs_lock_pid_free;
-	proc_t plru = PROC_NULL;
-	pid_t pid;
-	int error = 0;
-	struct timeval now;
-
-	TAILQ_INIT(&nfs_lock_pid_free);
-	mlp = NULL;
-
-loop:
-	/* Search hash chain */
-	pid = proc_pid(p);
-	error = ENOENT;
-	lp = NFS_LOCK_PID_HASH(pid)->lh_first;
-	for (; lp != NULL; lp = lp->lp_hash.le_next)
-		if (lp->lp_pid == pid) {
-			/* found pid... */
-			if (timevalcmp(&lp->lp_pid_start, &p->p_start, ==)) {
-				/* ...and it's valid */
-				/* move to tail of LRU */
-				TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru);
-				microuptime(&now);
-				lp->lp_time = now.tv_sec;
-				TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru);
-				error = 0;
-				break;
-			}
-			/* ...but it's no longer valid */
-			/* remove from hash, invalidate, and move to lru head */
-			LIST_REMOVE(lp, lp_hash);
-			lp->lp_valid = 0;
-			TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru);
-			TAILQ_INSERT_HEAD(&nfs_lock_pid_lru, lp, lp_lru);
-			lp = NULL;
-			break;
-		}
-
-	/* if we didn't find it (valid), use any newly allocated one */
-	if (!lp)
-		lp = mlp;
-
-	/* if we don't have an lp and we've been asked to add it */
-	if ((error == ENOENT) && addflag && !lp) {
-		/* scan lru list for invalid, stale entries to reuse/free */
-		int lrucnt = 0;
-		microuptime(&now);
-		for (lplru = TAILQ_FIRST(&nfs_lock_pid_lru); lplru; lplru = lplru_next) {
-			lplru_next = TAILQ_NEXT(lplru, lp_lru);
-			if (lplru->lp_valid && (lplru->lp_time >= (now.tv_sec - 2))) {
-				/*
-				 * If the oldest LRU entry is relatively new, then don't
-				 * bother scanning any further.
-				 */
-				break;
-			}
-			/* remove entry from LRU, and check if it's still in use */
-			TAILQ_REMOVE(&nfs_lock_pid_lru, lplru, lp_lru);
-			if (!lplru->lp_valid || !(plru = proc_find(lplru->lp_pid)) ||
-			    timevalcmp(&lplru->lp_pid_start, &plru->p_start, !=)) {
-				if (plru != PROC_NULL) {
-					proc_rele(plru);
-					plru = PROC_NULL;
-				}
-				/* no longer in use */
-				LIST_REMOVE(lplru, lp_hash);
-				if (!lp) {
-					/* we'll reuse this one */
-					lp = lplru;
-				} else {
-					/* queue it up for freeing */
-					TAILQ_INSERT_HEAD(&nfs_lock_pid_free, lplru, lp_lru);
-				}
-			} else {
-				/* still in use */
-				if (plru != PROC_NULL) {
-					proc_rele(plru);
-					plru = PROC_NULL;
-				}
-				lplru->lp_time = now.tv_sec;
-				TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lplru, lp_lru);
-			}
-			/* don't check too many entries at once */
-			if (++lrucnt > 8)
-				break;
-		}
-		if (!lp) {
-			/* we need to allocate a new one */
-			lck_mtx_unlock(nfs_lock_mutex);
-			MALLOC(mlp, struct nfs_lock_pid *, sizeof(struct nfs_lock_pid),
-				M_TEMP, M_WAITOK | M_ZERO);
-			lck_mtx_lock(nfs_lock_mutex);
-			if (mlp) /* make sure somebody hasn't already added this guy */
-				goto loop;
-			error = ENOMEM;
-		}
-	}
-	if ((error == ENOENT) && addflag && lp) {
-		/* (re)initialize nfs_lock_pid info */
-		lp->lp_pid = pid;
-		lp->lp_pid_start = p->p_start;
-		/* insert pid in hash */
-		LIST_INSERT_HEAD(NFS_LOCK_PID_HASH(lp->lp_pid), lp, lp_hash);
-		lp->lp_valid = 1;
-		lp->lp_time = now.tv_sec;
-		TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru);
-		error = 0;
-	}
-
-	if ((mlp && (lp != mlp)) || TAILQ_FIRST(&nfs_lock_pid_free)) {
-		lck_mtx_unlock(nfs_lock_mutex);
-		if (mlp && (lp != mlp)) {
-			/* we didn't need this one, so we can free it */
-			FREE(mlp, M_TEMP);
-		}
-		/* free up any stale entries */
-		while ((lp = TAILQ_FIRST(&nfs_lock_pid_free))) {
-			TAILQ_REMOVE(&nfs_lock_pid_free, lp, lp_lru);
-			FREE(lp, M_TEMP);
-		}
-		lck_mtx_lock(nfs_lock_mutex);
-	}
-
-	return (error);
-}
-
 #define MACH_MAX_TRIES 3
 
 int
@@ -551,186 +400,49 @@ nfs_lockd_send_request(LOCKD_MSG *msg, int interruptable)
  * NFS advisory byte-level locks (client)
  */
 int
-nfs3_vnop_advlock(
-	struct vnop_advlock_args /* {
-		struct vnodeop_desc *a_desc;
-		vnode_t a_vp;
-		caddr_t a_id;
-		int a_op;
-		struct flock *a_fl;
-		int a_flags;
-		vfs_context_t a_context;
-	} */ *ap)
+nfs3_lockd_request(
+	nfsnode_t np,
+	int type,
+	LOCKD_MSG_REQUEST *msgreq,
+	int flags,
+	thread_t thd)
 {
-	vfs_context_t ctx;
-	proc_t p;
-	LOCKD_MSG_REQUEST msgreq;
-	LOCKD_MSG *msg;
-	vnode_t vp;
-	nfsnode_t np;
+	LOCKD_MSG *msg = &msgreq->lmr_msg;
 	int error, error2;
-	int interruptable, modified;
-	struct flock *fl;
+	int interruptable, slpflag;
 	struct nfsmount *nmp;
-	struct nfs_vattr nvattr;
-	off_t start, end;
 	struct timeval now;
-	int timeo, endtime, lastmsg, wentdown = 0;
-	int lockpidcheck, nfsvers;
-	struct sockaddr *saddr;
+	int timeo, starttime, endtime, lastmsg, wentdown = 0;
 	struct timespec ts;
+	struct sockaddr *saddr;
 
-	ctx = ap->a_context;
-	p = vfs_context_proc(ctx);
-	vp = ap->a_vp;
-	fl = ap->a_fl;
-	np = VTONFS(vp);
-
-	nmp = VTONMP(vp);
-	if (!nmp)
-		return (ENXIO);
-	lck_mtx_lock(&nmp->nm_lock);
-	if (nmp->nm_flag & NFSMNT_NOLOCKS) {
-		lck_mtx_unlock(&nmp->nm_lock);
-		return (ENOTSUP);
-	}
-	nfsvers = nmp->nm_vers;
-	lck_mtx_unlock(&nmp->nm_lock);
-
-	/*
-	 * The NLM protocol doesn't allow the server to return an error
-	 * on ranges, so we do it.  Pre LFS (Large File Summit)
-	 * standards required EINVAL for the range errors.  More recent
-	 * standards use EOVERFLOW, but their EINVAL wording still
-	 * encompasses these errors.
-	 * Any code sensitive to this is either:
-	 *  1) written pre-LFS and so can handle only EINVAL, or
-	 *  2) written post-LFS and thus ought to be tolerant of pre-LFS
-	 *     implementations.
-	 * Since returning EOVERFLOW certainly breaks 1), we return EINVAL.
-	 */
-	if (fl->l_whence != SEEK_END) {
-		if ((fl->l_whence != SEEK_CUR && fl->l_whence != SEEK_SET) ||
-		    fl->l_start < 0 ||
-		    (fl->l_len > 0 && fl->l_len - 1 > OFF_MAX - fl->l_start) ||
-		    (fl->l_len < 0 && fl->l_start + fl->l_len < 0))
-			return (EINVAL);
-	}
-
-	lck_mtx_lock(nfs_lock_mutex);
-
-	/*
-	 * Need to check if this process has successfully acquired an NFS lock before.
-	 * If not, and this is an unlock request we can simply return success here.
-	 */
-	lockpidcheck = nfs_lock_pid_check(p, 0);
-	lck_mtx_unlock(nfs_lock_mutex);
-	if (lockpidcheck) {
-		if (lockpidcheck != ENOENT)
-			return (lockpidcheck);
-		if ((ap->a_op == F_UNLCK) && nfs_lock_pid_hash_trusted)
-			return (0);
-	}
-
-	/*
-	 * The NFS Lock Manager protocol doesn't directly handle
-	 * negative lengths or SEEK_END, so we need to normalize
-	 * things here where we have all the info.
-	 * (Note: SEEK_CUR is already adjusted for at this point)
-	 */
-	/* Convert the flock structure into a start and end. */
-	switch (fl->l_whence) {
-	case SEEK_SET:
-	case SEEK_CUR:
-		/*
-		 * Caller is responsible for adding any necessary offset
-		 * to fl->l_start when SEEK_CUR is used.
-		 */
-		start = fl->l_start;
-		break;
-	case SEEK_END:
-		/* need to flush, and refetch attributes to make */
-		/* sure we have the correct end of file offset   */
-		if ((error = nfs_node_lock(np)))
-			return (error);
-		modified = (np->n_flag & NMODIFIED);
-		nfs_node_unlock(np);
-		if (modified && ((error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1))))
-			return (error);
-		if ((error = nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED)))
-			return (error);
-		nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
-		start = np->n_size + fl->l_start;
-		nfs_data_unlock(np);
-		break;
-	default:
-		return (EINVAL);
-	}
-	if (fl->l_len == 0)
-		end = -1;
-	else if (fl->l_len > 0)
-		end = start + fl->l_len - 1;
-	else { /* l_len is negative */
-		end = start - 1;
-		start += fl->l_len;
-	}
-	if (start < 0)
-		return (EINVAL);
-
-	if ((nfsvers == NFS_VER2) &&
-	    ((start >= 0x80000000) || (end >= 0x80000000)))
-		return (EINVAL);
-
-	/*
-	 * Fill in the information structure.
-	 * We set all values to zero with bzero to clear
-	 * out any information in the sockaddr_storage 
-	 * and nfs_filehandle contained in msgreq so that
-	 * we will not leak extraneous information out of 
-	 * the kernel when calling up to lockd via our mig
-	 * generated routine.
-	 */
-	bzero(&msgreq, sizeof(msgreq));
-	msg = &msgreq.lmr_msg;
-	msg->lm_version = LOCKD_MSG_VERSION;
-	msg->lm_flags = 0;
-
-	msg->lm_fl = *fl;
-	msg->lm_fl.l_start = start;
-	if (end != -1)
-		msg->lm_fl.l_len = end - start + 1;
-	msg->lm_fl.l_pid = vfs_context_pid(ctx);
-
-	if (ap->a_flags & F_WAIT)
-		msg->lm_flags |= LOCKD_MSG_BLOCK;
-	if (ap->a_op == F_GETLK)
-		msg->lm_flags |= LOCKD_MSG_TEST;
-
-	nmp = VTONMP(vp);
-	if (!nmp)
+	nmp = NFSTONMP(np);
+	if (!nmp || !nmp->nm_saddr)
 		return (ENXIO);
 
 	lck_mtx_lock(&nmp->nm_lock);
-	saddr = mbuf_data(nmp->nm_nam);
+	saddr = nmp->nm_saddr;
 	bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
-	msg->lm_fh_len = (nfsvers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
-	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
-	if (nfsvers == NFS_VER3)
+	if (nmp->nm_vers == NFS_VER3)
 		msg->lm_flags |= LOCKD_MSG_NFSV3;
-	cru2x(vfs_context_ucred(ctx), &msg->lm_cred);
+#if 0 /* not yet */
+	if (nmp->nm_sotype != SOCK_DGRAM)
+		msg->lm_flags |= LOCKD_MSG_TCP;
+#endif
 
 	microuptime(&now);
+	starttime = now.tv_sec;
 	lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
-	interruptable = nmp->nm_flag & NFSMNT_INT;
+	interruptable = NMFLAG(nmp, INTR);
 	lck_mtx_unlock(&nmp->nm_lock);
 
 	lck_mtx_lock(nfs_lock_mutex);
 
 	/* allocate unique xid */
 	msg->lm_xid = nfs_lockxid_get();
-	nfs_lockdmsg_enqueue(&msgreq);
+	nfs_lockdmsg_enqueue(msgreq);
 
-	timeo = 2;
+	timeo = 4;
 
 	for (;;) {
 		nfs_lockd_request_sent = 1;
@@ -751,7 +463,7 @@ nfs3_vnop_advlock(
 		 * Retry if it takes too long to get a response.
 		 *
 		 * The timeout numbers were picked out of thin air... they start
-		 * at 2 and double each timeout with a max of 60 seconds.
+		 * at 4 and double each timeout with a max of 30 seconds.
 		 *
 		 * In order to maintain responsiveness, we pass a small timeout
 		 * to msleep and calculate the timeouts ourselves.  This allows
@@ -759,15 +471,18 @@ nfs3_vnop_advlock(
 		 */
 wait_for_granted:
 		error = EWOULDBLOCK;
+		slpflag = (interruptable && (type != F_UNLCK)) ? PCATCH : 0;
 		ts.tv_sec = 2;
 		ts.tv_nsec = 0;
 		microuptime(&now);
 		endtime = now.tv_sec + timeo;
 		while (now.tv_sec < endtime) {
 			error = error2 = 0;
-			if (!msgreq.lmr_answered)
-				error = msleep(&msgreq, nfs_lock_mutex, PCATCH | PUSER, "lockd", &ts);
-			if (msgreq.lmr_answered) {
+			if (!msgreq->lmr_answered) {
+				error = msleep(msgreq, nfs_lock_mutex, slpflag | PUSER, "lockd", &ts);
+				slpflag = 0;
+			}
+			if (msgreq->lmr_answered) {
 				/*
 				 * Note: it's possible to have a lock granted at
 				 * essentially the same time that we get interrupted.
@@ -775,8 +490,8 @@ wait_for_granted:
 				 * error from this request or we might not unlock the
 				 * lock that's been granted.
 				 */
-				nmp = VTONMP(vp);
-				if ((msgreq.lmr_errno == ENOTSUP) && nmp &&
+				nmp = NFSTONMP(np);
+				if ((msgreq->lmr_errno == ENOTSUP) && nmp &&
 				    (nmp->nm_state & NFSSTA_LOCKSWORK)) {
 					/*
 					 * We have evidence that locks work, yet lockd
@@ -797,58 +512,81 @@ wait_for_granted:
 				break;
 			/* check that we still have our mount... */
 			/* ...and that we still support locks */
-			nmp = VTONMP(vp);
-			if ((error2 = nfs_sigintr(nmp, NULL, vfs_context_thread(ctx), 0))) {
+			/* ...and that there isn't a recovery pending */
+			nmp = NFSTONMP(np);
+			if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
 				error = error2;
-				if (fl->l_type == F_UNLCK)
-					printf("nfs_vnop_advlock: aborting unlock request, error %d\n", error);
+				if (type == F_UNLCK)
+					printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
 				break;
 			}
 			lck_mtx_lock(&nmp->nm_lock);
-			if (nmp->nm_flag & NFSMNT_NOLOCKS) {
+			if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
+				lck_mtx_unlock(&nmp->nm_lock);
+				break;
+			}
+			if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
+				/* recovery pending... return an error that'll get this operation restarted */
+				error = NFSERR_GRACE;
 				lck_mtx_unlock(&nmp->nm_lock);
 				break;
 			}
-			interruptable = nmp->nm_flag & NFSMNT_INT;
+			interruptable = NMFLAG(nmp, INTR);
 			lck_mtx_unlock(&nmp->nm_lock);
 			microuptime(&now);
 		}
 		if (error) {
 			/* check that we still have our mount... */
-			nmp = VTONMP(vp);
-			if ((error2 = nfs_sigintr(nmp, NULL, vfs_context_thread(ctx), 0))) {
+			nmp = NFSTONMP(np);
+			if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
 				error = error2;
 				if (error2 != EINTR) {
-					if (fl->l_type == F_UNLCK)
-						printf("nfs_vnop_advlock: aborting unlock request, error %d\n", error);
+					if (type == F_UNLCK)
+						printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
 					break;
 				}
 			}
 			/* ...and that we still support locks */
 			lck_mtx_lock(&nmp->nm_lock);
-			if (nmp->nm_flag & NFSMNT_NOLOCKS) {
+			if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
 				if (error == EWOULDBLOCK)
 					error = ENOTSUP;
 				lck_mtx_unlock(&nmp->nm_lock);
 				break;
 			}
-			interruptable = nmp->nm_flag & NFSMNT_INT;
-			if (error != EWOULDBLOCK) {
+			/* ...and that there isn't a recovery pending */
+			if ((error == EWOULDBLOCK) && (nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
+				/* recovery pending... return to allow recovery to occur */
+				error = NFSERR_DENIED;
+				lck_mtx_unlock(&nmp->nm_lock);
+				break;
+			}
+			interruptable = NMFLAG(nmp, INTR);
+			if ((error != EWOULDBLOCK) ||
+			    ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) ||
+			    ((flags & R_RECOVER) && ((now.tv_sec - starttime) > 30))) {
+				if ((error == EWOULDBLOCK) && (flags & R_RECOVER)) {
+					/* give up if this is for recovery and taking too long */
+					error = ETIMEDOUT;
+				} else if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
+					/* recovery pending... return an error that'll get this operation restarted */
+					error = NFSERR_GRACE;
+				}
 				lck_mtx_unlock(&nmp->nm_lock);
 				/*
 				 * We're going to bail on this request.
 				 * If we were a blocked lock request, send a cancel.
 				 */
-				if ((msgreq.lmr_errno == EINPROGRESS) &&
+				if ((msgreq->lmr_errno == EINPROGRESS) &&
 				    !(msg->lm_flags & LOCKD_MSG_CANCEL)) {
 					/* set this request up as a cancel */
 					msg->lm_flags |= LOCKD_MSG_CANCEL;
-					nfs_lockdmsg_dequeue(&msgreq);
+					nfs_lockdmsg_dequeue(msgreq);
 					msg->lm_xid = nfs_lockxid_get();
-					nfs_lockdmsg_enqueue(&msgreq);
-					msgreq.lmr_saved_errno = error;
-					msgreq.lmr_errno = 0;
-					msgreq.lmr_answered = 0;
+					nfs_lockdmsg_enqueue(msgreq);
+					msgreq->lmr_saved_errno = error;
+					msgreq->lmr_errno = 0;
+					msgreq->lmr_answered = 0;
 					/* reset timeout */
 					timeo = 2;
 					/* send cancel request */
@@ -859,18 +597,18 @@ wait_for_granted:
 
 			/* warn if we're not getting any response */
 			microuptime(&now);
-			if ((msgreq.lmr_errno != EINPROGRESS) &&
+			if ((msgreq->lmr_errno != EINPROGRESS) &&
 			    !(msg->lm_flags & LOCKD_MSG_DENIED_GRACE) &&
 			    (nmp->nm_tprintf_initial_delay != 0) &&
 			    ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
 				lck_mtx_unlock(&nmp->nm_lock);
 				lastmsg = now.tv_sec;
-				nfs_down(nmp, vfs_context_thread(ctx), 0, NFSSTA_LOCKTIMEO, "lockd not responding");
+				nfs_down(nmp, thd, 0, NFSSTA_LOCKTIMEO, "lockd not responding");
 				wentdown = 1;
 			} else
 				lck_mtx_unlock(&nmp->nm_lock);
 
-			if (msgreq.lmr_errno == EINPROGRESS) {
+			if (msgreq->lmr_errno == EINPROGRESS) {
 				/*
 				 * We've got a blocked lock request that we are
 				 * going to retry.  First, we'll want to try to
@@ -883,95 +621,63 @@ wait_for_granted:
 				 * it is NLM_BLOCKED).
 				 */
 				msg->lm_flags |= LOCKD_MSG_CANCEL;
-				nfs_lockdmsg_dequeue(&msgreq);
+				nfs_lockdmsg_dequeue(msgreq);
 				msg->lm_xid = nfs_lockxid_get();
-				nfs_lockdmsg_enqueue(&msgreq);
-				msgreq.lmr_saved_errno = msgreq.lmr_errno;
-				msgreq.lmr_errno = 0;
-				msgreq.lmr_answered = 0;
+				nfs_lockdmsg_enqueue(msgreq);
+				msgreq->lmr_saved_errno = msgreq->lmr_errno;
+				msgreq->lmr_errno = 0;
+				msgreq->lmr_answered = 0;
 				timeo = 2;
 				/* send cancel then resend request */
 				continue;
 			}
 
-			if (msg->lm_flags & LOCKD_MSG_DENIED_GRACE) {
-				/*
-				 * Time to resend a request previously denied due to a grace period.
-				 */
-				msg->lm_flags &= ~LOCKD_MSG_DENIED_GRACE;
-				nfs_lockdmsg_dequeue(&msgreq);
-				msg->lm_xid = nfs_lockxid_get();
-				nfs_lockdmsg_enqueue(&msgreq);
-				msgreq.lmr_saved_errno = 0;
-				msgreq.lmr_errno = 0;
-				msgreq.lmr_answered = 0;
-				timeo = 2;
-				/* resend request */
-				continue;
-			}
-
 			/*
 			 * We timed out, so we will resend the request.
 			 */
-			timeo *= 2;
-			if (timeo > 60)
-				timeo = 60;
+			if (!(flags & R_RECOVER))
+				timeo *= 2;
+			if (timeo > 30)
+				timeo = 30;
 			/* resend request */
 			continue;
 		}
 
 		/* we got a response, so the server's lockd is OK */
-		nfs_up(VTONMP(vp), vfs_context_thread(ctx), NFSSTA_LOCKTIMEO,
+		nfs_up(NFSTONMP(np), thd, NFSSTA_LOCKTIMEO,
 			wentdown ? "lockd alive again" : NULL);
 		wentdown = 0;
 
-		if (msgreq.lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) {
+		if (msgreq->lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) {
 			/*
 			 * The lock request was denied because the server lockd is
 			 * still in its grace period.  So, we need to try the
-			 * request again in a little bit.
+			 * request again in a little bit.  Return the GRACE error so
+			 * the higher levels can perform the retry.
 			 */
-			timeo = 4;
-			msgreq.lmr_answered = 0;
-			goto wait_for_granted;
+			msgreq->lmr_saved_errno = msgreq->lmr_errno = error = NFSERR_GRACE;
 		}
 
-		if (msgreq.lmr_errno == EINPROGRESS) {
+		if (msgreq->lmr_errno == EINPROGRESS) {
 			/* got NLM_BLOCKED response */
 			/* need to wait for NLM_GRANTED */
-			timeo = 60;
-			msgreq.lmr_answered = 0;
+			timeo = 30;
+			msgreq->lmr_answered = 0;
 			goto wait_for_granted;
 		}
 
 		if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
-		    (msgreq.lmr_saved_errno == EINPROGRESS)) {
+		    (msgreq->lmr_saved_errno == EINPROGRESS)) {
 			/*
 			 * We just got a successful reply to the
 			 * cancel of the previous blocked lock request.
-			 * Now, go ahead and resend the request.
+			 * Now, go ahead and return a DENIED error so the
+			 * higher levels can resend the request.
 			 */
 			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
-			nfs_lockdmsg_dequeue(&msgreq);
-			msg->lm_xid = nfs_lockxid_get();
-			nfs_lockdmsg_enqueue(&msgreq);
-			msgreq.lmr_saved_errno = 0;
-			msgreq.lmr_errno = 0;
-			msgreq.lmr_answered = 0;
-			timeo = 2;
-			/* resend request */
-			continue;
-		}
-
-		if ((msg->lm_flags & LOCKD_MSG_TEST) && msgreq.lmr_errno == 0) {
-			if (msg->lm_fl.l_type != F_UNLCK) {
-				fl->l_type = msg->lm_fl.l_type;
-				fl->l_pid = msg->lm_fl.l_pid;
-				fl->l_start = msg->lm_fl.l_start;
-				fl->l_len = msg->lm_fl.l_len;
-				fl->l_whence = SEEK_SET;
-			} else
-				fl->l_type = F_UNLCK;
+			nfs_lockdmsg_dequeue(msgreq);
+			error = NFSERR_DENIED;
+			break;
 		}
 
 		/*
@@ -981,11 +687,12 @@ wait_for_granted:
 		 */
 		if (msg->lm_flags & LOCKD_MSG_CANCEL) {
 			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
-			error = msgreq.lmr_saved_errno;
-		} else
-			error = msgreq.lmr_errno;
+			error = msgreq->lmr_saved_errno;
+		} else {
+			error = msgreq->lmr_errno;
+		}
 
-		nmp = VTONMP(vp);
+		nmp = NFSTONMP(np);
 		if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) {
 			/*
 			 * We have NO evidence that locks work and lockd
@@ -993,12 +700,18 @@ wait_for_granted:
 			 * that locks aren't supported and disable them
 			 * for this mount.
 			 */
+			nfs_lockdmsg_dequeue(msgreq);
+			lck_mtx_unlock(nfs_lock_mutex);
 			lck_mtx_lock(&nmp->nm_lock);
-			nmp->nm_flag |= NFSMNT_NOLOCKS;
+			if (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED) {
+				nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
+				nfs_lockd_mount_unregister(nmp);
+			}
 			nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
 			lck_mtx_unlock(&nmp->nm_lock);
 			printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n",
 				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
+			return (error);
 		}
 		if (!error) {
 			/* record that NFS file locking has worked on this mount */
@@ -1008,35 +721,162 @@ wait_for_granted:
 					nmp->nm_state |= NFSSTA_LOCKSWORK;
 				lck_mtx_unlock(&nmp->nm_lock);
 			}
-			/*
-			 * If we successfully acquired a lock, make sure this pid
-			 * is in the nfs_lock_pid hash table so we know we can't
-			 * short-circuit unlock requests.
-			 */
-			if ((lockpidcheck == ENOENT) &&
-			    ((ap->a_op == F_SETLK) || (ap->a_op == F_SETLKW))) {
-				error = nfs_lock_pid_check(p, 1);
-				if (error) {
-					/*
-					 * We couldn't add the pid to the table,
-					 * so we can no longer trust that a pid
-					 * not in the table has no locks.
-					 */
-					nfs_lock_pid_hash_trusted = 0;
-					printf("nfs_vnop_advlock: pid add failed - no longer trusted\n");
-				}
-			}
 		}
 		break;
 	}
 
-	nfs_lockdmsg_dequeue(&msgreq);
+	nfs_lockdmsg_dequeue(msgreq);
 
 	lck_mtx_unlock(nfs_lock_mutex);
 
 	return (error);
 }
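/*
 * Illustrative sketch of the retry timing nfs3_lockd_request uses, as a
 * standalone (hypothetical, userspace) program: wait for an answer, and on
 * each timeout resend and double the timeout from 4s up to a 30s cap
 * (except R_RECOVER requests, which resend at a fixed interval).  All
 * names below are made up for illustration only.
 */
#include <stdio.h>

static void
lockd_backoff_demo(int recover, int answered_after)
{
	int timeo = 4;		/* initial request timeout, in seconds */
	int elapsed = 0;

	while (elapsed < answered_after) {
		/* the real code msleep()s in 2-second slices inside this window */
		elapsed += timeo;
		printf("t=%3ds: timed out (timeo=%ds), resending\n", elapsed, timeo);
		if (!recover)
			timeo *= 2;
		if (timeo > 30)
			timeo = 30;
	}
	printf("t=%3ds: answered\n", elapsed);
}

int
main(void)
{
	lockd_backoff_demo(0, 90);	/* normal request: timeouts 4, 8, 16, 30, 30, ... */
	return (0);
}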
 
+/*
+ * Send an NLM LOCK message to the server
+ */
+int
+nfs3_setlock_rpc(
+	nfsnode_t np,
+	struct nfs_open_file *nofp,
+	struct nfs_file_lock *nflp,
+	int reclaim,
+	int flags,
+	thread_t thd,
+	kauth_cred_t cred)
+{
+	struct nfs_lock_owner *nlop = nflp->nfl_owner;
+	struct nfsmount *nmp;
+	int error;
+	LOCKD_MSG_REQUEST msgreq;
+	LOCKD_MSG *msg;
+
+	nmp = NFSTONMP(np);
+	if (!nmp)
+		return (ENXIO);
+
+	if (!nlop->nlo_open_owner) {
+		nfs_open_owner_ref(nofp->nof_owner);
+		nlop->nlo_open_owner = nofp->nof_owner;
+	}
+	if ((error = nfs_lock_owner_set_busy(nlop, thd)))
+		return (error);
+
+	/* set up lock message request structure */
+	bzero(&msgreq, sizeof(msgreq));
+	msg = &msgreq.lmr_msg;
+	msg->lm_version = LOCKD_MSG_VERSION;
+	if ((nflp->nfl_flags & NFS_FILE_LOCK_WAIT) && !reclaim)
+		msg->lm_flags |= LOCKD_MSG_BLOCK;
+	if (reclaim)
+		msg->lm_flags |= LOCKD_MSG_RECLAIM;
+	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
+	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
+	cru2x(cred, &msg->lm_cred);
+
+	msg->lm_fl.l_whence = SEEK_SET;
+	msg->lm_fl.l_start = nflp->nfl_start;
+	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end);
+	msg->lm_fl.l_type = nflp->nfl_type;
+	msg->lm_fl.l_pid = nlop->nlo_pid;
+
+	error = nfs3_lockd_request(np, 0, &msgreq, flags, thd);
+
+	nfs_lock_owner_clear_busy(nlop);
+	return (error);
+}
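/*
 * Sketch of the [start, end] to flock translation done above, assuming
 * NFS_FLOCK_LENGTH(S, E) yields 0 (meaning "to end of file") when E is
 * UINT64_MAX and (E - S + 1) otherwise.  Hypothetical standalone version;
 * DEMO_FLOCK_LENGTH is not the kernel macro.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_FLOCK_LENGTH(S, E) (((E) == UINT64_MAX) ? 0 : ((E) - (S) + 1))

int
main(void)
{
	uint64_t start = 100;

	/* a bounded range 100..199 becomes l_start 100, l_len 100 */
	printf("l_len=%llu\n", (unsigned long long)DEMO_FLOCK_LENGTH(start, 199ULL));
	/* a lock to EOF (end == UINT64_MAX) becomes l_len 0 */
	printf("l_len=%llu\n", (unsigned long long)DEMO_FLOCK_LENGTH(start, UINT64_MAX));
	return (0);
}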
+
+/*
+ * Send an NLM UNLOCK message to the server
+ */
+int
+nfs3_unlock_rpc(
+	nfsnode_t np,
+	struct nfs_lock_owner *nlop,
+	__unused int type,
+	uint64_t start,
+	uint64_t end,
+	int flags,
+	thread_t thd,
+	kauth_cred_t cred)
+{
+	struct nfsmount *nmp;
+	LOCKD_MSG_REQUEST msgreq;
+	LOCKD_MSG *msg;
+
+	nmp = NFSTONMP(np);
+	if (!nmp)
+		return (ENXIO);
+
+	/* set up lock message request structure */
+	bzero(&msgreq, sizeof(msgreq));
+	msg = &msgreq.lmr_msg;
+	msg->lm_version = LOCKD_MSG_VERSION;
+	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
+	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
+	cru2x(cred, &msg->lm_cred);
+
+	msg->lm_fl.l_whence = SEEK_SET;
+	msg->lm_fl.l_start = start;
+	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
+	msg->lm_fl.l_type = F_UNLCK;
+	msg->lm_fl.l_pid = nlop->nlo_pid;
+
+	return (nfs3_lockd_request(np, F_UNLCK, &msgreq, flags, thd));
+}
+
+/*
+ * Send an NLM LOCK TEST message to the server
+ */
+int
+nfs3_getlock_rpc(
+	nfsnode_t np,
+	struct nfs_lock_owner *nlop,
+	struct flock *fl,
+	uint64_t start,
+	uint64_t end,
+	vfs_context_t ctx)
+{
+	struct nfsmount *nmp;
+	int error;
+	LOCKD_MSG_REQUEST msgreq;
+	LOCKD_MSG *msg;
+
+	nmp = NFSTONMP(np);
+	if (!nmp)
+		return (ENXIO);
+
+	/* set up lock message request structure */
+	bzero(&msgreq, sizeof(msgreq));
+	msg = &msgreq.lmr_msg;
+	msg->lm_version = LOCKD_MSG_VERSION;
+	msg->lm_flags |= LOCKD_MSG_TEST;
+	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
+	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
+	cru2x(vfs_context_ucred(ctx), &msg->lm_cred);
+
+	msg->lm_fl.l_whence = SEEK_SET;
+	msg->lm_fl.l_start = start;
+	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
+	msg->lm_fl.l_type = fl->l_type;
+	msg->lm_fl.l_pid = nlop->nlo_pid;
+
+	error = nfs3_lockd_request(np, 0, &msgreq, 0, vfs_context_thread(ctx));
+
+	if (!error && (msg->lm_flags & LOCKD_MSG_TEST) && !msgreq.lmr_errno) {
+		if (msg->lm_fl.l_type != F_UNLCK) {
+			fl->l_type = msg->lm_fl.l_type;
+			fl->l_pid = msg->lm_fl.l_pid;
+			fl->l_start = msg->lm_fl.l_start;
+			fl->l_len = msg->lm_fl.l_len;
+			fl->l_whence = SEEK_SET;
+		} else
+			fl->l_type = F_UNLCK;
+	}
+
+	return (error);
+}
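/*
 * Hypothetical sketch of consuming an NLM TEST result like the one
 * nfs3_getlock_rpc fills in: l_type is F_UNLCK when the range is free;
 * otherwise the flock fields describe one conflicting lock.
 */
#include <fcntl.h>
#include <stdio.h>

static void
report_getlock_result(const struct flock *fl)
{
	if (fl->l_type == F_UNLCK) {
		printf("range is unlocked\n");
		return;
	}
	printf("conflict: %s lock, pid %d, start %lld, len %lld\n",
	    (fl->l_type == F_WRLCK) ? "write" : "read",
	    (int)fl->l_pid, (long long)fl->l_start, (long long)fl->l_len);
}

int
main(void)
{
	struct flock fl = { .l_type = F_UNLCK };

	report_getlock_result(&fl);
	return (0);
}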
+
 /*
  * nfslockdans --
  *      NFS advisory byte-level locks answer from the lock daemon.
@@ -1105,3 +945,58 @@ nfslockdans(proc_t p, struct lockd_ans *ansp)
 	return (0);
 }
 
+/*
+ * nfslockdnotify --
+ *      NFS host restart notification from the lock daemon.
+ *
+ * Used to initiate reclaiming of held locks when a server we
+ * have mounted reboots.
+ */
+int
+nfslockdnotify(proc_t p, user_addr_t argp)
+{
+	int error, i, headsize;
+	struct lockd_notify ln;
+	struct nfsmount *nmp;
+	struct sockaddr *saddr;
+
+	/* Let root make this call. */
+	error = proc_suser(p);
+	if (error)
+		return (error);
+
+	headsize = (char*)&ln.ln_addr[0] - (char*)&ln.ln_version;
+	error = copyin(argp, &ln, headsize);
+	if (error)
+		return (error);
+	if (ln.ln_version != LOCKD_NOTIFY_VERSION)
+		return (EINVAL);
+	if ((ln.ln_addrcount < 1) || (ln.ln_addrcount > 128))
+		return (EINVAL);
+	argp += headsize;
+	saddr = (struct sockaddr *)&ln.ln_addr[0];
+
+	lck_mtx_lock(nfs_lock_mutex);
+
+	for (i=0; i < ln.ln_addrcount; i++) {
+		error = copyin(argp, &ln.ln_addr[0], sizeof(ln.ln_addr[0]));
+		if (error)
+			break;
+		argp += sizeof(ln.ln_addr[0]);
+		/* scan lockd mount list for match to this address */
+		TAILQ_FOREACH(nmp, &nfs_lockd_mount_list, nm_ldlink) {
+			/* check if address matches this mount's server address */
+			if (!nmp->nm_saddr || nfs_sockaddr_cmp(saddr, nmp->nm_saddr))
+				continue;
+			/* We have a match!  Mark it as needing recovery. */
+			lck_mtx_lock(&nmp->nm_lock);
+			nfs_need_recover(nmp, 0);
+			lck_mtx_unlock(&nmp->nm_lock);
+		}
+	}
+
+	lck_mtx_unlock(nfs_lock_mutex);
+
+	return (error);
+}
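/*
 * The headsize computation in nfslockdnotify is pointer arithmetic for
 * "offset of ln_addr within struct lockd_notify" (ln_version being the
 * first member).  A standalone sketch of the equivalence, using a
 * stand-in struct with the same shape:
 */
#include <stddef.h>
#include <stdio.h>
#include <sys/socket.h>

struct demo_notify {
	int			ln_version;
	int			ln_flags;
	int			ln_pad;
	int			ln_addrcount;
	struct sockaddr_storage	ln_addr[1];
};

int
main(void)
{
	struct demo_notify ln;
	long headsize = (char *)&ln.ln_addr[0] - (char *)&ln.ln_version;

	/* both expressions name the fixed header before the address array */
	printf("%ld == %zu\n", headsize, offsetof(struct demo_notify, ln_addr));
	return (0);
}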
+
diff --git a/bsd/nfs/nfs_lock.h b/bsd/nfs/nfs_lock.h
index 7bd4e91a8..5a5efe3e4 100644
--- a/bsd/nfs/nfs_lock.h
+++ b/bsd/nfs/nfs_lock.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002-2008 Apple Inc.  All rights reserved.
+ * Copyright (c) 2002-2010 Apple Inc.  All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -91,6 +91,8 @@ typedef struct nfs_lock_msg {
 #define LOCKD_MSG_NFSV3		0x0004  /* NFSv3 request */
 #define LOCKD_MSG_CANCEL	0x0008  /* cancelling blocked request */
 #define LOCKD_MSG_DENIED_GRACE	0x0010	/* lock denied due to grace period */
+#define LOCKD_MSG_RECLAIM	0x0020  /* lock reclaim request */
+#define LOCKD_MSG_TCP		0x0040  /* (try to) use TCP for request */
 
 /* The structure used to maintain the pending request queue */
 typedef struct nfs_lock_msg_request {
@@ -128,11 +130,26 @@ struct lockd_ans {
 #define LOCKD_ANS_DENIED_GRACE	0x0008	/* lock denied due to grace period */
 
 
+/*
+ * The structure that lockd hands the kernel for each notify.
+ */
+#define LOCKD_NOTIFY_VERSION	1
+struct lockd_notify {
+	int			ln_version;		/* lockd_notify version */
+	int			ln_flags;		/* notify flags */
+	int			ln_pad;			/* (for alignment) */
+	int			ln_addrcount;		/* # of addresses */
+	struct sockaddr_storage	ln_addr[1];		/* List of addresses. */
+};
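/*
 * Illustrative sizing for the ln_addr[1] trailing-array idiom above: a
 * sender with n addresses allocates the fixed header plus n full array
 * elements.  Hypothetical userspace helper (lockd_notify_alloc is a
 * made-up name); uses struct lockd_notify and LOCKD_NOTIFY_VERSION as
 * declared above.
 */
#include <stddef.h>	/* offsetof */
#include <stdlib.h>	/* calloc */

static struct lockd_notify *
lockd_notify_alloc(int n)
{
	size_t bytes = offsetof(struct lockd_notify, ln_addr) +
	    (size_t)n * sizeof(struct sockaddr_storage);
	struct lockd_notify *lnp = calloc(1, bytes);

	if (lnp != NULL) {
		lnp->ln_version = LOCKD_NOTIFY_VERSION;
		lnp->ln_addrcount = n;
		/* caller then fills lnp->ln_addr[0 .. n-1] */
	}
	return (lnp);
}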
+
+
 #ifdef KERNEL
 void	nfs_lockinit(void);
-void	nfs_lockd_mount_change(int);
-int	nfs3_vnop_advlock(struct vnop_advlock_args *ap);
+void	nfs_lockd_mount_register(struct nfsmount *);
+void	nfs_lockd_mount_unregister(struct nfsmount *);
+int	nfs3_lockd_request(nfsnode_t, int, LOCKD_MSG_REQUEST *, int, thread_t);
 int	nfslockdans(proc_t p, struct lockd_ans *ansp);
+int	nfslockdnotify(proc_t p, user_addr_t argp);
 
 #endif
 #endif /* __APPLE_API_PRIVATE */
diff --git a/bsd/nfs/nfs_node.c b/bsd/nfs/nfs_node.c
index 7d1926787..b3f2a47b9 100644
--- a/bsd/nfs/nfs_node.c
+++ b/bsd/nfs/nfs_node.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -75,6 +75,7 @@
 #include <sys/vnode.h>
 #include <sys/ubc.h>
 #include <sys/malloc.h>
+#include <sys/fcntl.h>
 
 #include <nfs/rpcv2.h>
 #include <nfs/nfsproto.h>
@@ -145,6 +146,7 @@ nfs_nget(
 	int fhsize,
 	struct nfs_vattr *nvap,
 	u_int64_t *xidp,
+	uint32_t auth,
 	int flags,
 	nfsnode_t *npp)
 {
@@ -175,6 +177,21 @@ loop:
 		if (mp != mp2 || np->n_fhsize != fhsize ||
 		    bcmp(fhp, np->n_fhp, fhsize))
 			continue;
+		if (nvap && (nvap->nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) &&
+		    cnp && (cnp->cn_namelen > (fhsize - (int)sizeof(dnp)))) {
+			/* The name was too long to fit in the file handle.  Check it against the node's name. */
+			int namecmp = 0;
+			const char *vname = vnode_getname(NFSTOV(np));
+			if (vname) {
+				if (cnp->cn_namelen != (int)strlen(vname))
+					namecmp = 1;
+				else
+					namecmp = strncmp(vname, cnp->cn_nameptr, cnp->cn_namelen);
+				vnode_putname(vname);
+			}
+			if (namecmp)  /* full name didn't match */
+				continue;
+		}
 		FSDBG(263, dnp, np, np->n_flag, 0xcace0000);
 		/* if the node is locked, sleep on it */
 		if ((np->n_hflag & NHLOCKED) && !(flags & NG_NOCREATE)) {
@@ -246,10 +263,21 @@ loop:
 	bzero(np, sizeof *np);
 	np->n_hflag |= (NHINIT | NHLOCKED);
 	np->n_mount = mp;
+	np->n_auth = auth;
 	TAILQ_INIT(&np->n_opens);
 	TAILQ_INIT(&np->n_lock_owners);
 	TAILQ_INIT(&np->n_locks);
 	np->n_dlink.tqe_next = NFSNOLIST;
+	np->n_dreturn.tqe_next = NFSNOLIST;
+	np->n_monlink.le_next = NFSNOLIST;
+
+	/* ugh... need to keep track of ".zfs" directories to work around server bugs */
+	if ((nvap->nva_type == VDIR) && cnp && (cnp->cn_namelen == 4) &&
+	    (cnp->cn_nameptr[0] == '.') && (cnp->cn_nameptr[1] == 'z') &&
+	    (cnp->cn_nameptr[2] == 'f') && (cnp->cn_nameptr[3] == 's'))
+		np->n_flag |= NISDOTZFS;
+	if (dnp && (dnp->n_flag & NISDOTZFS))
+		np->n_flag |= NISDOTZFSCHILD;
 
 	if (dnp && cnp && ((cnp->cn_namelen != 2) ||
 	    (cnp->cn_nameptr[0] != '.') || (cnp->cn_nameptr[1] != '.'))) {
@@ -293,6 +321,8 @@ loop:
 	lck_mtx_unlock(nfs_node_hash_mutex);
 
 	/* do initial loading of attributes */
+	NACLINVALIDATE(np);
+	NACCESSINVALIDATE(np);
 	error = nfs_loadattrcache(np, nvap, xidp, 1);
 	if (error) {
 		FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
@@ -325,7 +355,6 @@ loop:
 	NFS_CHANGED_UPDATE(nfsvers, np, nvap);
 	if (nvap->nva_type == VDIR)
 		NFS_CHANGED_UPDATE_NC(nfsvers, np, nvap);
-	NMODEINVALIDATE(np);
 
 	/* now, attempt to get a new vnode */
 	vfsp.vnfs_mp = mp;
@@ -363,7 +392,21 @@ loop:
 	if (!dnp || !cnp || !(flags & NG_MAKEENTRY))
 		vfsp.vnfs_flags |= VNFS_NOCACHE;
 
-	error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &np->n_vnode);
+#if CONFIG_TRIGGERS
+	if ((nfsvers >= NFS_VER4) && (nvap->nva_type == VDIR) && (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER)) {
+		struct vnode_trigger_param vtp;
+		bzero(&vtp, sizeof(vtp));
+		bcopy(&vfsp, &vtp.vnt_params, sizeof(vfsp));
+		vtp.vnt_resolve_func = nfs_mirror_mount_trigger_resolve;
+		vtp.vnt_unresolve_func = nfs_mirror_mount_trigger_unresolve;
+		vtp.vnt_rearm_func = nfs_mirror_mount_trigger_rearm;
+		vtp.vnt_flags = VNT_AUTO_REARM;
+		error = vnode_create(VNCREATE_TRIGGER, VNCREATE_TRIGGER_SIZE, &vtp, &np->n_vnode);
+	} else
+#endif
+	{
+		error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &np->n_vnode);
+	}
 	if (error) {
 		FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
 		nfs_node_unlock(np);
@@ -425,57 +468,58 @@ nfs_vnop_inactive(ap)
 	nfsnode_t np = VTONFS(ap->a_vp);
 	struct nfs_sillyrename *nsp;
 	struct nfs_vattr nvattr;
-	int unhash, attrerr, busyerror, error, inuse, busied;
+	int unhash, attrerr, busyerror, error, inuse, busied, force;
 	struct nfs_open_file *nofp;
-	const char *vname = NULL;
 	struct componentname cn;
 	struct nfsmount *nmp = NFSTONMP(np);
+	mount_t mp = vnode_mount(vp);
 
 restart:
+	force = (!mp || (mp->mnt_kern_flag & MNTK_FRCUNMOUNT));
 	error = 0;
-	inuse = ((nmp->nm_vers >= NFS_VER4) && (nfs_mount_state_in_use_start(nmp) == 0));
+	inuse = (nfs_mount_state_in_use_start(nmp, NULL) == 0);
 
 	/* There shouldn't be any open or lock state at this point */
 	lck_mtx_lock(&np->n_openlock);
-	if (np->n_openrefcnt) {
-		vname = vnode_getname(vp);
-		printf("nfs_vnop_inactive: still open: %d %s\n", np->n_openrefcnt, vname ? vname : "//");
-	}
+	if (np->n_openrefcnt && !force)
+		NP(np, "nfs_vnop_inactive: still open: %d", np->n_openrefcnt);
 	TAILQ_FOREACH(nofp, &np->n_opens, nof_link) {
 		lck_mtx_lock(&nofp->nof_lock);
 		if (nofp->nof_flags & NFS_OPEN_FILE_BUSY) {
-			if (!vname)
-				vname = vnode_getname(vp);
-			printf("nfs_vnop_inactive: open file busy: %s\n", vname ? vname : "//");
+			if (!force)
+				NP(np, "nfs_vnop_inactive: open file busy");
 			busied = 0;
 		} else {
 			nofp->nof_flags |= NFS_OPEN_FILE_BUSY;
 			busied = 1;
 		}
 		lck_mtx_unlock(&nofp->nof_lock);
+		if ((np->n_flag & NREVOKE) || (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
+			if (busied)
+				nfs_open_file_clear_busy(nofp);
+			continue;
+		}
 		/*
 		 * If we just created the file, we already had it open in
 		 * anticipation of getting a subsequent open call.  If the
 		 * node has gone inactive without being open, we need to
 		 * clean up (close) the open done in the create.
 		 */
-		if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) && nofp->nof_creator) {
+		if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) && nofp->nof_creator && !force) {
 			if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) {
 				lck_mtx_unlock(&np->n_openlock);
 				if (busied)
 					nfs_open_file_clear_busy(nofp);
 				if (inuse)
 					nfs_mount_state_in_use_end(nmp, 0);
-				nfs4_reopen(nofp, vfs_context_thread(ctx));
-				goto restart;
+				if (!nfs4_reopen(nofp, NULL))
+					goto restart;
 			}
 			nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
 			lck_mtx_unlock(&np->n_openlock);
-			error = nfs4_close(np, nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, ctx);
+			error = nfs_close(np, nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, ctx);
 			if (error) {
-				if (!vname)
-					vname = vnode_getname(vp);
-				printf("nfs_vnop_inactive: create close error: %d, %s\n", error, vname);
+				NP(np, "nfs_vnop_inactive: create close error: %d", error);
 				nofp->nof_flags |= NFS_OPEN_FILE_CREATE;
 			}
 			if (busied)
@@ -495,21 +539,19 @@ restart:
 				nofp->nof_r--;
 				nofp->nof_opencnt--;
 				nofp->nof_access = 0;
-			} else {
+			} else if (!force) {
 				lck_mtx_unlock(&np->n_openlock);
 				if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) {
 					if (busied)
 						nfs_open_file_clear_busy(nofp);
 					if (inuse)
 						nfs_mount_state_in_use_end(nmp, 0);
-					nfs4_reopen(nofp, vfs_context_thread(ctx));
-					goto restart;
+					if (!nfs4_reopen(nofp, NULL))
+						goto restart;
 				}
-				error = nfs4_close(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx);
+				error = nfs_close(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx);
 				if (error) {
-					if (!vname)
-						vname = vnode_getname(vp);
-					printf("nfs_vnop_inactive: need close error: %d, %s\n", error, vname);
+					NP(np, "nfs_vnop_inactive: need close error: %d", error);
 					nofp->nof_flags |= NFS_OPEN_FILE_NEEDCLOSE;
 				}
 				if (busied)
@@ -519,32 +561,33 @@ restart:
 				goto restart;
 			}
 		}
-		if (nofp->nof_opencnt) {
-			if (!vname)
-				vname = vnode_getname(vp);
-			printf("nfs_vnop_inactive: file still open: %d %s\n", nofp->nof_opencnt, vname ? vname : "//");
-		}
-		if (nofp->nof_access || nofp->nof_deny ||
+		if (nofp->nof_opencnt && !force)
+			NP(np, "nfs_vnop_inactive: file still open: %d", nofp->nof_opencnt);
+		if (!force && (nofp->nof_access || nofp->nof_deny ||
 		    nofp->nof_mmap_access || nofp->nof_mmap_deny ||
 		    nofp->nof_r || nofp->nof_w || nofp->nof_rw ||
 		    nofp->nof_r_dw || nofp->nof_w_dw || nofp->nof_rw_dw ||
-		    nofp->nof_r_drw || nofp->nof_w_drw || nofp->nof_rw_drw) {
-			if (!vname)
-				vname = vnode_getname(vp);
-			printf("nfs_vnop_inactive: non-zero access: %d %d %d %d # %u %u %u dw %u %u %u drw %u %u %u %s\n",
+		    nofp->nof_r_drw || nofp->nof_w_drw || nofp->nof_rw_drw ||
+		    nofp->nof_d_r || nofp->nof_d_w || nofp->nof_d_rw ||
+		    nofp->nof_d_r_dw || nofp->nof_d_w_dw || nofp->nof_d_rw_dw ||
+		    nofp->nof_d_r_drw || nofp->nof_d_w_drw || nofp->nof_d_rw_drw)) {
+			NP(np, "nfs_vnop_inactive: non-zero access: %d %d %d %d # %u.%u %u.%u %u.%u dw %u.%u %u.%u %u.%u drw %u.%u %u.%u %u.%u",
 				nofp->nof_access, nofp->nof_deny,
 				nofp->nof_mmap_access, nofp->nof_mmap_deny,
-				nofp->nof_r, nofp->nof_w, nofp->nof_rw,
-				nofp->nof_r_dw, nofp->nof_w_dw, nofp->nof_rw_dw,
-				nofp->nof_r_drw, nofp->nof_w_drw, nofp->nof_rw_drw,
-				vname ? vname : "//");
+				nofp->nof_r, nofp->nof_d_r,
+				nofp->nof_w, nofp->nof_d_w,
+				nofp->nof_rw, nofp->nof_d_rw,
+				nofp->nof_r_dw, nofp->nof_d_r_dw,
+				nofp->nof_w_dw, nofp->nof_d_w_dw,
+				nofp->nof_rw_dw, nofp->nof_d_rw_dw,
+				nofp->nof_r_drw, nofp->nof_d_r_drw,
+				nofp->nof_w_drw, nofp->nof_d_w_drw,
+				nofp->nof_rw_drw, nofp->nof_d_rw_drw);
 		}
 		if (busied)
 			nfs_open_file_clear_busy(nofp);
 	}
 	lck_mtx_unlock(&np->n_openlock);
-	if (vname)
-		vnode_putname(vname);
 
 	if (inuse && nfs_mount_state_in_use_end(nmp, error))
 		goto restart;
@@ -673,42 +716,59 @@ nfs_vnop_reclaim(ap)
 	struct nfs_open_file *nofp, *nextnofp;
 	struct nfs_file_lock *nflp, *nextnflp;
 	struct nfs_lock_owner *nlop, *nextnlop;
-	const char *vname = NULL;
 	struct nfsmount *nmp = np->n_mount ? VFSTONFS(np->n_mount) : NFSTONMP(np);
+	mount_t mp = vnode_mount(vp);
+	int force;
 
 	FSDBG_TOP(265, vp, np, np->n_flag, 0);
+	force = (!mp || (mp->mnt_kern_flag & MNTK_FRCUNMOUNT));
 
 	/* There shouldn't be any open or lock state at this point */
 	lck_mtx_lock(&np->n_openlock);
 
 	if (nmp && (nmp->nm_vers >= NFS_VER4)) {
 		/* need to drop a delegation */
+		if (np->n_dreturn.tqe_next != NFSNOLIST) {
+			/* remove this node from the delegation return list */
+			lck_mtx_lock(&nmp->nm_lock);
+			if (np->n_dreturn.tqe_next != NFSNOLIST) {
+				TAILQ_REMOVE(&nmp->nm_dreturnq, np, n_dreturn);
+				np->n_dreturn.tqe_next = NFSNOLIST;
+			}
+			lck_mtx_unlock(&nmp->nm_lock);
+		}
 		if (np->n_dlink.tqe_next != NFSNOLIST) {
-			/* remove this node from the recall list */
+			/* remove this node from the delegation list */
 			lck_mtx_lock(&nmp->nm_lock);
 			if (np->n_dlink.tqe_next != NFSNOLIST) {
-				TAILQ_REMOVE(&nmp->nm_recallq, np, n_dlink);
+				TAILQ_REMOVE(&nmp->nm_delegations, np, n_dlink);
 				np->n_dlink.tqe_next = NFSNOLIST;
 			}
 			lck_mtx_unlock(&nmp->nm_lock);
 		}
-		if (np->n_openflags & N_DELEG_MASK) {
+		if ((np->n_openflags & N_DELEG_MASK) && !force) {
+			/* try to return the delegation */
 			np->n_openflags &= ~N_DELEG_MASK;
 			nfs4_delegreturn_rpc(nmp, np->n_fhp, np->n_fhsize, &np->n_dstateid,
-				vfs_context_thread(ctx), vfs_context_ucred(ctx));
+				R_RECOVER, vfs_context_thread(ctx), vfs_context_ucred(ctx));
+		}
+		if (np->n_attrdirfh) {
+			FREE(np->n_attrdirfh, M_TEMP);
+			np->n_attrdirfh = NULL;
 		}
 	}
 
 	/* clean up file locks */
 	TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) {
-		if (!(nflp->nfl_flags & NFS_FILE_LOCK_DEAD)) {
-			if (!vname)
-				vname = vnode_getname(vp);
-			printf("nfs_vnop_reclaim: lock 0x%llx 0x%llx 0x%x (bc %d) %s\n",
-				nflp->nfl_start, nflp->nfl_end, nflp->nfl_flags,
-				nflp->nfl_blockcnt, vname ? vname : "//");
+		if (!(nflp->nfl_flags & NFS_FILE_LOCK_DEAD) && !force) {
+			NP(np, "nfs_vnop_reclaim: lock 0x%llx 0x%llx 0x%x (bc %d)",
+				nflp->nfl_start, nflp->nfl_end, nflp->nfl_flags, nflp->nfl_blockcnt);
 		}
-		if (!(nflp->nfl_flags & NFS_FILE_LOCK_BLOCKED)) {
+		if (!(nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD))) {
+			/* try sending an unlock RPC if it wasn't delegated */
+			if (!(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED) && !force)
+				nmp->nm_funcs->nf_unlock_rpc(np, nflp->nfl_owner, F_WRLCK, nflp->nfl_start, nflp->nfl_end, R_RECOVER,
+					NULL, nflp->nfl_owner->nlo_open_owner->noo_cred);
 			lck_mtx_lock(&nflp->nfl_owner->nlo_lock);
 			TAILQ_REMOVE(&nflp->nfl_owner->nlo_locks, nflp, nfl_lolink);
 			lck_mtx_unlock(&nflp->nfl_owner->nlo_lock);
@@ -718,72 +778,79 @@ nfs_vnop_reclaim(ap)
 	}
 	/* clean up lock owners */
 	TAILQ_FOREACH_SAFE(nlop, &np->n_lock_owners, nlo_link, nextnlop) {
-		if (!TAILQ_EMPTY(&nlop->nlo_locks)) {
-			if (!vname)
-				vname = vnode_getname(vp);
-			printf("nfs_vnop_reclaim: lock owner with locks %s\n",
-				vname ? vname : "//");
-		}
+		if (!TAILQ_EMPTY(&nlop->nlo_locks) && !force)
+			NP(np, "nfs_vnop_reclaim: lock owner with locks");
 		TAILQ_REMOVE(&np->n_lock_owners, nlop, nlo_link);
 		nfs_lock_owner_destroy(nlop);
 	}
 	/* clean up open state */
-	if (np->n_openrefcnt) {
-		if (!vname)
-			vname = vnode_getname(vp);
-		printf("nfs_vnop_reclaim: still open: %d %s\n",
-			np->n_openrefcnt, vname ? vname : "//");
-	}
+	if (np->n_openrefcnt && !force)
+		NP(np, "nfs_vnop_reclaim: still open: %d", np->n_openrefcnt);
 	TAILQ_FOREACH_SAFE(nofp, &np->n_opens, nof_link, nextnofp) {
-		if (nofp->nof_flags & NFS_OPEN_FILE_BUSY) {
-			if (!vname)
-				vname = vnode_getname(vp);
-			printf("nfs_vnop_reclaim: open file busy: %s\n",
-				vname ? vname : "//");
-		}
-		if (nofp->nof_opencnt) {
-			if (!vname)
-				vname = vnode_getname(vp);
-			printf("nfs_vnop_reclaim: file still open: %d %s\n",
-				nofp->nof_opencnt, vname ? vname : "//");
-		}
-		if (nofp->nof_access || nofp->nof_deny ||
-		    nofp->nof_mmap_access || nofp->nof_mmap_deny ||
-		    nofp->nof_r || nofp->nof_w || nofp->nof_rw ||
-		    nofp->nof_r_dw || nofp->nof_w_dw || nofp->nof_rw_dw ||
-		    nofp->nof_r_drw || nofp->nof_w_drw || nofp->nof_rw_drw) {
-			if (!vname)
-				vname = vnode_getname(vp);
-			printf("nfs_vnop_reclaim: non-zero access: %d %d %d %d # %u %u %u dw %u %u %u drw %u %u %u %s\n",
-				nofp->nof_access, nofp->nof_deny,
-				nofp->nof_mmap_access, nofp->nof_mmap_deny,
-				nofp->nof_r, nofp->nof_w, nofp->nof_rw,
-				nofp->nof_r_dw, nofp->nof_w_dw, nofp->nof_rw_dw,
-				nofp->nof_r_drw, nofp->nof_w_drw, nofp->nof_rw_drw,
-				vname ? vname : "//");
+		if (nofp->nof_flags & NFS_OPEN_FILE_BUSY)
+			NP(np, "nfs_vnop_reclaim: open file busy");
+		if (!(np->n_flag & NREVOKE) && !(nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
+			if (nofp->nof_opencnt && !force)
+				NP(np, "nfs_vnop_reclaim: file still open: %d", nofp->nof_opencnt);
+			if (!force && (nofp->nof_access || nofp->nof_deny ||
+			    nofp->nof_mmap_access || nofp->nof_mmap_deny ||
+			    nofp->nof_r || nofp->nof_w || nofp->nof_rw ||
+			    nofp->nof_r_dw || nofp->nof_w_dw || nofp->nof_rw_dw ||
+			    nofp->nof_r_drw || nofp->nof_w_drw || nofp->nof_rw_drw ||
+			    nofp->nof_d_r || nofp->nof_d_w || nofp->nof_d_rw ||
+			    nofp->nof_d_r_dw || nofp->nof_d_w_dw || nofp->nof_d_rw_dw ||
+			    nofp->nof_d_r_drw || nofp->nof_d_w_drw || nofp->nof_d_rw_drw)) {
+				NP(np, "nfs_vnop_reclaim: non-zero access: %d %d %d %d # %u.%u %u.%u %u.%u dw %u.%u %u.%u %u.%u drw %u.%u %u.%u %u.%u",
+					nofp->nof_access, nofp->nof_deny,
+					nofp->nof_mmap_access, nofp->nof_mmap_deny,
+					nofp->nof_r, nofp->nof_d_r,
+					nofp->nof_w, nofp->nof_d_w,
+					nofp->nof_rw, nofp->nof_d_rw,
+					nofp->nof_r_dw, nofp->nof_d_r_dw,
+					nofp->nof_w_dw, nofp->nof_d_w_dw,
+					nofp->nof_rw_dw, nofp->nof_d_rw_dw,
+					nofp->nof_r_drw, nofp->nof_d_r_drw,
+					nofp->nof_w_drw, nofp->nof_d_w_drw,
+					nofp->nof_rw_drw, nofp->nof_d_rw_drw);
+				/* try sending a close RPC if it wasn't delegated */
+				if (nofp->nof_r || nofp->nof_w || nofp->nof_rw ||
+				    nofp->nof_r_dw || nofp->nof_w_dw || nofp->nof_rw_dw ||
+				    nofp->nof_r_drw || nofp->nof_w_drw || nofp->nof_rw_drw)
+					nfs4_close_rpc(np, nofp, NULL, nofp->nof_owner->noo_cred, R_RECOVER);
+			}
 		}
 		TAILQ_REMOVE(&np->n_opens, nofp, nof_link);
 		nfs_open_file_destroy(nofp);
 	}
 	lck_mtx_unlock(&np->n_openlock);
 
-	lck_mtx_lock(nfs_buf_mutex);
-	if (!LIST_EMPTY(&np->n_dirtyblkhd) || !LIST_EMPTY(&np->n_cleanblkhd)) {
-		if (!vname)
-			vname = vnode_getname(vp);
-		printf("nfs_reclaim: dropping %s buffers for file %s\n",
-			(!LIST_EMPTY(&np->n_dirtyblkhd) ? "dirty" : "clean"),
-			(vname ? vname : "//"));
+	if (np->n_monlink.le_next != NFSNOLIST) {
+		/* Wait for any in-progress getattr to complete, */
+		/* then remove this node from the monitored node list. */
+		lck_mtx_lock(&nmp->nm_lock);
+		while (np->n_mflag & NMMONSCANINPROG) {
+			struct timespec ts = { 1, 0 };
+			np->n_mflag |= NMMONSCANWANT;
+			msleep(&np->n_mflag, &nmp->nm_lock, PZERO-1, "nfswaitmonscan", &ts);
+		}
+		if (np->n_monlink.le_next != NFSNOLIST) {
+			LIST_REMOVE(np, n_monlink);
+			np->n_monlink.le_next = NFSNOLIST;
+		}
+		lck_mtx_unlock(&nmp->nm_lock);
 	}
+
+	lck_mtx_lock(nfs_buf_mutex);
+	if (!force && (!LIST_EMPTY(&np->n_dirtyblkhd) || !LIST_EMPTY(&np->n_cleanblkhd)))
+		NP(np, "nfs_reclaim: dropping %s buffers", (!LIST_EMPTY(&np->n_dirtyblkhd) ? "dirty" : "clean"));
 	lck_mtx_unlock(nfs_buf_mutex);
-	if (vname)
-		vnode_putname(vname);
 	nfs_vinvalbuf(vp, V_IGNORE_WRITEERR, ap->a_context, 0);
 
 	lck_mtx_lock(nfs_node_hash_mutex);
 
 	if ((vnode_vtype(vp) != VDIR) && np->n_sillyrename) {
-		printf("nfs_reclaim: leaving unlinked file %s\n", np->n_sillyrename->nsr_name);
+		if (!force)
+			NP(np, "nfs_reclaim: leaving unlinked file %s", np->n_sillyrename->nsr_name);
 		if (np->n_sillyrename->nsr_cred != NOCRED)
 			kauth_cred_unref(&np->n_sillyrename->nsr_cred);
 		vnode_rele(NFSTOV(np->n_sillyrename->nsr_dnp));
@@ -808,6 +875,8 @@ nfs_vnop_reclaim(ap)
 		FREE_ZONE(np->n_cookiecache, sizeof(struct nfsdmap), M_NFSDIROFF);
 	if (np->n_fhsize > NFS_SMALLFH)
 		FREE_ZONE(np->n_fhp, np->n_fhsize, M_NFSBIGFH);
+	if (np->n_vattr.nva_acl)
+		kauth_acl_free(np->n_vattr.nva_acl);
 	nfs_node_unlock(np);
 	vnode_clearfsnode(vp);
 
diff --git a/bsd/nfs/nfs_serv.c b/bsd/nfs/nfs_serv.c
index e224a921d..956cc9285 100644
--- a/bsd/nfs/nfs_serv.c
+++ b/bsd/nfs/nfs_serv.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc.  All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc.  All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -89,6 +89,8 @@
 #include <sys/vm.h>
 #include <sys/vmparam.h>
 
+#include <netinet/in.h>
+
 #include <nfs/nfsproto.h>
 #include <nfs/rpcv2.h>
 #include <nfs/nfs.h>
@@ -114,6 +116,7 @@ lck_grp_t *nfsrv_slp_mutex_group;
 struct nfsrv_sockhead nfsrv_socklist, nfsrv_deadsocklist, nfsrv_sockwg,
 			nfsrv_sockwait, nfsrv_sockwork;
 struct nfsrv_sock *nfsrv_udpsock = NULL;
+struct nfsrv_sock *nfsrv_udp6sock = NULL;
 
 /* NFS exports */
 struct nfsrv_expfs_list nfsrv_exports;
@@ -232,6 +235,7 @@ nfsrv_init(void)
 	TAILQ_INIT(&nfsd_head);
 	TAILQ_INIT(&nfsd_queue);
 	nfsrv_udpsock = NULL;
+	nfsrv_udp6sock = NULL;
 
 	/* initialization complete */
 	nfsrv_initted = NFSRV_INITIALIZED;
@@ -312,15 +316,7 @@ nfsrv_access(
 	 *     obtain good performance in the optimistic mode.
 	 */
 	if (nfsmode & NFS_ACCESS_READ) {
-		if (vnode_isdir(vp)) {
-			testaction =
-			    KAUTH_VNODE_LIST_DIRECTORY |
-			    KAUTH_VNODE_READ_EXTATTRIBUTES;
-		} else {
-			testaction =
-			    KAUTH_VNODE_READ_DATA |
-			    KAUTH_VNODE_READ_EXTATTRIBUTES;
-		}
+		testaction = vnode_isdir(vp) ? KAUTH_VNODE_LIST_DIRECTORY : KAUTH_VNODE_READ_DATA;
 		if (nfsrv_authorize(vp, NULL, testaction, ctx, nxo, 0))
 			nfsmode &= ~NFS_ACCESS_READ;
 	}
@@ -617,6 +613,9 @@ nfsrv_lookup(
 	nfsmerr_if(error);
 
 	ni.ni_cnd.cn_nameiop = LOOKUP;
+#if CONFIG_TRIGGERS
+	ni.ni_op = OP_LOOKUP;
+#endif
 	ni.ni_cnd.cn_flags = LOCKLEAF;
 	error = nfsm_chain_get_path_namei(nmreq, len, &ni);
 	isdotdot = ((len == 2) && (ni.ni_cnd.cn_pnbuf[0] == '.') && (ni.ni_cnd.cn_pnbuf[1] == '.'));
@@ -1052,10 +1051,12 @@ again:
 		 * entry and free it.
 		 */
 		LIST_FOREACH_SAFE(fp, &firehead, fm_link, nfp) {
-			if (nfsrv_fsevents_enabled)
+			if (nfsrv_fsevents_enabled) {
+				fp->fm_context.vc_thread = current_thread();
 				add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
 					FSE_ARG_VNODE, fp->fm_vp,
 					FSE_ARG_DONE);
+			}
 			vnode_put(fp->fm_vp);
 			kauth_cred_unref(&fp->fm_context.vc_ucred);
 			LIST_REMOVE(fp, fm_link);
@@ -1829,10 +1830,6 @@ nfsrv_create(
 	ni.ni_cnd.cn_nameiop = 0;
 	rdev = 0;
 
-	/*
-	 * Save the original credential UID in case they are
-	 * mapped and we need to map the IDs in the attributes.
-	 */
 	saved_uid = kauth_cred_getuid(nd->nd_cr);
 
 	nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len);
@@ -1841,6 +1838,9 @@ nfsrv_create(
 	nfsmerr_if(error);
 
 	ni.ni_cnd.cn_nameiop = CREATE;
+#if CONFIG_TRIGGERS
+	ni.ni_op = OP_LINK;
+#endif
 	ni.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
 	error = nfsm_chain_get_path_namei(nmreq, len, &ni);
 	if (!error) {
@@ -1923,17 +1923,6 @@ nfsrv_create(
 	if (vp == NULL) {
 	        kauth_acl_t xacl = NULL;
 
-		/*
-		 * If the credentials were mapped, we should
-		 * map the same values in the attributes.
-		 */
-		if ((vap->va_uid == saved_uid) && (kauth_cred_getuid(nd->nd_cr) != saved_uid)) {
-			int ismember;
-			VATTR_SET(vap, va_uid, kauth_cred_getuid(nd->nd_cr));
-			if (kauth_cred_ismember_gid(nd->nd_cr, vap->va_gid, &ismember) || !ismember)
-				VATTR_SET(vap, va_gid, kauth_cred_getgid(nd->nd_cr));
-		}
-
 		/* authorize before creating */
 		error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx, nxo, 0);
 
@@ -1950,20 +1939,17 @@ nfsrv_create(
 		}
 		VATTR_CLEAR_ACTIVE(vap, va_data_size);
 		VATTR_CLEAR_ACTIVE(vap, va_access_time);
+		/*
+		 * Server policy is to always use the mapped RPC credential for
+		 * file system object creation. This has the nice side effect of
+		 * enforcing BSD creation semantics.
+		 */
+		VATTR_CLEAR_ACTIVE(vap, va_uid);
+		VATTR_CLEAR_ACTIVE(vap, va_gid);
 
 		/* validate new-file security information */
-		if (!error) {
+		if (!error) 
 			error = vnode_authattr_new(dvp, vap, 0, ctx);
-			if (error && (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))) {
-				/*
-				 * Most NFS servers just ignore the UID/GID attributes, so we
-				 * try ignoring them if that'll help the request succeed.
-				 */
-				VATTR_CLEAR_ACTIVE(vap, va_uid);
-				VATTR_CLEAR_ACTIVE(vap, va_gid);
-				error = vnode_authattr_new(dvp, vap, 0, ctx);
-			}
-		}
 
 		if (vap->va_type == VREG || vap->va_type == VSOCK) {
 
@@ -2024,6 +2010,9 @@ nfsrv_create(
 				vp = NULL;
 			}
 			ni.ni_cnd.cn_nameiop = LOOKUP;
+#if CONFIG_TRIGGERS
+			ni.ni_op = OP_LOOKUP;
+#endif
 			ni.ni_cnd.cn_flags &= ~LOCKPARENT;
 			ni.ni_cnd.cn_context = ctx;
 			ni.ni_startdir = dvp;
@@ -2168,10 +2157,6 @@ nfsrv_mknod(
 	vp = dvp = dirp = NULL;
 	ni.ni_cnd.cn_nameiop = 0;
 
-	/*
-	 * Save the original credential UID in case they are
-	 * mapped and we need to map the IDs in the attributes.
-	 */
 	saved_uid = kauth_cred_getuid(nd->nd_cr);
 
 	nfsm_chain_get_fh_ptr(error, nmreq, NFS_VER3, nfh.nfh_fhp, nfh.nfh_len);
@@ -2180,6 +2165,9 @@ nfsrv_mknod(
 	nfsmerr_if(error);
 
 	ni.ni_cnd.cn_nameiop = CREATE;
+#if CONFIG_TRIGGERS
+	ni.ni_op = OP_LINK;
+#endif
 	ni.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
 	error = nfsm_chain_get_path_namei(nmreq, len, &ni);
 	if (!error) {
@@ -2231,17 +2219,6 @@ nfsrv_mknod(
 	}
 	VATTR_SET(vap, va_type, vtyp);
 
-	/*
-	 * If the credentials were mapped, we should
-	 * map the same values in the attributes.
-	 */
-	if ((vap->va_uid == saved_uid) && (kauth_cred_getuid(nd->nd_cr) != saved_uid)) {
-		int ismember;
-		VATTR_SET(vap, va_uid, kauth_cred_getuid(nd->nd_cr));
-		if (kauth_cred_ismember_gid(nd->nd_cr, vap->va_gid, &ismember) || !ismember)
-			VATTR_SET(vap, va_gid, kauth_cred_getgid(nd->nd_cr));
-	}
-
 	/* authorize before creating */
 	error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx, nxo, 0);
 
@@ -2258,20 +2235,18 @@ nfsrv_mknod(
 	}
 	VATTR_CLEAR_ACTIVE(vap, va_data_size);
 	VATTR_CLEAR_ACTIVE(vap, va_access_time);
+	/*
+	 * Server policy is to always use the mapped RPC credential for
+	 * file system object creation. This has the nice side effect of
+	 * enforcing BSD creation semantics.
+	 */
+	VATTR_CLEAR_ACTIVE(vap, va_uid);
+	VATTR_CLEAR_ACTIVE(vap, va_gid);
 
 	/* validate new-file security information */
-	if (!error) {
+	if (!error) 
 		error = vnode_authattr_new(dvp, vap, 0, ctx);
-		if (error && (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))) {
-			/*
-			 * Most NFS servers just ignore the UID/GID attributes, so we
-			 * try ignoring them if that'll help the request succeed.
-			 */
-			VATTR_CLEAR_ACTIVE(vap, va_uid);
-			VATTR_CLEAR_ACTIVE(vap, va_gid);
-			error = vnode_authattr_new(dvp, vap, 0, ctx);
-		}
-	}
+
 	if (error)
 		goto out1;
 
@@ -2295,6 +2270,9 @@ nfsrv_mknod(
 			vp = NULL;
 		}
 		ni.ni_cnd.cn_nameiop = LOOKUP;
+#if CONFIG_TRIGGERS
+		ni.ni_op = OP_LOOKUP;
+#endif
 		ni.ni_cnd.cn_flags &= ~LOCKPARENT;
 		ni.ni_cnd.cn_context = vfs_context_current();
 		ni.ni_startdir = dvp;
@@ -2416,6 +2394,9 @@ nfsrv_remove(
 	nfsmerr_if(error);
 
 	ni.ni_cnd.cn_nameiop = DELETE;
+#if CONFIG_TRIGGERS
+	ni.ni_op = OP_UNLINK;
+#endif
 	ni.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
 	error = nfsm_chain_get_path_namei(nmreq, len, &ni);
 	if (!error) {
@@ -2596,6 +2577,9 @@ nfsrv_rename(
 	kauth_cred_ref(saved_cred);
 retry:
 	fromni.ni_cnd.cn_nameiop = DELETE;
+#if CONFIG_TRIGGERS
+	fromni.ni_op = OP_UNLINK;
+#endif
 	fromni.ni_cnd.cn_flags = WANTPARENT;
 
 	fromni.ni_cnd.cn_pnbuf = frompath;
@@ -2628,6 +2612,9 @@ retry:
 	}
 
 	toni.ni_cnd.cn_nameiop = RENAME;
+#if CONFIG_TRIGGERS
+	toni.ni_op = OP_RENAME;
+#endif
 	toni.ni_cnd.cn_flags = WANTPARENT;
 
 	toni.ni_cnd.cn_pnbuf = topath;
@@ -3175,6 +3162,9 @@ nfsrv_link(
 		goto out;
 
 	ni.ni_cnd.cn_nameiop = CREATE;
+#if CONFIG_TRIGGERS
+	ni.ni_op = OP_LINK;
+#endif
 	ni.ni_cnd.cn_flags = LOCKPARENT;
 	error = nfsm_chain_get_path_namei(nmreq, len, &ni);
 	if (!error)
@@ -3307,10 +3297,6 @@ nfsrv_symlink(
 	linkdata = NULL;
 	dirp = NULL;
 
-	/*
-	 * Save the original credential UID in case they are
-	 * mapped and we need to map the IDs in the attributes.
-	 */
 	saved_uid = kauth_cred_getuid(nd->nd_cr);
 
 	ni.ni_cnd.cn_nameiop = 0;
@@ -3322,6 +3308,9 @@ nfsrv_symlink(
 	nfsmerr_if(error);
 
 	ni.ni_cnd.cn_nameiop = CREATE;
+#if CONFIG_TRIGGERS
+	ni.ni_op = OP_LINK;
+#endif
 	ni.ni_cnd.cn_flags = LOCKPARENT;
 	error = nfsm_chain_get_path_namei(nmreq, len, &ni);
 	if (!error) {
@@ -3377,42 +3366,33 @@ nfsrv_symlink(
 		goto out;
 	}
 
-	/*
-	 * If the credentials were mapped, we should
-	 * map the same values in the attributes.
-	 */
-	if ((vap->va_uid == saved_uid) && (kauth_cred_getuid(nd->nd_cr) != saved_uid)) {
-		int ismember;
-		VATTR_SET(vap, va_uid, kauth_cred_getuid(nd->nd_cr));
-		if (kauth_cred_ismember_gid(nd->nd_cr, vap->va_gid, &ismember) || !ismember)
-			VATTR_SET(vap, va_gid, kauth_cred_getgid(nd->nd_cr));
-	}
 	VATTR_SET(vap, va_type, VLNK);
 	VATTR_CLEAR_ACTIVE(vap, va_data_size);
 	VATTR_CLEAR_ACTIVE(vap, va_access_time);
+	/*
+	 * Server policy is to always use the mapped RPC credential for
+	 * file system object creation. This has the nice side effect of
+	 * enforcing BSD creation semantics.
+	 */
+	VATTR_CLEAR_ACTIVE(vap, va_uid);
+	VATTR_CLEAR_ACTIVE(vap, va_gid);
 
 	/* authorize before creating */
 	error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx, nxo, 0);
 
 	/* validate given attributes */
-	if (!error) {
+	if (!error)
 		error = vnode_authattr_new(dvp, vap, 0, ctx);
-		if (error && (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))) {
-			/*
-			 * Most NFS servers just ignore the UID/GID attributes, so we
-			 * try ignoring them if that'll help the request succeed.
-			 */
-			VATTR_CLEAR_ACTIVE(vap, va_uid);
-			VATTR_CLEAR_ACTIVE(vap, va_gid);
-			error = vnode_authattr_new(dvp, vap, 0, ctx);
-		}
-	}
+
 	if (!error)
 		error = VNOP_SYMLINK(dvp, &vp, &ni.ni_cnd, vap, linkdata, ctx);
 
 	if (!error && (nd->nd_vers == NFS_VER3)) {
 		if (vp == NULL) {
 			ni.ni_cnd.cn_nameiop = LOOKUP;
+#if CONFIG_TRIGGERS
+			ni.ni_op = OP_LOOKUP;
+#endif
 			ni.ni_cnd.cn_flags &= ~(LOCKPARENT | FOLLOW);
 			ni.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF);
 			ni.ni_cnd.cn_context = ctx;
@@ -3508,6 +3488,7 @@ nfsmout:
 /*
  * nfs mkdir service
  */
+
 int
 nfsrv_mkdir(
 	struct nfsrv_descript *nd,
@@ -3533,10 +3514,6 @@ nfsrv_mkdir(
 	nmreq = &nd->nd_nmreq;
 	nfsm_chain_null(&nmrep);
 
-	/*
-	 * Save the original credential UID in case they are
-	 * mapped and we need to map the IDs in the attributes.
-	 */
 	saved_uid = kauth_cred_getuid(nd->nd_cr);
 
 	ni.ni_cnd.cn_nameiop = 0;
@@ -3548,6 +3525,9 @@ nfsrv_mkdir(
 	nfsmerr_if(error);
 
 	ni.ni_cnd.cn_nameiop = CREATE;
+#if CONFIG_TRIGGERS
+	ni.ni_op = OP_LINK;
+#endif
 	ni.ni_cnd.cn_flags = LOCKPARENT;
 	error = nfsm_chain_get_path_namei(nmreq, len, &ni);
 	if (!error) {
@@ -3593,17 +3573,6 @@ nfsrv_mkdir(
 		goto out;
 	}
 
-	/*
-	 * If the credentials were mapped, we should
-	 * map the same values in the attributes.
-	 */
-	if ((vap->va_uid == saved_uid) && (kauth_cred_getuid(nd->nd_cr) != saved_uid)) {
-		int ismember;
-		VATTR_SET(vap, va_uid, kauth_cred_getuid(nd->nd_cr));
-		if (kauth_cred_ismember_gid(nd->nd_cr, vap->va_gid, &ismember) || !ismember)
-			VATTR_SET(vap, va_gid, kauth_cred_getgid(nd->nd_cr));
-	}
-
 	error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_SUBDIRECTORY, ctx, nxo, 0);
 
 	/* construct ACL and handle inheritance */
@@ -3617,22 +3586,33 @@ nfsrv_mkdir(
 		if (!error && xacl != NULL)
 		        VATTR_SET(vap, va_acl, xacl);
 	}
+
 	VATTR_CLEAR_ACTIVE(vap, va_data_size);
 	VATTR_CLEAR_ACTIVE(vap, va_access_time);
+	/*
+	 * We don't support the S_ISGID bit for directories. Solaris and other
+	 * SVR4-derived systems might set this to get BSD semantics, which we
+	 * enforce anyway.
+	 */
+	if (VATTR_IS_ACTIVE(vap, va_mode))
+		vap->va_mode &= ~S_ISGID;
+	/*
+	 * Server policy is to always use the mapped RPC credential for
+	 * file system object creation. This has the nice side effect of
+	 * enforcing BSD creation semantics.
+	 */
+	VATTR_CLEAR_ACTIVE(vap, va_uid);
+	VATTR_CLEAR_ACTIVE(vap, va_gid);
 
 	/* validate new-file security information */
-	if (!error) {
+	if (!error)
 		error = vnode_authattr_new(dvp, vap, 0, ctx);
-		if (error && (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))) {
-			/*
-			 * Most NFS servers just ignore the UID/GID attributes, so we
-			 * try ignoring them if that'll help the request succeed.
-			 */
-			VATTR_CLEAR_ACTIVE(vap, va_uid);
-			VATTR_CLEAR_ACTIVE(vap, va_gid);
-			error = vnode_authattr_new(dvp, vap, 0, ctx);
-		}
-	}
+	/*
+	 * vnode_authattr_new can return errors other than EPERM, but that's not
+	 * going to sit well with our clients, so we map all errors to EPERM.
+	 */
+	if (error)
+		error = EPERM;
 
 	if (!error)
 		error = VNOP_MKDIR(dvp, &vp, &ni.ni_cnd, vap, ctx);
@@ -3755,6 +3735,9 @@ nfsrv_rmdir(
 	nfsmerr_if(error);
 
 	ni.ni_cnd.cn_nameiop = DELETE;
+#if CONFIG_TRIGGERS
+	ni.ni_op = OP_UNLINK;
+#endif
 	ni.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
 	error = nfsm_chain_get_path_namei(nmreq, len, &ni);
 	if (!error) {
diff --git a/bsd/nfs/nfs_socket.c b/bsd/nfs/nfs_socket.c
index 8d6747009..71b6e5c44 100644
--- a/bsd/nfs/nfs_socket.c
+++ b/bsd/nfs/nfs_socket.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -72,6 +72,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
+#include <sys/signalvar.h>
 #include <sys/kauth.h>
 #include <sys/mount_internal.h>
 #include <sys/kernel.h>
@@ -91,11 +92,13 @@
 #include <kern/thread.h>
 #include <kern/thread_call.h>
 #include <sys/user.h>
+#include <sys/acct.h>
 
 #include <netinet/in.h>
 #include <netinet/tcp.h>
 
 #include <nfs/rpcv2.h>
+#include <nfs/krpc.h>
 #include <nfs/nfsproto.h>
 #include <nfs/nfs.h>
 #include <nfs/xdr_subs.h>
@@ -117,6 +120,29 @@ int nfsrv_getreq(struct nfsrv_descript *);
 extern int nfsv3_procid[NFS_NPROCS];
 #endif /* NFSSERVER */
 
+/*
+ * compare two sockaddr structures
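+ * Returns zero when the (AF_INET/AF_INET6) addresses match, non-zero
+ * otherwise; differing families and lengths compare strcmp(3)-style.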
+ */
+int
+nfs_sockaddr_cmp(struct sockaddr *sa1, struct sockaddr *sa2)
+{
+	if (!sa1)
+		return (-1);
+	if (!sa2)
+		return (1);
+	if (sa1->sa_family != sa2->sa_family)
+		return ((sa1->sa_family < sa2->sa_family) ? -1 : 1);
+	if (sa1->sa_len != sa2->sa_len)
+		return ((sa1->sa_len < sa2->sa_len) ? -1 : 1);
+	if (sa1->sa_family == AF_INET)
+		return (bcmp(&((struct sockaddr_in*)sa1)->sin_addr,
+			     &((struct sockaddr_in*)sa2)->sin_addr, sizeof(((struct sockaddr_in*)sa1)->sin_addr)));
+	if (sa1->sa_family == AF_INET6)
+		return (bcmp(&((struct sockaddr_in6*)sa1)->sin6_addr,
+			     &((struct sockaddr_in6*)sa2)->sin6_addr, sizeof(((struct sockaddr_in6*)sa1)->sin6_addr)));
+	return (-1);
+}
+
 #if NFSCLIENT
 
 int	nfs_reconnect(struct nfsmount *);
@@ -188,214 +214,1270 @@ static int proct[NFS_NPROCS] = {
 static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
 
 /*
- * Initialize socket state and perform setup for a new NFS connection.
+ * Increment location index to next address/server/location.
  */
-int
-nfs_connect(struct nfsmount *nmp, int verbose)
+void
+nfs_location_next(struct nfs_fs_locations *nlp, struct nfs_location_index *nlip)
 {
-	socket_t so;
-	int error, on = 1, proto;
-	sock_upcall upcall;
-	struct sockaddr *saddr;
-	struct sockaddr_in sin;
-	struct timeval timeo;
-
-	lck_mtx_lock(&nmp->nm_lock);
-	nmp->nm_sockflags |= NMSOCK_CONNECTING;
-	saddr = mbuf_data(nmp->nm_nam);
-	upcall = (nmp->nm_sotype == SOCK_STREAM) ? nfs_tcp_rcv : nfs_udp_rcv;
-	lck_mtx_unlock(&nmp->nm_lock);
-	error = sock_socket(saddr->sa_family, nmp->nm_sotype,
-			    nmp->nm_soproto, upcall, nmp, &nmp->nm_so);
-	if (error)
-		goto bad;
-	lck_mtx_lock(&nmp->nm_lock);
-	so = nmp->nm_so;
-
-	/*
-	 * Some servers require that the client port be a reserved port number.
-	 */
-	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
-		int portrange = IP_PORTRANGE_LOW;
-		error = sock_setsockopt(so, IPPROTO_IP, IP_PORTRANGE, &portrange, sizeof(portrange));
-		if (!error) {	/* bind now to check for failure */
-			sin.sin_len = sizeof (struct sockaddr_in);
-			sin.sin_family = AF_INET;
-			sin.sin_addr.s_addr = INADDR_ANY;
-			sin.sin_port = 0;
-			error = sock_bind(so, (struct sockaddr *) &sin);
-		}
-		if (error) {
-			lck_mtx_unlock(&nmp->nm_lock);
-			goto bad;
+	uint8_t loc = nlip->nli_loc;
+	uint8_t serv = nlip->nli_serv;
+	uint8_t addr = nlip->nli_addr;
+
+	/* move to next address */
+	addr++;
+	if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount) {
+		/* no more addresses on current server, go to first address of next server */
+next_server:
+		addr = 0;
+		serv++;
+		if (serv >= nlp->nl_locations[loc]->nl_servcount) {
+			/* no more servers on current location, go to first server of next location */
+			serv = 0;
+			loc++;
+			if (loc >= nlp->nl_numlocs)
+				loc = 0; /* after last location, wrap back around to first location */
 		}
 	}
-
 	/*
-	 * Protocols that do not require connections may be optionally left
-	 * unconnected for servers that reply from a different address/port.
+	 * It's possible for this next server to not have any addresses.
+	 * Check for that here and go to the next server.
+	 * But bail out if we've managed to come back around to the original
+	 * location that was passed in. (That would mean no servers had any
+	 * addresses.  And we don't want to spin here forever.)
 	 */
-	if (nmp->nm_flag & NFSMNT_NOCONN) {
-		if (nmp->nm_sotype == SOCK_STREAM) {
-			error = ENOTCONN;
-			lck_mtx_unlock(&nmp->nm_lock);
-			goto bad;
+	if ((loc == nlip->nli_loc) && (serv == nlip->nli_serv) && (addr == nlip->nli_addr))
+		return;
+	if (addr >= nlp->nl_locations[loc]->nl_servers[serv]->ns_addrcount)
+		goto next_server;
+
+	nlip->nli_loc = loc;
+	nlip->nli_serv = serv;
+	nlip->nli_addr = addr;
+}
+
+/*
+ * Compare two location indices.
+ */
+int
+nfs_location_index_cmp(struct nfs_location_index *nlip1, struct nfs_location_index *nlip2)
+{
+	if (nlip1->nli_loc != nlip2->nli_loc)
+		return (nlip1->nli_loc - nlip2->nli_loc);
+	if (nlip1->nli_serv != nlip2->nli_serv)
+		return (nlip1->nli_serv - nlip2->nli_serv);
+	return (nlip1->nli_addr - nlip2->nli_addr);
+}
+
+/*
+ * Get the mntfromname (or path portion only) for a given location.
+ */
+void
+nfs_location_mntfromname(struct nfs_fs_locations *locs, struct nfs_location_index idx, char *s, int size, int pathonly)
+{
+	struct nfs_fs_location *fsl = locs->nl_locations[idx.nli_loc];
+	char *p;
+	int cnt, i;
+
+	p = s;
+	if (!pathonly) {
+		cnt = snprintf(p, size, "%s:", fsl->nl_servers[idx.nli_serv]->ns_name);
+		p += cnt;
+		size -= cnt;
+	}
+	if (fsl->nl_path.np_compcount == 0) {
+		/* mounting root export on server */
+		if (size > 0) {
+			*p++ = '/';
+			*p++ = '\0';
 		}
-	} else {
-		int tocnt = 0, optlen = sizeof(error);
-		struct timespec ts = { 1, 0 };
+		return;
+	}
+	/* append each server path component */
+	for (i=0; (size > 0) && (i < (int)fsl->nl_path.np_compcount); i++) {
+		cnt = snprintf(p, size, "/%s", fsl->nl_path.np_components[i]);
+		p += cnt;
+		size -= cnt;
+	}
+}
 
-		lck_mtx_unlock(&nmp->nm_lock);
-		error = sock_connect(so, mbuf_data(nmp->nm_nam), MSG_DONTWAIT);
-		if (error && (error != EINPROGRESS))
-			goto bad;
-		lck_mtx_lock(&nmp->nm_lock);
-		while (!sock_isconnected(so)) {
-			nfs_mount_check_dead_timeout(nmp);
-			if ((tocnt++ == 30) && verbose) /* log a warning if connect is taking a while */
-				log(LOG_INFO, "nfs_connect: socket connect taking a while for %s\n",
-					vfs_statfs(nmp->nm_mountp)->f_mntfromname);
-			/* check for error on socket */
-			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &error, &optlen);
-			if (error) {
-				if (verbose)
-					log(LOG_INFO, "nfs_connect: socket error %d for %s\n",
-						error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
-				break;
+/*
+ * NFS client connect socket upcall.
+ * (Used only during socket connect/search.)
+ */
+void
+nfs_connect_upcall(socket_t so, void *arg, __unused int waitflag)
+{
+	struct nfs_socket *nso = arg;
+	size_t rcvlen;
+	mbuf_t m;
+	int error = 0, recv = 1;
+
+	if (nso->nso_flags & NSO_CONNECTING) {
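+		/* socket is still connecting; just wake up the connect search thread */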
+		NFS_SOCK_DBG(("nfs connect - socket %p upcall - connecting\n", nso));
+		wakeup(nso->nso_wake);
+		return;
+	}
+
+	lck_mtx_lock(&nso->nso_lock);
+	if ((nso->nso_flags & (NSO_UPCALL|NSO_DISCONNECTING|NSO_DEAD)) || !(nso->nso_flags & NSO_PINGING)) {
+		NFS_SOCK_DBG(("nfs connect - socket %p upcall - nevermind\n", nso));
+		lck_mtx_unlock(&nso->nso_lock);
+		return;
+	}
+	NFS_SOCK_DBG(("nfs connect - socket %p upcall\n", nso));
+	nso->nso_flags |= NSO_UPCALL;
+
+	/* loop while we make error-free progress */
+	while (!error && recv) {
+		/* make sure we're still interested in this socket */
+		if (nso->nso_flags & (NSO_DISCONNECTING|NSO_DEAD))
+			break;
+		lck_mtx_unlock(&nso->nso_lock);
+		m = NULL;
+		if (nso->nso_sotype == SOCK_STREAM) {
+			error = nfs_rpc_record_read(so, &nso->nso_rrs, MSG_DONTWAIT, &recv, &m);
+		} else {
+			rcvlen = 1000000;
+			error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
+			recv = m ? 1 : 0;
+		}
+		lck_mtx_lock(&nso->nso_lock);
+		if (m) {
+			/* match response with request */
+			struct nfsm_chain nmrep;
+			uint32_t reply = 0, rxid = 0, verf_type, verf_len;
+			uint32_t reply_status, rejected_status, accepted_status;
+
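+			/*
+			 * This is the reply to the NULL RPC "ping" sent while probing
+			 * the socket; decode just enough of the RPC reply header to
+			 * tell whether the server/program/version is usable.
+			 */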
+			nfsm_chain_dissect_init(error, &nmrep, m);
+			nfsm_chain_get_32(error, &nmrep, rxid);
+			nfsm_chain_get_32(error, &nmrep, reply);
+			if (!error && ((reply != RPC_REPLY) || (rxid != nso->nso_pingxid)))
+				error = EBADRPC;
+			nfsm_chain_get_32(error, &nmrep, reply_status);
+			if (!error && (reply_status == RPC_MSGDENIED)) {
+				nfsm_chain_get_32(error, &nmrep, rejected_status);
+				if (!error)
+					error = (rejected_status == RPC_MISMATCH) ? ERPCMISMATCH : EACCES;
 			}
-			/* abort if this is taking too long or we're unmounting */
-			if ((tocnt > 120) || (nmp->nm_sockflags & NMSOCK_UNMOUNT)) {
-				error = ENOTCONN;
-				break;
+			nfsm_chain_get_32(error, &nmrep, verf_type); /* verifier flavor */
+			nfsm_chain_get_32(error, &nmrep, verf_len); /* verifier length */
+			nfsmout_if(error);
+			if (verf_len)
+				nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
+			nfsm_chain_get_32(error, &nmrep, accepted_status);
+			nfsmout_if(error);
+			if ((accepted_status == RPC_PROGMISMATCH) && !nso->nso_version) {
+				uint32_t minvers, maxvers;
+				nfsm_chain_get_32(error, &nmrep, minvers);
+				nfsm_chain_get_32(error, &nmrep, maxvers);
+				nfsmout_if(error);
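+				/*
+				 * The server advertised the version range it supports,
+				 * [minvers, maxvers]; pick a mutually-supported version
+				 * (for NFS, preferring v3, then v2, then v4).
+				 */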
+				if (nso->nso_protocol == PMAPPROG) {
+					if ((minvers > RPCBVERS4) || (maxvers < PMAPVERS))
+						error = EPROGMISMATCH;
+					else if ((nso->nso_saddr->sa_family == AF_INET) &&
+						 (PMAPVERS >= minvers) && (PMAPVERS <= maxvers))
+						nso->nso_version = PMAPVERS;
+					else if (nso->nso_saddr->sa_family == AF_INET6) {
+						if ((RPCBVERS4 >= minvers) && (RPCBVERS4 <= maxvers))
+							nso->nso_version = RPCBVERS4;
+						else if ((RPCBVERS3 >= minvers) && (RPCBVERS3 <= maxvers))
+							nso->nso_version = RPCBVERS3;
+					}
+				} else if (nso->nso_protocol == NFS_PROG) {
+					if ((minvers > NFS_VER4) || (maxvers < NFS_VER2))
+						error = EPROGMISMATCH;
+					else if ((NFS_VER3 >= minvers) && (NFS_VER3 <= maxvers))
+						nso->nso_version = NFS_VER3;
+					else if ((NFS_VER2 >= minvers) && (NFS_VER2 <= maxvers))
+						nso->nso_version = NFS_VER2;
+					else if ((NFS_VER4 >= minvers) && (NFS_VER4 <= maxvers))
+						nso->nso_version = NFS_VER4;
+				}
+				if (!error && nso->nso_version)
+					accepted_status = RPC_SUCCESS;
 			}
-			if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
-				break;
-			msleep(&nmp->nm_so, &nmp->nm_lock, PSOCK, "nfs_socket_connect", &ts);
+			if (!error) {
+				switch (accepted_status) {
+				case RPC_SUCCESS:
+					error = 0;
+					break;
+				case RPC_PROGUNAVAIL:
+					error = EPROGUNAVAIL;
+					break;
+				case RPC_PROGMISMATCH:
+					error = EPROGMISMATCH;
+					break;
+				case RPC_PROCUNAVAIL:
+					error = EPROCUNAVAIL;
+					break;
+				case RPC_GARBAGE:
+					error = EBADRPC;
+					break;
+				case RPC_SYSTEM_ERR:
+				default:
+					error = EIO;
+					break;
+				}
+			}
+nfsmout:
+			nso->nso_flags &= ~NSO_PINGING;
+			if (error) {
+				nso->nso_error = error;
+				nso->nso_flags |= NSO_DEAD;
+			} else {
+				nso->nso_flags |= NSO_VERIFIED;
+			}
+			mbuf_freem(m);
+			/* wake up search thread */
+			wakeup(nso->nso_wake);
+			break;
 		}
-		if ((tocnt > 30) && verbose)
-			log(LOG_INFO, "nfs_connect: socket connect %s for %s\n",
-				error ? "aborted" : "completed",
-				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
-		if (error) {
-			lck_mtx_unlock(&nmp->nm_lock);
-			goto bad;
+	}
+
+	nso->nso_flags &= ~NSO_UPCALL;
+	if ((error != EWOULDBLOCK) && (error || !recv)) {
+		/* problems with the socket... */
+		nso->nso_error = error ? error : EPIPE;
+		nso->nso_flags |= NSO_DEAD;
+		wakeup(nso->nso_wake);
+	}
+	if (nso->nso_flags & NSO_DISCONNECTING)
+		wakeup(&nso->nso_flags);
+	lck_mtx_unlock(&nso->nso_lock);
+}
+
+/*
+ * Create/initialize an nfs_socket structure.
+ */
+int
+nfs_socket_create(
+	__unused struct nfsmount *nmp,
+	struct sockaddr *sa,
+	int sotype,
+	in_port_t port,
+	uint32_t protocol,
+	uint32_t vers,
+	int resvport,
+	struct nfs_socket **nsop)
+{
+	struct nfs_socket *nso;
+	struct timeval now;
+	int error;
+#ifdef NFS_SOCKET_DEBUGGING
+	char naddr[MAX_IPv6_STR_LEN];
+	void *sinaddr;
+
+	if (sa->sa_family == AF_INET)
+		sinaddr = &((struct sockaddr_in*)sa)->sin_addr;
+	else
+		sinaddr = &((struct sockaddr_in6*)sa)->sin6_addr;
+	if (inet_ntop(sa->sa_family, sinaddr, naddr, sizeof(naddr)) != naddr)
+		strlcpy(naddr, "<unknown>", sizeof(naddr));
+#endif
+
+	*nsop = NULL;
+
+	/* Create the socket. */
+	MALLOC(nso, struct nfs_socket *, sizeof(struct nfs_socket), M_TEMP, M_WAITOK|M_ZERO);
+	if (nso)
+		MALLOC(nso->nso_saddr, struct sockaddr *, sa->sa_len, M_SONAME, M_WAITOK|M_ZERO);
+	if (!nso || !nso->nso_saddr) {
+		if (nso)
+			FREE(nso, M_TEMP);
+		return (ENOMEM);
+	}
+	lck_mtx_init(&nso->nso_lock, nfs_request_grp, LCK_ATTR_NULL);
+	nso->nso_sotype = sotype;
+	if (nso->nso_sotype == SOCK_STREAM)
+		nfs_rpc_record_state_init(&nso->nso_rrs);
+	microuptime(&now);
+	nso->nso_timestamp = now.tv_sec;
+	bcopy(sa, nso->nso_saddr, sa->sa_len);
+	if (sa->sa_family == AF_INET)
+		((struct sockaddr_in*)nso->nso_saddr)->sin_port = htons(port);
+	else if (sa->sa_family == AF_INET6)
+		((struct sockaddr_in6*)nso->nso_saddr)->sin6_port = htons(port);
+	nso->nso_protocol = protocol;
+	nso->nso_version = vers;
+
+	error = sock_socket(sa->sa_family, nso->nso_sotype, 0, NULL, NULL, &nso->nso_so);
+
+	/* Some servers require that the client port be a reserved port number. */
+	if (!error && resvport && ((sa->sa_family == AF_INET) || (sa->sa_family == AF_INET6))) {
+		struct sockaddr_storage ss;
+		int level = (sa->sa_family == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6;
+		int optname = (sa->sa_family == AF_INET) ? IP_PORTRANGE : IPV6_PORTRANGE;
+		int portrange = IP_PORTRANGE_LOW;
+
+		error = sock_setsockopt(nso->nso_so, level, optname, &portrange, sizeof(portrange));
+		if (!error) {	/* bind now to check for failure */
+			ss.ss_len = sa->sa_len;
+			ss.ss_family = sa->sa_family;
+			if (ss.ss_family == AF_INET) {
+				((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY;
+				((struct sockaddr_in*)&ss)->sin_port = htons(0);
+			} else if (ss.ss_family == AF_INET6) {
+				((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any;
+				((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
+			} else {
+				error = EINVAL;
+			}
+			if (!error)
+				error = sock_bind(nso->nso_so, (struct sockaddr*)&ss);
 		}
 	}
 
+	if (error) {
+		NFS_SOCK_DBG(("nfs connect %s error %d creating socket %p %s type %d%s port %d prot %d %d\n",
+			vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nso, naddr, sotype,
+			resvport ? "r" : "", port, protocol, vers));
+		nfs_socket_destroy(nso);
+	} else {
+		NFS_SOCK_DBG(("nfs connect %s created socket %p %s type %d%s port %d prot %d %d\n",
+			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, naddr,
+			sotype, resvport ? "r" : "", port, protocol, vers));
+		*nsop = nso;
+	}
+	return (error);
+}
+
+/*
+ * Destroy an nfs_socket structure.
+ */
+void
+nfs_socket_destroy(struct nfs_socket *nso)
+{
+	struct timespec ts = { 4, 0 };
+
+	lck_mtx_lock(&nso->nso_lock);
+	nso->nso_flags |= NSO_DISCONNECTING;
+	if (nso->nso_flags & NSO_UPCALL) /* give upcall a chance to complete */
+		msleep(&nso->nso_flags, &nso->nso_lock, PZERO-1, "nfswaitupcall", &ts);
+	lck_mtx_unlock(&nso->nso_lock);
+	sock_shutdown(nso->nso_so, SHUT_RDWR);
+	sock_close(nso->nso_so);
+	if (nso->nso_sotype == SOCK_STREAM)
+		nfs_rpc_record_state_cleanup(&nso->nso_rrs);
+	lck_mtx_destroy(&nso->nso_lock, nfs_request_grp);
+	if (nso->nso_saddr)
+		FREE(nso->nso_saddr, M_SONAME);
+	if (nso->nso_saddr2)
+		FREE(nso->nso_saddr2, M_SONAME);
+	NFS_SOCK_DBG(("nfs connect - socket %p destroyed\n", nso));
+	FREE(nso, M_TEMP);
+}
+
+/*
+ * Set common socket options on an nfs_socket.
+ */
+void
+nfs_socket_options(struct nfsmount *nmp, struct nfs_socket *nso)
+{
 	/*
 	 * Set socket send/receive timeouts
-	 * - Receive timeout shouldn't matter because all receives are performed
+	 * - Receive timeout shouldn't matter because most receives are performed
 	 *   in the socket upcall non-blocking.
 	 * - Send timeout should allow us to react to a blocked socket.
 	 *   Soft mounts will want to abort sooner.
 	 */
-	timeo.tv_usec = 0;
-	timeo.tv_sec = (nmp->nm_flag & NFSMNT_SOFT) ? 10 : 60;
-	error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
-	error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
-	if (error) {
-		log(LOG_INFO, "nfs_connect: socket timeout setting errors for %s\n",
-			vfs_statfs(nmp->nm_mountp)->f_mntfromname);
-		error = 0;
-	}
+	struct timeval timeo;
+	int on = 1, proto;
 
-	if (nmp->nm_sotype == SOCK_STREAM) {
+	timeo.tv_usec = 0;
+	timeo.tv_sec = NMFLAG(nmp, SOFT) ? 5 : 60;
+	sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
+	sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
+	if (nso->nso_sotype == SOCK_STREAM) {
 		/* Assume that SOCK_STREAM always requires a connection */
-		sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
+		sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
 		/* set nodelay for TCP */
-		sock_gettype(so, NULL, NULL, &proto);
+		sock_gettype(nso->nso_so, NULL, NULL, &proto);
 		if (proto == IPPROTO_TCP)
-			sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
+			sock_setsockopt(nso->nso_so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
 	}
-
-	if (nmp->nm_sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
+	if (nso->nso_sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
 		int reserve = NFS_UDPSOCKBUF;
-		error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
-		error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
+		sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
+		sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
+	}
+	/* set SO_NOADDRERR to detect network changes ASAP */
+	sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
+	/* just playin' it safe with upcalls: have socket close wait for upcalls to finish */
+	sock_setsockopt(nso->nso_so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
+	/* socket should be interruptible if the mount is */
+	if (!NMFLAG(nmp, INTR))
+		sock_nointerrupt(nso->nso_so, 1);
+}
+
+/*
+ * Release resources held in an nfs_socket_search.
+ */
+void
+nfs_socket_search_cleanup(struct nfs_socket_search *nss)
+{
+	struct nfs_socket *nso, *nsonext;
+
+	TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) {
+		TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
+		nss->nss_sockcnt--;
+		nfs_socket_destroy(nso);
+	}
+	if (nss->nss_sock) {
+		nfs_socket_destroy(nss->nss_sock);
+		nss->nss_sock = NULL;
+	}
+}
+
+/*
+ * Prefer returning certain errors over others.
+ * This function returns a ranking of the given error.
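+ * Higher-ranked errors are considered more specific/interesting and take
+ * precedence when recorded via nfs_socket_search_update_error().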
+ */
+int
+nfs_connect_error_class(int error)
+{
+	switch (error) {
+	case 0:
+		return (0);
+	case ETIMEDOUT:
+	case EAGAIN:
+		return (1);
+	case EPIPE:
+	case EADDRNOTAVAIL:
+	case ENETDOWN:
+	case ENETUNREACH:
+	case ENETRESET:
+	case ECONNABORTED:
+	case ECONNRESET:
+	case EISCONN:
+	case ENOTCONN:
+	case ESHUTDOWN:
+	case ECONNREFUSED:
+	case EHOSTDOWN:
+	case EHOSTUNREACH:
+		return (2);
+	case ERPCMISMATCH:
+	case EPROCUNAVAIL:
+	case EPROGMISMATCH:
+	case EPROGUNAVAIL:
+		return (3);
+	case EBADRPC:
+		return (4);
+	default:
+		return (5);
+	}
+}
+
+/*
+ * Make sure a socket search returns the best error.
+ */
+void
+nfs_socket_search_update_error(struct nfs_socket_search *nss, int error)
+{
+	if (nfs_connect_error_class(error) >= nfs_connect_error_class(nss->nss_error))
+		nss->nss_error = error;
+}
+
+/*
+ * Continue the socket search until we have something to report.
+ */
+int
+nfs_connect_search_loop(struct nfsmount *nmp, struct nfs_socket_search *nss)
+{
+	struct nfs_socket *nso, *nsonext;
+	struct timeval now;
+	struct nfs_fs_location *fsl;
+	struct nfs_fs_server *fss;
+	struct sockaddr_storage ss;
+	char *addrstr;
+	int error, nomore = 0;
+
+loop:
+	microuptime(&now);
+	NFS_SOCK_DBG(("nfs connect %s search %ld\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, now.tv_sec));
+
+	/* Time to start another socket? */
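+	/*
+	 * nss_last is the time the last socket was started: a negative value
+	 * (-1 initially, -2 after skipping an address or reaping a socket)
+	 * forces a new attempt right away; otherwise another socket is
+	 * started (up to 4 concurrently) every couple of seconds.
+	 */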
+	while ((nss->nss_last < 0) || (nss->nss_sockcnt == 0) ||
+	       ((nss->nss_sockcnt < 4) && (now.tv_sec >= (nss->nss_last + 2)))) {
+		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
+			return (EINTR);
+		/* Find the next address to try... */
+		/* Have we run out of locations? */
+		if (!nomore && (nss->nss_last != -1) && !nfs_location_index_cmp(&nss->nss_nextloc, &nss->nss_startloc))
+			nomore = 1;
+		if (nomore) {
+			if (nss->nss_last < 0)
+				nss->nss_last = now.tv_sec;
+			break;
+		}
+		/* Can we convert the address to a sockaddr? */
+		fsl = nmp->nm_locations.nl_locations[nss->nss_nextloc.nli_loc];
+		fss = fsl->nl_servers[nss->nss_nextloc.nli_serv];
+		addrstr = fss->ns_addresses[nss->nss_nextloc.nli_addr];
+		if (!nfs_uaddr2sockaddr(addrstr, (struct sockaddr*)&ss)) {
+			nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
+			nss->nss_last = -2;
+			continue;
+		}
+		/* Check that socket family is acceptable. */
+		if (nmp->nm_sofamily && (ss.ss_family != nmp->nm_sofamily)) {
+			nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
+			nss->nss_last = -2;
+			continue;
+		}
+
+		/* Create the socket. */
+		error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nss->nss_sotype,
+				nss->nss_port, nss->nss_protocol, nss->nss_version,
+				((nss->nss_protocol == NFS_PROG) && NMFLAG(nmp, RESVPORT)), &nso);
+		if (error)
+			return (error);
+
+		nso->nso_location = nss->nss_nextloc;
+		nso->nso_wake = nss;
+		error = sock_setupcall(nso->nso_so, nfs_connect_upcall, nso);
 		if (error) {
-			log(LOG_INFO, "nfs_connect: socket buffer setting errors for %s\n",
+			lck_mtx_lock(&nso->nso_lock);
+			nso->nso_error = error;
+			nso->nso_flags |= NSO_DEAD;
+			lck_mtx_unlock(&nso->nso_lock);
+		}
+
+		TAILQ_INSERT_TAIL(&nss->nss_socklist, nso, nso_link);
+		nss->nss_sockcnt++;
+		nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc);
+
+		nss->nss_last = now.tv_sec;
+	}
+
+	/* check each active socket and try to push it along */
+	TAILQ_FOREACH(nso, &nss->nss_socklist, nso_link) {
+		lck_mtx_lock(&nso->nso_lock);
+		if (!(nso->nso_flags & NSO_CONNECTED)) {
+			if ((nso->nso_sotype != SOCK_STREAM) && NMFLAG(nmp, NOCONNECT)) {
+				/* no connection needed, just say it's already connected */
+				nso->nso_flags |= NSO_CONNECTED;
+				NFS_SOCK_DBG(("nfs connect %s UDP socket %p noconnect\n",
+					vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
+			} else if (!(nso->nso_flags & NSO_CONNECTING)) {
+				/* initiate the connection */
+				nso->nso_flags |= NSO_CONNECTING;
+				lck_mtx_unlock(&nso->nso_lock);
+				NFS_SOCK_DBG(("nfs connect %s connecting socket %p\n",
+					vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
+				error = sock_connect(nso->nso_so, nso->nso_saddr, MSG_DONTWAIT);
+				lck_mtx_lock(&nso->nso_lock);
+				if (error && (error != EINPROGRESS)) {
+					nso->nso_error = error;
+					nso->nso_flags |= NSO_DEAD;
+					lck_mtx_unlock(&nso->nso_lock);
+					continue;
+				}
+			}
+			if (nso->nso_flags & NSO_CONNECTING) {
+				/* check the connection */
+				if (sock_isconnected(nso->nso_so)) {
+					NFS_SOCK_DBG(("nfs connect %s socket %p is connected\n",
+						vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
+					nso->nso_flags &= ~NSO_CONNECTING;
+					nso->nso_flags |= NSO_CONNECTED;
+				} else {
+					int optlen = sizeof(error);
+					error = 0;
+					sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &error, &optlen);
+					if (error) { /* we got an error on the socket */
+						NFS_SOCK_DBG(("nfs connect %s socket %p connection error %d\n",
+							vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error));
+						if (nss->nss_flags & NSS_VERBOSE)
+							log(LOG_INFO, "nfs_connect: socket error %d for %s\n",
+								error, vfs_statfs(nmp->nm_mountp)->f_mntfromname);
+						nso->nso_error = error;
+						nso->nso_flags |= NSO_DEAD;
+						lck_mtx_unlock(&nso->nso_lock);
+						continue;
+					}
+				}
+			}
+			if (nso->nso_flags & NSO_CONNECTED)
+				nfs_socket_options(nmp, nso);
+		}
+		if (!(nso->nso_flags & NSO_CONNECTED)) {
+			lck_mtx_unlock(&nso->nso_lock);
+			continue;
+		}
+		if (!(nso->nso_flags & (NSO_PINGING|NSO_VERIFIED)) ||
+		    ((nso->nso_sotype == SOCK_DGRAM) && (now.tv_sec >= nso->nso_reqtimestamp+2))) {
+			/* initiate a NULL RPC request */
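+			/* (unverified UDP sockets are re-pinged every couple of seconds in case a datagram was dropped) */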
+			uint64_t xid = nso->nso_pingxid;
+			mbuf_t m, mreq = NULL;
+			struct msghdr msg;
+			size_t reqlen, sentlen;
+			uint32_t vers;
+
+			if (!(vers = nso->nso_version)) {
+				if (nso->nso_protocol == PMAPPROG)
+					vers = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
+				else if (nso->nso_protocol == NFS_PROG)
+					vers = NFS_VER3;
+			}
+			lck_mtx_unlock(&nso->nso_lock);
+			error = nfsm_rpchead2(nmp, nso->nso_sotype, nso->nso_protocol, vers, 0, RPCAUTH_SYS,
+					vfs_context_ucred(vfs_context_kernel()), NULL, NULL, &xid, &mreq);
+			lck_mtx_lock(&nso->nso_lock);
+			if (!error) {
+				nso->nso_flags |= NSO_PINGING;
+				nso->nso_pingxid = R_XID32(xid);
+				nso->nso_reqtimestamp = now.tv_sec;
+				bzero(&msg, sizeof(msg));
+				if ((nso->nso_sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so)) {
+					msg.msg_name = nso->nso_saddr;
+					msg.msg_namelen = nso->nso_saddr->sa_len;
+				}
+				for (reqlen=0, m=mreq; m; m = mbuf_next(m))
+					reqlen += mbuf_len(m);
+				lck_mtx_unlock(&nso->nso_lock);
+				error = sock_sendmbuf(nso->nso_so, &msg, mreq, 0, &sentlen);
+				NFS_SOCK_DBG(("nfs connect %s verifying socket %p send rv %d\n",
+					vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error));
+				lck_mtx_lock(&nso->nso_lock);
+				if (!error && (sentlen != reqlen))
+					error = ETIMEDOUT;
+			}
+			if (error) {
+				nso->nso_error = error;
+				nso->nso_flags |= NSO_DEAD;
+				lck_mtx_unlock(&nso->nso_lock);
+				continue;
+			}
+		}
+		if (nso->nso_flags & NSO_VERIFIED) {
+			/* WOOHOO!! This socket looks good! */
+			NFS_SOCK_DBG(("nfs connect %s socket %p verified\n",
+				vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
+			if (!nso->nso_version) {
+				/* If the version isn't set, the default must have worked. */
+				if (nso->nso_protocol == PMAPPROG)
+					nso->nso_version = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
+				if (nso->nso_protocol == NFS_PROG)
+					nso->nso_version = NFS_VER3;
+			}
+			lck_mtx_unlock(&nso->nso_lock);
+			TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
+			nss->nss_sockcnt--;
+			nss->nss_sock = nso;
+			break;
+		}
+		lck_mtx_unlock(&nso->nso_lock);
+	}
+
+	TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) {
+		lck_mtx_lock(&nso->nso_lock);
+		if (now.tv_sec >= (nso->nso_timestamp + nss->nss_timeo)) {
+			/* took too long */
+			NFS_SOCK_DBG(("nfs connect %s socket %p timed out\n",
+				vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
+			nso->nso_error = ETIMEDOUT;
+			nso->nso_flags |= NSO_DEAD;
+		}
+		if (!(nso->nso_flags & NSO_DEAD)) {
+			lck_mtx_unlock(&nso->nso_lock);
+			continue;
+		}
+		lck_mtx_unlock(&nso->nso_lock);
+		NFS_SOCK_DBG(("nfs connect %s reaping socket %p %d\n",
+			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, nso->nso_error));
+		nfs_socket_search_update_error(nss, nso->nso_error);
+		TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
+		nss->nss_sockcnt--;
+		nfs_socket_destroy(nso);
+		if (!nomore)
+			nss->nss_last = -2;
+	}
+
+	/*
+	 * Keep looping if we haven't found a socket yet and we have more
+	 * sockets to (continue to) try.
+	 */
+	error = 0;
+	if (!nss->nss_sock && (!TAILQ_EMPTY(&nss->nss_socklist) || !nomore)) {
+		/* log a warning if connect is taking a while */
+		if (((now.tv_sec - nss->nss_timestamp) >= 30) && ((nss->nss_flags & (NSS_VERBOSE|NSS_WARNED)) == NSS_VERBOSE)) {
+			log(LOG_INFO, "nfs_connect: socket connect taking a while for %s\n",
 				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
+			nss->nss_flags |= NSS_WARNED;
+		}
+		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
+			return (EINTR);
+		if ((error = nfs_sigintr(nmp, NULL, current_thread(), 0)))
+			return (error);
+		if (nss->nss_last >= 0)
+			tsleep(nss, PSOCK, "nfs_connect_search_wait", hz);
+		goto loop;
+	}
+
+	NFS_SOCK_DBG(("nfs connect %s returning %d\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, error));
+	return (error);
+}
+
+/*
+ * Initialize a new NFS connection.
+ *
+ * Search for a location to connect a socket to and initialize the connection.
+ *
+ * An NFS mount may have multiple locations/servers/addresses available.
+ * We attempt to connect to each one asynchronously and will start
+ * several sockets in parallel if other locations are slow to answer.
+ * We'll use the first NFS socket we can successfully set up.
+ *
+ * The search may involve contacting the portmapper service first.
+ *
+ * A mount's initial connection may require negotiating some parameters such
+ * as socket type and NFS version.
+ */
+int
+nfs_connect(struct nfsmount *nmp, int verbose, int timeo)
+{
+	struct nfs_socket_search nss;
+	struct nfs_socket *nso, *nsonfs;
+	struct sockaddr_storage ss;
+	struct sockaddr *saddr, *oldsaddr;
+	sock_upcall upcall;
+	struct timeval now, start;
+	int error, savederror, nfsvers;
+	uint8_t	sotype = nmp->nm_sotype ? nmp->nm_sotype : SOCK_STREAM;
+	fhandle_t *fh = NULL;
+	char *path = NULL;
+	in_port_t port;
+
+	/* paranoia... check that we have at least one address in the locations */
+	uint32_t loc, serv;
+	for (loc=0; loc < nmp->nm_locations.nl_numlocs; loc++) {
+		for (serv=0; serv < nmp->nm_locations.nl_locations[loc]->nl_servcount; serv++) {
+			if (nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount)
+				break;
+			NFS_SOCK_DBG(("nfs connect %s search, server %s has no addresses\n",
+				vfs_statfs(nmp->nm_mountp)->f_mntfromname,
+				nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name));
+		}
+		if (serv < nmp->nm_locations.nl_locations[loc]->nl_servcount)
+			break;
+	}
+	if (loc >= nmp->nm_locations.nl_numlocs) {
+		NFS_SOCK_DBG(("nfs connect %s search failed, no addresses\n",
+			vfs_statfs(nmp->nm_mountp)->f_mntfromname));
+		return (EINVAL);
+	}
+
+	lck_mtx_lock(&nmp->nm_lock);
+	nmp->nm_sockflags |= NMSOCK_CONNECTING;
+	nmp->nm_nss = &nss;
+	lck_mtx_unlock(&nmp->nm_lock);
+	microuptime(&start);
+	savederror = error = 0;
+
+tryagain:
+	/* initialize socket search state */
+	bzero(&nss, sizeof(nss));
+	nss.nss_error = savederror;
+	TAILQ_INIT(&nss.nss_socklist);
+	nss.nss_sotype = sotype;
+	nss.nss_startloc = nmp->nm_locations.nl_current;
+	nss.nss_timestamp = start.tv_sec;
+	nss.nss_timeo = timeo;
+	if (verbose)
+		nss.nss_flags |= NSS_VERBOSE;
+
+	/* First time connecting, we may need to negotiate some things */
+	if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
+		if (!nmp->nm_vers) {
+			/* No NFS version specified... */
+			if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) {
+				/* ...connect to portmapper first if we (may) need any ports. */
+				nss.nss_port = PMAPPORT;
+				nss.nss_protocol = PMAPPROG;
+				nss.nss_version = 0;
+			} else {
+				/* ...connect to NFS port first. */
+				nss.nss_port = nmp->nm_nfsport;
+				nss.nss_protocol = NFS_PROG;
+				nss.nss_version = 0;
+			}
+		} else if (nmp->nm_vers >= NFS_VER4) {
+			/* For NFSv4, we use the given (or default) port. */
+			nss.nss_port = nmp->nm_nfsport ? nmp->nm_nfsport : NFS_PORT;
+			nss.nss_protocol = NFS_PROG;
+			nss.nss_version = 4;
+		} else {
+			/* For NFSv3/v2... */
+			if (!nmp->nm_nfsport || (!NM_OMATTR_GIVEN(nmp, FH) && !nmp->nm_mountport)) {
+				/* ...connect to portmapper first if we need any ports. */
+				nss.nss_port = PMAPPORT;
+				nss.nss_protocol = PMAPPROG;
+				nss.nss_version = 0;
+			} else {
+				/* ...connect to NFS port first. */
+				nss.nss_port = nmp->nm_nfsport;
+				nss.nss_protocol = NFS_PROG;
+				nss.nss_version = nmp->nm_vers;
+			}
+		}
+		NFS_SOCK_DBG(("nfs connect first %s, so type %d port %d prot %d %d\n",
+			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port,
+			nss.nss_protocol, nss.nss_version));
+	} else {
+		/* we've connected before, just connect to NFS port */
+		if (!nmp->nm_nfsport) {
+			/* need to ask portmapper which port that would be */
+			nss.nss_port = PMAPPORT;
+			nss.nss_protocol = PMAPPROG;
+			nss.nss_version = 0;
+		} else {
+			nss.nss_port = nmp->nm_nfsport;
+			nss.nss_protocol = NFS_PROG;
+			nss.nss_version = nmp->nm_vers;
+		}
+		NFS_SOCK_DBG(("nfs connect %s, so type %d port %d prot %d %d\n",
+			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port,
+			nss.nss_protocol, nss.nss_version));
+	}
+
+	/* Set next location to first valid location. */
+	/* If start location is invalid, find next location. */
+	nss.nss_nextloc = nss.nss_startloc;
+	if ((nss.nss_nextloc.nli_serv >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servcount) ||
+	    (nss.nss_nextloc.nli_addr >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servers[nss.nss_nextloc.nli_serv]->ns_addrcount)) {
+		nfs_location_next(&nmp->nm_locations, &nss.nss_nextloc);
+		if (!nfs_location_index_cmp(&nss.nss_nextloc, &nss.nss_startloc)) {
+			NFS_SOCK_DBG(("nfs connect %s search failed, couldn't find a valid location index\n",
+				vfs_statfs(nmp->nm_mountp)->f_mntfromname));
+			return (ENOENT);
+		}
+	}
+	nss.nss_last = -1;
+
+keepsearching:
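+	/* Come back here whenever a candidate socket doesn't pan out, to keep searching. */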
+
+	error = nfs_connect_search_loop(nmp, &nss);
+	if (error || !nss.nss_sock) {
+		/* search failed */
+		nfs_socket_search_cleanup(&nss);
+		if (!error && (nss.nss_sotype == SOCK_STREAM) && !nmp->nm_sotype && (nmp->nm_vers < NFS_VER4)) {
+			/* Try using UDP */
+			sotype = SOCK_DGRAM;
+			savederror = nss.nss_error;
+			NFS_SOCK_DBG(("nfs connect %s TCP failed %d %d, trying UDP\n",
+				vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nss.nss_error));
+			goto tryagain;
+		}
+		if (!error)
+			error = nss.nss_error ? nss.nss_error : ETIMEDOUT;
+		lck_mtx_lock(&nmp->nm_lock);
+		nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
+		nmp->nm_nss = NULL;
+		lck_mtx_unlock(&nmp->nm_lock);
+		if (nss.nss_flags & NSS_WARNED)
+			log(LOG_INFO, "nfs_connect: socket connect aborted for %s\n",
+				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
+		if (fh)
+			FREE(fh, M_TEMP);
+		if (path)
+			FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
+		NFS_SOCK_DBG(("nfs connect %s search failed, returning %d\n",
+			vfs_statfs(nmp->nm_mountp)->f_mntfromname, error));
+		return (error);
+	}
+
+	/* try to use nss_sock */
+	nso = nss.nss_sock;
+	nss.nss_sock = NULL;
+
+	/* We may be speaking to portmap first... to determine port(s). */
+	if (nso->nso_saddr->sa_family == AF_INET)
+		port = ntohs(((struct sockaddr_in*)nso->nso_saddr)->sin_port);
+	else
+		port = ntohs(((struct sockaddr_in6*)nso->nso_saddr)->sin6_port);
+	if (port == PMAPPORT) {
+		/* Use this portmapper port to get the port #s we need. */
+		NFS_SOCK_DBG(("nfs connect %s got portmapper socket %p\n",
+			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
+
+		/* remove the connect upcall so nfs_portmap_lookup() can use this socket */
+		sock_setupcall(nso->nso_so, NULL, NULL);
+
+		/* Set up socket address and port for NFS socket. */
+		bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len);
+
+		/* If NFS version not set, try NFSv3 then NFSv2. */
+		nfsvers = nmp->nm_vers ? nmp->nm_vers : NFS_VER3;
+
+		if (!(port = nmp->nm_nfsport)) {
+			if (ss.ss_family == AF_INET)
+				((struct sockaddr_in*)&ss)->sin_port = htons(0);
+			else if (ss.ss_family == AF_INET6)
+				((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
+			error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
+					nso->nso_so, NFS_PROG, nfsvers, 
+					(nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo);
+			if (!error) {
+				if (ss.ss_family == AF_INET)
+					port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
+				else if (ss.ss_family == AF_INET6)
+					port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
+				if (!port)
+					error = EPROGUNAVAIL;
+			}
+			if (error && !nmp->nm_vers) {
+				nfsvers = NFS_VER2;
+				error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
+						nso->nso_so, NFS_PROG, nfsvers, 
+						(nso->nso_sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP, timeo);
+				if (!error) {
+					if (ss.ss_family == AF_INET)
+						port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
+					else if (ss.ss_family == AF_INET6)
+						port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
+					if (!port)
+						error = EPROGUNAVAIL;
+				}
+			}
+			if (error) {
+				nfs_socket_search_update_error(&nss, error);
+				nfs_socket_destroy(nso);
+				goto keepsearching;
+			}
+		}
+		/* Create NFS protocol socket and add it to the list of sockets. */
+		error = nfs_socket_create(nmp, (struct sockaddr*)&ss, nso->nso_sotype, port,
+				NFS_PROG, nfsvers, NMFLAG(nmp, RESVPORT), &nsonfs);
+		if (error) {
+			nfs_socket_search_update_error(&nss, error);
+			nfs_socket_destroy(nso);
+			goto keepsearching;
+		}
+		nsonfs->nso_location = nso->nso_location;
+		nsonfs->nso_wake = &nss;
+		error = sock_setupcall(nsonfs->nso_so, nfs_connect_upcall, nsonfs);
+		if (error) {
+			nfs_socket_search_update_error(&nss, error);
+			nfs_socket_destroy(nsonfs);
+			nfs_socket_destroy(nso);
+			goto keepsearching;
+		}
+		TAILQ_INSERT_TAIL(&nss.nss_socklist, nsonfs, nso_link);
+		nss.nss_sockcnt++;
+		if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) {
+			/* Set up socket address and port for MOUNT socket. */
 			error = 0;
+			bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len);
+			port = nmp->nm_mountport;
+			if (ss.ss_family == AF_INET)
+				((struct sockaddr_in*)&ss)->sin_port = htons(port);
+			else if (ss.ss_family == AF_INET6)
+				((struct sockaddr_in6*)&ss)->sin6_port = htons(port);
+			if (!port) {
+				/* Get port/sockaddr for MOUNT version corresponding to NFS version. */
+				/* If NFS version is unknown, optimistically choose for NFSv3. */
+				int mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
+				int mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nso->nso_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP;
+				error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
+						nso->nso_so, RPCPROG_MNT, mntvers, mntproto, timeo);
+			}
+			if (!error) {
+				if (ss.ss_family == AF_INET)
+					port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
+				else if (ss.ss_family == AF_INET6)
+					port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
+				if (!port)
+					error = EPROGUNAVAIL;
+			}
+			/* create sockaddr for MOUNT */
+			if (!error)
+				MALLOC(nsonfs->nso_saddr2, struct sockaddr *, ss.ss_len, M_SONAME, M_WAITOK|M_ZERO);
+			if (!error && !nsonfs->nso_saddr2)
+				error = ENOMEM;
+			if (!error)
+				bcopy(&ss, nsonfs->nso_saddr2, ss.ss_len);
+			if (error) {
+				lck_mtx_lock(&nsonfs->nso_lock);
+				nsonfs->nso_error = error;
+				nsonfs->nso_flags |= NSO_DEAD;
+				lck_mtx_unlock(&nsonfs->nso_lock);
+			}
 		}
+		nfs_socket_destroy(nso);
+		goto keepsearching;
 	}
 
-	/* set SO_NOADDRERR to detect network changes ASAP */
-	error = sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
+	/* nso is an NFS socket */
+	NFS_SOCK_DBG(("nfs connect %s got NFS socket %p\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
+
+	/* If NFS version wasn't specified, it was determined during the connect. */
+	nfsvers = nmp->nm_vers ? nmp->nm_vers : (int)nso->nso_version;
+
+	/* Perform MOUNT call for initial NFSv2/v3 connection/mount. */
+	if ((nfsvers < NFS_VER4) && !(nmp->nm_sockflags & NMSOCK_HASCONNECTED) && !NM_OMATTR_GIVEN(nmp, FH)) {
+		error = 0;
+		saddr = nso->nso_saddr2;
+		if (!saddr) {
+			/* Need sockaddr for MOUNT port */
+			bcopy(nso->nso_saddr, &ss, nso->nso_saddr->sa_len);
+			port = nmp->nm_mountport;
+			if (ss.ss_family == AF_INET)
+				((struct sockaddr_in*)&ss)->sin_port = htons(port);
+			else if (ss.ss_family == AF_INET6)
+				((struct sockaddr_in6*)&ss)->sin6_port = htons(port);
+			if (!port) {
+				/* Get port/sockaddr for MOUNT version corresponding to NFS version. */
+				int mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
+				int mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nso->nso_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP;
+				error = nfs_portmap_lookup(nmp, vfs_context_current(), (struct sockaddr*)&ss,
+						NULL, RPCPROG_MNT, mntvers, mntproto, timeo);
+				if (ss.ss_family == AF_INET)
+					port = ntohs(((struct sockaddr_in*)&ss)->sin_port);
+				else if (ss.ss_family == AF_INET6)
+					port = ntohs(((struct sockaddr_in6*)&ss)->sin6_port);
+			}
+			if (!error) {
+				if (port)
+					saddr = (struct sockaddr*)&ss;
+				else
+					error = EPROGUNAVAIL;
+			}
+		}
+		if (saddr)
+			MALLOC(fh, fhandle_t *, sizeof(fhandle_t), M_TEMP, M_WAITOK|M_ZERO);
+		if (saddr && fh)
+			MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); 
+		if (!saddr || !fh || !path) {
+			if (!error)
+				error = ENOMEM;
+			if (fh)
+				FREE(fh, M_TEMP);
+			if (path)
+				FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
+			fh = NULL;
+			path = NULL;
+			nfs_socket_search_update_error(&nss, error);
+			nfs_socket_destroy(nso);
+			goto keepsearching;
+		}
+		nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location, path, MAXPATHLEN, 1);
+		error = nfs3_mount_rpc(nmp, saddr, nso->nso_sotype, nfsvers,
+				path, vfs_context_current(), timeo, fh, &nmp->nm_servsec);
+		NFS_SOCK_DBG(("nfs connect %s socket %p mount %d\n",
+			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error));
+		if (!error) {
+			/* Make sure we can agree on a security flavor. */
+			int o, s;  /* indices into mount option and server security flavor lists */
+			int found = 0;
+
+			if ((nfsvers == NFS_VER3) && !nmp->nm_servsec.count) {
+				/* Some servers return an empty list to indicate RPCAUTH_SYS? */
+				nmp->nm_servsec.count = 1;
+				nmp->nm_servsec.flavors[0] = RPCAUTH_SYS;
+			}
+			if (nmp->nm_sec.count) {
+				/* Choose the first flavor in our list that the server supports. */
+				if (!nmp->nm_servsec.count) {
+					/* we don't know what the server supports, just use our first choice */
+					nmp->nm_auth = nmp->nm_sec.flavors[0];
+					found = 1;
+				}
+				for (o=0; !found && (o < nmp->nm_sec.count); o++)
+					for (s=0; !found && (s < nmp->nm_servsec.count); s++)
+						if (nmp->nm_sec.flavors[o] == nmp->nm_servsec.flavors[s]) {
+							nmp->nm_auth = nmp->nm_sec.flavors[o];
+							found = 1;
+						}
+			} else {
+				/* Choose the first one we support from the server's list. */
+				if (!nmp->nm_servsec.count) {
+					nmp->nm_auth = RPCAUTH_SYS;
+					found = 1;
+				}
+				for (s=0; s < nmp->nm_servsec.count; s++)
+					switch (nmp->nm_servsec.flavors[s]) {
+					case RPCAUTH_SYS:
+						/* prefer RPCAUTH_SYS to RPCAUTH_NONE */
+						if (found && (nmp->nm_auth == RPCAUTH_NONE))
+							found = 0;
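+						/* FALLTHROUGH */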
+					case RPCAUTH_NONE:
+					case RPCAUTH_KRB5:
+					case RPCAUTH_KRB5I:
+					case RPCAUTH_KRB5P:
+						if (!found) {
+							nmp->nm_auth = nmp->nm_servsec.flavors[s];
+							found = 1;
+						}
+						break;
+					}
+			}
+			error = !found ? EAUTH : 0;
+		}
+		FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
+		path = NULL;
+		if (error) {
+			nfs_socket_search_update_error(&nss, error);
+			FREE(fh, M_TEMP);
+			fh = NULL;
+			nfs_socket_destroy(nso);
+			goto keepsearching;
+		}
+		if (nmp->nm_fh)
+			FREE(nmp->nm_fh, M_TEMP);
+		nmp->nm_fh = fh;
+		fh = NULL;
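+		/* we registered with mountd, so be sure to send an UNMOUNT RPC at unmount time */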
+		NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_CALLUMNT);
+	}
+
+	/* put the real upcall in place */
+	upcall = (nso->nso_sotype == SOCK_STREAM) ? nfs_tcp_rcv : nfs_udp_rcv;
+	error = sock_setupcall(nso->nso_so, upcall, nmp);
 	if (error) {
-		lck_mtx_unlock(&nmp->nm_lock);
-		goto bad;
+		nfs_socket_search_update_error(&nss, error);
+		nfs_socket_destroy(nso);
+		goto keepsearching;
 	}
-	/* just playin' it safe */
-	sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
 
-	if (!(nmp->nm_flag & NFSMNT_INT))
-		sock_nointerrupt(so, 1);
+	if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
+		/* set mntfromname to this location */
+		if (!NM_OMATTR_GIVEN(nmp, MNTFROM))
+			nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location,
+				vfs_statfs(nmp->nm_mountp)->f_mntfromname,
+				sizeof(vfs_statfs(nmp->nm_mountp)->f_mntfromname), 0);
+		/* some negotiated values need to remain unchanged for the life of the mount */
+		if (!nmp->nm_sotype)
+			nmp->nm_sotype = nso->nso_sotype;
+		if (!nmp->nm_vers) {
+			nmp->nm_vers = nfsvers;
+			/* If we negotiated NFSv4, set nm_nfsport if we ended up on the standard NFS port */
+			if ((nfsvers >= NFS_VER4) && !NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT)) {
+				if (nso->nso_saddr->sa_family == AF_INET)
+					port = ntohs(((struct sockaddr_in*)nso->nso_saddr)->sin_port);
+				else if (nso->nso_saddr->sa_family == AF_INET6)
+					port = ntohs(((struct sockaddr_in6*)nso->nso_saddr)->sin6_port);
+				else
+					port = 0;
+				if (port == NFS_PORT)
+					nmp->nm_nfsport = NFS_PORT;
+			}
+		}
+		/* do some version-specific pre-mount set up */
+		if (nmp->nm_vers >= NFS_VER4) {
+			microtime(&now);
+			nmp->nm_mounttime = ((uint64_t)now.tv_sec << 32) | now.tv_usec;
+			if (!NMFLAG(nmp, NOCALLBACK))
+				nfs4_mount_callback_setup(nmp);
+		}
+	}
 
-	/* Initialize socket state variables */
+	/* Initialize NFS socket state variables */
+	lck_mtx_lock(&nmp->nm_lock);
 	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
 		nmp->nm_srtt[3] = (NFS_TIMEO << 3);
 	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
 		nmp->nm_sdrtt[3] = 0;
-	if (nmp->nm_sotype == SOCK_DGRAM) {
-		/* XXX do we really want to reset this on each reconnect? */
+	if (nso->nso_sotype == SOCK_DGRAM) {
 		nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
 		nmp->nm_sent = 0;
-	} else if (nmp->nm_sotype == SOCK_STREAM) {
-		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
-		nmp->nm_fragleft = nmp->nm_reclen = 0;
+	} else if (nso->nso_sotype == SOCK_STREAM) {
 		nmp->nm_timeouts = 0;
 	}
 	nmp->nm_sockflags &= ~NMSOCK_CONNECTING;
 	nmp->nm_sockflags |= NMSOCK_SETUP;
-	FSDBG(529, nmp, nmp->nm_state, nmp->nm_flag, nmp->nm_cwnd);
+	/* move the socket to the mount structure */
+	nmp->nm_nso = nso;
+	oldsaddr = nmp->nm_saddr;
+	nmp->nm_saddr = nso->nso_saddr;
 	lck_mtx_unlock(&nmp->nm_lock);
 	error = nfs_connect_setup(nmp);
-bad:
 	lck_mtx_lock(&nmp->nm_lock);
-	nmp->nm_sockflags &= ~(NMSOCK_CONNECTING|NMSOCK_SETUP);
+	nmp->nm_sockflags &= ~NMSOCK_SETUP;
 	if (!error) {
 		nmp->nm_sockflags |= NMSOCK_READY;
 		wakeup(&nmp->nm_sockflags);
 	}
+	if (error) {
+		NFS_SOCK_DBG(("nfs connect %s socket %p setup failed %d\n",
+			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error));
+		nfs_socket_search_update_error(&nss, error);
+		nmp->nm_saddr = oldsaddr;
+		if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
+			/* undo settings made prior to setup */
+			if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_SOCKET_TYPE))
+				nmp->nm_sotype = 0;
+			if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_VERSION)) {
+				if (nmp->nm_vers >= NFS_VER4) {
+					if (!NFS_BITMAP_ISSET(nmp->nm_mattrs, NFS_MATTR_NFS_PORT))
+						nmp->nm_nfsport = 0;
+					if (nmp->nm_cbid)
+						nfs4_mount_callback_shutdown(nmp);
+					if (IS_VALID_CRED(nmp->nm_mcred))
+						kauth_cred_unref(&nmp->nm_mcred);
+					bzero(&nmp->nm_un, sizeof(nmp->nm_un));
+				}
+				nmp->nm_vers = 0;
+			}
+		}
+		lck_mtx_unlock(&nmp->nm_lock);
+		nmp->nm_nso = NULL;
+		nfs_socket_destroy(nso);
+		goto keepsearching;
+	}
+
+	/* update current location */
+	if ((nmp->nm_locations.nl_current.nli_flags & NLI_VALID) &&
+	    (nmp->nm_locations.nl_current.nli_serv != nso->nso_location.nli_serv)) {
+		/* server has changed, we should initiate failover/recovery */
+		// XXX
+	}
+	nmp->nm_locations.nl_current = nso->nso_location;
+	nmp->nm_locations.nl_current.nli_flags |= NLI_VALID;
+
+	if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
+		/* We have now successfully connected... make a note of it. */
+		nmp->nm_sockflags |= NMSOCK_HASCONNECTED;
+	}
+
 	lck_mtx_unlock(&nmp->nm_lock);
-	return (error);
+	if (oldsaddr)
+		FREE(oldsaddr, M_SONAME);
+
+	if (nss.nss_flags & NSS_WARNED)
+		log(LOG_INFO, "nfs_connect: socket connect completed for %s\n",
+			vfs_statfs(nmp->nm_mountp)->f_mntfromname);
+
+	nmp->nm_nss = NULL;
+	nfs_socket_search_cleanup(&nss);
+	if (fh)
+		FREE(fh, M_TEMP);
+	if (path)
+		FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
+	NFS_SOCK_DBG(("nfs connect %s success\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname));
+	return (0);
 }
 
+
 /* setup & confirm socket connection is functional */
 int
 nfs_connect_setup(struct nfsmount *nmp)
 {
-	struct nfsm_chain nmreq, nmrep;
-	int error = 0, status;
-	u_int64_t xid;
+	int error = 0;
 
 	if (nmp->nm_vers >= NFS_VER4) {
-		error = nfs4_setclientid(nmp);
-		if (error)
-			return (error);
-		error = nfs4_renew(nmp, R_SETUP);
-		if ((error == NFSERR_ADMIN_REVOKED) ||
-		    (error == NFSERR_EXPIRED) ||
-		    (error == NFSERR_LEASE_MOVED) ||
-		    (error == NFSERR_STALE_CLIENTID)) {
-			lck_mtx_lock(&nmp->nm_lock);
-			nmp->nm_state |= NFSSTA_RECOVER;
-			lck_mtx_unlock(&nmp->nm_lock);
+		if (nmp->nm_state & NFSSTA_CLIENTID) {
+			/* first, try to renew our current state */
+			error = nfs4_renew(nmp, R_SETUP);
+			if ((error == NFSERR_ADMIN_REVOKED) ||
+			    (error == NFSERR_CB_PATH_DOWN) ||
+			    (error == NFSERR_EXPIRED) ||
+			    (error == NFSERR_LEASE_MOVED) ||
+			    (error == NFSERR_STALE_CLIENTID)) {
+				lck_mtx_lock(&nmp->nm_lock);
+				nfs_need_recover(nmp, error);
+				lck_mtx_unlock(&nmp->nm_lock);
+			}
 		}
-	} else {
-		/* verify connection's OK by sending a NULL request */
-		nfsm_chain_null(&nmreq);
-		nfsm_chain_null(&nmrep);
-		nfsm_chain_build_alloc_init(error, &nmreq, 0);
-		nfsm_chain_build_done(error, &nmreq);
-		nfsmout_if(error);
-		error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC_NULL,
-				current_thread(), NULL, R_SETUP, &nmrep, &xid, &status);
-		if (!error)
-			error = status;
-nfsmout:
-		nfsm_chain_cleanup(&nmreq);
-		nfsm_chain_cleanup(&nmrep);
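+		/* (re)establish our client ID with the server */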
+		error = nfs4_setclientid(nmp);
 	}
 	return (error);
 }
@@ -422,10 +1504,10 @@ nfs_reconnect(struct nfsmount *nmp)
 
 	nfs_disconnect(nmp);
 
-	while ((error = nfs_connect(nmp, verbose))) {
+	while ((error = nfs_connect(nmp, verbose, 30))) {
 		verbose = 0;
 		nfs_disconnect(nmp);
-		if (error == EINTR || error == ERESTART)
+		if ((error == EINTR) || (error == ERESTART))
 			return (EINTR);
 		if (error == EIO)
 			return (EIO);
@@ -485,19 +1567,32 @@ nfs_reconnect(struct nfsmount *nmp)
 void
 nfs_disconnect(struct nfsmount *nmp)
 {
-	socket_t so;
+	struct nfs_socket *nso;
 
 	lck_mtx_lock(&nmp->nm_lock);
-	if ((nmp->nm_sotype == SOCK_STREAM) && nmp->nm_m) {
-		mbuf_freem(nmp->nm_m);
-		nmp->nm_m = nmp->nm_mlast = NULL;
-	}
-	if (nmp->nm_so) {
-		so = nmp->nm_so;
-		nmp->nm_so = NULL;
+tryagain:
+	if (nmp->nm_nso) {
+		struct timespec ts = { 1, 0 };
+		if (nmp->nm_state & NFSSTA_SENDING) { /* wait for sending to complete */
+			nmp->nm_state |= NFSSTA_WANTSND;
+			msleep(&nmp->nm_state, &nmp->nm_lock, PZERO-1, "nfswaitsending", &ts);
+			goto tryagain;
+		}
+		if (nmp->nm_sockflags & NMSOCK_POKE) { /* wait for poking to complete */
+			msleep(&nmp->nm_sockflags, &nmp->nm_lock, PZERO-1, "nfswaitpoke", &ts);
+			goto tryagain;
+		}
+		nmp->nm_sockflags |= NMSOCK_DISCONNECTING;
+		nmp->nm_sockflags &= ~NMSOCK_READY;
+		nso = nmp->nm_nso;
+		nmp->nm_nso = NULL;
+		if (nso->nso_saddr == nmp->nm_saddr)
+			nso->nso_saddr = NULL;
+		lck_mtx_unlock(&nmp->nm_lock);
+		nfs_socket_destroy(nso);
+		lck_mtx_lock(&nmp->nm_lock);
+		nmp->nm_sockflags &= ~NMSOCK_DISCONNECTING;
 		lck_mtx_unlock(&nmp->nm_lock);
-		sock_shutdown(so, SHUT_RDWR);
-		sock_close(so);
 	} else {
 		lck_mtx_unlock(&nmp->nm_lock);
 	}
@@ -536,6 +1631,7 @@ nfs_need_reconnect(struct nfsmount *nmp)
 	lck_mtx_unlock(nfs_request_mutex);
 }
 
+
 /*
  * thread to handle miscellaneous async NFS socket work (reconnects/resends)
  */
@@ -547,24 +1643,22 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
 	thread_t thd = current_thread();
 	struct nfsreq *req;
 	struct timeval now;
-	int error, dofinish, force;
+	int error, dofinish;
 	nfsnode_t np;
-	fhandle_t fh;
-	nfs_stateid dstateid;
 
 	lck_mtx_lock(&nmp->nm_lock);
 
 	while (!(nmp->nm_sockflags & NMSOCK_READY) ||
 	       !TAILQ_EMPTY(&nmp->nm_resendq) ||
+	       !LIST_EMPTY(&nmp->nm_monlist) ||
 	       nmp->nm_deadto_start ||
-	       ((nmp->nm_vers >= NFS_VER4) &&
-	       		((nmp->nm_state & NFSSTA_RECOVER) || !TAILQ_EMPTY(&nmp->nm_recallq))))
+	       (nmp->nm_state & NFSSTA_RECOVER) ||
+	       ((nmp->nm_vers >= NFS_VER4) && !TAILQ_EMPTY(&nmp->nm_dreturnq)))
 	{
 		if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
 			break;
-		force = (nmp->nm_state & NFSSTA_FORCE);
 		/* do reconnect, if necessary */
-		if (!(nmp->nm_sockflags & NMSOCK_READY) && !force) {
+		if (!(nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_FORCE)) {
 			if (nmp->nm_reconnect_start <= 0) {
 				microuptime(&now);
 				nmp->nm_reconnect_start = now.tv_sec;
@@ -577,38 +1671,27 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
 		}
 		if ((nmp->nm_sockflags & NMSOCK_READY) &&
 		    (nmp->nm_state & NFSSTA_RECOVER) &&
-		    !(nmp->nm_sockflags & NMSOCK_UNMOUNT) && !force) {
+		    !(nmp->nm_sockflags & NMSOCK_UNMOUNT) &&
+		    !(nmp->nm_state & NFSSTA_FORCE)) {
 			/* perform state recovery */
 			lck_mtx_unlock(&nmp->nm_lock);
-			nfs4_recover(nmp);
+			nfs_recover(nmp);
 			lck_mtx_lock(&nmp->nm_lock);
 		}
-		/* handle NFSv4 delegation recalls */
-		while ((nmp->nm_vers >= NFS_VER4) && !force &&
+		/* handle NFSv4 delegation returns */
+		while ((nmp->nm_vers >= NFS_VER4) && !(nmp->nm_state & NFSSTA_FORCE) &&
 		       (nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER) &&
-		       ((np = TAILQ_FIRST(&nmp->nm_recallq)))) {
-			TAILQ_REMOVE(&nmp->nm_recallq, np, n_dlink);
-			np->n_dlink.tqe_next = NFSNOLIST;
+		       ((np = TAILQ_FIRST(&nmp->nm_dreturnq)))) {
 			lck_mtx_unlock(&nmp->nm_lock);
-			lck_mtx_lock(&np->n_openlock);
-			dstateid = np->n_dstateid;
-			if (np->n_openflags & N_DELEG_MASK) {
-				fh.fh_len = np->n_fhsize;
-				bcopy(np->n_fhp, &fh.fh_data, fh.fh_len);
-				np->n_openflags &= ~N_DELEG_MASK;
-				lck_mtx_unlock(&np->n_openlock);
-				nfs4_delegreturn_rpc(nmp, fh.fh_data, fh.fh_len, &dstateid, thd, nmp->nm_mcred);
-			} else {
-				lck_mtx_unlock(&np->n_openlock);
-			}
+			nfs4_delegation_return(np, R_RECOVER, thd, nmp->nm_mcred);
 			lck_mtx_lock(&nmp->nm_lock);
 		}
 		/* do resends, if necessary/possible */
-		while ((((nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER)) || force) &&
+		while ((((nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER)) || (nmp->nm_state & NFSSTA_FORCE)) &&
 		       ((req = TAILQ_FIRST(&nmp->nm_resendq)))) {
 			if (req->r_resendtime)
 				microuptime(&now);
-			while (req && !force && req->r_resendtime && (now.tv_sec < req->r_resendtime))
+			while (req && !(nmp->nm_state & NFSSTA_FORCE) && req->r_resendtime && (now.tv_sec < req->r_resendtime))
 				req = TAILQ_NEXT(req, r_rchain);
 			if (!req)
 				break;
@@ -626,20 +1709,20 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
 				lck_mtx_lock(&nmp->nm_lock);
 				continue;
 			}
-			if ((req->r_flags & R_RESTART) || req->r_gss_ctx) {
+			if ((req->r_flags & R_RESTART) || nfs_request_using_gss(req)) {
 				req->r_flags &= ~R_RESTART;
 				req->r_resendtime = 0;
 				lck_mtx_unlock(&req->r_mtx);
 				/* async RPCs on GSS mounts need to be rebuilt and resent. */
 				nfs_reqdequeue(req);
-				if (req->r_gss_ctx) {
+				if (nfs_request_using_gss(req)) {
 					nfs_gss_clnt_rpcdone(req);
 					error = nfs_gss_clnt_args_restore(req);
 					if (error == ENEEDAUTH)
 						req->r_xid = 0;
 				}
 				NFS_SOCK_DBG(("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
-					req->r_gss_ctx ? " gss" : "", req->r_procnum, req->r_xid,
+					nfs_request_using_gss(req) ? " gss" : "", req->r_procnum, req->r_xid,
 					req->r_flags, req->r_rtt));
 				error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
 				if (!error)
@@ -693,20 +1776,45 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
 		}
 		if (nmp->nm_deadto_start)
 			nfs_mount_check_dead_timeout(nmp);
-		if (force || (nmp->nm_state & NFSSTA_DEAD))
+		if (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))
 			break;
-		if ((nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & NFSSTA_RECOVER)) {
+		/* check monitored nodes, if necessary/possible */
+		if (!LIST_EMPTY(&nmp->nm_monlist)) {
+			nmp->nm_state |= NFSSTA_MONITOR_SCAN;
+			LIST_FOREACH(np, &nmp->nm_monlist, n_monlink) {
+				if (!(nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE)))
+					break;
+				np->n_mflag |= NMMONSCANINPROG;
+				lck_mtx_unlock(&nmp->nm_lock);
+				error = nfs_getattr(np, NULL, vfs_context_kernel(), (NGA_UNCACHED|NGA_MONITOR));
+				if (!error && ISSET(np->n_flag, NUPDATESIZE)) /* update quickly to avoid multiple events */
+					nfs_data_update_size(np, 0);
+				lck_mtx_lock(&nmp->nm_lock);
+				np->n_mflag &= ~NMMONSCANINPROG;
+				if (np->n_mflag & NMMONSCANWANT) {
+					np->n_mflag &= ~NMMONSCANWANT;
+					wakeup(&np->n_mflag);
+				}
+				if (error || !(nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING|NFSSTA_FORCE)))
+					break;
+			}
+			nmp->nm_state &= ~NFSSTA_MONITOR_SCAN;
+			if (nmp->nm_state & NFSSTA_UNMOUNTING)
+				wakeup(&nmp->nm_state); /* let unmounting thread know scan is done */
+		}
+		if ((nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & (NFSSTA_RECOVER|NFSSTA_UNMOUNTING))) {
 			if (nmp->nm_deadto_start || !TAILQ_EMPTY(&nmp->nm_resendq) ||
 			    (nmp->nm_state & NFSSTA_RECOVER))
 				ts.tv_sec = 1;
 			else
-				ts.tv_sec = 30;
+				ts.tv_sec = 5;
 			msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts);
 		}
 	}
 
 	/* If we're unmounting, send the unmount RPC, if requested/appropriate. */
-	if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) && (nmp->nm_flag & NFSMNT_CALLUMNT) &&
+	if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) &&
+	    (nmp->nm_state & NFSSTA_MOUNTED) && NMFLAG(nmp, CALLUMNT) &&
 	    (nmp->nm_vers < NFS_VER4) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) {
 		lck_mtx_unlock(&nmp->nm_lock);
 		nfs3_umount_rpc(nmp, vfs_context_kernel(),
@@ -741,7 +1849,7 @@ nfs_mount_check_dead_timeout(struct nfsmount *nmp)
 {
 	struct timeval now;
 
-	if (!(nmp->nm_flag & NFSMNT_DEADTIMEOUT))
+	if (nmp->nm_deadtimeout <= 0)
 		return;
 	if (nmp->nm_deadto_start == 0)
 		return;
@@ -755,20 +1863,6 @@ nfs_mount_check_dead_timeout(struct nfsmount *nmp)
 	vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_DEAD, 0);
 }
 
-/*
- * RPC record marker parsing state
- */
-struct nfs_rpc_record_state
-{
-	uint16_t	nrrs_lastfrag;		/* last fragment of record */
-	uint16_t	nrrs_markerleft;	/* marker bytes remaining */
-	uint32_t	nrrs_fragleft;		/* fragment bytes remaining */
-	uint32_t	nrrs_reclen;		/* length of RPC record */
-	mbuf_t		nrrs_m;			/* mbufs for current record */
-	mbuf_t		nrrs_mlast;
-};
-int nfs_rpc_record_read(socket_t, struct nfs_rpc_record_state *, int *, mbuf_t *);
-
 /*
  * NFS callback channel socket state
  */
@@ -776,7 +1870,7 @@ struct nfs_callback_socket
 {
 	TAILQ_ENTRY(nfs_callback_socket) ncbs_link;
 	socket_t			ncbs_so;	/* the socket */
-	struct sockaddr_in		ncbs_sin;	/* socket address */
+	struct sockaddr_storage		ncbs_saddr;	/* socket address */
 	struct nfs_rpc_record_state	ncbs_rrs;	/* RPC record parsing state */
 	time_t				ncbs_stamp;	/* last accessed at */
 	uint32_t			ncbs_flags;	/* see below */
@@ -795,7 +1889,9 @@ struct nfs_callback_socket
  * the requests up with mounts.
  */
 socket_t nfs4_cb_so = NULL;
+socket_t nfs4_cb_so6 = NULL;
 in_port_t nfs4_cb_port = 0;
+in_port_t nfs4_cb_port6 = 0;
 uint32_t nfs4_cb_id = 0;
 uint32_t nfs4_cb_so_usecount = 0;
 TAILQ_HEAD(nfs4_cb_sock_list,nfs_callback_socket) nfs4_cb_socks;
@@ -813,9 +1909,12 @@ void
 nfs4_mount_callback_setup(struct nfsmount *nmp)
 {
 	struct sockaddr_in sin;
+	struct sockaddr_in6 sin6;
 	socket_t so = NULL;
+	socket_t so6 = NULL;
 	struct timeval timeo;
 	int error, on = 1;
+	in_port_t port;
 
 	lck_mtx_lock(nfs_global_mutex);
 	if (nfs4_cb_id == 0) {
@@ -834,32 +1933,34 @@ nfs4_mount_callback_setup(struct nfsmount *nmp)
 		return;
 	}
 
+	/* IPv4 */
 	error = sock_socket(AF_INET, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so);
 	if (error) {
-		log(LOG_INFO, "nfs callback setup: error %d creating listening socket\n", error);
+		log(LOG_INFO, "nfs callback setup: error %d creating listening IPv4 socket\n", error);
 		goto fail;
 	}
 	so = nfs4_cb_so;
 
+	sock_setsockopt(so, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
 	sin.sin_len = sizeof(struct sockaddr_in);
 	sin.sin_family = AF_INET;
 	sin.sin_addr.s_addr = htonl(INADDR_ANY);
-	sin.sin_port = 0;
+	sin.sin_port = htons(nfs_callback_port); /* try to use specified port */
 	error = sock_bind(so, (struct sockaddr *)&sin);
 	if (error) {
-		log(LOG_INFO, "nfs callback setup: error %d binding listening socket\n", error);
+		log(LOG_INFO, "nfs callback setup: error %d binding listening IPv4 socket\n", error);
 		goto fail;
 	}
 	error = sock_getsockname(so, (struct sockaddr *)&sin, sin.sin_len);
 	if (error) {
-		log(LOG_INFO, "nfs callback setup: error %d getting listening socket port\n", error);
+		log(LOG_INFO, "nfs callback setup: error %d getting listening IPv4 socket port\n", error);
 		goto fail;
 	}
 	nfs4_cb_port = ntohs(sin.sin_port);
 
 	error = sock_listen(so, 32);
 	if (error) {
-		log(LOG_INFO, "nfs callback setup: error %d on listen\n", error);
+		log(LOG_INFO, "nfs callback setup: error %d on IPv4 listen\n", error);
 		goto fail;
 	}
 
@@ -868,23 +1969,81 @@ nfs4_mount_callback_setup(struct nfsmount *nmp)
 	timeo.tv_sec = 60;
 	error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
 	if (error)
-		log(LOG_INFO, "nfs callback setup: error %d setting socket rx timeout\n", error);
+		log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket rx timeout\n", error);
 	error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
 	if (error)
-		log(LOG_INFO, "nfs callback setup: error %d setting socket tx timeout\n", error);
+		log(LOG_INFO, "nfs callback setup: error %d setting IPv4 socket tx timeout\n", error);
 	sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
 	sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
 	sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
 	error = 0;
 
+	/* IPv6 */
+	error = sock_socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so6);
+	if (error) {
+		log(LOG_INFO, "nfs callback setup: error %d creating listening IPv6 socket\n", error);
+		goto fail;
+	}
+	so6 = nfs4_cb_so6;
+
+	sock_setsockopt(so6, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
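+	/* V6ONLY: keep this listener IPv6-only so it can bind the same port the IPv4 listener uses */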
+	sock_setsockopt(so6, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on));
+	/* try to use specified port or same port as IPv4 */
+	port = nfs_callback_port ? nfs_callback_port : nfs4_cb_port;
+ipv6_bind_again:
+	sin6.sin6_len = sizeof(struct sockaddr_in6);
+	sin6.sin6_family = AF_INET6;
+	sin6.sin6_addr = in6addr_any;
+	sin6.sin6_port = htons(port);
+	error = sock_bind(so6, (struct sockaddr *)&sin6);
+	if (error) {
+		if (port != nfs_callback_port) {
+			/* if we simply tried to match the IPv4 port, then try any port */
+			port = 0;
+			goto ipv6_bind_again;
+		}
+		log(LOG_INFO, "nfs callback setup: error %d binding listening IPv6 socket\n", error);
+		goto fail;
+	}
+	error = sock_getsockname(so6, (struct sockaddr *)&sin6, sin6.sin6_len);
+	if (error) {
+		log(LOG_INFO, "nfs callback setup: error %d getting listening IPv6 socket port\n", error);
+		goto fail;
+	}
+	nfs4_cb_port6 = ntohs(sin6.sin6_port);
+
+	error = sock_listen(so6, 32);
+	if (error) {
+		log(LOG_INFO, "nfs callback setup: error %d on IPv6 listen\n", error);
+		goto fail;
+	}
+
+	/* receive timeout shouldn't matter.  If a send times out, we'll want to drop the socket */
+	timeo.tv_usec = 0;
+	timeo.tv_sec = 60;
+	error = sock_setsockopt(so6, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
+	if (error)
+		log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket rx timeout\n", error);
+	error = sock_setsockopt(so6, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
+	if (error)
+		log(LOG_INFO, "nfs callback setup: error %d setting IPv6 socket tx timeout\n", error);
+	sock_setsockopt(so6, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
+	sock_setsockopt(so6, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
+	sock_setsockopt(so6, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
+	error = 0;
+
 fail:
 	if (error) {
-		nfs4_cb_so = NULL;
+		nfs4_cb_so = nfs4_cb_so6 = NULL;
 		lck_mtx_unlock(nfs_global_mutex);
 		if (so) {
 			sock_shutdown(so, SHUT_RDWR);
 			sock_close(so);
 		}
+		if (so6) {
+			sock_shutdown(so6, SHUT_RDWR);
+			sock_close(so6);
+		}
 	} else {
 		lck_mtx_unlock(nfs_global_mutex);
 	}
@@ -901,7 +2060,7 @@ void
 nfs4_mount_callback_shutdown(struct nfsmount *nmp)
 {
 	struct nfs_callback_socket *ncbsp;
-	socket_t so;
+	socket_t so, so6;
 	struct nfs4_cb_sock_list cb_socks;
 	struct timespec ts = {1,0};
 
@@ -910,12 +2069,14 @@ nfs4_mount_callback_shutdown(struct nfsmount *nmp)
 	/* wait for any callbacks in progress to complete */
 	while (nmp->nm_cbrefs)
 		msleep(&nmp->nm_cbrefs, nfs_global_mutex, PSOCK, "cbshutwait", &ts);
+	nmp->nm_cbid = 0;
 	if (--nfs4_cb_so_usecount) {
 		lck_mtx_unlock(nfs_global_mutex);
 		return;
 	}
 	so = nfs4_cb_so;
-	nfs4_cb_so = NULL;
+	so6 = nfs4_cb_so6;
+	nfs4_cb_so = nfs4_cb_so6 = NULL;
 	TAILQ_INIT(&cb_socks);
 	TAILQ_CONCAT(&cb_socks, &nfs4_cb_socks, ncbs_link);
 	lck_mtx_unlock(nfs_global_mutex);
@@ -923,10 +2084,15 @@ nfs4_mount_callback_shutdown(struct nfsmount *nmp)
 		sock_shutdown(so, SHUT_RDWR);
 		sock_close(so);
 	}
+	if (so6) {
+		sock_shutdown(so6, SHUT_RDWR);
+		sock_close(so6);
+	}
 	while ((ncbsp = TAILQ_FIRST(&cb_socks))) {
 		TAILQ_REMOVE(&cb_socks, ncbsp, ncbs_link);
 		sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
 		sock_close(ncbsp->ncbs_so);
+		nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs);
 		FREE(ncbsp, M_TEMP);
 	}
 }
@@ -958,6 +2124,7 @@ loop:
 		lck_mtx_unlock(nfs_global_mutex);
 		sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR);
 		sock_close(ncbsp->ncbs_so);
+		nfs_rpc_record_state_cleanup(&ncbsp->ncbs_rrs);
 		FREE(ncbsp, M_TEMP);
 		goto loop;
 	}
@@ -977,10 +2144,13 @@ nfs4_cb_accept(socket_t so, __unused void *arg, __unused int waitflag)
 	struct nfs_callback_socket *ncbsp;
 	struct nfsmount *nmp;
 	struct timeval timeo, now;
-	struct sockaddr_in *saddr;
-	int error, on = 1;
+	int error, on = 1, ip;
 
-	if (so != nfs4_cb_so)
+	if (so == nfs4_cb_so)
+		ip = 4;
+	else if (so == nfs4_cb_so6)
+		ip = 6;
+	else
 		return;
 
 	/* allocate/initialize a new nfs_callback_socket */
@@ -990,15 +2160,15 @@ nfs4_cb_accept(socket_t so, __unused void *arg, __unused int waitflag)
 		return;
 	}
 	bzero(ncbsp, sizeof(*ncbsp));
-	ncbsp->ncbs_sin.sin_len = sizeof(struct sockaddr_in);
-	ncbsp->ncbs_rrs.nrrs_markerleft = sizeof(ncbsp->ncbs_rrs.nrrs_fragleft);
+	ncbsp->ncbs_saddr.ss_len = (ip == 4) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
+	nfs_rpc_record_state_init(&ncbsp->ncbs_rrs);
 
 	/* accept a new socket */
-	error = sock_accept(so, (struct sockaddr*)&ncbsp->ncbs_sin,
-			ncbsp->ncbs_sin.sin_len, MSG_DONTWAIT,
+	error = sock_accept(so, (struct sockaddr*)&ncbsp->ncbs_saddr,
+			ncbsp->ncbs_saddr.ss_len, MSG_DONTWAIT,
 			nfs4_cb_rcv, ncbsp, &newso);
 	if (error) {
-		log(LOG_INFO, "nfs callback accept: error %d accepting socket\n", error);
+		log(LOG_INFO, "nfs callback accept: error %d accepting IPv%d socket\n", error, ip);
 		FREE(ncbsp, M_TEMP);
 		return;
 	}
@@ -1009,11 +2179,12 @@ nfs4_cb_accept(socket_t so, __unused void *arg, __unused int waitflag)
 	timeo.tv_sec = 60;
 	error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
 	if (error)
-		log(LOG_INFO, "nfs callback socket: error %d setting socket rx timeout\n", error);
+		log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket rx timeout\n", error, ip);
 	error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
 	if (error)
-		log(LOG_INFO, "nfs callback socket: error %d setting socket tx timeout\n", error);
+		log(LOG_INFO, "nfs callback socket: error %d setting IPv%d socket tx timeout\n", error, ip);
 	sock_setsockopt(newso, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
+	sock_setsockopt(newso, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
 	sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on));
 	sock_setsockopt(newso, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
 
@@ -1028,11 +2199,10 @@ nfs4_cb_accept(socket_t so, __unused void *arg, __unused int waitflag)
 
 	/* verify it's from a host we have mounted */
 	TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) {
-		/* check socket's source address matches this mount's server address */
-		saddr = mbuf_data(nmp->nm_nam);
-		if ((ncbsp->ncbs_sin.sin_len == saddr->sin_len) &&
-		    (ncbsp->ncbs_sin.sin_family == saddr->sin_family) &&
-		    (ncbsp->ncbs_sin.sin_addr.s_addr == saddr->sin_addr.s_addr))
+		/* check if socket's source address matches this mount's server address */
+		if (!nmp->nm_saddr)
+			continue;
+		if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0)
 			break;
 	}
 	if (!nmp) /* we don't want this socket, mark it dead */
@@ -1077,7 +2247,7 @@ nfs4_cb_rcv(socket_t so, void *arg, __unused int waitflag)
 
 	/* loop while we make error-free progress */
 	while (!error && recv) {
-		error = nfs_rpc_record_read(so, &ncbsp->ncbs_rrs, &recv, &m);
+		error = nfs_rpc_record_read(so, &ncbsp->ncbs_rrs, MSG_DONTWAIT, &recv, &m);
 		if (m) /* handle the request */
 			error = nfs4_cb_handler(ncbsp, m);
 	}
@@ -1111,7 +2281,6 @@ nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq)
 	socket_t so = ncbsp->ncbs_so;
 	struct nfsm_chain nmreq, nmrep;
 	mbuf_t mhead = NULL, mrest = NULL, m;
-	struct sockaddr_in *saddr;
 	struct msghdr msg;
 	struct nfsmount *nmp;
 	fhandle_t fh;
@@ -1203,12 +2372,10 @@ nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq)
 			if (nmp->nm_cbid != cbid)
 				continue;
 			/* verify socket's source address matches this mount's server address */
-			saddr = mbuf_data(nmp->nm_nam);
-			if ((ncbsp->ncbs_sin.sin_len != saddr->sin_len) ||
-			    (ncbsp->ncbs_sin.sin_family != saddr->sin_family) ||
-			    (ncbsp->ncbs_sin.sin_addr.s_addr != saddr->sin_addr.s_addr))
+			if (!nmp->nm_saddr)
 				continue;
-			break;
+			if (nfs_sockaddr_cmp((struct sockaddr*)&ncbsp->ncbs_saddr, nmp->nm_saddr) == 0)
+				break;
 		}
 		/* mark the NFS mount as busy */
 		if (nmp)
@@ -1240,7 +2407,7 @@ nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq)
 					numops = 0; /* don't process any more ops */
 				} else {
 					/* find the node for the file handle */
-					error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, NG_NOCREATE, &np);
+					error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np);
 					if (error || !np) {
 						status = NFSERR_BADHANDLE;
 						error = 0;
@@ -1301,7 +2468,7 @@ nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq)
 					numops = 0; /* don't process any more ops */
 				} else {
 					/* find the node for the file handle */
-					error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, NG_NOCREATE, &np);
+					error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, RPCAUTH_UNKNOWN, NG_NOCREATE, &np);
 					if (error || !np) {
 						status = NFSERR_BADHANDLE;
 						error = 0;
@@ -1313,14 +2480,8 @@ nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq)
 						status = NFSERR_BAD_STATEID;
 						numops = 0; /* don't process any more ops */
 					}
-					if (!status) {
-						/* add node to recall queue, and wake socket thread */
-						lck_mtx_lock(&nmp->nm_lock);
-						if (np->n_dlink.tqe_next == NFSNOLIST)
-							TAILQ_INSERT_TAIL(&nmp->nm_recallq, np, n_dlink);
-						nfs_mount_sock_thread_wake(nmp);
-						lck_mtx_unlock(&nmp->nm_lock);
-					}
+					if (!status) /* add node to delegation return queue, and wake socket thread */
+						nfs4_delegation_return_enqueue(np);
 					if (np) {
 						nfs_node_unlock(np);
 						vnode_put(NFSTOV(np));
@@ -1456,6 +2617,28 @@ out:
 }
 
 
+/*
+ * Initialize an nfs_rpc_record_state structure.
+ */
+void
+nfs_rpc_record_state_init(struct nfs_rpc_record_state *nrrsp)
+{
+	bzero(nrrsp, sizeof(*nrrsp));
+	nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft);
+}
+
+/*
+ * Clean up an nfs_rpc_record_state structure.
+ */
+void
+nfs_rpc_record_state_cleanup(struct nfs_rpc_record_state *nrrsp)
+{
+	if (nrrsp->nrrs_m) {
+		mbuf_freem(nrrsp->nrrs_m);
+		nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL;
+	}
+}
+
 /*
  * Read the next (marked) RPC record from the socket.
  *
@@ -1463,7 +2646,7 @@ out:
  * *mp returns the next complete RPC record
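 * Callers pass MSG_DONTWAIT in flags for non-blocking upcalls, or 0 to block.
 * Each fragment is preceded by a 4-byte record marker: the high bit flags the
 * last fragment of a record and the low 31 bits give the fragment length.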
  */
 int
-nfs_rpc_record_read(socket_t so, struct nfs_rpc_record_state *nrrsp, int *recvp, mbuf_t *mp)
+nfs_rpc_record_read(socket_t so, struct nfs_rpc_record_state *nrrsp, int flags, int *recvp, mbuf_t *mp)
 {
 	struct iovec aio;
 	struct msghdr msg;
@@ -1482,7 +2665,7 @@ nfs_rpc_record_read(socket_t so, struct nfs_rpc_record_state *nrrsp, int *recvp,
 		bzero(&msg, sizeof(msg));
 		msg.msg_iov = &aio;
 		msg.msg_iovlen = 1;
-		error = sock_receive(so, &msg, MSG_DONTWAIT, &rcvlen);
+		error = sock_receive(so, &msg, flags, &rcvlen);
 		if (error || !rcvlen)
 			break;
 		*recvp = 1;
@@ -1497,10 +2680,7 @@ nfs_rpc_record_read(socket_t so, struct nfs_rpc_record_state *nrrsp, int *recvp,
 		}
 		nrrsp->nrrs_reclen += nrrsp->nrrs_fragleft;
 		if (nrrsp->nrrs_reclen > NFS_MAXPACKET) {
-			/*
-			 * This is SERIOUS! We are out of sync with the sender
-			 * and forcing a disconnect/reconnect is all I can do.
-			 */
+			/* This is SERIOUS! We are out of sync with the sender. */
 			log(LOG_ERR, "impossible RPC record length (%d) on callback", nrrsp->nrrs_reclen);
 			error = EFBIG;
 		}
@@ -1510,7 +2690,7 @@ nfs_rpc_record_read(socket_t so, struct nfs_rpc_record_state *nrrsp, int *recvp,
 	while (!error && !nrrsp->nrrs_markerleft && nrrsp->nrrs_fragleft) {
 		m = NULL;
 		rcvlen = nrrsp->nrrs_fragleft;
-		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
+		error = sock_receivembuf(so, NULL, &m, flags, &rcvlen);
 		if (error || !rcvlen || !m)
 			break;
 		*recvp = 1;
@@ -1579,7 +2759,7 @@ int
 nfs_send(struct nfsreq *req, int wait)
 {
 	struct nfsmount *nmp;
-	socket_t so;
+	struct nfs_socket *nso;
 	int error, error2, sotype, rexmit, slpflag = 0, needrecon;
 	struct msghdr msg;
 	struct sockaddr *sendnam;
@@ -1597,7 +2777,7 @@ again:
 		return (error);
 	}
 
-	error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0);
+	error = nfs_sigintr(req->r_nmp, req, NULL, 0);
 	if (error) {
 		nfs_sndunlock(req);
 		lck_mtx_lock(&req->r_mtx);
@@ -1629,7 +2809,7 @@ again:
 	lck_mtx_lock(&nmp->nm_lock);
 	if (!(nmp->nm_sockflags & NMSOCK_READY) &&
 	    !((nmp->nm_sockflags & NMSOCK_SETUP) && (req->r_flags & R_SETUP))) {
-		if (nmp->nm_flag & NFSMNT_INT)
+		if (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR))
 			slpflag |= PCATCH;
 		lck_mtx_unlock(&nmp->nm_lock);
 		nfs_sndunlock(req);
@@ -1653,7 +2833,7 @@ again:
 				error = EIO;
 				break;
 			}
-			if ((nmp->nm_flag & NFSMNT_SOFT) && (nmp->nm_reconnect_start > 0)) {
+			if (NMFLAG(nmp, SOFT) && (nmp->nm_reconnect_start > 0)) {
 				struct timeval now;
 				microuptime(&now);
 				if ((now.tv_sec - nmp->nm_reconnect_start) >= 8) {
@@ -1681,9 +2861,11 @@ again:
 		}
 		goto again;
 	}
-	so = nmp->nm_so;
+	nso = nmp->nm_nso;
+	/* note that we're using the mount's socket to do the send */
+	nmp->nm_state |= NFSSTA_SENDING;  /* will be cleared by nfs_sndunlock() */
 	lck_mtx_unlock(&nmp->nm_lock);
-	if (!so) {
+	if (!nso) {
 		nfs_sndunlock(req);
 		lck_mtx_lock(&req->r_mtx);
 		req->r_flags &= ~R_SENDING;
@@ -1700,7 +2882,7 @@ again:
 		lck_mtx_lock(&nmp->nm_lock);
 		if (!(req->r_flags & R_CWND) && (nmp->nm_sent >= nmp->nm_cwnd)) {
 			/* if we can't send this out yet, wait on the cwnd queue */
-			slpflag = ((nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
+			slpflag = (NMFLAG(nmp, INTR) && req->r_thread) ? PCATCH : 0;
 			lck_mtx_unlock(&nmp->nm_lock);
 			nfs_sndunlock(req);
 			req->r_flags &= ~R_SENDING;
@@ -1764,13 +2946,11 @@ again:
 	}
 
 	bzero(&msg, sizeof(msg));
-	if (nmp->nm_nam && (sotype != SOCK_STREAM) && !sock_isconnected(so)) {
-		if ((sendnam = mbuf_data(nmp->nm_nam))) {
-			msg.msg_name = (caddr_t)sendnam;
-			msg.msg_namelen = sendnam->sa_len;
-		}
+	if ((sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so) && ((sendnam = nmp->nm_saddr))) {
+		msg.msg_name = (caddr_t)sendnam;
+		msg.msg_namelen = sendnam->sa_len;
 	}
-	error = sock_sendmbuf(so, &msg, mreqcopy, 0, &sentlen);
+	error = sock_sendmbuf(nso->nso_so, &msg, mreqcopy, 0, &sentlen);
 #ifdef NFS_SOCKET_DEBUGGING
 	if (error || (sentlen != req->r_mreqlen))
 		NFS_SOCK_DBG(("nfs_send: 0x%llx sent %d/%d error %d\n",
@@ -1820,9 +3000,9 @@ again:
 		 * For now, ignore them all
 		 */
 		if ((error != EINTR) && (error != ERESTART) &&
-		    (error != EWOULDBLOCK) && (error != EIO)) {
+		    (error != EWOULDBLOCK) && (error != EIO) && (nso == nmp->nm_nso)) {
 			int clearerror = 0, optlen = sizeof(clearerror);
-			sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
+			sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen);
 #ifdef NFS_SOCKET_DEBUGGING
 			if (clearerror)
 				NFS_SOCK_DBG(("nfs_send: ignoring UDP socket error %d so %d\n",
@@ -1852,7 +3032,7 @@ again:
 		needrecon = 1;
 		break;
 	}
-	if (needrecon) { /* mark socket as needing reconnect */
+	if (needrecon && (nso == nmp->nm_nso)) { /* mark socket as needing reconnect */
 		NFS_SOCK_DBG(("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error));
 		nfs_need_reconnect(nmp);
 	}
@@ -1902,20 +3082,19 @@ void
 nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag)
 {
 	struct nfsmount *nmp = arg;
+	struct nfs_socket *nso = nmp->nm_nso;
 	size_t rcvlen;
 	mbuf_t m;
 	int error = 0;
 
-	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
-		wakeup(&nmp->nm_so);
-		return;
-	}
-
-	/* make sure we're on the current socket */
-	if (nmp->nm_so != so)
+	if (nmp->nm_sockflags & NMSOCK_CONNECTING)
 		return;
 
 	do {
+		/* make sure we're on the current socket */
+		if (!nso || (nso->nso_so != so))
+			return;
+
 		m = NULL;
 		rcvlen = 1000000;
 		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
@@ -1935,123 +3114,54 @@ void
 nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag)
 {
 	struct nfsmount *nmp = arg;
-	struct iovec aio;
-	struct msghdr msg;
-	size_t rcvlen;
+	struct nfs_socket *nso = nmp->nm_nso;
+	struct nfs_rpc_record_state nrrs;
 	mbuf_t m;
 	int error = 0;
-	int recv;
+	int recv = 1;
 
-	if (nmp->nm_sockflags & NMSOCK_CONNECTING) {
-		wakeup(&nmp->nm_so);
+	if (nmp->nm_sockflags & NMSOCK_CONNECTING)
 		return;
-	}
 
 	/* make sure we're on the current socket */
-	if (nmp->nm_so != so)
-		return;
-
 	lck_mtx_lock(&nmp->nm_lock);
-	if (nmp->nm_sockflags & NMSOCK_UPCALL) {
-		/* upcall is already receiving data - just return */
+	nso = nmp->nm_nso;
+	if (!nso || (nso->nso_so != so) || (nmp->nm_sockflags & (NMSOCK_DISCONNECTING))) {
 		lck_mtx_unlock(&nmp->nm_lock);
 		return;
 	}
-	nmp->nm_sockflags |= NMSOCK_UPCALL;
-
-nextfrag:
-	recv = 0;
-
-	/* read the TCP RPC record marker */
-	while (!error && nmp->nm_markerleft) {
-		aio.iov_base = ((char*)&nmp->nm_fragleft +
-				sizeof(nmp->nm_fragleft) - nmp->nm_markerleft);
-		aio.iov_len = nmp->nm_markerleft;
-		bzero(&msg, sizeof(msg));
-		msg.msg_iov = &aio;
-		msg.msg_iovlen = 1;
-		lck_mtx_unlock(&nmp->nm_lock);
-		error = sock_receive(so, &msg, MSG_DONTWAIT, &rcvlen);
-		lck_mtx_lock(&nmp->nm_lock);
-		if (error || !rcvlen)
-			break;
-		recv = 1;
-		nmp->nm_markerleft -= rcvlen;
-		if (nmp->nm_markerleft)
-			continue;
-		/* record marker complete */
-		nmp->nm_fragleft = ntohl(nmp->nm_fragleft);
-		if (nmp->nm_fragleft & 0x80000000) {
-			nmp->nm_sockflags |= NMSOCK_LASTFRAG;
-			nmp->nm_fragleft &= ~0x80000000;
-		}
-		nmp->nm_reclen += nmp->nm_fragleft;
-		if (nmp->nm_reclen > NFS_MAXPACKET) {
-			/*
-			 * This is SERIOUS! We are out of sync with the sender
-			 * and forcing a disconnect/reconnect is all I can do.
-			 */
-			log(LOG_ERR, "%s (%d) from nfs server %s\n",
-				"impossible RPC record length", nmp->nm_reclen,
-				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
-			error = EFBIG;
-		}
-	}
+	lck_mtx_unlock(&nmp->nm_lock);
 
-	/* read the TCP RPC record fragment */
-	while (!error && !nmp->nm_markerleft && nmp->nm_fragleft) {
-		m = NULL;
-		rcvlen = nmp->nm_fragleft;
-		lck_mtx_unlock(&nmp->nm_lock);
-		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
-		lck_mtx_lock(&nmp->nm_lock);
-		if (error || !rcvlen || !m)
-			break;
-		recv = 1;
-		/* append mbufs to list */
-		nmp->nm_fragleft -= rcvlen;
-		if (!nmp->nm_m) {
-			nmp->nm_m = m;
-		} else {
-			error = mbuf_setnext(nmp->nm_mlast, m);
-			if (error) {
-				printf("nfs_tcp_rcv: mbuf_setnext failed %d\n", error);
-				mbuf_freem(m);
-				break;
-			}
-		}
-		while (mbuf_next(m))
-			m = mbuf_next(m);
-		nmp->nm_mlast = m;
+	/* make sure this upcall should be trying to do work */
+	lck_mtx_lock(&nso->nso_lock);
+	if (nso->nso_flags & (NSO_UPCALL|NSO_DISCONNECTING|NSO_DEAD)) {
+		lck_mtx_unlock(&nso->nso_lock);
+		return;
 	}
+	nso->nso_flags |= NSO_UPCALL;
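+	/* take a local copy of the RPC record parsing state so nso_lock needn't be held while reading the socket */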
+	nrrs = nso->nso_rrs;
+	lck_mtx_unlock(&nso->nso_lock);
 
-	/* done reading fragment? */
-	m = NULL;
-	if (!error && !nmp->nm_markerleft && !nmp->nm_fragleft) {
-		/* reset socket fragment parsing state */
-		nmp->nm_markerleft = sizeof(nmp->nm_fragleft);
-		if (nmp->nm_sockflags & NMSOCK_LASTFRAG) {
-			/* RPC record complete */
-			m = nmp->nm_m;
-			/* reset socket record parsing state */
-			nmp->nm_reclen = 0;
-			nmp->nm_m = nmp->nm_mlast = NULL;
-			nmp->nm_sockflags &= ~NMSOCK_LASTFRAG;
-		}
+	/* loop while we make error-free progress */
+	while (!error && recv) {
+		error = nfs_rpc_record_read(so, &nrrs, MSG_DONTWAIT, &recv, &m);
+		if (m) /* match completed response with request */
+			nfs_request_match_reply(nmp, m);
 	}
 
-	if (m) { /* match completed response with request */
+	lck_mtx_lock(&nmp->nm_lock);
+	if (nmp->nm_nso == nso) {
+		/* still the same socket, so update socket's RPC parsing state */
+		lck_mtx_unlock(&nmp->nm_lock);
+		lck_mtx_lock(&nso->nso_lock);
+		nso->nso_rrs = nrrs;
+		nso->nso_flags &= ~NSO_UPCALL;
+		lck_mtx_unlock(&nso->nso_lock);
+		if (nmp->nm_sockflags & NMSOCK_DISCONNECTING)
+			wakeup(&nmp->nm_sockflags);
+	} else {
 		lck_mtx_unlock(&nmp->nm_lock);
-		nfs_request_match_reply(nmp, m);
-		lck_mtx_lock(&nmp->nm_lock);
 	}
-
-	/* loop if we've been making error-free progress */
-	if (!error && recv)
-		goto nextfrag;
-
-	nmp->nm_sockflags &= ~NMSOCK_UPCALL;
-	lck_mtx_unlock(&nmp->nm_lock);
 #ifdef NFS_SOCKET_DEBUGGING
 	if (!recv && (error != EWOULDBLOCK))
 		NFS_SOCK_DBG(("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error));
@@ -2077,7 +3187,8 @@ nfs_sock_poke(struct nfsmount *nmp)
 	int dummy;
 
 	lck_mtx_lock(&nmp->nm_lock);
-	if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) || !nmp->nm_so) {
+	if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) ||
+	    !(nmp->nm_sockflags & NMSOCK_READY) || !nmp->nm_nso || !nmp->nm_nso->nso_so) {
 		lck_mtx_unlock(&nmp->nm_lock);
 		return;
 	}
@@ -2088,7 +3199,7 @@ nfs_sock_poke(struct nfsmount *nmp)
 	bzero(&msg, sizeof(msg));
 	msg.msg_iov = &aio;
 	msg.msg_iovlen = 1;
-	error = sock_send(nmp->nm_so, &msg, MSG_DONTWAIT, &len);
+	error = sock_send(nmp->nm_nso->nso_so, &msg, MSG_DONTWAIT, &len);
 	NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error));
 }
 
@@ -2183,7 +3294,7 @@ nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep)
 		/* signal anyone waiting on this request */
 		wakeup(req);
 		asyncioq = (req->r_callback.rcb_func != NULL);
-		if (req->r_gss_ctx != NULL)
+		if (nfs_request_using_gss(req))
 			nfs_gss_clnt_rpcdone(req);
 		lck_mtx_unlock(&req->r_mtx);
 		lck_mtx_unlock(nfs_request_mutex);
@@ -2209,16 +3320,16 @@ int
 nfs_wait_reply(struct nfsreq *req)
 {
 	struct timespec ts = { 2, 0 };
-	int error = 0, slpflag;
+	int error = 0, slpflag, first = 1;
 
-	if (req->r_nmp && (req->r_nmp->nm_flag & NFSMNT_INT) && req->r_thread)
+	if (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR))
 		slpflag = PCATCH;
 	else
 		slpflag = 0;
 
 	lck_mtx_lock(&req->r_mtx);
 	while (!req->r_nmrep.nmc_mhead) {
-		if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
+		if ((error = nfs_sigintr(req->r_nmp, req, first ? NULL : req->r_thread, 0)))
 			break;
 		if (((error = req->r_error)) || req->r_nmrep.nmc_mhead)
 			break;
@@ -2228,9 +3339,9 @@ nfs_wait_reply(struct nfsreq *req)
 				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
 			req->r_flags |= R_SENDING;
 			lck_mtx_unlock(&req->r_mtx);
-			if (req->r_gss_ctx) {
+			if (nfs_request_using_gss(req)) {
 				/*
-				 * It's an RPCSEC_GSS mount.
+				 * It's an RPCSEC_GSS request.
 				 * Can't just resend the original request
 				 * without bumping the cred sequence number.
 				 * Go back and re-build the request.
@@ -2253,7 +3364,7 @@ nfs_wait_reply(struct nfsreq *req)
 		if (nfs_noremotehang(req->r_thread))
 			ts.tv_sec = 1;
 		msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitreply", &ts);
-		slpflag = 0;
+		first = slpflag = 0;
 	}
 	lck_mtx_unlock(&req->r_mtx);
 
@@ -2340,6 +3451,8 @@ nfs_request_create(
 	req->r_nmp = nmp;
 	req->r_np = np;
 	req->r_thread = thd;
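+	/* a request with no thread context can't be interrupted by a signal, so mark it R_NOINTR */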
+	if (!thd)
+		req->r_flags |= R_NOINTR;
 	if (IS_VALID_CRED(cred)) {
 		kauth_cred_ref(cred);
 		req->r_cred = cred;
@@ -2353,6 +3466,14 @@ nfs_request_create(
 	req->r_rchain.tqe_next = NFSREQNOLIST;
 	req->r_cchain.tqe_next = NFSREQNOLIST;
 
+	/* set auth flavor to use for request */
+	if (!req->r_cred)
+		req->r_auth = RPCAUTH_NONE;
+	else if (req->r_np && (req->r_np->n_auth != RPCAUTH_INVALID))
+		req->r_auth = req->r_np->n_auth;
+	else
+		req->r_auth = nmp->nm_auth;
+
 	lck_mtx_unlock(&nmp->nm_lock);
 
 	/* move the request mbuf chain to the nfsreq */
@@ -2394,6 +3515,18 @@ nfs_request_destroy(struct nfsreq *req)
 	lck_mtx_lock(&req->r_mtx);
 	if (nmp) {
 		lck_mtx_lock(&nmp->nm_lock);
+		if (req->r_flags & R_CWND) {
+			/* Decrement the outstanding request count.  */
+			req->r_flags &= ~R_CWND;
+			nmp->nm_sent -= NFS_CWNDSCALE;
+			if ((nmp->nm_sent < nmp->nm_cwnd) && !TAILQ_EMPTY(&nmp->nm_cwndq)) {
+				/* congestion window is open, poke the cwnd queue */
+				struct nfsreq *req2 = TAILQ_FIRST(&nmp->nm_cwndq);
+				TAILQ_REMOVE(&nmp->nm_cwndq, req2, r_cchain);
+				req2->r_cchain.tqe_next = NFSREQNOLIST;
+				wakeup(req2);
+			}
+		}
 		if (req->r_rchain.tqe_next != NFSREQNOLIST) {
 			TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
 			req->r_rchain.tqe_next = NFSREQNOLIST;
@@ -2424,12 +3557,14 @@ nfs_request_destroy(struct nfsreq *req)
 		mbuf_freem(req->r_nmrep.nmc_mhead);
 	if (IS_VALID_CRED(req->r_cred))
 		kauth_cred_unref(&req->r_cred);
-	if (req->r_gss_ctx)
+	if (nfs_request_using_gss(req))
 		nfs_gss_clnt_rpcdone(req);
 	SLIST_FOREACH_SAFE(gsp, &req->r_gss_seqlist, gss_seqnext, ngsp)
 		FREE(gsp, M_TEMP);
 	if (req->r_gss_ctx)
 		nfs_gss_clnt_ctx_unref(req);
+	if (req->r_wrongsec)
+		FREE(req->r_wrongsec, M_TEMP);
 
 	lck_mtx_destroy(&req->r_mtx, nfs_request_grp);
 	if (req->r_flags & R_ALLOCATED)
@@ -2471,7 +3606,7 @@ int
 nfs_request_add_header(struct nfsreq *req)
 {
 	struct nfsmount *nmp;
-	int error = 0, auth_len = 0;
+	int error = 0;
 	mbuf_t m;
 
 	/* free up any previous header */
@@ -2485,24 +3620,7 @@ nfs_request_add_header(struct nfsreq *req)
 	if (!nmp)
 		return (ENXIO);
 
-	if (!req->r_cred) /* RPCAUTH_NULL */
-		auth_len = 0;
-	else switch (nmp->nm_auth) {
-		case RPCAUTH_UNIX:
-			if (req->r_cred->cr_ngroups < 1)
-				return (EINVAL);
-			auth_len = ((((req->r_cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
-				nmp->nm_numgrps : (req->r_cred->cr_ngroups - 1)) << 2) +
-				5 * NFSX_UNSIGNED;
-			break;
-		case RPCAUTH_KRB5:
-		case RPCAUTH_KRB5I:
-		case RPCAUTH_KRB5P:
-			auth_len = 5 * NFSX_UNSIGNED + 0; // zero context handle for now
-			break;
-		}
-
-	error = nfsm_rpchead(req, auth_len, req->r_mrest, &req->r_xid, &req->r_mhead);
+	error = nfsm_rpchead(req, req->r_mrest, &req->r_xid, &req->r_mhead);
 	if (error)
 		return (error);
 
@@ -2511,7 +3629,7 @@ nfs_request_add_header(struct nfsreq *req)
 	if (!nmp)
 		return (ENXIO);
 	lck_mtx_lock(&nmp->nm_lock);
-	if (nmp->nm_flag & NFSMNT_SOFT)
+	if (NMFLAG(nmp, SOFT))
 		req->r_retry = nmp->nm_retry;
 	else
 		req->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
@@ -2598,7 +3716,7 @@ nfs_request_finish(
 	uint32_t auth_status = 0;
 	uint32_t accepted_status = 0;
 	struct nfsm_chain nmrep;
-	int error, auth, clearjbtimeo;
+	int error, clearjbtimeo;
 
 	error = req->r_error;
 
@@ -2612,10 +3730,10 @@ nfs_request_finish(
 
 	nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp;
 
-	/*
-	 * Decrement the outstanding request count.
-	 */
 	if ((req->r_flags & R_CWND) && nmp) {
+		/*
+		 * Decrement the outstanding request count.
+		 */
 		req->r_flags &= ~R_CWND;
 		lck_mtx_lock(&nmp->nm_lock);
 		FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd);
@@ -2630,9 +3748,9 @@ nfs_request_finish(
 		lck_mtx_unlock(&nmp->nm_lock);
 	}
 
-	if (req->r_gss_ctx) {	// Using gss cred ?
+	if (nfs_request_using_gss(req)) {
 		/*
-		 * If the request had an RPCSEC_GSS credential
+		 * If the request used an RPCSEC_GSS credential
 		 * then reset its sequence number bit in the
 		 * request window.
 		 */
@@ -2665,7 +3783,7 @@ nfs_request_finish(
 	 */
 	if (!error) {
 		if ((req->r_flags & R_TPRINTFMSG) ||
-		    (nmp && (nmp->nm_flag & NFSMNT_SOFT) &&
+		    (nmp && NMFLAG(nmp, SOFT) &&
 		     ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_FORCE)) == NFSSTA_TIMEO)))
 			nfs_up(nmp, req->r_thread, NFSSTA_TIMEO, "is alive again");
 		else
@@ -2725,11 +3843,10 @@ nfs_request_finish(
 	nfsm_chain_get_32(error, &nmrep, verf_len);  // verifier length
 	nfsmout_if(error);
 
-	auth = !req->r_cred ? RPCAUTH_NULL : nmp->nm_auth;
-	switch (auth) {
-	case RPCAUTH_NULL:
-	case RPCAUTH_UNIX:
-		/* Any AUTH_UNIX verifier is ignored */
+	switch (req->r_auth) {
+	case RPCAUTH_NONE:
+	case RPCAUTH_SYS:
+		/* Any AUTH_SYS verifier is ignored */
 		if (verf_len > 0)
 			nfsm_chain_adv(error, &nmrep, nfsm_rndup(verf_len));
 		nfsm_chain_get_32(error, &nmrep, accepted_status);
@@ -2760,7 +3877,7 @@ nfs_request_finish(
 			/*
 			 * It's a JUKEBOX error - delay and try again
 			 */
-			int delay, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
+			int delay, slpflag = (NMFLAG(nmp, INTR) && !(req->r_flags & R_NOINTR)) ? PCATCH : 0;
 
 			mbuf_freem(mrep);
 			req->r_nmrep.nmc_mhead = NULL;
@@ -2785,7 +3902,7 @@ nfs_request_finish(
 				nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO,
 					"resource temporarily unavailable (jukebox)");
 			}
-			if ((nmp->nm_flag & NFSMNT_SOFT) && (req->r_delay == 30)) {
+			if (NMFLAG(nmp, SOFT) && (req->r_delay == 30) && !(req->r_flags & R_NOINTR)) {
 				/* for soft mounts, just give up after a short while */
 				OSAddAtomic(1, &nfsstats.rpctimeouts);
 				nfs_softterm(req);
@@ -2802,6 +3919,7 @@ nfs_request_finish(
 					if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
 						goto nfsmout;
 					tsleep(&lbolt, PSOCK|slpflag, "nfs_jukebox_trylater", 0);
+					slpflag = 0;
 				} while (--delay > 0);
 			}
 			req->r_xid = 0;			// get a new XID
@@ -2820,6 +3938,96 @@ nfs_request_finish(
 			nfs_up(nmp, req->r_thread, clearjbtimeo, "resource available again");
 		}
 
+		if ((nmp->nm_vers >= NFS_VER4) && (*status == NFSERR_WRONGSEC)) {
+			/*
+			 * Hmmm... we need to try a different security flavor.
+			 * The first time a request hits this, we will allocate an array
+			 * to track flavors to try.  We fill the array with the mount's
+			 * preferred flavors or the server's preferred flavors or just the
+			 * flavors we support.
+			 */
+			uint32_t srvflavors[NX_MAX_SEC_FLAVORS];
+			int srvcount, i, j;
+
+			/* Call SECINFO to try to get list of flavors from server. */
+			srvcount = NX_MAX_SEC_FLAVORS;
+			nfs4_secinfo_rpc(nmp, &req->r_secinfo, req->r_cred, srvflavors, &srvcount);
+
+			if (!req->r_wrongsec) {
+				/* first time... set up flavor array */
+				MALLOC(req->r_wrongsec, uint32_t*, NX_MAX_SEC_FLAVORS*sizeof(uint32_t), M_TEMP, M_WAITOK);
+				if (!req->r_wrongsec) {
+					error = EACCES;
+					goto nfsmout;
+				}
+				i=0;
+				if (nmp->nm_sec.count) { /* use the mount's preferred list of flavors */
+					for(; i < nmp->nm_sec.count; i++)
+						req->r_wrongsec[i] = nmp->nm_sec.flavors[i];
+				} else if (srvcount) { /* otherwise use the server's list of flavors */
+					for(; i < srvcount; i++)
+						req->r_wrongsec[i] = srvflavors[i];
+				} else { /* otherwise, just try the flavors we support. */
+					req->r_wrongsec[i++] = RPCAUTH_KRB5P;
+					req->r_wrongsec[i++] = RPCAUTH_KRB5I;
+					req->r_wrongsec[i++] = RPCAUTH_KRB5;
+					req->r_wrongsec[i++] = RPCAUTH_SYS;
+					req->r_wrongsec[i++] = RPCAUTH_NONE;
+				}
+				for(; i < NX_MAX_SEC_FLAVORS; i++) /* invalidate any remaining slots */
+					req->r_wrongsec[i] = RPCAUTH_INVALID;
+			}
+
+			/* clear the current flavor from the list */
+			for(i=0; i < NX_MAX_SEC_FLAVORS; i++)
+				if (req->r_wrongsec[i] == req->r_auth)
+					req->r_wrongsec[i] = RPCAUTH_INVALID;
+
+			/* find the next flavor to try */
+			for(i=0; i < NX_MAX_SEC_FLAVORS; i++)
+				if (req->r_wrongsec[i] != RPCAUTH_INVALID) {
+					if (((req->r_wrongsec[i] == RPCAUTH_KRB5P) ||
+					     (req->r_wrongsec[i] == RPCAUTH_KRB5I) ||
+					     (req->r_wrongsec[i] == RPCAUTH_KRB5)) && (req->r_gss_ctx &&
+					    (req->r_gss_ctx->gss_clnt_service == RPCSEC_GSS_SVC_SYS))) {
+						/* don't bother trying Kerberos if we've already got a fallback context */
+						req->r_wrongsec[i] = RPCAUTH_INVALID;
+						continue;
+					}
+					if (!srvcount) /* no server list, just try it */
+						break;
+					/* check that it's in the server's list */
+					for(j=0; j < srvcount; j++)
+						if (req->r_wrongsec[i] == srvflavors[j])
+							break;
+					if (j < srvcount) /* found */
+						break;
+					/* not found in server list */
+					req->r_wrongsec[i] = RPCAUTH_INVALID;
+				}
+			if (i == NX_MAX_SEC_FLAVORS) {
+				/* nothing left to try! */
+				error = EACCES;
+				goto nfsmout;
+			}
+
+			/* retry with the next auth flavor: zeroing the XID and setting R_RESTART gets the request rebuilt and resent */
+			req->r_auth = req->r_wrongsec[i];
+			req->r_xid = 0;			// get a new XID
+			req->r_flags |= R_RESTART;
+			req->r_start = 0;
+			FSDBG(273, R_XID32(req->r_xid), nmp, req, NFSERR_WRONGSEC);
+			return (0);
+		}
+		if ((nmp->nm_vers >= NFS_VER4) && req->r_wrongsec) {
+			/*
+			 * We renegotiated security for this request; so update the
+			 * default security flavor for the associated node.
+			 */
+			if (req->r_np)
+				req->r_np->n_auth = req->r_auth;
+		}
+
 		if (*status == NFS_OK) {
 			/*
 			 * Successful NFS request
@@ -2834,8 +4042,12 @@ nfs_request_finish(
 		 * If the File Handle was stale, invalidate the
 		 * lookup cache, just in case.
 		 */
-		if ((*status == ESTALE) && req->r_np)
+		if ((*status == ESTALE) && req->r_np) {
 			cache_purge(NFSTOV(req->r_np));
+			/* if monitored, also send delete event */
+			if (vnode_ismonitored(NFSTOV(req->r_np)))
+				nfs_vnode_notify(req->r_np, (VNODE_EVENT_ATTRIB|VNODE_EVENT_DELETE));
+		}
 		if (nmp->nm_vers == NFS_VER2)
 			mbuf_freem(mrep);
 		else
@@ -2875,6 +4087,22 @@ nfsmout:
 	return (error);
 }
 
+/*
+ * NFS request using a GSS/Kerberos security flavor?
+ */
+int
+nfs_request_using_gss(struct nfsreq *req)
+{
+	if (!req->r_gss_ctx)
+		return (0);
+	switch (req->r_auth) {
+		case RPCAUTH_KRB5:
+		case RPCAUTH_KRB5I:
+		case RPCAUTH_KRB5P:
+			return (1);
+	}
+	return (0);
+}
 
 /*
  * Perform an NFS request synchronously.
@@ -2887,13 +4115,14 @@ nfs_request(
 	struct nfsm_chain *nmrest,
 	int procnum,
 	vfs_context_t ctx,
+	struct nfsreq_secinfo_args *si,
 	struct nfsm_chain *nmrepp,
 	u_int64_t *xidp,
 	int *status)
 {
 	return nfs_request2(np, mp, nmrest, procnum,
 		vfs_context_thread(ctx), vfs_context_ucred(ctx),
-		0, nmrepp, xidp, status);
+		si, 0, nmrepp, xidp, status);
 }
 
 int
@@ -2904,6 +4133,7 @@ nfs_request2(
 	int procnum,
 	thread_t thd,
 	kauth_cred_t cred,
+	struct nfsreq_secinfo_args *si,
 	int flags,
 	struct nfsm_chain *nmrepp,
 	u_int64_t *xidp,
@@ -2915,6 +4145,8 @@ nfs_request2(
 	if ((error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, &req)))
 		return (error);
 	req->r_flags |= (flags & R_OPTMASK);
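+	/* keep the secinfo args so an NFSERR_WRONGSEC reply can drive flavor renegotiation */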
+	if (si)
+		req->r_secinfo = *si;
 
 	FSDBG_TOP(273, R_XID32(req->r_xid), np, procnum, 0);
 	do {
@@ -2998,10 +4230,13 @@ nfs_request_async(
 	int procnum,
 	thread_t thd,
 	kauth_cred_t cred,
+	struct nfsreq_secinfo_args *si,
+	int flags,
 	struct nfsreq_cbinfo *cb,
 	struct nfsreq **reqp)
 {
 	struct nfsreq *req;
+	struct nfsmount *nmp;
 	int error, sent;
 
 	error = nfs_request_create(np, mp, nmrest, procnum, thd, cred, reqp);
@@ -3009,7 +4244,10 @@ nfs_request_async(
 	FSDBG(274, (req ? R_XID32(req->r_xid) : 0), np, procnum, error);
 	if (error)
 		return (error);
+	req->r_flags |= (flags & R_OPTMASK);
 	req->r_flags |= R_ASYNC;
+	if (si)
+		req->r_secinfo = *si;
 	if (cb)
 		req->r_callback = *cb;
 	error = nfs_request_add_header(req);
@@ -3021,9 +4259,32 @@ nfs_request_async(
 		lck_mtx_lock(&req->r_mtx);
 		if (!error && !(req->r_flags & R_SENT) && req->r_callback.rcb_func) {
 			/* make sure to wait until this async I/O request gets sent */
-			int slpflag = (req->r_nmp && (req->r_nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0;
+			int slpflag = (req->r_nmp && NMFLAG(req->r_nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR)) ? PCATCH : 0;
 			struct timespec ts = { 2, 0 };
 			while (!(req->r_flags & R_SENT)) {
+				if ((req->r_flags & R_RESENDQ) && ((nmp = req->r_nmp))) {
+					lck_mtx_lock(&nmp->nm_lock);
+					if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) {
+						/*
+						 * It's not going to get off the resend queue if we're in recovery.
+						 * So, just take it off ourselves.  We could be holding mount state
+						 * busy and thus holding up the start of recovery.
+						 */
+						TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
+						req->r_rchain.tqe_next = NFSREQNOLIST;
+						if (req->r_flags & R_RESENDQ)
+							req->r_flags &= ~R_RESENDQ;
+						lck_mtx_unlock(&nmp->nm_lock);
+						req->r_flags |= R_SENDING;
+						lck_mtx_unlock(&req->r_mtx);
+						error = nfs_send(req, 1);
+						lck_mtx_lock(&req->r_mtx);
+						if (error)
+							break;
+						continue;
+					}
+					lck_mtx_unlock(&nmp->nm_lock);
+				}
 				if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
 					break;
 				msleep(req, &req->r_mtx, slpflag | (PZERO - 1), "nfswaitsent", &ts);
@@ -3052,12 +4313,30 @@ nfs_request_async_finish(
 	int *status)
 {
 	int error = 0, asyncio = req->r_callback.rcb_func ? 1 : 0;
+	struct nfsmount *nmp;
 
 	lck_mtx_lock(&req->r_mtx);
 	if (!asyncio)
 		req->r_flags |= R_ASYNCWAIT;
 	while (req->r_flags & R_RESENDQ) {  /* wait until the request is off the resend queue */
 		struct timespec ts = { 2, 0 };
+		if ((nmp = req->r_nmp)) {
+			lck_mtx_lock(&nmp->nm_lock);
+			if ((nmp->nm_state & NFSSTA_RECOVER) && (req->r_rchain.tqe_next != NFSREQNOLIST)) {
+				/*
+				 * It's not going to get off the resend queue if we're in recovery.
+				 * So, just take it off ourselves.  We could be holding mount state
+				 * busy and thus holding up the start of recovery.
+				 */
+				TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain);
+				req->r_rchain.tqe_next = NFSREQNOLIST;
+				if (req->r_flags & R_RESENDQ)
+					req->r_flags &= ~R_RESENDQ;
+				lck_mtx_unlock(&nmp->nm_lock);
+				break;
+			}
+			lck_mtx_unlock(&nmp->nm_lock);
+		}
 		if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0)))
 			break;
 		msleep(req, &req->r_mtx, PZERO-1, "nfsresendqwait", &ts);
@@ -3270,7 +4549,7 @@ nfs_request_timer(__unused void *param0, __unused void *param1)
 		 * Put a reasonable limit on the maximum timeout,
 		 * and reduce that limit when soft mounts get timeouts or are in reconnect.
 		 */
-		if (!(nmp->nm_flag & NFSMNT_SOFT))
+		if (!NMFLAG(nmp, SOFT))
 			maxtime = NFS_MAXTIMEO;
 		else if ((req->r_flags & (R_SETUP|R_RECOVER)) ||
 		         ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8)))
@@ -3290,7 +4569,7 @@ nfs_request_timer(__unused void *param0, __unused void *param1)
 			} else {
 				if (req->r_procnum == NFSPROC_NULL && req->r_gss_ctx != NULL)
 					timeo = NFS_MINIDEMTIMEO; // gss context setup
-				else if (nmp->nm_flag & NFSMNT_DUMBTIMR)
+				else if (NMFLAG(nmp, DUMBTIMER))
 					timeo = nmp->nm_timeo;
 				else
 					timeo = NFS_RTO(nmp, proct[req->r_procnum]);
@@ -3320,7 +4599,8 @@ nfs_request_timer(__unused void *param0, __unused void *param1)
 			/* if it's been a few seconds, try poking the socket */
 			if ((nmp->nm_sotype == SOCK_STREAM) &&
 			    ((now.tv_sec - req->r_start) >= 3) &&
-			    !(nmp->nm_sockflags & NMSOCK_POKE)) {
+			    !(nmp->nm_sockflags & (NMSOCK_POKE|NMSOCK_UNMOUNT)) &&
+			    (nmp->nm_sockflags & NMSOCK_READY)) {
 				nmp->nm_sockflags |= NMSOCK_POKE;
 				TAILQ_INSERT_TAIL(&nfs_mount_poke_queue, nmp, nm_pokeq);
 			}
@@ -3328,7 +4608,7 @@ nfs_request_timer(__unused void *param0, __unused void *param1)
 		}
 
 		/* For soft mounts (& SETUPs/RECOVERs), check for too many retransmits/timeout. */
-		if (((nmp->nm_flag & NFSMNT_SOFT) || (req->r_flags & (R_SETUP|R_RECOVER))) &&
+		if ((NMFLAG(nmp, SOFT) || (req->r_flags & (R_SETUP|R_RECOVER))) &&
 		    ((req->r_rexmit >= req->r_retry) || /* too many */
 		     ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */
 			OSAddAtomic(1, &nfsstats.rpctimeouts);
@@ -3344,6 +4624,11 @@ nfs_request_timer(__unused void *param0, __unused void *param1)
 			} else {
 				lck_mtx_unlock(&nmp->nm_lock);
 			}
+			if (req->r_flags & R_NOINTR) {
+				/* don't terminate nointr requests on timeout */
+				lck_mtx_unlock(&req->r_mtx);
+				continue;
+			}
 			NFS_SOCK_DBG(("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n",
 				req->r_procnum, req->r_xid, req->r_flags, req->r_rtt,
 				now.tv_sec - req->r_start));
@@ -3391,8 +4676,7 @@ nfs_request_timer(__unused void *param0, __unused void *param1)
 		nfs_sock_poke(nmp);
 		lck_mtx_lock(&nmp->nm_lock);
 		nmp->nm_sockflags &= ~NMSOCK_POKE;
-		if (!(nmp->nm_state & NFSSTA_MOUNTED))
-			wakeup(&nmp->nm_sockflags);
+		wakeup(&nmp->nm_sockflags);
 		lck_mtx_unlock(&nmp->nm_lock);
 	}
 
@@ -3417,6 +4701,7 @@ nfs_noremotehang(thread_t thd)
  * and the mount is interruptable, or if we are a thread that is in the process
  * of cancellation (also SIGKILL posted).
  */
+extern int sigprop[NSIG+1];
 int
 nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked)
 {
@@ -3428,19 +4713,17 @@ nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocke
 
 	if (req && (req->r_flags & R_SOFTTERM))
 		return (ETIMEDOUT); /* request has been terminated. */
+	if (req && (req->r_flags & R_NOINTR))
+		thd = NULL; /* don't check for signal on R_NOINTR */
 
-	/*
-	 * If we're in the progress of a force unmount and there's
-	 * been a timeout, we're dead and fail IO.
-	 */
 	if (!nmplocked)
 		lck_mtx_lock(&nmp->nm_lock);
-	if ((nmp->nm_state & NFSSTA_FORCE) &&
-	    (nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_JUKEBOXTIMEO|NFSSTA_LOCKTIMEO))) {
+	if (nmp->nm_state & NFSSTA_FORCE) {
+		/* If a force unmount is in progress then fail. */
 		error = EIO;
 	} else if (nmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) {
 		/* Someone is unmounting us, go soft and mark it. */
-		nmp->nm_flag |= NFSMNT_SOFT;
+		NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_SOFT);
 		nmp->nm_state |= NFSSTA_FORCE;
 	}
 
@@ -3464,12 +4747,20 @@ nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocke
 	if (thd == NULL)
 		return (0);
 
-	/* If this thread belongs to kernel task; then abort check is not needed */
-	if ((current_proc() != kernproc) && current_thread_aborted())
+	/*
+	 * Check if the process is aborted, but don't interrupt if we
+	 * were killed by a signal and this is the exiting thread which
+	 * is attempting to dump core.
+	 */
+	if (((p = current_proc()) != kernproc) && current_thread_aborted() &&
+	    (!(p->p_acflag & AXSIG) || (p->exit_thread != current_thread()) ||
+	     (p->p_sigacts == NULL) ||
+	     (p->p_sigacts->ps_sig < 1) || (p->p_sigacts->ps_sig > NSIG) ||
+	     !(sigprop[p->p_sigacts->ps_sig] & SA_CORE)))
 		return (EINTR);
 
 	/* mask off thread and process blocked signals. */
-	if ((nmp->nm_flag & NFSMNT_INT) && ((p = get_bsdthreadtask_info(thd))) &&
+	if (NMFLAG(nmp, INTR) && ((p = get_bsdthreadtask_info(thd))) &&
 	    proc_pendingsignals(p, NFSINT_SIGMASK))
 		return (EINTR);
 	return (0);
@@ -3495,7 +4786,7 @@ nfs_sndlock(struct nfsreq *req)
 	lck_mtx_lock(&nmp->nm_lock);
 	statep = &nmp->nm_state;
 
-	if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread)
+	if (NMFLAG(nmp, INTR) && req->r_thread && !(req->r_flags & R_NOINTR))
 		slpflag = PCATCH;
 	while (*statep & NFSSTA_SNDLOCK) {
 		if ((error = nfs_sigintr(nmp, req, req->r_thread, 1)))
@@ -3530,7 +4821,7 @@ nfs_sndunlock(struct nfsreq *req)
 	statep = &nmp->nm_state;
 	if ((*statep & NFSSTA_SNDLOCK) == 0)
 		panic("nfs sndunlock");
-	*statep &= ~NFSSTA_SNDLOCK;
+	*statep &= ~(NFSSTA_SNDLOCK|NFSSTA_SENDING);
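+	/* also clear NFSSTA_SENDING, which nfs_send() sets while using the mount's socket */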
 	if (*statep & NFSSTA_WANTSND) {
 		*statep &= ~NFSSTA_WANTSND;
 		wake = 1;
@@ -3544,62 +4835,113 @@ int
 nfs_aux_request(
 	struct nfsmount *nmp,
 	thread_t thd,
-	struct sockaddr_in *saddr,
+	struct sockaddr *saddr,
+	socket_t so,
+	int sotype,
 	mbuf_t mreq,
 	uint32_t xid,
 	int bindresv,
 	int timeo,
 	struct nfsm_chain *nmrep)
 {
-	int error = 0, on = 1, try, sendat = 2;
-	socket_t so = NULL;
-	struct sockaddr_in sin;
-	struct timeval tv = { 1, 0 };
+	int error = 0, on = 1, try, sendat = 2, soproto, recv, optlen, restoreto = 0;
+	socket_t newso = NULL;
+	struct sockaddr_storage ss;
+	struct timeval orig_rcvto, orig_sndto, tv = { 1, 0 };
 	mbuf_t m, mrep = NULL;
 	struct msghdr msg;
 	uint32_t rxid = 0, reply = 0, reply_status, rejected_status;
 	uint32_t verf_type, verf_len, accepted_status;
-	size_t readlen;
+	size_t readlen, sentlen;
+	struct nfs_rpc_record_state nrrs;
 
-	/* create socket and set options */
-	if (((error = sock_socket(saddr->sin_family, SOCK_DGRAM, IPPROTO_UDP, NULL, NULL, &so))) ||
-	    ((error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))) ||
-	    ((error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)))) ||
-	    ((error = sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)))))
-		goto nfsmout;
-	if (bindresv) {
-		int portrange = IP_PORTRANGE_LOW;
-		error = sock_setsockopt(so, IPPROTO_IP, IP_PORTRANGE, &portrange, sizeof(portrange));
-		nfsmout_if(error);
-		/* bind now to check for failure */
-		sin.sin_len = sizeof (struct sockaddr_in);
-		sin.sin_family = AF_INET;
-		sin.sin_addr.s_addr = INADDR_ANY;
-		sin.sin_port = 0;
-		error = sock_bind(so, (struct sockaddr *) &sin);
-		nfsmout_if(error);
+	if (!so) {
+		/* create socket and set options */
+		soproto = (sotype == SOCK_DGRAM) ? IPPROTO_UDP : IPPROTO_TCP;
+		if ((error = sock_socket(saddr->sa_family, sotype, soproto, NULL, NULL, &newso)))
+			goto nfsmout;
+
+		if (bindresv) {
+			int level = (saddr->sa_family == AF_INET) ? IPPROTO_IP : IPPROTO_IPV6;
+			int optname = (saddr->sa_family == AF_INET) ? IP_PORTRANGE : IPV6_PORTRANGE;
+			int portrange = IP_PORTRANGE_LOW;
+			error = sock_setsockopt(newso, level, optname, &portrange, sizeof(portrange));
+			nfsmout_if(error);
+			ss.ss_len = saddr->sa_len;
+			ss.ss_family = saddr->sa_family;
+			if (ss.ss_family == AF_INET) {
+				((struct sockaddr_in*)&ss)->sin_addr.s_addr = INADDR_ANY;
+				((struct sockaddr_in*)&ss)->sin_port = htons(0);
+			} else if (ss.ss_family == AF_INET6) {
+				((struct sockaddr_in6*)&ss)->sin6_addr = in6addr_any;
+				((struct sockaddr_in6*)&ss)->sin6_port = htons(0);
+			} else {
+				error = EINVAL;
+			}
+			if (!error)
+				error = sock_bind(newso, (struct sockaddr *)&ss);
+			nfsmout_if(error);
+		}
+
+		if (sotype == SOCK_STREAM) {
+			on = 4; /* 4-second connect timeout; don't wait too long for the socket to connect */
+			sock_setsockopt(newso, IPPROTO_TCP, TCP_CONNECTIONTIMEOUT, &on, sizeof(on));
+			error = sock_connect(newso, saddr, 0);
+			nfsmout_if(error);
+		}
+		if (((error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))) ||
+		    ((error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)))) ||
+		    ((error = sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)))))
+			goto nfsmout;
+		so = newso;
+	} else {
+		/* make sure the socket uses a one-second timeout while in this function */
+		optlen = sizeof(orig_rcvto);
+		error = sock_getsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, &optlen);
+		if (!error) {
+			optlen = sizeof(orig_sndto);
+			error = sock_getsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, &optlen);
+		}
+		if (!error) {
+			sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+			sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
+			restoreto = 1;
+		}
+	}
+
+	if (sotype == SOCK_STREAM) {
+		sendat = 0; /* we only resend the request for UDP */
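+		/* TCP replies arrive as a marked RPC record stream, so set up record-parsing state */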
+		nfs_rpc_record_state_init(&nrrs);
 	}
 
 	for (try=0; try < timeo; try++) {
-		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
+		if ((error = nfs_sigintr(nmp, NULL, !try ? NULL : thd, 0)))
 			break;
 		if (!try || (try == sendat)) {
-			/* send the request (resending periodically) */
+			/* send the request (resending periodically for UDP) */
 			if ((error = mbuf_copym(mreq, 0, MBUF_COPYALL, MBUF_WAITOK, &m)))
 				goto nfsmout;
 			bzero(&msg, sizeof(msg));
-			msg.msg_name = saddr;
-			msg.msg_namelen = saddr->sin_len;
-			if ((error = sock_sendmbuf(so, &msg, m, 0, NULL)))
+			if ((sotype == SOCK_DGRAM) && !sock_isconnected(so)) {
+				msg.msg_name = saddr;
+				msg.msg_namelen = saddr->sa_len;
+			}
+			if ((error = sock_sendmbuf(so, &msg, m, 0, &sentlen)))
 				goto nfsmout;
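 			/* exponential backoff: double the UDP resend interval, capping it at ~30 one-second tries */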
 			sendat *= 2;
 			if (sendat > 30)
 				sendat = 30;
 		}
 		/* wait for the response */
-		readlen = 1<<18;
-		bzero(&msg, sizeof(msg));
-		error = sock_receivembuf(so, &msg, &mrep, 0, &readlen);
+		if (sotype == SOCK_STREAM) {
+			/* try to read (more of) record */
+			error = nfs_rpc_record_read(so, &nrrs, 0, &recv, &mrep);
+			/* if we don't have the whole record yet, we'll keep trying */
+		} else {
+			readlen = 1<<18;
+			bzero(&msg, sizeof(msg));
+			error = sock_receivembuf(so, &msg, &mrep, 0, &readlen);
+		}
 		if (error == EWOULDBLOCK)
 			continue;
 		nfsmout_if(error);
@@ -3615,7 +4957,7 @@ nfs_aux_request(
 		if (reply_status == RPC_MSGDENIED) {
 			nfsm_chain_get_32(error, nmrep, rejected_status);
 			nfsmout_if(error);
-			error = (rejected_status == RPC_MISMATCH) ? ENOTSUP : EACCES;
+			error = (rejected_status == RPC_MISMATCH) ? ERPCMISMATCH : EACCES;
 			goto nfsmout;
 		}
 		nfsm_chain_get_32(error, nmrep, verf_type); /* verifier flavor */
@@ -3624,18 +4966,159 @@ nfs_aux_request(
 		if (verf_len)
 			nfsm_chain_adv(error, nmrep, nfsm_rndup(verf_len));
 		nfsm_chain_get_32(error, nmrep, accepted_status);
-		nfsm_assert(error, (accepted_status == RPC_SUCCESS), EIO);
+		nfsmout_if(error);
+		switch (accepted_status) {
+		case RPC_SUCCESS:
+			error = 0;
+			break;
+		case RPC_PROGUNAVAIL:
+			error = EPROGUNAVAIL;
+			break;
+		case RPC_PROGMISMATCH:
+			error = EPROGMISMATCH;
+			break;
+		case RPC_PROCUNAVAIL:
+			error = EPROCUNAVAIL;
+			break;
+		case RPC_GARBAGE:
+			error = EBADRPC;
+			break;
+		case RPC_SYSTEM_ERR:
+		default:
+			error = EIO;
+			break;
+		}
 		break;
 	}
 nfsmout:
-	if (so) {
-		sock_shutdown(so, SHUT_RDWR);
-		sock_close(so);
+	if (restoreto) {
+		sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &orig_rcvto, sizeof(orig_rcvto));
+		sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &orig_sndto, sizeof(orig_sndto));
+	}
+	if (newso) {
+		sock_shutdown(newso, SHUT_RDWR);
+		sock_close(newso);
 	}
 	mbuf_freem(mreq);
 	return (error);
 }
 
+int
+nfs_portmap_lookup(
+	struct nfsmount *nmp,
+	vfs_context_t ctx,
+	struct sockaddr *sa,
+	socket_t so,
+	uint32_t protocol,
+	uint32_t vers,
+	uint32_t ipproto,
+	int timeo)
+{
+	thread_t thd = vfs_context_thread(ctx);
+	kauth_cred_t cred = vfs_context_ucred(ctx);
+	struct sockaddr_storage ss;
+	struct sockaddr *saddr = (struct sockaddr*)&ss;
+	struct nfsm_chain nmreq, nmrep;
+	mbuf_t mreq;
+	int error = 0, ip, pmprog, pmvers, pmproc, ualen = 0;
+	uint32_t port;
+	uint64_t xid = 0;
+	char uaddr[MAX_IPv6_STR_LEN+16];
+
+	bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
+	if (saddr->sa_family == AF_INET) {
+		ip = 4;
+		pmprog = PMAPPROG;
+		pmvers = PMAPVERS;
+		pmproc = PMAPPROC_GETPORT;
+	} else if (saddr->sa_family == AF_INET6) {
+		ip = 6;
+		pmprog = RPCBPROG;
+		pmvers = RPCBVERS4;
+		pmproc = RPCBPROC_GETVERSADDR;
+	} else {
+		return (EINVAL);
+	}
+	nfsm_chain_null(&nmreq);
+	nfsm_chain_null(&nmrep);
+
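+	/* IPv4: ask the v2 portmapper for a port; IPv6: ask rpcbind v4 for a uaddr, falling back to v3 GETADDR */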
+tryagain:
+	/* send portmapper request to get port/uaddr */
+	if (ip == 4)
+		((struct sockaddr_in*)saddr)->sin_port = htons(PMAPPORT);
+	else
+		((struct sockaddr_in6*)saddr)->sin6_port = htons(PMAPPORT);
+	nfsm_chain_build_alloc_init(error, &nmreq, 8*NFSX_UNSIGNED);
+	nfsm_chain_add_32(error, &nmreq, protocol);
+	nfsm_chain_add_32(error, &nmreq, vers);
+	if (ip == 4) {
+		nfsm_chain_add_32(error, &nmreq, ipproto);
+		nfsm_chain_add_32(error, &nmreq, 0);
+	} else {
+		if (ipproto == IPPROTO_TCP)
+			nfsm_chain_add_string(error, &nmreq, "tcp6", 4);
+		else
+			nfsm_chain_add_string(error, &nmreq, "udp6", 4);
+		nfsm_chain_add_string(error, &nmreq, "", 0); /* uaddr */
+		nfsm_chain_add_string(error, &nmreq, "", 0); /* owner */
+	}
+	nfsm_chain_build_done(error, &nmreq);
+	nfsmout_if(error);
+	error = nfsm_rpchead2(nmp, (ipproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
+			pmprog, pmvers, pmproc, RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead,
+			&xid, &mreq);
+	nfsmout_if(error);
+	nmreq.nmc_mhead = NULL;
+	error = nfs_aux_request(nmp, thd, saddr, so, (ipproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
+			mreq, R_XID32(xid), 0, timeo, &nmrep);
+
+	/* grab port from portmap response */
+	if (ip == 4) {
+		nfsm_chain_get_32(error, &nmrep, port);
+		if (!error)
+			((struct sockaddr_in*)sa)->sin_port = htons(port);
+	} else {
+		/* get uaddr string and convert to sockaddr */
+		nfsm_chain_get_32(error, &nmrep, ualen);
+		if (!error) {
+			if (ualen > ((int)sizeof(uaddr)-1))
+				error = EIO;
+			if (ualen < 1) {
+				/* program is not available, just return a zero port */
+				bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
+				((struct sockaddr_in6*)saddr)->sin6_port = htons(0);
+			} else {
+				nfsm_chain_get_opaque(error, &nmrep, ualen, uaddr);
+				if (!error) {
+					uaddr[ualen] = '\0';
+					if (!nfs_uaddr2sockaddr(uaddr, saddr))
+						error = EIO;
+				}
+			}
+		}
+		if ((error == EPROGMISMATCH) || (error == EPROCUNAVAIL) || (error == EIO) || (error == EBADRPC)) {
+			/* remote doesn't support rpcbind version or proc (or we couldn't parse uaddr) */
+			if (pmvers == RPCBVERS4) {
+				/* fall back to v3 and GETADDR */
+				pmvers = RPCBVERS3;
+				pmproc = RPCBPROC_GETADDR;
+				nfsm_chain_cleanup(&nmreq);
+				nfsm_chain_cleanup(&nmrep);
+				bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
+				xid = 0;
+				error = 0;
+				goto tryagain;
+			}
+		}
+		if (!error)
+			bcopy(saddr, sa, min(saddr->sa_len, sa->sa_len));
+	}
+nfsmout:
+	nfsm_chain_cleanup(&nmreq);
+	nfsm_chain_cleanup(&nmrep);
+	return (error);
+}
+
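For concreteness, a lookup of NFSv3 over TCP sends PMAPPROC_GETPORT to the portmapper on port 111 (PMAPPORT) with the program/version/protocol tuple built above, and typically gets back port 2049; the IPv6 rpcbind path instead sends the netid ("tcp6" or "udp6") and gets back a universal address string. An illustrative exchange (addresses hypothetical):

    GETPORT args:       prog=100003 (NFS), vers=3, prot=6 (TCP), port=0
    GETPORT reply:      port=2049
    GETVERSADDR reply:  uaddr "fe80::1.8.1"  ->  [fe80::1]:2049
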
 int
 nfs_msg(thread_t thd,
 	const char *server,
@@ -3670,12 +5153,12 @@ nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *m
 	lck_mtx_lock(&nmp->nm_lock);
 
 	timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
-	if (nmp->nm_flag & NFSMNT_MUTEJUKEBOX) /* jukebox timeouts don't count as unresponsive if muted */
+	if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */
 		   timeoutmask &= ~NFSSTA_JUKEBOXTIMEO;
 	wasunresponsive = (nmp->nm_state & timeoutmask);
 
 	/* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
-	softnobrowse = ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));
+	softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));
 
 	if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO))
 		nmp->nm_state |= NFSSTA_TIMEO;
@@ -3686,7 +5169,7 @@ nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *m
 
 	unresponsive = (nmp->nm_state & timeoutmask);
 
-	if (unresponsive && (nmp->nm_flag & NFSMNT_DEADTIMEOUT)) {
+	if (unresponsive && (nmp->nm_deadtimeout > 0)) {
 		microuptime(&now);
 		if (!wasunresponsive) {
 			nmp->nm_deadto_start = now.tv_sec;
@@ -3726,12 +5209,12 @@ nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg)
 	lck_mtx_lock(&nmp->nm_lock);
 
 	timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO;
-	if (nmp->nm_flag & NFSMNT_MUTEJUKEBOX) /* jukebox timeouts don't count as unresponsive if muted */
+	if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */
 		   timeoutmask &= ~NFSSTA_JUKEBOXTIMEO;
 	wasunresponsive = (nmp->nm_state & timeoutmask);
 
 	/* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
-	softnobrowse = ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));
+	softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));
 
 	if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO))
 		nmp->nm_state &= ~NFSSTA_TIMEO;
@@ -3916,9 +5399,9 @@ nfsrv_send(struct nfsrv_sock *slp, mbuf_t nam, mbuf_t top)
  * be called with MBUF_WAITOK from an nfsd.
  */
 void
-nfsrv_rcv(socket_t so, caddr_t arg, int waitflag)
+nfsrv_rcv(socket_t so, void *arg, int waitflag)
 {
-	struct nfsrv_sock *slp = (struct nfsrv_sock *)arg;
+	struct nfsrv_sock *slp = arg;
 
 	if (!nfsd_thread_count || !(slp->ns_flag & SLP_VALID))
 		return;
@@ -4250,6 +5733,8 @@ nfsrv_dorec(
 	if (error) {
 		if (nam)
 			mbuf_freem(nam);
+		if (nd->nd_gss_context)
+			nfs_gss_svc_ctx_deref(nd->nd_gss_context);
 		FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
 		return (error);
 	}
@@ -4274,7 +5759,6 @@ nfsrv_getreq(struct nfsrv_descript *nd)
 	uid_t user_id;
 	gid_t group_id;
 	int ngroups;
-	struct ucred temp_cred;
 	uint32_t val;
 
 	nd->nd_cr = NULL;
@@ -4331,10 +5815,11 @@ nfsrv_getreq(struct nfsrv_descript *nd)
 	nfsmout_if(error);
 
 	/* Handle authentication */
-	if (auth_type == RPCAUTH_UNIX) {
+	if (auth_type == RPCAUTH_SYS) {
+		struct posix_cred temp_pcred;
 		if (nd->nd_procnum == NFSPROC_NULL)
 			return (0);
-		nd->nd_sec = RPCAUTH_UNIX;
+		nd->nd_sec = RPCAUTH_SYS;
 		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);	// skip stamp
 		nfsm_chain_get_32(error, nmreq, len);		// hostname length
 		if (len < 0 || len > NFS_MAXNAMLEN)
@@ -4343,23 +5828,23 @@ nfsrv_getreq(struct nfsrv_descript *nd)
 		nfsmout_if(error);
 
 		/* create a temporary credential using the bits from the wire */
-		bzero(&temp_cred, sizeof(temp_cred));
+		bzero(&temp_pcred, sizeof(temp_pcred));
 		nfsm_chain_get_32(error, nmreq, user_id);
 		nfsm_chain_get_32(error, nmreq, group_id);
-		temp_cred.cr_groups[0] = group_id;
+		temp_pcred.cr_groups[0] = group_id;
 		nfsm_chain_get_32(error, nmreq, len);		// extra GID count
 		if ((len < 0) || (len > RPCAUTH_UNIXGIDS))
 			error = EBADRPC;
 		nfsmout_if(error);
 		for (i = 1; i <= len; i++)
 			if (i < NGROUPS)
-				nfsm_chain_get_32(error, nmreq, temp_cred.cr_groups[i]);
+				nfsm_chain_get_32(error, nmreq, temp_pcred.cr_groups[i]);
 			else
 				nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);
 		nfsmout_if(error);
 		ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
 		if (ngroups > 1)
-			nfsrv_group_sort(&temp_cred.cr_groups[0], ngroups);
+			nfsrv_group_sort(&temp_pcred.cr_groups[0], ngroups);
 		nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED);	// verifier flavor (should be AUTH_NONE)
 		nfsm_chain_get_32(error, nmreq, len);		// verifier length
 		if (len < 0 || len > RPCAUTH_MAXSIZ)
@@ -4368,9 +5853,9 @@ nfsrv_getreq(struct nfsrv_descript *nd)
 			nfsm_chain_adv(error, nmreq, nfsm_rndup(len));
 
 		/* request creation of a real credential */
-		temp_cred.cr_uid = user_id;
-		temp_cred.cr_ngroups = ngroups;
-		nd->nd_cr = kauth_cred_create(&temp_cred);
+		temp_pcred.cr_uid = user_id;
+		temp_pcred.cr_ngroups = ngroups;
+		nd->nd_cr = posix_cred_create(&temp_pcred);
 		if (nd->nd_cr == NULL) {
 			nd->nd_repstat = ENOMEM;
 			nd->nd_procnum = NFSPROC_NOOP;
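
An AUTH_SYS credential on the wire carries at most RPCAUTH_UNIXGIDS (16) supplementary gids, while the kernel credential holds at most NGROUPS entries with the effective gid in slot 0. So with NGROUPS at its usual value of 16, a request carrying the full 16 extra gids has slots 1 through 15 filled from the wire, the 16th gid skipped, and ngroups clamped to 16 before the groups are sorted and the posix_cred is materialized.
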
diff --git a/bsd/nfs/nfs_srvcache.c b/bsd/nfs/nfs_srvcache.c
index db1c6e6a7..7fde3da6b 100644
--- a/bsd/nfs/nfs_srvcache.c
+++ b/bsd/nfs/nfs_srvcache.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -174,6 +174,7 @@ nfsrv_initcache(void)
  * If there is any doubt, return FALSE.
  * The AF_INET family is handled as a special case so that address mbufs
  * don't need to be saved to store "struct in_addr", which is only 4 bytes.
+ * Ditto for AF_INET6, which is only 16 bytes.
  */
 static int
 netaddr_match(
@@ -182,17 +183,22 @@ netaddr_match(
 	mbuf_t nam)
 {
 	struct sockaddr_in *inetaddr;
+	struct sockaddr_in6 *inet6addr;
 
 	switch (family) {
 	case AF_INET:
 		inetaddr = mbuf_data(nam);
-		if (inetaddr->sin_family == AF_INET &&
-		    inetaddr->sin_addr.s_addr == haddr->had_inetaddr)
+		if ((inetaddr->sin_family == AF_INET) &&
+		    (inetaddr->sin_addr.s_addr == haddr->had_inetaddr))
 			return (1);
 		break;
-	default:
+	case AF_INET6:
+		inet6addr = mbuf_data(nam);
+		if ((inet6addr->sin6_family == AF_INET6) &&
+		    !bcmp(&inet6addr->sin6_addr, &haddr->had_inet6addr, sizeof(inet6addr->sin6_addr)))
+			return (1);
 		break;
-	};
+	}
 	return (0);
 }
 
@@ -218,7 +224,7 @@ nfsrv_getcache(
 {
 	struct nfsrvcache *rp;
 	struct nfsm_chain nmrep;
-	struct sockaddr_in *saddr;
+	struct sockaddr *saddr;
 	int ret, error;
 
 	/*
@@ -232,7 +238,7 @@ loop:
 	for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0;
 	    rp = rp->rc_hash.le_next) {
 	    if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
-		netaddr_match(AF_INET, &rp->rc_haddr, nd->nd_nam)) {
+		netaddr_match(rp->rc_family, &rp->rc_haddr, nd->nd_nam)) {
 			if ((rp->rc_flag & RC_LOCKED) != 0) {
 				rp->rc_flag |= RC_WANTED;
 				msleep(rp, nfsrv_reqcache_mutex, PZERO-1, "nfsrc", NULL);
@@ -323,10 +329,15 @@ loop:
 	rp->rc_state = RC_INPROG;
 	rp->rc_xid = nd->nd_retxid;
 	saddr = mbuf_data(nd->nd_nam);
-	switch (saddr->sin_family) {
+	rp->rc_family = saddr->sa_family;
+	switch (saddr->sa_family) {
 	case AF_INET:
 		rp->rc_flag |= RC_INETADDR;
-		rp->rc_inetaddr = saddr->sin_addr.s_addr;
+		rp->rc_inetaddr = ((struct sockaddr_in*)saddr)->sin_addr.s_addr;
+		break;
+	case AF_INET6:
+		rp->rc_flag |= RC_INETADDR;
+		rp->rc_inet6addr = ((struct sockaddr_in6*)saddr)->sin6_addr;
 		break;
 	default:
 		error = mbuf_copym(nd->nd_nam, 0, MBUF_COPYALL, MBUF_WAITOK, &rp->rc_nam);
@@ -366,7 +377,7 @@ loop:
 	for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0;
 	    rp = rp->rc_hash.le_next) {
 	    if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
-		netaddr_match(AF_INET, &rp->rc_haddr, nd->nd_nam)) {
+		netaddr_match(rp->rc_family, &rp->rc_haddr, nd->nd_nam)) {
 			if ((rp->rc_flag & RC_LOCKED) != 0) {
 				rp->rc_flag |= RC_WANTED;
 				msleep(rp, nfsrv_reqcache_mutex, PZERO-1, "nfsrc", NULL);
diff --git a/bsd/nfs/nfs_subs.c b/bsd/nfs/nfs_subs.c
index 40b55e86e..dccead918 100644
--- a/bsd/nfs/nfs_subs.c
+++ b/bsd/nfs/nfs_subs.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -99,6 +99,9 @@
 #include <nfs/nfsproto.h>
 #include <nfs/nfs.h>
 #include <nfs/nfsnode.h>
+#if NFSCLIENT
+#define _NFS_XDR_SUBS_FUNCS_ /* define this to get xdrbuf function definitions */
+#endif
 #include <nfs/xdr_subs.h>
 #include <nfs/nfsm_subs.h>
 #include <nfs/nfs_gss.h>
@@ -110,6 +113,8 @@
 #include <netinet/in.h>
 #include <net/kpi_interface.h>
 
+#include <sys/utfconv.h>
+
 /*
  * NFS globals
  */
@@ -793,6 +798,33 @@ nfsm_chain_get_uio(struct nfsm_chain *nmc, uint32_t len, uio_t uio)
 
 #if NFSCLIENT
 
+int
+nfsm_chain_add_string_nfc(struct nfsm_chain *nmc, const uint8_t *s, uint32_t slen)
+{
+	uint8_t smallbuf[64];
+	uint8_t *nfcname = smallbuf;
+	size_t buflen = sizeof(smallbuf), nfclen;
+	int error;
+
+	error = utf8_normalizestr(s, slen, nfcname, &nfclen, buflen, UTF_PRECOMPOSED|UTF_NO_NULL_TERM);
+	if (error == ENAMETOOLONG) {
+		buflen = MAXPATHLEN;
+		MALLOC_ZONE(nfcname, uint8_t *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+		if (nfcname)
+			error = utf8_normalizestr(s, slen, nfcname, &nfclen, buflen, UTF_PRECOMPOSED|UTF_NO_NULL_TERM);
+	}
+
+	/* if we got an error, just use the original string */
+	if (error)
+		nfsm_chain_add_string(error, nmc, s, slen);
+	else
+		nfsm_chain_add_string(error, nmc, nfcname, nfclen);
+
+	if (nfcname && (nfcname != smallbuf))
+		FREE_ZONE(nfcname, MAXPATHLEN, M_NAMEI);
+	return (error);
+}
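
This normalizes names to precomposed UTF-8 (NFC) before they go over the wire; HFS+ stores names decomposed, while most servers expect the precomposed form. A worked example of what utf8_normalizestr() produces here:

    "e" + U+0301 COMBINING ACUTE:  0x65 0xCC 0x81   (decomposed, 3 bytes)
    after UTF_PRECOMPOSED (NFC):   0xC3 0xA9        (U+00E9, 2 bytes)

The small on-stack buffer covers the common case; only names that overflow it take the MALLOC_ZONE path, and any normalization failure falls back to sending the original string unchanged.
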
+
 /*
  * Add an NFSv2 "sattr" structure to an mbuf chain
  */
@@ -909,7 +941,7 @@ nfsm_chain_get_fh_attr(
 			error = nfs_parsefattr(nmc, nfsvers, nvap);
 	} else if (gotfh) {
 		/* we need valid attributes in order to call nfs_nget() */
-		if (nfs3_getattr_rpc(NULL, NFSTOMP(dnp), fhp->fh_data, fhp->fh_len, ctx, nvap, xidp)) {
+		if (nfs3_getattr_rpc(NULL, NFSTOMP(dnp), fhp->fh_data, fhp->fh_len, 0, ctx, nvap, xidp)) {
 			gotattr = 0;
 			fhp->fh_len = 0;
 		}
@@ -985,7 +1017,6 @@ nfs_get_xid(uint64_t *xidp)
 int
 nfsm_rpchead(
 	struct nfsreq *req,
-	int auth_len,
 	mbuf_t mrest,
 	u_int64_t *xidp,
 	mbuf_t *mreqp)
@@ -993,23 +1024,55 @@ nfsm_rpchead(
 	struct nfsmount *nmp = req->r_nmp;
 	int nfsvers = nmp->nm_vers;
 	int proc = ((nfsvers == NFS_VER2) ? nfsv2_procid[req->r_procnum] : (int)req->r_procnum);
-	int auth_type = (!auth_len && !req->r_cred) ? RPCAUTH_NULL : nmp->nm_auth;
 
-	return nfsm_rpchead2(nmp->nm_sotype, NFS_PROG, nfsvers, proc,
-		auth_type, auth_len, req->r_cred, req, mrest, xidp, mreqp);
+	return nfsm_rpchead2(nmp, nmp->nm_sotype, NFS_PROG, nfsvers, proc,
+			req->r_auth, req->r_cred, req, mrest, xidp, mreqp);
 }
 
 int
-nfsm_rpchead2(int sotype, int prog, int vers, int proc, int auth_type, int auth_len,
+nfsm_rpchead2(struct nfsmount *nmp, int sotype, int prog, int vers, int proc, int auth_type,
 	kauth_cred_t cred, struct nfsreq *req, mbuf_t mrest, u_int64_t *xidp, mbuf_t *mreqp)
 {
 	mbuf_t mreq, mb;
-	int error, i, grpsiz, authsiz, reqlen;
+	int error, i, grpsiz, auth_len = 0, authsiz, reqlen;
 	size_t headlen;
 	struct nfsm_chain nmreq;
 
-	/* allocate the packet */
+	/* calculate expected auth length */
+	switch (auth_type) {
+		case RPCAUTH_NONE:
+			auth_len = 0;
+			break;
+		case RPCAUTH_SYS:
+		    {
+			gid_t grouplist[NGROUPS];
+			int groupcount = NGROUPS;
+
+			if (!cred)
+				return (EINVAL);
+
+			(void)kauth_cred_getgroups(cred, grouplist, &groupcount);
+			if (groupcount < 1)
+				return (EINVAL);
+
+			auth_len = ((((groupcount - 1) > nmp->nm_numgrps) ?
+				nmp->nm_numgrps : (groupcount - 1)) << 2) +
+				5 * NFSX_UNSIGNED;
+			break;
+		    }
+		case RPCAUTH_KRB5:
+		case RPCAUTH_KRB5I:
+		case RPCAUTH_KRB5P:
+			if (!req || !cred)
+				return (EINVAL);
+			auth_len = 5 * NFSX_UNSIGNED + 0; // zero context handle for now
+			break;
+		default:
+			return (EINVAL);
+		}
 	authsiz = nfsm_rndup(auth_len);
+
+	/* allocate the packet */
 	headlen = authsiz + 10 * NFSX_UNSIGNED;
 	if (sotype == SOCK_STREAM) /* also include room for any RPC Record Mark */
 		headlen += NFSX_UNSIGNED;
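
The RPCAUTH_SYS length computed above is five 32-bit words (stamp, zero-length hostname, uid, gid, gid count) plus four bytes per supplementary gid, capped at nm_numgrps. For example, a credential with four groups (one effective plus three supplementary) on a mount with nm_numgrps >= 3 yields auth_len = 5*4 + 3*4 = 32 bytes, and nfsm_rndup() leaves that unchanged since it is already word-aligned.
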
@@ -1055,27 +1118,36 @@ nfsm_rpchead2(int sotype, int prog, int vers, int proc, int auth_type, int auth_
 
 add_cred:
 	switch (auth_type) {
-	case RPCAUTH_NULL:
-		nfsm_chain_add_32(error, &nmreq, RPCAUTH_NULL); /* auth */
+	case RPCAUTH_NONE:
+		nfsm_chain_add_32(error, &nmreq, RPCAUTH_NONE); /* auth */
 		nfsm_chain_add_32(error, &nmreq, 0);		/* length */
-		nfsm_chain_add_32(error, &nmreq, RPCAUTH_NULL);	/* verf */
+		nfsm_chain_add_32(error, &nmreq, RPCAUTH_NONE);	/* verf */
 		nfsm_chain_add_32(error, &nmreq, 0);		/* length */
 		nfsm_chain_build_done(error, &nmreq);
+		/* Append the args mbufs */
+		if (!error)
+			error = mbuf_setnext(nmreq.nmc_mcur, mrest);
 		break;
-	case RPCAUTH_UNIX:
-		nfsm_chain_add_32(error, &nmreq, RPCAUTH_UNIX);
+	case RPCAUTH_SYS: {
+		gid_t grouplist[NGROUPS];
+		int groupcount;
+
+		nfsm_chain_add_32(error, &nmreq, RPCAUTH_SYS);
 		nfsm_chain_add_32(error, &nmreq, authsiz);
 		nfsm_chain_add_32(error, &nmreq, 0);	/* stamp */
 		nfsm_chain_add_32(error, &nmreq, 0);	/* zero-length hostname */
 		nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(cred));	/* UID */
-		nfsm_chain_add_32(error, &nmreq, cred->cr_groups[0]);	/* GID */
+		nfsm_chain_add_32(error, &nmreq, kauth_cred_getgid(cred));	/* GID */
 		grpsiz = (auth_len >> 2) - 5;
 		nfsm_chain_add_32(error, &nmreq, grpsiz);/* additional GIDs */
+		memset(grouplist, 0, sizeof(grouplist));
+		groupcount = grpsiz;
+		(void)kauth_cred_getgroups(cred, grouplist, &groupcount);
 		for (i = 1; i <= grpsiz; i++)
-			nfsm_chain_add_32(error, &nmreq, cred->cr_groups[i]);
+			nfsm_chain_add_32(error, &nmreq, grouplist[i]);
 
 		/* And the verifier... */
-		nfsm_chain_add_32(error, &nmreq, RPCAUTH_NULL);	/* flavor */
+		nfsm_chain_add_32(error, &nmreq, RPCAUTH_NONE);	/* flavor */
 		nfsm_chain_add_32(error, &nmreq, 0);		/* length */
 		nfsm_chain_build_done(error, &nmreq);
 
@@ -1083,16 +1155,24 @@ add_cred:
 		if (!error)
 			error = mbuf_setnext(nmreq.nmc_mcur, mrest);
 		break;
+	}
 	case RPCAUTH_KRB5:
 	case RPCAUTH_KRB5I:
 	case RPCAUTH_KRB5P:
 		error = nfs_gss_clnt_cred_put(req, &nmreq, mrest);
 		if (error == ENEEDAUTH) {
+			gid_t grouplist[NGROUPS];
+			int groupcount = NGROUPS;
 			/*
 			 * Use sec=sys for this user
 			 */
 			error = 0;
-			auth_type = RPCAUTH_UNIX;
+			req->r_auth = auth_type = RPCAUTH_SYS;
+			(void)kauth_cred_getgroups(cred, grouplist, &groupcount);
+			auth_len = ((((groupcount - 1) > nmp->nm_numgrps) ?
+				nmp->nm_numgrps : (groupcount - 1)) << 2) +
+				5 * NFSX_UNSIGNED;
+			authsiz = nfsm_rndup(auth_len);
 			goto add_cred;
 		}
 		break;
@@ -1141,6 +1221,21 @@ nfs_parsefattr(struct nfsm_chain *nmc, int nfsvers, struct nfs_vattr *nvap)
 	dev_t rdev;
 
 	val = val2 = 0;
+	NVATTR_INIT(nvap);
+
+	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TYPE);
+	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_MODE);
+	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_NUMLINKS);
+	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_OWNER);
+	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_OWNER_GROUP);
+	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_SIZE);
+	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_SPACE_USED);
+	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_RAWDEV);
+	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_FSID);
+	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_FILEID);
+	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TIME_ACCESS);
+	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TIME_MODIFY);
+	NFS_BITMAP_SET(nvap->nva_bitmap, NFS_FATTR_TIME_METADATA);
 
 	nfsm_chain_get_32(error, nmc, vtype);
 	nfsm_chain_get_32(error, nmc, vmode);
@@ -1241,6 +1336,12 @@ nfs_loadattrcache(
 	vnode_t vp;
 	struct timeval now;
 	struct nfs_vattr *npnvap;
+	int xattr = np->n_vattr.nva_flags & NFS_FFLAG_IS_ATTR;
+	int referral = np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL;
+	int aclbit, monitored, error = 0;
+	kauth_acl_t acl;
+	struct nfsmount *nmp;
+	uint32_t events = np->n_events;
 
 	if (np->n_hflag & NHINIT) {
 		vp = NULL;
@@ -1249,10 +1350,11 @@ nfs_loadattrcache(
 		vp = NFSTOV(np);
 		mp = vnode_mount(vp);
 	}
+	monitored = vp ? vnode_ismonitored(vp) : 0;
 
 	FSDBG_TOP(527, np, vp, *xidp >> 32, *xidp);
 
-	if (!VFSTONFS(mp)) {
+	if (!((nmp = VFSTONFS(mp)))) {
 		FSDBG_BOT(527, ENXIO, 1, 0, *xidp);
 		return (ENXIO);
 	}
@@ -1298,16 +1400,133 @@ nfs_loadattrcache(
 		 */
 		printf("nfs loadattrcache vnode changed type, was %d now %d\n",
 			vnode_vtype(vp), nvap->nva_type);
-		FSDBG_BOT(527, ESTALE, 3, 0, *xidp);
-		return (ESTALE);
+		error = ESTALE;
+		if (monitored)
+			events |= VNODE_EVENT_DELETE;
+		goto out;
 	}
 
+	npnvap = &np->n_vattr;
+
+	/*
+	 * The ACL cache needs special handling because it is not
+	 * always updated.  Save current ACL cache state so it can
+	 * be restored after copying the new attributes into place.
+	 */
+	aclbit = NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_ACL);
+	acl = npnvap->nva_acl;
+
+	if (monitored) {
+		/*
+		 * For monitored nodes, check for attribute changes that should generate events.
+		 */
+		if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_NUMLINKS) &&
+		    (nvap->nva_nlink != npnvap->nva_nlink))
+			events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_LINK;
+		if (events & VNODE_EVENT_PERMS)
+			/* no need to do all the checking if it's already set */;
+		else if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_MODE) &&
+			 (nvap->nva_mode != npnvap->nva_mode))
+			events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS;
+		else if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER) &&
+			 (nvap->nva_uid != npnvap->nva_uid))
+			events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS;
+		else if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER_GROUP) &&
+			 (nvap->nva_gid != npnvap->nva_gid))
+			events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS;
+		else if (nmp->nm_vers >= NFS_VER4) {
+			if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER) &&
+			    !kauth_guid_equal(&nvap->nva_uuuid, &npnvap->nva_uuuid))
+				events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS;
+			else if (NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_OWNER_GROUP) &&
+				 !kauth_guid_equal(&nvap->nva_guuid, &npnvap->nva_guuid))
+				events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS;
+			else if ((NFS_BITMAP_ISSET(nvap->nva_bitmap, NFS_FATTR_ACL) &&
+				 nvap->nva_acl && npnvap->nva_acl &&
+			         ((nvap->nva_acl->acl_entrycount != npnvap->nva_acl->acl_entrycount) ||
+			          bcmp(nvap->nva_acl, npnvap->nva_acl, KAUTH_ACL_COPYSIZE(nvap->nva_acl)))))
+				events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_PERMS;
+		}
+		if (((nmp->nm_vers >= NFS_VER4) && (nvap->nva_change != npnvap->nva_change)) ||
+		   (NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_TIME_MODIFY) &&
+		    ((nvap->nva_timesec[NFSTIME_MODIFY] != npnvap->nva_timesec[NFSTIME_MODIFY]) ||
+		     (nvap->nva_timensec[NFSTIME_MODIFY] != npnvap->nva_timensec[NFSTIME_MODIFY]))))
+			events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_WRITE;
+		if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_RAWDEV) &&
+		    ((nvap->nva_rawdev.specdata1 != npnvap->nva_rawdev.specdata1) ||
+		     (nvap->nva_rawdev.specdata2 != npnvap->nva_rawdev.specdata2)))
+			events |= VNODE_EVENT_ATTRIB;
+		if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_FILEID) &&
+		    (nvap->nva_fileid != npnvap->nva_fileid))
+			events |= VNODE_EVENT_ATTRIB;
+		if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_ARCHIVE) &&
+		    ((nvap->nva_flags & NFS_FFLAG_ARCHIVED) != (npnvap->nva_flags & NFS_FFLAG_ARCHIVED)))
+			events |= VNODE_EVENT_ATTRIB;
+		if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_HIDDEN) &&
+		    ((nvap->nva_flags & NFS_FFLAG_HIDDEN) != (npnvap->nva_flags & NFS_FFLAG_HIDDEN)))
+			events |= VNODE_EVENT_ATTRIB;
+		if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_TIME_CREATE) &&
+		    ((nvap->nva_timesec[NFSTIME_CREATE] != npnvap->nva_timesec[NFSTIME_CREATE]) ||
+		     (nvap->nva_timensec[NFSTIME_CREATE] != npnvap->nva_timensec[NFSTIME_CREATE])))
+			events |= VNODE_EVENT_ATTRIB;
+		if (!events && NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_TIME_BACKUP) &&
+		    ((nvap->nva_timesec[NFSTIME_BACKUP] != npnvap->nva_timesec[NFSTIME_BACKUP]) ||
+		     (nvap->nva_timensec[NFSTIME_BACKUP] != npnvap->nva_timensec[NFSTIME_BACKUP])))
+			events |= VNODE_EVENT_ATTRIB;
+	}
+
+	/* Copy the attributes to the attribute cache */
+	bcopy((caddr_t)nvap, (caddr_t)npnvap, sizeof(*nvap));
+
 	microuptime(&now);
 	np->n_attrstamp = now.tv_sec;
 	np->n_xid = *xidp;
+	/* NFS_FFLAG_IS_ATTR and NFS_FFLAG_TRIGGER_REFERRAL need to be sticky... */
+	if (vp && xattr)
+		nvap->nva_flags |= xattr;
+	if (vp && referral)
+		nvap->nva_flags |= referral;
+
+	if (NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_ACL)) {
+		/* we're updating the ACL */
+		if (nvap->nva_acl) {
+			/* make a copy of the acl for the cache */
+			npnvap->nva_acl = kauth_acl_alloc(nvap->nva_acl->acl_entrycount);
+			if (npnvap->nva_acl) {
+				bcopy(nvap->nva_acl, npnvap->nva_acl, KAUTH_ACL_COPYSIZE(nvap->nva_acl));
+			} else {
+				/* can't make a copy to cache, invalidate ACL cache */
+				NFS_BITMAP_CLR(npnvap->nva_bitmap, NFS_FATTR_ACL);
+				NACLINVALIDATE(np);
+				aclbit = 0;
+			}
+		}
+		if (acl) {
+			kauth_acl_free(acl);
+			acl = NULL;
+		}
+	}
+	if (NFS_BITMAP_ISSET(npnvap->nva_bitmap, NFS_FATTR_ACL)) {
+		/* update the ACL timestamp */
+		np->n_aclstamp = now.tv_sec;
+	} else {
+		/* we aren't updating the ACL, so restore original values */
+		if (aclbit)
+			NFS_BITMAP_SET(npnvap->nva_bitmap, NFS_FATTR_ACL);
+		npnvap->nva_acl = acl;
+	}
 
-	npnvap = &np->n_vattr;
-	bcopy((caddr_t)nvap, (caddr_t)npnvap, sizeof(*nvap));
+#if CONFIG_TRIGGERS
+	/*
+	 * For NFSv4, if the fsid doesn't match the fsid for the mount, then
+	 * this node is for a different file system on the server.  So we mark
+	 * this node as a trigger node that will trigger the mirror mount.
+	 */
+	if ((nmp->nm_vers >= NFS_VER4) && (nvap->nva_type == VDIR) &&
+	    ((np->n_vattr.nva_fsid.major != nmp->nm_fsid.major) ||
+	     (np->n_vattr.nva_fsid.minor != nmp->nm_fsid.minor)))
+		np->n_vattr.nva_flags |= NFS_FFLAG_TRIGGER;
+#endif
 
 	if (!vp || (nvap->nva_type != VREG)) {
 		np->n_size = nvap->nva_size;
@@ -1332,6 +1551,8 @@ nfs_loadattrcache(
 			 */
 			np->n_newsize = nvap->nva_size;
 			SET(np->n_flag, NUPDATESIZE);
+			if (monitored)
+				events |= VNODE_EVENT_ATTRIB | VNODE_EVENT_EXTEND;
 		}
 	}
 
@@ -1346,8 +1567,11 @@ nfs_loadattrcache(
 		}
 	}
 
-	FSDBG_BOT(527, 0, np, np->n_size, *xidp);
-	return (0);
+out:
+	if (monitored && events)
+		nfs_vnode_notify(np, events);
+	FSDBG_BOT(527, error, np, np->n_size, *xidp);
+	return (error);
 }
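
vnode_ismonitored() is true when something holds an EVFILT_VNODE watch on the vnode, and nfs_vnode_notify() is what ultimately feeds those watchers, so the comparisons above decide which kqueue notes an attribute-cache update fires. A minimal userspace sketch of such a watcher (the path is hypothetical, error checking omitted):

    #include <sys/event.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int
    main(void)
    {
            int fd = open("/Volumes/nfsmount/file", O_EVTONLY);
            int kq = kqueue();
            struct kevent ev;

            EV_SET(&ev, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR,
                NOTE_WRITE | NOTE_ATTRIB | NOTE_LINK | NOTE_DELETE, 0, NULL);
            kevent(kq, &ev, 1, NULL, 0, NULL);          /* register the watch */
            if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1) /* block for one event */
                    printf("vnode event fflags: 0x%x\n", ev.fflags);
            close(kq);
            close(fd);
            return (0);
    }
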
 
 /*
@@ -1359,16 +1583,22 @@ nfs_attrcachetimeout(nfsnode_t np)
 {
 	struct nfsmount *nmp;
 	struct timeval now;
-	int isdir, timeo;
+	int isdir;
+	uint32_t timeo;
 
 	if (!(nmp = NFSTONMP(np)))
 		return (0);
 
 	isdir = vnode_isdir(NFSTOV(np));
 
-	if ((np)->n_flag & NMODIFIED)
+	if ((nmp->nm_vers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK)) {
+		/* If we have a delegation, we always use the max timeout. */
+		timeo = isdir ? nmp->nm_acdirmax : nmp->nm_acregmax;
+	} else if ((np)->n_flag & NMODIFIED) {
+		/* If we have modifications, we always use the min timeout. */
 		timeo = isdir ? nmp->nm_acdirmin : nmp->nm_acregmin;
-	else {
+	} else {
+		/* Otherwise, we base the timeout on how old the file seems. */
 		/* Note that if the client and server clocks are way out of sync, */
 		/* timeout will probably get clamped to a min or max value */
 		microtime(&now);
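
So a delegated NFSv4 file always gets the longest attribute lifetime, a locally modified file the shortest, and everything else scales with how recently the file appears to have changed (the age-based computation follows in code not shown in this hunk, clamped to the min/max bounds). With the common mount defaults of acregmin=5s and acregmax=60s, for instance, a regular file holding a delegation caches attributes for 60s while one with pending modifications caches them for only 5s.
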
@@ -1396,26 +1626,32 @@ nfs_attrcachetimeout(nfsnode_t np)
  * Must be called with the node locked.
  */
 int
-nfs_getattrcache(nfsnode_t np, struct nfs_vattr *nvaper)
+nfs_getattrcache(nfsnode_t np, struct nfs_vattr *nvaper, int flags)
 {
 	struct nfs_vattr *nvap;
 	struct timeval nowup;
 	int32_t timeo;
 
-	if (!NATTRVALID(np)) {
+	/* Check if the attributes are valid. */
+	if (!NATTRVALID(np) || ((flags & NGA_ACL) && !NACLVALID(np))) {
 		FSDBG(528, np, 0, 0xffffff01, ENOENT);
 		OSAddAtomic(1, &nfsstats.attrcache_misses);
 		return (ENOENT);
 	}
 
+	/* Verify the cached attributes haven't timed out. */
 	timeo = nfs_attrcachetimeout(np);
-
 	microuptime(&nowup);
 	if ((nowup.tv_sec - np->n_attrstamp) >= timeo) {
 		FSDBG(528, np, 0, 0xffffff02, ENOENT);
 		OSAddAtomic(1, &nfsstats.attrcache_misses);
 		return (ENOENT);
 	}
+	if ((flags & NGA_ACL) && ((nowup.tv_sec - np->n_aclstamp) >= timeo)) {
+		FSDBG(528, np, 0, 0xffffff02, ENOENT);
+		OSAddAtomic(1, &nfsstats.attrcache_misses);
+		return (ENOENT);
+	}
 
 	nvap = &np->n_vattr;
 	FSDBG(528, np, nvap->nva_size, np->n_size, 0xcace);
@@ -1451,9 +1687,257 @@ nfs_getattrcache(nfsnode_t np, struct nfs_vattr *nvaper)
 			nvaper->nva_timensec[NFSTIME_MODIFY] = np->n_mtim.tv_nsec;
 		}
 	}
+	if (nvap->nva_acl) {
+		if (flags & NGA_ACL) {
+			nvaper->nva_acl = kauth_acl_alloc(nvap->nva_acl->acl_entrycount);
+			if (!nvaper->nva_acl)
+				return (ENOMEM);
+			bcopy(nvap->nva_acl, nvaper->nva_acl, KAUTH_ACL_COPYSIZE(nvap->nva_acl));
+		} else {
+			nvaper->nva_acl = NULL;
+		}
+	}
 	return (0);
 }
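
When NGA_ACL is passed, a successful return hands the caller its own copy of the cached ACL, so the caller must free it. A hedged sketch of the calling convention implied above (the node must be locked, per the function's header comment):

    struct nfs_vattr nva;
    int error = nfs_getattrcache(np, &nva, NGA_ACL);
    if (!error && nva.nva_acl) {
            /* ... examine nva.nva_acl ... */
            kauth_acl_free(nva.nva_acl);  /* caller owns the kauth_acl_alloc'd copy */
    }
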
 
+/*
+ * When creating file system objects:
+ * Don't bother setting UID if it's the same as the credential performing the create.
+ * Don't bother setting GID if it's the same as the directory or credential.
+ */
+void
+nfs_avoid_needless_id_setting_on_create(nfsnode_t dnp, struct vnode_attr *vap, vfs_context_t ctx)
+{
+	if (VATTR_IS_ACTIVE(vap, va_uid)) {
+		if (kauth_cred_getuid(vfs_context_ucred(ctx)) == vap->va_uid) {
+			VATTR_CLEAR_ACTIVE(vap, va_uid);
+			VATTR_CLEAR_ACTIVE(vap, va_uuuid);
+		}
+	}
+	if (VATTR_IS_ACTIVE(vap, va_gid)) {
+		if ((vap->va_gid == dnp->n_vattr.nva_gid) ||
+		    (kauth_cred_getgid(vfs_context_ucred(ctx)) == vap->va_gid)) {
+			VATTR_CLEAR_ACTIVE(vap, va_gid);
+			VATTR_CLEAR_ACTIVE(vap, va_guuid);
+		}
+	}
+}
+
+/*
+ * Convert a universal address string to a sockaddr structure.
+ *
+ * Universal addresses can be in the following formats:
+ *
+ * d = decimal (IPv4)
+ * x = hexadecimal (IPv6)
+ * p = port (decimal)
+ *
+ * d.d.d.d
+ * d.d.d.d.p.p
+ * x:x:x:x:x:x:x:x
+ * x:x:x:x:x:x:x:x.p.p
+ * x:x:x:x:x:x:d.d.d.d
+ * x:x:x:x:x:x:d.d.d.d.p.p
+ *
+ * IPv6 strings can also have a series of zeroes elided
+ * IPv6 strings can also have a %scope suffix at the end (after any port)
+ *
+ * rules & exceptions:
+ * - value before : is hex
+ * - value before . is dec
+ * - once . hit, all values are dec
+ * - hex+port case means value before first dot is actually hex
+ * - . is always preceded by digits except if last hex was double-colon
+ *
+ * scan, converting #s to bytes
+ * first time a . is encountered, scan the rest to count them.
+ * 2 dots = just port
+ * 3 dots = just IPv4 no port
+ * 5 dots = IPv4 and port
+ */
+
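A few universal addresses and what they decode to (addresses illustrative; the trailing p.p octets encode the port as high*256+low):

    "10.1.2.3.8.1"           ->  10.1.2.3, port 8*256+1 = 2049    (d.d.d.d.p.p)
    "fe80::1.8.1"            ->  [fe80::1]:2049                   (x:...:x.p.p)
    "::ffff:10.1.2.3.0.111"  ->  ::ffff:10.1.2.3, port 111        (x:...:d.d.d.d.p.p)
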
+#define IS_DIGIT(C) \
+	(((C) >= '0') && ((C) <= '9'))
+
+#define IS_XDIGIT(C) \
+	(IS_DIGIT(C) || \
+	 (((C) >= 'A') && ((C) <= 'F')) || \
+	 (((C) >= 'a') && ((C) <= 'f')))
+
+int
+nfs_uaddr2sockaddr(const char *uaddr, struct sockaddr *addr)
+{
+	const char *p, *pd;	/* pointers to current character in scan */
+	const char *pnum;	/* pointer to current number to decode */
+	const char *pscope;	/* pointer to IPv6 scope ID */
+	uint8_t a[18];		/* octet array to store address bytes */
+	int i;			/* index of next octet to decode */
+	int dci;		/* index of octet to insert double-colon zeroes */
+	int dcount, xdcount;	/* count of digits in current number */
+	int needmore;		/* set when we know we need more input (e.g. after colon, period) */
+	int dots;		/* # of dots */
+	int hex;		/* contains hex values */
+	unsigned long val;	/* decoded value */
+	int s;			/* index used for sliding array to insert elided zeroes */
+
+#define HEXVALUE	0
+#define DECIMALVALUE	1
+#define GET(TYPE) \
+	do { \
+		if ((dcount <= 0) || (dcount > (((TYPE) == DECIMALVALUE) ? 3 : 4))) \
+			return (0); \
+		if (((TYPE) == DECIMALVALUE) && xdcount) \
+			return (0); \
+		val = strtoul(pnum, NULL, ((TYPE) == DECIMALVALUE) ? 10 : 16); \
+		if (((TYPE) == DECIMALVALUE) && (val >= 256)) \
+			return (0); \
+		/* check if there is room left in the array */ \
+		if (i > (int)(sizeof(a) - (((TYPE) == HEXVALUE) ? 2 : 1) - ((dci != -1) ? 2 : 0))) \
+			return (0); \
+		if ((TYPE) == HEXVALUE) \
+			a[i++] = ((val >> 8) & 0xff); \
+		a[i++] = (val & 0xff); \
+	} while (0)
+
+	hex = 0;
+	dots = 0;
+	dci = -1;
+	i = dcount = xdcount = 0;
+	pnum = p = uaddr;
+	pscope = NULL;
+	needmore = 1;
+	if ((*p == ':') && (*++p != ':')) /* if it starts with colon, gotta be a double */
+		return (0);
+
+	while (*p) {
+		if (IS_XDIGIT(*p)) {
+			dcount++;
+			if (!IS_DIGIT(*p))
+				xdcount++;
+			needmore = 0;
+			p++;
+		} else if (*p == '.') {
+			/* rest is decimal IPv4 dotted quad and/or port */
+			if (!dots) {
+				/* this is the first, so count them */
+				for (pd = p; *pd; pd++) {
+					if (*pd == '.') {
+						if (++dots > 5)
+							return (0);
+					} else if (hex && (*pd == '%')) {
+						break;
+					} else if ((*pd < '0') || (*pd > '9')) {
+						return (0);
+					}
+				}
+				if ((dots != 2) && (dots != 3) && (dots != 5))
+					return (0);
+				if (hex && (dots == 2)) { /* hex+port */
+					if (!dcount && needmore)
+						return (0);
+					if (dcount) /* last hex may be elided zero */
+						GET(HEXVALUE);
+				} else {
+					GET(DECIMALVALUE);
+				}
+			} else {
+				GET(DECIMALVALUE);
+			}
+			dcount = xdcount = 0;
+			needmore = 1;
+			pnum = ++p;
+		} else if (*p == ':') {
+			hex = 1;
+			if (dots)
+				return (0);
+			if (!dcount) { /* missing number, probably double colon */
+				if (dci >= 0) /* can only have one double colon */
+					return (0);
+				dci = i;
+				needmore = 0;
+			} else {
+				GET(HEXVALUE);
+				dcount = xdcount = 0;
+				needmore = 1;
+			}
+			pnum = ++p;
+		} else if (*p == '%') { /* scope ID delimiter */
+			if (!hex)
+				return (0);
+			p++;
+			pscope = p;
+			break;
+		} else { /* unexpected character */
+			return (0);
+		}
+	}
+	if (needmore && !dcount)
+		return (0);
+	if (dcount) /* decode trailing number */
+		GET(dots ? DECIMALVALUE : HEXVALUE);
+	if (dci >= 0) {  /* got a double-colon at i, need to insert a range of zeroes */
+		/* if we got a port, slide to end of array */
+		/* otherwise, slide to end of address (non-port) values */
+		int end = ((dots == 2) || (dots == 5)) ? sizeof(a) : (sizeof(a) - 2);
+		if (i % 2) /* length of zero range must be multiple of 2 */
+			return (0);
+		if (i >= end) /* no room? */
+			return (0);
+		/* slide (i-dci) numbers up from index dci */
+		for (s=0; s < (i - dci); s++)
+			a[end-1-s] = a[i-1-s];
+		/* zero (end-i) numbers at index dci */
+		for (s=0; s < (end - i); s++)
+			a[dci+s] = 0;
+		i = end;
+	}
+
+	/* copy out resulting socket address */
+	if (hex) {
+		struct sockaddr_in6 *sin6 = (struct sockaddr_in6*)addr;
+		if ((((dots == 0) || (dots == 3)) && (i != (sizeof(a)-2))))
+			return (0);
+		if ((((dots == 2) || (dots == 5)) && (i != sizeof(a))))
+			return (0);
+		bzero(sin6, sizeof(struct sockaddr_in6));
+		sin6->sin6_len = sizeof(struct sockaddr_in6);
+		sin6->sin6_family = AF_INET6;
+		bcopy(a, &sin6->sin6_addr.s6_addr, sizeof(struct in6_addr));
+		if ((dots == 5) || (dots == 2))
+			sin6->sin6_port = htons((a[16] << 8) | a[17]);
+		if (pscope) {
+			for (p=pscope; IS_DIGIT(*p); p++)
+				;
+			if (*p && !IS_DIGIT(*p)) { /* name */
+				ifnet_t interface = NULL;
+				if (ifnet_find_by_name(pscope, &interface) == 0)
+					sin6->sin6_scope_id = ifnet_index(interface);
+				if (interface)
+					ifnet_release(interface);
+			} else { /* decimal number */
+				sin6->sin6_scope_id = strtoul(pscope, NULL, 10);
+			}
+			/* XXX should we also embed scope id for linklocal? */
+		}
+	} else {
+		struct sockaddr_in *sin = (struct sockaddr_in*)addr;
+		if ((dots != 3) && (dots != 5))
+			return (0);
+		if ((dots == 3) && (i != 4))
+			return (0);
+		if ((dots == 5) && (i != 6))
+			return (0);
+		bzero(sin, sizeof(struct sockaddr_in));
+		sin->sin_len = sizeof(struct sockaddr_in);
+		sin->sin_family = AF_INET;
+		bcopy(a, &sin->sin_addr.s_addr, sizeof(struct in_addr));
+		if (dots == 5)
+			sin->sin_port = htons((a[4] << 8) | a[5]);
+	}
+	return (1);
+}
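
A worked pass through the double-colon handling for "fe80::1": the scan stores 0xfe 0x80 at a[0..1], records dci=2 at the empty hextet, then stores 0x00 0x01 at a[2..3], leaving i=4. With no dots, end is 16, so the slide copies the two bytes at a[2..3] up to a[14..15] and zeroes a[2..13], producing the full address fe80:0000:0000:0000:0000:0000:0000:0001. Callers typically decode into a struct sockaddr_storage, as nfs_portmap_lookup() does:

    struct sockaddr_storage ss;
    struct sockaddr *saddr = (struct sockaddr *)&ss;

    if (!nfs_uaddr2sockaddr("fe80::1.8.1", saddr))  /* returns 1 on success, 0 on parse failure */
            error = EIO;
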
+
+
 #endif /* NFSCLIENT */
 
 /*
@@ -1478,8 +1962,7 @@ int nfsrv_free_netopt(struct radix_node *, void *);
 int nfsrv_free_addrlist(struct nfs_export *, struct user_nfs_export_args *);
 struct nfs_export_options *nfsrv_export_lookup(struct nfs_export *, mbuf_t);
 struct nfs_export *nfsrv_fhtoexport(struct nfs_filehandle *);
-int nfsrv_cmp_sockaddr(struct sockaddr_storage *, struct sockaddr_storage *);
-struct nfs_user_stat_node *nfsrv_get_user_stat_node(struct nfs_active_user_list *, struct sockaddr_storage *, uid_t);
+struct nfs_user_stat_node *nfsrv_get_user_stat_node(struct nfs_active_user_list *, struct sockaddr *, uid_t);
 void nfsrv_init_user_list(struct nfs_active_user_list *);
 void nfsrv_free_user_list(struct nfs_active_user_list *);
 
@@ -1939,7 +2422,6 @@ nfsrv_hang_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa)
 	unsigned int net;
 	user_addr_t uaddr;
 	kauth_cred_t cred;
-	struct ucred temp_cred;
 
 	uaddr = unxa->nxa_nets;
 	for (net = 0; net < unxa->nxa_netcount; net++, uaddr += sizeof(nxna)) {
@@ -1948,12 +2430,13 @@ nfsrv_hang_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa)
 			return (error);
 
 		if (nxna.nxna_flags & (NX_MAPROOT|NX_MAPALL)) {
-		        bzero(&temp_cred, sizeof(temp_cred));
-			temp_cred.cr_uid = nxna.nxna_cred.cr_uid;
-			temp_cred.cr_ngroups = nxna.nxna_cred.cr_ngroups;
+			struct posix_cred temp_pcred;
+			bzero(&temp_pcred, sizeof(temp_pcred));
+			temp_pcred.cr_uid = nxna.nxna_cred.cr_uid;
+			temp_pcred.cr_ngroups = nxna.nxna_cred.cr_ngroups;
 			for (i=0; i < nxna.nxna_cred.cr_ngroups && i < NGROUPS; i++)
-				temp_cred.cr_groups[i] = nxna.nxna_cred.cr_groups[i];
-			cred = kauth_cred_create(&temp_cred);
+				temp_pcred.cr_groups[i] = nxna.nxna_cred.cr_groups[i];
+			cred = posix_cred_create(&temp_pcred);
 			if (!IS_VALID_CRED(cred))
 				return (ENOMEM);
 		} else {
@@ -2035,13 +2518,34 @@ nfsrv_hang_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa)
 				if (cred == cred2) {
 					/* creds are same (or both NULL) */
 					matched = 1;
-				} else if (cred && cred2 && (cred->cr_uid == cred2->cr_uid) &&
-				    (cred->cr_ngroups == cred2->cr_ngroups)) {
-					for (i=0; i < cred2->cr_ngroups && i < NGROUPS; i++)
-						if (cred->cr_groups[i] != cred2->cr_groups[i])
-							break;
-					if (i >= cred2->cr_ngroups || i >= NGROUPS)
-						matched = 1;
+				} else if (cred && cred2 && (kauth_cred_getuid(cred) == kauth_cred_getuid(cred2))) {
+				    /*
+				     * Now compare the effective and
+				     * supplementary groups...
+				     *
+				     * Note: This comparison, as written,
+				     * does not correctly indicate that
+				     * the groups are equivalent, since
+				     * other than the first supplementary
+				     * group, which is also the effective
+				     * group, order on the remaining groups
+				     * doesn't matter, and this is an
+				     * ordered compare.
+				     */
+				    gid_t groups[NGROUPS];
+				    gid_t groups2[NGROUPS];
+				    int groupcount = NGROUPS;
+				    int group2count = NGROUPS;
+
+				    if (!kauth_cred_getgroups(cred, groups, &groupcount) &&
+					!kauth_cred_getgroups(cred2, groups2, &group2count) &&
+					groupcount == group2count) {
+					    for (i=0; i < group2count; i++)
+						    if (groups[i] != groups2[i])
+							    break;
+					    if (i >= group2count || i >= NGROUPS)
+						    matched = 1;
+				    }
 				}
 			}
 			if (IS_VALID_CRED(cred))
@@ -2167,7 +2671,8 @@ void enablequotas(struct mount *mp, vfs_context_t ctx); // XXX
 int
 nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx)
 {
-	int error = 0, pathlen;
+	int error = 0;
+	size_t pathlen;
 	struct nfs_exportfs *nxfs, *nxfs2, *nxfs3;
 	struct nfs_export *nx, *nx2, *nx3;
 	struct nfs_filehandle nfh;
@@ -2179,10 +2684,10 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx)
 
 	if (unxa->nxa_flags == NXA_CHECK) {
 		/* just check if the path is an NFS-exportable file system */
-		error = copyinstr(unxa->nxa_fspath, path, MAXPATHLEN, (size_t *)&pathlen);
+		error = copyinstr(unxa->nxa_fspath, path, MAXPATHLEN, &pathlen);
 		if (error)
 			return (error);
-		NDINIT(&mnd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
+		NDINIT(&mnd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
 			UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
 		error = namei(&mnd);
 		if (error)
@@ -2215,8 +2720,11 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx)
 		lck_rw_lock_exclusive(&nfsrv_export_rwlock);
 		while ((nxfs = LIST_FIRST(&nfsrv_exports))) {
 			mp = vfs_getvfs_by_mntonname(nxfs->nxfs_path);
-			if (mp)
+			if (mp) {
 				vfs_clearflags(mp, MNT_EXPORTED);
+				mount_iterdrop(mp);
+				mp = NULL;
+			}
 			/* delete all exports on this file system */
 			while ((nx = LIST_FIRST(&nxfs->nxfs_exports))) {
 				LIST_REMOVE(nx, nx_next);
@@ -2245,7 +2753,7 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx)
 		return (0);
 	}
 
-	error = copyinstr(unxa->nxa_fspath, path, MAXPATHLEN, (size_t *)&pathlen);
+	error = copyinstr(unxa->nxa_fspath, path, MAXPATHLEN, &pathlen);
 	if (error)
 		return (error);
 
@@ -2272,8 +2780,12 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx)
 		if ((unxa->nxa_flags & (NXA_ADD|NXA_OFFLINE)) == NXA_ADD) {
 			/* if adding, verify that the mount is still what we expect */
 			mp = vfs_getvfs_by_mntonname(nxfs->nxfs_path);
+			if (mp) {
+				mount_ref(mp, 0);
+				mount_iterdrop(mp);
+			}
 			/* find exported FS root vnode */
-			NDINIT(&mnd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
+			NDINIT(&mnd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
 				UIO_SYSSPACE, CAST_USER_ADDR_T(nxfs->nxfs_path), ctx);
 			error = namei(&mnd);
 			if (error)
@@ -2298,7 +2810,7 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx)
 		}
 
 		/* find exported FS root vnode */
-		NDINIT(&mnd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
+		NDINIT(&mnd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
 			UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
 		error = namei(&mnd);
 		if (error) {
@@ -2318,6 +2830,7 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx)
 				mvp = NULL;
 			} else {
 				mp = vnode_mount(mvp);
+				mount_ref(mp, 0);
 
 				/* make sure the file system is NFS-exportable */
 				nfh.nfh_len = NFSV3_MAX_FID_SIZE;
@@ -2366,7 +2879,7 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx)
 	}
 
 	if (unxa->nxa_exppath) {
-		error = copyinstr(unxa->nxa_exppath, path, MAXPATHLEN, (size_t *)&pathlen);
+		error = copyinstr(unxa->nxa_exppath, path, MAXPATHLEN, &pathlen);
 		if (error)
 			goto out;
 		LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) {
@@ -2483,6 +2996,9 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx)
 					vnode_get(xvp);
 				} else {
 					xnd.ni_cnd.cn_nameiop = LOOKUP;
+#if CONFIG_TRIGGERS
+					xnd.ni_op = OP_LOOKUP;
+#endif
 					xnd.ni_cnd.cn_flags = LOCKLEAF;
 					xnd.ni_pathlen = pathlen - 1;
 					xnd.ni_cnd.cn_nameptr = xnd.ni_cnd.cn_pnbuf = path;
@@ -2600,6 +3116,8 @@ out:
 		nameidone(&mnd);
 	}
 unlock_out:
+	if (mp)
+		mount_drop(mp, 0);
 	lck_rw_done(&nfsrv_export_rwlock);
 	return (error);
 }
@@ -2736,6 +3254,12 @@ nfsrv_fhtovp(
 
 	/* find mount structure */
 	mp = vfs_getvfs_by_mntonname((*nxp)->nx_fs->nxfs_path);
+	if (mp) {
+		error = vfs_busy(mp, LK_NOWAIT);
+		mount_iterdrop(mp);
+		if (error)
+			mp = NULL;
+	}
 	if (!mp) {
 		/*
 		 * We have an export, but no mount?
@@ -2746,6 +3270,7 @@ nfsrv_fhtovp(
 
 	fidp = nfhp->nfh_fhp + sizeof(*nxh);
 	error = VFS_FHTOVP(mp, nxh->nxh_fidlen, fidp, vpp, NULL);
+	vfs_unbusy(mp);
 	if (error)
 		return (error);
 	/* vnode pointer should be good at this point or ... */
@@ -2863,46 +3388,6 @@ nfsrv_fhmatch(struct nfs_filehandle *fh1, struct nfs_filehandle *fh2)
  * Functions for dealing with active user lists
  */
 
-/*
- * Compare address fields of two sockaddr_storage structures.
- * Returns zero if they match.
- */
-int
-nfsrv_cmp_sockaddr(struct sockaddr_storage *sock1, struct sockaddr_storage *sock2)
-{
-	struct sockaddr_in	*ipv4_sock1, *ipv4_sock2;
-	struct sockaddr_in6	*ipv6_sock1, *ipv6_sock2;
-
-	/* check for valid parameters */
-	if (sock1 == NULL || sock2 == NULL)
-		return 1;
-
-	/* check address length */
-	if (sock1->ss_len != sock2->ss_len)
-		return 1;
-
-	/* Check address family */
-	if (sock1->ss_family != sock2->ss_family)
-		return 1;
-
-	if (sock1->ss_family == AF_INET) {
-		/* IPv4 */
-		ipv4_sock1 = (struct sockaddr_in *)sock1;
-		ipv4_sock2 = (struct sockaddr_in *)sock2;
-
-		if (!bcmp(&ipv4_sock1->sin_addr, &ipv4_sock2->sin_addr, sizeof(struct in_addr)))
-			return 0;
-	} else {
-		/* IPv6 */
-		ipv6_sock1 = (struct sockaddr_in6 *)sock1;
-		ipv6_sock2 = (struct sockaddr_in6 *)sock2;
-
-		if (!bcmp(&ipv6_sock1->sin6_addr, &ipv6_sock2->sin6_addr, sizeof(struct in6_addr)))
-			return 0;
-	}
-	return 1;
-}
-
 /*
  * Search the hash table for a user node with a matching IP address and uid field.
  * If found, the node's tm_last timestamp is updated and the node is returned.
@@ -2913,7 +3398,7 @@ nfsrv_cmp_sockaddr(struct sockaddr_storage *sock1, struct sockaddr_storage *sock
  * The list's user_mutex lock MUST be held.
  */
 struct nfs_user_stat_node *
-nfsrv_get_user_stat_node(struct nfs_active_user_list *list, struct sockaddr_storage *sock, uid_t uid)
+nfsrv_get_user_stat_node(struct nfs_active_user_list *list, struct sockaddr *saddr, uid_t uid)
 {
 	struct nfs_user_stat_node		*unode;
 	struct timeval				now;
@@ -2922,7 +3407,7 @@ nfsrv_get_user_stat_node(struct nfs_active_user_list *list, struct sockaddr_stor
 	/* search the hash table */
 	head = NFS_USER_STAT_HASH(list->user_hashtbl, uid);
 	LIST_FOREACH(unode, head, hash_link) {
-		if (uid == unode->uid && nfsrv_cmp_sockaddr(sock, &unode->sock) == 0) {
+		if ((uid == unode->uid) && (nfs_sockaddr_cmp(saddr, (struct sockaddr*)&unode->sock) == 0)) {
 			/* found matching node */
 			break;
 		}
@@ -2964,7 +3449,7 @@ nfsrv_get_user_stat_node(struct nfs_active_user_list *list, struct sockaddr_stor
 
 	/* Initialize the node */
 	unode->uid = uid;
-	bcopy(sock, &unode->sock, sock->ss_len);
+	bcopy(saddr, &unode->sock, saddr->sa_len);
 	microtime(&now);
 	unode->ops = 0;
 	unode->bytes_read = 0;
@@ -2984,15 +3469,15 @@ nfsrv_update_user_stat(struct nfs_export *nx, struct nfsrv_descript *nd, uid_t u
 {
 	struct nfs_user_stat_node	*unode;
 	struct nfs_active_user_list	*ulist;
-	struct sockaddr_storage		*sock_stor;
+	struct sockaddr			*saddr;
 
 	if ((!nfsrv_user_stat_enabled) || (!nx) || (!nd) || (!nd->nd_nam))
 		return;
 
-	sock_stor = (struct sockaddr_storage *)mbuf_data(nd->nd_nam);
+	saddr = (struct sockaddr *)mbuf_data(nd->nd_nam);
 
 	/* check address family before going any further */
-	if ((sock_stor->ss_family != AF_INET) && (sock_stor->ss_family != AF_INET6))
+	if ((saddr->sa_family != AF_INET) && (saddr->sa_family != AF_INET6))
 		return;
 
 	ulist = &nx->nx_user_list;
@@ -3001,7 +3486,7 @@ nfsrv_update_user_stat(struct nfs_export *nx, struct nfsrv_descript *nd, uid_t u
 	lck_mtx_lock(&ulist->user_mutex);
 
 	/* get the user node */
-	unode = nfsrv_get_user_stat_node(ulist, sock_stor, uid);
+	unode = nfsrv_get_user_stat_node(ulist, saddr, uid);
 
 	if (!unode) {
 		lck_mtx_unlock(&ulist->user_mutex);
diff --git a/bsd/nfs/nfs_syscalls.c b/bsd/nfs/nfs_syscalls.c
index c28eac76c..d6de219ba 100644
--- a/bsd/nfs/nfs_syscalls.c
+++ b/bsd/nfs/nfs_syscalls.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc.  All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc.  All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -150,36 +150,40 @@ SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hing
 
 #if NFSCLIENT
 SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW, &nfs_iosize, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW, &nfs_access_cache_timeout, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW, &nfs_allow_async, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW, &nfs_statfs_rate_limit, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW, &nfsiod_thread_max, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD, &nfsiod_thread_count, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD, &nfs_lockd_mounts, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW, &nfs_max_async_writes, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, single_des, CTLFLAG_RW, &nfs_single_des, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW, &nfs_access_delete, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, single_des, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_single_des, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
 #endif /* NFSCLIENT */
 
 #if NFSSERVER
 SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW, &nfsrv_wg_delay, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW, &nfsrv_wg_delay_v3, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW, &nfsrv_require_resv_port, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW, &nfsrv_async, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW, &nfsrv_export_hash_size, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW, &nfsrv_reqcache_size, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW, &nfsrv_sock_max_rec_queue_length, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW, &nfsrv_user_stat_enabled, 0, "");
-SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW, &nfsrv_gss_context_ttl, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
+SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
 #if CONFIG_FSE
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW, &nfsrv_fsevents_enabled, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
 #endif
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW, &nfsd_thread_max, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD, &nfsd_thread_count, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
 #endif /* NFSSERVER */
 
 
@@ -191,11 +195,19 @@ nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
 	struct lockd_ans la;
 	int error;
 
-	if (uap->flag == NFSCLNT_LOCKDANS) {
+	switch (uap->flag) {
+	case NFSCLNT_LOCKDANS:
 		error = copyin(uap->argp, &la, sizeof(la));
-		return (error != 0 ? error : nfslockdans(p, &la));
+		if (!error)
+			error = nfslockdans(p, &la);
+		break;
+	case NFSCLNT_LOCKDNOTIFY:
+		error = nfslockdnotify(p, uap->argp);
+		break;
+	default:
+		error = EINVAL;
 	}
-	return EINVAL;
+	return (error);
 }
 
 /*
@@ -389,10 +401,10 @@ getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
 {
 	vnode_t vp;
 	struct nfs_filehandle nfh;
-	int error;
+	int error, fhlen, fidlen;
 	struct nameidata nd;
 	char path[MAXPATHLEN], *ptr;
-	u_int pathlen;
+	size_t pathlen;
 	struct nfs_exportfs *nxfs;
 	struct nfs_export *nx;
 
@@ -403,14 +415,20 @@ getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
 	if (error)
 		return (error);
 
-	error = copyinstr(uap->fname, path, MAXPATHLEN, (size_t *)&pathlen);
+	error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
+	if (!error)
+		error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
 	if (error)
 		return (error);
+	/* limit fh size to length specified (or v3 size by default) */
+	if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE))
+		fhlen = NFSV3_MAX_FH_SIZE;
+	fidlen = fhlen - sizeof(struct nfs_exphandle);
 
 	if (!nfsrv_is_initialized())
 		return (EINVAL);
 
-	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, 
+	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, 
 			UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
 	error = namei(&nd);
 	if (error)
@@ -452,9 +470,9 @@ getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
 	nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
 	nfh.nfh_xh.nxh_flags = 0;
 	nfh.nfh_xh.nxh_reserved = 0;
-	nfh.nfh_len = NFSV3_MAX_FID_SIZE;
+	nfh.nfh_len = fidlen;
 	error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
-	if (nfh.nfh_len > (int)NFSV3_MAX_FID_SIZE)
+	if (nfh.nfh_len > (uint32_t)fidlen)
 		error = EOVERFLOW;
 	nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
 	nfh.nfh_len += sizeof(nfh.nfh_xh);
@@ -465,7 +483,7 @@ out:
 	vnode_put(vp);
 	if (error)
 		return (error);
-	error = copyout((caddr_t)&nfh, uap->fhp, sizeof(nfh));
+	error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
 	return (error);
 }
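
With this scheme userspace pre-loads the desired handle size into the handle buffer before calling getfh(2); anything other than the v2/v3 sizes falls back to the v3 maximum. A hedged sketch, assuming the usual fhandle_t layout with the length word first:

    fhandle_t fh;

    fh.fh_len = NFSV2_MAX_FH_SIZE;       /* ask for a v2-sized handle */
    if (getfh("/export/path", &fh) < 0)  /* path illustrative */
            err(1, "getfh");
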
 
@@ -564,7 +582,7 @@ fhopen( proc_t p,
 
 	if ((error = VNOP_OPEN(vp, fmode, ctx)))
 		goto bad;
-	if ((error = vnode_ref_ext(vp, fmode)))
+	if ((error = vnode_ref_ext(vp, fmode, 0)))
 		goto bad;
 
 	/*
@@ -714,8 +732,12 @@ nfssvc_addsock(socket_t so, mbuf_t mynam)
 
 	sock_gettype(so, &sodomain, &sotype, &soprotocol);
 
-	/* There should be only one UDP socket */
-	if ((soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
+	/* There should be only one UDP socket for each of IPv4 and IPv6 */
+	if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
+		mbuf_freem(mynam);
+		return (EEXIST);
+	}
+	if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
 		mbuf_freem(mynam);
 		return (EEXIST);
 	}
@@ -763,14 +785,26 @@ nfssvc_addsock(socket_t so, mbuf_t mynam)
 	lck_mtx_lock(nfsd_mutex);
 
 	if (soprotocol == IPPROTO_UDP) {
-		/* There should be only one UDP socket */
-		if (nfsrv_udpsock) {
-			lck_mtx_unlock(nfsd_mutex);
-			nfsrv_slpfree(slp);
-			mbuf_freem(mynam);
-			return (EEXIST);
+		if (sodomain == AF_INET) {
+			/* There should be only one UDP/IPv4 socket */
+			if (nfsrv_udpsock) {
+				lck_mtx_unlock(nfsd_mutex);
+				nfsrv_slpfree(slp);
+				mbuf_freem(mynam);
+				return (EEXIST);
+			}
+			nfsrv_udpsock = slp;
+		}
+		if (sodomain == AF_INET6) {
+			/* There should be only one UDP/IPv6 socket */
+			if (nfsrv_udp6sock) {
+				lck_mtx_unlock(nfsd_mutex);
+				nfsrv_slpfree(slp);
+				mbuf_freem(mynam);
+				return (EEXIST);
+			}
+			nfsrv_udp6sock = slp;
 		}
-		nfsrv_udpsock = slp;
 	}
 
 	/* add the socket to the list */
@@ -782,11 +816,7 @@ nfssvc_addsock(socket_t so, mbuf_t mynam)
 	slp->ns_nam = mynam;
 
 	/* set up the socket upcall */
-	socket_lock(so, 1);
-	so->so_upcallarg = (caddr_t)slp;
-	so->so_upcall = nfsrv_rcv;
-	so->so_rcv.sb_flags |= SB_UPCALL;
-	socket_unlock(so, 1);
+	sock_setupcall(so, nfsrv_rcv, slp);
 	/* just playin' it safe */
 	sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
 
@@ -978,6 +1008,8 @@ nfssvc_nfsd(void)
 					mbuf_freem(nd->nd_nam2);
 				if (IS_VALID_CRED(nd->nd_cr))
 					kauth_cred_unref(&nd->nd_cr);
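+				/* also drop the request's reference on its RPCSEC_GSS context, if any */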
+				if (nd->nd_gss_context)
+					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
 				FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
 				nd = NULL;
 			}
@@ -1000,21 +1032,17 @@ nfssvc_nfsd(void)
 
 		    if (nfsrv_require_resv_port) {
 			/* Check if source port is a reserved port */
-			u_short port;
-			struct sockaddr *nam = mbuf_data(nd->nd_nam);
-			struct sockaddr_in *sin;
-
-			sin = (struct sockaddr_in *)nam;
-			port = ntohs(sin->sin_port);
-			if (port >= IPPORT_RESERVED && 
-			    nd->nd_procnum != NFSPROC_NULL) {
-			    char strbuf[MAX_IPv4_STR_LEN];
+			in_port_t port = 0;
+			struct sockaddr *saddr = mbuf_data(nd->nd_nam);
+
+			if (saddr->sa_family == AF_INET)
+				port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
+			else if (saddr->sa_family == AF_INET6)
+				port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
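+			/* reserved ports (< IPPORT_RESERVED, i.e. 1024) can only be bound by privileged processes */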
+			if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
 			    nd->nd_procnum = NFSPROC_NOOP;
 			    nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
 			    cacherep = RC_DOIT;
-			    printf("NFS request from unprivileged port (%s:%d)\n",
-			    	inet_ntop(AF_INET, &sin->sin_addr, strbuf, sizeof(strbuf)),
-			    	port);
 			}
 		    }
 
@@ -1130,6 +1158,8 @@ nfssvc_nfsd(void)
 				nfsm_chain_cleanup(&nd->nd_nmreq);
 				if (IS_VALID_CRED(nd->nd_cr))
 					kauth_cred_unref(&nd->nd_cr);
+				if (nd->nd_gss_context)
+					nfs_gss_svc_ctx_deref(nd->nd_gss_context);
 				FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
 				nfsrv_slpderef(slp);
 				lck_mtx_lock(nfsd_mutex);
@@ -1148,6 +1178,8 @@ nfssvc_nfsd(void)
 				mbuf_freem(nd->nd_nam2);
 			if (IS_VALID_CRED(nd->nd_cr))
 				kauth_cred_unref(&nd->nd_cr);
+			if (nd->nd_gss_context)
+				nfs_gss_svc_ctx_deref(nd->nd_gss_context);
 			FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
 			nd = NULL;
 		    }
@@ -1294,6 +1326,8 @@ nfsrv_slpfree(struct nfsrv_sock *slp)
 			mbuf_freem(nwp->nd_nam2);
 		if (IS_VALID_CRED(nwp->nd_cr))
 			kauth_cred_unref(&nwp->nd_cr);
+		if (nwp->nd_gss_context)
+			nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
 		FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC);
 	}
 	LIST_INIT(&slp->ns_tq);
@@ -1455,10 +1489,12 @@ nfsrv_cleanup(void)
 			 * Fire off the content modified fsevent for each
 			 * entry, remove it from the list, and free it.
 			 */
-			if (nfsrv_fsevents_enabled)
+			if (nfsrv_fsevents_enabled) {
+				fp->fm_context.vc_thread = current_thread();
 				add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
 						FSE_ARG_VNODE, fp->fm_vp,
 						FSE_ARG_DONE);
+			}
 			vnode_put(fp->fm_vp);
 			kauth_cred_unref(&fp->fm_context.vc_ucred);
 			nfp = LIST_NEXT(fp, fm_link);
@@ -1475,6 +1511,7 @@ nfsrv_cleanup(void)
 	nfsrv_cleancache();	/* And clear out server cache */
 
 	nfsrv_udpsock = NULL;
+	nfsrv_udp6sock = NULL;
 }
 
 #endif /* NFS_NOSERVER */
diff --git a/bsd/nfs/nfs_vfsops.c b/bsd/nfs/nfs_vfsops.c
index e92c58cdf..7a0323fde 100644
--- a/bsd/nfs/nfs_vfsops.c
+++ b/bsd/nfs/nfs_vfsops.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc.  All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -88,6 +88,7 @@
 #include <sys/socketvar.h>
 #include <sys/fcntl.h>
 #include <sys/quota.h>
+#include <sys/priv.h>
 #include <libkern/OSAtomic.h>
 
 #include <sys/vm.h>
@@ -159,21 +160,29 @@ int nfs_max_async_writes = NFS_DEFMAXASYNCWRITES;
 
 int nfs_iosize = NFS_IOSIZE;
 int nfs_access_cache_timeout = NFS_MAXATTRTIMO;
-int nfs_access_delete = 0;
+int nfs_access_delete = 1; /* too many servers get this wrong - workaround on by default */
+int nfs_access_dotzfs = 1;
+int nfs_access_for_getattr = 0;
 int nfs_allow_async = 0;
 int nfs_statfs_rate_limit = NFS_DEFSTATFSRATELIMIT;
 int nfs_lockd_mounts = 0;
 int nfs_lockd_request_sent = 0;
+int nfs_idmap_ctrl = NFS_IDMAP_CTRL_USE_IDMAP_SERVICE;
+int nfs_callback_port = 0;
 
 int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
 int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
 
 
-int		mountnfs(struct user_nfs_args *,mount_t,mbuf_t,vfs_context_t,vnode_t *);
+int		mountnfs(char *, mount_t, vfs_context_t, vnode_t *);
 static int	nfs_mount_diskless(struct nfs_dlmount *, const char *, int, vnode_t *, mount_t *, vfs_context_t);
 #if !defined(NO_MOUNT_PRIVATE)
 static int	nfs_mount_diskless_private(struct nfs_dlmount *, const char *, int, vnode_t *, mount_t *, vfs_context_t);
 #endif /* NO_MOUNT_PRIVATE */
+int		nfs_mount_connect(struct nfsmount *);
+void		nfs_mount_cleanup(struct nfsmount *);
+int		nfs_mountinfo_assemble(struct nfsmount *, struct xdrbuf *);
+int		nfs4_mount_update_path_with_symlink(struct nfsmount *, struct nfs_fs_path *, uint32_t, fhandle_t *, int *, fhandle_t *, vfs_context_t);
 
 /*
  * NFS VFS operations.
@@ -218,8 +227,8 @@ struct vfsops nfs_vfsops = {
 /*
  * version-specific NFS functions
  */
-int nfs3_mount(struct nfsmount *, vfs_context_t, struct user_nfs_args *, nfsnode_t *);
-int nfs4_mount(struct nfsmount *, vfs_context_t, struct user_nfs_args *, nfsnode_t *);
+int nfs3_mount(struct nfsmount *, vfs_context_t, nfsnode_t *);
+int nfs4_mount(struct nfsmount *, vfs_context_t, nfsnode_t *);
 int nfs3_fsinfo(struct nfsmount *, nfsnode_t, vfs_context_t);
 int nfs3_update_statfs(struct nfsmount *, vfs_context_t);
 int nfs4_update_statfs(struct nfsmount *, vfs_context_t);
@@ -247,7 +256,10 @@ struct nfs_funcs nfs3_funcs = {
 	nfs3_lookup_rpc_async,
 	nfs3_lookup_rpc_async_finish,
 	nfs3_remove_rpc,
-	nfs3_rename_rpc
+	nfs3_rename_rpc,
+	nfs3_setlock_rpc,
+	nfs3_unlock_rpc,
+	nfs3_getlock_rpc
 	};
 struct nfs_funcs nfs4_funcs = {
 	nfs4_mount,
@@ -265,7 +277,10 @@ struct nfs_funcs nfs4_funcs = {
 	nfs4_lookup_rpc_async,
 	nfs4_lookup_rpc_async_finish,
 	nfs4_remove_rpc,
-	nfs4_rename_rpc
+	nfs4_rename_rpc,
+	nfs4_setlock_rpc,
+	nfs4_unlock_rpc,
+	nfs4_getlock_rpc
 	};
 
 /*
@@ -358,8 +373,7 @@ nfs3_update_statfs(struct nfsmount *nmp, vfs_context_t ctx)
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
-	error = nfs_request(np, NULL, &nmreq, NFSPROC_FSSTAT, ctx,
-		   &nmrep, &xid, &status);
+	error = nfs_request(np, NULL, &nmreq, NFSPROC_FSSTAT, ctx, NULL, &nmrep, &xid, &status);
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
 	if (nfsvers == NFS_VER3)
@@ -418,6 +432,7 @@ nfs4_update_statfs(struct nfsmount *nmp, vfs_context_t ctx)
 	struct nfsm_chain nmreq, nmrep;
 	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
 	struct nfs_vattr nvattr;
+	struct nfsreq_secinfo_args si;
 
 	nfsvers = nmp->nm_vers;
 	np = nmp->nm_dnp;
@@ -426,6 +441,8 @@ nfs4_update_statfs(struct nfsmount *nmp, vfs_context_t ctx)
 	if ((error = vnode_get(NFSTOV(np))))
 		return (error);
 
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
+	NVATTR_INIT(&nvattr);
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
 
@@ -440,12 +457,11 @@ nfs4_update_statfs(struct nfsmount *nmp, vfs_context_t ctx)
 	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
 	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
 	NFS4_STATFS_ATTRIBUTES(bitmap);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
-	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status);
+	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &si, &nmrep, &xid, &status);
 	nfsm_chain_skip_tag(error, &nmrep);
 	nfsm_chain_get_32(error, &nmrep, numops);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
@@ -453,8 +469,7 @@ nfs4_update_statfs(struct nfsmount *nmp, vfs_context_t ctx)
 	nfsm_assert(error, NFSTONMP(np), ENXIO);
 	nfsmout_if(error);
 	lck_mtx_lock(&nmp->nm_lock);
-	NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap);
-	error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, NULL, NULL);
+	error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, NULL, NULL, NULL);
 	lck_mtx_unlock(&nmp->nm_lock);
 	nfsmout_if(error);
 	if ((lockerror = nfs_node_lock(np)))
@@ -467,6 +482,7 @@ nfs4_update_statfs(struct nfsmount *nmp, vfs_context_t ctx)
 	nfsmout_if(error);
 	nmp->nm_fsattr.nfsa_bsize = NFS_FABLKSIZE;
 nfsmout:
+	NVATTR_CLEANUP(&nvattr);
 	nfsm_chain_cleanup(&nmreq);
 	nfsm_chain_cleanup(&nmrep);
 	vnode_put(NFSTOV(np));
@@ -605,6 +621,8 @@ nfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t ctx)
 			caps |= VOL_CAP_FMT_HIDDEN_FILES;
 			valid |= VOL_CAP_FMT_HIDDEN_FILES;
 			// VOL_CAP_FMT_OPENDENYMODES
+//			caps |= VOL_CAP_FMT_OPENDENYMODES;
+//			valid |= VOL_CAP_FMT_OPENDENYMODES;
 		}
 		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] =
 			// VOL_CAP_FMT_PERSISTENTOBJECTIDS |
@@ -655,10 +673,18 @@ nfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t ctx)
 		if (nfsvers >= NFS_VER4) {
 			caps = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
 			valid = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
-			// VOL_CAP_INT_EXTENDED_SECURITY
-			// VOL_CAP_INT_NAMEDSTREAMS
-			// VOL_CAP_INT_EXTENDED_ATTR
-		} else if ((nmp->nm_flag & NFSMNT_NOLOCKS)) {
+			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL)
+				caps |= VOL_CAP_INT_EXTENDED_SECURITY;
+			valid |= VOL_CAP_INT_EXTENDED_SECURITY;
+			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR)
+				caps |= VOL_CAP_INT_EXTENDED_ATTR;
+			valid |= VOL_CAP_INT_EXTENDED_ATTR;
+#if NAMEDSTREAMS
+			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR)
+				caps |= VOL_CAP_INT_NAMEDSTREAMS;
+			valid |= VOL_CAP_INT_NAMEDSTREAMS;
+#endif
+		} else if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
 			/* locks disabled on this mount, so they definitely won't work */
 			valid = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK;
 		} else if (nmp->nm_state & NFSSTA_LOCKSWORK) {
@@ -681,6 +707,7 @@ nfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t ctx)
 			// VOL_CAP_INT_MANLOCK |
 			// VOL_CAP_INT_NAMEDSTREAMS |
 			// VOL_CAP_INT_EXTENDED_ATTR |
+			VOL_CAP_INT_REMOTE_EVENT |
 			caps;
 		fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] =
 			VOL_CAP_INT_SEARCHFS |
@@ -698,6 +725,7 @@ nfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t ctx)
 			// VOL_CAP_INT_MANLOCK |
 			// VOL_CAP_INT_NAMEDSTREAMS |
 			// VOL_CAP_INT_EXTENDED_ATTR |
+			VOL_CAP_INT_REMOTE_EVENT |
 			valid;
 
 		fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
@@ -749,8 +777,7 @@ nfs3_fsinfo(struct nfsmount *nmp, nfsnode_t np, vfs_context_t ctx)
 	nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, np->n_fhp, np->n_fhsize);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
-	error = nfs_request(np, NULL, &nmreq, NFSPROC_FSINFO, ctx,
-			&nmrep, &xid, &status);
+	error = nfs_request(np, NULL, &nmreq, NFSPROC_FSINFO, ctx, NULL, &nmrep, &xid, &status);
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
 	nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
@@ -770,7 +797,7 @@ nfs3_fsinfo(struct nfsmount *nmp, nfsnode_t np, vfs_context_t ctx)
 	if (prefsize < nmp->nm_rsize)
 		nmp->nm_rsize = (prefsize + NFS_FABLKSIZE - 1) &
 			~(NFS_FABLKSIZE - 1);
-	if (maxsize < nmp->nm_rsize) {
+	if ((maxsize > 0) && (maxsize < nmp->nm_rsize)) {
 		nmp->nm_rsize = maxsize & ~(NFS_FABLKSIZE - 1);
 		if (nmp->nm_rsize == 0)
 			nmp->nm_rsize = maxsize;
@@ -784,7 +811,7 @@ nfs3_fsinfo(struct nfsmount *nmp, nfsnode_t np, vfs_context_t ctx)
 	if (prefsize < nmp->nm_wsize)
 		nmp->nm_wsize = (prefsize + NFS_FABLKSIZE - 1) &
 			~(NFS_FABLKSIZE - 1);
-	if (maxsize < nmp->nm_wsize) {
+	if ((maxsize > 0) && (maxsize < nmp->nm_wsize)) {
 		nmp->nm_wsize = maxsize & ~(NFS_FABLKSIZE - 1);
 		if (nmp->nm_wsize == 0)
 			nmp->nm_wsize = maxsize;
@@ -793,10 +820,11 @@ nfs3_fsinfo(struct nfsmount *nmp, nfsnode_t np, vfs_context_t ctx)
 
 	nfsm_chain_get_32(error, &nmrep, prefsize);
 	nfsmout_if(error);
-	if (prefsize < nmp->nm_readdirsize)
+	if ((prefsize > 0) && (prefsize < nmp->nm_readdirsize))
 		nmp->nm_readdirsize = prefsize;
-	if (maxsize < nmp->nm_readdirsize)
-		nmp->nm_readdirsize = maxsize;
+	if ((nmp->nm_fsattr.nfsa_maxread > 0) &&
+	    (nmp->nm_fsattr.nfsa_maxread < nmp->nm_readdirsize))
+		nmp->nm_readdirsize = nmp->nm_fsattr.nfsa_maxread;
 
 	nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_maxfilesize);
 
@@ -846,7 +874,6 @@ int
 nfs_mountroot(void)
 {
 	struct nfs_diskless nd;
-	struct nfs_vattr nvattr;
 	mount_t mp = NULL;
 	vnode_t vp = NULL;
 	vfs_context_t ctx;
@@ -864,9 +891,9 @@ nfs_mountroot(void)
 	 */
 	bzero((caddr_t) &nd, sizeof(nd));
 	error = nfs_boot_init(&nd);
-	if (error) {
-		panic("nfs_boot_init failed with %d\n", error);
-	}
+	if (error)
+		panic("nfs_boot_init: unable to initialize NFS root system information, "
+		      "error %d, check configuration: %s\n", error, PE_boot_args());
 
 	/*
 	 * Try NFSv3 first, then fallback to NFSv2.
@@ -895,27 +922,29 @@ tryagain:
 		}
 		if (v3) {
 			if (sotype == SOCK_STREAM) {
-				printf("nfs_boot_getfh(v3,TCP) failed with %d, trying UDP...\n", error);
+				printf("NFS mount (v3,TCP) failed with error %d, trying UDP...\n", error);
 				sotype = SOCK_DGRAM;
 				goto tryagain;
 			}
-			printf("nfs_boot_getfh(v3,UDP) failed with %d, trying v2...\n", error);
+			printf("NFS mount (v3,UDP) failed with error %d, trying v2...\n", error);
 			v3 = 0;
 			sotype = SOCK_STREAM;
 			goto tryagain;
 		} else if (sotype == SOCK_STREAM) {
-			printf("nfs_boot_getfh(v2,TCP) failed with %d, trying UDP...\n", error);
+			printf("NFS mount (v2,TCP) failed with error %d, trying UDP...\n", error);
 			sotype = SOCK_DGRAM;
 			goto tryagain;
+		} else {
+			printf("NFS mount (v2,UDP) failed with error %d, giving up...\n", error);
 		}
 		switch(error) {
 		case EPROGUNAVAIL:
-			panic("nfs_boot_getfh(v2,UDP) failed: NFS server mountd not responding - check server configuration: %s", PE_boot_args());
+			panic("NFS mount failed: NFS server mountd not responding, check server configuration: %s", PE_boot_args());
 		case EACCES:
 		case EPERM:
-			panic("nfs_boot_getfh(v2,UDP) failed: NFS server refused mount - check server configuration: %s", PE_boot_args());
+			panic("NFS mount failed: NFS server refused mount, check server configuration: %s", PE_boot_args());
 		default:
-			panic("nfs_boot_getfh(v2,UDP) failed with %d: %s", error, PE_boot_args());
+			panic("NFS mount failed with error %d, check configuration: %s", error, PE_boot_args());
 		}
 	}
 
@@ -943,20 +972,22 @@ tryagain:
 	{
 		if (v3) {
 			if (sotype == SOCK_STREAM) {
-				printf("nfs_mount_diskless(v3,TCP) failed with %d, trying UDP...\n", error);
+				printf("NFS root mount (v3,TCP) failed with %d, trying UDP...\n", error);
 				sotype = SOCK_DGRAM;
 				goto tryagain;
 			}
-			printf("nfs_mount_diskless(v3,UDP) failed with %d, trying v2...\n", error);
+			printf("NFS root mount (v3,UDP) failed with %d, trying v2...\n", error);
 			v3 = 0;
 			sotype = SOCK_STREAM;
 			goto tryagain;
 		} else if (sotype == SOCK_STREAM) {
-			printf("nfs_mount_diskless(v2,TCP) failed with %d, trying UDP...\n", error);
+			printf("NFS root mount (v2,TCP) failed with %d, trying UDP...\n", error);
 			sotype = SOCK_DGRAM;
 			goto tryagain;
+		} else {
+			printf("NFS root mount (v2,UDP) failed with error %d, giving up...\n", error);
 		}
-		panic("nfs_mount_diskless(v2,UDP) root failed with %d: %s\n", error, PE_boot_args());
+		panic("NFS root mount failed with error %d, check configuration: %s\n", error, PE_boot_args());
 	}
 	}
 	printf("root on %s\n", nd.nd_root.ndm_mntfrom);
@@ -969,9 +1000,8 @@ tryagain:
 	if (nd.nd_private.ndm_saddr.sin_addr.s_addr) {
 	    error = nfs_mount_diskless_private(&nd.nd_private, "/private",
 					       0, &vppriv, &mppriv, ctx);
-	    if (error) {
-		panic("nfs_mount_diskless private failed with %d\n", error);
-	    }
+	    if (error)
+		panic("NFS /private mount failed with error %d, check configuration: %s\n", error, PE_boot_args());
 	    printf("private on %s\n", nd.nd_private.ndm_mntfrom);
 
 	    vfs_unbusy(mppriv);
@@ -990,8 +1020,9 @@ tryagain:
 		FREE_ZONE(nd.nd_private.ndm_path, MAXPATHLEN, M_NAMEI);
 
 	/* Get root attributes (for the time). */
-	error = nfs_getattr(VTONFS(vp), &nvattr, ctx, NGA_UNCACHED);
-	if (error) panic("nfs_mountroot: getattr for root");
+	error = nfs_getattr(VTONFS(vp), NULL, ctx, NGA_UNCACHED);
+	if (error)
+		panic("NFS mount: failed to get attributes for root directory, error %d, check server", error);
 	return (0);
 }
 
@@ -1007,13 +1038,18 @@ nfs_mount_diskless(
 	mount_t *mpp,
 	vfs_context_t ctx)
 {
-	struct user_nfs_args args;
 	mount_t mp;
-	mbuf_t m;
-	int error;
+	int error, numcomps;
+	char *xdrbuf, *p, *cp, *frompath, *endserverp;
+	char uaddr[MAX_IPv4_STR_LEN];
+	struct xdrbuf xb;
+	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
+	uint32_t mflags_mask[NFS_MFLAG_BITMAP_LEN];
+	uint32_t mflags[NFS_MFLAG_BITMAP_LEN];
+	uint32_t argslength_offset, attrslength_offset, end_offset;
 
 	if ((error = vfs_rootmountalloc("nfs", ndmntp->ndm_mntfrom, &mp))) {
-		printf("nfs_mount_diskless: NFS not configured");
+		printf("nfs_mount_diskless: NFS not configured\n");
 		return (error);
 	}
 
@@ -1021,26 +1057,112 @@ nfs_mount_diskless(
 	if (!(mntflag & MNT_RDONLY))
 		mp->mnt_flag &= ~MNT_RDONLY;
 
-	/* Initialize mount args. */
-	bzero((caddr_t) &args, sizeof(args));
-	args.addr     = CAST_USER_ADDR_T(&ndmntp->ndm_saddr);
-	args.addrlen  = ndmntp->ndm_saddr.sin_len;
-	args.sotype   = ndmntp->ndm_sotype;
-	args.fh       = CAST_USER_ADDR_T(&ndmntp->ndm_fh[0]);
-	args.fhsize   = ndmntp->ndm_fhlen;
-	args.hostname = CAST_USER_ADDR_T(ndmntp->ndm_mntfrom);
-	args.flags    = NFSMNT_RESVPORT;
-	if (ndmntp->ndm_nfsv3)
-		args.flags |= NFSMNT_NFSV3;
-
-	error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &m);
+	/* find the server-side path being mounted */
+	frompath = ndmntp->ndm_mntfrom;
+	if (*frompath == '[') {  /* skip IPv6 literal address */
+		while (*frompath && (*frompath != ']'))
+			frompath++;
+		if (*frompath == ']')
+			frompath++;
+	}
+	while (*frompath && (*frompath != ':'))
+		frompath++;
+	endserverp = frompath;
+	while (*frompath && (*frompath == ':'))
+		frompath++;
+	/* count fs location path components */
+	p = frompath;
+	while (*p && (*p == '/'))
+		p++;
+	numcomps = 0;
+	while (*p) {
+		numcomps++;
+		while (*p && (*p != '/'))
+			p++;
+		while (*p && (*p == '/'))
+			p++;
+	}
+
+	/* convert address to universal address string */
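+	/* (inet_ntop() returns its destination buffer on success, NULL on failure) */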
+	if (inet_ntop(AF_INET, &ndmntp->ndm_saddr.sin_addr, uaddr, sizeof(uaddr)) != uaddr) {
+		printf("nfs_mount_diskless: bad address\n");
+		return (EINVAL);
+	}
+
+	/* prepare mount attributes */
+	NFS_BITMAP_ZERO(mattrs, NFS_MATTR_BITMAP_LEN);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_VERSION);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_SOCKET_TYPE);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_PORT);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_FH);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_FS_LOCATIONS);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFLAGS);
+
+	/* prepare mount flags */
+	NFS_BITMAP_ZERO(mflags_mask, NFS_MFLAG_BITMAP_LEN);
+	NFS_BITMAP_ZERO(mflags, NFS_MFLAG_BITMAP_LEN);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RESVPORT);
+	NFS_BITMAP_SET(mflags, NFS_MFLAG_RESVPORT);
+
+	/* build xdr buffer */
+	xb_init_buffer(&xb, NULL, 0);
+	xb_add_32(error, &xb, NFS_ARGSVERSION_XDR);
+	argslength_offset = xb_offset(&xb);
+	xb_add_32(error, &xb, 0); // args length
+	xb_add_32(error, &xb, NFS_XDRARGS_VERSION_0);
+	xb_add_bitmap(error, &xb, mattrs, NFS_MATTR_BITMAP_LEN);
+	attrslength_offset = xb_offset(&xb);
+	xb_add_32(error, &xb, 0); // attrs length
+	xb_add_32(error, &xb, ndmntp->ndm_nfsv3 ? 3 : 2); // NFS version
+	xb_add_string(error, &xb, ((ndmntp->ndm_sotype == SOCK_DGRAM) ? "udp" : "tcp"), 3);
+	xb_add_32(error, &xb, ntohs(ndmntp->ndm_saddr.sin_port)); // NFS port
+	xb_add_fh(error, &xb, &ndmntp->ndm_fh[0], ndmntp->ndm_fhlen);
+	/* fs location */
+	xb_add_32(error, &xb, 1); /* fs location count */
+	xb_add_32(error, &xb, 1); /* server count */
+	xb_add_string(error, &xb, ndmntp->ndm_mntfrom, (endserverp - ndmntp->ndm_mntfrom)); /* server name */
+	xb_add_32(error, &xb, 1); /* address count */
+	xb_add_string(error, &xb, uaddr, strlen(uaddr)); /* address */
+	xb_add_32(error, &xb, 0); /* empty server info */
+	xb_add_32(error, &xb, numcomps); /* pathname component count */
+	p = frompath;
+	while (*p && (*p == '/'))
+		p++;
+	while (*p) {
+		cp = p;
+		while (*p && (*p != '/'))
+			p++;
+		xb_add_string(error, &xb, cp, (p - cp)); /* component */
+		if (error)
+			break;
+		while (*p && (*p == '/'))
+			p++;
+	}
+	xb_add_32(error, &xb, 0); /* empty fsl info */
+	xb_add_32(error, &xb, mntflag); /* MNT flags */
+	xb_build_done(error, &xb);
+
+	/* update opaque counts */
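+	/* (both length words were streamed as zero above; seek back and fill them in now that the final size is known) */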
+	end_offset = xb_offset(&xb);
+	if (!error) {
+		error = xb_seek(&xb, argslength_offset);
+		xb_add_32(error, &xb, end_offset - argslength_offset + XDRWORD/*version*/);
+	}
+	if (!error) {
+		error = xb_seek(&xb, attrslength_offset);
+		xb_add_32(error, &xb, end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
+	}
 	if (error) {
-		printf("nfs_mount_diskless: mbuf_get(soname) failed");
+		printf("nfs_mount_diskless: error %d assembling mount args\n", error);
+		xb_cleanup(&xb);
 		return (error);
 	}
-	mbuf_setlen(m, ndmntp->ndm_saddr.sin_len);
-	bcopy(&ndmntp->ndm_saddr, mbuf_data(m), ndmntp->ndm_saddr.sin_len);
-	if ((error = mountnfs(&args, mp, m, ctx, vpp))) {
+	/* grab the assembled buffer */
+	xdrbuf = xb_buffer_base(&xb);
+	xb.xb_flags &= ~XB_CLEANUP;
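+	/* note: clearing XB_CLEANUP transfers buffer ownership, so the xb_cleanup() below won't free it */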
+
+	/* do the mount */
+	if ((error = mountnfs(xdrbuf, mp, ctx, vpp))) {
 		printf("nfs_mountroot: mount %s failed: %d\n", mntname, error);
 		// XXX vfs_rootmountfailed(mp);
 		mount_list_lock();
@@ -1052,10 +1174,11 @@ nfs_mount_diskless(
 		mac_mount_label_destroy(mp);
 #endif
 		FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
-		return (error);
+	} else {
+		*mpp = mp;
 	}
-	*mpp = mp;
-	return (0);
+	xb_cleanup(&xb);
+	return (error);
 }
 
 #if !defined(NO_MOUNT_PRIVATE)
@@ -1072,16 +1195,21 @@ nfs_mount_diskless_private(
 	mount_t *mpp,
 	vfs_context_t ctx)
 {
-	struct user_nfs_args args;
 	mount_t mp;
-	mbuf_t m;
-	int error;
+	int error, numcomps;
 	proc_t procp;
 	struct vfstable *vfsp;
 	struct nameidata nd;
 	vnode_t vp;
+	char *xdrbuf = NULL, *p, *cp, *frompath, *endserverp;
+	char uaddr[MAX_IPv4_STR_LEN];
+	struct xdrbuf xb;
+	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
+	uint32_t mflags_mask[NFS_MFLAG_BITMAP_LEN], mflags[NFS_MFLAG_BITMAP_LEN];
+	uint32_t argslength_offset, attrslength_offset, end_offset;
 
 	procp = current_proc(); /* XXX */
+	xb_init(&xb, 0);
 
 	{
 	/*
@@ -1107,7 +1235,7 @@ nfs_mount_diskless_private(
 	/*
 	 * Get vnode to be covered
 	 */
-	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
+	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
 	    CAST_USER_ADDR_T(mntname), ctx);
 	if ((error = namei(&nd))) {
 		printf("nfs_mountroot: private namei failed!\n");
@@ -1189,26 +1317,112 @@ nfs_mount_diskless_private(
 	mac_mount_label_associate(ctx, mp);
 #endif
 
-	/* Initialize mount args. */
-	bzero((caddr_t) &args, sizeof(args));
-	args.addr     = CAST_USER_ADDR_T(&ndmntp->ndm_saddr);
-	args.addrlen  = ndmntp->ndm_saddr.sin_len;
-	args.sotype   = ndmntp->ndm_sotype;
-	args.fh       = CAST_USER_ADDR_T(ndmntp->ndm_fh);
-	args.fhsize   = ndmntp->ndm_fhlen;
-	args.hostname = CAST_USER_ADDR_T(ndmntp->ndm_mntfrom);
-	args.flags    = NFSMNT_RESVPORT;
-	if (ndmntp->ndm_nfsv3)
-		args.flags |= NFSMNT_NFSV3;
-
-	error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &m);
+	/* find the server-side path being mounted */
+	frompath = ndmntp->ndm_mntfrom;
+	if (*frompath == '[') {  /* skip IPv6 literal address */
+		while (*frompath && (*frompath != ']'))
+			frompath++;
+		if (*frompath == ']')
+			frompath++;
+	}
+	while (*frompath && (*frompath != ':'))
+		frompath++;
+	endserverp = frompath;
+	while (*frompath && (*frompath == ':'))
+		frompath++;
+	/* count fs location path components */
+	p = frompath;
+	while (*p && (*p == '/'))
+		p++;
+	numcomps = 0;
+	while (*p) {
+		numcomps++;
+		while (*p && (*p != '/'))
+			p++;
+		while (*p && (*p == '/'))
+			p++;
+	}
+
+	/* convert address to universal address string */
+	if (inet_ntop(AF_INET, &ndmntp->ndm_saddr.sin_addr, uaddr, sizeof(uaddr)) != uaddr) {
+		printf("nfs_mountroot: bad address\n");
+		error = EINVAL;
+		goto out;
+	}
+
+	/* prepare mount attributes */
+	NFS_BITMAP_ZERO(mattrs, NFS_MATTR_BITMAP_LEN);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_VERSION);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_SOCKET_TYPE);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_PORT);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_FH);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_FS_LOCATIONS);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFLAGS);
+
+	/* prepare mount flags */
+	NFS_BITMAP_ZERO(mflags_mask, NFS_MFLAG_BITMAP_LEN);
+	NFS_BITMAP_ZERO(mflags, NFS_MFLAG_BITMAP_LEN);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RESVPORT);
+	NFS_BITMAP_SET(mflags, NFS_MFLAG_RESVPORT);
+
+	/* build xdr buffer */
+	xb_init_buffer(&xb, NULL, 0);
+	xb_add_32(error, &xb, NFS_ARGSVERSION_XDR);
+	argslength_offset = xb_offset(&xb);
+	xb_add_32(error, &xb, 0); // args length
+	xb_add_32(error, &xb, NFS_XDRARGS_VERSION_0);
+	xb_add_bitmap(error, &xb, mattrs, NFS_MATTR_BITMAP_LEN);
+	attrslength_offset = xb_offset(&xb);
+	xb_add_32(error, &xb, 0); // attrs length
+	xb_add_32(error, &xb, ndmntp->ndm_nfsv3 ? 3 : 2); // NFS version
+	xb_add_string(error, &xb, ((ndmntp->ndm_sotype == SOCK_DGRAM) ? "udp" : "tcp"), 3);
+	xb_add_32(error, &xb, ntohs(ndmntp->ndm_saddr.sin_port)); // NFS port
+	xb_add_fh(error, &xb, &ndmntp->ndm_fh[0], ndmntp->ndm_fhlen);
+	/* fs location */
+	xb_add_32(error, &xb, 1); /* fs location count */
+	xb_add_32(error, &xb, 1); /* server count */
+	xb_add_string(error, &xb, ndmntp->ndm_mntfrom, (endserverp - ndmntp->ndm_mntfrom)); /* server name */
+	xb_add_32(error, &xb, 1); /* address count */
+	xb_add_string(error, &xb, uaddr, strlen(uaddr)); /* address */
+	xb_add_32(error, &xb, 0); /* empty server info */
+	xb_add_32(error, &xb, numcomps); /* pathname component count */
+	p = frompath;
+	while (*p && (*p == '/'))
+		p++;
+	while (*p) {
+		cp = p;
+		while (*p && (*p != '/'))
+			p++;
+		xb_add_string(error, &xb, cp, (p - cp)); /* component */
+		if (error)
+			break;
+		while (*p && (*p == '/'))
+			p++;
+	}
+	xb_add_32(error, &xb, 0); /* empty fsl info */
+	xb_add_32(error, &xb, mntflag); /* MNT flags */
+	xb_build_done(error, &xb);
+
+	/* update opaque counts */
+	end_offset = xb_offset(&xb);
+	if (!error) {
+		error = xb_seek(&xb, argslength_offset);
+		xb_add_32(error, &xb, end_offset - argslength_offset + XDRWORD/*version*/);
+	}
+	if (!error) {
+		error = xb_seek(&xb, attrslength_offset);
+		xb_add_32(error, &xb, end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
+	}
 	if (error) {
-		printf("nfs_mount_diskless_private: mbuf_get(soname) failed");
+		printf("nfs_mountroot: error %d assembling mount args\n", error);
 		goto out;
 	}
-	mbuf_setlen(m, ndmntp->ndm_saddr.sin_len);
-	bcopy(&ndmntp->ndm_saddr, mbuf_data(m), ndmntp->ndm_saddr.sin_len);
-	if ((error = mountnfs(&args, mp, m, ctx, &vp))) {
+	/* grab the assembled buffer */
+	xdrbuf = xb_buffer_base(&xb);
+	xb.xb_flags &= ~XB_CLEANUP;
+
+	/* do the mount */
+	if ((error = mountnfs(xdrbuf, mp, ctx, &vp))) {
 		printf("nfs_mountroot: mount %s failed: %d\n", mntname, error);
 		mount_list_lock();
 		vfsp->vfc_refcount--;
@@ -1225,63 +1439,65 @@ nfs_mount_diskless_private(
 	*mpp = mp;
 	*vpp = vp;
 out:
+	xb_cleanup(&xb);
 	return (error);
 }
 #endif /* NO_MOUNT_PRIVATE */
 
 /*
- * VFS Operations.
- *
- * mount system call
+ * Convert old-style NFS mount args to XDR.
  */
-int
-nfs_vfs_mount(mount_t mp, vnode_t vp, user_addr_t data, vfs_context_t ctx)
+static int
+nfs_convert_old_nfs_args(mount_t mp, user_addr_t data, vfs_context_t ctx, int argsversion, int inkernel, char **xdrbufp)
 {
-	int error, argsvers;
+	int error = 0, args64bit, argsize, numcomps;
 	struct user_nfs_args args;
 	struct nfs_args tempargs;
-	mbuf_t nam;
+	caddr_t argsp;
 	size_t len;
-	u_char nfh[NFSX_V3FHMAX];
-	char *mntfrom;
-
-	error = copyin(data, (caddr_t)&argsvers, sizeof (argsvers));
-	if (error)
-		return (error);
-
-	switch (argsvers) {
+	u_char nfh[NFS4_FHSIZE];
+	char *mntfrom, *endserverp, *frompath, *p, *cp;
+	struct sockaddr_storage ss;
+	void *sinaddr;
+	char uaddr[MAX_IPv6_STR_LEN];
+	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
+	uint32_t mflags_mask[NFS_MFLAG_BITMAP_LEN], mflags[NFS_MFLAG_BITMAP_LEN];
+	uint32_t nfsvers, nfslockmode = 0, argslength_offset, attrslength_offset, end_offset;
+	struct xdrbuf xb;
+
+	*xdrbufp = NULL;
+
+	/* allocate a temporary buffer for mntfrom */
+	MALLOC_ZONE(mntfrom, char*, MAXPATHLEN, M_NAMEI, M_WAITOK);
+	if (!mntfrom)
+		return (ENOMEM);
+
+	args64bit = (inkernel || vfs_context_is64bit(ctx));
+	argsp = args64bit ? (void*)&args : (void*)&tempargs;
+
+	argsize = args64bit ? sizeof(args) : sizeof(tempargs);
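+	/* note: the cases below fall through on purpose, trimming the fields introduced by each newer args version */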
+	switch (argsversion) {
 	case 3:
-		if (vfs_context_is64bit(ctx))
-			error = copyin(data, (caddr_t)&args, sizeof (struct user_nfs_args3));
-		else
-			error = copyin(data, (caddr_t)&tempargs, sizeof (struct nfs_args3));
-		break;
+		argsize -= NFS_ARGSVERSION4_INCSIZE;
 	case 4:
-		if (vfs_context_is64bit(ctx))
-			error = copyin(data, (caddr_t)&args, sizeof (struct user_nfs_args4));
-		else
-			error = copyin(data, (caddr_t)&tempargs, sizeof (struct nfs_args4));
-		break;
+		argsize -= NFS_ARGSVERSION5_INCSIZE;
 	case 5:
-		if (vfs_context_is64bit(ctx))
-			error = copyin(data, (caddr_t)&args, sizeof (struct user_nfs_args5));
-		else
-			error = copyin(data, (caddr_t)&tempargs, sizeof (struct nfs_args5));
-		break;
+		argsize -= NFS_ARGSVERSION6_INCSIZE;
 	case 6:
-		if (vfs_context_is64bit(ctx))
-			error = copyin(data, (caddr_t)&args, sizeof (args));
-		else
-			error = copyin(data, (caddr_t)&tempargs, sizeof (tempargs));
 		break;
 	default:
-		return (EPROGMISMATCH);
+		error = EPROGMISMATCH;
+		goto nfsmout;
 	}
-	if (error)
-		return (error);
 
-	if (!vfs_context_is64bit(ctx)) {
-		args.version = tempargs.version;
+	/* read in the structure */
+	if (inkernel)
+		bcopy(CAST_DOWN(void *, data), argsp, argsize);
+	else
+		error = copyin(data, argsp, argsize);
+	nfsmout_if(error);
+
+	if (!args64bit) {
 		args.addrlen = tempargs.addrlen;
 		args.sotype = tempargs.sotype;
 		args.proto = tempargs.proto;
@@ -1299,39 +1515,357 @@ nfs_vfs_mount(mount_t mp, vnode_t vp, user_addr_t data, vfs_context_t ctx)
 		args.addr = CAST_USER_ADDR_T(tempargs.addr);
 		args.fh = CAST_USER_ADDR_T(tempargs.fh);
 		args.hostname = CAST_USER_ADDR_T(tempargs.hostname);
-		if (argsvers >= 4) {
+		if (args.version >= 4) {
 			args.acregmin = tempargs.acregmin;
 			args.acregmax = tempargs.acregmax;
 			args.acdirmin = tempargs.acdirmin;
 			args.acdirmax = tempargs.acdirmax;
 		}
-		if (argsvers >= 5)
+		if (args.version >= 5)
 			args.auth = tempargs.auth;
-		if (argsvers >= 6)
+		if (args.version >= 6)
 			args.deadtimeout = tempargs.deadtimeout;
 	}
 
-	if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX)
-		return (EINVAL);
+	if ((args.fhsize < 0) || (args.fhsize > NFS4_FHSIZE)) {
+		error = EINVAL;
+		goto nfsmout;
+	}
 	if (args.fhsize > 0) {
-		error = copyin(args.fh, (caddr_t)nfh, args.fhsize);
-		if (error)
-			return (error);
+		if (inkernel)
+			bcopy(CAST_DOWN(void *, args.fh), (caddr_t)nfh, args.fhsize);
+		else
+			error = copyin(args.fh, (caddr_t)nfh, args.fhsize);
+		nfsmout_if(error);
 	}
 
-	mntfrom = &vfs_statfs(mp)->f_mntfromname[0];
-	error = copyinstr(args.hostname, mntfrom, MAXPATHLEN-1, &len);
-	if (error)
-		return (error);
+	if (inkernel)
+		error = copystr(CAST_DOWN(void *, args.hostname), mntfrom, MAXPATHLEN-1, &len);
+	else
+		error = copyinstr(args.hostname, mntfrom, MAXPATHLEN-1, &len);
+	nfsmout_if(error);
 	bzero(&mntfrom[len], MAXPATHLEN - len);
 
-	/* sockargs() call must be after above copyin() calls */
-	error = sockargs(&nam, args.addr, args.addrlen, MBUF_TYPE_SONAME);
-	if (error)
+	/* find the server-side path being mounted */
+	frompath = mntfrom;
+	if (*frompath == '[') {  /* skip IPv6 literal address */
+		while (*frompath && (*frompath != ']'))
+			frompath++;
+		if (*frompath == ']')
+			frompath++;
+	}
+	while (*frompath && (*frompath != ':'))
+		frompath++;
+	endserverp = frompath;
+	while (*frompath && (*frompath == ':'))
+		frompath++;
+	/* count fs location path components */
+	p = frompath;
+	while (*p && (*p == '/'))
+		p++;
+	numcomps = 0;
+	while (*p) {
+		numcomps++;
+		while (*p && (*p != '/'))
+			p++;
+		while (*p && (*p == '/'))
+			p++;
+	}
+
+	/* copy socket address */
+	if (inkernel)
+		bcopy(CAST_DOWN(void *, args.addr), &ss, args.addrlen);
+	else
+		error = copyin(args.addr, &ss, args.addrlen);
+	nfsmout_if(error);
+	ss.ss_len = args.addrlen;
+
+	/* convert address to universal address string */
+	if (ss.ss_family == AF_INET)
+		sinaddr = &((struct sockaddr_in*)&ss)->sin_addr;
+	else if (ss.ss_family == AF_INET6)
+		sinaddr = &((struct sockaddr_in6*)&ss)->sin6_addr;
+	else
+		sinaddr = NULL;
+	if (!sinaddr || (inet_ntop(ss.ss_family, sinaddr, uaddr, sizeof(uaddr)) != uaddr)) {
+		error = EINVAL;
+		goto nfsmout;
+	}
+
+	/* prepare mount flags */
+	NFS_BITMAP_ZERO(mflags_mask, NFS_MFLAG_BITMAP_LEN);
+	NFS_BITMAP_ZERO(mflags, NFS_MFLAG_BITMAP_LEN);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_SOFT);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_INTR);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RESVPORT);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOCONNECT);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_DUMBTIMER);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_CALLUMNT);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RDIRPLUS);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NONEGNAMECACHE);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_MUTEJUKEBOX);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOQUOTA);
+	if (args.flags & NFSMNT_SOFT)
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_SOFT);
+	if (args.flags & NFSMNT_INT)
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_INTR);
+	if (args.flags & NFSMNT_RESVPORT)
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_RESVPORT);
+	if (args.flags & NFSMNT_NOCONN)
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_NOCONNECT);
+	if (args.flags & NFSMNT_DUMBTIMR)
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_DUMBTIMER);
+	if (args.flags & NFSMNT_CALLUMNT)
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_CALLUMNT);
+	if (args.flags & NFSMNT_RDIRPLUS)
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_RDIRPLUS);
+	if (args.flags & NFSMNT_NONEGNAMECACHE)
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_NONEGNAMECACHE);
+	if (args.flags & NFSMNT_MUTEJUKEBOX)
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_MUTEJUKEBOX);
+	if (args.flags & NFSMNT_NOQUOTA)
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_NOQUOTA);
+
+	/* prepare mount attributes */
+	NFS_BITMAP_ZERO(mattrs, NFS_MATTR_BITMAP_LEN);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_FLAGS);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_VERSION);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_SOCKET_TYPE);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_PORT);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_FH);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_FS_LOCATIONS);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFLAGS);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFROM);
+	if (args.flags & NFSMNT_NFSV4)
+		nfsvers = 4;
+	else if (args.flags & NFSMNT_NFSV3)
+		nfsvers = 3;
+	else
+		nfsvers = 2;
+	if ((args.flags & NFSMNT_RSIZE) && (args.rsize > 0))
+		NFS_BITMAP_SET(mattrs, NFS_MATTR_READ_SIZE);
+	if ((args.flags & NFSMNT_WSIZE) && (args.wsize > 0))
+		NFS_BITMAP_SET(mattrs, NFS_MATTR_WRITE_SIZE);
+	if ((args.flags & NFSMNT_TIMEO) && (args.timeo > 0))
+		NFS_BITMAP_SET(mattrs, NFS_MATTR_REQUEST_TIMEOUT);
+	if ((args.flags & NFSMNT_RETRANS) && (args.retrans > 0))
+		NFS_BITMAP_SET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT);
+	if ((args.flags & NFSMNT_MAXGRPS) && (args.maxgrouplist > 0))
+		NFS_BITMAP_SET(mattrs, NFS_MATTR_MAX_GROUP_LIST);
+	if ((args.flags & NFSMNT_READAHEAD) && (args.readahead > 0))
+		NFS_BITMAP_SET(mattrs, NFS_MATTR_READAHEAD);
+	if ((args.flags & NFSMNT_READDIRSIZE) && (args.readdirsize > 0))
+		NFS_BITMAP_SET(mattrs, NFS_MATTR_READDIR_SIZE);
+	if ((args.flags & NFSMNT_NOLOCKS) ||
+	    (args.flags & NFSMNT_LOCALLOCKS)) {
+		NFS_BITMAP_SET(mattrs, NFS_MATTR_LOCK_MODE);
+		if (args.flags & NFSMNT_NOLOCKS)
+			nfslockmode = NFS_LOCK_MODE_DISABLED;
+		else if (args.flags & NFSMNT_LOCALLOCKS)
+			nfslockmode = NFS_LOCK_MODE_LOCAL;
+		else
+			nfslockmode = NFS_LOCK_MODE_ENABLED;
+	}
+	if (args.version >= 4) {
+		if ((args.flags & NFSMNT_ACREGMIN) && (args.acregmin > 0))
+			NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN);
+		if ((args.flags & NFSMNT_ACREGMAX) && (args.acregmax > 0))
+			NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX);
+		if ((args.flags & NFSMNT_ACDIRMIN) && (args.acdirmin > 0))
+			NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN);
+		if ((args.flags & NFSMNT_ACDIRMAX) && (args.acdirmax > 0))
+			NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX);
+	}
+	if (args.version >= 5) {
+		if ((args.flags & NFSMNT_SECFLAVOR) || (args.flags & NFSMNT_SECSYSOK))
+			NFS_BITMAP_SET(mattrs, NFS_MATTR_SECURITY);
+	}
+	if (args.version >= 6) {
+		if ((args.flags & NFSMNT_DEADTIMEOUT) && (args.deadtimeout > 0))
+			NFS_BITMAP_SET(mattrs, NFS_MATTR_DEAD_TIMEOUT);
+	}
+
+	/* build xdr buffer */
+	xb_init_buffer(&xb, NULL, 0);
+	xb_add_32(error, &xb, args.version);
+	argslength_offset = xb_offset(&xb);
+	xb_add_32(error, &xb, 0); // args length
+	xb_add_32(error, &xb, NFS_XDRARGS_VERSION_0);
+	xb_add_bitmap(error, &xb, mattrs, NFS_MATTR_BITMAP_LEN);
+	attrslength_offset = xb_offset(&xb);
+	xb_add_32(error, &xb, 0); // attrs length
+	xb_add_bitmap(error, &xb, mflags_mask, NFS_MFLAG_BITMAP_LEN); /* mask */
+	xb_add_bitmap(error, &xb, mflags, NFS_MFLAG_BITMAP_LEN); /* value */
+	xb_add_32(error, &xb, nfsvers);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
+		xb_add_32(error, &xb, args.rsize);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
+		xb_add_32(error, &xb, args.wsize);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READDIR_SIZE))
+		xb_add_32(error, &xb, args.readdirsize);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READAHEAD))
+		xb_add_32(error, &xb, args.readahead);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN)) {
+		xb_add_32(error, &xb, args.acregmin);
+		xb_add_32(error, &xb, 0);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX)) {
+		xb_add_32(error, &xb, args.acregmax);
+		xb_add_32(error, &xb, 0);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN)) {
+		xb_add_32(error, &xb, args.acdirmin);
+		xb_add_32(error, &xb, 0);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX)) {
+		xb_add_32(error, &xb, args.acdirmax);
+		xb_add_32(error, &xb, 0);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_LOCK_MODE))
+		xb_add_32(error, &xb, nfslockmode);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SECURITY)) {
+		uint32_t flavors[2], i=0;
+		if (args.flags & NFSMNT_SECFLAVOR)
+			flavors[i++] = args.auth;
+		if ((args.flags & NFSMNT_SECSYSOK) && ((i == 0) || (flavors[0] != RPCAUTH_SYS)))
+			flavors[i++] = RPCAUTH_SYS;
+		xb_add_word_array(error, &xb, flavors, i);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MAX_GROUP_LIST))
+		xb_add_32(error, &xb, args.maxgrouplist);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOCKET_TYPE))
+		xb_add_string(error, &xb, ((args.sotype == SOCK_DGRAM) ? "udp" : "tcp"), 3);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_PORT))
+		xb_add_32(error, &xb, ((ss.ss_family == AF_INET) ? 
+			ntohs(((struct sockaddr_in*)&ss)->sin_port) :
+			ntohs(((struct sockaddr_in6*)&ss)->sin6_port)));
+	/* NFS_MATTR_MOUNT_PORT (not available in old args) */
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REQUEST_TIMEOUT)) {
+		/* convert from .1s increments to time */
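+		/* (whole seconds first, then the leftover tenths as nanoseconds: 0.1s == 100000000ns) */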
+		xb_add_32(error, &xb, args.timeo/10);
+		xb_add_32(error, &xb, (args.timeo%10)*100000000);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT))
+		xb_add_32(error, &xb, args.retrans);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_DEAD_TIMEOUT)) {
+		xb_add_32(error, &xb, args.deadtimeout);
+		xb_add_32(error, &xb, 0);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FH))
+		xb_add_fh(error, &xb, &nfh[0], args.fhsize);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FS_LOCATIONS)) {
+		xb_add_32(error, &xb, 1); /* fs location count */
+		xb_add_32(error, &xb, 1); /* server count */
+		xb_add_string(error, &xb, mntfrom, (endserverp - mntfrom)); /* server name */
+		xb_add_32(error, &xb, 1); /* address count */
+		xb_add_string(error, &xb, uaddr, strlen(uaddr)); /* address */
+		xb_add_32(error, &xb, 0); /* empty server info */
+		xb_add_32(error, &xb, numcomps); /* pathname component count */
+		nfsmout_if(error);
+		p = frompath;
+		while (*p && (*p == '/'))
+			p++;
+		while (*p) {
+			cp = p;
+			while (*p && (*p != '/'))
+				p++;
+			xb_add_string(error, &xb, cp, (p - cp)); /* component */
+			nfsmout_if(error);
+			while (*p && (*p == '/'))
+				p++;
+		}
+		xb_add_32(error, &xb, 0); /* empty fsl info */
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFLAGS))
+		xb_add_32(error, &xb, (vfs_flags(mp) & MNT_VISFLAGMASK)); /* VFS MNT_* flags */
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFROM))
+		xb_add_string(error, &xb, mntfrom, strlen(mntfrom)); /* fixed f_mntfromname */
+	xb_build_done(error, &xb);
+
+	/* update opaque counts */
+	end_offset = xb_offset(&xb);
+	error = xb_seek(&xb, argslength_offset);
+	xb_add_32(error, &xb, end_offset - argslength_offset + XDRWORD/*version*/);
+	nfsmout_if(error);
+	error = xb_seek(&xb, attrslength_offset);
+	xb_add_32(error, &xb, end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
+
+	if (!error) {
+		/* grab the assembled buffer */
+		*xdrbufp = xb_buffer_base(&xb);
+		xb.xb_flags &= ~XB_CLEANUP;
+	}
+nfsmout:
+	xb_cleanup(&xb);
+	FREE_ZONE(mntfrom, MAXPATHLEN, M_NAMEI);
+	return (error);
+}
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ */
+int
+nfs_vfs_mount(mount_t mp, vnode_t vp, user_addr_t data, vfs_context_t ctx)
+{
+	int error = 0, inkernel = vfs_iskernelmount(mp);
+	uint32_t argsversion, argslength;
+	char *xdrbuf = NULL;
+
+	/* read in version */
+	if (inkernel)
+		bcopy(CAST_DOWN(void *, data), &argsversion, sizeof(argsversion));
+	else if ((error = copyin(data, &argsversion, sizeof(argsversion))))
 		return (error);
 
-	args.fh = CAST_USER_ADDR_T(&nfh[0]);
-	error = mountnfs(&args, mp, nam, ctx, &vp);
+	/* If we have XDR args, then all values in the buffer are in network order */
+	if (argsversion == htonl(NFS_ARGSVERSION_XDR))
+		argsversion = NFS_ARGSVERSION_XDR;
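+	/* (XDR data is big-endian on the wire, so a raw read of the version word matches htonl() of the constant) */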
+
+	switch (argsversion) {
+	case 3:
+	case 4:
+	case 5:
+	case 6:
+		/* convert old-style args to xdr */
+		error = nfs_convert_old_nfs_args(mp, data, ctx, argsversion, inkernel, &xdrbuf);
+		break;
+	case NFS_ARGSVERSION_XDR:
+		/* copy in xdr buffer */
+		if (inkernel)
+			bcopy(CAST_DOWN(void *, (data + XDRWORD)), &argslength, XDRWORD);
+		else
+			error = copyin((data + XDRWORD), &argslength, XDRWORD);
+		if (error)
+			break;
+		argslength = ntohl(argslength);
+		/* put a reasonable limit on the size of the XDR args */
+		if (argslength > 16*1024) {
+			error = E2BIG;
+			break;
+		}
+		/* allocate xdr buffer */
+		xdrbuf = xb_malloc(xdr_rndup(argslength));
+		if (!xdrbuf) {
+			error = ENOMEM;
+			break;
+		}
+		if (inkernel)
+			bcopy(CAST_DOWN(void *, data), xdrbuf, argslength);
+		else
+			error = copyin(data, xdrbuf, argslength);
+		break;
+	default:
+		error = EPROGMISMATCH;
+	}
+
+	if (error) {
+		if (xdrbuf)
+			xb_free(xdrbuf);
+		return (error);
+	}
+	error = mountnfs(xdrbuf, mp, ctx, &vp);
 	return (error);
 }
 
@@ -1339,32 +1873,33 @@ nfs_vfs_mount(mount_t mp, vnode_t vp, user_addr_t data, vfs_context_t ctx)
  * Common code for mount and mountroot
  */
 
+/* Set up an NFSv2/v3 mount */
 int
 nfs3_mount(
 	struct nfsmount *nmp,
 	vfs_context_t ctx,
-	struct user_nfs_args *argp,
 	nfsnode_t *npp)
 {
 	int error = 0;
 	struct nfs_vattr nvattr;
 	u_int64_t xid;
-	u_char *fhp;
 
 	*npp = NULL;
 
+	if (!nmp->nm_fh)
+		return (EINVAL);
+
 	/*
 	 * Get file attributes for the mountpoint.  These are needed
 	 * in order to properly create the root vnode.
 	 */
-	fhp = CAST_DOWN(u_char *, argp->fh);
-	error = nfs3_getattr_rpc(NULL, nmp->nm_mountp, fhp, argp->fhsize,
+	error = nfs3_getattr_rpc(NULL, nmp->nm_mountp, nmp->nm_fh->fh_data, nmp->nm_fh->fh_len, 0,
 			ctx, &nvattr, &xid);
 	if (error)
 		goto out;
 
-	error = nfs_nget(nmp->nm_mountp, NULL, NULL, fhp, argp->fhsize,
-			&nvattr, &xid, NG_MARKROOT, npp);
+	error = nfs_nget(nmp->nm_mountp, NULL, NULL, nmp->nm_fh->fh_data, nmp->nm_fh->fh_len,
+			&nvattr, &xid, RPCAUTH_UNKNOWN, NG_MARKROOT, npp);
 	if (*npp)
 		nfs_node_unlock(*npp);
 	if (error)
@@ -1403,325 +1938,1150 @@ out:
 	return (error);
 }
 
+/*
+ * Update an NFSv4 mount path with the contents of the symlink.
+ *
+ * Read the link for the given file handle.
+ * Insert the link's components into the path.
+ */
 int
-nfs4_mount(
-	struct nfsmount *nmp,
-	vfs_context_t ctx,
-	__unused struct user_nfs_args *argp,
-	nfsnode_t *npp)
+nfs4_mount_update_path_with_symlink(struct nfsmount *nmp, struct nfs_fs_path *nfsp, uint32_t curcomp, fhandle_t *dirfhp, int *depthp, fhandle_t *fhp, vfs_context_t ctx)
 {
-	struct nfsm_chain nmreq, nmrep;
-	int error = 0, numops, status, interval;
-	char *path = &vfs_statfs(nmp->nm_mountp)->f_mntfromname[0];
-	char *name, *nextname;
-	fhandle_t fh;
-	struct nfs_vattr nvattr;
+	int error = 0, status, numops;
+	uint32_t len = 0, comp, newcomp, linkcompcount;
 	u_int64_t xid;
+	struct nfsm_chain nmreq, nmrep;
+	struct nfsreq rq, *req = &rq;
+	struct nfsreq_secinfo_args si;
+	char *link = NULL, *p, *q, ch;
+	struct nfs_fs_path nfsp2;
+
+	bzero(&nfsp2, sizeof(nfsp2));
+	if (dirfhp->fh_len)
+		NFSREQ_SECINFO_SET(&si, NULL, dirfhp->fh_data, dirfhp->fh_len, nfsp->np_components[curcomp], 0);
+	else
+		NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, nfsp->np_components[curcomp], 0);
+	nfsm_chain_null(&nmreq);
+	nfsm_chain_null(&nmrep);
 
-	*npp = NULL;
-	fh.fh_len = 0;
-	TAILQ_INIT(&nmp->nm_open_owners);
-	TAILQ_INIT(&nmp->nm_recallq);
-	nmp->nm_stategenid = 1;
+	MALLOC_ZONE(link, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); 
+	if (!link)
+		error = ENOMEM;
 
-	/* look up path to get fh and attrs for mount point root */
-	numops = 2; // PUTROOTFH + LOOKUP* + GETATTR
-	while (*path && (*path != '/'))
-		path++;
-	name = path;
-	while (*name) {
-		while (*name && (*name == '/'))
-			name++;
-		if (!*name)
-			break;
-		nextname = name;
-		while (*nextname && (*nextname != '/'))
-			nextname++;
-		numops++;
-		name = nextname;
-	}
-	nfsm_chain_build_alloc_init(error, &nmreq, 25 * NFSX_UNSIGNED);
-	nfsm_chain_add_compound_header(error, &nmreq, "mount", numops);
+	// PUTFH, READLINK
+	numops = 2;
+	nfsm_chain_build_alloc_init(error, &nmreq, 12 * NFSX_UNSIGNED);
+	nfsm_chain_add_compound_header(error, &nmreq, "readlink", numops);
 	numops--;
-	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTROOTFH);
-	// (LOOKUP)*
-	name = path;
-	while (*name) {
-		while (*name && (*name == '/'))
-			name++;
-		if (!*name)
-			break;
-		nextname = name;
-		while (*nextname && (*nextname != '/'))
-			nextname++;
-		numops--;
-		nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUP);
-		nfsm_chain_add_string(error, &nmreq, name, nextname - name);
-		name = nextname;
-	}
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
+	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, fhp->fh_data, fhp->fh_len);
 	numops--;
-	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
-	NFS4_DEFAULT_ATTRIBUTES(nmp->nm_fsattr.nfsa_supp_attr);
-	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_FILEHANDLE);
-	nfsm_chain_add_bitmap(error, &nmreq, nmp->nm_fsattr.nfsa_supp_attr, NFS_ATTR_BITMAP_LEN);
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_READLINK);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
-	error = nfs_request(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status);
+
+	error = nfs_request_async(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
+			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
+	if (!error)
+		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
+
 	nfsm_chain_skip_tag(error, &nmrep);
 	nfsm_chain_get_32(error, &nmrep, numops);
-	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTROOTFH);
-	name = path;
-	while (*name) {
-		while (*name && (*name == '/'))
-			name++;
-		if (!*name)
-			break;
-		nextname = name;
-		while (*nextname && (*nextname != '/'))
-			nextname++;
-		nfsm_chain_op_check(error, &nmrep, NFS_OP_LOOKUP);
-		name = nextname;
-	}
-	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
-	nfsmout_if(error);
-	NFS_CLEAR_ATTRIBUTES(nmp->nm_fsattr.nfsa_bitmap);
-	NFS_CLEAR_ATTRIBUTES(&nvattr.nva_bitmap);
-	error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, &fh, NULL);
-	if (!error && !NFS_BITMAP_ISSET(&nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
-		printf("nfs: mount didn't return filehandle?\n");
-		error = EBADRPC;
-	}
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_READLINK);
+	nfsm_chain_get_32(error, &nmrep, len);
 	nfsmout_if(error);
-
-	error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MARKROOT, npp);
+	if (len == 0)
+		error = ENOENT;
+	else if (len >= MAXPATHLEN)
+		len = MAXPATHLEN - 1;
+	nfsm_chain_get_opaque(error, &nmrep, len, link);
 	nfsmout_if(error);
+	/* make sure link string is terminated properly */
+	link[len] = '\0';
+
+	/* count the number of components in link */
+	p = link;
+	while (*p && (*p == '/'))
+		p++;
+	linkcompcount = 0;
+	while (*p) {
+		linkcompcount++;
+		while (*p && (*p != '/'))
+			p++;
+		while (*p && (*p == '/'))
+			p++;
+	}
 
-	/* adjust I/O sizes to server limits */
-	if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXREAD)) {
-		if (nmp->nm_fsattr.nfsa_maxread < (uint64_t)nmp->nm_rsize) {
-			nmp->nm_rsize = nmp->nm_fsattr.nfsa_maxread & ~(NFS_FABLKSIZE - 1);
-			if (nmp->nm_rsize == 0)
-				nmp->nm_rsize = nmp->nm_fsattr.nfsa_maxread;
+	/* free up used components */
+	for (comp=0; comp <= curcomp; comp++) {
+		if (nfsp->np_components[comp]) {
+			FREE(nfsp->np_components[comp], M_TEMP);
+			nfsp->np_components[comp] = NULL;
 		}
 	}
-	if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXWRITE)) {
-		if (nmp->nm_fsattr.nfsa_maxwrite < (uint64_t)nmp->nm_wsize) {
-			nmp->nm_wsize = nmp->nm_fsattr.nfsa_maxwrite & ~(NFS_FABLKSIZE - 1);
-			if (nmp->nm_wsize == 0)
-				nmp->nm_wsize = nmp->nm_fsattr.nfsa_maxwrite;
+
+	/* set up new path */
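+	/* new length: components after the symlink are kept; the ones up through it are replaced by the link's components */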
+	nfsp2.np_compcount = nfsp->np_compcount - curcomp - 1 + linkcompcount;
+	MALLOC(nfsp2.np_components, char **, nfsp2.np_compcount*sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
+	if (!nfsp2.np_components) {
+		error = ENOMEM;
+		goto nfsmout;
+	}
+
+	/* add link components */
+	p = link;
+	while (*p && (*p == '/'))
+		p++;
+	for (newcomp=0; newcomp < linkcompcount; newcomp++) {
+		/* find end of component */
+		q = p;
+		while (*q && (*q != '/'))
+			q++;
+		MALLOC(nfsp2.np_components[newcomp], char *, q-p+1, M_TEMP, M_WAITOK|M_ZERO);
+		if (!nfsp2.np_components[newcomp]) {
+			error = ENOMEM;
+			break;
 		}
+		ch = *q;
+		*q = '\0';
+		strlcpy(nfsp2.np_components[newcomp], p, q-p+1);
+		*q = ch;
+		p = q;
+		while (*p && (*p == '/'))
+			p++;
 	}
+	nfsmout_if(error);
 
-	/* set up lease renew timer */
-	nmp->nm_renew_timer = thread_call_allocate(nfs4_renew_timer, nmp);
-	interval = nmp->nm_fsattr.nfsa_lease / 2;
-	if (interval < 1)
-		interval = 1;
-	nfs_interval_timer_start(nmp->nm_renew_timer, interval * 1000);
+	/* add remaining components */
+	for(comp = curcomp + 1; comp < nfsp->np_compcount; comp++,newcomp++) {
+		nfsp2.np_components[newcomp] = nfsp->np_components[comp];
+		nfsp->np_components[comp] = NULL;
+	}
+
+	/* move new path into place */
+	FREE(nfsp->np_components, M_TEMP);
+	nfsp->np_components = nfsp2.np_components;
+	nfsp->np_compcount = nfsp2.np_compcount;
+	nfsp2.np_components = NULL;
 
+	/* for absolute link, let the caller know that the next dirfh is root */
+	if (link[0] == '/') {
+		dirfhp->fh_len = 0;
+		*depthp = 0;
+	}
 nfsmout:
-	if (*npp)
-		nfs_node_unlock(*npp);
+	if (link)
+		FREE_ZONE(link, MAXPATHLEN, M_NAMEI);
+	if (nfsp2.np_components) {
+		for (comp=0; comp < nfsp2.np_compcount; comp++)
+			if (nfsp2.np_components[comp])
+				FREE(nfsp2.np_components[comp], M_TEMP);
+		FREE(nfsp2.np_components, M_TEMP);
+	}
+	nfsm_chain_cleanup(&nmreq);
+	nfsm_chain_cleanup(&nmrep);
 	return (error);
 }
 
+/* Set up an NFSv4 mount */
 int
-mountnfs(
-	struct user_nfs_args *argp,
-	mount_t mp,
-	mbuf_t nam,
+nfs4_mount(
+	struct nfsmount *nmp,
 	vfs_context_t ctx,
-	vnode_t *vpp)
+	nfsnode_t *npp)
 {
-	struct nfsmount *nmp;
-	nfsnode_t np;
-	int error;
-	uint32_t maxio, iosize;
-	struct vfsstatfs *sbp;
-	struct timespec ts = { 1, 0 };
+	struct nfsm_chain nmreq, nmrep;
+	int error = 0, numops, status, interval, isdotdot, loopcnt = 0, depth = 0;
+	struct nfs_fs_path fspath, *nfsp, fspath2;
+	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], comp, comp2;
+	fhandle_t fh, dirfh;
+	struct nfs_vattr nvattr;
+	u_int64_t xid;
+	struct nfsreq rq, *req = &rq;
+	struct nfsreq_secinfo_args si;
+	struct nfs_sec sec;
+	struct nfs_fs_locations nfsls;
+
+	*npp = NULL;
+	fh.fh_len = dirfh.fh_len = 0;
+	TAILQ_INIT(&nmp->nm_open_owners);
+	TAILQ_INIT(&nmp->nm_delegations);
+	TAILQ_INIT(&nmp->nm_dreturnq);
+	nmp->nm_stategenid = 1;
+	NVATTR_INIT(&nvattr);
+	bzero(&nfsls, sizeof(nfsls));
+	nfsm_chain_null(&nmreq);
+	nfsm_chain_null(&nmrep);
 
 	/*
-	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
-	 * no sense in that context.
+	 * If no security flavors were specified we'll want to default to the server's
+	 * preferred flavor.  For NFSv4.0 we need a file handle and name to get that via
+	 * SECINFO, so we'll do that on the last component of the server path we are
+	 * mounting.  If we are mounting the server's root, we'll need to defer the
+	 * SECINFO call to the first successful LOOKUP request.
 	 */
-	if (argp->sotype == SOCK_STREAM)
-		argp->flags &= ~NFSMNT_NOCONN;
-
-	if (vfs_flags(mp) & MNT_UPDATE) {
-		nmp = VFSTONFS(mp);
-		/* update paths, file handles, etc, here	XXX */
-		mbuf_freem(nam);
-		return (0);
-	} else {
-		MALLOC_ZONE(nmp, struct nfsmount *,
-				sizeof (struct nfsmount), M_NFSMNT, M_WAITOK);
-		if (!nmp) {
-			mbuf_freem(nam);
-			return (ENOMEM);
+	if (!nmp->nm_sec.count)
+		nmp->nm_state |= NFSSTA_NEEDSECINFO;
+
+	/* make a copy of the current location's path */
+	nfsp = &nmp->nm_locations.nl_locations[nmp->nm_locations.nl_current.nli_loc]->nl_path;
+	bzero(&fspath, sizeof(fspath));
+	fspath.np_compcount = nfsp->np_compcount;
+	if (fspath.np_compcount > 0) {
+		MALLOC(fspath.np_components, char **, fspath.np_compcount*sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
+		if (!fspath.np_components) {
+			error = ENOMEM;
+			goto nfsmout;
 		}
-		bzero((caddr_t)nmp, sizeof (struct nfsmount));
-		lck_mtx_init(&nmp->nm_lock, nfs_mount_grp, LCK_ATTR_NULL);
-		TAILQ_INIT(&nmp->nm_resendq);
-		TAILQ_INIT(&nmp->nm_iodq);
-		TAILQ_INIT(&nmp->nm_gsscl);
-		vfs_setfsprivate(mp, nmp);
-
-		nfs_nhinit_finish();
+		for (comp=0; comp < nfsp->np_compcount; comp++) {
+			int slen = strlen(nfsp->np_components[comp]);
+			MALLOC(fspath.np_components[comp], char *, slen+1, M_TEMP, M_WAITOK|M_ZERO);
+			if (!fspath.np_components[comp]) {
+				error = ENOMEM;
+				break;
+			}
+			strlcpy(fspath.np_components[comp], nfsp->np_components[comp], slen+1);
+		}
+		if (error)
+			goto nfsmout;
 	}
-	lck_mtx_lock(&nmp->nm_lock);
 
-	/* setup defaults */
-	nmp->nm_vers = NFS_VER2;
-	nmp->nm_timeo = NFS_TIMEO;
-	nmp->nm_retry = NFS_RETRANS;
-	if (argp->sotype == SOCK_DGRAM) {
-		nmp->nm_wsize = NFS_DGRAM_WSIZE;
-		nmp->nm_rsize = NFS_DGRAM_RSIZE;
-	} else {
-		nmp->nm_wsize = NFS_WSIZE;
-		nmp->nm_rsize = NFS_RSIZE;
+	/* for mirror mounts, we can just use the file handle passed in */
+	if (nmp->nm_fh) {
+		dirfh.fh_len = nmp->nm_fh->fh_len;
+		bcopy(nmp->nm_fh->fh_data, dirfh.fh_data, dirfh.fh_len);
+		NFSREQ_SECINFO_SET(&si, NULL, dirfh.fh_data, dirfh.fh_len, NULL, 0);
+		goto gotfh;
 	}
-	nmp->nm_readdirsize = NFS_READDIRSIZE;
-	nmp->nm_numgrps = NFS_MAXGRPS;
-	nmp->nm_readahead = NFS_DEFRAHEAD;
-	nmp->nm_tprintf_delay = nfs_tprintf_delay;
-	if (nmp->nm_tprintf_delay < 0)
-		nmp->nm_tprintf_delay = 0;
-	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
-	if (nmp->nm_tprintf_initial_delay < 0)
-		nmp->nm_tprintf_initial_delay = 0;
-	nmp->nm_acregmin = NFS_MINATTRTIMO;
-	nmp->nm_acregmax = NFS_MAXATTRTIMO;
-	nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
-	nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
-	nmp->nm_auth = RPCAUTH_SYS;
-	nmp->nm_deadtimeout = 0;
-
-	vfs_getnewfsid(mp);
-	nmp->nm_mountp = mp;
-	vfs_setauthopaque(mp);
-	nmp->nm_flag = argp->flags;
-	nmp->nm_nam = nam;
-
-	if (argp->flags & NFSMNT_NFSV4) {
-		nmp->nm_vers = NFS_VER4;
-		/* NFSv4 is only allowed over TCP. */
-		if (argp->sotype != SOCK_STREAM) {
-			error = EINVAL;
-			goto bad;
+
+	/* otherwise, we need to get the fh for the directory we are mounting */
+
+	/* if no components, just get root */
+	if (fspath.np_compcount == 0) {
+nocomponents:
+		// PUTROOTFH + GETATTR(FH)
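+		// Request the FILEHANDLE attribute explicitly so the GETATTR
+		// reply hands back the root file handle we're after.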
+		NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, NULL, 0);
+		numops = 2;
+		nfsm_chain_build_alloc_init(error, &nmreq, 9 * NFSX_UNSIGNED);
+		nfsm_chain_add_compound_header(error, &nmreq, "mount", numops);
+		numops--;
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTROOTFH);
+		numops--;
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
+		NFS_CLEAR_ATTRIBUTES(bitmap);
+		NFS4_DEFAULT_ATTRIBUTES(bitmap);
+		NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE);
+		nfsm_chain_add_bitmap(error, &nmreq, bitmap, NFS_ATTR_BITMAP_LEN);
+		nfsm_chain_build_done(error, &nmreq);
+		nfsm_assert(error, (numops == 0), EPROTO);
+		nfsmout_if(error);
+		error = nfs_request_async(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
+				vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
+		if (!error)
+			error = nfs_request_async_finish(req, &nmrep, &xid, &status);
+		nfsm_chain_skip_tag(error, &nmrep);
+		nfsm_chain_get_32(error, &nmrep, numops);
+		nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTROOTFH);
+		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
+		nfsmout_if(error);
+		NFS_CLEAR_ATTRIBUTES(nmp->nm_fsattr.nfsa_bitmap);
+		error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, &dirfh, NULL, NULL);
+		if (!error && !NFS_BITMAP_ISSET(&nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) {
+			printf("nfs: mount didn't return filehandle?\n");
+			error = EBADRPC;
 		}
-	} else if (argp->flags & NFSMNT_NFSV3)
-		nmp->nm_vers = NFS_VER3;
+		nfsmout_if(error);
+		nfsm_chain_cleanup(&nmrep);
+		nfsm_chain_null(&nmreq);
+		NVATTR_CLEANUP(&nvattr);
+		goto gotfh;
+	}
 
-	if (nmp->nm_vers == NFS_VER2)
-		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
+	/* look up each path component */
+	for (comp=0; comp < fspath.np_compcount; ) {
+		isdotdot = 0;
+		if (fspath.np_components[comp][0] == '.') {
+			if (fspath.np_components[comp][1] == '\0') {
+				/* skip "." */
+				comp++;
+				continue;
+			}
+			/* treat ".." specially */
+			if ((fspath.np_components[comp][1] == '.') &&
+			    (fspath.np_components[comp][2] == '\0'))
+				isdotdot = 1;
+			if (isdotdot && (dirfh.fh_len == 0)) {
+				/* ".." in root directory is same as "." */
+				comp++;
+				continue;
+			}
+		}
+		// PUT(ROOT)FH + LOOKUP(P) + GETFH + GETATTR
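+		// One COMPOUND per path component: LOOKUPP handles "..", and the
+		// GETFH/GETATTR results advance dirfh one step down the path.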
+		if (dirfh.fh_len == 0)
+			NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, isdotdot ? NULL : fspath.np_components[comp], 0);
+		else
+			NFSREQ_SECINFO_SET(&si, NULL, dirfh.fh_data, dirfh.fh_len, isdotdot ? NULL : fspath.np_components[comp], 0);
+		numops = 4;
+		nfsm_chain_build_alloc_init(error, &nmreq, 18 * NFSX_UNSIGNED);
+		nfsm_chain_add_compound_header(error, &nmreq, "mount", numops);
+		numops--;
+		if (dirfh.fh_len) {
+			nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
+			nfsm_chain_add_fh(error, &nmreq, NFS_VER4, dirfh.fh_data, dirfh.fh_len);
+		} else {
+			nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTROOTFH);
+		}
+		numops--;
+		if (isdotdot) {
+			nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUPP);
+		} else {
+			nfsm_chain_add_32(error, &nmreq, NFS_OP_LOOKUP);
+			nfsm_chain_add_name(error, &nmreq,
+				fspath.np_components[comp], strlen(fspath.np_components[comp]), nmp);
+		}
+		numops--;
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETFH);
+		numops--;
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
+		NFS_CLEAR_ATTRIBUTES(bitmap);
+		NFS4_DEFAULT_ATTRIBUTES(bitmap);
+		/* if no namedattr support or component is ".zfs", clear NFS_FATTR_NAMED_ATTR */
+		if (NMFLAG(nmp, NONAMEDATTR) || !strcmp(fspath.np_components[comp], ".zfs"))
+			NFS_BITMAP_CLR(bitmap, NFS_FATTR_NAMED_ATTR);
+		nfsm_chain_add_bitmap(error, &nmreq, bitmap, NFS_ATTR_BITMAP_LEN);
+		nfsm_chain_build_done(error, &nmreq);
+		nfsm_assert(error, (numops == 0), EPROTO);
+		nfsmout_if(error);
+		error = nfs_request_async(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
+				vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
+		if (!error)
+			error = nfs_request_async_finish(req, &nmrep, &xid, &status);
+		nfsm_chain_skip_tag(error, &nmrep);
+		nfsm_chain_get_32(error, &nmrep, numops);
+		nfsm_chain_op_check(error, &nmrep, dirfh.fh_len ? NFS_OP_PUTFH : NFS_OP_PUTROOTFH);
+		nfsm_chain_op_check(error, &nmrep, isdotdot ? NFS_OP_LOOKUPP : NFS_OP_LOOKUP);
+		nfsmout_if(error);
+		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETFH);
+		nfsm_chain_get_32(error, &nmrep, fh.fh_len);
+		nfsm_chain_get_opaque(error, &nmrep, fh.fh_len, fh.fh_data);
+		nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
+		if (!error) {
+			NFS_CLEAR_ATTRIBUTES(nmp->nm_fsattr.nfsa_bitmap);
+			error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, NULL, NULL, &nfsls);
+		}
+		nfsm_chain_cleanup(&nmrep);
+		nfsm_chain_null(&nmreq);
+		if (error) {
+			/* LOOKUP succeeded but GETATTR failed?  This could be a referral. */
+			/* Try the lookup again with a getattr for fs_locations. */
+			nfs_fs_locations_cleanup(&nfsls);
+			error = nfs4_get_fs_locations(nmp, NULL, dirfh.fh_data, dirfh.fh_len, fspath.np_components[comp], ctx, &nfsls);
+			if (!error && (nfsls.nl_numlocs < 1))
+				error = ENOENT;
+			nfsmout_if(error);
+			if (++loopcnt > MAXSYMLINKS) {
+				/* too many symlink/referral redirections */
+				error = ELOOP;
+				goto nfsmout;
+			}
+			/* tear down the current connection */
+			nfs_disconnect(nmp);
+			/* replace fs locations */
+			nfs_fs_locations_cleanup(&nmp->nm_locations);
+			nmp->nm_locations = nfsls;
+			bzero(&nfsls, sizeof(nfsls));
+			/* initiate a connection using the new fs locations */
+			error = nfs_mount_connect(nmp);
+			if (!error && !(nmp->nm_locations.nl_current.nli_flags & NLI_VALID))
+				error = EIO;
+			nfsmout_if(error);
+			/* add new server's remote path to beginning of our path and continue */
+			nfsp = &nmp->nm_locations.nl_locations[nmp->nm_locations.nl_current.nli_loc]->nl_path;
+			bzero(&fspath2, sizeof(fspath2));
+			fspath2.np_compcount = (fspath.np_compcount - comp - 1) + nfsp->np_compcount;
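+			/* i.e. fspath2 = new location's path + our not-yet-traversed components */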
+			if (fspath2.np_compcount > 0) {
+				MALLOC(fspath2.np_components, char **, fspath2.np_compcount*sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
+				if (!fspath2.np_components) {
+					error = ENOMEM;
+					goto nfsmout;
+				}
+				for (comp2=0; comp2 < nfsp->np_compcount; comp2++) {
+					int slen = strlen(nfsp->np_components[comp2]);
+					MALLOC(fspath2.np_components[comp2], char *, slen+1, M_TEMP, M_WAITOK|M_ZERO);
+					if (!fspath2.np_components[comp2]) {
+						/* clean up fspath2, then error out */
+						while (comp2 > 0) {
+							comp2--;
+							FREE(fspath2.np_components[comp2], M_TEMP);
+						}
+						FREE(fspath2.np_components, M_TEMP);
+						error = ENOMEM;
+						goto nfsmout;
+					}
+					strlcpy(fspath2.np_components[comp2], nfsp->np_components[comp2], slen+1);
+				}
+				if ((fspath.np_compcount - comp - 1) > 0)
+					bcopy(&fspath.np_components[comp+1], &fspath2.np_components[nfsp->np_compcount], (fspath.np_compcount - comp - 1)*sizeof(char*));
+				/* free up unused parts of old path (prior components and component array) */
+				do {
+					FREE(fspath.np_components[comp], M_TEMP);
+				} while (comp-- > 0);
+				FREE(fspath.np_components, M_TEMP);
+				/* put new path in place */
+				fspath = fspath2;
+			}
+			/* reset dirfh and component index */
+			dirfh.fh_len = 0;
+			comp = 0;
+			NVATTR_CLEANUP(&nvattr);
+			if (fspath.np_compcount == 0)
+				goto nocomponents;
+			continue;
+		}
+		nfsmout_if(error);
+		/* if file handle is for a symlink, then update the path with the symlink contents */
+		if (NFS_BITMAP_ISSET(&nvattr.nva_bitmap, NFS_FATTR_TYPE) && (nvattr.nva_type == VLNK)) {
+			if (++loopcnt > MAXSYMLINKS)
+				error = ELOOP;
+			else
+				error = nfs4_mount_update_path_with_symlink(nmp, &fspath, comp, &dirfh, &depth, &fh, ctx);
+			nfsmout_if(error);
+			/* directory file handle is either left the same or reset to root (if link was absolute) */
+			/* path traversal starts at beginning of the path again */
+			comp = 0;
+			NVATTR_CLEANUP(&nvattr);
+			nfs_fs_locations_cleanup(&nfsls);
+			continue;
+		}
+		NVATTR_CLEANUP(&nvattr);
+		nfs_fs_locations_cleanup(&nfsls);
+		/* not a symlink... */
+		if ((nmp->nm_state & NFSSTA_NEEDSECINFO) && (comp == (fspath.np_compcount-1)) && !isdotdot) {
+			/* need to get SECINFO for the directory being mounted */
+			if (dirfh.fh_len == 0)
+				NFSREQ_SECINFO_SET(&si, NULL, NULL, 0, isdotdot ? NULL : fspath.np_components[comp], 0);
+			else
+				NFSREQ_SECINFO_SET(&si, NULL, dirfh.fh_data, dirfh.fh_len, isdotdot ? NULL : fspath.np_components[comp], 0);
+			sec.count = NX_MAX_SEC_FLAVORS;
+			error = nfs4_secinfo_rpc(nmp, &si, vfs_context_ucred(ctx), sec.flavors, &sec.count);
+			/* [sigh] some implementations return "illegal" error for unsupported ops */
+			if (error == NFSERR_OP_ILLEGAL)
+				error = 0;
+			nfsmout_if(error);
+			/* set our default security flavor to the first in the list */
+			if (sec.count)
+				nmp->nm_auth = sec.flavors[0];
+			nmp->nm_state &= ~NFSSTA_NEEDSECINFO;
+		}
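+		/*
+		 * depth tracks how far below the server root we are; if ".."
+		 * takes it back to zero we clear dirfh so the next iteration
+		 * starts over from PUTROOTFH.
+		 */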
+		/* advance directory file handle, component index, & update depth */
+		dirfh = fh;
+		comp++;
+		if (!isdotdot) /* going down the hierarchy */
+			depth++;
+		else if (--depth <= 0)  /* going up the hierarchy */
+			dirfh.fh_len = 0; /* clear dirfh when we hit root */
+	}
 
-	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
-		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
-		if (nmp->nm_timeo < NFS_MINTIMEO)
-			nmp->nm_timeo = NFS_MINTIMEO;
-		else if (nmp->nm_timeo > NFS_MAXTIMEO)
-			nmp->nm_timeo = NFS_MAXTIMEO;
+gotfh:
+	/* get attrs for mount point root */
+	numops = NMFLAG(nmp, NONAMEDATTR) ? 2 : 3; // PUTFH + GETATTR + OPENATTR
+	nfsm_chain_build_alloc_init(error, &nmreq, 25 * NFSX_UNSIGNED);
+	nfsm_chain_add_compound_header(error, &nmreq, "mount", numops);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
+	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, dirfh.fh_data, dirfh.fh_len);
+	numops--;
+	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
+	NFS_CLEAR_ATTRIBUTES(bitmap);
+	NFS4_DEFAULT_ATTRIBUTES(bitmap);
+	/* if no namedattr support or last component is ".zfs", clear NFS_FATTR_NAMED_ATTR */
+	if (NMFLAG(nmp, NONAMEDATTR) || ((fspath.np_compcount > 0) && !strcmp(fspath.np_components[fspath.np_compcount-1], ".zfs")))
+		NFS_BITMAP_CLR(bitmap, NFS_FATTR_NAMED_ATTR);
+	nfsm_chain_add_bitmap(error, &nmreq, bitmap, NFS_ATTR_BITMAP_LEN);
+	if (!NMFLAG(nmp, NONAMEDATTR)) {
+		numops--;
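+		/* OPENATTR here is just a probe for named-attribute support;
+		 * its lone argument is the "createdir" flag, passed as FALSE */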
+		nfsm_chain_add_32(error, &nmreq, NFS_OP_OPENATTR);
+		nfsm_chain_add_32(error, &nmreq, 0);
+	}
+	nfsm_chain_build_done(error, &nmreq);
+	nfsm_assert(error, (numops == 0), EPROTO);
+	nfsmout_if(error);
+	error = nfs_request_async(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND,
+			vfs_context_thread(ctx), vfs_context_ucred(ctx), &si, 0, NULL, &req);
+	if (!error)
+		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
+	nfsm_chain_skip_tag(error, &nmrep);
+	nfsm_chain_get_32(error, &nmrep, numops);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
+	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
+	nfsmout_if(error);
+	NFS_CLEAR_ATTRIBUTES(nmp->nm_fsattr.nfsa_bitmap);
+	error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, NULL, NULL, NULL);
+	nfsmout_if(error);
+	if (!NMFLAG(nmp, NONAMEDATTR)) {
+		nfsm_chain_op_check(error, &nmrep, NFS_OP_OPENATTR);
+		if (error == ENOENT)
+			error = 0;
+		/* [sigh] some implementations return "illegal" error for unsupported ops */
+		if (error || !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_NAMED_ATTR)) {
+			nmp->nm_fsattr.nfsa_flags &= ~NFS_FSFLAG_NAMED_ATTR;
+		} else {
+			nmp->nm_fsattr.nfsa_flags |= NFS_FSFLAG_NAMED_ATTR;
+		}
+	} else {
+		nmp->nm_fsattr.nfsa_flags &= ~NFS_FSFLAG_NAMED_ATTR;
+	}
+	if (NMFLAG(nmp, NOACL)) /* make sure ACL support is turned off */
+		nmp->nm_fsattr.nfsa_flags &= ~NFS_FSFLAG_ACL;
+	if (NMFLAG(nmp, ACLONLY) && !(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL))
+		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_ACLONLY);
+	if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_FH_EXPIRE_TYPE)) {
+		uint32_t fhtype = ((nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_FHTYPE_MASK) >> NFS_FSFLAG_FHTYPE_SHIFT);
+		if (fhtype != NFS_FH_PERSISTENT)
+			printf("nfs: warning: non-persistent file handles! for %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
 	}
 
-	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
-		nmp->nm_retry = argp->retrans;
-		if (nmp->nm_retry > NFS_MAXREXMIT)
-			nmp->nm_retry = NFS_MAXREXMIT;
+	/* make sure it's a directory */
+	if (!NFS_BITMAP_ISSET(&nvattr.nva_bitmap, NFS_FATTR_TYPE) || (nvattr.nva_type != VDIR)) {
+		error = ENOTDIR;
+		goto nfsmout;
 	}
 
-	if (nmp->nm_vers != NFS_VER2) {
-		if (argp->sotype == SOCK_DGRAM)
-			maxio = NFS_MAXDGRAMDATA;
-		else
-			maxio = NFS_MAXDATA;
-	} else
-		maxio = NFS_V2MAXDATA;
+	/* save the NFS fsid */
+	nmp->nm_fsid = nvattr.nva_fsid;
+
+	/* create the root node */
+	error = nfs_nget(nmp->nm_mountp, NULL, NULL, dirfh.fh_data, dirfh.fh_len, &nvattr, &xid, rq.r_auth, NG_MARKROOT, npp);
+	nfsmout_if(error);
+
+	if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL)
+		vfs_setextendedsecurity(nmp->nm_mountp);
 
-	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
-		nmp->nm_wsize = argp->wsize;
-		/* Round down to multiple of blocksize */
-		nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
-		if (nmp->nm_wsize <= 0)
-			nmp->nm_wsize = NFS_FABLKSIZE;
+	/* adjust I/O sizes to server limits */
+	if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXREAD) && (nmp->nm_fsattr.nfsa_maxread > 0)) {
+		if (nmp->nm_fsattr.nfsa_maxread < (uint64_t)nmp->nm_rsize) {
+			nmp->nm_rsize = nmp->nm_fsattr.nfsa_maxread & ~(NFS_FABLKSIZE - 1);
+			if (nmp->nm_rsize == 0)
+				nmp->nm_rsize = nmp->nm_fsattr.nfsa_maxread;
+		}
+	}
+	if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXWRITE) && (nmp->nm_fsattr.nfsa_maxwrite > 0)) {
+		if (nmp->nm_fsattr.nfsa_maxwrite < (uint64_t)nmp->nm_wsize) {
+			nmp->nm_wsize = nmp->nm_fsattr.nfsa_maxwrite & ~(NFS_FABLKSIZE - 1);
+			if (nmp->nm_wsize == 0)
+				nmp->nm_wsize = nmp->nm_fsattr.nfsa_maxwrite;
+		}
 	}
-	if (nmp->nm_wsize > maxio)
-		nmp->nm_wsize = maxio;
-	if (nmp->nm_wsize > NFS_MAXBSIZE)
-		nmp->nm_wsize = NFS_MAXBSIZE;
 
-	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
-		nmp->nm_rsize = argp->rsize;
-		/* Round down to multiple of blocksize */
-		nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
-		if (nmp->nm_rsize <= 0)
-			nmp->nm_rsize = NFS_FABLKSIZE;
+	/* set up lease renew timer */
+	nmp->nm_renew_timer = thread_call_allocate(nfs4_renew_timer, nmp);
+	interval = nmp->nm_fsattr.nfsa_lease / 2;
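+	/* half the lease period, so a renewal comfortably precedes expiry */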
+	if (interval < 1)
+		interval = 1;
+	nfs_interval_timer_start(nmp->nm_renew_timer, interval * 1000);
+
+nfsmout:
+	if (fspath.np_components) {
+		for (comp=0; comp < fspath.np_compcount; comp++)
+			if (fspath.np_components[comp])
+				FREE(fspath.np_components[comp], M_TEMP);
+		FREE(fspath.np_components, M_TEMP);
 	}
-	if (nmp->nm_rsize > maxio)
-		nmp->nm_rsize = maxio;
-	if (nmp->nm_rsize > NFS_MAXBSIZE)
-		nmp->nm_rsize = NFS_MAXBSIZE;
+	NVATTR_CLEANUP(&nvattr);
+	nfs_fs_locations_cleanup(&nfsls);
+	if (*npp)
+		nfs_node_unlock(*npp);
+	nfsm_chain_cleanup(&nmreq);
+	nfsm_chain_cleanup(&nmrep);
+	return (error);
+}
+
+/*
+ * Thread to handle initial NFS mount connection.
+ */
+void
+nfs_mount_connect_thread(void *arg, __unused wait_result_t wr)
+{
+	struct nfsmount *nmp = arg;
+	int error = 0, savederror = 0, slpflag = (NMFLAG(nmp, INTR) ? PCATCH : 0);
+	int done = 0, timeo, tries, maxtries;
 
-	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
-		nmp->nm_readdirsize = argp->readdirsize;
+	if (NM_OMFLAG(nmp, MNTQUICK)) {
+		timeo = 8;
+		maxtries = 1;
+	} else {
+		timeo = 30;
+		maxtries = 2;
 	}
-	if (nmp->nm_readdirsize > maxio)
-		nmp->nm_readdirsize = maxio;
-	if (nmp->nm_readdirsize > nmp->nm_rsize)
-		nmp->nm_readdirsize = nmp->nm_rsize;
 
-	if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 &&
-		argp->maxgrouplist <= NFS_MAXGRPS)
-		nmp->nm_numgrps = argp->maxgrouplist;
-	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 &&
-		argp->readahead <= NFS_MAXRAHEAD)
-		nmp->nm_readahead = argp->readahead;
-	if (argp->flags & NFSMNT_READAHEAD)
-		nmp->nm_readahead = argp->readahead;
-	if (nmp->nm_readahead < 0)
-		nmp->nm_readahead = 0;
-	else if (nmp->nm_readahead > NFS_MAXRAHEAD)
-		nmp->nm_readahead = NFS_MAXRAHEAD;
+	for (tries = 0; tries < maxtries; tries++) {
+		error = nfs_connect(nmp, 1, timeo);
+		switch (error) {
+		case ETIMEDOUT:
+		case EAGAIN:
+		case EPIPE:
+		case EADDRNOTAVAIL:
+		case ENETDOWN:
+		case ENETUNREACH:
+		case ENETRESET:
+		case ECONNABORTED:
+		case ECONNRESET:
+		case EISCONN:
+		case ENOTCONN:
+		case ESHUTDOWN:
+		case ECONNREFUSED:
+		case EHOSTDOWN:
+		case EHOSTUNREACH:
+			/* just keep retrying on any of these errors */
+			break;
+		case 0:
+		default:
+			/* looks like we got an answer... */
+			done = 1;
+			break;
+		}
 
-	if (argp->version >= 4) {
-		if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
-			nmp->nm_acregmin = argp->acregmin;
-		if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
-			nmp->nm_acregmax = argp->acregmax;
-		if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
-			nmp->nm_acdirmin = argp->acdirmin;
-		if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
-			nmp->nm_acdirmax = argp->acdirmax;
-		if (nmp->nm_acregmin > nmp->nm_acregmax)
-			nmp->nm_acregmin = nmp->nm_acregmax;
-		if (nmp->nm_acdirmin > nmp->nm_acdirmax)
-			nmp->nm_acdirmin = nmp->nm_acdirmax;
-	}
-	if (argp->version >= 5) {
-		if (argp->flags & NFSMNT_SECFLAVOR) {
-			/*
-			 * Check for valid security flavor
-			 */
-			switch (argp->auth) {
+		/* save the best error */
+		if (nfs_connect_error_class(error) >= nfs_connect_error_class(savederror))
+			savederror = error;
+		if (done) {
+			error = savederror;
+			break;
+		}
+
+		/* pause before next attempt */
+		if ((error = nfs_sigintr(nmp, NULL, current_thread(), 0)))
+			break;
+		error = tsleep(nmp, PSOCK|slpflag, "nfs_mount_connect_retry", 2*hz);
+		if (error && (error != EWOULDBLOCK))
+			break;
+		error = savederror;
+	}
+
+	/* update status of mount connect */
+	lck_mtx_lock(&nmp->nm_lock);
+	if (!nmp->nm_mounterror)
+		nmp->nm_mounterror = error;
+	nmp->nm_state &= ~NFSSTA_MOUNT_THREAD;
+	lck_mtx_unlock(&nmp->nm_lock);
+	wakeup(&nmp->nm_nss);
+}
+
+int
+nfs_mount_connect(struct nfsmount *nmp)
+{
+	int error = 0, slpflag;
+	thread_t thd;
+	struct timespec ts = { 2, 0 };
+
+	/*
+	 * Set up the socket.  Perform initial search for a location/server/address to
+	 * connect to and negotiate any unspecified mount parameters.  This work is
+	 * done on a kernel thread to satisfy reserved port usage needs.
+	 */
+	slpflag = NMFLAG(nmp, INTR) ? PCATCH : 0;
+	lck_mtx_lock(&nmp->nm_lock);
+	/* set flag that the thread is running */
+	nmp->nm_state |= NFSSTA_MOUNT_THREAD;
+	if (kernel_thread_start(nfs_mount_connect_thread, nmp, &thd) != KERN_SUCCESS) {
+		nmp->nm_state &= ~NFSSTA_MOUNT_THREAD;
+		nmp->nm_mounterror = EIO;
+		printf("nfs mount %s start socket connect thread failed\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
+	} else {
+		thread_deallocate(thd);
+	}
+
+	/* wait until mount connect thread is finished/gone */
+	while (nmp->nm_state & NFSSTA_MOUNT_THREAD) {
+		error = msleep(&nmp->nm_nss, &nmp->nm_lock, slpflag|PSOCK, "nfsconnectthread", &ts);
+		if ((error && (error != EWOULDBLOCK)) || ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))) {
+			/* record error */
+			if (!nmp->nm_mounterror)
+				nmp->nm_mounterror = error;
+			/* signal the thread that we are aborting */
+			nmp->nm_sockflags |= NMSOCK_UNMOUNT;
+			if (nmp->nm_nss)
+				wakeup(nmp->nm_nss);
+			/* and continue waiting on it to finish */
+			slpflag = 0;
+		}
+	}
+	lck_mtx_unlock(&nmp->nm_lock);
+
+	/* grab mount connect status */
+	error = nmp->nm_mounterror;
+
+	return (error);
+}
+
+/*
+ * Common code to mount an NFS file system.
+ */
+int
+mountnfs(
+	char *xdrbuf,
+	mount_t mp,
+	vfs_context_t ctx,
+	vnode_t *vpp)
+{
+	struct nfsmount *nmp;
+	nfsnode_t np;
+	int error = 0;
+	struct vfsstatfs *sbp;
+	struct xdrbuf xb;
+	uint32_t i, val, vers = 0, minorvers, maxio, iosize, len;
+	uint32_t *mattrs;
+	uint32_t *mflags_mask;
+	uint32_t *mflags;
+	uint32_t argslength, attrslength;
+	struct nfs_location_index firstloc = { NLI_VALID, 0, 0, 0 };
+
+	/* make sure mbuf constants are set up */
+	if (!nfs_mbuf_mhlen)
+		nfs_mbuf_init();
+
+	if (vfs_flags(mp) & MNT_UPDATE) {
+		nmp = VFSTONFS(mp);
+		/* update paths, file handles, etc, here	XXX */
+		xb_free(xdrbuf);
+		return (0);
+	} else {
+		/* allocate an NFS mount structure for this mount */
+		MALLOC_ZONE(nmp, struct nfsmount *,
+				sizeof (struct nfsmount), M_NFSMNT, M_WAITOK);
+		if (!nmp) {
+			xb_free(xdrbuf);
+			return (ENOMEM);
+		}
+		bzero((caddr_t)nmp, sizeof (struct nfsmount));
+		lck_mtx_init(&nmp->nm_lock, nfs_mount_grp, LCK_ATTR_NULL);
+		TAILQ_INIT(&nmp->nm_resendq);
+		TAILQ_INIT(&nmp->nm_iodq);
+		TAILQ_INIT(&nmp->nm_gsscl);
+		LIST_INIT(&nmp->nm_monlist);
+		vfs_setfsprivate(mp, nmp);
+		vfs_getnewfsid(mp);
+		nmp->nm_mountp = mp;
+		vfs_setauthopaque(mp);
+
+		nfs_nhinit_finish();
+
+		nmp->nm_args = xdrbuf;
+
+		/* set up defaults */
+		nmp->nm_vers = 0;
+		nmp->nm_timeo = NFS_TIMEO;
+		nmp->nm_retry = NFS_RETRANS;
+		nmp->nm_sotype = 0;
+		nmp->nm_sofamily = 0;
+		nmp->nm_nfsport = 0;
+		nmp->nm_wsize = NFS_WSIZE;
+		nmp->nm_rsize = NFS_RSIZE;
+		nmp->nm_readdirsize = NFS_READDIRSIZE;
+		nmp->nm_numgrps = NFS_MAXGRPS;
+		nmp->nm_readahead = NFS_DEFRAHEAD;
+		nmp->nm_tprintf_delay = nfs_tprintf_delay;
+		if (nmp->nm_tprintf_delay < 0)
+			nmp->nm_tprintf_delay = 0;
+		nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
+		if (nmp->nm_tprintf_initial_delay < 0)
+			nmp->nm_tprintf_initial_delay = 0;
+		nmp->nm_acregmin = NFS_MINATTRTIMO;
+		nmp->nm_acregmax = NFS_MAXATTRTIMO;
+		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
+		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
+		nmp->nm_auth = RPCAUTH_SYS;
+		nmp->nm_deadtimeout = 0;
+		NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_NOACL);
+	}
+
+	mattrs = nmp->nm_mattrs;
+	mflags = nmp->nm_mflags;
+	mflags_mask = nmp->nm_mflags_mask;
+
+	/* set up NFS mount with args */
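+	/* First peek at just the version and total args length, then reparse
+	 * with the buffer bounded to that length so subsequent reads are
+	 * range-checked against the real size. */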
+	xb_init_buffer(&xb, xdrbuf, 2*XDRWORD);
+	xb_get_32(error, &xb, val); /* version */
+	xb_get_32(error, &xb, argslength); /* args length */
+	nfsmerr_if(error);
+	xb_init_buffer(&xb, xdrbuf, argslength);	/* restart parsing with actual buffer length */
+	xb_get_32(error, &xb, val); /* version */
+	xb_get_32(error, &xb, argslength); /* args length */
+	xb_get_32(error, &xb, val); /* XDR args version */
+	if (val != NFS_XDRARGS_VERSION_0)
+		error = EINVAL;
+	len = NFS_MATTR_BITMAP_LEN;
+	xb_get_bitmap(error, &xb, mattrs, len); /* mount attribute bitmap */
+	attrslength = 0;
+	xb_get_32(error, &xb, attrslength); /* attrs length */
+	if (!error && (attrslength > (argslength - ((4+NFS_MATTR_BITMAP_LEN+1)*XDRWORD))))
+		error = EINVAL;
+	nfsmerr_if(error);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FLAGS)) {
+		len = NFS_MFLAG_BITMAP_LEN;
+		xb_get_bitmap(error, &xb, mflags_mask, len); /* mount flag mask */
+		len = NFS_MFLAG_BITMAP_LEN;
+		xb_get_bitmap(error, &xb, mflags, len); /* mount flag values */
+		if (!error) {
+			/* clear all mask bits and OR in all the ones that are set */
+			nmp->nm_flags[0] &= ~mflags_mask[0];
+			nmp->nm_flags[0] |= (mflags_mask[0] & mflags[0]);
+		}
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION)) {
+		xb_get_32(error, &xb, vers);
+		if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION))
+			xb_get_32(error, &xb, minorvers);
+		else
+			minorvers = 0;
+		nfsmerr_if(error);
+		switch (vers) {
+		case 2:
+			nmp->nm_vers = NFS_VER2;
+			break;
+		case 3:
+			nmp->nm_vers = NFS_VER3;
+			break;
+		case 4:
+			switch (minorvers) {
+			case 0:
+				nmp->nm_vers = NFS_VER4;
+				break;
+			default:
+				error = EINVAL;
+			}
+			break;
+		default:
+			error = EINVAL;
+		}
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION)) {
+		/* should have also gotten NFS version (and already gotten minorvers) */
+		if (!NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION))
+			error = EINVAL;
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
+		xb_get_32(error, &xb, nmp->nm_rsize);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
+		xb_get_32(error, &xb, nmp->nm_wsize);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READDIR_SIZE))
+		xb_get_32(error, &xb, nmp->nm_readdirsize);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READAHEAD))
+		xb_get_32(error, &xb, nmp->nm_readahead);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN)) {
+		xb_get_32(error, &xb, nmp->nm_acregmin);
+		xb_skip(error, &xb, XDRWORD);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX)) {
+		xb_get_32(error, &xb, nmp->nm_acregmax);
+		xb_skip(error, &xb, XDRWORD);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN)) {
+		xb_get_32(error, &xb, nmp->nm_acdirmin);
+		xb_skip(error, &xb, XDRWORD);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX)) {
+		xb_get_32(error, &xb, nmp->nm_acdirmax);
+		xb_skip(error, &xb, XDRWORD);
+	}
+	nfsmerr_if(error);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_LOCK_MODE)) {
+		xb_get_32(error, &xb, val);
+		switch (val) {
+		case NFS_LOCK_MODE_DISABLED:
+		case NFS_LOCK_MODE_LOCAL:
+			if (nmp->nm_vers >= NFS_VER4) {
+				/* disabled/local lock mode only allowed on v2/v3 */
+				error = EINVAL;
+				break;
+			}
+			/* FALLTHROUGH */
+		case NFS_LOCK_MODE_ENABLED:
+			nmp->nm_lockmode = val;
+			break;
+		default:
+			error = EINVAL;
+		}
+	}
+	nfsmerr_if(error);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SECURITY)) {
+		uint32_t seccnt;
+		xb_get_32(error, &xb, seccnt);
+		if (!error && ((seccnt < 1) || (seccnt > NX_MAX_SEC_FLAVORS)))
+			error = EINVAL;
+		nfsmerr_if(error);
+		nmp->nm_sec.count = seccnt;
+		for (i=0; i < seccnt; i++) {
+			xb_get_32(error, &xb, nmp->nm_sec.flavors[i]);
+			/* Check for valid security flavor */
+			switch (nmp->nm_sec.flavors[i]) {
+			case RPCAUTH_NONE:
 			case RPCAUTH_SYS:
 			case RPCAUTH_KRB5:
 			case RPCAUTH_KRB5I:
 			case RPCAUTH_KRB5P:
-				nmp->nm_auth = argp->auth;
 				break;
 			default:
 				error = EINVAL;
-				goto bad;
 			}
 		}
+		/* start with the first flavor */
+		nmp->nm_auth = nmp->nm_sec.flavors[0];
 	}
-	if (argp->version >= 6) {
-		if (argp->flags & NFSMNT_DEADTIMEOUT)
-			nmp->nm_deadtimeout = argp->deadtimeout;
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MAX_GROUP_LIST))
+		xb_get_32(error, &xb, nmp->nm_numgrps);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOCKET_TYPE)) {
+		char sotype[6];
+
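+		/* sotype is a short string: "tcp", "udp", "tcp4", "udp4", "tcp6",
+		 * "udp6", "inet", "inet4", or "inet6" -- hence the 3..5 length check */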
+		xb_get_32(error, &xb, val);
+		if (!error && ((val < 3) || (val > 5)))
+			error = EINVAL;
+		nfsmerr_if(error);
+		error = xb_get_bytes(&xb, sotype, val, 0);
+		nfsmerr_if(error);
+		sotype[val] = '\0';
+		if (!strcmp(sotype, "tcp")) {
+			nmp->nm_sotype = SOCK_STREAM;
+		} else if (!strcmp(sotype, "udp")) {
+			nmp->nm_sotype = SOCK_DGRAM;
+		} else if (!strcmp(sotype, "tcp4")) {
+			nmp->nm_sotype = SOCK_STREAM;
+			nmp->nm_sofamily = AF_INET;
+		} else if (!strcmp(sotype, "udp4")) {
+			nmp->nm_sotype = SOCK_DGRAM;
+			nmp->nm_sofamily = AF_INET;
+		} else if (!strcmp(sotype, "tcp6")) {
+			nmp->nm_sotype = SOCK_STREAM;
+			nmp->nm_sofamily = AF_INET6;
+		} else if (!strcmp(sotype, "udp6")) {
+			nmp->nm_sotype = SOCK_DGRAM;
+			nmp->nm_sofamily = AF_INET6;
+		} else if (!strcmp(sotype, "inet4")) {
+			nmp->nm_sofamily = AF_INET;
+		} else if (!strcmp(sotype, "inet6")) {
+			nmp->nm_sofamily = AF_INET6;
+		} else if (!strcmp(sotype, "inet")) {
+			nmp->nm_sofamily = 0; /* ok */
+		} else {
+			error = EINVAL;
+		}
+		if (!error && (nmp->nm_vers >= NFS_VER4) && nmp->nm_sotype &&
+		    (nmp->nm_sotype != SOCK_STREAM))
+			error = EINVAL;		/* NFSv4 is only allowed over TCP. */
+		nfsmerr_if(error);
 	}
-	if ((nmp->nm_flag & NFSMNT_DEADTIMEOUT) && (nmp->nm_deadtimeout <= 0))
-		nmp->nm_flag &= ~NFSMNT_DEADTIMEOUT;
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_PORT))
+		xb_get_32(error, &xb, nmp->nm_nfsport);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MOUNT_PORT))
+		xb_get_32(error, &xb, nmp->nm_mountport);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REQUEST_TIMEOUT)) {
+		/* request timeout arrives as {seconds, nanoseconds}; convert to 0.1s units */
+		xb_get_32(error, &xb, nmp->nm_timeo);
+		xb_get_32(error, &xb, val);
+		nfsmerr_if(error);
+		if (val >= 1000000000)
+			error = EINVAL;
+		nfsmerr_if(error);
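+		/* seconds to tenths; nanoseconds rounded up to the next tenth */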
+		nmp->nm_timeo *= 10;
+		nmp->nm_timeo += (val+100000000-1)/100000000;
+		/* now convert to ticks */
+		nmp->nm_timeo = (nmp->nm_timeo * NFS_HZ + 5) / 10;
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT)) {
+		xb_get_32(error, &xb, val);
+		if (!error && (val > 1))
+			nmp->nm_retry = val;
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_DEAD_TIMEOUT)) {
+		xb_get_32(error, &xb, nmp->nm_deadtimeout);
+		xb_skip(error, &xb, XDRWORD);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FH)) {
+		nfsmerr_if(error);
+		MALLOC(nmp->nm_fh, fhandle_t *, sizeof(fhandle_t), M_TEMP, M_WAITOK|M_ZERO);
+		if (!nmp->nm_fh)
+			error = ENOMEM;
+		xb_get_32(error, &xb, nmp->nm_fh->fh_len);
+		nfsmerr_if(error);
+		error = xb_get_bytes(&xb, (char*)&nmp->nm_fh->fh_data[0], nmp->nm_fh->fh_len, 0);
+	}
+	nfsmerr_if(error);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FS_LOCATIONS)) {
+		uint32_t loc, serv, addr, comp;
+		struct nfs_fs_location *fsl;
+		struct nfs_fs_server *fss;
+		struct nfs_fs_path *fsp;
+
+		xb_get_32(error, &xb, nmp->nm_locations.nl_numlocs); /* fs location count */
+		/* sanity check location count */
+		if (!error && ((nmp->nm_locations.nl_numlocs < 1) || (nmp->nm_locations.nl_numlocs > 256)))
+			error = EINVAL;
+		nfsmerr_if(error);
+		MALLOC(nmp->nm_locations.nl_locations, struct nfs_fs_location **, nmp->nm_locations.nl_numlocs * sizeof(struct nfs_fs_location*), M_TEMP, M_WAITOK|M_ZERO);
+		if (!nmp->nm_locations.nl_locations)
+			error = ENOMEM;
+		for (loc = 0; loc < nmp->nm_locations.nl_numlocs; loc++) {
+			nfsmerr_if(error);
+			MALLOC(fsl, struct nfs_fs_location *, sizeof(struct nfs_fs_location), M_TEMP, M_WAITOK|M_ZERO);
+			if (!fsl)
+				error = ENOMEM;
+			nmp->nm_locations.nl_locations[loc] = fsl;
+			xb_get_32(error, &xb, fsl->nl_servcount); /* server count */
+			/* sanity check server count */
+			if (!error && ((fsl->nl_servcount < 1) || (fsl->nl_servcount > 256)))
+				error = EINVAL;
+			nfsmerr_if(error);
+			MALLOC(fsl->nl_servers, struct nfs_fs_server **, fsl->nl_servcount * sizeof(struct nfs_fs_server*), M_TEMP, M_WAITOK|M_ZERO);
+			if (!fsl->nl_servers)
+				error = ENOMEM;
+			for (serv = 0; serv < fsl->nl_servcount; serv++) {
+				nfsmerr_if(error);
+				MALLOC(fss, struct nfs_fs_server *, sizeof(struct nfs_fs_server), M_TEMP, M_WAITOK|M_ZERO);
+				if (!fss)
+					error = ENOMEM;
+				fsl->nl_servers[serv] = fss;
+				xb_get_32(error, &xb, val); /* server name length */
+				/* sanity check server name length */
+				if (!error && ((val < 1) || (val > MAXPATHLEN)))
+					error = EINVAL;
+				nfsmerr_if(error);
+				MALLOC(fss->ns_name, char *, val+1, M_TEMP, M_WAITOK|M_ZERO);
+				if (!fss->ns_name)
+					error = ENOMEM;
+				nfsmerr_if(error);
+				error = xb_get_bytes(&xb, fss->ns_name, val, 0); /* server name */
+				xb_get_32(error, &xb, fss->ns_addrcount); /* address count */
+				/* sanity check address count (OK to be zero) */
+				if (!error && (fss->ns_addrcount > 256))
+					error = EINVAL;
+				nfsmerr_if(error);
+				if (fss->ns_addrcount > 0) {
+					MALLOC(fss->ns_addresses, char **, fss->ns_addrcount * sizeof(char *), M_TEMP, M_WAITOK|M_ZERO);
+					if (!fss->ns_addresses)
+						error = ENOMEM;
+					for (addr = 0; addr < fss->ns_addrcount; addr++) {
+						xb_get_32(error, &xb, val); /* address length */
+						/* sanity check address length */
+						if (!error && ((val < 1) || (val > 128)))
+							error = EINVAL;
+						nfsmerr_if(error);
+						MALLOC(fss->ns_addresses[addr], char *, val+1, M_TEMP, M_WAITOK|M_ZERO);
+						if (!fss->ns_addresses[addr])
+							error = ENOMEM;
+						nfsmerr_if(error);
+						error = xb_get_bytes(&xb, fss->ns_addresses[addr], val, 0); /* address */
+					}
+				}
+				xb_get_32(error, &xb, val); /* server info length */
+				xb_skip(error, &xb, val); /* skip server info */
+			}
+			/* get pathname */
+			fsp = &fsl->nl_path;
+			xb_get_32(error, &xb, fsp->np_compcount); /* component count */
+			/* sanity check component count */
+			if (!error && (fsp->np_compcount > MAXPATHLEN))
+				error = EINVAL;
+			nfsmerr_if(error);
+			if (fsp->np_compcount) {
+				MALLOC(fsp->np_components, char **, fsp->np_compcount * sizeof(char*), M_TEMP, M_WAITOK|M_ZERO);
+				if (!fsp->np_components)
+					error = ENOMEM;
+			}
+			for (comp = 0; comp < fsp->np_compcount; comp++) {
+				xb_get_32(error, &xb, val); /* component length */
+				/* sanity check component length */
+				if (!error && (val == 0)) {
+					/*
+					 * Apparently some people think a path with zero components should
+					 * be encoded with one zero-length component.  So, just ignore any
+					 * zero length components.
+					 */
+					comp--;
+					fsp->np_compcount--;
+					if (fsp->np_compcount == 0) {
+						FREE(fsp->np_components, M_TEMP);
+						fsp->np_components = NULL;
+					}
+					continue;
+				}
+				if (!error && ((val < 1) || (val > MAXPATHLEN)))
+					error = EINVAL;
+				nfsmerr_if(error);
+				MALLOC(fsp->np_components[comp], char *, val+1, M_TEMP, M_WAITOK|M_ZERO);
+				if (!fsp->np_components[comp])
+					error = ENOMEM;
+				nfsmerr_if(error);
+				error = xb_get_bytes(&xb, fsp->np_components[comp], val, 0); /* component */
+			}
+			xb_get_32(error, &xb, val); /* fs location info length */
+			xb_skip(error, &xb, val); /* skip fs location info */
+		}
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFLAGS))
+		xb_skip(error, &xb, XDRWORD);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFROM)) {
+		xb_get_32(error, &xb, len);
+		nfsmerr_if(error);
+		val = len;
+		if (val >= sizeof(vfs_statfs(mp)->f_mntfromname))
+			val = sizeof(vfs_statfs(mp)->f_mntfromname) - 1;
+		error = xb_get_bytes(&xb, vfs_statfs(mp)->f_mntfromname, val, 0);
+		if ((len - val) > 0)
+			xb_skip(error, &xb, len - val);
+		nfsmerr_if(error);
+		vfs_statfs(mp)->f_mntfromname[val] = '\0';
+	}
+	nfsmerr_if(error);
+
+	/*
+	 * Sanity check/finalize settings.
+	 */
+
+	if (nmp->nm_timeo < NFS_MINTIMEO)
+		nmp->nm_timeo = NFS_MINTIMEO;
+	else if (nmp->nm_timeo > NFS_MAXTIMEO)
+		nmp->nm_timeo = NFS_MAXTIMEO;
+	if (nmp->nm_retry > NFS_MAXREXMIT)
+		nmp->nm_retry = NFS_MAXREXMIT;
+
+	if (nmp->nm_numgrps > NFS_MAXGRPS)
+		nmp->nm_numgrps = NFS_MAXGRPS;
+	if (nmp->nm_readahead > NFS_MAXRAHEAD)
+		nmp->nm_readahead = NFS_MAXRAHEAD;
+	if (nmp->nm_acregmin > nmp->nm_acregmax)
+		nmp->nm_acregmin = nmp->nm_acregmax;
+	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
+		nmp->nm_acdirmin = nmp->nm_acdirmax;
+
+	/* need at least one fs location */
+	if (nmp->nm_locations.nl_numlocs < 1)
+		error = EINVAL;
+	nfsmerr_if(error);
+
+	/* init mount's mntfromname to first location */
+	if (!NM_OMATTR_GIVEN(nmp, MNTFROM))
+		nfs_location_mntfromname(&nmp->nm_locations, firstloc,
+			vfs_statfs(mp)->f_mntfromname, sizeof(vfs_statfs(mp)->f_mntfromname), 0);
+
+	/* Save the mounting credential; v4 keeps it for later use (non-v4 mounts release it below). */
+	nmp->nm_mcred = vfs_context_ucred(ctx);
+	if (IS_VALID_CRED(nmp->nm_mcred))
+		kauth_cred_ref(nmp->nm_mcred);
+
+	/*
+	 * If a reserved port is required, check for that privilege.
+	 * (Note that mirror mounts are exempt because the privilege was
+	 * already checked for the original mount.)
+	 */
+	if (NMFLAG(nmp, RESVPORT) && !vfs_iskernelmount(mp))
+		error = priv_check_cred(nmp->nm_mcred, PRIV_NETINET_RESERVEDPORT, 0);
+	nfsmerr_if(error);
+
+	/* do mount's initial socket connection */
+	error = nfs_mount_connect(nmp);
+	nfsmerr_if(error);
 
 	/* set up the version-specific function tables */
 	if (nmp->nm_vers < NFS_VER4)
@@ -1729,39 +3089,67 @@ mountnfs(
 	else
 		nmp->nm_funcs = &nfs4_funcs;
 
-	/* Set up the sockets and related info */
-	nmp->nm_sotype = argp->sotype;
-	nmp->nm_soproto = argp->proto;
-	if (nmp->nm_sotype == SOCK_DGRAM)
-		TAILQ_INIT(&nmp->nm_cwndq);
-
-	lck_mtx_unlock(&nmp->nm_lock);
-
-	/* make sure mbuf constants are set up */
-	if (!nfs_mbuf_mhlen)
-		nfs_mbuf_init();
-
+	/* sanity check settings now that version/connection is set */
+	if (nmp->nm_vers == NFS_VER2)		/* ignore RDIRPLUS on NFSv2 */
+		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_RDIRPLUS);
 	if (nmp->nm_vers >= NFS_VER4) {
-		struct timeval now;
-		microtime(&now);
-		nmp->nm_mounttime = ((uint64_t)now.tv_sec << 32) | now.tv_usec;
-		nmp->nm_mcred = vfs_context_ucred(ctx);
+		if (NFS_BITMAP_ISSET(nmp->nm_flags, NFS_MFLAG_ACLONLY)) /* aclonly trumps noacl */
+			NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NOACL);
+		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_CALLUMNT);
+		if (nmp->nm_lockmode != NFS_LOCK_MODE_ENABLED)
+			error = EINVAL; /* disabled/local lock mode only allowed on v2/v3 */
+	} else {
+		/* ignore these if not v4 */
+		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NOCALLBACK);
+		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NONAMEDATTR);
+		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NOACL);
+		NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_ACLONLY);
 		if (IS_VALID_CRED(nmp->nm_mcred))
-			kauth_cred_ref(nmp->nm_mcred);
-		nfs4_mount_callback_setup(nmp);
+			kauth_cred_unref(&nmp->nm_mcred);
+	}
+	nfsmerr_if(error);
+
+	if (nmp->nm_sotype == SOCK_DGRAM) {
+		/* I/O size defaults for UDP are different */
+		if (!NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
+			nmp->nm_rsize = NFS_DGRAM_RSIZE;
+		if (!NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
+			nmp->nm_wsize = NFS_DGRAM_WSIZE;
 	}
 
-	/* set up the socket */
-	if ((error = nfs_connect(nmp, 1)))
-		goto bad;
+	/* round down I/O sizes to multiple of NFS_FABLKSIZE */
+	nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
+	if (nmp->nm_rsize <= 0)
+		nmp->nm_rsize = NFS_FABLKSIZE;
+	nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
+	if (nmp->nm_wsize <= 0)
+		nmp->nm_wsize = NFS_FABLKSIZE;
+
+	/* and limit I/O sizes to maximum allowed */
+	maxio = (nmp->nm_vers == NFS_VER2) ? NFS_V2MAXDATA :
+		(nmp->nm_sotype == SOCK_DGRAM) ? NFS_MAXDGRAMDATA : NFS_MAXDATA;
+	if (maxio > NFS_MAXBSIZE)
+		maxio = NFS_MAXBSIZE;
+	if (nmp->nm_rsize > maxio)
+		nmp->nm_rsize = maxio;
+	if (nmp->nm_wsize > maxio)
+		nmp->nm_wsize = maxio;
+
+	if (nmp->nm_readdirsize > maxio)
+		nmp->nm_readdirsize = maxio;
+	if (nmp->nm_readdirsize > nmp->nm_rsize)
+		nmp->nm_readdirsize = nmp->nm_rsize;
+
+	/* Set up the sockets and related info */
+	if (nmp->nm_sotype == SOCK_DGRAM)
+		TAILQ_INIT(&nmp->nm_cwndq);
 
 	/*
 	 * Get the root node/attributes from the NFS server and
 	 * do any basic, version-specific setup.
 	 */
-	error = nmp->nm_funcs->nf_mount(nmp, ctx, argp, &np);
-	if (error)
-		goto bad;
+	error = nmp->nm_funcs->nf_mount(nmp, ctx, &np);
+	nfsmerr_if(error);
 
 	/*
 	 * A reference count is needed on the node representing the
@@ -1776,7 +3164,7 @@ mountnfs(
 	vnode_put(*vpp);
 	if (error) {
 		vnode_recycle(*vpp);
-		goto bad;
+		goto nfsmerr;
 	}
 
 	/*
@@ -1788,151 +3176,877 @@ mountnfs(
 		if (!error2)
 			vnode_put(*vpp);
 		vnode_recycle(*vpp);
-		goto bad;
+		goto nfsmerr;
+	}
+	sbp = vfs_statfs(mp);
+	sbp->f_bsize = nmp->nm_fsattr.nfsa_bsize;
+	sbp->f_blocks = nmp->nm_fsattr.nfsa_space_total / sbp->f_bsize;
+	sbp->f_bfree = nmp->nm_fsattr.nfsa_space_free / sbp->f_bsize;
+	sbp->f_bavail = nmp->nm_fsattr.nfsa_space_avail / sbp->f_bsize;
+	sbp->f_bused = (nmp->nm_fsattr.nfsa_space_total / sbp->f_bsize) -
+			(nmp->nm_fsattr.nfsa_space_free / sbp->f_bsize);
+	sbp->f_files = nmp->nm_fsattr.nfsa_files_total;
+	sbp->f_ffree = nmp->nm_fsattr.nfsa_files_free;
+	sbp->f_iosize = nfs_iosize;
+
+	/*
+	 * Calculate the size used for I/O buffers.  Use the larger
+	 * of the two sizes to minimise NFS requests but make sure
+	 * that it is at least one VM page to avoid wasting buffer
+	 * space and to allow easy mmapping of I/O buffers.
+	 * The read/write RPC calls handle the splitting up of
+	 * buffers into multiple requests if the buffer size is
+	 * larger than the I/O size.
+	 */
+	iosize = max(nmp->nm_rsize, nmp->nm_wsize);
+	if (iosize < PAGE_SIZE)
+		iosize = PAGE_SIZE;
+	nmp->nm_biosize = trunc_page_32(iosize);
+
+	/* For NFSv3 and greater, there is a (relatively) reliable ACCESS call. */
+	if (nmp->nm_vers > NFS_VER2)
+		vfs_setauthopaqueaccess(mp);
+
+	switch (nmp->nm_lockmode) {
+	case NFS_LOCK_MODE_DISABLED:
+		break;
+	case NFS_LOCK_MODE_LOCAL:
+		vfs_setlocklocal(nmp->nm_mountp);
+		break;
+	case NFS_LOCK_MODE_ENABLED:
+	default:
+		if (nmp->nm_vers <= NFS_VER3)
+			nfs_lockd_mount_register(nmp);
+		break;
+	}
+
+	/* success! */
+	lck_mtx_lock(&nmp->nm_lock);
+	nmp->nm_state |= NFSSTA_MOUNTED;
+	lck_mtx_unlock(&nmp->nm_lock);
+	return (0);
+nfsmerr:
+	nfs_mount_cleanup(nmp);
+	return (error);
+}
+
+#if CONFIG_TRIGGERS
+
+/*
+ * We've detected a file system boundary on the server and
+ * need to mount a new file system so that our file systems
+ * MIRROR the file systems on the server.
+ *
+ * Build the mount arguments for the new mount and call kernel_mount().
+ */
+int
+nfs_mirror_mount_domount(vnode_t dvp, vnode_t vp, vfs_context_t ctx)
+{
+	nfsnode_t np = VTONFS(vp);
+	nfsnode_t dnp = VTONFS(dvp);
+	struct nfsmount *nmp = NFSTONMP(np);
+	char fstype[MFSTYPENAMELEN], *mntfromname = NULL, *path = NULL, *relpath, *p, *cp;
+	int error = 0, pathbuflen = MAXPATHLEN, i, mntflags = 0, referral, skipcopy = 0;
+	size_t nlen;
+	struct xdrbuf xb, xbnew;
+	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
+	uint32_t newmattrs[NFS_MATTR_BITMAP_LEN];
+	uint32_t newmflags[NFS_MFLAG_BITMAP_LEN];
+	uint32_t newmflags_mask[NFS_MFLAG_BITMAP_LEN];
+	uint32_t argslength = 0, val, count, mlen, mlen2, rlen, relpathcomps;
+	uint32_t argslength_offset, attrslength_offset, end_offset;
+	uint32_t numlocs, loc, numserv, serv, numaddr, addr, numcomp, comp;
+	char buf[XDRWORD];
+	struct nfs_fs_locations nfsls;
+
+	referral = (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER_REFERRAL);
+	if (referral)
+		bzero(&nfsls, sizeof(nfsls));
+
+	xb_init(&xbnew, 0);
+
+	if (!nmp || (nmp->nm_state & NFSSTA_FORCE))
+		return (ENXIO);
+
+	/* allocate a couple path buffers we need */
+	MALLOC_ZONE(mntfromname, char *, pathbuflen, M_NAMEI, M_WAITOK);
+	if (!mntfromname) {
+		error = ENOMEM;
+		goto nfsmerr;
+	}
+	MALLOC_ZONE(path, char *, pathbuflen, M_NAMEI, M_WAITOK);
+	if (!path) {
+		error = ENOMEM;
+		goto nfsmerr;
+	}
+
+	/* get the path for the directory being mounted on */
+	error = vn_getpath(vp, path, &pathbuflen);
+	if (error) {
+		error = ENOMEM;
+		goto nfsmerr;
+	}
+
+	/*
+	 * Set up the mntfromname for the new mount based on the
+	 * current mount's mntfromname and the directory's path
+	 * relative to the current mount's mntonname.
+	 * Set up relpath to point at the relative path on the current mount.
+	 * Also, count the number of components in relpath.
+	 * We'll be adding those to each fs location path in the new args.
+	 */
+	nlen = strlcpy(mntfromname, vfs_statfs(nmp->nm_mountp)->f_mntfromname, MAXPATHLEN);
+	if ((nlen > 0) && (mntfromname[nlen-1] == '/')) { /* avoid double '/' in new name */
+		mntfromname[nlen-1] = '\0';
+		nlen--;
+	}
+	relpath = mntfromname + nlen;
+	nlen = strlcat(mntfromname, path + strlen(vfs_statfs(nmp->nm_mountp)->f_mntonname), MAXPATHLEN);
+	if (nlen >= MAXPATHLEN) {
+		error = ENAMETOOLONG;
+		goto nfsmerr;
+	}
+	/* count the number of components in relpath */
+	p = relpath;
+	while (*p && (*p == '/'))
+		p++;
+	relpathcomps = 0;
+	while (*p) {
+		relpathcomps++;
+		while (*p && (*p != '/'))
+			p++;
+		while (*p && (*p == '/'))
+			p++;
+	}
+
+	/* grab a copy of the file system type */
+	vfs_name(vnode_mount(vp), fstype);
+
+	/* for referrals, fetch the fs locations */
+	if (referral) {
+		const char *vname = vnode_getname(NFSTOV(np));
+		if (!vname) {
+			error = ENOENT;
+		} else {
+			error = nfs4_get_fs_locations(nmp, dnp, NULL, 0, vname, ctx, &nfsls);
+			vnode_putname(vname);
+			if (!error && (nfsls.nl_numlocs < 1))
+				error = ENOENT;
+		}
+		nfsmerr_if(error);
+	}
+
+	/* set up NFS mount args based on current mount args */
+
+#define xb_copy_32(E, XBSRC, XBDST, V) \
+	do { \
+		if (E) break; \
+		xb_get_32((E), (XBSRC), (V)); \
+		if (skipcopy) break; \
+		xb_add_32((E), (XBDST), (V)); \
+	} while (0)
+#define xb_copy_opaque(E, XBSRC, XBDST) \
+	do { \
+		uint32_t __count, __val; \
+		xb_copy_32((E), (XBSRC), (XBDST), __count); \
+		if (E) break; \
+		__count = nfsm_rndup(__count); \
+		__count /= XDRWORD; \
+		while (__count-- > 0) \
+			xb_copy_32((E), (XBSRC), (XBDST), __val); \
+	} while (0)
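+	/*
+	 * These helpers copy XDR words/opaques from the old args (xb) into
+	 * the new args (xbnew); while skipcopy is set (referral fs locations)
+	 * they consume the source without emitting anything.
+	 */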
+
+	xb_init_buffer(&xb, nmp->nm_args, 2*XDRWORD);
+	xb_get_32(error, &xb, val); /* version */
+	xb_get_32(error, &xb, argslength); /* args length */
+	xb_init_buffer(&xb, nmp->nm_args, argslength);
+
+	xb_init_buffer(&xbnew, NULL, 0);
+	xb_copy_32(error, &xb, &xbnew, val); /* version */
+	argslength_offset = xb_offset(&xbnew);
+	xb_copy_32(error, &xb, &xbnew, val); /* args length */
+	xb_copy_32(error, &xb, &xbnew, val); /* XDR args version */
+	count = NFS_MATTR_BITMAP_LEN;
+	xb_get_bitmap(error, &xb, mattrs, count); /* mount attribute bitmap */
+	nfsmerr_if(error);
+	for (i = 0; i < NFS_MATTR_BITMAP_LEN; i++)
+		newmattrs[i] = mattrs[i];
+	if (referral)
+		NFS_BITMAP_SET(newmattrs, NFS_MATTR_FS_LOCATIONS);
+	else
+		NFS_BITMAP_SET(newmattrs, NFS_MATTR_FH);
+	NFS_BITMAP_SET(newmattrs, NFS_MATTR_FLAGS);
+	NFS_BITMAP_SET(newmattrs, NFS_MATTR_MNTFLAGS);
+	NFS_BITMAP_CLR(newmattrs, NFS_MATTR_MNTFROM);
+	xb_add_bitmap(error, &xbnew, newmattrs, NFS_MATTR_BITMAP_LEN);
+	attrslength_offset = xb_offset(&xbnew);
+	xb_copy_32(error, &xb, &xbnew, val); /* attrs length */
+	NFS_BITMAP_ZERO(newmflags_mask, NFS_MFLAG_BITMAP_LEN);
+	NFS_BITMAP_ZERO(newmflags, NFS_MFLAG_BITMAP_LEN);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FLAGS)) {
+		count = NFS_MFLAG_BITMAP_LEN;
+		xb_get_bitmap(error, &xb, newmflags_mask, count); /* mount flag mask bitmap */
+		count = NFS_MFLAG_BITMAP_LEN;
+		xb_get_bitmap(error, &xb, newmflags, count); /* mount flag bitmap */
+	}
+	NFS_BITMAP_SET(newmflags_mask, NFS_MFLAG_EPHEMERAL);
+	NFS_BITMAP_SET(newmflags, NFS_MFLAG_EPHEMERAL);
+	xb_add_bitmap(error, &xbnew, newmflags_mask, NFS_MFLAG_BITMAP_LEN);
+	xb_add_bitmap(error, &xbnew, newmflags, NFS_MFLAG_BITMAP_LEN);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_VERSION))
+		xb_copy_32(error, &xb, &xbnew, val);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_MINOR_VERSION))
+		xb_copy_32(error, &xb, &xbnew, val);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READ_SIZE))
+		xb_copy_32(error, &xb, &xbnew, val);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_WRITE_SIZE))
+		xb_copy_32(error, &xb, &xbnew, val);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READDIR_SIZE))
+		xb_copy_32(error, &xb, &xbnew, val);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_READAHEAD))
+		xb_copy_32(error, &xb, &xbnew, val);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN)) {
+		xb_copy_32(error, &xb, &xbnew, val);
+		xb_copy_32(error, &xb, &xbnew, val);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX)) {
+		xb_copy_32(error, &xb, &xbnew, val);
+		xb_copy_32(error, &xb, &xbnew, val);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN)) {
+		xb_copy_32(error, &xb, &xbnew, val);
+		xb_copy_32(error, &xb, &xbnew, val);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX)) {
+		xb_copy_32(error, &xb, &xbnew, val);
+		xb_copy_32(error, &xb, &xbnew, val);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_LOCK_MODE))
+		xb_copy_32(error, &xb, &xbnew, val);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SECURITY)) {
+		xb_copy_32(error, &xb, &xbnew, count);
+		while (!error && (count-- > 0))
+			xb_copy_32(error, &xb, &xbnew, val);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MAX_GROUP_LIST))
+		xb_copy_32(error, &xb, &xbnew, val);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOCKET_TYPE))
+		xb_copy_opaque(error, &xb, &xbnew);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_NFS_PORT))
+		xb_copy_32(error, &xb, &xbnew, val);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MOUNT_PORT))
+		xb_copy_32(error, &xb, &xbnew, val);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REQUEST_TIMEOUT)) {
+		xb_copy_32(error, &xb, &xbnew, val);
+		xb_copy_32(error, &xb, &xbnew, val);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT))
+		xb_copy_32(error, &xb, &xbnew, val);
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_DEAD_TIMEOUT)) {
+		xb_copy_32(error, &xb, &xbnew, val);
+		xb_copy_32(error, &xb, &xbnew, val);
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FH)) {
+		xb_get_32(error, &xb, count);
+		xb_skip(error, &xb, count);
+	}
+	if (!referral) {
+		/* set the initial file handle to the directory's file handle */
+		xb_add_fh(error, &xbnew, np->n_fhp, np->n_fhsize);
+	}
+	/* copy/extend/skip fs locations */
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_FS_LOCATIONS)) {
+		numlocs = numserv = numaddr = numcomp = 0;
+		if (referral) /* don't copy the fs locations for a referral */
+			skipcopy = 1;
+		xb_copy_32(error, &xb, &xbnew, numlocs); /* location count */
+		for (loc = 0; !error && (loc < numlocs); loc++) {
+			xb_copy_32(error, &xb, &xbnew, numserv); /* server count */
+			for (serv = 0; !error && (serv < numserv); serv++) {
+				xb_copy_opaque(error, &xb, &xbnew); /* server name */
+				xb_copy_32(error, &xb, &xbnew, numaddr); /* address count */
+				for (addr = 0; !error && (addr < numaddr); addr++)
+					xb_copy_opaque(error, &xb, &xbnew); /* address */
+				xb_copy_opaque(error, &xb, &xbnew); /* server info */
+			}
+			/* pathname */
+			xb_get_32(error, &xb, numcomp); /* component count */
+			if (!skipcopy)
+				xb_add_32(error, &xbnew, numcomp+relpathcomps); /* new component count */
+			for (comp = 0; !error && (comp < numcomp); comp++)
+				xb_copy_opaque(error, &xb, &xbnew); /* component */
+			/* add additional components */
+			for (comp = 0; !skipcopy && !error && (comp < relpathcomps); comp++) {
+				p = relpath;
+				while (*p && (*p == '/'))
+					p++;
+				while (*p && !error) {
+					cp = p;
+					while (*p && (*p != '/'))
+						p++;
+					xb_add_string(error, &xbnew, cp, (p - cp)); /* component */
+					while (*p && (*p == '/'))
+						p++;
+				}
+			}
+			xb_copy_opaque(error, &xb, &xbnew); /* fs location info */
+		}
+		if (referral)
+			skipcopy = 0;
+	}
+	if (referral) {
+		/* add referral's fs locations */
+		xb_add_32(error, &xbnew, nfsls.nl_numlocs);			/* FS_LOCATIONS */
+		for (loc = 0; !error && (loc < nfsls.nl_numlocs); loc++) {
+			xb_add_32(error, &xbnew, nfsls.nl_locations[loc]->nl_servcount);
+			for (serv = 0; !error && (serv < nfsls.nl_locations[loc]->nl_servcount); serv++) {
+				xb_add_string(error, &xbnew, nfsls.nl_locations[loc]->nl_servers[serv]->ns_name,
+					strlen(nfsls.nl_locations[loc]->nl_servers[serv]->ns_name));
+				xb_add_32(error, &xbnew, nfsls.nl_locations[loc]->nl_servers[serv]->ns_addrcount);
+				for (addr = 0; !error && (addr < nfsls.nl_locations[loc]->nl_servers[serv]->ns_addrcount); addr++)
+					xb_add_string(error, &xbnew, nfsls.nl_locations[loc]->nl_servers[serv]->ns_addresses[addr],
+						strlen(nfsls.nl_locations[loc]->nl_servers[serv]->ns_addresses[addr]));
+				xb_add_32(error, &xbnew, 0); /* empty server info */
+			}
+			xb_add_32(error, &xbnew, nfsls.nl_locations[loc]->nl_path.np_compcount);
+			for (comp = 0; !error && (comp < nfsls.nl_locations[loc]->nl_path.np_compcount); comp++)
+				xb_add_string(error, &xbnew, nfsls.nl_locations[loc]->nl_path.np_components[comp],
+					strlen(nfsls.nl_locations[loc]->nl_path.np_components[comp]));
+			xb_add_32(error, &xbnew, 0); /* empty fs location info */
+		}
+	}
+	if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFLAGS))
+		xb_get_32(error, &xb, mntflags);
+	/*
+	 * We add the following mount flags to the ones for the mounted-on mount:
+	 * MNT_DONTBROWSE - to keep the mount from showing up as a separate volume
+	 * MNT_AUTOMOUNTED - to keep DiskArb from retriggering the mount after
+	 *                   an unmount (looking for /.autodiskmounted)
+	 */
+	mntflags |= (MNT_AUTOMOUNTED | MNT_DONTBROWSE);
+	xb_add_32(error, &xbnew, mntflags);
+	if (!referral && NFS_BITMAP_ISSET(mattrs, NFS_MATTR_MNTFROM)) {
+		/* copy mntfrom string and add relpath */
+		rlen = strlen(relpath);
+		xb_get_32(error, &xb, mlen);
+		nfsmerr_if(error);
+		mlen2 = mlen + ((relpath[0] != '/') ? 1 : 0) + rlen;
+		xb_add_32(error, &xbnew, mlen2);
+		count = mlen/XDRWORD;
+		/* copy the original string */
+		while (count-- > 0)
+			xb_copy_32(error, &xb, &xbnew, val);
+		if (!error && (mlen % XDRWORD)) {
+			error = xb_get_bytes(&xb, buf, mlen%XDRWORD, 0);
+			if (!error)
+				error = xb_add_bytes(&xbnew, buf, mlen%XDRWORD, 1);
+		}
+		/* insert a '/' if the relative path doesn't start with one */
+		if (!error && (relpath[0] != '/')) {
+			buf[0] = '/';
+			error = xb_add_bytes(&xbnew, buf, 1, 1);
+		}
+		/* add the additional relative path */
+		if (!error)
+			error = xb_add_bytes(&xbnew, relpath, rlen, 1);
+		/* make sure the resulting string has the right number of pad bytes */
+		if (!error && (mlen2 != nfsm_rndup(mlen2))) {
+			bzero(buf, sizeof(buf));
+			count = nfsm_rndup(mlen2) - mlen2;
+			error = xb_add_bytes(&xbnew, buf, count, 1);
+		}
+	}
+	xb_build_done(error, &xbnew);
+
+	/* seek back and fill in the args/attrs length fields now that the total size is known */
+	end_offset = xb_offset(&xbnew);
+	if (!error) {
+		error = xb_seek(&xbnew, argslength_offset);
+		argslength = end_offset - argslength_offset + XDRWORD/*version*/;
+		xb_add_32(error, &xbnew, argslength);
+	}
+	if (!error) {
+		error = xb_seek(&xbnew, attrslength_offset);
+		xb_add_32(error, &xbnew, end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
+	}
+	nfsmerr_if(error);
+
+	/*
+	 * For kernel_mount() call, use the existing mount flags (instead of the
+	 * original flags) because flags like MNT_NOSUID and MNT_NODEV may have
+	 * been silently enforced.
+	 */
+	mntflags = vnode_vfsvisflags(vp);
+	mntflags |= (MNT_AUTOMOUNTED | MNT_DONTBROWSE);
+
+	/* do the mount */
+	error = kernel_mount(fstype, dvp, vp, path, xb_buffer_base(&xbnew), argslength,
+			mntflags, KERNEL_MOUNT_PERMIT_UNMOUNT | KERNEL_MOUNT_NOAUTH, ctx);
+
+nfsmerr:
+	if (error)
+		printf("nfs: mirror mount of %s on %s failed (%d)\n",
+			mntfromname, path, error);
+	/* clean up */
+	xb_cleanup(&xbnew);
+	if (referral)
+		nfs_fs_locations_cleanup(&nfsls);
+	if (path)
+		FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
+	if (mntfromname)
+		FREE_ZONE(mntfromname, MAXPATHLEN, M_NAMEI);
+	if (!error)
+		nfs_ephemeral_mount_harvester_start();
+	return (error);
+}
+
+/*
+ * trigger vnode functions
+ */
+
+resolver_result_t
+nfs_mirror_mount_trigger_resolve(
+	vnode_t vp,
+	const struct componentname *cnp,
+	enum path_operation pop,
+	__unused int flags,
+	__unused void *data,
+	vfs_context_t ctx)
+{
+	nfsnode_t np = VTONFS(vp);
+	vnode_t pvp = NULLVP;
+	int error = 0;
+	resolver_result_t result;
+
+	/*
+	 * We have a trigger node that doesn't have anything mounted on it yet.
+	 * We'll do the mount if either:
+	 * (a) this isn't the last component of the path OR
+	 * (b) this is an op that looks like it should trigger the mount.
+	 */
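+	/* Roughly: ops that act on the trigger directory itself (link, unlink,
+	 * mkdir, getattr, ...) leave it alone; ops that need its contents
+	 * (open, chdir, readlink, ...) trigger the mount. */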
+	if (cnp->cn_flags & ISLASTCN) {
+		switch (pop) {
+		case OP_MOUNT:
+		case OP_UNMOUNT:
+		case OP_STATFS:
+		case OP_LINK:
+		case OP_UNLINK:
+		case OP_RENAME:
+		case OP_MKNOD:
+		case OP_MKFIFO:
+		case OP_SYMLINK:
+		case OP_ACCESS:
+		case OP_GETATTR:
+		case OP_MKDIR:
+		case OP_RMDIR:
+		case OP_REVOKE:
+		case OP_GETXATTR:
+		case OP_LISTXATTR:
+			/* don't perform the mount for these operations */
+			result = vfs_resolver_result(np->n_trigseq, RESOLVER_NOCHANGE, 0);
+#ifdef NFS_TRIGGER_DEBUG
+			NP(np, "nfs trigger RESOLVE: no change, last %d nameiop %d, seq %d",
+				(cnp->cn_flags & ISLASTCN) ? 1 : 0, cnp->cn_nameiop, np->n_trigseq);
+#endif
+			return (result);
+		case OP_OPEN:
+		case OP_CHDIR:
+		case OP_CHROOT:
+		case OP_TRUNCATE:
+		case OP_COPYFILE:
+		case OP_PATHCONF:
+		case OP_READLINK:
+		case OP_SETATTR:
+		case OP_EXCHANGEDATA:
+		case OP_SEARCHFS:
+		case OP_FSCTL:
+		case OP_SETXATTR:
+		case OP_REMOVEXATTR:
+		default:
+			/* go ahead and do the mount */
+			break;
+		}
+	}
+
+	if (vnode_mountedhere(vp) != NULL) {
+		/*
+		 * Something is already mounted here, so the resolve has
+		 * effectively already happened; just report success.
+		 */
+		error = 0;
+		goto skipmount;
+	}
+
+	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx)))) {
+		result = vfs_resolver_result(np->n_trigseq, RESOLVER_ERROR, error);
+#ifdef NFS_TRIGGER_DEBUG
+		NP(np, "nfs trigger RESOLVE: busy error %d, last %d nameiop %d, seq %d",
+			error, (cnp->cn_flags & ISLASTCN) ? 1 : 0, cnp->cn_nameiop, np->n_trigseq);
+#endif
+		return (result);
+	}
+
+	pvp = vnode_getparent(vp);
+	if (pvp == NULLVP)
+		error = EINVAL;
+	if (!error)
+		error = nfs_mirror_mount_domount(pvp, vp, ctx);
+skipmount:
+	if (!error)
+		np->n_trigseq++;
+	result = vfs_resolver_result(np->n_trigseq, error ? RESOLVER_ERROR : RESOLVER_RESOLVED, error);
+#ifdef NFS_TRIGGER_DEBUG
+	NP(np, "nfs trigger RESOLVE: %s %d, last %d nameiop %d, seq %d",
+		error ? "error" : "resolved", error,
+		(cnp->cn_flags & ISLASTCN) ? 1 : 0, cnp->cn_nameiop, np->n_trigseq);
+#endif
+
+	if (pvp != NULLVP)
+		vnode_put(pvp);
+	nfs_node_clear_busy(np);
+	return (result);
+}
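+/*
+ * The resolve path above, reduced to its skeleton (a sketch; the mount
+ * work is elided and "do_the_mount" is a hypothetical placeholder):
+ *
+ *	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx))))
+ *		return (vfs_resolver_result(np->n_trigseq, RESOLVER_ERROR, error));
+ *	error = do_the_mount(...);
+ *	if (!error)
+ *		np->n_trigseq++;	// new seq lets VFS detect stale results
+ *	result = vfs_resolver_result(np->n_trigseq,
+ *			error ? RESOLVER_ERROR : RESOLVER_RESOLVED, error);
+ *	nfs_node_clear_busy(np);
+ *	return (result);
+ */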
+
+resolver_result_t
+nfs_mirror_mount_trigger_unresolve(
+	vnode_t vp,
+	int flags,
+	__unused void *data,
+	vfs_context_t ctx)
+{
+	nfsnode_t np = VTONFS(vp);
+	mount_t mp;
+	int error;
+	resolver_result_t result;
+
+	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx)))) {
+		result = vfs_resolver_result(np->n_trigseq, RESOLVER_ERROR, error);
+#ifdef NFS_TRIGGER_DEBUG
+		NP(np, "nfs trigger UNRESOLVE: busy error %d, seq %d", error, np->n_trigseq);
+#endif
+		return (result);
+	}
+
+	mp = vnode_mountedhere(vp);
+	if (!mp)
+		error = EINVAL;
+	if (!error)
+		error = vfs_unmountbyfsid(&(vfs_statfs(mp)->f_fsid), flags, ctx);
+	if (!error)
+		np->n_trigseq++;
+	result = vfs_resolver_result(np->n_trigseq, error ? RESOLVER_ERROR : RESOLVER_UNRESOLVED, error);
+#ifdef NFS_TRIGGER_DEBUG
+	NP(np, "nfs trigger UNRESOLVE: %s %d, seq %d",
+		error ? "error" : "unresolved", error, np->n_trigseq);
+#endif
+	nfs_node_clear_busy(np);
+	return (result);
+}
+
+resolver_result_t
+nfs_mirror_mount_trigger_rearm(
+	vnode_t vp,
+	__unused int flags,
+	__unused void *data,
+	vfs_context_t ctx)
+{
+	nfsnode_t np = VTONFS(vp);
+	int error;
+	resolver_result_t result;
+
+	if ((error = nfs_node_set_busy(np, vfs_context_thread(ctx)))) {
+		result = vfs_resolver_result(np->n_trigseq, RESOLVER_ERROR, error);
+#ifdef NFS_TRIGGER_DEBUG
+		NP(np, "nfs trigger REARM: busy error %d, seq %d", error, np->n_trigseq);
+#endif
+		return (result);
+	}
+
+	np->n_trigseq++;
+	result = vfs_resolver_result(np->n_trigseq,
+			vnode_mountedhere(vp) ? RESOLVER_RESOLVED : RESOLVER_UNRESOLVED, 0);
+#ifdef NFS_TRIGGER_DEBUG
+	NP(np, "nfs trigger REARM: %s, seq %d",
+		vnode_mountedhere(vp) ? "resolved" : "unresolved", np->n_trigseq);
+#endif
+	nfs_node_clear_busy(np);
+	return (result);
+}
+
+/*
+ * Periodically attempt to unmount ephemeral (mirror) mounts, to limit
+ * the number of unused mounts.
+ */
+
+#define NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL	120	/* how often the harvester runs */
+struct nfs_ephemeral_mount_harvester_info {
+	fsid_t		fsid;		/* FSID that we need to try to unmount */
+	uint32_t	mountcount;	/* count of ephemeral mounts seen in scan */
+};
+/* various globals for the harvester */
+static thread_call_t nfs_ephemeral_mount_harvester_timer = NULL;
+static int nfs_ephemeral_mount_harvester_on = 0;
+
+kern_return_t thread_terminate(thread_t);
+
+static int
+nfs_ephemeral_mount_harvester_callback(mount_t mp, void *arg)
+{
+	struct nfs_ephemeral_mount_harvester_info *hinfo = arg;
+	struct nfsmount *nmp;
+	struct timeval now;
+
+	if (strcmp(mp->mnt_vfsstat.f_fstypename, "nfs"))
+		return (VFS_RETURNED);
+	nmp = VFSTONFS(mp);
+	if (!nmp || !NMFLAG(nmp, EPHEMERAL))
+		return (VFS_RETURNED);
+	hinfo->mountcount++;
+
+	/* avoid unmounting mounts that have been triggered within the last harvest interval */
+	microtime(&now);
+	if ((nmp->nm_mounttime >> 32) > ((uint32_t)now.tv_sec - NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL))
+		return (VFS_RETURNED);
+
+	if (hinfo->fsid.val[0] || hinfo->fsid.val[1]) {
+		/* attempt to unmount previously-found ephemeral mount */
+		vfs_unmountbyfsid(&hinfo->fsid, 0, vfs_context_kernel());
+		hinfo->fsid.val[0] = hinfo->fsid.val[1] = 0;
 	}
-	sbp = vfs_statfs(mp);
-	sbp->f_bsize = nmp->nm_fsattr.nfsa_bsize;
-	sbp->f_blocks = nmp->nm_fsattr.nfsa_space_total / sbp->f_bsize;
-	sbp->f_bfree = nmp->nm_fsattr.nfsa_space_free / sbp->f_bsize;
-	sbp->f_bavail = nmp->nm_fsattr.nfsa_space_avail / sbp->f_bsize;
-	sbp->f_bused = (nmp->nm_fsattr.nfsa_space_total / sbp->f_bsize) -
-			(nmp->nm_fsattr.nfsa_space_free / sbp->f_bsize);
-	sbp->f_files = nmp->nm_fsattr.nfsa_files_total;
-	sbp->f_ffree = nmp->nm_fsattr.nfsa_files_free;
-	sbp->f_iosize = nfs_iosize;
 
 	/*
-	 * Calculate the size used for I/O buffers.  Use the larger
-	 * of the two sizes to minimise NFS requests but make sure
-	 * that it is at least one VM page to avoid wasting buffer
-	 * space and to allow easy mmapping of I/O buffers.
-	 * The read/write RPC calls handle the splitting up of
-	 * buffers into multiple requests if the buffer size is
-	 * larger than the I/O size.
+	 * We can't unmount here because we hold a mount iterator ref
+	 * on mp, so save its fsid for a later call to unmount instead.
 	 */
-	iosize = max(nmp->nm_rsize, nmp->nm_wsize);
-	if (iosize < PAGE_SIZE)
-		iosize = PAGE_SIZE;
-	nmp->nm_biosize = trunc_page_32(iosize);
+	hinfo->fsid.val[0] = mp->mnt_vfsstat.f_fsid.val[0];
+	hinfo->fsid.val[1] = mp->mnt_vfsstat.f_fsid.val[1];
 
-	/*
-	 * V3 mounts give us a (relatively) reliable remote access(2)
-	 * call, so advertise the fact.
-	 *
-	 * XXX this may not be the best way to go, as the granularity
-	 *     offered isn't a good match to our needs.
-	 */
-	if (nmp->nm_vers != NFS_VER2)
-		vfs_setauthopaqueaccess(mp);
+	return (VFS_RETURNED);
+}
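+/*
+ * The deferral above, schematically (a sketch): while vfs_iterate()
+ * holds an iterator reference on mp, unmounting mp directly would
+ * deadlock, so each callback unmounts the mount found by the previous
+ * callback and stashes its own fsid:
+ *
+ *	callback(mp):
+ *		if (saved fsid is set)
+ *			vfs_unmountbyfsid(&saved_fsid, 0, vfs_context_kernel());
+ *		saved_fsid = mp->mnt_vfsstat.f_fsid;	// defer this one
+ *
+ * The last saved fsid is handled after vfs_iterate() returns (see
+ * nfs_ephemeral_mount_harvester below).
+ */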
 
-	if (nmp->nm_flag & NFSMNT_LOCALLOCKS)
-		vfs_setlocklocal(nmp->nm_mountp);
-	if (!(nmp->nm_flag & (NFSMNT_NOLOCKS|NFSMNT_LOCALLOCKS)))
-		nfs_lockd_mount_change(1);
+/*
+ * Spawn a thread to do the ephemeral mount harvesting.
+ */
+static void
+nfs_ephemeral_mount_harvester_timer_func(void)
+{
+	thread_t thd;
 
-	lck_mtx_lock(&nmp->nm_lock);
-	nmp->nm_state |= NFSSTA_MOUNTED;
-	lck_mtx_unlock(&nmp->nm_lock);
-	return (0);
-bad:
-	/* mark the socket for termination */
-	lck_mtx_lock(&nmp->nm_lock);
-	nmp->nm_sockflags |= NMSOCK_UNMOUNT;
-	/* wait for any socket poking to complete */
-	while (nmp->nm_sockflags & NMSOCK_POKE)
-		msleep(&nmp->nm_sockflags, &nmp->nm_lock, PZERO-1, "nfswaitpoke", &ts);
-	/* wait for the socket thread to terminate */
-	while (nmp->nm_sockthd) {
-		wakeup(&nmp->nm_sockthd);
-		msleep(&nmp->nm_sockthd, &nmp->nm_lock, PZERO-1, "nfswaitsockthd", &ts);
+	if (kernel_thread_start(nfs_ephemeral_mount_harvester, NULL, &thd) == KERN_SUCCESS)
+		thread_deallocate(thd);
+}
+
+/*
+ * Iterate all mounts looking for NFS ephemeral mounts to try to unmount.
+ */
+void
+nfs_ephemeral_mount_harvester(__unused void *arg, __unused wait_result_t wr)
+{
+	struct nfs_ephemeral_mount_harvester_info hinfo;
+	uint64_t deadline;
+
+	hinfo.mountcount = 0;
+	hinfo.fsid.val[0] = hinfo.fsid.val[1] = 0;
+	vfs_iterate(VFS_ITERATE_TAIL_FIRST, nfs_ephemeral_mount_harvester_callback, &hinfo);
+	if (hinfo.fsid.val[0] || hinfo.fsid.val[1]) {
+		/* attempt to unmount last found ephemeral mount */
+		vfs_unmountbyfsid(&hinfo.fsid, 0, vfs_context_kernel());
 	}
-	/* tear down the socket */
-	lck_mtx_unlock(&nmp->nm_lock);
-	nfs_disconnect(nmp);
-	if (nmp->nm_vers >= NFS_VER4) {
-		if (nmp->nm_cbid)
-			nfs4_mount_callback_shutdown(nmp);
-		if (nmp->nm_renew_timer) {
-			thread_call_cancel(nmp->nm_renew_timer);
-			thread_call_free(nmp->nm_renew_timer);
-		}
-		if (nmp->nm_longid) {
-			/* remove/deallocate the client ID data */
-			lck_mtx_lock(nfs_global_mutex);
-			TAILQ_REMOVE(&nfsclientids, nmp->nm_longid, nci_link);
-			if (nmp->nm_longid->nci_id)
-				FREE(nmp->nm_longid->nci_id, M_TEMP);
-			FREE(nmp->nm_longid, M_TEMP);
-			lck_mtx_unlock(nfs_global_mutex);
-		}
-		if (IS_VALID_CRED(nmp->nm_mcred))
-			kauth_cred_unref(&nmp->nm_mcred);
+
+	lck_mtx_lock(nfs_global_mutex);
+	if (!hinfo.mountcount) {
+		/* no more ephemeral mounts - don't need timer */
+		nfs_ephemeral_mount_harvester_on = 0;
+	} else {
+		/* re-arm the timer */
+		clock_interval_to_deadline(NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL, NSEC_PER_SEC, &deadline);
+		thread_call_enter_delayed(nfs_ephemeral_mount_harvester_timer, deadline);
+		nfs_ephemeral_mount_harvester_on = 1;
 	}
-	lck_mtx_destroy(&nmp->nm_lock, nfs_mount_grp);
-	FREE_ZONE((caddr_t)nmp, sizeof (struct nfsmount), M_NFSMNT);
-	mbuf_freem(nam);
-	return (error);
+	lck_mtx_unlock(nfs_global_mutex);
+
+	/* thread done */
+	thread_terminate(current_thread());
 }
 
+/*
+ * Make sure the NFS ephemeral mount harvester timer is running.
+ */
 void
-nfs3_umount_rpc(struct nfsmount *nmp, vfs_context_t ctx, int timeo)
+nfs_ephemeral_mount_harvester_start(void)
+{
+	uint64_t deadline;
+
+	lck_mtx_lock(nfs_global_mutex);
+	if (nfs_ephemeral_mount_harvester_on) {
+		lck_mtx_unlock(nfs_global_mutex);
+		return;
+	}
+	if (nfs_ephemeral_mount_harvester_timer == NULL)
+		nfs_ephemeral_mount_harvester_timer = thread_call_allocate((thread_call_func_t)nfs_ephemeral_mount_harvester_timer_func, NULL);
+	clock_interval_to_deadline(NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL, NSEC_PER_SEC, &deadline);
+	thread_call_enter_delayed(nfs_ephemeral_mount_harvester_timer, deadline);
+	nfs_ephemeral_mount_harvester_on = 1;
+	lck_mtx_unlock(nfs_global_mutex);
+}
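+/*
+ * The timer idiom above, in brief: the thread_call is one-shot, so it
+ * must be re-armed after each run, e.g. (a sketch using the names above)
+ *
+ *	clock_interval_to_deadline(NFS_EPHEMERAL_MOUNT_HARVEST_INTERVAL,
+ *			NSEC_PER_SEC, &deadline);
+ *	thread_call_enter_delayed(nfs_ephemeral_mount_harvester_timer, deadline);
+ *
+ * The harvester re-arms it only while ephemeral mounts remain, so the
+ * timer goes quiet once the last one is unmounted; all transitions are
+ * serialized by nfs_global_mutex.
+ */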
+
+#endif
+
+/*
+ * Send a MOUNT protocol MOUNT request to the server to get the initial file handle (and security flavors).
+ */
+int
+nfs3_mount_rpc(struct nfsmount *nmp, struct sockaddr *sa, int sotype, int nfsvers, char *path, vfs_context_t ctx, int timeo, fhandle_t *fh, struct nfs_sec *sec)
 {
-	int error = 0, auth_len, slen;
+	int error = 0, slen, mntproto;
 	thread_t thd = vfs_context_thread(ctx);
 	kauth_cred_t cred = vfs_context_ucred(ctx);
-	char *path;
 	uint64_t xid = 0;
 	struct nfsm_chain nmreq, nmrep;
 	mbuf_t mreq;
-	uint32_t mntport = 0;
-	struct sockaddr *nam = mbuf_data(nmp->nm_nam);
-	struct sockaddr_in saddr;
-
-	bcopy(nam, &saddr, min(sizeof(saddr), nam->sa_len));
-	auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
-			nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
-			5 * NFSX_UNSIGNED;
+	uint32_t mntvers, mntport, val;
+	struct sockaddr_storage ss;
+	struct sockaddr *saddr = (struct sockaddr*)&ss;
+
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
 
-	/* send portmap request to get mountd port */
-	saddr.sin_port = htons(PMAPPORT);
-	nfsm_chain_build_alloc_init(error, &nmreq, 4*NFSX_UNSIGNED);
-	nfsm_chain_add_32(error, &nmreq, RPCPROG_MNT);
-	nfsm_chain_add_32(error, &nmreq, RPCMNT_VER1);
-	nfsm_chain_add_32(error, &nmreq, IPPROTO_UDP);
-	nfsm_chain_add_32(error, &nmreq, 0);
+	mntvers = (nfsvers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
+	mntproto = (NM_OMFLAG(nmp, MNTUDP) || (sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP;
+	sec->count = 0;
+
+	bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
+	if (saddr->sa_family == AF_INET) {
+		if (nmp->nm_mountport)
+			((struct sockaddr_in*)saddr)->sin_port = htons(nmp->nm_mountport);
+		mntport = ntohs(((struct sockaddr_in*)saddr)->sin_port);
+	} else {
+		if (nmp->nm_mountport)
+			((struct sockaddr_in6*)saddr)->sin6_port = htons(nmp->nm_mountport);
+		mntport = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
+	}
+
+	while (!mntport) {
+		error = nfs_portmap_lookup(nmp, ctx, saddr, NULL, RPCPROG_MNT, mntvers, mntproto, timeo);
+		nfsmout_if(error);
+		if (saddr->sa_family == AF_INET)
+			mntport = ntohs(((struct sockaddr_in*)saddr)->sin_port);
+		else
+			mntport = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
+		if (!mntport) {
+			/* if not found and TCP, then retry with UDP */
+			if (mntproto == IPPROTO_UDP) {
+				error = EPROGUNAVAIL;
+				break;
+			}
+			mntproto = IPPROTO_UDP;
+			bcopy(sa, saddr, min(sizeof(ss), sa->sa_len));
+		}
+	}
+	nfsmout_if(error || !mntport);
+
+	/* MOUNT protocol MOUNT request */
+	slen = strlen(path);
+	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_UNSIGNED + nfsm_rndup(slen));
+	nfsm_chain_add_name(error, &nmreq, path, slen, nmp);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
-	error = nfsm_rpchead2(SOCK_DGRAM, PMAPPROG, PMAPVERS, PMAPPROC_GETPORT,
-			RPCAUTH_SYS, auth_len, cred, NULL, nmreq.nmc_mhead, &xid, &mreq);
+	error = nfsm_rpchead2(nmp, (mntproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
+			RPCPROG_MNT, mntvers, RPCMNT_MOUNT,
+			RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead, &xid, &mreq);
 	nfsmout_if(error);
 	nmreq.nmc_mhead = NULL;
-	error = nfs_aux_request(nmp, thd, &saddr, mreq, R_XID32(xid), 0, timeo, &nmrep);
-	nfsmout_if(error);
-
-	/* grab mountd port from portmap response */
-	nfsm_chain_get_32(error, &nmrep, mntport);
+	error = nfs_aux_request(nmp, thd, saddr, NULL,
+			((mntproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM),
+			mreq, R_XID32(xid), 1, timeo, &nmrep);
 	nfsmout_if(error);
+	nfsm_chain_get_32(error, &nmrep, val);
+	if (!error && val)
+		error = val;
+	nfsm_chain_get_fh(error, &nmrep, nfsvers, fh);
+	if (!error && (nfsvers > NFS_VER2)) {
+		sec->count = NX_MAX_SEC_FLAVORS;
+		error = nfsm_chain_get_secinfo(&nmrep, &sec->flavors[0], &sec->count);
+	}
+nfsmout:
 	nfsm_chain_cleanup(&nmreq);
 	nfsm_chain_cleanup(&nmrep);
-	xid = 0;
+	return (error);
+}
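+/*
+ * Typical use, sketched (a hypothetical caller; fh and sec are outputs
+ * filled in on success):
+ *
+ *	fhandle_t fh;
+ *	struct nfs_sec sec;
+ *	error = nfs3_mount_rpc(nmp, saddr, SOCK_STREAM, NFS_VER3,
+ *			"/export/home", ctx, 30, &fh, &sec);
+ *	// on success, fh holds the file handle for "/export/home" and
+ *	// sec lists the security flavors offered by the server (v3 only)
+ */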
+
+
+/*
+ * Send a MOUNT protocol UNMOUNT request to tell the server we've unmounted the export.
+ */
+void
+nfs3_umount_rpc(struct nfsmount *nmp, vfs_context_t ctx, int timeo)
+{
+	int error = 0, slen, mntproto;
+	thread_t thd = vfs_context_thread(ctx);
+	kauth_cred_t cred = vfs_context_ucred(ctx);
+	char *path;
+	uint64_t xid = 0;
+	struct nfsm_chain nmreq, nmrep;
+	mbuf_t mreq;
+	uint32_t mntvers, mntport;
+	struct sockaddr_storage ss;
+	struct sockaddr *saddr = (struct sockaddr*)&ss;
+
+	if (!nmp->nm_saddr)
+		return;
+
+	nfsm_chain_null(&nmreq);
+	nfsm_chain_null(&nmrep);
+
+	mntvers = (nmp->nm_vers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
+	mntproto = (NM_OMFLAG(nmp, MNTUDP) || (nmp->nm_sotype == SOCK_DGRAM)) ? IPPROTO_UDP : IPPROTO_TCP;
+	mntport = nmp->nm_mountport;
+
+	bcopy(nmp->nm_saddr, saddr, min(sizeof(ss), nmp->nm_saddr->sa_len));
+	if (saddr->sa_family == AF_INET)
+		((struct sockaddr_in*)saddr)->sin_port = htons(mntport);
+	else
+		((struct sockaddr_in6*)saddr)->sin6_port = htons(mntport);
+
+	while (!mntport) {
+		error = nfs_portmap_lookup(nmp, ctx, saddr, NULL, RPCPROG_MNT, mntvers, mntproto, timeo);
+		nfsmout_if(error);
+		if (saddr->sa_family == AF_INET)
+			mntport = ntohs(((struct sockaddr_in*)saddr)->sin_port);
+		else
+			mntport = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
+		/* if not found and mntvers > VER1, then retry with VER1 */
+		if (!mntport) {
+			if (mntvers > RPCMNT_VER1) {
+				mntvers = RPCMNT_VER1;
+			} else if (mntproto == IPPROTO_TCP) {
+				mntproto = IPPROTO_UDP;
+				mntvers = (nmp->nm_vers == NFS_VER2) ? RPCMNT_VER1 : RPCMNT_VER3;
+			} else {
+				break;
+			}
+			bcopy(nmp->nm_saddr, saddr, min(sizeof(ss), nmp->nm_saddr->sa_len));
+		}
+	}
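+	/*
+	 * Net effect of the loop above for a v3 mount: fall back in the
+	 * order (VER3,TCP) -> (VER1,TCP) -> (VER3,UDP) -> (VER1,UDP),
+	 * giving up only when version 1 over UDP is also unregistered.
+	 */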
+	nfsmout_if(!mntport);
 
 	/* MOUNT protocol UNMOUNT request */
-	saddr.sin_port = htons(mntport);
 	path = &vfs_statfs(nmp->nm_mountp)->f_mntfromname[0];
 	while (*path && (*path != '/'))
 		path++;
 	slen = strlen(path);
 	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_UNSIGNED + nfsm_rndup(slen));
-	nfsm_chain_add_string(error, &nmreq, path, slen);
+	nfsm_chain_add_name(error, &nmreq, path, slen, nmp);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
-	error = nfsm_rpchead2(SOCK_DGRAM, RPCPROG_MNT, RPCMNT_VER1, RPCMNT_UMOUNT,
-			RPCAUTH_SYS, auth_len, cred, NULL, nmreq.nmc_mhead, &xid, &mreq);
+	error = nfsm_rpchead2(nmp, (mntproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
+			RPCPROG_MNT, RPCMNT_VER1, RPCMNT_UMOUNT,
+			RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead, &xid, &mreq);
 	nfsmout_if(error);
 	nmreq.nmc_mhead = NULL;
-	error = nfs_aux_request(nmp, thd, &saddr, mreq, R_XID32(xid), 1, timeo, &nmrep);
+	error = nfs_aux_request(nmp, thd, saddr, NULL,
+		((mntproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM),
+		mreq, R_XID32(xid), 1, timeo, &nmrep);
 nfsmout:
 	nfsm_chain_cleanup(&nmreq);
 	nfsm_chain_cleanup(&nmrep);
@@ -1949,15 +4063,15 @@ nfs_vfs_unmount(
 {
 	struct nfsmount *nmp;
 	vnode_t vp;
-	int error, flags = 0, docallback;
-	struct nfsreq *req, *treq;
-	struct nfs_reqqhead iodq;
+	int error, flags = 0;
 	struct timespec ts = { 1, 0 };
-	struct nfs_open_owner *noop, *nextnoop;
-	nfsnode_t np;
 
 	nmp = VFSTONFS(mp);
 	lck_mtx_lock(&nmp->nm_lock);
+	/*
+	 * Set the flag indicating that an unmount attempt is in progress.
+	 */
+	nmp->nm_state |= NFSSTA_UNMOUNTING;
 	/*
 	 * During a force unmount we want to...
 	 *   Mark that we are doing a force unmount.
@@ -1966,15 +4080,19 @@ nfs_vfs_unmount(
 	if (mntflags & MNT_FORCE) {
 		flags |= FORCECLOSE;
 		nmp->nm_state |= NFSSTA_FORCE;
-		nmp->nm_flag |= NFSMNT_SOFT;
+		NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_SOFT);
 	}
+	/*
+	 * Wait for any in-progress monitored node scan to complete.
+	 */
+	while (nmp->nm_state & NFSSTA_MONITOR_SCAN)
+		msleep(&nmp->nm_state, &nmp->nm_lock, PZERO-1, "nfswaitmonscan", &ts);
 	/*
 	 * Goes something like this..
 	 * - Call vflush() to clear out vnodes for this file system,
 	 *   except for the swap files. Deal with them in 2nd pass.
 	 * - Decrement reference on the vnode representing remote root.
-	 * - Close the socket
-	 * - Free up the data structures
+	 * - Clean up the NFS mount structure.
 	 */
 	vp = NFSTOV(nmp->nm_dnp);
 	lck_mtx_unlock(&nmp->nm_lock);
@@ -1989,14 +4107,18 @@ nfs_vfs_unmount(
 		error = vflush(mp, NULLVP, flags); /* locks vp in the process */
 	} else {
 		if (vnode_isinuse(vp, 1))
-			return (EBUSY);
-		error = vflush(mp, vp, flags);
+			error = EBUSY;
+		else
+			error = vflush(mp, vp, flags);
 	}
-	if (error)
+	if (error) {
+		lck_mtx_lock(&nmp->nm_lock);
+		nmp->nm_state &= ~NFSSTA_UNMOUNTING;
+		lck_mtx_unlock(&nmp->nm_lock);
 		return (error);
+	}
 
 	lck_mtx_lock(&nmp->nm_lock);
-	nmp->nm_state &= ~NFSSTA_MOUNTED;
 	nmp->nm_dnp = NULL;
 	lck_mtx_unlock(&nmp->nm_lock);
 
@@ -2010,26 +4132,86 @@ nfs_vfs_unmount(
 
 	vflush(mp, NULLVP, FORCECLOSE);
 
-	/*
-	 * Destroy any RPCSEC_GSS contexts
-	 */
-	if (!TAILQ_EMPTY(&nmp->nm_gsscl))
-		nfs_gss_clnt_ctx_unmount(nmp, mntflags);
+	nfs_mount_cleanup(nmp);
+	return (0);
+}
 
-	/* mark the socket for termination */
-	lck_mtx_lock(&nmp->nm_lock);
-	nmp->nm_sockflags |= NMSOCK_UNMOUNT;
+/*
+ * cleanup/destroy NFS fs locations structure
+ */
+void
+nfs_fs_locations_cleanup(struct nfs_fs_locations *nfslsp)
+{
+	struct nfs_fs_location *fsl;
+	struct nfs_fs_server *fss;
+	struct nfs_fs_path *fsp;
+	uint32_t loc, serv, addr, comp;
+
+	/* free up fs locations */
+	if (!nfslsp->nl_numlocs || !nfslsp->nl_locations)
+		return;
+
+	for (loc = 0; loc < nfslsp->nl_numlocs; loc++) {
+		fsl = nfslsp->nl_locations[loc];
+		if (!fsl)
+			continue;
+		if ((fsl->nl_servcount > 0) && fsl->nl_servers) {
+			for (serv = 0; serv < fsl->nl_servcount; serv++) {
+				fss = fsl->nl_servers[serv];
+				if (!fss)
+					continue;
+				if ((fss->ns_addrcount > 0) && fss->ns_addresses) {
+					for (addr = 0; addr < fss->ns_addrcount; addr++)
+						FREE(fss->ns_addresses[addr], M_TEMP);
+					FREE(fss->ns_addresses, M_TEMP);
+				}
+				FREE(fss->ns_name, M_TEMP);
+				FREE(fss, M_TEMP);
+			}
+			FREE(fsl->nl_servers, M_TEMP);
+		}
+		fsp = &fsl->nl_path;
+		if (fsp->np_compcount && fsp->np_components) {
+			for (comp = 0; comp < fsp->np_compcount; comp++)
+				if (fsp->np_components[comp])
+					FREE(fsp->np_components[comp], M_TEMP);
+			FREE(fsp->np_components, M_TEMP);
+		}
+		FREE(fsl, M_TEMP);
+	}
+	FREE(nfslsp->nl_locations, M_TEMP);
+	nfslsp->nl_numlocs = 0;
+	nfslsp->nl_locations = NULL;
+}
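+/*
+ * For reference, the ownership tree torn down above (a sketch of the
+ * nesting; frees proceed leaf-first):
+ *
+ *	nfs_fs_locations
+ *	  nl_locations[nl_numlocs] -> nfs_fs_location
+ *	    nl_servers[nl_servcount] -> nfs_fs_server
+ *	      ns_name
+ *	      ns_addresses[ns_addrcount]
+ *	    nl_path.np_components[np_compcount]
+ */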
+
+/*
+ * cleanup/destroy an nfsmount
+ */
+void
+nfs_mount_cleanup(struct nfsmount *nmp)
+{
+	struct nfsreq *req, *treq;
+	struct nfs_reqqhead iodq;
+	struct timespec ts = { 1, 0 };
+	struct nfs_open_owner *noop, *nextnoop;
+	nfsnode_t np;
+	int docallback;
 
 	/* stop callbacks */
-	if ((nmp->nm_vers >= NFS_VER4) && nmp->nm_cbid)
+	if ((nmp->nm_vers >= NFS_VER4) && !NMFLAG(nmp, NOCALLBACK) && nmp->nm_cbid)
 		nfs4_mount_callback_shutdown(nmp);
 
-	/* wait for any socket poking to complete */
-	while (nmp->nm_sockflags & NMSOCK_POKE)
-		msleep(&nmp->nm_sockflags, &nmp->nm_lock, PZERO-1, "nfswaitpoke", &ts);
+	/* Destroy any RPCSEC_GSS contexts */
+	if (!TAILQ_EMPTY(&nmp->nm_gsscl))
+		nfs_gss_clnt_ctx_unmount(nmp);
+
+	/* mark the socket for termination */
+	lck_mtx_lock(&nmp->nm_lock);
+	nmp->nm_sockflags |= NMSOCK_UNMOUNT;
 
 	/* Have the socket thread send the unmount RPC, if requested/appropriate. */
-	if ((nmp->nm_vers < NFS_VER4) && !(mntflags & MNT_FORCE) && (nmp->nm_flag & NFSMNT_CALLUMNT))
+	if ((nmp->nm_vers < NFS_VER4) && (nmp->nm_state & NFSSTA_MOUNTED) &&
+	    !(nmp->nm_state & NFSSTA_FORCE) && NMFLAG(nmp, CALLUMNT))
 		nfs_mount_sock_thread_wake(nmp);
 
 	/* wait for the socket thread to terminate */
@@ -2043,15 +4225,16 @@ nfs_vfs_unmount(
 	/* tear down the socket */
 	nfs_disconnect(nmp);
 
-	vfs_setfsprivate(mp, NULL);
+	if (nmp->nm_mountp)
+		vfs_setfsprivate(nmp->nm_mountp, NULL);
 
 	lck_mtx_lock(&nmp->nm_lock);
 
-	if ((nmp->nm_vers >= NFS_VER4) && nmp->nm_cbid) {
-		/* clear out any pending recall requests */
-		while ((np = TAILQ_FIRST(&nmp->nm_recallq))) {
-			TAILQ_REMOVE(&nmp->nm_recallq, np, n_dlink);
-			np->n_dlink.tqe_next = NFSNOLIST;
+	if ((nmp->nm_vers >= NFS_VER4) && !NMFLAG(nmp, NOCALLBACK) && nmp->nm_cbid) {
+		/* clear out any pending delegation return requests */
+		while ((np = TAILQ_FIRST(&nmp->nm_dreturnq))) {
+			TAILQ_REMOVE(&nmp->nm_dreturnq, np, n_dreturn);
+			np->n_dreturn.tqe_next = NFSNOLIST;
 		}
 	}
 
@@ -2061,11 +4244,23 @@ nfs_vfs_unmount(
 		thread_call_free(nmp->nm_renew_timer);
 	}
 
-	mbuf_freem(nmp->nm_nam);
+	if (nmp->nm_saddr)
+		FREE(nmp->nm_saddr, M_SONAME);
+	if ((nmp->nm_vers < NFS_VER4) && nmp->nm_rqsaddr)
+		FREE(nmp->nm_rqsaddr, M_SONAME);
 	lck_mtx_unlock(&nmp->nm_lock);
 
-	if ((nmp->nm_vers < NFS_VER4) && !(nmp->nm_flag & (NFSMNT_NOLOCKS|NFSMNT_LOCALLOCKS)))
-		nfs_lockd_mount_change(-1);
+	if (nmp->nm_state & NFSSTA_MOUNTED)
+		switch (nmp->nm_lockmode) {
+		case NFS_LOCK_MODE_DISABLED:
+		case NFS_LOCK_MODE_LOCAL:
+			break;
+		case NFS_LOCK_MODE_ENABLED:
+		default:
+			if (nmp->nm_vers <= NFS_VER3)
+				nfs_lockd_mount_unregister(nmp);
+			break;
+		}
 
 	if ((nmp->nm_vers >= NFS_VER4) && nmp->nm_longid) {
 		/* remove/deallocate the client ID data */
@@ -2126,24 +4321,41 @@ nfs_vfs_unmount(
 			req->r_callback.rcb_func(req);
 	}
 
-	/* clean up open owner list */
+	/* clean up common state */
+	lck_mtx_lock(&nmp->nm_lock);
+	while ((np = LIST_FIRST(&nmp->nm_monlist))) {
+		LIST_REMOVE(np, n_monlink);
+		np->n_monlink.le_next = NFSNOLIST;
+	}
+	TAILQ_FOREACH_SAFE(noop, &nmp->nm_open_owners, noo_link, nextnoop) {
+		TAILQ_REMOVE(&nmp->nm_open_owners, noop, noo_link);
+		noop->noo_flags &= ~NFS_OPEN_OWNER_LINK;
+		if (noop->noo_refcnt)
+			continue;
+		nfs_open_owner_destroy(noop);
+	}
+	lck_mtx_unlock(&nmp->nm_lock);
+
+	/* clean up NFSv4 state */
 	if (nmp->nm_vers >= NFS_VER4) {
 		lck_mtx_lock(&nmp->nm_lock);
-		TAILQ_FOREACH_SAFE(noop, &nmp->nm_open_owners, noo_link, nextnoop) {
-			TAILQ_REMOVE(&nmp->nm_open_owners, noop, noo_link);
-			noop->noo_flags &= ~NFS_OPEN_OWNER_LINK;
-			if (noop->noo_refcnt)
-				continue;
-			nfs_open_owner_destroy(noop);
+		while ((np = TAILQ_FIRST(&nmp->nm_delegations))) {
+			TAILQ_REMOVE(&nmp->nm_delegations, np, n_dlink);
+			np->n_dlink.tqe_next = NFSNOLIST;
 		}
 		lck_mtx_unlock(&nmp->nm_lock);
-		if (IS_VALID_CRED(nmp->nm_mcred))
-			kauth_cred_unref(&nmp->nm_mcred);
 	}
+	if (IS_VALID_CRED(nmp->nm_mcred))
+		kauth_cred_unref(&nmp->nm_mcred);
 
+	nfs_fs_locations_cleanup(&nmp->nm_locations);
+
+	if (nmp->nm_args)
+		xb_free(nmp->nm_args);
 	lck_mtx_destroy(&nmp->nm_lock, nfs_mount_grp);
+	if (nmp->nm_fh)
+		FREE(nmp->nm_fh, M_TEMP);
 	FREE_ZONE((caddr_t)nmp, sizeof (struct nfsmount), M_NFSMNT);
-	return (0);
 }
 
 /*
@@ -2192,8 +4404,8 @@ nfs_vfs_quotactl(
 int
 nfs3_getquota(struct nfsmount *nmp, vfs_context_t ctx, uid_t id, int type, struct dqblk *dqb)
 {
-	int error = 0, auth_len, slen, timeo;
-	int rqvers = (type == GRPQUOTA) ? RPCRQUOTA_EXT_VER : RPCRQUOTA_VER;
+	int error = 0, slen, timeo;
+	int rqport = 0, rqproto, rqvers = (type == GRPQUOTA) ? RPCRQUOTA_EXT_VER : RPCRQUOTA_VER;
 	thread_t thd = vfs_context_thread(ctx);
 	kauth_cred_t cred = vfs_context_ucred(ctx);
 	char *path;
@@ -2201,70 +4413,70 @@ nfs3_getquota(struct nfsmount *nmp, vfs_context_t ctx, uid_t id, int type, struc
 	struct nfsm_chain nmreq, nmrep;
 	mbuf_t mreq;
 	uint32_t val = 0, bsize = 0;
-	struct sockaddr *nam = mbuf_data(nmp->nm_nam);
-	struct sockaddr_in saddr;
+	struct sockaddr *rqsaddr;
 	struct timeval now;
 
-	bcopy(nam, &saddr, min(sizeof(saddr), nam->sa_len));
-	auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
-			nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
-			5 * NFSX_UNSIGNED;
-	timeo = (nmp->nm_flag & NFSMNT_SOFT) ? 10 : 60;
-	nfsm_chain_null(&nmreq);
-	nfsm_chain_null(&nmrep);
+	if (!nmp->nm_saddr)
+		return (ENXIO);
 
-	/* check if we have a recently cached rquota port */
-	if (nmp->nm_rqport) {
-		microuptime(&now);
-		if ((nmp->nm_rqportstamp + 60) >= (uint32_t)now.tv_sec)
-			goto got_rqport;
-	}
+	if (NMFLAG(nmp, NOQUOTA))
+		return (ENOTSUP);
 
-	/* send portmap request to get rquota port */
-	saddr.sin_port = htons(PMAPPORT);
-	nfsm_chain_build_alloc_init(error, &nmreq, 4*NFSX_UNSIGNED);
-	nfsm_chain_add_32(error, &nmreq, RPCPROG_RQUOTA);
-	nfsm_chain_add_32(error, &nmreq, rqvers);
-	nfsm_chain_add_32(error, &nmreq, IPPROTO_UDP);
-	nfsm_chain_add_32(error, &nmreq, 0);
-	nfsm_chain_build_done(error, &nmreq);
-	nfsmout_if(error);
-	error = nfsm_rpchead2(SOCK_DGRAM, PMAPPROG, PMAPVERS, PMAPPROC_GETPORT,
-			RPCAUTH_SYS, auth_len, cred, NULL, nmreq.nmc_mhead, &xid, &mreq);
-	nfsmout_if(error);
-	nmreq.nmc_mhead = NULL;
-	error = nfs_aux_request(nmp, thd, &saddr, mreq, R_XID32(xid), 0, timeo, &nmrep);
-	nfsmout_if(error);
+	if (!nmp->nm_rqsaddr)
+		MALLOC(nmp->nm_rqsaddr, struct sockaddr *, sizeof(struct sockaddr_storage), M_SONAME, M_WAITOK|M_ZERO);
+	if (!nmp->nm_rqsaddr)
+		return (ENOMEM);
+	rqsaddr = nmp->nm_rqsaddr;
+	if (rqsaddr->sa_family == AF_INET6)
+		rqport = ntohs(((struct sockaddr_in6*)rqsaddr)->sin6_port);
+	else if (rqsaddr->sa_family == AF_INET)
+		rqport = ntohs(((struct sockaddr_in*)rqsaddr)->sin_port);
 
-	/* grab rquota port from portmap response */
-	nfsm_chain_get_32(error, &nmrep, val);
-	nfsmout_if(error);
-	nmp->nm_rqport = val;
+	timeo = NMFLAG(nmp, SOFT) ? 10 : 60;
+	rqproto = IPPROTO_UDP; /* XXX should prefer TCP if mount is TCP */
+
+	/* check if we have a recently cached rquota port */
 	microuptime(&now);
-	nmp->nm_rqportstamp = now.tv_sec;
-	nfsm_chain_cleanup(&nmreq);
-	nfsm_chain_cleanup(&nmrep);
-	xid = 0;
+	if (!rqport || ((nmp->nm_rqsaddrstamp + 60) <= (uint32_t)now.tv_sec)) {
+		/* send portmap request to get rquota port */
+		bcopy(nmp->nm_saddr, rqsaddr, min(sizeof(struct sockaddr_storage), nmp->nm_saddr->sa_len));
+		error = nfs_portmap_lookup(nmp, ctx, rqsaddr, NULL, RPCPROG_RQUOTA, rqvers, rqproto, timeo);
+		if (error)
+			return (error);
+		if (rqsaddr->sa_family == AF_INET6)
+			rqport = ntohs(((struct sockaddr_in6*)rqsaddr)->sin6_port);
+		else if (rqsaddr->sa_family == AF_INET)
+			rqport = ntohs(((struct sockaddr_in*)rqsaddr)->sin_port);
+		else
+			return (EIO);
+		if (!rqport)
+			return (ENOTSUP);
+		microuptime(&now);
+		nmp->nm_rqsaddrstamp = now.tv_sec;
+	}
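+	/*
+	 * Cache policy in brief (a sketch of the test above): re-query the
+	 * portmapper only when no port is cached or the cached address is
+	 * at least 60 seconds old,
+	 *
+	 *	stale = !rqport || ((stamp + 60) <= (uint32_t)now.tv_sec);
+	 *
+	 * so steady-state quota requests skip the extra round trip.
+	 */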
 
-got_rqport:
 	/* rquota request */
-	saddr.sin_port = htons(nmp->nm_rqport);
+	nfsm_chain_null(&nmreq);
+	nfsm_chain_null(&nmrep);
 	path = &vfs_statfs(nmp->nm_mountp)->f_mntfromname[0];
 	while (*path && (*path != '/'))
 		path++;
 	slen = strlen(path);
 	nfsm_chain_build_alloc_init(error, &nmreq, 3 * NFSX_UNSIGNED + nfsm_rndup(slen));
-	nfsm_chain_add_string(error, &nmreq, path, slen);
+	nfsm_chain_add_name(error, &nmreq, path, slen, nmp);
 	if (type == GRPQUOTA)
 		nfsm_chain_add_32(error, &nmreq, type);
 	nfsm_chain_add_32(error, &nmreq, id);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
-	error = nfsm_rpchead2(SOCK_DGRAM, RPCPROG_RQUOTA, rqvers, RPCRQUOTA_GET,
-			RPCAUTH_SYS, auth_len, cred, NULL, nmreq.nmc_mhead, &xid, &mreq);
+	error = nfsm_rpchead2(nmp, (rqproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
+			RPCPROG_RQUOTA, rqvers, RPCRQUOTA_GET,
+			RPCAUTH_SYS, cred, NULL, nmreq.nmc_mhead, &xid, &mreq);
 	nfsmout_if(error);
 	nmreq.nmc_mhead = NULL;
-	error = nfs_aux_request(nmp, thd, &saddr, mreq, R_XID32(xid), 0, timeo, &nmrep);
+	error = nfs_aux_request(nmp, thd, rqsaddr, NULL,
+			(rqproto == IPPROTO_UDP) ? SOCK_DGRAM : SOCK_STREAM,
+			mreq, R_XID32(xid), 0, timeo, &nmrep);
 	nfsmout_if(error);
 
 	/* parse rquota response */
@@ -2311,6 +4523,7 @@ nfs4_getquota(struct nfsmount *nmp, vfs_context_t ctx, uid_t id, int type, struc
 	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
 	thread_t thd = vfs_context_thread(ctx);
 	kauth_cred_t cred = vfs_context_ucred(ctx);
+	struct nfsreq_secinfo_args si;
 
 	if (type != USRQUOTA)  /* NFSv4 only supports user quotas */
 		return (ENOTSUP);
@@ -2326,12 +4539,13 @@ nfs4_getquota(struct nfsmount *nmp, vfs_context_t ctx, uid_t id, int type, struc
 	 * an effective uid that matches the given uid.
 	 */
 	if (id != kauth_cred_getuid(cred)) {
-		struct ucred temp_cred;
-		bzero(&temp_cred, sizeof(temp_cred));
-		temp_cred.cr_uid = id;
-		temp_cred.cr_ngroups = cred->cr_ngroups;
-		bcopy(cred->cr_groups, temp_cred.cr_groups, sizeof(temp_cred.cr_groups));
-		cred = kauth_cred_create(&temp_cred);
+		struct posix_cred temp_pcred;
+		posix_cred_t pcred = posix_cred_get(cred);
+		bzero(&temp_pcred, sizeof(temp_pcred));
+		temp_pcred.cr_uid = id;
+		temp_pcred.cr_ngroups = pcred->cr_ngroups;
+		bcopy(pcred->cr_groups, temp_pcred.cr_groups, sizeof(temp_pcred.cr_groups));
+		cred = posix_cred_create(&temp_pcred);
 		if (!IS_VALID_CRED(cred))
 			return (ENOMEM);
 	} else {
@@ -2347,6 +4561,7 @@ nfs4_getquota(struct nfsmount *nmp, vfs_context_t ctx, uid_t id, int type, struc
 		return(error);
 	}
 
+	NFSREQ_SECINFO_SET(&si, np, NULL, 0, NULL, 0);
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
 
@@ -2363,19 +4578,18 @@ nfs4_getquota(struct nfsmount *nmp, vfs_context_t ctx, uid_t id, int type, struc
 	NFS_BITMAP_SET(bitmap, NFS_FATTR_QUOTA_AVAIL_HARD);
 	NFS_BITMAP_SET(bitmap, NFS_FATTR_QUOTA_AVAIL_SOFT);
 	NFS_BITMAP_SET(bitmap, NFS_FATTR_QUOTA_USED);
-	nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap,
-		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
+	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, NULL);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsm_assert(error, (numops == 0), EPROTO);
 	nfsmout_if(error);
-	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, 0, &nmrep, &xid, &status);
+	error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, &si, 0, &nmrep, &xid, &status);
 	nfsm_chain_skip_tag(error, &nmrep);
 	nfsm_chain_get_32(error, &nmrep, numops);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
 	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
 	nfsm_assert(error, NFSTONMP(np), ENXIO);
 	nfsmout_if(error);
-	error = nfs4_parsefattr(&nmrep, NULL, NULL, NULL, dqb);
+	error = nfs4_parsefattr(&nmrep, NULL, NULL, NULL, dqb, NULL);
 	nfsmout_if(error);
 	nfsm_assert(error, NFSTONMP(np), ENXIO);
 nfsmout:
@@ -2391,7 +4605,7 @@ nfs_vfs_quotactl(mount_t mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t c
 {
 	struct nfsmount *nmp;
 	int cmd, type, error, nfsvers;
-	uid_t ruid = vfs_context_ucred(ctx)->cr_ruid;
+	uid_t euid = kauth_cred_getuid(vfs_context_ucred(ctx));
 	struct dqblk *dqb = (struct dqblk*)datap;
 
 	if (!(nmp = VFSTONFS(mp)))
@@ -2399,7 +4613,7 @@ nfs_vfs_quotactl(mount_t mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t c
 	nfsvers = nmp->nm_vers;
 
 	if (uid == ~0U)
-		uid = ruid;
+		uid = euid;
 
 	/* we can only support Q_GETQUOTA */
 	cmd = cmds >> SUBCMDSHIFT;
@@ -2420,7 +4634,7 @@ nfs_vfs_quotactl(mount_t mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t c
 	type = cmds & SUBCMDMASK;
 	if ((u_int)type >= MAXQUOTAS)
 		return (EINVAL);
-	if ((uid != ruid) && ((error = vfs_context_suser(ctx))))
+	if ((uid != euid) && ((error = vfs_context_suser(ctx))))
 		return (error);
 
 	if (vfs_busy(mp, LK_NOWAIT))
@@ -2438,7 +4652,7 @@ nfs_vfs_quotactl(mount_t mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t c
 int nfs_sync_callout(vnode_t, void *);
 
 struct nfs_sync_cargs {
-	thread_t	thd;
+	vfs_context_t	ctx;
 	int		waitfor;
 	int		error;
 };
@@ -2447,16 +4661,22 @@ int
 nfs_sync_callout(vnode_t vp, void *arg)
 {
 	struct nfs_sync_cargs *cargs = (struct nfs_sync_cargs*)arg;
+	nfsnode_t np = VTONFS(vp);
 	int error;
 
-	if (LIST_EMPTY(&VTONFS(vp)->n_dirtyblkhd))
+	if (np->n_flag & NREVOKE) {
+		vn_revoke(vp, REVOKEALL, cargs->ctx);
+		return (VNODE_RETURNED);
+	}
+
+	if (LIST_EMPTY(&np->n_dirtyblkhd))
 		return (VNODE_RETURNED);
-	if (VTONFS(vp)->n_wrbusy > 0)
+	if (np->n_wrbusy > 0)
 		return (VNODE_RETURNED);
-	if (VTONFS(vp)->n_bflag & (NBFLUSHINPROG|NBINVALINPROG))
+	if (np->n_bflag & (NBFLUSHINPROG|NBINVALINPROG))
 		return (VNODE_RETURNED);
 
-	error = nfs_flush(VTONFS(vp), cargs->waitfor, cargs->thd, 0);
+	error = nfs_flush(np, cargs->waitfor, vfs_context_thread(cargs->ctx), 0);
 	if (error)
 		cargs->error = error;
 
@@ -2469,7 +4689,7 @@ nfs_vfs_sync(mount_t mp, int waitfor, vfs_context_t ctx)
 	struct nfs_sync_cargs cargs;
 
 	cargs.waitfor = waitfor;
-	cargs.thd = vfs_context_thread(ctx);
+	cargs.ctx = ctx;
 	cargs.error = 0;
 
 	vnode_iterate(mp, 0, nfs_sync_callout, &cargs);
@@ -2538,6 +4758,290 @@ nfs_vfs_start(
 	return (0);
 }
 
+/*
+ * Build the mount info buffer for NFS_MOUNTINFO.
+ */
+int
+nfs_mountinfo_assemble(struct nfsmount *nmp, struct xdrbuf *xb)
+{
+	struct xdrbuf xbinfo, xborig;
+	char sotype[6];
+	uint32_t origargsvers, origargslength;
+	uint32_t infolength_offset, curargsopaquelength_offset, curargslength_offset, attrslength_offset, curargs_end_offset, end_offset;
+	uint32_t miattrs[NFS_MIATTR_BITMAP_LEN];
+	uint32_t miflags_mask[NFS_MIFLAG_BITMAP_LEN];
+	uint32_t miflags[NFS_MIFLAG_BITMAP_LEN];
+	uint32_t mattrs[NFS_MATTR_BITMAP_LEN];
+	uint32_t mflags_mask[NFS_MFLAG_BITMAP_LEN];
+	uint32_t mflags[NFS_MFLAG_BITMAP_LEN];
+	uint32_t loc, serv, addr, comp;
+	int i, timeo, error = 0;
+
+	/* set up mount info attr and flag bitmaps */
+	NFS_BITMAP_ZERO(miattrs, NFS_MIATTR_BITMAP_LEN);
+	NFS_BITMAP_SET(miattrs, NFS_MIATTR_FLAGS);
+	NFS_BITMAP_SET(miattrs, NFS_MIATTR_ORIG_ARGS);
+	NFS_BITMAP_SET(miattrs, NFS_MIATTR_CUR_ARGS);
+	NFS_BITMAP_SET(miattrs, NFS_MIATTR_CUR_LOC_INDEX);
+	NFS_BITMAP_ZERO(miflags_mask, NFS_MIFLAG_BITMAP_LEN);
+	NFS_BITMAP_ZERO(miflags, NFS_MIFLAG_BITMAP_LEN);
+	NFS_BITMAP_SET(miflags_mask, NFS_MIFLAG_DEAD);
+	NFS_BITMAP_SET(miflags_mask, NFS_MIFLAG_NOTRESP);
+	NFS_BITMAP_SET(miflags_mask, NFS_MIFLAG_RECOVERY);
+	if (nmp->nm_state & NFSSTA_DEAD)
+		NFS_BITMAP_SET(miflags, NFS_MIFLAG_DEAD);
+	if ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_JUKEBOXTIMEO)) ||
+	    ((nmp->nm_state & NFSSTA_LOCKTIMEO) && (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED)))
+		NFS_BITMAP_SET(miflags, NFS_MIFLAG_NOTRESP);
+	if (nmp->nm_state & NFSSTA_RECOVER)
+		NFS_BITMAP_SET(miflags, NFS_MIFLAG_RECOVERY);
+
+	/* get original mount args length */
+	xb_init_buffer(&xborig, nmp->nm_args, 2*XDRWORD);
+	xb_get_32(error, &xborig, origargsvers); /* version */
+	xb_get_32(error, &xborig, origargslength); /* args length */
+	nfsmerr_if(error);
+
+	/* set up current mount attributes bitmap */
+	NFS_BITMAP_ZERO(mattrs, NFS_MATTR_BITMAP_LEN);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_FLAGS);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_VERSION);
+	if (nmp->nm_vers >= NFS_VER4)
+		NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_MINOR_VERSION);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_READ_SIZE);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_WRITE_SIZE);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_READDIR_SIZE);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_READAHEAD);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_REG_MIN);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_REG_MAX);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MIN);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_ATTRCACHE_DIR_MAX);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_LOCK_MODE);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_SECURITY);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_MAX_GROUP_LIST);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_SOCKET_TYPE);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_NFS_PORT);
+	if ((nmp->nm_vers < NFS_VER4) && nmp->nm_mountport)
+		NFS_BITMAP_SET(mattrs, NFS_MATTR_MOUNT_PORT);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_REQUEST_TIMEOUT);
+	if (NMFLAG(nmp, SOFT))
+		NFS_BITMAP_SET(mattrs, NFS_MATTR_SOFT_RETRY_COUNT);
+	if (nmp->nm_deadtimeout)
+		NFS_BITMAP_SET(mattrs, NFS_MATTR_DEAD_TIMEOUT);
+	if (nmp->nm_fh)
+		NFS_BITMAP_SET(mattrs, NFS_MATTR_FH);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_FS_LOCATIONS);
+	NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFLAGS);
+	if (origargsvers < NFS_ARGSVERSION_XDR)
+		NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFROM);
+
+	/* set up current mount flags bitmap */
+	/* first set the flags that we will be setting - either on OR off */
+	NFS_BITMAP_ZERO(mflags_mask, NFS_MFLAG_BITMAP_LEN);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_SOFT);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_INTR);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RESVPORT);
+	if (nmp->nm_sotype == SOCK_DGRAM)
+		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOCONNECT);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_DUMBTIMER);
+	if (nmp->nm_vers < NFS_VER4)
+		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_CALLUMNT);
+	if (nmp->nm_vers >= NFS_VER3)
+		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_RDIRPLUS);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NONEGNAMECACHE);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_MUTEJUKEBOX);
+	if (nmp->nm_vers >= NFS_VER4) {
+		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_EPHEMERAL);
+		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOCALLBACK);
+		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NONAMEDATTR);
+		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOACL);
+		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_ACLONLY);
+	}
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NFC);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_NOQUOTA);
+	if (nmp->nm_vers < NFS_VER4)
+		NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_MNTUDP);
+	NFS_BITMAP_SET(mflags_mask, NFS_MFLAG_MNTQUICK);
+	/* now set the flags that should be set */
+	NFS_BITMAP_ZERO(mflags, NFS_MFLAG_BITMAP_LEN);
+	if (NMFLAG(nmp, SOFT))
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_SOFT);
+	if (NMFLAG(nmp, INTR))
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_INTR);
+	if (NMFLAG(nmp, RESVPORT))
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_RESVPORT);
+	if ((nmp->nm_sotype == SOCK_DGRAM) && NMFLAG(nmp, NOCONNECT))
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_NOCONNECT);
+	if (NMFLAG(nmp, DUMBTIMER))
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_DUMBTIMER);
+	if ((nmp->nm_vers < NFS_VER4) && NMFLAG(nmp, CALLUMNT))
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_CALLUMNT);
+	if ((nmp->nm_vers >= NFS_VER3) && NMFLAG(nmp, RDIRPLUS))
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_RDIRPLUS);
+	if (NMFLAG(nmp, NONEGNAMECACHE))
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_NONEGNAMECACHE);
+	if (NMFLAG(nmp, MUTEJUKEBOX))
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_MUTEJUKEBOX);
+	if (nmp->nm_vers >= NFS_VER4) {
+		if (NMFLAG(nmp, EPHEMERAL))
+			NFS_BITMAP_SET(mflags, NFS_MFLAG_EPHEMERAL);
+		if (NMFLAG(nmp, NOCALLBACK))
+			NFS_BITMAP_SET(mflags, NFS_MFLAG_NOCALLBACK);
+		if (NMFLAG(nmp, NONAMEDATTR))
+			NFS_BITMAP_SET(mflags, NFS_MFLAG_NONAMEDATTR);
+		if (NMFLAG(nmp, NOACL))
+			NFS_BITMAP_SET(mflags, NFS_MFLAG_NOACL);
+		if (NMFLAG(nmp, ACLONLY))
+			NFS_BITMAP_SET(mflags, NFS_MFLAG_ACLONLY);
+	}
+	if (NMFLAG(nmp, NFC))
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_NFC);
+	if (NMFLAG(nmp, NOQUOTA) || ((nmp->nm_vers >= NFS_VER4) &&
+	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_AVAIL_HARD) &&
+	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_AVAIL_SOFT) &&
+	    !NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_supp_attr, NFS_FATTR_QUOTA_USED)))
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_NOQUOTA);
+	if ((nmp->nm_vers < NFS_VER4) && NMFLAG(nmp, MNTUDP))
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_MNTUDP);
+	if (NMFLAG(nmp, MNTQUICK))
+		NFS_BITMAP_SET(mflags, NFS_MFLAG_MNTQUICK);
+
+	/* assemble info buffer: */
+	xb_init_buffer(&xbinfo, NULL, 0);
+	xb_add_32(error, &xbinfo, NFS_MOUNT_INFO_VERSION);
+	infolength_offset = xb_offset(&xbinfo);
+	xb_add_32(error, &xbinfo, 0);
+	xb_add_bitmap(error, &xbinfo, miattrs, NFS_MIATTR_BITMAP_LEN);
+	xb_add_bitmap(error, &xbinfo, miflags, NFS_MIFLAG_BITMAP_LEN);
+	xb_add_32(error, &xbinfo, origargslength);
+	if (!error)
+		error = xb_add_bytes(&xbinfo, nmp->nm_args, origargslength, 0);
+
+	/* the opaque byte count for the current mount args values: */
+	curargsopaquelength_offset = xb_offset(&xbinfo);
+	xb_add_32(error, &xbinfo, 0);
+
+	/* Encode current mount args values */
+	xb_add_32(error, &xbinfo, NFS_ARGSVERSION_XDR);
+	curargslength_offset = xb_offset(&xbinfo);
+	xb_add_32(error, &xbinfo, 0);
+	xb_add_32(error, &xbinfo, NFS_XDRARGS_VERSION_0);
+	xb_add_bitmap(error, &xbinfo, mattrs, NFS_MATTR_BITMAP_LEN);
+	attrslength_offset = xb_offset(&xbinfo);
+	xb_add_32(error, &xbinfo, 0);
+	xb_add_bitmap(error, &xbinfo, mflags_mask, NFS_MFLAG_BITMAP_LEN);
+	xb_add_bitmap(error, &xbinfo, mflags, NFS_MFLAG_BITMAP_LEN);
+	xb_add_32(error, &xbinfo, nmp->nm_vers);		/* NFS_VERSION */
+	if (nmp->nm_vers >= NFS_VER4)
+		xb_add_32(error, &xbinfo, 0);			/* NFS_MINOR_VERSION */
+	xb_add_32(error, &xbinfo, nmp->nm_rsize);		/* READ_SIZE */
+	xb_add_32(error, &xbinfo, nmp->nm_wsize);		/* WRITE_SIZE */
+	xb_add_32(error, &xbinfo, nmp->nm_readdirsize);		/* READDIR_SIZE */
+	xb_add_32(error, &xbinfo, nmp->nm_readahead);		/* READAHEAD */
+	xb_add_32(error, &xbinfo, nmp->nm_acregmin);		/* ATTRCACHE_REG_MIN (secs) */
+	xb_add_32(error, &xbinfo, 0);				/* ATTRCACHE_REG_MIN (nsecs) */
+	xb_add_32(error, &xbinfo, nmp->nm_acregmax);		/* ATTRCACHE_REG_MAX (secs) */
+	xb_add_32(error, &xbinfo, 0);				/* ATTRCACHE_REG_MAX (nsecs) */
+	xb_add_32(error, &xbinfo, nmp->nm_acdirmin);		/* ATTRCACHE_DIR_MIN (secs) */
+	xb_add_32(error, &xbinfo, 0);				/* ATTRCACHE_DIR_MIN (nsecs) */
+	xb_add_32(error, &xbinfo, nmp->nm_acdirmax);		/* ATTRCACHE_DIR_MAX (secs) */
+	xb_add_32(error, &xbinfo, 0);				/* ATTRCACHE_DIR_MAX (nsecs) */
+	xb_add_32(error, &xbinfo, nmp->nm_lockmode);		/* LOCK_MODE */
+	if (nmp->nm_sec.count) {
+		xb_add_32(error, &xbinfo, nmp->nm_sec.count);		/* SECURITY */
+		nfsmerr_if(error);
+		for (i=0; i < nmp->nm_sec.count; i++)
+			xb_add_32(error, &xbinfo, nmp->nm_sec.flavors[i]);
+	} else if (nmp->nm_servsec.count) {
+		xb_add_32(error, &xbinfo, nmp->nm_servsec.count);	/* SECURITY */
+		nfsmerr_if(error);
+		for (i=0; i < nmp->nm_servsec.count; i++)
+			xb_add_32(error, &xbinfo, nmp->nm_servsec.flavors[i]);
+	} else {
+		xb_add_32(error, &xbinfo, 1);				/* SECURITY */
+		xb_add_32(error, &xbinfo, nmp->nm_auth);
+	}
+	xb_add_32(error, &xbinfo, nmp->nm_numgrps);		/* MAX_GROUP_LIST */
+	nfsmerr_if(error);
+	snprintf(sotype, sizeof(sotype), "%s%s", (nmp->nm_sotype == SOCK_DGRAM) ? "udp" : "tcp",
+		nmp->nm_sofamily ? (nmp->nm_sofamily == AF_INET) ? "4" : "6" : "");
+	xb_add_string(error, &xbinfo, sotype, strlen(sotype));	/* SOCKET_TYPE */
+	xb_add_32(error, &xbinfo, ntohs(((struct sockaddr_in*)nmp->nm_saddr)->sin_port)); /* NFS_PORT */
+	if ((nmp->nm_vers < NFS_VER4) && nmp->nm_mountport)
+		xb_add_32(error, &xbinfo, nmp->nm_mountport);	/* MOUNT_PORT */
+	timeo = (nmp->nm_timeo * 10) / NFS_HZ;
+	xb_add_32(error, &xbinfo, timeo/10);			/* REQUEST_TIMEOUT (secs) */
+	xb_add_32(error, &xbinfo, (timeo%10)*100000000);	/* REQUEST_TIMEOUT (nsecs) */
+	if (NMFLAG(nmp, SOFT))
+		xb_add_32(error, &xbinfo, nmp->nm_retry);	/* SOFT_RETRY_COUNT */
+	if (nmp->nm_deadtimeout) {
+		xb_add_32(error, &xbinfo, nmp->nm_deadtimeout);	/* DEAD_TIMEOUT (secs) */
+		xb_add_32(error, &xbinfo, 0);			/* DEAD_TIMEOUT (nsecs) */
+	}
+	if (nmp->nm_fh)
+		xb_add_fh(error, &xbinfo, &nmp->nm_fh->fh_data[0], nmp->nm_fh->fh_len); /* FH */
+	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_numlocs);			/* FS_LOCATIONS */
+	for (loc = 0; !error && (loc < nmp->nm_locations.nl_numlocs); loc++) {
+		xb_add_32(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_servcount);
+		for (serv = 0; !error && (serv < nmp->nm_locations.nl_locations[loc]->nl_servcount); serv++) {
+			xb_add_string(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name,
+				strlen(nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name));
+			xb_add_32(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount);
+			for (addr = 0; !error && (addr < nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount); addr++)
+				xb_add_string(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addresses[addr],
+					strlen(nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addresses[addr]));
+			xb_add_32(error, &xbinfo, 0); /* empty server info */
+		}
+		xb_add_32(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_path.np_compcount);
+		for (comp = 0; !error && (comp < nmp->nm_locations.nl_locations[loc]->nl_path.np_compcount); comp++)
+			xb_add_string(error, &xbinfo, nmp->nm_locations.nl_locations[loc]->nl_path.np_components[comp],
+				strlen(nmp->nm_locations.nl_locations[loc]->nl_path.np_components[comp]));
+		xb_add_32(error, &xbinfo, 0); /* empty fs location info */
+	}
+	xb_add_32(error, &xbinfo, vfs_flags(nmp->nm_mountp));		/* MNTFLAGS */
+	if (origargsvers < NFS_ARGSVERSION_XDR)
+		xb_add_string(error, &xbinfo, vfs_statfs(nmp->nm_mountp)->f_mntfromname,
+			strlen(vfs_statfs(nmp->nm_mountp)->f_mntfromname));	/* MNTFROM */
+	curargs_end_offset = xb_offset(&xbinfo);
+
+	/* NFS_MIATTR_CUR_LOC_INDEX */
+	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_current.nli_flags);
+	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_current.nli_loc);
+	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_current.nli_serv);
+	xb_add_32(error, &xbinfo, nmp->nm_locations.nl_current.nli_addr);
+
+	xb_build_done(error, &xbinfo);
+
+	/* update opaque counts */
+	end_offset = xb_offset(&xbinfo);
+	if (!error) {
+		error = xb_seek(&xbinfo, attrslength_offset);
+		xb_add_32(error, &xbinfo, curargs_end_offset - attrslength_offset - XDRWORD/*don't include length field*/);
+	}
+	if (!error) {
+		error = xb_seek(&xbinfo, curargslength_offset);
+		xb_add_32(error, &xbinfo, curargs_end_offset - curargslength_offset + XDRWORD/*version*/);
+	}
+	if (!error) {
+		error = xb_seek(&xbinfo, curargsopaquelength_offset);
+		xb_add_32(error, &xbinfo, curargs_end_offset - curargslength_offset + XDRWORD/*version*/);
+	}
+	if (!error) {
+		error = xb_seek(&xbinfo, infolength_offset);
+		xb_add_32(error, &xbinfo, end_offset - infolength_offset + XDRWORD/*version*/);
+	}
+	nfsmerr_if(error);
+
+	/* copy result xdrbuf to caller */
+	*xb = xbinfo;
+
+	/* and mark the local copy as not needing cleanup */
+	xbinfo.xb_flags &= ~XB_CLEANUP;
+nfsmerr:
+	xb_cleanup(&xbinfo);
+	return (error);
+}
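+/*
+ * The buffer above is fetched from user space via the NFS_MOUNTINFO
+ * sysctl handled in nfs_vfs_sysctl() below; roughly (a sketch, error
+ * handling elided; the mib layout and "buf" are illustrative):
+ *
+ *	int mib[3] = { CTL_VFS, nfs_typenum, NFS_MOUNTINFO };
+ *	fsid_t fsid = { { htonl(val0), htonl(val1) } };	// network order
+ *	size_t len = buflen;
+ *	memcpy(buf, &fsid, sizeof(fsid));	// fsid passed in via oldp
+ *	sysctl(mib, 3, buf, &len, NULL, 0);	// XDR mount info returned
+ */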
+
 /*
  * Do that sysctl thang...
  */
@@ -2552,6 +5056,8 @@ nfs_vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
 	struct nfsmount *nmp = NULL;
 	struct vfsquery vq;
 	boolean_t is_64_bit;
+	fsid_t fsid;
+	struct xdrbuf xb;
 #if NFSSERVER
 	struct nfs_exportfs *nxfs;
 	struct nfs_export *nx;
@@ -2622,6 +5128,32 @@ nfs_vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
 		if (newp)
 			return copyin(newp, &nfsstats, sizeof nfsstats);
 		return (0);
+	case NFS_MOUNTINFO:
+		/* read in the fsid */
+		if (*oldlenp < sizeof(fsid))
+			return (EINVAL);
+		if ((error = copyin(oldp, &fsid, sizeof(fsid))))
+			return (error);
+		/* swizzle it back to host order */
+		fsid.val[0] = ntohl(fsid.val[0]);
+		fsid.val[1] = ntohl(fsid.val[1]);
+		/* find mount and make sure it's NFS */
+		if (((mp = vfs_getvfs(&fsid))) == NULL)
+			return (ENOENT);
+		if (strcmp(mp->mnt_vfsstat.f_fstypename, "nfs"))
+			return (EINVAL);
+		if (((nmp = VFSTONFS(mp))) == NULL)
+			return (ENOENT);
+		xb_init(&xb, 0);
+		if ((error = nfs_mountinfo_assemble(nmp, &xb)))
+			return (error);
+		if (*oldlenp < xb.xb_u.xb_buffer.xbb_len)
+			error = ENOMEM;
+		else
+			error = copyout(xb_buffer_base(&xb), oldp, xb.xb_u.xb_buffer.xbb_len);
+		*oldlenp = xb.xb_u.xb_buffer.xbb_len;
+		xb_cleanup(&xb);
+		break;
 #if NFSSERVER
 	case NFS_EXPORTSTATS:
 		/* setup export stat descriptor */
@@ -2866,7 +5398,7 @@ ustat_skip:
 	case VFS_CTL_NOLOCKS:
  		if (req->oldptr != USER_ADDR_NULL) {
 			lck_mtx_lock(&nmp->nm_lock);
-			val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
+			val = (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) ? 1 : 0;
 			lck_mtx_unlock(&nmp->nm_lock);
  			error = SYSCTL_OUT(req, &val, sizeof(val));
  			if (error)
@@ -2877,18 +5409,21 @@ ustat_skip:
  			if (error)
  				return (error);
 			lck_mtx_lock(&nmp->nm_lock);
-			if (nmp->nm_flag & NFSMNT_LOCALLOCKS) {
+			if (nmp->nm_lockmode == NFS_LOCK_MODE_LOCAL) {
 				/* can't toggle locks when using local locks */
 				error = EINVAL;
+			} else if ((nmp->nm_vers >= NFS_VER4) && val) {
+				/* can't disable locks for NFSv4 */
+				error = EINVAL;
 			} else if (val) {
-				if (!(nmp->nm_flag & NFSMNT_NOLOCKS))
-					nfs_lockd_mount_change(-1);
-				nmp->nm_flag |= NFSMNT_NOLOCKS;
+				if ((nmp->nm_vers <= NFS_VER3) && (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED))
+					nfs_lockd_mount_unregister(nmp);
+				nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
 				nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
 			} else {
-				if (nmp->nm_flag & NFSMNT_NOLOCKS)
-					nfs_lockd_mount_change(1);
-				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
+				if ((nmp->nm_vers <= NFS_VER3) && (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED))
+					nfs_lockd_mount_register(nmp);
+				nmp->nm_lockmode = NFS_LOCK_MODE_ENABLED;
 			}
 			lck_mtx_unlock(&nmp->nm_lock);
  		}
@@ -2896,14 +5431,13 @@ ustat_skip:
 	case VFS_CTL_QUERY:
 		lck_mtx_lock(&nmp->nm_lock);
 		/* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */
-		softnobrowse = ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));
+		softnobrowse = (NMFLAG(nmp, SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE));
 		if (!softnobrowse && (nmp->nm_state & NFSSTA_TIMEO))
 			vq.vq_flags |= VQ_NOTRESP;
-		if (!softnobrowse && (nmp->nm_state & NFSSTA_JUKEBOXTIMEO) &&
-		    !(nmp->nm_flag & NFSMNT_MUTEJUKEBOX))
+		if (!softnobrowse && (nmp->nm_state & NFSSTA_JUKEBOXTIMEO) && !NMFLAG(nmp, MUTEJUKEBOX))
 			vq.vq_flags |= VQ_NOTRESP;
 		if (!softnobrowse && (nmp->nm_state & NFSSTA_LOCKTIMEO) &&
-		    !(nmp->nm_flag & (NFSMNT_NOLOCKS|NFSMNT_LOCALLOCKS)))
+		    (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED))
 			vq.vq_flags |= VQ_NOTRESP;
 		if (nmp->nm_state & NFSSTA_DEAD)
 			vq.vq_flags |= VQ_DEAD;
diff --git a/bsd/nfs/nfs_vnops.c b/bsd/nfs/nfs_vnops.c
index 00199a6df..d1e130b88 100644
--- a/bsd/nfs/nfs_vnops.c
+++ b/bsd/nfs/nfs_vnops.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -134,11 +134,7 @@ int	nfsfifo_vnop_close(struct vnop_close_args *);
 int	nfs_vnop_ioctl(struct vnop_ioctl_args *);
 int	nfs_vnop_select(struct vnop_select_args *);
 int	nfs_vnop_setattr(struct vnop_setattr_args *);
-int	nfs_vnop_read(struct vnop_read_args *);
-int	nfs_vnop_write(struct vnop_write_args *);
-int	nfs_vnop_mmap(struct vnop_mmap_args *);
 int	nfs_vnop_fsync(struct vnop_fsync_args *);
-int	nfs_vnop_remove(struct vnop_remove_args *);
 int	nfs_vnop_rename(struct vnop_rename_args *);
 int	nfs_vnop_readdir(struct vnop_readdir_args *);
 int	nfs_vnop_readlink(struct vnop_readlink_args *);
@@ -148,6 +144,7 @@ int	nfs_vnop_pageout(struct vnop_pageout_args *);
 int	nfs_vnop_blktooff(struct vnop_blktooff_args *);
 int	nfs_vnop_offtoblk(struct vnop_offtoblk_args *);
 int	nfs_vnop_blockmap(struct vnop_blockmap_args *);
+int	nfs_vnop_monitor(struct vnop_monitor_args *);
 
 int	nfs3_vnop_create(struct vnop_create_args *);
 int	nfs3_vnop_mknod(struct vnop_mknod_args *);
@@ -163,8 +160,8 @@ static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
 	{ &vnop_lookup_desc, (vnop_t *)nfs_vnop_lookup },	/* lookup */
 	{ &vnop_create_desc, (vnop_t *)nfs3_vnop_create },	/* create */
 	{ &vnop_mknod_desc, (vnop_t *)nfs3_vnop_mknod },	/* mknod */
-	{ &vnop_open_desc, (vnop_t *)nfs3_vnop_open },		/* open */
-	{ &vnop_close_desc, (vnop_t *)nfs3_vnop_close },	/* close */
+	{ &vnop_open_desc, (vnop_t *)nfs_vnop_open },		/* open */
+	{ &vnop_close_desc, (vnop_t *)nfs_vnop_close },		/* close */
 	{ &vnop_access_desc, (vnop_t *)nfs_vnop_access },	/* access */
 	{ &vnop_getattr_desc, (vnop_t *)nfs3_vnop_getattr },	/* getattr */
 	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },	/* setattr */
@@ -174,6 +171,7 @@ static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
 	{ &vnop_select_desc, (vnop_t *)nfs_vnop_select },	/* select */
 	{ &vnop_revoke_desc, (vnop_t *)nfs_vnop_revoke },	/* revoke */
 	{ &vnop_mmap_desc, (vnop_t *)nfs_vnop_mmap },		/* mmap */
+	{ &vnop_mnomap_desc, (vnop_t *)nfs_vnop_mnomap },	/* mnomap */
 	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },		/* fsync */
 	{ &vnop_remove_desc, (vnop_t *)nfs_vnop_remove },	/* remove */
 	{ &vnop_link_desc, (vnop_t *)nfs3_vnop_link },		/* link */
@@ -187,7 +185,7 @@ static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
 	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },	/* reclaim */
 	{ &vnop_strategy_desc, (vnop_t *)err_strategy },	/* strategy */
 	{ &vnop_pathconf_desc, (vnop_t *)nfs_vnop_pathconf },	/* pathconf */
-	{ &vnop_advlock_desc, (vnop_t *)nfs3_vnop_advlock },	/* advlock */
+	{ &vnop_advlock_desc, (vnop_t *)nfs_vnop_advlock },	/* advlock */
 	{ &vnop_bwrite_desc, (vnop_t *)err_bwrite },		/* bwrite */
 	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },	/* Pagein */
 	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },	/* Pageout */
@@ -195,6 +193,7 @@ static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
 	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
 	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
 	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
+	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },	/* monitor */
 	{ NULL, NULL }
 };
 struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
@@ -206,18 +205,18 @@ static struct vnodeopv_entry_desc nfsv4_vnodeop_entries[] = {
 	{ &vnop_lookup_desc, (vnop_t *)nfs_vnop_lookup },	/* lookup */
 	{ &vnop_create_desc, (vnop_t *)nfs4_vnop_create },	/* create */
 	{ &vnop_mknod_desc, (vnop_t *)nfs4_vnop_mknod },	/* mknod */
-	{ &vnop_open_desc, (vnop_t *)nfs4_vnop_open },		/* open */
-	{ &vnop_close_desc, (vnop_t *)nfs4_vnop_close },	/* close */
+	{ &vnop_open_desc, (vnop_t *)nfs_vnop_open },		/* open */
+	{ &vnop_close_desc, (vnop_t *)nfs_vnop_close },		/* close */
 	{ &vnop_access_desc, (vnop_t *)nfs_vnop_access },	/* access */
 	{ &vnop_getattr_desc, (vnop_t *)nfs4_vnop_getattr },	/* getattr */
 	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },	/* setattr */
-	{ &vnop_read_desc, (vnop_t *)nfs4_vnop_read },		/* read */
+	{ &vnop_read_desc, (vnop_t *)nfs_vnop_read },		/* read */
 	{ &vnop_write_desc, (vnop_t *)nfs_vnop_write },		/* write */
 	{ &vnop_ioctl_desc, (vnop_t *)nfs_vnop_ioctl },		/* ioctl */
 	{ &vnop_select_desc, (vnop_t *)nfs_vnop_select },	/* select */
 	{ &vnop_revoke_desc, (vnop_t *)nfs_vnop_revoke },	/* revoke */
-	{ &vnop_mmap_desc, (vnop_t *)nfs4_vnop_mmap },		/* mmap */
-	{ &vnop_mnomap_desc, (vnop_t *)nfs4_vnop_mnomap },	/* mnomap */
+	{ &vnop_mmap_desc, (vnop_t *)nfs_vnop_mmap },		/* mmap */
+	{ &vnop_mnomap_desc, (vnop_t *)nfs_vnop_mnomap },	/* mnomap */
 	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },		/* fsync */
 	{ &vnop_remove_desc, (vnop_t *)nfs_vnop_remove },	/* remove */
 	{ &vnop_link_desc, (vnop_t *)nfs4_vnop_link },		/* link */
@@ -231,7 +230,7 @@ static struct vnodeopv_entry_desc nfsv4_vnodeop_entries[] = {
 	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },	/* reclaim */
 	{ &vnop_strategy_desc, (vnop_t *)err_strategy },	/* strategy */
 	{ &vnop_pathconf_desc, (vnop_t *)nfs_vnop_pathconf },	/* pathconf */
-	{ &vnop_advlock_desc, (vnop_t *)nfs4_vnop_advlock },	/* advlock */
+	{ &vnop_advlock_desc, (vnop_t *)nfs_vnop_advlock },	/* advlock */
 	{ &vnop_bwrite_desc, (vnop_t *)err_bwrite },		/* bwrite */
 	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },	/* Pagein */
 	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },	/* Pageout */
@@ -239,6 +238,16 @@ static struct vnodeopv_entry_desc nfsv4_vnodeop_entries[] = {
 	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
 	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
 	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
+	{ &vnop_getxattr_desc, (vnop_t *)nfs4_vnop_getxattr },	/* getxattr */
+	{ &vnop_setxattr_desc, (vnop_t *)nfs4_vnop_setxattr },	/* setxattr */
+	{ &vnop_removexattr_desc, (vnop_t *)nfs4_vnop_removexattr },/* removexattr */
+	{ &vnop_listxattr_desc, (vnop_t *)nfs4_vnop_listxattr },/* listxattr */
+#if NAMEDSTREAMS
+	{ &vnop_getnamedstream_desc, (vnop_t *)nfs4_vnop_getnamedstream },	/* getnamedstream */
+	{ &vnop_makenamedstream_desc, (vnop_t *)nfs4_vnop_makenamedstream },	/* makenamedstream */
+	{ &vnop_removenamedstream_desc, (vnop_t *)nfs4_vnop_removenamedstream },/* removenamedstream */
+#endif
+	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },	/* monitor */
 	{ NULL, NULL }
 };
 struct vnodeopv_desc nfsv4_vnodeop_opv_desc =
@@ -283,6 +292,7 @@ static struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = {
 	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
 	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
 	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
+	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },	/* monitor */
 	{ NULL, NULL }
 };
 struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
@@ -323,6 +333,16 @@ static struct vnodeopv_entry_desc spec_nfsv4nodeop_entries[] = {
 	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
 	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
 	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
+	{ &vnop_getxattr_desc, (vnop_t *)nfs4_vnop_getxattr },	/* getxattr */
+	{ &vnop_setxattr_desc, (vnop_t *)nfs4_vnop_setxattr },	/* setxattr */
+	{ &vnop_removexattr_desc, (vnop_t *)nfs4_vnop_removexattr },/* removexattr */
+	{ &vnop_listxattr_desc, (vnop_t *)nfs4_vnop_listxattr },/* listxattr */
+#if NAMEDSTREAMS
+	{ &vnop_getnamedstream_desc, (vnop_t *)nfs4_vnop_getnamedstream },	/* getnamedstream */
+	{ &vnop_makenamedstream_desc, (vnop_t *)nfs4_vnop_makenamedstream },	/* makenamedstream */
+	{ &vnop_removenamedstream_desc, (vnop_t *)nfs4_vnop_removenamedstream },/* removenamedstream */
+#endif
+	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },	/* monitor */
 	{ NULL, NULL }
 };
 struct vnodeopv_desc spec_nfsv4nodeop_opv_desc =
@@ -365,6 +385,7 @@ static struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = {
 	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
 	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
 	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
+	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },	/* monitor */
 	{ NULL, NULL }
 };
 struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
@@ -406,6 +427,16 @@ static struct vnodeopv_entry_desc fifo_nfsv4nodeop_entries[] = {
 	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
 	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
 	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
+	{ &vnop_getxattr_desc, (vnop_t *)nfs4_vnop_getxattr },	/* getxattr */
+	{ &vnop_setxattr_desc, (vnop_t *)nfs4_vnop_setxattr },	/* setxattr */
+	{ &vnop_removexattr_desc, (vnop_t *)nfs4_vnop_removexattr },/* removexattr */
+	{ &vnop_listxattr_desc, (vnop_t *)nfs4_vnop_listxattr },/* listxattr */
+#if NAMEDSTREAMS
+	{ &vnop_getnamedstream_desc, (vnop_t *)nfs4_vnop_getnamedstream },	/* getnamedstream */
+	{ &vnop_makenamedstream_desc, (vnop_t *)nfs4_vnop_makenamedstream },	/* makenamedstream */
+	{ &vnop_removenamedstream_desc, (vnop_t *)nfs4_vnop_removenamedstream },/* removenamedstream */
+#endif
+	{ &vnop_monitor_desc, (vnop_t *)nfs_vnop_monitor },	/* monitor */
 	{ NULL, NULL }
 };
 struct vnodeopv_desc fifo_nfsv4nodeop_opv_desc =
@@ -418,30 +449,30 @@ int	nfs_sillyrename(nfsnode_t,nfsnode_t,struct componentname *,vfs_context_t);
 /*
  * Find the slot in the access cache for this UID.
  * If adding and no existing slot is found, reuse slots in FIFO order.
- * The index of the next slot to use is kept in the last entry of the n_mode array.
+ * The index of the next slot to use is kept in the last entry of the n_access array.
  */
 int
-nfs_node_mode_slot(nfsnode_t np, uid_t uid, int add)
+nfs_node_access_slot(nfsnode_t np, uid_t uid, int add)
 {
 	int slot;
 
 	for (slot=0; slot < NFS_ACCESS_CACHE_SIZE; slot++)
-		if (np->n_modeuid[slot] == uid)
+		if (np->n_accessuid[slot] == uid)
 			break;
 	if (slot == NFS_ACCESS_CACHE_SIZE) {
 		if (!add)
 			return (-1);
-		slot = np->n_mode[NFS_ACCESS_CACHE_SIZE];
-		np->n_mode[NFS_ACCESS_CACHE_SIZE] = (slot + 1) % NFS_ACCESS_CACHE_SIZE;
+		slot = np->n_access[NFS_ACCESS_CACHE_SIZE];
+		np->n_access[NFS_ACCESS_CACHE_SIZE] = (slot + 1) % NFS_ACCESS_CACHE_SIZE;
 	}
 	return (slot);
 }
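
The trailing-entry trick above is easy to misread, so here is the slot discipline in a standalone, simplified form: a three-slot cache where entry [SIZE] of the bookkeeping array holds the index of the next slot to recycle. Types and size are illustrative only.

#include <stdio.h>
#include <stdint.h>

#define ACCESS_CACHE_SIZE 3

static uint32_t slot_uid[ACCESS_CACHE_SIZE];
static uint8_t  slot_next[ACCESS_CACHE_SIZE + 1];	/* [SIZE] = next victim */

static int
access_slot(uint32_t uid, int add)
{
	int slot;

	for (slot = 0; slot < ACCESS_CACHE_SIZE; slot++)
		if (slot_uid[slot] == uid)
			return (slot);
	if (!add)
		return (-1);
	slot = slot_next[ACCESS_CACHE_SIZE];
	slot_next[ACCESS_CACHE_SIZE] = (slot + 1) % ACCESS_CACHE_SIZE;
	return (slot);
}

int
main(void)
{
	/* four distinct uids through a three-slot cache: uid 504 evicts slot 0 */
	uint32_t uids[] = { 501, 502, 503, 504 };
	for (int i = 0; i < 4; i++) {
		int s = access_slot(uids[i], 1);
		slot_uid[s] = uids[i];
		printf("uid %u -> slot %d\n", uids[i], s);
	}
	return (0);
}
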
 
 int
-nfs3_access_rpc(nfsnode_t np, u_int32_t *mode, vfs_context_t ctx)
+nfs3_access_rpc(nfsnode_t np, u_int32_t *access, vfs_context_t ctx)
 {
 	int error = 0, lockerror = ENOENT, status, slot;
-	uint32_t access = 0;
+	uint32_t access_result = 0;
 	u_int64_t xid;
 	struct nfsm_chain nmreq, nmrep;
 	struct timeval now;
@@ -452,25 +483,24 @@ nfs3_access_rpc(nfsnode_t np, u_int32_t *mode, vfs_context_t ctx)
 
 	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(NFS_VER3) + NFSX_UNSIGNED);
 	nfsm_chain_add_fh(error, &nmreq, NFS_VER3, np->n_fhp, np->n_fhsize);
-	nfsm_chain_add_32(error, &nmreq, *mode);
+	nfsm_chain_add_32(error, &nmreq, *access);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
-	error = nfs_request(np, NULL, &nmreq, NFSPROC_ACCESS, ctx,
-			&nmrep, &xid, &status);
+	error = nfs_request(np, NULL, &nmreq, NFSPROC_ACCESS, ctx, NULL, &nmrep, &xid, &status);
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
 	nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
 	if (!error)
 		error = status;
-	nfsm_chain_get_32(error, &nmrep, access);
+	nfsm_chain_get_32(error, &nmrep, access_result);
 	nfsmout_if(error);
 
 	uid = kauth_cred_getuid(vfs_context_ucred(ctx));
-	slot = nfs_node_mode_slot(np, uid, 1);
-	np->n_modeuid[slot] = uid;
+	slot = nfs_node_access_slot(np, uid, 1);
+	np->n_accessuid[slot] = uid;
 	microuptime(&now);
-	np->n_modestamp[slot] = now.tv_sec;
-	np->n_mode[slot] = access;
+	np->n_accessstamp[slot] = now.tv_sec;
+	np->n_access[slot] = access_result;
 
 	/*
 	 * If we asked for DELETE but didn't get it, the server
@@ -479,11 +509,14 @@ nfs3_access_rpc(nfsnode_t np, u_int32_t *mode, vfs_context_t ctx)
 	 * and just let any subsequent delete action fail if it
 	 * really isn't deletable.
 	 */
-	if ((*mode & NFS_ACCESS_DELETE) &&
-	    !(np->n_mode[slot] & NFS_ACCESS_DELETE))
-		np->n_mode[slot] |= NFS_ACCESS_DELETE;
-	/* pass back the mode returned with this request */
-	*mode = np->n_mode[slot];
+	if ((*access & NFS_ACCESS_DELETE) &&
+	    !(np->n_access[slot] & NFS_ACCESS_DELETE))
+		np->n_access[slot] |= NFS_ACCESS_DELETE;
+	/* ".zfs" subdirectories may erroneously give a denied answer for add/remove */
+	if (nfs_access_dotzfs && (np->n_flag & NISDOTZFSCHILD))
+		np->n_access[slot] |= (NFS_ACCESS_MODIFY|NFS_ACCESS_EXTEND|NFS_ACCESS_DELETE);
+	/* pass back the access returned with this request */
+	*access = np->n_access[slot];
 nfsmout:
 	if (!lockerror)
 		nfs_node_unlock(np);
@@ -495,8 +528,8 @@ nfsmout:
 /*
  * NFS access vnode op.
  * For NFS version 2, just return ok. File accesses may fail later.
- * For NFS version 3+, use the access RPC to check accessibility. If file modes
- * are changed on the server, accesses might still fail later.
+ * For NFS version 3+, use the access RPC to check accessibility. If file
+ * permissions are changed on the server, accesses might still fail later.
  */
 int
 nfs_vnop_access(
@@ -510,7 +543,7 @@ nfs_vnop_access(
 	vfs_context_t ctx = ap->a_context;
 	vnode_t vp = ap->a_vp;
 	int error = 0, slot, dorpc;
-	u_int32_t mode, wmode;
+	u_int32_t access, waccess;
 	nfsnode_t np = VTONFS(vp);
 	struct nfsmount *nmp;
 	int nfsvers;
@@ -541,53 +574,53 @@ nfs_vnop_access(
 	/*
 	 * Convert KAUTH primitives to NFS access rights.
 	 */
-	mode = 0;
+	access = 0;
 	if (vnode_isdir(vp)) {
 		/* directory */
 		if (ap->a_action &
 		    (KAUTH_VNODE_LIST_DIRECTORY |
 		    KAUTH_VNODE_READ_EXTATTRIBUTES))
-			mode |= NFS_ACCESS_READ;
+			access |= NFS_ACCESS_READ;
 		if (ap->a_action & KAUTH_VNODE_SEARCH)
-			mode |= NFS_ACCESS_LOOKUP;
+			access |= NFS_ACCESS_LOOKUP;
 		if (ap->a_action &
 		    (KAUTH_VNODE_ADD_FILE |
 		    KAUTH_VNODE_ADD_SUBDIRECTORY))
-			mode |= NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
+			access |= NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
 		if (ap->a_action & KAUTH_VNODE_DELETE_CHILD)
-			mode |= NFS_ACCESS_MODIFY;
+			access |= NFS_ACCESS_MODIFY;
 	} else {
 		/* file */
 		if (ap->a_action &
 		    (KAUTH_VNODE_READ_DATA |
 		    KAUTH_VNODE_READ_EXTATTRIBUTES))
-			mode |= NFS_ACCESS_READ;
+			access |= NFS_ACCESS_READ;
 		if (ap->a_action & KAUTH_VNODE_WRITE_DATA)
-			mode |= NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
+			access |= NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
 		if (ap->a_action & KAUTH_VNODE_APPEND_DATA)
-			mode |= NFS_ACCESS_EXTEND;
+			access |= NFS_ACCESS_EXTEND;
 		if (ap->a_action & KAUTH_VNODE_EXECUTE)
-			mode |= NFS_ACCESS_EXECUTE;
+			access |= NFS_ACCESS_EXECUTE;
 	}
 	/* common */
 	if (ap->a_action & KAUTH_VNODE_DELETE)
-		mode |= NFS_ACCESS_DELETE;
+		access |= NFS_ACCESS_DELETE;
 	if (ap->a_action &
 	    (KAUTH_VNODE_WRITE_ATTRIBUTES |
 	    KAUTH_VNODE_WRITE_EXTATTRIBUTES |
 	    KAUTH_VNODE_WRITE_SECURITY))
-		mode |= NFS_ACCESS_MODIFY;
+		access |= NFS_ACCESS_MODIFY;
 	/* XXX this is pretty dubious */
 	if (ap->a_action & KAUTH_VNODE_CHANGE_OWNER)
-		mode |= NFS_ACCESS_MODIFY;
+		access |= NFS_ACCESS_MODIFY;
 
 	/* if caching, always ask for every right */
 	if (nfs_access_cache_timeout > 0) {
-		wmode = NFS_ACCESS_READ | NFS_ACCESS_MODIFY |
+		waccess = NFS_ACCESS_READ | NFS_ACCESS_MODIFY |
 			NFS_ACCESS_EXTEND | NFS_ACCESS_EXECUTE |
 			NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
 	} else {
-		wmode = mode;
+		waccess = access;
 	}
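
A worked example of the mapping above, using the ACCESS bit values from RFC 1813 (READ 0x01, LOOKUP 0x02, MODIFY 0x04, EXTEND 0x08, DELETE 0x10, EXECUTE 0x20): a read/write open of a regular file asserts KAUTH_VNODE_READ_DATA and KAUTH_VNODE_WRITE_DATA, which collapse to READ|MODIFY|EXTEND. A minimal sketch:

#include <stdio.h>

#define NFS_ACCESS_READ    0x01	/* RFC 1813 ACCESS3_READ */
#define NFS_ACCESS_MODIFY  0x04	/* ACCESS3_MODIFY */
#define NFS_ACCESS_EXTEND  0x08	/* ACCESS3_EXTEND */

int
main(void)
{
	/* read/write open of a regular file: READ_DATA + WRITE_DATA */
	unsigned access = NFS_ACCESS_READ			/* READ_DATA  */
	    | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;		/* WRITE_DATA */
	printf("ACCESS request mask: 0x%02x\n", access);	/* prints 0x0d */
	return (0);
}
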
 
 	if ((error = nfs_node_lock(np)))
@@ -598,39 +631,44 @@ nfs_vnop_access(
 	 * this request?
 	 */
 	uid = kauth_cred_getuid(vfs_context_ucred(ctx));
-	slot = nfs_node_mode_slot(np, uid, 0);
+	slot = nfs_node_access_slot(np, uid, 0);
 	dorpc = 1;
-	if (mode == 0) {
+	if (access == 0) {
 		/* not asking for any rights understood by NFS, so don't bother doing an RPC */
 		/* OSAddAtomic(1, &nfsstats.accesscache_hits); */
 		dorpc = 0;
-		wmode = 0;
-	} else if (NMODEVALID(np, slot)) {
+		waccess = 0;
+	} else if (NACCESSVALID(np, slot)) {
 		microuptime(&now);
-		if ((now.tv_sec < (np->n_modestamp[slot] + nfs_access_cache_timeout)) &&
-		    ((np->n_mode[slot] & mode) == mode)) {
+		if ((now.tv_sec < (np->n_accessstamp[slot] + nfs_access_cache_timeout)) &&
+		    ((np->n_access[slot] & access) == access)) {
 			/* OSAddAtomic(1, &nfsstats.accesscache_hits); */
 			dorpc = 0;
-			wmode = np->n_mode[slot];
+			waccess = np->n_access[slot];
 		}
 	}
 	nfs_node_unlock(np);
 	if (dorpc) {
 		/* Either a no, or a don't know.  Go to the wire. */
 		/* OSAddAtomic(1, &nfsstats.accesscache_misses); */
-		error = nmp->nm_funcs->nf_access_rpc(np, &wmode, ctx);
+		error = nmp->nm_funcs->nf_access_rpc(np, &waccess, ctx);
 	}
-	if (!error && ((wmode & mode) != mode))
+	if (!error && ((waccess & access) != access))
 		error = EACCES;
 
 	return (error);
 }
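
A cache hit above needs two things at once: the entry must be younger than nfs_access_cache_timeout, and its cached rights must be a superset of the rights being asked for (a fresh entry that only proves READ cannot answer a MODIFY question). The predicate, in a simplified standalone form:

#include <stdint.h>
#include <time.h>

struct access_entry {
	uint32_t rights;	/* cached NFS_ACCESS_* bits */
	time_t   stamp;		/* fill time (uptime-based in the kernel) */
	int      valid;
};

/* nonzero if the cached entry can answer a request for `want` */
static int
access_cache_hit(const struct access_entry *e, uint32_t want,
	time_t now, time_t timeout)
{
	return (e->valid &&
	    (now < e->stamp + timeout) &&	/* still fresh */
	    ((e->rights & want) == want));	/* covers every requested right */
}
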
 
+
 /*
  * NFS open vnode op
+ *
+ * Perform various update/invalidation checks and then add the
+ * open to the node.  Regular files will have an open file structure
+ * on the node and, for NFSv4, perform an OPEN request on the server.
  */
 int
-nfs3_vnop_open(
+nfs_vnop_open(
 	struct vnop_open_args /* {
 		struct vnodeop_desc *a_desc;
 		vnode_t a_vp;
@@ -642,17 +680,25 @@ nfs3_vnop_open(
 	vnode_t vp = ap->a_vp;
 	nfsnode_t np = VTONFS(vp);
 	struct nfsmount *nmp;
-	struct nfs_vattr nvattr;
+	int error, accessMode, denyMode, opened = 0;
+	struct nfs_open_owner *noop = NULL;
+	struct nfs_open_file *nofp = NULL;
 	enum vtype vtype;
-	int error;
+
+	if (!(ap->a_mode & (FREAD|FWRITE)))
+		return (EINVAL);
 
 	nmp = VTONMP(vp);
 	if (!nmp)
 		return (ENXIO);
+	if (np->n_flag & NREVOKE)
+		return (EIO);
 
 	vtype = vnode_vtype(vp);
 	if ((vtype != VREG) && (vtype != VDIR) && (vtype != VLNK))
 		return (EACCES);
+
+	/* First, check if we need to update/invalidate */
 	if (ISSET(np->n_flag, NUPDATESIZE))
 		nfs_data_update_size(np, 0);
 	if ((error = nfs_node_lock(np)))
@@ -666,7 +712,7 @@ nfs3_vnop_open(
 		if ((error = nfs_node_lock(np)))
 			return (error);
 	}
-	if (vnode_vtype(NFSTOV(np)) == VREG)
+	if (vtype == VREG)
 		np->n_lastrahead = -1;
 	if (np->n_flag & NMODIFIED) {
 		if (vtype == VDIR)
@@ -677,12 +723,145 @@ nfs3_vnop_open(
 	} else {
 		nfs_node_unlock(np);
 	}
+
 	/* nfs_getattr() will check changed and purge caches */
-	return (nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED));
+	if ((error = nfs_getattr(np, NULL, ctx, NGA_UNCACHED)))
+		return (error);
+
+	if (vtype != VREG) {
+		/* Just mark that it was opened */
+		lck_mtx_lock(&np->n_openlock);
+		np->n_openrefcnt++;
+		lck_mtx_unlock(&np->n_openlock);
+		return (0);
+	}
+
+	/* mode contains some combination of: FREAD, FWRITE, O_SHLOCK, O_EXLOCK */
+	accessMode = 0;
+	if (ap->a_mode & FREAD)
+		accessMode |= NFS_OPEN_SHARE_ACCESS_READ;
+	if (ap->a_mode & FWRITE)
+		accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE;
+	if (ap->a_mode & O_EXLOCK)
+		denyMode = NFS_OPEN_SHARE_DENY_BOTH;
+	else if (ap->a_mode & O_SHLOCK)
+		denyMode = NFS_OPEN_SHARE_DENY_WRITE;
+	else
+		denyMode = NFS_OPEN_SHARE_DENY_NONE;
+	// XXX don't do deny modes just yet (and never do it for !v4)
+	denyMode = NFS_OPEN_SHARE_DENY_NONE;
+
+	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
+	if (!noop)
+		return (ENOMEM);
+
+restart:
+	error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
+	if (error) {
+		nfs_open_owner_rele(noop);
+		return (error);
+	}
+	if (np->n_flag & NREVOKE) {
+		error = EIO;
+		nfs_mount_state_in_use_end(nmp, 0);
+		nfs_open_owner_rele(noop);
+		return (error);
+	}
+
+	error = nfs_open_file_find(np, noop, &nofp, accessMode, denyMode, 1);
+	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
+		NP(np, "nfs_vnop_open: LOST %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+		error = EIO;
+	}
+	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
+		nfs_mount_state_in_use_end(nmp, 0);
+		error = nfs4_reopen(nofp, vfs_context_thread(ctx));
+		nofp = NULL;
+		if (!error)
+			goto restart;
+	}
+	if (!error)
+		error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
+	if (error) {
+		nofp = NULL;
+		goto out;
+	}
+
+	if (nmp->nm_vers < NFS_VER4) {
+		/*
+		 * NFS v2/v3 opens are always allowed - so just add it.
+		 */
+		nfs_open_file_add_open(nofp, accessMode, denyMode, 0);
+		goto out;
+	}
+
+	/*
+	 * If we just created the file and the modes match, then we simply use
+	 * the open performed in the create.  Otherwise, send the request.
+	 */
+	if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) &&
+	    (nofp->nof_creator == current_thread()) &&
+	    (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) &&
+	    (denyMode == NFS_OPEN_SHARE_DENY_NONE)) {
+		nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
+		nofp->nof_creator = NULL;
+	} else {
+		if (!opened)
+			error = nfs4_open(np, nofp, accessMode, denyMode, ctx);
+		if ((error == EACCES) && (nofp->nof_flags & NFS_OPEN_FILE_CREATE) &&
+		    (nofp->nof_creator == current_thread())) {
+			/*
+			 * Ugh.  This can happen if we just created the file with read-only
+			 * perms and we're trying to open it for real with different modes
+			 * (e.g. write-only or with a deny mode) and the server decides to
+			 * not allow the second open because of the read-only perms.
+			 * The best we can do is to just use the create's open.
+			 * We may have access we don't need or we may not have a requested
+			 * deny mode.  We may log complaints later, but we'll try to avoid it.
+			 */
+			if (denyMode != NFS_OPEN_SHARE_DENY_NONE)
+				NP(np, "nfs_vnop_open: deny mode foregone on create, %d", kauth_cred_getuid(nofp->nof_owner->noo_cred));
+			nofp->nof_creator = NULL;
+			error = 0;
+		}
+		if (error)
+			goto out;
+		opened = 1;
+		/*
+		 * If we had just created the file, we already had it open.
+		 * If the actual open mode is less than what we grabbed at
+		 * create time, then we'll downgrade the open here.
+		 */
+		if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) &&
+		    (nofp->nof_creator == current_thread())) {
+			error = nfs_close(np, nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, ctx);
+			if (error)
+				NP(np, "nfs_vnop_open: create close error %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
+			if (!nfs_mount_state_error_should_restart(error)) {
+				error = 0;
+				nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
+			}
+		}
+	}
+
+out:
+	if (nofp)
+		nfs_open_file_clear_busy(nofp);
+	if (nfs_mount_state_in_use_end(nmp, error)) {
+		nofp = NULL;
+		goto restart;
+	}
+	if (error)
+		NP(np, "nfs_vnop_open: error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
+	if (noop)
+		nfs_open_owner_rele(noop);
+	return (error);
 }
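
Several v4 paths in this patch share the restart idiom visible above: enter an in-use window with nfs_mount_state_in_use_start(), do the work, and let nfs_mount_state_in_use_end() report whether recovery invalidated the state mid-operation, in which case the whole sequence is redone from the top. A toy model of just that contract (userspace; the real kernel also tracks in-flight operation counts and consults the error code):

#include <stdio.h>

struct mount_state { int generation; };

static int in_use_start(struct mount_state *m) { return m->generation; }

static int
in_use_end(struct mount_state *m, int snapped)
{
	return (m->generation != snapped);	/* nonzero: redo the operation */
}

int
main(void)
{
	struct mount_state m = { 1 };
	int tries = 0, gen;
restart:
	tries++;
	gen = in_use_start(&m);
	if (tries == 1)
		m.generation++;		/* simulate recovery racing the open */
	if (in_use_end(&m, gen))
		goto restart;
	printf("open completed after %d tries\n", tries);
	return (0);
}
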
 
+
 /*
  * NFS close vnode op
+ *
  * What an NFS client should do upon close after writing is a debatable issue.
  * Most NFS clients push delayed writes to the server upon close, basically for
  * two reasons:
@@ -700,11 +879,11 @@ nfs3_vnop_open(
  *
  * The current code does the following:
  * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
- * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
- *                     them.
+ * for NFS Version 3 - flush dirty buffers to the server but don't invalidate them.
+ * for NFS Version 4 - basically the same as NFSv3
  */
 int
-nfs3_vnop_close(
+nfs_vnop_close(
 	struct vnop_close_args /* {
 		struct vnodeop_desc *a_desc;
 		vnode_t a_vp;
@@ -716,35 +895,36 @@ nfs3_vnop_close(
 	vnode_t vp = ap->a_vp;
 	nfsnode_t np = VTONFS(vp);
 	struct nfsmount *nmp;
-	int nfsvers;
-	int error = 0;
+	int error = 0, error1, nfsvers;
+	int fflag = ap->a_fflag;
+	enum vtype vtype;
+	int accessMode, denyMode;
+	struct nfs_open_owner *noop = NULL;
+	struct nfs_open_file *nofp = NULL;
 
-	if (vnode_vtype(vp) != VREG)
-		return (0);
 	nmp = VTONMP(vp);
 	if (!nmp)
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
+	vtype = vnode_vtype(vp);
 
+	/* First, check if we need to update/flush/invalidate */
 	if (ISSET(np->n_flag, NUPDATESIZE))
 		nfs_data_update_size(np, 0);
-	if ((error = nfs_node_lock(np)))
-		return (error);
+	nfs_node_lock_force(np);
 	if (np->n_flag & NNEEDINVALIDATE) {
 		np->n_flag &= ~NNEEDINVALIDATE;
 		nfs_node_unlock(np);
 		nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1);
-		if ((error = nfs_node_lock(np)))
-			return (error);
+		nfs_node_lock_force(np);
 	}
-	if (np->n_flag & NMODIFIED) {
+	if ((vtype == VREG) && (np->n_flag & NMODIFIED) && (fflag & FWRITE)) {
+		/* we're closing an open for write and the file is modified, so flush it */
 		nfs_node_unlock(np);
 		if (nfsvers != NFS_VER2)
 			error = nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), 0);
 		else
 			error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
-		if (error)
-			return (error);
 		nfs_node_lock_force(np);
 		NATTRINVALIDATE(np);
 	}
@@ -753,9 +933,266 @@ nfs3_vnop_close(
 		error = np->n_error;
 	}
 	nfs_node_unlock(np);
+
+	if (vtype != VREG) {
+		/* Just mark that it was closed */
+		lck_mtx_lock(&np->n_openlock);
+		if (np->n_openrefcnt == 0) {
+			if (fflag & (FREAD|FWRITE)) {
+				NP(np, "nfs_vnop_close: open reference underrun");
+				error = EINVAL;
+			}
+		} else if (fflag & (FREAD|FWRITE)) {
+			np->n_openrefcnt--;
+		} else {
+			/* No FREAD/FWRITE set - probably the final close */
+			np->n_openrefcnt = 0;
+		}
+		lck_mtx_unlock(&np->n_openlock);
+		return (error);
+	}
+	error1 = error;
+
+	/* fflag should contain some combination of: FREAD, FWRITE, FHASLOCK */
+	accessMode = 0;
+	if (fflag & FREAD)
+		accessMode |= NFS_OPEN_SHARE_ACCESS_READ;
+	if (fflag & FWRITE)
+		accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE;
+// XXX It would be nice if we still had the O_EXLOCK/O_SHLOCK flags that were on the open
+//	if (fflag & O_EXLOCK)
+//		denyMode = NFS_OPEN_SHARE_DENY_BOTH;
+//	else if (fflag & O_SHLOCK)
+//		denyMode = NFS_OPEN_SHARE_DENY_WRITE;
+//	else
+//		denyMode = NFS_OPEN_SHARE_DENY_NONE;
+	if (fflag & FHASLOCK) {
+		/* XXX assume FHASLOCK is for the deny mode and not flock */
+		/* FHASLOCK flock will be unlocked in the close path, but the flag is not cleared. */
+		if (nofp->nof_deny & NFS_OPEN_SHARE_DENY_READ)
+			denyMode = NFS_OPEN_SHARE_DENY_BOTH;
+		else if (nofp->nof_deny & NFS_OPEN_SHARE_DENY_WRITE)
+			denyMode = NFS_OPEN_SHARE_DENY_WRITE;
+		else
+			denyMode = NFS_OPEN_SHARE_DENY_NONE;
+	} else {
+		denyMode = NFS_OPEN_SHARE_DENY_NONE;
+	}
+	// XXX don't do deny modes just yet (and never do it for !v4)
+	denyMode = NFS_OPEN_SHARE_DENY_NONE;
+
+	if (!accessMode) {
+		/*
+		 * No mode given to close?
+		 * Guess this is the final close.
+		 * We should unlock all locks and close all opens.
+		 */
+		mount_t mp = vnode_mount(vp);
+		int force = (!mp || (mp->mnt_kern_flag & MNTK_FRCUNMOUNT));
+		nfs_release_open_state_for_node(np, force);
+		return (error);
+	}
+
+	noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 0);
+	if (!noop) {
+		// printf("nfs_vnop_close: can't get open owner!\n");
+		return (EIO);
+	}
+
+restart:
+	error = nfs_mount_state_in_use_start(nmp, NULL);
+	if (error) {
+		nfs_open_owner_rele(noop);
+		return (error);
+	}
+
+	error = nfs_open_file_find(np, noop, &nofp, 0, 0, 0);
+	if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
+		nfs_mount_state_in_use_end(nmp, 0);
+		error = nfs4_reopen(nofp, NULL);
+		nofp = NULL;
+		if (!error)
+			goto restart;
+	}
+	if (error) {
+		NP(np, "nfs_vnop_close: no open file for owner, error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
+		error = EBADF;
+		goto out;
+	}
+	error = nfs_open_file_set_busy(nofp, NULL);
+	if (error) {
+		nofp = NULL;
+		goto out;
+	}
+
+	error = nfs_close(np, nofp, accessMode, denyMode, ctx);
+	if (error)
+		NP(np, "nfs_vnop_close: close error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
+
+out:
+	if (nofp)
+		nfs_open_file_clear_busy(nofp);
+	if (nfs_mount_state_in_use_end(nmp, error)) {
+		nofp = NULL;
+		goto restart;
+	}
+	if (!error)
+		error = error1;
+	if (error)
+		NP(np, "nfs_vnop_close: error %d, %d", error, kauth_cred_getuid(noop->noo_cred));
+	if (noop)
+		nfs_open_owner_rele(noop);
 	return (error);
 }
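
A close has to reverse the mapping nfs_vnop_open applied, but fflag only reliably carries FREAD/FWRITE by this point (O_EXLOCK/O_SHLOCK are gone, hence the FHASLOCK guesswork above; note that branch reads nofp->nof_deny before nfs_open_file_find() has run, so it appears to rely on nofp being non-NULL there). The core mapping, sketched with the BSD fcntl.h and RFC 3530 OPEN4_SHARE_ACCESS_* values (both pairs happen to be 1 and 2):

#define FREAD   0x0001
#define FWRITE  0x0002
#define NFS_OPEN_SHARE_ACCESS_READ  0x0001
#define NFS_OPEN_SHARE_ACCESS_WRITE 0x0002

static unsigned
close_access_mode(int fflag)
{
	unsigned am = 0;
	if (fflag & FREAD)
		am |= NFS_OPEN_SHARE_ACCESS_READ;
	if (fflag & FWRITE)
		am |= NFS_OPEN_SHARE_ACCESS_WRITE;
	return (am);	/* 0 is treated as the final close: release everything */
}
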
 
+/*
+ * nfs_close(): common function that does all the heavy lifting of file closure
+ *
+ * Takes an open file structure and a set of access/deny modes and figures out how
+ * to update the open file structure (and the state on the server) appropriately.
+ */
+int
+nfs_close(
+	nfsnode_t np,
+	struct nfs_open_file *nofp,
+	uint32_t accessMode,
+	uint32_t denyMode,
+	vfs_context_t ctx)
+{
+	struct nfs_lock_owner *nlop;
+	int error = 0, changed = 0, delegated = 0, closed = 0, downgrade = 0;
+	uint32_t newAccessMode, newDenyMode;
+
+	/* warn if modes don't match current state */
+	if (((accessMode & nofp->nof_access) != accessMode) || ((denyMode & nofp->nof_deny) != denyMode))
+		NP(np, "nfs_close: mode mismatch %d %d, current %d %d, %d",
+			accessMode, denyMode, nofp->nof_access, nofp->nof_deny,
+			kauth_cred_getuid(nofp->nof_owner->noo_cred));
+
+	/*
+	 * If we're closing a write-only open, we may not have a write-only count
+	 * if we also grabbed read access.  So, check the read-write count.
+	 */
+	if (denyMode == NFS_OPEN_SHARE_DENY_NONE) {
+		if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
+		    (nofp->nof_w == 0) && (nofp->nof_d_w == 0) &&
+		    (nofp->nof_rw || nofp->nof_d_rw))
+			accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
+	} else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) {
+		if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
+		    (nofp->nof_w_dw == 0) && (nofp->nof_d_w_dw == 0) &&
+		    (nofp->nof_rw_dw || nofp->nof_d_rw_dw))
+			accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
+	} else { /* NFS_OPEN_SHARE_DENY_BOTH */
+		if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) &&
+		    (nofp->nof_w_drw == 0) && (nofp->nof_d_w_drw == 0) &&
+		    (nofp->nof_rw_drw || nofp->nof_d_rw_drw))
+			accessMode = NFS_OPEN_SHARE_ACCESS_BOTH;
+	}
+
+	nfs_open_file_remove_open_find(nofp, accessMode, denyMode, &newAccessMode, &newDenyMode, &delegated);
+	if ((newAccessMode != nofp->nof_access) || (newDenyMode != nofp->nof_deny))
+		changed = 1;
+	else
+		changed = 0;
+
+	if (NFSTONMP(np)->nm_vers < NFS_VER4) /* NFS v2/v3 closes simply need to remove the open. */
+		goto v3close;
+
+	if ((newAccessMode == 0) || (nofp->nof_opencnt == 1)) {
+		/*
+		 * No more access after this close, so clean up and close it.
+		 * Don't send a close RPC if we're closing a delegated open.
+		 */
+		nfs_wait_bufs(np);
+		closed = 1;
+		if (!delegated && !(nofp->nof_flags & NFS_OPEN_FILE_LOST))
+			error = nfs4_close_rpc(np, nofp, vfs_context_thread(ctx), vfs_context_ucred(ctx), 0);
+		if (error == NFSERR_LOCKS_HELD) {
+			/*
+			 * Hmm... the server says we have locks we need to release first.
+			 * Find the lock owner and try to unlock everything.
+			 */
+			nlop = nfs_lock_owner_find(np, vfs_context_proc(ctx), 0);
+			if (nlop) {
+				nfs4_unlock_rpc(np, nlop, F_WRLCK, 0, UINT64_MAX,
+					0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
+				nfs_lock_owner_rele(nlop);
+			}
+			error = nfs4_close_rpc(np, nofp, vfs_context_thread(ctx), vfs_context_ucred(ctx), 0);
+		}
+	} else if (changed) {
+		/*
+		 * File is still open but with less access, so downgrade the open.
+		 * Don't send a downgrade RPC if we're closing a delegated open.
+		 */
+		if (!delegated && !(nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
+			downgrade = 1;
+			/*
+			 * If we have delegated opens, we should probably claim them before sending
+			 * the downgrade because the server may not know the open we are downgrading to.
+			 */
+			if (nofp->nof_d_rw_drw || nofp->nof_d_w_drw || nofp->nof_d_r_drw ||
+			    nofp->nof_d_rw_dw || nofp->nof_d_w_dw || nofp->nof_d_r_dw ||
+			    nofp->nof_d_rw || nofp->nof_d_w || nofp->nof_d_r)
+				nfs4_claim_delegated_state_for_open_file(nofp, 0);
+			/* need to remove the open before sending the downgrade */
+			nfs_open_file_remove_open(nofp, accessMode, denyMode);
+			error = nfs4_open_downgrade_rpc(np, nofp, ctx);
+			if (error) /* Hmm.. that didn't work. Add the open back in. */
+				nfs_open_file_add_open(nofp, accessMode, denyMode, delegated);
+		}
+	}
+
+	if (error) {
+		NP(np, "nfs_close: error %d, %d", error, kauth_cred_getuid(nofp->nof_owner->noo_cred));
+		return (error);
+	}
+
+v3close:
+	if (!downgrade)
+		nfs_open_file_remove_open(nofp, accessMode, denyMode);
+
+	if (closed) {
+		lck_mtx_lock(&nofp->nof_lock);
+		if (nofp->nof_r || nofp->nof_d_r || nofp->nof_w || nofp->nof_d_w || nofp->nof_d_rw ||
+		    (nofp->nof_rw && !((nofp->nof_flags & NFS_OPEN_FILE_CREATE) && !nofp->nof_creator && (nofp->nof_rw == 1))) ||
+		    nofp->nof_r_dw || nofp->nof_d_r_dw || nofp->nof_w_dw || nofp->nof_d_w_dw ||
+		    nofp->nof_rw_dw || nofp->nof_d_rw_dw || nofp->nof_r_drw || nofp->nof_d_r_drw ||
+		    nofp->nof_w_drw || nofp->nof_d_w_drw || nofp->nof_rw_drw || nofp->nof_d_rw_drw)
+			NP(np, "nfs_close: unexpected count: %u.%u %u.%u %u.%u dw %u.%u %u.%u %u.%u drw %u.%u %u.%u %u.%u flags 0x%x, %d",
+				nofp->nof_r, nofp->nof_d_r, nofp->nof_w, nofp->nof_d_w,
+				nofp->nof_rw, nofp->nof_d_rw, nofp->nof_r_dw, nofp->nof_d_r_dw,
+				nofp->nof_w_dw, nofp->nof_d_w_dw, nofp->nof_rw_dw, nofp->nof_d_rw_dw,
+				nofp->nof_r_drw, nofp->nof_d_r_drw, nofp->nof_w_drw, nofp->nof_d_w_drw,
+				nofp->nof_rw_drw, nofp->nof_d_rw_drw, nofp->nof_flags,
+				kauth_cred_getuid(nofp->nof_owner->noo_cred));
+		/* clear out all open info, just to be safe */
+		nofp->nof_access = nofp->nof_deny = 0;
+		nofp->nof_mmap_access = nofp->nof_mmap_deny = 0;
+		nofp->nof_r = nofp->nof_d_r = 0;
+		nofp->nof_w = nofp->nof_d_w = 0;
+		nofp->nof_rw = nofp->nof_d_rw = 0;
+		nofp->nof_r_dw = nofp->nof_d_r_dw = 0;
+		nofp->nof_w_dw = nofp->nof_d_w_dw = 0;
+		nofp->nof_rw_dw = nofp->nof_d_rw_dw = 0;
+		nofp->nof_r_drw = nofp->nof_d_r_drw = 0;
+		nofp->nof_w_drw = nofp->nof_d_w_drw = 0;
+		nofp->nof_rw_drw = nofp->nof_d_rw_drw = 0;
+		nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE;
+		lck_mtx_unlock(&nofp->nof_lock);
+		/* XXX we may potentially want to clean up idle/unused open file structures */
+	}
+	if (nofp->nof_flags & NFS_OPEN_FILE_LOST) {
+		error = EIO;
+		NP(np, "nfs_close: LOST%s, %d", !nofp->nof_opencnt ? " (last)" : "",
+			kauth_cred_getuid(nofp->nof_owner->noo_cred));
+	}
+	return (error);
+}
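
The nof_* counters reconciled here form a grid: the r/w/rw stem is the access mode, the _dw/_drw suffix is the deny mode (deny-write / deny-both; no suffix is deny-none), and a d_ prefix counts opens held under a delegation, which have no server-side open to close. A sketch of the layout under that reading:

#include <stdint.h>

/* One count per (access, deny) cell, with a delegated twin for each. */
struct open_counts {
	uint32_t r,     w,     rw;	/* deny none */
	uint32_t r_dw,  w_dw,  rw_dw;	/* deny write */
	uint32_t r_drw, w_drw, rw_drw;	/* deny read+write ("both") */
	/* d_r ... d_rw_drw: the same nine, for delegation-backed opens */
};
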
+
+
+
 
 int
 nfs3_getattr_rpc(
@@ -763,18 +1200,22 @@ nfs3_getattr_rpc(
 	mount_t mp,
 	u_char *fhp,
 	size_t fhsize,
+	int flags,
 	vfs_context_t ctx,
 	struct nfs_vattr *nvap,
 	u_int64_t *xidp)
 {
 	struct nfsmount *nmp = mp ? VFSTONFS(mp) : NFSTONMP(np);
-	int error = 0, status, nfsvers;
+	int error = 0, status, nfsvers, rpcflags = 0;
 	struct nfsm_chain nmreq, nmrep;
 
 	if (!nmp)
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
 
+	if (flags & NGA_MONITOR) /* vnode monitor requests should be soft */
+		rpcflags = R_RECOVER;
+
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_null(&nmrep);
 
@@ -784,8 +1225,9 @@ nfs3_getattr_rpc(
 	nfsm_chain_add_opaque(error, &nmreq, fhp, fhsize);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
-	error = nfs_request(np, mp, &nmreq, NFSPROC_GETATTR, ctx,
-			&nmrep, xidp, &status);
+	error = nfs_request2(np, mp, &nmreq, NFSPROC_GETATTR, 
+			vfs_context_thread(ctx), vfs_context_ucred(ctx),
+			NULL, rpcflags, &nmrep, xidp, &status);
 	if (!error)
 		error = status;
 	nfsmout_if(error);
@@ -798,10 +1240,11 @@ nfsmout:
 
 
 int
-nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int uncached)
+nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int flags)
 {
 	struct nfsmount *nmp;
 	int error = 0, nfsvers, inprogset = 0, wanted = 0, avoidfloods;
+	struct nfs_vattr nvattr;
 	struct timespec ts = { 2, 0 };
 	u_int64_t xid;
 
@@ -811,6 +1254,10 @@ nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int uncache
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
 
+	if (!nvap)
+		nvap = &nvattr;
+	NVATTR_INIT(nvap);
+
 	/* Update local times for special files. */
 	if (np->n_flag & (NACC | NUPD)) {
 		nfs_node_lock_force(np);
@@ -823,15 +1270,27 @@ nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int uncache
 
 	error = nfs_node_lock(np);
 	nfsmout_if(error);
-	if (!uncached) {
+	if (!(flags & (NGA_UNCACHED|NGA_MONITOR)) || ((nfsvers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK))) {
+		/*
+		 * Use the cache or wait for any getattr in progress if:
+		 * - it's a cached request, or
+		 * - we have a delegation
+		 */
 		while (1) {
-			error = nfs_getattrcache(np, nvap);
+			error = nfs_getattrcache(np, nvap, flags);
 			if (!error || (error != ENOENT)) {
 				nfs_node_unlock(np);
 				goto nfsmout;
 			}
+			error = 0;
 			if (!ISSET(np->n_flag, NGETATTRINPROG))
 				break;
+			if (flags & NGA_MONITOR) {
+				/* no need to wait if a request is pending */
+				error = EINPROGRESS;
+				nfs_node_unlock(np);
+				goto nfsmout;
+			}
 			SET(np->n_flag, NGETATTRWANT);
 			msleep(np, &np->n_lock, PZERO-1, "nfsgetattrwant", &ts);
 			if ((error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0))) {
@@ -844,30 +1303,33 @@ nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int uncache
 	} else if (!ISSET(np->n_flag, NGETATTRINPROG)) {
 		SET(np->n_flag, NGETATTRINPROG);
 		inprogset = 1;
+	} else if (flags & NGA_MONITOR) {
+		/* no need to make a request if one is pending */
+		error = EINPROGRESS;
 	}
 	nfs_node_unlock(np);
 
 	nmp = NFSTONMP(np);
-	if (!nmp) {
+	if (!nmp)
 		error = ENXIO;
+	if (error)
 		goto nfsmout;
-	}
 
 	/*
-	 * Try to get both the attributes and access info by making an
-	 * ACCESS call and seeing if it returns updated attributes.
+	 * We might want to try to get both the attributes and access info by
+	 * making an ACCESS call and seeing if it returns updated attributes.
 	 * But don't bother if we aren't caching access info or if the
 	 * attributes returned wouldn't be cached.
 	 */
-	if ((nfsvers != NFS_VER2) && (nfs_access_cache_timeout > 0)) {
+	if (!(flags & NGA_ACL) && (nfsvers != NFS_VER2) && nfs_access_for_getattr && (nfs_access_cache_timeout > 0)) {
 		if (nfs_attrcachetimeout(np) > 0) {
 			/*  OSAddAtomic(1, &nfsstats.accesscache_misses); */
-			u_int32_t mode = NFS_ACCESS_ALL;
-			error = nmp->nm_funcs->nf_access_rpc(np, &mode, ctx);
+			u_int32_t access = NFS_ACCESS_ALL;
+			error = nmp->nm_funcs->nf_access_rpc(np, &access, ctx);
 			if (error)
 				goto nfsmout;
 			nfs_node_lock_force(np);
-			error = nfs_getattrcache(np, nvap);
+			error = nfs_getattrcache(np, nvap, flags);
 			nfs_node_unlock(np);
 			if (!error || (error != ENOENT))
 				goto nfsmout;
@@ -878,7 +1340,7 @@ nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int uncache
 
 	avoidfloods = 0;
 tryagain:
-	error = nmp->nm_funcs->nf_getattr_rpc(np, NULL, np->n_fhp, np->n_fhsize, ctx, nvap, &xid);
+	error = nmp->nm_funcs->nf_getattr_rpc(np, NULL, np->n_fhp, np->n_fhsize, flags, ctx, nvap, &xid);
 	if (!error) {
 		nfs_node_lock_force(np);
 		error = nfs_loadattrcache(np, nvap, &xid, 0);
@@ -933,6 +1395,17 @@ nfsmout:
 		if (wanted)
 			wakeup(np);
 	}
+
+	if (nvap == &nvattr) {
+		NVATTR_CLEANUP(nvap);
+	} else if (!(flags & NGA_ACL)) {
+		/* make sure we don't return an ACL if it wasn't asked for */
+		NFS_BITMAP_CLR(nvap->nva_bitmap, NFS_FATTR_ACL);
+		if (nvap->nva_acl) {
+			kauth_acl_free(nvap->nva_acl);
+			nvap->nva_acl = NULL;
+		}
+	}
 	FSDBG_BOT(513, np->n_size, error, np->n_vattr.nva_size, np->n_flag);
 	return (error);
 }
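
nfs_getattr serializes fetches single-flight: the first caller sets NGETATTRINPROG and goes to the wire; cached callers that find a fetch in progress set NGETATTRWANT and msleep on the node until the fetcher's wakeup(np); NGA_MONITOR callers bail out with EINPROGRESS rather than sleep. The same flag/wait/wake shape, modeled with pthreads:

#include <pthread.h>

/* Single-flight fetch: one thread does the RPC, the rest wait (or bail). */
static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cv = PTHREAD_COND_INITIALIZER;
static int fetch_in_progress, attrs_cached;

static int
get_attrs(int monitor)	/* monitor != 0 models NGA_MONITOR */
{
	pthread_mutex_lock(&lk);
	while (!attrs_cached && fetch_in_progress) {
		if (monitor) {			/* don't wait: one is pending */
			pthread_mutex_unlock(&lk);
			return (-1);		/* EINPROGRESS */
		}
		pthread_cond_wait(&cv, &lk);	/* the NGETATTRWANT msleep */
	}
	if (attrs_cached) {
		pthread_mutex_unlock(&lk);
		return (0);			/* served from cache */
	}
	fetch_in_progress = 1;			/* we become the fetcher */
	pthread_mutex_unlock(&lk);
	/* ... GETATTR RPC happens here, no locks held ... */
	pthread_mutex_lock(&lk);
	attrs_cached = 1;
	fetch_in_progress = 0;
	pthread_cond_broadcast(&cv);		/* the wakeup(np) */
	pthread_mutex_unlock(&lk);
	return (0);
}
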
@@ -1002,20 +1475,20 @@ nfs_vnop_setattr(
 	struct nfsmount *nmp;
 	struct vnode_attr *vap = ap->a_vap;
 	int error = 0;
-	int biosize, nfsvers;
-	u_quad_t origsize;
+	int biosize, nfsvers, namedattrs;
+	u_quad_t origsize, vapsize;
 	struct nfs_dulookup dul;
 	nfsnode_t dnp = NULL;
 	vnode_t dvp = NULL;
 	const char *vname = NULL;
 	struct nfs_open_owner *noop = NULL;
 	struct nfs_open_file *nofp = NULL;
-	struct nfs_vattr nvattr;
 
 	nmp = VTONMP(vp);
 	if (!nmp)
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
+	namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
 	biosize = nmp->nm_biosize;
 
 	/* Disallow write attempts if the filesystem is mounted read-only. */
@@ -1058,46 +1531,52 @@ nfs_vnop_setattr(
 			/* flush everything */
 			error = nfs_vinvalbuf(vp, (vap->va_data_size ? V_SAVE : 0) , ctx, 1);
 			if (error) {
-				printf("nfs_setattr: nfs_vinvalbuf %d\n", error);
+				NP(np, "nfs_setattr: nfs_vinvalbuf %d", error);
 				FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, -1);
 				return (error);
 			}
 			if (nfsvers >= NFS_VER4) {
 				/* setting file size requires having the file open for write access */
+				if (np->n_flag & NREVOKE)
+					return (EIO);
 				noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
 				if (!noop)
 					return (ENOMEM);
-retryopen:
+restart:
+				error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
+				if (error)
+					return (error);
+				if (np->n_flag & NREVOKE) {
+					nfs_mount_state_in_use_end(nmp, 0);
+					return (EIO);
+				}
 				error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1);
 				if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST))
 					error = EIO;
 				if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
-					nfs4_reopen(nofp, vfs_context_thread(ctx));
+					nfs_mount_state_in_use_end(nmp, 0);
+					error = nfs4_reopen(nofp, vfs_context_thread(ctx));
 					nofp = NULL;
-					goto retryopen;
+					if (!error)
+						goto restart;
 				}
+				if (!error)
+					error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
 				if (error) {
 					nfs_open_owner_rele(noop);
 					return (error);
 				}
 				if (!(nofp->nof_access & NFS_OPEN_SHARE_ACCESS_WRITE)) {
 					/* we don't have the file open for write access, so open it */
-					error = nfs_mount_state_in_use_start(nmp);
-					if (!error)
-						error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
-					if (error) {
-						nfs_open_owner_rele(noop);
-						return (error);
-					}
 					error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx);
 					if (!error)
 						nofp->nof_flags |= NFS_OPEN_FILE_SETATTR;
 					if (nfs_mount_state_error_should_restart(error)) {
 						nfs_open_file_clear_busy(nofp);
 						nofp = NULL;
+						if (nfs_mount_state_in_use_end(nmp, error))
+							goto restart;
 					}
-					if (nfs_mount_state_in_use_end(nmp, error))
-						goto retryopen;
 				}
 			}
 			nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
@@ -1198,61 +1677,52 @@ retryopen:
 			nfs_node_unlock(np);
 		}
 	}
-	if (VATTR_IS_ACTIVE(vap, va_mode) ||
-	    VATTR_IS_ACTIVE(vap, va_uid) ||
-	    VATTR_IS_ACTIVE(vap, va_gid)) {
-		if ((error = nfs_node_lock(np))) {
-			if (VATTR_IS_ACTIVE(vap, va_data_size))
-				nfs_data_unlock(np);
-			return (error);
-		}
-		NMODEINVALIDATE(np);
+	if ((VATTR_IS_ACTIVE(vap, va_mode) || VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid) ||
+	     VATTR_IS_ACTIVE(vap, va_acl) || VATTR_IS_ACTIVE(vap, va_uuuid) || VATTR_IS_ACTIVE(vap, va_guuid)) &&
+	    !(error = nfs_node_lock(np))) {
+		NACCESSINVALIDATE(np);
 		nfs_node_unlock(np);
-		dvp = vnode_getparent(vp);
-		vname = vnode_getname(vp);
-		dnp = (dvp && vname) ? VTONFS(dvp) : NULL;
-		if (dnp) {
-			error = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
-			if (error) {
-				dnp = NULL;
-				error = 0;
+		if (!namedattrs) {
+			dvp = vnode_getparent(vp);
+			vname = vnode_getname(vp);
+			dnp = (dvp && vname) ? VTONFS(dvp) : NULL;
+			if (dnp) {
+				error = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
+				if (error) {
+					dnp = NULL;
+					error = 0;
+				}
+			}
+			if (dnp) {
+				nfs_dulookup_init(&dul, dnp, vname, strlen(vname), ctx);
+				nfs_dulookup_start(&dul, dnp, ctx);
 			}
-		}
-		if (dnp) {
-			nfs_dulookup_init(&dul, dnp, vname, strlen(vname), ctx);
-			nfs_dulookup_start(&dul, dnp, ctx);
 		}
 	}
 
-retrysetattr:
-	if (VATTR_IS_ACTIVE(vap, va_data_size) && (nfsvers >= NFS_VER4))
-		error = nfs_mount_state_in_use_start(nmp);
-
-	if (!error) {
+	if (!error)
 		error = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx);
 
-		if (VATTR_IS_ACTIVE(vap, va_data_size) && (nfsvers >= NFS_VER4))
-			if (nfs_mount_state_in_use_end(nmp, error))
-				goto retrysetattr;
-	}
-
-	if (VATTR_IS_ACTIVE(vap, va_mode) ||
-	    VATTR_IS_ACTIVE(vap, va_uid) ||
-	    VATTR_IS_ACTIVE(vap, va_gid)) {
-		if (dnp) {
-			nfs_dulookup_finish(&dul, dnp, ctx);
-			nfs_node_clear_busy(dnp);
+	if (VATTR_IS_ACTIVE(vap, va_mode) || VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid) ||
+	    VATTR_IS_ACTIVE(vap, va_acl) || VATTR_IS_ACTIVE(vap, va_uuuid) || VATTR_IS_ACTIVE(vap, va_guuid)) {
+		if (!namedattrs) {
+			if (dnp) {
+				nfs_dulookup_finish(&dul, dnp, ctx);
+				nfs_node_clear_busy(dnp);
+			}
+			if (dvp != NULLVP)
+				vnode_put(dvp);
+			if (vname != NULL)
+				vnode_putname(vname);
 		}
-		if (dvp != NULLVP)
-			vnode_put(dvp);
-		if (vname != NULL)
-			vnode_putname(vname);
 	}
 
 	FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, error);
 	if (VATTR_IS_ACTIVE(vap, va_data_size)) {
-		if (error && (origsize != np->n_size)) {
+		if (error && (origsize != np->n_size) &&
+		    ((nfsvers < NFS_VER4) || !nfs_mount_state_error_should_restart(error))) {
 			/* make every effort to resync file size w/ server... */
+			/* (don't bother if we'll be restarting the operation) */
 			int err; /* preserve "error" for return */
 			np->n_size = np->n_vattr.nva_size = origsize;
 			nfs_node_lock_force(np);
@@ -1260,10 +1730,12 @@ retrysetattr:
 			nfs_node_unlock(np);
 			FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0002);
 			ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
+			vapsize = vap->va_data_size;
 			vap->va_data_size = origsize;
 			err = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx);
 			if (err)
-				printf("nfs_vnop_setattr: nfs%d_setattr_rpc %d %d\n", nfsvers, error, err);
+				NP(np, "nfs_vnop_setattr: nfs%d_setattr_rpc %d %d", nfsvers, error, err);
+			vap->va_data_size = vapsize;
 		}
 		nfs_node_lock_force(np);
 		/*
@@ -1276,22 +1748,26 @@ retrysetattr:
 			CLR(np->n_flag, NUPDATESIZE);
 			NATTRINVALIDATE(np);
 			nfs_node_unlock(np);
-			nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED);
+			nfs_getattr(np, NULL, ctx, NGA_UNCACHED);
 		} else {
 			nfs_node_unlock(np);
 		}
 		nfs_data_unlock(np);
 		if (nfsvers >= NFS_VER4) {
-			if (nofp->nof_flags & NFS_OPEN_FILE_SETATTR) {
-				int err = nfs4_close(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx);
-				if (err) {
-					vname = vnode_getname(NFSTOV(np));
-					printf("nfs_vnop_setattr: close error: %d, %s\n", err, vname);
-					vnode_putname(vname);
+			if (nofp) {
+				/* don't close our setattr open if we'll be restarting... */
+				if (!nfs_mount_state_error_should_restart(error) &&
+				    (nofp->nof_flags & NFS_OPEN_FILE_SETATTR)) {
+					int err = nfs_close(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx);
+					if (err)
+						NP(np, "nfs_vnop_setattr: close error: %d", err);
+					nofp->nof_flags &= ~NFS_OPEN_FILE_SETATTR;
 				}
-				nofp->nof_flags &= ~NFS_OPEN_FILE_SETATTR;
 				nfs_open_file_clear_busy(nofp);
+				nofp = NULL;
 			}
+			if (nfs_mount_state_in_use_end(nmp, error))
+				goto restart;
 			nfs_open_owner_rele(noop);
 		}
 	}
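
The rollback above juggles three values: the caller's error (always returned), a scratch err for the best-effort resync RPC, and vap->va_data_size, which this revision now restores so a restarted operation still sees what the caller asked for. A toy of that bookkeeping, where set_size() stands in for the setattr RPC (an assumption for illustration):

#include <stdio.h>

static int set_size(long size) { return (size == 4096) ? 0 : 5 /* EIO */; }

int
main(void)
{
	long origsize = 4096, requested = 8192;
	int error = set_size(requested);	/* primary attempt fails */
	if (error) {
		long vapsize = requested;	/* stash the caller's value */
		int err = set_size(origsize);	/* scratch status for rollback */
		if (err)
			printf("resync also failed: %d %d\n", error, err);
		requested = vapsize;		/* restore for a possible restart */
	}
	printf("returning the original error: %d\n", error);
	return (error != 0);
}
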
@@ -1414,8 +1890,7 @@ nfs3_setattr_rpc(
 	}
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
-	error = nfs_request(np, NULL, &nmreq, NFSPROC_SETATTR, ctx,
-			&nmrep, &xid, &status);
+	error = nfs_request(np, NULL, &nmreq, NFSPROC_SETATTR, ctx, NULL, &nmrep, &xid, &status);
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
 	if (nfsvers == NFS_VER3) {
@@ -1435,7 +1910,7 @@ nfs3_setattr_rpc(
 	} else {
 		if (!error)
 			error = status;
-		nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
+		nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
 	}
 	/*
 	 * We just changed the attributes and we want to make sure that we
@@ -1495,6 +1970,7 @@ nfs_vnop_lookup(
 	*vpp = NULLVP;
 
 	dnp = VTONFS(dvp);
+	NVATTR_INIT(&nvattr);
 
 	mp = vnode_mount(dvp);
 	nmp = VFSTONFS(mp);
@@ -1503,12 +1979,12 @@ nfs_vnop_lookup(
 		goto error_return;
 	}
 	nfsvers = nmp->nm_vers;
-	negnamecache = !(nmp->nm_flag & NFSMNT_NONEGNAMECACHE);
+	negnamecache = !NMFLAG(nmp, NONEGNAMECACHE);
 
 	if ((error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx))))
 		goto error_return;
 	/* nfs_getattr() will check changed and purge caches */
-	if ((error = nfs_getattr(dnp, &nvattr, ctx, NGA_CACHED)))
+	if ((error = nfs_getattr(dnp, NULL, ctx, NGA_CACHED)))
 		goto error_return;
 
 	error = cache_lookup(dvp, vpp, cnp);
@@ -1518,7 +1994,7 @@ nfs_vnop_lookup(
 		goto error_return;
 	case 0:
 		/* cache miss */
-		if ((nfsvers > NFS_VER2) && (nmp->nm_flag & NFSMNT_RDIRPLUS)) {
+		if ((nfsvers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) {
 			/* if rdirplus, try dir buf cache lookup */
 			error = nfs_dir_buf_cache_lookup(dnp, &np, cnp, ctx, 0);
 			if (!error && np) {
@@ -1535,8 +2011,10 @@ nfs_vnop_lookup(
 		OSAddAtomic(1, &nfsstats.lookupcache_hits);
 
 		nfs_node_clear_busy(dnp);
+		busyerror = ENOENT;
 
 		/* check for directory access */
+		naa.a_desc = &vnop_access_desc;
 		naa.a_vp = dvp;
 		naa.a_action = KAUTH_VNODE_SEARCH;
 		naa.a_context = ctx;
@@ -1561,6 +2039,11 @@ nfs_vnop_lookup(
 		fh.fh_len = 0;
 		goto found;
 	}
+	if ((nfsvers >= NFS_VER4) && (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER)) {
+		/* we should never be looking things up in a trigger directory, return nothing */
+		error = ENOENT;
+		goto error_return;
+	}
 
 	/* do we know this name is too long? */
 	nmp = VTONMP(dvp);
@@ -1581,7 +2064,7 @@ nfs_vnop_lookup(
 
 	error = nmp->nm_funcs->nf_lookup_rpc_async(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &req);
 	nfsmout_if(error);
-	error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, ctx, req, &xid, &fh, &nvattr);
+	error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, req, &xid, &fh, &nvattr);
 	nfsmout_if(error);
 
 	/* is the file handle the same as this directory's file handle? */
@@ -1620,7 +2103,7 @@ found:
 		nfs_node_unlock(dnp);
 	} else {
 		ngflags = (cnp->cn_flags & MAKEENTRY) ? NG_MAKEENTRY : 0;
-		error = nfs_nget(mp, dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, ngflags, &np);
+		error = nfs_nget(mp, dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, ngflags, &np);
 		if (error)
 			goto error_return;
 		newvp = NFSTOV(np);
@@ -1647,6 +2130,7 @@ nfsmout:
 		nfs_node_unlock(dnp);
 	}
 error_return:
+	NVATTR_CLEANUP(&nvattr);
 	if (!busyerror)
 		nfs_node_clear_busy(dnp);
 	if (error && *vpp) {
@@ -1656,26 +2140,6 @@ error_return:
 	return (error);
 }
 
-/*
- * NFS read call.
- * Just call nfs_bioread() to do the work.
- */
-int
-nfs_vnop_read(
-	struct vnop_read_args /* {
-		struct vnodeop_desc *a_desc;
-		vnode_t a_vp;
-		struct uio *a_uio;
-		int a_ioflag;
-		vfs_context_t a_context;
-	} */ *ap)
-{
-	if (vnode_vtype(ap->a_vp) != VREG)
-		return (EPERM);
-	return (nfs_bioread(VTONFS(ap->a_vp), ap->a_uio, ap->a_ioflag, ap->a_context));
-}
-
-
 /*
  * NFS readlink call
  */
@@ -1694,7 +2158,6 @@ nfs_vnop_readlink(
 	int error = 0, nfsvers;
 	uint32_t buflen;
 	uio_t uio = ap->a_uio;
-	struct nfs_vattr nvattr;
 	struct nfsbuf *bp = NULL;
 
 	if (vnode_vtype(ap->a_vp) != VLNK)
@@ -1711,7 +2174,7 @@ nfs_vnop_readlink(
 	nfsvers = nmp->nm_vers;
 
 	/* nfs_getattr() will check changed and purge caches */
-	if ((error = nfs_getattr(np, &nvattr, ctx, NGA_CACHED))) {
+	if ((error = nfs_getattr(np, NULL, ctx, NGA_CACHED))) {
 		FSDBG(531, np, 0xd1e0001, 0, error);
 		return (error);
 	}
@@ -1764,8 +2227,7 @@ nfs3_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx)
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
-	error = nfs_request(np, NULL, &nmreq, NFSPROC_READLINK, ctx,
-			&nmrep, &xid, &status);
+	error = nfs_request(np, NULL, &nmreq, NFSPROC_READLINK, ctx, NULL, &nmrep, &xid, &status);
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
 	if (nfsvers == NFS_VER3)
@@ -1827,6 +2289,10 @@ nfs_read_rpc(nfsnode_t np, uio_t uio, vfs_context_t ctx)
 	while (tsiz > 0) {
 		len = retlen = (tsiz > (user_ssize_t)nmrsize) ? nmrsize : (size_t)tsiz;
 		FSDBG(536, np, txoffset, len, 0);
+		if (np->n_flag & NREVOKE) {
+			error = EIO;
+			break;
+		}
 		if (nmp->nm_vers >= NFS_VER4)
 			stategenid = nmp->nm_stategenid;
 		error = nmp->nm_funcs->nf_read_rpc_async(np, txoffset, len,
@@ -1836,16 +2302,19 @@ nfs_read_rpc(nfsnode_t np, uio_t uio, vfs_context_t ctx)
 		if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
 		    (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
 			lck_mtx_lock(&nmp->nm_lock);
-			if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) {
-				printf("nfs_read_rpc: error %d, initiating recovery\n", error);
-				nmp->nm_state |= NFSSTA_RECOVER;
-				nfs_mount_sock_thread_wake(nmp);
+			if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
+				NP(np, "nfs_read_rpc: error %d, initiating recovery", error);
+				nfs_need_recover(nmp, error);
 			}
 			lck_mtx_unlock(&nmp->nm_lock);
-			if (error == NFSERR_GRACE)
-				tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
-			if (!(error = nfs_mount_state_wait_for_recovery(nmp)))
-				continue;
+			if (np->n_flag & NREVOKE) {
+				error = EIO;
+			} else {
+				if (error == NFSERR_GRACE)
+					tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
+				if (!(error = nfs_mount_state_wait_for_recovery(nmp)))
+					continue;
+			}
 		}
 		if (error)
 			break;
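
The recovery trigger above is deliberately raced: each caller snapshots nm_stategenid before the RPC, and only a caller whose snapshot still matches initiates recovery, so a flood of failures produces a single recovery pass. The ++restart bound and the NREVOKE check keep the loop from retrying forever. A toy of the generation check (error numbers are the RFC 3530 values, NFS4ERR_GRACE 10013 and NFS4ERR_BAD_STATEID 10025):

#include <stdio.h>

static int stategenid = 7;

static void
maybe_start_recovery(int snapped, int error)
{
	if ((error != 10013 /* GRACE */) && (snapped == stategenid)) {
		printf("error %d, initiating recovery\n", error);
		stategenid++;	/* recovery bumps the generation */
	}
}

int
main(void)
{
	int snap = stategenid;
	maybe_start_recovery(snap, 10025);	/* first failure: starts recovery */
	maybe_start_recovery(snap, 10025);	/* stale snapshot: no duplicate */
	return (0);
}
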
@@ -1894,7 +2363,7 @@ nfs3_read_rpc_async(
 	}
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
-	error = nfs_request_async(np, NULL, &nmreq, NFSPROC_READ, thd, cred, cb, reqp);
+	error = nfs_request_async(np, NULL, &nmreq, NFSPROC_READ, thd, cred, NULL, 0, cb, reqp);
 nfsmout:
 	nfsm_chain_cleanup(&nmreq);
 	return (error);
@@ -1937,7 +2406,7 @@ nfs3_read_rpc_async_finish(
 		nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED);
 		nfsm_chain_get_32(error, &nmrep, eof);
 	} else {
-		nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
+		nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
 	}
 	if (!lockerror)
 		nfs_node_unlock(np);
@@ -1980,7 +2449,6 @@ nfs_vnop_write(
 	nfsnode_t np = VTONFS(vp);
 	int ioflag = ap->a_ioflag;
 	struct nfsbuf *bp;
-	struct nfs_vattr nvattr;
 	struct nfsmount *nmp = VTONMP(vp);
 	daddr64_t lbn;
 	int biosize;
@@ -2046,7 +2514,7 @@ nfs_vnop_write(
 		if (ioflag & IO_APPEND) {
 			nfs_data_unlock(np);
 			/* nfs_getattr() will check changed and purge caches */
-			error = nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED);
+			error = nfs_getattr(np, NULL, ctx, NGA_UNCACHED);
 			/* we'll be extending the file, so take the data lock exclusive */
 			nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
 			if (error) {
@@ -2272,12 +2740,14 @@ again:
 		 *
 		 * Notes:
 		 * We don't want to read anything we're just going to write over.
+		 * We don't want to read anything we're just going to drop when the
+		 *   I/O is complete (i.e. don't do reads for NOCACHE requests).
 		 * We don't want to issue multiple I/Os if we don't have to
 		 *   (because they're synchronous rpcs).
 		 * We don't want to read anything we already have modified in the
 		 *   page cache.
 		 */
-		if (!ISSET(bp->nb_flags, NB_NOCACHE) && !ISSET(bp->nb_flags, NB_CACHE) && (n < biosize)) {
+		if (!ISSET(bp->nb_flags, NB_CACHE) && (n < biosize)) {
 			int firstpg, lastpg, dirtypg;
 			int firstpgoff, lastpgoff;
 			start = end = -1;
@@ -2296,6 +2766,22 @@ again:
 					start = (lastpg * PAGE_SIZE) + lastpgoff;
 				end = (lastpg + 1) * PAGE_SIZE;
 			}
+			if (ISSET(bp->nb_flags, NB_NOCACHE)) {
+				/*
+				 * For nocache writes, if there is any partial page at the
+				 * start or end of the write range, then we do the write
+				 * synchronously to make sure that we can drop the data
+				 * from the cache as soon as the WRITE finishes.  Normally,
+				 * we would do an unstable write and not drop the data until
+				 * it was committed.  But doing that here would risk allowing
+				 * invalid data to be read from the cache between the WRITE
+				 * and the COMMIT.
+				 * (NB_STABLE indicates that data writes should be FILESYNC)
+				 */
+				if (end > start)
+					SET(bp->nb_flags, NB_STABLE);
+				goto skipread;
+			}
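
The deciding question for a NOCACHE write is whether it covers whole pages: a partial page at either edge forces a stable (FILESYNC) synchronous write, because an unstable WRITE followed by a later COMMIT would leave a window where the already-dropped cache could hand out bytes the server has not committed. A simplified version of the edge test (the code above derives start/end from the dirty-page scan rather than from raw offsets):

#include <stdint.h>

#define PAGE_SIZE 4096

/* nonzero if [off, off+len) is not page-aligned at both edges */
static int
write_has_partial_pages(uint64_t off, uint64_t len)
{
	return (((off & (PAGE_SIZE - 1)) != 0) ||
	    (((off + len) & (PAGE_SIZE - 1)) != 0));
}
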
 			if (end > start) {
 				/* need to read the data in range: start...end-1 */
 
@@ -2327,8 +2813,11 @@ again:
 					uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
 					uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), on - start);
 					error = nfs_read_rpc(np, auio, ctx);
-					if (error) /* couldn't read the data, so treat buffer as NOCACHE */
+					if (error) {
+						/* couldn't read the data, so treat buffer as synchronous NOCACHE */
 						SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE));
+						goto skipread;
+					}
 					if (uio_resid(auio) > 0) {
 						FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee01);
 						bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
@@ -2370,13 +2859,16 @@ again:
 					FSDBG(516, bp, start, end - start, 0xd00dee00);
 					bzero(bp->nb_data + start, end - start);
 					error = 0;
-				} else if (!ISSET(bp->nb_flags, NB_NOCACHE)) {
+				} else {
 					/* now we'll read the (rest of the) data */
 					uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
 					uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), end - start);
 					error = nfs_read_rpc(np, auio, ctx);
-					if (error) /* couldn't read the data, so treat buffer as NOCACHE */
+					if (error) {
+						/* couldn't read the data, so treat buffer as synchronous NOCACHE */
 						SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE));
+						goto skipread;
+					}
 					if (uio_resid(auio) > 0) {
 						FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee02);
 						bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
@@ -2400,6 +2892,7 @@ again:
 				/* Note: pages being written to will be validated when written */
 			}
 		}
+skipread:
 
 		if (ISSET(bp->nb_flags, NB_ERROR)) {
 			error = bp->nb_error;
@@ -2554,6 +3047,10 @@ nfs_write_rpc2(
 	while (tsiz > 0) {
 		len = (tsiz > nmwsize) ? nmwsize : tsiz;
 		FSDBG(537, np, uio_offset(uio), len, 0);
+		if (np->n_flag & NREVOKE) {
+			error = EIO;
+			break;
+		}
 		if (nmp->nm_vers >= NFS_VER4)
 			stategenid = nmp->nm_stategenid;
 		error = nmp->nm_funcs->nf_write_rpc_async(np, uio, len, thd, cred, *iomodep, NULL, &req);
@@ -2565,16 +3062,19 @@ nfs_write_rpc2(
 		if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
 		    (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
 			lck_mtx_lock(&nmp->nm_lock);
-			if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) {
-				printf("nfs_write_rpc: error %d, initiating recovery\n", error);
-				nmp->nm_state |= NFSSTA_RECOVER;
-				nfs_mount_sock_thread_wake(nmp);
+			if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
+				NP(np, "nfs_write_rpc: error %d, initiating recovery", error);
+				nfs_need_recover(nmp, error);
 			}
 			lck_mtx_unlock(&nmp->nm_lock);
-			if (error == NFSERR_GRACE)
-				tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
-			if (!(error = nfs_mount_state_wait_for_recovery(nmp)))
-				continue;
+			if (np->n_flag & NREVOKE) {
+				error = EIO;
+			} else {
+				if (error == NFSERR_GRACE)
+					tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
+				if (!(error = nfs_mount_state_wait_for_recovery(nmp)))
+					continue;
+			}
 		}
 		if (error)
 			break;
@@ -2635,6 +3135,7 @@ nfs3_write_rpc_async(
 	struct nfsreq **reqp)
 {
 	struct nfsmount *nmp;
+	mount_t mp;
 	int error = 0, nfsvers;
 	struct nfsm_chain nmreq;
 
@@ -2643,6 +3144,11 @@ nfs3_write_rpc_async(
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
 
+	/* for async mounts, don't bother sending sync write requests */
+	if ((iomode != NFS_WRITE_UNSTABLE) && nfs_allow_async &&
+	    ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC))
+		iomode = NFS_WRITE_UNSTABLE;
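
The downgrade above leans on NFSv3's WRITE stability levels; on an MNT_ASYNC mount the client has already accepted weaker durability, so every write can go out UNSTABLE and be batched into later COMMITs. For reference, the stable_how values (RFC 1813, section 3.3.7, matching the names used in this file):

enum stable_how {
	NFS_WRITE_UNSTABLE = 0,	/* server may cache; client COMMITs later */
	NFS_WRITE_DATASYNC = 1,	/* data durable, metadata maybe not */
	NFS_WRITE_FILESYNC = 2	/* data and metadata durable before reply */
};
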
+
 	nfsm_chain_null(&nmreq);
 	nfsm_chain_build_alloc_init(error, &nmreq,
 		NFSX_FH(nfsvers) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
@@ -2661,7 +3167,7 @@ nfs3_write_rpc_async(
 	error = nfsm_chain_add_uio(&nmreq, uio, len);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
-	error = nfs_request_async(np, NULL, &nmreq, NFSPROC_WRITE, thd, cred, cb, reqp);
+	error = nfs_request_async(np, NULL, &nmreq, NFSPROC_WRITE, thd, cred, NULL, 0, cb, reqp);
 nfsmout:
 	nfsm_chain_cleanup(&nmreq);
 	return (error);
@@ -2727,7 +3233,7 @@ nfs3_write_rpc_async_finish(
 	} else {
 		if (!error)
 			error = status;
-		nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
+		nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
 		nfsmout_if(error);
 	}
 	if (updatemtime)
@@ -2769,7 +3275,7 @@ nfs3_vnop_mknod(
 	nfsnode_t np = NULL;
 	struct nfsmount *nmp;
 	nfsnode_t dnp = VTONFS(dvp);
-	struct nfs_vattr nvattr, dnvattr;
+	struct nfs_vattr nvattr;
 	fhandle_t fh;
 	int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0;
 	struct timespec premtime = { 0, 0 };
@@ -2777,6 +3283,7 @@ nfs3_vnop_mknod(
 	u_int64_t xid, dxid;
 	int nfsvers, gotuid, gotgid;
 	struct nfsm_chain nmreq, nmrep;
+	struct nfsreq rq, *req = &rq;
 
 	nmp = VTONMP(dvp);
 	if (!nmp)
@@ -2797,6 +3304,8 @@ nfs3_vnop_mknod(
 	if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
 		return (ENAMETOOLONG);
 
+	nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
+
 	VATTR_SET_SUPPORTED(vap, va_mode);
 	VATTR_SET_SUPPORTED(vap, va_uid);
 	VATTR_SET_SUPPORTED(vap, va_gid);
@@ -2813,7 +3322,7 @@ nfs3_vnop_mknod(
 		NFSX_FH(nfsvers) + 4 * NFSX_UNSIGNED +
 		nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
-	nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
+	nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
 	if (nfsvers == NFS_VER3) {
 		nfsm_chain_add_32(error, &nmreq, vtonfs_type(vap->va_type, nfsvers));
 		nfsm_chain_add_v3sattr(error, &nmreq, vap);
@@ -2829,7 +3338,10 @@ nfs3_vnop_mknod(
 		error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
 	nfsmout_if(error);
 
-	error = nfs_request(dnp, NULL, &nmreq, NFSPROC_MKNOD, ctx, &nmrep, &xid, &status);
+	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_MKNOD,
+			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
+	if (!error)
+		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
 
 	if ((lockerror = nfs_node_lock(dnp)))
 		error = lockerror;
@@ -2857,11 +3369,11 @@ nfsmout:
 			NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
 		nfs_node_unlock(dnp);
 		/* nfs_getattr() will check changed and purge caches */
-		nfs_getattr(dnp, &dnvattr, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
+		nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
 	}
 
 	if (!error && fh.fh_len)
-		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np);
+		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
 	if (!error && !np)
 		error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
 	if (!error && np)
@@ -2870,7 +3382,7 @@ nfsmout:
 		nfs_node_clear_busy(dnp);
 
 	if (!error && (gotuid || gotgid) &&
-	    (!newvp || nfs_getattrcache(np, &nvattr) ||
+	    (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
 	     (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
 	     (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
 		/* clear ID bits if server didn't use them (or we can't tell) */
@@ -2908,7 +3420,7 @@ nfs3_vnop_create(
 	vnode_t dvp = ap->a_dvp;
 	struct vnode_attr *vap = ap->a_vap;
 	struct componentname *cnp = ap->a_cnp;
-	struct nfs_vattr nvattr, dnvattr;
+	struct nfs_vattr nvattr;
 	fhandle_t fh;
 	nfsnode_t np = NULL;
 	struct nfsmount *nmp;
@@ -2920,7 +3432,7 @@ nfs3_vnop_create(
 	u_int64_t xid, dxid;
 	uint32_t val;
 	struct nfsm_chain nmreq, nmrep;
-	struct nfsreq *req;
+	struct nfsreq rq, *req = &rq;
 	struct nfs_dulookup dul;
 
 	nmp = VTONMP(dvp);
@@ -2931,6 +3443,8 @@ nfs3_vnop_create(
 	if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
 		return (ENAMETOOLONG);
 
+	nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
+
 	VATTR_SET_SUPPORTED(vap, va_mode);
 	VATTR_SET_SUPPORTED(vap, va_uid);
 	VATTR_SET_SUPPORTED(vap, va_gid);
@@ -2940,11 +3454,13 @@ nfs3_vnop_create(
 	gotuid = VATTR_IS_ACTIVE(vap, va_uid);
 	gotgid = VATTR_IS_ACTIVE(vap, va_gid);
 
-	if (vap->va_vaflags & VA_EXCLUSIVE)
+	if (vap->va_vaflags & VA_EXCLUSIVE) {
 		fmode |= O_EXCL;
+		if (!VATTR_IS_ACTIVE(vap, va_access_time) || !VATTR_IS_ACTIVE(vap, va_modify_time))
+			vap->va_vaflags |= VA_UTIMES_NULL;
+	}
 
 again:
-	req = NULL;
 	error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
 	nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
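
[Editor's note] The VA_UTIMES_NULL tweak above matters because an NFSv3 exclusive create carries no initial attributes, only an opaque 8-byte verifier; times are applied afterward by a follow-up SETATTR, which should use the server's clock when the caller gave none. A sketch of the verifier encoding this path is assumed to use (the microtime-based verifier is an assumption, not shown in this hunk):

	if (fmode & O_EXCL) {
		struct timeval now;
		microtime(&now);
		nfsm_chain_add_32(error, &nmreq, NFS_CREATE_EXCLUSIVE);
		nfsm_chain_add_32(error, &nmreq, now.tv_sec);	/* createverf word 1 */
		nfsm_chain_add_32(error, &nmreq, now.tv_usec);	/* createverf word 2 */
	}
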
 
@@ -2955,7 +3471,7 @@ again:
 		NFSX_FH(nfsvers) + 2 * NFSX_UNSIGNED +
 		nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
-	nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
+	nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
 	if (nfsvers == NFS_VER3) {
 		if (fmode & O_EXCL) {
 			nfsm_chain_add_32(error, &nmreq, NFS_CREATE_EXCLUSIVE);
@@ -2979,7 +3495,7 @@ again:
 	nfsmout_if(error);
 
 	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_CREATE,
-			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
+			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
 	if (!error) {
 		nfs_dulookup_start(&dul, dnp, ctx);
 		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
@@ -3010,11 +3526,11 @@ nfsmout:
 			NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
 		nfs_node_unlock(dnp);
 		/* nfs_getattr() will check changed and purge caches */
-		nfs_getattr(dnp, &dnvattr, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
+		nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
 	}
 
 	if (!error && fh.fh_len)
-		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np);
+		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
 	if (!error && !np)
 		error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
 	if (!error && np)
@@ -3051,7 +3567,7 @@ nfsmout:
 	if (!error)
 		*ap->a_vpp = newvp;
 	if (!error && (gotuid || gotgid) &&
-	    (!newvp || nfs_getattrcache(np, &nvattr) ||
+	    (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
 	     (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
 	     (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
 		/* clear ID bits if server didn't use them (or we can't tell) */
@@ -3091,7 +3607,7 @@ nfs_vnop_remove(
 	struct componentname *cnp = ap->a_cnp;
 	nfsnode_t dnp = VTONFS(dvp);
 	nfsnode_t np = VTONFS(vp);
-	int error = 0, nfsvers, inuse, gotattr = 0, flushed = 0, setsize = 0;
+	int error = 0, nfsvers, namedattrs, inuse, gotattr = 0, flushed = 0, setsize = 0;
 	struct nfs_vattr nvattr;
 	struct nfsmount *nmp;
 	struct nfs_dulookup dul;
@@ -3102,6 +3618,7 @@ nfs_vnop_remove(
 	if (!nmp)
 		return (ENXIO);
 	nfsvers = nmp->nm_vers;
+	namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
 
 again_relock:
 	error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx));
@@ -3117,7 +3634,8 @@ again_relock:
 	np->n_hflag |= NHLOCKED;
 	lck_mtx_unlock(nfs_node_hash_mutex);
 
-	nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
+	if (!namedattrs)
+		nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
 again:
 	inuse = vnode_isinuse(vp, 0);
 	if ((ap->a_flags & VNODE_REMOVE_NODELETEBUSY) && inuse) {
@@ -3152,16 +3670,13 @@ again:
 				nfs_node_unlock(np);
 				return (error);
 			}
+			if (!namedattrs)
+				nfs_dulookup_finish(&dul, dnp, ctx);
 			goto again_relock;
 		}
 
-		if ((nmp->nm_vers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK)) {
-			lck_mtx_lock(&np->n_openlock);
-			np->n_openflags &= ~N_DELEG_MASK;
-			lck_mtx_unlock(&np->n_openlock);
-			nfs4_delegreturn_rpc(nmp, np->n_fhp, np->n_fhsize, &np->n_dstateid,
-				vfs_context_thread(ctx), vfs_context_ucred(ctx));
-		}
+		if ((nmp->nm_vers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK))
+			nfs4_delegation_return(np, 0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
 
 		/*
 		 * Purge the name cache so that the chance of a lookup for
@@ -3170,7 +3685,8 @@ again:
 		 */
 		nfs_name_cache_purge(dnp, np, cnp, ctx);
 
-		nfs_dulookup_start(&dul, dnp, ctx);
+		if (!namedattrs)
+			nfs_dulookup_start(&dul, dnp, ctx);
 
 		/* Do the rpc */
 		error = nmp->nm_funcs->nf_remove_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
@@ -3213,7 +3729,8 @@ again:
 			nfs_node_unlock(np);
 		}
 	} else if (!np->n_sillyrename) {
-		nfs_dulookup_start(&dul, dnp, ctx);
+		if (!namedattrs)
+			nfs_dulookup_start(&dul, dnp, ctx);
 		error = nfs_sillyrename(dnp, np, cnp, ctx);
 		nfs_node_lock_force(np);
 		NATTRINVALIDATE(np);
@@ -3222,12 +3739,14 @@ again:
 		nfs_node_lock_force(np);
 		NATTRINVALIDATE(np);
 		nfs_node_unlock(np);
-		nfs_dulookup_start(&dul, dnp, ctx);
+		if (!namedattrs)
+			nfs_dulookup_start(&dul, dnp, ctx);
 	}
 
 	/* nfs_getattr() will check changed and purge caches */
-	nfs_getattr(dnp, &nvattr, ctx, NGA_CACHED);
-	nfs_dulookup_finish(&dul, dnp, ctx);
+	nfs_getattr(dnp, NULL, ctx, NGA_CACHED);
+	if (!namedattrs)
+		nfs_dulookup_finish(&dul, dnp, ctx);
 out:
 	/* unlock the node */
 	lck_mtx_lock(nfs_node_hash_mutex);
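
[Editor's note] The namedattrs checks threaded through this function skip the speculative parallel lookup of the AppleDouble sidecar when the server supports named attributes natively (NFS_FSFLAG_NAMED_ATTR), presumably because no "._" files are created on such mounts. A hypothetical, runnable illustration of the sidecar naming the dulookup machinery targets:

	#include <stdio.h>

	/* hypothetical helper: the AppleDouble sidecar for "foo" is "._foo" */
	static void
	make_dot_underscore(const char *name, int namelen, char *buf, size_t buflen)
	{
		snprintf(buf, buflen, "._%.*s", namelen, name);
	}

	int
	main(void)
	{
		char buf[64];
		make_dot_underscore("foo", 3, buf, sizeof(buf));
		printf("%s\n", buf);	/* prints "._foo" */
		return (0);
	}
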
@@ -3286,11 +3805,11 @@ nfs3_remove_rpc(
 	nfsm_chain_build_alloc_init(error, &nmreq,
 		NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(namelen));
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
-	nfsm_chain_add_string(error, &nmreq, name, namelen);
+	nfsm_chain_add_name(error, &nmreq, name, namelen, nmp);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
 
-	error = nfs_request2(dnp, NULL, &nmreq, NFSPROC_REMOVE, thd, cred, 0, &nmrep, &xid, &status);
+	error = nfs_request2(dnp, NULL, &nmreq, NFSPROC_REMOVE, thd, cred, NULL, 0, &nmrep, &xid, &status);
 
 	if ((lockerror = nfs_node_lock(dnp)))
 		error = lockerror;
@@ -3398,11 +3917,7 @@ nfs_vnop_rename(
 			tvp = NULL;
 		}
 	} else if (tvp && (nmp->nm_vers >= NFS_VER4) && (tnp->n_openflags & N_DELEG_MASK)) {
-		lck_mtx_lock(&tnp->n_openlock);
-		tnp->n_openflags &= ~N_DELEG_MASK;
-		lck_mtx_unlock(&tnp->n_openlock);
-		nfs4_delegreturn_rpc(nmp, tnp->n_fhp, tnp->n_fhsize, &tnp->n_dstateid,
-			vfs_context_thread(ctx), vfs_context_ucred(ctx));
+		nfs4_delegation_return(tnp, 0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
 	}
 
 	error = nmp->nm_funcs->nf_rename_rpc(fdnp, fcnp->cn_nameptr, fcnp->cn_namelen,
@@ -3417,7 +3932,7 @@ nfs_vnop_rename(
 	if (tvp && (tvp != fvp) && !tnp->n_sillyrename) {
 		nfs_node_lock_force(tnp);
 		tvprecycle = (!error && !vnode_isinuse(tvp, 0) &&
-		    (nfs_getattrcache(tnp, &nvattr) || (nvattr.nva_nlink == 1)));
+		    (nfs_getattrcache(tnp, &nvattr, 0) || (nvattr.nva_nlink == 1)));
 		nfs_node_unlock(tnp);
 		lck_mtx_lock(nfs_node_hash_mutex);
 		if (tvprecycle && (tnp->n_hflag & NHHASHED)) {
@@ -3474,8 +3989,8 @@ nfs_vnop_rename(
 	}
 out:
 	/* nfs_getattr() will check changed and purge caches */
-	nfs_getattr(fdnp, &nvattr, ctx, NGA_CACHED);
-	nfs_getattr(tdnp, &nvattr, ctx, NGA_CACHED);
+	nfs_getattr(fdnp, NULL, ctx, NGA_CACHED);
+	nfs_getattr(tdnp, NULL, ctx, NGA_CACHED);
 	if (locked) {
 		/* unlock node */
 		lck_mtx_lock(nfs_node_hash_mutex);
@@ -3525,13 +4040,13 @@ nfs3_rename_rpc(
 		(NFSX_FH(nfsvers) + NFSX_UNSIGNED) * 2 +
 		nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen));
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, fdnp->n_fhp, fdnp->n_fhsize);
-	nfsm_chain_add_string(error, &nmreq, fnameptr, fnamelen);
+	nfsm_chain_add_name(error, &nmreq, fnameptr, fnamelen, nmp);
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
-	nfsm_chain_add_string(error, &nmreq, tnameptr, tnamelen);
+	nfsm_chain_add_name(error, &nmreq, tnameptr, tnamelen, nmp);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
 
-	error = nfs_request(fdnp, NULL, &nmreq, NFSPROC_RENAME, ctx, &nmrep, &xid, &status);
+	error = nfs_request(fdnp, NULL, &nmreq, NFSPROC_RENAME, ctx, NULL, &nmrep, &xid, &status);
 
 	if ((lockerror = nfs_node_lock2(fdnp, tdnp)))
 		error = lockerror;
@@ -3617,11 +4132,10 @@ nfs3_vnop_link(
 		NFSX_FH(nfsvers)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
-	nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
+	nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
-	error = nfs_request(np, NULL, &nmreq, NFSPROC_LINK, ctx,
-			&nmrep, &xid, &status);
+	error = nfs_request(np, NULL, &nmreq, NFSPROC_LINK, ctx, NULL, &nmrep, &xid, &status);
 
 	if ((lockerror = nfs_node_lock2(tdnp, np))) {
 		error = lockerror;
@@ -3680,7 +4194,7 @@ nfs3_vnop_symlink(
 	vnode_t dvp = ap->a_dvp;
 	struct vnode_attr *vap = ap->a_vap;
 	struct componentname *cnp = ap->a_cnp;
-	struct nfs_vattr nvattr, dnvattr;
+	struct nfs_vattr nvattr;
 	fhandle_t fh;
 	int slen, error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0;
 	struct timespec premtime = { 0, 0 };
@@ -3691,7 +4205,7 @@ nfs3_vnop_symlink(
 	nfsnode_t dnp = VTONFS(dvp);
 	struct nfsmount *nmp;
 	struct nfsm_chain nmreq, nmrep;
-	struct nfsreq *req = NULL;
+	struct nfsreq rq, *req = &rq;
 	struct nfs_dulookup dul;
 
 	nmp = VTONMP(dvp);
@@ -3704,6 +4218,8 @@ nfs3_vnop_symlink(
 	    ((cnp->cn_namelen > NFS_MAXNAMLEN) || (slen > NFS_MAXPATHLEN)))
 		return (ENAMETOOLONG);
 
+	nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
+
 	VATTR_SET_SUPPORTED(vap, va_mode);
 	VATTR_SET_SUPPORTED(vap, va_uid);
 	VATTR_SET_SUPPORTED(vap, va_gid);
@@ -3723,17 +4239,17 @@ nfs3_vnop_symlink(
 		NFSX_FH(nfsvers) + 2 * NFSX_UNSIGNED +
 		nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(nfsvers));
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
-	nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
+	nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
 	if (nfsvers == NFS_VER3)
 		nfsm_chain_add_v3sattr(error, &nmreq, vap);
-	nfsm_chain_add_string(error, &nmreq, ap->a_target, slen);
+	nfsm_chain_add_name(error, &nmreq, ap->a_target, slen, nmp);
 	if (nfsvers == NFS_VER2)
 		nfsm_chain_add_v2sattr(error, &nmreq, vap, -1);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
 
 	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_SYMLINK,
-			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
+			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
 	if (!error) {
 		nfs_dulookup_start(&dul, dnp, ctx);
 		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
@@ -3767,11 +4283,11 @@ nfsmout:
 			NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
 		nfs_node_unlock(dnp);
 		/* nfs_getattr() will check changed and purge caches */
-		nfs_getattr(dnp, &dnvattr, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
+		nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
 	}
 
 	if (!error && fh.fh_len)
-		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np);
+		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
 	if (!error && np)
 		newvp = NFSTOV(np);
 
@@ -3797,7 +4313,7 @@ nfsmout:
 	if (!busyerror)
 		nfs_node_clear_busy(dnp);
 	if (!error && (gotuid || gotgid) &&
-	    (!newvp || nfs_getattrcache(np, &nvattr) ||
+	    (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
 	     (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
 	     (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
 		/* clear ID bits if server didn't use them (or we can't tell) */
@@ -3834,7 +4350,7 @@ nfs3_vnop_mkdir(
 	vnode_t dvp = ap->a_dvp;
 	struct vnode_attr *vap = ap->a_vap;
 	struct componentname *cnp = ap->a_cnp;
-	struct nfs_vattr nvattr, dnvattr;
+	struct nfs_vattr nvattr;
 	nfsnode_t np = NULL;
 	struct nfsmount *nmp;
 	nfsnode_t dnp = VTONFS(dvp);
@@ -3845,7 +4361,7 @@ nfs3_vnop_mkdir(
 	u_int64_t xid, dxid;
 	fhandle_t fh;
 	struct nfsm_chain nmreq, nmrep;
-	struct nfsreq *req = NULL;
+	struct nfsreq rq, *req = &rq;
 	struct nfs_dulookup dul;
 
 	nmp = VTONMP(dvp);
@@ -3855,6 +4371,8 @@ nfs3_vnop_mkdir(
 	if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
 		return (ENAMETOOLONG);
 
+	nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
+
 	VATTR_SET_SUPPORTED(vap, va_mode);
 	VATTR_SET_SUPPORTED(vap, va_uid);
 	VATTR_SET_SUPPORTED(vap, va_gid);
@@ -3874,7 +4392,7 @@ nfs3_vnop_mkdir(
 		NFSX_FH(nfsvers) + NFSX_UNSIGNED +
 		nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
-	nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
+	nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
 	if (nfsvers == NFS_VER3)
 		nfsm_chain_add_v3sattr(error, &nmreq, vap);
 	else
@@ -3883,7 +4401,7 @@ nfs3_vnop_mkdir(
 	nfsmout_if(error);
 
 	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_MKDIR,
-			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
+			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
 	if (!error) {
 		nfs_dulookup_start(&dul, dnp, ctx);
 		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
@@ -3914,11 +4432,11 @@ nfsmout:
 			NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
 		nfs_node_unlock(dnp);
 		/* nfs_getattr() will check changed and purge caches */
-		nfs_getattr(dnp, &dnvattr, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
+		nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
 	}
 
 	if (!error && fh.fh_len)
-		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np);
+		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
 	if (!error && np)
 		newvp = NFSTOV(np);
 
@@ -3944,7 +4462,7 @@ nfsmout:
 	if (!busyerror)
 		nfs_node_clear_busy(dnp);
 	if (!error && (gotuid || gotgid) &&
-	    (!newvp || nfs_getattrcache(np, &nvattr) ||
+	    (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
 	     (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
 	     (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
 		/* clear ID bits if server didn't use them (or we can't tell) */
@@ -3985,11 +4503,10 @@ nfs3_vnop_rmdir(
 	struct nfsmount *nmp;
 	nfsnode_t np = VTONFS(vp);
 	nfsnode_t dnp = VTONFS(dvp);
-	struct nfs_vattr dnvattr;
 	int nfsvers;
 	u_int64_t xid;
 	struct nfsm_chain nmreq, nmrep;
-	struct nfsreq *req = NULL;
+	struct nfsreq rq, *req = &rq;
 	struct nfs_dulookup dul;
 
 	nmp = VTONMP(vp);
@@ -4010,12 +4527,12 @@ nfs3_vnop_rmdir(
 	nfsm_chain_build_alloc_init(error, &nmreq,
 		NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
-	nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
+	nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
 
 	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_RMDIR,
-			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
+			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
 	if (!error) {
 		nfs_dulookup_start(&dul, dnp, ctx);
 		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
@@ -4039,7 +4556,7 @@ nfsmout:
 		nfs_node_unlock(dnp);
 		nfs_name_cache_purge(dnp, np, cnp, ctx);
 		/* nfs_getattr() will check changed and purge caches */
-		nfs_getattr(dnp, &dnvattr, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
+		nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
 	}
 	nfs_dulookup_finish(&dul, dnp, ctx);
 	nfs_node_clear_busy2(dnp, np);
@@ -4106,7 +4623,6 @@ nfs_vnop_readdir(
 	struct nfsmount *nmp;
 	uio_t uio = ap->a_uio;
 	int error, nfsvers, extended, numdirent, bigcookies, ptc, done;
-	struct nfs_vattr nvattr;
 	uint16_t i, iptc, rlen, nlen;
 	uint64_t cookie, nextcookie, lbn = 0;
 	struct nfsbuf *bp = NULL;
@@ -4132,6 +4648,11 @@ nfs_vnop_readdir(
 	if (uio_resid(uio) == 0)
 		return (0);
 
+	if ((nfsvers >= NFS_VER4) && (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER)) {
+		/* trigger directories should never be read; return nothing */
+		return (0);
+	}
+
 	thd = vfs_context_thread(ctx);
 	numdirent = done = 0;
 	nextcookie = uio_offset(uio);
@@ -4164,7 +4685,7 @@ nfs_vnop_readdir(
 			nfs_node_unlock(dnp);
 		}
 		/* nfs_getattr() will check changed and purge caches */
-		if ((error = nfs_getattr(dnp, &nvattr, ctx, NGA_UNCACHED)))
+		if ((error = nfs_getattr(dnp, NULL, ctx, NGA_UNCACHED)))
 			goto out;
 	} else {
 		nfs_node_unlock(dnp);
@@ -4412,7 +4933,8 @@ int
 nfs_dir_cookie_to_lbn(nfsnode_t dnp, uint64_t cookie, int *ptc, uint64_t *lbnp)
 {
 	struct nfsdmap *ndcc = dnp->n_cookiecache;
-	int8_t i, eofptc, iptc, found;
+	int8_t eofptc, found;
+	int i, iptc;
 	struct nfsmount *nmp;
 	struct nfsbuf *bp, *lastbp;
 	struct nfsbuflists blist;
@@ -4586,7 +5108,7 @@ nfs_dir_buf_search(
 			nvattrp = NFS_DIR_BUF_NVATTR(bp, i);
 			if ((ndbhp->ndbh_ncgen != bp->nb_np->n_ncgen) || (fhp->fh_len == 0) ||
 			    (nvattrp->nva_type == VNON) || (nvattrp->nva_fileid == 0)) {
-				/* entry is no longer valid */
+				/* entry is not valid */
 				error = ENOENT;
 				break;
 			}
@@ -4633,7 +5155,7 @@ nfs_dir_buf_cache_lookup(nfsnode_t dnp, nfsnode_t *npp, struct componentname *cn
 {
 	nfsnode_t newnp;
 	struct nfsmount *nmp;
-	int error = 0, slpflag, slptimeo, i, found = 0, count = 0;
+	int error = 0, i, found = 0, count = 0;
 	u_int64_t xid;
 	struct nfs_vattr nvattr;
 	fhandle_t fh;
@@ -4646,8 +5168,6 @@ nfs_dir_buf_cache_lookup(nfsnode_t dnp, nfsnode_t *npp, struct componentname *cn
 
 	if (!(nmp = NFSTONMP(dnp)))
 		return (ENXIO);
-	slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;
-	slptimeo = 0;
 	if (!purge)
 		*npp = NULL;
 
@@ -4728,7 +5248,7 @@ done:
 
 	if (!error && found && !purge) {
 		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len,
-				&nvattr, &xid, NG_MAKEENTRY, &newnp);
+				&nvattr, &xid, dnp->n_auth, NG_MAKEENTRY, &newnp);
 		if (error)
 			return (error);
 		newnp->n_attrstamp = attrstamp;
@@ -4762,7 +5282,7 @@ nfs_name_cache_purge(nfsnode_t dnp, nfsnode_t np, struct componentname *cnp, vfs
 	struct nfsmount *nmp = NFSTONMP(dnp);
 
 	cache_purge(NFSTOV(np));
-	if (nmp && (nmp->nm_vers > NFS_VER2) && (nmp->nm_flag & NFSMNT_RDIRPLUS))
+	if (nmp && (nmp->nm_vers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS))
 		nfs_dir_buf_cache_lookup(dnp, NULL, cnp, ctx, 1);
 }
 
@@ -4794,7 +5314,7 @@ nfs3_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx)
 	nmrsize = nmp->nm_rsize;
 	bigcookies = nmp->nm_state & NFSSTA_BIGCOOKIES;
 noplus:
-	rdirplus = ((nfsvers > NFS_VER2) && (nmp->nm_flag & NFSMNT_RDIRPLUS)) ? 1 : 0;
+	rdirplus = ((nfsvers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) ? 1 : 0;
 
 	if ((lockerror = nfs_node_lock(dnp)))
 		return (lockerror);
@@ -4843,7 +5363,7 @@ noplus:
 
 		error = nfs_request(dnp, NULL, &nmreq,
 				rdirplus ? NFSPROC_READDIRPLUS : NFSPROC_READDIR,
-				ctx, &nmrep, &xid, &status);
+				ctx, NULL, &nmrep, &xid, &status);
 
 		if ((lockerror = nfs_node_lock(dnp)))
 			error = lockerror;
@@ -4864,7 +5384,7 @@ noplus:
 		if (error == NFSERR_NOTSUPP) {
 			/* oops... it doesn't look like readdirplus is supported */
 			lck_mtx_lock(&nmp->nm_lock);
-			nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
+			NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_RDIRPLUS);
 			lck_mtx_unlock(&nmp->nm_lock);
 			goto noplus;
 		}
@@ -5107,6 +5627,10 @@ nfs_sillyrename(
 	/* now, do the rename */
 	error = nmp->nm_funcs->nf_rename_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
 					dnp, nsp->nsr_name, nsp->nsr_namlen, ctx);
+
+	/* Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */
+	if (error == ENOENT)
+		error = 0;
 	if (!error) {
 		nfs_node_lock_force(dnp);
 		if (dnp->n_flag & NNEGNCENTRIES) {
@@ -5154,11 +5678,11 @@ nfs3_lookup_rpc_async(
 	nfsm_chain_build_alloc_init(error, &nmreq,
 		NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(namelen));
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
-	nfsm_chain_add_string(error, &nmreq, name, namelen);
+	nfsm_chain_add_name(error, &nmreq, name, namelen, nmp);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
 	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_LOOKUP,
-			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, reqp);
+			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, reqp);
 nfsmout:
 	nfsm_chain_cleanup(&nmreq);
 	return (error);
@@ -5167,6 +5691,8 @@ nfsmout:
 int
 nfs3_lookup_rpc_async_finish(
 	nfsnode_t dnp,
+	__unused char *name,
+	__unused int namelen,
 	vfs_context_t ctx,
 	struct nfsreq *req,
 	u_int64_t *xidp,
@@ -5206,7 +5732,7 @@ nfs3_lookup_rpc_async_finish(
 		nfsm_chain_postop_attr_get(error, &nmrep, attrflag, nvap);
 		nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid);
 		if (!error && !attrflag)
-			error = nfs3_getattr_rpc(NULL, NFSTOMP(dnp), fhp->fh_data, fhp->fh_len, ctx, nvap, xidp);
+			error = nfs3_getattr_rpc(NULL, NFSTOMP(dnp), fhp->fh_data, fhp->fh_len, 0, ctx, nvap, xidp);
 	} else {
 		error = nfs_parsefattr(&nmrep, nfsvers, nvap);
 	}
@@ -5249,6 +5775,8 @@ nfs_lookitup(
 	    (namelen > (int)nmp->nm_fsattr.nfsa_maxname))
 		return (ENAMETOOLONG);
 
+	NVATTR_INIT(&nvattr);
+
 	/* check for lookup of "." */
 	if ((name[0] == '.') && (namelen == 1)) {
 		/* skip lookup, we know who we are */
@@ -5259,7 +5787,7 @@ nfs_lookitup(
 
 	error = nmp->nm_funcs->nf_lookup_rpc_async(dnp, name, namelen, ctx, &req);
 	nfsmout_if(error);
-	error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, ctx, req, &xid, &fh, &nvattr);
+	error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, name, namelen, ctx, req, &xid, &fh, &nvattr);
 	nfsmout_if(!npp || error);
 
 	if (*npp) {
@@ -5299,7 +5827,7 @@ nfs_lookitup(
 		cnp->cn_nameptr = name;
 		cnp->cn_namelen = namelen;
 		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len,
-			    &nvattr, &xid, NG_MAKEENTRY, &np);
+			    &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
 		nfsmout_if(error);
 		newnp = np;
 	}
@@ -5307,6 +5835,7 @@ nfs_lookitup(
 nfsmout:
 	if (npp && !*npp && !error)
 		*npp = newnp;
+	NVATTR_CLEANUP(&nvattr);
 	return (error);
 }
 
@@ -5319,11 +5848,14 @@ nfs_dulookup_init(struct nfs_dulookup *dulp, nfsnode_t dnp, const char *name, in
 {
 	int error, du_namelen;
 	vnode_t du_vp;
+	struct nfsmount *nmp = NFSTONMP(dnp);
 
 	/* check for ._ file in name cache */
 	dulp->du_flags = 0;
 	bzero(&dulp->du_cn, sizeof(dulp->du_cn));
 	du_namelen = namelen + 2;
+	if (!nmp || NMFLAG(nmp, NONEGNAMECACHE))
+		return;
 	if ((namelen >= 2) && (name[0] == '.') && (name[1] == '_'))
 		return;
 	if (du_namelen >= (int)sizeof(dulp->du_smallname))
@@ -5342,8 +5874,8 @@ nfs_dulookup_init(struct nfs_dulookup *dulp, nfsnode_t dnp, const char *name, in
 	if (error == -1) {
 		vnode_put(du_vp);
 	} else if (!error) {
-		struct nfsmount *nmp = NFSTONMP(dnp);
-		if (nmp && (nmp->nm_vers > NFS_VER2) && (nmp->nm_flag & NFSMNT_RDIRPLUS)) {
+		nmp = NFSTONMP(dnp);
+		if (nmp && (nmp->nm_vers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) {
 			/* if rdirplus, try dir buf cache lookup */
 			nfsnode_t du_np = NULL;
 			if (!nfs_dir_buf_cache_lookup(dnp, &du_np, &dulp->du_cn, ctx, 0) && du_np) {
@@ -5367,7 +5899,7 @@ nfs_dulookup_start(struct nfs_dulookup *dulp, nfsnode_t dnp, vfs_context_t ctx)
 	struct nfsmount *nmp = NFSTONMP(dnp);
 	struct nfsreq *req = &dulp->du_req;
 
-	if (!nmp || !(dulp->du_flags & NFS_DULOOKUP_DOIT))
+	if (!nmp || !(dulp->du_flags & NFS_DULOOKUP_DOIT) || (dulp->du_flags & NFS_DULOOKUP_INPROG))
 		return;
 	if (!nmp->nm_funcs->nf_lookup_rpc_async(dnp, dulp->du_cn.cn_nameptr,
 			dulp->du_cn.cn_namelen, ctx, &req))
@@ -5390,7 +5922,9 @@ nfs_dulookup_finish(struct nfs_dulookup *dulp, nfsnode_t dnp, vfs_context_t ctx)
 	if (!nmp || !(dulp->du_flags & NFS_DULOOKUP_INPROG))
 		goto out;
 
-	error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, ctx, &dulp->du_req, &xid, &fh, &nvattr);
+	NVATTR_INIT(&nvattr);
+	error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, dulp->du_cn.cn_nameptr,
+			dulp->du_cn.cn_namelen, ctx, &dulp->du_req, &xid, &fh, &nvattr);
 	dulp->du_flags &= ~NFS_DULOOKUP_INPROG;
 	if (error == ENOENT) {
 		/* add a negative entry in the name cache */
@@ -5400,12 +5934,13 @@ nfs_dulookup_finish(struct nfs_dulookup *dulp, nfsnode_t dnp, vfs_context_t ctx)
 		nfs_node_unlock(dnp);
 	} else if (!error) {
 		error = nfs_nget(NFSTOMP(dnp), dnp, &dulp->du_cn, fh.fh_data, fh.fh_len,
-			    &nvattr, &xid, NG_MAKEENTRY, &du_np);
+			    &nvattr, &xid, dulp->du_req.r_auth, NG_MAKEENTRY, &du_np);
 		if (!error) {
 			nfs_node_unlock(du_np);
 			vnode_put(NFSTOV(du_np));
 		}
 	}
+	NVATTR_CLEANUP(&nvattr);
 out:
 	if (dulp->du_flags & NFS_DULOOKUP_INPROG)
 		nfs_request_async_cancel(&dulp->du_req);
@@ -5420,14 +5955,15 @@ out:
 int
 nfs3_commit_rpc(
 	nfsnode_t np,
-	u_int64_t offset,
-	u_int64_t count,
-	kauth_cred_t cred)
+	uint64_t offset,
+	uint64_t count,
+	kauth_cred_t cred,
+	uint64_t wverf)
 {
 	struct nfsmount *nmp;
 	int error = 0, lockerror, status, wccpostattr = 0, nfsvers;
 	struct timespec premtime = { 0, 0 };
-	u_int64_t xid, wverf;
+	u_int64_t xid, newwverf;
 	uint32_t count32;
 	struct nfsm_chain nmreq, nmrep;
 
@@ -5454,7 +5990,7 @@ nfs3_commit_rpc(
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
 	error = nfs_request2(np, NULL, &nmreq, NFSPROC_COMMIT,
-			current_thread(), cred, 0, &nmrep, &xid, &status);
+			current_thread(), cred, NULL, 0, &nmrep, &xid, &status);
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
 	/* can we do anything useful with the wcc info? */
@@ -5463,13 +5999,13 @@ nfs3_commit_rpc(
 		nfs_node_unlock(np);
 	if (!error)
 		error = status;
-	nfsm_chain_get_64(error, &nmrep, wverf);
+	nfsm_chain_get_64(error, &nmrep, newwverf);
 	nfsmout_if(error);
 	lck_mtx_lock(&nmp->nm_lock);
-	if (nmp->nm_verf != wverf) {
-		nmp->nm_verf = wverf;
+	if (nmp->nm_verf != newwverf)
+		nmp->nm_verf = newwverf;
+	if (wverf != newwverf)
 		error = NFSERR_STALEWRITEVERF;
-	}
 	lck_mtx_unlock(&nmp->nm_lock);
 nfsmout:
 	nfsm_chain_cleanup(&nmreq);
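
[Editor's note] nf_commit_rpc now takes the write verifier that was current when the unstable data was written, and separates "update the cached verifier" from "tell the caller the data is stale": NFSERR_STALEWRITEVERF is returned only when the caller's verifier no longer matches. A hedged caller sketch; nb_verf follows this patch's buffer changes and rewrite_dirty_pages() is hypothetical:

	error = nmp->nm_funcs->nf_commit_rpc(np, offset, count, cred, bp->nb_verf);
	if (error == NFSERR_STALEWRITEVERF) {
		/* server rebooted since the UNSTABLE write: data must be re-sent */
		error = rewrite_dirty_pages(np, bp);	/* hypothetical */
	}
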
@@ -5494,23 +6030,6 @@ nfs_vnop_blockmap(
 	return (ENOTSUP);
 }
 
-/*
- * Mmap a file
- *
- * NB Currently unsupported.
- */
-/*ARGSUSED*/
-int
-nfs_vnop_mmap(
-	__unused struct vnop_mmap_args /* {
-		struct vnodeop_desc *a_desc;
-		vnode_t a_vp;
-		int a_fflags;
-		vfs_context_t a_context;
-	} */ *ap)
-{
-	return (EINVAL);
-}
 
 /*
  * fsync vnode op. Just call nfs_flush().
@@ -5556,8 +6075,7 @@ nfs3_pathconf_rpc(
 	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
 	nfsm_chain_build_done(error, &nmreq);
 	nfsmout_if(error);
-	error = nfs_request(np, NULL, &nmreq, NFSPROC_PATHCONF, ctx,
-			&nmrep, &xid, &status);
+	error = nfs_request(np, NULL, &nmreq, NFSPROC_PATHCONF, ctx, NULL, &nmrep, &xid, &status);
 	if ((lockerror = nfs_node_lock(np)))
 		error = lockerror;
 	nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
@@ -5653,6 +6171,12 @@ nfs_vnop_pathconf(
 			return (0);
 		}
 		break;
+	case _PC_XATTR_SIZE_BITS:
+		/* Do we support xattrs natively? */
+		if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR)
+			break;  /* Yes */
+		/* No... so just return an error */
+		/* FALLTHROUGH */
 	default:
 		/* don't bother contacting the server if we know the answer */
 		return (EINVAL);
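
[Editor's note] With the new _PC_XATTR_SIZE_BITS case, mounts with native named-attribute support answer the query (falling through to the FILESIZEBITS logic below), while everything else keeps the old EINVAL. A runnable userspace probe; the mount path is illustrative:

	#include <stdio.h>
	#include <unistd.h>

	int
	main(void)
	{
		long bits = pathconf("/Volumes/nfsmount/somefile", _PC_XATTR_SIZE_BITS);
		if (bits == -1)
			perror("pathconf");	/* EINVAL when named attrs are unsupported */
		else
			printf("_PC_XATTR_SIZE_BITS = %ld\n", bits);
		return (0);
	}
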
@@ -5738,6 +6262,7 @@ nfs_vnop_pathconf(
 		else
 			error = EINVAL;
 		break;
+	case _PC_XATTR_SIZE_BITS: /* same as file size bits if named attrs supported */
 	case _PC_FILESIZEBITS:
 		if (!NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_MAXFILESIZE)) {
 			*ap->a_retval = 64;
@@ -6007,7 +6532,7 @@ nfsfifo_vnop_close(
 /*ARGSUSED*/
 int
 nfs_vnop_ioctl(
-	__unused struct vnop_ioctl_args /* {
+	struct vnop_ioctl_args /* {
 		struct vnodeop_desc *a_desc;
 		vnode_t a_vp;
 		u_int32_t a_command;
@@ -6016,12 +6541,23 @@ nfs_vnop_ioctl(
 		vfs_context_t a_context;
 	} */ *ap)
 {
+	vfs_context_t ctx = ap->a_context;
+	vnode_t vp = ap->a_vp;
+	int error = ENOTTY;
 
-	/*
-	 * XXX we were once bogusly enoictl() which returned this (ENOTTY).
-	 * Probably we should return ENODEV.
-	 */
-	return (ENOTTY);
+	switch (ap->a_command) {
+
+	case F_FULLFSYNC:
+		if (vnode_vfsisrdonly(vp))
+			return (EROFS);
+		if (!VTONMP(vp))
+			return (ENXIO);
+		error = nfs_flush(VTONFS(vp), MNT_WAIT, vfs_context_thread(ctx), 0);
+		break;
+
+	}
+
+	return (error);
 }
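
[Editor's note] nfs_vnop_ioctl now honors F_FULLFSYNC by mapping it to a synchronous nfs_flush(). The path is exercised from userspace with fcntl(2):

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int
	main(int argc, char **argv)
	{
		int fd;

		if (argc < 2)
			return (1);
		if ((fd = open(argv[1], O_WRONLY)) < 0) {
			perror("open");
			return (1);
		}
		/* on an NFS file this now reaches nfs_flush(..., MNT_WAIT, ...) */
		if (fcntl(fd, F_FULLFSYNC) == -1)
			perror("fcntl(F_FULLFSYNC)");
		close(fd);
		return (0);
	}
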
 
 /*ARGSUSED*/
@@ -6135,6 +6671,10 @@ tryagain:
 	bzero(req, sizeof(req));
 	nextsend = nextwait = 0;
 	do {
+		if (np->n_flag & NREVOKE) {
+			error = EIO;
+			break;
+		}
 		/* send requests while we need to and have available slots */
 		while ((txsize > 0) && (req[nextsend] == NULL)) {
 			iosize = MIN(nmrsize, txsize);
@@ -6161,14 +6701,11 @@ tryagain:
 			nextwait = (nextwait + 1) % MAXPAGINGREQS;
 			if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) {
 				lck_mtx_lock(&nmp->nm_lock);
-				if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) {
-					printf("nfs_vnop_pagein: error %d, initiating recovery\n", error);
-					nmp->nm_state |= NFSSTA_RECOVER;
-					nfs_mount_sock_thread_wake(nmp);
+				if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
+					NP(np, "nfs_vnop_pagein: error %d, initiating recovery", error);
+					nfs_need_recover(nmp, error);
 				}
 				lck_mtx_unlock(&nmp->nm_lock);
-				if (error == NFSERR_GRACE)
-					tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
 				restart++;
 				goto cancel;
 			}
@@ -6200,11 +6737,17 @@ cancel:
 			req[nextwait] = NULL;
 			nextwait = (nextwait + 1) % MAXPAGINGREQS;
 		}
-		if (restart) {
-			if ((restart <= nfs_mount_state_max_restarts(nmp)) && /* guard against no progress */
-			    (!(error = nfs_mount_state_wait_for_recovery(nmp))))
-				goto tryagain;
-			printf("nfs_pagein: too many restarts, aborting.\n");
+		if (np->n_flag & NREVOKE) {
+			error = EIO;
+		} else if (restart) {
+			if (restart <= nfs_mount_state_max_restarts(nmp)) { /* guard against no progress */
+				if (error == NFSERR_GRACE)
+					tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
+				if (!(error = nfs_mount_state_wait_for_recovery(nmp)))
+					goto tryagain;
+			} else {
+				NP(np, "nfs_pagein: too many restarts, aborting");
+			}
 		}
 	}
 
@@ -6579,6 +7122,10 @@ tryagain:
 	bzero(req, sizeof(req));
 	nextsend = nextwait = 0;
 	do {
+		if (np->n_flag & NREVOKE) {
+			error = EIO;
+			break;
+		}
 		/* send requests while we need to and have available slots */
 		while ((txsize > 0) && (req[nextsend] == NULL)) {
 			iosize = MIN(nmwsize, txsize);
@@ -6616,14 +7163,11 @@ tryagain:
 			nfs_node_unlock(np);
 			if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) {
 				lck_mtx_lock(&nmp->nm_lock);
-				if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) {
-					printf("nfs_vnop_pageout: error %d, initiating recovery\n", error);
-					nmp->nm_state |= NFSSTA_RECOVER;
-					nfs_mount_sock_thread_wake(nmp);
+				if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
+					NP(np, "nfs_vnop_pageout: error %d, initiating recovery", error);
+					nfs_need_recover(nmp, error);
 				}
 				lck_mtx_unlock(&nmp->nm_lock);
-				if (error == NFSERR_GRACE)
-					tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
 				restart = 1;
 				goto cancel;
 			}
@@ -6654,16 +7198,13 @@ tryagain:
 				iomode = NFS_WRITE_UNSTABLE;
 				error = nfs_write_rpc2(np, auio, thd, cred, &iomode, &wverf2);
 				if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) {
-					printf("nfs_vnop_pageout: restart: error %d\n", error);
+					NP(np, "nfs_vnop_pageout: restart: error %d", error);
 					lck_mtx_lock(&nmp->nm_lock);
-					if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) {
-						printf("nfs_vnop_pageout: error %d, initiating recovery\n", error);
-						nmp->nm_state |= NFSSTA_RECOVER;
-						nfs_mount_sock_thread_wake(nmp);
+					if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
+						NP(np, "nfs_vnop_pageout: error %d, initiating recovery", error);
+						nfs_need_recover(nmp, error);
 					}
 					lck_mtx_unlock(&nmp->nm_lock);
-					if (error == NFSERR_GRACE)
-						tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
 					restart = 1;
 					goto cancel;
 				}
@@ -6690,7 +7231,7 @@ tryagain:
 	vrestart = 0;
 
 	if (!error && (commit != NFS_WRITE_FILESYNC)) {
-		error = nmp->nm_funcs->nf_commit_rpc(np, f_offset, xsize, cred);
+		error = nmp->nm_funcs->nf_commit_rpc(np, f_offset, xsize, cred, wverf);
 		if (error == NFSERR_STALEWRITEVERF) {
 			vrestart = 1;
 			error = EIO;
@@ -6709,18 +7250,26 @@ cancel:
 			np->n_numoutput--;
 			nfs_node_unlock(np);
 		}
-		if (vrestart) {
-			if (++vrestarts <= 100) /* guard against no progress */
-				goto tryagain;
-			printf("nfs_pageout: too many restarts, aborting.\n");
-			FSDBG(323, f_offset, xsize, ERESTART, -1);
-		}
-		if (restart) {
-			if ((restarts <= nfs_mount_state_max_restarts(nmp)) && /* guard against no progress */
-			    (!(error = nfs_mount_state_wait_for_recovery(nmp))))
-				goto tryagain;
-			printf("nfs_pageout: too many restarts, aborting.\n");
-			FSDBG(323, f_offset, xsize, ERESTART, -1);
+		if (np->n_flag & NREVOKE) {
+			error = EIO;
+		} else {
+			if (vrestart) {
+				if (++vrestarts <= 100) /* guard against no progress */
+					goto tryagain;
+				NP(np, "nfs_pageout: too many restarts, aborting");
+				FSDBG(323, f_offset, xsize, ERESTART, -1);
+			}
+			if (restart) {
+				if (restarts <= nfs_mount_state_max_restarts(nmp)) { /* guard against no progress */
+					if (error == NFSERR_GRACE)
+						tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
+					if (!(error = nfs_mount_state_wait_for_recovery(nmp)))
+						goto tryagain;
+				} else {
+					NP(np, "nfs_pageout: too many restarts, aborting");
+					FSDBG(323, f_offset, xsize, ERESTART, -1);
+				}
+			}
 		}
 	}
 
@@ -6762,7 +7311,7 @@ cancel:
 					abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
 					if (error <= NFS_ELAST) {
 						if ((errorcount[error] % 100) == 0)
-							printf("nfs_pageout: unexpected error %d. dumping vm page\n", error);
+							NP(np, "nfs_pageout: unexpected error %d. dumping vm page", error);
 						errorcount[error]++;
 					}
 					break;
@@ -6776,7 +7325,7 @@ cancel:
 					break;
 				case SEVER: /* not implemented */
 				default:
-					printf("nfs_pageout: action %d not expected\n", action);
+					NP(np, "nfs_pageout: action %d not expected", action);
 					break;
 			}
 
@@ -6837,3 +7386,84 @@ nfs_vnop_offtoblk(
 	return (0);
 }
 
+/*
+ * vnode change monitoring
+ */
+int
+nfs_vnop_monitor(
+	struct vnop_monitor_args /* {
+		struct vnodeop_desc *a_desc;
+		vnode_t a_vp;
+		uint32_t a_events;
+		uint32_t a_flags;
+		void *a_handle;
+		vfs_context_t a_context;
+	} */ *ap)
+{
+	nfsnode_t np = VTONFS(ap->a_vp);
+	struct nfsmount *nmp = VTONMP(ap->a_vp);
+	int error = 0;
+
+	if (!nmp)
+		return (ENXIO);
+
+	/* make sure that the vnode's monitoring status is up to date */
+	lck_mtx_lock(&nmp->nm_lock);
+	if (vnode_ismonitored(ap->a_vp)) {
+		/* This vnode is currently being monitored, make sure we're tracking it. */
+		if (np->n_monlink.le_next == NFSNOLIST) {
+			LIST_INSERT_HEAD(&nmp->nm_monlist, np, n_monlink);
+			nfs_mount_sock_thread_wake(nmp);
+		}
+	} else {
+		/* This vnode is no longer being monitored, make sure we're not tracking it. */
+		/* Wait for any in-progress getattr to complete first. */
+		while (np->n_mflag & NMMONSCANINPROG) {
+			struct timespec ts = { 1, 0 };
+			np->n_mflag |= NMMONSCANWANT;
+			msleep(&np->n_mflag, &nmp->nm_lock, PZERO-1, "nfswaitmonscan", &ts);
+		}
+		if (np->n_monlink.le_next != NFSNOLIST) {
+			LIST_REMOVE(np, n_monlink);
+			np->n_monlink.le_next = NFSNOLIST;
+		}
+	}
+	lck_mtx_unlock(&nmp->nm_lock);
+
+	return (error);
+}
+
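
[Editor's note] nfs_vnop_monitor above is the mount-side half of vnode event watching: a kqueue EVFILT_VNODE registration is what ultimately (via vnode_ismonitored()) puts the node on nm_monlist so the socket thread polls it for changes. A minimal userspace watcher:

	#include <sys/event.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int
	main(int argc, char **argv)
	{
		struct kevent ev;
		int kq, fd;

		if (argc < 2)
			return (1);
		kq = kqueue();
		fd = open(argv[1], O_EVTONLY);	/* watch-only descriptor */
		if (kq < 0 || fd < 0)
			return (1);
		EV_SET(&ev, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR,
		    NOTE_WRITE | NOTE_ATTRIB | NOTE_DELETE, 0, NULL);
		/* registering the watch is what drives VNOP_MONITOR on the mount */
		if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1)
			return (1);
		if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1)
			printf("change on %s (fflags 0x%x)\n", argv[1], (unsigned)ev.fflags);
		return (0);
	}
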
+/*
+ * Send a vnode notification for the given events.
+ */
+void
+nfs_vnode_notify(nfsnode_t np, uint32_t events)
+{
+	struct nfsmount *nmp = NFSTONMP(np);
+	struct nfs_vattr nvattr;
+	struct vnode_attr vattr, *vap = NULL;
+	struct timeval now;
+
+	microuptime(&now);
+	if ((np->n_evtstamp == now.tv_sec) || !nmp) {
+		/* delay sending this notify */
+		np->n_events |= events;
+		return;
+	}
+	events |= np->n_events;
+	np->n_events = 0;
+	np->n_evtstamp = now.tv_sec;
+
+	vfs_get_notify_attributes(&vattr);
+	if (!nfs_getattrcache(np, &nvattr, 0)) {
+		vap = &vattr;
+		VATTR_INIT(vap);
+		VATTR_RETURN(vap, va_fsid, vfs_statfs(nmp->nm_mountp)->f_fsid.val[0]);
+		VATTR_RETURN(vap, va_fileid, nvattr.nva_fileid);
+		VATTR_RETURN(vap, va_mode, nvattr.nva_mode);
+		VATTR_RETURN(vap, va_uid, nvattr.nva_uid);
+		VATTR_RETURN(vap, va_gid, nvattr.nva_gid);
+		VATTR_RETURN(vap, va_nlink, nvattr.nva_nlink);
+	}
+	vnode_notify(NFSTOV(np), events, vap);
+}
diff --git a/bsd/nfs/nfsm_subs.h b/bsd/nfs/nfsm_subs.h
index 910636f85..434d4f57a 100644
--- a/bsd/nfs/nfsm_subs.h
+++ b/bsd/nfs/nfsm_subs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -73,8 +73,8 @@
 
 #ifdef __APPLE_API_PRIVATE
 
-int nfsm_rpchead(struct nfsreq *, int, mbuf_t, u_int64_t *, mbuf_t *);
-int nfsm_rpchead2(int, int, int, int, int, int, kauth_cred_t, struct nfsreq *, mbuf_t, u_int64_t *, mbuf_t *);
+int nfsm_rpchead(struct nfsreq *, mbuf_t, u_int64_t *, mbuf_t *);
+int nfsm_rpchead2(struct nfsmount *, int, int, int, int, int, kauth_cred_t, struct nfsreq *, mbuf_t, u_int64_t *, mbuf_t *);
 
 int nfsm_chain_new_mbuf(struct nfsm_chain *, size_t);
 int nfsm_chain_add_opaque_f(struct nfsm_chain *, const u_char *, uint32_t);
@@ -83,6 +83,7 @@ int nfsm_chain_add_uio(struct nfsm_chain *, uio_t, uint32_t);
 int nfsm_chain_add_fattr4_f(struct nfsm_chain *, struct vnode_attr *, struct nfsmount *);
 int nfsm_chain_add_v2sattr_f(struct nfsm_chain *, struct vnode_attr *, uint32_t);
 int nfsm_chain_add_v3sattr_f(struct nfsm_chain *, struct vnode_attr *);
+int nfsm_chain_add_string_nfc(struct nfsm_chain *, const uint8_t *, uint32_t);
 
 int nfsm_chain_advance(struct nfsm_chain *, uint32_t);
 int nfsm_chain_offset(struct nfsm_chain *);
@@ -93,6 +94,7 @@ int nfsm_chain_get_uio(struct nfsm_chain *, uint32_t, uio_t);
 int nfsm_chain_get_fh_attr(struct nfsm_chain *, nfsnode_t,
 	vfs_context_t, int, uint64_t *, fhandle_t *, struct nfs_vattr *);
 int nfsm_chain_get_wcc_data_f(struct nfsm_chain *, nfsnode_t, struct timespec *, int *, u_int64_t *);
+int nfsm_chain_get_secinfo(struct nfsm_chain *, uint32_t *, int *);
 
 #if NFSSERVER
 void nfsm_adj(mbuf_t, int, int);
@@ -339,6 +341,16 @@ int nfsm_chain_trim_data(struct nfsm_chain *, int, int *);
 		nfsm_chain_add_opaque((E), (NMC), (STR), (LEN)); \
 	} while (0)
 
+/* add a name to an mbuf chain */
+#define nfsm_chain_add_name(E, NMC, STR, LEN, NMP) \
+	do { \
+		if (E) break; \
+		if (NMFLAG((NMP), NFC)) \
+			(E) = nfsm_chain_add_string_nfc((NMC), (const uint8_t*)(STR), (LEN)); \
+		else \
+			nfsm_chain_add_string((E), (NMC), (STR), (LEN)); \
+	} while (0)
+
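
[Editor's note] nfsm_chain_add_name funnels file names through NFC normalization when the mount's NFC flag is set: HFS+ hands the kernel decomposed (NFD) Unicode, while some servers only match precomposed (NFC) names. For illustration, the two encodings of "e" with an acute accent:

	/* the same name in NFD (as HFS+ stores it) and in NFC */
	const uint8_t name_nfd[] = { 'e', 0xCC, 0x81, 0 };	/* "e" + U+0301 combining acute */
	const uint8_t name_nfc[] = { 0xC3, 0xA9, 0 };		/* precomposed U+00E9 */
	/* with NMFLAG(nmp, NFC) set, nfsm_chain_add_name() emits the NFC form
	 * via nfsm_chain_add_string_nfc() even when the caller passes NFD */
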
 /* add an NFSv2 time to an mbuf chain */
 #define nfsm_chain_add_v2time(E, NMC, TVP) \
 	do { \
@@ -454,6 +466,36 @@ int nfsm_chain_trim_data(struct nfsm_chain *, int, int *);
 			nfsm_chain_add_32((E), (NMC), ((B)[__i] & (MASK)[__i])); \
 	} while (0)
 
+/* add NFSv4 attr bitmap masked with the supported attributes for this mount/node */
+#define nfsm_chain_add_bitmap_supported(E, NMC, B, NMP, NP) \
+	do { \
+		uint32_t __bitmap[NFS_ATTR_BITMAP_LEN], *__bmp = (B); \
+		int __nonamedattr = 0, __noacl = 0, __nomode = 0; \
+		if (!((NMP)->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR) || \
+		    ((NP) && (((nfsnode_t)(NP))->n_flag & (NISDOTZFS|NISDOTZFSCHILD)))) \
+			__nonamedattr = 1; \
+		if (!((NMP)->nm_fsattr.nfsa_flags & NFS_FSFLAG_ACL)) \
+			__noacl = 1; \
+		if (NMFLAG((NMP), ACLONLY)) \
+			__nomode = 1; \
+		if (__nonamedattr || __noacl || __nomode) { \
+			/* don't ask for attrs we're not supporting */ \
+			/* some ".zfs" directories can't handle being asked for some attributes */ \
+			int __ii; \
+			NFS_CLEAR_ATTRIBUTES(__bitmap); \
+			for (__ii=0; __ii < NFS_ATTR_BITMAP_LEN; __ii++) \
+				__bitmap[__ii] = (B)[__ii]; \
+			if (__nonamedattr) \
+				NFS_BITMAP_CLR(__bitmap, NFS_FATTR_NAMED_ATTR); \
+			if (__noacl) \
+				NFS_BITMAP_CLR(__bitmap, NFS_FATTR_ACL); \
+			if (__nomode) \
+				NFS_BITMAP_CLR(__bitmap, NFS_FATTR_MODE); \
+			__bmp = __bitmap; \
+		} \
+		nfsm_chain_add_bitmap_masked((E), (NMC), __bmp, NFS_ATTR_BITMAP_LEN, (NMP)->nm_fsattr.nfsa_supp_attr); \
+	} while (0)
+
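
[Editor's note] A usage sketch of the macro above in a GETATTR-style request; NFS_COPY_ATTRIBUTES and nfs_getattr_bitmap are assumed to be the existing NFSv4 helpers callers pair with it:

	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];

	NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap);
	/* masks out the named-attr/ACL/mode bits this mount (or a ".zfs" node) can't take */
	nfsm_chain_add_bitmap_supported(error, &nmreq, bitmap, nmp, np);
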
 /* Add an NFSv4 "stateid" structure to an mbuf chain */
 #define nfsm_chain_add_stateid(E, NMC, SID) \
 	do { \
@@ -642,19 +684,18 @@ int nfsm_chain_trim_data(struct nfsm_chain *, int, int *);
 	} while (0)
 
 /* update a node's attribute cache with attributes from an mbuf chain */
-#define nfsm_chain_loadattr(E, NMC, NP, VERS, A, X) \
+#define nfsm_chain_loadattr(E, NMC, NP, VERS, X) \
 	do { \
-		struct nfs_vattr ttvattr, *ttnvap; \
+		struct nfs_vattr ttvattr; \
 		if (E) break; \
-		ttnvap = (A) ? (A) : &ttvattr; \
 		if ((VERS) == NFS_VER4) { \
-			NFS_CLEAR_ATTRIBUTES(ttnvap->nva_bitmap); \
-			(E) = nfs4_parsefattr((NMC), NULL, ttnvap, NULL, NULL); \
+			(E) = nfs4_parsefattr((NMC), NULL, &ttvattr, NULL, NULL, NULL); \
 		} else { \
-			(E) = nfs_parsefattr((NMC), (VERS), ttnvap); \
+			(E) = nfs_parsefattr((NMC), (VERS), &ttvattr); \
 		} \
-		if (E) break; \
-		(E) = nfs_loadattrcache((NP), ttnvap, (X), 0); \
+		if (!(E) && (NP)) \
+			(E) = nfs_loadattrcache((NP), &ttvattr, (X), 0); \
+		NVATTR_CLEANUP(&ttvattr); \
 	} while (0)
 
 /* get NFSv4 attr bitmap */
@@ -693,7 +734,8 @@ int nfsm_chain_trim_data(struct nfsm_chain *, int, int *);
 	do { \
 		uint32_t __val = 0; \
 		nfsm_chain_get_32((E), (NMC), __val); \
-		nfsm_assert((E), (__val == (OP)), EBADRPC); \
+		/* [sigh] some implementations return the "illegal" op for unsupported ops */ \
+		nfsm_assert((E), ((__val == (OP)) || (__val == NFS_OP_ILLEGAL)), EBADRPC); \
 		nfsm_chain_get_32((E), (NMC), __val); \
 		nfsm_assert((E), (__val == NFS_OK), __val); \
 	} while (0)
@@ -705,7 +747,7 @@ int nfsm_chain_trim_data(struct nfsm_chain *, int, int *);
 		nfsm_chain_get_32((E), (NMC), __ci_atomic); \
 		nfsm_chain_get_64((E), (NMC), __ci_before); \
 		nfsm_chain_get_64((E), (NMC), __ci_after); \
-		if (E) break; \
+		if ((E) || !(DNP)) break; \
 		if (__ci_atomic && (__ci_before == (DNP)->n_ncchange)) { \
 			(DNP)->n_ncchange = __ci_after; \
 		} else { \
diff --git a/bsd/nfs/nfsmount.h b/bsd/nfs/nfsmount.h
index 742c166c5..97f955e2f 100644
--- a/bsd/nfs/nfsmount.h
+++ b/bsd/nfs/nfsmount.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -105,29 +105,130 @@ struct nfs_fsattr {
 #define NFS_FSFLAG_CHOWN_RESTRICTED	0x00000080
 #define NFS_FSFLAG_HOMOGENEOUS		0x00000100
 #define NFS_FSFLAG_NO_TRUNC		0x00000200
+#define NFS_FSFLAG_NAMED_ATTR		0x00000400
 #define NFS_FSFLAG_FHTYPE_MASK		0xFF000000
 #define NFS_FSFLAG_FHTYPE_SHIFT		24
 
+/*
+ * NFS file system location structures
+ */
+struct nfs_fs_server {
+	char *			ns_name;		/* name of server */
+	char **			ns_addresses;		/* array of addresses for server */
+	uint32_t		ns_addrcount;		/* # of addresses */
+};
+struct nfs_fs_path {
+	char **			np_components;		/* array of component pointers */
+	uint32_t		np_compcount;		/* # components in path */
+};
+struct nfs_fs_location {
+	struct nfs_fs_server **	nl_servers;		/* array of server pointers */
+	struct nfs_fs_path	nl_path;		/* file system path */
+	uint32_t		nl_servcount;		/* # of servers */
+};
+
+struct nfs_location_index {
+	uint8_t 		nli_flags;		/* misc flags */
+	uint8_t 		nli_loc;		/* location index */
+	uint8_t 		nli_serv;		/* server index */
+	uint8_t 		nli_addr;		/* address index */
+};
+#define NLI_VALID	0x01	/* index is valid */
+
+struct nfs_fs_locations {
+	struct nfs_fs_path	nl_root;		/* current server's root file system path */
+	uint32_t		nl_numlocs;		/* # of locations */
+	struct nfs_location_index nl_current;		/* index of current location/server/address */
+	struct nfs_fs_location **nl_locations;		/* array of fs locations */
+};
+
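
[Editor's note] These structures nest locations -> servers -> addresses, with nl_current recording which triple is in use; they back NFSv4 fs_locations referrals and multi-address failover. A hedged, hand-built single-location table (static initialization is purely illustrative; real tables are parsed out of the XDR mount arguments):

	static struct nfs_fs_server serv = {
		.ns_name      = "server.example.com",		/* illustrative */
		.ns_addresses = (char *[]){ "192.0.2.1" },
		.ns_addrcount = 1,
	};
	static struct nfs_fs_server *servers[] = { &serv };
	static struct nfs_fs_location loc = {
		.nl_servers   = servers,
		.nl_servcount = 1,
		.nl_path = { .np_components = (char *[]){ "export", "home" },
			     .np_compcount  = 2 },		/* /export/home */
	};
	static struct nfs_fs_location *locs[] = { &loc };
	static struct nfs_fs_locations fsl = {
		.nl_numlocs   = 1,
		.nl_locations = locs,
		.nl_current   = { .nli_flags = NLI_VALID,
				  .nli_loc = 0, .nli_serv = 0, .nli_addr = 0 },
	};
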
+/*
+ * RPC record marker parsing state
+ */
+struct nfs_rpc_record_state {
+	mbuf_t			nrrs_m;			/* mbufs for current record */
+	mbuf_t			nrrs_mlast;
+	uint16_t		nrrs_lastfrag;		/* last fragment of record */
+	uint16_t		nrrs_markerleft;	/* marker bytes remaining */
+	uint32_t		nrrs_fragleft;		/* fragment bytes remaining */
+	uint32_t		nrrs_reclen;		/* length of RPC record */
+};
+
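
[Editor's note] nfs_rpc_record_state above tracks RPC-over-TCP record marking (RFC 5531, section 11): each fragment is preceded by a 4-byte marker whose high bit flags the final fragment and whose low 31 bits carry the fragment length. Decoding sketch:

	uint32_t marker = ntohl(raw_marker);		/* raw_marker: 4 bytes off the stream */
	int lastfrag = (marker & 0x80000000U) != 0;	/* feeds nrrs_lastfrag */
	uint32_t fraglen = marker & 0x7fffffffU;	/* feeds nrrs_fragleft */
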
+/*
+ * NFS socket structures
+ */
+struct nfs_socket {
+	lck_mtx_t		nso_lock;		/* nfs socket lock */
+	TAILQ_ENTRY(nfs_socket)	nso_link;		/* list of sockets */
+	struct sockaddr *	nso_saddr;		/* socket address */
+	struct sockaddr *	nso_saddr2;		/* additional socket address */
+	void *			nso_wake;		/* address to wake up */
+	time_t			nso_timestamp;
+	time_t			nso_reqtimestamp;	/* last request sent */
+	socket_t		nso_so;			/* socket */
+	uint8_t			nso_sotype;		/* Type of socket */
+	uint16_t		nso_flags;		/* NSO_* flags */
+	struct nfs_location_index nso_location;		/* location index */
+	uint32_t		nso_protocol;		/* RPC protocol */
+	uint32_t		nso_version;		/* RPC protocol version */
+	uint32_t		nso_pingxid;		/* RPC XID of NULL ping request */
+	int			nso_error;		/* saved error/status */
+	struct nfs_rpc_record_state nso_rrs;		/* RPC record parsing state (TCP) */
+};
+TAILQ_HEAD(nfssocketlist, nfs_socket);
+/* nso_flags */
+#define NSO_UPCALL		0x0001			/* socket upcall in progress */
+#define NSO_DEAD		0x0002			/* socket is dead */
+#define NSO_CONNECTING		0x0004			/* socket is being connected */
+#define NSO_CONNECTED		0x0008			/* socket connection complete */
+#define NSO_PINGING		0x0010			/* socket is being tested */
+#define NSO_VERIFIED		0x0020			/* socket appears functional */
+#define NSO_DISCONNECTING	0x0040			/* socket is being disconnected */
+
+/* NFS connect socket search state */
+struct nfs_socket_search {
+	struct nfs_location_index nss_startloc;		/* starting location index */
+	struct nfs_location_index nss_nextloc;		/* next location index */
+	struct nfssocketlist	nss_socklist;		/* list of active sockets */
+	time_t			nss_timestamp;		/* search start time */
+	time_t			nss_last;		/* timestamp of last socket */
+	struct nfs_socket *	nss_sock;		/* found socket */
+	uint8_t			nss_sotype;		/* TCP/UDP */
+	uint8_t			nss_sockcnt;		/* # of active sockets */
+	in_port_t		nss_port;		/* port # to connect to */
+	uint32_t		nss_protocol;		/* RPC protocol */
+	uint32_t		nss_version;		/* RPC protocol version */
+	uint32_t		nss_flags;		/* (see below) */
+	int			nss_timeo;		/* how long we are willing to wait */
+	int			nss_error;		/* best error we've gotten so far */
+};
+/* nss_flags */
+#define NSS_VERBOSE		0x00000001		/* OK to log info about socket search */
+#define NSS_WARNED		0x00000002		/* logged warning about socket search taking a while */
+
 /*
  * function table for calling version-specific NFS functions
  */
 struct nfs_funcs {
-	int	(*nf_mount)(struct nfsmount *, vfs_context_t, struct user_nfs_args *, nfsnode_t *);
+	int	(*nf_mount)(struct nfsmount *, vfs_context_t, nfsnode_t *);
 	int	(*nf_update_statfs)(struct nfsmount *, vfs_context_t);
 	int	(*nf_getquota)(struct nfsmount *, vfs_context_t, uid_t, int, struct dqblk *);
 	int	(*nf_access_rpc)(nfsnode_t, u_int32_t *, vfs_context_t);
-	int	(*nf_getattr_rpc)(nfsnode_t, mount_t, u_char *, size_t, vfs_context_t, struct nfs_vattr *, u_int64_t *);
+	int	(*nf_getattr_rpc)(nfsnode_t, mount_t, u_char *, size_t, int, vfs_context_t, struct nfs_vattr *, u_int64_t *);
 	int	(*nf_setattr_rpc)(nfsnode_t, struct vnode_attr *, vfs_context_t);
 	int	(*nf_read_rpc_async)(nfsnode_t, off_t, size_t, thread_t, kauth_cred_t, struct nfsreq_cbinfo *, struct nfsreq **);
 	int	(*nf_read_rpc_async_finish)(nfsnode_t, struct nfsreq *, uio_t, size_t *, int *);
 	int	(*nf_readlink_rpc)(nfsnode_t, char *, uint32_t *, vfs_context_t);
 	int	(*nf_write_rpc_async)(nfsnode_t, uio_t, size_t, thread_t, kauth_cred_t, int, struct nfsreq_cbinfo *, struct nfsreq **);
 	int	(*nf_write_rpc_async_finish)(nfsnode_t, struct nfsreq *, int *, size_t *, uint64_t *);
-	int	(*nf_commit_rpc)(nfsnode_t, uint64_t, uint64_t, kauth_cred_t);
+	int	(*nf_commit_rpc)(nfsnode_t, uint64_t, uint64_t, kauth_cred_t, uint64_t);
 	int	(*nf_lookup_rpc_async)(nfsnode_t, char *, int, vfs_context_t, struct nfsreq **);
-	int	(*nf_lookup_rpc_async_finish)(nfsnode_t, vfs_context_t, struct nfsreq *, u_int64_t *, fhandle_t *, struct nfs_vattr *);
+	int	(*nf_lookup_rpc_async_finish)(nfsnode_t, char *, int, vfs_context_t, struct nfsreq *, u_int64_t *, fhandle_t *, struct nfs_vattr *);
 	int	(*nf_remove_rpc)(nfsnode_t, char *, int, thread_t, kauth_cred_t);
 	int	(*nf_rename_rpc)(nfsnode_t, char *, int, nfsnode_t, char *, int, vfs_context_t);
+	int	(*nf_setlock_rpc)(nfsnode_t, struct nfs_open_file *, struct nfs_file_lock *, int, int, thread_t, kauth_cred_t);
+	int	(*nf_unlock_rpc)(nfsnode_t, struct nfs_lock_owner *, int, uint64_t, uint64_t, int, thread_t, kauth_cred_t);
+	int	(*nf_getlock_rpc)(nfsnode_t, struct nfs_lock_owner *, struct flock *, uint64_t, uint64_t, vfs_context_t);
 };
 
 /*
@@ -148,12 +249,18 @@ __private_extern__ struct nfsclientidlist nfsclientids;
  */
 struct nfsmount {
 	lck_mtx_t nm_lock;		/* nfs mount lock */
-	int	nm_flag;		/* Flags for soft/hard... */
+	char *	nm_args;		/* NFS mount args (XDR) */
+	uint32_t nm_mattrs[NFS_MATTR_BITMAP_LEN]; /* mount attributes in mount args */
+	uint32_t nm_mflags_mask[NFS_MFLAG_BITMAP_LEN]; /* mount flags mask in mount args */
+	uint32_t nm_mflags[NFS_MFLAG_BITMAP_LEN]; /* mount flags in mount args */
+	uint32_t nm_flags[NFS_MFLAG_BITMAP_LEN]; /* current mount flags (soft, intr, etc...) */
 	int	nm_state;		/* Internal state flags */
 	int	nm_vers;		/* NFS version */
 	struct nfs_funcs *nm_funcs;	/* version-specific functions */
+	kauth_cred_t nm_mcred;		/* credential used for the mount (v4) */
 	mount_t	nm_mountp;		/* VFS structure for this filesystem */
 	nfsnode_t nm_dnp;		/* root directory nfsnode pointer */
+	struct nfs_fs_locations nm_locations; /* file system locations */
 	int	nm_numgrps;		/* Max. size of groupslist */
 	TAILQ_HEAD(, nfs_gss_clnt_ctx) nm_gsscl; /* GSS user contexts */
 	int	nm_timeo;		/* Init timer for NFSMNT_DUMBTIMR */
@@ -162,36 +269,48 @@ struct nfsmount {
 	uint32_t nm_wsize;		/* Max size of write rpc */
 	uint32_t nm_biosize;		/* buffer I/O size */
 	uint32_t nm_readdirsize;	/* Size of a readdir rpc */
-	int	nm_readahead;		/* Num. of blocks to readahead */
-	int	nm_acregmin;		/* reg file min attr cache timeout */
-	int	nm_acregmax;		/* reg file max attr cache timeout */
-	int	nm_acdirmin;		/* dir min attr cache timeout */
-	int	nm_acdirmax;		/* dir max attr cache timeout */
-	uint32_t nm_auth;		/* security mechanism flavor */
+	uint32_t nm_readahead;		/* Num. of blocks to readahead */
+	uint32_t nm_acregmin;		/* reg file min attr cache timeout */
+	uint32_t nm_acregmax;		/* reg file max attr cache timeout */
+	uint32_t nm_acdirmin;		/* dir min attr cache timeout */
+	uint32_t nm_acdirmax;		/* dir max attr cache timeout */
+	uint32_t nm_auth;		/* security mechanism flavor being used */
+	struct nfs_sec nm_sec;		/* acceptable security mechanism flavors */
+	struct nfs_sec nm_servsec;	/* server's acceptable security mechanism flavors */
+	fhandle_t *nm_fh;		/* initial file handle */
+	uint8_t  nm_lockmode;		/* advisory file locking mode */
 	/* mount info */
 	uint32_t nm_fsattrstamp;	/* timestamp for fs attrs */
 	struct nfs_fsattr nm_fsattr;	/* file system attributes */
 	uint64_t nm_verf;		/* v3/v4 write verifier */
 	union {
 	    struct {			/* v2/v3 specific fields */
-		u_short rqport;		/* cached rquota port */
-		uint32_t rqportstamp;	/* timestamp of rquota port */
+		TAILQ_ENTRY(nfsmount) ldlink; /* chain of mounts registered for lockd use */
+		int udp_sent;		/* UDP request send count */
+		int udp_cwnd;		/* UDP request congestion window */
+		struct nfs_reqqhead udp_cwndq; /* requests waiting on cwnd */
+		struct sockaddr *rqsaddr;/* cached rquota socket address */
+		uint32_t rqsaddrstamp;	/* timestamp of rquota socket address */
 	    } v3;
 	    struct {			/* v4 specific fields */
 		struct nfs_client_id *longid; /* client ID, long form */
 		uint64_t mounttime;	/* used as client ID verifier */
 		uint64_t clientid;	/* client ID, short form */
 		thread_call_t renew_timer; /* RENEW timer call */
-		TAILQ_HEAD(, nfs_open_owner) open_owners; /* list of open owners */
-		TAILQ_HEAD(, nfsnode) recallq; /* list of nodes with recalled delegations */
+		nfs_fsid fsid;		/* NFS file system id */
+		TAILQ_HEAD(, nfsnode) delegations; /* list of nodes with delegations */
+		TAILQ_HEAD(, nfsnode) dreturnq; /* list of nodes with delegations to return */
 		TAILQ_ENTRY(nfsmount) cblink; /* chain of mounts registered for callbacks */
-		uint32_t stateinuse;	/* state in use counter */
-		uint32_t stategenid;	/* state generation counter */
-		kauth_cred_t mcred;	/* credential used for the mount */
 		uint32_t cbid;		/* callback channel identifier */
 		uint32_t cbrefs;	/* # callbacks using this mount */
 	    } v4;
 	} nm_un;
+	/* common state */
+	TAILQ_HEAD(, nfs_open_owner) nm_open_owners; /* list of open owners */
+	uint32_t nm_stateinuse;		/* state in use counter */
+	uint32_t nm_stategenid;		/* state generation counter */
+	time_t	nm_recover_start;	/* recover start time */
+	LIST_HEAD(, nfsnode) nm_monlist; /* list of nodes being monitored */
 	/* async I/O queue */
 	struct nfs_reqqhead nm_resendq;	/* async I/O resend queue */
 	struct nfs_reqqhead nm_iodq;	/* async I/O request queue */
@@ -199,11 +318,14 @@ struct nfsmount {
 	TAILQ_ENTRY(nfsmount) nm_iodlink; /* chain of mounts awaiting nfsiod */
 	int	nm_asyncwrites;		/* outstanding async I/O writes */
 	/* socket state */
-	int	nm_sotype;		/* Type of socket */
-	int	nm_soproto;		/* and protocol */
-	mbuf_t	nm_nam;			/* Address of server */
+	uint8_t	nm_sofamily;		/* (preferred) protocol family of socket */
+	uint8_t	nm_sotype;		/* (preferred) type of socket */
+	in_port_t	nm_nfsport;	/* NFS protocol port */
+	in_port_t	nm_mountport;	/* MOUNT protocol port (v2/v3) */
+	struct nfs_socket_search *nm_nss; /* current socket search structure */
+	struct nfs_socket *nm_nso;	/* current socket */
+	struct sockaddr	*nm_saddr;	/* Address of server */
 	u_short nm_sockflags;		/* socket state flags */
-	socket_t nm_so;			/* RPC socket */
 	time_t	nm_deadto_start;	/* dead timeout start time */
 	time_t	nm_reconnect_start;	/* reconnect start time */
 	int	nm_tprintf_initial_delay;	/* delay first "server down" */
@@ -213,27 +335,26 @@ struct nfsmount {
 	int	nm_sdrtt[4];
 	int	nm_timeouts;		/* Request timeouts */
 	int	nm_jbreqs;		/* # R_JBTPRINTFMSG requests */
-	union {
-		struct {
-			int sent;	/* Request send count */
-			int cwnd;	/* Request congestion window */
-			struct nfs_reqqhead cwndq; /* requests waiting on cwnd */
-		} udp;
-		struct {
-			u_int32_t mleft;/* marker bytes remaining */
-			u_int32_t fleft;/* fragment bytes remaining */
-			u_int32_t len;	/* length of RPC record */
-			mbuf_t m;	/* mbufs for current record */
-			mbuf_t mlast;
-		} tcp;
-	} nm_sockstate;
+	int	nm_mounterror;		/* status of mount connect */
 	TAILQ_ENTRY(nfsmount) nm_pokeq;	/* mount poke queue chain */
 	thread_t nm_sockthd;		/* socket thread for this mount */
 };
 
+/* macro for checking current mount flags */
+#define NMFLAG(NMP, F)		NFS_BITMAP_ISSET((NMP)->nm_flags, NFS_MFLAG_ ## F)
+/* macros for checking (original) mount attributes/flags */
+#define NM_OMATTR_GIVEN(NMP, F)	NFS_BITMAP_ISSET((NMP)->nm_mattrs, NFS_MATTR_ ## F)
+#define NM_OMFLAG_GIVEN(NMP, F)	NFS_BITMAP_ISSET((NMP)->nm_mflags_mask, NFS_MFLAG_ ## F)
+#define NM_OMFLAG(NMP, F)	NFS_BITMAP_ISSET((NMP)->nm_mflags, NFS_MFLAG_ ## F)
+
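+/*
+ * Example usage (an illustrative sketch; NFS_MFLAG_SOFT is assumed to be
+ * one of the NFS_MFLAG_* bits in the mount flags bitmap, and retry_limit
+ * stands in for a caller-chosen bound): a request loop deciding whether
+ * to keep retrying might check
+ *
+ *	if (NMFLAG(nmp, SOFT) && (nmp->nm_timeouts > retry_limit))
+ *		return (ETIMEDOUT);
+ *
+ * i.e. "soft" mounts give up after repeated timeouts rather than
+ * retrying forever.
+ */
+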
 /*
  * NFS mount state flags (nm_state)
  */
+#define NFSSTA_MOUNT_THREAD	0x00000040  /* nfs_mount_connect_thread running */
+#define NFSSTA_MONITOR_SCAN	0x00000080  /* scan of monitored nodes in progress */
+#define NFSSTA_UNMOUNTING	0x00000100  /* an unmount attempt is in progress */
+#define NFSSTA_NEEDSECINFO	0x00000200  /* need to fetch security info */
+#define NFSSTA_CLIENTID		0x00000400  /* short client ID is valid */
 #define NFSSTA_BIGCOOKIES	0x00000800  /* have seen >32bit dir cookies */
 #define NFSSTA_JUKEBOXTIMEO	0x00001000  /* experienced a jukebox timeout */
 #define NFSSTA_LOCKTIMEO	0x00002000  /* experienced a lock req timeout */
@@ -244,45 +365,41 @@ struct nfsmount {
 #define NFSSTA_HASWRITEVERF	0x00040000  /* Has write verifier for V3 */
 #define NFSSTA_GOTPATHCONF	0x00080000  /* Got the V3 pathconf info */
 #define NFSSTA_GOTFSINFO	0x00100000  /* Got the V3 fsinfo */
+#define NFSSTA_SENDING		0x00800000  /* Sending on socket */
 #define NFSSTA_SNDLOCK		0x01000000  /* Send socket lock */
 #define NFSSTA_WANTSND		0x02000000  /* Want above */
 #define NFSSTA_DEAD		0x04000000  /* mount is dead */
 #define NFSSTA_RECOVER		0x08000000  /* mount state needs to be recovered */
+#define NFSSTA_RECOVER_EXPIRED	0x10000000  /* mount state expired */
+#define NFSSTA_REVOKE		0x20000000  /* need to scan for revoked nodes */
 
 /* flags for nm_sockflags */
 #define NMSOCK_READY		0x0001	/* socket is ready for use */
 #define NMSOCK_CONNECTING	0x0002	/* socket is being connect()ed */
 #define NMSOCK_SETUP		0x0004	/* socket/connection is being set up */
 #define NMSOCK_UNMOUNT		0x0008	/* unmounted, no more socket activity */
-#define NMSOCK_LASTFRAG		0x0010	/* on last fragment of RPC record */
+#define NMSOCK_HASCONNECTED	0x0010	/* socket has connected before */
 #define NMSOCK_POKE		0x0020	/* socket needs to be poked */
-#define NMSOCK_UPCALL		0x0040	/* socket upcall in progress */
-
-/* aliases for socket state variables */
-#define nm_sent		nm_sockstate.udp.sent
-#define nm_cwnd		nm_sockstate.udp.cwnd
-#define nm_cwndq	nm_sockstate.udp.cwndq
-#define nm_markerleft	nm_sockstate.tcp.mleft
-#define nm_fragleft	nm_sockstate.tcp.fleft
-#define nm_reclen	nm_sockstate.tcp.len
-#define nm_m		nm_sockstate.tcp.m
-#define nm_mlast	nm_sockstate.tcp.mlast
+#define NMSOCK_DISCONNECTING	0x0080	/* socket is being disconnected */
 
 /* aliases for version-specific fields */
-#define nm_rqport	nm_un.v3.rqport
-#define nm_rqportstamp	nm_un.v3.rqportstamp
+#define nm_ldlink	nm_un.v3.ldlink
+#define nm_sent		nm_un.v3.udp_sent
+#define nm_cwnd		nm_un.v3.udp_cwnd
+#define nm_cwndq	nm_un.v3.udp_cwndq
+#define nm_rqsaddr	nm_un.v3.rqsaddr
+#define nm_rqsaddrstamp	nm_un.v3.rqsaddrstamp
 #define nm_longid	nm_un.v4.longid
 #define nm_clientid	nm_un.v4.clientid
 #define nm_mounttime	nm_un.v4.mounttime
+#define nm_fsid		nm_un.v4.fsid
 #define nm_renew_timer	nm_un.v4.renew_timer
-#define nm_open_owners	nm_un.v4.open_owners
-#define nm_stateinuse	nm_un.v4.stateinuse
-#define nm_stategenid	nm_un.v4.stategenid
-#define nm_mcred	nm_un.v4.mcred
 #define nm_cbid		nm_un.v4.cbid
 #define nm_cblink	nm_un.v4.cblink
 #define nm_cbrefs	nm_un.v4.cbrefs
-#define nm_recallq	nm_un.v4.recallq
+#define nm_delegations	nm_un.v4.delegations
+#define nm_dreturnq	nm_un.v4.dreturnq
 
 #if defined(KERNEL)
 /*
diff --git a/bsd/nfs/nfsnode.h b/bsd/nfs/nfsnode.h
index fa0d5bfc4..cce1399ca 100644
--- a/bsd/nfs/nfsnode.h
+++ b/bsd/nfs/nfsnode.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -75,6 +75,7 @@
 #ifndef _NFS_NFS_H_
 #include <nfs/nfs.h>
 #endif
+#include <sys/kauth.h>
 
 /*
  * Silly rename structure that hangs off the nfsnode until the name
@@ -255,6 +256,7 @@ struct nfs_dir_buf_header {
 /* ndbh_flags */
 #define NDB_FULL	0x0001	/* buffer has been filled */
 #define NDB_EOF		0x0002	/* buffer contains EOF */
+#define NDB_PLUS	0x0004	/* buffer contains RDIRPLUS data */
 
 #define NFS_DIR_BUF_FIRST_DIRENTRY(BP) \
 	((struct direntry*)((char*)((BP)->nb_data) + sizeof(*ndbhp)))
@@ -313,11 +315,14 @@ struct nfsdmap {
 
 struct nfs_vattr {
 	enum vtype	nva_type;	/* vnode type (for create) */
-	uint32_t	nva_mode;	/* files access mode (and type) */
+	uint32_t	nva_mode;	/* file's access mode (and type) */
 	uid_t		nva_uid;	/* owner user id */
 	gid_t		nva_gid;	/* owner group id */
+	guid_t		nva_uuuid;	/* owner user UUID */
+	guid_t		nva_guuid;	/* owner group UUID */
+	kauth_acl_t	nva_acl;	/* access control list */
 	nfs_specdata	nva_rawdev;	/* device the special file represents */
-	uint32_t	nva_flags;	/* file flags */
+	uint32_t	nva_flags;	/* file flags (see below) */
 	uint32_t	nva_maxlink;	/* maximum # of links (v4) */
 	uint64_t	nva_nlink;	/* number of references to file */
 	uint64_t	nva_fileid;	/* file id */
@@ -330,13 +335,35 @@ struct nfs_vattr {
 	uint32_t 	nva_bitmap[NFS_ATTR_BITMAP_LEN]; /* attributes that are valid */
 };
 
-#define NFS_FFLAG_ARCHIVED	0x0001
-#define NFS_FFLAG_HIDDEN	0x0002
-#define NFS_FFLAG_NAMED_ATTR	0x0004	/* file has named attributes */
+/* nva_flags */
+#define NFS_FFLAG_ARCHIVED		0x0001
+#define NFS_FFLAG_HIDDEN		0x0002
+#define NFS_FFLAG_HAS_NAMED_ATTRS	0x0004	/* file has named attributes */
+#define NFS_FFLAG_TRIGGER		0x0008	/* node is a trigger/mirror mount point */
+#define NFS_FFLAG_TRIGGER_REFERRAL	0x0010	/* trigger is a referral */
+#define NFS_FFLAG_IS_ATTR		0x8000	/* file is a named attribute file/directory */
 
 /* flags for nfs_getattr() */
-#define NGA_CACHED	0
-#define NGA_UNCACHED	1
+#define NGA_CACHED	0x0001	/* use cached attributes (if still valid) */
+#define NGA_UNCACHED	0x0002	/* fetch new attributes */
+#define NGA_ACL		0x0004	/* fetch ACL */
+#define NGA_MONITOR	0x0008	/* vnode monitor attr update poll */
+
+/* macros for initting/cleaning up nfs_vattr structures */
+#define	NVATTR_INIT(NVAP) \
+	do { \
+		NFS_CLEAR_ATTRIBUTES((NVAP)->nva_bitmap); \
+		(NVAP)->nva_flags = 0; \
+		(NVAP)->nva_acl = NULL; \
+	} while (0)
+#define	NVATTR_CLEANUP(NVAP) \
+	do { \
+		NFS_CLEAR_ATTRIBUTES((NVAP)->nva_bitmap); \
+		if ((NVAP)->nva_acl) { \
+			kauth_acl_free((NVAP)->nva_acl); \
+			(NVAP)->nva_acl = NULL; \
+		} \
+	} while (0)
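+
+/*
+ * Typical pattern (an illustrative sketch, assuming nfs_getattr()'s
+ * (node, vattr, context, flags) signature): pair NVATTR_INIT() with
+ * NVATTR_CLEANUP() so any ACL fetched into nva_acl is always freed:
+ *
+ *	struct nfs_vattr nvattr;
+ *	NVATTR_INIT(&nvattr);
+ *	error = nfs_getattr(np, &nvattr, ctx, NGA_ACL);
+ *	... examine nvattr ...
+ *	NVATTR_CLEANUP(&nvattr);
+ */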
 
 /*
  * macros for detecting node changes
@@ -416,17 +443,27 @@ struct nfs_open_file {
 	uint32_t			nof_rw;			/* read/write opens (deny none) */
 	uint32_t			nof_r_dw;		/* read deny-write opens */
 	/* the rest of the counts have a max of 2 (1 for open + 1 for mmap) */
-	uint32_t			nof_w_dw:4;		/* write deny-write opens (max 2) */
-	uint32_t			nof_rw_dw:4;		/* read/write deny-write opens (max 2) */
-	uint32_t			nof_r_drw:4;		/* read deny-read/write opens (max 2) */
-	uint32_t			nof_w_drw:4;		/* write deny-read/write opens (max 2) */
-	uint32_t			nof_rw_drw:4;		/* read/write deny-read/write opens (max 2) */
+	uint32_t			nof_w_dw:2;		/* write deny-write opens (max 2) */
+	uint32_t			nof_rw_dw:2;		/* read/write deny-write opens (max 2) */
+	uint32_t			nof_r_drw:2;		/* read deny-read/write opens (max 2) */
+	uint32_t			nof_w_drw:2;		/* write deny-read/write opens (max 2) */
+	uint32_t			nof_rw_drw:2;		/* read/write deny-read/write opens (max 2) */
+	/* counts of DELEGATED access/deny mode open combinations */
+	uint32_t			nof_d_w_dw:2;		/* write deny-write opens (max 2) */
+	uint32_t			nof_d_rw_dw:2;		/* read/write deny-write opens (max 2) */
+	uint32_t			nof_d_r_drw:2;		/* read deny-read/write opens (max 2) */
+	uint32_t			nof_d_w_drw:2;		/* write deny-read/write opens (max 2) */
+	uint32_t			nof_d_rw_drw:2;		/* read/write deny-read/write opens (max 2) */
+	uint32_t			nof_d_r;		/* read opens (deny none) */
+	uint32_t			nof_d_w;		/* write opens (deny none) */
+	uint32_t			nof_d_rw;		/* read/write opens (deny none) */
+	uint32_t			nof_d_r_dw;		/* read deny-write opens */
 };
 /* nof_flags */
 #define NFS_OPEN_FILE_BUSY	0x0001	/* open state-modifying operation in progress */
 #define NFS_OPEN_FILE_WANT	0x0002	/* someone else wants to mark busy */
-#define NFS_OPEN_FILE_CREATE	0x0004	/* has an open(RW) from a VNOP_CREATE call */
-#define NFS_OPEN_FILE_NEEDCLOSE	0x0008	/* has an open(R) from an (unopen) VNOP_READ call */
+#define NFS_OPEN_FILE_CREATE	0x0004	/* has an open(RW) from a "CREATE" call */
+#define NFS_OPEN_FILE_NEEDCLOSE	0x0008	/* has an open(R) from an (unopen) VNOP_READ or VNOP_MMAP call */
 #define NFS_OPEN_FILE_SETATTR	0x0020	/* has an open(W) to perform a SETATTR(size) */
 #define NFS_OPEN_FILE_POSIXLOCK	0x0040	/* server supports POSIX locking semantics */
 #define NFS_OPEN_FILE_LOST	0x0080	/* open state has been lost */
@@ -458,6 +495,7 @@ struct nfs_file_lock {
 #define NFS_FILE_LOCK_WAIT		0x08	/* may block on conflicting locks */
 #define NFS_FILE_LOCK_BLOCKED		0x10	/* request is blocked */
 #define NFS_FILE_LOCK_DEAD		0x20	/* lock (request) no longer exists */
+#define NFS_FILE_LOCK_DELEGATED		0x40	/* lock acquired via delegation */
 
 TAILQ_HEAD(nfs_file_lock_queue, nfs_file_lock);
 
@@ -514,14 +552,18 @@ struct nfsnode {
 	lck_rw_t		n_datalock;	/* nfs node data lock */
 	void			*n_datalockowner;/* nfs node data lock owner (exclusive) */
 	LIST_ENTRY(nfsnode)	n_hash;		/* Hash chain */
+	LIST_ENTRY(nfsnode)	n_monlink;	/* list of monitored nodes */
 	u_quad_t		n_size;		/* Current size of file */
 	u_quad_t		n_newsize;	/* new size of file (pending update) */
 	u_int64_t		n_xid;		/* last xid to loadattr */
 	struct nfs_vattr	n_vattr;	/* Vnode attribute cache */
 	time_t			n_attrstamp;	/* Attr. cache timestamp */
-	u_int8_t		n_mode[NFS_ACCESS_CACHE_SIZE+1];	/* ACCESS mode cache */
-	uid_t                   n_modeuid[NFS_ACCESS_CACHE_SIZE];	/* credentials having mode */
-	time_t                  n_modestamp[NFS_ACCESS_CACHE_SIZE];	/* mode cache timestamp */
+	time_t			n_aclstamp;	/* ACL cache timestamp */
+	time_t			n_evtstamp;	/* last vnode event timestamp */
+	uint32_t		n_events;	/* pending vnode events */
+	u_int8_t		n_access[NFS_ACCESS_CACHE_SIZE+1];	/* ACCESS cache */
+	uid_t                   n_accessuid[NFS_ACCESS_CACHE_SIZE];	/* credentials having access */
+	time_t                  n_accessstamp[NFS_ACCESS_CACHE_SIZE];	/* access cache timestamp */
 	union {
 	    struct {
 		struct timespec	n3_mtime;	/* Prev modify time. */
@@ -530,6 +572,8 @@ struct nfsnode {
 	    struct {
 		uint64_t	n4_change;	/* prev change attribute */
 		uint64_t	n4_ncchange;	/* namecache change attribute */
+		u_char		*n4_attrdirfh;	/* associated attr directory fh */
+		struct timeval	n4_lastio;	/* time of most recent I/O on attr */
 	    } v4;
 	} n_un4;
 	vnode_t			n_parent;	/* this node's parent */
@@ -555,7 +599,9 @@ struct nfsnode {
 	u_short			n_flag;		/* node flags */
 	u_short			n_hflag;	/* node hash flags */
 	u_short			n_bflag;	/* node buffer flags */
+	u_short			n_mflag;	/* node mount flags */
 	u_char			n_fh[NFS_SMALLFH];/* Small File Handle */
+	uint32_t		n_auth;		/* security flavor used for this node */
 	struct nfsbuflists	n_cleanblkhd;	/* clean blocklist head */
 	struct nfsbuflists	n_dirtyblkhd;	/* dirty blocklist head */
 	union {
@@ -567,7 +613,10 @@ struct nfsnode {
 		daddr64_t	nd_lastdbl;	/* last dir buf lookup block# */
 	} n_un6;
 	int			n_bufiterflags;	/* buf iterator flags */
-	int			n_numoutput;	/* I/O in progress */
+	union {
+		int		nf_numoutput;	/* write I/Os in progress */
+		int		nd_trigseq;	/* vnode trigger seq# */
+	} n_un7;
 	/* open state */
 	lck_mtx_t		n_openlock;	/* nfs node open lock */
 	uint32_t		n_openflags;	/* open state flags */
@@ -578,7 +627,9 @@ struct nfsnode {
 	struct nfs_file_lock_queue n_locks;	/* list of locks */
 	/* delegation state */
 	nfs_stateid		n_dstateid;	/* delegation stateid */
-	TAILQ_ENTRY(nfsnode)	n_dlink;	/* delegation recall list link */
+	TAILQ_ENTRY(nfsnode)	n_dlink;	/* delegation list link */
+	TAILQ_ENTRY(nfsnode)	n_dreturn;	/* delegation return list link */
+	struct kauth_ace	n_dace;		/* delegation ACE */
 };
 
 #define NFS_DATA_LOCK_SHARED	1
@@ -604,20 +655,25 @@ struct nfsnode {
 #define n_sillyrename		n_un3.nf_silly
 #define n_wrbusy		n_un5.nf_wrbusy
 #define n_needcommitcnt		n_un6.nf_needcommitcnt
+#define n_numoutput		n_un7.nf_numoutput
 #define n_cookieverf		n_un1.nd_cookieverf
 #define n_eofcookie		n_un2.nd_eofcookie
 #define n_cookiecache		n_un3.nd_cookiecache
 #define n_ncgen			n_un5.nd_ncgen
 #define n_lastdbl		n_un6.nd_lastdbl
+#define n_trigseq		n_un7.nd_trigseq
 #define n_mtime			n_un4.v3.n3_mtime
 #define n_ncmtime		n_un4.v3.n3_ncmtime
 #define n_change		n_un4.v4.n4_change
 #define n_ncchange		n_un4.v4.n4_ncchange
+#define n_attrdirfh		n_un4.v4.n4_attrdirfh
+#define n_lastio		n_un4.v4.n4_lastio
 
 /*
  * Flags for n_flag
  */
 #define	NUPDATESIZE	0x0001	/* size of file needs updating */
+#define	NREVOKE		0x0002	/* node revoked */
 #define	NMODIFIED	0x0004	/* Might have a modified buffer in bio */
 #define	NWRITEERR	0x0008	/* Flag write errors so close will know */
 #define	NNEEDINVALIDATE	0x0010	/* need to call vinvalbuf() */
@@ -629,6 +685,9 @@ struct nfsnode {
 #define	NNEGNCENTRIES	0x0800	/* directory has negative name cache entries */
 #define	NBUSY		0x1000	/* node is busy */
 #define	NBUSYWANT	0x2000	/* waiting on busy node */
+#define NISDOTZFS	0x4000	/* a ".zfs" directory */
+#define NISDOTZFSCHILD	0x8000	/* a child of a ".zfs" directory */
+
 
 /*
  * Flags for n_hflag
@@ -648,6 +707,13 @@ struct nfsnode {
 #define	NBINVALINPROG	0x0004	/* Avoid multiple calls to nfs_vinvalbuf() */
 #define	NBINVALWANT	0x0008	/* waiting for nfs_vinvalbuf() to complete */
 
+/*
+ * Flags for n_mflag
+ * Note: protected by nfsmount's nm_lock
+ */
+#define	NMMONSCANINPROG	0x0001	/* monitored node is currently updating attributes */
+#define	NMMONSCANWANT	0x0002	/* waiting for attribute update to complete */
+
 /*
  * n_openflags
  * Note: protected by n_openlock
@@ -657,18 +723,22 @@ struct nfsnode {
 #define N_DELEG_READ		0x0004	/* we have a read delegation */
 #define N_DELEG_WRITE		0x0008	/* we have a write delegation */
 #define N_DELEG_MASK		0x000c	/* delegation mask */
+#define N_DELEG_RETURN		0x0010	/* delegation queued for return */
+#define N_DELEG_RETURNING	0x0020	/* delegation being returned */
 
-/* attr/mode timestamp macros */
+/* attr/access/ACL cache timestamp macros */
 #define NATTRVALID(np)		((np)->n_attrstamp != ~0)
 #define NATTRINVALIDATE(np)	((np)->n_attrstamp = ~0)
-#define NMODEVALID(np, slot)	(((slot) >= 0) && ((slot) < 3) && ((np)->n_modestamp[(slot)] != ~0))
-#define NMODEINVALIDATE(np) \
+#define NACCESSVALID(np, slot)	(((slot) >= 0) && ((slot) < NFS_ACCESS_CACHE_SIZE) && ((np)->n_accessstamp[(slot)] != ~0))
+#define NACCESSINVALIDATE(np) \
 	do { \
-		(np)->n_modestamp[0] = ~0; \
-		(np)->n_modestamp[1] = ~0; \
-		(np)->n_modestamp[2] = ~0; \
-		(np)->n_mode[3] = 0; \
+		int __i; \
+		for (__i=0; __i < NFS_ACCESS_CACHE_SIZE; __i++) \
+			(np)->n_accessstamp[__i] = ~0; \
+		(np)->n_access[NFS_ACCESS_CACHE_SIZE] = 0; \
 	} while (0)
+#define NACLVALID(np)		((np)->n_aclstamp != ~0)
+#define NACLINVALIDATE(np)	((np)->n_aclstamp = ~0)
 
 /*
  * NFS-specific flags for nfs_vinvalbuf/nfs_flush
@@ -691,6 +761,16 @@ struct nfsnode {
 /* nfsnode hash table mutex */
 __private_extern__ lck_mtx_t *nfs_node_hash_mutex;
 
+/*
+ * printf-like helper macro that also outputs node name.
+ */
+#define NP(NP, FMT, ...) \
+	do { \
+		const char *__vname = (NP) ? vnode_getname(NFSTOV(NP)) : NULL; \
+		printf(FMT " %s\n", ##__VA_ARGS__, __vname ? __vname : "???"); \
+		if (__vname) vnode_putname(__vname); \
+	} while (0)
+
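+/*
+ * Example (illustrative): callers can tag diagnostics with the node's
+ * name instead of using a bare printf(), e.g.
+ *
+ *	NP(np, "nfs_vnop_close: error %d", error);
+ *
+ * prints the message followed by the vnode's name, or "???" when the
+ * name is unavailable.
+ */
+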
 /*
  * nfsiod structures
  */
@@ -743,7 +823,7 @@ void nfs_data_update_size(nfsnode_t, int);
 
 /* other stuff */
 int nfs_removeit(struct nfs_sillyrename *);
-int nfs_nget(mount_t,nfsnode_t,struct componentname *,u_char *,int,struct nfs_vattr *,u_int64_t *,int,nfsnode_t*);
+int nfs_nget(mount_t,nfsnode_t,struct componentname *,u_char *,int,struct nfs_vattr *,u_int64_t *,uint32_t,int,nfsnode_t*);
 void nfs_dir_cookie_cache(nfsnode_t, uint64_t, uint64_t);
 int nfs_dir_cookie_to_lbn(nfsnode_t, uint64_t, int *, uint64_t *);
 void nfs_invaldir(nfsnode_t);
diff --git a/bsd/nfs/nfsproto.h b/bsd/nfs/nfsproto.h
index 9823531f4..ec6bc9311 100644
--- a/bsd/nfs/nfsproto.h
+++ b/bsd/nfs/nfsproto.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -348,21 +348,21 @@ typedef enum { NFNON=0, NFREG=1, NFDIR=2, NFBLK=3, NFCHR=4, NFLNK=5,
  * NFS attribute management stuff
  */
 #define NFS_ATTR_BITMAP_LEN	2
-#define NFS_BITMAP_SET(A, I)	(((uint32_t *)(A))[(I)/32] |= 1<<((I)%32))
-#define NFS_BITMAP_CLR(A, I)	(((uint32_t *)(A))[(I)/32] &= ~(1<<((I)%32)))
-#define NFS_BITMAP_ISSET(A, I)	(((uint32_t *)(A))[(I)/32] & (1<<((I)%32)))
+#define NFS_BITMAP_SET(B, I)	(((uint32_t *)(B))[(I)/32] |= 1<<((I)%32))
+#define NFS_BITMAP_CLR(B, I)	(((uint32_t *)(B))[(I)/32] &= ~(1<<((I)%32)))
+#define NFS_BITMAP_ISSET(B, I)	(((uint32_t *)(B))[(I)/32] & (1<<((I)%32)))
+#define NFS_BITMAP_ZERO(B, L) \
+	do { \
+		int __i; \
+		for (__i=0; __i < (L); __i++) \
+			((uint32_t*)(B))[__i] = 0; \
+	} while (0)
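+
+/*
+ * Example (illustrative): with NFS_ATTR_BITMAP_LEN == 2 the bitmap is two
+ * 32-bit words, so bit I lives in word I/32 at bit position I%32.
+ * Requesting an attribute such as the NFSv4 size attribute might look like:
+ *
+ *	uint32_t bitmap[NFS_ATTR_BITMAP_LEN];
+ *	NFS_BITMAP_ZERO(bitmap, NFS_ATTR_BITMAP_LEN);
+ *	NFS_BITMAP_SET(bitmap, NFS_FATTR_SIZE);
+ *	if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_SIZE))
+ *		... include the size attribute in the request ...
+ */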
 
 __private_extern__ uint32_t nfs_fs_attr_bitmap[NFS_ATTR_BITMAP_LEN];
 __private_extern__ uint32_t nfs_object_attr_bitmap[NFS_ATTR_BITMAP_LEN];
 __private_extern__ uint32_t nfs_getattr_bitmap[NFS_ATTR_BITMAP_LEN];
 
-#define NFS_CLEAR_ATTRIBUTES(A) \
-	do { \
-	int __i; \
-	for (__i=0; __i < NFS_ATTR_BITMAP_LEN; __i++) \
-		((uint32_t*)(A))[__i] = 0; \
-	} while (0)
-
+#define NFS_CLEAR_ATTRIBUTES(A)	NFS_BITMAP_ZERO((A), NFS_ATTR_BITMAP_LEN)
 #define NFS_COPY_ATTRIBUTES(SRC, DST) \
 	do { \
 	int __i; \
@@ -571,7 +571,7 @@ __private_extern__ uint32_t nfs_getattr_bitmap[NFS_ATTR_BITMAP_LEN];
 	/* NFS_BITMAP_SET((A), NFS_FATTR_FILEHANDLE); */ \
 	/* optional: */ \
 	/* NFS_BITMAP_SET((A), NFS_FATTR_ACL); */ \
-	/* NFS_BITMAP_SET((A), NFS_FATTR_ACLSUPPORT); */ \
+	NFS_BITMAP_SET((A), NFS_FATTR_ACLSUPPORT); \
 	NFS_BITMAP_SET((A), NFS_FATTR_ARCHIVE); \
 	/* NFS_BITMAP_SET((A), NFS_FATTR_CANSETTIME); */ \
 	NFS_BITMAP_SET((A), NFS_FATTR_CASE_INSENSITIVE); \
@@ -612,7 +612,7 @@ __private_extern__ uint32_t nfs_getattr_bitmap[NFS_ATTR_BITMAP_LEN];
 	NFS_BITMAP_SET((A), NFS_FATTR_TIME_METADATA); \
 	NFS_BITMAP_SET((A), NFS_FATTR_TIME_MODIFY); \
 	/* NFS_BITMAP_SET((A), NFS_FATTR_TIME_MODIFY_SET); */ \
-	/* NFS_BITMAP_SET((A), NFS_FATTR_MOUNTED_ON_FILEID); */ \
+	NFS_BITMAP_SET((A), NFS_FATTR_MOUNTED_ON_FILEID); \
 	} while (0)
 
 /* attributes requested when we want to do a "statfs" */
@@ -637,6 +637,7 @@ __private_extern__ uint32_t nfs_getattr_bitmap[NFS_ATTR_BITMAP_LEN];
 #define NFS_LIMIT_SIZE				1
 #define NFS_LIMIT_BLOCKS			2
 /* access/deny modes */
+#define NFS_OPEN_SHARE_ACCESS_NONE		0x00000000
 #define NFS_OPEN_SHARE_ACCESS_READ		0x00000001
 #define NFS_OPEN_SHARE_ACCESS_WRITE		0x00000002
 #define NFS_OPEN_SHARE_ACCESS_BOTH		0x00000003
@@ -740,6 +741,7 @@ __private_extern__ uint32_t nfs_getattr_bitmap[NFS_ATTR_BITMAP_LEN];
 #define NFS_ACE_SUCCESSFUL_ACCESS_ACE_FLAG	0x00000010
 #define NFS_ACE_FAILED_ACCESS_ACE_FLAG		0x00000020
 #define NFS_ACE_IDENTIFIER_GROUP		0x00000040
+#define NFS_ACE_INHERITED_ACE			0x00000080
 /* ACE mask flags */
 #define NFS_ACE_READ_DATA			0x00000001
 #define NFS_ACE_LIST_DIRECTORY			0x00000001
diff --git a/bsd/nfs/nfsrvcache.h b/bsd/nfs/nfsrvcache.h
index fa23f1877..06bc9baeb 100644
--- a/bsd/nfs/nfsrvcache.h
+++ b/bsd/nfs/nfsrvcache.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -76,6 +76,13 @@
  * Definitions for the server recent request cache
  */
 
+/* Network address hash list element */
+union nethostaddr {
+	in_addr_t had_inetaddr;
+	struct in6_addr had_inet6addr;
+	mbuf_t had_nam;
+};
+
 #define	NFSRVCACHESIZ	64
 
 struct nfsrvcache {
@@ -86,6 +93,7 @@ struct nfsrvcache {
 		mbuf_t ru_repmb;		/* Reply mbuf list OR */
 		int ru_repstat;			/* Reply status */
 	} rc_un;
+	sa_family_t rc_family;			/* address family */
 	union nethostaddr rc_haddr;		/* Host address */
 	u_int32_t rc_proc;			/* rpc proc number */
 	u_char	rc_state;		/* Current state of request */
@@ -95,6 +103,7 @@ struct nfsrvcache {
 #define	rc_reply	rc_un.ru_repmb
 #define	rc_status	rc_un.ru_repstat
 #define	rc_inetaddr	rc_haddr.had_inetaddr
+#define	rc_inet6addr	rc_haddr.had_inet6addr
 #define	rc_nam		rc_haddr.had_nam
 
 /* Cache entry states */
diff --git a/bsd/nfs/rpcv2.h b/bsd/nfs/rpcv2.h
index 510f5110b..3a288f203 100644
--- a/bsd/nfs/rpcv2.h
+++ b/bsd/nfs/rpcv2.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -82,6 +82,7 @@
 
 /* Authentication */
 #define	RPCAUTH_NULL	0
+#define	RPCAUTH_NONE	RPCAUTH_NULL
 #define	RPCAUTH_UNIX	1
 #define RPCAUTH_SYS	RPCAUTH_UNIX
 #define	RPCAUTH_SHORT	2
@@ -89,6 +90,8 @@
 #define RPCAUTH_KRB5	390003
 #define RPCAUTH_KRB5I	390004
 #define RPCAUTH_KRB5P	390005
+#define RPCAUTH_INVALID	~0U
+#define RPCAUTH_UNKNOWN	RPCAUTH_INVALID
 
 #define	RPCAUTH_MAXSIZ	400
 #define	RPCAUTH_UNIXGIDS 16
diff --git a/bsd/nfs/xdr_subs.h b/bsd/nfs/xdr_subs.h
index 9a399db19..59356190a 100644
--- a/bsd/nfs/xdr_subs.h
+++ b/bsd/nfs/xdr_subs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -64,8 +64,6 @@
  *     @(#)xdr_subs.h  8.3 (Berkeley) 3/30/95
  * FreeBSD-Id: xdr_subs.h,v 1.9 1997/02/22 09:42:53 peter Exp $
  */
- 
-
 
 #ifndef _NFS_XDR_SUBS_H_
 #define _NFS_XDR_SUBS_H_
@@ -96,5 +94,418 @@
 	((uint32_t *)(t))[1] = htonl(((uint32_t *)(f))[_QUAD_LOWWORD]); \
 }
 
+
+/*
+ * xdrbuf
+ *
+ * generalized functionality for managing the building/dissecting of XDR data
+ */
+typedef enum xdrbuf_type { XDRBUF_BUFFER=1 } xdrbuf_type;
+
+struct xdrbuf {
+	union {
+		struct {
+			char *			xbb_base;	/* base address of buffer */
+			uint32_t		xbb_size;	/* size of buffer */
+			uint32_t		xbb_len;	/* length of data in buffer */
+		} xb_buffer;
+	} xb_u;
+	char *		xb_ptr;		/* pointer to current position */
+	size_t		xb_left;	/* bytes remaining in current buffer */
+	size_t		xb_growsize;	/* bytes to allocate when growing */
+	xdrbuf_type	xb_type;	/* type of xdr buffer */
+	uint32_t	xb_flags;	/* XB_* (see below) */
+};
+
+#define XB_CLEANUP	0x0001	/* needs cleanup */
+
+#define XDRWORD		4	/* the basic XDR building block is a 4 byte (32 bit) word */
+#define xdr_rndup(a)	(((a)+3)&(~0x3))	/* round up to XDRWORD size */
+#define xdr_pad(a)	(xdr_rndup(a) - (a))	/* calculate round up padding */
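+/* e.g. xdr_rndup(5) == 8 and xdr_pad(5) == 3: XDR always transmits
+ * opaque data in whole 4-byte words, zero-padded at the end */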
+
+void xb_init(struct xdrbuf *, xdrbuf_type);
+void xb_init_buffer(struct xdrbuf *, char *, size_t);
+void xb_cleanup(struct xdrbuf *);
+void *xb_malloc(size_t);
+void xb_free(void *);
+int xb_grow(struct xdrbuf *);
+void xb_set_cur_buf_len(struct xdrbuf *);
+char *xb_buffer_base(struct xdrbuf *);
+int xb_advance(struct xdrbuf *, uint32_t);
+int xb_offset(struct xdrbuf *);
+int xb_seek(struct xdrbuf *, uint32_t);
+int xb_add_bytes(struct xdrbuf *, const char *, uint32_t, int);
+int xb_get_bytes(struct xdrbuf *, char *, uint32_t, int);
+
+#ifdef _NFS_XDR_SUBS_FUNCS_
+
+/*
+ * basic initialization of xdrbuf structure
+ */
+void
+xb_init(struct xdrbuf *xbp, xdrbuf_type type)
+{
+	bzero(xbp, sizeof(*xbp));
+	xbp->xb_type = type;
+	xbp->xb_flags |= XB_CLEANUP;
+}
+
+/*
+ * initialize a single-buffer xdrbuf
+ */
+void
+xb_init_buffer(struct xdrbuf *xbp, char *buf, size_t buflen)
+{
+	xb_init(xbp, XDRBUF_BUFFER);
+	xbp->xb_u.xb_buffer.xbb_base = buf;
+	xbp->xb_u.xb_buffer.xbb_size = buflen;
+	xbp->xb_u.xb_buffer.xbb_len = buflen;
+	xbp->xb_growsize = 512;
+	xbp->xb_ptr = buf;
+	xbp->xb_left = buflen;
+	if (buf) /* when using an existing buffer, xb code should skip cleanup */
+		xbp->xb_flags &= ~XB_CLEANUP;
+}
+
+/*
+ * get the pointer to the single-buffer xdrbuf's buffer
+ */
+char *
+xb_buffer_base(struct xdrbuf *xbp)
+{
+	return (xbp->xb_u.xb_buffer.xbb_base);
+}
+
+/*
+ * clean up any resources held by an xdrbuf
+ */
+void
+xb_cleanup(struct xdrbuf *xbp)
+{
+	if (!(xbp->xb_flags & XB_CLEANUP))
+		return;
+	switch (xbp->xb_type) {
+	case XDRBUF_BUFFER:
+		if (xbp->xb_u.xb_buffer.xbb_base)
+			xb_free(xbp->xb_u.xb_buffer.xbb_base);
+		break;
+	}
+	xbp->xb_flags &= ~XB_CLEANUP;
+}
+
+/*
+ * set the length of valid data in the current buffer to
+ * be up to the current location within the buffer
+ */
+void
+xb_set_cur_buf_len(struct xdrbuf *xbp)
+{
+	switch (xbp->xb_type) {
+	case XDRBUF_BUFFER:
+		xbp->xb_u.xb_buffer.xbb_len = xbp->xb_ptr - xbp->xb_u.xb_buffer.xbb_base;
+		break;
+	}
+}
+
+/*
+ * advance forward through existing data in xdrbuf
+ */
+int
+xb_advance(struct xdrbuf *xbp, uint32_t len)
+{
+	uint32_t tlen;
+
+	while (len) {
+		if (xbp->xb_left <= 0)
+			return (EBADRPC);
+		tlen = MIN(xbp->xb_left, len);
+		if (tlen) {
+			xbp->xb_ptr += tlen;
+			xbp->xb_left -= tlen;
+			len -= tlen;
+		}
+	}
+	return (0);
+}
+
+/*
+ * Calculate the current offset in the XDR buffer.
+ */
+int
+xb_offset(struct xdrbuf *xbp)
+{
+	uint32_t offset = 0;
+
+	switch (xbp->xb_type) {
+	case XDRBUF_BUFFER:
+		offset = xbp->xb_ptr - xbp->xb_u.xb_buffer.xbb_base;
+		break;
+	}
+
+	return (offset);
+}
+
+/*
+ * Seek to the given offset in the existing data in the XDR buffer.
+ */
+int
+xb_seek(struct xdrbuf *xbp, uint32_t offset)
+{
+
+	switch (xbp->xb_type) {
+	case XDRBUF_BUFFER:
+		xbp->xb_ptr = xbp->xb_u.xb_buffer.xbb_base + offset;
+		xbp->xb_left = xbp->xb_u.xb_buffer.xbb_len - offset;
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * allocate memory
+ */
+void *
+xb_malloc(size_t size)
+{
+	void *buf = NULL;
+
+#ifdef KERNEL
+	MALLOC(buf, void *, size, M_TEMP, M_WAITOK);
+#else
+	buf = malloc(size);
+#endif
+	return (buf);
+}
+/*
+ * free a chunk of memory allocated with xb_malloc()
+ */
+void
+xb_free(void *buf)
+{
+#ifdef KERNEL
+	FREE(buf, M_TEMP);
+#else
+	free(buf);
+#endif
+}
+
+/*
+ * Increase space available for new data in XDR buffer.
+ */
+int
+xb_grow(struct xdrbuf *xbp)
+{
+	char *newbuf, *oldbuf;
+	size_t newsize, oldsize;
+
+	switch (xbp->xb_type) {
+	case XDRBUF_BUFFER:
+		oldsize = xbp->xb_u.xb_buffer.xbb_size;
+		oldbuf = xbp->xb_u.xb_buffer.xbb_base;
+		newsize = oldsize + xbp->xb_growsize;
+		newbuf = xb_malloc(newsize);
+		if (newbuf == NULL)
+			return (ENOMEM);
+		if (oldbuf != NULL) {
+			bcopy(oldbuf, newbuf, oldsize);
+			xb_free(oldbuf);
+		}
+		xbp->xb_u.xb_buffer.xbb_base = newbuf;
+		xbp->xb_u.xb_buffer.xbb_size = newsize;
+		xbp->xb_ptr = newbuf + oldsize;
+		xbp->xb_left = xbp->xb_growsize;
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * xb_add_bytes()
+ *
+ * Add "count" bytes of opaque data pointed to by "buf" to the given XDR buffer.
+ */
+int
+xb_add_bytes(struct xdrbuf *xbp, const char *buf, uint32_t count, int nopad)
+{
+	uint32_t len, tlen;
+	int error;
+
+	len = nopad ? count : xdr_rndup(count);
+
+	/* copy in "count" bytes and zero out any pad bytes */
+	while (len) {
+		if (xbp->xb_left <= 0) {
+			/* need more space */
+			if ((error = xb_grow(xbp)))
+				return (error);
+			if (xbp->xb_left <= 0)
+				return (ENOMEM);
+		}
+		tlen = MIN(xbp->xb_left, len);
+		if (tlen) {
+			if (count) {
+				if (tlen > count)
+					tlen = count;
+				bcopy(buf, xbp->xb_ptr, tlen);
+			} else {
+				bzero(xbp->xb_ptr, tlen);
+			}
+			xbp->xb_ptr += tlen;
+			xbp->xb_left -= tlen;
+			len -= tlen;
+			if (count) {
+				buf += tlen;
+				count -= tlen;
+			}
+		}
+	}
+	return (0);
+}
+
+/*
+ * xb_get_bytes()
+ *
+ * Get "count" bytes of opaque data from the given XDR buffer.
+ */
+int
+xb_get_bytes(struct xdrbuf *xbp, char *buf, uint32_t count, int nopad)
+{
+	uint32_t len, tlen;
+
+	len = nopad ? count : xdr_rndup(count);
+
+	/* copy out "count" bytes and skip past any pad bytes */
+	while (len) {
+		if (xbp->xb_left <= 0)
+			return (ENOMEM);
+		tlen = MIN(xbp->xb_left, len);
+		if (tlen) {
+			if (count) {
+				if (tlen > count)
+					tlen = count;
+				bcopy(xbp->xb_ptr, buf, tlen);
+			}
+			xbp->xb_ptr += tlen;
+			xbp->xb_left -= tlen;
+			len -= tlen;
+			if (count) {
+				buf += tlen;
+				count -= tlen;
+			}
+		}
+	}
+	return (0);
+}
+
+#endif /* _NFS_XDR_SUBS_FUNCS_ */
+
+
+/*
+ * macros for building XDR data
+ */
+
+/* finalize the data that has been added to the buffer */
+#define xb_build_done(E, XB) \
+	do { \
+		if (E) break; \
+		xb_set_cur_buf_len(XB); \
+	} while (0)
+
+/* add a 32-bit value */
+#define xb_add_32(E, XB, VAL) \
+	do { \
+		uint32_t __tmp; \
+		if (E) break; \
+		__tmp = txdr_unsigned(VAL); \
+		(E) = xb_add_bytes((XB), (void*)&__tmp, XDRWORD, 0); \
+	} while (0)
+
+/* add a 64-bit value */
+#define xb_add_64(E, XB, VAL) \
+	do { \
+		uint64_t __tmp1, __tmp2; \
+		if (E) break; \
+		__tmp1 = (VAL); \
+		txdr_hyper(&__tmp1, &__tmp2); \
+		(E) = xb_add_bytes((XB), (char*)&__tmp2, 2 * XDRWORD, 0); \
+	} while (0)
+
+/* add an array of XDR words */
+#define xb_add_word_array(E, XB, A, LEN) \
+	do { \
+		uint32_t __i; \
+		xb_add_32((E), (XB), (LEN)); \
+		for (__i=0; __i < (uint32_t)(LEN); __i++) \
+			xb_add_32((E), (XB), (A)[__i]); \
+	} while (0)
+#define xb_add_bitmap(E, XB, B, LEN)	xb_add_word_array((E), (XB), (B), (LEN))
+
+/* add a file handle */
+#define xb_add_fh(E, XB, FHP, FHLEN) \
+	do { \
+		xb_add_32((E), (XB), (FHLEN)); \
+		if (E) break; \
+		(E) = xb_add_bytes((XB), (char*)(FHP), (FHLEN), 0); \
+	} while (0)
+
+/* add a string */
+#define xb_add_string(E, XB, S, LEN) \
+	do { \
+		xb_add_32((E), (XB), (LEN)); \
+		if (E) break; \
+		(E) = xb_add_bytes((XB), (const char*)(S), (LEN), 0); \
+	} while (0)
+
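+/*
+ * Example (an illustrative sketch, not part of this change): the E/XB
+ * convention threads a single error value through a chain of adds, so
+ * callers check it once at the end.  Encoding a count and a name:
+ *
+ *	struct xdrbuf xb;
+ *	int error = 0;
+ *	xb_init_buffer(&xb, NULL, 0);	// grow an internal buffer as needed
+ *	xb_add_32(error, &xb, 1);	// one entry follows
+ *	xb_add_string(error, &xb, "example", 7);
+ *	xb_build_done(error, &xb);
+ *	if (!error)
+ *		... use xb_buffer_base(&xb), xb_offset(&xb) ...
+ *	xb_cleanup(&xb);	// frees the internally grown buffer
+ */
+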
+
+/*
+ * macros for decoding XDR data
+ */
+
+/* skip past data in the buffer */
+#define xb_skip(E, XB, LEN) \
+	do { \
+		if (E) break; \
+		(E) = xb_advance((XB), (LEN)); \
+	} while (0)
+
+/* get a 32-bit value */
+#define xb_get_32(E, XB, LVAL) \
+	do { \
+		uint32_t __tmp; \
+		if (E) break; \
+		(E) = xb_get_bytes((XB), (char*)&__tmp, XDRWORD, 0); \
+		if (E) break; \
+		(LVAL) = fxdr_unsigned(uint32_t, __tmp); \
+	} while (0)
+
+/* get a 64-bit value */
+#define xb_get_64(E, XB, LVAL) \
+	do { \
+		uint64_t __tmp; \
+		if (E) break; \
+		(E) = xb_get_bytes((XB), (char*)&__tmp, 2 * XDRWORD, 0); \
+		if (E) break; \
+		fxdr_hyper(&__tmp, &(LVAL)); \
+	} while (0)
+
+/* get an array of XDR words (of a given expected/maximum length) */
+#define xb_get_word_array(E, XB, A, LEN) \
+	do { \
+		uint32_t __len = 0, __i; \
+		xb_get_32((E), (XB), __len); \
+		if (E) break; \
+		for (__i=0; __i < MIN(__len, (uint32_t)(LEN)); __i++) \
+			xb_get_32((E), (XB), (A)[__i]); \
+		if (E) break; \
+		for (; __i < __len; __i++) \
+			xb_skip((E), (XB), XDRWORD); \
+		for (; __i < (uint32_t)(LEN); __i++) \
+			(A)[__i] = 0; \
+		(LEN) = __len; \
+	} while (0)
+#define xb_get_bitmap(E, XB, B, LEN)	xb_get_word_array((E), (XB), (B), (LEN))
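+
+/*
+ * Example (illustrative): decoding mirrors the encoding sketch above.
+ * Each macro is a no-op once E is set, so a parse can run straight
+ * through and check the error once:
+ *
+ *	uint32_t count = 0, len = 0;
+ *	char name[64];
+ *	struct xdrbuf xb;
+ *	int error = 0;
+ *	xb_init_buffer(&xb, databuf, datalen);	// existing XDR data
+ *	xb_get_32(error, &xb, count);
+ *	xb_get_32(error, &xb, len);
+ *	if (!error && (len < sizeof(name)))
+ *		error = xb_get_bytes(&xb, name, len, 0);
+ *
+ * where databuf/datalen stand in for a buffer of received XDR data.
+ */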
+
 #endif /* __APPLE_API_PRIVATE */
 #endif /* _NFS_XDR_SUBS_H_ */
diff --git a/bsd/ppc/Makefile b/bsd/ppc/Makefile
deleted file mode 100644
index 21878d7f3..000000000
--- a/bsd/ppc/Makefile
+++ /dev/null
@@ -1,33 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-DATAFILES = \
-	 endian.h fasttrap_isa.h param.h profile.h \
-	setjmp.h signal.h limits.h _limits.h \
-	types.h vmparam.h _structs.h _types.h _param.h
-
-KERNELFILES = \
-	disklabel.h \
-	 endian.h param.h profile.h \
-	   signal.h limits.h _limits.h \
-	 types.h  vmparam.h _structs.h _types.h _param.h
-
-INSTALL_MD_LIST = ${DATAFILES}
-INSTALL_MD_LCL_LIST = ${DATAFILES} disklabel.h
-
-INSTALL_MD_DIR = ppc
-
-EXPORT_MD_LIST = ${KERNELFILES}
-
-EXPORT_MD_DIR = ppc
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/bsd/ppc/_limits.h b/bsd/ppc/_limits.h
deleted file mode 100644
index d512ec411..000000000
--- a/bsd/ppc/_limits.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
- * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
- * 
- * @APPLE_LICENSE_HEADER_END@
- */
-#ifndef	_PPC__LIMITS_H_
-#define	_PPC__LIMITS_H_
-
-#define	__DARWIN_CLK_TCK		100	/* ticks per second */
-
-#endif	/* _PPC__LIMITS_H_ */
diff --git a/bsd/ppc/_param.h b/bsd/ppc/_param.h
deleted file mode 100644
index 938fc499f..000000000
--- a/bsd/ppc/_param.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#ifndef	_PPC__PARAM_H_
-#define	_PPC__PARAM_H_
-
-#include <ppc/_types.h>
-
-/*
- * Round p (pointer or byte index) up to a correctly-aligned value for all
- * data types (int, long, ...).   The result is unsigned int and must be
- * cast to any desired pointer type.
- */
-#define	__DARWIN_ALIGNBYTES	(sizeof(__darwin_size_t) - 1)
-#define	__DARWIN_ALIGN(p)	((__darwin_size_t)((char *)(__darwin_size_t)(p) + __DARWIN_ALIGNBYTES) &~ __DARWIN_ALIGNBYTES)
-
-#define      __DARWIN_ALIGNBYTES32     (sizeof(__uint32_t) - 1)
-#define       __DARWIN_ALIGN32(p)       ((__darwin_size_t)((char *)(__darwin_size_t)(p) + __DARWIN_ALIGNBYTES32) &~ __DARWIN_ALIGNBYTES32)
-
-
-#endif /* _PPC__PARAM_H_ */
diff --git a/bsd/ppc/_structs.h b/bsd/ppc/_structs.h
deleted file mode 100644
index c028f7efb..000000000
--- a/bsd/ppc/_structs.h
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/cdefs.h>
-
-#ifdef __need_mcontext_t
-#ifndef __need_struct_mcontext
-#define __need_struct_mcontext
-#endif /* __need_struct_mcontext */
-#endif /* __need_mcontext_t */
-
-#ifdef __need_mcontext64_t
-#ifndef __need_struct_mcontext64
-#define __need_struct_mcontext64
-#endif /* __need_struct_mcontext64 */
-#endif /* __need_mcontext64_t */
-
-#if defined(__need_struct_mcontext) || defined(__need_struct_mcontext64)
-#include <mach/ppc/_structs.h>
-#endif /* __need_struct_mcontext || __need_struct_mcontext64 */
-
-#ifdef __need_struct_mcontext
-#undef __need_struct_mcontext
-#ifndef _STRUCT_MCONTEXT
-#if __DARWIN_UNIX03
-#define	_STRUCT_MCONTEXT	struct __darwin_mcontext
-_STRUCT_MCONTEXT
-{
-	_STRUCT_PPC_EXCEPTION_STATE	__es;
-	_STRUCT_PPC_THREAD_STATE	__ss;
-	_STRUCT_PPC_FLOAT_STATE		__fs;
-	_STRUCT_PPC_VECTOR_STATE	__vs;
-};
-#else /* !__DARWIN_UNIX03 */
-#define	_STRUCT_MCONTEXT	struct mcontext
-_STRUCT_MCONTEXT
-{
-	_STRUCT_PPC_EXCEPTION_STATE	es;
-	_STRUCT_PPC_THREAD_STATE	ss;
-	_STRUCT_PPC_FLOAT_STATE		fs;
-	_STRUCT_PPC_VECTOR_STATE	vs;
-};
-#endif /* __DARWIN_UNIX03 */
-#endif /* _STRUCT_MCONTEXT */
-#endif /* __need_struct_mcontext */
-
-#ifdef __need_struct_mcontext64
-#undef __need_struct_mcontext64
-#ifndef _STRUCT_MCONTEXT64
-#if __DARWIN_UNIX03
-#define _STRUCT_MCONTEXT64	struct __darwin_mcontext64
-_STRUCT_MCONTEXT64
-{
-	_STRUCT_PPC_EXCEPTION_STATE64	__es;
-	_STRUCT_PPC_THREAD_STATE64	__ss;
-	_STRUCT_PPC_FLOAT_STATE		__fs;
-	_STRUCT_PPC_VECTOR_STATE	__vs;
-};
-#else /* !__DARWIN_UNIX03 */
-#define _STRUCT_MCONTEXT64	struct mcontext64
-_STRUCT_MCONTEXT64
-{
-	_STRUCT_PPC_EXCEPTION_STATE64	es;
-	_STRUCT_PPC_THREAD_STATE64	ss;
-	_STRUCT_PPC_FLOAT_STATE		fs;
-	_STRUCT_PPC_VECTOR_STATE	vs;
-};
-#endif /* __DARWIN_UNIX03 */
-#endif /* _STRUCT_MCONTEXT64 */
-#endif /* __need_struct_mcontext64 */
-
-#ifdef __need_mcontext_t
-#undef __need_mcontext_t
-#ifndef _MCONTEXT_T
-#define _MCONTEXT_T
-typedef _STRUCT_MCONTEXT	*mcontext_t;
-#endif /* _MCONTEXT_T */
-#endif /* __need_mcontext_t */
-
-#ifdef __need_mcontext64_t
-#undef __need_mcontext64_t
-#ifndef _MCONTEXT64_T
-#define _MCONTEXT64_T
-typedef _STRUCT_MCONTEXT64	*mcontext64_t;
-#endif /* _MCONTEXT64_T */
-#endif /* __need_mcontext64_t */
-
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#ifndef PPC_MCONTEXT_SIZE
-#define PPC_MCONTEXT_SIZE	(PPC_THREAD_STATE_COUNT + PPC_FLOAT_STATE_COUNT + PPC_EXCEPTION_STATE_COUNT + PPC_VECTOR_STATE_COUNT) * sizeof(int)
-#endif /* PPC_MCONTEXT_SIZE */
-#ifndef PPC_MCONTEXT64_SIZE
-#define PPC_MCONTEXT64_SIZE	(PPC_THREAD_STATE64_COUNT + PPC_FLOAT_STATE_COUNT + PPC_EXCEPTION_STATE_COUNT + PPC_VECTOR_STATE_COUNT) * sizeof(int)
-#endif /* PPC_MCONTEXT64_SIZE */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-
-/*
- * For now, just duplicate the 32-bit context as the generic one.
- */
-#ifdef __need_struct_sigcontext
-#undef __need_struct_sigcontext
-#ifndef _STRUCT_SIGCONTEXT
-#if __DARWIN_UNIX03		/* signal.h needs struct sigcontext visible */
-#define _STRUCT_SIGCONTEXT	struct __darwin_sigcontext
-_STRUCT_SIGCONTEXT
-{
-    int		__sc_onstack;	/* sigstack state to restore */
-    int		__sc_mask;	/* signal mask to restore */
-    int		__sc_ir;	/* pc */
-    int		__sc_psw;	/* processor status word */
-    int		__sc_sp;	/* stack pointer if sc_regs == NULL */
-    void	*__sc_regs;	/* (kernel private) saved state */
-};
-#else /* !__DARWIN_UNIX03 */
-#define _STRUCT_SIGCONTEXT	struct sigcontext
-_STRUCT_SIGCONTEXT
-{
-    int		sc_onstack;	/* sigstack state to restore */
-    int		sc_mask;	/* signal mask to restore */
-    int		sc_ir;		/* pc */
-    int		sc_psw;		/* processor status word */
-    int		sc_sp;		/* stack pointer if sc_regs == NULL */
-    void	*sc_regs;	/* (kernel private) saved state */
-};
-#endif /* __DARWIN_UNIX03 */
-#endif /* _STRUCT_SIGCONTEXT */
-#endif /* __need_struct_sigcontext */
-
-/*
- * Information pushed on stack when a signal is delivered.
- * This is used by the kernel to restore state following
- * execution of the signal handler.  It is also made available
- * to the handler to allow it to properly restore state if
- * a non-standard exit is performed.
- */
-#ifdef __need_struct_sigcontext32
-#undef __need_struct_sigcontext32
-#ifndef _STRUCT_SIGCONTEXT32
-#if __DARWIN_UNIX03
-#define _STRUCT_SIGCONTEXT32	struct __darwin_sigcontext32
-_STRUCT_SIGCONTEXT32
-{
-    int		__sc_onstack;	/* sigstack state to restore */
-    int		__sc_mask;	/* signal mask to restore */
-    int		__sc_ir;	/* pc */
-    int		__sc_psw;	/* processor status word */
-    int		__sc_sp;	/* stack pointer if sc_regs == NULL */
-    void	*__sc_regs;	/* (kernel private) saved state */
-};
-#else /* !__DARWIN_UNIX03 */
-#define _STRUCT_SIGCONTEXT32	struct sigcontext32
-_STRUCT_SIGCONTEXT32
-{
-    int		sc_onstack;	/* sigstack state to restore */
-    int		sc_mask;	/* signal mask to restore */
-    int		sc_ir;		/* pc */
-    int		sc_psw;		/* processor status word */
-    int		sc_sp;		/* stack pointer if sc_regs == NULL */
-    void	*sc_regs;	/* (kernel private) saved state */
-};
-#endif /* __DARWIN_UNIX03 */
-#endif /* _STRUCT_SIGCONTEXT32 */
-#endif /* __need_struct_sigcontext32 */
-
-#ifdef __need_struct_sigcontext64
-#undef __need_struct_sigcontext64
-#ifndef _STRUCT_SIGCONTEXT64
-#if __DARWIN_UNIX03
-#define _STRUCT_SIGCONTEXT64	struct __darwin_sigcontext64
-_STRUCT_SIGCONTEXT64
-{
-    int		__sc_onstack;	/* sigstack state to restore */
-    int		__sc_mask;	/* signal mask to restore */
-    long long	__sc_ir;	/* pc */
-    long long	__sc_psw;	/* processor status word */
-    long long	__sc_sp;	/* stack pointer if sc_regs == NULL */
-    void	*__sc_regs;	/* (kernel private) saved state */
-};
-#else /* !__DARWIN_UNIX03 */
-#define _STRUCT_SIGCONTEXT64	struct sigcontext64
-_STRUCT_SIGCONTEXT64
-{
-    int		sc_onstack;	/* sigstack state to restore */
-    int		sc_mask;	/* signal mask to restore */
-    long long	sc_ir;		/* pc */
-    long long	sc_psw;		/* processor status word */
-    long long	sc_sp;		/* stack pointer if sc_regs == NULL */
-    void	*sc_regs;	/* (kernel private) saved state */
-};
-#endif /* __DARWIN_UNIX03 */
-#endif /* _STRUCT_SIGCONTEXT64 */
-#endif /* __need_struct_sigcontext64 */
diff --git a/bsd/ppc/_types.h b/bsd/ppc/_types.h
deleted file mode 100644
index 4b7855988..000000000
--- a/bsd/ppc/_types.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#ifndef	_BSD_PPC__TYPES_H_
-#define	_BSD_PPC__TYPES_H_
-
-/*
- * This header file contains integer types.  It's intended to also contain
- * flotaing point and other arithmetic types, as needed, later.
- */
-
-#ifdef __GNUC__
-typedef __signed char		__int8_t;
-#else	/* !__GNUC__ */
-typedef char			__int8_t;
-#endif	/* !__GNUC__ */
-typedef unsigned char		__uint8_t;
-typedef	short			__int16_t;
-typedef	unsigned short		__uint16_t;
-typedef int			__int32_t;
-typedef unsigned int		__uint32_t;
-typedef long long		__int64_t;
-typedef unsigned long long	__uint64_t;
-
-typedef long			__darwin_intptr_t;
-typedef unsigned int		__darwin_natural_t;
-
-/*
- * The rune type below is declared to be an ``int'' instead of the more natural
- * ``unsigned long'' or ``long''.  Two things are happening here.  It is not
- * unsigned so that EOF (-1) can be naturally assigned to it and used.  Also,
- * it looks like 10646 will be a 31 bit standard.  This means that if your
- * ints cannot hold 32 bits, you will be in trouble.  The reason an int was
- * chosen over a long is that the is*() and to*() routines take ints (says
- * ANSI C), but they use __darwin_ct_rune_t instead of int.  By changing it
- * here, you lose a bit of ANSI conformance, but your programs will still
- * work.
- *
- * NOTE: rune_t is not covered by ANSI nor other standards, and should not
- * be instantiated outside of lib/libc/locale.  Use wchar_t.  wchar_t and
- * rune_t must be the same type.  Also wint_t must be no narrower than
- * wchar_t, and should also be able to hold all members of the largest
- * character set plus one extra value (WEOF). wint_t must be at least 16 bits.
- */
-
-typedef int			__darwin_ct_rune_t;	/* ct_rune_t */
-
-/*
- * mbstate_t is an opaque object to keep conversion state, during multibyte
- * stream conversions.  The content must not be referenced by user programs.
- */
-typedef union {
-	char		__mbstate8[128];
-	long long	_mbstateL;			/* for alignment */
-} __mbstate_t;
-
-typedef __mbstate_t		__darwin_mbstate_t;	/* mbstate_t */
-
-#if defined(__GNUC__) && defined(__PTRDIFF_TYPE__)
-typedef __PTRDIFF_TYPE__	__darwin_ptrdiff_t;	/* ptr1 - ptr2 */
-#else
-typedef int			__darwin_ptrdiff_t;	/* ptr1 - ptr2 */
-#endif /* __GNUC__ */
-
-#if defined(__GNUC__) && defined(__SIZE_TYPE__)
-typedef __SIZE_TYPE__		__darwin_size_t;	/* sizeof() */
-#else
-typedef unsigned long		__darwin_size_t;	/* sizeof() */
-#endif
-
-#if (__GNUC__ > 2)
-typedef __builtin_va_list	__darwin_va_list;	/* va_list */
-#else
-typedef char *			__darwin_va_list;	/* va_list */
-#endif
-
-#if defined(__GNUC__) && defined(__WCHAR_TYPE__)
-typedef __WCHAR_TYPE__		__darwin_wchar_t;	/* wchar_t */
-#else
-typedef __darwin_ct_rune_t	__darwin_wchar_t;	/* wchar_t */
-#endif
-
-typedef __darwin_wchar_t	__darwin_rune_t;	/* rune_t */
-
-#if defined(__GNUC__) && defined(__WINT_TYPE__)
-typedef __WINT_TYPE__		__darwin_wint_t;	/* wint_t */
-#else
-typedef __darwin_ct_rune_t	__darwin_wint_t;	/* wint_t */
-#endif
-
-typedef unsigned long		__darwin_clock_t;	/* clock() */
-typedef __uint32_t		__darwin_socklen_t;	/* socklen_t (duh) */
-typedef long			__darwin_ssize_t;	/* byte count or error */
-typedef long			__darwin_time_t;	/* time() */
-
-#endif	/* _BSD_PPC__TYPES_H_ */
diff --git a/bsd/ppc/decodePPC.h b/bsd/ppc/decodePPC.h
deleted file mode 100644
index 8fb4756f6..000000000
--- a/bsd/ppc/decodePPC.h
+++ /dev/null
@@ -1,919 +0,0 @@
-/*
- * Copyright (c) 2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-struct dcdtab {
-
-	uint8_t		dcdFlgs;			/* Flags needed to decode */
-#define dcdStep 0x80				/* Step to next table entry on non-match */
-#define dcdJump 0x40				/* Jump to new entry in table. Index is in dcdMatch. */
-#define dcdMask 0x0F				/* Index into mask table.  0 matches everything */
-
-	uint8_t		dcdType;			/* Instruction type */
-#define diINV  0x00
-#define diTRP  0x01
-#define diSC   0x02
-#define diRFI  0x03
-#define diB	   0x04
-#define diBC   0x05
-#define diBLR  0x06
-#define diBCTR 0x07
-#define diOR   0x08
-#define diSPR  0x09
-#define diCMN  0x0A
-#define diPRV  0x0B
-
-	uint16_t	dcdMatch;			/* Extended op code to match */
-};
-
-typedef struct dcdtab dcdtab;
-
-static uint16_t masktab[] = {0x0000, 0x0003, 0x001C, 0x001E, 0x003E, /* Table of extended op masks */
-	0x003F, 0x03FE, 0x03FF, 0x07FC, 0x07FE, 0x07FF};
-
-static dcdtab insts[] = {
-    { 0x40,      0,     64 },         //    0 Maj op =  0, jump to entry 64
-    { 0x00,  diINV, 0x0000 },         //    1 Maj op =  1, invalid
-    { 0x00,  diTRP, 0x0000 },         //    2 Maj op =  2, tdi
-    { 0x00,  diTRP, 0x0000 },         //    3 Maj op =  3, twi
-    { 0x40,      0,     65 },         //    4 Maj op =  4, jump to entry 65
-    { 0x00,  diINV, 0x0000 },         //    5 Maj op =  5, invalid
-    { 0x00,  diINV, 0x0000 },         //    6 Maj op =  6, invalid
-    { 0x00,  diCMN, 0x0000 },         //    7 Maj op =  7, mulli
-    { 0x00,  diCMN, 0x0000 },         //    8 Maj op =  8, subfic
-    { 0x00,  diINV, 0x0000 },         //    9 Maj op =  9, invalid
-    { 0x00,  diCMN, 0x0000 },         //   10 Maj op = 10, cmpli
-    { 0x00,  diCMN, 0x0000 },         //   11 Maj op = 11, cmpi
-    { 0x00,  diCMN, 0x0000 },         //   12 Maj op = 12, addic
-    { 0x00,  diCMN, 0x0000 },         //   13 Maj op = 13, addic.
-    { 0x00,  diCMN, 0x0000 },         //   14 Maj op = 14, addi
-    { 0x00,  diCMN, 0x0000 },         //   15 Maj op = 15, addis
-    { 0x00,   diBC, 0x0000 },         //   16 Maj op = 16, bc
-    { 0x00,   diSC, 0x0000 },         //   17 Maj op = 17, sc
-    { 0x00,    diB, 0x0000 },         //   18 Maj op = 18, b
-    { 0x40,      0,    209 },         //   19 Maj op = 19, jump to entry 209
-    { 0x00,  diCMN, 0x0000 },         //   20 Maj op = 20, rlwimi
-    { 0x00,  diCMN, 0x0000 },         //   21 Maj op = 21, rlwinm
-    { 0x00,  diINV, 0x0000 },         //   22 Maj op = 22, invalid
-    { 0x00,  diCMN, 0x0000 },         //   23 Maj op = 23, rlwnm
-    { 0x00,   diOR, 0x0000 },         //   24 Maj op = 24, ori
-    { 0x00,  diCMN, 0x0000 },         //   25 Maj op = 25, oris
-    { 0x00,  diCMN, 0x0000 },         //   26 Maj op = 26, xori
-    { 0x00,  diCMN, 0x0000 },         //   27 Maj op = 27, xoris
-    { 0x00,  diCMN, 0x0000 },         //   28 Maj op = 28, andi.
-    { 0x00,  diCMN, 0x0000 },         //   29 Maj op = 29, andis.
-    { 0x40,      0,    224 },         //   30 Maj op = 30, jump to entry 224
-    { 0x40,      0,    230 },         //   31 Maj op = 31, jump to entry 230
-    { 0x00,  diCMN, 0x0000 },         //   32 Maj op = 32, lwz
-    { 0x00,  diCMN, 0x0000 },         //   33 Maj op = 33, lwzu
-    { 0x00,  diCMN, 0x0000 },         //   34 Maj op = 34, lbz
-    { 0x00,  diCMN, 0x0000 },         //   35 Maj op = 35, lbzu
-    { 0x00,  diCMN, 0x0000 },         //   36 Maj op = 36, stw
-    { 0x00,  diCMN, 0x0000 },         //   37 Maj op = 37, stwu
-    { 0x00,  diCMN, 0x0000 },         //   38 Maj op = 38, stb
-    { 0x00,  diCMN, 0x0000 },         //   39 Maj op = 39, stbu
-    { 0x00,  diCMN, 0x0000 },         //   40 Maj op = 40, lhz
-    { 0x00,  diCMN, 0x0000 },         //   41 Maj op = 41, lhzu
-    { 0x00,  diCMN, 0x0000 },         //   42 Maj op = 42, lha
-    { 0x00,  diCMN, 0x0000 },         //   43 Maj op = 43, lhau
-    { 0x00,  diCMN, 0x0000 },         //   44 Maj op = 44, sth
-    { 0x00,  diCMN, 0x0000 },         //   45 Maj op = 45, sthu
-    { 0x00,  diCMN, 0x0000 },         //   46 Maj op = 46, lmw
-    { 0x00,  diCMN, 0x0000 },         //   47 Maj op = 47, stmw
-    { 0x00,  diCMN, 0x0000 },         //   48 Maj op = 48, lfs
-    { 0x00,  diCMN, 0x0000 },         //   49 Maj op = 49, lfsu
-    { 0x00,  diCMN, 0x0000 },         //   50 Maj op = 50, lfd
-    { 0x00,  diCMN, 0x0000 },         //   51 Maj op = 51, lfdu
-    { 0x00,  diCMN, 0x0000 },         //   52 Maj op = 52, stfs
-    { 0x00,  diCMN, 0x0000 },         //   53 Maj op = 53, stfsu
-    { 0x00,  diCMN, 0x0000 },         //   54 Maj op = 54, stfd
-    { 0x00,  diCMN, 0x0000 },         //   55 Maj op = 55, stfdu
-    { 0x00,  diINV, 0x0000 },         //   56 Maj op = 56, invalid
-    { 0x00,  diINV, 0x0000 },         //   57 Maj op = 57, invalid
-    { 0x40,      0,    365 },         //   58 Maj op = 58, jump to entry 365
-    { 0x40,      0,    368 },         //   59 Maj op = 59, jump to entry 368
-    { 0x00,  diINV, 0x0000 },         //   60 Maj op = 60, invalid
-    { 0x00,  diINV, 0x0000 },         //   61 Maj op = 61, invalid
-    { 0x40,      0,    378 },         //   62 Maj op = 62, jump to entry 378
-    { 0x40,      0,    380 },         //   63 Maj op = 63, jump to entry 380
-    { 0x09,  diCMN, 0x0200 },         //   64 Maj op =  0, mask = 07FE, xop = 0x0200 ( 256) - attn
-    { 0x85,  diCMN, 0x0020 },         //   65 Maj op =  4, mask = 003F, xop = 0x0020 (  32) - vmhaddshs
-    { 0x85,  diCMN, 0x0021 },         //   66 Maj op =  4, mask = 003F, xop = 0x0021 (  33) - vmhraddshs
-    { 0x85,  diCMN, 0x0022 },         //   67 Maj op =  4, mask = 003F, xop = 0x0022 (  34) - vmladduhm
-    { 0x85,  diCMN, 0x0024 },         //   68 Maj op =  4, mask = 003F, xop = 0x0024 (  36) - vmsumubm
-    { 0x85,  diCMN, 0x0025 },         //   69 Maj op =  4, mask = 003F, xop = 0x0025 (  37) - vmsummbm
-    { 0x85,  diCMN, 0x0026 },         //   70 Maj op =  4, mask = 003F, xop = 0x0026 (  38) - vmsumuhm
-    { 0x85,  diCMN, 0x0027 },         //   71 Maj op =  4, mask = 003F, xop = 0x0027 (  39) - vmsumuhs
-    { 0x85,  diCMN, 0x0028 },         //   72 Maj op =  4, mask = 003F, xop = 0x0028 (  40) - vmsumshm
-    { 0x85,  diCMN, 0x0029 },         //   73 Maj op =  4, mask = 003F, xop = 0x0029 (  41) - vmsumshs
-    { 0x85,  diCMN, 0x002A },         //   74 Maj op =  4, mask = 003F, xop = 0x002A (  42) - vsel
-    { 0x85,  diCMN, 0x002B },         //   75 Maj op =  4, mask = 003F, xop = 0x002B (  43) - vperm
-    { 0x85,  diCMN, 0x002C },         //   76 Maj op =  4, mask = 003F, xop = 0x002C (  44) - vsldoi
-    { 0x85,  diCMN, 0x002E },         //   77 Maj op =  4, mask = 003F, xop = 0x002E (  46) - vmaddfp
-    { 0x85,  diCMN, 0x002F },         //   78 Maj op =  4, mask = 003F, xop = 0x002F (  47) - vnmsubfp
-    { 0x87,  diCMN, 0x0006 },         //   79 Maj op =  4, mask = 03FF, xop = 0x0006 (   6) - vcmpequb
-    { 0x87,  diCMN, 0x0046 },         //   80 Maj op =  4, mask = 03FF, xop = 0x0046 (  70) - vcmpequh
-    { 0x87,  diCMN, 0x0086 },         //   81 Maj op =  4, mask = 03FF, xop = 0x0086 ( 134) - vcmpequw
-    { 0x87,  diCMN, 0x00C6 },         //   82 Maj op =  4, mask = 03FF, xop = 0x00C6 ( 198) - vcmpeqfp
-    { 0x87,  diCMN, 0x01C6 },         //   83 Maj op =  4, mask = 03FF, xop = 0x01C6 ( 454) - vcmpgefp
-    { 0x87,  diCMN, 0x0206 },         //   84 Maj op =  4, mask = 03FF, xop = 0x0206 ( 518) - vcmpgtub
-    { 0x87,  diCMN, 0x0246 },         //   85 Maj op =  4, mask = 03FF, xop = 0x0246 ( 582) - vcmpgtuh
-    { 0x87,  diCMN, 0x0286 },         //   86 Maj op =  4, mask = 03FF, xop = 0x0286 ( 646) - vcmpgtuw
-    { 0x87,  diCMN, 0x02C6 },         //   87 Maj op =  4, mask = 03FF, xop = 0x02C6 ( 710) - vcmpgtfp
-    { 0x87,  diCMN, 0x0306 },         //   88 Maj op =  4, mask = 03FF, xop = 0x0306 ( 774) - vcmpgtsb
-    { 0x87,  diCMN, 0x0346 },         //   89 Maj op =  4, mask = 03FF, xop = 0x0346 ( 838) - vcmpgtsh
-    { 0x87,  diCMN, 0x0386 },         //   90 Maj op =  4, mask = 03FF, xop = 0x0386 ( 902) - vcmpgtsw
-    { 0x87,  diCMN, 0x03C6 },         //   91 Maj op =  4, mask = 03FF, xop = 0x03C6 ( 966) - vcmpbfp
-    { 0x8A,  diCMN, 0x0000 },         //   92 Maj op =  4, mask = 07FF, xop = 0x0000 (   0) - vaddubm
-    { 0x8A,  diCMN, 0x0002 },         //   93 Maj op =  4, mask = 07FF, xop = 0x0002 (   2) - vmaxub
-    { 0x8A,  diCMN, 0x0004 },         //   94 Maj op =  4, mask = 07FF, xop = 0x0004 (   4) - vrlb
-    { 0x8A,  diCMN, 0x0008 },         //   95 Maj op =  4, mask = 07FF, xop = 0x0008 (   8) - vmuloub
-    { 0x8A,  diCMN, 0x000A },         //   96 Maj op =  4, mask = 07FF, xop = 0x000A (  10) - vaddfp
-    { 0x8A,  diCMN, 0x000C },         //   97 Maj op =  4, mask = 07FF, xop = 0x000C (  12) - vmrghb
-    { 0x8A,  diCMN, 0x000E },         //   98 Maj op =  4, mask = 07FF, xop = 0x000E (  14) - vpkuhum
-    { 0x8A,  diCMN, 0x0040 },         //   99 Maj op =  4, mask = 07FF, xop = 0x0040 (  64) - vadduhm
-    { 0x8A,  diCMN, 0x0042 },         //  100 Maj op =  4, mask = 07FF, xop = 0x0042 (  66) - vmaxuh
-    { 0x8A,  diCMN, 0x0044 },         //  101 Maj op =  4, mask = 07FF, xop = 0x0044 (  68) - vrlh
-    { 0x8A,  diCMN, 0x0048 },         //  102 Maj op =  4, mask = 07FF, xop = 0x0048 (  72) - vmulouh
-    { 0x8A,  diCMN, 0x004A },         //  103 Maj op =  4, mask = 07FF, xop = 0x004A (  74) - vsubfp
-    { 0x8A,  diCMN, 0x004C },         //  104 Maj op =  4, mask = 07FF, xop = 0x004C (  76) - vmrghh
-    { 0x8A,  diCMN, 0x004E },         //  105 Maj op =  4, mask = 07FF, xop = 0x004E (  78) - vpkuwum
-    { 0x8A,  diCMN, 0x0080 },         //  106 Maj op =  4, mask = 07FF, xop = 0x0080 ( 128) - vadduwm
-    { 0x8A,  diCMN, 0x0082 },         //  107 Maj op =  4, mask = 07FF, xop = 0x0082 ( 130) - vmaxuw
-    { 0x8A,  diCMN, 0x0084 },         //  108 Maj op =  4, mask = 07FF, xop = 0x0084 ( 132) - vrlw
-    { 0x8A,  diCMN, 0x008C },         //  109 Maj op =  4, mask = 07FF, xop = 0x008C ( 140) - vmrghw
-    { 0x8A,  diCMN, 0x008E },         //  110 Maj op =  4, mask = 07FF, xop = 0x008E ( 142) - vpkuhus
-    { 0x8A,  diCMN, 0x00CE },         //  111 Maj op =  4, mask = 07FF, xop = 0x00CE ( 206) - vpkuwus
-    { 0x8A,  diCMN, 0x0102 },         //  112 Maj op =  4, mask = 07FF, xop = 0x0102 ( 258) - vmaxsb
-    { 0x8A,  diCMN, 0x0104 },         //  113 Maj op =  4, mask = 07FF, xop = 0x0104 ( 260) - vslb
-    { 0x8A,  diCMN, 0x0108 },         //  114 Maj op =  4, mask = 07FF, xop = 0x0108 ( 264) - vmulosb
-    { 0x8A,  diCMN, 0x010A },         //  115 Maj op =  4, mask = 07FF, xop = 0x010A ( 266) - vrefp
-    { 0x8A,  diCMN, 0x010C },         //  116 Maj op =  4, mask = 07FF, xop = 0x010C ( 268) - vmrglb
-    { 0x8A,  diCMN, 0x010E },         //  117 Maj op =  4, mask = 07FF, xop = 0x010E ( 270) - vpkshus
-    { 0x8A,  diCMN, 0x0142 },         //  118 Maj op =  4, mask = 07FF, xop = 0x0142 ( 322) - vmaxsh
-    { 0x8A,  diCMN, 0x0144 },         //  119 Maj op =  4, mask = 07FF, xop = 0x0144 ( 324) - vslh
-    { 0x8A,  diCMN, 0x0148 },         //  120 Maj op =  4, mask = 07FF, xop = 0x0148 ( 328) - vmulosh
-    { 0x8A,  diCMN, 0x014A },         //  121 Maj op =  4, mask = 07FF, xop = 0x014A ( 330) - vrsqrtefp
-    { 0x8A,  diCMN, 0x014C },         //  122 Maj op =  4, mask = 07FF, xop = 0x014C ( 332) - vmrglh
-    { 0x8A,  diCMN, 0x014E },         //  123 Maj op =  4, mask = 07FF, xop = 0x014E ( 334) - vpkswus
-    { 0x8A,  diCMN, 0x0180 },         //  124 Maj op =  4, mask = 07FF, xop = 0x0180 ( 384) - vaddcuw
-    { 0x8A,  diCMN, 0x0182 },         //  125 Maj op =  4, mask = 07FF, xop = 0x0182 ( 386) - vmaxsw
-    { 0x8A,  diCMN, 0x0184 },         //  126 Maj op =  4, mask = 07FF, xop = 0x0184 ( 388) - vslw
-    { 0x8A,  diCMN, 0x018A },         //  127 Maj op =  4, mask = 07FF, xop = 0x018A ( 394) - vexptefp
-    { 0x8A,  diCMN, 0x018C },         //  128 Maj op =  4, mask = 07FF, xop = 0x018C ( 396) - vmrglw
-    { 0x8A,  diCMN, 0x018E },         //  129 Maj op =  4, mask = 07FF, xop = 0x018E ( 398) - vpkshss
-    { 0x8A,  diCMN, 0x01C4 },         //  130 Maj op =  4, mask = 07FF, xop = 0x01C4 ( 452) - vsl
-    { 0x8A,  diCMN, 0x01CA },         //  131 Maj op =  4, mask = 07FF, xop = 0x01CA ( 458) - vlogefp
-    { 0x8A,  diCMN, 0x01CE },         //  132 Maj op =  4, mask = 07FF, xop = 0x01CE ( 462) - vpkswss
-    { 0x8A,  diCMN, 0x0200 },         //  133 Maj op =  4, mask = 07FF, xop = 0x0200 ( 512) - vaddubs
-    { 0x8A,  diCMN, 0x0202 },         //  134 Maj op =  4, mask = 07FF, xop = 0x0202 ( 514) - vminub
-    { 0x8A,  diCMN, 0x0204 },         //  135 Maj op =  4, mask = 07FF, xop = 0x0204 ( 516) - vsrb
-    { 0x8A,  diCMN, 0x0208 },         //  136 Maj op =  4, mask = 07FF, xop = 0x0208 ( 520) - vmuleub
-    { 0x8A,  diCMN, 0x020A },         //  137 Maj op =  4, mask = 07FF, xop = 0x020A ( 522) - vrfin
-    { 0x8A,  diCMN, 0x020C },         //  138 Maj op =  4, mask = 07FF, xop = 0x020C ( 524) - vspltb
-    { 0x8A,  diCMN, 0x020E },         //  139 Maj op =  4, mask = 07FF, xop = 0x020E ( 526) - vupkhsb
-    { 0x8A,  diCMN, 0x0240 },         //  140 Maj op =  4, mask = 07FF, xop = 0x0240 ( 576) - vadduhs
-    { 0x8A,  diCMN, 0x0242 },         //  141 Maj op =  4, mask = 07FF, xop = 0x0242 ( 578) - vminuh
-    { 0x8A,  diCMN, 0x0244 },         //  142 Maj op =  4, mask = 07FF, xop = 0x0244 ( 580) - vsrh
-    { 0x8A,  diCMN, 0x0248 },         //  143 Maj op =  4, mask = 07FF, xop = 0x0248 ( 584) - vmuleuh
-    { 0x8A,  diCMN, 0x024A },         //  144 Maj op =  4, mask = 07FF, xop = 0x024A ( 586) - vrfiz
-    { 0x8A,  diCMN, 0x024C },         //  145 Maj op =  4, mask = 07FF, xop = 0x024C ( 588) - vsplth
-    { 0x8A,  diCMN, 0x024E },         //  146 Maj op =  4, mask = 07FF, xop = 0x024E ( 590) - vupkhsh
-    { 0x8A,  diCMN, 0x0280 },         //  147 Maj op =  4, mask = 07FF, xop = 0x0280 ( 640) - vadduws
-    { 0x8A,  diCMN, 0x0282 },         //  148 Maj op =  4, mask = 07FF, xop = 0x0282 ( 642) - vminuw
-    { 0x8A,  diCMN, 0x0284 },         //  149 Maj op =  4, mask = 07FF, xop = 0x0284 ( 644) - vsrw
-    { 0x8A,  diCMN, 0x028A },         //  150 Maj op =  4, mask = 07FF, xop = 0x028A ( 650) - vrfip
-    { 0x8A,  diCMN, 0x028C },         //  151 Maj op =  4, mask = 07FF, xop = 0x028C ( 652) - vspltw
-    { 0x8A,  diCMN, 0x028E },         //  152 Maj op =  4, mask = 07FF, xop = 0x028E ( 654) - vupklsb
-    { 0x8A,  diCMN, 0x02C4 },         //  153 Maj op =  4, mask = 07FF, xop = 0x02C4 ( 708) - vsr
-    { 0x8A,  diCMN, 0x02CA },         //  154 Maj op =  4, mask = 07FF, xop = 0x02CA ( 714) - vrfim
-    { 0x8A,  diCMN, 0x02CE },         //  155 Maj op =  4, mask = 07FF, xop = 0x02CE ( 718) - vupklsh
-    { 0x8A,  diCMN, 0x0300 },         //  156 Maj op =  4, mask = 07FF, xop = 0x0300 ( 768) - vaddsbs
-    { 0x8A,  diCMN, 0x0302 },         //  157 Maj op =  4, mask = 07FF, xop = 0x0302 ( 770) - vminsb
-    { 0x8A,  diCMN, 0x0304 },         //  158 Maj op =  4, mask = 07FF, xop = 0x0304 ( 772) - vsrab
-    { 0x8A,  diCMN, 0x0308 },         //  159 Maj op =  4, mask = 07FF, xop = 0x0308 ( 776) - vmulesb
-    { 0x8A,  diCMN, 0x030A },         //  160 Maj op =  4, mask = 07FF, xop = 0x030A ( 778) - vcfux
-    { 0x8A,  diCMN, 0x030C },         //  161 Maj op =  4, mask = 07FF, xop = 0x030C ( 780) - vspltisb
-    { 0x8A,  diCMN, 0x030E },         //  162 Maj op =  4, mask = 07FF, xop = 0x030E ( 782) - vpkpx
-    { 0x8A,  diCMN, 0x0340 },         //  163 Maj op =  4, mask = 07FF, xop = 0x0340 ( 832) - vaddshs
-    { 0x8A,  diCMN, 0x0342 },         //  164 Maj op =  4, mask = 07FF, xop = 0x0342 ( 834) - vminsh
-    { 0x8A,  diCMN, 0x0344 },         //  165 Maj op =  4, mask = 07FF, xop = 0x0344 ( 836) - vsrah
-    { 0x8A,  diCMN, 0x0348 },         //  166 Maj op =  4, mask = 07FF, xop = 0x0348 ( 840) - vmulesh
-    { 0x8A,  diCMN, 0x034A },         //  167 Maj op =  4, mask = 07FF, xop = 0x034A ( 842) - vcfsx
-    { 0x8A,  diCMN, 0x034C },         //  168 Maj op =  4, mask = 07FF, xop = 0x034C ( 844) - vspltish
-    { 0x8A,  diCMN, 0x034E },         //  169 Maj op =  4, mask = 07FF, xop = 0x034E ( 846) - vupkhpx
-    { 0x8A,  diCMN, 0x0380 },         //  170 Maj op =  4, mask = 07FF, xop = 0x0380 ( 896) - vaddsws
-    { 0x8A,  diCMN, 0x0382 },         //  171 Maj op =  4, mask = 07FF, xop = 0x0382 ( 898) - vminsw
-    { 0x8A,  diCMN, 0x0384 },         //  172 Maj op =  4, mask = 07FF, xop = 0x0384 ( 900) - vsraw
-    { 0x8A,  diCMN, 0x038A },         //  173 Maj op =  4, mask = 07FF, xop = 0x038A ( 906) - vctuxs
-    { 0x8A,  diCMN, 0x038C },         //  174 Maj op =  4, mask = 07FF, xop = 0x038C ( 908) - vspltisw
-    { 0x8A,  diCMN, 0x03CA },         //  175 Maj op =  4, mask = 07FF, xop = 0x03CA ( 970) - vctsxs
-    { 0x8A,  diCMN, 0x03CE },         //  176 Maj op =  4, mask = 07FF, xop = 0x03CE ( 974) - vupklpx
-    { 0x8A,  diCMN, 0x0400 },         //  177 Maj op =  4, mask = 07FF, xop = 0x0400 (1024) - vsububm
-    { 0x8A,  diCMN, 0x0402 },         //  178 Maj op =  4, mask = 07FF, xop = 0x0402 (1026) - vavgub
-    { 0x8A,  diCMN, 0x0404 },         //  179 Maj op =  4, mask = 07FF, xop = 0x0404 (1028) - vand
-    { 0x8A,  diCMN, 0x040A },         //  180 Maj op =  4, mask = 07FF, xop = 0x040A (1034) - vmaxfp
-    { 0x8A,  diCMN, 0x040C },         //  181 Maj op =  4, mask = 07FF, xop = 0x040C (1036) - vslo
-    { 0x8A,  diCMN, 0x0440 },         //  182 Maj op =  4, mask = 07FF, xop = 0x0440 (1088) - vsubuhm
-    { 0x8A,  diCMN, 0x0442 },         //  183 Maj op =  4, mask = 07FF, xop = 0x0442 (1090) - vavguh
-    { 0x8A,  diCMN, 0x0444 },         //  184 Maj op =  4, mask = 07FF, xop = 0x0444 (1092) - vandc
-    { 0x8A,  diCMN, 0x044A },         //  185 Maj op =  4, mask = 07FF, xop = 0x044A (1098) - vminfp
-    { 0x8A,  diCMN, 0x044C },         //  186 Maj op =  4, mask = 07FF, xop = 0x044C (1100) - vsro
-    { 0x8A,  diCMN, 0x0480 },         //  187 Maj op =  4, mask = 07FF, xop = 0x0480 (1152) - vsubuwm
-    { 0x8A,  diCMN, 0x0482 },         //  188 Maj op =  4, mask = 07FF, xop = 0x0482 (1154) - vavguw
-    { 0x8A,  diCMN, 0x0484 },         //  189 Maj op =  4, mask = 07FF, xop = 0x0484 (1156) - vor
-    { 0x8A,  diCMN, 0x04C4 },         //  190 Maj op =  4, mask = 07FF, xop = 0x04C4 (1220) - vxor
-    { 0x8A,  diCMN, 0x0502 },         //  191 Maj op =  4, mask = 07FF, xop = 0x0502 (1282) - vavgsb
-    { 0x8A,  diCMN, 0x0504 },         //  192 Maj op =  4, mask = 07FF, xop = 0x0504 (1284) - vnor
-    { 0x8A,  diCMN, 0x0542 },         //  193 Maj op =  4, mask = 07FF, xop = 0x0542 (1346) - vavgsh
-    { 0x8A,  diCMN, 0x0580 },         //  194 Maj op =  4, mask = 07FF, xop = 0x0580 (1408) - vsubcuw
-    { 0x8A,  diCMN, 0x0582 },         //  195 Maj op =  4, mask = 07FF, xop = 0x0582 (1410) - vavgsw
-    { 0x8A,  diCMN, 0x0600 },         //  196 Maj op =  4, mask = 07FF, xop = 0x0600 (1536) - vsububs
-    { 0x8A,  diCMN, 0x0604 },         //  197 Maj op =  4, mask = 07FF, xop = 0x0604 (1540) - mfvscr
-    { 0x8A,  diCMN, 0x0608 },         //  198 Maj op =  4, mask = 07FF, xop = 0x0608 (1544) - vsum4ubs
-    { 0x8A,  diCMN, 0x0640 },         //  199 Maj op =  4, mask = 07FF, xop = 0x0640 (1600) - vsubuhs
-    { 0x8A,  diCMN, 0x0644 },         //  200 Maj op =  4, mask = 07FF, xop = 0x0644 (1604) - mtvscr
-    { 0x8A,  diCMN, 0x0648 },         //  201 Maj op =  4, mask = 07FF, xop = 0x0648 (1608) - vsum4shs
-    { 0x8A,  diCMN, 0x0680 },         //  202 Maj op =  4, mask = 07FF, xop = 0x0680 (1664) - vsubuws
-    { 0x8A,  diCMN, 0x0688 },         //  203 Maj op =  4, mask = 07FF, xop = 0x0688 (1672) - vsum2sws
-    { 0x8A,  diCMN, 0x0700 },         //  204 Maj op =  4, mask = 07FF, xop = 0x0700 (1792) - vsubsbs
-    { 0x8A,  diCMN, 0x0708 },         //  205 Maj op =  4, mask = 07FF, xop = 0x0708 (1800) - vsum4sbs
-    { 0x8A,  diCMN, 0x0740 },         //  206 Maj op =  4, mask = 07FF, xop = 0x0740 (1856) - vsubshs
-    { 0x8A,  diCMN, 0x0780 },         //  207 Maj op =  4, mask = 07FF, xop = 0x0780 (1920) - vsubsws
-    { 0x0A,  diCMN, 0x0788 },         //  208 Maj op =  4, mask = 07FF, xop = 0x0788 (1928) - vsumsws
-    { 0x89,  diCMN, 0x0000 },         //  209 Maj op = 19, mask = 07FE, xop = 0x0000 (   0) - mcrf
-    { 0x89,  diBLR, 0x0020 },         //  210 Maj op = 19, mask = 07FE, xop = 0x0020 (  16) - bclr
-    { 0x89,  diPRV, 0x0024 },         //  211 Maj op = 19, mask = 07FE, xop = 0x0024 (  18) - rfid
-    { 0x89,  diCMN, 0x0042 },         //  212 Maj op = 19, mask = 07FE, xop = 0x0042 (  33) - crnor
-    { 0x89,  diPRV, 0x0064 },         //  213 Maj op = 19, mask = 07FE, xop = 0x0064 (  50) - rfi
-    { 0x89,  diCMN, 0x0102 },         //  214 Maj op = 19, mask = 07FE, xop = 0x0102 ( 129) - crandc
-    { 0x89,  diCMN, 0x012C },         //  215 Maj op = 19, mask = 07FE, xop = 0x012C ( 150) - isync
-    { 0x89,  diCMN, 0x0182 },         //  216 Maj op = 19, mask = 07FE, xop = 0x0182 ( 193) - crxor
-    { 0x89,  diCMN, 0x01C2 },         //  217 Maj op = 19, mask = 07FE, xop = 0x01C2 ( 225) - crnand
-    { 0x89,  diCMN, 0x0202 },         //  218 Maj op = 19, mask = 07FE, xop = 0x0202 ( 257) - crand
-    { 0x89,  diPRV, 0x0224 },         //  219 Maj op = 19, mask = 07FE, xop = 0x0224 ( 274) - hrfid
-    { 0x89,  diCMN, 0x0242 },         //  220 Maj op = 19, mask = 07FE, xop = 0x0242 ( 289) - creqv
-    { 0x89,  diCMN, 0x0342 },         //  221 Maj op = 19, mask = 07FE, xop = 0x0342 ( 417) - crorc
-    { 0x89,  diCMN, 0x0382 },         //  222 Maj op = 19, mask = 07FE, xop = 0x0382 ( 449) - cror
-    { 0x09, diBCTR, 0x0420 },         //  223 Maj op = 19, mask = 07FE, xop = 0x0420 ( 528) - bctr
-    { 0x82,  diCMN, 0x0000 },         //  224 Maj op = 30, mask = 001C, xop = 0x0000 (   0) - rldicl
-    { 0x82,  diCMN, 0x0004 },         //  225 Maj op = 30, mask = 001C, xop = 0x0004 (   1) - rldicr
-    { 0x82,  diCMN, 0x0008 },         //  226 Maj op = 30, mask = 001C, xop = 0x0008 (   2) - rldic
-    { 0x82,  diCMN, 0x000C },         //  227 Maj op = 30, mask = 001C, xop = 0x000C (   3) - rldimi
-    { 0x83,  diCMN, 0x0010 },         //  228 Maj op = 30, mask = 001E, xop = 0x0010 (   8) - rldcl
-    { 0x03,  diCMN, 0x0012 },         //  229 Maj op = 30, mask = 001E, xop = 0x0012 (   9) - rldcr
-    { 0x86,  diCMN, 0x0010 },         //  230 Maj op = 31, mask = 03FE, xop = 0x0010 (   8) - subfc
-    { 0x86,  diCMN, 0x0012 },         //  231 Maj op = 31, mask = 03FE, xop = 0x0012 (   9) - mulhdu
-    { 0x86,  diCMN, 0x0014 },         //  232 Maj op = 31, mask = 03FE, xop = 0x0014 (  10) - addc
-    { 0x86,  diCMN, 0x0016 },         //  233 Maj op = 31, mask = 03FE, xop = 0x0016 (  11) - mulhwu
-    { 0x86,  diCMN, 0x0050 },         //  234 Maj op = 31, mask = 03FE, xop = 0x0050 (  40) - subf
-    { 0x86,  diCMN, 0x0092 },         //  235 Maj op = 31, mask = 03FE, xop = 0x0092 (  73) - mulhd
-    { 0x86,  diCMN, 0x0096 },         //  236 Maj op = 31, mask = 03FE, xop = 0x0096 (  75) - mulhw
-    { 0x86,  diCMN, 0x00D0 },         //  237 Maj op = 31, mask = 03FE, xop = 0x00D0 ( 104) - neg
-    { 0x86,  diCMN, 0x0110 },         //  238 Maj op = 31, mask = 03FE, xop = 0x0110 ( 136) - subfe
-    { 0x86,  diCMN, 0x0114 },         //  239 Maj op = 31, mask = 03FE, xop = 0x0114 ( 138) - adde
-    { 0x86,  diCMN, 0x0190 },         //  240 Maj op = 31, mask = 03FE, xop = 0x0190 ( 200) - subfze
-    { 0x86,  diCMN, 0x0194 },         //  241 Maj op = 31, mask = 03FE, xop = 0x0194 ( 202) - addze
-    { 0x86,  diCMN, 0x01D0 },         //  242 Maj op = 31, mask = 03FE, xop = 0x01D0 ( 232) - subfme
-    { 0x86,  diCMN, 0x01D2 },         //  243 Maj op = 31, mask = 03FE, xop = 0x01D2 ( 233) - mulld
-    { 0x86,  diCMN, 0x01D4 },         //  244 Maj op = 31, mask = 03FE, xop = 0x01D4 ( 234) - addme
-    { 0x86,  diCMN, 0x01D6 },         //  245 Maj op = 31, mask = 03FE, xop = 0x01D6 ( 235) - mullw
-    { 0x86,  diCMN, 0x0214 },         //  246 Maj op = 31, mask = 03FE, xop = 0x0214 ( 266) - add
-    { 0x86,  diCMN, 0x0392 },         //  247 Maj op = 31, mask = 03FE, xop = 0x0392 ( 457) - divdu
-    { 0x86,  diCMN, 0x0396 },         //  248 Maj op = 31, mask = 03FE, xop = 0x0396 ( 459) - divwu
-    { 0x86,  diCMN, 0x03D2 },         //  249 Maj op = 31, mask = 03FE, xop = 0x03D2 ( 489) - divd
-    { 0x86,  diCMN, 0x03D6 },         //  250 Maj op = 31, mask = 03FE, xop = 0x03D6 ( 491) - divw
-    { 0x88,  diCMN, 0x0674 },         //  251 Maj op = 31, mask = 07FC, xop = 0x0674 ( 413) - sradi
-    { 0x89,  diCMN, 0x0000 },         //  252 Maj op = 31, mask = 07FE, xop = 0x0000 (   0) - cmp
-    { 0x89,  diTRP, 0x0008 },         //  253 Maj op = 31, mask = 07FE, xop = 0x0008 (   4) - tw
-    { 0x89,  diCMN, 0x000C },         //  254 Maj op = 31, mask = 07FE, xop = 0x000C (   6) - lvsl
-    { 0x89,  diCMN, 0x000E },         //  255 Maj op = 31, mask = 07FE, xop = 0x000E (   7) - lvebx
-    { 0x89,  diCMN, 0x0026 },         //  256 Maj op = 31, mask = 07FE, xop = 0x0026 (  19) - mfcr
-    { 0x89,  diCMN, 0x0028 },         //  257 Maj op = 31, mask = 07FE, xop = 0x0028 (  20) - lwarx
-    { 0x89,  diCMN, 0x002A },         //  258 Maj op = 31, mask = 07FE, xop = 0x002A (  21) - ldx
-    { 0x89,  diCMN, 0x002E },         //  259 Maj op = 31, mask = 07FE, xop = 0x002E (  23) - lwzx
-    { 0x89,  diCMN, 0x0030 },         //  260 Maj op = 31, mask = 07FE, xop = 0x0030 (  24) - slw
-    { 0x89,  diCMN, 0x0034 },         //  261 Maj op = 31, mask = 07FE, xop = 0x0034 (  26) - cntlzw
-    { 0x89,  diCMN, 0x0036 },         //  262 Maj op = 31, mask = 07FE, xop = 0x0036 (  27) - sld
-    { 0x89,  diCMN, 0x0038 },         //  263 Maj op = 31, mask = 07FE, xop = 0x0038 (  28) - and
-    { 0x89,  diCMN, 0x0040 },         //  264 Maj op = 31, mask = 07FE, xop = 0x0040 (  32) - cmpl
-    { 0x89,  diCMN, 0x004C },         //  265 Maj op = 31, mask = 07FE, xop = 0x004C (  38) - lvsr
-    { 0x89,  diCMN, 0x004E },         //  266 Maj op = 31, mask = 07FE, xop = 0x004E (  39) - lvehx
-    { 0x89,  diCMN, 0x006A },         //  267 Maj op = 31, mask = 07FE, xop = 0x006A (  53) - ldux
-    { 0x89,  diCMN, 0x006C },         //  268 Maj op = 31, mask = 07FE, xop = 0x006C (  54) - dcbst
-    { 0x89,  diCMN, 0x006E },         //  269 Maj op = 31, mask = 07FE, xop = 0x006E (  55) - lwzux
-    { 0x89,  diCMN, 0x0074 },         //  270 Maj op = 31, mask = 07FE, xop = 0x0074 (  58) - cntlzd
-    { 0x89,  diCMN, 0x0078 },         //  271 Maj op = 31, mask = 07FE, xop = 0x0078 (  60) - andc
-    { 0x89,  diTRP, 0x0088 },         //  272 Maj op = 31, mask = 07FE, xop = 0x0088 (  68) - td
-    { 0x89,  diCMN, 0x008E },         //  273 Maj op = 31, mask = 07FE, xop = 0x008E (  71) - lvewx
-    { 0x89,  diPRV, 0x00A6 },         //  274 Maj op = 31, mask = 07FE, xop = 0x00A6 (  83) - mfmsr
-    { 0x89,  diCMN, 0x00A8 },         //  275 Maj op = 31, mask = 07FE, xop = 0x00A8 (  84) - ldarx
-    { 0x89,  diCMN, 0x00AC },         //  276 Maj op = 31, mask = 07FE, xop = 0x00AC (  86) - dcbf
-    { 0x89,  diCMN, 0x00AE },         //  277 Maj op = 31, mask = 07FE, xop = 0x00AE (  87) - lbzx
-    { 0x89,  diCMN, 0x00CE },         //  278 Maj op = 31, mask = 07FE, xop = 0x00CE ( 103) - lvx
-    { 0x89,  diCMN, 0x00EE },         //  279 Maj op = 31, mask = 07FE, xop = 0x00EE ( 119) - lbzux
-    { 0x89,  diCMN, 0x00F8 },         //  280 Maj op = 31, mask = 07FE, xop = 0x00F8 ( 124) - nor
-    { 0x89,  diCMN, 0x010E },         //  281 Maj op = 31, mask = 07FE, xop = 0x010E ( 135) - stvebx
-    { 0x89,  diCMN, 0x0120 },         //  282 Maj op = 31, mask = 07FE, xop = 0x0120 ( 144) - mtcrf
-    { 0x89,  diPRV, 0x0124 },         //  283 Maj op = 31, mask = 07FE, xop = 0x0124 ( 146) - mtmsr
-    { 0x89,  diCMN, 0x012A },         //  284 Maj op = 31, mask = 07FE, xop = 0x012A ( 149) - stdx
-    { 0x89,  diCMN, 0x012C },         //  285 Maj op = 31, mask = 07FE, xop = 0x012C ( 150) - stwcx.
-    { 0x89,  diCMN, 0x012E },         //  286 Maj op = 31, mask = 07FE, xop = 0x012E ( 151) - stwx
-    { 0x89,  diCMN, 0x014E },         //  287 Maj op = 31, mask = 07FE, xop = 0x014E ( 167) - stvehx
-    { 0x89,  diPRV, 0x0164 },         //  288 Maj op = 31, mask = 07FE, xop = 0x0164 ( 178) - mtmsrd
-    { 0x89,  diCMN, 0x016A },         //  289 Maj op = 31, mask = 07FE, xop = 0x016A ( 181) - stdux
-    { 0x89,  diCMN, 0x016E },         //  290 Maj op = 31, mask = 07FE, xop = 0x016E ( 183) - stwux
-    { 0x89,  diCMN, 0x018E },         //  291 Maj op = 31, mask = 07FE, xop = 0x018E ( 199) - stvewx
-    { 0x89,  diCMN, 0x01A4 },         //  292 Maj op = 31, mask = 07FE, xop = 0x01A4 ( 210) - mtsr
-    { 0x89,  diCMN, 0x01AC },         //  293 Maj op = 31, mask = 07FE, xop = 0x01AC ( 214) - stdcx.
-    { 0x89,  diCMN, 0x01AE },         //  294 Maj op = 31, mask = 07FE, xop = 0x01AE ( 215) - stbx
-    { 0x89,  diCMN, 0x01CE },         //  295 Maj op = 31, mask = 07FE, xop = 0x01CE ( 231) - stvx
-    { 0x89,  diPRV, 0x01E4 },         //  296 Maj op = 31, mask = 07FE, xop = 0x01E4 ( 242) - mtsrin
-    { 0x89,  diCMN, 0x01EC },         //  297 Maj op = 31, mask = 07FE, xop = 0x01EC ( 246) - dcbtst
-    { 0x89,  diCMN, 0x01EE },         //  298 Maj op = 31, mask = 07FE, xop = 0x01EE ( 247) - stbux
-    { 0x89,  diPRV, 0x0224 },         //  299 Maj op = 31, mask = 07FE, xop = 0x0224 ( 274) - tlbiel
-    { 0x89,  diCMN, 0x022C },         //  300 Maj op = 31, mask = 07FE, xop = 0x022C ( 278) - dcbt
-    { 0x89,  diCMN, 0x022E },         //  301 Maj op = 31, mask = 07FE, xop = 0x022E ( 279) - lhzx
-    { 0x89,  diCMN, 0x0238 },         //  302 Maj op = 31, mask = 07FE, xop = 0x0238 ( 284) - eqv
-    { 0x89,  diPRV, 0x0264 },         //  303 Maj op = 31, mask = 07FE, xop = 0x0264 ( 306) - tlbie
-    { 0x89,  diPRV, 0x026C },         //  304 Maj op = 31, mask = 07FE, xop = 0x026C ( 310) - eciwx
-    { 0x89,  diCMN, 0x026E },         //  305 Maj op = 31, mask = 07FE, xop = 0x026E ( 311) - lhzux
-    { 0x89,  diCMN, 0x0278 },         //  306 Maj op = 31, mask = 07FE, xop = 0x0278 ( 316) - xor
-    { 0x89,  diSPR, 0x02A6 },         //  307 Maj op = 31, mask = 07FE, xop = 0x02A6 ( 339) - mfspr
-    { 0x89,  diCMN, 0x02AA },         //  308 Maj op = 31, mask = 07FE, xop = 0x02AA ( 341) - lwax
-    { 0x89,  diCMN, 0x02AC },         //  309 Maj op = 31, mask = 07FE, xop = 0x02AC ( 342) - dst
-    { 0x89,  diCMN, 0x02AE },         //  310 Maj op = 31, mask = 07FE, xop = 0x02AE ( 343) - lhax
-    { 0x89,  diCMN, 0x02CE },         //  311 Maj op = 31, mask = 07FE, xop = 0x02CE ( 359) - lvxl
-    { 0x89,  diPRV, 0x02E4 },         //  312 Maj op = 31, mask = 07FE, xop = 0x02E4 ( 370) - tlbia
-    { 0x89,  diCMN, 0x02E6 },         //  313 Maj op = 31, mask = 07FE, xop = 0x02E6 ( 371) - mftb
-    { 0x89,  diCMN, 0x02EA },         //  314 Maj op = 31, mask = 07FE, xop = 0x02EA ( 373) - lwaux
-    { 0x89,  diCMN, 0x02EC },         //  315 Maj op = 31, mask = 07FE, xop = 0x02EC ( 374) - dstst
-    { 0x89,  diCMN, 0x02EE },         //  316 Maj op = 31, mask = 07FE, xop = 0x02EE ( 375) - lhaux
-    { 0x89,  diPRV, 0x0324 },         //  317 Maj op = 31, mask = 07FE, xop = 0x0324 ( 402) - slbmte
-    { 0x89,  diCMN, 0x032E },         //  318 Maj op = 31, mask = 07FE, xop = 0x032E ( 407) - sthx
-    { 0x89,  diCMN, 0x0338 },         //  319 Maj op = 31, mask = 07FE, xop = 0x0338 ( 412) - orc
-    { 0x89,  diPRV, 0x0364 },         //  320 Maj op = 31, mask = 07FE, xop = 0x0364 ( 434) - slbie
-    { 0x89,  diPRV, 0x036C },         //  321 Maj op = 31, mask = 07FE, xop = 0x036C ( 438) - ecowx
-    { 0x89,  diCMN, 0x036E },         //  322 Maj op = 31, mask = 07FE, xop = 0x036E ( 439) - sthux
-    { 0x89,   diOR, 0x0378 },         //  323 Maj op = 31, mask = 07FE, xop = 0x0378 ( 444) - or
-    { 0x89,  diSPR, 0x03A6 },         //  324 Maj op = 31, mask = 07FE, xop = 0x03A6 ( 467) - mtspr
-    { 0x89,  diCMN, 0x03B8 },         //  325 Maj op = 31, mask = 07FE, xop = 0x03B8 ( 476) - nand
-    { 0x89,  diCMN, 0x03CE },         //  326 Maj op = 31, mask = 07FE, xop = 0x03CE ( 487) - stvxl
-    { 0x89,  diPRV, 0x03E4 },         //  327 Maj op = 31, mask = 07FE, xop = 0x03E4 ( 498) - slbia
-    { 0x89,  diCMN, 0x0400 },         //  328 Maj op = 31, mask = 07FE, xop = 0x0400 ( 512) - mcrxr
-    { 0x89,  diCMN, 0x042A },         //  329 Maj op = 31, mask = 07FE, xop = 0x042A ( 533) - lswx
-    { 0x89,  diCMN, 0x042C },         //  330 Maj op = 31, mask = 07FE, xop = 0x042C ( 534) - lwbrx
-    { 0x89,  diCMN, 0x042E },         //  331 Maj op = 31, mask = 07FE, xop = 0x042E ( 535) - lfsx
-    { 0x89,  diCMN, 0x0430 },         //  332 Maj op = 31, mask = 07FE, xop = 0x0430 ( 536) - srw
-    { 0x89,  diCMN, 0x0436 },         //  333 Maj op = 31, mask = 07FE, xop = 0x0436 ( 539) - srd
-    { 0x89,  diPRV, 0x046C },         //  334 Maj op = 31, mask = 07FE, xop = 0x046C ( 566) - tlbsync
-    { 0x89,  diCMN, 0x046E },         //  335 Maj op = 31, mask = 07FE, xop = 0x046E ( 567) - lfsux
-    { 0x89,  diPRV, 0x04A6 },         //  336 Maj op = 31, mask = 07FE, xop = 0x04A6 ( 595) - mfsr
-    { 0x89,  diCMN, 0x04AA },         //  337 Maj op = 31, mask = 07FE, xop = 0x04AA ( 597) - lswi
-    { 0x89,  diCMN, 0x04AC },         //  338 Maj op = 31, mask = 07FE, xop = 0x04AC ( 598) - sync
-    { 0x89,  diCMN, 0x04AE },         //  339 Maj op = 31, mask = 07FE, xop = 0x04AE ( 599) - lfdx
-    { 0x89,  diCMN, 0x04EE },         //  340 Maj op = 31, mask = 07FE, xop = 0x04EE ( 631) - lfdux
-    { 0x89,  diPRV, 0x0526 },         //  341 Maj op = 31, mask = 07FE, xop = 0x0526 ( 659) - mfsrin
-    { 0x89,  diCMN, 0x052A },         //  342 Maj op = 31, mask = 07FE, xop = 0x052A ( 661) - stswx
-    { 0x89,  diCMN, 0x052C },         //  343 Maj op = 31, mask = 07FE, xop = 0x052C ( 662) - stwbrx
-    { 0x89,  diCMN, 0x052E },         //  344 Maj op = 31, mask = 07FE, xop = 0x052E ( 663) - stfsx
-    { 0x89,  diCMN, 0x056E },         //  345 Maj op = 31, mask = 07FE, xop = 0x056E ( 695) - stfsux
-    { 0x89,  diCMN, 0x05AA },         //  346 Maj op = 31, mask = 07FE, xop = 0x05AA ( 725) - stswi
-    { 0x89,  diCMN, 0x05AE },         //  347 Maj op = 31, mask = 07FE, xop = 0x05AE ( 727) - stfdx
-    { 0x89,  diCMN, 0x05EC },         //  348 Maj op = 31, mask = 07FE, xop = 0x05EC ( 758) - dcba
-    { 0x89,  diCMN, 0x05EE },         //  349 Maj op = 31, mask = 07FE, xop = 0x05EE ( 759) - stfdux
-    { 0x89,  diCMN, 0x062C },         //  350 Maj op = 31, mask = 07FE, xop = 0x062C ( 790) - lhbrx
-    { 0x89,  diCMN, 0x0630 },         //  351 Maj op = 31, mask = 07FE, xop = 0x0630 ( 792) - sraw
-    { 0x89,  diCMN, 0x0634 },         //  352 Maj op = 31, mask = 07FE, xop = 0x0634 ( 794) - srad
-    { 0x89,  diCMN, 0x066C },         //  353 Maj op = 31, mask = 07FE, xop = 0x066C ( 822) - dss
-    { 0x89,  diCMN, 0x0670 },         //  354 Maj op = 31, mask = 07FE, xop = 0x0670 ( 824) - srawi
-    { 0x89,  diPRV, 0x06A6 },         //  355 Maj op = 31, mask = 07FE, xop = 0x06A6 ( 851) - slbmfev
-    { 0x89,  diCMN, 0x06AC },         //  356 Maj op = 31, mask = 07FE, xop = 0x06AC ( 854) - eieio
-    { 0x89,  diPRV, 0x0726 },         //  357 Maj op = 31, mask = 07FE, xop = 0x0726 ( 915) - slbmfee
-    { 0x89,  diCMN, 0x072C },         //  358 Maj op = 31, mask = 07FE, xop = 0x072C ( 918) - sthbrx
-    { 0x89,  diCMN, 0x0734 },         //  359 Maj op = 31, mask = 07FE, xop = 0x0734 ( 922) - extsh
-    { 0x89,  diCMN, 0x0774 },         //  360 Maj op = 31, mask = 07FE, xop = 0x0774 ( 954) - extsb
-    { 0x89,  diCMN, 0x07AC },         //  361 Maj op = 31, mask = 07FE, xop = 0x07AC ( 982) - icbi
-    { 0x89,  diCMN, 0x07AE },         //  362 Maj op = 31, mask = 07FE, xop = 0x07AE ( 983) - stfiwx
-    { 0x89,  diCMN, 0x07B4 },         //  363 Maj op = 31, mask = 07FE, xop = 0x07B4 ( 986) - extsw
-    { 0x09,  diCMN, 0x07EC },         //  364 Maj op = 31, mask = 07FE, xop = 0x07EC (1014) - dcbz
-    { 0x81,  diCMN, 0x0000 },         //  365 Maj op = 58, mask = 0003, xop = 0x0000 (   0) - ld
-    { 0x81,  diCMN, 0x0001 },         //  366 Maj op = 58, mask = 0003, xop = 0x0001 (   1) - ldu
-    { 0x01,  diCMN, 0x0002 },         //  367 Maj op = 58, mask = 0003, xop = 0x0002 (   2) - lwa
-    { 0x84,  diCMN, 0x0024 },         //  368 Maj op = 59, mask = 003E, xop = 0x0024 (  18) - fdivs
-    { 0x84,  diCMN, 0x0028 },         //  369 Maj op = 59, mask = 003E, xop = 0x0028 (  20) - fsubs
-    { 0x84,  diCMN, 0x002A },         //  370 Maj op = 59, mask = 003E, xop = 0x002A (  21) - fadds
-    { 0x84,  diCMN, 0x002C },         //  371 Maj op = 59, mask = 003E, xop = 0x002C (  22) - fsqrts
-    { 0x84,  diCMN, 0x0030 },         //  372 Maj op = 59, mask = 003E, xop = 0x0030 (  24) - fres
-    { 0x84,  diCMN, 0x0032 },         //  373 Maj op = 59, mask = 003E, xop = 0x0032 (  25) - fmuls
-    { 0x84,  diCMN, 0x0038 },         //  374 Maj op = 59, mask = 003E, xop = 0x0038 (  28) - fmsubs
-    { 0x84,  diCMN, 0x003A },         //  375 Maj op = 59, mask = 003E, xop = 0x003A (  29) - fmadds
-    { 0x84,  diCMN, 0x003C },         //  376 Maj op = 59, mask = 003E, xop = 0x003C (  30) - fnmsubs
-    { 0x04,  diCMN, 0x003E },         //  377 Maj op = 59, mask = 003E, xop = 0x003E (  31) - fnmadds
-    { 0x81,  diCMN, 0x0000 },         //  378 Maj op = 62, mask = 0003, xop = 0x0000 (   0) - std
-    { 0x01,  diCMN, 0x0001 },         //  379 Maj op = 62, mask = 0003, xop = 0x0001 (   1) - stdu
-    { 0x84,  diCMN, 0x0024 },         //  380 Maj op = 63, mask = 003E, xop = 0x0024 (  18) - fdiv
-    { 0x84,  diCMN, 0x0028 },         //  381 Maj op = 63, mask = 003E, xop = 0x0028 (  20) - fsub
-    { 0x84,  diCMN, 0x002A },         //  382 Maj op = 63, mask = 003E, xop = 0x002A (  21) - fadd
-    { 0x84,  diCMN, 0x002C },         //  383 Maj op = 63, mask = 003E, xop = 0x002C (  22) - fsqrt
-    { 0x84,  diCMN, 0x002E },         //  384 Maj op = 63, mask = 003E, xop = 0x002E (  23) - fsel
-    { 0x84,  diCMN, 0x0032 },         //  385 Maj op = 63, mask = 003E, xop = 0x0032 (  25) - fmul
-    { 0x84,  diCMN, 0x0034 },         //  386 Maj op = 63, mask = 003E, xop = 0x0034 (  26) - frsqrte
-    { 0x84,  diCMN, 0x0038 },         //  387 Maj op = 63, mask = 003E, xop = 0x0038 (  28) - fmsub
-    { 0x84,  diCMN, 0x003A },         //  388 Maj op = 63, mask = 003E, xop = 0x003A (  29) - fmadd
-    { 0x84,  diCMN, 0x003C },         //  389 Maj op = 63, mask = 003E, xop = 0x003C (  30) - fnmsub
-    { 0x84,  diCMN, 0x003E },         //  390 Maj op = 63, mask = 003E, xop = 0x003E (  31) - fnmadd
-    { 0x89,  diCMN, 0x0000 },         //  391 Maj op = 63, mask = 07FE, xop = 0x0000 (   0) - fcmpu
-    { 0x89,  diCMN, 0x0018 },         //  392 Maj op = 63, mask = 07FE, xop = 0x0018 (  12) - frsp
-    { 0x89,  diCMN, 0x001C },         //  393 Maj op = 63, mask = 07FE, xop = 0x001C (  14) - fctiw
-    { 0x89,  diCMN, 0x001E },         //  394 Maj op = 63, mask = 07FE, xop = 0x001E (  15) - fctiwz
-    { 0x89,  diCMN, 0x0040 },         //  395 Maj op = 63, mask = 07FE, xop = 0x0040 (  32) - fcmpo
-    { 0x89,  diCMN, 0x004C },         //  396 Maj op = 63, mask = 07FE, xop = 0x004C (  38) - mtfsb1
-    { 0x89,  diCMN, 0x0050 },         //  397 Maj op = 63, mask = 07FE, xop = 0x0050 (  40) - fneg
-    { 0x89,  diCMN, 0x0080 },         //  398 Maj op = 63, mask = 07FE, xop = 0x0080 (  64) - mcrfs
-    { 0x89,  diCMN, 0x008C },         //  399 Maj op = 63, mask = 07FE, xop = 0x008C (  70) - mtfsb0
-    { 0x89,  diCMN, 0x0090 },         //  400 Maj op = 63, mask = 07FE, xop = 0x0090 (  72) - fmr
-    { 0x89,  diCMN, 0x010C },         //  401 Maj op = 63, mask = 07FE, xop = 0x010C ( 134) - mtfsfi
-    { 0x89,  diCMN, 0x0110 },         //  402 Maj op = 63, mask = 07FE, xop = 0x0110 ( 136) - fnabs
-    { 0x89,  diCMN, 0x0210 },         //  403 Maj op = 63, mask = 07FE, xop = 0x0210 ( 264) - fabs
-    { 0x89,  diCMN, 0x048E },         //  404 Maj op = 63, mask = 07FE, xop = 0x048E ( 583) - mffs
-    { 0x89,  diCMN, 0x058E },         //  405 Maj op = 63, mask = 07FE, xop = 0x058E ( 711) - mtfsf
-    { 0x89,  diCMN, 0x065C },         //  406 Maj op = 63, mask = 07FE, xop = 0x065C ( 814) - fctid
-    { 0x89,  diCMN, 0x065E },         //  407 Maj op = 63, mask = 07FE, xop = 0x065E ( 815) - fctidz
-    { 0x09,  diCMN, 0x069C },         //  408 Maj op = 63, mask = 07FE, xop = 0x069C ( 846) - fcfid
-};
-
-#ifdef __decodePPC_debug__
-char *instname[] = {
-    "Jump entry...",
-    "Invalid",
-    "tdi",
-    "twi",
-    "Jump entry...",
-    "Invalid",
-    "Invalid",
-    "mulli",
-    "subfic",
-    "Invalid",
-    "cmpli",
-    "cmpi",
-    "addic",
-    "addic.",
-    "addi",
-    "addis",
-    "bc",
-    "sc",
-    "b",
-    "Jump entry...",
-    "rlwimi",
-    "rlwinm",
-    "Invalid",
-    "rlwnm",
-    "ori",
-    "oris",
-    "xori",
-    "xoris",
-    "andi.",
-    "andis.",
-    "Jump entry...",
-    "Jump entry...",
-    "lwz",
-    "lwzu",
-    "lbz",
-    "lbzu",
-    "stw",
-    "stwu",
-    "stb",
-    "stbu",
-    "lhz",
-    "lhzu",
-    "lha",
-    "lhau",
-    "sth",
-    "sthu",
-    "lmw",
-    "stmw",
-    "lfs",
-    "lfsu",
-    "lfd",
-    "lfdu",
-    "stfs",
-    "stfsu",
-    "stfd",
-    "stfdu",
-    "Invalid",
-    "Invalid",
-    "Jump entry...",
-    "Jump entry...",
-    "Invalid",
-    "Invalid",
-    "Jump entry...",
-    "Jump entry...",
-    "attn",
-    "vmhaddshs",
-    "vmhraddshs",
-    "vmladduhm",
-    "vmsumubm",
-    "vmsummbm",
-    "vmsumuhm",
-    "vmsumuhs",
-    "vmsumshm",
-    "vmsumshs",
-    "vsel",
-    "vperm",
-    "vsldoi",
-    "vmaddfp",
-    "vnmsubfp",
-    "vcmpequb",
-    "vcmpequh",
-    "vcmpequw",
-    "vcmpeqfp",
-    "vcmpgefp",
-    "vcmpgtub",
-    "vcmpgtuh",
-    "vcmpgtuw",
-    "vcmpgtfp",
-    "vcmpgtsb",
-    "vcmpgtsh",
-    "vcmpgtsw",
-    "vcmpbfp",
-    "vaddubm",
-    "vmaxub",
-    "vrlb",
-    "vmuloub",
-    "vaddfp",
-    "vmrghb",
-    "vpkuhum",
-    "vadduhm",
-    "vmaxuh",
-    "vrlh",
-    "vmulouh",
-    "vsubfp",
-    "vmrghh",
-    "vpkuwum",
-    "vadduwm",
-    "vmaxuw",
-    "vrlw",
-    "vmrghw",
-    "vpkuhus",
-    "vpkuwus",
-    "vmaxsb",
-    "vslb",
-    "vmulosb",
-    "vrefp",
-    "vmrglb",
-    "vpkshus",
-    "vmaxsh",
-    "vslh",
-    "vmulosh",
-    "vrsqrtefp",
-    "vmrglh",
-    "vpkswus",
-    "vaddcuw",
-    "vmaxsw",
-    "vslw",
-    "vexptefp",
-    "vmrglw",
-    "vpkshss",
-    "vsl",
-    "vlogefp",
-    "vpkswss",
-    "vaddubs",
-    "vminub",
-    "vsrb",
-    "vmuleub",
-    "vrfin",
-    "vspltb",
-    "vupkhsb",
-    "vadduhs",
-    "vminuh",
-    "vsrh",
-    "vmuleuh",
-    "vrfiz",
-    "vsplth",
-    "vupkhsh",
-    "vadduws",
-    "vminuw",
-    "vsrw",
-    "vrfip",
-    "vspltw",
-    "vupklsb",
-    "vsr",
-    "vrfim",
-    "vupklsh",
-    "vaddsbs",
-    "vminsb",
-    "vsrab",
-    "vmulesb",
-    "vcfux",
-    "vspltisb",
-    "vpkpx",
-    "vaddshs",
-    "vminsh",
-    "vsrah",
-    "vmulesh",
-    "vcfsx",
-    "vspltish",
-    "vupkhpx",
-    "vaddsws",
-    "vminsw",
-    "vsraw",
-    "vctuxs",
-    "vspltisw",
-    "vctsxs",
-    "vupklpx",
-    "vsububm",
-    "vavgub",
-    "vand",
-    "vmaxfp",
-    "vslo",
-    "vsubuhm",
-    "vavguh",
-    "vandc",
-    "vminfp",
-    "vsro",
-    "vsubuwm",
-    "vavguw",
-    "vor",
-    "vxor",
-    "vavgsb",
-    "vnor",
-    "vavgsh",
-    "vsubcuw",
-    "vavgsw",
-    "vsububs",
-    "mfvscr",
-    "vsum4ubs",
-    "vsubuhs",
-    "mtvscr",
-    "vsum4shs",
-    "vsubuws",
-    "vsum2sws",
-    "vsubsbs",
-    "vsum4sbs",
-    "vsubshs",
-    "vsubsws",
-    "vsumsws",
-    "mcrf",
-    "bclr",
-    "rfid",
-    "crnor",
-    "rfi",
-    "crandc",
-    "isync",
-    "crxor",
-    "crnand",
-    "crand",
-    "hrfid",
-    "creqv",
-    "crorc",
-    "cror",
-    "bctr",
-    "rldicl",
-    "rldicr",
-    "rldic",
-    "rldimi",
-    "rldcl",
-    "rldcr",
-    "subfc",
-    "mulhdu",
-    "addc",
-    "mulhwu",
-    "subf",
-    "mulhd",
-    "mulhw",
-    "neg",
-    "subfe",
-    "adde",
-    "subfze",
-    "addze",
-    "subfme",
-    "mulld",
-    "addme",
-    "mullw",
-    "add",
-    "divdu",
-    "divwu",
-    "divd",
-    "divw",
-    "sradi",
-    "cmp",
-    "tw",
-    "lvsl",
-    "lvebx",
-    "mfcr",
-    "lwarx",
-    "ldx",
-    "lwzx",
-    "slw",
-    "cntlzw",
-    "sld",
-    "and",
-    "cmpl",
-    "lvsr",
-    "lvehx",
-    "ldux",
-    "dcbst",
-    "lwzux",
-    "cntlzd",
-    "andc",
-    "td",
-    "lvewx",
-    "mfmsr",
-    "ldarx",
-    "dcbf",
-    "lbzx",
-    "lvx",
-    "lbzux",
-    "nor",
-    "stvebx",
-    "mtcrf",
-    "mtmsr",
-    "stdx",
-    "stwcx",
-    "stwx",
-    "stvehx",
-    "mtmsrd",
-    "stdux",
-    "stwux",
-    "stvewx",
-    "mtsr",
-    "stdcx.",
-    "stbx",
-    "stvx",
-    "mtsrin",
-    "dcbtst",
-    "stbux",
-    "tlbiel",
-    "dcbt",
-    "lhzx",
-    "eqv",
-    "tlbie",
-    "eciwx",
-    "lhzux",
-    "xor",
-    "mfspr",
-    "lwax",
-    "dst",
-    "lhax",
-    "lvxl",
-    "tlbia",
-    "mftb",
-    "lwaux",
-    "dstst",
-    "lhaux",
-    "slbmte",
-    "sthx",
-    "orc",
-    "slbie",
-    "ecowx",
-    "sthux",
-    "or",
-    "mtspr",
-    "nand",
-    "stvxl",
-    "slbia",
-    "mcrxr",
-    "lswx",
-    "lwbrx",
-    "lfsx",
-    "srw",
-    "srd",
-    "tlbsync",
-    "lfsux",
-    "mfsr",
-    "lswi",
-    "sync",
-    "lfdx",
-    "lfdux",
-    "mfsrin",
-    "stswx",
-    "stwbrx",
-    "stfsx",
-    "stfsux",
-    "stswi",
-    "stfdx",
-    "dcba",
-    "stfdux",
-    "lhbrx",
-    "sraw",
-    "srad",
-    "dss",
-    "srawi",
-    "slbmfev",
-    "eieio",
-    "slbmfee",
-    "sthbrx",
-    "extsh",
-    "extsb",
-    "icbi",
-    "stfiwx",
-    "extsw",
-    "dcbz",
-    "ld",
-    "ldu",
-    "lwa",
-    "fdivs",
-    "fsubs",
-    "fadds",
-    "fsqrts",
-    "fres",
-    "fmuls",
-    "fmsubs",
-    "fmadds",
-    "fnmsubs",
-    "fnmadds",
-    "std",
-    "stdu",
-    "fdiv",
-    "fsub",
-    "fadd",
-    "fsqrt",
-    "fsel",
-    "fmul",
-    "frsqrte",
-    "fmsub",
-    "fmadd",
-    "fnmsub",
-    "fnmadd",
-    "fcmpu",
-    "frsp",
-    "fctiw",
-    "fctiwz",
-    "fcmpo",
-    "mtfsb1",
-    "fneg",
-    "mcrfs",
-    "mtfsb0",
-    "fmr",
-    "mtfsfi",
-    "fnabs",
-    "fabs",
-    "mffs",
-    "mtfsf",
-    "fctid",
-    "fctidz",
-    "fcfid",
-};
-#endif
-
-static dcdtab dcdfail = { 0x00,  diINV, 0x0000 };	// Decode failed
-
-static uint32_t sprtbl[] = {
-    0xCCC03274,						// spr    0 to   31
-    0x00000000,						// spr   32 to   63
-    0x00000000,						// spr   64 to   95
-    0x00000000,						// spr   96 to  127
-    0x00000080,						// spr  128 to  159
-    0x00000000,						// spr  160 to  191
-    0x00000000,						// spr  192 to  223
-    0x00000000,						// spr  224 to  255
-    0x9000FCAD,						// spr  256 to  287
-    0x0000C3F3,						// spr  288 to  319
-    0x00000000,						// spr  320 to  351
-    0x00000000,						// spr  352 to  383
-    0x00000000,						// spr  384 to  415
-    0x00000000,						// spr  416 to  447
-    0x00000000,						// spr  448 to  479
-    0x00000000,						// spr  480 to  511
-    0x0000FFFF,						// spr  512 to  543
-    0x00000000,						// spr  544 to  575
-    0x00000000,						// spr  576 to  607
-    0x00000000,						// spr  608 to  639
-    0x00000000,						// spr  640 to  671
-    0x00000000,						// spr  672 to  703
-    0x00000000,						// spr  704 to  735
-    0x00000000,						// spr  736 to  767
-    0x3FFF3FFF,						// spr  768 to  799
-    0x00000000,						// spr  800 to  831
-    0x00000000,						// spr  832 to  863
-    0x00000000,						// spr  864 to  895
-    0x00000000,						// spr  896 to  927
-    0xE1FFE1FF,						// spr  928 to  959
-    0x0000FE80,						// spr  960 to  991
-    0x0000FFFF,						// spr  992 to 1023
-};
diff --git a/bsd/ppc/endian.h b/bsd/ppc/endian.h
deleted file mode 100644
index c6929f117..000000000
--- a/bsd/ppc/endian.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/*
- * Copyright (c) 1995 NeXT Computer, Inc. All rights reserved.
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1987, 1991, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)endian.h	8.1 (Berkeley) 6/10/93
- */
-
-#ifndef _PPC_ENDIAN_H_
-#define	_PPC_ENDIAN_H_
-
-#include <sys/cdefs.h>
-
-/*
- * Define the order of 32-bit words in 64-bit words.
- */
-#define _QUAD_HIGHWORD 0
-#define _QUAD_LOWWORD 1
-
-/*
- * Definitions for byte order, according to byte significance from low
- * address to high.
- */
-#define	__DARWIN_LITTLE_ENDIAN	1234	/* LSB first: i386, vax */
-#define	__DARWIN_BIG_ENDIAN	4321	/* MSB first: 68000, ibm, net, ppc */
-#define	__DARWIN_PDP_ENDIAN	3412	/* LSB first in word, MSW first in long */
-
-#define	__DARWIN_BYTE_ORDER	__DARWIN_BIG_ENDIAN
-
-#if	defined(KERNEL) || (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE))
-
-#define	LITTLE_ENDIAN	__DARWIN_LITTLE_ENDIAN
-#define	BIG_ENDIAN	__DARWIN_BIG_ENDIAN
-#define	PDP_ENDIAN	__DARWIN_PDP_ENDIAN
-
-#define	BYTE_ORDER	__DARWIN_BYTE_ORDER
-
-#include <sys/_endian.h>
-
-#endif /* defined(KERNEL) || (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)) */
-#endif /* !_PPC_ENDIAN_H_ */
diff --git a/bsd/ppc/exec.h b/bsd/ppc/exec.h
deleted file mode 100644
index 471543a1d..000000000
--- a/bsd/ppc/exec.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* 
- * Copyright (c) 1994, The University of Utah and
- * the Center for Software Science at the University of Utah (CSS).
- * All rights reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * THE UNIVERSITY OF UTAH AND CSS ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
- * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSS DISCLAIM ANY LIABILITY OF
- * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * CSS requests users of this software to return to css-dist@cs.utah.edu any
- * improvements that they make and grant CSS redistribution rights.
- *
- */
-
-#ifndef _BSD_PPC_EXEC_H_
-#define _BSD_PPC_EXEC_H_
-
-
-#include <sys/appleapiopts.h>
-
-#ifdef BSD_KERNEL_PRIVATE
-/* Size of a page in an object file. */
-#define	__LDPGSZ	4096
-
-/* Valid magic number check. */
-#define	N_BADMAG(ex) \
-	((ex).a_magic != NMAGIC && (ex).a_magic != OMAGIC && \
-	    (ex).a_magic != ZMAGIC)
-
-/* Address of the bottom of the text segment. */
-#define N_TXTADDR(X)	0
-
-/* Address of the bottom of the data segment. */
-#define N_DATADDR(ex) \
-	(N_TXTADDR(ex) + ((ex).a_magic == OMAGIC ? (ex).a_text \
-	: __LDPGSZ + ((ex).a_text - 1 & ~(__LDPGSZ - 1))))
-
-/* Text segment offset. */
-#define	N_TXTOFF(ex) \
-	((ex).a_magic == ZMAGIC ? __LDPGSZ : sizeof(struct exec))
-
-/* Data segment offset. */
-#define	N_DATOFF(ex) \
-	(N_TXTOFF(ex) + ((ex).a_magic != ZMAGIC ? (ex).a_text : \
-	__LDPGSZ + ((ex).a_text - 1 & ~(__LDPGSZ - 1))))
-
-/* Symbol table offset. */
-#define N_SYMOFF(ex) \
-	(N_TXTOFF(ex) + (ex).a_text + (ex).a_data + (ex).a_trsize + \
-	    (ex).a_drsize)
-
-/* String table offset. */
-#define	N_STROFF(ex) 	(N_SYMOFF(ex) + (ex).a_syms)
-
-/* Description of the object file header (a.out format). */
-struct exec {
-#define	OMAGIC	0407		/* old impure format */
-#define	NMAGIC	0410		/* read-only text */
-#define	ZMAGIC	0413		/* demand load format */
-#define QMAGIC	0314		/* demand load format. Header in text. */
-	unsigned int	a_magic;	/* magic number */
-
-	unsigned int	a_text;		/* text segment size */
-	unsigned int	a_data;		/* initialized data size */
-	unsigned int	a_bss;		/* uninitialized data size */
-	unsigned int	a_syms;		/* symbol table size */
-	unsigned int	a_entry;	/* entry point */
-	unsigned int	a_trsize;	/* text relocation size */
-	unsigned int	a_drsize;	/* data relocation size */
-};
-
-#endif /* BSD_KERNEL_PRIVATE */
-
-#endif /* _BSD_PPC_EXEC_H_ */
-
diff --git a/bsd/ppc/fasttrap_isa.h b/bsd/ppc/fasttrap_isa.h
deleted file mode 100644
index b4a2cb4c2..000000000
--- a/bsd/ppc/fasttrap_isa.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef	_FASTTRAP_ISA_H
-#define	_FASTTRAP_ISA_H
-
-/* #pragma ident	"@(#)fasttrap_isa.h	1.4	05/06/08 SMI" */
-
-#include <sys/types.h>
-#if defined(__APPLE__)
-#include <stdint.h>
-#endif 
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-typedef	uint32_t		fasttrap_instr_t;
-
-typedef struct fasttrap_machtp {
-	fasttrap_instr_t	ftmt_instr;     /* Original instruction */
-	int32_t				ftmt_trgt;		/* Offset or absolute address */
-	uint8_t				ftmt_type;		/* Emulation function type */
-#define ftmtNOP 0
-#define ftmtCommon 1
-#define ftmtB 2
-#define ftmtBC 3
-#define ftmtBLR 4
-#define ftmtBCTR 5
-	uint8_t				ftmt_bo;		/* Branch options */
-	uint8_t				ftmt_bi;		/* Condition bit */
-	uint8_t				ftmt_flgs;		/* Flags */
-#define ftmtAbs 2
-#define ftmtLink 1
-} fasttrap_machtp_t;
-
-#define	ftt_instr	ftt_mtp.ftmt_instr
-#define	ftt_trgt	ftt_mtp.ftmt_trgt
-#define	ftt_type	ftt_mtp.ftmt_type
-#define	ftt_bo		ftt_mtp.ftmt_bo
-#define	ftt_bi		ftt_mtp.ftmt_bi
-#define	ftt_flgs	ftt_mtp.ftmt_flgs
-
-#define	FASTTRAP_INSTR 0x0FFFDDDD
-#define T_DTRACE_RET (0x2E * 4)
-
-#define	FASTTRAP_RETURN_AFRAMES		7
-#define	FASTTRAP_ENTRY_AFRAMES		7
-#define	FASTTRAP_OFFSET_AFRAMES		6
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif	/* _FASTTRAP_ISA_H */
diff --git a/bsd/ppc/limits.h b/bsd/ppc/limits.h
deleted file mode 100644
index 8f7decbec..000000000
--- a/bsd/ppc/limits.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 1988, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)limits.h	8.3 (Berkeley) 1/4/94
- */
-
-#ifndef _PPC_LIMITS_H_
-#define _PPC_LIMITS_H_
-
-#include <sys/cdefs.h>
-#include <ppc/_limits.h>
-
-#define	CHAR_BIT	8		/* number of bits in a char */
-#define	MB_LEN_MAX	6		/* Allow 31 bit UTF2 */
-
-#if !defined(_ANSI_SOURCE) && (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE))
-#define	CLK_TCK		__DARWIN_CLK_TCK	/* ticks per second */
-#endif /* !_ANSI_SOURCE && (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
-
-/*
- * According to ANSI (section 2.2.4.2), the values below must be usable by
- * #if preprocessing directives.  Additionally, the expression must have the
- * same type as would an expression that is an object of the corresponding
- * type converted according to the integral promotions.  The subtraction for
- * INT_MIN and LONG_MIN is so the value is not unsigned; 2147483648 is an
- * unsigned int for 32-bit two's complement ANSI compilers (section 3.1.3.2).
- * These numbers work for pcc as well.  The UINT_MAX and ULONG_MAX values
- * are written as hex so that GCC will be quiet about large integer constants.
- */
-#define	SCHAR_MAX	127		/* max value for a signed char */
-#define	SCHAR_MIN	(-128)		/* min value for a signed char */
-
-#define	UCHAR_MAX	255		/* max value for an unsigned char */
-#define	CHAR_MAX	127		/* max value for a char */
-#define	CHAR_MIN	(-128)		/* min value for a char */
-
-#define	USHRT_MAX	65535		/* max value for an unsigned short */
-#define	SHRT_MAX	32767		/* max value for a short */
-#define	SHRT_MIN	(-32768)	/* min value for a short */
-
-#define	UINT_MAX	0xffffffff	/* max value for an unsigned int */
-#define	INT_MAX		2147483647	/* max value for an int */
-#define	INT_MIN		(-2147483647-1)	/* min value for an int */
-
-#ifdef __LP64__
-#define	ULONG_MAX	0xffffffffffffffffUL	/* max unsigned long */
-#define	LONG_MAX	0x7fffffffffffffffL	/* max signed long */
-#define	LONG_MIN	(-0x7fffffffffffffffL-1) /* min signed long */
-#else /* !__LP64__ */
-#define	ULONG_MAX	0xffffffffUL	/* max unsigned long */
-#define	LONG_MAX	2147483647L	/* max signed long */
-#define	LONG_MIN	(-2147483647L-1) /* min signed long */
-#endif /* __LP64__ */
-
-#define	ULLONG_MAX	0xffffffffffffffffULL	/* max unsigned long long */
-#define	LLONG_MAX	0x7fffffffffffffffLL	/* max signed long long */
-#define	LLONG_MIN	(-0x7fffffffffffffffLL-1) /* min signed long long */
-
-#if !defined(_ANSI_SOURCE)
-#ifdef __LP64__
-#define LONG_BIT	64
-#else /* !__LP64__ */
-#define LONG_BIT	32
-#endif /* __LP64__ */
-#define	SSIZE_MAX	LONG_MAX	/* max value for a ssize_t */
-#define WORD_BIT	32
-
-#if (!defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE)) || defined(_DARWIN_C_SOURCE)
-#define	SIZE_T_MAX	ULONG_MAX	/* max value for a size_t */
-
-#define	UQUAD_MAX	ULLONG_MAX
-#define	QUAD_MAX	LLONG_MAX
-#define	QUAD_MIN	LLONG_MIN
-
-#endif /* (!_POSIX_C_SOURCE && !_XOPEN_SOURCE) || _DARWIN_C_SOURCE */
-#endif /* !_ANSI_SOURCE */
-
-#endif /* _PPC_LIMITS_H_ */
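
One point worth spelling out from the comment block above: INT_MIN is written (-2147483647-1) because the literal 2147483648 does not fit in an int, so -2147483648 would be unary minus applied to an unsigned (C90) or wider (C99) constant rather than an int-typed expression. A stand-alone illustration (a hypothetical test program, not kernel code):

#include <stdio.h>

int
main(void)
{
	/* -2147483647 - 1 stays int-typed throughout and evaluates to
	 * INT_MIN; a bare -2147483648 would first promote the literal to
	 * a wider or unsigned type before negating it. */
	printf("%d\n", -2147483647 - 1);	/* prints -2147483648 */
	printf("%zu\n", sizeof(2147483647));	/* literal fits in an int */
	return (0);
}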
diff --git a/bsd/ppc/param.h b/bsd/ppc/param.h
deleted file mode 100644
index a434e3c4c..000000000
--- a/bsd/ppc/param.h
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1993,1995 NeXT Computer, Inc. All Rights Reserved */
-
-#ifndef	_PPC_PARAM_H_
-#define	_PPC_PARAM_H_
-
-#include <ppc/_param.h>
-
-/*
- * Round p (pointer or byte index) up to a correctly-aligned value for all
- * data types (int, long, ...).   The result is unsigned int and must be
- * cast to any desired pointer type.
- */
-#define	ALIGNBYTES	__DARWIN_ALIGNBYTES
-#define	ALIGN(p)	__DARWIN_ALIGN(p)
-
-#define	NBPG		4096		/* bytes/page */
-#define	PGOFSET		(NBPG-1)	/* byte offset into page */
-#define	PGSHIFT		12		/* LOG2(NBPG) */
-
-#define NBSEG		0x40000000	/* bytes/segment (quadrant) */
-#define	SEGOFSET	(NBSEG-1)	/* byte offset into segment */
-#define	SEGSHIFT	30		/* LOG2(NBSEG) */
-
-#define	DEV_BSIZE	512
-#define	DEV_BSHIFT	9		/* log2(DEV_BSIZE) */
-#define BLKDEV_IOSIZE	2048
-#define	MAXPHYS		(128 * 1024)	/* max raw I/O transfer size */
-
-#define	STACK_GROWTH_UP	0		/* stack grows to lower addresses */
-
-#define	CLSIZE		1
-#define	CLSIZELOG2	0
-
-#define STACKSIZE 4			/* pages in kernel stack */
-#define	UPAGES	0	/* total pages in u-area */
-					/* red zone is beyond this */
-
-/*
- * Constants related to network buffer management.
- * MCLBYTES must be no larger than CLBYTES (the software page size), and,
- * on machines that exchange pages of input or output buffers with mbuf
- * clusters (MAPPED_MBUFS), MCLBYTES must also be an integral multiple
- * of the hardware page size.
- */
-#define	MSIZE		256		/* size of an mbuf */
-#define	MCLBYTES	2048		/* large enough for ether MTU */
-#define	MCLSHIFT	11
-#define	MCLOFSET	(MCLBYTES - 1)
-#ifndef NMBCLUSTERS
-#if GATEWAY
-#define	NMBCLUSTERS	((1024 * 1024) / MCLBYTES)	/* cl map size: 1MB */
-#else
-#define	NMBCLUSTERS	((1024 * 1024) / MCLBYTES)	
-		/* cl map size was 0.5MB when MSIZE was 128, now it's 1MB */
-#endif
-#endif
-
-/* pages ("clicks") (NBPG bytes) to disk blocks */
-#define	ctod(x)	((x)<<(PGSHIFT-DEV_BSHIFT))
-#define	dtoc(x)	((x)>>(PGSHIFT-DEV_BSHIFT))
-#define	dtob(x)	((x)<<DEV_BSHIFT)
-
-/* pages to bytes */
-#define	ctob(x)	((x)<<PGSHIFT)
-
-/* bytes to pages */
-#define	btoc(x)	(((unsigned)(x)+(PGOFSET))>>PGSHIFT)
-#ifdef __APPLE__
-#define  btodb(bytes, devBlockSize)         \
-        ((unsigned)(bytes) / devBlockSize)
-#define  dbtob(db, devBlockSize)            \
-             ((unsigned)(db) * devBlockSize)
-#else
-#define	btodb(bytes)	 		/* calculates (bytes / DEV_BSIZE) */ \
-	((unsigned)(bytes) >> DEV_BSHIFT)
-#define	dbtob(db)			/* calculates (db * DEV_BSIZE) */ \
-	((unsigned)(db) << DEV_BSHIFT)
-#endif
-
-/*
- * Map a ``block device block'' to a file system block.
- * This should be device dependent, and should use the bsize
- * field from the disk label.
- * For now though just use DEV_BSIZE.
- */
-#define	bdbtofsb(bn)	((bn) / (BLKDEV_IOSIZE/DEV_BSIZE))
-
-/* from machdep/ppc/proc_reg.h */
-#ifdef __BIG_ENDIAN__
-#define ENDIAN_MASK(val,size) (1 << (size-1 - val))
-#else
-#error code not ported to little endian targets yet
-#endif /* __BIG_ENDIAN__ */
-
-#ifndef MASK
-#define MASK(PART)	ENDIAN_MASK(PART ## _BIT, 32)
-#endif
-
-#define	MSR_EE_BIT	16
-#define	MSR_PR_BIT	17
-#define USERMODE(msr) (msr & MASK(MSR_PR) ? TRUE : FALSE)
-#define BASEPRI(msr) (msr & MASK(MSR_EE) ? TRUE : FALSE)
-/* end of from proc_reg.h */
-
-#if	defined(KERNEL) || defined(STANDALONE)
-#define	DELAY(n) delay(n)
-#else
-#define	DELAY(n)	{ register int N = (n); while (--N > 0); }
-#endif	/* defined(KERNEL) || defined(STANDALONE) */
-
-#define	NPIDS		16	/* maximum number of PIDs per process */
-#define	NIOPIDS		8	/* maximum number of IO space PIDs */
-
-#endif	/* _PPC_PARAM_H_ */
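
The conversion macros deleted above are pure shifts between power-of-two units: PGSHIFT (12) maps pages to bytes, DEV_BSHIFT (9) maps 512-byte disk blocks to bytes, so ctod() shifts by the difference and btoc() rounds up before shifting down. A small self-contained check of that arithmetic (a hypothetical test program reusing the same definitions):

#include <assert.h>

#define	PGSHIFT		12			/* LOG2(NBPG) */
#define	DEV_BSHIFT	9			/* log2(DEV_BSIZE) */
#define	PGOFSET		((1 << PGSHIFT) - 1)

#define	ctod(x)	((x) << (PGSHIFT - DEV_BSHIFT))	/* pages -> 512 B blocks */
#define	ctob(x)	((x) << PGSHIFT)		/* pages -> bytes */
#define	btoc(x)	(((unsigned)(x) + PGOFSET) >> PGSHIFT) /* bytes -> pages, rounded up */

int
main(void)
{
	assert(ctod(1) == 8);		/* one 4 KB page = eight 512 B blocks */
	assert(ctob(3) == 12288);	/* three pages = 12 KB */
	assert(btoc(1) == 1);		/* any nonzero byte count occupies a page */
	assert(btoc(4097) == 2);	/* one byte past a page needs two */
	return (0);
}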
diff --git a/bsd/ppc/profile.h b/bsd/ppc/profile.h
deleted file mode 100644
index 7be38b3a9..000000000
--- a/bsd/ppc/profile.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1997, Apple Computer, Inc. All rights reserved.
- *
- */
-
-#ifndef _BSD_PPC_PROFILE_H_
-#define _BSD_PPC_PROFILE_H_
-
-#include <sys/appleapiopts.h>
-
-#ifdef KERNEL
-#ifdef __APPLE_API_UNSTABLE
-/*
- * Block interrupts during mcount so that those interrupts can also be
- * counted (as soon as we get done with the current counting).  On the
- * PPC platform, we can't use splhigh/splx, as those are C routines and can
- * recursively invoke mcount.
- */
-extern unsigned long disable_ee(void);
-extern void restore_ee(unsigned long smsr);
-
-#define MCOUNT_INIT		register unsigned long smsr;
-
-#define	MCOUNT_ENTER	smsr = disable_ee();
-
-#define	MCOUNT_EXIT		restore_ee(smsr);
-
-#endif /* __APPLE_API_UNSTABLE */
-#endif /* KERNEL */
-
-#endif /* _BSD_PPC_PROFILE_H_ */
diff --git a/bsd/ppc/reboot.h b/bsd/ppc/reboot.h
deleted file mode 100644
index 75e3a7656..000000000
--- a/bsd/ppc/reboot.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
- 
-#ifndef	_BSD_PPC_REBOOT_H_
-#define _BSD_PPC_REBOOT_H_
-
-#include <sys/appleapiopts.h>
-
-/*
- * Empty file (publicly)
- */
-#ifdef BSD_KERNEL_PRIVATE
-/*
- *	Use most significant 16 bits to avoid collisions with
- *	machine independent flags.
- */
-#define RB_POWERDOWN	0x00010000	/* power down on halt */
-#define	RB_NOBOOTRC	0x00020000	/* don't run '/etc/rc.boot' */
-#define	RB_DEBUG	0x00040000	/* drop into mini monitor on panic */
-#define	RB_EJECT	0x00080000	/* eject disks on halt */
-#define	RB_COMMAND	0x00100000	/* new boot command specified */
-#define RB_NOFP		0x00200000	/* don't use floating point */
-#define RB_BOOTNEXT	0x00400000	/* reboot into NeXT */
-#define RB_BOOTDOS	0x00800000	/* reboot into DOS */
-
-
-#endif /* BSD_KERNEL_PRIVATE */
-
-#endif /* _BSD_PPC_REBOOT_H_ */
-
diff --git a/bsd/ppc/setjmp.h b/bsd/ppc/setjmp.h
deleted file mode 100644
index 27eb59ab0..000000000
--- a/bsd/ppc/setjmp.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1998 Apple Computer, Inc.  All rights reserved.
- *
- *	File:	ppc/setjmp.h
- *
- *	Declaration of setjmp routines and data structures.
- */
-#ifndef _BSD_PPC_SETJMP_H_
-#define _BSD_PPC_SETJMP_H_
-
-#include <sys/cdefs.h>
-
-#define __need_struct_sigcontext
-#if defined(KERNEL)
-#define __need_struct_sigcontext32
-#define __need_struct_sigcontext64
-#endif /* KERNEL */
-#include <ppc/_structs.h>
-
-struct _jmp_buf {
-#if __DARWIN_UNIX03
-	_STRUCT_SIGCONTEXT	__sigcontext; /* kernel state preserved by set/longjmp */
-	unsigned int __vmask __attribute__((aligned(8))); /* vector mask register */
-	unsigned int __vreg[32 * 4] __attribute__((aligned(16)));
-		/* 32 128-bit vector registers */
-#else /* !__DARWIN_UNIX03 */
-	_STRUCT_SIGCONTEXT	sigcontext; /* kernel state preserved by set/longjmp */
-	unsigned int vmask __attribute__((aligned(8))); /* vector mask register */
-	unsigned int vreg[32 * 4] __attribute__((aligned(16)));
-		/* 32 128-bit vector registers */
-#endif /* __DARWIN_UNIX03 */
-};
-
-/*
- *	_JBLEN is the number of ints required to save the following:
- *	r1, r2, r13-r31, lr, cr, ctr, xer, sig == 26 register_t sized
- *	fr14 - fr31 = 18 doubles
- *	vmask, 32 vector registers = 129 ints
- *	1 int to keep all the elements aligned
- *
- *	register_t is 2 ints for ppc64 threads
- */
-#define _JBLEN64	(26*2 + 18*2 + 129 + 1)
-#define _JBLEN32	(26 + 18*2 + 129 + 1)
-#define _JBLEN_MAX	_JBLEN64
-
-/*
- * Locally scoped sizes
- */
-#if defined(__ppc64__)
-#define _JBLEN _JBLEN64
-#else
-#define _JBLEN _JBLEN32
-#endif
-
-#if defined(KERNEL)
-typedef _STRUCT_SIGCONTEXT32 jmp_buf32[1];
-typedef struct __sigjmp_buf32 {
-		int __storage[_JBLEN32 + 1] __attribute__((aligned(8)));
-		} sigjmp_buf32[1];
-
-typedef struct sigcontext64 jmp_buf64[1];
-typedef struct __sigjmp_buf64 {
-		int __storage[_JBLEN64 + 1] __attribute__((aligned(8)));
-		} sigjmp_buf64[1];
-
-/*
- * JMM - have to decide how the kernel will deal with this.
- * For now, hard-code the 32-bit types.
- */
-typedef _STRUCT_SIGCONTEXT32 jmp_buf[1];
-typedef struct __sigjmp_buf32 sigjmp_buf[1];
-
-#else
-typedef int jmp_buf[_JBLEN];
-typedef int sigjmp_buf[_JBLEN + 1];
-#endif
-
-__BEGIN_DECLS
-int	setjmp(jmp_buf);
-void	longjmp(jmp_buf, int);
-
-#ifndef _ANSI_SOURCE
-int	_setjmp(jmp_buf);
-void	_longjmp(jmp_buf, int);
-int	sigsetjmp(sigjmp_buf, int);
-void	siglongjmp(sigjmp_buf, int);
-#endif /* !_ANSI_SOURCE */
-
-#if !defined(_ANSI_SOURCE) && (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE))
-void	longjmperror(void);
-#endif /* neither ANSI nor POSIX */
-__END_DECLS
-
-#endif /* !_BSD_PPC_SETJMP_H_ */
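
The _JBLEN arithmetic in the comment above can be verified mechanically: for 64-bit threads the 26 register_t slots cost two ints each (52), the 18 doubles cost 36, the vector state costs 129, and one pad int rounds out the total. A compile-time check (hypothetical, not part of the original header):

#define	_JBLEN64	(26*2 + 18*2 + 129 + 1)	/* 218 ints == 872 bytes */
#define	_JBLEN32	(26 + 18*2 + 129 + 1)	/* 192 ints == 768 bytes */

/* A negative array size forces a compile error if the totals drift. */
typedef char _jblen64_is_218[(_JBLEN64 == 218) ? 1 : -1];
typedef char _jblen32_is_192[(_JBLEN32 == 192) ? 1 : -1];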
diff --git a/bsd/ppc/signal.h b/bsd/ppc/signal.h
deleted file mode 100644
index 31af83a02..000000000
--- a/bsd/ppc/signal.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1992, 1993 NeXT Computer, Inc.
- */
-
-#ifndef	_PPC_SIGNAL_H_
-#define	_PPC_SIGNAL_H_ 1
-
-#include <sys/cdefs.h>
-
-#ifndef _ANSI_SOURCE
-
-typedef int sig_atomic_t; 
-
-#include <sys/appleapiopts.h>
-
-#ifdef __APPLE_API_OBSOLETE
-
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-
-#define __need_struct_sigcontext
-#define __need_struct_sigcontext32
-#define __need_struct_sigcontext64
-#include <ppc/_structs.h>
-
-/*
- * Machine-dependent flags used in the sigvec call.
- */
-#define	SV_SAVE_REGS	0x1000	/* Save all regs in sigcontext */
-
-/*
- * regs_saved_t -- Describes which registers beyond what the kernel cares
- *		   about are saved to and restored from this sigcontext.
- *
- * The default is REGS_SAVED_CALLER, only the caller saved registers
- * are saved.  If the SV_SAVE_REGS flag was set when the signal
- * handler was registered with sigvec() then all the registers will be
- * saved in the sigcontext, and REGS_SAVED_ALL will be set.  The C
- * library uses REGS_SAVED_NONE in order to quickly restore kernel
- * state during a longjmp().
- */
-typedef enum {
-	REGS_SAVED_NONE,		/* Only kernel managed regs restored */
-	REGS_SAVED_CALLER,		/* "Caller saved" regs: rpc, a0-a7,
-					   t0-t4, at, lk0-lk1, xt1-xt20,
-					   xr0-xr1 */
-	REGS_SAVED_ALL			/* All registers */
-} regs_saved_t;
-
-#endif /* !_POSIX_C_SOURCE || _DARWIN_C_SOURCE */
-
-#endif /* __APPLE_API_OBSOLETE */
-
-#endif /* _ANSI_SOURCE */
-
-#endif /* _PPC_SIGNAL_H_ */
-
diff --git a/bsd/ppc/types.h b/bsd/ppc/types.h
deleted file mode 100644
index 21265f8e0..000000000
--- a/bsd/ppc/types.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright 1995 NeXT Computer, Inc. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	@(#)types.h	8.3 (Berkeley) 1/5/94
- */
-
-#ifndef	_MACHTYPES_H_
-#define	_MACHTYPES_H_
-
-#ifndef __ASSEMBLER__
-#include <ppc/_types.h>
-#include <sys/cdefs.h>
-/*
- * Basic integral types.  Omit the typedef if
- * not possible for a machine/compiler combination.
- */
-#ifndef _INT8_T
-#define _INT8_T
-typedef	__signed char		int8_t;
-#endif
-typedef	unsigned char		u_int8_t;
-#ifndef _INT16_T
-#define _INT16_T
-typedef	short			int16_t;
-#endif
-typedef	unsigned short		u_int16_t;
-#ifndef _INT32_T
-#define _INT32_T
-typedef	int			int32_t;
-#endif
-typedef	unsigned int		u_int32_t;
-#ifndef _INT64_T
-#define _INT64_T
-typedef	long long		int64_t;
-#endif
-typedef	unsigned long long	u_int64_t;
-
-#if __LP64__
-typedef int64_t			register_t;
-#else
-typedef int32_t			register_t;
-#endif
-
-#ifndef _INTPTR_T
-#define _INTPTR_T
-typedef __darwin_intptr_t	intptr_t;
-#endif
-#ifndef _UINTPTR_T
-#define _UINTPTR_T
-typedef unsigned long		uintptr_t;
-#endif
-
-#if !defined(_ANSI_SOURCE) && (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE))
-/* These types are used for reserving the largest possible size. */
-typedef u_int64_t		user_addr_t;	
-typedef u_int64_t		user_size_t;	
-typedef int64_t			user_ssize_t;
-typedef int64_t			user_long_t;
-typedef u_int64_t		user_ulong_t;
-typedef int64_t			user_time_t;
-typedef int64_t			user_off_t;
-#define USER_ADDR_NULL	((user_addr_t) 0)
-#define CAST_USER_ADDR_T(a_ptr)   ((user_addr_t)((uintptr_t)(a_ptr)))
-
-#ifdef KERNEL
-
-/*
- * These types are used when you know the word size of the target
- * user process. They can be used to create struct layouts independent
- * of the types and alignment requirements of the current running
- * kernel.
- */
-
-/*
- * The default ABI for the 32-bit PowerPC userspace is called "Power"
- * alignment, and aligns fundamental integral data types to their
- * natural boundary, with a maximum alignment of 4, even for 8-byte
- * quantities. Power alignment also pads a structure to 8-byte alignment
- * if the first field is an 8-byte quantity, which is not handled by
- * these typedefs. The default ABI for 64-bit PowerPC userspace is called
- * "Natural" alignment, and aligns fundamental integral data types
- * to their natural boundaries.
- */
-
-typedef __uint64_t		user64_addr_t __attribute__((aligned(8)));
-typedef __uint64_t		user64_size_t __attribute__((aligned(8)));
-typedef __int64_t		user64_ssize_t __attribute__((aligned(8)));
-typedef __int64_t		user64_long_t __attribute__((aligned(8)));
-typedef __uint64_t		user64_ulong_t __attribute__((aligned(8)));
-typedef __int64_t		user64_time_t __attribute__((aligned(8)));
-typedef __int64_t		user64_off_t __attribute__((aligned(8)));
-
-typedef __uint32_t		user32_addr_t;
-typedef __uint32_t		user32_size_t;
-typedef __int32_t		user32_ssize_t;
-typedef __int32_t		user32_long_t;
-typedef __uint32_t		user32_ulong_t;
-typedef __int32_t		user32_time_t;
-typedef __int64_t		user32_off_t __attribute__((aligned(4)));
-
-#endif /* KERNEL */
-
-#endif /* !_ANSI_SOURCE && (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
-
-/* This defines the size of syscall arguments after copying into the kernel: */
-typedef u_int64_t		syscall_arg_t;
-
-#ifndef __offsetof
-#define __offsetof(type, field) ((size_t)(&((type *)0)->field))
-#endif
-
-#endif /* __ASSEMBLER__ */
-#endif	/* _MACHTYPES_H_ */
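
The user32_/user64_ typedefs deleted above exist so kernel code can declare one copyin/copyout layout per user ABI instead of relying on the kernel's own alignment rules. A minimal sketch of the pattern, assuming the usual xnu interfaces (proc_is64bit(), copyin(), CAST_USER_ADDR_T); the foo_args structures and the helper are invented for illustration:

#include <sys/systm.h>		/* copyin() */
#include <sys/proc.h>		/* proc_is64bit() */

/* Hypothetical argument blocks laid out per user ABI. */
struct foo_args64 {
	user64_addr_t	buf;	/* always 8 bytes, 8-byte aligned */
	user64_size_t	len;
};

struct foo_args32 {
	user32_addr_t	buf;	/* natural 4-byte fields */
	user32_size_t	len;
};

static int
foo_copyin(proc_t p, user_addr_t uaddr, user_addr_t *bufp, user_size_t *lenp)
{
	int error;

	if (proc_is64bit(p)) {
		struct foo_args64 a;

		if ((error = copyin(uaddr, &a, sizeof(a))) != 0)
			return (error);
		*bufp = a.buf;
		*lenp = a.len;
	} else {
		struct foo_args32 a;

		if ((error = copyin(uaddr, &a, sizeof(a))) != 0)
			return (error);
		*bufp = CAST_USER_ADDR_T(a.buf);	/* zero-extend */
		*lenp = a.len;
	}
	return (0);
}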
diff --git a/bsd/ppc/ucontext.h b/bsd/ppc/ucontext.h
deleted file mode 100644
index 5c391c283..000000000
--- a/bsd/ppc/ucontext.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#ifndef _PPC_UCONTEXT_H_
-#define _PPC_UCONTEXT_H_
-
-
-#include <mach/ppc/_types.h>
-
-#if !__DARWIN_UNIX03
-struct mcontext {
-	struct ppc_exception_state	es;
-	struct ppc_thread_state		ss;
-	struct ppc_float_state		fs;
-	struct ppc_vector_state		vs;
-};
-#define PPC_MCONTEXT_SIZE	(PPC_THREAD_STATE_COUNT + PPC_FLOAT_STATE_COUNT + PPC_EXCEPTION_STATE_COUNT + PPC_VECTOR_STATE_COUNT) * sizeof(int)
-#else /* __DARWIN_UNIX03 */
-struct __darwin_mcontext {
-	struct __darwin_ppc_exception_state	es;
-	struct __darwin_ppc_thread_state	ss;
-	struct __darwin_ppc_float_state		fs;
-	struct __darwin_ppc_vector_state	vs;
-};
-#endif /* __DARWIN_UNIX03 */
-
-#ifndef _MCONTEXT_T
-#define _MCONTEXT_T
-typedef __darwin_mcontext_t		mcontext_t;
-#endif
-
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-struct mcontext64 {
-	struct ppc_exception_state64	es;
-	struct ppc_thread_state64	ss;
-	struct ppc_float_state		fs;
-	struct ppc_vector_state		vs;
-};
-#define PPC_MCONTEXT64_SIZE	(PPC_THREAD_STATE64_COUNT + PPC_FLOAT_STATE_COUNT + PPC_EXCEPTION_STATE_COUNT + PPC_VECTOR_STATE_COUNT) * sizeof(int)
-
-#ifndef _MCONTEXT64_T
-#define _MCONTEXT64_T
-typedef struct mcontext64  * mcontext64_t;
-#endif
-
-#endif /* !_POSIX_C_SOURCE || _DARWIN_C_SOURCE */
-
-#endif /* _PPC_UCONTEXT_H_ */
diff --git a/bsd/ppc/vmparam.h b/bsd/ppc/vmparam.h
deleted file mode 100644
index 8e682fcdf..000000000
--- a/bsd/ppc/vmparam.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#ifndef	_BSD_PPC_VMPARAM_H_
-#define	_BSD_PPC_VMPARAM_H_ 1
-
-#include <sys/resource.h>
-
-#define	USRSTACK	(0xc0000000)
-
-/* 
- * put the default 64-bit stack at the max address
- * (minus one 32-bit address space for other incidentals)
- */
-#define	USRSTACK64 (0x00007FFF5FC00000ULL)
-
-/*
- * Virtual memory related constants, all in bytes
- */
-#ifndef DFLDSIZ
-#define	DFLDSIZ		(RLIM_INFINITY)		/* initial data size limit */
-// XXX Not enforced
-//#define DFLDSIZ	(6*1024*1024)		/* initial data size limit */
-#endif
-#ifndef MAXDSIZ
-#define	MAXDSIZ		(RLIM_INFINITY)		/* max data size */
-#endif
-#ifndef	DFLSSIZ
-#define	DFLSSIZ		(8*1024*1024)		/* initial stack size limit */
-#endif
-#ifndef	MAXSSIZ
-#define	MAXSSIZ		(64*1024*1024)		/* max stack size */
-#endif
-#ifndef	DFLCSIZ
-#define DFLCSIZ		(0)			/* initial core size limit */
-#endif
-#ifndef	MAXCSIZ
-#define MAXCSIZ		(RLIM_INFINITY)		/* max core size */
-#endif
-
-#endif	/* _BSD_PPC_VMPARAM_H_ */
diff --git a/bsd/security/Makefile b/bsd/security/Makefile
index b574d2956..92974f6e2 100644
--- a/bsd/security/Makefile
+++ b/bsd/security/Makefile
@@ -10,8 +10,6 @@ include $(MakeInc_def)
 INSTINC_SUBDIRS = \
 	audit
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 INSTINC_SUBDIRS_X86_64 = \
@@ -21,8 +19,6 @@ INSTINC_SUBDIRS_ARM = \
 EXPINC_SUBDIRS = \
 	audit
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS_X86_64 = \
diff --git a/bsd/security/audit/Makefile b/bsd/security/audit/Makefile
index 660e7c155..ac552f60d 100644
--- a/bsd/security/audit/Makefile
+++ b/bsd/security/audit/Makefile
@@ -9,8 +9,6 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 INSTINC_SUBDIRS_X86_64 = \
@@ -19,8 +17,6 @@ INSTINC_SUBDIRS_ARM = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS_X86_64 = \
diff --git a/bsd/security/audit/audit.c b/bsd/security/audit/audit.c
index c454867bf..1ee6c85cd 100644
--- a/bsd/security/audit/audit.c
+++ b/bsd/security/audit/audit.c
@@ -231,23 +231,25 @@ audit_record_ctor(proc_t p, struct kaudit_record *ar)
 	ar->k_ar.ar_magic = AUDIT_RECORD_MAGIC;
 	nanotime(&ar->k_ar.ar_starttime);
 
-	cred = kauth_cred_proc_ref(p);
+	if (PROC_NULL != p) {
+		cred = kauth_cred_proc_ref(p);
 
-	/*
-	 * Export the subject credential.
-	 */
-	cru2x(cred, &ar->k_ar.ar_subj_cred);
-	ar->k_ar.ar_subj_ruid = cred->cr_ruid;
-	ar->k_ar.ar_subj_rgid = cred->cr_rgid;
-	ar->k_ar.ar_subj_egid = cred->cr_groups[0];
-	ar->k_ar.ar_subj_pid = p->p_pid;
-	ar->k_ar.ar_subj_auid = cred->cr_audit.as_aia_p->ai_auid;
-	ar->k_ar.ar_subj_asid = cred->cr_audit.as_aia_p->ai_asid;
-	bcopy(&cred->cr_audit.as_mask, &ar->k_ar.ar_subj_amask,
-    	    sizeof(struct au_mask));
-	bcopy(&cred->cr_audit.as_aia_p->ai_termid, &ar->k_ar.ar_subj_term_addr,
-	    sizeof(struct au_tid_addr));
-	kauth_cred_unref(&cred);
+		/*
+		 * Export the subject credential.
+		 */
+		cru2x(cred, &ar->k_ar.ar_subj_cred);
+		ar->k_ar.ar_subj_ruid = kauth_cred_getruid(cred);
+		ar->k_ar.ar_subj_rgid = kauth_cred_getrgid(cred);
+		ar->k_ar.ar_subj_egid = kauth_cred_getgid(cred);
+		ar->k_ar.ar_subj_pid = p->p_pid;
+		ar->k_ar.ar_subj_auid = cred->cr_audit.as_aia_p->ai_auid;
+		ar->k_ar.ar_subj_asid = cred->cr_audit.as_aia_p->ai_asid;
+		bcopy(&cred->cr_audit.as_mask, &ar->k_ar.ar_subj_amask,
+    		    sizeof(struct au_mask));
+		bcopy(&cred->cr_audit.as_aia_p->ai_termid,
+		    &ar->k_ar.ar_subj_term_addr, sizeof(struct au_tid_addr));
+		kauth_cred_unref(&cred);
+	}
 }
 
 static void
@@ -311,6 +313,7 @@ audit_init(void)
 	audit_kinfo.ai_termid.at_type = AU_IPv4;
 	audit_kinfo.ai_termid.at_addr[0] = INADDR_ANY;
 
+	_audit_lck_grp_init();
 	mtx_init(&audit_mtx, "audit_mtx", NULL, MTX_DEF);
 	KINFO_LOCK_INIT();
 	cv_init(&audit_worker_cv, "audit_worker_cv");
@@ -353,7 +356,7 @@ audit_shutdown(void)
 /*
  * Return the current thread's audit record, if any.
  */
-__inline__ struct kaudit_record *
+struct kaudit_record *
 currecord(void)
 {
 
@@ -373,11 +376,24 @@ audit_new(int event, proc_t p, __unused struct uthread *uthread)
 {
 	struct kaudit_record *ar;
 	int no_record;
+	int audit_override;
 
+	/*
+	 * Override audit_suspended and audit_enabled so that session
+	 * events are always audited.
+	 *
+	 * XXXss - This really needs to be a generalized call to a filter
+	 * interface so that future users of the audit subsystem can simply
+	 * be plugged in.
+	 */
+	audit_override = (AUE_SESSION_START == event ||
+	    AUE_SESSION_UPDATE == event || AUE_SESSION_END == event ||
+	    AUE_SESSION_CLOSE == event);
+	
 	mtx_lock(&audit_mtx);
 	no_record = (audit_suspended || !audit_enabled);
 	mtx_unlock(&audit_mtx);
-	if (no_record)
+	if (!audit_override && no_record)
 		return (NULL);
 
 	/*
@@ -395,10 +411,13 @@ audit_new(int event, proc_t p, __unused struct uthread *uthread)
 	ar->k_ar.ar_event = event;
 
 #if CONFIG_MACF
-	if (audit_mac_new(p, ar) != 0) {
-		zfree(audit_record_zone, ar);
-		return (NULL);
-	}
+	if (PROC_NULL != p) {
+		if (audit_mac_new(p, ar) != 0) {
+			zfree(audit_record_zone, ar);
+			return (NULL);
+		}
+	} else
+		ar->k_ar.ar_mac_records = NULL;
 #endif
 
 	mtx_lock(&audit_mtx);
@@ -414,7 +433,8 @@ audit_free(struct kaudit_record *ar)
 
 	audit_record_dtor(ar);
 #if CONFIG_MACF
-	audit_mac_free(ar);
+	if (NULL != ar->k_ar.ar_mac_records)
+		audit_mac_free(ar);
 #endif
 	zfree(audit_record_zone, ar);
 }
@@ -427,6 +447,7 @@ audit_commit(struct kaudit_record *ar, int error, int retval)
 	au_id_t auid;
 	int sorf;
 	struct au_mask *aumask;
+	int audit_override;
 
 	if (ar == NULL)
 		return;
@@ -487,6 +508,17 @@ audit_commit(struct kaudit_record *ar, int error, int retval)
 	event = ar->k_ar.ar_event;
 	class = au_event_class(event);
 
+	/*
+	 * See if we need to override the audit_suspended and audit_enabled
+	 * flags.
+	 *
+	 * XXXss - This check needs to be generalized so new filters can
+	 * easily be added.
+	 */
+	audit_override = (AUE_SESSION_START == event ||
+	    AUE_SESSION_UPDATE == event || AUE_SESSION_END == event ||
+	    AUE_SESSION_CLOSE == event);
+
 	ar->k_ar_commit |= AR_COMMIT_KERNEL;
 	if (au_preselect(event, class, aumask, sorf) != 0)
 		ar->k_ar_commit |= AR_PRESELECT_TRAIL;
@@ -494,7 +526,8 @@ audit_commit(struct kaudit_record *ar, int error, int retval)
 	    ar->k_ar_commit & AR_PRESELECT_TRAIL) != 0)
 		ar->k_ar_commit |= AR_PRESELECT_PIPE;
 	if ((ar->k_ar_commit & (AR_PRESELECT_TRAIL | AR_PRESELECT_PIPE |
-	    AR_PRESELECT_USER_TRAIL | AR_PRESELECT_USER_PIPE)) == 0) {
+	    AR_PRESELECT_USER_TRAIL | AR_PRESELECT_USER_PIPE |
+	    AR_PRESELECT_FILTER)) == 0) {
 		mtx_lock(&audit_mtx);
 		audit_pre_q_len--;
 		mtx_unlock(&audit_mtx);
@@ -511,7 +544,7 @@ audit_commit(struct kaudit_record *ar, int error, int retval)
 	 * enabled should still be committed?
 	 */
 	mtx_lock(&audit_mtx);
-	if (audit_suspended || !audit_enabled) {
+	if (!audit_override && (audit_suspended || !audit_enabled)) {
 		audit_pre_q_len--;
 		mtx_unlock(&audit_mtx);
 		audit_free(ar);
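
audit_new() and audit_commit() above now compute the same four-event session predicate inline, and both XXXss comments point at a future filter interface. One shape such a hook could take is a single predicate shared by both call sites (hypothetical, named only for illustration):

/* Hypothetical helper: non-zero for events that must be audited even
 * while audit_suspended is set or audit_enabled is clear. */
static int
audit_event_is_forced(int event)
{
	switch (event) {
	case AUE_SESSION_START:
	case AUE_SESSION_UPDATE:
	case AUE_SESSION_END:
	case AUE_SESSION_CLOSE:
		return (1);
	default:
		return (0);
	}
}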
diff --git a/bsd/security/audit/audit.h b/bsd/security/audit/audit.h
index 5af1da795..d85139b2b 100644
--- a/bsd/security/audit/audit.h
+++ b/bsd/security/audit/audit.h
@@ -174,7 +174,7 @@ void	audit_syscall_exit(int error, struct proc *proc,
 void	audit_mach_syscall_enter(unsigned short audit_event);
 void	audit_mach_syscall_exit(int retval, struct uthread *uthread);
 
-extern struct auditinfo_addr audit_default_aia;
+extern struct auditinfo_addr *audit_default_aia_p;
 
 /*
  * The remaining kernel functions are conditionally compiled in as they are
@@ -262,20 +262,23 @@ typedef struct ucred *kauth_cred_t;
 
 void	 audit_session_ref(kauth_cred_t cred);
 void	 audit_session_unref(kauth_cred_t cred);
-void	 audit_session_procnew(kauth_cred_t cred);
-void	 audit_session_procexit(kauth_cred_t cred);
+void	 audit_session_procnew(proc_t p);
+void	 audit_session_procexit(proc_t p);
 int	 audit_session_spawnjoin(proc_t p, ipc_port_t port);
 
+void	 audit_sdev_submit(au_id_t auid, au_asid_t asid, void *record,
+	    u_int record_len);
+
 /*
  * Audit session macros. 
  */
-#define	IS_VALID_SESSION(a)	((a) != NULL && (a) != &audit_default_aia)
+#define	IS_VALID_SESSION(a)	((a) != NULL && (a) != audit_default_aia_p)
 
 #define	AUDIT_SESSION_REF(cred)		audit_session_ref(cred)
 #define	AUDIT_SESSION_UNREF(cred)	audit_session_unref(cred)
 
-#define	AUDIT_SESSION_PROCNEW(cred) 	audit_session_procnew(cred)
-#define	AUDIT_SESSION_PROCEXIT(cred)	audit_session_procexit(cred)
+#define	AUDIT_SESSION_PROCNEW(p)	audit_session_procnew(p)
+#define	AUDIT_SESSION_PROCEXIT(p)	audit_session_procexit(p)
 
 #if CONFIG_MACF
 /* 
@@ -292,8 +295,8 @@ extern au_event_t sys_au_event[];
 #define	AUDIT_RECORD() \
 	((struct uthread*)get_bsdthread_info(current_thread()))->uu_ar
 
-#ifndef	AUDIT_USE_BUILDIN_EXPECT
-#define	AUDIT_USE_BUILDIN_EXPECT
+#ifndef	AUDIT_USE_BUILTIN_EXPECT
+#define	AUDIT_USE_BUILTIN_EXPECT
 #endif
 
 #ifdef	AUDIT_USE_BUILTIN_EXPECT
diff --git a/bsd/security/audit/audit_arg.c b/bsd/security/audit/audit_arg.c
index 66792758f..eb6d5d434 100644
--- a/bsd/security/audit/audit_arg.c
+++ b/bsd/security/audit/audit_arg.c
@@ -308,10 +308,10 @@ audit_arg_process(struct kaudit_record *ar, proc_t p)
 	ar->k_ar.ar_arg_asid = my_cred->cr_audit.as_aia_p->ai_asid;
 	bcopy(&my_cred->cr_audit.as_aia_p->ai_termid,
 	    &ar->k_ar.ar_arg_termid_addr, sizeof(au_tid_addr_t));
-	ar->k_ar.ar_arg_euid = my_cred->cr_uid;
-	ar->k_ar.ar_arg_egid = my_cred->cr_groups[0];
-	ar->k_ar.ar_arg_ruid = my_cred->cr_ruid;
-	ar->k_ar.ar_arg_rgid = my_cred->cr_rgid;
+	ar->k_ar.ar_arg_euid = kauth_cred_getuid(my_cred);
+	ar->k_ar.ar_arg_egid = kauth_cred_getgid(my_cred);
+	ar->k_ar.ar_arg_ruid = kauth_cred_getruid(my_cred);
+	ar->k_ar.ar_arg_rgid = kauth_cred_getrgid(my_cred);
 	kauth_cred_unref(&my_cred);
 	ar->k_ar.ar_arg_pid = p->p_pid;
 	ARG_SET_VALID(ar, ARG_AUID | ARG_EUID | ARG_EGID | ARG_RUID |
diff --git a/bsd/security/audit/audit_bsd.c b/bsd/security/audit/audit_bsd.c
index fdae0d79d..6f4d416c9 100644
--- a/bsd/security/audit/audit_bsd.c
+++ b/bsd/security/audit/audit_bsd.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2008-2009 Apple Inc.
+ * Copyright (c) 2008-2010 Apple Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -59,6 +59,11 @@ struct mhdr {
 	char		 	 mh_data[0];
 };
 
+/*
+ * The lock group for the audit subsystem. 
+ */
+static lck_grp_t *audit_lck_grp = NULL;
+
 #define	AUDIT_MHMAGIC	0x4D656C53
 
 #if AUDIT_MALLOC_DEBUG
@@ -174,28 +179,25 @@ _audit_malloc(size_t size, au_malloc_type_t *type, int flags, const char *fn)
 _audit_malloc(size_t size, au_malloc_type_t *type, int flags)
 #endif
 {
-	union {
-	    struct mhdr	hdr;
-	    char mem[size + sizeof (struct mhdr)];
-	} *mem;
-	size_t	memsize = sizeof (*mem);
+	struct mhdr	*hdr;
+	size_t	memsize = sizeof (*hdr) + size;
 
 	if (size == 0)
 		return (NULL);
 	if (flags & M_NOWAIT) {
-		mem = (void *)kalloc_noblock(memsize);
+		hdr = (void *)kalloc_noblock(memsize);
 	} else {
-		mem = (void *)kalloc(memsize);
-		if (mem == NULL)
+		hdr = (void *)kalloc(memsize);
+		if (hdr == NULL)
 			panic("_audit_malloc: kernel memory exhausted");
 	}
-	if (mem == NULL)
+	if (hdr == NULL)
 		return (NULL);
-	mem->hdr.mh_size = memsize;
-	mem->hdr.mh_type = type;
-	mem->hdr.mh_magic = AUDIT_MHMAGIC;
+	hdr->mh_size = memsize;
+	hdr->mh_type = type;
+	hdr->mh_magic = AUDIT_MHMAGIC;
 	if (flags & M_ZERO)
-		memset(mem->hdr.mh_data, 0, size);
+		memset(hdr->mh_data, 0, size);
 #if AUDIT_MALLOC_DEBUG
 	if (type != NULL && type->mt_type < NUM_MALLOC_TYPES) {
 		OSAddAtomic64(memsize, &type->mt_size);
@@ -206,7 +208,7 @@ _audit_malloc(size_t size, au_malloc_type_t *type, int flags)
 		audit_malloc_types[type->mt_type] = type;
 	}
 #endif /* AUDIT_MALLOC_DEBUG */
-	return (mem->hdr.mh_data);
+	return (hdr->mh_data);
 }
 
 /*
@@ -316,15 +318,99 @@ _audit_cv_wait_sig(struct cv *cvp, lck_mtx_t *mp, const char *desc)
 }
 
 /*
- * Simple recursive lock. 
+ * BSD Mutexes.
+ */
+void
+#if DIAGNOSTIC
+_audit_mtx_init(struct mtx *mp, const char *lckname)
+#else
+_audit_mtx_init(struct mtx *mp, __unused const char *lckname)
+#endif
+{
+	mp->mtx_lock = lck_mtx_alloc_init(audit_lck_grp, LCK_ATTR_NULL);
+	KASSERT(mp->mtx_lock != NULL, 
+	    ("_audit_mtx_init: Could not allocate a mutex."));
+#if DIAGNOSTIC
+	strlcpy(mp->mtx_name, lckname, AU_MAX_LCK_NAME);	
+#endif
+}
+
+void
+_audit_mtx_destroy(struct mtx *mp)
+{
+
+	if (mp->mtx_lock) {
+		lck_mtx_free(mp->mtx_lock, audit_lck_grp);
+		mp->mtx_lock = NULL;
+	}
+}
+
+/*
+ * BSD rw locks.
  */
 void
-_audit_rlck_init(struct rlck *lp, const char *grpname)
+#if DIAGNOSTIC
+_audit_rw_init(struct rwlock *lp, const char *lckname)
+#else
+_audit_rw_init(struct rwlock *lp, __unused const char *lckname)
+#endif
+{
+	lp->rw_lock = lck_rw_alloc_init(audit_lck_grp, LCK_ATTR_NULL);
+	KASSERT(lp->rw_lock != NULL, 
+	    ("_audit_rw_init: Could not allocate a rw lock."));
+#if DIAGNOSTIC
+	strlcpy(lp->rw_name, lckname, AU_MAX_LCK_NAME);	
+#endif
+}
+
+void
+_audit_rw_destroy(struct rwlock *lp)
+{
+
+	if (lp->rw_lock) {
+		lck_rw_free(lp->rw_lock, audit_lck_grp);
+		lp->rw_lock = NULL;
+	}
+}
+
+/*
+ * Wait on a condition variable in a continuation (i.e., yield the kernel stack).
+ * A cv_signal or cv_broadcast on the same condition variable will cause
+ * the thread to be scheduled.
+ */
+int
+_audit_cv_wait_continuation(struct cv *cvp, lck_mtx_t *mp, thread_continue_t function)
 {
+	int status = KERN_SUCCESS;
+
+	cvp->cv_waiters++;
+	assert_wait(cvp, THREAD_UNINT);
+	lck_mtx_unlock(mp);
+
+	status = thread_block(function);
 
-	lp->rl_grp = lck_grp_alloc_init(grpname, LCK_GRP_ATTR_NULL);
-	lp->rl_mtx = lck_mtx_alloc_init(lp->rl_grp, LCK_ATTR_NULL);
+	/* should not be reached, but just in case, re-lock */
+	lck_mtx_lock(mp);
+
+	return status;
+}
+
+/*
+ * Simple recursive lock. 
+ */
+void
+#if DIAGNOSTIC
+_audit_rlck_init(struct rlck *lp, const char *lckname)
+#else
+_audit_rlck_init(struct rlck *lp, __unused const char *lckname)
+#endif
+{
 
+	lp->rl_mtx = lck_mtx_alloc_init(audit_lck_grp, LCK_ATTR_NULL);
+	KASSERT(lp->rl_mtx != NULL, 
+	    ("_audit_rlck_init: Could not allocate a recursive lock."));
+#if DIAGNOSTIC
+	strlcpy(lp->rl_name, lckname, AU_MAX_LCK_NAME);	
+#endif
 	lp->rl_thread = 0;
 	lp->rl_recurse = 0;
 }
@@ -368,12 +454,8 @@ _audit_rlck_destroy(struct rlck *lp)
 {
 
 	if (lp->rl_mtx) {
-		lck_mtx_free(lp->rl_mtx, lp->rl_grp);
-		lp->rl_mtx = 0;
-	}
-	if (lp->rl_grp) {
-		lck_grp_free(lp->rl_grp);
-		lp->rl_grp = 0;
+		lck_mtx_free(lp->rl_mtx, audit_lck_grp);
+		lp->rl_mtx = NULL;
 	}
 }
 
@@ -397,12 +479,19 @@ _audit_rlck_assert(struct rlck *lp, u_int assert)
  * Simple sleep lock.
  */
 void
-_audit_slck_init(struct slck *lp, const char *grpname)
+#if DIAGNOSTIC
+_audit_slck_init(struct slck *lp, const char *lckname)
+#else
+_audit_slck_init(struct slck *lp, __unused const char *lckname)
+#endif
 {
 
-	lp->sl_grp = lck_grp_alloc_init(grpname, LCK_GRP_ATTR_NULL);
-	lp->sl_mtx = lck_mtx_alloc_init(lp->sl_grp, LCK_ATTR_NULL);
-
+	lp->sl_mtx = lck_mtx_alloc_init(audit_lck_grp, LCK_ATTR_NULL);
+	KASSERT(lp->sl_mtx != NULL, 
+	    ("_audit_slck_init: Could not allocate a sleep lock."));
+#if DIAGNOSTIC
+	strlcpy(lp->sl_name, lckname, AU_MAX_LCK_NAME);	
+#endif
 	lp->sl_locked = 0;
 	lp->sl_waiting = 0;
 }
@@ -442,7 +531,7 @@ _audit_slck_unlock(struct slck *lp)
 		lp->sl_waiting = 0;
 
 		/* Wake up *all* sleeping threads. */
-		thread_wakeup_prim((event_t) lp, /*1 thr*/ 0, THREAD_AWAKENED);
+		wakeup((event_t) lp);
 	}
 	lck_mtx_unlock(lp->sl_mtx);
 }
@@ -482,12 +571,8 @@ _audit_slck_destroy(struct slck *lp)
 {
 
 	if (lp->sl_mtx) {
-		lck_mtx_free(lp->sl_mtx, lp->sl_grp);
-		lp->sl_mtx = 0;
-	}
-	if (lp->sl_grp) {
-		lck_grp_free(lp->sl_grp);
-		lp->sl_grp = 0;
+		lck_mtx_free(lp->sl_mtx, audit_lck_grp);
+		lp->sl_mtx = NULL;
 	}
 }
 
@@ -545,6 +630,18 @@ _audit_ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
 	return (rv);	
 }
 
+/*
+ * Initialize lock group for audit related locks/mutexes.
+ */
+void
+_audit_lck_grp_init(void)
+{
+	audit_lck_grp = lck_grp_alloc_init("Audit", LCK_GRP_ATTR_NULL);
+
+	KASSERT(audit_lck_grp != NULL,
+	    ("_audit_lck_grp_init: Could not allocate the audit lock group."));
+}
+
 int
 audit_send_trigger(unsigned int trigger)
 {
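
The _audit_malloc() rework above trades the old variable-length union for a flat header-plus-payload allocation: callers still get mh_data back, and the free path can step from the payload pointer back to the header because mh_data is a zero-length array at the end of struct mhdr. A sketch of that recovery step (hypothetical helper; it assumes, as the allocator does, that no padding separates the header from mh_data):

/* Hypothetical inverse of _audit_malloc()'s return value: step back
 * from the payload to the header it was allocated behind. */
static struct mhdr *
payload_to_mhdr(void *addr)
{
	struct mhdr *hdr = addr;

	hdr--;			/* header sits immediately before mh_data */
	KASSERT(hdr->mh_magic == AUDIT_MHMAGIC,
	    ("payload_to_mhdr: bad magic"));
	return (hdr);
}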
diff --git a/bsd/security/audit/audit_bsd.h b/bsd/security/audit/audit_bsd.h
index 23b61a5df..72db99f35 100644
--- a/bsd/security/audit/audit_bsd.h
+++ b/bsd/security/audit/audit_bsd.h
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2008, Apple Inc.
+ * Copyright (c) 2008-2009, Apple Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -55,6 +55,8 @@
 #endif
 #endif	/* DIAGNOSTIC */
 
+#define	AU_MAX_LCK_NAME	32	
+
 #if __DARWIN_BYTE_ORDER == __DARWIN_BIG_ENDIAN
 #define be16enc(p, d)   *(p) = (d)
 #define be32enc(p, d)   *(p) = (d)
@@ -176,7 +178,9 @@ struct cv {
  */
 struct mtx {
 	lck_mtx_t	*mtx_lock;
-	lck_grp_t	*mtx_grp;
+#if DIAGNOSTIC
+	char		 mtx_name[AU_MAX_LCK_NAME];
+#endif
 };
 
 /*
@@ -184,7 +188,9 @@ struct mtx {
  */
 struct rwlock {
 	lck_rw_t	*rw_lock;
-	lck_grp_t	*rw_grp;
+#if DIAGNOSTIC
+	char		 rw_name[AU_MAX_LCK_NAME];
+#endif
 };
 
 /*
@@ -192,9 +198,11 @@ struct rwlock {
  */
 struct slck {
 	lck_mtx_t	*sl_mtx;
-	lck_grp_t	*sl_grp;
 	int		 sl_locked;
 	int		 sl_waiting;
+#if DIAGNOSTIC
+	char		 sl_name[AU_MAX_LCK_NAME];
+#endif
 };
 
 /*
@@ -202,9 +210,11 @@ struct slck {
  */
 struct rlck {
 	lck_mtx_t	*rl_mtx;
-	lck_grp_t	*rl_grp;
 	uint32_t	 rl_recurse;
 	thread_t	 rl_thread;
+#if DIAGNOSTIC
+	char		 rl_name[AU_MAX_LCK_NAME];
+#endif
 };
 	
 /*
@@ -216,6 +226,8 @@ void    _audit_cv_signal(struct cv *cvp);
 void    _audit_cv_broadcast(struct cv *cvp);
 void    _audit_cv_wait(struct cv *cvp, lck_mtx_t *mp, const char *desc);
 int     _audit_cv_wait_sig(struct cv *cvp, lck_mtx_t *mp, const char *desc);
+int	_audit_cv_wait_continuation(struct cv *cvp, lck_mtx_t *mp,
+	    thread_continue_t function);
 #define cv_init(cvp, desc)	  _audit_cv_init(cvp, desc)
 #define cv_destroy(cvp)		  _audit_cv_destroy(cvp)
 #define cv_signal(cvp)		  _audit_cv_signal(cvp)
@@ -223,28 +235,20 @@ int     _audit_cv_wait_sig(struct cv *cvp, lck_mtx_t *mp, const char *desc);
 #define cv_broadcastpri(cvp, pri) _audit_cv_broadcast(cvp)
 #define cv_wait(cvp, mp)	  _audit_cv_wait(cvp, (mp)->mtx_lock, #cvp)
 #define cv_wait_sig(cvp, mp)	  _audit_cv_wait_sig(cvp, (mp)->mtx_lock, #cvp)
+#define cv_wait_continuation(cvp,mp,f) \
+    _audit_cv_wait_continuation(cvp, (mp)->mtx_lock, f)
 
 /*
  * BSD Mutexes.
  */
-#define	LOCK_MAX_NAME	64
-#define mtx_init(mp, name, type, opts)  do {				\
-	(mp)->mtx_grp = lck_grp_alloc_init(name, LCK_GRP_ATTR_NULL);	\
-        (mp)->mtx_lock = lck_mtx_alloc_init((mp)->mtx_grp,		\
-	     LCK_ATTR_NULL);						\
-} while(0)
-#define mtx_lock(mp)		lck_mtx_lock((mp)->mtx_lock)
-#define mtx_unlock(mp)		lck_mtx_unlock((mp)->mtx_lock)
-#define	mtx_destroy(mp) do {						\
-	if ((mp)->mtx_lock) {						\
-		lck_mtx_free((mp)->mtx_lock, (mp)->mtx_grp);		\
-		(mp)->mtx_lock = 0;					\
-	}								\
-	if ((mp)->mtx_grp) {						\
-		lck_grp_free((mp)->mtx_grp);				\
-		(mp)->mtx_grp = 0;					\
-	}								\
-} while (0)
+void	_audit_mtx_init(struct mtx *mp, const char *name);
+void	_audit_mtx_destroy(struct mtx *mp);
+#define	mtx_init(mp, name, type, opts) \
+				_audit_mtx_init(mp, name)
+#define	mtx_lock(mp)		lck_mtx_lock((mp)->mtx_lock)
+#define	mtx_unlock(mp)		lck_mtx_unlock((mp)->mtx_lock)
+#define	mtx_destroy(mp)		_audit_mtx_destroy(mp)
+#define mtx_yield(mp)		lck_mtx_yield((mp)->mtx_lock)
 
 /*
  * Sleep lock functions.
@@ -277,25 +281,14 @@ void		_audit_rlck_destroy(struct rlck *lp);
 /*
  * BSD rw locks.
  */
-#define	rw_init(lp, name)  do {						\
-        (lp)->rw_grp = lck_grp_alloc_init(name, LCK_GRP_ATTR_NULL);	\
-        (lp)->rw_lock = lck_rw_alloc_init((lp)->rw_grp,			\
-            LCK_ATTR_NULL);						\
-} while(0)
+void	_audit_rw_init(struct rwlock *lp, const char *name);
+void	_audit_rw_destroy(struct rwlock *lp);
+#define	rw_init(lp, name)	_audit_rw_init(lp, name)
 #define	rw_rlock(lp)		lck_rw_lock_shared((lp)->rw_lock)
 #define	rw_runlock(lp)		lck_rw_unlock_shared((lp)->rw_lock)
 #define	rw_wlock(lp)		lck_rw_lock_exclusive((lp)->rw_lock)
 #define	rw_wunlock(lp)		lck_rw_unlock_exclusive((lp)->rw_lock)
-#define	rw_destroy(lp) do {						\
-	if ((lp)->rw_lock) {						\
-		lck_rw_free((lp)->rw_lock, (lp)->rw_grp);		\
-		(lp)->rw_lock = 0;					\
-	}								\
-	if ((lp)->rw_grp) {						\
-		lck_grp_free((lp)->rw_grp);				\
-		(lp)->rw_grp = 0;					\
-	}								\
-} while (0)
+#define	rw_destroy(lp)		_audit_rw_destroy(lp)
 	
 #define	MA_OWNED		LCK_MTX_ASSERT_OWNED
 #define	RA_LOCKED		LCK_RW_ASSERT_HELD
@@ -319,6 +312,11 @@ void		_audit_rlck_destroy(struct rlck *lp);
 #define	slck_assert(lp, wht)	
 #endif /* DIAGNOSTIC */
 
+/*
+ * Synchronization initialization.
+ */
+void	_audit_lck_grp_init(void);
+
 /*
  * BSD (IPv6) event rate limiter.
  */ 
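
The new cv_wait_continuation() wrapper above lets a long-lived kernel thread give up its stack while blocked: thread_block() resumes execution at the supplied continuation rather than after the call. A hypothetical worker loop using it (the names are invented; the real audit worker is wired up elsewhere in this patch):

static struct mtx	worker_mtx;
static struct cv	worker_cv;

/* Hypothetical continuation-style worker: each wakeup re-enters at the
 * top of worker_continue() instead of returning from the wait. */
static void
worker_continue(__unused void *arg, __unused wait_result_t wr)
{
	mtx_lock(&worker_mtx);
	/* ... drain whatever work is queued, under worker_mtx ... */
	(void)cv_wait_continuation(&worker_cv, &worker_mtx,
	    (thread_continue_t)worker_continue);
	/* Not reached: thread_block() jumps straight to the continuation. */
}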
diff --git a/bsd/security/audit/audit_bsm.c b/bsd/security/audit/audit_bsm.c
index 0ee35a074..6f665d890 100644
--- a/bsd/security/audit/audit_bsm.c
+++ b/bsd/security/audit/audit_bsm.c
@@ -1757,6 +1757,24 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau)
 		}
 		break;
 
+	case AUE_SESSION_START:
+	case AUE_SESSION_UPDATE:
+	case AUE_SESSION_END:
+	case AUE_SESSION_CLOSE:
+		if (ARG_IS_VALID(kar, ARG_VALUE64)) {
+			tok = au_to_arg64(1, "sflags", ar->ar_arg_value64);
+			kau_write(rec, tok);
+		}
+		if (ARG_IS_VALID(kar, ARG_AMASK)) {
+			tok = au_to_arg32(2, "am_success",
+			    ar->ar_arg_amask.am_success);
+			kau_write(rec, tok);
+			tok = au_to_arg32(3, "am_failure",
+			    ar->ar_arg_amask.am_failure);
+			kau_write(rec, tok);
+		}
+		break;
+
 	/************************
 	 * Mach system calls    *
 	 ************************/
@@ -1884,7 +1902,7 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau)
 	}
 
 #if CONFIG_MACF
-	do {
+	if (NULL != ar->ar_mac_records) {
 		/* Convert the audit data from the MAC policies */
 		struct mac_audit_record *mar;
 
@@ -1913,7 +1931,7 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau)
 
 			kau_write(rec, tok);
 		}
-	} while (0);
+	}
 #endif
 
 	kau_write(rec, subj_tok);
diff --git a/bsd/security/audit/audit_ioctl.h b/bsd/security/audit/audit_ioctl.h
index 806f8ae93..1059532b9 100644
--- a/bsd/security/audit/audit_ioctl.h
+++ b/bsd/security/audit/audit_ioctl.h
@@ -31,6 +31,7 @@
 #define	_SECURITY_AUDIT_AUDIT_IOCTL_H_
 
 #define	AUDITPIPE_IOBASE	'A'
+#define	AUDITSDEV_IOBASE	'S'
 
 /*
  * Data structures used for complex ioctl arguments.  Do not change existing
@@ -79,4 +80,28 @@ struct auditpipe_ioctl_preselect {
 #define	AUDITPIPE_GET_DROPS		_IOR(AUDITPIPE_IOBASE, 102, u_int64_t)
 #define	AUDITPIPE_GET_TRUNCATES		_IOR(AUDITPIPE_IOBASE, 103, u_int64_t)
 
+/*
+ * Ioctls for the audit session device.
+ */
+#define	AUDITSDEV_GET_QLEN		_IOR(AUDITSDEV_IOBASE, 1, u_int)
+#define	AUDITSDEV_GET_QLIMIT		_IOR(AUDITSDEV_IOBASE, 2, u_int)
+#define	AUDITSDEV_SET_QLIMIT		_IOW(AUDITSDEV_IOBASE, 3, u_int)
+#define	AUDITSDEV_GET_QLIMIT_MIN	_IOR(AUDITSDEV_IOBASE, 4, u_int)
+#define	AUDITSDEV_GET_QLIMIT_MAX	_IOR(AUDITSDEV_IOBASE, 5, u_int)
+#define	AUDITSDEV_FLUSH			_IO(AUDITSDEV_IOBASE, 6)
+#define	AUDITSDEV_GET_MAXDATA		_IOR(AUDITSDEV_IOBASE, 7, u_int)
+
+/*
+ * Ioctls to retrieve and set the ALLSESSIONS flag in the audit session device.
+ */
+#define	AUDITSDEV_GET_ALLSESSIONS	_IOR(AUDITSDEV_IOBASE, 100, u_int)
+#define	AUDITSDEV_SET_ALLSESSIONS	_IOW(AUDITSDEV_IOBASE, 101, u_int)
+
+/*
+ * Ioctls to retrieve audit sessions device statistics.
+ */
+#define	AUDITSDEV_GET_INSERTS		_IOR(AUDITSDEV_IOBASE, 200, u_int64_t)
+#define	AUDITSDEV_GET_READS		_IOR(AUDITSDEV_IOBASE, 201, u_int64_t)
+#define	AUDITSDEV_GET_DROPS		_IOR(AUDITSDEV_IOBASE, 202, u_int64_t)
+
 #endif /* _SECURITY_AUDIT_AUDIT_IOCTL_H_ */
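
The new AUDITSDEV ioctls mirror the AUDITPIPE set: each _IOR call reads a value out through the pointer argument. A userspace sketch of polling the queue counters follows; the device node name and the include path are assumptions, and only the ioctl names come from the header above:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <stdio.h>
#include <security/audit/audit_ioctl.h>	/* assumed install path */

int
main(void)
{
	u_int qlen, qlimit;
	int fd = open("/dev/auditsessions", O_RDONLY);	/* assumed node name */

	if (fd < 0)
		return (1);
	if (ioctl(fd, AUDITSDEV_GET_QLEN, &qlen) == 0 &&
	    ioctl(fd, AUDITSDEV_GET_QLIMIT, &qlimit) == 0)
		printf("queued %u of %u records\n", qlen, qlimit);
	return (0);
}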
diff --git a/bsd/security/audit/audit_private.h b/bsd/security/audit/audit_private.h
index 803a2b936..aa26d7ede 100644
--- a/bsd/security/audit/audit_private.h
+++ b/bsd/security/audit/audit_private.h
@@ -113,6 +113,8 @@ extern au_class_t		audit_kevent_mask;
 #define	AR_PRESELECT_USER_TRAIL	0x00004000U
 #define	AR_PRESELECT_USER_PIPE	0x00008000U
 
+#define	AR_PRESELECT_FILTER	0x00010000U
+
 #define	AR_DRAIN_QUEUE		0x80000000U
 
 /*
@@ -171,6 +173,7 @@ union auditon_udata {
 	int			au_trigger;
 	au_evclass_map_t	au_evclass;
 	au_mask_t		au_mask;
+	au_asflgs_t		au_flags;
 	auditinfo_t		au_auinfo;
 	auditpinfo_t		au_aupinfo;
 	auditpinfo_addr_t	au_aupinfo_addr;
@@ -440,7 +443,7 @@ int	audit_mac_syscall_exit(unsigned short code, struct uthread *uthread,
  * Audit Session.
  */
 void	audit_session_init(void);
-int 	audit_session_setaia(proc_t p, auditinfo_addr_t *aia_p, int newprocess);
+int 	audit_session_setaia(proc_t p, auditinfo_addr_t *aia_p);
 auditinfo_addr_t *audit_session_update(auditinfo_addr_t *new_aia);
 int	audit_session_lookup(au_asid_t asid, auditinfo_addr_t *ret_aia);
 
diff --git a/bsd/security/audit/audit_session.c b/bsd/security/audit/audit_session.c
index 8e05f9dcd..4b63e0082 100644
--- a/bsd/security/audit/audit_session.c
+++ b/bsd/security/audit/audit_session.c
@@ -27,46 +27,40 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <stdarg.h>
+
 #include <sys/kernel.h>
-#include <sys/event.h>
+#include <sys/fcntl.h>
 #include <sys/kauth.h>
+#include <sys/conf.h>
+#include <sys/poll.h>
 #include <sys/queue.h>
+#include <sys/signalvar.h>
 #include <sys/syscall.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/systm.h>
 #include <sys/ucred.h>
+#include <sys/user.h>
+
+#include <miscfs/devfs/devfs.h>
 
 #include <libkern/OSAtomic.h>
 
 #include <bsm/audit.h>
+#include <bsm/audit_internal.h>
+#include <bsm/audit_kevents.h>
+
 #include <security/audit/audit.h>
 #include <security/audit/audit_bsd.h>
+#include <security/audit/audit_ioctl.h>
 #include <security/audit/audit_private.h>
 
 #include <vm/vm_protos.h>
+#include <mach/mach_port.h>
 #include <kern/audit_sessionport.h>
 
-kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
-    mach_msg_type_name_t, ipc_port_t *);
-void ipc_port_release_send(ipc_port_t);
-
-/*
- * The default auditinfo_addr entry for ucred.
- */
-struct auditinfo_addr audit_default_aia = {
-	.ai_auid = AU_DEFAUDITID,
-	.ai_asid = AU_DEFAUDITSID,
-	.ai_termid = { .at_type = AU_IPv4, },
-};
-
-#if CONFIG_AUDIT
-
-/*
- * Currently the hash table is a fixed size.
- */
-#define HASH_TABLE_SIZE		97
-#define	HASH_ASID(asid)		(audit_session_hash(asid) % HASH_TABLE_SIZE)
+#include <libkern/OSDebug.h>
 
 /*
  * Audit Session Entry.  This is treated as an object with public and private
@@ -84,119 +78,397 @@ struct au_sentry {
 	long			se_refcnt;	/* Reference count. */
 	long			se_procnt;	/* Processes in session. */
 	ipc_port_t		se_port;	/* Session port. */
-	struct klist		se_klist;	/* Knotes for session */
-	struct mtx		se_klist_mtx;	/* se_klist mutex */
 	LIST_ENTRY(au_sentry)	se_link; 	/* Hash bucket link list (1) */
 };
 typedef struct au_sentry au_sentry_t;
 
 #define	AU_SENTRY_PTR(aia_p)	((au_sentry_t *)(aia_p))
 
+/*
+ * The default au_sentry/auditinfo_addr entry for ucred. 
+ */
+
+static au_sentry_t audit_default_se = {
+	.se_auinfo = {
+			.ai_auid = AU_DEFAUDITID,
+			.ai_asid = AU_DEFAUDITSID,
+			.ai_termid = { .at_type = AU_IPv4, },
+	},
+	.se_refcnt = 1, 
+	.se_procnt = 1,
+};
+
+struct auditinfo_addr *audit_default_aia_p = &audit_default_se.se_auinfo;
+
+kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
+    mach_msg_type_name_t, ipc_port_t *);
+void ipc_port_release_send(ipc_port_t);
+
+#if CONFIG_AUDIT
+
+/*
+ * Currently the hash table is a fixed size.
+ */
+#define HASH_TABLE_SIZE		97
+#define	HASH_ASID(asid)		(audit_session_hash(asid) % HASH_TABLE_SIZE)
+
 static struct rwlock	se_entry_lck;		/* (1) lock for se_link above */
 
 LIST_HEAD(au_sentry_head, au_sentry);
 static struct au_sentry_head *au_sentry_bucket = NULL;
 
+#define AU_HISTORY_LOGGING 0
+#if AU_HISTORY_LOGGING
+typedef enum au_history_event {
+	AU_HISTORY_EVENT_UNKNOWN = 0,
+	AU_HISTORY_EVENT_REF     = 1,
+	AU_HISTORY_EVENT_UNREF   = 2,
+	AU_HISTORY_EVENT_BIRTH   = 3,
+	AU_HISTORY_EVENT_DEATH   = 4,
+	AU_HISTORY_EVENT_FIND    = 5
+} au_history_event_t;
+
+#define AU_HISTORY_MAX_STACK_DEPTH 8
+
+struct au_history {
+	struct au_sentry	*ptr;
+	struct au_sentry	 se;
+	void			*stack[AU_HISTORY_MAX_STACK_DEPTH];
+	unsigned int		 stack_depth;
+	au_history_event_t	 event;
+};
+
+static struct au_history *au_history;
+static size_t		  au_history_size = 65536;
+static unsigned int	  au_history_index;
+
+static inline unsigned int
+au_history_entries(void)
+{
+	if (au_history_index >= au_history_size)
+		return au_history_size;
+	else
+		return au_history_index;
+}
+
+static inline void
+au_history_record(au_sentry_t *se, au_history_event_t event)
+{
+	struct au_history *p;
+	unsigned int i;
+
+	i = OSAddAtomic(1, &au_history_index);
+	p = &au_history[i % au_history_size];
+
+	bzero(p, sizeof(*p));
+	p->event = event;
+	bcopy(se, &p->se, sizeof(p->se));
+	p->stack_depth = OSBacktrace(&p->stack[0], AU_HISTORY_MAX_STACK_DEPTH);
+	p->ptr = se;
+}
+#else
+#define au_history_record(se, event) do {} while (0)
+#endif
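+
+/*
+ * Note (illustrative): au_history is a fixed-size ring; au_history_index
+ * only ever grows, so event i lands in au_history[i % au_history_size]
+ * and, once the buffer has wrapped, the newest entry sits at
+ * (au_history_index - 1) % au_history_size.  au_history_entries() gives
+ * the number of valid slots, e.g. when dumping the ring from a debugger.
+ */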
+
+MALLOC_DEFINE(M_AU_SESSION, "audit_session", "Audit session data");
+
+static void	audit_ref_session(au_sentry_t *se);
+static void	audit_unref_session(au_sentry_t *se);
+
+static void 	audit_session_event(int event, auditinfo_addr_t *aia_p);
+
+/*
+ * Audit session device.
+ */
+
+static MALLOC_DEFINE(M_AUDIT_SDEV, "audit_sdev", "Audit sdevs");
+static MALLOC_DEFINE(M_AUDIT_SDEV_ENTRY, "audit_sdevent",
+    "Audit sdev entries and buffers");
+
+/*
+ * Default audit sdev buffer parameters.
+ */
+#define	AUDIT_SDEV_QLIMIT_DEFAULT	128
+#define	AUDIT_SDEV_QLIMIT_MIN		1
+#define	AUDIT_SDEV_QLIMIT_MAX		1024
+
 /*
- * Audit Propagation Knote List is a list of kevent knotes that are assosiated
- * with an any ASID knote.  If the any ASID gets modified or deleted these are
- * modified or deleted as well.
+ * Entry structure.
  */
-struct au_plist {
-	struct knote		*pl_knote;	/* ptr to per-session knote */
-	LIST_ENTRY(au_plist)	 pl_link;	/* list link (2) */
+struct	audit_sdev_entry {
+	void				*ase_record;
+	u_int		 		 ase_record_len;
+	TAILQ_ENTRY(audit_sdev_entry)	 ase_queue;
 };
-typedef struct au_plist	au_plist_t;
 
-struct au_plisthead {
-	struct rlck		ph_rlck;	 /* (2) lock for pl_link list */
-	LIST_HEAD(au_plhead, au_plist)	ph_head; /* list head */
+/*
+ * Per audit sdev structure.  
+ */
+
+struct audit_sdev {
+	int		asdev_open;
+
+#define	AUDIT_SDEV_ASYNC	0x00000001
+#define	AUDIT_SDEV_NBIO		0x00000002
+
+#define	AUDIT_SDEV_ALLSESSIONS	0x00010000
+	u_int		asdev_flags;
+
+	struct selinfo	asdev_selinfo;
+	pid_t		asdev_sigio;
+
+	au_id_t		asdev_auid;
+	au_asid_t	asdev_asid;
+
+	/* Per-sdev mutex for most fields in this struct. */
+	struct mtx	asdev_mtx;
+
+	/*
+	 * Per-sdev sleep lock serializing user-generated reads and
+	 * flushes. uiomove() is called to copy out the current head
+	 * record's data while the record remains in the queue, so we
+	 * prevent other threads from removing it using this lock.
+	 */
+	struct slck	asdev_sx;
+
+	/*
+	 * Condition variable to signal when data has been delivered to 
+	 * a sdev.
+	 */
+	struct cv	asdev_cv;
+
+	/* Count and bound of records in the queue. */
+	u_int		asdev_qlen;
+	u_int		asdev_qlimit;
+
+	/* The number of bytes of data across all records. */
+	u_int		asdev_qbyteslen;
+	
+	/* 
+	 * The amount read so far of the first record in the queue.
+	 * (The number of bytes available for reading in the queue is
+	 * qbyteslen - qoffset.)
+	 */
+	u_int		asdev_qoffset;
+
+	/*
+	 * Per-sdev operation statistics.
+	 */
+	u_int64_t	asdev_inserts;	/* Records added. */
+	u_int64_t	asdev_reads;	/* Records read. */
+	u_int64_t	asdev_drops;	/* Records dropped. */
+
+	/*
+	 * Current pending record list.  This is protected by a
+	 * combination of asdev_mtx and asdev_sx.  Note that both
+	 * locks are required to remove a record from the head of the
+	 * queue, as an in-progress read may sleep while copying and,
+	 * therefore, cannot hold asdev_mtx.
+	 */
+	TAILQ_HEAD(, audit_sdev_entry)	asdev_queue;
+
+	/* Global sdev list. */
+	TAILQ_ENTRY(audit_sdev)		asdev_list;
 };
-typedef struct au_plisthead	au_plisthead_t;
 
-#define	EV_ANY_ASID	EV_FLAG0
+#define	AUDIT_SDEV_LOCK(asdev)		mtx_lock(&(asdev)->asdev_mtx)
+#define	AUDIT_SDEV_LOCK_ASSERT(asdev)	mtx_assert(&(asdev)->asdev_mtx, \
+					    MA_OWNED)
+#define	AUDIT_SDEV_LOCK_DESTROY(asdev)	mtx_destroy(&(asdev)->asdev_mtx)
+#define	AUDIT_SDEV_LOCK_INIT(asdev)	mtx_init(&(asdev)->asdev_mtx, \
+					    "audit_sdev_mtx", NULL, MTX_DEF)
+#define	AUDIT_SDEV_UNLOCK(asdev)	mtx_unlock(&(asdev)->asdev_mtx)
+#define	AUDIT_SDEV_MTX(asdev)		(&(asdev)->asdev_mtx)
+
+#define	AUDIT_SDEV_SX_LOCK_DESTROY(asd)	slck_destroy(&(asd)->asdev_sx)
+#define	AUDIT_SDEV_SX_LOCK_INIT(asd)	slck_init(&(asd)->asdev_sx, \
+    					    "audit_sdev_sx")
+#define	AUDIT_SDEV_SX_XLOCK_ASSERT(asd)	slck_assert(&(asd)->asdev_sx, \
+    					    SA_XLOCKED)
+#define	AUDIT_SDEV_SX_XLOCK_SIG(asd)	slck_lock_sig(&(asd)->asdev_sx)
+#define	AUDIT_SDEV_SX_XUNLOCK(asd)	slck_unlock(&(asd)->asdev_sx)
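+
+/*
+ * Illustrative read-side usage of the locks above (a sketch, not from
+ * the original sources): the sleep lock serializes readers and protects
+ * the queue head across a potentially sleeping copy; the mutex is held
+ * only around queue manipulation:
+ *
+ *	AUDIT_SDEV_SX_XLOCK_SIG(asdev);
+ *	AUDIT_SDEV_LOCK(asdev);
+ *	ase = TAILQ_FIRST(&asdev->asdev_queue);
+ *	AUDIT_SDEV_UNLOCK(asdev);
+ *	... uiomove() the record data, possibly sleeping ...
+ *	AUDIT_SDEV_SX_XUNLOCK(asdev);
+ */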
 
-MALLOC_DEFINE(M_AU_SESSION, "audit_session", "Audit session data");
-MALLOC_DEFINE(M_AU_EV_PLIST, "audit_ev_plist", "Audit session event plist");
+/*
+ * Cloning variables and constants.
+ */
+#define	AUDIT_SDEV_NAME		"auditsessions"
+#define	MAX_AUDIT_SDEVS		32
+
+static int audit_sdev_major;
+static void *devnode;
+
+/*
+ * Global list of audit sdevs.  The list is protected by a rw lock.
+ * Individual record queues are protected by per-sdev locks.  These
+ * locks synchronize between threads walking the list to deliver to 
+ * individual sdevs and adds/removes of sdevs.
+ */
+static TAILQ_HEAD(, audit_sdev) audit_sdev_list;
+static struct rwlock		audit_sdev_lock;
+
+#define	AUDIT_SDEV_LIST_LOCK_INIT()	rw_init(&audit_sdev_lock, \
+    					    "audit_sdev_list_lock")
+#define	AUDIT_SDEV_LIST_RLOCK()		rw_rlock(&audit_sdev_lock)
+#define	AUDIT_SDEV_LIST_RUNLOCK()	rw_runlock(&audit_sdev_lock)
+#define	AUDIT_SDEV_LIST_WLOCK()         rw_wlock(&audit_sdev_lock)
+#define	AUDIT_SDEV_LIST_WLOCK_ASSERT()	rw_assert(&audit_sdev_lock, \
+    					    RA_WLOCKED)
+#define	AUDIT_SDEV_LIST_WUNLOCK()       rw_wunlock(&audit_sdev_lock)
+
+/*
+ * dev_t doesn't have a pointer for "softc" data so we have to keep track of
+ * it with the following global array (indexed by the minor number).
+ *
+ * XXX We may want to dynamically grow this as needed.
+ */
+static struct audit_sdev	*audit_sdev_dtab[MAX_AUDIT_SDEVS];
 
 /*
- * Kevent filters.
+ * Special device methods and definition.
  */
-static int	audit_filt_sessionattach(struct knote *kn);
-static void	audit_filt_sessiondetach(struct knote *kn);
-static void	audit_filt_sessiontouch(struct knote *kn,
-    struct kevent64_s *kev, long type);
-static int	audit_filt_session(struct knote *kn, long hint);
-
-static void	audit_register_kevents(uint32_t asid, uint32_t auid);
-
-struct filterops audit_session_filtops = {
-	.f_attach	=	audit_filt_sessionattach,
-	.f_detach	=	audit_filt_sessiondetach,
-	.f_touch	=	audit_filt_sessiontouch,
-	.f_event	=	audit_filt_session,
+static open_close_fcn_t		audit_sdev_open;
+static open_close_fcn_t		audit_sdev_close;
+static read_write_fcn_t		audit_sdev_read;
+static ioctl_fcn_t		audit_sdev_ioctl; 
+static select_fcn_t		audit_sdev_poll;
+
+static struct cdevsw audit_sdev_cdevsw = {
+	.d_open      =          audit_sdev_open,
+	.d_close     =          audit_sdev_close,
+	.d_read      =          audit_sdev_read,
+	.d_write     =          eno_rdwrt,
+	.d_ioctl     =          audit_sdev_ioctl,
+	.d_stop      =          eno_stop,
+	.d_reset     =          eno_reset,
+	.d_ttys      =          NULL,
+	.d_select    =          audit_sdev_poll,
+	.d_mmap      =          eno_mmap,
+	.d_strategy  =          eno_strat,
+	.d_type      =          0
 };
 
 /*
- * The klist for consumers that are interested in any session (ASID). This list
- * is not associated with any data structure but is used for registering
- * new kevents when sessions are created.  This klist is lock by
- * anyas_klist_mtx.
- */ 
-static struct klist	anyas_klist;
-struct mtx		anyas_klist_mtx;
-
-#define	AUDIT_ANYAS_KLIST_LOCK_INIT()	mtx_init(&anyas_klist_mtx, \
-					"audit anyas_klist_mtx", NULL, MTX_DEF)
-#define	AUDIT_ANYAS_KLIST_LOCK()	mtx_lock(&anyas_klist_mtx)
-#define	AUDIT_ANYAS_KLIST_UNLOCK()	mtx_unlock(&anyas_klist_mtx)
-#define	AUDIT_ANYAS_KLIST_LOCK_ASSERT()	mtx_assert(&anyas_klist_mtx, MA_OWNED)
+ * Global statistics on audit sdevs.
+ */
+static int		audit_sdev_count;	/* Current number of sdevs. */
+static u_int64_t	audit_sdev_ever;	/* Sdevs ever allocated. */
+static u_int64_t	audit_sdev_records; 	/* Total records seen. */
+static u_int64_t	audit_sdev_drops;	/* Global record drop count. */
+
+static int audit_sdev_init(void);
 
 #define	AUDIT_SENTRY_RWLOCK_INIT()	rw_init(&se_entry_lck, \
-					    "audit se_entry_lck")
+					    "se_entry_lck")
 #define	AUDIT_SENTRY_RLOCK()		rw_rlock(&se_entry_lck)
 #define	AUDIT_SENTRY_WLOCK()		rw_wlock(&se_entry_lck)
 #define	AUDIT_SENTRY_RWLOCK_ASSERT()	rw_assert(&se_entry_lck, RA_LOCKED)
 #define	AUDIT_SENTRY_RUNLOCK()		rw_runlock(&se_entry_lck)
 #define	AUDIT_SENTRY_WUNLOCK()		rw_wunlock(&se_entry_lck)
 
-#define	AUDIT_SE_KLIST_LOCK_INIT(se, n)	mtx_init(&(se)->se_klist_mtx, \
-						n, NULL, MTX_DEF)
-#define	AUDIT_SE_KLIST_LOCK(se)		mtx_lock(&(se)->se_klist_mtx)
-#define	AUDIT_SE_KLIST_UNLOCK(se)	mtx_unlock(&(se)->se_klist_mtx)
-#define	AUDIT_SE_KLIST_LOCK_DESTROY(se)	mtx_destroy(&(se)->se_klist_mtx)
-#define	AUDIT_SE_KLIST_LOCK_ASSERT(se)	mtx_assert(&(se)->se_klist_mtx, \
-    						MA_OWNED)
-
-#define	AUDIT_PLIST_LOCK_INIT(pl)	rlck_init(&(pl)->ph_rlck, \
-					    "audit ph_rlck")
-#define	AUDIT_PLIST_LOCK(pl)		rlck_lock(&(pl)->ph_rlck)
-#define	AUDIT_PLIST_UNLOCK(pl)		rlck_unlock(&(pl)->ph_rlck)
-#define	AUDIT_PLIST_LOCK_DESTROY(pl)	rlck_destroy(&(pl)->ph_rlck)
-
+/* Access control on the auditinfo_addr.ai_flags member. */
+static uint64_t audit_session_superuser_set_sflags_mask;
+static uint64_t audit_session_superuser_clear_sflags_mask;
+static uint64_t audit_session_member_set_sflags_mask;
+static uint64_t audit_session_member_clear_sflags_mask;
+SYSCTL_NODE(, OID_AUTO, audit, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Audit controls");
+SYSCTL_NODE(_audit, OID_AUTO, session, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Audit sessions");
+SYSCTL_QUAD(_audit_session, OID_AUTO, superuser_set_sflags_mask, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &audit_session_superuser_set_sflags_mask,
+    "Audit session flags settable by superuser");
+SYSCTL_QUAD(_audit_session, OID_AUTO, superuser_clear_sflags_mask, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &audit_session_superuser_clear_sflags_mask,
+    "Audit session flags clearable by superuser");
+SYSCTL_QUAD(_audit_session, OID_AUTO, member_set_sflags_mask, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &audit_session_member_set_sflags_mask,
+    "Audit session flags settable by a session member");
+SYSCTL_QUAD(_audit_session, OID_AUTO, member_clear_sflags_mask, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &audit_session_member_clear_sflags_mask,
+    "Audit session flags clearable by a session member");
+
+#define	AUDIT_SESSION_DEBUG	0
 #if	AUDIT_SESSION_DEBUG
+/*
+ * The following is debugging code that can be used to get a snapshot of the 
+ * session state.  The audit session information is read out using sysctl:
+ *
+ * error = sysctlbyname("kern.audit_session_debug", buffer_ptr, &buffer_len,
+ * 		NULL, 0);
+ */
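+/*
+ * A fuller sketch of the usual two-pass sysctl pattern (illustrative
+ * only):
+ *
+ *	size_t len = 0;
+ *	sysctlbyname("kern.audit_session_debug", NULL, &len, NULL, 0);
+ *	buf = malloc(len);
+ *	sysctlbyname("kern.audit_session_debug", buf, &len, NULL, 0);
+ */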
 #include <kern/kalloc.h>
 
+/*
+ * The per session record structure for the snapshot data.
+ */
 struct au_sentry_debug {
 	auditinfo_addr_t	se_auinfo;
-	long			se_refcnt;
-	long			se_procnt;
+	int64_t			se_refcnt;	/* reference count */
+	int64_t			se_procnt;	/* process count */
+	int64_t			se_ptcnt;	/* process count from 
+						   proc table */
 };
 typedef struct au_sentry_debug au_sentry_debug_t;
 
 static int audit_sysctl_session_debug(struct sysctl_oid *oidp, void *arg1,
     int arg2, struct sysctl_req *req);
 
-SYSCTL_PROC(_kern, OID_AUTO, audit_session_debug, CTLFLAG_RD, NULL, 0,
-    audit_sysctl_session_debug, "S,audit_session_debug",
+SYSCTL_PROC(_kern, OID_AUTO, audit_session_debug, CTLFLAG_RD | CTLFLAG_LOCKED,
+    NULL, 0, audit_sysctl_session_debug, "S,audit_session_debug",
     "Current session debug info for auditing.");
 
 /*
- * Copy out the session debug info via the sysctl interface.  The userland code
- * is something like the following:
+ * Callouts for proc_iterate(), which is used to reconcile the audit session
+ * proc state information with the proc table.  We get everything we need
+ * in the filterfn while the proc_lock() is held, so we really don't need the
+ * callout() function.
+ */
+static int 
+audit_session_debug_callout(__unused proc_t p, __unused void *arg)
+{
+
+	return (PROC_RETURNED_DONE);
+}
+
+static int
+audit_session_debug_filterfn(proc_t p, void *st)
+{
+	kauth_cred_t cred = p->p_ucred; 
+	auditinfo_addr_t *aia_p = cred->cr_audit.as_aia_p;
+	au_sentry_debug_t *sed_tab = (au_sentry_debug_t *) st;
+	au_sentry_debug_t  *sdtp;
+	au_sentry_t *se;
+
+	if (IS_VALID_SESSION(aia_p)) {
+		sdtp = &sed_tab[0];
+		do {
+			if (aia_p->ai_asid == sdtp->se_asid) {
+				sdtp->se_ptcnt++;
+
+				/* Do some sanity checks. */
+				se = AU_SENTRY_PTR(aia_p);
+				if (se->se_refcnt != sdtp->se_refcnt) {
+					sdtp->se_refcnt =
+					    (int64_t)se->se_refcnt;
+				}
+				if (se->se_procnt != sdtp->se_procnt) {
+					sdtp->se_procnt =
+					    (int64_t)se->se_procnt;
+				}
+				break;
+			}
+			sdtp++;
+		} while (sdtp->se_asid != 0 && sdtp->se_auid != 0);
+	} else {
+		/* Add it to the default session. */
+		sed_tab->se_ptcnt++;
+	}
+
+	return (0);
+}
+
+/*
+ * Copy out the session debug info via the sysctl interface.
  *
- * error = sysctlbyname("kern.audit_session_debug", buffer_ptr, &buffer_len,
- * 		NULL, 0);
  */
 static int
 audit_sysctl_session_debug(__unused struct sysctl_oid *oidp,
@@ -223,6 +495,7 @@ audit_sysctl_session_debug(__unused struct sysctl_oid *oidp,
 		    if (se != NULL) 
 			    entry_cnt++;
 
+	entry_cnt++;  /* add one for the default entry */
 	/*
 	 * If just querying then return the space required.  There is an 
 	 * obvious race condition here so we just fudge this by 3 in case
@@ -258,10 +531,18 @@ audit_sysctl_session_debug(__unused struct sysctl_oid *oidp,
 	 */
 	sz = 0;
 	next_sed = sed_tab;
+	/* add the first entry for processes not tracked in sessions. */
+	bcopy(audit_default_aia_p, &next_sed->se_auinfo, sizeof (au_sentry_t));
+	next_sed->se_refcnt = (int64_t)audit_default_se.se_refcnt;
+	next_sed->se_procnt = (int64_t)audit_default_se.se_procnt;
+	next_sed++;
+	sz += sizeof(au_sentry_debug_t);
 	for(i = 0; i < HASH_TABLE_SIZE; i++) {
 		LIST_FOREACH(se, &au_sentry_bucket[i], se_link) {
 			if (se != NULL) {
-				bcopy(se, next_sed, sizeof(next_sed));
+				next_sed->se_auinfo = se->se_auinfo;
+				next_sed->se_refcnt = (int64_t)se->se_refcnt;
+				next_sed->se_procnt = (int64_t)se->se_procnt;
 				next_sed++;
 				sz += sizeof(au_sentry_debug_t);
 			}
@@ -269,6 +550,12 @@ audit_sysctl_session_debug(__unused struct sysctl_oid *oidp,
 	}
 	AUDIT_SENTRY_RUNLOCK();
 
+	/* Reconcile with the process table. */
+	(void) proc_iterate(PROC_ALLPROCLIST | PROC_ZOMBPROCLIST,
+	    audit_session_debug_callout, NULL,
+	    audit_session_debug_filterfn, (void *)&sed_tab[0]);
+
 	req->oldlen = sz;
 	err = SYSCTL_OUT(req, sed_tab, sz);
 	kfree(sed_tab, entry_cnt * sizeof(au_sentry_debug_t));
@@ -278,6 +565,65 @@ audit_sysctl_session_debug(__unused struct sysctl_oid *oidp,
 
 #endif /* AUDIT_SESSION_DEBUG */
 
+/*
+ * Create and commit a session audit event.  The aia_p argument needs to be
+ * that of the subject and not necessarily the current process.
+ */
+static void
+audit_session_event(int event, auditinfo_addr_t *aia_p)
+{
+	struct kaudit_record *ar;
+
+	KASSERT(AUE_SESSION_START == event || AUE_SESSION_UPDATE == event ||
+	    AUE_SESSION_END == event || AUE_SESSION_CLOSE == event,
+	    ("audit_session_event: invalid event: %d", event));
+
+	if (NULL == aia_p)
+		return;
+
+	/* 
+	 * Create a new audit record.  The record will contain the subject
+	 * ruid, rgid, egid, pid, auid, asid, amask, and term_addr 
+	 * (implicitly added by audit_new).
+	 */
+	ar = audit_new(event, PROC_NULL, /* Not used */ NULL);
+	if (NULL == ar)
+		return;
+
+	/*
+	 * Audit session events are always generated because they are used
+	 * by some userland consumers so just set the preselect flag.
+	 */
+	ar->k_ar_commit |= AR_PRESELECT_FILTER;
+
+	/* 
+	 * Populate the subject information.  Note that the ruid, rgid,
+	 * egid, and pid values are incorrect.  We only need the auditinfo_addr
+	 * information.
+	 */
+	ar->k_ar.ar_subj_ruid = 0;
+	ar->k_ar.ar_subj_rgid = 0;
+	ar->k_ar.ar_subj_egid = 0;
+	ar->k_ar.ar_subj_pid = 0;
+	ar->k_ar.ar_subj_auid = aia_p->ai_auid;
+	ar->k_ar.ar_subj_asid = aia_p->ai_asid;
+	bcopy(&aia_p->ai_termid, &ar->k_ar.ar_subj_term_addr,
+	    sizeof(struct au_tid_addr));
+
+	/* Add the audit masks to the record. */
+	ar->k_ar.ar_arg_amask.am_success = aia_p->ai_mask.am_success;
+	ar->k_ar.ar_arg_amask.am_failure = aia_p->ai_mask.am_failure;
+	ARG_SET_VALID(ar, ARG_AMASK);
+
+	/* Add the audit session flags to the record. */
+	ar->k_ar.ar_arg_value64 = aia_p->ai_flags; 
+	ARG_SET_VALID(ar, ARG_VALUE64);
+
+	/* Commit the record to the queue. */
+	audit_commit(ar, 0, 0);
+}
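+
+/*
+ * When such a record reaches kaudit_to_bsm() (audit_bsm.c), the
+ * AUE_SESSION_* cases emit the session flags as an arg64 "sflags" token
+ * and the masks as arg32 "am_success"/"am_failure" tokens, which is what
+ * userland consumers of these events parse.
+ */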
+
 /*
  * Hash the audit session ID using a simple 32-bit mix.
  */
@@ -296,7 +642,8 @@ audit_session_hash(au_asid_t asid)
 
 /*
  * Do an hash lookup and find the session entry for a given ASID.  Return NULL
- * if not found.
+ * if not found.  If the session is found, audit_session_find takes a
+ * reference on it.
  */
 static au_sentry_t *
 audit_session_find(au_asid_t asid)
@@ -309,23 +656,14 @@ audit_session_find(au_asid_t asid)
 	hkey = HASH_ASID(asid);
 
 	LIST_FOREACH(found_se, &au_sentry_bucket[hkey], se_link)
-		if (found_se->se_asid == asid)
+		if (found_se->se_asid == asid) {
+			au_history_record(found_se, AU_HISTORY_EVENT_FIND);
+			audit_ref_session(found_se);
 			return (found_se);
+		}
 	return (NULL);
 }
 
-/*
- * Call kqueue knote while holding the session entry klist lock.
- */
-static void
-audit_session_knote(au_sentry_t *se, long hint)
-{
-
-	AUDIT_SE_KLIST_LOCK(se);
-	KNOTE(&se->se_klist, hint);
-	AUDIT_SE_KLIST_UNLOCK(se);
-}
-
 /*
  * Remove the given audit_session entry from the hash table.
  */
@@ -335,20 +673,35 @@ audit_session_remove(au_sentry_t *se)
 	uint32_t	 hkey;
 	au_sentry_t	*found_se, *tmp_se;
 
+	au_history_record(se, AU_HISTORY_EVENT_DEATH);
 	KASSERT(se->se_refcnt == 0, ("audit_session_remove: ref count != 0"));	
+	KASSERT(se != &audit_default_se,
+		("audit_session_remove: removing default session"));
 
 	hkey = HASH_ASID(se->se_asid);
 
 	AUDIT_SENTRY_WLOCK();
+	/*
+	 * Check and see if someone got a reference before we got the lock.
+	 */
+	if (se->se_refcnt != 0) {
+		AUDIT_SENTRY_WUNLOCK();
+		return;
+	}
+
+	audit_session_portdestroy(&se->se_port);
 	LIST_FOREACH_SAFE(found_se, &au_sentry_bucket[hkey], se_link, tmp_se) {
 		if (found_se == se) {
 
-			audit_session_knote(found_se, NOTE_AS_CLOSE);
+			/*
+			 * Generate an audit event to notify userland of the
+			 * session close.
+			 */
+			audit_session_event(AUE_SESSION_CLOSE,
+			    &found_se->se_auinfo);
 
 			LIST_REMOVE(found_se, se_link);
 			AUDIT_SENTRY_WUNLOCK();
-			AUDIT_SE_KLIST_LOCK_DESTROY(found_se);
-			found_se->se_refcnt = 0;
 			free(found_se, M_AU_SESSION);
 
 			return;
@@ -365,6 +718,11 @@ audit_ref_session(au_sentry_t *se)
 {
 	long old_val;
 
+	if (se == NULL || se == &audit_default_se)
+		return;
+
+	au_history_record(se, AU_HISTORY_EVENT_REF);
+
 	old_val = OSAddAtomicLong(1, &se->se_refcnt);
 	KASSERT(old_val < 100000,
 	    ("audit_ref_session: Too many references on session."));
@@ -378,6 +736,11 @@ audit_unref_session(au_sentry_t *se)
 {
 	long old_val;
 
+	if (se == NULL || se == &audit_default_se)
+		return;
+
+	au_history_record(se, AU_HISTORY_EVENT_UNREF);
+
 	old_val = OSAddAtomicLong(-1, &se->se_refcnt);
 	if (old_val == 1)
 		audit_session_remove(se);
@@ -393,6 +756,9 @@ audit_inc_procount(au_sentry_t *se)
 {
 	long old_val;
 
+	if (se == NULL || se == &audit_default_se)
+		return;
+	
 	old_val = OSAddAtomicLong(1, &se->se_procnt);
 	KASSERT(old_val <= PID_MAX,
 	    ("audit_inc_procount: proc count > PID_MAX"));
@@ -407,9 +773,16 @@ audit_dec_procount(au_sentry_t *se)
 {
 	long old_val;
 
+	if (se == NULL || se == &audit_default_se)
+		return;
+
 	old_val = OSAddAtomicLong(-1, &se->se_procnt);
+	/*
+	 * If this was the last process generate an audit event to notify
+	 * userland of the session ending.
+	 */
 	if (old_val == 1)
-		audit_session_knote(se, NOTE_AS_END);
+		audit_session_event(AUE_SESSION_END, &se->se_auinfo);
 	KASSERT(old_val >= 1,
 	    ("audit_dec_procount: proc count < 0"));
 }	
@@ -426,7 +799,7 @@ audit_update_sentry(au_sentry_t *se, auditinfo_addr_t *new_aia)
 	auditinfo_addr_t *aia = &se->se_auinfo;
 	int update;
 
-	KASSERT(new_aia != &audit_default_aia, 
+	KASSERT(new_aia != audit_default_aia_p, 
 	  ("audit_update_sentry: Trying to update the default aia."));
 
 	update = (aia->ai_auid != new_aia->ai_auid ||
@@ -464,64 +837,66 @@ audit_session_nextid(void)
  * reference to the entry that must be unref'ed.
  */
 static auditinfo_addr_t *
-audit_session_new(auditinfo_addr_t *new_aia, int newprocess)
+audit_session_new(auditinfo_addr_t *new_aia_p, auditinfo_addr_t *old_aia_p)
 {
-	au_asid_t asid;
+	au_asid_t new_asid;
 	au_sentry_t *se = NULL;
+	au_sentry_t *found_se = NULL;
 	auditinfo_addr_t *aia = NULL;
-	char nm[LOCK_MAX_NAME];
 	
-	KASSERT(new_aia != NULL, ("audit_session_new: new_aia == NULL"));
+	KASSERT(new_aia_p != NULL, ("audit_session_new: new_aia_p == NULL"));
 
-	asid = new_aia->ai_asid; 
+	new_asid = new_aia_p->ai_asid; 
 
-#if 0  /* XXX this assertion is currently broken by securityd/LoginWindow */
-	KASSERT((asid != AU_ASSIGN_ASID && asid <= PID_MAX),
-	    ("audit_session_new: illegal ASID value: %d", asid));
-#endif
-	
 	/*
 	 * Alloc a new session entry now so we don't wait holding the lock.
 	 */
 	se = malloc(sizeof(au_sentry_t), M_AU_SESSION, M_WAITOK | M_ZERO);
 
-	snprintf(nm, sizeof(nm), "audit se_klist_mtx %d", asid);
-	AUDIT_SE_KLIST_LOCK_INIT(se, nm);
-
 	/*
 	 * Find an unique session ID, if desired.
 	 */
 	AUDIT_SENTRY_WLOCK();
-	if (asid == AU_ASSIGN_ASID) {
+	if (new_asid == AU_ASSIGN_ASID) {
 		do {
-			asid = (au_asid_t)audit_session_nextid();
-		} while(audit_session_find(asid) != NULL);
+
+			new_asid = (au_asid_t)audit_session_nextid();
+			found_se = audit_session_find(new_asid);
+			
+			/* 
+			 * If the session ID is currently active then drop the
+			 * reference and try again.
+			 */
+			if (found_se != NULL)
+				audit_unref_session(found_se);
+			else
+				break;
+		} while(1);
 	} else {
-		au_sentry_t *found_se = NULL;
 
 		/*
 		 * Check to see if the requested ASID is already in the
 		 * hash table.  If so, update it with the new auditinfo.
 		 */	
-		if ((found_se = audit_session_find(asid)) != NULL) {
+		if ((found_se = audit_session_find(new_asid)) != NULL) {
 			int updated;
 
-			updated = audit_update_sentry(found_se, new_aia);
-			audit_ref_session(found_se);
+			updated = audit_update_sentry(found_se, new_aia_p);
 
 			AUDIT_SENTRY_WUNLOCK();
-			AUDIT_SE_KLIST_LOCK_DESTROY(se);
 			free(se, M_AU_SESSION);
 
-			if (updated) 
-				audit_session_knote(found_se, NOTE_AS_UPDATE);
+			/* If a different session then add this process in. */
+			if (new_aia_p != old_aia_p)
+				audit_inc_procount(found_se);
 
 			/*
-			 * If this is a new process joining this session then
-			 * we need to update the proc count.
+			 * If the session information was updated then
+			 * generate an audit event to notify userland.
 			 */
-			if (newprocess)
-				audit_inc_procount(found_se);
+			if (updated)
+				audit_session_event(AUE_SESSION_UPDATE,
+				    &found_se->se_auinfo);
 
 			return (&found_se->se_auinfo);
 		}
@@ -539,25 +914,23 @@ audit_session_new(auditinfo_addr_t *new_aia, int newprocess)
 	 */
 	se->se_port = IPC_PORT_NULL;
 	aia = &se->se_auinfo;
-	aia->ai_asid = asid;
-	aia->ai_auid = new_aia->ai_auid;
-	bzero(&new_aia->ai_mask, sizeof(new_aia->ai_mask));
-	bcopy(&new_aia->ai_termid, &aia->ai_termid, sizeof(aia->ai_termid));
-	aia->ai_flags = new_aia->ai_flags;
+	aia->ai_asid = new_asid;
+	aia->ai_auid = new_aia_p->ai_auid;
+	bzero(&new_aia_p->ai_mask, sizeof(new_aia_p->ai_mask));
+	bcopy(&new_aia_p->ai_termid, &aia->ai_termid, sizeof(aia->ai_termid));
+	aia->ai_flags = new_aia_p->ai_flags;
 
 	/*
 	 * Add it to the hash table.
 	 */
-	LIST_INSERT_HEAD(&au_sentry_bucket[HASH_ASID(asid)], se, se_link);
+	LIST_INSERT_HEAD(&au_sentry_bucket[HASH_ASID(new_asid)], se, se_link);
 	AUDIT_SENTRY_WUNLOCK();
 
 	/*
-	 * Register kevents for consumers wanting events for any ASID
-	 * and knote the event.
+	 * Generate an audit event to notify userland of the new session.
 	 */
-	audit_register_kevents(se->se_asid, se->se_auid);
-	audit_session_knote(se, NOTE_AS_START);
-
+	audit_session_event(AUE_SESSION_START, aia);
+	au_history_record(se, AU_HISTORY_EVENT_BIRTH);
 	return (aia);
 }
 
@@ -577,13 +950,22 @@ audit_session_lookup(au_asid_t asid, auditinfo_addr_t *ret_aia)
 		AUDIT_SENTRY_RUNLOCK();
 		return (1);
 	}
+	/* We have a reference on the session so it is safe to drop the lock. */
+	AUDIT_SENTRY_RUNLOCK();
 	if (ret_aia != NULL)
 		bcopy(&se->se_auinfo, ret_aia, sizeof(*ret_aia));
-	AUDIT_SENTRY_RUNLOCK();
+	audit_unref_session(se);
 
 	return (0);
 }
 
+void
+audit_session_aiaref(auditinfo_addr_t *aia_p)
+{
+
+	audit_ref_session(AU_SENTRY_PTR(aia_p));
+}
+	
 /*
  * Add a reference to the session entry.
  */
@@ -596,9 +978,13 @@ audit_session_ref(kauth_cred_t cred)
 	    ("audit_session_ref: Invalid kauth_cred."));
 
  	aia_p = cred->cr_audit.as_aia_p;
+	audit_session_aiaref(aia_p);
+}
+
+void audit_session_aiaunref(auditinfo_addr_t *aia_p)
+{
 
-	if (IS_VALID_SESSION(aia_p))
-		audit_ref_session(AU_SENTRY_PTR(aia_p));
+	audit_unref_session(AU_SENTRY_PTR(aia_p));
 }
 
 /* 
@@ -613,14 +999,17 @@ audit_session_unref(kauth_cred_t cred)
 	    ("audit_session_unref: Invalid kauth_cred."));
 
  	aia_p = cred->cr_audit.as_aia_p;
-
-	if (IS_VALID_SESSION(aia_p))
-		audit_unref_session(AU_SENTRY_PTR(aia_p));
+	audit_session_aiaunref(aia_p);
 }
 
+/*
+ * Increment the per audit session process count.  Assumes that the caller has
+ * a reference on the process' cred.
+ */
 void
-audit_session_procnew(kauth_cred_t cred)
+audit_session_procnew(proc_t p)
 {
+	kauth_cred_t cred = p->p_ucred;
 	auditinfo_addr_t *aia_p;
 	
 	KASSERT(IS_VALID_CRED(cred), 
@@ -628,13 +1017,17 @@ audit_session_procnew(kauth_cred_t cred)
 
 	aia_p = cred->cr_audit.as_aia_p; 
 
-	if (IS_VALID_SESSION(aia_p))
-		audit_inc_procount(AU_SENTRY_PTR(aia_p));
+	audit_inc_procount(AU_SENTRY_PTR(aia_p));
 }
 
+/*
+ * Decrement the per audit session process count.  Assumes that the caller has
+ * a reference on the cred.
+ */
 void
-audit_session_procexit(kauth_cred_t cred)
+audit_session_procexit(proc_t p)
 {
+	kauth_cred_t cred = p->p_ucred;
 	auditinfo_addr_t *aia_p;
 
 	KASSERT(IS_VALID_CRED(cred), 
@@ -642,8 +1035,7 @@ audit_session_procexit(kauth_cred_t cred)
 
 	aia_p = cred->cr_audit.as_aia_p; 
 
-	if (IS_VALID_SESSION(aia_p))
-		audit_dec_procount(AU_SENTRY_PTR(aia_p));
+	audit_dec_procount(AU_SENTRY_PTR(aia_p));
 }
 
 /*
@@ -658,450 +1050,109 @@ audit_session_init(void)
 	    ("audit_session_init: ASSIGNED_ASID_MAX is not large enough."));
 	
 	AUDIT_SENTRY_RWLOCK_INIT();
-	AUDIT_ANYAS_KLIST_LOCK_INIT();
 
 	au_sentry_bucket = malloc( sizeof(struct au_sentry) *
 	    HASH_TABLE_SIZE, M_AU_SESSION, M_WAITOK | M_ZERO);
 
 	for (i = 0; i < HASH_TABLE_SIZE; i++)
 		LIST_INIT(&au_sentry_bucket[i]);
-}
-
-/*
- * Allocate a new kevent propagation list (plist).
- */
-static caddr_t
-audit_new_plist(void)
-{
-	au_plisthead_t *plhead;
-
-	plhead = malloc(sizeof(au_plisthead_t), M_AU_EV_PLIST, M_WAITOK |
-	    M_ZERO);
-
-	LIST_INIT(&plhead->ph_head);
-	AUDIT_PLIST_LOCK_INIT(plhead);
 
-	return ((caddr_t) plhead);
+	(void)audit_sdev_init();
+#if AU_HISTORY_LOGGING
+	au_history = malloc(sizeof(struct au_history) * au_history_size,
+	    M_AU_SESSION, M_WAITOK|M_ZERO);
+#endif
 }
 
-/*
- * Destroy a kevent propagation list (plist).  The anyas_klist_mtx mutex must be
- * held by the caller. 
- */
-static void
-audit_destroy_plist(struct knote *anyas_kn)
+static int
+audit_session_update_check(kauth_cred_t cred, auditinfo_addr_t *old,
+    auditinfo_addr_t *new)
 {
-	au_plisthead_t *plhead;
-	au_plist_t *plentry, *ple_tmp;
-	struct kevent64_s kev;
-	
-	KASSERT(anyas_kn != NULL, ("audit_destroy_plist: anyas = NULL"));
-	plhead = (au_plisthead_t *)anyas_kn->kn_hook;
-	KASSERT(plhead != NULL, ("audit_destroy_plist: plhead = NULL"));
-
-	/*
-	 * Delete everything in the propagation list.
+	uint64_t n;
+
+	/* If the current audit ID is not the default then it is immutable. */
+	if (old->ai_auid != AU_DEFAUDITID && old->ai_auid != new->ai_auid)
+		return (EINVAL);
+
+	/* If the current termid is not the default then it is immutable. */
+	if ((old->ai_termid.at_type != AU_IPv4 ||
+	     old->ai_termid.at_port != 0 ||
+	     old->ai_termid.at_addr[0] != 0) &&
+	    (old->ai_termid.at_port != new->ai_termid.at_port ||
+	     old->ai_termid.at_type != new->ai_termid.at_type ||
+	     0 != bcmp(&old->ai_termid.at_addr, &new->ai_termid.at_addr,
+		 sizeof (old->ai_termid.at_addr))))
+		return (EINVAL);
+
+	/* The flags may be set only according to the
+	 * audit_session_*_set_sflags_masks.
 	 */
-	AUDIT_PLIST_LOCK(plhead);
-	LIST_FOREACH_SAFE(plentry, &plhead->ph_head, pl_link, ple_tmp) {
-		struct kqueue *kq = plentry->pl_knote->kn_kq;
-
-		kev.ident = plentry->pl_knote->kn_id;
-		kev.filter = EVFILT_SESSION;
-		kev.flags = EV_DELETE;
-
-		/*
-		 * The plist entry gets removed in rm_from_plist() which is
-		 * called indirectly by kevent_register().
-		 */
-		kevent_register(kq, &kev, NULL);
-	}
-	AUDIT_PLIST_UNLOCK(plhead);
-
-	/*
-	 * Remove the head.
+	n = ~old->ai_flags & new->ai_flags;
+	if (0 != n &&
+	    !((n == (audit_session_superuser_set_sflags_mask & n) &&
+		kauth_cred_issuser(cred)) ||
+	      (n == (audit_session_member_set_sflags_mask & n)    &&
+		old->ai_asid == new->ai_asid)))
+		return (EINVAL);
+
+	/* The flags may be cleared only according to the
+	 * audit_session_*_clear_sflags_masks.
 	 */
-	AUDIT_PLIST_LOCK_DESTROY(plhead);
-	free(plhead, M_AU_EV_PLIST);
+	n = ~new->ai_flags & old->ai_flags;
+	if (0 != n &&
+	    !((n == (audit_session_superuser_clear_sflags_mask & n) &&
+		kauth_cred_issuser(cred)) ||
+	      (n == (audit_session_member_clear_sflags_mask & n)    &&
+		old->ai_asid == new->ai_asid)))
+		return (EINVAL);
+
+	/* The audit masks are mutable. */
+	return (0);
 }
 
 /*
- * Add a knote pointer entry to the kevent propagation list.
+ * Safely update the kauth cred of the given process with the given audit info.
  */
-static void
-audit_add_to_plist(struct knote *anyas_kn, struct knote *kn)
+int
+audit_session_setaia(proc_t p, auditinfo_addr_t *new_aia_p)
 {
-	au_plisthead_t *plhead;
-	au_plist_t *plentry;
-
-	KASSERT(anyas_kn != NULL, ("audit_add_to_plist: anyas = NULL"));
-	plhead = (au_plisthead_t *)anyas_kn->kn_hook;
-	KASSERT(plhead != NULL, ("audit_add_to_plist: plhead = NULL"));
+	kauth_cred_t my_cred, my_new_cred;
+	struct au_session  as;
+	struct au_session  tmp_as;
+	auditinfo_addr_t caia, *old_aia_p;
+	int ret;
 
-	plentry = malloc(sizeof(au_plist_t), M_AU_EV_PLIST, M_WAITOK | M_ZERO);
+	/*
+	 * If this is going to modify an existing session then do some
+	 * immutable checks.
+	 */
+	if (audit_session_lookup(new_aia_p->ai_asid, &caia) == 0) {
+		my_cred = kauth_cred_proc_ref(p);
+		ret = audit_session_update_check(my_cred, &caia, new_aia_p);
+		kauth_cred_unref(&my_cred);
+		if (ret)
+			return (ret);
+	}
 
-	plentry->pl_knote = kn;
-	AUDIT_PLIST_LOCK(plhead);
-	LIST_INSERT_HEAD(&plhead->ph_head, plentry, pl_link);
-	AUDIT_PLIST_UNLOCK(plhead);
-}
+	my_cred = kauth_cred_proc_ref(p);
+	bcopy(&new_aia_p->ai_mask, &as.as_mask, sizeof(as.as_mask));
+	old_aia_p = my_cred->cr_audit.as_aia_p;
+	/* audit_session_new() adds a reference on the session */
+	as.as_aia_p = audit_session_new(new_aia_p, old_aia_p);
 
-/*
- * Remote a knote pointer entry from the kevent propagation list.  The lock
- * on the plist may already be head (by audit_destroy_plist() above) so we use
- * a recursive lock.
- */
-static void
-audit_rm_from_plist(struct knote *kn)
-{
-	struct knote *anyas_kn;
-	au_plisthead_t *plhd;
-	au_plist_t *plentry, *ple_tmp;
-
-	KASSERT(kn != NULL, ("audit_rm_from_plist: kn = NULL"));
-	anyas_kn = (struct knote *)kn->kn_hook;
-	KASSERT(anyas_kn != NULL, ("audit_rm_to_plist: anyas = NULL"));
-	plhd = (au_plisthead_t *)anyas_kn->kn_hook;
-
-	AUDIT_PLIST_LOCK(plhd);
-	LIST_FOREACH_SAFE(plentry, &plhd->ph_head, pl_link, ple_tmp) {
-		if (plentry->pl_knote == kn) {
-			LIST_REMOVE(plentry, pl_link);
-			free(plentry, M_AU_EV_PLIST);
-			AUDIT_PLIST_UNLOCK(plhd);
-			return;
-		}
-	}
-	AUDIT_PLIST_UNLOCK(plhd);
-}
+	/* If the process left a session then update the process count. */
+	if (old_aia_p != new_aia_p)
+		audit_dec_procount(AU_SENTRY_PTR(old_aia_p));
 
-/*
- * The attach filter for EVFILT_SESSION.
- */
-static int
-audit_filt_sessionattach(struct knote *kn)
-{
-	au_sentry_t *se = NULL;
 
 	/*
-	 * Check flags for the events we currently support. 
+	 * We are modifying the audit info in a credential so we need a new
+	 * credential (or take another reference on an existing credential that
+	 * matches our new one).  We must do this because the audit info in the
+	 * credential is used as part of our hash key.	Get current credential
+	 * in the target process and take a reference while we muck with it.
 	 */
-	if ((kn->kn_sfflags & (NOTE_AS_START | NOTE_AS_END | NOTE_AS_CLOSE
-		    | NOTE_AS_UPDATE | NOTE_AS_ERR)) == 0)
-		return (ENOTSUP);
-
-	/*
-	 * If the interest is in any session then add to the any ASID knote
-	 * list.  Otherwise, add it to the knote list assosiated with the
-	 * given session.
-	 */
-	if (kn->kn_id == AS_ANY_ASID) {
-		
-		kn->kn_flags |= EV_CLEAR;
-		kn->kn_ptr.p_se = NULL;
-
-		/*
-		 * Attach a kevent propagation list for any kevents that get
-		 * added. 
-		 */
-		kn->kn_hook = audit_new_plist();
-	
-		AUDIT_ANYAS_KLIST_LOCK();
-		KNOTE_ATTACH(&anyas_klist, kn);
-		AUDIT_ANYAS_KLIST_UNLOCK();
-
-		return (0);
-	} else {
-
-		/*
-		 * NOTE: The anyas klist lock will be held in this
-		 * part of the code when indirectly called from
-		 * audit_register_kevents() below.
-		 */
-
-		/*
-		 * Check to make sure it is a valid ASID.
-		 */
-		if (kn->kn_id > ASSIGNED_ASID_MAX)
-			return (EINVAL);
-
-		AUDIT_SENTRY_RLOCK();
-		se = audit_session_find(kn->kn_id);
-		AUDIT_SENTRY_RUNLOCK();
-		if (se == NULL)
-			return (EINVAL);
-
-		AUDIT_SE_KLIST_LOCK(se);
-		kn->kn_flags |= EV_CLEAR;
-		kn->kn_ptr.p_se = se;
-
-		/*
-		 * If this attach is the result of an "any ASID" (pseudo)
-		 * kevent then attach the any session knote ptr to this knote.
-		 * Also, add this knote to the its propagation list.
-		 */
-		if (kn->kn_flags & EV_ANY_ASID) {
-			struct knote *anyas_kn =
-			    (struct knote *)((uintptr_t)kn->kn_kevent.ext[0]);
-			kn->kn_hook = (caddr_t) anyas_kn;
-			kn->kn_flags &= ~EV_ANY_ASID;
-			audit_add_to_plist(anyas_kn, kn);
-		} else
-			kn->kn_hook = NULL;
-		KNOTE_ATTACH(&se->se_klist, kn);
-		AUDIT_SE_KLIST_UNLOCK(se);
-
-		return (0);
-	}
-}
-
-/*
- * The detach filter for EVFILT_SESSION.
- */
-static void
-audit_filt_sessiondetach(struct knote *kn)
-{
-	au_sentry_t *se = NULL;
-
-	if (kn->kn_id == AS_ANY_ASID) {
-
-		AUDIT_ANYAS_KLIST_LOCK();
-		audit_destroy_plist(kn);
-		KNOTE_DETACH(&anyas_klist, kn);
-		AUDIT_ANYAS_KLIST_UNLOCK();
-
-	} else {
-		/*
-		 * If this knote was created by any ASID kevent then remove
-		 * from kevent propagation list.
-		 */
-		if (kn->kn_hook != NULL) {
-			audit_rm_from_plist(kn);
-			kn->kn_hook = NULL;
-		}
-
-		/*
-		 * Check to see if already detached.
-		 */
-		se = kn->kn_ptr.p_se;
-		if (se != NULL) {
-			AUDIT_SE_KLIST_LOCK(se);
-			kn->kn_ptr.p_se = NULL;
-			KNOTE_DETACH(&se->se_klist, kn);
-			AUDIT_SE_KLIST_UNLOCK(se);
-		}
-	}
-}
-
-/*
- * The touch filter for EVFILT_SESSION.  Check for any ASID kevent updates and
- * propagate the change.
- */
-static void
-audit_filt_sessiontouch(struct knote *kn, struct kevent64_s *kev, long type)
-{
-	struct knote *ple_kn;
-	struct kqueue *kq;
-	au_sentry_t *se;
-	au_plisthead_t *plhead;
-	au_plist_t *plentry;
-	struct kevent64_s newkev;
-
-	switch (type) {
-	case EVENT_REGISTER:
-		kn->kn_sfflags = kev->fflags;
-		kn->kn_sdata = kev->data;
-		/*
-		 * If an any ASID kevent was updated then we may need to
-		 * propagate the update.
-		 */
-		if (kev->ident == AS_ANY_ASID && kn->kn_hook != NULL) {
-
-			/*
-			 * Propagate the change to each of the session kevents
-			 * that were created by this any ASID kevent.
-			 */
-			plhead = (au_plisthead_t *)kn->kn_hook;
-			AUDIT_PLIST_LOCK(plhead);
-			LIST_FOREACH(plentry, &plhead->ph_head, pl_link) {
-
-				if ((ple_kn = plentry->pl_knote) == NULL)
-					continue;
-				if ((se = ple_kn->kn_ptr.p_se) == NULL)
-					continue;
-				if ((kq = ple_kn->kn_kq) == NULL)
-					continue;
-
-				newkev.ident = plentry->pl_knote->kn_id;
-				newkev.filter = EVFILT_SESSION;
-				newkev.flags = kev->flags;
-				newkev.fflags = kev->fflags;
-				newkev.data = kev->data;
-				newkev.udata = kev->udata;
-				kevent_register(kq, &newkev, NULL);
-			}
-			AUDIT_PLIST_UNLOCK(plhead);
-		}
-		break;
-
-	case EVENT_PROCESS:
-		*kev = kn->kn_kevent;
-		if (kn->kn_flags & EV_CLEAR) {
-			kn->kn_data = 0;
-			kn->kn_fflags = 0;
-		}
-		break;
-
-	default:
-		KASSERT((type == EVENT_REGISTER || type == EVENT_PROCESS),
-		    ("filt_sessiontouch(): invalid type (%ld)", type));
-		break;
-	}
-}
-
-/*
- * Event filter for EVFILT_SESSION.  The AUDIT_SE_KLIST_LOCK should be held
- * by audit_session_knote().
- */
-static int
-audit_filt_session(struct knote *kn, long hint)
-{
-	int events = (int)hint;
-	au_sentry_t *se = kn->kn_ptr.p_se;
-
-	if (hint != 0 && se != NULL) {
-
-		if (kn->kn_sfflags & events) {
-			kn->kn_fflags |= events;
-			kn->kn_data = se->se_auid;
-		}
-		
-		/*
-		 * If this is the last possible event for the knote,
-		 * detach the knote from the audit session before the
-		 * session goes away.
-		 */
-		if (events & NOTE_AS_CLOSE) {
-
-			/*
-			 * If created by any ASID kevent then remove from 
-			 * propagation list.
-			 */
-			if (kn->kn_hook != NULL) {
-				audit_rm_from_plist(kn);
-				kn->kn_hook = NULL;
-			}
-			kn->kn_flags |= (EV_EOF | EV_ONESHOT);
-			kn->kn_ptr.p_se = NULL;
-			AUDIT_SE_KLIST_LOCK_ASSERT(se);
-			KNOTE_DETACH(&se->se_klist, kn);
-
-			return (1);
-		}
-	}
-	return (kn->kn_fflags != 0);
-}
-
-/*
- * For all the consumers wanting events for all sessions, register new
- * kevents associated with the session for the given ASID.  The actual
- * attachment is done by the EVFILT_SESSION attach filter above.
- */
-static void
-audit_register_kevents(uint32_t asid, uint32_t auid)
-{
-	struct knote *kn;
-
-	AUDIT_ANYAS_KLIST_LOCK();
-	SLIST_FOREACH(kn, &anyas_klist, kn_selnext) {
-		struct kqueue *kq = kn->kn_kq;
-		struct kevent64_s kev;
-		int err;
-
-		kev.ident = asid;
-		kev.filter = EVFILT_SESSION;
-		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ANY_ASID;
-		kev.fflags = kn->kn_sfflags;
-		kev.data = auid;
-		kev.udata = kn->kn_kevent.udata;
-
-		/*
-		 * Save the knote ptr for this "any ASID" knote for the attach
-		 * filter.
-		 */
-		kev.ext[0] = (uint64_t)((uintptr_t)kn);
-
-		/*
-		 * XXX kevent_register() may block here alloc'ing a new knote.
-		 * We may want to think about using a lockless linked list or
-		 * at least a sleep rwlock for the anyas_klist.
-		 */
-		err = kevent_register(kq, &kev, NULL);
-		if (err)
-			kn->kn_fflags |= NOTE_AS_ERR;
-	}
-	AUDIT_ANYAS_KLIST_UNLOCK();
-}
-
-/*
- * Safely update kauth cred of the given process with new the given audit info. 
- * If the newprocess flag is set then we need to account for this process in
- * the proc count.
- */
-int
-audit_session_setaia(proc_t p, auditinfo_addr_t *aia_p, int newprocess)
-{
-	kauth_cred_t my_cred, my_new_cred;
-	struct au_session  as;
-	struct au_session  tmp_as;
-	auditinfo_addr_t caia;
-
-	/*
-	 * If this is going to modify an existing session then do some
-	 * immutable checks.
-	 */
-	if (audit_session_lookup(aia_p->ai_asid, &caia) == 0) {
-
-		/* 
-		 * If the current audit ID is not the default then it is
-		 * immutable. 
-		 */
-		if (caia.ai_auid != AU_DEFAUDITID &&
-		    caia.ai_auid != aia_p->ai_auid)
-			return (EINVAL);
-
-		/*
-		 * If the current termid is not the default then it is
-		 * immutable.
-		 */
-		if ((caia.ai_termid.at_type != AU_IPv4 || 
-			caia.ai_termid.at_port != 0 || 
-			caia.ai_termid.at_addr[0] != 0) &&
-		    (caia.ai_termid.at_port != aia_p->ai_termid.at_port ||
-		     caia.ai_termid.at_type != aia_p->ai_termid.at_type ||
-		     bcmp(&caia.ai_termid.at_addr, &aia_p->ai_termid.at_addr,
-			 sizeof (caia.ai_termid.at_addr) )) )
-			return (EINVAL);
-
-		/* The audit flags are immutable. */
-		if (caia.ai_flags != aia_p->ai_flags)
-			return (EINVAL);
-
-		/* The audit masks are mutable. */
-	}
-
-	my_cred = kauth_cred_proc_ref(p);
-	bcopy(&aia_p->ai_mask, &as.as_mask, sizeof(as.as_mask));
-	as.as_aia_p = audit_session_new(aia_p, newprocess);
-
-	/*
-	 * We are modifying the audit info in a credential so we need a new
-	 * credential (or take another reference on an existing credential that
-	 * matches our new one).  We must do this because the audit info in the
-	 * credential is used as part of our hash key.	Get current credential
-	 * in the target process and take a reference while we muck with it.
-	 */
-	for (;;) {
+	for (;;) {
 
 		/*
 		 * Set the credential with new info.  If there is no change,
@@ -1129,6 +1180,8 @@ audit_session_setaia(proc_t p, auditinfo_addr_t *aia_p, int newprocess)
 				continue;
 			}
 			p->p_ucred = my_new_cred;
+			/* update cred on proc */
+			PROC_UPDATE_CREDS_ONPROC(p);
 			proc_unlock(p);
 		}
 		/*
@@ -1137,11 +1190,11 @@ audit_session_setaia(proc_t p, auditinfo_addr_t *aia_p, int newprocess)
 		kauth_cred_unref(&my_cred);
 		break;
 	}
-	audit_session_unref(my_new_cred);
 
-	/*
-	 * Propagate the change from the process to the Mach task.
-	 */
+	/* Drop the reference taken by audit_session_new() above. */
+	audit_unref_session(AU_SENTRY_PTR(as.as_aia_p));
+
+	/* Propagate the change from the process to the Mach task. */
 	set_security_token(p);
 
 	return (0);
@@ -1180,6 +1233,7 @@ audit_session_self(proc_t p, __unused struct audit_session_self_args *uap,
 
 	aia_p = cred->cr_audit.as_aia_p;
 	if (!IS_VALID_SESSION(aia_p)) {
+		/* Can't join the default session. */
 		err = EINVAL;
 		goto done;
 	}
@@ -1194,91 +1248,190 @@ audit_session_self(proc_t p, __unused struct audit_session_self_args *uap,
 		bcopy(&cred->cr_audit.as_mask, &se->se_mask,
 		    sizeof(se->se_mask));
 
-	if ((sendport = audit_session_mksend(aia_p, &se->se_port)) == NULL) {
-		/* failed to alloc new port */
-		err = ENOMEM;
-		goto done;
-	}
-
 	/*
-	 * This reference on the session is unref'ed in
-	 * audit_session_port_destory().  This reference is needed so the
-	 * session doesn't get dropped until the session join is done.
+	 * Get a send right to the session's Mach port and insert it in the
+	 * process' mach port namespace.
 	 */
-	audit_ref_session(se);
-
+	sendport = audit_session_mksend(aia_p, &se->se_port);
+	*ret_port = ipc_port_copyout_send(sendport, get_task_ipcspace(p->task));
 
 done:
 	if (cred != NULL)
 		kauth_cred_unref(&cred);	
-	if (err == 0)
-		*ret_port = ipc_port_copyout_send(sendport,
-		    get_task_ipcspace(p->task));
-	else
+	if (err != 0)
 		*ret_port = MACH_PORT_NULL;
-
 	return (err);
 }
 
-void
-audit_session_portaiadestroy(struct auditinfo_addr *port_aia_p)
+/*
+ * audit_session_port  (system call)
+ *
+ * Description: Obtain a Mach send right for the given session ID.
+ *
+ * Parameters:	p		Process calling audit_session_port().
+ *              uap->asid       The target audit session ID.  The special
+ *              		value -1 can be used to target the process's
+ *              		own session.
+ *              uap->portnamep  User address at which to place port name.
+ *
+ * Returns:	0		Success
+ * 		EINVAL		The calling process' session has not been set.
+ * 		EINVAL		The given session ID could not be found.
+ * 		EINVAL		The Mach port right could not be copied out.
+ * 		ESRCH		Bad process, can't get valid cred for process.
+ * 		EPERM		Only the superuser can reference sessions other
+ * 				than the process's own.
+ * 		ENOMEM		Port allocation failed due to no free memory.
+ */
+int
+audit_session_port(proc_t p, struct audit_session_port_args *uap,
+    __unused int *retval)
 {
-	au_sentry_t *se;
+	ipc_port_t sendport = IPC_PORT_NULL;
+	mach_port_name_t portname = MACH_PORT_NULL;
+	kauth_cred_t cred = NULL;
+	auditinfo_addr_t *aia_p = NULL;
+	au_sentry_t *se = NULL;
+	int err = 0;
+
+	/* Note: Currently this test will never be true, because
+	 * ASSIGNED_ASID_MAX is effectively (uint32_t)-2.
+	 */
+	if (uap->asid != -1 && (uint32_t)uap->asid > ASSIGNED_ASID_MAX) {
+		err = EINVAL;
+		goto done;
+	}
+	cred = kauth_cred_proc_ref(p);
+	if (!IS_VALID_CRED(cred)) {
+		err = ESRCH;
+		goto done;
+	}
+	aia_p = cred->cr_audit.as_aia_p;
 
-	KASSERT(port_aia_p != NULL,
-	    ("audit_session_infodestroy: port_aia_p = NULL"));
+	/* Find the session corresponding to the requested audit
+	 * session ID.  If found, take a reference on it so that
+	 * the session is not dropped until the join is later done.
+	 */
+	if (uap->asid == (au_asid_t)-1 ||
+	    uap->asid == aia_p->ai_asid) {
 
-	se = AU_SENTRY_PTR(port_aia_p);
+		if (!IS_VALID_SESSION(aia_p)) {
+			/* Can't join the default session. */
+			err = EINVAL;
+			goto done;
+		}
+
+		/* No privilege is required to obtain a port for our
+		 * own session.
+		 */
+		se = AU_SENTRY_PTR(aia_p);
+		audit_ref_session(se);
+	} else if (kauth_cred_issuser(cred)) {
+		/* The superuser may obtain a port for any existing
+		 * session.
+		 */
+		AUDIT_SENTRY_RLOCK();
+		se = audit_session_find(uap->asid);
+		AUDIT_SENTRY_RUNLOCK();
+		if (NULL == se) {
+			err = EINVAL;
+			goto done;
+		}
+		aia_p = &se->se_auinfo;
+	} else {
+		err = EPERM;
+		goto done;
+	}
 
 	/*
-	 * Drop the reference added in audit_session_self().
+	 * Processes that join using this mach port will inherit this process'
+	 * pre-selection masks.
 	 */
-	if (se != NULL) {
-		se->se_port = IPC_PORT_NULL;
-		audit_unref_session(se);
+	if (se->se_port == IPC_PORT_NULL)
+		bcopy(&cred->cr_audit.as_mask, &se->se_mask,
+		    sizeof(se->se_mask));
+
+	/*
+	 * Use the session reference to create a mach port reference for the
+	 * session (at which point we are free to drop the session reference)
+	 * and then copy out the mach port to the process' mach port namespace.
+	 */
+	sendport = audit_session_mksend(aia_p, &se->se_port);
+	portname = ipc_port_copyout_send(sendport, get_task_ipcspace(p->task));
+	if (!MACH_PORT_VALID(portname)) {
+		err = EINVAL;
+		goto done;
 	}
+	err = copyout(&portname, uap->portnamep, sizeof(mach_port_name_t));
+done:
+	if (cred != NULL)
+		kauth_cred_unref(&cred);
+	if (NULL != se)
+		audit_unref_session(se);
+	if (MACH_PORT_VALID(portname) && 0 != err)
+		(void)mach_port_deallocate(get_task_ipcspace(p->task),
+		    portname);
 
+	return (err);
 }
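+
+/*
+ * Illustrative userland sequence (a sketch; audit_session_port() and
+ * audit_session_join() are the syscall stubs of the same names):
+ *
+ *	mach_port_name_t port;
+ *	if (audit_session_port(asid, &port) == 0)
+ *		... pass the send right on; the receiver may then call ...
+ *		audit_session_join(port);
+ */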
 
 static int
 audit_session_join_internal(proc_t p, ipc_port_t port, au_asid_t *new_asid)
 {
-	auditinfo_addr_t *port_aia_p, *old_aia_p;
-	kauth_cred_t cred = NULL;
+	auditinfo_addr_t *new_aia_p, *old_aia_p;
+	kauth_cred_t my_cred = NULL;
 	au_asid_t old_asid;
 	int err = 0;
 
 	*new_asid = AU_DEFAUDITSID;
 
-	if ((port_aia_p = audit_session_porttoaia(port)) == NULL) {
+	if ((new_aia_p = audit_session_porttoaia(port)) == NULL) {
 		err = EINVAL;
 		goto done;
 	}
-	*new_asid = port_aia_p->ai_asid;
 
-	cred = kauth_cred_proc_ref(p);
-	if (!IS_VALID_CRED(cred)) {
-		kauth_cred_unref(&cred);	
+	proc_lock(p);
+	kauth_cred_ref(p->p_ucred);
+	my_cred = p->p_ucred;
+	if (!IS_VALID_CRED(my_cred)) {
+		kauth_cred_unref(&my_cred);	
+		proc_unlock(p);
 		err = ESRCH;
 		goto done;
 	}
-	old_aia_p = cred->cr_audit.as_aia_p;
+	old_aia_p = my_cred->cr_audit.as_aia_p;
 	old_asid = old_aia_p->ai_asid;
+	*new_asid = new_aia_p->ai_asid;
 
 	/*
 	 * Add process in if not already in the session.
 	 */
 	if (*new_asid != old_asid) {
-		audit_session_setaia(p, port_aia_p, 1);
-		/*
-		 * If this process was in a valid session before then we
-		 * need to decrement the process count of the session it
-		 * came from.
-		 */
-		if (IS_VALID_SESSION(old_aia_p))
-			audit_dec_procount(AU_SENTRY_PTR(old_aia_p));
+		kauth_cred_t my_new_cred;
+		struct au_session new_as;
+
+		bcopy(&new_aia_p->ai_mask, &new_as.as_mask,
+			sizeof(new_as.as_mask));
+		new_as.as_aia_p = new_aia_p;
+
+		my_new_cred = kauth_cred_setauditinfo(my_cred, &new_as);
+		p->p_ucred = my_new_cred;
+		PROC_UPDATE_CREDS_ONPROC(p);
+
+		/* Increment the proc count of new session */
+		audit_inc_procount(AU_SENTRY_PTR(new_aia_p));
+
+		proc_unlock(p);
+
+		/* Propagate the change from the process to the Mach task. */
+		set_security_token(p);
+
+		/* Decrement the process count of the former session. */
+		audit_dec_procount(AU_SENTRY_PTR(old_aia_p));
+	} else  {
+		proc_unlock(p);
 	}
-	kauth_cred_unref(&cred);	
+	kauth_cred_unref(&my_cred);
 
 done:
 	if (port != IPC_PORT_NULL)
@@ -1312,8 +1465,10 @@ audit_session_spawnjoin(proc_t p, ipc_port_t port)
  * Parameters:	p		Process calling session join.
  * 		uap->port	A Mach send right.
  *
- * Returns:	*ret_asid	Audit session ID of new session, which may
- * 				be AU_DEFAUDITSID in the failure case.
+ * Returns:	*ret_asid	Audit session ID of new session.
+ *				In the failure case the return value will be -1
+ *				and 'errno' will be set to a non-zero value
+ *				described below.
  *
  * Errno:	0		Success	
  * 		EINVAL		Invalid Mach port name.
@@ -1338,6 +1493,540 @@ audit_session_join(proc_t p, struct audit_session_join_args *uap,
 	return (err);
 }
 
+/*
+ * Audit session device.
+ */
+
+/*
+ * Free an audit sdev entry.
+ */
+static void
+audit_sdev_entry_free(struct audit_sdev_entry *ase)
+{
+
+	free(ase->ase_record, M_AUDIT_SDEV_ENTRY);
+	free(ase, M_AUDIT_SDEV_ENTRY);
+}
+
+/*
+ * Append individual record to a queue.  Allocate queue-local buffer and
+ * add to the queue.  If the queue is full or we can't allocate memory,
+ * drop the newest record.
+ */
+static void
+audit_sdev_append(struct audit_sdev *asdev, void *record, u_int record_len)
+{
+	struct audit_sdev_entry *ase;
+
+	AUDIT_SDEV_LOCK_ASSERT(asdev);
+
+	if (asdev->asdev_qlen >= asdev->asdev_qlimit) {
+		asdev->asdev_drops++;
+		audit_sdev_drops++;
+		return;
+	}
+
+	ase = malloc(sizeof (*ase), M_AUDIT_SDEV_ENTRY, M_NOWAIT | M_ZERO);
+	if (NULL == ase) {
+		asdev->asdev_drops++;
+		audit_sdev_drops++;
+		return;
+	}
+
+	ase->ase_record = malloc(record_len, M_AUDIT_SDEV_ENTRY, M_NOWAIT);
+	if (NULL == ase->ase_record) {
+		free(ase, M_AUDIT_SDEV_ENTRY);
+		asdev->asdev_drops++;
+		audit_sdev_drops++;
+		return;
+	}
+
+	bcopy(record, ase->ase_record, record_len);
+	ase->ase_record_len = record_len;
+
+	TAILQ_INSERT_TAIL(&asdev->asdev_queue, ase, ase_queue);
+	asdev->asdev_inserts++;
+	asdev->asdev_qlen++;
+	asdev->asdev_qbyteslen += ase->ase_record_len;
+	selwakeup(&asdev->asdev_selinfo);
+	if (asdev->asdev_flags & AUDIT_SDEV_ASYNC)
+		pgsigio(asdev->asdev_sigio, SIGIO);
+
+	cv_broadcast(&asdev->asdev_cv);
+}
+
+/*
+ * Submit an audit record to be queued in the audit session device.
+ */
+void
+audit_sdev_submit(__unused au_id_t auid, __unused au_asid_t asid, void *record,
+    u_int record_len)
+{
+	struct audit_sdev *asdev;
+
+	/*
+	 * Lockless read to avoid lock overhead if session devices are not in
+	 * use.
+	 */
+	if (NULL == TAILQ_FIRST(&audit_sdev_list))
+		return;
+
+	AUDIT_SDEV_LIST_RLOCK();
+	TAILQ_FOREACH(asdev, &audit_sdev_list, asdev_list) {
+		AUDIT_SDEV_LOCK(asdev);
+		
+		/* 
+		 * Only append to the sdev queue if the AUID and ASID match that
+		 * of the process that opened this session device or if the
+		 * ALLSESSIONS flag is set.
+		 */
+		if ((/* XXXss auid == asdev->asdev_auid && */
+			asid == asdev->asdev_asid) ||
+		    (asdev->asdev_flags & AUDIT_SDEV_ALLSESSIONS) != 0)
+			audit_sdev_append(asdev, record, record_len);
+		AUDIT_SDEV_UNLOCK(asdev);
+	}
+	AUDIT_SDEV_LIST_RUNLOCK();
+
+	/* Unlocked increment. */
+	audit_sdev_records++;
+}
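
The unlocked TAILQ_FIRST() peek above is a deliberate fast path: when no session device is open, the audit path pays no locking cost at all, and the race with a concurrent open is benign because that opener can at worst miss this one record. The same pattern in miniature (a sketch using pthreads, for illustration only):

    #include <pthread.h>
    #include <stddef.h>

    struct consumer {
        struct consumer *next;
    };

    static pthread_rwlock_t list_lock = PTHREAD_RWLOCK_INITIALIZER;
    static struct consumer *consumers;      /* NULL when nobody listens */

    static void
    deliver(struct consumer *c, const void *rec, size_t len)
    {
        (void)c; (void)rec; (void)len;      /* hand-off elided */
    }

    static void
    submit(const void *rec, size_t len)
    {
        /*
         * Unlocked peek: if no consumer is registered, skip the lock
         * entirely.  A consumer registering at this instant may miss
         * this record, which is an accepted trade-off.
         */
        if (consumers == NULL)
            return;

        pthread_rwlock_rdlock(&list_lock);
        for (struct consumer *c = consumers; c != NULL; c = c->next)
            deliver(c, rec, len);
        pthread_rwlock_unlock(&list_lock);
    }
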
+
+/*
+ * Allocate a new audit sdev.  Connects the sdev, on success, to the global
+ * list and updates statistics.
+ */
+static struct audit_sdev *
+audit_sdev_alloc(void)
+{
+	struct audit_sdev *asdev;
+
+	AUDIT_SDEV_LIST_WLOCK_ASSERT();
+
+	asdev = malloc(sizeof (*asdev), M_AUDIT_SDEV, M_NOWAIT | M_ZERO);
+	if (NULL == asdev)
+		return (NULL);
+
+	asdev->asdev_qlimit = AUDIT_SDEV_QLIMIT_DEFAULT;
+	TAILQ_INIT(&asdev->asdev_queue);
+	AUDIT_SDEV_LOCK_INIT(asdev);
+	AUDIT_SDEV_SX_LOCK_INIT(asdev);
+	cv_init(&asdev->asdev_cv, "audit_sdev_cv");
+
+	/*
+	 * Add to global list and update global statistics.
+	 */
+	TAILQ_INSERT_HEAD(&audit_sdev_list, asdev, asdev_list);
+	audit_sdev_count++;
+	audit_sdev_ever++;
+
+	return (asdev);
+}
+
+/*
+ * Flush all records currently present in an audit sdev.
+ */
+static void
+audit_sdev_flush(struct audit_sdev *asdev)
+{
+	struct audit_sdev_entry *ase;
+
+	AUDIT_SDEV_LOCK_ASSERT(asdev);
+
+	while ((ase = TAILQ_FIRST(&asdev->asdev_queue)) != NULL) {
+		TAILQ_REMOVE(&asdev->asdev_queue, ase, ase_queue);
+		asdev->asdev_qbyteslen -= ase->ase_record_len;
+		audit_sdev_entry_free(ase);
+		asdev->asdev_qlen--;
+	}
+	asdev->asdev_qoffset = 0;
+
+	KASSERT(0 == asdev->asdev_qlen, ("audit_sdev_flush: asdev_qlen"));
+	KASSERT(0 == asdev->asdev_qbyteslen,
+	    ("audit_sdev_flush: asdev_qbyteslen"));
+}
+
+/*
+ * Free an audit sdev.
+ */
+static void
+audit_sdev_free(struct audit_sdev *asdev)
+{
+
+	AUDIT_SDEV_LIST_WLOCK_ASSERT();
+	AUDIT_SDEV_LOCK_ASSERT(asdev);
+
+	/* XXXss - preselect hook here */
+	audit_sdev_flush(asdev);
+	cv_destroy(&asdev->asdev_cv);
+	AUDIT_SDEV_SX_LOCK_DESTROY(asdev);
+	AUDIT_SDEV_LOCK_DESTROY(asdev);
+
+	TAILQ_REMOVE(&audit_sdev_list, asdev, asdev_list);
+	free(asdev, M_AUDIT_SDEV);
+	audit_sdev_count--;
+}
+
+/*
+ * Get the auditinfo_addr of the process and check for superuser
+ * privilege.  Returns non-zero if the caller is not superuser.
+ */
+static int
+audit_sdev_get_aia(proc_t p, struct auditinfo_addr *aia_p)
+{
+	int error;
+	kauth_cred_t scred;
+
+	scred = kauth_cred_proc_ref(p);
+	error = suser(scred, &p->p_acflag);
+
+	if (NULL != aia_p)
+		bcopy(scred->cr_audit.as_aia_p, aia_p, sizeof (*aia_p));
+	kauth_cred_unref(&scred);
+
+	return (error);
+}
+
+/*
+ * Audit session dev open method.
+ */
+static int
+audit_sdev_open(dev_t dev, __unused int flags,  __unused int devtype, proc_t p)
+{
+	struct audit_sdev *asdev;
+	struct auditinfo_addr aia;
+	int u;
+
+	u = minor(dev);
+	if (u < 0 || u >= MAX_AUDIT_SDEVS)
+		return (ENXIO);
+
+	(void) audit_sdev_get_aia(p, &aia);
+
+	AUDIT_SDEV_LIST_WLOCK();
+	asdev = audit_sdev_dtab[u];
+	if (NULL == asdev) {
+		asdev = audit_sdev_alloc();
+		if (NULL == asdev) {
+			AUDIT_SDEV_LIST_WUNLOCK();
+			return (ENOMEM);
+		}
+		audit_sdev_dtab[u] = asdev;
+	} else {
+		KASSERT(asdev->asdev_open, ("audit_sdev_open: Already open"));
+		AUDIT_SDEV_LIST_WUNLOCK();
+		return (EBUSY);
+	}
+	asdev->asdev_open = 1;
+	asdev->asdev_auid = aia.ai_auid;
+	asdev->asdev_asid = aia.ai_asid;
+	asdev->asdev_flags = 0; 
+
+	AUDIT_SDEV_LIST_WUNLOCK();
+
+	return (0);
+}
+
+/*
+ * Audit session dev close method.
+ */
+static int
+audit_sdev_close(dev_t dev, __unused int flags, __unused int devtype,
+    __unused proc_t p)
+{
+	struct audit_sdev *asdev;
+	int u;
+
+	u = minor(dev);
+	asdev = audit_sdev_dtab[u];
+
+	KASSERT(asdev != NULL, ("audit_sdev_close: asdev == NULL"));
+	KASSERT(asdev->asdev_open, ("audit_sdev_close: !asdev_open"));
+
+	AUDIT_SDEV_LIST_WLOCK();
+	AUDIT_SDEV_LOCK(asdev);
+	asdev->asdev_open = 0;
+	audit_sdev_free(asdev);  /* sdev lock is destroyed in audit_sdev_free() */
+	audit_sdev_dtab[u] = NULL;
+	AUDIT_SDEV_LIST_WUNLOCK();
+
+	return (0);
+}
+
+/*
+ * Audit session dev ioctl method.
+ */
+static int
+audit_sdev_ioctl(dev_t dev, u_long cmd, caddr_t data,
+    __unused int flag, proc_t p)
+{
+	struct audit_sdev *asdev;
+	int error;
+
+	asdev = audit_sdev_dtab[minor(dev)];
+	KASSERT(asdev != NULL, ("audit_sdev_ioctl: asdev == NULL"));
+
+	error = 0;
+
+	switch (cmd) {
+	case FIONBIO:
+		AUDIT_SDEV_LOCK(asdev);
+		if (*(int *)data)
+			asdev->asdev_flags |= AUDIT_SDEV_NBIO;
+		else
+			asdev->asdev_flags &= ~AUDIT_SDEV_NBIO;
+		AUDIT_SDEV_UNLOCK(asdev);
+		break;
+
+	case FIONREAD:
+		AUDIT_SDEV_LOCK(asdev);
+		*(int *)data = asdev->asdev_qbyteslen - asdev->asdev_qoffset;
+		AUDIT_SDEV_UNLOCK(asdev);
+		break;
+
+	case AUDITSDEV_GET_QLEN:
+		*(u_int *)data = asdev->asdev_qlen;
+		break;
+
+	case AUDITSDEV_GET_QLIMIT:
+		*(u_int *)data = asdev->asdev_qlimit;
+		break;
+
+	case AUDITSDEV_SET_QLIMIT:
+		if (*(u_int *)data >= AUDIT_SDEV_QLIMIT_MIN &&
+		    *(u_int *)data <= AUDIT_SDEV_QLIMIT_MAX) {
+			asdev->asdev_qlimit = *(u_int *)data;
+		} else
+			error = EINVAL;
+		break;
+
+	case AUDITSDEV_GET_QLIMIT_MIN:
+		*(u_int *)data = AUDIT_SDEV_QLIMIT_MIN;
+		break;
+
+	case AUDITSDEV_GET_QLIMIT_MAX:
+		*(u_int *)data = AUDIT_SDEV_QLIMIT_MAX;
+		break;
+
+	case AUDITSDEV_FLUSH:
+		if (AUDIT_SDEV_SX_XLOCK_SIG(asdev) != 0)
+			return (EINTR);
+		AUDIT_SDEV_LOCK(asdev);
+		audit_sdev_flush(asdev);
+		AUDIT_SDEV_UNLOCK(asdev);
+		AUDIT_SDEV_SX_XUNLOCK(asdev);
+		break;
+
+	case AUDITSDEV_GET_MAXDATA:
+		*(u_int *)data = MAXAUDITDATA;
+		break;
+
+	/* XXXss these should be 64 bit, maybe. */
+	case AUDITSDEV_GET_INSERTS:
+		*(u_int *)data = asdev->asdev_inserts;
+		break;
+
+	case AUDITSDEV_GET_READS:
+		*(u_int *)data = asdev->asdev_reads;
+		break;
+
+	case AUDITSDEV_GET_DROPS:
+		*(u_int *)data = asdev->asdev_drops;
+		break;
+
+	case AUDITSDEV_GET_ALLSESSIONS:
+		error = audit_sdev_get_aia(p, NULL);
+		if (error)
+			break;
+		*(u_int *)data = (asdev->asdev_flags & AUDIT_SDEV_ALLSESSIONS) ?
+		    1 : 0;
+		break;
+
+	case AUDITSDEV_SET_ALLSESSIONS:
+		error = audit_sdev_get_aia(p, NULL);
+		if (error)
+			break;
+
+		AUDIT_SDEV_LOCK(asdev);
+		if (*(int *)data)
+			asdev->asdev_flags |= AUDIT_SDEV_ALLSESSIONS;
+		else
+			asdev->asdev_flags &= ~AUDIT_SDEV_ALLSESSIONS;
+		AUDIT_SDEV_UNLOCK(asdev);
+		break;
+
+	default:
+		error = ENOTTY;
+	}
+
+	return (error);
+}
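
A userspace sketch of driving these ioctls. The AUDITSDEV_* command names are from this patch; the device path and the header that exports the commands are assumptions for illustration:

    #include <sys/types.h>
    #include <sys/ioctl.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <security/audit/audit_ioctl.h> /* assumed home of AUDITSDEV_* */

    int
    main(void)
    {
        u_int qlimit, maxdata;
        int fd, on = 1;

        fd = open("/dev/auditsessions", O_RDONLY);  /* path is an assumption */
        if (fd < 0) {
            perror("open");
            return (1);
        }
        if (ioctl(fd, AUDITSDEV_GET_QLIMIT, &qlimit) == 0)
            printf("queue limit: %u records\n", qlimit);
        if (ioctl(fd, AUDITSDEV_GET_MAXDATA, &maxdata) == 0)
            printf("max record size: %u bytes\n", maxdata);
        (void) ioctl(fd, FIONBIO, &on);             /* non-blocking reads */
        return (0);
    }
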
+
+/*
+ * Audit session dev read method. 
+ */
+static int
+audit_sdev_read(dev_t dev, struct uio *uio, __unused int flag)
+{
+	struct audit_sdev_entry *ase;
+	struct audit_sdev *asdev;
+	u_int toread;
+	int error;
+
+	asdev = audit_sdev_dtab[minor(dev)];
+	KASSERT(NULL != asdev, ("audit_sdev_read: asdev == NULL"));
+
+	/*
+	 * We hold a sleep lock over read and flush because we rely on the
+	 * stability of a record in the queue during uiomove.
+	 */
+	if (0 != AUDIT_SDEV_SX_XLOCK_SIG(asdev))
+		return (EINTR);
+	AUDIT_SDEV_LOCK(asdev);
+	while (TAILQ_EMPTY(&asdev->asdev_queue)) {
+		if (asdev->asdev_flags & AUDIT_SDEV_NBIO) {
+			AUDIT_SDEV_UNLOCK(asdev);
+			AUDIT_SDEV_SX_XUNLOCK(asdev);
+			return (EAGAIN);
+		}
+		error = cv_wait_sig(&asdev->asdev_cv, AUDIT_SDEV_MTX(asdev));
+		if (error) {
+			AUDIT_SDEV_UNLOCK(asdev);
+			AUDIT_SDEV_SX_XUNLOCK(asdev);
+			return (error);
+		}
+	}
+
+	/*
+	 * Copy as many remaining bytes from the current record to userspace
+	 * as we can. Keep processing records until we run out of records in
+	 * the queue or until the user buffer runs out of space.
+	 *
+	 * We rely on the sleep lock to maintain ase's stability here.
+	 */
+	asdev->asdev_reads++;
+	while ((ase = TAILQ_FIRST(&asdev->asdev_queue)) != NULL &&
+	    uio_resid(uio) > 0) {
+		AUDIT_SDEV_LOCK_ASSERT(asdev);
+
+		KASSERT(ase->ase_record_len > asdev->asdev_qoffset,
+		    ("audit_sdev_read: record_len > qoffset (1)"));
+		toread = MIN(ase->ase_record_len - asdev->asdev_qoffset,
+		    uio_resid(uio));
+		AUDIT_SDEV_UNLOCK(asdev);
+		error = uiomove((char *) ase->ase_record + asdev->asdev_qoffset,
+		    toread, uio);
+		if (error) {
+			AUDIT_SDEV_SX_XUNLOCK(asdev);
+			return (error);
+		}
+
+		/*
+		 * If the copy succeeded then update book-keeping, and if no
+		 * bytes remain in the current record then free it.
+		 */
+		AUDIT_SDEV_LOCK(asdev);
+		KASSERT(TAILQ_FIRST(&asdev->asdev_queue) == ase,
+		    ("audit_sdev_read: queue out of sync after uiomove"));
+		asdev->asdev_qoffset += toread;
+		KASSERT(ase->ase_record_len >= asdev->asdev_qoffset,
+		    ("audit_sdev_read: record_len >= qoffset (2)"));
+		if (asdev->asdev_qoffset == ase->ase_record_len) {
+			TAILQ_REMOVE(&asdev->asdev_queue, ase, ase_queue);
+			asdev->asdev_qbyteslen -= ase->ase_record_len;
+			audit_sdev_entry_free(ase);
+			asdev->asdev_qlen--;
+			asdev->asdev_qoffset = 0;
+		}
+	}
+	AUDIT_SDEV_UNLOCK(asdev);
+	AUDIT_SDEV_SX_XUNLOCK(asdev);
+	return (0);
+}
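
Reads hand back raw BSM data and may stop mid-record when the user buffer is small; asdev_qoffset remembers the resume point, so a consumer can simply loop. A sketch of such a loop (poll(2) pairs with the poll method below):

    #include <poll.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Drain loop: wait for readability, then copy out whatever is queued.
     * A partial read resumes mid-record on the next call. */
    static void
    drain(int fd)
    {
        struct pollfd pfd = { .fd = fd, .events = POLLIN };
        char buf[4096];
        ssize_t n;

        for (;;) {
            if (poll(&pfd, 1, -1) < 0)
                break;
            n = read(fd, buf, sizeof (buf));
            if (n <= 0)
                break;
            fwrite(buf, 1, (size_t)n, stdout);  /* raw BSM bytes */
        }
    }
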
+
+/*
+ * Audit session device poll method.
+ */
+static int
+audit_sdev_poll(dev_t dev, int events, void *wql, struct proc *p)
+{
+	struct audit_sdev *asdev;
+	int revents;
+
+	revents = 0;
+	asdev = audit_sdev_dtab[minor(dev)];
+	KASSERT(NULL != asdev, ("audit_sdev_poll: asdev == NULL"));
+
+	if (events & (POLLIN | POLLRDNORM)) {
+		AUDIT_SDEV_LOCK(asdev);
+		if (NULL != TAILQ_FIRST(&asdev->asdev_queue))
+			revents |= events & (POLLIN | POLLRDNORM);
+		else
+			selrecord(p, &asdev->asdev_selinfo, wql);
+		AUDIT_SDEV_UNLOCK(asdev);
+	}
+	return (revents);
+}
+
+/*
+ * Audit sdev clone routine.  Provides a new minor number or returns -1.
+ * This is called with the DEVFS_LOCK held.
+ */
+static int
+audit_sdev_clone(__unused dev_t dev, int action)
+{
+	int i;
+
+	if (DEVFS_CLONE_ALLOC == action) {
+		for (i = 0; i < MAX_AUDIT_SDEVS; i++)
+			if (NULL == audit_sdev_dtab[i])
+				return (i);
+
+		/* 
+		 * This really should return -1 here but that seems to
+		 * hang things in devfs.  We instead return 0 and let
+		 * audit_sdev_open tell userland the bad news.
+		 */
+		return (0);
+	}
+
+	return (-1);
+}
+
+static int
+audit_sdev_init(void)
+{
+	dev_t dev;
+
+	TAILQ_INIT(&audit_sdev_list);
+	AUDIT_SDEV_LIST_LOCK_INIT();
+
+	audit_sdev_major = cdevsw_add(-1, &audit_sdev_cdevsw);
+	if (audit_sdev_major < 0)
+		return (KERN_FAILURE);
+
+	dev = makedev(audit_sdev_major, 0);
+	devnode = devfs_make_node_clone(dev, DEVFS_CHAR, UID_ROOT, GID_WHEEL,
+	    0644, audit_sdev_clone, AUDIT_SDEV_NAME, 0);
+
+	if (NULL == devnode)
+		return (KERN_FAILURE);
+
+	return (KERN_SUCCESS);
+}
+
+/* XXXss
+static int
+audit_sdev_shutdown(void)
+{
+
+	devfs_remove(devnode);
+	(void) cdevsw_remove(audit_sdev_major, &audit_sdev_cdevsw);
+
+	return (KERN_SUCCESS);
+}
+*/
+
 #else
 
 int
@@ -1358,4 +2047,12 @@ audit_session_join(proc_t p, struct audit_session_join_args *uap,
 	return (ENOSYS);
 }
 
+int
+audit_session_port(proc_t p, struct audit_session_port_args *uap, int *retval)
+{
+#pragma unused(p, uap, retval)
+
+	return (ENOSYS);
+}
+
 #endif /* CONFIG_AUDIT */
diff --git a/bsd/security/audit/audit_syscalls.c b/bsd/security/audit/audit_syscalls.c
index 0ad24367a..43d93bdda 100644
--- a/bsd/security/audit/audit_syscalls.c
+++ b/bsd/security/audit/audit_syscalls.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 1999-2009, Apple Inc.
+ * Copyright (c) 1999-2010, Apple Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -246,7 +246,7 @@ int
 auditon(proc_t p, struct auditon_args *uap, __unused int32_t *retval)
 {
 	kauth_cred_t scred;
-	int error;
+	int error = 0;
 	union auditon_udata udata;
 	proc_t tp = PROC_NULL;
 	struct auditinfo_addr aia;
@@ -288,6 +288,8 @@ auditon(proc_t p, struct auditon_args *uap, __unused int32_t *retval)
 	case A_GETPINFO_ADDR:
 	case A_SENDTRIGGER:
 	case A_GETSINFO_ADDR:
+	case A_GETSFLAGS:
+	case A_SETSFLAGS:
 		error = copyin(uap->data, (void *)&udata, uap->length);
 		if (error)
 			return (error);
@@ -296,33 +298,45 @@ auditon(proc_t p, struct auditon_args *uap, __unused int32_t *retval)
 		break;
 	}
 
+	/* Check appropriate privilege. */
+	switch (uap->cmd) {
 	/*
 	 * A_GETSINFO doesn't require privilege but only superuser
 	 * gets to see the audit masks.
 	 */
-	error = suser(kauth_cred_get(), &p->p_acflag);
-	if (A_GETSINFO_ADDR == uap->cmd) {
+	case A_GETSINFO_ADDR:
 		if ((sizeof(udata.au_kau_info) != uap->length) ||
 	   		(audit_session_lookup(udata.au_kau_info.ai_asid,
 					      &udata.au_kau_info) != 0))
-			return (EINVAL);
-		if (error) {
+			error = EINVAL;
+		else if (!kauth_cred_issuser(kauth_cred_get())) {
 			udata.au_kau_info.ai_mask.am_success = ~0;
 			udata.au_kau_info.ai_mask.am_failure = ~0;
 		}
-	} else
-		if (error)
-			return (error);
+		break;
+	case A_GETSFLAGS:
+	case A_SETSFLAGS:
+		/*
+		 * Getting one's own audit session flags requires no
+		 * privilege.  Setting the flags is subject to access
+		 * control implemented in audit_session_setaia().
+		 */
+		break;
+	default:
+		error = suser(kauth_cred_get(), &p->p_acflag);
+		break;
+	}
+	if (error)
+		return (error);
 
 	/*
 	 * XXX Need to implement these commands by accessing the global
 	 * values associated with the commands.
 	 */
-	mtx_lock(&audit_mtx);
 	switch (uap->cmd) {
 	case A_OLDGETPOLICY:
 	case A_GETPOLICY:
 		if (sizeof(udata.au_policy64) == uap->length) {
+			mtx_lock(&audit_mtx);
 			if (!audit_fail_stop)
 				udata.au_policy64 |= AUDIT_CNT;
 			if (audit_panic_on_write_fail)
@@ -331,12 +345,12 @@ auditon(proc_t p, struct auditon_args *uap, __unused int32_t *retval)
 				udata.au_policy64 |= AUDIT_ARGV;
 			if (audit_arge)
 				udata.au_policy64 |= AUDIT_ARGE;
+			mtx_unlock(&audit_mtx);
 			break;
 		}
-		if (sizeof(udata.au_policy) != uap->length) {
-			mtx_unlock(&audit_mtx);
+		if (sizeof(udata.au_policy) != uap->length)
 			return (EINVAL);
-		}
+		mtx_lock(&audit_mtx);
 		if (!audit_fail_stop)
 			udata.au_policy |= AUDIT_CNT;
 		if (audit_panic_on_write_fail)
@@ -345,60 +359,61 @@ auditon(proc_t p, struct auditon_args *uap, __unused int32_t *retval)
 			udata.au_policy |= AUDIT_ARGV;
 		if (audit_arge)
 			udata.au_policy |= AUDIT_ARGE;
+		mtx_unlock(&audit_mtx);
 		break;
 
 	case A_OLDSETPOLICY:
 	case A_SETPOLICY:
 		if (sizeof(udata.au_policy64) == uap->length) {
 			if (udata.au_policy64 & ~(AUDIT_CNT|AUDIT_AHLT|
-				AUDIT_ARGV|AUDIT_ARGE)) {
-				mtx_unlock(&audit_mtx);
+				AUDIT_ARGV|AUDIT_ARGE))
 				return (EINVAL);
-			}
+			mtx_lock(&audit_mtx);
 			audit_fail_stop = ((udata.au_policy64 & AUDIT_CNT) ==
 			    0);
 			audit_panic_on_write_fail = (udata.au_policy64 &
 			    AUDIT_AHLT);
 			audit_argv = (udata.au_policy64 & AUDIT_ARGV);
 			audit_arge = (udata.au_policy64 & AUDIT_ARGE);
-
+			mtx_unlock(&audit_mtx);
 			break;
 		}	
 		if ((sizeof(udata.au_policy) != uap->length) ||
 		    (udata.au_policy & ~(AUDIT_CNT|AUDIT_AHLT|AUDIT_ARGV|
-					 AUDIT_ARGE))) {
-			mtx_unlock(&audit_mtx);
+					 AUDIT_ARGE)))
 			return (EINVAL);
-		}
 		/*
 		 * XXX - Need to wake up waiters if the policy relaxes?
 		 */
+		mtx_lock(&audit_mtx);
 		audit_fail_stop = ((udata.au_policy & AUDIT_CNT) == 0);
 		audit_panic_on_write_fail = (udata.au_policy & AUDIT_AHLT);
 		audit_argv = (udata.au_policy & AUDIT_ARGV);
 		audit_arge = (udata.au_policy & AUDIT_ARGE);
+		mtx_unlock(&audit_mtx);
 		break;
 
 	case A_GETKMASK:
-		if (sizeof(udata.au_mask) != uap->length) {
-			mtx_unlock(&audit_mtx);
+		if (sizeof(udata.au_mask) != uap->length)
 			return (EINVAL);
-		}
+		mtx_lock(&audit_mtx);
 		udata.au_mask = audit_nae_mask;
+		mtx_unlock(&audit_mtx);
 		break;
 
 	case A_SETKMASK:
-		if (sizeof(udata.au_mask) != uap->length) {
-			mtx_unlock(&audit_mtx);
+		if (sizeof(udata.au_mask) != uap->length)
 			return (EINVAL);
-		}
+		mtx_lock(&audit_mtx);
 		audit_nae_mask = udata.au_mask;
 		AUDIT_CHECK_IF_KEVENTS_MASK(audit_nae_mask);
+		mtx_unlock(&audit_mtx);
 		break;
 
 	case A_OLDGETQCTRL:
 	case A_GETQCTRL:
 		if (sizeof(udata.au_qctrl64) == uap->length) {
+			mtx_lock(&audit_mtx);
 			udata.au_qctrl64.aq64_hiwater =
 			    (u_int64_t)audit_qctrl.aq_hiwater;
 			udata.au_qctrl64.aq64_lowater =
@@ -409,13 +424,14 @@ auditon(proc_t p, struct auditon_args *uap, __unused int32_t *retval)
 			    (u_int64_t)audit_qctrl.aq_delay;
 			udata.au_qctrl64.aq64_minfree = 
 			    (int64_t)audit_qctrl.aq_minfree;
+			mtx_unlock(&audit_mtx);
 			break;
 		} 
-		if (sizeof(udata.au_qctrl) != uap->length) {
-			mtx_unlock(&audit_mtx);
+		if (sizeof(udata.au_qctrl) != uap->length)
 			return (EINVAL);
-		}
+		mtx_lock(&audit_mtx);
 		udata.au_qctrl = audit_qctrl;
+		mtx_unlock(&audit_mtx);
 		break;
 
 	case A_OLDSETQCTRL:
@@ -426,10 +442,9 @@ auditon(proc_t p, struct auditon_args *uap, __unused int32_t *retval)
 			      udata.au_qctrl64.aq64_hiwater) ||
 			     (udata.au_qctrl64.aq64_bufsz > AQ_MAXBUFSZ) ||
 			     (udata.au_qctrl64.aq64_minfree < 0) ||
-			     (udata.au_qctrl64.aq64_minfree > 100)) {
-				mtx_unlock(&audit_mtx);
+			     (udata.au_qctrl64.aq64_minfree > 100))
 				return (EINVAL);
-			}
+			mtx_lock(&audit_mtx);
 			audit_qctrl.aq_hiwater =
 			     (int)udata.au_qctrl64.aq64_hiwater;
 			audit_qctrl.aq_lowater =
@@ -439,77 +454,67 @@ auditon(proc_t p, struct auditon_args *uap, __unused int32_t *retval)
 			audit_qctrl.aq_minfree = 
 			    (int)udata.au_qctrl64.aq64_minfree;
 			audit_qctrl.aq_delay = -1;  /* Not used. */
-
-			 break;
+			mtx_unlock(&audit_mtx);
+			break;
 		}
 		if ((sizeof(udata.au_qctrl) != uap->length) ||
 		    (udata.au_qctrl.aq_hiwater > AQ_MAXHIGH) ||
 		    (udata.au_qctrl.aq_lowater >= udata.au_qctrl.aq_hiwater) ||
 		    (udata.au_qctrl.aq_bufsz > AQ_MAXBUFSZ) ||
 		    (udata.au_qctrl.aq_minfree < 0) ||
-		    (udata.au_qctrl.aq_minfree > 100)) {
-			mtx_unlock(&audit_mtx);
+		    (udata.au_qctrl.aq_minfree > 100))
 			return (EINVAL);
-		}
 
+		mtx_lock(&audit_mtx);
 		audit_qctrl = udata.au_qctrl;
 		/* XXX The queue delay value isn't used with the kernel. */
 		audit_qctrl.aq_delay = -1;
+		mtx_unlock(&audit_mtx);
 		break;
 
 	case A_GETCWD:
-		mtx_unlock(&audit_mtx);
 		return (ENOSYS);
-		break;
 
 	case A_GETCAR:
-		mtx_unlock(&audit_mtx);
 		return (ENOSYS);
-		break;
 
 	case A_GETSTAT:
-		mtx_unlock(&audit_mtx);
 		return (ENOSYS);
-		break;
 
 	case A_SETSTAT:
-		mtx_unlock(&audit_mtx);
 		return (ENOSYS);
-		break;
 
 	case A_SETUMASK:
-		mtx_unlock(&audit_mtx);
 		return (ENOSYS);
-		break;
 
 	case A_SETSMASK:
-		mtx_unlock(&audit_mtx);
 		return (ENOSYS);
-		break;
 
 	case A_OLDGETCOND:
 	case A_GETCOND:
 		if (sizeof(udata.au_cond64) == uap->length) {
+			mtx_lock(&audit_mtx);
 			if (audit_enabled && !audit_suspended)
 				udata.au_cond64 = AUC_AUDITING;
 			else
 				udata.au_cond64 = AUC_NOAUDIT;
-
+			mtx_unlock(&audit_mtx);
 			break;
 		}
-		if (sizeof(udata.au_cond) != uap->length) {
-			mtx_unlock(&audit_mtx);
+		if (sizeof(udata.au_cond) != uap->length)
 			return (EINVAL);
-		}
+		mtx_lock(&audit_mtx);
 		if (audit_enabled && !audit_suspended)
 			udata.au_cond = AUC_AUDITING;
 		else
 			udata.au_cond = AUC_NOAUDIT;
+		mtx_unlock(&audit_mtx);
 		break;
 
 	case A_OLDSETCOND:
 	case A_SETCOND:
 		if (sizeof(udata.au_cond64) == uap->length) {
+			mtx_lock(&audit_mtx);
 			if (udata.au_cond64 == AUC_NOAUDIT)
 				audit_suspended = 1;
 			if (udata.au_cond64 == AUC_AUDITING)
@@ -518,14 +523,15 @@ auditon(proc_t p, struct auditon_args *uap, __unused int32_t *retval)
 				audit_suspended = 1;
 				mtx_unlock(&audit_mtx);
 				audit_shutdown();
-				mtx_lock(&audit_mtx);
+				break;
 			}
+			mtx_unlock(&audit_mtx);
 			break;
 		}
 		if (sizeof(udata.au_cond) != uap->length) {
-			mtx_unlock(&audit_mtx);
 			return (EINVAL);
 		}
+		mtx_lock(&audit_mtx);
 		if (udata.au_cond == AUC_NOAUDIT)
 			audit_suspended = 1;
 		if (udata.au_cond == AUC_AUDITING)
@@ -534,40 +540,32 @@ auditon(proc_t p, struct auditon_args *uap, __unused int32_t *retval)
 			audit_suspended = 1;
 			mtx_unlock(&audit_mtx);
 			audit_shutdown();
-			mtx_lock(&audit_mtx);
+			break;
 		}
+		mtx_unlock(&audit_mtx);
 		break;
 
 	case A_GETCLASS:
-		if (sizeof(udata.au_evclass) != uap->length) {
-			mtx_unlock(&audit_mtx);
+		if (sizeof(udata.au_evclass) != uap->length)
 			return (EINVAL);
-		}
 		udata.au_evclass.ec_class = au_event_class(
 		    udata.au_evclass.ec_number);
 		break;
 
 	case A_SETCLASS:
-		if (sizeof(udata.au_evclass) != uap->length) {
-			mtx_unlock(&audit_mtx);
+		if (sizeof(udata.au_evclass) != uap->length)
 			return (EINVAL);
-		}
 		au_evclassmap_insert(udata.au_evclass.ec_number,
 		    udata.au_evclass.ec_class);
 		break;
 
 	case A_GETPINFO:
 		if ((sizeof(udata.au_aupinfo) != uap->length) ||
-		    IS_NOT_VALID_PID(udata.au_aupinfo.ap_pid)) {
-			mtx_unlock(&audit_mtx);
+		    IS_NOT_VALID_PID(udata.au_aupinfo.ap_pid))
 			return (EINVAL);
-		}
-		if ((tp = proc_find(udata.au_aupinfo.ap_pid)) == NULL) {
-			mtx_unlock(&audit_mtx);
+		if ((tp = proc_find(udata.au_aupinfo.ap_pid)) == NULL)
 			return (ESRCH);
-		}
 
-		mtx_unlock(&audit_mtx);
 		scred = kauth_cred_proc_ref(tp);
 		if (scred->cr_audit.as_aia_p->ai_termid.at_type == AU_IPv6) {
 			kauth_cred_unref(&scred);
@@ -590,19 +588,14 @@ auditon(proc_t p, struct auditon_args *uap, __unused int32_t *retval)
 		kauth_cred_unref(&scred);
 		proc_rele(tp);
 		tp = PROC_NULL;
-		mtx_lock(&audit_mtx);
 		break;
 
 	case A_SETPMASK:
 		if ((sizeof(udata.au_aupinfo) != uap->length) ||
-		    IS_NOT_VALID_PID(udata.au_aupinfo.ap_pid)) {
-			mtx_unlock(&audit_mtx);
+		    IS_NOT_VALID_PID(udata.au_aupinfo.ap_pid))
 			return (EINVAL);
-		}
-		if ((tp = proc_find(udata.au_aupinfo.ap_pid)) == NULL) {
-			mtx_unlock(&audit_mtx);
+		if ((tp = proc_find(udata.au_aupinfo.ap_pid)) == NULL)
 			return (ESRCH);
-		}
 		scred = kauth_cred_proc_ref(tp);
 		bcopy(scred->cr_audit.as_aia_p, &aia, sizeof(aia));
 		kauth_cred_unref(&scred);
@@ -611,44 +604,38 @@ auditon(proc_t p, struct auditon_args *uap, __unused int32_t *retval)
 		aia.ai_mask.am_failure =
 		    udata.au_aupinfo.ap_mask.am_failure;
 		AUDIT_CHECK_IF_KEVENTS_MASK(aia.ai_mask);
-		error = audit_session_setaia(tp, &aia, 0);
-		mtx_unlock(&audit_mtx);
+		error = audit_session_setaia(tp, &aia);
 		proc_rele(tp);
 		tp = PROC_NULL;
 		if (error)
 			return (error);
-		mtx_lock(&audit_mtx);
 		break;
 
 	case A_SETFSIZE:
 		if ((sizeof(udata.au_fstat) != uap->length) ||
 		    ((udata.au_fstat.af_filesz != 0) &&
-		     (udata.au_fstat.af_filesz < MIN_AUDIT_FILE_SIZE))) {
-			mtx_unlock(&audit_mtx);
+		     (udata.au_fstat.af_filesz < MIN_AUDIT_FILE_SIZE)))
 			return (EINVAL);
-		}
+		mtx_lock(&audit_mtx);
 		audit_fstat.af_filesz = udata.au_fstat.af_filesz;
+		mtx_unlock(&audit_mtx);
 		break;
 
 	case A_GETFSIZE:
-		if (sizeof(udata.au_fstat) != uap->length) {
-			mtx_unlock(&audit_mtx);
+		if (sizeof(udata.au_fstat) != uap->length)
 			return (EINVAL);
-		}
+		mtx_lock(&audit_mtx);
 		udata.au_fstat.af_filesz = audit_fstat.af_filesz;
 		udata.au_fstat.af_currsz = audit_fstat.af_currsz;
+		mtx_unlock(&audit_mtx);
 		break;
 
 	case A_GETPINFO_ADDR:
 		if ((sizeof(udata.au_aupinfo_addr) != uap->length) ||
-		    IS_NOT_VALID_PID(udata.au_aupinfo_addr.ap_pid)) {
-			mtx_unlock(&audit_mtx);
+		    IS_NOT_VALID_PID(udata.au_aupinfo_addr.ap_pid))
 			return (EINVAL);
-		}
-		if ((tp = proc_find(udata.au_aupinfo.ap_pid)) == NULL) {
-			mtx_unlock(&audit_mtx);
+		if ((tp = proc_find(udata.au_aupinfo.ap_pid)) == NULL)
 			return (ESRCH);
-		}
 		WARN_IF_AINFO_ADDR_CHANGED(uap->length,
 		    sizeof(auditpinfo_addr_t), "auditon(A_GETPINFO_ADDR,...)",
 		    "auditpinfo_addr_t");
@@ -672,41 +659,48 @@ auditon(proc_t p, struct auditon_args *uap, __unused int32_t *retval)
 		break;
 
 	case A_GETKAUDIT:
-		mtx_unlock(&audit_mtx);
 		if (sizeof(udata.au_kau_info) != uap->length) 
 			return (EINVAL);
 		audit_get_kinfo(&udata.au_kau_info);
-		mtx_lock(&audit_mtx);
 		break;
 
 	case A_SETKAUDIT:
 		if ((sizeof(udata.au_kau_info) != uap->length) ||
 		    (udata.au_kau_info.ai_termid.at_type != AU_IPv4 &&
-		    udata.au_kau_info.ai_termid.at_type != AU_IPv6)) {
-			mtx_unlock(&audit_mtx);
+		    udata.au_kau_info.ai_termid.at_type != AU_IPv6))
 			return (EINVAL);
-		}
-		mtx_unlock(&audit_mtx);
 		audit_set_kinfo(&udata.au_kau_info);
-		mtx_lock(&audit_mtx);
 		break;
 
 	case A_SENDTRIGGER:
 		if ((sizeof(udata.au_trigger) != uap->length) || 
 		    (udata.au_trigger < AUDIT_TRIGGER_MIN) ||
-		    (udata.au_trigger > AUDIT_TRIGGER_MAX)) {
-			mtx_unlock(&audit_mtx);
+		    (udata.au_trigger > AUDIT_TRIGGER_MAX))
 			return (EINVAL);
-		}
-		mtx_unlock(&audit_mtx);
 		return (audit_send_trigger(udata.au_trigger));
 
 	case A_GETSINFO_ADDR:
 		/* Handled above before switch(). */
 		break;
 
+	case A_GETSFLAGS:
+		if (sizeof(udata.au_flags) != uap->length)
+			return (EINVAL);
+		bcopy(&(kauth_cred_get()->cr_audit.as_aia_p->ai_flags),
+		    &udata.au_flags, sizeof(udata.au_flags));
+		break;
+
+	case A_SETSFLAGS:
+		if (sizeof(udata.au_flags) != uap->length)
+			return (EINVAL);
+		bcopy(kauth_cred_get()->cr_audit.as_aia_p, &aia, sizeof(aia));
+		aia.ai_flags = udata.au_flags;
+		error = audit_session_setaia(p, &aia);
+		if (error)
+			return (error);
+		break;
+
 	default:
-		mtx_unlock(&audit_mtx);
 		return (EINVAL);
 	}
 
@@ -730,15 +724,13 @@ auditon(proc_t p, struct auditon_args *uap, __unused int32_t *retval)
 	case A_GETPINFO_ADDR:
 	case A_GETKAUDIT:
 	case A_GETSINFO_ADDR:
+	case A_GETSFLAGS:
 		error = copyout((void *)&udata, uap->data, uap->length);
-		if (error) {
-			mtx_unlock(&audit_mtx);
+		if (error)
 			return (ENOSYS);
-		}
 		break;
 	}
 
-	mtx_unlock(&audit_mtx);
 	return (0);
 }
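
The new A_GETSFLAGS/A_SETSFLAGS commands round-trip the session's ai_flags word through auditon(2); setting goes through audit_session_setaia(), which applies its own access control. A userspace sketch (the au_asflgs_t type for the flags word is an assumption here):

    #include <bsm/audit.h>
    #include <stdio.h>

    int
    main(void)
    {
        au_asflgs_t flags = 0;  /* assumed 64-bit session flags type */

        if (auditon(A_GETSFLAGS, &flags, sizeof (flags)) != 0) {
            perror("auditon(A_GETSFLAGS)");
            return (1);
        }
        printf("session flags: 0x%llx\n", (unsigned long long)flags);

        /* Writing back is subject to audit_session_setaia()'s checks. */
        if (auditon(A_SETSFLAGS, &flags, sizeof (flags)) != 0)
            perror("auditon(A_SETSFLAGS)");
        return (0);
    }
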
 
@@ -803,7 +795,7 @@ setauid(proc_t p, struct setauid_args *uap, __unused int32_t *retval)
 	bcopy(&scred->cr_audit.as_mask, &aia.ai_mask, sizeof(au_mask_t));
 	kauth_cred_unref(&scred);
 	aia.ai_auid = id;
-	error = audit_session_setaia(p, &aia, 0);
+	error = audit_session_setaia(p, &aia);
 
 	return (error);
 }
@@ -917,7 +909,7 @@ setaudit(proc_t p, struct setaudit_args *uap, __unused int32_t *retval)
 	newaia.ai_termid.at_port = ai.ai_termid.port;
 	newaia.ai_termid.at_type = AU_IPv4;
 
-	error = audit_session_setaia(p, &newaia, 0);
+	error = audit_session_setaia(p, &newaia);
 	if (error)
 		return (error);
 
@@ -1007,7 +999,7 @@ setaudit_addr(proc_t p, struct setaudit_addr_args *uap,
 	if (aia.ai_asid == AU_DEFAUDITSID)
 		aia.ai_asid = AU_ASSIGN_ASID;
 
-	error = audit_session_setaia(p, &aia, 0);
+	error = audit_session_setaia(p, &aia);
 	if (error)
 		return (error);
 
@@ -1053,7 +1045,7 @@ auditctl(proc_t p, struct auditctl_args *uap, __unused int32_t *retval)
 	if (uap->path == USER_ADDR_NULL)
 		return (EINVAL);
 
-	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
+	NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | LOCKLEAF | AUDITVNPATH1,
 	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 :
 	    UIO_USERSPACE32), uap->path, vfs_context_current());
 	error = vn_open(&nd, AUDIT_OPEN_FLAGS, 0);
diff --git a/bsd/security/audit/audit_worker.c b/bsd/security/audit/audit_worker.c
index d307a7eb9..d9ef366a2 100644
--- a/bsd/security/audit/audit_worker.c
+++ b/bsd/security/audit/audit_worker.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 1999-2008 Apple Inc.
+ * Copyright (c) 1999-2010 Apple Inc.
  * Copyright (c) 2006-2008 Robert N. M. Watson
  * All rights reserved.
  *
@@ -203,16 +203,11 @@ audit_record_write(struct vnode *vp, struct vfs_context *ctx, void *data,
 	 */
 	if (audit_qctrl.aq_minfree != 0) {
 		temp = mnt_stat->f_blocks / (100 / audit_qctrl.aq_minfree);
-		if (mnt_stat->f_bfree < temp) {
-			if (ppsratecheck(&last_lowspace_trigger,
-			    &cur_lowspace_trigger, 1)) {
+		if (mnt_stat->f_bfree < temp &&
+		    ppsratecheck(&last_lowspace_trigger,
+		    &cur_lowspace_trigger, 1))
 				(void)audit_send_trigger(
 				    AUDIT_TRIGGER_LOW_SPACE);
-				printf("Warning: audit space low (< %d%% free)"
-				    "on audit log file-system\n",
-				    audit_qctrl.aq_minfree);
-			}
-		}
 	}
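
The threshold above is integer arithmetic: f_blocks / (100 / aq_minfree). For aq_minfree values that do not divide 100 evenly the trigger fires slightly early; e.g. aq_minfree = 3 yields f_blocks / 33, about 3.03%. A worked example:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t f_blocks = 1000000;    /* example volume size, in blocks */
        int aq_minfree = 3;             /* trigger below 3% free */

        /* 100 / 3 == 33, so 1000000 / 33 == 30303 blocks (~3.03%). */
        uint64_t threshold = f_blocks / (100 / aq_minfree);
        printf("low-space trigger when f_bfree < %llu blocks\n",
            (unsigned long long)threshold);
        return (0);
    }
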
 
 	/*
@@ -358,7 +353,8 @@ audit_worker_process_record(struct kaudit_record *ar)
 
 	if (!(ar->k_ar_commit & AR_COMMIT_KERNEL) ||
 	    ((ar->k_ar_commit & AR_PRESELECT_PIPE) == 0 &&
-	    (ar->k_ar_commit & AR_PRESELECT_TRAIL) == 0))
+	    (ar->k_ar_commit & AR_PRESELECT_TRAIL) == 0 &&
+	    (ar->k_ar_commit & AR_PRESELECT_FILTER) == 0))
 		goto out;
 
 	auid = ar->k_ar.ar_subj_auid;
@@ -395,6 +391,16 @@ audit_worker_process_record(struct kaudit_record *ar)
 		    ar->k_ar_commit & AR_PRESELECT_TRAIL, bsm->data,
 		    bsm->len);
 
+	if (ar->k_ar_commit & AR_PRESELECT_FILTER) {
+
+		/*
+		 *  XXXss - This needs to be generalized so new filters can
+		 *  be easily plugged in.
+		 */
+		audit_sdev_submit(auid, ar->k_ar.ar_subj_asid, bsm->data,
+		    bsm->len);
+	}
+
 	kau_free(bsm);
 out:
 	if (trail_locked)
@@ -417,7 +423,9 @@ audit_worker(void)
 	struct kaudit_record *ar;
 	int lowater_signal;
 
-	audit_ctx.vc_thread = current_thread();
+	if (audit_ctx.vc_thread == NULL)
+		audit_ctx.vc_thread = current_thread();
+
 	TAILQ_INIT(&ar_worklist);
 	mtx_lock(&audit_mtx);
 	while (1) {
@@ -427,7 +435,8 @@ audit_worker(void)
 		 * Wait for a record.
 		 */
 		while (TAILQ_EMPTY(&audit_q))
-			cv_wait(&audit_worker_cv, &audit_mtx);
+			cv_wait_continuation(&audit_worker_cv, &audit_mtx,
+			    (thread_continue_t)audit_worker);
 
 		/*
 		 * If there are records in the global audit record queue,
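
Replacing cv_wait() with cv_wait_continuation() lets the blocked worker give up its kernel stack: when a record arrives the thread does not resume mid-loop but re-enters audit_worker() from the top, which is why the vc_thread assignment above is now guarded against re-initialization. A userspace model of the control-flow difference (a sketch; real continuations are a scheduler facility, not a function call):

    #include <stdio.h>

    static int have_record;

    static void worker(void);

    /* Models parking without stack state: on "wake-up" the continuation
     * is entered fresh rather than resuming after the blocking call. */
    static void
    wait_continuation(void (*cont)(void))
    {
        have_record = 1;        /* simulate a record arriving */
        cont();
    }

    static void
    worker(void)
    {
        if (!have_record) {
            wait_continuation(worker);
            return;             /* never resumes past the block */
        }
        printf("processing record\n");
    }

    int
    main(void)
    {
        worker();
        return (0);
    }
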
diff --git a/bsd/sys/Makefile b/bsd/sys/Makefile
index f74bfe8d0..53f457741 100644
--- a/bsd/sys/Makefile
+++ b/bsd/sys/Makefile
@@ -7,25 +7,9 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
-ALLPRODUCTS = AppleTV iPhone MacOSX
-PRODUCT = $(shell tconf --product)
-EXTRAUNIFDEF = $(foreach x,$(ALLPRODUCTS),$(if $(findstring $(PRODUCT),$(x)),-DPRODUCT_$(x),-UPRODUCT_$(x)))
-SINCFRAME_UNIFDEF += $(EXTRAUNIFDEF)
-SPINCFRAME_UNIFDEF += $(EXTRAUNIFDEF)
-KINCFRAME_UNIFDEF += $(EXTRAUNIFDEF)
-KPINCFRAME_UNIFDEF += $(EXTRAUNIFDEF)
+INSTINC_SUBDIRS =
 
-INSTINC_SUBDIRS = \
-
-INSTINC_SUBDIRS_PPC = \
-
-INSTINC_SUBDIRS_I386 = \
-
-EXPINC_SUBDIRS = \
-
-EXPINC_SUBDIRS_PPC = \
-
-EXPINC_SUBDIRS_I386 = \
+EXPINC_SUBDIRS =
 
 # Installs header file for user level -  
 #	  $(DSTROOT)/System/Library/Frameworks/System.framework/Headers
@@ -55,17 +39,22 @@ DATAFILES = \
 #	  $(DSTROOT)/System/Library/Frameworks/System.framework/PrivateHeaders
 PRIVATE_DATAFILES = \
 	codesign.h \
+	content_protection.h \
 	disklabel.h \
+	fileport.h \
 	fsctl.h \
 	fsgetpath.h \
 	fslog.h \
+	imgsrc.h \
 	ipcs.h \
 	shm_internal.h \
 	spawn_internal.h \
 	tree.h \
 	ux_exception.h \
 	proc_info.h \
-	vnioctl.h
+	process_policy.h \
+	vnioctl.h \
+	priv.h
 
 # Installs header file for kernel extensions - 
 #	  $(DSTROOT)/System/Library/Frameworks/Kernel.framework/Headers
@@ -97,10 +86,13 @@ KERNELFILES = \
 # Installs header file for Apple internal use for kernel extensions - 
 #	  $(DSTROOT)/System/Library/Frameworks/Kernel.framework/PrivateHeaders
 PRIVATE_KERNELFILES = \
+	codesign.h \
 	cprotect.h \
+	content_protection.h \
 	decmpfs.h \
 	disktab.h \
 	fbt.h \
+	fileport.h \
 	fsctl.h \
 	fslog.h \
 	mach_swapon.h \
@@ -115,19 +107,20 @@ PRIVATE_KERNELFILES = \
 	user.h \
 	vfs_context.h \
 	vmmeter.h \
-	spawn_internal.h
+	spawn_internal.h \
+	priv.h
 
 
 # /System/Library/Frameworks/System.framework/Headers and /usr/include
 INSTALL_MI_LIST	= ${DATAFILES}
 
-INSTALL_MI_GEN_LIST = syscall.h
+INSTALL_MI_GEN_LIST = syscall.h _posix_availability.h _symbol_aliasing.h
 
 INSTALL_MI_DIR = sys
 
-EXPORT_MI_LIST	= ${KERNELFILES} ${PRIVATE_KERNELFILES} linker_set.h bsdtask_info.h filedesc.h pipe.h resourcevar.h semaphore.h \
+EXPORT_MI_LIST	= ${KERNELFILES} ${PRIVATE_KERNELFILES} linker_set.h bsdtask_info.h pthread_internal.h filedesc.h pipe.h resourcevar.h semaphore.h \
 								vnode_internal.h proc_internal.h file_internal.h mount_internal.h \
-								uio_internal.h
+								uio_internal.h tree.h
 
 EXPORT_MI_GEN_LIST = syscall.h sysproto.h
 
@@ -156,6 +149,16 @@ sysproto.h: $(SRCROOT)/bsd/kern/syscalls.master $(MAKESYSCALLS)
 	@echo "Generating bsd/sys/$@ from $<";
 	$(_v)$(MAKESYSCALLS) $< proto > /dev/null
 
+MAKE_POSIX_AVAILABILITY = $(SRCROOT)/bsd/sys/make_posix_availability.sh
+_posix_availability.h: $(MAKE_POSIX_AVAILABILITY)
+	@echo "Generating bsd/sys/$@"
+	$(_v)$(MAKE_POSIX_AVAILABILITY) $@
+
+MAKE_SYMBOL_ALIASING = $(SRCROOT)/bsd/sys/make_symbol_aliasing.sh
+_symbol_aliasing.h: $(MAKE_SYMBOL_ALIASING)
+	@echo "Generating bsd/sys/$@"
+	$(_v)$(MAKE_SYMBOL_ALIASING) $@
+
 include $(MakeInc_rule)
 include $(MakeInc_dir)
 
diff --git a/bsd/sys/attr.h b/bsd/sys/attr.h
index 5f85717d7..42a8b7673 100644
--- a/bsd/sys/attr.h
+++ b/bsd/sys/attr.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -49,6 +49,12 @@
 /* The following option only valid when requesting ATTR_CMN_RETURNED_ATTRS */
 #define FSOPT_PACK_INVAL_ATTRS	0x00000008
 
+#ifdef PRIVATE
+#define FSOPT_EXCHANGE_DATA_ONLY 0x00000010
+#endif
+
+
+
 /* we currently aren't anywhere near this amount for a valid
  * fssearchblock.sizeofsearchparams1 or fssearchblock.sizeofsearchparams2
  * but we put a sanity check in to avoid abuse of the value passed in from
@@ -208,6 +214,12 @@ typedef struct vol_capabilities_attr {
  * 
  * VOL_CAP_FMT_DECMPFS_COMPRESSION: When set, the volume supports transparent
  * decompression of compressed files using decmpfs.
+ *
+ * VOL_CAP_FMT_64BIT_OBJECT_IDS: When set, the volume uses object IDs that
+ * are 64-bit. This means that ATTR_CMN_FILEID and ATTR_CMN_PARENTID are the
+ * only legitimate attributes for obtaining object IDs from this volume and the
+ * 32-bit fid_objno fields of the fsobj_id_t returned by ATTR_CMN_OBJID,
+ * ATTR_CMN_OBJPERMID, and ATTR_CMN_PAROBJID are undefined.
  */
 #define VOL_CAP_FMT_PERSISTENTOBJECTIDS		0x00000001
 #define VOL_CAP_FMT_SYMBOLICLINKS 		0x00000002
@@ -225,7 +237,8 @@ typedef struct vol_capabilities_attr {
 #define VOL_CAP_FMT_HIDDEN_FILES		0x00002000
 #define VOL_CAP_FMT_PATH_FROM_ID		0x00004000
 #define VOL_CAP_FMT_NO_VOLUME_SIZES		0x00008000
-#define VOL_CAP_FMT_DECMPFS_COMPRESSION	0x00010000
+#define VOL_CAP_FMT_DECMPFS_COMPRESSION		0x00010000
+#define VOL_CAP_FMT_64BIT_OBJECT_IDS		0x00020000
 
 
 /*
@@ -338,13 +351,15 @@ typedef struct vol_attributes_attr {
 #define ATTR_CMN_FILEID				0x02000000
 #define ATTR_CMN_PARENTID			0x04000000
 #define ATTR_CMN_FULLPATH			0x08000000
+#define ATTR_CMN_ADDEDTIME			0x10000000
+
 /*
  * ATTR_CMN_RETURNED_ATTRS is only valid with getattrlist(2).
  * It is always the first attribute in the return buffer.
  */
 #define ATTR_CMN_RETURNED_ATTRS			0x80000000
 
-#define ATTR_CMN_VALIDMASK			0x8FE7FFFF
+#define ATTR_CMN_VALIDMASK			0x9FE7FFFF
 #define ATTR_CMN_SETMASK			0x01C7FF00
 #define ATTR_CMN_VOLSETMASK			0x00006700
 
@@ -378,7 +393,9 @@ typedef struct vol_attributes_attr {
 #define ATTR_DIR_LINKCOUNT			0x00000001
 #define ATTR_DIR_ENTRYCOUNT			0x00000002
 #define ATTR_DIR_MOUNTSTATUS			0x00000004
-#define DIR_MNTSTATUS_MNTPOINT		0x00000001
+/* ATTR_DIR_MOUNTSTATUS Flags: */
+#define	  DIR_MNTSTATUS_MNTPOINT		0x00000001
+#define	  DIR_MNTSTATUS_TRIGGER			0x00000002
 
 #define ATTR_DIR_VALIDMASK			0x00000007
 #define ATTR_DIR_SETMASK			0x00000000
@@ -394,11 +411,9 @@ typedef struct vol_attributes_attr {
 #define ATTR_FILE_DATAALLOCSIZE			0x00000400
 #define ATTR_FILE_RSRCLENGTH			0x00001000
 #define ATTR_FILE_RSRCALLOCSIZE			0x00002000
-/* Only used when CONFIG_PROTECT is ON */
-#define ATTR_FILE_PROTECTION_CLASS			0x00004000
 
-#define ATTR_FILE_VALIDMASK			0x000077FF
-#define ATTR_FILE_SETMASK			0x00004020
+#define ATTR_FILE_VALIDMASK			0x000037FF
+#define ATTR_FILE_SETMASK			0x00000020
 
 #define ATTR_FORK_TOTALSIZE			0x00000001
 #define ATTR_FORK_ALLOCSIZE			0x00000002
diff --git a/bsd/sys/buf.h b/bsd/sys/buf.h
index 80bf3d384..f1d7f924b 100644
--- a/bsd/sys/buf.h
+++ b/bsd/sys/buf.h
@@ -90,6 +90,7 @@
 #define B_PASSIVE	0x00000800	/* PASSIVE I/Os are ignored by THROTTLE I/O */
 #define	B_IOSTREAMING	0x00001000	/* sequential access pattern detected */
 #define B_THROTTLED_IO	0x00002000	/* low priority I/O */
+#define B_ENCRYPTED_IO	0x00004000	/* Encrypted I/O */
 /*
  * make sure to check when adding flags that
  * that the new flags don't overlap the definitions
@@ -121,6 +122,8 @@ void	buf_markinvalid(buf_t);
  */
 void	buf_markdelayed(buf_t);
 
+void	buf_markclean(buf_t);
+
 /*!
  @function buf_markeintr
  @abstract Mark a buffer as having been interrupted during I/O.
@@ -634,6 +637,32 @@ errno_t	buf_setupl(buf_t, upl_t, uint32_t);
  */
 buf_t	buf_clone(buf_t, int, int, void (*)(buf_t, void *), void *);
 
+
+/*!
+ @function buf_create_shadow
+ @abstract Create a shadow buffer with optional private storage and an optional callback.
+ @param bp Buffer to shadow.
+ @param force_copy If TRUE, do not link the shadow to 'bp' and, if 'external_storage' == NULL,
+ force a copy of the data associated with 'bp'.
+ @param external_storage If non-NULL, associate it with the new buffer as its storage instead of the 
+ storage currently associated with 'bp'.
+ @param iodone Callback to be called from buf_biodone() when I/O completes, in the sense of buf_setcallback().
+ @param arg Argument to pass to iodone() callback.
+ @return NULL unless the buffer to be shadowed is B_META and a primary buffer (i.e. not itself a shadow); otherwise, the new buffer.
+*/
+
+buf_t	buf_create_shadow(buf_t bp, boolean_t force_copy, uintptr_t external_storage, void (*iodone)(buf_t, void *), void *arg);
+
+
+/*!
+ @function buf_shadow
+ @abstract Returns true if 'bp' is a shadow of another buffer.
+ @param bp Buffer to query.
+ @return 1 if 'bp' is a shadow, 0 otherwise.
+*/
+int	buf_shadow(buf_t bp);
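
A hedged kernel-side sketch of the shadow API described above: force a private copy of a meta buffer's data and issue I/O against the shadow, with completion delivered through buf_biodone(). The callback body and error convention are illustrative:

    /* Illustrative only; assumes a valid B_META buffer 'bp'. */
    static void
    shadow_done(buf_t sbp, __unused void *arg)
    {
        /* I/O on the shadow completed; tear it down per the caller's
         * convention. */
    }

    static int
    write_via_shadow(buf_t bp)
    {
        buf_t sbp;

        sbp = buf_create_shadow(bp, TRUE /* force_copy */,
            (uintptr_t)NULL /* no external storage: copy bp's data */,
            shadow_done, NULL);
        if (sbp == NULL)
            return (EINVAL);    /* not B_META, or already a shadow */
        /* ... set up and issue I/O against 'sbp' ... */
        return (0);
    }
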
+
+
 /*!
  @function buf_alloc
  @abstract Allocate an uninitialized buffer.
@@ -659,6 +688,7 @@ void	buf_free(buf_t);
  */
 #define	BUF_WRITE_DATA	0x0001		/* write data blocks first */
 #define	BUF_SKIP_META	0x0002		/* skip over metadata blocks */
+#define BUF_INVALIDATE_LOCKED	0x0004	/* force B_LOCKED blocks to be invalidated */
 
 /*!
  @function buf_invalidateblks
@@ -966,8 +996,38 @@ buf_t	buf_getblk(vnode_t, daddr64_t, int, int, int, int);
  @return Always returns a new buffer.
  */
 buf_t	buf_geteblk(int);
+
+/*!
+ @function buf_clear_redundancy_flags
+ @abstract Clear flags on a buffer.
+ @discussion: buffer_redundancy_flags &= ~flags
+ @param bp Buffer whose flags to clear.
+ @param flags Flags to remove from buffer's mask
+ @return void.
+ */
+void	buf_clear_redundancy_flags(buf_t, uint32_t);
+
+/*!
+ @function buf_redundancy_flags
+ @abstract Get redundancy flags set on a buffer.
+ @param bp Buffer whose redundancy flags to grab.
+ @return The buffer's redundancy flags.
+ */
+uint32_t	buf_redundancy_flags(buf_t);
+
+/*!
+ @function buf_set_redundancy_flags
+ @abstract Set redundancy flags on a buffer.
+ @discussion b_redundancy_flags |= flags
+ @param bp Buffer whose flags to set.
+ @param flags Flags to add to the buffer's redundancy flags.
+ @return void.
+ */
+void	buf_set_redundancy_flags(buf_t, uint32_t);
+
 #ifdef KERNEL_PRIVATE
-void	buf_setfilter(buf_t, void (*)(buf_t, void *), void *, void **, void **);
+void	buf_setfilter(buf_t, void (*)(buf_t, void *), void *, void (**)(buf_t, void *), void **);
+
 
 /*!
  @function buf_getcpaddr
diff --git a/bsd/sys/buf_internal.h b/bsd/sys/buf_internal.h
index 5718861f6..d80eb21c8 100644
--- a/bsd/sys/buf_internal.h
+++ b/bsd/sys/buf_internal.h
@@ -115,7 +115,16 @@ struct buf {
 	int	b_dirtyend;		/* Offset of end of dirty region. */
 	int	b_validoff;		/* Offset in buffer of valid region. */
 	int	b_validend;		/* Offset of end of valid region. */
+
+	/*
+	 * Extra information related to redundancy of data, such as
+	 * which redundancy copy to use, etc.
+	 */
+	uint32_t b_redundancy_flags;
+
 	proc_t 	b_proc;			/* Associated proc; NULL if kernel. */
+#ifdef BUF_MAKE_PRIVATE
+	buf_t   b_data_store;
+#endif
 #if CONFIG_PROTECT
 	struct cprotect *b_cpentry; 	/* address of cp_entry, to be passed further down  */
 #endif /* CONFIG_PROTECT */
@@ -131,6 +140,12 @@ struct buf {
 
 /* cluster_io definitions for use with io bufs */
 #define b_uploffset  b_bufsize
+#define b_orig	     b_freelist.tqe_prev
+#define b_shadow     b_freelist.tqe_next
+#define	b_shadow_ref b_validoff
+#ifdef BUF_MAKE_PRIVATE
+#define b_data_ref   b_validend
+#endif
 #define b_trans_head b_freelist.tqe_prev
 #define b_trans_next b_freelist.tqe_next
 #define b_iostate    b_rcred
@@ -143,20 +158,25 @@ struct buf {
 #define	BL_BUSY		0x00000001	/* I/O in progress. */
 #define	BL_WANTED	0x00000002	/* Process wants this buffer. */
 #define BL_IOBUF	0x00000004	/* buffer allocated via 'buf_alloc' */
-#define BL_CALLDONE	0x00000008	/* callback routine on B_CALL bp has completed */
 #define BL_WANTDEALLOC	0x00000010	/* buffer should be put on empty list when clean */
+#define BL_SHADOW	0x00000020
+#define BL_EXTERNAL	0x00000040
+#define BL_WAITSHADOW	0x00000080
+#define BL_IOBUF_ALLOC	0x00000100
 
 /*
  * Parameters for buffer cache garbage collection 
  */
 #define BUF_STALE_THRESHHOLD 	30	/* Collect if untouched in the last 30 seconds */
-#define BUF_MAX_GC_COUNT	1000	/* Generally 6-8 MB */
+#define BUF_MAX_GC_COUNT	1024	/* Generally 6-8 MB */
+#define BUF_MAX_GC_BATCH_SIZE	128	/* Under a single grab of the lock */
 
 /*
  * mask used by buf_flags... these are the readable external flags
  */
 #define BUF_X_RDFLAGS (B_PHYS | B_RAW | B_LOCKED | B_ASYNC | B_READ | B_WRITE | B_PAGEIO |\
-		       B_META | B_CLUSTER | B_DELWRI | B_FUA | B_PASSIVE | B_IOSTREAMING | B_THROTTLED_IO)
+		       B_META | B_CLUSTER | B_DELWRI | B_FUA | B_PASSIVE | B_IOSTREAMING | B_THROTTLED_IO |\
+		       B_ENCRYPTED_IO)
 /*
  * mask used by buf_clearflags/buf_setflags... these are the writable external flags
  */
@@ -189,11 +209,10 @@ struct buf {
 /*
  * private flags used by by the cluster layer
  */
-#define B_NEED_IODONE   0x20000000	/* need biodone on the real_bp associated with a cluster_io */
+#define B_TWANTED	0x20000000	/* buf_t that is part of a cluster level transaction is wanted */
 #define B_COMMIT_UPL    0x40000000	/* commit/abort the UPL on I/O success/failure */
 #define B_TDONE		0x80000000	/* buf_t that is part of a cluster level transaction has completed */
 
-
 /* Flags to low-level allocation routines. */
 #define B_CLRBUF	0x01	/* Request allocated buffer be cleared. */
 #define B_SYNC		0x02	/* Do all allocations synchronously. */
@@ -222,6 +241,8 @@ extern struct buf *buf_headers;		/* The buffer headers. */
 
 __BEGIN_DECLS
 
+buf_t	buf_create_shadow_priv(buf_t bp, boolean_t force_copy, uintptr_t external_storage, void (*iodone)(buf_t, void *), void *arg);
+
 buf_t	alloc_io_buf(vnode_t, int);
 void	free_io_buf(buf_t);
 
@@ -239,8 +260,6 @@ void	bufinit(void) __attribute__((section("__TEXT, initcode")));
 void	buf_list_lock(void);
 void	buf_list_unlock(void);
 
-void	buf_biowait_callback(buf_t);
-
 void	cluster_init(void) __attribute__((section("__TEXT, initcode")));
 void	buf_drop(buf_t);
 errno_t	buf_acquire(buf_t, int, int, int);
@@ -248,6 +267,9 @@ errno_t	buf_acquire(buf_t, int, int, int);
 int	count_busy_buffers(void);
 int	count_lock_queue(void);
 
+#ifdef BUF_MAKE_PRIVATE
+errno_t	buf_make_private(buf_t bp);
+#endif
 
 __END_DECLS
 
diff --git a/bsd/sys/cdefs.h b/bsd/sys/cdefs.h
index 59e922bea..7076ef572 100644
--- a/bsd/sys/cdefs.h
+++ b/bsd/sys/cdefs.h
@@ -164,6 +164,12 @@
 #define	__unused
 #endif
 
+#if defined(__GNUC__) && __GNUC__ >= 4
+#define __used __attribute__((__used__))
+#else
+#define __used
+#endif
+
 /*
  * GCC 2.95 provides `__restrict' as an extension to C90 to support the
  * C99-specific `restrict' type qualifier.  We happen to use `__restrict' as
@@ -196,7 +202,7 @@
 #define __scanflike(fmtarg, firstvararg)
 #endif
 
-#define __IDSTRING(name,string) static const char name[] __unused = string
+#define __IDSTRING(name,string) static const char name[] __used = string
 
 #ifndef __COPYRIGHT
 #define __COPYRIGHT(s) __IDSTRING(copyright,s)
@@ -215,7 +221,7 @@
 #endif
 
 /*
- * COMPILATION ENVIRONMENTS
+ * COMPILATION ENVIRONMENTS -- see compat(5) for additional detail
  *
 * DEFAULT	By default newly compiled code will get POSIX APIs plus
  *		Apple API extensions in scope.
@@ -259,24 +265,24 @@
 #define	__DARWIN_SUF_DARWIN10	"_darwin10"
 #define	__DARWIN10_ALIAS(sym)	__asm("_" __STRING(sym) __DARWIN_SUF_DARWIN10)
 #else /* !KERNEL */
-#ifdef PRODUCT_AppleTV
-/* Product: AppleTV */
+#ifdef PLATFORM_iPhoneOS
+/* Platform: iPhoneOS */
 #define __DARWIN_ONLY_64_BIT_INO_T	1
 #define __DARWIN_ONLY_UNIX_CONFORMANCE	1
 #define __DARWIN_ONLY_VERS_1050		1
-#endif /* PRODUCT_AppleTV */
-#ifdef PRODUCT_iPhone
-/* Product: iPhone */
+#endif /* PLATFORM_iPhoneOS */
+#ifdef PLATFORM_iPhoneSimulator
+/* Platform: iPhoneSimulator */
 #define __DARWIN_ONLY_64_BIT_INO_T	1
 #define __DARWIN_ONLY_UNIX_CONFORMANCE	1
 #define __DARWIN_ONLY_VERS_1050		1
-#endif /* PRODUCT_iPhone */
-#ifdef PRODUCT_MacOSX
-/* Product: MacOSX */
+#endif /* PLATFORM_iPhoneSimulator */
+#ifdef PLATFORM_MacOSX
+/* Platform: MacOSX */
 #define __DARWIN_ONLY_64_BIT_INO_T	0
 /* #undef __DARWIN_ONLY_UNIX_CONFORMANCE (automatically set for 64-bit) */
 #define __DARWIN_ONLY_VERS_1050		0
-#endif /* PRODUCT_MacOSX */
+#endif /* PLATFORM_MacOSX */
 #endif /* KERNEL */
 
 /*
@@ -313,6 +319,8 @@
 #      error "Can't define _NONSTD_SOURCE when only UNIX conformance is available."
 #    endif /* _NONSTD_SOURCE */
 #    define __DARWIN_UNIX03	1
+#  elif defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && ((__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0) < 1040)
+#    define __DARWIN_UNIX03	0
 #  elif defined(_DARWIN_C_SOURCE) || defined(_XOPEN_SOURCE) || defined(_POSIX_C_SOURCE)
 #    if defined(_NONSTD_SOURCE)
 #      error "Can't define both _NONSTD_SOURCE and any of _DARWIN_C_SOURCE, _XOPEN_SOURCE or _POSIX_C_SOURCE."
@@ -438,13 +446,19 @@
 /*
  * symbol release macros
  */
-#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && ((__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0) < 1060)
-#undef __DARWIN_10_6_AND_LATER
-#define __DARWIN_10_6_AND_LATER_ALIAS(x)	/* nothing */
-#else /* 10.6 and beyond */
-#define __DARWIN_10_6_AND_LATER
-#define __DARWIN_10_6_AND_LATER_ALIAS(x)	x
+#ifdef KERNEL
+#define __DARWIN_ALIAS_STARTING(_mac, _iphone, x)
+#else
+#include <sys/_symbol_aliasing.h>
+
+#if defined(__IPHONE_OS_VERSION_MIN_REQUIRED)
+#define __DARWIN_ALIAS_STARTING(_mac, _iphone, x)   __DARWIN_ALIAS_STARTING_IPHONE_##_iphone(x)
+#elif defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__)
+#define __DARWIN_ALIAS_STARTING(_mac, _iphone, x)   __DARWIN_ALIAS_STARTING_MAC_##_mac(x)
+#else
+#define __DARWIN_ALIAS_STARTING(_mac, _iphone, x)
 #endif
+#endif /* KERNEL */
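
With this scheme a header tags each symbol with the OS versions at which its variant becomes the default, and the generated <sys/_symbol_aliasing.h> expands the right helper for the deployment target. A hypothetical declaration using the pattern (the function name and version arguments are illustrative, not from this patch):

    /* Alias to the variant symbol only when deploying to Mac OS X 10.7
     * or iOS 4.3 and later; otherwise the annotation expands to nothing. */
    int	frobnicate(int fd) __DARWIN_ALIAS_STARTING(__MAC_10_7,
		__IPHONE_4_3, __DARWIN_ALIAS(frobnicate));
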
 
 
 /*
@@ -460,6 +474,7 @@
  *  _POSIX_C_SOURCE == 199506L		1003.1c-1995, 1003.1i-1995,
  *					and the omnibus ISO/IEC 9945-1: 1996
  *  _POSIX_C_SOURCE == 200112L		1003.1-2001
+ *  _POSIX_C_SOURCE == 200809L		1003.1-2008
  *
  * In addition, the X/Open Portability Guide, which is now the Single UNIX
  * Specification, defines a feature-test macro which indicates the version of
@@ -480,10 +495,13 @@
 
 /* Deal with various X/Open Portability Guides and Single UNIX Spec. */
 #ifdef _XOPEN_SOURCE
-#if _XOPEN_SOURCE - 0L >= 600L
+#if _XOPEN_SOURCE - 0L >= 700L && (!defined(_POSIX_C_SOURCE) || _POSIX_C_SOURCE - 0L < 200809L)
+#undef _POSIX_C_SOURCE
+#define _POSIX_C_SOURCE         200809L
+#elif _XOPEN_SOURCE - 0L >= 600L && (!defined(_POSIX_C_SOURCE) || _POSIX_C_SOURCE - 0L < 200112L)
 #undef _POSIX_C_SOURCE
 #define	_POSIX_C_SOURCE		200112L
-#elif _XOPEN_SOURCE - 0L >= 500L
+#elif _XOPEN_SOURCE - 0L >= 500L && (!defined(_POSIX_C_SOURCE) || _POSIX_C_SOURCE - 0L < 199506L)
 #undef _POSIX_C_SOURCE
 #define	_POSIX_C_SOURCE		199506L
 #endif
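
The practical effect: building with -D_XOPEN_SOURCE=700 now selects the POSIX.1-2008 namespace unless the user already asked for something newer explicitly. For example:

    /* cc -D_XOPEN_SOURCE=700 example.c */
    #include <sys/cdefs.h>
    #include <stdio.h>

    int
    main(void)
    {
    #if _POSIX_C_SOURCE >= 200809L
        printf("POSIX.1-2008 namespace selected\n");
    #else
        printf("older POSIX namespace\n");
    #endif
        return (0);
    }
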
@@ -497,6 +515,44 @@
 #define _POSIX_C_SOURCE         198808L
 #endif
 
+/*
+ * Deprecation macro
+ */
+#if defined(__GNUC__) && ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1)))
+#define __deprecated __attribute__((deprecated))
+#define __unavailable __attribute__((unavailable))
+#else
+#define __deprecated /* nothing */
+#define __unavailable /* nothing */
+#endif
+
+/* POSIX C deprecation macros */
+#ifdef KERNEL
+#define __POSIX_C_DEPRECATED(ver)
+#else
+#include <sys/_posix_availability.h>
+
+#define __POSIX_C_DEPRECATED(ver) ___POSIX_C_DEPRECATED_STARTING_##ver
+#endif
+
+/*
+ * Set a single macro which will always be defined and can be used to determine
+ * the appropriate namespace.  For POSIX, these values will correspond to the
+ * _POSIX_C_SOURCE value.  Currently there are two additional levels,
+ * corresponding to ANSI (_ANSI_SOURCE) and Darwin extensions (_DARWIN_C_SOURCE).
+ */
+#define __DARWIN_C_ANSI         010000L
+#define __DARWIN_C_FULL         900000L
+
+#if   defined(_ANSI_SOURCE)
+#define __DARWIN_C_LEVEL        __DARWIN_C_ANSI
+#elif defined(_POSIX_C_SOURCE) && !defined(_DARWIN_C_SOURCE) && !defined(_NONSTD_SOURCE)
+#define __DARWIN_C_LEVEL        _POSIX_C_SOURCE
+#else
+#define __DARWIN_C_LEVEL        __DARWIN_C_FULL
+#endif
+
+
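
Headers can then gate declarations on one ordered value instead of testing several feature macros; because __DARWIN_C_ANSI is below any _POSIX_C_SOURCE value, which in turn is below __DARWIN_C_FULL, a simple comparison picks the namespace. A sketch of the idiom (the declaration itself is hypothetical):

    #include <sys/cdefs.h>

    #if __DARWIN_C_LEVEL >= __DARWIN_C_FULL
    /* In scope only when Darwin extensions were requested, i.e. neither
     * _ANSI_SOURCE nor a bare _POSIX_C_SOURCE is in effect. */
    int	darwin_only_extension(void);	/* hypothetical */
    #endif
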
 /*
  * long long is not supported in c89 (__STRICT_ANSI__), but g++ -ansi and
  * c99 still want long longs.  While not perfect, we allow long longs for
@@ -512,22 +568,7 @@
  * long doubles.  This applies only to ppc; i386 already has long double
  * support, while ppc64 doesn't have any backwards history.
  */
-#if   defined(__ppc__)
-#  if defined(__LDBL_MANT_DIG__) && defined(__DBL_MANT_DIG__) && \
-	__LDBL_MANT_DIG__ > __DBL_MANT_DIG__
-#    if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0 < 1040
-#      define	__DARWIN_LDBL_COMPAT(x)	__asm("_" __STRING(x) "$LDBLStub")
-#    else
-#      define	__DARWIN_LDBL_COMPAT(x)	__asm("_" __STRING(x) "$LDBL128")
-#    endif
-#    define	__DARWIN_LDBL_COMPAT2(x) __asm("_" __STRING(x) "$LDBL128")
-#    define	__DARWIN_LONG_DOUBLE_IS_DOUBLE	0
-#  else
-#   define	__DARWIN_LDBL_COMPAT(x) /* nothing */
-#   define	__DARWIN_LDBL_COMPAT2(x) /* nothing */
-#   define	__DARWIN_LONG_DOUBLE_IS_DOUBLE	1
-#  endif
-#elif defined(__i386__) || defined(__ppc64__) || defined(__x86_64__)
+#if   defined(__i386__) || defined(__x86_64__)
 #  define	__DARWIN_LDBL_COMPAT(x)	/* nothing */
 #  define	__DARWIN_LDBL_COMPAT2(x) /* nothing */
 #  define	__DARWIN_LONG_DOUBLE_IS_DOUBLE	0
@@ -535,15 +576,6 @@
 #  error Unknown architecture
 #endif
 
-/*
- * Deprecation macro
- */
-#if __GNUC__ >= 3
-#define __deprecated __attribute__((deprecated))
-#else
-#define __deprecated /* nothing */
-#endif
-
 /*****************************************
  *  Public darwin-specific feature macros
  *****************************************/
@@ -605,7 +637,7 @@
  * catastrophic run-time failures.
  */
 #ifndef __CAST_AWAY_QUALIFIER
-#define __CAST_AWAY_QUALIFIER(variable, qualifier, type)  (type) ((char *)0 + ((qualifier char *)(variable) - (qualifier char *)0) ) 
+#define __CAST_AWAY_QUALIFIER(variable, qualifier, type)  (type) (long)(variable)
 #endif
 
 #endif /* !_CDEFS_H_ */
diff --git a/bsd/sys/codesign.h b/bsd/sys/codesign.h
index 56ae21668..e72c25044 100644
--- a/bsd/sys/codesign.h
+++ b/bsd/sys/codesign.h
@@ -38,6 +38,7 @@
 #define CS_EXEC_SET_HARD	0x1000	/* set CS_HARD on any exec'ed process */
 #define CS_EXEC_SET_KILL	0x2000	/* set CS_KILL on any exec'ed process */
 #define CS_KILLED		0x10000	/* was killed by kernel for invalidity */
+#define CS_RESTRICT		0x20000 /* tell dyld to treat restricted */
 
 /* csops  operations */
 #define	CS_OPS_STATUS		0	/* return status */
@@ -47,6 +48,8 @@
 #define	CS_OPS_PIDPATH		4	/* get executable's pathname */
 #define	CS_OPS_CDHASH		5	/* get code directory hash */
 #define CS_OPS_PIDOFFSET	6	/* get offset of active Mach-o slice */
+#define CS_OPS_ENTITLEMENTS_BLOB 7	/* get entitlements blob */
+#define CS_OPS_MARKRESTRICT	8	/* set RESTRICT flag (sticky) */
 
 #ifndef KERNEL
 
diff --git a/bsd/sys/conf.h b/bsd/sys/conf.h
index 4cf53a914..39e4fef37 100644
--- a/bsd/sys/conf.h
+++ b/bsd/sys/conf.h
@@ -71,6 +71,8 @@
 
 #include <sys/appleapiopts.h>
 #include <sys/cdefs.h>
+#include <sys/queue.h>
+#include <stdint.h>
 
 /*
  * Definitions of device driver entry switches
@@ -194,10 +196,24 @@ struct cdevsw {
 	int			d_type;
 };
 
+#ifdef BSD_KERNEL_PRIVATE
+void devsw_init(void);
+
+extern uint64_t cdevsw_flags[];
+#define CDEVSW_SELECT_KQUEUE	0x01
+#define CDEVSW_USE_OFFSET	0x02
+
+struct thread;
+
+typedef struct devsw_lock {
+	TAILQ_ENTRY(devsw_lock) 	dl_list;
+	struct thread			*dl_thread;
+	dev_t				dl_dev;
+	int 				dl_mode;
+} *devsw_lock_t;
+
+#endif /* BSD_KERNEL_PRIVATE */
 
-#ifdef KERNEL_PRIVATE
-extern struct cdevsw cdevsw[];
-#endif /* KERNEL_PRIVATE */
 
 /*
  * Contents of empty cdevsw slot.
@@ -276,6 +292,16 @@ extern struct swdevt swdevt[];
  *  else -1
  */
 __BEGIN_DECLS
+#ifdef KERNEL_PRIVATE
+extern struct cdevsw cdevsw[];
+extern int cdevsw_setkqueueok(int, struct cdevsw*, int);
+#endif /* KERNEL_PRIVATE */
+
+#ifdef BSD_KERNEL_PRIVATE
+extern void devsw_lock(dev_t, int);
+extern void devsw_unlock(dev_t, int);
+#endif /* BSD_KERNEL_PRIVATE */
+
 int  bdevsw_isfree(int);
 int  bdevsw_add(int, struct bdevsw *);
 int  bdevsw_remove(int, struct bdevsw *);
diff --git a/osfmk/ppc/Performance.h b/bsd/sys/content_protection.h
similarity index 71%
rename from osfmk/ppc/Performance.h
rename to bsd/sys/content_protection.h
index 4442d603e..a4066e184 100644
--- a/osfmk/ppc/Performance.h
+++ b/bsd/sys/content_protection.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,21 +25,23 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- *		Keep special performance related stuff in here
- */
  
-#define PERF_HIST		0
-#define PMIHIST_SIZE	0x00400000
-#define perfClear		0
-#define perfStart		1
-#define perfStop		2
-#define perfMap			3
+#ifndef _SYS_CONTENT_PROTECTION_H_
+#define _SYS_CONTENT_PROTECTION_H_
 
-#ifndef ASSEMBLER
+#ifdef PRIVATE
+
+/* 
+ * Protection classes vary in their restrictions on read/writability.  A is generally
+ * the strictest, and D is effectively no restriction.
+ */
+#define PROTECTION_CLASS_A 1
+#define PROTECTION_CLASS_B 2
+#define PROTECTION_CLASS_C 3
+#define PROTECTION_CLASS_D 4
+#define PROTECTION_CLASS_E 5
+#define PROTECTION_CLASS_F 6
 
-extern unsigned int PMIhist;
-extern unsigned int PMIhistV;
-extern unsigned int PerfCtl(unsigned int cmd, unsigned int parm0);
+#endif /* PRIVATE */
 
-#endif /* !ASSEMBLER */
+#endif /* _SYS_CONTENT_PROTECTION_H_ */
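
In-kernel clients manipulate a file's class through the cp_vnode_getclass()/cp_vnode_setclass() entry points declared in <sys/cprotect.h> below. A hedged sketch using the numbering above, where a smaller class number means stronger protection:

    #include <sys/content_protection.h>
    #include <sys/cprotect.h>

    /* Illustrative only: relax a vnode to class C if it currently has a
     * stronger (lower-numbered) class. */
    static int
    relax_to_class_c(vnode_t vp)
    {
        int pclass, error;

        error = cp_vnode_getclass(vp, &pclass);
        if (error)
            return (error);
        if (pclass < PROTECTION_CLASS_C)
            error = cp_vnode_setclass(vp, PROTECTION_CLASS_C);
        return (error);
    }
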
diff --git a/bsd/sys/cprotect.h b/bsd/sys/cprotect.h
index 2edf9aed2..bebe3bb43 100644
--- a/bsd/sys/cprotect.h
+++ b/bsd/sys/cprotect.h
@@ -36,77 +36,114 @@ extern "C" {
 #if KERNEL_PRIVATE
 
 #include <sys/cdefs.h>
+#include <sys/content_protection.h>
 #include <sys/kernel_types.h>
 
-#define PROTECTION_CLASS_A 1
-#define PROTECTION_CLASS_B 2
-#define PROTECTION_CLASS_C 3
-#define PROTECTION_CLASS_D 4
-#define PROTECTION_CLASS_E 5
+#define CP_KEYSIZE 32				/* raw key size: 32 bytes (256 bits) */
+#define CP_WRAPPEDKEYSIZE  40		/* wrapped key size, in bytes */
 
-#define KEYSIZE 8				/* 8x4 = 32, 32x8 = 256 */
-#define INTEGRITYSIZE 2			/* 2x4 = 8, 8x8 = 64 */
+/* lock events from AppleKeyStore */
+#define CP_LOCKED_STATE 0		/* Device is locked */
+#define CP_UNLOCKED_STATE 1		/* Device is unlocked */
 
-#define LOCKED_STATE 0
-#define UNLOCKED_STATE 1
+#define CP_LOCKED_KEYCHAIN 0	
+#define CP_UNLOCKED_KEYCHAIN 1
 
-#define LOCKED_KEYCHAIN 0
-#define UNLOCKED_KEYCHAIN 1
+/* For struct cprotect: cp_flags */
+#define CP_NEEDS_KEYS		0x1		/* File needs persistent keys */
+#define CP_KEY_FLUSHED		0x2		/* File's unwrapped key has been purged from memory */
+#define CP_NO_XATTR			0x4		/* Key info has not been saved as EA to the FS */
 
-#define CONTENT_PROTECTION_XATTR_NAME	"com.apple.system.cprotect"
+/* Content Protection VNOP Operation flags */
+#define CP_READ_ACCESS 	0x1
+#define CP_WRITE_ACCESS 0x2
 
-#define kEMBCKeyHandleSpecial	~1
+#define CONTENT_PROTECTION_XATTR_NAME	"com.apple.system.cprotect"
+#define CP_CURRENT_MAJOR_VERS 2
+#define CP_CURRENT_MINOR_VERS 0
 
-/* SLIST_HEAD(cp_list, cp_entry) cp_head = LIST_HEAD_INITIALIZER(cp_head); */
-/* struct cp_list *cprotect_list_headp;                 /\* List head *\/ */
 
 typedef struct cprotect *cprotect_t;
 typedef struct cp_wrap_func *cp_wrap_func_t;
 typedef struct cp_global_state *cp_global_state_t;
 typedef struct cp_xattr *cp_xattr_t;
 
+typedef struct cnode * cnode_ptr_t;
+/* Forward declaration. */
+struct hfsmount;
 
-typedef int wrapper_t(uint32_t properties, void *key_bytes, size_t key_length, void **wrapped_data, uint32_t *wrapped_length);
-typedef	int unwrapper_t(uint32_t properties, void *wrapped_data, size_t wrapped_data_length, void **key_bytes, uint32_t *key_length);
+/* The wrappers are invoked by the AKS kext */
+typedef int wrapper_t(uint32_t properties, void *key_bytes, size_t key_length, void *wrapped_data, size_t *wrapped_length);
+typedef	int unwrapper_t(uint32_t properties, void *wrapped_data, size_t wrapped_data_length, void *key_bytes, size_t *key_length);
 
+/* 
+ * Runtime-only structure containing the content protection status 
+ * for the given file.  This is contained within the cnode 
+ */
 struct cprotect {
-	uint32_t cache_key[KEYSIZE];
-	uint32_t special_data;
-	uint32_t pclass;
-	uint8_t cache_key_flushed;
-	uint8_t lock_state;			/* lock_state: 0 means unlocked. 1 means locked */
-};
-
-struct cp_entry {
-    SLIST_ENTRY(cp_entry) cp_list;
-	struct cprotect *protected_entry;
+	uint8_t		cp_cache_key[CP_KEYSIZE];
+	uint8_t		cp_persistent_key[CP_WRAPPEDKEYSIZE];
+	uint32_t	cp_flags;
+	uint32_t	cp_pclass;
 };
 
 struct cp_wrap_func {
-	wrapper_t *wrapper;
-	unwrapper_t *unwrapper;
+	wrapper_t	*wrapper;
+	unwrapper_t	*unwrapper;
 };
 
 struct cp_global_state {
+	uint8_t	wrap_functions_set;
 	uint8_t lock_state;
-	uint8_t wrap_functions_set;
 };
 
+/*
+ * On-disk structure written as the per-file EA payload.
+ * All on-disk multi-byte fields for the CP XATTR must be stored
+ * little-endian on-disk.  This means they must be endian swapped to
+ * host order on getxattr() and converted back to little-endian on
+ * setxattr().
+ */
 struct cp_xattr {
-	uint32_t persistent_class;
-	uint8_t persistent_key[32];
-	uint8_t persistent_integrity[8];
-	uint8_t xattr_version;
+	u_int16_t	xattr_major_version;
+	u_int16_t	xattr_minor_version;
+	u_int32_t	flags;
+	u_int32_t	persistent_class;
+	u_int32_t	key_size;
+	uint8_t		persistent_key[CP_WRAPPEDKEYSIZE];	
 };
 
-int cp_create_init(vnode_t, vfs_context_t);
+/* Same is true for the root EA, all fields must be written little endian. */
+struct cp_root_xattr {
+	u_int16_t major_version;
+	u_int16_t minor_version;
+	u_int64_t flags;
+	u_int32_t reserved1;
+	u_int32_t reserved2;
+	u_int32_t reserved3;
+	u_int32_t reserved4;
+};
+
+
+/* 
+ * Functions to check the status of a CP and to query 
+ * the containing filesystem to see if it is supported.
+ */
+int cp_vnode_getclass(vnode_t, int *);
+int cp_vnode_setclass(vnode_t, uint32_t);
+
 int cp_key_store_action(int);
 int cp_register_wraps(cp_wrap_func_t);
-struct cprotect *cp_vnode_entry_alloc(void);
-void cp_vnode_entry_init(vnode_t);
-int cp_vnode_entry_init_needed(vnode_t);
-struct cp_xattr * cp_vn_getxattr(vnode_t, vfs_context_t);
-int cp_vn_setxattr(vnode_t, uint32_t, vfs_context_t);
+
+int cp_entry_init(cnode_ptr_t, struct mount *);
+int cp_entry_create_keys(cnode_ptr_t);
+void cp_entry_destroy(cnode_ptr_t);
+
+cnode_ptr_t cp_get_protected_cnode(vnode_t);
+int cp_handle_vnop(cnode_ptr_t, int);
+int cp_fs_protected (mount_t);
+int cp_getrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *outxattr);
+int cp_setrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *newxattr);
+int cp_handle_relocate (cnode_ptr_t cp);
 
 #endif	/* KERNEL_PRIVATE */
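To make the endianness rule above concrete, a hedged sketch of serializing a struct cp_xattr for setxattr() with the libkern byte-order helpers (assumed available via <libkern/OSByteOrder.h>; the wrapped key is an opaque byte array and needs no swap):

#include <libkern/OSByteOrder.h>
#include <string.h>

static void cp_xattr_to_disk(const struct cp_xattr *in, struct cp_xattr *out)
{
	out->xattr_major_version = OSSwapHostToLittleInt16(in->xattr_major_version);
	out->xattr_minor_version = OSSwapHostToLittleInt16(in->xattr_minor_version);
	out->flags            = OSSwapHostToLittleInt32(in->flags);
	out->persistent_class = OSSwapHostToLittleInt32(in->persistent_class);
	out->key_size         = OSSwapHostToLittleInt32(in->key_size);
	/* opaque wrapped key bytes: copied as-is */
	memcpy(out->persistent_key, in->persistent_key, CP_WRAPPEDKEYSIZE);
}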
 
diff --git a/bsd/sys/decmpfs.h b/bsd/sys/decmpfs.h
index 72e99ee18..f8a61d288 100644
--- a/bsd/sys/decmpfs.h
+++ b/bsd/sys/decmpfs.h
@@ -84,7 +84,7 @@ typedef struct decmpfs_cnode {
 	uint32_t cmp_type;
 	uint32_t lockcount;
 	void    *lockowner;              /* cnode's lock owner (if a thread is currently holding an exclusive lock) */
-    uint64_t uncompressed_size;
+    uint64_t uncompressed_size __attribute__((aligned(8)));
     lck_rw_t compressed_data_lock;
 #if !DECMPFS_SUPPORTS_SWAP64
     /* we need a lock since we can't atomically fetch/set 64 bits */
diff --git a/bsd/sys/disk.h b/bsd/sys/disk.h
index 0232617ca..745aa6710 100644
--- a/bsd/sys/disk.h
+++ b/bsd/sys/disk.h
@@ -161,12 +161,27 @@ typedef struct
 
 #ifdef KERNEL
 #define DK_FEATURE_FORCE_UNIT_ACCESS          0x00000001
+
+typedef struct
+{
+    uint64_t               offset;
+    uint64_t               length;
+
+    uint8_t                reserved0128[12];       /* reserved, clear to zero */
+
+    dev_t                  dev;
+} dk_physical_extent_t;
+
 #define DKIOCGETBLOCKCOUNT32                  _IOR('d', 25, uint32_t)
 #define DKIOCSETBLOCKSIZE                     _IOW('d', 24, uint32_t)
 #define DKIOCGETBSDUNIT                       _IOR('d', 27, uint32_t)
-#define DKIOCISSOLIDSTATE		      _IOR('d', 79, uint32_t)
+#define DKIOCISSOLIDSTATE                     _IOR('d', 79, uint32_t)
 #define DKIOCISVIRTUAL                        _IOR('d', 72, uint32_t)
 #define DKIOCGETBASE                          _IOR('d', 73, uint64_t)
+#define DKIOCGETTHROTTLEMASK                  _IOR('d', 80, uint64_t)
+#define DKIOCLOCKPHYSICALEXTENTS              _IO('d', 81)
+#define DKIOCGETPHYSICALEXTENT                _IOWR('d', 82, dk_physical_extent_t)
+#define DKIOCUNLOCKPHYSICALEXTENTS            _IO('d', 83)
 #endif /* KERNEL */
 
 #endif	/* _SYS_DISK_H_ */
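A hedged sketch of the new extent query (these ioctls are kernel-only per the #ifdef above, so this models a suitably privileged caller; dev_fd and the offsets are placeholders, and bracketing queries with DKIOCLOCKPHYSICALEXTENTS / DKIOCUNLOCKPHYSICALEXTENTS to keep the mapping stable is an assumption suggested by the names):

#include <sys/disk.h>
#include <sys/ioctl.h>
#include <string.h>

static int query_extent(int dev_fd, uint64_t offset, uint64_t length,
    dk_physical_extent_t *out)
{
	memset(out, 0, sizeof(*out));
	out->offset = offset;	/* IN: bytes into the logical device */
	out->length = length;	/* IN: size of the range of interest */
	/* on success, out->dev and out->offset name the physical location */
	return ioctl(dev_fd, DKIOCGETPHYSICALEXTENT, out);
}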
diff --git a/bsd/sys/dtrace.h b/bsd/sys/dtrace.h
index 6d9c6976a..d81a48a4f 100644
--- a/bsd/sys/dtrace.h
+++ b/bsd/sys/dtrace.h
@@ -119,6 +119,8 @@ typedef int64_t		hrtime_t;
 
 typedef enum { B_FALSE = 0, B_TRUE = 1 } _dtrace_boolean;
 
+typedef uint8_t UUID[16]; /* For modctl use in dtrace.h */
+
 struct modctl; /* In lieu of Solaris <sys/modctl.h> */
 /* NOTHING */  /* In lieu of Solaris <sys/processor.h> */
 #include <sys/ioctl.h> /* In lieu of Solaris <sys/systm.h> */
@@ -508,6 +510,15 @@ typedef struct dtrace_difv {
 #define DTRACEACT_RAISE                 (DTRACEACT_PROC_DESTRUCTIVE + 2)
 #define DTRACEACT_SYSTEM                (DTRACEACT_PROC_DESTRUCTIVE + 3)
 #define DTRACEACT_FREOPEN               (DTRACEACT_PROC_DESTRUCTIVE + 4)
+	
+#if defined(__APPLE__)
+/*
+ * Dtrace stop() will task_suspend the currently running process.
+ * Dtrace pidresume(pid) will task_resume it.
+ */
+	
+#define DTRACEACT_PIDRESUME		(DTRACEACT_PROC_DESTRUCTIVE + 50)
+#endif /* __APPLE__ */	
 
 #define DTRACEACT_PROC_CONTROL          0x0300
 
@@ -1340,6 +1351,34 @@ typedef struct dtrace_providerdesc {
 #define DTRACEIOC_FORMAT        (DTRACEIOC | 16)        /* get format str */
 #define DTRACEIOC_DOFGET        (DTRACEIOC | 17)        /* get DOF */
 #define DTRACEIOC_REPLICATE     (DTRACEIOC | 18)        /* replicate enab */
+#define DTRACEIOC_MODUUIDSLIST	(DTRACEIOC | 30)	/* APPLE ONLY, query for modules with missing symbols */
+#define DTRACEIOC_PROVMODSYMS	(DTRACEIOC | 31)	/* APPLE ONLY, provide missing symbols for a given module */
+	
+/*
+ * The following structs are used to provide symbol information to the kernel from userspace.
+ */
+	
+typedef struct dtrace_symbol {
+	uint64_t	dtsym_addr;			/* address of the symbol */
+	uint64_t	dtsym_size;			/* size of the symbol, must be uint64_t to maintain alignment when called by 64b uproc in i386 kernel */
+	char 		dtsym_name[DTRACE_FUNCNAMELEN];	/* symbol name */
+} dtrace_symbol_t;
+
+typedef struct dtrace_module_symbols {
+	UUID		dtmodsyms_uuid;
+	uint64_t	dtmodsyms_count;
+	dtrace_symbol_t	dtmodsyms_symbols[1];
+} dtrace_module_symbols_t;
+	
+#define DTRACE_MODULE_SYMBOLS_SIZE(count) (sizeof(dtrace_module_symbols_t) + (((count) - 1) * sizeof(dtrace_symbol_t)))
+		
+typedef struct dtrace_module_uuids_list {
+	uint64_t	dtmul_count;
+	UUID		dtmul_uuid[1];
+} dtrace_module_uuids_list_t;
+		
+#define DTRACE_MODULE_UUIDS_LIST_SIZE(count) (sizeof(dtrace_module_uuids_list_t) + (((count) - 1) * sizeof(UUID)))
+
 #endif /* __APPLE__ */
 
 /*
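The two *_SIZE macros above size variable-length structures whose trailing arrays are declared with one element. A hedged userspace sketch of allocating a UUID list before issuing DTRACEIOC_MODUUIDSLIST (how the kernel and caller split the work of filling dtmul_uuid[] is an assumption beyond what this header shows):

#include <stdlib.h>

static dtrace_module_uuids_list_t *
alloc_uuids_list(uint64_t count)
{
	dtrace_module_uuids_list_t *list =
	    calloc(1, DTRACE_MODULE_UUIDS_LIST_SIZE(count));
	if (list != NULL)
		list->dtmul_count = count;
	return list;
}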
@@ -1566,7 +1605,7 @@ typedef struct dof_ioctl_data {
  *   dtps_provide_module(); see "Arguments and Notes" for dtrace_register(),
  *   below.
  *
- * 1.4  void dtps_enable(void *arg, dtrace_id_t id, void *parg)
+ * 1.4  int dtps_enable(void *arg, dtrace_id_t id, void *parg)
  *
  * 1.4.1  Overview
  *
@@ -1587,7 +1626,8 @@ typedef struct dof_ioctl_data {
  *
  * 1.4.3  Return value
  *
- *   None.
+ *   On success, dtps_enable() should return 0. On failure, -1 should be
+ *   returned.
  *
  * 1.4.4  Caller's context
  *
@@ -2141,7 +2181,7 @@ typedef struct dof_ioctl_data {
 typedef struct dtrace_pops {
         void (*dtps_provide)(void *arg, const dtrace_probedesc_t *spec);
         void (*dtps_provide_module)(void *arg, struct modctl *mp);
-        void (*dtps_enable)(void *arg, dtrace_id_t id, void *parg);
+        int (*dtps_enable)(void *arg, dtrace_id_t id, void *parg);
         void (*dtps_disable)(void *arg, dtrace_id_t id, void *parg);
         void (*dtps_suspend)(void *arg, dtrace_id_t id, void *parg);
         void (*dtps_resume)(void *arg, dtrace_id_t id, void *parg);
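With the signature change, a provider's enable entry point now reports success or failure. A hypothetical callback matching the new contract:

static int
example_enable(void *arg, dtrace_id_t id, void *parg)
{
#pragma unused(arg, id, parg)
	/* arm the probe here; refuse the enabling on any failure */
	return 0;	/* 0 on success, -1 on failure, per 1.4.3 above */
}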
@@ -2357,10 +2397,7 @@ struct regs;
 extern int (*dtrace_pid_probe_ptr)(struct regs *);
 extern int (*dtrace_return_probe_ptr)(struct regs *);
 #else
-#if defined (__ppc__) || defined (__ppc64__)
-extern int (*dtrace_pid_probe_ptr)(ppc_saved_state_t *regs);
-extern int (*dtrace_return_probe_ptr)(ppc_saved_state_t* regs);
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 extern int (*dtrace_pid_probe_ptr)(x86_saved_state_t *regs);
 extern int (*dtrace_return_probe_ptr)(x86_saved_state_t* regs);
 #else
@@ -2382,8 +2419,13 @@ extern void dtrace_membar_producer(void);
 extern void dtrace_membar_consumer(void);
 
 extern void (*dtrace_cpu_init)(processorid_t);
+#if !defined(__APPLE__)
 extern void (*dtrace_modload)(struct modctl *);
 extern void (*dtrace_modunload)(struct modctl *);
+#else
+extern int (*dtrace_modload)(struct kmod_info *);
+extern int (*dtrace_modunload)(struct kmod_info *);
+#endif /* __APPLE__ */
 extern void (*dtrace_helpers_cleanup)(proc_t*);
 extern void (*dtrace_helpers_fork)(proc_t *parent, proc_t *child);
 extern void (*dtrace_cpustart_init)(void);
@@ -2427,14 +2469,11 @@ extern int dtrace_instr_size(uchar_t *instr);
 extern int dtrace_instr_size_isa(uchar_t *, model_t, int *);
 extern void dtrace_invop_add(int (*)(uintptr_t, uintptr_t *, uintptr_t));
 extern void dtrace_invop_remove(int (*)(uintptr_t, uintptr_t *, uintptr_t));
-extern void dtrace_invop_callsite(void);
+extern void *dtrace_invop_callsite_pre;
+extern void *dtrace_invop_callsite_post;
 #endif
 
     
-#if defined (__ppc__) || defined (__ppc64__)
-extern void dtrace_invop_add(int (*)(uintptr_t, uintptr_t *, uintptr_t));
-extern void dtrace_invop_remove(int (*)(uintptr_t, uintptr_t *, uintptr_t));
-#endif
 #undef proc_t
 #endif /* __APPLE__ */
 
@@ -2472,13 +2511,6 @@ extern void dtrace_invop_remove(int (*)(uintptr_t, uintptr_t *, uintptr_t));
 
 #endif
 
-#if defined (__ppc__) || defined (__ppc64__)
-#define DTRACE_INVOP_NOP                4
-#define DTRACE_INVOP_RET                5
-#define DTRACE_INVOP_BCTR               6
-#define DTRACE_INVOP_TAILJUMP           7
-#endif
-
 
 #endif /* __APPLE__ */
 
diff --git a/bsd/sys/dtrace_glue.h b/bsd/sys/dtrace_glue.h
index 5612fe80c..6b3665b02 100644
--- a/bsd/sys/dtrace_glue.h
+++ b/bsd/sys/dtrace_glue.h
@@ -43,6 +43,10 @@
 #include <mach/kmod.h>
 #include <libkern/OSAtomic.h>
 
+#if defined(__i386__) || defined(__x86_64__)
+#include <i386/mp.h>
+#endif
+
 /*
  * cmn_err
  */
@@ -100,17 +104,17 @@ extern lck_mtx_t mod_lock;
 /*
  * Per-CPU data.
  */
-typedef struct cpu {
+typedef struct dtrace_cpu {
 	processorid_t   cpu_id;                    /* CPU number */
-	struct cpu      *cpu_next;                 /* next existing CPU */
+	struct dtrace_cpu *cpu_next;                 /* next existing CPU */
 	lck_rw_t        cpu_ft_lock;               /* DTrace: fasttrap lock */
 	uintptr_t       cpu_dtrace_caller;         /* DTrace: caller, if any */
 	hrtime_t        cpu_dtrace_chillmark;      /* DTrace: chill mark time */
 	hrtime_t        cpu_dtrace_chilled;        /* DTrace: total chill time */
 	boolean_t       cpu_dtrace_invop_underway; /* DTrace guards against invalid op re-entrancy */
-} cpu_t;
+} dtrace_cpu_t;
 
-extern cpu_t *cpu_list;
+extern dtrace_cpu_t *cpu_list;
 
 /*
  * The cpu_core structure consists of per-CPU state available in any context.
@@ -130,7 +134,8 @@ typedef struct cpu_core {
 } cpu_core_t;
 
 extern cpu_core_t *cpu_core;
-extern unsigned int real_ncpus;
+
+
 extern int cpu_number(void); /* From #include <kern/cpu_number.h>. Called from probe context, must blacklist. */
 
 #define	CPU		(&(cpu_list[cpu_number()]))	/* Pointer to current CPU */
@@ -187,6 +192,55 @@ extern void unregister_cpu_setup_func(cpu_setup_func_t *, void *);
 				CPU_DTRACE_BADSTACK)
 #define	CPU_DTRACE_ERROR	(CPU_DTRACE_FAULT | CPU_DTRACE_DROP)
 
+/*
+ * Loadable Modules
+ */
+
+/* Keep the compiler happy */
+struct dtrace_module_symbols;
+
+/* Solaris' modctl structure, greatly simplified, shadowing parts of xnu kmod structure. */
+typedef struct modctl {
+	struct modctl	*mod_next;
+	struct modctl	*mod_stale;     // stale module chain
+	uint32_t	mod_id;		// the kext unique identifier
+	char		mod_modname[KMOD_MAX_NAME];
+	int		mod_loadcnt;
+	char		mod_loaded;
+	char		mod_flags;	// See flags below
+	int		mod_nenabled;	// # of enabled DTrace probes in module
+	vm_address_t	mod_address;	// starting address (of Mach-o header blob)
+	vm_size_t	mod_size;	// total size (of blob)
+	UUID		mod_uuid;
+	struct dtrace_module_symbols* mod_user_symbols;
+} modctl_t;
+
+/* Definitions for mod_flags */
+#define MODCTL_IS_MACH_KERNEL			0x01 // This module represents /mach_kernel
+#define MODCTL_HAS_KERNEL_SYMBOLS		0x02 // Kernel symbols (nlist) are available
+#define MODCTL_FBT_PROBES_PROVIDED      	0x04 // fbt probes have been provided
+#define MODCTL_FBT_INVALID			0x08 // Module is invalid for fbt probes
+#define MODCTL_SDT_PROBES_PROVIDED		0x10 // sdt probes have been provided
+#define MODCTL_SDT_INVALID			0x20 // Module is invalid for sdt probes
+#define MODCTL_HAS_UUID				0x40 // Module has UUID
+
+/* Simple/singular mod_flags accessors */
+#define MOD_IS_MACH_KERNEL(mod)			(mod->mod_flags & MODCTL_IS_MACH_KERNEL)
+#define MOD_HAS_KERNEL_SYMBOLS(mod)		(mod->mod_flags & MODCTL_HAS_KERNEL_SYMBOLS)
+#define MOD_HAS_USERSPACE_SYMBOLS(mod)		(mod->mod_user_symbols) /* No point in duplicating state in the flags bits */
+#define MOD_FBT_PROBES_PROVIDED(mod)   		(mod->mod_flags & MODCTL_FBT_PROBES_PROVIDED)
+#define MOD_FBT_INVALID(mod)			(mod->mod_flags & MODCTL_FBT_INVALID)
+#define MOD_SDT_PROBES_PROVIDED(mod)   		(mod->mod_flags & MODCTL_SDT_PROBES_PROVIDED)
+#define MOD_SDT_INVALID(mod)			(mod->mod_flags & MODCTL_SDT_INVALID)
+#define MOD_HAS_UUID(mod)			(mod->mod_flags & MODCTL_HAS_UUID)
+
+/* Compound accessors */
+#define MOD_FBT_DONE(mod)			(MOD_FBT_PROBES_PROVIDED(mod) || MOD_FBT_INVALID(mod))
+#define MOD_SDT_DONE(mod)			(MOD_SDT_PROBES_PROVIDED(mod) || MOD_SDT_INVALID(mod))
+#define MOD_SYMBOLS_DONE(mod)			(MOD_FBT_DONE(mod) && MOD_SDT_DONE(mod))
+
+extern modctl_t *dtrace_modctl_list;
+
 /*
  * cred_t
  */
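A hedged sketch of how the accessors combine when scanning the module list (locking is elided, and the probe-creation step is a placeholder):

static void
provide_pending_probes(void)
{
	modctl_t *ctl;
	for (ctl = dtrace_modctl_list; ctl != NULL; ctl = ctl->mod_next) {
		if (MOD_SYMBOLS_DONE(ctl))
			continue;	/* fbt and sdt both provided or invalid */
		if (MOD_HAS_KERNEL_SYMBOLS(ctl) || MOD_HAS_USERSPACE_SYMBOLS(ctl)) {
			/* ... create fbt/sdt probes for this module ... */
		}
	}
}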
@@ -244,8 +298,8 @@ typedef struct cyc_handler {
 } cyc_handler_t;
 
 typedef struct cyc_omni_handler {
-	void (*cyo_online)(void *, cpu_t *, cyc_handler_t *, cyc_time_t *);
-	void (*cyo_offline)(void *, cpu_t *, void *);
+	void (*cyo_online)(void *, dtrace_cpu_t *, cyc_handler_t *, cyc_time_t *);
+	void (*cyo_offline)(void *, dtrace_cpu_t *, void *);
 	void *cyo_arg;
 } cyc_omni_handler_t;
 
@@ -389,25 +443,6 @@ extern void kmem_cache_destroy(kmem_cache_t *);
 
 typedef struct _kthread kthread_t; /* For dtrace_vtime_switch(), dtrace_panicked and dtrace_errthread */
 
-/*
- * Loadable Modules
- */
-
-#if 0 /* kmod_lock has been removed */
-decl_simple_lock_data(extern,kmod_lock)
-#endif /* 0 */
-
-/* Want to use Darwin's kmod_info in place of the Solaris modctl.
-   Can't typedef since the (many) usages in the code are "struct modctl *" */
-extern kmod_info_t *kmod;
-#define modctl kmod_info
-
-#define mod_modname name
-#define mod_loadcnt id
-#define mod_next next
-#define mod_loaded info_version /* XXX Is always > 0, hence TRUE */
-#define modules kmod
-
 /*
  * proc
  */
@@ -472,15 +507,6 @@ static inline void atomic_add_64( uint64_t *theValue, int64_t theAmount )
 {
 	(void)OSAddAtomic64( theAmount, (SInt64 *)theValue );
 }
-#elif defined(__ppc__)
-static inline void atomic_add_64( uint64_t *theValue, int64_t theAmount )
-{
-	// FIXME
-	// atomic_add_64() is at present only called from fasttrap.c to increment
-	// or decrement a 64bit counter. Narrow to 32bits since ppc32 (G4) has
-	// no convenient 64bit atomic op.
-	(void)OSAddAtomic( (int32_t)theAmount, &(((SInt32 *)theValue)[1]));
-}
 #endif
 
 /*
diff --git a/bsd/sys/dtrace_impl.h b/bsd/sys/dtrace_impl.h
index 4ef2ef655..7f42cff5e 100644
--- a/bsd/sys/dtrace_impl.h
+++ b/bsd/sys/dtrace_impl.h
@@ -1008,6 +1008,45 @@ typedef enum dtrace_activity {
 #define DTRACE_DOF_MODE_LAZY_ON		1
 #define DTRACE_DOF_MODE_LAZY_OFF	2
 #define DTRACE_DOF_MODE_NON_LAZY	3
+
+/*
+ * dtrace kernel symbol modes are used to control when the kernel may dispose of
+ * symbol information used by the fbt/sdt provider. The kernel itself, as well as
+ * every kext, has symbol table/nlist info that has historically been preserved
+ * for dtrace's use. This allowed dtrace to be lazy about allocating fbt/sdt probes,
+ * at the expense of keeping the symbol info in the kernel permanently.
+ *
+ * Starting in 10.7+, fbt probes may be created from userspace, in the same
+ * fashion as pid probes. The kernel allows dtrace "first right of refusal"
+ * whenever symbol data becomes available (such as a kext load). If dtrace is
+ * active, it will immediately read/copy the needed data, and then the kernel
+ * may free it. If dtrace is not active, it returns immediately, having done
+ * no work or allocations, and the symbol data is freed. Should dtrace need
+ * this data later, it is expected that the userspace client will push the
+ * data into the kernel via ioctl calls.
+ *
+ * The kernel symbol modes are used to control what dtrace does with symbol data:
+ *
+ * DTRACE_KERNEL_SYMBOLS_NEVER			Effectively disables fbt/sdt
+ * DTRACE_KERNEL_SYMBOLS_FROM_KERNEL		Immediately read/copy symbol data
+ * DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE		Wait for symbols from userspace
+ * DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL	Immediately read/copy symbol data
+ *
+ * It is legal to transition between DTRACE_KERNEL_SYMBOLS_FROM_KERNEL and
+ * DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE. DTRACE_KERNEL_SYMBOLS_NEVER and
+ * DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL are permanent modes, intended
+ * to disable fbt probes entirely or to prevent any symbols being loaded
+ * from userspace.
+ *
+ * The kernel symbol mode is kept in dtrace_kernel_symbol_mode, which is protected
+ * by the dtrace_lock.
+ */
+
+#define DTRACE_KERNEL_SYMBOLS_NEVER 			0
+#define DTRACE_KERNEL_SYMBOLS_FROM_KERNEL		1
+#define DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE		2
+#define DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL	3
+	
 #endif /* __APPLE__ */
 
 /*
diff --git a/bsd/sys/errno.h b/bsd/sys/errno.h
index 54d5d0371..231c68ead 100644
--- a/bsd/sys/errno.h
+++ b/bsd/sys/errno.h
@@ -69,8 +69,9 @@
 #ifndef	_SYS_ERRNO_H_
 #define	_SYS_ERRNO_H_
 
-#if !defined(KERNEL) && !defined(KERNEL_PRIVATE)
 #include <sys/cdefs.h>
+
+#if !defined(KERNEL) && !defined(KERNEL_PRIVATE)
 __BEGIN_DECLS
 extern int * __error(void);
 #define errno (*__error())
@@ -96,7 +97,7 @@ __END_DECLS
 #define	ENOMEM		12		/* Cannot allocate memory */
 #define	EACCES		13		/* Permission denied */
 #define	EFAULT		14		/* Bad address */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
+#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL
 #define	ENOTBLK		15		/* Block device required */
 #endif
 #define	EBUSY		16		/* Device / Resource busy */
@@ -134,9 +135,9 @@ __END_DECLS
 #define	EPROTOTYPE	41		/* Protocol wrong type for socket */
 #define	ENOPROTOOPT	42		/* Protocol not available */
 #define	EPROTONOSUPPORT	43		/* Protocol not supported */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
+#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL
 #define	ESOCKTNOSUPPORT	44		/* Socket type not supported */
-#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
+#endif
 #define ENOTSUP		45		/* Operation not supported */
 #if !__DARWIN_UNIX03 && !defined(KERNEL)
 /*
@@ -150,9 +151,9 @@ __END_DECLS
 #define	EOPNOTSUPP	 ENOTSUP	/* Operation not supported on socket */
 #endif /* !__DARWIN_UNIX03 && !KERNEL */
 
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
+#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL
 #define	EPFNOSUPPORT	46		/* Protocol family not supported */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
+#endif
 #define	EAFNOSUPPORT	47		/* Address family not supported by protocol family */
 #define	EADDRINUSE	48		/* Address already in use */
 #define	EADDRNOTAVAIL	49		/* Can't assign requested address */
@@ -166,10 +167,10 @@ __END_DECLS
 #define	ENOBUFS		55		/* No buffer space available */
 #define	EISCONN		56		/* Socket is already connected */
 #define	ENOTCONN	57		/* Socket is not connected */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
+#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL
 #define	ESHUTDOWN	58		/* Can't send after socket shutdown */
 #define	ETOOMANYREFS	59		/* Too many references: can't splice */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
+#endif
 #define	ETIMEDOUT	60		/* Operation timed out */
 #define	ECONNREFUSED	61		/* Connection refused */
 
@@ -177,34 +178,34 @@ __END_DECLS
 #define	ENAMETOOLONG	63		/* File name too long */
 
 /* should be rearranged */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
+#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL
 #define	EHOSTDOWN	64		/* Host is down */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
+#endif
 #define	EHOSTUNREACH	65		/* No route to host */
 #define	ENOTEMPTY	66		/* Directory not empty */
 
 /* quotas & mush */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
+#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL
 #define	EPROCLIM	67		/* Too many processes */
 #define	EUSERS		68		/* Too many users */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
+#endif
 #define	EDQUOT		69		/* Disc quota exceeded */
 
 /* Network File System */
 #define	ESTALE		70		/* Stale NFS file handle */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
+#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL
 #define	EREMOTE		71		/* Too many levels of remote in path */
 #define	EBADRPC		72		/* RPC struct is bad */
 #define	ERPCMISMATCH	73		/* RPC version wrong */
 #define	EPROGUNAVAIL	74		/* RPC prog. not avail */
 #define	EPROGMISMATCH	75		/* Program version wrong */
 #define	EPROCUNAVAIL	76		/* Bad procedure for program */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
+#endif
 
 #define	ENOLCK		77		/* No locks available */
 #define	ENOSYS		78		/* Function not implemented */
 
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
+#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL
 #define	EFTYPE		79		/* Inappropriate file type or format */
 #define	EAUTH		80		/* Authentication error */
 #define	ENEEDAUTH	81		/* Need authenticator */
@@ -212,26 +213,26 @@ __END_DECLS
 /* Intelligent device errors */
 #define	EPWROFF		82	/* Device power is off */
 #define	EDEVERR		83	/* Device error, e.g. paper out */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
+#endif
 
 #define	EOVERFLOW	84		/* Value too large to be stored in data type */
 
 /* Program loading errors */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
+#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL
 #define EBADEXEC	85	/* Bad executable */
 #define EBADARCH	86	/* Bad CPU type in executable */
 #define ESHLIBVERS	87	/* Shared library version mismatch */
 #define EBADMACHO	88	/* Malformed Macho file */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
+#endif
 
 #define	ECANCELED	89		/* Operation canceled */
 
 #define EIDRM		90		/* Identifier removed */
 #define ENOMSG		91		/* No message of desired type */   
 #define EILSEQ		92		/* Illegal byte sequence */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
+#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL
 #define ENOATTR		93		/* Attribute not found */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
+#endif
 
 #define EBADMSG		94		/* Bad message */
 #define EMULTIHOP	95		/* Reserved */
@@ -249,9 +250,14 @@ __END_DECLS
 
 #define ENOPOLICY	103		/* No such policy registered */
 
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define	ELAST		103		/* Must be equal largest errno */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
+#if __DARWIN_C_LEVEL >= 200809L
+#define ENOTRECOVERABLE 104		/* State not recoverable */
+#define EOWNERDEAD      105		/* Previous owner died */
+#endif
+
+#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL
+#define	ELAST		105		/* Must be equal largest errno */
+#endif
 
 #ifdef KERNEL
 /* pseudo-errors returned inside kernel to modify return to process */
@@ -261,6 +267,10 @@ __END_DECLS
 #ifdef BSD_KERNEL_PRIVATE
 #define ERECYCLE    (-5)		/* restart lookup under heavy vnode pressure/recycling */
 #define EREDRIVEOPEN	(-6)
+#define EKEEPLOOKING	(-7)
+/* used for cvwait error returns to Libc */
+#define ECVCERORR	256
+#define ECVPERORR	512
 #else /* BSD_KERNEL_PRIVATE */
 /* -5 and -6 are reserved for kernel internal use */
 #endif /* BSD_KERNEL_PRIVATE */
diff --git a/bsd/sys/event.h b/bsd/sys/event.h
index abbd60045..05b31174a 100644
--- a/bsd/sys/event.h
+++ b/bsd/sys/event.h
@@ -70,9 +70,10 @@
 #define EVFILT_MACHPORT         (-8)	/* Mach portsets */
 #define EVFILT_FS		(-9)	/* Filesystem events */
 #define EVFILT_USER             (-10)   /* User events */
-#define	EVFILT_SESSION		(-11)	/* Audit session events */
+					/* (-11) unused */
+#define EVFILT_VM		(-12)	/* Virtual memory events */
 
-#define EVFILT_SYSCOUNT		11
+#define EVFILT_SYSCOUNT		12
 #define EVFILT_THREADMARKER	EVFILT_SYSCOUNT /* Internal use only */
 
 #pragma pack(4)
@@ -191,7 +192,6 @@ struct kevent64_s {
  * On input, NOTE_TRIGGER causes the event to be triggered for output.
  */
 #define NOTE_TRIGGER	0x01000000
-#define EV_TRIGGER      0x0100 /*deprecated--for backwards compatibility only*/
 
 /*
 * On input, the top two bits of fflags specify how the lower twenty four 
@@ -233,16 +233,26 @@ struct kevent64_s {
  * that hangs off the proc structure. They also both play games with the hint
  * passed to KNOTE(). If NOTE_SIGNAL is passed as a hint, then the lower bits
  * of the hint contain the signal. IF NOTE_FORK is passed, then the lower bits
- * contain the PID of the child.
+ * contain the PID of the child. 
  */
 #define	NOTE_EXIT	0x80000000		/* process exited */
 #define	NOTE_FORK	0x40000000		/* process forked */
 #define	NOTE_EXEC	0x20000000		/* process exec'd */
 #define	NOTE_REAP	0x10000000		/* process reaped */
 #define	NOTE_SIGNAL	0x08000000		/* shared with EVFILT_SIGNAL */
+#define	NOTE_EXITSTATUS	0x04000000		/* exit status to be returned, valid for child process only */
+#define	NOTE_RESOURCEEND 0x02000000		/* resource limit reached, resource type returned */
 #define	NOTE_PDATAMASK	0x000fffff		/* mask for pid/signal */
 #define	NOTE_PCTRLMASK	(~NOTE_PDATAMASK)
 
+/*
+ * data/hint fflags for EVFILT_VM, shared with userspace.
+ */
+#define NOTE_VM_PRESSURE			0x80000000              /* will react on memory pressure */
+#define NOTE_VM_PRESSURE_TERMINATE		0x40000000              /* will quit on memory pressure, possibly after cleaning up dirty state */
+#define NOTE_VM_PRESSURE_SUDDEN_TERMINATE	0x20000000		/* will quit immediately on memory pressure */
+#define NOTE_VM_ERROR				0x10000000              /* there was an error */
+
 /*
  * data/hint fflags for EVFILT_TIMER, shared with userspace.
  * The default is a (repeating) interval timer with the data
@@ -258,7 +268,7 @@ struct kevent64_s {
 /*
  * data/hint fflags for EVFILT_MACHPORT, shared with userspace.
  *
- * Only portsets are support at this time.
+ * Only portsets are supported at this time.
  *
  * The fflags field can optionally contain the MACH_RCV_MSG, MACH_RCV_LARGE,
  * and related trailer receive options as defined in <mach/message.h>.
@@ -275,29 +285,6 @@ struct kevent64_s {
  * contains the name of the actual port detected with a message waiting.
  */
 
-/*
- * data/hint fflags for EVFILT_SESSION, shared with userspace.
- *
- * The kevent ident field should be set to AU_SESSION_ANY_ASID if interested
- * in events for any session.
- *
- * NOTE_AS_UPDATE may be going away since struct auditinfo_addr may become 
- * immutable once initially set.
- */
-#define	NOTE_AS_START	0x00000001		/* start of new session */
-#define	NOTE_AS_END	0x00000002		/* start of new session */
-#define	NOTE_AS_ERR	0x00000004		/* error tracking new session */
-#define	NOTE_AS_CLOSE	0x00000008		/* currently unsupported */
-#define	NOTE_AS_UPDATE	0x00000010		/* session data updated */
-
-/*
- * Kevent ident value for any session.
- */
-#define	AS_ANY_ASID	0xFFFFFFFF
-
-struct au_sentry;	/* Audit session entry */
-
-
 /*
  * DEPRECATED!!!!!!!!!
  * NOTE_TRACK, NOTE_TRACKERR, and NOTE_CHILD are no longer supported as of 10.5
@@ -338,7 +325,6 @@ struct knote {
 		struct		fileproc *p_fp;	/* file data pointer */
 		struct		proc *p_proc;	/* proc pointer */
 		struct          ipc_pset *p_pset;       /* pset pointer */
-		struct		au_sentry *p_se; 	/* Audit session ptr */
 	} kn_ptr;
 	struct			filterops *kn_fop;
 	int			kn_status;	/* status bits */
@@ -378,7 +364,7 @@ struct filterops {
 	/* Optional f_touch operation, called only if !f_isfd && non-NULL */
 	void    (*f_touch)(struct knote *kn, struct kevent64_s *kev, long type);
 	/* Optional f_peek operation, called only if KN_STAYQUEUED is set */
-	int	(*f_peek)(struct knote *kn);
+	unsigned (*f_peek)(struct knote *kn);
 };
 
 struct proc;
@@ -399,6 +385,7 @@ extern int	knote_detach(struct klist *list, struct knote *kn);
 extern int	knote_link_wait_queue(struct knote *kn, struct wait_queue *wq);	
 extern void	knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq);
 extern void	knote_fdclose(struct proc *p, int fd);
+extern void	knote_markstayqueued(struct knote *kn);
 
 #endif /* !KERNEL_PRIVATE */
 
diff --git a/bsd/sys/fasttrap_impl.h b/bsd/sys/fasttrap_impl.h
index 259841c70..a4017cc41 100644
--- a/bsd/sys/fasttrap_impl.h
+++ b/bsd/sys/fasttrap_impl.h
@@ -201,10 +201,7 @@ extern int fasttrap_tracepoint_init(proc_t *, fasttrap_tracepoint_t *,
 extern int fasttrap_tracepoint_install(proc_t *, fasttrap_tracepoint_t *);
 extern int fasttrap_tracepoint_remove(proc_t *, fasttrap_tracepoint_t *);
 
-#if defined (__ppc__) || defined (__ppc64__)
-extern int fasttrap_pid_probe(ppc_saved_state_t *regs);
-extern int fasttrap_return_probe(ppc_saved_state_t* regs);
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 extern int fasttrap_pid_probe(x86_saved_state_t *regs);
 extern int fasttrap_return_probe(x86_saved_state_t* regs);
 #else
diff --git a/bsd/sys/fbt.h b/bsd/sys/fbt.h
index c72208c99..3796443ea 100644
--- a/bsd/sys/fbt.h
+++ b/bsd/sys/fbt.h
@@ -29,9 +29,7 @@
 #ifndef _FBT_H
 #define _FBT_H
 
-#if defined (__ppc__) || defined (__ppc64__)
-typedef uint32_t machine_inst_t;
-#elif defined(__i386__) || defined (__x86_64__)
+#if defined(__i386__) || defined (__x86_64__)
 typedef uint8_t machine_inst_t;
 #else
 #error Unknown Architecture
@@ -45,18 +43,25 @@ typedef struct fbt_probe {
 	int8_t			fbtp_rval;
 	machine_inst_t	fbtp_patchval;
 	machine_inst_t	fbtp_savedval;
+	machine_inst_t	fbtp_currentval;
 	uintptr_t		fbtp_roffset;
 	dtrace_id_t		fbtp_id;
+	/* FIXME!
+	 * This field appears to only be used in error messages.
+	 * It puts this structure into the next size bucket in kmem_alloc
+	 * wasting 32 bytes per probe. (in i386 only)
+	 */
 	char			fbtp_name[MAX_FBTP_NAME_CHARS];
 	struct modctl	*fbtp_ctl;
 	int		fbtp_loadcnt;
+#if !defined(__APPLE__)
 	int		fbtp_symndx;
-	int		fbtp_primary;
+#endif
 	struct fbt_probe *fbtp_next;
 } fbt_probe_t;
 
 extern int dtrace_invop(uintptr_t, uintptr_t *, uintptr_t);
 extern int fbt_invop(uintptr_t, uintptr_t *, uintptr_t);
 extern void fbt_provide_module(void *, struct modctl *);
-
+extern int fbt_enable(void *arg, dtrace_id_t id, void *parg);
 #endif /* _FBT_H */
diff --git a/bsd/sys/fcntl.h b/bsd/sys/fcntl.h
index e9302b184..f6cbe9d5a 100644
--- a/bsd/sys/fcntl.h
+++ b/bsd/sys/fcntl.h
@@ -77,6 +77,9 @@
  */
 #include <sys/_types.h>
 #include <sys/cdefs.h>
+#ifndef KERNEL
+#include <Availability.h>
+#endif
 
 /* We should not be exporting size_t here.  Temporary for gcc bootstrapping. */
 #ifndef _SIZE_T
@@ -168,6 +171,14 @@ typedef __darwin_pid_t	pid_t;
 #define		O_DSYNC	0x400000	/* synch I/O data integrity */
 #endif
 
+#ifdef KERNEL
+#define FNODIRECT	0x800000	/* fcntl(F_NODIRECT, 1) */
+#endif
+
+#if __DARWIN_C_LEVEL >= 200809L
+#define	O_CLOEXEC	0x1000000	/* implicitly set FD_CLOEXEC */
+#endif
+
 #ifdef KERNEL
 /* convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE */
 #define	FFLAGS(oflags)	((oflags) + 1)
@@ -220,6 +231,7 @@ typedef __darwin_pid_t	pid_t;
 #define	F_SETLK		8		/* set record locking information */
 #define	F_SETLKW	9		/* F_SETLK; wait if blocked */
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
+#define F_FLUSH_DATA    40
 #define F_CHKCLEAN      41              /* Used for regression test */
 #define F_PREALLOCATE   42		/* Preallocate storage */
 #define F_SETSIZE       43		/* Truncate a file without zeroing space */	
@@ -248,14 +260,35 @@ typedef __darwin_pid_t	pid_t;
 
 #define F_ADDFILESIGS	61		/* add signature from same file (used by dyld for shared libs) */
 
-#define F_GETPROTECTIONCLASS	62		/* Get the protection class of a file from the EA, returns int */
-#define F_SETPROTECTIONCLASS	63		/* Set the protection class of a file for the EA, requires int */
+#define F_NODIRECT	62		/* used in conjunction with F_NOCACHE to indicate that DIRECT, synchronous writes */
+                                        /* should not be used (i.e. it's ok to temporarily create cached pages) */
+
+#define F_GETPROTECTIONCLASS	63		/* Get the protection class of a file from the EA, returns int */
+#define F_SETPROTECTIONCLASS	64		/* Set the protection class of a file for the EA, requires int */
+
+#define F_LOG2PHYS_EXT  65		/* file offset to device offset, extended */
+
+#define	F_GETLKPID		66		/* get record locking information, per-process */
+
+#ifdef PRIVATE
+#define F_MOVEDATAEXTENTS	69	/* Swap only the data associated with two files */
+#endif
+
+#define F_SETBACKINGSTORE	70	/* Mark the file as being the backing store for another filesystem */
+#define F_GETPATH_MTMINFO	71 	/* return the full path of the FD, but error in specific mtmd circumstances */
+
+#define F_SETNOSIGPIPE		73	/* No SIGPIPE generated on EPIPE */
+#define F_GETNOSIGPIPE		74	/* Status of SIGPIPE for this fd */
 
 // FS-specific fcntl()'s numbers begin at 0x00010000 and go up
 #define FCNTL_FS_SPECIFIC_BASE  0x00010000
 
 #endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
 
+#if __DARWIN_C_LEVEL >= 200809L
+#define	F_DUPFD_CLOEXEC		67	/* mark the dup with FD_CLOEXEC */
+#endif
+
 /* file descriptor flags (F_GETFD, F_SETFD) */
 #define	FD_CLOEXEC	1		/* close-on-exec flag */
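The two additions compose as in this small sketch (the path is a placeholder; both forms set FD_CLOEXEC atomically, avoiding the race between creating a descriptor and marking it close-on-exec):

#include <fcntl.h>

static void cloexec_example(void)
{
	/* descriptor is created with FD_CLOEXEC already set */
	int fd  = open("/tmp/example", O_RDONLY | O_CLOEXEC);
	/* the duplicate likewise arrives with FD_CLOEXEC preset */
	int fd2 = fcntl(fd, F_DUPFD_CLOEXEC, 0);
	(void)fd2;
}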
 
@@ -296,7 +329,7 @@ typedef __darwin_pid_t	pid_t;
 #define	S_IFLNK		0120000		/* [XSI] symbolic link */
 #define	S_IFSOCK	0140000		/* [XSI] socket */
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define	S_IFWHT		0160000		/* whiteout */
+#define	S_IFWHT		0160000		/* OBSOLETE: whiteout */
 #endif
 
 /* File mode */
@@ -464,13 +497,22 @@ typedef struct user_fbootstraptransfer {
 * For them the fcntl will need to switch from using BMAP to CMAP
  * and a per filesystem type flag will be needed to interpret the
  * contiguous bytes count result from CMAP.
+ *
+ * F_LOG2PHYS_EXT is a variant of F_LOG2PHYS that uses a passed in
+ * file offset and length instead of the current file offset.
+ * F_LOG2PHYS_EXT operates on the same structure as F_LOG2PHYS, but
+ * treats it as an in/out.
  */
 #pragma pack(4)
 
 struct log2phys {
-	unsigned int	l2p_flags;		/* unused so far */
-	off_t		l2p_contigbytes;	/* unused so far */
-	off_t		l2p_devoffset;	/* bytes into device */
+	unsigned int	l2p_flags;	 /* unused so far */
+	off_t		l2p_contigbytes; /* F_LOG2PHYS:     unused so far */
+					 /* F_LOG2PHYS_EXT: IN:  number of bytes to be queried */
+					 /*                 OUT: number of contiguous bytes at this position */
+	off_t		l2p_devoffset;   /* F_LOG2PHYS:     OUT: bytes into device */
+					 /* F_LOG2PHYS_EXT: IN:  bytes into file */
+					 /*                 OUT: bytes into device */
 };
 
 #pragma pack()
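A hedged sketch of the in/out protocol described above for F_LOG2PHYS_EXT (fd, file_offset, and length are placeholders):

#include <fcntl.h>
#include <string.h>

static int query_phys(int fd, off_t file_offset, off_t length)
{
	struct log2phys l2p;
	memset(&l2p, 0, sizeof(l2p));
	l2p.l2p_devoffset   = file_offset;	/* IN: bytes into the file */
	l2p.l2p_contigbytes = length;		/* IN: bytes to query */
	/* on success, l2p_devoffset comes back as bytes into the device,
	   l2p_contigbytes as the contiguous run at that position */
	return fcntl(fd, F_LOG2PHYS_EXT, &l2p);
}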
@@ -544,6 +586,13 @@ int	fcntl(int, int, ...) __DARWIN_ALIAS_C(fcntl);
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
 
 #ifdef PRIVATE
+/*
+ * These definitions are retained temporarily for compatibility.
+ * If you want to use fileports, please use
+ *	#include <sys/fileport.h>
+ * or
+ *	#include <System/sys/fileport.h>
+ */
 #ifndef _FILEPORT_T
 #define _FILEPORT_T
 typedef __darwin_mach_port_t fileport_t;
@@ -561,7 +610,7 @@ void	filesec_free(filesec_t);
 int	filesec_get_property(filesec_t, filesec_property_t, void *);
 int	filesec_query_property(filesec_t, filesec_property_t, int *);
 int	filesec_set_property(filesec_t, filesec_property_t, const void *);
-int	filesec_unset_property(filesec_t, filesec_property_t);
+int	filesec_unset_property(filesec_t, filesec_property_t) __OSX_AVAILABLE_STARTING(__MAC_10_6, __IPHONE_3_2);
 #define _FILESEC_UNSET_PROPERTY	((void *)0)
 #define _FILESEC_REMOVE_ACL	((void *)1)
 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
diff --git a/bsd/sys/file.h b/bsd/sys/file.h
index b236f0840..bd3629144 100644
--- a/bsd/sys/file.h
+++ b/bsd/sys/file.h
@@ -81,6 +81,8 @@
 #define	_KAUTH_CRED_T
 struct ucred;
 typedef struct ucred *kauth_cred_t;
+struct posix_cred;
+typedef struct posix_cred *posix_cred_t;
 #endif	/* !_KAUTH_CRED_T */
 
 #pragma pack(4)
diff --git a/bsd/sys/file_internal.h b/bsd/sys/file_internal.h
index 7592c745d..9fcb4d1f3 100644
--- a/bsd/sys/file_internal.h
+++ b/bsd/sys/file_internal.h
@@ -105,14 +105,17 @@ struct fileproc {
 #define FP_WAITCLOSE	0x0040
 #define FP_AIOISSUED	0x0080
 #define FP_WAITEVENT	0x0100
+#define FP_SELCONFLICT	0x0200	/* select conflict on an individual fp */
 
-#define FP_VALID_FLAGS (FP_INCREATE | FP_INCLOSE | FP_INSELECT | FP_INCHRREAD | FP_WRITTEN | FP_WRITTEN | FP_CLOSING | FP_WAITCLOSE | FP_AIOISSUED | FP_WAITEVENT)
+#define FP_VALID_FLAGS (FP_INCREATE | FP_INCLOSE | FP_INSELECT | FP_INCHRREAD | FP_WRITTEN | FP_CLOSING | FP_WAITCLOSE | FP_AIOISSUED | FP_WAITEVENT | FP_SELCONFLICT)
 
 
 #ifndef _KAUTH_CRED_T
 #define	_KAUTH_CRED_T
 struct ucred;
 typedef struct ucred *kauth_cred_t;
+struct posix_cred;
+typedef struct posix_cred *posix_cred_t;
 #endif	/* !_KAUTH_CRED_T */
 
 /* file types */
@@ -133,6 +136,7 @@ typedef enum {
 #define FG_RMMSGQ	0x08 	/* the fileglob is being removed from msgqueue */
 #define FG_WRMMSGQ	0x10 	/* wait for the fileglob to  be removed from msgqueue */
 #define FG_PORTMADE	0x20	/* a port was at some point created for this fileglob */
+#define FG_NOSIGPIPE	0x40	/* don't deliver SIGPIPE with EPIPE return */
 
 struct fileglob {
 	LIST_ENTRY(fileglob) f_list;/* list of active files */
@@ -159,11 +163,9 @@ struct fileglob {
 		int	(*fo_drain)	(struct fileproc *fp, vfs_context_t ctx);
 	} *fg_ops;
 	off_t	fg_offset;
-	caddr_t	fg_data;		/* vnode or socket or SHM or semaphore */
+	void 	*fg_data;		/* vnode or socket or SHM or semaphore */
 	lck_mtx_t fg_lock;
 	int32_t fg_lflags;		/* file global flags */
-	unsigned int fg_lockpc[4];
-	unsigned int fg_unlockpc[4];
 #if CONFIG_MACF
 	struct label *fg_label;  /* JMM - use the one in the cred? */
 #endif
diff --git a/bsd/sys/filedesc.h b/bsd/sys/filedesc.h
index 7ea50f5a9..740e2d6f4 100644
--- a/bsd/sys/filedesc.h
+++ b/bsd/sys/filedesc.h
@@ -121,7 +121,9 @@ struct filedesc {
 
 #ifdef KERNEL
 #define UF_RESVWAIT	0x10		/* close in progress */
-#define UF_VALID_FLAGS	(UF_EXCLOSE| UF_RESERVED | UF_CLOSING | UF_RESVWAIT)
+#define	UF_INHERIT	0x20		/* "inherit-on-exec" */
+#define UF_VALID_FLAGS	\
+	(UF_EXCLOSE | UF_RESERVED | UF_CLOSING | UF_RESVWAIT | UF_INHERIT)
 #endif /* KERNEL */
 
 /*
@@ -148,7 +150,7 @@ extern void	ffree(struct file *fp);
 #ifdef __APPLE_API_PRIVATE
 extern struct	filedesc *fdcopy(proc_t p, struct vnode *uth_cdir);
 extern void	fdfree(proc_t p);
-extern void	fdexec(proc_t p);
+extern void	fdexec(proc_t p, short flags);
 #endif /* __APPLE_API_PRIVATE */
 
 #endif /* KERNEL */
diff --git a/osfmk/ppc/machine_cpu.h b/bsd/sys/fileport.h
similarity index 68%
rename from osfmk/ppc/machine_cpu.h
rename to bsd/sys/fileport.h
index 88fe14def..779179baf 100644
--- a/osfmk/ppc/machine_cpu.h
+++ b/bsd/sys/fileport.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,27 +25,32 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-#ifndef _PPC_MACHINE_CPU_H_
-#define _PPC_MACHINE_CPU_H_
 
-#include <mach/mach_types.h>
-#include <mach/boolean.h>
-#include <kern/kern_types.h>
-#include <pexpert/pexpert.h>
+#ifndef _SYS_FILEPORT_H_
+#define _SYS_FILEPORT_H_
 
-extern void	cpu_machine_init(
-	void);
+#include <sys/_types.h>
+#include <sys/cdefs.h>
 
-extern void	cpu_doshutdown(
-        void);
+#ifndef KERNEL
 
-extern void	cpu_signal_handler(
-	void);
+__BEGIN_DECLS
 
-typedef void (*broadcastFunc) (uint32_t);
+#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
 
-int32_t cpu_broadcast(uint32_t *, broadcastFunc, uint32_t);
+#ifndef _FILEPORT_T
+#define _FILEPORT_T
+typedef __darwin_mach_port_t fileport_t;
+#define FILEPORT_NULL	((fileport_t)0)
+#endif /* _FILEPORT_T */
 
-#define cpu_pause()		/* Not for this architecture */
+int	fileport_makeport(int, fileport_t *);
+int	fileport_makefd(fileport_t);
 
-#endif /* _PPC_MACHINE_CPU_H_ */
+#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
+
+__END_DECLS
+
+#endif /* !KERNEL */
+
+#endif	/* !_SYS_FILEPORT_H_ */
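A hypothetical round trip through the new interface (the Mach messaging needed to move the port between processes is elided):

#include <sys/fileport.h>

static int fileport_roundtrip(int fd)
{
	fileport_t port = FILEPORT_NULL;
	if (fileport_makeport(fd, &port) != 0)
		return -1;
	/* the port may now travel to another process in a Mach message;
	   the receiver recovers a descriptor for the same file: */
	return fileport_makefd(port);
}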
diff --git a/bsd/sys/fsctl.h b/bsd/sys/fsctl.h
index b70ba0651..40c6b10b1 100644
--- a/bsd/sys/fsctl.h
+++ b/bsd/sys/fsctl.h
@@ -70,26 +70,161 @@
 #define	_SYS_FSCTL_H_
 
 #include <sys/ioccom.h>
+#include <sys/mount.h>
 
-#define FSIOC_SYNC_VOLUME	_IOW('A', 1, uint32_t)
-#define	FSCTL_SYNC_VOLUME	IOCBASECMD(FSIOC_SYNC_VOLUME)
+#ifdef XNU_KERNEL_PRIVATE
 
-#define	FSCTL_SYNC_FULLSYNC	(1<<0)	/* Flush the data fully to disk, if supported by the filesystem */
-#define	FSCTL_SYNC_WAIT		(1<<1)	/* Wait for the sync to complete */
+typedef struct user64_namespace_handler_info {
+	user64_addr_t  token;
+	user64_addr_t  flags;
+	user64_addr_t  fdptr;
+} user64_namespace_handler_info;
+
+typedef struct user32_namespace_handler_info {
+	user32_addr_t  token;
+	user32_addr_t  flags;
+	user32_addr_t  fdptr;
+} user32_namespace_handler_info;
+
+typedef struct namespace_handler_info {
+	user_addr_t  token;
+	user_addr_t  flags;
+	user_addr_t  fdptr;
+} namespace_handler_info;
+
+typedef struct user64_namespace_handler_info_ext {
+	user64_addr_t  token;
+	user64_addr_t  flags;
+	user64_addr_t  fdptr;
+	user64_addr_t  infoptr;
+} user64_namespace_handler_info_ext;
+
+typedef struct user32_namespace_handler_info_ext {
+	user32_addr_t  token;
+	user32_addr_t  flags;
+	user32_addr_t  fdptr;
+	user32_addr_t  infoptr;
+} user32_namespace_handler_info_ext;
+
+typedef struct namespace_handler_info_ext {
+	user_addr_t  token;
+	user_addr_t  flags;
+	user_addr_t  fdptr;
+	user_addr_t  infoptr;
+} namespace_handler_info_ext;
+
+extern int resolve_nspace_item(struct vnode *vp, uint64_t op);
+extern int resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg);
+extern int get_nspace_item_status(struct vnode *vp, int32_t *status);
+
+#else
+
+typedef struct namespace_handler_info {
+	int32_t    *token;
+	int64_t    *flags;
+	int32_t    *fdptr;
+} namespace_handler_info;
+
+typedef struct namespace_handler_info_ext {
+	int32_t    *token;
+	int64_t    *flags;
+	int32_t    *fdptr;
+	int64_t    *infoptr;     // for snapshot write events, the kernel puts an offset/length pair here
+} namespace_handler_info_ext;
+
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+#define NAMESPACE_HANDLER_READ_OP             0x0001
+#define NAMESPACE_HANDLER_WRITE_OP            0x0002
+#define NAMESPACE_HANDLER_DELETE_OP           0x0004
+#define NAMESPACE_HANDLER_TRUNCATE_OP         0x0008
+#define NAMESPACE_HANDLER_RENAME_OP           0x0010
+#define NAMESPACE_HANDLER_METADATA_WRITE_OP   0x0020
+#define NAMESPACE_HANDLER_METADATA_DELETE_OP  0x0040
+#define NAMESPACE_HANDLER_METADATA_MOD        0x0080
+#define NAMESPACE_HANDLER_LINK_CREATE         0x0200
+
+#define NAMESPACE_HANDLER_NSPACE_EVENT        0x1000
+#define NAMESPACE_HANDLER_SNAPSHOT_EVENT      0x0100
+#define NAMESPACE_HANDLER_TRACK_EVENT         0x2000
+
+#define NAMESPACE_HANDLER_EVENT_TYPE_MASK (NAMESPACE_HANDLER_NSPACE_EVENT | NAMESPACE_HANDLER_SNAPSHOT_EVENT | NAMESPACE_HANDLER_TRACK_EVENT)
+
+#define DATALESS_CMPFS_TYPE     0x80000001
 
 
+typedef int32_t nspace_handler_info[2];
+typedef char fstypename_t[MFSTYPENAMELEN];
+
+#ifdef KERNEL
+
+typedef struct user64_package_ext_info {
+    user64_addr_t strings;
+    uint32_t      num_entries;
+    uint32_t      max_width;
+} user64_package_ext_info;
+
+typedef struct user32_package_ext_info {
+    user32_addr_t strings;
+    uint32_t      num_entries;
+    uint32_t      max_width;
+} user32_package_ext_info;
+
+#endif  // KERNEL
+
 typedef struct package_ext_info {
     const char *strings;
     uint32_t    num_entries;
     uint32_t    max_width;
 } package_ext_info;
 
-#define FSIOC_SET_PACKAGE_EXTS	_IOW('A', 2, struct package_ext_info)
-#define	FSCTL_SET_PACKAGE_EXTS	IOCBASECMD(FSIOC_SET_PACKAGE_EXTS)
+#define	FSCTL_SYNC_FULLSYNC	(1<<0)	/* Flush the data fully to disk, if supported by the filesystem */
+#define	FSCTL_SYNC_WAIT		(1<<1)	/* Wait for the sync to complete */
+
+
+#define FSIOC_SYNC_VOLUME			  _IOW('A', 1, uint32_t)
+#define	FSCTL_SYNC_VOLUME			  IOCBASECMD(FSIOC_SYNC_VOLUME)
+
+#define FSIOC_SET_PACKAGE_EXTS			  _IOW('A', 2, struct package_ext_info)
+#define	FSCTL_SET_PACKAGE_EXTS			  IOCBASECMD(FSIOC_SET_PACKAGE_EXTS)
+
+#define FSIOC_WAIT_FOR_SYNC			  _IOR('A', 3, int32_t)
+#define	FSCTL_WAIT_FOR_SYNC			  IOCBASECMD(FSIOC_WAIT_FOR_SYNC)
+
+#define FSIOC_NAMESPACE_HANDLER_GET		  _IOW('A', 4, struct namespace_handler_info)
+#define	FSCTL_NAMESPACE_HANDLER_GET		  IOCBASECMD(FSIOC_NAMESPACE_HANDLER_GET)
+
+#define FSIOC_NAMESPACE_HANDLER_UPDATE		  _IOW('A', 5, nspace_handler_info)
+#define	FSCTL_NAMESPACE_HANDLER_UPDATE		  IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UPDATE)
+
+#define FSIOC_NAMESPACE_HANDLER_UNBLOCK		  _IOW('A', 6, nspace_handler_info)
+#define	FSCTL_NAMESPACE_HANDLER_UNBLOCK		  IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UNBLOCK)
 
-#define FSIOC_WAIT_FOR_SYNC	_IOR('A', 3, int32_t)
-#define	FSCTL_WAIT_FOR_SYNC	IOCBASECMD(FSIOC_WAIT_FOR_SYNC)
+#define FSIOC_NAMESPACE_HANDLER_CANCEL		  _IOW('A', 7, nspace_handler_info)
+#define	FSCTL_NAMESPACE_HANDLER_CANCEL		  IOCBASECMD(FSIOC_NAMESPACE_HANDLER_CANCEL)
 
+#define FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME _IOW('A', 8, int32_t)
+#define	FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME IOCBASECMD(FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME)
+
+#define FSIOC_OLD_SNAPSHOT_HANDLER_GET		  _IOW('A', 9, struct namespace_handler_info)
+#define FSCTL_OLD_SNAPSHOT_HANDLER_GET		  IOCBASECMD(FSIOC_OLD_SNAPSHOT_HANDLER_GET)
+
+#define FSIOC_SET_FSTYPENAME_OVERRIDE		  _IOW('A', 10, fstypename_t)
+#define	FSCTL_SET_FSTYPENAME_OVERRIDE	          IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE)
+
+#define FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS _IOW('A', 11, int32_t)
+#define	FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS IOCBASECMD(FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS)
+
+#define FSIOC_TRACKED_HANDLER_GET		  _IOW('A', 12, struct namespace_handler_info)
+#define FSCTL_TRACKED_HANDLER_GET		  IOCBASECMD(FSIOC_TRACKED_HANDLER_GET)
+
+#define FSIOC_SNAPSHOT_HANDLER_GET_EXT		  _IOW('A', 13, struct namespace_handler_info_ext)
+#define FSCTL_SNAPSHOT_HANDLER_GET_EXT		  IOCBASECMD(FSIOC_SNAPSHOT_HANDLER_GET_EXT)
+
+//
+// IO commands 14, 15, 16, and 17 are currently unused
+//
 
 //
 // Spotlight and fseventsd use these fsctl()'s to find out 
@@ -104,27 +239,10 @@ typedef struct package_ext_info {
 //       or else it will break binary compatibility with mds
 //       and fseventsd.
 //
-#define SPOTLIGHT_IOC_GET_MOUNT_TIME _IOR('h', 18, u_int32_t)
-#define SPOTLIGHT_FSCTL_GET_MOUNT_TIME IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME)
-#define SPOTLIGHT_IOC_GET_LAST_MTIME _IOR('h', 19, u_int32_t)
-#define SPOTLIGHT_FSCTL_GET_LAST_MTIME IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME)
-
-
-#ifdef KERNEL
-
-typedef struct user64_package_ext_info {
-    user64_addr_t strings;
-    uint32_t      num_entries;
-    uint32_t      max_width;
-} user64_package_ext_info;
-
-typedef struct user32_package_ext_info {
-    user32_addr_t strings;
-    uint32_t      num_entries;
-    uint32_t      max_width;
-} user32_package_ext_info;
-
-#endif  // KERNEL
+#define SPOTLIGHT_IOC_GET_MOUNT_TIME		  _IOR('h', 18, u_int32_t)
+#define SPOTLIGHT_FSCTL_GET_MOUNT_TIME		  IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME)
+#define SPOTLIGHT_IOC_GET_LAST_MTIME		  _IOR('h', 19, u_int32_t)
+#define SPOTLIGHT_FSCTL_GET_LAST_MTIME		  IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME)
 
 
 #ifndef KERNEL
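For example, a userspace caller can fetch a volume's mount time through the fsctl(2) wrapper (a sketch; the volume path is a placeholder):

#include <sys/fsctl.h>
#include <stdio.h>

static void print_mount_time(void)
{
	u_int32_t mount_time = 0;
	if (fsctl("/Volumes/Data", SPOTLIGHT_FSCTL_GET_MOUNT_TIME,
	    &mount_time, 0) == 0)
		printf("volume mounted at %u\n", mount_time);
}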
diff --git a/bsd/sys/fsevents.h b/bsd/sys/fsevents.h
index e5cb3ec3c..82c16ac48 100644
--- a/bsd/sys/fsevents.h
+++ b/bsd/sys/fsevents.h
@@ -161,6 +161,7 @@ typedef struct fse_info {
 } fse_info;
 
 int   get_fse_info(struct vnode *vp, fse_info *fse, vfs_context_t ctx);
+int   vnode_get_fse_info_from_vap(vnode_t vp, fse_info *fse, struct vnode_attr *vap);
 
 char *get_pathbuff(void);
 void  release_pathbuff(char *path);
diff --git a/bsd/sys/fslog.h b/bsd/sys/fslog.h
index c1bee8c64..1266f3075 100644
--- a/bsd/sys/fslog.h
+++ b/bsd/sys/fslog.h
@@ -87,6 +87,14 @@ void fslog_fs_corrupt(struct mount *mnt);
 void fslog_io_error(const buf_t bp);
 
 #endif /* BSD_KERNEL_PRIVATE */
+
+#ifdef XNU_KERNEL_PRIVATE
+
+/* Log information about external modification of a target process */
+void fslog_extmod_msgtracer(proc_t caller, proc_t target);
+
+#endif /* XNU_KERNEL_PRIVATE */
+
 #endif /* KERNEL */
 
 /* Keys used by FSLog */
diff --git a/bsd/sys/imageboot.h b/bsd/sys/imageboot.h
index 9ab02b5ab..a77c9cca8 100644
--- a/bsd/sys/imageboot.h
+++ b/bsd/sys/imageboot.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -28,7 +28,12 @@
 #ifndef _IMAGEBOOT_H_
 #define _IMAGEBOOT_H_
 
-int imageboot_needed(void);
-int imageboot_setup(void);
+int 	imageboot_needed(void);
+void 	imageboot_setup(void);
+int	imageboot_format_is_valid(const char *root_path);
+int	imageboot_mount_image(const char *root_path, int height);
+
+#define IMAGEBOOT_CONTAINER_ARG		"container-dmg"
+#define IMAGEBOOT_ROOT_ARG		"root-dmg"
 
 #endif
diff --git a/bsd/sys/imgact.h b/bsd/sys/imgact.h
index fa9be0460..0a194b779 100644
--- a/bsd/sys/imgact.h
+++ b/bsd/sys/imgact.h
@@ -85,15 +85,23 @@ struct image_params {
 	char		*ip_vdata;		/* file data (up to one page) */
 	int		ip_flags;		/* image flags */
 	int		ip_argc;		/* argument count */
-	char		*ip_argv;		/* argument vector beginning */
 	int		ip_envc;		/* environment count */
+	int		ip_applec;		/* apple vector count */
+
+	char		*ip_startargv;		/* argument vector beginning */
+	char		*ip_endargv;	/* end of argv/start of envv */
+	char		*ip_endenvv;	/* end of envv/start of applev */
+
 	char		*ip_strings;		/* base address for strings */
 	char		*ip_strendp;		/* current end pointer */
-	char		*ip_strendargvp;	/* end of argv/start of envp */
-	int		ip_strspace;		/* remaining space */
+
+	int 		ip_argspace;	/* remaining space of NCARGS limit (argv+envv) */
+	int		ip_strspace;		/* remaining total string space */
+
 	user_size_t 	ip_arch_offset;		/* subfile offset in ip_vp */
 	user_size_t 	ip_arch_size;		/* subfile length in ip_vp */
-	char		ip_interp_name[IMG_SHSIZE];	/* interpreter name */
+	char		ip_interp_buffer[IMG_SHSIZE];	/* interpreter buffer space */
+	int		ip_interp_sugid_fd;		/* fd for sugid script */
 
 	/* Next two fields are for support of architecture translation... */
 	char		*ip_p_comm;		/* optional alt p->p_comm */
@@ -112,14 +120,16 @@ struct image_params {
 /*
  * Image flags
  */
-#define	IMGPF_NONE	0x00000000		/* No flags */
-#define	IMGPF_INTERPRET	0x00000001		/* Interpreter invoked */
-#define	IMGPF_POWERPC	0x00000002		/* ppc mode for x86 */
+#define	IMGPF_NONE		0x00000000	/* No flags */
+#define	IMGPF_INTERPRET		0x00000001	/* Interpreter invoked */
+#define	IMGPF_POWERPC		0x00000002	/* ppc mode for x86 */
 #if CONFIG_EMBEDDED
 #undef IMGPF_POWERPC
 #endif
-#define	IMGPF_WAS_64BIT	0x00000004		/* exec from a 64Bit binary */
-#define	IMGPF_IS_64BIT	0x00000008		/* exec to a 64Bit binary */
-#define	IMGPF_SPAWN	0x00000010		/* spawn (without setexec) */
+#define	IMGPF_WAS_64BIT		0x00000004	/* exec from a 64Bit binary */
+#define	IMGPF_IS_64BIT		0x00000008	/* exec to a 64Bit binary */
+#define	IMGPF_SPAWN		0x00000010	/* spawn (without setexec) */
+#define	IMGPF_DISABLE_ASLR	0x00000020	/* disable ASLR */
+#define	IMGPF_ALLOW_DATA_EXEC	0x00000040	/* forcibly allow data execution */
 
 #endif	/* !_SYS_IMGACT */
diff --git a/bsd/ppc/ptrace.h b/bsd/sys/imgsrc.h
similarity index 74%
rename from bsd/ppc/ptrace.h
rename to bsd/sys/imgsrc.h
index be9af6886..aac577176 100644
--- a/bsd/ppc/ptrace.h
+++ b/bsd/sys/imgsrc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,8 +25,9 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
+/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
 /*
- * Copyright (c) 1992, 1993
+ * Copyright (c) 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -57,11 +58,42 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	@(#)ptrace.h	8.1 (Berkeley) 6/11/93
  */
 
-/*
- * Machine dependent trace commands.
- *
- * None for the PowerPC at this time.
+#ifndef _SYS_IMGSRC_H_
+#define _SYS_IMGSRC_H_
+
+#include <stdint.h>
+/* 
+ * For mount(2), defined here for easy use with System.framework/PrivateHeaders.
  */
+#define MNT_IMGSRC_BY_INDEX	0x20000000	
+
+typedef struct imgsrc_info 
+{
+	uint32_t	ii_height;	/* Nesting height: 0 is outermost */
+	uint32_t 	ii_flags;	/* Currently unused */
+	dev_t		ii_dev;		/* dev_t for this volume */
+	char		ii_reserved[24];/* TBD */
+} *imgsrc_info_t;
+
+struct mnt_imgsrc_args {
+	uint32_t 	mi_height;	/* As determined from an imgsrc_info structure */
+	uint32_t	mi_flags;	/* TBD */
+	const char*	mi_devpath;	/* Path to devnode */
+};
+
+#ifdef BSD_KERNEL_PRIVATE
+struct user64_mnt_imgsrc_args {
+	uint32_t 	mi_height;
+	uint32_t	mi_flags;
+	user64_addr_t 	mi_devpath;
+}; 
+
+struct user32_mnt_imgsrc_args {
+	uint32_t 	mi_height;
+	uint32_t	mi_flags;
+	user32_addr_t	mi_devpath;
+}; 
+#endif /* BSD_KERNEL_PRIVATE */
+#endif /* _SYS_IMGSRC_H_ */
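A hypothetical use of these structures with mount(2) (the device path and height are placeholders, and flag semantics beyond MNT_IMGSRC_BY_INDEX are assumptions):

#include <sys/mount.h>
#include <sys/imgsrc.h>

static int remount_from_image(void)
{
	struct mnt_imgsrc_args args = {
		.mi_height  = 0,		/* outermost image, cf. ii_height */
		.mi_flags   = 0,		/* TBD per the structure above */
		.mi_devpath = "/dev/disk1s1",	/* placeholder devnode */
	};
	return mount("hfs", "/", MNT_UPDATE | MNT_IMGSRC_BY_INDEX, &args);
}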
diff --git a/bsd/sys/kauth.h b/bsd/sys/kauth.h
index 33078a1f4..94f0b1e1e 100644
--- a/bsd/sys/kauth.h
+++ b/bsd/sys/kauth.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -106,11 +106,14 @@ struct kauth_identity_extlookup {
 #define KAUTH_EXTLOOKUP_WANT_MEMBERSHIP	(1<<12)
 #define KAUTH_EXTLOOKUP_VALID_MEMBERSHIP (1<<13)
 #define KAUTH_EXTLOOKUP_ISMEMBER	(1<<14)
+#define KAUTH_EXTLOOKUP_VALID_PWNAM	(1<<15)
+#define	KAUTH_EXTLOOKUP_WANT_PWNAM	(1<<16)
+#define KAUTH_EXTLOOKUP_VALID_GRNAM	(1<<17)
+#define	KAUTH_EXTLOOKUP_WANT_GRNAM	(1<<18)
 
 	__darwin_pid_t	el_info_pid;		/* request on behalf of PID */
+	u_int64_t	el_extend;		/* extension field */
 	u_int32_t	el_info_reserved_1;	/* reserved (APPLE) */
-	u_int32_t	el_info_reserved_2;	/* reserved (APPLE) */
-	u_int32_t	el_info_reserved_3;	/* reserved (APPLE) */
 
 	uid_t		el_uid;		/* user ID */
 	guid_t		el_uguid;	/* user GUID */
@@ -177,7 +180,6 @@ struct kauth_cred {
 	int	kc_nwhtgroups;		/* whiteout group list */
 	gid_t	*kc_whtgroups;
 	
-	struct auditinfo  cr_au;
 	struct au_session cr_audit;	/* user auditing data */
 
 	int	kc_nsupplement;		/* entry count in supplemental data pointer array */
@@ -192,6 +194,16 @@ struct kauth_cred {
 
 /* Kernel SPI for now */
 __BEGIN_DECLS
+/*
+ * Routines specific to credentials with POSIX credential labels attached
+ *
+ * XXX	Should be in policy_posix.h, with struct posix_cred
+ */
+extern kauth_cred_t posix_cred_create(posix_cred_t pcred);
+extern posix_cred_t posix_cred_get(kauth_cred_t cred);
+extern void posix_cred_label(kauth_cred_t cred, posix_cred_t pcred);
+extern int posix_cred_access(kauth_cred_t cred, id_t object_uid, id_t object_gid, mode_t object_mode, mode_t mode_req);
+
 extern uid_t	kauth_getuid(void);
 extern uid_t	kauth_getruid(void);
 extern gid_t	kauth_getgid(void);
@@ -221,7 +233,15 @@ extern int kauth_proc_label_update(struct proc *p, void *label);
 
 extern kauth_cred_t kauth_cred_find(kauth_cred_t cred);
 extern uid_t	kauth_cred_getuid(kauth_cred_t _cred);
+extern uid_t	kauth_cred_getruid(kauth_cred_t _cred);
+extern uid_t	kauth_cred_getsvuid(kauth_cred_t _cred);
 extern gid_t	kauth_cred_getgid(kauth_cred_t _cred);
+extern gid_t	kauth_cred_getrgid(kauth_cred_t _cred);
+extern gid_t	kauth_cred_getsvgid(kauth_cred_t _cred);
+extern int	kauth_cred_pwnam2guid(char *pwnam, guid_t *guidp);
+extern int	kauth_cred_grnam2guid(char *grnam, guid_t *guidp);
+extern int	kauth_cred_guid2pwnam(guid_t *guidp, char *pwnam);
+extern int	kauth_cred_guid2grnam(guid_t *guidp, char *grnam);
 extern int      kauth_cred_guid2uid(guid_t *_guid, uid_t *_uidp);
 extern int      kauth_cred_guid2gid(guid_t *_guid, gid_t *_gidp);
 extern int      kauth_cred_ntsid2uid(ntsid_t *_sid, uid_t *_uidp);
@@ -273,7 +293,7 @@ extern void	kauth_cred_uthread_update(struct uthread *, proc_t);
 #ifdef CONFIG_MACF
 extern int kauth_proc_label_update_execve(struct proc *p, struct vfs_context *ctx, struct vnode *vp, struct label *scriptlabel, struct label *execlabel);
 #endif
-extern int	kauth_cred_getgroups(gid_t *_groups, int *_groupcount);
+extern int	kauth_cred_getgroups(kauth_cred_t _cred, gid_t *_groups, int *_groupcount);
 extern int	kauth_cred_assume(uid_t _uid);
 extern int	kauth_cred_gid_subset(kauth_cred_t _cred1, kauth_cred_t _cred2, int *_resultp);
 struct auditinfo_addr;
@@ -468,6 +488,7 @@ struct kauth_acl_eval {
 	int			ae_options;
 #define KAUTH_AEVAL_IS_OWNER	(1<<0)		/* authorizing operation for owner */
 #define KAUTH_AEVAL_IN_GROUP	(1<<1)		/* authorizing operation for groupmember */
+#define KAUTH_AEVAL_IN_GROUP_UNKNOWN	(1<<2)		/* authorizing operation for unknown group membership */
 	/* expansions for 'generic' rights bits */
 	kauth_ace_rights_t	ae_exp_gall;
 	kauth_ace_rights_t	ae_exp_gread;
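
The new posix_cred_* routines and the revised kauth_cred_getgroups(),
which now takes the credential explicitly, combine roughly as below. A
sketch only: the encoding of posix_cred_access()'s mode_req argument and
its zero-on-success return are assumptions here.

    #include <sys/param.h>
    #include <sys/errno.h>
    #include <sys/kauth.h>

    /* Sketch: owner/group/other access check against a credential. */
    static int
    may_read(kauth_cred_t cred, id_t obj_uid, id_t obj_gid, mode_t obj_mode)
    {
            gid_t groups[NGROUPS];
            int count = NGROUPS;

            /* The credential is now an explicit argument. */
            if (kauth_cred_getgroups(cred, groups, &count) != 0)
                    return (EPERM);

            /* Assumed: returns 0 when the requested right is granted. */
            return (posix_cred_access(cred, obj_uid, obj_gid, obj_mode,
                S_IRUSR));
    }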
diff --git a/bsd/sys/kdebug.h b/bsd/sys/kdebug.h
index 9f7b789c9..393c413df 100644
--- a/bsd/sys/kdebug.h
+++ b/bsd/sys/kdebug.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -47,6 +47,11 @@ __BEGIN_DECLS
 #include <kdebug.h>
 #endif /* KERNEL_BUILD */
 
+#ifdef	XNU_KERNEL_PRIVATE
+#include <stdint.h>
+#include <mach/branch_predicates.h>
+#endif
+
 /*
  * types of faults that vm_fault handles
  * and creates trace entries for
@@ -77,21 +82,22 @@ __BEGIN_DECLS
 
 
 /* The Kernel Debug Classes  */
-#define DBG_MACH			1
-#define DBG_NETWORK			2	
-#define DBG_FSYSTEM			3
-#define DBG_BSD				4
-#define DBG_IOKIT			5
-#define DBG_DRIVERS			6
-#define DBG_TRACE           7
+#define DBG_MACH		1
+#define DBG_NETWORK		2
+#define DBG_FSYSTEM		3
+#define DBG_BSD			4
+#define DBG_IOKIT		5
+#define DBG_DRIVERS		6
+#define DBG_TRACE		7
 #define DBG_DLIL	        8
 #define DBG_SECURITY		9
-#define DBG_MISC			20
-#define DBG_DYLD            31
-#define DBG_QT              32
-#define DBG_APPS            33
-#define DBG_LAUNCHD         34
-#define DBG_MIG				255
+#define DBG_CORESTORAGE		10
+#define DBG_MISC		20
+#define DBG_DYLD		31
+#define DBG_QT			32
+#define DBG_APPS		33
+#define DBG_LAUNCHD		34
+#define DBG_MIG			255
 
 /* **** The Kernel Debug Sub Classes for Mach (DBG_MACH) **** */
 #define	DBG_MACH_EXCP_KTRAP_x86	0x02	/* Kernel Traps on x86 */
@@ -114,6 +120,7 @@ __BEGIN_DECLS
 #define	DBG_MACH_MSGID_INVALID	0x50	/* Messages - invalid */
 #define DBG_MACH_LOCKS		0x60	/* new lock APIs */
 #define DBG_MACH_PMAP		0x70	/* pmap */
+#define DBG_MACH_MP		0x90	/* MP related */
 
 /* Codes for Scheduler (DBG_MACH_SCHED) */     
 #define MACH_SCHED              0x0     /* Scheduler */
@@ -127,6 +134,16 @@ __BEGIN_DECLS
 #define	MACH_DEMOTE             0x8	/* promotion undone */
 #define MACH_IDLE               0x9	/* processor idling */
 #define MACH_STACK_DEPTH        0xa	/* stack depth at switch */
+#define MACH_MOVED              0xb	/* did not use original scheduling decision */
+#define MACH_FAIRSHARE_ENTER    0xc	/* move to fairshare band */
+#define MACH_FAIRSHARE_EXIT     0xd	/* exit fairshare band */
+#define MACH_FAILSAFE		0xe	/* tripped fixed-pri/RT failsafe */
+#define	MACH_GET_URGENCY	0x14	/* Urgency queried by platform */
+#define	MACH_URGENCY		0x15	/* Urgency (RT/BG/NORMAL) communicated
+					 * to platform */
+#define	MACH_REDISPATCH		0x16	/* "next thread" thread redispatched */
+#define	MACH_REMOTE_AST		0x17	/* AST signal issued to remote processor */
+#define	MACH_SCHED_LPA_BROKEN	0x18	/* last_processor affinity broken in choose_processor */
 
 /* Codes for pmap (DBG_MACH_PMAP) */     
 #define PMAP__CREATE		0x0
@@ -176,31 +193,32 @@ __BEGIN_DECLS
 #define DBG_IOMCURS			5	/* Memory Cursor */
 #define DBG_IOMDESC			6	/* Memory Descriptors */
 #define DBG_IOPOWER			7	/* Power Management */
-#define DBG_IOSERVICE			8	/* Matching etc. */
+#define DBG_IOSERVICE		8	/* Matching etc. */
 
 /* **** 9-32 reserved for internal IOKit usage **** */
 
 #define DBG_IOSTORAGE		32	/* Storage layers */
 #define	DBG_IONETWORK		33	/* Network layers */
 #define	DBG_IOKEYBOARD		34	/* Keyboard */
-#define	DBG_IOHID			35	/* HID Devices */
-#define	DBG_IOAUDIO			36	/* Audio */
+#define	DBG_IOHID		35	/* HID Devices */
+#define	DBG_IOAUDIO		36	/* Audio */
 #define	DBG_IOSERIAL		37	/* Serial */
-#define	DBG_IOTTY			38	/* TTY layers */
-#define DBG_IOSAM			39	/* SCSI Architecture Model layers */
-#define DBG_IOPARALLELATA   40	/* Parallel ATA */
+#define	DBG_IOTTY		38	/* TTY layers */
+#define DBG_IOSAM		39	/* SCSI Architecture Model layers */
+#define DBG_IOPARALLELATA   	40	/* Parallel ATA */
 #define DBG_IOPARALLELSCSI	41	/* Parallel SCSI */
-#define DBG_IOSATA			42	/* Serial-ATA */
-#define DBG_IOSAS			43	/* SAS */
+#define DBG_IOSATA		42	/* Serial-ATA */
+#define DBG_IOSAS		43	/* SAS */
 #define DBG_IOFIBRECHANNEL	44	/* FiberChannel */
-#define DBG_IOUSB			45	/* USB */
+#define DBG_IOUSB		45	/* USB */
 #define DBG_IOBLUETOOTH		46	/* Bluetooth */
 #define DBG_IOFIREWIRE		47	/* FireWire */
 #define DBG_IOINFINIBAND	48	/* Infiniband */
-#define DBG_IOCPUPM			49	/* CPU Power Management */
+#define DBG_IOCPUPM		49	/* CPU Power Management */
 #define DBG_IOGRAPHICS		50	/* Graphics */
 #define DBG_HIBERNATE		51	/* hibernation related events */
 
+
 /* Backwards compatibility */
 #define	DBG_IOPOINTING		DBG_IOHID			/* OBSOLETE: Use DBG_IOHID instead */
 #define DBG_IODISK			DBG_IOSTORAGE		/* OBSOLETE: Use DBG_IOSTORAGE instead */
@@ -223,7 +241,7 @@ __BEGIN_DECLS
 #define DBG_DRVFIREWIRE		16	/* FireWire */
 #define DBG_DRVINFINIBAND	17	/* Infiniband */
 #define DBG_DRVGRAPHICS		18  /* Graphics */
-#define DBG_DRVSD			19  /* Secure Digital */
+#define DBG_DRVSD		19	/* Secure Digital */
 
 /* Backwards compatibility */
 #define	DBG_DRVPOINTING		DBG_DRVHID		/* OBSOLETE: Use DBG_DRVHID instead */
@@ -236,7 +254,7 @@ __BEGIN_DECLS
 #define DBG_DLIL_PR_FLT 4       /* DLIL Protocol Filter */
 #define DBG_DLIL_IF_FLT 5       /* DLIL Interface Filter */
 
-/* The Kernel Debug Sub Classes for File System */
+/* The Kernel Debug Sub Classes for File System (DBG_FSYSTEM) */
 #define DBG_FSRW      1       /* reads and writes to the filesystem */
 #define DBG_DKRW      2       /* reads and writes to the disk */
 #define DBG_FSVN      3       /* vnode operations (inc. locking/unlocking) */
@@ -244,6 +262,7 @@ __BEGIN_DECLS
 #define DBG_JOURNAL   5       /* journaling operations */
 #define DBG_IOCTL     6       /* ioctl to the disk */
 #define DBG_BOOTCACHE 7       /* bootcache operations */
+#define DBG_HFS       8       /* HFS-specific events; see bsd/hfs/hfs_kdebug.h */
 
 /* The Kernel Debug Sub Classes for BSD */
 #define DBG_BSD_PROC		0x01	/* process/signals related */
@@ -256,11 +275,15 @@ __BEGIN_DECLS
 /* The Codes for BSD subcode class DBG_BSD_PROC */
 #define BSD_PROC_EXIT		1	/* process exit */
 #define BSD_PROC_FRCEXIT 	2	/* Kernel force termination */
+
 /* The Kernel Debug Sub Classes for DBG_TRACE */
 #define DBG_TRACE_DATA      0
 #define DBG_TRACE_STRING    1
 #define	DBG_TRACE_INFO	    2
 
+/* The Kernel Debug Sub Classes for DBG_CORESTORAGE */
+#define DBG_CS_IO	0
+
 /* The Kernel Debug Sub Classes for DBG_MISC */
 #define DBG_EVENT	0x10
 #define	DBG_BUFFER	0x20
@@ -274,6 +297,8 @@ __BEGIN_DECLS
 #define DKIO_ASYNC	0x04
 #define DKIO_META	0x08
 #define DKIO_PAGING	0x10
+#define DKIO_THROTTLE	0x20
+#define DKIO_PASSIVE	0x40
 
 /* Codes for Application Sub Classes */
 #define DBG_APP_SAMBA	128
@@ -343,25 +368,38 @@ extern unsigned int kdebug_enable;
 #define KDEBUG_ENABLE_CHUD    0x4
 
 #if	(!defined(NO_KDEBUG))
-
+#ifdef	XNU_KERNEL_PRIVATE
 #define KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e)				\
 do {									\
-    if (kdebug_enable)							\
+	if (__improbable(kdebug_enable))					\
         kernel_debug(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c,		\
 		       (uintptr_t)d,(uintptr_t)e);			\
 } while(0)
 
 #define KERNEL_DEBUG_CONSTANT1(x,a,b,c,d,e)				\
 do {									\
-    if (kdebug_enable)							\
+	if (__improbable(kdebug_enable))					\
         kernel_debug1(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c,		\
 			(uintptr_t)d,(uintptr_t)e);			\
 } while(0)
+#else	/* !XNU_KERNEL_PRIVATE */
+#define KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e)				\
+do {									\
+	if (kdebug_enable)						\
+        kernel_debug(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c,		\
+		       (uintptr_t)d,(uintptr_t)e);			\
+} while(0)
 
-#else
-
-#define KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e)
-#define KERNEL_DEBUG_CONSTANT1(x,a,b,c,d,e)
+#define KERNEL_DEBUG_CONSTANT1(x,a,b,c,d,e)				\
+do {									\
+	if (kdebug_enable)						\
+        kernel_debug1(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c,		\
+			(uintptr_t)d,(uintptr_t)e);			\
+} while(0)
+#endif /* XNU_KERNEL_PRIVATE */
+#else /* NO_KDEBUG */
+#define KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e) do { } while(0)
+#define KERNEL_DEBUG_CONSTANT1(x,a,b,c,d,e) do { } while(0)
 
 #define __kdebug_constant_only __unused
 #endif
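
Inside xnu proper, the enable check is now wrapped in __improbable(), a
branch hint that keeps disabled tracing nearly free on the hot path. The
intended call pattern is sketched below; the event code and arguments are
hypothetical, while KDBG_CODE() and the DBG_FUNC_START/DBG_FUNC_END
qualifiers are defined elsewhere in this header:

    /* Hypothetical event: class DBG_FSYSTEM, subclass DBG_FSRW, code 1. */
    #define MY_FS_READ  KDBG_CODE(DBG_FSYSTEM, DBG_FSRW, 1)

    KERNEL_DEBUG_CONSTANT(MY_FS_READ | DBG_FUNC_START, offset, len, 0, 0, 0);
    /* ... perform the read; offset, len, error, resid are placeholders ... */
    KERNEL_DEBUG_CONSTANT(MY_FS_READ | DBG_FUNC_END, error, resid, 0, 0, 0);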
@@ -384,23 +422,37 @@ extern void kernel_debug1(
 
 
 #if	(KDEBUG && (!defined(NO_KDEBUG)))
-
+#ifdef	XNU_KERNEL_PRIVATE
 #define KERNEL_DEBUG(x,a,b,c,d,e)					\
 do {									\
-    if (kdebug_enable)							\
+	if (__improbable(kdebug_enable))				\
         kernel_debug((uint32_t)x,  (uintptr_t)a, (uintptr_t)b,		\
 		     (uintptr_t)c, (uintptr_t)d, (uintptr_t)e);		\
 } while(0)
 
 #define KERNEL_DEBUG1(x,a,b,c,d,e)					\
 do {									\
-    if (kdebug_enable)							\
+	if (__improbable(kdebug_enable))				\
         kernel_debug1((uint32_t)x,  (uintptr_t)a, (uintptr_t)b,		\
 		      (uintptr_t)c, (uintptr_t)d, (uintptr_t)e);	\
 } while(0)
 
 #define __kdebug_only
+#else /* !XNU_KERNEL_PRIVATE */
+#define KERNEL_DEBUG(x,a,b,c,d,e)					\
+do {									\
+	if (kdebug_enable)						\
+        kernel_debug((uint32_t)x,  (uintptr_t)a, (uintptr_t)b,		\
+		     (uintptr_t)c, (uintptr_t)d, (uintptr_t)e);		\
+} while(0)
 
+#define KERNEL_DEBUG1(x,a,b,c,d,e)					\
+do {									\
+	if (kdebug_enable)						\
+        kernel_debug1((uint32_t)x,  (uintptr_t)a, (uintptr_t)b,		\
+		      (uintptr_t)c, (uintptr_t)d, (uintptr_t)e);	\
+} while(0)
+#endif /* XNU_KERNEL_PRIVATE */
 #else
 
 #define KERNEL_DEBUG(x,a,b,c,d,e) do {} while (0)
@@ -410,6 +462,7 @@ do {									\
 #endif
 
 #ifdef KERNEL_PRIVATE
+#include <mach/boolean.h>
 struct proc;
 extern void kdbg_trace_data(struct proc *proc, long *arg_pid);
 
@@ -417,6 +470,19 @@ extern void kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *a
 
 extern void kdbg_dump_trace_to_file(const char *);
 void start_kern_tracing(unsigned int);
+struct task;
+extern void kdbg_get_task_name(char*, int, struct task *task);
+void disable_wrap(uint32_t *old_slowcheck, uint32_t *old_flags);
+void enable_wrap(uint32_t old_slowcheck, boolean_t lostevents);
+void release_storage_unit(int cpu,  uint32_t storage_unit);
+int allocate_storage_unit(int cpu);
+
+void trace_handler_map_ctrl_page(uintptr_t addr, unsigned long ctrl_page_size, unsigned long storage_size, unsigned long kds_ptr_size);
+void trace_handler_map_bufinfo(uintptr_t addr, unsigned long size);
+void trace_handler_unmap_bufinfo(void);
+void trace_handler_map_buffer(int index, uintptr_t addr, unsigned long size);
+void trace_handler_unmap_buffer(int index);
+void trace_set_timebases(uint64_t tsc, uint64_t ns);
 #endif  /* KERNEL_PRIVATE */
 
 
@@ -446,7 +512,7 @@ typedef struct {
 
 #if !defined(__LP64__)
 #define KDBG_TIMESTAMP_MASK		0x00ffffffffffffffULL
-#define KDBG_CPU_MASK			0x0f00000000000000ULL
+#define KDBG_CPU_MASK			0xff00000000000000ULL
 #define KDBG_CPU_SHIFT			56
 static inline void
 kdbg_set_cpu(kd_buf *kp, int cpu)
@@ -460,9 +526,9 @@ kdbg_get_cpu(kd_buf *kp)
 	return (int) (((kp)->timestamp & KDBG_CPU_MASK) >> KDBG_CPU_SHIFT);
 }
 static inline void
-kdbg_set_timestamp(kd_buf *kp, uint64_t time)
+kdbg_set_timestamp(kd_buf *kp, uint64_t thetime)
 {
-	kp->timestamp = time & KDBG_TIMESTAMP_MASK;
+	kp->timestamp = thetime & KDBG_TIMESTAMP_MASK;
 }
 static inline uint64_t
 kdbg_get_timestamp(kd_buf *kp)
@@ -470,9 +536,9 @@ kdbg_get_timestamp(kd_buf *kp)
 	return kp->timestamp & KDBG_TIMESTAMP_MASK;
 }
 static inline void
-kdbg_set_timestamp_and_cpu(kd_buf *kp, uint64_t time, int cpu)
+kdbg_set_timestamp_and_cpu(kd_buf *kp, uint64_t thetime, int cpu)
 {
-	kp->timestamp = (time & KDBG_TIMESTAMP_MASK) | 
+	kp->timestamp = (thetime & KDBG_TIMESTAMP_MASK) | 
 				(((uint64_t) cpu) << KDBG_CPU_SHIFT);
 }
 #else
@@ -488,9 +554,9 @@ kdbg_get_cpu(kd_buf *kp)
 	return kp->cpuid;
 }
 static inline void
-kdbg_set_timestamp(kd_buf *kp, uint64_t time)
+kdbg_set_timestamp(kd_buf *kp, uint64_t thetime)
 {
-	kp->timestamp = time;
+	kp->timestamp = thetime;
 }
 static inline uint64_t
 kdbg_get_timestamp(kd_buf *kp)
@@ -498,9 +564,9 @@ kdbg_get_timestamp(kd_buf *kp)
 	return kp->timestamp;
 }
 static inline void
-kdbg_set_timestamp_and_cpu(kd_buf *kp, uint64_t time, int cpu)
+kdbg_set_timestamp_and_cpu(kd_buf *kp, uint64_t thetime, int cpu)
 {
-	kdbg_set_timestamp(kp, time);
+	kdbg_set_timestamp(kp, thetime);
 	kdbg_set_cpu(kp, cpu);
 }
 #endif
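
On 32-bit kernels the CPU number is packed into the top byte of the
64-bit timestamp word; widening KDBG_CPU_MASK from 0x0f... to 0xff...
raises the encodable CPU count from 16 to 256 while still leaving 56 bits
of timestamp. A round-trip sketch (kernel context assumed):

    kd_buf entry;

    kdbg_set_timestamp_and_cpu(&entry, mach_absolute_time(), cpu_number());

    uint64_t ts  = kdbg_get_timestamp(&entry); /* low 56 bits on ILP32 */
    int      cpu = kdbg_get_cpu(&entry);       /* top 8 bits on ILP32 */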
@@ -541,6 +607,18 @@ typedef struct {
 	char		command[20];
 } kd_threadmap;
 
+
+typedef struct {
+	int             version_no;
+	int             thread_count;
+	uint64_t        TOD_secs;
+	uint32_t        TOD_usecs;
+} RAW_header;
+
+#define RAW_VERSION0	0x55aa0000
+#define RAW_VERSION1	0x55aa0101
+
+
 #define	KDBG_CLASSTYPE		0x10000
 #define	KDBG_SUBCLSTYPE		0x20000
 #define	KDBG_RANGETYPE		0x40000
diff --git a/bsd/sys/kern_control.h b/bsd/sys/kern_control.h
index 0e83895b4..4a5a411d6 100644
--- a/bsd/sys/kern_control.h
+++ b/bsd/sys/kern_control.h
@@ -252,6 +252,7 @@ typedef errno_t (*ctl_disconnect_func)(kern_ctl_ref kctlref, u_int32_t unit, voi
 	@param unitinfo The user-defined private data initialized by the
 		ctl_connect_func callback.
 	@param m The data sent by the client to the kernel control in an
+		mbuf chain. Your function is responsible for releasing the
 		mbuf chain.
 	@param flags The flags specified by the client when calling
 		send/sendto/sendmsg (MSG_OOB/MSG_DONTROUTE).
@@ -433,6 +434,11 @@ ctl_enqueuembuf(kern_ctl_ref kctlref, u_int32_t unit, mbuf_t m, u_int32_t flags)
 errno_t 
 ctl_getenqueuespace(kern_ctl_ref kctlref, u_int32_t unit, size_t *space);
 
+#ifdef KERNEL_PRIVATE
+u_int32_t ctl_id_by_name(const char *name);
+errno_t ctl_name_by_id(u_int32_t id, char *out_name, size_t maxsize);
+#endif /* KERNEL_PRIVATE */
+
 __END_DECLS
 #endif /* KERNEL */
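
The two new KERNEL_PRIVATE routines translate between a kernel control's
registered name and its dynamically assigned ID. A sketch, using a
hypothetical control name and assuming an ID of 0 denotes no match:

    #include <sys/kern_control.h>

    u_int32_t id = ctl_id_by_name("com.example.kext.ctl"); /* assumed name */
    if (id != 0) {
            char name[MAX_KCTL_NAME];

            if (ctl_name_by_id(id, name, sizeof (name)) == 0)
                    printf("kctl %u is %s\n", id, name);
    }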
 
diff --git a/bsd/sys/kern_memorystatus.h b/bsd/sys/kern_memorystatus.h
index b89337521..4a05a490f 100644
--- a/bsd/sys/kern_memorystatus.h
+++ b/bsd/sys/kern_memorystatus.h
@@ -52,7 +52,8 @@
 
 enum {
 	kMemoryStatusLevelNote = 1,
-	kMemoryStatusSnapshotNote = 2
+	kMemoryStatusSnapshotNote = 2,
+	kMemoryStatusHibernationNote = 3
 };
 
 enum {
@@ -109,19 +110,48 @@ typedef struct jetsam_snapshot {
 	jetsam_snapshot_entry_t entries[1];
 } jetsam_snapshot_t;
 
+typedef struct jetsam_hibernation_entry {
+	uint32_t pid;
+	uint32_t flags;
+	uint32_t pages;
+} jetsam_hibernation_entry_t;
+
+#endif /* !MACH_KERNEL_PRIVATE */
+
 enum {
-	kJetsamFlagsFrontmost =		(1 << 0),
-	kJetsamFlagsKilled =		(1 << 1),
-	kJetsamFlagsKilledHiwat =	(1 << 2)
+	kJetsamFlagsFrontmost =        (1 << 0),
+	kJetsamFlagsKilled =           (1 << 1),
+	kJetsamFlagsKilledHiwat =      (1 << 2),
+	kJetsamFlagsHibernated =       (1 << 3),
+	kJetsamFlagsKilledVnodes =     (1 << 4),
+	kJetsamFlagsKilledSwap =       (1 << 5),
+	kJetsamFlagsThawed =           (1 << 6),
+	kJetsamFlagsKilledVM =         (1 << 7),
+	kJetsamFlagsSuspForDiagnosis = (1 << 8)
 };
-#endif /* !MACH_KERNEL_PRIVATE */
 
 #ifdef KERNEL
 extern void kern_memorystatus_init(void) __attribute__((section("__TEXT, initcode")));
-extern int jetsam_kill_top_proc(void);
+extern int jetsam_kill_top_proc(boolean_t any, uint32_t reason);
 
 extern int kern_memorystatus_wakeup;
 extern int kern_memorystatus_level;
+extern unsigned int kern_memorystatus_delta;
+
+#ifdef CONFIG_FREEZE
+extern void kern_hibernation_init(void) __attribute__((section("__TEXT, initcode")));
+extern int kern_hibernation_wakeup;
+
+void kern_hibernation_on_pid_suspend(int pid);
+void kern_hibernation_on_pid_resume(int pid, task_t task);
+void kern_hibernation_on_pid_hibernate(int pid);
+#endif
+
+#if CONFIG_EMBEDDED
+#define VM_CHECK_MEMORYSTATUS do { vm_check_memorystatus(); } while(0)
+#else /* !CONFIG_EMBEDDED */
+#define VM_CHECK_MEMORYSTATUS do {} while(0)
+#endif
 
 #endif /* KERNEL */
 #endif /* SYS_KERN_MEMORYSTATUS_H */
diff --git a/bsd/sys/kpi_mbuf.h b/bsd/sys/kpi_mbuf.h
index 00134b226..24239b9f4 100644
--- a/bsd/sys/kpi_mbuf.h
+++ b/bsd/sys/kpi_mbuf.h
@@ -55,6 +55,7 @@
 	@constant MBUF_EXT Indicates this mbuf has external data.
 	@constant MBUF_PKTHDR Indicates this mbuf has a packet header.
 	@constant MBUF_EOR Indicates this mbuf is the end of a record.
+	@constant MBUF_LOOP Indicates this packet is looped back.
 	@constant MBUF_BCAST Indicates this packet will be sent or was
 		received as a broadcast.
 	@constant MBUF_MCAST Indicates this packet will be sent or was
@@ -72,13 +73,15 @@ enum {
 	MBUF_EXT	= 0x0001,	/* has associated external storage */
 	MBUF_PKTHDR	= 0x0002,	/* start of record */
 	MBUF_EOR	= 0x0004,	/* end of record */
+	MBUF_LOOP	= 0x0040,	/* packet is looped back */
 
 	MBUF_BCAST	= 0x0100,	/* send/received as link-level broadcast */
 	MBUF_MCAST	= 0x0200,	/* send/received as link-level multicast */
 	MBUF_FRAG	= 0x0400,	/* packet is a fragment of a larger packet */
 	MBUF_FIRSTFRAG	= 0x0800,	/* packet is first fragment */
 	MBUF_LASTFRAG	= 0x1000,	/* packet is last fragment */
-	MBUF_PROMISC	= 0x2000	/* packet is promiscuous */
+	MBUF_PROMISC	= 0x2000,	/* packet is promiscuous */
+	MBUF_HASFCS	= 0x4000	/* packet has FCS */
 };
 typedef u_int32_t mbuf_flags_t;
 
@@ -145,6 +148,10 @@ typedef u_int32_t mbuf_type_t;
 		calculated yet.
 	@constant MBUF_CSUM_REQ_UDP Indicates the UDP checksum has not been
 		calculated yet.
+	@constant MBUF_CSUM_REQ_TCPIPV6 Indicates the TCP checksum for IPv6
+		has not been calculated yet.
+	@constant MBUF_CSUM_REQ_UDPIPV6 Indicates the UDP checksum for IPv6
+		has not been calculated yet.
 */
 enum {
 	MBUF_TSO_IPV4		= 0x100000,
@@ -158,7 +165,9 @@ enum {
 #endif /* KERNEL_PRIVATE */
 	MBUF_CSUM_REQ_IP	= 0x0001,
 	MBUF_CSUM_REQ_TCP	= 0x0002,
-	MBUF_CSUM_REQ_UDP	= 0x0004
+	MBUF_CSUM_REQ_UDP	= 0x0004,
+	MBUF_CSUM_REQ_TCPIPV6	= 0x0020,
+	MBUF_CSUM_REQ_UDPIPV6	= 0x0040
 };
 typedef u_int32_t mbuf_csum_request_flags_t;
 
@@ -178,7 +187,7 @@ typedef u_int32_t mbuf_csum_request_flags_t;
 		hardware should be passed as the second parameter of
 		mbuf_set_csum_performed. The hardware calculated checksum value
 		can be retrieved using the second parameter passed to
-		mbuf_get_csum_performed.
+		mbuf_get_csum_performed. This applies to both IPv4 and IPv6.
 	@constant MBUF_CSUM_PSEUDO_HDR If set, this indicates that the
 		checksum value for MBUF_CSUM_DID_DATA includes the pseudo header
 		value. If this is not set, the stack will calculate the pseudo
@@ -1183,6 +1192,15 @@ extern u_int32_t mbuf_get_mlen(void);
  */
 extern u_int32_t mbuf_get_mhlen(void);
 
+/*!
+	@function mbuf_get_minclsize
+	@discussion This routine returns the minimum number of data bytes
+		before an external cluster is used.  This is equivalent to the
+		legacy MINCLSIZE macro.
+	@result	The minimum number of bytes before a cluster will be used.
+ */
+extern u_int32_t mbuf_get_minclsize(void);
+
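
A sketch of the decision this accessor supports, built only on existing
mbuf KPIs (payload_size stands in for the caller's length):

    mbuf_t m;

    if (mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &m) != 0)
            return (ENOBUFS);
    if (payload_size >= mbuf_get_minclsize()) {
            /* Too large for the internal data area: attach a cluster. */
            if (mbuf_mclget(MBUF_WAITOK, MBUF_TYPE_DATA, &m) != 0) {
                    mbuf_freem(m);
                    return (ENOBUFS);
            }
    }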
 /*!
 	@function mbuf_clear_csum_performed
 	@discussion Clears the hardware checksum flags and values.
@@ -1330,32 +1348,8 @@ extern void mbuf_tag_free(mbuf_t mbuf, mbuf_tag_id_t module_id,
  */
 extern void mbuf_stats(struct mbuf_stat *stats);
 
-#ifdef KERNEL_PRIVATE
-/*
-	@enum mbuf_priority_t
-	@abstract Priority of a packet.
-	@discussion Some mbufs represent packets containing application data.
-		The priority of the application data is represented by the
-		mbuf priority, as determined by the system.
-	@constant MBUF_PRIORITY_NORMAL Indicates the packet contains
-		normal priority data.
-	@constant MBUF_PRIORITY_BACKGROUND Indicates the packet contains
-		background priority data.
- */
-typedef enum {
-	MBUF_PRIORITY_NORMAL		= 0,
-	MBUF_PRIORITY_BACKGROUND	= 1
-} mbuf_priority_t;
-
-/*
-	@function mbuf_get_priority
-	@discussion Get the priority value of the packet.
-	@param mbuf The mbuf to obtain the priority value from.
-	@result The priority value of the packet.
- */
-extern mbuf_priority_t mbuf_get_priority(mbuf_t mbuf);
 
-/*
+/*!
 	@enum mbuf_traffic_class_t
 	@abstract Traffic class of a packet
 	@discussion Property that represent the category of traffic of a packet. 
@@ -1367,15 +1361,19 @@ extern mbuf_priority_t mbuf_get_priority(mbuf_t mbuf);
 */
 typedef enum {
 #ifdef XNU_KERNEL_PRIVATE
-	MBUF_TC_NONE	= -1,
+	MBUF_TC_UNSPEC	= -1,		/* Internal: not specified */
 #endif
 	MBUF_TC_BE 		= 0,
 	MBUF_TC_BK		= 1,
 	MBUF_TC_VI		= 2,
 	MBUF_TC_VO		= 3
+#ifdef XNU_KERNEL_PRIVATE
+        ,
+	MBUF_TC_MAX		= 4	/* Internal: traffic class count */
+#endif
 } mbuf_traffic_class_t;
 
-/*
+/*!
 	@function mbuf_get_traffic_class
 	@discussion Get the traffic class of an mbuf packet
 	@param mbuf The mbuf to get the traffic class of.
@@ -1383,7 +1381,7 @@ typedef enum {
 */
 extern mbuf_traffic_class_t mbuf_get_traffic_class(mbuf_t mbuf);
 
-/*
+/*!
 	@function mbuf_set_traffic_class
 	@discussion Set the traffic class of an mbuf packet.
 	@param mbuf The mbuf to set the traffic class on.
@@ -1391,7 +1389,6 @@ extern mbuf_traffic_class_t mbuf_get_traffic_class(mbuf_t mbuf);
 	@result 0 on success, EINVAL if bad paramater is passed
 */
 extern errno_t mbuf_set_traffic_class(mbuf_t mbuf, mbuf_traffic_class_t tc);
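
With the KERNEL_PRIVATE guard dropped, any KPI client can tag traffic.
A minimal sketch, where m is an assumed packet-header mbuf:

    errno_t err = mbuf_set_traffic_class(m, MBUF_TC_BK);
    if (err == 0) {
            /* The tag round-trips through the getter. */
            assert(mbuf_get_traffic_class(m) == MBUF_TC_BK);
    }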
-#endif /* KERNEL_PRIVATE */
 
 /* IF_QUEUE interaction */
 
diff --git a/bsd/sys/kpi_socket.h b/bsd/sys/kpi_socket.h
index 5e380f5a7..5f2093369 100644
--- a/bsd/sys/kpi_socket.h
+++ b/bsd/sys/kpi_socket.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2008-2011 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -56,7 +56,12 @@ struct timeval;
 		Calls to your upcall function are not serialized and may be
 		called concurrently from multiple threads in the kernel.
 
-		Your upcall function will be called when:
+		Your upcall function will be called:
+		    when there is more data than the low water mark for reading,
+		    or when there is space available for writing,
+		    or when there is a connection to accept,
+		    or when a socket is connected,
+		    or when a socket is closed or disconnected.
 
 	@param so A reference to the socket that's ready.
 	@param cookie The cookie passed in when the socket was created.
@@ -227,11 +232,16 @@ extern errno_t sock_settclassopt(socket_t so, const void* optval, size_t optlen)
 */
 extern errno_t sock_gettclassopt(socket_t so, void* optval, size_t* optlen);
 
+#ifdef XNU_KERNEL_PRIVATE
+extern void socket_set_traffic_mgt_flags_locked(socket_t so, u_int32_t flags);
+extern void socket_clear_traffic_mgt_flags_locked(socket_t so, u_int32_t flags);
+#endif /* XNU_KERNEL_PRIVATE */
 #ifdef BSD_KERNEL_PRIVATE
 extern void socket_set_traffic_mgt_flags(socket_t so, u_int32_t flags);
 extern void socket_clear_traffic_mgt_flags(socket_t so, u_int32_t flags);
+extern errno_t socket_defunct(struct proc *, socket_t so, int);
 #endif /* BSD_KERNEL_PRIVATE */
-#endif
+#endif /* KERNEL_PRIVATE */
 
 /*!
 	@function sock_listen
@@ -473,6 +483,22 @@ extern errno_t sock_getaddr(socket_t so, struct sockaddr **psockname,
 	@param sockname The socket name to be freed.
  */
 extern void sock_freeaddr(struct sockaddr *sockname);
+
+/*
+	@function sock_setupcall
+	@discussion Set the notifier function to be called when an event
+		occurs on the socket. This may be set to NULL to disable
+		further notifications. Setting the function does not affect
+		notifications that are about to be sent or are being sent.
+		Note: when used on a socket passed in from userspace, it is
+		crucial to call sock_retain() on the socket first; otherwise a
+		callback could be dispatched on a closed socket and cause a crash.
+	@param sock The socket.
+	@param callback The notifier function.
+	@param context A cookie passed directly to the callback.
+*/
+extern errno_t sock_setupcall(socket_t sock, sock_upcall callback, void* context);
+
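
Given the retain caveat above, a typical arming sequence looks roughly
like this (my_cookie is an assumed context pointer):

    static void
    my_upcall(socket_t so, void *cookie, int waitf)
    {
            /* Upcalls are not serialized; this can run on many threads. */
    }

    /* At attach time, for a socket obtained from userspace: */
    sock_retain(so);
    if (sock_setupcall(so, my_upcall, my_cookie) != 0)
            sock_release(so);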
 #endif /* KERNEL_PRIVATE */
 
 __END_DECLS
diff --git a/bsd/sys/make_posix_availability.sh b/bsd/sys/make_posix_availability.sh
new file mode 100755
index 000000000..5aa58b364
--- /dev/null
+++ b/bsd/sys/make_posix_availability.sh
@@ -0,0 +1,71 @@
+#! /bin/sh -
+#
+# Copyright (c) 2010 Apple Inc. All rights reserved.
+#
+# @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+# 
+# This file contains Original Code and/or Modifications of Original Code
+# as defined in and that are subject to the Apple Public Source License
+# Version 2.0 (the 'License'). You may not use this file except in
+# compliance with the License. Please obtain a copy of the License at
+# http://www.opensource.apple.com/apsl/ and read it before using this
+# file.
+# 
+# The Original Code and all software distributed under the License are
+# distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+# EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+# INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+# Please see the License for the specific language governing rights and
+# limitations under the License.
+# 
+# @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+#
+
+POSIX_VALUES="198808L 199009L 199209L 199309L 199506L 200112L 200809L"
+
+{
+cat <<EOF
+/* Copyright (c) 2010 Apple Inc. All rights reserved.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _CDEFS_H_
+# error "Never use <sys/_posix_availability.h> directly.  Use <sys/cdefs.h> instead."
+#endif
+
+EOF
+
+for value in ${POSIX_VALUES} ; do
+    echo "#if !defined(_DARWIN_C_SOURCE) && defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= ${value}"
+    echo "#define ___POSIX_C_DEPRECATED_STARTING_${value} __deprecated"
+    echo "#else"
+    echo "#define ___POSIX_C_DEPRECATED_STARTING_${value}"
+    echo "#endif"
+    echo
+done
+} > "$1"
+
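
Each iteration of the loop emits one deterministic stanza into the
generated header; for 200112L, for example:

    #if !defined(_DARWIN_C_SOURCE) && defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L
    #define ___POSIX_C_DEPRECATED_STARTING_200112L __deprecated
    #else
    #define ___POSIX_C_DEPRECATED_STARTING_200112L
    #endif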
diff --git a/bsd/sys/make_symbol_aliasing.sh b/bsd/sys/make_symbol_aliasing.sh
new file mode 100755
index 000000000..fa5f0e33c
--- /dev/null
+++ b/bsd/sys/make_symbol_aliasing.sh
@@ -0,0 +1,86 @@
+#! /bin/bash -
+#
+# Copyright (c) 2010 Apple Inc. All rights reserved.
+#
+# @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+# 
+# This file contains Original Code and/or Modifications of Original Code
+# as defined in and that are subject to the Apple Public Source License
+# Version 2.0 (the 'License'). You may not use this file except in
+# compliance with the License. Please obtain a copy of the License at
+# http://www.opensource.apple.com/apsl/ and read it before using this
+# file.
+# 
+# The Original Code and all software distributed under the License are
+# distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+# EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+# INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+# Please see the License for the specific language governing rights and
+# limitations under the License.
+# 
+# @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+#
+
+{
+cat <<EOF
+/* Copyright (c) 2010 Apple Inc. All rights reserved.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _CDEFS_H_
+# error "Never use <sys/_symbol_aliasing.h> directly.  Use <sys/cdefs.h> instead."
+#endif
+
+EOF
+
+for ver in $(${SDKROOT}/usr/local/libexec/availability.pl --ios) ; do
+    ver_major=${ver%.*}
+    ver_minor=${ver#*.}
+    value=$(printf "%d%02d00" ${ver_major} ${ver_minor})
+    str=$(printf "__IPHONE_%d_%d" ${ver_major} ${ver_minor})
+    echo "#if defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ >= ${value}"
+    echo "#define __DARWIN_ALIAS_STARTING_IPHONE_${str}(x) x"
+    echo "#else"
+    echo "#define __DARWIN_ALIAS_STARTING_IPHONE_${str}(x)"
+    echo "#endif"
+    echo ""
+done
+
+for ver in $(${SDKROOT}/usr/local/libexec/availability.pl --macosx) ; do
+    ver_major=${ver%.*}
+    ver_minor=${ver#*.}
+    value=$(printf "%d%d0" ${ver_major} ${ver_minor})
+    str=$(printf "__MAC_%d_%d" ${ver_major} ${ver_minor})
+    echo "#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= ${value}"
+    echo "#define __DARWIN_ALIAS_STARTING_MAC_${str}(x) x"
+    echo "#else"
+    echo "#define __DARWIN_ALIAS_STARTING_MAC_${str}(x)"
+    echo "#endif"
+    echo ""
+done
+} > "$1"
+
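
The Mac OS X loop works the same way, with the version list supplied by
the SDK's availability.pl. Assuming that list includes 10.7, the "%d%d0"
format yields 1070 and the generated stanza would read:

    #if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1070
    #define __DARWIN_ALIAS_STARTING_MAC___MAC_10_7(x) x
    #else
    #define __DARWIN_ALIAS_STARTING_MAC___MAC_10_7(x)
    #endif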
diff --git a/bsd/sys/malloc.h b/bsd/sys/malloc.h
index dcbaaded7..4e8688735 100644
--- a/bsd/sys/malloc.h
+++ b/bsd/sys/malloc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -179,7 +179,7 @@
 #define M_IP6NDP	86	/* IPv6 Neighbour Discovery*/
 #define M_IP6OPT	87	/* IPv6 options management */
 #define M_IP6MISC	88	/* IPv6 misc. memory */
-#define M_TSEGQ		89	/* TCP segment queue entry */
+#define M_TSEGQ		89	/* TCP segment queue entry, unused */
 #define M_IGMP		90
 #define M_JNL_JNL   91  /* Journaling: "struct journal" */
 #define M_JNL_TR    92  /* Journaling: "struct transaction" */ 
@@ -204,8 +204,13 @@
 #if HFS_COMPRESSION
 #define M_DECMPFS_CNODE	109	/* decmpfs cnode structures */
 #endif /* HFS_COMPRESSION */
+#define M_INMFILTER	110	/* IPv4 multicast PCB-layer source filter */
+#define M_IPMSOURCE	111	/* IPv4 multicast IGMP-layer source filter */
+#define	M_IN6MFILTER	112	/* IPv6 multicast PCB-layer source filter */
+#define	M_IP6MOPTS	113	/* IPv6 multicast options */
+#define	M_IP6MSOURCE	114	/* IPv6 multicast MLD-layer source filter */
 
-#define	M_LAST		110	/* Must be last type + 1 */
+#define	M_LAST		115	/* Must be last type + 1 */
 
 #else /* BSD_KERNEL_PRIVATE */
 
@@ -253,6 +258,9 @@ extern struct kmemstats kmemstats[];
 #define FREE(addr, type) \
 	_FREE((void *)addr, type)
 
+#define	REALLOC(space, cast, addr, size, type, flags) \
+	(space) = (cast)_REALLOC(addr, size, type, flags)
+
 #define MALLOC_ZONE(space, cast, size, type, flags) \
 	(space) = (cast)_MALLOC_ZONE(size, type, flags)
 
@@ -268,6 +276,12 @@ extern void	_FREE(
 			void		*addr,
 			int		type);
 
+extern void	*_REALLOC(
+			void		*addr,
+			size_t		size,
+			int		type,
+			int		flags);
+
 extern void	*_MALLOC_ZONE(
 			size_t		size,
 			int		type,
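
REALLOC follows the space/cast convention of the existing MALLOC and
MALLOC_ZONE wrappers. A sketch, assuming _REALLOC has realloc(3)-like
semantics (the original block survives a failed M_NOWAIT attempt):

    int *table;

    MALLOC(table, int *, count * sizeof (int), M_TEMP, M_WAITOK);
    /* ... later, the table needs to double ... */
    REALLOC(table, int *, table, 2 * count * sizeof (int), M_TEMP, M_NOWAIT);
    if (table == NULL)
            return (ENOMEM);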
diff --git a/bsd/sys/mbuf.h b/bsd/sys/mbuf.h
index 247d7bb71..f0d45c565 100644
--- a/bsd/sys/mbuf.h
+++ b/bsd/sys/mbuf.h
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
@@ -11,10 +11,10 @@
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- * 
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,12 +22,12 @@
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */
 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
-/* 
+/*
  * Mach Operating System
  * Copyright (c) 1987 Carnegie-Mellon University
  * All rights reserved.  The CMU software License Agreement specifies
@@ -68,11 +68,6 @@
  * SUCH DAMAGE.
  *
  *	@(#)mbuf.h	8.3 (Berkeley) 1/21/94
- **********************************************************************
- * HISTORY
- * 20-May-95  Mac Gillon (mgillon) at NeXT
- *	New version based on 4.4
- *	Purged old history
  */
 /*
  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
@@ -87,7 +82,7 @@
 #include <sys/cdefs.h>
 #include <sys/appleapiopts.h>
 
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 
 #include <sys/lock.h>
 #include <sys/queue.h>
@@ -99,18 +94,10 @@
 /*
  * Mbufs are of a single size, MSIZE (machine/param.h), which
  * includes overhead.  An mbuf may add a single "mbuf cluster" of size
- * MCLBYTES (also in machine/param.h), which has no additional overhead
- * and is used instead of the internal data area; this is done when
- * at least MINCLSIZE of data must be stored.
- */
-
-/*
- * These macros are mapped to the appropriate KPIs, so that private code
- * can be simply recompiled in order to be forward-compatible with future
- * changes toward the struture sizes.
+ * MCLBYTES/MBIGCLBYTES/M16KCLBYTES (also in machine/param.h), which has
+ * no additional overhead and is used instead of the internal data area;
+ * this is done when at least MINCLSIZE of data must be stored.
  */
-#define	MLEN		mbuf_get_mlen()		/* normal data len */
-#define	MHLEN		mbuf_get_mhlen()	/* data len w/pkthdr */
 
 /*
  * The following _MLEN and _MHLEN macros are private to xnu.  Private code
@@ -120,24 +107,31 @@
 #define	_MLEN		(MSIZE - sizeof(struct m_hdr))	/* normal data len */
 #define	_MHLEN		(_MLEN - sizeof(struct pkthdr))	/* data len w/pkthdr */
 
-#define	MINCLSIZE	(MHLEN + MLEN)	/* smallest amount to put in cluster */
-#define	M_MAXCOMPRESS	(MHLEN / 2)	/* max amount to copy for compression */
+#define	NMBPBGSHIFT	(MBIGCLSHIFT - MSIZESHIFT)
+#define	NMBPBG		(1 << NMBPBGSHIFT)	/* # of mbufs per big cl */
 
-#define NMBPCL		(sizeof(union mcluster) / sizeof(struct mbuf))
+#define	NCLPBGSHIFT	(MBIGCLSHIFT - MCLSHIFT)
+#define	NCLPBG		(1 << NCLPBGSHIFT)	/* # of cl per big cl */
+
+#define	NMBPCLSHIFT	(NMBPBGSHIFT - NCLPBGSHIFT)
+#define	NMBPCL		(1 << NMBPCLSHIFT)	/* # of mbufs per cl */
+
+#define	NCLPJCLSHIFT	((M16KCLSHIFT - MBIGCLSHIFT) + NCLPBGSHIFT)
+#define	NCLPJCL		(1 << NCLPJCLSHIFT)	/* # of cl per jumbo cl */
 
 /*
  * Macros for type conversion
  * mtod(m,t) -	convert mbuf pointer to data pointer of correct type
  * dtom(x) -	convert data pointer within mbuf to mbuf pointer (XXX)
  */
-#define mtod(m,t)       ((t)m_mtod(m))
-#define dtom(x)         m_dtom(x)
+#define	mtod(m, t)	((t)m_mtod(m))
+#define	dtom(x)		m_dtom(x)
 
 /* header at beginning of each mbuf: */
 struct m_hdr {
 	struct	mbuf *mh_next;		/* next buffer in chain */
 	struct	mbuf *mh_nextpkt;	/* next chain in queue/record */
-	int32_t     mh_len;		/* amount of data in this mbuf */
+	int32_t mh_len;			/* amount of data in this mbuf */
 	caddr_t	mh_data;		/* location of data */
 	short	mh_type;		/* type of data in this mbuf */
 	short	mh_flags;		/* flags; see below */
@@ -147,10 +141,29 @@ struct m_hdr {
  * Packet tag structure (see below for details).
  */
 struct m_tag {
+	u_int64_t		m_tag_cookie;	/* Error checking */
+#ifndef __LP64__
+	u_int32_t		pad;		/* For structure alignment */
+#endif /* !__LP64__ */
 	SLIST_ENTRY(m_tag)	m_tag_link;	/* List of packet tags */
-	u_int16_t			m_tag_type;	/* Module specific type */
-	u_int16_t			m_tag_len;	/* Length of data */
-	u_int32_t			m_tag_id;	/* Module ID */
+	u_int16_t		m_tag_type;	/* Module specific type */
+	u_int16_t		m_tag_len;	/* Length of data */
+	u_int32_t		m_tag_id;	/* Module ID */
+};
+
+#ifdef __LP64__
+#define	M_TAG_ALIGN(len) \
+	P2ROUNDUP(len, sizeof (u_int64_t)) + sizeof (struct m_tag)
+#else
+#define	M_TAG_ALIGN(len) \
+	P2ROUNDUP(len, sizeof (u_int32_t)) + sizeof (struct m_tag)
+#endif /* !__LP64__ */
+
+#define	M_TAG_VALID_PATTERN	0xfeedfacefeedfaceULL
+#define	M_TAG_FREE_PATTERN	0xdeadbeefdeadbeefULL
+
+struct m_taghdr {
+	u_int64_t		refcnt;		/* Number of tags in this mbuf */
 };
 
 /* record/packet header in first mbuf of chain; valid if M_PKTHDR set */
@@ -160,14 +173,14 @@ struct	pkthdr {
 
 	/* variables for ip and tcp reassembly */
 	void	*header;		/* pointer to packet header */
-        /* variables for hardware checksum */
-    	/* Note: csum_flags is used for hardware checksum and VLAN */
-        int     csum_flags;             /* flags regarding checksum */       
-        int     csum_data;              /* data field used by csum routines */
+	/* variables for hardware checksum */
+	/* Note: csum_flags is used for hardware checksum and VLAN */
+	int	csum_flags;		/* flags regarding checksum */
+	int	csum_data;		/* data field used by csum routines */
 	u_int	tso_segsz;		/* TSO segment size (actual MSS) */
 	u_short	vlan_tag;		/* VLAN tag, host byte order */
 	u_short socket_id;		/* socket id */
-        SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
+	SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
 #if PF_PKTHDR
 	/*
 	 * Be careful; {en,dis}abling PF_PKTHDR will require xnu recompile;
@@ -176,16 +189,16 @@ struct	pkthdr {
 	 */
 	struct pf_mtag pf_mtag;
 #endif /* PF_PKTHDR */
-#if PKT_PRIORITY
 	u_int32_t prio;			/* packet priority */
-#endif /* PKT_PRIORITY */
+	u_short	vt_nrecs;		/* # of IGMPv3 records in this chain */
+	u_short _pad;
 };
 
 
 /* description of external storage mapped into mbuf, valid if M_EXT set */
 struct m_ext {
 	caddr_t	ext_buf;		/* start of buffer */
-	void	(*ext_free)(caddr_t , u_int, caddr_t);	/* free routine if not the usual */
+	void	(*ext_free)(caddr_t, u_int, caddr_t);	/* free routine if not the usual */
 	u_int	ext_size;		/* size of buffer, for ext_free */
 	caddr_t	ext_arg;		/* additional ext_free argument */
 	struct	ext_refsq {		/* references held */
@@ -226,58 +239,71 @@ struct mbuf {
 #define	m_pktdat	M_dat.MH.MH_dat.MH_databuf
 #define	m_dat		M_dat.M_databuf
 
-/* mbuf flags */
+/* mbuf flags (private) */
 #define	M_EXT		0x0001	/* has associated external storage */
 #define	M_PKTHDR	0x0002	/* start of record */
 #define	M_EOR		0x0004	/* end of record */
 #define	M_PROTO1	0x0008	/* protocol-specific */
 #define	M_PROTO2	0x0010	/* protocol-specific */
 #define	M_PROTO3	0x0020	/* protocol-specific */
-#define	M_PROTO4	0x0040	/* protocol-specific */
+#define	M_LOOP		0x0040	/* packet is looped back */
 #define	M_PROTO5	0x0080	/* protocol-specific */
 
-/* mbuf pkthdr flags, also in m_flags */
+/* mbuf pkthdr flags, also in m_flags (private) */
 #define	M_BCAST		0x0100	/* send/received as link-level broadcast */
 #define	M_MCAST		0x0200	/* send/received as link-level multicast */
 #define	M_FRAG		0x0400	/* packet is a fragment of a larger packet */
 #define	M_FIRSTFRAG	0x0800	/* packet is first fragment */
 #define	M_LASTFRAG	0x1000	/* packet is last fragment */
 #define	M_PROMISC	0x2000	/* packet is promiscuous (shouldn't go to stack) */
+#define	M_HASFCS	0x4000	/* packet has FCS */
+#define	M_TAGHDR	0x8000	/* m_tag hdr structure at top of mbuf data */
+
+/*
+ * Flags to purge when crossing layers.
+ */
+#define	M_PROTOFLAGS \
+	(M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO5)
 
 /* flags copied when copying m_pkthdr */
-#define M_COPYFLAGS     (M_PKTHDR|M_EOR|M_PROTO1|M_PROTO2|M_PROTO3 | \
-                            M_PROTO4|M_PROTO5|M_BCAST|M_MCAST|M_FRAG | \
-                            M_FIRSTFRAG|M_LASTFRAG|M_PROMISC)
-
-/* flags indicating hw checksum support and sw checksum requirements [freebsd4.1]*/
-#define CSUM_IP                 0x0001          /* will csum IP */
-#define CSUM_TCP                0x0002          /* will csum TCP */
-#define CSUM_UDP                0x0004          /* will csum UDP */
-#define CSUM_IP_FRAGS           0x0008          /* will csum IP fragments */
-#define CSUM_FRAGMENT           0x0010          /* will do IP fragmentation */
-        
-#define CSUM_IP_CHECKED         0x0100          /* did csum IP */
-#define CSUM_IP_VALID           0x0200          /*   ... the csum is valid */
-#define CSUM_DATA_VALID         0x0400          /* csum_data field is valid */
-#define CSUM_PSEUDO_HDR         0x0800          /* csum_data has pseudo hdr */
-#define CSUM_TCP_SUM16          0x1000          /* simple TCP Sum16 computation */
- 
-#define CSUM_DELAY_DATA         (CSUM_TCP | CSUM_UDP)
-#define CSUM_DELAY_IP           (CSUM_IP)       /* XXX add ipv6 here too? */
+#define	M_COPYFLAGS							\
+	(M_PKTHDR|M_EOR|M_PROTO1|M_PROTO2|M_PROTO3 |			\
+	M_LOOP|M_PROTO5|M_BCAST|M_MCAST|M_FRAG |			\
+	M_FIRSTFRAG|M_LASTFRAG|M_PROMISC|M_HASFCS)
+
+/* flags indicating hw checksum support and sw checksum requirements [freebsd4.1] */
+#define	CSUM_IP			0x0001		/* will csum IP */
+#define	CSUM_TCP		0x0002		/* will csum TCP */
+#define	CSUM_UDP		0x0004		/* will csum UDP */
+#define	CSUM_IP_FRAGS		0x0008		/* will csum IP fragments */
+#define	CSUM_FRAGMENT		0x0010		/* will do IP fragmentation */
+#define	CSUM_TCPIPV6		0x0020		/* will csum TCP for IPv6 */
+#define	CSUM_UDPIPV6		0x0040		/* will csum UDP for IPv6 */
+#define	CSUM_FRAGMENT_IPV6	0x0080		/* will do IPv6 fragmentation */
+
+#define	CSUM_IP_CHECKED		0x0100		/* did csum IP */
+#define	CSUM_IP_VALID		0x0200		/*   ... the csum is valid */
+#define	CSUM_DATA_VALID		0x0400		/* csum_data field is valid */
+#define	CSUM_PSEUDO_HDR		0x0800		/* csum_data has pseudo hdr */
+#define	CSUM_TCP_SUM16		0x1000		/* simple TCP Sum16 computation */
+
+#define	CSUM_DELAY_DATA		(CSUM_TCP | CSUM_UDP)
+#define	CSUM_DELAY_IP		(CSUM_IP)	/* IPv4 only: no IPv6 IP cksum */
+#define	CSUM_DELAY_IPV6_DATA	(CSUM_TCPIPV6 | CSUM_UDPIPV6)
+#define	CSUM_DATA_IPV6_VALID    CSUM_DATA_VALID	/* csum_data field is valid */
 /*
  * Note: see also IF_HWASSIST_CSUM defined in <net/if_var.h>
  */
 /* bottom 16 bits reserved for hardware checksum */
-#define CSUM_CHECKSUM_MASK	0xffff
+#define	CSUM_CHECKSUM_MASK	0xffff
 
 /* VLAN tag present */
-#define CSUM_VLAN_TAG_VALID	0x10000		/* vlan_tag field is valid */
+#define	CSUM_VLAN_TAG_VALID	0x10000		/* vlan_tag field is valid */
 
 /* TCP Segment Offloading requested on this mbuf */
-#define CSUM_TSO_IPV4          	0x100000          /* This mbuf needs to be segmented by the NIC */
-#define CSUM_TSO_IPV6          	0x200000          /* This mbuf needs to be segmented by the NIC */
-#endif /* KERNEL_PRIVATE */
-
+#define	CSUM_TSO_IPV4		0x100000	/* This mbuf needs to be segmented by the NIC */
+#define	CSUM_TSO_IPV6		0x200000	/* This mbuf needs to be segmented by the NIC */
+#endif /* XNU_KERNEL_PRIVATE */
 
 /* mbuf types */
 #define	MT_FREE		0	/* should be on free list */
@@ -293,20 +319,12 @@ struct mbuf {
 #define	MT_FTABLE	11	/* fragment reassembly header */
 #define	MT_RIGHTS	12	/* access rights */
 #define	MT_IFADDR	13	/* interface address */
-#define MT_CONTROL	14	/* extra-data protocol message */
-#define MT_OOBDATA	15	/* expedited data  */
-#define MT_TAG          16      /* volatile metadata associated to pkts */
-#define MT_MAX		32	/* enough? */
-
-#ifdef KERNEL_PRIVATE
-
-/* flags to m_get/MGET */
-/* Need to include malloc.h to get right options for malloc  */
-#include	<sys/malloc.h>
-
-#define	M_DONTWAIT	M_NOWAIT
-#define	M_WAIT		M_WAITOK
+#define	MT_CONTROL	14	/* extra-data protocol message */
+#define	MT_OOBDATA	15	/* expedited data  */
+#define	MT_TAG		16	/* volatile metadata associated to pkts */
+#define	MT_MAX		32	/* enough? */
 
+#ifdef XNU_KERNEL_PRIVATE
 /*
  * mbuf allocation/deallocation macros:
  *
@@ -319,9 +337,9 @@ struct mbuf {
  */
 
 #if 1
-#define MCHECK(m) m_mcheck(m)
+#define	MCHECK(m) m_mcheck(m)
 #else
-#define MCHECK(m)
+#define	MCHECK(m)
 #endif
 
 #define	MGET(m, how, type) ((m) = m_get((how), (type)))
@@ -347,27 +365,27 @@ union mcluster {
 
 #define	MCLALLOC(p, how)	((p) = m_mclalloc(how))
 
-#define	MCLFREE(p)	m_mclfree(p)
+#define	MCLFREE(p)		m_mclfree(p)
 
-#define	MCLGET(m, how) 	((m) = m_mclget(m, how))
+#define	MCLGET(m, how)		((m) = m_mclget(m, how))
 
 /*
  * Mbuf big cluster
  */
-
 union mbigcluster {
 	union mbigcluster	*mbc_next;
-	char 			mbc_buf[NBPG];
+	char			mbc_buf[MBIGCLBYTES];
 };
 
-#define	M16KCLBYTES	(16 * 1024)
-
+/*
+ * Mbuf jumbo cluster
+ */
 union m16kcluster {
 	union m16kcluster	*m16kcl_next;
 	char			m16kcl_buf[M16KCLBYTES];
 };
 
-#define MCLHASREFERENCE(m) m_mclhasreference(m)
+#define	MCLHASREFERENCE(m)	m_mclhasreference(m)
 
 /*
  * MFREE(struct mbuf *m, struct mbuf *n)
@@ -388,14 +406,19 @@ union m16kcluster {
  * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place
  * an object of the specified size at the end of the mbuf, longword aligned.
  */
-#define	M_ALIGN(m, len)				\
-	{ (m)->m_data += (MLEN - (len)) &~ (sizeof(long) - 1); }
+#define	M_ALIGN(m, len)							\
+do {									\
+	(m)->m_data += (MLEN - (len)) &~ (sizeof (long) - 1);		\
+} while (0)
+
 /*
  * As above, for mbufs allocated with m_gethdr/MGETHDR
  * or initialized by M_COPY_PKTHDR.
  */
-#define	MH_ALIGN(m, len) \
-	{ (m)->m_data += (MHLEN - (len)) &~ (sizeof(long) - 1); }
+#define	MH_ALIGN(m, len)						\
+do {									\
+	(m)->m_data += (MHLEN - (len)) &~ (sizeof (long) - 1);		\
+} while (0)
 
 /*
  * Compute the amount of space available
@@ -417,21 +440,84 @@ union m16kcluster {
  * If how is M_DONTWAIT and allocation fails, the original mbuf chain
  * is freed and m is set to NULL.
  */
-#define	M_PREPEND(m, plen, how) 	((m) = m_prepend_2((m), (plen), (how)))
+#define	M_PREPEND(m, plen, how)	((m) = m_prepend_2((m), (plen), (how)))
 
 /* change mbuf to new type */
-#define MCHTYPE(m, t) 		m_mchtype(m, t)
-
-/* length to m_copy to copy all */
-#define	M_COPYALL	1000000000
+#define	MCHTYPE(m, t)		m_mchtype(m, t)
 
 /* compatibility with 4.3 */
-#define  m_copy(m, o, l)	m_copym((m), (o), (l), M_DONTWAIT)
+#define	m_copy(m, o, l)		m_copym((m), (o), (l), M_DONTWAIT)
 
 #define	MBSHIFT		20				/* 1MB */
+#define	MBSIZE		(1 << MBSHIFT)
 #define	GBSHIFT		30				/* 1GB */
+#define	GBSIZE		(1 << GBSHIFT)
 
-#endif /* KERNEL_PRIVATE */
+/*
+ * M_STRUCT_GET ensures that the intermediate protocol header (at offset
+ * "off", of length "len") is located in a single mbuf, in a contiguous
+ * memory region.  A pointer to the region is returned in the pointer
+ * variable "val", cast to type "typ".
+ *
+ * M_STRUCT_GET0 does the same, except that it aligns the structure at the
+ * very top of the mbuf.  GET0 is more likely than GET to incur a memory copy.
+ */
+#define	M_STRUCT_GET(val, typ, m, off, len)				\
+do {									\
+	struct mbuf *t;							\
+	int tmp;							\
+									\
+	if ((m)->m_len >= (off) + (len)) {				\
+		(val) = (typ)(mtod((m), caddr_t) + (off));		\
+	} else {							\
+		t = m_pulldown((m), (off), (len), &tmp);		\
+		if (t != NULL) {					\
+			if (t->m_len < tmp + (len))			\
+				panic("m_pulldown malfunction");	\
+			(val) = (typ)(mtod(t, caddr_t) + tmp);		\
+		} else {						\
+			(val) = (typ)NULL;				\
+			(m) = NULL;					\
+		}							\
+	}								\
+} while (0)
+
+#define	M_STRUCT_GET0(val, typ, m, off, len)				\
+do {									\
+	struct mbuf *t;							\
+									\
+	if ((off) == 0) {						\
+		(val) = (typ)mtod(m, caddr_t);				\
+	} else {							\
+		t = m_pulldown((m), (off), (len), NULL);		\
+		if (t != NULL) {					\
+			if (t->m_len < (len))				\
+				panic("m_pulldown malfunction");	\
+			(val) = (typ)mtod(t, caddr_t);			\
+		} else {						\
+			(val) = (typ)NULL;				\
+			(m) = NULL;					\
+		}							\
+	}								\
+} while (0)
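
A typical consumer is protocol-header parsing. A sketch that pulls an
IPv6 fragment header (struct ip6_frag, from <netinet/ip6.h>) into
contiguous memory, where off is the assumed offset of the header within
the chain:

    struct ip6_frag *ip6f;

    M_STRUCT_GET(ip6f, struct ip6_frag *, m, off, sizeof (*ip6f));
    if (ip6f == NULL)
            return (ENOBUFS);   /* m_pulldown() freed the chain; m is NULL */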
+
+#define	MBUF_INPUT_CHECK(m, rcvif)					\
+do {									\
+	if (!(m->m_flags & MBUF_PKTHDR) ||				\
+	    m->m_len < 0 ||						\
+	    m->m_len > ((njcl > 0) ? njclbytes : MBIGCLBYTES) ||	\
+	    m->m_type == MT_FREE ||					\
+	    ((m->m_flags & M_EXT) != 0 && m->m_ext.ext_buf == NULL)) {	\
+		panic("Failed mbuf validity check: mbuf %p len %d "	\
+		    "type %d flags 0x%x data %p rcvif %s%d ifflags 0x%x",  \
+		    m, m->m_len, m->m_type, m->m_flags,			   \
+		    ((m->m_flags & M_EXT) ? m->m_ext.ext_buf : m->m_data), \
+		    rcvif->if_name, rcvif->if_unit,			\
+		    (rcvif->if_flags & 0xffff));			\
+	}								\
+} while (0)
+
+#endif /* XNU_KERNEL_PRIVATE */
 
 /*
  * Mbuf statistics (legacy).
@@ -481,7 +567,7 @@ struct ombstat {
  */
 #define	MAX_MBUF_CNAME	15
 
-#if defined(KERNEL_PRIVATE)
+#if defined(XNU_KERNEL_PRIVATE)
 /* For backwards compatibility with 32-bit userland process */
 struct omb_class_stat {
 	char		mbcl_cname[MAX_MBUF_CNAME + 1]; /* class name */
@@ -506,7 +592,7 @@ struct omb_class_stat {
 	u_int32_t	mbcl_mc_nwretry_cnt; /* # of no-wait retry attempts */
 	u_int64_t	mbcl_reserved[4];    /* for future use */
 } __attribute__((__packed__));
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 typedef struct mb_class_stat {
 	char		mbcl_cname[MAX_MBUF_CNAME + 1]; /* class name */
@@ -540,13 +626,13 @@ typedef struct mb_class_stat {
 #define	MCS_PURGING	2	/* cache is being purged */
 #define	MCS_OFFLINE	3	/* cache is offline (resizing) */
 
-#if defined(KERNEL_PRIVATE)
+#if defined(XNU_KERNEL_PRIVATE)
 /* For backwards compatibility with 32-bit userland process */
 struct omb_stat {
 	u_int32_t		mbs_cnt;	/* number of classes */
 	struct omb_class_stat	mbs_class[1];	/* class array */
 } __attribute__((__packed__));
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 
 typedef struct mb_stat {
 	u_int32_t	mbs_cnt;	/* number of classes */
@@ -556,120 +642,199 @@ typedef struct mb_stat {
 	mb_class_stat_t	mbs_class[1];	/* class array */
 } mb_stat_t;
 
+#ifdef PRIVATE
+#define	MLEAK_STACK_DEPTH	16	/* Max PC stack depth */
+
+typedef struct mleak_trace_stat {
+	u_int64_t	mltr_collisions;
+	u_int64_t	mltr_hitcount;
+	u_int64_t	mltr_allocs;
+	u_int64_t	mltr_depth;
+	u_int64_t	mltr_addr[MLEAK_STACK_DEPTH];
+} mleak_trace_stat_t;
+
+typedef struct mleak_stat {
+	u_int32_t		ml_isaddr64;	/* 64-bit KVA? */
+	u_int32_t		ml_cnt;		/* number of traces */
+	mleak_trace_stat_t	ml_trace[1];	/* trace array */
+} mleak_stat_t;
+
+struct mleak_table {
+	u_int32_t mleak_capture;	/* sampling capture counter */
+	u_int32_t mleak_sample_factor;	/* sample factor */
+
+	/* Times two active records want to occupy the same spot */
+	u_int64_t alloc_collisions;
+	u_int64_t trace_collisions;
+
+	/* Times new record lands on spot previously occupied by freed alloc */
+	u_int64_t alloc_overwrites;
+	u_int64_t trace_overwrites;
+
+	/* Times a new alloc or trace is put into the hash table */
+	u_int64_t alloc_recorded;
+	u_int64_t trace_recorded;
+
+	/* Total number of outstanding allocs */
+	u_int64_t outstanding_allocs;
+
+	/* Times mleak_log returned false because it couldn't acquire the lock */
+	u_int64_t total_conflicts;
+};
+#endif /* PRIVATE */
+
 #ifdef KERNEL_PRIVATE
+__BEGIN_DECLS
 
-#ifdef	KERNEL
-extern union 	mcluster *mbutl;	/* virtual address of mclusters */
-extern union 	mcluster *embutl;	/* ending virtual address of mclusters */
-extern struct 	mbstat mbstat;		/* statistics */
-extern unsigned int nmbclusters;	/* number of mapped clusters */
-extern int	njcl;			/* # of clusters for jumbo sizes */
-extern int	njclbytes;		/* size of a jumbo cluster */
-extern int	max_linkhdr;		/* largest link-level header */
-extern int	max_protohdr;		/* largest protocol header */
-extern int	max_hdr;		/* largest link+protocol header */
-extern int	max_datalen;		/* MHLEN - max_hdr */
+/*
+ * Exported (private)
+ */
+
+extern struct mbstat mbstat;			/* statistics */
+
+__END_DECLS
+#endif /* KERNEL_PRIVATE */
 
+#ifdef XNU_KERNEL_PRIVATE
 __BEGIN_DECLS
-/* Not exported */
-__private_extern__ unsigned int mbuf_default_ncl(int, uint64_t);
+
+/*
+ * Not exported (xnu private)
+ */
+
+/* flags to m_get/MGET */
+/* Need to include malloc.h to get right options for malloc  */
+#include	<sys/malloc.h>
+
+struct mbuf;
+
+/* length to m_copy to copy all */
+#define	M_COPYALL	1000000000
+
+#define	M_DONTWAIT	M_NOWAIT
+#define	M_WAIT		M_WAITOK
+
+/*
+ * These macros are mapped to the appropriate KPIs, so that private code
+ * can simply be recompiled in order to be forward-compatible with future
+ * changes to the structure sizes.
+ */
+#define	MLEN		mbuf_get_mlen()		/* normal data len */
+#define	MHLEN		mbuf_get_mhlen()	/* data len w/pkthdr */
+
+#define	MINCLSIZE	mbuf_get_minclsize()	/* cluster usage threshold */
+
+extern void m_freem(struct mbuf *);
+extern char *mcl_to_paddr(char *);
+extern void m_adj(struct mbuf *, int);
+extern void m_cat(struct mbuf *, struct mbuf *);
+extern void m_copydata(struct mbuf *, int, int, void *);
+extern struct mbuf *m_copym(struct mbuf *, int, int, int);
+extern struct mbuf *m_get(int, int);
+extern struct mbuf *m_gethdr(int, int);
+extern struct mbuf *m_getpacket(void);
+extern struct mbuf *m_getpackets(int, int, int);
+extern struct mbuf *m_mclget(struct mbuf *, int);
+extern void *m_mtod(struct mbuf *);
+extern struct mbuf *m_prepend_2(struct mbuf *, int, int);
+extern struct mbuf *m_pullup(struct mbuf *, int);
+extern struct mbuf *m_split(struct mbuf *, int, int);
+extern void m_mclfree(caddr_t p);
+
+__private_extern__ union mbigcluster *mbutl;	/* start VA of mbuf pool */
+__private_extern__ union mbigcluster *embutl;	/* end VA of mbuf pool */
+__private_extern__ unsigned int nmbclusters;	/* number of mapped clusters */
+__private_extern__ int njcl;		/* # of jumbo clusters  */
+__private_extern__ int njclbytes;	/* size of a jumbo cluster */
+__private_extern__ int max_linkhdr;	/* largest link-level header */
+__private_extern__ int max_protohdr;	/* largest protocol header */
+__private_extern__ int max_hdr;		/* largest link+protocol header */
+__private_extern__ int max_datalen;	/* MHLEN - max_hdr */
+
+__private_extern__ unsigned int mbuf_default_ncl(int, u_int64_t);
 __private_extern__ void mbinit(void);
 __private_extern__ struct mbuf *m_clattach(struct mbuf *, int, caddr_t,
-    void (*)(caddr_t , u_int, caddr_t), u_int, caddr_t, int);
+    void (*)(caddr_t, u_int, caddr_t), u_int, caddr_t, int);
 __private_extern__ caddr_t m_bigalloc(int);
 __private_extern__ void m_bigfree(caddr_t, u_int, caddr_t);
 __private_extern__ struct mbuf *m_mbigget(struct mbuf *, int);
 __private_extern__ caddr_t m_16kalloc(int);
 __private_extern__ void m_16kfree(caddr_t, u_int, caddr_t);
 __private_extern__ struct mbuf *m_m16kget(struct mbuf *, int);
-__private_extern__ void mbuf_growth_aggressive(void);
-__private_extern__ void mbuf_growth_normal(void);
-
-/* Exported */
-struct	mbuf *m_copym(struct mbuf *, int, int, int);
-struct	mbuf *m_split(struct mbuf *, int, int);
-struct	mbuf *m_free(struct mbuf *);
-struct	mbuf *m_get(int, int);
-struct	mbuf *m_getpacket(void);
-struct	mbuf *m_getclr(int, int);
-struct	mbuf *m_gethdr(int, int);
-struct	mbuf *m_prepend(struct mbuf *, int, int);
-struct  mbuf *m_prepend_2(struct mbuf *, int, int);
-struct	mbuf *m_pullup(struct mbuf *, int);
-struct	mbuf *m_retry(int, int);
-struct	mbuf *m_retryhdr(int, int);
-void m_adj(struct mbuf *, int);
-void m_freem(struct mbuf *);
-int m_freem_list(struct mbuf *);
-struct	mbuf *m_devget(char *, int, int, struct ifnet *, void (*)(const void *, void *, size_t));
-char   *mcl_to_paddr(char *);
-struct mbuf *m_pulldown(struct mbuf*, int, int, int*);
-
-extern struct mbuf *m_getcl(int, int, int);
-struct mbuf *m_mclget(struct mbuf *, int);
-caddr_t m_mclalloc(int);
-void m_mclfree(caddr_t p);
-int m_mclhasreference(struct mbuf *);
-void m_copy_pkthdr(struct mbuf *, struct mbuf*);
-
-int m_mclref(struct mbuf *);
-int m_mclunref(struct mbuf *);
-
-void *          m_mtod(struct mbuf *);
-struct mbuf *   m_dtom(void *);
-int             m_mtocl(void *);
-union mcluster *m_cltom(int );
-
-int m_trailingspace(struct mbuf *);
-int m_leadingspace(struct mbuf *);
-
-struct mbuf *m_normalize(struct mbuf *m);
-void m_mchtype(struct mbuf *m, int t);
-void m_mcheck(struct mbuf*);
-
-extern void m_copyback(struct mbuf *, int , int , const void *);
-extern struct mbuf *m_copyback_cow(struct mbuf *, int, int, const void *, int);
-extern int m_makewritable(struct mbuf **, int, int, int);
-void m_copydata(struct mbuf *, int , int , void *);
-struct mbuf* m_dup(struct mbuf *m, int how);
-void m_cat(struct mbuf *, struct mbuf *);
-struct  mbuf *m_copym_with_hdrs(struct mbuf*, int, int, int, struct mbuf**, int*);
-struct mbuf *m_getpackets(int, int, int);
-struct mbuf * m_getpackethdrs(int , int );
-struct mbuf* m_getpacket_how(int );
-struct mbuf * m_getpackets_internal(unsigned int *, int , int , int , size_t);
-struct mbuf * m_allocpacket_internal(unsigned int * , size_t , unsigned int *, int , int , size_t );
 
-__END_DECLS
+__private_extern__ struct mbuf *m_free(struct mbuf *);
+__private_extern__ struct mbuf *m_getclr(int, int);
+__private_extern__ struct mbuf *m_getptr(struct mbuf *, int, int *);
+__private_extern__ unsigned int m_length(struct mbuf *);
+__private_extern__ struct mbuf *m_prepend(struct mbuf *, int, int);
+__private_extern__ struct mbuf *m_copyup(struct mbuf *, int, int);
+__private_extern__ struct mbuf *m_retry(int, int);
+__private_extern__ struct mbuf *m_retryhdr(int, int);
+__private_extern__ int m_freem_list(struct mbuf *);
+__private_extern__ int m_append(struct mbuf *, int, caddr_t);
+__private_extern__ struct mbuf *m_last(struct mbuf *);
+__private_extern__ struct mbuf *m_devget(char *, int, int, struct ifnet *,
+    void (*)(const void *, void *, size_t));
+__private_extern__ struct mbuf *m_pulldown(struct mbuf *, int, int, int *);
+
+__private_extern__ struct mbuf *m_getcl(int, int, int);
+__private_extern__ caddr_t m_mclalloc(int);
+__private_extern__ int m_mclhasreference(struct mbuf *);
+__private_extern__ void m_copy_pkthdr(struct mbuf *, struct mbuf *);
+
+__private_extern__ struct mbuf *m_dtom(void *);
+__private_extern__ int m_mtocl(void *);
+__private_extern__ union mcluster *m_cltom(int);
+
+__private_extern__ int m_trailingspace(struct mbuf *);
+__private_extern__ int m_leadingspace(struct mbuf *);
+
+__private_extern__ struct mbuf *m_normalize(struct mbuf *m);
+__private_extern__ void m_mchtype(struct mbuf *m, int t);
+__private_extern__ void m_mcheck(struct mbuf *);
+
+__private_extern__ void m_copyback(struct mbuf *, int, int, const void *);
+__private_extern__ struct mbuf *m_copyback_cow(struct mbuf *, int, int,
+    const void *, int);
+__private_extern__ int m_makewritable(struct mbuf **, int, int, int);
+__private_extern__ struct mbuf *m_dup(struct mbuf *m, int how);
+__private_extern__ struct mbuf *m_copym_with_hdrs(struct mbuf *, int, int, int,
+    struct mbuf **, int *);
+__private_extern__ struct mbuf *m_getpackethdrs(int, int);
+__private_extern__ struct mbuf *m_getpacket_how(int);
+__private_extern__ struct mbuf *m_getpackets_internal(unsigned int *, int,
+    int, int, size_t);
+__private_extern__ struct mbuf *m_allocpacket_internal(unsigned int *, size_t,
+    unsigned int *, int, int, size_t);
 
 /*
- Packets may have annotations attached by affixing a list of "packet
- tags" to the pkthdr structure.  Packet tags are dynamically allocated
- semi-opaque data structures that have a fixed header (struct m_tag)
- that specifies the size of the memory block and an <id,type> pair that
- identifies it. The id identifies the module and the type identifies the
- type of data for that module. The id of zero is reserved for the kernel.
- 
- Note that the packet tag returned by m_tag_allocate has the default
- memory alignment implemented by malloc.  To reference private data one
- can use a construct like:
- 
-      struct m_tag *mtag = m_tag_allocate(...);
-      struct foo *p = (struct foo *)(mtag+1);
- 
- if the alignment of struct m_tag is sufficient for referencing members
- of struct foo.  Otherwise it is necessary to embed struct m_tag within
- the private data structure to insure proper alignment; e.g.
- 
-      struct foo {
-              struct m_tag    tag;
-              ...
-      };
-      struct foo *p = (struct foo *) m_tag_allocate(...);
-      struct m_tag *mtag = &p->tag;
+ * Packets may have annotations attached by affixing a list of "packet
+ * tags" to the pkthdr structure.  Packet tags are dynamically allocated
+ * semi-opaque data structures that have a fixed header (struct m_tag)
+ * that specifies the size of the memory block and an <id,type> pair that
+ * identifies it. The id identifies the module and the type identifies the
+ * type of data for that module. The id of zero is reserved for the kernel.
+ *
+ * Note that the packet tag returned by m_tag_allocate has the default
+ * memory alignment implemented by malloc.  To reference private data one
+ * can use a construct like:
+ *
+ *      struct m_tag *mtag = m_tag_allocate(...);
+ *      struct foo *p = (struct foo *)(mtag+1);
+ *
+ * if the alignment of struct m_tag is sufficient for referencing members
+ * of struct foo.  Otherwise it is necessary to embed struct m_tag within
+ * the private data structure to ensure proper alignment; e.g.
+ *
+ *      struct foo {
+ *              struct m_tag    tag;
+ *              ...
+ *      };
+ *      struct foo *p = (struct foo *) m_tag_allocate(...);
+ *      struct m_tag *mtag = &p->tag;
  */
 
-#define KERNEL_MODULE_TAG_ID	0
+#define	KERNEL_MODULE_TAG_ID	0
 
 enum {
 	KERNEL_TAG_TYPE_NONE			= 0,
@@ -685,45 +850,27 @@ enum {
 	KERNEL_TAG_TYPE_PF			= 11
 };
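
A hedged sketch of the allocate/prepend/locate cycle described in the comment
above, using the kernel-reserved id (the tag type and payload here are
arbitrary examples):

	struct m_tag *mtag;

	/* Attach a 4-byte annotation to packet m under the kernel id. */
	mtag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF,
	    sizeof (u_int32_t), M_DONTWAIT);
	if (mtag != NULL) {
		*(u_int32_t *)(mtag + 1) = 0x1;	/* data follows the header */
		m_tag_prepend(m, mtag);
	}

	/* Later: find it again (NULL means search from the first tag). */
	mtag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF, NULL);
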
 
-/*
- * As a temporary and low impact solution to replace the even uglier
- * approach used so far in some parts of the network stack (which relies
- * on global variables), packet tag-like annotations are stored in MT_TAG
- * mbufs (or lookalikes) prepended to the actual mbuf chain.
- *
- *      m_type  = MT_TAG
- *      m_flags = m_tag_id
- *      m_next  = next buffer in chain.
- *
- * BE VERY CAREFUL not to pass these blocks to the mbuf handling routines.
- */
-#define _m_tag_id       m_hdr.mh_flags
-
-__BEGIN_DECLS
-
 /* Packet tag routines */
-struct  m_tag   *m_tag_alloc(u_int32_t id, u_int16_t type, int len, int wait);
-void             m_tag_free(struct m_tag *);
-void             m_tag_prepend(struct mbuf *, struct m_tag *);
-void             m_tag_unlink(struct mbuf *, struct m_tag *);
-void             m_tag_delete(struct mbuf *, struct m_tag *);
-void             m_tag_delete_chain(struct mbuf *, struct m_tag *);
-struct  m_tag   *m_tag_locate(struct mbuf *,u_int32_t id, u_int16_t type,
-							  struct m_tag *);
-struct  m_tag   *m_tag_copy(struct m_tag *, int wait);
-int              m_tag_copy_chain(struct mbuf *to, struct mbuf *from, int wait);
-void             m_tag_init(struct mbuf *);
-struct  m_tag   *m_tag_first(struct mbuf *);
-struct  m_tag   *m_tag_next(struct mbuf *, struct m_tag *);
-
-extern void m_prio_init(struct mbuf *);
-extern void m_prio_background(struct mbuf *);
+__private_extern__ struct  m_tag *m_tag_alloc(u_int32_t, u_int16_t, int, int);
+__private_extern__ struct  m_tag *m_tag_create(u_int32_t, u_int16_t, int, int,
+	struct mbuf *);
+__private_extern__ void m_tag_free(struct m_tag *);
+__private_extern__ void m_tag_prepend(struct mbuf *, struct m_tag *);
+__private_extern__ void m_tag_unlink(struct mbuf *, struct m_tag *);
+__private_extern__ void m_tag_delete(struct mbuf *, struct m_tag *);
+__private_extern__ void m_tag_delete_chain(struct mbuf *, struct m_tag *);
+__private_extern__ struct m_tag *m_tag_locate(struct mbuf *, u_int32_t,
+    u_int16_t, struct m_tag *);
+__private_extern__ struct m_tag *m_tag_copy(struct m_tag *, int);
+__private_extern__ int m_tag_copy_chain(struct mbuf *, struct mbuf *, int);
+__private_extern__ void m_tag_init(struct mbuf *);
+__private_extern__ struct  m_tag *m_tag_first(struct mbuf *);
+__private_extern__ struct  m_tag *m_tag_next(struct mbuf *, struct m_tag *);
+
+__private_extern__ void m_prio_init(struct mbuf *);
 
 __END_DECLS
-
-#endif /* KERNEL */
-
-#endif /* KERNEL_PRIVATE */
+#endif /* XNU_KERNEL_PRIVATE */
 #ifdef KERNEL
 #include <sys/kpi_mbuf.h>
 #endif /* KERNEL */
diff --git a/bsd/sys/mcache.h b/bsd/sys/mcache.h
index 21a169223..443e05b01 100644
--- a/bsd/sys/mcache.h
+++ b/bsd/sys/mcache.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -38,6 +38,7 @@ extern "C" {
 #include <sys/queue.h>
 #include <mach/boolean.h>
 #include <kern/locks.h>
+#include <libkern/OSAtomic.h>
 
 #ifdef ASSERT
 #undef ASSERT
@@ -57,11 +58,40 @@ extern "C" {
 #define	ASSERT(EX)	((void)0)
 #endif
 
-#if defined(__ppc__)
-#define	CPU_CACHE_SIZE	128
+#define	atomic_add_16_ov(a, n)						\
+	((u_int16_t) OSAddAtomic16(n, (volatile SInt16 *)a))
+
+#define	atomic_add_16(a, n)						\
+	((void) atomic_add_16_ov(a, n))
+
+#define	atomic_add_32_ov(a, n)						\
+	((u_int32_t) OSAddAtomic(n, (volatile SInt32 *)a))
+
+#define	atomic_add_32(a, n)						\
+	((void) atomic_add_32_ov(a, n))
+
+#define	atomic_add_64_ov(a, n)						\
+	((u_int64_t) OSAddAtomic64(n, (volatile SInt64 *)a))
+
+#define	atomic_add_64(a, n)						\
+	((void) atomic_add_64_ov(a, n))
+
+#define	atomic_set_64(a, n) do {					\
+	while (!OSCompareAndSwap64(*a, n, (volatile UInt64 *)a))	\
+		;							\
+} while (0)
+
+#if defined(__LP64__)
+#define	atomic_get_64(n, a) do {					\
+	(n) = *(a);							\
+} while (0)
 #else
+#define	atomic_get_64(n, a) do {					\
+	(n) = atomic_add_64_ov(a, 0);					\
+} while (0)
+#endif /* __LP64__ */
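
A small usage sketch, assuming a shared statistics counter: the _ov variants
return the updated value, the plain forms discard it, and on ILP32
atomic_get_64() reads via an add of zero because a plain 64-bit load is not
atomic there:

	static u_int64_t net_stat_bytes;

	void
	net_stat_update(u_int32_t len)
	{
		u_int64_t snapshot;

		atomic_add_64(&net_stat_bytes, len);	/* new value discarded */
		atomic_get_64(snapshot, &net_stat_bytes); /* safe read on ILP32 */
		(void) snapshot;
	}
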
+
 #define	CPU_CACHE_SIZE	64
-#endif
 
 #ifndef IS_P2ALIGNED
 #define	IS_P2ALIGNED(v, a) \
@@ -152,6 +182,7 @@ typedef unsigned int (*mcache_allocfn_t)(void *, mcache_obj_t ***,
     unsigned int, int);
 typedef void (*mcache_freefn_t)(void *, mcache_obj_t *, boolean_t);
 typedef void (*mcache_auditfn_t)(void *, mcache_obj_t *, boolean_t);
+typedef void (*mcache_logfn_t)(u_int32_t, mcache_obj_t *, boolean_t);
 typedef void (*mcache_notifyfn_t)(void *, u_int32_t);
 
 typedef struct mcache {
@@ -164,6 +195,7 @@ typedef struct mcache {
 	mcache_allocfn_t mc_slab_alloc;	/* slab layer allocate callback */
 	mcache_freefn_t	mc_slab_free;	/* slab layer free callback */
 	mcache_auditfn_t mc_slab_audit;	/* slab layer audit callback */
+	mcache_logfn_t mc_slab_log;	/* slab layer log callback */
 	mcache_notifyfn_t mc_slab_notify; /* slab layer notify callback */
 	void		*mc_private;	/* opaque arg to callbacks */
 	size_t		mc_bufsize;	/* object size */
@@ -210,11 +242,12 @@ typedef struct mcache {
 
 /* Valid values for mc_flags */
 #define	MCF_VERIFY	0x00000001	/* enable verification */
-#define	MCF_AUDIT	0x00000002	/* enable transaction auditing */
+#define	MCF_TRACE	0x00000002	/* enable transaction auditing */
 #define	MCF_NOCPUCACHE	0x00000010	/* disable CPU layer caching */
+#define	MCF_NOLEAKLOG	0x00000100	/* disable leak logging */
 
-#define	MCF_DEBUG	(MCF_VERIFY | MCF_AUDIT)
-#define	MCF_FLAGS_MASK	(MCF_DEBUG | MCF_NOCPUCACHE)
+#define	MCF_DEBUG	(MCF_VERIFY | MCF_TRACE)
+#define	MCF_FLAGS_MASK	(MCF_DEBUG | MCF_NOCPUCACHE | MCF_NOLEAKLOG)
 
 /* Valid values for notify callback */
 #define	MCN_RETRYALLOC	0x00000001	/* Allocation should be retried */
@@ -245,8 +278,8 @@ __private_extern__ mcache_t *mcache_create(const char *, size_t,
 __private_extern__ void *mcache_alloc(mcache_t *, int);
 __private_extern__ void mcache_free(mcache_t *, void *);
 __private_extern__ mcache_t *mcache_create_ext(const char *, size_t,
-    mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_notifyfn_t,
-    void *, u_int32_t, int);
+    mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_logfn_t,
+    mcache_notifyfn_t, void *, u_int32_t, int);
 __private_extern__ void mcache_destroy(mcache_t *);
 __private_extern__ unsigned int mcache_alloc_ext(mcache_t *, mcache_obj_t **,
     unsigned int, int);
diff --git a/bsd/sys/mman.h b/bsd/sys/mman.h
index a82aec943..109c63634 100644
--- a/bsd/sys/mman.h
+++ b/bsd/sys/mman.h
@@ -130,6 +130,7 @@ typedef __darwin_size_t	size_t;
 #define	MAP_NOEXTEND	 0x0100	/* for MAP_FILE, don't change file size */
 #define	MAP_HASSEMAPHORE 0x0200	/* region may contain semaphores */
 #define MAP_NOCACHE	 0x0400 /* don't cache pages for this mapping */
+#define MAP_JIT		 0x0800 /* Allocate a region that will be used for JIT purposes */
 #endif	/* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 
 /*
diff --git a/bsd/sys/mount.h b/bsd/sys/mount.h
index 8633a1465..f6594c0ef 100644
--- a/bsd/sys/mount.h
+++ b/bsd/sys/mount.h
@@ -293,9 +293,6 @@ struct vfs_attr {
  * NFS export related mount flags.
  */
 #define	MNT_EXPORTED	0x00000100	/* file system is exported */
-#ifdef PRIVATE
-#define MNT_IMGSRC	0x00000200
-#endif /* CONFIG_IMGSRC_ACCESS */
 
 /*
  * MAC labeled / "quarantined" flag
@@ -319,6 +316,9 @@ struct vfs_attr {
 #define MNT_DEFWRITE	0x02000000	/* filesystem should defer writes */
 #define MNT_MULTILABEL	0x04000000	/* MAC support for individual labels */
 #define MNT_NOATIME	0x10000000	/* disable update of file access time */
+#ifdef BSD_KERNEL_PRIVATE
+/* #define MNT_IMGSRC_BY_INDEX 0x20000000 see sys/imgsrc.h */
+#endif /* BSD_KERNEL_PRIVATE */
 
 /* backwards compatibility only */
 #define MNT_UNKNOWNPERMISSIONS MNT_IGNORE_OWNERSHIP
@@ -334,7 +334,8 @@ struct vfs_attr {
 			MNT_LOCAL	| MNT_QUOTA | \
 			MNT_ROOTFS	| MNT_DOVOLFS	| MNT_DONTBROWSE | \
 			MNT_IGNORE_OWNERSHIP | MNT_AUTOMOUNTED | MNT_JOURNALED | \
-			MNT_NOUSERXATTR | MNT_DEFWRITE	| MNT_MULTILABEL | MNT_NOATIME | MNT_CPROTECT )
+			MNT_NOUSERXATTR | MNT_DEFWRITE	| MNT_MULTILABEL | \
+			MNT_NOATIME | MNT_CPROTECT)
 /*
  * External filesystem command modifier flags.
  * Unmount can use the MNT_FORCE flag.
@@ -440,6 +441,7 @@ union union_vfsidctl { /* the fields vc_vers and vc_fsid are compatible */
 #define VFS_CTL_NEWADDR	0x00010004	/* reconnect to new address */
 #define VFS_CTL_TIMEO	0x00010005	/* set timeout for vfs notification */
 #define VFS_CTL_NOLOCKS	0x00010006	/* disable file locking */
+#define VFS_CTL_SADDR	0x00010007	/* get server address */
 
 struct vfsquery {
 	u_int32_t	vq_flags;
@@ -684,6 +686,9 @@ struct vfsops {
 /*
  * flags passed into vfs_iterate
  */
+#ifdef PRIVATE
+#define VFS_ITERATE_TAIL_FIRST	(1 << 0)	
+#endif /* PRIVATE */
 
 /*
  * return values from callback
@@ -1164,14 +1169,88 @@ void	vfs_event_signal(fsid_t *, u_int32_t, intptr_t);
   */
 void	vfs_event_init(void); /* XXX We should not export this */
 #ifdef KERNEL_PRIVATE
+int	vfs_getbyid(fsid_t *fsid, ino64_t ino, vnode_t *vpp, vfs_context_t ctx);
 int	vfs_getattr(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx);
 int	vfs_setattr(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx);
 int	vfs_extendedsecurity(mount_t);
 mount_t	vfs_getvfs_by_mntonname(char *);
 void    vfs_markdependency(mount_t);
 vnode_t vfs_vnodecovered(mount_t mp); /* Returns vnode with an iocount that must be released with vnode_put() */
-void * vfs_mntlabel(mount_t mp); /* Safe to cast to "struct label*"; returns "void*" to limit dependence of mount.h on security headers.  */
+vnode_t vfs_devvp(mount_t mp); /* Please see block comment with implementation */
+void *  vfs_mntlabel(mount_t mp); /* Safe to cast to "struct label*"; returns "void*" to limit dependence of mount.h on security headers.  */
 void	vfs_setunmountpreflight(mount_t mp);
+void	vfs_setcompoundopen(mount_t mp);
+uint64_t vfs_throttle_mask(mount_t mp);
+
+struct vnode_trigger_info;
+
+/*!
+ @function vfs_addtrigger
+ @abstract Create an "external" trigger vnode: look up a vnode and mark it as
+ a trigger.  Can only safely be called in the context of a callback set by
+ vfs_settriggercallback().  May only be used on a file which is not already
+ marked as a trigger.
+ @param mp Mountpoint on which the trigger is to be created.
+ @param relpath Path relative to root of mountpoint at which to mark trigger.
+ @param vtip Information about trigger; analogous to "vnode_trigger_param"
+ 	argument to vnode_create.
+ @param ctx Authorization context.
+ */
+int 	vfs_addtrigger(mount_t mp, const char *relpath, struct vnode_trigger_info *vtip, vfs_context_t ctx);
+
+
+/*!
+ @enum vfs_trigger_callback_op_t
+ @abstract Operation to perform after an attempted unmount (successful or otherwise).
+ @constant VTC_REPLACE Unmount failed: attempt to replace triggers.  Only valid 
+ 	VFS operation to perform in this context is vfs_addtrigger().
+ @constant VTC_RELEASE Unmount succeeded: release external triggering context.
+ */
+typedef enum { 
+	VTC_REPLACE,
+	VTC_RELEASE
+} vfs_trigger_callback_op_t;
+
+/*!
+ @typedef vfs_trigger_callback_t
+ @abstract Callback to be passed to vfs_settriggercallback() and invoked from 
+ 	unmount context.  
+ @param mp Mountpoint on which unmount is occurring.
+ @param op Operation (see vfs_trigger_callback_op_t)
+ @param data Context passed to vfs_settriggercallback()
+ @param ctx Authorization context in which unmount is occurring.
+ */
+typedef void vfs_trigger_callback_t(mount_t mp, vfs_trigger_callback_op_t op, void *data, vfs_context_t ctx);
+
+/*!
+  @function vfs_settriggercallback
+  @abstract Install a callback to be called after unmount attempts on a volume, 
+  to restore triggers for failed unmounts and release state for successful ones.
+  @discussion Installs a callback which will be called in two situations: a 
+  failed unmount where vnodes may have been reclaimed and a successful unmount.
+  Gives an external trigger-marking entity an opportunity to replace triggers
+  which may have been reclaimed.  The callback can only be installed (not 
+  cleared), and only one callback can be installed.  The callback will be called
+  with a read-write lock held on the mount point; in the VTC_REPLACE case, the 
+  <em>only</em> valid VFS operation to perform in the context of the callback is
+  vfs_addtrigger() on the mountpoint in question.  This rwlock is held in order
+  to attempt to provide some modicum of coverage from lookups which might find
+  missing trigger vnodes and receive spurious ENOENTs.  Note that this 
+  protection is incomplete--current working directories, or traversals up into a
+  volume via ".." may still find missing triggers.  As of this writing, no
+  serialization mechanism exists to do better than this.
+  When the "op" is VTC_RELEASE, the mountpoint is going away, and the only valid
+  VFS operation is to free the  private data pointer if needed.  The callback 
+  will be called immediately, with VTC_REPLACE, from vfs_settriggercallback(), 
+  if installation is successful.
+  @param fsid FSID for filesystem in question.
+  @param vtc Callback pointer.
+  @param data Context pointer to be passed to callback.
+  @param flags Currently unused.
+  @param ctx Authorization context.
+  @return 0 for success.  EBUSY if a trigger has already been installed.
+  */
+int 	vfs_settriggercallback(fsid_t *fsid, vfs_trigger_callback_t vtc, void *data, uint32_t flags, vfs_context_t ctx);
+
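
A hedged sketch of installing the callback described above (the helper
my_replace_triggers and the context pointer my_ctx are hypothetical):

	static void
	my_trigger_cb(mount_t mp, vfs_trigger_callback_op_t op, void *data,
	    vfs_context_t ctx)
	{
		switch (op) {
		case VTC_REPLACE:
			/* Unmount failed: re-mark triggers via vfs_addtrigger(). */
			my_replace_triggers(mp, data, ctx);
			break;
		case VTC_RELEASE:
			/* Unmount succeeded: release private state in data. */
			break;
		}
	}

	/* Fires immediately with VTC_REPLACE if installation succeeds. */
	error = vfs_settriggercallback(&fsid, my_trigger_cb, my_ctx, 0, ctx);
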
 #endif	/* KERNEL_PRIVATE */
 __END_DECLS
 
diff --git a/bsd/sys/mount_internal.h b/bsd/sys/mount_internal.h
index b069b1a0f..141fb3eeb 100644
--- a/bsd/sys/mount_internal.h
+++ b/bsd/sys/mount_internal.h
@@ -115,6 +115,7 @@ struct mount {
 	struct vnodelst	mnt_newvnodes;		/* list of vnodes this mount */
 	uint32_t		mnt_flag;		/* flags */
 	uint32_t		mnt_kern_flag;		/* kernel only flags */
+	uint32_t		mnt_compound_ops;	/* Available compound operations */
 	uint32_t		mnt_lflag;			/* mount life cycle flags */
 	uint32_t		mnt_maxsymlinklen;	/* max size of short symlink */
 	struct vfsstatfs	mnt_vfsstat;		/* cache of filesystem stats */
@@ -131,17 +132,22 @@ struct mount {
 	uint32_t	mnt_ioqueue_depth;	/* the maximum number of commands a device can accept */
         uint32_t	mnt_ioscale;		/* scale the various throttles/limits imposed on the amount of I/O in flight */
 	uint32_t	mnt_ioflags;		/* flags for  underlying device */
-	pending_io_t	mnt_pending_write_size;	/* byte count of pending writes */
-	pending_io_t	mnt_pending_read_size;	/* byte count of pending reads */
+	pending_io_t	mnt_pending_write_size __attribute__((aligned(sizeof(pending_io_t))));	/* byte count of pending writes */
+	pending_io_t	mnt_pending_read_size  __attribute__((aligned(sizeof(pending_io_t))));	/* byte count of pending reads */
 
 	lck_rw_t	mnt_rwlock;		/* mutex readwrite lock */
 	lck_mtx_t	mnt_renamelock;		/* mutex that serializes renames that change shape of tree */
 	vnode_t		mnt_devvp;		/* the device mounted on for local file systems */
 	uint32_t	mnt_devbsdunit;		/* the BSD unit number of the device */
+	uint64_t	mnt_throttle_mask;	/* the throttle mask of what devices will be affected by I/O from this mnt */
 	void		*mnt_throttle_info;	/* used by the throttle code */
 	int32_t		mnt_crossref;		/* references to cover lookups crossing into mp */
 	int32_t		mnt_iterref;		/* references to cover iterations; drained makes it -ve  */
- 
+#if CONFIG_TRIGGERS
+	int32_t		mnt_numtriggers; 	/* num of trigger vnodes for this mount */
+	vfs_trigger_callback_t *mnt_triggercallback;
+	void		*mnt_triggerdata;
+#endif
  	/* XXX 3762912 hack to support HFS filesystem 'owner' */
  	uid_t		mnt_fsowner;
  	gid_t		mnt_fsgroup;
@@ -190,6 +196,7 @@ struct mount {
 	 */
 	pid_t		mnt_dependent_pid;
 	void		*mnt_dependent_process;
+	char		fstypename_override[MFSTYPENAMELEN];
 };
 
 /*
@@ -228,6 +235,12 @@ extern struct mount * dead_mountp;
  *		because the bits here were broken out from the high bits
  *		of the mount flags.
  */
+#define MNTK_DENY_READDIREXT 0x00000200 /* Deny Extended-style readdir's for this volume */
+#define MNTK_PERMIT_UNMOUNT	0x00000400	/* Allow (non-forced) unmounts by UIDs other than the one that mounted the volume */
+#ifdef NFSCLIENT
+#define MNTK_TYPENAME_OVERRIDE  0x00000800      /* override the fstypename for statfs() */
+#endif /* NFSCLIENT */
+#define MNTK_KERNEL_MOUNT	0x00001000	/* mount came from kernel side */
 #ifdef CONFIG_IMGSRC_ACCESS
 #define MNTK_HAS_MOVED		0x00002000
 #define MNTK_BACKS_ROOT		0x00004000
@@ -392,13 +405,11 @@ struct user32_statfs {
 };
 
 /*
- * throttle I/Os are affected only by normal I/Os happening on the same bsd device node.  For example, disk1s3 and
- * disk1s5 are the same device node, while disk1s3 and disk2 are not (although disk2 might be a mounted disk image file
- * and the disk image file resides on a partition in disk1).  The following constant defines the maximum number of
- * different bsd device nodes the algorithm can consider, and larger numbers are rounded by this maximum.  Since
- * throttled I/O is usually useful in non-server environment only, a small number 16 is enough in most cases
+ * Throttled I/Os are affected only by normal I/Os happening on the same spindle.  Currently we use a 64-bit integer to
+ * represent which devices are affected, so we can handle at most 64 different spindles.  Since
+ * throttled I/O is usually useful only in non-server environments, this number is enough in most cases.
  */
-#define LOWPRI_MAX_NUM_DEV 16
+#define LOWPRI_MAX_NUM_DEV 64
 
 __BEGIN_DECLS
 
@@ -425,7 +436,7 @@ void	vfs_unmountall(void);
 int	safedounmount(struct mount *, int, vfs_context_t);
 int	dounmount(struct mount *, int, int, vfs_context_t);
 
-/* xnuy internal api */
+/* xnu internal api */
 void  mount_dropcrossref(mount_t, vnode_t, int);
 mount_t mount_lookupby_volfsid(int, int);
 mount_t mount_list_lookupby_fsid(fsid_t *, int, int);
@@ -437,11 +448,31 @@ void mount_iterdrop(mount_t);
 void mount_iterdrain(mount_t);
 void mount_iterreset(mount_t);
 
+/* tags a volume as not supporting extended readdir for NFS exports */
+#ifdef BSD_KERNEL_PRIVATE
+void mount_set_noreaddirext(mount_t);
+#endif
+
+/* Private NFS spi */
+#define KERNEL_MOUNT_NOAUTH		0x01 /* Don't check the UID of the directory we are mounting on */
+#define KERNEL_MOUNT_PERMIT_UNMOUNT	0x02 /* Allow (non-forced) unmounts by users other than the one who mounted the volume */
+#if NFSCLIENT
+/*
+ * NOTE: kernel_mount() does not force MNT_NOSUID, MNT_NOEXEC, or MNT_NODEV for non-privileged
+ * mounting credentials, as the mount(2) system call does.
+ */
+int kernel_mount(char *, vnode_t, vnode_t, const char *, void *, size_t, int, uint32_t, vfs_context_t);
+boolean_t vfs_iskernelmount(mount_t);
+#endif
+
 /* throttled I/O api */
 int throttle_get_io_policy(struct uthread **ut);
-extern void throttle_lowpri_io(boolean_t ok_to_sleep);
 int throttle_io_will_be_throttled(int lowpri_window_msecs, mount_t mp);
 
+/* throttled I/O helper function */
+/* convert the lowest bit to a device index */
+extern int num_trailing_0(uint64_t n);
+
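
The helper's contract, sketched: with mnt_throttle_mask holding one bit per
device, counting the trailing zero bits of the lowest set bit yields the
device index. An illustrative implementation (the real one lives in the VFS
code; the behavior for n == 0 is an assumption here):

	int
	num_trailing_0(uint64_t n)
	{
		int count = 0;

		if (n == 0)
			return (64);	/* assumed: no bits set */
		while ((n & 1) == 0) {
			count++;
			n >>= 1;
		}
		return (count);
	}
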
 __END_DECLS
 
 #endif /* !_SYS_MOUNT_INTERNAL_H_ */
diff --git a/bsd/sys/msgbuf.h b/bsd/sys/msgbuf.h
index e05b73e9e..5b8211cac 100644
--- a/bsd/sys/msgbuf.h
+++ b/bsd/sys/msgbuf.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -65,22 +65,24 @@
 
 #include <sys/cdefs.h>
 
-#define	MSG_BSIZE	4096
+#define	MAX_MSG_BSIZE	(1*1024*1024)
 struct	msgbuf {
 #define	MSG_MAGIC	0x063061
-	long	msg_magic;
-	long	msg_size;
-	long	msg_bufx;		/* write pointer */
-	long	msg_bufr;		/* read pointer */
-	char	*msg_bufc;	/* buffer */
+	int		msg_magic;
+	int		msg_size;
+	int		msg_bufx;		/* write pointer */
+	int		msg_bufr;		/* read pointer */
+	char	*msg_bufc;		/* buffer */
 };
-#ifdef KERNEL
+
+#ifdef XNU_KERNEL_PRIVATE
 __BEGIN_DECLS
 extern struct	msgbuf *msgbufp;
 extern void log_putc(char);
 extern void log_putc_locked(char);
-extern void log_setsize(long size);
+extern int log_setsize(int size);
 extern int log_dmesg(user_addr_t, uint32_t, int32_t *);
 __END_DECLS
-#endif
+#endif /* XNU_KERNEL_PRIVATE */
+
 #endif	/* !_SYS_MSGBUF_H_ */
diff --git a/bsd/sys/namei.h b/bsd/sys/namei.h
index 5aa2f701a..56d3ecf13 100644
--- a/bsd/sys/namei.h
+++ b/bsd/sys/namei.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -90,12 +90,15 @@
 /*
  * Encapsulation of namei parameters.
  */
-struct nameidata {
+struct nameidata {
 	/*
 	 * Arguments to namei/lookup.
 	 */
 	user_addr_t ni_dirp;		/* pathname pointer */
 	enum	uio_seg ni_segflg;	/* location of pathname */
+#if CONFIG_TRIGGERS
+	enum	path_operation ni_op;	/* intended operation, see enum path_operation in vnode.h */
+#endif /* CONFIG_TRIGGERS */
 	/*
 	 * Arguments to lookup.
 	 */
@@ -116,8 +119,25 @@ struct nameidata {
 	u_long	ni_loopcnt;		/* count of symlinks encountered */
 
 	struct componentname ni_cnd;
+	int32_t ni_flag;
+	int ni_ncgeneration;		/* For a batched vnop, grab generation beforehand */
 };
 
+#define NAMEI_CONTLOOKUP	0x002    /* Continue processing a lookup which was partially processed in a compound VNOP */
+#define NAMEI_TRAILINGSLASH	0x004    /* There was at least one trailing slash after last component */
+#define NAMEI_UNFINISHED	0x008    /* We broke off a lookup to do a compound op */
+/* 
+ * XXX Hack: we need to encode the intended VNOP in order to 
+ * be able to include information about which operations a filesystem
+ * supports in the decision to break off a lookup early.
+ */
+#define NAMEI_COMPOUNDOPEN	0x010	
+#define NAMEI_COMPOUNDREMOVE	0x020	
+#define NAMEI_COMPOUNDMKDIR	0x040	
+#define NAMEI_COMPOUNDRMDIR	0x080	
+#define NAMEI_COMPOUNDRENAME	0x100	
+#define NAMEI_COMPOUND_OP_MASK (NAMEI_COMPOUNDOPEN | NAMEI_COMPOUNDREMOVE | NAMEI_COMPOUNDMKDIR | NAMEI_COMPOUNDRMDIR | NAMEI_COMPOUNDRENAME)
+
 #ifdef KERNEL
 /*
  * namei operational modifier flags, stored in ni_cnd.flags
@@ -169,7 +189,27 @@ struct nameidata {
 /*
  * Initialization of an nameidata structure.
  */
-#define NDINIT(ndp, op, flags, segflg, namep, ctx) { \
+
+#if CONFIG_TRIGGERS
+/* Note: vnode triggers require more precise path operation (ni_op) */ 
+
+#define NDINIT(ndp, op, pop, flags, segflg, namep, ctx) { \
+	(ndp)->ni_cnd.cn_nameiop = op; \
+	(ndp)->ni_op = pop; \
+	(ndp)->ni_cnd.cn_flags = flags; \
+	if ((segflg) == UIO_USERSPACE) { \
+		(ndp)->ni_segflg = ((IS_64BIT_PROCESS(vfs_context_proc(ctx))) ? UIO_USERSPACE64 : UIO_USERSPACE32); \
+	} \
+	else { \
+		(ndp)->ni_segflg = segflg; \
+	} \
+	(ndp)->ni_dirp = namep; \
+	(ndp)->ni_cnd.cn_context = ctx; \
+	(ndp)->ni_flag = 0; \
+	(ndp)->ni_cnd.cn_ndp = (ndp); \
+}
+#else
+#define NDINIT(ndp, op, _unused_, flags, segflg, namep, ctx) { \
 	(ndp)->ni_cnd.cn_nameiop = op; \
 	(ndp)->ni_cnd.cn_flags = flags; \
 	if ((segflg) == UIO_USERSPACE) { \
@@ -180,7 +220,11 @@ struct nameidata {
 	} \
 	(ndp)->ni_dirp = namep; \
 	(ndp)->ni_cnd.cn_context = ctx; \
+	(ndp)->ni_flag = 0; \
+	(ndp)->ni_cnd.cn_ndp = (ndp); \
 }
+#endif /* CONFIG_TRIGGERS */
+
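
For illustration, a caller using the widened macro; OP_LOOKUP stands in for a
member of enum path_operation from vnode.h, and the flag names are the usual
namei ones (exact constants are assumptions here):

	struct nameidata nd;
	int error;

	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, pathp, ctx);
	error = namei(&nd);
	if (error == 0) {
		/* ... use nd.ni_vp ... */
		nameidone(&nd);
	}
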
 #endif /* KERNEL */
 
 /*
@@ -210,21 +254,25 @@ struct	namecache {
 
 int	namei(struct nameidata *ndp);
 void	nameidone(struct nameidata *);
+void	namei_unlock_fsnode(struct nameidata *ndp);
 int	lookup(struct nameidata *ndp);
 int	relookup(struct vnode *dvp, struct vnode **vpp,
 		struct componentname *cnp);
+void	lookup_compound_vnop_post_hook(int error, vnode_t dvp, vnode_t vp, struct nameidata *ndp, int did_create);
 
 /*
  * namecache function prototypes
  */
 void    cache_purgevfs(mount_t mp);
 int		cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
-			  vfs_context_t context, int *trailing_slash, int *dp_authorized, vnode_t last_dp);
+			  vfs_context_t context, int *dp_authorized, vnode_t last_dp);
 
 void		vnode_cache_authorized_action(vnode_t vp, vfs_context_t context, kauth_action_t action);
 void		vnode_uncache_authorized_action(vnode_t vp, kauth_action_t action);
 boolean_t	vnode_cache_is_stale(vnode_t vp);
 boolean_t	vnode_cache_is_authorized(vnode_t vp, vfs_context_t context, kauth_action_t action);
+int 		lookup_validate_creation_path(struct nameidata *ndp);
+int		namei_compound_available(vnode_t dp, struct nameidata *ndp);
 
 #endif /* KERNEL */
 
diff --git a/bsd/dev/ppc/memmove.c b/bsd/sys/netboot.h
similarity index 72%
rename from bsd/dev/ppc/memmove.c
rename to bsd/sys/netboot.h
index e102c248a..717100d7f 100644
--- a/bsd/dev/ppc/memmove.c
+++ b/bsd/sys/netboot.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,30 +25,26 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/* Copyright (c) 1991,1993 NeXT Computer, Inc.  All rights reserved.
- * 
- */
-#include <sys/systm.h>
 
+/* 
+ * netboot.h
+ * - definitions for network booting/rooting
+ */
 
-void ovbcopy(const void *src, void *dst, size_t ulen)
-{
-	bcopy(src, dst, ulen);
-}
+#ifndef _SYS_NETBOOT_H
+#define _SYS_NETBOOT_H
 
-#if 0
-void *memcpy(void *dst, const void *src, unsigned int ulen)
-{
-	bcopy(src, dst, ulen);
-	return dst;
-}
+#include <mach/boolean.h>
+#include <netinet/in.h>
 
-void *memmove(void *dst, const void *src, unsigned int ulen)
-{
-	bcopy(src, dst, ulen);
-	return dst;
-}
+int		netboot_setup(void);
+int		netboot_mountroot(void);
+int		netboot_root(void);
 
-#endif /* 0 */
+boolean_t	netboot_iaddr(struct in_addr * iaddr_p);
 
+boolean_t	netboot_rootpath(struct in_addr * server_ip,
+				 char * name, int name_len, 
+				 char * path, int path_len);
 
+#endif /* _SYS_NETBOOT_H */
diff --git a/bsd/sys/priv.h b/bsd/sys/priv.h
new file mode 100644
index 000000000..1abb898bf
--- /dev/null
+++ b/bsd/sys/priv.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*-
+ * Copyright (c) 2006 nCircle Network Security, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson for the TrustedBSD
+ * Project under contract to nCircle Network Security, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR, NCIRCLE NETWORK SECURITY,
+ * INC., OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/sys/priv.h,v 1.38.2.1.2.1 2009/10/25 01:10:29 kensmith Exp $
+ */
+
+/*
+ * Kernel privilege checking interface.
+ */
+#ifndef _SYS_PRIV_H_
+#define	_SYS_PRIV_H_
+
+/*
+ * Privilege list, sorted loosely by kernel subsystem.
+ *
+ * Think carefully before adding or reusing one of these privileges -- are
+ * there existing instances referring to the same privilege?  Particular
+ * numeric privilege assignments are part of the kernel extension ABI.
+ */
+
+/*
+ * The remaining privileges typically correspond to one or a small
+ * number of specific privilege checks, and have (relatively) precise
+ * meanings.  They are loosely sorted into a set of base system
+ * privileges, such as the ability to reboot, and then loosely by
+ * subsystem, indicated by a subsystem name.
+ */
+#define	PRIV_ADJTIME		1000	/* Set time adjustment. */
+
+/*
+ * IPv4 and IPv6 privileges.
+ */
+#define	PRIV_NETINET_RESERVEDPORT	11000	/* Bind low port number. */
+
+#ifdef KERNEL
+/*
+ * Privilege check interface.  No flags are currently defined for the API.
+ */
+#include <sys/kauth.h>
+int	priv_check_cred(kauth_cred_t cred, int priv, int flags);
+#endif
+
+#endif /* !_SYS_PRIV_H_ */
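
A brief sketch of the check as a caller would issue it, e.g. before permitting
a bind to a low port. No flags are defined yet, so 0 is passed; a return of 0
is taken to mean the privilege is held (an assumption consistent with the
FreeBSD origin of this interface):

	#include <sys/priv.h>

	int
	can_bind_reserved_port(kauth_cred_t cred)
	{
		return (priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0));
	}
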
diff --git a/bsd/sys/proc.h b/bsd/sys/proc.h
index 92c86c0a1..8cec174a3 100644
--- a/bsd/sys/proc.h
+++ b/bsd/sys/proc.h
@@ -167,7 +167,7 @@ struct extern_proc {
 #define	P_TIMEOUT	0x00000400	/* Timing out during sleep */
 #define	P_TRACED	0x00000800	/* Debugged process being traced */
 
-#define	P_RESV3 	0x00001000	/* (P_WAITED)Debugging prc has waited for child */
+#define	P_DISABLE_ASLR	0x00001000	/* Disable address space layout randomization */
 #define	P_WEXIT		0x00002000	/* Working on exiting */
 #define	P_EXEC		0x00004000	/* Process called exec. */
 
@@ -252,7 +252,7 @@ extern int proc_pid(proc_t);
 extern int proc_ppid(proc_t);
 /* returns 1 if the process is marked for no remote hangs */
 extern int proc_noremotehang(proc_t);
-/* returns 1 is the process is marked for force quota */
+/* returns 1 if the process is marked for force quota */
 extern int proc_forcequota(proc_t);
 
 /* this routine returns 1 if the process is running with 64bit address space, else 0 */
@@ -292,9 +292,41 @@ extern int	msleep1(void *chan, lck_mtx_t *mtx, int pri, const char *wmesg, u_int
 extern int proc_pidversion(proc_t);
 extern int proc_getcdhash(proc_t, unsigned char *);
 #endif /* KERNEL_PRIVATE */
+#ifdef XNU_KERNEL_PRIVATE
+/*
+ * This returns a unique 64-bit id of a given process.
+ * The caller needs to hold a proper reference on the
+ * passed-in process structure.
+ */
+extern uint64_t proc_uniqueid(proc_t);
+extern uint64_t proc_selfuniqueid(void);
+extern void proc_getexecutableuuid(proc_t, unsigned char *, unsigned long);
+#endif /* XNU_KERNEL_PRIVATE */
 
 __END_DECLS
 
 #endif	/* KERNEL */
 
+#ifdef PRIVATE
+
+/* Values for pid_shutdown_sockets */
+#ifdef KERNEL
+#define SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL	0x0
+#endif /* KERNEL */
+#define SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC		0x1
+#define SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL		0x2
+
+#ifndef KERNEL
+
+__BEGIN_DECLS
+
+int pid_suspend(int pid);
+int pid_resume(int pid);
+
+
+__END_DECLS
+
+#endif /* !KERNEL */
+#endif /* PRIVATE */
+
 #endif	/* !_SYS_PROC_H_ */
diff --git a/bsd/sys/proc_info.h b/bsd/sys/proc_info.h
index e22cd3ab4..67842664d 100644
--- a/bsd/sys/proc_info.h
+++ b/bsd/sys/proc_info.h
@@ -50,6 +50,7 @@ __BEGIN_DECLS
 #define PROC_TTY_ONLY		3
 #define PROC_UID_ONLY		4
 #define PROC_RUID_ONLY		5
+#define PROC_PPID_ONLY		6
 
 struct proc_bsdinfo {
 	uint32_t		pbi_flags;		/* 64bit; emulated etc */
@@ -77,25 +78,47 @@ struct proc_bsdinfo {
 };
 
 
+struct proc_bsdshortinfo {
+	uint32_t	pbsi_pid;		/* process id */
+	uint32_t	pbsi_ppid;		/* process parent id */
+	uint32_t	pbsi_pgid;		/* process group id */
+	uint32_t	pbsi_status;		/* p_stat value, SZOMB, SRUN, etc */
+	char		pbsi_comm[MAXCOMLEN];	/* up to 16 characters of process name */
+	uint32_t	pbsi_flags;		/* 64bit; emulated etc */
+	uid_t		pbsi_uid;		/* current uid on process */
+	gid_t		pbsi_gid;		/* current gid on process */
+	uid_t		pbsi_ruid;		/* current ruid on process */
+	gid_t		pbsi_rgid;		/* current rgid on process */
+	uid_t		pbsi_svuid;		/* current svuid on process */
+	gid_t		pbsi_svgid;		/* current svgid on process */
+	uint32_t	pbsi_rfu;		/* reserved for future use */
+};
+
 
 /* pbi_flags values */
-#define PROC_FLAG_SYSTEM	1
-#define PROC_FLAG_TRACED	2
-#define PROC_FLAG_INEXIT	4
+#define PROC_FLAG_SYSTEM	1	/*  System process */
+#define PROC_FLAG_TRACED	2	/* process currently being traced, possibly by gdb */
+#define PROC_FLAG_INEXIT	4	/* process is working its way in exit() */
 #define PROC_FLAG_PPWAIT	8
-#define PROC_FLAG_LP64		0x10
-#define PROC_FLAG_SLEADER	0x20
-#define PROC_FLAG_CTTY		0x40
-#define PROC_FLAG_CONTROLT	0x80
-#define PROC_FLAG_THCWD		0x100
+#define PROC_FLAG_LP64		0x10	/* 64bit process */
+#define PROC_FLAG_SLEADER	0x20	/* The process is the session leader */
+#define PROC_FLAG_CTTY		0x40	/* process has a control tty */
+#define PROC_FLAG_CONTROLT	0x80	/* Has a controlling terminal */
+#define PROC_FLAG_THCWD		0x100	/* process has a thread with cwd */
 /* process control bits for resource starvation */
-#define PROC_FLAG_PC_THROTTLE	0x200
-#define PROC_FLAG_PC_SUSP	0x400
-#define PROC_FLAG_PC_KILL	0x600
+#define PROC_FLAG_PC_THROTTLE	0x200	/* In resource starvation situations, this process is to be throttled */
+#define PROC_FLAG_PC_SUSP	0x400	/* In resource starvation situations, this process is to be suspended */
+#define PROC_FLAG_PC_KILL	0x600	/* In resource starvation situations, this process is to be terminated */
 #define PROC_FLAG_PC_MASK	0x600
 /* process action bits for resource starvation */
-#define PROC_FLAG_PA_THROTTLE	0x800
-#define PROC_FLAG_PA_SUSP	0x1000
+#define PROC_FLAG_PA_THROTTLE	0x800	/* The process is currently throttled due to resource starvation */
+#define PROC_FLAG_PA_SUSP	0x1000	/* The process is currently suspended due to resource starvation */
+#define PROC_FLAG_PSUGID        0x2000	 /* process has set privileges since last exec */
+#define PROC_FLAG_EXEC		0x4000	 /* process has called exec  */
+#ifdef  PRIVATE
+#define PROC_FLAG_DARWINBG	0x8000	/* process in darwin background */
+#define PROC_FLAG_EXT_DARWINBG	0x10000	/* process in darwin background - external enforcement */
+#endif
 
 
 struct proc_taskinfo {
@@ -174,6 +197,7 @@ struct proc_regioninfo {
 #define SM_TRUESHARED      5
 #define SM_PRIVATE_ALIASED 6
 #define SM_SHARED_ALIASED  7
+#define SM_LARGE_PAGE      8
 
 
 /*
@@ -199,9 +223,16 @@ struct proc_workqueueinfo {
 	uint32_t	pwq_nthreads;		/* total number of workqueue threads */
 	uint32_t	pwq_runthreads;		/* total number of running workqueue threads */
 	uint32_t	pwq_blockedthreads;	/* total number of blocked workqueue threads */
-	uint32_t	reserved[1];		/* reserved for future use */
+	uint32_t	pwq_state;
 };
 
+/*
+ *	workqueue state (pwq_state field)
+ */
+#define WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT	0x1
+#define WQ_EXCEEDED_TOTAL_THREAD_LIMIT		0x2
+
+
 struct proc_fileinfo {
 	uint32_t		fi_openflags;
 	uint32_t		fi_status;	
@@ -561,6 +592,11 @@ struct proc_fdinfo {
 	uint32_t		proc_fdtype;	
 };
 
+struct proc_fileportinfo {
+	uint32_t		proc_fileport;
+	uint32_t		proc_fdtype;
+};
+
 /* Flavors for proc_pidinfo() */
 #define PROC_PIDLISTFDS			1
 #define PROC_PIDLISTFD_SIZE		(sizeof(struct proc_fdinfo))
@@ -600,6 +636,12 @@ struct proc_fdinfo {
 #define PROC_PIDWORKQUEUEINFO		12
 #define PROC_PIDWORKQUEUEINFO_SIZE	(sizeof(struct proc_workqueueinfo))
 
+#define PROC_PIDT_SHORTBSDINFO		13
+#define PROC_PIDT_SHORTBSDINFO_SIZE	(sizeof(struct proc_bsdshortinfo))
+
+#define PROC_PIDLISTFILEPORTS		14
+#define PROC_PIDLISTFILEPORTS_SIZE	(sizeof(struct proc_fileportinfo))
+
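
From userland these flavors are typically reached through libproc's
proc_pidinfo(). A hedged sketch for the new short-bsdinfo flavor:

	#include <libproc.h>

	struct proc_bsdshortinfo bsi;
	int ret;

	/* Returns the number of bytes filled in, or <= 0 on failure. */
	ret = proc_pidinfo(pid, PROC_PIDT_SHORTBSDINFO, 0,
	    &bsi, PROC_PIDT_SHORTBSDINFO_SIZE);
	if (ret == PROC_PIDT_SHORTBSDINFO_SIZE) {
		/* bsi.pbsi_comm, bsi.pbsi_ppid, etc. are now valid. */
	}
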
 /* Flavors for proc_pidfdinfo */
 
 #define PROC_PIDFDVNODEINFO		1
@@ -626,9 +668,29 @@ struct proc_fdinfo {
 #define PROC_PIDFDATALKINFO		8
 #define PROC_PIDFDATALKINFO_SIZE	(sizeof(struct appletalk_fdinfo))
 
+/* Flavors for proc_pidfileportinfo */
+
+#define PROC_PIDFILEPORTVNODEPATHINFO	2	/* out: vnode_fdinfowithpath */
+#define PROC_PIDFILEPORTVNODEPATHINFO_SIZE	\
+					PROC_PIDFDVNODEPATHINFO_SIZE
+
+#define PROC_PIDFILEPORTSOCKETINFO	3	/* out: socket_fdinfo */
+#define PROC_PIDFILEPORTSOCKETINFO_SIZE	PROC_PIDFDSOCKETINFO_SIZE
+
+#define PROC_PIDFILEPORTPSHMINFO	5	/* out: pshm_fdinfo */
+#define PROC_PIDFILEPORTPSHMINFO_SIZE	PROC_PIDFDPSHMINFO_SIZE
+
+#define PROC_PIDFILEPORTPIPEINFO	6	/* out: pipe_fdinfo */
+#define PROC_PIDFILEPORTPIPEINFO_SIZE	PROC_PIDFDPIPEINFO_SIZE
+
 /* used for proc_setcontrol */
 #define PROC_SELFSET_PCONTROL		1
 
+#define PROC_SELFSET_THREADNAME		2
+#define PROC_SELFSET_THREADNAME_SIZE	(MAXTHREADNAMESIZE - 1)
+
+#define PROC_SELFSET_VMRSRCOWNER	3
+
 #ifdef XNU_KERNEL_PRIVATE
 #ifndef pshmnode
 struct pshmnode;
diff --git a/bsd/sys/proc_internal.h b/bsd/sys/proc_internal.h
index 52e4197dd..26b91b3cd 100644
--- a/bsd/sys/proc_internal.h
+++ b/bsd/sys/proc_internal.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -184,6 +184,14 @@ struct proc;
 
 #define PROC_NULL (struct proc *)0
 
+#define PROC_UPDATE_CREDS_ONPROC(p) { \
+	p->p_uid =  kauth_cred_getuid(p->p_ucred); \
+	p->p_gid =  kauth_cred_getgid(p->p_ucred); \
+	p->p_ruid =  kauth_cred_getruid(p->p_ucred); \
+	p->p_rgid =  kauth_cred_getrgid(p->p_ucred); \
+	p->p_svuid =  kauth_cred_getsvuid(p->p_ucred); \
+	p->p_svgid =  kauth_cred_getsvgid(p->p_ucred); \
+	}
 /*
  * Description of a process.
  *
@@ -203,6 +211,13 @@ struct	proc {
 	struct	proc *	p_pptr;		 	/* Pointer to parent process.(LL) */
 	pid_t		p_ppid;			/* process's parent pid number */
 	pid_t		p_pgrpid;		/* process group id of the process (LL)*/
+	uid_t		p_uid;
+	gid_t		p_gid;
+	uid_t		p_ruid;
+	gid_t		p_rgid;
+	uid_t		p_svuid;
+	gid_t		p_svgid;
+	uint64_t	p_uniqueid;		/* process unique ID */
 
 	lck_mtx_t 	p_mlock;		/* mutex lock for proc */
 
@@ -281,6 +296,7 @@ struct	proc {
 	lck_mtx_t			p_dtrace_sprlock;		/* sun proc lock emulation */
 	int				p_dtrace_probes;		/* (PL) are there probes for this proc? */
 	u_int				p_dtrace_count;			/* (sprlock) number of DTrace tracepoints */
+        uint8_t                         p_dtrace_stop;                  /* indicates a DTrace-desired stop */
 	struct dtrace_ptss_page*	p_dtrace_ptss_pages;		/* (sprlock) list of user ptss pages */
 	struct dtrace_ptss_page_entry*	p_dtrace_ptss_free_list;	/* (atomic) list of individual ptss entries */
 	struct dtrace_helpers*		p_dtrace_helpers;		/* (dtrace_lock) DTrace per-proc private */
@@ -314,7 +330,9 @@ struct	proc {
 	char	p_name[(2*MAXCOMLEN)+1];	/* PL */
 
 	struct 	pgrp *p_pgrp;	/* Pointer to process group. (LL) */
+#if CONFIG_EMBEDDED
 	int		p_iopol_disk;	/* disk I/O policy (PL) */
+#endif /* CONFIG_EMBEDDED */
 	uint32_t	p_csflags;	/* flags for codesign (PL) */
 	uint32_t	p_pcaction;	/* action  for process control on starvation */
 	uint8_t p_uuid[16];		/* from LC_UUID load command */
@@ -330,6 +348,7 @@ struct	proc {
 	struct klist p_klist;  /* knote list (PL ?)*/
 
 	struct	rusage *p_ru;	/* Exit information. (PL) */
+	int		p_sigwaitcnt;
 	thread_t 	p_signalholder;
 	thread_t 	p_transholder;
 
@@ -408,10 +427,13 @@ struct	proc {
 #define	P_LLIMWAIT	0x00040000
 #define P_LWAITED   	0x00080000 
 #define P_LINSIGNAL    	0x00100000 
-#define P_LSIGNALWAIT  	0x00200000 
+#define P_UNUSED  	0x00200000 	/* Unused */
 #define P_LRAGE_VNODES	0x00400000
 #define P_LREGISTER	0x00800000	/* thread start fns registered  */
+#if CONFIG_EMBEDDED
 #define P_LBACKGROUND	0x01000000
+#endif /* CONFIG_EMBEDDED */
+#define P_LVMRSRCOWNER	0x02000000	/* can handle the VM resource ownership */
 
 /* Process control state for resource starvation */
 #define P_PCTHROTTLE	1
@@ -426,7 +448,7 @@ struct	proc {
 #define PROC_SETACTION_STATE(p) (p->p_pcaction = (PROC_CONTROL_STATE(p) | (PROC_CONTROL_STATE(p) << 16)))
 #define PROC_RESETACTION_STATE(p) (p->p_pcaction = PROC_CONTROL_STATE(p))
 
-/* advisory flags in the proc */
+/* additional process flags */
 #define P_LADVLOCK		0x01
 
 /* defns for proc_iterate */
@@ -580,10 +602,10 @@ extern lck_mtx_t * proc_list_mlock;
 extern lck_mtx_t * proc_klist_mlock;
 
 #define BSD_SIMUL_EXECS		33 /* 32 , allow for rounding */
-#define	BSD_PAGABLE_MAP_SIZE	(BSD_SIMUL_EXECS * (NCARGS + PAGE_SIZE))
-__private_extern__ int execargs_cache_size;
-__private_extern__ int execargs_free_count;
-__private_extern__ vm_offset_t * execargs_cache;
+#define	BSD_PAGEABLE_SIZE_PER_EXEC	(NCARGS + PAGE_SIZE + PAGE_SIZE) /* page for apple vars, page for executable header */
+extern int execargs_cache_size;
+extern int execargs_free_count;
+extern vm_offset_t * execargs_cache;
 
 #define SESS_LEADER(p, sessp)	((sessp)->s_leader == (p))
 
@@ -611,9 +633,11 @@ extern LIST_HEAD(sesshashhead, session) *sesshashtbl;
 extern u_long sesshash;
 
 extern lck_grp_t * proc_lck_grp;
+#if CONFIG_FINE_LOCK_GROUPS
 extern lck_grp_t * proc_mlock_grp;
 extern lck_grp_t * proc_fdmlock_grp;
 extern lck_grp_t * proc_slock_grp;
+#endif
 extern lck_grp_attr_t * proc_lck_grp_attr;
 extern lck_attr_t * proc_lck_attr;
 
@@ -638,6 +662,9 @@ __private_extern__ int proc_core_name(const char *name, uid_t uid, pid_t pid,
 		char *cr_name, size_t cr_name_len);
 extern int isinferior(struct proc *, struct proc *);
 __private_extern__ struct proc *pzfind(pid_t);	/* Find zombie by id. */
+__private_extern__ struct proc *proc_find_zombref(pid_t);	/* Find zombie by id. */
+__private_extern__ void proc_drop_zombref(struct proc * p);	/* Drop reference on a zombie. */
+
 
 extern struct	lctx *lcfind(pid_t);		/* Find a login context by id */
 extern struct	lctx *lccreate(void);		/* Create a new login context */
@@ -699,6 +726,7 @@ void proc_transcommit(struct proc *, int locked);
 void proc_transend(struct proc *, int locked);
 int  proc_transwait(struct proc *, int locked);
 void  proc_rele_locked(struct proc *  p);
+struct proc *proc_ref_locked(struct proc *  p);
 void proc_knote(struct proc * p, long hint);
 void proc_knote_drain(struct proc *p);
 void workqueue_init_lock(proc_t p);
diff --git a/bsd/sys/process_policy.h b/bsd/sys/process_policy.h
new file mode 100644
index 000000000..19f3c2617
--- /dev/null
+++ b/bsd/sys/process_policy.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2010 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _SYS_PROCESS_POLICY_H
+#define _SYS_PROCESS_POLICY_H
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <stdint.h>
+
+__BEGIN_DECLS
+
+/* defns of scope */
+#define PROC_POLICY_SCOPE_PROCESS	1	/* the policy setting is for process-wide effect */
+#define PROC_POLICY_SCOPE_THREAD	2	/* the policy setting is for a thread inside a proc */
+
+/* defns of actions with no attributes */
+#define PROC_POLICY_ACTION_APPLY	1	/* enforce the set policy */
+#define PROC_POLICY_ACTION_RESTORE	2	/* revert the applied action */
+#define PROC_POLICY_ACTION_DENYINHERIT	3	/* disallow inheritance of the specified policy */
+#define PROC_POLICY_ACTION_DENYSELFSET	4	/* disallow the process from setting its own policy */
+#define PROC_POLICY_ACTION_ENABLE	5	/* enable policy and its actions */
+#define PROC_POLICY_ACTION_DISABLE	6	/* disable policy and its actions, also clears any actions that have already happened */
+/* defns of actions with attributes */
+#define PROC_POLICY_ACTION_SET		10	/* set the policy attributes */
+#define PROC_POLICY_ACTION_GET		11	/* get the policy attributes */
+#define PROC_POLICY_ACTION_ADD		12	/* add a policy attribute */
+#define PROC_POLICY_ACTION_REMOVE	13	/* remove a policy attribute */
+
+/* policies */
+#define PROC_POLICY_NONE		0
+#define PROC_POLICY_BACKGROUND		1	/* darwin background policy */
+#define PROC_POLICY_HARDWARE_ACCESS	2	/* access to various hardware */
+#define PROC_POLICY_RESOURCE_STARVATION	3	/* behavior on resource starvation */
+#define PROC_POLICY_RESOURCE_USAGE	4	/* behavior on resource consumption */
+#define PROC_POLICY_RESERVED		5	/* reserved for future use */
+#define PROC_POLICY_APPTYPE		6	/* application type handling */
+
+/* sub policies for background policy */
+#define PROC_POLICY_BG_NONE		0	/* none */
+#define PROC_POLICY_BG_LOWCPUPRI	1	/* Low cpu priority */
+#define PROC_POLICY_BG_DISKTHROTTLE 	2	/* disk accesses throttled */
+#define PROC_POLICY_BG_NETTHROTTLE 	4	/* network accesses throttled */
+#define PROC_POLICY_BG_GPUDENY	 	8	/* no access to GPU */
+#if CONFIG_EMBEDDED
+#define PROC_POLICY_BG_ALL            0x0F
+#else /* CONFIG_EMBEDDED */
+#define PROC_POLICY_BG_ALL            0x07
+#endif /* CONFIG_EMBEDDED */
+#define PROC_POLICY_BG_DEFAULT	 	PROC_POLICY_BG_ALL
+
+/* sub policies for hardware */
+#define PROC_POLICY_HWACCESS_NONE	0
+#define PROC_POLICY_HWACCESS_DISK	1	/* disk access */
+#define PROC_POLICY_HWACCESS_GPU	2	/* GPU access */
+#define PROC_POLICY_HWACCESS_NETWORK	3	/* network access */
+#define PROC_POLICY_HWACCESS_CPU	4	/* cpu access */
+
+/* attribute values for disk hardware access; a bit different, as they should reflect IOPOL_XXX */
+#define PROC_POLICY_DISKACC_NONE	0
+#define PROC_POLICY_DISKACC_NORMAL	1	/* normal access to the disk */
+#define PROC_POLICY_DISKACC_PASSIVE	2	/* treat the I/Os as passive */
+#define PROC_POLICY_DISKACC_THROTTLE	3	/* throttle the disk IOs */
+#define PROC_POLICY_DISKACC_DEFAULT	0
+
+/* attribute values for GPU hardware access */
+#define PROC_POLICY_GPUACC_NONE	0
+#define PROC_POLICY_GPUACC_FULLACCESS	0	/* complete access to the GPU */
+#define PROC_POLICY_GPUACC_DENYACCESS	1	/* deny any access to the GPU */
+#define PROC_POLICY_GPUACC_DEFAULT	0	/*  default is complete access */
+
+/* attribute values for network hardware access */
+#define PROC_POLICY_NETACC_NONE	0
+#define PROC_POLICY_NETACC_NORMAL	0	/* complete access to the network */
+#define PROC_POLICY_NETACC_THROTTLE	1	/* throttle access to network */
+#define PROC_POLICY_NETACC_DEFAULT	0	/*  default is complete access */
+
+/* attribute values for CPU hardware access */
+#define PROC_POLICY_CPUACC_NONE		0
+#define PROC_POLICY_CPUACC_ALL		0	/* access to all available cpus */
+#define PROC_POLICY_CPUACC_ONE		1	/* access to only one available cpu */
+#define PROC_POLICY_CPUACC_LLCACHE	2	/* access to only one last level cache */
+#define PROC_POLICY_CPUACC_DEFAULT	0	/*  default is access to all cpus */
+
+
+/* System Resource management (ie usage and starvation related) definitions */
+
+/* sub policies for resource starvation */
+#define PROC_POLICY_RS_NONE		0
+#define PROC_POLICY_RS_VIRTUALMEM	1	/* virtual memory starvation */
+
+/* sub policies for resource usage */
+#define PROC_POLICY_RUSAGE_NONE		0
+#define PROC_POLICY_RUSAGE_WIREDMEM	1	/* wired memory usages */
+#define PROC_POLICY_RUSAGE_VIRTMEM	2	/* virtual memory usage */
+#define PROC_POLICY_RUSAGE_CPU		3	/* amount of cpu usage */
+#define PROC_POLICY_RUSAGE_DISK		4	/* amount of disk usage */
+#define PROC_POLICY_RUSAGE_NETWORK	5	/* amount of network usage */
+#define PROC_POLICY_RUSAGE_POWER	6	/* amount of power/battery consumption */
+
+/* attribute values for the resource usage and low resource */
+#define PROC_POLICY_RSRCACT_NONE	0
+#define PROC_POLICY_RSRCACT_THROTTLE	1	/* throttle on resource condition */
+#define PROC_POLICY_RSRCACT_SUSPEND	2	/* suspend on resource condition */
+#define PROC_POLICY_RSRCACT_TERMINATE	3	/* kill on resource condition */
+#define PROC_POLICY_RSRCACT_NOTIFY	4	/* send kqueue notification */
+
+
+/* type of resource for kqueue notification */
+#define PROC_POLICY_RSRTYPE_CPU		1
+#define PROC_POLICY_RSRTYPE_WIREDMEM	2
+#define PROC_POLICY_RSRTYPE_VIRTUALMEM	4
+#define PROC_POLICY_RSRTYPE_DISK	8
+#define PROC_POLICY_RSRTYPE_NETWORK	0x010
+#define PROC_POLICY_RSRTYPE_POWER	0x20
+
+
+typedef struct proc_policy_attribute {
+	uint32_t	ppattr_attribute;  /* the policy attribute to be modified or returned */
+	uint32_t	ppattr_resv;       /* pad field */
+	uint64_t	ppattr_value1;     /* 64bit policy specific attribute */
+	uint64_t	ppattr_value2;     /* 64bit policy specific attribute */
+	uint64_t	ppattr_value3;     /* 64bit policy specific attribute */
+	uint64_t	ppattr_resv1[4];    /* reserved for future use */
+} proc_policy_attribute_t;
+
+
+typedef struct proc_policy_cpuusage_attr {
+	uint32_t	ppattr_cpu_attr;  /* specified action as in PROC_POLICY_RSRCACT_xx */
+	uint32_t	ppattr_cpu_percentage;       /* percentage of interval */
+	uint64_t	ppattr_cpu_attr_interval;     /* 64bit interval in nsecs */
+	uint64_t	ppattr_cpu_attr_deadline;     /* 64bit deadline in nsecs */
+} proc_policy_cpuusage_attr_t;
+
+
+/* sub policies for PROC_POLICY_APPTYPE */
+#define PROC_POLICY_OSX_APPTYPE_NONE            0
+#define PROC_POLICY_OSX_APPTYPE_TAL             1	/* TAL-based launched application */
+#define PROC_POLICY_OSX_APPTYPE_WIDGET          2	/* for dashboard client */
+#define PROC_POLICY_OSX_APPTYPE_DASHCLIENT      2	/* renamed to move away from widget */
+#define PROC_POLICY_IOS_APPTYPE                 3	/* iOS-specific handling */
+#define PROC_POLICY_IOS_NONUITYPE               4	/* iOS non-UI (no graphics) type */
+
+#ifndef KERNEL
+int process_policy(int scope, int action, int policy, int policy_subtype, proc_policy_attribute_t * attrp, pid_t target_pid, uint64_t target_threadid);
+#endif /* !KERNEL */
+
+
+__END_DECLS
+
+#endif /* _SYS_PROCESS_POLICY_H */
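
The header above boils down to one private entry point. A minimal userland sketch of applying the background policy process-wide, assuming the private process_policy() wrapper is visible and follows the usual -1/errno convention:

/* Hedged sketch: process_policy() is a private interface; acting on
 * the calling pid and the -1/errno convention are assumptions. */
#include <sys/process_policy.h>
#include <unistd.h>
#include <stdio.h>

int
main(void)
{
	/* apply the Darwin background policy to the calling process */
	if (process_policy(PROC_POLICY_SCOPE_PROCESS,
	    PROC_POLICY_ACTION_APPLY, PROC_POLICY_BACKGROUND,
	    PROC_POLICY_BG_DEFAULT, NULL, getpid(), 0ULL) == -1)
		perror("process_policy");
	return (0);
}
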
diff --git a/bsd/sys/protosw.h b/bsd/sys/protosw.h
index bdf36a317..75b6d6b28 100644
--- a/bsd/sys/protosw.h
+++ b/bsd/sys/protosw.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -70,7 +70,12 @@
 #include <sys/cdefs.h>
 
 #define	PR_SLOWHZ	2		/* 2 slow timeouts per second */
+#ifndef __APPLE__
+/*
+ * See rdar://7617868: pr_fasttimo was removed; use your own timer or pr_slowtimo instead
+ */
 #define	PR_FASTHZ	5		/* 5 fast timeouts per second */
+#endif
 
 #ifdef PRIVATE
 
@@ -105,7 +110,8 @@ struct socket_filter;
  * The userreq routine interfaces protocols to the system and is
  * described below.
  */
- 
+
+#include <sys/socket.h> 
 #include <sys/socketvar.h>
 #include <sys/queue.h>
 #ifdef KERNEL
@@ -132,8 +138,12 @@ struct protosw {
 	void	*pr_ousrreq;
 /* utility hooks */
 	void	(*pr_init)(void);	/* initialization hook */
+#if __APPLE__
+	void	(*pr_unused)(void);	/* placeholder - fasttimo is removed */
+#else
 	void	(*pr_fasttimo)(void);
 					/* fast timeout (200ms) */
+#endif
 	void	(*pr_slowtimo)(void);
 					/* slow timeout (500ms) */
 	void	(*pr_drain)(void);
@@ -408,6 +418,7 @@ char	*prcorequests[] = {
 
 __BEGIN_DECLS
 void domaininit(void) __attribute__((section("__TEXT, initcode")));
+void domainfin(void) __attribute__((section("__TEXT, fincode")));
 
 void	pfctlinput(int, struct sockaddr *);
 void	pfctlinput2(int, struct sockaddr *, void *);
@@ -418,6 +429,7 @@ struct protosw *pffindtype(int family, int type);
 extern int net_add_proto(struct protosw *, struct domain *);
 extern int net_del_proto(int, int, struct domain *);
 
+extern u_int64_t net_uptime(void);
 __END_DECLS
 
 /* Temp hack to link static domains together */
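
With pr_fasttimo gone (see the rdar note above), a protocol that still needs a fast periodic callout is expected to arm its own timer. A hedged kernel-side sketch using the thread_call KPI; all myproto_* names are hypothetical:

/* Hedged sketch: a self-rearming 200ms callout standing in for the
 * removed pr_fasttimo.  All myproto_* names are hypothetical. */
#include <kern/thread_call.h>
#include <kern/clock.h>

static thread_call_t myproto_tcall;

static void
myproto_fasttimo(thread_call_param_t p0, thread_call_param_t p1)
{
#pragma unused(p0, p1)
	uint64_t deadline;

	/* ... do the housekeeping the old pr_fasttimo did ... */

	/* re-arm for 200ms from now */
	clock_interval_to_deadline(200, NSEC_PER_MSEC, &deadline);
	thread_call_enter_delayed(myproto_tcall, deadline);
}

static void
myproto_init(void)	/* hung off protosw.pr_init */
{
	uint64_t deadline;

	myproto_tcall = thread_call_allocate(myproto_fasttimo, NULL);
	clock_interval_to_deadline(200, NSEC_PER_MSEC, &deadline);
	thread_call_enter_delayed(myproto_tcall, deadline);
}
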
diff --git a/bsd/sys/pthread_internal.h b/bsd/sys/pthread_internal.h
index 7d0cfae29..6cc80f5f3 100644
--- a/bsd/sys/pthread_internal.h
+++ b/bsd/sys/pthread_internal.h
@@ -29,12 +29,34 @@
 #ifndef _SYS_PTHREAD_INTERNAL_H_
 #define _SYS_PTHREAD_INTERNAL_H_
 
-#undef pthread_mutexattr_t;
-
+#include <sys/user.h>
 #include <kern/thread_call.h>
 
+struct ksyn_waitq_element {
+	TAILQ_ENTRY(ksyn_waitq_element) kwe_list;	/* link to other list members */
+	void *          kwe_kwqqueue;            	/* queue blocked on */
+	uint32_t	kwe_flags;			/* flags */
+	uint32_t        kwe_lockseq;			/* the sequence of the entry */
+	uint32_t	kwe_count;			/* upper bound on number of matches still pending */
+	uint32_t 	kwe_psynchretval;		/* thread retval */
+	void		*kwe_uth;			/* uthread */
+};
+typedef struct ksyn_waitq_element * ksyn_waitq_element_t;
+
+/* kwe_flags definitions */
+#define KWE_THREAD_INWAIT       1
+#define KWE_THREAD_PREPOST      2
+#define KWE_THREAD_BROADCAST    4
+
+
 #define WORKITEM_SIZE 64
-#define WORKQUEUE_NUMPRIOS 3
+
+#define WORKQUEUE_HIGH_PRIOQUEUE    0       /* high priority queue */
+#define WORKQUEUE_DEFAULT_PRIOQUEUE 1       /* default priority queue */
+#define WORKQUEUE_LOW_PRIOQUEUE     2       /* low priority queue */
+#define WORKQUEUE_BG_PRIOQUEUE      3       /* background priority queue */
+
+#define WORKQUEUE_NUMPRIOS 4
 
 #define WORKQUEUE_OVERCOMMIT	0x10000
 
@@ -57,6 +79,8 @@ struct threadlist {
 #define TH_LIST_SUSPENDED 	0x08
 #define TH_LIST_BUSY		0x10
 #define TH_LIST_NEED_WAKEUP	0x20
+#define TH_LIST_CONSTRAINED	0x40
+
 
 struct workitem {
 	TAILQ_ENTRY(workitem) wi_entry;
@@ -83,6 +107,7 @@ struct workqueue {
 	uint32_t	wq_timer_interval;
         uint32_t	wq_affinity_max;
         uint32_t	wq_threads_scheduled;
+	uint32_t	wq_constrained_threads_scheduled;
 	uint32_t	wq_nthreads;
         uint32_t      	wq_thidlecount;
 	uint32_t	wq_reqconc[WORKQUEUE_NUMPRIOS];	  /* requested concurrency for each priority level */
@@ -100,6 +125,8 @@ struct workqueue {
 
 #define WQL_ATIMER_BUSY		0x01
 #define WQL_ATIMER_WAITING	0x02
+#define WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT    0x04
+#define WQL_EXCEEDED_TOTAL_THREAD_LIMIT          0x08
 
 
 #define WQ_VECT_SET_BIT(vector, bit)	\
@@ -121,7 +148,7 @@ struct workqueue {
 
 /* workq_kernreturn commands */
 #define WQOPS_QUEUE_ADD 1
-#define WQOPS_QUEUE_REMOVE 2
+#define WQOPS_QUEUE_REMOVE_OBSOLETE 2 
 #define WQOPS_THREAD_RETURN 4
 #define WQOPS_THREAD_SETCONC  8
 
@@ -129,12 +156,12 @@ struct workqueue {
 #define PTH_DEFAULT_GUARDSIZE 4*1024
 #define MAX_PTHREAD_SIZE 64*1024
 
-void workqueue_exit(struct proc *);
-
-void pthread_init(void);
 extern lck_grp_attr_t   *pthread_lck_grp_attr;
 extern lck_grp_t    *pthread_lck_grp;
 extern lck_attr_t   *pthread_lck_attr;
 
+void workqueue_exit(struct proc *);
+void pthread_init(void);
+void psynch_zoneinit(void);
 #endif /* _SYS_PTHREAD_INTERNAL_H_ */
 
diff --git a/bsd/sys/queue.h b/bsd/sys/queue.h
index a8e96cd4c..9ccb63e74 100644
--- a/bsd/sys/queue.h
+++ b/bsd/sys/queue.h
@@ -133,8 +133,11 @@
  * _INSERT_AFTER		+	+	+	+	+
  * _INSERT_TAIL			-	-	+	+	+
  * _CONCAT			-	-	+	+	-
+ * _REMOVE_AFTER		+	-	+	-	-
  * _REMOVE_HEAD			+	-	+	-	-
+ * _REMOVE_HEAD_UNTIL		-	-	+	-	-
  * _REMOVE			+	+	+	+	+
+ * _SWAP			-	+	+	+	-
  *
  */
 #ifdef QUEUE_MACRO_DEBUG
@@ -232,12 +235,16 @@ struct {								\
 		struct type *curelm = SLIST_FIRST((head));		\
 		while (SLIST_NEXT(curelm, field) != (elm))		\
 			curelm = SLIST_NEXT(curelm, field);		\
-		SLIST_NEXT(curelm, field) =				\
-		    SLIST_NEXT(SLIST_NEXT(curelm, field), field);	\
+		SLIST_REMOVE_AFTER(curelm, field);			\
 	}								\
 	TRASHIT((elm)->field.sle_next);					\
 } while (0)
 
+#define SLIST_REMOVE_AFTER(elm, field) do {				\
+	SLIST_NEXT(elm, field) =					\
+	    SLIST_NEXT(SLIST_NEXT(elm, field), field);			\
+} while (0)
+
 #define	SLIST_REMOVE_HEAD(head, field) do {				\
 	SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field);	\
 } while (0)
@@ -324,9 +331,7 @@ struct {								\
 		struct type *curelm = STAILQ_FIRST((head));		\
 		while (STAILQ_NEXT(curelm, field) != (elm))		\
 			curelm = STAILQ_NEXT(curelm, field);		\
-		if ((STAILQ_NEXT(curelm, field) =			\
-		     STAILQ_NEXT(STAILQ_NEXT(curelm, field), field)) == NULL)\
-			(head)->stqh_last = &STAILQ_NEXT((curelm), field);\
+		STAILQ_REMOVE_AFTER(head, curelm, field);		\
 	}								\
 	TRASHIT((elm)->field.stqe_next);				\
 } while (0)
@@ -337,11 +342,31 @@ struct {								\
 		(head)->stqh_last = &STAILQ_FIRST((head));		\
 } while (0)
 
-#define	STAILQ_REMOVE_HEAD_UNTIL(head, elm, field) do {			\
-	if ((STAILQ_FIRST((head)) = STAILQ_NEXT((elm), field)) == NULL)	\
-		(head)->stqh_last = &STAILQ_FIRST((head));		\
+#define STAILQ_REMOVE_HEAD_UNTIL(head, elm, field) do {                 \
+       if ((STAILQ_FIRST((head)) = STAILQ_NEXT((elm), field)) == NULL) \
+               (head)->stqh_last = &STAILQ_FIRST((head));              \
 } while (0)
 
+#define STAILQ_REMOVE_AFTER(head, elm, field) do {			\
+	if ((STAILQ_NEXT(elm, field) =					\
+	     STAILQ_NEXT(STAILQ_NEXT(elm, field), field)) == NULL)	\
+		(head)->stqh_last = &STAILQ_NEXT((elm), field);		\
+} while (0)
+
+#define STAILQ_SWAP(head1, head2, type) do {				\
+	struct type *swap_first = STAILQ_FIRST(head1);			\
+	struct type **swap_last = (head1)->stqh_last;			\
+	STAILQ_FIRST(head1) = STAILQ_FIRST(head2);			\
+	(head1)->stqh_last = (head2)->stqh_last;			\
+	STAILQ_FIRST(head2) = swap_first;				\
+	(head2)->stqh_last = swap_last;					\
+	if (STAILQ_EMPTY(head1))					\
+		(head1)->stqh_last = &STAILQ_FIRST(head1);		\
+	if (STAILQ_EMPTY(head2))					\
+		(head2)->stqh_last = &STAILQ_FIRST(head2);		\
+} while (0)
+
+
 /*
  * List declarations.
  */
@@ -444,6 +469,16 @@ struct {								\
 	TRASHIT((elm)->field.le_prev);					\
 } while (0)
 
+#define LIST_SWAP(head1, head2, type, field) do {			\
+	struct type *swap_tmp = LIST_FIRST((head1));			\
+	LIST_FIRST((head1)) = LIST_FIRST((head2));			\
+	LIST_FIRST((head2)) = swap_tmp;					\
+	if ((swap_tmp = LIST_FIRST((head1))) != NULL)			\
+		swap_tmp->field.le_prev = &LIST_FIRST((head1));		\
+	if ((swap_tmp = LIST_FIRST((head2))) != NULL)			\
+		swap_tmp->field.le_prev = &LIST_FIRST((head2));		\
+} while (0)
+
 /*
  * Tail queue declarations.
  */
@@ -574,6 +609,23 @@ struct {								\
 	QMD_TRACE_ELEM(&(elm)->field);					\
 } while (0)
 
+#define TAILQ_SWAP(head1, head2, type, field) do {                      \
+	struct type *swap_first = (head1)->tqh_first;                   \
+	struct type **swap_last = (head1)->tqh_last;                    \
+	(head1)->tqh_first = (head2)->tqh_first;                        \
+	(head1)->tqh_last = (head2)->tqh_last;                          \
+	(head2)->tqh_first = swap_first;                                \
+	(head2)->tqh_last = swap_last;                                  \
+	if ((swap_first = (head1)->tqh_first) != NULL)                  \
+		swap_first->field.tqe_prev = &(head1)->tqh_first;       \
+	else                                                            \
+		(head1)->tqh_last = &(head1)->tqh_first;                \
+	if ((swap_first = (head2)->tqh_first) != NULL)                  \
+		swap_first->field.tqe_prev = &(head2)->tqh_first;       \
+	else                                                            \
+		(head2)->tqh_last = &(head2)->tqh_first;                \
+} while (0)
+
 /*
  * Circular queue definitions.
  */
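
The new _SWAP and _REMOVE_AFTER macros slot into the existing API; a small self-contained example of TAILQ_SWAP, assuming a <sys/queue.h> that carries these additions:

/* Hedged sketch: O(1) exchange of two tail queues via TAILQ_SWAP. */
#include <sys/queue.h>
#include <stdio.h>

struct entry {
	int val;
	TAILQ_ENTRY(entry) link;
};
TAILQ_HEAD(entryhead, entry);

int
main(void)
{
	struct entryhead a = TAILQ_HEAD_INITIALIZER(a);
	struct entryhead b = TAILQ_HEAD_INITIALIZER(b);
	struct entry e1, *ep;

	e1.val = 1;
	TAILQ_INSERT_TAIL(&a, &e1, link);

	/* swap the two heads; element back-pointers are patched up,
	 * and an empty list is reset to point at its own head */
	TAILQ_SWAP(&a, &b, entry, link);

	TAILQ_FOREACH(ep, &b, link)
		printf("%d\n", ep->val);	/* prints 1 */
	return (0);
}
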
diff --git a/bsd/sys/reboot.h b/bsd/sys/reboot.h
index f79f2a9e2..6c64e53b8 100644
--- a/bsd/sys/reboot.h
+++ b/bsd/sys/reboot.h
@@ -135,7 +135,7 @@
 #include <machine/reboot.h>
 
 __BEGIN_DECLS
-void	boot(int, int, char *);
+int	boot(int, int, char *);
 __END_DECLS
 
 #define PROC_SHUTDOWN_LOG "/var/log/kernel-shutdown.log"
diff --git a/bsd/sys/resource.h b/bsd/sys/resource.h
index 72c969c12..fbe8e6266 100644
--- a/bsd/sys/resource.h
+++ b/bsd/sys/resource.h
@@ -68,6 +68,9 @@
 #include <sys/cdefs.h>
 #include <sys/_types.h>
 
+#ifndef KERNEL 
+#include <Availability.h>
+#endif
 
 /* [XSI] The timeval structure shall be defined as described in
  * <sys/time.h>
@@ -121,6 +124,12 @@ typedef __uint64_t	rlim_t;
  */
 #define PRIO_DARWIN_BG 0x1000
 
+/*
+ * Use PRIO_DARWIN_NONUI to restrict a process's ability to make calls to
+ * the GPU.
+ */
+#define PRIO_DARWIN_NONUI 0x1001
+
 #endif	/* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 
 
@@ -305,13 +314,13 @@ struct _iopol_param_t {
 __BEGIN_DECLS
 int	getpriority(int, id_t);
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-int	getiopolicy_np(int, int);
+int	getiopolicy_np(int, int) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
 #endif /* !_POSIX_C_SOURCE || _DARWIN_C_SOURCE */
 int	getrlimit(int, struct rlimit *) __DARWIN_ALIAS(getrlimit);
 int	getrusage(int, struct rusage *);
 int	setpriority(int, id_t, int);
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-int	setiopolicy_np(int, int, int);
+int	setiopolicy_np(int, int, int) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
 #endif /* !_POSIX_C_SOURCE || _DARWIN_C_SOURCE */
 int	setrlimit(int, const struct rlimit *) __DARWIN_ALIAS(setrlimit);
 __END_DECLS
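
The new PRIO_DARWIN_NONUI sits beside PRIO_DARWIN_BG, and the now-annotated iopolicy calls cover the I/O side. A short sketch of a process demoting itself, assuming the PRIO_DARWIN_PROCESS scope and IOPOL_* constants defined earlier in this header:

/* Hedged sketch: put the calling process in the background band and
 * throttle its disk I/O. */
#include <sys/resource.h>
#include <stdio.h>

int
main(void)
{
	/* PRIO_DARWIN_PROCESS scopes the change to the whole process */
	if (setpriority(PRIO_DARWIN_PROCESS, 0, PRIO_DARWIN_BG) == -1)
		perror("setpriority");

	if (setiopolicy_np(IOPOL_TYPE_DISK, IOPOL_SCOPE_PROCESS,
	    IOPOL_THROTTLE) == -1)
		perror("setiopolicy_np");
	return (0);
}
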
diff --git a/bsd/sys/sdt_impl.h b/bsd/sys/sdt_impl.h
index cbd117b61..e9531067c 100644
--- a/bsd/sys/sdt_impl.h
+++ b/bsd/sys/sdt_impl.h
@@ -74,6 +74,9 @@ struct module {
 };
 
 extern int sdt_invop(uintptr_t, uintptr_t *, uintptr_t);
+#if defined (__APPLE__)
+extern uint64_t sdt_getarg(void *, dtrace_id_t, void *, int, int);
+#endif /* __APPLE__ */    
 
 void sdt_provide_module(void *, struct modctl *);
 void sdt_init(void);
@@ -85,8 +88,6 @@ extern int          sdt_probetab_mask;
 
 #if defined(__i386__) || defined(__x86_64__)
 typedef uint8_t sdt_instr_t;
-#elif defined(__ppc__) || defined(__ppc64__)
-typedef uint32_t sdt_instr_t;
 #else
 #error Unknown implementation
 #endif
diff --git a/bsd/sys/signal.h b/bsd/sys/signal.h
index faa6fcc1d..d2e40fd76 100644
--- a/bsd/sys/signal.h
+++ b/bsd/sys/signal.h
@@ -145,12 +145,6 @@
 #define __need_mcontext_t
 #define __need_stack_t
 #define __need_ucontext_t
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#if defined(__ppc__) || defined(__ppc64__)
-#define __need_mcontext64_t
-#define __need_ucontext64_t
-#endif /* __ppc__  || __ppc64__ */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
 #include <sys/_structs.h>
 
 #ifndef _PID_T
diff --git a/bsd/sys/socket.h b/bsd/sys/socket.h
index 6b37dfced..3fc35997c 100644
--- a/bsd/sys/socket.h
+++ b/bsd/sys/socket.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -76,6 +76,14 @@
 #include <sys/cdefs.h>
 #include <machine/_param.h>
 
+#ifdef PRIVATE
+#include <sys/param.h>
+#endif /* PRIVATE */
+
+#ifndef KERNEL 
+#include <Availability.h>
+#endif
+
 /*
  * Definitions related to sockets: types, address families, options.
  */
@@ -130,6 +138,23 @@ struct iovec {
 	size_t	 iov_len;	/* [XSI] Size of region iov_base points to */
 };
 #endif
+
+#ifdef PRIVATE
+#define SO_TCDBG_PID	0x01	/* Set/get traffic class for PID */
+#define SO_TCDBG_PNAME	0x02	/* Set/get traffic class for processes of that name */
+#define SO_TCDBG_PURGE	0x04	/* Purge entries for unused PIDs */
+#define SO_TCDBG_FLUSH	0x08	/* Flush all entries */
+#define SO_TCDBG_COUNT	0x10	/* Get count of entries */
+#define SO_TCDBG_LIST	0x20	/* List entries */
+
+struct so_tcdbg {
+	u_int32_t	so_tcdbg_cmd;
+	int32_t		so_tcdbg_tclass;
+	u_int32_t	so_tcdbg_count;
+	pid_t		so_tcdbg_pid;
+	char		so_tcdbg_pname[MAXCOMLEN + 1];
+};
+#endif /* PRIVATE */
  
 /*
  * Types
@@ -161,6 +186,7 @@ struct iovec {
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
 #define	SO_REUSEPORT	0x0200		/* allow local address & port reuse */
 #define	SO_TIMESTAMP	0x0400		/* timestamp received dgram traffic */
+#define SO_TIMESTAMP_MONOTONIC	0x0800	/* Monotonically increasing timestamp on rcvd dgram */
 #ifndef __APPLE__
 #define	SO_ACCEPTFILTER	0x1000		/* there is an accept filter */
 #else
@@ -184,6 +210,8 @@ struct iovec {
 #define	SO_TYPE		0x1008		/* get socket type */
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
 /*efine	SO_PRIVSTATE	0x1009		   get/deny privileged state */
+#define SO_LABEL        0x1010          /* socket's MAC label */
+#define SO_PEERLABEL    0x1011          /* socket's peer MAC label */
 #ifdef __APPLE__
 #define SO_NREAD	0x1020		/* APPLE: get 1st-packet byte count */
 #define SO_NKE		0x1021		/* APPLE: Install socket-level NKE */
@@ -203,16 +231,26 @@ struct iovec {
 #define SO_RANDOMPORT   0x1082  /* APPLE: request local port randomization */
 #define SO_NP_EXTENSIONS	0x1083	/* To turn off some POSIX behavior */
 #endif
+
 #ifdef PRIVATE
 #define	SO_EXECPATH	0x1085 		/* Application Firewall Socket option */
-#define SO_TRAFFIC_CLASS	0x1086		/* Traffic class */
+#define SO_TRAFFIC_CLASS		0x1086		/* Traffic class (int) */
 #define  SO_TC_BE	0		/* Best effort, normal */
 #define  SO_TC_BK	1		/* Background, low priority or bulk traffic */
 #define  SO_TC_VI	2		/* Interactive video, constant bit rate, low latency */
 #define  SO_TC_VO	3		/* Interactive voice, constant bit rate, lowest latency */
-#endif
-#define	SO_LABEL	0x1010		/* socket's MAC label */
-#define	SO_PEERLABEL	0x1011		/* socket's peer MAC label */
+#define  SO_TC_MAX	4		/* Max traffic class value */
+
+/* Background socket configuration flags */
+#define TRAFFIC_MGT_SO_BACKGROUND       0x0001  /* background socket */
+#define TRAFFIC_MGT_TCP_RECVBG          0x0002  /* Only TCP sockets, receiver throttling */
+
+#define SO_RECV_TRAFFIC_CLASS	0x1087		/* Receive traffic class (bool) */
+#define SO_TRAFFIC_CLASS_DBG	0x1088		/* Debug traffic class (struct so_tcdbg) */
+#define SO_TRAFFIC_CLASS_STATS	0x1089		/* Traffic class statistics */
+#define	SO_DEFUNCTOK	0x1100		/* can be defunct'd */
+#define	SO_ISDEFUNCT	0x1101		/* get defunct status */
+#endif /* PRIVATE */
 #endif	/* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 
 /*
@@ -429,6 +467,9 @@ struct sockaddr_storage {
  */
 #define	PF_VLAN		((uint32_t)0x766c616e)	/* 'vlan' */
 #define PF_BOND		((uint32_t)0x626f6e64)	/* 'bond' */
+#ifdef KERNEL_PRIVATE
+#define PF_BRIDGE	((uint32_t)0x62726467)	/* 'brdg' */
+#endif /* KERNEL_PRIVATE */
 
 /*
  * Definitions for network related sysctl, CTL_NET.
@@ -492,14 +533,18 @@ struct sockaddr_storage {
  *	Fifth: type of info, defined below
  *	Sixth: flag(s) to mask with for NET_RT_FLAGS
  */
-#define NET_RT_DUMP			1		/* dump; may limit to a.f. */
-#define NET_RT_FLAGS		2		/* by flags, e.g. RESOLVING */
-#define NET_RT_IFLIST		3		/* survey interface list */
-#define NET_RT_STAT			4		/* routing statistics */
-#define NET_RT_TRASH		5		/* routes not in table but not freed */
-#define NET_RT_IFLIST2	6		/* interface list with addresses */
-#define NET_RT_DUMP2                     7               /* dump; may limit to a.f. */
-#define	NET_RT_MAXID		8
+#define NET_RT_DUMP		1	/* dump; may limit to a.f. */
+#define NET_RT_FLAGS		2	/* by flags, e.g. RESOLVING */
+#define NET_RT_IFLIST		3	/* survey interface list */
+#define NET_RT_STAT		4	/* routing statistics */
+#define NET_RT_TRASH		5	/* routes not in table but not freed */
+#define NET_RT_IFLIST2		6	/* interface list with addresses */
+#define NET_RT_DUMP2		7	/* dump; may limit to a.f. */
+#ifdef PRIVATE
+#define	NET_RT_DUMPX		8	/* private */
+#define	NET_RT_DUMPX_FLAGS	9	/* private */
+#endif /* PRIVATE */
+#define	NET_RT_MAXID		10
 #endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
 
 #ifdef KERNEL_PRIVATE
@@ -512,6 +557,8 @@ struct sockaddr_storage {
 	{ "trash", CTLTYPE_INT }, \
 	{ "iflist2", CTLTYPE_STRUCT }, \
         { "dump2", CTLTYPE_STRUCT }, \
+        { "dumpx", CTLTYPE_STRUCT }, \
+        { "dumpx_flags", CTLTYPE_STRUCT }, \
 }
 
 #endif /* KERNEL_PRIVATE */
@@ -595,7 +642,13 @@ struct user32_msghdr {
 #define	MSG_DONTWAIT	0x80		/* this message should be nonblocking */
 #define	MSG_EOF		0x100		/* data completes connection */
 #ifdef __APPLE__
+#ifndef PRIVATE
+#ifdef __APPLE_API_OBSOLETE
+#define MSG_WAITSTREAM  0x200           /* wait up to full request; may return partial */
+#endif
+#else
+#define MSG_WAITSTREAM  0x200           /* wait up to full request; may return partial */
+#endif
 #define MSG_FLUSH	0x400		/* Start of 'hold' seq; dump so_temp */
 #define MSG_HOLD	0x800		/* Hold frag in so_temp */
 #define MSG_SEND	0x1000		/* Send the packet in so_temp */
@@ -680,7 +733,7 @@ struct cmsgcred {
 	    ((unsigned char *)(mhdr)->msg_control +			\
 	     (mhdr)->msg_controllen)) ?					\
 	  (struct cmsghdr *)0L /* NULL */ :				\
-	  (struct cmsghdr *)((unsigned char *)(cmsg) +			\
+	  (struct cmsghdr *)(void *)((unsigned char *)(cmsg) +		\
 	 		    __DARWIN_ALIGN32((__uint32_t)(cmsg)->cmsg_len))))
 
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
@@ -694,10 +747,11 @@ struct cmsgcred {
 #endif	/* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 
 /* "Socket"-level control message types: */
-#define	SCM_RIGHTS	0x01		/* access rights (array of int) */
+#define	SCM_RIGHTS			0x01	/* access rights (array of int) */
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define	SCM_TIMESTAMP	0x02		/* timestamp (struct timeval) */
-#define	SCM_CREDS	0x03		/* process creds (struct cmsgcred) */
+#define	SCM_TIMESTAMP			0x02	/* timestamp (struct timeval) */
+#define	SCM_CREDS			0x03	/* process creds (struct cmsgcred) */
+#define	SCM_TIMESTAMP_MONOTONIC		0x04	/* timestamp (uint64_t) */ 
 
 #ifdef KERNEL_PRIVATE
 /*
@@ -792,7 +846,7 @@ ssize_t	sendto(int, const void *, size_t,
 		int, const struct sockaddr *, socklen_t) __DARWIN_ALIAS_C(sendto);
 int	setsockopt(int, int, int, const void *, socklen_t);
 int	shutdown(int, int);
-int	sockatmark(int);
+int	sockatmark(int) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
 int	socket(int, int, int);
 int	socketpair(int, int, int, int *) __DARWIN_ALIAS(socketpair);
 
@@ -804,7 +858,6 @@ int	sendfile(int, int, off_t, off_t *, struct sf_hdtr *, int);
 void	pfctlinput(int, struct sockaddr *);
 #endif	/* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 __END_DECLS
-
 #endif /* !KERNEL */
 
 #ifdef KERNEL
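
A quick sketch of the new socket options in use; SO_TRAFFIC_CLASS and SO_TC_* are PRIVATE, so this assumes a build that can see them:

/* Hedged sketch: mark a datagram socket as background traffic and ask
 * for monotonic receive timestamps. */
#include <sys/socket.h>
#include <stdio.h>

int
main(void)
{
	int s = socket(AF_INET, SOCK_DGRAM, 0);
	int tc = SO_TC_BK;	/* background, low priority or bulk */
	int on = 1;

	if (s == -1) {
		perror("socket");
		return (1);
	}
	if (setsockopt(s, SOL_SOCKET, SO_TRAFFIC_CLASS, &tc,
	    sizeof (tc)) == -1)
		perror("SO_TRAFFIC_CLASS");
	if (setsockopt(s, SOL_SOCKET, SO_TIMESTAMP_MONOTONIC, &on,
	    sizeof (on)) == -1)
		perror("SO_TIMESTAMP_MONOTONIC");
	/* timestamps then arrive as SCM_TIMESTAMP_MONOTONIC (uint64_t)
	 * control messages on recvmsg() */
	return (0);
}
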
diff --git a/bsd/sys/socketvar.h b/bsd/sys/socketvar.h
index 35560c65b..3c81716fe 100644
--- a/bsd/sys/socketvar.h
+++ b/bsd/sys/socketvar.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -108,6 +108,17 @@ extern	char netio[], netcon[], netcls[];
 
 typedef	u_quad_t so_gen_t;
 
+#ifdef PRIVATE
+#define SO_TC_STATS_MAX 4
+
+struct data_stats {
+        u_int64_t       rxpackets;
+        u_int64_t       rxbytes;
+        u_int64_t       txpackets;
+        u_int64_t       txbytes;
+};
+#endif /* PRIVATE */
+
 #ifdef KERNEL_PRIVATE
 #ifndef __APPLE__
 /* We don't support BSD style socket filters */
@@ -196,6 +207,7 @@ struct socket {
 	void	(*so_upcall)(struct socket *so, caddr_t arg, int waitf);
 	caddr_t	so_upcallarg;		/* Arg for above */
 	uid_t	so_uid;			/* who opened the socket */
+	gid_t	so_gid;			/* gid of whoever opened the socket */
 	/* NB: generation count must not be first; easiest to make it last. */
 	so_gen_t so_gencnt;		/* generation count */
 #ifndef __APPLE__
@@ -220,7 +232,7 @@ struct socket {
 #define	SOF_NOSIGPIPE	0x1
 #define	SOF_NOADDRAVAIL	0x2	/* EADDRNOTAVAIL if src addr is gone */
 #define	SOF_PCBCLEARING	0x4	/* pru_disconnect done; don't call pru_detach */
-#define	SOF_DEFUNCT	0x8	/* accepted socket marked as inactive */
+#define	SOF_DEFUNCT	0x8	/* socket marked as inactive */
 #define	SOF_CLOSEWAIT	0x10	/* blocked in close awaiting some events */
 #define	SOF_UPCALLINUSE	0x20	/* socket upcall is currently in progress */
 #define SOF_REUSESHAREUID	0x40	/* Allows SO_REUSEADDR/SO_REUSEPORT for multiple so_uid */
@@ -233,6 +245,9 @@ struct socket {
 #define	SOF_UPCALLCLOSEWAIT 0x800 /* block on close until an upcall returns  */
 #define SOF_BINDRANDOMPORT 0x1000 /* Request a randomized port number for the bind */
 #define SOF_NPX_SETOPTSHUT 0x2000 /* Non POSIX extension to allow setsockopt(2) after shut down */
+#define SOF_RECV_TRAFFIC_CLASS	0x4000	/* Receive traffic class as ancillary data */
+#define	SOF_NODEFUNCT	0x8000	/* socket cannot be defunct'd */
+#define SOF_INCOMP_INPROGRESS 0x10000 /* incomp socket still being processed */
 	int	so_usecount;	/* refcounting of socket use */;
 	int	so_retaincnt;
 	u_int32_t so_filteruse;	/* usecount for the socket filters */
@@ -252,10 +267,36 @@ struct socket {
 	struct	label *so_label;	/* MAC label for socket */
 	struct	label *so_peerlabel;	/* cached MAC label for socket peer */
 	thread_t	so_background_thread;	/* thread that marked this socket background */
-#if PKT_PRIORITY
 	int		so_traffic_class;
-#endif /* PKT_PRIORITY */
+
+	/* last process to interact with this socket */
+	u_int64_t	last_upid;
+	pid_t		last_pid;
+
+	struct data_stats	so_tc_stats[SO_TC_STATS_MAX];
 };
+
+/* Control message accessor in mbufs */
+
+#define _MIN_NXT_CMSGHDR_PTR(cmsg)                              \
+	((char *)(cmsg) +                                       \
+	    __DARWIN_ALIGN32((__uint32_t)(cmsg)->cmsg_len) +    \
+	    __DARWIN_ALIGN32(sizeof(struct cmsghdr)))
+
+#define M_FIRST_CMSGHDR(m)                                                                      \
+        ((char *)(m) != (char *)0L && (size_t)(m)->m_len >= sizeof(struct cmsghdr) &&           \
+	  (socklen_t)(m)->m_len >= __DARWIN_ALIGN32(((struct cmsghdr *)(m)->m_data)->cmsg_len) ?\
+         (struct cmsghdr *)(m)->m_data :                                                        \
+         (struct cmsghdr *)0L)
+
+#define M_NXT_CMSGHDR(m, cmsg)                                                  \
+        ((char *)(cmsg) == (char *)0L ? M_FIRST_CMSGHDR(m) :                    \
+            _MIN_NXT_CMSGHDR_PTR(cmsg) > ((char *)(m)->m_data) + (m)->m_len ||  \
+            _MIN_NXT_CMSGHDR_PTR(cmsg) < (char *)(m)->m_data ?                  \
+                (struct cmsghdr *)0L /* NULL */ :                               \
+                (struct cmsghdr *)((unsigned char *)(cmsg) +                    \
+                            __DARWIN_ALIGN32((__uint32_t)(cmsg)->cmsg_len)))
+
 #endif /* KERNEL_PRIVATE */
 
 /*
@@ -278,6 +319,7 @@ struct socket {
 #define	SS_ISDISCONNECTED	0x2000	/* socket disconnected from peer */
 #define	SS_DRAINING		0x4000	/* close waiting for blocked system
 					   calls to drain */
+#define	SS_DEFUNCT		0x8000	/* has been fully defunct'd */
 
 #if defined(__LP64__)
 #define	_XSOCKET_PTR(x)		u_int32_t
@@ -288,13 +330,13 @@ struct socket {
 #pragma pack(4)
 
 struct xsockbuf {
-		u_int32_t	sb_cc;
-		u_int32_t	sb_hiwat;
-		u_int32_t	sb_mbcnt;
-		u_int32_t	sb_mbmax;
-		int32_t		sb_lowat;
-		short		sb_flags;
-		short		sb_timeo;
+	u_int32_t	sb_cc;
+	u_int32_t	sb_hiwat;
+	u_int32_t	sb_mbcnt;
+	u_int32_t	sb_mbmax;
+	int32_t		sb_lowat;
+	short		sb_flags;
+	short		sb_timeo;
 };
 
 /*
@@ -348,6 +390,56 @@ struct	xsocket64 {
 
 #endif /* !CONFIG_EMBEDDED */
 
+#ifdef PRIVATE
+
+#define XSO_SOCKET	0x001
+#define XSO_RCVBUF	0x002
+#define XSO_SNDBUF	0x004
+#define XSO_STATS	0x008
+#define XSO_INPCB	0x010
+#define XSO_TCPCB	0x020
+
+struct	xsocket_n {
+	u_int32_t		xso_len;		/* length of this structure */
+	u_int32_t		xso_kind;		/* XSO_SOCKET */
+	u_int64_t		xso_so;	/* makes a convenient handle */
+	short			so_type;
+	short			so_options;
+	short			so_linger;
+	short			so_state;
+	u_int64_t		so_pcb;		/* another convenient handle */
+	int				xso_protocol;
+	int				xso_family;
+	short			so_qlen;
+	short			so_incqlen;
+	short			so_qlimit;
+	short			so_timeo;
+	u_short			so_error;
+	pid_t			so_pgid;
+	u_int32_t		so_oobmark;
+	uid_t			so_uid;		/* XXX */
+};
+
+struct xsockbuf_n {
+	u_int32_t		xsb_len;		/* length of this structure */
+	u_int32_t		xsb_kind;		/* XSO_RCVBUF or XSO_SNDBUF */
+	u_int32_t		sb_cc;
+	u_int32_t		sb_hiwat;
+	u_int32_t		sb_mbcnt;
+	u_int32_t		sb_mbmax;
+	int32_t			sb_lowat;
+	short			sb_flags;
+	short			sb_timeo;
+};
+
+struct xsockstat_n {
+	u_int32_t		xst_len;		/* length of this structure */
+	u_int32_t		xst_kind;		/* XSO_STATS */
+	struct data_stats	xst_tc_stats[SO_TC_STATS_MAX];
+};
+
+#endif /* PRIVATE */
+
 #pragma pack()
 
 #ifdef KERNEL_PRIVATE
@@ -434,6 +526,7 @@ extern so_gen_t so_gencnt;
 extern int	socket_debug;
 extern int sosendjcl;
 extern int sosendjcl_ignore_capab;
+extern int sodefunctlog;
 extern int somaxconn;
 
 struct file;
@@ -444,6 +537,7 @@ struct stat;
 struct ucred;
 struct uio;
 struct knote;
+struct so_tcdbg;
 
 #define	SBLASTRECORDCHK(sb, s)	\
 	if (socket_debug) sblastrecordchk(sb, s);
@@ -458,6 +552,20 @@ struct knote;
 	}					\
 }
 
+#define	SODEFUNCTLOG(x)		do { if (sodefunctlog) printf x; } while (0)
+
+/*
+ * For debugging traffic class behaviors
+ */
+#define SOTCDB_NO_DSCP		0x01	/* Do not set DSCP code in IP header */
+#define SOTCDB_NO_MTC		0x02	/* Do not set the mbuf traffic class */
+#define SOTCDB_NO_SENDTCPBG	0x04	/* Do not use background TCP CC algorithm for sender */
+#define SOTCDB_NO_LCLTST	0x08	/* Do not test for local destination for setting DSCP */
+#define SOTCDB_NO_DSCPTST	0x10	/* Overwrite any existing DSCP code */
+#define SOTCDB_NO_RECVTCPBG	0x20	/* Do not use throttling on receiver-side of TCP */ 
+
+extern u_int32_t sotcdb;
+
 /*
  * From uipc_socket and friends
  */
@@ -481,6 +589,7 @@ extern void sbcheck(struct sockbuf *sb);
 extern void sblastmbufchk(struct sockbuf *, const char *);
 extern void sblastrecordchk(struct sockbuf *, const char *);
 extern struct mbuf *sbcreatecontrol(caddr_t p, int size, int type, int level);
+extern struct mbuf **sbcreatecontrol_mbuf(caddr_t p, int size, int type, int level, struct mbuf** m);
 extern void sbdrop(struct sockbuf *sb, int len);
 extern void sbdroprecord(struct sockbuf *sb);
 extern void sbflush(struct sockbuf *sb);
@@ -512,11 +621,14 @@ extern void sofree(struct socket *so);
 extern void soreference(struct socket *so);
 extern void sodereference(struct socket *so);
 extern void somultipages(struct socket *, boolean_t);
+extern int sosetdefunct(struct proc *, struct socket *, int level, boolean_t);
+extern int sodefunct(struct proc *, struct socket *, int level);
 extern int sogetopt(struct socket *so, struct sockopt *sopt);
 extern void sohasoutofband(struct socket *so);
 extern void soisconnected(struct socket *so);
 extern void soisconnecting(struct socket *so);
 extern void soisdisconnected(struct socket *so);
+extern void sodisconnectwakeup(struct socket *so);
 extern void soisdisconnecting(struct socket *so);
 extern int soisbackground(struct socket *so);
 extern int solisten(struct socket *so, int backlog);
@@ -531,8 +643,15 @@ extern int socket_unlock(struct socket *so, int refcount);
 extern void sofreelastref(struct socket *, int);
 extern int sogetaddr_locked(struct socket *, struct sockaddr **, int);
 extern const char *solockhistory_nr(struct socket *);
-extern void set_traffic_class(struct mbuf *, struct socket *, int);
+extern void set_packet_tclass(struct mbuf *, struct socket *, int, int);
 extern int mbuf_traffic_class_from_control(struct mbuf *);
+extern void set_tcp_stream_priority(struct socket *so);
+extern int so_set_traffic_class(struct socket *, int);
+extern void so_set_default_traffic_class(struct socket *);
+extern void socket_tclass_init(void);
+extern int so_set_tcdbg(struct socket *, struct so_tcdbg *);
+extern int sogetopt_tcdbg(struct socket *, struct sockopt *);
+extern void so_recv_data_stat(struct socket *, struct mbuf *, size_t);
 
 /*
  * XXX; prepare mbuf for (__FreeBSD__ < 3) routines.
@@ -557,6 +676,7 @@ extern void sotoxsocket(struct socket *so, struct xsocket *xso);
 #if !CONFIG_EMBEDDED
 extern void sotoxsocket64(struct socket *so, struct xsocket64 *xso);
 #endif
+extern void sbwakeup(struct sockbuf *sb);
 extern void sowakeup(struct socket *so, struct sockbuf *sb);
 extern int soioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p);
 
diff --git a/bsd/sys/sockio.h b/bsd/sys/sockio.h
index 8d415e6e4..3a6b1371b 100644
--- a/bsd/sys/sockio.h
+++ b/bsd/sys/sockio.h
@@ -158,6 +158,18 @@
 #define SIOCGIFALTMTU	_IOWR('i', 72, struct ifreq) 	/* get if alternate mtu */
 #define SIOCSIFBOND	 _IOW('i', 70, struct ifreq)	/* set bond if config */
 #define SIOCGIFBOND	_IOWR('i', 71, struct ifreq)	/* get bond if config */
+
+#ifdef PRIVATE
+/* 
+ * temporary control calls to attach/detach IP to/from an ethernet interface
+ */
+#define	SIOCPROTOATTACH	_IOWR('i', 80, struct ifreq)	/* attach proto to interface */
+#define	SIOCPROTODETACH	_IOWR('i', 81, struct ifreq)	/* detach proto from interface */
+#endif /* PRIVATE */
+
+#define	SIOCSIFCAP	 _IOW('i', 90, struct ifreq)	/* set IF features */
+#define	SIOCGIFCAP	_IOWR('i', 91, struct ifreq)	/* get IF features */
+
 #define	SIOCIFCREATE	_IOWR('i', 120, struct ifreq)	/* create clone if */
 #define	SIOCIFDESTROY	 _IOW('i', 121, struct ifreq)	/* destroy clone if */
 #define SIOCIFCREATE2   _IOWR('i', 122, struct ifreq)   /* create clone if with data */
@@ -192,11 +204,6 @@
 #define	SIOCIFGCLONERS64 _IOWR('i', 129, struct if_clonereq64) /* get cloners */
 #endif /* KERNEL */
 
-/* 
- * temporary control calls to attach/detach IP to/from an ethernet interface
- */
-#define	SIOCPROTOATTACH	_IOWR('i', 80, struct ifreq)	/* attach proto to interface */
-#define	SIOCPROTODETACH	_IOWR('i', 81, struct ifreq)	/* detach proto from interface */
 #endif /* PRIVATE */
 
 #define	SIOCGIFASYNCMAP _IOWR('i', 124, struct ifreq)	/* get ppp asyncmap */
diff --git a/bsd/sys/spawn.h b/bsd/sys/spawn.h
index f54fcc396..4947902dd 100644
--- a/bsd/sys/spawn.h
+++ b/bsd/sys/spawn.h
@@ -58,6 +58,15 @@
  */
 #define	POSIX_SPAWN_SETEXEC		0x0040
 #define	POSIX_SPAWN_START_SUSPENDED	0x0080
+#ifdef	PRIVATE
+#define	_POSIX_SPAWN_DISABLE_ASLR	0x0100
+#define	_POSIX_SPAWN_ALLOW_DATA_EXEC	0x2000
+#define	POSIX_SPAWN_OSX_TALAPP_START	0x0400
+#define	POSIX_SPAWN_OSX_WIDGET_START	0x0800
+#define	POSIX_SPAWN_OSX_DBCLIENT_START	0x0800		/* not a bug; same value as widget, just renamed */
+#define	POSIX_SPAWN_IOS_APP_START	0x1000
+#endif	/* PRIVATE */
+#define	POSIX_SPAWN_CLOEXEC_DEFAULT	0x4000
 
 /*
  * Possible values to be set for the process control actions on resource starvation.
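
POSIX_SPAWN_CLOEXEC_DEFAULT flips the default so every descriptor is treated as close-on-exec in the spawned image, except those recreated by file actions. A minimal sketch (the spawned binary is illustrative):

/* Hedged sketch: spawn a child that inherits no stray descriptors. */
#include <sys/types.h>
#include <spawn.h>
#include <stdio.h>
#include <string.h>

extern char **environ;

int
main(void)
{
	posix_spawnattr_t attr;
	char *argv[] = { "/bin/echo", "hello", NULL };
	pid_t pid;
	int err;

	posix_spawnattr_init(&attr);
	posix_spawnattr_setflags(&attr, POSIX_SPAWN_CLOEXEC_DEFAULT);
	err = posix_spawn(&pid, argv[0], NULL, &attr, argv, environ);
	if (err != 0)
		fprintf(stderr, "posix_spawn: %s\n", strerror(err));
	posix_spawnattr_destroy(&attr);
	return (0);
}
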
diff --git a/bsd/sys/spawn_internal.h b/bsd/sys/spawn_internal.h
index 0e8943947..d29526095 100644
--- a/bsd/sys/spawn_internal.h
+++ b/bsd/sys/spawn_internal.h
@@ -30,7 +30,7 @@
 /*
  * [SPN] Support for _POSIX_SPAWN
  *
- * This file contains intern datastructures which are externally represented
+ * This file contains internal data structures which are externally represented
  * as opaque void pointers to prevent introspection.  This permits us to
  * change the underlying implementation of the code to maintain it or to
  * support new features, as needed, without the consumer needing to recompile
@@ -110,7 +110,8 @@ typedef struct _posix_spawnattr {
 typedef enum {
 	PSFA_OPEN = 0,
 	PSFA_CLOSE = 1,
-	PSFA_DUP2 = 2
+	PSFA_DUP2 = 2,
+	PSFA_INHERIT = 3
 } psfa_t;
 
 
diff --git a/bsd/sys/stat.h b/bsd/sys/stat.h
index bcc8b79b4..d5daf6120 100644
--- a/bsd/sys/stat.h
+++ b/bsd/sys/stat.h
@@ -443,7 +443,7 @@ extern void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp);
 #define	S_IFLNK		0120000		/* [XSI] symbolic link */
 #define	S_IFSOCK	0140000		/* [XSI] socket */
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define	S_IFWHT		0160000		/* whiteout */
+#define	S_IFWHT		0160000		/* OBSOLETE: whiteout */
 #endif
 
 /* File mode */
@@ -489,7 +489,7 @@ extern void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp);
 #define	S_ISLNK(m)	(((m) & S_IFMT) == S_IFLNK)	/* symbolic link */
 #define	S_ISSOCK(m)	(((m) & S_IFMT) == S_IFSOCK)	/* socket */
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define	S_ISWHT(m)	(((m) & S_IFMT) == S_IFWHT)	/* whiteout */
+#define	S_ISWHT(m)	(((m) & S_IFMT) == S_IFWHT)	/* OBSOLETE: whiteout */
 #endif
 
 /*
@@ -553,7 +553,8 @@ extern void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp);
  */
 /* #define UF_NOUNLINK	0x00000010 */	/* file may not be removed or renamed */
 #define UF_COMPRESSED	0x00000020	/* file is hfs-compressed */
-/* Bits 0x0040 through 0x4000 are currently undefined. */
+#define UF_TRACKED		0x00000040	/* file renames and deletes are tracked */
+/* Bits 0x0080 through 0x4000 are currently undefined. */
 #define UF_HIDDEN	0x00008000	/* hint that this item should not be */
 					/* displayed in a GUI */
 /*
@@ -607,13 +608,13 @@ int	chmodx_np(const char *, filesec_t);
 int	fchflags(int, __uint32_t);
 int	fchmodx_np(int, filesec_t);
 int	fstatx_np(int, struct stat *, filesec_t) __DARWIN_INODE64(fstatx_np);
-int	lchflags(const char *, __uint32_t);
-int	lchmod(const char *, mode_t);
+int	lchflags(const char *, __uint32_t) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+int	lchmod(const char *, mode_t) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
 int	lstatx_np(const char *, struct stat *, filesec_t) __DARWIN_INODE64(lstatx_np);
 int	mkdirx_np(const char *, filesec_t);
 int	mkfifox_np(const char *, filesec_t);
 int	statx_np(const char *, struct stat *, filesec_t) __DARWIN_INODE64(statx_np);
-int	umaskx_np(filesec_t);
+int	umaskx_np(filesec_t) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
 
 #if !__DARWIN_ONLY_64_BIT_INO_T
 /* The following deprecated routines are simillar to stat and friends except provide struct stat64 instead of struct stat  */
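
UF_TRACKED is set and cleared like the other UF_ bits; whether a given filesystem acts on it is up to the VFS layer. A small sketch with a hypothetical path:

/* Hedged sketch: flag a file for rename/delete tracking. */
#include <sys/stat.h>
#include <stdio.h>

int
main(void)
{
	const char *path = "/tmp/example";	/* hypothetical */

	if (chflags(path, UF_TRACKED) == -1)
		perror("chflags");
	return (0);
}
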
diff --git a/bsd/sys/sys_domain.h b/bsd/sys/sys_domain.h
index 013959c8a..981d9f107 100644
--- a/bsd/sys/sys_domain.h
+++ b/bsd/sys/sys_domain.h
@@ -96,8 +96,9 @@ struct ctl_cb {
 	lck_mtx_t				*mtx;
 	struct socket			*so;					/* controlling socket */
 	struct kctl				*kctl;					/* back pointer to controller */
-	u_int32_t				unit;
 	void					*userdata;
+	u_int32_t				unit;
+	u_int32_t				usecount;
 };
 
 
diff --git a/bsd/sys/sysctl.h b/bsd/sys/sysctl.h
index 083432071..1da032f48 100644
--- a/bsd/sys/sysctl.h
+++ b/bsd/sys/sysctl.h
@@ -110,12 +110,34 @@
  * type given below. Each sysctl level defines a set of name/type
  * pairs to be used by sysctl(1) in manipulating the subsystem.
  *
- * When declaring new sysctl names, please use the CTLFLAG_LOCKED
- * flag in the type to indicate that all necessary locking will
- * be handled within the sysctl. Any sysctl defined without
- * CTLFLAG_LOCKED is considered legacy and will be protected by
- * both the kernel funnel and the sysctl memlock. This is not
- * optimal, so it is best to handle locking yourself.
+ * When declaring new sysctl names, unless your sysctl is callable
+ * from the paging path, please use the CTLFLAG_LOCKED flag in the
+ * type to indicate that all necessary locking will be handled
+ * within the sysctl.
+ *
+ * Any sysctl defined without CTLFLAG_LOCKED is considered legacy
+ * and will be protected by both wiring the user process pages and,
+ * if it is a 32 bit legacy KEXT, by the obsolete kernel funnel.
+ *
+ * Note:	This is not optimal, so it is best to handle locking
+ *		yourself, if you are able to do so.  A simple design
+ *		pattern to use in a single function that is known to
+ *		potentially be in the paging path or to be doing DMA
+ *		to physical memory in a user space process is:
+ *
+ *			lock
+ *			perform operation vs. local buffer
+ *			unlock
+ *			SYSCTL_OUT(req, local buffer, length)
+ *
+ *		...this assumes you are not using a deep call graph,
+ *		or are able to pass a local buffer address as a
+ *		parameter into your deep call graph.
+ *
+ *		Note that very large user buffers can fail the wire
+ *		if doing so would require more physical pages than
+ *		are available (the caller will get an ENOMEM error;
+ *		see sysctl_mem_hold() for details).
  */
 struct ctlname {
 	char	*ctl_name;	/* subsystem name */
@@ -139,7 +161,8 @@ struct ctlname {
 #define CTLFLAG_MASKED	0x04000000	/* deprecated variable, do not display */
 #define CTLFLAG_NOAUTO	0x02000000	/* do not auto-register */
 #define CTLFLAG_KERN	0x01000000	/* valid inside the kernel */
-#define CTLFLAG_LOCKED	0x00800000	/* node will handle locking itself (highly encouraged) */
+#define CTLFLAG_LOCKED	0x00800000	/* node will handle locking itself */
+#define CTLFLAG_OID2	0x00400000	/* struct sysctl_oid has version info */
 
 /*
  * USE THIS instead of a hardwired number from the categories below
@@ -161,33 +184,6 @@ struct ctlname {
 #define SYSCTL_HANDLER_ARGS (struct sysctl_oid *oidp, void *arg1, int arg2, \
 	struct sysctl_req *req)
 
-/*
- * Locking and stats
- */
-struct sysctl_lock {
-	int	sl_lock;
-	int	sl_want;
-	int	sl_locked;
-};
-
-#define MEMLOCK_LOCK() \
-	do { \
-		while (memlock.sl_lock) { \
-			memlock.sl_want = 1; \
-			(void) tsleep((caddr_t)&memlock, PRIBIO+1, "sysctl", 0); \
-			memlock.sl_locked++; \
-		} \
-		memlock.sl_lock = 1; \
-	} while(0)
-
-#define MEMLOCK_UNLOCK() \
-	do { \
-		memlock.sl_lock = 0; \
-		if (memlock.sl_want) { \
-			memlock.sl_want = 0; \
-			wakeup((caddr_t)&memlock); \
-		} \
-	}while(0)
 
 /*
  * This describes the access space for a sysctl request.  This is needed
@@ -195,22 +191,55 @@ struct sysctl_lock {
  */
 struct sysctl_req {
 	struct proc	*p;
-	int         lock;
-	user_addr_t oldptr;
-	size_t		oldlen;
-	size_t		oldidx;
-	int		    (*oldfunc)(struct sysctl_req *, const void *, size_t);
-	user_addr_t newptr;
-	size_t		newlen;
-	size_t		newidx;
-	int		    (*newfunc)(struct sysctl_req *, void *, size_t);
+	int		lock;
+	user_addr_t	oldptr;		/* pointer to user supplied buffer */
+	size_t		oldlen;		/* user buffer length (also returned) */
+	size_t		oldidx;		/* total data iteratively copied out */
+	int		(*oldfunc)(struct sysctl_req *, const void *, size_t);
+	user_addr_t	newptr;		/* buffer containing new value */
+	size_t		newlen;		/* length of new value */
+	size_t		newidx;		/* total data iteratively copied in */
+	int		(*newfunc)(struct sysctl_req *, void *, size_t);
 };
 
 SLIST_HEAD(sysctl_oid_list, sysctl_oid);
 
+#define SYSCTL_OID_VERSION	1	/* current OID structure version */
+
 /*
  * This describes one "oid" in the MIB tree.  Potentially more nodes can
  * be hidden behind it, expanded by the handler.
+ *
+ * NOTES:	We implement binary compatibility between CTLFLAG_OID2 and
+ *		pre-CTLFLAG_OID2 structures in sysctl_register_oid() and in
+ *		sysctl_unregister_oid() using the fact that the fields up
+ *		to oid_fmt are unchanged, and that the field immediately
+ *		following is on an alignment boundary following a pointer
+ *		type and is also a pointer.  This lets us get the previous
+ *		size of the structure, and the copy-cut-off point, using
+ *		the offsetof() language primitive, and these values are
+ *		used in conjunction with the fact that earlier and future
+ *		statically compiled sysctl_oid structures are declared via
+ *		macros.  This lets us overload the macros so that newly
+ *		compiled code containing sysctl node declarations picks up
+ *		the CTLFLAG_OID2 flag, subsequently allowing us to avoid
+ *		changing the KPI used for non-static (un)registration in
+ *		KEXTs.
+ *
+ *		This depends on the fact that people declare SYSCTLs,
+ *		rather than declaring sysctl_oid structures.  All new code
+ *		should avoid declaring struct sysctl_oid's directly without
+ *		the macros; the current risk for this is limited to losing
+ *		your description field and ending up with a malloc'ed copy,
+ *		as if it were a legacy binary static declaration via SYSCTL;
+ *		in the future, we may deprecate access to a named structure
+ *		type in third party code.  Use the macros, or your code will
+ *		end up with compile errors when that happens.
+ *
+ *		Please try to include a long description of the field in any
+ *		new sysctl declarations (all the macros support this).  This
+ *		field may be the only human readable documentation your users
+ *		get for your sysctl.
  */
 struct sysctl_oid {
 	struct sysctl_oid_list *oid_parent;
@@ -222,6 +251,9 @@ struct sysctl_oid {
 	const char	*oid_name;
 	int 		(*oid_handler) SYSCTL_HANDLER_ARGS;
 	const char	*oid_fmt;
+	const char	*oid_descr; /* offsetof() field / long description */
+	int		oid_version;
+	int		oid_refcnt;
 };
 
 #define SYSCTL_IN(r, p, l) (r->newfunc)(r, p, l)
@@ -267,7 +299,7 @@ __END_DECLS
 #define SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \
 	struct sysctl_oid sysctl_##parent##_##name = {			 \
 		&sysctl_##parent##_children, { 0 },			 \
-		nbr, kind, a1, a2, #name, handler, fmt };		 \
+		nbr, kind|CTLFLAG_OID2, a1, a2, #name, handler, fmt, descr, SYSCTL_OID_VERSION, 0 }; \
 	SYSCTL_LINKER_SET_ENTRY(__sysctl_set, sysctl_##parent##_##name)
 
 /* This constructs a node from which other oids can hang. */
@@ -510,6 +542,9 @@ SYSCTL_DECL(_user);
 #define KERN_KDPIDEX            14
 #define KERN_KDSETRTCDEC        15
 #define KERN_KDGETENTROPY       16
+#define KERN_KDWRITETR		17
+#define KERN_KDWRITEMAP		18
+
 
 /* KERN_PANICINFO types */
 #define	KERN_PANICINFO_MAXSIZE	1	/* quad: panic UI image size limit */
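
To make the CTLFLAG_LOCKED guidance concrete, here is a kernel sketch of the lock / copy to local / unlock / SYSCTL_OUT pattern described above; the statistics structure and mutex are hypothetical:

/* Hedged sketch: a self-locking, read-only sysctl.  my_stats_store
 * and my_lock are hypothetical. */
struct my_stats {
	uint64_t	hits;
	uint64_t	misses;
};
static struct my_stats my_stats_store;
static lck_mtx_t *my_lock;

static int
my_stats_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	struct my_stats snapshot;

	lck_mtx_lock(my_lock);		/* lock */
	snapshot = my_stats_store;	/* operate vs. local buffer */
	lck_mtx_unlock(my_lock);	/* unlock */

	return SYSCTL_OUT(req, &snapshot, sizeof (snapshot));
}

SYSCTL_PROC(_kern, OID_AUTO, my_stats,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    NULL, 0, my_stats_sysctl, "S,my_stats",
    "Hypothetical subsystem statistics");
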
diff --git a/bsd/sys/sysent.h b/bsd/sys/sysent.h
index b83a67c44..df71d010f 100644
--- a/bsd/sys/sysent.h
+++ b/bsd/sys/sysent.h
@@ -31,9 +31,6 @@
 
 #include <sys/appleapiopts.h>
 #include <sys/cdefs.h>
-#ifdef __ppc__
-#include <sys/types.h>
-#endif
 
 #ifdef KERNEL_PRIVATE
 #ifdef __APPLE_API_PRIVATE
@@ -59,7 +56,7 @@ extern struct sysent sysent[];
 #endif	/* __INIT_SYSENT_C__ */
 
 extern int nsysent;
-#define NUM_SYSENT	434	/* Current number of defined syscalls */
+#define NUM_SYSENT	439	/* Current number of defined syscalls */
 
 /* sy_funnel flags bits */
 #define FUNNEL_MASK	0x07f
diff --git a/bsd/sys/syslog.h b/bsd/sys/syslog.h
index e85a4a817..71004cf2a 100644
--- a/bsd/sys/syslog.h
+++ b/bsd/sys/syslog.h
@@ -26,7 +26,7 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
-/*
+/*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -38,10 +38,6 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
@@ -59,9 +55,10 @@
  * SUCH DAMAGE.
  *
  *	@(#)syslog.h	8.1 (Berkeley) 6/2/93
+ * $FreeBSD: src/sys/sys/syslog.h,v 1.27.2.1.4.1 2010/06/14 02:09:06 kensmith Exp $
  */
 
-#ifndef	_SYS_SYSLOG_H_
+#ifndef _SYS_SYSLOG_H_
 #define _SYS_SYSLOG_H_
 
 #include <sys/appleapiopts.h>
@@ -88,54 +85,61 @@
 #define	LOG_DEBUG	7	/* debug-level messages */
 
 #define	LOG_PRIMASK	0x07	/* mask to extract priority part (internal) */
-				/* extract priority */
+/* extract priority */
 #define	LOG_PRI(p)	((p) & LOG_PRIMASK)
-#define	LOG_MAKEPRI(fac, pri)	(((fac) << 3) | (pri))
+#define	LOG_MAKEPRI(fac, pri)	((fac) | (pri))
 
 #ifdef SYSLOG_NAMES
 #define	INTERNAL_NOPRI	0x10	/* the "no priority" priority */
-				/* mark "facility" */
-#define	INTERNAL_MARK	LOG_MAKEPRI(LOG_NFACILITIES, 0)
+/* mark "facility" */
+#define	INTERNAL_MARK	LOG_MAKEPRI((LOG_NFACILITIES<<3), 0)
 typedef struct _code {
-	char	*c_name;
-	int	c_val;
+	const char	*c_name;
+	int		c_val;
 } CODE;
 
 CODE prioritynames[] = {
-	{ "alert",	LOG_ALERT },
-	{ "crit",	LOG_CRIT },
-	{ "debug",	LOG_DEBUG },
-	{ "emerg",	LOG_EMERG },
-	{ "err",	LOG_ERR },
-	{ "error",	LOG_ERR },		/* DEPRECATED */
-	{ "info",	LOG_INFO },
-	{ "none",	INTERNAL_NOPRI },	/* INTERNAL */
-	{ "notice",	LOG_NOTICE },
-	{ "panic", 	LOG_EMERG },		/* DEPRECATED */
-	{ "warn",	LOG_WARNING },		/* DEPRECATED */
-	{ "warning",	LOG_WARNING },
-	{ 0,		-1 }
+	{ "alert",	LOG_ALERT,	},
+	{ "crit",	LOG_CRIT,	},
+	{ "debug",	LOG_DEBUG,	},
+	{ "emerg",	LOG_EMERG,	},
+	{ "err",	LOG_ERR,	},
+	{ "error",	LOG_ERR,	},	/* DEPRECATED */
+	{ "info",	LOG_INFO,	},
+	{ "none",	INTERNAL_NOPRI,	},	/* INTERNAL */
+	{ "notice",	LOG_NOTICE,	},
+	{ "panic", 	LOG_EMERG,	},	/* DEPRECATED */
+	{ "warn",	LOG_WARNING,	},	/* DEPRECATED */
+	{ "warning",	LOG_WARNING,	},
+	{ NULL,		-1,		}
 };
 #endif
 
 /* facility codes */
-#define	LOG_KERN		(0<<3)	/* kernel messages */
-#define	LOG_USER		(1<<3)	/* random user-level messages */
-#define	LOG_MAIL		(2<<3)	/* mail system */
-#define	LOG_DAEMON		(3<<3)	/* system daemons */
-#define	LOG_AUTH		(4<<3)	/* security/authorization messages */
-#define	LOG_SYSLOG		(5<<3)	/* messages generated internally by syslogd */
-#define	LOG_LPR			(6<<3)	/* line printer subsystem */
-#define	LOG_NEWS		(7<<3)	/* network news subsystem */
-#define	LOG_UUCP		(8<<3)	/* UUCP subsystem */
-#define	LOG_CRON		(9<<3)	/* clock daemon */
-#define	LOG_AUTHPRIV 	(10<<3)	/* security/authorization messages (private) */
-#define	LOG_FTP			(11<<3)	/* ftp daemon */
-#define	LOG_NETINFO		(12<<3)	/* NetInfo */
+#define	LOG_KERN	(0<<3)	/* kernel messages */
+#define	LOG_USER	(1<<3)	/* random user-level messages */
+#define	LOG_MAIL	(2<<3)	/* mail system */
+#define	LOG_DAEMON	(3<<3)	/* system daemons */
+#define	LOG_AUTH	(4<<3)	/* authorization messages */
+#define	LOG_SYSLOG	(5<<3)	/* messages generated internally by syslogd */
+#define	LOG_LPR		(6<<3)	/* line printer subsystem */
+#define	LOG_NEWS	(7<<3)	/* network news subsystem */
+#define	LOG_UUCP	(8<<3)	/* UUCP subsystem */
+#define	LOG_CRON	(9<<3)	/* clock daemon */
+#define	LOG_AUTHPRIV	(10<<3)	/* authorization messages (private) */
+/* Facility #10 clashes in DEC UNIX, where */
+/* it's defined as LOG_MEGASAFE for AdvFS  */
+/* event logging.                          */
+#define	LOG_FTP		(11<<3)	/* ftp daemon */
+//#define	LOG_NTP		(12<<3)	/* NTP subsystem */
+//#define	LOG_SECURITY	(13<<3) /* security subsystems (firewalling, etc.) */
+//#define	LOG_CONSOLE	(14<<3) /* /dev/console output */
+#define	LOG_NETINFO	(12<<3)	/* NetInfo */
 #define	LOG_REMOTEAUTH	(13<<3)	/* remote authentication/authorization */
-#define	LOG_INSTALL		(14<<3)	/* installer subsystem */
-#define	LOG_RAS			(15<<3)	/* Remote Access Service (VPN / PPP) */
+#define	LOG_INSTALL	(14<<3)	/* installer subsystem */
+#define	LOG_RAS		(15<<3)	/* Remote Access Service (VPN / PPP) */
 
+/* other codes through 15 reserved for system use */
 #define	LOG_LOCAL0	(16<<3)	/* reserved for local use */
 #define	LOG_LOCAL1	(17<<3)	/* reserved for local use */
 #define	LOG_LOCAL2	(18<<3)	/* reserved for local use */
@@ -145,43 +149,43 @@ CODE prioritynames[] = {
 #define	LOG_LOCAL6	(22<<3)	/* reserved for local use */
 #define	LOG_LOCAL7	(23<<3)	/* reserved for local use */
 
-#define	LOG_LAUNCHD		(24<<3)	/* launchd - general bootstrap daemon */
+#define	LOG_LAUNCHD	(24<<3)	/* launchd - general bootstrap daemon */
 
 #define	LOG_NFACILITIES	25	/* current number of facilities */
 #define	LOG_FACMASK	0x03f8	/* mask to extract facility part */
-				/* facility of pri */
+/* facility of pri */
 #define	LOG_FAC(p)	(((p) & LOG_FACMASK) >> 3)
 
 #ifdef SYSLOG_NAMES
 CODE facilitynames[] = {
-	{ "auth",	LOG_AUTH },
-	{ "authpriv",	LOG_AUTHPRIV },
-	{ "cron", 	LOG_CRON },
-	{ "daemon",	LOG_DAEMON },
-	{ "ftp",	LOG_FTP },
-	{ "install",	LOG_INSTALL },
-	{ "kern",	LOG_KERN },
-	{ "lpr",	LOG_LPR },
-	{ "mail",	LOG_MAIL },
-	{ "mark", 	INTERNAL_MARK },	/* INTERNAL */
-	{ "netinfo",	LOG_NETINFO },
-	{ "ras", 	LOG_RAS },
-	{ "remoteauth", LOG_REMOTEAUTH },
-	{ "news",	LOG_NEWS },
-	{ "security",	LOG_AUTH },		/* DEPRECATED */
-	{ "syslog",	LOG_SYSLOG },
-	{ "user",	LOG_USER },
-	{ "uucp",	LOG_UUCP },
-	{ "local0",	LOG_LOCAL0 },
-	{ "local1",	LOG_LOCAL1 },
-	{ "local2",	LOG_LOCAL2 },
-	{ "local3",	LOG_LOCAL3 },
-	{ "local4",	LOG_LOCAL4 },
-	{ "local5",	LOG_LOCAL5 },
-	{ "local6",	LOG_LOCAL6 },
-	{ "local7",	LOG_LOCAL7 },
-	{ "launchd", 	LOG_LAUNCHD },
-	{ 0,		-1 }
+	{ "auth",	LOG_AUTH,	},
+	{ "authpriv",	LOG_AUTHPRIV,	},
+	{ "cron", 	LOG_CRON,	},
+	{ "daemon",	LOG_DAEMON,	},
+	{ "ftp",	LOG_FTP,	},
+	{ "install",	LOG_INSTALL	},
+	{ "kern",	LOG_KERN,	},
+	{ "lpr",	LOG_LPR,	},
+	{ "mail",	LOG_MAIL,	},
+	{ "mark", 	INTERNAL_MARK,	},	/* INTERNAL */
+	{ "netinfo",	LOG_NETINFO,	},
+	{ "ras", 	LOG_RAS		},
+	{ "remoteauth", LOG_REMOTEAUTH	},
+	{ "news",	LOG_NEWS,	},
+	{ "security",	LOG_AUTH	},	/* DEPRECATED */
+	{ "syslog",	LOG_SYSLOG,	},
+	{ "user",	LOG_USER,	},
+	{ "uucp",	LOG_UUCP,	},
+	{ "local0",	LOG_LOCAL0,	},
+	{ "local1",	LOG_LOCAL1,	},
+	{ "local2",	LOG_LOCAL2,	},
+	{ "local3",	LOG_LOCAL3,	},
+	{ "local4",	LOG_LOCAL4,	},
+	{ "local5",	LOG_LOCAL5,	},
+	{ "local6",	LOG_LOCAL6,	},
+	{ "local7",	LOG_LOCAL7,	},
+	{ "launchd", 	LOG_LAUNCHD	},
+	{ NULL,		-1,		}
 };
 #endif
 
@@ -211,18 +215,24 @@ CODE facilitynames[] = {
 #define	LOG_PERROR	0x20	/* log to stderr as well */
 
 #ifndef KERNEL
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#include <sys/_types.h>		/* for __darwin_va_list */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
+
+/*
+ * Don't use va_list in the vsyslog() prototype.  va_list is typedef'd in two
+ * places (<machine/varargs.h> and <machine/stdarg.h>), so if we include one
+ * of them here we may collide with the utility's includes.  It's unreasonable
+ * for utilities to have to include one of them to include syslog.h, so we get
+ * __darwin_va_list from <sys/_types.h> and use it.
+ */
+#include <sys/_types.h>
 
 __BEGIN_DECLS
 void	closelog(void);
 void	openlog(const char *, int, int);
 int	setlogmask(int);
-void	syslog(int, const char *, ...) __DARWIN_LDBL_COMPAT(syslog);
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-void	vsyslog(int, const char *, __darwin_va_list) __DARWIN_LDBL_COMPAT(vsyslog);
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
+void	syslog(int, const char *, ...) __printflike(2, 3) __DARWIN_LDBL_COMPAT(syslog);
+#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL
+void	vsyslog(int, const char *, __darwin_va_list) __printflike(2, 0) __DARWIN_LDBL_COMPAT(vsyslog);
+#endif
 __END_DECLS
 
 #else /* !KERNEL */
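
Note that the revised LOG_MAKEPRI above no longer shifts: it expects an already-shifted facility constant such as LOG_LOCAL0, matching the FreeBSD definition. A quick illustration:

/* Hedged sketch: facility | priority composition under the new macro. */
#include <syslog.h>

int
main(void)
{
	openlog("example", LOG_PID, LOG_LOCAL0);
	/* identical to syslog(LOG_LOCAL0 | LOG_NOTICE, ...) */
	syslog(LOG_MAKEPRI(LOG_LOCAL0, LOG_NOTICE), "hello %d", 42);
	closelog();
	return (0);
}
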
diff --git a/bsd/sys/systm.h b/bsd/sys/systm.h
index d5fdbe392..f08bc477c 100644
--- a/bsd/sys/systm.h
+++ b/bsd/sys/systm.h
@@ -223,10 +223,17 @@ void	bsd_untimeout(void (*)(void *), void *arg);
 void	set_fsblocksize(struct vnode *);
 uint64_t tvtoabstime(struct timeval *);
 void	*throttle_info_create(void);
-void	throttle_info_mount_ref(mount_t mp, void * throttle_info);	
-void	throttle_info_mount_rel(mount_t mp);	
+void	throttle_info_mount_ref(mount_t mp, void * throttle_info);
+void	throttle_info_mount_rel(mount_t mp);
 void	throttle_info_release(void *throttle_info);
 void	throttle_info_update(void *throttle_info, int flags);
+uint32_t throttle_lowpri_io(int sleep_amount);
+void	throttle_set_thread_io_policy(int policy);
+typedef struct __throttle_info_handle *throttle_info_handle_t;
+int		throttle_info_ref_by_mask(
+	uint64_t throttle_mask, throttle_info_handle_t *throttle_info_handle);
+void	throttle_info_rel_by_mask(throttle_info_handle_t throttle_info_handle);
+void	throttle_info_update_by_mask(void *throttle_info_handle, int flags);
 __END_DECLS
 
 #endif /* !_SYS_SYSTM_H_ */
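
The throttle_info additions pair create/release with per-mount ref/rel; a hedged sketch of the lifecycle a filesystem KEXT would presumably follow (all my_fs_* names are hypothetical):

/* Hedged sketch of the throttle_info KPI lifecycle. */
static void *my_throttle_info;

static void
my_fs_mount(mount_t mp)
{
	my_throttle_info = throttle_info_create();
	/* attach the info to the mount so the I/O path can find it */
	throttle_info_mount_ref(mp, my_throttle_info);
}

static void
my_fs_io_completed(void)
{
	/* record I/O against the throttling state */
	throttle_info_update(my_throttle_info, 0);
}

static void
my_fs_unmount(mount_t mp)
{
	throttle_info_mount_rel(mp);
	throttle_info_release(my_throttle_info);
}
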
diff --git a/bsd/sys/time.h b/bsd/sys/time.h
index 732d1ae76..a32ed62d6 100644
--- a/bsd/sys/time.h
+++ b/bsd/sys/time.h
@@ -68,6 +68,8 @@
 #include <sys/_types.h>
 #ifdef KERNEL
 #include <machine/types.h>	/* user_time_t */
+#else /* !KERNEL */
+#include <Availability.h>
 #endif /* KERNEL */
 
 /*
@@ -240,7 +242,7 @@ __BEGIN_DECLS
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
 int	adjtime(const struct timeval *, struct timeval *);
 int	futimes(int, const struct timeval *);
-int	lutimes(const char *, const struct timeval *);
+int	lutimes(const char *, const struct timeval *) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
 int	settimeofday(const struct timeval *, const struct timezone *);
 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
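
/*
 * Illustrative userspace sketch (editorial; touch_symlink_now is a
 * hypothetical helper): lutimes() sets the timestamps of a symlink itself
 * rather than its target, and per the annotation above is available
 * starting Mac OS X 10.5 / iOS 2.0.
 */
#include <stddef.h>
#include <sys/time.h>

static int
touch_symlink_now(const char *path)
{
	struct timeval now[2];

	if (gettimeofday(&now[0], NULL) == -1)
		return -1;
	now[1] = now[0];	/* [0] = access time, [1] = modification time */
	return lutimes(path, now);
}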
 
diff --git a/bsd/sys/tree.h b/bsd/sys/tree.h
index f4bf40c73..42427ca31 100644
--- a/bsd/sys/tree.h
+++ b/bsd/sys/tree.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -26,693 +26,4 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
-/*	$NetBSD: tree.h,v 1.13 2006/08/27 22:32:38 christos Exp $	*/
-/*	$OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $	*/
-/*
- * Copyright 2002 Niels Provos <provos@citi.umich.edu>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef	_SYS_TREE_H_
-#define	_SYS_TREE_H_
-
-/*
- * This file defines data structures for different types of trees:
- * splay trees and red-black trees.
- *
- * A splay tree is a self-organizing data structure.  Every operation
- * on the tree causes a splay to happen.  The splay moves the requested
- * node to the root of the tree and partly rebalances it.
- *
- * This has the benefit that request locality causes faster lookups as
- * the requested nodes move to the top of the tree.  On the other hand,
- * every lookup causes memory writes.
- *
- * The Balance Theorem bounds the total access time for m operations
- * and n inserts on an initially empty tree as O((m + n)lg n).  The
- * amortized cost for a sequence of m accesses to a splay tree is O(lg n);
- *
- * A red-black tree is a binary search tree with the node color as an
- * extra attribute.  It fulfills a set of conditions:
- *	- every search path from the root to a leaf consists of the
- *	  same number of black nodes,
- *	- each red node (except for the root) has a black parent,
- *	- each leaf node is black.
- *
- * Every operation on a red-black tree is bounded as O(lg n).
- * The maximum height of a red-black tree is 2lg (n+1).
- */
-
-#define SPLAY_HEAD(name, type)						\
-struct name {								\
-	struct type *sph_root; /* root of the tree */			\
-}
-
-#define SPLAY_INITIALIZER(root)						\
-	{ NULL }
-
-#define SPLAY_INIT(root) do {						\
-	(root)->sph_root = NULL;					\
-} while (/*CONSTCOND*/ 0)
-
-#define SPLAY_ENTRY(type)						\
-struct {								\
-	struct type *spe_left; /* left element */			\
-	struct type *spe_right; /* right element */			\
-}
-
-#define SPLAY_LEFT(elm, field)		(elm)->field.spe_left
-#define SPLAY_RIGHT(elm, field)		(elm)->field.spe_right
-#define SPLAY_ROOT(head)		(head)->sph_root
-#define SPLAY_EMPTY(head)		(SPLAY_ROOT(head) == NULL)
-
-/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */
-#define SPLAY_ROTATE_RIGHT(head, tmp, field) do {			\
-	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field);	\
-	SPLAY_RIGHT(tmp, field) = (head)->sph_root;			\
-	(head)->sph_root = tmp;						\
-} while (/*CONSTCOND*/ 0)
-
-#define SPLAY_ROTATE_LEFT(head, tmp, field) do {			\
-	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field);	\
-	SPLAY_LEFT(tmp, field) = (head)->sph_root;			\
-	(head)->sph_root = tmp;						\
-} while (/*CONSTCOND*/ 0)
-
-#define SPLAY_LINKLEFT(head, tmp, field) do {				\
-	SPLAY_LEFT(tmp, field) = (head)->sph_root;			\
-	tmp = (head)->sph_root;						\
-	(head)->sph_root = SPLAY_LEFT((head)->sph_root, field);		\
-} while (/*CONSTCOND*/ 0)
-
-#define SPLAY_LINKRIGHT(head, tmp, field) do {				\
-	SPLAY_RIGHT(tmp, field) = (head)->sph_root;			\
-	tmp = (head)->sph_root;						\
-	(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);	\
-} while (/*CONSTCOND*/ 0)
-
-#define SPLAY_ASSEMBLE(head, node, left, right, field) do {		\
-	SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field);	\
-	SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\
-	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field);	\
-	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field);	\
-} while (/*CONSTCOND*/ 0)
-
-/* Generates prototypes and inline functions */
-
-#define SPLAY_PROTOTYPE(name, type, field, cmp)				\
-void name##_SPLAY(struct name *, struct type *);			\
-void name##_SPLAY_MINMAX(struct name *, int);				\
-struct type *name##_SPLAY_INSERT(struct name *, struct type *);		\
-struct type *name##_SPLAY_REMOVE(struct name *, struct type *);		\
-									\
-/* Finds the node with the same key as elm */				\
-static __inline struct type *						\
-name##_SPLAY_FIND(struct name *head, struct type *elm)			\
-{									\
-	if (SPLAY_EMPTY(head))						\
-		return(NULL);						\
-	name##_SPLAY(head, elm);					\
-	if ((cmp)(elm, (head)->sph_root) == 0)				\
-		return (head->sph_root);				\
-	return (NULL);							\
-}									\
-									\
-static __inline struct type *						\
-name##_SPLAY_NEXT(struct name *head, struct type *elm)			\
-{									\
-	name##_SPLAY(head, elm);					\
-	if (SPLAY_RIGHT(elm, field) != NULL) {				\
-		elm = SPLAY_RIGHT(elm, field);				\
-		while (SPLAY_LEFT(elm, field) != NULL) {		\
-			elm = SPLAY_LEFT(elm, field);			\
-		}							\
-	} else								\
-		elm = NULL;						\
-	return (elm);							\
-}									\
-									\
-static __inline struct type *						\
-name##_SPLAY_MIN_MAX(struct name *head, int val)			\
-{									\
-	name##_SPLAY_MINMAX(head, val);					\
-        return (SPLAY_ROOT(head));					\
-}
-
-/* Main splay operation.
- * Moves node close to the key of elm to top
- */
-#define SPLAY_GENERATE(name, type, field, cmp)				\
-struct type *								\
-name##_SPLAY_INSERT(struct name *head, struct type *elm)		\
-{									\
-    if (SPLAY_EMPTY(head)) {						\
-	    SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL;	\
-    } else {								\
-	    int __comp;							\
-	    name##_SPLAY(head, elm);					\
-	    __comp = (cmp)(elm, (head)->sph_root);			\
-	    if(__comp < 0) {						\
-		    SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\
-		    SPLAY_RIGHT(elm, field) = (head)->sph_root;		\
-		    SPLAY_LEFT((head)->sph_root, field) = NULL;		\
-	    } else if (__comp > 0) {					\
-		    SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\
-		    SPLAY_LEFT(elm, field) = (head)->sph_root;		\
-		    SPLAY_RIGHT((head)->sph_root, field) = NULL;	\
-	    } else							\
-		    return ((head)->sph_root);				\
-    }									\
-    (head)->sph_root = (elm);						\
-    return (NULL);							\
-}									\
-									\
-struct type *								\
-name##_SPLAY_REMOVE(struct name *head, struct type *elm)		\
-{									\
-	struct type *__tmp;						\
-	if (SPLAY_EMPTY(head))						\
-		return (NULL);						\
-	name##_SPLAY(head, elm);					\
-	if ((cmp)(elm, (head)->sph_root) == 0) {			\
-		if (SPLAY_LEFT((head)->sph_root, field) == NULL) {	\
-			(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\
-		} else {						\
-			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
-			(head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\
-			name##_SPLAY(head, elm);			\
-			SPLAY_RIGHT((head)->sph_root, field) = __tmp;	\
-		}							\
-		return (elm);						\
-	}								\
-	return (NULL);							\
-}									\
-									\
-void									\
-name##_SPLAY(struct name *head, struct type *elm)			\
-{									\
-	struct type __node, *__left, *__right, *__tmp;			\
-	int __comp;							\
-\
-	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
-	__left = __right = &__node;					\
-\
-	while ((__comp = (cmp)(elm, (head)->sph_root)) != 0) {		\
-		if (__comp < 0) {					\
-			__tmp = SPLAY_LEFT((head)->sph_root, field);	\
-			if (__tmp == NULL)				\
-				break;					\
-			if ((cmp)(elm, __tmp) < 0){			\
-				SPLAY_ROTATE_RIGHT(head, __tmp, field);	\
-				if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
-					break;				\
-			}						\
-			SPLAY_LINKLEFT(head, __right, field);		\
-		} else if (__comp > 0) {				\
-			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
-			if (__tmp == NULL)				\
-				break;					\
-			if ((cmp)(elm, __tmp) > 0){			\
-				SPLAY_ROTATE_LEFT(head, __tmp, field);	\
-				if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
-					break;				\
-			}						\
-			SPLAY_LINKRIGHT(head, __left, field);		\
-		}							\
-	}								\
-	SPLAY_ASSEMBLE(head, &__node, __left, __right, field);		\
-}									\
-									\
-/* Splay with either the minimum or the maximum element			\
- * Used to find minimum or maximum element in tree.			\
- */									\
-void name##_SPLAY_MINMAX(struct name *head, int __comp) \
-{									\
-	struct type __node, *__left, *__right, *__tmp;			\
-\
-	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
-	__left = __right = &__node;					\
-\
-	while (1) {							\
-		if (__comp < 0) {					\
-			__tmp = SPLAY_LEFT((head)->sph_root, field);	\
-			if (__tmp == NULL)				\
-				break;					\
-			if (__comp < 0){				\
-				SPLAY_ROTATE_RIGHT(head, __tmp, field);	\
-				if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
-					break;				\
-			}						\
-			SPLAY_LINKLEFT(head, __right, field);		\
-		} else if (__comp > 0) {				\
-			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
-			if (__tmp == NULL)				\
-				break;					\
-			if (__comp > 0) {				\
-				SPLAY_ROTATE_LEFT(head, __tmp, field);	\
-				if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
-					break;				\
-			}						\
-			SPLAY_LINKRIGHT(head, __left, field);		\
-		}							\
-	}								\
-	SPLAY_ASSEMBLE(head, &__node, __left, __right, field);		\
-}
-
-#define SPLAY_NEGINF	-1
-#define SPLAY_INF	1
-
-#define SPLAY_INSERT(name, x, y)	name##_SPLAY_INSERT(x, y)
-#define SPLAY_REMOVE(name, x, y)	name##_SPLAY_REMOVE(x, y)
-#define SPLAY_FIND(name, x, y)		name##_SPLAY_FIND(x, y)
-#define SPLAY_NEXT(name, x, y)		name##_SPLAY_NEXT(x, y)
-#define SPLAY_MIN(name, x)		(SPLAY_EMPTY(x) ? NULL	\
-					: name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
-#define SPLAY_MAX(name, x)		(SPLAY_EMPTY(x) ? NULL	\
-					: name##_SPLAY_MIN_MAX(x, SPLAY_INF))
-
-#define SPLAY_FOREACH(x, name, head)					\
-	for ((x) = SPLAY_MIN(name, head);				\
-	     (x) != NULL;						\
-	     (x) = SPLAY_NEXT(name, head, x))
-
-/* Macros that define a red-black tree */
-#define RB_HEAD(name, type)						\
-struct name {								\
-	struct type *rbh_root; /* root of the tree */			\
-}
-
-#define RB_INITIALIZER(root)						\
-	{ NULL }
-
-#define RB_INIT(root) do {						\
-	(root)->rbh_root = NULL;					\
-} while (/*CONSTCOND*/ 0)
-
-#define RB_BLACK	0
-#define RB_RED		1
-#define RB_ENTRY(type)							\
-struct {								\
-	struct type *rbe_left;		/* left element */		\
-	struct type *rbe_right;		/* right element */		\
-	struct type *rbe_parent;	/* parent element */		\
-	int rbe_color;			/* node color */		\
-}
-
-#define RB_LEFT(elm, field)		(elm)->field.rbe_left
-#define RB_RIGHT(elm, field)		(elm)->field.rbe_right
-#define RB_PARENT(elm, field)		(elm)->field.rbe_parent
-#define RB_COLOR(elm, field)		(elm)->field.rbe_color
-#define RB_ROOT(head)			(head)->rbh_root
-#define RB_EMPTY(head)			(RB_ROOT(head) == NULL)
-
-#define RB_SET(elm, parent, field) do {					\
-	RB_PARENT(elm, field) = parent;					\
-	RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL;		\
-	RB_COLOR(elm, field) = RB_RED;					\
-} while (/*CONSTCOND*/ 0)
-
-#define RB_SET_BLACKRED(black, red, field) do {				\
-	RB_COLOR(black, field) = RB_BLACK;				\
-	RB_COLOR(red, field) = RB_RED;					\
-} while (/*CONSTCOND*/ 0)
-
-#ifndef RB_AUGMENT
-#define RB_AUGMENT(x) (void)(x)
-#endif
-
-#define RB_ROTATE_LEFT(head, elm, tmp, field) do {			\
-	(tmp) = RB_RIGHT(elm, field);					\
-	if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != NULL) {	\
-		RB_PARENT(RB_LEFT(tmp, field), field) = (elm);		\
-	}								\
-	RB_AUGMENT(elm);						\
-	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) {	\
-		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))	\
-			RB_LEFT(RB_PARENT(elm, field), field) = (tmp);	\
-		else							\
-			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);	\
-	} else								\
-		(head)->rbh_root = (tmp);				\
-	RB_LEFT(tmp, field) = (elm);					\
-	RB_PARENT(elm, field) = (tmp);					\
-	RB_AUGMENT(tmp);						\
-	if ((RB_PARENT(tmp, field)))					\
-		RB_AUGMENT(RB_PARENT(tmp, field));			\
-} while (/*CONSTCOND*/ 0)
-
-#define RB_ROTATE_RIGHT(head, elm, tmp, field) do {			\
-	(tmp) = RB_LEFT(elm, field);					\
-	if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != NULL) {	\
-		RB_PARENT(RB_RIGHT(tmp, field), field) = (elm);		\
-	}								\
-	RB_AUGMENT(elm);						\
-	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) {	\
-		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))	\
-			RB_LEFT(RB_PARENT(elm, field), field) = (tmp);	\
-		else							\
-			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);	\
-	} else								\
-		(head)->rbh_root = (tmp);				\
-	RB_RIGHT(tmp, field) = (elm);					\
-	RB_PARENT(elm, field) = (tmp);					\
-	RB_AUGMENT(tmp);						\
-	if ((RB_PARENT(tmp, field)))					\
-		RB_AUGMENT(RB_PARENT(tmp, field));			\
-} while (/*CONSTCOND*/ 0)
-
-/* Generates prototypes and inline functions */
-#define RB_PROTOTYPE(name, type, field, cmp)				\
-void name##_RB_INSERT_COLOR(struct name *, struct type *);	\
-void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\
-struct type *name##_RB_REMOVE(struct name *, struct type *);		\
-struct type *name##_RB_INSERT(struct name *, struct type *);		\
-struct type *name##_RB_FIND(struct name *, struct type *);		\
-struct type *name##_RB_NEXT(struct type *);				\
-struct type *name##_RB_MINMAX(struct name *, int);
-
-/* Generates prototypes (with storage class) and inline functions */
-#define RB_PROTOTYPE_SC(_sc_, name, type, field, cmp)			\
-_sc_ void name##_RB_INSERT_COLOR(struct name *, struct type *);		\
-_sc_ void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *); \
-_sc_ struct type *name##_RB_REMOVE(struct name *, struct type *);	\
-_sc_ struct type *name##_RB_INSERT(struct name *, struct type *);	\
-_sc_ struct type *name##_RB_FIND(struct name *, struct type *);		\
-_sc_ struct type *name##_RB_NEXT(struct type *);			\
-_sc_ struct type *name##_RB_MINMAX(struct name *, int);
-
-/* Main rb operation.
- * Moves node close to the key of elm to top
- */
-#define RB_GENERATE(name, type, field, cmp)				\
-void									\
-name##_RB_INSERT_COLOR(struct name *head, struct type *elm)		\
-{									\
-	struct type *parent, *gparent, *tmp;				\
-	while ((parent = RB_PARENT(elm, field)) != NULL &&		\
-	    RB_COLOR(parent, field) == RB_RED) {			\
-		gparent = RB_PARENT(parent, field);			\
-		if (parent == RB_LEFT(gparent, field)) {		\
-			tmp = RB_RIGHT(gparent, field);			\
-			if (tmp && RB_COLOR(tmp, field) == RB_RED) {	\
-				RB_COLOR(tmp, field) = RB_BLACK;	\
-				RB_SET_BLACKRED(parent, gparent, field);\
-				elm = gparent;				\
-				continue;				\
-			}						\
-			if (RB_RIGHT(parent, field) == elm) {		\
-				RB_ROTATE_LEFT(head, parent, tmp, field);\
-				tmp = parent;				\
-				parent = elm;				\
-				elm = tmp;				\
-			}						\
-			RB_SET_BLACKRED(parent, gparent, field);	\
-			RB_ROTATE_RIGHT(head, gparent, tmp, field);	\
-		} else {						\
-			tmp = RB_LEFT(gparent, field);			\
-			if (tmp && RB_COLOR(tmp, field) == RB_RED) {	\
-				RB_COLOR(tmp, field) = RB_BLACK;	\
-				RB_SET_BLACKRED(parent, gparent, field);\
-				elm = gparent;				\
-				continue;				\
-			}						\
-			if (RB_LEFT(parent, field) == elm) {		\
-				RB_ROTATE_RIGHT(head, parent, tmp, field);\
-				tmp = parent;				\
-				parent = elm;				\
-				elm = tmp;				\
-			}						\
-			RB_SET_BLACKRED(parent, gparent, field);	\
-			RB_ROTATE_LEFT(head, gparent, tmp, field);	\
-		}							\
-	}								\
-	RB_COLOR(head->rbh_root, field) = RB_BLACK;			\
-}									\
-									\
-void									\
-name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \
-{									\
-	struct type *tmp;						\
-	while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) &&	\
-	    elm != RB_ROOT(head)) {					\
-		if (RB_LEFT(parent, field) == elm) {			\
-			tmp = RB_RIGHT(parent, field);			\
-			if (RB_COLOR(tmp, field) == RB_RED) {		\
-				RB_SET_BLACKRED(tmp, parent, field);	\
-				RB_ROTATE_LEFT(head, parent, tmp, field);\
-				tmp = RB_RIGHT(parent, field);		\
-			}						\
-			if ((RB_LEFT(tmp, field) == NULL ||		\
-			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
-			    (RB_RIGHT(tmp, field) == NULL ||		\
-			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
-				RB_COLOR(tmp, field) = RB_RED;		\
-				elm = parent;				\
-				parent = RB_PARENT(elm, field);		\
-			} else {					\
-				if (RB_RIGHT(tmp, field) == NULL ||	\
-				    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\
-					struct type *oleft;		\
-					if ((oleft = RB_LEFT(tmp, field)) \
-					    != NULL)			\
-						RB_COLOR(oleft, field) = RB_BLACK;\
-					RB_COLOR(tmp, field) = RB_RED;	\
-					RB_ROTATE_RIGHT(head, tmp, oleft, field);\
-					tmp = RB_RIGHT(parent, field);	\
-				}					\
-				RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
-				RB_COLOR(parent, field) = RB_BLACK;	\
-				if (RB_RIGHT(tmp, field))		\
-					RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\
-				RB_ROTATE_LEFT(head, parent, tmp, field);\
-				elm = RB_ROOT(head);			\
-				break;					\
-			}						\
-		} else {						\
-			tmp = RB_LEFT(parent, field);			\
-			if (RB_COLOR(tmp, field) == RB_RED) {		\
-				RB_SET_BLACKRED(tmp, parent, field);	\
-				RB_ROTATE_RIGHT(head, parent, tmp, field);\
-				tmp = RB_LEFT(parent, field);		\
-			}						\
-			if ((RB_LEFT(tmp, field) == NULL ||		\
-			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
-			    (RB_RIGHT(tmp, field) == NULL ||		\
-			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
-				RB_COLOR(tmp, field) = RB_RED;		\
-				elm = parent;				\
-				parent = RB_PARENT(elm, field);		\
-			} else {					\
-				if (RB_LEFT(tmp, field) == NULL ||	\
-				    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\
-					struct type *oright;		\
-					if ((oright = RB_RIGHT(tmp, field)) \
-					    != NULL)			\
-						RB_COLOR(oright, field) = RB_BLACK;\
-					RB_COLOR(tmp, field) = RB_RED;	\
-					RB_ROTATE_LEFT(head, tmp, oright, field);\
-					tmp = RB_LEFT(parent, field);	\
-				}					\
-				RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
-				RB_COLOR(parent, field) = RB_BLACK;	\
-				if (RB_LEFT(tmp, field))		\
-					RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\
-				RB_ROTATE_RIGHT(head, parent, tmp, field);\
-				elm = RB_ROOT(head);			\
-				break;					\
-			}						\
-		}							\
-	}								\
-	if (elm)							\
-		RB_COLOR(elm, field) = RB_BLACK;			\
-}									\
-									\
-struct type *								\
-name##_RB_REMOVE(struct name *head, struct type *elm)			\
-{									\
-	struct type *child, *parent, *old = elm;			\
-	int color;							\
-	if (RB_LEFT(elm, field) == NULL)				\
-		child = RB_RIGHT(elm, field);				\
-	else if (RB_RIGHT(elm, field) == NULL)				\
-		child = RB_LEFT(elm, field);				\
-	else {								\
-		struct type *left;					\
-		elm = RB_RIGHT(elm, field);				\
-		while ((left = RB_LEFT(elm, field)) != NULL)		\
-			elm = left;					\
-		child = RB_RIGHT(elm, field);				\
-		parent = RB_PARENT(elm, field);				\
-		color = RB_COLOR(elm, field);				\
-		if (child)						\
-			RB_PARENT(child, field) = parent;		\
-		if (parent) {						\
-			if (RB_LEFT(parent, field) == elm)		\
-				RB_LEFT(parent, field) = child;		\
-			else						\
-				RB_RIGHT(parent, field) = child;	\
-			RB_AUGMENT(parent);				\
-		} else							\
-			RB_ROOT(head) = child;				\
-		if (RB_PARENT(elm, field) == old)			\
-			parent = elm;					\
-		(elm)->field = (old)->field;				\
-		if (RB_PARENT(old, field)) {				\
-			if (RB_LEFT(RB_PARENT(old, field), field) == old)\
-				RB_LEFT(RB_PARENT(old, field), field) = elm;\
-			else						\
-				RB_RIGHT(RB_PARENT(old, field), field) = elm;\
-			RB_AUGMENT(RB_PARENT(old, field));		\
-		} else							\
-			RB_ROOT(head) = elm;				\
-		RB_PARENT(RB_LEFT(old, field), field) = elm;		\
-		if (RB_RIGHT(old, field))				\
-			RB_PARENT(RB_RIGHT(old, field), field) = elm;	\
-		if (parent) {						\
-			left = parent;					\
-			do {						\
-				RB_AUGMENT(left);			\
-			} while ((left = RB_PARENT(left, field)) != NULL); \
-		}							\
-		goto color;						\
-	}								\
-	parent = RB_PARENT(elm, field);					\
-	color = RB_COLOR(elm, field);					\
-	if (child)							\
-		RB_PARENT(child, field) = parent;			\
-	if (parent) {							\
-		if (RB_LEFT(parent, field) == elm)			\
-			RB_LEFT(parent, field) = child;			\
-		else							\
-			RB_RIGHT(parent, field) = child;		\
-		RB_AUGMENT(parent);					\
-	} else								\
-		RB_ROOT(head) = child;					\
-color:									\
-	if (color == RB_BLACK)						\
-		name##_RB_REMOVE_COLOR(head, parent, child);		\
-	return (old);							\
-}									\
-									\
-/* Inserts a node into the RB tree */					\
-struct type *								\
-name##_RB_INSERT(struct name *head, struct type *elm)			\
-{									\
-	struct type *tmp;						\
-	struct type *parent = NULL;					\
-	int comp = 0;							\
-	tmp = RB_ROOT(head);						\
-	while (tmp) {							\
-		parent = tmp;						\
-		comp = (cmp)(elm, parent);				\
-		if (comp < 0)						\
-			tmp = RB_LEFT(tmp, field);			\
-		else if (comp > 0)					\
-			tmp = RB_RIGHT(tmp, field);			\
-		else							\
-			return (tmp);					\
-	}								\
-	RB_SET(elm, parent, field);					\
-	if (parent != NULL) {						\
-		if (comp < 0)						\
-			RB_LEFT(parent, field) = elm;			\
-		else							\
-			RB_RIGHT(parent, field) = elm;			\
-		RB_AUGMENT(parent);					\
-	} else								\
-		RB_ROOT(head) = elm;					\
-	name##_RB_INSERT_COLOR(head, elm);				\
-	return (NULL);							\
-}									\
-									\
-/* Finds the node with the same key as elm */				\
-struct type *								\
-name##_RB_FIND(struct name *head, struct type *elm)			\
-{									\
-	struct type *tmp = RB_ROOT(head);				\
-	int comp;							\
-	while (tmp) {							\
-		comp = cmp(elm, tmp);					\
-		if (comp < 0)						\
-			tmp = RB_LEFT(tmp, field);			\
-		else if (comp > 0)					\
-			tmp = RB_RIGHT(tmp, field);			\
-		else							\
-			return (tmp);					\
-	}								\
-	return (NULL);							\
-}									\
-									\
-/* ARGSUSED */								\
-struct type *								\
-name##_RB_NEXT(struct type *elm)					\
-{									\
-	if (RB_RIGHT(elm, field)) {					\
-		elm = RB_RIGHT(elm, field);				\
-		while (RB_LEFT(elm, field))				\
-			elm = RB_LEFT(elm, field);			\
-	} else {							\
-		if (RB_PARENT(elm, field) &&				\
-		    (elm == RB_LEFT(RB_PARENT(elm, field), field)))	\
-			elm = RB_PARENT(elm, field);			\
-		else {							\
-			while (RB_PARENT(elm, field) &&			\
-			    (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\
-				elm = RB_PARENT(elm, field);		\
-			elm = RB_PARENT(elm, field);			\
-		}							\
-	}								\
-	return (elm);							\
-}									\
-									\
-struct type *								\
-name##_RB_MINMAX(struct name *head, int val)				\
-{									\
-	struct type *tmp = RB_ROOT(head);				\
-	struct type *parent = NULL;					\
-	while (tmp) {							\
-		parent = tmp;						\
-		if (val < 0)						\
-			tmp = RB_LEFT(tmp, field);			\
-		else							\
-			tmp = RB_RIGHT(tmp, field);			\
-	}								\
-	return (parent);						\
-}
-
-#define RB_NEGINF	-1
-#define RB_INF	1
-
-#define RB_INSERT(name, x, y)	name##_RB_INSERT(x, y)
-#define RB_REMOVE(name, x, y)	name##_RB_REMOVE(x, y)
-#define RB_FIND(name, x, y)	name##_RB_FIND(x, y)
-#define RB_NEXT(name, x, y)	name##_RB_NEXT(y)
-#define RB_MIN(name, x)		name##_RB_MINMAX(x, RB_NEGINF)
-#define RB_MAX(name, x)		name##_RB_MINMAX(x, RB_INF)
-
-#define RB_FOREACH(x, name, head)					\
-	for ((x) = RB_MIN(name, head);					\
-	     (x) != NULL;						\
-	     (x) = name##_RB_NEXT(x))
-
-#endif	/* _SYS_TREE_H_ */
+#include <libkern/tree.h>
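
/*
 * Illustrative sketch (editorial; struct node and node_cmp are hypothetical,
 * and it is assumed <libkern/tree.h> carries the same macros deleted above):
 * existing red-black tree users keep the exact same pattern.
 */
#include <libkern/tree.h>

struct node {
	RB_ENTRY(node) link;	/* embedded left/right/parent/color linkage */
	int key;
};

static int
node_cmp(struct node *a, struct node *b)
{
	return (a->key < b->key ? -1 : a->key > b->key);
}

RB_HEAD(node_tree, node);
RB_PROTOTYPE(node_tree, node, link, node_cmp)
RB_GENERATE(node_tree, node, link, node_cmp)

/* usage: RB_INSERT(node_tree, &head, n); RB_FOREACH(n, node_tree, &head) ... */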
diff --git a/bsd/sys/tty.h b/bsd/sys/tty.h
index f0f546c48..ecfb234d5 100644
--- a/bsd/sys/tty.h
+++ b/bsd/sys/tty.h
@@ -220,6 +220,8 @@ struct clist;
 #define	TS_DSR_OFLOW	0x800000	/* For CDSR_OFLOW. */
 #endif
 
+#define	TS_IOCTL_NOT_OK	0x1000000	/* Workaround <rdar://....> */
+
 
 /* Character type information. */
 #define	ORDINARY	0
diff --git a/bsd/sys/ubc.h b/bsd/sys/ubc.h
index a26ba1caa..4ee9e86cf 100644
--- a/bsd/sys/ubc.h
+++ b/bsd/sys/ubc.h
@@ -70,6 +70,7 @@ int	ubc_setcred(struct vnode *, struct proc *) __deprecated;
 /* code signing */
 struct cs_blob;
 struct cs_blob *ubc_cs_blob_get(vnode_t, cpu_type_t, off_t);
+int cs_entitlements_blob_get(proc_t p, void **, size_t *);
 #endif
 
 /* cluster IO routines */
diff --git a/bsd/sys/ubc_internal.h b/bsd/sys/ubc_internal.h
index 775a8457b..d7197f089 100644
--- a/bsd/sys/ubc_internal.h
+++ b/bsd/sys/ubc_internal.h
@@ -87,6 +87,8 @@ struct cl_readahead {
 struct cl_writebehind {
 	lck_mtx_t	cl_lockw;
         void	*	cl_scmap;			/* pointer to sparse cluster map */
+	off_t		cl_last_write;			/* offset of the end of the last write */
+	off_t		cl_seq_written;			/* sequentially written bytes */
 	int		cl_sparse_pushes;		/* number of pushes outside of the cl_lockw in progress */
 	int		cl_sparse_wait;			/* synchronous push is in progress */
 	int		cl_number;			/* number of packed write behind clusters currently valid */
@@ -124,6 +126,13 @@ struct ubc_info {
         struct	cl_writebehind *cl_wbehind;	/* cluster write behind context */
 
 	struct	cs_blob		*cs_blobs; 	/* for CODE SIGNING */
+#if CHECK_CS_VALIDATION_BITMAP
+	void			*cs_valid_bitmap;     /* right now: used only for signed files on the read-only root volume */
+	uint64_t		cs_valid_bitmap_size; /* Save original bitmap size in case the file size changes.
+						       * In the future, we may want to reconsider changing the
+						       * underlying bitmap to reflect the new file size changes.
+						       */
+#endif /* CHECK_CS_VALIDATION_BITMAP */
 };
 
 /* Defines for ui_flags */
@@ -159,6 +168,7 @@ __private_extern__ uint32_t cluster_hard_throttle_limit(vnode_t, uint32_t *, uin
 #define UBC_FOR_PAGEOUT         0x0002
 
 memory_object_control_t ubc_getobject(vnode_t, int);
+boolean_t	ubc_strict_uncached_IO(vnode_t);
 
 int	ubc_info_init(vnode_t);
 int	ubc_info_init_withsize(vnode_t, off_t);
@@ -181,6 +191,8 @@ int	ubc_cs_getcdhash(vnode_t, off_t, unsigned char *);
 kern_return_t ubc_cs_blob_allocate(vm_offset_t *, vm_size_t *);
 void ubc_cs_blob_deallocate(vm_offset_t, vm_size_t);
 
+kern_return_t	ubc_cs_validation_bitmap_allocate( vnode_t );
+void		ubc_cs_validation_bitmap_deallocate( vnode_t );
 __END_DECLS
 
 
diff --git a/bsd/sys/ucontext.h b/bsd/sys/ucontext.h
index b31d50ed3..249cf5e23 100644
--- a/bsd/sys/ucontext.h
+++ b/bsd/sys/ucontext.h
@@ -35,12 +35,6 @@
 #define __need_mcontext_t
 #define __need_stack_t
 #define __need_ucontext_t
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#if defined(__ppc__) || defined(__ppc64__)
-#define __need_mcontext64_t
-#define __need_ucontext64_t
-#endif /* __ppc__|| __ppc64__  */ 
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
 #include <sys/_structs.h>
 
 #ifndef _SIGSET_T
diff --git a/bsd/sys/ucred.h b/bsd/sys/ucred.h
index 0d8b0f2a4..6d914a4df 100644
--- a/bsd/sys/ucred.h
+++ b/bsd/sys/ucred.h
@@ -90,6 +90,7 @@ struct ucred {
 	TAILQ_ENTRY(ucred)	cr_link; /* never modify this without KAUTH_CRED_HASH_LOCK */
 	u_long	cr_ref;			/* reference count */
 	
+struct posix_cred {
 	/*
 	 * The credential hash depends on everything from this point on
 	 * (see kauth_cred_get_hashkey)
@@ -102,15 +103,9 @@ struct ucred {
 	gid_t	cr_rgid;		/* real group id */
 	gid_t	cr_svgid;		/* saved group id */
 	uid_t	cr_gmuid;		/* UID for group membership purposes */
-	/*
-	 * XXX - cr_au will be replaced with cr_audit below.
-	 * cr_au is here to keep kexts from breaking. It seems to
-	 * be currently used by the ucred hashing as well.
-	 */
-	struct auditinfo cr_au;		/* XXX This needs to go away. */
-	struct label	*cr_label;	/* MAC label */
-
 	int	cr_flags;		/* flags on credential */
+} cr_posix;
+	struct label	*cr_label;	/* MAC label */
 	/* 
 	 * NOTE: If anything else (besides the flags)
 	 * added after the label, you must change
@@ -121,6 +116,7 @@ struct ucred {
 #ifndef _KAUTH_CRED_T
 #define	_KAUTH_CRED_T
 typedef struct ucred *kauth_cred_t;
+typedef struct posix_cred *posix_cred_t;
 #endif	/* !_KAUTH_CRED_T */
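
/*
 * Illustrative sketch (editorial; cred_real_gid is a hypothetical helper):
 * with the POSIX identity split into the embedded struct posix_cred, those
 * fields are now reached through the cr_posix member.
 */
static gid_t
cred_real_gid(kauth_cred_t cred)
{
	posix_cred_t pcred = &cred->cr_posix;

	return pcred->cr_rgid;	/* real group id, see struct posix_cred above */
}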
 
 /*
diff --git a/bsd/sys/un.h b/bsd/sys/un.h
index 479058ff2..400620396 100644
--- a/bsd/sys/un.h
+++ b/bsd/sys/un.h
@@ -83,8 +83,13 @@ struct	sockaddr_un {
 };
 
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
+
+/* Level number of get/setsockopt for local domain sockets */
+#define SOL_LOCAL		0
+
 /* Socket options. */
 #define LOCAL_PEERCRED          0x001           /* retrieve peer credentials */
+
 #endif	/* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
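
/*
 * Illustrative userspace sketch (editorial; peer_euid is a hypothetical
 * helper): SOL_LOCAL is the get/setsockopt level for the LOCAL_* options,
 * e.g. fetching the peer's credentials on a UNIX-domain socket.
 */
#include <sys/socket.h>
#include <sys/ucred.h>
#include <sys/un.h>

static int
peer_euid(int fd, uid_t *uid)
{
	struct xucred cred;
	socklen_t len = sizeof(cred);

	if (getsockopt(fd, SOL_LOCAL, LOCAL_PEERCRED, &cred, &len) == -1)
		return -1;
	*uid = cred.cr_uid;	/* effective uid of the peer */
	return 0;
}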
 
 
diff --git a/bsd/sys/unistd.h b/bsd/sys/unistd.h
index d80b3bbd3..c778c66f3 100644
--- a/bsd/sys/unistd.h
+++ b/bsd/sys/unistd.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -174,6 +174,7 @@ struct accessx_descriptor {
 #define	_PC_REC_XFER_ALIGN	23	/* Recommended buffer alignment */
 #define	_PC_SYMLINK_MAX		24	/* Max # of bytes in symlink name */
 #define	_PC_SYNC_IO		25	/* Sync I/O [SIO] supported? */
+#define _PC_XATTR_SIZE_BITS 	26	/* # of bits to represent maximum xattr size */
 
 /* configurable system strings */
 #define	_CS_PATH		 1
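
/*
 * Illustrative userspace sketch (editorial; xattr_size_bits is a
 * hypothetical helper): querying the new _PC_XATTR_SIZE_BITS name defined
 * above through the standard pathconf() interface.
 */
#include <unistd.h>

static long
xattr_size_bits(const char *path)
{
	/* -1 means the name is unsupported on this volume */
	return pathconf(path, _PC_XATTR_SIZE_BITS);
}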
diff --git a/bsd/sys/unpcb.h b/bsd/sys/unpcb.h
index 2376c11f8..a50aebe36 100644
--- a/bsd/sys/unpcb.h
+++ b/bsd/sys/unpcb.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -134,7 +134,7 @@ struct	unpcb {
 	unp_gen_t	unp_gencnt;	/* generation count of this instance */
 	int		unp_flags;	/* flags */
 	struct xucred	unp_peercred;	/* peer credentials, if applicable */
-	lck_mtx_t 	*unp_mtx;	/* per unpcb lock */
+	decl_lck_mtx_data( ,unp_mtx);	/* per unpcb lock */
 	int		rw_thrcount;    /* disconnect should wait for this count to become zero */
 };
 #endif /* KERNEL */
@@ -155,6 +155,7 @@ struct	unpcb {
 #define UNP_HAVEPC			0x0001
 #define UNP_HAVEPCCACHED		0x0002
 #define UNP_DONTDISCONNECT		0x0004
+#define	UNP_TRACE_MDNS			0x1000
 
 #ifdef KERNEL
 struct  unpcb_compat {
diff --git a/bsd/sys/user.h b/bsd/sys/user.h
index 66f355110..4a59aa866 100644
--- a/bsd/sys/user.h
+++ b/bsd/sys/user.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -79,12 +79,15 @@
 #endif
 #include <sys/vm.h>		/* XXX */
 #include <sys/sysctl.h>
- 
+
+
 #ifdef KERNEL
+#ifdef BSD_KERNEL_PRIVATE
+#include <sys/pthread_internal.h> /* for uu_kwe entry */
+#endif  /* BSD_KERNEL_PRIVATE */
 #ifdef __APPLE_API_PRIVATE
 #include <sys/eventvar.h>
 
-
 #if !defined(__LP64__) || defined(XNU_KERNEL_PRIVATE)
 /*
  * VFS context structure (part of uthread)
@@ -124,7 +127,7 @@ struct uthread {
 			int poll;
 			int error;
 			int count;
-			int kfcount;
+			int _reserved1;	// UNUSED: avoid changing size for now
 			char * wql;
 	} uu_select;			/* saved state for select() */
 	/* to support kevent continuations */
@@ -156,7 +159,9 @@ struct uthread {
 	caddr_t uu_wchan;			/* sleeping thread wait channel */
 	const char *uu_wmesg;			/* ... wait message */
 	int uu_flag;
+#if CONFIG_EMBEDDED
 	int uu_iopol_disk;			/* disk I/O policy */
+#endif /* CONFIG_EMBEDDED */
 	struct proc * uu_proc;
 	void * uu_userstate;
 	wait_queue_set_t uu_wqset;			/* cached across select calls */
@@ -172,12 +177,12 @@ struct uthread {
 
 	struct kaudit_record 	*uu_ar;			/* audit record */
 	struct task*	uu_aio_task;			/* target task for async io */
-     
-  /* network support for dlil layer locking */
-	u_int32_t	dlil_incremented_read;
+
 	lck_mtx_t	*uu_mtx;
 
 	int		uu_lowpri_window;
+	boolean_t	uu_throttle_isssd;
+	boolean_t	uu_throttle_bc;
 	void	*	uu_throttle_info; 	/* pointer to throttled I/Os info */
 
 	struct kern_sigaltstack uu_sigstk;
@@ -191,12 +196,14 @@ struct uthread {
         int		uu_iocount;
         int		uu_vpindex;
         void 	*	uu_vps[32];
+        void    *       uu_pcs[32][10];
 #endif
 #if CONFIG_DTRACE
 	siginfo_t	t_dtrace_siginfo;
 	uint32_t	t_dtrace_errno; /* Most recent errno */
-        uint8_t         t_dtrace_stop;  /* indicates a DTrace-desired stop */
+        uint8_t         t_dtrace_stop;  /* indicates a DTrace desired stop */
         uint8_t         t_dtrace_sig;   /* signal sent via DTrace's raise() */
+        uint64_t	t_dtrace_resumepid; /* DTrace's pidresume() pid */
 
         union __tdu {
                 struct __tds {
@@ -232,10 +239,7 @@ struct uthread {
 #endif /* CONFIG_DTRACE */
 	void *		uu_threadlist;
 	char *		pth_name;
-	TAILQ_ENTRY(uthread) uu_mtxlist;	/* psynch waiters list*/
-	uint32_t	uu_lockseq;		/* seq on arrival */
-	uint32_t	uu_psynchretval;	/* pmtx retval */
-	void *		uu_kwqqueue;		/* queue blocked on */
+	struct ksyn_waitq_element  uu_kwe;		/* used for pthread synch */
 };
 
 typedef struct uthread * uthread_t;
@@ -252,7 +256,9 @@ typedef struct uthread * uthread_t;
 #define UT_PASSIVE_IO	0x00000100	/* this thread issues passive I/O */
 #define UT_PROCEXIT	0x00000200	/* this thread completed the  proc exit */
 #define UT_RAGE_VNODES	0x00000400	/* rapid age any vnodes created by this thread */	
+#if CONFIG_EMBEDDED
 #define UT_BACKGROUND	0x00000800	/* this thread is in background state */	
+#endif /* CONFIG_EMBEDDED */
 #define UT_BACKGROUND_TRAFFIC_MGT	0x00001000 /* background traffic is regulated */
 
 #define	UT_VFORK	0x02000000	/* thread has vfork children */
diff --git a/bsd/sys/vfs_context.h b/bsd/sys/vfs_context.h
index 16453bb7a..fd31f99e3 100644
--- a/bsd/sys/vfs_context.h
+++ b/bsd/sys/vfs_context.h
@@ -5,7 +5,9 @@
 #include <sys/types.h>
 #include <sys/kernel_types.h>
 #include <kern/thread.h>
+#ifdef BSD_KERNEL_PRIVATE
 #include <sys/user.h>
+#endif
 #include <stdint.h>
 
 /*
diff --git a/bsd/sys/vnode.h b/bsd/sys/vnode.h
index 65620f277..965518cb0 100644
--- a/bsd/sys/vnode.h
+++ b/bsd/sys/vnode.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -145,6 +145,7 @@ enum vtagtype	{
 #define IO_BACKGROUND IO_PASSIVE /* used for backward compatibility.  to be removed after IO_BACKGROUND is no longer
 								  * used by DiskImages in-kernel mode */
 #define	IO_NOAUTH	0x8000		/* No authorization checks. */
+#define IO_NODIRECT    0x10000		/* don't use direct synchronous writes if IO_NOCACHE is specified */
 
 
 /*
@@ -159,15 +160,15 @@ struct componentname {
 	uint32_t	cn_flags;	/* flags (see below) */
 #ifdef BSD_KERNEL_PRIVATE
 	vfs_context_t	cn_context;
-	void * pad_obsolete2;
+	struct nameidata *cn_ndp;	/* pointer back to nameidata */
 
 /* XXX use of these defines are deprecated */
 #define	cn_proc		(cn_context->vc_proc + 0)	/* non-lvalue */
 #define	cn_cred		(cn_context->vc_ucred + 0)	/* non-lvalue */
 
 #else
-	void * obsolete1;	/* use vfs_context_t */
-	void * obsolete2;	/* use vfs_context_t */
+	void * cn_reserved1;	/* use vfs_context_t */
+	void * cn_reserved2;	/* use vfs_context_t */
 #endif
 	/*
 	 * Shared between lookup and commit routines.
@@ -201,8 +202,8 @@ struct componentname {
 #define	ISDOTDOT	0x00002000 /* current component name is .. */
 #define	MAKEENTRY	0x00004000 /* entry is to be added to name cache */
 #define	ISLASTCN	0x00008000 /* this is last component of pathname */
-#define	ISWHITEOUT	0x00020000 /* found whiteout */
-#define	DOWHITEOUT	0x00040000 /* do whiteouts */
+#define	ISWHITEOUT	0x00020000 /* OBSOLETE: found whiteout */
+#define	DOWHITEOUT	0x00040000 /* OBSOLETE: do whiteouts */
 
 
 /* The following structure specifies a vnode for creation */
@@ -228,6 +229,234 @@ struct vnode_fsparam {
 #define VNCREATE_FLAVOR	0
 #define VCREATESIZE sizeof(struct vnode_fsparam)
 
+
+#ifdef KERNEL_PRIVATE
+/*
+ * Resolver callback SPI for trigger vnodes
+ *
+ * Only available from kernels built with CONFIG_TRIGGERS option
+ */
+
+/*!
+ @enum Pathname Lookup Operations
+ @abstract Constants defining pathname operations (passed to resolver callbacks)
+ */
+enum path_operation	{
+	OP_LOOKUP,
+	OP_MOUNT,
+	OP_UNMOUNT,
+	OP_STATFS,
+	OP_OPEN,
+	OP_LINK,
+	OP_UNLINK,
+	OP_RENAME,
+	OP_CHDIR,
+	OP_CHROOT,
+	OP_MKNOD,
+	OP_MKFIFO,
+	OP_SYMLINK,
+	OP_ACCESS,
+	OP_PATHCONF,
+	OP_READLINK,
+	OP_GETATTR,
+	OP_SETATTR,
+	OP_TRUNCATE,
+	OP_COPYFILE,
+	OP_MKDIR,
+	OP_RMDIR,
+	OP_REVOKE,
+	OP_EXCHANGEDATA,
+	OP_SEARCHFS,
+	OP_FSCTL,
+	OP_GETXATTR,
+	OP_SETXATTR,
+	OP_REMOVEXATTR,
+	OP_LISTXATTR,
+	OP_MAXOP	/* anything beyond previous entry is invalid */
+};
+
+/*
+ * is operation a traditional trigger (autofs)?
+ * 1 if trigger, 0 if no trigger
+ */
+extern int vfs_istraditionaltrigger(enum path_operation op, const struct componentname *cnp);
+
+/*!
+ @enum resolver status
+ @abstract Constants defining resolver status
+ @constant RESOLVER_RESOLVED  the resolver has finished (typically means a successful mount)
+ @constant RESOLVER_NOCHANGE  the resolver status didn't change
+ @constant RESOLVER_UNRESOLVED  the resolver has finished (typically means a successful unmount)
+ @constant RESOLVER_ERROR  the resolver encountered an error (errno passed in aux value)
+ @constant RESOLVER_STOP  a request to destroy trigger XXX do we need this???
+ */
+enum resolver_status {
+	RESOLVER_RESOLVED,
+	RESOLVER_NOCHANGE,
+	RESOLVER_UNRESOLVED,
+	RESOLVER_ERROR,
+	RESOLVER_STOP
+};
+
+typedef uint64_t resolver_result_t;
+
+/*
+ * Compound resolver result
+ *
+ * The trigger vnode callbacks use a compound result value. In addition
+ * to the resolver status, it contains a sequence number and an auxiliary
+ * value.
+ *
+ * The sequence value is used by VFS to sequence-stamp trigger vnode
+ * state transitions. It is expected to be incremented each time a
+ * resolver changes state (i.e. resolved or unresolved). A result
+ * containing a stale sequence (older than a trigger vnode's current
+ * value) will be ignored by VFS.
+ *
+ * The auxiliary value is currently only used to deliver the errno
+ * value for RESOLVER_ERROR status conditions. When a RESOLVER_ERROR
+ * occurs, VFS will propagate this error back to the syscall that
+ * encountered the trigger vnode.
+ */
+extern resolver_result_t vfs_resolver_result(uint32_t seq, enum resolver_status stat, int aux);
+
+/*
+ * Extract values from a compound resolver result
+ */
+extern enum resolver_status vfs_resolver_status(resolver_result_t);
+extern uint32_t vfs_resolver_sequence(resolver_result_t);
+extern int vfs_resolver_auxiliary(resolver_result_t);
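
/*
 * Illustrative sketch (editorial; my_report_resolved and the per-trigger
 * sequence counter are hypothetical): packing a state change into a
 * compound result with the constructor declared above.
 */
static resolver_result_t
my_report_resolved(uint32_t *seqp)
{
	/* bump the sequence so VFS does not discard this as a stale transition */
	return vfs_resolver_result(++(*seqp), RESOLVER_RESOLVED, 0);
}

/* on failure, the errno travels in aux: vfs_resolver_result(seq, RESOLVER_ERROR, error) */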
+
+
+/*!
+ @typedef trigger_vnode_resolve_callback_t
+ @abstract function prototype for a trigger vnode resolve callback
+ @discussion This function is associated with a trigger vnode during a vnode create.  It is
+ typically called when a lookup operation occurs for a trigger vnode
+ @param vp The trigger vnode which needs resolving
+ @param cnp Various data about lookup, e.g. filename and state flags
+ @param pop The pathname operation that initiated the lookup (see enum path_operation).
+ @param flags
+ @param data Arbitrary data supplied by vnode trigger creator
+ @param ctx Context for authentication.
+ @return RESOLVER_RESOLVED, RESOLVER_NOCHANGE, RESOLVER_UNRESOLVED or RESOLVER_ERROR
+*/
+typedef resolver_result_t (* trigger_vnode_resolve_callback_t)(
+	vnode_t				vp,
+	const struct componentname *	cnp,
+	enum path_operation		pop,
+	int				flags,
+	void *				data,
+	vfs_context_t			ctx);
+
+/*!
+ @typedef trigger_vnode_unresolve_callback_t
+ @abstract function prototype for a trigger vnode unresolve callback
+ @discussion This function is associated with a trigger vnode during a vnode create.  It is
+ called to unresolve a trigger vnode (typically this means unmount).
+ @param vp The trigger vnode which needs unresolving
+ @param flags Unmount flags
+ @param data Arbitrary data supplied by vnode trigger creator
+ @param ctx Context for authentication.
+ @return RESOLVER_NOCHANGE, RESOLVER_UNRESOLVED or RESOLVER_ERROR
+*/
+typedef resolver_result_t (* trigger_vnode_unresolve_callback_t)(
+	vnode_t		vp,
+	int		flags,
+	void *		data,
+	vfs_context_t	ctx);
+
+/*!
+ @typedef trigger_vnode_rearm_callback_t
+ @abstract function prototype for a trigger vnode rearm callback
+ @discussion This function is associated with a trigger vnode during a vnode create.  It is
+ called to verify a rearm from VFS (i.e. should VFS rearm the trigger?).
+ @param vp The trigger vnode which needs rearming
+ @param flags
+ @param data Arbitrary data supplied by vnode trigger creator
+ @param ctx Context for authentication.
+ @return RESOLVER_NOCHANGE or RESOLVER_ERROR
+*/
+typedef resolver_result_t (* trigger_vnode_rearm_callback_t)(
+	vnode_t		vp,
+	int		flags,
+	void *		data,
+	vfs_context_t	ctx);
+
+/*!
+ @typedef trigger_vnode_reclaim_callback_t
+ @abstract function prototype for a trigger vnode reclaim callback
+ @discussion This function is associated with a trigger vnode during a vnode create.  It is
+ called to deallocate private callback argument data
+ @param vp The trigger vnode associated with the data
+ @param data The arbitrary data supplied by vnode trigger creator
+*/
+typedef void (* trigger_vnode_reclaim_callback_t)(
+	vnode_t		vp,
+	void *		data);
+
+/*!
+ @function vnode_trigger_update
+ @abstract Update a trigger vnode's state.
+ @discussion This allows a resolver to notify VFS of a state change in a trigger vnode.
+ @param vp The trigger vnode whose information to update.
+ @param result A compound resolver result value
+ @return EINVAL if result value is invalid or vp isn't a trigger vnode
+ */
+extern int vnode_trigger_update(vnode_t vp, resolver_result_t result);
+
+struct vnode_trigger_info {
+	trigger_vnode_resolve_callback_t	vti_resolve_func;
+	trigger_vnode_unresolve_callback_t	vti_unresolve_func;
+	trigger_vnode_rearm_callback_t		vti_rearm_func;
+	trigger_vnode_reclaim_callback_t	vti_reclaim_func;
+	void *					vti_data;   /* auxiliary data (optional) */
+	uint32_t				vti_flags;  /* optional flags (see below) */
+};
+
+/*
+ * SPI for creating a trigger vnode
+ *
+ * Uses the VNCREATE_TRIGGER flavor with existing vnode_create() KPI
+ *
+ * Only one resolver per vnode.
+ *
+ * ERRORS (in addition to vnode_create errors):
+ *	EINVAL (invalid resolver info, like invalid flags)
+ *	ENOTDIR (only directories can have a resolver)
+ *	EPERM (vnode cannot be a trigger - eg root dir of a file system)
+ *	ENOMEM
+ */
+struct vnode_trigger_param {
+	struct vnode_fsparam			vnt_params; /* same as for VNCREATE_FLAVOR */
+	trigger_vnode_resolve_callback_t	vnt_resolve_func;
+	trigger_vnode_unresolve_callback_t	vnt_unresolve_func;
+	trigger_vnode_rearm_callback_t		vnt_rearm_func;
+	trigger_vnode_reclaim_callback_t	vnt_reclaim_func;
+	void *					vnt_data;   /* auxiliary data (optional) */
+	uint32_t				vnt_flags;  /* optional flags (see below) */
+};
+
+#define VNCREATE_TRIGGER	(('T' << 8) + ('V'))
+#define VNCREATE_TRIGGER_SIZE	sizeof(struct vnode_trigger_param)
+
+/*
+ * vnode trigger flags (vnt_flags)
+ *
+ * VNT_AUTO_REARM:
+ * On unmounts of a trigger mount, automatically re-arm the trigger.
+ *
+ * VNT_NO_DIRECT_MOUNT:
+ * A trigger vnode instance that doesn't directly trigger a mount,
+ * instead it triggers the mounting of sub-trigger nodes.
+ */
+#define VNT_AUTO_REARM    	(1 << 0)	
+#define VNT_NO_DIRECT_MOUNT	(1 << 1)	
+#define VNT_VALID_MASK    	(VNT_AUTO_REARM | VNT_NO_DIRECT_MOUNT)
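
/*
 * Illustrative sketch (editorial; my_make_trigger and the my_* callbacks are
 * hypothetical and declared elsewhere, and exactly which vnfs_* fields must
 * be filled is an assumption): creating a trigger vnode with the
 * VNCREATE_TRIGGER flavor through the existing vnode_create() KPI.
 */
static int
my_make_trigger(mount_t mp, vnode_t dvp, struct componentname *cnp,
    void *mydata, vnode_t *vpp)
{
	struct vnode_trigger_param vtp;

	bzero(&vtp, sizeof(vtp));
	vtp.vnt_params.vnfs_mp = mp;		/* ordinary vnode_fsparam setup */
	vtp.vnt_params.vnfs_vtype = VDIR;	/* only directories may have a resolver */
	vtp.vnt_params.vnfs_dvp = dvp;
	vtp.vnt_params.vnfs_cnp = cnp;

	vtp.vnt_resolve_func = my_resolve;	/* trigger_vnode_resolve_callback_t */
	vtp.vnt_unresolve_func = my_unresolve;
	vtp.vnt_rearm_func = my_rearm;
	vtp.vnt_reclaim_func = my_reclaim;
	vtp.vnt_data = mydata;
	vtp.vnt_flags = VNT_AUTO_REARM;

	return vnode_create(VNCREATE_TRIGGER, VNCREATE_TRIGGER_SIZE, &vtp, vpp);
}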
+
+#endif /* KERNEL_PRIVATE */
+
+
 /*
  * Vnode attributes, new-style.
  *
@@ -287,6 +516,7 @@ struct vnode_fsparam {
 #define VNODE_ATTR_va_guuid		(1LL<<27)	/* 08000000 */
 #define VNODE_ATTR_va_nchildren		(1LL<<28)       /* 10000000 */
 #define VNODE_ATTR_va_dirlinkcount	(1LL<<29)       /* 20000000 */
+#define VNODE_ATTR_va_addedtime		(1LL<<30)		/* 40000000 */
 
 #define VNODE_ATTR_BIT(n)	(VNODE_ATTR_ ## n)
 /*
@@ -307,7 +537,8 @@ struct vnode_fsparam {
 				VNODE_ATTR_BIT(va_name) |		\
 				VNODE_ATTR_BIT(va_type) |		\
 				VNODE_ATTR_BIT(va_nchildren) |		\
-				VNODE_ATTR_BIT(va_dirlinkcount)) 
+				VNODE_ATTR_BIT(va_dirlinkcount) |	\
+				VNODE_ATTR_BIT(va_addedtime))
 /*
  * Attributes that can be applied to a new file object.
  */
@@ -381,14 +612,23 @@ struct vnode_attr {
 	uint64_t	va_dirlinkcount;  /* Real references to dir (i.e. excluding "." and ".." refs) */
 
 	/* add new fields here only */
+#ifdef BSD_KERNEL_PRIVATE
+	struct kauth_acl *va_base_acl;
+#else
+	void * 		va_reserved1;
+#endif /* BSD_KERNEL_PRIVATE */
+	struct timespec	va_addedtime;	/* timestamp when item was added to parent directory */
+
 		
 };
 
 /*
  * Flags for va_vaflags.
  */
-#define	VA_UTIMES_NULL	0x010000	/* utimes argument was NULL */
-#define VA_EXCLUSIVE	0x020000	/* exclusive create request */
+#define	VA_UTIMES_NULL		0x010000	/* utimes argument was NULL */
+#define VA_EXCLUSIVE		0x020000	/* exclusive create request */
+#define VA_NOINHERIT		0x040000	/* Don't inherit ACLs from parent */
+#define VA_NOAUTH		0x080000	
 
 /*
  *  Modes.  Some values same as Ixxx entries from inode.h for now.
@@ -761,6 +1001,14 @@ int	vnode_isnocache(vnode_t);
  */
 int	vnode_israge(vnode_t);
 
+/*!
+ @function vnode_needssnapshots
+ @abstract Check if a vnode needs snapshots events (regardless of its ctime status)
+ @param vp The vnode to test.
+ @return Nonzero if vnode needs snapshot events, 0 otherwise
+ */
+int	vnode_needssnapshots(vnode_t);
+
 /*!
  @function vnode_setnocache
  @abstract Set a vnode to not have its data cached in memory (i.e. we write-through to disk and always read from disk).
@@ -992,6 +1240,20 @@ int vfs_context_rele(vfs_context_t);
 vfs_context_t vfs_context_current(void);
 #ifdef KERNEL_PRIVATE
 int	vfs_context_bind(vfs_context_t);
+
+/*!
+ @function vfs_ctx_skipatime
+ @abstract Check to see if this context should skip updating a vnode's access times.
+ @discussion This is currently tied to the vnode rapid aging process.  If the process is marked for rapid aging,
+ then the kernel should not update the access times of vnodes it touches.  This checks whether the
+ specified process and/or thread is marked for rapid aging when it manipulates vnodes.
+ @param ctx The context being investigated.
+ @return 1 if we should skip access time updates, 0 if we should not.
+ */
+int	vfs_ctx_skipatime(vfs_context_t ctx);
+
 #endif
 
 /*!
@@ -1048,6 +1310,10 @@ int 	vnode_get(vnode_t);
  */
 int 	vnode_getwithvid(vnode_t, uint32_t);
 
+#ifdef BSD_KERNEL_PRIVATE
+int vnode_getwithvid_drainok(vnode_t, uint32_t);
+#endif /* BSD_KERNEL_PRIVATE */
+
 /*!
  @function vnode_getwithref
  @abstract Increase the iocount on a vnode on which a usecount (persistent reference) is held.
@@ -1172,6 +1438,17 @@ int 	vnode_notify(vnode_t, uint32_t, struct vnode_attr*);
  */ 
 int	vnode_ismonitored(vnode_t);
 
+
+/*!
+ @function vnode_isdyldsharedcache
+ @abstract Check whether a file is a dyld shared cache file.
+ @param vp Vnode to examine.
+ @discussion Will not reenter the filesystem.
+ @return nonzero if a dyld shared cache file, zero otherwise.
+ */ 
+int	vnode_isdyldsharedcache(vnode_t);
+
+
 /*!
  @function vfs_get_notify_attributes
  @abstract Determine what attributes are required to send up a notification with vnode_notify().
@@ -1298,7 +1575,7 @@ int vn_getpath(struct vnode *vp, char *pathbuf, int *len);
  */
 #define VNODE_LOOKUP_NOFOLLOW		0x01
 #define	VNODE_LOOKUP_NOCROSSMOUNT	0x02
-#define VNODE_LOOKUP_DOWHITEOUT		0x04
+#define VNODE_LOOKUP_DOWHITEOUT		0x04	/* OBSOLETE */
 /*!
  @function vnode_lookup
  @abstract Convert a path into a vnode.
@@ -1368,6 +1645,7 @@ int	vnode_iterate(struct mount *, int, int (*)(struct vnode *, void *), void *);
 #define VNODE_ITERATE_INACTIVE	0x200
 #ifdef BSD_KERNEL_PRIVATE
 #define VNODE_ALWAYS		0x400
+#define VNODE_DRAINO		0x800
 #endif /* BSD_KERNEL_PRIVATE */
 
 /*
@@ -1545,6 +1823,20 @@ void	vnode_putname(const char *name);
  */
 vnode_t	vnode_getparent(vnode_t vp);
 
+#ifdef KERNEL_PRIVATE
+/*! 
+ @function vnode_lookup_continue_needed
+ @abstract Determine whether vnode needs additional processing in VFS before being opened.
+ @discussion If the result is zero, the filesystem can open this vnode.  If the result is nonzero,
+ additional processing is needed in VFS (e.g. symlink, mountpoint).  Nonzero results should
+ be passed up to VFS.
+ @param vp Vnode to consider opening (found by filesystem).
+ @param cnp Componentname as passed to filesystem from VFS.
+ @result 0 to indicate that a vnode can be opened, or an error that should be passed up to VFS.
+ */
+int vnode_lookup_continue_needed(vnode_t vp, struct componentname *cnp);
+#endif /* KERNEL_PRIVATE */
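
/*
 * Illustrative sketch (editorial; my_fs_can_open is a hypothetical helper):
 * a filesystem's open path deferring to VFS via the routine above.
 */
static int
my_fs_can_open(vnode_t vp, struct componentname *cnp)
{
	int error;

	error = vnode_lookup_continue_needed(vp, cnp);
	if (error != 0)
		return error;	/* e.g. symlink or mountpoint: pass back up to VFS */
	return 0;		/* safe for the filesystem to open vp itself */
}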
+
 #ifdef BSD_KERNEL_PRIVATE
 /* Not in export list so can be private */
 struct stat;
diff --git a/bsd/sys/vnode_if.h b/bsd/sys/vnode_if.h
index 66812b08d..33ae10047 100644
--- a/bsd/sys/vnode_if.h
+++ b/bsd/sys/vnode_if.h
@@ -92,6 +92,13 @@
 
 extern struct vnodeop_desc vnop_default_desc;
 extern struct vnodeop_desc vnop_lookup_desc;
+#ifdef KERNEL_PRIVATE
+extern struct vnodeop_desc vnop_compound_open_desc;
+extern struct vnodeop_desc vnop_compound_remove_desc;
+extern struct vnodeop_desc vnop_compound_rename_desc;
+extern struct vnodeop_desc vnop_compound_mkdir_desc;
+extern struct vnodeop_desc vnop_compound_rmdir_desc;
+#endif /* KERNEL_PRIVATE */
 extern struct vnodeop_desc vnop_create_desc;
 extern struct vnodeop_desc vnop_whiteout_desc;
 extern struct vnodeop_desc vnop_mknod_desc;
@@ -257,6 +264,44 @@ struct vnop_open_args {
 	vfs_context_t a_context;
 };
 
+#ifdef KERNEL_PRIVATE
+struct vnop_compound_open_args {
+	struct vnodeop_desc *a_desc;
+
+	vnode_t a_dvp;				/* Directory in which to open/create */
+	vnode_t *a_vpp;				/* Resulting vnode */
+	int a_fmode;	   			/* Open mode */
+	struct componentname *a_cnp;		/* Path to look up */
+	struct vnode_attr *a_vap;		/* Attributes with which to create, if appropriate */
+	uint32_t a_flags;			/* VNOP-control flags */
+	uint32_t *a_status;			/* Information about results */
+
+	vfs_context_t a_context;			/* Authorization context */
+
+	int (*a_open_create_authorizer)(	/* Authorizer for create case */
+			vnode_t dvp,			/* Directory in which to create */
+			struct componentname *cnp,	/* As passed to VNOP */
+			struct vnode_attr *vap,		/* As passed to VNOP */
+			vfs_context_t ctx,		/* Context */
+			void *reserved);		/* Who knows */
+
+	int (*a_open_existing_authorizer)(	/* Authorizer for preexisting case */
+			vnode_t vp,			/* vp to open */
+			struct componentname *cnp,	/* Lookup state */
+			int fmode,			/* As passed to VNOP */
+			vfs_context_t ctx,		/* Context */   
+			void *reserved);		/* Who knows */
+
+	void *a_reserved;
+};
+
+/* Control flags */
+#define VNOP_COMPOUND_OPEN_DO_CREATE   0x00000001
+
+/* Results */
+#define COMPOUND_OPEN_STATUS_DID_CREATE 0x00000001
+#endif /* KERNEL_PRIVATE */
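
/*
 * Illustrative sketch (editorial; the my_fs_* helpers are hypothetical): the
 * control flow a filesystem's compound-open handler might follow with the
 * callbacks and flags above.
 */
static int
my_fs_vnop_compound_open(struct vnop_compound_open_args *ap)
{
	vnode_t vp = NULLVP;
	int error;

	error = my_fs_lookup(ap->a_dvp, ap->a_cnp, &vp);
	if (error == 0) {
		/* existing file: let VFS authorize the open */
		error = ap->a_open_existing_authorizer(vp, ap->a_cnp,
		    ap->a_fmode, ap->a_context, NULL);
	} else if (error == ENOENT &&
	    (ap->a_flags & VNOP_COMPOUND_OPEN_DO_CREATE)) {
		error = ap->a_open_create_authorizer(ap->a_dvp, ap->a_cnp,
		    ap->a_vap, ap->a_context, NULL);
		if (error == 0)
			error = my_fs_create(ap->a_dvp, ap->a_cnp, ap->a_vap,
			    ap->a_context, &vp);
		if (error == 0)
			*ap->a_status |= COMPOUND_OPEN_STATUS_DID_CREATE;
	}
	if (error == 0)
		*ap->a_vpp = vp;
	return error;
}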
+
 /*!
  @function VNOP_OPEN
  @abstract Call down to a filesystem to open a file.
@@ -272,6 +317,11 @@ struct vnop_open_args {
 extern errno_t VNOP_OPEN(vnode_t, int, vfs_context_t);
 #endif /* XNU_KERNEL_PRIVATE */
 
+#ifdef BSD_KERNEL_PRIVATE
+struct nameidata;
+extern int VNOP_COMPOUND_OPEN(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, int32_t flags, int32_t fmode, uint32_t *status, struct vnode_attr *vap, vfs_context_t ctx);
+#endif
+
 struct vnop_close_args {
 	struct vnodeop_desc *a_desc;
 	vnode_t a_vp;
@@ -381,8 +431,7 @@ struct vnop_read_args {
 @discussion VNOP_READ() is where the hard work of the read() system call happens.  The filesystem may use
  the buffer cache, the cluster layer, or an alternative method to get its data; uio routines will be used to see that data
  is copied to the correct virtual address in the correct address space and will update its uio argument
- to indicate how much data has been moved.  Filesystems will not receive a read request on a file without having
- first received a VNOP_OPEN().
+ to indicate how much data has been moved.  
  @param vp The vnode to read from.
  @param uio Description of request, including file offset, amount of data requested, destination address for data,
  and whether that destination is in kernel or user space.
@@ -406,8 +455,7 @@ struct vnop_write_args {
  @discussion VNOP_WRITE() is to write() as VNOP_READ() is to read().  The filesystem may use
  the buffer cache, the cluster layer, or an alternative method to write its data; uio routines will be used to see that data
  is copied to the correct virtual address in the correct address space and will update its uio argument
- to indicate how much data has been moved.  Filesystems will not receive a write request on a file without having
- first received a VNOP_OPEN().
+ to indicate how much data has been moved.  
  @param vp The vnode to write to.
  @param uio Description of request, including file offset, amount of data to write, source address for data,
  and whether that destination is in kernel or user space.
@@ -600,6 +648,28 @@ struct vnop_remove_args {
 extern errno_t VNOP_REMOVE(vnode_t, vnode_t, struct componentname *, int, vfs_context_t);
 #endif /* XNU_KERNEL_PRIVATE */
 
+#ifdef KERNEL_PRIVATE
+struct vnop_compound_remove_args {
+	struct vnodeop_desc *a_desc;
+	vnode_t a_dvp;				/* Directory in which to lookup and remove */
+	vnode_t *a_vpp;				/* File to remove; may or may not point to NULL pointer */
+	struct componentname *a_cnp;		/* Name of file to remove */
+	struct vnode_attr *a_vap;		/* Destination for file attributes on successful delete */
+	uint32_t a_flags;			/* Control flags (unused) */
+	vfs_context_t a_context;		/* Authorization context */
+	int (*a_remove_authorizer)(		/* Authorizer callback */
+			vnode_t dvp, 			/* Directory in which to delete */
+			vnode_t vp, 			/* File to delete */
+			struct componentname *cnp, 	/* As passed to VNOP */
+			vfs_context_t ctx, 		/* As passed to VNOP */
+			void *reserved);		/* Always NULL */
+	void *a_reserved;			/* Unused */
+};
+#endif /* KERNEL_PRIVATE */
+
+#ifdef BSD_KERNEL_PRIVATE
+extern errno_t VNOP_COMPOUND_REMOVE(vnode_t, vnode_t*, struct nameidata *, int32_t flags, struct vnode_attr *vap, vfs_context_t);
+#endif /* BSD_KERNEL_PRIVATE */
+
 struct vnop_link_args {
 	struct vnodeop_desc *a_desc;
 	vnode_t a_vp;
@@ -650,6 +720,43 @@ struct vnop_rename_args {
 extern errno_t VNOP_RENAME(vnode_t, vnode_t, struct componentname *, vnode_t, vnode_t, struct componentname *, vfs_context_t);
 #endif /* XNU_KERNEL_PRIVATE */
 
+#ifdef KERNEL_PRIVATE
+struct vnop_compound_rename_args {
+	struct vnodeop_desc *a_desc;
+
+	vnode_t a_fdvp;			/* Directory from which to rename */
+	vnode_t *a_fvpp;		/* Vnode to rename (can point to a NULL pointer) */
+	struct componentname *a_fcnp;	/* Source name */
+	struct vnode_attr *a_fvap;	
+
+	vnode_t a_tdvp;			/* Directory to which to rename */
+	vnode_t *a_tvpp;		/* Vnode to rename over (can point to a NULL pointer) */
+	struct componentname *a_tcnp;	/* Destination name */
+	struct vnode_attr *a_tvap;
+
+	uint32_t a_flags;		/* Control flags: currently unused */
+	vfs_context_t a_context;	/* Authorization context */
+	int (*a_rename_authorizer)(			/* Authorization callback */
+			vnode_t fdvp, 			/* As passed to VNOP */
+			vnode_t fvp, 			/* Vnode to rename */
+			struct componentname *fcnp, 	/* As passed to VNOP */
+			vnode_t tdvp, 			/* As passed to VNOP */
+			vnode_t tvp, 			/* Vnode to rename over (can be NULL) */
+			struct componentname *tcnp,	/* As passed to VNOP */
+			vfs_context_t ctx, 		/* As passed to VNOP */
+			void *reserved);		/* Always NULL */
+	void *a_reserved;		/* Currently unused */
+};
+#endif /* KERNEL_PRIVATE */
+
+#ifdef XNU_KERNEL_PRIVATE
+errno_t
+VNOP_COMPOUND_RENAME(
+		struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, struct vnode_attr *fvap,
+		struct vnode *tdvp, struct vnode **tvpp, struct componentname *tcnp, struct vnode_attr *tvap,
+		uint32_t flags, vfs_context_t ctx);
+#endif /* XNU_KERNEL_PRIVATE */
+
 struct vnop_mkdir_args {
 	struct vnodeop_desc *a_desc;
 	vnode_t a_dvp;
@@ -674,6 +781,27 @@ struct vnop_mkdir_args {
 extern errno_t VNOP_MKDIR(vnode_t, vnode_t *, struct componentname *, struct vnode_attr *, vfs_context_t);
 #endif /* XNU_KERNEL_PRIVATE */
 
+
+#ifdef KERNEL_PRIVATE
+struct vnop_compound_mkdir_args {
+	struct vnodeop_desc *a_desc;
+	vnode_t a_dvp;			/* Directory in which to create */
+	vnode_t *a_vpp;			/* Destination for found or created vnode */
+	struct componentname *a_cnp;	/* Name of directory to create */
+	struct vnode_attr *a_vap;	/* Creation attributes */
+	uint32_t a_flags;		/* Control flags (unused) */
+	vfs_context_t a_context;	/* Authorization context */
+#if 0
+	int (*a_mkdir_authorizer)(vnode_t dvp, struct componentname *cnp, struct vnode_attr *vap, vfs_context_t ctx, void *reserved);
+#endif /* 0 */
+	void *a_reserved;		/* Unused */
+};
+#endif /* KERNEL_PRIVATE */
+
+#ifdef XNU_KERNEL_PRIVATE
+extern errno_t VNOP_COMPOUND_MKDIR(vnode_t, vnode_t *, struct nameidata *, struct vnode_attr *, vfs_context_t);
+#endif /* XNU_KERNEL_PRIVATE */
+
 struct vnop_rmdir_args {
 	struct vnodeop_desc *a_desc;
 	vnode_t a_dvp;
@@ -695,6 +823,30 @@ struct vnop_rmdir_args {
 extern errno_t VNOP_RMDIR(vnode_t, vnode_t, struct componentname *, vfs_context_t);
 #endif /* XNU_KERNEL_PRIVATE */
 
+#ifdef KERNEL_PRIVATE
+struct vnop_compound_rmdir_args {
+	struct vnodeop_desc *a_desc;
+	vnode_t a_dvp;				/* Directory in which to look up and delete */
+	vnode_t *a_vpp;				/* Destination for found vnode */
+	struct componentname *a_cnp;		/* Name to delete */
+	struct vnode_attr *a_vap;		/* Location in which to store attributes if delete succeeds (can be NULL) */
+	uint32_t a_flags;			/* Control flags (currently unused) */
+	vfs_context_t a_context;		/* Context for authorization */
+	int (*a_rmdir_authorizer)(		/* Authorization callback */
+			vnode_t dvp, 			/* As passed to VNOP */
+			vnode_t vp, 			/* Directory to delete */
+			struct componentname *cnp, 	/* As passed to VNOP */
+			vfs_context_t ctx, 		/* As passed to VNOP */
+			void *reserved); 		/* Always NULL */
+	void *a_reserved;			/* Unused */
+};
+#endif /* KERNEL_PRIVATE */
+
+#ifdef XNU_KERNEL_PRIVATE
+extern errno_t VNOP_COMPOUND_RMDIR(vnode_t, vnode_t *, struct nameidata *, struct vnode_attr *, vfs_context_t);
+#endif /* XNU_KERNEL_PRIVATE */
+
 struct vnop_symlink_args {
        struct vnodeop_desc *a_desc;
        vnode_t a_dvp;
@@ -723,7 +875,6 @@ struct vnop_symlink_args {
 extern errno_t VNOP_SYMLINK(vnode_t, vnode_t *, struct componentname *, struct vnode_attr *, char *, vfs_context_t);
 #endif /* XNU_KERNEL_PRIVATE */
 
-
 /*
  *
  *  When VNOP_READDIR is called from the NFS Server, the nfs_data
@@ -941,7 +1092,7 @@ struct vnop_allocate_args {
  a file.  It can be used to either shrink or grow a file.  If the file shrinks,
  its ubc size will be modified accordingly, but if it grows, then the ubc size is unchanged;
  space is set aside without being actively used by the file.  VNOP_ALLOCATE() is currently only 
- called as part of the F_PREALLOCATE fcntl, and is supported only by AFP and HFS.  
+ called as part of the F_PREALLOCATE fcntl.  
  @param vp The vnode for which to preallocate space.
  @param length Desired preallocated file length.
  @param flags 
@@ -1009,12 +1160,20 @@ struct vnop_pageout_args {
  @abstract Write data from a mapped file back to disk.
  @discussion VNOP_PAGEOUT() is called when data from a mapped file needs to be flushed to disk, either
  because of an msync() call or due to memory pressure.  Filesystems are for the most part expected to
- just call cluster_pageout().
+ just call cluster_pageout().   However, if they opt into the VFC_VFSVNOP_PAGEOUTV2 flag, then
+ they will be responsible for creating their own UPLs.
  @param vp The vnode for which to page out data.
- @param pl UPL describing pages needing to be paged out.
- @param pl_offset Offset in UPL from which to start paging out data.
- @param f_offset Offset in file of data needing to be paged out.
- @param size Amount of data to page out (in bytes).
+ @param pl UPL describing pages needing to be paged out.  If the UPL is NULL, the filesystem
+ has opted into VFC_VFSVNOP_PAGEOUTV2 semantics, meaning that it will create and operate on its own UPLs
+ rather than relying on the one passed down into the filesystem.  The filesystem is then
+ responsible for issuing N cluster_pageout() calls for the N dirty ranges in the UPL.
+ @param pl_offset Offset in UPL from which to start paging out data.  Under the new VFC_VFSVNOP_PAGEOUTV2
+ semantics, this is the offset in the specified range that must be paged out if the associated page is dirty.
+ @param f_offset Offset in file of data needing to be paged out.  Under the new VFC_VFSVNOP_PAGEOUTV2
+ semantics, this represents the offset in the file where we should start looking for dirty pages.
+ @param size Amount of data to page out (in bytes).  Under VFC_VFSVNOP_PAGEOUTV2, this represents
+ the size of the range to be considered.  The filesystem is free to extend or shrink the specified range
+ to better fit its blocking model as long as the page at 'pl_offset' is included.
  @param flags UPL-style flags: UPL_IOSYNC, UPL_NOCOMMIT, UPL_NORDAHEAD, UPL_VNODE_PAGER, UPL_MSYNC.
  Filesystems should generally leave it to the cluster layer to handle these flags. See the
  memory_object_types.h header in the kernel framework if interested.
@@ -1042,6 +1201,36 @@ struct vnop_searchfs_args {
 	vfs_context_t a_context;
 };
 
+/*
+   @function VNOP_SEARCHFS
+   @abstract Search a filesystem quickly for files or directories that match the passed-in search criteria.
+   @discussion VNOP_SEARCHFS is a getattrlist-based system call which is implemented almost entirely inside
+   supported filesystems.  Callers provide a set of criteria to match against, and the filesystem is responsible
+   for finding all files or directories that match the criteria.  Once these files or directories are found, 
+   the user-requested attributes of those files are provided as output.  The set of searchable attributes is a
+   subset of the getattrlist attributes.  For example, ATTR_CMN_UUID is not a valid searchable attribute as of
+   10.6.  A common usage scenario could be to request all files whose modification dates are greater than time X
+   and less than time Y, and to provide the inode ID and filename of the matching objects as output.
+   @param vp The vnode representing the mountpoint of the filesystem to be searched.
+   @param a_searchparams1 If one-argument search criteria are requested, the search criteria go here.  However,
+   some search criteria, like ATTR_CMN_MODTIME, can be bounded: the user could request files modified between
+   time X and time Y, in which case the lower bound goes in a_searchparams1.
+   @param a_searchparams2 If two-argument search criteria is requested, the upper bound goes in here.
+   @param a_searchattrs Contains the getattrlist-style attribute bits which are requested by the current search.
+   @param a_maxmatches The maximum number of matches to return in a single system call.
+   @param a_timelimit The suggested maximum amount of time we can spend in the kernel to service this system call.  
+   Filesystems should use this as a guide only, and set their own internal maximum time to avoid denial of service.
+   @param a_returnattrs The getattrlist-style attributes to return for items in the filesystem that match the search 
+   criteria above.
+   @param a_scriptcode Currently ignored.
+   @param a_uio The uio in which to write out the search matches.
+   @param a_searchstate Sometimes searches cannot be completed in a single system call.  In this case, we provide
+   the user with an opaque identifier, interpreted only by the filesystem, that indicates where to resume the
+   previously-started search.
+   @param a_context The context in which to perform the filesystem search.
+   @return 0 on success, EAGAIN for searches which could not be completed in a single call, and other errnos as needed.
+ */
+
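+#if 0
+/*
+ * Illustrative calling pattern (all caller-side variables hypothetical):
+ * a search that cannot finish in one call returns EAGAIN, and the same
+ * searchstate is passed back in unchanged so the filesystem can resume
+ * where it left off.
+ */
+do {
+	error = VNOP_SEARCHFS(vp, &lower_bound, &upper_bound, &searchattrs,
+	    maxmatches, &timelimit, &returnattrs, &nummatches, scriptcode,
+	    options, auio, &searchstate, ctx);
+	/* consume the matches written to auio before the next call */
+} while (error == EAGAIN);
+#endif /* 0 */
+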
 #ifdef XNU_KERNEL_PRIVATE
 extern errno_t VNOP_SEARCHFS(vnode_t, void *, void *, struct attrlist *, uint32_t, struct timeval *, struct attrlist *, uint32_t *, uint32_t, uint32_t, struct uio *, struct searchstate *, vfs_context_t);
 #endif /* XNU_KERNEL_PRIVATE */
diff --git a/bsd/sys/vnode_internal.h b/bsd/sys/vnode_internal.h
index dbff3a50d..7d17be99e 100644
--- a/bsd/sys/vnode_internal.h
+++ b/bsd/sys/vnode_internal.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -100,6 +100,29 @@ struct unsafe_fsnode {
 	void *	  fsnodeowner;
 };
 
+#if CONFIG_TRIGGERS
+/*
+ * VFS Internal (private) trigger vnode resolver info.
+ */
+struct vnode_resolve {
+	lck_mtx_t				vr_lock;   /* protects vnode_resolve_t fields */
+	trigger_vnode_resolve_callback_t	vr_resolve_func;
+	trigger_vnode_unresolve_callback_t	vr_unresolve_func;
+	trigger_vnode_rearm_callback_t		vr_rearm_func;
+	trigger_vnode_reclaim_callback_t	vr_reclaim_func;
+	void *					vr_data;   /* private data for resolver */
+	uint32_t				vr_flags;
+	uint32_t				vr_lastseq;
+};
+typedef struct vnode_resolve *vnode_resolve_t;
+
+/* private vr_flags */
+#define VNT_RESOLVED        (1UL << 31)
+#define VNT_VFS_UNMOUNTED   (1UL << 30)
+#define VNT_EXTERNAL	    (1UL << 29)
+
+#endif /* CONFIG_TRIGGERS */
+
 /*
  * Reading or writing any of these items requires holding the appropriate lock.
  * v_freelist is locked by the global vnode_list_lock
@@ -166,6 +189,9 @@ struct vnode {
 #if CONFIG_MACF
 	struct label *v_label;			/* MAC security label */
 #endif
+#if CONFIG_TRIGGERS
+	vnode_resolve_t v_resolve;		/* trigger vnode resolve info (VDIR only) */
+#endif /* CONFIG_TRIGGERS */
 };
 
 #define	v_mountedhere	v_un.vu_mountedhere
@@ -199,7 +225,6 @@ struct vnode {
 #define	VL_TERMWANT	0x0008		/* there's a waiter  for recycle finish (vnode_getiocount)*/
 #define	VL_DEAD		0x0010		/* vnode is dead, cleaned of filesystem-specific info */
 #define	VL_MARKTERM	0x0020		/* vnode should be recycled when no longer referenced */
-#define	VL_MOUNTDEAD	0x0040		/* v_moutnedhere is dead   */
 #define VL_NEEDINACTIVE	0x0080		/* delay VNOP_INACTIVE until iocount goes to 0 */
 
 #define	VL_LABEL	0x0100		/* vnode is marked for labeling */
@@ -224,7 +249,7 @@ struct vnode {
 #define VDEVFLUSH	0x000040        /* device vnode after vflush */
 #define	VMOUNT		0x000080	/* mount operation in progress */
 #define	VBWAIT		0x000100	/* waiting for output to complete */
-					/* Free slot here after removing VALIASED for radar #5971707 */
+#define VSHARED_DYLD	0x000200	/* vnode is a dyld shared cache file */
 #define	VNOCACHE_DATA	0x000400	/* don't keep data cached once it's been consumed */
 #define	VSTANDARD	0x000800	/* vnode obtained from common pool */
 #define	VAGE		0x001000	/* Insert vnode at head of free list */
@@ -244,6 +269,7 @@ struct vnode {
 #define	VISNAMEDSTREAM	0x400000	/* vnode is a named stream (eg HFS resource fork) */
 #endif
 #define VOPENEVT        0x800000        /* if process is P_CHECKOPENEVT, then or in the O_EVTONLY flag on open */
+#define VNEEDSSNAPSHOT 0x1000000
 
 /*
  * Global vnode data.
@@ -251,7 +277,8 @@ struct vnode {
 extern	struct vnode *rootvnode;	/* root (i.e. "/") vnode */
 
 #ifdef CONFIG_IMGSRC_ACCESS
-extern	struct vnode *imgsrc_rootvnode;
+#define MAX_IMAGEBOOT_NESTING	2
+extern	struct vnode *imgsrc_rootvnodes[];
 #endif /* CONFIG_IMGSRC_ACCESS */
 
 
@@ -367,6 +394,10 @@ int 	vn_open(struct nameidata *ndp, int fmode, int cmode);
 int	vn_open_modflags(struct nameidata *ndp, int *fmode, int cmode);
 int	vn_open_auth(struct nameidata *ndp, int *fmode, struct vnode_attr *);
 int 	vn_close(vnode_t, int flags, vfs_context_t ctx);
+errno_t vn_remove(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, int32_t flags, struct vnode_attr *vap, vfs_context_t ctx);
+errno_t vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, struct vnode_attr *fvap,
+		struct vnode *tdvp, struct vnode **tvpp, struct componentname *tcnp, struct vnode_attr *tvap,
+		uint32_t flags, vfs_context_t ctx);
 
 void	lock_vnode_and_post(vnode_t, int);
 
@@ -377,14 +408,30 @@ void	lock_vnode_and_post(vnode_t, int);
 		} \
 	} while (0) 
 		
-
+/* Authorization subroutines */
+int	vn_authorize_open_existing(vnode_t vp, struct componentname *cnp, int fmode, vfs_context_t ctx, void *reserved);
+int	vn_authorize_create(vnode_t, struct componentname *, struct vnode_attr *, vfs_context_t, void*); 
+int	vn_attribute_prepare(vnode_t dvp, struct vnode_attr *vap, uint32_t *defaulted_fieldsp, vfs_context_t ctx);
+void	vn_attribute_cleanup(struct vnode_attr *vap, uint32_t defaulted_fields);
+int	vn_authorize_unlink(vnode_t dvp, vnode_t vp, struct componentname *cnp, vfs_context_t ctx, void *reserved);
+int	vn_authorize_rename(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp,
+		struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp,
+		vfs_context_t ctx, void *reserved);
+int	vn_authorize_rmdir(vnode_t dvp, vnode_t vp, struct componentname *cnp, vfs_context_t ctx, void *reserved);
+
+typedef int (*vn_create_authorizer_t)(vnode_t, struct componentname *, struct vnode_attr *, vfs_context_t, void*);
+int vn_authorize_mkdir(vnode_t, struct componentname *, struct vnode_attr *, vfs_context_t, void*);
+int vn_authorize_null(vnode_t, struct componentname *, struct vnode_attr *, vfs_context_t, void*);
+/* End of authorization subroutines */
 
 #define VN_CREATE_NOAUTH		(1<<0)
 #define VN_CREATE_NOINHERIT		(1<<1)
 #define VN_CREATE_UNION			(1<<2)
 #define	VN_CREATE_NOLABEL		(1<<3)
-errno_t vn_create(vnode_t, vnode_t *, struct componentname *, struct vnode_attr *, int flags, vfs_context_t);
-
+#define	VN_CREATE_DOOPEN		(1<<4)	/* Open file if a batched operation is available */
+errno_t vn_create(vnode_t, vnode_t *, struct nameidata *, struct vnode_attr *, uint32_t, int, uint32_t*, vfs_context_t);
+int	vn_mkdir(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, struct vnode_attr *vap, vfs_context_t ctx);
+int 	vn_rmdir(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, struct vnode_attr *vap, vfs_context_t ctx);
 
 int	vn_getxattr(vnode_t, const char *, uio_t, size_t *, int, vfs_context_t);
 int	vn_setxattr(vnode_t, const char *, uio_t, int, vfs_context_t);
@@ -415,6 +462,7 @@ void	cache_enter_with_gen(vnode_t dvp, vnode_t vp, struct componentname *cnp, in
 const char *cache_enter_create(vnode_t dvp, vnode_t vp, struct componentname *cnp);
 
 int vn_pathconf(vnode_t, int, int32_t *, vfs_context_t);
+extern int nc_disabled; 	
 
 #define	vnode_lock_convert(v)	lck_mtx_convert_spin(&(v)->v_lock)
 
@@ -423,12 +471,16 @@ void	vnode_lock_spin(vnode_t);
 
 void	vnode_list_lock(void);
 void	vnode_list_unlock(void);
-int	vnode_ref_ext(vnode_t, int);
+
+#define VNODE_REF_FORCE	0x1
+int	vnode_ref_ext(vnode_t, int, int);
+
 void	vnode_rele_ext(vnode_t, int, int);
 void	vnode_rele_internal(vnode_t, int, int, int);
 #ifdef BSD_KERNEL_PRIVATE
 int	vnode_getalways(vnode_t);
 int 	vget_internal(vnode_t, int, int);
+errno_t vnode_getiocount(vnode_t, unsigned int, int);
 #endif /* BSD_KERNEL_PRIVATE */
 int	vnode_get_locked(vnode_t);
 int	vnode_put_locked(vnode_t);
@@ -448,6 +500,24 @@ errno_t	vnode_setsize(vnode_t, off_t, int ioflag, vfs_context_t);
 int	vnode_setattr_fallback(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx);
 int	vnode_isspec(vnode_t vp);
 
+
+#ifdef BSD_KERNEL_PRIVATE
+
+typedef uint32_t compound_vnop_id_t;
+#define	COMPOUND_VNOP_OPEN		0x01
+#define	COMPOUND_VNOP_MKDIR		0x02
+#define	COMPOUND_VNOP_RENAME		0x04
+#define	COMPOUND_VNOP_REMOVE		0x08
+#define	COMPOUND_VNOP_RMDIR		0x10
+
+int 	vnode_compound_rename_available(vnode_t vp);
+int 	vnode_compound_rmdir_available(vnode_t vp);
+int 	vnode_compound_mkdir_available(vnode_t vp);
+int 	vnode_compound_remove_available(vnode_t vp);
+int 	vnode_compound_open_available(vnode_t vp);
+int    	vnode_compound_op_available(vnode_t, compound_vnop_id_t);
+#endif /* BSD_KERNEL_PRIVATE */
+
 void vn_setunionwait(vnode_t);
 void vn_checkunionwait(vnode_t);
 void vn_clearunionwait(vnode_t, int);
@@ -471,9 +541,18 @@ int	vfs_sysctl(int *name, uint32_t namelen, user_addr_t oldp, size_t *oldlenp,
 int	sysctl_vnode(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req);
 
 #ifdef BSD_KERNEL_PRIVATE
-void vnode_knoteupdate(struct knote *kn);
 void vnode_setneedinactive(vnode_t);
 int 	vnode_hasnamedstreams(vnode_t); /* Does this vnode have associated named streams? */
-#endif
+
+void nspace_proc_exit(struct proc *p);
+
+#if CONFIG_TRIGGERS
+/* VFS Internal Vnode Trigger Interfaces (Private) */
+int vnode_trigger_resolve(vnode_t, struct nameidata *, vfs_context_t);
+void vnode_trigger_rearm(vnode_t, vfs_context_t);
+void vfs_nested_trigger_unmounts(mount_t, int, vfs_context_t);
+#endif /* CONFIG_TRIGGERS */
+
+#endif /* BSD_KERNEL_PRIVATE */
 
 #endif /* !_SYS_VNODE_INTERNAL_H_ */
diff --git a/bsd/sys/xattr.h b/bsd/sys/xattr.h
index c9ecf4275..bd91c3c31 100644
--- a/bsd/sys/xattr.h
+++ b/bsd/sys/xattr.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -59,7 +59,19 @@ __BEGIN_DECLS
 int  xattr_protected(const char *);
 int  xattr_validatename(const char *);
 
-#define XATTR_MAXSIZE	(64 * 1024 * 1024)
+/* Maximum extended attribute size supported by VFS */
+#define XATTR_MAXSIZE		(64 * 1024 * 1024)
+
+#ifdef PRIVATE
+/* Maximum extended attribute size in an Apple Double file */
+#define AD_XATTR_MAXSIZE	(128 * 1024)
+
+/* Number of bits used to represent the maximum size of an
+ * extended attribute stored in an Apple Double file
+ * (128 KB = 2^17, so 18 bits can represent sizes up to and including 2^17).
+ */
+#define AD_XATTR_SIZE_BITS	18
+#endif /* PRIVATE */
+
 __END_DECLS
 #endif /* KERNEL */
 
diff --git a/bsd/uuid/Makefile b/bsd/uuid/Makefile
index 8d5af9310..1f7f17bfc 100644
--- a/bsd/uuid/Makefile
+++ b/bsd/uuid/Makefile
@@ -9,14 +9,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 # In both the framework PrivateHeader area and /usr/include/uuid
diff --git a/bsd/vfs/Makefile b/bsd/vfs/Makefile
index 3d578ffd7..b9ddbedcc 100644
--- a/bsd/vfs/Makefile
+++ b/bsd/vfs/Makefile
@@ -9,14 +9,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 DATAFILES = \
diff --git a/bsd/vfs/kpi_vfs.c b/bsd/vfs/kpi_vfs.c
index 50338b255..a18760397 100644
--- a/bsd/vfs/kpi_vfs.c
+++ b/bsd/vfs/kpi_vfs.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -605,6 +605,13 @@ VFS_VPTOFH(struct vnode * vp, int *fhlenp, unsigned char * fhp, vfs_context_t ct
 }
 
 
+/* returns the cached throttle mask for the mount_t */
+uint64_t
+vfs_throttle_mask(mount_t mp)
+{
+	return(mp->mnt_throttle_mask);
+}
+
 /* returns a  copy of vfs type name for the mount_t */
 void 
 vfs_name(mount_t mp, char * buffer)
@@ -943,6 +950,27 @@ vfs_vnodecovered(mount_t mp)
 	}
 }
 
+/*
+ * Returns device vnode backing a mountpoint with an iocount (if valid vnode exists).
+ * The iocount must be released with vnode_put().  Note that this KPI is subtle
+ * with respect to the validity of using this device vnode for anything substantial
+ * (which is discouraged).  If commands are sent to the device driver without
+ * taking proper steps to ensure that the device is still open, chaos may ensue.  
+ * Similarly, this routine should only be called if there is some guarantee that
+ * the mount itself is still valid.
+ */
+vnode_t
+vfs_devvp(mount_t mp)
+{
+	vnode_t vp = mp->mnt_devvp;
+
+	if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
+		return vp;
+	}
+
+	return NULLVP;
+}
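+
+#if 0
+/*
+ * Illustrative use (hypothetical caller): the returned vnode carries an
+ * iocount, which must be dropped with vnode_put() when the caller is done.
+ */
+vnode_t devvp = vfs_devvp(mp);
+if (devvp != NULLVP) {
+	/* ... inspect the device vnode ... */
+	vnode_put(devvp);
+}
+#endif /* 0 */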
+
 /*
  * return the io attributes associated with mount_t
  */
@@ -1002,7 +1030,6 @@ extern int vfs_opv_numops;
 errno_t
 vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle)
 {
-#pragma unused(data)
 	struct vfstable	*newvfstbl = NULL;
 	int	i,j;
 	int	(***opv_desc_vector_p)(void *);
@@ -1686,6 +1713,40 @@ vnode_israge(vnode_t vp)
         return ((vp->v_flag & VRAGE)? 1 : 0);
 }
 
+int
+vnode_needssnapshots(vnode_t vp)
+{
+	return ((vp->v_flag & VNEEDSSNAPSHOT)? 1 : 0);
+}
+
+
+/* Check the process/thread to see if we should skip atime updates */
+int
+vfs_ctx_skipatime(vfs_context_t ctx)
+{
+	struct uthread *ut;
+	proc_t proc;
+	thread_t thr;
+
+	proc = vfs_context_proc(ctx);
+	thr = vfs_context_thread(ctx);
+
+	/* Validate pointers in case we were invoked via a kernel context */
+	if (thr && proc) {
+		ut = get_bsdthread_info(thr);
+
+		if (proc->p_lflag & P_LRAGE_VNODES) {
+			return 1;
+		}
+
+		if (ut && (ut->uu_flag & UT_RAGE_VNODES)) {
+			return 1;
+		}
+	}
+	return 0;
+}
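+
+#if 0
+/*
+ * Illustrative use in a filesystem read path (myfs_mark_atime() and np are
+ * hypothetical): skip the access-time update when the calling process or
+ * thread has rapid-aging vnodes enabled.
+ */
+if (!vfs_ctx_skipatime(ctx)) {
+	myfs_mark_atime(np);
+}
+#endif /* 0 */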
+
 /* is vnode_t marked to not keep data cached once it's been consumed */
 int 
 vnode_isnocache(vnode_t vp)
@@ -1743,6 +1804,46 @@ vnode_islnk(vnode_t vp)
 	return ((vp->v_type == VLNK)? 1 : 0);
 }
 
+int
+vnode_lookup_continue_needed(vnode_t vp, struct componentname *cnp)
+{
+	struct nameidata *ndp = cnp->cn_ndp;
+
+	if (ndp == NULL) {
+		panic("vnode_lookup_continue_needed(): cnp->cn_ndp is NULL\n");
+	}
+
+	if (vnode_isdir(vp)) {
+		if (vp->v_mountedhere != NULL) {
+			goto yes;
+		}
+
+#if CONFIG_TRIGGERS
+		if (vp->v_resolve) {
+			goto yes;
+		}
+#endif /* CONFIG_TRIGGERS */
+
+	}
+
+	if (vnode_islnk(vp)) {
+		/* lookup() also checks "|| *ndp->ni_next == '/'"; unnecessary here, since the name is NUL-terminated */
+		if (cnp->cn_flags & FOLLOW) {
+			goto yes;
+		}
+		if (ndp->ni_flag & NAMEI_TRAILINGSLASH) {
+			goto yes;
+		}
+	}
+
+	return 0;
+
+yes:
+	ndp->ni_flag |= NAMEI_CONTLOOKUP;
+	return EKEEPLOOKING;
+}
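+
+#if 0
+/*
+ * Illustrative use in a hypothetical compound VNOP implementation: once a
+ * name has been resolved to a vnode, ask whether lookup must continue
+ * (mount point, trigger, or symlink to follow) and, if so, hand the vnode
+ * back with EKEEPLOOKING so lookup() can take over.
+ */
+error = vnode_lookup_continue_needed(vp, ap->a_cnp);
+if (error) {
+	*ap->a_vpp = vp;
+	return error;	/* EKEEPLOOKING */
+}
+#endif /* 0 */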
+
 /* is vnode_t a fifo ? */
 int 
 vnode_isfifo(vnode_t vp)
@@ -2041,6 +2142,37 @@ vnode_vfsisrdonly(vnode_t vp)
 	return ((vp->v_mount->mnt_flag & MNT_RDONLY)? 1 : 0);
 }
 
+int
+vnode_compound_rename_available(vnode_t vp)
+{
+	return vnode_compound_op_available(vp, COMPOUND_VNOP_RENAME);
+}
+
+int
+vnode_compound_rmdir_available(vnode_t vp)
+{
+	return vnode_compound_op_available(vp, COMPOUND_VNOP_RMDIR);
+}
+
+int
+vnode_compound_mkdir_available(vnode_t vp)
+{
+	return vnode_compound_op_available(vp, COMPOUND_VNOP_MKDIR);
+}
+
+int
+vnode_compound_remove_available(vnode_t vp)
+{
+	return vnode_compound_op_available(vp, COMPOUND_VNOP_REMOVE);
+}
+
+int
+vnode_compound_open_available(vnode_t vp)
+{
+	return vnode_compound_op_available(vp, COMPOUND_VNOP_OPEN);
+}
+
+int
+vnode_compound_op_available(vnode_t vp, compound_vnop_id_t opid)
+{
+	return ((vp->v_mount->mnt_compound_ops & opid) != 0);
+}
 
 /*
  * Returns vnode ref to current working directory; if a per-thread current
@@ -2769,6 +2901,15 @@ vnode_notify(vnode_t vp, uint32_t events, struct vnode_attr *vap)
 	return 0;
 }
 
+
+
+int
+vnode_isdyldsharedcache(vnode_t vp)
+{
+	return ((vp->v_flag & VSHARED_DYLD) ? 1 : 0);
+}
+
 /*
  * For a filesystem that isn't tracking its own vnode watchers:
  * check whether a vnode is being monitored.
@@ -2778,27 +2919,6 @@ vnode_ismonitored(vnode_t vp) {
 	return (vp->v_knotes.slh_first != NULL);
 }
 
-/*
- * Conceived as a function available only in BSD kernel so that if kevent_register
- * changes what a knote of type EVFILT_VNODE is watching, it can push
- * that updated information down to a networked filesystem that may
- * need to update server-side monitoring.
- *
- * Blunted to do nothing--because we want to get both kqueue and fsevents support
- * from the VNOP_MONITOR design, we always want all the events a filesystem can provide us.
- */
-void
-vnode_knoteupdate(__unused struct knote *kn) 
-{
-#if 0
-	vnode_t vp = (vnode_t)kn->kn_hook;
-	if (vnode_getwithvid(vp, kn->kn_hookid) == 0) {
-		VNOP_MONITOR(vp, kn->kn_sfflags, VNODE_MONITOR_UPDATE, (void*)kn, NULL);
-		vnode_put(vp);
-	}
-#endif
-}
-
 /*
  * Initialize a struct vnode_attr and activate the attributes required
  * by the vnode_notify() call.
@@ -2811,6 +2931,44 @@ vfs_get_notify_attributes(struct vnode_attr *vap)
 	return 0;
 }
 
+#if CONFIG_TRIGGERS
+int 
+vfs_settriggercallback(fsid_t *fsid, vfs_trigger_callback_t vtc, void *data, uint32_t flags __unused, vfs_context_t ctx)
+{
+	int error;
+	mount_t mp;
+
+	mp = mount_list_lookupby_fsid(fsid, 0 /* locked */, 1 /* withref */);
+	if (mp == NULL) {
+		return ENOENT;
+	}
+
+	error = vfs_busy(mp, LK_NOWAIT);
+	mount_iterdrop(mp);
+
+	if (error != 0) {
+		return ENOENT;
+	}
+
+	mount_lock(mp);
+	if (mp->mnt_triggercallback != NULL) {
+		error = EBUSY;
+		mount_unlock(mp);
+		goto out;
+	}
+
+	mp->mnt_triggercallback = vtc;
+	mp->mnt_triggerdata = data;
+	mount_unlock(mp);
+
+	mp->mnt_triggercallback(mp, VTC_REPLACE, data, ctx);
+
+out:
+	vfs_unbusy(mp);
+	return error;
+}
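+
+#if 0
+/*
+ * Illustrative registration (my_trigger_callback, my_data, and fsid are
+ * hypothetical): as the code above shows, the new callback is invoked once
+ * with VTC_REPLACE as soon as it has been installed.
+ */
+error = vfs_settriggercallback(&fsid, my_trigger_callback, my_data, 0, ctx);
+if (error == EBUSY) {
+	/* another callback is already registered for this mount */
+}
+#endif /* 0 */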
+#endif /* CONFIG_TRIGGERS */
+
 /*
  *  Definition of vnode operations.
  */
@@ -2909,13 +3067,87 @@ VNOP_LOOKUP(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, vfs_context_t
 }
 
 #if 0
-/*
- *#
- *#% create       dvp     L L L
- *#% create       vpp     - L -
- *#
- */
- 
+struct vnop_compound_open_args {
+	struct vnodeop_desc *a_desc;
+	vnode_t a_dvp;
+	vnode_t *a_vpp;
+	struct componentname *a_cnp;
+	int32_t a_flags;
+	int32_t a_fmode;
+	uint32_t *a_status;	/* Out: COMPOUND_OPEN_STATUS_DID_CREATE is set if a create occurred */
+	struct vnode_attr *a_vap;
+	vfs_context_t a_context;
+	int (*a_open_create_authorizer)(vnode_t dvp, struct componentname *cnp, struct vnode_attr *vap, vfs_context_t ctx, void *reserved);
+	int (*a_open_existing_authorizer)(vnode_t vp, struct componentname *cnp, int fmode, vfs_context_t ctx, void *reserved);
+	void *a_reserved;
+};
+#endif /* 0 */
+
+int
+VNOP_COMPOUND_OPEN(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, int32_t flags, int32_t fmode, uint32_t *statusp, struct vnode_attr *vap, vfs_context_t ctx)
+{
+	int _err;
+	struct vnop_compound_open_args a;
+	int did_create = 0;
+	int want_create;
+	uint32_t tmp_status = 0;
+	struct componentname *cnp = &ndp->ni_cnd;
+
+	want_create = (flags & VNOP_COMPOUND_OPEN_DO_CREATE);
+
+	a.a_desc = &vnop_compound_open_desc;
+	a.a_dvp = dvp;
+	a.a_vpp = vpp; /* Could be NULL */
+	a.a_cnp = cnp;
+	a.a_flags = flags;
+	a.a_fmode = fmode;
+	a.a_status = (statusp != NULL) ? statusp : &tmp_status;
+	a.a_vap = vap;
+	a.a_context = ctx;
+	a.a_open_create_authorizer = vn_authorize_create;
+	a.a_open_existing_authorizer = vn_authorize_open_existing;
+	a.a_reserved = NULL;
+
+	if (dvp == NULLVP) {
+		panic("No dvp?");
+	}
+	if (want_create && !vap) {
+		panic("Want create, but no vap?");
+	}
+	if (!want_create && vap) {
+		panic("Don't want create, but have a vap?");
+	}
+
+	_err = (*dvp->v_op[vnop_compound_open_desc.vdesc_offset])(&a);
+
+	did_create = (*a.a_status & COMPOUND_OPEN_STATUS_DID_CREATE);
+
+	if (did_create && !want_create) {
+		panic("Filesystem did a create, even though none was requested?");
+	}
+
+	if (did_create) { 
+		if (!NATIVE_XATTR(dvp)) {
+			/* 
+			 * Remove stale Apple Double file (if any).
+			 */
+			xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 0);
+		}
+
+		/* On create, provide kqueue notification */
+		post_event_if_success(dvp, _err, NOTE_WRITE);
+	}
+
+	lookup_compound_vnop_post_hook(_err, dvp, *vpp, ndp, did_create);
+#if 0 /* FSEvents... */
+	if (*vpp && _err && _err != EKEEPLOOKING) {
+		vnode_put(*vpp);
+		*vpp = NULLVP;
+	}
+#endif /* 0 */
+
+	return (_err);
+}
+
+#if 0
 struct vnop_create_args {
 	struct vnodeop_desc *a_desc;
 	vnode_t a_dvp;
@@ -3094,34 +3326,34 @@ struct vnop_open_args {
 };
 #endif /* 0*/
 errno_t 
-VNOP_OPEN(vnode_t vp, int mode, vfs_context_t ctx)
+VNOP_OPEN(vnode_t vp, int mode, vfs_context_t ctx) 
 {
 	int _err;
 	struct vnop_open_args a;
 #ifndef __LP64__
 	int thread_safe;
-	int funnel_state = 0;
+	int funnel_state = 0; 
 #endif /* __LP64__ */
 
 	if (ctx == NULL) {
 		ctx = vfs_context_current();
-	}
+	}    
 	a.a_desc = &vnop_open_desc;
 	a.a_vp = vp;
 	a.a_mode = mode;
-	a.a_context = ctx;
+	a.a_context = ctx; 
 
 #ifndef __LP64__
 	thread_safe = THREAD_SAFE_FS(vp);
 	if (!thread_safe) {
 		funnel_state = thread_funnel_set(kernel_flock, TRUE);
 		if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) {
-		        if ( (_err = lock_fsnode(vp, NULL)) ) {
-			        (void) thread_funnel_set(kernel_flock, funnel_state);
-			        return (_err);
-			}
-		}
-	}
+			if ( (_err = lock_fsnode(vp, NULL)) ) {
+				(void) thread_funnel_set(kernel_flock, funnel_state);
+				return (_err);
+			}    
+		}    
+	}    
 #endif /* __LP64__ */
 
 	_err = (*vp->v_op[vnop_open_desc.vdesc_offset])(&a);
@@ -3130,9 +3362,9 @@ VNOP_OPEN(vnode_t vp, int mode, vfs_context_t ctx)
 	if (!thread_safe) {
 		if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) {
 			unlock_fsnode(vp, NULL);
-		}
+		}    
 		(void) thread_funnel_set(kernel_flock, funnel_state);
-	}
+	}    
 #endif /* __LP64__ */
 
 	return (_err);
@@ -4012,6 +4244,49 @@ VNOP_REMOVE(vnode_t dvp, vnode_t vp, struct componentname * cnp, int flags, vfs_
 	return (_err);
 }
 
+int
+VNOP_COMPOUND_REMOVE(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, int32_t flags, struct vnode_attr *vap, vfs_context_t ctx)
+{
+	int _err;
+	struct vnop_compound_remove_args a;
+	int no_vp = (*vpp == NULLVP);
+
+	a.a_desc = &vnop_compound_remove_desc;
+	a.a_dvp = dvp;
+	a.a_vpp = vpp;
+	a.a_cnp = &ndp->ni_cnd;
+	a.a_flags = flags;
+	a.a_vap = vap;
+	a.a_context = ctx;
+	a.a_remove_authorizer = vn_authorize_unlink;
+
+	_err = (*dvp->v_op[vnop_compound_remove_desc.vdesc_offset])(&a);
+	if (_err == 0) {
+		vnode_setneedinactive(*vpp);
+
+		if ( !(NATIVE_XATTR(dvp)) ) {
+			/*
+			 * Remove any associated extended attribute file (._ AppleDouble file).
+			 */
+			xattrfile_remove(dvp, ndp->ni_cnd.cn_nameptr, ctx, 1);
+		}
+	}
+
+	post_event_if_success(*vpp, _err, NOTE_DELETE | NOTE_LINK);
+	post_event_if_success(dvp, _err, NOTE_WRITE);
+
+	if (no_vp) {
+		lookup_compound_vnop_post_hook(_err, dvp, *vpp, ndp, 0);
+		if (*vpp && _err && _err != EKEEPLOOKING) {
+			vnode_put(*vpp);
+			*vpp = NULLVP;
+		}
+	}
+
+	return (_err);
+}
 
 #if 0
 /*
@@ -4085,114 +4360,33 @@ VNOP_LINK(vnode_t vp, vnode_t tdvp, struct componentname * cnp, vfs_context_t ct
 	return (_err);
 }
 
-
-#if 0
-/*
- *#
- *#% rename       fdvp    U U U
- *#% rename       fvp     U U U
- *#% rename       tdvp    L U U
- *#% rename       tvp     X U U
- *#
- */
-struct vnop_rename_args {
-	struct vnodeop_desc *a_desc;
-	vnode_t a_fdvp;
-	vnode_t a_fvp;
-	struct componentname *a_fcnp;
-	vnode_t a_tdvp;
-	vnode_t a_tvp;
-	struct componentname *a_tcnp;
-	vfs_context_t a_context;
-};
-#endif /* 0*/
 errno_t
-VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp,
-            struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp,
-            vfs_context_t ctx)
+vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, struct vnode_attr *fvap,
+            struct vnode *tdvp, struct vnode **tvpp, struct componentname *tcnp, struct vnode_attr *tvap,
+            uint32_t flags, vfs_context_t ctx)
 {
-	int _err = 0;
-	int events;
-	struct vnop_rename_args a;
-	char smallname1[48];
-	char smallname2[48];
-	char *xfromname = NULL;
-	char *xtoname = NULL;
-#ifndef __LP64__
-	int funnel_state = 0;
-	vnode_t	lock_first = NULL, lock_second = NULL;
-	vnode_t fdvp_unsafe = NULLVP;
-	vnode_t tdvp_unsafe = NULLVP;
-#endif /* __LP64__ */
+	int _err;
 	vnode_t src_attr_vp = NULLVP;
 	vnode_t dst_attr_vp = NULLVP;
 	struct nameidata fromnd;
 	struct nameidata tond;
+	char smallname1[48];
+	char smallname2[48];
+	char *xfromname = NULL;
+	char *xtoname = NULL;
+	int batched;
 
-	a.a_desc = &vnop_rename_desc;
-	a.a_fdvp = fdvp;
-	a.a_fvp = fvp;
-	a.a_fcnp = fcnp;
-	a.a_tdvp = tdvp;
-	a.a_tvp = tvp;
-	a.a_tcnp = tcnp;
-	a.a_context = ctx;
+	batched = vnode_compound_rename_available(fdvp);
 
 #ifndef __LP64__
-	if (!THREAD_SAFE_FS(fdvp))
-	        fdvp_unsafe = fdvp;
-	if (!THREAD_SAFE_FS(tdvp))
-	        tdvp_unsafe = tdvp;
+	vnode_t fdvp_unsafe = (THREAD_SAFE_FS(fdvp) ? NULLVP : fdvp);
+#endif /* __LP64__ */
 
-	if (fdvp_unsafe != NULLVP) {
-		/*
-		 * Lock parents in vnode address order to avoid deadlocks
-		 * note that it's possible for the fdvp to be unsafe,
-		 * but the tdvp to be safe because tvp could be a directory
-		 * in the root of a filesystem... in that case, tdvp is the
-		 * in the filesystem that this root is mounted on
-		 */
-		if (tdvp_unsafe == NULL || fdvp_unsafe == tdvp_unsafe) {
-			lock_first  = fdvp_unsafe;
-			lock_second = NULL;
-		} else if (fdvp_unsafe < tdvp_unsafe) {
-			lock_first  = fdvp_unsafe;
-			lock_second = tdvp_unsafe;
-		} else {
-			lock_first  = tdvp_unsafe;
-			lock_second = fdvp_unsafe;
-		}
-		if ( (_err = lock_fsnode(lock_first, &funnel_state)) )
-			return (_err);
-
-		if (lock_second != NULL && (_err = lock_fsnode(lock_second, NULL))) {
-			unlock_fsnode(lock_first, &funnel_state);
-			return (_err);
-		}
-
-		/*
-		 * Lock both children in vnode address order to avoid deadlocks
-		 */
-		if (tvp == NULL || tvp == fvp) {
-			lock_first  = fvp;
-			lock_second = NULL;
-		} else if (fvp < tvp) {
-			lock_first  = fvp;
-			lock_second = tvp;
-		} else {
-			lock_first  = tvp;
-			lock_second = fvp;
-		}
-		if ( (_err = lock_fsnode(lock_first, NULL)) )
-			goto out1;
-
-		if (lock_second != NULL && (_err = lock_fsnode(lock_second, NULL))) {
-		        unlock_fsnode(lock_first, NULL);
-			goto out1;
-		}
+	if (!batched) {
+		if (*fvpp == NULLVP) 
+			panic("Not batched, and no fvp?");
 	}
-#endif /* __LP64__ */
-	
+
 	/* 
 	 * We need to preflight any potential AppleDouble file for the source file
 	 * before doing the rename operation, since we could potentially be doing
@@ -4235,8 +4429,8 @@ VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp,
 		 * is only for AppleDouble files.
 		 */
 		if (xfromname != NULL) {
-			NDINIT(&fromnd, RENAME, NOFOLLOW | USEDVP | CN_NBMOUNTLOOK, UIO_SYSSPACE,
-					CAST_USER_ADDR_T(xfromname), ctx);
+			NDINIT(&fromnd, RENAME, OP_RENAME, NOFOLLOW | USEDVP | CN_NBMOUNTLOOK,
+			       UIO_SYSSPACE, CAST_USER_ADDR_T(xfromname), ctx);
 			fromnd.ni_dvp = fdvp;
 			error = namei(&fromnd);
 		
@@ -4267,21 +4461,18 @@ VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp,
 		}
 	}
 
+	if (batched) {
+		_err = VNOP_COMPOUND_RENAME(fdvp, fvpp, fcnp, fvap, tdvp, tvpp, tcnp, tvap, flags, ctx);
+		if (_err != 0) {
+			printf("VNOP_COMPOUND_RENAME() returned %d\n", _err);
+		}
 
-	/* do the rename of the main file. */
-	_err = (*fdvp->v_op[vnop_rename_desc.vdesc_offset])(&a);
-
-#ifndef  __LP64__
-	if (fdvp_unsafe != NULLVP) {
-	        if (lock_second != NULL)
-		        unlock_fsnode(lock_second, NULL);
-		unlock_fsnode(lock_first, NULL);
+	} else {
+		_err = VNOP_RENAME(fdvp, *fvpp, fcnp, tdvp, *tvpp, tcnp, ctx);
 	}
-#endif /* __LP64__ */
 
 	if (_err == 0) {
-		if (tvp && tvp != fvp)
-		        vnode_setneedinactive(tvp);
+		mac_vnode_notify_rename(ctx, *fvpp, tdvp, tcnp);
 	}
 
 	/* 
@@ -4295,7 +4486,7 @@ VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp,
 		 * Note that tdvp already has an iocount reference. Make sure to check that we
 		 * get a valid vnode from namei.
 		 */
-		NDINIT(&tond, RENAME,
+		NDINIT(&tond, RENAME, OP_RENAME,
 		       NOCACHE | NOFOLLOW | USEDVP | CN_NBMOUNTLOOK, UIO_SYSSPACE,
 		       CAST_USER_ADDR_T(xtoname), ctx);
 		tond.ni_dvp = tdvp;
@@ -4309,81 +4500,15 @@ VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp,
 		}
 		
 		if (src_attr_vp) {
-			/* attempt to rename src -> dst */
-
-			a.a_desc = &vnop_rename_desc;
-			a.a_fdvp = fdvp;
-			a.a_fvp = src_attr_vp;
-			a.a_fcnp = &fromnd.ni_cnd;
-			a.a_tdvp = tdvp;
-			a.a_tvp = dst_attr_vp;
-			a.a_tcnp = &tond.ni_cnd;
-			a.a_context = ctx;
-
-#ifndef __LP64__
-			if (fdvp_unsafe != NULLVP) {
-				/*
-				 * Lock in vnode address order to avoid deadlocks
-				 */
-				if (dst_attr_vp == NULL || dst_attr_vp == src_attr_vp) {
-					lock_first  = src_attr_vp;
-					lock_second = NULL;
-				} else if (src_attr_vp < dst_attr_vp) {
-					lock_first  = src_attr_vp;
-					lock_second = dst_attr_vp;
-				} else {
-					lock_first  = dst_attr_vp;
-					lock_second = src_attr_vp;
-				}
-				if ( (error = lock_fsnode(lock_first, NULL)) == 0) {
-					if (lock_second != NULL && (error = lock_fsnode(lock_second, NULL)) )
-						unlock_fsnode(lock_first, NULL);
-				}
+			if (batched) {
+				error = VNOP_COMPOUND_RENAME(fdvp, &src_attr_vp, &fromnd.ni_cnd, NULL,
+						tdvp, &dst_attr_vp, &tond.ni_cnd, NULL,
+						0, ctx);
+			} else {
+				error = VNOP_RENAME(fdvp, src_attr_vp, &fromnd.ni_cnd, 
+						tdvp, dst_attr_vp, &tond.ni_cnd, ctx);
 			}
-#endif /* __LP64__ */
-			if (error == 0) {
-				const char *oname;
-				vnode_t oparent;
 
-				/* Save these off so we can later verify them (fix up below) */
-				oname   = src_attr_vp->v_name;
-				oparent = src_attr_vp->v_parent;
-
-				error = (*fdvp->v_op[vnop_rename_desc.vdesc_offset])(&a);
-
-#ifndef __LP64__
-				if (fdvp_unsafe != NULLVP) {
-					if (lock_second != NULL)
-						unlock_fsnode(lock_second, NULL);
-					unlock_fsnode(lock_first, NULL);
-				}
-#endif /* __LP64__ */
-
-				if (error == 0) {
-					vnode_setneedinactive(src_attr_vp);
-
-					if (dst_attr_vp && dst_attr_vp != src_attr_vp)
-						vnode_setneedinactive(dst_attr_vp);
-					/*
-					 * Fix up name & parent pointers on ._ file
-					 */
-					if (oname == src_attr_vp->v_name &&
-							oparent == src_attr_vp->v_parent) {
-						int update_flags;
-
-						update_flags = VNODE_UPDATE_NAME;
-
-						if (fdvp != tdvp)
-							update_flags |= VNODE_UPDATE_PARENT;
-
-						vnode_update_identity(src_attr_vp, tdvp,
-								tond.ni_cnd.cn_nameptr,
-								tond.ni_cnd.cn_namelen,
-								tond.ni_cnd.cn_hash,
-								update_flags);
-					}
-				}
-			}
 			/* kevent notifications for moving resource files 
 			 * _err is zero if we're here, so no need to notify directories, code
 			 * below will do that.  only need to post the rename on the source and
@@ -4449,6 +4574,125 @@ out:
 		FREE(xtoname, M_TEMP);
 	}
 
+	return _err;
+}
+
+
+#if 0
+/*
+ *#
+ *#% rename       fdvp    U U U
+ *#% rename       fvp     U U U
+ *#% rename       tdvp    L U U
+ *#% rename       tvp     X U U
+ *#
+ */
+struct vnop_rename_args {
+	struct vnodeop_desc *a_desc;
+	vnode_t a_fdvp;
+	vnode_t a_fvp;
+	struct componentname *a_fcnp;
+	vnode_t a_tdvp;
+	vnode_t a_tvp;
+	struct componentname *a_tcnp;
+	vfs_context_t a_context;
+};
+#endif /* 0*/
+errno_t
+VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp,
+            struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp,
+            vfs_context_t ctx)
+{
+	int _err = 0;
+	int events;
+	struct vnop_rename_args a;
+#ifndef __LP64__
+	int funnel_state = 0;
+	vnode_t	lock_first = NULL, lock_second = NULL;
+	vnode_t fdvp_unsafe = NULLVP;
+	vnode_t tdvp_unsafe = NULLVP;
+#endif /* __LP64__ */
+
+	a.a_desc = &vnop_rename_desc;
+	a.a_fdvp = fdvp;
+	a.a_fvp = fvp;
+	a.a_fcnp = fcnp;
+	a.a_tdvp = tdvp;
+	a.a_tvp = tvp;
+	a.a_tcnp = tcnp;
+	a.a_context = ctx;
+
+#ifndef __LP64__
+	if (!THREAD_SAFE_FS(fdvp))
+	        fdvp_unsafe = fdvp;
+	if (!THREAD_SAFE_FS(tdvp))
+	        tdvp_unsafe = tdvp;
+
+	if (fdvp_unsafe != NULLVP) {
+		/*
+		 * Lock parents in vnode address order to avoid deadlocks
+		 * note that it's possible for the fdvp to be unsafe,
+		 * but the tdvp to be safe because tvp could be a directory
+		 * in the root of a filesystem... in that case, tdvp is the
+		 * in the filesystem that this root is mounted on
+		 */
+		if (tdvp_unsafe == NULL || fdvp_unsafe == tdvp_unsafe) {
+			lock_first  = fdvp_unsafe;
+			lock_second = NULL;
+		} else if (fdvp_unsafe < tdvp_unsafe) {
+			lock_first  = fdvp_unsafe;
+			lock_second = tdvp_unsafe;
+		} else {
+			lock_first  = tdvp_unsafe;
+			lock_second = fdvp_unsafe;
+		}
+		if ( (_err = lock_fsnode(lock_first, &funnel_state)) )
+			return (_err);
+
+		if (lock_second != NULL && (_err = lock_fsnode(lock_second, NULL))) {
+			unlock_fsnode(lock_first, &funnel_state);
+			return (_err);
+		}
+
+		/*
+		 * Lock both children in vnode address order to avoid deadlocks
+		 */
+		if (tvp == NULL || tvp == fvp) {
+			lock_first  = fvp;
+			lock_second = NULL;
+		} else if (fvp < tvp) {
+			lock_first  = fvp;
+			lock_second = tvp;
+		} else {
+			lock_first  = tvp;
+			lock_second = fvp;
+		}
+		if ( (_err = lock_fsnode(lock_first, NULL)) )
+			goto out1;
+
+		if (lock_second != NULL && (_err = lock_fsnode(lock_second, NULL))) {
+		        unlock_fsnode(lock_first, NULL);
+			goto out1;
+		}
+	}
+#endif /* __LP64__ */
+
+	/* do the rename of the main file. */
+	_err = (*fdvp->v_op[vnop_rename_desc.vdesc_offset])(&a);
+
+#ifndef  __LP64__
+	if (fdvp_unsafe != NULLVP) {
+	        if (lock_second != NULL)
+		        unlock_fsnode(lock_second, NULL);
+		unlock_fsnode(lock_first, NULL);
+	}
+#endif /* __LP64__ */
+
+	if (_err == 0) {
+		if (tvp && tvp != fvp)
+		        vnode_setneedinactive(tvp);
+	}
+
 #ifndef __LP64__
 out1:
 	if (fdvp_unsafe != NULLVP) {
@@ -4488,6 +4732,112 @@ out1:
 	return (_err);
 }
 
+int
+VNOP_COMPOUND_RENAME(
+		struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, struct vnode_attr *fvap,
+		struct vnode *tdvp, struct vnode **tvpp, struct componentname *tcnp, struct vnode_attr *tvap,
+		uint32_t flags, vfs_context_t ctx)
+{
+	int _err = 0;
+	int events;
+	struct vnop_compound_rename_args a;
+	int no_fvp, no_tvp;
+
+	no_fvp = (*fvpp) == NULLVP;
+	no_tvp = (*tvpp) == NULLVP;
+
+	a.a_desc = &vnop_compound_rename_desc;
+	
+	a.a_fdvp = fdvp;
+	a.a_fvpp = fvpp;
+	a.a_fcnp = fcnp;
+	a.a_fvap = fvap;
+
+	a.a_tdvp = tdvp;
+	a.a_tvpp = tvpp;
+	a.a_tcnp = tcnp;
+	a.a_tvap = tvap;
+	
+	a.a_flags = flags;
+	a.a_context = ctx;
+	a.a_rename_authorizer = vn_authorize_rename;
+	a.a_reserved = NULL;
+
+	/* do the rename of the main file. */
+	_err = (*fdvp->v_op[vnop_compound_rename_desc.vdesc_offset])(&a);
+
+	if (_err == 0) {
+		if (*tvpp && *tvpp != *fvpp)
+		        vnode_setneedinactive(*tvpp);
+	}
+
+	/* Wrote at least one directory.  If transplanted a dir, also changed link counts */
+	if (0 == _err && *fvpp != *tvpp) {
+		if (!*fvpp) {
+			panic("No fvpp after compound rename?");
+		}
+
+		events = NOTE_WRITE;
+		if (vnode_isdir(*fvpp)) {
+			/* Link count on dir changed only if we are moving a dir and...
+			 * 	--Moved to new dir, not overwriting there
+			 * 	--Kept in same dir and DID overwrite
+			 */
+			if (((fdvp != tdvp) && (!*tvpp)) || ((fdvp == tdvp) && (*tvpp))) {
+				events |= NOTE_LINK;
+			}
+		}
+
+		lock_vnode_and_post(fdvp, events);
+		if (fdvp != tdvp) {
+			lock_vnode_and_post(tdvp,  events);
+		}
+
+		/* If you're replacing the target, post a deletion for it */
+		if (*tvpp) {
+			lock_vnode_and_post(*tvpp, NOTE_DELETE);
+		}
+
+		lock_vnode_and_post(*fvpp, NOTE_RENAME);
+	}
+
+	if (no_fvp) {
+		lookup_compound_vnop_post_hook(_err, fdvp, *fvpp, fcnp->cn_ndp, 0); 
+	}
+	if (no_tvp && *tvpp != NULLVP) {
+		lookup_compound_vnop_post_hook(_err, tdvp, *tvpp, tcnp->cn_ndp, 0);
+	}
+
+	if (_err && _err != EKEEPLOOKING) {
+		if (*fvpp) {
+			vnode_put(*fvpp);
+			*fvpp = NULLVP;
+		}
+		if (*tvpp) {
+			vnode_put(*tvpp);
+			*tvpp = NULLVP;
+		}
+	}
+
+	return (_err);
+}
+
+int
+vn_mkdir(struct vnode *dvp, struct vnode **vpp, struct nameidata *ndp,
+           struct vnode_attr *vap, vfs_context_t ctx)
+{
+	if (ndp->ni_cnd.cn_nameiop != CREATE) {
+		panic("Non-CREATE nameiop in vn_mkdir()?");
+	}
+
+	if (vnode_compound_mkdir_available(dvp)) {
+		return VNOP_COMPOUND_MKDIR(dvp, vpp, ndp, vap, ctx);
+	} else {
+		return VNOP_MKDIR(dvp, vpp, &ndp->ni_cnd, vap, ctx);
+	}
+}
+
  #if 0
 /*
  *#
@@ -4550,6 +4900,59 @@ VNOP_MKDIR(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
        return (_err);
 }
 
+int
+VNOP_COMPOUND_MKDIR(struct vnode *dvp, struct vnode **vpp, struct nameidata *ndp,
+           struct vnode_attr *vap, vfs_context_t ctx)
+{
+	int _err;
+	struct vnop_compound_mkdir_args a;
+
+	a.a_desc = &vnop_compound_mkdir_desc;
+	a.a_dvp = dvp;
+	a.a_vpp = vpp;
+	a.a_cnp = &ndp->ni_cnd;
+	a.a_vap = vap;
+	a.a_flags = 0;
+	a.a_context = ctx;
+#if 0
+	a.a_mkdir_authorizer = vn_authorize_mkdir;
+#endif /* 0 */
+	a.a_reserved = NULL;
+
+	_err = (*dvp->v_op[vnop_compound_mkdir_desc.vdesc_offset])(&a);
+	if (_err == 0 && !NATIVE_XATTR(dvp)) {
+		/* 
+		 * Remove stale Apple Double file (if any).
+		 */
+		xattrfile_remove(dvp, ndp->ni_cnd.cn_nameptr, ctx, 0);
+	}
+
+	post_event_if_success(dvp, _err, NOTE_LINK | NOTE_WRITE);
+
+	lookup_compound_vnop_post_hook(_err, dvp, *vpp, ndp, (_err == 0));
+	if (*vpp && _err && _err != EKEEPLOOKING) {
+		vnode_put(*vpp);
+		*vpp = NULLVP;
+	}
+
+	return (_err);
+}
+
+int
+vn_rmdir(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, struct vnode_attr *vap, vfs_context_t ctx)
+{
+	if (vnode_compound_rmdir_available(dvp)) {
+		return VNOP_COMPOUND_RMDIR(dvp, vpp, ndp, vap, ctx);
+	} else {
+		if (*vpp == NULLVP) {
+			panic("NULL vp, but not a compound VNOP?");
+		}
+		if (vap != NULL) {
+			panic("Non-NULL vap, but not a compound VNOP?");
+		}
+		return VNOP_RMDIR(dvp, *vpp, &ndp->ni_cnd, ctx);
+	}
+}
 
 #if 0
 /*
@@ -4618,6 +5021,53 @@ VNOP_RMDIR(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, vfs_c
 	return (_err);
 }
 
+int
+VNOP_COMPOUND_RMDIR(struct vnode *dvp, struct vnode **vpp, struct nameidata *ndp,
+           struct vnode_attr *vap, vfs_context_t ctx)
+{
+	int _err;
+	struct vnop_compound_rmdir_args a;
+	int no_vp;
+
+	a.a_desc = &vnop_compound_rmdir_desc;
+	a.a_dvp = dvp;
+	a.a_vpp = vpp;
+	a.a_cnp = &ndp->ni_cnd;
+	a.a_vap = vap;
+	a.a_flags = 0;
+	a.a_context = ctx;
+	a.a_rmdir_authorizer = vn_authorize_rmdir;
+	a.a_reserved = NULL;
+
+	no_vp = (*vpp == NULLVP);
+
+	_err = (*dvp->v_op[vnop_compound_rmdir_desc.vdesc_offset])(&a);
+	if (_err == 0 && !NATIVE_XATTR(dvp)) {
+		/* 
+		 * Remove stale Apple Double file (if any).
+		 */
+		xattrfile_remove(dvp, ndp->ni_cnd.cn_nameptr, ctx, 0);
+	}
+
+	if (*vpp) {
+		post_event_if_success(*vpp, _err, NOTE_DELETE | NOTE_LINK);
+	}
+	post_event_if_success(dvp, _err, NOTE_LINK | NOTE_WRITE);
+
+	if (no_vp) {
+		lookup_compound_vnop_post_hook(_err, dvp, *vpp, ndp, 0);
+
+#if 0 /* Removing orphaned ._ files requires a vp... */
+		if (*vpp && _err && _err != EKEEPLOOKING) {
+			vnode_put(*vpp);
+			*vpp = NULLVP;
+		}
+#endif /* 0 */
+	}
+
+	return (_err);
+}
+
 /*
  * Remove a ._ AppleDouble file
  */
@@ -4642,7 +5092,7 @@ xattrfile_remove(vnode_t dvp, const char * basename, vfs_context_t ctx, int forc
 		MALLOC(filename, char *, len, M_TEMP, M_WAITOK);
 		len = snprintf(filename, len, "._%s", basename);
 	}
-	NDINIT(&nd, DELETE, WANTPARENT | LOCKLEAF | NOFOLLOW | USEDVP, UIO_SYSSPACE,
+	NDINIT(&nd, DELETE, OP_UNLINK, WANTPARENT | LOCKLEAF | NOFOLLOW | USEDVP, UIO_SYSSPACE,
 	       CAST_USER_ADDR_T(filename), ctx);
 	nd.ni_dvp = dvp;
 	if (namei(&nd) != 0)
@@ -4678,32 +5128,9 @@ xattrfile_remove(vnode_t dvp, const char * basename, vfs_context_t ctx, int forc
 		}
 	}
 	if (force) {
-		struct vnop_remove_args a;
 		int  error;
-#ifndef __LP64__
-		int thread_safe = THREAD_SAFE_FS(dvp);
-#endif /* __LP64__ */
 	
-		a.a_desc    = &vnop_remove_desc;
-		a.a_dvp     = nd.ni_dvp;
-		a.a_vp      = xvp;
-		a.a_cnp     = &nd.ni_cnd;
-		a.a_context = ctx;
-
-#ifndef __LP64__
-		if (!thread_safe) {
-			if ( (lock_fsnode(xvp, NULL)) )
-				goto out1;
-		}
-#endif /* __LP64__ */
-
-		error = (*dvp->v_op[vnop_remove_desc.vdesc_offset])(&a);
-
-#ifndef __LP64__
-		if (!thread_safe)
-			unlock_fsnode(xvp, NULL);
-#endif /* __LP64__ */
-
+		error = VNOP_REMOVE(dvp, xvp, &nd.ni_cnd, 0, ctx);
 		if (error == 0)
 			vnode_setneedinactive(xvp);
 
@@ -4745,7 +5172,7 @@ xattrfile_setattr(vnode_t dvp, const char * basename, struct vnode_attr * vap,
 		MALLOC(filename, char *, len, M_TEMP, M_WAITOK);
 		len = snprintf(filename, len, "._%s", basename);
 	}
-	NDINIT(&nd, LOOKUP, NOFOLLOW | USEDVP, UIO_SYSSPACE,
+	NDINIT(&nd, LOOKUP, OP_SETATTR, NOFOLLOW | USEDVP, UIO_SYSSPACE,
 	       CAST_USER_ADDR_T(filename), ctx);
 	nd.ni_dvp = dvp;
 	if (namei(&nd) != 0)
@@ -4847,7 +5274,6 @@ VNOP_SYMLINK(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 		xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 0);
 	}
 
-
 #ifndef __LP64__
 	if (!thread_safe) {
 		unlock_fsnode(dvp, &funnel_state);
@@ -5454,6 +5880,16 @@ VNOP_PAGEOUT(struct vnode *vp, upl_t pl, upl_offset_t pl_offset, off_t f_offset,
 	return (_err);
 }
 
+int
+vn_remove(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, int32_t flags, struct vnode_attr *vap, vfs_context_t ctx)
+{
+	if (vnode_compound_remove_available(dvp)) {
+		return VNOP_COMPOUND_REMOVE(dvp, vpp, ndp, flags, vap, ctx);
+	} else {
+		return VNOP_REMOVE(dvp, *vpp, &ndp->ni_cnd, flags, ctx);
+	}
+}
+
 
 #if 0
 /*
diff --git a/bsd/vfs/vfs_attrlist.c b/bsd/vfs/vfs_attrlist.c
index b94375efd..091ee16ab 100644
--- a/bsd/vfs/vfs_attrlist.c
+++ b/bsd/vfs/vfs_attrlist.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1995-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 1995-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -395,6 +395,7 @@ static struct getattrlist_attrtab getattrlist_common_tab[] = {
 	{ATTR_CMN_FILEID,	VATTR_BIT(va_fileid), 		sizeof(uint64_t),		KAUTH_VNODE_READ_ATTRIBUTES},
 	{ATTR_CMN_PARENTID,	VATTR_BIT(va_parentid),		sizeof(uint64_t),		KAUTH_VNODE_READ_ATTRIBUTES},
 	{ATTR_CMN_FULLPATH, 0, 	sizeof(struct attrreference),	KAUTH_VNODE_READ_ATTRIBUTES	},
+	{ATTR_CMN_ADDEDTIME,	VATTR_BIT(va_addedtime),	ATTR_TIME_SIZE,		KAUTH_VNODE_READ_ATTRIBUTES},
 	{ATTR_CMN_RETURNED_ATTRS, 0,				sizeof(attribute_set_t),	0},
 	{0, 0, 0, 0}
 };
@@ -523,6 +524,27 @@ getattrlist_fixupattrs(attribute_set_t *asp, struct vnode_attr *vap)
 	if (asp->commonattr) {
 		tab = getattrlist_common_tab;
 		do {
+			/*
+			 * This if() statement is slightly confusing.  We iterate
+			 * through all of the bits listed in the array
+			 * getattrlist_common_tab and check whether the filesystem
+			 * was expected to support each one, and whether we need
+			 * to do anything about it.
+			 *
+			 * The array is full of structs that have 4 fields (attr,
+			 * bits, size, action).  The first stores the ATTR_CMN_*
+			 * bit that was requested from userland.  The second
+			 * stores the VATTR_BIT corresponding to the field filled
+			 * in the vnode_attr struct; if it is 0, then we don't
+			 * typically expect the filesystem to fill in this field.
+			 * The third is the size of the field, and the fourth is
+			 * the type of kauth actions needed.
+			 *
+			 * So, for all of the ATTR_CMN bits listed in this array,
+			 * we check whether each was both passed down to the
+			 * filesystem via the va_active bitfield and expected to
+			 * be emitted by the filesystem.  If one was not
+			 * supported, we clear its bit and move on, so that the
+			 * unsupported bits can be unchecked before re-requesting
+			 * a vnode_getattr from the filesystem.
+			 */
+
 			if ((tab->attr & asp->commonattr) &&
 			    (tab->bits & vap->va_active) &&
 			    (tab->bits & vap->va_supported) == 0) {
@@ -1108,6 +1130,7 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp,
 	}
 	if (alp->volattr & ATTR_VOL_UUID) {
 		ATTR_PACK(&ab, vs.f_uuid);
+		ab.actual.volattr |= ATTR_VOL_UUID;
 	}
 	if (alp->volattr & ATTR_VOL_ATTRIBUTES) {
 		/* fix up volume attribute information */
@@ -1188,6 +1211,7 @@ getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, proc_t p, vfs_con
 	int		return_valid;
 	int		pack_invalid;
 	int		vtype = 0;
+	uint32_t	perms = 0;
 
 	proc_is64 = proc_is64bit(p);
 	VATTR_INIT(&va);
@@ -1604,6 +1628,30 @@ getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, proc_t p, vfs_con
 		ATTR_PACK_TIME(ab, va.va_backup_time, proc_is64);
 		ab.actual.commonattr |= ATTR_CMN_BKUPTIME;
 	}
+	/*
+	 * If user access is requested, obtain it before getting the finder
+	 * info.  For some network filesystems this is a performance
+	 * improvement.
+	 */
+	if (al.commonattr & ATTR_CMN_USERACCESS) {	/* this is expensive */
+		if (vtype == VDIR) {
+			if (vnode_authorize(vp, NULL,
+								KAUTH_VNODE_ACCESS | KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY | KAUTH_VNODE_DELETE_CHILD, ctx) == 0)
+				perms |= W_OK;
+			if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_LIST_DIRECTORY, ctx) == 0)
+				perms |= R_OK;
+			if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_SEARCH, ctx) == 0)
+				perms |= X_OK;
+		} else {
+			if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA, ctx) == 0)
+				perms |= W_OK;
+			if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_READ_DATA, ctx) == 0)
+				perms |= R_OK;
+			if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_EXECUTE, ctx) == 0)
+				perms |= X_OK;
+		}
+	}
+	
 	if (al.commonattr & ATTR_CMN_FNDRINFO) {
 		uio_t	auio;
 		size_t	fisize = 32;
@@ -1654,25 +1702,8 @@ getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, proc_t p, vfs_con
 		ATTR_PACK4(ab, va.va_flags);
 		ab.actual.commonattr |= ATTR_CMN_FLAGS;
 	}
-	if (al.commonattr & ATTR_CMN_USERACCESS) {	/* this is expensive */
-		uint32_t	perms = 0;
-		if (vtype == VDIR) {
-			if (vnode_authorize(vp, NULL,
-				KAUTH_VNODE_ACCESS | KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY | KAUTH_VNODE_DELETE_CHILD, ctx) == 0)
-				perms |= W_OK;
-			if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_LIST_DIRECTORY, ctx) == 0)
-				perms |= R_OK;
-			if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_SEARCH, ctx) == 0)
-				perms |= X_OK;
-		} else {
-			if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA, ctx) == 0)
-				perms |= W_OK;
-			if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_READ_DATA, ctx) == 0)
-				perms |= R_OK;
-			if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_EXECUTE, ctx) == 0)
-				perms |= X_OK;
-		}
-
+	/* We already obtained the user access above, so just fill in the buffer here */
+	if (al.commonattr & ATTR_CMN_USERACCESS) {
 #if CONFIG_MACF
 		/* 
 		 * Rather than MAC preceding DAC, in this case we want
@@ -1737,6 +1768,12 @@ getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, proc_t p, vfs_con
 		attrlist_pack_string (&ab, fullpathptr, fullpathlen);
 		ab.actual.commonattr |= ATTR_CMN_FULLPATH;
 	}
+
+	if (al.commonattr & ATTR_CMN_ADDEDTIME) {
+		ATTR_PACK_TIME(ab, va.va_addedtime, proc_is64);
+		ab.actual.commonattr |= ATTR_CMN_ADDEDTIME;
+	}
+
 
 	/* directory attributes *********************************************/
 	if (al.dirattr && (vtype == VDIR)) {
@@ -1749,24 +1786,100 @@ getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, proc_t p, vfs_con
 			ab.actual.dirattr |= ATTR_DIR_ENTRYCOUNT;
 		}
 		if (al.dirattr & ATTR_DIR_MOUNTSTATUS) {
-			ATTR_PACK_CAST(&ab, uint32_t, (vp->v_flag & VROOT) ?
-			               DIR_MNTSTATUS_MNTPOINT : 0);
+			uint32_t mntstat;
+
+			mntstat = (vp->v_flag & VROOT) ? DIR_MNTSTATUS_MNTPOINT : 0;
+#if CONFIG_TRIGGERS
+			/*
+			 * Report back on active vnode triggers
+			 * that can directly trigger a mount
+			 */
+			if (vp->v_resolve &&
+			    !(vp->v_resolve->vr_flags & VNT_NO_DIRECT_MOUNT)) {
+				mntstat |= DIR_MNTSTATUS_TRIGGER;
+			}
+#endif
+			ATTR_PACK4(ab, mntstat);
 			ab.actual.dirattr |= ATTR_DIR_MOUNTSTATUS;
 		}
 	}
 
 	/* file attributes **************************************************/
 	if (al.fileattr && (vtype != VDIR)) {
+
+		size_t	rsize = 0;
+		uint64_t rlength = 0;
+		uint64_t ralloc = 0;
+		/* 
+		 * Pre-fetch the rsrc attributes now so we only get them once.
+		 * Fetch the resource fork size/allocation via xattr interface 
+		 */
+		if (al.fileattr & (ATTR_FILE_TOTALSIZE | ATTR_FILE_ALLOCSIZE | ATTR_FILE_RSRCLENGTH | ATTR_FILE_RSRCALLOCSIZE)) {
+			if ((error = vn_getxattr(vp, XATTR_RESOURCEFORK_NAME, NULL, &rsize, XATTR_NOSECURITY, ctx)) != 0) {
+				if ((error == ENOENT) || (error == ENOATTR) || (error == ENOTSUP) || (error == EPERM) || (error == EACCES)) {
+					rsize = 0;
+					error = 0;
+				} else {
+					goto out;
+				}
+			}
+			rlength = rsize;
+
+			if (al.fileattr & (ATTR_FILE_RSRCALLOCSIZE | ATTR_FILE_ALLOCSIZE)) {
+				uint32_t  blksize = vp->v_mount->mnt_vfsstat.f_bsize;
+				if (blksize == 0) {
+					blksize = 512;
+				}
+				ralloc = roundup(rsize, blksize);
+			}
+		}
+
 		if (al.fileattr & ATTR_FILE_LINKCOUNT) {
 			ATTR_PACK4(ab, (uint32_t)va.va_nlink);
 			ab.actual.fileattr |= ATTR_FILE_LINKCOUNT;
 		}
+		/*
+		 * Note the following caveats for the TOTALSIZE and ALLOCSIZE attributes: 
+		 * We infer that if the filesystem does not support va_data_size or va_data_alloc
+		 * it must not know about alternate forks.  So when we need to gather
+		 * the total size or total alloc, it's OK to substitute the total size for 
+		 * the data size below.  This is because it is likely a flat filesystem and we must
+		 * be using AD files to store the rsrc fork and EAs.  
+		 * 
+		 * Additionally, note that getattrlist is barred from being called on 
+		 * resource fork paths. (Search for CN_ALLOWRSRCFORK).  So if the filesystem does 
+		 * support va_data_size, it is guaranteed to represent the data fork's size.  This 
+		 * is an important distinction to make because when we call vnode_getattr on 
+		 * an HFS resource fork vnode, to get the size, it will vend out the resource 
+		 * fork's size (it only gets the size of the passed-in vnode).  
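+		 *
+		 * For example (illustrative numbers, not from this change): an
+		 * 8K data fork plus a 100K resource fork should yield
+		 * totalsize == 108K either way: va_data_size + rlength when
+		 * the filesystem supports va_data_size, or va_total_size +
+		 * rlength on a flat filesystem whose rsrc fork lives in an
+		 * AppleDouble file.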
+		 */
 		if (al.fileattr & ATTR_FILE_TOTALSIZE) {
-			ATTR_PACK8(ab, va.va_total_size);
+			uint64_t totalsize = rlength;			
+
+			if (VATTR_IS_SUPPORTED(&va, va_data_size)) {
+				totalsize += va.va_data_size;
+			} else {
+				totalsize += va.va_total_size;
+			}
+
+			ATTR_PACK8(ab, totalsize);
 			ab.actual.fileattr |= ATTR_FILE_TOTALSIZE;
 		}
 		if (al.fileattr & ATTR_FILE_ALLOCSIZE) {
-			ATTR_PACK8(ab, va.va_total_alloc);
+			uint64_t totalalloc = ralloc;
+		
+			/* 
+			 * If va_data_alloc is supported, then it must represent 
+			 * the data fork's allocated size.
+			 */
+			if (VATTR_IS_SUPPORTED(&va, va_data_alloc)) {
+				totalalloc += va.va_data_alloc;
+			} else {
+				totalalloc += va.va_total_alloc;
+			}
+
+			ATTR_PACK8(ab, totalalloc);
 			ab.actual.fileattr |= ATTR_FILE_ALLOCSIZE;
 		}
 		if (al.fileattr & ATTR_FILE_IOBLOCKSIZE) {
@@ -1793,6 +1906,12 @@ getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, proc_t p, vfs_con
 			ATTR_PACK4(ab, dev);
 			ab.actual.fileattr |= ATTR_FILE_DEVTYPE;
 		}
+		
+		/* 
+		 * If the filesystem does not support va_data_size or
+		 * va_data_alloc, then va_total_size and va_total_alloc
+		 * are packed below as substitutes.
+		 */
 		if (al.fileattr & ATTR_FILE_DATALENGTH) {
 			if (VATTR_IS_SUPPORTED(&va, va_data_size)) {
 				ATTR_PACK8(ab, va.va_data_size);
@@ -1809,37 +1928,17 @@ getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, proc_t p, vfs_con
 			}
 			ab.actual.fileattr |= ATTR_FILE_DATAALLOCSIZE;
 		}
-		/* fetch resource fork size/allocation via xattr interface */
-		if (al.fileattr & (ATTR_FILE_RSRCLENGTH | ATTR_FILE_RSRCALLOCSIZE)) {
-			size_t	rsize;
-			uint64_t rlength;
-
-			if ((error = vn_getxattr(vp, XATTR_RESOURCEFORK_NAME, NULL, &rsize, XATTR_NOSECURITY, ctx)) != 0) {
-				if ((error == ENOENT) || (error == ENOATTR) || (error == ENOTSUP) || (error == EPERM)) {
-					rsize = 0;
-					error = 0;
-				} else {
-					goto out;
-				}
-			}
-			if (al.fileattr & ATTR_FILE_RSRCLENGTH) {
-				rlength = rsize;
-				ATTR_PACK8(ab, rlength);
-				ab.actual.fileattr |= ATTR_FILE_RSRCLENGTH;
-			}
-			if (al.fileattr & ATTR_FILE_RSRCALLOCSIZE) {
-				uint32_t  blksize = vp->v_mount->mnt_vfsstat.f_bsize;
-				if (blksize == 0)
-					blksize = 512;
-				rlength = roundup(rsize, blksize);
-				ATTR_PACK8(ab, rlength);
-				ab.actual.fileattr |= ATTR_FILE_RSRCALLOCSIZE;
-			}
+		/* already got the resource fork size/allocation above */
+		if (al.fileattr & ATTR_FILE_RSRCLENGTH) {
+			ATTR_PACK8(ab, rlength);
+			ab.actual.fileattr |= ATTR_FILE_RSRCLENGTH;
 		}
-		if (al.fileattr & ATTR_FILE_PROTECTION_CLASS) {
+		if (al.fileattr & ATTR_FILE_RSRCALLOCSIZE) {
+			ATTR_PACK8(ab, ralloc);
+			ab.actual.fileattr |= ATTR_FILE_RSRCALLOCSIZE;
 		}
 	}
-	
+
 	/* diagnostic */
 	if (!return_valid && (ab.fixedcursor - ab.base) != fixedsize)
 		panic("packed field size mismatch; allocated %ld but packed %ld for common %08x vol %08x",
@@ -1938,7 +2037,7 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused int32_t *retval)
 	nameiflags = NOTRIGGER | AUDITVNPATH1;
 	if (!(uap->options & FSOPT_NOFOLLOW))
 		nameiflags |= FOLLOW;
-	NDINIT(&nd, LOOKUP, nameiflags, UIO_USERSPACE, uap->path, ctx);
+	NDINIT(&nd, LOOKUP, OP_GETATTR, nameiflags, UIO_USERSPACE, uap->path, ctx);
 
 	if ((error = namei(&nd)) != 0)
 		goto out;
@@ -2198,8 +2297,6 @@ setattrlist_internal(vnode_t vp, struct setattrlist_args *uap, proc_t p, vfs_con
 		VFS_DEBUG(ctx, vp, "ATTRLIST - XXX device type change not implemented");
 		goto out;
 	}
-	if (al.fileattr & ATTR_FILE_PROTECTION_CLASS) {
-	}
 
 	/*
 	 * Validate and authorize.
@@ -2325,10 +2422,10 @@ setattrlist(proc_t p, struct setattrlist_args *uap, __unused int32_t *retval)
 	/*
 	 * Look up the file.
 	 */
-	nameiflags = 0;
+	nameiflags = AUDITVNPATH1;
 	if ((uap->options & FSOPT_NOFOLLOW) == 0)
 		nameiflags |= FOLLOW;
-	NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
+	NDINIT(&nd, LOOKUP, OP_SETATTR, nameiflags, UIO_USERSPACE, uap->path, ctx);
 	if ((error = namei(&nd)) != 0)
 		goto out;
 	vp = nd.ni_vp;
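
For reference, a minimal userspace sketch of the interface the vfs_attrlist.c hunks above serve (hypothetical demo code, not part of this patch): it requests the hoisted ATTR_CMN_USERACCESS result together with the new ATTR_CMN_ADDEDTIME attribute, assuming a 64-bit process (so the added time unpacks as a struct timespec) and a filesystem that supports both attributes.

	#include <sys/types.h>
	#include <sys/attr.h>
	#include <unistd.h>
	#include <time.h>
	#include <stdio.h>
	#include <string.h>

	/* hypothetical helper; fields follow common-attribute bit order */
	static int
	print_useraccess(const char *path)
	{
		struct attrlist al;
		struct {
			u_int32_t	length;		/* total returned length */
			u_int32_t	useraccess;	/* R_OK/W_OK/X_OK bits */
			struct timespec	addedtime;	/* ATTR_CMN_ADDEDTIME */
		} __attribute__((aligned(4), packed)) ab;

		memset(&al, 0, sizeof(al));
		al.bitmapcount = ATTR_BIT_MAP_COUNT;
		al.commonattr  = ATTR_CMN_USERACCESS | ATTR_CMN_ADDEDTIME;

		if (getattrlist(path, &al, &ab, sizeof(ab), 0) != 0)
			return (-1);

		printf("%s: %c%c%c  added %ld\n", path,
		    (ab.useraccess & R_OK) ? 'r' : '-',
		    (ab.useraccess & W_OK) ? 'w' : '-',
		    (ab.useraccess & X_OK) ? 'x' : '-',
		    (long)ab.addedtime.tv_sec);
		return (0);
	}
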
diff --git a/bsd/vfs/vfs_bio.c b/bsd/vfs/vfs_bio.c
index 69b9e8520..0d474ed28 100644
--- a/bsd/vfs/vfs_bio.c
+++ b/bsd/vfs/vfs_bio.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -110,12 +110,13 @@
 
 #include <sys/sdt.h>
 
+
 #if BALANCE_QUEUES
 static __inline__ void bufqinc(int q);
 static __inline__ void bufqdec(int q);
 #endif
 
-static int	bcleanbuf(buf_t bp, boolean_t discard);
+int	bcleanbuf(buf_t bp, boolean_t discard);
 static int	brecover_data(buf_t bp);
 static boolean_t incore(vnode_t vp, daddr64_t blkno);
 /* timeout is in msecs */
@@ -125,7 +126,13 @@ static void	buf_reassign(buf_t bp, vnode_t newvp);
 static errno_t	buf_acquire_locked(buf_t bp, int flags, int slpflag, int slptimeo);
 static int	buf_iterprepare(vnode_t vp, struct buflists *, int flags);
 static void	buf_itercomplete(vnode_t vp, struct buflists *, int flags);
-boolean_t buffer_cache_gc(int);
+static boolean_t buffer_cache_gc(int);
+static buf_t	buf_brelse_shadow(buf_t bp);
+static void	buf_free_meta_store(buf_t bp);
+
+static buf_t	buf_create_shadow_internal(buf_t bp, boolean_t force_copy,
+					   uintptr_t external_storage, void (*iodone)(buf_t, void *), void *arg, int priv);
+
 
 __private_extern__ int  bdwrite_internal(buf_t, int);
 
@@ -156,6 +163,7 @@ long nbdwrite = 0;
 int blaundrycnt = 0;
 static int boot_nbuf_headers = 0;
 
+static TAILQ_HEAD(delayqueue, buf) delaybufqueue;
 
 static TAILQ_HEAD(ioqueue, buf) iobufqueue;
 static TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
@@ -231,7 +239,7 @@ int lru_is_stale = LRU_IS_STALE;
 int age_is_stale = AGE_IS_STALE;
 int meta_is_stale = META_IS_STALE;
 
-
+#define MAXLAUNDRY	10
 
 /* LIST_INSERT_HEAD() with assertions */
 static __inline__ void
@@ -278,7 +286,28 @@ bremhash(buf_t	bp)
 	*bp->b_hash.le_prev = (bp)->b_hash.le_next;
 }
 
+/*
+ * buf_mtxp held.
+ */
+static __inline__ void
+bmovelaundry(buf_t bp)
+{
+	bp->b_whichq = BQ_LAUNDRY;
+	bp->b_timestamp = buf_timestamp();
+	binstailfree(bp, &bufqueues[BQ_LAUNDRY], BQ_LAUNDRY);
+	blaundrycnt++;
+}
 
+static __inline__ void
+buf_release_credentials(buf_t bp)
+{
+	if (IS_VALID_CRED(bp->b_rcred)) {
+		kauth_cred_unref(&bp->b_rcred);
+	}
+	if (IS_VALID_CRED(bp->b_wcred)) {
+		kauth_cred_unref(&bp->b_wcred);
+	}
+}
 
 
 int
@@ -315,6 +344,17 @@ buf_markdelayed(buf_t bp) {
         SET(bp->b_flags, B_DONE);
 }
 
+void
+buf_markclean(buf_t bp) {
+
+	if (ISSET(bp->b_flags, B_DELWRI)) {
+		CLR(bp->b_flags, B_DELWRI);
+
+		OSAddAtomicLong(-1, &nbdwrite);
+		buf_reassign(bp, bp->b_vp);
+	}
+}
+
 void
 buf_markeintr(buf_t bp) {
   
@@ -571,15 +611,179 @@ buf_clone(buf_t bp, int io_offset, int io_size, void (*iodone)(buf_t, void *), v
 }
 
 
+int
+buf_shadow(buf_t bp)
+{
+	if (bp->b_lflags & BL_SHADOW)
+		return 1;
+	return 0;
+}
+
+
+buf_t
+buf_create_shadow_priv(buf_t bp, boolean_t force_copy, uintptr_t external_storage, void (*iodone)(buf_t, void *), void *arg)
+{
+	return (buf_create_shadow_internal(bp, force_copy, external_storage, iodone, arg, 1));
+}
+
+buf_t
+buf_create_shadow(buf_t bp, boolean_t force_copy, uintptr_t external_storage, void (*iodone)(buf_t, void *), void *arg)
+{
+	return (buf_create_shadow_internal(bp, force_copy, external_storage, iodone, arg, 0));
+}
+
+
+static buf_t
+buf_create_shadow_internal(buf_t bp, boolean_t force_copy, uintptr_t external_storage, void (*iodone)(buf_t, void *), void *arg, int priv)
+{
+        buf_t	io_bp;
+
+	KERNEL_DEBUG(0xbbbbc000 | DBG_FUNC_START, bp, 0, 0, 0, 0);
+
+	if ( !(bp->b_flags & B_META) || (bp->b_lflags & BL_IOBUF)) {
+
+		KERNEL_DEBUG(0xbbbbc000 | DBG_FUNC_END, bp, 0, 0, 0, 0);
+		return (NULL);
+	}
+#ifdef BUF_MAKE_PRIVATE
+	if (bp->b_shadow_ref && bp->b_data_ref == 0 && external_storage == 0)
+		panic("buf_create_shadow: %p is in the private state (%d, %d)", bp, bp->b_shadow_ref, bp->b_data_ref);
+#endif
+	io_bp = alloc_io_buf(bp->b_vp, priv);
+
+	io_bp->b_flags = bp->b_flags & (B_META | B_ZALLOC | B_ASYNC | B_READ | B_FUA);
+	io_bp->b_blkno = bp->b_blkno;
+	io_bp->b_lblkno = bp->b_lblkno;
+
+	if (iodone) {
+	        io_bp->b_transaction = arg;
+		io_bp->b_iodone = iodone;
+		io_bp->b_flags |= B_CALL;
+	}
+	if (force_copy == FALSE) {
+		io_bp->b_bcount = bp->b_bcount;
+		io_bp->b_bufsize = bp->b_bufsize;
+
+		if (external_storage) {
+			io_bp->b_datap = external_storage;
+#ifdef BUF_MAKE_PRIVATE
+			io_bp->b_data_store = NULL;
+#endif
+		} else {
+			io_bp->b_datap = bp->b_datap;
+#ifdef BUF_MAKE_PRIVATE
+			io_bp->b_data_store = bp;
+#endif
+		}
+		*(buf_t *)(&io_bp->b_orig) = bp;
+
+		lck_mtx_lock_spin(buf_mtxp);
+
+		io_bp->b_lflags |= BL_SHADOW;
+		io_bp->b_shadow = bp->b_shadow;
+		bp->b_shadow = io_bp;
+		bp->b_shadow_ref++;
+
+#ifdef BUF_MAKE_PRIVATE
+		if (external_storage)
+			io_bp->b_lflags |= BL_EXTERNAL;
+		else
+			bp->b_data_ref++;
+#endif
+		lck_mtx_unlock(buf_mtxp);
+	} else {
+		if (external_storage) {
+#ifdef BUF_MAKE_PRIVATE
+			io_bp->b_lflags |= BL_EXTERNAL;
+#endif
+			io_bp->b_bcount = bp->b_bcount;
+			io_bp->b_bufsize = bp->b_bufsize;
+			io_bp->b_datap = external_storage;
+		} else {
+			allocbuf(io_bp, bp->b_bcount);
+
+			io_bp->b_lflags |= BL_IOBUF_ALLOC;
+		}
+		bcopy((caddr_t)bp->b_datap, (caddr_t)io_bp->b_datap, bp->b_bcount);
+
+#ifdef BUF_MAKE_PRIVATE
+		io_bp->b_data_store = NULL;
+#endif
+	}
+	KERNEL_DEBUG(0xbbbbc000 | DBG_FUNC_END, bp, bp->b_shadow_ref, 0, io_bp, 0);
+
+	return (io_bp);
+}
+
+
+#ifdef BUF_MAKE_PRIVATE
+errno_t
+buf_make_private(buf_t bp)
+{
+	buf_t	ds_bp;
+	buf_t	t_bp;
+	struct buf my_buf;
+
+	KERNEL_DEBUG(0xbbbbc004 | DBG_FUNC_START, bp, bp->b_shadow_ref, 0, 0, 0);
+
+	if (bp->b_shadow_ref == 0 || bp->b_data_ref == 0 || ISSET(bp->b_lflags, BL_SHADOW)) {
+
+		KERNEL_DEBUG(0xbbbbc004 | DBG_FUNC_END, bp, bp->b_shadow_ref, 0, EINVAL, 0);
+		return (EINVAL);
+	}
+	my_buf.b_flags = B_META;
+	my_buf.b_datap = (uintptr_t)NULL;
+	allocbuf(&my_buf, bp->b_bcount);
+
+	bcopy((caddr_t)bp->b_datap, (caddr_t)my_buf.b_datap, bp->b_bcount);
+
+	lck_mtx_lock_spin(buf_mtxp);
+
+	for (t_bp = bp->b_shadow; t_bp; t_bp = t_bp->b_shadow) {
+		if ( !ISSET(t_bp->b_lflags, BL_EXTERNAL))
+			break;
+	}
+	ds_bp = t_bp;
+
+	if (ds_bp == NULL && bp->b_data_ref)
+		panic("buf_make_private: b_data_ref != 0 && ds_bp == NULL");
+
+	if (ds_bp && (bp->b_data_ref == 0 || bp->b_shadow_ref == 0))
+		panic("buf_make_private: ref_count == 0 && ds_bp != NULL");
+
+	if (ds_bp == NULL) {
+		lck_mtx_unlock(buf_mtxp);
+
+		buf_free_meta_store(&my_buf);
+
+		KERNEL_DEBUG(0xbbbbc004 | DBG_FUNC_END, bp, bp->b_shadow_ref, 0, EINVAL, 0);
+		return (EINVAL);
+	}
+	for (t_bp = bp->b_shadow; t_bp; t_bp = t_bp->b_shadow) {
+		if ( !ISSET(t_bp->b_lflags, BL_EXTERNAL))
+			t_bp->b_data_store = ds_bp;
+	}
+	ds_bp->b_data_ref = bp->b_data_ref;
+
+	bp->b_data_ref = 0;
+	bp->b_datap = my_buf.b_datap;
+
+	lck_mtx_unlock(buf_mtxp);
+
+	KERNEL_DEBUG(0xbbbbc004 | DBG_FUNC_END, bp, bp->b_shadow_ref, 0, 0, 0);
+	return (0);
+}
+#endif
+
 
 void
 buf_setfilter(buf_t bp, void (*filter)(buf_t, void *), void *transaction,
-	      void **old_iodone, void **old_transaction)
+			  void (**old_iodone)(buf_t, void *), void **old_transaction)
 {
-        if (old_iodone)
-	        *old_iodone = (void *)(bp->b_iodone);
+	if (old_iodone)
+		*old_iodone = bp->b_iodone;
 	if (old_transaction)
-	        *old_transaction = (void *)(bp->b_transaction);
+		*old_transaction = bp->b_transaction;
 
 	bp->b_transaction = transaction;
 	bp->b_iodone = filter;
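
A hedged sketch of how a client (for example, a journaling filesystem) might drive the shadow-buffer KPI added above; the names and error handling are illustrative, not code from this patch. It clones a B_META buffer without copying its storage, issues an async I/O through the clone, and lets the completion callback release it; buf_brelse_shadow(), added further down, then returns the master to a freelist once the last clone is gone.

	/* assumes the <sys/buf.h> KPIs; illustrative only */
	static void
	shadow_done(buf_t io_bp, __unused void *arg)
	{
		/* the callback owns io_bp; releasing the shadow lets the
		 * master go back on a freelist once b_shadow_ref hits 0 */
		buf_brelse(io_bp);
	}

	static errno_t
	write_via_shadow(buf_t master_bp)
	{
		buf_t	io_bp;

		/* share the master's storage: no copy, no external buffer */
		io_bp = buf_create_shadow(master_bp, FALSE, 0, shadow_done, NULL);

		if (io_bp == NULL)	/* not B_META, or master was an iobuf */
			return (EINVAL);

		buf_setflags(io_bp, B_ASYNC);

		return (VNOP_STRATEGY(io_bp));
	}
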
@@ -884,6 +1088,13 @@ buf_strategy(vnode_t devvp, void *ap)
 	vnode_t	vp = bp->b_vp;
 	int	bmap_flags;
         errno_t error;
+#if CONFIG_DTRACE
+	int dtrace_io_start_flag = 0;	 /* We only want to trip the io:::start
+					  * probe once, with the true physical
+					  * block in place (b_blkno)
+					  */
+
+#endif	
 
 	if (vp == NULL || vp->v_type == VCHR || vp->v_type == VBLK)
 	        panic("buf_strategy: b_vp == NULL || vtype == VCHR | VBLK\n");
@@ -893,7 +1104,6 @@ buf_strategy(vnode_t devvp, void *ap)
 	 * end up issuing the I/O...
 	 */
 	bp->b_dev = devvp->v_rdev;
-	DTRACE_IO1(start, buf_t, bp);
 
 	if (bp->b_flags & B_READ)
 	        bmap_flags = VNODE_READ;
@@ -909,6 +1119,7 @@ buf_strategy(vnode_t devvp, void *ap)
 			 * to deal with filesystem block sizes
 			 * that aren't equal to the page size
 			 */
+			DTRACE_IO1(start, buf_t, bp);
 		        return (cluster_bp(bp));
 		}
 		if (bp->b_blkno == bp->b_lblkno) {
@@ -916,30 +1127,53 @@ buf_strategy(vnode_t devvp, void *ap)
 			size_t 	contig_bytes;
 		  
 			if ((error = VNOP_BLKTOOFF(vp, bp->b_lblkno, &f_offset))) {
+				DTRACE_IO1(start, buf_t, bp);
 			        buf_seterror(bp, error);
 				buf_biodone(bp);
 
 			        return (error);
 			}
 			if ((error = VNOP_BLOCKMAP(vp, f_offset, bp->b_bcount, &bp->b_blkno, &contig_bytes, NULL, bmap_flags, NULL))) {
+				DTRACE_IO1(start, buf_t, bp);
 			        buf_seterror(bp, error);
 				buf_biodone(bp);
 
 			        return (error);
 			}
+			
+			DTRACE_IO1(start, buf_t, bp);
+#if CONFIG_DTRACE
+			dtrace_io_start_flag = 1;
+#endif /* CONFIG_DTRACE */			
+			
 			if ((bp->b_blkno == -1) || (contig_bytes == 0)) {
 				/* Set block number to force biodone later */
 				bp->b_blkno = -1;
 			        buf_clear(bp);
 			}
-			else if ((long)contig_bytes < bp->b_bcount)
+			else if ((long)contig_bytes < bp->b_bcount) {
 			        return (buf_strategy_fragmented(devvp, bp, f_offset, contig_bytes));
+			}
 		}
+		
+#if CONFIG_DTRACE
+		if (dtrace_io_start_flag == 0) {
+			DTRACE_IO1(start, buf_t, bp);
+			dtrace_io_start_flag = 1;
+		}
+#endif /* CONFIG_DTRACE */
+		
 		if (bp->b_blkno == -1) {
 		        buf_biodone(bp);
 			return (0);
 		}
 	}
+
+#if CONFIG_DTRACE
+	if (dtrace_io_start_flag == 0)
+		DTRACE_IO1(start, buf_t, bp);
+#endif /* CONFIG_DTRACE */
+	
 	/*
 	 * we can issue the I/O because...
 	 * either B_CLUSTER is set which
@@ -1067,6 +1301,7 @@ int
 buf_invalidateblks(vnode_t vp, int flags, int slpflag, int slptimeo)
 {
 	buf_t	bp;
+	int	aflags;
 	int	error = 0;
 	int	must_rescan = 1;
 	struct	buflists local_iterblkhd;
@@ -1097,6 +1332,7 @@ buf_invalidateblks(vnode_t vp, int flags, int slpflag, int slptimeo)
 		        goto try_dirty_list;
 		}
 		while (!LIST_EMPTY(&local_iterblkhd)) {
+
 			bp = LIST_FIRST(&local_iterblkhd);
 
 			LIST_REMOVE(bp, b_vnbufs);
@@ -1108,7 +1344,12 @@ buf_invalidateblks(vnode_t vp, int flags, int slpflag, int slptimeo)
 			if ((flags & BUF_SKIP_META) && (bp->b_lblkno < 0 || ISSET(bp->b_flags, B_META)))
 				continue;
 
-			if ( (error = (int)buf_acquire_locked(bp, BAC_REMOVE | BAC_SKIP_LOCKED, slpflag, slptimeo)) ) {
+			aflags = BAC_REMOVE;
+
+			if ( !(flags & BUF_INVALIDATE_LOCKED) )
+				aflags |= BAC_SKIP_LOCKED;
+
+			if ( (error = (int)buf_acquire_locked(bp, aflags, slpflag, slptimeo)) ) {
 			        if (error == EDEADLK)
 				        /*	
 					 * this buffer was marked B_LOCKED... 
@@ -1136,6 +1377,10 @@ buf_invalidateblks(vnode_t vp, int flags, int slpflag, int slptimeo)
 			}
 			lck_mtx_unlock(buf_mtxp);
 
+			if (bp->b_flags & B_LOCKED)
+				KERNEL_DEBUG(0xbbbbc038, bp, 0, 0, 0, 0);
+
+			CLR(bp->b_flags, B_LOCKED);
 			SET(bp->b_flags, B_INVAL);
 			buf_brelse(bp);
 
@@ -1170,7 +1415,12 @@ try_dirty_list:
 			if ((flags & BUF_SKIP_META) && (bp->b_lblkno < 0 || ISSET(bp->b_flags, B_META)))
 				continue;
 
-			if ( (error = (int)buf_acquire_locked(bp, BAC_REMOVE | BAC_SKIP_LOCKED, slpflag, slptimeo)) ) {
+			aflags = BAC_REMOVE;
+
+			if ( !(flags & BUF_INVALIDATE_LOCKED) )
+				aflags |= BAC_SKIP_LOCKED;
+
+			if ( (error = (int)buf_acquire_locked(bp, aflags, slpflag, slptimeo)) ) {
 			        if (error == EDEADLK)
 				        /*	
 					 * this buffer was marked B_LOCKED... 
@@ -1198,6 +1448,10 @@ try_dirty_list:
 			}
 			lck_mtx_unlock(buf_mtxp);
 
+			if (bp->b_flags & B_LOCKED)
+				KERNEL_DEBUG(0xbbbbc038, bp, 0, 0, 1, 0);
+
+			CLR(bp->b_flags, B_LOCKED);
 			SET(bp->b_flags, B_INVAL);
 
 			if (ISSET(bp->b_flags, B_DELWRI) && (flags & BUF_WRITE_DATA))
@@ -1360,6 +1614,19 @@ bremfree_locked(buf_t bp)
 {
 	struct bqueues *dp = NULL;
 	int whichq;
+
+	whichq = bp->b_whichq;
+
+	if (whichq == -1) {
+		if (bp->b_shadow_ref == 0)
+			panic("bremfree_locked: %p not on freelist", bp);
+		/*
+		 * there are clones pointing to 'bp'...
+		 * therefore, it was not put on a freelist
+		 * when buf_brelse was last called on 'bp'
+		 */
+		return;
+	}
 	/*
 	 * We only calculate the head of the freelist when removing
 	 * the last element of the list as that is the only time that
@@ -1367,8 +1634,6 @@ bremfree_locked(buf_t bp)
 	 *
 	 * NB: This makes an assumption about how tailq's are implemented.
 	 */
-	whichq = bp->b_whichq;
-
 	if (bp->b_freelist.tqe_next == NULL) {
 	        dp = &bufqueues[whichq];
 
@@ -1385,6 +1650,7 @@ bremfree_locked(buf_t bp)
 
 	bp->b_whichq = -1;
 	bp->b_timestamp = 0; 
+	bp->b_shadow = 0;
 }
 
 /*
@@ -1432,7 +1698,7 @@ brelvp_locked(buf_t bp)
 static void
 buf_reassign(buf_t bp, vnode_t newvp)
 {
-	register struct buflists *listheadp;
+	struct buflists *listheadp;
 
 	if (newvp == NULL) {
 		printf("buf_reassign: NULL");
@@ -1502,8 +1768,11 @@ bufinit(void)
 		binsheadfree(bp, dp, BQ_EMPTY);
 		binshash(bp, &invalhash);
 	}
-
 	boot_nbuf_headers = nbuf_headers;
+
+	TAILQ_INIT(&iobufqueue);
+	TAILQ_INIT(&delaybufqueue);
+
 	for (; i < nbuf_headers + niobuf_headers; i++) {
 		bp = &buf_headers[i];
 		bufhdrinit(bp);
@@ -1601,8 +1870,10 @@ bufzoneinit(void)
 					meta_zones[i].mz_max,
 					PAGE_SIZE,
 					meta_zones[i].mz_name);
+		zone_change(meta_zones[i].mz_zone, Z_CALLERACCT, FALSE);
 	}
 	buf_hdr_zone = zinit(sizeof(struct buf), 32, PAGE_SIZE, "buf headers");
+	zone_change(buf_hdr_zone, Z_CALLERACCT, FALSE);
 }
 
 static __inline__ zone_t
@@ -1853,7 +2124,7 @@ vn_bwrite(struct vnop_bwrite_args *ap)
  * headers, we can get in to the situation where "too" many 
  * buf_bdwrite()s can create situation where the kernel can create
  * buffers faster than the disks can service. Doing a buf_bawrite() in
- * cases were we have "too many" outstanding buf_bdwrite()s avoids that.
+ * cases where we have "too many" outstanding buf_bdwrite()s avoids that.
  */
 __private_extern__ int
 bdwrite_internal(buf_t bp, int return_error)
@@ -1955,6 +2226,116 @@ buf_bawrite(buf_t bp)
 }
 
 
+
+static void
+buf_free_meta_store(buf_t bp)
+{
+	if (bp->b_bufsize) {
+		if (ISSET(bp->b_flags, B_ZALLOC)) {
+			zone_t z;
+
+			z = getbufzone(bp->b_bufsize);
+			zfree(z, (void *)bp->b_datap);
+		} else
+			kmem_free(kernel_map, bp->b_datap, bp->b_bufsize); 
+
+		bp->b_datap = (uintptr_t)NULL;
+		bp->b_bufsize = 0;
+	}
+}
+
+
+static buf_t
+buf_brelse_shadow(buf_t bp)
+{
+	buf_t	bp_head;
+	buf_t	bp_temp;
+	buf_t	bp_return = NULL;
+#ifdef BUF_MAKE_PRIVATE
+	buf_t	bp_data;
+	int	data_ref = 0;
+#endif
+	lck_mtx_lock_spin(buf_mtxp);
+
+	bp_head = (buf_t)bp->b_orig;
+
+	if (bp_head->b_whichq != -1)
+		panic("buf_brelse_shadow: bp_head on freelist %d\n", bp_head->b_whichq);
+
+#ifdef BUF_MAKE_PRIVATE
+	if ((bp_data = bp->b_data_store)) {
+		bp_data->b_data_ref--;
+		/*
+		 * snapshot the ref count so that we can check it 
+		 * outside of the lock... we only want the guy going
+		 * from 1 -> 0 to try and release the storage
+		 */
+		data_ref = bp_data->b_data_ref;
+	}
+#endif
+	KERNEL_DEBUG(0xbbbbc008 | DBG_FUNC_START, bp, bp_head, bp_head->b_shadow_ref, 0, 0);
+
+	bp_head->b_shadow_ref--;
+
+	for (bp_temp = bp_head; bp_temp && bp != bp_temp->b_shadow; bp_temp = bp_temp->b_shadow);
+
+	if (bp_temp == NULL)
+		panic("buf_brelse_shadow: bp not on list %p", bp_head);
+
+	bp_temp->b_shadow = bp_temp->b_shadow->b_shadow;
+
+#ifdef BUF_MAKE_PRIVATE
+	/*
+	 * we're about to free the current 'owner' of the data buffer and
+	 * there is at least one other shadow buf_t still pointing at it
+	 * so transfer it to the first shadow buf left in the chain
+	 */
+	if (bp == bp_data && data_ref) {
+		if ((bp_data = bp_head->b_shadow) == NULL)
+			panic("buf_brelse_shadow: data_ref mismatch bp(%p)", bp);
+
+		for (bp_temp = bp_data; bp_temp; bp_temp = bp_temp->b_shadow)
+			bp_temp->b_data_store = bp_data;
+		bp_data->b_data_ref = data_ref;
+	}
+#endif
+	if (bp_head->b_shadow_ref == 0 && bp_head->b_shadow)
+		panic("buf_brelse_shadow: b_shadow != NULL && b_shadow_ref == 0  bp(%p)", bp);
+	if (bp_head->b_shadow_ref && bp_head->b_shadow == 0)
+		panic("buf_brelse_shadow: b_shadow == NULL && b_shadow_ref != 0  bp(%p)", bp);
+
+	if (bp_head->b_shadow_ref == 0) {
+		if (!ISSET(bp_head->b_lflags, BL_BUSY)) {
+
+			CLR(bp_head->b_flags, B_AGE);
+			bp_head->b_timestamp = buf_timestamp();
+
+			if (ISSET(bp_head->b_flags, B_LOCKED)) {
+				bp_head->b_whichq = BQ_LOCKED;
+				binstailfree(bp_head, &bufqueues[BQ_LOCKED], BQ_LOCKED);
+			} else {
+				bp_head->b_whichq = BQ_META;
+				binstailfree(bp_head, &bufqueues[BQ_META], BQ_META);
+			}
+		} else if (ISSET(bp_head->b_lflags, BL_WAITSHADOW)) {
+			CLR(bp_head->b_lflags, BL_WAITSHADOW);
+
+			bp_return = bp_head;
+		}
+	}
+	lck_mtx_unlock(buf_mtxp);
+#ifdef BUF_MAKE_PRIVATE	
+	if (bp == bp_data && data_ref == 0)
+		buf_free_meta_store(bp);
+
+	bp->b_data_store = NULL;
+#endif
+	KERNEL_DEBUG(0xbbbbc008 | DBG_FUNC_END, bp, 0, 0, 0, 0);
+
+	return (bp_return);
+}
+
+
 /*
  * Release a buffer on to the free lists.
  * Described in Bach (p. 46).
@@ -1979,7 +2360,18 @@ buf_brelse(buf_t bp)
 	bp->b_tag = 0;
 #endif
 	if (bp->b_lflags & BL_IOBUF) {
+		buf_t	shadow_master_bp = NULL;
+
+		if (ISSET(bp->b_lflags, BL_SHADOW))
+			shadow_master_bp = buf_brelse_shadow(bp);
+		else if (ISSET(bp->b_lflags, BL_IOBUF_ALLOC))
+			 buf_free_meta_store(bp);
 	        free_io_buf(bp);
+
+		if (shadow_master_bp) {
+			bp = shadow_master_bp;
+			goto finish_shadow_master;
+		}
 		return;
 	}
 
@@ -1999,7 +2391,7 @@ buf_brelse(buf_t bp)
 	if (ISSET(bp->b_flags, B_META) && ISSET(bp->b_flags, B_INVAL)) {
 		if (ISSET(bp->b_flags, B_FILTER)) {	/* if necessary, call out */
 			void	(*iodone_func)(struct buf *, void *) = bp->b_iodone;
-			void 	*arg = (void *)bp->b_transaction;
+			void 	*arg = bp->b_transaction;
 
 			CLR(bp->b_flags, B_FILTER);	/* but note callout done */
 			bp->b_iodone = NULL;
@@ -2020,7 +2412,7 @@ buf_brelse(buf_t bp)
 		kern_return_t kret;
 		int           upl_flags;
 
-		if ( (upl == NULL) ) {
+		if (upl == NULL) {
 		        if ( !ISSET(bp->b_flags, B_INVAL)) {
 				kret = ubc_create_upl(bp->b_vp, 
 						      ubc_blktooff(bp->b_vp, bp->b_lblkno),
@@ -2082,6 +2474,9 @@ buf_brelse(buf_t bp)
 	if ((bp->b_bufsize <= 0) || 
 			ISSET(bp->b_flags, B_INVAL) || 
 			(ISSET(bp->b_lflags, BL_WANTDEALLOC) && !ISSET(bp->b_flags, B_DELWRI))) {
+
+		boolean_t	delayed_buf_free_meta_store = FALSE;
+
 		/*
 		 * If it's invalid or empty, dissociate it from its vnode,
 		 * release its storage if B_META, and
@@ -2091,34 +2486,34 @@ buf_brelse(buf_t bp)
 			OSAddAtomicLong(-1, &nbdwrite);
 
 		if (ISSET(bp->b_flags, B_META)) {
-		        if (bp->b_bufsize) {
-			        if (ISSET(bp->b_flags, B_ZALLOC)) {
-				        zone_t z;
-
-					z = getbufzone(bp->b_bufsize);
-					zfree(z, (void *)bp->b_datap);
-				} else
-				        kmem_free(kernel_map, bp->b_datap, bp->b_bufsize); 
-
-				 bp->b_datap = (uintptr_t)NULL;
-				 bp->b_bufsize = 0;
-			}
+			if (bp->b_shadow_ref)
+				delayed_buf_free_meta_store = TRUE;
+			else
+				buf_free_meta_store(bp);
 		}
 		/*
 		 * nuke any credentials we were holding
 		 */
-		if (IS_VALID_CRED(bp->b_rcred)) {
-		        kauth_cred_unref(&bp->b_rcred);
-		}
-		if (IS_VALID_CRED(bp->b_wcred)) {
-		        kauth_cred_unref(&bp->b_wcred);
+		buf_release_credentials(bp);
+
+		lck_mtx_lock_spin(buf_mtxp);
+
+		if (bp->b_shadow_ref) {
+			SET(bp->b_lflags, BL_WAITSHADOW);
+			
+			lck_mtx_unlock(buf_mtxp);
+			
+			return;
 		}
-		CLR(bp->b_flags, (B_META | B_ZALLOC | B_DELWRI | B_LOCKED | B_AGE | B_ASYNC | B_NOCACHE | B_FUA));
+		if (delayed_buf_free_meta_store == TRUE) {
 
-		bufq = &bufqueues[BQ_EMPTY];
-		bp->b_whichq = BQ_EMPTY;
+			lck_mtx_unlock(buf_mtxp);
+finish_shadow_master:
+			buf_free_meta_store(bp);
 
-		lck_mtx_lock_spin(buf_mtxp);
+			lck_mtx_lock_spin(buf_mtxp);
+		}
+		CLR(bp->b_flags, (B_META | B_ZALLOC | B_DELWRI | B_LOCKED | B_AGE | B_ASYNC | B_NOCACHE | B_FUA));
 
 		if (bp->b_vp)
 			brelvp_locked(bp);
@@ -2127,8 +2522,10 @@ buf_brelse(buf_t bp)
 		BLISTNONE(bp);
 		binshash(bp, &invalhash);
 
-		binsheadfree(bp, bufq, BQ_EMPTY);
+		bp->b_whichq = BQ_EMPTY;
+		binsheadfree(bp, &bufqueues[BQ_EMPTY], BQ_EMPTY);
 	} else {
+
 		/*
 		 * It has valid data.  Put it on the end of the appropriate
 		 * queue, so that it'll stick around for as long as possible.
@@ -2143,13 +2540,32 @@ buf_brelse(buf_t bp)
 			whichq = BQ_LRU;		/* valid data */
 		bufq = &bufqueues[whichq];
 
-		CLR(bp->b_flags, (B_AGE | B_ASYNC | B_NOCACHE));
-		bp->b_whichq = whichq;
 		bp->b_timestamp = buf_timestamp();
 
-	        lck_mtx_lock_spin(buf_mtxp);
-
-		binstailfree(bp, bufq, whichq);
+		lck_mtx_lock_spin(buf_mtxp);
+		
+		/*
+		 * the buf_brelse_shadow routine doesn't take 'ownership'
+		 * of the parent buf_t... it updates state that is protected by
+		 * the buf_mtxp, and checks for BL_BUSY to determine whether to
+		 * put the buf_t back on a free list.  b_shadow_ref is protected
+		 * by the lock, and since we have not yet cleared B_BUSY, we need
+		 * to check it while holding the lock to ensure that one of us
+		 * puts this buf_t back on a free list when it is safe to do so
+		 */
+		if (bp->b_shadow_ref == 0) {
+			CLR(bp->b_flags, (B_AGE | B_ASYNC | B_NOCACHE));
+			bp->b_whichq = whichq;
+			binstailfree(bp, bufq, whichq);
+		} else {
+			/*
+			 * there are still cloned buf_t's pointing
+			 * at this guy... need to keep it off the
+			 * freelists until a buf_brelse is done on 
+			 * the last clone
+			 */
+			CLR(bp->b_flags, (B_ASYNC | B_NOCACHE));
+		}
 	}
 	if (needbuffer) {
 	        /*
@@ -2581,6 +2997,23 @@ buf_geteblk(int size)
 	return (bp);
 }
 
+uint32_t
+buf_redundancy_flags(buf_t bp)
+{
+	return bp->b_redundancy_flags;
+}
+
+void
+buf_set_redundancy_flags(buf_t bp, uint32_t flags)
+{
+	SET(bp->b_redundancy_flags, flags);
+}
+
+void
+buf_clear_redundancy_flags(buf_t bp, uint32_t flags)
+{
+	CLR(bp->b_redundancy_flags, flags);
+}
 
 /*
  * With UBC, there is no need to expand / shrink the file data 
@@ -2861,14 +3294,14 @@ found:
 
 /* 
  * Clean a buffer.
- * Returns 0 is buffer is ready to use,
+ * Returns 0 if buffer is ready to use,
  * Returns 1 if issued a buf_bawrite() to indicate 
  * that the buffer is not ready.
  * 
  * buf_mtxp is held upon entry
  * returns with buf_mtxp locked
  */
-static int
+int
 bcleanbuf(buf_t bp, boolean_t discard)
 {
 	/* Remove from the queue */
@@ -2887,10 +3320,7 @@ bcleanbuf(buf_t bp, boolean_t discard)
 			SET(bp->b_lflags, BL_WANTDEALLOC);
 		}
 
-		bp->b_whichq = BQ_LAUNDRY;
-		bp->b_timestamp = buf_timestamp();
-		binstailfree(bp, &bufqueues[BQ_LAUNDRY], BQ_LAUNDRY);
-		blaundrycnt++;
+		bmovelaundry(bp);
 
 		lck_mtx_unlock(buf_mtxp);
 
@@ -2926,30 +3356,12 @@ bcleanbuf(buf_t bp, boolean_t discard)
 
 	BLISTNONE(bp);
 
-	if (ISSET(bp->b_flags, B_META)) {
-	        vm_offset_t elem;
-
-		elem = (vm_offset_t)bp->b_datap;
-		bp->b_datap = (uintptr_t)0xdeadbeef;
-
-		if (ISSET(bp->b_flags, B_ZALLOC)) {
-		        zone_t z;
-
-			z = getbufzone(bp->b_bufsize);
-			zfree(z, (void *)elem);
-		} else
-			kmem_free(kernel_map, elem, bp->b_bufsize); 
-	}
+	if (ISSET(bp->b_flags, B_META))
+		buf_free_meta_store(bp);
 
 	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
 
-	/* nuke any credentials we were holding */
-	if (IS_VALID_CRED(bp->b_rcred)) {
-		kauth_cred_unref(&bp->b_rcred);
-	}
-	if (IS_VALID_CRED(bp->b_wcred)) {
-		kauth_cred_unref(&bp->b_wcred);
-	}
+	buf_release_credentials(bp);
 
 	/* If discarding, just move to the empty queue */
 	if (discard) {
@@ -3163,24 +3575,6 @@ buf_biowait(buf_t bp)
 		return (0);
 }
 
-/*
- * Wait for the callback operation on a B_CALL buffer to complete.
- */
-void
-buf_biowait_callback(buf_t bp)
-{
-	while (!ISSET(bp->b_lflags, BL_CALLDONE)) {
-
-		lck_mtx_lock_spin(buf_mtxp);
-
-		if (!ISSET(bp->b_lflags, BL_CALLDONE)) {
-			DTRACE_IO1(wait__start, buf_t, bp);
-			(void) msleep(bp, buf_mtxp, PDROP | (PRIBIO+1), "buf_biowait", NULL);
-			DTRACE_IO1(wait__done, buf_t, bp);
-		} else
-			lck_mtx_unlock(buf_mtxp);
-	}
-}
 
 /*
  * Mark I/O complete on a buffer.
@@ -3242,6 +3636,11 @@ buf_biodone(buf_t bp)
 		else if (bp->b_flags & B_PAGEIO)
 		        code |= DKIO_PAGING;
 
+		if (bp->b_flags & B_THROTTLED_IO)
+			code |= DKIO_THROTTLE;
+		else if (bp->b_flags & B_PASSIVE)
+			code |= DKIO_PASSIVE;
+
 		KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE,
                               bp, (uintptr_t)bp->b_vp,
 				      bp->b_resid, bp->b_error, 0);
@@ -3252,11 +3651,14 @@ buf_biodone(buf_t bp)
 	        microuptime(&priority_IO_timestamp_for_root);
 	        hard_throttle_on_root = 0;
 	}
+
 	/*
 	 * I/O was done, so don't believe
-	 * the DIRTY state from VM anymore
+	 * the DIRTY state from VM anymore...
+	 * and we need to reset the THROTTLED/PASSIVE
+	 * indicators
 	 */
-	CLR(bp->b_flags, B_WASDIRTY);
+	CLR(bp->b_flags, (B_WASDIRTY | B_THROTTLED_IO | B_PASSIVE));
 	DTRACE_IO1(done, buf_t, bp);
 
 	if (!ISSET(bp->b_flags, B_READ) && !ISSET(bp->b_flags, B_RAW))
@@ -3269,46 +3671,26 @@ buf_biodone(buf_t bp)
 
 	if (ISSET(bp->b_flags, (B_CALL | B_FILTER))) {	/* if necessary, call out */
 		void	(*iodone_func)(struct buf *, void *) = bp->b_iodone;
-		void 	*arg = (void *)bp->b_transaction;
+		void 	*arg = bp->b_transaction;
 		int     callout = ISSET(bp->b_flags, B_CALL);
 
+		if (iodone_func == NULL)
+			panic("biodone: bp @ %p has NULL b_iodone!\n", bp);			
+
 		CLR(bp->b_flags, (B_CALL | B_FILTER));	/* filters and callouts are one-shot */
 		bp->b_iodone = NULL;
 		bp->b_transaction = NULL;
 
-		if (iodone_func == NULL) {
-			panic("biodone: bp @ %p has NULL b_iodone!\n", bp);			
-		} else { 
-		        if (callout)
-			        SET(bp->b_flags, B_DONE);	/* note that it's done */
-			(*iodone_func)(bp, arg);
-		}
-		if (callout) {
-		        int need_wakeup = 0;
+		if (callout)
+		        SET(bp->b_flags, B_DONE);	/* note that it's done */
 
-		        /*
+		(*iodone_func)(bp, arg);
+
+		if (callout) {
+			/*
 			 * assumes that the callback function takes
 			 * ownership of the bp and deals with releasing it if necessary
-			 * BL_WANTED indicates that we've decided to wait on the
-			 * completion of this I/O in a synchronous manner... we
-			 * still call the callback function, but in addition we
-			 * will do a wakeup... BL_CALLDONE indicates that the callback
-			 * routine has completed and its ok for the waiter to take
-			 * 'ownership' of this bp back
 			 */
-		        lck_mtx_lock_spin(buf_mtxp);
-
-			if (bp->b_lflags & BL_WANTED) {
-			        CLR(bp->b_lflags, BL_WANTED);
-				need_wakeup = 1;
-			}
-			SET(bp->b_lflags, BL_CALLDONE);
-
-			lck_mtx_unlock(buf_mtxp);
-			
-			if (need_wakeup)
-			        wakeup(bp);
-
 			goto biodone_done;
 		}
 		/*
@@ -3390,8 +3772,8 @@ void
 vfs_bufstats()
 {
 	int i, j, count;
-	register struct buf *bp;
-	register struct bqueues *dp;
+	struct buf *bp;
+	struct bqueues *dp;
 	int counts[MAXBSIZE/CLBYTES+1];
 	static char *bname[BQUEUES] =
 		{ "LOCKED", "LRU", "AGE", "EMPTY", "META", "LAUNDRY" };
@@ -3418,7 +3800,7 @@ vfs_bufstats()
 }
 #endif /* DIAGNOSTIC */
 
-#define	NRESERVEDIOBUFS	64
+#define	NRESERVEDIOBUFS	128
 
 
 buf_t
@@ -3433,9 +3815,7 @@ alloc_io_buf(vnode_t vp, int priv)
 		bufstats.bufs_iobufsleeps++;
 
 		need_iobuffer = 1;
-		(void) msleep(&need_iobuffer, iobuffer_mtxp, PDROP | (PRIBIO+1), (const char *)"alloc_io_buf", NULL);
-
-		lck_mtx_lock_spin(iobuffer_mtxp);
+		(void) msleep(&need_iobuffer, iobuffer_mtxp, PSPIN | (PRIBIO+1), (const char *)"alloc_io_buf", NULL);
 	}
 	TAILQ_REMOVE(&iobufqueue, bp, b_freelist);
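
A note on the msleep() change in this hunk (flag semantics per sys/param.h, as I read them): PDROP drops the mutex on wakeup, which is why the old code had to retake iobuffer_mtxp by hand before re-testing the loop condition; PSPIN without PDROP has msleep() reacquire the mutex in spin mode before returning, so the explicit relock can go away.
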
 
@@ -3457,6 +3837,7 @@ alloc_io_buf(vnode_t vp, int priv)
 	bp->b_datap = 0;
 	bp->b_flags = 0;
 	bp->b_lflags = BL_BUSY | BL_IOBUF;
+	bp->b_redundancy_flags = 0;
 	bp->b_blkno = bp->b_lblkno = 0;
 #ifdef JOE_DEBUG
 	bp->b_owner = current_thread();
@@ -3551,6 +3932,8 @@ bcleanbuf_thread_init(void)
 	thread_deallocate(thread);
 }
 
+typedef int (*bcleanbufcontinuation)(int);
+
 static void
 bcleanbuf_thread(void)
 {
@@ -3562,10 +3945,9 @@ bcleanbuf_thread(void)
 	        lck_mtx_lock_spin(buf_mtxp);
 
 		while ( (bp = TAILQ_FIRST(&bufqueues[BQ_LAUNDRY])) == NULL) {
- 		        (void)msleep((void *)&bufqueues[BQ_LAUNDRY], buf_mtxp, PDROP | PRIBIO, "blaundry", NULL);
-
-			lck_mtx_lock_spin(buf_mtxp);
+			(void)msleep0(&bufqueues[BQ_LAUNDRY], buf_mtxp, PRIBIO|PDROP, "blaundry", 0, (bcleanbufcontinuation)bcleanbuf_thread);
 		}
+		
 		/*
 		 * Remove from the queue
 		 */
@@ -3597,7 +3979,7 @@ bcleanbuf_thread(void)
 			binstailfree(bp, &bufqueues[BQ_LAUNDRY], BQ_LAUNDRY);
 			blaundrycnt++;
 
-			/* we never leave a busy page on the laundary queue */
+			/* we never leave a busy page on the laundry queue */
 			CLR(bp->b_lflags, BL_BUSY);
 			buf_busycount--;
 #ifdef JOE_DEBUG
@@ -3606,12 +3988,18 @@ bcleanbuf_thread(void)
 #endif
 
 			lck_mtx_unlock(buf_mtxp);
-
-			if (loopcnt > 10) {
-			        (void)tsleep((void *)&bufqueues[BQ_LAUNDRY], PRIBIO, "blaundry", 1);
+			
+			if (loopcnt > MAXLAUNDRY) {
+				/*
+				 * bawrite_internal() can return errors if we're throttled. If we've
+				 * done several I/Os and failed, give the system some time to unthrottle
+				 * the vnode
+				 */
+				(void)tsleep((void *)&bufqueues[BQ_LAUNDRY], PRIBIO, "blaundry", 1);
 				loopcnt = 0;
 			} else {
-			        (void)thread_block(THREAD_CONTINUE_NULL);
+				/* give other threads a chance to run */
+				(void)thread_block(THREAD_CONTINUE_NULL);
 				loopcnt++;
 			}
 		}
@@ -3680,34 +4068,125 @@ buffer_cache_gc(int all)
 {
 	buf_t bp;
 	boolean_t did_large_zfree = FALSE;
+	boolean_t need_wakeup = FALSE;
 	int now = buf_timestamp();
-	uint32_t count = 0;
+	uint32_t found = 0, total_found = 0;
+	struct bqueues privq;
 	int thresh_hold = BUF_STALE_THRESHHOLD;
 
 	if (all)
 		thresh_hold = 0;
+	/* 
+	 * We only care about metadata (incore storage comes from zalloc()).
+	 * No more than 1024 buffers total, and only those not accessed within the
+	 * last 30s.  We will also only examine 128 buffers during a single grab
+	 * of the lock in order to limit lock hold time.
+	 */
+	lck_mtx_lock(buf_mtxp);
+	do {
+		found = 0;
+		TAILQ_INIT(&privq);
+		need_wakeup = FALSE;
 
-	lck_mtx_lock_spin(buf_mtxp);
+		while (((bp = TAILQ_FIRST(&bufqueues[BQ_META]))) && 
+				(now > bp->b_timestamp) &&
+				(now - bp->b_timestamp > thresh_hold) && 
+				(found < BUF_MAX_GC_BATCH_SIZE)) {
+
+			/* Remove from free list */
+			bremfree_locked(bp);
+			found++;
+
+#ifdef JOE_DEBUG
+			bp->b_owner = current_thread();
+			bp->b_tag   = 12;
+#endif
+
+			/* If dirty, move to laundry queue and remember to do wakeup */
+			if (ISSET(bp->b_flags, B_DELWRI)) {
+				SET(bp->b_lflags, BL_WANTDEALLOC);
+
+				bmovelaundry(bp);
+				need_wakeup = TRUE;
+
+				continue;
+			}
+
+			/* 
+			 * Mark busy and put on private list.  We could technically get 
+			 * away without setting BL_BUSY here.
+			 */
+			SET(bp->b_lflags, BL_BUSY);
+			buf_busycount++;
 
-	/* We only care about metadata (incore storage comes from zalloc()) */
-	bp = TAILQ_FIRST(&bufqueues[BQ_META]);
+			/* 
+			 * Remove from hash and dissociate from vp.
+			 */
+			bremhash(bp);
+			if (bp->b_vp) {
+				brelvp_locked(bp);
+			}
 
-	/* Only collect buffers unused in the last N seconds. Note: ordered by timestamp. */
-	while ((bp != NULL) && ((now - bp->b_timestamp) > thresh_hold) && (all || (count < BUF_MAX_GC_COUNT))) {
-		int result, size;
-		boolean_t is_zalloc;
+			TAILQ_INSERT_TAIL(&privq, bp, b_freelist);
+		}
 
-		size = buf_size(bp);
-		is_zalloc = ISSET(bp->b_flags, B_ZALLOC);
+		if (found == 0) {
+			break;
+		}
 
-		result = bcleanbuf(bp, TRUE);
-		if ((result == 0) && is_zalloc && (size >= PAGE_SIZE)) {
-			/* We've definitely freed at least a page to a zone */
-			did_large_zfree = TRUE;
+		/* Drop lock for batch processing */
+		lck_mtx_unlock(buf_mtxp);
+
+		/* Wakeup and yield for laundry if need be */
+		if (need_wakeup) {
+			wakeup(&bufqueues[BQ_LAUNDRY]);
+			(void)thread_block(THREAD_CONTINUE_NULL);
 		}
-		bp = TAILQ_FIRST(&bufqueues[BQ_META]);
-		count++;
-	} 
+
+		/* Clean up every buffer on private list */
+		TAILQ_FOREACH(bp, &privq, b_freelist) {
+			/* Take note if we've definitely freed at least a page to a zone */
+			if ((ISSET(bp->b_flags, B_ZALLOC)) && (buf_size(bp) >= PAGE_SIZE)) {
+				did_large_zfree = TRUE;
+			}    
+
+			trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
+
+			/* Free Storage */
+			buf_free_meta_store(bp);
+
+			/* Release credentials */
+			buf_release_credentials(bp);
+
+			/* Prepare for moving to empty queue */
+			CLR(bp->b_flags, (B_META | B_ZALLOC | B_DELWRI | B_LOCKED 
+						| B_AGE | B_ASYNC | B_NOCACHE | B_FUA));
+			bp->b_whichq = BQ_EMPTY;
+			BLISTNONE(bp);
+		}
+
+		lck_mtx_lock(buf_mtxp);
+
+		/* Back under lock, move them all to invalid hash and clear busy */
+		TAILQ_FOREACH(bp, &privq, b_freelist) {
+			binshash(bp, &invalhash);
+			CLR(bp->b_lflags, BL_BUSY);
+			buf_busycount--;
+
+#ifdef JOE_DEBUG
+			if (bp->b_owner != current_thread()) {
+				panic("Buffer stolen from buffer_cache_gc()");
+			}
+			bp->b_owner = current_thread();
+			bp->b_tag   = 13;
+#endif
+		}
+
+		/* And do a big bulk move to the empty queue */
+		TAILQ_CONCAT(&bufqueues[BQ_EMPTY], &privq, b_freelist);
+		total_found += found;
+
+	} while ((all || (total_found < BUF_MAX_GC_COUNT)) && (found == BUF_MAX_GC_BATCH_SIZE));
 
 	lck_mtx_unlock(buf_mtxp);
 
diff --git a/bsd/vfs/vfs_cache.c b/bsd/vfs/vfs_cache.c
index ba73d95a4..3096d1294 100644
--- a/bsd/vfs/vfs_cache.c
+++ b/bsd/vfs/vfs_cache.c
@@ -114,6 +114,7 @@ long	numcache;			/* number of cache entries allocated */
 int 	desiredNodes;
 int 	desiredNegNodes;
 int	ncs_negtotal;
+int	nc_disabled = 0;
 TAILQ_HEAD(, namecache) nchead;		/* chain of all name cache entries */
 TAILQ_HEAD(, namecache) neghead;	/* chain of only negative cache entries */
 
@@ -309,8 +310,22 @@ again:
 		 */
 		if (((vp->v_parent != NULLVP) && !fixhardlink) ||
 		    (flags & BUILDPATH_NO_FS_ENTER)) {
-			vp = vp->v_parent;
+			/*
+			 * In this if () block we are not allowed to enter the filesystem
+			 * to conclusively get the most accurate parent identifier.
+			 * As a result, if 'vp' does not identify '/' and it
+			 * does not have a valid v_parent, then error out
+			 * and disallow further path construction
+			 */
+			if ((vp->v_parent == NULLVP) && (rootvnode != vp)) {
+				/* Only '/' is allowed to have a NULL parent pointer */
+				ret = EINVAL;
+
+				/* The code below will exit early because 'tvp' (assigned from vp) will be NULL */
+			}
 
+			vp = vp->v_parent;
+			
 			/*
 			 * if the vnode we have in hand isn't a directory and it
 			 * has a v_parent, then we started with the resource fork
@@ -808,11 +823,18 @@ void vnode_uncache_authorized_action(vnode_t vp, kauth_action_t action)
 }
 
 
-boolean_t vnode_cache_is_authorized(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
+extern int bootarg_vnode_cache_defeat;	/* default = 0, from bsd_init.c */
+
+boolean_t
+vnode_cache_is_authorized(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
 {
 	kauth_cred_t	ucred;
 	boolean_t	retval = FALSE;
 
+	/* Boot argument to defeat rights caching */
+	if (bootarg_vnode_cache_defeat)
+		return FALSE;
+
 	if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
 	        /*
 		 * a TTL is enabled on the rights cache... handle it here
@@ -937,7 +959,7 @@ boolean_t vnode_cache_is_stale(vnode_t vp)
  */
 int 
 cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, 
-		vfs_context_t ctx, int *trailing_slash, int *dp_authorized, vnode_t last_dp)
+		vfs_context_t ctx, int *dp_authorized, vnode_t last_dp)
 {
 	char		*cp;		/* pointer into pathname argument */
 	int		vid;
@@ -951,8 +973,12 @@ cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
 	unsigned int	hash;
 	int		error = 0;
 
+#if CONFIG_TRIGGERS
+	vnode_t 	trigger_vp;
+#endif /* CONFIG_TRIGGERS */
+
 	ucred = vfs_context_ucred(ctx);
-	*trailing_slash = 0;
+	ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH);
 
 	NAME_CACHE_LOCK_SHARED();
 
@@ -999,7 +1025,7 @@ cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
 			ndp->ni_pathlen--;
 
 			if (*cp == '\0') {
-			        *trailing_slash = 1;
+			        ndp->ni_flag |= NAMEI_TRAILINGSLASH;
 				*ndp->ni_next = '\0';
 			}
 		}
@@ -1073,10 +1099,12 @@ skiprsrcfork:
 		*dp_authorized = 1;
 
 		if ( (cnp->cn_flags & (ISLASTCN | ISDOTDOT)) ) {
-		        if (cnp->cn_nameiop != LOOKUP)
-			        break;
-		        if (cnp->cn_flags & (LOCKPARENT | NOCACHE))
-			        break;
+			if (cnp->cn_nameiop != LOOKUP)
+				break;
+			if (cnp->cn_flags & LOCKPARENT) 
+				break;
+			if (cnp->cn_flags & NOCACHE)
+				break;
 			if (cnp->cn_flags & ISDOTDOT) {
 				/*
 				 * Force directory hardlinks to go to
@@ -1126,6 +1154,7 @@ skiprsrcfork:
 			        vp = NULL;
 		        break;
 		}
+
 		if ( (mp = vp->v_mountedhere) && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
 
 		        if (mp->mnt_realrootvp == NULLVP || mp->mnt_generation != mount_generation ||
@@ -1133,6 +1162,20 @@ skiprsrcfork:
 			        break;
 			vp = mp->mnt_realrootvp;
 		}
+
+#if CONFIG_TRIGGERS
+		/*
+		 * After traversing all mountpoints stacked here, if we have a
+		 * trigger in hand, resolve it.  Note that we don't need to 
+		 * leave the fast path if the mount has already happened.
+		 */
+		if ((vp->v_resolve != NULL) && 
+				(vp->v_resolve->vr_resolve_func != NULL)) {
+			break;
+		} 
+#endif /* CONFIG_TRIGGERS */
+
+
 		dp = vp;
 		vp = NULLVP;
 
@@ -1184,7 +1227,7 @@ need_dp:
 				 * immediately w/o waiting... it always succeeds
 				 */
 				vnode_get(dp);
-			} else if ( (vnode_getwithvid(dp, vid)) ) {
+			} else if ( (vnode_getwithvid_drainok(dp, vid)) ) {
 				/*
 				 * failure indicates the vnode
 				 * changed identity or is being
@@ -1202,7 +1245,7 @@ need_dp:
 		}
 	}
 	if (vp != NULLVP) {
-	        if ( (vnode_getwithvid(vp, vvid)) ) {
+	        if ( (vnode_getwithvid_drainok(vp, vvid)) ) {
 		        vp = NULLVP;
 
 		        /*
@@ -1219,9 +1262,24 @@ need_dp:
 			}
 		}
 	}
+
 	ndp->ni_dvp = dp;
 	ndp->ni_vp  = vp;
 
+#if CONFIG_TRIGGERS
+	trigger_vp = vp ? vp : dp;
+	if ((error == 0) && (trigger_vp != NULLVP) && vnode_isdir(trigger_vp)) {
+		error = vnode_trigger_resolve(trigger_vp, ndp, ctx);
+		if (error) {
+			if (vp)
+				vnode_put(vp);
+			if (dp) 
+				vnode_put(dp);
+			goto errorout;
+		}
+	} 
+#endif /* CONFIG_TRIGGERS */
+
 errorout:
 	/* 
 	 * If we came into cache_lookup_path after an iteration of the lookup loop that
@@ -1249,6 +1307,10 @@ cache_lookup_locked(vnode_t dvp, struct componentname *cnp)
 	long namelen = cnp->cn_namelen;
 	unsigned int hashval = (cnp->cn_hash & NCHASHMASK);
 	
+	if (nc_disabled) {
+		return NULL;
+	}
+
 	ncpp = NCHHASH(dvp, cnp->cn_hash);
 	LIST_FOREACH(ncp, ncpp, nc_hash) {
 	        if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
@@ -1328,6 +1390,10 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
 		cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
 	hashval = (cnp->cn_hash & NCHASHMASK);
 
+	if (nc_disabled) {
+		return 0;
+	}
+
 	NAME_CACHE_LOCK_SHARED();
 
 relook:
@@ -1485,6 +1551,9 @@ cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cn
         struct namecache *ncp, *negp;
 	struct nchashhead *ncpp;
 
+	if (nc_disabled) 
+		return;
+
 	/*
 	 * if the entry is for -ve caching vp is null
 	 */
@@ -1799,7 +1868,10 @@ cache_purge(vnode_t vp)
         struct namecache *ncp;
 	kauth_cred_t tcred = NULL;
 
-	if ((LIST_FIRST(&vp->v_nclinks) == NULL) && (LIST_FIRST(&vp->v_ncchildren) == NULL) && (vp->v_cred == NOCRED))
+	if ((LIST_FIRST(&vp->v_nclinks) == NULL) && 
+			(LIST_FIRST(&vp->v_ncchildren) == NULL) && 
+			(vp->v_cred == NOCRED) &&
+			(vp->v_parent == NULLVP))
 	        return;
 
 	NAME_CACHE_LOCK();
@@ -1973,9 +2045,6 @@ add_name_internal(const char *name, uint32_t len, u_int hashval, boolean_t need_
         uint32_t	  lock_index;
 	char              *ptr;
     
-	if (hashval == 0) {
-		hashval = hash_string(name, 0);
-	}
 	/*
 	 * if the length already accounts for the null-byte, then
 	 * subtract one so later on we don't index past the end
@@ -1984,6 +2053,10 @@ add_name_internal(const char *name, uint32_t len, u_int hashval, boolean_t need_
 	if (len > 0 && name[len-1] == '\0') {
 		len--;
 	}
+	if (hashval == 0) {
+		hashval = hash_string(name, len);
+	}
+
 	/*
 	 * take this lock 'shared' to keep the hash stable
 	 * if someone else decides to grow the pool they
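
The add_name_internal() reordering above is a correctness fix: the hash has to be computed over the trimmed length, or the same name inserted with and without its trailing NUL byte lands in different hash buckets. A small standalone illustration, using FNV-1a as an assumed stand-in for hash_string() (whose definition is not part of this patch):

	#include <stdint.h>
	#include <stdio.h>

	/* assumed stand-in for hash_string() */
	static uint32_t
	fnv1a(const char *s, uint32_t len)
	{
		uint32_t h = 2166136261u;

		while (len--) {
			h ^= (unsigned char)*s++;
			h *= 16777619u;
		}
		return h;
	}

	int
	main(void)
	{
		/* hashing "foo" with and without the trailing NUL differs,
		 * so trim the length first, then hash */
		printf("%08x != %08x\n", fnv1a("foo", 3), fnv1a("foo", 4));
		return 0;
	}
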
diff --git a/bsd/vfs/vfs_cluster.c b/bsd/vfs/vfs_cluster.c
index 499056a3b..0e8bd67dd 100644
--- a/bsd/vfs/vfs_cluster.c
+++ b/bsd/vfs/vfs_cluster.c
@@ -82,6 +82,7 @@
 #include <mach/memory_object_types.h>
 #include <mach/vm_map.h>
 #include <mach/upl.h>
+#include <kern/task.h>
 
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
@@ -90,6 +91,8 @@
 #include <sys/kdebug.h>
 #include <libkern/OSAtomic.h>  
 
+#include <sys/sdt.h>
+
 #if 0
 #undef KERNEL_DEBUG
 #define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
@@ -111,6 +114,8 @@
 #define CL_DIRECT_IO	0x1000
 #define CL_PASSIVE	0x2000
 #define CL_IOSTREAMING	0x4000
+#define CL_CLOSE	0x8000
+#define	CL_ENCRYPTED	0x10000
 
 #define MAX_VECTOR_UPL_ELEMENTS	8
 #define MAX_VECTOR_UPL_SIZE	(2 * MAX_UPL_SIZE) * PAGE_SIZE
@@ -122,6 +127,7 @@ extern void vector_upl_set_pagelist(upl_t);
 extern void vector_upl_set_iostate(upl_t, upl_t, vm_offset_t, u_int32_t);
 
 struct clios {
+	lck_mtx_t io_mtxp;
         u_int  io_completed;       /* amount of io that has currently completed */
         u_int  io_issued;          /* amount of io that was successfully issued */
         int    io_error;           /* error code of first error encountered */
@@ -131,7 +137,6 @@ struct clios {
 static lck_grp_t	*cl_mtx_grp;
 static lck_attr_t	*cl_mtx_attr;
 static lck_grp_attr_t   *cl_mtx_grp_attr;
-static lck_mtx_t	*cl_mtxp;
 static lck_mtx_t	*cl_transaction_mtxp;
 
 
@@ -157,6 +162,8 @@ static int cluster_iodone(buf_t bp, void *callback_arg);
 static int cluster_ioerror(upl_t upl, int upl_offset, int abort_size, int error, int io_flags);
 static int cluster_hard_throttle_on(vnode_t vp, uint32_t);
 
+static void cluster_iostate_wait(struct clios *iostate, u_int target, const char *wait_name);
+
 static void cluster_syncup(vnode_t vp, off_t newEOF, int (*)(buf_t, void *), void *callback_arg);
 
 static void cluster_read_upl_release(upl_t upl, int start_pg, int last_pg, int take_reference);
@@ -183,10 +190,10 @@ static void	cluster_read_ahead(vnode_t vp, struct cl_extent *extent, off_t files
 
 static int	cluster_push_now(vnode_t vp, struct cl_extent *, off_t EOF, int flags, int (*)(buf_t, void *), void *callback_arg);
 
-static int	cluster_try_push(struct cl_writebehind *, vnode_t vp, off_t EOF, int push_flag, int (*)(buf_t, void *), void *callback_arg);
+static int	cluster_try_push(struct cl_writebehind *, vnode_t vp, off_t EOF, int push_flag, int flags, int (*)(buf_t, void *), void *callback_arg);
 
 static void	sparse_cluster_switch(struct cl_writebehind *, vnode_t vp, off_t EOF, int (*)(buf_t, void *), void *callback_arg);
-static void	sparse_cluster_push(void **cmapp, vnode_t vp, off_t EOF, int push_flag, int (*)(buf_t, void *), void *callback_arg);
+static void	sparse_cluster_push(void **cmapp, vnode_t vp, off_t EOF, int push_flag, int io_flags, int (*)(buf_t, void *), void *callback_arg);
 static void	sparse_cluster_add(void **cmapp, vnode_t vp, struct cl_extent *, off_t EOF, int (*)(buf_t, void *), void *callback_arg);
 
 static kern_return_t vfs_drt_mark_pages(void **cmapp, off_t offset, u_int length, u_int *setcountp);
@@ -203,12 +210,20 @@ static kern_return_t vfs_drt_control(void **cmapp, int op_type);
 #define MAX_VECTS		16
 #define MIN_DIRECT_WRITE_SIZE	(4 * PAGE_SIZE)
 
+#define WRITE_THROTTLE		6
+#define WRITE_THROTTLE_SSD	2
+#define WRITE_BEHIND		1
+#define WRITE_BEHIND_SSD	1
+#define PREFETCH		3
+#define PREFETCH_SSD		2
+
 #define IO_SCALE(vp, base)		(vp->v_mount->mnt_ioscale * base)
 #define MAX_CLUSTER_SIZE(vp)		(cluster_max_io_size(vp->v_mount, CL_WRITE))
-#define MAX_PREFETCH(vp, io_size)	(io_size * IO_SCALE(vp, 3))
+#define MAX_PREFETCH(vp, size, is_ssd)	(size * IO_SCALE(vp, (is_ssd && !ignore_is_ssd) ? PREFETCH_SSD : PREFETCH))
 
-
-int speculative_reads_disabled = 0;
+int	ignore_is_ssd = 0;
+int	speculative_reads_disabled = 0;
+uint32_t speculative_prefetch_max = (MAX_UPL_SIZE * 3);
 
 /*
  * throttle the number of async writes that
@@ -235,15 +250,6 @@ cluster_init(void) {
 	 */
 	cl_mtx_attr = lck_attr_alloc_init();
 
-	/*
-	 * allocate and initialize mutex's used to protect updates and waits
-	 * on the cluster_io context
-	 */
-	cl_mtxp	= lck_mtx_alloc_init(cl_mtx_grp, cl_mtx_attr);
-
-	if (cl_mtxp == NULL)
-	        panic("cluster_init: failed to allocate cl_mtxp");
-
 	cl_transaction_mtxp = lck_mtx_alloc_init(cl_mtx_grp, cl_mtx_attr);
 
 	if (cl_transaction_mtxp == NULL)
@@ -412,7 +418,7 @@ cluster_syncup(vnode_t vp, off_t newEOF, int (*callback)(buf_t, void *), void *c
 	        if (wbp->cl_number) {
 		        lck_mtx_lock(&wbp->cl_lockw);
 
-			cluster_try_push(wbp, vp, newEOF, PUSH_ALL | PUSH_SYNC, callback, callback_arg);
+			cluster_try_push(wbp, vp, newEOF, PUSH_ALL | PUSH_SYNC, 0, callback, callback_arg);
 
 			lck_mtx_unlock(&wbp->cl_lockw);
 		}
@@ -450,6 +456,27 @@ cluster_hard_throttle_on(vnode_t vp, uint32_t hard_throttle)
 }
 
 
+static void
+cluster_iostate_wait(struct clios *iostate, u_int target, const char *wait_name)
+{
+
+	lck_mtx_lock(&iostate->io_mtxp);
+
+	while ((iostate->io_issued - iostate->io_completed) > target) {
+
+		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
+			     iostate->io_issued, iostate->io_completed, target, 0, 0);
+
+		iostate->io_wanted = 1;
+		msleep((caddr_t)&iostate->io_wanted, &iostate->io_mtxp, PRIBIO + 1, wait_name, NULL);
+
+		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
+			     iostate->io_issued, iostate->io_completed, target, 0, 0);
+	}	
+	lck_mtx_unlock(&iostate->io_mtxp);
+}
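+/*
+ * Illustrative caller-side shape (assumed names; not part of this change):
+ * throttle while more than 'max_inflight' bytes of async I/O remain
+ * outstanding, then drain fully before the clios goes out of scope:
+ *
+ *	cluster_iostate_wait(&iostate, max_inflight, "my_direct_write");
+ *	...issue more cluster_io() requests...
+ *	cluster_iostate_wait(&iostate, 0, "my_direct_write");
+ */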
+
+
 static int
 cluster_ioerror(upl_t upl, int upl_offset, int abort_size, int error, int io_flags)
 {
@@ -457,7 +484,7 @@ cluster_ioerror(upl_t upl, int upl_offset, int abort_size, int error, int io_fla
 	int page_in  = 0;
 	int page_out = 0;
 
-	if (io_flags & B_PHYS)
+	if ((io_flags & (B_PHYS | B_CACHE)) == (B_PHYS | B_CACHE))
 	        /*
 		 * direct write of any flavor, or a direct read that wasn't aligned
 		 */
@@ -517,33 +544,44 @@ cluster_iodone(buf_t bp, void *callback_arg)
 		     cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0);
 
 	if (cbp_head->b_trans_next || !(cbp_head->b_flags & B_EOT)) {
+		boolean_t	need_wakeup = FALSE;
 
 		lck_mtx_lock_spin(cl_transaction_mtxp);
 
 		bp->b_flags |= B_TDONE;
 		
+		if (bp->b_flags & B_TWANTED) {
+			CLR(bp->b_flags, B_TWANTED);
+			need_wakeup = TRUE;
+		}
 		for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) {
-		        /*
+			/*
 			 * all I/O requests that are part of this transaction
 			 * have to complete before we can process it
 			 */
-		        if ( !(cbp->b_flags & B_TDONE)) {
+			if ( !(cbp->b_flags & B_TDONE)) {
 
-			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
+				KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
 					     cbp_head, cbp, cbp->b_bcount, cbp->b_flags, 0);
 
 				lck_mtx_unlock(cl_transaction_mtxp);
+
+				if (need_wakeup == TRUE)
+					wakeup(bp);
+
 				return 0;
 			}
 			if (cbp->b_flags & B_EOT)
-			        transaction_complete = TRUE;
+				transaction_complete = TRUE;
 		}
 		lck_mtx_unlock(cl_transaction_mtxp);
 
+		if (need_wakeup == TRUE)
+			wakeup(bp);
+
 		if (transaction_complete == FALSE) {
-		        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
+			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END,
 				     cbp_head, 0, 0, 0, 0);
-
 			return 0;
 		}
 	}
@@ -609,7 +647,7 @@ cluster_iodone(buf_t bp, void *callback_arg)
 		 * someone has issued multiple I/Os asynchronously
 		 * and is waiting for them to complete (streaming)
 		 */
-		lck_mtx_lock_spin(cl_mtxp);
+		lck_mtx_lock_spin(&iostate->io_mtxp);
 
 	        if (error && iostate->io_error == 0)
 		        iostate->io_error = error;
@@ -624,7 +662,7 @@ cluster_iodone(buf_t bp, void *callback_arg)
 		        iostate->io_wanted = 0;
 			need_wakeup = 1;
 		}
-		lck_mtx_unlock(cl_mtxp);
+		lck_mtx_unlock(&iostate->io_mtxp);
 
 		if (need_wakeup)
 		        wakeup((caddr_t)&iostate->io_wanted);
@@ -649,7 +687,7 @@ cluster_iodone(buf_t bp, void *callback_arg)
 			ubc_upl_commit_range(upl, upl_offset - pg_offset, commit_size, upl_flags);
 		}
 	}
-	if ((b_flags & B_NEED_IODONE) && real_bp) {
+	if (real_bp) {
 		if (error) {
 			real_bp->b_flags |= B_ERROR;
 			real_bp->b_error = error;
@@ -735,27 +773,36 @@ cluster_wait_IO(buf_t cbp_head, int async)
 	        /*
 		 * async callback completion will not normally
 		 * generate a wakeup upon I/O completion...
-		 * by setting BL_WANTED, we will force a wakeup
+		 * by setting B_TWANTED, we will force a wakeup
 		 * to occur as any outstanding I/Os complete... 
-		 * I/Os already completed will have BL_CALLDONE already
-		 * set and we won't block in buf_biowait_callback..
+		 * I/Os already completed will have B_TDONE already
+		 * set and won't cause us to block
 		 * note that we're actually waiting for the bp to have
 		 * completed the callback function... only then
 		 * can we safely take back ownership of the bp
-		 * need the main buf mutex in order to safely
-		 * update b_lflags
 		 */
-	        buf_list_lock();
+		lck_mtx_lock_spin(cl_transaction_mtxp);
 
 		for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next)
-		      cbp->b_lflags |= BL_WANTED;
+		      cbp->b_flags |= B_TWANTED;
 
-		buf_list_unlock();
+		lck_mtx_unlock(cl_transaction_mtxp);
 	}
 	for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) {
-	        if (async)
-		        buf_biowait_callback(cbp);
-		else
+
+	        if (async) {
+			while (!ISSET(cbp->b_flags, B_TDONE)) {
+
+				lck_mtx_lock_spin(cl_transaction_mtxp);
+
+				if (!ISSET(cbp->b_flags, B_TDONE)) {
+					DTRACE_IO1(wait__start, buf_t, cbp);
+					(void) msleep(cbp, cl_transaction_mtxp, PDROP | (PRIBIO+1), "cluster_wait_IO", NULL);
+					DTRACE_IO1(wait__done, buf_t, cbp);
+				} else
+					lck_mtx_unlock(cl_transaction_mtxp);
+			}
+		} else
 		        buf_biowait(cbp);
 	}
 }
@@ -781,7 +828,7 @@ cluster_complete_transaction(buf_t *cbp_head, void *callback_arg, int *retval, i
 	 * so that cluster_iodone sees the transaction as completed
 	 */
 	for (cbp = *cbp_head; cbp; cbp = cbp->b_trans_next)
-	        cbp->b_flags |= B_TDONE;
+		cbp->b_flags |= B_TDONE;
 
 	error = cluster_iodone(*cbp_head, callback_arg);
 
@@ -910,10 +957,9 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
 			else {
 			        u_int max_cluster;
 				u_int max_cluster_size;
-				u_int max_prefetch;
-				
+				u_int scale;
+
 				max_cluster_size = MAX_CLUSTER_SIZE(vp);
-				max_prefetch = MAX_PREFETCH(vp, cluster_max_io_size(vp->v_mount, CL_READ));
 
 				if (max_iosize > max_cluster_size)
 				        max_cluster = max_cluster_size;
@@ -922,8 +968,16 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
 
 				if (size < max_cluster)
 				        max_cluster = size;
+
+				if ((vp->v_mount->mnt_kern_flag & MNTK_SSD) && !ignore_is_ssd)
+					scale = WRITE_THROTTLE_SSD;
+				else
+					scale = WRITE_THROTTLE;
 
-			        async_throttle = min(IO_SCALE(vp, VNODE_ASYNC_THROTTLE), (max_prefetch / max_cluster) - 1);
+				if (flags & CL_CLOSE)
+					scale += MAX_CLUSTERS;
+
+			        async_throttle = min(IO_SCALE(vp, VNODE_ASYNC_THROTTLE), ((scale * max_cluster_size) / max_cluster) - 1);
 			}
 		}
 	}
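With MAX_PREFETCH gone from this path, async_throttle is now derived from a write-throttle scale: WRITE_THROTTLE_SSD on solid-state media, WRITE_THROTTLE otherwise, with MAX_CLUSTERS of extra headroom when pushing on close. As a standalone sketch (the #define values below are illustrative assumptions, not the header definitions):

#define WRITE_THROTTLE		6	/* assumed for illustration */
#define WRITE_THROTTLE_SSD	2	/* assumed for illustration */
#define MAX_CLUSTERS		8	/* assumed for illustration */

static u_int
compute_async_throttle(boolean_t is_ssd, boolean_t is_close,
    u_int vnode_async_throttle, u_int max_cluster_size, u_int max_cluster)
{
	u_int scale = is_ssd ? WRITE_THROTTLE_SSD : WRITE_THROTTLE;

	if (is_close)
		scale += MAX_CLUSTERS;	/* deeper queue for CL_CLOSE pushes */

	return (min(vnode_async_throttle,
	    ((scale * max_cluster_size) / max_cluster) - 1));
}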
@@ -935,12 +989,14 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
 		io_flags |= B_IOSTREAMING;
 	if (flags & CL_COMMIT)
 	        io_flags |= B_COMMIT_UPL;
-	if (flags & CL_PRESERVE)
+	if (flags & CL_DIRECT_IO)
 	        io_flags |= B_PHYS;
-	if (flags & CL_KEEPCACHED)
-	        io_flags |= B_CACHE;
+	if (flags & (CL_PRESERVE | CL_KEEPCACHED))
+		io_flags |= B_CACHE;
 	if (flags & CL_PASSIVE)
 	        io_flags |= B_PASSIVE;
+	if (flags & CL_ENCRYPTED)
+		io_flags |= B_ENCRYPTED_IO;
 	if (vp->v_flag & VSYSTEM)
 	        io_flags |= B_META;
 
@@ -997,7 +1053,7 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
 		        off_t	e_offset;
 			int	pageout_flags;
 
-			if(upl_get_internal_vectorupl(upl))
+			if (upl_get_internal_vectorupl(upl))
 				panic("Vector UPLs should not take this code-path\n");
 		        /*
 			 * we're writing into a 'hole'
@@ -1104,7 +1160,6 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
 			}
 			if (vnode_pageout(vp, upl, trunc_page(upl_offset), trunc_page_64(f_offset), PAGE_SIZE, pageout_flags, NULL) != PAGER_SUCCESS) {
 			        error = EINVAL;
-			 	break;
 			}
 			e_offset = round_page_64(f_offset + 1);
 			io_size = e_offset - f_offset;
@@ -1133,6 +1188,11 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
 				 */
 			        size = 0;
 			}
+			if (error) {
+				if (size == 0)
+					flags &= ~CL_COMMIT;
+			 	break;
+			}
 			continue;
 		}
 		lblkno = (daddr64_t)(f_offset / PAGE_SIZE_64);
@@ -1370,10 +1430,8 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
 		        cbp_head = cbp;
 			cbp_tail = cbp;
 
-			if ( (cbp_head->b_real_bp = real_bp) ) {
-			        cbp_head->b_flags |= B_NEED_IODONE;
+			if ( (cbp_head->b_real_bp = real_bp) )
 				real_bp = (buf_t)NULL;
-			}
 		}
 		*(buf_t *)(&cbp->b_trans_head) = cbp_head;
 
@@ -1479,7 +1537,7 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
 			 * since we never really issued the io
 			 * just go ahead and adjust it back
 			 */
-		        lck_mtx_lock_spin(cl_mtxp);
+		        lck_mtx_lock_spin(&iostate->io_mtxp);
 
 		        if (iostate->io_error == 0)
 			        iostate->io_error = error;
@@ -1493,7 +1551,7 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no
 			        iostate->io_wanted = 0;
 				need_wakeup = 1;
 			}
-		        lck_mtx_unlock(cl_mtxp);
+		        lck_mtx_unlock(&iostate->io_mtxp);
 
 			if (need_wakeup)
 			        wakeup((caddr_t)&iostate->io_wanted);
@@ -1604,8 +1662,16 @@ cluster_read_ahead(vnode_t vp, struct cl_extent *extent, off_t filesize, struct
 
 		return;
 	}
-	max_prefetch = MAX_PREFETCH(vp, cluster_max_io_size(vp->v_mount, CL_READ));
+	max_prefetch = MAX_PREFETCH(vp, cluster_max_io_size(vp->v_mount, CL_READ), (vp->v_mount->mnt_kern_flag & MNTK_SSD));
 
+	if ((max_prefetch / PAGE_SIZE) > speculative_prefetch_max)
+		max_prefetch = (speculative_prefetch_max * PAGE_SIZE);
+
+	if (max_prefetch <= PAGE_SIZE) {
+		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END,
+			     rap->cl_ralen, (int)rap->cl_maxra, (int)rap->cl_lastr, 6, 0);
+		return;
+	}
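The new guard bounds read-ahead by the speculative_prefetch_max sysctl (a page count) and abandons read-ahead once it would cover at most a single page. As a sketch (hypothetical helper; a zero return is the caller's cue to bail out as above):

static u_int32_t
clamp_prefetch(u_int32_t max_prefetch, u_int32_t speculative_prefetch_max)
{
	if ((max_prefetch / PAGE_SIZE) > speculative_prefetch_max)
		max_prefetch = speculative_prefetch_max * PAGE_SIZE;

	if (max_prefetch <= PAGE_SIZE)
		return (0);	/* not worth issuing read-ahead */

	return (max_prefetch);
}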
 	if (extent->e_addr < rap->cl_maxra) {
 	        if ((rap->cl_maxra - extent->e_addr) > ((max_prefetch / PAGE_SIZE) / 4)) {
 
@@ -1667,18 +1733,7 @@ cluster_pageout_ext(vnode_t vp, upl_t upl, upl_offset_t upl_offset, off_t f_offs
         off_t         max_size;
 	int           local_flags;
 
-	if (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
-	        /*
-		 * if we know we're issuing this I/O to a virtual device (i.e. disk image)
-		 * then we don't want to enforce this throttle... if we do, we can 
-		 * potentially deadlock since we're stalling the pageout thread at a time
-		 * when the disk image might need additional memory (which won't be available
-		 * if the pageout thread can't run)... instead we'll just depend on the throttle
-		 * that the pageout thread now has in place to deal with external files
-		 */
-	        local_flags = CL_PAGEOUT;
-	else
-	        local_flags = CL_PAGEOUT | CL_THROTTLE;
+	local_flags = CL_PAGEOUT | CL_THROTTLE;
 
 	if ((flags & UPL_IOSYNC) == 0) 
 		local_flags |= CL_ASYNC;
@@ -1686,6 +1741,8 @@ cluster_pageout_ext(vnode_t vp, upl_t upl, upl_offset_t upl_offset, off_t f_offs
 		local_flags |= CL_COMMIT;
 	if ((flags & UPL_KEEPCACHED))
 	        local_flags |= CL_KEEPCACHED;
+	if (flags & UPL_PAGING_ENCRYPTED)
+		local_flags |= CL_ENCRYPTED;
 
 
 	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 52)) | DBG_FUNC_NONE,
@@ -1762,6 +1819,8 @@ cluster_pagein_ext(vnode_t vp, upl_t upl, upl_offset_t upl_offset, off_t f_offse
 		local_flags |= CL_COMMIT;
 	if (flags & UPL_IOSTREAMING)
 		local_flags |= CL_IOSTREAMING;
+	if (flags & UPL_PAGING_ENCRYPTED)
+		local_flags |= CL_ENCRYPTED;
 
 
 	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 56)) | DBG_FUNC_NONE,
@@ -1869,12 +1928,12 @@ cluster_write_ext(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, off_t
 	}
         /*
          * do a write through the cache if one of the following is true....
-         *   NOCACHE is not true and
+         *   NOCACHE is not true or NODIRECT is true
          *   the uio request doesn't target USERSPACE
          * otherwise, find out if we want the direct or contig variant for
          * the first vector in the uio request
          */
-        if ( (flags & IO_NOCACHE) && UIO_SEG_IS_USER_SPACE(uio->uio_segflg) )
+	if ( ((flags & (IO_NOCACHE | IO_NODIRECT)) == IO_NOCACHE) && UIO_SEG_IS_USER_SPACE(uio->uio_segflg) )
 	        retval = cluster_io_type(uio, &write_type, &write_length, MIN_DIRECT_WRITE_SIZE);
 
         if ( (flags & (IO_TAILZEROFILL | IO_HEADZEROFILL)) && write_type == IO_DIRECT)
@@ -2027,6 +2086,8 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in
 	iostate.io_error = 0;
 	iostate.io_wanted = 0;
 
+	lck_mtx_init(&iostate.io_mtxp, cl_mtx_grp, cl_mtx_attr);
+
 	mem_alignment_mask = (u_int32_t)vp->v_mount->mnt_alignmentmask;
 	devblocksize = (u_int32_t)vp->v_mount->mnt_devblocksize;
 
@@ -2207,23 +2268,9 @@ next_dwrite:
 		 * if there are already too many outstanding writes
 		 * wait until some complete before issuing the next
 		 */
-		if (iostate.io_issued > iostate.io_completed) {
-
-			lck_mtx_lock(cl_mtxp);
-
-			while ((iostate.io_issued - iostate.io_completed) > (max_upl_size * IO_SCALE(vp, 2))) {
-
-				KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
-					     iostate.io_issued, iostate.io_completed, max_upl_size * IO_SCALE(vp, 2), 0, 0);
-
-				iostate.io_wanted = 1;
-				msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_write_direct", NULL);
+		if (iostate.io_issued > iostate.io_completed)
+			cluster_iostate_wait(&iostate, max_upl_size * IO_SCALE(vp, 2), "cluster_write_direct");
 
-				KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
-					     iostate.io_issued, iostate.io_completed, max_upl_size * IO_SCALE(vp, 2), 0, 0);
-			}	
-			lck_mtx_unlock(cl_mtxp);
-		}
 		if (iostate.io_error) {
 		        /*
 			 * one of the earlier writes we issued ran into a hard error
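From here on, every open-coded "wait until in-flight I/O drains to a target" loop is collapsed into a cluster_iostate_wait() call. Its body isn't shown in these hunks, but given the loops it replaces, it presumably looks along these lines (a sketch, not the shipped implementation):

static void
cluster_iostate_wait(struct clios *iostate, u_int target, const char *wait_name)
{
	lck_mtx_lock(&iostate->io_mtxp);

	while ((iostate->io_issued - iostate->io_completed) > target) {
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
			     iostate->io_issued, iostate->io_completed, target, 0, 0);

		iostate->io_wanted = 1;
		msleep((caddr_t)&iostate->io_wanted, &iostate->io_mtxp,
		       PRIBIO + 1, wait_name, NULL);

		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
			     iostate->io_issued, iostate->io_completed, target, 0, 0);
	}
	lck_mtx_unlock(&iostate->io_mtxp);
}

The structural change that matters is that the lock now lives in the clios itself (io_mtxp, initialized per request and destroyed once the stream has drained) instead of the single global cl_mtxp, so concurrent direct-I/O streams no longer serialize on one mutex.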
@@ -2303,7 +2350,7 @@ next_dwrite:
 
 wait_for_dwrites:
 
-	if(retval == 0 && iostate.io_error == 0 && useVectorUPL && vector_upl_index) {
+	if (retval == 0 && iostate.io_error == 0 && useVectorUPL && vector_upl_index) {
 		retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg);
 		reset_vector_run_state();	
 	}
@@ -2313,23 +2360,13 @@ wait_for_dwrites:
 		 * make sure all async writes issued as part of this stream
 		 * have completed before we return
 		 */
-	        lck_mtx_lock(cl_mtxp);
-
-		while (iostate.io_issued != iostate.io_completed) {
-			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
-				     iostate.io_issued, iostate.io_completed, 0, 0, 0);
-
-		        iostate.io_wanted = 1;
-			msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_write_direct", NULL);
-
-			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
-				     iostate.io_issued, iostate.io_completed, 0, 0, 0);
-		}	
-		lck_mtx_unlock(cl_mtxp);
+		cluster_iostate_wait(&iostate, 0, "cluster_write_direct");
 	}
 	if (iostate.io_error)
 	        retval = iostate.io_error;
 
+	lck_mtx_destroy(&iostate.io_mtxp, cl_mtx_grp);
+
 	if (io_req_size && retval == 0) {
 	        /*
 		 * we couldn't handle the tail of this request in DIRECT mode
@@ -2392,6 +2429,8 @@ cluster_write_contig(vnode_t vp, struct uio *uio, off_t newEOF, int *write_type,
         iostate.io_error = 0;
         iostate.io_wanted = 0;
 
+	lck_mtx_init(&iostate.io_mtxp, cl_mtx_grp, cl_mtx_attr);
+
 next_cwrite:
 	io_size = *write_length;
 
@@ -2480,22 +2519,9 @@ next_cwrite:
 		 * if there are already too many outstanding writes
 		 * wait until some have completed before issuing the next
 		 */
-		if (iostate.io_issued > iostate.io_completed) {
-		        lck_mtx_lock(cl_mtxp);
-
-			while ((iostate.io_issued - iostate.io_completed) > (MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2))) {
+		if (iostate.io_issued > iostate.io_completed)
+			cluster_iostate_wait(&iostate, MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2), "cluster_write_contig");
 
-				KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
-					     iostate.io_issued, iostate.io_completed, MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2), 0, 0);
-
-			        iostate.io_wanted = 1;
-				msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_write_contig", NULL);
-
-				KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
-					     iostate.io_issued, iostate.io_completed, MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2), 0, 0);
-			}
-			lck_mtx_unlock(cl_mtxp);
-		}
                 if (iostate.io_error) {
                         /*
                          * one of the earlier writes we issued ran into a hard error
@@ -2539,25 +2565,14 @@ wait_for_cwrites:
          * make sure all async writes that are part of this stream
          * have completed before we proceed
          */
-	if (iostate.io_issued > iostate.io_completed) {
-		
-		lck_mtx_lock(cl_mtxp);
-
-		while (iostate.io_issued != iostate.io_completed) {
-			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
-				     iostate.io_issued, iostate.io_completed, 0, 0, 0);
-
-			iostate.io_wanted = 1;
-			msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_write_contig", NULL);
+	if (iostate.io_issued > iostate.io_completed)
+		cluster_iostate_wait(&iostate, 0, "cluster_write_contig");
 
-			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
-				     iostate.io_issued, iostate.io_completed, 0, 0, 0);
-		}
-		lck_mtx_unlock(cl_mtxp);
-	}
         if (iostate.io_error)
 	        error = iostate.io_error;
 
+	lck_mtx_destroy(&iostate.io_mtxp, cl_mtx_grp);
+
 	if (error == 0 && tail_size)
 	        error = cluster_align_phys_io(vp, uio, src_paddr, tail_size, 0, callback, callback_arg);
 
@@ -2632,6 +2647,9 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
 	off_t            zero_off;
 	long long        zero_cnt1;
 	off_t            zero_off1;
+	off_t		 write_off = 0;
+	int		 write_cnt = 0;
+	boolean_t	 first_pass = FALSE;
 	struct cl_extent cl;
 	struct cl_writebehind *wbp;
 	int              bflag;
@@ -2713,7 +2731,16 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
 			     retval, 0, 0, 0, 0);
 		return (0);
 	}
-
+	if (uio) {
+		write_off = uio->uio_offset;
+		write_cnt = uio_resid(uio);
+		/*
+		 * delay updating the sequential write info
+		 * in the control block until we've obtained
+		 * the lock for it
+		 */
+		first_pass = TRUE;
+	}
 	while ((total_size = (io_resid + zero_cnt + zero_cnt1)) && retval == 0) {
 	        /*
 		 * for this iteration of the loop, figure out where our starting point is
@@ -3008,7 +3035,7 @@ check_cluster:
 				 */
 				wbp->cl_number = 0;
 
-				sparse_cluster_push(&(wbp->cl_scmap), vp, newEOF, PUSH_ALL, callback, callback_arg);
+				sparse_cluster_push(&(wbp->cl_scmap), vp, newEOF, PUSH_ALL, 0, callback, callback_arg);
 				/*
 				 * no clusters of either type present at this point
 				 * so just go directly to start_new_cluster since
@@ -3017,7 +3044,17 @@ check_cluster:
 				 * to avoid the deadlock with sparse_cluster_push
 				 */
 				goto start_new_cluster;
-			}		    
+			}
+			if (first_pass) {
+				if (write_off == wbp->cl_last_write)
+					wbp->cl_seq_written += write_cnt;
+				else
+					wbp->cl_seq_written = write_cnt;
+
+				wbp->cl_last_write = write_off + write_cnt;
+
+				first_pass = FALSE;
+			}
 			if (wbp->cl_number == 0)
 			        /*
 				 * no clusters currently present
@@ -3132,14 +3169,27 @@ check_cluster:
 				 */
 			        goto delay_io;
 
-			if (wbp->cl_number < MAX_CLUSTERS)
+			if (!((unsigned int)vfs_flags(vp->v_mount) & MNT_DEFWRITE) &&
+			    wbp->cl_number == MAX_CLUSTERS &&
+			    wbp->cl_seq_written >= (MAX_CLUSTERS * (max_cluster_pgcount * PAGE_SIZE))) {
+				uint32_t	n;
+
+				if (vp->v_mount->mnt_kern_flag & MNTK_SSD)
+					n = WRITE_BEHIND_SSD;
+				else
+					n = WRITE_BEHIND;
+
+				while (n--)
+					cluster_try_push(wbp, vp, newEOF, 0, 0, callback, callback_arg);
+			}
+			if (wbp->cl_number < MAX_CLUSTERS) {
 			        /*
 				 * we didn't find an existing cluster to
 				 * merge into, but there's room to start
 				 * a new one
 				 */
 			        goto start_new_cluster;
-
+			}
 			/*
 			 * no existing cluster to merge with and no
 			 * room to start a new one... we'll try 
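Together with the cl_seq_written/cl_last_write bookkeeping added earlier in cluster_write_copy, the new block detects a sequential writer that has filled all MAX_CLUSTERS slots and proactively pushes a batch of clusters, sized by WRITE_BEHIND or WRITE_BEHIND_SSD depending on the medium. Schematically (a sketch; the constants are illustrative, MAX_CLUSTERS comes from the cluster headers, and the real code also skips MNT_DEFWRITE mounts as shown above):

#define WRITE_BEHIND		1	/* assumed for illustration */
#define WRITE_BEHIND_SSD	1	/* assumed for illustration */

static void
push_if_sequential(struct cl_writebehind *wbp, vnode_t vp, off_t newEOF,
    u_int max_cluster_pgcount, boolean_t is_ssd)
{
	if (wbp->cl_number == MAX_CLUSTERS &&
	    wbp->cl_seq_written >= (MAX_CLUSTERS * (max_cluster_pgcount * PAGE_SIZE))) {
		uint32_t n = is_ssd ? WRITE_BEHIND_SSD : WRITE_BEHIND;

		/*
		 * drain a fixed-size batch so a streaming writer
		 * doesn't accumulate an unbounded dirty backlog
		 */
		while (n--)
			cluster_try_push(wbp, vp, newEOF, 0, 0, NULL, NULL);
	}
}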
@@ -3157,7 +3207,7 @@ check_cluster:
 			 */
 			if (!((unsigned int)vfs_flags(vp->v_mount) & MNT_DEFWRITE)) {
 				
-				ret_cluster_try_push = cluster_try_push(wbp, vp, newEOF, (flags & IO_NOCACHE) ? 0 : PUSH_DELAY, callback, callback_arg);
+				ret_cluster_try_push = cluster_try_push(wbp, vp, newEOF, (flags & IO_NOCACHE) ? 0 : PUSH_DELAY, 0, callback, callback_arg);
 			}
 
 			/*
@@ -3176,18 +3226,6 @@ check_cluster:
 
 				continue;
 			}
-			/*
-			 * we pushed one cluster successfully, so we must be sequentially writing this file
-			 * otherwise, we would have failed and fallen into the sparse cluster support
-			 * so let's take the opportunity to push out additional clusters...
-			 * this will give us better I/O locality if we're in a copy loop
-			 * (i.e.  we won't jump back and forth between the read and write points
-			 */
-			if (!((unsigned int)vfs_flags(vp->v_mount) & MNT_DEFWRITE)) {
-			        while (wbp->cl_number)
-				        cluster_try_push(wbp, vp, newEOF, 0, callback, callback_arg);
-			}
-
 start_new_cluster:
 			wbp->cl_clusters[wbp->cl_number].b_addr = cl.b_addr;
 			wbp->cl_clusters[wbp->cl_number].e_addr = cl.e_addr;
@@ -3342,19 +3380,25 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
 	struct cl_extent	extent;
 	int              bflag;
 	int		 take_reference = 1;
+#if CONFIG_EMBEDDED
 	struct uthread  *ut;
+#endif /* CONFIG_EMBEDDED */
 	int		 policy = IOPOL_DEFAULT;
-
+	boolean_t	 iolock_inited = FALSE;
 
 	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_START,
 		     (int)uio->uio_offset, io_req_size, (int)filesize, flags, 0);
 			 
+#if !CONFIG_EMBEDDED
+	policy = proc_get_task_selfdiskacc();
+#else /* !CONFIG_EMBEDDED */
 	policy = current_proc()->p_iopol_disk;
 
 	ut = get_bsdthread_info(current_thread());
 
 	if (ut->uu_iopol_disk != IOPOL_DEFAULT)
 		policy = ut->uu_iopol_disk;
+#endif /* !CONFIG_EMBEDDED */
 
 	if (policy == IOPOL_THROTTLE || (flags & IO_NOCACHE))
 		take_reference = 0;
@@ -3365,7 +3409,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
 		bflag = 0;
 
 	max_io_size = cluster_max_io_size(vp->v_mount, CL_READ);
-	max_prefetch = MAX_PREFETCH(vp, max_io_size);
+	max_prefetch = MAX_PREFETCH(vp, max_io_size, (vp->v_mount->mnt_kern_flag & MNTK_SSD));
 	max_rd_size = max_prefetch;
 
 	last_request_offset = uio->uio_offset + io_req_size;
@@ -3464,7 +3508,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
 
 				io_requested = io_resid;
 
-			        retval = cluster_copy_ubc_data_internal(vp, uio, (int *)&io_resid, 0, last_ioread_offset == 0 ? take_reference : 0);
+			        retval = cluster_copy_ubc_data_internal(vp, uio, (int *)&io_resid, 0, take_reference);
 
 				xsize = io_requested - io_resid;
 
@@ -3576,6 +3620,11 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
 			 * we may have to clip the size of it to keep from reading past
 			 * the end of the last physical block associated with the file
 			 */
+			if (iolock_inited == FALSE) {
+				lck_mtx_init(&iostate.io_mtxp, cl_mtx_grp, cl_mtx_attr);
+
+				iolock_inited = TRUE;
+			}
 			upl_offset = start_pg * PAGE_SIZE;
 			io_size    = (last_pg - start_pg) * PAGE_SIZE;
 
@@ -3588,6 +3637,18 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
 
 			error = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset,
 					   io_size, CL_READ | CL_ASYNC | bflag, (buf_t)NULL, &iostate, callback, callback_arg);
+
+			if (rap) {
+				if (extent.e_addr < rap->cl_maxra) {
+					/*
+					 * we've just issued a read for a block that should have been
+					 * in the cache courtesy of the read-ahead engine... something
+					 * has gone wrong with the pipeline, so reset the read-ahead
+					 * logic which will cause us to restart from scratch
+					 */
+					rap->cl_maxra = 0;
+				}
+			}
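The invariant behind this reset: cl_maxra is the high-water mark of blocks the read-ahead engine believes it has already made resident, so an actual device read below that mark means the pipeline has broken down. As a sketch (hypothetical helper):

static void
note_readahead_miss(struct cl_readahead *rap, daddr64_t e_addr)
{
	if (e_addr < rap->cl_maxra)
		rap->cl_maxra = 0;	/* force read-ahead to re-prime from scratch */
}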
 		}
 		if (error == 0) {
 		        /*
@@ -3666,22 +3727,9 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
 					rap->cl_lastr = extent.e_addr;
 				}
 			}
-			if (iostate.io_issued > iostate.io_completed) {
+			if (iostate.io_issued > iostate.io_completed)
+				cluster_iostate_wait(&iostate, 0, "cluster_read_copy");
 
-				lck_mtx_lock(cl_mtxp);
-
-				while (iostate.io_issued != iostate.io_completed) {
-					KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
-						     iostate.io_issued, iostate.io_completed, 0, 0, 0);
-
-					iostate.io_wanted = 1;
-					msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_copy", NULL);
-
-					KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
-						     iostate.io_issued, iostate.io_completed, 0, 0, 0);
-				}	
-				lck_mtx_unlock(cl_mtxp);
-			}
 			if (iostate.io_error)
 			        error = iostate.io_error;
 			else {
@@ -3693,6 +3741,9 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
 				
 				io_req_size -= (val_size - io_requested);
 			}
+		} else {
+			if (iostate.io_issued > iostate.io_completed)
+				cluster_iostate_wait(&iostate, 0, "cluster_read_copy");
 		}
 		if (start_pg < last_pg) {
 		        /*
@@ -3773,6 +3824,20 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file
 			}
 		}
 	}
+	if (iolock_inited == TRUE) {
+		if (iostate.io_issued > iostate.io_completed) {
+			/*
+			 * cluster_io returned an error after it
+			 * had already issued some I/O.  we need
+			 * to wait for that I/O to complete before
+			 * we can destroy the iostate mutex...
+			 * 'retval' already contains the early error
+			 * so no need to pick it up from iostate.io_error
+			 */
+			cluster_iostate_wait(&iostate, 0, "cluster_read_copy");
+		}
+		lck_mtx_destroy(&iostate.io_mtxp, cl_mtx_grp);
+	}
 	if (rap != NULL) {
 	        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END,
 			     (int)uio->uio_offset, io_req_size, rap->cl_lastr, retval, 0);
@@ -3819,6 +3884,7 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type,
 	u_int32_t	 max_upl_size;
 	u_int32_t        max_rd_size;
 	u_int32_t        max_rd_ahead;
+	boolean_t	 strict_uncached_IO = FALSE;
 
 	u_int32_t	 vector_upl_iosize = 0;
 	int		 issueVectorUPL = 0,useVectorUPL = (uio->uio_iovcnt > 1);
@@ -3835,6 +3901,7 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type,
 	max_rd_ahead = max_rd_size * IO_SCALE(vp, 2);
 
 	io_flag = CL_COMMIT | CL_READ | CL_ASYNC | CL_NOZERO | CL_DIRECT_IO;
+
 	if (flags & IO_PASSIVE)
 		io_flag |= CL_PASSIVE;
 
@@ -3843,6 +3910,8 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type,
 	iostate.io_error = 0;
 	iostate.io_wanted = 0;
 
+	lck_mtx_init(&iostate.io_mtxp, cl_mtx_grp, cl_mtx_attr);
+
 	devblocksize = (u_int32_t)vp->v_mount->mnt_devblocksize;
 	mem_alignment_mask = (u_int32_t)vp->v_mount->mnt_alignmentmask;
 
@@ -3862,6 +3931,9 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type,
                 */
                devblocksize = PAGE_SIZE;
 	}
+
+	strict_uncached_IO = ubc_strict_uncached_IO(vp);
+
 next_dread:
 	io_req_size = *read_length;
 	iov_base = uio_curriovbase(uio);
@@ -3913,8 +3985,9 @@ next_dread:
 		 * cluster_copy_ubc_data returns the resid
 		 * in io_size
 		 */
-		retval = cluster_copy_ubc_data_internal(vp, uio, (int *)&io_size, 0, 0);
-			
+		if (strict_uncached_IO == FALSE) {
+			retval = cluster_copy_ubc_data_internal(vp, uio, (int *)&io_size, 0, 0);
+		}
 		/*
 		 * calculate the number of bytes actually copied
 		 * starting size - residual
@@ -3991,21 +4064,26 @@ next_dread:
 			 */
 		        goto wait_for_dreads;
 		}
-		if ((xsize = io_size) > max_rd_size)
-		        xsize = max_rd_size;
 
-		io_size = 0;
+		if (strict_uncached_IO == FALSE) {
 
-		ubc_range_op(vp, uio->uio_offset, uio->uio_offset + xsize, UPL_ROP_ABSENT, (int *)&io_size);
+			if ((xsize = io_size) > max_rd_size)
+		        	xsize = max_rd_size;
 
-		if (io_size == 0) {
-			/*
-			 * a page must have just come into the cache
-			 * since the first page in this range is no
-			 * longer absent, go back and re-evaluate
-			 */
-		        continue;
+			io_size = 0;
+
+			ubc_range_op(vp, uio->uio_offset, uio->uio_offset + xsize, UPL_ROP_ABSENT, (int *)&io_size);
+
+			if (io_size == 0) {
+				/*
+				 * a page must have just come into the cache
+				 * since the first page in this range is no
+				 * longer absent, go back and re-evaluate
+				 */
+				continue;
+			}
 		}
+
 		iov_base = uio_curriovbase(uio);
 
 		upl_offset = (vm_offset_t)((u_int32_t)iov_base & PAGE_MASK);
@@ -4097,22 +4175,9 @@ next_dread:
 		 * if there are already too many outstanding reads
 		 * wait until some have completed before issuing the next read
 		 */
-		if (iostate.io_issued > iostate.io_completed) {
+		if (iostate.io_issued > iostate.io_completed)
+			cluster_iostate_wait(&iostate, max_rd_ahead, "cluster_read_direct");
 
-			lck_mtx_lock(cl_mtxp);
-
-			while ((iostate.io_issued - iostate.io_completed) > max_rd_ahead) {
-				KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
-					     iostate.io_issued, iostate.io_completed, max_rd_ahead, 0, 0);
-
-				iostate.io_wanted = 1;
-				msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_direct", NULL);
-
-				KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
-					     iostate.io_issued, iostate.io_completed, max_rd_ahead, 0, 0);
-			}	
-			lck_mtx_unlock(cl_mtxp);
-		}
 		if (iostate.io_error) {
 		        /*
 			 * one of the earlier reads we issued ran into a hard error
@@ -4191,25 +4256,14 @@ wait_for_dreads:
 	 * make sure all async reads that are part of this stream
 	 * have completed before we return
 	 */
-	if (iostate.io_issued > iostate.io_completed) {
+	if (iostate.io_issued > iostate.io_completed)
+		cluster_iostate_wait(&iostate, 0, "cluster_read_direct");
 
-	        lck_mtx_lock(cl_mtxp);
-
-		while (iostate.io_issued != iostate.io_completed) {
-			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
-				     iostate.io_issued, iostate.io_completed, 0, 0, 0);
-
-		        iostate.io_wanted = 1;
-			msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_direct", NULL);
-
-			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
-				     iostate.io_issued, iostate.io_completed, 0, 0, 0);
-		}	
-		lck_mtx_unlock(cl_mtxp);
-	}
 	if (iostate.io_error)
 	        retval = iostate.io_error;
 
+	lck_mtx_destroy(&iostate.io_mtxp, cl_mtx_grp);
+
 	if (io_req_size && retval == 0) {
 	        /*
 		 * we couldn't handle the tail of this request in DIRECT mode
@@ -4273,6 +4327,8 @@ cluster_read_contig(vnode_t vp, struct uio *uio, off_t filesize, int *read_type,
 	iostate.io_error = 0;
 	iostate.io_wanted = 0;
 
+	lck_mtx_init(&iostate.io_mtxp, cl_mtx_grp, cl_mtx_attr);
+
 next_cread:
 	io_size = *read_length;
 
@@ -4370,21 +4426,9 @@ next_cread:
 		 * if there are already too many outstanding reads
 		 * wait until some have completed before issuing the next
 		 */
-		if (iostate.io_issued > iostate.io_completed) {
-		        lck_mtx_lock(cl_mtxp);
-
-			while ((iostate.io_issued - iostate.io_completed) > (MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2))) {
-				KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
-					     iostate.io_issued, iostate.io_completed, MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2), 0, 0);
-
-			        iostate.io_wanted = 1;
-				msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_contig", NULL);
+		if (iostate.io_issued > iostate.io_completed)
+			cluster_iostate_wait(&iostate, MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2), "cluster_read_contig");
 
-				KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
-					     iostate.io_issued, iostate.io_completed, MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2), 0, 0);
-			}	
-			lck_mtx_unlock(cl_mtxp);
-		}
 		if (iostate.io_error) {
 		        /*
 			 * one of the earlier reads we issued ran into a hard error
@@ -4425,25 +4469,14 @@ wait_for_creads:
 	 * make sure all async reads that are part of this stream
 	 * have completed before we proceed
 	 */
-	if (iostate.io_issued > iostate.io_completed) {
-
-		lck_mtx_lock(cl_mtxp);
-
-		while (iostate.io_issued != iostate.io_completed) {
-			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START,
-				     iostate.io_issued, iostate.io_completed, 0, 0, 0);
-
-			iostate.io_wanted = 1;
-			msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_contig", NULL);
+	if (iostate.io_issued > iostate.io_completed)
+		cluster_iostate_wait(&iostate, 0, "cluster_read_contig");
 
-			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END,
-				     iostate.io_issued, iostate.io_completed, 0, 0, 0);
-		}	
-		lck_mtx_unlock(cl_mtxp);
-	}
 	if (iostate.io_error)
 	        error = iostate.io_error;
 
+	lck_mtx_destroy(&iostate.io_mtxp, cl_mtx_grp);
+
 	if (error == 0 && tail_size)
 	        error = cluster_align_phys_io(vp, uio, dst_paddr, tail_size, CL_READ, callback, callback_arg);
 
@@ -4787,7 +4820,7 @@ cluster_push_ext(vnode_t vp, int flags, int (*callback)(buf_t, void *), void *ca
 
 			lck_mtx_unlock(&wbp->cl_lockw);
 
-			sparse_cluster_push(&scmap, vp, ubc_getsize(vp), PUSH_ALL | IO_PASSIVE, callback, callback_arg);
+			sparse_cluster_push(&scmap, vp, ubc_getsize(vp), PUSH_ALL, flags | IO_PASSIVE, callback, callback_arg);
 
 			lck_mtx_lock(&wbp->cl_lockw);
 
@@ -4796,11 +4829,11 @@ cluster_push_ext(vnode_t vp, int flags, int (*callback)(buf_t, void *), void *ca
 			if (wbp->cl_sparse_wait && wbp->cl_sparse_pushes == 0)
 				wakeup((caddr_t)&wbp->cl_sparse_pushes);
 		} else {
-			sparse_cluster_push(&(wbp->cl_scmap), vp, ubc_getsize(vp), PUSH_ALL | IO_PASSIVE, callback, callback_arg);
+			sparse_cluster_push(&(wbp->cl_scmap), vp, ubc_getsize(vp), PUSH_ALL, flags | IO_PASSIVE, callback, callback_arg);
 		}
 		retval = 1;
 	} else  {
-		retval = cluster_try_push(wbp, vp, ubc_getsize(vp), PUSH_ALL | IO_PASSIVE, callback, callback_arg);
+		retval = cluster_try_push(wbp, vp, ubc_getsize(vp), PUSH_ALL, flags | IO_PASSIVE, callback, callback_arg);
 	}
 	lck_mtx_unlock(&wbp->cl_lockw);
 
@@ -4861,7 +4894,7 @@ cluster_release(struct ubc_info *ubc)
 
 
 static int
-cluster_try_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_flag, int (*callback)(buf_t, void *), void *callback_arg)
+cluster_try_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_flag, int io_flags, int (*callback)(buf_t, void *), void *callback_arg)
 {
         int cl_index;
 	int cl_index1;
@@ -4944,15 +4977,15 @@ cluster_try_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_fla
 	        int	flags;
 		struct	cl_extent cl;
 
+		flags = io_flags & (IO_PASSIVE|IO_CLOSE);
+
 	        /*
 		 * try to push each cluster in turn...
 		 */
 		if (l_clusters[cl_index].io_flags & CLW_IONOCACHE)
-		        flags = IO_NOCACHE;
-		else
-		        flags = 0;
+		        flags |= IO_NOCACHE;
 
-		if ((l_clusters[cl_index].io_flags & CLW_IOPASSIVE) || (push_flag & IO_PASSIVE))
+		if (l_clusters[cl_index].io_flags & CLW_IOPASSIVE)
 		        flags |= IO_PASSIVE;
 
 		if (push_flag & PUSH_SYNC)
@@ -5057,9 +5090,9 @@ cluster_push_now(vnode_t vp, struct cl_extent *cl, off_t EOF, int flags, int (*c
 	kern_return_t    kret;
 
 	if (flags & IO_PASSIVE)
-	    bflag = CL_PASSIVE;
+		bflag = CL_PASSIVE;
 	else
-	    bflag = 0;
+		bflag = 0;
 
 	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_START,
 		     (int)cl->b_addr, (int)cl->e_addr, (int)EOF, flags, 0);
@@ -5186,6 +5219,9 @@ cluster_push_now(vnode_t vp, struct cl_extent *cl, off_t EOF, int flags, int (*c
 		if ( !(flags & IO_SYNC))
 		        io_flags |= CL_ASYNC;
 
+		if (flags & IO_CLOSE)
+		        io_flags |= CL_CLOSE;
+
 		retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size,
 				    io_flags, (buf_t)NULL, (struct clios *)NULL, callback, callback_arg);
 
@@ -5237,7 +5273,7 @@ sparse_cluster_switch(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int (*c
  * from the write-behind context (the cluster_push case), the wb lock is not held
  */
 static void
-sparse_cluster_push(void **scmap, vnode_t vp, off_t EOF, int push_flag, int (*callback)(buf_t, void *), void *callback_arg)
+sparse_cluster_push(void **scmap, vnode_t vp, off_t EOF, int push_flag, int io_flags, int (*callback)(buf_t, void *), void *callback_arg)
 {
         struct cl_extent cl;
         off_t		offset;
@@ -5255,7 +5291,7 @@ sparse_cluster_push(void **scmap, vnode_t vp, off_t EOF, int push_flag, int (*ca
 		cl.b_addr = (daddr64_t)(offset / PAGE_SIZE_64);
 		cl.e_addr = (daddr64_t)((offset + length) / PAGE_SIZE_64);
 
-		cluster_push_now(vp, &cl, EOF, push_flag & IO_PASSIVE, callback, callback_arg);
+		cluster_push_now(vp, &cl, EOF, io_flags & (IO_PASSIVE|IO_CLOSE), callback, callback_arg);
 
 		if ( !(push_flag & PUSH_ALL) )
 		        break;
@@ -5285,7 +5321,7 @@ sparse_cluster_add(void **scmap, vnode_t vp, struct cl_extent *cl, off_t EOF, in
 		 * only a partial update was done
 		 * push out some pages and try again
 		 */
-	        sparse_cluster_push(scmap, vp, EOF, 0, callback, callback_arg);
+	        sparse_cluster_push(scmap, vp, EOF, 0, 0, callback, callback_arg);
 
 		offset += (new_dirty * PAGE_SIZE_64);
 		length -= (new_dirty * PAGE_SIZE);
@@ -5308,9 +5344,9 @@ cluster_align_phys_io(vnode_t vp, struct uio *uio, addr64_t usr_paddr, u_int32_t
 	int              bflag;
 
 	if (flags & IO_PASSIVE)
-	    bflag = CL_PASSIVE;
+		bflag = CL_PASSIVE;
 	else
-	    bflag = 0;
+		bflag = 0;
 
 	upl_flags = UPL_SET_LITE;
 
@@ -5479,7 +5515,7 @@ cluster_copy_ubc_data_internal(vnode_t vp, struct uio *uio, int *io_resid, int m
 	io_size = *io_resid;
 
 	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START,
-		     (int)uio->uio_offset, 0, io_size, 0, 0);
+		     (int)uio->uio_offset, io_size, mark_dirty, take_reference, 0);
 
 	control = ubc_getobject(vp, UBC_FLAGS_NONE);
 
diff --git a/bsd/vfs/vfs_conf.c b/bsd/vfs/vfs_conf.c
index 529129d9c..a4a962b66 100644
--- a/bsd/vfs/vfs_conf.c
+++ b/bsd/vfs/vfs_conf.c
@@ -86,8 +86,8 @@ struct mount *rootfs;
 struct vnode *rootvnode;
 
 #ifdef CONFIG_IMGSRC_ACCESS
-struct vnode *imgsrc_rootvnode;
-#endif /* IMGSRC_ACESS */
+struct vnode *imgsrc_rootvnodes[MAX_IMAGEBOOT_NESTING];	/* [0] -> source volume, [1] -> first disk image */
+#endif /* CONFIG_IMGSRC_ACCESS */
 
 int (*mountroot)(void) = NULL;
 
@@ -102,7 +102,6 @@ extern	struct vfsops nfs_vfsops;
 extern	int nfs_mountroot(void);
 extern	struct vfsops afs_vfsops;
 extern	struct vfsops null_vfsops;
-extern	struct vfsops union_vfsops;
 extern	struct vfsops devfs_vfsops;
 
 /*
@@ -117,7 +116,7 @@ typedef int (*mountroot_t)(mount_t, vnode_t, vfs_context_t);
 static struct vfstable vfstbllist[] = {
 	/* HFS/HFS+ Filesystem */
 #if HFS
-	{ &hfs_vfsops, "hfs", 17, 0, (MNT_LOCAL | MNT_DOVOLFS), hfs_mountroot, NULL, 0, 0, VFC_VFSLOCALARGS | VFC_VFSREADDIR_EXTENDED | VFS_THREAD_SAFE_FLAG | VFC_VFS64BITREADY | VFC_VFSVNOP_PAGEOUTV2, NULL, 0},
+	{ &hfs_vfsops, "hfs", 17, 0, (MNT_LOCAL | MNT_DOVOLFS), hfs_mountroot, NULL, 0, 0, VFC_VFSLOCALARGS | VFC_VFSREADDIR_EXTENDED | VFS_THREAD_SAFE_FLAG | VFC_VFS64BITREADY | VFC_VFSVNOP_PAGEOUTV2 | VFC_VFSVNOP_PAGEINV2, NULL, 0},
 #endif
 
 	/* Memory-based Filesystem */
@@ -140,18 +139,6 @@ static struct vfstable vfstbllist[] = {
 #endif
 #endif /* __LP64__ */
 
-	/* Loopback (Minimal) Filesystem Layer */
-#ifndef __LP64__
-#if NULLFS
-	{ &null_vfsops, "loopback", 9, 0, 0, NULL, NULL, 0, 0, VFC_VFSGENERICARGS , NULL, 0},
-#endif
-#endif /* __LP64__ */
-
-	/* Union (translucent) Filesystem */
-#if UNION
-	{ &union_vfsops, "unionfs", 15, 0, 0, NULL, NULL, 0, 0, VFC_VFSGENERICARGS | VFS_THREAD_SAFE_FLAG | VFC_VFS64BITREADY, NULL, 0},
-#endif
-
 	/* Device Filesystem */
 #if DEVFS
 #if CONFIG_MACF
@@ -214,7 +201,6 @@ extern struct vnodeopv_desc hfs_vnodeop_opv_desc;
 extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc;
 extern struct vnodeopv_desc hfs_specop_opv_desc;
 extern struct vnodeopv_desc hfs_fifoop_opv_desc;
-extern struct vnodeopv_desc union_vnodeop_opv_desc;
 extern struct vnodeopv_desc devfs_vnodeop_opv_desc;
 extern struct vnodeopv_desc devfs_spec_vnodeop_opv_desc;
 #if FDESC
@@ -241,9 +227,6 @@ struct vnodeopv_desc *vfs_opv_descs[] = {
 	&fifo_nfsv4nodeop_opv_desc,
 #endif
 #endif
-#if NULLFS
-	&null_vnodeop_opv_desc,
-#endif
 #if HFS
 	&hfs_vnodeop_opv_desc,
 	&hfs_std_vnodeop_opv_desc,
@@ -252,9 +235,6 @@ struct vnodeopv_desc *vfs_opv_descs[] = {
 	&hfs_fifoop_opv_desc,
 #endif
 #endif
-#if UNION
-	&union_vnodeop_opv_desc,
-#endif
 #if DEVFS
 	&devfs_vnodeop_opv_desc,
 	&devfs_spec_vnodeop_opv_desc,
diff --git a/bsd/vfs/vfs_fsevents.c b/bsd/vfs/vfs_fsevents.c
index e09f990dc..0132a60dd 100644
--- a/bsd/vfs/vfs_fsevents.c
+++ b/bsd/vfs/vfs_fsevents.c
@@ -206,6 +206,7 @@ fsevents_internal_init(void)
     // ever grow beyond what we initially filled it with
     zone_change(event_zone, Z_EXHAUST, TRUE);
     zone_change(event_zone, Z_COLLECT, FALSE);
+    zone_change(event_zone, Z_CALLERACCT, FALSE);
 }
 
 static void
@@ -1821,6 +1822,11 @@ fmod_watch(fs_event_watcher *watcher, struct uio *uio)
 
 	if (watcher->event_list[kfse->type] == FSE_REPORT && watcher_cares_about_dev(watcher, kfse->dev)) {
 
+	    if (last_event_ptr == kfse) {
+		last_event_ptr = NULL;
+		last_event_type = -1;
+		last_coalesced_time = 0;
+	    }
 	    error = copy_out_kfse(watcher, kfse, uio);
 	    if (error != 0) {
 		// if an event won't fit or encountered an error while
@@ -2667,18 +2673,24 @@ get_fse_info(struct vnode *vp, fse_info *fse, __unused vfs_context_t ctx)
 	memset(fse, 0, sizeof(fse_info));
 	return -1;
     }
-    
-    fse->ino  = (ino64_t)va.va_fileid;
-    fse->dev  = (dev_t)va.va_fsid;
-    fse->mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) | va.va_mode;
-    fse->uid  = (uid_t)va.va_uid;
-    fse->gid  = (gid_t)va.va_gid;
+
+    return vnode_get_fse_info_from_vap(vp, fse, &va);
+}
+
+int
+vnode_get_fse_info_from_vap(vnode_t vp, fse_info *fse, struct vnode_attr *vap) 
+{
+    fse->ino  = (ino64_t)vap->va_fileid;
+    fse->dev  = (dev_t)vap->va_fsid;
+    fse->mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) | vap->va_mode;
+    fse->uid  = (uid_t)vap->va_uid;
+    fse->gid  = (gid_t)vap->va_gid;
     if (vp->v_flag & VISHARDLINK) {
 	fse->mode |= FSE_MODE_HLINK;
 	if (vp->v_type == VDIR) {
-	    fse->nlink = (uint64_t)va.va_dirlinkcount;
+	    fse->nlink = (uint64_t)vap->va_dirlinkcount;
 	} else {
-	    fse->nlink = (uint64_t)va.va_nlink;
+	    fse->nlink = (uint64_t)vap->va_nlink;
 	}
     }    
 
diff --git a/bsd/vfs/vfs_fslog.c b/bsd/vfs/vfs_fslog.c
index 618e4546c..580ea60b4 100644
--- a/bsd/vfs/vfs_fslog.c
+++ b/bsd/vfs/vfs_fslog.c
@@ -42,6 +42,8 @@
 #include <sys/fslog.h>
 #include <sys/mount_internal.h>
 
+#include <uuid/uuid.h>
+
 /* String to append as format modifier for each key-value pair */
 #define FSLOG_KEYVAL_FMT	"[%s %s] " 
 #define FSLOG_KEYVAL_FMT_LEN	(sizeof(FSLOG_KEYVAL_FMT) - 1)
@@ -341,12 +343,10 @@ static int escape_str(char *str, int len, int buflen)
 void fslog_fs_corrupt(struct mount *mnt)
 {
 	if (mnt != NULL) {
-		if (mnt->mnt_vfsstat.f_mntonname != NULL) {
-			fslog_err(FSLOG_MSG_SINGLE,
-				  FSLOG_KEY_ERR_TYPE, FSLOG_VAL_ERR_TYPE_FS,
-				  FSLOG_KEY_MNTPT, mnt->mnt_vfsstat.f_mntonname,
-				  NULL);
-		}
+		fslog_err(FSLOG_MSG_SINGLE,
+			  FSLOG_KEY_ERR_TYPE, FSLOG_VAL_ERR_TYPE_FS,
+			  FSLOG_KEY_MNTPT, mnt->mnt_vfsstat.f_mntonname,
+			  NULL);
 	}
 		
 	return;
@@ -458,3 +458,73 @@ out:
 
 	return;
 }
+
+static void
+_fslog_extmod_msgtracer_internal(int level, const char *facility, int num_pairs, ...)
+{
+	va_list ap;
+
+	va_start(ap, num_pairs);
+	(void) fslog_asl_msg(level, facility,
+				num_pairs, ap, NULL);
+	va_end(ap);
+}
+
+/* Log information about external modification of a process,
+ * using MessageTracer formatting. Assumes that both the caller
+ * and target are appropriately locked.
+ * Currently prints the following information:
+ *	1. Caller process name (truncated to 16 characters)
+ *	2. Caller process Mach-O UUID
+ *	3. Target process name (truncated to 16 characters)
+ *	4. Target process Mach-O UUID
+ */
+void
+fslog_extmod_msgtracer(proc_t caller, proc_t target)
+{
+	if ((caller != PROC_NULL) && (target != PROC_NULL)) {
+
+		/*
+		 * Print into buffer large enough for "ThisIsAnApplicat(BC223DD7-B314-42E0-B6B0-C5D2E6638337)",
+		 * including space for escaping, and NUL byte included in sizeof(uuid_string_t).
+		 */
+
+		uuid_string_t uuidstr;
+		char c_name[2*MAXCOMLEN + 2 /* () */ + sizeof(uuid_string_t)];
+		char t_name[2*MAXCOMLEN + 2 /* () */ + sizeof(uuid_string_t)];
+
+		strlcpy(c_name, caller->p_comm, sizeof(c_name));
+		uuid_unparse_upper(caller->p_uuid, uuidstr);
+		strlcat(c_name, "(", sizeof(c_name));
+		strlcat(c_name, uuidstr, sizeof(c_name));
+		strlcat(c_name, ")", sizeof(c_name));
+		if (0 != escape_str(c_name, strlen(c_name), sizeof(c_name))) {
+			return;
+		}
+
+		strlcpy(t_name, target->p_comm, sizeof(t_name));
+		uuid_unparse_upper(target->p_uuid, uuidstr);
+		strlcat(t_name, "(", sizeof(t_name));
+		strlcat(t_name, uuidstr, sizeof(t_name));
+		strlcat(t_name, ")", sizeof(t_name));
+		if (0 != escape_str(t_name, strlen(t_name), sizeof(t_name))) {
+			return;
+		}
+
+#if DEBUG
+		printf("EXTMOD: %s(%d) -> %s(%d)\n",
+			   c_name,
+			   proc_pid(caller),
+			   t_name,
+			   proc_pid(target));
+#endif
+
+		_fslog_extmod_msgtracer_internal(LOG_DEBUG, "messagetracer",
+							4,
+							"com.apple.message.domain", "com.apple.kernel.external_modification", /* 0 */
+							"com.apple.message.signature", c_name, /* 1 */
+							"com.apple.message.signature2", t_name, /* 2 */
+							"com.apple.message.result", "noop", /* 3 */
+							NULL);
+	}
+}
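For illustration, a call site would look something like the following hypothetical wrapper; per the comment above, both procs must already be appropriately locked, and the function quietly does nothing if either is PROC_NULL:

/* Record that 'caller' externally modified 'target' (both held/locked). */
static void
report_external_modification(proc_t caller, proc_t target)
{
	fslog_extmod_msgtracer(caller, target);
}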
diff --git a/bsd/vfs/vfs_init.c b/bsd/vfs/vfs_init.c
index 253bbcd77..2c83c4725 100644
--- a/bsd/vfs/vfs_init.c
+++ b/bsd/vfs/vfs_init.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -261,6 +261,12 @@ lck_grp_t * vnode_lck_grp;
 lck_grp_attr_t * vnode_lck_grp_attr;
 lck_attr_t * vnode_lck_attr;
 
+#if CONFIG_TRIGGERS
+/* vars for vnode trigger resolver */
+lck_grp_t * trigger_vnode_lck_grp;
+lck_grp_attr_t * trigger_vnode_lck_grp_attr;
+lck_attr_t * trigger_vnode_lck_attr;
+#endif
 
 /* vars for vnode list lock */
 lck_grp_t * vnode_list_lck_grp;
@@ -289,6 +295,9 @@ lck_mtx_t * mnt_list_mtx_lock;
 lck_mtx_t *pkg_extensions_lck;
 
 struct mount * dead_mountp;
+
+extern void nspace_handler_init(void);
+
 /*
  * Initialize the vnode structures and initialize each file system type.
  */
@@ -324,6 +333,12 @@ vfsinit(void)
 	/* Allocate vnode lock attribute */
 	vnode_lck_attr = lck_attr_alloc_init();
 
+#if CONFIG_TRIGGERS
+	trigger_vnode_lck_grp_attr = lck_grp_attr_alloc_init();
+	trigger_vnode_lck_grp = lck_grp_alloc_init("trigger_vnode", trigger_vnode_lck_grp_attr);
+	trigger_vnode_lck_attr = lck_attr_alloc_init();
+#endif
+
 	/* Allocate fs config lock group attribute and group */
 	fsconf_lck_grp_attr= lck_grp_attr_alloc_init();
 
@@ -373,6 +388,7 @@ vfsinit(void)
 	 */
 	journal_init();
 #endif 
+	nspace_handler_init();
 
 	/*
 	 * Build vnode operation vectors.
diff --git a/bsd/vfs/vfs_journal.c b/bsd/vfs/vfs_journal.c
index 0a967aba9..4999f814b 100644
--- a/bsd/vfs/vfs_journal.c
+++ b/bsd/vfs/vfs_journal.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1995-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2002-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -51,6 +51,7 @@
 #include <sys/tty.h>
 #include <sys/ubc.h>
 #include <sys/malloc.h>
+#include <kern/task.h>
 #include <kern/thread.h>
 #include <kern/kalloc.h>
 #include <sys/disk.h>
@@ -58,9 +59,36 @@
 #include <miscfs/specfs/specdev.h>
 #include <libkern/OSAtomic.h>	/* OSAddAtomic */
 
-extern task_t kernel_task;
+kern_return_t	thread_terminate(thread_t);
 
-#define DBG_JOURNAL_FLUSH 1
+/*
+ * Set sysctl vfs.generic.jnl.kdebug.trim=1 to enable KERNEL_DEBUG_CONSTANT
+ * logging of trim-related calls within the journal.  (They're
+ * disabled by default because there can be a lot of these events,
+ * and we don't want to overwhelm the kernel debug buffer.  If you
+ * want to watch these events in particular, just set the sysctl.)
+ */
+static int jnl_kdebug = 0;
+SYSCTL_DECL(_vfs_generic);
+SYSCTL_NODE(_vfs_generic, OID_AUTO, jnl, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Journal");
+SYSCTL_NODE(_vfs_generic_jnl, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Journal kdebug");
+SYSCTL_INT(_vfs_generic_jnl_kdebug, OID_AUTO, trim, CTLFLAG_RW|CTLFLAG_LOCKED, &jnl_kdebug, 0, "Enable kdebug logging for journal TRIM");
+
+#define DBG_JOURNAL_FLUSH			FSDBG_CODE(DBG_JOURNAL, 1)
+#define DBG_JOURNAL_TRIM_ADD		FSDBG_CODE(DBG_JOURNAL, 2)
+#define DBG_JOURNAL_TRIM_REMOVE		FSDBG_CODE(DBG_JOURNAL, 3)
+#define DBG_JOURNAL_TRIM_REMOVE_PENDING	FSDBG_CODE(DBG_JOURNAL, 4)
+#define DBG_JOURNAL_TRIM_REALLOC	FSDBG_CODE(DBG_JOURNAL, 5)
+#define DBG_JOURNAL_TRIM_FLUSH		FSDBG_CODE(DBG_JOURNAL, 6)
+#define DBG_JOURNAL_TRIM_UNMAP		FSDBG_CODE(DBG_JOURNAL, 7)
+
+/*
+ * Cap the journal max size at 2GB.  On HFS, the journal will attempt to
+ * occupy a full allocation block if its current size is smaller than the
+ * allocation block on which it resides, and once we hit the exabyte
+ * filesystem range, allocation blocks can themselves be 2GB.  Hence the
+ * 2GB cap.
+ */
+#define MAX_JOURNAL_SIZE 0x80000000U
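The trim codepaths elsewhere in this file are expected to consult jnl_kdebug before emitting these events, along these lines (a sketch; the argument choice is illustrative):

static void
trace_trim_add(journal *jnl, uint64_t offset, uint64_t length)
{
	if (jnl_kdebug)
		KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_ADD | DBG_FUNC_START,
				      (uintptr_t)jnl, offset, length, 0, 0);
}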
 
 #include <sys/sdt.h> /* DTRACE_IO1 */
 #else
@@ -80,6 +108,13 @@ extern task_t kernel_task;
 
 #include "vfs_journal.h"
 
+#include <sys/kdebug.h>
+
+#if 0
+#undef KERNEL_DEBUG
+#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
+#endif
+
 #ifndef CONFIG_HFS_TRIM
 #define CONFIG_HFS_TRIM 0
 #endif
@@ -104,10 +139,10 @@ SYSCTL_UINT (_kern, OID_AUTO, jnl_trim_flush, CTLFLAG_RW, &jnl_trim_flush_limit,
 
 /* XXX next prototype should be from <libsa/stdlib.h> but conflicts with libkern */
 __private_extern__ void qsort(
-    void * array,
-    size_t nmembers,
-    size_t member_size,
-    int (*)(const void *, const void *));
+	void * array,
+	size_t nmembers,
+	size_t member_size,
+	int (*)(const void *, const void *));
 
 
 
@@ -116,8 +151,13 @@ __private_extern__ void qsort(
 //       fields as well as the first entry of binfo[]
 #define BLHDR_CHECKSUM_SIZE 32
 
-
-static int end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void *callback_arg);
+static void lock_condition(journal *jnl, boolean_t *condition, const char *condition_name);
+static void wait_condition(journal *jnl, boolean_t *condition, const char *condition_name);
+static void unlock_condition(journal *jnl, boolean_t *condition);
+static void finish_end_thread(transaction *tr);
+static void write_header_thread(journal *jnl);
+static int finish_end_transaction(transaction *tr, errno_t (*callback)(void*), void *callback_arg);
+static int end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void *callback_arg, boolean_t drop_lock, boolean_t must_wait);
 static void abort_transaction(journal *jnl, transaction *tr);
 static void dump_journal(journal *jnl);
 
@@ -125,8 +165,8 @@ static __inline__ void  lock_journal(journal *jnl);
 static __inline__ void  unlock_journal(journal *jnl);
 static __inline__ void  lock_oldstart(journal *jnl);
 static __inline__ void  unlock_oldstart(journal *jnl);
-
-
+static __inline__ void  lock_flush(journal *jnl);
+static __inline__ void  unlock_flush(journal *jnl);
 
 
 //
@@ -134,10 +174,10 @@ static __inline__ void  unlock_oldstart(journal *jnl);
 //
 
 typedef struct bucket {
-    off_t     block_num;
-    uint32_t  jnl_offset;
-    uint32_t  block_size;
-    int32_t   cksum;
+	off_t     block_num;
+	uint32_t  jnl_offset;
+	uint32_t  block_size;
+	int32_t   cksum;
 } bucket;
 
 #define STARTING_BUCKETS 256
@@ -149,56 +189,56 @@ static int do_overlap(journal *jnl, struct bucket **buf_ptr, int blk_index, off_
 static int insert_block(journal *jnl, struct bucket **buf_ptr, int blk_index, off_t num, size_t size, size_t offset, int32_t cksum, int *num_buckets_ptr, int *num_full_ptr, int overwriting);
 
 #define CHECK_JOURNAL(jnl) \
-    do { \
-    if (jnl == NULL) {\
-	panic("%s:%d: null journal ptr?\n", __FILE__, __LINE__);\
-    }\
-    if (jnl->jdev == NULL) { \
-	panic("%s:%d: jdev is null!\n", __FILE__, __LINE__);\
-    } \
-    if (jnl->fsdev == NULL) { \
-	panic("%s:%d: fsdev is null!\n", __FILE__, __LINE__);\
-    } \
-    if (jnl->jhdr->magic != JOURNAL_HEADER_MAGIC) {\
-	panic("%s:%d: jhdr magic corrupted (0x%x != 0x%x)\n",\
-	__FILE__, __LINE__, jnl->jhdr->magic, JOURNAL_HEADER_MAGIC);\
-    }\
-    if (   jnl->jhdr->start <= 0 \
-	|| jnl->jhdr->start > jnl->jhdr->size) {\
-	panic("%s:%d: jhdr start looks bad (0x%llx max size 0x%llx)\n", \
-	__FILE__, __LINE__, jnl->jhdr->start, jnl->jhdr->size);\
-    }\
-    if (   jnl->jhdr->end <= 0 \
-	|| jnl->jhdr->end > jnl->jhdr->size) {\
-	panic("%s:%d: jhdr end looks bad (0x%llx max size 0x%llx)\n", \
-	__FILE__, __LINE__, jnl->jhdr->end, jnl->jhdr->size);\
-    }\
-    } while(0)
+	do {		   \
+	if (jnl == NULL) {					\
+		panic("%s:%d: null journal ptr?\n", __FILE__, __LINE__); \
+	}								\
+	if (jnl->jdev == NULL) {				\
+		panic("%s:%d: jdev is null!\n", __FILE__, __LINE__); \
+	}							\
+	if (jnl->fsdev == NULL) {				\
+		panic("%s:%d: fsdev is null!\n", __FILE__, __LINE__);	\
+	}								\
+	if (jnl->jhdr->magic != JOURNAL_HEADER_MAGIC) {			\
+		panic("%s:%d: jhdr magic corrupted (0x%x != 0x%x)\n",	\
+		      __FILE__, __LINE__, jnl->jhdr->magic, JOURNAL_HEADER_MAGIC); \
+	}								\
+	if (   jnl->jhdr->start <= 0					\
+	       || jnl->jhdr->start > jnl->jhdr->size) {			\
+		panic("%s:%d: jhdr start looks bad (0x%llx max size 0x%llx)\n", \
+		      __FILE__, __LINE__, jnl->jhdr->start, jnl->jhdr->size); \
+	}								\
+	if (   jnl->jhdr->end <= 0					\
+	       || jnl->jhdr->end > jnl->jhdr->size) {			\
+		panic("%s:%d: jhdr end looks bad (0x%llx max size 0x%llx)\n", \
+		      __FILE__, __LINE__, jnl->jhdr->end, jnl->jhdr->size); \
+	}								\
+	} while(0)
 
 #define CHECK_TRANSACTION(tr) \
-    do {\
-    if (tr == NULL) {\
-	panic("%s:%d: null transaction ptr?\n", __FILE__, __LINE__);\
-    }\
-    if (tr->jnl == NULL) {\
-	panic("%s:%d: null tr->jnl ptr?\n", __FILE__, __LINE__);\
-    }\
-    if (tr->blhdr != (block_list_header *)tr->tbuffer) {\
-	panic("%s:%d: blhdr (%p) != tbuffer (%p)\n", __FILE__, __LINE__, tr->blhdr, tr->tbuffer);\
-    }\
-    if (tr->total_bytes < 0) {\
-	panic("%s:%d: tr total_bytes looks bad: %d\n", __FILE__, __LINE__, tr->total_bytes);\
-    }\
-    if (tr->journal_start < 0) {\
-	panic("%s:%d: tr journal start looks bad: 0x%llx\n", __FILE__, __LINE__, tr->journal_start);\
-    }\
-    if (tr->journal_end < 0) {\
-	panic("%s:%d: tr journal end looks bad: 0x%llx\n", __FILE__, __LINE__, tr->journal_end);\
-    }\
-    if (tr->blhdr && (tr->blhdr->max_blocks <= 0 || tr->blhdr->max_blocks > (tr->jnl->jhdr->size/tr->jnl->jhdr->jhdr_size))) {\
-	panic("%s:%d: tr blhdr max_blocks looks bad: %d\n", __FILE__, __LINE__, tr->blhdr->max_blocks);\
-    }\
-    } while(0)
+	do {		      \
+	if (tr == NULL) {					\
+		panic("%s:%d: null transaction ptr?\n", __FILE__, __LINE__); \
+	}								\
+	if (tr->jnl == NULL) {						\
+		panic("%s:%d: null tr->jnl ptr?\n", __FILE__, __LINE__); \
+	}								\
+	if (tr->blhdr != (block_list_header *)tr->tbuffer) {		\
+		panic("%s:%d: blhdr (%p) != tbuffer (%p)\n", __FILE__, __LINE__, tr->blhdr, tr->tbuffer); \
+	}								\
+	if (tr->total_bytes < 0) {					\
+		panic("%s:%d: tr total_bytes looks bad: %d\n", __FILE__, __LINE__, tr->total_bytes); \
+	}								\
+	if (tr->journal_start < 0) {					\
+		panic("%s:%d: tr journal start looks bad: 0x%llx\n", __FILE__, __LINE__, tr->journal_start); \
+	}								\
+	if (tr->journal_end < 0) {					\
+		panic("%s:%d: tr journal end looks bad: 0x%llx\n", __FILE__, __LINE__, tr->journal_end); \
+	}								\
+	if (tr->blhdr && (tr->blhdr->max_blocks <= 0 || tr->blhdr->max_blocks > (tr->jnl->jhdr->size/tr->jnl->jhdr->jhdr_size))) { \
+		panic("%s:%d: tr blhdr max_blocks looks bad: %d\n", __FILE__, __LINE__, tr->blhdr->max_blocks);	\
+	}								\
+	} while(0)
 
 
 
@@ -210,14 +250,14 @@ static int insert_block(journal *jnl, struct bucket **buf_ptr, int blk_index, of
 static int
 calc_checksum(char *ptr, int len)
 {
-    int i, cksum=0;
+	int i, cksum=0;
 
-    // this is a lame checksum but for now it'll do
-    for(i=0; i < len; i++, ptr++) {
+	// this is a lame checksum but for now it'll do
+	for(i = 0; i < len; i++, ptr++) {
 		cksum = (cksum << 8) ^ (cksum + *(unsigned char *)ptr);
-    }
+	}
 
-    return (~cksum);
+	return (~cksum);
 }
 
 //
@@ -247,6 +287,18 @@ unlock_journal(journal *jnl)
 	lck_mtx_unlock(&jnl->jlock);
 }
 
+static __inline__ void
+lock_flush(journal *jnl)
+{
+	lck_mtx_lock(&jnl->flock);
+}
+
+static __inline__ void
+unlock_flush(journal *jnl)
+{
+	lck_mtx_unlock(&jnl->flock);
+}
+
 static __inline__ void
 lock_oldstart(journal *jnl)
 {
@@ -277,78 +329,80 @@ unlock_oldstart(journal *jnl)
 static size_t
 do_journal_io(journal *jnl, off_t *offset, void *data, size_t len, int direction)
 {
-    int         err, curlen=len;
-    size_t      io_sz = 0;
-    buf_t	bp;
-    off_t 	max_iosize;
+	int	err, curlen=len;
+	size_t	io_sz = 0;
+	buf_t	bp;
+	off_t 	max_iosize;
 
-    if (*offset < 0 || *offset > jnl->jhdr->size) {
+	if (*offset < 0 || *offset > jnl->jhdr->size) {
 		panic("jnl: do_jnl_io: bad offset 0x%llx (max 0x%llx)\n", *offset, jnl->jhdr->size);
-    }
-
-    if (direction & JNL_WRITE)
-	max_iosize = jnl->max_write_size;
-    else if (direction & JNL_READ)
-	max_iosize = jnl->max_read_size;
-    else
-	max_iosize = 128 * 1024;
+	}
+	
+	if (direction & JNL_WRITE)
+		max_iosize = jnl->max_write_size;
+	else if (direction & JNL_READ)
+		max_iosize = jnl->max_read_size;
+	else
+		max_iosize = 128 * 1024;
 
-  again:
-    bp = alloc_io_buf(jnl->jdev, 1);
+again:
+	bp = alloc_io_buf(jnl->jdev, 1);
 
-    if (*offset + (off_t)curlen > jnl->jhdr->size && *offset != 0 && jnl->jhdr->size != 0) {
+	if (*offset + (off_t)curlen > jnl->jhdr->size && *offset != 0 && jnl->jhdr->size != 0) {
 		if (*offset == jnl->jhdr->size) {
 			*offset = jnl->jhdr->jhdr_size;
 		} else {
 			curlen = (off_t)jnl->jhdr->size - *offset;
 		}
-    }
+	}
 
 	if (curlen > max_iosize) {
 		curlen = max_iosize;
 	}
 
-    if (curlen <= 0) {
+	if (curlen <= 0) {
 		panic("jnl: do_jnl_io: curlen == %d, offset 0x%llx len %zd\n", curlen, *offset, len);
-    }
+	}
 
 	if (*offset == 0 && (direction & JNL_HEADER) == 0) {
 		panic("jnl: request for i/o to jnl-header without JNL_HEADER flag set! (len %d, data %p)\n", curlen, data);
 	}
 
-    if (direction & JNL_READ)
-            buf_setflags(bp, B_READ);
-    else {
-            /*
-	     * don't have to set any flags
-	     */
-            vnode_startwrite(jnl->jdev);
-    }
-    buf_setsize(bp, curlen);
-    buf_setcount(bp, curlen);
-    buf_setdataptr(bp, (uintptr_t)data);
-    buf_setblkno(bp, (daddr64_t) ((jnl->jdev_offset + *offset) / (off_t)jnl->jhdr->jhdr_size));
-    buf_setlblkno(bp, (daddr64_t) ((jnl->jdev_offset + *offset) / (off_t)jnl->jhdr->jhdr_size));
-    if ((direction & JNL_WRITE) && (jnl->flags & JOURNAL_DO_FUA_WRITES)) {
-	buf_markfua(bp);
-    }
+	if (direction & JNL_READ)
+		buf_setflags(bp, B_READ);
+	else {
+		/*
+		 * don't have to set any flags
+		 */
+		vnode_startwrite(jnl->jdev);
+	}
+	buf_setsize(bp, curlen);
+	buf_setcount(bp, curlen);
+	buf_setdataptr(bp, (uintptr_t)data);
+	buf_setblkno(bp, (daddr64_t) ((jnl->jdev_offset + *offset) / (off_t)jnl->jhdr->jhdr_size));
+	buf_setlblkno(bp, (daddr64_t) ((jnl->jdev_offset + *offset) / (off_t)jnl->jhdr->jhdr_size));
+
+	if ((direction & JNL_WRITE) && (jnl->flags & JOURNAL_DO_FUA_WRITES)) {
+		buf_markfua(bp);
+	}
 
-    DTRACE_IO1(journal__start, buf_t, bp);
-    err = VNOP_STRATEGY(bp);
-    if (!err) {
+	DTRACE_IO1(journal__start, buf_t, bp);
+	err = VNOP_STRATEGY(bp);
+	if (!err) {
 		err = (int)buf_biowait(bp);
-    }
-    DTRACE_IO1(journal__done, buf_t, bp);
-    free_io_buf(bp);
+	}
+	DTRACE_IO1(journal__done, buf_t, bp);
+	free_io_buf(bp);
 
-    if (err) {
-	printf("jnl: %s: do_jnl_io: strategy err 0x%x\n", jnl->jdev_name, err);
-	return 0;
-    }
+	if (err) {
+		printf("jnl: %s: do_jnl_io: strategy err 0x%x\n", jnl->jdev_name, err);
+		return 0;
+	}
+
+	*offset += curlen;
+	io_sz   += curlen;
 
-    *offset += curlen;
-    io_sz   += curlen;
-    if (io_sz != len) {
+	if (io_sz != len) {
 		// handle wrap-around
 		data    = (char *)data + curlen;
 		curlen  = len - io_sz;
@@ -356,21 +410,21 @@ do_journal_io(journal *jnl, off_t *offset, void *data, size_t len, int direction
 			*offset = jnl->jhdr->jhdr_size;
 		}
 		goto again;
-    }
+	}
 
-    return io_sz;
+	return io_sz;
 }
 
 static size_t
 read_journal_data(journal *jnl, off_t *offset, void *data, size_t len)
 {
-    return do_journal_io(jnl, offset, data, len, JNL_READ);
+	return do_journal_io(jnl, offset, data, len, JNL_READ);
 }
 
 static size_t
 write_journal_data(journal *jnl, off_t *offset, void *data, size_t len)
 {
-    return do_journal_io(jnl, offset, data, len, JNL_WRITE);
+	return do_journal_io(jnl, offset, data, len, JNL_WRITE);
 }
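do_journal_io() above is the only real I/O primitive here; the subtlety is the circular-buffer arithmetic. Isolated as a sketch, the clipping rule applied before each pass is:

/*
 * An I/O that would run past the journal's end is clipped there;
 * an offset sitting exactly on the end wraps to just past the
 * header (offset 0 holds the journal header itself).
 */
static size_t
clip_journal_io(off_t *offset, size_t curlen, off_t jnl_size, off_t jhdr_size)
{
	if (*offset + (off_t)curlen > jnl_size && *offset != 0 && jnl_size != 0) {
		if (*offset == jnl_size)
			*offset = jhdr_size;			/* wrap */
		else
			curlen = (size_t)(jnl_size - *offset);	/* clip */
	}
	return (curlen);
}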
 
 
@@ -383,64 +437,66 @@ read_journal_header(journal *jnl, void *data, size_t len)
 }
 
 static int
-write_journal_header(journal *jnl, int updating_start)
-{
-    static int num_err_prints = 0;
-    int ret=0;
-    off_t jhdr_offset = 0;
-    struct vfs_context context;
-
-    context.vc_thread = current_thread();
-    context.vc_ucred = NOCRED;
-    // 
-    // Flush the track cache if we're not doing force-unit-access
-    // writes.
-    //
-    if (!updating_start && (jnl->flags & JOURNAL_DO_FUA_WRITES) == 0) {
-	ret = VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, &context);
-    }
-    if (ret != 0) {
-	//
-	// Only print this error if it's a different error than the
-	// previous one, or if it's the first time for this device
-	// or if the total number of printfs is less than 25.  We
-	// allow for up to 25 printfs to insure that some make it
-	// into the on-disk syslog.  Otherwise if we only printed
-	// one, it's possible it would never make it to the syslog
-	// for the root volume and that makes debugging hard.
+write_journal_header(journal *jnl, int updating_start, uint32_t sequence_num)
+{
+	static int num_err_prints = 0;
+	int ret=0;
+	off_t jhdr_offset = 0;
+	struct vfs_context context;
+
+	context.vc_thread = current_thread();
+	context.vc_ucred = NOCRED;
+	// 
+	// Flush the track cache if we're not doing force-unit-access
+	// writes.
 	//
-	if (   ret != jnl->last_flush_err
-	    || (jnl->flags & JOURNAL_FLUSHCACHE_ERR) == 0
-	    || num_err_prints++ < 25) {
+	if (!updating_start && (jnl->flags & JOURNAL_DO_FUA_WRITES) == 0) {
+		ret = VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, &context);
+	}
+	if (ret != 0) {
+		//
+		// Only print this error if it's a different error than the
+		// previous one, or if it's the first time for this device
+		// or if the total number of printfs is less than 25.  We
+		// allow for up to 25 printfs to ensure that some make it
+		// into the on-disk syslog.  Otherwise if we only printed
+		// one, it's possible it would never make it to the syslog
+		// for the root volume and that makes debugging hard.
+		//
+		if (   ret != jnl->last_flush_err
+		       || (jnl->flags & JOURNAL_FLUSHCACHE_ERR) == 0
+		       || num_err_prints++ < 25) {
 	    
-	    printf("jnl: %s: flushing fs disk buffer returned 0x%x\n", jnl->jdev_name, ret);
+			printf("jnl: %s: flushing fs disk buffer returned 0x%x\n", jnl->jdev_name, ret);
 	    
-	    jnl->flags |= JOURNAL_FLUSHCACHE_ERR;
-	    jnl->last_flush_err = ret;
+			jnl->flags |= JOURNAL_FLUSHCACHE_ERR;
+			jnl->last_flush_err = ret;
+		}
 	}
-    }
 
-    jnl->jhdr->checksum = 0;
-    jnl->jhdr->checksum = calc_checksum((char *)jnl->jhdr, JOURNAL_HEADER_CKSUM_SIZE);
-    if (do_journal_io(jnl, &jhdr_offset, jnl->header_buf, jnl->jhdr->jhdr_size, JNL_WRITE|JNL_HEADER) != (size_t)jnl->jhdr->jhdr_size) {
-	printf("jnl: %s: write_journal_header: error writing the journal header!\n", jnl->jdev_name);
-	jnl->flags |= JOURNAL_INVALID;
-	return -1;
-    }	
-
-    // If we're not doing force-unit-access writes, then we
-    // have to flush after writing the journal header so that
-    // a future transaction doesn't sneak out to disk before
-    // the header does and thus overwrite data that the old
-    // journal header refers to.  Saw this exact case happen
-    // on an IDE bus analyzer with Larry Barras so while it
-    // may seem obscure, it's not.
-    //
-    if (updating_start && (jnl->flags & JOURNAL_DO_FUA_WRITES) == 0) {
-	VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, &context);
-    }
+	jnl->jhdr->sequence_num = sequence_num;
+	jnl->jhdr->checksum = 0;
+	jnl->jhdr->checksum = calc_checksum((char *)jnl->jhdr, JOURNAL_HEADER_CKSUM_SIZE);
 
-    return 0;
+	if (do_journal_io(jnl, &jhdr_offset, jnl->header_buf, jnl->jhdr->jhdr_size, JNL_WRITE|JNL_HEADER) != (size_t)jnl->jhdr->jhdr_size) {
+		printf("jnl: %s: write_journal_header: error writing the journal header!\n", jnl->jdev_name);
+		jnl->flags |= JOURNAL_INVALID;
+		return -1;
+	}	
+
+	// If we're not doing force-unit-access writes, then we
+	// have to flush after writing the journal header so that
+	// a future transaction doesn't sneak out to disk before
+	// the header does and thus overwrite data that the old
+	// journal header refers to.  Saw this exact case happen
+	// on an IDE bus analyzer with Larry Barras so while it
+	// may seem obscure, it's not.
+	//
+	if (updating_start && (jnl->flags & JOURNAL_DO_FUA_WRITES) == 0) {
+		VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, &context);
+	}
+
+	return 0;
 }
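The two DKIOCSYNCHRONIZECACHE calls in write_journal_header() sit on opposite sides of the header write for two distinct reasons: flushing before publishing a new transaction makes the journaled data durable before the header names it, while flushing after moving the start pointer keeps a later transaction from reaching disk ahead of the header and clobbering space the old header still covered. A compact model of that ordering, with flush_cache() and write_header() as stand-ins for the ioctl and the header I/O:

/*
 * Ordering model only: do_fua_writes plays the role of
 * JOURNAL_DO_FUA_WRITES, under which neither flush is needed.
 */
#include <stdio.h>

static void flush_cache(void)  { printf("flush track cache\n"); }
static void write_header(void) { printf("write journal header\n"); }

static void
write_header_ordered(int updating_start, int do_fua_writes)
{
	// flush first so everything the new header points at is durable
	if (!updating_start && !do_fua_writes)
		flush_cache();

	write_header();

	// flush afterwards so later writes can't pass the header update
	if (updating_start && !do_fua_writes)
		flush_cache();
}

int
main(void)
{
	write_header_ordered(1 /* updating_start */, 0 /* no FUA */);
	return 0;
}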
 
 
@@ -456,18 +512,29 @@ write_journal_header(journal *jnl, int updating_start)
 static void
 free_old_stuff(journal *jnl)
 {
-    transaction *tr, *next;
+	transaction *tr, *next;
+	block_list_header  *blhdr=NULL, *next_blhdr=NULL;
 
-    lock_oldstart(jnl);
-    tr = jnl->tr_freeme;
-    jnl->tr_freeme = NULL;
-    unlock_oldstart(jnl);
+	if (jnl->tr_freeme == NULL)
+		return;
 
-    for(; tr; tr=next) {
-	next = tr->next;
-	FREE_ZONE(tr, sizeof(transaction), M_JNL_TR);
-    }
+	lock_oldstart(jnl);
+	tr = jnl->tr_freeme;
+	jnl->tr_freeme = NULL;
+	unlock_oldstart(jnl);
+
+	for(; tr; tr=next) {
+		for (blhdr = tr->blhdr; blhdr; blhdr = next_blhdr) {
+			next_blhdr = (block_list_header *)((long)blhdr->binfo[0].bnum);
+			blhdr->binfo[0].bnum = 0xdeadc0de;
+		    
+			kmem_free(kernel_map, (vm_offset_t)blhdr, tr->tbuffer_size);
 
+			KERNEL_DEBUG(0xbbbbc01c, jnl, tr, tr->tbuffer_size, 0, 0);
+		}
+		next = tr->next;
+		FREE_ZONE(tr, sizeof(transaction), M_JNL_TR);
+	}
 }
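Note the trick in free_old_stuff(): the chain of block_list_headers has no next field, so the pointer to the next header is smuggled through binfo[0].bnum and must be recovered before each node is freed (then poisoned with 0xdeadc0de so stale references fail loudly). A small standalone sketch of that recover-then-free walk, with malloc/free standing in for kmem_alloc/kmem_free and the struct trimmed to one field:

/*
 * Sketch only: next_as_bnum plays the role of binfo[0].bnum.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

struct blhdr {
	int64_t next_as_bnum;	// embedded next pointer, as an integer
};

static void
free_blhdr_chain(struct blhdr *head)
{
	struct blhdr *cur, *next;

	for (cur = head; cur != NULL; cur = next) {
		// recover the embedded next pointer before freeing
		next = (struct blhdr *)(intptr_t)cur->next_as_bnum;
		cur->next_as_bnum = (int64_t)0xdeadc0de;	// poison
		free(cur);
	}
}

int
main(void)
{
	struct blhdr *b = malloc(sizeof(*b));
	struct blhdr *a = malloc(sizeof(*a));

	b->next_as_bnum = 0;
	a->next_as_bnum = (int64_t)(intptr_t)b;
	free_blhdr_chain(a);
	printf("chain freed\n");
	return 0;
}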
 
 
@@ -481,167 +548,169 @@ free_old_stuff(journal *jnl)
 static void
 buffer_flushed_callback(struct buf *bp, void *arg)
 {
-    transaction  *tr;
-    journal      *jnl;
-    transaction  *ctr, *prev=NULL, *next;
-    size_t        i;
-    int           bufsize, amt_flushed, total_bytes;
+	transaction  *tr;
+	journal      *jnl;
+	transaction  *ctr, *prev=NULL, *next;
+	size_t        i;
+	int           bufsize, amt_flushed, total_bytes;
 
 
-    //printf("jnl: buf flush: bp @ 0x%x l/blkno %qd/%qd vp 0x%x tr @ 0x%x\n",
-    //	   bp, buf_lblkno(bp), buf_blkno(bp), buf_vnode(bp), arg);
+	//printf("jnl: buf flush: bp @ 0x%x l/blkno %qd/%qd vp 0x%x tr @ 0x%x\n",
+	//	   bp, buf_lblkno(bp), buf_blkno(bp), buf_vnode(bp), arg);
 
-    // snarf out the bits we want
-    bufsize = buf_size(bp);
-    tr      = (transaction *)arg;
+	// snarf out the bits we want
+	bufsize = buf_size(bp);
+	tr      = (transaction *)arg;
 
-    // then we've already seen it
-    if (tr == NULL) {
+	// then we've already seen it
+	if (tr == NULL) {
 		return;
-    }
+	}
 
-    CHECK_TRANSACTION(tr);
+	CHECK_TRANSACTION(tr);
 
-    jnl = tr->jnl;
-    if (jnl->flags & JOURNAL_INVALID) {
+	jnl = tr->jnl;
+	if (jnl->flags & JOURNAL_INVALID) {
 		return;
-    }
+	}
 
-    CHECK_JOURNAL(jnl);
+	CHECK_JOURNAL(jnl);
 
-    amt_flushed = tr->num_killed;
-    total_bytes = tr->total_bytes;
+	amt_flushed = tr->num_killed;
+	total_bytes = tr->total_bytes;
     
-    // update the number of blocks that have been flushed.
-    // this buf may represent more than one block so take
-    // that into account.
-    //
-    // OSAddAtomic() returns the value of tr->num_flushed before the add
-    //
-    amt_flushed += OSAddAtomic(bufsize, &tr->num_flushed);
+	// update the number of blocks that have been flushed.
+	// this buf may represent more than one block so take
+	// that into account.
+	//
+	// OSAddAtomic() returns the value of tr->num_flushed before the add
+	//
+	amt_flushed += OSAddAtomic(bufsize, &tr->num_flushed);
 
 
-    // if this transaction isn't done yet, just return as
-    // there is nothing to do.
-    //
-    // NOTE: we are careful to not reference anything through
-    //       the tr pointer after doing the OSAddAtomic().  if
-    //       this if statement fails then we are the last one
-    //       and then it's ok to dereference "tr".
-    //
-    if ((amt_flushed + bufsize) < total_bytes) {
+	// if this transaction isn't done yet, just return as
+	// there is nothing to do.
+	//
+	// NOTE: we are careful to not reference anything through
+	//       the tr pointer after doing the OSAddAtomic().  if
+	//       this if statement fails then we are the last one
+	//       and then it's ok to dereference "tr".
+	//
+	if ((amt_flushed + bufsize) < total_bytes) {
 		return;
-    }
+	}
 
-    // this will single thread checking the transaction
-    lock_oldstart(jnl);
+	// this will single thread checking the transaction
+	lock_oldstart(jnl);
 
-    if (tr->total_bytes == (int)0xfbadc0de) {
-	// then someone beat us to it...
-	unlock_oldstart(jnl);
-	return;
-    }
+	if (tr->total_bytes == (int)0xfbadc0de) {
+		// then someone beat us to it...
+		unlock_oldstart(jnl);
+		return;
+	}
 
-    // mark this so that we're the owner of dealing with the
-    // cleanup for this transaction
-    tr->total_bytes = 0xfbadc0de;
+	// mark this so that we're the owner of dealing with the
+	// cleanup for this transaction
+	tr->total_bytes = 0xfbadc0de;
 
-    //printf("jnl: tr 0x%x (0x%llx 0x%llx) in jnl 0x%x completed.\n",
-    //   tr, tr->journal_start, tr->journal_end, jnl);
+	//printf("jnl: tr 0x%x (0x%llx 0x%llx) in jnl 0x%x completed.\n",
+	//   tr, tr->journal_start, tr->journal_end, jnl);
 
-    // find this entry in the old_start[] index and mark it completed
-    for(i=0; i < sizeof(jnl->old_start)/sizeof(jnl->old_start[0]); i++) {
+	// find this entry in the old_start[] index and mark it completed
+	for(i = 0; i < sizeof(jnl->old_start)/sizeof(jnl->old_start[0]); i++) {
 	
-	if ((off_t)(jnl->old_start[i] & ~(0x8000000000000000ULL)) == tr->journal_start) {
-	    jnl->old_start[i] &= ~(0x8000000000000000ULL);
-	    break;
+		if ((off_t)(jnl->old_start[i] & ~(0x8000000000000000ULL)) == tr->journal_start) {
+			jnl->old_start[i] &= ~(0x8000000000000000ULL);
+			break;
+		}
 	}
-    }
 
-    if (i >= sizeof(jnl->old_start)/sizeof(jnl->old_start[0])) {
-	panic("jnl: buffer_flushed: did not find tr w/start @ %lld (tr %p, jnl %p)\n",
-	    tr->journal_start, tr, jnl);
-    }
+	if (i >= sizeof(jnl->old_start)/sizeof(jnl->old_start[0])) {
+		panic("jnl: buffer_flushed: did not find tr w/start @ %lld (tr %p, jnl %p)\n",
+		      tr->journal_start, tr, jnl);
+	}
 
 
-    // if we are here then we need to update the journal header
-    // to reflect that this transaction is complete
-    if (tr->journal_start == jnl->active_start) {
-	jnl->active_start = tr->journal_end;
-	tr->journal_start = tr->journal_end = (off_t)0;
-    }
+	// if we are here then we need to update the journal header
+	// to reflect that this transaction is complete
+	if (tr->journal_start == jnl->active_start) {
+		jnl->active_start = tr->journal_end;
+		tr->journal_start = tr->journal_end = (off_t)0;
+	}
 
-    // go through the completed_trs list and try to coalesce
-    // entries, restarting back at the beginning if we have to.
-    for(ctr=jnl->completed_trs; ctr; prev=ctr, ctr=next) {
-	if (ctr->journal_start == jnl->active_start) {
-	    jnl->active_start = ctr->journal_end;
-	    if (prev) {
-		prev->next = ctr->next;
-	    }
-	    if (ctr == jnl->completed_trs) {
-		jnl->completed_trs = ctr->next;
-	    }
+	// go through the completed_trs list and try to coalesce
+	// entries, restarting back at the beginning if we have to.
+	for (ctr = jnl->completed_trs; ctr; prev=ctr, ctr=next) {
+		if (ctr->journal_start == jnl->active_start) {
+			jnl->active_start = ctr->journal_end;
+			if (prev) {
+				prev->next = ctr->next;
+			}
+			if (ctr == jnl->completed_trs) {
+				jnl->completed_trs = ctr->next;
+			}
 	    
-	    next           = jnl->completed_trs;   // this starts us over again
-	    ctr->next      = jnl->tr_freeme;
-	    jnl->tr_freeme = ctr;
-	    ctr            = NULL;
-	} else if (tr->journal_end == ctr->journal_start) {
-	    ctr->journal_start = tr->journal_start;
-	    next               = jnl->completed_trs;  // this starts us over again
-	    ctr                = NULL;
-	    tr->journal_start  = tr->journal_end = (off_t)0;
-	} else if (tr->journal_start == ctr->journal_end) {
-	    ctr->journal_end  = tr->journal_end;
-	    next              = ctr->next;
-	    tr->journal_start = tr->journal_end = (off_t)0;
-	} else if (ctr->next && ctr->journal_end == ctr->next->journal_start) {
-	    // coalesce the next entry with this one and link the next
-	    // entry in at the head of the tr_freeme list
-	    next              = ctr->next;           // temporarily use the "next" variable
-	    ctr->journal_end  = next->journal_end;
-	    ctr->next         = next->next;
-	    next->next        = jnl->tr_freeme;      // link in the next guy at the head of the tr_freeme list
-	    jnl->tr_freeme    = next;
-
-	    next              = jnl->completed_trs;  // this starts us over again
-	    ctr               = NULL;
-	} else {
-	    next = ctr->next;
+			next           = jnl->completed_trs;   // this starts us over again
+			ctr->next      = jnl->tr_freeme;
+			jnl->tr_freeme = ctr;
+			ctr            = NULL;
+		} else if (tr->journal_end == ctr->journal_start) {
+			ctr->journal_start = tr->journal_start;
+			next               = jnl->completed_trs;  // this starts us over again
+			ctr                = NULL;
+			tr->journal_start  = tr->journal_end = (off_t)0;
+		} else if (tr->journal_start == ctr->journal_end) {
+			ctr->journal_end  = tr->journal_end;
+			next              = ctr->next;
+			tr->journal_start = tr->journal_end = (off_t)0;
+		} else if (ctr->next && ctr->journal_end == ctr->next->journal_start) {
+			// coalesce the next entry with this one and link the next
+			// entry in at the head of the tr_freeme list
+			next              = ctr->next;           // temporarily use the "next" variable
+			ctr->journal_end  = next->journal_end;
+			ctr->next         = next->next;
+			next->next        = jnl->tr_freeme;      // link in the next guy at the head of the tr_freeme list
+			jnl->tr_freeme    = next;
+
+			next              = jnl->completed_trs;  // this starts us over again
+			ctr               = NULL;
+		} else {
+			next = ctr->next;
+		}
 	}
-    }
     
-    // if this is true then we didn't merge with anyone
-    // so link ourselves in at the head of the completed
-    // transaction list.
-    if (tr->journal_start != 0) {
-	// put this entry into the correct sorted place
-	// in the list instead of just at the head.
-	//
+	// if this is true then we didn't merge with anyone
+	// so link ourselves in at the head of the completed
+	// transaction list.
+	if (tr->journal_start != 0) {
+		// put this entry into the correct sorted place
+		// in the list instead of just at the head.
+		//
 	
-	prev = NULL;
-	for(ctr=jnl->completed_trs; ctr && tr->journal_start > ctr->journal_start; prev=ctr, ctr=ctr->next) {
-	    // just keep looping
-	}
+		prev = NULL;
+		for (ctr = jnl->completed_trs; ctr && tr->journal_start > ctr->journal_start; prev=ctr, ctr=ctr->next) {
+			// just keep looping
+		}
 
-	if (ctr == NULL && prev == NULL) {
-	    jnl->completed_trs = tr;
-	    tr->next = NULL;
-	} else if (ctr == jnl->completed_trs) {
-	    tr->next = jnl->completed_trs;
-	    jnl->completed_trs = tr;
+		if (ctr == NULL && prev == NULL) {
+			jnl->completed_trs = tr;
+			tr->next = NULL;
+		} else if (ctr == jnl->completed_trs) {
+			tr->next = jnl->completed_trs;
+			jnl->completed_trs = tr;
+		} else {
+			tr->next = prev->next;
+			prev->next = tr;
+		}
 	} else {
-	    tr->next = prev->next;
-	    prev->next = tr;
-	}
-    } else {
-	// if we're here this tr got merged with someone else so
-	// put it on the list to be free'd
-	tr->next       = jnl->tr_freeme;
-	jnl->tr_freeme = tr;
-    }
-    unlock_oldstart(jnl);
+		// if we're here this tr got merged with someone else so
+		// put it on the list to be free'd
+		tr->next       = jnl->tr_freeme;
+		jnl->tr_freeme = tr;
+	}
+	unlock_oldstart(jnl);
+
+	unlock_condition(jnl, &jnl->asyncIO);
 }
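The completion test near the top of buffer_flushed_callback() is the classic last-one-out pattern: every buffer's callback atomically adds its size to tr->num_flushed, and only the caller whose addition reaches tr->total_bytes may touch the transaction again. A sketch of just that test in C11 atomics (atomic_fetch_add returns the pre-add value exactly like OSAddAtomic()); the real callback also seeds the running total with tr->num_killed, which this sketch omits:

#include <stdio.h>
#include <stdatomic.h>

struct txn {
	atomic_int num_flushed;	// bytes flushed so far
	int        total_bytes;	// bytes in the whole transaction
};

static void
buf_done(struct txn *tr, int bufsize)
{
	int before = atomic_fetch_add(&tr->num_flushed, bufsize);

	// not the last buffer: must not dereference tr any further
	if (before + bufsize < tr->total_bytes)
		return;

	printf("last buffer (%d bytes): transaction complete\n", bufsize);
}

int
main(void)
{
	struct txn tr = { 0, 24 };

	buf_done(&tr, 8);
	buf_done(&tr, 8);
	buf_done(&tr, 8);	// this call detects completion
	return 0;
}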
 
 
@@ -655,51 +724,51 @@ buffer_flushed_callback(struct buf *bp, void *arg)
 static void
 swap_journal_header(journal *jnl)
 {
-    jnl->jhdr->magic      = SWAP32(jnl->jhdr->magic);
-    jnl->jhdr->endian     = SWAP32(jnl->jhdr->endian);
-    jnl->jhdr->start      = SWAP64(jnl->jhdr->start);
-    jnl->jhdr->end        = SWAP64(jnl->jhdr->end);
-    jnl->jhdr->size       = SWAP64(jnl->jhdr->size);
-    jnl->jhdr->blhdr_size = SWAP32(jnl->jhdr->blhdr_size);
-    jnl->jhdr->checksum   = SWAP32(jnl->jhdr->checksum);
-    jnl->jhdr->jhdr_size  = SWAP32(jnl->jhdr->jhdr_size);
-    jnl->jhdr->sequence_num  = SWAP32(jnl->jhdr->sequence_num);
+	jnl->jhdr->magic      = SWAP32(jnl->jhdr->magic);
+	jnl->jhdr->endian     = SWAP32(jnl->jhdr->endian);
+	jnl->jhdr->start      = SWAP64(jnl->jhdr->start);
+	jnl->jhdr->end        = SWAP64(jnl->jhdr->end);
+	jnl->jhdr->size       = SWAP64(jnl->jhdr->size);
+	jnl->jhdr->blhdr_size = SWAP32(jnl->jhdr->blhdr_size);
+	jnl->jhdr->checksum   = SWAP32(jnl->jhdr->checksum);
+	jnl->jhdr->jhdr_size  = SWAP32(jnl->jhdr->jhdr_size);
+	jnl->jhdr->sequence_num  = SWAP32(jnl->jhdr->sequence_num);
 }
 
 static void
 swap_block_list_header(journal *jnl, block_list_header *blhdr)
 {
-    int i;
+	int i;
     
-    blhdr->max_blocks = SWAP16(blhdr->max_blocks);
-    blhdr->num_blocks = SWAP16(blhdr->num_blocks);
-    blhdr->bytes_used = SWAP32(blhdr->bytes_used);
-    blhdr->checksum   = SWAP32(blhdr->checksum);
-    blhdr->flags      = SWAP32(blhdr->flags);
-
-    if (blhdr->num_blocks >= ((jnl->jhdr->blhdr_size / sizeof(block_info)) - 1)) {
-	printf("jnl: %s: blhdr num blocks looks suspicious (%d / blhdr size %d).  not swapping.\n", jnl->jdev_name, blhdr->num_blocks, jnl->jhdr->blhdr_size);
-	return;
-    }
+	blhdr->max_blocks = SWAP16(blhdr->max_blocks);
+	blhdr->num_blocks = SWAP16(blhdr->num_blocks);
+	blhdr->bytes_used = SWAP32(blhdr->bytes_used);
+	blhdr->checksum   = SWAP32(blhdr->checksum);
+	blhdr->flags      = SWAP32(blhdr->flags);
+
+	if (blhdr->num_blocks >= ((jnl->jhdr->blhdr_size / sizeof(block_info)) - 1)) {
+		printf("jnl: %s: blhdr num blocks looks suspicious (%d / blhdr size %d).  not swapping.\n", jnl->jdev_name, blhdr->num_blocks, jnl->jhdr->blhdr_size);
+		return;
+	}
 
-    for(i=0; i < blhdr->num_blocks; i++) {
+	for(i = 0; i < blhdr->num_blocks; i++) {
 		blhdr->binfo[i].bnum    = SWAP64(blhdr->binfo[i].bnum);
 		blhdr->binfo[i].u.bi.bsize   = SWAP32(blhdr->binfo[i].u.bi.bsize);
 		blhdr->binfo[i].u.bi.b.cksum = SWAP32(blhdr->binfo[i].u.bi.b.cksum);
-    }
+	}
 }
 
 
 static int
 update_fs_block(journal *jnl, void *block_ptr, off_t fs_block, size_t bsize)
 {
-    int		ret;
-    struct buf *oblock_bp=NULL;
+	int		ret;
+	struct buf *oblock_bp=NULL;
     
-    // first read the block we want.
-    ret = buf_meta_bread(jnl->fsdev, (daddr64_t)fs_block, bsize, NOCRED, &oblock_bp);
-    if (ret != 0) {
-	printf("jnl: %s: update_fs_block: error reading fs block # %lld! (ret %d)\n", jnl->jdev_name, fs_block, ret);
+	// first read the block we want.
+	ret = buf_meta_bread(jnl->fsdev, (daddr64_t)fs_block, bsize, NOCRED, &oblock_bp);
+	if (ret != 0) {
+		printf("jnl: %s: update_fs_block: error reading fs block # %lld! (ret %d)\n", jnl->jdev_name, fs_block, ret);
 
 		if (oblock_bp) {
 			buf_brelse(oblock_bp);
@@ -709,277 +778,277 @@ update_fs_block(journal *jnl, void *block_ptr, off_t fs_block, size_t bsize)
 		// let's try to be aggressive here and just re-write the block
 		oblock_bp = buf_getblk(jnl->fsdev, (daddr64_t)fs_block, bsize, 0, 0, BLK_META);
 		if (oblock_bp == NULL) {
-		    printf("jnl: %s: update_fs_block: buf_getblk() for %lld failed! failing update.\n", jnl->jdev_name, fs_block);
-		    return -1;
+			printf("jnl: %s: update_fs_block: buf_getblk() for %lld failed! failing update.\n", jnl->jdev_name, fs_block);
+			return -1;
 		}
-    }
+	}
 	    
-    // make sure it's the correct size.
-    if (buf_size(oblock_bp) != bsize) {
+	// make sure it's the correct size.
+	if (buf_size(oblock_bp) != bsize) {
 		buf_brelse(oblock_bp);
 		return -1;
-    }
+	}
 
-    // copy the journal data over top of it
-    memcpy((char *)0 + buf_dataptr(oblock_bp), block_ptr, bsize);
+	// copy the journal data over top of it
+	memcpy((char *)buf_dataptr(oblock_bp), block_ptr, bsize);
 
-    if ((ret = VNOP_BWRITE(oblock_bp)) != 0) {
-	printf("jnl: %s: update_fs_block: failed to update block %lld (ret %d)\n", jnl->jdev_name, fs_block,ret);
-	return ret;
-    }
+	if ((ret = VNOP_BWRITE(oblock_bp)) != 0) {
+		printf("jnl: %s: update_fs_block: failed to update block %lld (ret %d)\n", jnl->jdev_name, fs_block,ret);
+		return ret;
+	}
 
-    // and now invalidate it so that if someone else wants to read
-    // it in a different size they'll be able to do it.
-    ret = buf_meta_bread(jnl->fsdev, (daddr64_t)fs_block, bsize, NOCRED, &oblock_bp);
-    if (oblock_bp) {
+	// and now invalidate it so that if someone else wants to read
+	// it in a different size they'll be able to do it.
+	ret = buf_meta_bread(jnl->fsdev, (daddr64_t)fs_block, bsize, NOCRED, &oblock_bp);
+	if (oblock_bp) {
                 buf_markinvalid(oblock_bp);
 		buf_brelse(oblock_bp);
-    }
+	}
 	    
-    return 0;
+	return 0;
 }
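update_fs_block() is a plain read-modify-write: fetch the block at its current size, overlay the journal copy, write it back synchronously, then invalidate the cached buffer so a reader asking for a different block size won't hit a stale cache entry. A userspace analogue under assumed names (replay_block, a demo file path), with POSIX pread/pwrite standing in for buf_meta_bread/VNOP_BWRITE; the buf_markinvalid() step has no userspace twin:

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

static int
replay_block(int fd, off_t block_off, const char *jnl_copy, size_t bsize)
{
	char buf[4096];

	if (bsize > sizeof(buf))
		return -1;
	// read the current on-disk block (mirrors buf_meta_bread)
	if (pread(fd, buf, bsize, block_off) != (ssize_t)bsize)
		return -1;
	// copy the journal data over top of it
	memcpy(buf, jnl_copy, bsize);
	// write it back synchronously (mirrors VNOP_BWRITE)
	if (pwrite(fd, buf, bsize, block_off) != (ssize_t)bsize)
		return -1;
	return 0;
}

int
main(void)
{
	char zeros[512] = { 0 }, payload[512];
	int fd = open("/tmp/jnl_demo.img", O_RDWR | O_CREAT, 0600);

	if (fd < 0)
		return 1;
	memset(payload, 0xab, sizeof(payload));
	pwrite(fd, zeros, sizeof(zeros), 0);	// seed one block
	if (replay_block(fd, 0, payload, sizeof(payload)) == 0)
		printf("block replayed\n");
	close(fd);
	return 0;
}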
 
 static int
 grow_table(struct bucket **buf_ptr, int num_buckets, int new_size)
 {
-    struct bucket *newBuf;
-    int current_size = num_buckets, i;
+	struct bucket *newBuf;
+	int current_size = num_buckets, i;
     
-    // return if newsize is less than the current size
-    if (new_size < num_buckets) {
-	return current_size;
-    }
+	// return if newsize is less than the current size
+	if (new_size < num_buckets) {
+		return current_size;
+	}
     
-    if ((MALLOC(newBuf, struct bucket *, new_size*sizeof(struct bucket), M_TEMP, M_WAITOK)) == NULL) {
-	printf("jnl: grow_table: no memory to expand coalesce buffer!\n");
-	return -1;
-    }
+	if ((MALLOC(newBuf, struct bucket *, new_size*sizeof(struct bucket), M_TEMP, M_WAITOK)) == NULL) {
+		printf("jnl: grow_table: no memory to expand coalesce buffer!\n");
+		return -1;
+	}
     
-    //  printf("jnl: lookup_bucket: expanded co_buf to %d elems\n", new_size);
+	//  printf("jnl: lookup_bucket: expanded co_buf to %d elems\n", new_size);
     
-    // copy existing elements 
-    bcopy(*buf_ptr, newBuf, num_buckets*sizeof(struct bucket));
+	// copy existing elements 
+	bcopy(*buf_ptr, newBuf, num_buckets*sizeof(struct bucket));
     
-    // initialize the new ones
-    for(i=num_buckets; i < new_size; i++) {
-	newBuf[i].block_num = (off_t)-1;
-    }
+	// initialize the new ones
+	for(i = num_buckets; i < new_size; i++) {
+		newBuf[i].block_num = (off_t)-1;
+	}
     
-    // free the old container
-    FREE(*buf_ptr, M_TEMP);
+	// free the old container
+	FREE(*buf_ptr, M_TEMP);
     
-    // reset the buf_ptr
-    *buf_ptr = newBuf;
+	// reset the buf_ptr
+	*buf_ptr = newBuf;
     
-    return new_size;
+	return new_size;
 }
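grow_table() is a textbook grow-by-copy: allocate the larger array, copy the live buckets, mark every new tail slot free with block_num == -1, and swap the caller's pointer. The shape, with plain malloc/free in place of MALLOC/FREE and the bucket trimmed to the one field the sketch needs:

#include <stdlib.h>
#include <string.h>

struct bucket { long long block_num; };

static int
grow_buckets(struct bucket **buf_ptr, int num_buckets, int new_size)
{
	struct bucket *nb;
	int i;

	if (new_size < num_buckets)
		return num_buckets;	// never shrink

	nb = malloc(new_size * sizeof(*nb));
	if (nb == NULL)
		return -1;

	memcpy(nb, *buf_ptr, num_buckets * sizeof(*nb));
	for (i = num_buckets; i < new_size; i++)
		nb[i].block_num = -1;	// mark the new slots free

	free(*buf_ptr);
	*buf_ptr = nb;
	return new_size;
}

int
main(void)
{
	struct bucket *tab = malloc(4 * sizeof(*tab));
	int i, cap;

	for (i = 0; i < 4; i++)
		tab[i].block_num = i;
	cap = grow_buckets(&tab, 4, 8);	// the caller doubles: 4 -> 8
	free(tab);
	return cap == 8 ? 0 : 1;
}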
 
 static int
 lookup_bucket(struct bucket **buf_ptr, off_t block_num, int num_full)
 {
-    int lo, hi, index, matches, i;
+	int lo, hi, index, matches, i;
     
-    if (num_full == 0) {
-	return 0; // table is empty, so insert at index=0
-    }
+	if (num_full == 0) {
+		return 0; // table is empty, so insert at index=0
+	}
     
-    lo = 0;
-    hi = num_full - 1;
-    index = -1;
+	lo = 0;
+	hi = num_full - 1;
+	index = -1;
     
-    // perform binary search for block_num
-    do {
-	int mid = (hi - lo)/2 + lo;
-	off_t this_num = (*buf_ptr)[mid].block_num;
+	// perform binary search for block_num
+	do {
+		int mid = (hi - lo)/2 + lo;
+		off_t this_num = (*buf_ptr)[mid].block_num;
 	
-	if (block_num == this_num) {
-	    index = mid;
-	    break;
-	}
+		if (block_num == this_num) {
+			index = mid;
+			break;
+		}
 	
-	if (block_num < this_num) {
-	    hi = mid;
-	    continue;
-	}
+		if (block_num < this_num) {
+			hi = mid;
+			continue;
+		}
 	
-	if (block_num > this_num) {
-	    lo = mid + 1;
-	    continue;
-	}
-    } while(lo < hi);
+		if (block_num > this_num) {
+			lo = mid + 1;
+			continue;
+		}
+	} while (lo < hi);
     
-    // check if lo and hi converged on the match
-    if (block_num == (*buf_ptr)[hi].block_num) {
-	index = hi;
-    }
+	// check if lo and hi converged on the match
+	if (block_num == (*buf_ptr)[hi].block_num) {
+		index = hi;
+	}
     
-    // if no existing entry found, find index for new one
-    if (index == -1) {
-	index = (block_num < (*buf_ptr)[hi].block_num) ? hi : hi + 1;
-    } else {
-	// make sure that we return the right-most index in the case of multiple matches
-	matches = 0;
-	i = index + 1;
-	while(i < num_full && block_num == (*buf_ptr)[i].block_num) {
-	    matches++;
-	    i++;
-	}
-
-	index += matches;
-    }
+	// if no existing entry found, find index for new one
+	if (index == -1) {
+		index = (block_num < (*buf_ptr)[hi].block_num) ? hi : hi + 1;
+	} else {
+		// make sure that we return the right-most index in the case of multiple matches
+		matches = 0;
+		i = index + 1;
+		while (i < num_full && block_num == (*buf_ptr)[i].block_num) {
+			matches++;
+			i++;
+		}
+
+		index += matches;
+	}
     
-    return index;
+	return index;
 }
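lookup_bucket() answers one question about the sorted table: if block_num is present, return the right-most matching index (so the latest journal write for a block is the one that gets overwritten); otherwise return the slot where it should be inserted. A self-contained restatement of that contract using a half-open binary search; not the kernel routine, which instead patches up the right-most match with a separate forward scan:

#include <stdio.h>

static int
find_slot(const long long *a, int n, long long key)
{
	int lo = 0, hi = n;	// half-open [lo, hi)

	while (lo < hi) {
		int mid = lo + (hi - lo) / 2;

		if (a[mid] <= key)
			lo = mid + 1;	// skip equal keys: right-most wins
		else
			hi = mid;
	}
	// lo is one past the right-most entry <= key; if that entry
	// equals the key it is the match, otherwise lo is the insert slot
	return (lo > 0 && a[lo - 1] == key) ? lo - 1 : lo;
}

int
main(void)
{
	long long a[] = { 2, 5, 5, 9 };

	printf("%d %d %d\n",
	       find_slot(a, 4, 5),	// 2: right-most 5
	       find_slot(a, 4, 7),	// 3: insert before 9
	       find_slot(a, 4, 1));	// 0: insert at front
	return 0;
}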
 
 static int
 insert_block(journal *jnl, struct bucket **buf_ptr, int blk_index, off_t num, size_t size, size_t offset, int32_t cksum, int *num_buckets_ptr, int *num_full_ptr, int overwriting)
 {
-    if (!overwriting) {
-	// grow the table if we're out of space
-	if (*num_full_ptr >= *num_buckets_ptr) {
-	    int new_size = *num_buckets_ptr * 2;
-	    int grow_size = grow_table(buf_ptr, *num_buckets_ptr, new_size);
+	if (!overwriting) {
+		// grow the table if we're out of space
+		if (*num_full_ptr >= *num_buckets_ptr) {
+			int new_size = *num_buckets_ptr * 2;
+			int grow_size = grow_table(buf_ptr, *num_buckets_ptr, new_size);
 	    
-	    if (grow_size < new_size) {
-		printf("jnl: %s: add_block: grow_table returned an error!\n", jnl->jdev_name);
-		return -1;
-	    }
+			if (grow_size < new_size) {
+				printf("jnl: %s: add_block: grow_table returned an error!\n", jnl->jdev_name);
+				return -1;
+			}
 	    
-	    *num_buckets_ptr = grow_size; //update num_buckets to reflect the new size
-	}
+			*num_buckets_ptr = grow_size; //update num_buckets to reflect the new size
+		}
 	
-	// if we're not inserting at the end, we need to bcopy
-	if (blk_index != *num_full_ptr) {
-	    bcopy( (*buf_ptr)+(blk_index), (*buf_ptr)+(blk_index+1), (*num_full_ptr-blk_index)*sizeof(struct bucket) );
-	}
+		// if we're not inserting at the end, we need to bcopy
+		if (blk_index != *num_full_ptr) {
+			bcopy( (*buf_ptr)+(blk_index), (*buf_ptr)+(blk_index+1), (*num_full_ptr-blk_index)*sizeof(struct bucket) );
+		}
 	
-	(*num_full_ptr)++; // increment only if we're not overwriting
-    }
+		(*num_full_ptr)++; // increment only if we're not overwriting
+	}
 
-    // sanity check the values we're about to add
-    if ((off_t)offset >= jnl->jhdr->size) {
-	offset = jnl->jhdr->jhdr_size + (offset - jnl->jhdr->size);
-    }
-    if (size <= 0) {
-	panic("jnl: insert_block: bad size in insert_block (%zd)\n", size);
-    }	 
-
-    (*buf_ptr)[blk_index].block_num = num;
-    (*buf_ptr)[blk_index].block_size = size;
-    (*buf_ptr)[blk_index].jnl_offset = offset;
-    (*buf_ptr)[blk_index].cksum = cksum;
+	// sanity check the values we're about to add
+	if ((off_t)offset >= jnl->jhdr->size) {
+		offset = jnl->jhdr->jhdr_size + (offset - jnl->jhdr->size);
+	}
+	if (size <= 0) {
+		panic("jnl: insert_block: bad size in insert_block (%zd)\n", size);
+	}	 
+
+	(*buf_ptr)[blk_index].block_num = num;
+	(*buf_ptr)[blk_index].block_size = size;
+	(*buf_ptr)[blk_index].jnl_offset = offset;
+	(*buf_ptr)[blk_index].cksum = cksum;
     
-    return blk_index;
+	return blk_index;
 }
 
 static int
 do_overlap(journal *jnl, struct bucket **buf_ptr, int blk_index, off_t block_num, size_t size, __unused size_t offset, int32_t cksum, int *num_buckets_ptr, int *num_full_ptr)
 {
-    int num_to_remove, index, i, overwrite, err;
-    size_t jhdr_size = jnl->jhdr->jhdr_size, new_offset;
-    off_t overlap, block_start, block_end;
-
-    block_start = block_num*jhdr_size;
-    block_end = block_start + size;
-    overwrite = (block_num == (*buf_ptr)[blk_index].block_num && size >= (*buf_ptr)[blk_index].block_size);
-
-    // first, eliminate any overlap with the previous entry
-    if (blk_index != 0 && !overwrite) {
-	off_t prev_block_start = (*buf_ptr)[blk_index-1].block_num*jhdr_size;
-	off_t prev_block_end = prev_block_start + (*buf_ptr)[blk_index-1].block_size;
-	overlap = prev_block_end - block_start;
-	if (overlap > 0) {
-	    if (overlap % jhdr_size != 0) {
-		panic("jnl: do_overlap: overlap with previous entry not a multiple of %zd\n", jhdr_size);
-	    }
-
-	    // if the previous entry completely overlaps this one, we need to break it into two pieces.
-	    if (prev_block_end > block_end) {
-		off_t new_num = block_end / jhdr_size;
-		size_t new_size = prev_block_end - block_end;
-
-		new_offset = (*buf_ptr)[blk_index-1].jnl_offset + (block_end - prev_block_start);
+	int	num_to_remove, index, i, overwrite, err;
+	size_t	jhdr_size = jnl->jhdr->jhdr_size, new_offset;
+	off_t	overlap, block_start, block_end;
+
+	block_start = block_num*jhdr_size;
+	block_end = block_start + size;
+	overwrite = (block_num == (*buf_ptr)[blk_index].block_num && size >= (*buf_ptr)[blk_index].block_size);
+
+	// first, eliminate any overlap with the previous entry
+	if (blk_index != 0 && !overwrite) {
+		off_t prev_block_start = (*buf_ptr)[blk_index-1].block_num*jhdr_size;
+		off_t prev_block_end = prev_block_start + (*buf_ptr)[blk_index-1].block_size;
+		overlap = prev_block_end - block_start;
+		if (overlap > 0) {
+			if (overlap % jhdr_size != 0) {
+				panic("jnl: do_overlap: overlap with previous entry not a multiple of %zd\n", jhdr_size);
+			}
+
+			// if the previous entry completely overlaps this one, we need to break it into two pieces.
+			if (prev_block_end > block_end) {
+				off_t new_num = block_end / jhdr_size;
+				size_t new_size = prev_block_end - block_end;
+
+				new_offset = (*buf_ptr)[blk_index-1].jnl_offset + (block_end - prev_block_start);
 		
-		err = insert_block(jnl, buf_ptr, blk_index, new_num, new_size, new_offset, cksum, num_buckets_ptr, num_full_ptr, 0);
-		if (err < 0) {
-		    panic("jnl: do_overlap: error inserting during pre-overlap\n");
-		}
-	    }
+				err = insert_block(jnl, buf_ptr, blk_index, new_num, new_size, new_offset, cksum, num_buckets_ptr, num_full_ptr, 0);
+				if (err < 0) {
+					panic("jnl: do_overlap: error inserting during pre-overlap\n");
+				}
+			}
 	    
-	    // Regardless, we need to truncate the previous entry to the beginning of the overlap
-	    (*buf_ptr)[blk_index-1].block_size = block_start - prev_block_start;
-	    (*buf_ptr)[blk_index-1].cksum = 0;   // have to blow it away because there's no way to check it
+			// Regardless, we need to truncate the previous entry to the beginning of the overlap
+			(*buf_ptr)[blk_index-1].block_size = block_start - prev_block_start;
+			(*buf_ptr)[blk_index-1].cksum = 0;   // have to blow it away because there's no way to check it
+		}
 	}
-    }
 
-    // then, bail out fast if there's no overlap with the entries that follow
-    if (!overwrite && block_end <= (off_t)((*buf_ptr)[blk_index].block_num*jhdr_size)) {
-	return 0; // no overlap, no overwrite
-    } else if (overwrite && (blk_index + 1 >= *num_full_ptr || block_end <= (off_t)((*buf_ptr)[blk_index+1].block_num*jhdr_size))) {
+	// then, bail out fast if there's no overlap with the entries that follow
+	if (!overwrite && block_end <= (off_t)((*buf_ptr)[blk_index].block_num*jhdr_size)) {
+		return 0; // no overlap, no overwrite
+	} else if (overwrite && (blk_index + 1 >= *num_full_ptr || block_end <= (off_t)((*buf_ptr)[blk_index+1].block_num*jhdr_size))) {
 
-	(*buf_ptr)[blk_index].cksum = cksum;   // update this
-	return 1; // simple overwrite
-    }
+		(*buf_ptr)[blk_index].cksum = cksum;   // update this
+		return 1; // simple overwrite
+	}
     
-    // Otherwise, find all cases of total and partial overlap. We use the special
-    // block_num of -2 to designate entries that are completely overlapped and must
-    // be eliminated. The block_num, size, and jnl_offset of partially overlapped
-    // entries must be adjusted to keep the array consistent.
-    index = blk_index;
-    num_to_remove = 0;
-    while(index < *num_full_ptr && block_end > (off_t)((*buf_ptr)[index].block_num*jhdr_size)) {
-	if (block_end >= (off_t)(((*buf_ptr)[index].block_num*jhdr_size + (*buf_ptr)[index].block_size))) {
-	    (*buf_ptr)[index].block_num = -2; // mark this for deletion
-	    num_to_remove++;
-	} else {
-	    overlap = block_end - (*buf_ptr)[index].block_num*jhdr_size;
-	    if (overlap > 0) {
-		if (overlap % jhdr_size != 0) {
-		    panic("jnl: do_overlap: overlap of %lld is not multiple of %zd\n", overlap, jhdr_size);
-		}
-		
-		// if we partially overlap this entry, adjust its block number, jnl offset, and size
-		(*buf_ptr)[index].block_num += (overlap / jhdr_size); // make sure overlap is multiple of jhdr_size, or round up
-		(*buf_ptr)[index].cksum = 0;
+	// Otherwise, find all cases of total and partial overlap. We use the special
+	// block_num of -2 to designate entries that are completely overlapped and must
+	// be eliminated. The block_num, size, and jnl_offset of partially overlapped
+	// entries must be adjusted to keep the array consistent.
+	index = blk_index;
+	num_to_remove = 0;
+	while (index < *num_full_ptr && block_end > (off_t)((*buf_ptr)[index].block_num*jhdr_size)) {
+		if (block_end >= (off_t)(((*buf_ptr)[index].block_num*jhdr_size + (*buf_ptr)[index].block_size))) {
+			(*buf_ptr)[index].block_num = -2; // mark this for deletion
+			num_to_remove++;
+		} else {
+			overlap = block_end - (*buf_ptr)[index].block_num*jhdr_size;
+			if (overlap > 0) {
+				if (overlap % jhdr_size != 0) {
+					panic("jnl: do_overlap: overlap of %lld is not multiple of %zd\n", overlap, jhdr_size);
+				}
+				
+				// if we partially overlap this entry, adjust its block number, jnl offset, and size
+				(*buf_ptr)[index].block_num += (overlap / jhdr_size); // make sure overlap is multiple of jhdr_size, or round up
+				(*buf_ptr)[index].cksum = 0;
 		
-		new_offset = (*buf_ptr)[index].jnl_offset + overlap; // check for wrap-around
-		if ((off_t)new_offset >= jnl->jhdr->size) {
-		    new_offset = jhdr_size + (new_offset - jnl->jhdr->size);
-		}
-		(*buf_ptr)[index].jnl_offset = new_offset;
+				new_offset = (*buf_ptr)[index].jnl_offset + overlap; // check for wrap-around
+				if ((off_t)new_offset >= jnl->jhdr->size) {
+					new_offset = jhdr_size + (new_offset - jnl->jhdr->size);
+				}
+				(*buf_ptr)[index].jnl_offset = new_offset;
 		
-		(*buf_ptr)[index].block_size -= overlap; // sanity check for negative value
-		if ((*buf_ptr)[index].block_size <= 0) {
-		    panic("jnl: do_overlap: after overlap, new block size is invalid (%u)\n", (*buf_ptr)[index].block_size);
-		    // return -1; // if above panic is removed, return -1 for error
+				(*buf_ptr)[index].block_size -= overlap; // sanity check for negative value
+				if ((*buf_ptr)[index].block_size <= 0) {
+					panic("jnl: do_overlap: after overlap, new block size is invalid (%u)\n", (*buf_ptr)[index].block_size);
+					// return -1; // if above panic is removed, return -1 for error
+				}
+			}
+			
 		}
-	    }
-	    
-	}
 
-	index++;
-    }
+		index++;
+	}
 
-    // bcopy over any completely overlapped entries, starting at the right (where the above loop broke out)
-    index--; // start with the last index used within the above loop
-    while(index >= blk_index) {
-	if ((*buf_ptr)[index].block_num == -2) {
-	    if (index == *num_full_ptr-1) {
-		(*buf_ptr)[index].block_num = -1; // it's the last item in the table... just mark as free
-	    } else {
-		bcopy( (*buf_ptr)+(index+1), (*buf_ptr)+(index), (*num_full_ptr - (index + 1)) * sizeof(struct bucket) );
-	    }
-	    (*num_full_ptr)--;
-	}
-	index--;
-    }
+	// bcopy over any completely overlapped entries, starting at the right (where the above loop broke out)
+	index--; // start with the last index used within the above loop
+	while (index >= blk_index) {
+		if ((*buf_ptr)[index].block_num == -2) {
+			if (index == *num_full_ptr-1) {
+				(*buf_ptr)[index].block_num = -1; // it's the last item in the table... just mark as free
+			} else {
+				bcopy( (*buf_ptr)+(index+1), (*buf_ptr)+(index), (*num_full_ptr - (index + 1)) * sizeof(struct bucket) );
+			}
+			(*num_full_ptr)--;
+		}
+		index--;
+	}
 
-    // eliminate any stale entries at the end of the table
-    for(i=*num_full_ptr; i < (*num_full_ptr + num_to_remove); i++) {
-	(*buf_ptr)[i].block_num = -1;
-    }
+	// eliminate any stale entries at the end of the table
+	for(i = *num_full_ptr; i < (*num_full_ptr + num_to_remove); i++) {
+		(*buf_ptr)[i].block_num = -1;
+	}
     
-    return 0; // if we got this far, we need to insert the entry into the table (rather than overwrite) 
+	return 0; // if we got this far, we need to insert the entry into the table (rather than overwrite) 
 }
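Stripped of the table bookkeeping, do_overlap() is interval arithmetic: the new extent truncates the tail of an earlier extent that runs into it, truncates the head of (and re-aims) one it runs over, and marks fully covered extents dead. A byte-granularity sketch of just that arithmetic; the kernel additionally works in jhdr_size units, fixes up each entry's jnl_offset, and splits an earlier extent that completely covers the new one into two pieces:

#include <stdio.h>

struct extent { long long start, len; };	// len == -1 means deleted

static void
punch(struct extent *e, int n, long long start, long long end)
{
	int i;

	for (i = 0; i < n; i++) {
		long long es = e[i].start, ee = e[i].start + e[i].len;

		if (ee <= start || es >= end) {
			continue;			// no overlap
		} else if (es >= start && ee <= end) {
			e[i].len = -1;			// fully covered: delete
		} else if (es < start) {
			e[i].len = start - es;		// truncate the tail
		} else {
			e[i].len = ee - end;		// truncate the head
			e[i].start = end;
		}
	}
}

int
main(void)
{
	struct extent e[] = { { 0, 8 }, { 8, 8 }, { 16, 8 } };
	int i;

	punch(e, 3, 6, 18);	// a new write covering bytes [6, 18)
	for (i = 0; i < 3; i++)
		printf("[start %lld len %lld]\n", e[i].start, e[i].len);
	return 0;
}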
 
 // PR-3105942: Coalesce writes to the same block in journal replay
@@ -993,90 +1062,90 @@ do_overlap(journal *jnl, struct bucket **buf_ptr, int blk_index, off_t block_num
 static int
 add_block(journal *jnl, struct bucket **buf_ptr, off_t block_num, size_t size, __unused size_t offset, int32_t cksum, int *num_buckets_ptr, int *num_full_ptr)
 {
-    int blk_index, overwriting;
+	int	blk_index, overwriting;
     
-    // on return from lookup_bucket(), blk_index is the index into the table where block_num should be
-    // inserted (or the index of the elem to overwrite). 
-    blk_index = lookup_bucket( buf_ptr, block_num, *num_full_ptr);
+	// on return from lookup_bucket(), blk_index is the index into the table where block_num should be
+	// inserted (or the index of the elem to overwrite). 
+	blk_index = lookup_bucket( buf_ptr, block_num, *num_full_ptr);
     
-    // check if the index is within bounds (if we're adding this block to the end of
-    // the table, blk_index will be equal to num_full)
-    if (blk_index < 0 || blk_index > *num_full_ptr) {
-        //printf("jnl: add_block: trouble adding block to co_buf\n");
-	return -1;
-    } // else printf("jnl: add_block: adding block 0x%llx at i=%d\n", block_num, blk_index);
+	// check if the index is within bounds (if we're adding this block to the end of
+	// the table, blk_index will be equal to num_full)
+	if (blk_index < 0 || blk_index > *num_full_ptr) {
+		//printf("jnl: add_block: trouble adding block to co_buf\n");
+		return -1;
+	} // else printf("jnl: add_block: adding block 0x%llx at i=%d\n", block_num, blk_index);
     
-    // Determine whether we're overwriting an existing entry by checking for overlap
-    overwriting = do_overlap(jnl, buf_ptr, blk_index, block_num, size, offset, cksum, num_buckets_ptr, num_full_ptr);
-    if (overwriting < 0) {
-	return -1; // if we got an error, pass it along
-    }
+	// Determine whether we're overwriting an existing entry by checking for overlap
+	overwriting = do_overlap(jnl, buf_ptr, blk_index, block_num, size, offset, cksum, num_buckets_ptr, num_full_ptr);
+	if (overwriting < 0) {
+		return -1; // if we got an error, pass it along
+	}
         
-    // returns the index, or -1 on error
-    blk_index = insert_block(jnl, buf_ptr, blk_index, block_num, size, offset, cksum, num_buckets_ptr, num_full_ptr, overwriting);
+	// returns the index, or -1 on error
+	blk_index = insert_block(jnl, buf_ptr, blk_index, block_num, size, offset, cksum, num_buckets_ptr, num_full_ptr, overwriting);
     
-    return blk_index;
+	return blk_index;
 }
 
 static int
 replay_journal(journal *jnl)
 {
-    int i, orig_checksum, checksum, check_block_checksums=0, bad_blocks=0;
-    size_t ret;
-    size_t  max_bsize = 0;		/* protected by block_ptr */
-    block_list_header *blhdr;
-    off_t offset, txn_start_offset=0, blhdr_offset, orig_jnl_start;
-    char *buff, *block_ptr=NULL;
-    struct bucket *co_buf;
-    int num_buckets = STARTING_BUCKETS, num_full, check_past_jnl_end = 1, in_uncharted_territory=0;
-    uint32_t last_sequence_num = 0;
+	int		i, orig_checksum, checksum, check_block_checksums=0, bad_blocks=0;
+	size_t		ret;
+	size_t		max_bsize = 0;		/* protected by block_ptr */
+	block_list_header *blhdr;
+	off_t		offset, txn_start_offset=0, blhdr_offset, orig_jnl_start;
+	char		*buff, *block_ptr=NULL;
+	struct bucket	*co_buf;
+	int		num_buckets = STARTING_BUCKETS, num_full, check_past_jnl_end = 1, in_uncharted_territory=0;
+	uint32_t	last_sequence_num = 0;
     
-    // wrap the start ptr if it points to the very end of the journal
-    if (jnl->jhdr->start == jnl->jhdr->size) {
+	// wrap the start ptr if it points to the very end of the journal
+	if (jnl->jhdr->start == jnl->jhdr->size) {
 		jnl->jhdr->start = jnl->jhdr->jhdr_size;
-    }
-    if (jnl->jhdr->end == jnl->jhdr->size) {
+	}
+	if (jnl->jhdr->end == jnl->jhdr->size) {
 		jnl->jhdr->end = jnl->jhdr->jhdr_size;
-    }
+	}
 
-    if (jnl->jhdr->start == jnl->jhdr->end) {
+	if (jnl->jhdr->start == jnl->jhdr->end) {
 		return 0;
-    }
+	}
 
-    orig_jnl_start = jnl->jhdr->start;
+	orig_jnl_start = jnl->jhdr->start;
 
-    // allocate memory for the header_block.  we'll read each blhdr into this
-    if (kmem_alloc(kernel_map, (vm_offset_t *)&buff, jnl->jhdr->blhdr_size)) {
+	// allocate memory for the header_block.  we'll read each blhdr into this
+	if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&buff, jnl->jhdr->blhdr_size)) {
 		printf("jnl: %s: replay_journal: no memory for block buffer! (%d bytes)\n",
-		    jnl->jdev_name, jnl->jhdr->blhdr_size);
+		       jnl->jdev_name, jnl->jhdr->blhdr_size);
 		return -1;
-    }
+	}
 
-    // allocate memory for the coalesce buffer
-    if ((MALLOC(co_buf, struct bucket *, num_buckets*sizeof(struct bucket), M_TEMP, M_WAITOK)) == NULL) {
-        printf("jnl: %s: replay_journal: no memory for coalesce buffer!\n", jnl->jdev_name);
-	return -1;
-    }
+	// allocate memory for the coalesce buffer
+	if ((MALLOC(co_buf, struct bucket *, num_buckets*sizeof(struct bucket), M_TEMP, M_WAITOK)) == NULL) {
+		printf("jnl: %s: replay_journal: no memory for coalesce buffer!\n", jnl->jdev_name);
+		return -1;
+	}
 
-  restart_replay:
+restart_replay:
 
-    // initialize entries
-    for(i=0; i < num_buckets; i++) {
-        co_buf[i].block_num = -1;
-    }
-    num_full = 0; // empty at first
+	// initialize entries
+	for(i = 0; i < num_buckets; i++) {
+		co_buf[i].block_num = -1;
+	}
+	num_full = 0; // empty at first
 
 
-    printf("jnl: %s: replay_journal: from: %lld to: %lld (joffset 0x%llx)\n",
-	jnl->jdev_name, jnl->jhdr->start, jnl->jhdr->end, jnl->jdev_offset);
+	printf("jnl: %s: replay_journal: from: %lld to: %lld (joffset 0x%llx)\n",
+	       jnl->jdev_name, jnl->jhdr->start, jnl->jhdr->end, jnl->jdev_offset);
 
-    while(check_past_jnl_end || jnl->jhdr->start != jnl->jhdr->end) {
+	while (check_past_jnl_end || jnl->jhdr->start != jnl->jhdr->end) {
 		offset = blhdr_offset = jnl->jhdr->start;
 		ret = read_journal_data(jnl, &offset, buff, jnl->jhdr->blhdr_size);
 		if (ret != (size_t)jnl->jhdr->blhdr_size) {
-		    printf("jnl: %s: replay_journal: Could not read block list header block @ 0x%llx!\n", jnl->jdev_name, offset);
-		    bad_blocks = 1;
-		    goto bad_txn_handling;
+			printf("jnl: %s: replay_journal: Could not read block list header block @ 0x%llx!\n", jnl->jdev_name, offset);
+			bad_blocks = 1;
+			goto bad_txn_handling;
 		}
 
 		blhdr = (block_list_header *)buff;
@@ -1101,101 +1170,101 @@ replay_journal(journal *jnl)
 		//          anything
 		//
 		if (checksum != orig_checksum) {
-		    if (check_past_jnl_end && in_uncharted_territory) {
+			if (check_past_jnl_end && in_uncharted_territory) {
 
-			if (blhdr_offset != jnl->jhdr->end) {
-			    printf("jnl: %s: Extra txn replay stopped @ %lld / 0x%llx\n", jnl->jdev_name, blhdr_offset, blhdr_offset);
-			}
+				if (blhdr_offset != jnl->jhdr->end) {
+					printf("jnl: %s: Extra txn replay stopped @ %lld / 0x%llx\n", jnl->jdev_name, blhdr_offset, blhdr_offset);
+				}
 
-			check_past_jnl_end = 0;
-			jnl->jhdr->end = blhdr_offset;
-			continue;
-		    }
+				check_past_jnl_end = 0;
+				jnl->jhdr->end = blhdr_offset;
+				continue;
+			}
 
-		    printf("jnl: %s: replay_journal: bad block list header @ 0x%llx (checksum 0x%x != 0x%x)\n",
+			printf("jnl: %s: replay_journal: bad block list header @ 0x%llx (checksum 0x%x != 0x%x)\n",
 			jnl->jdev_name, blhdr_offset, orig_checksum, checksum);
 
-		    if (blhdr_offset == orig_jnl_start) {
-			// if there's nothing in the journal at all, just bail out altogether.
-			goto bad_replay;
-		    }
+			if (blhdr_offset == orig_jnl_start) {
+				// if there's nothing in the journal at all, just bail out altogether.
+				goto bad_replay;
+			}
 
-		    bad_blocks = 1;
-		    goto bad_txn_handling;
+			bad_blocks = 1;
+			goto bad_txn_handling;
 		}
 
 		if (   (last_sequence_num != 0)
-		    && (blhdr->binfo[0].u.bi.b.sequence_num != 0)
-		    && (blhdr->binfo[0].u.bi.b.sequence_num != last_sequence_num)
-		    && (blhdr->binfo[0].u.bi.b.sequence_num != last_sequence_num+1)) {
+		       && (blhdr->binfo[0].u.bi.b.sequence_num != 0)
+		       && (blhdr->binfo[0].u.bi.b.sequence_num != last_sequence_num)
+		       && (blhdr->binfo[0].u.bi.b.sequence_num != last_sequence_num+1)) {
 
-		    txn_start_offset = jnl->jhdr->end = blhdr_offset;
+			txn_start_offset = jnl->jhdr->end = blhdr_offset;
 
-		    if (check_past_jnl_end) {
-			check_past_jnl_end = 0;
-			printf("jnl: %s: 2: extra replay stopped @ %lld / 0x%llx (seq %d < %d)\n",
-			    jnl->jdev_name, blhdr_offset, blhdr_offset, blhdr->binfo[0].u.bi.b.sequence_num, last_sequence_num);
-			continue;
-		    }
+			if (check_past_jnl_end) {
+				check_past_jnl_end = 0;
+				printf("jnl: %s: 2: extra replay stopped @ %lld / 0x%llx (seq %d < %d)\n",
+				       jnl->jdev_name, blhdr_offset, blhdr_offset, blhdr->binfo[0].u.bi.b.sequence_num, last_sequence_num);
+				continue;
+			}
 
-		    printf("jnl: %s: txn sequence numbers out of order in txn @ %lld / %llx! (%d < %d)\n",
-			jnl->jdev_name, blhdr_offset, blhdr_offset, blhdr->binfo[0].u.bi.b.sequence_num, last_sequence_num);
-		    bad_blocks = 1;
-		    goto bad_txn_handling;
+			printf("jnl: %s: txn sequence numbers out of order in txn @ %lld / %llx! (%d < %d)\n",
+			       jnl->jdev_name, blhdr_offset, blhdr_offset, blhdr->binfo[0].u.bi.b.sequence_num, last_sequence_num);
+			bad_blocks = 1;
+			goto bad_txn_handling;
 		}
 		last_sequence_num = blhdr->binfo[0].u.bi.b.sequence_num;
 
 		if (blhdr_offset >= jnl->jhdr->end && jnl->jhdr->start <= jnl->jhdr->end) {
-		    if (last_sequence_num == 0) {
-			check_past_jnl_end = 0;
-			printf("jnl: %s: pre-sequence-num-enabled txn's - can not go further than end (%lld %lld).\n",
-			    jnl->jdev_name, jnl->jhdr->start, jnl->jhdr->end);
-			if (jnl->jhdr->start != jnl->jhdr->end) {
-			    jnl->jhdr->start = jnl->jhdr->end;
+			if (last_sequence_num == 0) {
+				check_past_jnl_end = 0;
+				printf("jnl: %s: pre-sequence-num-enabled txn's - can not go further than end (%lld %lld).\n",
+				       jnl->jdev_name, jnl->jhdr->start, jnl->jhdr->end);
+				if (jnl->jhdr->start != jnl->jhdr->end) {
+					jnl->jhdr->start = jnl->jhdr->end;
+				}
+				continue;
 			}
-			continue;
-		    }
-		    printf("jnl: %s: examining extra transactions starting @ %lld / 0x%llx\n", jnl->jdev_name, blhdr_offset, blhdr_offset);
+			printf("jnl: %s: examining extra transactions starting @ %lld / 0x%llx\n", jnl->jdev_name, blhdr_offset, blhdr_offset);
 		}
 
 		if (   blhdr->max_blocks <= 0 || blhdr->max_blocks > (jnl->jhdr->size/jnl->jhdr->jhdr_size)
-			   || blhdr->num_blocks <= 0 || blhdr->num_blocks > blhdr->max_blocks) {
-		    printf("jnl: %s: replay_journal: bad looking journal entry: max: %d num: %d\n",
-			jnl->jdev_name, blhdr->max_blocks, blhdr->num_blocks);
-		    bad_blocks = 1;
-		    goto bad_txn_handling;
+		       || blhdr->num_blocks <= 0 || blhdr->num_blocks > blhdr->max_blocks) {
+			printf("jnl: %s: replay_journal: bad looking journal entry: max: %d num: %d\n",
+			       jnl->jdev_name, blhdr->max_blocks, blhdr->num_blocks);
+			bad_blocks = 1;
+			goto bad_txn_handling;
 		}
 	
 		max_bsize = 0;
-		for(i=1; i < blhdr->num_blocks; i++) {
+		for (i = 1; i < blhdr->num_blocks; i++) {
 			if (blhdr->binfo[i].bnum < 0 && blhdr->binfo[i].bnum != (off_t)-1) {
-			    printf("jnl: %s: replay_journal: bogus block number 0x%llx\n", jnl->jdev_name, blhdr->binfo[i].bnum);
-			    bad_blocks = 1;
-			    goto bad_txn_handling;
+				printf("jnl: %s: replay_journal: bogus block number 0x%llx\n", jnl->jdev_name, blhdr->binfo[i].bnum);
+				bad_blocks = 1;
+				goto bad_txn_handling;
 			}
 			
 			if ((size_t)blhdr->binfo[i].u.bi.bsize > max_bsize) {
-			    max_bsize = blhdr->binfo[i].u.bi.bsize;
+				max_bsize = blhdr->binfo[i].u.bi.bsize;
 			}
 		}
 
 		if (blhdr->flags & BLHDR_CHECK_CHECKSUMS) {
-		    check_block_checksums = 1;
-		    if (kmem_alloc(kernel_map, (vm_offset_t *)&block_ptr, max_bsize)) {
-			goto bad_replay;
-		    }
+			check_block_checksums = 1;
+			if (kmem_alloc(kernel_map, (vm_offset_t *)&block_ptr, max_bsize)) {
+				goto bad_replay;
+			}
 		} else {
-		    block_ptr = NULL;
+			block_ptr = NULL;
 		}
 
 		if (blhdr->flags & BLHDR_FIRST_HEADER) {
-		    txn_start_offset = blhdr_offset;
+			txn_start_offset = blhdr_offset;
 		}
 
 		//printf("jnl: replay_journal: adding %d blocks in journal entry @ 0x%llx to co_buf\n", 
 		//       blhdr->num_blocks-1, jnl->jhdr->start);
 		bad_blocks = 0;
-		for(i=1; i < blhdr->num_blocks; i++) {
+		for (i = 1; i < blhdr->num_blocks; i++) {
 			int size, ret_val;
 			off_t number;
 
@@ -1204,48 +1273,48 @@ replay_journal(journal *jnl)
 			
 			// don't add "killed" blocks
 			if (number == (off_t)-1) {
-			    //printf("jnl: replay_journal: skipping killed fs block (index %d)\n", i);
+				//printf("jnl: replay_journal: skipping killed fs block (index %d)\n", i);
 			} else {
 
-			    if (check_block_checksums) {
-				int32_t disk_cksum;
-				off_t block_offset;
+				if (check_block_checksums) {
+					int32_t disk_cksum;
+					off_t block_offset;
 
-				block_offset = offset;
+					block_offset = offset;
 
-				// read the block so we can check the checksum
-				ret = read_journal_data(jnl, &block_offset, block_ptr, size);
-				if (ret != (size_t)size) {
-				    printf("jnl: %s: replay_journal: Could not read journal entry data @ offset 0x%llx!\n", jnl->jdev_name, offset);
-				    bad_blocks = 1;
-				    goto bad_txn_handling;
-				}
+					// read the block so we can check the checksum
+					ret = read_journal_data(jnl, &block_offset, block_ptr, size);
+					if (ret != (size_t)size) {
+						printf("jnl: %s: replay_journal: Could not read journal entry data @ offset 0x%llx!\n", jnl->jdev_name, offset);
+						bad_blocks = 1;
+						goto bad_txn_handling;
+					}
 				
-				disk_cksum = calc_checksum(block_ptr, size);
-
-				// there is no need to swap the checksum from disk because
-				// it got swapped when the blhdr was read in.
-				if (blhdr->binfo[i].u.bi.b.cksum != 0 && disk_cksum != blhdr->binfo[i].u.bi.b.cksum) {
-				    printf("jnl: %s: txn starting at %lld (%lld) @ index %3d bnum %lld (%d) with disk cksum != blhdr cksum (0x%.8x 0x%.8x)\n",
-					jnl->jdev_name, txn_start_offset, blhdr_offset, i, number, size, disk_cksum, blhdr->binfo[i].u.bi.b.cksum);
-				    printf("jnl: 0x%.8x 0x%.8x 0x%.8x 0x%.8x  0x%.8x 0x%.8x 0x%.8x 0x%.8x\n",
-					*(int *)&block_ptr[0*sizeof(int)], *(int *)&block_ptr[1*sizeof(int)], *(int *)&block_ptr[2*sizeof(int)], *(int *)&block_ptr[3*sizeof(int)],
-					*(int *)&block_ptr[4*sizeof(int)], *(int *)&block_ptr[5*sizeof(int)], *(int *)&block_ptr[6*sizeof(int)], *(int *)&block_ptr[7*sizeof(int)]);
-
-				    bad_blocks = 1;
-				    goto bad_txn_handling;
+					disk_cksum = calc_checksum(block_ptr, size);
+
+					// there is no need to swap the checksum from disk because
+					// it got swapped when the blhdr was read in.
+					if (blhdr->binfo[i].u.bi.b.cksum != 0 && disk_cksum != blhdr->binfo[i].u.bi.b.cksum) {
+						printf("jnl: %s: txn starting at %lld (%lld) @ index %3d bnum %lld (%d) with disk cksum != blhdr cksum (0x%.8x 0x%.8x)\n",
+						       jnl->jdev_name, txn_start_offset, blhdr_offset, i, number, size, disk_cksum, blhdr->binfo[i].u.bi.b.cksum);
+						printf("jnl: 0x%.8x 0x%.8x 0x%.8x 0x%.8x  0x%.8x 0x%.8x 0x%.8x 0x%.8x\n",
+						       *(int *)&block_ptr[0*sizeof(int)], *(int *)&block_ptr[1*sizeof(int)], *(int *)&block_ptr[2*sizeof(int)], *(int *)&block_ptr[3*sizeof(int)],
+						       *(int *)&block_ptr[4*sizeof(int)], *(int *)&block_ptr[5*sizeof(int)], *(int *)&block_ptr[6*sizeof(int)], *(int *)&block_ptr[7*sizeof(int)]);
+
+						bad_blocks = 1;
+						goto bad_txn_handling;
+					}
 				}
-			    }
 
 
-			    // add this bucket to co_buf, coalescing where possible
-			    // printf("jnl: replay_journal: adding block 0x%llx\n", number);
-			    ret_val = add_block(jnl, &co_buf, number, size, (size_t) offset, blhdr->binfo[i].u.bi.b.cksum, &num_buckets, &num_full);
+				// add this bucket to co_buf, coalescing where possible
+				// printf("jnl: replay_journal: adding block 0x%llx\n", number);
+				ret_val = add_block(jnl, &co_buf, number, size, (size_t) offset, blhdr->binfo[i].u.bi.b.cksum, &num_buckets, &num_full);
 			    
-			    if (ret_val == -1) {
-				printf("jnl: %s: replay_journal: trouble adding block to co_buf\n", jnl->jdev_name);
-				goto bad_replay;
-			    } // else printf("jnl: replay_journal: added block 0x%llx at i=%d\n", number);
+				if (ret_val == -1) {
+					printf("jnl: %s: replay_journal: trouble adding block to co_buf\n", jnl->jdev_name);
+					goto bad_replay;
+				} // else printf("jnl: replay_journal: added block 0x%llx at i=%d\n", number);
 			}
 			
 			// increment offset
@@ -1256,28 +1325,28 @@ replay_journal(journal *jnl)
 			// into account
 			//
 			if (offset >= jnl->jhdr->size) {
-			    offset = jnl->jhdr->jhdr_size + (offset - jnl->jhdr->size);
+				offset = jnl->jhdr->jhdr_size + (offset - jnl->jhdr->size);
 			}
 		}
 
 		if (block_ptr) {
-		    kmem_free(kernel_map, (vm_offset_t)block_ptr, max_bsize);
-		    block_ptr = NULL;
+			kmem_free(kernel_map, (vm_offset_t)block_ptr, max_bsize);
+			block_ptr = NULL;
 		}
 		
-      bad_txn_handling:
+bad_txn_handling:
 		if (bad_blocks) {
-		    if (txn_start_offset == 0) {
-			printf("jnl: %s: no known good txn start offset! aborting journal replay.\n", jnl->jdev_name);
-			goto bad_replay;
-		    }
+			if (txn_start_offset == 0) {
+				printf("jnl: %s: no known good txn start offset! aborting journal replay.\n", jnl->jdev_name);
+				goto bad_replay;
+			}
 
-		    jnl->jhdr->start = orig_jnl_start;
-		    jnl->jhdr->end = txn_start_offset;
-		    check_past_jnl_end = 0;
-		    last_sequence_num = 0;
-		    printf("jnl: %s: restarting journal replay (%lld - %lld)!\n", jnl->jdev_name, jnl->jhdr->start, jnl->jhdr->end);
-		    goto restart_replay;
+			jnl->jhdr->start = orig_jnl_start;
+			jnl->jhdr->end = txn_start_offset;
+			check_past_jnl_end = 0;
+			last_sequence_num = 0;
+			printf("jnl: %s: restarting journal replay (%lld - %lld)!\n", jnl->jdev_name, jnl->jhdr->start, jnl->jhdr->end);
+			goto restart_replay;
 		}
 
 		jnl->jhdr->start += blhdr->bytes_used;
@@ -1287,98 +1356,98 @@ replay_journal(journal *jnl)
 		}
 
 		if (jnl->jhdr->start == jnl->jhdr->end) {
-		    in_uncharted_territory = 1;
+			in_uncharted_territory = 1;
 		}
-    }
+	}
 
-    if (jnl->jhdr->start != jnl->jhdr->end) {
-	printf("jnl: %s: start %lld != end %lld.  resetting end.\n", jnl->jdev_name, jnl->jhdr->start, jnl->jhdr->end);
-	jnl->jhdr->end = jnl->jhdr->start;
-    }
+	if (jnl->jhdr->start != jnl->jhdr->end) {
+		printf("jnl: %s: start %lld != end %lld.  resetting end.\n", jnl->jdev_name, jnl->jhdr->start, jnl->jhdr->end);
+		jnl->jhdr->end = jnl->jhdr->start;
+	}
 
-    //printf("jnl: replay_journal: replaying %d blocks\n", num_full);
+	//printf("jnl: replay_journal: replaying %d blocks\n", num_full);
     
-    /*
-     * make sure it's at least one page in size, so
-     * start max_bsize at PAGE_SIZE
-     */
-    for (i = 0, max_bsize = PAGE_SIZE; i < num_full; i++) {
-
-            if (co_buf[i].block_num == (off_t)-1)
-	            continue;
+	/*
+	 * make sure it's at least one page in size, so
+	 * start max_bsize at PAGE_SIZE
+	 */
+	for (i = 0, max_bsize = PAGE_SIZE; i < num_full; i++) {
 
-	    if (co_buf[i].block_size > max_bsize)
-	            max_bsize = co_buf[i].block_size;
-    }
-    /*
-     * round max_bsize up to the nearest PAGE_SIZE multiple
-     */
-    if (max_bsize & (PAGE_SIZE - 1)) {
-            max_bsize = (max_bsize + PAGE_SIZE) & ~(PAGE_SIZE - 1);
-    }
+		if (co_buf[i].block_num == (off_t)-1)
+			continue;
 
-    if (kmem_alloc(kernel_map, (vm_offset_t *)&block_ptr, max_bsize)) {
-	goto bad_replay;
-    }
+		if (co_buf[i].block_size > max_bsize)
+			max_bsize = co_buf[i].block_size;
+	}
+	/*
+	 * round max_bsize up to the nearest PAGE_SIZE multiple
+	 */
+	if (max_bsize & (PAGE_SIZE - 1)) {
+		max_bsize = (max_bsize + PAGE_SIZE) & ~(PAGE_SIZE - 1);
+	}
+
+	if (kmem_alloc(kernel_map, (vm_offset_t *)&block_ptr, max_bsize)) {
+		goto bad_replay;
+	}
     
-    // Replay the coalesced entries in the co-buf
-    for(i=0; i < num_full; i++) {
-	size_t size = co_buf[i].block_size;
-	off_t jnl_offset = (off_t) co_buf[i].jnl_offset;
-	off_t number = co_buf[i].block_num;
+	// Replay the coalesced entries in the co-buf
+	for(i = 0; i < num_full; i++) {
+		size_t size = co_buf[i].block_size;
+		off_t jnl_offset = (off_t) co_buf[i].jnl_offset;
+		off_t number = co_buf[i].block_num;
 	
 	
-	// printf("replaying co_buf[%d]: block 0x%llx, size 0x%x, jnl_offset 0x%llx\n", i, co_buf[i].block_num,
-	//      co_buf[i].block_size, co_buf[i].jnl_offset);
+		// printf("replaying co_buf[%d]: block 0x%llx, size 0x%x, jnl_offset 0x%llx\n", i, co_buf[i].block_num,
+		//      co_buf[i].block_size, co_buf[i].jnl_offset);
 	
-	if (number == (off_t)-1) {
-	    // printf("jnl: replay_journal: skipping killed fs block\n");
-	} else {
+		if (number == (off_t)-1) {
+			// printf("jnl: replay_journal: skipping killed fs block\n");
+		} else {
 	    
-	    // do journal read, and set the phys. block 
-	    ret = read_journal_data(jnl, &jnl_offset, block_ptr, size);
-	    if (ret != size) {
-		printf("jnl: %s: replay_journal: Could not read journal entry data @ offset 0x%llx!\n", jnl->jdev_name, offset);
-		goto bad_replay;
-	    }
+			// read the block's data out of the journal, then rewrite the phys. block
+			ret = read_journal_data(jnl, &jnl_offset, block_ptr, size);
+			if (ret != size) {
+				printf("jnl: %s: replay_journal: Could not read journal entry data @ offset 0x%llx!\n", jnl->jdev_name, offset);
+				goto bad_replay;
+			}
 	    	    
-	    if (update_fs_block(jnl, block_ptr, number, size) != 0) {
-		goto bad_replay;
-	    }
+			if (update_fs_block(jnl, block_ptr, number, size) != 0) {
+				goto bad_replay;
+			}
+		}
 	}
-    }
     
+	
+	// done replaying; update jnl header
+	if (write_journal_header(jnl, 1, jnl->jhdr->sequence_num) != 0) {
+		goto bad_replay;
+	}
 
-    // done replaying; update jnl header
-    if (write_journal_header(jnl, 1) != 0) {
-	goto bad_replay;
-    }
-
-    printf("jnl: %s: journal replay done.\n", jnl->jdev_name);
+	printf("jnl: %s: journal replay done.\n", jnl->jdev_name);
     
-    // free block_ptr
-    if (block_ptr) {
-	kmem_free(kernel_map, (vm_offset_t)block_ptr, max_bsize);
-	block_ptr = NULL;
-    }
+	// free block_ptr
+	if (block_ptr) {
+		kmem_free(kernel_map, (vm_offset_t)block_ptr, max_bsize);
+		block_ptr = NULL;
+	}
     
-    // free the coalesce buffer
-    FREE(co_buf, M_TEMP);
-    co_buf = NULL;
+	// free the coalesce buffer
+	FREE(co_buf, M_TEMP);
+	co_buf = NULL;
   
-    kmem_free(kernel_map, (vm_offset_t)buff, jnl->jhdr->blhdr_size);
-    return 0;
+	kmem_free(kernel_map, (vm_offset_t)buff, jnl->jhdr->blhdr_size);
+	return 0;
 
-  bad_replay:
-    if (block_ptr) {
+bad_replay:
+	if (block_ptr) {
 		kmem_free(kernel_map, (vm_offset_t)block_ptr, max_bsize);
-    }
-    if (co_buf) {
-      FREE(co_buf, M_TEMP);
-    }
-    kmem_free(kernel_map, (vm_offset_t)buff, jnl->jhdr->blhdr_size);
+	}
+	if (co_buf) {
+		FREE(co_buf, M_TEMP);
+	}
+	kmem_free(kernel_map, (vm_offset_t)buff, jnl->jhdr->blhdr_size);
 
-    return -1;
+	return -1;
 }
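
The replay loop above treats the journal as a circular log: block #0 holds the
journal header, so when a scan offset runs past jnl->jhdr->size it wraps back
to just after the header block.  A minimal standalone sketch of that wrap rule
(the function name is illustrative, not part of the kernel):

	#include <stdint.h>

	// Wrap a journal offset back into the circular data region
	// [jhdr_size, size) -- the same step as
	// "offset = jnl->jhdr->jhdr_size + (offset - jnl->jhdr->size)"
	// in replay_journal().
	static int64_t wrap_journal_offset(int64_t offset, int64_t size,
					   int64_t jhdr_size)
	{
		if (offset >= size) {
			offset = jhdr_size + (offset - size);
		}
		return offset;
	}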
 
 
@@ -1413,11 +1482,11 @@ size_up_tbuffer(journal *jnl, int tbuffer_size, int phys_blksz)
 		}
 	}
 
-    // size up the transaction buffer... can't be larger than the number
-    // of blocks that can fit in a block_list_header block.
-    if (tbuffer_size == 0) {
+	// size up the transaction buffer... it can't hold more blocks than a
+	// single block_list_header block is able to describe.
+	if (tbuffer_size == 0) {
 		jnl->tbuffer_size = def_tbuffer_size;
-    } else {
+	} else {
 		// make sure that the specified tbuffer_size isn't too small
 		if (tbuffer_size < jnl->jhdr->blhdr_size * 2) {
 			tbuffer_size = jnl->jhdr->blhdr_size * 2;
@@ -1428,23 +1497,23 @@ size_up_tbuffer(journal *jnl, int tbuffer_size, int phys_blksz)
 		}
 
 		jnl->tbuffer_size = tbuffer_size;
-    }
+	}
 
-    if (jnl->tbuffer_size > (jnl->jhdr->size / 2)) {
+	if (jnl->tbuffer_size > (jnl->jhdr->size / 2)) {
 		jnl->tbuffer_size = (jnl->jhdr->size / 2);
-    }
+	}
     
-    if (jnl->tbuffer_size > MAX_TRANSACTION_BUFFER_SIZE) {
+	if (jnl->tbuffer_size > MAX_TRANSACTION_BUFFER_SIZE) {
 		jnl->tbuffer_size = MAX_TRANSACTION_BUFFER_SIZE;
-    }
+	}
 
-    jnl->jhdr->blhdr_size = (jnl->tbuffer_size / jnl->jhdr->jhdr_size) * sizeof(block_info);
-    if (jnl->jhdr->blhdr_size < phys_blksz) {
-	jnl->jhdr->blhdr_size = phys_blksz;
-    } else if ((jnl->jhdr->blhdr_size % phys_blksz) != 0) {
+	jnl->jhdr->blhdr_size = (jnl->tbuffer_size / jnl->jhdr->jhdr_size) * sizeof(block_info);
+	if (jnl->jhdr->blhdr_size < phys_blksz) {
+		jnl->jhdr->blhdr_size = phys_blksz;
+	} else if ((jnl->jhdr->blhdr_size % phys_blksz) != 0) {
 		// have to round up so we're an even multiple of the physical block size
 		jnl->jhdr->blhdr_size = (jnl->jhdr->blhdr_size + (phys_blksz - 1)) & ~(phys_blksz - 1);
-    }
+	}
 }
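
size_up_tbuffer() ends by rounding blhdr_size up to a whole number of physical
blocks.  The mask trick relies on phys_blksz being a power of two; a sketch of
the same idiom (illustrative, runnable on its own):

	#include <assert.h>
	#include <stdint.h>

	// Round x up to the next multiple of blksz, where blksz is a power
	// of two -- the "(x + (blksz - 1)) & ~(blksz - 1)" idiom used for
	// blhdr_size above and for max_bsize in replay_journal().
	static uint32_t round_up_pow2(uint32_t x, uint32_t blksz)
	{
		assert((blksz & (blksz - 1)) == 0);	// power-of-two only
		return (x + (blksz - 1)) & ~(blksz - 1);
	}

	// round_up_pow2(5000, 4096) == 8192; round_up_pow2(4096, 4096) == 4096.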
 
 
@@ -1452,96 +1521,99 @@ size_up_tbuffer(journal *jnl, int tbuffer_size, int phys_blksz)
 static void
 get_io_info(struct vnode *devvp, size_t phys_blksz, journal *jnl, struct vfs_context *context)
 {
-    off_t	readblockcnt;
-    off_t	writeblockcnt;
-    off_t	readmaxcnt=0, tmp_readmaxcnt;
-    off_t	writemaxcnt=0, tmp_writemaxcnt;
-    off_t       readsegcnt, writesegcnt;
-    int32_t     features;
-
-    if (VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&features, 0, context) == 0) {
-	if (features & DK_FEATURE_FORCE_UNIT_ACCESS) {
-	    const char *name = vnode_name(devvp);
-	    jnl->flags |= JOURNAL_DO_FUA_WRITES;
-	    printf("jnl: %s: enabling FUA writes (features 0x%x)\n", name ? name : "no-name-dev", features);
+	off_t	readblockcnt;
+	off_t	writeblockcnt;
+	off_t	readmaxcnt=0, tmp_readmaxcnt;
+	off_t	writemaxcnt=0, tmp_writemaxcnt;
+	off_t	readsegcnt, writesegcnt;
+	int32_t	features;
+
+	if (VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&features, 0, context) == 0) {
+		if (features & DK_FEATURE_FORCE_UNIT_ACCESS) {
+			const char *name = vnode_name(devvp);
+			jnl->flags |= JOURNAL_DO_FUA_WRITES;
+			printf("jnl: %s: enabling FUA writes (features 0x%x)\n", name ? name : "no-name-dev", features);
+		}
+		if (features & DK_FEATURE_UNMAP) {
+			jnl->flags |= JOURNAL_USE_UNMAP;
+		}
 	}
-    }
 
-    //
-    // First check the max read size via several different mechanisms...
-    //
-    VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD, (caddr_t)&readmaxcnt, 0, context);
+	//
+	// First check the max read size via several different mechanisms...
+	//
+	VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD, (caddr_t)&readmaxcnt, 0, context);
 
-    if (VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t)&readblockcnt, 0, context) == 0) {
-	    tmp_readmaxcnt = readblockcnt * phys_blksz;
-	    if (readmaxcnt == 0 || (readblockcnt > 0 && tmp_readmaxcnt < readmaxcnt)) {
-		    readmaxcnt = tmp_readmaxcnt;
-	    }
-    }
+	if (VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t)&readblockcnt, 0, context) == 0) {
+		tmp_readmaxcnt = readblockcnt * phys_blksz;
+		if (readmaxcnt == 0 || (readblockcnt > 0 && tmp_readmaxcnt < readmaxcnt)) {
+			readmaxcnt = tmp_readmaxcnt;
+		}
+	}
 
-    if (VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD, (caddr_t)&readsegcnt, 0, context)) {
-	    readsegcnt = 0;
-    }
+	if (VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD, (caddr_t)&readsegcnt, 0, context)) {
+		readsegcnt = 0;
+	}
 
-    if (readsegcnt > 0 && (readsegcnt * PAGE_SIZE) < readmaxcnt) {
-	    readmaxcnt = readsegcnt * PAGE_SIZE;
-    }
+	if (readsegcnt > 0 && (readsegcnt * PAGE_SIZE) < readmaxcnt) {
+		readmaxcnt = readsegcnt * PAGE_SIZE;
+	}
 	    
-    if (readmaxcnt == 0) {
-	    readmaxcnt = 128 * 1024;
-    } else if (readmaxcnt > UINT32_MAX) {
-	    readmaxcnt = UINT32_MAX;
-    }
+	if (readmaxcnt == 0) {
+		readmaxcnt = 128 * 1024;
+	} else if (readmaxcnt > UINT32_MAX) {
+		readmaxcnt = UINT32_MAX;
+	}
 
 
-    //
-    // Now check the max writes size via several different mechanisms...
-    //
-    VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t)&writemaxcnt, 0, context);
+	//
+	// Now check the max writes size via several different mechanisms...
+	//
+	VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t)&writemaxcnt, 0, context);
 
-    if (VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t)&writeblockcnt, 0, context) == 0) {
-	    tmp_writemaxcnt = writeblockcnt * phys_blksz;
-	    if (writemaxcnt == 0 || (writeblockcnt > 0 && tmp_writemaxcnt < writemaxcnt)) {
-		    writemaxcnt = tmp_writemaxcnt;
-	    }
-    }
+	if (VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t)&writeblockcnt, 0, context) == 0) {
+		tmp_writemaxcnt = writeblockcnt * phys_blksz;
+		if (writemaxcnt == 0 || (writeblockcnt > 0 && tmp_writemaxcnt < writemaxcnt)) {
+			writemaxcnt = tmp_writemaxcnt;
+		}
+	}
 
-    if (VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE,	(caddr_t)&writesegcnt, 0, context)) {
-	    writesegcnt = 0;
-    }
+	if (VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE,	(caddr_t)&writesegcnt, 0, context)) {
+		writesegcnt = 0;
+	}
 
-    if (writesegcnt > 0 && (writesegcnt * PAGE_SIZE) < writemaxcnt) {
-	    writemaxcnt = writesegcnt * PAGE_SIZE;
-    }
+	if (writesegcnt > 0 && (writesegcnt * PAGE_SIZE) < writemaxcnt) {
+		writemaxcnt = writesegcnt * PAGE_SIZE;
+	}
 
-    if (writemaxcnt == 0) {
-	    writemaxcnt = 128 * 1024;
-    } else if (writemaxcnt > UINT32_MAX) {
-	    writemaxcnt = UINT32_MAX;
-    }
+	if (writemaxcnt == 0) {
+		writemaxcnt = 128 * 1024;
+	} else if (writemaxcnt > UINT32_MAX) {
+		writemaxcnt = UINT32_MAX;
+	}
 
-    jnl->max_read_size  = readmaxcnt;
-    jnl->max_write_size = writemaxcnt;
-    // printf("jnl: %s: max read/write: %lld k / %lld k\n",
-    //     jnl->jdev_name ? jnl->jdev_name : "unknown",
-    //     jnl->max_read_size/1024, jnl->max_write_size/1024);
+	jnl->max_read_size  = readmaxcnt;
+	jnl->max_write_size = writemaxcnt;
+	// printf("jnl: %s: max read/write: %lld k / %lld k\n",
+	//     jnl->jdev_name ? jnl->jdev_name : "unknown",
+	//     jnl->max_read_size/1024, jnl->max_write_size/1024);
 }
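
get_io_info() folds up to three device limits -- a max byte count, a max block
count, and a max segment count -- into one figure per direction, keeping the
smallest and falling back to 128 KB when the driver reports nothing.  A
condensed sketch of that selection (function and parameter names are
illustrative; the DKIOC* comments map each input to its ioctl):

	#include <stdint.h>

	// Combine device-reported limits the way get_io_info() does: each
	// non-zero constraint can only shrink the result, an absent (zero)
	// one is ignored, and the result is clamped to 32 bits.
	static uint64_t max_io_size(uint64_t bytecnt,	// DKIOCGETMAXBYTECOUNT*
				    uint64_t blockcnt,	// DKIOCGETMAXBLOCKCOUNT*
				    uint32_t blksz,
				    uint64_t segcnt,	// DKIOCGETMAXSEGMENTCOUNT*
				    uint64_t page_size)
	{
		uint64_t max = bytecnt;

		if (blockcnt > 0 && (max == 0 || blockcnt * blksz < max))
			max = blockcnt * blksz;
		if (segcnt > 0 && segcnt * page_size < max)
			max = segcnt * page_size;
		if (max == 0)
			max = 128 * 1024;	// nothing reported: default
		else if (max > UINT32_MAX)
			max = UINT32_MAX;
		return max;
	}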
 
 
 static const char *
 get_jdev_name(struct vnode *jvp)
 {
-    const char *jdev_name;
+	const char *jdev_name;
     
-    jdev_name = vnode_name(jvp);
-    if (jdev_name == NULL) {
-	jdev_name = vfs_addname("unknown-dev", strlen("unknown-dev"), 0, 0);
-    } else {
-	// this just bumps the refcount on the name so we have our own copy
-	jdev_name = vfs_addname(jdev_name, strlen(jdev_name), 0, 0);
-    }
+	jdev_name = vnode_name(jvp);
+	if (jdev_name == NULL) {
+		jdev_name = vfs_addname("unknown-dev", strlen("unknown-dev"), 0, 0);
+	} else {
+		// this just bumps the refcount on the name so we have our own copy
+		jdev_name = vfs_addname(jdev_name, strlen(jdev_name), 0, 0);
+	}
 
-    return jdev_name;
+	return jdev_name;
 }
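
get_jdev_name() always hands back a reference the journal owns: vfs_addname()
bumps the name's refcount (or creates the "unknown-dev" fallback), so every
teardown path can release it with a single vfs_removename().  A userland model
of that contract, with strdup()/free() standing in for the refcounted name
table (all names here are illustrative):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	// The caller always receives a reference it owns, whether the
	// device had a name or the fallback was used, so one release
	// suffices on every path.
	static char *get_name_ref(const char *maybe_name)
	{
		return strdup(maybe_name ? maybe_name : "unknown-dev");
	}

	int main(void)
	{
		char *name = get_name_ref(NULL);	// fallback path

		printf("journal device: %s\n", name);
		free(name);				// single release
		return 0;
	}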
 
 
@@ -1556,143 +1628,167 @@ journal_create(struct vnode *jvp,
 			   void        (*flush)(void *arg),
 			   void         *arg)
 {
-    journal *jnl;
-    uint32_t      phys_blksz, new_txn_base;
-    struct vfs_context context;
-    const char *jdev_name;
+	journal		*jnl;
+	uint32_t	phys_blksz, new_txn_base;
+	u_int32_t	min_size;
+	struct vfs_context context;
+	const char	*jdev_name;
+	/*
+	 * Cap the journal max size at 2GB.  On HFS, the journal is grown to
+	 * occupy a full allocation block if its current size is smaller than
+	 * the allocation block on which it resides.  Once we hit the exabyte
+	 * filesystem range, allocation blocks are 2GB, so the cap must be
+	 * 2GB as well.
+	 */
+	context.vc_thread = current_thread();
+	context.vc_ucred = FSCRED;
 
-    context.vc_thread = current_thread();
-    context.vc_ucred = FSCRED;
+	jdev_name = get_jdev_name(jvp);
 
-    jdev_name = get_jdev_name(jvp);
+	/* Get the real physical block size. */
+	if (VNOP_IOCTL(jvp, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, &context)) {
+		return NULL;
+	}
 
-    /* Get the real physical block size. */
-    if (VNOP_IOCTL(jvp, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, &context)) {
-	return NULL;
-    }
+	if (journal_size < (256*1024) || journal_size > (MAX_JOURNAL_SIZE)) {
+		printf("jnl: create: journal size %lld looks bogus.\n", journal_size);
+		return NULL;
+	}
 
-    if (journal_size < (256*1024) || journal_size > (1024*1024*1024)) {
-	    printf("jnl: create: journal size %lld looks bogus.\n", journal_size);
-	    return NULL;
-    }
+	min_size = phys_blksz * (phys_blksz / sizeof(block_info));
+	/* Reject journals that are too small given the sector size of the device */
+	if (journal_size < min_size) {
+		printf("jnl: create: journal size (%lld) too small given sector size of (%u)\n", 
+				journal_size, phys_blksz);
+		return NULL;
+	}
 
-    if (phys_blksz > min_fs_blksz) {
+	if (phys_blksz > min_fs_blksz) {
 		printf("jnl: %s: create: error: phys blksize %u bigger than min fs blksize %zd\n",
-		    jdev_name, phys_blksz, min_fs_blksz);
+		       jdev_name, phys_blksz, min_fs_blksz);
 		return NULL;
-    }
+	}
 
-    if ((journal_size % phys_blksz) != 0) {
+	if ((journal_size % phys_blksz) != 0) {
 		printf("jnl: %s: create: journal size 0x%llx is not an even multiple of block size 0x%ux\n",
-		    jdev_name, journal_size, phys_blksz);
+		       jdev_name, journal_size, phys_blksz);
 		return NULL;
-    }
+	}
 
 
-    MALLOC_ZONE(jnl, struct journal *, sizeof(struct journal), M_JNL_JNL, M_WAITOK);
-    memset(jnl, 0, sizeof(*jnl));
+	MALLOC_ZONE(jnl, struct journal *, sizeof(struct journal), M_JNL_JNL, M_WAITOK);
+	memset(jnl, 0, sizeof(*jnl));
 
-    jnl->jdev         = jvp;
-    jnl->jdev_offset  = offset;
-    jnl->fsdev        = fsvp;
-    jnl->flush        = flush;
-    jnl->flush_arg    = arg;
-    jnl->flags        = (flags & JOURNAL_OPTION_FLAGS_MASK);
-    jnl->jdev_name    = jdev_name;
-    lck_mtx_init(&jnl->old_start_lock, jnl_mutex_group, jnl_lock_attr);
+	jnl->jdev         = jvp;
+	jnl->jdev_offset  = offset;
+	jnl->fsdev        = fsvp;
+	jnl->flush        = flush;
+	jnl->flush_arg    = arg;
+	jnl->flags        = (flags & JOURNAL_OPTION_FLAGS_MASK);
+	jnl->jdev_name    = jdev_name;
+	lck_mtx_init(&jnl->old_start_lock, jnl_mutex_group, jnl_lock_attr);
 
-    get_io_info(jvp, phys_blksz, jnl, &context);
+	get_io_info(jvp, phys_blksz, jnl, &context);
 	
-    if (kmem_alloc(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz)) {
-	printf("jnl: %s: create: could not allocate space for header buffer (%u bytes)\n", jdev_name, phys_blksz);
-	goto bad_kmem_alloc;
-    }
-    jnl->header_buf_size = phys_blksz;
+	if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz)) {
+		printf("jnl: %s: create: could not allocate space for header buffer (%u bytes)\n", jdev_name, phys_blksz);
+		goto bad_kmem_alloc;
+	}
+	jnl->header_buf_size = phys_blksz;
 
-    jnl->jhdr = (journal_header *)jnl->header_buf;
-    memset(jnl->jhdr, 0, sizeof(journal_header));
+	jnl->jhdr = (journal_header *)jnl->header_buf;
+	memset(jnl->jhdr, 0, sizeof(journal_header));
 
-    // we have to set this up here so that do_journal_io() will work
-    jnl->jhdr->jhdr_size = phys_blksz;
+	// we have to set this up here so that do_journal_io() will work
+	jnl->jhdr->jhdr_size = phys_blksz;
 
-    //
-    // We try and read the journal header to see if there is already one
-    // out there.  If there is, it's possible that it has transactions
-    // in it that we might replay if we happen to pick a sequence number
-    // that is a little less than the old one, there is a crash and the 
-    // last txn written ends right at the start of a txn from the previous
-    // incarnation of this file system.  If all that happens we would
-    // replay the transactions from the old file system and that would
-    // destroy your disk.  Although it is extremely unlikely for all those
-    // conditions to happen, the probability is non-zero and the result is
-    // severe - you lose your file system.  Therefore if we find a valid
-    // journal header and the sequence number is non-zero we write junk
-    // over the entire journal so that there is no way we will encounter
-    // any old transactions.  This is slow but should be a rare event
-    // since most tools erase the journal.
-    //
-    if (   read_journal_header(jnl, jnl->jhdr, phys_blksz) == phys_blksz
-	&& jnl->jhdr->magic == JOURNAL_HEADER_MAGIC
-	&& jnl->jhdr->sequence_num != 0) {
+	//
+	// We try to read the journal header to see if there is already one
+	// out there.  If there is, it may contain transactions that we could
+	// mistakenly replay: if we happened to pick a sequence number a
+	// little less than the old one, the system then crashed, and the
+	// last txn written ended right at the start of a txn from the
+	// previous incarnation of this file system, we would replay the old
+	// file system's transactions and that would destroy your disk.
+	// Although it is extremely unlikely for all those conditions to
+	// happen, the probability is non-zero and the result is severe -
+	// you lose your file system.  Therefore, if we find a valid journal
+	// header with a non-zero sequence number, we write junk over the
+	// entire journal so that there is no way we will encounter any old
+	// transactions.  This is slow, but it should be a rare event since
+	// most tools erase the journal.
+	//
+	if (   read_journal_header(jnl, jnl->jhdr, phys_blksz) == phys_blksz
+	       && jnl->jhdr->magic == JOURNAL_HEADER_MAGIC
+	       && jnl->jhdr->sequence_num != 0) {
 
-	new_txn_base = (jnl->jhdr->sequence_num + (journal_size / phys_blksz) + (random() % 16384)) & 0x00ffffff;
-	printf("jnl: create: avoiding old sequence number 0x%x (0x%x)\n", jnl->jhdr->sequence_num, new_txn_base);
+		new_txn_base = (jnl->jhdr->sequence_num + (journal_size / phys_blksz) + (random() % 16384)) & 0x00ffffff;
+		printf("jnl: create: avoiding old sequence number 0x%x (0x%x)\n", jnl->jhdr->sequence_num, new_txn_base);
 
 #if 0
-	int i;
-	off_t pos=0;
+		int i;
+		off_t pos=0;
 
-	for(i=1; i < journal_size / phys_blksz; i++) {
-	    pos = i*phys_blksz;
+		for(i = 1; i < journal_size / phys_blksz; i++) {
+			pos = i*phys_blksz;
 
-	    // we don't really care what data we write just so long
-	    // as it's not a valid transaction header.  since we have
-	    // the header_buf sitting around we'll use that.
-	    write_journal_data(jnl, &pos, jnl->header_buf, phys_blksz);
-	}
-	printf("jnl: create: done clearing journal (i=%d)\n", i);
+			// we don't really care what data we write just so long
+			// as it's not a valid transaction header.  since we have
+			// the header_buf sitting around we'll use that.
+			write_journal_data(jnl, &pos, jnl->header_buf, phys_blksz);
+		}
+		printf("jnl: create: done clearing journal (i=%d)\n", i);
 #endif
-    } else {
-	new_txn_base = random() & 0x00ffffff;
-    }
+	} else {
+		new_txn_base = random() & 0x00ffffff;
+	}
 
-    memset(jnl->header_buf, 0, phys_blksz);
+	memset(jnl->header_buf, 0, phys_blksz);
     
-    jnl->jhdr->magic      = JOURNAL_HEADER_MAGIC;
-    jnl->jhdr->endian     = ENDIAN_MAGIC;
-    jnl->jhdr->start      = phys_blksz;    // start at block #1, block #0 is for the jhdr itself
-    jnl->jhdr->end        = phys_blksz;
-    jnl->jhdr->size       = journal_size;
-    jnl->jhdr->jhdr_size  = phys_blksz;
-    size_up_tbuffer(jnl, tbuffer_size, phys_blksz);
-
-    jnl->active_start     = jnl->jhdr->start;
-
-    // XXXdbg  - for testing you can force the journal to wrap around
-    // jnl->jhdr->start = jnl->jhdr->size - (phys_blksz*3);
-    // jnl->jhdr->end   = jnl->jhdr->size - (phys_blksz*3);
+	jnl->jhdr->magic      = JOURNAL_HEADER_MAGIC;
+	jnl->jhdr->endian     = ENDIAN_MAGIC;
+	jnl->jhdr->start      = phys_blksz;    // start at block #1, block #0 is for the jhdr itself
+	jnl->jhdr->end        = phys_blksz;
+	jnl->jhdr->size       = journal_size;
+	jnl->jhdr->jhdr_size  = phys_blksz;
+	size_up_tbuffer(jnl, tbuffer_size, phys_blksz);
+
+	jnl->active_start     = jnl->jhdr->start;
+
+	// XXXdbg  - for testing you can force the journal to wrap around
+	// jnl->jhdr->start = jnl->jhdr->size - (phys_blksz*3);
+	// jnl->jhdr->end   = jnl->jhdr->size - (phys_blksz*3);
     
-    jnl->jhdr->sequence_num = new_txn_base;
+	jnl->jhdr->sequence_num = new_txn_base;
 
-    lck_mtx_init(&jnl->jlock, jnl_mutex_group, jnl_lock_attr);
+	lck_mtx_init(&jnl->jlock, jnl_mutex_group, jnl_lock_attr);
+	lck_mtx_init(&jnl->flock, jnl_mutex_group, jnl_lock_attr);
+	lck_rw_init(&jnl->trim_lock, jnl_mutex_group, jnl_lock_attr);
+	
+	jnl->flushing = FALSE;
+	jnl->asyncIO = FALSE;
+	jnl->flush_aborted = FALSE;
+	jnl->writing_header = FALSE;
+	jnl->async_trim = NULL;
+	jnl->sequence_num = jnl->jhdr->sequence_num;
+	
+	if (write_journal_header(jnl, 1, jnl->jhdr->sequence_num) != 0) {
+		printf("jnl: %s: journal_create: failed to write journal header.\n", jdev_name);
+		goto bad_write;
+	}
 
-    if (write_journal_header(jnl, 1) != 0) {
-	printf("jnl: %s: journal_create: failed to write journal header.\n", jdev_name);
-	goto bad_write;
-    }
+	return jnl;
 
-    return jnl;
 
+bad_write:
+	kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, phys_blksz);
+bad_kmem_alloc:
+	if (jdev_name) {
+		vfs_removename(jdev_name);
+	}
+	jnl->jhdr = NULL;
+	FREE_ZONE(jnl, sizeof(struct journal), M_JNL_JNL);
 
-  bad_write:
-    kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, phys_blksz);
-  bad_kmem_alloc:
-    if (jdev_name) {
-	vfs_removename(jdev_name);
-    }
-    jnl->jhdr = NULL;
-    FREE_ZONE(jnl, sizeof(struct journal), M_JNL_JNL);
-    return NULL;
+	return NULL;
 }
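
The new minimum-size check in journal_create() requires the journal to hold at
least as many physical blocks as one block-list header can describe:
min_size = phys_blksz * (phys_blksz / sizeof(block_info)).  A worked sketch,
assuming (for illustration only) a 16-byte block_info and 512-byte sectors:

	#include <stdio.h>
	#include <stdint.h>

	// Hypothetical stand-in for the kernel's block_info; 16 bytes is
	// an assumption made for the arithmetic below.
	typedef struct {
		int64_t	bnum;	// block number on the fs device
		int64_t	bval;	// size/flags or buf pointer
	} block_info;

	int main(void)
	{
		uint32_t phys_blksz = 512;
		uint32_t min_size = phys_blksz *
		    (phys_blksz / (uint32_t)sizeof(block_info));

		// 512 * (512 / 16) = 512 * 32 = 16384 bytes minimum
		printf("min journal size: %u bytes\n", min_size);
		return 0;
	}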
 
 
@@ -1707,69 +1803,78 @@ journal_open(struct vnode *jvp,
 			 void        (*flush)(void *arg),
 			 void         *arg)
 {
-    journal *jnl;
-    uint32_t   orig_blksz=0;
-    uint32_t   phys_blksz;
-    int      orig_checksum, checksum;
-    struct vfs_context context;
-    const char *jdev_name = get_jdev_name(jvp);
-
-    context.vc_thread = current_thread();
-    context.vc_ucred = FSCRED;
-
-    /* Get the real physical block size. */
-    if (VNOP_IOCTL(jvp, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, &context)) {
+	journal		*jnl;
+	uint32_t	orig_blksz=0;
+	uint32_t	phys_blksz;
+	u_int32_t	min_size = 0;
+	int		orig_checksum, checksum;
+	struct vfs_context context;
+	const char	*jdev_name = get_jdev_name(jvp);
+
+	context.vc_thread = current_thread();
+	context.vc_ucred = FSCRED;
+
+	/* Get the real physical block size. */
+	if (VNOP_IOCTL(jvp, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, &context)) {
 		return NULL;
-    }
+	}
 
-    if (phys_blksz > min_fs_blksz) {
+	if (phys_blksz > min_fs_blksz) {
 		printf("jnl: %s: open: error: phys blksize %u bigger than min fs blksize %zd\n",
-		    jdev_name, phys_blksz, min_fs_blksz);
+		       jdev_name, phys_blksz, min_fs_blksz);
 		return NULL;
-    }
+	}
 
-    if (journal_size < (256*1024) || journal_size > (1024*1024*1024)) {
-	    printf("jnl: open: journal size %lld looks bogus.\n", journal_size);
-	    return NULL;
-    }
+	if (journal_size < (256*1024) || journal_size > (1024*1024*1024)) {
+		printf("jnl: open: journal size %lld looks bogus.\n", journal_size);
+		return NULL;
+	}
+
+	min_size = phys_blksz * (phys_blksz / sizeof(block_info));
+	/* Reject journals that are too small given the sector size of the device */
+	if (journal_size < min_size) {
+		printf("jnl: open: journal size (%lld) too small given sector size of (%u)\n", 
+				journal_size, phys_blksz);
+		return NULL;
+	}
     
-    if ((journal_size % phys_blksz) != 0) {
+	if ((journal_size % phys_blksz) != 0) {
 		printf("jnl: %s: open: journal size 0x%llx is not an even multiple of block size 0x%x\n",
-		    jdev_name, journal_size, phys_blksz);
+		       jdev_name, journal_size, phys_blksz);
 		return NULL;
-    }
+	}
 
-    MALLOC_ZONE(jnl, struct journal *, sizeof(struct journal), M_JNL_JNL, M_WAITOK);
-    memset(jnl, 0, sizeof(*jnl));
+	MALLOC_ZONE(jnl, struct journal *, sizeof(struct journal), M_JNL_JNL, M_WAITOK);
+	memset(jnl, 0, sizeof(*jnl));
 
-    jnl->jdev         = jvp;
-    jnl->jdev_offset  = offset;
-    jnl->fsdev        = fsvp;
-    jnl->flush        = flush;
-    jnl->flush_arg    = arg;
-    jnl->flags        = (flags & JOURNAL_OPTION_FLAGS_MASK);
-    jnl->jdev_name    = jdev_name;
-    lck_mtx_init(&jnl->old_start_lock, jnl_mutex_group, jnl_lock_attr);
+	jnl->jdev         = jvp;
+	jnl->jdev_offset  = offset;
+	jnl->fsdev        = fsvp;
+	jnl->flush        = flush;
+	jnl->flush_arg    = arg;
+	jnl->flags        = (flags & JOURNAL_OPTION_FLAGS_MASK);
+	jnl->jdev_name    = jdev_name;
+	lck_mtx_init(&jnl->old_start_lock, jnl_mutex_group, jnl_lock_attr);
 
-    get_io_info(jvp, phys_blksz, jnl, &context);
+	get_io_info(jvp, phys_blksz, jnl, &context);
 
-    if (kmem_alloc(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz)) {
-	printf("jnl: %s: create: could not allocate space for header buffer (%u bytes)\n", jdev_name, phys_blksz);
-	goto bad_kmem_alloc;
-    }
-    jnl->header_buf_size = phys_blksz;
+	if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz)) {
+		printf("jnl: %s: create: could not allocate space for header buffer (%u bytes)\n", jdev_name, phys_blksz);
+		goto bad_kmem_alloc;
+	}
+	jnl->header_buf_size = phys_blksz;
 
-    jnl->jhdr = (journal_header *)jnl->header_buf;
-    memset(jnl->jhdr, 0, sizeof(journal_header));
+	jnl->jhdr = (journal_header *)jnl->header_buf;
+	memset(jnl->jhdr, 0, sizeof(journal_header));
 
-    // we have to set this up here so that do_journal_io() will work
-    jnl->jhdr->jhdr_size = phys_blksz;
+	// we have to set this up here so that do_journal_io() will work
+	jnl->jhdr->jhdr_size = phys_blksz;
 
-    if (read_journal_header(jnl, jnl->jhdr, phys_blksz) != phys_blksz) {
+	if (read_journal_header(jnl, jnl->jhdr, phys_blksz) != phys_blksz) {
 		printf("jnl: %s: open: could not read %u bytes for the journal header.\n",
-		    jdev_name, phys_blksz);
+		       jdev_name, phys_blksz);
 		goto bad_journal;
-    }
+	}
 
 	orig_checksum = jnl->jhdr->checksum;
 	jnl->jhdr->checksum = 0;
@@ -1784,18 +1889,18 @@ journal_open(struct vnode *jvp,
 		checksum = calc_checksum((char *)jnl->jhdr, JOURNAL_HEADER_CKSUM_SIZE);
 	}
 
-    if (jnl->jhdr->magic != JOURNAL_HEADER_MAGIC && jnl->jhdr->magic != OLD_JOURNAL_HEADER_MAGIC) {
+	if (jnl->jhdr->magic != JOURNAL_HEADER_MAGIC && jnl->jhdr->magic != OLD_JOURNAL_HEADER_MAGIC) {
 		printf("jnl: %s: open: journal magic is bad (0x%x != 0x%x)\n",
-		    jnl->jdev_name, jnl->jhdr->magic, JOURNAL_HEADER_MAGIC);
+		       jnl->jdev_name, jnl->jhdr->magic, JOURNAL_HEADER_MAGIC);
 		goto bad_journal;
-    }
+	}
 
 	// only check if we're the current journal header magic value
 	if (jnl->jhdr->magic == JOURNAL_HEADER_MAGIC) {
 
 		if (orig_checksum != checksum) {
 			printf("jnl: %s: open: journal checksum is bad (0x%x != 0x%x)\n",
-			    jdev_name, orig_checksum, checksum);
+			       jdev_name, orig_checksum, checksum);
 				   
 			//goto bad_journal;
 		}
@@ -1807,16 +1912,16 @@ journal_open(struct vnode *jvp,
 	}
 
     if (phys_blksz != (size_t)jnl->jhdr->jhdr_size && jnl->jhdr->jhdr_size != 0) {
-    	/*
-    	 * The volume has probably been resized (such that we had to adjust the
-    	 * logical sector size), or copied to media with a different logical
-    	 * sector size.
+	/*
+	 * The volume has probably been resized (such that we had to adjust the
+	 * logical sector size), or copied to media with a different logical
+	 * sector size.
 	 *
 	 * Temporarily change the device's logical block size to match the
 	 * journal's header size.  This will allow us to replay the journal
 	 * safely.  If the replay succeeds, we will update the journal's header
 	 * size (later in this function).
-    	 */
+	 */
 
 	orig_blksz = phys_blksz;
 	phys_blksz = jnl->jhdr->jhdr_size;
@@ -1825,27 +1930,27 @@ journal_open(struct vnode *jvp,
 	printf("jnl: %s: open: temporarily switched block size from %u to %u\n",
 	       jdev_name, orig_blksz, phys_blksz);
     }
-
-    if (   jnl->jhdr->start <= 0
-		   || jnl->jhdr->start > jnl->jhdr->size
-		   || jnl->jhdr->start > 1024*1024*1024) {
+    
+	if (   jnl->jhdr->start <= 0
+	       || jnl->jhdr->start > jnl->jhdr->size
+	       || jnl->jhdr->start > 1024*1024*1024) {
 		printf("jnl: %s: open: jhdr start looks bad (0x%llx max size 0x%llx)\n",
-		    jdev_name, jnl->jhdr->start, jnl->jhdr->size);
+		       jdev_name, jnl->jhdr->start, jnl->jhdr->size);
 		goto bad_journal;
-    }
+	}
 
-    if (   jnl->jhdr->end <= 0
-		   || jnl->jhdr->end > jnl->jhdr->size
-		   || jnl->jhdr->end > 1024*1024*1024) {
+	if (   jnl->jhdr->end <= 0
+	       || jnl->jhdr->end > jnl->jhdr->size
+	       || jnl->jhdr->end > 1024*1024*1024) {
 		printf("jnl: %s: open: jhdr end looks bad (0x%llx max size 0x%llx)\n",
-		    jdev_name, jnl->jhdr->end, jnl->jhdr->size);
+		       jdev_name, jnl->jhdr->end, jnl->jhdr->size);
 		goto bad_journal;
-    }
+	}
 
-    if (jnl->jhdr->size < (256*1024) || jnl->jhdr->size > 1024*1024*1024) {
-	printf("jnl: %s: open: jhdr size looks bad (0x%llx)\n", jdev_name, jnl->jhdr->size);
-	goto bad_journal;
-    }
+	if (jnl->jhdr->size < (256*1024) || jnl->jhdr->size > 1024*1024*1024) {
+		printf("jnl: %s: open: jhdr size looks bad (0x%llx)\n", jdev_name, jnl->jhdr->size);
+		goto bad_journal;
+	}
 
 // XXXdbg - can't do these checks because hfs writes all kinds of
 //          non-uniform sized blocks even on devices that have a block size
@@ -1853,28 +1958,28 @@ journal_open(struct vnode *jvp,
 //          therefore these checks will fail and so we just have to punt and
 //          do more relaxed checking...
 // XXXdbg    if ((jnl->jhdr->start % jnl->jhdr->jhdr_size) != 0) {
-    if ((jnl->jhdr->start % 512) != 0) {
+	if ((jnl->jhdr->start % 512) != 0) {
 		printf("jnl: %s: open: journal start (0x%llx) not a multiple of 512?\n",
-		    jdev_name, jnl->jhdr->start);
+		       jdev_name, jnl->jhdr->start);
 		goto bad_journal;
-    }
+	}
 
 //XXXdbg    if ((jnl->jhdr->end % jnl->jhdr->jhdr_size) != 0) {
-    if ((jnl->jhdr->end % 512) != 0) {
+	if ((jnl->jhdr->end % 512) != 0) {
 		printf("jnl: %s: open: journal end (0x%llx) not a multiple of block size (0x%x)?\n",
-		    jdev_name, jnl->jhdr->end, jnl->jhdr->jhdr_size);
+		       jdev_name, jnl->jhdr->end, jnl->jhdr->jhdr_size);
 		goto bad_journal;
-    }
+	}
 
-    // take care of replaying the journal if necessary
-    if (flags & JOURNAL_RESET) {
-	printf("jnl: %s: journal start/end pointers reset! (jnl %p; s 0x%llx e 0x%llx)\n",
-	    jdev_name, jnl, jnl->jhdr->start, jnl->jhdr->end);
-	jnl->jhdr->start = jnl->jhdr->end;
-    } else if (replay_journal(jnl) != 0) {
-	printf("jnl: %s: journal_open: Error replaying the journal!\n", jdev_name);
-	goto bad_journal;
-    }
+	// take care of replaying the journal if necessary
+	if (flags & JOURNAL_RESET) {
+		printf("jnl: %s: journal start/end pointers reset! (jnl %p; s 0x%llx e 0x%llx)\n",
+		       jdev_name, jnl, jnl->jhdr->start, jnl->jhdr->end);
+		jnl->jhdr->start = jnl->jhdr->end;
+	} else if (replay_journal(jnl) != 0) {
+		printf("jnl: %s: journal_open: Error replaying the journal!\n", jdev_name);
+		goto bad_journal;
+	}
 
     /*
      * When we get here, we know that the journal is empty (jnl->jhdr->start ==
@@ -1891,6 +1996,7 @@ journal_open(struct vnode *jvp,
 	VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&orig_blksz, FWRITE, &context);
 	phys_blksz = orig_blksz;
 	orig_blksz = 0;
+	printf("jnl: %s: open: restored block size to %u\n", jdev_name, phys_blksz);
 	
 	jnl->jhdr->jhdr_size = phys_blksz;
 	jnl->jhdr->start = phys_blksz;
@@ -1899,23 +2005,24 @@ journal_open(struct vnode *jvp,
 				   (journal_size / phys_blksz) +
 				   (random() % 16384)) & 0x00ffffff;
 	
-	if (write_journal_header(jnl, 1)) {
+	if (write_journal_header(jnl, 1, jnl->jhdr->sequence_num)) {
 		printf("jnl: %s: open: failed to update journal header size\n", jdev_name);
 		goto bad_journal;
 	}
     }
-
+    
     // make sure this is in sync!
     jnl->active_start = jnl->jhdr->start;
+    jnl->sequence_num = jnl->jhdr->sequence_num;
 
     // set this now, after we've replayed the journal
     size_up_tbuffer(jnl, tbuffer_size, phys_blksz);
 
     // TODO: Does this need to change if the device's logical block size changed?
     if ((off_t)(jnl->jhdr->blhdr_size/sizeof(block_info)-1) > (jnl->jhdr->size/jnl->jhdr->jhdr_size)) {
-	    printf("jnl: %s: open: jhdr size and blhdr size are not compatible (0x%llx, %d, %d)\n", jdev_name, jnl->jhdr->size,
-		   jnl->jhdr->blhdr_size, jnl->jhdr->jhdr_size);
-	    goto bad_journal;
+	printf("jnl: %s: open: jhdr size and blhdr size are not compatible (0x%llx, %d, %d)\n", jdev_name, jnl->jhdr->size,
+	       jnl->jhdr->blhdr_size, jnl->jhdr->jhdr_size);
+	goto bad_journal;
     }
 
     lck_mtx_init(&jnl->jlock, jnl_mutex_group, jnl_lock_attr);
@@ -1926,7 +2033,7 @@ journal_open(struct vnode *jvp,
     if (orig_blksz != 0) {
 	phys_blksz = orig_blksz;
 	VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&orig_blksz, FWRITE, &context);
-	printf("jnl: %s: open: restored block size after error\n", jdev_name);
+	printf("jnl: %s: open: restored block size to %u after error\n", jdev_name, orig_blksz);
     }
     kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, phys_blksz);
   bad_kmem_alloc:
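
journal_open() (and journal_is_clean() below) detects a journal written with
the opposite byte order by comparing the on-disk magic against
SWAP32(JOURNAL_HEADER_MAGIC), computing the checksum before swapping because
it is calculated byte-at-a-time.  A minimal sketch of the detection step (the
magic value and SWAP32 definition here are illustrative stand-ins):

	#include <stdio.h>
	#include <stdint.h>

	#define JHDR_MAGIC	0x4a4e4c78u		// illustrative value
	#define SWAP32(x)	__builtin_bswap32(x)	// stand-in macro

	// 0: native order; 1: header fields need swapping; -1: not a journal.
	static int journal_needs_swap(uint32_t on_disk_magic)
	{
		if (on_disk_magic == JHDR_MAGIC)
			return 0;
		if (on_disk_magic == SWAP32(JHDR_MAGIC))
			return 1;
		return -1;
	}

	int main(void)
	{
		printf("%d %d %d\n",
		       journal_needs_swap(JHDR_MAGIC),		// 0
		       journal_needs_swap(SWAP32(JHDR_MAGIC)),	// 1
		       journal_needs_swap(0xdeadbeef));		// -1
		return 0;
	}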
@@ -1945,110 +2052,109 @@ journal_is_clean(struct vnode *jvp,
 		 struct vnode *fsvp,
                  size_t        min_fs_block_size)
 {
-    journal jnl;
-    uint32_t     phys_blksz;
-    int  ret;
-    int     orig_checksum, checksum;
-    struct vfs_context context;
-    const char *jdev_name = get_jdev_name(jvp);
-
-    context.vc_thread = current_thread();
-    context.vc_ucred = FSCRED;
-
-    /* Get the real physical block size. */
-    if (VNOP_IOCTL(jvp, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, &context)) {
-	printf("jnl: %s: is_clean: failed to get device block size.\n", jdev_name);
-	return EINVAL;
-    }
+	journal		jnl;
+	uint32_t	phys_blksz;
+	int		ret;
+	int		orig_checksum, checksum;
+	struct vfs_context context;
+	const		char *jdev_name = get_jdev_name(jvp);
+
+	context.vc_thread = current_thread();
+	context.vc_ucred = FSCRED;
+
+	/* Get the real physical block size. */
+	if (VNOP_IOCTL(jvp, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, &context)) {
+		printf("jnl: %s: is_clean: failed to get device block size.\n", jdev_name);
+		return EINVAL;
+	}
 
-    if (phys_blksz > (uint32_t)min_fs_block_size) {
-	printf("jnl: %s: is_clean: error: phys blksize %d bigger than min fs blksize %zd\n",
-	    jdev_name, phys_blksz, min_fs_block_size);
-	return EINVAL;
-    }
+	if (phys_blksz > (uint32_t)min_fs_block_size) {
+		printf("jnl: %s: is_clean: error: phys blksize %d bigger than min fs blksize %zd\n",
+		       jdev_name, phys_blksz, min_fs_block_size);
+		return EINVAL;
+	}
 
-    if (journal_size < (256*1024) || journal_size > (1024*1024*1024)) {
-	    printf("jnl: is_clean: journal size %lld looks bogus.\n", journal_size);
-	    return EINVAL;
-    }
+	if (journal_size < (256*1024) || journal_size > (MAX_JOURNAL_SIZE)) {
+		printf("jnl: is_clean: journal size %lld looks bogus.\n", journal_size);
+		return EINVAL;
+	}
     
-    if ((journal_size % phys_blksz) != 0) {
-	printf("jnl: %s: is_clean: journal size 0x%llx is not an even multiple of block size 0x%x\n",
-	    jdev_name, journal_size, phys_blksz);
-	return EINVAL;
-    }
+	if ((journal_size % phys_blksz) != 0) {
+		printf("jnl: %s: is_clean: journal size 0x%llx is not an even multiple of block size 0x%x\n",
+		       jdev_name, journal_size, phys_blksz);
+		return EINVAL;
+	}
 
-    memset(&jnl, 0, sizeof(jnl));
+	memset(&jnl, 0, sizeof(jnl));
 
-    if (kmem_alloc(kernel_map, (vm_offset_t *)&jnl.header_buf, phys_blksz)) {
-	printf("jnl: %s: is_clean: could not allocate space for header buffer (%d bytes)\n", jdev_name, phys_blksz);
-	return ENOMEM;
-    }
-    jnl.header_buf_size = phys_blksz;
+	if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl.header_buf, phys_blksz)) {
+		printf("jnl: %s: is_clean: could not allocate space for header buffer (%d bytes)\n", jdev_name, phys_blksz);
+		return ENOMEM;
+	}
+	jnl.header_buf_size = phys_blksz;
 
-    get_io_info(jvp, phys_blksz, &jnl, &context);
+	get_io_info(jvp, phys_blksz, &jnl, &context);
     
-    jnl.jhdr = (journal_header *)jnl.header_buf;
-    memset(jnl.jhdr, 0, sizeof(journal_header));
+	jnl.jhdr = (journal_header *)jnl.header_buf;
+	memset(jnl.jhdr, 0, sizeof(journal_header));
 
-    jnl.jdev        = jvp;
-    jnl.jdev_offset = offset;
-    jnl.fsdev       = fsvp;
+	jnl.jdev        = jvp;
+	jnl.jdev_offset = offset;
+	jnl.fsdev       = fsvp;
 
-    // we have to set this up here so that do_journal_io() will work
-    jnl.jhdr->jhdr_size = phys_blksz;
+	// we have to set this up here so that do_journal_io() will work
+	jnl.jhdr->jhdr_size = phys_blksz;
 
-    if (read_journal_header(&jnl, jnl.jhdr, phys_blksz) != (unsigned)phys_blksz) {
-	printf("jnl: %s: is_clean: could not read %d bytes for the journal header.\n",
-	    jdev_name, phys_blksz);
-	ret = EINVAL;
-	goto get_out;
-    }
+	if (read_journal_header(&jnl, jnl.jhdr, phys_blksz) != (unsigned)phys_blksz) {
+		printf("jnl: %s: is_clean: could not read %d bytes for the journal header.\n",
+		       jdev_name, phys_blksz);
+		ret = EINVAL;
+		goto get_out;
+	}
 
-    orig_checksum = jnl.jhdr->checksum;
-    jnl.jhdr->checksum = 0;
-
-    if (jnl.jhdr->magic == SWAP32(JOURNAL_HEADER_MAGIC)) {
-	// do this before the swap since it's done byte-at-a-time
-	orig_checksum = SWAP32(orig_checksum);
-	checksum = calc_checksum((char *)jnl.jhdr, JOURNAL_HEADER_CKSUM_SIZE);
-	swap_journal_header(&jnl);
-	jnl.flags |= JOURNAL_NEED_SWAP;
-    } else {
-	checksum = calc_checksum((char *)jnl.jhdr, JOURNAL_HEADER_CKSUM_SIZE);
-    }
+	orig_checksum = jnl.jhdr->checksum;
+	jnl.jhdr->checksum = 0;
 
-    if (jnl.jhdr->magic != JOURNAL_HEADER_MAGIC && jnl.jhdr->magic != OLD_JOURNAL_HEADER_MAGIC) {
-	printf("jnl: %s: is_clean: journal magic is bad (0x%x != 0x%x)\n",
-	    jdev_name, jnl.jhdr->magic, JOURNAL_HEADER_MAGIC);
-	ret = EINVAL;
-	goto get_out;
-    }
+	if (jnl.jhdr->magic == SWAP32(JOURNAL_HEADER_MAGIC)) {
+		// do this before the swap since it's done byte-at-a-time
+		orig_checksum = SWAP32(orig_checksum);
+		checksum = calc_checksum((char *)jnl.jhdr, JOURNAL_HEADER_CKSUM_SIZE);
+		swap_journal_header(&jnl);
+		jnl.flags |= JOURNAL_NEED_SWAP;
+	} else {
+		checksum = calc_checksum((char *)jnl.jhdr, JOURNAL_HEADER_CKSUM_SIZE);
+	}
 
-    if (orig_checksum != checksum) {
-	printf("jnl: %s: is_clean: journal checksum is bad (0x%x != 0x%x)\n", jdev_name, orig_checksum, checksum);
-	ret = EINVAL;
-	goto get_out;
-    }
+	if (jnl.jhdr->magic != JOURNAL_HEADER_MAGIC && jnl.jhdr->magic != OLD_JOURNAL_HEADER_MAGIC) {
+		printf("jnl: %s: is_clean: journal magic is bad (0x%x != 0x%x)\n",
+		       jdev_name, jnl.jhdr->magic, JOURNAL_HEADER_MAGIC);
+		ret = EINVAL;
+		goto get_out;
+	}
 
-    //
-    // if the start and end are equal then the journal is clean.
-    // otherwise it's not clean and therefore an error.
-    //
-    if (jnl.jhdr->start == jnl.jhdr->end) {
-	ret = 0;
-    } else {
-	    ret = EBUSY;    // so the caller can differentiate an invalid journal from a "busy" one
-    }
+	if (orig_checksum != checksum) {
+		printf("jnl: %s: is_clean: journal checksum is bad (0x%x != 0x%x)\n", jdev_name, orig_checksum, checksum);
+		ret = EINVAL;
+		goto get_out;
+	}
 
-  get_out:
-    kmem_free(kernel_map, (vm_offset_t)jnl.header_buf, phys_blksz);
-    if (jdev_name) {
-	vfs_removename(jdev_name);
-    }
-    
-    return ret;    
+	//
+	// if the start and end are equal then the journal is clean.
+	// otherwise it's not clean and therefore an error.
+	//
+	if (jnl.jhdr->start == jnl.jhdr->end) {
+		ret = 0;
+	} else {
+		ret = EBUSY;    // so the caller can differentiate an invalid journal from a "busy" one
+	}
 
+get_out:
+	kmem_free(kernel_map, (vm_offset_t)jnl.header_buf, phys_blksz);
+	if (jdev_name) {
+		vfs_removename(jdev_name);
+	}
+    
+	return ret;    
 
 }
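
journal_is_clean() deliberately separates "not a journal" from "a journal with
pending work": EINVAL for an unreadable or corrupt header, EBUSY when start !=
end, and 0 when clean.  A self-contained model of that three-way contract (the
helper is hypothetical, standing in for the header checks above):

	#include <errno.h>
	#include <stdio.h>

	static int is_clean(long long start, long long end, int header_ok)
	{
		if (!header_ok)
			return EINVAL;	// unreadable/corrupt header
		if (start == end)
			return 0;	// clean: nothing to replay
		return EBUSY;		// valid journal, pending transactions
	}

	int main(void)
	{
		printf("%d %d %d\n",
		       is_clean(512, 512, 1),	// 0
		       is_clean(512, 4096, 1),	// EBUSY
		       is_clean(512, 512, 0));	// EINVAL
		return 0;
	}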
 
@@ -2056,26 +2162,31 @@ journal_is_clean(struct vnode *jvp,
 void
 journal_close(journal *jnl)
 {
-    volatile off_t *start, *end;
-    int             counter=0;
+	volatile off_t *start, *end;
+	int             counter=0;
 
-    CHECK_JOURNAL(jnl);
+	CHECK_JOURNAL(jnl);
 
 	// set this before doing anything that would block so that
 	// we start tearing things down properly.
 	//
 	jnl->flags |= JOURNAL_CLOSE_PENDING;
 
-    if (jnl->owner != current_thread()) {
+	if (jnl->owner != current_thread()) {
 		lock_journal(jnl);
-    }
+	}
 
-    //
-    // only write stuff to disk if the journal is still valid
-    //
-    if ((jnl->flags & JOURNAL_INVALID) == 0) {
+	wait_condition(jnl, &jnl->flushing, "journal_close");
+
+	//
+	// only write stuff to disk if the journal is still valid
+	//
+	if ((jnl->flags & JOURNAL_INVALID) == 0) {
 
 		if (jnl->active_tr) {
+			/*
+			 * "journal_end_transaction" will fire the flush asynchronously
+			 */
 			journal_end_transaction(jnl);
 		}
 		
@@ -2084,8 +2195,17 @@ journal_close(journal *jnl)
 			transaction *tr = jnl->cur_tr;
 
 			jnl->cur_tr = NULL;
-			end_transaction(tr, 1, NULL, NULL);   // force it to get flushed
+			/*
+			 * "end_transaction" will wait for any in-progress flush to complete
+			 * before flushing "cur_tr" synchronously("must_wait" == TRUE)
+			 */
+			end_transaction(tr, 1, NULL, NULL, FALSE, TRUE);
 		}
+		/*
+		 * if there was an "active_tr" but no "cur_tr" to process,
+		 * make sure we still wait for its flush to complete
+		 */
+		wait_condition(jnl, &jnl->flushing, "journal_close");
     
 		//start = &jnl->jhdr->start;
 		start = &jnl->active_start;
@@ -2101,20 +2221,22 @@ journal_close(journal *jnl)
 
 		if (*start != *end) {
 			printf("jnl: %s: close: buffer flushing didn't seem to flush out all the transactions! (0x%llx - 0x%llx)\n",
-			    jnl->jdev_name, *start, *end);
+			       jnl->jdev_name, *start, *end);
 		}
 
 		// make sure this is in sync when we close the journal
 		jnl->jhdr->start = jnl->active_start;
 
 		// if this fails there's not much we can do at this point...
-		write_journal_header(jnl, 1);
-    } else {
+		write_journal_header(jnl, 1, jnl->sequence_num);
+	} else {
 		// if we're here the journal isn't valid any more.
 		// so make sure we don't leave any locked blocks lying around
 		printf("jnl: %s: close: journal %p, is invalid.  aborting outstanding transactions\n", jnl->jdev_name, jnl);
+
 		if (jnl->active_tr || jnl->cur_tr) {
 			transaction *tr;
+
 			if (jnl->active_tr) {
 				tr = jnl->active_tr;
 				jnl->active_tr = NULL;
@@ -2122,45 +2244,45 @@ journal_close(journal *jnl)
 				tr = jnl->cur_tr;
 				jnl->cur_tr = NULL;
 			}
-
 			abort_transaction(jnl, tr);
+
 			if (jnl->active_tr || jnl->cur_tr) {
-			    panic("jnl: %s: close: jnl @ %p had both an active and cur tr\n", jnl->jdev_name, jnl);
+				panic("jnl: %s: close: jnl @ %p had both an active and cur tr\n", jnl->jdev_name, jnl);
 			}
 		}
-    }
+	}
 
-    free_old_stuff(jnl);
+	free_old_stuff(jnl);
 
-    kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, jnl->header_buf_size);
-    jnl->jhdr = (void *)0xbeefbabe;
+	kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, jnl->header_buf_size);
+	jnl->jhdr = (void *)0xbeefbabe;
 
-    if (jnl->jdev_name) {
-	vfs_removename(jnl->jdev_name);
-    }
+	if (jnl->jdev_name) {
+		vfs_removename(jnl->jdev_name);
+	}
 
-    FREE_ZONE(jnl, sizeof(struct journal), M_JNL_JNL);
+	FREE_ZONE(jnl, sizeof(struct journal), M_JNL_JNL);
 }
 
 static void
 dump_journal(journal *jnl)
 {
-    transaction *ctr;
-
-    printf("journal for dev %s:", jnl->jdev_name);
-    printf("  jdev_offset %.8llx\n", jnl->jdev_offset);
-    printf("  magic: 0x%.8x\n", jnl->jhdr->magic);
-    printf("  start: 0x%.8llx\n", jnl->jhdr->start);
-    printf("  end:   0x%.8llx\n", jnl->jhdr->end);
-    printf("  size:  0x%.8llx\n", jnl->jhdr->size);
-    printf("  blhdr size: %d\n", jnl->jhdr->blhdr_size);
-    printf("  jhdr size: %d\n", jnl->jhdr->jhdr_size);
-    printf("  chksum: 0x%.8x\n", jnl->jhdr->checksum);
+	transaction *ctr;
+
+	printf("journal for dev %s:", jnl->jdev_name);
+	printf("  jdev_offset %.8llx\n", jnl->jdev_offset);
+	printf("  magic: 0x%.8x\n", jnl->jhdr->magic);
+	printf("  start: 0x%.8llx\n", jnl->jhdr->start);
+	printf("  end:   0x%.8llx\n", jnl->jhdr->end);
+	printf("  size:  0x%.8llx\n", jnl->jhdr->size);
+	printf("  blhdr size: %d\n", jnl->jhdr->blhdr_size);
+	printf("  jhdr size: %d\n", jnl->jhdr->jhdr_size);
+	printf("  chksum: 0x%.8x\n", jnl->jhdr->checksum);
     
-    printf("  completed transactions:\n");
-    for(ctr=jnl->completed_trs; ctr; ctr=ctr->next) {
+	printf("  completed transactions:\n");
+	for (ctr = jnl->completed_trs; ctr; ctr = ctr->next) {
 		printf("    0x%.8llx - 0x%.8llx\n", ctr->journal_start, ctr->journal_end);
-    }
+	}
 }
 
 
@@ -2168,18 +2290,18 @@ dump_journal(journal *jnl)
 static off_t
 free_space(journal *jnl)
 {
-    off_t free_space_offset;
+	off_t free_space_offset;
 	
-    if (jnl->jhdr->start < jnl->jhdr->end) {
+	if (jnl->jhdr->start < jnl->jhdr->end) {
 		free_space_offset = jnl->jhdr->size - (jnl->jhdr->end - jnl->jhdr->start) - jnl->jhdr->jhdr_size;
-    } else if (jnl->jhdr->start > jnl->jhdr->end) {
+	} else if (jnl->jhdr->start > jnl->jhdr->end) {
 		free_space_offset = jnl->jhdr->start - jnl->jhdr->end;
-    } else {
+	} else {
 		// journal is completely empty
 		free_space_offset = jnl->jhdr->size - jnl->jhdr->jhdr_size;
-    }
+	}
 
-    return free_space_offset;
+	return free_space_offset;
 }
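
free_space() is the standard circular-buffer computation with two journal
specifics: the header block (jhdr_size bytes) is never counted as free, and
start == end means empty rather than full.  A standalone copy with concrete
numbers (an 8 MB journal with a 512-byte header block):

	#include <stdio.h>
	#include <stdint.h>

	// Mirror of free_space(); the parameters are the journal-header
	// fields of the same names.
	static int64_t jnl_free_space(int64_t size, int64_t start,
				      int64_t end, int64_t jhdr_size)
	{
		if (start < end)
			return size - (end - start) - jhdr_size;
		else if (start > end)
			return start - end;
		else
			return size - jhdr_size;	// completely empty
	}

	int main(void)
	{
		printf("%lld\n", (long long)jnl_free_space(8388608, 512, 512, 512));	   // empty: 8388096
		printf("%lld\n", (long long)jnl_free_space(8388608, 4096, 1048576, 512)); // start < end: 7343616
		printf("%lld\n", (long long)jnl_free_space(8388608, 1048576, 4096, 512)); // wrapped: 1044480
		return 0;
	}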
 
 
@@ -2188,46 +2310,50 @@ free_space(journal *jnl)
 // The "desired_size" is in bytes.
 //
 static int
-check_free_space(journal *jnl, int desired_size)
+check_free_space(journal *jnl, int desired_size, boolean_t *delayed_header_write, uint32_t sequence_num)
 {
-    size_t i;
-    int    counter=0;
+	size_t	i;
+	int	counter=0;
+
+	//printf("jnl: check free space (desired 0x%x, avail 0x%Lx)\n",
+        //	   desired_size, free_space(jnl));
 
-    //printf("jnl: check free space (desired 0x%x, avail 0x%Lx)\n",
-//	   desired_size, free_space(jnl));
+	if (delayed_header_write)
+		*delayed_header_write = FALSE;
     
-    while (1) {
+	while (1) {
 		int old_start_empty;
 		
+		// make sure there's space in the journal to hold this transaction
+		if (free_space(jnl) > desired_size && jnl->old_start[0] == 0) {
+			break;
+		}
 		if (counter++ == 5000) {
 			dump_journal(jnl);
 			panic("jnl: check_free_space: buffer flushing isn't working "
-				  "(jnl @ %p s %lld e %lld f %lld [active start %lld]).\n", jnl,
-				  jnl->jhdr->start, jnl->jhdr->end, free_space(jnl), jnl->active_start);
+			      "(jnl @ %p s %lld e %lld f %lld [active start %lld]).\n", jnl,
+			      jnl->jhdr->start, jnl->jhdr->end, free_space(jnl), jnl->active_start);
 		}
 		if (counter > 7500) {
-		    printf("jnl: %s: check_free_space: giving up waiting for free space.\n", jnl->jdev_name);
-		    return ENOSPC;
+			printf("jnl: %s: check_free_space: giving up waiting for free space.\n", jnl->jdev_name);
+			return ENOSPC;
 		}
 
-		// make sure there's space in the journal to hold this transaction
-		if (free_space(jnl) > desired_size && jnl->old_start[0] == 0) {
-			break;
-		}
 		//
 		// here's where we lazily bump up jnl->jhdr->start.  we'll consume
 		// entries until there is enough space for the next transaction.
 		//
 		old_start_empty = 1;
 		lock_oldstart(jnl);
-		for(i=0; i < sizeof(jnl->old_start)/sizeof(jnl->old_start[0]); i++) {
+
+		for (i = 0; i < sizeof(jnl->old_start)/sizeof(jnl->old_start[0]); i++) {
 			int   lcl_counter;
 
 			lcl_counter = 0;
 			while (jnl->old_start[i] & 0x8000000000000000LL) {
 				if (lcl_counter++ > 1000) {
 					panic("jnl: check_free_space: tr starting @ 0x%llx not flushing (jnl %p).\n",
-						  jnl->old_start[i], jnl);
+					      jnl->old_start[i], jnl);
 				}
 				
 				unlock_oldstart(jnl);
@@ -2245,10 +2371,16 @@ check_free_space(journal *jnl, int desired_size)
 			old_start_empty   = 0;
 			jnl->jhdr->start  = jnl->old_start[i];
 			jnl->old_start[i] = 0;
+
 			if (free_space(jnl) > desired_size) {
-				unlock_oldstart(jnl);
-				write_journal_header(jnl, 1);
-				lock_oldstart(jnl);
+				
+				if (delayed_header_write)
+					*delayed_header_write = TRUE;
+				else {
+					unlock_oldstart(jnl);
+					write_journal_header(jnl, 1, sequence_num);
+					lock_oldstart(jnl);
+				}
 				break;
 			}
 		}
@@ -2268,7 +2400,11 @@ check_free_space(journal *jnl, int desired_size)
 			// start of the loop.
 			//
 			jnl->jhdr->start = jnl->active_start;
-			write_journal_header(jnl, 1);
+			
+			if (delayed_header_write)
+				*delayed_header_write = TRUE;
+			else
+				write_journal_header(jnl, 1, sequence_num);
 			continue;
 		}
 
@@ -2283,9 +2419,9 @@ check_free_space(journal *jnl, int desired_size)
 		// wait for a while to avoid being cpu-bound (this will
 		// put us to sleep for 10 milliseconds)
 		tsleep((caddr_t)jnl, PRIBIO, "check_free_space2", 1);
-    }
+	}
 
-    return 0;
+	return 0;
 }
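
The new delayed_header_write out-parameter lets check_free_space() defer its
side effect: with a non-NULL pointer the function only records that a header
write became necessary, and the caller issues one write_journal_header()
afterwards, coalescing what used to be several synchronous writes inside the
reclaim loop.  A sketch of the general pattern (all names illustrative):

	#include <stdbool.h>
	#include <stdio.h>

	// With deferred == NULL the function performs the write itself;
	// otherwise it just flags that one is needed.
	static void reclaim_space(bool *deferred)
	{
		if (deferred)
			*deferred = true;	// caller writes the header later
		else
			printf("write_journal_header()\n");	// immediate
	}

	int main(void)
	{
		bool need_hdr_write = false;

		reclaim_space(&need_hdr_write);	// deferred
		reclaim_space(&need_hdr_write);	// still just one write pending
		if (need_hdr_write)
			printf("write_journal_header()\n");	// coalesced
		return 0;
	}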
 
 /*
@@ -2297,31 +2433,31 @@ journal_allocate_transaction(journal *jnl)
 	transaction *tr;
 	
 	MALLOC_ZONE(tr, transaction *, sizeof(transaction), M_JNL_TR, M_WAITOK);
-    memset(tr, 0, sizeof(transaction));
+	memset(tr, 0, sizeof(transaction));
 
-    tr->tbuffer_size = jnl->tbuffer_size;
+	tr->tbuffer_size = jnl->tbuffer_size;
 
-    if (kmem_alloc(kernel_map, (vm_offset_t *)&tr->tbuffer, tr->tbuffer_size)) {
+	if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&tr->tbuffer, tr->tbuffer_size)) {
 		FREE_ZONE(tr, sizeof(transaction), M_JNL_TR);
 		jnl->active_tr = NULL;
 		return ENOMEM;
-    }
+	}
 
-    // journal replay code checksum check depends on this.
-    memset(tr->tbuffer, 0, BLHDR_CHECKSUM_SIZE);
-    // Fill up the rest of the block with unimportant bytes (0x5a 'Z' chosen for visibility)
-    memset(tr->tbuffer + BLHDR_CHECKSUM_SIZE, 0x5a, jnl->jhdr->blhdr_size - BLHDR_CHECKSUM_SIZE);
+	// journal replay code checksum check depends on this.
+	memset(tr->tbuffer, 0, BLHDR_CHECKSUM_SIZE);
+	// Fill up the rest of the block with unimportant bytes (0x5a 'Z' chosen for visibility)
+	memset(tr->tbuffer + BLHDR_CHECKSUM_SIZE, 0x5a, jnl->jhdr->blhdr_size - BLHDR_CHECKSUM_SIZE);
 
-    tr->blhdr = (block_list_header *)tr->tbuffer;
-    tr->blhdr->max_blocks = (jnl->jhdr->blhdr_size / sizeof(block_info)) - 1;
-    tr->blhdr->num_blocks = 1;      // accounts for this header block
-    tr->blhdr->bytes_used = jnl->jhdr->blhdr_size;
-    tr->blhdr->flags = BLHDR_CHECK_CHECKSUMS | BLHDR_FIRST_HEADER;
+	tr->blhdr = (block_list_header *)tr->tbuffer;
+	tr->blhdr->max_blocks = (jnl->jhdr->blhdr_size / sizeof(block_info)) - 1;
+	tr->blhdr->num_blocks = 1;      // accounts for this header block
+	tr->blhdr->bytes_used = jnl->jhdr->blhdr_size;
+	tr->blhdr->flags = BLHDR_CHECK_CHECKSUMS | BLHDR_FIRST_HEADER;
 
-    tr->sequence_num = ++jnl->jhdr->sequence_num;
-    tr->num_blhdrs  = 1;
-    tr->total_bytes = jnl->jhdr->blhdr_size;
-    tr->jnl         = jnl;
+	tr->sequence_num = ++jnl->sequence_num;
+	tr->num_blhdrs  = 1;
+	tr->total_bytes = jnl->jhdr->blhdr_size;
+	tr->jnl         = jnl;
 
 	jnl->active_tr  = tr;
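
journal_allocate_transaction() sets max_blocks to
blhdr_size / sizeof(block_info) - 1 and starts num_blocks at 1: both account
for the slot the block_list_header itself consumes.  A quick capacity check
(16-byte block_info assumed for illustration):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t blhdr_size = 4096;	// one block-list header block
		uint32_t binfo_size = 16;	// assumed sizeof(block_info)

		// One entry is reserved for the header itself, so 4096-byte
		// headers with 16-byte entries track 255 data blocks.
		uint32_t max_blocks = blhdr_size / binfo_size - 1;

		printf("data blocks per block list: %u\n", max_blocks);	// 255
		return 0;
	}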
 	
@@ -2331,67 +2467,72 @@ journal_allocate_transaction(journal *jnl)
 int
 journal_start_transaction(journal *jnl)
 {
-    int ret;
+	int ret;
 
-    CHECK_JOURNAL(jnl);
+	CHECK_JOURNAL(jnl);
     
-    if (jnl->flags & JOURNAL_INVALID) {
-		return EINVAL;
-    }
+	free_old_stuff(jnl);
 
-    if (jnl->owner == current_thread()) {
+	if (jnl->flags & JOURNAL_INVALID) {
+		return EINVAL;
+	}
+	if (jnl->owner == current_thread()) {
 		if (jnl->active_tr == NULL) {
 			panic("jnl: start_tr: active_tr is NULL (jnl @ %p, owner %p, current_thread %p\n",
-				  jnl, jnl->owner, current_thread());
+			      jnl, jnl->owner, current_thread());
 		}
 		jnl->nested_count++;
 		return 0;
-    }
-
-    lock_journal(jnl);
+	}
+	lock_journal(jnl);
 
-    if (jnl->owner != NULL || jnl->nested_count != 0 || jnl->active_tr != NULL) {
+	if (jnl->owner != NULL || jnl->nested_count != 0 || jnl->active_tr != NULL) {
 		panic("jnl: start_tr: owner %p, nested count %d, active_tr %p jnl @ %p\n",
-			  jnl->owner, jnl->nested_count, jnl->active_tr, jnl);
-    }
+		      jnl->owner, jnl->nested_count, jnl->active_tr, jnl);
+	}
 
-    jnl->owner        = current_thread();
-    jnl->nested_count = 1;
+	jnl->owner = current_thread();
+	jnl->nested_count = 1;
 
-    free_old_stuff(jnl);
+#if JOE
+	// make sure there's room in the journal
+	if (free_space(jnl) < jnl->tbuffer_size) {
 
-    // make sure there's room in the journal
-    if (free_space(jnl) < jnl->tbuffer_size) {
-	// this is the call that really waits for space to free up
-	// as well as updating jnl->jhdr->start
-	if (check_free_space(jnl, jnl->tbuffer_size) != 0) {
-		printf("jnl: %s: start transaction failed: no space\n", jnl->jdev_name);
-		ret = ENOSPC;
-		goto bad_start;
+		KERNEL_DEBUG(0xbbbbc030 | DBG_FUNC_START, jnl, 0, 0, 0, 0);
+
+		// this is the call that really waits for space to free up
+		// as well as updating jnl->jhdr->start
+		if (check_free_space(jnl, jnl->tbuffer_size, NULL, jnl->sequence_num) != 0) {
+			printf("jnl: %s: start transaction failed: no space\n", jnl->jdev_name);
+			ret = ENOSPC;
+			goto bad_start;
+		}
+		KERNEL_DEBUG(0xbbbbc030 | DBG_FUNC_END, jnl, 0, 0, 0, 0);
 	}
-    }
+#endif
 
-    // if there's a buffered transaction, use it.
-    if (jnl->cur_tr) {
+	// if there's a buffered transaction, use it.
+	if (jnl->cur_tr) {
 		jnl->active_tr = jnl->cur_tr;
 		jnl->cur_tr    = NULL;
 
 		return 0;
-    }
+	}
 
 	ret = journal_allocate_transaction(jnl);
 	if (ret) {
 		goto bad_start;
 	}
 
-    // printf("jnl: start_tr: owner 0x%x new tr @ 0x%x\n", jnl->owner, jnl->active_tr);
+	// printf("jnl: start_tr: owner 0x%x new tr @ 0x%x\n", jnl->owner, jnl->active_tr);
 
-    return 0;
+	return 0;
 
-  bad_start:
+bad_start:
 	jnl->owner        = NULL;
 	jnl->nested_count = 0;
 	unlock_journal(jnl);
+
 	return ret;
 }
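
journal_start_transaction() is re-entrant per thread: when the calling thread
already owns the journal it only bumps nested_count, and only the matching
outermost end actually commits.  A toy model of that ownership discipline
(plain integers stand in for current_thread(), and no locking is shown):

	#include <stdio.h>
	#include <stdint.h>

	struct jnl_model {
		uintptr_t	owner;		// 0 == unowned
		int		nested_count;
	};

	static void start_tr(struct jnl_model *j, uintptr_t self)
	{
		if (j->owner == self) {
			j->nested_count++;	// nested: reuse the open tr
			return;
		}
		// (the real code takes the journal lock here)
		j->owner = self;
		j->nested_count = 1;		// outermost start
	}

	static int end_tr(struct jnl_model *j)
	{
		if (--j->nested_count > 0)
			return 0;		// inner end: nothing commits
		j->owner = 0;
		return 1;			// outermost end: commit/flush
	}

	int main(void)
	{
		struct jnl_model j = { 0, 0 };

		start_tr(&j, 1);
		start_tr(&j, 1);		// nested begin
		printf("%d\n", end_tr(&j));	// 0: still nested
		printf("%d\n", end_tr(&j));	// 1: outermost, commits
		return 0;
	}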
 
@@ -2399,118 +2540,123 @@ journal_start_transaction(journal *jnl)
 int
 journal_modify_block_start(journal *jnl, struct buf *bp)
 {
-    transaction *tr;
+	transaction *tr;
     
-    CHECK_JOURNAL(jnl);
+	CHECK_JOURNAL(jnl);
+
 
-    if (jnl->flags & JOURNAL_INVALID) {
+	free_old_stuff(jnl);
+
+	if (jnl->flags & JOURNAL_INVALID) {
 		return EINVAL;
-    }
+	}
 
-    // XXXdbg - for debugging I want this to be true.  later it may
-    //          not be necessary.
-    if ((buf_flags(bp) & B_META) == 0) {
+	// XXXdbg - for debugging I want this to be true.  later it may
+	//          not be necessary.
+	if ((buf_flags(bp) & B_META) == 0) {
 		panic("jnl: modify_block_start: bp @ %p is not a meta-data block! (jnl %p)\n", bp, jnl);
-    }
+	}
 
-    tr = jnl->active_tr;
-    CHECK_TRANSACTION(tr);
+	tr = jnl->active_tr;
+	CHECK_TRANSACTION(tr);
 
-    if (jnl->owner != current_thread()) {
+	if (jnl->owner != current_thread()) {
 		panic("jnl: modify_block_start: called w/out a transaction! jnl %p, owner %p, curact %p\n",
-			  jnl, jnl->owner, current_thread());
-    }
-
-    free_old_stuff(jnl);
+		      jnl, jnl->owner, current_thread());
+	}
 
-    //printf("jnl: mod block start (bp 0x%x vp 0x%x l/blkno %qd/%qd bsz %d; total bytes %d)\n",
-    //   bp, buf_vnode(bp), buf_lblkno(bp), buf_blkno(bp), buf_size(bp), tr->total_bytes);
+	//printf("jnl: mod block start (bp 0x%x vp 0x%x l/blkno %qd/%qd bsz %d; total bytes %d)\n",
+	//   bp, buf_vnode(bp), buf_lblkno(bp), buf_blkno(bp), buf_size(bp), tr->total_bytes);
 
-    // can't allow blocks that aren't an even multiple of the
-    // underlying block size.
-    if ((buf_size(bp) % jnl->jhdr->jhdr_size) != 0) {
-	    uint32_t phys_blksz, bad=0;
+	// can't allow blocks that aren't an even multiple of the
+	// underlying block size.
+	if ((buf_size(bp) % jnl->jhdr->jhdr_size) != 0) {
+		uint32_t phys_blksz, bad=0;
 	    
-	    if (VNOP_IOCTL(jnl->jdev, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, vfs_context_kernel())) {
-		    bad = 1;
-	    } else if (phys_blksz != (uint32_t)jnl->jhdr->jhdr_size) {
-		    if (phys_blksz < 512) {
-			    panic("jnl: mod block start: phys blksz %d is too small (%d, %d)\n",
-				  phys_blksz, buf_size(bp), jnl->jhdr->jhdr_size);
-		    }
-
-		    if ((buf_size(bp) % phys_blksz) != 0) {
-			    bad = 1;
-		    } else if (phys_blksz < (uint32_t)jnl->jhdr->jhdr_size) {
-			    jnl->jhdr->jhdr_size = phys_blksz;
-		    } else {
-			    // the phys_blksz is now larger... need to realloc the jhdr
-			    char *new_header_buf;
-
-			    printf("jnl: %s: phys blksz got bigger (was: %d/%d now %d)\n",
-				   jnl->jdev_name, jnl->header_buf_size, jnl->jhdr->jhdr_size, phys_blksz);
-			    if (kmem_alloc(kernel_map, (vm_offset_t *)&new_header_buf, phys_blksz)) {
-				    printf("jnl: modify_block_start: %s: create: phys blksz change (was %d, now %d) but could not allocate space for new header\n",
-					   jnl->jdev_name, jnl->jhdr->jhdr_size, phys_blksz);
-				    bad = 1;
-			    } else {
-				    memcpy(new_header_buf, jnl->header_buf, jnl->header_buf_size);
-				    memset(&new_header_buf[jnl->header_buf_size], 0x18, (phys_blksz - jnl->header_buf_size));
-				    kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, jnl->header_buf_size);
-				    jnl->header_buf = new_header_buf;
-				    jnl->header_buf_size = phys_blksz;
-
-				    jnl->jhdr = (journal_header *)jnl->header_buf;
-				    jnl->jhdr->jhdr_size = phys_blksz;
-			    }
-		    }
-	    } else {
-		    bad = 1;
-	    }
+		if (VNOP_IOCTL(jnl->jdev, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, vfs_context_kernel())) {
+			bad = 1;
+		} else if (phys_blksz != (uint32_t)jnl->jhdr->jhdr_size) {
+			if (phys_blksz < 512) {
+				panic("jnl: mod block start: phys blksz %d is too small (%d, %d)\n",
+				      phys_blksz, buf_size(bp), jnl->jhdr->jhdr_size);
+			}
+
+			if ((buf_size(bp) % phys_blksz) != 0) {
+				bad = 1;
+			} else if (phys_blksz < (uint32_t)jnl->jhdr->jhdr_size) {
+				jnl->jhdr->jhdr_size = phys_blksz;
+			} else {
+				// the phys_blksz is now larger... need to realloc the jhdr
+				char *new_header_buf;
+
+				printf("jnl: %s: phys blksz got bigger (was: %d/%d now %d)\n",
+				       jnl->jdev_name, jnl->header_buf_size, jnl->jhdr->jhdr_size, phys_blksz);
+				if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&new_header_buf, phys_blksz)) {
+					printf("jnl: modify_block_start: %s: create: phys blksz change (was %d, now %d) but could not allocate space for new header\n",
+					       jnl->jdev_name, jnl->jhdr->jhdr_size, phys_blksz);
+					bad = 1;
+				} else {
+					memcpy(new_header_buf, jnl->header_buf, jnl->header_buf_size);
+					memset(&new_header_buf[jnl->header_buf_size], 0x18, (phys_blksz - jnl->header_buf_size));
+					kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, jnl->header_buf_size);
+					jnl->header_buf = new_header_buf;
+					jnl->header_buf_size = phys_blksz;
+					
+					jnl->jhdr = (journal_header *)jnl->header_buf;
+					jnl->jhdr->jhdr_size = phys_blksz;
+				}
+			}
+		} else {
+			bad = 1;
+		}
 	    
-	    if (bad) {
-		panic("jnl: mod block start: bufsize %d not a multiple of block size %d\n",
-			  buf_size(bp), jnl->jhdr->jhdr_size);
-		return -1;
-	    }
-    }
+		if (bad) {
+			panic("jnl: mod block start: bufsize %d not a multiple of block size %d\n",
+			      buf_size(bp), jnl->jhdr->jhdr_size);
+			return -1;
+		}
+	}
 
-    // make sure that this transaction isn't bigger than the whole journal
-    if (tr->total_bytes+buf_size(bp) >= (jnl->jhdr->size - jnl->jhdr->jhdr_size)) {
+	// make sure that this transaction isn't bigger than the whole journal
+	if (tr->total_bytes+buf_size(bp) >= (jnl->jhdr->size - jnl->jhdr->jhdr_size)) {
 		panic("jnl: transaction too big (%d >= %lld bytes, bufsize %d, tr %p bp %p)\n",
-			  tr->total_bytes, (tr->jnl->jhdr->size - jnl->jhdr->jhdr_size), buf_size(bp), tr, bp);
+		      tr->total_bytes, (tr->jnl->jhdr->size - jnl->jhdr->jhdr_size), buf_size(bp), tr, bp);
 		return -1;
-    }
+	}
 
-    // if the block is dirty and not already locked we have to write
-    // it out before we muck with it because it has data that belongs
-    // (presumably) to another transaction.
-    //
-    if ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI) {
+	// if the block is dirty and not already locked we have to write
+	// it out before we muck with it because it has data that belongs
+	// (presumably) to another transaction.
+	//
+	if ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI) {
 
 		if (buf_flags(bp) & B_ASYNC) {
 			panic("modify_block_start: bp @ %p has async flag set!\n", bp);
 		}
+		if (bp->b_shadow_ref)
+			panic("modify_block_start: dirty bp @ %p has shadows!\n", bp);
 
 		// this will cause it to not be buf_brelse()'d
                 buf_setflags(bp, B_NORELSE);
 		VNOP_BWRITE(bp);
-    }
-    buf_setflags(bp, B_LOCKED);
-	
-    return 0;
+	}
+	buf_setflags(bp, B_LOCKED);
+
+	return 0;
 }
 
 int
 journal_modify_block_abort(journal *jnl, struct buf *bp)
 {
-    transaction *tr;
+	transaction	*tr;
 	block_list_header *blhdr;
-	int i;
+	int		i;
     
-    CHECK_JOURNAL(jnl);
+	CHECK_JOURNAL(jnl);
 
-    tr = jnl->active_tr;
+	free_old_stuff(jnl);
+
+	tr = jnl->active_tr;
 	
 	//
 	// if there's no active transaction then we just want to
@@ -2522,26 +2668,24 @@ journal_modify_block_abort(journal *jnl, struct buf *bp)
 		return 0;
 	}
 
-    if (jnl->flags & JOURNAL_INVALID) {
+	if (jnl->flags & JOURNAL_INVALID) {
     	/* Still need to buf_brelse(). Callers assume we consume the bp. */
     	buf_brelse(bp);
 		return EINVAL;
-    }
+	}
 
-    CHECK_TRANSACTION(tr);
+	CHECK_TRANSACTION(tr);
     
-    if (jnl->owner != current_thread()) {
+	if (jnl->owner != current_thread()) {
 		panic("jnl: modify_block_abort: called w/out a transaction! jnl %p, owner %p, curact %p\n",
-			  jnl, jnl->owner, current_thread());
-    }
-
-    free_old_stuff(jnl);
+		      jnl, jnl->owner, current_thread());
+	}
 
-    // printf("jnl: modify_block_abort: tr 0x%x bp 0x%x\n", jnl->active_tr, bp);
+	// printf("jnl: modify_block_abort: tr 0x%x bp 0x%x\n", jnl->active_tr, bp);
 
-    // first check if it's already part of this transaction
-    for(blhdr=tr->blhdr; blhdr; blhdr=(block_list_header *)((long)blhdr->binfo[0].bnum)) {
-		for(i=1; i < blhdr->num_blocks; i++) {
+	// first check if it's already part of this transaction
+	for (blhdr = tr->blhdr; blhdr; blhdr = (block_list_header *)((long)blhdr->binfo[0].bnum)) {
+		for (i = 1; i < blhdr->num_blocks; i++) {
 			if (bp == blhdr->binfo[i].u.bp) {
 				break;
 			}
@@ -2550,7 +2694,7 @@ journal_modify_block_abort(journal *jnl, struct buf *bp)
 		if (i < blhdr->num_blocks) {
 			break;
 		}
-    }
+	}
 
 	//
 	// if blhdr is null, then this block has only had modify_block_start
@@ -2560,76 +2704,75 @@ journal_modify_block_abort(journal *jnl, struct buf *bp)
 	// on it and so we need to keep it locked in memory.
 	//
 	if (blhdr == NULL) { 
-	          buf_clearflags(bp, B_LOCKED);
+		buf_clearflags(bp, B_LOCKED);
 	}
 
-    buf_brelse(bp);
-    return 0;
+	buf_brelse(bp);
+	return 0;
 }
 
 
 int
-journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(struct buf *bp, void *arg), void *arg)
+journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(buf_t bp, void *arg), void *arg)
 {
-    int                i = 1;
-    int	               tbuffer_offset=0;
-    char              *blkptr;
-    block_list_header *blhdr, *prev=NULL;
-    transaction       *tr;
+	int		i = 1;
+	int		tbuffer_offset=0;
+	block_list_header *blhdr, *prev=NULL;
+	transaction	*tr;
+
+	CHECK_JOURNAL(jnl);
 
-    CHECK_JOURNAL(jnl);
+	free_old_stuff(jnl);
 
-    if (jnl->flags & JOURNAL_INVALID) {
+	if (jnl->flags & JOURNAL_INVALID) {
     	/* Still need to buf_brelse(). Callers assume we consume the bp. */
     	buf_brelse(bp);
 		return EINVAL;
-    }
+	}
 
-    tr = jnl->active_tr;
-    CHECK_TRANSACTION(tr);
+	tr = jnl->active_tr;
+	CHECK_TRANSACTION(tr);
 
-    if (jnl->owner != current_thread()) {
+	if (jnl->owner != current_thread()) {
 		panic("jnl: modify_block_end: called w/out a transaction! jnl %p, owner %p, curact %p\n",
-			  jnl, jnl->owner, current_thread());
-    }
-
-    free_old_stuff(jnl);
+		      jnl, jnl->owner, current_thread());
+	}
 
-    //printf("jnl: mod block end:  (bp 0x%x vp 0x%x l/blkno %qd/%qd bsz %d, total bytes %d)\n", 
-    //   bp, buf_vnode(bp), buf_lblkno(bp), buf_blkno(bp), buf_size(bp), tr->total_bytes);
+	//printf("jnl: mod block end:  (bp 0x%x vp 0x%x l/blkno %qd/%qd bsz %d, total bytes %d)\n", 
+	//   bp, buf_vnode(bp), buf_lblkno(bp), buf_blkno(bp), buf_size(bp), tr->total_bytes);
 
-    if ((buf_flags(bp) & B_LOCKED) == 0) {
+	if ((buf_flags(bp) & B_LOCKED) == 0) {
 		panic("jnl: modify_block_end: bp %p not locked! jnl @ %p\n", bp, jnl);
-    }
+	}
 	 
-    // first check if it's already part of this transaction
-    for(blhdr=tr->blhdr; blhdr; prev=blhdr,blhdr=(block_list_header *)((long)blhdr->binfo[0].bnum)) {
+	// first check if it's already part of this transaction
+	for (blhdr = tr->blhdr; blhdr; prev = blhdr, blhdr = (block_list_header *)((long)blhdr->binfo[0].bnum)) {
 		tbuffer_offset = jnl->jhdr->blhdr_size;
 
-		for(i=1; i < blhdr->num_blocks; i++) {
+		for (i = 1; i < blhdr->num_blocks; i++) {
 			if (bp == blhdr->binfo[i].u.bp) {
 				break;
 			}
 			if (blhdr->binfo[i].bnum != (off_t)-1) {
-			    tbuffer_offset += buf_size(blhdr->binfo[i].u.bp);
+				tbuffer_offset += buf_size(blhdr->binfo[i].u.bp);
 			} else {
-			    tbuffer_offset += blhdr->binfo[i].u.bi.bsize;
+				tbuffer_offset += blhdr->binfo[i].u.bi.bsize;
 			}
 		}
 
 		if (i < blhdr->num_blocks) {
 			break;
 		}
-    }
+	}
 
-    if (blhdr == NULL
-		&& prev
-		&& (prev->num_blocks+1) <= prev->max_blocks
-		&& (prev->bytes_used+buf_size(bp)) <= (uint32_t)tr->tbuffer_size) {
+	if (blhdr == NULL
+	    && prev
+	    && (prev->num_blocks+1) <= prev->max_blocks
+	    && (prev->bytes_used+buf_size(bp)) <= (uint32_t)tr->tbuffer_size) {
 		blhdr = prev;
-    } else if (blhdr == NULL) {
-		block_list_header *nblhdr;
 
+	} else if (blhdr == NULL) {
+		block_list_header *nblhdr;
 		if (prev == NULL) {
 			panic("jnl: modify block end: no way man, prev == NULL?!?, jnl %p, bp %p\n", jnl, bp);
 		}
@@ -2641,9 +2784,9 @@ journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(struct buf *
 		// through prev->binfo[0].bnum.  that's a skanky way to do things but
 		// avoids having yet another linked list of small data structures to manage.
 
-		if (kmem_alloc(kernel_map, (vm_offset_t *)&nblhdr, tr->tbuffer_size)) {
+		if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&nblhdr, tr->tbuffer_size)) {
 			panic("jnl: end_tr: no space for new block tr @ %p (total bytes: %d)!\n",
-				  tr, tr->total_bytes);
+			      tr, tr->total_bytes);
 		}
 
 		// journal replay code checksum check depends on this.
@@ -2667,25 +2810,15 @@ journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(struct buf *
 		blhdr          = nblhdr;
 		tbuffer_offset = jnl->jhdr->blhdr_size;
 		i              = 1;
-    }
+	}
 
 
-    if ((i+1) > blhdr->max_blocks) {
+	if ((i+1) > blhdr->max_blocks) {
 		panic("jnl: modify_block_end: i = %d, max_blocks %d\n", i, blhdr->max_blocks);
-    }
-
-	// if the function pointer is not set then copy the
-	// block of data now.  if the function pointer is set 
-	// the copy will happen after calling the callback in
-	// end_transaction() just before it goes to disk.
-	//
-	if (func == NULL) {
-		blkptr = (char *)&((char *)blhdr)[tbuffer_offset];
-		memcpy(blkptr, (char *)0 + buf_dataptr(bp), buf_size(bp));
 	}
 
-    // if this is true then this is a new block we haven't seen
-    if (i >= blhdr->num_blocks) {
+	// if this is true then this is a new block we haven't seen
+	if (i >= blhdr->num_blocks) {
                 int	bsize;
 		vnode_t	vp;
 
@@ -2695,8 +2828,9 @@ journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(struct buf *
 
 		blhdr->binfo[i].bnum = (off_t)(buf_blkno(bp));
 		blhdr->binfo[i].u.bp = bp;
+
 		if (func) {
-			void *old_func=NULL, *old_arg=NULL;
+			void (*old_func)(buf_t, void *)=NULL, *old_arg=NULL;
 			
 			buf_setfilter(bp, func, arg, &old_func, &old_arg);
 			if (old_func != NULL && old_func != func) {
@@ -2708,48 +2842,48 @@ journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(struct buf *
 		tr->total_bytes   += bsize;
 
 		blhdr->num_blocks++;
-    }
-    buf_bdwrite(bp);
+	}
+	buf_bdwrite(bp);
 
-    return 0;
+	return 0;
 }
 
 int
 journal_kill_block(journal *jnl, struct buf *bp)
 {
-    int                i;
-    int		       bflags;
-    block_list_header *blhdr;
-    transaction       *tr;
+	int		i;
+	int		bflags;
+	block_list_header *blhdr;
+	transaction	*tr;
 
-    CHECK_JOURNAL(jnl);
+	CHECK_JOURNAL(jnl);
 
-    if (jnl->flags & JOURNAL_INVALID) {
+	free_old_stuff(jnl);
+
+	if (jnl->flags & JOURNAL_INVALID) {
 		return EINVAL;
-    }
+	}
 
-    tr = jnl->active_tr;
-    CHECK_TRANSACTION(tr);
+	tr = jnl->active_tr;
+	CHECK_TRANSACTION(tr);
 
-    if (jnl->owner != current_thread()) {
+	if (jnl->owner != current_thread()) {
 		panic("jnl: modify_block_end: called w/out a transaction! jnl %p, owner %p, curact %p\n",
-			  jnl, jnl->owner, current_thread());
-    }
-
-    free_old_stuff(jnl);
+		      jnl, jnl->owner, current_thread());
+	}
 
-    bflags = buf_flags(bp);
+	bflags = buf_flags(bp);
 
-    if ( !(bflags & B_LOCKED))
-            panic("jnl: modify_block_end: called with bp not B_LOCKED");
+	if ( !(bflags & B_LOCKED))
+		panic("jnl: kill_block: called with bp not B_LOCKED");
 
-    /*
-     * bp must be BL_BUSY and B_LOCKED
-     */
-    // first check if it's already part of this transaction
-    for(blhdr=tr->blhdr; blhdr; blhdr=(block_list_header *)((long)blhdr->binfo[0].bnum)) {
+	/*
+	 * bp must be BL_BUSY and B_LOCKED
+	 * first check if it's already part of this transaction
+	 */
+	for (blhdr = tr->blhdr; blhdr; blhdr = (block_list_header *)((long)blhdr->binfo[0].bnum)) {
 
-		for(i=1; i < blhdr->num_blocks; i++) {
+		for (i = 1; i < blhdr->num_blocks; i++) {
 			if (bp == blhdr->binfo[i].u.bp) {
 			        vnode_t vp;
 
@@ -2783,9 +2917,38 @@ journal_kill_block(journal *jnl, struct buf *bp)
 		if (i < blhdr->num_blocks) {
 			break;
 		}
-    }
+	}
 
-    return 0;
+	return 0;
+}
+
+
+/*
+;________________________________________________________________________________
+;
+; Routine:		journal_trim_set_callback
+;
+; Function:		Provide the journal with a routine to be called back when a
+;				TRIM has (or would have) been issued to the device.  That
+;				is, the transaction has been flushed to the device, and the
+;				blocks freed by the transaction are now safe for reuse.
+;
+;				CAUTION: If the journal becomes invalid (e.g., due to an I/O
+;				error when trying to write to the journal), this callback
+;				will stop getting called, even if extents got freed before
+;				the journal became invalid!
+;
+; Input Arguments:
+;	jnl			- The journal structure for the filesystem.
+;	callback	- The function to call when the TRIM is complete.
+;	arg			- An argument to be passed to callback.
+;________________________________________________________________________________
+*/
+__private_extern__ void
+journal_trim_set_callback(journal *jnl, jnl_trim_callback_t callback, void *arg)
+{
+	jnl->trim_callback = callback;
+	jnl->trim_callback_arg = arg;
 }
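+
+/*
+ * Illustrative registration (the callback name and arg are hypothetical;
+ * the callback signature is inferred from the call site in
+ * journal_trim_flush below):
+ *
+ *	static void fs_trim_done(void *arg, uint32_t extent_count,
+ *			const dk_extent_t *extents);
+ *	...
+ *	journal_trim_set_callback(jnl, fs_trim_done, fs_private_data);
+ */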
 
 
@@ -2802,7 +2965,7 @@ journal_kill_block(journal *jnl, struct buf *bp)
 ;				grown successfully.
 ;
 ; Input Arguments:
-;	tr			- The transaction containing the extent list.
+;	trim		- The trim list to be resized.
 ;
 ; Output:
 ;	(result)	- ENOMEM or 0.
@@ -2813,53 +2976,107 @@ journal_kill_block(journal *jnl, struct buf *bp)
 ;________________________________________________________________________________
 */
 static int
-journal_trim_realloc(transaction *tr)
+trim_realloc(struct jnl_trim_list *trim)
 {
-	if (CONFIG_HFS_TRIM) {
-		void *new_extents;
-		uint32_t new_allocated_count;
-		
-		new_allocated_count = tr->trim.allocated_count + JOURNAL_DEFAULT_TRIM_EXTENTS;
-		new_extents = kalloc(new_allocated_count * sizeof(dk_extent_t));
-		if (new_extents == NULL) {
-			printf("journal_trim_realloc: unable to grow extent list!\n");
-			/*
-			 * Since we could be called when allocating space previously marked
-			 * to be trimmed, we need to empty out the list to be safe.
-			 */
-			tr->trim.extent_count = 0;
-			return ENOMEM;
-		}
-		
-		/* Copy the old extent list to the newly allocated list. */
-		if (tr->trim.extents != NULL) {
-			memmove(new_extents,
-					tr->trim.extents,
-					tr->trim.allocated_count * sizeof(dk_extent_t));
-			kfree(tr->trim.extents,
-				  tr->trim.allocated_count * sizeof(dk_extent_t));
-		}
-		
-		tr->trim.allocated_count = new_allocated_count;
-		tr->trim.extents = new_extents;
+	void *new_extents;
+	uint32_t new_allocated_count;
+	
+	if (jnl_kdebug)
+		KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REALLOC | DBG_FUNC_START, trim, 0, trim->allocated_count, trim->extent_count, 0);
+	
+	new_allocated_count = trim->allocated_count + JOURNAL_DEFAULT_TRIM_EXTENTS;
+	new_extents = kalloc(new_allocated_count * sizeof(dk_extent_t));
+	if (new_extents == NULL) {
+		printf("jnl: trim_realloc: unable to grow extent list!\n");
+		/*
+		 * Since we could be called when allocating space previously marked
+		 * to be trimmed, we need to empty out the list to be safe.
+		 */
+		trim->extent_count = 0;
+		if (jnl_kdebug)
+			KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REALLOC | DBG_FUNC_END, ENOMEM, 0, trim->allocated_count, 0, 0);
+		return ENOMEM;
 	}
+	
+	/* Copy the old extent list to the newly allocated list. */
+	if (trim->extents != NULL) {
+		memmove(new_extents,
+				trim->extents,
+				trim->allocated_count * sizeof(dk_extent_t));
+		kfree(trim->extents,
+			  trim->allocated_count * sizeof(dk_extent_t));
+	}
+	
+	trim->allocated_count = new_allocated_count;
+	trim->extents = new_extents;
+
+	if (jnl_kdebug)
+		KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REALLOC | DBG_FUNC_END, 0, 0, new_allocated_count, trim->extent_count, 0);
+	
 	return 0;
 }
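+
+/*
+ * Growth sketch: each call extends the capacity by
+ * JOURNAL_DEFAULT_TRIM_EXTENTS entries, copies the old dk_extent_t
+ * records into the new buffer, and frees the old one.  On allocation
+ * failure the list is emptied (extent_count = 0) rather than left
+ * holding extents that can no longer be trusted.
+ */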
 
 
+/*
+;________________________________________________________________________________
+;
+; Routine:		trim_search_extent
+;
+; Function:		Search the given extent list to see if any of its extents
+;				overlap the given extent.
+;
+; Input Arguments:
+;	trim		- The trim list to be searched.
+;	offset		- The first byte of the range to be searched for.
+;	length		- The number of bytes of the extent being searched for.
+;
+; Output:
+;	(result)	- TRUE if one or more extents overlap, FALSE otherwise.
+;________________________________________________________________________________
+*/
+static int
+trim_search_extent(struct jnl_trim_list *trim, uint64_t offset, uint64_t length)
+{
+	uint64_t end = offset + length;
+	uint32_t lower = 0;						/* Lowest index to search */
+	uint32_t upper = trim->extent_count;	/* Highest index to search + 1 */
+	uint32_t middle;
+	
+	/* A binary search over the extent list. */
+	while (lower < upper) {
+		middle = (lower + upper) / 2;
+		
+		if (trim->extents[middle].offset >= end)
+			upper = middle;
+		else if (trim->extents[middle].offset + trim->extents[middle].length <= offset)
+			lower = middle + 1;
+		else
+			return TRUE;
+	}
+	
+	return FALSE;
+}
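+
+/*
+ * Worked example: with sorted extents {offset 0, length 10} and
+ * {offset 20, length 10}, trim_search_extent(trim, 5, 10) returns TRUE
+ * (it overlaps the first extent), while trim_search_extent(trim, 10, 10)
+ * returns FALSE -- the comparisons treat ranges as half-open, so merely
+ * touching an extent's end is not an overlap.
+ */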
+
+
 /*
 ;________________________________________________________________________________
 ;
 ; Routine:		journal_trim_add_extent
 ;
-; Function:		Make note of a range of bytes that should be unmapped
-;				(trimmed).  That is, the given range of bytes no longer have
-;				useful content, and the device can unmap the previous
-;				contents.  For example, a solid state disk may reuse the
-;				underlying storage for other blocks.
+; Function:		Keep track of extents that have been freed as part of this
+;				transaction.  If the underlying device supports TRIM (UNMAP),
+;				then those extents will be trimmed/unmapped once the
+;				transaction has been written to the journal.  (For example,
+;				SSDs can support trim/unmap and avoid having to recopy those
+;				blocks when doing wear leveling, and may reuse the same
+;				physical blocks for different logical blocks.)
 ;
-;				The extent will be unmapped after the transaction is written
-;				to the journal.
+;				HFS also uses this, in combination with journal_trim_set_callback,
+;				to add recently freed extents to its free extent cache, but
+;				only after the transaction that freed them is committed to
+;				disk.  (This reduces the chance of overwriting live data in
+;				a way that causes data loss if a transaction never gets
+;				written to the journal.)
 ;
 ; Input Arguments:
 ;	jnl			- The journal for the volume containing the byte range.
@@ -2870,113 +3087,114 @@ journal_trim_realloc(transaction *tr)
 __private_extern__ int
 journal_trim_add_extent(journal *jnl, uint64_t offset, uint64_t length)
 {
-	if (CONFIG_HFS_TRIM) {
-		uint64_t end;
-		transaction *tr;
-		dk_extent_t *extent;
-		uint32_t insert_index;
-		uint32_t replace_count;
-		
-		CHECK_JOURNAL(jnl);
+	uint64_t end;
+	transaction *tr;
+	dk_extent_t *extent;
+	uint32_t insert_index;
+	uint32_t replace_count;
 	
-		if (jnl->flags & JOURNAL_TRIM_ERR) {
-			/*
-			 * A previous trim failed, so we have disabled trim for this volume
-			 * for as long as it remains mounted.
-			 */
-			return 0;
-		}
-		
-		if (jnl->flags & JOURNAL_INVALID) {
-			return EINVAL;
-		}
+	CHECK_JOURNAL(jnl);
+
+	/* TODO: Is it OK to manipulate the trim list even if JOURNAL_INVALID is set?  I think so... */
+	if (jnl->flags & JOURNAL_INVALID) {
+		return EINVAL;
+	}
+
+	tr = jnl->active_tr;
+	CHECK_TRANSACTION(tr);
+
+	if (jnl_kdebug)
+		KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_ADD | DBG_FUNC_START, jnl, offset, length, tr->trim.extent_count, 0);
+
+	if (jnl->owner != current_thread()) {
+		panic("jnl: trim_add_extent: called w/out a transaction! jnl %p, owner %p, curact %p\n",
+			  jnl, jnl->owner, current_thread());
+	}
+
+	free_old_stuff(jnl);
 	
-		tr = jnl->active_tr;
-		CHECK_TRANSACTION(tr);
+	end = offset + length;
 	
-		if (jnl->owner != current_thread()) {
-			panic("jnl: trim_add_extent: called w/out a transaction! jnl %p, owner %p, curact %p\n",
-				  jnl, jnl->owner, current_thread());
-		}
+	/*
+	 * Find the range of existing extents that can be combined with the
+	 * input extent.  We start by counting the number of extents that end
+	 * strictly before the input extent, then count the number of extents
+	 * that overlap or are contiguous with the input extent.
+	 */
+	extent = tr->trim.extents;
+	insert_index = 0;
+	while (insert_index < tr->trim.extent_count && extent->offset + extent->length < offset) {
+		++insert_index;
+		++extent;
+	}
+	replace_count = 0;
+	while (insert_index + replace_count < tr->trim.extent_count && extent->offset <= end) {
+		++replace_count;
+		++extent;
+	}
 	
-		free_old_stuff(jnl);
-		
-		end = offset + length;
-		
-		/*
-		 * Find the range of existing extents that can be combined with the
-		 * input extent.  We start by counting the number of extents that end
-		 * strictly before the input extent, then count the number of extents
-		 * that overlap or are contiguous with the input extent.
-		 */
-		extent = tr->trim.extents;
-		insert_index = 0;
-		while (insert_index < tr->trim.extent_count && extent->offset + extent->length < offset) {
-			++insert_index;
-			++extent;
-		}
-		replace_count = 0;
-		while (insert_index + replace_count < tr->trim.extent_count && extent->offset <= end) {
-			++replace_count;
-			++extent;
+	/*
+	 * If none of the existing extents can be combined with the input extent,
+	 * then just insert it in the list (before item number insert_index).
+	 */
+	if (replace_count == 0) {
+		/* If the list was already full, we need to grow it. */
+		if (tr->trim.extent_count == tr->trim.allocated_count) {
+			if (trim_realloc(&tr->trim) != 0) {
+				printf("jnl: trim_add_extent: out of memory!\n");
+				if (jnl_kdebug)
+					KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_ADD | DBG_FUNC_END, ENOMEM, 0, 0, tr->trim.extent_count, 0);
+				return ENOMEM;
+			}
 		}
 		
-		/*
-		 * If none of the existing extents can be combined with the input extent,
-		 * then just insert it in the list (before item number insert_index).
-		 */
-		if (replace_count == 0) {
-			/* If the list was already full, we need to grow it. */
-			if (tr->trim.extent_count == tr->trim.allocated_count) {
-				if (journal_trim_realloc(tr) != 0) {
-					printf("jnl: trim_add_extent: out of memory!");
-					return ENOMEM;
-				}
-			}
-			
-			/* Shift any existing extents with larger offsets. */
-			if (insert_index < tr->trim.extent_count) {
-				memmove(&tr->trim.extents[insert_index+1],
-						&tr->trim.extents[insert_index],
-						(tr->trim.extent_count - insert_index) * sizeof(dk_extent_t));
-			}
-			tr->trim.extent_count++;
-			
-			/* Store the new extent in the list. */
-			tr->trim.extents[insert_index].offset = offset;
-			tr->trim.extents[insert_index].length = length;
-			
-			/* We're done. */
-			return 0;
+		/* Shift any existing extents with larger offsets. */
+		if (insert_index < tr->trim.extent_count) {
+			memmove(&tr->trim.extents[insert_index+1],
+					&tr->trim.extents[insert_index],
+					(tr->trim.extent_count - insert_index) * sizeof(dk_extent_t));
 		}
+		tr->trim.extent_count++;
 		
-		/*
-		 * Update extent number insert_index to be the union of the input extent
-		 * and all of the replaced extents.
-		 */
-		if (tr->trim.extents[insert_index].offset < offset)
-			offset = tr->trim.extents[insert_index].offset;
-		extent = &tr->trim.extents[insert_index + replace_count - 1];
-		if (extent->offset + extent->length > end)
-			end = extent->offset + extent->length;
+		/* Store the new extent in the list. */
 		tr->trim.extents[insert_index].offset = offset;
-		tr->trim.extents[insert_index].length = end - offset;
+		tr->trim.extents[insert_index].length = length;
 		
-		/*
-		 * If we were replacing more than one existing extent, then shift any
-		 * extents with larger offsets, and update the count of extents.
-		 *
-		 * We're going to leave extent #insert_index alone since it was just updated, above.
-		 * We need to move extents from index (insert_index + replace_count) through the end of
-		 * the list by (replace_count - 1) positions so that they overwrite extent #(insert_index + 1).
-		 */
-		if (replace_count > 1 && (insert_index + replace_count) < tr->trim.extent_count) {
-			memmove(&tr->trim.extents[insert_index + 1],
-					&tr->trim.extents[insert_index + replace_count],
-					(tr->trim.extent_count - insert_index - replace_count) * sizeof(dk_extent_t));
-		}
-		tr->trim.extent_count -= replace_count - 1;
-    }
+		/* We're done. */
+		if (jnl_kdebug)
+			KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_ADD | DBG_FUNC_END, 0, 0, 0, tr->trim.extent_count, 0);
+		return 0;
+	}
+	
+	/*
+	 * Update extent number insert_index to be the union of the input extent
+	 * and all of the replaced extents.
+	 */
+	if (tr->trim.extents[insert_index].offset < offset)
+		offset = tr->trim.extents[insert_index].offset;
+	extent = &tr->trim.extents[insert_index + replace_count - 1];
+	if (extent->offset + extent->length > end)
+		end = extent->offset + extent->length;
+	tr->trim.extents[insert_index].offset = offset;
+	tr->trim.extents[insert_index].length = end - offset;
+	
+	/*
+	 * If we were replacing more than one existing extent, then shift any
+	 * extents with larger offsets, and update the count of extents.
+	 *
+	 * We're going to leave extent #insert_index alone since it was just updated above.
+	 * We need to move extents from index (insert_index + replace_count) through the end of
+	 * the list by (replace_count - 1) positions so that they overwrite extent #(insert_index + 1).
+	 */
+	if (replace_count > 1 && (insert_index + replace_count) < tr->trim.extent_count) {
+		memmove(&tr->trim.extents[insert_index + 1],
+				&tr->trim.extents[insert_index + replace_count],
+				(tr->trim.extent_count - insert_index - replace_count) * sizeof(dk_extent_t));
+	}
+	tr->trim.extent_count -= replace_count - 1;
+
+	if (jnl_kdebug)
+		KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_ADD | DBG_FUNC_END, 0, 0, 0, tr->trim.extent_count, 0);
     return 0;
 }
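+
+/*
+ * Coalescing example: with {offset 0, length 10} and {offset 20,
+ * length 10} already in the list, adding {offset 10, length 10} is
+ * contiguous with both neighbors, so all three collapse into the single
+ * extent {offset 0, length 30}; adding {offset 40, length 10} instead
+ * would simply be inserted as a new third entry.
+ */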
 
@@ -2984,153 +3202,217 @@ journal_trim_add_extent(journal *jnl, uint64_t offset, uint64_t length)
 /*
 ;________________________________________________________________________________
 ;
-; Routine:		journal_trim_remove_extent
+; Routine:		trim_remove_extent
 ;
-; Function:		Make note of a range of bytes, some of which may have previously
-;				been passed to journal_trim_add_extent, is now in use on the
-;				volume.  The given bytes will be not be trimmed as part of
-;				this transaction.
+; Function:		Indicate that a range of bytes, some of which may have previously
+;				been passed to journal_trim_add_extent, is now allocated.
+;				Any overlapping ranges currently in the journal's trim list will
+;				be removed.  If the underlying device supports TRIM (UNMAP), then
+;				these extents will not be trimmed/unmapped when the transaction
+;				is written to the journal.
+;
+;				HFS also uses this to prevent newly allocated space from being
+;				added to its free extent cache (if some portion of the newly
+;				allocated space was recently freed).
 ;
 ; Input Arguments:
-;	jnl			- The journal for the volume containing the byte range.
+;	trim		- The trim list to update.
 ;	offset		- The first byte of the range to be trimmed.
 ;	length		- The number of bytes of the extent being trimmed.
 ;________________________________________________________________________________
 */
-__private_extern__ int
-journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length)
+static int
+trim_remove_extent(struct jnl_trim_list *trim, uint64_t offset, uint64_t length)
 {
-	if (CONFIG_HFS_TRIM) {
-		u_int64_t end;
-		dk_extent_t *extent;
-		transaction *tr;
-		u_int32_t keep_before;
-		u_int32_t keep_after;
-		
-		CHECK_JOURNAL(jnl);
-	
-		if (jnl->flags & JOURNAL_TRIM_ERR) {
-			/*
-			 * A previous trim failed, so we have disabled trim for this volume
-			 * for as long as it remains mounted.
-			 */
-			return 0;
-		}
-		
-		if (jnl->flags & JOURNAL_INVALID) {
-			return EINVAL;
-		}
-	
-		tr = jnl->active_tr;
-		CHECK_TRANSACTION(tr);
+	u_int64_t end;
+	dk_extent_t *extent;
+	u_int32_t keep_before;
+	u_int32_t keep_after;
 	
-		if (jnl->owner != current_thread()) {
-			panic("jnl: trim_remove_extent: called w/out a transaction! jnl %p, owner %p, curact %p\n",
-				  jnl, jnl->owner, current_thread());
-		}
+	end = offset + length;
 	
-		free_old_stuff(jnl);
+	/*
+	 * Find any existing extents that start before or end after the input
+	 * extent.  These extents will be modified if they overlap the input
+	 * extent.  Other extents between them will be deleted.
+	 */
+	extent = trim->extents;
+	keep_before = 0;
+	while (keep_before < trim->extent_count && extent->offset < offset) {
+		++keep_before;
+		++extent;
+	}
+	keep_after = keep_before;
+	if (keep_after > 0) {
+		/* See if previous extent extends beyond both ends of input extent. */
+		--keep_after;
+		--extent;
+	}
+	while (keep_after < trim->extent_count && (extent->offset + extent->length) <= end) {
+		++keep_after;
+		++extent;
+	}
 	
-		end = offset + length;
+	/*
+	 * When we get here, the first keep_before extents (0 .. keep_before-1)
+	 * start before the input extent, and extents (keep_after .. extent_count-1)
+	 * end after the input extent.  We'll need to keep all of those extents,
+	 * but possibly modify #(keep_before-1) and #keep_after to remove the portion
+	 * that overlaps with the input extent.
+	 */
 	
-		/*
-		 * Find any existing extents that start before or end after the input
-		 * extent.  These extents will be modified if they overlap the input
-		 * extent.  Other extents between them will be deleted.
-		 */
-		extent = tr->trim.extents;
-		keep_before = 0;
-		while (keep_before < tr->trim.extent_count && extent->offset < offset) {
-			++keep_before;
-			++extent;
-		}
-		keep_after = keep_before;
-		if (keep_after > 0) {
-			/* See if previous extent extends beyond both ends of input extent. */
-			--keep_after;
-			--extent;
-		}
-		while (keep_after < tr->trim.extent_count && (extent->offset + extent->length) <= end) {
-			++keep_after;
-			++extent;
+	/*
+	 * Does the input extent start after and end before the same existing
+	 * extent?  If so, we have to "punch a hole" in that extent and convert
+	 * it to two separate extents.
+	 */
+	if (keep_before >  keep_after) {
+		/* If the list was already full, we need to grow it. */
+		if (trim->extent_count == trim->allocated_count) {
+			if (trim_realloc(trim) != 0) {
+				printf("jnl: trim_remove_extent: out of memory!\n");
+				return ENOMEM;
+			}
 		}
 		
 		/*
-		 * When we get here, the first keep_before extents (0 .. keep_before-1)
-		 * start before the input extent, and extents (keep_after .. extent_count-1)
-		 * end after the input extent.  We'll need to keep, all of those extents,
-		 * but possibly modify #(keep_before-1) and #keep_after to remove the portion
-		 * that overlaps with the input extent.
+		 * Make room for a new extent by shifting extents #keep_after and later
+		 * down by one extent.  When we're done, extents #keep_before and
+		 * #keep_after will be identical, and we can fall through to removing
+		 * the portion that overlaps the input extent.
 		 */
+		memmove(&trim->extents[keep_before],
+				&trim->extents[keep_after],
+				(trim->extent_count - keep_after) * sizeof(dk_extent_t));
+		++trim->extent_count;
+		++keep_after;
 		
 		/*
-		 * Does the input extent start after and end before the same existing
-		 * extent?  If so, we have to "punch a hole" in that extent and convert
-		 * it to two separate extents.
+		 * Fall through.  We now have the case where the length of extent
+		 * #(keep_before - 1) needs to be updated, and the start of extent
+		 * #(keep_after) needs to be updated.
 		 */
-		if (keep_before >  keep_after) {
-			/* If the list was already full, we need to grow it. */
-			if (tr->trim.extent_count == tr->trim.allocated_count) {
-				if (journal_trim_realloc(tr) != 0) {
-					printf("jnl: trim_remove_extent: out of memory!");
-					return ENOMEM;
-				}
-			}
-			
-			/*
-			 * Make room for a new extent by shifting extents #keep_after and later
-			 * down by one extent.  When we're done, extents #keep_before and
-			 * #keep_after will be identical, and we can fall through to removing
-			 * the portion that overlaps the input extent.
-			 */
-			memmove(&tr->trim.extents[keep_before],
-					&tr->trim.extents[keep_after],
-					(tr->trim.extent_count - keep_after) * sizeof(dk_extent_t));
-			++tr->trim.extent_count;
-			++keep_after;
-			
-			/*
-			 * Fall through.  We now have the case where the length of extent
-			 * #(keep_before - 1) needs to be updated, and the start of extent
-			 * #(keep_after) needs to be updated.
-			 */
+	}
+	
+	/*
+	 * May need to truncate the end of extent #(keep_before - 1) if it overlaps
+	 * the input extent.
+	 */
+	if (keep_before > 0) {
+		extent = &trim->extents[keep_before - 1];
+		if (extent->offset + extent->length > offset) {
+			extent->length = offset - extent->offset;
 		}
-		
-		/*
-		 * May need to truncate the end of extent #(keep_before - 1) if it overlaps
-		 * the input extent.
-		 */
-		if (keep_before > 0) {
-			extent = &tr->trim.extents[keep_before - 1];
-			if (extent->offset + extent->length > offset) {
-				extent->length = offset - extent->offset;
-			}
+	}
+	
+	/*
+	 * May need to update the start of extent #(keep_after) if it overlaps the
+	 * input extent.
+	 */
+	if (keep_after < trim->extent_count) {
+		extent = &trim->extents[keep_after];
+		if (extent->offset < end) {
+			extent->length = extent->offset + extent->length - end;
+			extent->offset = end;
 		}
+	}
+	
+	/*
+	 * If there were whole extents that overlapped the input extent, get rid
+	 * of them by shifting any following extents, and updating the count.
+	 */
+	if (keep_after > keep_before && keep_after < trim->extent_count) {
+		memmove(&trim->extents[keep_before],
+				&trim->extents[keep_after],
+				(trim->extent_count - keep_after) * sizeof(dk_extent_t));
+	}
+	trim->extent_count -= keep_after - keep_before;
+
+	return 0;
+}
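+
+/*
+ * Hole-punch example: with a single extent {offset 0, length 30} in the
+ * list, removing {offset 10, length 10} leaves {offset 0, length 10} and
+ * {offset 20, length 10}.  That is the keep_before > keep_after case
+ * above, which duplicates the extent before trimming the two halves.
+ */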
+
+
+/*
+;________________________________________________________________________________
+;
+; Routine:		journal_trim_remove_extent
+;
+; Function:		Make note that a range of bytes, some of which may have
+;				previously been passed to journal_trim_add_extent, is now in
+;				use on the volume.  The given bytes will not be trimmed as part of
+;				this transaction, or a pending trim of a transaction being
+;				asynchronously flushed.
+;
+; Input Arguments:
+;	jnl			- The journal for the volume containing the byte range.
+;	offset		- The first byte of the range to be trimmed.
+;	length		- The number of bytes of the extent being trimmed.
+;________________________________________________________________________________
+*/
+__private_extern__ int
+journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length)
+{
+	int error = 0;
+	transaction *tr;
+	
+	CHECK_JOURNAL(jnl);
+
+	/* TODO: Is it OK to manipulate the trim list even if JOURNAL_INVALID is set?  I think so... */
+	if (jnl->flags & JOURNAL_INVALID) {
+		return EINVAL;
+	}
+
+	tr = jnl->active_tr;
+	CHECK_TRANSACTION(tr);
+
+	if (jnl_kdebug)
+		KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REMOVE | DBG_FUNC_START, jnl, offset, length, tr->trim.extent_count, 0);
+
+	if (jnl->owner != current_thread()) {
+		panic("jnl: trim_remove_extent: called w/out a transaction! jnl %p, owner %p, curact %p\n",
+			  jnl, jnl->owner, current_thread());
+	}
+
+	free_old_stuff(jnl);
+	
+	error = trim_remove_extent(&tr->trim, offset, length);
+	if (error == 0) {
+		int found = FALSE;
 		
 		/*
-		 * May need to update the start of extent #(keep_after) if it overlaps the
-		 * input extent.
+		 * See if a pending trim has any extents that overlap with the
+		 * one we were given.
 		 */
-		if (keep_after < tr->trim.extent_count) {
-			extent = &tr->trim.extents[keep_after];
-			if (extent->offset < end) {
-				extent->length = extent->offset + extent->length - end;
-				extent->offset = end;
-			}
-		}
+		lck_rw_lock_shared(&jnl->trim_lock);
+		if (jnl->async_trim != NULL)
+			found = trim_search_extent(jnl->async_trim, offset, length);
+		lck_rw_unlock_shared(&jnl->trim_lock);
 		
-		/*
-		 * If there were whole extents that overlapped the input extent, get rid
-		 * of them by shifting any following extents, and updating the count.
-		 */
-		if (keep_after > keep_before && keep_after < tr->trim.extent_count) {
-			memmove(&tr->trim.extents[keep_before],
-					&tr->trim.extents[keep_after],
-					(tr->trim.extent_count - keep_after) * sizeof(dk_extent_t));
+		if (found) {
+			/*
+			 * There was an overlap, so avoid trimming the extent we
+			 * just allocated.  (Otherwise, it might get trimmed after
+			 * we've written to it, which will cause that data to be
+			 * corrupted.)
+			 */
+			uint32_t async_extent_count = 0;
+			
+			if (jnl_kdebug)
+				KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REMOVE_PENDING | DBG_FUNC_START, jnl, offset, length, 0, 0);
+			lck_rw_lock_exclusive(&jnl->trim_lock);
+			if (jnl->async_trim != NULL) {
+				error = trim_remove_extent(jnl->async_trim, offset, length);
+				async_extent_count = jnl->async_trim->extent_count;
+			}
+			lck_rw_unlock_exclusive(&jnl->trim_lock);
+			if (jnl_kdebug)
+				KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REMOVE_PENDING | DBG_FUNC_END, error, 0, 0, async_extent_count, 0);
 		}
-		tr->trim.extent_count -= keep_after - keep_before;
 	}
-	return 0;
+
+	if (jnl_kdebug)
+		KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_REMOVE | DBG_FUNC_END, error, 0, 0, tr->trim.extent_count, 0);
+	return error;
 }
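+
+/*
+ * Note the locking pattern above: the overlap test against a pending
+ * async trim takes trim_lock shared, and only when an overlap is found
+ * is the lock re-taken exclusive to edit jnl->async_trim.  The common
+ * no-overlap allocation path therefore never takes the exclusive lock.
+ */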
 
 
@@ -3139,29 +3421,70 @@ journal_trim_flush(journal *jnl, transaction *tr)
 {
 	int errno = 0;
 	
-	if (CONFIG_HFS_TRIM) {
-		if ((jnl->flags & JOURNAL_TRIM_ERR) == 0 && tr->trim.extent_count > 0) {
-			dk_unmap_t unmap;
-			
-			bzero(&unmap, sizeof(unmap));
+	if (jnl_kdebug)
+		KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_FLUSH | DBG_FUNC_START, jnl, tr, 0, tr->trim.extent_count, 0);
+
+	if (tr->trim.extent_count > 0) {
+		dk_unmap_t unmap;
+				
+		bzero(&unmap, sizeof(unmap));
+		lck_rw_lock_shared(&jnl->trim_lock);
+		if (CONFIG_HFS_TRIM && (jnl->flags & JOURNAL_USE_UNMAP)) {
 			unmap.extents = tr->trim.extents;
 			unmap.extentsCount = tr->trim.extent_count;
+			if (jnl_kdebug)
+				KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_UNMAP | DBG_FUNC_START, jnl, tr, 0, tr->trim.extent_count, 0);
 			errno = VNOP_IOCTL(jnl->fsdev, DKIOCUNMAP, (caddr_t)&unmap, FWRITE, vfs_context_kernel());
+			if (jnl_kdebug)
+				KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_UNMAP | DBG_FUNC_END, errno, 0, 0, 0, 0);
 			if (errno) {
 				printf("jnl: error %d from DKIOCUNMAP (extents=%lx, count=%u); disabling trim for %s\n",
-						errno, (unsigned long) (tr->trim.extents), tr->trim.extent_count,
+						errno, (unsigned long) (unmap.extents), unmap.extentsCount,
 						jnl->jdev_name);
-				jnl->flags |= JOURNAL_TRIM_ERR;
+				jnl->flags &= ~JOURNAL_USE_UNMAP;
 			}
 		}
-		if (tr->trim.extents) {
-			kfree(tr->trim.extents, tr->trim.allocated_count * sizeof(dk_extent_t));
-			tr->trim.allocated_count = 0;
-			tr->trim.extent_count = 0;
-			tr->trim.extents = NULL;
-		}
+
+		/*
+		 * Call back into the file system to tell them that we have
+		 * trimmed some extents and that they can now be reused.
+		 *
+		 * CAUTION: If the journal becomes invalid (e.g., due to an I/O
+		 * error when trying to write to the journal), this callback
+		 * will stop getting called, even if extents got freed before
+		 * the journal became invalid!
+		 */
+		if (jnl->trim_callback)
+			jnl->trim_callback(jnl->trim_callback_arg, tr->trim.extent_count, tr->trim.extents);
+
+		lck_rw_unlock_shared(&jnl->trim_lock);
+	}
+
+	/*
+	 * If the transaction we're flushing was the async transaction, then
+	 * tell the current transaction that there is no pending trim
+	 * any more.
+	 *
+	 * NOTE: Since we released the lock, another thread could have
+	 * removed one or more extents from our list.  That's not a
+	 * problem since any writes to the re-allocated blocks
+	 * would get sent to the device after the DKIOCUNMAP.
+	 */
+	lck_rw_lock_exclusive(&jnl->trim_lock);
+	if (jnl->async_trim == &tr->trim)
+		jnl->async_trim = NULL;
+	lck_rw_unlock_exclusive(&jnl->trim_lock);
+
+	if (tr->trim.extents) {			
+		kfree(tr->trim.extents, tr->trim.allocated_count * sizeof(dk_extent_t));
+		tr->trim.allocated_count = 0;
+		tr->trim.extent_count = 0;
+		tr->trim.extents = NULL;
 	}
 	
+	if (jnl_kdebug)
+		KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_FLUSH | DBG_FUNC_END, errno, 0, 0, 0, 0);
+
 	return errno;
 }
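+
+/*
+ * Ordering note: the DKIOCUNMAP (when JOURNAL_USE_UNMAP is set) and the
+ * file system's trim callback both run under the shared trim_lock,
+ * before jnl->async_trim is cleared and the extent list is freed.  A
+ * failed unmap now just clears JOURNAL_USE_UNMAP for the volume instead
+ * of setting a fatal JOURNAL_TRIM_ERR flag.
+ */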
 
@@ -3169,23 +3492,23 @@ journal_trim_flush(journal *jnl, transaction *tr)
 static int
 journal_binfo_cmp(const void *a, const void *b)
 {
-    const block_info *bi_a = (const struct block_info *)a;
-    const block_info *bi_b = (const struct block_info *)b;
-    daddr64_t res;
+	const block_info *bi_a = (const struct block_info *)a;
+	const block_info *bi_b = (const struct block_info *)b;
+	daddr64_t res;
 
-    if (bi_a->bnum == (off_t)-1) {
+	if (bi_a->bnum == (off_t)-1) {
 		return 1;
-    }
-    if (bi_b->bnum == (off_t)-1) {
+	}
+	if (bi_b->bnum == (off_t)-1) {
 		return -1;
-    }
+	}
 
-    // don't have to worry about negative block
-    // numbers so this is ok to do.
-    //
-    res = (buf_blkno(bi_a->u.bp) - buf_blkno(bi_b->u.bp));
+	// don't have to worry about negative block
+	// numbers so this is ok to do.
+	//
+	res = (buf_blkno(bi_a->u.bp) - buf_blkno(bi_b->u.bp));
 
-    return (int)res;
+	return (int)res;
 }
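+
+/*
+ * journal_binfo_cmp is a qsort-style comparator: "killed" entries
+ * (bnum == -1) sort to the end, everything else by physical block
+ * number.  A typical invocation (sketch; binfo[0] is the list header,
+ * so real blocks start at index 1) would be:
+ *
+ *	qsort(&blhdr->binfo[1], blhdr->num_blocks - 1,
+ *	      sizeof(block_info), journal_binfo_cmp);
+ */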
 
 
@@ -3220,27 +3543,27 @@ journal_binfo_cmp(const void *a, const void *b)
  *		-1		An error occurred.  The journal is marked invalid.
  */
 static int
-end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void *callback_arg)
+end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void *callback_arg, boolean_t drop_lock, boolean_t must_wait)
 {
-    int                 i, ret, amt;
-    errno_t		errno;
-    off_t               end;
-    journal            *jnl = tr->jnl;
-    struct buf         *bp, **bparray;
-    block_list_header  *blhdr=NULL, *next=NULL;
-    size_t              tbuffer_offset;
+	block_list_header  *blhdr=NULL, *next=NULL;
+	int		i, ret_val = 0;
+	errno_t		errno;
+	journal		*jnl = tr->jnl;
+	struct buf	*bp;
+	size_t		tbuffer_offset;
+	boolean_t	drop_lock_early;
 
 	if (jnl->cur_tr) {
 		panic("jnl: jnl @ %p already has cur_tr %p, new tr: %p\n",
 			  jnl, jnl->cur_tr, tr);
 	}
 
-    // if there weren't any modified blocks in the transaction
-    // just save off the transaction pointer and return.
-    if (tr->total_bytes == jnl->jhdr->blhdr_size) {
+	// if there weren't any modified blocks in the transaction
+	// just save off the transaction pointer and return.
+	if (tr->total_bytes == jnl->jhdr->blhdr_size) {
 		jnl->cur_tr = tr;
-		return 0;
-    }
+		goto done;
+	}
 
     // if our transaction buffer isn't very full, just hang
     // on to it and don't actually flush anything.  this is
@@ -3248,174 +3571,314 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void
     // transaction buffer if it's full or if we have more than
     // one of them so we don't start hogging too much memory.
     //
-    // We also check the number of extents waiting to be trimmed.
-    // If it is small enough, then keep accumulating more (so we
-    // can reduce the overhead of trimming).  If there was a
-    // prior trim error, then we stop issuing trims for this
+    // We also check the device supports UNMAP/TRIM, and if so,
+    // the number of extents waiting to be trimmed.  If it is
+    // small enough, then keep accumulating more (so we can
+    // reduce the overhead of trimming).  If there was a prior
+    // trim error, then we stop issuing trims for this
     // volume, so we can also coalesce transactions.
-    //
+	//
     if (   force_it == 0
 		   && (jnl->flags & JOURNAL_NO_GROUP_COMMIT) == 0 
 		   && tr->num_blhdrs < 3
 		   && (tr->total_bytes <= ((tr->tbuffer_size*tr->num_blhdrs) - tr->tbuffer_size/8))
-		   && ((jnl->flags & JOURNAL_TRIM_ERR) || (tr->trim.extent_count < jnl_trim_flush_limit))) {
+		   && (!(jnl->flags & JOURNAL_USE_UNMAP) || (tr->trim.extent_count < jnl_trim_flush_limit))) {
 
 		jnl->cur_tr = tr;
-		return 0;
-    }
+		goto done;
+	}
 
+	KERNEL_DEBUG(0xbbbbc018|DBG_FUNC_START, jnl, tr, drop_lock, must_wait, 0);
 
-    // if we're here we're going to flush the transaction buffer to disk.
-    // make sure there is room in the journal first.
-    check_free_space(jnl, tr->total_bytes);
+	lock_condition(jnl, &jnl->flushing, "end_transaction");
 
-    // range check the end index
-    if (jnl->jhdr->end <= 0 || jnl->jhdr->end > jnl->jhdr->size) {
-		panic("jnl: end_transaction: end is bogus 0x%llx (sz 0x%llx)\n",
-			  jnl->jhdr->end, jnl->jhdr->size);
-    }
+	/*
+	 * if the previous 'finish_end_transaction' was being run
+	 * asynchronously, it could have encountered a condition
+	 * that caused it to mark the journal invalid... if that
+	 * occurred while we were waiting for it to finish, we
+	 * need to notice and abort the current transaction
+	 */
+	if ((jnl->flags & JOURNAL_INVALID) || jnl->flush_aborted == TRUE) {
+		unlock_condition(jnl, &jnl->flushing);
 
-    // this transaction starts where the current journal ends
-    tr->journal_start = jnl->jhdr->end;
-    end               = jnl->jhdr->end;
+		abort_transaction(jnl, tr);
+		ret_val = -1;
+		KERNEL_DEBUG(0xbbbbc018|DBG_FUNC_END, jnl, tr, ret_val, 0, 0);
+		goto done;
+	}
 
-	//
-	// if the first entry in old_start[] isn't free yet, loop calling the
-	// file system flush routine until it is (or we panic).
-	//
-	i = 0;
-	lock_oldstart(jnl);
-	while ((jnl->old_start[0] & 0x8000000000000000LL) != 0) {
-		if (jnl->flush) {
-			unlock_oldstart(jnl);
+	/*
+	 * Store a pointer to this transaction's trim list so that
+	 * future transactions can find it.
+	 *
+	 * Note: if there are no extents in the trim list, then don't
+	 * bother saving the pointer since nothing can add new extents
+	 * to the list (and other threads/transactions only care if
+	 * there is a trim pending).
+	 */
+	lck_rw_lock_exclusive(&jnl->trim_lock);
+	if (jnl->async_trim != NULL)
+		panic("jnl: end_transaction: async_trim already non-NULL!");
+	if (tr->trim.extent_count > 0)
+		jnl->async_trim = &tr->trim;
+	lck_rw_unlock_exclusive(&jnl->trim_lock);
 
-			if (jnl->flush) {
-				jnl->flush(jnl->flush_arg);
-			}
+	/*
+	 * snapshot the transaction sequence number while we are still behind
+	 * the journal lock since it will be bumped upon the start of the
+	 * next transaction group which may overlap the current journal flush...
+	 * we pass the snapshot into write_journal_header during the journal
+	 * flush so that it can write the correct version in the header...
+	 * because we hold the 'flushing' condition variable for the duration
+	 * of the journal flush, 'saved_sequence_num' remains stable
+	 */
+	jnl->saved_sequence_num = jnl->sequence_num;
 
-			// yield the cpu so others can get in to clear the lock bit
-			(void)tsleep((void *)jnl, PRIBIO, "jnl-old-start-sleep", 1);
+	/*
+	 * if we're here we're going to flush the transaction buffer to disk.
+	 * 'check_free_space' will not return until there is enough free
+	 * space for this transaction in the journal and jnl->old_start[0]
+	 * is available for use
+	 */
+	KERNEL_DEBUG(0xbbbbc030 | DBG_FUNC_START, jnl, 0, 0, 0, 0);
 
-			lock_oldstart(jnl);
-		}
-		if (i++ >= 500) {
-			panic("jnl: transaction that started at 0x%llx is not completing! jnl %p\n",
-				  jnl->old_start[0] & (~0x8000000000000000LL), jnl);
-		}
+	check_free_space(jnl, tr->total_bytes, &tr->delayed_header_write, jnl->saved_sequence_num);
+
+	KERNEL_DEBUG(0xbbbbc030 | DBG_FUNC_END, jnl, tr->delayed_header_write, 0, 0, 0);
+
+	// range check the end index
+	if (jnl->jhdr->end <= 0 || jnl->jhdr->end > jnl->jhdr->size) {
+		panic("jnl: end_transaction: end is bogus 0x%llx (sz 0x%llx)\n",
+			  jnl->jhdr->end, jnl->jhdr->size);
 	}
+	if (tr->delayed_header_write == TRUE) {
+		thread_t	thread = THREAD_NULL;
 
-	//
-	// slide everyone else down and put our latest guy in the last
-	// entry in the old_start array
-	//
-	
-	/* Because old_start is locked above, we can cast away the volatile qualifier before passing it to memcpy. */
+		lock_condition(jnl, &jnl->writing_header, "end_transaction");
+		/*
+		 * fire up a thread to write the journal header
+		 * asynchronously... when it finishes, it will call
+		 * unlock_condition... we can overlap the preparation of
+		 * the log and buffers during this time
+		 */
+		kernel_thread_start((thread_continue_t)write_header_thread, jnl, &thread);
+	} else
+		jnl->write_header_failed = FALSE;
+
+
+	// this transaction starts where the current journal ends
+	tr->journal_start = jnl->jhdr->end;
+
+	lock_oldstart(jnl);
+	/*
+	 * Because old_start is locked above, we can cast away the volatile qualifier before passing it to memcpy.
+	 * slide everyone else down and put our latest guy in the last
+	 * entry in the old_start array
+	 */
 	memcpy(__CAST_AWAY_QUALIFIER(&jnl->old_start[0], volatile, void *), __CAST_AWAY_QUALIFIER(&jnl->old_start[1], volatile, void *), sizeof(jnl->old_start)-sizeof(jnl->old_start[0]));
 	jnl->old_start[sizeof(jnl->old_start)/sizeof(jnl->old_start[0]) - 1] = tr->journal_start | 0x8000000000000000LL;
 
 	unlock_oldstart(jnl);
 
 
-    // for each block, make sure that the physical block # is set
-    for(blhdr=tr->blhdr; blhdr; blhdr=next) {
-		char *blkptr;
-		
+	for (blhdr = tr->blhdr; blhdr; blhdr = next) {
+		char	*blkptr;
+		buf_t	sbp;
+		int32_t	bsize;
+
 		tbuffer_offset = jnl->jhdr->blhdr_size;
-		for(i=1; i < blhdr->num_blocks; i++) {
-			daddr64_t blkno;
-			daddr64_t lblkno;
-			struct vnode *vp;
 
-			bp = blhdr->binfo[i].u.bp;
+		for (i = 1; i < blhdr->num_blocks; i++) {
 
-			// if this block has a callback function set, call
-			// it now and then copy the data from the bp into
-			// the journal. 
 			if (blhdr->binfo[i].bnum != (off_t)-1) {
-				void (*func)(struct buf *, void *);
+				void (*func)(buf_t, void *);
 				void  *arg;
 
+				bp = blhdr->binfo[i].u.bp;
+
 				if (bp == NULL) {
 					panic("jnl: inconsistent binfo (NULL bp w/bnum %lld; jnl @ %p, tr %p)\n",
 						blhdr->binfo[i].bnum, jnl, tr);
 				}
-
-				buf_setfilter(bp, NULL, NULL, (void **)&func, &arg);
-
-				if (func) {
-					// acquire the bp here so that we can safely
-					// mess around with its data.  buf_acquire()
-					// will return EAGAIN if the buffer was busy,
-					// so loop trying again.
-					do {
-						errno = buf_acquire(bp, 0, 0, 0);
-					} while (errno == EAGAIN);
-					
-					if (errno == 0) {
+				/*
+				 * acquire the bp here so that we can safely
+				 * mess around with its data.  buf_acquire()
+				 * will return EAGAIN if the buffer was busy,
+				 * so loop trying again.
+				 */
+				do {
+					errno = buf_acquire(bp, BAC_REMOVE, 0, 0);
+				} while (errno == EAGAIN);
 					
-						// call the hook function and then copy the
-						// data into the transaction buffer...
-						func(bp, arg);
+				if (errno)
+					panic("could not acquire bp %p (err %d)\n", bp, errno);
 
-						blkptr = (char *)&((char *)blhdr)[tbuffer_offset];
-						memcpy(blkptr, (char *)buf_dataptr(bp), buf_size(bp));
-
-						buf_drop(bp);
+				if ((buf_flags(bp) & (B_LOCKED|B_DELWRI)) != (B_LOCKED|B_DELWRI)) {
+					if (jnl->flags & JOURNAL_CLOSE_PENDING) {
+						buf_clearflags(bp, B_LOCKED);
+						buf_brelse(bp);
+						
+						/*
+						 * this is an odd case that appears to happen occasionally
+						 * make sure we mark this block as no longer valid
+						 * so that we don't process it in "finish_end_transaction" since
+						 * the bp that is recorded in our array no longer belongs
+						 * to us (normally we substitute a shadow bp to be processed);
+						 * issuing a 'buf_bawrite' on a stale buf_t pointer leads
+						 * to all kinds of problems.
+						 */
+						blhdr->binfo[i].bnum = (off_t)-1;
+						continue;
 					} else {
-						panic("could not acquire bp %p (err %d)\n", bp, errno);
+						panic("jnl: end_tr: !!!DANGER!!! bp %p flags (0x%x) not LOCKED & DELWRI\n", bp, buf_flags(bp));
 					}
 				}
+				bsize = buf_size(bp);
 
-			} else {   // bnum == -1, only true if a block was "killed" 
+				buf_setfilter(bp, NULL, NULL, &func, &arg);
+				
+				blkptr = (char *)&((char *)blhdr)[tbuffer_offset];
 
-				tbuffer_offset += blhdr->binfo[i].u.bi.bsize;
-				continue;
-			}
+				sbp = buf_create_shadow_priv(bp, FALSE, (uintptr_t)blkptr, 0, 0);
 
-			tbuffer_offset += buf_size(bp);
+				if (sbp == NULL)
+					panic("jnl: buf_create_shadow returned NULL");
 
-			vp = buf_vnode(bp);
-			blkno = buf_blkno(bp);
-			lblkno = buf_lblkno(bp);
+				/*
+				 * copy the data into the transaction buffer...
+				 */
+				memcpy(blkptr, (char *)buf_dataptr(bp), bsize);
 
-			if (vp == NULL && lblkno == blkno) {
-			    printf("jnl: %s: end_tr: bad news! bp @ %p w/null vp and l/blkno = %qd/%qd.  aborting the transaction (tr %p jnl %p).\n",
-				jnl->jdev_name, bp, lblkno, blkno, tr, jnl);
-			    goto bad_journal;
-			}
-	    
-			// if the lblkno is the same as blkno and this bp isn't
-			// associated with the underlying file system device then
-			// we need to call bmap() to get the actual physical block.
-			//
-			if ((lblkno == blkno) && (vp != jnl->fsdev)) {
-			        off_t	f_offset;
-				size_t 	contig_bytes;
+				buf_clearflags(bp, B_LOCKED);
+				buf_markclean(bp);
+				buf_drop(bp);
 
-				if (VNOP_BLKTOOFF(vp, lblkno, &f_offset)) {
-					printf("jnl: %s: end_tr: vnop_blktooff failed @ %p, jnl %p\n", jnl->jdev_name, bp, jnl);
-					goto bad_journal;
-				}
-				if (VNOP_BLOCKMAP(vp, f_offset, buf_count(bp), &blkno, &contig_bytes, NULL, 0, NULL)) {
-					printf("jnl: %s: end_tr: can't blockmap the bp @ %p, jnl %p\n", jnl->jdev_name, bp, jnl);
-					goto bad_journal;
-				}
-				if ((uint32_t)contig_bytes < buf_count(bp)) {
-					printf("jnl: %s: end_tr: blk not physically contiguous on disk@ %p, jnl %p\n", jnl->jdev_name, bp, jnl);
-					goto bad_journal;
+				/*
+				 * adopt the shadow buffer for this block
+				 */
+				if (func) {
+					/*
+					 * transfer FS hook function to the
+					 * shadow buffer... it will get called
+					 * in finish_end_transaction
+					 */
+					buf_setfilter(sbp, func, arg, NULL, NULL);
 				}
-				buf_setblkno(bp, blkno);
+				blhdr->binfo[i].u.bp = sbp;
+
+			} else {
+				// bnum == -1, only true if a block was "killed" 
+				bsize = blhdr->binfo[i].u.bi.bsize;
 			}
-			// update this so we write out the correct physical block number!
-			blhdr->binfo[i].bnum = (off_t)(blkno);
+			tbuffer_offset += bsize;
 		}
-
 		next = (block_list_header *)((long)blhdr->binfo[0].bnum);
-    }
-    
+	}
+	/*
+	 * if callback != NULL, we don't want to drop the journal
+	 * lock, or complete end_transaction asynchronously, since
+	 * the caller is expecting the callback to run in the calling
+	 * context
+	 *
+	 * if drop_lock == FALSE, we can't complete end_transaction
+	 * asynchronously
+	 */
+	if (callback)
+		drop_lock_early = FALSE;
+	else
+		drop_lock_early = drop_lock;
+
+	if (drop_lock_early == FALSE)
+		must_wait = TRUE;
+
+	if (drop_lock_early == TRUE) {
+		jnl->owner = NULL;
+		unlock_journal(jnl);
+		drop_lock = FALSE;
+	}
+	if (must_wait == TRUE)
+		ret_val = finish_end_transaction(tr, callback, callback_arg);
+	else {
+		thread_t	thread = THREAD_NULL;
+
+		/*
+		 * fire up a thread to complete processing this transaction
+		 * asynchronously... when it finishes, it will call
+		 * unlock_condition
+		 */
+		kernel_thread_start((thread_continue_t)finish_end_thread, tr, &thread);
+	}
+	KERNEL_DEBUG(0xbbbbc018|DBG_FUNC_END, jnl, tr, ret_val, 0, 0);
+done:
+	if (drop_lock == TRUE) {
+		jnl->owner = NULL;
+		unlock_journal(jnl);
+	}
+	return (ret_val);
+}
+
+
+static void
+finish_end_thread(transaction *tr)
+{
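+	/*
+	 * note: this worker marks its disk I/O policy passive; our
+	 * understanding is that this keeps the background journal I/O
+	 * from causing other threads' I/O to be throttled
+	 */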
+#if !CONFIG_EMBEDDED
+	proc_apply_thread_selfdiskacc(IOPOL_PASSIVE);
+#else /* !CONFIG_EMBEDDED */
+	struct uthread	*ut;
+
+	ut = get_bsdthread_info(current_thread());
+	ut->uu_iopol_disk = IOPOL_PASSIVE;
+#endif /* !CONFIG_EMBEDDED */
+
+	finish_end_transaction(tr, NULL, NULL);
+
+	thread_deallocate(current_thread());
+	thread_terminate(current_thread());
+}
 
+static void
+write_header_thread(journal *jnl)
+{
+#if !CONFIG_EMBEDDED
+	proc_apply_thread_selfdiskacc(IOPOL_PASSIVE);
+#else /* !CONFIG_EMBEDDED */
+	struct uthread	*ut;
+
+	ut = get_bsdthread_info(current_thread());
+	ut->uu_iopol_disk = IOPOL_PASSIVE;
+#endif /* !CONFIG_EMBEDDED */
+
+	if (write_journal_header(jnl, 1, jnl->saved_sequence_num))
+		jnl->write_header_failed = TRUE;
+	else
+		jnl->write_header_failed = FALSE;
+	unlock_condition(jnl, &jnl->writing_header);
+
+	thread_deallocate(current_thread());
+	thread_terminate(current_thread());
+}
+
+static int
+finish_end_transaction(transaction *tr, errno_t (*callback)(void*), void *callback_arg)
+{
+	int		i, amt;
+	int		ret = 0;
+	off_t		end;
+	journal		*jnl = tr->jnl;
+	buf_t		bp, *bparray;
+	vnode_t		vp;
+	block_list_header  *blhdr=NULL, *next=NULL;
+	size_t		tbuffer_offset;
+	int		bufs_written = 0;
+	int		ret_val = 0;
+
+	KERNEL_DEBUG(0xbbbbc028|DBG_FUNC_START, jnl, tr, 0, 0, 0);
+
+	end  = jnl->jhdr->end;
+
+	for (blhdr = tr->blhdr; blhdr; blhdr = (block_list_header *)((long)blhdr->binfo[0].bnum)) {
 
-    for(blhdr=tr->blhdr; blhdr; blhdr=(block_list_header *)((long)blhdr->binfo[0].bnum)) {
 		amt = blhdr->bytes_used;
 
 		blhdr->binfo[0].u.bi.b.sequence_num = tr->sequence_num;
@@ -3424,64 +3887,139 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void
 		blhdr->checksum = calc_checksum((char *)blhdr, BLHDR_CHECKSUM_SIZE);
 
 		if (kmem_alloc(kernel_map, (vm_offset_t *)&bparray, blhdr->num_blocks * sizeof(struct buf *))) {
-		    panic("can't allocate %zd bytes for bparray\n", blhdr->num_blocks * sizeof(struct buf *));
+			panic("can't allocate %zd bytes for bparray\n", blhdr->num_blocks * sizeof(struct buf *));
 		}
-
-		// calculate individual block checksums
 		tbuffer_offset = jnl->jhdr->blhdr_size;
-		for(i=1; i < blhdr->num_blocks; i++) {
-		    int32_t bsize;
+
+		for (i = 1; i < blhdr->num_blocks; i++) {
+			void (*func)(buf_t, void *);
+			void	*arg;
+			int32_t	bsize;
 		    
-		    if (blhdr->binfo[i].bnum != (off_t)-1) {
-			bparray[i] = blhdr->binfo[i].u.bp;
-			bsize = buf_size(bparray[i]);
-			blhdr->binfo[i].u.bi.bsize = bsize;
-			blhdr->binfo[i].u.bi.b.cksum = calc_checksum(&((char *)blhdr)[tbuffer_offset], bsize);
-		    } else {
-			bparray[i] = NULL;
-			bsize = blhdr->binfo[i].u.bi.bsize;
-			blhdr->binfo[i].u.bi.b.cksum = 0;
-		    }
+			/*
+			 * finish preparing the shadow buf_t before 
+			 * calculating the individual block checksums
+			 */
+			if (blhdr->binfo[i].bnum != (off_t)-1) {
+				daddr64_t blkno;
+				daddr64_t lblkno;
 
-		    tbuffer_offset += bsize;
-		}
+				bp = blhdr->binfo[i].u.bp;
+				
+				vp = buf_vnode(bp);
+				blkno = buf_blkno(bp);
+				lblkno = buf_lblkno(bp);
 
-		ret = write_journal_data(jnl, &end, blhdr, amt);
+				if (vp == NULL && lblkno == blkno) {
+					printf("jnl: %s: end_tr: bad news! bp @ %p w/null vp and l/blkno = %qd/%qd.  aborting the transaction (tr %p jnl %p).\n",
+					       jnl->jdev_name, bp, lblkno, blkno, tr, jnl);
+					ret_val = -1;
+					goto bad_journal;
+				}
+	    
+				// if the lblkno is the same as blkno and this bp isn't
+				// associated with the underlying file system device then
+				// we need to call bmap() to get the actual physical block.
+				//
+				if ((lblkno == blkno) && (vp != jnl->fsdev)) {
+					off_t	f_offset;
+					size_t 	contig_bytes;
+
+					if (VNOP_BLKTOOFF(vp, lblkno, &f_offset)) {
+						printf("jnl: %s: end_tr: vnop_blktooff failed @ %p, jnl %p\n", jnl->jdev_name, bp, jnl);
+						ret_val = -1;
+						goto bad_journal;
+					}
+					if (VNOP_BLOCKMAP(vp, f_offset, buf_count(bp), &blkno, &contig_bytes, NULL, 0, NULL)) {
+						printf("jnl: %s: end_tr: can't blockmap the bp @ %p, jnl %p\n", jnl->jdev_name, bp, jnl);
+						ret_val = -1;
+						goto bad_journal;
+					}
+					if ((uint32_t)contig_bytes < buf_count(bp)) {
+						printf("jnl: %s: end_tr: blk not physically contiguous on disk@ %p, jnl %p\n", jnl->jdev_name, bp, jnl);
+						ret_val = -1;
+						goto bad_journal;
+					}
+					buf_setblkno(bp, blkno);
+				}
+				// update this so we write out the correct physical block number!
+				blhdr->binfo[i].bnum = (off_t)(blkno);
 
-		// always put the bp pointers back
-		for(i=1; i < blhdr->num_blocks; i++) {
-		    if (blhdr->binfo[i].bnum != (off_t)-1) {
-			blhdr->binfo[i].u.bp = bparray[i];
-		    }
+				/*
+				 * pick up the FS hook function (if any) and prepare
+				 * to fire this buffer off in the next pass
+				 */
+				buf_setfilter(bp, buffer_flushed_callback, tr, &func, &arg);
+
+				if (func) {
+					/*
+					 * call the hook function supplied by the filesystem...
+					 * this needs to happen BEFORE calc_checksum in case
+					 * the FS morphs the data in the buffer
+					 */
+					func(bp, arg);
+				}
+				bparray[i] = bp;
+				bsize = buf_size(bp);
+				blhdr->binfo[i].u.bi.bsize = bsize;
+				blhdr->binfo[i].u.bi.b.cksum = calc_checksum(&((char *)blhdr)[tbuffer_offset], bsize);
+			} else {
+				bparray[i] = NULL;
+				bsize = blhdr->binfo[i].u.bi.bsize;
+				blhdr->binfo[i].u.bi.b.cksum = 0;
+			}
+			tbuffer_offset += bsize;
 		}
+		/*
+		 * if we fired off the journal_write_header asynchronously in
+		 * 'end_transaction', we need to wait for its completion
+		 * before writing the actual journal data
+		 */
+		wait_condition(jnl, &jnl->writing_header, "finish_end_transaction");
+
+		if (jnl->write_header_failed == FALSE)
+			ret = write_journal_data(jnl, &end, blhdr, amt);
+		else 
+			ret_val = -1;
+		/*
+		 * put the bp pointers back so that we can 
+		 * make the final pass on them
+		 */
+		for (i = 1; i < blhdr->num_blocks; i++)
+			blhdr->binfo[i].u.bp = bparray[i];
 
 		kmem_free(kernel_map, (vm_offset_t)bparray, blhdr->num_blocks * sizeof(struct buf *));
 
+		if (ret_val == -1)
+			goto bad_journal;
+
 		if (ret != amt) {
 			printf("jnl: %s: end_transaction: only wrote %d of %d bytes to the journal!\n",
-			    jnl->jdev_name, ret, amt);
+			       jnl->jdev_name, ret, amt);
 
+			ret_val = -1;
 			goto bad_journal;
 		}
-    }
+	}
+	jnl->jhdr->end  = end;    // update where the journal now ends
+	tr->journal_end = end;    // the transaction ends here too
 
-    jnl->jhdr->end  = end;    // update where the journal now ends
-    tr->journal_end = end;    // the transaction ends here too
-    if (tr->journal_start == 0 || tr->journal_end == 0) {
+	if (tr->journal_start == 0 || tr->journal_end == 0) {
 		panic("jnl: end_transaction: bad tr journal start/end: 0x%llx 0x%llx\n",
-			  tr->journal_start, tr->journal_end);
-    }
+		      tr->journal_start, tr->journal_end);
+	}
 
-    if (write_journal_header(jnl, 0) != 0) {
+	if (write_journal_header(jnl, 0, jnl->saved_sequence_num) != 0) {
+		ret_val = -1;
 		goto bad_journal;
-    }
-
+	}
 	/*
 	 * If the caller supplied a callback, call it now that the blocks have been
 	 * written to the journal.  This is used by journal_relocate so, for example,
 	 * the file system can change its pointer to the new journal.
 	 */
 	if (callback != NULL && callback(callback_arg) != 0) {
+		ret_val = -1;
 		goto bad_journal;
 	}
 	
@@ -3489,284 +4027,429 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void
 	// Send a DKIOCUNMAP for the extents trimmed by this transaction, and
 	// free up the extent list.
 	//
-	errno = journal_trim_flush(jnl, tr);
+	journal_trim_flush(jnl, tr);
 	
-    //
-    // setup for looping through all the blhdr's.  we null out the
-    // tbuffer and blhdr fields so that they're not used any more.
-    //
-    blhdr       = tr->blhdr;
-    tr->tbuffer = NULL;
-    tr->blhdr   = NULL;
-
-    // the buffer_flushed_callback will only be called for the 
-    // real blocks that get flushed so we have to account for 
-    // the block_list_headers here.
-    //
-    tr->num_flushed = tr->num_blhdrs * jnl->jhdr->blhdr_size;
-
-    // for each block, set the iodone callback and unlock it
-    for(; blhdr; blhdr=next) {
-
-		// we can re-order the buf ptrs because everything is written out already
-		qsort(&blhdr->binfo[1], blhdr->num_blocks-1, sizeof(block_info), journal_binfo_cmp);
-
-		for(i=1; i < blhdr->num_blocks; i++) {
-			if (blhdr->binfo[i].bnum == (off_t)-1) {
-				continue;
-			}
+	// the buffer_flushed_callback will only be called for the 
+	// real blocks that get flushed so we have to account for 
+	// the block_list_headers here.
+	//
+	tr->num_flushed = tr->num_blhdrs * jnl->jhdr->blhdr_size;
 
-			bp = blhdr->binfo[i].u.bp;
+	lock_condition(jnl, &jnl->asyncIO, "finish_end_transaction");
 
-			// have to pass BAC_REMOVE here because we're going to bawrite()
-			// the buffer when we're done
-			do {
-				errno = buf_acquire(bp, BAC_REMOVE, 0, 0);
-			} while (errno == EAGAIN);
-			
-			if (errno == 0) {
-				struct vnode *save_vp;
-				void *cur_filter;
+	//
+	// setup for looping through all the blhdr's.
+	//
+	for (blhdr = tr->blhdr; blhdr; blhdr = next) {
+		uint16_t	num_blocks;
 
-				if ((buf_flags(bp) & (B_LOCKED|B_DELWRI)) != (B_LOCKED|B_DELWRI)) {
-					if (jnl->flags & JOURNAL_CLOSE_PENDING) {
-					    buf_clearflags(bp, B_LOCKED);
-					    buf_brelse(bp);
-						continue;
-					} else {
-						panic("jnl: end_tr: !!!DANGER!!! bp %p flags (0x%x) not LOCKED & DELWRI\n", bp, buf_flags(bp));
-					}
-				}
-				save_vp = buf_vnode(bp);
+		/*
+		 * grab this info ahead of issuing the buf_bawrites...
+		 * once the last one goes out, it's possible for blhdr
+		 * to be freed (especially if we get preempted) before
+		 * we do the last check of num_blocks or
+		 * grab the next blhdr pointer...
+		 */
+		next = (block_list_header *)((long)blhdr->binfo[0].bnum);
+		num_blocks = blhdr->num_blocks;
 
-				buf_setfilter(bp, buffer_flushed_callback, tr, &cur_filter, NULL);
+		/*
+		 * we can re-order the buf ptrs because everything is written out already
+		 */
+		qsort(&blhdr->binfo[1], num_blocks-1, sizeof(block_info), journal_binfo_cmp);
 
-				if (cur_filter) {
-					panic("jnl: bp @ %p (blkno %qd, vp %p) has non-null iodone (%p) buffflushcb %p\n",
-						  bp, buf_blkno(bp), save_vp, cur_filter, buffer_flushed_callback);
-				}
-				buf_clearflags(bp, B_LOCKED);
+		/*
+		 * need to make sure that the loop issuing the buf_bawrite's
+		 * does not touch blhdr once the last buf_bawrite has been
+		 * issued... at that point, we no longer have a legitimate
+		 * reference on the associated storage since it will be
+		 * released upon the completion of that last buf_bawrite
+		 */
+		for (i = num_blocks-1; i >= 1; i--) {
+			if (blhdr->binfo[i].bnum != (off_t)-1)
+				break;
+			num_blocks--;
+		}
+		for (i = 1; i < num_blocks; i++) {
 
-				// kicking off the write here helps performance
+			if ((bp = blhdr->binfo[i].u.bp)) {
+				vp = buf_vnode(bp);
+		    
 				buf_bawrite(bp);
-				// XXXdbg this is good for testing: buf_bdwrite(bp);
-				//buf_bdwrite(bp);
 				
 				// this undoes the vnode_ref() in journal_modify_block_end()
-				vnode_rele_ext(save_vp, 0, 1);
-			} else {
-				printf("jnl: %s: end_transaction: could not acquire block %p (errno %d)!\n",
-				    jnl->jdev_name,bp, errno);
+				vnode_rele_ext(vp, 0, 1);
+
+				bufs_written++;
 			}
 		}
+	}
+	if (bufs_written == 0) {
+		/*
+		 * since we didn't issue any buf_bawrite's, there is no
+		 * async trigger to cause the memory associated with this
+		 * transaction to be freed... so, move it to the garbage
+		 * list now
+		 */
+		lock_oldstart(jnl);
 
-		next = (block_list_header *)((long)blhdr->binfo[0].bnum);
+		tr->next       = jnl->tr_freeme;
+		jnl->tr_freeme = tr;
 
-		// we can free blhdr here since we won't need it any more
-		blhdr->binfo[0].bnum = 0xdeadc0de;
-		kmem_free(kernel_map, (vm_offset_t)blhdr, tr->tbuffer_size);
-    }
+		unlock_oldstart(jnl);
 
-    //printf("jnl: end_tr: tr @ 0x%x, jnl-blocks: 0x%llx - 0x%llx. exit!\n",
-    //   tr, tr->journal_start, tr->journal_end);
-    return 0;
+		unlock_condition(jnl, &jnl->asyncIO);
+	}
 
+	//printf("jnl: end_tr: tr @ 0x%x, jnl-blocks: 0x%llx - 0x%llx. exit!\n",
+	//   tr, tr->journal_start, tr->journal_end);
 
-  bad_journal:
-    jnl->flags |= JOURNAL_INVALID;
-    jnl->old_start[sizeof(jnl->old_start)/sizeof(jnl->old_start[0]) - 1] &= ~0x8000000000000000LL;
-    abort_transaction(jnl, tr);		// cleans up list of extents to be trimmed
-    return -1;
+bad_journal:
+	if (ret_val == -1) {
+		/*
+		 * 'flush_aborted' is protected by the flushing condition... we need to
+		 * set it before dropping the condition so that it will be
+		 * noticed in 'end_transaction'... we add this additional
+		 * aborted condition so that we can drop the 'flushing' condition
+		 * before grabbing the journal lock... this avoids a deadlock
+		 * in 'end_transaction' which is holding the journal lock while
+		 * waiting for the 'flushing' condition to clear...
+		 * everyone else will notice the JOURNAL_INVALID flag
+		 */
+		jnl->flush_aborted = TRUE;
+
+		unlock_condition(jnl, &jnl->flushing);
+		lock_journal(jnl);
+
+		jnl->flags |= JOURNAL_INVALID;
+		jnl->old_start[sizeof(jnl->old_start)/sizeof(jnl->old_start[0]) - 1] &= ~0x8000000000000000LL;
+		abort_transaction(jnl, tr);		// cleans up list of extents to be trimmed
+
+		unlock_journal(jnl);
+	} else
+		unlock_condition(jnl, &jnl->flushing);
+
+	KERNEL_DEBUG(0xbbbbc028|DBG_FUNC_END, jnl, tr, bufs_written, ret_val, 0);
+
+	return (ret_val);
+}
+
+
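+/*
+ * the three helpers below implement a simple boolean condition
+ * variable: the flag lives in the journal structure and is protected
+ * by the flush lock (flock)... lock_condition() waits for the flag to
+ * clear and then sets it (i.e. "acquires" the condition),
+ * wait_condition() only waits for it to clear, and unlock_condition()
+ * clears it and wakes any waiters
+ */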
+static void
+lock_condition(journal *jnl, boolean_t *condition, const char *condition_name)
+{
+
+	KERNEL_DEBUG(0xbbbbc020|DBG_FUNC_START, jnl, condition, 0, 0, 0);
+
+	lock_flush(jnl);
+
+	while (*condition == TRUE)
+		msleep(condition, &jnl->flock, PRIBIO, condition_name, NULL);
+
+	*condition = TRUE;
+	unlock_flush(jnl);
+
+	KERNEL_DEBUG(0xbbbbc020|DBG_FUNC_END, jnl, condition, 0, 0, 0);
+}
+
+static void
+wait_condition(journal *jnl, boolean_t *condition, const char *condition_name)
+{
+
+	if (*condition == FALSE)
+		return;
+
+	KERNEL_DEBUG(0xbbbbc02c|DBG_FUNC_START, jnl, condition, 0, 0, 0);
+
+	lock_flush(jnl);
+
+	while (*condition == TRUE)
+		msleep(condition, &jnl->flock, PRIBIO, condition_name, NULL);
+
+	unlock_flush(jnl);
+
+	KERNEL_DEBUG(0xbbbbc02c|DBG_FUNC_END, jnl, condition, 0, 0, 0);
+}
+
+static void
+unlock_condition(journal *jnl, boolean_t *condition)
+{
+	lock_flush(jnl);
+
+	*condition = FALSE;
+	wakeup(condition);
+
+	unlock_flush(jnl);
 }
 
 static void
 abort_transaction(journal *jnl, transaction *tr)
 {
-    int                i;
-    errno_t		errno;
-    block_list_header *blhdr, *next;
-    struct buf        *bp;
-    struct vnode      *save_vp;
+	block_list_header *blhdr, *next;
 
-    // for each block list header, iterate over the blocks then
-    // free up the memory associated with the block list.
-    //
-    // for each block, clear the lock bit and release it.
-    //
-    for(blhdr=tr->blhdr; blhdr; blhdr=next) {
+	// for each block list header, iterate over the blocks then
+	// free up the memory associated with the block list.
+	//
+	// find each of the primary blocks (i.e. the list could
+	// contain a mix of shadowed and real buf_t's depending
+	// on when the abort condition was detected) and mark them
+	// clean and locked in the cache... this at least allows 
+	// the FS a consistent view between its in-core data structures
+	// and the meta-data held in the cache
+	//
+	KERNEL_DEBUG(0xbbbbc034|DBG_FUNC_START, jnl, tr, 0, 0, 0);
+
+	for (blhdr = tr->blhdr; blhdr; blhdr = next) {
+		int	i;
+		
+		for (i = 1; i < blhdr->num_blocks; i++) {
+			buf_t		bp, tbp, sbp;
+			vnode_t		bp_vp;
+			errno_t		errno;
 
-		for(i=1; i < blhdr->num_blocks; i++) {
-			if (blhdr->binfo[i].bnum == (off_t)-1) {
+			if (blhdr->binfo[i].bnum == (off_t)-1)
 				continue;
-			}
-			if ( (buf_vnode(blhdr->binfo[i].u.bp) == NULL) ||
-			     !(buf_flags(blhdr->binfo[i].u.bp) & B_LOCKED) ) {
-			        continue;
-			}
 
-			errno = buf_meta_bread(buf_vnode(blhdr->binfo[i].u.bp),
-							 buf_lblkno(blhdr->binfo[i].u.bp),
-							 buf_size(blhdr->binfo[i].u.bp),
-							 NOCRED,
-							 &bp);
-			if (errno == 0) {
-				if (bp != blhdr->binfo[i].u.bp) {
-					panic("jnl: abort_tr: got back a different bp! (bp %p should be %p, jnl %p\n",
-						  bp, blhdr->binfo[i].u.bp, jnl);
-				}
+			tbp = blhdr->binfo[i].u.bp;
 
-				// releasing a bp marked invalid
-				// also clears the locked and delayed state
-				buf_markinvalid(bp);
-				save_vp = buf_vnode(bp);
+			bp_vp = buf_vnode(tbp);
 
-				buf_brelse(bp);
+			buf_setfilter(tbp, NULL, NULL, NULL, NULL);
 
-				vnode_rele_ext(save_vp, 0, 1);
-			} else {
-				printf("jnl: %s: abort_tr: could not find block %Ld vp %p!\n",
-				    jnl->jdev_name, blhdr->binfo[i].bnum, blhdr->binfo[i].u.bp);
-				if (bp) {
+			if (buf_shadow(tbp))
+				sbp = tbp;
+			else
+				sbp = NULL;
+
+			if (bp_vp) {
+				errno = buf_meta_bread(bp_vp,
+						       buf_lblkno(tbp),
+						       buf_size(tbp),
+						       NOCRED,
+						       &bp);
+				if (errno == 0) {
+					if (sbp == NULL && bp != tbp && (buf_flags(tbp) & B_LOCKED)) {
+						panic("jnl: abort_tr: got back a different bp! (bp %p should be %p, jnl %p)\n",
+						      bp, tbp, jnl);
+					}
+					/*
+					 * once the journal has been marked INVALID and aborted,
+					 * NO meta data can be written back to the disk, so 
+					 * mark the buf_t clean and make sure it's locked in the cache
+					 * note: if we found a shadow, the real buf_t needs to be relocked
+					 */
+					buf_setflags(bp, B_LOCKED);
+					buf_markclean(bp);
 					buf_brelse(bp);
+
+					KERNEL_DEBUG(0xbbbbc034|DBG_FUNC_NONE, jnl, tr, bp, 0, 0);
+
+					/*
+					 * this undoes the vnode_ref() in journal_modify_block_end()
+					 */
+					vnode_rele_ext(bp_vp, 0, 1);
+				} else {
+					printf("jnl: %s: abort_tr: could not find block %Ld vp %p!\n",
+					       jnl->jdev_name, blhdr->binfo[i].bnum, tbp);
+					if (bp) {
+						buf_brelse(bp);
+					}
 				}
 			}
+			if (sbp)
+				buf_brelse(sbp);
 		}
-
 		next = (block_list_header *)((long)blhdr->binfo[0].bnum);
 
 		// we can free blhdr here since we won't need it any more
 		blhdr->binfo[0].bnum = 0xdeadc0de;
 		kmem_free(kernel_map, (vm_offset_t)blhdr, tr->tbuffer_size);
-    }
+	}
 
+	/*
+	 * If the transaction we're aborting was the async transaction, then
+	 * tell the current transaction that there is no pending trim
+	 * any more.
+	 */
+	lck_rw_lock_exclusive(&jnl->trim_lock);
+	if (jnl->async_trim == &tr->trim)
+		jnl->async_trim = NULL;
+	lck_rw_unlock_exclusive(&jnl->trim_lock);
+	
 	if (tr->trim.extents) {
 		kfree(tr->trim.extents, tr->trim.allocated_count * sizeof(dk_extent_t));
 	}
 	tr->trim.allocated_count = 0;
 	tr->trim.extent_count = 0;
 	tr->trim.extents = NULL;
-    tr->tbuffer     = NULL;
-    tr->blhdr       = NULL;
-    tr->total_bytes = 0xdbadc0de;
+	tr->tbuffer     = NULL;
+	tr->blhdr       = NULL;
+	tr->total_bytes = 0xdbadc0de;
 	FREE_ZONE(tr, sizeof(transaction), M_JNL_TR);
+
+	KERNEL_DEBUG(0xbbbbc034|DBG_FUNC_END, jnl, tr, 0, 0, 0);
 }
 
 
 int
 journal_end_transaction(journal *jnl)
 {
-    int ret;
+	int ret;
 	transaction *tr;
     
-    CHECK_JOURNAL(jnl);
+	CHECK_JOURNAL(jnl);
+
+	free_old_stuff(jnl);
 
 	if ((jnl->flags & JOURNAL_INVALID) && jnl->owner == NULL) {
 		return 0;
 	}
 
-    if (jnl->owner != current_thread()) {
+	if (jnl->owner != current_thread()) {
 		panic("jnl: end_tr: I'm not the owner! jnl %p, owner %p, curact %p\n",
-			  jnl, jnl->owner, current_thread());
-    }
-
-    free_old_stuff(jnl);
+		      jnl, jnl->owner, current_thread());
+	}
+	jnl->nested_count--;
 
-    jnl->nested_count--;
-    if (jnl->nested_count > 0) {
+	if (jnl->nested_count > 0) {
 		return 0;
-    } else if (jnl->nested_count < 0) {
+	} else if (jnl->nested_count < 0) {
 		panic("jnl: jnl @ %p has negative nested count (%d). bad boy.\n", jnl, jnl->nested_count);
-    }
+	}
     
-    if (jnl->flags & JOURNAL_INVALID) {
+	if (jnl->flags & JOURNAL_INVALID) {
 		if (jnl->active_tr) {
 			if (jnl->cur_tr != NULL) {
 				panic("jnl: journal @ %p has active tr (%p) and cur tr (%p)\n",
-					  jnl, jnl->active_tr, jnl->cur_tr);
+				      jnl, jnl->active_tr, jnl->cur_tr);
 			}
-	    
 			tr             = jnl->active_tr;
 			jnl->active_tr = NULL;
+
 			abort_transaction(jnl, tr);
 		}
-
 		jnl->owner = NULL;
 		unlock_journal(jnl);
 
 		return EINVAL;
-    }
-
-    tr = jnl->active_tr;
-    CHECK_TRANSACTION(tr);
+	}
 
-    // clear this out here so that when check_free_space() calls
-    // the FS flush function, we don't panic in journal_flush()
-    // if the FS were to call that.  note: check_free_space() is
-    // called from end_transaction().
-    // 
-    jnl->active_tr = NULL;
-    ret = end_transaction(tr, 0, NULL, NULL);
+	tr = jnl->active_tr;
+	CHECK_TRANSACTION(tr);
 
-    jnl->owner = NULL;
-    unlock_journal(jnl);
+	// clear this out here so that when check_free_space() calls
+	// the FS flush function, we don't panic in journal_flush()
+	// if the FS were to call that.  note: check_free_space() is
+	// called from end_transaction().
+	// 
+	jnl->active_tr = NULL;
+	ret = end_transaction(tr, 0, NULL, NULL, TRUE, FALSE);
 
-    return ret;
+	return ret;
 }
 
 
+/* 
+ * Flush the contents of the journal to the disk. 
+ *
+ *  Input: 
+ *  	wait_for_IO - 
+ *  	If TRUE, wait to write the in-memory journal to the disk 
+ *  	consistently, and also wait to write all asynchronous 
+ *  	metadata blocks to their corresponding locations
+ *  	consistently on the disk.  This means that the journal 
+ *  	is empty at this point and does not contain any 
+ *  	transactions.  This is overkill in normal scenarios 
+ *  	but is useful whenever the metadata blocks are required 
+ *  	to be consistent on-disk instead of just the journal 
+ *  	being consistent, e.g. before live verification 
+ *  	and live volume resizing.  
+ *
+ *  	If FALSE, only wait to write the in-memory journal to the 
+ *  	disk consistently.  This means that the journal still 
+ *  	contains uncommitted transactions and the file system 
+ *  	metadata blocks in the journal transactions might be 
+ *  	written asynchronously to the disk.  But there is no 
+ *  	guarantee that they are written to the disk before 
+ *  	returning to the caller.  Note that this option is 
+ *  	sufficient for file system data integrity, as it 
+ *  	guarantees consistent journal content on the disk.
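+ *
+ *  	A minimal usage sketch (illustrative only; the call sites are
+ *  	hypothetical):
+ *
+ *  		journal_flush(jnl, FALSE);	// journal itself consistent on disk
+ *  		journal_flush(jnl, TRUE);	// journal drained and all journaled
+ *  						// metadata blocks on disk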
+ */
 int
-journal_flush(journal *jnl)
+journal_flush(journal *jnl, boolean_t wait_for_IO)
 {
-    int need_signal = 0;
+	boolean_t drop_lock = FALSE;
     
-    CHECK_JOURNAL(jnl);
+	CHECK_JOURNAL(jnl);
     
-    if (jnl->flags & JOURNAL_INVALID) {
+	free_old_stuff(jnl);
+
+	if (jnl->flags & JOURNAL_INVALID) {
 		return -1;
-    }
+	}
 
-    KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_JOURNAL, DBG_JOURNAL_FLUSH)) 
-    	| DBG_FUNC_START, 0, 0, 0, 0, 0);
+	KERNEL_DEBUG(DBG_JOURNAL_FLUSH | DBG_FUNC_START, jnl, 0, 0, 0, 0);
 
-    if (jnl->owner != current_thread()) {
+	if (jnl->owner != current_thread()) {
 		lock_journal(jnl);
-		need_signal = 1;
-    }
-
-    free_old_stuff(jnl);
+		drop_lock = TRUE;
+	}
 
-    // if we're not active, flush any buffered transactions
-    if (jnl->active_tr == NULL && jnl->cur_tr) {
+	// if we're not active, flush any buffered transactions
+	if (jnl->active_tr == NULL && jnl->cur_tr) {
 		transaction *tr = jnl->cur_tr;
 
 		jnl->cur_tr = NULL;
-		end_transaction(tr, 1, NULL, NULL);   // force it to get flushed
-    }
 
-    if (need_signal) {
-		unlock_journal(jnl);
-    }
+		if (wait_for_IO) {
+			wait_condition(jnl, &jnl->flushing, "journal_flush");
+			wait_condition(jnl, &jnl->asyncIO, "journal_flush");
+		}
+		/*
+		 * "end_transaction" will wait for any current async flush
+		 * to complete, before flushing "cur_tr"... because we've
+		 * specified the 'must_wait' arg as TRUE, it will then
+		 * synchronously flush the "cur_tr"
+		 */
+		end_transaction(tr, 1, NULL, NULL, drop_lock, TRUE);   // force it to get flushed
 
-    KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_JOURNAL, DBG_JOURNAL_FLUSH)) 
-    	| DBG_FUNC_END, 0, 0, 0, 0, 0);
+	} else  { 
+		if (drop_lock == TRUE) {
+			unlock_journal(jnl);
+		}
 
-    return 0;
+		/* Because of the pipelined journal, journal transactions 
+		 * might be in the process of being flushed on another thread.  
+		 * If there is nothing to flush currently, we should 
+		 * synchronize ourselves with the pipelined journal thread 
+		 * to ensure that all in-flight transactions, if any, are 
+		 * flushed before we return success to the caller.
+		 */
+		wait_condition(jnl, &jnl->flushing, "journal_flush");
+	}
+	if (wait_for_IO) {
+		wait_condition(jnl, &jnl->asyncIO, "journal_flush");
+	}
+
+	KERNEL_DEBUG(DBG_JOURNAL_FLUSH | DBG_FUNC_END, jnl, 0, 0, 0, 0);
+
+	return 0;
 }
 
 int
 journal_active(journal *jnl)
 {
-    if (jnl->flags & JOURNAL_INVALID) {
+	if (jnl->flags & JOURNAL_INVALID) {
 		return -1;
-    }
+	}
     
-    return (jnl->active_tr == NULL) ? 0 : 1;
+	return (jnl->active_tr == NULL) ? 0 : 1;
 }
 
 void *
 journal_owner(journal *jnl)
 {
-    return jnl->owner;
+	return jnl->owner;
 }
 
 int journal_uses_fua(journal *jnl)
@@ -3835,37 +4518,37 @@ int journal_uses_fua(journal *jnl)
 int journal_relocate(journal *jnl, off_t offset, off_t journal_size, int32_t tbuffer_size,
 	errno_t (*callback)(void *), void *callback_arg)
 {
-	int ret;
-	transaction *tr;
+	int		ret;
+	transaction	*tr;
 	
 	/*
 	 * Sanity check inputs, and adjust the size of the transaction buffer.
 	 */
-    if ((offset % jnl->jhdr->jhdr_size) != 0) {
+	if ((offset % jnl->jhdr->jhdr_size) != 0) {
 		printf("jnl: %s: relocate: offset 0x%llx is not an even multiple of block size 0x%x\n",
-		    jnl->jdev_name, offset, jnl->jhdr->jhdr_size);
+		       jnl->jdev_name, offset, jnl->jhdr->jhdr_size);
 		return EINVAL;
-    }
-    if ((journal_size % jnl->jhdr->jhdr_size) != 0) {
+	}
+	if ((journal_size % jnl->jhdr->jhdr_size) != 0) {
 		printf("jnl: %s: relocate: journal size 0x%llx is not an even multiple of block size 0x%x\n",
-		    jnl->jdev_name, journal_size, jnl->jhdr->jhdr_size);
+		       jnl->jdev_name, journal_size, jnl->jhdr->jhdr_size);
 		return EINVAL;
-    }
+	}
 
-    CHECK_JOURNAL(jnl);
+	CHECK_JOURNAL(jnl);
 
 	/* Guarantee we own the active transaction. */
-    if (jnl->flags & JOURNAL_INVALID) {
+	if (jnl->flags & JOURNAL_INVALID) {
 		return EINVAL;
-    }
-    if (jnl->owner != current_thread()) {
-    	panic("jnl: relocate: Not the owner! jnl %p, owner %p, curact %p\n",
-    		jnl, jnl->owner, current_thread());
+	}
+	if (jnl->owner != current_thread()) {
+		panic("jnl: relocate: Not the owner! jnl %p, owner %p, curact %p\n",
+		      jnl, jnl->owner, current_thread());
 	}
 	
-    if (tbuffer_size == 0)
-    	tbuffer_size = jnl->tbuffer_size;
-    size_up_tbuffer(jnl, tbuffer_size, jnl->jhdr->jhdr_size);
+	if (tbuffer_size == 0)
+		tbuffer_size = jnl->tbuffer_size;
+	size_up_tbuffer(jnl, tbuffer_size, jnl->jhdr->jhdr_size);
 	
 	/*
 	 * Flush any non-active transactions.  We have to temporarily hide the
@@ -3875,11 +4558,13 @@ int journal_relocate(journal *jnl, off_t offset, off_t journal_size, int32_t tbu
 	tr = jnl->active_tr;
 	CHECK_TRANSACTION(tr);
 	jnl->active_tr = NULL;
-	ret = journal_flush(jnl);
+	ret = journal_flush(jnl, TRUE);
 	jnl->active_tr = tr;
+
 	if (ret) {
 		return ret;
 	}
+	wait_condition(jnl, &jnl->flushing, "end_transaction");
 	
 	/* Update the journal's offset and size in memory. */
 	jnl->jdev_offset = offset;
@@ -3893,7 +4578,7 @@ int journal_relocate(journal *jnl, off_t offset, off_t journal_size, int32_t tbu
 	 * before they get written to their normal on-disk locations.
 	 */
 	jnl->active_tr = NULL;
-	ret = end_transaction(tr, 1, callback, callback_arg);
+	ret = end_transaction(tr, 1, callback, callback_arg, FALSE, TRUE);
 	if (ret) {
 		printf("jnl: %s: relocate: end_transaction failed (%d)\n", jnl->jdev_name, ret);
 		goto bad_journal;
@@ -3912,9 +4597,9 @@ int journal_relocate(journal *jnl, off_t offset, off_t journal_size, int32_t tbu
 	return 0;
 
 bad_journal:
-    jnl->flags |= JOURNAL_INVALID;
-    abort_transaction(jnl, tr);
-    return ret;
+	jnl->flags |= JOURNAL_INVALID;
+	abort_transaction(jnl, tr);
+	return ret;
 }
 
 
@@ -3927,62 +4612,62 @@ int journal_uses_fua(__unused journal *jnl)
 
 journal *
 journal_create(__unused struct vnode *jvp,
-    __unused off_t         offset,
-    __unused off_t         journal_size,
-    __unused struct vnode *fsvp,
-    __unused size_t        min_fs_blksz,
-    __unused int32_t       flags,
-    __unused int32_t       tbuffer_size,
-    __unused void        (*flush)(void *arg),
-    __unused void         *arg)
+	       __unused off_t         offset,
+	       __unused off_t         journal_size,
+	       __unused struct vnode *fsvp,
+	       __unused size_t        min_fs_blksz,
+	       __unused int32_t       flags,
+	       __unused int32_t       tbuffer_size,
+	       __unused void        (*flush)(void *arg),
+	       __unused void         *arg)
 {
     return NULL;
 }
 
 journal *
 journal_open(__unused struct vnode *jvp,
-    __unused off_t         offset,
-    __unused off_t         journal_size,
-    __unused struct vnode *fsvp,
-    __unused size_t        min_fs_blksz,
-    __unused int32_t       flags,
-    __unused int32_t       tbuffer_size,
-    __unused void        (*flush)(void *arg),
-    __unused void         *arg)
+	     __unused off_t         offset,
+	     __unused off_t         journal_size,
+	     __unused struct vnode *fsvp,
+	     __unused size_t        min_fs_blksz,
+	     __unused int32_t       flags,
+	     __unused int32_t       tbuffer_size,
+	     __unused void        (*flush)(void *arg),
+	     __unused void         *arg)
 {
-    return NULL;
+	return NULL;
 }
 
 
 int
 journal_modify_block_start(__unused journal *jnl, __unused struct buf *bp)
 {
-    return EINVAL;
+	return EINVAL;
 }
 
 int
 journal_modify_block_end(__unused journal *jnl,
-    __unused struct buf *bp,
-    __unused void (*func)(struct buf *bp, void *arg),
-    __unused void *arg)
+			 __unused struct buf *bp,
+			 __unused void (*func)(struct buf *bp, void *arg),
+			 __unused void *arg)
 {
-    return EINVAL;
+	return EINVAL;
 }
 
 int
 journal_kill_block(__unused journal *jnl, __unused struct buf *bp)
 {
-    return EINVAL;
+	return EINVAL;
 }
 
 int journal_relocate(__unused journal *jnl,
-    __unused off_t offset,
-    __unused off_t journal_size,
-    __unused int32_t tbuffer_size,
-    __unused errno_t (*callback)(void *),
-    __unused void *callback_arg)
+		     __unused off_t offset,
+		     __unused off_t journal_size,
+		     __unused int32_t tbuffer_size,
+		     __unused errno_t (*callback)(void *),
+		     __unused void *callback_arg)
 {
-    return EINVAL;
+	return EINVAL;
 }
 
 void
@@ -3993,19 +4678,19 @@ journal_close(__unused journal *jnl)
 int
 journal_start_transaction(__unused journal *jnl)
 {
-    return EINVAL;
+	return EINVAL;
 }
 
 int
 journal_end_transaction(__unused journal *jnl)
 {
-    return EINVAL;
+	return EINVAL;
 }
 
 int
-journal_flush(__unused journal *jnl)
+journal_flush(__unused journal *jnl, __unused boolean_t wait_for_IO)
 {
-    return EINVAL;
+	return EINVAL;
 }
 
 int
@@ -4015,13 +4700,13 @@ journal_is_clean(__unused struct vnode *jvp,
 		 __unused struct vnode *fsvp,
                  __unused size_t        min_fs_block_size)
 {
-    return 0;
+	return 0;
 }
 
 
 void *
 journal_owner(__unused journal *jnl)
 {
-    return NULL;
+	return NULL;
 }
 #endif  // !JOURNALING
diff --git a/bsd/vfs/vfs_journal.h b/bsd/vfs/vfs_journal.h
index 310445395..11b24c3ee 100644
--- a/bsd/vfs/vfs_journal.h
+++ b/bsd/vfs/vfs_journal.h
@@ -1,4 +1,3 @@
-
 /*
  * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
@@ -81,20 +80,23 @@ struct jnl_trim_list {
 	dk_extent_t *extents;
 };
 
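+/*
+ * FS-supplied hook handed the extent list associated with a journaled
+ * TRIM; registered via journal_trim_set_callback() (declared below).
+ */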
+typedef void (*jnl_trim_callback_t)(void *arg, uint32_t extent_count, const dk_extent_t *extents);
+
 typedef struct transaction {
-    int                 tbuffer_size;  // in bytes
-    char               *tbuffer;       // memory copy of the transaction
-    block_list_header  *blhdr;         // points to the first byte of tbuffer
-    int                 num_blhdrs;    // how many buffers we've allocated
-    int                 total_bytes;   // total # of bytes in transaction
-    int                 num_flushed;   // how many bytes have been flushed
-    int                 num_killed;    // how many bytes were "killed"
-    off_t               journal_start; // where in the journal this transaction starts
-    off_t               journal_end;   // where in the journal this transaction ends
-    struct journal     *jnl;           // ptr back to the journal structure
-    struct transaction *next;          // list of tr's (either completed or to be free'd)
-    uint32_t            sequence_num;
-    struct jnl_trim_list	trim;
+    int                  tbuffer_size;  // in bytes
+    char                *tbuffer;       // memory copy of the transaction
+    block_list_header   *blhdr;         // points to the first byte of tbuffer
+    int                  num_blhdrs;    // how many buffers we've allocated
+    int                  total_bytes;   // total # of bytes in transaction
+    int                  num_flushed;   // how many bytes have been flushed
+    int                  num_killed;    // how many bytes were "killed"
+    off_t                journal_start; // where in the journal this transaction starts
+    off_t                journal_end;   // where in the journal this transaction ends
+    struct journal      *jnl;           // ptr back to the journal structure
+    struct transaction  *next;          // list of tr's (either completed or to be free'd)
+    uint32_t             sequence_num;
+    struct jnl_trim_list trim;
+    boolean_t            delayed_header_write;
 } transaction;
 
 
@@ -133,6 +135,8 @@ typedef struct journal_header {
  */
 typedef struct journal {
     lck_mtx_t           jlock;             // protects the struct journal data
+    lck_mtx_t		flock;             // serializes flushing of journal
+    lck_rw_t            trim_lock;         // protects the async_trim field, below
 
     struct vnode       *jdev;              // vnode of the device where the journal lives
     off_t               jdev_offset;       // byte offset to the start of the journal
@@ -145,11 +149,23 @@ typedef struct journal {
 
     int32_t             flags;
     int32_t             tbuffer_size;      // default transaction buffer size
-
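+    /*
+     * pipelined-flush state: each boolean below is a "condition"
+     * protected by flock and driven through lock_condition(),
+     * wait_condition() and unlock_condition() in vfs_journal.c
+     */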
+    boolean_t		flush_aborted;
+    boolean_t		flushing;
+    boolean_t		asyncIO;
+    boolean_t		writing_header;
+    boolean_t		write_header_failed;
+
+    struct jnl_trim_list *async_trim;      // extents to be trimmed by transaction being asynchronously flushed
+    jnl_trim_callback_t	trim_callback;
+    void				*trim_callback_arg;
+    
     char               *header_buf;        // in-memory copy of the journal header
     int32_t             header_buf_size;
     journal_header     *jhdr;              // points to the first byte of header_buf
 
+    uint32_t		saved_sequence_num;
+    uint32_t		sequence_num;
+
     off_t               max_read_size;
     off_t               max_write_size;
 
@@ -174,7 +190,7 @@ typedef struct journal {
 #define JOURNAL_FLUSHCACHE_ERR    0x00040000   // means we already printed this err
 #define JOURNAL_NEED_SWAP         0x00080000   // swap any data read from disk
 #define JOURNAL_DO_FUA_WRITES     0x00100000   // do force-unit-access writes
-#define JOURNAL_TRIM_ERR          0x00200000   // a previous trim failed
+#define JOURNAL_USE_UNMAP         0x00200000   // device supports UNMAP (TRIM)
 
 /* journal_open/create options are always in the low-16 bits */
 #define JOURNAL_OPTION_FLAGS_MASK 0x0000ffff
@@ -306,11 +322,12 @@ int   journal_kill_block(journal *jnl, struct buf *bp);
 #ifdef BSD_KERNEL_PRIVATE
 int   journal_trim_add_extent(journal *jnl, uint64_t offset, uint64_t length);
 int   journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length);
+void  journal_trim_set_callback(journal *jnl, jnl_trim_callback_t callback, void *arg);
 #endif
 int   journal_end_transaction(journal *jnl);
 
 int   journal_active(journal *jnl);
-int   journal_flush(journal *jnl);
+int   journal_flush(journal *jnl, boolean_t wait_for_IO);
 void *journal_owner(journal *jnl);    // compare against current_thread()
 int   journal_uses_fua(journal *jnl);
 
diff --git a/bsd/vfs/vfs_lookup.c b/bsd/vfs/vfs_lookup.c
index 553bb41f9..10b885d51 100644
--- a/bsd/vfs/vfs_lookup.c
+++ b/bsd/vfs/vfs_lookup.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -111,6 +111,17 @@ static	void kdebug_lookup(struct vnode *dp, struct componentname *cnp);
 static int vfs_getrealpath(const char * path, char * realpath, size_t bufsize, vfs_context_t ctx);
 #endif
 
+boolean_t 	lookup_continue_ok(struct nameidata *ndp);
+int		lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, int vbusyflags, vfs_context_t ctx);
+int 		lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx);
+int		lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx);
+void		lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation);
+int		lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx);
+int		lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly, 
+			int vbusyflags, int *keep_going, int nc_generation,
+			int wantparent, int atroot, vfs_context_t ctx);
+int 		lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent);
+
 /*
  * Convert a pathname into a pointer to a locked inode.
  *
@@ -150,12 +161,10 @@ int
 namei(struct nameidata *ndp)
 {
 	struct filedesc *fdp;	/* pointer to file descriptor state */
-	char *cp;		/* pointer into pathname argument */
 	struct vnode *dp;	/* the directory we are searching */
 	struct vnode *usedvp = ndp->ni_dvp;  /* store pointer to vp in case we must loop due to
 										   	heavy vnode pressure */
 	u_long cnpflags = ndp->ni_cnd.cn_flags; /* store in case we have to restore after loop */
-	uio_t auio;
 	int error;
 	struct componentname *cnp = &ndp->ni_cnd;
 	vfs_context_t ctx = cnp->cn_context;
@@ -164,8 +173,8 @@ namei(struct nameidata *ndp)
 /* XXX ut should be from context */
 	uthread_t ut = (struct uthread *)get_bsdthread_info(current_thread());
 #endif
-	char *tmppn;
-	char uio_buf[ UIO_SIZEOF(1) ];
+
+	fdp = p->p_fd;
 
 #if DIAGNOSTIC
 	if (!vfs_context_ucred(ctx) || !p)
@@ -175,7 +184,35 @@ namei(struct nameidata *ndp)
 	if (cnp->cn_flags & OPMASK)
 		panic ("namei: flags contaminated with nameiops");
 #endif
-	fdp = p->p_fd;
+
+	/*
+	 * A compound VNOP found something that needs further processing:
+	 * either a trigger vnode, a covered directory, or a symlink.
+	 */
+	if (ndp->ni_flag & NAMEI_CONTLOOKUP) {
+		int rdonly, vbusyflags, keep_going, wantparent;
+
+		rdonly = cnp->cn_flags & RDONLY;
+		vbusyflags = ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0) ? LK_NOWAIT : 0;
+		keep_going = 0;
+		wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
+
+		ndp->ni_flag &= ~(NAMEI_CONTLOOKUP);
+
+		error = lookup_handle_found_vnode(ndp, &ndp->ni_cnd, rdonly, vbusyflags, 
+				&keep_going, ndp->ni_ncgeneration, wantparent, 0, ctx);
+		if (error)
+			goto out_drop;
+		if (keep_going) {
+			if ((cnp->cn_flags & ISSYMLINK) == 0) {
+				panic("We need to keep going on a continued lookup, but for vp type %d (tag %d)\n", ndp->ni_vp->v_type, ndp->ni_vp->v_tag);
+			}
+			goto continue_symlink;
+		}
+
+		return 0;
+
+	}
 
 vnode_recycled:
 
@@ -310,9 +347,6 @@ retry_copy:
 	ndp->ni_vp  = NULLVP;
 
 	for (;;) {
-	        int need_newpathbuf;
-		u_int linklen;
-
 		ndp->ni_startdir = dp;
 
 		if ( (error = lookup(ndp)) ) {
@@ -324,104 +358,13 @@ retry_copy:
 		if ((cnp->cn_flags & ISSYMLINK) == 0) {
 			return (0);
 		}
-#ifndef __LP64__
-		if ((cnp->cn_flags & FSNODELOCKHELD)) {
-		        cnp->cn_flags &= ~FSNODELOCKHELD;
-			unlock_fsnode(ndp->ni_dvp, NULL);
-		}	
-#endif /* __LP64__ */
-
-		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
-			error = ELOOP;
-			break;
-		}
-#if CONFIG_MACF
-		if ((error = mac_vnode_check_readlink(ctx, ndp->ni_vp)) != 0)
-			break;
-#endif /* MAC */
-		if (ndp->ni_pathlen > 1 || !(cnp->cn_flags & HASBUF))
-		        need_newpathbuf = 1;
-		else
-		        need_newpathbuf = 0;
-
-		if (need_newpathbuf) {
-			MALLOC_ZONE(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
-			if (cp == NULL) {
-				error = ENOMEM;
-				break;
-			}
-		} else {
-			cp = cnp->cn_pnbuf;
-		}
-		auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
-
-		uio_addiov(auio, CAST_USER_ADDR_T(cp), MAXPATHLEN);
-
-		error = VNOP_READLINK(ndp->ni_vp, auio, ctx);
-		if (error) {
-			if (need_newpathbuf)
-				FREE_ZONE(cp, MAXPATHLEN, M_NAMEI);
-			break;
-		}
-
-		/* 
-		 * Safe to set unsigned with a [larger] signed type here
-		 * because 0 <= uio_resid <= MAXPATHLEN and MAXPATHLEN
-		 * is only 1024.
-		 */
-		linklen = MAXPATHLEN - (u_int)uio_resid(auio);
-		if (linklen + ndp->ni_pathlen > MAXPATHLEN) {
-			if (need_newpathbuf)
-				FREE_ZONE(cp, MAXPATHLEN, M_NAMEI);
 
-			error = ENAMETOOLONG;
+continue_symlink:
+		/* Gives us a new path to process, and a starting dir */
+		error = lookup_handle_symlink(ndp, &dp, ctx);
+		if (error != 0) {
 			break;
 		}
-		if (need_newpathbuf) {
-			long len = cnp->cn_pnlen;
-
-			tmppn = cnp->cn_pnbuf;
-			bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
-			cnp->cn_pnbuf = cp;
-			cnp->cn_pnlen = MAXPATHLEN;
-
-			if ( (cnp->cn_flags & HASBUF) )
-			        FREE_ZONE(tmppn, len, M_NAMEI);
-			else
-			        cnp->cn_flags |= HASBUF;
-		} else
-			cnp->cn_pnbuf[linklen] = '\0';
-
-		ndp->ni_pathlen += linklen;
-		cnp->cn_nameptr = cnp->cn_pnbuf;
-
-		/*
-		 * starting point for 'relative'
-		 * symbolic link path
-		 */
-		dp = ndp->ni_dvp;
-	        /*
-		 * get rid of references returned via 'lookup'
-		 */
-		vnode_put(ndp->ni_vp);
-		vnode_put(ndp->ni_dvp);
-
-		ndp->ni_vp = NULLVP;
-		ndp->ni_dvp = NULLVP;
-
-		/*
-		 * Check if symbolic link restarts us at the root
-		 */
-		if (*(cnp->cn_nameptr) == '/') {
-			while (*(cnp->cn_nameptr) == '/') {
-				cnp->cn_nameptr++;
-				ndp->ni_pathlen--;
-			}
-			if ((dp = ndp->ni_rootdir) == NULLVP) {
-			        error = ENOENT;
-				goto error_out;
-			}
-		}
 	}
 	/*
 	 * only come here if we fail to handle a SYMLINK...
@@ -429,6 +372,7 @@ retry_copy:
 	 * we need to drop the iocount that was picked
 	 * up in the lookup routine
 	 */
+out_drop:
 	if (ndp->ni_dvp)
 	        vnode_put(ndp->ni_dvp);
 	if (ndp->ni_vp)
@@ -440,6 +384,7 @@ retry_copy:
 	}
 	cnp->cn_pnbuf = NULL;
 	ndp->ni_vp = NULLVP;
+	ndp->ni_dvp = NULLVP;
 	if (error == ERECYCLE){
 		/* vnode was recycled underneath us. re-drive lookup to start at 
 		   the beginning again, since recycling invalidated last lookup*/
@@ -452,143 +397,534 @@ retry_copy:
 	return (error);
 }
 
+int		
+namei_compound_available(vnode_t dp, struct nameidata *ndp)
+{
+	if ((ndp->ni_flag & NAMEI_COMPOUNDOPEN) != 0) {
+		return vnode_compound_open_available(dp);
+	}
 
-/*
- * Search a pathname.
- * This is a very central and rather complicated routine.
- *
- * The pathname is pointed to by ni_ptr and is of length ni_pathlen.
- * The starting directory is taken from ni_startdir. The pathname is
- * descended until done, or a symbolic link is encountered. The variable
- * ni_more is clear if the path is completed; it is set to one if a
- * symbolic link needing interpretation is encountered.
- *
- * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
- * whether the name is to be looked up, created, renamed, or deleted.
- * When CREATE, RENAME, or DELETE is specified, information usable in
- * creating, renaming, or deleting a directory entry may be calculated.
- * If flag has LOCKPARENT or'ed into it, the parent directory is returned
- * locked. If flag has WANTPARENT or'ed into it, the parent directory is
- * returned unlocked. Otherwise the parent directory is not returned. If
- * the target of the pathname exists and LOCKLEAF is or'ed into the flag
- * the target is returned locked, otherwise it is returned unlocked.
- * When creating or renaming and LOCKPARENT is specified, the target may not
- * be ".".  When deleting and LOCKPARENT is specified, the target may be ".".
- * 
- * Overall outline of lookup:
- *
- * dirloop:
- *	identify next component of name at ndp->ni_ptr
- *	handle degenerate case where name is null string
- *	if .. and crossing mount points and on mounted filesys, find parent
- *	call VNOP_LOOKUP routine for next component name
- *	    directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
- *	    component vnode returned in ni_vp (if it exists), locked.
- *	if result vnode is mounted on and crossing mount points,
- *	    find mounted on vnode
- *	if more components of name, do next level at dirloop
- *	return the answer in ni_vp, locked if LOCKLEAF set
- *	    if LOCKPARENT set, return locked parent in ni_dvp
- *	    if WANTPARENT set, return unlocked parent in ni_dvp
- *
- * Returns:	0			Success
- *		ENOENT			No such file or directory
- *		EBADF			Bad file descriptor
- *		ENOTDIR			Not a directory
- *		EROFS			Read-only file system [CREATE]
- *		EISDIR			Is a directory [CREATE]
- *		cache_lookup_path:ERECYCLE  (vnode was recycled from underneath us, redrive lookup again)
- *		vnode_authorize:EROFS
- *		vnode_authorize:EACCES
- *		vnode_authorize:EPERM
- *		vnode_authorize:???
- *		VNOP_LOOKUP:ENOENT	No such file or directory
- *		VNOP_LOOKUP:EJUSTRETURN	Restart system call (INTERNAL)
- *		VNOP_LOOKUP:???
- *		VFS_ROOT:ENOTSUP
- *		VFS_ROOT:ENOENT
- *		VFS_ROOT:???
- */
+	return 0;
+}
 int
-lookup(struct nameidata *ndp)
+lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx)
 {
-	char	*cp;		/* pointer into pathname argument */
-	vnode_t		tdp;		/* saved dp */
-	vnode_t		dp;		/* the directory we are searching */
-	mount_t		mp;		/* mount table entry */
-	int docache = 1;		/* == 0 do not cache last component */
-	int wantparent;			/* 1 => wantparent or lockparent flag */
-	int rdonly;			/* lookup read-only flag bit */
-	int trailing_slash = 0;
-	int dp_authorized = 0;
-	int error = 0;
-	struct componentname *cnp = &ndp->ni_cnd;
-	vfs_context_t ctx = cnp->cn_context;
-	int mounted_on_depth = 0;
-	int dont_cache_mp = 0;
-	vnode_t	mounted_on_dp = NULLVP;
-	int current_mount_generation = 0;
-	int vbusyflags = 0;
-	int nc_generation = 0;
-	vnode_t last_dp = NULLVP;
+	int error;
 
-	/*
-	 * Setup: break out flag bits into variables.
-	 */
-	if (cnp->cn_flags & (NOCACHE | DOWHITEOUT)) {
-	        if ((cnp->cn_flags & NOCACHE) || (cnp->cn_nameiop == DELETE))
-		        docache = 0;
+	if (!dp_authorized_in_cache) {
+		error = vnode_authorize(dp, NULL, KAUTH_VNODE_SEARCH, ctx);
+		if (error)
+			return error;
 	}
-	wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
-	rdonly = cnp->cn_flags & RDONLY;
-	cnp->cn_flags &= ~ISSYMLINK;
-	cnp->cn_consume = 0;
+#if CONFIG_MACF
+	error = mac_vnode_check_lookup(ctx, dp, cnp);
+	if (error)
+		return error;
+#endif /* CONFIG_MACF */
 
-	dp = ndp->ni_startdir;
-	ndp->ni_startdir = NULLVP;
+	return 0;
+}
 
-	if ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0)
-			vbusyflags = LK_NOWAIT;
-	cp = cnp->cn_nameptr;
+void 
+lookup_consider_update_cache(vnode_t dvp, vnode_t vp, struct componentname *cnp, int nc_generation) 
+{
+	int isdot_or_dotdot;
+	isdot_or_dotdot = (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags & ISDOTDOT);
 
-	if (*cp == '\0') {
-	        if ( (vnode_getwithref(dp)) ) {
-			dp = NULLVP;
-		        error = ENOENT;
-			goto bad;
+	if (vp->v_name == NULL || vp->v_parent == NULLVP) {
+		int  update_flags = 0;
+
+		if (isdot_or_dotdot == 0) {
+			if (vp->v_name == NULL)
+				update_flags |= VNODE_UPDATE_NAME;
+			if (dvp != NULLVP && vp->v_parent == NULLVP)
+				update_flags |= VNODE_UPDATE_PARENT;
+
+			if (update_flags)
+				vnode_update_identity(vp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, update_flags);
 		}
-		goto emptyname;
 	}
-dirloop: 
-	ndp->ni_vp = NULLVP;
+	if ( (cnp->cn_flags & MAKEENTRY) && (vp->v_flag & VNCACHEABLE) && LIST_FIRST(&vp->v_nclinks) == NULL) {
+		/*
+		 * missing from name cache, but should
+		 * be in it... this can happen if volfs
+		 * causes the vnode to be created or the
+		 * name cache entry got recycled but the
+		 * vnode didn't...
+		 * check to make sure that ni_dvp is valid, since
+		 * cache_lookup_path may return a NULL ni_dvp...
+		 * do a quick check to see if the generation of the
+		 * directory matches our snapshot... this will get
+		 * rechecked behind the name cache lock, but if it
+		 * already fails to match, no need to go any further
+		 */
+		if (dvp != NULLVP && (nc_generation == dvp->v_nc_generation) && (!isdot_or_dotdot))
+			cache_enter_with_gen(dvp, vp, cnp, nc_generation);
+	}
 
-	if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &trailing_slash, &dp_authorized, last_dp)) ) {
-		dp = NULLVP;
-		goto bad;
+}
+
+#if NAMEDRSRCFORK
+/*
+ * Can change ni_dvp and ni_vp.  On success, returns with iocounts on stream vnode (always) and
+ * data fork if requested.  On failure, returns with an iocount on the data fork (always) and on its parent directory 
+ * (if one was provided).
+ */
+int
+lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname *cnp, int wantparent, vfs_context_t ctx)
+{
+	vnode_t svp = NULLVP;
+	enum nsoperation nsop;
+	int error;
+
+	if (dp->v_type != VREG) {
+		error = ENOENT;
+		goto out;
 	}
-	if ((cnp->cn_flags & ISLASTCN)) {
-	        if (docache)
-		        cnp->cn_flags |= MAKEENTRY;
-	} else
-	        cnp->cn_flags |= MAKEENTRY;
+	switch (cnp->cn_nameiop) {
+		case DELETE:
+			if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
+				nsop = NS_DELETE;
+			} else {
+				error = EPERM;
+				goto out;
+			}
+			break;
+		case CREATE:
+			if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
+				nsop = NS_CREATE;
+			} else {
+				error = EPERM;
+				goto out;
+			}
+			break;
+		case LOOKUP:
+			/* Make sure our lookup of "/..namedfork/rsrc" is allowed. */
+			if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
+				nsop = NS_OPEN;
+			} else {
+				error = EPERM;
+				goto out;
+			}
+			break;
+		default:
+			error = EPERM;
+			goto out;
+	}
+	/* Ask the file system for the resource fork. */
+	error = vnode_getnamedstream(dp, &svp, XATTR_RESOURCEFORK_NAME, nsop, 0, ctx);
 
-	dp = ndp->ni_dvp;
+	/* During a create, it is OK for the stream vnode to be missing. */
+	if (error == ENOATTR || error == ENOENT) {
+		error = (nsop == NS_CREATE) ? 0 : ENOENT;
+	}		
+	if (error) {
+		goto out;
+	}
+	/* The "parent" of the stream is the file. */
+	if (wantparent) {
+		if (ndp->ni_dvp) {
+#ifndef __LP64__
+			if (ndp->ni_cnd.cn_flags & FSNODELOCKHELD) {
+				ndp->ni_cnd.cn_flags &= ~FSNODELOCKHELD;
+				unlock_fsnode(ndp->ni_dvp, NULL);
+			}	
+#endif /* __LP64__ */
+			vnode_put(ndp->ni_dvp);
+		}
+		ndp->ni_dvp = dp;
+	} else {
+		vnode_put(dp);
+	}
+	ndp->ni_vp = svp;  /* on create this may be null */
 
-	if (ndp->ni_vp != NULLVP) {
-	        /*
-		 * cache_lookup_path returned a non-NULL ni_vp then,
-		 * we're guaranteed that the dp is a VDIR, it's 
-		 * been authorized, and vp is not ".."
-		 *
-		 * make sure we don't try to enter the name back into
-		 * the cache if this vp is purged before we get to that
-		 * check since we won't have serialized behind whatever
-		 * activity is occurring in the FS that caused the purge
-		 */
-	        if (dp != NULLVP)
-		        nc_generation = dp->v_nc_generation - 1;
+	/* Restore the truncated pathname buffer (for audits). */
+	if (ndp->ni_pathlen == 1 && ndp->ni_next[0] == '\0') {
+		ndp->ni_next[0] = '/';
+	}
+	cnp->cn_flags  &= ~MAKEENTRY;
 
-	        goto returned_from_lookup_path;
+	return 0;
+out:
+	return error;
+}
+#endif /* NAMEDRSRCFORK */
+
+/*
+ * iocounts in:
+ * 	--One on ni_vp.  One on ni_dvp if there is more path, or we didn't come through the
+ * 	cache, or we came through the cache and the caller doesn't want the parent.
+ *
+ * iocounts out:
+ *	--Leaves us in the correct state for the next step, whatever that might be.
+ *	--If we find a symlink, returns with iocounts on both ni_vp and ni_dvp.
+ *	--If we are to look up another component, then we have an iocount on ni_vp and
+ *	nothing else.  
+ *	--If we are done, returns an iocount on ni_vp, and possibly on ni_dvp depending on nameidata flags.
+ *	--In the event of an error, may return with ni_dvp NULL'ed out (in which case, iocount
+ *	was dropped).
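+ *
+ *	On return, *keep_going is nonzero when the caller must continue the
+ *	lookup: either a symlink was encountered (ISSYMLINK is set) or
+ *	another pathname component remains to be processed.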
+ */
+int		
+lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int rdonly, 
+		int vbusyflags, int *keep_going, int nc_generation,
+		int wantparent, int atroot, vfs_context_t ctx)
+{
+	vnode_t dp;
+	int error;
+	char *cp;
+
+	dp = ndp->ni_vp;
+	*keep_going = 0;
+
+	if (ndp->ni_vp == NULLVP) {
+		panic("NULL ni_vp in %s\n", __FUNCTION__);
+	}
+
+	if (atroot) {
+		goto nextname;
+	}
+
+#if CONFIG_TRIGGERS
+	if (dp->v_resolve) {
+		error = vnode_trigger_resolve(dp, ndp, ctx);
+		if (error) {
+			goto out;
+		}
+	}
+#endif /* CONFIG_TRIGGERS */
+
+	/*
+	 * Take into account any additional components consumed by
+	 * the underlying filesystem.
+	 */
+	if (cnp->cn_consume > 0) {
+		cnp->cn_nameptr += cnp->cn_consume;
+		ndp->ni_next += cnp->cn_consume;
+		ndp->ni_pathlen -= cnp->cn_consume;
+		cnp->cn_consume = 0;
+	} else {
+		lookup_consider_update_cache(ndp->ni_dvp, dp, cnp, nc_generation);
+	}
+
+	/*
+	 * Check to see if the vnode has been mounted on...
+	 * if so find the root of the mounted file system.
+	 * Updates ndp->ni_vp.
+	 */
+	error = lookup_traverse_mountpoints(ndp, cnp, dp, vbusyflags, ctx);
+	dp = ndp->ni_vp;
+	if (error) {
+		goto out;
+	}
+
+#if CONFIG_MACF
+	if (vfs_flags(vnode_mount(dp)) & MNT_MULTILABEL) {
+		error = vnode_label(vnode_mount(dp), NULL, dp, NULL, 0, ctx);
+		if (error)
+			goto out;
+	}
+#endif
+
+	/*
+	 * Check for symbolic link
+	 */
+	if ((dp->v_type == VLNK) &&
+	    ((cnp->cn_flags & FOLLOW) || (ndp->ni_flag & NAMEI_TRAILINGSLASH) || *ndp->ni_next == '/')) {
+		cnp->cn_flags |= ISSYMLINK;
+		*keep_going = 1;
+		return (0);
+	}
+
+	/*
+	 * Check for bogus trailing slashes.
+	 */
+	if ((ndp->ni_flag & NAMEI_TRAILINGSLASH)) {
+		if (dp->v_type != VDIR) {
+			error = ENOTDIR;
+			goto out;
+		}
+		ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH);
+	} 
+	
+nextname:
+	/*
+	 * Not a symbolic link.  If more pathname,
+	 * continue at next component, else return.
+	 *
+	 * Definitely have a dvp if there's another slash 
+	 */
+	if (*ndp->ni_next == '/') {
+		cnp->cn_nameptr = ndp->ni_next + 1;
+		ndp->ni_pathlen--;
+		while (*cnp->cn_nameptr == '/') {
+			cnp->cn_nameptr++;
+			ndp->ni_pathlen--;
+		}
+
+		cp = cnp->cn_nameptr;
+		vnode_put(ndp->ni_dvp);
+		ndp->ni_dvp = NULLVP;
+
+		if (*cp == '\0') {
+			goto emptyname;
+		}
+
+		*keep_going = 1;
+		return 0;
+	}
+				  
+	/*
+	 * Disallow directory write attempts on read-only file systems.
+	 */
+	if (rdonly &&
+	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
+		error = EROFS;
+		goto out;
+	}
+	
+	/* If SAVESTART is set, we should have a dvp */
+	if (cnp->cn_flags & SAVESTART) {
+	        /*	
+		 * note that we already hold a reference
+		 * on both dp and ni_dvp, but for some reason
+		 * can't get another one... in this case we
+		 * need to do vnode_put on dp in 'bad2'
+		 */
+	        if ( (vnode_get(ndp->ni_dvp)) ) {
+		        error = ENOENT;
+			goto out;
+		}
+		ndp->ni_startdir = ndp->ni_dvp;
+	}
+	if (!wantparent && ndp->ni_dvp) {
+		vnode_put(ndp->ni_dvp);
+		ndp->ni_dvp = NULLVP;
+	}
+
+	if (cnp->cn_flags & AUDITVNPATH1)
+		AUDIT_ARG(vnpath, dp, ARG_VNODE1);
+	else if (cnp->cn_flags & AUDITVNPATH2)
+		AUDIT_ARG(vnpath, dp, ARG_VNODE2);
+
+#if NAMEDRSRCFORK
+	/*
+	 * Caller wants the resource fork.
+	 */
+	if ((cnp->cn_flags & CN_WANTSRSRCFORK) && (dp != NULLVP)) {
+		error = lookup_handle_rsrc_fork(dp, ndp, cnp, wantparent, ctx);
+		if (error != 0)
+			goto out;
+
+		dp = ndp->ni_vp;
+	}
+#endif
+	if (kdebug_enable)
+	        kdebug_lookup(dp, cnp);
+
+	return 0;
+
+emptyname:
+	error = lookup_handle_emptyname(ndp, cnp, wantparent);
+	if (error != 0) 
+		goto out;
+
+	return 0;
+out:
+	return error;
+
+}
+
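+/*
+ * Caller-side sketch of the keep_going contract above (illustrative only;
+ * it mirrors what lookup() does below and adds no new logic):
+ *
+ *	error = lookup_handle_found_vnode(ndp, cnp, ..., &keep_going, ...);
+ *	if (error)
+ *		goto bad2;
+ *	if (keep_going) {
+ *		if (cnp->cn_flags & ISSYMLINK)
+ *			return 0;		// namei() resolves the link
+ *		last_dp = ndp->ni_vp;		// more components: redrive
+ *		goto dirloop;
+ *	}
+ */
+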
+/*
+ * Comes in with an iocount on ni_vp.  May overwrite ni_dvp, but doesn't interpret its incoming value.
+ */
+int 
+lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent)
+{
+	vnode_t dp;
+	int error = 0;
+
+	dp = ndp->ni_vp;
+	cnp->cn_namelen = 0;
+	/*
+	 * A degenerate name (e.g. / or "") which is a way of
+	 * talking about a directory, e.g. like "/." or ".".
+	 */
+	if (dp->v_type != VDIR) {
+		error = ENOTDIR;
+		goto out;
+	}
+	if (cnp->cn_nameiop != LOOKUP) {
+		error = EISDIR;
+		goto out;
+	}
+	if (wantparent) {
+	        /*	
+		 * note that we already hold a reference
+		 * on dp, but for some reason can't
+		 * get another one... in this case we
+		 * need to do vnode_put on dp in 'bad'
+		 */
+	        if ( (vnode_get(dp)) ) {
+		        error = ENOENT;
+			goto out;
+		}
+		ndp->ni_dvp = dp;
+	}
+	cnp->cn_flags &= ~ISDOTDOT;
+	cnp->cn_flags |= ISLASTCN;
+	ndp->ni_next = cnp->cn_nameptr;
+	ndp->ni_vp = dp;
+
+	if (cnp->cn_flags & AUDITVNPATH1)
+		AUDIT_ARG(vnpath, dp, ARG_VNODE1);
+	else if (cnp->cn_flags & AUDITVNPATH2)
+		AUDIT_ARG(vnpath, dp, ARG_VNODE2);
+	if (cnp->cn_flags & SAVESTART)
+		panic("lookup: SAVESTART");
+
+	return 0;
+out:
+	return error;
+}
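+
+/*
+ * Illustrative consequence of the checks above (a sketch, not new logic):
+ * a degenerate name such as "/" or "." names a directory and can only be
+ * looked up through this path, never created, renamed, or deleted:
+ *
+ *	open("/", O_RDONLY)	-> OK     (cn_nameiop == LOOKUP)
+ *	rmdir("/")		-> EISDIR (cn_nameiop == DELETE)
+ */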
+/*
+ * Search a pathname.
+ * This is a very central and rather complicated routine.
+ *
+ * The pathname is pointed to by ni_ptr and is of length ni_pathlen.
+ * The starting directory is taken from ni_startdir. The pathname is
+ * descended until done, or a symbolic link is encountered. The variable
+ * ni_more is clear if the path is completed; it is set to one if a
+ * symbolic link needing interpretation is encountered.
+ *
+ * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
+ * whether the name is to be looked up, created, renamed, or deleted.
+ * When CREATE, RENAME, or DELETE is specified, information usable in
+ * creating, renaming, or deleting a directory entry may be calculated.
+ * If flag has LOCKPARENT or'ed into it, the parent directory is returned
+ * locked. If flag has WANTPARENT or'ed into it, the parent directory is
+ * returned unlocked. Otherwise the parent directory is not returned. If
+ * the target of the pathname exists and LOCKLEAF is or'ed into the flag
+ * the target is returned locked, otherwise it is returned unlocked.
+ * When creating or renaming and LOCKPARENT is specified, the target may not
+ * be ".".  When deleting and LOCKPARENT is specified, the target may be ".".
+ * 
+ * Overall outline of lookup:
+ *
+ * dirloop:
+ *	identify next component of name at ndp->ni_ptr
+ *	handle degenerate case where name is null string
+ *	if .. and crossing mount points and on mounted filesys, find parent
+ *	call VNOP_LOOKUP routine for next component name
+ *	    directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
+ *	    component vnode returned in ni_vp (if it exists), locked.
+ *	if result vnode is mounted on and crossing mount points,
+ *	    find mounted on vnode
+ *	if more components of name, do next level at dirloop
+ *	return the answer in ni_vp, locked if LOCKLEAF set
+ *	    if LOCKPARENT set, return locked parent in ni_dvp
+ *	    if WANTPARENT set, return unlocked parent in ni_dvp
+ *
+ * Returns:	0			Success
+ *		ENOENT			No such file or directory
+ *		EBADF			Bad file descriptor
+ *		ENOTDIR			Not a directory
+ *		EROFS			Read-only file system [CREATE]
+ *		EISDIR			Is a directory [CREATE]
+ *		cache_lookup_path:ERECYCLE  (vnode was recycled from underneath us, redrive lookup again)
+ *		vnode_authorize:EROFS
+ *		vnode_authorize:EACCES
+ *		vnode_authorize:EPERM
+ *		vnode_authorize:???
+ *		VNOP_LOOKUP:ENOENT	No such file or directory
+ *		VNOP_LOOKUP:EJUSTRETURN	Restart system call (INTERNAL)
+ *		VNOP_LOOKUP:???
+ *		VFS_ROOT:ENOTSUP
+ *		VFS_ROOT:ENOENT
+ *		VFS_ROOT:???
+ */
+int
+lookup(struct nameidata *ndp)
+{
+	char	*cp;		/* pointer into pathname argument */
+	vnode_t		tdp;		/* saved dp */
+	vnode_t		dp;		/* the directory we are searching */
+	int docache = 1;		/* == 0 do not cache last component */
+	int wantparent;			/* 1 => wantparent or lockparent flag */
+	int rdonly;			/* lookup read-only flag bit */
+	int dp_authorized = 0;
+	int error = 0;
+	struct componentname *cnp = &ndp->ni_cnd;
+	vfs_context_t ctx = cnp->cn_context;
+	int vbusyflags = 0;
+	int nc_generation = 0;
+	vnode_t last_dp = NULLVP;
+	int keep_going;
+	int atroot;
+
+	/*
+	 * Setup: break out flag bits into variables.
+	 */
+	if (cnp->cn_flags & (NOCACHE | DOWHITEOUT)) {
+	        if ((cnp->cn_flags & NOCACHE) || (cnp->cn_nameiop == DELETE))
+		        docache = 0;
+	}
+	wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
+	rdonly = cnp->cn_flags & RDONLY;
+	cnp->cn_flags &= ~ISSYMLINK;
+	cnp->cn_consume = 0;
+
+	dp = ndp->ni_startdir;
+	ndp->ni_startdir = NULLVP;
+
+	if ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0)
+			vbusyflags = LK_NOWAIT;
+	cp = cnp->cn_nameptr;
+
+	if (*cp == '\0') {
+	        if ( (vnode_getwithref(dp)) ) {
+			dp = NULLVP;
+		        error = ENOENT;
+			goto bad;
+		}
+		ndp->ni_vp = dp;
+		error = lookup_handle_emptyname(ndp, cnp, wantparent);
+		if (error) {
+			goto bad;
+		}
+
+		return 0;
+	}
+dirloop: 
+	atroot = 0;
+	ndp->ni_vp = NULLVP;
+
+	if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &dp_authorized, last_dp)) ) {
+		dp = NULLVP;
+		goto bad;
+	}
+	if ((cnp->cn_flags & ISLASTCN)) {
+	        if (docache)
+		        cnp->cn_flags |= MAKEENTRY;
+	} else
+	        cnp->cn_flags |= MAKEENTRY;
+
+	dp = ndp->ni_dvp;
+
+	if (ndp->ni_vp != NULLVP) {
+	        /*
+		 * If cache_lookup_path returned a non-NULL ni_vp, then
+		 * we're guaranteed that dp is a VDIR, it has been
+		 * authorized, and vp is not "..".
+		 *
+		 * Make sure we don't try to enter the name back into
+		 * the cache if this vp is purged before we get to that
+		 * check, since we won't have serialized behind whatever
+		 * activity is occurring in the FS that caused the purge.
+		 */
+	        if (dp != NULLVP)
+		        nc_generation = dp->v_nc_generation - 1;
+
+	        goto returned_from_lookup_path;
 	}
 
 	/*
@@ -618,7 +954,8 @@ dirloop:
 					error = ENOENT;
 					goto bad;
 				}
-				goto nextname;
+				atroot = 1;
+				goto returned_from_lookup_path;
 			}
 			if ((dp->v_flag & VROOT) == 0 ||
 			    (cnp->cn_flags & NOCROSSMOUNT))
@@ -653,21 +990,32 @@ unionlookup:
 	        goto lookup_error;
 	}
 	if ( (cnp->cn_flags & DONOTAUTH) != DONOTAUTH ) {
-		if (!dp_authorized) {
-			error = vnode_authorize(dp, NULL, KAUTH_VNODE_SEARCH, ctx);
-			if (error)
-				goto lookup_error;
-		}
-#if CONFIG_MACF
-		error = mac_vnode_check_lookup(ctx, dp, cnp);
-		if (error)
+		error = lookup_authorize_search(dp, cnp, dp_authorized, ctx);
+		if (error) {
 			goto lookup_error;
-#endif /* CONFIG_MACF */
+		}
+	}
+
+	/*
+	 * Now that we've authorized a lookup, we can bail out if the filesystem
+	 * will be doing a batched operation.  Return with an iocount on dvp.
+	 */
+#if NAMEDRSRCFORK
+	if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp) && !(cnp->cn_flags & CN_WANTSRSRCFORK)) {
+#else 
+	if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp)) {
+#endif /* NAMEDRSRCFORK */
+		ndp->ni_flag |= NAMEI_UNFINISHED;
+		ndp->ni_ncgeneration = dp->v_nc_generation;
+		return 0;
 	}
 
         nc_generation = dp->v_nc_generation;
 
-	if ( (error = VNOP_LOOKUP(dp, &ndp->ni_vp, cnp, ctx)) ) {
+	error = VNOP_LOOKUP(dp, &ndp->ni_vp, cnp, ctx);
+
+
+	if ( error ) {
 lookup_error:
 		if ((error == ENOENT) &&
 		    (dp->v_flag & VROOT) && (dp->v_mount != NULL) &&
@@ -699,18 +1047,9 @@ lookup_error:
 		if (ndp->ni_vp != NULLVP)
 			panic("leaf should be empty");
 
-		/*
-		 * If creating and at end of pathname, then can consider
-		 * allowing file to be created.
-		 */
-		if (rdonly) {
-			error = EROFS;
-			goto bad;
-		}
-		if ((cnp->cn_flags & ISLASTCN) && trailing_slash && !(cnp->cn_flags & WILLBEDIR)) {
-			error = ENOENT;
+		error = lookup_validate_creation_path(ndp);
+		if (error)
 			goto bad;
-		}
 		/*
 		 * We return with ni_vp NULL to indicate that the entry
 		 * doesn't currently exist, leaving a pointer to the
@@ -731,337 +1070,33 @@ lookup_error:
 		return (0);
 	}
 returned_from_lookup_path:
-	dp = ndp->ni_vp;
-
-	/*
-	 * Take into account any additional components consumed by
-	 * the underlying filesystem.
-	 */
-	if (cnp->cn_consume > 0) {
-		cnp->cn_nameptr += cnp->cn_consume;
-		ndp->ni_next += cnp->cn_consume;
-		ndp->ni_pathlen -= cnp->cn_consume;
-		cnp->cn_consume = 0;
-	} else {
-		int isdot_or_dotdot;
-		isdot_or_dotdot = (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags & ISDOTDOT);
-
-	        if (dp->v_name == NULL || dp->v_parent == NULLVP) {
-			int  update_flags = 0;
-
-			if (isdot_or_dotdot == 0) {
-			        if (dp->v_name == NULL)
-					update_flags |= VNODE_UPDATE_NAME;
-				if (ndp->ni_dvp != NULLVP && dp->v_parent == NULLVP)
-				        update_flags |= VNODE_UPDATE_PARENT;
-
-				if (update_flags)
-				        vnode_update_identity(dp, ndp->ni_dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, update_flags);
-			}
-		}
-		if ( (cnp->cn_flags & MAKEENTRY) && (dp->v_flag & VNCACHEABLE) && LIST_FIRST(&dp->v_nclinks) == NULL) {
-		        /*
-			 * missing from name cache, but should
-			 * be in it... this can happen if volfs
-			 * causes the vnode to be created or the
-			 * name cache entry got recycled but the
-			 * vnode didn't...
-			 * check to make sure that ni_dvp is valid
-			 * cache_lookup_path may return a NULL
-			 * do a quick check to see if the generation of the
-			 * directory matches our snapshot... this will get
-			 * rechecked behind the name cache lock, but if it
-			 * already fails to match, no need to go any further
-			 */
-		        if (ndp->ni_dvp != NULLVP && (nc_generation == ndp->ni_dvp->v_nc_generation) && (!isdot_or_dotdot))
-			        cache_enter_with_gen(ndp->ni_dvp, dp, cnp, nc_generation);
-		}
-	}
-
-	mounted_on_dp = dp;
-	mounted_on_depth = 0;
-	dont_cache_mp = 0;
-	current_mount_generation = mount_generation;
-	/*
-	 * Check to see if the vnode has been mounted on...
-	 * if so find the root of the mounted file system.
-	 */
-check_mounted_on:
-	if ((dp->v_type == VDIR) && dp->v_mountedhere &&
-            ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
-	  
-	        vnode_lock(dp);
-
-		if ((dp->v_type == VDIR) && (mp = dp->v_mountedhere)) {
-			struct uthread *uth = (struct uthread *)get_bsdthread_info(current_thread());
-
-			mp->mnt_crossref++;
-			vnode_unlock(dp);
-
-				
-			if (vfs_busy(mp, vbusyflags)) {
-				mount_dropcrossref(mp, dp, 0);
-				if (vbusyflags == LK_NOWAIT) {
-					error = ENOENT;
-					goto bad2;	
-				}
-				goto check_mounted_on;
-			}
-
-			/*
-			 * XXX - if this is the last component of the
-			 * pathname, and it's either not a lookup operation
-			 * or the NOTRIGGER flag is set for the operation,
-			 * set a uthread flag to let VFS_ROOT() for autofs
-			 * know it shouldn't trigger a mount.
-			 */
-			if ((cnp->cn_flags & ISLASTCN) &&
-			    (cnp->cn_nameiop != LOOKUP ||
-			     (cnp->cn_flags & NOTRIGGER))) {
-				uth->uu_notrigger = 1;
-				dont_cache_mp = 1;
-			}
-			error = VFS_ROOT(mp, &tdp, ctx);
-			/* XXX - clear the uthread flag */
-			uth->uu_notrigger = 0;
-			/*
-			 * mount_dropcrossref does a vnode_put
-			 * on dp if the 3rd arg is non-zero
-			 */
-			mount_dropcrossref(mp, dp, 1);
-			dp = NULL;
-			vfs_unbusy(mp);
-
-			if (error) {
-				goto bad2;
-			}
-			ndp->ni_vp = dp = tdp;
-			mounted_on_depth++;
-			
-			goto check_mounted_on;
-		} 
-		vnode_unlock(dp);
+	/* We'll always have an iocount on ni_vp when this finishes. */
+	error = lookup_handle_found_vnode(ndp, cnp, rdonly, vbusyflags, &keep_going, nc_generation, wantparent, atroot, ctx);
+	if (error != 0) {
+		goto bad2; 
 	}
 
-#if CONFIG_MACF
-	if (vfs_flags(vnode_mount(dp)) & MNT_MULTILABEL) {
-		error = vnode_label(vnode_mount(dp), NULL, dp, NULL, 0, ctx);
-		if (error)
-		        goto bad2;
-	}
-#endif
-
-	if (mounted_on_depth && !dont_cache_mp) {
-	        mp = mounted_on_dp->v_mountedhere;
-
-		if (mp) {
-		        mount_lock_spin(mp);
-			mp->mnt_realrootvp_vid = dp->v_id;
-			mp->mnt_realrootvp = dp;
-			mp->mnt_generation = current_mount_generation;
-			mount_unlock(mp);
-		}
-	}
-
-	/*
-	 * Check for symbolic link
-	 */
-	if ((dp->v_type == VLNK) &&
-	    ((cnp->cn_flags & FOLLOW) || trailing_slash || *ndp->ni_next == '/')) {
-		cnp->cn_flags |= ISSYMLINK;
-		return (0);
-	}
-
-	/*
-	 * Check for bogus trailing slashes.
-	 */
-	if (trailing_slash) {
-		if (dp->v_type != VDIR) {
-			error = ENOTDIR;
-			goto bad2;
-		}
-		trailing_slash = 0;
-	}
-
-nextname:
-	/*
-	 * Not a symbolic link.  If more pathname,
-	 * continue at next component, else return.
-	 */
-	if (*ndp->ni_next == '/') {
-		cnp->cn_nameptr = ndp->ni_next + 1;
-		ndp->ni_pathlen--;
-		while (*cnp->cn_nameptr == '/') {
-			cnp->cn_nameptr++;
-			ndp->ni_pathlen--;
-		}
-		vnode_put(ndp->ni_dvp);
-
-		cp = cnp->cn_nameptr;
-
-		if (*cp == '\0')
-			goto emptyname;
-
-		/*
-		 * cache_lookup_path is now responsible for dropping io ref on dp
-		 * when it is called again in the dirloop.  This ensures we hold
-		 * a ref on dp until we complete the next round of lookup.
-		 */
-		last_dp = dp;
-		goto dirloop;
-	}
-				  
-	/*
-	 * Disallow directory write attempts on read-only file systems.
-	 */
-	if (rdonly &&
-	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
-		error = EROFS;
-		goto bad2;
-	}
-	if (cnp->cn_flags & SAVESTART) {
-	        /*	
-		 * note that we already hold a reference
-		 * on both dp and ni_dvp, but for some reason
-		 * can't get another one... in this case we
-		 * need to do vnode_put on dp in 'bad2'
-		 */
-	        if ( (vnode_get(ndp->ni_dvp)) ) {
-		        error = ENOENT;
-			goto bad2;
-		}
-		ndp->ni_startdir = ndp->ni_dvp;
-	}
-	if (!wantparent && ndp->ni_dvp) {
-		vnode_put(ndp->ni_dvp);
-		ndp->ni_dvp = NULLVP;
-	}
-
-	if (cnp->cn_flags & AUDITVNPATH1)
-		AUDIT_ARG(vnpath, dp, ARG_VNODE1);
-	else if (cnp->cn_flags & AUDITVNPATH2)
-		AUDIT_ARG(vnpath, dp, ARG_VNODE2);
-
-#if NAMEDRSRCFORK
-	/*
-	 * Caller wants the resource fork.
-	 */
-	if ((cnp->cn_flags & CN_WANTSRSRCFORK) && (dp != NULLVP)) {
-		vnode_t svp = NULLVP;
-		enum nsoperation nsop;
-
-		if (dp->v_type != VREG) {
-			error = ENOENT;
-			goto bad2;
-		}
-		switch (cnp->cn_nameiop) {
-		case DELETE:
-			if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
-				nsop = NS_DELETE;
-			} else {
-				error = EPERM;
-				goto bad2;
-			}
-			break;
-		case CREATE:
-			if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
-				nsop = NS_CREATE;
-			} else {
-				error = EPERM;
-				goto bad2;
-			}
-			break;
-		case LOOKUP:
-			/* Make sure our lookup of "/..namedfork/rsrc" is allowed. */
-			if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
-				nsop = NS_OPEN;
-			} else {
-				error = EPERM;
-				goto bad2;
-			}
-			break;
-		default:
-			error = EPERM;
-			goto bad2;
-		}
-		/* Ask the file system for the resource fork. */
-		error = vnode_getnamedstream(dp, &svp, XATTR_RESOURCEFORK_NAME, nsop, 0, ctx);
-
-		/* During a create, it OK for stream vnode to be missing. */
-		if (error == ENOATTR || error == ENOENT) {
-			error = (nsop == NS_CREATE) ? 0 : ENOENT;
-		}		
-		if (error) {
-			goto bad2;
-		}
-		/* The "parent" of the stream is the file. */
-		if (wantparent) {
-			if (ndp->ni_dvp) {
-#ifndef __LP64__
-				if (ndp->ni_cnd.cn_flags & FSNODELOCKHELD) {
-					ndp->ni_cnd.cn_flags &= ~FSNODELOCKHELD;
-					unlock_fsnode(ndp->ni_dvp, NULL);
-				}	
-#endif /* __LP64__ */
-				vnode_put(ndp->ni_dvp);
-			}
-			ndp->ni_dvp = dp;
-		} else {
-			vnode_put(dp);
-		}
-		ndp->ni_vp = dp = svp;  /* on create this may be null */
+	if (keep_going) {
+		dp = ndp->ni_vp;
 
-		/* Restore the truncated pathname buffer (for audits). */
-		if (ndp->ni_pathlen == 1 && ndp->ni_next[0] == '\0') {
-			ndp->ni_next[0] = '/';
+		/* namei() will handle symlinks */
+		if ((dp->v_type == VLNK) &&
+				((cnp->cn_flags & FOLLOW) || (ndp->ni_flag & NAMEI_TRAILINGSLASH) || *ndp->ni_next == '/')) {
+			return 0; 
 		}
-		cnp->cn_flags  &= ~MAKEENTRY;
-	}
-#endif
-	if (kdebug_enable)
-	        kdebug_lookup(dp, cnp);
-	return (0);
 
-emptyname:
-	cnp->cn_namelen = 0;
-	/*
-	 * A degenerate name (e.g. / or "") which is a way of
-	 * talking about a directory, e.g. like "/." or ".".
-	 */
-	if (dp->v_type != VDIR) {
-		error = ENOTDIR;
-		goto bad;
-	}
-	if (cnp->cn_nameiop != LOOKUP) {
-		error = EISDIR;
-		goto bad;
-	}
-	if (wantparent) {
-	        /*	
-		 * note that we already hold a reference
-		 * on dp, but for some reason can't
-		 * get another one... in this case we
-		 * need to do vnode_put on dp in 'bad'
+		/*
+		 * Otherwise, there's more path to process.  
+		 * cache_lookup_path is now responsible for dropping io ref on dp
+		 * when it is called again in the dirloop.  This ensures we hold
+		 * a ref on dp until we complete the next round of lookup.
 		 */
-	        if ( (vnode_get(dp)) ) {
-		        error = ENOENT;
-			goto bad;
-		}
-		ndp->ni_dvp = dp;
+		last_dp = dp;
+
+		goto dirloop;
 	}
-	cnp->cn_flags &= ~ISDOTDOT;
-	cnp->cn_flags |= ISLASTCN;
-	ndp->ni_next = cp;
-	ndp->ni_vp = dp;
 
-	if (cnp->cn_flags & AUDITVNPATH1)
-		AUDIT_ARG(vnpath, dp, ARG_VNODE1);
-	else if (cnp->cn_flags & AUDITVNPATH2)
-		AUDIT_ARG(vnpath, dp, ARG_VNODE2);
-	if (cnp->cn_flags & SAVESTART)
-		panic("lookup: SAVESTART");
 	return (0);
-
 bad2:
 #ifndef __LP64__
 	if ((cnp->cn_flags & FSNODELOCKHELD)) {
@@ -1070,9 +1105,9 @@ bad2:
 	}
 #endif /* __LP64__ */
 	if (ndp->ni_dvp)
-	        vnode_put(ndp->ni_dvp);
-	if (dp)
-	        vnode_put(dp);
+		vnode_put(ndp->ni_dvp);
+
+	vnode_put(ndp->ni_vp);
 	ndp->ni_vp = NULLVP;
 
 	if (kdebug_enable)
@@ -1095,6 +1130,257 @@ bad:
 	return (error);
 }
 
+int 
+lookup_validate_creation_path(struct nameidata *ndp)
+{
+	struct componentname *cnp = &ndp->ni_cnd;
+
+	/*
+	 * If creating and at end of pathname, then can consider
+	 * allowing file to be created.
+	 */
+	if (cnp->cn_flags & RDONLY) {
+		return EROFS;
+	}
+	if ((cnp->cn_flags & ISLASTCN) && (ndp->ni_flag & NAMEI_TRAILINGSLASH) && !(cnp->cn_flags & WILLBEDIR)) {
+		return ENOENT;
+	}
+	
+	return 0;
+}
+
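+/*
+ * Illustrative reading of the trailing-slash rule above (a sketch; the
+ * syscalls shown are examples, assuming mkdir marks its node WILLBEDIR):
+ * on the last component, creating "name/" fails with ENOENT unless the
+ * new node will be a directory:
+ *
+ *	open("/tmp/newfile/", O_CREAT | O_WRONLY, 0644)	-> ENOENT
+ *	mkdir("/tmp/newdir/", 0755)			-> allowed (WILLBEDIR)
+ */
+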
+/*
+ * Modifies only ni_vp.  Always returns with ni_vp still valid (iocount held).
+ */
+int
+lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, 
+		int vbusyflags, vfs_context_t ctx)
+{
+	mount_t mp;
+	vnode_t tdp;
+	int error = 0;
+	uthread_t uth;
+	uint32_t depth = 0;
+	int dont_cache_mp = 0;
+	vnode_t	mounted_on_dp;
+	int current_mount_generation = 0;
+	
+	mounted_on_dp = dp;
+	current_mount_generation = mount_generation;
+
+	while ((dp->v_type == VDIR) && dp->v_mountedhere &&
+			((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
+#if CONFIG_TRIGGERS
+		/*
+		 * For a trigger vnode, call its resolver when crossing its mount (if requested)
+		 */
+		if (dp->v_resolve) {
+			(void) vnode_trigger_resolve(dp, ndp, ctx);
+		}
+#endif
+		vnode_lock(dp);
+
+		if ((dp->v_type == VDIR) && (mp = dp->v_mountedhere)) {
+
+			mp->mnt_crossref++;
+			vnode_unlock(dp);
+
+
+			if (vfs_busy(mp, vbusyflags)) {
+				mount_dropcrossref(mp, dp, 0);
+				if (vbusyflags == LK_NOWAIT) {
+					error = ENOENT;
+					goto out;
+				}
+
+				continue;
+			}
+
+
+			/*
+			 * XXX - if this is the last component of the
+			 * pathname, and it's either not a lookup operation
+			 * or the NOTRIGGER flag is set for the operation,
+			 * set a uthread flag to let VFS_ROOT() for autofs
+			 * know it shouldn't trigger a mount.
+			 */
+			uth = (struct uthread *)get_bsdthread_info(current_thread());
+			if ((cnp->cn_flags & ISLASTCN) &&
+					(cnp->cn_nameiop != LOOKUP ||
+					 (cnp->cn_flags & NOTRIGGER))) {
+				uth->uu_notrigger = 1;
+				dont_cache_mp = 1;
+			}
+
+			error = VFS_ROOT(mp, &tdp, ctx);
+			/* XXX - clear the uthread flag */
+			uth->uu_notrigger = 0;
+
+			mount_dropcrossref(mp, dp, 0);
+			vfs_unbusy(mp);
+
+			if (error) {
+				goto out;
+			}
+
+			vnode_put(dp);
+			ndp->ni_vp = dp = tdp;
+			depth++;
+
+#if CONFIG_TRIGGERS
+			/*
+			 * Check if root dir is a trigger vnode
+			 */
+			if (dp->v_resolve) {
+				error = vnode_trigger_resolve(dp, ndp, ctx);
+				if (error) {
+					goto out;
+				}
+			}
+#endif			
+
+		} else { 
+			vnode_unlock(dp);
+			break;
+		}
+	}
+
+	if (depth && !dont_cache_mp) {
+	        mp = mounted_on_dp->v_mountedhere;
+
+		if (mp) {
+		        mount_lock_spin(mp);
+			mp->mnt_realrootvp_vid = dp->v_id;
+			mp->mnt_realrootvp = dp;
+			mp->mnt_generation = current_mount_generation;
+			mount_unlock(mp);
+		}
+	}
+
+	return 0;
+
+out:
+	return error;
+}
+
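+/*
+ * Sketch of the traversal invariant above (illustrative only): with mount
+ * m2 covering directory d, and mount m1 covering the root of m2, the loop
+ * rewrites ni_vp as
+ *
+ *	d -> VFS_ROOT(m2) -> VFS_ROOT(m1)
+ *
+ * taking one crossref and one vfs_busy per hop, both dropped before the
+ * next iteration, and vnode_put'ing the previous vnode so the loop always
+ * leaves a single iocount on ni_vp.
+ */
+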
+/*
+ * Takes ni_vp and ni_dvp non-NULL.  Returns with *new_dp set to the location
+ * at which to start a lookup with a resolved path, and all other iocounts dropped.
+ */
+int 
+lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx)
+{
+	int error;
+	char *cp;		/* pointer into pathname argument */
+	uio_t auio;
+	char uio_buf[ UIO_SIZEOF(1) ];
+	int need_newpathbuf;
+	u_int linklen;
+	struct componentname *cnp = &ndp->ni_cnd;
+	vnode_t dp;
+	char *tmppn;
+
+#ifndef __LP64__
+	if ((cnp->cn_flags & FSNODELOCKHELD)) {
+		cnp->cn_flags &= ~FSNODELOCKHELD;
+		unlock_fsnode(ndp->ni_dvp, NULL);
+	}	
+#endif /* __LP64__ */
+
+	if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
+		return ELOOP;
+	}
+#if CONFIG_MACF
+	if ((error = mac_vnode_check_readlink(ctx, ndp->ni_vp)) != 0)
+		return error;
+#endif /* MAC */
+	if (ndp->ni_pathlen > 1 || !(cnp->cn_flags & HASBUF))
+		need_newpathbuf = 1;
+	else
+		need_newpathbuf = 0;
+
+	if (need_newpathbuf) {
+		MALLOC_ZONE(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+		if (cp == NULL) {
+			return ENOMEM;
+		}
+	} else {
+		cp = cnp->cn_pnbuf;
+	}
+	auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf));
+
+	uio_addiov(auio, CAST_USER_ADDR_T(cp), MAXPATHLEN);
+
+	error = VNOP_READLINK(ndp->ni_vp, auio, ctx);
+	if (error) {
+		if (need_newpathbuf)
+			FREE_ZONE(cp, MAXPATHLEN, M_NAMEI);
+		return error;
+	}
+
+	/* 
+	 * Safe to set unsigned with a [larger] signed type here
+	 * because 0 <= uio_resid <= MAXPATHLEN and MAXPATHLEN
+	 * is only 1024.
+	 */
+	linklen = MAXPATHLEN - (u_int)uio_resid(auio);
+	if (linklen + ndp->ni_pathlen > MAXPATHLEN) {
+		if (need_newpathbuf)
+			FREE_ZONE(cp, MAXPATHLEN, M_NAMEI);
+
+		return ENAMETOOLONG;
+	}
+	if (need_newpathbuf) {
+		long len = cnp->cn_pnlen;
+
+		tmppn = cnp->cn_pnbuf;
+		bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
+		cnp->cn_pnbuf = cp;
+		cnp->cn_pnlen = MAXPATHLEN;
+
+		if ( (cnp->cn_flags & HASBUF) )
+			FREE_ZONE(tmppn, len, M_NAMEI);
+		else
+			cnp->cn_flags |= HASBUF;
+	} else
+		cnp->cn_pnbuf[linklen] = '\0';
+
+	ndp->ni_pathlen += linklen;
+	cnp->cn_nameptr = cnp->cn_pnbuf;
+
+	/*
+	 * starting point for 'relative'
+	 * symbolic link path
+	 */
+	dp = ndp->ni_dvp;
+
+	/*
+	 * get rid of references returned via 'lookup'
+	 */
+	vnode_put(ndp->ni_vp);
+	vnode_put(ndp->ni_dvp);	/* ALWAYS have a dvp for a symlink */
+
+	ndp->ni_vp = NULLVP;
+	ndp->ni_dvp = NULLVP;
+
+	/*
+	 * Check if symbolic link restarts us at the root
+	 */
+	if (*(cnp->cn_nameptr) == '/') {
+		while (*(cnp->cn_nameptr) == '/') {
+			cnp->cn_nameptr++;
+			ndp->ni_pathlen--;
+		}
+		if ((dp = ndp->ni_rootdir) == NULLVP) {
+			return ENOENT;
+		}
+	}
+
+	*new_dp = dp;
+
+	return 0;
+}
+
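+/*
+ * Worked example of the splice above (illustrative only): looking up
+ * "a/lnk/b" where "lnk" -> "target" leaves ni_next pointing at "/b".
+ * After VNOP_READLINK the new buffer holds "target" (linklen == 6), the
+ * bcopy appends the remainder to form "target/b", and ni_pathlen grows
+ * by linklen before the lookup is redriven from ni_dvp.
+ */
+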
 /*
  * relookup - lookup a path name component
  *    Used by lookup to re-aquire things.
@@ -1205,18 +1491,27 @@ bad:
 	return (error);
 }
 
-/*
- * Free pathname buffer
- */
 void
-nameidone(struct nameidata *ndp)
+namei_unlock_fsnode(struct nameidata *ndp) 
 {
 #ifndef __LP64__
 	if ((ndp->ni_cnd.cn_flags & FSNODELOCKHELD)) {
 	        ndp->ni_cnd.cn_flags &= ~FSNODELOCKHELD;
 		unlock_fsnode(ndp->ni_dvp, NULL);
 	}	
+#else
+	(void)ndp;
 #endif /* __LP64__ */
+}
+
+/*
+ * Free pathname buffer
+ */
+void
+nameidone(struct nameidata *ndp)
+{
+	namei_unlock_fsnode(ndp);
+
 	if (ndp->ni_cnd.cn_flags & HASBUF) {
 		char *tmp = ndp->ni_cnd.cn_pnbuf;
 
@@ -1258,6 +1553,7 @@ nameidone(struct nameidata *ndp)
  * fails because /foo_bar_baz is not found will only log "/foo_bar_baz", with
  * no '>' padding.  But /foo_bar/spam would log "/foo_bar>>>>".
  */
+#if !defined(NO_KDEBUG)
 static void
 kdebug_lookup(struct vnode *dp, struct componentname *cnp)
 {
@@ -1305,7 +1601,34 @@ kdebug_lookup(struct vnode *dp, struct componentname *cnp)
 		KERNEL_DEBUG_CONSTANT(code, dbg_parms[i], dbg_parms[i+1], dbg_parms[i+2], dbg_parms[i+3], 0);
 	}
 }
+#else /* NO_KDEBUG */
+static void
+kdebug_lookup(struct vnode *dp __unused, struct componentname *cnp __unused)
+{
+}
+#endif /* NO_KDEBUG */
+
+int
+vfs_getbyid(fsid_t *fsid, ino64_t ino, vnode_t *vpp, vfs_context_t ctx)
+{
+	mount_t mp;
+	int error;
+	
+	mp = mount_lookupby_volfsid(fsid->val[0], 1);
+	if (mp == NULL) {
+		return EINVAL;
+	}
+
+	/* Get the target vnode. */
+	if (ino == 2) {
+		error = VFS_ROOT(mp, vpp, ctx);
+	} else {
+		error = VFS_VGET(mp, ino, vpp, ctx);
+	}
 
+	vfs_unbusy(mp);
+	return error;
+}
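+
+/*
+ * Hypothetical caller-side sketch for vfs_getbyid() (the fsid/ino/ctx
+ * values are assumed to be in hand; not part of this change): resolve a
+ * <fsid, inode> pair to a vnode and drop the returned iocount when done.
+ *
+ *	vnode_t vp;
+ *	if (vfs_getbyid(&fsid, ino, &vp, ctx) == 0) {
+ *		... use vp ...
+ *		vnode_put(vp);
+ *	}
+ */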
 /*
  * Obtain the real path from a legacy volfs style path.
  *
@@ -1384,3 +1707,59 @@ out:
 	return (error);
 }
 #endif
+
+void
+lookup_compound_vnop_post_hook(int error, vnode_t dvp, vnode_t vp, struct nameidata *ndp, int did_create)
+{
+	if (error == 0 && vp == NULLVP) {
+		panic("NULL vp with error == 0.\n");
+	}
+
+	/* 
+	 * We don't want to do any of this if we didn't use the compound vnop
+	 * to perform the lookup... i.e. if we're allowing and using the legacy pattern,
+	 * where we did a full lookup.
+	 */
+	if ((ndp->ni_flag & NAMEI_COMPOUND_OP_MASK) == 0) {
+		return;
+	}
+
+	/* 
+	 * If we're going to continue the lookup, we'll handle
+	 * all lookup-related updates at that time.
+	 */
+	if (error == EKEEPLOOKING) {
+		return;
+	}
+
+	/*
+	 * Only audit or update cache for *found* vnodes.  For creation
+	 * neither would happen in the non-compound-vnop case.
+	 */
+	if ((vp != NULLVP) && !did_create) {
+		/* 
+		 * If MAKEENTRY isn't set, and we've done a successful compound VNOP, 
+		 * then we certainly don't want to update cache or identity.
+		 */
+		if ((error != 0) || (ndp->ni_cnd.cn_flags & MAKEENTRY)) {
+			lookup_consider_update_cache(dvp, vp, &ndp->ni_cnd, ndp->ni_ncgeneration);
+		}
+		if (ndp->ni_cnd.cn_flags & AUDITVNPATH1)
+			AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+		else if (ndp->ni_cnd.cn_flags & AUDITVNPATH2)
+			AUDIT_ARG(vnpath, vp, ARG_VNODE2);
+	}
+
+	/* 
+	 * If you created (whether you opened or not), cut a lookup tracepoint 
+	 * for the parent dir (as would happen without a compound vnop).  Note: we may need
+	 * a vnode despite failure in this case!
+	 *
+	 * If you did not create:
+	 * 	Found child (succeeded or not): cut a tracepoint for the child.  
+	 * 	Did not find child: cut a tracepoint with the parent.
+	 */
+	if (kdebug_enable) {
+	        kdebug_lookup(vp ? vp : dvp, &ndp->ni_cnd); 
+	}
+}
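+
+/*
+ * Illustrative summary of the tracepoint choice above (not new logic):
+ * kdebug_lookup() is cut against the found/created child when one exists,
+ * and against the parent directory otherwise (e.g. a failed lookup, or a
+ * create that produced no vnode).
+ */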
diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c
index 3b10114cb..462fbef79 100644
--- a/bsd/vfs/vfs_subr.c
+++ b/bsd/vfs/vfs_subr.c
@@ -118,12 +118,17 @@
 
 #include <mach/mach_types.h>
 #include <mach/memory_object_types.h>
+#include <mach/memory_object_control.h>
 
 #include <kern/kalloc.h>	/* kalloc()/kfree() */
 #include <kern/clock.h>		/* delay_for_interval() */
 #include <libkern/OSAtomic.h>	/* OSAddAtomic() */
 
 
+#ifdef JOE_DEBUG
+#include <libkern/OSDebug.h>
+#endif
+
 #include <vm/vm_protos.h>	/* vnode_pager_vrele() */
 
 #if CONFIG_MACF
@@ -133,6 +138,10 @@
 extern lck_grp_t *vnode_lck_grp;
 extern lck_attr_t *vnode_lck_attr;
 
+#if CONFIG_TRIGGERS
+extern lck_grp_t *trigger_vnode_lck_grp;
+extern lck_attr_t *trigger_vnode_lck_attr;
+#endif
 
 extern lck_mtx_t * mnt_list_mtx_lock;
 
@@ -145,6 +154,16 @@ int	vttoif_tab[9] = {
 	S_IFSOCK, S_IFIFO, S_IFMT,
 };
 
+
+/* XXX These should be in a BSD accessible Mach header, but aren't. */
+extern void             memory_object_mark_used(
+	memory_object_control_t         control);
+
+extern void             memory_object_mark_unused(
+	memory_object_control_t         control,
+	boolean_t                       rage);
+
+
 /* XXX next protptype should be from <nfs/nfs.h> */
 extern int       nfs_vinvalbuf(vnode_t, int, vfs_context_t, int);
 
@@ -173,7 +192,6 @@ static void vclean(vnode_t vp, int flag);
 static void vnode_reclaim_internal(vnode_t, int, int, int);
 
 static void vnode_dropiocount (vnode_t);
-static errno_t vnode_getiocount(vnode_t vp, unsigned int vid, int vflags);
 
 static vnode_t checkalias(vnode_t vp, dev_t nvp_rdev);
 static int  vnode_reload(vnode_t);
@@ -188,6 +206,9 @@ static int vnode_iterate_prepare(mount_t);
 static int vnode_iterate_reloadq(mount_t);
 static void vnode_iterate_clear(mount_t);
 static mount_t vfs_getvfs_locked(fsid_t *);
+static int vn_create_reg(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp,
+		struct vnode_attr *vap, uint32_t flags, int fmode, uint32_t *statusp, vfs_context_t ctx);
+static int vnode_authattr_new_internal(vnode_t dvp, struct vnode_attr *vap, int noauth, uint32_t *defaulted_fieldsp, vfs_context_t ctx);
 
 errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *); 
 
@@ -195,6 +216,11 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
 static void record_vp(vnode_t vp, int count);
 #endif
 
+#if CONFIG_TRIGGERS
+static int vnode_resolver_create(mount_t, vnode_t, struct vnode_trigger_param *, boolean_t external);
+static void vnode_resolver_detach(vnode_t);
+#endif
+
 TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
 TAILQ_HEAD(deadlst, vnode) vnode_dead_list;	/* vnode dead list */
 
@@ -370,29 +396,27 @@ void
 vnode_writedone(vnode_t vp)
 {
 	if (vp) {
-	        OSAddAtomic(-1, &vp->v_numoutput);
+		int need_wakeup = 0;
 
-		if (vp->v_numoutput <= 1) {
-		        int need_wakeup = 0;
+	        OSAddAtomic(-1, &vp->v_numoutput);
 
-		        vnode_lock_spin(vp);
+		vnode_lock_spin(vp);
 
-			if (vp->v_numoutput < 0)
-			        panic("vnode_writedone: numoutput < 0");
+		if (vp->v_numoutput < 0)
+			panic("vnode_writedone: numoutput < 0");
 
-			if ((vp->v_flag & VTHROTTLED) && (vp->v_numoutput <= 1)) {
-			        vp->v_flag &= ~VTHROTTLED;
-				need_wakeup = 1;
-			}
-			if ((vp->v_flag & VBWAIT) && (vp->v_numoutput == 0)) {
-			        vp->v_flag &= ~VBWAIT;
-				need_wakeup = 1;
-			}
-			vnode_unlock(vp);
-		
-			if (need_wakeup)
-			        wakeup((caddr_t)&vp->v_numoutput);
+		if ((vp->v_flag & VTHROTTLED)) {
+			vp->v_flag &= ~VTHROTTLED;
+			need_wakeup = 1;
+		}
+		if ((vp->v_flag & VBWAIT) && (vp->v_numoutput == 0)) {
+			vp->v_flag &= ~VBWAIT;
+			need_wakeup = 1;
 		}
+		vnode_unlock(vp);
+		
+		if (need_wakeup)
+			wakeup((caddr_t)&vp->v_numoutput);
 	}
 }
 
@@ -781,6 +805,13 @@ mount_refdrain(mount_t mp)
 	return(0);
 }
 
+/* Tags the mount point as not supporting extended readdir for NFS exports. */
+void 
+mount_set_noreaddirext(mount_t mp) {
+	mount_lock (mp);
+	mp->mnt_kern_flag |= MNTK_DENY_READDIREXT;
+	mount_unlock (mp);
+}
 
 /*
  * Mark a mount point as busy. Used to synchronize access and to delay
@@ -887,6 +918,8 @@ vfs_rootmountalloc_internal(struct vfstable *vfsp, const char *devname)
 	mp->mnt_ioflags = 0;
 	mp->mnt_realrootvp = NULLVP;
 	mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
+	mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
+	mp->mnt_devbsdunit = 0;
 
 	mount_lock_init(mp);
 	(void)vfs_busy(mp, LK_NOWAIT);
@@ -1120,6 +1153,8 @@ vfs_getvfs_by_mntonname(char *path)
 		if (!strncmp(mp->mnt_vfsstat.f_mntonname, path,
 					sizeof(mp->mnt_vfsstat.f_mntonname))) {
 			retmp = mp;
+			if (mount_iterref(retmp, 1))
+				retmp = NULL;
 			goto out;
 		}
 	}
@@ -1358,6 +1393,7 @@ found_alias:
 		nvp->v_rdev = nvp_rdev;
 		nvp->v_specflags = 0;
 		nvp->v_speclastr = -1;
+		nvp->v_specinfo->si_opencount = 0;
 
 		SPECHASH_LOCK();
 		
@@ -1416,22 +1452,16 @@ int
 vget_internal(vnode_t vp, int vid, int vflags)
 {
 	int error = 0;
-	int vpid;
 
 	vnode_lock_spin(vp);
 
-	if (vflags & VNODE_WITHID)
-		vpid = vid;
-	else
-		vpid = vp->v_id;    // save off the original v_id
-
 	if ((vflags & VNODE_WRITEABLE) && (vp->v_writecount == 0))
 	        /*
 		 * vnode to be returned only if it has writers opened 
 		 */
 	        error = EINVAL;
 	else
-	        error = vnode_getiocount(vp, vpid, vflags);
+	        error = vnode_getiocount(vp, vid, vflags);
 
 	vnode_unlock(vp);
 
@@ -1446,7 +1476,7 @@ int
 vnode_ref(vnode_t vp)
 {
 
-        return (vnode_ref_ext(vp, 0));
+        return (vnode_ref_ext(vp, 0, 0));
 }
 
 /*
@@ -1454,7 +1484,7 @@ vnode_ref(vnode_t vp)
  *		ENOENT			No such file or directory [terminating]
  */
 int
-vnode_ref_ext(vnode_t vp, int fmode)
+vnode_ref_ext(vnode_t vp, int fmode, int flags)
 {
 	int	error = 0;
 
@@ -1471,10 +1501,12 @@ vnode_ref_ext(vnode_t vp, int fmode)
 	/*
 	 * if you are the owner of drain/termination, can acquire usecount
 	 */
-	if ((vp->v_lflag & (VL_DRAIN | VL_TERMINATE | VL_DEAD))) {
-	        if (vp->v_owner != current_thread()) {
-		        error = ENOENT;
-			goto out;
+	if ((flags & VNODE_REF_FORCE) == 0) {
+		if ((vp->v_lflag & (VL_DRAIN | VL_TERMINATE | VL_DEAD))) {
+			if (vp->v_owner != current_thread()) {
+				error = ENOENT;
+				goto out;
+			}
 		}
 	}
 	vp->v_usecount++;
@@ -1507,6 +1539,13 @@ vnode_ref_ext(vnode_t vp, int fmode)
 			vnode_list_remove(vp);
 		}
 	}
+	if (vp->v_usecount == 1 && vp->v_type == VREG && !(vp->v_flag & VSYSTEM)) {
+
+		if (vp->v_ubcinfo) {
+			vnode_lock_convert(vp);
+			memory_object_mark_used(vp->v_ubcinfo->ui_control);
+		}
+	}
 out:
 	vnode_unlock(vp);
 
@@ -1659,6 +1698,7 @@ vnode_rele_ext(vnode_t vp, int fmode, int dont_reenter)
 void
 vnode_rele_internal(vnode_t vp, int fmode, int dont_reenter, int locked)
 {
+
 	if ( !locked)
 	        vnode_lock_spin(vp);
 #if DIAGNOSTIC
@@ -1689,9 +1729,7 @@ vnode_rele_internal(vnode_t vp, int fmode, int dont_reenter, int locked)
 	                vp->v_lflag |= VL_NEEDINACTIVE;
 			vp->v_flag  &= ~(VNOCACHE_DATA | VRAOFF | VOPENEVT);
 		}
-		if ( !locked)
-		        vnode_unlock(vp);
-		return;
+		goto done;
 	}
 	vp->v_flag  &= ~(VNOCACHE_DATA | VRAOFF | VOPENEVT);
 
@@ -1709,9 +1747,8 @@ vnode_rele_internal(vnode_t vp, int fmode, int dont_reenter, int locked)
 		        vp->v_flag |= VAGE;
 		}
 	        vnode_list_add(vp);
-		if ( !locked)
-		        vnode_unlock(vp);
-		return;
+
+		goto done;
 	}
 	/*
 	 * at this point both the iocount and usecount
@@ -1746,15 +1783,22 @@ vnode_rele_internal(vnode_t vp, int fmode, int dont_reenter, int locked)
 		
 		if (ut->uu_defer_reclaims) {
 		        vp->v_defer_reclaimlist = ut->uu_vreclaims;
-				ut->uu_vreclaims = vp;
-		        goto defer_reclaim;
+			ut->uu_vreclaims = vp;
+			goto done;
 		}
 		vnode_lock_convert(vp);
 	        vnode_reclaim_internal(vp, 1, 1, 0);
 	}
 	vnode_dropiocount(vp);
 	vnode_list_add(vp);
-defer_reclaim:
+done:
+	if (vp->v_usecount == 0 && vp->v_type == VREG && !(vp->v_flag & VSYSTEM)) {
+
+		if (vp->v_ubcinfo) {
+			vnode_lock_convert(vp);
+			memory_object_mark_unused(vp->v_ubcinfo->ui_control, (vp->v_flag & VRAGE) == VRAGE);
+		}
+	}
 	if ( !locked)
 	        vnode_unlock(vp);
 	return;
@@ -2020,13 +2064,13 @@ vclean(vnode_t vp, int flags)
 #endif
 		{
 		        VNOP_FSYNC(vp, MNT_WAIT, ctx);
-			buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
+			buf_invalidateblks(vp, BUF_WRITE_DATA | BUF_INVALIDATE_LOCKED, 0, 0);
 		}
 		if (UBCINFOEXISTS(vp))
 		        /*
 			 * Clean the pages in VM.
 			 */
-		        (void)ubc_sync_range(vp, (off_t)0, ubc_getsize(vp), UBC_PUSHALL);
+		        (void)ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL, UBC_PUSHALL | UBC_INVALIDATE | UBC_SYNC);
 	}
 	if (active || need_inactive) 
 		VNOP_INACTIVE(vp, ctx);
@@ -2039,18 +2083,7 @@ vclean(vnode_t vp, int flags)
 		if (vnode_isshadow(vp)) {
 			vnode_relenamedstream(pvp, vp, ctx);
 		}
-
-		/*
-		 * Because vclean calls VNOP_INACTIVE prior to calling vnode_relenamedstream, we may not have 
-		 * torn down and/or deleted the shadow file yet.  On HFS, if the shadow file is sufficiently large 
-		 * and occupies a large number of extents, the deletion will be deferred until VNOP_INACTIVE 
-		 * and the file treated like an open-unlinked.  To rectify this, call VNOP_INACTIVE again
-		 * explicitly to force its removal.
-		 */
-		if (vnode_isshadow(vp)) {
-			VNOP_INACTIVE(vp, ctx);
-		}
-
+		
 		/* 
 		 * No more streams associated with the parent.  We
 		 * have a ref on it, so its identity is stable.
@@ -2072,6 +2105,14 @@ vclean(vnode_t vp, int flags)
 	 */
 	ubc_destroy_named(vp);
 
+#if CONFIG_TRIGGERS
+	/*
+	 * cleanup trigger info from vnode (if any)
+	 */
+	if (vp->v_resolve)
+		vnode_resolver_detach(vp);
+#endif
+
 	/*
 	 * Reclaim the vnode.
 	 */
@@ -2301,7 +2342,7 @@ vcount(vnode_t vp)
 
 loop:
 	if (!vnode_isaliased(vp))
-	        return (vp->v_usecount - vp->v_kusecount);
+	        return (vp->v_specinfo->si_opencount);
 	count = 0;
 
 	SPECHASH_LOCK();
@@ -2332,7 +2373,7 @@ loop:
 				vnode_unlock(vq);
 				goto loop;
 			}
-			count += (vq->v_usecount - vq->v_kusecount);
+			count += vq->v_specinfo->si_opencount;
 		}
 		vnode_unlock(vq);
 
@@ -2710,7 +2751,7 @@ sysctl_vnode
 }
 
 SYSCTL_PROC(_kern, KERN_VNODE, vnode,
-		CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MASKED,
+		CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED,
 		0, 0, sysctl_vnode, "S,", "");
 
 
@@ -2804,6 +2845,8 @@ vnode_pager_vrele(vnode_t vp)
 
 #include <sys/disk.h>
 
+u_int32_t rootunit = (u_int32_t)-1;
+
 errno_t
 vfs_init_io_attributes(vnode_t devvp, mount_t mp)
 {
@@ -2824,24 +2867,25 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp)
 	vfs_context_t ctx = vfs_context_current();
 	int isssd = 0;
 	int isvirtual = 0;
+
+
+	VNOP_IOCTL(devvp, DKIOCGETTHROTTLEMASK, (caddr_t)&mp->mnt_throttle_mask, 0, NULL);
 	/*
-	 * determine if this mount point exists on the same device as the root
-	 * partition... if so, then it comes under the hard throttle control
+	 * as a reasonable approximation, only use the lowest bit of the mask
+	 * to generate a disk unit number
 	 */
-	int        thisunit = -1;
-	static int rootunit = -1;
+	mp->mnt_devbsdunit = num_trailing_0(mp->mnt_throttle_mask);
 
-	if (rootunit == -1) {
-	        if (VNOP_IOCTL(rootvp, DKIOCGETBSDUNIT, (caddr_t)&rootunit, 0, ctx))
-		        rootunit = -1; 
-		else if (rootvp == devvp)
-		        mp->mnt_kern_flag |= MNTK_ROOTDEV;
-	}
-	if (devvp != rootvp && rootunit != -1) {
-	        if (VNOP_IOCTL(devvp, DKIOCGETBSDUNIT, (caddr_t)&thisunit, 0, ctx) == 0) {
-		        if (thisunit == rootunit)
-			        mp->mnt_kern_flag |= MNTK_ROOTDEV;
-		}
+	if (devvp == rootvp)
+		rootunit = mp->mnt_devbsdunit;
+
+	if (mp->mnt_devbsdunit == rootunit) {
+		/*
+		 * this mount point exists on the same device as the root
+		 * partition, so it comes under the hard throttle control...
+		 * this is true even for the root mount point itself
+		 */
+		mp->mnt_kern_flag |= MNTK_ROOTDEV;
 	}
 	/*
 	 * force the spec device to re-cache
@@ -2875,7 +2919,6 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp)
 	        if (isssd)
 		        mp->mnt_kern_flag |= MNTK_SSD;
 	}
-
 	if ((error = VNOP_IOCTL(devvp, DKIOCGETFEATURES,
 				(caddr_t)&features, 0, ctx)))
 		return (error);
@@ -3253,7 +3296,11 @@ sysctl_vfs_ctlbyfsid(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
 			sfs.f_fsid = sp->f_fsid;
 			sfs.f_owner = sp->f_owner;
     
-			strlcpy(sfs.f_fstypename, sp->f_fstypename, MFSNAMELEN);
+			if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
+				strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
+			} else {
+				strlcpy(sfs.f_fstypename, sp->f_fstypename, MFSNAMELEN);
+			}
 			strlcpy(sfs.f_mntonname, sp->f_mntonname, MNAMELEN);
 			strlcpy(sfs.f_mntfromname, sp->f_mntfromname, MNAMELEN);
             
@@ -3307,7 +3354,11 @@ sysctl_vfs_ctlbyfsid(__unused struct sysctl_oid *oidp, void *arg1, int arg2,
 			sfs.f_fsid = sp->f_fsid;
 			sfs.f_owner = sp->f_owner;
     
-			strlcpy(sfs.f_fstypename, sp->f_fstypename, MFSNAMELEN);
+			if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
+				strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
+			} else {
+				strlcpy(sfs.f_fstypename, sp->f_fstypename, MFSNAMELEN);
+			}
 			strlcpy(sfs.f_mntonname, sp->f_mntonname, MNAMELEN);
 			strlcpy(sfs.f_mntfromname, sp->f_mntfromname, MNAMELEN);
             
@@ -3414,14 +3465,14 @@ sysctl_vfs_noremotehang(__unused struct sysctl_oid *oidp,
 }
 
 /* the vfs.generic. branch. */
-SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RW|CTLFLAG_LOCKED, NULL, "vfs generic hinge");
+SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RW | CTLFLAG_LOCKED, NULL, "vfs generic hinge");
 /* retrieve a list of mounted filesystem fsid_t */
-SYSCTL_PROC(_vfs_generic, OID_AUTO, vfsidlist, CTLFLAG_RD,
+SYSCTL_PROC(_vfs_generic, OID_AUTO, vfsidlist, CTLFLAG_RD | CTLFLAG_LOCKED,
     NULL, 0, sysctl_vfs_vfslist, "S,fsid", "List of mounted filesystem ids");
 /* perform operations on filesystem via fsid_t */
-SYSCTL_NODE(_vfs_generic, OID_AUTO, ctlbyfsid, CTLFLAG_RW|CTLFLAG_LOCKED,
+SYSCTL_NODE(_vfs_generic, OID_AUTO, ctlbyfsid, CTLFLAG_RW | CTLFLAG_LOCKED,
     sysctl_vfs_ctlbyfsid, "ctlbyfsid");
-SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW|CTLFLAG_ANYBODY,
+SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW | CTLFLAG_ANYBODY,
     NULL, 0, sysctl_vfs_noremotehang, "I", "noremotehang");
 	
 	
@@ -3448,18 +3499,18 @@ retry:
 
 	vnode_list_lock();
 
-	if ( !TAILQ_EMPTY(&vnode_dead_list)) {
-	        /*
-		 * Can always reuse a dead one
+	if ((numvnodes - deadvnodes) < desiredvnodes || force_alloc) {
+		if ( !TAILQ_EMPTY(&vnode_dead_list)) {
+			/*
+			 * Can always reuse a dead one
+			 */
+			vp = TAILQ_FIRST(&vnode_dead_list);
+			goto steal_this_vp;
+		}
+		/*
+		 * no dead vnodes available... if we're under
+		 * the limit, we'll create a new vnode
 		 */
-	        vp = TAILQ_FIRST(&vnode_dead_list);
-		goto steal_this_vp;
-	}
-	/*
-	 * no dead vnodes available... if we're under
-	 * the limit, we'll create a new vnode
-	 */
-	if (numvnodes < desiredvnodes || force_alloc) {
 		numvnodes++;
 		vnode_list_unlock();
 
@@ -3493,17 +3544,22 @@ retry:
 			panic("new_vnode: vp (%p) on RAGE list not marked VLIST_RAGE", vp);
 
 		    // if we're a dependency-capable process, skip vnodes that can
-		    // cause recycling deadlocks. (i.e. this process is diskimages
-		    // helper and the vnode is in a disk image).
-		    //
-		    if ((curproc->p_flag & P_DEPENDENCY_CAPABLE) == 0 || vp->v_mount == NULL || vp->v_mount->mnt_dependent_process == NULL) {
-			break;
+			// cause recycling deadlocks. (i.e. this process is diskimages
+			// helper and the vnode is in a disk image).  Querying the
+			// mnt_kern_flag for the mount's virtual device status
+			// is safer than checking the mnt_dependent_process, which
+			// may not be updated if there are multiple devnode layers 
+			// in between the disk image and the final consumer.
+
+		    if ((curproc->p_flag & P_DEPENDENCY_CAPABLE) == 0 || vp->v_mount == NULL || 
+					(vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV) == 0) {
+				break;
 		    }
 
 		    // don't iterate more than MAX_WALK_COUNT vnodes to
 		    // avoid keeping the vnode list lock held for too long.
 		    if (walk_count++ > MAX_WALK_COUNT) {
-			vp = NULL;
+				vp = NULL;
 			break;
 		    }
 		}
@@ -3516,12 +3572,18 @@ retry:
 		 */
 		walk_count = 0;
 		TAILQ_FOREACH(vp, &vnode_free_list, v_freelist) {
-		    // if we're a dependency-capable process, skip vnodes that can
-		    // cause recycling deadlocks. (i.e. this process is diskimages
-		    // helper and the vnode is in a disk image)
-		    //
-		    if ((curproc->p_flag & P_DEPENDENCY_CAPABLE) == 0 || vp->v_mount == NULL || vp->v_mount->mnt_dependent_process == NULL) {
-			break;
+
+			// if we're a dependency-capable process, skip vnodes that can
+			// cause recycling deadlocks. (i.e. this process is diskimages
+			// helper and the vnode is in a disk image).  Querying the
+			// mnt_kern_flag for the mount's virtual device status
+			// is safer than checking the mnt_dependent_process, which
+			// may not be updated if there are multiple devnode layers 
+			// in between the disk image and the final consumer.
+
+		    if ((curproc->p_flag & P_DEPENDENCY_CAPABLE) == 0 || vp->v_mount == NULL || 
+					(vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV) == 0) {
+				break;
 		    }
 
 		    // don't iterate more than MAX_WALK_COUNT vnodes to
@@ -3572,7 +3634,7 @@ retry:
 		 * Running out of vnodes tends to make a system unusable. Start killing
 		 * processes that jetsam knows are killable.
 		 */
-		if (jetsam_kill_top_proc() < 0) {
+		if (jetsam_kill_top_proc(TRUE, kJetsamFlagsKilledVnodes) < 0) {
 			/*
 			 * If jetsam can't find any more processes to kill and there
 			 * still aren't any free vnodes, panic. Hopefully we'll get a
@@ -3754,10 +3816,27 @@ vnode_get_locked(struct vnode *vp)
 	return (0);
 }
 
+/*
+ * vnode_getwithvid() cuts in line in front of a vnode drain (that is,
+ * while the vnode is draining, but at no point after that) to prevent
+ * deadlocks when getting vnodes from filesystem hashes while holding
+ * resources that may prevent other iocounts from being released.
+ */
 int
 vnode_getwithvid(vnode_t vp, uint32_t vid)
 {
-        return(vget_internal(vp, vid, ( VNODE_NODEAD| VNODE_WITHID)));
+        return(vget_internal(vp, vid, ( VNODE_NODEAD | VNODE_WITHID | VNODE_DRAINO )));
+}
+
+/*
+ * vnode_getwithvid_drainok() is like vnode_getwithvid(), but *does* block behind a vnode
+ * drain; it exists for use in the VFS name cache, where we really do want to block behind
+ * vnode drain to prevent holding off an unmount.
+ */
+int
+vnode_getwithvid_drainok(vnode_t vp, uint32_t vid)
+{
+        return(vget_internal(vp, vid, ( VNODE_NODEAD | VNODE_WITHID )));
 }
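+
+/*
+ * Typical filesystem-hash usage sketch (illustrative; hash_unlock() and
+ * the retry label are hypothetical names): capture the vid before
+ * dropping the hash lock, then revalidate with vnode_getwithvid();
+ * ENOENT means the vnode was recycled underneath us and the lookup
+ * must be redriven.
+ *
+ *	uint32_t vid = vnode_vid(vp);
+ *	hash_unlock();
+ *	if (vnode_getwithvid(vp, vid) == ENOENT)
+ *		goto retry;
+ */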
 
 int
@@ -3801,7 +3880,7 @@ retry:
 		vnode_dropiocount(vp);
 		return(0);
 	}
-	if ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD | VL_NEEDINACTIVE)) == VL_NEEDINACTIVE) {
+	if ((vp->v_lflag & (VL_DEAD | VL_NEEDINACTIVE)) == VL_NEEDINACTIVE) {
 
 	        vp->v_lflag &= ~VL_NEEDINACTIVE;
 	        vnode_unlock(vp);
@@ -3914,7 +3993,7 @@ vnode_drain(vnode_t vp)
 {
 	
 	if (vp->v_lflag & VL_DRAIN) {
-		panic("vnode_drain: recursuve drain");
+		panic("vnode_drain: recursive drain");
 		return(ENOENT);
 	}
 	vp->v_lflag |= VL_DRAIN;
@@ -3922,13 +4001,16 @@ vnode_drain(vnode_t vp)
 
 	while (vp->v_iocount > 1)
 		msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_drain", NULL);
+
+	vp->v_lflag &= ~VL_DRAIN;
+
 	return(0);
 }
 
 
 /*
  * if the number of recent references via vnode_getwithvid or vnode_getwithref
- * exceeds this threshhold, than 'UN-AGE' the vnode by removing it from
+ * exceeds this threshold, then 'UN-AGE' the vnode by removing it from
  * the LRU list if it's currently on it... once the iocount and usecount both drop
 * to 0, it will get put back on the end of the list, effectively making it younger;
  * this allows us to keep actively referenced vnodes in the list without having
@@ -3937,12 +4019,13 @@ vnode_drain(vnode_t vp)
  */
 #define UNAGE_THRESHHOLD	25
 
-static errno_t
+errno_t
 vnode_getiocount(vnode_t vp, unsigned int vid, int vflags)
 {
 	int nodead = vflags & VNODE_NODEAD;
 	int nosusp = vflags & VNODE_NOSUSPEND;
 	int always = vflags & VNODE_ALWAYS;
+	int beatdrain = vflags & VNODE_DRAINO;
 
 	for (;;) {
 		/*
@@ -3974,6 +4057,18 @@ vnode_getiocount(vnode_t vp, unsigned int vid, int vflags)
 		
 		if (always != 0) 
 			break;
+
+		/*
+		 * In some situations, we want to get an iocount
+		 * even if the vnode is draining to prevent deadlock,
+		 * e.g. if we're in the filesystem, potentially holding
+		 * resources that could prevent other iocounts from
+		 * being released.
+		 */
+		if (beatdrain && (vp->v_lflag & VL_DRAIN)) {
+			break;
+		}
+
 		vnode_lock_convert(vp);
 
 		if (vp->v_lflag & VL_TERMINATE) {
@@ -3983,7 +4078,7 @@ vnode_getiocount(vnode_t vp, unsigned int vid, int vflags)
 		} else
 			msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_getiocount", NULL);
 	}
-	if (vid != vp->v_id) {
+	if (((vflags & VNODE_WITHID) != 0) && vid != vp->v_id) {
 		return(ENOENT);
 	}
 	if (++vp->v_references >= UNAGE_THRESHHOLD) {
@@ -4087,7 +4182,6 @@ vnode_reclaim_internal(struct vnode * vp, int locked, int reuse, int flags)
 	vp->v_socket = NULL;
 
 	vp->v_lflag &= ~VL_TERMINATE;
-	vp->v_lflag &= ~VL_DRAIN;
 	vp->v_owner = NULL;
 
 	KNOTE(&vp->v_knotes, NOTE_REVOKE);
@@ -4114,7 +4208,6 @@ vnode_reclaim_internal(struct vnode * vp, int locked, int reuse, int flags)
  * The following api creates a vnode and associates all the parameter specified in vnode_fsparam
  * structure and returns a vnode handle with a reference. device aliasing is handled here so checkalias
  * is obsoleted by this.
- *  vnode_create(int flavor, size_t size, void * param,  vnode_t  *vp)
  */
 int  
 vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp)
@@ -4127,159 +4220,210 @@ vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp)
         struct  uthread *ut;
 	struct componentname *cnp;
 	struct vnode_fsparam *param = (struct vnode_fsparam *)data;
-	
-	if (flavor == VNCREATE_FLAVOR && (size == VCREATESIZE) && param) {
-		if ( (error = new_vnode(&vp)) ) {
-			return(error);
-		} else {
-			dvp = param->vnfs_dvp;
-			cnp = param->vnfs_cnp;
+#if CONFIG_TRIGGERS
+	struct vnode_trigger_param *tinfo = NULL;
+#endif
+	if (param == NULL)
+		return (EINVAL);
 
-			vp->v_op = param->vnfs_vops;
-			vp->v_type = param->vnfs_vtype;
-			vp->v_data = param->vnfs_fsnode;
+#if CONFIG_TRIGGERS
+	if ((flavor == VNCREATE_TRIGGER) && (size == VNCREATE_TRIGGER_SIZE)) {
+		tinfo = (struct vnode_trigger_param *)data;
+
+		/* Validate trigger vnode input */
+		if ((param->vnfs_vtype != VDIR) ||
+		    (tinfo->vnt_resolve_func == NULL) ||
+		    (tinfo->vnt_flags & ~VNT_VALID_MASK)) {
+			return (EINVAL);
+		}
+		/* Fall through to a normal create (params will be the same) */
+		flavor = VNCREATE_FLAVOR;
+		size = VCREATESIZE;
+	}
+#endif
+	if ((flavor != VNCREATE_FLAVOR) || (size != VCREATESIZE))
+		return (EINVAL);
+
+	if ( (error = new_vnode(&vp)) )
+		return(error);
 
-			if (param->vnfs_markroot)
-				vp->v_flag |= VROOT;
-			if (param->vnfs_marksystem)
-				vp->v_flag |= VSYSTEM;
-			if (vp->v_type == VREG) {
-				error = ubc_info_init_withsize(vp, param->vnfs_filesize);
-				if (error) {
+	dvp = param->vnfs_dvp;
+	cnp = param->vnfs_cnp;
+
+	vp->v_op = param->vnfs_vops;
+	vp->v_type = param->vnfs_vtype;
+	vp->v_data = param->vnfs_fsnode;
+
+	if (param->vnfs_markroot)
+		vp->v_flag |= VROOT;
+	if (param->vnfs_marksystem)
+		vp->v_flag |= VSYSTEM;
+	if (vp->v_type == VREG) {
+		error = ubc_info_init_withsize(vp, param->vnfs_filesize);
+		if (error) {
 #ifdef JOE_DEBUG
-				        record_vp(vp, 1);
+			record_vp(vp, 1);
 #endif
-					vp->v_mount = NULL;
-					vp->v_op = dead_vnodeop_p;
-					vp->v_tag = VT_NON;
-					vp->v_data = NULL;
-					vp->v_type = VBAD;
-					vp->v_lflag |= VL_DEAD;
-
-					vnode_put(vp);
-					return(error);
-				}
-			}
+			vp->v_mount = NULL;
+			vp->v_op = dead_vnodeop_p;
+			vp->v_tag = VT_NON;
+			vp->v_data = NULL;
+			vp->v_type = VBAD;
+			vp->v_lflag |= VL_DEAD;
+
+			vnode_put(vp);
+			return(error);
+		}
+	}
+#ifdef JOE_DEBUG
+	record_vp(vp, 1);
+#endif
+
+#if CONFIG_TRIGGERS
+	/*
+	 * For trigger vnodes, attach trigger info to vnode
+	 */
+	if ((vp->v_type == VDIR) && (tinfo != NULL)) {
+		/* 
+		 * Note: has a side effect of incrementing trigger count on the
+		 * mount if successful, which we would need to undo on a 
+		 * subsequent failure.
+		 */
+#ifdef JOE_DEBUG
+		record_vp(vp, -1);
+#endif
+		error = vnode_resolver_create(param->vnfs_mp, vp, tinfo, FALSE);
+		if (error) {
+			printf("vnode_create: vnode_resolver_create() err %d\n", error);
+			vp->v_mount = NULL;
+			vp->v_op = dead_vnodeop_p;
+			vp->v_tag = VT_NON;
+			vp->v_data = NULL;
+			vp->v_type = VBAD;
+			vp->v_lflag |= VL_DEAD;
 #ifdef JOE_DEBUG
 			record_vp(vp, 1);
 #endif
-			if (vp->v_type == VCHR || vp->v_type == VBLK) {
-                
-				vp->v_tag = VT_DEVFS;		/* callers will reset if needed (bdevvp) */
-
-				if ( (nvp = checkalias(vp, param->vnfs_rdev)) ) {
-				        /*
-					 * if checkalias returns a vnode, it will be locked
-					 *
-					 * first get rid of the unneeded vnode we acquired
-					 */
-					vp->v_data = NULL;
-					vp->v_op = spec_vnodeop_p;
-					vp->v_type = VBAD;
-					vp->v_lflag = VL_DEAD;
-					vp->v_data = NULL; 
-					vp->v_tag = VT_NON;
-					vnode_put(vp);
+			vnode_put(vp);
+			return (error);
+		}
+	}
+#endif
+	if (vp->v_type == VCHR || vp->v_type == VBLK) {
 
-					/*
-					 * switch to aliased vnode and finish
-					 * preparing it
-					 */
-					vp = nvp;
-
-					vclean(vp, 0);
-					vp->v_op = param->vnfs_vops;
-					vp->v_type = param->vnfs_vtype;
-					vp->v_data = param->vnfs_fsnode;
-					vp->v_lflag = 0;
-					vp->v_mount = NULL;
-					insmntque(vp, param->vnfs_mp);
-					insert = 0;
-					vnode_unlock(vp);
-				}
-			}
+		vp->v_tag = VT_DEVFS;		/* callers will reset if needed (bdevvp) */
 
-			if (vp->v_type == VFIFO) {
-				struct fifoinfo *fip;
+		if ( (nvp = checkalias(vp, param->vnfs_rdev)) ) {
+			/*
+			 * if checkalias returns a vnode, it will be locked
+			 *
+			 * first get rid of the unneeded vnode we acquired
+			 */
+			vp->v_data = NULL;
+			vp->v_op = spec_vnodeop_p;
+			vp->v_type = VBAD;
+			vp->v_lflag = VL_DEAD;
+			vp->v_data = NULL; 
+			vp->v_tag = VT_NON;
+			vnode_put(vp);
 
-				MALLOC(fip, struct fifoinfo *,
-					sizeof(*fip), M_TEMP, M_WAITOK);
-				bzero(fip, sizeof(struct fifoinfo ));
-				vp->v_fifoinfo = fip;
-			}
-			/* The file systems must pass the address of the location where
-			 * they store the vnode pointer. When we add the vnode into the mount
-			 * list and name cache they become discoverable. So the file system node
-			 * must have the connection to vnode setup by then
+			/*
+			 * switch to aliased vnode and finish
+			 * preparing it
 			 */
-			*vpp = vp;
+			vp = nvp;
 
-			/* Add fs named reference. */
-			if (param->vnfs_flags & VNFS_ADDFSREF) {
-				vp->v_lflag |= VNAMED_FSHASH;
-			}
-			if (param->vnfs_mp) {
-					if (param->vnfs_mp->mnt_kern_flag & MNTK_LOCK_LOCAL)
-						vp->v_flag |= VLOCKLOCAL;
-			        if (insert) {
-					if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb))
-						panic("insmntque: vp on the free list\n");
-				        /*
-					 * enter in mount vnode list
-					 */
-				        insmntque(vp, param->vnfs_mp);
-				}
+			vclean(vp, 0);
+			vp->v_op = param->vnfs_vops;
+			vp->v_type = param->vnfs_vtype;
+			vp->v_data = param->vnfs_fsnode;
+			vp->v_lflag = 0;
+			vp->v_mount = NULL;
+			insmntque(vp, param->vnfs_mp);
+			insert = 0;
+			vnode_unlock(vp);
+		}
+	}
+
+	if (vp->v_type == VFIFO) {
+		struct fifoinfo *fip;
+
+		MALLOC(fip, struct fifoinfo *,
+			sizeof(*fip), M_TEMP, M_WAITOK);
+		bzero(fip, sizeof(struct fifoinfo ));
+		vp->v_fifoinfo = fip;
+	}
+	/* The file systems must pass the address of the location where
+	 * they store the vnode pointer. When we add the vnode into the mount
+	 * list and name cache they become discoverable. So the file system node
+	 * must have the connection to vnode setup by then
+	 */
+	*vpp = vp;
+
+	/* Add fs named reference. */
+	if (param->vnfs_flags & VNFS_ADDFSREF) {
+		vp->v_lflag |= VNAMED_FSHASH;
+	}
+	if (param->vnfs_mp) {
+			if (param->vnfs_mp->mnt_kern_flag & MNTK_LOCK_LOCAL)
+				vp->v_flag |= VLOCKLOCAL;
+		if (insert) {
+			if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb))
+				panic("insmntque: vp on the free list\n");
+
+			/*
+			 * enter in mount vnode list
+			 */
+			insmntque(vp, param->vnfs_mp);
+		}
 #ifndef __LP64__
-				if ((param->vnfs_mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE) == 0) {
-				        MALLOC_ZONE(vp->v_unsafefs, struct unsafe_fsnode *,
-						    sizeof(struct unsafe_fsnode), M_UNSAFEFS, M_WAITOK);
-					vp->v_unsafefs->fsnode_count = 0;
-					vp->v_unsafefs->fsnodeowner  = (void *)NULL;
-					lck_mtx_init(&vp->v_unsafefs->fsnodelock, vnode_lck_grp, vnode_lck_attr);
-				}
+		if ((param->vnfs_mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE) == 0) {
+			MALLOC_ZONE(vp->v_unsafefs, struct unsafe_fsnode *,
+				    sizeof(struct unsafe_fsnode), M_UNSAFEFS, M_WAITOK);
+			vp->v_unsafefs->fsnode_count = 0;
+			vp->v_unsafefs->fsnodeowner  = (void *)NULL;
+			lck_mtx_init(&vp->v_unsafefs->fsnodelock, vnode_lck_grp, vnode_lck_attr);
+		}
 #endif /* __LP64__ */
-			}
-			if (dvp && vnode_ref(dvp) == 0) {
-				vp->v_parent = dvp;
-			}
-			if (cnp) {
-				if (dvp && ((param->vnfs_flags & (VNFS_NOCACHE | VNFS_CANTCACHE)) == 0)) {
-					/*
-					 * enter into name cache
-					 * we've got the info to enter it into the name cache now
-					 * cache_enter_create will pick up an extra reference on
-					 * the name entered into the string cache
-					 */
-					vp->v_name = cache_enter_create(dvp, vp, cnp);
-				} else
-					vp->v_name = vfs_addname(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0);
+	}
+	if (dvp && vnode_ref(dvp) == 0) {
+		vp->v_parent = dvp;
+	}
+	if (cnp) {
+		if (dvp && ((param->vnfs_flags & (VNFS_NOCACHE | VNFS_CANTCACHE)) == 0)) {
+			/*
+			 * enter into name cache
+			 * we've got the info to enter it into the name cache now
+			 * cache_enter_create will pick up an extra reference on
+			 * the name entered into the string cache
+			 */
+			vp->v_name = cache_enter_create(dvp, vp, cnp);
+		} else
+			vp->v_name = vfs_addname(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0);
 
-				if ((cnp->cn_flags & UNIONCREATED) == UNIONCREATED)
-					vp->v_flag |= VISUNION;
-			}
-			if ((param->vnfs_flags & VNFS_CANTCACHE) == 0) {
-			        /*
-				 * this vnode is being created as cacheable in the name cache
-				 * this allows us to re-enter it in the cache
-				 */
-			        vp->v_flag |= VNCACHEABLE;
-			}
-			ut = get_bsdthread_info(current_thread());
-		
-			if ((current_proc()->p_lflag & P_LRAGE_VNODES) ||
-			    (ut->uu_flag & UT_RAGE_VNODES)) {
-			        /*
-				 * process has indicated that it wants any
-				 * vnodes created on its behalf to be rapidly
-				 * aged to reduce the impact on the cached set
-				 * of vnodes
-				 */
-			        vp->v_flag |= VRAGE;
-			}
-			return(0);
-		}
+		if ((cnp->cn_flags & UNIONCREATED) == UNIONCREATED)
+			vp->v_flag |= VISUNION;
+	}
+	if ((param->vnfs_flags & VNFS_CANTCACHE) == 0) {
+		/*
+		 * this vnode is being created as cacheable in the name cache
+		 * this allows us to re-enter it in the cache
+		 */
+		vp->v_flag |= VNCACHEABLE;
+	}
+	ut = get_bsdthread_info(current_thread());
+
+	if ((current_proc()->p_lflag & P_LRAGE_VNODES) ||
+	    (ut->uu_flag & UT_RAGE_VNODES)) {
+		/*
+		 * process has indicated that it wants any
+		 * vnodes created on its behalf to be rapidly
+		 * aged to reduce the impact on the cached set
+		 * of vnodes
+		 */
+		vp->v_flag |= VRAGE;
 	}
-	return (EINVAL);
+	return (0);
 }
 
 int
@@ -4309,13 +4453,14 @@ vnode_removefsref(vnode_t vp)
 
 
 int
-vfs_iterate(__unused int flags, int (*callout)(mount_t, void *), void *arg)
+vfs_iterate(int flags, int (*callout)(mount_t, void *), void *arg)
 {
 	mount_t	mp;
 	int ret = 0;
 	fsid_t * fsid_list;
 	int count, actualcount,  i;
 	void * allocmem;
+	int indx_start, indx_stop, indx_incr;
 
 	count = mount_getvfscnt();
 	count += 10;
@@ -4325,7 +4470,21 @@ vfs_iterate(__unused int flags, int (*callout)(mount_t, void *), void *arg)
 
 	actualcount = mount_fillfsids(fsid_list, count);
 
-	for (i=0; i< actualcount; i++) {
+	/*
+	 * Establish the iteration direction
+	 * VFS_ITERATE_TAIL_FIRST overrides default head first order (oldest first)
+	 */
+	if (flags & VFS_ITERATE_TAIL_FIRST) {
+		indx_start = actualcount - 1;
+		indx_stop = -1;
+		indx_incr = -1;
+	} else /* Head first by default */ {
+		indx_start = 0;
+		indx_stop = actualcount;
+		indx_incr = 1;
+	}
+
+	for (i=indx_start; i != indx_stop; i += indx_incr) {
 
 		/* obtain the mount point with iteration reference */
 		mp = mount_list_lookupby_fsid(&fsid_list[i], 0, 1);
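A minimal standalone sketch of the direction selection above, assuming a
plain int array stands in for the fsid list (iterate() and ids are made-up
names). Both orders terminate on the same i != indx_stop test, including
the empty case:

    #include <stdio.h>

    /* Sketch of the start/stop/incr scheme used by vfs_iterate() above. */
    static void
    iterate(const int *ids, int actualcount, int tail_first)
    {
        int indx_start, indx_stop, indx_incr;

        if (tail_first) {               /* VFS_ITERATE_TAIL_FIRST */
            indx_start = actualcount - 1;
            indx_stop = -1;
            indx_incr = -1;
        } else {                        /* head first (default) */
            indx_start = 0;
            indx_stop = actualcount;
            indx_incr = 1;
        }
        for (int i = indx_start; i != indx_stop; i += indx_incr)
            printf("visiting %d\n", ids[i]);
    }

    int main(void)
    {
        int ids[] = { 10, 20, 30 };

        iterate(ids, 3, 0);     /* 10 20 30 */
        iterate(ids, 3, 1);     /* 30 20 10 */
        return 0;
    }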
@@ -4567,7 +4726,8 @@ vnode_lookup(const char *path, int flags, vnode_t *vpp, vfs_context_t ctx)
 		ndflags |= DOWHITEOUT;
 
 	/* XXX AUDITVNPATH1 needed ? */
-	NDINIT(&nd, LOOKUP, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
+	NDINIT(&nd, LOOKUP, OP_LOOKUP, ndflags, UIO_SYSSPACE,
+	       CAST_USER_ADDR_T(path), ctx);
 
 	if ((error = namei(&nd)))
 		return (error);
@@ -4603,7 +4763,8 @@ vnode_open(const char *path, int fmode, int cmode, int flags, vnode_t *vpp, vfs_
 		ndflags |= DOWHITEOUT;
 	
 	/* XXX AUDITVNPATH1 needed ? */
-	NDINIT(&nd, LOOKUP, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
+	NDINIT(&nd, LOOKUP, OP_OPEN, ndflags, UIO_SYSSPACE,
+	       CAST_USER_ADDR_T(path), ctx);
 
 	if ((error = vn_open(&nd, fmode, cmode)))
 		*vpp = NULL;
@@ -4656,6 +4817,18 @@ vnode_setsize(vnode_t vp, off_t size, int ioflag, vfs_context_t ctx)
 	return(vnode_setattr(vp, &va, ctx));
 }
 
+static int
+vn_create_reg(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, struct vnode_attr *vap, uint32_t flags, int fmode, uint32_t *statusp, vfs_context_t ctx)
+{
+	/* Only use compound VNOP for compound operation */
+	if (vnode_compound_open_available(dvp) && ((flags & VN_CREATE_DOOPEN) != 0)) {
+		*vpp = NULLVP;
+		return VNOP_COMPOUND_OPEN(dvp, vpp, ndp, VNOP_COMPOUND_OPEN_DO_CREATE, fmode, statusp, vap, ctx);
+	} else {
+		return VNOP_CREATE(dvp, vpp, &ndp->ni_cnd, vap, ctx);
+	}
+}
+
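vn_create_reg() above dispatches between a single compound open-and-create
VNOP and the classic VNOP_CREATE path. A toy sketch of that capability-style
dispatch; struct fs_ops and the function names below are hypothetical, not
the kernel's VNOP interface:

    #include <stddef.h>

    /* Use the combined operation when the FS advertises one, otherwise
     * fall back to the two-step create-then-open path. */
    struct fs_ops {
        int (*compound_open)(const char *name, int fmode);  /* may be NULL */
        int (*create)(const char *name);
        int (*open)(const char *name, int fmode);
    };

    static int
    create_and_open(const struct fs_ops *ops, const char *name, int fmode)
    {
        if (ops->compound_open != NULL)
            return ops->compound_open(name, fmode); /* one round trip */

        int error = ops->create(name);
        return error ? error : ops->open(name, fmode);
    }

    static int my_create(const char *n) { (void)n; return 0; }
    static int my_open(const char *n, int f) { (void)n; (void)f; return 0; }

    int main(void)
    {
        struct fs_ops classic = { NULL, my_create, my_open };

        return create_and_open(&classic, "file", 0); /* two-step fallback */
    }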
 /*
  * Create a filesystem object of arbitrary type with arbitrary attributes in
 * the specified directory with the specified name.
@@ -4698,70 +4871,48 @@ vnode_setsize(vnode_t vp, off_t size, int ioflag, vfs_context_t ctx)
  *		in the code they originated.
  */
 errno_t
-vn_create(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, struct vnode_attr *vap, int flags, vfs_context_t ctx)
+vn_create(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, struct vnode_attr *vap, uint32_t flags, int fmode, uint32_t *statusp, vfs_context_t ctx)
 {
-	kauth_acl_t oacl, nacl;
-	int initial_acl;
-	errno_t	error;
+	errno_t	error, old_error;
 	vnode_t vp = (vnode_t)0;
+	boolean_t batched;
+	struct componentname *cnp;
+	uint32_t defaulted;
 
+	cnp = &ndp->ni_cnd;
 	error = 0;
-	oacl = nacl = NULL;
-	initial_acl = 0;
+	batched = namei_compound_available(dvp, ndp) ? TRUE : FALSE;
 
 	KAUTH_DEBUG("%p    CREATE - '%s'", dvp, cnp->cn_nameptr);
 
+	if (flags & VN_CREATE_NOINHERIT) 
+		vap->va_vaflags |= VA_NOINHERIT;
+	if (flags & VN_CREATE_NOAUTH) 
+		vap->va_vaflags |= VA_NOAUTH;
 	/*
-	 * Handle ACL inheritance.
+	 * Handle ACL inheritance, initialize vap.
 	 */
-	if (!(flags & VN_CREATE_NOINHERIT) && vfs_extendedsecurity(dvp->v_mount)) {
-		/* save the original filesec */
-		if (VATTR_IS_ACTIVE(vap, va_acl)) {
-			initial_acl = 1;
-			oacl = vap->va_acl;
-		}
-
-		vap->va_acl = NULL;
-		if ((error = kauth_acl_inherit(dvp,
-			 oacl,
-			 &nacl,
-			 vap->va_type == VDIR,
-			 ctx)) != 0) {
-			KAUTH_DEBUG("%p    CREATE - error %d processing inheritance", dvp, error);
-			return(error);
-		}
+	error = vn_attribute_prepare(dvp, vap, &defaulted, ctx);
+	if (error) {
+		return error;
+	}
 
-		/*
-		 * If the generated ACL is NULL, then we can save ourselves some effort
-		 * by clearing the active bit.
-		 */
-		if (nacl == NULL) {
-			VATTR_CLEAR_ACTIVE(vap, va_acl);
-		} else {
-			VATTR_SET(vap, va_acl, nacl);
-		}
+	if (vap->va_type != VREG && (fmode != 0 || (flags & VN_CREATE_DOOPEN) || statusp)) {
+		panic("Open parameters, but not a regular file.");
 	}
-	
-	/*
-	 * Check and default new attributes.
-	 * This will set va_uid, va_gid, va_mode and va_create_time at least, if the caller
-	 * hasn't supplied them.
-	 */
-	if ((error = vnode_authattr_new(dvp, vap, flags & VN_CREATE_NOAUTH, ctx)) != 0) {
-		KAUTH_DEBUG("%p    CREATE - error %d handing/defaulting attributes", dvp, error);
-		goto out;
+	if ((fmode != 0) && ((flags & VN_CREATE_DOOPEN) == 0)) {
+		panic("Mode for open, but not trying to open...");
 	}
 
-		
 	/*
 	 * Create the requested node.
 	 */
 	switch(vap->va_type) {
 	case VREG:
-		error = VNOP_CREATE(dvp, vpp, cnp, vap, ctx);
+		error = vn_create_reg(dvp, vpp, ndp, vap, flags, fmode, statusp, ctx);
 		break;
 	case VDIR:
-		error = VNOP_MKDIR(dvp, vpp, cnp, vap, ctx);
+		error = vn_mkdir(dvp, vpp, ndp, vap, ctx);
 		break;
 	case VSOCK:
 	case VFIFO:
@@ -4778,6 +4929,8 @@ vn_create(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, struct vnode_att
 	}
 
 	vp = *vpp;
+	old_error = error;
+
 #if CONFIG_MACF
 	if (!(flags & VN_CREATE_NOLABEL)) {
 		error = vnode_label(vnode_mount(vp), dvp, vp, cnp, VNODE_LABEL_CREATE, ctx);
@@ -4797,24 +4950,22 @@ vn_create(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, struct vnode_att
 #if CONFIG_MACF
 error:
 #endif
-	if ((error != 0 ) && (vp != (vnode_t)0)) {
-		*vpp = (vnode_t) 0;
-		vnode_put(vp);
+	if ((error != 0) && (vp != (vnode_t)0)) {
+
+		/* If we've done a compound open, close */
+		if (batched && (old_error == 0) && (vap->va_type == VREG)) {
+			VNOP_CLOSE(vp, fmode, ctx);
+		}
+
+		/* Need to provide notifications if a create succeeded */
+		if (!batched) {
+			*vpp = (vnode_t) 0;
+			vnode_put(vp);
+		}
 	}
 
 out:
-	/*
-	 * If the caller supplied a filesec in vap, it has been replaced
-	 * now by the post-inheritance copy.  We need to put the original back
-	 * and free the inherited product.
-	 */
-	if (initial_acl) {
-		VATTR_SET(vap, va_acl, oacl);
-	} else {
-		VATTR_CLEAR_ACTIVE(vap, va_acl);
-	}
-	if (nacl != NULL)
-		kauth_acl_free(nacl);
+	vn_attribute_cleanup(vap, defaulted);
 
 	return(error);
 }
@@ -4845,6 +4996,433 @@ vnode_authorize_init(void)
 	vnode_scope = kauth_register_scope(KAUTH_SCOPE_VNODE, vnode_authorize_callback, NULL);
 }
 
+#define VATTR_PREPARE_DEFAULTED_UID		0x1
+#define VATTR_PREPARE_DEFAULTED_GID		0x2
+#define VATTR_PREPARE_DEFAULTED_MODE		0x4
+
+int
+vn_attribute_prepare(vnode_t dvp, struct vnode_attr *vap, uint32_t *defaulted_fieldsp, vfs_context_t ctx)
+{
+	kauth_acl_t nacl = NULL, oacl = NULL;
+	int error;
+
+	/*
+	 * Handle ACL inheritance.
+	 */
+	if (!(vap->va_vaflags & VA_NOINHERIT) && vfs_extendedsecurity(dvp->v_mount)) {
+		/* save the original filesec */
+		if (VATTR_IS_ACTIVE(vap, va_acl)) {
+			oacl = vap->va_acl;
+		}
+
+		vap->va_acl = NULL;
+		if ((error = kauth_acl_inherit(dvp,
+			 oacl,
+			 &nacl,
+			 vap->va_type == VDIR,
+			 ctx)) != 0) {
+			KAUTH_DEBUG("%p    CREATE - error %d processing inheritance", dvp, error);
+			return(error);
+		}
+
+		/*
+		 * If the generated ACL is NULL, then we can save ourselves some effort
+		 * by clearing the active bit.
+		 */
+		if (nacl == NULL) {
+			VATTR_CLEAR_ACTIVE(vap, va_acl);
+		} else {
+			vap->va_base_acl = oacl;
+			VATTR_SET(vap, va_acl, nacl);
+		}
+	}
+	
+	error = vnode_authattr_new_internal(dvp, vap, (vap->va_vaflags & VA_NOAUTH), defaulted_fieldsp, ctx);
+	if (error) {
+		vn_attribute_cleanup(vap, *defaulted_fieldsp);
+	} 
+
+	return error;
+}
+
+void
+vn_attribute_cleanup(struct vnode_attr *vap, uint32_t defaulted_fields)
+{
+	/*
+	 * If the caller supplied a filesec in vap, it has been replaced
+	 * now by the post-inheritance copy.  We need to put the original back
+	 * and free the inherited product.
+	 */
+	kauth_acl_t nacl, oacl;
+
+	if (VATTR_IS_ACTIVE(vap, va_acl)) {
+		nacl = vap->va_acl;
+		oacl = vap->va_base_acl;
+
+		if (oacl)  {
+			VATTR_SET(vap, va_acl, oacl);
+			vap->va_base_acl = NULL;
+		} else {
+			VATTR_CLEAR_ACTIVE(vap, va_acl);
+		}
+
+		if (nacl != NULL) {
+			kauth_acl_free(nacl);
+		}
+	}
+
+	if ((defaulted_fields & VATTR_PREPARE_DEFAULTED_MODE) != 0) {
+		VATTR_CLEAR_ACTIVE(vap, va_mode);
+	}
+	if ((defaulted_fields & VATTR_PREPARE_DEFAULTED_GID) != 0) {
+		VATTR_CLEAR_ACTIVE(vap, va_gid);
+	}
+	if ((defaulted_fields & VATTR_PREPARE_DEFAULTED_UID) != 0) {
+		VATTR_CLEAR_ACTIVE(vap, va_uid);
+	}
+
+	return;
+}
+
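The prepare/cleanup pair above follows a save-and-restore discipline:
vn_attribute_prepare() stashes the caller's original ACL in va_base_acl and
substitutes the inherited product, and vn_attribute_cleanup() restores the
original and frees the product. A toy sketch of that discipline, with
hypothetical types standing in for kauth_acl_t and the vnode_attr fields:

    #include <stdlib.h>

    struct attr {
        void *acl;      /* caller-visible ACL (may be replaced) */
        void *base_acl; /* saved original, restored on cleanup */
    };

    static int prepare(struct attr *a)
    {
        void *inherited = malloc(16);   /* stand-in for kauth_acl_inherit() */

        if (inherited == NULL)
            return -1;
        a->base_acl = a->acl;   /* save the caller's copy */
        a->acl = inherited;     /* substitute the inherited product */
        return 0;
    }

    static void cleanup(struct attr *a)
    {
        void *inherited = a->acl;

        a->acl = a->base_acl;   /* put the original back */
        a->base_acl = NULL;
        free(inherited);        /* free the inherited product */
    }

    int main(void)
    {
        struct attr a = { NULL, NULL };

        if (prepare(&a) == 0)
            cleanup(&a);        /* a.acl is back to the caller's value */
        return 0;
    }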
+int
+vn_authorize_unlink(vnode_t dvp, vnode_t vp, struct componentname *cnp, vfs_context_t ctx, __unused void *reserved)
+{
+	int error = 0;
+
+	/*
+	 * Normally, unlinking of directories is not supported. 
+	 * However, some file systems may have limited support.
+	 */
+	if ((vp->v_type == VDIR) &&
+			!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
+		return (EPERM);	/* POSIX */
+	}
+
+	/* authorize the delete operation */
+#if CONFIG_MACF
+	if (!error)
+		error = mac_vnode_check_unlink(ctx, dvp, vp, cnp);
+#endif /* MAC */
+	if (!error)
+		error = vnode_authorize(vp, dvp, KAUTH_VNODE_DELETE, ctx);
+
+	return error;
+}
+
+int
+vn_authorize_open_existing(vnode_t vp, struct componentname *cnp, int fmode, vfs_context_t ctx, void *reserved)
+{
+	/* Open of existing case */
+	kauth_action_t action;
+	int error = 0;
+
+	if (cnp->cn_ndp == NULL) {
+		panic("NULL ndp");
+	}
+	if (reserved != NULL) {
+		panic("reserved not NULL.");
+	}
+
+#if CONFIG_MACF
+	/* XXX may do duplicate work here, but ignore that for now (idempotent) */
+	if (vfs_flags(vnode_mount(vp)) & MNT_MULTILABEL) {
+		error = vnode_label(vnode_mount(vp), NULL, vp, NULL, 0, ctx);
+		if (error)
+			return (error);
+	}
+#endif
+
+	if ( (fmode & O_DIRECTORY) && vp->v_type != VDIR ) {
+		return (ENOTDIR);
+	}
+
+	if (vp->v_type == VSOCK && vp->v_tag != VT_FDESC) {
+		return (EOPNOTSUPP);	/* Operation not supported on socket */
+	}
+
+	if (vp->v_type == VLNK && (fmode & O_NOFOLLOW) != 0) {
+		return (ELOOP);		/* O_NOFOLLOW was specified and the target is a symbolic link */
+	}
+
+	/* disallow write operations on directories */
+	if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
+		return (EISDIR);
+	}
+
+	if ((cnp->cn_ndp->ni_flag & NAMEI_TRAILINGSLASH)) {
+		if (vp->v_type != VDIR) {
+			return (ENOTDIR);
+		}
+	}
+
+#if CONFIG_MACF
+	/* If a file being opened is a shadow file containing 
+	 * namedstream data, ignore the macf checks because it 
+	 * is a kernel internal file and access should always 
+	 * be allowed.
+	 */
+	if (!(vnode_isshadow(vp) && vnode_isnamedstream(vp))) {
+		error = mac_vnode_check_open(ctx, vp, fmode);
+		if (error) {
+			return (error);
+		}
+	}
+#endif
+
+	/* compute action to be authorized */
+	action = 0;
+	if (fmode & FREAD) {
+		action |= KAUTH_VNODE_READ_DATA;
+	}
+	if (fmode & (FWRITE | O_TRUNC)) {
+		/*
+		 * If we are writing, appending, and not truncating,
+		 * indicate that we are appending so that if the
+		 * UF_APPEND or SF_APPEND bits are set, we do not deny
+		 * the open.
+		 */
+		if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
+			action |= KAUTH_VNODE_APPEND_DATA;
+		} else {
+			action |= KAUTH_VNODE_WRITE_DATA;
+		}
+	}
+	return (vnode_authorize(vp, NULL, action, ctx));
+}
+
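The fmode-to-kauth mapping at the tail of the function is easiest to see in
isolation. A sketch with made-up flag values; the kernel's real FREAD,
FWRITE, O_APPEND, O_TRUNC and KAUTH_VNODE_* constants come from the sys
headers, not these stand-ins:

    #include <stdio.h>

    #define FREAD    0x1
    #define FWRITE   0x2
    #define O_APPEND 0x4
    #define O_TRUNC  0x8

    #define KAUTH_VNODE_READ_DATA   0x10
    #define KAUTH_VNODE_WRITE_DATA  0x20
    #define KAUTH_VNODE_APPEND_DATA 0x40

    static int action_for(int fmode)
    {
        int action = 0;

        if (fmode & FREAD)
            action |= KAUTH_VNODE_READ_DATA;
        if (fmode & (FWRITE | O_TRUNC)) {
            /* appending without truncating asks only for append rights,
             * so UF_APPEND/SF_APPEND files can still be opened */
            if ((fmode & O_APPEND) && !(fmode & O_TRUNC))
                action |= KAUTH_VNODE_APPEND_DATA;
            else
                action |= KAUTH_VNODE_WRITE_DATA;
        }
        return action;
    }

    int main(void)
    {
        printf("%#x\n", action_for(FREAD | FWRITE | O_APPEND)); /* 0x50 */
        return 0;
    }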
+int
+vn_authorize_create(vnode_t dvp, struct componentname *cnp, struct vnode_attr *vap, vfs_context_t ctx, void *reserved)
+{
+	/* Creation case */
+	int error;
+
+	if (cnp->cn_ndp == NULL) {
+		panic("NULL cn_ndp");
+	}
+	if (reserved != NULL) {
+		panic("reserved not NULL.");
+	}
+
+	/* Only validate path for creation if we didn't do a complete lookup */
+	if (cnp->cn_ndp->ni_flag & NAMEI_UNFINISHED) {
+		error = lookup_validate_creation_path(cnp->cn_ndp);
+		if (error)
+			return (error);
+	}
+
+#if CONFIG_MACF
+	error = mac_vnode_check_create(ctx, dvp, cnp, vap);
+	if (error)
+		return (error);
+#endif /* CONFIG_MACF */
+
+	return (vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx));
+}
+
+int 
+vn_authorize_rename(struct vnode *fdvp,  struct vnode *fvp,  struct componentname *fcnp,
+             struct vnode *tdvp,  struct vnode *tvp,  struct componentname *tcnp,
+             vfs_context_t ctx, void *reserved)
+{
+	int error = 0;
+	int moving = 0;
+
+	if (reserved != NULL) {
+		panic("Passed something other than NULL as reserved field!");
+	}
+
+	/*
+	 * Avoid renaming "." and "..".
+	 *
+	 * XXX No need to check for this in the FS.  We should always have the leaves
+	 * in VFS in this case.
+	 */
+	if (fvp->v_type == VDIR &&
+	    ((fdvp == fvp) ||
+	     (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
+	     ((fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT)) ) {
+		error = EINVAL;
+		goto out;
+	}
+
+	if (tvp == NULLVP && vnode_compound_rename_available(tdvp)) {
+		error = lookup_validate_creation_path(tcnp->cn_ndp);
+		if (error) 
+			goto out;
+	}
+
+	/***** <MACF> *****/
+#if CONFIG_MACF
+	error = mac_vnode_check_rename_from(ctx, fdvp, fvp, fcnp);
+	if (error)
+		goto out;
+#endif
+
+#if CONFIG_MACF
+	error = mac_vnode_check_rename_to(ctx,
+			tdvp, tvp, fdvp == tdvp, tcnp);
+	if (error)
+		goto out;
+#endif
+	/***** </MACF> *****/
+
+	/***** <MiscChecks> *****/
+	if (tvp != NULL) {
+		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
+			error = ENOTDIR;
+			goto out;
+		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
+			error = EISDIR;
+			goto out;
+		}
+	}
+
+	if (fvp == tdvp) {
+		error = EINVAL;
+		goto out;
+	}
+
+	/*
+	 * The following edge case is caught here:
+	 * (to cannot be a descendent of from)
+	 *
+	 *       o fdvp
+	 *      /
+	 *     /
+	 *    o fvp
+	 *     \
+	 *      \
+	 *       o tdvp
+	 *      /
+	 *     /
+	 *    o tvp
+	 */
+	if (tdvp->v_parent == fvp) {
+		error = EINVAL;
+		goto out;
+	}
+	/***** </MiscChecks> *****/
+
+	/***** <Kauth> *****/
+
+	error = 0;
+	if ((tvp != NULL) && vnode_isdir(tvp)) {
+		if (tvp != fdvp)
+			moving = 1;
+	} else if (tdvp != fdvp) {
+		moving = 1;
+	}
+
+
+	/*
+	 * must have delete rights to remove the old name even in
+	 * the simple case of fdvp == tdvp.
+	 *
+	 * If fvp is a directory, and we are changing its parent,
+	 * then we also need rights to rewrite its ".." entry as well.
+	 */
+	if (vnode_isdir(fvp)) {
+		if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE | KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
+			goto out;
+	} else {
+		if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, ctx)) != 0)
+			goto out;
+	}
+	if (moving) {
+		/* moving into tdvp or tvp, must have rights to add */
+		if ((error = vnode_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp,
+						NULL, 
+						vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE,
+						ctx)) != 0) {
+			goto out;
+		}
+	} else {
+		/* node staying in same directory, must be allowed to add new name */
+		if ((error = vnode_authorize(fdvp, NULL,
+						vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, ctx)) != 0)
+			goto out;
+	}
+	/* overwriting tvp */
+	if ((tvp != NULL) && !vnode_isdir(tvp) &&
+			((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0)) {
+		goto out;
+	}
+
+	/***** </Kauth> *****/
+
+	/* XXX more checks? */
+out:
+	return error;
+}
+
+int
+vn_authorize_mkdir(vnode_t dvp, struct componentname *cnp, struct vnode_attr *vap, vfs_context_t ctx, void *reserved)
+{
+	int error;
+
+	if (reserved != NULL) {
+		panic("reserved not NULL in vn_authorize_mkdir()");	
+	}
+
+	/* XXX A hack for now, to make shadow files work */
+	if (cnp->cn_ndp == NULL) {
+		return 0;
+	}
+
+	if (vnode_compound_mkdir_available(dvp)) {
+		error = lookup_validate_creation_path(cnp->cn_ndp);
+		if (error)
+			goto out;
+	}
+
+#if CONFIG_MACF
+	error = mac_vnode_check_create(ctx,
+	    dvp, cnp, vap);
+	if (error)
+		goto out;
+#endif
+
+	/* authorize addition of a directory to the parent */
+	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
+		goto out;
+
+out:
+	return error;
+}
+
+int
+vn_authorize_rmdir(vnode_t dvp, vnode_t vp, struct componentname *cnp, vfs_context_t ctx, void *reserved)
+{
+	int error;
+
+	if (reserved != NULL) {
+		panic("Non-NULL reserved argument to vn_authorize_rmdir()");
+	}
+
+	if (vp->v_type != VDIR) {
+		/*
+		 * rmdir only deals with directories
+		 */
+		return ENOTDIR;
+	} 
+	
+	if (dvp == vp) {
+		/*
+		 * No rmdir "." please.
+		 */
+		return EINVAL;
+	} 
+	
+#if CONFIG_MACF
+	error = mac_vnode_check_unlink(ctx, dvp,
+			vp, cnp);
+	if (error)
+		return error;
+#endif
+
+	return vnode_authorize(vp, dvp, KAUTH_VNODE_DELETE, ctx);
+}
+
 /*
  * Authorize an operation on a vnode.
  *
@@ -5254,8 +5832,11 @@ vnode_authorize_posix(vauth_ctx vcp, int action, int on_dir)
 	} else {
 		error = vauth_file_ingroup(vcp, &ismember, (!group_ok ? EACCES : 0));
 	}
-	if (error)
-		goto out;
+	if (error) {
+		if (!group_ok)
+			ismember = 1;
+		error = 0;
+	}
 	if (ismember) {
 		_SETWHERE("group");
 		if (!group_ok)
@@ -5324,8 +5905,6 @@ vnode_authorize_delete(vauth_ctx vcp, boolean_t cached_delete_child)
 	/* check the ACL on the directory */
 	delete_child_denied = 0;
 	if (!cached_delete_child && VATTR_IS_NOT(dvap, va_acl, NULL)) {
-		errno_t	posix_error;
-
 		eval.ae_requested = KAUTH_VNODE_DELETE_CHILD;
 		eval.ae_acl = &dvap->va_acl->acl_ace[0];
 		eval.ae_count = dvap->va_acl->acl_entrycount;
@@ -5338,9 +5917,11 @@ vnode_authorize_delete(vauth_ctx vcp, boolean_t cached_delete_child)
 		 * have the ACL evaluation answer.  Previously, we would
 		 * always deny the operation at this point.
 		 */
-		if ((posix_error = vauth_dir_ingroup(vcp, &ismember, ENOENT)) != 0 && posix_error != ENOENT)
-			return(posix_error);
-		if (ismember)
+		if ((error = vauth_dir_ingroup(vcp, &ismember, ENOENT)) != 0 && error != ENOENT)
+			return(error);
+		if (error == ENOENT)
+			eval.ae_options |= KAUTH_AEVAL_IN_GROUP_UNKNOWN;
+		else if (ismember)
 			eval.ae_options |= KAUTH_AEVAL_IN_GROUP;
 		eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS;
 		eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS;
@@ -5361,18 +5942,11 @@ vnode_authorize_delete(vauth_ctx vcp, boolean_t cached_delete_child)
 		case KAUTH_RESULT_DENY:
 			delete_child_denied = 1;
 			break;
-		case KAUTH_RESULT_ALLOW:
-			KAUTH_DEBUG("%p    ALLOWED - granted by directory ACL", vcp->vp);
-			return(0);
+		case KAUTH_RESULT_ALLOW:
+			KAUTH_DEBUG("%p    ALLOWED - granted by directory ACL", vcp->vp);
+			return(0);
 		case KAUTH_RESULT_DEFER:
-			/*
-			 * If we don't have a POSIX answer of "yes", and we
-			 * can't get an ACL answer, then we deny it now.
-			 */
-			if (posix_error == ENOENT) {
-				delete_child_denied = 1;
-				break;
-			}
 		default:
 			/* Effectively the same as !delete_child_denied */
 			KAUTH_DEBUG("%p    DEFERRED - directory ACL", vcp->vp);
@@ -5383,8 +5957,6 @@ vnode_authorize_delete(vauth_ctx vcp, boolean_t cached_delete_child)
 	/* check the ACL on the node */
 	delete_denied = 0;
 	if (VATTR_IS_NOT(vap, va_acl, NULL)) {
-		errno_t	posix_error;
-
 		eval.ae_requested = KAUTH_VNODE_DELETE;
 		eval.ae_acl = &vap->va_acl->acl_ace[0];
 		eval.ae_count = vap->va_acl->acl_entrycount;
@@ -5397,9 +5969,11 @@ vnode_authorize_delete(vauth_ctx vcp, boolean_t cached_delete_child)
 		 * have the ACL evaluation answer.  Previously, we would
 		 * always deny the operation at this point.
 		 */
-		if ((posix_error = vauth_file_ingroup(vcp, &ismember, ENOENT)) != 0 && posix_error != ENOENT)
-			return(posix_error);
-		if (ismember)
+		if ((error = vauth_file_ingroup(vcp, &ismember, ENOENT)) != 0 && error != ENOENT)
+			return(error);
+		if (error == ENOENT)
+			eval.ae_options |= KAUTH_AEVAL_IN_GROUP_UNKNOWN;
+		else if (ismember)
 			eval.ae_options |= KAUTH_AEVAL_IN_GROUP;
 		eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS;
 		eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS;
@@ -5419,13 +5993,6 @@ vnode_authorize_delete(vauth_ctx vcp, boolean_t cached_delete_child)
 			KAUTH_DEBUG("%p    ALLOWED - granted by file ACL", vcp->vp);
 			return(0);
 		case KAUTH_RESULT_DEFER:
-			/*
-			 * If we don't have a POSIX answer of "yes", and we
-			 * can't get an ACL answer, then we deny it now.
-			 */
-			if (posix_error == ENOENT) {
-				delete_denied = 1;
-			}
 		default:
 			/* Effectively the same as !delete_child_denied */
 			KAUTH_DEBUG("%p    DEFERRED%s - by file ACL", vcp->vp, delete_denied ? "(DENY)" : "");
@@ -5447,13 +6014,13 @@ vnode_authorize_delete(vauth_ctx vcp, boolean_t cached_delete_child)
 	 */
 	if (!cached_delete_child && (dvap->va_mode & S_ISTXT) && !vauth_file_owner(vcp) && !vauth_dir_owner(vcp)) {
 		KAUTH_DEBUG("%p    DENIED - sticky bit rules (user %d  file %d  dir %d)",
-		    vcp->vp, cred->cr_uid, vap->va_uid, dvap->va_uid);
+		    vcp->vp, cred->cr_posix.cr_uid, vap->va_uid, dvap->va_uid);
 		return(EACCES);
 	}
 
 	/* check the directory */
 	if (!cached_delete_child && (error = vnode_authorize_posix(vcp, VWRITE, 1 /* on_dir */)) != 0) {
-		KAUTH_DEBUG("%p    ALLOWED - granted by posix permisssions", vcp->vp);
+		KAUTH_DEBUG("%p    DENIED - denied by posix permissions", vcp->vp);
 		return(error);
 	}
 
@@ -5501,8 +6068,6 @@ vnode_authorize_simple(vauth_ctx vcp, kauth_ace_rights_t acl_rights, kauth_ace_r
 
 	/* if we have an ACL, evaluate it */
 	if (VATTR_IS_NOT(vap, va_acl, NULL)) {
-		errno_t	posix_error;
-
 		eval.ae_requested = acl_rights;
 		eval.ae_acl = &vap->va_acl->acl_ace[0];
 		eval.ae_count = vap->va_acl->acl_entrycount;
@@ -5515,9 +6080,11 @@ vnode_authorize_simple(vauth_ctx vcp, kauth_ace_rights_t acl_rights, kauth_ace_r
 		 * have the ACL evaluation answer.  Previously, we would
 		 * always deny the operation at this point.
 		 */
-		if ((posix_error = vauth_file_ingroup(vcp, &ismember, ENOENT)) != 0 && posix_error != ENOENT)
-			return(posix_error);
-		if (ismember)
+		if ((error = vauth_file_ingroup(vcp, &ismember, ENOENT)) != 0 && error != ENOENT)
+			return(error);
+		if (error == ENOENT)
+			eval.ae_options |= KAUTH_AEVAL_IN_GROUP_UNKNOWN;
+		else if (ismember)
 			eval.ae_options |= KAUTH_AEVAL_IN_GROUP;
 		eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS;
 		eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS;
@@ -5537,14 +6104,6 @@ vnode_authorize_simple(vauth_ctx vcp, kauth_ace_rights_t acl_rights, kauth_ace_r
 			KAUTH_DEBUG("%p    ALLOWED - all rights granted by ACL", vcp->vp);
 			return(0);
 		case KAUTH_RESULT_DEFER:
-			/*
-			 * If we don't have a POSIX answer of "yes", and we
-			 * can't get an ACL answer, then we deny it now.
-			 */
-			if (posix_error == ENOENT) {
-				KAUTH_DEBUG("%p    DENIED(DEFERRED) - by ACL", vcp->vp);
-				return(EACCES);		/* deny, deny, counter-allege */
-			}
 		default:
 			/* Effectively the same as !delete_child_denied */
 			KAUTH_DEBUG("%p    DEFERRED - directory ACL", vcp->vp);
@@ -5866,8 +6425,8 @@ vnode_authorize_callback(kauth_cred_t cred, void *idata, kauth_action_t action,
 		 * find the stream and flush its cache.
 		 */
 		if (vnode_isnamedstream(vp) && (!vfs_authopaque(vp->v_mount))) {
-			cvp = vp->v_parent;
-			if ((cvp != NULLVP) && (vnode_getwithref(cvp) == 0)) {
+			cvp = vnode_getparent(vp);
+			if (cvp != NULLVP) {
 				parent_iocount = 1;
 			} else {
 				cvp = NULL;
@@ -5897,8 +6456,10 @@ vnode_authorize_callback(kauth_cred_t cred, void *idata, kauth_action_t action,
 defer:
         result = vnode_authorize_callback_int(cred, idata, action, arg0, arg1, arg2, arg3);
 
-	if (result == KAUTH_RESULT_ALLOW && cvp != NULLVP)
+	if (result == KAUTH_RESULT_ALLOW && cvp != NULLVP) {
+		KAUTH_DEBUG("%p - caching action = %x", cvp, action);
 	        vnode_cache_authorized_action(cvp, ctx, action);
+	}
 
 out:
 	if (parent_iocount) {
@@ -6068,7 +6629,7 @@ vnode_authorize_callback_int(__unused kauth_cred_t unused_cred, __unused void *i
 	 */
 	if (vnode_isnamedstream(vp) &&
 	    (vp->v_parent != NULL) &&
-	    (vget_internal(vp->v_parent, 0, VNODE_NODEAD) == 0)) {
+	    (vget_internal(vp->v_parent, 0, VNODE_NODEAD | VNODE_DRAINO) == 0)) {
 		parent_ref = TRUE;
 		vcp->vp = vp = vp->v_parent;
 		if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL))
@@ -6175,6 +6736,7 @@ out:
 		if (VATTR_IS_SUPPORTED(&dva, va_mode) &&
 		    !(dva.va_mode & (S_ISVTX))) {
 		    	/* OK to cache delete rights */
+			KAUTH_DEBUG("%p - caching DELETE_CHILD rights", dvp);
 			vnode_cache_authorized_action(dvp, ctx, KAUTH_VNODE_DELETE_CHILD);
 		}
 	}
@@ -6188,12 +6750,18 @@ out:
 	return(KAUTH_RESULT_ALLOW);
 }
 
+int 
+vnode_authattr_new(vnode_t dvp, struct vnode_attr *vap, int noauth, vfs_context_t ctx)
+{
+	return vnode_authattr_new_internal(dvp, vap, noauth, NULL, ctx);
+}
+
 /*
  * Check that the attribute information in vattr can be legally applied to
  * a new file by the context.
  */
-int
-vnode_authattr_new(vnode_t dvp, struct vnode_attr *vap, int noauth, vfs_context_t ctx)
+static int
+vnode_authattr_new_internal(vnode_t dvp, struct vnode_attr *vap, int noauth, uint32_t *defaulted_fieldsp, vfs_context_t ctx)
 {
 	int		error;
 	int		has_priv_suser, ismember, defaulted_owner, defaulted_group, defaulted_mode;
@@ -6202,6 +6770,11 @@ vnode_authattr_new(vnode_t dvp, struct vnode_attr *vap, int noauth, vfs_context_
 	mount_t		dmp;
 
 	error = 0;
+
+	if (defaulted_fieldsp) {
+		*defaulted_fieldsp = 0;
+	}
+
 	defaulted_owner = defaulted_group = defaulted_mode = 0;
 
 	/*
@@ -6384,6 +6957,17 @@ vnode_authattr_new(vnode_t dvp, struct vnode_attr *vap, int noauth, vfs_context_
 		}
 	}
 out:	
+	if (defaulted_fieldsp) {
+		if (defaulted_mode) {
+			*defaulted_fieldsp |= VATTR_PREPARE_DEFAULTED_MODE;
+		}
+		if (defaulted_group) {
+			*defaulted_fieldsp |= VATTR_PREPARE_DEFAULTED_GID;
+		}
+		if (defaulted_owner) {
+			*defaulted_fieldsp |= VATTR_PREPARE_DEFAULTED_UID;
+		}
+	}
 	return(error);
 }
 
@@ -6481,6 +7065,14 @@ vnode_authattr(vnode_t vp, struct vnode_attr *vap, kauth_action_t *actionp, vfs_
 		VATTR_WANTED(&ova, va_flags);
 	}
 
+	/*
+	 * If ACLs are being changed, we need the old ACLs.
+	 */
+	if (VATTR_IS_ACTIVE(vap, va_acl)) {
+		KAUTH_DEBUG("ATTR - acl changing, fetching old flags");
+		VATTR_WANTED(&ova, va_acl);
+	}
+
 	/*
 	 * If the size is being set, make sure it's not a directory.
 	 */
@@ -6886,7 +7478,7 @@ no_guuid_change:
 				KAUTH_DEBUG("CHMOD - adding/removing ACL entries");
 			} else if (vap->va_acl->acl_entrycount > 0) {
 				/* both ACLs have the same ACE count, said count is 1 or more, bitwise compare ACLs */
-				if (!memcmp(&vap->va_acl->acl_ace[0], &ova.va_acl->acl_ace[0],
+				if (memcmp(&vap->va_acl->acl_ace[0], &ova.va_acl->acl_ace[0],
 					sizeof(struct kauth_ace) * vap->va_acl->acl_entrycount)) {
 					required_action |= KAUTH_VNODE_WRITE_SECURITY;
 					KAUTH_DEBUG("CHMOD - changing ACL entries");
@@ -6909,29 +7501,28 @@ out:
 	return(error);
 }
 
+static int
+setlocklocal_callback(struct vnode *vp, __unused void *cargs)
+{
+	vnode_lock_spin(vp);
+	vp->v_flag |= VLOCKLOCAL;
+	vnode_unlock(vp);
+
+	return (VNODE_RETURNED);
+}
 
 void
 vfs_setlocklocal(mount_t mp)
 {
-	vnode_t vp;
-	
-	mount_lock(mp);
+	mount_lock_spin(mp);
 	mp->mnt_kern_flag |= MNTK_LOCK_LOCAL;
+	mount_unlock(mp);
 
 	/*
-	 * We do not expect anyone to be using any vnodes at the
-	 * time this routine is called. So no need for vnode locking 
+	 * The number of active vnodes is expected to be
+	 * very small when vfs_setlocklocal is invoked.
 	 */
-	TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
-			vp->v_flag |= VLOCKLOCAL;
-	}
-	TAILQ_FOREACH(vp, &mp->mnt_workerqueue, v_mntvnodes) {
-			vp->v_flag |= VLOCKLOCAL;
-	}
-	TAILQ_FOREACH(vp, &mp->mnt_newvnodes, v_mntvnodes) {
-			vp->v_flag |= VLOCKLOCAL;
-	}
-	mount_unlock(mp);
+	vnode_iterate(mp, 0, setlocklocal_callback, NULL);
 }
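vfs_setlocklocal() now flags vnodes through vnode_iterate() and a callback
rather than walking the mount's vnode lists by hand. A minimal sketch of
that callback pattern under toy types; struct vnode and iterate() below are
stand-ins, not the kernel's:

    #include <stddef.h>
    #include <stdio.h>

    #define VNODE_RETURNED 0
    #define VLOCKLOCAL 0x1

    struct vnode { int v_flag; struct vnode *next; };

    /* Same shape as setlocklocal_callback() above (minus the locking). */
    static int set_flag_cb(struct vnode *vp, void *arg)
    {
        (void)arg;
        vp->v_flag |= VLOCKLOCAL;
        return VNODE_RETURNED;
    }

    /* Toy stand-in for vnode_iterate(): apply cb to every vnode. */
    static void iterate(struct vnode *head,
        int (*cb)(struct vnode *, void *), void *arg)
    {
        for (struct vnode *vp = head; vp != NULL; vp = vp->next)
            (void)cb(vp, arg);
    }

    int main(void)
    {
        struct vnode b = { 0, NULL }, a = { 0, &b };

        iterate(&a, set_flag_cb, NULL);
        printf("%d %d\n", a.v_flag, b.v_flag); /* 1 1 */
        return 0;
    }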
 
 void
@@ -6942,6 +7533,14 @@ vfs_setunmountpreflight(mount_t mp)
 	mount_unlock(mp);
 }
 
+void
+vfs_setcompoundopen(mount_t mp)
+{
+	mount_lock_spin(mp);
+	mp->mnt_compound_ops |= COMPOUND_VNOP_OPEN;
+	mount_unlock(mp);
+}
+
 void
 vn_setunionwait(vnode_t vp)
 {
@@ -7146,13 +7745,17 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t vp , vfs_context_t ctx, int *
 					!((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
 					    (dp->d_namlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.'))
 					  ) {
-
-				NDINIT(&nd_temp, DELETE, USEDVP, UIO_SYSSPACE, CAST_USER_ADDR_T(dp->d_name), ctx);
+
+				NDINIT(&nd_temp, DELETE, OP_UNLINK, USEDVP,
+				       UIO_SYSSPACE, CAST_USER_ADDR_T(dp->d_name),
+				       ctx);
 				nd_temp.ni_dvp = vp;
 				error = unlink1(ctx, &nd_temp, 0);
-				if (error && error != ENOENT) {
+
+				if (error && error != ENOENT) {
 					goto outsc;
 				}
+
 			}
 			cpos += dp->d_reclen;
 			dp = (struct dirent*)cpos;
@@ -7208,21 +7811,645 @@ lock_vnode_and_post(vnode_t vp, int kevent_num)
 #ifdef JOE_DEBUG
 static void record_vp(vnode_t vp, int count) {
         struct uthread *ut;
-        int  i;
 
+#if CONFIG_TRIGGERS
+	if (vp->v_resolve)
+		return;
+#endif
 	if ((vp->v_flag & VSYSTEM))
 	        return;
 
 	ut = get_bsdthread_info(current_thread());
         ut->uu_iocount += count;
 
-	if (ut->uu_vpindex < 32) {
-	        for (i = 0; i < ut->uu_vpindex; i++) {
-		        if (ut->uu_vps[i] == vp)
-			        return;
+	if (count == 1) {
+		if (ut->uu_vpindex < 32) {
+			OSBacktrace((void **)&ut->uu_pcs[ut->uu_vpindex][0], 10);
+
+			ut->uu_vps[ut->uu_vpindex] = vp;
+			ut->uu_vpindex++;
 		}
-		ut->uu_vps[ut->uu_vpindex] = vp;
-		ut->uu_vpindex++;
 	}
 }
 #endif
+
+
+#if CONFIG_TRIGGERS
+
+#define TRIG_DEBUG 0
+
+#if TRIG_DEBUG
+#define TRIG_LOG(...) do { printf("%s: ", __FUNCTION__); printf(__VA_ARGS__); } while (0)
+#else
+#define TRIG_LOG(...)
+#endif
+
+/*
+ * Resolver result functions
+ */
+
+resolver_result_t
+vfs_resolver_result(uint32_t seq, enum resolver_status stat, int aux)
+{
+	/*
+	 * |<---   32   --->|<---  28  --->|<- 4 ->|
+	 *      sequence        auxiliary    status
+	 */
+	return (((uint64_t)seq) << 32) |
+	       (((uint64_t)(aux & 0x0fffffff)) << 4) |
+	       (uint64_t)(stat & 0x0000000F);
+}
+
+enum resolver_status
+vfs_resolver_status(resolver_result_t result)
+{
+	/* lower 4 bits is status */
+	return (result & 0x0000000F);
+}
+
+uint32_t
+vfs_resolver_sequence(resolver_result_t result)
+{
+	/* upper 32 bits is sequence */
+	return (uint32_t)(result >> 32);
+}
+
+int
+vfs_resolver_auxiliary(resolver_result_t result)
+{
+	/* 28 bits of auxiliary */
+	return (int)(((uint32_t)(result & 0xFFFFFFF0)) >> 4);
+}
+
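The packing above can be checked in isolation; a standalone round-trip of
the same 32/28/4 bit layout. pack() is a local stand-in for
vfs_resolver_result(), and the masks mirror the three accessors:

    #include <assert.h>
    #include <stdint.h>

    typedef uint64_t resolver_result_t;

    /* seq in the top 32 bits, aux in the next 28, status in the low 4 */
    static resolver_result_t pack(uint32_t seq, int stat, int aux)
    {
        return (((uint64_t)seq) << 32) |
               (((uint64_t)(aux & 0x0fffffff)) << 4) |
               (uint64_t)(stat & 0x0000000F);
    }

    int main(void)
    {
        resolver_result_t r = pack(42, 2, 12345);

        assert((uint32_t)(r >> 32) == 42);                         /* sequence */
        assert((int)(((uint32_t)(r & 0xFFFFFFF0)) >> 4) == 12345); /* auxiliary */
        assert((r & 0x0000000F) == 2);                             /* status */
        return 0;
    }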
+/*
+ * SPI
+ * Call in for resolvers to update vnode trigger state
+ */
+int
+vnode_trigger_update(vnode_t vp, resolver_result_t result)
+{
+	vnode_resolve_t rp;
+	uint32_t seq;
+	enum resolver_status stat;
+
+	if (vp->v_resolve == NULL) {
+		return (EINVAL);
+	}
+
+	stat = vfs_resolver_status(result);
+	seq = vfs_resolver_sequence(result);
+
+	if ((stat != RESOLVER_RESOLVED) && (stat != RESOLVER_UNRESOLVED)) {
+		return (EINVAL);
+	}
+
+	rp = vp->v_resolve;
+	lck_mtx_lock(&rp->vr_lock);
+
+	if (seq > rp->vr_lastseq) {
+		if (stat == RESOLVER_RESOLVED)
+			rp->vr_flags |= VNT_RESOLVED;
+		else
+			rp->vr_flags &= ~VNT_RESOLVED;
+
+		rp->vr_lastseq = seq;
+	}
+
+	lck_mtx_unlock(&rp->vr_lock);
+
+	return (0);
+}
+
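vnode_trigger_update() applies a result only when its sequence number is
strictly newer than vr_lastseq, so stale resolver answers are silently
dropped. A toy sketch of that monotonic guard (struct resolver and update()
are made-up names):

    #include <stdint.h>
    #include <stdio.h>

    struct resolver { uint32_t lastseq; int resolved; };

    /* Apply a result only if its sequence is newer, as
     * vnode_trigger_update() does with vr_lastseq. */
    static void update(struct resolver *rp, uint32_t seq, int resolved)
    {
        if (seq > rp->lastseq) {
            rp->resolved = resolved;
            rp->lastseq = seq;
        }
    }

    int main(void)
    {
        struct resolver r = { 0, 0 };

        update(&r, 2, 1);   /* applied */
        update(&r, 1, 0);   /* stale: ignored */
        printf("resolved=%d lastseq=%u\n", r.resolved, r.lastseq);
        return 0;
    }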
+static int
+vnode_resolver_attach(vnode_t vp, vnode_resolve_t rp, boolean_t ref)
+{
+	int error;
+
+	vnode_lock_spin(vp);
+	if (vp->v_resolve != NULL) {
+		vnode_unlock(vp);
+		return EINVAL;
+	} else {
+		vp->v_resolve = rp;
+	}
+	vnode_unlock(vp);
+	
+	if (ref) {
+		error = vnode_ref_ext(vp, O_EVTONLY, VNODE_REF_FORCE);
+		if (error != 0) {
+			panic("VNODE_REF_FORCE didn't help...");
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * VFS internal interfaces for vnode triggers
+ *
+ * vnode must already have an io count on entry
+ * v_resolve is stable when io count is non-zero
+ */
+static int
+vnode_resolver_create(mount_t mp, vnode_t vp, struct vnode_trigger_param *tinfo, boolean_t external)
+{
+	vnode_resolve_t rp;
+	int result;
+	char byte;
+
+#if 1
+	/* minimum pointer test (debugging) */
+	if (tinfo->vnt_data)
+		byte = *((char *)tinfo->vnt_data);
+#endif
+	MALLOC(rp, vnode_resolve_t, sizeof(*rp), M_TEMP, M_WAITOK);
+	if (rp == NULL)
+		return (ENOMEM);
+
+	lck_mtx_init(&rp->vr_lock, trigger_vnode_lck_grp, trigger_vnode_lck_attr);
+
+	rp->vr_resolve_func = tinfo->vnt_resolve_func;
+	rp->vr_unresolve_func = tinfo->vnt_unresolve_func;
+	rp->vr_rearm_func = tinfo->vnt_rearm_func;
+	rp->vr_reclaim_func = tinfo->vnt_reclaim_func;
+	rp->vr_data = tinfo->vnt_data;
+	rp->vr_lastseq = 0;
+	rp->vr_flags = tinfo->vnt_flags & VNT_VALID_MASK;
+	if (external) {
+		rp->vr_flags |= VNT_EXTERNAL;
+	}
+
+	result = vnode_resolver_attach(vp, rp, external);
+	if (result != 0) {
+		goto out;
+	}
+
+	if (mp) {
+		OSAddAtomic(1, &mp->mnt_numtriggers);
+	}
+
+	return (result);
+
+out:
+	FREE(rp, M_TEMP);
+	return result;
+}
+
+static void
+vnode_resolver_release(vnode_resolve_t rp)
+{
+	/*
+	 * Give them a chance to free any private data
+	 */
+	if (rp->vr_data && rp->vr_reclaim_func) {
+		rp->vr_reclaim_func(NULLVP, rp->vr_data);
+	}
+
+	lck_mtx_destroy(&rp->vr_lock, trigger_vnode_lck_grp);
+	FREE(rp, M_TEMP);
+
+}
+
+/* Called after the vnode has been drained */
+static void
+vnode_resolver_detach(vnode_t vp)
+{
+	vnode_resolve_t rp;
+	mount_t	mp;
+
+	mp = vnode_mount(vp);
+
+	vnode_lock(vp);
+	rp = vp->v_resolve;
+	vp->v_resolve = NULL;
+	vnode_unlock(vp);
+
+	if ((rp->vr_flags & VNT_EXTERNAL) != 0) {
+		vnode_rele_ext(vp, O_EVTONLY, 1);
+	} 
+
+	vnode_resolver_release(rp);
+	
+	/* Keep count of active trigger vnodes per mount */
+	OSAddAtomic(-1, &mp->mnt_numtriggers);	
+}
+
+/*
+ * Pathname operations that don't trigger a mount for trigger vnodes
+ */
+static const u_int64_t ignorable_pathops_mask =
+	1LL << OP_MOUNT |
+	1LL << OP_UNMOUNT |
+	1LL << OP_STATFS |
+	1LL << OP_ACCESS |
+	1LL << OP_GETATTR |
+	1LL << OP_LISTXATTR;
+
+int
+vfs_istraditionaltrigger(enum path_operation op, const struct componentname *cnp)
+{
+	if (cnp->cn_flags & ISLASTCN)
+		return ((1LL << op) & ignorable_pathops_mask) == 0;
+	else
+		return (1);
+}
+
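A standalone sketch of the same one-bit-per-operation membership test, with
a made-up op enumeration standing in for enum path_operation:

    #include <stdio.h>

    enum op { OP_LOOKUP = 0, OP_MOUNT = 1, OP_STATFS = 2, OP_OPEN = 3 };

    /* One bit per operation, as in ignorable_pathops_mask above. */
    static const unsigned long long ignorable =
        1ULL << OP_MOUNT |
        1ULL << OP_STATFS;

    int main(void)
    {
        for (int o = OP_LOOKUP; o <= OP_OPEN; o++)
            printf("op %d triggers: %s\n", o,
                ((1ULL << o) & ignorable) ? "no" : "yes");
        return 0;
    }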
+__private_extern__
+void
+vnode_trigger_rearm(vnode_t vp, vfs_context_t ctx)
+{
+	vnode_resolve_t rp;
+	resolver_result_t result;
+	enum resolver_status status;
+	uint32_t seq;
+
+	if ((vp->v_resolve == NULL) ||
+	    (vp->v_resolve->vr_rearm_func == NULL) ||
+	    (vp->v_resolve->vr_flags & VNT_AUTO_REARM) == 0) {
+		return;
+	}
+
+	rp = vp->v_resolve;
+	lck_mtx_lock(&rp->vr_lock);
+
+	/*
+	 * Check if VFS initiated this unmount. If so, we'll catch it after the unresolve completes.
+	 */
+	if (rp->vr_flags & VNT_VFS_UNMOUNTED) {
+		lck_mtx_unlock(&rp->vr_lock);
+		return;
+	}
+
+	/* Check if this vnode is already armed */
+	if ((rp->vr_flags & VNT_RESOLVED) == 0) {
+		lck_mtx_unlock(&rp->vr_lock);
+		return;
+	}
+
+	lck_mtx_unlock(&rp->vr_lock);
+
+	result = rp->vr_rearm_func(vp, 0, rp->vr_data, ctx);
+	status = vfs_resolver_status(result);
+	seq = vfs_resolver_sequence(result);
+
+	lck_mtx_lock(&rp->vr_lock);
+	if (seq > rp->vr_lastseq) {
+		if (status == RESOLVER_UNRESOLVED)
+			rp->vr_flags &= ~VNT_RESOLVED;
+		rp->vr_lastseq = seq;
+	}
+	lck_mtx_unlock(&rp->vr_lock);
+}
+
+__private_extern__
+int
+vnode_trigger_resolve(vnode_t vp, struct nameidata *ndp, vfs_context_t ctx)
+{
+	vnode_resolve_t rp;
+	enum path_operation op;
+	resolver_result_t result;
+	enum resolver_status status;
+	uint32_t seq;
+
+	/* Only trigger on topmost vnodes */
+	if ((vp->v_resolve == NULL) ||
+	    (vp->v_resolve->vr_resolve_func == NULL) ||
+	    (vp->v_mountedhere != NULL)) {
+		return (0);
+	}
+
+	rp = vp->v_resolve;
+	lck_mtx_lock(&rp->vr_lock);
+
+	/* Check if this vnode is already resolved */
+	if (rp->vr_flags & VNT_RESOLVED) {
+		lck_mtx_unlock(&rp->vr_lock);
+		return (0);
+	}
+
+	lck_mtx_unlock(&rp->vr_lock);
+
+	/*
+	 * XXX
+	 * assumes that resolver will not access this trigger vnode (otherwise the kernel will deadlock)
+	 * is there any way to know this???
+	 * there can also be other legitimate lookups in parallel
+	 *
+	 * XXX - should we call this on a separate thread with a timeout?
+	 * 
+	 * XXX - should we use ISLASTCN to pick the op value???  Perhaps only leafs should
+	 * get the richer set and non-leafs should get generic OP_LOOKUP?  TBD
+	 */
+	op = (ndp->ni_op < OP_MAXOP) ? ndp->ni_op: OP_LOOKUP;
+
+	result = rp->vr_resolve_func(vp, &ndp->ni_cnd, op, 0, rp->vr_data, ctx);
+	status = vfs_resolver_status(result);
+	seq = vfs_resolver_sequence(result);
+
+	lck_mtx_lock(&rp->vr_lock);
+	if (seq > rp->vr_lastseq) {
+		if (status == RESOLVER_RESOLVED)
+			rp->vr_flags |= VNT_RESOLVED;
+		rp->vr_lastseq = seq;
+	}
+	lck_mtx_unlock(&rp->vr_lock);
+
+	/* On resolver errors, propagate the error back up */
+	return (status == RESOLVER_ERROR ? vfs_resolver_auxiliary(result) : 0);
+}
+
+static int
+vnode_trigger_unresolve(vnode_t vp, int flags, vfs_context_t ctx)
+{
+	vnode_resolve_t rp;
+	resolver_result_t result;
+	enum resolver_status status;
+	uint32_t seq;
+
+	if ((vp->v_resolve == NULL) || (vp->v_resolve->vr_unresolve_func == NULL)) {
+		return (0);
+	}
+
+	rp = vp->v_resolve;
+	lck_mtx_lock(&rp->vr_lock);
+
+	/* Check if this vnode is already resolved */
+	if ((rp->vr_flags & VNT_RESOLVED) == 0) {
+		printf("vnode_trigger_unresolve: not currently resolved\n");
+		lck_mtx_unlock(&rp->vr_lock);
+		return (0);
+	}
+
+	rp->vr_flags |= VNT_VFS_UNMOUNTED;
+
+	lck_mtx_unlock(&rp->vr_lock);
+
+	/*
+	 * XXX
+	 * assumes that resolver will not access this trigger vnode (otherwise the kernel will deadlock)
+	 * there can also be other legitimate lookups in parallel
+	 *
+	 * XXX - should we call this on a separate thread with a timeout?
+	 */
+
+	result = rp->vr_unresolve_func(vp, flags, rp->vr_data, ctx);
+	status = vfs_resolver_status(result);
+	seq = vfs_resolver_sequence(result);
+
+	lck_mtx_lock(&rp->vr_lock);
+	if (seq > rp->vr_lastseq) {
+		if (status == RESOLVER_UNRESOLVED)
+			rp->vr_flags &= ~VNT_RESOLVED;
+		rp->vr_lastseq = seq;
+	}
+	rp->vr_flags &= ~VNT_VFS_UNMOUNTED;
+	lck_mtx_unlock(&rp->vr_lock);
+
+	/* On resolver errors, propagate the error back up */
+	return (status == RESOLVER_ERROR ? vfs_resolver_auxiliary(result) : 0);
+}
+
+static int
+triggerisdescendant(mount_t mp, mount_t rmp)
+{
+	int match = FALSE;
+
+	/*
+	 * walk up vnode covered chain looking for a match
+	 */
+	name_cache_lock_shared();
+
+	while (1) {
+		vnode_t vp;
+
+		/* did we encounter "/" ? */
+		if (mp->mnt_flag & MNT_ROOTFS)
+			break;
+
+		vp = mp->mnt_vnodecovered;
+		if (vp == NULLVP)
+			break;
+
+		mp = vp->v_mount;
+		if (mp == rmp) {
+			match = TRUE;
+			break;
+		}
+	}
+
+	name_cache_unlock();
+
+	return (match);
+}
+
+struct trigger_unmount_info {
+	vfs_context_t	ctx;
+	mount_t		top_mp;
+	vnode_t		trigger_vp;
+	mount_t		trigger_mp;
+	uint32_t	trigger_vid;
+	int		flags;
+};
+
+static int
+trigger_unmount_callback(mount_t mp, void * arg)
+{
+	struct trigger_unmount_info * infop = (struct trigger_unmount_info *)arg;
+	boolean_t mountedtrigger = FALSE;
+
+	/*
+	 * When we encounter the top level mount we're done
+	 */
+	if (mp == infop->top_mp)
+		return (VFS_RETURNED_DONE);
+
+	if ((mp->mnt_vnodecovered == NULL) ||
+	    (vnode_getwithref(mp->mnt_vnodecovered) != 0)) {
+		return (VFS_RETURNED);
+	}
+
+	if ((mp->mnt_vnodecovered->v_mountedhere == mp) &&
+	    (mp->mnt_vnodecovered->v_resolve != NULL) &&
+	    (mp->mnt_vnodecovered->v_resolve->vr_flags & VNT_RESOLVED)) {
+		mountedtrigger = TRUE;
+	}
+	vnode_put(mp->mnt_vnodecovered);
+
+	/*
+	 * When we encounter a mounted trigger, check if it's under the top level mount
+	 */
+	if ( !mountedtrigger || !triggerisdescendant(mp, infop->top_mp) )
+		return (VFS_RETURNED);
+
+	/*
+	 * Process any pending nested mount (now that it's not referenced)
+	 */
+	if ((infop->trigger_vp != NULLVP) &&
+	    (vnode_getwithvid(infop->trigger_vp, infop->trigger_vid) == 0)) {
+		vnode_t vp = infop->trigger_vp;
+		int error;
+
+		infop->trigger_vp = NULLVP;
+		
+		if (mp == vp->v_mountedhere) {
+			vnode_put(vp);
+			printf("trigger_unmount_callback: unexpected match '%s'\n",
+				mp->mnt_vfsstat.f_mntonname);
+			return (VFS_RETURNED);
+		}
+		if (infop->trigger_mp != vp->v_mountedhere) {
+			vnode_put(vp);
+			printf("trigger_unmount_callback: trigger mnt changed! (%p != %p)\n",
+				infop->trigger_mp, vp->v_mountedhere);
+			goto savenext;
+		}
+
+		error = vnode_trigger_unresolve(vp, infop->flags, infop->ctx);
+		vnode_put(vp);
+		if (error) {
+			printf("unresolving: '%s', err %d\n",
+				vp->v_mountedhere ? vp->v_mountedhere->mnt_vfsstat.f_mntonname :
+				"???", error);
+			return (VFS_RETURNED_DONE); /* stop iteration on errors */
+		}
+	}
+savenext:
+	/*
+	 * We can't call resolver here since we hold a mount iter
+	 * ref on mp so save its covered vp for later processing
+	 */
+	infop->trigger_vp = mp->mnt_vnodecovered;
+	if ((infop->trigger_vp != NULLVP) &&
+	    (vnode_getwithref(infop->trigger_vp) == 0)) {
+		if (infop->trigger_vp->v_mountedhere == mp) {
+			infop->trigger_vid = infop->trigger_vp->v_id;
+			infop->trigger_mp = mp;
+		}
+		vnode_put(infop->trigger_vp);
+	}
+
+	return (VFS_RETURNED);
+}
+
+/*
+ * Attempt to unmount any trigger mounts nested underneath a mount.
+ * This is a best effort attempt and no retries are performed here.
+ *
+ * Note: mp->mnt_rwlock is held exclusively on entry (so be careful)
+ */
+__private_extern__
+void
+vfs_nested_trigger_unmounts(mount_t mp, int flags, vfs_context_t ctx)
+{
+	struct trigger_unmount_info info;
+
+	/* Must have trigger vnodes */
+	if (mp->mnt_numtriggers == 0) {
+		return;
+	}
+	/* Avoid recursive requests (by checking covered vnode) */
+	if ((mp->mnt_vnodecovered != NULL) &&
+	    (vnode_getwithref(mp->mnt_vnodecovered) == 0)) {
+		boolean_t recursive = FALSE;
+
+		if ((mp->mnt_vnodecovered->v_mountedhere == mp) &&
+		    (mp->mnt_vnodecovered->v_resolve != NULL) &&
+		    (mp->mnt_vnodecovered->v_resolve->vr_flags & VNT_VFS_UNMOUNTED)) {
+			recursive = TRUE;
+		}
+		vnode_put(mp->mnt_vnodecovered);
+		if (recursive)
+			return;
+	}
+
+	/*
+	 * Attempt to unmount any nested trigger mounts (best effort)
+	 */
+	info.ctx = ctx;
+	info.top_mp = mp;
+	info.trigger_vp = NULLVP;
+	info.trigger_vid = 0;
+	info.trigger_mp = NULL;
+	info.flags = flags;
+
+	(void) vfs_iterate(VFS_ITERATE_TAIL_FIRST, trigger_unmount_callback, &info);
+
+	/*
+	 * Process the remaining nested mount (now that it's not referenced)
+	 */
+	if ((info.trigger_vp != NULLVP) &&
+	    (vnode_getwithvid(info.trigger_vp, info.trigger_vid) == 0)) {
+		vnode_t vp = info.trigger_vp;
+
+		if (info.trigger_mp == vp->v_mountedhere) {
+			(void) vnode_trigger_unresolve(vp, flags, ctx);
+		}
+		vnode_put(vp);
+	}
+}
+
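Because the callback holds a mount iteration reference on mp, it cannot
unresolve the current trigger directly; each covered vnode is saved and
processed one callback later, with a final pass after the loop. A toy
sketch of that defer-one-behind pattern with plain ints:

    #include <stdio.h>

    int main(void)
    {
        int items[] = { 1, 2, 3, 4 };
        int pending = -1;   /* nothing deferred yet */

        for (int i = 0; i < 4; i++) {
            if (pending != -1)
                printf("process %d\n", pending);
            pending = items[i]; /* defer the current item */
        }
        if (pending != -1)
            printf("process %d\n", pending); /* final leftover */
        return 0;
    }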
+int
+vfs_addtrigger(mount_t mp, const char *relpath, struct vnode_trigger_info *vtip, vfs_context_t ctx)
+{
+	struct nameidata nd;
+	int res;
+	vnode_t rvp, vp;
+	struct vnode_trigger_param vtp;
+	
+	/* 
+	 * Must be called from a trigger callback, wherein the rwlock is held
+	 */
+	lck_rw_assert(&mp->mnt_rwlock, LCK_RW_ASSERT_HELD);
+
+	TRIG_LOG("Adding trigger at %s\n", relpath);
+	TRIG_LOG("Trying VFS_ROOT\n");
+
+	/* 
+	 * We do a lookup starting at the root of the mountpoint, unwilling
+	 * to cross into other mountpoints.
+	 */
+	res = VFS_ROOT(mp, &rvp, ctx);
+	if (res != 0) {
+		goto out;
+	}
+
+	TRIG_LOG("Trying namei\n");
+
+	NDINIT(&nd, LOOKUP, OP_LOOKUP, USEDVP | NOCROSSMOUNT | FOLLOW, UIO_SYSSPACE,
+		CAST_USER_ADDR_T(relpath), ctx);
+	nd.ni_dvp = rvp;
+	res = namei(&nd);
+	if (res != 0) {
+		vnode_put(rvp);
+		goto out;
+	}
+	
+	vp = nd.ni_vp;
+	nameidone(&nd);
+	vnode_put(rvp);
+
+	TRIG_LOG("Trying vnode_resolver_create()\n");
+
+	/* 
+	 * Set up blob.  vnode_create() takes a larger structure
+	 * with creation info, and we needed something different
+	 * for this case.  One needs to win, or we need to munge both;
+	 * vnode_create() wins.
+	 */
+	bzero(&vtp, sizeof(vtp));
+	vtp.vnt_resolve_func = vtip->vti_resolve_func;
+	vtp.vnt_unresolve_func = vtip->vti_unresolve_func;
+	vtp.vnt_rearm_func = vtip->vti_rearm_func;
+	vtp.vnt_reclaim_func = vtip->vti_reclaim_func;
+	vtp.vnt_data = vtip->vti_data;
+	vtp.vnt_flags = vtip->vti_flags;
+
+	res = vnode_resolver_create(mp, vp, &vtp, TRUE);
+	vnode_put(vp);
+out:
+	TRIG_LOG("Returning %d\n", res);
+	return res;
+}
+
+#endif /* CONFIG_TRIGGERS */
diff --git a/bsd/vfs/vfs_syscalls.c b/bsd/vfs/vfs_syscalls.c
index 02c3c39af..3d9b4591b 100644
--- a/bsd/vfs/vfs_syscalls.c
+++ b/bsd/vfs/vfs_syscalls.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1995-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 1995-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -92,6 +92,7 @@
 #include <sys/quota.h>
 #include <sys/kdebug.h>
 #include <sys/fsevents.h>
+#include <sys/imgsrc.h>
 #include <sys/sysproto.h>
 #include <sys/xattr.h>
 #include <sys/fcntl.h>
@@ -101,7 +102,6 @@
 #include <machine/cons.h>
 #include <machine/limits.h>
 #include <miscfs/specfs/specdev.h>
-#include <miscfs/union/union.h>
 
 #include <security/audit/audit.h>
 #include <bsm/audit_kevents.h>
@@ -109,6 +109,7 @@
 #include <mach/mach_types.h>
 #include <kern/kern_types.h>
 #include <kern/kalloc.h>
+#include <kern/task.h>
 
 #include <vm/vm_pageout.h>
 
@@ -153,15 +154,21 @@ static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
 			user_addr_t bufp);
 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
+static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
+                        struct componentname *cnp, user_addr_t fsmountargs,
+                        int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
+                        vfs_context_t ctx);
+void vfs_notify_mount(vnode_t pdvp);
+
+int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
 
 #ifdef CONFIG_IMGSRC_ACCESS
-static int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname);
 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
 static void mount_end_update(mount_t mp);
-static int relocate_imageboot_source(vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs);
+static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
 #endif /* CONFIG_IMGSRC_ACCESS */
 
 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
@@ -220,6 +227,60 @@ extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
  * Virtual File System System Calls
  */
 
+#if NFSCLIENT
+/*
+ * Private in-kernel mounting spi (NFS only, not exported)
+ */
+__private_extern__
+boolean_t
+vfs_iskernelmount(mount_t mp)
+{
+	return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
+}
+
+__private_extern__
+int
+kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
+             void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
+{
+	struct nameidata nd;
+	boolean_t did_namei;
+	int error;
+
+	NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT, 
+	       UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
+
+	/*
+	 * Get the vnode to be covered if it's not supplied
+	 */
+	if (vp == NULLVP) {
+		error = namei(&nd);
+		if (error)
+			return (error);
+		vp = nd.ni_vp;
+		pvp = nd.ni_dvp;
+		did_namei = TRUE;
+	} else {
+		char *pnbuf = CAST_DOWN(char *, path);
+
+		nd.ni_cnd.cn_pnbuf = pnbuf;
+		nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
+		did_namei = FALSE;
+	}
+
+	error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
+	                     syscall_flags, kern_flags, NULL, TRUE, ctx);
+
+	if (did_namei) {
+		vnode_put(vp);
+		vnode_put(pvp);
+		nameidone(&nd);
+	}
+
+	return (error);
+}
+#endif /* NFSCLIENT */
+
 /*
  * Mount a file system.
  */
@@ -237,6 +298,13 @@ mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
 	return (__mac_mount(p, &muap, retval));
 }
 
+void
+vfs_notify_mount(vnode_t pdvp) 
+{
+	vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
+	lock_vnode_and_post(pdvp, NOTE_WRITE);
+}
+
 /*
  * __mac_mount:
  *	Mount a file system taking into account MAC label behavior.
@@ -256,10 +324,135 @@ mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
  * Returns:        0                       Success
  *                !0                       Not success
  */
+boolean_t root_fs_upgrade_try = FALSE;
+
 int
 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
 {
-	struct vnode *vp, *pvp;
+	vnode_t pvp, vp;
+	vfs_context_t ctx = vfs_context_current();
+	char fstypename[MFSNAMELEN];
+	struct nameidata nd;
+	size_t dummy=0;
+	char *labelstr = NULL;
+	int flags = uap->flags;
+	int error;
+	boolean_t is_64bit = IS_64BIT_PROCESS(p);
+
+	/*
+	 * Get the fs type name from user space
+	 */
+	error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
+	if (error)
+		return (error);
+
+	/*
+	 * Get the vnode to be covered
+	 */
+	NDINIT(&nd, LOOKUP, OP_MOUNT, NOTRIGGER | FOLLOW | AUDITVNPATH1 | WANTPARENT, 
+	       UIO_USERSPACE, uap->path, ctx);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	vp = nd.ni_vp;
+	pvp = nd.ni_dvp;
+	
+#ifdef CONFIG_IMGSRC_ACCESS
+	/* Mounting image source cannot be batched with other operations */
+	if (flags == MNT_IMGSRC_BY_INDEX) {
+		error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
+		                                  ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
+		goto out;
+	}
+#endif /* CONFIG_IMGSRC_ACCESS */
+
+#if CONFIG_MACF
+	/*
+	 * Get the label string (if any) from user space
+	 */
+	if (uap->mac_p != USER_ADDR_NULL) {
+		struct user_mac mac;
+		size_t ulen = 0;
+
+		if (is_64bit) {
+			struct user64_mac mac64;
+			error = copyin(uap->mac_p, &mac64, sizeof(mac64));
+			mac.m_buflen = mac64.m_buflen;
+			mac.m_string = mac64.m_string;
+		} else {
+			struct user32_mac mac32;
+			error = copyin(uap->mac_p, &mac32, sizeof(mac32));
+			mac.m_buflen = mac32.m_buflen;
+			mac.m_string = mac32.m_string;
+		}
+		if (error)
+			goto out;
+		if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
+		    (mac.m_buflen < 2)) {
+			error = EINVAL;
+			goto out;
+		}
+		MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
+		error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
+		if (error) {
+			goto out;
+		}
+		AUDIT_ARG(mac_string, labelstr);
+	}
+#endif /* CONFIG_MACF */
+
+	AUDIT_ARG(fflags, flags);
+
+	if ((vp->v_flag & VROOT) &&
+		(vp->v_mount->mnt_flag & MNT_ROOTFS)) {
+			flags |= MNT_UPDATE;
+	/*
+	 * See 7392553 for more details on why this check exists.
+	 * Suffice it to say: if this check is on and something tries
+	 * to mount the rootFS RW, we'll turn off the codesign
+	 * bitmap optimization.
+	 */
+#if CHECK_CS_VALIDATION_BITMAP
+		if ( !(flags & MNT_RDONLY) ) {
+			root_fs_upgrade_try = TRUE;
+		}
+#endif
+	}
+
+	error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
+	                     labelstr, FALSE, ctx);
+out:
+#if CONFIG_MACF
+	if (labelstr)
+		FREE(labelstr, M_MACTEMP);
+#endif /* CONFIG_MACF */
+
+	vnode_put(vp);
+	vnode_put(pvp);
+	nameidone(&nd);
+
+	return (error);
+}
+
+/*
+ * Common mount implementation (final stage of mounting).
+ *
+ * Arguments:
+ *  fstypename	file system type (i.e. its vfs name)
+ *  pvp		parent of covered vnode
+ *  vp		covered vnode
+ *  cnp		component name (i.e. path) of covered vnode
+ *  flags	generic mount flags
+ *  fsmountargs	file system specific data
+ *  labelstr	optional MAC label
+ *  kernelmount	TRUE for mounts initiated from inside the kernel
+ *  ctx		caller's context
+ */
+static int
+mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
+             struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
+             char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
+{
 	struct vnode *devvp = NULLVP;
 	struct vnode *device_vnode = NULLVP;
 #if CONFIG_MACF
@@ -267,57 +460,20 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
 #endif
 	struct mount *mp;
 	struct vfstable *vfsp = (struct vfstable *)0;
+	struct proc *p = vfs_context_proc(ctx);
 	int error, flag = 0;
-	struct vnode_attr va;
-	vfs_context_t ctx = vfs_context_current();
-	struct nameidata nd;
-	struct nameidata nd1;
-	char fstypename[MFSNAMELEN];
-	size_t dummy=0;
 	user_addr_t devpath = USER_ADDR_NULL;
-	user_addr_t fsmountargs =  uap->data;
 	int ronly = 0;
 	int mntalloc = 0;
 	boolean_t vfsp_ref = FALSE;
-	mode_t accessmode;
-	boolean_t is_64bit;
 	boolean_t is_rwlock_locked = FALSE;
 	boolean_t did_rele = FALSE;
 	boolean_t have_usecount = FALSE;
 
-	AUDIT_ARG(fflags, uap->flags);
-
-	is_64bit = proc_is64bit(p);
-
 	/*
-	 * Get vnode to be covered
+	 * Process an update for an existing mount
 	 */
-	NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1 | WANTPARENT, 
-		   UIO_USERSPACE, uap->path, ctx);
-	error = namei(&nd);
-	if (error)
-		return (error);
-	vp = nd.ni_vp;
-	pvp = nd.ni_dvp;
-	
-	if ((vp->v_flag & VROOT) &&
-		(vp->v_mount->mnt_flag & MNT_ROOTFS)) 
-			uap->flags |= MNT_UPDATE;
-
-	error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
-	if (error)
-		goto out1;
-	
-#ifdef CONFIG_IMGSRC_ACCESS
-	if (uap->flags == MNT_IMGSRC) {
-		error = relocate_imageboot_source(vp, &nd.ni_cnd, fstypename, ctx, is_64bit, fsmountargs);
-		vnode_put(pvp);
-		vnode_put(vp);
-		return error;
-	}
-#endif /* CONFIG_IMGSRC_ACCESS */
-
-	if (uap->flags & MNT_UPDATE) {
+	if (flags & MNT_UPDATE) {
 		if ((vp->v_flag & VROOT) == 0) {
 			error = EINVAL;
 			goto out1;
@@ -338,7 +494,7 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
 		 * We only allow the filesystem to be reloaded if it
 		 * is currently mounted read-only.
 		 */
-		if ((uap->flags & MNT_RELOAD) &&
+		if ((flags & MNT_RELOAD) &&
 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
 			error = ENOTSUP;
 			goto out1;
@@ -347,8 +503,7 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
 #ifdef CONFIG_IMGSRC_ACCESS 
 		/* Can't downgrade the backer of the root FS */
 		if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
-			(!vfs_isrdonly(mp)) && (uap->flags & MNT_RDONLY))
-		{
+			(!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
 			error = ENOTSUP;
 			goto out1;
 		}
@@ -365,7 +520,6 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
 #if CONFIG_MACF
 		error = mac_mount_check_remount(ctx, mp);
 		if (error != 0) {
-			lck_rw_done(&mp->mnt_rwlock);
 			goto out1;
 		}
 #endif
@@ -373,48 +527,26 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
 		 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
 		 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
 		 */
-		if (suser(vfs_context_ucred(ctx), NULL)) {
-			uap->flags |= MNT_NOSUID | MNT_NODEV;
+		if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
+			flags |= MNT_NOSUID | MNT_NODEV;
 			if (mp->mnt_flag & MNT_NOEXEC)
-				uap->flags |= MNT_NOEXEC;
+				flags |= MNT_NOEXEC;
 		}
 		flag = mp->mnt_flag;
 
-		mp->mnt_flag |=
-		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
+		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
 
 		vfsp = mp->mnt_vtable;
 		goto update;
 	}
-	/*
-	 * If the user is not root, ensure that they own the directory
-	 * onto which we are attempting to mount.
-	 */
-	VATTR_INIT(&va);
-	VATTR_WANTED(&va, va_uid);
-	if ((error = vnode_getattr(vp, &va, ctx)) ||
-	    (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
-	     (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))) {
-		goto out1;
-	}
 	/*
 	 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
 	 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
 	 */
-	if (suser(vfs_context_ucred(ctx), NULL)) {
-		uap->flags |= MNT_NOSUID | MNT_NODEV;
+	if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
+		flags |= MNT_NOSUID | MNT_NODEV;
 		if (vp->v_mount->mnt_flag & MNT_NOEXEC)
-			uap->flags |= MNT_NOEXEC;
-	}
-	if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
-		goto out1;
-
-	if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
-		goto out1;
-
-	if (vp->v_type != VDIR) {
-		error = ENOTDIR;
-		goto out1;
+			flags |= MNT_NOEXEC;
 	}
 
 	/* XXXAUDIT: Should we capture the type on the error path as well? */
@@ -431,22 +563,22 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
 		error = ENODEV;
 		goto out1;
 	}
-#if CONFIG_MACF
-	error = mac_mount_check_mount(ctx, vp,
-	    &nd.ni_cnd, vfsp->vfc_name);
-	if (error != 0)
+
+	/*
+	 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
+	 */
+	if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
+		error = EINVAL;  /* unsupported request */
 		goto out1;
-#endif
-	if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
-		error = EBUSY;
+	}
+
+	error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
+	if (error != 0) {
 		goto out1;
 	}
-	vnode_lock_spin(vp);
-	SET(vp->v_flag, VMOUNT);
-	vnode_unlock(vp);
 
 	/*
-	 * Allocate and initialize the filesystem.
+	 * Allocate and initialize the filesystem (mount_t)
 	 */
 	MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
 		M_MOUNT, M_WAITOK);
@@ -477,35 +609,50 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3
 	//mp->mnt_stat.f_type = vfsp->vfc_typenum;
 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
 	strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
-	strncpy(mp->mnt_vfsstat.f_mntonname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
+	strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
 	mp->mnt_vnodecovered = vp;
 	mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
-	mp->mnt_devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
+	mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
+	mp->mnt_devbsdunit = 0;
 
 	/* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
 	vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
-	
+
+#if NFSCLIENT
+	if (kernelmount)
+		mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
+	if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
+		mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
+#endif /* NFSCLIENT */
+
 update:
 	/*
 	 * Set the mount level flags.
 	 */
-	if (uap->flags & MNT_RDONLY)
+	if (flags & MNT_RDONLY)
 		mp->mnt_flag |= MNT_RDONLY;
-	else if (mp->mnt_flag & MNT_RDONLY)
+	else if (mp->mnt_flag & MNT_RDONLY) {
+		// disallow read/write upgrades of file systems that
+		// had the TYPENAME_OVERRIDE feature set.
+		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
+			error = EPERM;
+			goto out1;
+		}
 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
-
+	}
 	mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
 			  MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
-			  MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
-			  MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE | MNT_CPROTECT );
-
-	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |	MNT_NODEV |
-				      MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
-				      MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED | 
-					  MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE | MNT_CPROTECT );
+			  MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
+			  MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
+			  MNT_QUARANTINE | MNT_CPROTECT);
+	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
+				 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
+				 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
+				 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
+				 MNT_QUARANTINE | MNT_CPROTECT);
 
 #if CONFIG_MACF
-	if (uap->flags & MNT_MULTILABEL) {
+	if (flags & MNT_MULTILABEL) {
 		if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
 			error = EINVAL;
 			goto out1;
@@ -513,9 +660,11 @@ update:
 		mp->mnt_flag |= MNT_MULTILABEL;
 	}
 #endif
-
+	/*
+	 * Process device path for local file systems if requested
+	 */
 	if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
-		if (is_64bit) {
+		if (vfs_context_is64bit(ctx)) {
 			if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
 				goto out1;	
 			fsmountargs += sizeof(devpath);
@@ -528,16 +677,18 @@ update:
 			fsmountargs += sizeof(tmp);
 		}
 
-		/* if it is not update and device name needs to be parsed */
+		/* Lookup device and authorize access to it */
 		if ((devpath)) {
-			NDINIT(&nd1, LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
-			if ( (error = namei(&nd1)) )
+			struct nameidata nd;
+
+			NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
+			if ( (error = namei(&nd)) )
 				goto out1;
 
-			strncpy(mp->mnt_vfsstat.f_mntfromname, nd1.ni_cnd.cn_pnbuf, MAXPATHLEN);
-			devvp = nd1.ni_vp;
+			strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
+			devvp = nd.ni_vp;
 
-			nameidone(&nd1);
+			nameidone(&nd);
 
 			if (devvp->v_type != VBLK) {
 				error = ENOTBLK;
@@ -552,14 +703,16 @@ update:
 			* permissions on the device.
 			*/
 			if (suser(vfs_context_ucred(ctx), NULL) != 0) {
-				accessmode = KAUTH_VNODE_READ_DATA;
+				mode_t accessmode = KAUTH_VNODE_READ_DATA;
+
 				if ((mp->mnt_flag & MNT_RDONLY) == 0)
 					accessmode |= KAUTH_VNODE_WRITE_DATA;
 				if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
 					goto out2;
 			}
 		}
-		if (devpath && ((uap->flags & MNT_UPDATE) == 0)) {
+		/* On first mount, preflight and open device */
+		if (devpath && ((flags & MNT_UPDATE) == 0)) {
 			if ( (error = vnode_ref(devvp)) )
 				goto out2;
 			/*
@@ -595,114 +748,75 @@ update:
 
 			mp->mnt_devvp = devvp;
 			device_vnode = devvp;
-		} else {
-			if ((mp->mnt_flag & MNT_RDONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
-				dev_t dev;
-				int maj;
-				/*
-				 * If upgrade to read-write by non-root, then verify
-				 * that user has necessary permissions on the device.
-				 */
-				device_vnode = mp->mnt_devvp;
-
-				if (device_vnode) {
-					vnode_getalways(device_vnode);
 
-					if (suser(vfs_context_ucred(ctx), NULL)) {
-						if ((error = vnode_authorize(device_vnode, NULL, 
-										KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) {
-							vnode_put(device_vnode);
-							goto out2;
-						}
-					}
+		} else if ((mp->mnt_flag & MNT_RDONLY) &&
+		           (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
+		           (device_vnode = mp->mnt_devvp)) {
+			dev_t dev;
+			int maj;
+			/*
+			 * If upgrade to read-write by non-root, then verify
+			 * that user has necessary permissions on the device.
+			 */
+			vnode_getalways(device_vnode);
 
-					/* Tell the device that we're upgrading */
-					dev = (dev_t)device_vnode->v_rdev;
-					maj = major(dev);
+			if (suser(vfs_context_ucred(ctx), NULL) &&
+			    (error = vnode_authorize(device_vnode, NULL, 
+			     KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
+			     ctx)) != 0) {
+				vnode_put(device_vnode);
+				goto out2;
+			}
 
-					if ((u_int)maj >= (u_int)nblkdev)
-						panic("Volume mounted on a device with invalid major number.\n");
+			/* Tell the device that we're upgrading */
+			dev = (dev_t)device_vnode->v_rdev;
+			maj = major(dev);
 
-					error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
+			if ((u_int)maj >= (u_int)nblkdev)
+				panic("Volume mounted on a device with invalid major number.");
 
-					vnode_put(device_vnode);
-					if (error != 0) {
-						goto out2;
-					}
-				}
-			}
+			error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
+			vnode_put(device_vnode);
 			device_vnode = NULLVP;
+			if (error != 0) {
+				goto out2;
+			}
 		}
 	}
 #if CONFIG_MACF
-	if ((uap->flags & MNT_UPDATE) == 0) {
+	if ((flags & MNT_UPDATE) == 0) {
 		mac_mount_label_init(mp);
 		mac_mount_label_associate(ctx, mp);
 	}
-	if (uap->mac_p != USER_ADDR_NULL) {
-		struct user_mac mac;
-		char *labelstr = NULL;
-		size_t ulen = 0;
-
-		if ((uap->flags & MNT_UPDATE) != 0) {
-			error = mac_mount_check_label_update(
-			    ctx, mp);
+	if (labelstr) {
+		if ((flags & MNT_UPDATE) != 0) {
+			error = mac_mount_check_label_update(ctx, mp);
 			if (error != 0)
 				goto out3;
 		}
-		if (is_64bit) {
-			error = copyin(uap->mac_p, &mac, sizeof(mac));
-		} else {
-			struct mac mac32;
-			error = copyin(uap->mac_p, &mac32, sizeof(mac32));
-			mac.m_buflen = mac32.m_buflen;
-			mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
-		}
-		if (error != 0)
-			goto out3;
-		if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
-		    (mac.m_buflen < 2)) {
-			error = EINVAL;
-			goto out3;
-		}
-		MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
-		error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
-		if (error != 0) {
-			FREE(labelstr, M_MACTEMP);
-			goto out3;
-		}
-		AUDIT_ARG(mac_string, labelstr);
-		error = mac_mount_label_internalize(mp->mnt_mntlabel, labelstr);
-		FREE(labelstr, M_MACTEMP);
-		if (error != 0)
-			goto out3;
 	}
 #endif
-	if (device_vnode != NULL) {
-		VNOP_IOCTL(device_vnode, DKIOCGETBSDUNIT, (caddr_t)&mp->mnt_devbsdunit, 0, NULL);
-		mp->mnt_devbsdunit %= LOWPRI_MAX_NUM_DEV;
-	}
-
 	/*
 	 * Mount the filesystem.
 	 */
 	error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
 
-	if (uap->flags & MNT_UPDATE) {
+	if (flags & MNT_UPDATE) {
 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
 			mp->mnt_flag &= ~MNT_RDONLY;
 		mp->mnt_flag &=~
 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
 		if (error)
-			mp->mnt_flag = flag;
+			mp->mnt_flag = flag;  /* restore flag value */
 		vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
 		lck_rw_done(&mp->mnt_rwlock);
 		is_rwlock_locked = FALSE;
 		if (!error)
 			enablequotas(mp, ctx);
-		goto out2;
+		goto exit;
 	}
+
 	/*
 	 * Put the new filesystem on the mount list after root.
 	 */
@@ -761,11 +875,14 @@ update:
 		 */
 		(void)VFS_START(mp, 0, ctx);
 
-		error = mount_list_add(mp);
-		if (error != 0) {
+		if (mount_list_add(mp) != 0) {
+			/*
+			 * The system is shutting down and trying to unmount
+			 * everything, so fail with a plausible errno.
+			 */
+			error = EBUSY;
 			goto out4;
 		}
-
 		lck_rw_done(&mp->mnt_rwlock);
 		is_rwlock_locked = FALSE;
 
@@ -818,8 +935,14 @@ update:
 		} 
 
 		/* Now that mount is setup, notify the listeners */
-		vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
+		vfs_notify_mount(pvp);
 	} else {
+		/* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
+		if (mp->mnt_vnodelist.tqh_first != NULL) {
+			panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
+			      mp->mnt_vtable->vfc_name, error);
+		}
+
 		vnode_lock_spin(vp);
 		CLR(vp->v_flag, VMOUNT);
 		vnode_unlock(vp);
@@ -833,45 +956,60 @@ update:
 		}
 		lck_rw_done(&mp->mnt_rwlock);
 		is_rwlock_locked = FALSE;
+
+		/*
+		 * If we get here, we have a mount structure that needs to be
+		 * freed, but since the coveredvp hasn't yet been updated to
+		 * point at it, no other thread can hold a crossref on this
+		 * mp, so it's safe to just free it.
+		 */
 		mount_lock_destroy(mp);
 #if CONFIG_MACF
 		mac_mount_label_destroy(mp);
 #endif
 		FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
 	}
-	nameidone(&nd);
-
+exit:
 	/*
-	 * drop I/O count on covered 'vp' and
-	 * on the device vp if there was one
+	 * drop I/O count on the device vp if there was one
 	 */
 	if (devpath && devvp)
 	        vnode_put(devvp);
-	vnode_put(vp);
-
-	/* Note that we've changed something in the parent directory */
-	post_event_if_success(pvp, error, NOTE_WRITE);
-	vnode_put(pvp);
 
 	return(error);
 
+/* Error condition exits */
 out4:
 	(void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
+
+	/*
+	 * If the mount has been placed on the covered vp,
+	 * it may have been discovered by now, so we have
+	 * to treat this just like an unmount
+	 */
+	mount_lock_spin(mp);
+	mp->mnt_lflag |= MNT_LDEAD;
+	mount_unlock(mp);
+
 	if (device_vnode != NULLVP) {
 		vnode_rele(device_vnode);
 		VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
                        ctx);
 		did_rele = TRUE;
 	}
+
 	vnode_lock_spin(vp);
+
+	mp->mnt_crossref++;
 	vp->v_mountedhere = (mount_t) 0;
+
 	vnode_unlock(vp);
-	
+
 	if (have_usecount) {
 		vnode_rele(vp);
 	}
 out3:
-	if (devpath && ((uap->flags & MNT_UPDATE) == 0) && (!did_rele))
+	if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
 		vnode_rele(devvp);
 out2:
 	if (devpath && devvp)
@@ -881,47 +1019,50 @@ out1:
 	if (is_rwlock_locked == TRUE) {
 		lck_rw_done(&mp->mnt_rwlock);
 	}
+
 	if (mntalloc) {
+		if (mp->mnt_crossref)
+			mount_dropcrossref(mp, vp, 0);
+		else {
+			mount_lock_destroy(mp);
 #if CONFIG_MACF
-		mac_mount_label_destroy(mp);
+			mac_mount_label_destroy(mp);
 #endif
-		FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
+			FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
+		}
 	}
-
 	if (vfsp_ref) {
 		mount_list_lock();
 		vfsp->vfc_refcount--;
 		mount_list_unlock();
 	}
-	vnode_put(vp);
-	vnode_put(pvp);
-	nameidone(&nd);
 
 	return(error);
 }
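+
+/*
+ * Illustrative sketch (hypothetical caller, not part of this change) of a
+ * kernel-initiated mount reaching mount_common(): kernelmount == TRUE skips
+ * the silent MNT_NOSUID/MNT_NODEV enforcement, KERNEL_MOUNT_NOAUTH skips
+ * the directory-ownership check in prepare_coveredvp(), and
+ * KERNEL_MOUNT_PERMIT_UNMOUNT (honored when built with NFSCLIENT) tags the
+ * mount so safedounmount() permits a non-owner, non-forced unmount.
+ */
+#if 0
+static int
+example_kernel_mount(char *fstype, vnode_t pvp, vnode_t vp,
+    struct componentname *cnp, user_addr_t fsargs, vfs_context_t ctx)
+{
+	return mount_common(fstype, pvp, vp, cnp, fsargs,
+	    MNT_AUTOMOUNTED | MNT_DONTBROWSE,
+	    KERNEL_MOUNT_NOAUTH | KERNEL_MOUNT_PERMIT_UNMOUNT,
+	    NULL, TRUE, ctx);
+}
+#endif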
 
-#ifdef CONFIG_IMGSRC_ACCESS
 /* 
  * Flush in-core data, check for competing mount attempts,
  * and set VMOUNT
  */
-static int
-prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname)
+int
+prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
 {
 	struct vnode_attr va;
 	int error;
 
-	/*
-	 * If the user is not root, ensure that they own the directory
-	 * onto which we are attempting to mount.
-	 */
-	VATTR_INIT(&va);
-	VATTR_WANTED(&va, va_uid);
-	if ((error = vnode_getattr(vp, &va, ctx)) ||
-	    (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
-	     (!vfs_context_issuser(ctx)))) { 
-		error = EPERM;
-		goto out;
+	if (!skip_auth) {
+		/*
+		 * If the user is not root, ensure that they own the directory
+		 * onto which we are attempting to mount.
+		 */
+		VATTR_INIT(&va);
+		VATTR_WANTED(&va, va_uid);
+		if ((error = vnode_getattr(vp, &va, ctx)) ||
+				(va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
+				 (!vfs_context_issuser(ctx)))) { 
+			error = EPERM;
+			goto out;
+		}
 	}
 
 	if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
@@ -955,30 +1096,57 @@ out:
 	return error;
 }
 
+#if CONFIG_IMGSRC_ACCESS
+
+#if DEBUG
+#define IMGSRC_DEBUG(args...) printf(args)
+#else
+#define IMGSRC_DEBUG(args...) do { } while (0)
+#endif
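+
+/*
+ * The empty do { } while (0) keeps IMGSRC_DEBUG() syntactically a single
+ * statement when DEBUG is off, so a braceless if/else around it still
+ * parses as intended.
+ */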
+
 static int
 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
 {
 	struct nameidata nd;
-	vnode_t vp;
+	vnode_t vp, realdevvp;
 	mode_t accessmode;
 	int error;
 
-	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
-	if ( (error = namei(&nd)) )
+	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
+	if ( (error = namei(&nd)) ) {
+		IMGSRC_DEBUG("namei() failed with %d\n", error);
 		return error;
+	}
 
-	strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
 	vp = nd.ni_vp;
-	nameidone(&nd);
 
-	if (vp->v_type != VBLK) {
+	if (!vnode_isblk(vp)) {
+		IMGSRC_DEBUG("Not block device.\n");
 		error = ENOTBLK;
 		goto out;
 	}
-	if (major(vp->v_rdev) >= nblkdev) {
+
+	realdevvp = mp->mnt_devvp;
+	if (realdevvp == NULLVP) {
+		IMGSRC_DEBUG("No device backs the mount.\n");
 		error = ENXIO;
 		goto out;
 	}
+
+	error = vnode_getwithref(realdevvp);
+	if (error != 0) {
+		IMGSRC_DEBUG("Couldn't get iocount on device.\n");
+		goto out;
+	}
+
+	if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
+		IMGSRC_DEBUG("Wrong dev_t.\n");
+		error = ENXIO;
+		goto out1;
+	}
+
+	strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
+
 	/*
 	 * If mount by non-root, then verify that user has necessary
 	 * permissions on the device.
@@ -987,12 +1155,18 @@ authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_
 		accessmode = KAUTH_VNODE_READ_DATA;
 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
 			accessmode |= KAUTH_VNODE_WRITE_DATA;
-		if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0)
-			goto out;
+		if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
+			IMGSRC_DEBUG("Access denied.\n");
+			goto out1;
+		}
 	}
 
 	*devvpp = vp;
+
+out1:
+	vnode_put(realdevvp);
 out:
+	nameidone(&nd);
 	if (error) {
 		vnode_put(vp);
 	}
@@ -1113,20 +1287,41 @@ mount_end_update(mount_t mp)
 }
 
 static int
-relocate_imageboot_source(vnode_t vp, struct componentname *cnp, 
+get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
+{
+	vnode_t vp;
+
+	if (height >= MAX_IMAGEBOOT_NESTING) {
+		return EINVAL;
+	}
+
+	vp = imgsrc_rootvnodes[height];
+	if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
+		*rvpp = vp;
+		return 0;
+	} else {
+		return ENOENT;
+	}
+}
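+
+/*
+ * Usage sketch: height selects a level in the imageboot nesting chain, so
+ * get_imgsrc_rootvnode(0, &rvp) returns the outermost image's root vnode
+ * with an iocount held (the caller must vnode_put() it), and fails cleanly
+ * for out-of-range heights or never-populated slots.
+ */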
+
+static int
+relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, 
 		const char *fsname, vfs_context_t ctx, 
-		boolean_t is64bit, user_addr_t fsmountargs)
+		boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
 {
 	int error;
 	mount_t mp;
 	boolean_t placed = FALSE;
-	vnode_t devvp;
+	vnode_t devvp = NULLVP;
 	struct vfstable *vfsp;
 	user_addr_t devpath;
 	char *old_mntonname;
+	vnode_t rvp;
+	uint32_t height;
+	uint32_t flags;
 
 	/* If we didn't imageboot, nothing to move */
-	if (imgsrc_rootvnode == NULLVP) {
+	if (imgsrc_rootvnodes[0] == NULLVP) {
 		return EINVAL;
 	}
 
@@ -1135,23 +1330,84 @@ relocate_imageboot_source(vnode_t vp, struct componentname *cnp,
 		return EPERM;
 	}
 
-	error = vnode_get(imgsrc_rootvnode);
+	IMGSRC_DEBUG("looking for root vnode.\n");
+
+	/*
+	 * Get root vnode of filesystem we're moving.
+	 */
+	if (by_index) {
+		if (is64bit) {
+			struct user64_mnt_imgsrc_args mia64;
+			error = copyin(fsmountargs, &mia64, sizeof(mia64));
+			if (error != 0) {
+				IMGSRC_DEBUG("Failed to copy in arguments.\n");
+				return error;
+			}
+
+			height = mia64.mi_height;
+			flags = mia64.mi_flags;
+			devpath = mia64.mi_devpath;
+		} else {
+			struct user32_mnt_imgsrc_args mia32;
+			error = copyin(fsmountargs, &mia32, sizeof(mia32));
+			if (error != 0) {
+				IMGSRC_DEBUG("Failed to copy in arguments.\n");
+				return error;
+			}
+
+			height = mia32.mi_height;
+			flags = mia32.mi_flags;
+			devpath = mia32.mi_devpath;
+		}
+	} else {
+		/*
+		 * For binary compatibility--assumes one level of nesting.
+		 */
+		if (is64bit) {
+			if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
+				return error;
+		} else {
+			user32_addr_t tmp;
+			if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
+				return error;
+
+			/* munge into LP64 addr */
+			devpath = CAST_USER_ADDR_T(tmp);
+		}
+
+		height = 0;
+		flags = 0;
+	}
+
+	if (flags != 0) {
+		IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
+		return EINVAL;
+	}
+
+	error = get_imgsrc_rootvnode(height, &rvp);
 	if (error != 0) {
+		IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
 		return error;
 	}
 
+	IMGSRC_DEBUG("got root vnode.\n");
+
 	MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
 
 	/* Can only move once */
-	mp = vnode_mount(imgsrc_rootvnode);
+	mp = vnode_mount(rvp);
 	if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
+		IMGSRC_DEBUG("Already moved.\n");
 		error = EBUSY;
 		goto out0;
 	}
 
+	IMGSRC_DEBUG("Starting update.\n");
+
 	/* Get exclusive rwlock on mount, authorize update on mp */
 	error = mount_begin_update(mp , ctx, 0);
 	if (error != 0) {
+		IMGSRC_DEBUG("Starting update failed with %d\n", error);
 		goto out0;
 	}
 
@@ -1160,40 +1416,38 @@ relocate_imageboot_source(vnode_t vp, struct componentname *cnp,
 	 * so we're now safe to proceed.
 	 */
 	if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
+		IMGSRC_DEBUG("Already moved [2]\n");
 		goto out1;
 	}
+
+	IMGSRC_DEBUG("Preparing coveredvp.\n");
 
 	/* Mark covered vnode as mount in progress, authorize placing mount on top */
-	error = prepare_coveredvp(vp, ctx, cnp, fsname);
+	error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
 	if (error != 0) {
+		IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
 		goto out1;
 	}
 	
+	IMGSRC_DEBUG("Covered vp OK.\n");
+
 	/* Sanity check the name caller has provided */
 	vfsp = mp->mnt_vtable;
 	if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
+		IMGSRC_DEBUG("Wrong fs name.\n");
 		error = EINVAL;
 		goto out2;
 	}
 
 	/* Check the device vnode and update mount-from name, for local filesystems */
 	if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
-		if (is64bit) {
-			if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
-				goto out2;	
-			fsmountargs += sizeof(devpath);
-		} else {
-			user32_addr_t tmp;
-			if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
-				goto out2;	
-			/* munge into LP64 addr */
-			devpath = CAST_USER_ADDR_T(tmp);
-			fsmountargs += sizeof(tmp);
-		}
+		IMGSRC_DEBUG("Local, doing device validation.\n");
 
 		if (devpath != USER_ADDR_NULL) {
 			error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
 			if (error) {
+				IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
 				goto out2;
 			}
 
@@ -1205,6 +1459,8 @@ relocate_imageboot_source(vnode_t vp, struct componentname *cnp,
 	 * Place mp on top of vnode, ref the vnode,  call checkdirs(),
 	 * and increment the name cache's mount generation 
 	 */
+
+	IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
 	error = place_mount_and_checkdirs(mp, vp, ctx);
 	if (error != 0) {
 		goto out2;
@@ -1221,15 +1477,21 @@ relocate_imageboot_source(vnode_t vp, struct componentname *cnp,
 	mount_unlock(mp);
 
 	/* Finally, add to mount list, completely ready to go */
-	error = mount_list_add(mp);
-	if (error != 0) {
+	if (mount_list_add(mp) != 0) {
+		/*
+		 * The system is shutting down and trying to unmount
+		 * everything, so fail with a plausible errno.
+		 */
+		error = EBUSY;
 		goto out3;
 	}
 
 	mount_end_update(mp);
-	vnode_put(imgsrc_rootvnode);
+	vnode_put(rvp);
 	FREE(old_mntonname, M_TEMP);
 
+	vfs_notify_mount(pvp);
+
 	return 0;
 out3:
 	strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
@@ -1255,7 +1517,7 @@ out1:
 	mount_end_update(mp);
 
 out0:
-	vnode_put(imgsrc_rootvnode);
+	vnode_put(rvp);
 	FREE(old_mntonname, M_TEMP);
 	return error;
 }
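+
+/*
+ * Illustrative userspace-side sketch of the by-index arguments consumed by
+ * the copyin above.  Field names come from the kernel structs; the exact
+ * public declaration and layout are assumptions here, not part of this
+ * change.
+ *
+ *	struct mnt_imgsrc_args {
+ *		uint32_t	mi_height;	// nesting level to relocate
+ *		uint32_t	mi_flags;	// must currently be zero
+ *		const char	*mi_devpath;	// device path, for local FSes
+ *	};
+ *
+ *	mount(fstypename, mountpoint, MNT_IMGSRC_BY_INDEX, &args);
+ */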
@@ -1282,7 +1544,8 @@ enablequotas(struct mount *mp, vfs_context_t ctx)
 	 */
 	for (type=0; type < MAXQUOTAS; type++) {
 		snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
-		NDINIT(&qnd, LOOKUP, FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(qfpath), ctx);
+		NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
+		       CAST_USER_ADDR_T(qfpath), ctx);
 		if (namei(&qnd) != 0)
 			continue; 	    /* option file to trigger quotas is not present */
 		vnode_put(qnd.ni_vp);
@@ -1410,7 +1673,7 @@ unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
 	struct nameidata nd;
 	vfs_context_t ctx = vfs_context_current();
 
-	NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1, 
+	NDINIT(&nd, LOOKUP, OP_UNMOUNT, NOTRIGGER | FOLLOW | AUDITVNPATH1, 
 		UIO_USERSPACE, uap->path, ctx);
 	error = namei(&nd);
 	if (error)
@@ -1466,13 +1729,18 @@ safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
 	proc_t p = vfs_context_proc(ctx);
 
 	/*
-	 * Only root, or the user that did the original mount is
-	 * permitted to unmount this filesystem.
+	 * Skip authorization if the mount is tagged as permissive and 
+	 * this is not a forced-unmount attempt.
 	 */
-	if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
-	    (error = suser(kauth_cred_get(), &p->p_acflag)))
-		goto out;
-
+	if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
+		/*
+		 * Only root, or the user that did the original mount is
+		 * permitted to unmount this filesystem.
+		 */
+		if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
+				(error = suser(kauth_cred_get(), &p->p_acflag)))
+			goto out;
+	}
 	/*
 	 * Don't allow unmounting the root file system.
 	 */
@@ -1507,9 +1775,13 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
 	int forcedunmount = 0;
 	int lflags = 0;
 	struct vnode *devvp = NULLVP;
+#if CONFIG_TRIGGERS
+	int did_vflush = 0;
+#endif /* CONFIG_TRIGGERS */
 
 	if (flags & MNT_FORCE)
 		forcedunmount = 1;
+
 	mount_lock(mp);
 	/* XXX post jaguar fix LK_DRAIN - then clean this up */
 	if ((flags & MNT_FORCE)) {
@@ -1572,7 +1844,11 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
 			}
 		}
 	}
-	
+
+#if CONFIG_TRIGGERS
+	vfs_nested_trigger_unmounts(mp, flags, ctx);
+	did_vflush = 1;
+#endif	
 	if (forcedunmount)
 		lflags |= FORCECLOSE;
 	error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM  | SKIPROOT | lflags);
@@ -1614,14 +1890,17 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
 	lck_rw_done(&mp->mnt_rwlock);
 	mount_list_remove(mp);
 	lck_rw_lock_exclusive(&mp->mnt_rwlock);
-	
+
 	/* mark the mount point hook in the vp but not drop the ref yet */
 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
-			vnode_getwithref(coveredvp);
-			vnode_lock_spin(coveredvp);
-			coveredvp->v_mountedhere = (struct mount *)0;
-			vnode_unlock(coveredvp);
-			vnode_put(coveredvp);
+		vnode_getwithref(coveredvp);
+		vnode_lock_spin(coveredvp);
+
+		mp->mnt_crossref++;
+		coveredvp->v_mountedhere = (struct mount *)0;
+
+		vnode_unlock(coveredvp);
+		vnode_put(coveredvp);
 	}
 
 	mount_list_lock();
@@ -1650,11 +1929,33 @@ out:
 		mp->mnt_lflag &= ~MNT_LWAIT;
 		needwakeup = 1;	
 	}
+
+#if CONFIG_TRIGGERS
+	/* 
+	 * Callback and context are set together under the mount lock, and
+	 * never cleared, so we're safe to examine them here, drop the lock, 
+	 * and call out.
+	 */
+	if (mp->mnt_triggercallback != NULL) {
+		mount_unlock(mp);
+		if (error == 0) {
+			mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
+		} else if (did_vflush) {
+			mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
+		}
+	} else {
+		mount_unlock(mp);
+	}
+#else 
 	mount_unlock(mp);
+#endif /* CONFIG_TRIGGERS */
+
 	lck_rw_done(&mp->mnt_rwlock);
 
 	if (needwakeup)
 		wakeup((caddr_t)mp);
+
 	if (!error) {
 		if ((coveredvp != NULLVP)) {
 			vnode_t pvp;
@@ -1662,18 +1963,12 @@ out:
 			vnode_getwithref(coveredvp);
 			pvp = vnode_getparent(coveredvp);
 			vnode_rele(coveredvp);
-			vnode_lock_spin(coveredvp);
-			if(mp->mnt_crossref == 0) {
-				vnode_unlock(coveredvp);
-				mount_lock_destroy(mp);
-#if CONFIG_MACF
-				mac_mount_label_destroy(mp);
-#endif
-				FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
-			}  else {
-				coveredvp->v_lflag |= VL_MOUNTDEAD;
-				vnode_unlock(coveredvp);
-			}
+
+			mount_dropcrossref(mp, coveredvp, 0);
+#if CONFIG_TRIGGERS
+			if (coveredvp->v_resolve)
+				vnode_trigger_rearm(coveredvp, ctx);
+#endif	
 			vnode_put(coveredvp);
 
 			if (pvp) {
@@ -1695,25 +1990,28 @@ out:
 void
 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
 {
-		vnode_lock(dp);
-		mp->mnt_crossref--;
-		if (mp->mnt_crossref < 0)
-			panic("mount cross refs -ve");
-		if (((dp->v_lflag & VL_MOUNTDEAD) == VL_MOUNTDEAD) && (mp->mnt_crossref == 0)) {
-			dp->v_lflag &= ~VL_MOUNTDEAD;
-			if (need_put)
-			        vnode_put_locked(dp);
-			vnode_unlock(dp);
-			mount_lock_destroy(mp);
-#if CONFIG_MACF
-			mac_mount_label_destroy(mp);
-#endif
-			FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
-			return;
-		}
+	vnode_lock(dp);
+	mp->mnt_crossref--;
+
+	if (mp->mnt_crossref < 0)
+		panic("mount cross refs -ve");
+
+	if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
+			
 		if (need_put)
-		        vnode_put_locked(dp);
+			vnode_put_locked(dp);
 		vnode_unlock(dp);
+
+		mount_lock_destroy(mp);
+#if CONFIG_MACF
+		mac_mount_label_destroy(mp);
+#endif
+		FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
+		return;
+	}
+	if (need_put)
+		vnode_put_locked(dp);
+	vnode_unlock(dp);
 }
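+
+/*
+ * Sketch of the crossref protocol above: paths that detach a mount from its
+ * covered vnode (dounmount() and the mount_common() failure path) bump
+ * mnt_crossref while holding the covered vnode's lock, and each
+ * mount_dropcrossref() call decrements it; the mount_t is freed only once
+ * the count reaches zero and dp->v_mountedhere no longer points at mp.
+ */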
 
 
@@ -1806,8 +2104,8 @@ quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
 
 	AUDIT_ARG(uid, uap->uid);
 	AUDIT_ARG(cmd, uap->cmd);
-	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
-		UIO_USERSPACE, uap->path, ctx);
+	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+	       uap->path, ctx);
 	error = namei(&nd);
 	if (error)
 		return (error);
@@ -1914,7 +2212,7 @@ statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
 	vfs_context_t ctx = vfs_context_current();
 	vnode_t vp;
 
-	NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1, 
+	NDINIT(&nd, LOOKUP, OP_STATFS, NOTRIGGER | FOLLOW | AUDITVNPATH1, 
 		UIO_USERSPACE, uap->path, ctx);
 	error = namei(&nd);
 	if (error)
@@ -2000,7 +2298,11 @@ statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
 	sfs.f_type = mp->mnt_vtable->vfc_typenum;
 	sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 	sfs.f_fssubtype = sfsp->f_fssubtype;
-	strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
+	if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
+		strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
+	} else {
+		strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
+	}
 	strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
 	strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
 
@@ -2022,7 +2324,7 @@ statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *r
 	vfs_context_t ctxp = vfs_context_current();
 	vnode_t vp;
 
-	NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1, 
+	NDINIT(&nd, LOOKUP, OP_STATFS, NOTRIGGER | FOLLOW | AUDITVNPATH1, 
 		UIO_USERSPACE, uap->path, ctxp);
 	error = namei(&nd);
 	if (error)
@@ -2068,7 +2370,7 @@ fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t
 
 	mp = vp->v_mount;
 	if (!mp) {
-		error = EBADF;
+		error = EBADF;
 		goto out;
 	}
 	sp = &mp->mnt_vfsstat;
@@ -2470,7 +2772,7 @@ common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
 	vnode_t tvp;
 	vfs_context_t ctx = vfs_context_current();
 
-	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
+	NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1, 
 		UIO_USERSPACE, uap->path, ctx);
 	error = change_dir(&nd, ctx);
 	if (error)
@@ -2572,7 +2874,7 @@ chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
 	if ((error = suser(kauth_cred_get(), &p->p_acflag)))
 		return (error);
 
-	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
+	NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1, 
 		UIO_USERSPACE, uap->path, ctx);
 	error = change_dir(&nd, ctx);
 	if (error)
@@ -2795,7 +3097,15 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *v
 	 */
 	if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
 		vnode_t ttyvp;
-		vnode_ref(vp);
+
+		/* 
+		 * We already have a ref from vn_open_auth(), so we can demand another reference.
+		 */	
+		error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE);
+		if (error != 0) {
+			panic("vnode_ref_ext() with VNODE_REF_FORCE failed?!");
+		}
+
 		session_lock(sessp);
 		ttyvp = sessp->s_ttyvp;
 		sessp->s_ttyvp = vp;
@@ -2808,6 +3118,8 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *v
 	vnode_put(vp);
 
 	proc_fdlock(p);
+	if (flags & O_CLOEXEC)
+		*fdflags(p, indx) |= UF_EXCLOSE;
 	procfdtbl_releasefd(p, indx, NULL);
 	fp_drop(p, indx, fp, 1);
 	proc_fdunlock(p);
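+
+/*
+ * Userspace view of the O_CLOEXEC support added above (illustrative
+ * sketch): the close-on-exec bit is applied atomically at open time,
+ * leaving no window for a concurrent fork()/exec() to leak the descriptor:
+ *
+ *	#include <fcntl.h>
+ *
+ *	int fd = open("/etc/hosts", O_RDONLY | O_CLOEXEC);
+ *	// equivalent to open() plus fcntl(fd, F_SETFD, FD_CLOEXEC),
+ *	// minus the race
+ */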
@@ -2887,7 +3199,8 @@ open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
 	if (xsecdst != NULL)
 		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
 
-	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
+	NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+	       uap->path, vfs_context_current());
 
 	ciferror = open1(vfs_context_current(), &nd, uap->flags, &va, retval);
 	if (xsecdst != NULL)
@@ -2916,7 +3229,8 @@ open_nocancel(proc_t p, struct open_nocancel_args *uap, int32_t *retval)
 	cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
 	VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
 
-	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
+	NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+	       uap->path, vfs_context_current());
 
 	return(open1(vfs_context_current(), &nd, uap->flags, &va, retval));
 }
@@ -2933,7 +3247,6 @@ mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
 	struct vnode_attr va;
 	vfs_context_t ctx = vfs_context_current();
 	int error;
-	int whiteout = 0;
 	struct nameidata nd;
 	vnode_t	vp, dvp;
 
@@ -2950,7 +3263,7 @@ mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
 
 	if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
 		return (error);
-	NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, 
+	NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1, 
 		UIO_USERSPACE, uap->path, ctx);
 	error = namei(&nd);
 	if (error)
@@ -2973,32 +3286,22 @@ mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
 	case S_IFBLK:
 		VATTR_SET(&va, va_type, VBLK);
 		break;
-	case S_IFWHT:
-		whiteout = 1;
-		break;
 	default:
 		error = EINVAL;
 		goto out;
 	}
 
 #if CONFIG_MACF
-	if (!whiteout) {
-		error = mac_vnode_check_create(ctx,
-		    nd.ni_dvp, &nd.ni_cnd, &va);
-		if (error)
-			goto out;
-	}
+	error = mac_vnode_check_create(ctx,
+	    nd.ni_dvp, &nd.ni_cnd, &va);
+	if (error)
+		goto out;
 #endif
 
  	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
  		goto out;
 
-	if (whiteout) {
-		error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, CREATE, ctx);
-	} else {
-		error = vn_create(dvp, &vp, &nd.ni_cnd, &va, 0, ctx);
-	}
-	if (error)
+	if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
 		goto out;
 
 	if (vp) {
@@ -3050,7 +3353,7 @@ mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
 	int error;
 	struct nameidata nd;
 
-	NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, 
+	NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1, 
 		UIO_USERSPACE, upath, ctx);
 	error = namei(&nd);
 	if (error)
@@ -3065,19 +3368,10 @@ mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
    	}
    	VATTR_SET(vap, va_type, VFIFO);
 
-#if CONFIG_MACF
-	error = mac_vnode_check_create(ctx, nd.ni_dvp,
-	    &nd.ni_cnd, vap);
-	if (error)
+	if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
 		goto out;
-#endif
-
-
-   	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
-   		goto out;
 
- 	
-  	error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx);
+  	error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
 out:
 	/*
 	 * nameidone has to happen before we vnode_put(dvp)
@@ -3263,7 +3557,7 @@ link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
 	vp = dvp = lvp = NULLVP;
 
 	/* look up the object we are linking to */
-	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
+	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, 
 		UIO_USERSPACE, uap->path, ctx);
 	error = namei(&nd);
 	if (error)
@@ -3297,6 +3591,9 @@ link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
 	}
 
 	/* lookup the target node */
+#if CONFIG_TRIGGERS
+	nd.ni_op = OP_LINK;
+#endif
 	nd.ni_cnd.cn_nameiop = CREATE;
 	nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
 	nd.ni_dirp = uap->link;
@@ -3439,7 +3736,7 @@ symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval)
 		goto out;
 	AUDIT_ARG(text, path);	/* This is the link string */
 
-	NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, 
+	NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1, 
 		UIO_USERSPACE, uap->link, ctx);
 	error = namei(&nd);
 	if (error)
@@ -3481,6 +3778,9 @@ symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval)
 
 		if (vp == NULL) {
 			nd.ni_cnd.cn_nameiop = LOOKUP;
+#if CONFIG_TRIGGERS
+			nd.ni_op = OP_LOOKUP;
+#endif
 			nd.ni_cnd.cn_flags = 0;
 			error = namei(&nd);
 			vp = nd.ni_vp;
@@ -3557,7 +3857,7 @@ undelete(__unused proc_t p, struct undelete_args *uap, __unused int32_t *retval)
 	vfs_context_t ctx = vfs_context_current();
 	vnode_t	vp, dvp;
 
-	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT|AUDITVNPATH1, 
+	NDINIT(&nd, DELETE, OP_UNLINK, LOCKPARENT | DOWHITEOUT | AUDITVNPATH1, 
 		UIO_USERSPACE, uap->path, ctx);
 	error = namei(&nd);
 	if (error)
@@ -3598,19 +3898,25 @@ unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy)
 	int  len=0;
 #if CONFIG_FSE
 	fse_info  finfo;
+	struct vnode_attr va;
 #endif
 	int flags = 0;
 	int need_event = 0;
 	int has_listeners = 0;
 	int truncated_path=0;
+	int batched;
+	struct vnode_attr *vap = NULL;
+
 #if NAMEDRSRCFORK
 	/* unlink or delete is allowed on rsrc forks and named streams */
 	ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
 #endif
 
 	ndp->ni_cnd.cn_flags |= LOCKPARENT;
+	ndp->ni_flag |= NAMEI_COMPOUNDREMOVE;
 	cnp = &ndp->ni_cnd;
 
+lookup_continue:
 	error = namei(ndp);
 	if (error)
 		return (error);
@@ -3618,57 +3924,62 @@ unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy)
 	dvp = ndp->ni_dvp;
 	vp = ndp->ni_vp;
 
 	/* With Carbon delete semantics, busy files cannot be deleted */
 	if (nodelbusy) {
 		flags |= VNODE_REMOVE_NODELETEBUSY;
 	}
 
-	/*
-	 * Normally, unlinking of directories is not supported. 
-	 * However, some file systems may have limited support.
-	 */
-	if ((vp->v_type == VDIR) &&
-	    !(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
-		error = EPERM;	/* POSIX */
-	}
+	if (vp) {
+		batched = vnode_compound_remove_available(vp);
+		/*
+		 * The root of a mounted filesystem cannot be deleted.
+		 */
+		if (vp->v_flag & VROOT) {
+			error = EBUSY;
+		}
 
-	/*
-	 * The root of a mounted filesystem cannot be deleted.
-	 */
-	if (vp->v_flag & VROOT) {
-		error = EBUSY;
-	}
-	if (error)
-		goto out;
+		if (!batched) {
+			error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
+			if (error) {
+				goto out;
+			}
+		}
+	} else {
+		batched = 1;
 
+		if (!vnode_compound_remove_available(dvp)) {
+			panic("No vp, but no compound remove?");
+		}
+	}
 
-	/* authorize the delete operation */
-#if CONFIG_MACF
-	if (!error)
-		error = mac_vnode_check_unlink(ctx,
-		    dvp, vp, cnp);
-#endif /* MAC */
-	if (!error)
-		error = vnode_authorize(vp, ndp->ni_dvp, KAUTH_VNODE_DELETE, ctx);
-	if (error)
-		goto out;
-	
 #if CONFIG_FSE
 	need_event = need_fsevent(FSE_DELETE, dvp);
 	if (need_event) {
-		if ((vp->v_flag & VISHARDLINK) == 0) {
-			get_fse_info(vp, &finfo, ctx);
+		if (!batched) {
+			if ((vp->v_flag & VISHARDLINK) == 0) {
+				/* XXX need to get these data in batched VNOP */
+				get_fse_info(vp, &finfo, ctx);
+			}
+		} else {
+			error = vfs_get_notify_attributes(&va);
+			if (error) {
+				goto out;
+			}
+
+			vap = &va;
 		}
 	}
 #endif
 	has_listeners = kauth_authorize_fileop_has_listeners();
 	if (need_event || has_listeners) {
-		GET_PATH(path);
 		if (path == NULL) {
-			error = ENOMEM;
-			goto out;
+			GET_PATH(path);
+			if (path == NULL) {
+				error = ENOMEM;
+				goto out;
+			}
 		}
-
 		len = safe_getpath(dvp, ndp->ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
 	}
 
@@ -3677,7 +3988,25 @@ unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy)
 		error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
 	else
 #endif
-		error = VNOP_REMOVE(dvp, vp, &ndp->ni_cnd, flags, ctx);
+	{
+		error = vn_remove(dvp, &ndp->ni_vp, ndp, flags, vap, ctx);
+		vp = ndp->ni_vp;
+		if (error == EKEEPLOOKING) {
+			if (!batched) {
+				panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
+			}
+
+			if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+				panic("EKEEPLOOKING, but continue flag not set?");
+			}
+
+			if (vnode_isdir(vp)) {
+				error = EISDIR;
+				goto out;
+			}
+			goto lookup_continue;
+		}
+	}
 
 	/*
 	 * Call out to allow 3rd party notification of delete. 
@@ -3706,6 +4035,8 @@ unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy)
 		if (need_event) {
 			if (vp->v_flag & VISHARDLINK) {
 				get_fse_info(vp, &finfo, ctx);
+			} else if (vap) {
+				vnode_get_fse_info_from_vap(vp, &finfo, vap);
 			}
 			if (truncated_path) {
 				finfo.mode |= FSE_TRUNCATED_PATH;
@@ -3717,27 +4048,30 @@ unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy)
 		}
 #endif
 	}
+
+out:
 	if (path != NULL)
 		RELEASE_PATH(path);
 
-	/*
-	 * nameidone has to happen before we vnode_put(dvp)
-	 * since it may need to release the fs_nodelock on the dvp
-	 */
-out:
 #if NAMEDRSRCFORK
 	/* recycle the deleted rsrc fork vnode to force a reclaim, which 
 	 * will cause its shadow file to go away if necessary.
 	 */
-	 if ((vnode_isnamedstream(ndp->ni_vp)) &&
-		(ndp->ni_vp->v_parent != NULLVP) &&
-		vnode_isshadow(ndp->ni_vp)) {
-   			vnode_recycle(ndp->ni_vp);
+	 if (vp && (vnode_isnamedstream(vp)) &&
+		(vp->v_parent != NULLVP) &&
+		vnode_isshadow(vp)) {
+   			vnode_recycle(vp);
 	 }	
 #endif
+	/*
+	 * nameidone has to happen before we vnode_put(dvp)
+	 * since it may need to release the fs_nodelock on the dvp
+	 */
 	nameidone(ndp);
 	vnode_put(dvp);
-	vnode_put(vp);
+	if (vp) {
+		vnode_put(vp);
+	}
 	return (error);
 }
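+
+/*
+ * Shape of the compound-remove contract used above (sketch): a filesystem
+ * that implements the compound VNOP may return EKEEPLOOKING with
+ * NAMEI_CONTLOOKUP set in ni_flag, asking VFS to continue the lookup rather
+ * than fail, so unlink1() loops:
+ *
+ *	lookup_continue:
+ *		error = namei(ndp);
+ *		...
+ *		error = vn_remove(dvp, &ndp->ni_vp, ndp, flags, vap, ctx);
+ *		if (error == EKEEPLOOKING)
+ *			goto lookup_continue;
+ */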
 
@@ -3750,7 +4084,8 @@ unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
 	struct nameidata nd;
 	vfs_context_t ctx = vfs_context_current();
 
-	NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
+	NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
+	       uap->path, ctx);
 	return unlink1(ctx, &nd, 0);
 }
 
@@ -3763,7 +4098,8 @@ delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
 	struct nameidata nd;
 	vfs_context_t ctx = vfs_context_current();
 
-	NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
+	NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
+	       uap->path, ctx);
 	return unlink1(ctx, &nd, 1);
 }
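+
+/*
+ * Note: delete(2) differs from unlink(2) only in passing nodelbusy == 1 to
+ * unlink1(), which sets VNODE_REMOVE_NODELETEBUSY so that busy files cannot
+ * be deleted (Carbon delete semantics).
+ */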
 
@@ -4132,7 +4468,9 @@ access_extended(__unused proc_t p, struct access_extended_args *uap, __unused in
 				niopts |= WANTPARENT;
 
 			/* do the lookup */
-			NDINIT(&nd, LOOKUP, niopts, UIO_SYSSPACE, CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset), &context);
+			NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
+			       CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
+			       &context);
 			error = namei(&nd);
 			if (!error) {
 				vp = nd.ni_vp;
@@ -4218,7 +4556,8 @@ access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
  	/* need parent for vnode_authorize for deletion test */
  	if (uap->flags & _DELETE_OK)
  		niopts |= WANTPARENT;
- 	NDINIT(&nd, LOOKUP, niopts, UIO_USERSPACE, uap->path, &context);
+ 	NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_USERSPACE,
+ 	       uap->path, &context);
 
 #if NAMEDRSRCFORK
 	/* access(F_OK) calls are allowed for resource forks. */
@@ -4410,7 +4749,7 @@ stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecu
 	struct nameidata nd;
 	vfs_context_t ctx = vfs_context_current();
 
-	NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1, 
+	NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | FOLLOW | AUDITVNPATH1, 
 	    UIO_USERSPACE, path, ctx);
 	return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
 }
@@ -4483,7 +4822,7 @@ lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsec
 	struct nameidata nd;
 	vfs_context_t ctx = vfs_context_current();
 
-	NDINIT(&nd, LOOKUP, NOTRIGGER | NOFOLLOW | AUDITVNPATH1, 
+	NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | NOFOLLOW | AUDITVNPATH1, 
 	    UIO_USERSPACE, path, ctx);
 
 	return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
@@ -4569,7 +4908,7 @@ pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
 	struct nameidata nd;
 	vfs_context_t ctx = vfs_context_current();
 
-	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
+	NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1, 
 		UIO_USERSPACE, uap->path, ctx);
 	error = namei(&nd);
 	if (error)
@@ -4597,7 +4936,7 @@ readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
 	vfs_context_t ctx = vfs_context_current();
 	char uio_buf[ UIO_SIZEOF(1) ];
 
-	NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNPATH1, 
+	NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1, 
 		UIO_USERSPACE, uap->path, ctx);
 	error = namei(&nd);
 	if (error)
@@ -4680,7 +5019,7 @@ chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
 	struct nameidata nd;
 
 	AUDIT_ARG(fflags, uap->flags);
-	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
+	NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1, 
 		UIO_USERSPACE, uap->path, ctx);
 	error = namei(&nd);
 	if (error)
@@ -4782,7 +5121,7 @@ chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
 	struct nameidata nd;
 	int error;
 
-	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
+	NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1, 
 		UIO_USERSPACE, path, ctx);
 	if ((error = namei(&nd)))
 		return (error);
@@ -4984,7 +5323,8 @@ chown1(vfs_context_t ctx, struct chown_args *uap, __unused int32_t *retval, int
 
 	AUDIT_ARG(owner, uap->uid, uap->gid);
 
-	NDINIT(&nd, LOOKUP, (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1, 
+	NDINIT(&nd, LOOKUP, OP_SETATTR,
+		(follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1, 
 		UIO_USERSPACE, uap->path, ctx);
 	error = namei(&nd);
 	if (error)
@@ -5193,7 +5533,7 @@ utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
 	 * AUDIT: Needed to change the order of operations to do the 
 	 * name lookup first because auditing wants the path.
 	 */
-	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
+	NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1, 
 		UIO_USERSPACE, uap->path, ctx);
 	error = namei(&nd);
 	if (error)
@@ -5260,7 +5600,7 @@ truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
 
 	if (uap->length < 0)
 		return(EINVAL);
-	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
+	NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1, 
 		UIO_USERSPACE, uap->path, ctx);
 	if ((error = namei(&nd)))
 		return (error);
@@ -5472,14 +5812,15 @@ copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
 		return(EINVAL);
 	}
 
-	NDINIT(&fromnd, LOOKUP, SAVESTART | AUDITVNPATH1,
+	NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1,
 		UIO_USERSPACE, uap->from, ctx);
 	if ((error = namei(&fromnd)))
 		return (error);
 	fvp = fromnd.ni_vp;
 
-	NDINIT(&tond, CREATE,  LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
-	    UIO_USERSPACE, uap->to, ctx);
+	NDINIT(&tond, CREATE, OP_LINK,
+	       LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
+	       UIO_USERSPACE, uap->to, ctx);
 	if ((error = namei(&tond))) {
 		goto out1;
 	}
@@ -5552,76 +5893,101 @@ rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
 	int do_retry;
 	int mntrename;
 	int need_event;
-	const char *oname;
+	const char *oname = NULL;
 	char *from_name = NULL, *to_name = NULL;
 	int from_len=0, to_len=0;
 	int holding_mntlock;
 	mount_t locked_mp = NULL;
-	vnode_t oparent;
+	vnode_t oparent = NULLVP;
 #if CONFIG_FSE
 	fse_info from_finfo, to_finfo;
+	struct vnode_attr fva, tva;
 #endif
 	int from_truncated=0, to_truncated;
+	int batched = 0;
+	struct vnode_attr *fvap, *tvap;
+	int continuing = 0;
 	
 	holding_mntlock = 0;
     do_retry = 0;
 retry:
 	fvp = tvp = NULL;
 	fdvp = tdvp = NULL;
+	fvap = tvap = NULL;
 	mntrename = FALSE;
 
-	NDINIT(&fromnd, DELETE, WANTPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->from, ctx);
+	NDINIT(&fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
+	       UIO_USERSPACE, uap->from, ctx);
+	fromnd.ni_flag = NAMEI_COMPOUNDRENAME;
+	
+	NDINIT(&tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
+	       UIO_USERSPACE, uap->to, ctx);
+	tond.ni_flag = NAMEI_COMPOUNDRENAME;
 	
-	if ( (error = namei(&fromnd)) )
-	        goto out1;
-	fdvp = fromnd.ni_dvp;
-	fvp  = fromnd.ni_vp;
+continue_lookup:
+	if ((fromnd.ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
+		if ( (error = namei(&fromnd)) )
+			goto out1;
+		fdvp = fromnd.ni_dvp;
+		fvp  = fromnd.ni_vp;
 
-#if CONFIG_MACF
-	error = mac_vnode_check_rename_from(ctx, fdvp, fvp, &fromnd.ni_cnd);
-	if (error)
-		goto out1;
-#endif
+		if (fvp && fvp->v_type == VDIR)
+			tond.ni_cnd.cn_flags |= WILLBEDIR;
+	}
 
-	NDINIT(&tond, RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK , UIO_USERSPACE, uap->to, ctx);
-	if (fvp->v_type == VDIR)
-		tond.ni_cnd.cn_flags |= WILLBEDIR;
+	if ((tond.ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
+		if ( (error = namei(&tond)) ) {
+			/*
+			 * Translate error code for rename("dir1", "dir2/.").
+			 */
+			if (error == EISDIR && fvp->v_type == VDIR) 
+				error = EINVAL;
+			goto out1;
+		}
+		tdvp = tond.ni_dvp;
+		tvp  = tond.ni_vp;
+	}
 
-	if ( (error = namei(&tond)) ) {
-		/*
-		 * Translate error code for rename("dir1", "dir2/.").
+	batched = vnode_compound_rename_available(fdvp);
+	if (!fvp) {
+		/*
+		 * Claim: this check will never reject a valid rename.
+		 * For success, either fvp must be on the same mount as tdvp,
+		 * or fvp must sit atop a vnode on the same mount as tdvp.
+		 *
+		 * Suppose fdvp and tdvp are not on the same mount.
+		 * If fvp is on the same mount as tdvp, then fvp is not on
+		 * the same mount as fdvp, so fvp is the root of its
+		 * filesystem.  If fvp is the root, then it can't be moved
+		 * into another directory on the same mountpoint.
+		 * If fvp sits atop a vnode on the same mount as fdvp, then
+		 * that vnode must be part of the same mount as fdvp -- a
+		 * contradiction.
+		 *
+		 * If this check passes, then we are safe to pass these
+		 * vnodes to the same FS.
+		 */
-	        if (error == EISDIR && fvp->v_type == VDIR) 
-		        error = EINVAL;
-		goto out1;
+		if (fdvp->v_mount != tdvp->v_mount) {
+			error = EXDEV;
+			goto out1;
+		}
+		goto skipped_lookup;
 	}
-	tdvp = tond.ni_dvp;
-	tvp  = tond.ni_vp;
-
-#if CONFIG_MACF
-	error = mac_vnode_check_rename_to(ctx,
-	    tdvp, tvp, fdvp == tdvp, &tond.ni_cnd);
-	if (error)
-		goto out1;
-#endif
 
-	if (tvp != NULL) {
-		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
-			error = ENOTDIR;
-			goto out1;
-		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
-			error = EISDIR;
+	if (!batched) {
+		error = vn_authorize_rename(fdvp, fvp, &fromnd.ni_cnd, tdvp, tvp, &tond.ni_cnd, ctx, NULL);
+		if (error) {
+			if (error == ENOENT) {
+				/*
+				 * We encountered a race where after doing the namei, tvp stops
+				 * being valid. If so, simply re-drive the rename call from the
+				 * top.
+				 */
+				do_retry = 1;	
+			}
 			goto out1;
 		}
 	}
-	if (fvp == tdvp) {
-		error = EINVAL;
-		goto out1;
-	}
+
         /*
          * If the source and destination are the same (i.e. they're
          * links to the same vnode) and the target file system is
          * case sensitive, then there is nothing to do.
+	 *
+	 * XXX Come back to this.
          */
 	if (fvp == tvp) {
 		int pathconf_val;
@@ -5636,93 +6002,15 @@ retry:
 		}	
 	}
 
-	/*
-	 * Authorization.
-	 *
-	 * If tvp is a directory and not the same as fdvp, or tdvp is not
-	 * the same as fdvp, the node is moving between directories and we
-	 * need rights to remove from the old and add to the new.
-	 *
-	 * If tvp already exists and is not a directory, we need to be
-	 * allowed to delete it.
-	 *
-	 * Note that we do not inherit when renaming.
-	 *
-	 * XXX This needs to be revisited to implement the deferred-inherit bit
-	 */
-	{
-		int moving = 0;
-
-		error = 0;
-		if ((tvp != NULL) && vnode_isdir(tvp)) {
-			if (tvp != fdvp)
-				moving = 1;
-		} else if (tdvp != fdvp) {
-			moving = 1;
-		}
-		/*
-		 * must have delete rights to remove the old name even in
-		 * the simple case of fdvp == tdvp.
-		 *
-		 * If fvp is a directory, and we are changing it's parent,
-		 * then we also need rights to rewrite its ".." entry as well.
-		 */
-		if (vnode_isdir(fvp)) {
-			if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE | KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
-				goto auth_exit;
-		} else {
-		if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, ctx)) != 0)
-			goto auth_exit;
-		}
-		if (moving) {
-			/* moving into tdvp or tvp, must have rights to add */
-			if ((error = vnode_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp,
-				 NULL, 
-				 vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE,
-				 ctx)) != 0) {
-                /*
-                 * We could encounter a race where after doing the namei, tvp stops
-                 * being valid. If so, simply re-drive the rename call from the
-                 * top.
-                 */
-                 if (error == ENOENT) {
-                     do_retry = 1;
-                 }
-				goto auth_exit;
-			}
-		} else {
-			/* node staying in same directory, must be allowed to add new name */
-			if ((error = vnode_authorize(fdvp, NULL,
-				 vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, ctx)) != 0)
-				goto auth_exit;
-		}
-		/* overwriting tvp */
-		if ((tvp != NULL) && !vnode_isdir(tvp) &&
-		    ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0)) {
-            /*
-             * We could encounter a race where after doing the namei, tvp stops
-             * being valid. If so, simply re-drive the rename call from the
-             * top.
-             */
-            if (error == ENOENT) {
-                do_retry = 1;
-            }
-			goto auth_exit;
-		}
- 		    
-		/* XXX more checks? */
-
-auth_exit:
-		/* authorization denied */
-		if (error != 0)
-			goto out1;
-	}
 	/*
 	 * Allow the renaming of mount points.
 	 * - target must not exist
 	 * - target must reside in the same directory as source
 	 * - union mounts cannot be renamed
 	 * - "/" cannot be renamed
+	 *
+	 * XXX Handle this in VFS after a continued lookup (if we missed
+	 * in the cache to start off)
 	 */
 	if ((fvp->v_flag & VROOT) &&
 	    (fvp->v_type == VDIR) &&
@@ -5752,35 +6040,6 @@ auth_exit:
 		error = EXDEV;
 		goto out1;
 	}
-	/*
-	 * Avoid renaming "." and "..".
-	 */
-	if (fvp->v_type == VDIR &&
-	    ((fdvp == fvp) ||
-	     (fromnd.ni_cnd.cn_namelen == 1 && fromnd.ni_cnd.cn_nameptr[0] == '.') ||
-	     ((fromnd.ni_cnd.cn_flags | tond.ni_cnd.cn_flags) & ISDOTDOT)) ) {
-		error = EINVAL;
-		goto out1;
-	}
-	/*
-	 * The following edge case is caught here:
-	 * (to cannot be a descendent of from)
-	 *
-	 *       o fdvp
-	 *      /
-	 *     /
-	 *    o fvp
-	 *     \
-	 *      \
-	 *       o tdvp
-	 *      /
-	 *     /
-	 *    o tvp
-	 */
-	if (tdvp->v_parent == fvp) {
-		error = EINVAL;
-		goto out1;
-	}
 
 	/*
 	 * If source is the same as the destination (that is the
@@ -5799,6 +6058,8 @@ auth_exit:
 	 * NOTE - that fvp == tvp also occurs if they are hard linked and 
 	 * that correct behaviour then is just to return success without doing
 	 * anything.
+	 *
+	 * XXX filesystem should take care of this itself, perhaps...
 	 */
 	if (fvp == tvp && fdvp == tdvp) {
 		if (fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
@@ -5882,17 +6143,35 @@ auth_exit:
 		        holding_mntlock = 0;
 		}
 	}
+
 	// save these off so we can later verify that fvp is the same
 	oname   = fvp->v_name;
 	oparent = fvp->v_parent;
 
+skipped_lookup:
 #if CONFIG_FSE
-	need_event = need_fsevent(FSE_RENAME, fvp);
+	need_event = need_fsevent(FSE_RENAME, fdvp);
 	if (need_event) { 
-	        get_fse_info(fvp, &from_finfo, ctx);
+		if (fvp) {
+			get_fse_info(fvp, &from_finfo, ctx);
+		} else {
+			error = vfs_get_notify_attributes(&fva);
+			if (error) {
+				goto out1;
+			}
+
+			fvap = &fva;
+		}
 
 		if (tvp) {
 		        get_fse_info(tvp, &to_finfo, ctx);
+		} else if (batched) {
+			error = vfs_get_notify_attributes(&tva);
+			if (error) {
+				goto out1;
+			}
+
+			tvap = &tva;
 		}
 	}
 #else
@@ -5900,26 +6179,30 @@ auth_exit:
 #endif /* CONFIG_FSE */
 
 	if (need_event || kauth_authorize_fileop_has_listeners()) {
-		GET_PATH(from_name);
 		if (from_name == NULL) {
-			error = ENOMEM;
-			goto out1;
+			GET_PATH(from_name);
+			if (from_name == NULL) {
+				error = ENOMEM;
+				goto out1;
+			}
 		}
 
 		from_len = safe_getpath(fdvp, fromnd.ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
 
-		GET_PATH(to_name);
 		if (to_name == NULL) {
-			error = ENOMEM;
-			goto out1;
+			GET_PATH(to_name);
+			if (to_name == NULL) {
+				error = ENOMEM;
+				goto out1;
+			}
 		}
 
 		to_len = safe_getpath(tdvp, tond.ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
 	} 
 	
-	error = VNOP_RENAME(fdvp, fvp, &fromnd.ni_cnd,
-			    tdvp, tvp, &tond.ni_cnd,
-			    ctx);
+	error = vn_rename(fdvp, &fvp, &fromnd.ni_cnd, fvap,
+			    tdvp, &tvp, &tond.ni_cnd, tvap,
+			    0, ctx);
 
 	if (holding_mntlock) {
 		/*
@@ -5931,16 +6214,29 @@ auth_exit:
 		holding_mntlock = 0;
 	}
 	if (error) {
-        /*
-         * We may encounter a race in the VNOP where the destination didn't 
-         * exist when we did the namei, but it does by the time we go and 
-         * try to create the entry. In this case, we should re-drive this rename
-         * call from the top again.  Currently, only HFS bubbles out ERECYCLE,
+		if (error == EKEEPLOOKING) {
+			if ((fromnd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
+				if ((tond.ni_flag & NAMEI_CONTLOOKUP) == 0) {
+					panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
+				}
+			}
+
+			fromnd.ni_vp = fvp;
+			tond.ni_vp = tvp;
+	
+			goto continue_lookup;
+		}
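+		/*
+		 * (The EKEEPLOOKING case above is the compound-VNOP
+		 * handshake: a filesystem that implements rename as a single
+		 * compound operation may ask for the lookup to be continued,
+		 * e.g. after missing in the name cache, by setting
+		 * NAMEI_CONTLOOKUP on the nameidata.  We hand the updated
+		 * vnodes back and resume namei() at continue_lookup.)
+		 */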
+
+		/*
+		 * We may encounter a race in the VNOP where the destination didn't 
+		 * exist when we did the namei, but it does by the time we go and 
+		 * try to create the entry. In this case, we should re-drive this rename
+		 * call from the top again.  Currently, only HFS bubbles out ERECYCLE,
 		 * but other filesystems susceptible to this race could return it, too. 
-         */
-        if (error == ERECYCLE) {
-            do_retry = 1;
-        }
+		 */
+		if (error == ERECYCLE) {
+			do_retry = 1;
+		}
 
 		goto out1;
 	} 
@@ -5958,6 +6254,14 @@ auth_exit:
 			// set it here since only the from_finfo gets reported up to user space
 			from_finfo.mode |= FSE_TRUNCATED_PATH;
 		}
+
+		if (tvap && tvp) {
+			vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
+		}
+		if (fvap) {
+			vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
+		}
+
 	        if (tvp) {
 		        add_fsevent(FSE_RENAME, ctx,
 				    FSE_ARG_STRING, from_len, from_name,
@@ -6020,8 +6324,10 @@ auth_exit:
 	 * check that fvp has the same name/parent pointers it
 	 * had before the rename call... this is a 'weak' check
 	 * at best...
+	 *
+	 * XXX oparent and oname may not be set in the compound vnop case
 	 */
-	if (oname == fvp->v_name && oparent == fvp->v_parent) {
+	if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
 	        int update_flags;
 
 	        update_flags = VNODE_UPDATE_NAME;
@@ -6068,12 +6374,12 @@ out1:
 	        vnode_put(fdvp);
 	}
 	
-    /*
-     * If things changed after we did the namei, then we will re-drive
-     * this rename call from the top.
-     */
+	/*
+	 * If things changed after we did the namei, then we will re-drive
+	 * this rename call from the top.
+	 */
 	if(do_retry) {
-        do_retry = 0;
+		do_retry = 0;
 		goto retry;
 	}
 	
@@ -6096,12 +6402,16 @@ mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
 	vnode_t	vp, dvp;
 	int error;
 	int update_flags = 0;
+	int batched;
 	struct nameidata nd;
 
 	AUDIT_ARG(mode, vap->va_mode);
-	NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, 
-		UIO_USERSPACE, path, ctx);
+	NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, UIO_USERSPACE,
+	       path, ctx);
 	nd.ni_cnd.cn_flags |= WILLBEDIR;
+	nd.ni_flag = NAMEI_COMPOUNDMKDIR;
+
+continue_lookup:
 	error = namei(&nd);
 	if (error)
 		return (error);
@@ -6112,24 +6422,56 @@ mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
   		error = EEXIST;
   		goto out;
   	}
+	
+	batched = vnode_compound_mkdir_available(dvp);
 
 	VATTR_SET(vap, va_type, VDIR);
-   
-#if CONFIG_MACF
-	error = mac_vnode_check_create(ctx,
-	    nd.ni_dvp, &nd.ni_cnd, vap);
-	if (error)
+ 
+	/*
+	 * XXX
+	 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
+	 * only get EXISTS or EISDIR for existing path components, and not that it could see
+	 * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
+	 * it will fail in a spurious manner.  Need to figure out if this is valid behavior.
+	 */
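+	/*
+	 * A rough example of the fallback below (path names hypothetical):
+	 * "mkdir -p /tmp/foo" issues mkdir("/tmp") first.  A compound mkdir
+	 * may authorize up front and come back EACCES even though /tmp
+	 * exists; the plain LOOKUP retry converts that into the EEXIST that
+	 * mkdir -p knows how to ignore, so it can keep walking the path.
+	 */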
+ 	if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
+		if (error == EACCES || error == EPERM) {
+			int error2;
+
+			nameidone(&nd);
+			vnode_put(dvp);
+			dvp = NULLVP;
+
+			/* 
+			 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
+			 * rather than EACCES if the target exists.
+			 */
+			NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, UIO_USERSPACE,
+					path, ctx);
+			error2 = namei(&nd);
+			if (error2) {
+				goto out;
+			} else {
+				vp = nd.ni_vp;
+				error = EEXIST;
+				goto out;
+			}
+		}
+
 		goto out;
-#endif
+	}
+
+	/*
+	 * make the directory 
+	 */
+  	if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
+		if (error == EKEEPLOOKING) {
+			nd.ni_vp = vp;
+			goto continue_lookup;
+		}
 
-  	/* authorize addition of a directory to the parent */
-  	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
-  		goto out;
- 	
-   
-	/* make the directory */
-  	if ((error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx)) != 0)
   		goto out;
+	}
 		
 	// Make sure the name & parent pointers are hooked up
 	if (vp->v_name == NULL)
@@ -6152,8 +6494,9 @@ out:
 	nameidone(&nd);
 
 	if (vp)
-	        vnode_put(vp);
-	vnode_put(dvp);
+		vnode_put(vp);
+	if (dvp) 
+		vnode_put(dvp);
 
 	return (error);
 }
@@ -6219,10 +6562,19 @@ rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
 	vnode_t vp, dvp;
 	int error;
 	struct nameidata nd;
+	char     *path = NULL;
+	int       len=0;
+	int has_listeners = 0;
+	int need_event = 0;
+	int truncated = 0;
 	vfs_context_t ctx = vfs_context_current();
+#if CONFIG_FSE
+	struct vnode_attr va;
+#endif /* CONFIG_FSE */
+	struct vnode_attr *vap = NULL;
+	int batched;
 
 	int restart_flag;
-	uint32_t oldvp_id = UINT32_MAX;
 
 	/* 
 	 * This loop exists to restart rmdir in the unlikely case that two
@@ -6230,10 +6582,13 @@ rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
 	 * processes are racing to delete the same directory
 	 * containing orphaned appleDouble files.
 	 */
 	do {
+		NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
+		       UIO_USERSPACE, uap->path, ctx);
+		nd.ni_flag = NAMEI_COMPOUNDRMDIR;
+continue_lookup:
 		restart_flag = 0;
+		vap = NULL;
 
-		NDINIT(&nd, DELETE, LOCKPARENT | AUDITVNPATH1, 
-				UIO_USERSPACE, uap->path, ctx);
 		error = namei(&nd);
 		if (error)
 			return (error);
@@ -6241,132 +6596,153 @@ rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
 		dvp = nd.ni_dvp;
 		vp = nd.ni_vp;
 
+		if (vp) {
+			batched = vnode_compound_rmdir_available(vp);
 
-		/*
-		 * If being restarted check if the new vp
-		 * still has the same v_id.
-		 */
-		if (oldvp_id != UINT32_MAX && oldvp_id != vp->v_id) {
-			error = ENOENT;
-			goto out;
-		}
+			if (vp->v_flag & VROOT) {
+				/*
+				 * The root of a mounted filesystem cannot be deleted.
+				 */
+				error = EBUSY;
+				goto out;
+			}
 
-		if (vp->v_type != VDIR) {
-			/*
-			 * rmdir only deals with directories
-			 */
-			error = ENOTDIR;
-		} else if (dvp == vp) {
 			/*
-			 * No rmdir "." please.
+			 * Removed a check here; we used to abort if vp's vid
+			 * was not the same as what we'd seen the last time around.
+			 * I do not think that check was valid, because if we retry
+			 * and all dirents are gone, the directory could legitimately
+			 * be recycled but still be present in a situation where we would
+			 * have had permission to delete.  Therefore, we won't make 
+			 * an effort to preserve that check now that we may not have a
+			 * vp here.
 			 */
-			error = EINVAL;
-		} else if (vp->v_flag & VROOT) {
-			/*
-			 * The root of a mounted filesystem cannot be deleted.
-			 */
-			error = EBUSY;
+
+			if (!batched) {
+				error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
+				if (error) {
+					goto out;
+				}
+			}
 		} else {
-#if CONFIG_MACF
-			error = mac_vnode_check_unlink(ctx, dvp,
-					vp, &nd.ni_cnd);
-			if (!error)
-#endif
-				error = vnode_authorize(vp, nd.ni_dvp, KAUTH_VNODE_DELETE, ctx);
+			batched = 1;
+
+			if (!vnode_compound_rmdir_available(dvp)) {
+				panic("No error, but no compound rmdir?");
+			}
 		}
-		if (!error) {
-			char     *path = NULL;
-			int       len=0;
-			int has_listeners = 0;
-			int need_event = 0;
-			int truncated = 0;
+
 #if CONFIG_FSE
-			fse_info  finfo;
+		fse_info  finfo;
 
-			need_event = need_fsevent(FSE_DELETE, dvp);
-			if (need_event) {
+		need_event = need_fsevent(FSE_DELETE, dvp);
+		if (need_event) {
+			if (!batched) {
 				get_fse_info(vp, &finfo, ctx);
+			} else {
+				error = vfs_get_notify_attributes(&va);
+				if (error) {
+					goto out;
+				}
+
+				vap = &va;
 			}
+		}
 #endif
-			has_listeners = kauth_authorize_fileop_has_listeners();
-			if (need_event || has_listeners) {
+		has_listeners = kauth_authorize_fileop_has_listeners();
+		if (need_event || has_listeners) {
+			if (path == NULL) {
 				GET_PATH(path);
 				if (path == NULL) {
 					error = ENOMEM;
 					goto out;
 				}
+			}
 
-				len = safe_getpath(vp, NULL, path, MAXPATHLEN, &truncated);
+			len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
 #if CONFIG_FSE
-				if (truncated) {
-					finfo.mode |= FSE_TRUNCATED_PATH;
-				}
-#endif
+			if (truncated) {
+				finfo.mode |= FSE_TRUNCATED_PATH;
 			}
+#endif
+		}
 
-			error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
-
-			/*
-			 * Special case to remove orphaned AppleDouble
-			 * files. I don't like putting this in the kernel,
-			 * but carbon does not like putting this in carbon either,
-			 * so here we are.
-			 */
-			if (error == ENOTEMPTY) {
-				error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
-				if (error == EBUSY) {
-					oldvp_id = vp->v_id;
-					goto out;
-				}
+		error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
+		nd.ni_vp = vp;
+		if (vp == NULLVP) {
+			/* Couldn't find a vnode */
+			goto out;
+		}
 
+		if (error == EKEEPLOOKING) {
+			goto continue_lookup;
+		}
 
-				/*
-				 * Assuming everything went well, we will try the RMDIR again 
-				 */
-				if (!error)
-					error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
+		/*
+		 * Special case to remove orphaned AppleDouble
+		 * files. I don't like putting this in the kernel,
+		 * but carbon does not like putting this in carbon either,
+		 * so here we are.
+		 */
+		if (error == ENOTEMPTY) {
+			error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
+			if (error == EBUSY) {
+				goto out;
 			}
 
+
 			/*
-			 * Call out to allow 3rd party notification of delete. 
-			 * Ignore result of kauth_authorize_fileop call.
+			 * Assuming everything went well, we will try the RMDIR again 
 			 */
-			if (!error) {
-				if (has_listeners) {
-					kauth_authorize_fileop(vfs_context_ucred(ctx), 
-							KAUTH_FILEOP_DELETE, 
-							(uintptr_t)vp,
-							(uintptr_t)path);
-				}
+			if (!error)
+				error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
+		}
 
-				if (vp->v_flag & VISHARDLINK) {
-				    // see the comment in unlink1() about why we update
-				    // the parent of a hard link when it is removed
-				    vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
-				}
+		/*
+		 * Call out to allow 3rd party notification of delete. 
+		 * Ignore result of kauth_authorize_fileop call.
+		 */
+		if (!error) {
+			if (has_listeners) {
+				kauth_authorize_fileop(vfs_context_ucred(ctx), 
+						KAUTH_FILEOP_DELETE, 
+						(uintptr_t)vp,
+						(uintptr_t)path);
+			}
+
+			if (vp->v_flag & VISHARDLINK) {
+				// see the comment in unlink1() about why we update
+				// the parent of a hard link when it is removed
+				vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
+			}
 
 #if CONFIG_FSE
-				if (need_event) {
-					add_fsevent(FSE_DELETE, ctx,
-							FSE_ARG_STRING, len, path,
-							FSE_ARG_FINFO, &finfo,
-							FSE_ARG_DONE);
+			if (need_event) {
+				if (vap) {
+					vnode_get_fse_info_from_vap(vp, &finfo, vap);
 				}
-#endif
+				add_fsevent(FSE_DELETE, ctx,
+						FSE_ARG_STRING, len, path,
+						FSE_ARG_FINFO, &finfo,
+						FSE_ARG_DONE);
 			}
-			if (path != NULL)
-				RELEASE_PATH(path);
+#endif
 		}
 
 out:
+		if (path != NULL) {
+			RELEASE_PATH(path);
+			path = NULL;
+		}
 		/*
 		 * nameidone has to happen before we vnode_put(dvp)
 		 * since it may need to release the fs_nodelock on the dvp
 		 */
 		nameidone(&nd);
-
 		vnode_put(dvp);
-		vnode_put(vp);
+
+		if (vp) 
+			vnode_put(vp);
 
 		if (restart_flag == 0) {
 			wakeup_one((caddr_t)vp);
@@ -6389,7 +6765,8 @@ vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
                 int *numdirent, vfs_context_t ctxp)
 {
 	/* Check if fs natively supports VNODE_READDIR_EXTENDED */
-	if (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) {
+	if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) && 
+		   ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0))	{
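+		/*
+		 * Judging by its name, MNTK_DENY_READDIREXT lets a mount
+		 * that nominally supports VNODE_READDIR_EXTENDED opt out at
+		 * run time, forcing the translation path below instead.
+		 */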
 		return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
 	} else {
 		size_t bufsize;
@@ -6673,8 +7050,8 @@ revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
 	int error;
 	struct nameidata nd;
 
-	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, 
-		UIO_USERSPACE, uap->path, ctx);
+	NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+	       uap->path, ctx);
 	error = namei(&nd);
 	if (error)
 		return (error);
@@ -6922,24 +7299,24 @@ exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t
 	nameiflags = 0;
 	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
 
-    NDINIT(&fnd, LOOKUP, nameiflags | AUDITVNPATH1, 
-        	UIO_USERSPACE, uap->path1, ctx);
+	NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
+	       UIO_USERSPACE, uap->path1, ctx);
 
-    error = namei(&fnd);
-    if (error)
-        goto out2;
+	error = namei(&fnd);
+	if (error)
+		goto out2;
 
 	nameidone(&fnd);
 	fvp = fnd.ni_vp;
 
-    NDINIT(&snd, LOOKUP | CN_NBMOUNTLOOK, nameiflags | AUDITVNPATH2, 
-        	UIO_USERSPACE, uap->path2, ctx);
+	NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2, 
+               UIO_USERSPACE, uap->path2, ctx);
 
-    error = namei(&snd);
-    if (error) {
+	error = namei(&snd);
+	if (error) {
 		vnode_put(fvp);
 		goto out2;
-    }
+	}
 	nameidone(&snd);
 	svp = snd.ni_vp;
 
@@ -7187,8 +7564,8 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
 
 	nameiflags = 0;
 	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
-	NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, 
-		UIO_USERSPACE, uap->path, ctx);
+	NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
+	       UIO_USERSPACE, uap->path, ctx);
 
 	error = namei(&nd);
 	if (error)
@@ -7197,6 +7574,14 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
 	nameidone(&nd);
 	vp = nd.ni_vp; 
 
+#if CONFIG_MACF
+	error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
+	if (error) {
+		vnode_put(vp);
+		goto freeandexit;
+	}
+#endif
+
 	 
 	/*
 	 * If searchblock.maxmatches == 0, then skip the search. This has happened 
@@ -7215,44 +7600,716 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
 	   from copying out any results...
 	 */
 
-	fserror = VNOP_SEARCHFS(vp,
-							searchparams1,
-							searchparams2,
-							&searchblock.searchattrs,
-							(u_long)searchblock.maxmatches,
-							&timelimit,
-							returnattrs,
-							&nummatches,
-							(u_long)uap->scriptcode,
-							(u_long)uap->options,
-							auio,
-							state,
-							ctx);
-		
-saveandexit:
+	fserror = VNOP_SEARCHFS(vp,
+							searchparams1,
+							searchparams2,
+							&searchblock.searchattrs,
+							(u_long)searchblock.maxmatches,
+							&timelimit,
+							returnattrs,
+							&nummatches,
+							(u_long)uap->scriptcode,
+							(u_long)uap->options,
+							auio,
+							state,
+							ctx);
+		
+saveandexit:
+
+	vnode_put(vp);
+
+	/* Now copy out the stuff that needs copying out. That means the number of matches, the
+	   search state.  Everything was already put into the return buffer by the vop call. */
+
+	if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
+		goto freeandexit;
+
+	if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
+		goto freeandexit;
+	
+	error = fserror;
+
+freeandexit:
+
+	FREE(searchparams1,M_TEMP);
+
+	return(error);
+
+
+} /* end of searchfs system call */
+
+
+
+lck_grp_attr_t *  nspace_group_attr;
+lck_attr_t *      nspace_lock_attr;
+lck_grp_t *       nspace_mutex_group;
+
+lck_mtx_t         nspace_handler_lock;
+lck_mtx_t         nspace_handler_exclusion_lock;
+
+time_t snapshot_timestamp=0;
+int nspace_allow_virtual_devs=0;
+
+void nspace_handler_init(void);
+
+typedef struct nspace_item_info {
+	struct vnode *vp;
+	void         *arg;
+	uint64_t      op;
+	uint32_t      vid;
+	uint32_t      flags;
+	uint32_t      token;
+	uint32_t      refcount;
+} nspace_item_info;
+
+#define MAX_NSPACE_ITEMS   128
+nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
+uint32_t      nspace_item_idx=0;              // also used as the sleep/wakeup rendezvous address
+uint32_t      nspace_token_id=0;
+uint32_t      nspace_handler_timeout = 15;    // seconds
+
+#define NSPACE_ITEM_NEW         0x0001
+#define NSPACE_ITEM_PROCESSING  0x0002
+#define NSPACE_ITEM_DEAD        0x0004
+#define NSPACE_ITEM_CANCELLED   0x0008
+#define NSPACE_ITEM_DONE        0x0010
+#define NSPACE_ITEM_RESET_TIMER 0x0020
+
+#define NSPACE_ITEM_NSPACE_EVENT   0x0040
+#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
+#define NSPACE_ITEM_TRACK_EVENT    0x0100
+
+#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT | NSPACE_ITEM_TRACK_EVENT)
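+
+/*
+ * Rough life cycle of a slot in nspace_items[], as implemented below
+ * (a summary of this code, not an authoritative state machine):
+ *
+ *	free (flags == 0)
+ *	  -> NSPACE_ITEM_NEW          resolve_nspace_item_ext() claims a slot
+ *	  -> NSPACE_ITEM_PROCESSING   wait_for_namespace_event() hands it to
+ *	                              the user-space handler with a token
+ *	  -> NSPACE_ITEM_DONE or      handler UNBLOCK/CANCEL fsctl, handler
+ *	     NSPACE_ITEM_CANCELLED    exit, or waiter timeout
+ *	  -> free                     last waiter drops the refcount to 0
+ *
+ * NSPACE_ITEM_RESET_TIMER is ORed in by the UPDATE fsctl so that a
+ * timed-out waiter goes back to sleep instead of failing.
+ */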
+
+//#pragma optimization_level 0
+
+typedef enum {
+	NSPACE_HANDLER_NSPACE = 0,
+	NSPACE_HANDLER_SNAPSHOT = 1,
+	NSPACE_HANDLER_TRACK = 2,
+
+	NSPACE_HANDLER_COUNT,
+} nspace_type_t;
+
+typedef struct {
+	uint64_t handler_tid;
+	struct proc *handler_proc;
+	int handler_busy;
+} nspace_handler_t;
+
+nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
+
+static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
+{
+	switch(nspace_type) {
+		case NSPACE_HANDLER_NSPACE:
+			return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
+		case NSPACE_HANDLER_SNAPSHOT:
+			return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
+		case NSPACE_HANDLER_TRACK:
+			return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_TRACK_EVENT;
+		default:
+			printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
+			return 0;
+	}
+}
+
+static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
+{
+	switch(nspace_type) {
+		case NSPACE_HANDLER_NSPACE:
+			return NSPACE_ITEM_NSPACE_EVENT;
+		case NSPACE_HANDLER_SNAPSHOT:
+			return NSPACE_ITEM_SNAPSHOT_EVENT;
+		case NSPACE_HANDLER_TRACK:
+			return NSPACE_ITEM_TRACK_EVENT;
+		default:
+			printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
+			return 0;
+	}
+}
+
+static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
+{
+	switch(nspace_type) {
+		case NSPACE_HANDLER_NSPACE:
+			return FREAD | FWRITE | O_EVTONLY;
+		case NSPACE_HANDLER_SNAPSHOT:
+		case NSPACE_HANDLER_TRACK:
+			return FREAD | O_EVTONLY;
+		default:
+			printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
+			return 0;
+	}
+}
+
+static inline nspace_type_t nspace_type_for_op(uint64_t op)
+{
+	switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
+		case NAMESPACE_HANDLER_NSPACE_EVENT:
+			return NSPACE_HANDLER_NSPACE;
+		case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
+			return NSPACE_HANDLER_SNAPSHOT;
+		case NAMESPACE_HANDLER_TRACK_EVENT:
+			return NSPACE_HANDLER_TRACK;
+		default:
+			printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
+			return NSPACE_HANDLER_NSPACE;
+	}
+}
+
+static inline int nspace_is_special_process(struct proc *proc)
+{
+	int i;
+	for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
+		if (proc == nspace_handlers[i].handler_proc)
+			return 1;
+	}
+	return 0;
+}
+
+void
+nspace_handler_init(void)
+{
+	nspace_lock_attr    = lck_attr_alloc_init();
+	nspace_group_attr   = lck_grp_attr_alloc_init();
+	nspace_mutex_group  = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
+	lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
+	lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
+	memset(&nspace_items[0], 0, sizeof(nspace_items));
+}
+
+void
+nspace_proc_exit(struct proc *p)
+{
+	int i, event_mask = 0;
+	
+	for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
+		if (p == nspace_handlers[i].handler_proc) {
+			event_mask |= nspace_item_flags_for_type(i);
+			nspace_handlers[i].handler_tid = 0;
+			nspace_handlers[i].handler_proc = NULL;
+		}
+	}
+
+	if (event_mask == 0) {
+		return;
+	}
+	
+	if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
+		// if this process was the snapshot handler, zero snapshot_timestamp
+		snapshot_timestamp = 0;
+	}
+	
+	//
+	// unblock anyone that's waiting for the handler that died
+	//
+	lck_mtx_lock(&nspace_handler_lock);
+	for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+		if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
+
+			if ( nspace_items[i].flags & event_mask ) {
+
+				if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
+					vnode_lock_spin(nspace_items[i].vp);
+					nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
+					vnode_unlock(nspace_items[i].vp);
+				}
+				nspace_items[i].vp = NULL;
+				nspace_items[i].vid = 0;
+				nspace_items[i].flags = NSPACE_ITEM_DONE;
+				nspace_items[i].token = 0;
+				
+				wakeup((caddr_t)&(nspace_items[i].vp));
+			}
+		}
+	}
+	
+	wakeup((caddr_t)&nspace_item_idx);
+	lck_mtx_unlock(&nspace_handler_lock);
+}
+
+
+int 
+resolve_nspace_item(struct vnode *vp, uint64_t op)
+{
+	return resolve_nspace_item_ext(vp, op, NULL);
+}
+
+int 
+resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
+{
+	int i, error, keep_waiting;
+	struct timespec ts;
+	nspace_type_t nspace_type = nspace_type_for_op(op);
+
+	// only allow namespace events on regular files, directories and symlinks.
+	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
+		return 0;
+	}
+
+	//
+	// if this is a snapshot event and the vnode is on a
+	// disk image just pretend nothing happened since any
+	// change to the disk image will cause the disk image
+	// itself to get backed up and this avoids multi-way
+	// deadlocks between the snapshot handler and the ever
+	// popular diskimages-helper process.  the variable
+	// nspace_allow_virtual_devs allows this behavior to
+	// be overridden (for use by the Mobile TimeMachine
+	// testing infrastructure which uses disk images)
+	//
+	if (   (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
+	    && (vp->v_mount != NULL)
+	    && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
+	    && !nspace_allow_virtual_devs) {
+
+		return 0;
+	}
+
+	// if (thread_tid(current_thread()) == namespace_handler_tid) {
+	if (nspace_handlers[nspace_type].handler_proc == NULL) {
+		return 0;
+	}
+
+	if (nspace_is_special_process(current_proc())) {
+		return EDEADLK;
+	}
+
+	lck_mtx_lock(&nspace_handler_lock);
+
+retry:
+	for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+		if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
+			break;
+		}
+	}
+
+	if (i >= MAX_NSPACE_ITEMS) {
+		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+			if (nspace_items[i].flags == 0) {
+				break;
+			}
+		}
+	} else {
+		nspace_items[i].refcount++;
+	}
+	
+	if (i >= MAX_NSPACE_ITEMS) {
+		ts.tv_sec = nspace_handler_timeout;
+		ts.tv_nsec = 0;
+
+		error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
+		if (error == 0) {
+			// an entry got free'd up, go see if we can get a slot
+			goto retry;
+		} else {
+			lck_mtx_unlock(&nspace_handler_lock);
+			return error;
+		}
+	}
+
+	//
+	// if it didn't already exist, add it.  if it did exist
+	// we'll get woken up when someone does a wakeup() on
+	// the slot in the nspace_items table.
+	//
+	if (vp != nspace_items[i].vp) {
+		nspace_items[i].vp = vp;
+		nspace_items[i].arg = arg;
+		nspace_items[i].op = op;
+		nspace_items[i].vid = vnode_vid(vp);
+		nspace_items[i].flags = NSPACE_ITEM_NEW;
+		nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
+		if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
+			if (arg) {
+				vnode_lock_spin(vp);
+				vp->v_flag |= VNEEDSSNAPSHOT;
+				vnode_unlock(vp);
+			}
+		}
+
+		nspace_items[i].token = 0;
+		nspace_items[i].refcount = 1;
+		
+		wakeup((caddr_t)&nspace_item_idx);
+	}
+
+	//
+	// Now go to sleep until the handler does a wakeup on this
+	// slot in the nspace_items table (or we timeout).
+	//
+	keep_waiting = 1;
+	while(keep_waiting) {
+		ts.tv_sec = nspace_handler_timeout;
+		ts.tv_nsec = 0;
+		error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
+
+		if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
+			error = 0;
+		} else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
+			error = nspace_items[i].token;
+		} else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
+			if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
+				nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
+				continue;
+			} else {
+				error = ETIMEDOUT;
+			}
+		} else if (error == 0) {
+			// hmmm, why did we get woken up?
+			printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
+			       nspace_items[i].token);
+		} 
+
+		if (--nspace_items[i].refcount == 0) {
+			nspace_items[i].vp = NULL;     // clear this so that no one will match on it again
+			nspace_items[i].arg = NULL;
+			nspace_items[i].token = 0;     // clear this so that the handler will not find it anymore
+			nspace_items[i].flags = 0;     // this clears it for re-use
+		}
+		wakeup(&nspace_token_id);
+		keep_waiting = 0;
+	}
+
+	lck_mtx_unlock(&nspace_handler_lock);
+
+	return error;
+}
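+
+/*
+ * Hypothetical caller sketch (not from this file): a filesystem that
+ * needs a user-space handler to materialize data before an operation
+ * proceeds would do something like
+ *
+ *	error = resolve_nspace_item(vp, NAMESPACE_HANDLER_SNAPSHOT_EVENT);
+ *	if (error)	// EDEADLK, ETIMEDOUT, or a handler-supplied code
+ *		return error;
+ *
+ * and rely on the handler's UNBLOCK fsctl to wake it; see
+ * wait_for_namespace_event() and the FSCTL_NAMESPACE_HANDLER_* cases
+ * below.
+ */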
+
+
+int
+get_nspace_item_status(struct vnode *vp, int32_t *status)
+{
+	int i;
+
+	lck_mtx_lock(&nspace_handler_lock);
+	for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+		if (nspace_items[i].vp == vp) {
+			break;
+		}
+	}
+
+	if (i >= MAX_NSPACE_ITEMS) {
+		lck_mtx_unlock(&nspace_handler_lock);
+		return ENOENT;
+	}
+
+	*status = nspace_items[i].flags;
+	lck_mtx_unlock(&nspace_handler_lock);
+	return 0;
+}
+	
+
+#if 0
+static int
+build_volfs_path(struct vnode *vp, char *path, int *len)
+{
+	struct vnode_attr va;
+	int ret;
+
+	VATTR_INIT(&va);
+	VATTR_WANTED(&va, va_fsid);
+	VATTR_WANTED(&va, va_fileid);
+
+	if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
+		*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
+		ret = -1;
+	} else {
+		*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
+		ret = 0;
+	}
+
+	return ret;
+}
+#endif
+
+//
+// Note: this function does NOT check permissions on all of the
+// parent directories leading to this vnode.  It should only be
+// called on behalf of a root process.  Otherwise a process may
+// get access to a file because the file itself is readable even
+// though its parent directories would prevent access.
+//
+static int
+vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
+{
+	int error, action;
+
+	if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
+		return error;
+	}
+
+#if CONFIG_MACF
+	error = mac_vnode_check_open(ctx, vp, fmode);
+	if (error)
+		return error;
+#endif
 
-	vnode_put(vp);
+	/* compute action to be authorized */
+	action = 0;
+	if (fmode & FREAD) {
+		action |= KAUTH_VNODE_READ_DATA;
+	}
+	if (fmode & (FWRITE | O_TRUNC)) {
+		/*
+		 * If we are writing, appending, and not truncating,
+		 * indicate that we are appending so that if the
+		 * UF_APPEND or SF_APPEND bits are set, we do not deny
+		 * the open.
+		 */
+		if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
+			action |= KAUTH_VNODE_APPEND_DATA;
+		} else {
+			action |= KAUTH_VNODE_WRITE_DATA;
+		}
+	}
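+	/*
+	 * Worked example: fmode = FREAD|FWRITE|O_APPEND yields
+	 * KAUTH_VNODE_READ_DATA|KAUTH_VNODE_APPEND_DATA, so an append-only
+	 * file (UF_APPEND/SF_APPEND) can still be opened; adding O_TRUNC
+	 * would demand KAUTH_VNODE_WRITE_DATA instead and be denied.
+	 */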
 
-	/* Now copy out the stuff that needs copying out. That means the number of matches, the
-	   search state.  Everything was already put into he return buffer by the vop call. */
+	if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
+		return error;
+		
 
-	if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
-		goto freeandexit;
+	//
+	// if the vnode is tagged VOPENEVT and the current process
+	// has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
+	// flag to the open mode so that this open won't count against
+	// the vnode when carbon delete() does a vnode_isinuse() to see
+	// if a file is currently in use.  this allows spotlight
+	// importers to not interfere with carbon apps that depend on
+	// the no-delete-if-busy semantics of carbon delete().
+	//
+	if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
+		fmode |= O_EVTONLY;
+	}
 
-    if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
-		goto freeandexit;
-	
-	error = fserror;
+	if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
+		return error;
+	}
+	if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
+		VNOP_CLOSE(vp, fmode, ctx);
+		return error;
+	}
 
-freeandexit:
+	/* call out to allow 3rd party notification of open. 
+	 * Ignore result of kauth_authorize_fileop call.
+	 */
+	kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN, 
+			       (uintptr_t)vp, 0);
 
-	FREE(searchparams1,M_TEMP);
 
-	return(error);
+	return 0;
+}
 
+static int
+wait_for_namespace_event(namespace_handler_info_ext *nhi, nspace_type_t nspace_type)
+{
+	int i, error=0, unblock=0;
+	task_t curtask;
+	
+	lck_mtx_lock(&nspace_handler_exclusion_lock);
+	if (nspace_handlers[nspace_type].handler_busy) {
+		lck_mtx_unlock(&nspace_handler_exclusion_lock);
+		return EBUSY;
+	}
+	nspace_handlers[nspace_type].handler_busy = 1;
+	lck_mtx_unlock(&nspace_handler_exclusion_lock);
+	
+	/* 
+	 * Any process that gets here will be one of the namespace handlers.
+	 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
+	 * as we can cause deadlocks to occur, because the namespace handler may prevent
+	 * VNOP_INACTIVE from proceeding.  Mark the current task as a P_DEPENDENCY_CAPABLE 
+	 * process.
+	 */
+	curtask = current_task();
+	bsd_set_dependency_capable (curtask);	
+	
+	lck_mtx_lock(&nspace_handler_lock);
+	if (nspace_handlers[nspace_type].handler_proc == NULL) {
+		nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
+		nspace_handlers[nspace_type].handler_proc = current_proc();
+	}
+	
+	while (error == 0) {
+		
+		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+			if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
+				if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
+					continue;
+				}
+				break;
+			}
+		}
+		
+		if (i < MAX_NSPACE_ITEMS) {
+			nspace_items[i].flags  &= ~NSPACE_ITEM_NEW;
+			nspace_items[i].flags  |= NSPACE_ITEM_PROCESSING;
+			nspace_items[i].token  = ++nspace_token_id;
+			
+			if (nspace_items[i].vp) {
+				struct fileproc *fp;
+				int32_t indx, fmode;
+				struct proc *p = current_proc();
+				vfs_context_t ctx = vfs_context_current();
+				
+				fmode = nspace_open_flags_for_type(nspace_type);
+				
+				error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
+				if (error) {
+					unblock = 1;
+					break;
+				}
+				error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
+				if (error) {
+					unblock = 1;
+					vnode_put(nspace_items[i].vp);
+					break;
+				}
+				
+				if ((error = falloc(p, &fp, &indx, ctx))) {
+					vn_close(nspace_items[i].vp, fmode, ctx);
+					vnode_put(nspace_items[i].vp);
+					unblock = 1;
+					break;
+				}
+				
+				fp->f_fglob->fg_flag = fmode;
+				fp->f_fglob->fg_type = DTYPE_VNODE;
+				fp->f_fglob->fg_ops = &vnops;
+				fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
+				
+				proc_fdlock(p);
+				procfdtbl_releasefd(p, indx, NULL);
+				fp_drop(p, indx, fp, 1);
+				proc_fdunlock(p);
+				
+				error = copyout(&nspace_items[i].token, nhi->token, sizeof(uint32_t));
+				error = copyout(&nspace_items[i].op, nhi->flags, sizeof(uint64_t));
+				error = copyout(&indx, nhi->fdptr, sizeof(uint32_t));
+				if (nhi->infoptr) {
+					uio_t uio = (uio_t)nspace_items[i].arg;
+					uint64_t u_offset, u_length;
+					
+					if (uio) {
+						u_offset = uio_offset(uio);
+						u_length = uio_resid(uio);
+					} else {
+						u_offset = 0;
+						u_length = 0;
+					}						
+					error = copyout(&u_offset, nhi->infoptr, sizeof(uint64_t));
+					error = copyout(&u_length, nhi->infoptr+sizeof(uint64_t), sizeof(uint64_t));
+				}
+				if (error) {
+					vn_close(nspace_items[i].vp, fmode, ctx);
+					fp_free(p, indx, fp);
+					unblock = 1;
+				}
+				
+				vnode_put(nspace_items[i].vp);
+				
+				break;
+			} else {
+				printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
+				       i, nspace_items[i].vp, error, nspace_items[i].vp->v_name);
+			}
+			
+		} else {
+			error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
+			if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
+				error = EINVAL;
+				break;
+			}
+			
+		}
+	}
+	
+	if (unblock) {
+		if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
+			vnode_lock_spin(nspace_items[i].vp);
+			nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
+			vnode_unlock(nspace_items[i].vp);
+		}
+		nspace_items[i].vp = NULL;
+		nspace_items[i].vid = 0;
+		nspace_items[i].flags = NSPACE_ITEM_DONE;
+		nspace_items[i].token = 0;
+		
+		wakeup((caddr_t)&(nspace_items[i].vp));
+	}
+	
+	if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
+		// just go through every snapshot event and unblock it immediately.
+		if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
+			for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+				if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
+					if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
+						nspace_items[i].vp = NULL;
+						nspace_items[i].vid = 0;
+						nspace_items[i].flags = NSPACE_ITEM_DONE;
+						nspace_items[i].token = 0;
+						
+						wakeup((caddr_t)&(nspace_items[i].vp));					
+					}
+				}
+			}
+		}
+	}
+	
+	lck_mtx_unlock(&nspace_handler_lock);
+	
+	lck_mtx_lock(&nspace_handler_exclusion_lock);
+	nspace_handlers[nspace_type].handler_busy = 0;
+	lck_mtx_unlock(&nspace_handler_exclusion_lock);
+	
+	return error;
+}
 
-} /* end of searchfs system call */
 
+static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
+{
+	int error = 0;
+	namespace_handler_info_ext nhi;
+	
+	if (nspace_type == NSPACE_HANDLER_SNAPSHOT && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
+		return EINVAL;
+	}
+	
+	if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
+		return error;
+	}
+	
+	if (   (is64bit && size != sizeof(user64_namespace_handler_info) && size != sizeof(user64_namespace_handler_info_ext))
+	    || (is64bit == 0 && size != sizeof(user32_namespace_handler_info) && size != sizeof(user32_namespace_handler_info_ext))) {
+		
+		// either you're 64-bit and passed a 64-bit struct or
+		// you're 32-bit and passed a 32-bit struct.  otherwise
+		// it's not ok.
+		return EINVAL;
+	}
+	
+	if (is64bit) {
+		nhi.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
+		nhi.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
+		nhi.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
+		if (size == sizeof(user64_namespace_handler_info_ext)) {
+			nhi.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
+		} else {
+			nhi.infoptr = 0;
+		}
+	} else {
+		nhi.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
+		nhi.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
+		nhi.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
+		if (size == sizeof(user32_namespace_handler_info_ext)) {
+			nhi.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
+		} else {
+			nhi.infoptr = 0;
+		}
+	}
+	
+	return wait_for_namespace_event(&nhi, nspace_type);
+}
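+
+/*
+ * Sketch of the user-space side of this protocol, as implied by the
+ * fsctl cases below (variable names here are made up):
+ *
+ *	for (;;) {
+ *		// blocks until an event arrives; fills in token/op/fd
+ *		fsctl(volpath, FSCTL_NAMESPACE_HANDLER_GET, &info, 0);
+ *		... service the file via fd, optionally issuing
+ *		    FSCTL_NAMESPACE_HANDLER_UPDATE with the token to
+ *		    extend the waiter's timeout ...
+ *		uint32_t args[2] = { token, 0 };
+ *		fsctl(volpath, FSCTL_NAMESPACE_HANDLER_UNBLOCK, args, 0);
+ *	}
+ */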
 
 /*
  * Make a filesystem-specific control call:
@@ -7272,7 +8329,7 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long
 	size = IOCPARM_LEN(cmd);
 	if (size > IOCPARM_MAX) return (EINVAL);
 
-    is64bit = proc_is64bit(p);
+	is64bit = proc_is64bit(p);
 
 	memp = NULL;
 	if (size > sizeof (stkbuf)) {
@@ -7287,12 +8344,12 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long
 			error = copyin(udata, data, size);
 			if (error) goto FSCtl_Exit;
 		} else {
-		    if (is64bit) {
-    			*(user_addr_t *)data = udata;
-		    }
-		    else {
-    			*(uint32_t *)data = (uint32_t)udata;
-		    }
+			if (is64bit) {
+				*(user_addr_t *)data = udata;
+			}
+			else {
+				*(uint32_t *)data = (uint32_t)udata;
+			}
 		};
 	} else if ((cmd & IOC_OUT) && size) {
 		/*
@@ -7302,10 +8359,10 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long
 		bzero(data, size);
 	} else if (cmd & IOC_VOID) {
 		if (is64bit) {
-		    *(user_addr_t *)data = udata;
+			*(user_addr_t *)data = udata;
 		}
 		else {
-		    *(uint32_t *)data = (uint32_t)udata;
+			*(uint32_t *)data = (uint32_t)udata;
 		}
 	}
 
@@ -7349,31 +8406,31 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long
 		*arg_vp = NULL;
 
 	} else if (IOCBASECMD(cmd) == FSCTL_SET_PACKAGE_EXTS) {
-	    user_addr_t ext_strings;
-	    uint32_t    num_entries;
-	    uint32_t    max_width;
+		user_addr_t ext_strings;
+		uint32_t    num_entries;
+		uint32_t    max_width;
 	    
-	    if (   (is64bit && size != sizeof(user64_package_ext_info))
-		|| (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
+		if (   (is64bit && size != sizeof(user64_package_ext_info))
+		   || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
 
-		// either you're 64-bit and passed a 64-bit struct or
-		// you're 32-bit and passed a 32-bit struct.  otherwise
-		// it's not ok.
-		error = EINVAL;
-		goto FSCtl_Exit;
-	    }
+			// either you're 64-bit and passed a 64-bit struct or
+			// you're 32-bit and passed a 32-bit struct.  otherwise
+			// it's not ok.
+			error = EINVAL;
+			goto FSCtl_Exit;
+		}
 
-	    if (is64bit) {
-		ext_strings = ((user64_package_ext_info *)data)->strings;
-		num_entries = ((user64_package_ext_info *)data)->num_entries;
-		max_width   = ((user64_package_ext_info *)data)->max_width;
-	    } else {
-		ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
-		num_entries = ((user32_package_ext_info *)data)->num_entries;
-		max_width   = ((user32_package_ext_info *)data)->max_width;
-	    }
+		if (is64bit) {
+			ext_strings = ((user64_package_ext_info *)data)->strings;
+			num_entries = ((user64_package_ext_info *)data)->num_entries;
+			max_width   = ((user64_package_ext_info *)data)->max_width;
+		} else {
+			ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
+			num_entries = ((user32_package_ext_info *)data)->num_entries;
+			max_width   = ((user32_package_ext_info *)data)->max_width;
+		}
 	    
-	    error = set_package_extensions_table(ext_strings, num_entries, max_width);
+		error = set_package_extensions_table(ext_strings, num_entries, max_width);
 
 	} else if (IOCBASECMD(cmd) == FSCTL_WAIT_FOR_SYNC) {
 		error = tsleep((caddr_t)&sync_wait_time, PVFS|PCATCH, "sync-wait", 0);
@@ -7384,6 +8441,192 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long
 			error *= -1;
 		}
 			
+	} else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_GET) {
+		error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
+	} else if (IOCBASECMD(cmd) == FSCTL_OLD_SNAPSHOT_HANDLER_GET) {
+		error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
+	} else if (IOCBASECMD(cmd) == FSCTL_SNAPSHOT_HANDLER_GET_EXT) {
+		error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
+	} else if (IOCBASECMD(cmd) == FSCTL_TRACKED_HANDLER_GET) {
+		error = process_namespace_fsctl(NSPACE_HANDLER_TRACK, is64bit, size, data);
+	} else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_UPDATE) {
+		uint32_t token, val;
+		int i;
+
+		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
+			goto FSCtl_Exit;
+		}
+
+		if (!nspace_is_special_process(p)) {
+			error = EINVAL;
+			goto FSCtl_Exit;
+		}
+
+		token = ((uint32_t *)data)[0];
+		val   = ((uint32_t *)data)[1];
+
+		lck_mtx_lock(&nspace_handler_lock);
+
+		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+			if (nspace_items[i].token == token) {
+				break;
+			}
+		}
+
+		if (i >= MAX_NSPACE_ITEMS) {
+			error = ENOENT;
+		} else {
+			//
+			// if this bit is set, when resolve_nspace_item() times out
+			// it will loop and go back to sleep.
+			//
+			nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
+		}
+
+		lck_mtx_unlock(&nspace_handler_lock);
+
+		if (error) {
+			printf("nspace-handler-update: did not find token %u\n", token);
+		}
+
+	} else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_UNBLOCK) {
+		uint32_t token, val;
+		int i;
+
+		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
+			goto FSCtl_Exit;
+		}
+
+		if (!nspace_is_special_process(p)) {
+			error = EINVAL;
+			goto FSCtl_Exit;
+		}
+
+		token = ((uint32_t *)data)[0];
+		val   = ((uint32_t *)data)[1];
+
+		lck_mtx_lock(&nspace_handler_lock);
+
+		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+			if (nspace_items[i].token == token) {
+				break;
+			}
+		}
+
+		if (i >= MAX_NSPACE_ITEMS) {
+			printf("nspace-handler-unblock: did not find token %u\n", token);
+			error = ENOENT;
+		} else {
+			if (val == 0 && nspace_items[i].vp) {
+				vnode_lock_spin(nspace_items[i].vp);
+				nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
+				vnode_unlock(nspace_items[i].vp);
+			}
+
+			nspace_items[i].vp = NULL;
+			nspace_items[i].arg = NULL;
+			nspace_items[i].op = 0;
+			nspace_items[i].vid = 0;
+			nspace_items[i].flags = NSPACE_ITEM_DONE;
+			nspace_items[i].token = 0;
+
+			wakeup((caddr_t)&(nspace_items[i].vp));
+		}
+
+		lck_mtx_unlock(&nspace_handler_lock);
+
+	} else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_CANCEL) {
+		uint32_t token, val;
+		int i;
+
+		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
+			goto FSCtl_Exit;
+		}
+
+		if (!nspace_is_special_process(p)) {
+			error = EINVAL;
+			goto FSCtl_Exit;
+		}
+
+		token = ((uint32_t *)data)[0];
+		val   = ((uint32_t *)data)[1];
+
+		lck_mtx_lock(&nspace_handler_lock);
+
+		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+			if (nspace_items[i].token == token) {
+				break;
+			}
+		}
+
+		if (i >= MAX_NSPACE_ITEMS) {
+			printf("nspace-handler-cancel: did not find token %u\n", token);
+			error = ENOENT;
+		} else {
+			if (nspace_items[i].vp) {
+				vnode_lock_spin(nspace_items[i].vp);
+				nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
+				vnode_unlock(nspace_items[i].vp);
+			}
+
+			nspace_items[i].vp = NULL;			
+			nspace_items[i].arg = NULL;			
+			nspace_items[i].vid = 0;
+			nspace_items[i].token = val;
+			nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
+			nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;			
+
+			wakeup((caddr_t)&(nspace_items[i].vp));
+		}
+
+		lck_mtx_unlock(&nspace_handler_lock);
+	} else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME) {
+		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
+			goto FSCtl_Exit;
+		}
+
+		// we explicitly do not do the namespace_handler_proc check here
+
+		lck_mtx_lock(&nspace_handler_lock);
+		snapshot_timestamp = ((uint32_t *)data)[0];
+		wakeup(&nspace_item_idx);
+		lck_mtx_unlock(&nspace_handler_lock);
+		printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
+
+	} else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS) {
+		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
+			goto FSCtl_Exit;
+		}
+
+		lck_mtx_lock(&nspace_handler_lock);
+		nspace_allow_virtual_devs = ((uint32_t *)data)[0];
+		lck_mtx_unlock(&nspace_handler_lock);
+		printf("nspace-snapshot-handler will%s allow events on disk-images\n",
+		       nspace_allow_virtual_devs ? "" : " NOT");
+		error = 0;
+		
+	} else if (IOCBASECMD(cmd) == FSCTL_SET_FSTYPENAME_OVERRIDE) {
+		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
+			goto FSCtl_Exit;
+		}
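+		/*
+		 * This lets a privileged caller relabel the fstype name
+		 * reported via statfs (see munge_statfs()).  "mtmfs" is
+		 * presumably the Mobile Time Machine filesystem mentioned
+		 * earlier; the extra flag twiddling below makes a read-only
+		 * mtmfs mount advertise extended security (ACLs) and use
+		 * local authorization.
+		 */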
+		if (vp->v_mount) {
+			mount_lock(vp->v_mount);
+			if (data[0] != 0) {
+				strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
+				vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
+				if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
+					vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
+					vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
+				}
+			} else {
+				if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
+					vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
+				}
+				vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
+				vp->v_mount->fstypename_override[0] = '\0';
+			}
+			mount_unlock(vp->v_mount);
+		}
 	} else {
 		/* Invoke the filesystem-specific code */
 		error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
@@ -7418,8 +8661,8 @@ fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
 	/* Get the vnode for the file we are getting info on:  */
 	nameiflags = 0;
 	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
-	NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, UIO_USERSPACE,
-	    uap->path, ctx);
+	NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
+	       UIO_USERSPACE, uap->path, ctx);
 	if ((error = namei(&nd))) goto done;
 	vp = nd.ni_vp;
 	nameidone(&nd);
@@ -7520,7 +8763,7 @@ getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
 		return (EINVAL);
 
 	nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
-	NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
+	NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
 	if ((error = namei(&nd))) {
 		return (error);
 	}
@@ -7531,8 +8774,10 @@ getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
 		goto out;
 	}
 	if (xattr_protected(attrname)) {
-		error = EPERM;
-		goto out;
+		if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
+			error = EPERM;
+			goto out;
+		}
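+		/*
+		 * (The exception above lets only the superuser read the
+		 * protected "com.apple.system.Security" attribute -- the
+		 * xattr backing ACL storage -- while every other protected
+		 * xattr stays EPERM for everyone.)
+		 */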
 	}
 	/*
 	 * the specific check for 0xffffffff is a hack to preserve
@@ -7558,10 +8803,10 @@ getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
 	if (uap->size == 0xffffffff || uap->size == (size_t)-1)
 		goto no_uio;
 
-	if (uap->size > (size_t)XATTR_MAXSIZE)
-		uap->size = XATTR_MAXSIZE;
-
 	if (uap->value) {
+		if (uap->size > (size_t)XATTR_MAXSIZE)
+			uap->size = XATTR_MAXSIZE;
+		
 		auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
 		                            &uio_buf[0], sizeof(uio_buf));
 		uio_addiov(auio, uap->value, uap->size);
@@ -7652,7 +8897,12 @@ setxattr(proc_t p, struct setxattr_args *uap, int *retval)
 		return (EINVAL);
 
 	if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
-		return (error);
+		if (error == EPERM) {
+			/* if the string won't fit in attrname, copyinstr emits EPERM */
+			return (ENAMETOOLONG);
+		}
+		/* Otherwise return the default error from copyinstr to detect ERANGE, etc */
+		return error;
 	}
 	if (xattr_protected(attrname))
 		return(EPERM);
@@ -7661,7 +8911,7 @@ setxattr(proc_t p, struct setxattr_args *uap, int *retval)
 	}
 
 	nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
-	NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
+	NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
 	if ((error = namei(&nd))) {
 		return (error);
 	}
@@ -7698,7 +8948,9 @@ fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
 	size_t namelen;
 	int error;
 	char uio_buf[ UIO_SIZEOF(1) ];
+#if CONFIG_FSE
 	vfs_context_t ctx = vfs_context_current();
+#endif
 
 	if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
 		return (EINVAL);
@@ -7762,7 +9014,7 @@ removexattr(proc_t p, struct removexattr_args *uap, int *retval)
 	if (xattr_protected(attrname))
 		return(EPERM);
 	nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
-	NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
+	NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
 	if ((error = namei(&nd))) {
 		return (error);
 	}
@@ -7793,7 +9045,9 @@ fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
 	char attrname[XATTR_MAXNAMELEN+1];
 	size_t namelen;
 	int error;
+#if CONFIG_FSE
 	vfs_context_t ctx = vfs_context_current();
+#endif
 
 	if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
 		return (EINVAL);
@@ -7847,15 +9101,15 @@ listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
 		return (EINVAL);
 
 	nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER;
-	NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
+	NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
 	if ((error = namei(&nd))) {
 		return (error);
 	}
 	vp = nd.ni_vp;
 	nameidone(&nd);
 	if (uap->namebuf != 0 && uap->bufsize > 0) {
-		auio = uio_createwithbuffer(1, 0, spacetype, 
-								  	  UIO_READ, &uio_buf[0], sizeof(uio_buf));
+		auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
+		                            &uio_buf[0], sizeof(uio_buf));
 		uio_addiov(auio, uap->namebuf, uap->bufsize);
 	}
 
@@ -7958,6 +9212,13 @@ fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
 	if (error) {
 		goto out;
 	}
+#if CONFIG_MACF
+	error = mac_vnode_check_fsgetpath(ctx, vp);
+	if (error) {
+		vnode_put(vp);
+		goto out;
+	}
+#endif
 	/* Obtain the absolute path to this vnode. */
 	bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
 	error = build_path(vp, realpath, uap->bufsize, &length, bpflags, ctx);
@@ -8007,7 +9268,11 @@ munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
 		sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
 		sfs.f_fsid = sfsp->f_fsid;
 		sfs.f_owner = sfsp->f_owner;
-		strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
+		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
+			strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
+		} else {
+			strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
+		}
 		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
 		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
 
@@ -8080,7 +9345,11 @@ munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
 		sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
 		sfs.f_fsid = sfsp->f_fsid;
 		sfs.f_owner = sfsp->f_owner;
-		strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
+		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
+			strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
+		} else {
+			strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
+		}
 		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
 		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
 
diff --git a/bsd/vfs/vfs_utfconv.c b/bsd/vfs/vfs_utfconv.c
index adf92df30..f785b0d8c 100644
--- a/bsd/vfs/vfs_utfconv.c
+++ b/bsd/vfs/vfs_utfconv.c
@@ -80,7 +80,7 @@
  * Similar to __CFUniCharIsNonBaseCharacter except that
  * unicode_combinable also includes Hangul Jamo characters.
  */
-inline int
+int
 unicode_combinable(u_int16_t character)
 {
 	const u_int8_t *bitmap = __CFUniCharCombiningBitmap;
@@ -105,7 +105,7 @@ unicode_combinable(u_int16_t character)
  *
  * Similar to __CFUniCharIsDecomposableCharacter.
  */
-inline int
+int
 unicode_decomposeable(u_int16_t character) {
 	const u_int8_t *bitmap = __CFUniCharDecomposableBitmap;
 	u_int8_t value;
@@ -1024,7 +1024,7 @@ priortysort(u_int16_t* characters, int count)
 	u_int32_t p1, p2;
 	u_int16_t *ch1, *ch2;
 	u_int16_t *end;
-	int changes = 1;
+	int changes = 0;
 
 	end = characters + count;
 	do {
@@ -1035,13 +1035,22 @@ priortysort(u_int16_t* characters, int count)
 		while (ch2 < end) {
 			p1 = p2;
 			p2 = get_combining_class(*ch2);
-			if (p1 > p2) {
+			if (p1 > p2 && p2 != 0) {
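+				/*
+				 * p2 == 0 means *ch2 is a starter (combining
+				 * class 0); starters are barriers in canonical
+				 * ordering and must never be reordered, hence
+				 * the p2 != 0 guard above.
+				 */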
 				u_int32_t tmp;
 
 				tmp = *ch1;
 				*ch1 = *ch2;
 				*ch2 = tmp;
 				changes = 1;
+				
+				/*
+				 * Make sure that p2 contains the combining class for the
+				 * character now stored at *ch2.  This isn't required for
+				 * correctness, but it will be more efficient if a character
+				 * with a large combining class has to "bubble past" several
+				 * characters with lower combining classes.
+				 */
+				p2 = p1;
 			}
 			++ch1;
 			++ch2;
diff --git a/bsd/vfs/vfs_vnops.c b/bsd/vfs/vfs_vnops.c
index c7b110fd6..d7e2b5f14 100644
--- a/bsd/vfs/vfs_vnops.c
+++ b/bsd/vfs/vfs_vnops.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -125,6 +125,7 @@ static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn,
 			vfs_context_t ctx);
 static void filt_vndetach(struct knote *kn);
 static int filt_vnode(struct knote *kn, long hint);
+static int vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx);
 #if 0
 static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident,
 			vfs_context_t ctx);
@@ -163,6 +164,138 @@ vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode)
 	return(vn_open_auth(ndp, fmodep, &va));
 }
 
+static int
+vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx)
+{
+	int error;
+
+	if ((error = vnode_ref_ext(vp, fmode, 0)) != 0) {
+		goto bad;
+	}
+
+	/* call out to allow 3rd party notification of open. 
+	 * Ignore result of kauth_authorize_fileop call.
+	 */
+	kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN, 
+						   (uintptr_t)vp, 0);
+
+	return 0;
+
+bad:
+	return error;
+
+}
+
+/*
+ * May call nameidone() so that an FSEvent can be added safely.  Check ni_dvp
+ * to determine whether that has happened.
+ */
+static int
+vn_open_auth_do_create(struct nameidata *ndp, struct vnode_attr *vap, int fmode, boolean_t *did_create, boolean_t *did_open, vfs_context_t ctx)
+{
+	uint32_t status = 0;
+	vnode_t dvp = ndp->ni_dvp;
+	int batched;
+	int error;
+	vnode_t vp;
+
+	batched = vnode_compound_open_available(ndp->ni_dvp);
+	*did_open = FALSE;
+
+	VATTR_SET(vap, va_type, VREG);
+	if (fmode & O_EXCL)
+		vap->va_vaflags |= VA_EXCLUSIVE;
+
+#if NAMEDRSRCFORK
+	if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
+		if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0) 
+			goto out;
+		if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0)
+			goto out;
+		*did_create = TRUE;
+	} else {
+#endif
+		if (!batched) {
+			if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0)
+				goto out;
+		}
+
+		error = vn_create(dvp, &ndp->ni_vp, ndp, vap, VN_CREATE_DOOPEN, fmode, &status, ctx);
+		if (error != 0) {
+			if (batched) {
+				*did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? TRUE : FALSE;
+			} else {
+				*did_create = FALSE;
+			}
+
+			if (error == EKEEPLOOKING) {
+				if (*did_create) {
+					panic("EKEEPLOOKING, but we did a create?");
+				}
+				if (!batched) {
+					panic("EKEEPLOOKING from filesystem that doesn't support compound vnops?");
+				}
+				if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+					panic("EKEEPLOOKING, but continue flag not set?");
+				}
+
+				/* 
+				 * Do NOT drop the dvp: we need everything to continue the lookup.
+				 */
+				return error;
+			}
+		} else {
+			if (batched) {
+				*did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? 1 : 0;
+				*did_open = TRUE;
+			} else {
+				*did_create = TRUE;
+			}
+		}
+#if NAMEDRSRCFORK
+	}
+#endif
+
+	/*
+	 * Unlock the fsnode (if locked) here so that we are free
+	 * to drop the dvp iocount and prevent deadlock in build_path().
+	 * nameidone() will still do the right thing later.
+	 */
+	vp = ndp->ni_vp;
+	namei_unlock_fsnode(ndp);
+
+	if (*did_create) {
+		int	update_flags = 0;
+
+		// Make sure the name & parent pointers are hooked up
+		if (vp->v_name == NULL)
+			update_flags |= VNODE_UPDATE_NAME;
+		if (vp->v_parent == NULLVP)
+			update_flags |= VNODE_UPDATE_PARENT;
+
+		if (update_flags)
+			vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags);
+
+		vnode_put(dvp);
+		ndp->ni_dvp = NULLVP;
+
+#if CONFIG_FSE
+		if (need_fsevent(FSE_CREATE_FILE, vp)) {
+			add_fsevent(FSE_CREATE_FILE, ctx,
+					FSE_ARG_VNODE, vp,
+					FSE_ARG_DONE);
+		}
+#endif
+	}
+out:
+	if (ndp->ni_dvp != NULLVP) {
+		vnode_put(dvp);
+		ndp->ni_dvp = NULLVP;
+	}
+
+	return error;
+}
+
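EKEEPLOOKING is the filesystem's way of handing control back to namei() partway through a compound operation; the caller must keep the nameidata intact (path buffer, NAMEI_CONTLOOKUP flag) and re-drive the lookup. A schematic of the retry loop used throughout this file, error handling elided:

    /* Schematic only -- mirrors the continue_create_lookup pattern below. */
    for (;;) {
        error = namei(ndp);                       /* (re)drive the lookup    */
        if (error)
            break;
        error = VNOP_COMPOUND_OPEN(ndp->ni_dvp, &ndp->ni_vp, ndp,
                                   0, fmode, NULL, NULL, ctx);
        if (error != EKEEPLOOKING)
            break;                                /* success or a real error */
        /* EKEEPLOOKING: NAMEI_CONTLOOKUP is set and the path buffer is still
         * valid, so the next namei() call resumes where the filesystem left off. */
    }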
 /*
  * Open a file with authorization, updating the contents of the structures
  * pointed to by ndp, fmodep, and vap as necessary to perform the requested
@@ -217,100 +350,85 @@ vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap)
 	int error;
 	int fmode;
 	uint32_t origcnflags;
-	kauth_action_t action;
+	boolean_t did_create;
+	boolean_t did_open;
+	boolean_t need_vnop_open;
+	boolean_t batched;
+	boolean_t ref_failed;
 
 again:
 	vp = NULL;
 	dvp = NULL;
+	batched = FALSE;
+	did_create = FALSE;
+	need_vnop_open = TRUE;
+	ref_failed = FALSE;
 	fmode = *fmodep;
 	origcnflags = ndp->ni_cnd.cn_flags;
+
+	/*
+	 * O_CREAT
+	 */
 	if (fmode & O_CREAT) {
 	        if ( (fmode & O_DIRECTORY) ) {
 		        error = EINVAL;
 			goto out;
 		}
 		ndp->ni_cnd.cn_nameiop = CREATE;
+#if CONFIG_TRIGGERS
+		ndp->ni_op = OP_LINK;
+#endif
 		/* Inherit USEDVP, vnode_open() supported flags only */
 		ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT);
 		ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1;
+		ndp->ni_flag = NAMEI_COMPOUNDOPEN;
 #if NAMEDRSRCFORK
 		/* open calls are allowed for resource forks. */
 		ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
 #endif
 		if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0 && (origcnflags & FOLLOW) != 0)
 			ndp->ni_cnd.cn_flags |= FOLLOW;
+
+continue_create_lookup:
 		if ( (error = namei(ndp)) )
 			goto out;
+
 		dvp = ndp->ni_dvp;
 		vp = ndp->ni_vp;
 
- 		/* not found, create */
-		if (vp == NULL) {
- 			/* must have attributes for a new file */
- 			if (vap == NULL) {
- 				error = EINVAL;
-				goto badcreate;
- 			}
-
-			VATTR_SET(vap, va_type, VREG);
-#if CONFIG_MACF
-			error = mac_vnode_check_create(ctx,
-			    dvp, &ndp->ni_cnd, vap);
-			if (error)
-				goto badcreate;
-#endif /* MAC */
+		batched = vnode_compound_open_available(dvp);
 
-			/* authorize before creating */
- 			if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
-				goto badcreate;
+		/* not found, create */
+		if (vp == NULL) {
+			/* must have attributes for a new file */
+			if (vap == NULL) {
+				error = EINVAL;
+				goto out;
+			}
+			/*
+			 * Attempt a create.  For a system supporting compound VNOPs, we may
+			 * find an existing file or create one; in either case, we will already
+			 * have the file open and no VNOP_OPEN() will be needed.
+			 */
+			error = vn_open_auth_do_create(ndp, vap, fmode, &did_create, &did_open, ctx);
 
-			if (fmode & O_EXCL)
-				vap->va_vaflags |= VA_EXCLUSIVE;
-#if NAMEDRSRCFORK
-			if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
-				if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0)
-					goto badcreate;
-			} else
-#endif
-			if ((error = vn_create(dvp, &ndp->ni_vp, &ndp->ni_cnd, vap, 0, ctx)) != 0)
-				goto badcreate;
-			
+			dvp = ndp->ni_dvp;
 			vp = ndp->ni_vp;
 
-			if (vp) {
-				int	update_flags = 0;
-
-			        // Make sure the name & parent pointers are hooked up
-			        if (vp->v_name == NULL)
-					update_flags |= VNODE_UPDATE_NAME;
-				if (vp->v_parent == NULLVP)
-				        update_flags |= VNODE_UPDATE_PARENT;
-
-				if (update_flags)
-				        vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags);
-
-#if CONFIG_FSE
-				if (need_fsevent(FSE_CREATE_FILE, vp)) {
-					vnode_put(dvp);
-					dvp = NULL;
-				        add_fsevent(FSE_CREATE_FILE, ctx,
-						    FSE_ARG_VNODE, vp,
-						    FSE_ARG_DONE);
+			/* 
+			 * Detected a node that the filesystem couldn't handle.  Don't call
+			 * nameidone() yet, because we need that path buffer.
+			 */
+			if (error == EKEEPLOOKING) {
+				if (!batched) {
+					panic("EKEEPLOOKING from a filesystem that doesn't support compound VNOPs?");
 				}
-#endif
-
+				goto continue_create_lookup;
 			}
-			/*
-			 * nameidone has to happen before we vnode_put(dvp)
-			 * and clear the ni_dvp field, since it may need
-			 * to release the fs_nodelock on the dvp
-			 */
-badcreate:
-			nameidone(ndp);
-			ndp->ni_dvp = NULL;
 
+			nameidone(ndp);
 			if (dvp) {
-				vnode_put(dvp);
+				panic("Shouldn't have a dvp here.");
 			}
 
 			if (error) {
@@ -318,129 +436,166 @@ badcreate:
 				 * Check for a creation or unlink race.
 				 */
 				if (((error == EEXIST) && !(fmode & O_EXCL)) ||
-					   ((error == ENOENT) && (fmode & O_CREAT))){
+						((error == ENOENT) && (fmode & O_CREAT))){
+					if (vp) 
+						vnode_put(vp);
 					goto again;
 				}
 				goto bad;
 			}
-			fmode &= ~O_TRUNC;
+
+			need_vnop_open = !did_open;
 		} else {
+			if (fmode & O_EXCL)
+				error = EEXIST;
+
+			/* 
+			 * We have a vnode.  Use compound open if available 
+			 * or else fall through to "traditional" path.  Note: can't
+			 * do a compound open for root, because the parent belongs
+			 * to a different FS.
+			 */
+			if (error == 0 && batched && (vnode_mount(dvp) == vnode_mount(vp))) {
+				error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx);
+
+				if (error == 0) {
+					vp = ndp->ni_vp;
+					need_vnop_open = FALSE;
+				} else if (error == EKEEPLOOKING) {
+					if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+						panic("EKEEPLOOKING, but continue flag not set?");
+					}
+					goto continue_create_lookup;
+				} 
+			}
 			nameidone(ndp);
-			ndp->ni_dvp = NULL;
 			vnode_put(dvp);
+			ndp->ni_dvp = NULLVP;
 
-			if (fmode & O_EXCL) {
-				error = EEXIST;
+			if (error) {
 				goto bad;
 			}
+
 			fmode &= ~O_CREAT;
+
+			/* Fall through */
 		}
 	} else {
+		/*
+		 * Not O_CREAT
+		 */
 		ndp->ni_cnd.cn_nameiop = LOOKUP;
 		/* Inherit USEDVP, vnode_open() supported flags only */
 		ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT);
-		ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1;
+		ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1 | WANTPARENT;
 #if NAMEDRSRCFORK
 		/* open calls are allowed for resource forks. */
 		ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
 #endif
+		ndp->ni_flag = NAMEI_COMPOUNDOPEN;
+
 		/* preserve NOFOLLOW from vnode_open() */
 		if (fmode & O_NOFOLLOW || fmode & O_SYMLINK || (origcnflags & FOLLOW) == 0) {
-		    ndp->ni_cnd.cn_flags &= ~FOLLOW;
+			ndp->ni_cnd.cn_flags &= ~FOLLOW;
 		}
 
-		if ( (error = namei(ndp)) )
-			goto out;
-		vp = ndp->ni_vp;
+		/* Do a lookup, possibly going directly to filesystem for compound operation */
+		do {
+			if ( (error = namei(ndp)) )
+				goto out;
+			vp = ndp->ni_vp;
+			dvp = ndp->ni_dvp;
+
+			/* Check for batched lookup-open */
+			batched = vnode_compound_open_available(dvp);
+			if (batched && ((vp == NULLVP) || (vnode_mount(dvp) == vnode_mount(vp)))) {
+				error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx);
+				vp = ndp->ni_vp;
+				if (error == 0) {
+					need_vnop_open = FALSE;
+				} else if (error == EKEEPLOOKING) {
+					if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
+						panic("EKEEPLOOKING, but continue flag not set?");
+					}
+				}
+			}
+		} while (error == EKEEPLOOKING);
+
 		nameidone(ndp);
-		ndp->ni_dvp = NULL;
+		vnode_put(dvp);
+		ndp->ni_dvp = NULLVP;
 
-		if ( (fmode & O_DIRECTORY) && vp->v_type != VDIR ) {
-		        error = ENOTDIR;
+		if (error) {
 			goto bad;
 		}
 	}
 
-	if (vp->v_type == VSOCK && vp->v_tag != VT_FDESC) {
-		error = EOPNOTSUPP;	/* Operation not supported on socket */
-		goto bad;
-	}
-
-	if (vp->v_type == VLNK && (fmode & O_NOFOLLOW) != 0) {
-		error = ELOOP;	/* O_NOFOLLOW was specified and the target is a symbolic link */
-		goto bad;
+	/* 
+	 * By this point, nameidone() has been called, the dvp iocount has been
+	 * dropped, and the dvp pointer has been cleared.
+	 */
+	if (ndp->ni_dvp != NULLVP) {
+		panic("Haven't cleaned up adequately in vn_open_auth()");
 	}
 
-	/* authorize open of an existing file */
-	if ((fmode & O_CREAT) == 0) {
-
-		/* disallow write operations on directories */
-		if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
-			error = EISDIR;
-			goto bad;
+	/*
+	 * Expect to use this code for filesystems without compound VNOPs, for the root 
+	 * of a filesystem, which can't be "looked up" in the sense of VNOP_LOOKUP(),
+	 * and for shadow files, which do not live on the same filesystems as their "parents."
+	 */
+	if (need_vnop_open) {
+		if (batched && !vnode_isvroot(vp) && !vnode_isnamedstream(vp)) {
+			panic("Why am I trying to use VNOP_OPEN() on anything other than the root or a named stream?");
 		}
 
-#if CONFIG_MACF
-		error = mac_vnode_check_open(ctx, vp, fmode);
-		if (error)
-			goto bad;
-#endif
-
-		/* compute action to be authorized */
-		action = 0;
-		if (fmode & FREAD) {
-			action |= KAUTH_VNODE_READ_DATA;
-		}
-		if (fmode & (FWRITE | O_TRUNC)) {
-			/*
-			 * If we are writing, appending, and not truncating,
-			 * indicate that we are appending so that if the
-			 * UF_APPEND or SF_APPEND bits are set, we do not deny
-			 * the open.
-			 */
-			if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
-				action |= KAUTH_VNODE_APPEND_DATA;
-			} else {
-			action |= KAUTH_VNODE_WRITE_DATA;
+		if (!did_create) {
+			error = vn_authorize_open_existing(vp, &ndp->ni_cnd, fmode, ctx, NULL);
+			if (error) {
+				goto bad;
 			}
 		}
-		if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
-			goto bad;
-		
 
-		//
-		// if the vnode is tagged VOPENEVT and the current process
-		// has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
-		// flag to the open mode so that this open won't count against
-		// the vnode when carbon delete() does a vnode_isinuse() to see
-		// if a file is currently in use.  this allows spotlight
-		// importers to not interfere with carbon apps that depend on
-		// the no-delete-if-busy semantics of carbon delete().
-		//
-		if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
-		    fmode |= O_EVTONLY;
+		error = VNOP_OPEN(vp, fmode, ctx);
+		if (error) {
+			goto bad;
 		}
+		need_vnop_open = FALSE;
+	}
 
+	// if the vnode is tagged VOPENEVT and the current process
+	// has the P_CHECKOPENEVT flag set, then we OR in the O_EVTONLY
+	// flag to the open mode so that this open won't count against
+	// the vnode when carbon delete() does a vnode_isinuse() to see
+	// if a file is currently in use.  this allows spotlight
+	// importers to not interfere with carbon apps that depend on
+	// the no-delete-if-busy semantics of carbon delete().
+	//
+	if (!did_create && (vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
+		fmode |= O_EVTONLY;
 	}
 
-	if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
+	/*
+	 * Grab reference, etc.
+	 */
+	error = vn_open_auth_finish(vp, fmode, ctx);
+	if (error) {
+		ref_failed = TRUE;
 		goto bad;
 	}
-	if ( (error = vnode_ref_ext(vp, fmode)) ) {
-		goto bad2;
-	}
 
-	/* call out to allow 3rd party notification of open. 
-	 * Ignore result of kauth_authorize_fileop call.
-	 */
-	kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN, 
-						   (uintptr_t)vp, 0);
+	/* Compound VNOP open is responsible for doing the truncate */
+	if (batched || did_create) 
+		fmode &= ~O_TRUNC;
 
 	*fmodep = fmode;
 	return (0);
-bad2:
-	VNOP_CLOSE(vp, fmode, ctx);
+
 bad:
+	/* Opened either explicitly or by a batched create */
+	if (!need_vnop_open) {
+		VNOP_CLOSE(vp, fmode, ctx);
+	}
+
 	ndp->ni_vp = NULL;
 	if (vp) {
 #if NAMEDRSRCFORK
@@ -459,10 +614,11 @@ bad:
 		 *
 		 * EREDRIVEOPEN: means that we were hit by the tty allocation race.
 		 */
-		if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN)) {
+		if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN) || ref_failed) {
 			goto again;
 		}
 	}
+
 out:
 	return (error);
 }
@@ -502,16 +658,6 @@ vn_close(struct vnode *vp, int flags, vfs_context_t ctx)
 {
 	int error;
 
-#if CONFIG_FSE
-	if (flags & FWASWRITTEN) {
-	        if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) {
-		        add_fsevent(FSE_CONTENT_MODIFIED, ctx,
-				    FSE_ARG_VNODE, vp,
-				    FSE_ARG_DONE);
-		}
-	}
-#endif
-
 #if NAMEDRSRCFORK
 	/* Sync data from resource fork shadow file if needed. */
 	if ((vp->v_flag & VISNAMEDSTREAM) && 
@@ -529,6 +675,16 @@ vn_close(struct vnode *vp, int flags, vfs_context_t ctx)
 
 	error = VNOP_CLOSE(vp, flags, ctx);
 
+#if CONFIG_FSE
+	if (flags & FWASWRITTEN) {
+	        if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) {
+		        add_fsevent(FSE_CONTENT_MODIFIED, ctx,
+				    FSE_ARG_VNODE, vp,
+				    FSE_ARG_DONE);
+		}
+	}
+#endif
+
 	if (!vnode_isspec(vp))
 		(void)vnode_rele_ext(vp, flags, 0);
 	
@@ -782,6 +938,9 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
 		ioflag |= IO_NDELAY;
 	if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
 	        ioflag |= IO_NOCACHE;
+	if (fp->f_fglob->fg_flag & FNODIRECT)
+		ioflag |= IO_NODIRECT;
+
 	/*
 	 * Treat synchronous mounts and O_FSYNC on the fd as equivalent.
 	 *
@@ -996,7 +1155,7 @@ vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat6
 		sb->st_blocks = roundup(va.va_total_alloc, 512) / 512;
 	}
 
-	/* if we're interested in exended security data and we got an ACL */
+	/* if we're interested in extended security data and we got an ACL */
 	if (xsec != NULL) {
 		if (!VATTR_IS_SUPPORTED(&va, va_acl) &&
 		    !VATTR_IS_SUPPORTED(&va, va_uuuid) &&
@@ -1147,7 +1306,10 @@ vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
 		error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, ctx);
 
 		if (error == 0 && com == TIOCSCTTY) {
-			vnode_ref(vp);
+			error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE);
+			if (error != 0) {
+				panic("vnode_ref_ext() failed despite VNODE_REF_FORCE?!");
+			}
 
 			funnel_state = thread_funnel_set(kernel_flock, TRUE);
 			sessp = proc_session(vfs_context_proc(ctx));
@@ -1235,6 +1397,7 @@ int
 vn_pathconf(vnode_t vp, int name, int32_t *retval, vfs_context_t ctx)
 {
 	int	error = 0;
+	struct vfs_attr vfa;
 
 	switch(name) {
 	case _PC_EXTENDED_SECURITY_NP:
@@ -1273,6 +1436,33 @@ vn_pathconf(vnode_t vp, int name, int32_t *retval, vfs_context_t ctx)
 	case _PC_SYNC_IO:	/* unistd.h: _POSIX_SYNCHRONIZED_IO */
 		*retval = 0;	/* [SIO] option is not supported */
 		break;
+	case _PC_XATTR_SIZE_BITS:
+		/* The number of bits used to store the maximum extended
+		 * attribute size in bytes.  For example, if the maximum
+		 * attribute size supported by a file system is 128KB, the
+		 * value returned will be 18.  However, a value of 18 only
+		 * means that the maximum attribute size is somewhere between
+		 * 128KB and (256KB - 1).  As a special case, the resource
+		 * fork can be much larger, and some file-system-specific
+		 * extended attributes have smaller, preset sizes; for
+		 * example, Finder Info is always 32 bytes.
+		 */
+		memset(&vfa, 0, sizeof(vfa));
+		VFSATTR_INIT(&vfa);
+		VFSATTR_WANTED(&vfa, f_capabilities);
+		if (vfs_getattr(vnode_mount(vp), &vfa, ctx) == 0 &&
+		    (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) && 
+		    (vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) && 
+		    (vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
+			/* Supports native extended attributes */
+			error = VNOP_PATHCONF(vp, name, retval, ctx);
+		} else {
+			/* Number of bits used to represent the maximum size of 
+			 * extended attribute stored in an Apple Double file.
+			 */
+			*retval = AD_XATTR_SIZE_BITS;
+		}
+		break;
 	default:
 		error = VNOP_PATHCONF(vp, name, retval, ctx);
 		break;
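The encoding is simply the bit position of the limit: a filesystem whose maximum attribute size is 128KB (2^17 bytes) reports 18, and any limit from 128KB through 256KB-1 reports the same value. A quick user-space check of the arithmetic:

    #include <stdio.h>

    /* Bits needed to store a maximum size: index of the highest set bit, plus one. */
    static int xattr_size_bits(unsigned long maxsize)
    {
        int bits = 0;
        while (maxsize) {
            maxsize >>= 1;
            bits++;
        }
        return bits;
    }

    int main(void)
    {
        printf("%d\n", xattr_size_bits(128UL * 1024));       /* prints 18 */
        printf("%d\n", xattr_size_bits(256UL * 1024 - 1));   /* also 18   */
        return 0;
    }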
@@ -1303,7 +1493,7 @@ vn_kqfilt_add(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
 					}
 
 				} else if (!vnode_isreg(vp)) {
-					if (vnode_isspec(vp) && 
+					if (vnode_ischr(vp) && 
 							(error = spec_kqfilter(vp, kn)) == 0) {
 						/* claimed by a special device */
 						vnode_put(vp);
@@ -1447,18 +1637,22 @@ vnode_writable_space_count(vnode_t vp)
 static int
 filt_vnode(struct knote *kn, long hint)
 {
-	struct vnode *vp = (struct vnode *)kn->kn_hook;
+	vnode_t vp = (struct vnode *)kn->kn_hook;
 	int activate = 0;
+	long orig_hint = hint;
 
 	if (0 == hint) {
-		if ((vnode_getwithvid(vp, kn->kn_hookid) != 0)) {
+		vnode_lock(vp);
+
+		if (vnode_getiocount(vp, kn->kn_hookid, VNODE_NODEAD | VNODE_WITHID) != 0) {
+			/* Is recycled */
 			hint = NOTE_REVOKE;
-		} else {
-			vnode_put(vp);
-		}
-	}    
+		} 
+	} else {
+		lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED);
+	}
 
-	/* NOTE_REVOKE is special, as it is only sent during vnode reclaim */
+	/* Special handling for vnodes that are in recycle or already gone */
 	if (NOTE_REVOKE == hint) {
 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 		activate = 1;
@@ -1496,5 +1690,15 @@ filt_vnode(struct knote *kn, long hint)
 		}
 	}
 
+	if (orig_hint == 0) {
+		/*
+		 * Definitely need to unlock, may need to put 
+		 */
+		if (hint == 0) {
+			vnode_put_locked(vp);
+		}
+		vnode_unlock(vp);
+	}
+
 	return (activate);
 }
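The new locking discipline distinguishes the two ways filt_vnode() can be entered: polled (hint == 0), where the filter must take the vnode lock and an iocount itself, and event-driven, where the caller already holds the lock and the filter only asserts it. A schematic of the poll-side sequence, using the same calls as the code above (vid stands in for kn->kn_hookid):

    /* Schematic of the hint == 0 path: lock, pin, evaluate, unpin, unlock. */
    vnode_lock(vp);
    if (vnode_getiocount(vp, vid, VNODE_NODEAD | VNODE_WITHID) != 0) {
        /* vnode is being recycled: report EV_EOF | EV_ONESHOT, no iocount taken */
    } else {
        /* ... evaluate the filter state ... */
        vnode_put_locked(vp);        /* drop the iocount, lock still held */
    }
    vnode_unlock(vp);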
diff --git a/bsd/vfs/vfs_xattr.c b/bsd/vfs/vfs_xattr.c
index d15711685..a37ba0f74 100644
--- a/bsd/vfs/vfs_xattr.c
+++ b/bsd/vfs/vfs_xattr.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -65,8 +65,6 @@
 #define MAKE_SHADOW_NAME(VP, NAME)  \
 	snprintf((NAME), sizeof((NAME)), ".vfs_rsrc_stream_%p%08x%p", (void*)(VP), (VP)->v_id, (VP)->v_data);
 
-static vnode_t shadow_dvp;  /* tmp directory to hold stream shadow files */
-static int shadow_vid;
 static int shadow_sequence;
 
 
@@ -556,7 +554,7 @@ vnode_flushnamedstream(vnode_t vp, vnode_t svp, vfs_context_t context)
 		return (0);
 	}
 	datasize = va.va_data_size;
-	if ((datasize == 0)) {
+	if (datasize == 0) {
 		(void) default_removexattr(vp, XATTR_RESOURCEFORK_NAME, 0, context);
 		return (0);
 	}
@@ -623,9 +621,10 @@ getshadowfile(vnode_t vp, vnode_t *svpp, int makestream, size_t *rsrcsize,
 	char tmpname[80];
 	size_t datasize = 0;
 	int  error = 0;
+	int retries = 0;
 
+retry_create:
 	*creator = 0;
-
 	/* Establish a unique file name. */
 	MAKE_SHADOW_NAME(vp, tmpname);
 	bzero(&cn, sizeof(cn));
@@ -705,9 +704,32 @@ getshadowfile(vnode_t vp, vnode_t *svpp, int makestream, size_t *rsrcsize,
 	if (error == 0) {
 		vnode_recycle(svp);
 		*creator = 1;
-	} else if ((error == EEXIST) && !makestream) {
+	} 
+	else if ((error == EEXIST) && !makestream) {
 		error = VNOP_LOOKUP(dvp, &svp, &cn, context);
 	}
+	else if ((error == ENOENT) && !makestream) {
+		/*
+		 * We could have raced with an rmdir on the shadow directory
+		 * post-lookup.  Retry from the beginning, once only, to see
+		 * whether we need to re-create the shadow directory in
+		 * get_shadow_dir.
+		 */
+		if (retries == 0) {
+			retries++;
+			if (dvp) {
+				vnode_put (dvp);
+				dvp = NULLVP;
+			}
+			if (svp) {
+				vnode_put (svp);
+				svp = NULLVP;
+			}
+			goto retry_create;
+		}
+		/* Otherwise, just error out normally below */
+	}
+	
 out:
 	if (dvp) {
 		vnode_put(dvp);
@@ -936,15 +958,27 @@ get_shadow_dir(vnode_t *sdvpp, vfs_context_t context)
 	uint32_t  tmp_fsid;
 	int  error;
 
-	/* Check if we've already created it. */
-	if (shadow_dvp != NULLVP) {
-		if ((error = vnode_getwithvid(shadow_dvp, shadow_vid))) {
-			shadow_dvp = NULLVP;
-		} else {
-			*sdvpp = shadow_dvp;
-			return (0);
-		}
+
+	bzero(tmpname, sizeof(tmpname));
+	snprintf(tmpname, sizeof(tmpname), "/var/run/.vfs_rsrc_streams_%p%x",
+			(void*)rootvnode, shadow_sequence);
+	/* 
+	 * Look up the shadow directory to ensure that it still exists. 
+	 * By looking it up, we get an iocounted dvp to use, and avoid some coherency issues
+	 * in caching it when multiple threads may be trying to manipulate the pointers.
+	 */
+	error = vnode_lookup(tmpname, 0, &sdvp, context);
+	if (error == 0) {
+		/*
+		 * If we get here, then we have successfully looked up the shadow dir, 
+		 * and it has an iocount from the lookup. Return the vp in the output argument.
+		 */
+		*sdvpp = sdvp;
+		return (0);
 	}
+	/* In the failure case, no iocount is acquired */
+	sdvp = NULLVP;
+	bzero (tmpname, sizeof(tmpname));
 
 	/* Obtain the vnode for "/var/run" directory. */
 	if (vnode_lookup("/var/run", 0, &dvp, context) != 0) {
@@ -980,14 +1014,7 @@ get_shadow_dir(vnode_t *sdvpp, vfs_context_t context)
 	/*
 	 * There can be only one winner for an exclusive create.
 	 */
-	if (error == 0) {
-		/* Take a long term ref to keep this dir around. */
-		error = vnode_ref(sdvp);
-		if (error == 0) {
-			shadow_dvp = sdvp;
-			shadow_vid = sdvp->v_id;
-		}
-	} else if (error == EEXIST) {
+	if (error == EEXIST) {
 		/* loser has to look up directory */
 		error = VNOP_LOOKUP(dvp, &sdvp, &cn, context);
 		if (error == 0) {
@@ -995,7 +1022,7 @@ get_shadow_dir(vnode_t *sdvpp, vfs_context_t context)
 			if (sdvp->v_type != VDIR) {
 				goto baddir;
 			}
-			/* Obtain the fsid for /var/run directory */
+			/* Obtain the fsid for the /var/run directory */
 			VATTR_INIT(&va);
 			VATTR_WANTED(&va, va_fsid);
 			if (VNOP_GETATTR(dvp, &va, context) != 0  ||
@@ -1156,7 +1183,7 @@ baddir:
 #define ATTR_BUF_SIZE      4096        /* default size of the attr file and how much we'll grow by */
 
 /* Implementation Limits */
-#define ATTR_MAX_SIZE      (128*1024)  /* 128K maximum attribute data size */
+#define ATTR_MAX_SIZE      AD_XATTR_MAXSIZE
 #define ATTR_MAX_HDR_SIZE  65536
 /*
  * Note: ATTR_MAX_HDR_SIZE is the largest attribute header
@@ -2347,12 +2374,15 @@ open_xattrfile(vnode_t vp, int fileflags, vnode_t *xvpp, vfs_context_t context)
 	 * file security from the EA must always get access
 	 */
 lookup:
-	NDINIT(&nd, LOOKUP, LOCKLEAF | NOFOLLOW | USEDVP | DONOTAUTH, UIO_SYSSPACE,
-	       CAST_USER_ADDR_T(filename), context);
+	NDINIT(&nd, LOOKUP, OP_OPEN, LOCKLEAF | NOFOLLOW | USEDVP | DONOTAUTH,
+	       UIO_SYSSPACE, CAST_USER_ADDR_T(filename), context);
    	nd.ni_dvp = dvp;
 
 	if (fileflags & O_CREAT) {
 		nd.ni_cnd.cn_nameiop = CREATE;
+#if CONFIG_TRIGGERS
+		nd.ni_op = OP_LINK;
+#endif
 		if (dvp != vp) {
 			nd.ni_cnd.cn_flags |= LOCKPARENT;
 		}
@@ -2394,8 +2424,9 @@ lookup:
 			if (gid != KAUTH_GID_NONE)
 				VATTR_SET(&va, va_gid, gid);
 
-			error = vn_create(dvp, &nd.ni_vp, &nd.ni_cnd, &va,
+			error = vn_create(dvp, &nd.ni_vp, &nd, &va,
 			                  VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT | VN_CREATE_NOLABEL,
+					  0, NULL,
 			                  context);
 			if (error)
 				error = ENOATTR;
@@ -2544,7 +2575,7 @@ remove_xattrfile(vnode_t xvp, vfs_context_t context)
 		return (error);
 	}
 
-	NDINIT(&nd, DELETE, LOCKPARENT | NOFOLLOW | DONOTAUTH,
+	NDINIT(&nd, DELETE, OP_UNLINK, LOCKPARENT | NOFOLLOW | DONOTAUTH,
 	       UIO_SYSSPACE, CAST_USER_ADDR_T(path), context);
 	error = namei(&nd);
 	FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
diff --git a/bsd/vfs/vnode_if.c b/bsd/vfs/vnode_if.c
index 1a77414e2..6dd63bfde 100644
--- a/bsd/vfs/vnode_if.c
+++ b/bsd/vfs/vnode_if.c
@@ -106,6 +106,24 @@ struct vnodeop_desc vnop_lookup_desc = {
 	NULL
 };
 
+int vnop_compound_open_vp_offsets[] = {
+	VOPARG_OFFSETOF(struct vnop_compound_open_args, a_dvp),
+	VDESC_NO_OFFSET
+};
+
+struct vnodeop_desc vnop_compound_open_desc = {
+	0,
+	"vnop_compound_open",
+	0 | VDESC_VP0_WILLRELE,
+	vnop_compound_open_vp_offsets, 
+	VOPARG_OFFSETOF(struct vnop_compound_open_args, a_vpp),
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vnop_compound_open_args, a_cnp),
+	VOPARG_OFFSETOF(struct vnop_compound_open_args, a_context),
+	NULL
+};
+
 int vnop_create_vp_offsets[] = {
 	VOPARG_OFFSETOF(struct vnop_create_args,a_dvp),
 	VDESC_NO_OFFSET
@@ -485,6 +503,23 @@ struct vnodeop_desc vnop_remove_desc = {
 	NULL
 };
 
+int vnop_remove_extended_vp_offsets[] = {
+	VOPARG_OFFSETOF(struct vnop_remove_args,a_dvp),
+	VDESC_NO_OFFSET
+};
+struct vnodeop_desc vnop_compound_remove_desc = {
+	0,
+	"vnop_compound_remove",
+	0,
+	vnop_remove_extended_vp_offsets,
+	VOPARG_OFFSETOF(struct vnop_compound_remove_args, a_vpp),
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vnop_remove_args, a_cnp),
+	VOPARG_OFFSETOF(struct vnop_remove_args, a_context),
+	NULL
+};
+
 int vnop_link_vp_offsets[] = {
 	VOPARG_OFFSETOF(struct vnop_link_args,a_vp),
 	VOPARG_OFFSETOF(struct vnop_link_args,a_tdvp),
@@ -523,6 +558,26 @@ struct vnodeop_desc vnop_rename_desc = {
 	NULL
 };
 
+int vnop_compound_rename_vp_offsets[] = {
+	VOPARG_OFFSETOF(struct vnop_compound_rename_args,a_fdvp),
+	VOPARG_OFFSETOF(struct vnop_compound_rename_args,a_fvpp),
+	VOPARG_OFFSETOF(struct vnop_compound_rename_args,a_tdvp),
+	VOPARG_OFFSETOF(struct vnop_compound_rename_args,a_tvpp),
+	VDESC_NO_OFFSET
+};
+struct vnodeop_desc vnop_compound_rename_desc = {
+	0,
+	"vnop_compound_rename",
+	0 | VDESC_VP0_WILLRELE | VDESC_VP1_WILLRELE | VDESC_VP2_WILLRELE | VDESC_VP3_WILLRELE,
+	vnop_compound_rename_vp_offsets,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vnop_compound_rename_args, a_fcnp),
+	VOPARG_OFFSETOF(struct vnop_compound_rename_args, a_context),
+	NULL
+};
+
 int vnop_mkdir_vp_offsets[] = {
        VOPARG_OFFSETOF(struct vnop_mkdir_args,a_dvp),
        VDESC_NO_OFFSET
@@ -540,6 +595,24 @@ struct vnodeop_desc vnop_mkdir_desc = {
        NULL
 };
 
+int vnop_compound_mkdir_vp_offsets[] = {
+       VOPARG_OFFSETOF(struct vnop_compound_mkdir_args,a_dvp),
+       VDESC_NO_OFFSET
+};
+struct vnodeop_desc vnop_compound_mkdir_desc = {
+       0,
+       "vnop_compound_mkdir",
+       0 | VDESC_VP0_WILLRELE,
+       vnop_compound_mkdir_vp_offsets,
+       VOPARG_OFFSETOF(struct vnop_compound_mkdir_args, a_vpp),
+       VDESC_NO_OFFSET,
+       VDESC_NO_OFFSET,
+       VOPARG_OFFSETOF(struct vnop_compound_mkdir_args, a_cnp),
+       VOPARG_OFFSETOF(struct vnop_compound_mkdir_args, a_context),
+       NULL
+};
+
+
 int vnop_rmdir_vp_offsets[] = {
 	VOPARG_OFFSETOF(struct vnop_rmdir_args,a_dvp),
 	VOPARG_OFFSETOF(struct vnop_rmdir_args,a_vp),
@@ -558,6 +631,23 @@ struct vnodeop_desc vnop_rmdir_desc = {
 	NULL
 };
 
+int vnop_compound_rmdir_vp_offsets[] = {
+	VOPARG_OFFSETOF(struct vnop_compound_rmdir_args,a_dvp),
+	VDESC_NO_OFFSET
+};
+struct vnodeop_desc vnop_compound_rmdir_desc = {
+	0,
+	"vnop_compound_rmdir",
+	0 | VDESC_VP0_WILLRELE | VDESC_VP1_WILLRELE,
+	vnop_compound_rmdir_vp_offsets,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vnop_compound_rmdir_args, a_cnp),
+	VOPARG_OFFSETOF(struct vnop_compound_rmdir_args, a_context),
+	NULL
+};
+
 int vnop_symlink_vp_offsets[] = {
        VOPARG_OFFSETOF(struct vnop_symlink_args,a_dvp),
        VDESC_NO_OFFSET
@@ -1004,6 +1094,7 @@ struct vnodeop_desc *vfs_op_descs[] = {
 	&vnop_mknod_desc,
 	&vnop_whiteout_desc,
 	&vnop_open_desc,
+	&vnop_compound_open_desc,
 	&vnop_close_desc,
 	&vnop_access_desc,
 	&vnop_getattr_desc,
@@ -1021,10 +1112,14 @@ struct vnodeop_desc *vfs_op_descs[] = {
 	&vnop_mnomap_desc,
 	&vnop_fsync_desc,
 	&vnop_remove_desc,
+	&vnop_compound_remove_desc,
 	&vnop_link_desc,
 	&vnop_rename_desc,
+	&vnop_compound_rename_desc,
 	&vnop_mkdir_desc,
+	&vnop_compound_mkdir_desc,
 	&vnop_rmdir_desc,
+	&vnop_compound_rmdir_desc,
 	&vnop_symlink_desc,
 	&vnop_readdir_desc,
 	&vnop_readdirattr_desc,
diff --git a/bsd/vm/Makefile b/bsd/vm/Makefile
index f0ce21745..608304077 100644
--- a/bsd/vm/Makefile
+++ b/bsd/vm/Makefile
@@ -10,14 +10,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 DATAFILES =  \
diff --git a/bsd/vm/dp_backing_file.c b/bsd/vm/dp_backing_file.c
index 420238db9..bb2808ecf 100644
--- a/bsd/vm/dp_backing_file.c
+++ b/bsd/vm/dp_backing_file.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -42,8 +42,11 @@
 #include <sys/vnode_internal.h>
 #include <sys/namei.h>
 #include <sys/ubc_internal.h>
-#include <sys/mount_internal.h>
 #include <sys/malloc.h>
+#include <sys/user.h>
+#if CONFIG_PROTECT
+#include <sys/cprotect.h>
+#endif
 
 #include <default_pager/default_pager_types.h>
 #include <default_pager/default_pager_object.h>
@@ -245,7 +248,7 @@ macx_swapon(
 	/*
 	 * Get a vnode for the paging area.
 	 */
-	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
+	NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
 	       ((IS_64BIT_PROCESS(p)) ? UIO_USERSPACE64 : UIO_USERSPACE32),
 	       (user_addr_t) args->filename, ctx);
 
@@ -274,6 +277,18 @@ macx_swapon(
 	if ((file_size < (off_t)size) && ((error = vnode_setsize(vp, (off_t)size, 0, ctx)) != 0))
 		goto swapon_bailout;
 
+#if CONFIG_PROTECT
+	{
+		void *cnode = NULL;
+		/* initialize content protection keys manually */
+		if ((cnode = cp_get_protected_cnode(vp)) != 0) {
+			if ((error = cp_handle_vnop(cnode, CP_WRITE_ACCESS)) != 0)
+				goto swapon_bailout;
+		}
+	}
+#endif
+
+
 	if (default_pager_init_flag == 0) {
 		start_def_pager(NULL);
 		default_pager_init_flag = 1;
@@ -306,21 +321,23 @@ macx_swapon(
 	   goto swapon_bailout;
 	}
 
-	if (vp->v_mount->mnt_kern_flag & MNTK_SSD) {
+#if CONFIG_EMBEDDED
+	dp_cluster_size = 1 * PAGE_SIZE;
+#else
+	if ((dp_isssd = vnode_pager_isSSD(vp)) == TRUE) {
 		/*
 		 * keep the cluster size small since the
 		 * seek cost is effectively 0 which means
 		 * we don't care much about fragmentation
 		 */
-		dp_isssd = TRUE;
 		dp_cluster_size = 2 * PAGE_SIZE;
 	} else {
 		/*
 		 * use the default cluster size
 		 */
-		dp_isssd = FALSE;
 		dp_cluster_size = 0;
 	}
+#endif
 	kr = default_pager_backing_store_create(default_pager, 
 					-1, /* default priority */
 					dp_cluster_size,
@@ -379,6 +396,12 @@ swapon_bailout:
 	}
 	(void) thread_funnel_set(kernel_flock, FALSE);
 	AUDIT_MACH_SYSCALL_EXIT(error);
+
+	if (error)
+		printf("macx_swapon FAILED - %d\n", error);
+	else
+		printf("macx_swapon SUCCESS\n");
+
 	return(error);
 }
 
@@ -402,6 +425,8 @@ macx_swapoff(
 	int			error;
 	boolean_t		funnel_state;
 	vfs_context_t ctx = vfs_context_current();
+	struct uthread	*ut;
+	int			orig_iopol_disk;
 
 	AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPOFF);
 
@@ -415,7 +440,7 @@ macx_swapoff(
 	/*
 	 * Get the vnode for the paging area.
 	 */
-	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
+	NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
 	       ((IS_64BIT_PROCESS(p)) ? UIO_USERSPACE64 : UIO_USERSPACE32),
 	       (user_addr_t) args->filename, ctx);
 
@@ -447,7 +472,24 @@ macx_swapoff(
 	}
 	backing_store = (mach_port_t)bs_port_table[i].bs;
 
+	ut = get_bsdthread_info(current_thread());
+
+#if !CONFIG_EMBEDDED
+	orig_iopol_disk = proc_get_thread_selfdiskacc();
+	proc_apply_thread_selfdiskacc(IOPOL_THROTTLE);
+#else /* !CONFIG_EMBEDDED */
+	orig_iopol_disk = ut->uu_iopol_disk;
+	ut->uu_iopol_disk = IOPOL_THROTTLE;
+#endif /* !CONFIG_EMBEDDED */
+
 	kr = default_pager_backing_store_delete(backing_store);
+
+#if !CONFIG_EMBEDDED
+	proc_apply_thread_selfdiskacc(orig_iopol_disk);
+#else /* !CONFIG_EMBEDDED */
+	ut->uu_iopol_disk = orig_iopol_disk;
+#endif /* !CONFIG_EMBEDDED */
+
 	switch (kr) {
 		case KERN_SUCCESS:
 			error = 0;
@@ -476,6 +518,12 @@ swapoff_bailout:
 
 	(void) thread_funnel_set(kernel_flock, FALSE);
 	AUDIT_MACH_SYSCALL_EXIT(error);
+
+	if (error)
+		printf("macx_swapoff FAILED - %d\n", error);
+	else
+		printf("macx_swapoff SUCCESS\n");
+
 	return(error);
 }
 
diff --git a/bsd/vm/vm_unix.c b/bsd/vm/vm_unix.c
index 369c91350..0190e70f7 100644
--- a/bsd/vm/vm_unix.c
+++ b/bsd/vm/vm_unix.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -44,6 +44,7 @@
 #include <kern/thread.h>
 #include <kern/debug.h>
 #include <kern/lock.h>
+#include <kern/extmod_statistics.h>
 #include <mach/mach_traps.h>
 #include <mach/port.h>
 #include <mach/task.h>
@@ -74,8 +75,11 @@
 #include <sys/sysproto.h>
 #include <sys/mman.h>
 #include <sys/sysctl.h>
+#include <sys/cprotect.h>
+#include <sys/kpi_socket.h>
 
 #include <security/audit/audit.h>
+#include <security/mac.h>
 #include <bsm/audit_kevents.h>
 
 #include <kern/kalloc.h>
@@ -90,6 +94,18 @@
 
 #include <vm/vm_protos.h>
 
+#if CONFIG_FREEZE
+#include <sys/kern_memorystatus.h>
+#endif
+
+
+int _shared_region_map( struct proc*, int, unsigned int, struct shared_file_mapping_np*, memory_object_control_t*, struct shared_file_mapping_np*); 
+int _shared_region_slide(uint32_t, mach_vm_offset_t, mach_vm_size_t, mach_vm_offset_t, mach_vm_size_t, memory_object_control_t);
+int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *);
+
+SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
+
+
 /*
  * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
  */
@@ -97,8 +113,8 @@
 #ifndef SECURE_KERNEL
 extern int allow_stack_exec, allow_data_exec;
 
-SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW, &allow_stack_exec, 0, "");
-SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW, &allow_data_exec, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
 #endif /* !SECURE_KERNEL */
 
 static const char *prot_values[] = {
@@ -121,7 +137,7 @@ log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
 
 int shared_region_unnest_logging = 1;
 
-SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW,
+SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &shared_region_unnest_logging, 0, "");
 
 int vm_shared_region_unnest_log_interval = 10;
@@ -486,8 +502,8 @@ task_for_pid_posix_check(proc_t target)
 
 	/* Do target's ruid, euid, and saved uid match my euid? */
 	if ((kauth_cred_getuid(targetcred) != myuid) || 
-			(targetcred->cr_ruid != myuid) ||
-			(targetcred->cr_svuid != myuid)) {
+			(kauth_cred_getruid(targetcred) != myuid) ||
+			(kauth_cred_getsvuid(targetcred) != myuid)) {
 		allowed = FALSE;
 		goto out;
 	}
@@ -600,6 +616,8 @@ task_for_pid(
 
 		/* Grant task port access */
 		task_reference(p->task);
+		extmod_statistics_incr_task_for_pid(p->task);
+
 		sright = (void *) convert_task_to_port(p->task);
 		tret = ipc_port_copyout_send(
 				sright, 
@@ -664,7 +682,7 @@ task_name_for_pid(
 		    && ((current_proc() == p)
 			|| kauth_cred_issuser(kauth_cred_get()) 
 			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) && 
-			    ((target_cred->cr_ruid == kauth_cred_get()->cr_ruid))))) {
+			    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {
 
 			if (p->task != TASK_NULL) {
 				task_reference(p->task);
@@ -714,21 +732,21 @@ pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
 	int 	error = 0;
 
 #if CONFIG_MACF
-	error = mac_proc_check_suspend_resume(p, 0); /* 0 for suspend */
+	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
 	if (error) {
-		error = KERN_FAILURE;
+		error = EPERM;
 		goto out;
 	}
 #endif
 
 	if (pid == 0) {
-		error = KERN_FAILURE;
+		error = EPERM;
 		goto out;
 	}
 
 	targetproc = proc_find(pid);
 	if (!task_for_pid_posix_check(targetproc)) {
-		error = KERN_FAILURE;
+		error = EPERM;
 		goto out;
 	}
 
@@ -744,7 +762,7 @@ pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
 			(tfpport != IPC_PORT_NULL)) {
 
 			if (tfpport == IPC_PORT_DEAD) {
-				error = KERN_PROTECTION_FAILURE;
+				error = EACCES;
 				goto out;
 			}
 
@@ -753,9 +771,9 @@ pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
 
 			if (error != MACH_MSG_SUCCESS) {
 				if (error == MACH_RCV_INTERRUPTED)
-					error = KERN_ABORTED;
+					error = EINTR;
 				else
-					error = KERN_FAILURE;
+					error = EPERM;
 				goto out;
 			}
 		}
@@ -764,8 +782,19 @@ pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
 
 	task_reference(target);
 	error = task_suspend(target);
+	if (error) {
+		if (error == KERN_INVALID_ARGUMENT) {
+			error = EINVAL;
+		} else {
+			error = EPERM;
+		}
+	}
 	task_deallocate(target);
 
+#if CONFIG_FREEZE
+	kern_hibernation_on_pid_suspend(pid);
+#endif
+
 out:
 	if (targetproc != PROC_NULL)
 		proc_rele(targetproc);
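pid_suspend() and pid_resume() now return POSIX errnos instead of leaking Mach kern_return_t values to userspace. The mapping used here could be captured in a small helper like the following hypothetical sketch (the file itself performs the translation inline):

    #include <errno.h>
    #include <mach/kern_return.h>

    /* Hypothetical helper: translate a Mach status to the BSD errno used above. */
    static int kern_to_errno(kern_return_t kr)
    {
        switch (kr) {
        case KERN_SUCCESS:          return 0;
        case KERN_INVALID_ARGUMENT: return EINVAL;
        default:                    return EPERM;   /* conservative catch-all */
        }
    }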
@@ -782,21 +811,21 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
 	int 	error = 0;
 
 #if CONFIG_MACF
-	error = mac_proc_check_suspend_resume(p, 1); /* 1 for resume */
+	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
 	if (error) {
-		error = KERN_FAILURE;
+		error = EPERM;
 		goto out;
 	}
 #endif
 
 	if (pid == 0) {
-		error = KERN_FAILURE;
+		error = EPERM;
 		goto out;
 	}
 
 	targetproc = proc_find(pid);
 	if (!task_for_pid_posix_check(targetproc)) {
-		error = KERN_FAILURE;
+		error = EPERM;
 		goto out;
 	}
 
@@ -812,7 +841,7 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
 			(tfpport != IPC_PORT_NULL)) {
 
 			if (tfpport == IPC_PORT_DEAD) {
-				error = KERN_PROTECTION_FAILURE;
+				error = EACCES;
 				goto out;
 			}
 
@@ -821,9 +850,9 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
 
 			if (error != MACH_MSG_SUCCESS) {
 				if (error == MACH_RCV_INTERRUPTED)
-					error = KERN_ABORTED;
+					error = EINTR;
 				else
-					error = KERN_FAILURE;
+					error = EPERM;
 				goto out;
 			}
 		}
@@ -831,7 +860,19 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
 #endif
 
 	task_reference(target);
+
+#if CONFIG_FREEZE
+	kern_hibernation_on_pid_resume(pid, target);
+#endif
+
 	error = task_resume(target);
+	if (error) {
+		if (error == KERN_INVALID_ARGUMENT) {
+			error = EINVAL;
+		} else {
+			error = EPERM;
+		}
+	}
 	task_deallocate(target);
 
 out:
@@ -843,6 +884,118 @@ out:
 	return 0;
 }
 
+#if CONFIG_EMBEDDED
+kern_return_t
+pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
+{
+	int 	error = 0;
+	proc_t	targetproc = PROC_NULL;
+	int 	pid = args->pid;
+
+#ifndef CONFIG_FREEZE
+	#pragma unused(pid)
+#else
+
+#if CONFIG_MACF
+	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_HIBERNATE);
+	if (error) {
+		error = EPERM;
+		goto out;
+	}
+#endif
+
+	/*
+	 * The only accepted pid value here is currently -1, since we just kick off the hibernation thread
+	 * here - individual ids aren't required. However, this call is intended to change in the future
+	 * to initiate hibernation of individual processes. In anticipation, we obtain the process handle
+	 * for potentially valid values and call task_for_pid_posix_check(); this way, everything is
+	 * validated correctly and set up for further refactoring. See <rdar://problem/7839708> for more details.
+	 */
+	if (pid >= 0) {
+		targetproc = proc_find(pid);
+		if (!task_for_pid_posix_check(targetproc)) {
+			error = EPERM;
+			goto out;
+		}
+	}
+
+	if (pid == -1) {
+		kern_hibernation_on_pid_hibernate(pid);
+	} else {
+		error = EPERM;
+	}
+
+out:
+
+#endif /* CONFIG_FREEZE */
+
+	if (targetproc != PROC_NULL)
+		proc_rele(targetproc);
+	*ret = error;
+	return error;
+}
+
+int
+pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
+{
+	int 				error = 0;
+	proc_t				targetproc = PROC_NULL;
+	struct filedesc		*fdp;
+	struct fileproc		*fp;
+	int 				pid = args->pid;
+	int					level = args->level;
+	int					i;
+
+	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
+		level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL)
+	{
+		error = EINVAL;
+		goto out;
+	}
+
+#if CONFIG_MACF
+	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
+	if (error) {
+		error = EPERM;
+		goto out;
+	}
+#endif
+
+	targetproc = proc_find(pid);
+	if (!task_for_pid_posix_check(targetproc)) {
+		error = EPERM;
+		goto out;
+	}
+
+	proc_fdlock(targetproc);
+	fdp = targetproc->p_fd;
+
+	for (i = 0; i < fdp->fd_nfiles; i++) {
+		struct socket *sockp;
+
+		fp = fdp->fd_ofiles[i];
+		if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
+			fp->f_fglob->fg_type != DTYPE_SOCKET)
+		{
+			continue;
+		}
+
+		sockp = (struct socket *)fp->f_fglob->fg_data;
+
+		/* Call networking stack with socket and level */
+		(void) socket_defunct(targetproc, sockp, level);
+	}
+
+	proc_fdunlock(targetproc);
+
+out:
+	if (targetproc != PROC_NULL)
+		proc_rele(targetproc);
+	*ret = error;
+	return error;
+}
+#endif /* CONFIG_EMBEDDED */
+
 static int
 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
     __unused int arg2, struct sysctl_req *req)
@@ -876,17 +1029,17 @@ static int kern_secure_kernel = 1;
 static int kern_secure_kernel = 0;
 #endif
 
-SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD, &kern_secure_kernel, 0, "");
+SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");
 
-SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "tfp");
-SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
+SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
     &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");
 
-SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW,
+SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &shared_region_trace_level, 0, "");
-SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD,
+SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
 	   &shared_region_version, 0, "");
-SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW,
+SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &shared_region_persistence, 0, "");
 
 /*
@@ -968,6 +1121,31 @@ shared_region_check_np(
 	return error;
 }
 
+
+int
+shared_region_copyin_mappings(
+		struct proc			*p,
+		user_addr_t			user_mappings,
+		unsigned int			mappings_count,
+		struct shared_file_mapping_np	*mappings)
+{
+	int		error = 0;
+	vm_size_t	mappings_size = 0;
+
+	/* get the list of mappings the caller wants us to establish */
+	mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
+	error = copyin(user_mappings,
+		       mappings,
+		       mappings_size);
+	if (error) {
+		SHARED_REGION_TRACE_ERROR(
+			("shared_region: %p [%d(%s)] map(): "
+			 "copyin(0x%llx, %d) failed (error=%d)\n",
+			 current_thread(), p->p_pid, p->p_comm,
+			 (uint64_t)user_mappings, mappings_count, error));
+	}
+	return error;
+}
 /*
  * shared_region_map_np()
  *
@@ -979,25 +1157,22 @@ shared_region_check_np(
  * requiring any further setup.
  */
 int
-shared_region_map_np(
+_shared_region_map(
 	struct proc				*p,
-	struct shared_region_map_np_args	*uap,
-	__unused int				*retvalp)
+	int					fd,
+	uint32_t				mappings_count,
+	struct shared_file_mapping_np		*mappings,
+	memory_object_control_t			*sr_file_control,
+	struct shared_file_mapping_np		*mapping_to_slide)
 {
 	int				error;
 	kern_return_t			kr;
-	int				fd;
 	struct fileproc			*fp;
 	struct vnode			*vp, *root_vp;
 	struct vnode_attr		va;
 	off_t				fs;
 	memory_object_size_t		file_size;
-	user_addr_t			user_mappings;
-	struct shared_file_mapping_np	*mappings;
-#define SFM_MAX_STACK	8
-	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];
-	unsigned int			mappings_count;
-	vm_size_t			mappings_size;
+	vm_prot_t			maxprot = VM_PROT_ALL;
 	memory_object_control_t		file_control;
 	struct vm_shared_region		*shared_region;
 
@@ -1006,15 +1181,9 @@ shared_region_map_np(
 		 current_thread(), p->p_pid, p->p_comm));
 
 	shared_region = NULL;
-	mappings_count = 0;
-	mappings_size = 0;
-	mappings = NULL;
 	fp = NULL;
 	vp = NULL;
 
-	/* get file descriptor for shared region cache file */
-	fd = uap->fd;
-
 	/* get file structure from file descriptor */
 	error = fp_lookup(p, fd, &fp, 0);
 	if (error) {
@@ -1068,11 +1237,38 @@ shared_region_map_np(
 		goto done;
 	}
 
+#if CONFIG_MACF
+	error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
+			fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot);
+	if (error) {
+		goto done;
+	}
+#endif /* MAC */
+
+#if CONFIG_PROTECT
+	/* check for content protection access */
+	{
+	void *cnode;
+	if ((cnode = cp_get_protected_cnode(vp)) != NULL) {
+		error = cp_handle_vnop(cnode, CP_READ_ACCESS | CP_WRITE_ACCESS);
+		if (error) 
+			goto done;
+	}
+	}
+#endif /* CONFIG_PROTECT */
+
 	/* make sure vnode is on the process's root volume */
 	root_vp = p->p_fd->fd_rdir;
 	if (root_vp == NULL) {
 		root_vp = rootvnode;
+	} else {
+		/*
+		 * Chroot-ed processes can't use the shared_region.
+		 */
+		error = EINVAL;
+		goto done;
 	}
+
 	if (vp->v_mount != root_vp->v_mount) {
 		SHARED_REGION_TRACE_ERROR(
 			("shared_region: %p [%d(%s)] map(%p:'%s'): "
@@ -1128,42 +1324,12 @@ shared_region_map_np(
 		error = EINVAL;
 		goto done;
 	}
-			 
-	/* get the list of mappings the caller wants us to establish */
-	mappings_count = uap->count;	/* number of mappings */
-	mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
-	if (mappings_count == 0) {
-		SHARED_REGION_TRACE_INFO(
-			("shared_region: %p [%d(%s)] map(%p:'%s'): "
-			 "no mappings\n",
-			 current_thread(), p->p_pid, p->p_comm,
-			 vp, vp->v_name));
-		error = 0;	/* no mappings: we're done ! */
-		goto done;
-	} else if (mappings_count <= SFM_MAX_STACK) {
-		mappings = &stack_mappings[0];
-	} else {
-		SHARED_REGION_TRACE_ERROR(
-			("shared_region: %p [%d(%s)] map(%p:'%s'): "
-			 "too many mappings (%d)\n",
-			 current_thread(), p->p_pid, p->p_comm,
-			 vp, vp->v_name, mappings_count));
-		error = EINVAL;
-		goto done;
-	}
 
-	user_mappings = uap->mappings;	/* the mappings, in user space */
-	error = copyin(user_mappings,
-		       mappings,
-		       mappings_size);
-	if (error) {
-		SHARED_REGION_TRACE_ERROR(
-			("shared_region: %p [%d(%s)] map(%p:'%s'): "
-			 "copyin(0x%llx, %d) failed (error=%d)\n",
-			 current_thread(), p->p_pid, p->p_comm,
-			 vp, vp->v_name, (uint64_t)user_mappings, mappings_count, error));
-		goto done;
+	if (sr_file_control != NULL) {
+		*sr_file_control = file_control;
 	}
+			 
+
 
 	/* get the process's shared region (setup in vm_map_exec()) */
 	shared_region = vm_shared_region_get(current_task());
@@ -1182,7 +1348,8 @@ shared_region_map_np(
 				       mappings,
 				       file_control,
 				       file_size,
-				       (void *) p->p_fd->fd_rdir);
+				       (void *) p->p_fd->fd_rdir,
+				       mapping_to_slide);
 	if (kr != KERN_SUCCESS) {
 		SHARED_REGION_TRACE_ERROR(
 			("shared_region: %p [%d(%s)] map(%p:'%s'): "
@@ -1210,6 +1377,12 @@ shared_region_map_np(
 
 	error = 0;
 
+	vnode_lock_spin(vp);
+
+	vp->v_flag |= VSHARED_DYLD;
+
+	vnode_unlock(vp);
+
 	/* update the vnode's access time */
 	if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
 		VATTR_INIT(&va);
@@ -1249,6 +1422,126 @@ done:
 	return error;
 }
 
+int
+_shared_region_slide(uint32_t slide,
+			mach_vm_offset_t	entry_start_address,
+			mach_vm_size_t		entry_size,
+			mach_vm_offset_t	slide_start,
+			mach_vm_size_t		slide_size,
+			memory_object_control_t	sr_file_control)
+{
+	void *slide_info_entry = NULL;
+	int			error;
+
+	if((error = vm_shared_region_slide_init(slide_size, entry_start_address, entry_size, slide, sr_file_control))) {
+		printf("slide_info initialization failed with kr=%d\n", error);
+		goto done;
+	}
+
+	slide_info_entry = vm_shared_region_get_slide_info_entry();
+	if (slide_info_entry == NULL){
+		error = EFAULT;
+	} else {	
+		error = copyin(slide_start,
+			       slide_info_entry,
+			       (vm_size_t)slide_size);
+	}
+	if (error) {
+		goto done;
+	}
+ 
+	if (vm_shared_region_slide_sanity_check() != KERN_SUCCESS) {
+		error = EFAULT;
+		printf("Sanity check failed for slide_info\n");
+	} else {
+#if DEBUG
+		printf("Successfully initialized slide_info with start_address: %p region_size: %ld slide_header_size: %ld\n",
+				(void*)(uintptr_t)entry_start_address,
+				(unsigned long)entry_size,
+				(unsigned long)slide_size);
+#endif
+	}
+done:
+	return error;
+}
+
+int
+shared_region_map_and_slide_np(
+	struct proc				*p,
+	struct shared_region_map_and_slide_np_args	*uap,
+	__unused int					*retvalp)
+{
+	struct shared_file_mapping_np	mapping_to_slide;
+	struct shared_file_mapping_np	*mappings;
+	unsigned int mappings_count = uap->count;
+
+	memory_object_control_t		sr_file_control;
+	kern_return_t			kr = KERN_SUCCESS;
+	uint32_t			slide = uap->slide;
+	
+#define SFM_MAX_STACK	8
+	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];
+
+	if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
+		if (kr == KERN_INVALID_ARGUMENT) {
+			/*
+			 * This will happen if we request sliding again 
+			 * with the same slide value that was used earlier
+			 * for the very first sliding. We continue through
+			 * to the mapping layer. This is so that we can be
+			 * absolutely certain that the same mappings have
+			 * been requested.
+			 */
+			kr = KERN_SUCCESS;
+		} else {
+			goto done;
+		}
+	}
+
+	if (mappings_count == 0) {
+		SHARED_REGION_TRACE_INFO(
+			("shared_region: %p [%d(%s)] map(): "
+			 "no mappings\n",
+			 current_thread(), p->p_pid, p->p_comm));
+		kr = 0;	/* no mappings: we're done ! */
+		goto done;
+	} else if (mappings_count <= SFM_MAX_STACK) {
+		mappings = &stack_mappings[0];
+	} else {
+		SHARED_REGION_TRACE_ERROR(
+			("shared_region: %p [%d(%s)] map(): "
+			 "too many mappings (%d)\n",
+			 current_thread(), p->p_pid, p->p_comm,
+			 mappings_count));
+		kr = KERN_FAILURE;
+		goto done;
+	}
+
+	if ( (kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
+		goto done;
+	}
+
+
+	kr = _shared_region_map(p, uap->fd, mappings_count, mappings, &sr_file_control, &mapping_to_slide);
+	if (kr != KERN_SUCCESS) {
+		return kr;
+	}
+
+	if (slide) {
+		kr = _shared_region_slide(slide, 
+				mapping_to_slide.sfm_file_offset, 
+				mapping_to_slide.sfm_size, 
+				uap->slide_start, 
+				uap->slide_size, 
+				sr_file_control);
+		if (kr  != KERN_SUCCESS) {
+			vm_shared_region_undo_mappings(NULL, 0, mappings, mappings_count);
+			return kr;
+		}
+	}
+done:
+	return kr;
+}
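The split into _shared_region_map() and _shared_region_slide() keeps the legacy map-only path and the new map-and-slide path on one code path. The overall flow, condensed with the names used in the code above:

    /* Condensed flow of shared_region_map_and_slide_np(): map, then slide;
     * a failed slide tears the fresh mappings back down. */
    kr = _shared_region_map(p, uap->fd, mappings_count, mappings,
                            &sr_file_control, &mapping_to_slide);
    if (kr == KERN_SUCCESS && slide != 0) {
        kr = _shared_region_slide(slide,
                                  mapping_to_slide.sfm_file_offset,
                                  mapping_to_slide.sfm_size,
                                  uap->slide_start, uap->slide_size,
                                  sr_file_control);
        if (kr != KERN_SUCCESS)
            vm_shared_region_undo_mappings(NULL, 0, mappings, mappings_count);
    }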
 
 /* sysctl overflow room */
 
@@ -1256,11 +1549,11 @@ done:
 	allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
 	reclaimed. It allows the app to calculate how much memory is free outside the free target. */
 extern unsigned int	vm_page_free_target;
-SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD, 
+SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED, 
 		   &vm_page_free_target, 0, "Pageout daemon free target");
 
 extern unsigned int	vm_memory_pressure;
-SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD,
+SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
 	   &vm_memory_pressure, 0, "Memory pressure indicator");
 
 static int
@@ -1277,36 +1570,36 @@ SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
 	    0, 0, vm_ctl_page_free_wanted, "I", "");
 
 extern unsigned int	vm_page_purgeable_count;
-SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD,
+SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
 	   &vm_page_purgeable_count, 0, "Purgeable page count");
 
 extern unsigned int	vm_page_purgeable_wired_count;
-SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD,
+SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
 	   &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");
 
-SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD,
+SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
 	   &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
-SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
 	   &vm_page_stats_reusable.reusable_pages_success, "");
-SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
 	   &vm_page_stats_reusable.reusable_pages_failure, "");
-SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
 	   &vm_page_stats_reusable.reusable_pages_shared, "");
-SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
 	   &vm_page_stats_reusable.all_reusable_calls, "");
-SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
 	   &vm_page_stats_reusable.partial_reusable_calls, "");
-SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
 	   &vm_page_stats_reusable.reuse_pages_success, "");
-SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
 	   &vm_page_stats_reusable.reuse_pages_failure, "");
-SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
 	   &vm_page_stats_reusable.all_reuse_calls, "");
-SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
 	   &vm_page_stats_reusable.partial_reuse_calls, "");
-SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
 	   &vm_page_stats_reusable.can_reuse_success, "");
-SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD,
+SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
 	   &vm_page_stats_reusable.can_reuse_failure, "");
 
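The sysctl hunks above uniformly OR CTLFLAG_LOCKED into existing read-only VM statistics, declaring that each handler performs its own locking and does not need the legacy global sysctl lock. The same pattern for a hypothetical counter (the OID name and variable are invented for illustration):

#include <sys/sysctl.h>

/* Hypothetical statistic; name and variable are illustrative only. */
static unsigned int example_stat;
SYSCTL_INT(_vm, OID_AUTO, example_stat, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &example_stat, 0, "Illustrative read-only, self-locked sysctl");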
 
diff --git a/bsd/vm/vnode_pager.c b/bsd/vm/vnode_pager.c
index a15b6dcc4..d12a65652 100644
--- a/bsd/vm/vnode_pager.c
+++ b/bsd/vm/vnode_pager.c
@@ -52,6 +52,7 @@
 #include <sys/mount_internal.h>	/* needs internal due to fhandle_t */
 #include <sys/ubc_internal.h>
 #include <sys/lock.h>
+#include <sys/disk.h> 		/* For DKIOC calls */
 
 #include <mach/mach_types.h>
 #include <mach/memory_object_types.h>
@@ -81,6 +82,27 @@
 #include <vm/vm_protos.h>
 
 
+/*
+ * If the calling thread has an open low-priority I/O throttle window,
+ * block it here via throttle_lowpri_io().
+ */
+void
+vnode_pager_throttle(void)
+{
+	struct uthread *ut;
+
+	ut = get_bsdthread_info(current_thread());
+
+	if (ut->uu_lowpri_window)
+		throttle_lowpri_io(TRUE);
+}
+
+
+boolean_t
+vnode_pager_isSSD(vnode_t vp)
+{
+	if (vp->v_mount->mnt_kern_flag & MNTK_SSD)
+		return (TRUE);
+	return (FALSE);
+}
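vnode_pager_isSSD() simply surfaces the mount's MNTK_SSD flag so pager code can tune its behavior per medium. A hedged usage sketch (the wrapper name and sizes are invented; assumes kernel headers providing vnode_t and boolean_t):

/* Illustrative only: keep read-ahead small on SSDs, where seeks are
 * cheap, and widen it on rotational media. */
static uint32_t
example_readahead_bytes(vnode_t vp)
{
	return vnode_pager_isSSD(vp) ? (64 * 1024) : (256 * 1024);
}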
+
+
 uint32_t
 vnode_pager_isinuse(struct vnode *vp)
 {
@@ -137,6 +159,85 @@ vnode_pager_get_cs_blobs(
 	return KERN_SUCCESS;
 }
 
+/*
+ * vnode_trim:
+ * Used to call the DKIOCUNMAP ioctl on the underlying disk device for the specified vnode.
+ * Trims the region starting at offset bytes into the file, for length bytes.
+ *
+ * Care must be taken to ensure that the vnode is sufficiently reference counted at the time this
+ * function is called; no iocounts or usecounts are taken on the vnode.
+ * This function is non-idempotent in error cases; we cannot un-discard the blocks if only some of
+ * them were successfully discarded.
+ */
+u_int32_t vnode_trim (
+		struct vnode *vp,
+		off_t offset,
+		size_t length)
+{
+	daddr64_t io_blockno;	 /* Block number corresponding to the start of the extent */
+	size_t io_bytecount;	/* Number of bytes in current extent for the specified range */
+	size_t trimmed = 0;
+	off_t current_offset = offset;
+	size_t remaining_length = length;
+	int error = 0;
+	u_int32_t blocksize = 0;
+	struct vnode *devvp;
+	dk_extent_t extent;
+	dk_unmap_t unmap;
+
+
+	/* Get the underlying device vnode */
+	devvp = vp->v_mount->mnt_devvp;
+
+	/* Figure out the underlying device block size */
+	error = VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&blocksize, 0, vfs_context_kernel());
+	if (error) {
+		goto trim_exit;
+	}
+
+	/*
+	 * We may not get the entire range from offset -> offset+length in a single
+	 * extent from the blockmap call.  Keep looping until we are sure we have
+	 * covered the whole range, or until we encounter an error.
+	 */
+	while (trimmed < length) {
+		/*
+		 * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the
+		 * specified offset.  It returns blocks in contiguous chunks, so if the logical range is 
+		 * broken into multiple extents, it must be called multiple times, increasing the offset
+		 * in each call to ensure that the entire range is covered.
+		 */
+		error = VNOP_BLOCKMAP(vp, current_offset, remaining_length,
+				&io_blockno, &io_bytecount, NULL, VNODE_READ, NULL);
+
+		if (error) {
+			goto trim_exit;
+		}
+		/*
+		 * We have a contiguous run.  Prepare and issue the ioctl to the device.
+		 * The DKIOCUNMAP ioctl takes offsets in bytes from the start of the device.
+		 */
+		memset(&extent, 0, sizeof(dk_extent_t));
+		memset(&unmap, 0, sizeof(dk_unmap_t));
+		extent.offset = (uint64_t)io_blockno * (uint64_t)blocksize;
+		extent.length = io_bytecount;
+		unmap.extents = &extent;
+		unmap.extentsCount = 1;
+		error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
+
+		if (error) {
+			goto trim_exit;
+		}
+		remaining_length = remaining_length - io_bytecount;
+		trimmed = trimmed + io_bytecount;
+		current_offset = current_offset + io_bytecount;
+	}
+trim_exit:
+	return error;
+}
+
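A hedged caller sketch for vnode_trim(), e.g. a filesystem discarding an extent it has just freed (the wrapper name is invented; per the block comment above, the caller must already hold a sufficient reference on the vnode):

/* Illustrative wrapper: best-effort TRIM of a freed extent; on error
 * the blocks simply remain allocated on the flash device. */
static int
example_discard_extent(struct vnode *vp, off_t offset, size_t length)
{
	int error = (int)vnode_trim(vp, offset, length);
	if (error)
		printf("vnode_trim failed: %d\n", error);
	return error;
}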
 pager_return_t
 vnode_pageout(struct vnode *vp,
 	upl_t			upl,
@@ -219,9 +320,7 @@ vnode_pageout(struct vnode *vp,
 		else
 			request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
 		
-	        ubc_create_upl(vp, f_offset, size, &upl, &pl, request_flags);
-
-		if (upl == (upl_t)NULL) {
+	        if (ubc_create_upl(vp, f_offset, size, &upl, &pl, request_flags) != KERN_SUCCESS) {
 			result    = PAGER_ERROR;
 			error_ret = EINVAL;
 			goto out;
@@ -555,14 +654,23 @@ vnode_pagein(
 					       xsize, flags, vfs_context_current())) ) {
 		        	/*
 				 * Usually this UPL will be aborted/committed by the lower cluster layer.
-				 * In the case of decmpfs, however, we may return an error (EAGAIN) to avoid
-				 * a deadlock with another thread already inflating the file. In that case,
-				 * we must take care of our UPL at this layer itself.
+				 *
+				 * a)	In the case of decmpfs, however, we may return an error (EAGAIN) to avoid
+				 *	a deadlock with another thread already inflating the file. 
+				 *
+				 * b)	In the case of content protection, EPERM is a valid error and we should respect it.
+				 *
+				 * In those cases, we must take care of our UPL at this layer itself.
 				 */
 				if (must_commit) {
 					if(error == EAGAIN) {
 			        		ubc_upl_abort_range(upl, (upl_offset_t) xoff, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
 					}
+#if CONFIG_PROTECT
+					if (error == EPERM) {
+			        		ubc_upl_abort_range(upl, (upl_offset_t) xoff, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
+					}
+#endif
 				}
 				result = PAGER_ERROR;
 				error  = PAGER_ERROR;
diff --git a/config/BSDKernel.exports b/config/BSDKernel.exports
index f0322f1ac..ebb5af5db 100644
--- a/config/BSDKernel.exports
+++ b/config/BSDKernel.exports
@@ -33,6 +33,7 @@ _buf_bwrite
 _buf_callback
 _buf_clear
 _buf_clearflags
+_buf_clear_redundancy_flags
 _buf_clone
 _buf_count
 _buf_dataptr
@@ -55,6 +56,7 @@ _buf_iterate
 _buf_lblkno
 _buf_map
 _buf_markaged
+_buf_markclean
 _buf_markdelayed
 _buf_markeintr
 _buf_markfua
@@ -63,6 +65,7 @@ _buf_meta_bread
 _buf_meta_breadn
 _buf_proc
 _buf_rcred
+_buf_redundancy_flags
 _buf_reset
 _buf_resid
 _buf_setblkno
@@ -77,6 +80,7 @@ _buf_seterror
 _buf_setflags
 _buf_setfsprivate
 _buf_setlblkno
+_buf_set_redundancy_flags
 _buf_setresid
 _buf_setsize
 _buf_setupl
@@ -222,6 +226,8 @@ _ifnet_allocate
 _ifnet_attach
 _ifnet_attach_protocol
 _ifnet_baudrate
+_ifnet_capabilities_enabled
+_ifnet_capabilities_supported
 _ifnet_detach
 _ifnet_detach_protocol
 _ifnet_eflags
@@ -259,6 +265,8 @@ _ifnet_remove_multicast
 _ifnet_resolve_multicast:_dlil_resolve_multi
 _ifnet_set_addrlen
 _ifnet_set_baudrate
+_ifnet_set_capabilities_supported
+_ifnet_set_capabilities_enabled
 _ifnet_set_eflags
 _ifnet_set_flags
 _ifnet_set_hdrlen
@@ -305,6 +313,10 @@ _kauth_cred_get_with_ref
 _kauth_cred_getgid
 _kauth_cred_getguid
 _kauth_cred_getntsid
+_kauth_cred_getrgid
+_kauth_cred_getruid
+_kauth_cred_getsvgid
+_kauth_cred_getsvuid
 _kauth_cred_getuid
 _kauth_cred_gid2guid
 _kauth_cred_gid2ntsid
@@ -351,6 +363,7 @@ _mbuf_clear_csum_performed
 _mbuf_clear_csum_requested
 _mbuf_get_mlen
 _mbuf_get_mhlen
+_mbuf_get_minclsize
 _mbuf_clear_vlan_tag
 _mbuf_concatenate
 _mbuf_copy_pkthdr
@@ -368,6 +381,7 @@ _mbuf_freem_list
 _mbuf_get
 _mbuf_get_csum_performed
 _mbuf_get_csum_requested
+_mbuf_get_traffic_class
 _mbuf_get_tso_requested
 _mbuf_get_vlan_tag
 _mbuf_getcluster
@@ -388,6 +402,7 @@ _mbuf_pulldown
 _mbuf_pullup
 _mbuf_set_csum_performed
 _mbuf_set_csum_requested
+_mbuf_set_traffic_class
 _mbuf_set_vlan_tag
 _mbuf_setdata
 _mbuf_setflags
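The two mbuf hunks above export the traffic-class accessors to kexts. A hedged sketch against the Lion-era <sys/kpi_mbuf.h> KPI (the signature and the MBUF_TC_BK constant are assumed from that header):

#include <sys/kpi_mbuf.h>

/* Illustrative: mark an outbound packet as background-class so the
 * stack can deprioritize it relative to MBUF_TC_BE/_VI/_VO traffic. */
static errno_t
example_mark_background(mbuf_t m)
{
	return mbuf_set_traffic_class(m, MBUF_TC_BK);
}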
@@ -670,6 +685,7 @@ _vfs_setextendedsecurity
 _vfs_setflags
 _vfs_setfsprivate
 _vfs_setioattr
+_vfs_setlocklocal
 _vfs_setmaxsymlen
 _vfs_statfs
 _vfs_sysctl
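The new kauth exports above round out accessor coverage for the real and saved IDs. A hedged sketch (assumes the <sys/kauth.h> accessor signatures of this era):

#include <sys/systm.h>
#include <sys/kauth.h>

/* Illustrative: read effective, real, and saved UIDs through the newly
 * exported accessors instead of reaching into struct ucred. */
static void
example_log_cred(kauth_cred_t cred)
{
	printf("uid=%d ruid=%d svuid=%d\n",
	    (int)kauth_cred_getuid(cred),
	    (int)kauth_cred_getruid(cred),
	    (int)kauth_cred_getsvuid(cred));
}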
diff --git a/config/BSDKernel.ppc.exports b/config/BSDKernel.ppc.exports
deleted file mode 100644
index 83559e0b0..000000000
--- a/config/BSDKernel.ppc.exports
+++ /dev/null
@@ -1,37 +0,0 @@
-_file_vnode
-_in6_cksum:_inet6_cksum
-_is_suser
-_is_suser1
-_mbuf_data
-_mbuf_inet6_cksum
-_mbuf_len
-_mbuf_next
-_mbuf_nextpkt
-_mbuf_pkthdr_header
-_mbuf_pkthdr_len
-_mbuf_pkthdr_rcvif
-_mbuf_pkthdr_setheader
-_mbuf_setlen
-_mbuf_setnextpkt
-_mbuf_type
-_nd6_lookup_ipv6
-_proc_ucred
-_rootvnode
-_spl0
-_splbio
-_splclock
-_splhigh
-_splimp
-_spllo
-_spln
-_sploff
-_splon
-_splpower
-_splsched
-_splsoftclock
-_spltty
-_splvm
-_splx
-_suser
-_ubc_setcred
-_ubc_sync_range
diff --git a/config/Dummy.exports b/config/Dummy.exports
new file mode 100644
index 000000000..fe7149c32
--- /dev/null
+++ b/config/Dummy.exports
@@ -0,0 +1 @@
+# Dummy exports file; exists for stub architectures like PPC
diff --git a/config/IOKit.exports b/config/IOKit.exports
index 8f1cb8e73..2ad8e78c9 100644
--- a/config/IOKit.exports
+++ b/config/IOKit.exports
@@ -365,7 +365,6 @@ __ZN13IOCommandGate10runCommandEPvS0_S0_S0_
 __ZN13IOCommandGate10superClassE
 __ZN13IOCommandGate11commandGateEP8OSObjectPFiS1_PvS2_S2_S2_E
 __ZN13IOCommandGate11setWorkLoopEP10IOWorkLoop
-__ZN13IOCommandGate12checkForWorkEv
 __ZN13IOCommandGate13attemptActionEPFiP8OSObjectPvS2_S2_S2_ES2_S2_S2_S2_
 __ZN13IOCommandGate13commandWakeupEPvb
 __ZN13IOCommandGate14attemptCommandEPvS0_S0_S0_
@@ -430,6 +429,7 @@ __ZN13IOEventSource23_RESERVEDIOEventSource4Ev
 __ZN13IOEventSource23_RESERVEDIOEventSource5Ev
 __ZN13IOEventSource23_RESERVEDIOEventSource6Ev
 __ZN13IOEventSource23_RESERVEDIOEventSource7Ev
+__ZN13IOEventSource4freeEv
 __ZN13IOEventSource4initEP8OSObjectPFvS1_zE
 __ZN13IOEventSource6enableEv
 __ZN13IOEventSource7disableEv
@@ -440,6 +440,7 @@ __ZN13IOEventSource9MetaClassC2Ev
 __ZN13IOEventSource9closeGateEv
 __ZN13IOEventSource9metaClassE
 __ZN13IOEventSource9setActionEPFvP8OSObjectzE
+__ZN13IOEventSource12checkForWorkEv
 __ZN13IOEventSourceC1EPK11OSMetaClass
 __ZN13IOEventSourceC2EPK11OSMetaClass
 __ZN13IOEventSourceD0Ev
@@ -483,12 +484,12 @@ __ZN14IOPMrootDomain14tellChangeDownEm
 __ZN14IOPMrootDomain15powerChangeDoneEm
 __ZN14IOPMrootDomain16tellNoChangeDownEm
 __ZN14IOPMrootDomain17createPMAssertionEyjP9IOServicePKc
-__ZN14IOPMrootDomain17getSleepSupportedEv
-__ZN14IOPMrootDomain17setAggressivenessEmm
-__ZN14IOPMrootDomain18changePowerStateToEm
 __ZN14IOPMrootDomain18releasePMAssertionEy
 __ZN14IOPMrootDomain19getPMAssertionLevelEy
 __ZN14IOPMrootDomain19setPMAssertionLevelEyj
+__ZN14IOPMrootDomain17getSleepSupportedEv
+__ZN14IOPMrootDomain17setAggressivenessEmm
+__ZN14IOPMrootDomain18changePowerStateToEm
 __ZN14IOPMrootDomain22changePowerStateToPrivEm
 __ZN14IOPMrootDomain22removePublishedFeatureEj
 __ZN14IOPMrootDomain23requestPowerDomainStateEmP17IOPowerConnectionm
@@ -860,7 +861,6 @@ __ZN18IORegistryIteratorD2Ev
 __ZN18IOTimerEventSource10gMetaClassE
 __ZN18IOTimerEventSource10superClassE
 __ZN18IOTimerEventSource11setWorkLoopEP10IOWorkLoop
-__ZN18IOTimerEventSource12checkForWorkEv
 __ZN18IOTimerEventSource13cancelTimeoutEv
 __ZN18IOTimerEventSource14setTimeoutFuncEv
 __ZN18IOTimerEventSource16timerEventSourceEP8OSObjectPFvS1_PS_E
diff --git a/config/IOKit.i386.exports b/config/IOKit.i386.exports
index 068770db6..d83bbdde6 100644
--- a/config/IOKit.i386.exports
+++ b/config/IOKit.i386.exports
@@ -280,7 +280,6 @@ __ZN9IOService13newUserClientEP4taskPvmPP12IOUserClient
 __ZN9IOService13startMatchingEm
 __ZN9IOService13waitMatchIdleEm
 __ZN9IOService13willTerminateEPS_m
-__ZN9IOService14actionFinalizeEPS_m
 __ZN9IOService14doServiceMatchEm
 __ZN9IOService14messageClientsEmPvj
 __ZN9IOService14newTemperatureElPS_
@@ -299,13 +298,11 @@ __ZN9IOService16didYouWakeSystemEv
 __ZN9IOService16registerInterestEPK8OSSymbolPFiPvS3_mPS_S3_jES3_S3_
 __ZN9IOService16requestTerminateEPS_m
 __ZN9IOService16setCPUSnoopDelayEm
-__ZN9IOService18actionDidTerminateEPS_m
 __ZN9IOService18doServiceTerminateEm
 __ZN9IOService18matchPropertyTableEP12OSDictionaryPl
 __ZN9IOService18requireMaxBusStallEm
 __ZN9IOService18settleTimerExpiredEv
 __ZN9IOService18systemWillShutdownEm
-__ZN9IOService19actionWillTerminateEPS_mP7OSArray
 __ZN9IOService19deliverNotificationEPK8OSSymbolmm
 __ZN9IOService19installNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_lPP10OSIterator
 __ZN9IOService22PM_Clamp_Timer_ExpiredEv
diff --git a/config/IOKit.ppc.exports b/config/IOKit.ppc.exports
deleted file mode 100644
index 26b5a9209..000000000
--- a/config/IOKit.ppc.exports
+++ /dev/null
@@ -1,383 +0,0 @@
-_IOPanic
-_PE_parse_boot_arg
-__Z11IODBDMAStopPV23IODBDMAChannelRegisters
-__Z12IODBDMAFlushPV23IODBDMAChannelRegisters
-__Z12IODBDMAPausePV23IODBDMAChannelRegisters
-__Z12IODBDMAResetPV23IODBDMAChannelRegisters
-__Z12IODBDMAStartPV23IODBDMAChannelRegistersPV17IODBDMADescriptor
-__Z15IODBDMAContinuePV23IODBDMAChannelRegisters
-__Z16IODTFindSlotNameP15IORegistryEntrym
-__Z16IODTSetResolvingP15IORegistryEntryPFlmPmS1_EPFvS0_PhS4_S4_E
-__Z17IODTGetCellCountsP15IORegistryEntryPmS1_
-__Z22IODTResolveAddressCellP15IORegistryEntryPmS1_S1_
-__Z23IODTFindMatchingEntriesP15IORegistryEntrymPKc
-__ZN10AppleMacIO9metaClassE
-__ZN10IOWorkLoop19workLoopWithOptionsEm
-__ZN10IOWorkLoop9sleepGateEPv12UnsignedWidem
-__ZN10IOWorkLoop9sleepGateEPvm
-__ZN11IOCatalogue11findDriversEP12OSDictionaryPl
-__ZN11IOCatalogue11findDriversEP9IOServicePl
-__ZN11IODataQueue11withEntriesEmm
-__ZN11IODataQueue12withCapacityEm
-__ZN11IODataQueue15initWithEntriesEmm
-__ZN11IODataQueue16initWithCapacityEm
-__ZN11IODataQueue7enqueueEPvm
-__ZN11IOMemoryMap10getAddressEv
-__ZN11IOMemoryMap18getPhysicalSegmentEmPm
-__ZN11IOMemoryMap19setMemoryDescriptorEP18IOMemoryDescriptory
-__ZN11IOMemoryMap7getSizeEv
-__ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormm
-__ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormy
-__ZN12IODMACommand11OutputBig32EPS_NS_9Segment64EPvm
-__ZN12IODMACommand11OutputBig64EPS_NS_9Segment64EPvm
-__ZN12IODMACommand11synchronizeEm
-__ZN12IODMACommand12OutputHost32EPS_NS_9Segment64EPvm
-__ZN12IODMACommand12OutputHost64EPS_NS_9Segment64EPvm
-__ZN12IODMACommand14OutputLittle32EPS_NS_9Segment64EPvm
-__ZN12IODMACommand14OutputLittle64EPS_NS_9Segment64EPvm
-__ZN12IODMACommand15genIOVMSegmentsEPyPvPm
-__ZN12IODMACommand17withSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperS2_
-__ZN12IODMACommand21initWithSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperS2_
-__ZN12IODMACommand24prepareWithSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperyybb
-__ZN12IODMACommand8transferEmyPvy
-__ZN12IOUserClient12initWithTaskEP4taskPvm
-__ZN12IOUserClient12initWithTaskEP4taskPvmP12OSDictionary
-__ZN12IOUserClient15mapClientMemoryEmP4taskmj
-__ZN12IOUserClient15sendAsyncResultEPjiPPvm
-__ZN12IOUserClient17mapClientMemory64EmP4taskmy
-__ZN12IOUserClient17sendAsyncResult64EPyiS0_m
-__ZN12IOUserClient19clientMemoryForTypeEmPmPP18IOMemoryDescriptor
-__ZN12IOUserClient19setAsyncReference64EPyP8ipc_portyy
-__ZN12IOUserClient23getExternalTrapForIndexEm
-__ZN12IOUserClient24getNotificationSemaphoreEmPP9semaphore
-__ZN12IOUserClient24getTargetAndTrapForIndexEPP9IOServicem
-__ZN12IOUserClient24registerNotificationPortEP8ipc_portmm
-__ZN12IOUserClient24registerNotificationPortEP8ipc_portmy
-__ZN12IOUserClient25getExternalMethodForIndexEm
-__ZN12IOUserClient26getTargetAndMethodForIndexEPP9IOServicem
-__ZN12IOUserClient30getExternalAsyncMethodForIndexEm
-__ZN12IOUserClient31getAsyncTargetAndMethodForIndexEPP9IOServicem
-__ZN13IOCommandGate12commandSleepEPv12UnsignedWidem
-__ZN13IOCommandGate12commandSleepEPvm
-__ZN13IOCommandPool11commandPoolEP9IOServiceP10IOWorkLoopm
-__ZN13IOCommandPool4initEP9IOServiceP10IOWorkLoopm
-__ZN13IOEventSource9sleepGateEPv12UnsignedWidem
-__ZN13IOEventSource9sleepGateEPvm
-__ZN13_IOServiceJob8startJobEP9IOServiceim
-__ZN14IOCommandQueue10gMetaClassE
-__ZN14IOCommandQueue10superClassE
-__ZN14IOCommandQueue12checkForWorkEv
-__ZN14IOCommandQueue12commandQueueEP8OSObjectPFvS1_PvS2_S2_S2_Ei
-__ZN14IOCommandQueue14enqueueCommandEbPvS0_S0_S0_
-__ZN14IOCommandQueue15performAndFlushEP8OSObjectPFvS1_PvS2_S2_S2_E
-__ZN14IOCommandQueue4freeEv
-__ZN14IOCommandQueue4initEP8OSObjectPFvS1_PvS2_S2_S2_Ei
-__ZN14IOCommandQueue9MetaClassC1Ev
-__ZN14IOCommandQueue9MetaClassC2Ev
-__ZN14IOCommandQueue9metaClassE
-__ZN14IOCommandQueueC1EPK11OSMetaClass
-__ZN14IOCommandQueueC1Ev
-__ZN14IOCommandQueueC2EPK11OSMetaClass
-__ZN14IOCommandQueueC2Ev
-__ZN14IOCommandQueueD0Ev
-__ZN14IOCommandQueueD2Ev
-__ZN14IODeviceMemory12withSubRangeEPS_mm
-__ZN14IODeviceMemory13arrayFromListEPNS_11InitElementEm
-__ZN14IODeviceMemory9withRangeEmm
-__ZN14IOMemoryCursor17withSpecificationEPFvNS_15PhysicalSegmentEPvmEmmm
-__ZN14IOMemoryCursor19genPhysicalSegmentsEP18IOMemoryDescriptormPvmmPm
-__ZN14IOMemoryCursor21initWithSpecificationEPFvNS_15PhysicalSegmentEPvmEmmm
-__ZN14IOPMrootDomain17setSleepSupportedEm
-__ZN14IOPMrootDomain19sysPowerDownHandlerEPvS0_mP9IOServiceS0_j
-__ZN14IOPMrootDomain24receivePowerNotificationEm
-__ZN14IOPMrootDomain27displayWranglerNotificationEPvS0_mP9IOServiceS0_j
-__ZN15IODMAController13getControllerEP9IOServicem
-__ZN15IODMAController16notifyDMACommandEP16IODMAEventSourceP12IODMACommandim
-__ZN15IODMAController20createControllerNameEm
-__ZN15IODMAController21registerDMAControllerEm
-__ZN16AppleMacIODevice9metaClassE
-__ZN16IODMAEventSource14dmaEventSourceEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandimES8_m
-__ZN16IODMAEventSource15startDMACommandEP12IODMACommand11IODirectionmm
-__ZN16IODMAEventSource16notifyDMACommandEP12IODMACommandim
-__ZN16IODMAEventSource4initEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandimES8_m
-__ZN16IOKitDiagnostics12updateOffsetEP12OSDictionarymPKc
-__ZN16IORangeAllocator10deallocateEmm
-__ZN16IORangeAllocator12allocElementEm
-__ZN16IORangeAllocator13allocateRangeEmm
-__ZN16IORangeAllocator14deallocElementEm
-__ZN16IORangeAllocator28setFragmentCapacityIncrementEm
-__ZN16IORangeAllocator4initEmmmm
-__ZN16IORangeAllocator8allocateEmPmm
-__ZN16IORangeAllocator9withRangeEmmmm
-__ZN17IOBigMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm
-__ZN17IOBigMemoryCursor17withSpecificationEmmm
-__ZN17IOBigMemoryCursor21initWithSpecificationEmmm
-__ZN17IOSharedDataQueue11withEntriesEmm
-__ZN17IOSharedDataQueue12withCapacityEm
-__ZN17IOSharedDataQueue16initWithCapacityEm
-__ZN17IOSharedDataQueue7dequeueEPvPm
-__ZN18IOMemoryDescriptor10setMappingEP4taskjm
-__ZN18IOMemoryDescriptor10withRangesEP14IOVirtualRangem11IODirectionP4taskb
-__ZN18IOMemoryDescriptor10writeBytesEmPKvm
-__ZN18IOMemoryDescriptor11makeMappingEPS_P4taskjmmm
-__ZN18IOMemoryDescriptor11withAddressEPvm11IODirection
-__ZN18IOMemoryDescriptor11withAddressEjm11IODirectionP4task
-__ZN18IOMemoryDescriptor11withOptionsEPvmmP4taskmP8IOMapper
-__ZN18IOMemoryDescriptor12setPurgeableEmPm
-__ZN18IOMemoryDescriptor12withSubRangeEPS_mm11IODirection
-__ZN18IOMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb
-__ZN18IOMemoryDescriptor15initWithAddressEPvm11IODirection
-__ZN18IOMemoryDescriptor15initWithAddressEjm11IODirectionP4task
-__ZN18IOMemoryDescriptor15initWithOptionsEPvmmP4taskmP8IOMapper
-__ZN18IOMemoryDescriptor16getSourceSegmentEmPm
-__ZN18IOMemoryDescriptor16performOperationEmmm
-__ZN18IOMemoryDescriptor16withAddressRangeEyymP4task
-__ZN18IOMemoryDescriptor17getVirtualSegmentEmPm
-__ZN18IOMemoryDescriptor17withAddressRangesEP14IOAddressRangemmP4task
-__ZN18IOMemoryDescriptor18getPhysicalSegmentEmPm
-__ZN18IOMemoryDescriptor18getPhysicalSegmentEmPmm
-__ZN18IOMemoryDescriptor18withPhysicalRangesEP15IOPhysicalRangem11IODirectionb
-__ZN18IOMemoryDescriptor19createMappingInTaskEP4taskymyy
-__ZN18IOMemoryDescriptor19withPhysicalAddressEmm11IODirection
-__ZN18IOMemoryDescriptor20getPhysicalSegment64EmPm
-__ZN18IOMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb
-__ZN18IOMemoryDescriptor23initWithPhysicalAddressEmm11IODirection
-__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor8Ev
-__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor9Ev
-__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor10Ev
-__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor11Ev
-__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor12Ev
-__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor13Ev
-__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor14Ev
-__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor15Ev
-__ZN18IOMemoryDescriptor3mapEP4taskjmmm
-__ZN18IOMemoryDescriptor3mapEm
-__ZN18IOMemoryDescriptor5doMapEP7_vm_mapPjmmm
-__ZN18IOMemoryDescriptor6setTagEm
-__ZN18IOMemoryDescriptor7doUnmapEP7_vm_mapjm
-__ZN18IOMemoryDescriptor9readBytesEmPvm
-__ZN18IORegistryIterator11iterateOverEP15IORegistryEntryPK15IORegistryPlanem
-__ZN18IORegistryIterator11iterateOverEPK15IORegistryPlanem
-__ZN18IOTimerEventSource10setTimeoutE12UnsignedWide
-__ZN18IOTimerEventSource10setTimeoutE13mach_timespec
-__ZN18IOTimerEventSource10setTimeoutEmm
-__ZN18IOTimerEventSource10wakeAtTimeE12UnsignedWide
-__ZN18IOTimerEventSource10wakeAtTimeE13mach_timespec
-__ZN18IOTimerEventSource10wakeAtTimeEmm
-__ZN18IOTimerEventSource12setTimeoutMSEm
-__ZN18IOTimerEventSource12setTimeoutUSEm
-__ZN18IOTimerEventSource12wakeAtTimeMSEm
-__ZN18IOTimerEventSource12wakeAtTimeUSEm
-__ZN18IOTimerEventSource15setTimeoutTicksEm
-__ZN18IOTimerEventSource15wakeAtTimeTicksEm
-__ZN19IODBDMAMemoryCursor10gMetaClassE
-__ZN19IODBDMAMemoryCursor10superClassE
-__ZN19IODBDMAMemoryCursor17withSpecificationEmmm
-__ZN19IODBDMAMemoryCursor21initWithSpecificationEmmm
-__ZN19IODBDMAMemoryCursor9MetaClassC1Ev
-__ZN19IODBDMAMemoryCursor9MetaClassC2Ev
-__ZN19IODBDMAMemoryCursor9metaClassE
-__ZN19IODBDMAMemoryCursorC1EPK11OSMetaClass
-__ZN19IODBDMAMemoryCursorC1Ev
-__ZN19IODBDMAMemoryCursorC2EPK11OSMetaClass
-__ZN19IODBDMAMemoryCursorC2Ev
-__ZN19IODBDMAMemoryCursorD0Ev
-__ZN19IODBDMAMemoryCursorD2Ev
-__ZN20IOLittleMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm
-__ZN20IOLittleMemoryCursor17withSpecificationEmmm
-__ZN20IOLittleMemoryCursor21initWithSpecificationEmmm
-__ZN20RootDomainUserClient15setPreventativeEmm
-__ZN20RootDomainUserClient26getTargetAndMethodForIndexEPP9IOServicem
-__ZN21IOInterruptController10initVectorElP17IOInterruptVector
-__ZN21IOInterruptController11causeVectorElP17IOInterruptVector
-__ZN21IOInterruptController12enableVectorElP17IOInterruptVector
-__ZN21IOInterruptController13getVectorTypeElP17IOInterruptVector
-__ZN21IOInterruptController17disableVectorHardElP17IOInterruptVector
-__ZN21IOInterruptController17vectorCanBeSharedElP17IOInterruptVector
-__ZN21IONaturalMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm
-__ZN21IONaturalMemoryCursor17withSpecificationEmmm
-__ZN21IONaturalMemoryCursor21initWithSpecificationEmmm
-__ZN21IOSubMemoryDescriptor11makeMappingEP18IOMemoryDescriptorP4taskjmmm
-__ZN21IOSubMemoryDescriptor12initSubRangeEP18IOMemoryDescriptormm11IODirection
-__ZN21IOSubMemoryDescriptor12setPurgeableEmPm
-__ZN21IOSubMemoryDescriptor12withSubRangeEP18IOMemoryDescriptormmm
-__ZN21IOSubMemoryDescriptor18getPhysicalSegmentEmPmm
-__ZN21IOSubMemoryDescriptor7prepareE11IODirection
-__ZN21IOSubMemoryDescriptor8completeE11IODirection
-__ZN23IOMultiMemoryDescriptor15withDescriptorsEPP18IOMemoryDescriptorm11IODirectionb
-__ZN23IOMultiMemoryDescriptor18getPhysicalSegmentEmPmm
-__ZN23IOMultiMemoryDescriptor19initWithDescriptorsEPP18IOMemoryDescriptorm11IODirectionb
-__ZN23IOMultiMemoryDescriptor7prepareE11IODirection
-__ZN23IOMultiMemoryDescriptor8completeE11IODirection
-__ZN24IOBufferMemoryDescriptor11appendBytesEPKvj
-__ZN24IOBufferMemoryDescriptor11withOptionsEmjj
-__ZN24IOBufferMemoryDescriptor12setDirectionE11IODirection
-__ZN24IOBufferMemoryDescriptor12withCapacityEj11IODirectionb
-__ZN24IOBufferMemoryDescriptor13initWithBytesEPKvj11IODirectionb
-__ZN24IOBufferMemoryDescriptor14getBytesNoCopyEjj
-__ZN24IOBufferMemoryDescriptor15initWithOptionsEmjj
-__ZN24IOBufferMemoryDescriptor15initWithOptionsEmjjP4task
-__ZN24IOBufferMemoryDescriptor17inTaskWithOptionsEP4taskmjj
-__ZN24IOBufferMemoryDescriptor20initWithPhysicalMaskEP4taskmyyy
-__ZN24IOBufferMemoryDescriptor22inTaskWithPhysicalMaskEP4taskmyy
-__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor2Ev
-__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor3Ev
-__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor4Ev
-__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor5Ev
-__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor6Ev
-__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor7Ev
-__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor8Ev
-__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor9Ev
-__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor10Ev
-__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor11Ev
-__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor12Ev
-__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor13Ev
-__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor14Ev
-__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor15Ev
-__ZN24IOBufferMemoryDescriptor9setLengthEj
-__ZN24IOBufferMemoryDescriptor9withBytesEPKvj11IODirectionb
-__ZN25IOGeneralMemoryDescriptor11setPositionEm
-__ZN25IOGeneralMemoryDescriptor11wireVirtualE11IODirection
-__ZN25IOGeneralMemoryDescriptor12setPurgeableEmPm
-__ZN25IOGeneralMemoryDescriptor13mapIntoKernelEj
-__ZN25IOGeneralMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb
-__ZN25IOGeneralMemoryDescriptor15initWithAddressEPvm11IODirection
-__ZN25IOGeneralMemoryDescriptor15initWithAddressEjm11IODirectionP4task
-__ZN25IOGeneralMemoryDescriptor15initWithOptionsEPvmmP4taskmP8IOMapper
-__ZN25IOGeneralMemoryDescriptor15unmapFromKernelEv
-__ZN25IOGeneralMemoryDescriptor16getSourceSegmentEmPm
-__ZN25IOGeneralMemoryDescriptor17getVirtualSegmentEmPm
-__ZN25IOGeneralMemoryDescriptor18getPhysicalSegmentEmPm
-__ZN25IOGeneralMemoryDescriptor18getPhysicalSegmentEmPmm
-__ZN25IOGeneralMemoryDescriptor20getPhysicalSegment64EmPm
-__ZN25IOGeneralMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb
-__ZN25IOGeneralMemoryDescriptor23initWithPhysicalAddressEmm11IODirection
-__ZN25IOGeneralMemoryDescriptor5doMapEP7_vm_mapPjmmm
-__ZN25IOGeneralMemoryDescriptor7doUnmapEP7_vm_mapjm
-__ZN25IOGeneralMemoryDescriptor7prepareE11IODirection
-__ZN25IOGeneralMemoryDescriptor8completeE11IODirection
-__ZN29IOInterleavedMemoryDescriptor12withCapacityEm11IODirection
-__ZN29IOInterleavedMemoryDescriptor16initWithCapacityEm11IODirection
-__ZN29IOInterleavedMemoryDescriptor18getPhysicalSegmentEmPmm
-__ZN29IOInterleavedMemoryDescriptor19setMemoryDescriptorEP18IOMemoryDescriptormm
-__ZN29IOInterleavedMemoryDescriptor22clearMemoryDescriptorsE11IODirection
-__ZN29IOInterleavedMemoryDescriptor7prepareE11IODirection
-__ZN29IOInterleavedMemoryDescriptor8completeE11IODirection
-__ZN8IOMapper10allocTableEm
-__ZN8IOMapper10iovmInsertEjmP13upl_page_infom
-__ZN8IOMapper10iovmInsertEjmPjm
-__ZN8IOMapper11NewARTTableEmPPvPj
-__ZN8IOMapper12FreeARTTableEP6OSDatam
-__ZN8IOPMprot10gMetaClassE
-__ZN8IOPMprot10superClassE
-__ZN8IOPMprot9MetaClassC1Ev
-__ZN8IOPMprot9MetaClassC2Ev
-__ZN8IOPMprot9metaClassE
-__ZN8IOPMprotC1EPK11OSMetaClass
-__ZN8IOPMprotC1Ev
-__ZN8IOPMprotC2EPK11OSMetaClass
-__ZN8IOPMprotC2Ev
-__ZN8IOPMprotD0Ev
-__ZN8IOPMprotD2Ev
-__ZN9IOService10adjustBusyEl
-__ZN9IOService10handleOpenEPS_mPv
-__ZN9IOService10systemWakeEv
-__ZN9IOService10youAreRootEv
-__ZN9IOService11_adjustBusyEl
-__ZN9IOService11handleCloseEPS_m
-__ZN9IOService11tellClientsEi
-__ZN9IOService12clampPowerOnEm
-__ZN9IOService12didTerminateEPS_mPb
-__ZN9IOService12requestProbeEm
-__ZN9IOService12waitForStateEmmP13mach_timespec
-__ZN9IOService13getPMworkloopEv
-__ZN9IOService13messageClientEmP8OSObjectPvj
-__ZN9IOService13newUserClientEP4taskPvmP12OSDictionaryPP12IOUserClient
-__ZN9IOService13newUserClientEP4taskPvmPP12IOUserClient
-__ZN9IOService13startMatchingEm
-__ZN9IOService13waitMatchIdleEm
-__ZN9IOService13willTerminateEPS_m
-__ZN9IOService14actionFinalizeEPS_m
-__ZN9IOService14doServiceMatchEm
-__ZN9IOService14messageClientsEmPvj
-__ZN9IOService14newTemperatureElPS_
-__ZN9IOService14setPowerParentEP17IOPowerConnectionbm
-__ZN9IOService15addNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_l
-__ZN9IOService15nextIdleTimeoutE12UnsignedWideS0_j
-__ZN9IOService15registerServiceEm
-__ZN9IOService15tellChangeDown1Em
-__ZN9IOService15tellChangeDown2Em
-__ZN9IOService15terminateClientEPS_m
-__ZN9IOService15terminatePhase1Em
-__ZN9IOService15terminateWorkerEm
-__ZN9IOService16ack_timer_tickedEv
-__ZN9IOService16command_receivedEPvS0_S0_S0_
-__ZN9IOService16didYouWakeSystemEv
-__ZN9IOService16registerInterestEPK8OSSymbolPFiPvS3_mPS_S3_jES3_S3_
-__ZN9IOService16requestTerminateEPS_m
-__ZN9IOService16setCPUSnoopDelayEm
-__ZN9IOService18actionDidTerminateEPS_m
-__ZN9IOService18doServiceTerminateEm
-__ZN9IOService18matchPropertyTableEP12OSDictionaryPl
-__ZN9IOService18requireMaxBusStallEm
-__ZN9IOService18settleTimerExpiredEv
-__ZN9IOService18systemWillShutdownEm
-__ZN9IOService19actionWillTerminateEPS_mP7OSArray
-__ZN9IOService19deliverNotificationEPK8OSSymbolmm
-__ZN9IOService19installNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_lPP10OSIterator
-__ZN9IOService20_RESERVEDIOService48Ev
-__ZN9IOService20_RESERVEDIOService49Ev
-__ZN9IOService20_RESERVEDIOService50Ev
-__ZN9IOService20_RESERVEDIOService51Ev
-__ZN9IOService20_RESERVEDIOService52Ev
-__ZN9IOService20_RESERVEDIOService53Ev
-__ZN9IOService20_RESERVEDIOService54Ev
-__ZN9IOService20_RESERVEDIOService55Ev
-__ZN9IOService20_RESERVEDIOService56Ev
-__ZN9IOService20_RESERVEDIOService57Ev
-__ZN9IOService20_RESERVEDIOService58Ev
-__ZN9IOService20_RESERVEDIOService59Ev
-__ZN9IOService20_RESERVEDIOService60Ev
-__ZN9IOService20_RESERVEDIOService61Ev
-__ZN9IOService20_RESERVEDIOService62Ev
-__ZN9IOService20_RESERVEDIOService63Ev
-__ZN9IOService22PM_Clamp_Timer_ExpiredEv
-__ZN9IOService22powerDomainDidChangeToEmP17IOPowerConnection
-__ZN9IOService23acknowledgeNotificationEPvm
-__ZN9IOService23addMatchingNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_P10IONotifierES5_S5_l
-__ZN9IOService23powerDomainWillChangeToEmP17IOPowerConnection
-__ZN9IOService23scheduleTerminatePhase2Em
-__ZN9IOService23tellClientsWithResponseEi
-__ZN9IOService24PM_idle_timer_expirationEv
-__ZN9IOService24mapDeviceMemoryWithIndexEjm
-__ZN9IOService26temperatureCriticalForZoneEPS_
-__ZN9IOService27serializedAllowPowerChange2Em
-__ZN9IOService28serializedCancelPowerChange2Em
-__ZN9IOService4openEPS_mPv
-__ZN9IOService5closeEPS_m
-__ZN9IOService5probeEPS_Pl
-__ZN9IOService6PMfreeEv
-__ZN9IOService7messageEmPS_Pv
-__ZN9IOService8finalizeEm
-__ZN9IOService9terminateEm
-__ZNK11IOCatalogue13serializeDataEmP11OSSerialize
-__ZNK14IOCommandQueue12getMetaClassEv
-__ZNK14IOCommandQueue9MetaClass5allocEv
-__ZNK15IORegistryEntry11getPropertyEPK8OSStringPK15IORegistryPlanem
-__ZNK15IORegistryEntry11getPropertyEPK8OSSymbolPK15IORegistryPlanem
-__ZNK15IORegistryEntry11getPropertyEPKcPK15IORegistryPlanem
-__ZNK15IORegistryEntry12copyPropertyEPK8OSStringPK15IORegistryPlanem
-__ZNK15IORegistryEntry12copyPropertyEPK8OSSymbolPK15IORegistryPlanem
-__ZNK15IORegistryEntry12copyPropertyEPKcPK15IORegistryPlanem
-__ZNK18IOMemoryDescriptor19dmaCommandOperationEmPvj
-__ZNK19IODBDMAMemoryCursor12getMetaClassEv
-__ZNK19IODBDMAMemoryCursor9MetaClass5allocEv
-__ZNK25IOGeneralMemoryDescriptor19dmaCommandOperationEmPvj
-__ZNK8IOPMprot12getMetaClassEv
-__ZNK8IOPMprot9MetaClass5allocEv
-__ZTV14IOCommandQueue
-__ZTV19IODBDMAMemoryCursor
-__ZTV8IOPMprot
-__ZTVN14IOCommandQueue9MetaClassE
-__ZTVN19IODBDMAMemoryCursor9MetaClassE
-__ZTVN8IOPMprot9MetaClassE
diff --git a/config/IOKit.x86_64.exports b/config/IOKit.x86_64.exports
index d3067b6e0..6f986aea6 100644
--- a/config/IOKit.x86_64.exports
+++ b/config/IOKit.x86_64.exports
@@ -226,7 +226,6 @@ __ZN9IOService13newUserClientEP4taskPvjPP12IOUserClient
 __ZN9IOService13startMatchingEj
 __ZN9IOService13waitMatchIdleEj
 __ZN9IOService13willTerminateEPS_j
-__ZN9IOService14actionFinalizeEPS_j
 __ZN9IOService14doServiceMatchEj
 __ZN9IOService14messageClientsEjPvm
 __ZN9IOService15addNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_i
@@ -238,7 +237,6 @@ __ZN9IOService15terminateWorkerEj
 __ZN9IOService16registerInterestEPK8OSSymbolPFiPvS3_jPS_S3_mES3_S3_
 __ZN9IOService16requestTerminateEPS_j
 __ZN9IOService16setCPUSnoopDelayEj
-__ZN9IOService18actionDidTerminateEPS_j
 __ZN9IOService18doServiceTerminateEj
 __ZN9IOService18matchPropertyTableEP12OSDictionaryPi
 __ZN9IOService18requireMaxBusStallEj
@@ -249,7 +247,6 @@ __ZN9IOService19_RESERVEDIOService2Ev
 __ZN9IOService19_RESERVEDIOService3Ev
 __ZN9IOService19_RESERVEDIOService4Ev
 __ZN9IOService19_RESERVEDIOService5Ev
-__ZN9IOService19actionWillTerminateEPS_jP7OSArray
 __ZN9IOService19deliverNotificationEPK8OSSymboljj
 __ZN9IOService23acknowledgeNotificationEPvj
 __ZN9IOService23addMatchingNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_P10IONotifierES5_S5_i
diff --git a/config/Libkern.exports b/config/Libkern.exports
index 2e7ff44dd..4bd05a193 100644
--- a/config/Libkern.exports
+++ b/config/Libkern.exports
@@ -1,4 +1,4 @@
-___bzero:_bzero
+___bzero
 _Assert
 _MD5Final
 _MD5Init
@@ -744,6 +744,7 @@ _deflateSetDictionary
 _ffs
 _flush_dcache
 _flush_dcache64
+_gOSKextUnresolved
 _inet_ntop
 _inflate
 _inflateEnd
diff --git a/config/Libkern.i386.exports b/config/Libkern.i386.exports
index 31d172284..d1a97b9ee 100644
--- a/config/Libkern.i386.exports
+++ b/config/Libkern.i386.exports
@@ -1,4 +1,7 @@
 _lck_mtx_unlock_darwin10
+_lck_mtx_lock_spin
+_lck_mtx_try_lock_spin
+_lck_mtx_convert_spin
 _OSAddAtomic64
 _OSCompareAndSwap64
 _OSRuntimeFinalizeCPP
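The spin-mode lck_mtx exports above let kexts take a mutex without blocking for short critical sections. A hedged sketch (function and arguments invented; assumes the <kern/locks.h> KPI):

#include <kern/locks.h>

/* Illustrative: hold the mutex in spin mode for a quick update, and
 * convert to a full (blocking-capable) hold only when more work is
 * needed.  No blocking is allowed while the lock is spin-held. */
static void
example_bump(lck_mtx_t *mtx, int *counter, boolean_t slow_path)
{
	lck_mtx_lock_spin(mtx);
	(*counter)++;
	if (slow_path) {
		lck_mtx_convert_spin(mtx);	/* now an ordinary mutex hold */
		/* ... longer work that may block ... */
	}
	lck_mtx_unlock(mtx);			/* valid for either hold type */
}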
diff --git a/config/Libkern.ppc.exports b/config/Libkern.ppc.exports
deleted file mode 100644
index ebf87f219..000000000
--- a/config/Libkern.ppc.exports
+++ /dev/null
@@ -1,29 +0,0 @@
-_OSDequeueAtomic
-_OSEnqueueAtomic
-_OSRuntimeFinalizeCPP
-_OSRuntimeInitializeCPP
-_OSRuntimeUnloadCPP
-_OSRuntimeUnloadCPPForSegment
-__ZN12OSOrderedSet12withCapacityEjPFlPK15OSMetaClassBaseS2_PvES3_
-__ZN12OSOrderedSet16initWithCapacityEjPFlPK15OSMetaClassBaseS2_PvES3_
-__ZN8OSObject19_RESERVEDOSObject16Ev
-__ZN8OSObject19_RESERVEDOSObject17Ev
-__ZN8OSObject19_RESERVEDOSObject18Ev
-__ZN8OSObject19_RESERVEDOSObject19Ev
-__ZN8OSObject19_RESERVEDOSObject20Ev
-__ZN8OSObject19_RESERVEDOSObject21Ev
-__ZN8OSObject19_RESERVEDOSObject22Ev
-__ZN8OSObject19_RESERVEDOSObject23Ev
-__ZN8OSObject19_RESERVEDOSObject24Ev
-__ZN8OSObject19_RESERVEDOSObject25Ev
-__ZN8OSObject19_RESERVEDOSObject26Ev
-__ZN8OSObject19_RESERVEDOSObject27Ev
-__ZN8OSObject19_RESERVEDOSObject28Ev
-__ZN8OSObject19_RESERVEDOSObject29Ev
-__ZN8OSObject19_RESERVEDOSObject30Ev
-__ZN8OSObject19_RESERVEDOSObject31Ev
-_bcopy_nc
-_bzero_nc
-_sprintf
-_strcat
-_strcpy
diff --git a/config/Libkern.x86_64.exports b/config/Libkern.x86_64.exports
index 639d10368..c42f577d8 100644
--- a/config/Libkern.x86_64.exports
+++ b/config/Libkern.x86_64.exports
@@ -1,8 +1,10 @@
+_lck_mtx_lock_spin
+_lck_mtx_try_lock_spin
+_lck_mtx_convert_spin
 _OSAddAtomic64
 _OSCompareAndSwap64
 __ZN12OSOrderedSet12withCapacityEjPFiPK15OSMetaClassBaseS2_PvES3_
 __ZN12OSOrderedSet16initWithCapacityEjPFiPK15OSMetaClassBaseS2_PvES3_
-_gOSKextUnresolved
 _sprintf
 _strcat
 _strcpy
diff --git a/config/MACFramework.exports b/config/MACFramework.exports
index cba6d7dae..839eadc4f 100644
--- a/config/MACFramework.exports
+++ b/config/MACFramework.exports
@@ -8,6 +8,8 @@ _mac_label_set
 
 _mac_audit_text
 
+_mac_iokit_check_hid_control
+
 _sbuf_cat
 _sbuf_data
 _sbuf_delete
diff --git a/config/MACFramework.ppc.exports b/config/MACFramework.ppc.exports
deleted file mode 100644
index 6006136b4..000000000
--- a/config/MACFramework.ppc.exports
+++ /dev/null
@@ -1,9 +0,0 @@
-_kau_will_audit
-_mac_kalloc
-_mac_kalloc_noblock
-_mac_kfree
-_mac_mbuf_alloc
-_mac_mbuf_free
-_mac_unwire
-_mac_wire
-_sysctl__security_mac_children
diff --git a/config/Mach.ppc.exports b/config/Mach.ppc.exports
deleted file mode 100644
index cc31a814e..000000000
--- a/config/Mach.ppc.exports
+++ /dev/null
@@ -1 +0,0 @@
-_semaphore_timedwait
diff --git a/config/Makefile b/config/Makefile
index 9a00f1027..ff2d46ddb 100644
--- a/config/Makefile
+++ b/config/Makefile
@@ -1,5 +1,3 @@
-MAC = defined
-
 export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
 export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
 export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
@@ -12,31 +10,22 @@ include $(MakeInc_def)
 ALL_SUBDIRS = 
 
 INSTINC_SUBDIRS = 
-
-INSTINC_SUBDIRS_PPC = 
-
 INSTINC_SUBDIRS_I386 = 
-
 INSTINC_SUBDIRS_X86_64 = 
-
 INSTINC_SUBDIRS_ARM = 
 
 EXPINC_SUBDIRS = 
-
-EXPINC_SUBDIRS_PPC =  
-
 EXPINC_SUBDIRS_I386 = 
-
 EXPINC_SUBDIRS_X86_64 = 
-
 EXPINC_SUBDIRS_ARM = 
 
+
 COMP_SUBDIRS = 
 
 INST_SUBDIRS =	
 
 
-INSTALL_DATA_LIST= \
+INSTALL_KEXT_PLIST_LIST= \
 	System.kext/Info.plist \
 	System.kext/PlugIns/Libkern.kext/Info.plist \
 	System.kext/PlugIns/Mach.kext/Info.plist \
@@ -48,18 +37,10 @@ INSTALL_DATA_LIST= \
 	System.kext/PlugIns/IONVRAMFamily.kext/Info.plist \
 	System.kext/PlugIns/IOSystemManagement.kext/Info.plist \
 	System.kext/PlugIns/Unsupported.kext/Info.plist \
-	System.kext/PlugIns/Private.kext/Info.plist \
-	\
-	System.kext/PlugIns/System6.0.kext/Info.plist \
-	System.kext/PlugIns/Libkern6.0.kext/Info.plist \
-	System.kext/PlugIns/Mach6.0.kext/Info.plist \
-	System.kext/PlugIns/BSDKernel6.0.kext/Info.plist \
-	System.kext/PlugIns/IOKit6.0.kext/Info.plist \
+	System.kext/PlugIns/Private.kext/Info.plist
 
-INSTALL_DATA_DIR= \
-	/System/Library/Extensions/
+INSTALL_KEXT_DIR = /System/Library/Extensions/
 
-INSTMAN_SUBDIRS = 
 
 MD_SUPPORTED_KPI_FILENAME="SupportedKPIs-${ARCH_CONFIG_LC}.txt"
 MI_SUPPORTED_KPI_FILENAME="SupportedKPIs-all-archs.txt"
@@ -72,32 +53,39 @@ endif
 
 ifeq ($(ARCH_CONFIG),I386)
 SUPPORT_SYSTEM60_KEXT	= 1
-else ifeq ($(ARCH_CONFIG),ARM)
-SUPPORT_SYSTEM60_KEXT	= 1
 else
 SUPPORT_SYSTEM60_KEXT	= 0
 endif
 
+ifeq ($(SUPPORT_SYSTEM60_KEXT),1)
+INSTALL_KEXT_PLIST_LIST += \
+	System.kext/PlugIns/System6.0.kext/Info.plist \
+	System.kext/PlugIns/Libkern6.0.kext/Info.plist \
+	System.kext/PlugIns/Mach6.0.kext/Info.plist \
+	System.kext/PlugIns/BSDKernel6.0.kext/Info.plist \
+	System.kext/PlugIns/IOKit6.0.kext/Info.plist
+endif
+
 SYMBOL_COMPONENT_LIST =	\
         System6.0	\
         BSDKernel	\
         IOKit		\
-        Libkern	\
-        Mach	\
-	Unsupported	\
-	Private
-
-ifdef MAC
-SYMBOL_COMPONENT_LIST += MACFramework
-MACFRAMEWORKEXPORTS = \
-	-export $(SRCROOT)/$(COMPONENT)/MACFramework.exports \
-	-export $(SRCROOT)/$(COMPONENT)/MACFramework.$(ARCH_CONFIG_LC).exports
-endif
+        Libkern		\
+        Mach		\
+        MACFramework	\
+        Unsupported	\
+        Private
 
 SYMBOL_SET_BUILD = $(foreach set, $(SYMBOL_COMPONENT_LIST), $(OBJPATH)/$(set).symbolset)
 SYMBOL_SET_FAT = $(foreach set, $(SYMBOL_COMPONENT_LIST), $(OBJROOT)/$(set).symbolset)
 
-## .SUFFIXES: .symbolset .symbollist
+INSTALL_KEXT_PLISTS = $(addprefix $(DSTROOT)$(INSTALL_KEXT_DIR), $(INSTALL_KEXT_PLIST_LIST))
+
+$(INSTALL_KEXT_PLISTS): $(DSTROOT)$(INSTALL_KEXT_DIR)% : $(SOURCE)/%
+	@echo Install $< in $@
+	$(_v)$(MKDIR) $(dir $@);				\
+	$(RM) $(RMFLAGS) $@;					\
+	$(INSTALL) $(DATA_INSTALL_FLAGS) $< $(dir $@)
 
 $(OBJPATH)/allsymbols: $(OBJPATH)/mach_kernel
 	$(_v)$(NM) -gj $< > $@
@@ -143,36 +131,20 @@ $(SYMBOL_SET_FAT): $(OBJROOT)/%.symbolset :
 		printf "" > $@;						\
 	fi
 
-build_symbol_sets:	$(SYMBOL_SET_BUILD)
+build_symbol_sets:	$(SYMBOL_SET_BUILD) $(OBJPATH)/allsymbols
 	$(_v)$(KEXT_CREATE_SYMBOL_SET) \
 		$($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_ALL_)) \
+		$(foreach comp,$(filter-out System6.0 Private,$(SYMBOL_COMPONENT_LIST)), \
+			-export $(SRCROOT)/$(COMPONENT)/$(comp).exports \
+			-export $(SRCROOT)/$(COMPONENT)/$(comp).$(ARCH_CONFIG_LC).exports) \
 		-import $(OBJPATH)/allsymbols \
-		-export $(SRCROOT)/$(COMPONENT)/Libkern.exports \
-		-export $(SRCROOT)/$(COMPONENT)/Libkern.$(ARCH_CONFIG_LC).exports \
-		-export $(SRCROOT)/$(COMPONENT)/Mach.exports \
-		-export $(SRCROOT)/$(COMPONENT)/Mach.$(ARCH_CONFIG_LC).exports \
-		-export $(SRCROOT)/$(COMPONENT)/IOKit.exports \
-		-export $(SRCROOT)/$(COMPONENT)/IOKit.$(ARCH_CONFIG_LC).exports \
-		-export $(SRCROOT)/$(COMPONENT)/BSDKernel.exports \
-		-export $(SRCROOT)/$(COMPONENT)/BSDKernel.$(ARCH_CONFIG_LC).exports \
-		$(MACFRAMEWORKEXPORTS) \
-		-export $(SRCROOT)/$(COMPONENT)/Unsupported.exports \
-		-export $(SRCROOT)/$(COMPONENT)/Unsupported.$(ARCH_CONFIG_LC).exports \
 		-output /dev/null $(_vstdout);
 	$(_v)$(KEXT_CREATE_SYMBOL_SET) \
-		$($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \
+		$($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_ALL_)) \
+		$(foreach comp,$(filter-out System6.0 Unsupported,$(SYMBOL_COMPONENT_LIST)), \
+			-export $(SRCROOT)/$(COMPONENT)/$(comp).exports \
+			-export $(SRCROOT)/$(COMPONENT)/$(comp).$(ARCH_CONFIG_LC).exports) \
 		-import $(OBJPATH)/allsymbols \
-		-export $(SRCROOT)/$(COMPONENT)/Libkern.exports \
-		-export $(SRCROOT)/$(COMPONENT)/Libkern.$(ARCH_CONFIG_LC).exports \
-		-export $(SRCROOT)/$(COMPONENT)/Mach.exports \
-		-export $(SRCROOT)/$(COMPONENT)/Mach.$(ARCH_CONFIG_LC).exports \
-		-export $(SRCROOT)/$(COMPONENT)/IOKit.exports \
-		-export $(SRCROOT)/$(COMPONENT)/IOKit.$(ARCH_CONFIG_LC).exports \
-		-export $(SRCROOT)/$(COMPONENT)/BSDKernel.exports \
-		-export $(SRCROOT)/$(COMPONENT)/BSDKernel.$(ARCH_CONFIG_LC).exports \
-		$(MACFRAMEWORKEXPORTS) \
-		-export $(SRCROOT)/$(COMPONENT)/Private.exports \
-		-export $(SRCROOT)/$(COMPONENT)/Private.$(ARCH_CONFIG_LC).exports \
 		-output /dev/null $(_vstdout);
 	$(_v) $(SRCROOT)/$(COMPONENT)/list_supported.sh $(SRCROOT)/$(COMPONENT) $(ARCH_CONFIG_LC) $(OBJPATH)/${MD_SUPPORTED_KPI_FILENAME};
 	$(_v)if [ -n `echo $${ARCH_CONFIGS%%\ *} | grep -i  $(ARCH_CONFIG)` ]; \
@@ -181,41 +153,42 @@ build_symbol_sets:	$(SYMBOL_SET_BUILD)
 	fi
 
 
-install_symbol_sets:	$(SYMBOL_SET_FAT) $(SRCROOT)/config/MasterVersion
-	$(_v)if [ -s "$(OBJROOT)/System6.0.symbolset" ]; then	\
-		install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset    $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/System6.0.kext/kernel.6.0;	\
-		install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset    $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Mach6.0.kext/Mach6.0;	\
-		install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset    $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/BSDKernel6.0.kext/BSDKernel6.0;	\
-		install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset    $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Libkern6.0.kext/Libkern6.0;	\
-		install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset    $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/IOKit6.0.kext/IOKit6.0;	\
+install_symbol_sets:	$(SYMBOL_SET_FAT) $(SRCROOT)/config/MasterVersion $(INSTALL_KEXT_PLISTS)
+	$(_v)if [ -s "$(OBJROOT)/System6.0.symbolset" -a $(SUPPORT_SYSTEM60_KEXT) -eq 1 ]; then	\
+		install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset    $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/System6.0.kext/kernel.6.0;	\
+		install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset    $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Mach6.0.kext/Mach6.0;	\
+		install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset    $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/BSDKernel6.0.kext/BSDKernel6.0;	\
+		install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset    $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Libkern6.0.kext/Libkern6.0;	\
+		install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset    $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/IOKit6.0.kext/IOKit6.0;	\
+	fi
+	$(_v)if [ -s "$(OBJROOT)/BSDKernel.symbolset" ]; then \
+		install $(INSTALL_FLAGS) $(OBJROOT)/BSDKernel.symbolset    $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/BSDKernel.kext/BSDKernel;			\
+		install $(INSTALL_FLAGS) $(OBJROOT)/IOKit.symbolset        $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/IOKit.kext/IOKit;				\
+		install $(INSTALL_FLAGS) $(OBJROOT)/Libkern.symbolset      $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Libkern.kext/Libkern;			\
+		install $(INSTALL_FLAGS) $(OBJROOT)/Mach.symbolset         $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Mach.kext/Mach;				\
+		install $(INSTALL_FLAGS) $(OBJROOT)/MACFramework.symbolset $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/MACFramework.kext/MACFramework;		\
+		install $(INSTALL_FLAGS) $(OBJROOT)/Unsupported.symbolset  $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Unsupported.kext/Unsupported;		\
+		install $(INSTALL_FLAGS) $(OBJROOT)/Private.symbolset      $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Private.kext/Private;			\
 	fi
-	$(_v)install $(INSTALL_FLAGS) $(OBJROOT)/BSDKernel.symbolset    $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/BSDKernel.kext/BSDKernel;
-	$(_v)install $(INSTALL_FLAGS) $(OBJROOT)/IOKit.symbolset        $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/IOKit.kext/IOKit;
-	$(_v)install $(INSTALL_FLAGS) $(OBJROOT)/Libkern.symbolset      $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Libkern.kext/Libkern;
-	$(_v)install $(INSTALL_FLAGS) $(OBJROOT)/Mach.symbolset         $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Mach.kext/Mach;
-	$(_v)install $(INSTALL_FLAGS) $(OBJROOT)/Unsupported.symbolset  $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Unsupported.kext/Unsupported;
-	$(_v)install $(INSTALL_FLAGS) $(OBJROOT)/Private.symbolset      $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Private.kext/Private;
-	$(_v)$(NEWVERS) $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/Info.plist \
-		$(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/AppleNMI.kext/Info.plist \
-		$(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/ApplePlatformFamily.kext/Info.plist \
-		$(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/BSDKernel.kext/Info.plist \
-		$(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/IOKit.kext/Info.plist \
-		$(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/IONVRAMFamily.kext/Info.plist \
-		$(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/IOSystemManagement.kext/Info.plist \
-		$(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Libkern.kext/Info.plist \
-		$(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Mach.kext/Info.plist \
-		$(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Unsupported.kext/Info.plist \
-		$(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Private.kext/Info.plist;
+	$(_v)$(NEWVERS) $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/Info.plist \
+		$(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/AppleNMI.kext/Info.plist \
+		$(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/ApplePlatformFamily.kext/Info.plist \
+		$(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/BSDKernel.kext/Info.plist \
+		$(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/IOKit.kext/Info.plist \
+		$(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/IONVRAMFamily.kext/Info.plist \
+		$(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/IOSystemManagement.kext/Info.plist \
+		$(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Libkern.kext/Info.plist \
+		$(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Mach.kext/Info.plist \
+		$(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/MACFramework.kext/Info.plist \
+		$(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Unsupported.kext/Info.plist \
+		$(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Private.kext/Info.plist;
 	$(_v)$(MKDIR) $(DSTROOT)/$(KRESDIR);
 	$(_v)install $(INSTALL_FLAGS) $(OBJPATH)/$(MD_SUPPORTED_KPI_FILENAME) $(DSTROOT)/$(KRESDIR);
 	$(_v)if [ -n `echo $${ARCH_CONFIGS%%\ *} | grep -i  $(ARCH_CONFIG)` ]; then \
 		install $(INSTALL_FLAGS) $(OBJROOT)/$(MI_SUPPORTED_KPI_FILENAME) $(DSTROOT)/$(KRESDIR); \
 	fi
-ifdef MAC
-	$(_v)install $(INSTALL_FLAGS) $(OBJROOT)/MACFramework.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/MACFramework.kext/MACFramework;
-	$(_v)$(NEWVERS) $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/MACFramework.kext/Info.plist
-endif
-	$(_v)$(CP) -rf $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext $(SYMROOT)
+	$(_v)$(MKDIR) $(SYMROOT)
+	$(_v)$(CP) -rf $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext $(SYMROOT)
 
 do_build_all:	build_symbol_sets
 
diff --git a/config/MasterVersion b/config/MasterVersion
index 237d3331d..8f5b9dd34 100644
--- a/config/MasterVersion
+++ b/config/MasterVersion
@@ -1,4 +1,4 @@
-10.8.0
+11.0.0
 
 # The first line of this file contains the master version number for the kernel.
 # All other instances of the kernel version in xnu are derived from this file.
diff --git a/config/Private.exports b/config/Private.exports
index fb730cba2..299cabf8e 100644
--- a/config/Private.exports
+++ b/config/Private.exports
@@ -15,15 +15,21 @@ _bdevsw
 _boot
 _bsd_hostname
 _bsd_set_dependency_capable
+_buf_create_shadow
 _buf_getcpaddr
 _buf_setcpaddr
 _buf_setfilter
+_buf_shadow
 _cdevsw
+_cdevsw_setkqueueok
 _clalloc
 _clfree
 _cons_cinput
 _cp_key_store_action
 _cp_register_wraps
+_cs_entitlements_blob_get
+_ctl_id_by_name
+_ctl_name_by_id
 _fd_rdwr
 _get_aiotask
 _hz
@@ -38,7 +44,14 @@ _ip_mutex
 _ip_output
 _ip_protox
 _ipc_port_release_send
+_kauth_cred_getgroups
+_kauth_cred_guid2grnam
+_kauth_cred_guid2pwnam
+_kauth_cred_grnam2guid
+_kauth_cred_pwnam2guid
+_kdp_register_link
 _kdp_set_interface
+_kdp_unregister_link
 _kdp_unregister_send_receive
 _kmem_alloc_kobject
 _linesw
@@ -55,9 +68,7 @@ _m_pullup
 _m_split
 _m_trailingspace:_mbuf_trailingspace
 _mac_proc_set_enforce
-_mbuf_get_priority
-_mbuf_get_traffic_class
-_mbuf_set_traffic_class
+_mbuf_get_priority:_mbuf_get_traffic_class
 _mcl_to_paddr
 _mountroot_post_hook
 _net_add_domain
@@ -65,7 +76,7 @@ _net_add_proto
 _net_del_domain
 _net_del_proto
 _netboot_root
-_perf_monitor_register
+_perf_monitor_register_*
 _perf_monitor_unregister
 _pffinddomain
 _pffindproto
@@ -111,6 +122,7 @@ _q_to_b
 _register_decmpfs_decompressor
 _rootdev
 _rootvp
+_rtfree
 _sbappendaddr
 _sbappendrecord
 _sbflush
@@ -121,6 +133,7 @@ _socantsendmore
 _sock_getlistener
 _sock_release
 _sock_retain
+_sock_setupcall
 _sodisconnect
 _sofree
 _sofreelastref
@@ -137,13 +150,20 @@ _soreserve
 _sorwakeup
 _sosend
 _termioschars
-_thread_tid
+_thread_clear_eager_preempt
 _thread_dispatchqaddr
+_thread_set_eager_preempt
+_thread_tid
 _throttle_info_create
 _throttle_info_mount_ref
 _throttle_info_mount_rel
 _throttle_info_release
 _throttle_info_update
+_throttle_info_ref_by_mask
+_throttle_info_rel_by_mask
+_throttle_info_update_by_mask
+_throttle_lowpri_io
+_throttle_set_thread_io_policy
 _timeout
 _tk_nin
 _tk_rawcc
@@ -167,15 +187,26 @@ _unmountroot_pre_hook
 _unputc
 _unregister_decmpfs_decompressor
 _untimeout
+_vnode_isdyldsharedcache
 _vnode_ismonitored
 _vnode_notify
+_vnop_compound_open_desc
+_vnop_compound_mkdir_desc
+_vnop_compound_remove_desc
+_vnop_compound_rename_desc
+_vnop_compound_rmdir_desc
 _vnop_monitor_desc
 _vfs_context_bind
 _vfs_context_get_special_port
 _vfs_context_set_special_port
+_vfs_devvp
+_vfs_getattr
+_vfs_getbyid
 _vfs_get_notify_attributes
 _vfs_mntlabel
+_vfs_setcompoundopen
 _vfs_setunmountpreflight
+_vfs_throttle_mask
 _vfs_vnodecovered
 _vm_map_copy_copy
 _vm_map_copy_discard
@@ -184,5 +215,6 @@ _vm_map_copyin_common
 _vm_map_copyout
 _vn_getpath_fsenter
 _vn_searchfs_inappropriate_name
+_vnode_lookup_continue_needed
 _sock_settclassopt
 _sock_gettclassopt
diff --git a/config/Private.i386.exports b/config/Private.i386.exports
index 5ff0653e9..b6b05d103 100644
--- a/config/Private.i386.exports
+++ b/config/Private.i386.exports
@@ -1,14 +1,35 @@
+_IOGetBootKeyStoreData
+_SHA256_Final
+_SHA256_Init
+_SHA256_Update
+__ZN22IOInterruptEventSource7warmCPUEy
 _acpi_install_wake_handler
 _acpi_sleep_kernel
 _add_fsevent
 _apic_table
+_apply_func_phys
 _cpu_to_lapic
 _cpuid_features
 _cpuid_info
-_gOSKextUnresolved
 _lapic_end_of_interrupt
 _lapic_unmask_perfcnt_interrupt
 _mp_broadcast
 _mp_cpus_call
+_mp_cpus_call1
 _need_fsevent
+_pal_efi_call_in_32bit_mode
+_pal_efi_call_in_64bit_mode
+_pal_machine_sleep
 _smp_initialized
+_vfs_addtrigger
+_vfs_istraditionaltrigger
+_vfs_resolver_auxiliary
+_vfs_resolver_result
+_vfs_resolver_sequence
+_vfs_resolver_status
+_vfs_settriggercallback
+_vnode_trigger_update
+_xts_decrypt
+_xts_done
+_xts_encrypt
+_xts_start
diff --git a/config/Private.ppc.exports b/config/Private.ppc.exports
deleted file mode 100644
index 0f0b58c19..000000000
--- a/config/Private.ppc.exports
+++ /dev/null
@@ -1,2 +0,0 @@
-_add_fsevent
-_need_fsevent
diff --git a/config/Private.x86_64.exports b/config/Private.x86_64.exports
index 9748fcbe7..a19ab484b 100644
--- a/config/Private.x86_64.exports
+++ b/config/Private.x86_64.exports
@@ -1,7 +1,13 @@
+_IOGetBootKeyStoreData
+_SHA256_Final
+_SHA256_Init
+_SHA256_Update
+__ZN22IOInterruptEventSource7warmCPUEy
 _acpi_install_wake_handler
 _acpi_sleep_kernel
 _add_fsevent
 _apic_table
+_apply_func_phys
 _cpu_to_lapic
 _cpuid_features
 _cpuid_info
@@ -9,7 +15,23 @@ _lapic_end_of_interrupt
 _lapic_unmask_perfcnt_interrupt
 _mp_broadcast
 _mp_cpus_call
+_mp_cpus_call1
 _need_fsevent
+_pal_efi_call_in_32bit_mode
+_pal_efi_call_in_64bit_mode
 _semaphore_timedwait
 _smp_initialized
 _kext_get_vm_map
+_pal_machine_sleep
+_vfs_addtrigger
+_vfs_istraditionaltrigger
+_vfs_resolver_auxiliary
+_vfs_resolver_result
+_vfs_resolver_sequence
+_vfs_resolver_status
+_vfs_settriggercallback
+_vnode_trigger_update
+_xts_decrypt
+_xts_done
+_xts_encrypt
+_xts_start
diff --git a/config/System6.0.exports b/config/System6.0.exports
index 75146568c..c3d167834 100644
--- a/config/System6.0.exports
+++ b/config/System6.0.exports
@@ -620,7 +620,6 @@ __ZN13IOCommandGate10runCommandEPvS0_S0_S0_
 __ZN13IOCommandGate10superClassE
 __ZN13IOCommandGate11commandGateEP8OSObjectPFiS1_PvS2_S2_S2_E
 __ZN13IOCommandGate11setWorkLoopEP10IOWorkLoop
-__ZN13IOCommandGate12checkForWorkEv
 __ZN13IOCommandGate12commandSleepEPv12UnsignedWidem
 __ZN13IOCommandGate12commandSleepEPvm
 __ZN13IOCommandGate13attemptActionEPFiP8OSObjectPvS2_S2_S2_ES2_S2_S2_S2_
@@ -689,6 +688,7 @@ __ZN13IOEventSource23_RESERVEDIOEventSource4Ev
 __ZN13IOEventSource23_RESERVEDIOEventSource5Ev
 __ZN13IOEventSource23_RESERVEDIOEventSource6Ev
 __ZN13IOEventSource23_RESERVEDIOEventSource7Ev
+__ZN13IOEventSource4freeEv
 __ZN13IOEventSource4initEP8OSObjectPFvS1_zE
 __ZN13IOEventSource6enableEv
 __ZN13IOEventSource7disableEv
@@ -699,6 +699,7 @@ __ZN13IOEventSource9MetaClassC2Ev
 __ZN13IOEventSource9closeGateEv
 __ZN13IOEventSource9metaClassE
 __ZN13IOEventSource9setActionEPFvP8OSObjectzE
+__ZN13IOEventSource12checkForWorkEv
 __ZN13IOEventSource9sleepGateEPv12UnsignedWidem
 __ZN13IOEventSource9sleepGateEPvm
 __ZN13IOEventSourceC1EPK11OSMetaClass
@@ -1241,7 +1242,6 @@ __ZN18IOTimerEventSource10wakeAtTimeE12UnsignedWide
 __ZN18IOTimerEventSource10wakeAtTimeE13mach_timespec
 __ZN18IOTimerEventSource10wakeAtTimeEmm
 __ZN18IOTimerEventSource11setWorkLoopEP10IOWorkLoop
-__ZN18IOTimerEventSource12checkForWorkEv
 __ZN18IOTimerEventSource12setTimeoutMSEm
 __ZN18IOTimerEventSource12setTimeoutUSEm
 __ZN18IOTimerEventSource12wakeAtTimeMSEm
@@ -2816,7 +2816,6 @@ __start
 _absolutetime_to_nanoseconds
 _acknowledgeSleepWakeNotification
 _appleClut8
-_argstrcpy
 _assert_wait
 _assert_wait_timeout
 _atoi
@@ -2952,7 +2951,6 @@ _get_inpcb_str_size
 _get_kernel_symfile
 _get_procrustime
 _get_task_map
-_getval
 _invalidate_icache
 _invalidate_icache64
 _iokit_add_reference
@@ -2970,7 +2968,6 @@ _iokit_version_variant:_version_variant
 _ipc_port_release_send
 _is_suser
 _is_suser1
-_isargsep
 _kOSBooleanFalse
 _kOSBooleanTrue
 _kalloc
@@ -3150,7 +3147,6 @@ _thread_call_is_delayed
 _thread_cancel_timer
 _thread_deallocate
 _thread_flavor_array
-_thread_funnel_set
 _thread_policy_set
 _thread_reference
 _thread_set_timer
diff --git a/config/System6.0.i386.exports b/config/System6.0.i386.exports
index 5cb3b501c..f3955791d 100644
--- a/config/System6.0.i386.exports
+++ b/config/System6.0.i386.exports
@@ -18,12 +18,12 @@ _lapic_end_of_interrupt
 _ml_get_max_cpus
 _mp_broadcast
 _mp_cpus_call 
+_mp_cpus_call1
 _mp_rendezvous_no_intrs
-_mtrr_range_add
-_mtrr_range_remove
 _rtc_clock_stepped
 _rtc_clock_stepping
 _smp_initialized
 _sprintf
 _strcat
 _strcpy
+_thread_funnel_set
diff --git a/config/System6.0.ppc.exports b/config/System6.0.ppc.exports
deleted file mode 100644
index 6b9d3ed8c..000000000
--- a/config/System6.0.ppc.exports
+++ /dev/null
@@ -1,256 +0,0 @@
-_CallTVector
-_OSDequeueAtomic
-_OSEnqueueAtomic
-_PE_Determine_Clock_Speeds
-_PE_find_scc
-_PE_init_taproot
-_PE_parse_boot_arg
-_PE_read_write_time_of_day
-_PE_write_IIC
-_PPCcalls
-_ResetHandler
-__Z11IODBDMAStopPV23IODBDMAChannelRegisters
-__Z12IODBDMAFlushPV23IODBDMAChannelRegisters
-__Z12IODBDMAPausePV23IODBDMAChannelRegisters
-__Z12IODBDMAResetPV23IODBDMAChannelRegisters
-__Z12IODBDMAStartPV23IODBDMAChannelRegistersPV17IODBDMADescriptor
-__Z15IODBDMAContinuePV23IODBDMAChannelRegisters
-__Z32IOFreePhysicallyContiguousMemoryPjj
-__Z36IOAllocatePhysicallyContiguousMemoryjjPjPm
-__ZN10AppleMacIO10deleteListEv
-__ZN10AppleMacIO10gMetaClassE
-__ZN10AppleMacIO10processNubEP9IOService
-__ZN10AppleMacIO10superClassE
-__ZN10AppleMacIO11excludeListEv
-__ZN10AppleMacIO12publishBelowEP15IORegistryEntry
-__ZN10AppleMacIO15getNubResourcesEP9IOService
-__ZN10AppleMacIO20_RESERVEDAppleMacIO0Ev
-__ZN10AppleMacIO20_RESERVEDAppleMacIO1Ev
-__ZN10AppleMacIO20_RESERVEDAppleMacIO2Ev
-__ZN10AppleMacIO20_RESERVEDAppleMacIO3Ev
-__ZN10AppleMacIO5startEP9IOService
-__ZN10AppleMacIO8selfTestEv
-__ZN10AppleMacIO9MetaClassC1Ev
-__ZN10AppleMacIO9MetaClassC2Ev
-__ZN10AppleMacIO9createNubEP15IORegistryEntry
-__ZN10AppleMacIO9metaClassE
-__ZN10AppleMacIOC1EPK11OSMetaClass
-__ZN10AppleMacIOC2EPK11OSMetaClass
-__ZN10AppleMacIOD0Ev
-__ZN10AppleMacIOD2Ev
-__ZN10AppleNVRAM10gMetaClassE
-__ZN10AppleNVRAM10superClassE
-__ZN10AppleNVRAM4readEmPhm
-__ZN10AppleNVRAM5startEP9IOService
-__ZN10AppleNVRAM5writeEmPhm
-__ZN10AppleNVRAM9MetaClassC1Ev
-__ZN10AppleNVRAM9MetaClassC2Ev
-__ZN10AppleNVRAM9metaClassE
-__ZN10AppleNVRAMC1EPK11OSMetaClass
-__ZN10AppleNVRAMC1Ev
-__ZN10AppleNVRAMC2EPK11OSMetaClass
-__ZN10AppleNVRAMC2Ev
-__ZN10AppleNVRAMD0Ev
-__ZN10AppleNVRAMD2Ev
-__ZN11IOMemoryMap19setMemoryDescriptorEP18IOMemoryDescriptory
-__ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormm
-__ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormy
-__ZN16AppleMacIODevice10gMetaClassE
-__ZN16AppleMacIODevice10superClassE
-__ZN16AppleMacIODevice12getResourcesEv
-__ZN16AppleMacIODevice13matchLocationEP9IOService
-__ZN16AppleMacIODevice26_RESERVEDAppleMacIODevice0Ev
-__ZN16AppleMacIODevice26_RESERVEDAppleMacIODevice1Ev
-__ZN16AppleMacIODevice26_RESERVEDAppleMacIODevice2Ev
-__ZN16AppleMacIODevice26_RESERVEDAppleMacIODevice3Ev
-__ZN16AppleMacIODevice9MetaClassC1Ev
-__ZN16AppleMacIODevice9MetaClassC2Ev
-__ZN16AppleMacIODevice9metaClassE
-__ZN16AppleMacIODeviceC1EPK11OSMetaClass
-__ZN16AppleMacIODeviceC1Ev
-__ZN16AppleMacIODeviceC2EPK11OSMetaClass
-__ZN16AppleMacIODeviceC2Ev
-__ZN16AppleMacIODeviceD0Ev
-__ZN16AppleMacIODeviceD2Ev
-__ZN17IONVRAMController10gMetaClassE
-__ZN17IONVRAMController10superClassE
-__ZN17IONVRAMController4syncEv
-__ZN17IONVRAMController5startEP9IOService
-__ZN17IONVRAMController9MetaClassC1Ev
-__ZN17IONVRAMController9MetaClassC2Ev
-__ZN17IONVRAMController9metaClassE
-__ZN17IONVRAMControllerC1EPK11OSMetaClass
-__ZN17IONVRAMControllerC2EPK11OSMetaClass
-__ZN17IONVRAMControllerD0Ev
-__ZN17IONVRAMControllerD2Ev
-__ZN19ApplePlatformExpert10deleteListEv
-__ZN19ApplePlatformExpert10gMetaClassE
-__ZN19ApplePlatformExpert10superClassE
-__ZN19ApplePlatformExpert11excludeListEv
-__ZN19ApplePlatformExpert14getMachineNameEPci
-__ZN19ApplePlatformExpert15getGMTTimeOfDayEv
-__ZN19ApplePlatformExpert15setGMTTimeOfDayEl
-__ZN19ApplePlatformExpert23registerNVRAMControllerEP17IONVRAMController
-__ZN19ApplePlatformExpert29_RESERVEDApplePlatformExpert0Ev
-__ZN19ApplePlatformExpert29_RESERVEDApplePlatformExpert1Ev
-__ZN19ApplePlatformExpert29_RESERVEDApplePlatformExpert2Ev
-__ZN19ApplePlatformExpert29_RESERVEDApplePlatformExpert3Ev
-__ZN19ApplePlatformExpert5startEP9IOService
-__ZN19ApplePlatformExpert9MetaClassC1Ev
-__ZN19ApplePlatformExpert9MetaClassC2Ev
-__ZN19ApplePlatformExpert9configureEP9IOService
-__ZN19ApplePlatformExpert9metaClassE
-__ZN19ApplePlatformExpertC1EPK11OSMetaClass
-__ZN19ApplePlatformExpertC2EPK11OSMetaClass
-__ZN19ApplePlatformExpertD0Ev
-__ZN19ApplePlatformExpertD2Ev
-__ZN19IODBDMAMemoryCursor10gMetaClassE
-__ZN19IODBDMAMemoryCursor10superClassE
-__ZN19IODBDMAMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm
-__ZN19IODBDMAMemoryCursor17withSpecificationEmmm
-__ZN19IODBDMAMemoryCursor21initWithSpecificationEmmm
-__ZN19IODBDMAMemoryCursor9MetaClassC1Ev
-__ZN19IODBDMAMemoryCursor9MetaClassC2Ev
-__ZN19IODBDMAMemoryCursor9metaClassE
-__ZN19IODBDMAMemoryCursorC1EPK11OSMetaClass
-__ZN19IODBDMAMemoryCursorC1Ev
-__ZN19IODBDMAMemoryCursorC2EPK11OSMetaClass
-__ZN19IODBDMAMemoryCursorC2Ev
-__ZN19IODBDMAMemoryCursorD0Ev
-__ZN19IODBDMAMemoryCursorD2Ev
-__ZN24IOBufferMemoryDescriptor22inTaskWithPhysicalMaskEP4taskmyy
-__ZN8AppleCPU10gMetaClassE
-__ZN8AppleCPU10getCPUNameEv
-__ZN8AppleCPU10quiesceCPUEv
-__ZN8AppleCPU10superClassE
-__ZN8AppleCPU5startEP9IOService
-__ZN8AppleCPU7haltCPUEv
-__ZN8AppleCPU7initCPUEb
-__ZN8AppleCPU8startCPUEjj
-__ZN8AppleCPU9MetaClassC1Ev
-__ZN8AppleCPU9MetaClassC2Ev
-__ZN8AppleCPU9metaClassE
-__ZN8AppleCPUC1EPK11OSMetaClass
-__ZN8AppleCPUC1Ev
-__ZN8AppleCPUC2EPK11OSMetaClass
-__ZN8AppleCPUC2Ev
-__ZN8AppleCPUD0Ev
-__ZN8AppleCPUD2Ev
-__ZN8AppleNMI10gMetaClassE
-__ZN8AppleNMI10superClassE
-__ZN8AppleNMI15handleInterruptEPvP9IOServicei
-__ZN8AppleNMI18_RESERVEDAppleNMI0Ev
-__ZN8AppleNMI18_RESERVEDAppleNMI1Ev
-__ZN8AppleNMI18_RESERVEDAppleNMI2Ev
-__ZN8AppleNMI18_RESERVEDAppleNMI3Ev
-__ZN8AppleNMI22powerStateWillChangeToEmmP9IOService
-__ZN8AppleNMI5startEP9IOService
-__ZN8AppleNMI7initNMIEP21IOInterruptControllerP6OSData
-__ZN8AppleNMI9MetaClassC1Ev
-__ZN8AppleNMI9MetaClassC2Ev
-__ZN8AppleNMI9metaClassE
-__ZN8AppleNMIC1EPK11OSMetaClass
-__ZN8AppleNMIC1Ev
-__ZN8AppleNMIC2EPK11OSMetaClass
-__ZN8AppleNMIC2Ev
-__ZN8AppleNMID0Ev
-__ZN8AppleNMID2Ev
-__ZN8OSObject19_RESERVEDOSObject16Ev
-__ZN8OSObject19_RESERVEDOSObject17Ev
-__ZN8OSObject19_RESERVEDOSObject18Ev
-__ZN8OSObject19_RESERVEDOSObject19Ev
-__ZN8OSObject19_RESERVEDOSObject20Ev
-__ZN8OSObject19_RESERVEDOSObject21Ev
-__ZN8OSObject19_RESERVEDOSObject22Ev
-__ZN8OSObject19_RESERVEDOSObject23Ev
-__ZN8OSObject19_RESERVEDOSObject24Ev
-__ZN8OSObject19_RESERVEDOSObject25Ev
-__ZN8OSObject19_RESERVEDOSObject26Ev
-__ZN8OSObject19_RESERVEDOSObject27Ev
-__ZN8OSObject19_RESERVEDOSObject28Ev
-__ZN8OSObject19_RESERVEDOSObject29Ev
-__ZN8OSObject19_RESERVEDOSObject30Ev
-__ZN8OSObject19_RESERVEDOSObject31Ev
-__ZN9IOService20_RESERVEDIOService48Ev
-__ZN9IOService20_RESERVEDIOService49Ev
-__ZN9IOService20_RESERVEDIOService50Ev
-__ZN9IOService20_RESERVEDIOService51Ev
-__ZN9IOService20_RESERVEDIOService52Ev
-__ZN9IOService20_RESERVEDIOService53Ev
-__ZN9IOService20_RESERVEDIOService54Ev
-__ZN9IOService20_RESERVEDIOService55Ev
-__ZN9IOService20_RESERVEDIOService56Ev
-__ZN9IOService20_RESERVEDIOService57Ev
-__ZN9IOService20_RESERVEDIOService58Ev
-__ZN9IOService20_RESERVEDIOService59Ev
-__ZN9IOService20_RESERVEDIOService60Ev
-__ZN9IOService20_RESERVEDIOService61Ev
-__ZN9IOService20_RESERVEDIOService62Ev
-__ZN9IOService20_RESERVEDIOService63Ev
-__ZNK10AppleMacIO12getMetaClassEv
-__ZNK10AppleMacIO14compareNubNameEPK9IOServiceP8OSStringPS4_
-__ZNK10AppleMacIO9MetaClass5allocEv
-__ZNK10AppleNVRAM12getMetaClassEv
-__ZNK10AppleNVRAM9MetaClass5allocEv
-__ZNK16AppleMacIODevice11compareNameEP8OSStringPS1_
-__ZNK16AppleMacIODevice12getMetaClassEv
-__ZNK16AppleMacIODevice9MetaClass5allocEv
-__ZNK17IONVRAMController12getMetaClassEv
-__ZNK17IONVRAMController9MetaClass5allocEv
-__ZNK19ApplePlatformExpert12getMetaClassEv
-__ZNK19ApplePlatformExpert9MetaClass5allocEv
-__ZNK19IODBDMAMemoryCursor12getMetaClassEv
-__ZNK19IODBDMAMemoryCursor9MetaClass5allocEv
-__ZNK8AppleCPU12getMetaClassEv
-__ZNK8AppleCPU9MetaClass5allocEv
-__ZNK8AppleNMI12getMetaClassEv
-__ZNK8AppleNMI9MetaClass5allocEv
-__ZTV10AppleMacIO
-__ZTV10AppleNVRAM
-__ZTV16AppleMacIODevice
-__ZTV17IONVRAMController
-__ZTV19ApplePlatformExpert
-__ZTV19IODBDMAMemoryCursor
-__ZTV8AppleCPU
-__ZTV8AppleNMI
-__ZTVN10AppleMacIO9MetaClassE
-__ZTVN10AppleNVRAM9MetaClassE
-__ZTVN16AppleMacIODevice9MetaClassE
-__ZTVN17IONVRAMController9MetaClassE
-__ZTVN19ApplePlatformExpert9MetaClassE
-__ZTVN19IODBDMAMemoryCursor9MetaClassE
-__ZTVN8AppleCPU9MetaClassE
-__ZTVN8AppleNMI9MetaClassE
-__eSynchronizeIO
-_abs
-_bcopy_nc
-_bzero_nc
-_cacheDisable
-_cacheInit
-_delay_for_interval
-_gGetDefaultBusSpeedsKey
-_get_io_base_addr
-_get_preemption_level
-_hfs_addconverter
-_hfs_remconverter
-_ignore_zero_fault
-_killprint
-_kprintf_lock
-_mapping_prealloc
-_mapping_relpre
-_ml_enable_cache_level
-_ml_enable_nap
-_ml_mem_backoff
-_ml_ppc_sleep
-_ml_set_processor_speed
-_ml_set_processor_voltage
-_ml_throttle
-_pe_do_clock_test
-_pe_run_clock_test
-_pmsRunLocal
-_rc4_crypt
-_rc4_init
-_scc
-_sprintf
-_strcat
-_strcpy
diff --git a/config/Unsupported.exports b/config/Unsupported.exports
index 8886533d8..374517b7e 100644
--- a/config/Unsupported.exports
+++ b/config/Unsupported.exports
@@ -6,6 +6,8 @@ _KUNCUserNotificationDisplayAlert
 _KUNCUserNotificationDisplayFromBundle
 _KUNCUserNotificationDisplayNotice
 _NDR_record
+_OSSpinLockTry
+_OSSpinLockUnlock
 _PE_kputc
 __Z22OSFlushObjectTrackListv
 __ZN15IOWatchDogTimer10gMetaClassE
@@ -49,6 +51,7 @@ __ZN9IODTNVRAM26calculatePartitionChecksumEPh
 __ZN9IODTNVRAM9metaClassE
 __ZN9IODTNVRAMC2EPK11OSMetaClass
 __ZN9IODTNVRAMD2Ev
+__ZN9IODTNVRAM10safeToSyncEv
 __ZNK15IOWatchDogTimer12getMetaClassEv
 __ZNK15IOWatchDogTimer9MetaClass5allocEv
 __ZNK9IODTNVRAM17getOFVariablePermEPK8OSSymbol
@@ -64,6 +67,7 @@ _aes_decrypt_key
 _aes_decrypt_key128
 _aes_decrypt_key256
 _aes_encrypt_cbc
+_aes_encrypt_key
 _aes_encrypt_key128
 _aes_encrypt_key256
 _appleClut8
@@ -93,7 +97,6 @@ _host_get_special_port
 _host_priv_self
 _hz
 _ipc_kernel_map
-_ipflow_fastforward
 _kalloc
 _kauth_cred_issuser
 _kauth_cred_label_update
@@ -115,7 +118,11 @@ _ldisc_deregister
 _ldisc_register
 _log
 _mach_gss_accept_sec_context
+_mach_gss_accept_sec_context_v2
+_mach_gss_hold_cred
 _mach_gss_init_sec_context
+_mach_gss_init_sec_context_v2
+_mach_gss_unhold_cred
 _mach_make_memory_entry_64
 _mach_memory_entry_page_op
 _mach_memory_entry_range_op
@@ -159,7 +166,6 @@ _thread_notrigger
 _thread_tid
 _tsleep
 _vfs_context_current
-_vfs_setlocklocal
 _vfs_update_vfsstat
 _vm_allocate
 _vm_deallocate
diff --git a/config/Unsupported.i386.exports b/config/Unsupported.i386.exports
index 66029e241..38b70f0ff 100644
--- a/config/Unsupported.i386.exports
+++ b/config/Unsupported.i386.exports
@@ -23,7 +23,7 @@ _kernel_thread
 _lapic_set_perfcnt_interrupt_mask
 _lapic_set_pmi_func
 _lo_ifp
-_m_adj
+_m_adj:_mbuf_adj
 _m_cat
 _m_copydata
 _m_copym
@@ -41,7 +41,7 @@ _m_split
 _m_trailingspace:_mbuf_trailingspace
 _mach_msg_rpc_from_kernel
 _mach_msg_send_from_kernel_with_options
-_mcl_to_paddr
+_mcl_to_paddr:_mbuf_data_to_physical
 _ml_get_apicid
 _ml_get_maxbusdelay
 _ml_get_maxsnoop
diff --git a/config/Unsupported.ppc.exports b/config/Unsupported.ppc.exports
deleted file mode 100644
index fbc85ede8..000000000
--- a/config/Unsupported.ppc.exports
+++ /dev/null
@@ -1,118 +0,0 @@
-_CallTVector
-_PPCcalls
-_PE_write_IIC
-__ZN19IODBDMAMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm
-__ZN9IODTNVRAM17getOWVariableInfoEmPPK8OSSymbolPmS4_
-__ZN9IODTNVRAM19convertObjectToPropEPhPmPK8OSSymbolP8OSObject
-__ZN9IODTNVRAM19convertPropToObjectEPhmS0_mPPK8OSSymbolPP8OSObject
-__ZN9IODTNVRAM19searchNVRAMPropertyEP17IONVRAMDescriptorPm
-__ZN9IODTNVRAM19unescapeBytesToDataEPKhm
-_domains
-_get_preemption_level
-_ignore_zero_fault
-_ifunit
-_in6addr_local
-_in_broadcast
-_inaddr_local
-_inet_domain_mutex
-_ip_mutex
-_ip_output
-_ip_protox
-_killprint
-_kernel_flock
-_kernel_thread
-_lo_ifp
-_mapping_prealloc
-_mapping_relpre
-_m_adj
-_m_cat
-_m_copydata
-_m_copym
-_m_free:_mbuf_free
-_m_freem:_mbuf_freem
-_m_get
-_m_gethdr
-_m_getpacket
-_m_getpackets
-_m_mclget
-_m_mtod
-_m_prepend_2
-_m_pullup
-_m_split
-_m_trailingspace:_mbuf_trailingspace
-_mcl_to_paddr
-_ml_enable_cache_level
-_ml_enable_nap
-_ml_ppc_sleep
-_ml_set_processor_speed
-_ml_set_processor_voltage
-_ml_throttle
-_nd6_storelladdr
-_net_add_domain
-_net_add_proto
-_net_del_domain
-_net_del_proto
-_pffinddomain
-_pffindproto
-_pmsStart
-_pmsPark
-_pmsRun
-_pmsRunLocal
-_pmsBuild
-_pru_abort_notsupp
-_pru_accept_notsupp
-_pru_bind_notsupp
-_pru_connect2_notsupp
-_pru_connect_notsupp
-_pru_disconnect_notsupp
-_pru_listen_notsupp
-_pru_peeraddr_notsupp
-_pru_rcvd_notsupp
-_pru_rcvoob_notsupp
-_pru_send_notsupp
-_pru_sense_null
-_pru_shutdown_notsupp
-_pru_sockaddr_notsupp
-_pru_sopoll_notsupp
-_ml_mem_backoff
-_sbappendaddr
-_sbappendrecord
-_sbflush
-_sbspace
-_soabort
-_sobind
-_socantrcvmore
-_socantsendmore
-_sock_getlistener
-_sock_release
-_sock_retain
-_soclose
-_soconnect
-_socreate
-_sodisconnect
-_sofree
-_sofreelastref
-_soisconnected
-_soisconnecting
-_soisdisconnected
-_soisdisconnecting
-_sonewconn
-_sooptcopyin
-_sooptcopyout
-_sopoll
-_soreceive
-_soreserve
-_sorwakeup
-_sosend
-_sosetopt
-_tcbinfo
-_thread_call_func
-_thread_call_func_cancel
-_thread_call_func_delayed
-_thread_call_is_delayed
-_thread_cancel_timer
-_thread_funnel_set
-_thread_set_timer
-_thread_set_timer_deadline
-_udbinfo
-_clock_get_system_value
diff --git a/config/Unsupported.x86_64.exports b/config/Unsupported.x86_64.exports
index 79dce8fdc..9413c7dec 100644
--- a/config/Unsupported.x86_64.exports
+++ b/config/Unsupported.x86_64.exports
@@ -32,3 +32,4 @@ _tmrCvt
 _tsc_get_info
 _hibernate_vm_lock
 _hibernate_vm_unlock
+
diff --git a/config/version.c b/config/version.c
index d916cbee8..4870d134c 100644
--- a/config/version.c
+++ b/config/version.c
@@ -46,3 +46,5 @@ const char osbuilder[] = "###KERNEL_BUILDER###";
 const char osrelease[] = OSRELEASE;
 const char ostype[] = OSTYPE;
 char osversion[OSVERSIZE];
+
+__private_extern__ const char compiler_version[] = __VERSION__;
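Note: the added global bakes the compiler's identification banner into the kernel binary via the predefined __VERSION__ macro. A minimal stand-alone sketch of the same pattern (hypothetical file, not part of this patch):

    #include <cstdio>

    // __VERSION__ expands to a string literal identifying the compiler;
    // the exact text varies by toolchain.
    static const char compiler_version[] = __VERSION__;

    int main()
    {
        std::printf("built with: %s\n", compiler_version);
        return 0;
    }
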
diff --git a/osfmk/ppc/cpu_affinity.h b/iokit/IOKit/AppleKeyStoreInterface.h
similarity index 62%
rename from osfmk/ppc/cpu_affinity.h
rename to iokit/IOKit/AppleKeyStoreInterface.h
index 2e0ae7ce4..02cb776c1 100644
--- a/osfmk/ppc/cpu_affinity.h
+++ b/iokit/IOKit/AppleKeyStoreInterface.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,28 +25,36 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-#ifdef KERNEL_PRIVATE
-#ifndef _PPC_CPU_AFFINITY_H_
-#define _PPC_CPU_AFFINITY_H_
 
-/*
- * Just one hardware affinity set - the whole machine.
- * This allows us to give the pretense that PPC supports the affinity policy
- * SPI. The kernel will accept affinity hints but effectively ignore them. 
- * Hence Universal Apps can use platform-independent code.
- */
-static inline int ml_get_max_affinity_sets(void)
+#ifndef _IOKIT_APPLEKEYSTOREINTERFACE_H
+#define _IOKIT_APPLEKEYSTOREINTERFACE_H
+
+// These are currently duplicate defs with different names
+// from AppleKeyStore & CoreStorage
+
+// aka MAX_KEY_SIZE
+#define AKS_MAX_KEY_SIZE    128 
+
+// aka rawKey
+struct aks_raw_key_t
 {
-	return 1;
-}
+    uint32_t  keybytecount;
+    uint8_t   keybytes[AKS_MAX_KEY_SIZE];       
+};
 
-/*
- * Return the single processor set.
- */
-static inline processor_set_t ml_affinity_to_pset(__unused int affinity_num)
+// aka volumeKey
+struct aks_volume_key_t
 {
-	return processor_pset(master_processor);
-}
+    uint32_t      algorithm;
+    aks_raw_key_t key;    
+};
+
+// aka AKS_GETKEY
+#define AKS_PLATFORM_FUNCTION_GETKEY    "getKey"
+
+// aka kCSFDETargetVEKID
+#define PLATFORM_FUNCTION_GET_MEDIA_ENCRYPTION_KEY_UUID  "CSFDETargetVEKID"
+
+#define AKS_SERVICE_PATH                "/IOResources/AppleKeyStore"
 
-#endif /* _I386_CPU_AFFINITY_H_ */
-#endif /* KERNEL_PRIVATE */
+#endif /* _IOKIT_APPLEKEYSTOREINTERFACE_H */
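Note: this header only declares the names shared between AppleKeyStore and CoreStorage; the calling convention of the "getKey" platform function is not specified here. A hedged sketch of a plausible in-kernel caller, assuming (not confirmed by this header) that param1 carries the aks_volume_key_t to be filled:

    #include <IOKit/IOService.h>
    #include <IOKit/IOKitKeys.h>
    #include <IOKit/AppleKeyStoreInterface.h>

    static IOReturn fetchVolumeKey(aks_volume_key_t * vkey)
    {
        // Look up the AppleKeyStore service published under IOResources.
        IORegistryEntry * entry =
            IORegistryEntry::fromPath(kIOServicePlane ":" AKS_SERVICE_PATH);
        IOService * aks = OSDynamicCast(IOService, entry);
        if (!aks) return kIOReturnNotFound;

        // Assumed parameter convention; the real contract lives in
        // AppleKeyStore itself.
        IOReturn ret = aks->callPlatformFunction(AKS_PLATFORM_FUNCTION_GETKEY,
                                                 true /* waitForFunction */,
                                                 vkey, NULL, NULL, NULL);
        entry->release();
        return ret;
    }
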
diff --git a/iokit/IOKit/IOBufferMemoryDescriptor.h b/iokit/IOKit/IOBufferMemoryDescriptor.h
index 391a0460e..f5d504061 100644
--- a/iokit/IOKit/IOBufferMemoryDescriptor.h
+++ b/iokit/IOKit/IOBufferMemoryDescriptor.h
@@ -86,6 +86,7 @@ private:
 			       task_t	    inTask) APPLE_KEXT_DEPRECATED; /* use withOptions() instead */
 #endif /* !__LP64__ */
 
+public:
     virtual bool initWithPhysicalMask(
 				task_t		  inTask,
 				IOOptionBits      options,
@@ -146,7 +147,11 @@ public:
     kIOMemoryPhysicallyContiguous - pass to request memory be physically contiguous. This option is heavily discouraged. The request may fail if memory is fragmented, may cause large amounts of paging activity, and may take a very long time to execute.<br>
     kIOMemoryPageable - pass to request memory be non-wired - the default for kernel allocated memory is wired.<br>
     kIOMemoryPurgeable - pass to request memory that may later have its purgeable state set with IOMemoryDescriptor::setPurgeable. Only supported for kIOMemoryPageable allocations.<br>
-    kIOMemoryKernelUserShared - pass to request memory that will be mapped into both the kernel and client applications.
+    kIOMemoryKernelUserShared - pass to request memory that will be mapped into both the kernel and client applications.<br>
+    kIOMapInhibitCache - allocate memory with inhibited cache setting. <br>		
+    kIOMapWriteThruCache - allocate memory with writethru cache setting. <br>		
+    kIOMapCopybackCache - allocate memory with copyback cache setting. <br>		
+    kIOMapWriteCombineCache - allocate memory with writecombined cache setting.
     @param capacity The number of bytes to allocate.
     @param alignment The minimum required alignment of the buffer in bytes - 1 is the default for no required alignment. For example, pass 256 to get memory allocated at an address with bits 0-7 zero.
     @result Returns an instance of class IOBufferMemoryDescriptor to be released by the caller, which will free the memory descriptor and associated buffer. */
@@ -164,7 +169,11 @@ public:
     @param options Options for the allocation:<br>
     kIODirectionOut, kIODirectionIn - set the direction of the I/O transfer.<br>
     kIOMemoryPhysicallyContiguous - pass to request memory be physically contiguous. This option is heavily discouraged. The request may fail if memory is fragmented, may cause large amounts of paging activity, and may take a very long time to execute.<br>
-    kIOMemoryKernelUserShared - pass to request memory that will be mapped into both the kernel and client applications.
+    kIOMemoryKernelUserShared - pass to request memory that will be mapped into both the kernel and client applications.<br>
+    kIOMapInhibitCache - allocate memory with inhibited cache setting. <br>		
+    kIOMapWriteThruCache - allocate memory with writethru cache setting. <br>		
+    kIOMapCopybackCache - allocate memory with copyback cache setting. <br>		
+    kIOMapWriteCombineCache - allocate memory with writecombined cache setting.
     @param capacity The number of bytes to allocate.
     @param mask The buffer will be allocated with pages such that physical addresses will only have bits set present in physicalMask. For example, pass 0x00000000FFFFFFFFULL for a buffer to be accessed by hardware that has 32 address bits.
     @result Returns an instance of class IOBufferMemoryDescriptor to be released by the caller, which will free the memory descriptor and associated buffer. */
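Note: the doc additions record that the kIOMap*Cache mode bits may now be passed directly in the options of these allocators. A minimal sketch of a wired, write-combined buffer using the existing inTaskWithOptions() factory (sizes illustrative):

    #include <IOKit/IOBufferMemoryDescriptor.h>

    static IOBufferMemoryDescriptor * makeWriteCombinedBuffer(void)
    {
        IOBufferMemoryDescriptor * buf = IOBufferMemoryDescriptor::inTaskWithOptions(
                kernel_task,
                kIODirectionInOut | kIOMapWriteCombineCache,  // cache mode in options
                4096,         // capacity, bytes
                PAGE_SIZE);   // alignment
        if (buf) {
            void * va = buf->getBytesNoCopy();   // wired kernel mapping
            (void) va;  // ... fill buffer, hand physical segments to hardware ...
        }
        return buf;   // caller releases
    }
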
diff --git a/iokit/IOKit/IOCatalogue.h b/iokit/IOKit/IOCatalogue.h
index f087396fd..49f8fb84c 100644
--- a/iokit/IOKit/IOCatalogue.h
+++ b/iokit/IOKit/IOCatalogue.h
@@ -63,11 +63,11 @@ private:
     SInt32                   generation;
 
 /* This stuff is no longer used at all but was exported in prior
- * releases, so we keep it around for PPC/i386 only.
+ * releases, so we keep it around for i386 only.
  */
-#if __ppc__ || __i386__
+#if __i386__
     IOLock *                 kld_lock;
-#endif /* __ppc__ || __i386__ */
+#endif /* __i386__ */
 
 public:
     /*!
@@ -202,9 +202,19 @@ public:
     /*!
         @function reset
         @abstract Return the Catalogue to its initial state.
+        @discussion
+        Should only be used by kextd just before it sends all kext personalities down during a rescan.
     */
     void reset(void);
 
+    /*!
+        @function resetAndAddDrivers
+        @abstract Replace personalities in IOCatalog with those provided.
+        @discussion
+        Resets the catalogue with a new set of drivers, preserving matching originals to keep wired memory usage down.
+    */
+    bool resetAndAddDrivers(OSArray * drivers, bool doNubMatching = true);
+
     /*!
         @function serialize
         @abstract Serializes the catalog for transport to the user.
@@ -215,10 +225,10 @@ public:
 
     bool serializeData(IOOptionBits kind, OSSerialize * s) const;
 
-/* This stuff is no longer used at all we keep it around for PPC/i386
+/* This stuff is no longer used at all; we keep it around for i386
  * binary compatibility only. Symbols are no longer exported.
  */
-#if __ppc__ || __i386__
+#if __i386__
     /*!
         @function recordStartupExtensions
         @abstract Records extensions made available by the primary booter.
@@ -253,7 +263,7 @@ public:
             removed or wasn't present, KERN_FAILURE otherwise.
     */
     virtual kern_return_t removeKernelLinker(void);
-#endif /* __ppc__ || __i386__ */
+#endif /* __i386__ */
 
 private:
 
diff --git a/iokit/IOKit/IOCommandGate.h b/iokit/IOKit/IOCommandGate.h
index 1b17b791d..d38c88670 100644
--- a/iokit/IOKit/IOCommandGate.h
+++ b/iokit/IOKit/IOCommandGate.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2009 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -83,10 +83,6 @@ member function's parameter list.
 			       void *arg2, void *arg3);
 
 protected:
-/*!
-    @function checkForWork
-    @abstract Not used, $link IOEventSource::checkForWork(). */
-    virtual bool checkForWork();
 
 /*! @struct ExpansionData
     @discussion This structure will be used to expand the capabilities of the IOWorkLoop in the future.
diff --git a/iokit/IOKit/IODMACommand.h b/iokit/IOKit/IODMACommand.h
index a2a2852f3..c8bd8b37a 100644
--- a/iokit/IOKit/IODMACommand.h
+++ b/iokit/IOKit/IODMACommand.h
@@ -258,7 +258,7 @@ public:
 
 /*! @function setMemoryDescriptor
     @abstract Sets and resets the DMACommand's current memory descriptor
-    @discussion The DMA command will configure itself based on the information that it finds in the memory descriptor.  It looks for things like the direction of the memory descriptor and whether the current memory descriptor is already mapped into some IOMMU.  As a programmer convenience it can also prepare the memory descriptor immediately.  See prepare().  Note the IODMACommand is designed to used multiple times with a succession of memory descriptors, making the pooling of commands possible.  It is an error though to attempt to reset a currently prepared() DMA command.  Warning: This routine may block so never try to autoprepare an IODMACommand while in a gated context, i.e. one of the WorkLoops action call outs.
+    @discussion The DMA command will configure itself based on the information that it finds in the memory descriptor.  It looks for things like the direction of the memory descriptor and whether the current memory descriptor is already mapped into some IOMMU.  As a programmer convenience it can also prepare the DMA command immediately.  See prepare().  Note the IODMACommand is designed to be used multiple times with a succession of memory descriptors, making the pooling of commands possible.  It is an error though to attempt to reset a currently prepared() DMA command.  Warning: This routine may block so never try to autoprepare an IODMACommand while in a gated context, i.e. one of the WorkLoop's action callouts.
     @param mem A pointer to the current I/Os memory descriptor.
     @param autoPrepare An optional boolean variable that will call the prepare() function automatically after the memory descriptor is processed. Defaults to true.
     @result Returns kIOReturnSuccess, kIOReturnBusy if currently prepared, kIOReturnNoSpace if the length(mem) >= Maximum Transfer Size or the error codes returned by prepare() (qv).
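Note: a compact sketch of the reuse pattern the discussion describes — one pooled command, a succession of descriptors, no autoprepare from a gated context. Names other than the IODMACommand API are illustrative:

    #include <IOKit/IODMACommand.h>

    static IOReturn runOneTransfer(IODMACommand * cmd, IOMemoryDescriptor * md)
    {
        // May block (prepares the descriptor): not for workloop callouts.
        IOReturn ret = cmd->setMemoryDescriptor(md, true /* autoPrepare */);
        if (ret != kIOReturnSuccess) return ret;

        UInt64 offset = 0;
        while (offset < md->getLength()) {
            IODMACommand::Segment64 seg;
            UInt32 numSeg = 1;
            if ((cmd->gen64IOVMSegments(&offset, &seg, &numSeg) != kIOReturnSuccess)
                || !numSeg) break;
            // ... program one scatter/gather element from
            //     seg.fIOVMAddr / seg.fLength ...
        }
        // Completes and detaches; the command can take the next descriptor.
        return cmd->clearMemoryDescriptor(true /* autoComplete */);
    }
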
diff --git a/iokit/IOKit/IODataQueueShared.h b/iokit/IOKit/IODataQueueShared.h
index 2fa0e9a45..dc4532486 100644
--- a/iokit/IOKit/IODataQueueShared.h
+++ b/iokit/IOKit/IODataQueueShared.h
@@ -66,7 +66,7 @@ typedef struct _IODataQueueMemory {
  * @abstract A struct mapping to the appendix region of a data queue.
  * @discussion This struct is variable sized dependent on the version.  The struct represents the data queue appendix information.
  * @field version The version of the queue appendix.
- * @field port The notification port associated with this queue.
+ * @field msgh Mach message header containing the notification mach port associated with this queue.
  */
 typedef struct _IODataQueueAppendix {
     UInt32            version;
diff --git a/iokit/IOKit/IOEventSource.h b/iokit/IOKit/IOEventSource.h
index 4afc5aa99..10a392afc 100644
--- a/iokit/IOKit/IOEventSource.h
+++ b/iokit/IOKit/IOEventSource.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2000, 2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -44,6 +44,9 @@ HISTORY
 #include <IOKit/system.h>
 #include <IOKit/IOWorkLoop.h>
 
+#if IOKITSTATS
+#include <IOKit/IOStatisticsPrivate.h>
+#endif
 
 __BEGIN_DECLS
 #include <mach/clock_types.h>
@@ -69,12 +72,12 @@ source may only be a member of 1 linked list chain.  If you need to move it
 between chains than make sure it is removed from the original chain before
 attempting to move it.
 <br><br>
-	The IOEventSource makes no attempt to maintain the consitency of it's internal data across multi-threading.  It is assumed that the user of these basic tools will protect the data that these objects represent in some sort of device wide instance lock.	For example the IOWorkLoop maintains the event chain by handing off change request to its own thread and thus single threading access to its state.
+	The IOEventSource makes no attempt to maintain the consistency of its internal data across multi-threading.  It is assumed that the user of these basic tools will protect the data that these objects represent in some sort of device wide instance lock.	For example the IOWorkLoop maintains the event chain by using an IOCommandGate and thus single threading access to its state.
 <br><br>
-	All subclasses of the IOEventSource are expected to implement the checkForWork() member function.
+	All subclasses of IOEventSource that wish to perform work on the work-loop thread are expected to implement the checkForWork() member function. As of Mac OS X 10.7 (Darwin 11), checkForWork is no longer pure virtual, and should not be overridden if there is no work to be done.
 
 <br><br>
-	checkForWork() is the key method in this class.	 It is called by some work-loop when convienient and is expected to evaluate it's internal state and determine if an event has occurred since the last call.  In the case of an event having occurred then the instance defined target(owner)/action will be called.	 The action is stored as an ordinary C function pointer but the first parameter is always the owner.  This means that a C++ member function can be used as an action function though this depends on the ABI.
+	checkForWork() is the key method in this class.	 It is called by some work-loop when convenient and is expected to evaluate its internal state and determine if an event has occurred since the last call.  In the case of an event having occurred then the instance defined target(owner)/action will be called.	 The action is stored as an ordinary C function pointer but the first parameter is always the owner.  This means that a C++ member function can be used as an action function though this depends on the ABI.
 <br><br>
 	Although the eventChainNext variable contains a reference to the next event source in the chain this reference is not retained.  The list 'owner' i.e. the client that creates the event, not the work-loop, is expected to retain the source.
 */
@@ -82,6 +85,9 @@ class IOEventSource : public OSObject
 {
     OSDeclareAbstractStructors(IOEventSource)
     friend class IOWorkLoop;
+#if IOKITSTATS
+    friend class IOStatistics;
+#endif
 
 public:
 /*!
@@ -125,7 +131,13 @@ protected:
 /*! @struct ExpansionData
     @discussion This structure will be used to expand the capabilities of the IOEventSource in the future.
     */    
-    struct ExpansionData { };
+    struct ExpansionData {
+#if IOKITSTATS
+	    struct IOEventSourceCounter *counter;
+#else
+	    void *iokitstatsReserved;
+#endif
+	};
 
 /*! @var reserved
     Reserved for future use.  (Internal use only)  */
@@ -149,14 +161,19 @@ successfully.
 */
     virtual bool init(OSObject *owner, IOEventSource::Action action = 0);
 
+    virtual void free( void );
+
 /*! @function checkForWork
-    @abstract Pure Virtual member function used by IOWorkLoop for work
+    @abstract Virtual member function used by IOWorkLoop for work
 scheduling.
     @discussion This function will be called to request a subclass to check
-it's internal state for any work to do and then to call out the owner/action.
+its internal state for any work to do and then to call out the owner/action.
+If this event source never performs any work (e.g. IOCommandGate), this
+method should not be overridden. NOTE: This method is no longer declared pure
+virtual. A default implementation is provided in IOEventSource.
     @result Return true if this function needs to be called again before all its outstanding events have been processed.
         */
-    virtual bool checkForWork() = 0;
+    virtual bool checkForWork();
 
 /*! @function setWorkLoop
     @abstract Set'ter for $link workLoop variable.
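Note: under the new contract, a subclass that never has deferred work simply omits checkForWork(); one that does might look like this minimal sketch (names illustrative; the matching OSDefineMetaClassAndStructors goes in the .cpp):

    #include <IOKit/IOEventSource.h>

    class MyEventSource : public IOEventSource
    {
        OSDeclareDefaultStructors(MyEventSource)

    protected:
        volatile bool eventPending;

        // Runs on the workloop thread; return true to be called again
        // before all outstanding events are considered drained.
        virtual bool checkForWork()
        {
            if (enabled && eventPending) {
                eventPending = false;
                if (action) (*action)(owner);   // owner/action callout
            }
            return false;
        }
    };
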
diff --git a/iokit/IOKit/IOHibernatePrivate.h b/iokit/IOKit/IOHibernatePrivate.h
index 30b307816..0fb3c53f3 100644
--- a/iokit/IOKit/IOHibernatePrivate.h
+++ b/iokit/IOKit/IOHibernatePrivate.h
@@ -34,6 +34,7 @@ extern "C" {
 
 #ifdef KERNEL
 #include <crypto/aes.h>
+#include <uuid/uuid.h>
 #endif
 
 struct IOPolledFileExtent
@@ -48,7 +49,9 @@ struct IOHibernateImageHeader
     uint64_t	imageSize;
     uint64_t	image1Size;
     
-    uint32_t	restore1CodePage;
+    uint32_t	restore1CodePhysPage;
+    uint32_t    reserved1;
+    uint64_t	restore1CodeVirt;
     uint32_t	restore1PageCount;
     uint32_t	restore1CodeOffset;
     uint32_t	restore1StackOffset;
@@ -86,16 +89,15 @@ struct IOHibernateImageHeader
 
     uint32_t	diag[4];
 
-    int32_t	graphicsInfoOffset;
-    int32_t	cryptVarsOffset;
-    int32_t	memoryMapOffset;
-    uint32_t    memoryMapSize;
+    uint32_t    handoffPages;
+    uint32_t    handoffPageCount;
+
     uint32_t    systemTableOffset;
 
     uint32_t	debugFlags;
     uint32_t	options;
 
-    uint32_t	reserved[71];		// make sizeof == 512
+    uint32_t	reserved[70];		// make sizeof == 512
 
     uint64_t	encryptEnd __attribute__ ((packed));
     uint64_t	deviceBase __attribute__ ((packed));
@@ -154,6 +156,25 @@ typedef struct hibernate_cryptvars_t hibernate_cryptvars_t;
 
 #endif /* defined(_AES_H) */
 
+enum 
+{
+    kIOHibernateHandoffType                 = 0x686f0000,
+    kIOHibernateHandoffTypeEnd              = kIOHibernateHandoffType + 0,
+    kIOHibernateHandoffTypeGraphicsInfo     = kIOHibernateHandoffType + 1,
+    kIOHibernateHandoffTypeCryptVars        = kIOHibernateHandoffType + 2,
+    kIOHibernateHandoffTypeMemoryMap        = kIOHibernateHandoffType + 3,
+    kIOHibernateHandoffTypeDeviceTree       = kIOHibernateHandoffType + 4,
+    kIOHibernateHandoffTypeDeviceProperties = kIOHibernateHandoffType + 5,
+    kIOHibernateHandoffTypeKeyStore         = kIOHibernateHandoffType + 6,
+};
+
+struct IOHibernateHandoff
+{
+    uint32_t type;
+    uint32_t bytecount;
+    uint8_t  data[];
+};
+typedef struct IOHibernateHandoff IOHibernateHandoff;
 
 enum 
 {
@@ -233,15 +254,20 @@ typedef void (*kern_get_file_extents_callback_t)(void * ref, uint64_t start, uin
 struct kern_direct_file_io_ref_t *
 kern_open_file_for_direct_io(const char * name, 
 			     kern_get_file_extents_callback_t callback, 
-			     void *      callback_ref,
-			     dev_t *     device_result,
-                             uint64_t *  partitionbase_result,
-                             uint64_t *  maxiocount_result,
-                             boolean_t * solid_state);
+			     void * callback_ref,
+			     dev_t * partition_device_result,
+			     dev_t * image_device_result,
+                             uint64_t * partitionbase_result,
+                             uint64_t * maxiocount_result,
+                             uint32_t * oflags,
+                             off_t offset,
+                             caddr_t addr,
+                             vm_size_t len);
 
 
 void
-kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref);
+kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref,
+			      off_t offset, caddr_t addr, vm_size_t len);
 int
 kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len);
 int get_kernel_symfile(struct proc *p, char const **symfile);
@@ -257,6 +283,7 @@ hibernate_setup(IOHibernateImageHeader * header,
                         boolean_t vmflush,
 			hibernate_page_list_t ** page_list_ret,
 			hibernate_page_list_t ** page_list_wired_ret,
+			hibernate_page_list_t ** page_list_pal_ret,
                         boolean_t * encryptedswap);
 kern_return_t 
 hibernate_teardown(hibernate_page_list_t * page_list,
@@ -279,6 +306,7 @@ hibernate_vm_unlock(void);
 void
 hibernate_page_list_setall(hibernate_page_list_t * page_list,
 			   hibernate_page_list_t * page_list_wired,
+			   hibernate_page_list_t * page_list_pal,
 			   uint32_t * pagesOut);
 
 // mark pages to be saved, or pages not to be saved but available 
@@ -316,7 +344,7 @@ hibernate_page_bitmap_pin(hibernate_page_list_t * list, uint32_t * page);
 uint32_t
 hibernate_page_bitmap_count(hibernate_bitmap_t * bitmap, uint32_t set, uint32_t page);
 
-void 
+uintptr_t 
 hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t procFlags);
 
 void
@@ -341,8 +369,6 @@ extern uint32_t    gIOHibernateFreeTime;	// max time to spend freeing pages (ms)
 extern uint8_t     gIOHibernateRestoreStack[];
 extern uint8_t     gIOHibernateRestoreStackEnd[];
 extern IOHibernateImageHeader *    gIOHibernateCurrentHeader;
-extern hibernate_graphics_t *      gIOHibernateGraphicsInfo;
-extern hibernate_cryptwakevars_t * gIOHibernateCryptWakeVars;
 
 #define HIBLOG(fmt, args...)	\
     { kprintf(fmt, ## args); printf(fmt, ## args); }
@@ -419,9 +445,11 @@ enum {
 #define kIOHibernateMachineSignatureKey	  "machine-signature"
 
 #define kIOHibernateRTCVariablesKey	"IOHibernateRTCVariables"
+#define kIOHibernateSMCVariablesKey	"IOHibernateSMCVariables"
 
 #define kIOHibernateBootSwitchVarsKey			"boot-switch-vars"
 
+#define kIOHibernateUseKernelInterpreter    0x80000000
 
 #ifdef __cplusplus
 }
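Note: the fixed graphicsInfoOffset/cryptVarsOffset/memoryMapOffset fields give way to self-describing handoff records located via handoffPages. A sketch of a consumer, assuming records are packed back to back and terminated by a kIOHibernateHandoffTypeEnd record:

    // 'handoff' is a hypothetical pointer to the first record in the
    // handoff pages.
    static void walkHandoffs(IOHibernateHandoff * handoff)
    {
        while (handoff->type != kIOHibernateHandoffTypeEnd) {
            switch (handoff->type) {
            case kIOHibernateHandoffTypeCryptVars:
                // handoff->data / handoff->bytecount describe the payload
                break;
            case kIOHibernateHandoffTypeMemoryMap:
                break;
            default:
                break;      // unknown records are skipped by length
            }
            handoff = (IOHibernateHandoff *) &handoff->data[handoff->bytecount];
        }
    }
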
diff --git a/iokit/IOKit/IOInterruptEventSource.h b/iokit/IOKit/IOInterruptEventSource.h
index fe5d4ae12..2e1a82765 100644
--- a/iokit/IOKit/IOInterruptEventSource.h
+++ b/iokit/IOKit/IOInterruptEventSource.h
@@ -189,6 +189,17 @@ state when checkForWork is called. */
     @param nub Where did the interrupt originate from
     @param ind What is this interrupts index within 'nub'. */
     virtual void disableInterruptOccurred(void *, IOService *nub, int ind);
+    
+/*! @function warmCPU
+    @abstract Tries to reduce latency for an interrupt which will be received near a specified time.
+    @discussion Warms up a CPU in advance of an interrupt so that the interrupt may be serviced with predictable latency.  
+    The warm-up is not periodic; callers should call warmCPU once in advance of each interrupt.  It is recommended that
+    requests be issued in serial (i.e. each after the target for the previous call has elapsed), as there is a systemwide
+    cap on the number of outstanding requests.  This routine may be disruptive to the system if used with very small intervals
+    between requests; it should be used only in cases where interrupt latency is absolutely critical, and tens or hundreds of 
+    milliseconds between targets is the expected time scale.  NOTE: it is not safe to call this method with interrupts disabled.
+    @param abstime Time at which interrupt is expected. */
+    IOReturn warmCPU(uint64_t abstime);
 
 private:
     IOReturn registerInterruptHandler(IOService *inProvider, int inIntIndex);
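Note: a sketch of the intended warmCPU() call pattern — one warm-up per expected interrupt, issued shortly before its deadline (interval and names are illustrative):

    #include <IOKit/IOInterruptEventSource.h>
    #include <kern/clock.h>

    // Expecting an interrupt roughly 2 ms out; never call this with
    // interrupts disabled.
    static void warmForExpectedInterrupt(IOInterruptEventSource * ies)
    {
        uint64_t deadline;
        clock_interval_to_deadline(2, kMillisecondScale, &deadline);
        ies->warmCPU(deadline);
    }
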
diff --git a/iokit/IOKit/IOKitDebug.h b/iokit/IOKit/IOKitDebug.h
index 96fb7c5a0..de2850d4e 100644
--- a/iokit/IOKit/IOKitDebug.h
+++ b/iokit/IOKit/IOKitDebug.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,13 +25,6 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * Copyright (c) 1998 Apple Computer, Inc.  All rights reserved. 
- *
- * HISTORY
- *
- */
-
 
 #ifndef _IOKIT_IOKITDEBUG_H
 #define _IOKIT_IOKITDEBUG_H
@@ -82,6 +75,9 @@ enum {
     kOSRegistryModsMode =         0x00040000ULL,  // Change default registry modification handling - panic vs. log
 //    kIOTraceIOService   =         0x00080000ULL,  // Obsolete: Use iotrace=0x00080000ULL to enable now
     kIOLogHibernate     =         0x00100000ULL,
+    kIOLogDriverPower1  =         0x01000000ULL,
+    kIOLogDriverPower2  =         0x02000000ULL,
+    kIOStatistics       =         0x04000000ULL,
 
     // debug aids - change behaviour
     kIONoFreeObjects    =         0x00100000ULL,
@@ -97,7 +93,7 @@ enum {
 	kIOTraceEventSources	=		0x00000004ULL,	// Trace non-passive event sources
 	kIOTraceIntEventSource	=		0x00000008ULL,	// Trace IOIES and IOFIES sources
 	kIOTraceCommandGates	=		0x00000010ULL,	// Trace command gate activity
 	kIOTraceTimers			= 		0x00000020ULL,	// Trace timer event source activity
 	
 	kIOTracePowerMgmt		=		0x00000400ULL,	// Trace power management changes
 	
@@ -108,15 +104,22 @@ enum {
 
 extern SInt64    gIOKitDebug;
 extern SInt64    gIOKitTrace;
-extern UInt64	 gIOInterruptThresholdNS;
-
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-struct IORegistryPlane;
-extern void    IOPrintPlane( const struct IORegistryPlane * plane );
+#ifdef __cplusplus
+class IORegistryPlane;
+#endif
+
+extern void    IOPrintPlane(
+#ifdef __cplusplus
+                            const IORegistryPlane * plane
+#else
+                            const struct IORegistryPlane * plane
+#endif
+                           );
 #ifndef _OSCPPDEBUG_H
 extern void    OSPrintMemory( void );
 #endif
diff --git a/iokit/IOKit/IOKitKeys.h b/iokit/IOKit/IOKitKeys.h
index 62395d54d..a6d7c8bf5 100644
--- a/iokit/IOKit/IOKitKeys.h
+++ b/iokit/IOKit/IOKitKeys.h
@@ -151,6 +151,7 @@
 
 // IODTNVRAM property keys
 #define kIONVRAMDeletePropertyKey	"IONVRAM-DELETE-PROPERTY"
+#define kIONVRAMSyncNowPropertyKey	"IONVRAM-SYNCNOW-PROPERTY"
 #define kIODTNVRAMPanicInfoKey		"aapl,panic-info"
 
 // keys for complex boot information
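Note: a hedged sketch of driving the new sync-now property from kernel code, assuming IODTNVRAM's convention that writing the key (here with its own name as the value) requests an immediate flush of NVRAM variables:

    #include <IOKit/IOKitKeys.h>
    #include <IOKit/IORegistryEntry.h>
    #include <libkern/c++/OSString.h>

    static void requestNVRAMSync(void)
    {
        IORegistryEntry * options = IORegistryEntry::fromPath("/options", gIODTPlane);
        if (!options) return;

        if (OSString * v = OSString::withCString(kIONVRAMSyncNowPropertyKey)) {
            options->setProperty(kIONVRAMSyncNowPropertyKey, v);
            v->release();
        }
        options->release();
    }
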
diff --git a/iokit/IOKit/IOKitKeysPrivate.h b/iokit/IOKit/IOKitKeysPrivate.h
index 73e93db0f..06794304e 100644
--- a/iokit/IOKit/IOKitKeysPrivate.h
+++ b/iokit/IOKit/IOKitKeysPrivate.h
@@ -32,25 +32,31 @@
 #include <IOKit/IOKitKeys.h>
 
 // properties found in the registry root
-#define kIOConsoleUsersKey                      "IOConsoleUsers"        /* value is OSArray */
+#define kIOConsoleLockedKey                     "IOConsoleLocked"               /* value is OSBoolean */
+#define kIOConsoleUsersKey                      "IOConsoleUsers"                /* value is OSArray */
 #define kIOMaximumMappedIOByteCountKey          "IOMaximumMappedIOByteCount"    /* value is OSNumber */
 
 // properties found in the console user dict
+#define kIOConsoleSessionAuditIDKey             "kCGSSessionAuditIDKey"        /* value is OSNumber */
 
-#define kIOConsoleSessionIDKey                  "kCGSSessionIDKey"        /* value is OSNumber */
-
-#define kIOConsoleSessionUserNameKey            "kCGSSessionUserNameKey"    /* value is OSString */
-#define kIOConsoleSessionUIDKey                 "kCGSSessionUserIDKey"        /* value is OSNumber */
-#define kIOConsoleSessionConsoleSetKey          "kCGSSessionConsoleSetKey"    /* value is OSNumber */
-#define kIOConsoleSessionOnConsoleKey           "kCGSSessionOnConsoleKey"    /* value is OSBoolean */
+#define kIOConsoleSessionUserNameKey            "kCGSSessionUserNameKey"       /* value is OSString */
+#define kIOConsoleSessionUIDKey                 "kCGSSessionUserIDKey"         /* value is OSNumber */
+#define kIOConsoleSessionConsoleSetKey          "kCGSSessionConsoleSetKey"     /* value is OSNumber */
+#define kIOConsoleSessionOnConsoleKey           "kCGSSessionOnConsoleKey"      /* value is OSBoolean */
 #define kIOConsoleSessionSecureInputPIDKey      "kCGSSessionSecureInputPID"    /* value is OSNumber */
+#define kIOConsoleSessionScreenLockedTimeKey    "CGSSessionScreenLockedTime"   /* value is OSNumber, secs - 1970 */
 
 // IOResources property
-#define kIOConsoleUsersSeedKey                  "IOConsoleUsersSeed"        /* value is OSNumber */
+#define kIOConsoleUsersSeedKey                  "IOConsoleUsersSeed"           /* value is OSNumber */
+
+// interest type
+#define kIOConsoleSecurityInterest		"IOConsoleSecurityInterest"
+
 
 // private keys for clientHasPrivilege
 #define kIOClientPrivilegeConsoleUser           "console"
 #define kIOClientPrivilegeSecureConsoleProcess  "secureprocess"
+#define kIOClientPrivilegeConsoleSession        "consolesession"
 
 // clientHasPrivilege security token for kIOClientPrivilegeSecureConsoleProcess
 typedef struct _IOUCProcessToken {
diff --git a/iokit/IOKit/IOKitServer.h b/iokit/IOKit/IOKitServer.h
index a68c99243..26787a25c 100644
--- a/iokit/IOKit/IOKitServer.h
+++ b/iokit/IOKit/IOKitServer.h
@@ -73,6 +73,11 @@ enum {
     @constant kIOCatalogRemoveDrivers  Signals a call to the removeDrivers function in IOCatalogue.
     @constant kIOCatalogRemoveDriversNoMatch  Signals a call to the removeDrivers function in IOCatalogue but does not start a matching thread.
     @constant kIOCatalogStartMatching  Signals the IOCatalogue to start an IOService matching thread.
+    @constant kIOCatalogRemoveKernelLinker  Deprecated; does nothing.
+    @constant kIOCatalogKextdActive  Signals the kernel that kextd is running.
+    @constant kIOCatalogKextdFinishedLaunching  Signals the IOCatalogue that kextd has finished sending it information at startup.
+    @constant kIOCatalogResetDrivers  Resets the IOCatalogue with a new set of personalities.
+    @constant kIOCatalogResetDriversNoMatch  Resets the IOCatalogue with a new set of personalities but does not start a matching thread.
 */
 enum {
     kIOCatalogAddDrivers	= 1,
@@ -82,7 +87,9 @@ enum {
     kIOCatalogStartMatching,
     kIOCatalogRemoveKernelLinker,
     kIOCatalogKextdActive,
-    kIOCatalogKextdFinishedLaunching
+    kIOCatalogKextdFinishedLaunching,
+    kIOCatalogResetDrivers,
+    kIOCatalogResetDriversNoMatch
 };
 
 // IOCatalogueGetData
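Note: the two new flags travel over the existing IOCatalogueSendData() IOKitLib call. A user-space sketch of a kextd-style client, where xml/xmlLen stand for a hypothetical serialized OSArray of personalities:

    #include <IOKit/IOKitLib.h>
    #include <IOKit/IOKitServer.h>

    static kern_return_t sendFreshPersonalities(const char * xml, uint32_t xmlLen)
    {
        // kIOCatalogResetDrivers replaces the catalogue's personality set
        // and then starts matching; ...NoMatch skips the matching thread.
        return IOCatalogueSendData(kIOMasterPortDefault,
                                   kIOCatalogResetDrivers,
                                   xml, xmlLen);
    }
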
diff --git a/iokit/IOKit/IOLib.h b/iokit/IOKit/IOLib.h
index 6183a3358..5e91b4725 100644
--- a/iokit/IOKit/IOLib.h
+++ b/iokit/IOKit/IOLib.h
@@ -222,19 +222,10 @@ void IOMappedWrite32(IOPhysicalAddress address, UInt32 value);
 
 void IOMappedWrite64(IOPhysicalAddress address, UInt64 value);
 
-/*! @function IOSetProcessorCacheMode
-    @abstract Sets the processor cache mode for mapped memory.
-    @discussion This function sets the cache mode of an already mapped & wired memory range. Note this may not be supported on I/O mappings or shared memory - it is far preferable to set the cache mode as mappings are created with the IOMemoryDescriptor::map method.
-    @param task Task the memory is mapped into.
-    @param address Virtual address of the memory.
-    @param length Length of the range to set.
-    @param cacheMode A constant from IOTypes.h, <br>
-	kIOMapDefaultCache to inhibit the cache in I/O areas, kIOMapCopybackCache in general purpose RAM.<br>
-	kIOMapInhibitCache, kIOMapWriteThruCache, kIOMapCopybackCache to set the appropriate caching.<br> 
-    @result An IOReturn code.*/
+/* This function is deprecated. Cache settings may be set for allocated memory with the IOBufferMemoryDescriptor api. */
 
 IOReturn IOSetProcessorCacheMode( task_t task, IOVirtualAddress address,
-				  IOByteCount length, IOOptionBits cacheMode );
+				  IOByteCount length, IOOptionBits cacheMode ) __attribute__((deprecated));
 
 /*! @function IOFlushProcessorCache
     @abstract Flushes the processor cache for mapped memory.
@@ -341,8 +332,23 @@ void Debugger(const char * reason);
 void IOPanic(const char *reason) __attribute__((deprecated));
 #endif
 
-struct OSDictionary * IOBSDNameMatching( const char * name );
-struct OSDictionary * IOOFPathMatching( const char * path, char * buf, int maxLen );
+#ifdef __cplusplus
+class OSDictionary;
+#endif
+
+#ifdef __cplusplus
+OSDictionary *
+#else
+struct OSDictionary *
+#endif
+IOBSDNameMatching( const char * name );
+
+#ifdef __cplusplus
+OSDictionary *
+#else
+struct OSDictionary *
+#endif
+IOOFPathMatching( const char * path, char * buf, int maxLen );
 
 /*
  * Convert between size and a power-of-two alignment.
diff --git a/iokit/IOKit/IOMemoryCursor.h b/iokit/IOKit/IOMemoryCursor.h
index dfe9eed8c..048cdf584 100644
--- a/iokit/IOKit/IOMemoryCursor.h
+++ b/iokit/IOKit/IOMemoryCursor.h
@@ -378,85 +378,5 @@ public:
     }
 };
 
-/************************* class IODBDMAMemoryCursor *************************/
-
-#if defined(__ppc__)
-
-struct IODBDMADescriptor;
-
-/*!
-    @class IODBDMAMemoryCursor
-    @abstract An IOMemoryCursor subclass that outputs a vector of DBDMA descriptors where the address and length are filled in.  
-    @discussion The IODBDMAMemoryCursor would be used when the DBDMA hardware is available for the device for that will use an instance of this cursor.
-*/
-class IODBDMAMemoryCursor : public IOMemoryCursor
-{
-    OSDeclareDefaultStructors(IODBDMAMemoryCursor)
-
-public:
-/*! @function outputSegment
-    @abstract Outpust the given segment into the output segments array in address and length fields of an DBDMA descriptor.
-    @param segment The physical address and length that is next to be output.
-    @param segments Base of the output vector of DMA address length pairs.
-    @param segmentIndex Index to output 'segment' in the 'segments' array.
-*/
-    static void outputSegment(PhysicalSegment segment,
-			      void *	      segments,
-			      UInt32	      segmentIndex);
-
-/*! @defined dbdmaOutputSegment
-    @discussion Backward compatibility define for the old global function definition.  See IODBDMAMemoryCursor::outputSegment. */
-#define dbdmaOutputSegment IODBDMAMemoryCursor::outputSegment
-
-/*! @function withSpecification
-    @abstract Creates and initializes an IODBDMAMemoryCursor in one operation.
-    @discussion Factory function to create and initialize an IODBDMAMemoryCursor in one operation.  See also IODBDMAMemoryCursor::initWithSpecification.
-    @param maxSegmentSize Maximum allowable size for one segment.  Defaults to 0.
-    @param maxTransferSize Maximum size of an entire transfer.	Defaults to 0 indicating no maximum.
-    @param alignment Alignment restrictions on output physical addresses.  Not currently implemented.  Defaults to single byte alignment.
-    @result Returns a new memory cursor if successfully created and initialized, 0 otherwise.
-*/
-    static IODBDMAMemoryCursor * 
-	withSpecification(IOPhysicalLength maxSegmentSize,
-			  IOPhysicalLength maxTransferSize,
-			  IOPhysicalLength alignment = 1);
-
-/*! @function initWithSpecification
-    @abstract Primary initializer for the IODBDMAMemoryCursor class.
-    @param maxSegmentSize Maximum allowable size for one segment.  Defaults to 0.
-    @param maxTransferSize Maximum size of an entire transfer.	Defaults to 0 indicating no maximum.
-    @param alignment Alignment restrictions on output physical addresses.  Not currently implemented.  Defaults to single byte alignment.
-    @result Returns true if the inherited classes and this instance initialize successfully.
-*/
-    virtual bool initWithSpecification(IOPhysicalLength	 maxSegmentSize,
-				       IOPhysicalLength	 maxTransferSize,
-				       IOPhysicalLength	 alignment = 1);
-
-
-/*! @function getPhysicalSegments
-    @abstract Generates a DBDMA physical scatter/gather list given a memory descriptor.
-    @discussion Generates a list of DBDMA descriptors where the address and length fields are filled in appropriately.	But the client is expected to fill in the rest of the DBDMA descriptor as is appropriate for their particular hardware.  Wraps IOMemoryCursor::genPhysicalSegments.
-    @param descriptor IOMemoryDescriptor that describes the data associated with an I/O request. 
-    @param fromPosition Starting location of the I/O within a memory descriptor. 
-    @param segments Pointer to an array of DBDMA descriptors for the output physical scatter/gather list.  Be warned no room is left for a preamble in the output array.  'segments' should point to the first memory description slot in a DBDMA command.
-    @param maxSegments Maximum number of segments that can be written to the DBDMA descriptor table.
-    @param inMaxTransferSize Maximum transfer size is limited to that many bytes, otherwise it defaults to the maximum transfer size specified when the memory cursor was initialized.
-    @param transferSize Pointer to an IOByteCount variable that can contain the total size of the transfer being described.  Defaults to 0 indicating that no transfer size need be returned. 
-    @result If the descriptor is exhausted of memory, a zero is returned, otherwise the number of segments that were filled in is returned.
-*/
-    virtual UInt32 getPhysicalSegments(IOMemoryDescriptor * descriptor,
-				       IOByteCount	    fromPosition,
-				       IODBDMADescriptor *  segments,
-				       UInt32		    maxSegments,
-				       UInt32		    inMaxTransferSize = 0,
-				       IOByteCount	 *  transferSize = 0)
-    {
-	return genPhysicalSegments(descriptor, fromPosition, segments,
-				maxSegments, inMaxTransferSize, transferSize);
-    }
-};
-
-#endif /* defined(__ppc__) */
-
 #endif /* !_IOMEMORYCURSOR_H */
 
diff --git a/iokit/IOKit/IOMemoryDescriptor.h b/iokit/IOKit/IOMemoryDescriptor.h
index 866da4703..6e6961136 100644
--- a/iokit/IOKit/IOMemoryDescriptor.h
+++ b/iokit/IOKit/IOMemoryDescriptor.h
@@ -320,7 +320,7 @@ public:
     @param withLength The length of memory.
     @param options
         kIOMemoryDirectionMask (options:direction)	This nibble indicates the I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. 
-    @param task The task the virtual ranges are mapped into. Note that unlike IOMemoryDescriptor::withAddress(), kernel_task memory must be explicitly prepared when passed to this api.
+    @param task The task the virtual ranges are mapped into. Note that unlike IOMemoryDescriptor::withAddress(), kernel_task memory must be explicitly prepared when passed to this api. The task argument may be NULL to specify memory by physical address.
     @result The created IOMemoryDescriptor on success, to be released by the caller, or zero on failure. */
 
     static IOMemoryDescriptor * withAddressRange(
@@ -337,7 +337,7 @@ public:
     @param options
         kIOMemoryDirectionMask (options:direction)	This nibble indicates the I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. 
         kIOMemoryAsReference	For options:type = Virtual or Physical this indicate that the memory descriptor need not copy the ranges array into local memory.  This is an optimisation to try to minimise unnecessary allocations.
-    @param task The task each of the virtual ranges are mapped into. Note that unlike IOMemoryDescriptor::withAddress(), kernel_task memory must be explicitly prepared when passed to this api.
+    @param task The task each of the virtual ranges are mapped into. Note that unlike IOMemoryDescriptor::withAddress(), kernel_task memory must be explicitly prepared when passed to this api. The task argument may be NULL to specify memory by physical address.
     @result The created IOMemoryDescriptor on success, to be released by the caller, or zero on failure. */
 
     static IOMemoryDescriptor * withAddressRanges(
@@ -640,7 +640,7 @@ protected:
 public:
 /*! @function getVirtualAddress
     @abstract Accessor to the virtual address of the first byte in the mapping.
-    @discussion This method returns the virtual address of the first byte in the mapping.
+    @discussion This method returns the virtual address of the first byte in the mapping. Since the IOVirtualAddress is only 32bit in 32bit kernels, the getAddress() method should be used for compatibility with 64bit task mappings.
     @result A virtual address. */
 
     virtual IOVirtualAddress 	getVirtualAddress();
@@ -725,9 +725,25 @@ public:
 					 mach_vm_size_t       offset = 0);
 
 #ifdef __LP64__
+/*! @function getAddress
+    @abstract Accessor to the virtual address of the first byte in the mapping.
+    @discussion This method returns the virtual address of the first byte in the mapping.
+    @result A virtual address. */
+/*! @function getSize
+    @abstract Accessor to the length of the mapping.
+    @discussion This method returns the length of the mapping.
+    @result A byte count. */
     inline mach_vm_address_t 	getAddress() __attribute__((always_inline));
     inline mach_vm_size_t 	getSize() __attribute__((always_inline));
 #else /* !__LP64__ */
+/*! @function getAddress
+    @abstract Accessor to the virtual address of the first byte in the mapping.
+    @discussion This method returns the virtual address of the first byte in the mapping.
+    @result A virtual address. */
+/*! @function getSize
+    @abstract Accessor to the length of the mapping.
+    @discussion This method returns the length of the mapping.
+    @result A byte count. */
     virtual mach_vm_address_t 	getAddress();
     virtual mach_vm_size_t 	getSize();
 #endif /* !__LP64__ */
@@ -770,8 +786,6 @@ enum {
 };
 #endif /* XNU_KERNEL_PRIVATE */
 
-#if !defined(__LP64) || defined(_IOMEMORYDESCRIPTOR_INTERNAL_)
-
 // The following classes are private implementation of IOMemoryDescriptor - they
 // should not be referenced directly, just through the public API's in the 
 // IOMemoryDescriptor class. For example, an IOGeneralMemoryDescriptor instance
@@ -929,8 +943,6 @@ public:
 
 };
 
-#endif /* !defined(__LP64) || defined(_IOMEMORYDESCRIPTOR_INTERNAL_) */
-
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 #ifdef __LP64__
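Note: the clarified docs call out the NULL-task form of withAddressRange(). A minimal sketch describing a hypothetical device aperture by physical address and mapping it into the kernel:

    #include <IOKit/IOMemoryDescriptor.h>

    static IOMemoryMap * mapDeviceAperture(void)
    {
        IOMemoryDescriptor * md = IOMemoryDescriptor::withAddressRange(
                0xF0000000ULL,        // physical address (task == NULL)
                4096,                 // length, bytes
                kIODirectionInOut,
                NULL);                // NULL task selects physical addressing
        if (!md) return NULL;

        IOMemoryMap * map = md->map();     // map into kernel_task
        md->release();                     // the map retains what it needs
        return map;                        // caller releases; use map->getAddress()
    }
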
diff --git a/iokit/IOKit/IOMessage.h b/iokit/IOKit/IOMessage.h
index 77a1001aa..4a571b9d4 100644
--- a/iokit/IOKit/IOMessage.h
+++ b/iokit/IOKit/IOMessage.h
@@ -7,7 +7,7 @@
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
  * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
  * may not be used to create, or enable the creation or redistribution of,
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
@@ -32,16 +32,24 @@
 #include <IOKit/IOReturn.h>
 #include <IOKit/IOTypes.h>
 
+/*!
+ * @header IOMessage.h
+ *
+ * Defines message type constants for several IOKit messaging APIs.
+ *
+ */
+
 typedef UInt32 IOMessage;
 
 #define iokit_common_msg(message)          (UInt32)(sys_iokit|sub_iokit_common|message)
 #define iokit_family_msg(sub,message)      (UInt32)(sys_iokit|sub|message)
 
-/*! @defined iokit_vendor_specific_msg
-    @discussion iokit_vendor_specific_msg passes messages in the sub_iokit_vendor_specific
-    subsystem. It can be used to be generate messages that are used for private 
-    communication between vendor specific code with the  IOService::message() etc. APIs.
-*/
+/*! 
+ * @defined         iokit_vendor_specific_msg
+ * @discussion      iokit_vendor_specific_msg passes messages in the sub_iokit_vendor_specific
+ *                  subsystem. It can be used to generate messages that are used for private 
+ *                  communication between vendor specific code with the IOService::message() etc. APIs.
+ */
 #define iokit_vendor_specific_msg(message) (UInt32)(sys_iokit|sub_iokit_vendor_specific|message)
 
 #define kIOMessageServiceIsTerminated      iokit_common_msg(0x010)
@@ -54,29 +62,159 @@ typedef UInt32 IOMessage;
 
 #define kIOMessageServiceBusyStateChange   iokit_common_msg(0x120)
 
+#define kIOMessageConsoleSecurityChange    iokit_common_msg(0x128)
+
 #define kIOMessageServicePropertyChange    iokit_common_msg(0x130)
 
-#define kIOMessageCanDevicePowerOff        iokit_common_msg(0x200)
+#define kIOMessageCopyClientID             iokit_common_msg(0x330)
+
+#define kIOMessageSystemCapabilityChange   iokit_common_msg(0x340)
+#define kIOMessageDeviceSignaledWakeup     iokit_common_msg(0x350)
+
+
+/*!
+ * @defined         kIOMessageDeviceWillPowerOff
+ * @discussion      Indicates the device is about to move to a lower power state.
+ *                  Sent to IOKit interest notification clients of type <code>kIOAppPowerStateInterest</code>
+ *                  and <code>kIOGeneralInterest</code>.
+ */
 #define kIOMessageDeviceWillPowerOff       iokit_common_msg(0x210)
-#define kIOMessageDeviceWillNotPowerOff    iokit_common_msg(0x220)
+
+/*!
+ * @defined         kIOMessageDeviceHasPoweredOn
+ * @discussion      Indicates the device has just moved to a higher power state.
+ *                  Sent to IOKit interest notification clients of type <code>kIOAppPowerStateInterest</code>
+ *                  and <code>kIOGeneralInterest</code>.
+ */
 #define kIOMessageDeviceHasPoweredOn       iokit_common_msg(0x230)
 
-// IOService power mgt does not send
-//   kIOMessageDeviceWillPowerOn
-//   kIOMessageDeviceHasPoweredOff
-#define kIOMessageDeviceWillPowerOn        iokit_common_msg(0x215)
-#define kIOMessageDeviceHasPoweredOff      iokit_common_msg(0x225)
+/*! @group          In-kernel system shutdown and restart notifications
+ */
 
-#define kIOMessageCanSystemPowerOff        iokit_common_msg(0x240)
+/*!
+ * @defined         kIOMessageSystemWillPowerOff
+ * @discussion      Indicates an imminent system shutdown. Recipients have a limited 
+ *                  amount of time to respond; otherwise the system will time out and 
+ *                  shut down even without a response.
+ *                  Delivered to in-kernel IOKit drivers via <code>IOService::systemWillShutdown()</code>, 
+ *                  and to clients of <code>registerPrioritySleepWakeInterest()</code>.
+ *                  Never delivered to user space notification clients.
+ */
 #define kIOMessageSystemWillPowerOff       iokit_common_msg(0x250)
-#define kIOMessageSystemWillNotPowerOff    iokit_common_msg(0x260)
+
+/*!
+ * @defined         kIOMessageSystemWillRestart
+ * @discussion      Indicates an imminent system restart. Recipients have a limited 
+ *                  amount of time to respond; otherwise the system will time out and 
+ *                  restart even without a response.
+ *                  Delivered to in-kernel IOKit drivers via <code>IOService::systemWillShutdown()</code>, 
+ *                  and to clients of <code>registerPrioritySleepWakeInterest()</code>.
+ *                  Never delivered to user space notification clients.
+ */
+#define kIOMessageSystemWillRestart        iokit_common_msg(0x310)
+
+/*!
+ * @defined         kIOMessageSystemPagingOff
+ * @discussion      Indicates an imminent system shutdown; the paging device is now unavailable.
+ *                  Recipients have a limited amount of time to respond; otherwise the
+ *                  system will time out and shut down even without a response.
+ *                  Delivered to clients of <code>registerPrioritySleepWakeInterest()</code>.
+ *                  Never delivered to user space notification clients.
+ */
+#define kIOMessageSystemPagingOff       iokit_common_msg(0x255)
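For the in-kernel delivery path this group describes, a driver typically overrides IOService::systemWillShutdown(); a hedged sketch (driver class hypothetical, `super` per the usual IOKit `#define super IOService` convention):

    // The specifier is one of the shutdown/restart messages above; calling
    // the superclass acknowledges, letting shutdown proceed.
    void MyDriver::systemWillShutdown(IOOptionBits specifier)
    {
        if (specifier == kIOMessageSystemWillPowerOff) {
            // flush volatile device state before power is lost
        }
        super::systemWillShutdown(specifier);
    }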
+
+
+/*! @group          System sleep and wake notifications
+ */
+
+/*!
+ * @defined         kIOMessageCanSystemSleep
+ * @discussion      Announces/Requests permission to proceed to system sleep.
+ *                  Delivered to in-kernel IOKit drivers via <code>kIOGeneralInterest</code>
+ *                  and <code>kIOPriorityPowerStateInterest</code>.
+ *                  Delivered to user clients of <code>IORegisterForSystemPower</code>.
+ */
 #define kIOMessageCanSystemSleep           iokit_common_msg(0x270)
-#define kIOMessageSystemWillSleep          iokit_common_msg(0x280)
+
+/*!
+ * @defined         kIOMessageSystemWillNotSleep
+ * @discussion      Announces that the system has retracted a previous attempt to sleep; 
+ *                  it follows <code>kIOMessageCanSystemSleep</code>.
+ *                  Delivered to in-kernel IOKit drivers via <code>kIOGeneralInterest</code>
+ *                  and <code>kIOPriorityPowerStateInterest</code>.
+ *                  Delivered to user clients of <code>IORegisterForSystemPower</code>.
+ */
 #define kIOMessageSystemWillNotSleep       iokit_common_msg(0x290)
-#define kIOMessageSystemHasPoweredOn       iokit_common_msg(0x300)
-#define kIOMessageSystemWillRestart        iokit_common_msg(0x310)
+
+/*!
+ * @defined         kIOMessageSystemWillSleep
+ * @discussion      Announces that sleep is beginning.
+ *                  Delivered to in-kernel IOKit drivers via <code>kIOGeneralInterest</code>
+ *                  and <code>kIOPriorityPowerStateInterest</code>.
+ *                  Delivered to user clients of <code>IORegisterForSystemPower</code>.
+ */
+#define kIOMessageSystemWillSleep          iokit_common_msg(0x280)
+
+/*!
+ * @defined         kIOMessageSystemWillPowerOn
+ * @discussion      Announces that the system is beginning to power the device tree; most 
+ *                  devices are unavailable at this point.
+ *                  Delivered to in-kernel IOKit drivers via <code>kIOGeneralInterest</code>
+ *                  and <code>kIOPriorityPowerStateInterest</code>.
+ *                  Delivered to user clients of <code>IORegisterForSystemPower</code>.
+ */
 #define kIOMessageSystemWillPowerOn        iokit_common_msg(0x320)
 
-#define kIOMessageCopyClientID		   iokit_common_msg(0x330)
+/*!
+ * @defined         kIOMessageSystemHasPoweredOn
+ * @discussion      Announces that the system and its devices have woken up.
+ *                  Delivered to in-kernel IOKit drivers via <code>kIOGeneralInterest</code>
+ *                  and <code>kIOPriorityPowerStateInterest</code>.
+ *                  Delivered to user clients of <code>IORegisterForSystemPower</code>.
+ */
+#define kIOMessageSystemHasPoweredOn       iokit_common_msg(0x300)
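A minimal sketch of an in-kernel interest handler for this group, assuming registration through registerSleepWakeInterest() (declared in IOKit/pwr_mgt/RootDomain.h); the handler name is hypothetical:

    static IOReturn
    mySleepWakeHandler(void *target, void *refCon, UInt32 messageType,
                       IOService *provider, void *messageArgument,
                       vm_size_t argSize)
    {
        switch (messageType) {
        case kIOMessageSystemWillSleep:
            // quiesce hardware before the system sleeps
            break;
        case kIOMessageSystemHasPoweredOn:
            // restore state after wake
            break;
        default:
            break;
        }
        return kIOReturnSuccess;
    }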
+
+/*! @group          Unused and deprecated notifications
+ */
+
+/*! 
+ * @defined         kIOMessageCanDevicePowerOff
+ * @discussion      Delivered to <code>kIOAppPowerStateInterest</code> clients of 
+ *                  devices that implement their own idle timeouts.
+ *                  This message type is almost never used.
+ */
+#define kIOMessageCanDevicePowerOff        iokit_common_msg(0x200)
+
+/*! 
+ * @defined         kIOMessageDeviceWillNotPowerOff
+ * @discussion      This IOKit interest notification is largely unused; 
+ *                  it's not very interesting.
+ */
+#define kIOMessageDeviceWillNotPowerOff    iokit_common_msg(0x220)
+
+/*! 
+ * @defined         kIOMessageSystemWillNotPowerOff
+ * @deprecated      This IOKit message is unused.
+ */
+#define kIOMessageSystemWillNotPowerOff    iokit_common_msg(0x260)
+
+/*!
+ * @defined         kIOMessageCanSystemPowerOff
+ * @deprecated      This IOKit message is unused.
+ */
+#define kIOMessageCanSystemPowerOff        iokit_common_msg(0x240)
+
+/*!
+ * @defined         kIOMessageDeviceWillPowerOn
+ * @discussion      IOService power management does not send kIOMessageDeviceWillPowerOn.
+ */
+#define kIOMessageDeviceWillPowerOn        iokit_common_msg(0x215)
+
+/*!
+ * @defined         kIOMessageDeviceHasPoweredOff
+ * @discussion      IOService power management does not send kIOMessageDeviceHasPoweredOff.
+ */
+#define kIOMessageDeviceHasPoweredOff      iokit_common_msg(0x225)
+
 
 #endif /* ! __IOKIT_IOMESSAGE_H */
diff --git a/iokit/IOKit/IONVRAM.h b/iokit/IOKit/IONVRAM.h
index a9337bd1d..15bf709f6 100644
--- a/iokit/IOKit/IONVRAM.h
+++ b/iokit/IOKit/IONVRAM.h
@@ -29,17 +29,20 @@
 #ifndef _IOKIT_IONVRAM_H
 #define _IOKIT_IONVRAM_H
 
+#ifdef __cplusplus
 #include <IOKit/IOKitKeys.h>
 #include <IOKit/IOService.h>
 #include <IOKit/IODeviceTreeSupport.h>
 #include <IOKit/nvram/IONVRAMController.h>
-
+#endif /* __cplusplus */
 
 #define kIODTNVRAMOFPartitionName       "common"
 #define kIODTNVRAMXPRAMPartitionName    "APL,MacOS75"
 #define kIODTNVRAMPanicInfoPartitonName "APL,OSXPanic"
 #define kIODTNVRAMFreePartitionName     "wwwwwwwwwwww"
 
+#define MIN_SYNC_NOW_INTERVAL (15*60) /* minimum mandated interval between NVRAM syncs, in seconds */
+
 enum {
   kIODTNVRAMImageSize        = 0x2000,
   kIODTNVRAMXPRAMSize        = 0x0100,
@@ -60,6 +63,8 @@ enum {
   kOFVariablePermKernelOnly
 };
 
+#ifdef __cplusplus
+
 class IODTNVRAM : public IOService
 {
   OSDeclareDefaultStructors(IODTNVRAM);
@@ -86,6 +91,8 @@ private:
   UInt32            _piPartitionSize;
   UInt8             *_piImage;
   bool              _systemPaniced;
+  SInt32            _lastDeviceSync;
+  bool              _freshInterval;
   
   virtual UInt8 calculatePartitionChecksum(UInt8 *partitionHeader);
   virtual IOReturn initOFVariables(void);
@@ -162,6 +169,9 @@ public:
 				       IOByteCount length);  
   
   virtual IOByteCount savePanicInfo(UInt8 *buffer, IOByteCount length);
+  virtual bool safeToSync(void);
 };
 
+#endif /* __cplusplus */
+
 #endif /* !_IOKIT_IONVRAM_H */
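A plausible sketch of the throttling that safeToSync() and MIN_SYNC_NOW_INTERVAL imply, using the _lastDeviceSync/_freshInterval members added above; the actual IONVRAM.cpp implementation may differ:

    bool IODTNVRAM::safeToSync(void)
    {
        AbsoluteTime now;
        UInt64       now_ns;
        SInt32       now_secs;

        // Convert the current uptime to whole seconds.
        clock_get_uptime(&now);
        absolutetime_to_nanoseconds(now, &now_ns);
        now_secs = (SInt32)(now_ns / NSEC_PER_SEC);

        // Permit a sync on the first request of an interval, or once
        // MIN_SYNC_NOW_INTERVAL seconds have elapsed since the last one.
        if (_freshInterval || (now_secs > _lastDeviceSync + MIN_SYNC_NOW_INTERVAL)) {
            _lastDeviceSync = now_secs;
            _freshInterval  = false;
            return true;
        }
        return false;
    }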
diff --git a/iokit/IOKit/IOPlatformExpert.h b/iokit/IOKit/IOPlatformExpert.h
index f75a3e3ab..a27cf64ad 100644
--- a/iokit/IOKit/IOPlatformExpert.h
+++ b/iokit/IOKit/IOPlatformExpert.h
@@ -57,7 +57,8 @@ enum {
   kPEHangCPU,
   kPEUPSDelayHaltCPU,
   kPEPanicRestartCPU,
-  kPEPanicSync
+  kPEPanicSync,
+  kPEPagingOff
 };
 extern int (*PE_halt_restart)(unsigned int type);
 extern int PEHaltRestart(unsigned int type);
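All of these type codes funnel through the same entry point; an illustrative call using a code from the enum above (the helper is hypothetical):

    // Request a panic-style restart through the platform expert.
    static void restartAfterFatalError(void)
    {
        (void) PEHaltRestart(kPEPanicRestartCPU);
    }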
@@ -68,6 +69,12 @@ extern UInt32 PESavePanicInfo(UInt8 *buffer, UInt32 length);
 extern long PEGetGMTTimeOfDay( void );
 extern void PESetGMTTimeOfDay( long secs );
   
+/* unless it's a "well-known" property, these read/write the value as raw data */
+
+extern boolean_t PEWriteNVRAMProperty(const char *symbol, const void *value, const unsigned int len);
+
+extern boolean_t PEReadNVRAMProperty(const char *symbol, void *value, unsigned int *len);
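A usage sketch for the raw-data behavior described in the comment; the key name and helper are hypothetical:

    // Persist a small binary blob under a custom NVRAM key.
    static void saveBootCounter(uint32_t counter)
    {
        if (!PEWriteNVRAMProperty("my-boot-counter", &counter, sizeof(counter))) {
            // NVRAM not available yet, or the write was rejected
        }
    }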
+
 #ifdef __cplusplus
 } /* extern "C" */
 
diff --git a/iokit/IOKit/IOService.h b/iokit/IOKit/IOService.h
index f2f513929..c3282f8ea 100644
--- a/iokit/IOKit/IOService.h
+++ b/iokit/IOKit/IOService.h
@@ -134,6 +134,7 @@ extern const OSSymbol *		gIOBusyInterest;
 extern const OSSymbol *		gIOOpenInterest;
 extern const OSSymbol *		gIOAppPowerStateInterest;
 extern const OSSymbol *		gIOPriorityPowerStateInterest;
+extern const OSSymbol *		gIOConsoleSecurityInterest;
 
 extern const OSSymbol *		gIODeviceMemoryKey;
 extern const OSSymbol *		gIOInterruptControllersKey;
@@ -434,25 +435,6 @@ private:
     OSMetaClassDeclareReservedUnused(IOService, 46);
     OSMetaClassDeclareReservedUnused(IOService, 47);
 
-#ifdef __ppc__
-    OSMetaClassDeclareReservedUnused(IOService, 48);
-    OSMetaClassDeclareReservedUnused(IOService, 49);
-    OSMetaClassDeclareReservedUnused(IOService, 50);
-    OSMetaClassDeclareReservedUnused(IOService, 51);
-    OSMetaClassDeclareReservedUnused(IOService, 52);
-    OSMetaClassDeclareReservedUnused(IOService, 53);
-    OSMetaClassDeclareReservedUnused(IOService, 54);
-    OSMetaClassDeclareReservedUnused(IOService, 55);
-    OSMetaClassDeclareReservedUnused(IOService, 56);
-    OSMetaClassDeclareReservedUnused(IOService, 57);
-    OSMetaClassDeclareReservedUnused(IOService, 58);
-    OSMetaClassDeclareReservedUnused(IOService, 59);
-    OSMetaClassDeclareReservedUnused(IOService, 60);
-    OSMetaClassDeclareReservedUnused(IOService, 61);
-    OSMetaClassDeclareReservedUnused(IOService, 62);
-    OSMetaClassDeclareReservedUnused(IOService, 63);
-#endif
-
 public:
 /*! @function getState
     @abstract Accessor for IOService state bits, not normally needed or used outside IOService.
@@ -1220,6 +1202,8 @@ public:
     static void setPMRootDomain( class IOPMrootDomain * rootDomain );
     static IOReturn catalogNewDrivers( OSOrderedSet * newTables );
     uint64_t getAccumulatedBusyTime( void );
+    static void updateConsoleUsers(OSArray * consoleUsers, IOMessage systemMessage);
+    static void consoleLockTimer(thread_call_param_t p0, thread_call_param_t p1);
 
 private:
     static IOReturn waitMatchIdle( UInt32 ms );
@@ -1312,10 +1296,13 @@ private:
     static void terminateThread( void * arg, wait_result_t unused );
     static void terminateWorker( IOOptionBits options );
     static void actionWillTerminate( IOService * victim, IOOptionBits options, 
-                                        OSArray * doPhase2List );
-    static void actionDidTerminate( IOService * victim, IOOptionBits options );
-    static void actionFinalize( IOService * victim, IOOptionBits options );
-    static void actionStop( IOService * client, IOService * provider );
+                                     OSArray * doPhase2List, void*, void * );
+    static void actionDidTerminate( IOService * victim, IOOptionBits options,
+                                    void *, void *, void *);
+    static void actionFinalize( IOService * victim, IOOptionBits options,
+                                void *, void *, void *);
+    static void actionStop( IOService * client, IOService * provider,
+                            void *, void *, void *);
 
 	APPLE_KEXT_COMPATIBILITY_VIRTUAL
     IOReturn resolveInterrupt(IOService *nub, int source);
@@ -1337,8 +1324,8 @@ public:
     virtual void PMinit( void );
 
 /*! @function PMstop
-    @abstract Frees and removes the driver from power management.
-    @discussion The power managment variables don't exist after this call and the power managment methods in the caller shouldn't be called.    
+    @abstract Stops power management for the driver.
+    @discussion Removes the driver from the power plane and stops its power management. This method is synchronous with respect to any power management method invocation (e.g. <code>setPowerState</code> or <code>setAggressiveness</code>), so when it returns it is guaranteed those power management methods will not be entered. The driver should not call any power management methods after this call.
     Calling <code>PMstop</code> cleans up for the three power management initialization calls: @link PMinit PMinit@/link, @link joinPMtree joinPMtree@/link, and @link registerPowerDriver registerPowerDriver@/link. */
 
     virtual void PMstop( void );
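Given the synchronous guarantee above, the usual teardown order is PMstop() first; a hedged sketch (driver class hypothetical):

    void MyDriver::stop(IOService *provider)
    {
        // After PMstop() returns, no setPowerState()/setAggressiveness()
        // invocation can still be running in this driver.
        PMstop();
        super::stop(provider);
    }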
@@ -1368,6 +1355,7 @@ public:
 /*! @function registerInterestedDriver
     @abstract Allows an IOService object to register interest in the changing power state of a power-managed IOService object.
     @discussion Call <code>registerInterestedDriver</code> on the IOService object you are interested in receiving power state messages from, and pass a pointer to the interested driver (<code>this</code>) as an argument.
+    The interested driver is retained until the power interest is removed by calling <code>deRegisterInterestedDriver</code>.
     The interested driver should override @link powerStateWillChangeTo powerStateWillChangeTo@/link and @link powerStateDidChangeTo powerStateDidChangeTo@/link to receive these power change messages.
     Interested drivers must acknowledge power changes in <code>powerStateWillChangeTo</code> or <code>powerStateDidChangeTo</code>, either via return value or later calls to @link acknowledgePowerChange acknowledgePowerChange@/link.
     @param theDriver The driver of interest adds this pointer to the list of interested drivers. It informs drivers on this list before and after the power change.
@@ -1378,7 +1366,8 @@ public:
 
 /*! @function deRegisterInterestedDriver
     @abstract De-registers power state interest from a previous call to <code>registerInterestedDriver</code>.
-    @discussion Most drivers do not need to override <code>deRegisterInterestedDriver</code>.
+    @discussion The retain from <code>registerInterestedDriver</code> is released. This method is synchronous with respect to any <code>powerStateWillChangeTo</code> or <code>powerStateDidChangeTo</code> call targeting the interested driver, so when this method returns it is guaranteed those interest handlers will not be entered.
+    Most drivers do not need to override <code>deRegisterInterestedDriver</code>.
     @param theDriver The interested driver previously passed into @link registerInterestedDriver registerInterestedDriver@/link.
     @result A return code that can be ignored by the caller. */
 
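A sketch of the retain/release pairing these two methods form (class and helpers hypothetical):

    void MyDriver::watchProvider(IOService *provider)
    {
        provider->registerInterestedDriver(this);   // provider retains us
    }

    void MyDriver::unwatchProvider(IOService *provider)
    {
        // Synchronous: no powerStateWillChangeTo()/powerStateDidChangeTo()
        // call into this driver is in flight once this returns.
        provider->deRegisterInterestedDriver(this); // releases the retain
    }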
@@ -1725,10 +1714,13 @@ protected:
 #ifdef XNU_KERNEL_PRIVATE
     /* Power management internals */
 public:
+    void idleTimerExpired( void );
     void settleTimerExpired( void );
-    IOReturn synchronizePowerTree( void );
-    bool assertPMThreadCall( void );
-    void deassertPMThreadCall( void );
+    IOReturn synchronizePowerTree( IOOptionBits options = 0, IOService * notifyRoot = 0 );
+    bool assertPMDriverCall( IOPMDriverCallEntry * callEntry, IOOptionBits options = 0, IOPMinformee * inform = 0 );
+    void deassertPMDriverCall( IOPMDriverCallEntry * callEntry );
+    IOReturn changePowerStateWithOverrideTo( unsigned long ordinal );
+    static const char * getIOMessageString( uint32_t msg );
 
 #ifdef __LP64__
     static IOWorkLoop * getPMworkloop( void );
@@ -1736,10 +1728,7 @@ public:
 
 protected:
     bool tellClientsWithResponse( int messageType );
-    bool tellClientsWithResponse( int messageType, bool (*)(OSObject *, void *) );
     void tellClients( int messageType );
-    void tellClients( int messageType, bool (*)(OSObject *, void *) );
-    IOReturn changePowerStateWithOverrideTo( unsigned long ordinal );
 
 private:
 #ifndef __LP64__
@@ -1752,44 +1741,44 @@ private:
     void PMfree( void );
     bool tellChangeDown1 ( unsigned long );
     bool tellChangeDown2 ( unsigned long );
-    IOReturn startPowerChange ( unsigned long, unsigned long, unsigned long, IOPowerConnection *, unsigned long );
+    IOReturn startPowerChange( IOPMPowerChangeFlags, IOPMPowerStateIndex, IOPMPowerFlags, IOPowerConnection *, IOPMPowerFlags );
 	void setParentInfo ( IOPMPowerFlags, IOPowerConnection *, bool );
-    IOReturn notifyAll ( int nextMachineState, bool is_prechange );
-    bool notifyChild ( IOPowerConnection * nextObject, bool is_prechange );
+    IOReturn notifyAll ( uint32_t nextMS );
+    bool notifyChild ( IOPowerConnection * child );
 
     // power change initiated by driver
 	void OurChangeStart( void );
+    void OurSyncStart ( void );
     void OurChangeTellClientsPowerDown ( void );
     void OurChangeTellPriorityClientsPowerDown ( void );
+    void OurChangeTellCapabilityWillChange ( void );
     void OurChangeNotifyInterestedDriversWillChange ( void );
     void OurChangeSetPowerState ( void );
     void OurChangeWaitForPowerSettle ( void );
     void OurChangeNotifyInterestedDriversDidChange ( void );
+    void OurChangeTellCapabilityDidChange ( void );
     void OurChangeFinish ( void );
-    void OurSyncStart ( void );
 
     // downward power change initiated by a power parent
 	IOReturn ParentChangeStart( void );
-    void ParentDownTellPriorityClientsPowerDown ( void );
-    void ParentDownNotifyInterestedDriversWillChange ( void );
-    void ParentDownNotifyDidChangeAndAcknowledgeChange ( void );
-    void ParentDownSetPowerState ( void );
-    void ParentDownWaitForPowerSettle ( void );
-    void ParentAcknowledgePowerChange ( void );
-    
-    // upward power change initiated by a power parent
-    void ParentUpSetPowerState ( void );
-    void ParentUpWaitForSettleTime ( void );
-    void ParentUpNotifyInterestedDriversDidChange ( void );
+    void ParentChangeTellPriorityClientsPowerDown ( void );
+    void ParentChangeTellCapabilityWillChange ( void );
+    void ParentChangeNotifyInterestedDriversWillChange ( void );
+    void ParentChangeSetPowerState ( void );
+    void ParentChangeWaitForPowerSettle ( void );
+    void ParentChangeNotifyInterestedDriversDidChange ( void );
+    void ParentChangeTellCapabilityDidChange ( void );
+    void ParentChangeAcknowledgePowerChange ( void );
     
     void all_done ( void );
     void start_ack_timer ( void );
     void stop_ack_timer ( void );
     void startSettleTimer( void );
     bool checkForDone ( void );
-    bool responseValid ( unsigned long x, int pid );
+    bool responseValid ( uint32_t x, int pid );
     void computeDesiredState ( unsigned long tempDesire = 0 );
     void rebuildChildClampBits ( void );
+    void tellSystemCapabilityChange( uint32_t nextMS );
 
 	static void ack_timer_expired( thread_call_param_t, thread_call_param_t );
 	static IOReturn actionAckTimerExpired(OSObject *, void *, void *, void *, void * );
@@ -1797,8 +1786,10 @@ private:
 	static IOPMRequest * acquirePMRequest( IOService * target, IOOptionBits type, IOPMRequest * active = 0 );
 	static void releasePMRequest( IOPMRequest * request );
 	static void pmDriverCallout( IOService * from );
-	static void pmTellClientWithResponse( OSObject * object, void * context );
 	static void pmTellAppWithResponse( OSObject * object, void * context );
+	static void pmTellClientWithResponse( OSObject * object, void * context );
+    static void pmTellCapabilityAppWithResponse ( OSObject * object, void * arg );
+    static void pmTellCapabilityClientWithResponse( OSObject * object, void * arg );
 	bool ackTimerTick( void );
 	void addPowerChild1( IOPMRequest * request );
 	void addPowerChild2( IOPMRequest * request );
@@ -1831,14 +1822,15 @@ private:
 	void driverInformPowerChange( void );
 	bool isPMBlocked( IOPMRequest * request, int count );
 	void notifyChildren( void );
-	void notifyChildrenDone( void );
+	void notifyChildrenOrdered( void );
+	void notifyChildrenDelayed( void );
     void cleanClientResponses ( bool logErrors );
-    void idleTimerExpired( IOTimerEventSource * );
 	void updatePowerClient( const OSSymbol * client, uint32_t powerState );
 	void removePowerClient( const OSSymbol * client );
 	uint32_t getPowerStateForClient( const OSSymbol * client );
     IOReturn requestPowerState( const OSSymbol * client, uint32_t state );
-    IOReturn requestDomainPower( unsigned long ourPowerState, IOOptionBits options = 0 );
+    IOReturn requestDomainPower( IOPMPowerStateIndex ourPowerState, IOOptionBits options = 0 );
+    void waitForPMDriverCall( IOService * target = 0 );
 #endif /* XNU_KERNEL_PRIVATE */
 };
 
diff --git a/iokit/IOKit/IOServicePM.h b/iokit/IOKit/IOServicePM.h
index 96edc11c0..2a2c4c400 100644
--- a/iokit/IOKit/IOServicePM.h
+++ b/iokit/IOKit/IOServicePM.h
@@ -47,6 +47,15 @@ class IOPMRequest;
 class IOPMRequestQueue;
 class IOPMCompletionQueue;
 
+typedef unsigned long       IOPMPowerStateIndex;
+typedef uint32_t            IOPMPowerChangeFlags;
+
+struct IOPMDriverCallEntry {
+    queue_chain_t   link;
+    thread_t        thread;
+    IOService *     target;
+};
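These kernel-private types back the assertPMDriverCall()/deassertPMDriverCall() pair declared in IOService.h; an illustrative fragment (variables hypothetical) of the bracketing pattern they enable:

    IOPMDriverCallEntry callEntry;

    if (assertPMDriverCall(&callEntry)) {
        // The callout is now tracked, so PMstop() and
        // waitForPMDriverCall() can wait for it to drain.
        driver->setPowerState(stateIndex, this);
        deassertPMDriverCall(&callEntry);
    }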
+
 /* Binary compatibility with drivers that access pm_vars */
 #ifdef __LP64__
 #define PM_VARS_SUPPORT     0
diff --git a/iokit/IOKit/IOSharedLock.h b/iokit/IOKit/IOSharedLock.h
index eadfc407d..795007451 100644
--- a/iokit/IOKit/IOSharedLock.h
+++ b/iokit/IOKit/IOSharedLock.h
@@ -1,19 +1,14 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2010 Apple Computer, Inc. All rights reserved.
  *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * @APPLE_LICENSE_HEADER_START@
  * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
  * 
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
@@ -23,70 +18,20 @@
  * Please see the License for the specific language governing rights and
  * limitations under the License.
  * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1998 Apple Computer, Inc.  All rights reserved. 
- *
- * HISTORY
- *
- */
-
-/*
- * Multiprocessor locks used within the shared memory area between the
- * kernel and event system.  These must work in both user and kernel mode.
- * 
- * These routines are public, for the purpose of writing frame buffer device
- * drivers which handle their own cursors.  Certain architectures define a
- * generic display class which handles cursor drawing and is subclassed by
- * driver writers.  These drivers need not be concerned with the following
- * types and definitions.
- *
- * The ev_lock(), ev_unlock(), and ev_try_lock() functions are available only
- * to drivers built in or dynamically loaded into the kernel, and to DPS
- * drivers built in or dynamically loaded into the Window Server.  They do not
- * exist in any shared library.
- *
- * --> They're now in IOKit user lib.
+ * @APPLE_LICENSE_HEADER_END@
  */
 
 #ifndef _IOKIT_IOSHAREDLOCK_H
 #define _IOKIT_IOSHAREDLOCK_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// should be 32 bytes on PPC
-typedef volatile int		IOSharedLockData;
-typedef IOSharedLockData    *   IOSharedLock;
-
-#define IOSpinLockInit(l)	(*(l) = (IOSharedLockData)0)
-
-#ifndef KERNEL
-extern void IOSpinLock(IOSharedLock l);
-#endif
+#include <libkern/OSAtomic.h>
 
-extern void IOSpinUnlock(IOSharedLock l);
-extern boolean_t IOTrySpinLock(IOSharedLock l);
+#define IOSharedLockData OSSpinLock
+#define ev_lock_data_t   OSSpinLock
 
-/* exact same stuff & implementation */
-
-typedef IOSharedLockData 	ev_lock_data_t;
-typedef ev_lock_data_t	    *	ev_lock_t;
-
-#define ev_init_lock(l)		(*(l) = (ev_lock_data_t)0)
-// needs isync?
-//#define ev_is_locked(l)	(*(l) != (ev_lock_data_t)0)
-
-#ifndef KERNEL
-extern void ev_lock(ev_lock_t l);		// Spin lock!
+#ifdef KERNEL
+#define ev_unlock(l)     OSSpinLockUnlock(l)
+#define ev_try_lock(l)   OSSpinLockTry(l)
 #endif
 
-extern void ev_unlock(ev_lock_t l);
-extern boolean_t ev_try_lock(ev_lock_t l);
-
-#ifdef __cplusplus
-}
-#endif
 #endif /* ! _IOKIT_IOSHAREDLOCK_H */
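With the header now a thin wrapper over OSSpinLock, kernel-side usage reduces to the two macros; a minimal sketch (lock variable and function hypothetical):

    #include <IOKit/IOSharedLock.h>

    static ev_lock_data_t gSharedCursorLock = 0;   // zero means unlocked

    void drawCursorIfUncontended(void)
    {
        if (ev_try_lock(&gSharedCursorLock)) {     // OSSpinLockTry()
            // touch the shared frame-buffer state
            ev_unlock(&gSharedCursorLock);         // OSSpinLockUnlock()
        }
    }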
diff --git a/iokit/IOKit/IOStatistics.h b/iokit/IOKit/IOStatistics.h
new file mode 100644
index 000000000..0c4a1abb2
--- /dev/null
+++ b/iokit/IOKit/IOStatistics.h
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2010 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _IOKIT_STATISTICS_H
+#define _IOKIT_STATISTICS_H
+
+#define IOSTATISTICS_SIG            'IOST'
+#define IOSTATISTICS_SIG_USERCLIENT 'IOSU'
+#define IOSTATISTICS_SIG_WORKLOOP   'IOSW'
+
+/* Update when the binary format changes */
+#define IOSTATISTICS_VER            0x2
+
+enum {
+	kIOStatisticsDriverNameLength  = 64,
+	kIOStatisticsClassNameLength   = 64,
+	kIOStatisticsProcessNameLength = 20
+};
+
+enum {
+	kIOStatisticsDerivedEventSourceCounter = 0,
+	kIOStatisticsTimerEventSourceCounter,
+	kIOStatisticsCommandGateCounter,
+	kIOStatisticsCommandQueueCounter,
+	kIOStatisticsInterruptEventSourceCounter,
+	kIOStatisticsFilterInterruptEventSourceCounter
+};
+
+typedef uint32_t IOStatisticsCounterType;
+
+enum {
+	kIOStatisticsGeneral = 0,
+	kIOStatisticsWorkLoop,
+	kIOStatisticsUserClient
+};
+
+/* Keep our alignments as intended */
+
+#pragma pack(4)
+
+/* Event Counters */ 
+
+typedef struct IOStatisticsInterruptEventSources {
+	uint32_t created;
+	uint32_t produced;
+	uint32_t checksForWork;	
+} IOStatisticsInterruptEventSources;
+
+typedef struct IOStatisticsTimerEventSources {
+	uint32_t created;
+	uint32_t openGateCalls;	
+	uint32_t closeGateCalls;
+	uint64_t timeOnGate;
+	uint32_t timeouts;
+	uint32_t checksForWork;
+} IOStatisticsTimerEventSources;
+
+typedef struct IOStatisticsDerivedEventSources {
+	uint32_t created;
+	uint32_t openGateCalls;
+	uint32_t closeGateCalls;
+	uint64_t timeOnGate;
+} IOStatisticsDerivedEventSources;
+
+typedef struct IOStatisticsCommandGates {
+	uint32_t created;
+	uint32_t openGateCalls;
+	uint32_t closeGateCalls;
+	uint64_t timeOnGate;
+	uint32_t actionCalls;
+} IOStatisticsCommandGates;
+ 
+typedef struct IOStatisticsCommandQueues {
+	uint32_t created;
+	uint32_t actionCalls;
+} IOStatisticsCommandQueues;
+ 
+typedef struct IOStatisticsUserClients {
+	uint32_t created;
+	uint32_t clientCalls;
+} IOStatisticsUserClients;
+
+/* General mode */
+
+typedef struct IOStatisticsHeader {
+	uint32_t sig; /* 'IOST' */
+	uint32_t ver; /* incremented with every data revision */
+
+	uint32_t seq; /* sequence ID */
+
+	uint32_t globalStatsOffset;
+	uint32_t kextStatsOffset;
+	uint32_t memoryStatsOffset;
+	uint32_t classStatsOffset;
+	uint32_t counterStatsOffset;
+	uint32_t kextIdentifiersOffset;
+	uint32_t classNamesOffset;
+
+	/* struct IOStatisticsGlobal */
+	/* struct IOStatisticsKext */
+	/* struct IOStatisticsMemory */
+	/* struct IOStatisticsClass */
+	/* struct IOStatisticsCounter */
+	/* struct IOStatisticsKextIdentifier */
+	/* struct IOStatisticsClassName */
+} IOStatisticsHeader;
+
+typedef struct IOStatisticsGlobal {
+	uint32_t kextCount;
+	uint32_t classCount;
+	uint32_t workloops;
+} IOStatisticsGlobal;
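A hedged consumer-side sketch of walking this layout: validate the header, then follow an offset to its section (obtaining the buffer from the sysctl interface is assumed and not shown):

    static const IOStatisticsGlobal *
    getGlobalStats(const void *buf, size_t len)
    {
        const IOStatisticsHeader *hdr = (const IOStatisticsHeader *) buf;

        if (len < sizeof(*hdr) || hdr->sig != IOSTATISTICS_SIG ||
            hdr->ver != IOSTATISTICS_VER) {
            return NULL;    // wrong producer or format revision
        }
        if (hdr->globalStatsOffset + sizeof(IOStatisticsGlobal) > len) {
            return NULL;    // truncated buffer
        }
        return (const IOStatisticsGlobal *)
            ((const char *) buf + hdr->globalStatsOffset);
    }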
+
+typedef struct IOStatisticsKext {
+	uint32_t loadTag;
+	uint32_t loadSize;
+	uint32_t wiredSize;
+	uint32_t classes; /* Number of classes owned */
+	uint32_t classIndexes[]; /* Variable length array of owned class indexes */
+} IOStatisticsKext;
+
+typedef struct IOStatisticsMemory {
+	uint32_t allocatedSize;
+	uint32_t freedSize;
+	uint32_t allocatedAlignedSize;
+	uint32_t freedAlignedSize;
+	uint32_t allocatedContiguousSize;
+	uint32_t freedContiguousSize;
+	uint32_t allocatedPageableSize;
+	uint32_t freedPageableSize;
+} IOStatisticsMemory;
+
+typedef struct IOStatisticsClass {
+	uint32_t classID;
+	uint32_t superClassID;
+	uint32_t classSize;
+} IOStatisticsClass;
+
+typedef struct IOStatisticsCounter {
+	uint32_t classID;
+	uint32_t classInstanceCount;
+	struct IOStatisticsUserClients userClientStatistics;
+	struct IOStatisticsInterruptEventSources interruptEventSourceStatistics;
+	struct IOStatisticsInterruptEventSources filterInterruptEventSourceStatistics;
+	struct IOStatisticsTimerEventSources timerEventSourceStatistics;
+	struct IOStatisticsCommandGates commandGateStatistics;
+	struct IOStatisticsCommandQueues commandQueueStatistics;
+	struct IOStatisticsDerivedEventSources derivedEventSourceStatistics;
+} IOStatisticsCounter;
+
+typedef struct IOStatisticsKextIdentifier {
+	char identifier[kIOStatisticsDriverNameLength];
+} IOStatisticsKextIdentifier;
+
+typedef struct IOStatisticsClassName {
+	char name[kIOStatisticsClassNameLength];
+} IOStatisticsClassName;
+
+/* WorkLoop mode */
+
+typedef struct IOStatisticsWorkLoop {
+	uint32_t attachedEventSources;
+	uint64_t timeOnGate;
+	uint32_t kextLoadTag;
+	uint32_t dependentKexts;
+	uint32_t dependentKextLoadTags[];
+} IOStatisticsWorkLoop;
+
+typedef struct IOStatisticsWorkLoopHeader {
+	uint32_t sig; /* 'IOSW' */
+	uint32_t ver; /* incremented with every data revision */
+	uint32_t seq; /* sequence ID */
+	uint32_t workloopCount;
+	struct IOStatisticsWorkLoop workLoopStats;
+} IOStatisticsWorkLoopHeader;
+
+/* UserClient mode */
+
+typedef struct IOStatisticsUserClientCall {
+	char processName[kIOStatisticsProcessNameLength];
+	int32_t pid;
+	uint32_t calls;
+} IOStatisticsUserClientCall;
+
+typedef struct IOStatisticsUserClientHeader {
+	uint32_t sig; /* 'IOSU' */
+	uint32_t ver; /* incremented with every data revision */	
+	uint32_t seq; /* sequence ID */
+	uint32_t processes;
+	struct IOStatisticsUserClientCall userClientCalls[];
+} IOStatisticsUserClientHeader;
+
+#pragma pack()
+
+#endif /* _IOKIT_STATISTICS_H */
diff --git a/iokit/IOKit/IOStatisticsPrivate.h b/iokit/IOKit/IOStatisticsPrivate.h
new file mode 100644
index 000000000..a41230c3d
--- /dev/null
+++ b/iokit/IOKit/IOStatisticsPrivate.h
@@ -0,0 +1,359 @@
+/*
+ * Copyright (c) 2010 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef __IOKIT_STATISTICS_PRIVATE_H
+#define __IOKIT_STATISTICS_PRIVATE_H
+
+#if IOKITSTATS
+
+#include <sys/queue.h>
+#include <sys/tree.h>
+
+#include <libkern/c++/OSKext.h>
+#include <libkern/OSDebug.h>
+
+#include <IOKit/IOMemoryDescriptor.h>
+#include <IOKit/IOStatistics.h>
+
+#ifndef KERNEL
+#error IOStatisticsPrivate.h is for kernel use only
+#endif
+
+/* Defines */
+#define IOKIT_STATISTICS_RECORDED_USERCLIENT_PROCS 20
+
+#ifndef __probable
+#define __probable(x) x
+#endif
+
+/* Forward declarations */
+class IOWorkLoop;
+class IOUserClient;
+class IOEventSource;
+
+struct IOEventSourceCounter;
+struct IOUserClientCounter;
+struct IOWorkLoopCounter;
+struct IOUserClientProcessEntry;
+
+struct KextNode;
+
+/* Allocation tracking */
+
+enum {
+	kIOStatisticsMalloc = 0,
+	kIOStatisticsFree,
+	kIOStatisticsMallocAligned,
+	kIOStatisticsFreeAligned,
+	kIOStatisticsMallocContiguous,
+	kIOStatisticsFreeContiguous,
+	kIOStatisticsMallocPageable,
+	kIOStatisticsFreePageable,
+	kIOStatisticsAllocCount
+};
+
+TAILQ_HEAD(ProcessEntryList, IOUserClientProcessEntry);
+
+/* Tree and list structs */
+
+typedef struct ClassNode {
+	RB_ENTRY(ClassNode) tLink;
+	SLIST_ENTRY(ClassNode) lLink;
+	struct KextNode *parentKext;
+	uint32_t classID;
+	uint32_t superClassID;
+	const OSMetaClass *metaClass;
+	SLIST_HEAD(, IOEventSourceCounter) counterList;
+	SLIST_HEAD(, IOUserClientCounter) userClientList;
+} ClassNode;
+
+typedef struct KextNode {
+	RB_ENTRY(KextNode) link;
+	RB_ENTRY(KextNode) addressLink;
+	OSKext *kext;
+	OSKextLoadTag loadTag;
+	vm_offset_t address;
+	vm_offset_t address_end;
+	uint32_t memoryCounters[kIOStatisticsAllocCount];
+	uint32_t classes;
+	SLIST_HEAD(, ClassNode) classList;
+	SLIST_HEAD(, IOWorkLoopCounter) workLoopList;
+	ProcessEntryList userClientCallList;
+} KextNode;
+
+/* User client tracing */
+
+typedef struct IOUserClientProcessEntry {
+	TAILQ_ENTRY(IOUserClientProcessEntry) link;
+	char processName[kIOStatisticsProcessNameLength];
+	int32_t pid;
+	uint32_t calls;
+} IOUserClientProcessEntry;
+
+/* Counters */
+
+typedef struct IOInterruptEventSourceCounter {
+	uint32_t produced;
+	uint32_t checksForWork;
+} IOInterruptEventSourceCounter;
+
+typedef struct IOTimerEventSourceCounter {
+	uint32_t timeouts;
+	uint32_t checksForWork;
+} IOTimerEventSourceCounter;
+
+typedef struct IOCommandGateCounter {
+	uint32_t actionCalls;
+} IOCommandGateCounter;
+
+typedef struct IOCommandQueueCounter {
+	uint32_t actionCalls;
+} IOCommandQueueCounter;
+
+typedef struct IOEventSourceCounter {
+	SLIST_ENTRY(IOEventSourceCounter) link;
+	ClassNode *parentClass;
+	IOStatisticsCounterType type;
+	uint64_t startTimeStamp;
+	uint64_t timeOnGate;
+	uint32_t closeGateCalls;
+	uint32_t openGateCalls;
+	union {
+		IOInterruptEventSourceCounter interrupt;
+		IOInterruptEventSourceCounter filter;
+		IOTimerEventSourceCounter timer;
+		IOCommandGateCounter commandGate;
+		IOCommandQueueCounter commandQueue;
+	} u;
+} IOEventSourceCounter;
+
+typedef struct IOWorkLoopDependency {
+	RB_ENTRY(IOWorkLoopDependency) link;
+	OSKextLoadTag loadTag;
+} IOWorkLoopDependency;
+
+typedef struct IOWorkLoopCounter {    
+	SLIST_ENTRY(IOWorkLoopCounter) link;
+	KextNode *parentKext;
+	int attachedEventSources;
+	IOWorkLoop *workLoop;
+	uint64_t startTimeStamp;
+	uint64_t timeOnGate;
+	uint32_t closeGateCalls;
+	uint32_t openGateCalls;
+	typedef RB_HEAD(DependencyTree, IOWorkLoopDependency) DependencyTreeHead;
+	DependencyTreeHead dependencyHead;
+	static int loadTagCompare(IOWorkLoopDependency *e1, IOWorkLoopDependency *e2);
+	RB_PROTOTYPE_SC(static, DependencyTree, IOWorkLoopDependency, dependencyLink, KextTagCompare);
+} IOWorkLoopCounter;
+
+typedef struct IOUserClientCounter {
+	SLIST_ENTRY(IOUserClientCounter) link;
+	ClassNode *parentClass;
+	uint32_t clientCalls;
+} IOUserClientCounter;
+
+class IOStatistics {
+	static bool enabled;
+
+	static IORWLock *lock;
+
+	static uint32_t sequenceID;
+
+	static uint32_t lastKextIndex;
+	static uint32_t lastClassIndex;
+
+	static uint32_t loadedKexts;
+	static uint32_t registeredClasses;
+	static uint32_t registeredCounters;
+	static uint32_t registeredWorkloops;
+
+	static uint32_t attachedEventSources;
+
+	static KextNode *kextHint;
+	
+	static IOWorkLoopDependency *nextWorkLoopDependency;
+
+	typedef RB_HEAD(KextTree, KextNode) KextTreeHead;
+	static KextTreeHead kextHead;
+	static int kextNodeCompare(KextNode *e1, KextNode *e2);
+	RB_PROTOTYPE_SC(static, KextTree, KextNode, link, kextNodeCompare);
+
+	typedef RB_HEAD(KextAddressTree, KextNode) KextAddressTreeHead;
+	static KextAddressTreeHead kextAddressHead;
+	static int kextAddressNodeCompare(KextNode *e1, KextNode *e2);
+	RB_PROTOTYPE_SC(static, KextAddressTree, KextNode, addressLink, kextAddressNodeCompare);
+
+	typedef RB_HEAD(ClassTree, ClassNode) ClassTreeHead;
+	static ClassTreeHead classHead;
+	static int classNodeCompare(ClassNode *e1, ClassNode *e2);
+	RB_PROTOTYPE_SC(static, ClassTree, ClassNode, tLink, classNodeCompare);
+
+	static int oid_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, int arg2, struct sysctl_req *req);
+
+	static uint32_t copyGlobalStatistics(IOStatisticsGlobal *stats);
+	static uint32_t copyKextStatistics(IOStatisticsKext *stats);
+	static uint32_t copyMemoryStatistics(IOStatisticsMemory *stats);
+	static uint32_t copyClassStatistics(IOStatisticsClass *stats);
+	static uint32_t copyCounterStatistics(IOStatisticsCounter *stats);
+	static uint32_t copyKextIdentifiers(IOStatisticsKextIdentifier *kextIDs);
+	static uint32_t copyClassNames(IOStatisticsClassName *classNames);
+
+	static uint32_t copyWorkLoopStatistics(IOStatisticsWorkLoop *workLoopStats);
+
+	static uint32_t copyUserClientStatistics(IOStatisticsUserClientHeader *stats, uint32_t loadTag);
+
+	static void updateAllocationCounter(vm_offset_t address, uint32_t index, vm_size_t size);
+
+	static void storeUserClientCallInfo(IOUserClient *userClient, IOUserClientCounter *counter);
+
+	static KextNode *getKextNodeFromBacktrace(boolean_t write);
+	static void releaseKextNode(KextNode *node);
+
+public:
+	
+	static void initialize();
+
+	static void onKextLoad(OSKext *kext, kmod_info_t *kmod_info);
+	static void onKextUnload(OSKext *kext);
+	static void onClassAdded(OSKext *parentKext, OSMetaClass *metaClass);
+	static void onClassRemoved(OSKext *parentKext, OSMetaClass *metaClass);
+
+	static IOEventSourceCounter *registerEventSource(OSObject *inOwner);
+	static void unregisterEventSource(IOEventSourceCounter *counter);
+	
+	static IOWorkLoopCounter *registerWorkLoop(IOWorkLoop *workLoop);
+	static void unregisterWorkLoop(IOWorkLoopCounter *counter);
+
+	static IOUserClientCounter *registerUserClient(IOUserClient *userClient);
+	static void unregisterUserClient(IOUserClientCounter *counter);
+
+	static int getStatistics(sysctl_req *req);
+	static int getWorkLoopStatistics(sysctl_req *req);
+	static int getUserClientStatistics(sysctl_req *req);
+
+	/* Inlines for counter manipulation.
+	 * 				
+	 * NOTE: counter access is not expressly guarded here so as not to incur performance penalties
+	 * in the instrumented parent objects. Writes are arranged so as to be protected by pre-existing
+	 * locks in the parent where appropriate, but reads have no such guarantee. Counters should
+	 * therefore be regarded as providing an indication of current state, rather than precisely
+	 * accurate statistics. 
+	 */
+	
+	static inline void setCounterType(IOEventSourceCounter *counter, IOStatisticsCounterType type) {
+		if (counter) {
+			counter->type = type;
+		}
+	}
+
+	static inline void countOpenGate(IOEventSourceCounter *counter) {
+		if (counter) {
+			counter->timeOnGate += mach_absolute_time() - counter->startTimeStamp;
+			counter->openGateCalls++;
+		}
+	}
+
+	static inline void countCloseGate(IOEventSourceCounter *counter) {
+		if (counter) {
+			counter->startTimeStamp = mach_absolute_time();
+			counter->closeGateCalls++;
+		}
+	}
+
+	/* Interrupt */
+	static inline void countInterruptCheckForWork(IOEventSourceCounter *counter) {
+		if (counter) {
+			counter->u.interrupt.checksForWork++;
+		}
+	}
+
+	static inline void countInterrupt(IOEventSourceCounter *counter) {
+		if (counter) {
+			counter->u.interrupt.produced++;
+		}
+	}
+
+	/* CommandQueue */
+	static inline void countCommandQueueActionCall(IOEventSourceCounter *counter) {
+		if (counter) {
+			counter->u.commandQueue.actionCalls++;
+		}
+	}
+
+	/* CommandGate */
+	static inline void countCommandGateActionCall(IOEventSourceCounter *counter) {
+		if (counter) {
+			counter->u.commandGate.actionCalls++;
+		}
+	}
+
+	/* Timer */
+	static inline void countTimerTimeout(IOEventSourceCounter *counter) {
+		if (counter) {
+			counter->u.timer.timeouts++;
+		}
+	}
+
+	/* WorkLoop */
+	static void attachWorkLoopEventSource(IOWorkLoopCounter *wlc, IOEventSourceCounter *esc);
+	static void detachWorkLoopEventSource(IOWorkLoopCounter *wlc, IOEventSourceCounter *esc);
+
+	static inline void countWorkLoopOpenGate(IOWorkLoopCounter *counter) {
+		if (counter) {
+			counter->timeOnGate += mach_absolute_time() - counter->startTimeStamp;
+			counter->openGateCalls++;
+		}
+	}
+
+	static inline void countWorkLoopCloseGate(IOWorkLoopCounter *counter) {
+		if (counter) {
+			counter->startTimeStamp = mach_absolute_time();
+			counter->closeGateCalls++;
+		}
+	}
+
+	/* IOLib allocations */	
+	static void countAlloc(uint32_t index, vm_size_t size);
+
+	/* UserClient */
+	static void countUserClientCall(IOUserClient *client);
+};
+
+#else
+
+/* Statistics disabled */
+
+class IOStatistics {
+public:
+	static void initialize() {}
+};
+
+#endif /* IOKITSTATS */
+
+#endif /* __IOKIT_STATISTICS_PRIVATE_H */
diff --git a/iokit/IOKit/IOTimeStamp.h b/iokit/IOKit/IOTimeStamp.h
index a1d22f4d3..b551fd723 100644
--- a/iokit/IOKit/IOTimeStamp.h
+++ b/iokit/IOKit/IOTimeStamp.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -190,4 +190,8 @@ IOTimeStamp(uintptr_t csc,
 #define IOSERVICE_TERMINATE_STOP_DEFER		16	/* 0x05080040 */
 #define IOSERVICE_TERMINATE_DONE		17	/* 0x05080044 */
 
+#define IOSERVICE_KEXTD_ALIVE		18	/* 0x05080048 */
+#define IOSERVICE_KEXTD_READY		19	/* 0x0508004C */
+#define IOSERVICE_REGISTRY_QUIET		20	/* 0x05080050 */
+
 #endif /* ! IOKIT_IOTIMESTAMP_H */
diff --git a/iokit/IOKit/IOTimerEventSource.h b/iokit/IOKit/IOTimerEventSource.h
index 7cc0d38c3..bbbeaf964 100644
--- a/iokit/IOKit/IOTimerEventSource.h
+++ b/iokit/IOKit/IOTimerEventSource.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2000, 2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -95,10 +95,6 @@ protected:
     @abstract Sub-class implementation of free method, frees calloutEntry */
     virtual void free();
 
-/*! @function checkForWork
-    @abstract Have to implement it is mandatory in $link IOEventSource, but IOTimerEventSources don't actually use this work-loop mechanism. */
-    virtual bool checkForWork();
-
     virtual void setWorkLoop(IOWorkLoop *workLoop);
 
 public:
diff --git a/iokit/IOKit/IOTypes.h b/iokit/IOKit/IOTypes.h
index 9f5d5a3f7..3c41ab070 100644
--- a/iokit/IOKit/IOTypes.h
+++ b/iokit/IOKit/IOTypes.h
@@ -164,7 +164,12 @@ typedef unsigned int	IOAlignment;
 #ifndef __IOKIT_PORTS_DEFINED__
 #define __IOKIT_PORTS_DEFINED__
 #ifdef KERNEL
+#ifdef __cplusplus
+class OSObject;
+typedef OSObject * io_object_t;
+#else
 typedef struct OSObject * io_object_t;
+#endif
 #else /* KERNEL */
 typedef mach_port_t	io_object_t;
 #endif /* KERNEL */
diff --git a/iokit/IOKit/IOUserClient.h b/iokit/IOKit/IOUserClient.h
index 7283ebd41..c3c40c57a 100644
--- a/iokit/IOKit/IOUserClient.h
+++ b/iokit/IOKit/IOUserClient.h
@@ -37,6 +37,9 @@
 #include <IOKit/IOService.h>
 #include <IOKit/OSMessageNotification.h>
 
+#if IOKITSTATS
+#include <IOKit/IOStatisticsPrivate.h>
+#endif
 
 enum {
     kIOUCTypeMask	= 0x0000000f,
@@ -164,18 +167,29 @@ enum {
 class IOUserClient : public IOService
 {
     OSDeclareAbstractStructors(IOUserClient)
+#if IOKITSTATS
+    friend class IOStatistics;
+#endif
 
 protected:
 /*! @struct ExpansionData
    @discussion This structure will be used to expand the capabilities of this class in the future.
 */    
-    struct ExpansionData { };
+    struct ExpansionData {
+#if IOKITSTATS
+	    IOUserClientCounter *counter;
+#else
+	    void *iokitstatsReserved;
+#endif
+    };
 
 /*! @var reserved
     Reserved for future use.  (Internal use only) 
 */
     ExpansionData * reserved;
 
+    bool reserve();
+
 #ifdef XNU_KERNEL_PRIVATE
 public:
 #else
diff --git a/iokit/IOKit/IOWorkLoop.h b/iokit/IOKit/IOWorkLoop.h
index 808329ada..e248a9b3b 100644
--- a/iokit/IOKit/IOWorkLoop.h
+++ b/iokit/IOKit/IOWorkLoop.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,14 +25,6 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
-Copyright (c) 1998 Apple Computer, Inc.	 All rights reserved.
-HISTORY
-    1998-7-13	Godfrey van der Linden(gvdl)
-	Created.
-    1998-10-30	Godfrey van der Linden(gvdl)
-	Converted to C++
-*/
 
 #ifndef __IOKIT_IOWORKLOOP_H
 #define __IOKIT_IOWORKLOOP_H
@@ -44,6 +36,10 @@ HISTORY
 
 #include <IOKit/system.h>
 
+#if IOKITSTATS
+#include <IOKit/IOStatisticsPrivate.h>
+#endif
+
 class IOEventSource;
 class IOTimerEventSource;
 class IOCommandGate;
@@ -87,7 +83,14 @@ private:
     @abstract Static function that calls the threadMain function. 
 */
     static void threadMainContinuation(IOWorkLoop *self);
-
+	
+/*! @function eventSourcePerformsWork
+	@abstract Checks whether the event source passed in overrides checkForWork() to perform any work.
+	IOWorkLoop uses this to determine whether the event source should be polled in runEventSources().
+	@param inEventSource The event source to check.
+*/
+	bool eventSourcePerformsWork(IOEventSource *inEventSource);
+	
 protected:
 
 /*! @typedef maintCommandEnum
@@ -138,6 +141,15 @@ protected:
 */    
     struct ExpansionData {
 	IOOptionBits options;
+	IOEventSource *passiveEventChain;
+#if DEBUG
+	void * allocationBacktrace[16];
+#endif /* DEBUG */
+#if IOKITSTATS
+	struct IOWorkLoopCounter *counter;
+#else
+	void *iokitstatsReserved;
+#endif
     };
 
 /*! @var reserved
@@ -237,13 +249,13 @@ public:
 
 /*! @function enableAllInterrupts
     @abstract Calls enable() in all interrupt event sources.
-    @discussion For all event sources (ES) for which IODynamicCast(IOInterruptEventSource, ES) is valid, in eventChain call enable() function.  See IOEventSource::enable().
+    @discussion For all event sources (ES) for which OSDynamicCast(IOInterruptEventSource, ES) is valid, in eventChain call enable() function.  See IOEventSource::enable().
 */
     virtual void enableAllInterrupts() const;
 
 /*! @function disableAllInterrupts
     @abstract Calls disable() in all interrupt event sources.
-    @discussion For all event sources (ES) for which IODynamicCast(IOInterruptEventSource, ES) is valid,  in eventChain call disable() function.  See IOEventSource::disable().
+    @discussion For all event sources (ES) for which OSDynamicCast(IOInterruptEventSource, ES) is valid,  in eventChain call disable() function.  See IOEventSource::disable().
 */
     virtual void disableAllInterrupts() const;
 
@@ -252,6 +264,9 @@ protected:
     // Internal APIs used by event sources to control the thread
     friend class IOEventSource;
     friend class IOTimerEventSource;
+#if IOKITSTATS
+    friend class IOStatistics;
+#endif
     virtual void signalWorkAvailable();
     virtual void openGate();
     virtual void closeGate();
diff --git a/iokit/IOKit/Makefile b/iokit/IOKit/Makefile
index 23d52274b..7b3c8df3e 100644
--- a/iokit/IOKit/Makefile
+++ b/iokit/IOKit/Makefile
@@ -3,9 +3,13 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
 export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
 export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 
-IOKIT_FRAMEDIR = $(FRAMEDIR)/IOKit.framework/Versions/A
-export INCDIR = $(IOKIT_FRAMEDIR)/Headers
-export LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders
+IOKIT_INCVERS = A
+IOKIT_INCFRAME = $(FRAMEDIR)/IOKit.framework
+IOKIT_INCDIR = $(IOKIT_INCFRAME)/Versions/$(IOKIT_INCVERS)/Headers
+IOKIT_PINCDIR = $(IOKIT_INCFRAME)/Versions/$(IOKIT_INCVERS)/PrivateHeaders
+
+export INCDIR = $(IOKIT_INCDIR)
+export LCLDIR = $(IOKIT_PINCDIR)
 
 include $(MakeInc_cmd)
 include $(MakeInc_def)
@@ -18,20 +22,13 @@ INSTINC_SUBDIRS = \
 	rtc \
 	system_management
 
-INSTINC_SUBDIRS_PPC = \
-	ppc
-
-INSTINC_SUBDIRS_I386 = \
-	i386
+INSTINC_SUBDIRS_I386 = 
 
-INSTINC_SUBDIRS_X86_64 = \
-	i386
+INSTINC_SUBDIRS_X86_64 = 
 
-INSTINC_SUBDIRS_ARM = \
-	arm
+INSTINC_SUBDIRS_ARM = 
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
-EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC}
 EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
 EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM}
@@ -41,7 +38,9 @@ NOT_EXPORT_HEADERS =
 NOT_KF_MI_HEADERS  = $(NOT_EXPORT_HEADERS)			\
 		     IOKitKeysPrivate.h IOCPU.h			\
 		     IOHibernatePrivate.h IOPolledInterface.h	\
-		     IOCommandQueue.h IOLocksPrivate.h
+		     IOCommandQueue.h IOLocksPrivate.h 		\
+		     AppleKeyStoreInterface.h			\
+		     IOStatistics.h IOStatisticsPrivate.h
 
 NOT_LOCAL_HEADERS = 
 
@@ -51,7 +50,7 @@ INSTALL_MI_LIST	= IOBSD.h IOKitKeys.h IOKitServer.h IOReturn.h\
 		  IOSharedLock.h IOTypes.h OSMessageNotification.h\
 		  IODataQueueShared.h IOMessage.h
                   
-INSTALL_MI_LCL_LIST = IOKitKeysPrivate.h IOHibernatePrivate.h IOLocksPrivate.h
+INSTALL_MI_LCL_LIST = IOKitKeysPrivate.h IOHibernatePrivate.h IOLocksPrivate.h IOStatistics.h AppleKeyStoreInterface.h
 
 INSTALL_MI_DIR = .
 
diff --git a/iokit/IOKit/i386/IOSharedLockImp.h b/iokit/IOKit/i386/IOSharedLockImp.h
deleted file mode 100644
index cb15fb1d8..000000000
--- a/iokit/IOKit/i386/IOSharedLockImp.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1998 Apple Computer, Inc.  All rights reserved. 
- *
- * HISTORY
- *
- */
-
-/* 	Copyright (c) 1992 NeXT Computer, Inc.  All rights reserved. 
- *
- * EventShmemLock.h -	Shared memory area locks for use between the
- *			WindowServer and the Event Driver.
- *
- *
- * HISTORY
- * 29 April 1992    Mike Paquette at NeXT
- *      Created. 
- *
- * Multiprocessor locks used within the shared memory area between the
- * kernel and event system.  These must work in both user and kernel mode.
- * The locks are defined in an include file so they get exported to the local
- * include file area.
- *
- * This is basically a ripoff of the spin locks under the cthreads packages.
- */
-
-#ifndef _IOKIT_IOSHAREDLOCKIMP_H
-#define _IOKIT_IOSHAREDLOCKIMP_H
-
-#include <architecture/i386/asm_help.h>
-
-#ifndef KERNEL
-#error this file for kernel only; comm page has user versions
-#endif
-
-	TEXT
-
-/*
- * void
- * ev_unlock(p)
- *	int *p;
- *
- * Unlock the lock pointed to by p.
- */
-LEAF(_ev_unlock, 0)
-LEAF(_IOSpinUnlock, 0)
-#if __x86_64__
-	movl		$0, (%rdi)
-#else
-	movl		4(%esp), %ecx
-	movl		$0, (%ecx)
-#endif
-END(_ev_unlock)
-
-
-/*
- * int
- * ev_try_lock(p)
- *	int *p;
- *
- * Try to lock p.  Return zero if not successful.
- */
-
-LEAF(_ev_try_lock, 0)
-LEAF(_IOTrySpinLock, 0)
-#if __x86_64__
-	xorl		%eax, %eax
-	orl		$-1, %edx
-	lock
-	cmpxchgl	%edx, (%rdi)
-	setz		%dl
-	movzbl		%dl, %eax
-#else
-        movl            4(%esp), %ecx 
-	xorl		%eax, %eax
-        lock
-        cmpxchgl        %ecx, (%ecx)
-	jne	1f
-	movl	$1, %eax		/* yes */
-	ret
-1:
-	xorl	%eax, %eax		/* no */
-#endif
-END(_ev_try_lock)
-
-
-#endif /* ! _IOKIT_IOSHAREDLOCKIMP_H */
diff --git a/iokit/IOKit/machine/Makefile b/iokit/IOKit/machine/Makefile
index 4a77745b4..14dd46d76 100644
--- a/iokit/IOKit/machine/Makefile
+++ b/iokit/IOKit/machine/Makefile
@@ -14,12 +14,10 @@ MI_DIR = machine
 EXCLUDE_HEADERS = 
 
 INSTINC_SUBDIRS =
-INSTINC_SUBDIRS_PPC =
 INSTINC_SUBDIRS_I386 =
 INSTINC_SUBDIRS_X86_64 =
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
-EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC}
 EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
 
diff --git a/iokit/IOKit/nvram/Makefile b/iokit/IOKit/nvram/Makefile
index 3235dd242..2a3da6d3c 100644
--- a/iokit/IOKit/nvram/Makefile
+++ b/iokit/IOKit/nvram/Makefile
@@ -14,13 +14,11 @@ MI_DIR = nvram
 NOT_EXPORT_HEADERS = 
 
 INSTINC_SUBDIRS =
-INSTINC_SUBDIRS_PPC =
 INSTINC_SUBDIRS_I386 =
 INSTINC_SUBDIRS_X86_64 =
 INSTINC_SUBDIRS_ARM =
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
-EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC}
 EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
 EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM}
diff --git a/iokit/IOKit/platform/Makefile b/iokit/IOKit/platform/Makefile
index 644b0b114..7d5079f87 100644
--- a/iokit/IOKit/platform/Makefile
+++ b/iokit/IOKit/platform/Makefile
@@ -15,13 +15,11 @@ NOT_EXPORT_HEADERS =
 NOT_KF_MI_HEADERS  = 
 
 INSTINC_SUBDIRS =
-INSTINC_SUBDIRS_PPC =
 INSTINC_SUBDIRS_I386 =
 INSTINC_SUBDIRS_X86_64 =
 INSTINC_SUBDIRS_ARM =
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
-EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC}
 EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
 EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM}
diff --git a/iokit/IOKit/power/Makefile b/iokit/IOKit/power/Makefile
index dcebcdb9b..fd1518bd7 100644
--- a/iokit/IOKit/power/Makefile
+++ b/iokit/IOKit/power/Makefile
@@ -14,13 +14,11 @@ MI_DIR = power
 NOT_EXPORT_HEADERS = 
 
 INSTINC_SUBDIRS =
-INSTINC_SUBDIRS_PPC =
 INSTINC_SUBDIRS_I386 =
 INSTINC_SUBDIRS_X86_64 =
 INSTINC_SUBDIRS_ARM =
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
-EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC}
 EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
 EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM}
diff --git a/iokit/IOKit/ppc/IODBDMA.h b/iokit/IOKit/ppc/IODBDMA.h
deleted file mode 100644
index afe1337bb..000000000
--- a/iokit/IOKit/ppc/IODBDMA.h
+++ /dev/null
@@ -1,367 +0,0 @@
-/*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1997 Apple Computer, Inc.
- *
- *
- * HISTORY
- *
- * Simon Douglas  10 Nov 97
- * - first checked in, mostly from MacOS DBDMA.i, machdep/ppc/dbdma.h
- *	but use byte reverse ops.
- */
-
-#ifndef _IODBDMA_H_
-#define _IODBDMA_H_
-
-#include <IOKit/IOTypes.h>
-#include <libkern/OSByteOrder.h>
-
-
-/* DBDMA definitions */
-
-struct IODBDMAChannelRegisters {
-    volatile unsigned long 	channelControl;
-    volatile unsigned long 	channelStatus;
-    volatile unsigned long 	commandPtrHi;		/* implementation optional*/
-    volatile unsigned long 	commandPtrLo;
-    volatile unsigned long 	interruptSelect;	/* implementation optional*/
-    volatile unsigned long 	branchSelect;		/* implementation optional*/
-    volatile unsigned long 	waitSelect;		/* implementation optional*/
-    volatile unsigned long 	transferModes;		/* implementation optional*/
-    volatile unsigned long 	data2PtrHi;		/* implementation optional*/
-    volatile unsigned long 	data2PtrLo;		/* implementation optional*/
-
-    volatile unsigned long 	reserved1;
-    volatile unsigned long 	addressHi;		/* implementation optional*/
-    volatile unsigned long 	reserved2[4];
-    volatile unsigned long 	unimplemented[16];
-
-/* This structure must remain fully padded to 256 bytes.*/
-    volatile unsigned long 	undefined[32];
-};
-typedef struct IODBDMAChannelRegisters IODBDMAChannelRegisters;
-
-/* These constants define the DB-DMA channel control words and status flags.*/
-
-enum {
-	kdbdmaRun	= 0x00008000,
-	kdbdmaPause	= 0x00004000,
-	kdbdmaFlush	= 0x00002000,
-	kdbdmaWake	= 0x00001000,
-	kdbdmaDead	= 0x00000800,
-	kdbdmaActive	= 0x00000400,
-	kdbdmaBt	= 0x00000100,
-	kdbdmaS7	= 0x00000080,
-	kdbdmaS6	= 0x00000040,
-	kdbdmaS5	= 0x00000020,
-	kdbdmaS4	= 0x00000010,
-	kdbdmaS3	= 0x00000008,
-	kdbdmaS2	= 0x00000004,
-	kdbdmaS1	= 0x00000002,
-	kdbdmaS0	= 0x00000001
-};
-
-
-#define	IOSetDBDMAChannelControlBits(mask)	( ((mask) | (mask) << 16) )
-#define	IOClearDBDMAChannelControlBits(mask)	( (mask) << 16)
-
-
-/* This structure defines the DB-DMA channel command descriptor.*/
-
-/*
-   *** WARNING:	Endian-ness issues must be considered when performing load/store! ***
-*/
-
-struct IODBDMADescriptor {
-	unsigned long            operation;   /* cmd || key || i || b || w || reqCount*/
-	unsigned long            address;
-	volatile unsigned long   cmdDep;
-	volatile unsigned long   result;      /* xferStatus || resCount*/
-};
-typedef struct IODBDMADescriptor IODBDMADescriptor;
-
-/* These constants define the DB-DMA channel command operations and modifiers.*/
-
-
-enum {
-/* Command.cmd operations*/
-	kdbdmaOutputMore	= 0,
-	kdbdmaOutputLast	= 1,
-	kdbdmaInputMore		= 2,
-	kdbdmaInputLast		= 3,
-	kdbdmaStoreQuad		= 4,
-	kdbdmaLoadQuad		= 5,
-	kdbdmaNop		= 6,
-	kdbdmaStop		= 7
-};
-
-
-enum {
-/* Command.key modifiers (choose one for INPUT, OUTPUT, LOAD, and STORE)*/
-	kdbdmaKeyStream0	= 0,	/* default modifier*/
-	kdbdmaKeyStream1	= 1,
-	kdbdmaKeyStream2	= 2,
-	kdbdmaKeyStream3	= 3,
-	kdbdmaKeyRegs		= 5,
-	kdbdmaKeySystem		= 6,
-	kdbdmaKeyDevice		= 7,
-
-	kdbdmaIntNever		= 0,	/* default modifier*/
-	kdbdmaIntIfTrue		= 1,
-	kdbdmaIntIfFalse	= 2,
-	kdbdmaIntAlways		= 3,
-
-	kdbdmaBranchNever	= 0,	/* default modifier*/
-	kdbdmaBranchIfTrue	= 1,
-	kdbdmaBranchIfFalse	= 2,
-	kdbdmaBranchAlways	= 3,
-
-	kdbdmaWaitNever		= 0,	/* default modifier*/
-	kdbdmaWaitIfTrue	= 1,
-	kdbdmaWaitIfFalse	= 2,
-	kdbdmaWaitAlways	= 3,
-
-	kdbdmaCommandMask	= (long)0xFFFF0000,
-	kdbdmaReqCountMask	= 0x0000FFFF
-};
-
-
-/* These constants define the DB-DMA channel command results.*/
-
-enum {
-	/* result masks*/
-	kdbdmaStatusRun		= kdbdmaRun << 16,
-	kdbdmaStatusPause	= kdbdmaPause << 16,
-	kdbdmaStatusFlush	= kdbdmaFlush << 16,
-	kdbdmaStatusWake	= kdbdmaWake << 16,
-	kdbdmaStatusDead	= kdbdmaDead << 16,
-	kdbdmaStatusActive	= kdbdmaActive << 16,
-	kdbdmaStatusBt		= kdbdmaBt << 16,
-	kdbdmaStatusS7		= kdbdmaS7 << 16,
-	kdbdmaStatusS6		= kdbdmaS6 << 16,
-	kdbdmaStatusS5		= kdbdmaS5 << 16,
-	kdbdmaStatusS4		= kdbdmaS4 << 16,
-	kdbdmaStatusS3		= kdbdmaS3 << 16,
-	kdbdmaStatusS2		= kdbdmaS2 << 16,
-	kdbdmaStatusS1		= kdbdmaS1 << 16,
-	kdbdmaStatusS0		= kdbdmaS0 << 16,
-	kdbdmaResCountMask	= 0x0000FFFF,
-	kdbdmaXferStatusMask	= 0xFFFF0000
-};
-
-
-/*  These macros are IODBDMAChannelRegisters accessor functions. */
-
-#define IOSetDBDMAChannelRegister(registerSetPtr,field,value)	\
-OSWriteSwapInt32(registerSetPtr,offsetof(IODBDMAChannelRegisters,field),value)
-
-#define IOGetDBDMAChannelRegister(registerSetPtr, field)	\
-OSReadSwapInt32(registerSetPtr,offsetof(IODBDMAChannelRegisters, field))
-
-
-/* 	void IOSetDBDMAChannelControl (IODBDMAChannelRegisters *registerSetPtr, unsigned long ctlValue); */
-
-#define IOSetDBDMAChannelControl(registerSetPtr,ctlValue)		\
-do {									\
-    eieio();								\
-    IOSetDBDMAChannelRegister(registerSetPtr,channelControl,ctlValue);	\
-    eieio();								\
-} while(0)
-
-/* 	unsigned long IOGetDBDMAChannelStatus (IODBDMAChannelRegisters *registerSetPtr); */
-
-#define IOGetDBDMAChannelStatus(registerSetPtr)		\
-	IOGetDBDMAChannelRegister(registerSetPtr,channelStatus)
-
-/* 	unsigned long IOGetDBDMACommandPtr (IODBDMAChannelRegisters *registerSetPtr); */
-
-#define IOGetDBDMACommandPtr(registerSetPtr)			\
-	IOGetDBDMAChannelRegister(registerSetPtr,commandPtrLo)
-
-/* 	void IOSetDBDMACommandPtr (IODBDMAChannelRegisters *registerSetPtr, unsigned long cclPtr); */
-
-#define IOSetDBDMACommandPtr(registerSetPtr,cclPtr)			\
-do {									\
-    IOSetDBDMAChannelRegister(registerSetPtr,commandPtrHi,0);		\
-    eieio();								\
-    IOSetDBDMAChannelRegister(registerSetPtr,commandPtrLo,cclPtr);	\
-    eieio();								\
-} while(0)
-
-
-/* 	unsigned long IOGetDBDMAInterruptSelect (IODBDMAChannelRegisters *registerSetPtr); */
-
-#define IOGetDBDMAInterruptSelect(registerSetPtr)		\
-        IOGetDBDMAChannelRegister(registerSetPtr,interruptSelect)
-
-/* 	void IOSetDBDMAInterruptSelect (IODBDMAChannelRegisters *registerSetPtr, unsigned long intSelValue); */
-
-#define IOSetDBDMAInterruptSelect(registerSetPtr,intSelValue)		   \
-do {									   \
-    IOSetDBDMAChannelRegister(registerSetPtr,interruptSelect,intSelValue); \
-    eieio();								   \
-} while(0)
-
-/* 	unsigned long IOGetDBDMABranchSelect (IODBDMAChannelRegisters *registerSetPtr); */
-
-#define IOGetDBDMABranchSelect(registerSetPtr)				\
-	IOGetDBDMAChannelRegister(registerSetPtr,branchSelect)
-
-/* 	void IOSetDBDMABranchSelect (IODBDMAChannelRegisters *registerSetPtr, unsigned long braSelValue); */
-
-#define IOSetDBDMABranchSelect(registerSetPtr,braSelValue)		\
-do {									\
-    IOSetDBDMAChannelRegister(registerSetPtr,branchSelect,braSelValue);	\
-    eieio();								\
-} while(0)
-
-/* 	unsigned long IOGetDBDMAWaitSelect (IODBDMAChannelRegisters *registerSetPtr); */
-
-#define IOGetDBDMAWaitSelect(registerSetPtr)				\
-	IOGetDBDMAChannelRegister(registerSetPtr,waitSelect)
-
-/* 	void IOSetDBDMAWaitSelect (IODBDMAChannelRegisters *registerSetPtr, unsigned long waitSelValue); */
-
-#define IOSetDBDMAWaitSelect(registerSetPtr,waitSelValue)		\
-do {									\
-    IOSetDBDMAChannelRegister(registerSetPtr,waitSelect,waitSelValue);	\
-    eieio();								\
-} while(0)
-
-
-/*  These macros are IODBDMADescriptor accessor functions. */
-
-#define IOSetDBDMADescriptor(descPtr,field,value)		\
-OSWriteSwapInt32( descPtr, offsetof( IODBDMADescriptor, field), value)
-
-#define IOGetDBDMADescriptor(descPtr,field)	\
-OSReadSwapInt32( descPtr, offsetof( IODBDMADescriptor, field))
-
-#define	IOMakeDBDMAOperation(cmd,key,interrupt,branch,wait,count)	\
-    ( ((cmd) << 28) | ((key) << 24) | ((interrupt) << 20)		\
-      | ((branch) << 18) | ( (wait) << 16) | (count) )
-
-/* void  IOMakeDBDMADescriptor (IODBDMADescriptor *descPtr,
-				unsigned long cmd,
-				unsigned long key,
-				unsigned long interrupt,
-				unsigned long branch,
-				unsigned long wait,
-				unsigned long count,
-				unsigned long addr); */
-
-#define IOMakeDBDMADescriptor(descPtr,cmd,key,interrupt,branch,wait,count,addr)\
-do {									       \
-    IOSetDBDMADescriptor(descPtr, address, addr);			       \
-    IOSetDBDMADescriptor(descPtr, cmdDep,  0);				       \
-    IOSetDBDMADescriptor(descPtr, result,  0);				       \
-    eieio();								       \
-    IOSetDBDMADescriptor(descPtr, operation,				       \
-        IOMakeDBDMAOperation(cmd,key,interrupt,branch,wait,count));	       \
-    eieio();								       \
-} while(0)
-
-/* void IOMakeDBDMADescriptorDep (IODBDMADescriptor *descPtr,
-				unsigned long cmd,
-				unsigned long key,
-				unsigned long interrupt,
-				unsigned long branch,
-				unsigned long wait,
-				unsigned long count,
-				unsigned long addr,
-				unsigned long dep); */
-
-#define IOMakeDBDMADescriptorDep(descPtr,cmd,key,interrupt,branch,wait,count,addr,dep) \
-do {									       \
-    IOSetDBDMADescriptor(descPtr, address, addr);			       \
-    IOSetDBDMADescriptor(descPtr, cmdDep, dep);				       \
-    IOSetDBDMADescriptor(descPtr, result, 0);				       \
-    eieio();								       \
-    IOSetDBDMADescriptor(descPtr, operation,				       \
-        IOMakeDBDMAOperation(cmd, key, interrupt, branch, wait, count));       \
-    eieio();								       \
-} while(0)
-
-/*	Field accessors - NOTE: unsynchronized */
-
-/* 	unsigned long IOGetDBDMAOperation (IODBDMADescriptor *descPtr) */
-
-#define IOGetCCOperation(descPtr)				\
-	IOGetDBDMADescriptor(descPtr,operation)
-
-/* 	void IOSetCCOperation (IODBDMADescriptor *descPtr, unsigned long operationValue) */
-
-#define IOSetCCOperation(descPtr,operationValue)		\
-	IOSetDBDMADescriptor(descPtr,operation,operationValue)
-
-/* 	unsigned long IOGetCCAddress (IODBDMADescriptor *descPtr) */
-
-#define IOGetCCAddress(descPtr)				\
-	IOGetDBDMADescriptor(descPtr,address)
-
-/* 	void IOSetCCAddress (IODBDMADescriptor *descPtr, unsigned long addressValue) */
-
-#define IOSetCCAddress(descPtr,addressValue)		\
-	IOSetDBDMADescriptor(descPtr,address, addressValue)
-
-/* 	unsigned long IOGetCCCmdDep (IODBDMADescriptor *descPtr) */
-
-#define IOGetCCCmdDep(descPtr)				\
-	IOGetDBDMADescriptor(descPtr,cmdDep)
-
-/* 	void IOSetCCCmdDep (IODBDMADescriptor *descPtr, unsigned long cmdDepValue) */
-
-#define IOSetCCCmdDep(descPtr,cmdDepValue)		\
-	IOSetDBDMADescriptor(descPtr,cmdDep,cmdDepValue)
-
-/* 	unsigned long IOGetCCResult (IODBDMADescriptor *descPtr) */
-
-#define IOGetCCResult(descPtr)				\
-	IOGetDBDMADescriptor(descPtr,result)
-
-/* 	void IOSetCCResult (IODBDMADescriptor *descPtr, unsigned long resultValue) */
-
-#define IOSetCCResult(descPtr,resultValue)		\
-	IOSetDBDMADescriptor(descPtr,result,resultValue)
-
-
-/* DBDMA routines */
-
-extern void	IODBDMAStart( volatile IODBDMAChannelRegisters *registerSetPtr, volatile IODBDMADescriptor *physicalDescPtr);
-extern void	IODBDMAStop( volatile IODBDMAChannelRegisters *registerSetPtr);
-extern void	IODBDMAFlush( volatile IODBDMAChannelRegisters *registerSetPtr);
-extern void	IODBDMAReset( volatile IODBDMAChannelRegisters *registerSetPtr);
-extern void	IODBDMAContinue( volatile IODBDMAChannelRegisters *registerSetPtr);
-extern void	IODBDMAPause( volatile IODBDMAChannelRegisters *registerSetPtr);
-
-extern IOReturn	IOAllocatePhysicallyContiguousMemory( unsigned int size, unsigned int options,
-				     IOVirtualAddress * logical, IOPhysicalAddress * physical );
-extern IOReturn IOFreePhysicallyContiguousMemory( IOVirtualAddress * logical, unsigned int size);
-
-#endif /* !defined(_IODBDMA_H_) */
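
For reference, a hedged usage sketch (not from xnu) of how the deleted
IOMakeDBDMAOperation() macro packs a DB-DMA command word: per the macro
above, cmd occupies bits 31-28, key bits 27-24, interrupt bits 21-20,
branch bits 19-18, wait bits 17-16, and reqCount the low 16 bits.

    /* OUTPUT_LAST on stream 0, interrupt always, 512-byte transfer */
    unsigned long op = IOMakeDBDMAOperation(kdbdmaOutputLast,  /* cmd */
                                            kdbdmaKeyStream0,  /* key */
                                            kdbdmaIntAlways,   /* i   */
                                            kdbdmaBranchNever, /* b   */
                                            kdbdmaWaitNever,   /* w   */
                                            512);              /* reqCount */
    /* op == 0x10300200 */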
diff --git a/iokit/IOKit/ppc/IOSharedLockImp.h b/iokit/IOKit/ppc/IOSharedLockImp.h
deleted file mode 100644
index 8c685b223..000000000
--- a/iokit/IOKit/ppc/IOSharedLockImp.h
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1998 Apple Computer, Inc.  All rights reserved. 
- *
- * HISTORY
- *
- */
-
-/* 	Copyright (c) 1992 NeXT Computer, Inc.  All rights reserved. 
- *
- * EventShmemLock.h -	Shared memory area locks for use between the
- *			WindowServer and the Event Driver.
- *
- * HISTORY
- * 30 Nov   1992    Ben Fathi (benf@next.com)
- *      Ported to m98k.
- *
- * 29 April 1992    Mike Paquette at NeXT
- *      Created. 
- *
- * Multiprocessor locks used within the shared memory area between the
- * kernel and event system.  These must work in both user and kernel mode.
- * The locks are defined in an include file so they get exported to the local
- * include file area.
- */
-
-
-#ifndef _IOKIT_IOSHAREDLOCKIMP_H
-#define _IOKIT_IOSHAREDLOCKIMP_H
-
-#include <architecture/ppc/asm_help.h>
-#ifdef KERNEL
-#undef END
-#include <mach/ppc/asm.h>
-#endif
-
-/*
- *	void
- *	ev_lock(p)
- *		register int *p;
- *
- *	Lock the lock pointed to by p.  Spin (possibly forever) until
- *		the lock is available.  Test and test and set logic used.
- */
-	TEXT
-
-#ifndef KERNEL
-LEAF(_ev_lock)
-
-		li		a6,1			// lock value
-		
-8:		lwz		a7,0(a0)		// Get lock word
-		mr.		a7,a7			// Is it held?
-		bne--	8b				// Yup...
-
-9:		lwarx	a7,0,a0			// read the lock
-		mr.		a7,a7			// Is it held?
-		bne--	7f				// yes, kill reservation
-		stwcx.	a6,0,a0			// try to get the lock
-		bne--	9b 				// failed, try again
-		isync
-		blr						// got it, return
-		
-7:		li		a7,-4			// Point to a spot in the red zone
-		stwcx.	a7,a7,r1		// Kill reservation
-		b		8b				// Go wait some more...
-		
-		
-END(_ev_lock)
-
-LEAF(_IOSpinLock)
-
-		li		a6,1			// lock value
-		
-8:		lwz		a7,0(a0)		// Get lock word
-		mr.		a7,a7			// Is it held?
-		bne--	8b				// Yup...
-
-9:		lwarx	a7,0,a0			// read the lock
-		mr.		a7,a7			// Is it held?
-		bne--	7f				// yes, kill reservation
-		stwcx.	a6,0,a0			// try to get the lock
-		bne--	9b 				// failed, try again
-		isync
-		blr						// got it, return
-		
-7:		li		a7,-4			// Point to a spot in the red zone
-		stwcx.	a7,a7,r1		// Kill reservation
-		b		8b				// Go wait some more...
-END(_IOSpinLock)
-#endif
-
-/*
- *	void
- *	spin_unlock(p)
- *		int *p;
- *
- *	Unlock the lock pointed to by p.
- */
-
-LEAF(_ev_unlock)
-	sync
-	li	a7,0
-	stw	a7,0(a0)
-	blr
-END(_ev_unlock)
-
-LEAF(_IOSpinUnlock)
-	sync
-	li	a7,0
-	stw	a7,0(a0)
-	blr
-END(_IOSpinUnlock)
-
-
-/*
- *	ev_try_lock(p)
- *		int *p;
- *
- *	Try to lock p.  Return TRUE if successful in obtaining lock.
- */
-
-LEAF(_ev_try_lock)
-		li		a6,1			// lock value
-		
-		lwz		a7,0(a0)		// Get lock word
-		mr.		a7,a7			// Is it held?
-		bne--	6f				// Yup...
-
-9:		lwarx	a7,0,a0			// read the lock
-		mr.		a7,a7			// Is it held?
-		bne--	7f				// yes, kill reservation
-		stwcx.	a6,0,a0			// try to get the lock
-		bne--	9b 				// failed, try again
-		li		a0,1			// return TRUE
-		isync
-		blr						// got it, return
-		
-7:		li		a7,-4			// Point to a spot in the red zone
-		stwcx.	a7,a7,r1		// Kill reservation
-
-6:
-		li	a0,0				// return FALSE
-		blr
-		
-END(_ev_try_lock)
-
-LEAF(_IOTrySpinLock)
-		li		a6,1			// lock value
-		
-		lwz		a7,0(a0)		// Get lock word
-		mr.		a7,a7			// Is it held?
-		bne--	6f				// Yup...
-
-9:		lwarx	a7,0,a0			// read the lock
-		mr.		a7,a7			// Is it held?
-		bne--	7f				// yes, kill reservation
-		stwcx.	a6,0,a0			// try to get the lock
-		bne--	9b 				// failed, try again
-		li		a0,1			// return TRUE
-		isync
-		blr						// got it, return
-		
-7:		li		a7,-4			// Point to a spot in the red zone
-		stwcx.	a7,a7,r1		// Kill reservation
-
-6:
-		li	a0,0				// return FALSE
-		blr
-		
-END(_IOTrySpinLock)
-
-#endif /* ! _IOKIT_IOSHAREDLOCKIMP_H */
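
For reference, a hedged C11 sketch (an assumption, not xnu code) of the
test-and-test-and-set loop the deleted PPC _ev_lock implements with
lwarx/stwcx.: spin on plain loads while the lock is held, then attempt the
reserved store; the trailing isync corresponds to acquire ordering.

    #include <stdatomic.h>

    static void ev_lock_sketch(atomic_int *p)
    {
        for (;;) {
            /* test: spin on ordinary loads first, like the lwz loop */
            while (atomic_load_explicit(p, memory_order_relaxed) != 0)
                ;
            /* test-and-set: the lwarx/stwcx. pair modeled as a weak CAS */
            int expected = 0;
            if (atomic_compare_exchange_weak_explicit(p, &expected, 1,
                    memory_order_acquire, memory_order_relaxed))
                return;
        }
    }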
diff --git a/iokit/IOKit/ppc/Makefile b/iokit/IOKit/ppc/Makefile
deleted file mode 100644
index 21ff86cad..000000000
--- a/iokit/IOKit/ppc/Makefile
+++ /dev/null
@@ -1,32 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-IOKIT_FRAMEDIR = $(FRAMEDIR)/IOKit.framework/Versions/A
-export INCDIR = $(IOKIT_FRAMEDIR)/Headers
-export LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-MD_DIR = ppc
-NOT_EXPORT_HEADERS = IOSharedLockImp.h
-
-INSTINC_SUBDIRS =
-INSTINC_SUBDIRS_PPC =
-
-EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
-EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC}
-
-ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h))
-
-INSTALL_MD_LIST	= IOSharedLockImp.h
-INSTALL_MD_LCL_LIST = ""
-INSTALL_MD_DIR = $(MD_DIR)
-
-EXPORT_MD_LIST	= $(filter-out $(NOT_EXPORT_HEADERS), $(ALL_HEADERS))
-EXPORT_MD_DIR = IOKit/$(MD_DIR)
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
diff --git a/iokit/IOKit/pwr_mgt/IOPM.h b/iokit/IOKit/pwr_mgt/IOPM.h
index 804b9bbfd..f0002d5d6 100644
--- a/iokit/IOKit/pwr_mgt/IOPM.h
+++ b/iokit/IOKit/pwr_mgt/IOPM.h
@@ -32,10 +32,6 @@
 #include <IOKit/IOMessage.h>
 #include <IOKit/IOReturn.h>
 
-#ifdef __ppc__
-#include <IOKit/pwr_mgt/IOPMDeprecated.h>
-#endif
-
 /*! @header IOPM.h
     @abstract Defines power management constants and keys used by both in-kernel and user space power management.
     @discussion IOPM.h defines a range of power management constants used in several in-kernel and user space APIs. Most significantly, the IOPMPowerFlags used to specify the fields of an IOPMPowerState struct are defined here.
@@ -80,7 +76,7 @@ enum {
 
     Useful only as a Capability.
     
-    @constant kIOPMSleepCapability 
+    @constant kIOPMSleepCapability
     Used only by certain IOKit Families (USB). Not defined or used by generic Power Management. Read your family documentation to see if you should define a powerstate using these capabilities.
     
     @constant kIOPMRestartCapability
@@ -91,6 +87,9 @@ enum {
 
     @constant kIOPMRestart
     Used only by certain IOKit Families (USB). Not defined or used by generic Power Management. Read your family documentation to see if you should define a powerstate using these capabilities.
+
+    @constant kIOPMInitialDeviceState
+    Indicates the initial power state for the device. If <code>initialPowerStateForDomainState()</code> returns a power state with this flag set in the capability field, then the initial power change is performed without calling the driver's <code>setPowerState()</code>.
 */
 typedef unsigned long IOPMPowerFlags;
 enum {
@@ -101,7 +100,8 @@ enum {
     kIOPMSleepCapability            = 0x00000004,
     kIOPMRestartCapability          = 0x00000080,
     kIOPMSleep                      = 0x00000001,
-    kIOPMRestart                    = 0x00000080
+    kIOPMRestart                    = 0x00000080,
+    kIOPMInitialDeviceState         = 0x00000100
 };
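
A hedged sketch of how a driver might use the new flag (the two-state table
and names here are hypothetical, not from xnu): tagging a state with
kIOPMInitialDeviceState in its capabilityFlags lets the initial power change
complete without a setPowerState() call into the driver.

    static IOPMPowerState gSketchPowerStates[2] = {
        /* version, capabilityFlags, outputPowerCharacter, inputPowerRequirement,
         * then the static/unbudgeted/attain power and timing fields (all zero) */
        { kIOPMPowerStateVersion1, kIOPMInitialDeviceState, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0 },
        { kIOPMPowerStateVersion1, kIOPMDeviceUsable | kIOPMPowerOn,
          kIOPMPowerOn, kIOPMPowerOn, 0, 0, 0, 0, 0, 0, 0, 0 }
    };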
 
 /*
@@ -121,7 +121,6 @@ enum {
     kIOPMNotPowerManaged            = 0x0800
 };
 
-
 /*
  * Deprecated IOPMPowerFlags
  * Their behavior is undefined when used in IOPMPowerState
@@ -221,7 +220,7 @@ enum {
  * 
  * See IOPMrootDomain notification kIOPMMessageSleepWakeUUIDChange
  */
-#define kIOPMSleepWakeUUIDKey               "SleepWakeUUID"
+#define kIOPMSleepWakeUUIDKey               "SleepWakeUUID"
 
 /* kIOPMDeepSleepEnabledKey
  * Indicates the Deep Sleep enable state.
@@ -239,11 +238,14 @@ enum {
  */
 #define kIOPMDeepSleepDelayKey              "Standby Delay"
 
-/* kIOPMLowBatteryWakeThresholdKey
- * Key refers to a CFNumberRef that represents the percentage of battery
- * remaining charge that will trigger a system wake followed by Deep Sleep.
+/* kIOPMDestroyFVKeyOnStandbyKey
+ * Specifies whether the FileVault key can be stored when going to standby mode.
+ * It has a boolean value:
+ *  true        == Destroy FV key when going to standby mode
+ *  false       == Retain FV key when going to standby mode
+ *  not present == Retain FV key when going to standby mode
  */
-#define kIOPMLowBatteryWakeThresholdKey     "LowBatteryWakeThreshold"
+#define kIOPMDestroyFVKeyOnStandbyKey            "DestroyFVKeyOnStandby"
 
 /*******************************************************************************
  *
@@ -276,8 +278,16 @@ enum {
      */
     kIOPMDriverAssertionExternalMediaMountedBit     = 0x10,
 
+    /*! kIOPMDriverAssertionReservedBit5
+     * Reserved for Thunderbolt.
+     */
     kIOPMDriverAssertionReservedBit5                = 0x20,
-    kIOPMDriverAssertionReservedBit6                = 0x40,
+
+    /*! kIOPMDriverAssertionPreventDisplaySleepBit
+     * When set, the display should remain powered on while the system is awake.
+     */
+    kIOPMDriverAssertionPreventDisplaySleepBit      = 0x40,
+
     kIOPMDriverAssertionReservedBit7                = 0x80
 };
 
@@ -406,6 +416,7 @@ enum {
  * These commands are issued from system drivers only:
  *      ApplePMU, AppleSMU, IOGraphics, AppleACPIFamily
  *
+ * TODO: deprecate kIOPMAllowSleep and kIOPMPreventSleep
  ******************************************************************************/
 enum {
   kIOPMSleepNow                 = (1<<0),  // put machine to sleep now
@@ -500,6 +511,8 @@ enum {
 #define kIOPMPSCapacityEstimatedKey	                "CapacityEstimated"
 #define kIOPMPSBatteryChargeStatusKey               "ChargeStatus"
 #define kIOPMPSBatteryTemperatureKey                "Temperature"
+#define kIOPMPSAdapterDetailsKey		    "AdapterDetails"
+#define kIOPMPSChargerConfigurationKey		    "ChargerConfiguration"
 
 // kIOPMPSBatteryChargeStatusKey may have one of the following values, or may have
 // no value. If kIOPMBatteryChargeStatusKey has a NULL value (or no value) associated with it
@@ -507,6 +520,7 @@ enum {
 // then the charge may have been interrupted.
 #define kIOPMBatteryChargeStatusTooHot              "HighTemperature"
 #define kIOPMBatteryChargeStatusTooCold             "LowTemperature"
+#define kIOPMBatteryChargeStatusTooHotOrCold	    "HighOrLowTemperature"
 #define kIOPMBatteryChargeStatusGradient            "BatteryTemperatureGradient"
 
 // Definitions for battery location, in case of multiple batteries.
@@ -526,6 +540,16 @@ enum {
     kIOPMGoodValue      = 3
 };
 
+// Keys for kIOPMPSAdapterDetailsKey dictionary
+#define kIOPMPSAdapterDetailsIDKey		    "AdapterID"
+#define kIOPMPSAdapterDetailsWattsKey		    "Watts"
+#define kIOPMPSAdapterDetailsRevisionKey	    "AdapterRevision"
+#define kIOPMPSAdapterDetailsSerialNumberKey	    "SerialNumber"
+#define kIOPMPSAdapterDetailsFamilyKey		    "FamilyCode"
+#define kIOPMPSAdapterDetailsAmperageKey	    "Amperage"
+#define kIOPMPSAdapterDetailsDescriptionKey	    "Description"
+#define kIOPMPSAdapterDetailsPMUConfigurationKey    "PMUConfiguration"
+
 // Battery's time remaining estimate is invalid this long (seconds) after a wake
 #define kIOPMPSInvalidWakeSecondsKey           "BatteryInvalidWakeSeconds"
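
A hedged sketch (the driver object and wattage are hypothetical) of how a
power-source driver could publish the dictionary these keys describe, using
the libkern collection classes:

    OSDictionary *details = OSDictionary::withCapacity(2);
    OSNumber     *watts   = OSNumber::withNumber(60ULL, 32);

    details->setObject(kIOPMPSAdapterDetailsWattsKey, watts);
    battery->setProperty(kIOPMPSAdapterDetailsKey, details);  /* battery: some IOService */

    watts->release();
    details->release();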
 
@@ -688,7 +712,6 @@ enum {
     kIOBatteryChargerConnect    = (1 << 0)
 };
 
-
 // Private power management message indicating battery data has changed
 // Indicates new data resides in the IORegistry
 #define kIOPMMessageBatteryStatusHasChanged         iokit_family_msg(sub_iokit_pmu, 0x100)
@@ -714,7 +737,6 @@ enum {
   kIOPMClamshellStateOnWake = (1<<10)     // used only by Platform Expert
 };
 
-
 // **********************************************
 // Internal power management data structures
 // **********************************************
@@ -731,7 +753,7 @@ enum {
     kIOPMSuperclassPolicy1
 };
 
-struct stateChangeNote{
+struct stateChangeNote {
     IOPMPowerFlags    stateFlags;
     unsigned long    stateNum;
     void *         powerRef;
@@ -748,5 +770,54 @@ typedef struct IOPowerStateChangeNotification IOPowerStateChangeNotification;
 typedef IOPowerStateChangeNotification sleepWakeNote;
 #endif /* KERNEL && __cplusplus */
 
-#endif /* ! _IOKIT_IOPM_H */
+/*! @struct IOPMSystemCapabilityChangeParameters
+    @abstract A structure describing a system capability change.
+    @discussion A system capability change is a system level transition from a set
+        of system capabilities to a new set of system capabilities. Power management
+        sends a <code>kIOMessageSystemCapabilityChange</code> message and provides
+        this structure as the message data (by reference) to
+        <code>gIOPriorityPowerStateInterest</code> clients when system capability
+        changes.
+    @field notifyRef An identifier for this message notification. Clients with pending
+        I/O can signal completion by calling <code>allowPowerChange()</code> with this
+        value as the argument. Clients that are able to process the notification
+        synchronously should ignore this field.
+    @field maxWaitForReply A return value to the caller indicating the maximum time in
+        microseconds to wait for the <code>allowPowerChange()</code> call. The default
+        value is zero, which indicates the client processing has finished, and power
+        management should not wait for an <code>allowPowerChange()</code> call.
+    @field changeFlags Flags will be set to indicate whether the notification precedes
+        the capability change (<code>kIOPMSystemCapabilityWillChange</code>), or after
+        the capability change has occurred (<code>kIOPMSystemCapabilityDidChange</code>).
+    @field __reserved1 Set to zero.
+    @field fromCapabilities The system capabilities at the start of the transition.
+    @field toCapabilities The system capabilities at the end of the transition.
+    @field __reserved2 Set to zero.
+ */
+struct IOPMSystemCapabilityChangeParameters {
+    uint32_t    notifyRef;
+    uint32_t    maxWaitForReply;
+    uint32_t    changeFlags;
+    uint32_t    __reserved1;
+    uint32_t    fromCapabilities;
+    uint32_t    toCapabilities;
+    uint32_t    __reserved2[4];
+};
+
+/*! @enum IOPMSystemCapabilityChangeFlags
+    @constant kIOPMSystemCapabilityWillChange Indicates the system capability will change.
+    @constant kIOPMSystemCapabilityDidChange Indicates the system capability has changed.
+*/
+enum {
+    kIOPMSystemCapabilityWillChange = 0x01,
+    kIOPMSystemCapabilityDidChange  = 0x02
+};
 
+enum {
+    kIOPMSystemCapabilityCPU        = 0x01,
+    kIOPMSystemCapabilityGraphics   = 0x02,
+    kIOPMSystemCapabilityAudio      = 0x04,
+    kIOPMSystemCapabilityNetwork    = 0x08
+};
+
+#endif /* ! _IOKIT_IOPM_H */
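
A hedged sketch (the handler name is hypothetical) of a
gIOPriorityPowerStateInterest client inspecting this structure when it
arrives as the message argument of kIOMessageSystemCapabilityChange:

    static IOReturn
    sketchCapabilityHandler(void *target, void *refCon, UInt32 messageType,
                            IOService *provider, void *messageArgument,
                            vm_size_t argSize)
    {
        if (messageType == kIOMessageSystemCapabilityChange) {
            IOPMSystemCapabilityChangeParameters *params =
                (IOPMSystemCapabilityChangeParameters *) messageArgument;

            /* About to lose the CPU capability, i.e. system sleep. */
            if ((params->changeFlags & kIOPMSystemCapabilityWillChange) &&
                (params->fromCapabilities & kIOPMSystemCapabilityCPU) &&
                !(params->toCapabilities & kIOPMSystemCapabilityCPU)) {
                /* A client with pending I/O could set maxWaitForReply and
                 * later call allowPowerChange(params->notifyRef). */
            }
        }
        return kIOReturnSuccess;
    }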
diff --git a/iokit/IOKit/pwr_mgt/IOPMDeprecated.h b/iokit/IOKit/pwr_mgt/IOPMDeprecated.h
deleted file mode 100644
index 3bee01a3b..000000000
--- a/iokit/IOKit/pwr_mgt/IOPMDeprecated.h
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright (c) 1998-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#ifndef _IOPMDeprecated_h_
-#define _IOPMDeprecated_h_
-
-#ifdef __ppc__
-
-// Power events
-enum {
-  kClamshellClosedEventMask  = (1<<0),  // User closed lid
-  kDockingBarEventMask       = (1<<1),  // OBSOLETE
-  kACPlugEventMask           = (1<<2),  // User plugged or unplugged adapter
-  kFrontPanelButtonEventMask = (1<<3),  // User hit the front panel button
-  kBatteryStatusEventMask    = (1<<4)   // Battery status has changed
-};
-
-// PUBLIC power management features
-// NOTE: this is a direct port from classic, some of these bits
-//       are obsolete but are included for completeness
-enum {
-  kPMHasWakeupTimerMask        = (1<<0),  // 1=wake timer is supported
-  kPMHasSharedModemPortMask    = (1<<1),  // Not used
-  kPMHasProcessorCyclingMask   = (1<<2),  // 1=processor cycling supported
-  kPMMustProcessorCycleMask    = (1<<3),  // Not used
-  kPMHasReducedSpeedMask       = (1<<4),  // 1=supports reduced processor speed
-  kPMDynamicSpeedChangeMask    = (1<<5),  // 1=supports changing processor speed on the fly
-  kPMHasSCSIDiskModeMask       = (1<<6),  // 1=supports using machine as SCSI drive
-  kPMCanGetBatteryTimeMask     = (1<<7),  // 1=battery time can be calculated
-  kPMCanWakeupOnRingMask       = (1<<8),  // 1=machine can wake on modem ring
-  kPMHasDimmingSupportMask     = (1<<9),  // 1=has monitor dimming support
-  kPMHasStartupTimerMask       = (1<<10), // 1=can program startup timer
-  kPMHasChargeNotificationMask = (1<<11), // 1=client can determine charger status/get notifications
-  kPMHasDimSuspendSupportMask  = (1<<12), // 1=can dim display to DPMS ('off') state
-  kPMHasWakeOnNetActivityMask  = (1<<13), // 1=supports waking upon receipt of net packet
-  kPMHasWakeOnLidMask          = (1<<14), // 1=can wake upon lid/case opening
-  kPMCanPowerOffPCIBusMask     = (1<<15), // 1=can remove power from PCI bus on sleep
-  kPMHasDeepSleepMask          = (1<<16), // 1=supports deep (hibernation) sleep
-  kPMHasSleepMask              = (1<<17), // 1=machine supports low-power sleep (a la PowerBooks)
-  kPMSupportsServerModeAPIMask = (1<<18), // 1=supports reboot on AC resume for unexpected power loss
-  kPMHasUPSIntegrationMask     = (1<<19)  // 1=supports incorporating UPS devices into power source calcs
-};
-
-// PRIVATE power management features
-// NOTE: this is a direct port from classic, some of these bits
-//       are obsolete but are included for completeness.
-enum {
-  kPMHasExtdBattInfoMask       = (1<<0),  // Not used
-  kPMHasBatteryIDMask          = (1<<1),  // Not used
-  kPMCanSwitchPowerMask        = (1<<2),  // Not used 
-  kPMHasCelsiusCyclingMask     = (1<<3),  // Not used
-  kPMHasBatteryPredictionMask  = (1<<4),  // Not used
-  kPMHasPowerLevelsMask        = (1<<5),  // Not used
-  kPMHasSleepCPUSpeedMask      = (1<<6),  // Not used
-  kPMHasBtnIntHandlersMask     = (1<<7),  // 1=supports individual button interrupt handlers
-  kPMHasSCSITermPowerMask      = (1<<8),  // 1=supports SCSI termination power switch
-  kPMHasADBButtonHandlersMask  = (1<<9),  // 1=supports button handlers via ADB
-  kPMHasICTControlMask         = (1<<10), // 1=supports ICT control
-  kPMHasLegacyDesktopSleepMask = (1<<11), // 1=supports 'doze' style sleep
-  kPMHasDeepIdleMask           = (1<<12), // 1=supports Idle2 in hardware
-  kPMOpenLidPreventsSleepMask  = (1<<13), // 1=open case prevents machine from sleeping
-  kPMClosedLidCausesSleepMask  = (1<<14), // 1=case closed (clamshell closed) causes sleep
-  kPMHasFanControlMask         = (1<<15), // 1=machine has software-programmable fan/thermostat controls
-  kPMHasThermalControlMask     = (1<<16), // 1=machine supports thermal monitoring
-  kPMHasVStepSpeedChangeMask   = (1<<17), // 1=machine supports processor voltage/clock change
-  kPMEnvironEventsPolledMask   = (1<<18)  // 1=machine doesn't generate pmu env ints, we must poll instead 
-};
-
-// DEFAULT public and private features for machines whose device tree
-// does NOT contain this information (pre-Core99).
-
-// For Cuda-based Desktops
-
-#define kStdDesktopPMFeatures   kPMHasWakeupTimerMask         |\
-                                kPMHasProcessorCyclingMask    |\
-                                kPMHasDimmingSupportMask      |\
-                                kPMHasStartupTimerMask        |\
-                                kPMSupportsServerModeAPIMask  |\
-                                kPMHasUPSIntegrationMask
-
-#define kStdDesktopPrivPMFeatures  kPMHasExtdBattInfoMask     |\
-                                   kPMHasICTControlMask       |\
-                                   kPMHasLegacyDesktopSleepMask
-
-#define kStdDesktopNumBatteries 0
-
-// For Wallstreet (PowerBook G3 Series 1998)
-
-#define kWallstreetPMFeatures   kPMHasWakeupTimerMask         |\
-                                kPMHasProcessorCyclingMask    |\
-                                kPMHasReducedSpeedMask        |\
-                                kPMDynamicSpeedChangeMask     |\
-                                kPMHasSCSIDiskModeMask        |\
-                                kPMCanGetBatteryTimeMask      |\
-                                kPMHasDimmingSupportMask      |\
-                                kPMHasChargeNotificationMask  |\
-                                kPMHasDimSuspendSupportMask   |\
-                                kPMHasSleepMask
-
-#define kWallstreetPrivPMFeatures  kPMHasExtdBattInfoMask      |\
-                                   kPMHasBatteryIDMask         |\
-                                   kPMCanSwitchPowerMask       |\
-                                   kPMHasADBButtonHandlersMask |\
-                                   kPMHasSCSITermPowerMask     |\
-                                   kPMHasICTControlMask        |\
-                                   kPMClosedLidCausesSleepMask |\
-                                   kPMEnvironEventsPolledMask
-
-#define kStdPowerBookPMFeatures      kWallstreetPMFeatures
-#define kStdPowerBookPrivPMFeatures  kWallstreetPrivPMFeatures
-
-#define kStdPowerBookNumBatteries 2
-
-// For 101 (PowerBook G3 Series 1999)
-
-#define k101PMFeatures          kPMHasWakeupTimerMask         |\
-                                kPMHasProcessorCyclingMask    |\
-                                kPMHasReducedSpeedMask        |\
-                                kPMDynamicSpeedChangeMask     |\
-                                kPMHasSCSIDiskModeMask        |\
-                                kPMCanGetBatteryTimeMask      |\
-                                kPMHasDimmingSupportMask      |\
-                                kPMHasChargeNotificationMask  |\
-                                kPMHasDimSuspendSupportMask   |\
-                                kPMHasSleepMask               |\
-                                kPMHasUPSIntegrationMask
-
-#define k101PrivPMFeatures      kPMHasExtdBattInfoMask        |\
-                                kPMHasBatteryIDMask           |\
-                                kPMCanSwitchPowerMask         |\
-                                kPMHasADBButtonHandlersMask   |\
-                                kPMHasSCSITermPowerMask       |\
-                                kPMHasICTControlMask          |\
-                                kPMClosedLidCausesSleepMask   |\
-                                kPMEnvironEventsPolledMask
-
-
-// These flags are deprecated. Use the version with the kIOPM prefix in IOPM.h
-enum {
-  kACInstalled      = (1<<0),
-  kBatteryCharging  = (1<<1),
-  kBatteryInstalled = (1<<2),
-  kUPSInstalled     = (1<<3),
-  kBatteryAtWarn    = (1<<4),
-  kBatteryDepleted  = (1<<5),
-  kACnoChargeCapability = (1<<6),     // AC adapter cannot charge battery
-  kRawLowBattery    = (1<<7),         // used only by Platform Expert
-  kForceLowSpeed    = (1<<8)          // set by Platform Expert, checked by Power Plugin
-};
-
-#endif /* __ppc__ */
-#endif /* _IOPMDeprecated_h_ */
diff --git a/iokit/IOKit/pwr_mgt/IOPMPrivate.h b/iokit/IOKit/pwr_mgt/IOPMPrivate.h
index 88ff6c788..3e61d81e0 100644
--- a/iokit/IOKit/pwr_mgt/IOPMPrivate.h
+++ b/iokit/IOKit/pwr_mgt/IOPMPrivate.h
@@ -30,8 +30,197 @@
 
 #include <IOKit/pwr_mgt/IOPM.h>
 
-/*****************************************************************************/
+#pragma mark PM Timeline Logging
+/**************************************************
+*
+* Timeline API Keys - Reports timing details for 
+*   applications, drivers, and system during PM activity
+*
+* For kernel-internal use only
+**************************************************/
+
+// Keys for interfacing with IOPMrootDomain Timeline
+/* @constant kIOPMTimelineDictionaryKey
+ * @abstract RootDomain key for dictionary describing Timeline's info
+ */
+#define     kIOPMTimelineDictionaryKey                  "PMTimelineLogging"
+
+/* @constant kIOPMTimelineEnabledKey
+ * @abstract Boolean value indicating whether the system is recording PM events.
+ * @discussion Key may be found in the dictionary at IOPMrootDomain's property 
+ * kIOPMTimelineDictionaryKey. uint32_t value; may be 0.
+ */
+#define     kIOPMTimelineEnabledKey                     "TimelineEnabled"
+
+/* @constant kIOPMTimelineSystemNumberTrackedKey
+ * @abstract The maximum number of system power events the system may record.
+ * @discussion Key may be found in the dictionary at IOPMrootDomain's property 
+ * kIOPMTimelineDictionaryKey. uint32_t value; may be 0.
+ */
+#define     kIOPMTimelineSystemNumberTrackedKey         "TimelineSystemEventsTracked"
+
+/* @constant kIOPMTimelineSystemBufferSizeKey
+ * @abstract Size in bytes of the buffer recording system PM events
+ * @discussion Key may be found in the dictionary at IOPMrootDomain's property 
+ * kIOPMTimelineDictionaryKey. uint32_t value; may be 0.
+ */
+#define     kIOPMTimelineSystemBufferSizeKey            "TimelineSystemBufferSize"
+
+
+
+/* @constant kIOPMEventTypeIntermediateFlag
+ * @abstract This bit indicates the event is an intermediate event
+ *      which must occur within a major system power event.
+ */
+#define kIOPMEventTypeIntermediateFlag              0x10000000
+
+/* @enum SystemEventTypes
+ * @abstract Potential system events logged in the system event record.
+ */
+enum {
+	kIOPMEventTypeUndefined                     = 0,
+
+    /* Event types mark driver events 
+     */
+    kIOPMEventTypeSetPowerStateImmediate        = 1001,
+    kIOPMEventTypeSetPowerStateDelayed          = 1002,
+    kIOPMEventTypePSWillChangeTo                = 1003,
+    kIOPMEventTypePSDidChangeTo                 = 1004,
+    kIOPMEventTypeAppResponse                   = 1005,
+
+
+    /* Start and stop event types bracket major
+     * system power management events.
+     */
+	kIOPMEventTypeSleep                         = 2001,
+	kIOPMEventTypeSleepDone                     = 2002,
+	kIOPMEventTypeWake                          = 3001,
+	kIOPMEventTypeWakeDone                      = 3002,
+	kIOPMEventTypeDoze                          = 4001,
+	kIOPMEventTypeDozeDone                      = 4002,
+	kIOPMEventTypeLiteWakeUp                    = 5001,
+	kIOPMEventTypeLiteWakeUpDone                = 5002,
+	kIOPMEventTypeLiteWakeDown                  = 5003,
+	kIOPMEventTypeLiteWakeDownDone              = 5004,
+	kIOPMEventTypeUUIDSet                       = 6001,
+	kIOPMEventTypeUUIDClear                     = 6002,
+
+    /* Intermediate events that may only occur within the bounds
+     * of a major system event (between the event's initiation and its "done event".)
+     * e.g. chronologically kIOPMEventTypeSleep may be followed by one or more
+     *      intermediate events, which then must be followed by kIOPMEventTypeSleepDone.
+     *
+     * The intermediate events below will always occur in a Sleep or Wake event, and may
+     *      or may not occur for any of the other events.
+     */
+    kIOPMEventTypeAppNotificationsFinished      = 501 | kIOPMEventTypeIntermediateFlag,
+    kIOPMEventTypeDriverNotificationsFinished   = 502 | kIOPMEventTypeIntermediateFlag,
+    kIOPMEventTypeCalTimeChange                 = 503 | kIOPMEventTypeIntermediateFlag
+};
+
+
+/* @enum SystemSleepReasons 
+ * @abstract The potential causes for system sleep as logged in the system event record.
+ */
+enum {
+    kIOPMSleepReasonClamshell                   = 101,
+    kIOPMSleepReasonPowerButton                 = 102,
+    kIOPMSleepReasonSoftware                    = 103,
+    kIOPMSleepReasonOSSwitchHibernate           = 104,
+    kIOPMSleepReasonIdle                        = 105,
+    kIOPMSleepReasonLowPower                    = 106,
+    kIOPMSleepReasonThermalEmergency            = 107,
+    kIOPMSleepReasonMaintenance                 = 108
+};
+
+/*
+ * Possible C-string sleep reasons found under kRootDomainSleepReasonsKey
+ */
+#define kIOPMClamshellSleepKey                      "Clamshell Sleep"
+#define kIOPMPowerButtonSleepKey                    "Power Button Sleep"
+#define kIOPMSoftwareSleepKey                       "Software Sleep"
+#define kIOPMOSSwitchHibernationKey                 "OS Switch Sleep"
+#define kIOPMIdleSleepKey                           "Idle Sleep"
+#define kIOPMLowPowerSleepKey                       "Low Power Sleep"
+#define kIOPMThermalEmergencySleepKey               "Thermal Emergency Sleep"
+
+
+enum {
+    kIOPMMaxSystemEventsTracked = 25000,
+    kIOPMDefaultSystemEventsTracked = 1000,
+    kMaxPMStringLength = 40,
+};
+
+/* @struct IOPMSystemEventRecord
+ * @abstract Records a single power event for a particular PM entity.
+ * This includes changes to a driver's power state, application responses
+ * to PM notifications, or system power management milestones.
+ */
+typedef struct {
+    union {
+        // For DRIVER events
+        char        ownerName[kMaxPMStringLength];
+        // For SYSTEM events, uuid contains the string describing the active UUID
+        char        uuid[kMaxPMStringLength];
+    };
+
+    // For DRIVER events - records the name of the driver who generated the notifications.
+    char        interestName[kMaxPMStringLength];
+    
+    // DRIVER & SYSTEM - Times are stored as uint64_t
+    // The high 32 bits are the seconds returned from clock_get_calendar_microtime,
+    // and the low 32 bits are the accompanying microseconds.
+    uint64_t    timestamp;
+
+    union {
+        // For DRIVER events - ownerDisambiguateID is a unique descriptor of the driver, to disambiguate
+        // several similarly named drivers.
+        uint64_t    ownerDisambiguateID;
+        // For SYSTEM events - eventReason is a value in SystemSleepReasons
+        uint64_t    eventReason;
+    };
+    
+    // DRIVER & SYSTEM - eventType is one of 'SystemEventTypes'
+    // The value of eventType determines, among other things, whether this is a SYSTEM or
+    //      DRIVER event type.
+    uint32_t    eventType;
+
+    // DRIVER & SYSTEM - eventResult is an IOReturn value
+    uint32_t    eventResult;
+
+    // DRIVER - If defined, elapsedTimeUS records the entire time a transaction took to complete
+    uint32_t    elapsedTimeUS;
+
+    // DRIVER - in power state changes, oldState & newState are PM power state indices.
+    uint8_t     oldState;
+    uint8_t     newState;
+} IOPMSystemEventRecord;
+
+/* @struct IOPMTraceBufferHeader
+ * Occupies the first bytes in the buffer allocated by IOPMrootDomain
+ * Describes the size and current index of the trace buffer
+ */
+typedef struct {
+	uint32_t	sizeBytes;
+	uint32_t    sizeEntries;
+	uint32_t    index;
+} IOPMTraceBufferHeader;
+
+/* Argument to IOPMrootDomain::clientMemoryForType to acquire
+ * memory mapping.
+ */
+enum {
+    kPMRootDomainMapTraceBuffer = 1
+};
 
+/**************************************************
+*
+* Accountability API Ends here
+*
+**************************************************/
+
+
+#pragma mark Stray Bitfields
 // Private power commands issued to root domain
 // bits 0-7 in IOPM.h
 
@@ -143,10 +332,10 @@ typedef struct {
 /* PM RootDomain tracePoints
  *
  * In the sleep/wake process, we expect the sleep trace points to proceed
- * in increasing order. Once sleep begins with code kIOPMTracePointSleepStarted = 0x11,
+ * in increasing order. Once sleep begins with code kIOPMTracePointSleepStarted,
  * we expect sleep to continue in a monotonically increasing order of tracepoints
- * to kIOPMTracePointSystemLoginwindowPhase = 0x30. After trace point SystemLoginWindowPhase,
- * the system will return to kIOPMTracePointSystemUp = 0x00.
+ * to kIOPMTracePointSystemLoginwindowPhase. After trace point SystemLoginWindowPhase,
+ * the system will return to kIOPMTracePointSystemUp.
  *
  * If the trace point decreases (instead of increasing) before reaching kIOPMTracePointSystemUp,
  * that indicates that the sleep process was cancelled. The cancel reason shall be indicated
@@ -155,94 +344,215 @@ typedef struct {
 
 enum {
 /* When kTracePointSystemUp is the latest tracePoint,
-   the system is awake. It is not asleep, sleeping, or waking.
-   
-   * Phase begins: At boot, at completion of wake from sleep,
-          immediately following kIOPMTracePointSystemLoginwindowPhase.
-   * Phase ends: When a sleep attempt is initiated.
+ * the system is awake. It is not asleep, sleeping, or waking.
+ *
+ * Phase begins: At boot, at completion of wake from sleep,
+ *      immediately following kIOPMTracePointSystemLoginwindowPhase.
+ * Phase ends: When a sleep attempt is initiated.
  */
     kIOPMTracePointSystemUp                     = 0,
 
-/* When kIOPMTracePointSleepStarted we have just initiated sleep.
+/* When kIOPMTracePointSleepStarted is the latest tracePoint,
+ * sleep has been initiated.
+ *
+ * Phase begins: At initiation of system sleep (idle or forced).
+ * Phase ends: PM starts to notify applications of system sleep.
+ */
+    kIOPMTracePointSleepStarted                 = 0x10,
 
-    Note: The state prior to kIOPMTracePointSleepStarted may be only one of:
-        * kIOPMTracePointSystemUp
-        * kIOPMTracePointSystemLoginwindowPhase or 
+/* When kIOPMTracePointSleepApplications is the latest tracePoint,
+ * a system sleep has been initiated and PM waits for responses
+ * from notified applications.
+ *
+ * Phase begins: Begin to asynchronously fire kIOMessageSystemWillSleep
+ *      notifications, and also kIOMessageCanSystemSleep for the idle sleep case.
+ * Phase ends: When PM has received all application responses.
+ */
+    kIOPMTracePointSleepApplications            = 0x11,
 
-   * Phase begins: At initiation of system sleep (idle or forced).
-   * Phase ends: As we start to notify applications of system sleep.
+/* When kIOPMTracePointSleepPriorityClients is the latest tracePoint,
+ * PM is notifying priority clients and in-kernel system capability
+ * clients, and waiting for any asynchronous completions.
+ *
+ * Phase begins: Synchronous delivery of kIOMessageSystemWillSleep notifications.
+ * Phase ends: All notified clients have acknowledged.
+ */
+    kIOPMTracePointSleepPriorityClients         = 0x12,
+    
+/* When kIOPMTracePointSleepWillChangeInterests is the latest tracePoint,
+ * PM is calling powerStateWillChangeTo() on interested drivers of root domain.
+ *
+ * Phase begins: Dispatch a callout thread to call interested drivers.
+ * Phase ends: Callout thread work done, and acknowledgePowerChange() called
+ *      by drivers that indicated asynchronous completion.
  */
-    kIOPMTracePointSleepStarted             = 0x11,
+    kIOPMTracePointSleepWillChangeInterests     = 0x13,
 
-/* When kTracePointSystemSleepAppsPhase is the latest tracePoint,
-   a system sleep has been irrevocably inititated and PM waits
-   for responses from notified applications.
+/* When kIOPMTracePointSleepPowerPlaneDrivers is the latest tracePoint,
+ * PM is directing power plane drivers to power off in leaf-to-root order.
+ *
+ * Phase begins: Root domain informs its power children that it will drop to
+ *      sleep state. This has a cascade effect and triggers all drivers in
+ *      the power plane to transition to a lower power state if necessary.
+ * Phase ends: All power transitions in response to the root domain power
+ *      change have completed.
+ */
+    kIOPMTracePointSleepPowerPlaneDrivers       = 0x14,
+    
+/* When kIOPMTracePointSleepDidChangeInterests is the latest tracePoint,
+ * PM is calling powerStateDidChangeTo() on interested drivers of root domain.
+ *
+ * Phase begins: Dispatch a callout thread to call interested drivers.
+ * Phase ends: Callout thread work done, and acknowledgePowerChange() called
+ *      by drivers that indicated asynchronous completion.
+ */
+    kIOPMTracePointSleepDidChangeInterests      = 0x15,
 
-   * Phase begins: Begin to asynchronously fire kIOMessageSystemWillSleep notifications,
-   *        and in the case of an idle sleep kIOMessageCanSystemSleep as well.
-   * Phase ends: When we have received all user & interested kernel acknowledgements.
+/* When kIOPMTracePointSleepCapabilityClients is the latest tracePoint,
+ * PM is notifying system capability clients about system sleep.
+ *
+ * Phase begins: Send kIOMessageSystemCapabilityChange notifications to inform
+ *      capability clients that system has lost all capabilities.
+ * Phase ends: Finished sending notifications.
  */
-    kIOPMTracePointSystemSleepAppsPhase         = 0x12,
+    kIOPMTracePointSleepCapabilityClients       = 0x16,
 
+/* When kIOPMTracePointSleepPlatformActions is the latest tracePoint,
+ * PM is calling drivers that have registered a platform sleep action.
+ */
+    kIOPMTracePointSleepPlatformActions         = 0x17,
 
-/* When kIOPMTracePointSystemHibernatePhase is the latest tracePoint,
-    PM is writing the hiernate image to disk.
+/* When kIOPMTracePointSleepCPUs is the latest tracePoint,
+ * PM is shutting down all non-boot processors.
+ *
+ * Phase begins: Shutdown all non-boot processors.
+ * Phase ends: Reduced to only the boot processor running.
  */
-    kIOPMTracePointSystemHibernatePhase         = 0x13,
+    kIOPMTracePointSleepCPUs                    = 0x18,
 
-/* When kTracePointSystemSleepDriversPhase is the latest tracePoint,
-    PM is iterating the driver tree powering off devices individually.
+/* When kIOPMTracePointSleepPlatformDriver is the latest tracePoint,
+ * PM is executing platform dependent code to prepare for system sleep.
+ */
+    kIOPMTracePointSleepPlatformDriver          = 0x19,
 
-   * Phase begins: When IOPMrootDomain has received all of its power acknowledgements and begins
-   *        executing IOService::powerDomainWillChangeTo()
-   * Phase ends: When IOPMrootDomain::powerChangeDone begins executing CPU shutoff code.
+/* When kIOPMTracePointHibernate is the latest tracePoint,
+ * PM is writing the hibernate image to disk.
  */
-    kIOPMTracePointSystemSleepDriversPhase      = 0x14,
+    kIOPMTracePointHibernate                    = 0x1a,
 
-/* When kTracePointSystemSleepPlatformPhase is the latest tracePoint,
-    all apps and drivers have notified of sleep. Plotfarm is powering
-    off CPU; or system is asleep; or low level wakeup is underway.
+/* When kIOPMTracePointSystemSleep is the latest tracePoint,
+ * PM has recorded the final trace point before the hardware platform
+ * enters sleep state, or low level wakeup is underway - such as restoring
+ * the hibernate image from disk.
+ *
+ * Note: If a system is asleep and then loses power, and it does not have a
+ * hibernate image to restore from (e.g. hibernatemode = 0), then OS X will
+ * interpret this power loss as a failure in kIOPMTracePointSystemSleep.
+ *
+ * Phase begins: Before the OS directs the hardware to enter sleep state.
+ * Phase ends: Control returns to the OS on wake, but before recording the first
+ *      wake trace point.
+ */
+    kIOPMTracePointSystemSleep                  = 0x1f,
 
-    Note: If a system is asleep and then loses power, and it does not have a hibernate
-        image to restore from (e.g. hibernatemode = 0), then OS X may interpret this power
-        loss as a system crash in the kTracePointSystemSleepPlatformPhase, since the
-        power loss resembles a hang or crash, and the power being removed by the user.
+/* When kIOPMTracePointWakePlatformDriver is the latest tracePoint,
+ * PM is executing platform dependent code to prepare for system wake.
+ */
+    kIOPMTracePointWakePlatformDriver           = 0x21,
+
+/* When kIOPMTracePointWakePlatformActions is the latest tracePoint,
+ * PM is calling drivers that have registered a platform wake action.
+ */
+    kIOPMTracePointWakePlatformActions          = 0x22,
 
-   * Phase begins: IOPMrootDomain has already shut off drivers, and is now powering off CPU.
-   * Phase ends: Immediately after CPU's are powered back on during wakeup.
+/* When kIOPMTracePointWakeCPUs is the latest tracePoint,
+ * PM is bringing all non-boot processors online.
+ */
+    kIOPMTracePointWakeCPUs                     = 0x23,
+
+/* When kIOPMTracePointWakeWillPowerOnClients is the latest tracePoint,
+ * PM is sending kIOMessageSystemWillPowerOn to both kernel clients and
+ * applications. PM also notifies system capability clients about the
+ * proposed capability change.
+ *
+ * Phase begins: Send kIOMessageSystemWillPowerOn and
+ *      kIOMessageSystemCapabilityChange notifications.
+ * Phase ends: Finished sending notifications.
  */
-    kIOPMTracePointSystemSleepPlatformPhase     = 0x15,
+    kIOPMTracePointWakeWillPowerOnClients       = 0x24,
 
-/* When kTracePointSystemWakeDriversPhase is the latest tracePoint,
-    System CPU is powered, PM is notifying drivers of system wake.
+/* When kIOPMTracePointWakeWillChangeInterests is the latest tracePoint,
+ * PM is calling powerStateWillChangeTo() on interested drivers of root domain.
+ *
+ * Phase begins: Dispatch a callout thread to call interested drivers.
+ * Phase ends: Callout thread work done, and acknowledgePowerChange() called
+ *      by drivers that indicated asynchronous completion.
+ */
+    kIOPMTracePointWakeWillChangeInterests      = 0x25,
+
+/* When kIOPMTracePointWakeDidChangeInterests is the latest tracePoint,
+ * PM is calling powerStateDidChangeTo() on interested drivers of root domain.
+ *
+ * Phase begins: Dispatch a callout thread to call interested drivers.
+ * Phase ends: Callout thread work done, and acknowledgePowerChange() called
+ *      by drivers that indicated asynchronous completion.
+ */
+    kIOPMTracePointWakeDidChangeInterests       = 0x26,
 
-   * Phase begins: CPU's have successfully powered up and OS is executing.
-   * Phase ends: All drivers have handled power events & acknowledged completion.
-        IOPMrootDomain is about to deliver kIOMessageSystemHasPoweredOn.
+/* When kIOPMTracePointWakePowerPlaneDrivers is the latest tracePoint,
+ * PM is directing power plane drivers to power up in root-to-leaf order.
+ *
+ * Phase begins: Root domain informs its power children that it transitioned
+ *      to ON state. This has a cascade effect and triggers all drivers in
+ *      the power plane to re-evaluate and potentially change power state.
+ * Phase ends: All power transitions in response to the root domain power
+ *      change have completed.
  */
-    kIOPMTracePointSystemWakeDriversPhase       = 0x21,
+    kIOPMTracePointWakePowerPlaneDrivers        = 0x27,
 
-/* When kTracePointSystemWakeAppsPhase is the latest tracePoint,
-   System CPU is powered, PM has powered on each driver.
+/* When kIOPMTracePointWakeCapabilityClients is the latest tracePoint,
+ * PM is notifying system capability clients about system wake, and waiting
+ * for any asynchronous completions.
+ *
+ * Phase begins: Inform capability clients that system has gained capabilities.
+ * Phase ends: All notified clients have acknowledged.
+ */
+    kIOPMTracePointWakeCapabilityClients        = 0x28,
 
-   * Phase begins: IOPMrootDomain::tellChangeUp before sending asynchronous 
-        kIOMessageSystemHasPoweredOn notifications
-   * Phase ends: IOPMrootDomain::tellChangeUp after sending asynchronous notifications
+/* When kIOPMTracePointWakeApplications is the latest tracePoint,
+ * System CPU is powered, PM has powered on each driver.
+ *
+ * Phase begins: Send asynchronous kIOMessageSystemHasPoweredOn notifications.
+ * Phase ends: Finished sending asynchronous notifications.
  */
-    kIOPMTracePointSystemWakeAppsPhase          = 0x22,
+    kIOPMTracePointWakeApplications             = 0x29,
 
 /* kIOPMTracePointSystemLoginwindowPhase
-    This phase represents a several minute window after the system has powered on.
-    Higher levels of system diagnostics are in a heightened state of alert in this phase,
-    in case any user errors occurred that we could not detect in software.
-    
-    This several minute window  
+ * This phase represents a several minute window after the system has powered on.
+ * Higher levels of system diagnostics are in a heightened state of alert in this phase,
+ * in case any user errors occurred that we could not detect in software.
+ *
+ * Phase begins: After IOPMrootDomain sends kIOMessageSystemHasPoweredOn message.
+ * Phase ends: When loginwindow calls IOPMSleepWakeSetUUID(NULL), the system shall
+ *      be considered awake and usable. The next phase shall be kIOPMTracePointSystemUp.
+ */
+    kIOPMTracePointSystemLoginwindowPhase       = 0x30,
 
-   * Phase begins: After IOPMrootDomain sends kIOMessageSystemHasPoweredOn message.
-   * Phase ends: When loginwindow calls IOPMSleepWakeSetUUID(NULL) the system shall 
-        be considered awake and usable. The next phase shall be kIOPMTracePointSystemUp.
+/* When kIOPMTracePointDarkWakeEntry is the latest tracePoint,
+ * PM has started a transition from full wake to dark wake.
+ *
+ * Phase begins: Start transition to dark wake.
+ * Phase ends: System in dark wake. Before recording kIOPMTracePointSystemUp.
+ */
+    kIOPMTracePointDarkWakeEntry                = 0x31,
+
+/* When kIOPMTracePointDarkWakeExit is the latest tracePoint,
+ * PM has started a transition from dark wake to full wake.
+ *
+ * Phase begins: Start transition to full wake.
+ * Phase ends: System in full wake. Before recording kIOPMTracePointSystemUp.
  */
-    kIOPMTracePointSystemLoginwindowPhase       = 0x30 
+    kIOPMTracePointDarkWakeExit                 = 0x32
 };
 
 /*****************************************************************************/
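For orientation, a minimal sketch of how kernel sleep code records one of the trace points defined above, using the IOPMrootDomain::tracePoint() call that the IOCPU.cpp hunk later in this patch adopts (the wrapper function here is hypothetical):

    #include <IOKit/pwr_mgt/RootDomain.h>

    // Hypothetical helper; getPMRootDomain() and tracePoint() are the
    // real calls used elsewhere in this patch.
    static void recordSleepCheckpoint(void)
    {
        IOPMrootDomain *rootDomain = IOService::getPMRootDomain();

        // The latest trace point persists across a failed sleep, so if
        // the machine never wakes, this value names the failing phase.
        rootDomain->tracePoint(kIOPMTracePointSleepCPUs);
    }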
diff --git a/iokit/IOKit/pwr_mgt/IOPowerConnection.h b/iokit/IOKit/pwr_mgt/IOPowerConnection.h
index 179035b2f..a7ece0ad5 100644
--- a/iokit/IOKit/pwr_mgt/IOPowerConnection.h
+++ b/iokit/IOKit/pwr_mgt/IOPowerConnection.h
@@ -46,16 +46,18 @@ class IOPowerConnection : public IOService
 protected:
     /*! @field parentKnowsState	true: parent knows state of its domain
 					used by child */
-    bool		stateKnown;
+    bool            stateKnown;
+
     /*! @field currentPowerFlags	power flags which describe  the current state of the power domain
 					used by child */
     IOPMPowerFlags 	currentPowerFlags;
+
     /*! @field desiredDomainState	state number which corresponds to the child's desire
 					used by parent */
     unsigned long	desiredDomainState;
 
     /*! @field requestFlag		set to true when desiredDomainState is set */
-    bool		requestFlag;
+    bool            requestFlag;
 
     /*! @field preventIdleSleepFlag	true if child has this bit set in its desired state
 					used by parent */
@@ -67,16 +69,21 @@ protected:
 
     /*! @field awaitingAck		true if child has not yet acked our notification
 					used by parent */
-    bool		awaitingAck;
+    bool            awaitingAck;
 
     /*! @field readyFlag		true if the child has been added as a power child
 					used by parent */
-	bool		readyFlag;
+	bool            readyFlag;
 
+#ifdef XNU_KERNEL_PRIVATE
 public:
-        /*! @function setParentKnowsState
-            @abstract Sets the stateKnown variable.
-            @discussion Called by the parent when the object is created and called by the child when it discovers that the parent now knows its state. */
+    bool            delayChildNotification;
+#endif
+
+public:
+    /*! @function setParentKnowsState
+        @abstract Sets the stateKnown variable.
+        @discussion Called by the parent when the object is created and called by the child when it discovers that the parent now knows its state. */
     void setParentKnowsState (bool );
 
     /*! @function setParentCurrentPowerFlags
@@ -107,7 +114,6 @@ public:
     @discussion Called by the parent. */
     void setChildHasRequestedPower ( void );
 
-
     /*! @function childHasRequestedPower
         @abstract Return the flag that says whether the child has called requestPowerDomainState.
     @discussion Called by the PCI Aux Power Supply Driver to see if a device driver
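As a rough sketch of the parent-side protocol these accessors support (assumed usage, not part of the patch; the helper function is hypothetical):

    #include <IOKit/pwr_mgt/IOPowerConnection.h>

    // Hypothetical parent-side helper; the accessors are declared above.
    static void markDomainStateKnown(IOPowerConnection *connection)
    {
        // The parent's domain state is now determined; the child may rely on it.
        connection->setParentKnowsState(true);

        // requestFlag records whether the child has already asked for a
        // particular domain state via requestPowerDomainState().
        if (connection->childHasRequestedPower()) {
            // re-evaluate the aggregate desire for this power domain
        }
    }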
diff --git a/iokit/IOKit/pwr_mgt/Makefile b/iokit/IOKit/pwr_mgt/Makefile
index 14165762a..b82357fe9 100644
--- a/iokit/IOKit/pwr_mgt/Makefile
+++ b/iokit/IOKit/pwr_mgt/Makefile
@@ -18,20 +18,18 @@ NOT_EXPORT_HEADERS = \
 	IOPMPagingPlexus.h
 	
 INSTINC_SUBDIRS =
-INSTINC_SUBDIRS_PPC =
 INSTINC_SUBDIRS_I386 =
 INSTINC_SUBDIRS_X86_64 =
 INSTINC_SUBDIRS_ARM =
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
-EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC}
 EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
 EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM}
 
 ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h))
 
-INSTALL_MI_LIST	= IOPMLibDefs.h IOPM.h IOPMDeprecated.h
+INSTALL_MI_LIST	= IOPMLibDefs.h IOPM.h
 INSTALL_MI_LCL_LIST = IOPMPrivate.h
 INSTALL_MI_DIR = $(MI_DIR)
 
diff --git a/iokit/IOKit/pwr_mgt/RootDomain.h b/iokit/IOKit/pwr_mgt/RootDomain.h
index 2de4d289c..760e7d674 100644
--- a/iokit/IOKit/pwr_mgt/RootDomain.h
+++ b/iokit/IOKit/pwr_mgt/RootDomain.h
@@ -31,18 +31,20 @@
 #include <IOKit/IOService.h>
 #include <IOKit/pwr_mgt/IOPM.h>
 #include "IOKit/pwr_mgt/IOPMPrivate.h"
+#include <IOKit/IOBufferMemoryDescriptor.h> 
 
 #ifdef XNU_KERNEL_PRIVATE
-#if defined(__i386__) || defined(__x86_64__)
-#define ROOT_DOMAIN_RUN_STATES      1
-#endif
 struct AggressivesRecord;
-class PMAssertionsTracker;
-#endif /* XNU_KERNEL_PRIVATE */
-
+struct IOPMMessageFilterContext;
+struct IOPMActions;
+class PMSettingObject;
+class IOPMTimeline;
+class PMEventDetails;
+class PMTraceWorker;
 class IOPMPowerStateQueue;
 class RootDomainUserClient;
-class PMTraceWorker;
+class PMAssertionsTracker;
+#endif
 
 /*!
  * Types for PM Assertions
@@ -105,19 +107,6 @@ enum {
 #define kIOPMThermalEmergencySleepKey       "Thermal Emergency Sleep"
 #define kIOPMMaintenanceSleepKey            "Maintenance Sleep"
 
-enum
-{
-    kIOPMSleepReasonClamshell           = 1,
-    kIOPMSleepReasonPowerButton         = 2,
-    kIOPMSleepReasonSoftware            = 3,
-    kIOPMSleepReasonOSSwitchHibernation = 4,
-    kIOPMSleepReasonIdle                = 5,
-    kIOPMSleepReasonLowPower            = 6,
-    kIOPMSleepReasonThermalEmergency    = 7,
-    kIOPMSleepReasonMaintenance         = 8,
-    kIOPMSleepReasonMax
-};
-
 /*
  * String constants for communication with PM CPU
  */
@@ -268,17 +257,6 @@ public:
                                 const OSSymbol * typeOfInterest,
                                 IOServiceInterestHandler handler,
                                 void * target, void * ref = 0 );
-                                
-    void                pmStatsRecordEvent(
-                                int             eventIndex,
-                                AbsoluteTime    timestamp);
-
-    void                pmStatsRecordApplicationResponse(
-                            const OSSymbol		*response,
-                            const char 		    *name,
-                            int                 messageType,
-                            uint32_t			delay_ms,
-                            int     			app_pid);
 
     virtual IOReturn    callPlatformFunction(
                                 const OSSymbol *functionName,
@@ -346,57 +324,76 @@ private:
 #ifdef XNU_KERNEL_PRIVATE
     /* Root Domain internals */
 public:
+    void        tagPowerPlaneService(
+                    IOService *     service,
+                    IOPMActions *   actions );
 
-#if HIBERNATION
-    bool        getHibernateSettings(
-                    uint32_t *  hibernateMode,
-                    uint32_t *  hibernateFreeRatio,
-                    uint32_t *  hibernateFreeTime );
-#endif
+    void        overrideOurPowerChange(
+                    IOService *     service,
+                    IOPMActions *   actions,
+                    unsigned long * inOutPowerState,
+                    uint32_t *      inOutChangeFlags );
 
-#if ROOT_DOMAIN_RUN_STATES
-    void        tagPowerPlaneService(
-                    IOService * service,
-                    uint32_t *  rdFlags );
-
-    void        handleActivityTickleForService( IOService * service, 
-                                                     unsigned long type,
-                                                     unsigned long currentPowerState,
-                                                     uint32_t activityTickleCount );
-
-    void        handlePowerChangeStartForService(
-                    IOService * service,
-                    uint32_t *  rootDomainFlags,
-                    uint32_t    newPowerState,
-                    uint32_t    changeFlags );
-
-    void        handlePowerChangeDoneForService(
-                    IOService * service,
-                    uint32_t *  rootDomainFlags,
-                    uint32_t    newPowerState,
-                    uint32_t    changeFlags );
-
-    void        overridePowerStateForService(
+    void        handleOurPowerChangeStart(
                     IOService *     service,
-                    uint32_t *      rdFlags,
-                    unsigned long * powerState,
+                    IOPMActions *   actions,
+                    uint32_t        powerState,
+                    uint32_t *      inOutChangeFlags );
+
+    void        handleOurPowerChangeDone(
+                    IOService *     service,
+                    IOPMActions *   actions,
+                    uint32_t        powerState,
+                    uint32_t        changeFlags );
+
+    void        overridePowerChangeForUIService(
+                    IOService *     service,
+                    IOPMActions *   actions,
+                    unsigned long * inOutPowerState,
+                    uint32_t *      inOutChangeFlags );
+
+    void        handleActivityTickleForDisplayWrangler(
+                    IOService *     service,
+                    IOPMActions *   actions );
+
+    bool        shouldDelayChildNotification(
+                    IOService * service );
+
+    void        handlePowerChangeStartForPCIDevice(
+                    IOService *     service,
+                    IOPMActions *   actions, 
+                    uint32_t        powerState,
+                    uint32_t *      inOutChangeFlags );
+
+    void        handlePowerChangeDoneForPCIDevice(
+                    IOService *     service,
+                    IOPMActions *   actions, 
+                    uint32_t        powerState,
                     uint32_t        changeFlags );
 
+    void        askChangeDownDone(
+                    IOPMPowerChangeFlags * inOutChangeFlags,
+                    bool * cancel );
+
+    void        handlePublishSleepWakeUUID(
+                    bool shouldPublish);
+
+    void        handleQueueSleepWakeUUID(
+                    OSObject *obj);
+
     IOReturn    setMaintenanceWakeCalendar(
                     const IOPMCalendarStruct * calendar );
-#endif /* ROOT_DOMAIN_RUN_STATES */
 
     // Handle callbacks from IOService::systemWillShutdown()
-	void acknowledgeSystemWillShutdown( IOService * from );
+	void        acknowledgeSystemWillShutdown( IOService * from );
 
     // Handle platform halt and restart notifications
-	void handlePlatformHaltRestart( UInt32 pe_type );
+	void        handlePlatformHaltRestart( UInt32 pe_type );
+
+    IOReturn    shutdownSystem( void );
+    IOReturn    restartSystem( void );
+    void        handleSleepTimerExpiration( void );
 
-    IOReturn shutdownSystem( void );
-    IOReturn restartSystem( void );
-    void handleSleepTimerExpiration( void );
-    void handleForcedSleepTimerExpiration( void );
-    void stopIgnoringClamshellEventsDuringWakeup( void );
     bool        activitySinceSleep(void);
     bool        abortHibernation(void);
 
@@ -404,15 +401,67 @@ public:
     void        handleAggressivesRequests( void );
 
     void        tracePoint( uint8_t point );
+    void        tracePoint( uint8_t point, uint8_t data );
+    void        traceDetail( uint32_t data32 );
+
+    bool        systemMessageFilter(
+                    void * object, void * arg1, void * arg2, void * arg3 );
+
+/*! @function recordPMEvent
+    @abstract Logs IOService PM event timing.
+    @discussion Should only be called from IOServicePM. Should not be exported.
+    @result kIOReturn on success.
+*/
+    IOReturn    recordPMEvent( PMEventDetails *details );
+    IOReturn    recordAndReleasePMEvent( PMEventDetails *details );
+    IOReturn    recordPMEventGated( PMEventDetails *details );
+    IOReturn    recordAndReleasePMEventGated( PMEventDetails *details );
+
+    void        pmStatsRecordEvent(
+                                int             eventIndex,
+                                AbsoluteTime    timestamp);
+
+    void        pmStatsRecordApplicationResponse(
+                                const OSSymbol		*response,
+                                const char 		    *name,
+                                int                 messageType,
+                                uint32_t			delay_ms,
+                                int     			app_pid);
+
+#if HIBERNATION
+    bool        getHibernateSettings(
+                    uint32_t *  hibernateMode,
+                    uint32_t *  hibernateFreeRatio,
+                    uint32_t *  hibernateFreeTime );
+#endif
 
 private:
     friend class PMSettingObject;
-    friend class PMAssertionsTracker;
     friend class RootDomainUserClient;
+    friend class PMAssertionsTracker;
+
+    static IOReturn sysPowerDownHandler( void * target, void * refCon,
+                                    UInt32 messageType, IOService * service,
+                                    void * messageArgument, vm_size_t argSize );
+
+    static IOReturn displayWranglerNotification( void * target, void * refCon,
+                                    UInt32 messageType, IOService * service,
+                                    void * messageArgument, vm_size_t argSize );
+
+    static IOReturn rootBusyStateChangeHandler( void * target, void * refCon,
+                                    UInt32 messageType, IOService * service,
+                                    void * messageArgument, vm_size_t argSize );
+
+    static bool displayWranglerMatchPublished( void * target, void * refCon,
+                                    IOService * newService,
+                                    IONotifier * notifier);
+
+    static bool batteryPublished( void * target, void * refCon,
+                                    IOService * resourceService,
+                                    IONotifier * notifier);
 
-    // Points to our parent
     IOService *             wrangler;
-    class IORootParent *    patriarch;
+    IOService *             wranglerConnection;
 
     IOLock                  *featuresDictLock;  // guards supportedFeatures
     IOPMPowerStateQueue     *pmPowerStateQueue;
@@ -422,7 +471,7 @@ private:
     PMAssertionsTracker     *pmAssertions;
 
     // Settings controller info
-    IORecursiveLock         *settingsCtrlLock;  
+    IOLock                  *settingsCtrlLock;  
     OSDictionary            *settingsCallbacks;
     OSDictionary            *fPMSettingsDict;
 
@@ -430,16 +479,16 @@ private:
     IONotifier              *_displayWranglerNotifier;
 
     // Statistics
-    const OSSymbol           *_statsNameKey;
-    const OSSymbol           *_statsPIDKey;
-    const OSSymbol           *_statsTimeMSKey;
-    const OSSymbol           *_statsResponseTypeKey;
-    const OSSymbol           *_statsMessageTypeKey;
+    const OSSymbol          *_statsNameKey;
+    const OSSymbol          *_statsPIDKey;
+    const OSSymbol          *_statsTimeMSKey;
+    const OSSymbol          *_statsResponseTypeKey;
+    const OSSymbol          *_statsMessageTypeKey;
     
     OSString                *queuedSleepWakeUUIDString;
-
     OSArray                 *pmStatsAppResponses;
 
+    bool                    uuidPublished;
     PMStatsStruct           pmStats;
 
     // Pref: idle time before idle sleep
@@ -452,41 +501,78 @@ private:
     unsigned long           extraSleepDelay;		
 
     // Used to wait between say display idle and system idle
-    thread_call_t           extraSleepTimer;		
-
-    // Used to ignore clamshell close events while we're waking from sleep
-    thread_call_t           clamshellWakeupIgnore;   
-
+    thread_call_t           extraSleepTimer;
     thread_call_t           diskSyncCalloutEntry;
 
-    uint32_t                runStateIndex;
-    uint32_t                runStateFlags;
-    uint32_t                nextRunStateIndex;
-    uint32_t                wranglerTickled;
+    // IOPMActions parameter encoding
+    enum {
+        kPMActionsFlagIsDisplayWrangler = 0x00000001,
+        kPMActionsFlagIsGraphicsDevice  = 0x00000002,
+        kPMActionsFlagIsAudioDevice     = 0x00000004,
+        kPMActionsFlagLimitPower        = 0x00000008,
+        kPMActionsPCIBitNumberMask      = 0x000000ff  
+    };
+
+    // Track system capabilities.
+    uint32_t                _desiredCapability;
+    uint32_t                _currentCapability;
+    uint32_t                _pendingCapability;
+    uint32_t                _highestCapability;
+    OSSet *                 _joinedCapabilityClients;
+    uint32_t                _systemStateGeneration;
+
+    // Type of clients that can receive system messages.
+    enum {
+        kSystemMessageClientConfigd   = 0x01,
+        kSystemMessageClientApp       = 0x02,
+        kSystemMessageClientUser      = 0x03,
+        kSystemMessageClientKernel    = 0x04,
+        kSystemMessageClientAll       = 0x07
+    };
+    uint32_t                _systemMessageClientMask;
+
+    // Power state and capability change transitions.
+    enum {
+        kSystemTransitionNone         = 0,
+        kSystemTransitionSleep        = 1,
+        kSystemTransitionWake         = 2,
+        kSystemTransitionCapability   = 3,
+        kSystemTransitionNewCapClient = 4
+    }                       _systemTransitionType;
 
     unsigned int            systemBooting           :1;
     unsigned int            systemShutdown          :1;
+    unsigned int            systemDarkWake          :1;
     unsigned int            clamshellExists         :1;
-    unsigned int            clamshellIsClosed       :1;
-    unsigned int            ignoringClamshell       :1;
-    unsigned int            ignoringClamshellOnWake :1;
+    unsigned int            clamshellClosed         :1;
+    unsigned int            clamshellDisabled       :1;
     unsigned int            desktopMode             :1;
-    unsigned int            acAdaptorConnected      :1;    
+    unsigned int            acAdaptorConnected      :1;
 
-    unsigned int            allowSleep              :1;
-    unsigned int            sleepIsSupported        :1;
-    unsigned int            canSleep                :1;
-    unsigned int            sleepASAP               :1;
     unsigned int            idleSleepTimerPending   :1;
     unsigned int            userDisabledAllSleep    :1;
-    unsigned int            ignoreChangeDown        :1;
+    unsigned int            childPreventSystemSleep :1;
+    unsigned int            ignoreTellChangeDown    :1;
     unsigned int            wranglerAsleep          :1;
+    unsigned int            wranglerTickled         :1;
+    unsigned int            wranglerSleepIgnored    :1;
+    unsigned int            graphicsSuppressed      :1;
+
+    unsigned int            capabilityLoss          :1;
+    unsigned int            pciCantSleepFlag        :1;
+    unsigned int            pciCantSleepValid       :1;
+    unsigned int            logWranglerTickle       :1;
+    unsigned int            logGraphicsClamp        :1;
+    unsigned int            darkWakeToSleepASAP     :1;
+    unsigned int            darkWakeMaintenance     :1;
+    unsigned int            darkWakePostTickle      :1;
 
     unsigned int            sleepTimerMaintenance   :1;
     unsigned int            lowBatteryCondition     :1;
     unsigned int            hibernateDisabled       :1;
     unsigned int            hibernateNoDefeat       :1;
     unsigned int            hibernateAborted        :1;
+    unsigned int            rejectWranglerTickle    :1;
 
     uint32_t                hibernateMode;
     uint32_t                userActivityCount;
@@ -498,54 +584,45 @@ private:
     int32_t                 idxPMCPULimitedPower;
 
     IOOptionBits            platformSleepSupport;
+    uint32_t                _debugWakeSeconds;
 
     queue_head_t            aggressivesQueue;
     thread_call_t           aggressivesThreadCall;
     OSData *                aggressivesData;
 
     AbsoluteTime            wranglerSleepTime;
-    
+    AbsoluteTime            systemWakeTime;
+
     // PCI top-level PM trace
     IOService *             pciHostBridgeDevice;
+    IOService *             pciHostBridgeDriver;
 
-	// IOPMrootDomain internal sleep call
-    IOReturn privateSleepSystem( uint32_t sleepReason );
-    void announcePowerSourceChange( void );
+    IONotifier *            systemCapabilityNotifier;
 
-    void reportUserInput( void );
-    static IOReturn sysPowerDownHandler( void * target, void * refCon,
-                                    UInt32 messageType, IOService * service,
-                                    void * messageArgument, vm_size_t argSize );
+    IOPMTimeline            *timeline;
 
-    static IOReturn displayWranglerNotification( void * target, void * refCon,
-                                    UInt32 messageType, IOService * service,
-                                    void * messageArgument, vm_size_t argSize );
+	// IOPMrootDomain internal sleep call
+    IOReturn    privateSleepSystem( uint32_t sleepReason );
+    void        reportUserInput( void );
+    bool        checkSystemCanSleep( IOOptionBits options = 0 );
 
-    static bool displayWranglerPublished( void * target, void * refCon,
-                                    IOService * newService);
+    void        adjustPowerState( bool sleepASAP = false );
+    void        setQuickSpinDownTimeout( void );
+    void        restoreUserSpinDownTimeout( void );
 
-    static bool batteryPublished( void * target, void * refCon,
-                                    IOService * resourceService );
+    bool        shouldSleepOnClamshellClosed(void );
+    void        sendClientClamshellNotification( void );
 
-    void adjustPowerState( void );
-    void setQuickSpinDownTimeout( void );
-    void restoreUserSpinDownTimeout( void );
-    
-    bool shouldSleepOnClamshellClosed(void );
-    void sendClientClamshellNotification( void );
-    
     // Inform PMCPU of changes to state like lid, AC vs. battery
-    void informCPUStateChange( uint32_t type, uint32_t value );
+    void        informCPUStateChange( uint32_t type, uint32_t value );
 
-    void dispatchPowerEvent( uint32_t event, void * arg0, uint64_t arg1 );
-    void handlePowerNotification( UInt32 msg );
+    void        dispatchPowerEvent( uint32_t event, void * arg0, uint64_t arg1 );
+    void        handlePowerNotification( UInt32 msg );
 
-    IOReturn setPMSetting(const OSSymbol *, OSObject *);
+    IOReturn    setPMSetting(const OSSymbol *, OSObject *);
 
-    void startIdleSleepTimer( uint32_t inSeconds );
-    void cancelIdleSleepTimer( void );
-
-    void updateRunState( uint32_t inRunState );
+    void        startIdleSleepTimer( uint32_t inSeconds );
+    void        cancelIdleSleepTimer( void );
 
     IOReturn    setAggressiveness(
                         unsigned long type,
@@ -561,19 +638,23 @@ private:
                         const AggressivesRecord * array,
                         int count );
 
-    void        aggressivenessChanged( void );
+    // getPMTraceMemoryDescriptor should only be called by our friend RootDomainUserClient
+    IOMemoryDescriptor *getPMTraceMemoryDescriptor(void);
 
     IOReturn    setPMAssertionUserLevels(IOPMDriverAssertionType);
-    
+
     void        publishSleepWakeUUID( bool shouldPublish );
 
+    void        evaluatePolicy( int stimulus, uint32_t arg = 0 );
+
+    void        deregisterPMSettingObject( PMSettingObject * pmso );
+
 #if HIBERNATION
     bool        getSleepOption( const char * key, uint32_t * option );
     bool        evaluateSystemSleepPolicy( IOPMSystemSleepParameters * p );
     void        evaluateSystemSleepPolicyEarly( void );
     void        evaluateSystemSleepPolicyFinal( void );
 #endif /* HIBERNATION */
-
 #endif /* XNU_KERNEL_PRIVATE */
 };
 
@@ -582,10 +663,9 @@ class IORootParent: public IOService
 {
     OSDeclareFinalStructors(IORootParent)
 
-private:
-    unsigned long mostRecentChange;
-
 public:
+    static void initialize( void );
+    virtual OSObject * copyProperty( const char * aKey ) const;
     bool start( IOService * nub );
     void shutDownSystem( void );
     void restartSystem( void );
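For context, a minimal sketch of a kernel client observing the root domain through the registerInterest() override declared above (the handler body and installer are hypothetical; the message constant and gIOGeneralInterest are standard IOKit):

    #include <IOKit/IOMessage.h>
    #include <IOKit/pwr_mgt/RootDomain.h>

    static IOReturn myPMHandler(void *target, void *refCon,
                                UInt32 messageType, IOService *provider,
                                void *messageArgument, vm_size_t argSize)
    {
        if (messageType == kIOMessageSystemWillPowerOn) {
            // wake path: restore hardware state before clients resume
        }
        return kIOReturnSuccess;
    }

    static IONotifier *installPMHandler(IOPMrootDomain *root)
    {
        // gIOGeneralInterest delivers the sleep/wake messages described
        // in the trace-point comments earlier in this patch.
        return root->registerInterest(gIOGeneralInterest,
                                      &myPMHandler, NULL, NULL);
    }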
diff --git a/iokit/IOKit/rtc/Makefile b/iokit/IOKit/rtc/Makefile
index ace4cfb12..e16d5b83a 100644
--- a/iokit/IOKit/rtc/Makefile
+++ b/iokit/IOKit/rtc/Makefile
@@ -14,13 +14,11 @@ MI_DIR = rtc
 NOT_EXPORT_HEADERS = 
 
 INSTINC_SUBDIRS =
-INSTINC_SUBDIRS_PPC =
 INSTINC_SUBDIRS_I386 =
 INSTINC_SUBDIRS_X86_64 =
 INSTINC_SUBDIRS_ARM =
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
-EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC}
 EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
 EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM}
diff --git a/iokit/IOKit/system_management/Makefile b/iokit/IOKit/system_management/Makefile
index 1f168421f..c887db562 100644
--- a/iokit/IOKit/system_management/Makefile
+++ b/iokit/IOKit/system_management/Makefile
@@ -14,13 +14,11 @@ MI_DIR = system_management
 NOT_EXPORT_HEADERS = 
 
 INSTINC_SUBDIRS =
-INSTINC_SUBDIRS_PPC =
 INSTINC_SUBDIRS_I386 =
 INSTINC_SUBDIRS_X86_64 =
 INSTINC_SUBDIRS_ARM =
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
-EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC}
 EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
 EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM}
diff --git a/iokit/Kernel/IOBufferMemoryDescriptor.cpp b/iokit/Kernel/IOBufferMemoryDescriptor.cpp
index 004d2ec89..563059600 100644
--- a/iokit/Kernel/IOBufferMemoryDescriptor.cpp
+++ b/iokit/Kernel/IOBufferMemoryDescriptor.cpp
@@ -103,16 +103,6 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask(
     // Grab IOMD bits from the Buffer MD options
     iomdOptions  |= (options & kIOBufferDescriptorMemoryFlags);
 
-#if 0
-    // workarounds-
-    if ((options & kIOMemoryPhysicallyContiguous) || ((capacity == 0x1000) && (inTask == kernel_task))
-      && !physicalMask)
-    {
-	highestMask = physicalMask = 0xFFFFF000;
-    }
-    //-
-#endif
-
     if (physicalMask && (alignment <= 1))
     {
 	alignment   = ((physicalMask ^ (-1ULL)) & (physicalMask - 1));
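The expression above derives an alignment mask from the DMA physical mask. A small user-space check of the arithmetic (a sketch, not kernel code), for a mask of the usual high-ones/low-zeros form:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        // A mask with the low 12 bits clear: addresses below 4 GiB,
        // 4 KiB granularity.
        uint64_t physicalMask = 0xFFFFF000ULL;
        uint64_t alignment = (physicalMask ^ (-1ULL)) & (physicalMask - 1);

        // (~mask) & (mask - 1) keeps exactly the low clear bits:
        // the result is 2^12 - 1, a 4 KiB alignment mask.
        assert(alignment == 0xFFFULL);
        return 0;
    }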
diff --git a/iokit/Kernel/IOCPU.cpp b/iokit/Kernel/IOCPU.cpp
index 7646d2a97..5dd9ea416 100644
--- a/iokit/Kernel/IOCPU.cpp
+++ b/iokit/Kernel/IOCPU.cpp
@@ -302,12 +302,15 @@ void IOCPUSleepKernel(void)
     long cnt, numCPUs;
     IOCPU *target;
     IOCPU *bootCPU = NULL;
-  
+    IOPMrootDomain  *rootDomain = IOService::getPMRootDomain();
+
     kprintf("IOCPUSleepKernel\n");
 
     OSIterator * iter;
     IOService *  service;
 
+    rootDomain->tracePoint( kIOPMTracePointSleepPlatformActions );
+
     queue_init(&gIOSleepActionQueue);
     queue_init(&gIOWakeActionQueue);
 
@@ -333,6 +336,8 @@ void IOCPUSleepKernel(void)
     iocpu_run_platform_actions(&gIOSleepActionQueue, 0, 0U-1,
 				NULL, NULL, NULL);
 
+    rootDomain->tracePoint( kIOPMTracePointSleepCPUs );
+
     numCPUs = gIOCPUs->getCount();
     // Sleep the CPUs.
     cnt = numCPUs;
@@ -352,10 +357,14 @@ void IOCPUSleepKernel(void)
         }
     }
 
+    rootDomain->tracePoint( kIOPMTracePointSleepPlatformDriver );
+
     // Now sleep the boot CPU.
     if (bootCPU)
         bootCPU->haltCPU();
 
+    rootDomain->tracePoint( kIOPMTracePointWakePlatformActions );
+
     iocpu_run_platform_actions(&gIOWakeActionQueue, 0, 0U-1,
 				    NULL, NULL, NULL);
 
@@ -372,6 +381,8 @@ void IOCPUSleepKernel(void)
     if (!queue_empty(&gIOWakeActionQueue))
 	panic("gIOWakeActionQueue");
   
+    rootDomain->tracePoint( kIOPMTracePointWakeCPUs );
+
     // Wake the other CPUs.
     for (cnt = 0; cnt < numCPUs; cnt++) 
     {
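Taken together, the tracePoint() calls added in this hunk bracket the sleep/wake sequence. A sketch table (not part of the patch, derived from the hunk above) of the resulting order:

    // The phase order IOCPUSleepKernel() now records.
    static const uint8_t kSleepWakeTraceOrder[] = {
        kIOPMTracePointSleepPlatformActions,  // run registered sleep actions
        kIOPMTracePointSleepCPUs,             // halt non-boot processors
        kIOPMTracePointSleepPlatformDriver,   // halt boot CPU; system sleeps
        // ... hardware sleeps; execution resumes here on wake ...
        kIOPMTracePointWakePlatformActions,   // run registered wake actions
        kIOPMTracePointWakeCPUs,              // restart non-boot processors
    };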
diff --git a/iokit/Kernel/IOCatalogue.cpp b/iokit/Kernel/IOCatalogue.cpp
index 8c51eed84..c6de3b56c 100644
--- a/iokit/Kernel/IOCatalogue.cpp
+++ b/iokit/Kernel/IOCatalogue.cpp
@@ -74,31 +74,6 @@ IOLock         * gIOCatalogLock;
 #if PRAGMA_MARK
 #pragma mark Utility functions
 #endif
-/*********************************************************************
-*********************************************************************/
-static void
-UniqueProperties(OSDictionary * dict)
-{
-    OSString * data;
-
-    data = OSDynamicCast(OSString, dict->getObject(gIOClassKey));
-    if (data) {
-        const OSSymbol *classSymbol = OSSymbol::withString(data);
-
-        dict->setObject( gIOClassKey, (OSSymbol *) classSymbol);
-        classSymbol->release();
-    }
-
-    data = OSDynamicCast(OSString, dict->getObject(gIOMatchCategoryKey));
-    if (data) {
-        const OSSymbol *classSymbol = OSSymbol::withString(data);
-
-        dict->setObject(gIOMatchCategoryKey, (OSSymbol *) classSymbol);
-        classSymbol->release();
-    }
-    return;
-}
-
 /*********************************************************************
 * Add a new personality to the set if it has a unique IOResourceMatchKey value.
 * XXX -- svail: This should be optimized.
@@ -170,13 +145,13 @@ bool IOCatalogue::init(OSArray * initArray)
     gIOCatalogLock = IOLockAlloc();
 
     lock     = gIOCatalogLock;
-#if __ppc__ || __i386__
+#if __i386__
     kld_lock = NULL;
-#endif /* __ppc__ || __i386__ */
+#endif /* __i386__ */
 
     kernelTables->reset();
     while( (dict = (OSDictionary *) kernelTables->getNextObject())) {
-        UniqueProperties(dict);
+        OSKext::uniquePersonalityProperties(dict);
         if( 0 == dict->getObject( gIOClassKey ))
             IOLog("Missing or bad \"%s\" key\n",
                     gIOClassKey->getCStringNoCopy());
@@ -306,7 +281,7 @@ IOCatalogue::findDrivers(
     OSDictionary         * dict;
     OSOrderedSet         * set;
 
-    UniqueProperties(matching);
+    OSKext::uniquePersonalityProperties(matching);
 
     set = OSOrderedSet::withCapacity( 1, IOServiceOrdering,
                                       (void *)gIOProbeScoreKey );
@@ -345,7 +320,7 @@ bool IOCatalogue::addDrivers(
     bool                   result = false;
     OSCollectionIterator * iter = NULL;       // must release
     OSOrderedSet         * set = NULL;        // must release
-    OSDictionary         * dict = NULL;       // do not release
+    OSObject             * object = NULL;       // do not release
     OSArray              * persons = NULL;    // do not release
 
     persons = OSDynamicCast(OSArray, drivers);
@@ -364,16 +339,26 @@ bool IOCatalogue::addDrivers(
         goto finish;
     }
 
+   /* Start with success; clear it on an error.
+    */
     result = true;
 
     IOLockLock(lock);
-    while ( (dict = (OSDictionary *) iter->getNextObject()) ) {
+    while ( (object = iter->getNextObject()) ) {
     
         // xxx Deleted OSBundleModuleDemand check; will handle in other ways for SL
 
+        OSDictionary * personality = OSDynamicCast(OSDictionary, object);
+
         SInt count;
-        
-        UniqueProperties(dict);
+
+        if (!personality) {
+            IOLog("IOCatalogue::addDrivers() encountered non-dictionary; bailing.\n");
+            result = false;
+            break;
+        }
+
+        OSKext::uniquePersonalityProperties(personality);
         
         // Add driver personality to catalogue.
         count = array->getCount();
@@ -389,7 +374,7 @@ bool IOCatalogue::addDrivers(
             * Do not compare just the properties present in one driver
             * personality or the other.
             */
-            if (dict->isEqualTo(driver)) {
+            if (personality->isEqualTo(driver)) {
                 break;
             }
         }
@@ -398,15 +383,15 @@ bool IOCatalogue::addDrivers(
             continue;
         }
         
-        result = array->setObject(dict);
+        result = array->setObject(personality);
         if (!result) {
             break;
         }
         
-        AddNewImports(set, dict);
+        AddNewImports(set, personality);
     }
     // Start device matching.
-    if (doNubMatching && (set->getCount() > 0)) {
+    if (result && doNubMatching && (set->getCount() > 0)) {
         IOService::catalogNewDrivers(set);
         generation++;
     }
@@ -455,7 +440,7 @@ IOCatalogue::removeDrivers(
         return false;
     }
 
-    UniqueProperties( matching );
+    OSKext::uniquePersonalityProperties( matching );
 
     IOLockLock(lock);
     kernelTables->reset();
@@ -553,8 +538,7 @@ void IOCatalogue::moduleHasLoaded(OSString * moduleName)
     startMatching(dict);
     dict->release();
 
-    (void) OSKext::setDeferredLoadSucceeded();
-    (void) OSKext::considerRebuildOfPrelinkedKernel();
+    (void) OSKext::considerRebuildOfPrelinkedKernel(moduleName);
 }
 
 void IOCatalogue::moduleHasLoaded(const char * moduleName)
@@ -589,7 +573,7 @@ static IOReturn _terminateDrivers(OSDictionary * matching)
     if ( !iter )
         return kIOReturnNoMemory;
 
-    UniqueProperties( matching );
+    OSKext::uniquePersonalityProperties( matching );
 
     // terminate instances.
     do {
@@ -785,7 +769,183 @@ bool IOCatalogue::startMatching( OSDictionary * matching )
 
 void IOCatalogue::reset(void)
 {
+    IOCatalogue::resetAndAddDrivers(/* no drivers; true reset */ NULL,
+        /* doMatching */ false);
+    return;
+}
+
+bool IOCatalogue::resetAndAddDrivers(OSArray * drivers, bool doNubMatching)
+{
+    bool                   result              = false;
+    OSArray              * newPersonalities    = NULL;  // do not release
+    OSCollectionIterator * newPIterator        = NULL;  // must release
+    OSOrderedSet         * matchSet            = NULL;  // must release
+    OSArray              * oldPersonalities    = NULL;  // must release
+    OSArray              * kernelPersonalities = NULL;  // must release
+    OSString             * errorString         = NULL;  // must release
+    OSObject             * object              = NULL;  // do not release
+    OSDictionary         * thisNewPersonality  = NULL;  // do not release
+    signed int             count, i;
+
+    extern const char    * gIOKernelConfigTables;
+
+    if (drivers) {
+        newPersonalities = OSDynamicCast(OSArray, drivers);
+        if (!newPersonalities) {
+            goto finish;
+        }
+
+        newPIterator = OSCollectionIterator::withCollection(newPersonalities);
+        if (!newPIterator) {
+            goto finish;
+        }
+        
+        matchSet = OSOrderedSet::withCapacity(10, IOServiceOrdering,
+            (void *)gIOProbeScoreKey);
+        if (!matchSet) {
+            goto finish;
+        }
+    }
+
+   /* Read personalities for the built-in kernel driver classes.
+    * We don't have many any more.
+    */
+    kernelPersonalities = OSDynamicCast(OSArray,
+        OSUnserialize(gIOKernelConfigTables, &errorString));
+    if (!kernelPersonalities && errorString) {
+        IOLog("KernelConfigTables syntax error: %s\n",
+            errorString->getCStringNoCopy());
+        goto finish;
+    }
+    
+   /* Now copy the current array of personalities so we can reuse them
+    * if the new list contains any duplicates. This saves on memory
+    * consumption.
+    */
+    oldPersonalities = OSDynamicCast(OSArray, array->copyCollection());
+    if (!oldPersonalities) {
+        goto finish;
+    }
+
+    result = true;
+
     IOLog("Resetting IOCatalogue.\n");
+    
+   /* No goto finish from here to unlock.
+    */
+    IOLockLock(lock);
+    
+    array->flushCollection();
+
+   /* Add back the kernel personalities and remove them from the old
+    * array so we don't try to match on them again. Go forward through
+    * the arrays as this causes the least iteration since kernel personalities
+    * should always be first.
+    */
+    count = kernelPersonalities->getCount();
+    for (i = 0; i < count; i++) {
+    
+       /* Static cast here, as the data is coming from within the kernel image.
+        */
+        OSDictionary * thisNewPersonality = (OSDictionary *)
+            kernelPersonalities->getObject(i);
+        array->setObject(thisNewPersonality);
+
+        signed int oldPCount = oldPersonalities->getCount();
+        for (signed int oldPIndex = 0; oldPIndex < oldPCount; oldPIndex++) {
+            if (thisNewPersonality->isEqualTo(oldPersonalities->getObject(oldPIndex))) {
+                oldPersonalities->removeObject(oldPIndex);
+                break;
+            }
+        }
+    }
+
+   /* Now add the new set of personalities passed in, using existing
+    * copies if we had them in kernel memory already.
+    */
+    if (newPIterator) {
+        OSDictionary * thisOldPersonality = NULL;  // do not release
+        
+        while ( (object = newPIterator->getNextObject()) ) {
+
+            thisNewPersonality = OSDynamicCast(OSDictionary, object);
+            if (!thisNewPersonality) {
+                IOLog("IOCatalogue::resetAndAddDrivers() encountered non-dictionary; bailing.\n");
+                result = false;
+                break;
+            }
+
+           /* Convert common OSString property values to OSSymbols.
+            */
+            OSKext::uniquePersonalityProperties(thisNewPersonality);
+            
+           /* Add driver personality to catalogue, but if we had a copy already
+            * use that instead so we don't have multiple copies from OSKext instances.
+            */
+            count = oldPersonalities->getCount();
+            thisOldPersonality = NULL;
+            while (count--) {
+                
+                thisOldPersonality = (OSDictionary *)oldPersonalities->getObject(count);
+                
+               /* Unlike in other functions, this comparison must be exact!
+                * The catalogue must be able to contain personalities that
+                * are proper supersets of others.
+                * Do not compare just the properties present in one driver
+                * personality or the other.
+                */
+                if (thisNewPersonality->isEqualTo(thisOldPersonality)) {
+                    break;
+                }
+            }
+
+           /* If we found a dup, add the *original* back to the catalogue,
+            * remove it from our bookkeeping list, and continue.
+            * Don't worry about matching on personalities we already had.
+            */
+            if (count >= 0) {
+                array->setObject(thisOldPersonality);
+                oldPersonalities->removeObject(count);
+                continue;
+            }
+
+           /* Otherwise add the new personality and mark it for matching.
+            */
+            array->setObject(thisNewPersonality);
+            AddNewImports(matchSet, thisNewPersonality);                
+        }
+
+       /*****
+        * Now, go through remaining old personalities, which have effectively
+        * been removed, and add them to the match set as necessary.
+        */
+        count = oldPersonalities->getCount();
+        while (count--) {
+        
+           /* Static cast here is ok as these dictionaries were already in the catalogue.
+            */
+            thisOldPersonality = (OSDictionary *)oldPersonalities->getObject(count);
+            AddNewImports(matchSet, thisOldPersonality);
+        }
+
+       /* Finally, start device matching on all new & removed personalities.
+        */
+        if (result && doNubMatching && (matchSet->getCount() > 0)) {
+            IOService::catalogNewDrivers(matchSet);
+            generation++;
+        }
+    }
+
+    IOLockUnlock(lock);
+
+finish:
+    if (newPIterator) newPIterator->release();
+    if (matchSet) matchSet->release();
+    if (oldPersonalities) oldPersonalities->release();
+    if (kernelPersonalities) kernelPersonalities->release();
+    if (errorString) errorString->release();
+
+    return result;
 }
 
 bool IOCatalogue::serialize(OSSerialize * s) const
@@ -837,9 +997,9 @@ bool IOCatalogue::serializeData(IOOptionBits kind, OSSerialize * s) const
 **********************************************************************
 **********************************************************************
* These functions are no longer used but are necessary for C++ binary
-* compatibility on ppc/i386.
+* compatibility on i386.
 **********************************************************************/
-#if __ppc__ || __i386__
+#if __i386__
 
 bool IOCatalogue::recordStartupExtensions(void)
 {  return false;  }
@@ -850,4 +1010,4 @@ bool IOCatalogue::addExtensionsFromArchive(OSData * mkext)
 kern_return_t IOCatalogue::removeKernelLinker(void)
 {  return KERN_NOT_SUPPORTED;  }
 
-#endif /* __ppc__ || __i386__ */
+#endif /* __i386__ */
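For context, a sketch of how a caller might feed a personality into the catalogue through the addDrivers() path patched above (the class name is hypothetical; gIOCatalogue and gIOClassKey are the existing globals):

    #include <IOKit/IOCatalogue.h>
    #include <IOKit/IOService.h>

    static bool addExamplePersonality(void)
    {
        bool ok = false;
        OSDictionary   *personality = OSDictionary::withCapacity(2);
        OSArray        *drivers     = OSArray::withCapacity(1);
        const OSSymbol *className   =
            OSSymbol::withCStringNoCopy("com_example_Driver");

        if (personality && drivers && className) {
            // IOClass is one of the keys addDrivers() uniques via
            // OSKext::uniquePersonalityProperties().
            personality->setObject(gIOClassKey, className);
            drivers->setObject(personality);
            ok = gIOCatalogue->addDrivers(drivers, true /* doNubMatching */);
        }
        if (className)   className->release();
        if (personality) personality->release();
        if (drivers)     drivers->release();
        return ok;
    }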
diff --git a/iokit/Kernel/IOCommandGate.cpp b/iokit/Kernel/IOCommandGate.cpp
index 0b823d2b6..29ecd859e 100644
--- a/iokit/Kernel/IOCommandGate.cpp
+++ b/iokit/Kernel/IOCommandGate.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2000, 2009-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -49,11 +49,33 @@ OSMetaClassDefineReservedUnused(IOCommandGate, 5);
 OSMetaClassDefineReservedUnused(IOCommandGate, 6);
 OSMetaClassDefineReservedUnused(IOCommandGate, 7);
 
-bool IOCommandGate::checkForWork() { return false; }
+#if IOKITSTATS
+
+#define IOStatisticsInitializeCounter() \
+do { \
+	IOStatistics::setCounterType(IOEventSource::reserved->counter, kIOStatisticsCommandGateCounter); \
+} while (0)
+
+#define IOStatisticsActionCall() \
+do { \
+	IOStatistics::countCommandGateActionCall(IOEventSource::reserved->counter); \
+} while (0)
+
+#else
+
+#define IOStatisticsInitializeCounter()
+#define IOStatisticsActionCall()
+
+#endif /* IOKITSTATS */
 
 bool IOCommandGate::init(OSObject *inOwner, Action inAction)
 {
-    return super::init(inOwner, (IOEventSource::Action) inAction);
+    bool res = super::init(inOwner, (IOEventSource::Action) inAction);
+    if (res) {
+        IOStatisticsInitializeCounter();
+    }
+
+    return res;
 }
 
 IOCommandGate *
@@ -162,6 +184,8 @@ IOReturn IOCommandGate::runAction(Action inAction,
 		IOTimeStampStartConstant(IODBG_CMDQ(IOCMDQ_ACTION),
 								 (uintptr_t) inAction, (uintptr_t) owner);
 	
+    IOStatisticsActionCall();
+	
     // Must be gated and on the work loop or enabled
     res = (*inAction)(owner, arg0, arg1, arg2, arg3);
 	
@@ -170,7 +194,7 @@ IOReturn IOCommandGate::runAction(Action inAction,
 							   (uintptr_t) inAction, (uintptr_t) owner);
     
     openGate();
-
+	
     return res;
 }
 
@@ -196,9 +220,11 @@ IOReturn IOCommandGate::attemptAction(Action inAction,
 		
         if (trace)
             IOTimeStampStartConstant(IODBG_CMDQ(IOCMDQ_ACTION),
-			    (uintptr_t) inAction, (uintptr_t) owner);
-
-	res = (*inAction)(owner, arg0, arg1, arg2, arg3);
+									 (uintptr_t) inAction, (uintptr_t) owner);
+        
+        IOStatisticsActionCall();
+        
+        res = (*inAction)(owner, arg0, arg1, arg2, arg3);
 		
         if (trace)
             IOTimeStampEndConstant(IODBG_CMDQ(IOCMDQ_ACTION),
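A minimal sketch of the call path the new IOStatisticsActionCall() hook counts (the driver-side function names are hypothetical):

    #include <IOKit/IOCommandGate.h>

    // Runs with the command gate closed: serialized against the work loop.
    static IOReturn gatedWork(OSObject *owner, void *arg0,
                              void *arg1, void *arg2, void *arg3)
    {
        // single-threaded access to driver state goes here
        return kIOReturnSuccess;
    }

    static IOReturn doGatedWork(IOCommandGate *gate, void *request)
    {
        // Each such call is now tallied by IOStatisticsActionCall().
        return gate->runAction(&gatedWork, request);
    }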
diff --git a/iokit/Kernel/IOCommandQueue.cpp b/iokit/Kernel/IOCommandQueue.cpp
index 7d7249dee..3a184bf94 100644
--- a/iokit/Kernel/IOCommandQueue.cpp
+++ b/iokit/Kernel/IOCommandQueue.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -35,6 +35,20 @@
 
 #include <mach/sync_policy.h>
 
+#if IOKITSTATS
+
+#define IOStatisticsInitializeCounter() \
+	IOStatistics::setCounterType(reserved->counter, kIOStatisticsCommandQueueCounter)
+
+#define IOStatisticsActionCall() \
+	IOStatistics::countCommandQueueActionCall(reserved->counter)
+
+#else
+
+#define IOStatisticsInitializeCounter()
+#define IOStatisticsActionCall()
+
+#endif /* IOKITSTATS */
 
 #define NUM_FIELDS_IN_COMMAND	4
 typedef struct commandEntryTag {
@@ -87,6 +101,8 @@ bool IOCommandQueue::init(OSObject *inOwner,
 
     producerIndex = consumerIndex = 0;
 
+    IOStatisticsInitializeCounter();
+
     return true;
 }
 
@@ -130,7 +146,7 @@ void IOCommandQueue::free()
 
 bool IOCommandQueue::checkForWork()
 {
-    void *field0, *field1, *field2, *field3;
+    void	*field0, *field1, *field2, *field3;
 	bool	trace = ( gIOKitTrace & kIOTraceCommandGates ) ? true : false;
 
     if (!enabled || consumerIndex == producerIndex)
@@ -150,10 +166,11 @@ bool IOCommandQueue::checkForWork()
 
 	if (trace)
 		IOTimeStampStartConstant(IODBG_CMDQ(IOCMDQ_ACTION),
-			(uintptr_t) action, (uintptr_t) owner);
-
+								 (uintptr_t) action, (uintptr_t) owner);
+	
+    IOStatisticsActionCall();
     (*(IOCommandQueueAction) action)(owner, field0, field1, field2, field3);
-
+	
 	if (trace)
 		IOTimeStampEndConstant(IODBG_CMDQ(IOCMDQ_ACTION),
 							   (uintptr_t) action, (uintptr_t) owner);
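Both this file and IOCommandGate.cpp wrap their statistics hooks in do { ... } while (0). A stand-alone illustration of why that idiom matters for multi-statement macros (the example macro and function are hypothetical):

    #define COUNT_EVENT(counter) \
    do { \
        (counter)++; \
    } while (0)

    static int example(int flag, int counter)
    {
        // Without the do/while wrapper, a bare braced macro body followed
        // by ';' would break this if/else pairing at compile time.
        if (flag)
            COUNT_EVENT(counter);
        else
            counter = 0;
        return counter;
    }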
diff --git a/iokit/Kernel/IODMACommand.cpp b/iokit/Kernel/IODMACommand.cpp
index 444abf720..b95ee921d 100644
--- a/iokit/Kernel/IODMACommand.cpp
+++ b/iokit/Kernel/IODMACommand.cpp
@@ -84,7 +84,7 @@ enum
 /**************************** class IODMACommand ***************************/
 
 #undef super
-#define super OSObject
+#define super IOCommand
 OSDefineMetaClassAndStructors(IODMACommand, IOCommand);
 
 OSMetaClassDefineReservedUsed(IODMACommand,  0);
@@ -227,6 +227,8 @@ IODMACommand::free()
 IOReturn
 IODMACommand::setMemoryDescriptor(const IOMemoryDescriptor *mem, bool autoPrepare)
 {
+	IOReturn err = kIOReturnSuccess;
+	
     if (mem == fMemory)
     {
 	if (!autoPrepare)
@@ -244,15 +246,15 @@ IODMACommand::setMemoryDescriptor(const IOMemoryDescriptor *mem, bool autoPrepar
 	if (fActive)
 	    return kIOReturnBusy;
 	clearMemoryDescriptor();
-    };
+    }
 
     if (mem) {
 	bzero(&fMDSummary, sizeof(fMDSummary));
-	IOReturn rtn = mem->dmaCommandOperation(
+	err = mem->dmaCommandOperation(
 		kIOMDGetCharacteristics,
 		&fMDSummary, sizeof(fMDSummary));
-	if (rtn)
-	    return rtn;
+	if (err)
+	    return err;
 
 	ppnum_t highPage = fMDSummary.fHighestPage ? fMDSummary.fHighestPage : gIOLastPage;
 
@@ -269,11 +271,15 @@ IODMACommand::setMemoryDescriptor(const IOMemoryDescriptor *mem, bool autoPrepar
 	fMemory = mem;
 
 	mem->dmaCommandOperation(kIOMDSetDMAActive, this, 0);
-	if (autoPrepare)
-	    return prepare();
-    };
-
-    return kIOReturnSuccess;
+	if (autoPrepare) {
+		err = prepare();
+		if (err) {
+			clearMemoryDescriptor();
+		}
+	}
+    }
+	
+    return err;
 }
 
 IOReturn
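The caller-visible effect of the autoPrepare change above, sketched from the client side (the function and names are hypothetical):

    #include <IOKit/IODMACommand.h>

    static IOReturn attachBuffer(IODMACommand *dma, IOMemoryDescriptor *md)
    {
        // autoPrepare = true: prepare() runs inside setMemoryDescriptor().
        // With this patch, a prepare() failure also clears the descriptor,
        // so the command is not left holding a half-attached md.
        IOReturn err = dma->setMemoryDescriptor(md, true);
        if (err != kIOReturnSuccess) {
            // only the caller's own md reference remains to clean up
        }
        return err;
    }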
diff --git a/iokit/Kernel/IODMAController.cpp b/iokit/Kernel/IODMAController.cpp
index 33a54dc76..603998035 100644
--- a/iokit/Kernel/IODMAController.cpp
+++ b/iokit/Kernel/IODMAController.cpp
@@ -50,9 +50,9 @@ IODMAController *IODMAController::getController(IOService *provider, UInt32 dmaI
   
   // Find the name of the parent dma controller
   dmaParentData = OSDynamicCast(OSData, provider->getProperty("dma-parent"));
-  if (dmaParentData == 0) return false;
+  if (dmaParentData == 0) return NULL;
   dmaParentName = createControllerName(*(UInt32 *)dmaParentData->getBytesNoCopy());
-  if (dmaParentName == 0) return false;
+  if (dmaParentName == 0) return NULL;
   
   // Wait for the parent dma controller
   dmaController = OSDynamicCast(IODMAController, IOService::waitForService(IOService::nameMatching(dmaParentName)));
diff --git a/iokit/Kernel/IODeviceTreeSupport.cpp b/iokit/Kernel/IODeviceTreeSupport.cpp
index afb221cf4..8de463efd 100644
--- a/iokit/Kernel/IODeviceTreeSupport.cpp
+++ b/iokit/Kernel/IODeviceTreeSupport.cpp
@@ -37,12 +37,14 @@
 
 #include <pexpert/device_tree.h>
 
+#include <machine/machine_routines.h>
+
 extern "C" {
-    #include <machine/machine_routines.h>
-    void DTInit( void * data );
 
-    int IODTGetLoaderInfo( char *key, void **infoAddr, int *infosize );
-    void IODTFreeLoaderInfo( char *key, void *infoAddr, int infoSize );
+int IODTGetLoaderInfo( const char *key, void **infoAddr, int *infosize );
+void IODTFreeLoaderInfo( const char *key, void *infoAddr, int infoSize );
+int IODTGetDefault(const char *key, void *infoAddr, unsigned int infoSize );
+
 }
 
 #include <IOKit/assert.h>
@@ -209,26 +211,6 @@ IODeviceTreeAlloc( void * dtTop )
             if( !intMap && child->getProperty( gIODTInterruptParentKey))
                 intMap = true;
 
-#if __ppc__
-            OSObject * obj;
-
-            // Look for a "driver,AAPL,MacOSX,PowerPC" property.
-            if( (obj = child->getProperty( "driver,AAPL,MacOSX,PowerPC"))) {
-                gIOCatalogue->addExtensionsFromArchive((OSData *)obj);
-                child->removeProperty( "driver,AAPL,MacOSX,PowerPC");
-            }
-
-            // some gross pruning
-            child->removeProperty( "lanLib,AAPL,MacOS,PowerPC");
-
-            if( (obj = child->getProperty( "driver,AAPL,MacOS,PowerPC"))) {
-
-                if( (0 == (prop = (OSData *)child->getProperty( gIODTTypeKey )))
-                  || (strncmp("display", (char *)prop->getBytesNoCopy(), sizeof("display"))) ) {
-                    child->removeProperty( "driver,AAPL,MacOS,PowerPC");
-                }
-            }
-#endif /* __ppc__ */
         }
         regIter->release();
     }
@@ -265,7 +247,7 @@ IODeviceTreeAlloc( void * dtTop )
     return( parent);
 }
 
-int IODTGetLoaderInfo( char *key, void **infoAddr, int *infoSize )
+int IODTGetLoaderInfo( const char *key, void **infoAddr, int *infoSize )
 {
     IORegistryEntry		*chosen;
     OSData				*propObj;
@@ -290,7 +272,7 @@ int IODTGetLoaderInfo( char *key, void **infoAddr, int *infoSize )
     return 0;
 }
 
-void IODTFreeLoaderInfo( char *key, void *infoAddr, int infoSize )
+void IODTFreeLoaderInfo( const char *key, void *infoAddr, int infoSize )
 {
     vm_offset_t			range[2];
     IORegistryEntry		*chosen;
@@ -307,6 +289,26 @@ void IODTFreeLoaderInfo( char *key, void *infoAddr, int infoSize )
     }
 }
 
+int IODTGetDefault(const char *key, void *infoAddr, unsigned int infoSize )
+{
+    IORegistryEntry		*defaults;
+    OSData			*defaultObj;
+    unsigned int		defaultSize;
+
+    defaults = IORegistryEntry::fromPath( "/defaults", gIODTPlane );
+    if ( defaults == 0 ) return -1;
+
+    defaultObj = OSDynamicCast( OSData, defaults->getProperty(key) );
+    if ( defaultObj == 0 ) return -1;
+
+    defaultSize = defaultObj->getLength();
+    if ( defaultSize > infoSize) return -1;
+
+    memcpy( infoAddr, defaultObj->getBytesNoCopy(), defaultSize );
+
+    return 0;
+}
+
 static void FreePhysicalMemory( vm_offset_t * range )
 {
     vm_offset_t	virt;
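A sketch of calling the new IODTGetDefault() (the property key is hypothetical; the return convention is taken from the implementation above):

    #include <stdint.h>
    #include <IOKit/IODeviceTreeSupport.h>

    static uint32_t readDeviceTreeDefault(void)
    {
        uint32_t value = 0;

        // Returns 0 on success; -1 if /defaults is missing, the key is
        // absent, or the property is larger than the supplied buffer.
        if (IODTGetDefault("example-key", &value, sizeof(value)) != 0) {
            value = 0;  // fall back to a built-in default
        }
        return value;
    }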
diff --git a/iokit/Kernel/IOEventSource.cpp b/iokit/Kernel/IOEventSource.cpp
index a20232d91..95046dacd 100644
--- a/iokit/Kernel/IOEventSource.cpp
+++ b/iokit/Kernel/IOEventSource.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2000, 2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -40,6 +40,7 @@ HISTORY
 #define super OSObject
 
 OSDefineMetaClassAndAbstractStructors(IOEventSource, OSObject)
+
 OSMetaClassDefineReservedUnused(IOEventSource, 0);
 OSMetaClassDefineReservedUnused(IOEventSource, 1);
 OSMetaClassDefineReservedUnused(IOEventSource, 2);
@@ -49,17 +50,88 @@ OSMetaClassDefineReservedUnused(IOEventSource, 5);
 OSMetaClassDefineReservedUnused(IOEventSource, 6);
 OSMetaClassDefineReservedUnused(IOEventSource, 7);
 
+bool IOEventSource::checkForWork() { return false; }
+
 /* inline function implementations */
-void IOEventSource::signalWorkAvailable()	{ workLoop->signalWorkAvailable(); }
-void IOEventSource::openGate()			{ workLoop->openGate(); }
-void IOEventSource::closeGate()			{ workLoop->closeGate(); }
-bool IOEventSource::tryCloseGate()		{ return workLoop->tryCloseGate(); }
+
+#if IOKITSTATS
+
+#define IOStatisticsRegisterCounter() \
+do { \
+	reserved->counter = IOStatistics::registerEventSource(inOwner); \
+} while (0)
+
+#define IOStatisticsUnregisterCounter() \
+do { \
+	if (reserved) \
+		IOStatistics::unregisterEventSource(reserved->counter); \
+} while (0)
+
+#define IOStatisticsOpenGate() \
+do { \
+	IOStatistics::countOpenGate(reserved->counter); \
+} while (0)
+
+#define IOStatisticsCloseGate() \
+do { \
+	IOStatistics::countCloseGate(reserved->counter); \
+} while (0)
+
+#else
+
+#define IOStatisticsRegisterCounter()
+#define IOStatisticsUnregisterCounter()
+#define IOStatisticsOpenGate()
+#define IOStatisticsCloseGate()
+
+#endif /* IOKITSTATS */
+
+void IOEventSource::signalWorkAvailable()
+{
+	workLoop->signalWorkAvailable();
+}
+
+void IOEventSource::openGate()
+{
+	IOStatisticsOpenGate();
+	workLoop->openGate();
+}
+
+void IOEventSource::closeGate()
+{
+	workLoop->closeGate();
+	IOStatisticsCloseGate();
+}
+
+bool IOEventSource::tryCloseGate()
+{
+	bool res;
+	if ((res = workLoop->tryCloseGate())) {
+		IOStatisticsCloseGate();
+	}
+	return res;
+}
+
 int IOEventSource::sleepGate(void *event, UInt32 type)
-        { return workLoop->sleepGate(event, type); }
+{
+	int res;
+	IOStatisticsOpenGate();
+	res = workLoop->sleepGate(event, type);
+	IOStatisticsCloseGate();
+	return res;
+}
+
 int IOEventSource::sleepGate(void *event, AbsoluteTime deadline, UInt32 type)
-        { return workLoop->sleepGate(event, deadline, type); }
-void IOEventSource::wakeupGate(void *event, bool oneThread)
-        { workLoop->wakeupGate(event, oneThread); }
+{
+	int res;
+	IOStatisticsOpenGate();
+	res = workLoop->sleepGate(event, deadline, type);
+	IOStatisticsCloseGate();
+	return res;
+}
+
+void IOEventSource::wakeupGate(void *event, bool oneThread)
+{
+	workLoop->wakeupGate(event, oneThread);
+}
 
 bool IOEventSource::init(OSObject *inOwner,
                          Action inAction)
@@ -75,9 +147,28 @@ bool IOEventSource::init(OSObject *inOwner,
     (void) setAction(inAction);
     enabled = true;
 
+    if (!reserved) {
+        reserved = IONew(ExpansionData, 1);
+        if (!reserved) {
+            return false;
+        }
+    }
+
+    IOStatisticsRegisterCounter();
+
     return true;
 }
 
+void IOEventSource::free( void )
+{
+    IOStatisticsUnregisterCounter();
+
+    if (reserved)
+        IODelete(reserved, ExpansionData, 1);
+
+    super::free();
+}
+
 IOEventSource::Action IOEventSource::getAction () const { return action; };
 
 void IOEventSource::setAction(Action inAction)
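The IOKITSTATS macros above follow the usual compile-time instrumentation idiom: with the option disabled they expand to empty statements, so the gate fast paths pay nothing. A generic sketch of the pattern (all names invented for illustration):

    #if MY_STATS
    #define MyStatsCount(ctr) do { my_stats_increment(ctr); } while (0)
    #else
    #define MyStatsCount(ctr) do { } while (0)
    #endif

The do { } while (0) wrapper keeps each macro a single statement, so an invocation followed by a semicolon nests safely inside unbraced if/else bodies.
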
diff --git a/iokit/Kernel/IOFilterInterruptEventSource.cpp b/iokit/Kernel/IOFilterInterruptEventSource.cpp
index f4f73e2b4..944e84ced 100644
--- a/iokit/Kernel/IOFilterInterruptEventSource.cpp
+++ b/iokit/Kernel/IOFilterInterruptEventSource.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -32,6 +32,25 @@
 #include <IOKit/IOTimeStamp.h>
 #include <IOKit/IOWorkLoop.h>
 
+#if IOKITSTATS
+
+#define IOStatisticsInitializeCounter() \
+do { \
+	IOStatistics::setCounterType(IOEventSource::reserved->counter, kIOStatisticsFilterInterruptEventSourceCounter); \
+} while (0)
+
+#define IOStatisticsInterrupt() \
+do { \
+	IOStatistics::countInterrupt(IOEventSource::reserved->counter); \
+} while (0)
+
+#else
+
+#define IOStatisticsInitializeCounter()
+#define IOStatisticsInterrupt()
+
+#endif /* IOKITSTATS */
+
 #define super IOInterruptEventSource
 
 OSDefineMetaClassAndStructors
@@ -79,6 +98,9 @@ IOFilterInterruptEventSource::init(OSObject *inOwner,
         return false;
 
     filterAction = inFilterAction;
+
+    IOStatisticsInitializeCounter();
+
     return true;
 }
 
@@ -103,9 +125,10 @@ IOFilterInterruptEventSource *IOFilterInterruptEventSource
 void IOFilterInterruptEventSource::signalInterrupt()
 {
 	bool trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false;
 
+    IOStatisticsInterrupt();
     producerCount++;
 
 	if (trace)
 	    IOTimeStampStartConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner);
     
diff --git a/iokit/Kernel/IOHibernateIO.cpp b/iokit/Kernel/IOHibernateIO.cpp
index bc180fb5b..a4d7dbb4d 100644
--- a/iokit/Kernel/IOHibernateIO.cpp
+++ b/iokit/Kernel/IOHibernateIO.cpp
@@ -151,6 +151,7 @@ to restrict I/O ops.
 #include <IOKit/pwr_mgt/IOPowerConnection.h>
 #include "IOPMPowerStateQueue.h"
 #include <IOKit/IOBufferMemoryDescriptor.h>
+#include <IOKit/AppleKeyStoreInterface.h>
 #include <crypto/aes.h>
 
 #include <sys/uio.h>
@@ -158,13 +159,20 @@ to restrict I/O ops.
 #include <sys/stat.h>
 #include <sys/fcntl.h>                       // (FWRITE, ...)
 #include <sys/sysctl.h>
+#include <sys/kdebug.h>
 
 #include <IOKit/IOHibernatePrivate.h>
 #include <IOKit/IOPolledInterface.h>
 #include <IOKit/IONVRAM.h>
 #include "IOHibernateInternal.h"
-#include "WKdm.h"
+#include <libkern/WKdm.h>
 #include "IOKitKernelInternal.h"
+#include <pexpert/device_tree.h>
+
+#include <machine/pal_routines.h>
+#include <machine/pal_hibernate.h>
+
+extern "C" addr64_t		kvtophys(vm_offset_t va);
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
@@ -192,6 +200,8 @@ static IOPolledFileIOVars	          gFileVars;
 static IOHibernateVars			  gIOHibernateVars;
 static struct kern_direct_file_io_ref_t * gIOHibernateFileRef;
 static hibernate_cryptvars_t 		  gIOHibernateCryptWakeContext;
+static hibernate_graphics_t  		  _hibernateGraphics;
+static hibernate_graphics_t * 		  gIOHibernateGraphicsInfo = &_hibernateGraphics;
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
@@ -540,19 +550,59 @@ file_extent_callback(void * ref, uint64_t start, uint64_t length)
     ctx->size += length;
 }
 
+static IOService * 
+IOCopyMediaForDev(dev_t device)
+{
+    OSDictionary * matching;
+    OSNumber *     num;
+    OSIterator *   iter;
+    IOService *    result = 0;
+
+    matching = IOService::serviceMatching("IOMedia");
+    if (!matching)
+        return (0);
+    do
+    {
+        num = OSNumber::withNumber(major(device), 32);
+        if (!num)
+            break;
+        matching->setObject(kIOBSDMajorKey, num);
+        num->release();
+        num = OSNumber::withNumber(minor(device), 32);
+        if (!num)
+            break;
+        matching->setObject(kIOBSDMinorKey, num);
+        num->release();
+        iter = IOService::getMatchingServices(matching);
+        if (iter)
+        {
+            result = (IOService *) iter->getNextObject();
+            if (result)
+                result->retain();
+            iter->release();
+        }
+    }
+    while (false);
+    matching->release();
+
+    return (result);
+}
+
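A hedged usage sketch for the IOCopyMediaForDev() helper added above: it returns either 0 or an IOMedia service with an extra retain, so the caller owns that reference and must balance it, exactly as the two call sites in IOPolledFileOpen() do.

    // Hypothetical caller: map a BSD dev_t back to its IOMedia service.
    IOService * media = IOCopyMediaForDev(device);
    if (media)
    {
        // ... inspect the media object here ...
        media->release();    // balance the retain taken by the helper
    }
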
 IOReturn
 IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer,
 			    IOPolledFileIOVars ** fileVars, OSData ** fileExtents,
-			    OSData ** imagePath)
+			    OSData ** imagePath, uint8_t * volumeCryptKey)
 {
     IOReturn			err = kIOReturnError;
     IOPolledFileIOVars *	vars;
     _OpenFileContext		ctx;
     OSData *			extentsData;
     OSNumber *			num;
-    IORegistryEntry *		part = 0;
-    OSDictionary *		matching;
-    OSIterator *		iter;
+    IOService *                 part = 0;
+    OSString *                  keyUUID = 0;
+    OSString *                  keyStoreUUID = 0;
+    dev_t 			block_dev;
     dev_t 			hibernate_image_dev;
     uint64_t			maxiobytes;
 
@@ -575,10 +625,13 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer,
 	ctx.size    = 0;
 	vars->fileRef = kern_open_file_for_direct_io(filename, 
 						    &file_extent_callback, &ctx, 
+						    &block_dev,
 						    &hibernate_image_dev,
                                                     &vars->block0,
                                                     &maxiobytes,
-                                                    &vars->solid_state);
+                                                    &vars->flags, 
+                                                    0, (caddr_t) gIOHibernateCurrentHeader, 
+                                                    sizeof(IOHibernateImageHeader));
 	if (!vars->fileRef)
 	{
 	    err = kIOReturnNoSpace;
@@ -587,10 +640,10 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer,
 	gIOHibernateFileRef = vars->fileRef;
 
         if (kIOHibernateModeSSDInvert & gIOHibernateMode)
-            vars->solid_state = vars->solid_state ? false : true;
+            vars->flags ^= kIOHibernateOptionSSD;
 
 	HIBLOG("Opened file %s, size %qd, partition base 0x%qx, maxio %qx ssd %d\n", filename, ctx.size, 
-                    vars->block0, maxiobytes, vars->solid_state);
+                    vars->block0, maxiobytes, kIOHibernateOptionSSD & vars->flags);
 	if (ctx.size < 1*1024*1024)		// check against image size estimate!
 	{
 	    err = kIOReturnNoSpace;
@@ -601,41 +654,52 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer,
             vars->bufferSize = maxiobytes;
     
 	vars->extentMap = (IOPolledFileExtent *) extentsData->getBytesNoCopy();
-    
-	matching = IOService::serviceMatching("IOMedia");
-	num = OSNumber::withNumber(major(hibernate_image_dev), 32);
-	matching->setObject(kIOBSDMajorKey, num);
-	num->release();
-	num = OSNumber::withNumber(minor(hibernate_image_dev), 32);
-	matching->setObject(kIOBSDMinorKey, num);
-	num->release();
-	iter = IOService::getMatchingServices(matching);
-	matching->release();
-	if (iter)
-	{
-	    part = (IORegistryEntry *) iter->getNextObject();
-	    part->retain();
-	    iter->release();
-	}
-    	if (!part)
-	    break;
 
-	int minor, major;
+        part = IOCopyMediaForDev(block_dev);
+        if (!part)
+            break;
+
+        err = part->callPlatformFunction(PLATFORM_FUNCTION_GET_MEDIA_ENCRYPTION_KEY_UUID, false, 
+        				  (void *) &keyUUID, (void *) &keyStoreUUID, NULL, NULL);
+        if ((kIOReturnSuccess == err) && keyUUID && keyStoreUUID)
+        {
+//            IOLog("got volume key %s\n", keyStoreUUID->getCStringNoCopy());
+            uuid_t                  volumeKeyUUID;
+            aks_volume_key_t        vek;
+            static IOService *      sKeyStore;
+            static const OSSymbol * sAKSGetKey;
+
+            if (!sAKSGetKey)
+                sAKSGetKey = OSSymbol::withCStringNoCopy(AKS_PLATFORM_FUNCTION_GETKEY);
+            if (!sKeyStore)
+                sKeyStore = (IOService *) IORegistryEntry::fromPath(AKS_SERVICE_PATH, gIOServicePlane);
+            if (sKeyStore)
+                err = uuid_parse(keyStoreUUID->getCStringNoCopy(), volumeKeyUUID);
+            else
+                err = kIOReturnNoResources;
+            if (kIOReturnSuccess == err)    
+                err = sKeyStore->callPlatformFunction(sAKSGetKey, true, volumeKeyUUID, &vek, NULL, NULL);
+            if (kIOReturnSuccess != err)    
+                IOLog("volume key err 0x%x\n", err);
+            else
+            {
+                size_t bytes = (kIOHibernateAESKeySize / 8);
+                if (vek.key.keybytecount < bytes)
+                     bytes = vek.key.keybytecount;
+                bcopy(&vek.key.keybytes[0], volumeCryptKey, bytes);
+            }
+            bzero(&vek, sizeof(vek));
+        }
+        part->release();
+
+        part = IOCopyMediaForDev(hibernate_image_dev);
+        if (!part)
+            break;
+
 	IORegistryEntry * next;
 	IORegistryEntry * child;
 	OSData * data;
 
-	num = (OSNumber *) part->getProperty(kIOBSDMajorKey);
-	if (!num)
-	    break;
-	major = num->unsigned32BitValue();
-	num = (OSNumber *) part->getProperty(kIOBSDMinorKey);
-	if (!num)
-	    break;
-	minor = num->unsigned32BitValue();
-
-	hibernate_image_dev = makedev(major, minor);
-
         vars->pollers = OSArray::withCapacity(4);
 	if (!vars->pollers)
 	    break;
@@ -663,7 +727,7 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer,
                 && child->isParent(next, gIOServicePlane, true));
 
 	HIBLOG("hibernate image major %d, minor %d, blocksize %ld, pollers %d\n",
-		    major, minor, (long)vars->blockSize, vars->pollers->getCount());
+		    major(hibernate_image_dev), minor(hibernate_image_dev), (long)vars->blockSize, vars->pollers->getCount());
 	if (vars->pollers->getCount() < kIOHibernateMinPollersNeeded)
 	    continue;
 
@@ -682,18 +746,22 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer,
 
 	if ((extentsData->getLength() >= sizeof(IOPolledFileExtent)))
 	{
-	    char str2[24];
+	    char str2[24 + sizeof(uuid_string_t) + 2];
 
 #if defined(__i386__) || defined(__x86_64__)
 	    if (!gIOCreateEFIDevicePathSymbol)
 		gIOCreateEFIDevicePathSymbol = OSSymbol::withCString("CreateEFIDevicePath");
 
-	    snprintf(str2, sizeof(str2), "%qx", vars->extentMap[0].start);
+            if (keyUUID)
+                snprintf(str2, sizeof(str2), "%qx:%s", 
+                                vars->extentMap[0].start, keyUUID->getCStringNoCopy());
+            else
+                snprintf(str2, sizeof(str2), "%qx", vars->extentMap[0].start);
 
 	    err = IOService::getPlatform()->callPlatformFunction(
 						gIOCreateEFIDevicePathSymbol, false,
-						(void *) part, (void *) str2, (void *) true,
-						(void *) &data);
+						(void *) part, (void *) str2,
+						(void *) (uintptr_t) true, (void *) &data);
 #else
 	    char str1[256];
 	    int len = sizeof(str1);
@@ -724,7 +792,7 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer,
         HIBLOG("error 0x%x opening hibernation file\n", err);
 	if (vars->fileRef)
 	{
-	    kern_close_file_for_direct_io(vars->fileRef);
+	    kern_close_file_for_direct_io(vars->fileRef, 0, 0, 0);
 	    gIOHibernateFileRef = vars->fileRef = NULL;
 	}
     }
@@ -827,6 +895,8 @@ IOPolledFileWrite(IOPolledFileIOVars * vars,
                 && (vars->position > vars->encryptStart)
                 && ((vars->position - length) < vars->encryptEnd))
             {
+                AbsoluteTime startTime, endTime;
+
                 uint32_t encryptLen, encryptStart;
                 encryptLen = vars->position - vars->encryptStart;
                 if (encryptLen > length)
@@ -835,12 +905,20 @@ IOPolledFileWrite(IOPolledFileIOVars * vars,
                 if (vars->position > vars->encryptEnd)
                     encryptLen -= (vars->position - vars->encryptEnd);
 
+                clock_get_uptime(&startTime);
+
                 // encrypt the buffer
                 aes_encrypt_cbc(vars->buffer + vars->bufferHalf + encryptStart,
                                 &cryptvars->aes_iv[0],
                                 encryptLen / AES_BLOCK_SIZE,
                                 vars->buffer + vars->bufferHalf + encryptStart,
                                 &cryptvars->ctx.encrypt);
+
+                clock_get_uptime(&endTime);
+                ADD_ABSOLUTETIME(&vars->cryptTime, &endTime);
+                SUB_ABSOLUTETIME(&vars->cryptTime, &startTime);
+                vars->cryptBytes += encryptLen;
+
                 // save initial vector for following encrypts
                 bcopy(vars->buffer + vars->bufferHalf + encryptStart + encryptLen - AES_BLOCK_SIZE,
                         &cryptvars->aes_iv[0],
@@ -916,7 +994,7 @@ IOPolledFileRead(IOPolledFileIOVars * vars,
 	vars->bufferOffset += copy;
 //	vars->position += copy;
 
-	if (vars->bufferOffset == vars->bufferLimit)
+	if ((vars->bufferOffset == vars->bufferLimit) && (vars->position < vars->readEnd))
 	{
 	    if (vars->io)
             {
@@ -929,9 +1007,9 @@ IOPolledFileRead(IOPolledFileIOVars * vars,
 
 if (vars->position & (vars->blockSize - 1)) HIBLOG("misaligned file pos %qx\n", vars->position);
 
-	    vars->position += vars->lastRead;
+	    vars->position        += vars->lastRead;
 	    vars->extentRemaining -= vars->lastRead;
-	    vars->bufferLimit = vars->lastRead;
+	    vars->bufferLimit      = vars->lastRead;
 
 	    if (!vars->extentRemaining)
 	    {
@@ -953,14 +1031,18 @@ if (vars->position & (vars->blockSize - 1)) HIBLOG("misaligned file pos %qx\n",
 		length = vars->extentRemaining;
 	    else
 		length = vars->bufferSize;
-	    vars->lastRead = length;
+	    if ((length + vars->position) > vars->readEnd)
+	    	length = vars->readEnd - vars->position;
 
+	    vars->lastRead = length;
+	    if (length)
+	    {
 //if (length != vars->bufferSize) HIBLOG("short read of %qx ends@ %qx\n", length, offset + length);
-
-	    err = IOHibernatePollerIO(vars, kIOPolledRead, vars->bufferHalf, offset, length);
-            if (kIOReturnSuccess != err)
-                break;
-	    vars->io = true;
+		err = IOHibernatePollerIO(vars, kIOPolledRead, vars->bufferHalf, offset, length);
+		if (kIOReturnSuccess != err)
+		    break;
+		vars->io = true;
+	    }
 
 	    vars->bufferHalf = vars->bufferHalf ? 0 : vars->bufferSize;
 	    vars->bufferOffset = 0;
@@ -969,16 +1051,26 @@ if (vars->position & (vars->blockSize - 1)) HIBLOG("misaligned file pos %qx\n",
             if (cryptvars)
             {
                 uint8_t thisVector[AES_BLOCK_SIZE];
+                AbsoluteTime startTime, endTime;
+
                 // save initial vector for following decrypts
                 bcopy(&cryptvars->aes_iv[0], &thisVector[0], AES_BLOCK_SIZE);
                 bcopy(vars->buffer + vars->bufferHalf + lastReadLength - AES_BLOCK_SIZE, 
                         &cryptvars->aes_iv[0], AES_BLOCK_SIZE);
+
                 // decrypt the buffer
+                clock_get_uptime(&startTime);
+
                 aes_decrypt_cbc(vars->buffer + vars->bufferHalf,
                                 &thisVector[0],
                                 lastReadLength / AES_BLOCK_SIZE,
                                 vars->buffer + vars->bufferHalf,
                                 &cryptvars->ctx.decrypt);
+
+                clock_get_uptime(&endTime);
+                ADD_ABSOLUTETIME(&vars->cryptTime, &endTime);
+                SUB_ABSOLUTETIME(&vars->cryptTime, &startTime);
+                vars->cryptBytes += lastReadLength;
             }
 #endif /* CRYPTO */
 	}
@@ -1013,10 +1105,12 @@ IOHibernateSystemSleep(void)
 
     if (IOService::getPMRootDomain()->getHibernateSettings(
         &gIOHibernateMode, &gIOHibernateFreeRatio, &gIOHibernateFreeTime))
+    {
         if (kIOHibernateModeSleep & gIOHibernateMode)
             // default to discard clean for safe sleep
             gIOHibernateMode ^= (kIOHibernateModeDiscardCleanInactive 
                                 | kIOHibernateModeDiscardCleanActive);
+    }
 
     if ((obj = IOService::getPMRootDomain()->copyProperty(kIOHibernateFileKey)))
     {
@@ -1039,40 +1133,48 @@ IOHibernateSystemSleep(void)
         vars->ioBuffer  = IOBufferMemoryDescriptor::withOptions(kIODirectionOutIn, 
 				    2 * kDefaultIOSize, page_size);
 
-        if (!vars->srcBuffer || !vars->ioBuffer)
+	vars->handoffBuffer = IOBufferMemoryDescriptor::withOptions(kIODirectionOutIn, 
+				    ptoa_64(gIOHibernateHandoffPageCount), page_size);
+
+        if (!vars->srcBuffer || !vars->ioBuffer || !vars->handoffBuffer)
         {
             err = kIOReturnNoMemory;
             break;
         }
 
+	// open & invalidate the image file
+	gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature;
         err = IOPolledFileOpen(gIOHibernateFilename, vars->ioBuffer,
-                                &vars->fileVars, &vars->fileExtents, &data);
+                                &vars->fileVars, &vars->fileExtents, &data, 
+                                &vars->volumeCryptKey[0]);
         if (KERN_SUCCESS != err)
         {
 	    HIBLOG("IOPolledFileOpen(%x)\n", err);
             break;
         }
-	if (vars->fileVars->fileRef)
-	{
-	    // invalidate the image file
-	    gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature;
-	    int err = kern_write_file(vars->fileVars->fileRef, 0,
-					(caddr_t) gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader));
-            if (KERN_SUCCESS != err)
-                HIBLOG("kern_write_file(%d)\n", err);
-	}
 
 	bzero(gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader));
 	gIOHibernateCurrentHeader->debugFlags = gIOHibernateDebugFlags;
-
-        dsSSD = (vars->fileVars->solid_state
+        dsSSD = ((0 != (kIOHibernateOptionSSD & vars->fileVars->flags))
                 && (kOSBooleanTrue == IOService::getPMRootDomain()->getProperty(kIOPMDeepSleepEnabledKey)));
-
         if (dsSSD)
         {
             gIOHibernateCurrentHeader->options |= 
                                                 kIOHibernateOptionSSD
                                               | kIOHibernateOptionColor;
+
+#if defined(__i386__) || defined(__x86_64__)
+            if (!uuid_is_null(vars->volumeCryptKey) &&
+                  (kOSBooleanTrue != IOService::getPMRootDomain()->getProperty(kIOPMDestroyFVKeyOnStandbyKey)))
+            {
+                uintptr_t smcVars[2];
+                smcVars[0] = sizeof(vars->volumeCryptKey);
+                smcVars[1] = (uintptr_t)(void *) &vars->volumeCryptKey[0];
+
+                IOService::getPMRootDomain()->setProperty(kIOHibernateSMCVariablesKey, smcVars, sizeof(smcVars));
+                bzero(smcVars, sizeof(smcVars));
+            }
+#endif
         }
         else
         {
@@ -1087,7 +1189,7 @@ IOHibernateSystemSleep(void)
         err = hibernate_setup(gIOHibernateCurrentHeader, 
                                 gIOHibernateFreeRatio, gIOHibernateFreeTime,
                                 dsSSD,
-                                &vars->page_list, &vars->page_list_wired, &encryptedswap);
+                                &vars->page_list, &vars->page_list_wired, &vars->page_list_pal, &encryptedswap);
         clock_get_uptime(&endTime);
         SUB_ABSOLUTETIME(&endTime, &startTime);
         absolutetime_to_nanoseconds(endTime, &nsec);
@@ -1096,7 +1198,7 @@ IOHibernateSystemSleep(void)
         if (KERN_SUCCESS != err)
             break;
 
-        if (encryptedswap)
+        if (encryptedswap || !uuid_is_null(vars->volumeCryptKey))
             gIOHibernateMode ^= kIOHibernateModeEncrypt; 
 
         if (kIOHibernateOptionProgress & gIOHibernateCurrentHeader->options)
@@ -1137,45 +1239,6 @@ IOHibernateSystemSleep(void)
             }
             data->release();
 
-#if defined(__ppc__)
-            size_t	      len;
-            char              valueString[16];
-
-	    vars->saveBootDevice = gIOOptionsEntry->copyProperty(kIOSelectedBootDeviceKey);
-            if (gIOChosenEntry)
-            {
-		OSData * bootDevice = OSDynamicCast(OSData, gIOChosenEntry->getProperty(kIOBootPathKey));
-		if (bootDevice)
-		{
-		    sym = OSSymbol::withCStringNoCopy(kIOSelectedBootDeviceKey);
-		    OSString * str2 = OSString::withCStringNoCopy((const char *) bootDevice->getBytesNoCopy());
-		    if (sym && str2)
-			gIOOptionsEntry->setProperty(sym, str2);
-		    if (sym)
-			sym->release();
-		    if (str2)
-			str2->release();
-		}
-                data = OSDynamicCast(OSData, gIOChosenEntry->getProperty(kIOHibernateMemorySignatureKey));
-                if (data)
-                {
-                    vars->haveFastBoot = true;
-
-                    len = snprintf(valueString, sizeof(valueString), "0x%lx", *((UInt32 *)data->getBytesNoCopy()));
-                    data = OSData::withBytes(valueString, len + 1);
-                    sym = OSSymbol::withCStringNoCopy(kIOHibernateMemorySignatureEnvKey);
-                    if (sym && data)
-                        gIOOptionsEntry->setProperty(sym, data);
-                    if (sym)
-                        sym->release();
-                    if (data)
-                        data->release();
-                }
-                data = OSDynamicCast(OSData, gIOChosenEntry->getProperty(kIOHibernateMachineSignatureKey));
-                if (data)
-                    gIOHibernateCurrentHeader->machineSignature = *((UInt32 *)data->getBytesNoCopy());
-            }
-#endif /* __ppc__ */
 #if defined(__i386__) || defined(__x86_64__)
 	    struct AppleRTCHibernateVars
 	    {
@@ -1529,6 +1592,38 @@ IOHibernateSystemHasSlept(void)
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
+static DeviceTreeNode *
+MergeDeviceTree(DeviceTreeNode * entry, IORegistryEntry * regEntry)
+{
+    DeviceTreeNodeProperty * prop;
+    DeviceTreeNode *         child;
+    IORegistryEntry *        childRegEntry;
+    const char *             nameProp;
+    unsigned int             propLen, idx;
+
+    prop = (DeviceTreeNodeProperty *) (entry + 1);
+    for (idx = 0; idx < entry->nProperties; idx++)
+    {
+	if (regEntry && (0 != strcmp("name", prop->name)))
+	{
+	    regEntry->setProperty((const char *) prop->name, (void *) (prop + 1), prop->length);
+//	    HIBPRINT("%s: %s, %d\n", regEntry->getName(), prop->name, prop->length);
+	}
+	prop = (DeviceTreeNodeProperty *) (((uintptr_t)(prop + 1)) + ((prop->length + 3) & ~3));
+    }
+
+    child = (DeviceTreeNode *) prop;
+    for (idx = 0; idx < entry->nChildren; idx++)
+    {
+	if (kSuccess != DTGetProperty(child, "name", (void **) &nameProp, &propLen))
+	    panic("no name");
+	childRegEntry = regEntry ? regEntry->childFromPath(nameProp, gIODTPlane) : NULL;
+//	HIBPRINT("%s == %p\n", nameProp, childRegEntry);
+	child = MergeDeviceTree(child, childRegEntry);
+    }
+    return (child);
+}
+
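MergeDeviceTree() walks the flattened device tree returned across the wake handoff: each DeviceTreeNodeProperty is a fixed header followed by its value, padded to a 4-byte boundary, and child nodes follow the last property. A small sketch of the stepping arithmetic, on the assumption that prop->length counts only the value bytes:

    // Advance to the next property record: skip the header, the value,
    // and the pad bytes that round the value up to a 4-byte boundary.
    DeviceTreeNodeProperty * next = (DeviceTreeNodeProperty *)
        (((uintptr_t)(prop + 1)) + ((prop->length + 3) & ~3));
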
 IOReturn
 IOHibernateSystemWake(void)
 {
@@ -1582,64 +1677,9 @@ IOHibernateSystemWake(void)
 
     // invalidate nvram properties - (gIOOptionsEntry != 0) => nvram was touched
 
-#ifdef __ppc__
-    OSData * data = OSData::withCapacity(4);
-    if (gIOOptionsEntry && data)
-    {
-        const OSSymbol * sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKey);
-        if (sym)
-        {
-            gIOOptionsEntry->setProperty(sym, data);
-            sym->release();
-        }
-        sym = OSSymbol::withCStringNoCopy(kIOSelectedBootDeviceKey);
-        if (sym)
-        {
-	    if (vars->saveBootDevice)
-	    {
-		gIOOptionsEntry->setProperty(sym, vars->saveBootDevice);
-		vars->saveBootDevice->release();
-	    }
-            sym->release();
-        }
-        sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKeyKey);
-        if (sym)
-        {
-            gIOOptionsEntry->setProperty(sym, data);
-            sym->release();
-        }
-        sym = OSSymbol::withCStringNoCopy(kIOHibernateMemorySignatureEnvKey);
-        if (sym)
-        {
-            gIOOptionsEntry->removeProperty(sym);
-            sym->release();
-        }
-    }
-    if (data)
-        data->release();
-
-    if (gIOOptionsEntry)
-    {
-	if (!vars->haveFastBoot)
-	{
-	    // reset boot audio volume
-	    IODTPlatformExpert * platform = OSDynamicCast(IODTPlatformExpert, IOService::getPlatform());
-	    if (platform)
-		platform->writeXPRAM(kXPRamAudioVolume, 
-					&vars->saveBootAudioVolume, sizeof(vars->saveBootAudioVolume));
-	}
-
-	// sync now to hardware if the booter has not
-	if (kIOHibernateStateInactive == gIOHibernateState)
-	    gIOOptionsEntry->sync();
-	else
-	    // just sync the variables in case a later panic syncs nvram (it won't sync variables)
-	    gIOOptionsEntry->syncOFVariables();
-    }
-#endif
-
 #if defined(__i386__) || defined(__x86_64__)
 	IOService::getPMRootDomain()->removeProperty(gIOHibernateRTCVariablesKey);
+	IOService::getPMRootDomain()->removeProperty(kIOHibernateSMCVariablesKey);
 
 	/*
 	 * Hibernate variable is written to NVRAM on platforms in which RtcRam
@@ -1672,6 +1712,47 @@ IOHibernateSystemWake(void)
 	vars->srcBuffer->release();
     if (vars->ioBuffer)
 	vars->ioBuffer->release();
+    bzero(&gIOHibernateHandoffPages[0], gIOHibernateHandoffPageCount * sizeof(gIOHibernateHandoffPages[0]));
+    if (vars->handoffBuffer)
+    {
+	IOHibernateHandoff * handoff;
+	bool done = false;
+	for (handoff = (IOHibernateHandoff *) vars->handoffBuffer->getBytesNoCopy();
+	     !done;
+	     handoff = (IOHibernateHandoff *) &handoff->data[handoff->bytecount])
+	{
+//	    HIBPRINT("handoff %p, %x, %x\n", handoff, handoff->type, handoff->bytecount);
+	    uint8_t * data = &handoff->data[0];
+	    switch (handoff->type)
+	    {
+		case kIOHibernateHandoffTypeEnd:
+		    done = true;
+		    break;
+
+		case kIOHibernateHandoffTypeDeviceTree:
+		    MergeDeviceTree((DeviceTreeNode *) data, IOService::getServiceRoot());
+		    break;
+    
+		case kIOHibernateHandoffTypeKeyStore:
+#if defined(__i386__) || defined(__x86_64__)
+		    {
+			IOBufferMemoryDescriptor *
+			md = IOBufferMemoryDescriptor::withBytes(data, handoff->bytecount, kIODirectionOutIn);
+			if (md)
+			{
+			    IOSetKeyStoreData(md);
+			}
+		    }
+#endif
+		    break;
+    
+		default:
+		    done = (kIOHibernateHandoffType != (handoff->type & 0xFFFF0000));
+		    break;
+	    }    
+	}
+	vars->handoffBuffer->release();
+    }
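The handoff buffer is a packed sequence of type/bytecount records terminated by kIOHibernateHandoffTypeEnd; this wake path and hibernate_machine_init() below walk it the same way. A stripped-down sketch of the traversal, using the field names from the code above:

    // Walk the records until the end marker; stop early on anything
    // that does not carry the handoff type signature.
    IOHibernateHandoff * handoff;
    for (handoff = (IOHibernateHandoff *) vars->handoffBuffer->getBytesNoCopy();
         kIOHibernateHandoffTypeEnd != handoff->type;
         handoff = (IOHibernateHandoff *) &handoff->data[handoff->bytecount])
    {
        if (kIOHibernateHandoffType != (handoff->type & 0xFFFF0000))
            break;    // unknown record family; abandon the walk
        // ... dispatch on handoff->type here ...
    }
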
     if (vars->fileExtents)
 	vars->fileExtents->release();
 
@@ -1687,14 +1768,11 @@ IOHibernateSystemPostWake(void)
 {
     if (gIOHibernateFileRef)
     {
-	// invalidate the image file
+	// invalidate & close the image file
 	gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature;
-	int err = kern_write_file(gIOHibernateFileRef, 0,
-				    (caddr_t) gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader));
-	if (KERN_SUCCESS != err)
-	    HIBLOG("kern_write_file(%d)\n", err);
-
-	kern_close_file_for_direct_io(gIOHibernateFileRef);
+	kern_close_file_for_direct_io(gIOHibernateFileRef,
+				       0, (caddr_t) gIOHibernateCurrentHeader, 
+				       sizeof(IOHibernateImageHeader));
         gIOHibernateFileRef = 0;
     }
     return (kIOReturnSuccess);
@@ -1703,13 +1781,13 @@ IOHibernateSystemPostWake(void)
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 SYSCTL_STRING(_kern, OID_AUTO, hibernatefile, 
-		CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN, 
+		CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
 		gIOHibernateFilename, sizeof(gIOHibernateFilename), "");
 SYSCTL_STRING(_kern, OID_AUTO, bootsignature, 
-		CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN, 
+		CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
 		gIOHibernateBootSignature, sizeof(gIOHibernateBootSignature), "");
 SYSCTL_UINT(_kern, OID_AUTO, hibernatemode, 
-		CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN, 
+		CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
 		&gIOHibernateMode, 0, "");
 
 void
@@ -1738,10 +1816,6 @@ IOHibernateSystemInit(IOPMrootDomain * rootDomain)
 static void
 hibernate_setup_for_wake(void)
 {
-#if __ppc__
-    // go slow (state needed for wake)
-    ml_set_processor_speed(1);
-#endif
 }
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
@@ -1762,6 +1836,34 @@ uint32_t	wired_pages_encrypted = 0;
 uint32_t	dirty_pages_encrypted = 0;
 uint32_t	wired_pages_clear = 0;
 
+static void
+hibernate_pal_callback(void *vars_arg, vm_offset_t addr)
+{
+	IOHibernateVars *vars = (IOHibernateVars *)vars_arg;
+	/* Make sure it's not in either of the save lists */
+	hibernate_set_page_state(vars->page_list, vars->page_list_wired, atop_64(addr), 1, kIOHibernatePageStateFree);
+
+	/* Set it in the bitmap of pages owned by the PAL */
+	hibernate_page_bitset(vars->page_list_pal, TRUE, atop_64(addr));
+}
+
+static struct hibernate_cryptvars_t *local_cryptvars;
+
+extern "C" int
+hibernate_pal_write(void *buffer, size_t size)
+{
+	IOHibernateVars * vars = &gIOHibernateVars;
+
+	IOReturn err = IOPolledFileWrite(vars->fileVars, (const uint8_t *) buffer, size, local_cryptvars);
+	if (kIOReturnSuccess != err) {
+		kprintf("hibernate_pal_write error 0x%x\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
 extern "C" uint32_t
 hibernate_write_image(void)
 {
@@ -1786,9 +1888,11 @@ hibernate_write_image(void)
     uint32_t	 tag;
     uint32_t	 pageType;
     uint32_t	 pageAndCount[2];
+    addr64_t     phys64;
+    IOByteCount  segLen;
 
     AbsoluteTime startTime, endTime;
-    AbsoluteTime allTime, compTime, decoTime;
+    AbsoluteTime allTime, compTime;
     uint64_t     compBytes;
     uint64_t     nsec;
     uint32_t     lastProgressStamp = 0;
@@ -1809,9 +1913,12 @@ hibernate_write_image(void)
 	kdebug_enable = save_kdebug_enable;
 
     KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 1) | DBG_FUNC_START, 0, 0, 0, 0, 0);
+    IOService::getPMRootDomain()->tracePoint(kIOPMTracePointHibernate);
 
     restore1Sum = sum1 = sum2 = 0;
 
+    hibernate_pal_prepare();
+
 #if CRYPTO
     // encryption data. "iv" is the "initial vector".
     if (kIOHibernateModeEncrypt & gIOHibernateMode)
@@ -1831,6 +1938,9 @@ hibernate_write_image(void)
 
         cryptvars = &_cryptvars;
         bzero(cryptvars, sizeof(hibernate_cryptvars_t));
+        for (pageCount = 0; pageCount < sizeof(vars->wiredCryptKey); pageCount++)
+            vars->wiredCryptKey[pageCount] ^= vars->volumeCryptKey[pageCount];
+        bzero(&vars->volumeCryptKey[0], sizeof(vars->volumeCryptKey));
         aes_encrypt_key(vars->wiredCryptKey,
                         kIOHibernateAESKeySize,
                         &cryptvars->ctx.encrypt);
@@ -1838,7 +1948,8 @@ hibernate_write_image(void)
         bcopy(&first_iv[0], &cryptvars->aes_iv[0], AES_BLOCK_SIZE);
         bzero(&vars->wiredCryptKey[0], sizeof(vars->wiredCryptKey));
         bzero(&vars->cryptKey[0], sizeof(vars->cryptKey));
-        bzero(gIOHibernateCryptWakeVars, sizeof(hibernate_cryptwakevars_t));
+
+        local_cryptvars = cryptvars;
     }
 #endif /* CRYPTO */
 
@@ -1846,6 +1957,7 @@ hibernate_write_image(void)
 
     hibernate_page_list_setall(vars->page_list,
                                vars->page_list_wired,
+							   vars->page_list_pal,
                                &pageCount);
 
     HIBLOG("hibernate_page_list_setall found pageCount %d\n", pageCount);
@@ -1863,9 +1975,8 @@ hibernate_write_image(void)
 #endif
 
     needEncrypt = (0 != (kIOHibernateModeEncrypt & gIOHibernateMode));
-
     AbsoluteTime_to_scalar(&compTime) = 0;
-    AbsoluteTime_to_scalar(&decoTime) = 0;
+    compBytes = 0;
 
     clock_get_uptime(&allTime);
     IOService::getPMRootDomain()->pmStatsRecordEvent( 
@@ -1901,18 +2012,26 @@ hibernate_write_image(void)
         uintptr_t hibernateBase;
         uintptr_t hibernateEnd;
 
-#if defined(__i386__) || defined(__x86_64__)
-        hibernateBase = sectINITPTB;
-#else
-        hibernateBase = sectHIBB;
-#endif
+        hibernateBase = HIB_BASE; /* Defined in PAL headers */
 
         hibernateEnd = (sectHIBB + sectSizeHIB);
+
         // copy out restore1 code
-    
-        page = atop_32(hibernateBase);
-        count = atop_32(round_page(hibernateEnd)) - page;
-        header->restore1CodePage = page;
+
+        for (count = 0;
+            (phys64 = vars->handoffBuffer->getPhysicalSegment(count, &segLen, kIOMemoryMapperNone));
+            count += segLen)
+        {
+	    for (pagesDone = 0; pagesDone < atop_32(segLen); pagesDone++)
+	    {
+	    	gIOHibernateHandoffPages[atop_32(count) + pagesDone] = atop_64(phys64) + pagesDone;
+	    }
+        }
+
+        page = atop_32(kvtophys(hibernateBase));
+        count = atop_32(round_page(hibernateEnd) - hibernateBase);
+        header->restore1CodePhysPage = page;
+        header->restore1CodeVirt = hibernateBase;
         header->restore1PageCount = count;
         header->restore1CodeOffset = ((uintptr_t) &hibernate_machine_entrypoint)      - hibernateBase;
         header->restore1StackOffset = ((uintptr_t) &gIOHibernateRestoreStackEnd[0]) - 64 - hibernateBase;
@@ -1922,7 +2041,7 @@ hibernate_write_image(void)
         for (page = 0; page < count; page++)
         {
             if ((src < &gIOHibernateRestoreStack[0]) || (src >= &gIOHibernateRestoreStackEnd[0]))
-                restore1Sum += hibernate_sum_page(src, header->restore1CodePage + page);
+                restore1Sum += hibernate_sum_page(src, header->restore1CodeVirt + page);
             else
                 restore1Sum += 0x00000000;
             src += page_size;
@@ -1956,9 +2075,6 @@ hibernate_write_image(void)
 
         // write the preview buffer
 
-        addr64_t phys64;
-        IOByteCount segLen;
-
         if (vars->previewBuffer)
         {
             ppnum = 0;
@@ -2031,8 +2147,9 @@ hibernate_write_image(void)
 
 	hibernate_page_list_set_volatile(vars->page_list, vars->page_list_wired, &pageCount);
     
-        page = atop_32(hibernateBase);
-        count = atop_32(round_page(hibernateEnd)) - page;
+
+        page = atop_32(KERNEL_IMAGE_TO_PHYS(hibernateBase));
+        count = atop_32(round_page(KERNEL_IMAGE_TO_PHYS(hibernateEnd))) - page;
         hibernate_set_page_state(vars->page_list, vars->page_list_wired,
                                         page, count,
                                         kIOHibernatePageStateFree);
@@ -2048,10 +2165,22 @@ hibernate_write_image(void)
             pageCount -= atop_32(segLen);
         }
 
+        for (count = 0;
+            (phys64 = vars->handoffBuffer->getPhysicalSegment(count, &segLen, kIOMemoryMapperNone));
+            count += segLen)
+        {
+            hibernate_set_page_state(vars->page_list, vars->page_list_wired, 
+                                        atop_64(phys64), atop_32(segLen),
+                                        kIOHibernatePageStateFree);
+            pageCount -= atop_32(segLen);
+        }
+
+		(void)hibernate_pal_callback;
+
         src = (uint8_t *) vars->srcBuffer->getBytesNoCopy();
     
         pagesDone = 0;
         lastBlob = 0;
     
         HIBLOG("writing %d pages\n", pageCount);
 
@@ -2283,12 +2412,16 @@ hibernate_write_image(void)
 		nsec / 1000000ULL);
 
     absolutetime_to_nanoseconds(compTime, &nsec);
-    HIBLOG("comp time: %qd ms, ", 
-		nsec / 1000000ULL);
+    HIBLOG("comp bytes: %qd time: %qd ms %qd Mb/s, ", 
+		compBytes, 
+		nsec / 1000000ULL,
+		nsec ? (((compBytes * 1000000000ULL) / 1024 / 1024) / nsec) : 0);
 
-    absolutetime_to_nanoseconds(decoTime, &nsec);
-    HIBLOG("deco time: %qd ms, ", 
-		nsec / 1000000ULL);
+    absolutetime_to_nanoseconds(vars->fileVars->cryptTime, &nsec);
+    HIBLOG("crypt bytes: %qd time: %qd ms %qd Mb/s, ", 
+		vars->fileVars->cryptBytes, 
+		nsec / 1000000ULL, 
+		nsec ? (((vars->fileVars->cryptBytes * 1000000000ULL) / 1024 / 1024) / nsec) : 0);
 
     HIBLOG("\nimage %qd, uncompressed %qd (%d), compressed %qd (%d%%), sum1 %x, sum2 %x\n", 
                header->imageSize,
@@ -2353,7 +2486,9 @@ hibernate_machine_init(void)
     uint32_t     sum;
     uint32_t     pagesDone;
     uint32_t     pagesRead = 0;
+    AbsoluteTime startTime, compTime;
     AbsoluteTime allTime, endTime;
+    uint64_t     compBytes;
     uint64_t     nsec;
     uint32_t     lastProgressStamp = 0;
     uint32_t     progressStamp;
@@ -2381,7 +2516,7 @@ hibernate_machine_init(void)
 
     HIBPRINT("diag %x %x %x %x\n",
 	    gIOHibernateCurrentHeader->diag[0], gIOHibernateCurrentHeader->diag[1], 
-	    gIOHibernateCurrentHeader->diag[2], gIOHibernateCurrentHeader->diag[3]); 
+	    gIOHibernateCurrentHeader->diag[2], gIOHibernateCurrentHeader->diag[3]);
 
     HIBPRINT("video %x %d %d %d status %x\n",
 	    gIOHibernateGraphicsInfo->physicalAddress, gIOHibernateGraphicsInfo->depth, 
@@ -2392,6 +2527,62 @@ hibernate_machine_init(void)
 
     boot_args *args = (boot_args *) PE_state.bootArgs;
 
+    cryptvars = (kIOHibernateModeEncrypt & gIOHibernateMode) ? &gIOHibernateCryptWakeContext : 0;
+
+    if (gIOHibernateCurrentHeader->handoffPageCount > gIOHibernateHandoffPageCount)
+    	panic("handoff overflow");
+
+    IOHibernateHandoff * handoff;
+    bool                 done           = false;
+    bool                 foundCryptData = false;
+
+    for (handoff = (IOHibernateHandoff *) vars->handoffBuffer->getBytesNoCopy();
+    	 !done;
+    	 handoff = (IOHibernateHandoff *) &handoff->data[handoff->bytecount])
+    {
+//	HIBPRINT("handoff %p, %x, %x\n", handoff, handoff->type, handoff->bytecount);
+	uint8_t * data = &handoff->data[0];
+    	switch (handoff->type)
+    	{
+	    case kIOHibernateHandoffTypeEnd:
+	    	done = true;
+		break;
+
+	    case kIOHibernateHandoffTypeGraphicsInfo:
+		bcopy(data, gIOHibernateGraphicsInfo, sizeof(*gIOHibernateGraphicsInfo));
+		break;
+
+	    case kIOHibernateHandoffTypeCryptVars:
+		if (cryptvars)
+		{
+		    hibernate_cryptwakevars_t *
+		    wakevars = (hibernate_cryptwakevars_t *) &handoff->data[0];
+		    bcopy(&wakevars->aes_iv[0], &cryptvars->aes_iv[0], sizeof(cryptvars->aes_iv));
+		}
+		foundCryptData = true;
+		bzero(data, handoff->bytecount);
+		break;
+
+	    case kIOHibernateHandoffTypeMemoryMap:
+		hibernate_newruntime_map(data, handoff->bytecount, 
+					 gIOHibernateCurrentHeader->systemTableOffset);
+	    	break;
+
+	    case kIOHibernateHandoffTypeDeviceTree:
+		{
+//		    DTEntry chosen = NULL;
+//		    HIBPRINT("DTLookupEntry %d\n", DTLookupEntry((const DTEntry) data, "/chosen", &chosen));
+		}
+	    	break;
+
+	    default:
+	    	done = (kIOHibernateHandoffType != (handoff->type & 0xFFFF0000));
+	    	break;
+	}    
+    }
+    if (cryptvars && !foundCryptData)
+    	panic("hibernate handoff");
+
     if (vars->videoMapping 
 	&& gIOHibernateGraphicsInfo->physicalAddress
 	&& (args->Video.v_baseAddr == gIOHibernateGraphicsInfo->physicalAddress))
@@ -2404,21 +2595,11 @@ hibernate_machine_init(void)
     }
 
     uint8_t * src = (uint8_t *) vars->srcBuffer->getBytesNoCopy();
-
-    if (gIOHibernateWakeMapSize)
-    {
-	err = IOMemoryDescriptorWriteFromPhysical(vars->srcBuffer, 0, ptoa_64(gIOHibernateWakeMap), 
-						    gIOHibernateWakeMapSize);
-	if (kIOReturnSuccess == err)
-	    hibernate_newruntime_map(src, gIOHibernateWakeMapSize, 
-				     gIOHibernateCurrentHeader->systemTableOffset);
-	gIOHibernateWakeMap = 0;
-	gIOHibernateWakeMapSize = 0;
-    }
-
     uint32_t decoOffset;
 
     clock_get_uptime(&allTime);
+    AbsoluteTime_to_scalar(&compTime) = 0;
+    compBytes = 0;
 
     HIBLOG("IOHibernatePollerOpen(), ml_get_interrupts_enabled %d\n", ml_get_interrupts_enabled());
     err = IOHibernatePollerOpen(vars->fileVars, kIOPolledAfterSleepState, 0);
@@ -2439,23 +2620,17 @@ hibernate_machine_init(void)
         ProgressUpdate(gIOHibernateGraphicsInfo, (uint8_t *) vars->videoMapping, 0, lastBlob);
     }
 
-    cryptvars = (kIOHibernateModeEncrypt & gIOHibernateMode) ? &gIOHibernateCryptWakeContext : 0;
-    if (kIOHibernateModeEncrypt & gIOHibernateMode)
-    {
-        cryptvars = &gIOHibernateCryptWakeContext;
-        bcopy(&gIOHibernateCryptWakeVars->aes_iv[0], 
-                &cryptvars->aes_iv[0], 
-                sizeof(cryptvars->aes_iv));
-    }
-
     // kick off the read ahead
     vars->fileVars->io	         = false;
     vars->fileVars->bufferHalf   = 0;
     vars->fileVars->bufferLimit  = 0;
     vars->fileVars->lastRead     = 0;
+    vars->fileVars->readEnd      = gIOHibernateCurrentHeader->imageSize;
     vars->fileVars->bufferOffset = vars->fileVars->bufferLimit;
+    vars->fileVars->cryptBytes   = 0;
+    AbsoluteTime_to_scalar(&vars->fileVars->cryptTime) = 0;
 
-    IOPolledFileRead(vars->fileVars, 0, 0, cryptvars);
+    err = IOPolledFileRead(vars->fileVars, 0, 0, cryptvars);
     vars->fileVars->bufferOffset = vars->fileVars->bufferLimit;
     // --
 
@@ -2464,7 +2639,7 @@ hibernate_machine_init(void)
     uint32_t * header = (uint32_t *) src;
     sum = 0;
 
-    do
+    while (kIOReturnSuccess == err)
     {
 	unsigned int count;
 	unsigned int page;
@@ -2510,7 +2685,14 @@ hibernate_machine_init(void)
 	    if (compressedSize < page_size)
 	    {
 		decoOffset = page_size;
+
+                clock_get_uptime(&startTime);
 		WKdm_decompress((WK_word*) src, (WK_word*) (src + decoOffset), PAGE_SIZE_IN_WORDS);
+                clock_get_uptime(&endTime);
+                ADD_ABSOLUTETIME(&compTime, &endTime);
+                SUB_ABSOLUTETIME(&compTime, &startTime);
+
+                compBytes += page_size;
 	    }
 	    else
 		decoOffset = 0;
@@ -2554,7 +2736,8 @@ hibernate_machine_init(void)
 	    }
 	}
     }
-    while (true);
+    if (pagesDone == gIOHibernateCurrentHeader->actualUncompressedPages)
+	err = kIOReturnLockedRead;
 
     if (kIOReturnSuccess != err)
 	panic("Hibernate restore error %x", err);
@@ -2580,10 +2763,22 @@ hibernate_machine_init(void)
     SUB_ABSOLUTETIME(&endTime, &allTime);
     absolutetime_to_nanoseconds(endTime, &nsec);
 
-    HIBLOG("hibernate_machine_init pagesDone %d sum2 %x, time: %qd ms\n", 
+    HIBLOG("hibernate_machine_init pagesDone %d sum2 %x, time: %qd ms, ", 
 		pagesDone, sum, nsec / 1000000ULL);
- 
-   KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 2) | DBG_FUNC_NONE, pagesRead, pagesDone, 0, 0, 0);
+
+    absolutetime_to_nanoseconds(compTime, &nsec);
+    HIBLOG("comp bytes: %qd time: %qd ms %qd Mb/s, ", 
+		compBytes, 
+		nsec / 1000000ULL,
+		nsec ? (((compBytes * 1000000000ULL) / 1024 / 1024) / nsec) : 0);
+
+    absolutetime_to_nanoseconds(vars->fileVars->cryptTime, &nsec);
+    HIBLOG("crypt bytes: %qd time: %qd ms %qd Mb/s\n", 
+		vars->fileVars->cryptBytes, 
+		nsec / 1000000ULL, 
+		nsec ? (((vars->fileVars->cryptBytes * 1000000000ULL) / 1024 / 1024) / nsec) : 0);
+
+    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 2) | DBG_FUNC_NONE, pagesRead, pagesDone, 0, 0, 0);
 }
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
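The new throughput figures in the HIBLOG lines above are derived as (bytes * 10^9 / 2^20) / nsec, i.e. bytes per nanosecond scaled to binary megabytes per second. A worked check of the arithmetic:

    bytes = 536870912      (512 MB)
    nsec  = 4000000000     (4 s)
    MB/s  = (536870912 * 1000000000 / 1048576) / 4000000000 = 128
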
diff --git a/iokit/Kernel/IOHibernateInternal.h b/iokit/Kernel/IOHibernateInternal.h
index 2b2e5802e..7e7e95fe6 100644
--- a/iokit/Kernel/IOHibernateInternal.h
+++ b/iokit/Kernel/IOHibernateInternal.h
@@ -36,8 +36,10 @@ struct IOHibernateVars
 {
     hibernate_page_list_t *		page_list;
     hibernate_page_list_t *		page_list_wired;
+    hibernate_page_list_t *		page_list_pal;
     class IOBufferMemoryDescriptor *    ioBuffer;
     class IOBufferMemoryDescriptor *    srcBuffer;
+    class IOBufferMemoryDescriptor *    handoffBuffer;
     class IOMemoryDescriptor *          previewBuffer;
     OSData *          			previewData;
     OSData *		 		fileExtents;
@@ -52,6 +54,7 @@ struct IOHibernateVars
     uint8_t				saveBootAudioVolume;
     uint8_t				wiredCryptKey[kIOHibernateAESKeySize / 8];
     uint8_t				cryptKey[kIOHibernateAESKeySize / 8];
+    uint8_t				volumeCryptKey[kIOHibernateAESKeySize / 8];
 };
 typedef struct IOHibernateVars IOHibernateVars;
 
@@ -68,12 +71,15 @@ struct IOPolledFileIOVars
     IOByteCount 			bufferHalf;
     IOByteCount				extentRemaining;
     IOByteCount				lastRead;
-    boolean_t                           solid_state;
+    IOByteCount				readEnd;
+    uint32_t                            flags;
     uint64_t				block0;
     uint64_t				position;
     uint64_t				extentPosition;
     uint64_t				encryptStart;
     uint64_t				encryptEnd;
+    uint64_t                            cryptBytes;
+    AbsoluteTime                        cryptTime;
     IOPolledFileExtent * 		extentMap;
     IOPolledFileExtent * 		currentExtent;
     bool				io;
@@ -103,6 +109,5 @@ extern unsigned long sectSizeDATA;
 extern vm_offset_t sectINITPTB;
 #endif
 
-extern vm_offset_t gIOHibernateWakeMap;	    // ppnum
-extern vm_size_t   gIOHibernateWakeMapSize;
-
+extern ppnum_t gIOHibernateHandoffPages[];
+extern uint32_t gIOHibernateHandoffPageCount;
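With the solid_state boolean replaced by a flags word, SSD handling reduces to bit operations, as seen in IOPolledFileOpen() and IOHibernateSystemSleep() above:

    vars->flags ^= kIOHibernateOptionSSD;    // kIOHibernateModeSSDInvert toggle
    bool isSSD = (0 != (kIOHibernateOptionSSD & vars->flags));
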
diff --git a/iokit/Kernel/IOHibernateRestoreKernel.c b/iokit/Kernel/IOHibernateRestoreKernel.c
index 280b8c430..7259ab3ec 100644
--- a/iokit/Kernel/IOHibernateRestoreKernel.c
+++ b/iokit/Kernel/IOHibernateRestoreKernel.c
@@ -35,9 +35,13 @@
 #include <crypto/aes.h>
 #include <libkern/libkern.h>
 
-#include "WKdm.h"
+#include <libkern/WKdm.h>
 #include "IOHibernateInternal.h"
 
+#if defined(__i386__) || defined(__x86_64__)
+#include <i386/pal_hibernate.h>
+#endif
+
 /*
 This code is linked into the kernel but is part of the "__HIB" section, which means
 it's used by code running in the special context of restoring the kernel text and data
@@ -52,14 +56,15 @@ uint32_t gIOHibernateDebugFlags;
 static IOHibernateImageHeader _hibernateHeader;
 IOHibernateImageHeader * gIOHibernateCurrentHeader = &_hibernateHeader;
 
-static hibernate_graphics_t _hibernateGraphics;
-hibernate_graphics_t * gIOHibernateGraphicsInfo = &_hibernateGraphics;
-
-static hibernate_cryptwakevars_t _cryptWakeVars;
-hibernate_cryptwakevars_t * gIOHibernateCryptWakeVars = &_cryptWakeVars;
+ppnum_t gIOHibernateHandoffPages[64];
+uint32_t gIOHibernateHandoffPageCount = sizeof(gIOHibernateHandoffPages) 
+					/ sizeof(gIOHibernateHandoffPages[0]);
 
-vm_offset_t gIOHibernateWakeMap;    	    // ppnum
-vm_size_t   gIOHibernateWakeMapSize;
+#if CONFIG_DEBUG
+void hibprintf(const char *fmt, ...);
+#else
+#define hibprintf(x...)
+#endif
 
 
 #if CONFIG_SLEEP
@@ -148,7 +153,7 @@ static void uart_puthex(uint64_t num)
 	c = 0xf & (num >> bit);
 	if (c)
 	    leading = false;
-	else if (leading)
+	else if (leading && bit)
 	    continue;
 	if (c <= 9)
 	    c += '0';
@@ -333,7 +338,7 @@ hibernate_page_bitmap_count(hibernate_bitmap_t * bitmap, uint32_t set, uint32_t
     return (count);
 }
 
-static vm_offset_t
+static ppnum_t
 hibernate_page_list_grab(hibernate_page_list_t * list, uint32_t * pNextFree)
 {
     uint32_t		 nextFree = *pNextFree;
@@ -365,27 +370,19 @@ static uint32_t
 store_one_page(uint32_t procFlags, uint32_t * src, uint32_t compressedSize, 
 		uint32_t * buffer, uint32_t ppnum)
 {
-    uint64_t dst;
-    uint32_t sum;
-
-    dst = ptoa_64(ppnum);
-    if (ppnum < 0x00100000)
-	buffer = (uint32_t *) (uintptr_t) dst;
+	uint64_t dst = ptoa_64(ppnum);
 
-    if (compressedSize != PAGE_SIZE)
-    {
-	WKdm_decompress((WK_word*) src, (WK_word*) buffer, PAGE_SIZE >> 2);
-	src = buffer;
-    }
-
-    sum = hibernate_sum_page((uint8_t *) src, ppnum);
-
-    if (((uint64_t) (uintptr_t) src) == dst)
-	src = 0;
-
-    hibernate_restore_phys_page((uint64_t) (uintptr_t) src, dst, PAGE_SIZE, procFlags);
+	if (compressedSize != PAGE_SIZE)
+	{
+		dst = pal_hib_map(DEST_COPY_AREA, dst);
+		WKdm_decompress((WK_word*) src, (WK_word*)(uintptr_t)dst, PAGE_SIZE >> 2);
+	}
+	else
+	{
+		dst = hibernate_restore_phys_page((uint64_t) (uintptr_t) src, dst, PAGE_SIZE, procFlags);
+	}
 
-    return (sum);
+	return hibernate_sum_page((uint8_t *)(uintptr_t)dst, ppnum);
 }
 
 // used only for small struct copies
@@ -411,9 +408,10 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
 {
     uint32_t idx;
     uint32_t * src;
-    uint32_t * buffer;
+    uint32_t * imageReadPos;
     uint32_t * pageIndexSource;
     hibernate_page_list_t * map;
+    uint32_t stage;
     uint32_t count;
     uint32_t ppnum;
     uint32_t page;
@@ -424,10 +422,13 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
     uint32_t * copyPageList;
     uint32_t copyPageIndex;
     uint32_t sum;
+    uint32_t pageSum;
     uint32_t nextFree;
     uint32_t lastImagePage;
     uint32_t lastMapPage;
     uint32_t lastPageIndexPage;
+    uint32_t handoffPages;
+    uint32_t handoffPageCount;
 
     C_ASSERT(sizeof(IOHibernateImageHeader) == 512);
 
@@ -440,84 +441,43 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
                 gIOHibernateCurrentHeader, 
                 sizeof(IOHibernateImageHeader));
 
-    if (!p2)
-    {
-	count = header->graphicsInfoOffset;
-	if (count)
-	    p2 = (void *)(((uintptr_t) header) - count);
-    }
-    if (p2) 
-        bcopy_internal(p2, 
-                gIOHibernateGraphicsInfo, 
-                sizeof(hibernate_graphics_t));
-    else
-        gIOHibernateGraphicsInfo->physicalAddress = gIOHibernateGraphicsInfo->depth = 0;
-
-    if (!p3)
-    {
-	count = header->cryptVarsOffset;
-	if (count)
-	    p3 = (void *)(((uintptr_t) header) - count);
-    }
-    if (p3)
-        bcopy_internal(p3, 
-                gIOHibernateCryptWakeVars, 
-                sizeof(hibernate_cryptwakevars_t));
-
-    src = (uint32_t *)
+    map = (hibernate_page_list_t *)
                 (((uintptr_t) &header->fileExtentMap[0]) 
                             + header->fileExtentMapSize 
-                            + ptoa_32(header->restore1PageCount));
-
-    if (header->previewSize)
-    {
-        pageIndexSource = src;
-        map = (hibernate_page_list_t *)(((uintptr_t) pageIndexSource) + header->previewSize);
-        src = (uint32_t *) (((uintptr_t) pageIndexSource) + header->previewPageListSize);
-    }
-    else
-    {
-        pageIndexSource = 0;
-        map = (hibernate_page_list_t *) src;
-        src = (uint32_t *) (((uintptr_t) map) + header->bitmapSize);
-    }
-
-    lastPageIndexPage = atop_32((uintptr_t) src);
+                            + ptoa_32(header->restore1PageCount)
+                            + header->previewSize);
 
     lastImagePage = atop_32(((uintptr_t) header) + header->image1Size);
 
     lastMapPage = atop_32(((uintptr_t) map) + header->bitmapSize);
 
+    handoffPages     = header->handoffPages;
+    handoffPageCount = header->handoffPageCount;
+
     debug_code(kIOHibernateRestoreCodeImageEnd,       ptoa_64(lastImagePage));
-    debug_code(kIOHibernateRestoreCodePageIndexStart, (uintptr_t) pageIndexSource);
-    debug_code(kIOHibernateRestoreCodePageIndexEnd,   ptoa_64(lastPageIndexPage));
     debug_code(kIOHibernateRestoreCodeMapStart,       (uintptr_t) map);
     debug_code(kIOHibernateRestoreCodeMapEnd,         ptoa_64(lastMapPage));
 
+    debug_code('hand', ptoa_64(handoffPages));
+    debug_code('hnde', ptoa_64(handoffPageCount));
+
     // knock all the image pages to be used out of free map
     for (ppnum = atop_32((uintptr_t) header); ppnum <= lastImagePage; ppnum++)
     {
 	hibernate_page_bitset(map, FALSE, ppnum);
     }
+    // knock all the handoff pages to be used out of free map
+    for (ppnum = handoffPages; ppnum < (handoffPages + handoffPageCount); ppnum++)
+    {
+	hibernate_page_bitset(map, FALSE, ppnum);
+    }
 
     nextFree = 0;
     hibernate_page_list_grab(map, &nextFree);
-    buffer = (uint32_t *) (uintptr_t) ptoa_32(hibernate_page_list_grab(map, &nextFree));
 
-    if (header->memoryMapSize && (count = header->memoryMapOffset))
-    {
-	p4 = (void *)(((uintptr_t) header) - count);
-	gIOHibernateWakeMap     = hibernate_page_list_grab(map, &nextFree);
-	gIOHibernateWakeMapSize = header->memoryMapSize;
-	debug_code(kIOHibernateRestoreCodeWakeMapSize, gIOHibernateWakeMapSize);
-	if (gIOHibernateWakeMapSize > PAGE_SIZE)
-	    fatal();
-	bcopy_internal(p4, (void  *) (uintptr_t) ptoa_32(gIOHibernateWakeMap), gIOHibernateWakeMapSize);
-    }
-    else
-	gIOHibernateWakeMapSize = 0;
+    pal_hib_window_setup(hibernate_page_list_grab(map, &nextFree));
 
-    sum = gIOHibernateCurrentHeader->actualRestore1Sum;
+    sum = header->actualRestore1Sum;
     gIOHibernateCurrentHeader->diag[0] = (uint32_t)(uintptr_t) header;
     gIOHibernateCurrentHeader->diag[1] = sum;
 
@@ -528,54 +488,110 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
     copyPageIndex     = PAGE_SIZE >> 2;
 
     compressedSize    = PAGE_SIZE;
+    stage             = 2;
+    count             = 0;
+    src               = NULL;
+
+    if (gIOHibernateCurrentHeader->previewSize)
+    {
+	pageIndexSource = (uint32_t *)
+		     (((uintptr_t) &header->fileExtentMap[0]) 
+				 + gIOHibernateCurrentHeader->fileExtentMapSize 
+				 + ptoa_32(gIOHibernateCurrentHeader->restore1PageCount));
+	imageReadPos = (uint32_t *) (((uintptr_t) pageIndexSource) + gIOHibernateCurrentHeader->previewPageListSize);
+	lastPageIndexPage = atop_32((uintptr_t) imageReadPos);
+    }
+    else
+    {
+	pageIndexSource   = NULL;
+	lastPageIndexPage = 0;
+	imageReadPos =  (uint32_t *) (((uintptr_t) map) + gIOHibernateCurrentHeader->bitmapSize);
+    }
+
+    debug_code(kIOHibernateRestoreCodePageIndexStart, (uintptr_t) pageIndexSource);
+    debug_code(kIOHibernateRestoreCodePageIndexEnd,   ptoa_64(lastPageIndexPage));
 
     while (1)
     {
-        if (pageIndexSource)
-        {
-            ppnum = pageIndexSource[0];
-            count = pageIndexSource[1];
-            pageIndexSource += 2;
-            if (!count)
-            {
-                pageIndexSource = 0;
-                src =  (uint32_t *) (((uintptr_t) map) + gIOHibernateCurrentHeader->bitmapSize);
-                ppnum = src[0];
-                count = src[1];
-                src += 2;
-            } 
-        }
-        else
-        {
-            ppnum = src[0];
-            count = src[1];
-            if (!count)
-                break;
-            src += 2;
+	switch (stage)
+	{
+	    case 2:
+		// copy handoff data
+		count = src ? 0 : handoffPageCount;
+		if (!count)
+		    break;
+		if (count > gIOHibernateHandoffPageCount)
+		    count = gIOHibernateHandoffPageCount;
+		src = (uint32_t *) (uintptr_t) ptoa_64(handoffPages);
+		break;
+	
+	    case 1:
+		// copy pageIndexSource pages == preview image data
+		if (!src)
+		{
+		    if (!pageIndexSource)
+		    	break;
+		    src = imageReadPos;
+		}
+		ppnum = pageIndexSource[0];
+		count = pageIndexSource[1];
+		pageIndexSource += 2;
+		imageReadPos = src;
+		break;
+
+	    case 0:
+		// copy pages
+		if (!src)
+		{
+		    src =  (uint32_t *) (((uintptr_t) map) + gIOHibernateCurrentHeader->bitmapSize);
+		}
+		ppnum = src[0];
+		count = src[1];
+		src += 2;
+		imageReadPos = src;
+		break;
+	}
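+	// Stages count down 2 -> 1 -> 0: handoff pages first, then the
+	// preview (pageIndexSource) pages, then the main compressed image.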
+
+
+	if (!count)
+	{
+	    if (!stage)
+	        break;
+	    stage--;
+	    src = NULL;
+	    continue;
 	}
 
 	for (page = 0; page < count; page++, ppnum++)
 	{
-            uint32_t tag;
+	    uint32_t tag;
 	    int conflicts;
 
-            if (!pageIndexSource)
-            {
-                tag = *src++;
-                compressedSize = kIOHibernateTagLength & tag;
-            }
+	    if (2 == stage)
+		ppnum = gIOHibernateHandoffPages[page];
+	    else if (!stage)
+	    {
+		tag = *src++;
+		compressedSize = kIOHibernateTagLength & tag;
+	    }
+
+	    conflicts = (ppnum >= atop_32((uintptr_t) map)) && (ppnum <= lastMapPage);
 
-	    conflicts = (((ppnum >= atop_32((uintptr_t) map)) && (ppnum <= lastMapPage))
-		      || ((ppnum >= atop_32((uintptr_t) src)) && (ppnum <= lastImagePage)));
+	    conflicts |= ((ppnum >= atop_32((uintptr_t) imageReadPos)) && (ppnum <= lastImagePage));
 
-            if (pageIndexSource)
-                conflicts |= ((ppnum >= atop_32((uintptr_t) pageIndexSource)) && (ppnum <= lastPageIndexPage));
+	    if (stage >= 2)
+ 		conflicts |= ((ppnum >= atop_32((uintptr_t) src)) && (ppnum <= (handoffPages + handoffPageCount - 1)));
+
+	    if (stage >= 1)
+ 		conflicts |= ((ppnum >= atop_32((uintptr_t) pageIndexSource)) && (ppnum <= lastPageIndexPage));
 
 	    if (!conflicts)
 	    {
-		if (compressedSize)
-		    sum += store_one_page(gIOHibernateCurrentHeader->processorFlags,
-					    src, compressedSize, buffer, ppnum);
+//              if (compressedSize)
+		pageSum = store_one_page(gIOHibernateCurrentHeader->processorFlags,
+					 src, compressedSize, 0, ppnum);
+		if (stage != 2)
+		    sum += pageSum;
 		uncompressedPages++;
 	    }
 	    else
@@ -596,46 +612,59 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
 		    // alloc new copy list page
 		    uint32_t pageListPage = hibernate_page_list_grab(map, &nextFree);
 		    // link to current
-		    if (copyPageList)
-			copyPageList[1] = pageListPage;
-		    else
-			copyPageListHead = pageListPage;
-		    copyPageList = (uint32_t *) (uintptr_t) ptoa_32(pageListPage);
+		    if (copyPageList) {
+			    copyPageList[1] = pageListPage;
+		    } else {
+			    copyPageListHead = pageListPage;
+		    }
+		    copyPageList = (uint32_t *)pal_hib_map(SRC_COPY_AREA, 
+				    ptoa_32(pageListPage));
 		    copyPageList[1] = 0;
 		    copyPageIndex = 2;
 		}
 
 		copyPageList[copyPageIndex++] = ppnum;
 		copyPageList[copyPageIndex++] = bufferPage;
-		copyPageList[copyPageIndex++] = compressedSize;
+		copyPageList[copyPageIndex++] = (compressedSize | (stage << 24));
 		copyPageList[0] = copyPageIndex;
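+		// (Layout sketch: each copy-list page holds [0] write index,
+		// [1] next-page link, then {ppnum, bufferPage,
+		// compressedSize | stage << 24} triples.)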
 
-		dst = (uint32_t *) (uintptr_t) ptoa_32(bufferPage);
+		dst = (uint32_t *)pal_hib_map(DEST_COPY_AREA, ptoa_32(bufferPage));
 		for (idx = 0; idx < ((compressedSize + 3) >> 2); idx++)
-		    dst[idx] = src[idx];
+			dst[idx] = src[idx];
 	    }
 	    src += ((compressedSize + 3) >> 2);
 	}
     }
 
+    /* src points to the last page restored, so we need to skip over that */
+    hibernateRestorePALState(src);
+
     // -- copy back conflicts
 
     copyPageList = (uint32_t *)(uintptr_t) ptoa_32(copyPageListHead);
+
     while (copyPageList)
     {
+	copyPageList = (uint32_t *)pal_hib_map(COPY_PAGE_AREA, (uintptr_t)copyPageList);
 	for (copyPageIndex = 2; copyPageIndex < copyPageList[0]; copyPageIndex += 3)
 	{
-	    ppnum	   =              copyPageList[copyPageIndex + 0];
-	    src		   = (uint32_t *) (uintptr_t) ptoa_32(copyPageList[copyPageIndex + 1]);
-	    compressedSize =              copyPageList[copyPageIndex + 2];
-
-	    sum += store_one_page(gIOHibernateCurrentHeader->processorFlags,
-				    src, compressedSize, buffer, ppnum);
+	    ppnum          = copyPageList[copyPageIndex + 0];
+	    src            = (uint32_t *) (uintptr_t) ptoa_32(copyPageList[copyPageIndex + 1]);
+	    src            = (uint32_t *)pal_hib_map(SRC_COPY_AREA, (uintptr_t)src);
+	    compressedSize = copyPageList[copyPageIndex + 2];
+	    stage 	   = compressedSize >> 24;
+	    compressedSize &= 0x1FFF;
+	    pageSum        = store_one_page(gIOHibernateCurrentHeader->processorFlags,
+			    			src, compressedSize, 0, ppnum);
+	    if (stage != 2)
+	    	sum += pageSum;
 	    uncompressedPages++;
 	}
 	copyPageList = (uint32_t *) (uintptr_t) ptoa_32(copyPageList[1]);
     }
 
+    pal_hib_patchup();
+
     // -- image has been destroyed...
 
     gIOHibernateCurrentHeader->actualImage1Sum         = sum;
@@ -646,16 +675,10 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
     gIOHibernateState = kIOHibernateStateWakingFromHibernate;
 
 #if CONFIG_SLEEP
-#if defined(__ppc__)
-    typedef void (*ResetProc)(void);
-    ResetProc proc;
-    proc = (ResetProc) 0x100;
-    __asm__ volatile("ori 0, 0, 0" : : );
-    proc();
-#elif defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__)
     typedef void (*ResetProc)(void);
     ResetProc proc;
-    proc = (ResetProc) acpi_wake_prot_entry;
+    proc = HIB_ENTRYPOINT;
     // flush caches
     __asm__("wbinvd");
     proc();
@@ -666,3 +689,445 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header,
 
     return -1;
 }
+
+#if CONFIG_DEBUG
+/* standalone printf implementation */
+/*-
+ * Copyright (c) 1986, 1988, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)subr_prf.c	8.3 (Berkeley) 1/21/94
+ */
+
+typedef long ptrdiff_t;
+char const hibhex2ascii_data[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+#define hibhex2ascii(hex)  (hibhex2ascii_data[hex])
+#define toupper(c)      ((c) - 0x20 * (((c) >= 'a') && ((c) <= 'z')))
+static size_t
+hibstrlen(const char *s)
+{
+	size_t l = 0;
+	while (*s++)
+		l++;
+	return l;
+}
+
+/* Max number conversion buffer length: a u_quad_t in base 2, plus NUL byte. */
+#define MAXNBUF	(sizeof(intmax_t) * NBBY + 1)
+
+/*
+ * Put a NUL-terminated ASCII number (base <= 36) in a buffer in reverse
+ * order; return an optional length and a pointer to the last character
+ * written in the buffer (i.e., the first character of the string).
+ * The buffer pointed to by `nbuf' must have length >= MAXNBUF.
+ */
+static char *
+ksprintn(char *nbuf, uintmax_t num, int base, int *lenp, int upper)
+{
+	char *p, c;
+
+	/* Truncate so we don't call umoddi3, which isn't in __HIB */
+#if !defined(__LP64__)
+	uint32_t num2 = (uint32_t) num;
+#else
+	uintmax_t num2 = num;
+#endif
+
+	p = nbuf;
+	*p = '\0';
+	do {
+		c = hibhex2ascii(num2 % base);
+		*++p = upper ? toupper(c) : c;
+	} while (num2 /= base);
+	if (lenp)
+		*lenp = (int)(p - nbuf);
+	return (p);
+}
+
+/*
+ * Scaled down version of printf(3).
+ *
+ * Two additional formats:
+ *
+ * The format %b is supported to decode error registers.
+ * Its usage is:
+ *
+ *	printf("reg=%b\n", regval, "<base><arg>*");
+ *
+ * where <base> is the output base expressed as a control character, e.g.
+ * \10 gives octal; \20 gives hex.  Each arg is a sequence of characters,
+ * the first of which gives the bit number to be inspected (origin 1), and
+ * the next characters (up to a control character, i.e. a character <= 32),
+ * give the name of the register.  Thus:
+ *
+ *	kvprintf("reg=%b\n", 3, "\10\2BITTWO\1BITONE\n");
+ *
+ * would produce output:
+ *
+ *	reg=3<BITTWO,BITONE>
+ *
+ * XXX:  %D  -- Hexdump, takes pointer and separator string:
+ *		("%6D", ptr, ":")   -> XX:XX:XX:XX:XX:XX
+ *		("%*D", len, ptr, " " -> XX XX XX XX ...
+ */
+static int
+hibkvprintf(char const *fmt, void (*func)(int, void*), void *arg, int radix, va_list ap)
+{
+#define PCHAR(c) {int cc=(c); if (func) (*func)(cc,arg); else *d++ = cc; retval++; }
+	char nbuf[MAXNBUF];
+	char *d;
+	const char *p, *percent, *q;
+	u_char *up;
+	int ch, n;
+	uintmax_t num;
+	int base, lflag, qflag, tmp, width, ladjust, sharpflag, neg, sign, dot;
+	int cflag, hflag, jflag, tflag, zflag;
+	int dwidth, upper;
+	char padc;
+	int stop = 0, retval = 0;
+
+	num = 0;
+	if (!func)
+		d = (char *) arg;
+	else
+		d = NULL;
+
+	if (fmt == NULL)
+		fmt = "(fmt null)\n";
+
+	if (radix < 2 || radix > 36)
+		radix = 10;
+
+	for (;;) {
+		padc = ' ';
+		width = 0;
+		while ((ch = (u_char)*fmt++) != '%' || stop) {
+			if (ch == '\0')
+				return (retval);
+			PCHAR(ch);
+		}
+		percent = fmt - 1;
+		qflag = 0; lflag = 0; ladjust = 0; sharpflag = 0; neg = 0;
+		sign = 0; dot = 0; dwidth = 0; upper = 0;
+		cflag = 0; hflag = 0; jflag = 0; tflag = 0; zflag = 0;
+reswitch:	switch (ch = (u_char)*fmt++) {
+		case '.':
+			dot = 1;
+			goto reswitch;
+		case '#':
+			sharpflag = 1;
+			goto reswitch;
+		case '+':
+			sign = 1;
+			goto reswitch;
+		case '-':
+			ladjust = 1;
+			goto reswitch;
+		case '%':
+			PCHAR(ch);
+			break;
+		case '*':
+			if (!dot) {
+				width = va_arg(ap, int);
+				if (width < 0) {
+					ladjust = !ladjust;
+					width = -width;
+				}
+			} else {
+				dwidth = va_arg(ap, int);
+			}
+			goto reswitch;
+		case '0':
+			if (!dot) {
+				padc = '0';
+				goto reswitch;
+			}
+		case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9':
+				for (n = 0;; ++fmt) {
+					n = n * 10 + ch - '0';
+					ch = *fmt;
+					if (ch < '0' || ch > '9')
+						break;
+				}
+			if (dot)
+				dwidth = n;
+			else
+				width = n;
+			goto reswitch;
+		case 'b':
+			num = (u_int)va_arg(ap, int);
+			p = va_arg(ap, char *);
+			for (q = ksprintn(nbuf, num, *p++, NULL, 0); *q;)
+				PCHAR(*q--);
+
+			if (num == 0)
+				break;
+
+			for (tmp = 0; *p;) {
+				n = *p++;
+				if (num & (1 << (n - 1))) {
+					PCHAR(tmp ? ',' : '<');
+					for (; (n = *p) > ' '; ++p)
+						PCHAR(n);
+					tmp = 1;
+				} else
+					for (; *p > ' '; ++p)
+						continue;
+			}
+			if (tmp)
+				PCHAR('>');
+			break;
+		case 'c':
+			PCHAR(va_arg(ap, int));
+			break;
+		case 'D':
+			up = va_arg(ap, u_char *);
+			p = va_arg(ap, char *);
+			if (!width)
+				width = 16;
+			while(width--) {
+				PCHAR(hibhex2ascii(*up >> 4));
+				PCHAR(hibhex2ascii(*up & 0x0f));
+				up++;
+				if (width)
+					for (q=p;*q;q++)
+						PCHAR(*q);
+			}
+			break;
+		case 'd':
+		case 'i':
+			base = 10;
+			sign = 1;
+			goto handle_sign;
+		case 'h':
+			if (hflag) {
+				hflag = 0;
+				cflag = 1;
+			} else
+				hflag = 1;
+			goto reswitch;
+		case 'j':
+			jflag = 1;
+			goto reswitch;
+		case 'l':
+			if (lflag) {
+				lflag = 0;
+				qflag = 1;
+			} else
+				lflag = 1;
+			goto reswitch;
+		case 'n':
+			if (jflag)
+				*(va_arg(ap, intmax_t *)) = retval;
+			else if (qflag)
+				*(va_arg(ap, quad_t *)) = retval;
+			else if (lflag)
+				*(va_arg(ap, long *)) = retval;
+			else if (zflag)
+				*(va_arg(ap, size_t *)) = retval;
+			else if (hflag)
+				*(va_arg(ap, short *)) = retval;
+			else if (cflag)
+				*(va_arg(ap, char *)) = retval;
+			else
+				*(va_arg(ap, int *)) = retval;
+			break;
+		case 'o':
+			base = 8;
+			goto handle_nosign;
+		case 'p':
+			base = 16;
+			sharpflag = (width == 0);
+			sign = 0;
+			num = (uintptr_t)va_arg(ap, void *);
+			goto number;
+		case 'q':
+			qflag = 1;
+			goto reswitch;
+		case 'r':
+			base = radix;
+			if (sign)
+				goto handle_sign;
+			goto handle_nosign;
+		case 's':
+			p = va_arg(ap, char *);
+			if (p == NULL)
+				p = "(null)";
+			if (!dot)
+				n = (typeof(n))hibstrlen (p);
+			else
+				for (n = 0; n < dwidth && p[n]; n++)
+					continue;
+
+			width -= n;
+
+			if (!ladjust && width > 0)
+				while (width--)
+					PCHAR(padc);
+			while (n--)
+				PCHAR(*p++);
+			if (ladjust && width > 0)
+				while (width--)
+					PCHAR(padc);
+			break;
+		case 't':
+			tflag = 1;
+			goto reswitch;
+		case 'u':
+			base = 10;
+			goto handle_nosign;
+		case 'X':
+			upper = 1;
+		case 'x':
+			base = 16;
+			goto handle_nosign;
+		case 'y':
+			base = 16;
+			sign = 1;
+			goto handle_sign;
+		case 'z':
+			zflag = 1;
+			goto reswitch;
+handle_nosign:
+			sign = 0;
+			if (jflag)
+				num = va_arg(ap, uintmax_t);
+			else if (qflag)
+				num = va_arg(ap, u_quad_t);
+			else if (tflag)
+				num = va_arg(ap, ptrdiff_t);
+			else if (lflag)
+				num = va_arg(ap, u_long);
+			else if (zflag)
+				num = va_arg(ap, size_t);
+			else if (hflag)
+				num = (u_short)va_arg(ap, int);
+			else if (cflag)
+				num = (u_char)va_arg(ap, int);
+			else
+				num = va_arg(ap, u_int);
+			goto number;
+handle_sign:
+			if (jflag)
+				num = va_arg(ap, intmax_t);
+			else if (qflag)
+				num = va_arg(ap, quad_t);
+			else if (tflag)
+				num = va_arg(ap, ptrdiff_t);
+			else if (lflag)
+				num = va_arg(ap, long);
+			else if (zflag)
+				num = va_arg(ap, ssize_t);
+			else if (hflag)
+				num = (short)va_arg(ap, int);
+			else if (cflag)
+				num = (char)va_arg(ap, int);
+			else
+				num = va_arg(ap, int);
+number:
+			if (sign && (intmax_t)num < 0) {
+				neg = 1;
+				num = -(intmax_t)num;
+			}
+			p = ksprintn(nbuf, num, base, &tmp, upper);
+			if (sharpflag && num != 0) {
+				if (base == 8)
+					tmp++;
+				else if (base == 16)
+					tmp += 2;
+			}
+			if (neg)
+				tmp++;
+
+			if (!ladjust && padc != '0' && width
+			    && (width -= tmp) > 0)
+				while (width--)
+					PCHAR(padc);
+			if (neg)
+				PCHAR('-');
+			if (sharpflag && num != 0) {
+				if (base == 8) {
+					PCHAR('0');
+				} else if (base == 16) {
+					PCHAR('0');
+					PCHAR('x');
+				}
+			}
+			if (!ladjust && width && (width -= tmp) > 0)
+				while (width--)
+					PCHAR(padc);
+
+			while (*p)
+				PCHAR(*p--);
+
+			if (ladjust && width && (width -= tmp) > 0)
+				while (width--)
+					PCHAR(padc);
+
+			break;
+		default:
+			while (percent < fmt)
+				PCHAR(*percent++);
+			/*
+			 * Since we ignore a formatting argument it is no
+			 * longer safe to obey the remaining formatting
+			 * arguments as the arguments will no longer match
+			 * the format specs.
+			 */
+			stop = 1;
+			break;
+		}
+	}
+#undef PCHAR
+}
+
+
+static void
+putchar(int c, void *arg)
+{
+	(void)arg;
+	uart_putc(c);
+}
+
+void
+hibprintf(const char *fmt, ...)
+{
+	/* http://www.pagetable.com/?p=298 */
+	va_list ap;
+
+	va_start(ap, fmt);
+	hibkvprintf(fmt, putchar, NULL, 10, ap);
+	va_end(ap);
+}
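+
+/*
+ * Hypothetical call site: hibprintf("restored %u pages, sum=%x\n",
+ * uncompressedPages, sum); each character reaches the uart via putchar().
+ */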
+#endif /* CONFIG_DEBUG */
+
diff --git a/iokit/Kernel/IOInterruptController.cpp b/iokit/Kernel/IOInterruptController.cpp
index a8e04bddd..1000178ad 100644
--- a/iokit/Kernel/IOInterruptController.cpp
+++ b/iokit/Kernel/IOInterruptController.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -26,11 +26,6 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
-
-#if __ppc__
-#include <ppc/proc_reg.h> 
-#endif
-
 #include <IOKit/IOLib.h>
 #include <IOKit/IOService.h>
 #include <IOKit/IOPlatformExpert.h>
@@ -295,17 +290,10 @@ IOReturn IOInterruptController::enableInterrupt(IOService *nub, int source)
   
   if (vector->interruptDisabledSoft) {
     vector->interruptDisabledSoft = 0;
-#if __ppc__
-    sync();
-    isync();
-#endif
     
     if (!getPlatform()->atInterruptLevel()) {
       while (vector->interruptActive)
 	{}
-#if __ppc__
-      isync();
-#endif
     }
     if (vector->interruptDisabledHard) {
       vector->interruptDisabledHard = 0;
@@ -330,17 +318,10 @@ IOReturn IOInterruptController::disableInterrupt(IOService *nub, int source)
   vector = &vectors[vectorNumber];
   
   vector->interruptDisabledSoft = 1;
-#if __ppc__
-  sync();
-  isync();
-#endif
   
   if (!getPlatform()->atInterruptLevel()) {
     while (vector->interruptActive)
 	{}
-#if __ppc__
-    isync();
-#endif
   }
   
   return kIOReturnSuccess;
@@ -663,10 +644,6 @@ IOReturn IOSharedInterruptController::disableInterrupt(IOService *nub,
   interruptState = IOSimpleLockLockDisableInterrupt(controllerLock); 
   if (!vector->interruptDisabledSoft) {
     vector->interruptDisabledSoft = 1;
-#if __ppc__
-    sync();
-    isync();
-#endif
     vectorsEnabled--;
   }
   IOSimpleLockUnlockEnableInterrupt(controllerLock, interruptState);
@@ -674,9 +651,6 @@ IOReturn IOSharedInterruptController::disableInterrupt(IOService *nub,
   if (!getPlatform()->atInterruptLevel()) {
     while (vector->interruptActive)
 	{}
-#if __ppc__
-    isync();
-#endif
   }
   
   return kIOReturnSuccess;
@@ -699,48 +673,26 @@ IOReturn IOSharedInterruptController::handleInterrupt(void * /*refCon*/,
     vector = &vectors[vectorNumber];
     
     vector->interruptActive = 1;
-#if __ppc__
-    sync();
-    isync();
-#endif
-    if (!vector->interruptDisabledSoft) {
-#if __ppc__
-      isync();
-#endif
-      
-      // Call the handler if it exists.
-      if (vector->interruptRegistered) {
-      
-		  bool		trace		= (gIOKitTrace & kIOTraceInterrupts) ? true : false;
-		  bool		timeHandler	= gIOInterruptThresholdNS ? true : false;
-		  uint64_t	startTime	= 0;
-		  uint64_t	endTime		= 0;
+	if (!vector->interruptDisabledSoft) {
+	  
+	  // Call the handler if it exists.
+	  if (vector->interruptRegistered) {
+		  
+		  bool	trace = (gIOKitTrace & kIOTraceInterrupts) ? true : false;
 		  
 		  if (trace)
 			  IOTimeStampStartConstant(IODBG_INTC(IOINTC_HANDLER),
 									   (uintptr_t) vectorNumber, (uintptr_t) vector->handler, (uintptr_t)vector->target);
 		  
-		  if (timeHandler)
-			  startTime = mach_absolute_time();
-		  
 		  // Call handler.
 		  vector->handler(vector->target, vector->refCon, vector->nub, vector->source);
-
-		  if (timeHandler)
-		  {
-			  endTime = mach_absolute_time();
-			  if ((endTime - startTime) > gIOInterruptThresholdNS)
-				  panic("IOSIC::handleInterrupt: interrupt exceeded threshold, handlerTime = %qd, vectorNumber = %d, handler = %p, target = %p\n",
-						endTime - startTime, (int)vectorNumber, vector->handler, vector->target);
-		  }
 		  
 		  if (trace)
 			  IOTimeStampEndConstant(IODBG_INTC(IOINTC_HANDLER),
 									 (uintptr_t) vectorNumber, (uintptr_t) vector->handler, (uintptr_t)vector->target);
 		  
-      }
-      
-    }
+		}
+	}
     
     vector->interruptActive = 0;
   }
diff --git a/iokit/Kernel/IOInterruptEventSource.cpp b/iokit/Kernel/IOInterruptEventSource.cpp
index 97d4c5957..8b49024a1 100644
--- a/iokit/Kernel/IOInterruptEventSource.cpp
+++ b/iokit/Kernel/IOInterruptEventSource.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,13 +25,7 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
-Copyright (c) 1998 Apple Computer, Inc.  All rights reserved.
 
-HISTORY
-    1998-7-13	Godfrey van der Linden(gvdl)
-        Created.
-*/
 #include <IOKit/IOInterruptEventSource.h>
 #include <IOKit/IOKitDebug.h>
 #include <IOKit/IOLib.h>
@@ -40,6 +34,31 @@ HISTORY
 #include <IOKit/IOTimeStamp.h>
 #include <IOKit/IOWorkLoop.h>
 
+#if IOKITSTATS
+
+#define IOStatisticsInitializeCounter() \
+do { \
+	IOStatistics::setCounterType(IOEventSource::reserved->counter, kIOStatisticsInterruptEventSourceCounter); \
+} while (0)
+
+#define IOStatisticsCheckForWork() \
+do { \
+	IOStatistics::countInterruptCheckForWork(IOEventSource::reserved->counter); \
+} while (0)
+
+#define IOStatisticsInterrupt() \
+do { \
+	IOStatistics::countInterrupt(IOEventSource::reserved->counter); \
+} while (0)
+
+#else
+
+#define IOStatisticsInitializeCounter()
+#define IOStatisticsCheckForWork()
+#define IOStatisticsInterrupt()
+
+#endif // IOKITSTATS
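+
+// Note (illustrative): when IOKITSTATS is disabled, these hooks compile
+// to nothing, so the interrupt paths below add no statistics overhead.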
+
 #define super IOEventSource
 
 OSDefineMetaClassAndStructors(IOInterruptEventSource, IOEventSource)
@@ -74,6 +93,8 @@ bool IOInterruptEventSource::init(OSObject *inOwner,
 	    intIndex = inIntIndex;
     }
 
+    IOStatisticsInitializeCounter();
+
     return res;
 }
 
@@ -182,24 +203,26 @@ bool IOInterruptEventSource::checkForWork()
     int numInts = cacheProdCount - consumerCount;
     IOInterruptEventAction intAction = (IOInterruptEventAction) action;
 	bool trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false;
-
+	
+    IOStatisticsCheckForWork();
+	
 	if ( numInts > 0 )
 	{
 		if (trace)
 			IOTimeStampStartConstant(IODBG_INTES(IOINTES_ACTION),
 									 (uintptr_t) intAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
-
+		
 		// Call the handler
-        (*intAction)(owner, this,  numInts);
+		(*intAction)(owner, this, numInts);
 		
 		if (trace)
 			IOTimeStampEndConstant(IODBG_INTES(IOINTES_ACTION),
 								   (uintptr_t) intAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
-
-        consumerCount = cacheProdCount;
-        if (autoDisable && !explicitDisable)
-            enable();
-    }
+		
+		consumerCount = cacheProdCount;
+		if (autoDisable && !explicitDisable)
+			enable();
+	}
 	
 	else if ( numInts < 0 )
 	{
@@ -208,17 +231,17 @@ bool IOInterruptEventSource::checkForWork()
 									 (uintptr_t) intAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
 		
 		// Call the handler
-    	(*intAction)(owner, this, -numInts);
+		(*intAction)(owner, this, -numInts);
 		
 		if (trace)
 			IOTimeStampEndConstant(IODBG_INTES(IOINTES_ACTION),
 								   (uintptr_t) intAction, (uintptr_t) owner, (uintptr_t) this, (uintptr_t) workLoop);
-    
-        consumerCount = cacheProdCount;
-        if (autoDisable && !explicitDisable)
-            enable();
-    }
-
+		
+		consumerCount = cacheProdCount;
+		if (autoDisable && !explicitDisable)
+			enable();
+	}
+	
     return false;
 }
 
@@ -226,14 +249,15 @@ void IOInterruptEventSource::normalInterruptOccurred
     (void */*refcon*/, IOService */*prov*/, int /*source*/)
 {
 	bool trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false;
-
+	
+    IOStatisticsInterrupt();
     producerCount++;
-
+	
 	if (trace)
 	    IOTimeStampStartConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner);
 	
     signalWorkAvailable();
-
+	
 	if (trace)
 	    IOTimeStampEndConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner);
 }
@@ -242,16 +266,17 @@ void IOInterruptEventSource::disableInterruptOccurred
     (void */*refcon*/, IOService *prov, int source)
 {
 	bool trace = (gIOKitTrace & kIOTraceIntEventSource) ? true : false;
-
+	
     prov->disableInterrupt(source);	/* disable the interrupt */
-
+	
+    IOStatisticsInterrupt();
     producerCount++;
-
+	
 	if (trace)
 	    IOTimeStampStartConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner);
     
     signalWorkAvailable();
-
+	
 	if (trace)
 	    IOTimeStampEndConstant(IODBG_INTES(IOINTES_SEMA), (uintptr_t) this, (uintptr_t) owner);
 }
@@ -264,3 +289,10 @@ void IOInterruptEventSource::interruptOccurred
     else
         normalInterruptOccurred(refcon, prov, source);
 }
+
+IOReturn IOInterruptEventSource::warmCPU
+    (uint64_t abstime)
+{
+
+	return ml_interrupt_prewarm(abstime);
+}
diff --git a/iokit/Kernel/IOKitDebug.cpp b/iokit/Kernel/IOKitDebug.cpp
index 31d681664..21048d88c 100644
--- a/iokit/Kernel/IOKitDebug.cpp
+++ b/iokit/Kernel/IOKitDebug.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -28,15 +28,15 @@
 
 #include <sys/sysctl.h>
 
+#include <libkern/c++/OSContainers.h>
+#include <libkern/c++/OSCPPDebug.h>
+
 #include <IOKit/IOKitDebug.h>
 #include <IOKit/IOLib.h>
 #include <IOKit/assert.h>
 #include <IOKit/IODeviceTreeSupport.h>
 #include <IOKit/IOService.h>
 
-#include <libkern/c++/OSContainers.h>
-#include <libkern/c++/OSCPPDebug.h>
-
 #ifdef IOKITDEBUG
 #define DEBUG_INIT_VALUE IOKITDEBUG
 #else
@@ -44,12 +44,10 @@
 #endif
 
 SInt64		gIOKitDebug = DEBUG_INIT_VALUE;
-SInt64		gIOKitTrace = 0x3B;
-UInt64		gIOInterruptThresholdNS = 0;
+SInt64		gIOKitTrace = 0;
 
 SYSCTL_QUAD(_debug, OID_AUTO, iokit, CTLFLAG_RW | CTLFLAG_LOCKED, &gIOKitDebug, "boot_arg io");
 SYSCTL_QUAD(_debug, OID_AUTO, iotrace, CTLFLAG_RW | CTLFLAG_LOCKED, &gIOKitTrace, "trace io");
-SYSCTL_QUAD(_debug, OID_AUTO, iointthreshold, CTLFLAG_RW | CTLFLAG_LOCKED, &gIOInterruptThresholdNS, "io interrupt threshold");
 
 
 int 		debug_malloc_size;
@@ -100,7 +98,7 @@ void IOPrintPlane( const IORegistryPlane * plane )
     iter->release();
 }
 
-void dbugprintf(char *fmt, ...);
+void dbugprintf(const char *fmt, ...);
 void db_dumpiojunk( const IORegistryPlane * plane );
 
 void db_piokjunk(void) {
diff --git a/iokit/Kernel/IOKitKernelInternal.h b/iokit/Kernel/IOKitKernelInternal.h
index 804c57f24..5a74159a4 100644
--- a/iokit/Kernel/IOKitKernelInternal.h
+++ b/iokit/Kernel/IOKitKernelInternal.h
@@ -38,6 +38,29 @@ __BEGIN_DECLS
 #include <mach/memory_object_types.h>
 #include <device/device_port.h>
 
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#if !defined(NO_KDEBUG)
+
+#define IOServiceTrace(csc, a, b, c, d) do {				\
+    if(kIOTraceIOService & gIOKitDebug) {				\
+	KERNEL_DEBUG_CONSTANT(IODBG_IOSERVICE(csc), a, b, c, d, 0);	\
+    }									\
+} while(0)
+
+#else /* NO_KDEBUG */
+
+#define IOServiceTrace(csc, a, b, c, d) do {	\
+  (void)a;					\
+  (void)b;					\
+  (void)c;					\
+  (void)d;					\
+} while (0)
+
+#endif /* NO_KDEBUG */
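+
+/*
+ * Usage sketch (trace code IOSERVICE_BUSY assumed for illustration): a
+ * kdebug trace point is emitted only when kIOTraceIOService is set in
+ * gIOKitDebug, e.g.
+ *
+ *	IOServiceTrace(IOSERVICE_BUSY, (uintptr_t) this, 0, 0, 0);
+ */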
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
 typedef kern_return_t (*IOIteratePageableMapsCallback)(vm_map_t map, void * ref);
 
 void IOLibInit(void);
@@ -149,4 +172,8 @@ extern "C" void IOKitInitializeTime( void );
 
 extern "C" OSString * IOCopyLogNameForPID(int pid);
 
+#if defined(__i386__) || defined(__x86_64__)
+extern "C" void IOSetKeyStoreData(IOMemoryDescriptor * data);
+#endif
+
 #endif /* ! _IOKIT_KERNELINTERNAL_H */
diff --git a/iokit/Kernel/IOLib.cpp b/iokit/Kernel/IOLib.cpp
index a5415e71c..50000299d 100644
--- a/iokit/Kernel/IOLib.cpp
+++ b/iokit/Kernel/IOLib.cpp
@@ -55,6 +55,24 @@
 #include <sys/sysctl.h>
 #endif
 
+#include "libkern/OSAtomic.h"
+#include <libkern/c++/OSKext.h>
+#include <IOKit/IOStatisticsPrivate.h>
+#include <sys/msgbuf.h>
+
+#if IOKITSTATS
+
+#define IOStatisticsAlloc(type, size) \
+do { \
+	IOStatistics::countAlloc(type, size); \
+} while (0)
+
+#else
+
+#define IOStatisticsAlloc(type, size)
+
+#endif /* IOKITSTATS */
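+
+/* Illustrative expansion: a call such as
+ *	IOStatisticsAlloc(kIOStatisticsMalloc, size);
+ * forwards to IOStatistics::countAlloc() when IOKITSTATS is set and
+ * disappears entirely otherwise. */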
+
 extern "C"
 {
 
@@ -63,7 +81,7 @@ mach_timespec_t IOZeroTvalspec = { 0, 0 };
 
 extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va);
 
-int
+extern int
 __doprnt(
 	const char		*fmt,
 	va_list			argp,
@@ -71,7 +89,9 @@ __doprnt(
 	void                    *arg,
 	int			radix);
 
-extern void conslog_putc(char);
+extern void cons_putc_locked(char);
+extern void bsd_log_lock(void);
+extern void bsd_log_unlock(void);
 
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
@@ -178,11 +198,13 @@ void * IOMalloc(vm_size_t size)
     void * address;
 
     address = (void *)kalloc(size);
+    if ( address ) {
 #if IOALLOCDEBUG
-    if (address) {
 		debug_iomalloc_size += size;
-	}
 #endif
+		IOStatisticsAlloc(kIOStatisticsMalloc, size);
+    }
+
     return address;
 }
 
@@ -193,6 +215,7 @@ void IOFree(void * address, vm_size_t size)
 #if IOALLOCDEBUG
 		debug_iomalloc_size -= size;
 #endif
+		IOStatisticsAlloc(kIOStatisticsFree, size);
     }
 }
 
@@ -250,11 +273,12 @@ void * IOMallocAligned(vm_size_t size, vm_size_t alignment)
 
     assert(0 == (address & alignMask));
 
-#if IOALLOCDEBUG
     if( address) {
+#if IOALLOCDEBUG
 		debug_iomalloc_size += size;
-	}
 #endif
+    	IOStatisticsAlloc(kIOStatisticsMallocAligned, size);
+	}
 
     return (void *) address;
 }
@@ -289,6 +313,8 @@ void IOFreeAligned(void * address, vm_size_t size)
 #if IOALLOCDEBUG
     debug_iomalloc_size -= size;
 #endif
+
+    IOStatisticsAlloc(kIOStatisticsFreeAligned, size);
 }
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
@@ -325,7 +351,7 @@ IOKernelFreePhysical(mach_vm_address_t address, mach_vm_size_t size)
 
 mach_vm_address_t
 IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxPhys, 
-				     mach_vm_size_t alignment, bool contiguous)
+			                mach_vm_size_t alignment, bool contiguous)
 {
     kern_return_t	kr;
     mach_vm_address_t	address;
@@ -405,6 +431,7 @@ IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxP
     return (address);
 }
 
+
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 struct _IOMallocContiguousEntry
@@ -463,6 +490,10 @@ void * IOMallocContiguous(vm_size_t size, vm_size_t alignment,
     }
     while (false);
 
+	if (address) {
+	    IOStatisticsAlloc(kIOStatisticsMallocContiguous, size);
+    }
+
     return (void *) address;
 }
 
@@ -500,6 +531,8 @@ void IOFreeContiguous(void * _address, vm_size_t size)
     {
 	IOKernelFreePhysical((mach_vm_address_t) address, size);
     }
+
+    IOStatisticsAlloc(kIOStatisticsFreeContiguous, size);
 }
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
@@ -603,10 +636,12 @@ void * IOMallocPageable(vm_size_t size, vm_size_t alignment)
     if( kIOReturnSuccess != kr)
         ref.address = 0;
 
+	if( ref.address) {
 #if IOALLOCDEBUG
-    if( ref.address)
        debug_iomallocpageable_size += round_page(size);
 #endif
+       IOStatisticsAlloc(kIOStatisticsMallocPageable, size);
+	}
 
     return( (void *) ref.address );
 }
@@ -640,6 +675,8 @@ void IOFreePageable(void * address, vm_size_t size)
 #if IOALLOCDEBUG
     debug_iomallocpageable_size -= round_page(size);
 #endif
+
+    IOStatisticsAlloc(kIOStatisticsFreePageable, size);
 }
     
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
@@ -727,23 +764,36 @@ void IOPause(unsigned nanoseconds)
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
-static void _iolog_putc(int ch, void *arg __unused)
+static void _iolog_consputc(int ch, void *arg __unused)
 {
-	conslog_putc(ch);
+    cons_putc_locked(ch);
+}
+
+static void _iolog_logputc(int ch, void *arg __unused)
+{
+    log_putc_locked(ch);
 }
 
 void IOLog(const char *format, ...)
 {
-	va_list ap;
+    va_list ap;
 
-	va_start(ap, format);
-	__doprnt(format, ap, _iolog_putc, NULL, 16);
-	va_end(ap);
+    va_start(ap, format);
+    IOLogv(format, ap);
+    va_end(ap);
 }
 
 void IOLogv(const char *format, va_list ap)
 {
-	__doprnt(format, ap, _iolog_putc, NULL, 16);
+    va_list ap2;
+
+    va_copy(ap2, ap);
+
+    bsd_log_lock();
+    __doprnt(format, ap, _iolog_logputc, NULL, 16);
+    bsd_log_unlock();
+
+    __doprnt(format, ap2, _iolog_consputc, NULL, 16);
 }
 
 #if !__LP64__
diff --git a/iokit/Kernel/IOMemoryCursor.cpp b/iokit/Kernel/IOMemoryCursor.cpp
index 36a15009d..99999991d 100644
--- a/iokit/Kernel/IOMemoryCursor.cpp
+++ b/iokit/Kernel/IOMemoryCursor.cpp
@@ -325,66 +325,3 @@ IOLittleMemoryCursor::initWithSpecification(IOPhysicalLength inMaxSegmentSize,
                                         inMaxTransferSize,
                                         inAlignment);
 }
-
-/************************* class IODBDMAMemoryCursor *************************/
-
-#if defined(__ppc__)
-
-#include <IOKit/ppc/IODBDMA.h>
-
-#undef super
-#define super IOMemoryCursor
-OSDefineMetaClassAndStructors(IODBDMAMemoryCursor, IOMemoryCursor)
-
-void 
-IODBDMAMemoryCursor::outputSegment(PhysicalSegment inSegment,
-                                   void *	   inSegments,
-                                   UInt32	   inSegmentIndex)
-{
-    IODBDMADescriptor *segment;
-
-    segment = &((IODBDMADescriptor *) inSegments)[inSegmentIndex];
-
-    // Write location into address field
-    OSWriteSwapInt32((UInt32 *) segment, 4, inSegment.location);
-
-    // Write count into 1st two bytes of operation field.
-    // DO NOT touch rest of operation field as it should contain a STOP command.
-    OSWriteSwapInt16((UInt16 *) segment, 0, inSegment.length);
-}
-
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-
-IODBDMAMemoryCursor *
-IODBDMAMemoryCursor::withSpecification(IOPhysicalLength inMaxSegmentSize,
-                                       IOPhysicalLength inMaxTransferSize,
-                                       IOPhysicalLength inAlignment)
-{
-    IODBDMAMemoryCursor *me = new IODBDMAMemoryCursor;
-
-    if (me && !me->initWithSpecification(inMaxSegmentSize,
-                                         inMaxTransferSize,
-                                         inAlignment))
-    {
-        me->release();
-        return 0;
-    }
-
-    return me;
-}
-
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-
-bool
-IODBDMAMemoryCursor::initWithSpecification(IOPhysicalLength inMaxSegmentSize,
-                                           IOPhysicalLength inMaxTransferSize,
-                                           IOPhysicalLength inAlignment)
-{
-    return super::initWithSpecification(&IODBDMAMemoryCursor::outputSegment,
-                                        inMaxSegmentSize,
-                                        inMaxTransferSize,
-                                        inAlignment);
-}
-
-#endif /* defined(__ppc__) */
-
diff --git a/iokit/Kernel/IOMemoryDescriptor.cpp b/iokit/Kernel/IOMemoryDescriptor.cpp
index a46021ede..0a11064a1 100644
--- a/iokit/Kernel/IOMemoryDescriptor.cpp
+++ b/iokit/Kernel/IOMemoryDescriptor.cpp
@@ -71,26 +71,8 @@ __BEGIN_DECLS
 #include <vm/vm_protos.h>
 
 extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va);
-void ipc_port_release_send(ipc_port_t port);
-
-/* Copy between a physical page and a virtual address in the given vm_map */
-kern_return_t copypv(addr64_t source, addr64_t sink, unsigned int size, int which);
-
-memory_object_t
-device_pager_setup(
-	memory_object_t	pager,
-	uintptr_t		device_handle,
-	vm_size_t	size,
-	int		flags);
-void
-device_pager_deallocate(
-        memory_object_t);
-kern_return_t
-device_pager_populate_object(
-	memory_object_t		pager,
-	vm_object_offset_t	offset,
-	ppnum_t			phys_addr,
-	vm_size_t		size);
+extern void ipc_port_release_send(ipc_port_t port);
+
 kern_return_t
 memory_object_iopl_request(
 	ipc_port_t		port,
@@ -172,8 +154,8 @@ struct ioGMDData {
     // align arrays to 8 bytes so following macros work
     unsigned int fPad;
 #endif
-    upl_page_info_t fPageList[];
-    ioPLBlock fBlocks[];
+    upl_page_info_t fPageList[1]; /* variable length */
+    ioPLBlock fBlocks[1]; /* variable length */
 };
 
 #define getDataP(osd)	((ioGMDData *) (osd)->getBytesNoCopy())
@@ -182,7 +164,7 @@ struct ioGMDData {
     (((osd)->getLength() - ((char *) getIOPLList(d) - (char *) d)) / sizeof(ioPLBlock))
 #define getPageList(d)	(&(d->fPageList[0]))
 #define computeDataSize(p, u) \
-    (sizeof(ioGMDData) + p * sizeof(upl_page_info_t) + u * sizeof(ioPLBlock))
+    (offsetof(ioGMDData, fPageList) + p * sizeof(upl_page_info_t) + u * sizeof(ioPLBlock))
 
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
@@ -306,18 +288,7 @@ IOMemoryDescriptor::withPhysicalAddress(
 				IOByteCount		length,
 				IODirection      	direction )
 {
-#ifdef __LP64__
     return (IOMemoryDescriptor::withAddressRange(address, length, direction, TASK_NULL));
-#else /* !__LP64__ */
-    IOGeneralMemoryDescriptor *self = new IOGeneralMemoryDescriptor;
-    if (self
-    && !self->initWithPhysicalAddress(address, length, direction)) {
-        self->release();
-        return 0;
-    }
-
-    return self;
-#endif /* !__LP64__ */
 }
 
 #ifndef __LP64__
@@ -500,9 +471,7 @@ void *IOGeneralMemoryDescriptor::createNamedEntry()
 
     memory_object_size_t  actualSize = size;
     vm_prot_t             prot       = VM_PROT_READ;
-#if CONFIG_EMBEDDED
     if (kIODirectionOut != (kIODirectionOutIn & _flags))
-#endif
 	prot |= VM_PROT_WRITE;
 
     if (_memEntry)
@@ -630,6 +599,17 @@ IOGeneralMemoryDescriptor::initWithOptions(void *	buffers,
 {
     IOOptionBits type = options & kIOMemoryTypeMask;
 
+#ifndef __LP64__
+    if (task
+        && (kIOMemoryTypeVirtual == type)
+        && vm_map_is_64bit(get_task_map(task)) 
+        && ((IOVirtualRange *) buffers)->address)
+    {
+        OSReportWithBacktrace("IOMemoryDescriptor: attempt to create 32b virtual in 64b task, use ::withAddressRange()");
+        return false;
+    }
+#endif /* !__LP64__ */
+
     // Grab the original MD's configuration data to initialise the
     // arguments to this function.
     if (kIOMemoryTypePersistentMD == type) {
@@ -644,10 +624,10 @@ IOGeneralMemoryDescriptor::initWithOptions(void *	buffers,
 	    return false;
 
 	_memEntry = initData->fMemEntry;	// Grab the new named entry
-	options = orig->_flags | kIOMemoryAsReference; 
-	_singleRange = orig->_singleRange;	// Initialise our range
-	buffers = &_singleRange;
-	count = 1;
+	options = orig->_flags & ~kIOMemoryAsReference; 
+        type = options & kIOMemoryTypeMask;
+	buffers = orig->_ranges.v;
+	count = orig->_rangesCount;
 
 	// Now grab the original task and whatever mapper was previously used
 	task = orig->_task;
@@ -665,16 +645,6 @@ IOGeneralMemoryDescriptor::initWithOptions(void *	buffers,
         assert(task);
         if (!task)
             return false;
-
-#ifndef __LP64__
-	if (vm_map_is_64bit(get_task_map(task)) 
-	    && (kIOMemoryTypeVirtual == type) 
-	    && ((IOVirtualRange *) buffers)->address)
-	{
-	    OSReportWithBacktrace("IOMemoryDescriptor: attempt to create 32b virtual in 64b task, use ::withAddressRange()");
-            return false;
-	}
-#endif /* !__LP64__ */
 	break;
 
     case kIOMemoryTypePhysical:		// Neither Physical nor UPL should have a task
@@ -721,7 +691,10 @@ IOGeneralMemoryDescriptor::initWithOptions(void *	buffers,
 	}
 
 	if (_memEntry)
-	    { ipc_port_release_send((ipc_port_t) _memEntry); _memEntry = 0; }
+	{
+	    ipc_port_release_send((ipc_port_t) _memEntry);
+	    _memEntry = 0;
+	}
 	if (_mappings)
 	    _mappings->flushCollection();
     }
@@ -782,7 +755,7 @@ IOGeneralMemoryDescriptor::initWithOptions(void *	buffers,
         else if (!_memoryEntries->initWithCapacity(dataSize))
             return false;
 
-        _memoryEntries->appendBytes(0, sizeof(ioGMDData));
+        _memoryEntries->appendBytes(0, computeDataSize(0, 0));
         dataP = getDataP(_memoryEntries);
         dataP->fMapper = mapper;
         dataP->fPageCnt = 0;
@@ -794,6 +767,7 @@ IOGeneralMemoryDescriptor::initWithOptions(void *	buffers,
 
         ioPLBlock iopl;
         iopl.fIOPL = (upl_t) buffers;
+        upl_set_referenced(iopl.fIOPL, true);
         upl_page_info_t *pageList = UPL_GET_INTERNAL_PAGE_LIST(iopl.fIOPL);
 
 	if (upl_get_size(iopl.fIOPL) < (count + offset))
@@ -853,7 +827,8 @@ IOGeneralMemoryDescriptor::initWithOptions(void *	buffers,
 	      case kIOMemoryTypeVirtual64:
 	      case kIOMemoryTypePhysical64:
 		if (count == 1
-		    && (((IOAddressRange *) buffers)->address + ((IOAddressRange *) buffers)->length) <= 0x100000000ULL) {
+		    && (((IOAddressRange *) buffers)->address + ((IOAddressRange *) buffers)->length) <= 0x100000000ULL
+		    ) {
 		    if (kIOMemoryTypeVirtual64 == type)
 			type = kIOMemoryTypeVirtual;
 		    else
@@ -931,7 +906,7 @@ IOGeneralMemoryDescriptor::initWithOptions(void *	buffers,
             else if (!_memoryEntries->initWithCapacity(dataSize))
                 return false;
     
-            _memoryEntries->appendBytes(0, sizeof(ioGMDData));
+            _memoryEntries->appendBytes(0, computeDataSize(0, 0));
             dataP = getDataP(_memoryEntries);
             dataP->fMapper = mapper;
             dataP->fPageCnt = _pages;
@@ -1183,11 +1158,7 @@ IOGeneralMemoryDescriptor::getPreparationID( void )
 
     if (kIOPreparationIDUnprepared == dataP->fPreparationID)
     {
-#if defined(__ppc__ )
-    	dataP->fPreparationID = gIOMDPreparationID++;
-#else
 	dataP->fPreparationID = OSIncrementAtomic64(&gIOMDPreparationID);
-#endif
     }
     return (dataP->fPreparationID);
 }
@@ -1397,7 +1368,7 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void *
 	IOPhysicalAddress pageAddr = pageList[pageInd].phys_addr;
 	if (!pageAddr) {
 	    panic("!pageList phys_addr");
-    }
+	}
 
 	address = ptoa_64(pageAddr) + offset;
 
@@ -1911,13 +1882,8 @@ IOReturn IOMemoryDescriptor::performOperation( IOOptionBits options,
     return (remaining ? kIOReturnUnderrun : kIOReturnSuccess);
 }
 
-#if	defined(__ppc__) || defined(__arm__)
-extern vm_offset_t		static_memory_end;
-#define io_kernel_static_end	static_memory_end
-#else
 extern vm_offset_t		first_avail;
 #define io_kernel_static_end	first_avail
-#endif
 
 static kern_return_t
 io_get_kernel_static_upl(
@@ -2118,12 +2084,16 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection)
             iopl.fIOMDOffset = mdOffset;
             iopl.fPageInfo = pageIndex;
 
+#if 0
+	    // used to remove the upl for auto prepares here, for some errant code
+	    // that freed memory before the descriptor pointing at it
 	    if ((_flags & kIOMemoryAutoPrepare) && iopl.fIOPL)
 	    {
 		upl_commit(iopl.fIOPL, 0, 0);
 		upl_deallocate(iopl.fIOPL);
 		iopl.fIOPL = 0;
 	    }
+#endif
 
             if (!_memoryEntries->appendBytes(&iopl, sizeof(iopl))) {
                 // Clean up partial created and unsaved iopl
@@ -2169,7 +2139,7 @@ abortExit:
              upl_deallocate(ioplList[range].fIOPL);
 	    }
 	}
-	(void) _memoryEntries->initWithBytes(dataP, sizeof(ioGMDData)); // == setLength()
+	(void) _memoryEntries->initWithBytes(dataP, computeDataSize(0, 0)); // == setLength()
 
         if (mapper && mapBase)
             mapper->iovmFree(mapBase, _pages);
@@ -2231,7 +2201,7 @@ IOReturn IOGeneralMemoryDescriptor::prepare(IODirection forDirection)
  * issued; the prepare() and complete() must occur in pairs, before
  * before and after an I/O transfer involving pageable memory.
  */
- 
+
 IOReturn IOGeneralMemoryDescriptor::complete(IODirection /* forDirection */)
 {
     IOOptionBits type = _flags & kIOMemoryTypeMask;
@@ -2273,8 +2243,11 @@ IOReturn IOGeneralMemoryDescriptor::complete(IODirection /* forDirection */)
 			 upl_commit(ioplList[ind].fIOPL, 0, 0);
 			 upl_deallocate(ioplList[ind].fIOPL);
 		    }
+	    } else if (kIOMemoryTypeUPL == type) {
+		upl_set_referenced(ioplList[0].fIOPL, false);
 	    }
-	    (void) _memoryEntries->initWithBytes(dataP, sizeof(ioGMDData)); // == setLength()
+
+	    (void) _memoryEntries->initWithBytes(dataP, computeDataSize(0, 0)); // == setLength()
 
 	    dataP->fPreparationID = kIOPreparationIDUnprepared;
 	}
@@ -3328,13 +3301,12 @@ IOMemoryMap * IOMemoryMap::copyCompatible(
     retain();
     if( (fLength == _length) && (!_offset))
     {
-	newMapping->release();
 	newMapping = this;
     }
     else
     {
 	newMapping->fSuperMap = this;
-	newMapping->fOffset   = _offset;
+	newMapping->fOffset   = fOffset + _offset;
 	newMapping->fAddress  = fAddress + _offset;
     }
 
@@ -3608,7 +3580,14 @@ IOMemoryMap * IOMemoryDescriptor::makeMapping(
 		iter->release();
 	    }
 	    if (result || (options & kIOMapReference))
+	    {
+	        if (result != mapping)
+	        {
+                    mapping->release();
+                    mapping = NULL;
+                }
 		continue;
+	    }
 	}
 
 	if (!mapDesc)
diff --git a/iokit/Kernel/IONVRAM.cpp b/iokit/Kernel/IONVRAM.cpp
index 4c51e4457..85ac1a2ec 100644
--- a/iokit/Kernel/IONVRAM.cpp
+++ b/iokit/Kernel/IONVRAM.cpp
@@ -39,7 +39,6 @@
 #define kIONVRAMPrivilege	kIOClientPrivilegeAdministrator
 //#define kIONVRAMPrivilege	kIOClientPrivilegeLocalUser
 
-
 OSDefineMetaClassAndStructors(IODTNVRAM, IOService);
 
 bool IODTNVRAM::init(IORegistryEntry *old, const IORegistryPlane *plane)
@@ -205,6 +204,9 @@ void IODTNVRAM::registerNVRAMController(IONVRAMController *nvram)
     _piImage = _nvramImage + _piPartitionOffset;
   }
   
+  _lastDeviceSync = 0;
+  _freshInterval = TRUE;		// we will allow sync() even before the first 15 minutes have passed.
+
   initOFVariables();
 }
 
@@ -229,27 +231,31 @@ bool IODTNVRAM::serializeProperties(OSSerialize *s) const
   OSDictionary         *dict = 0, *tmpDict = 0;
   OSCollectionIterator *iter = 0;
   
-  if (_ofDict == 0) return false;
-  
   // Verify permissions.
   hasPrivilege = (kIOReturnSuccess == IOUserClient::clientHasPrivilege(current_task(), kIONVRAMPrivilege));
 
   tmpDict = OSDictionary::withCapacity(1);
   if (tmpDict == 0) return false;
+
+  if (_ofDict == 0) {
+    /* No nvram. Return an empty dictionary. */
+    dict = tmpDict;
+  } else {
+    /* Copy properties with client privilege. */
+    iter = OSCollectionIterator::withCollection(_ofDict);
+    if (iter == 0) return false;
     
-  iter = OSCollectionIterator::withCollection(_ofDict);
-  if (iter == 0) return false;
-    
-  while (1) {
-    key = OSDynamicCast(OSSymbol, iter->getNextObject());
-    if (key == 0) break;
+    while (1) {
+      key = OSDynamicCast(OSSymbol, iter->getNextObject());
+      if (key == 0) break;
       
-    variablePerm = getOFVariablePerm(key);
-    if ((hasPrivilege || (variablePerm != kOFVariablePermRootOnly)) &&
-	( ! (variablePerm == kOFVariablePermKernelOnly && current_task() != kernel_task) )) {
-      tmpDict->setObject(key, _ofDict->getObject(key));
+      variablePerm = getOFVariablePerm(key);
+      if ((hasPrivilege || (variablePerm != kOFVariablePermRootOnly)) &&
+	  ( ! (variablePerm == kOFVariablePermKernelOnly && current_task() != kernel_task) )) {
+	tmpDict->setObject(key, _ofDict->getObject(key));
+      }
+      dict = tmpDict;
     }
-    dict = tmpDict;
   }
 
   result = dict->serialize(s);
@@ -412,18 +418,32 @@ IOReturn IODTNVRAM::setProperties(OSObject *properties)
     if (object == 0) continue;
     
     if (key->isEqualTo(kIONVRAMDeletePropertyKey)) {
-      tmpStr = OSDynamicCast(OSString, object);
-      if (tmpStr != 0) {
-	key = OSSymbol::withString(tmpStr);
-	removeProperty(key);
-	key->release();
-	result = true;
-      } else {
-	result = false;
-      }
-    } else {
-      result = setProperty(key, object);
+		tmpStr = OSDynamicCast(OSString, object);
+		if (tmpStr != 0) {
+			key = OSSymbol::withString(tmpStr);
+			removeProperty(key);
+			key->release();
+			result = true;
+		} else {
+			result = false;
+		}
+    } else if(key->isEqualTo(kIONVRAMSyncNowPropertyKey)) {
+		tmpStr = OSDynamicCast(OSString, object);
+		if (tmpStr != 0) {
+
+			result = true; // We are not going to guarantee sync; this is best effort
+
+			if(safeToSync())
+				sync();
+
+		} else {
+			result = false;
+		}
+	}
+	else {
+		result = setProperty(key, object);
     }
+
   }
   
   iter->release();
@@ -1656,3 +1676,26 @@ IOReturn IODTNVRAM::writeNVRAMPropertyType1(IORegistryEntry *entry,
 
   return ok ? kIOReturnSuccess : kIOReturnNoMemory;
 }
+
+bool IODTNVRAM::safeToSync(void)
+{
+    AbsoluteTime delta;
+    UInt64       delta_ns;
+    SInt32       delta_secs;
+	
+	// delta interval went by
+	clock_get_uptime(&delta);
+	
+    // Figure it in seconds.
+    absolutetime_to_nanoseconds(delta, &delta_ns);
+    delta_secs = (SInt32)(delta_ns / NSEC_PER_SEC);
+
+	if ((delta_secs > (_lastDeviceSync + MIN_SYNC_NOW_INTERVAL)) || _freshInterval)
+	{
+		_lastDeviceSync = delta_secs;
+		_freshInterval = FALSE;
+		return TRUE;
+	}
+
+	return FALSE;
+}
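+
+/*
+ * Illustrative behavior: the first sync request is always honored
+ * (_freshInterval starts TRUE); afterwards requests are dropped until
+ * MIN_SYNC_NOW_INTERVAL seconds have passed since _lastDeviceSync.
+ */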
diff --git a/iokit/Kernel/IOPMPowerSource.cpp b/iokit/Kernel/IOPMPowerSource.cpp
index e6a11fc07..614f4caa3 100644
--- a/iokit/Kernel/IOPMPowerSource.cpp
+++ b/iokit/Kernel/IOPMPowerSource.cpp
@@ -165,7 +165,6 @@ void IOPMPowerSource::updateStatus (void)
 void IOPMPowerSource::setPSProperty(const OSSymbol *key, OSObject *val)
 {
     OSObject    *lastVal;
-    OSNumber    *newNumVal;
 
     if(!key || !val) return;
 
@@ -175,19 +174,12 @@ void IOPMPowerSource::setPSProperty(const OSSymbol *key, OSObject *val)
     // Otherwise, just compare pointers.
     
     if( (lastVal = properties->getObject(key)) ) {
-        newNumVal = OSDynamicCast(OSNumber, val);
-        if(newNumVal) {
-            if(newNumVal->isEqualTo(lastVal)) {
-                // settings didn't change
-            } else {
-                // num val is not equal to last val
-                settingsChangedSinceUpdate = true;
-            }
-        } else {
-            // pointer compare as last resort
-            if(lastVal != val)
-                settingsChangedSinceUpdate = true;        
-        }
+	if(val->isEqualTo(lastVal)) {
+	    // settings didn't change
+	} else {
+	    // num val is not equal to last val
+	    settingsChangedSinceUpdate = true;
+	}
     } else {
         // new setting; no last value
         settingsChangedSinceUpdate = true;
diff --git a/iokit/Kernel/IOPMrootDomain.cpp b/iokit/Kernel/IOPMrootDomain.cpp
index 3ccda1a1b..e6146bb24 100644
--- a/iokit/Kernel/IOPMrootDomain.cpp
+++ b/iokit/Kernel/IOPMrootDomain.cpp
@@ -27,6 +27,7 @@
  */
 #include <libkern/c++/OSKext.h>
 #include <libkern/c++/OSMetaClass.h>
+#include <libkern/OSAtomic.h>
 #include <libkern/OSDebug.h>
 #include <IOKit/IOWorkLoop.h>
 #include <IOKit/IOCommandGate.h>
@@ -43,7 +44,6 @@
 #include "IOKit/pwr_mgt/IOPowerConnection.h"
 #include "IOPMPowerStateQueue.h"
 #include <IOKit/IOCatalogue.h>
-#include <IOKit/IOCommand.h>    // IOServicePMPrivate
 #if HIBERNATION
 #include <IOKit/IOHibernatePrivate.h>
 #endif
@@ -64,64 +64,98 @@ __END_DECLS
 #endif
 
 #define kIOPMrootDomainClass    "IOPMrootDomain"
+#define LOG_PREFIX              "PMRD: "
 
-#define LOG_PREFIX  "PMRD: "
+#define MSG(x...) \
+    do { kprintf(LOG_PREFIX x); IOLog(x); } while (false)
 
-#define LOG(x...)   do { \
-    kprintf(LOG_PREFIX x); IOLog(x); } while (false)
-
-#define KLOG(x...)  do { \
-    kprintf(LOG_PREFIX x); } while (false)
+#define LOG(x...)    \
+    do { kprintf(LOG_PREFIX x); } while (false)
 
 #define DLOG(x...)  do { \
 	if (kIOLogPMRootDomain & gIOKitDebug) \
         kprintf(LOG_PREFIX x); } while (false)
 
+#define _LOG(x...)
+
 #define CHECK_THREAD_CONTEXT
 #ifdef  CHECK_THREAD_CONTEXT
 static IOWorkLoop * gIOPMWorkLoop = 0;
-#define ASSERT_GATED(x)                                     \
+#define ASSERT_GATED()                                      \
 do {                                                        \
     if (gIOPMWorkLoop && gIOPMWorkLoop->inGate() != true) { \
-        panic("RootDomain: not inside PM gate");               \
+        panic("RootDomain: not inside PM gate");            \
     }                                                       \
 } while(false)
 #else
-#define ASSERT_GATED(x)
+#define ASSERT_GATED()
 #endif /* CHECK_THREAD_CONTEXT */
 
+#define CAP_LOSS(c)  \
+        (((_pendingCapability & (c)) == 0) && \
+         ((_currentCapability & (c)) != 0))
+
+#define CAP_GAIN(c)  \
+        (((_currentCapability & (c)) == 0) && \
+         ((_pendingCapability & (c)) != 0))
+
+#define CAP_CHANGE(c)    \
+        (((_currentCapability ^ _pendingCapability) & (c)) != 0)
+
+#define CAP_CURRENT(c)  \
+        ((_currentCapability & (c)) != 0)
+
+#define CAP_HIGHEST(c)  \
+        ((_highestCapability & (c)) != 0)
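+
+// Illustrative reading (capability bit assumed): on a full-wake to
+// dark-wake transition CAP_LOSS(kIOPMSystemCapabilityGraphics) is true,
+// since the pending set drops a bit that the current set still carries.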
+
+#define DARK_TO_FULL_EVALUATE_CLAMSHELL     0
+
 // Event types for IOPMPowerStateQueue::submitPowerEvent()
 enum {
-    kPowerEventFeatureChanged = 1,
-    kPowerEventReceivedPowerNotification,
-    kPowerEventSystemBootCompleted,
-    kPowerEventSystemShutdown,
-    kPowerEventUserDisabledSleep,
-    kPowerEventConfigdRegisteredInterest,
-    kPowerEventAggressivenessChanged,
-    kPowerEventAssertionCreate,                 // 8
-    kPowerEventAssertionRelease,                // 9
-    kPowerEventAssertionSetLevel                // 10
+    kPowerEventFeatureChanged = 1,              // 1
+    kPowerEventReceivedPowerNotification,       // 2
+    kPowerEventSystemBootCompleted,             // 3
+    kPowerEventSystemShutdown,                  // 4
+    kPowerEventUserDisabledSleep,               // 5
+    kPowerEventRegisterSystemCapabilityClient,  // 6
+    kPowerEventRegisterKernelCapabilityClient,  // 7
+    kPowerEventPolicyStimulus,                  // 8
+    kPowerEventAssertionCreate,                 // 9
+    kPowerEventAssertionRelease,                // 10
+    kPowerEventAssertionSetLevel,               // 11
+    kPowerEventQueueSleepWakeUUID,              // 12
+    kPowerEventPublishSleepWakeUUID             // 13
+};
+
+// For evaluatePolicy()
+// List of stimuli that affects the root domain policy.
+enum {
+    kStimulusDisplayWranglerSleep,      // 0
+    kStimulusDisplayWranglerWake,       // 1
+    kStimulusAggressivenessChanged,     // 2
+    kStimulusDemandSystemSleep,         // 3
+    kStimulusAllowSystemSleepChanged,   // 4
+    kStimulusDarkWakeActivityTickle,    // 5
+    kStimulusDarkWakeEntry,             // 6
+    kStimulusDarkWakeReentry,           // 7
+    kStimulusDarkWakeEvaluate           // 8
 };
 
 extern "C" {
 IOReturn OSKextSystemSleepOrWake( UInt32 );
 }
 
-extern const IORegistryPlane * gIOPowerPlane;
-
 static void idleSleepTimerExpired( thread_call_param_t, thread_call_param_t );
-static void wakeupClamshellTimerExpired( thread_call_param_t us, thread_call_param_t );
 static void notifySystemShutdown( IOService * root, unsigned long event );
-static bool clientMessageFilter( OSObject * object, void * context );
-static void handleAggressivesFunction( thread_call_param_t param1, thread_call_param_t param2 );
+static void handleAggressivesFunction( thread_call_param_t, thread_call_param_t );
 static void pmEventTimeStamp(uint64_t *recordTS);
 
 // "IOPMSetSleepSupported"  callPlatformFunction name
 static const OSSymbol *sleepSupportedPEFunction = NULL;
 static const OSSymbol *sleepMessagePEFunction   = NULL;
 
-#define kIOSleepSupportedKey  "IOSleepSupported"
+#define kIOSleepSupportedKey        "IOSleepSupported"
+#define kIOPMSystemCapabilitiesKey  "System Capabilities"
 
 #define kRD_AllPowerSources (kIOPMSupportedOnAC \
                            | kIOPMSupportedOnBatt \
@@ -137,91 +171,36 @@ enum
 #define kLocalEvalClamshellCommand        (1 << 15)
 
 enum {
-    OFF_STATE       = 0,
-    RESTART_STATE   = 1,
-    SLEEP_STATE     = 2,
-    DOZE_STATE      = 3,
-    ON_STATE        = 4,
+    OFF_STATE           = 0,
+    RESTART_STATE       = 1,
+    SLEEP_STATE         = 2,
+    ON_STATE            = 3,
     NUM_POWER_STATES
 };
 
 #define ON_POWER        kIOPMPowerOn
 #define RESTART_POWER   kIOPMRestart
 #define SLEEP_POWER     kIOPMAuxPowerOn
-#define DOZE_POWER      kIOPMDoze
 
 static IOPMPowerState ourPowerStates[NUM_POWER_STATES] =
 {
     {1, 0,                      0,              0,             0,0,0,0,0,0,0,0},
     {1, kIOPMRestartCapability,	kIOPMRestart,	RESTART_POWER, 0,0,0,0,0,0,0,0},	
     {1, kIOPMSleepCapability,   kIOPMSleep,     SLEEP_POWER,   0,0,0,0,0,0,0,0},
-    {1, kIOPMDoze,              kIOPMDoze,      DOZE_POWER,    0,0,0,0,0,0,0,0},
     {1, kIOPMPowerOn,           kIOPMPowerOn,   ON_POWER,      0,0,0,0,0,0,0,0}
 };
 
-// Clients eligible to receive system power messages.
-enum {
-    kMessageClientNone = 0,
-    kMessageClientAll,
-    kMessageClientConfigd       
-};
-
-// Run states (R-state) defined within the ON power state.
-enum {
-    kRStateNormal = 0,
-    kRStateDark,
-    kRStateMaintenance,
-    kRStateCount
-};
-
-// IOService in power plane can be tagged with following flags.
-enum {
-	kServiceFlagGraphics    = 0x01,
-	kServiceFlagNoPowerUp   = 0x02,
-    kServiceFlagTopLevelPCI = 0x04
-};
-
-// Flags describing R-state features and capabilities.
-enum {
-    kRStateFlagNone             = 0x00000000,
-    kRStateFlagSuppressGraphics = 0x00000001,
-    kRStateFlagSuppressMessages = 0x00000002,
-    kRStateFlagSuppressPCICheck = 0x00000004,
-    kRStateFlagDisableIdleSleep = 0x00000008
-};
-
-#if ROOT_DOMAIN_RUN_STATES
-
-// Table of flags for each R-state.
-static uint32_t gRStateFlags[ kRStateCount ] =
-{
-    kRStateFlagNone,
-
-    /* Dark wake */
-    kRStateFlagSuppressGraphics,
-
-    /* Maintenance wake */
-    kRStateFlagSuppressGraphics |
-    kRStateFlagSuppressMessages |
-    kRStateFlagSuppressPCICheck |
-    kRStateFlagDisableIdleSleep
-};
-
-static IONotifier *     gConfigdNotifier = 0;
-
-#define kIOPMRootDomainRunStateKey          "Run State"
 #define kIOPMRootDomainWakeTypeMaintenance  "Maintenance"
 #define kIOPMRootDomainWakeTypeSleepTimer   "SleepTimer"
 #define kIOPMrootDomainWakeTypeLowBattery   "LowBattery"
-
-#endif /* ROOT_DOMAIN_RUN_STATES */
+#define kIOPMRootDomainWakeTypeUser         "User"
+#define kIOPMRootDomainWakeTypeAlarm        "Alarm"
+#define kIOPMRootDomainWakeTypeNetwork      "Network"
 
 // Special interest that entitles the interested client to receive
-// all system messages. Used by pmconfigd to support maintenance wake.
+// all system messages. Only used by powerd.
 //
-#define kIOPMPrivilegedPowerInterest        "IOPMPrivilegedPowerInterest"
-
-static IONotifier *     gSysPowerDownNotifier = 0;
+#define kIOPMSystemCapabilityInterest       "IOPMSystemCapabilityInterest"
 
 /*
  * Aggressiveness
@@ -231,8 +210,6 @@ static IONotifier *     gSysPowerDownNotifier = 0;
 
 #define kAggressivesMinValue    1
 
-static uint32_t gAggressivesState = 0;
-
 enum {
     kAggressivesStateBusy           = 0x01,
     kAggressivesStateQuickSpindown  = 0x02
@@ -269,14 +246,28 @@ enum {
 enum {
     kAggressivesRecordFlagModified         = 0x00000001,
     kAggressivesRecordFlagMinValue         = 0x00000002
-    
+};
+
+// gDarkWakeFlags
+enum {
+    kDarkWakeFlagHIDTickleEarly      = 0x01, // hid tickle before gfx suppression
+    kDarkWakeFlagHIDTickleLate       = 0x02, // hid tickle after gfx suppression
+    kDarkWakeFlagHIDTickleNone       = 0x03, // hid tickle is not posted
+    kDarkWakeFlagHIDTickleMask       = 0x03,
+    kDarkWakeFlagIgnoreDiskIOInDark  = 0x04, // ignore disk idle in DW
+    kDarkWakeFlagIgnoreDiskIOAlways  = 0x08, // always ignore disk idle
+    kDarkWakeFlagIgnoreDiskIOMask    = 0x0C,
+    kDarkWakeFlagAlarmIsDark         = 0x0100
 };
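+
+// gDarkWakeFlags is seeded from the "darkwake" boot-arg in start() and can
+// be changed at runtime through the debug.darkwake sysctl. A hypothetical
+// combination of the flags above:
+//
+//     darkwake=0x7    // kDarkWakeFlagHIDTickleNone | kDarkWakeFlagIgnoreDiskIOInDark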
 
 static IOPMrootDomain * gRootDomain;
+static IONotifier *     gSysPowerDownNotifier = 0;
 static UInt32           gSleepOrShutdownPending = 0;
 static UInt32           gWillShutdown = 0;
-static uint32_t         gMessageClientType = kMessageClientNone;
+static UInt32           gPagingOff = 0;
 static UInt32           gSleepWakeUUIDIsSet = false;
+static uint32_t         gAggressivesState = 0;
+static uint32_t         gDarkWakeFlags = kDarkWakeFlagHIDTickleNone;
 
 struct timeval gIOLastSleepTime;
 struct timeval gIOLastWakeTime;
@@ -293,29 +284,173 @@ const OSSymbol *gIOPMStatsApplicationResponseTimedOut;
 const OSSymbol *gIOPMStatsApplicationResponseCancel;
 const OSSymbol *gIOPMStatsApplicationResponseSlow;
 
+/*
+ * PMSettingHandle
+ * Opaque handle passed to clients of registerPMSettingController()
+ */
+class PMSettingHandle : public OSObject
+{
+    OSDeclareFinalStructors( PMSettingHandle )
+    friend class PMSettingObject;
+
+private:
+    PMSettingObject *pmso;
+    void free(void);
+};
+
+/*
+ * PMSettingObject
+ * Internal object to track each PM setting controller
+ */
 class PMSettingObject : public OSObject
 {
-    OSDeclareFinalStructors(PMSettingObject)
+    OSDeclareFinalStructors( PMSettingObject )
+    friend class IOPMrootDomain;
+
 private:
+    queue_head_t                    calloutQueue;
+    thread_t                        waitThread;
     IOPMrootDomain                  *parent;
+    PMSettingHandle                 *pmsh;
     IOPMSettingControllerCallback   func;
     OSObject                        *target;
     uintptr_t                       refcon;
     uint32_t                        *publishedFeatureID;
-    int                             releaseAtCount;
+    uint32_t                        settingCount;
+    bool                            disabled;
+
+    void free(void);
+
 public:
     static PMSettingObject *pmSettingObject(
-                IOPMrootDomain      *parent_arg,
+                IOPMrootDomain                  *parent_arg,
                 IOPMSettingControllerCallback   handler_arg,
-                OSObject    *target_arg,
-                uintptr_t   refcon_arg,
-                uint32_t    supportedPowerSources,
-                const OSSymbol *settings[]);
+                OSObject                        *target_arg,
+                uintptr_t                       refcon_arg,
+                uint32_t                        supportedPowerSources,
+                const OSSymbol                  *settings[],
+                OSObject                        **handle_obj);
+
+    void dispatchPMSetting(const OSSymbol *type, OSObject *object);
+    void clientHandleFreed(void);
+};
+
+struct PMSettingCallEntry {
+    queue_chain_t   link;
+    thread_t        thread;
+};
 
-    void setPMSetting(const OSSymbol *type, OSObject *obj);
+#define PMSETTING_LOCK()    IOLockLock(settingsCtrlLock)
+#define PMSETTING_UNLOCK()  IOLockUnlock(settingsCtrlLock)
+#define PMSETTING_WAIT(p)   IOLockSleep(settingsCtrlLock, p, THREAD_UNINT)
+#define PMSETTING_WAKEUP(p) IOLockWakeup(settingsCtrlLock, p, true)
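+
+// A minimal sketch (assumed composition, not the verbatim implementation) of
+// how these primitives pair with PMSettingCallEntry: a call entry is queued
+// under the lock, the lock is dropped around the client callback, and any
+// thread blocked in PMSETTING_WAIT() is then woken:
+//
+//     PMSettingCallEntry entry;
+//     entry.thread = current_thread();
+//     PMSETTING_LOCK();
+//     queue_enter(&pmso->calloutQueue, &entry, PMSettingCallEntry *, link);
+//     PMSETTING_UNLOCK();
+//     pmso->func(pmso->target, type, object, pmso->refcon);  // client callback
+//     PMSETTING_LOCK();
+//     queue_remove(&pmso->calloutQueue, &entry, PMSettingCallEntry *, link);
+//     PMSETTING_WAKEUP(pmso);
+//     PMSETTING_UNLOCK();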
 
-    void taggedRelease(const void *tag, const int when) const;
-    void free(void);
+//*********************************************************************************
+//*********************************************************************************
+//*********************************************************************************
+
+/* @class IOPMTimeline
+ * @abstract Tracks and records PM activity.
+ * @discussion Intended for use only as a helper class to IOPMrootDomain.
+ *      Do not subclass or directly invoke IOPMTimeline.
+ */
+class IOPMTimeline : public OSObject 
+{
+    OSDeclareDefaultStructors( IOPMTimeline );
+
+public:  
+    static IOPMTimeline* timeline(IOPMrootDomain *root_domain);
+  
+    bool            setProperties(OSDictionary *d);
+    OSDictionary    *copyInfoDictionary(void);
+    
+    IOReturn    recordSystemPowerEvent( PMEventDetails *details );
+                                
+    IOReturn    recordDetailedPowerEvent( PMEventDetails *details );
+
+    IOMemoryDescriptor      *getPMTraceMemoryDescriptor();
+    
+    uint32_t getNumEventsLoggedThisPeriod();    
+    void     setNumEventsLoggedThisPeriod(uint32_t newCount);
+    bool     isSleepCycleInProgress();
+    void     setSleepCycleInProgressFlag(bool flag);
+private:
+    bool        init(void);
+    void        free(void);
+
+    void        setEventsTrackedCount(uint32_t newTracked);
+    void        setEventsRecordingLevel(uint32_t eventsTrackedBits);
+    static uint32_t _atomicIndexIncrement(uint32_t *index, uint32_t limit);
+    
+    enum {
+        kPMTimelineRecordTardyDrivers   = 1 << 0,
+        kPMTmielineRecordSystemEvents   = 1 << 1,
+        kPMTimelineRecordAllDrivers     = 1 << 2,
+        kPMTimelineRecordOff            = 0,
+        kPMTimelineRecordDefault        = 3,
+        kPMTimelineRecordDebug          = 7    
+    };
+
+    // eventsRecordingLevel is a bitfield defining which PM driver events will get logged
+    // into the PM buffer. 
+    uint32_t                    eventsRecordingLevel;
+    
+    // pmTraceMemoryDescriptor represents the memory block that IOPMTimeline records PM trace points into.
+    IOBufferMemoryDescriptor    *pmTraceMemoryDescriptor;
+
+    // Pointer to starting address in pmTraceMemoryDescriptor
+    IOPMSystemEventRecord       *traceBuffer;
+    IOPMTraceBufferHeader       *hdr;
+
+    uint16_t                    systemState;
+    
+    IOLock                      *logLock;
+    IOPMrootDomain              *owner;
+
+    uint32_t                    numEventsLoggedThisPeriod;
+    bool                        sleepCycleInProgress;
+};
+
+OSDefineMetaClassAndStructors( IOPMTimeline, OSObject )
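+
+// IOPMTimeline is instantiated only when PM event logging is enabled through
+// the kIOLogDriverPower1 / kIOLogDriverPower2 bits of gIOKitDebug; see
+// start(), which does:
+//
+//     timeline = IOPMTimeline::timeline(this);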
+
+/*
+ * PMTraceWorker
+ * Internal helper object for logging trace points to the RTC.
+ * IOPMrootDomain, and only IOPMrootDomain, should instantiate
+ * exactly one of these.
+ */
+
+typedef void (*IOPMTracePointHandler)(
+        void * target, uint32_t code, uint32_t data );
+
+class PMTraceWorker : public OSObject
+{
+    OSDeclareDefaultStructors(PMTraceWorker)
+public:
+    typedef enum { kPowerChangeStart, kPowerChangeCompleted } change_t;
+
+    static PMTraceWorker        *tracer( IOPMrootDomain * );
+    void                        tracePCIPowerChange(change_t, IOService *, uint32_t, uint32_t);
+    void                        tracePoint(uint8_t phase);
+    void                        tracePoint(uint8_t phase, uint8_t data8);
+    void                        traceDetail(uint32_t detail);
+    void                        traceLoginWindowPhase(uint8_t phase);
+    int                         recordTopLevelPCIDevice(IOService *);
+    void                        RTC_TRACE(void);
+    virtual bool                serialize(OSSerialize *s) const;
+
+    IOPMTracePointHandler       tracePointHandler;
+    void *                      tracePointTarget;
+private:
+    IOPMrootDomain              *owner;
+    IOLock                      *pciMappingLock;
+    OSArray                     *pciDeviceBitMappings;
+
+    uint8_t                     addedToRegistry;
+    uint8_t                     tracePhase;
+    uint8_t                     loginWindowPhase;
+    uint8_t                     traceData8;
+    uint32_t                    traceData32;
 };
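+
+// Illustrative use, assuming a caller on the PM path (the phase constants
+// are defined in IOPMPrivate.h; this is a sketch, not a specific call site):
+//
+//     pmTracer->tracePoint( phase );        // persist the phase to the RTC
+//     pmTracer->traceDetail( detailBits );  // annotate the phase with detail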
 
 /*
@@ -327,7 +462,7 @@ class PMAssertionsTracker : public OSObject
     OSDeclareFinalStructors(PMAssertionsTracker)
 public:
     static PMAssertionsTracker  *pmAssertionsTracker( IOPMrootDomain * );
-
+    
     IOReturn                    createAssertion(IOPMDriverAssertionType, IOPMDriverAssertionLevel, IOService *, const char *, IOPMDriverAssertionID *);
     IOReturn                    releaseAssertion(IOPMDriverAssertionID);
     IOReturn                    setAssertionLevel(IOPMDriverAssertionID, IOPMDriverAssertionLevel);
@@ -353,7 +488,7 @@ private:
         IOService                   *ownerService;
         IOPMDriverAssertionLevel    level;
     } PMAssertStruct;
-    
+
     uint32_t                    tabulateProducerCount;
     uint32_t                    tabulateConsumerCount;
 
@@ -363,52 +498,14 @@ private:
     IOPMrootDomain              *owner;
     OSArray                     *assertionsArray;
     IOLock                      *assertionsArrayLock;
-    IOPMDriverAssertionID       issuingUniqueID;
+    IOPMDriverAssertionID       issuingUniqueID __attribute__((aligned(8))); /* aligned for atomic access */
     IOPMDriverAssertionType     assertionsKernel;
     IOPMDriverAssertionType     assertionsUser;
     IOPMDriverAssertionType     assertionsCombined;
 };
-
+ 
 OSDefineMetaClassAndFinalStructors(PMAssertionsTracker, OSObject);
-
-/*
- * PMTraceWorker
- * Internal helper object for logging trace points to RTC
- * IOPMrootDomain and only IOPMrootDomain should instantiate
- * exactly one of these.
- */
-
-typedef void (*IOPMTracePointHandler)(
-        void * target, uint32_t code, uint32_t data );
-
-class PMTraceWorker : public OSObject
-{
-    OSDeclareDefaultStructors(PMTraceWorker)
-public:
-    typedef enum { kPowerChangeStart, kPowerChangeCompleted } change_t;
-
-    static PMTraceWorker        *tracer( IOPMrootDomain * );
-    void                        tracePCIPowerChange(change_t, IOService *, uint32_t, uint32_t);
-    void                        tracePoint(uint8_t phase);
-    void                        traceLoginWindowPhase(uint8_t phase);
-    int                         recordTopLevelPCIDevice(IOService *);
-    void                        RTC_TRACE(void);
-    virtual bool				serialize(OSSerialize *s) const;
-
-    IOPMTracePointHandler       tracePointHandler;
-    void *                      tracePointTarget;
-private:
-    IOPMrootDomain              *owner;
-    IOLock                      *pciMappingLock;
-    OSArray                     *pciDeviceBitMappings;
-
-    uint8_t                     tracePhase;
-    uint8_t                     loginWindowPhase;
-    uint8_t                     addedToRegistry;
-    uint8_t                     unused0;
-    uint32_t                    pciBusyBitMask;
-};
-
+ 
 /*
  * PMHaltWorker
  * Internal helper object for Shutdown/Restart notifications.
@@ -441,6 +538,19 @@ OSDefineMetaClassAndFinalStructors( PMHaltWorker, OSObject )
 #define super IOService
 OSDefineMetaClassAndFinalStructors(IOPMrootDomain, IOService)
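+
+// Common shutdown preparation, invoked from IOSystemShutdownNotification and
+// from the kern.willshutdown sysctl. The bounded loop below waits up to ten
+// seconds (100 x 100 ms) for an in-flight sleep or shutdown to clear before
+// claiming gSleepOrShutdownPending.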
 
+static void IOPMRootDomainWillShutdown(void)
+{
+    if (OSCompareAndSwap(0, 1, &gWillShutdown))
+    {
+        OSKext::willShutdown();
+        for (int i = 0; i < 100; i++)
+        {
+            if (OSCompareAndSwap(0, 1, &gSleepOrShutdownPending)) break;
+            IOSleep( 100 );
+        }
+    }
+}
+
 extern "C"
 {
     IONotifier * registerSleepWakeInterest(IOServiceInterestHandler handler, void * self, void * ref)
@@ -473,61 +583,60 @@ extern "C"
         return gRootDomain->shutdownSystem();
     }
 
-    void IOSystemShutdownNotification ( void )
+    void IOSystemShutdownNotification(void)
     {
-	if (OSCompareAndSwap(0, 1, &gWillShutdown))
-	{
-	    OSKext::willShutdown();
-	    for (int i = 0; i < 100; i++)
-	    {
-		if (OSCompareAndSwap(0, 1, &gSleepOrShutdownPending)) break;
-		IOSleep( 100 );
-	    }
-	}
+        IOPMRootDomainWillShutdown();
+        if (OSCompareAndSwap(0, 1, &gPagingOff))
+        {
+#if !CONFIG_EMBEDDED
+            gRootDomain->handlePlatformHaltRestart(kPEPagingOff);
+#endif
+        }
     }
 
     int sync_internal(void);    
 }
 
 /*
-A device is always in the highest power state which satisfies its driver, its policy-maker, and any power domain
-children it has, but within the constraint of the power state provided by its parent.  The driver expresses its desire by
-calling changePowerStateTo(), the policy-maker expresses its desire by calling changePowerStateToPriv(), and the children
-express their desires by calling requestPowerDomainState().
-
-The Root Power Domain owns the policy for idle and demand sleep and doze for the system.  It is a power-managed IOService just
-like the others in the system.  It implements several power states which correspond to what we see as Sleep, Doze, etc.
-
-The sleep/doze policy is as follows:
-Sleep and Doze are prevented if the case is open so that nobody will think the machine is off and plug/unplug cards.
-Sleep and Doze are prevented if the sleep timeout slider in the preferences panel is at zero.
-The system cannot Sleep, but can Doze if some object in the tree is in a power state marked kIOPMPreventSystemSleep.
-
-These three conditions are enforced using the "driver clamp" by calling changePowerStateTo().  For example, if the case is
-opened, changePowerStateTo(ON_STATE) is called to hold the system on regardless of the desires of the children of the root or
-the state of the other clamp.
-
-Demand Sleep/Doze is initiated by pressing the front panel power button, closing the clamshell, or selecting the menu item.
-In this case the root's parent actually initiates the power state change so that the root has no choice and does not give
-applications the opportunity to veto the change.
-
-Idle Sleep/Doze occurs if no objects in the tree are in a state marked kIOPMPreventIdleSleep.  When this is true, the root's
-children are not holding the root on, so it sets the "policy-maker clamp" by calling changePowerStateToPriv(ON_STATE)
-to hold itself on until the sleep timer expires.  This timer is set for the difference between the sleep timeout slider and
-the larger of the display dim timeout slider and the disk spindown timeout slider in the Preferences panel.  For example, if
-the system is set to sleep after thirty idle minutes, and the display and disk are set to sleep after five idle minutes,
-when there is no longer an object in the tree holding the system out of Idle Sleep (via kIOPMPreventIdleSleep), the root
-sets its timer for 25 minutes (30 - 5).  When the timer expires, it releases its clamp and now nothing is holding it awake,
-so it falls asleep.
-
-Demand sleep is prevented when the system is booting.  When preferences are transmitted by the loginwindow at the end of
-boot, a flag is cleared, and this allows subsequent Demand Sleep.
-
-The system will not Sleep, but will Doze if some object calls setSleepSupported(kPCICantSleep) during a power change to the sleep state (this can be done by the PCI Aux Power Supply drivers, Slots99, MacRISC299, etc.).  This is not enforced with
-a clamp, but sets a flag which is noticed before actually sleeping the kernel.  If the flag is set, the root steps up
-one power state from Sleep to Doze, and any objects in the tree for which this is relevent will act appropriately (USB and
-ADB will turn on again so that they can wake the system out of Doze (keyboard/mouse activity will cause the Display Wrangler
-to be tickled)).
+A device is always in the highest power state which satisfies its driver,
+its policy-maker, and any power children it has, but within the constraint
+of the power state provided by its parent.  The driver expresses its desire by
+calling changePowerStateTo(), the policy-maker expresses its desire by calling
+changePowerStateToPriv(), and the children express their desires by calling
+requestPowerDomainState().
+
+The Root Power Domain owns the policy for idle and demand sleep for the system.
+It is a power-managed IOService just like the others in the system.
+It implements several power states which map to what we see as Sleep and On.
+
+The sleep policy is as follows:
+1. Sleep is prevented if the case is open so that nobody will think the machine
+   is off and plug/unplug cards.
+2. Sleep is prevented if the sleep timeout slider in the prefs panel is zero.
+3. System cannot Sleep if some object in the tree is in a power state marked
+   kIOPMPreventSystemSleep.
+
+These three conditions are enforced using the "driver clamp" by calling
+changePowerStateTo(). For example, if the case is opened,
+changePowerStateTo(ON_STATE) is called to hold the system on regardless
+of the desires of the children of the root or the state of the other clamp.
+
+Demand Sleep is initiated by pressing the front panel power button, closing
+the clamshell, or selecting the menu item. In this case the root's parent
+actually initiates the power state change so that the root domain has no
+choice and does not give applications the opportunity to veto the change.
+
+Idle Sleep occurs if no objects in the tree are in a state marked
+kIOPMPreventIdleSleep.  When this is true, the root's children are not holding
+the root on, so it sets the "policy-maker clamp" by calling
+changePowerStateToPriv(ON_STATE) to hold itself on until the sleep timer expires.
+This timer is set for the difference between the sleep timeout slider and the
+display dim timeout slider. When the timer expires, it releases its clamp and
+now nothing is holding it awake, so it falls asleep.
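+For example, with the sleep slider at thirty minutes and the display dim
+timeout at five, the idle sleep timer runs for the remaining twenty-five
+minutes once the display sleeps.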
+
+Demand sleep is prevented when the system is booting.  When preferences are
+transmitted by the loginwindow at the end of boot, a flag is cleared,
+and this allows subsequent Demand Sleep.
 */
 
 //******************************************************************************
@@ -547,16 +656,27 @@ IOPMrootDomain * IOPMrootDomain::construct( void )
 
 static void disk_sync_callout( thread_call_param_t p0, thread_call_param_t p1 )
 {
-    IOService       *rootDomain = (IOService *) p0;
-    unsigned long   pmRef = (unsigned long) p1;
+    IOService * rootDomain = (IOService *) p0;
+    uint32_t    notifyRef  = (uint32_t)(uintptr_t) p1;
+    uint32_t    powerState = rootDomain->getPowerState();
 
-    DLOG("disk_sync_callout start\n");
+    DLOG("disk_sync_callout ps=%u\n", powerState);
 
+    if (ON_STATE == powerState)
+    {
 #if	HIBERNATION
-    IOHibernateSystemSleep();
+        IOHibernateSystemSleep();
 #endif
-    sync_internal();
-    rootDomain->allowPowerChange(pmRef);
+        sync_internal();
+    }
+#if	HIBERNATION
+    else
+    {
+        IOHibernateSystemPostWake();
+    }
+#endif
+
+    rootDomain->allowPowerChange(notifyRef);
     DLOG("disk_sync_callout finish\n");
 }
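+
+// Note: disk_sync_callout now runs on both sides of a sleep. While the root
+// domain is still in ON_STATE it prepares hibernation and syncs disks;
+// otherwise it performs hibernation post-wake cleanup.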
 
@@ -601,11 +721,11 @@ sysctl_sleepwaketime SYSCTL_HANDLER_ARGS
 }
 
 static SYSCTL_PROC(_kern, OID_AUTO, sleeptime,
-	    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN,
+	    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
 	    &gIOLastSleepTime, 0, sysctl_sleepwaketime, "S,timeval", "");
 
 static SYSCTL_PROC(_kern, OID_AUTO, waketime,
-	    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN,
+	    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
 	    &gIOLastWakeTime, 0, sysctl_sleepwaketime, "S,timeval", "");
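+
+// CTLFLAG_LOCKED indicates that the handler performs its own locking, so the
+// sysctl layer does not need to serialize access around it.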
 
 
@@ -617,7 +737,7 @@ sysctl_willshutdown
     int error = sysctl_io_number(req, gWillShutdown, sizeof(int), &new_value, &changed);
     if (changed) {
 	if (!gWillShutdown && (new_value == 1)) {
-	    IOSystemShutdownNotification();
+	    IOPMRootDomainWillShutdown();
 	} else
 	    error = EINVAL;
     }
@@ -625,7 +745,7 @@ sysctl_willshutdown
 }
 
 static SYSCTL_PROC(_kern, OID_AUTO, willshutdown,
-	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN,
+	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
 	    0, 0, sysctl_willshutdown, "I", "");
 
 #if !CONFIG_EMBEDDED
@@ -661,16 +781,19 @@ sysctl_progressmeter
 }
 
 static SYSCTL_PROC(_kern, OID_AUTO, progressmeterenable,
-	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN,
+	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
 	    0, 0, sysctl_progressmeterenable, "I", "");
 
 static SYSCTL_PROC(_kern, OID_AUTO, progressmeter,
-	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN,
+	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
 	    0, 0, sysctl_progressmeter, "I", "");
 
 #endif
 
+static SYSCTL_INT(_debug, OID_AUTO, darkwake, CTLFLAG_RW, &gDarkWakeFlags, 0, "");
+
 static const OSSymbol * gIOPMSettingAutoWakeSecondsKey;
+static const OSSymbol * gIOPMSettingDebugWakeRelativeKey;
 static const OSSymbol * gIOPMSettingMaintenanceWakeCalendarKey;
 
 //******************************************************************************
@@ -684,11 +807,13 @@ bool IOPMrootDomain::start( IOService * nub )
 {
     OSIterator      *psIterator;
     OSDictionary    *tmpDict;
+    IORootParent *   patriarch;
 
     super::start(nub);
 
     gRootDomain = this;
     gIOPMSettingAutoWakeSecondsKey = OSSymbol::withCString(kIOPMSettingAutoWakeSecondsKey);
+    gIOPMSettingDebugWakeRelativeKey = OSSymbol::withCString(kIOPMSettingDebugWakeRelativeKey);
     gIOPMSettingMaintenanceWakeCalendarKey =
         OSSymbol::withCString(kIOPMSettingMaintenanceWakeCalendarKey);
 
@@ -706,7 +831,7 @@ bool IOPMrootDomain::start( IOService * nub )
             OSSymbol::withCString(kIOPMSettingAutoPowerSecondsKey),
             OSSymbol::withCString(kIOPMSettingAutoWakeCalendarKey),
             OSSymbol::withCString(kIOPMSettingAutoPowerCalendarKey),
-            OSSymbol::withCString(kIOPMSettingDebugWakeRelativeKey),
+            gIOPMSettingDebugWakeRelativeKey,
             OSSymbol::withCString(kIOPMSettingDebugPowerRelativeKey),
             OSSymbol::withCString(kIOPMSettingWakeOnRingKey),
             OSSymbol::withCString(kIOPMSettingRestartOnPowerLossKey),
@@ -719,28 +844,25 @@ bool IOPMrootDomain::start( IOService * nub )
             OSSymbol::withCString(kIOPMStateConsoleShutdown)
         };
 
+    PE_parse_boot_argn("darkwake", &gDarkWakeFlags, sizeof(gDarkWakeFlags));
+
     queue_init(&aggressivesQueue);
     aggressivesThreadCall = thread_call_allocate(handleAggressivesFunction, this);
     aggressivesData = OSData::withCapacity(
                         sizeof(AggressivesRecord) * (kPMLastAggressivenessType + 4));
 
     featuresDictLock = IOLockAlloc();
-    settingsCtrlLock = IORecursiveLockAlloc();
+    settingsCtrlLock = IOLockAlloc();
     setPMRootDomain(this);
     
     extraSleepTimer = thread_call_allocate(
                         idleSleepTimerExpired,
                         (thread_call_param_t) this);
 
-    clamshellWakeupIgnore = thread_call_allocate(
-                        wakeupClamshellTimerExpired,
-                        (thread_call_param_t) this);
-
     diskSyncCalloutEntry = thread_call_allocate(
                         &disk_sync_callout,
                         (thread_call_param_t) this);
     
-    canSleep = true;
     setProperty(kIOSleepSupportedKey, true);
 
     bzero(&pmStats, sizeof(pmStats));
@@ -749,21 +871,27 @@ bool IOPMrootDomain::start( IOService * nub )
 
     pmAssertions = PMAssertionsTracker::pmAssertionsTracker(this);
 
-    updateRunState(kRStateNormal);
     userDisabledAllSleep = false;
-    allowSleep = true;
-    sleepIsSupported = true;
     systemBooting = true;
     sleepSlider = 0;
     idleSleepTimerPending = false;
     wrangler = NULL;
-    sleepASAP = false;
-    clamshellIsClosed = false;
-    clamshellExists = false;
-    ignoringClamshell = true;
-    ignoringClamshellOnWake = false;
+    clamshellClosed    = false;
+    clamshellExists    = false;
+    clamshellDisabled  = true;
     acAdaptorConnected = true;
 
+    // Set the default system capabilities at boot.
+    _currentCapability = kIOPMSystemCapabilityCPU      |
+                         kIOPMSystemCapabilityGraphics |
+                         kIOPMSystemCapabilityAudio    |
+                         kIOPMSystemCapabilityNetwork;
+
+    _pendingCapability = _currentCapability;
+    _desiredCapability = _currentCapability;
+    _highestCapability = _currentCapability;
+    setProperty(kIOPMSystemCapabilitiesKey, _currentCapability, 64);
+
     queuedSleepWakeUUIDString = NULL;
     pmStatsAppResponses     = OSArray::withCapacity(5);
     _statsNameKey           = OSSymbol::withCString(kIOPMStatsNameKey);
@@ -810,10 +938,23 @@ bool IOPMrootDomain::start( IOService * nub )
     patriarch->addPowerChild(this);
 
     registerPowerDriver(this, ourPowerStates, NUM_POWER_STATES);
-
-    // set a clamp until we sleep
     changePowerStateToPriv(ON_STATE);
 
+    if (gIOKitDebug & (kIOLogDriverPower1 | kIOLogDriverPower2))
+    {
+        // Setup our PM logging & recording code
+        timeline = IOPMTimeline::timeline(this);    
+        if (timeline) {
+            OSDictionary *tlInfo = timeline->copyInfoDictionary();
+            
+            if (tlInfo) 
+            {
+                setProperty(kIOPMTimelineDictionaryKey, tlInfo);
+                tlInfo->release();
+            }
+        }
+    }
+
     // install power change handler
     gSysPowerDownNotifier = registerPrioritySleepWakeInterest( &sysPowerDownHandler, this, 0);
 
@@ -823,22 +964,12 @@ bool IOPMrootDomain::start( IOService * nub )
     {
         _displayWranglerNotifier = addMatchingNotification( 
                 gIOPublishNotification, tmpDict, 
-                (IOServiceMatchingNotificationHandler) &displayWranglerPublished,
+                (IOServiceMatchingNotificationHandler) &displayWranglerMatchPublished,
                 this, 0);
         tmpDict->release();
     }
 #endif
 
-    // Battery location published - ApplePMU support only
-    if ((tmpDict = serviceMatching("IOPMPowerSource")))
-    {
-        _batteryPublishNotifier = addMatchingNotification( 
-                gIOPublishNotification, tmpDict, 
-                (IOServiceMatchingNotificationHandler) &batteryPublished,
-                this, this);
-        tmpDict->release();
-    }
-
     const OSSymbol *ucClassName = OSSymbol::withCStringNoCopy("RootDomainUserClient");
     setProperty(gIOUserClientClassKey, (OSObject *) ucClassName);
     ucClassName->release();
@@ -874,7 +1005,6 @@ bool IOPMrootDomain::start( IOService * nub )
     return true;
 }
 
-
 //******************************************************************************
 // setProperties
 //
@@ -888,44 +1018,39 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj )
     OSDictionary    *dict = OSDynamicCast(OSDictionary, props_obj);
     OSBoolean       *b;
     OSNumber        *n;
-    OSString        *str;
+    OSDictionary    *d;
     OSSymbol        *type;
     OSObject        *obj;
     unsigned int    i;
 
-    const OSSymbol *boot_complete_string = 
-                OSSymbol::withCString("System Boot Complete");
-    const OSSymbol *sys_shutdown_string = 
-                OSSymbol::withCString("System Shutdown");
-    const OSSymbol *stall_halt_string = 
-                OSSymbol::withCString("StallSystemAtHalt");
-    const OSSymbol *battery_warning_disabled_string = 
-                OSSymbol::withCString("BatteryWarningsDisabled");
-    const OSSymbol *idle_seconds_string = 
-                OSSymbol::withCString("System Idle Seconds");
+    const OSSymbol *publish_simulated_battery_string    = OSSymbol::withCString("SoftwareSimulatedBatteries");
+    const OSSymbol *boot_complete_string                = OSSymbol::withCString("System Boot Complete");
+    const OSSymbol *sys_shutdown_string                 = OSSymbol::withCString("System Shutdown");
+    const OSSymbol *stall_halt_string                   = OSSymbol::withCString("StallSystemAtHalt");
+    const OSSymbol *battery_warning_disabled_string     = OSSymbol::withCString("BatteryWarningsDisabled");
+    const OSSymbol *idle_seconds_string                 = OSSymbol::withCString("System Idle Seconds");
+    const OSSymbol *sleepdisabled_string                = OSSymbol::withCString("SleepDisabled");
+    const OSSymbol *ondeck_sleepwake_uuid_string        = OSSymbol::withCString(kIOPMSleepWakeUUIDKey);
+    const OSSymbol *loginwindow_tracepoint_string       = OSSymbol::withCString(kIOPMLoginWindowSecurityDebugKey);
+    const OSSymbol *pmTimelineLogging_string            = OSSymbol::withCString(kIOPMTimelineDictionaryKey);
 #if	HIBERNATION
-    const OSSymbol *hibernatemode_string = 
-                OSSymbol::withCString(kIOHibernateModeKey);
-    const OSSymbol *hibernatefile_string = 
-                OSSymbol::withCString(kIOHibernateFileKey);
-    const OSSymbol *hibernatefreeratio_string = 
-                OSSymbol::withCString(kIOHibernateFreeRatioKey);
-    const OSSymbol *hibernatefreetime_string = 
-                OSSymbol::withCString(kIOHibernateFreeTimeKey);
+    const OSSymbol *hibernatemode_string                = OSSymbol::withCString(kIOHibernateModeKey);
+    const OSSymbol *hibernatefile_string                = OSSymbol::withCString(kIOHibernateFileKey);
+    const OSSymbol *hibernatefreeratio_string           = OSSymbol::withCString(kIOHibernateFreeRatioKey);
+    const OSSymbol *hibernatefreetime_string            = OSSymbol::withCString(kIOHibernateFreeTimeKey);
 #endif
-    const OSSymbol *sleepdisabled_string =
-                OSSymbol::withCString("SleepDisabled");
-    const OSSymbol *ondeck_sleepwake_uuid_string =
-                OSSymbol::withCString(kIOPMSleepWakeUUIDKey);
-    const OSSymbol *loginwindow_tracepoint_string = 
-                OSSymbol::withCString(kIOPMLoginWindowSecurityDebugKey);
-                
-    if(!dict) 
+
+    if (!dict) 
     {
         return_value = kIOReturnBadArgument;
         goto exit;
     }
 
+    if ((b = OSDynamicCast(OSBoolean, dict->getObject(publish_simulated_battery_string))))
+    {
+        publishResource(publish_simulated_battery_string, kOSBooleanTrue);
+    }
+
     if ((n = OSDynamicCast(OSNumber, dict->getObject(idle_seconds_string))))
     {
         setProperty(idle_seconds_string, n);
@@ -936,44 +1061,53 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj )
     {
         pmPowerStateQueue->submitPowerEvent( kPowerEventSystemBootCompleted );
     }
-
-    if( battery_warning_disabled_string
-        && dict->getObject(battery_warning_disabled_string))
+    
+    if( battery_warning_disabled_string && dict->getObject(battery_warning_disabled_string))
     {
-        setProperty( battery_warning_disabled_string, 
-                        dict->getObject(battery_warning_disabled_string));
+        setProperty( battery_warning_disabled_string, dict->getObject(battery_warning_disabled_string));
     }
     
-    if( sys_shutdown_string 
-        && (b = OSDynamicCast(OSBoolean, dict->getObject(sys_shutdown_string)))) 
+    if (pmTimelineLogging_string && (d = OSDynamicCast(OSDictionary, dict->getObject(pmTimelineLogging_string))))
+    {
+        if (timeline && timeline->setProperties(d)) 
+        {
+            OSDictionary *tlInfo = timeline->copyInfoDictionary();            
+            if (tlInfo) {
+                setProperty(kIOPMTimelineDictionaryKey, tlInfo);
+                tlInfo->release();
+            }
+        }
+    }
+
+    if( sys_shutdown_string && (b = OSDynamicCast(OSBoolean, dict->getObject(sys_shutdown_string)))) 
     {
         pmPowerStateQueue->submitPowerEvent(kPowerEventSystemShutdown, (void *) b);
     }
     
-    if( stall_halt_string
-        && (b = OSDynamicCast(OSBoolean, dict->getObject(stall_halt_string))) ) 
+    if( stall_halt_string && (b = OSDynamicCast(OSBoolean, dict->getObject(stall_halt_string))) ) 
     {
         setProperty(stall_halt_string, b);
     }
 
 #if	HIBERNATION
     if ( hibernatemode_string
-    && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatemode_string))))
+        && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatemode_string))))
     {
     	setProperty(hibernatemode_string, n);
     }
     if ( hibernatefreeratio_string
-    && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatefreeratio_string))))
+        && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatefreeratio_string))))
     {
         setProperty(hibernatefreeratio_string, n);
     }
     if ( hibernatefreetime_string
-    && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatefreetime_string))))
+        && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatefreetime_string))))
     {
         setProperty(hibernatefreetime_string, n);
-    }
+    }    
+    OSString *str;
     if ( hibernatefile_string
-    && (str = OSDynamicCast(OSString, dict->getObject(hibernatefile_string))))
+        && (str = OSDynamicCast(OSString, dict->getObject(hibernatefile_string))))
     {
         setProperty(hibernatefile_string, str);
     }
@@ -985,28 +1119,14 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj )
         setProperty(sleepdisabled_string, b);
         pmPowerStateQueue->submitPowerEvent(kPowerEventUserDisabledSleep, (void *) b);
     }
-    
     if (ondeck_sleepwake_uuid_string
         && (obj = dict->getObject(ondeck_sleepwake_uuid_string)))
     {
-        // Clear the currently published UUID
-        if (kOSBooleanFalse == obj) 
-        {
-            publishSleepWakeUUID(NULL);
+        if(pmPowerStateQueue) {
+            obj->retain();
+            pmPowerStateQueue->submitPowerEvent(kPowerEventQueueSleepWakeUUID, (void *)obj);
         }
 
-        // Cache UUID for an upcoming sleep/wake        
-        if ((str = OSDynamicCast(OSString, obj))) 
-        {
-            if (queuedSleepWakeUUIDString) {
-                queuedSleepWakeUUIDString->release();
-                queuedSleepWakeUUIDString = NULL;
-            }
-            queuedSleepWakeUUIDString = str;
-            queuedSleepWakeUUIDString->retain();
-            DLOG("SleepWake UUID queued: %s\n",
-                queuedSleepWakeUUIDString->getCStringNoCopy());
-        }
     }
     
     if (loginwindow_tracepoint_string
@@ -1024,6 +1144,10 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj )
     {
         setProperty(kIOPMDeepSleepDelayKey, n);
     }
+    if ((b = OSDynamicCast(OSBoolean, dict->getObject(kIOPMDestroyFVKeyOnStandbyKey))))
+    {
+        setProperty(kIOPMDestroyFVKeyOnStandbyKey, b);
+    }
 
     // Relay our allowed PM settings onto our registered PM clients
     for(i = 0; i < allowedPMSettings->getCount(); i++) {
@@ -1034,24 +1158,31 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj )
         obj = dict->getObject(type);
         if(!obj) continue;
 
-	if ((gIOPMSettingAutoWakeSecondsKey == type) && ((n = OSDynamicCast(OSNumber, obj))))
-	{
-	    UInt32 rsecs = n->unsigned32BitValue();
-	    if (!rsecs)
-		autoWakeStart = autoWakeEnd = 0;
-	    else
-	    {
-		AbsoluteTime deadline;
-		clock_interval_to_deadline(rsecs + kAutoWakePostWindow, kSecondScale, &deadline);
-		autoWakeEnd = AbsoluteTime_to_scalar(&deadline);
-		if (rsecs > kAutoWakePreWindow)
-		    rsecs -= kAutoWakePreWindow;
-		else
-		    rsecs = 0;
-		clock_interval_to_deadline(rsecs, kSecondScale, &deadline);
-		autoWakeStart = AbsoluteTime_to_scalar(&deadline);
-	    }
-	}
+        if ((gIOPMSettingAutoWakeSecondsKey == type) && ((n = OSDynamicCast(OSNumber, obj))))
+        {
+            UInt32 rsecs = n->unsigned32BitValue();
+            if (!rsecs)
+                autoWakeStart = autoWakeEnd = 0;
+            else
+            {
+                AbsoluteTime deadline;
+                clock_interval_to_deadline(rsecs + kAutoWakePostWindow, kSecondScale, &deadline);
+                autoWakeEnd = AbsoluteTime_to_scalar(&deadline);
+                if (rsecs > kAutoWakePreWindow)
+                    rsecs -= kAutoWakePreWindow;
+                else
+                    rsecs = 0;
+                clock_interval_to_deadline(rsecs, kSecondScale, &deadline);
+                autoWakeStart = AbsoluteTime_to_scalar(&deadline);
+            }
+        }
+        if (gIOPMSettingDebugWakeRelativeKey == type)
+        {
+            if ((n = OSDynamicCast(OSNumber, obj)))
+                _debugWakeSeconds = n->unsigned32BitValue();
+            else
+                _debugWakeSeconds = 0;
+        }
         
         return_value = setPMSetting(type, obj);
         
@@ -1059,14 +1190,16 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj )
     }
 
 exit:
+    if(publish_simulated_battery_string) publish_simulated_battery_string->release();
     if(boot_complete_string) boot_complete_string->release();
     if(sys_shutdown_string) sys_shutdown_string->release();
     if(stall_halt_string) stall_halt_string->release();
-    if (battery_warning_disabled_string) battery_warning_disabled_string->release();
+    if(battery_warning_disabled_string) battery_warning_disabled_string->release();
     if(idle_seconds_string) idle_seconds_string->release();
     if(sleepdisabled_string) sleepdisabled_string->release();
     if(ondeck_sleepwake_uuid_string) ondeck_sleepwake_uuid_string->release();
     if(loginwindow_tracepoint_string) loginwindow_tracepoint_string->release();
+    if(pmTimelineLogging_string) pmTimelineLogging_string->release();
 #if	HIBERNATION
     if(hibernatemode_string) hibernatemode_string->release();
     if(hibernatefile_string) hibernatefile_string->release();
@@ -1076,114 +1209,21 @@ exit:
     return return_value;
 }
 
+// MARK: -
+// MARK: Aggressiveness
 
 //******************************************************************************
-// aggressivenessChanged
+// setAggressiveness
 //
-// We are behind the command gate to examine changes to aggressives.
+// Override IOService::setAggressiveness()
 //******************************************************************************
 
-void IOPMrootDomain::aggressivenessChanged( void )
+IOReturn IOPMrootDomain::setAggressiveness(
+    unsigned long   type,
+    unsigned long   value )
 {
-    unsigned long   minutesToSleep = 0;
-    unsigned long   minutesToDisplayDim = 0;
-
-    ASSERT_GATED();
-
-    // Fetch latest display and system sleep slider values.
-	getAggressiveness(kPMMinutesToSleep, &minutesToSleep);
-	getAggressiveness(kPMMinutesToDim,   &minutesToDisplayDim);
-    DLOG("aggressiveness changed system %u, display %u\n",
-        (uint32_t) minutesToSleep, (uint32_t) minutesToDisplayDim);
-
-    DLOG("idle time -> %ld secs (ena %d)\n",
-        idleSeconds, (minutesToSleep != 0));
-
-    if (0x7fffffff == minutesToSleep)
-        minutesToSleep = idleSeconds;
-
-    // How long to wait before sleeping the system once the displays turns
-    // off is indicated by 'extraSleepDelay'.
-
-    if ( minutesToSleep > minutesToDisplayDim ) {
-        extraSleepDelay = minutesToSleep - minutesToDisplayDim;
-    }
-    else {
-        extraSleepDelay = 0;
-    }
-
-    // system sleep timer was disabled, but not anymore.
-    if ( (sleepSlider == 0) && (minutesToSleep != 0) ) {
-        if (!wrangler)
-        {
-            sleepASAP = false;
-            changePowerStateToPriv(ON_STATE);
-            if (idleSeconds)
-            {
-                startIdleSleepTimer( idleSeconds );
-            }
-        }
-        else
-        {
-            // Start idle sleep timer if wrangler went to sleep
-            // while system sleep was disabled.
-
-            sleepASAP = false;
-            if (wranglerAsleep)
-            {
-                AbsoluteTime    now;
-                uint64_t        nanos;
-                uint32_t        minutesSinceDisplaySleep = 0;
-                uint32_t        sleepDelay;
-
-                clock_get_uptime(&now);
-                if (CMP_ABSOLUTETIME(&now, &wranglerSleepTime) > 0)
-                {
-                    SUB_ABSOLUTETIME(&now, &wranglerSleepTime);
-                    absolutetime_to_nanoseconds(now, &nanos);
-                    minutesSinceDisplaySleep = nanos / (60000000000ULL);
-                }
-
-                if (extraSleepDelay > minutesSinceDisplaySleep)
-                {
-                    sleepDelay = extraSleepDelay - minutesSinceDisplaySleep;
-                }
-                else
-                {
-                    // 1 min idle sleep.
-                    sleepDelay = 1;
-                }
-
-                startIdleSleepTimer(sleepDelay * 60);
-                DLOG("display slept %u min, set idle timer to %u min\n",
-                    minutesSinceDisplaySleep, sleepDelay);
-            }
-        }
-    }
-
-    sleepSlider = minutesToSleep;
-    if ( sleepSlider == 0 ) {
-        cancelIdleSleepTimer();
-        // idle sleep is now disabled
-        adjustPowerState();
-        // make sure we're powered
-        patriarch->wakeSystem();
-    }
-}
-
-
-//******************************************************************************
-// setAggressiveness
-//
-// Override IOService::setAggressiveness()
-//******************************************************************************
-
-IOReturn IOPMrootDomain::setAggressiveness(
-    unsigned long   type,
-    unsigned long   value )
-{
-    return setAggressiveness( type, value, 0 );
-}
+    return setAggressiveness( type, value, 0 );
+}
 
 /*
  * Private setAggressiveness() with an internal options argument.
@@ -1197,8 +1237,8 @@ IOReturn IOPMrootDomain::setAggressiveness(
     AggressivesRequest *    request;
     bool                    found = false;
 
-    DLOG("setAggressiveness 0x%x = %u, options 0x%x\n",
-        (uint32_t) type, (uint32_t) value, (uint32_t) options);
+    DLOG("setAggressiveness(%x) 0x%x = %u\n",
+        (uint32_t) options, (uint32_t) type, (uint32_t) value);
 
     request = IONew(AggressivesRequest, 1);
     if (!request)
@@ -1255,7 +1295,6 @@ IOReturn IOPMrootDomain::setAggressiveness(
     return kIOReturnSuccess;
 }
 
-
 //******************************************************************************
 // getAggressiveness
 //
@@ -1328,8 +1367,8 @@ IOReturn IOPMrootDomain::getAggressiveness (
 
     if (source)
     {
-        DLOG("getAggressiveness 0x%x = %u, source %d\n",
-            (uint32_t) type, value, source);
+        DLOG("getAggressiveness(%d) 0x%x = %u\n",
+            source, (uint32_t) type, value);
         *outLevel = (unsigned long) value;
         return kIOReturnSuccess;
     }
@@ -1341,7 +1380,6 @@ IOReturn IOPMrootDomain::getAggressiveness (
     }
 }
 
-
 //******************************************************************************
 // joinAggressiveness
 //
@@ -1356,7 +1394,7 @@ IOReturn IOPMrootDomain::joinAggressiveness(
     if (!service || (service == this))
         return kIOReturnBadArgument;
 
-    DLOG("joinAggressiveness %s (%p)\n", service->getName(), service);
+    DLOG("joinAggressiveness %s %p\n", service->getName(), service);
 
     request = IONew(AggressivesRequest, 1);
     if (!request)
@@ -1377,7 +1415,6 @@ IOReturn IOPMrootDomain::joinAggressiveness(
     return kIOReturnSuccess;
 }
 
-
 //******************************************************************************
 // handleAggressivesRequests
 //
@@ -1443,7 +1480,7 @@ void IOPMrootDomain::handleAggressivesRequests( void )
                                     broadcast = true;
                                     record->flags |= (kAggressivesRecordFlagMinValue |
                                                       kAggressivesRecordFlagModified);
-                                    DLOG("quick spindown accelerated, was %u min\n",
+                                    DLOG("disk spindown accelerated, was %u min\n",
                                         record->value);
                                 }
                             }
@@ -1545,11 +1582,12 @@ unlock_done:
     // Submit a power event to handle those changes on the PM work loop.
 
     if (pingSelf && pmPowerStateQueue) {
-        pmPowerStateQueue->submitPowerEvent( kPowerEventAggressivenessChanged );
+        pmPowerStateQueue->submitPowerEvent(
+            kPowerEventPolicyStimulus,
+            (void *) kStimulusAggressivenessChanged );
     }
 }
 
-
 //******************************************************************************
 // synchronizeAggressives
 //
@@ -1564,6 +1602,7 @@ void IOPMrootDomain::synchronizeAggressives(
     IOService *                 service;
     AggressivesRequest *        request;
     const AggressivesRecord *   record;
+    IOPMDriverCallEntry         callEntry;
     uint32_t                    value;
     int                         i;
 
@@ -1580,7 +1619,7 @@ void IOPMrootDomain::synchronizeAggressives(
 
         if (service)
         {
-            if (service->assertPMThreadCall())
+            if (service->assertPMDriverCall(&callEntry))
             {
                 for (i = 0, record = array; i < count; i++, record++)
                 {
@@ -1588,18 +1627,17 @@ void IOPMrootDomain::synchronizeAggressives(
                     if (record->flags & kAggressivesRecordFlagMinValue)
                         value = kAggressivesMinValue;
 
-                    DLOG("synchronizeAggressives 0x%x = %u to %s\n",
+                    _LOG("synchronizeAggressives 0x%x = %u to %s\n",
                         record->type, value, service->getName());
                     service->setAggressiveness(record->type, value);
                 }
-                service->deassertPMThreadCall();
+                service->deassertPMDriverCall(&callEntry);
             }
             service->release();     // retained by joinAggressiveness()
         }
     }
 }
 
-
 //******************************************************************************
 // broadcastAggressives
 //
@@ -1610,18 +1648,19 @@ void IOPMrootDomain::broadcastAggressives(
     const AggressivesRecord *   array,
     int                         count )
 {
-	IORegistryIterator *        iter;
-	IORegistryEntry *           entry;
-	IOPowerConnection *         connect;
+    IORegistryIterator *        iter;
+    IORegistryEntry *           entry;
+    IOPowerConnection *         connect;
     IOService *                 service;
     const AggressivesRecord *   record;
+    IOPMDriverCallEntry         callEntry;
     uint32_t                    value;
     int                         i;
 
-	iter = IORegistryIterator::iterateOver(
-		this, gIOPowerPlane, kIORegistryIterateRecursively);
+    iter = IORegistryIterator::iterateOver(
+            this, gIOPowerPlane, kIORegistryIterateRecursively);
     if (iter)
-	{
+    {
         do
         {
             iter->reset();
@@ -1633,7 +1672,7 @@ void IOPMrootDomain::broadcastAggressives(
 
                 if ((service = (IOService *) connect->copyChildEntry(gIOPowerPlane)))
                 {
-                    if (service->assertPMThreadCall())
+                    if (service->assertPMDriverCall(&callEntry))
                     {
                         for (i = 0, record = array; i < count; i++, record++)
                         {
@@ -1642,12 +1681,12 @@ void IOPMrootDomain::broadcastAggressives(
                                 value = record->value;
                                 if (record->flags & kAggressivesRecordFlagMinValue)
                                     value = kAggressivesMinValue;
-                                DLOG("broadcastAggressives %x = %u to %s\n",
+                                _LOG("broadcastAggressives %x = %u to %s\n",
                                     record->type, value, service->getName());
                                 service->setAggressiveness(record->type, value);
                             }
                         }
-                        service->deassertPMThreadCall();
+                        service->deassertPMDriverCall(&callEntry);
                     }
                     service->release();
                 }
@@ -1658,6 +1697,8 @@ void IOPMrootDomain::broadcastAggressives(
     }
 }
 
+// MARK: -
+// MARK: System Sleep
 
 //******************************************************************************
 // startIdleSleepTimer
@@ -1678,7 +1719,6 @@ void IOPMrootDomain::startIdleSleepTimer( uint32_t inSeconds )
     }
 }
 
-
 //******************************************************************************
 // cancelIdleSleepTimer
 //
@@ -1687,7 +1727,7 @@ void IOPMrootDomain::startIdleSleepTimer( uint32_t inSeconds )
 void IOPMrootDomain::cancelIdleSleepTimer( void )
 {
     ASSERT_GATED();
-    if (idleSleepTimerPending) 
+    if (idleSleepTimerPending)
     {
         DLOG("idle timer cancelled\n");
         thread_call_cancel(extraSleepTimer);
@@ -1695,7 +1735,6 @@ void IOPMrootDomain::cancelIdleSleepTimer( void )
     }
 }
 
-
 //******************************************************************************
 // idleSleepTimerExpired
 //
@@ -1707,13 +1746,6 @@ static void idleSleepTimerExpired(
     ((IOPMrootDomain *)us)->handleSleepTimerExpiration();
 }
 
-static void wakeupClamshellTimerExpired(
-    thread_call_param_t us, thread_call_param_t )
-{
-    ((IOPMrootDomain *)us)->stopIgnoringClamshellEventsDuringWakeup();
-}
-
-
 //******************************************************************************
 // handleSleepTimerExpiration
 //
@@ -1747,41 +1779,34 @@ void IOPMrootDomain::handleSleepTimerExpiration( void )
         return;
     }
 
-    // accelerate disk spin down if spin down timer is non-zero
     setQuickSpinDownTimeout();
-
-    sleepASAP = true;
-    adjustPowerState();
+    adjustPowerState(true);
 }
 
-
 //******************************************************************************
-// stopIgnoringClamshellEventsDuringWakeup
+// setQuickSpinDownTimeout
 //
 //******************************************************************************
 
-void IOPMrootDomain::stopIgnoringClamshellEventsDuringWakeup( void )
+void IOPMrootDomain::setQuickSpinDownTimeout( void )
 {
-    if (!getPMworkloop()->inGate())
-    {
-        getPMworkloop()->runAction(
-            OSMemberFunctionCast(IOWorkLoop::Action, this,
-                &IOPMrootDomain::stopIgnoringClamshellEventsDuringWakeup),
-            this);
-        return;
-    }
-
     ASSERT_GATED();
+    setAggressiveness(
+        kPMMinutesToSpinDown, 0, kAggressivesOptionQuickSpindownEnable );
+}
 
-    // Allow clamshell-induced sleep now
-    ignoringClamshellOnWake = false;
+//******************************************************************************
+// restoreUserSpinDownTimeout
+//
+//******************************************************************************
 
-    // Re-send clamshell event, in case it causes a sleep
-    if (clamshellIsClosed)
-        handlePowerNotification( kLocalEvalClamshellCommand );
+void IOPMrootDomain::restoreUserSpinDownTimeout( void )
+{
+    ASSERT_GATED();
+    setAggressiveness(
+        kPMMinutesToSpinDown, 0, kAggressivesOptionQuickSpindownDisable );
 }
 
-
 //******************************************************************************
 // sleepSystem
 //
@@ -1806,21 +1831,17 @@ IOReturn IOPMrootDomain::sleepSystemOptions( OSDictionary *options )
           
     if (options && options->getObject("OSSwitch")) 
     {
-
         // Log specific sleep cause for OS Switch hibernation
-        return privateSleepSystem( kIOPMSleepReasonOSSwitchHibernation);
-
+        return privateSleepSystem( kIOPMSleepReasonOSSwitchHibernate);
     } else {
-
         return privateSleepSystem( kIOPMSleepReasonSoftware);
-
     }
 }
 
 /* private */
 IOReturn IOPMrootDomain::privateSleepSystem( uint32_t sleepReason )
 {
-    static const char * IOPMSleepReasons[kIOPMSleepReasonMax] = {
+    static const char * IOPMSleepReasons[] = {
         "",
         kIOPMClamshellSleepKey,
         kIOPMPowerButtonSleepKey,
@@ -1829,711 +1850,869 @@ IOReturn IOPMrootDomain::privateSleepSystem( uint32_t sleepReason )
         kIOPMIdleSleepKey,
         kIOPMLowPowerSleepKey,
         kIOPMClamshellSleepKey,
-        kIOPMThermalEmergencySleepKey
+        kIOPMThermalEmergencySleepKey,
+        kIOPMMaintenanceSleepKey
     };
-    if ( userDisabledAllSleep )
-    {
-        LOG("Sleep prevented by user disable\n");
 
-        /* Prevent sleep of all kinds if directed to by user space */
-        return kIOReturnNotPermitted;
-    }
+    PMEventDetails *details;
 
-    if ( systemBooting || systemShutdown || !allowSleep )
+    if (!checkSystemCanSleep())
     {
-        LOG("Sleep prevented by SB %d, SS %d, AS %d\n",
-            systemBooting, systemShutdown, allowSleep);
+        // Record why the system couldn't sleep
+        details = PMEventDetails::eventDetails(kIOPMEventTypeSleep, NULL,
+                                        sleepReason, kIOReturnNotPermitted);
+
+        recordAndReleasePMEvent( details );
+        return kIOReturnNotPermitted;
+    }
 
-        // Unable to sleep because system is in the process of booting or
-        // shutting down, or sleep has otherwise been disallowed.
-        return kIOReturnError;
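+    // Mark the PM trace timeline, when one is enabled, as being inside
+    // a sleep cycle.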
+    if (timeline)
+        timeline->setSleepCycleInProgressFlag(true);
+  
+    // Time to publish a UUID for the Sleep --> Wake cycle  
+    if(pmPowerStateQueue) {
+        pmPowerStateQueue->submitPowerEvent(kPowerEventPublishSleepWakeUUID, (void *)true);
     }
 
+    // Log the beginning of system sleep.
+    details = PMEventDetails::eventDetails(kIOPMEventTypeSleep, NULL,
+                                            sleepReason, kIOReturnSuccess);
+
+    recordAndReleasePMEvent( details );
+
     // Record sleep cause in IORegistry
     lastSleepReason = sleepReason;
-    if (sleepReason && (sleepReason < kIOPMSleepReasonMax)) {
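+    // Sleep reasons are numbered starting at kIOPMSleepReasonClamshell;
+    // rebase the value so it indexes IOPMSleepReasons[] from entry 1.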
+    sleepReason -= (kIOPMSleepReasonClamshell - 1);
+    if (sleepReason && (sleepReason < sizeof(IOPMSleepReasons)/sizeof(IOPMSleepReasons[0]))) {
         setProperty(kRootDomainSleepReasonKey, IOPMSleepReasons[sleepReason]);
     }
 
-    patriarch->sleepSystem();
+    if (pmPowerStateQueue)
+        pmPowerStateQueue->submitPowerEvent(
+                            kPowerEventPolicyStimulus,
+                            (void *) kStimulusDemandSystemSleep );
+
     return kIOReturnSuccess;
 }
 
+IOReturn IOPMrootDomain::recordPMEventGated(PMEventDetails *record)
+{
+    // If we don't have a timeline to log to, we can't actually log
+    // anything. Chances are the caller asking us to do the PM logging
+    // has forgotten to set the right bootflags.
+    if (!timeline)
+        return kIOReturnSuccess;
 
-//******************************************************************************
-// shutdownSystem
-//
-//******************************************************************************
+    if (gIOPMWorkLoop->inGate() == false) {
+        IOReturn ret = gIOPMWorkLoop->runAction(
+                        OSMemberFunctionCast(IOWorkLoop::Action, this,
+                            &IOPMrootDomain::recordPMEventGated),
+                        (OSObject *)this,
+                        (void *)record);
+        return ret;
+    }
+    else {
+        // Now that we're guaranteed to be running in gate ...
 
-IOReturn IOPMrootDomain::shutdownSystem( void )
-{
-    //patriarch->shutDownSystem();
-    return kIOReturnUnsupported;
+        // Check the validity of the argument we are given
+        if (!record)
+            return kIOReturnBadArgument;
+
+        // Record a driver event, or a system event
+        if (record->eventClassifier == kIOPMEventClassDriverEvent
+            || record->eventClassifier == kIOPMEventClassSystemEvent)
+            return this->recordPMEvent(record);
+        else
+            return kIOReturnBadArgument;
+    }
 }
 
-
-//******************************************************************************
-// restartSystem
-//
-//******************************************************************************
-
-IOReturn IOPMrootDomain::restartSystem( void )
+IOReturn IOPMrootDomain::recordAndReleasePMEventGated(PMEventDetails *record)
 {
-    //patriarch->restartSystem();
-    return kIOReturnUnsupported;
-}
+    IOReturn ret = kIOReturnBadArgument;
 
+    if (record)
+    {
+        ret = recordPMEventGated(record);
+        record->release();
+    }
+    
+    return ret;
+}
 
 //******************************************************************************
 // powerChangeDone
 //
 // This overrides powerChangeDone in IOService.
-//
-// Menu sleep and idle sleep move us from the ON state to the SLEEP_STATE.
-// In this case:
-// If we finished going to the SLEEP_STATE, and the platform is capable of
-// true sleep, then sleep the kernel. Otherwise switch up to the DOZE_STATE
-// which will keep almost everything as off as it can get.
 //******************************************************************************
 
-void IOPMrootDomain::powerChangeDone( unsigned long previousState )
+void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState )
 {
+    PMEventDetails *details;
+
     ASSERT_GATED();
     DLOG("PowerChangeDone: %u->%u\n",
-        (uint32_t) previousState, (uint32_t) getPowerState());
-
-    switch ( getPowerState() ) {
-        case SLEEP_STATE:
-			if ( previousState != ON_STATE )
-				break;
+        (uint32_t) previousPowerState, (uint32_t) getPowerState());
+
+    switch ( getPowerState() )
+    {
+        case SLEEP_STATE: {
+            if (previousPowerState != ON_STATE)
+                break;
+
+            details = PMEventDetails::eventDetails(
+                            kIOPMEventTypeSleepDone,
+                            NULL,
+                            NULL,
+                            kIOReturnSuccess);
+
+            recordAndReleasePMEvent( details );
 
-            if ( canSleep )
-            {
-                // re-enable this timer for next sleep
-                cancelIdleSleepTimer();
-                wranglerTickled = true;
+            // re-enable this timer for next sleep
+            cancelIdleSleepTimer();
 
-                clock_sec_t		secs;
-				clock_usec_t	microsecs;
-                clock_get_calendar_microtime(&secs, &microsecs);
-                logtime(secs);
-                gIOLastSleepTime.tv_sec  = secs;
-                gIOLastSleepTime.tv_usec = microsecs;
-                gIOLastWakeTime.tv_sec = 0;
-                gIOLastWakeTime.tv_usec = 0;
+            clock_sec_t		secs;
+            clock_usec_t	microsecs;
+            clock_get_calendar_microtime(&secs, &microsecs);
+            logtime(secs);
+            gIOLastSleepTime.tv_sec  = secs;
+            gIOLastSleepTime.tv_usec = microsecs;
+            gIOLastWakeTime.tv_sec = 0;
+            gIOLastWakeTime.tv_usec = 0;
 
 #if	HIBERNATION
-                LOG("System %sSleep\n", gIOHibernateState ? "Safe" : "");
-
-                tracePoint(kIOPMTracePointSystemHibernatePhase);
+            LOG("System %sSleep\n", gIOHibernateState ? "Safe" : "");
 
-                IOHibernateSystemHasSlept();
+            IOHibernateSystemHasSlept();
 
-                evaluateSystemSleepPolicyFinal();
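+            // One final evaluation of the sleep policy (e.g. hibernate
+            // settings) before the platform sleeps.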
+            evaluateSystemSleepPolicyFinal();
 #else
-                LOG("System Sleep\n");
+            LOG("System Sleep\n");
 #endif
 
-                tracePoint(kIOPMTracePointSystemSleepPlatformPhase);
+            getPlatform()->sleepKernel();
 
-                getPlatform()->sleepKernel();
+            // The CPU(s) are off at this point. Code will resume
+            // execution here upon wake.
 
-                // The CPU(s) are off at this point. When they're awakened by CPU interrupt,
-                // code will resume execution here.
+            clock_get_uptime(&systemWakeTime);
 
-                // Now we're waking...
-                tracePoint(kIOPMTracePointSystemWakeDriversPhase);
-                
 #if	HIBERNATION
-                IOHibernateSystemWake();
+            IOHibernateSystemWake();
 #endif
 
-                // sleep transition complete
-                gSleepOrShutdownPending = 0;
-
-                // trip the reset of the calendar clock
-                clock_wakeup_calendar();
-
-                // get us some power
-                patriarch->wakeSystem();
-
-                // Set indicator if UUID was set - allow it to be cleared.
-                if (getProperty(kIOPMSleepWakeUUIDKey))
-                    gSleepWakeUUIDIsSet = true;
+            // sleep transition complete
+            gSleepOrShutdownPending = 0;
 
-#if !ROOT_DOMAIN_RUN_STATES
-                tellClients(kIOMessageSystemWillPowerOn, clientMessageFilter);
-#endif
+            // trip the reset of the calendar clock
+            clock_wakeup_calendar();
 
 #if	HIBERNATION
-                LOG("System %sWake\n", gIOHibernateState ? "SafeSleep " : "");
+            LOG("System %sWake\n", gIOHibernateState ? "SafeSleep " : "");
 #endif
 
-                // log system wake
-                getPlatform()->PMLog(kIOPMrootDomainClass, kPMLogSystemWake, 0, 0);
-                lowBatteryCondition = false;
+            // log system wake
+            getPlatform()->PMLog(kIOPMrootDomainClass, kPMLogSystemWake, 0, 0);
+            lowBatteryCondition = false;
+            lastSleepReason = 0;
+
+            // And start logging the wake event here
+            // TODO: Publish the wakeReason string as an integer
+            details = PMEventDetails::eventDetails(
+                            kIOPMEventTypeWake,
+                            NULL,
+                            0,
+                            kIOReturnSuccess);
+
+            recordAndReleasePMEvent( details );
 
 #ifndef __LP64__
-                // tell the tree we're waking
-                systemWake();
+            systemWake();
 #endif
 
-
 #if defined(__i386__) || defined(__x86_64__)
-                sleepTimerMaintenance = false;
-#if ROOT_DOMAIN_RUN_STATES
-                OSString * wakeType = OSDynamicCast(
-                    OSString, getProperty(kIOPMRootDomainWakeTypeKey));
-                if (wakeType && wakeType->isEqualTo(kIOPMrootDomainWakeTypeLowBattery))
+            wranglerTickled    = false;
+            graphicsSuppressed = false;
+            darkWakePostTickle = false;
+            logGraphicsClamp   = true;
+            logWranglerTickle  = true;
+            sleepTimerMaintenance = false;
+
+            OSString * wakeType = OSDynamicCast(
+                OSString, getProperty(kIOPMRootDomainWakeTypeKey));
+            OSString * wakeReason = OSDynamicCast(
+                OSString, getProperty(kIOPMRootDomainWakeReasonKey));
+
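+            // Decide between a full wake and a dark/maintenance wake using
+            // the wake type published by the platform and the gDarkWakeFlags
+            // policy: wranglerTickled selects a full wake, while
+            // darkWakeToSleepASAP returns the system to sleep when done.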
+            if (wakeType && wakeType->isEqualTo(kIOPMrootDomainWakeTypeLowBattery))
+            {
+                lowBatteryCondition = true;
+                darkWakeMaintenance = true;
+                darkWakeToSleepASAP = true;
+            }
+            else if ((gDarkWakeFlags & kDarkWakeFlagHIDTickleMask) != 0)
+            {
+                OSNumber * hibOptions = OSDynamicCast(
+                    OSNumber, getProperty(kIOHibernateOptionsKey));
+
+                if (hibernateAborted ||
+                    ((hibOptions &&
+                     !(hibOptions->unsigned32BitValue() & kIOHibernateOptionDarkWake))) ||
+                    ((_debugWakeSeconds != 0) &&
+                      ((gDarkWakeFlags & kDarkWakeFlagAlarmIsDark) == 0)) ||
+                    (wakeType && (
+                     wakeType->isEqualTo(kIOPMRootDomainWakeTypeUser) ||
+                     wakeType->isEqualTo(kIOPMRootDomainWakeTypeAlarm))))
                 {
-                    lowBatteryCondition = true;
-                    updateRunState(kRStateMaintenance);
-                    wranglerTickled = false;
+                    wranglerTickled = true;
                 }
-                else if (wakeType && !hibernateAborted && wakeType->isEqualTo(kIOPMRootDomainWakeTypeSleepTimer))
+                else
+                if (wakeType &&
+                    wakeType->isEqualTo(kIOPMRootDomainWakeTypeMaintenance))
                 {
-                    sleepTimerMaintenance = true;
-                    updateRunState(kRStateMaintenance);
-                    wranglerTickled = false;
+                    darkWakeMaintenance = true;
+                    darkWakeToSleepASAP = true;
                 }
-                else if (wakeType && !hibernateAborted && wakeType->isEqualTo(kIOPMRootDomainWakeTypeMaintenance))
+                else
+                if (wakeType &&
+                    wakeType->isEqualTo(kIOPMRootDomainWakeTypeSleepTimer))
                 {
-                    updateRunState(kRStateMaintenance);
-                    wranglerTickled = false;
+                    darkWakeMaintenance = true;
+                    darkWakeToSleepASAP = true;
+                    sleepTimerMaintenance = true;
                 }
                 else
-#endif  /* ROOT_DOMAIN_RUN_STATES */
                 {
-                    updateRunState(kRStateNormal);
-                    reportUserInput();
-                }
-#else   /* !__i386__ && !__x86_64__ */
-                // stay awake for at least 30 seconds
-                startIdleSleepTimer(30);
-                reportUserInput();
-#endif
-
-                changePowerStateToPriv(ON_STATE);
-            } else {
-                updateRunState(kRStateNormal);
+                    // Unidentified wake source; resume to full wake if a
+                    // debug alarm is pending.
 
-                // allow us to step up a power state
-                patriarch->sleepToDoze();
-
-                // ignore children's request for higher power during doze.
-                changePowerStateWithOverrideTo(DOZE_STATE);
+                    if (_debugWakeSeconds && (!wakeReason || wakeReason->isEqualTo("")))
+                        wranglerTickled = true;
+                    else
+                        darkWakeToSleepASAP = true;
+                }
             }
-            break;
-
-        case DOZE_STATE:
-            if ( previousState != DOZE_STATE ) 
+            else
             {
-                LOG("System Doze\n");
-            }
-            // re-enable this timer for next sleep
-            cancelIdleSleepTimer();
-            gSleepOrShutdownPending = 0;
-
-            // Invalidate prior activity tickles to allow wake from doze.
-            if (wrangler) wrangler->changePowerStateTo(0);
-            break;
-
-#if ROOT_DOMAIN_RUN_STATES
-        case ON_STATE:
-            // SLEEP -> ON (Maintenance)
-            // Go back to sleep, unless cancelled by a HID event.
+                // Post a HID tickle immediately - except for maintenance wake.
 
-            if ((previousState == SLEEP_STATE) &&
-                (runStateIndex == kRStateMaintenance) &&
-                !wranglerTickled)
-            {
-                if (lowBatteryCondition)
+                if (hibernateAborted || !wakeType ||
+                    !wakeType->isEqualTo(kIOPMRootDomainWakeTypeMaintenance))
                 {
-                    lastSleepReason = kIOPMSleepReasonLowPower;
-                    setProperty(kRootDomainSleepReasonKey, kIOPMLowPowerSleepKey);
+                    wranglerTickled = true;
                 }
                 else
                 {
-                    lastSleepReason = kIOPMSleepReasonMaintenance;
-                    setProperty(kRootDomainSleepReasonKey, kIOPMMaintenanceSleepKey);
+                    darkWakeMaintenance = true;
+                    darkWakeToSleepASAP = true;
                 }
-                changePowerStateWithOverrideTo(SLEEP_STATE);
             }
 
-            // ON -> ON triggered by R-state changes.
-
-            if ((previousState == ON_STATE) &&
-                (runStateIndex != nextRunStateIndex) &&
-                (nextRunStateIndex < kRStateCount))
+            if (wranglerTickled)
+                reportUserInput();
+            else if (!darkWakeMaintenance)
             {
-                LOG("R-state changed %u->%u\n",
-                    runStateIndex, nextRunStateIndex);
-                updateRunState(nextRunStateIndex);
+                // Early/late tickle for non-maintenance wake.
+                if (((gDarkWakeFlags & kDarkWakeFlagHIDTickleMask) == 
+                     kDarkWakeFlagHIDTickleEarly) ||
+                    ((gDarkWakeFlags & kDarkWakeFlagHIDTickleMask) == 
+                     kDarkWakeFlagHIDTickleLate))
+                {
+                    darkWakePostTickle = true;
+                }
+            }
+#else   /* !__i386__ && !__x86_64__ */
+            // stay awake for at least 30 seconds
+            wranglerTickled = true;
+            startIdleSleepTimer(30);
+#endif
+
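+            // In every case, power the root domain back up to ON_STATE;
+            // the flags set above determine whether this becomes a full
+            // wake or a short-lived dark wake.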
+            changePowerStateToPriv(ON_STATE);
+        }   break;
+    
+        case ON_STATE: {
+            bool wasPrevented = childPreventSystemSleep;
+
+            details = PMEventDetails::eventDetails(
+                            kIOPMEventTypeWakeDone,
+                            NULL,
+                            0,
+                            kIOReturnSuccess);
+
+            recordAndReleasePMEvent( details );
+
+            if (previousPowerState != ON_STATE)
+                _debugWakeSeconds = 0;
+
+            // Update childPreventSystemSleep flag using the capability computed
+            // by IOService::rebuildChildClampBits().
+
+            childPreventSystemSleep =
+                ((currentCapability() & kIOPMChildClamp2) != 0);
 
-                DLOG("kIOMessageSystemHasPoweredOn (%u)\n",
-                    gMessageClientType);
-                tellClients(kIOMessageSystemHasPoweredOn, clientMessageFilter);
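+            // A child just dropped its prevent-system-sleep claim;
+            // re-evaluate the dark wake policy in case sleep can proceed.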
+            if (wasPrevented && !childPreventSystemSleep)
+            {
+                evaluatePolicy( kStimulusDarkWakeEvaluate );
             }
-            
-            break;
-#endif  /* ROOT_DOMAIN_RUN_STATES */
+        }   break;
     }
 }
 
-
 //******************************************************************************
-// wakeFromDoze
+// requestPowerDomainState
 //
-// The Display Wrangler calls here when it switches to its highest state.
-// If the  system is currently dozing, allow it to wake by making sure the
-// parent is providing power.
+// Extend implementation in IOService. Running on PM work loop thread.
+//
+// Examine children's desires and initiate idle sleep when all children are
+// idle, and the prevent-idle and prevent-system-sleep flags are not set.
 //******************************************************************************
 
-void IOPMrootDomain::wakeFromDoze( void )
+IOReturn IOPMrootDomain::requestPowerDomainState (
+    IOPMPowerFlags      childDesire,
+    IOPowerConnection * childConnection,
+    unsigned long       specification )
 {
-    if ( getPowerState() == DOZE_STATE )
+    OSIterator          *iter;
+    OSObject            *next;
+    IOPowerConnection   *connection;
+    IOPMPowerFlags      mergedChildDesire = 0;
+    IOPMPowerFlags      editedChildDesire;
+    IOPMPowerFlags      thisDesire;
+    bool                sleepASAP = false;
+
+    ASSERT_GATED();
+
+    // Disregard disk I/O (anything besides the display wrangler) as a
+    // factor in preventing idle sleep - based on a runtime setting.
+
+    if ((gDarkWakeFlags & kDarkWakeFlagIgnoreDiskIOAlways) &&
+        (kIOPMPreventIdleSleep & childDesire) &&
+        (childConnection != wranglerConnection))
     {
-        tracePoint(kIOPMTracePointSystemWakeDriversPhase);
-        changePowerStateToPriv(ON_STATE);
-        patriarch->wakeSystem();
+        childDesire &= ~kIOPMPreventIdleSleep;
     }
-}
 
+    // Force the child's input power requirement to 0 unless a prevent
+    // idle-sleep or prevent-system-sleep flag is set. Nil input power flags
+    // map to our state 0. Our power clamp (deviceDesire) clamps the lowest
+    // power state at 2.
 
-//******************************************************************************
-// publishFeature
-//
-// Adds a new feature to the supported features dictionary
-//******************************************************************************
+    editedChildDesire = 0;
+    if (childDesire & kIOPMPreventIdleSleep)
+        editedChildDesire |= (kIOPMPowerOn | kIOPMPreventIdleSleep);
+    if (childDesire & kIOPMPreventSystemSleep)
+        editedChildDesire |= (kIOPMPowerOn | kIOPMPreventSystemSleep);
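+    // editedChildDesire now carries only the prevent-sleep bits (plus
+    // kIOPMPowerOn) that the root domain honors from this child.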
 
-void IOPMrootDomain::publishFeature( const char * feature )
-{
-    publishFeature(feature, kRD_AllPowerSources, NULL);
-}
+    iter = getChildIterator(gIOPowerPlane);
+    if ( iter )
+    {
+        while ( (next = iter->getNextObject()) )
+        {
+            if ( (connection = OSDynamicCast(IOPowerConnection, next)) )
+            {
+                // Ignore children that are still in the process of joining.
+                if (connection->getReadyFlag() == false)
+                    continue;
 
+                // OR in the child's input power requirements.
+                // Is this connection attached to the child that called
+                // requestPowerDomainState()?
 
-//******************************************************************************
-// publishFeature (with supported power source specified)
-//
-// Adds a new feature to the supported features dictionary
-//******************************************************************************
-
-void IOPMrootDomain::publishFeature(
-    const char *feature, 
-    uint32_t supportedWhere,
-    uint32_t *uniqueFeatureID)
-{
-    static uint16_t     next_feature_id = 500;
-
-    OSNumber            *new_feature_data = NULL;
-    OSNumber            *existing_feature = NULL;
-    OSArray             *existing_feature_arr = NULL;
-    OSObject            *osObj = NULL;
-    uint32_t            feature_value = 0;
-
-    supportedWhere &= kRD_AllPowerSources; // mask off any craziness!
-
-    if(!supportedWhere) {
-        // Feature isn't supported anywhere!
-        return;
-    }
-    
-    if(next_feature_id > 5000) {
-        // Far, far too many features!
-        return;
-    }
-
-    if(featuresDictLock) IOLockLock(featuresDictLock);
+                if (connection == childConnection)
+                {
+                    thisDesire = editedChildDesire;
+                }
+                else
+                {
+                    thisDesire = 0;
+                    if (connection->getPreventIdleSleepFlag())
+                        thisDesire |= (kIOPMPowerOn | kIOPMPreventIdleSleep);
+                    if (connection->getPreventSystemSleepFlag())
+                        thisDesire |= (kIOPMPowerOn | kIOPMPreventSystemSleep);
+                }
 
-    OSDictionary *features =
-        (OSDictionary *) getProperty(kRootDomainSupportedFeatures);
-    
-    // Create new features dict if necessary
-    if ( features && OSDynamicCast(OSDictionary, features)) {
-        features = OSDictionary::withDictionary(features);
-    } else {
-        features = OSDictionary::withCapacity(1);
-    }
-    
-    // Create OSNumber to track new feature
-    
-    next_feature_id += 1;
-    if( uniqueFeatureID ) {
-        // We don't really mind if the calling kext didn't give us a place
-        // to stash their unique id. Many kexts don't plan to unload, and thus
-        // have no need to remove themselves later.
-        *uniqueFeatureID = next_feature_id;
+                mergedChildDesire |= thisDesire;
+                if (thisDesire && (kIOLogPMRootDomain & gIOKitDebug))
+                {
+                    IOService * child =
+                        (IOService *) connection->getChildEntry(gIOPowerPlane);
+                    LOG("child %p, noIdle %d, noSleep %d - %s\n",
+                        child,
+                        ((thisDesire & kIOPMPreventIdleSleep) != 0),
+                        ((thisDesire & kIOPMPreventSystemSleep) != 0),
+                        child ? child->getName() : "?");
+                }
+            }
+        }
+        iter->release();
     }
 
-    feature_value = (uint32_t)next_feature_id;
-    feature_value <<= 16;
-    feature_value += supportedWhere;
-
-    new_feature_data = OSNumber::withNumber(
-                                (unsigned long long)feature_value, 32);
+    DLOG("mergedChildDesire 0x%lx, extraSleepDelay %ld\n",
+        mergedChildDesire, extraSleepDelay);
 
-    // Does features object already exist?
-    if( (osObj = features->getObject(feature)) )
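+    // All children are idle. Before the display wrangler appears, hold
+    // ON_STATE with an idle timer; afterwards, consider idle sleep as
+    // soon as possible.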
+    if ( !mergedChildDesire && !systemBooting )
     {
-        if(( existing_feature = OSDynamicCast(OSNumber, osObj) ))
-        {
-            // We need to create an OSArray to hold the now 2 elements.
-            existing_feature_arr = OSArray::withObjects(
-                            (const OSObject **)&existing_feature, 1, 2);
-        } else if(( existing_feature_arr = OSDynamicCast(OSArray, osObj) ))
+        if (!wrangler)
         {
-            // Add object to existing array        
-            existing_feature_arr = OSArray::withArray(
-                            existing_feature_arr,
-                            existing_feature_arr->getCount() + 1);
+            changePowerStateToPriv(ON_STATE);
+            if (idleSeconds)
+            {
+                // stay awake for at least idleSeconds
+                startIdleSleepTimer(idleSeconds);
+            }
         }
-
-        if (existing_feature_arr)
+        else if (!extraSleepDelay && !idleSleepTimerPending && !systemDarkWake)
         {
-            existing_feature_arr->setObject(new_feature_data);
-            features->setObject(feature, existing_feature_arr);
-            existing_feature_arr->release();
-            existing_feature_arr = 0;
+            sleepASAP = true;
         }
-    } else {
-        // The easy case: no previously existing features listed. We simply
-        // set the OSNumber at key 'feature' and we're on our way.
-        features->setObject(feature, new_feature_data);        
     }
-    
-    new_feature_data->release();
 
-    setProperty(kRootDomainSupportedFeatures, features);
+    // Drop our power clamp to SLEEP_STATE when all children have become idle,
+    // and the system sleep and display sleep slider values are equal.
 
-    features->release();
+    adjustPowerState(sleepASAP);
 
-    if(featuresDictLock) IOLockUnlock(featuresDictLock);    
+    // If our power clamp has already dropped to SLEEP_STATE, and no child
+    // is keeping us at ON_STATE, then the following will trigger idle sleep.
 
-    // Notify EnergySaver and all those in user space so they might
-    // re-populate their feature specific UI    
-    if(pmPowerStateQueue) {
-        pmPowerStateQueue->submitPowerEvent( kPowerEventFeatureChanged );
-    }
+    return super::requestPowerDomainState(
+        editedChildDesire, childConnection, specification);
 }
 
-
 //******************************************************************************
-// removePublishedFeature
+// tellChangeDown
 //
-// Removes previously published feature
+// Override the superclass implementation to send a different message type.
 //******************************************************************************
 
-IOReturn IOPMrootDomain::removePublishedFeature( uint32_t removeFeatureID )
+bool IOPMrootDomain::tellChangeDown( unsigned long stateNum )
 {
-    IOReturn                ret = kIOReturnError;
-    uint32_t                feature_value = 0;
-    uint16_t                feature_id = 0;
-    bool                    madeAChange = false;
-    
-    OSSymbol                *dictKey = NULL;
-    OSCollectionIterator    *dictIterator = NULL;
-    OSArray                 *arrayMember  = NULL;
-    OSNumber                *numberMember = NULL;
-    OSObject                *osObj        = NULL;
-    OSNumber                *osNum        = NULL;
-    OSArray                 *arrayMemberCopy;
+    DLOG("tellChangeDown %u->%u\n",
+        (uint32_t) getPowerState(), (uint32_t) stateNum);
 
-    if(featuresDictLock) IOLockLock(featuresDictLock);
-
-    OSDictionary *features =
-        (OSDictionary *) getProperty(kRootDomainSupportedFeatures);
-    
-    if ( features && OSDynamicCast(OSDictionary, features) )
+    if (SLEEP_STATE == stateNum)
     {
-        // Any modifications to the dictionary are made to the copy to prevent
-        // races & crashes with userland clients. Dictionary updated
-        // automically later.
-        features = OSDictionary::withDictionary(features);
-    } else {
-        features = NULL;
-        ret = kIOReturnNotFound;
-        goto exit;
-    }
-    
-    // We iterate 'features' dictionary looking for an entry tagged
-    // with 'removeFeatureID'. If found, we remove it from our tracking
-    // structures and notify the OS via a general interest message.
-    
-    dictIterator = OSCollectionIterator::withCollection(features);
-    if(!dictIterator) {
-        goto exit;
+        if (!ignoreTellChangeDown)
+            tracePoint( kIOPMTracePointSleepApplications );
+        else
+            tracePoint( kIOPMTracePointSleepPriorityClients );   
     }
-    
-    while( (dictKey = OSDynamicCast(OSSymbol, dictIterator->getNextObject())) )
+
+    if ((SLEEP_STATE == stateNum) && !ignoreTellChangeDown)
     {
-        osObj = features->getObject(dictKey);
-        
-        // Each Feature is either tracked by an OSNumber
-        if( osObj && (numberMember = OSDynamicCast(OSNumber, osObj)) )
-        {
-            feature_value = numberMember->unsigned32BitValue();
-            feature_id = (uint16_t)(feature_value >> 16);
+        userActivityAtSleep = userActivityCount;
+        hibernateAborted = false;
+        DLOG("tellChangeDown::userActivityAtSleep %d\n", userActivityAtSleep);
 
-            if( feature_id == (uint16_t)removeFeatureID )
-            {
-                // Remove this node
-                features->removeObject(dictKey);
-                madeAChange = true;
-                break;
-            }
-        
-        // Or tracked by an OSArray of OSNumbers
-        } else if( osObj && (arrayMember = OSDynamicCast(OSArray, osObj)) )
-        {
-            unsigned int arrayCount = arrayMember->getCount();
-            
-            for(unsigned int i=0; i<arrayCount; i++)
-            {
-                osNum = OSDynamicCast(OSNumber, arrayMember->getObject(i));
-                if(!osNum) {
-                    continue;
-                }
-                
-                feature_value = osNum->unsigned32BitValue();
-                feature_id = (uint16_t)(feature_value >> 16);
+        // Direct callout into OSKext so it can disable kext unloads
+        // during sleep/wake to prevent deadlocks.
+        OSKextSystemSleepOrWake( kIOMessageSystemWillSleep );
 
-                if( feature_id == (uint16_t)removeFeatureID )
-                {
-                    // Remove this node
-                    if( 1 == arrayCount ) {
-                        // If the array only contains one element, remove
-                        // the whole thing.
-                        features->removeObject(dictKey);
-                    } else {
-                        // Otherwise remove the element from a copy of the array.
-                        arrayMemberCopy = OSArray::withArray(arrayMember);
-                        if (arrayMemberCopy)
-                        {
-                            arrayMemberCopy->removeObject(i);
-                            features->setObject(dictKey, arrayMemberCopy);
-                            arrayMemberCopy->release();
-                        }
-                    }
+        IOService::updateConsoleUsers(NULL, kIOMessageSystemWillSleep);
 
-                    madeAChange = true;
-                    break;
-                }
-            }
-        }    
-    }
-    
-    dictIterator->release();
-    
-    if( madeAChange )
-    {
-        ret = kIOReturnSuccess;    
+        // Notify platform that sleep has begun
+        getPlatform()->callPlatformFunction(
+                        sleepMessagePEFunction, false,
+                        (void *)(uintptr_t) kIOMessageSystemWillSleep,
+                        NULL, NULL, NULL);
 
-        setProperty(kRootDomainSupportedFeatures, features);
-    
-        // Notify EnergySaver and all those in user space so they might
-        // re-populate their feature specific UI    
-        if(pmPowerStateQueue) {
-            pmPowerStateQueue->submitPowerEvent( kPowerEventFeatureChanged );
-        }
-    } else {
-        ret = kIOReturnNotFound;
+        // Two change downs are sent by IOServicePM. Ignore the 2nd.
+        // But tellClientsWithResponse() must be called for both.
+        ignoreTellChangeDown = true;
     }
-    
-exit:
-    if(features)    features->release();
-    if(featuresDictLock) IOLockUnlock(featuresDictLock);    
-    return ret;
-}
 
+    return super::tellClientsWithResponse( kIOMessageSystemWillSleep );
+}
 
 //******************************************************************************
-// announcePowerSourceChange
+// askChangeDown
 //
-// Notifies "interested parties" that the battery state has changed
+// Override the superclass implementation to send a different message type.
+// This must be idle sleep since we don't ask during any other power change.
 //******************************************************************************
 
-void IOPMrootDomain::announcePowerSourceChange( void )
+bool IOPMrootDomain::askChangeDown( unsigned long stateNum )
 {
-#ifdef __ppc__
-    IORegistryEntry *_batteryRegEntry = (IORegistryEntry *) getProperty("BatteryEntry");
+    DLOG("askChangeDown %u->%u\n",
+        (uint32_t) getPowerState(), (uint32_t) stateNum);
 
-    // (if possible) re-publish power source state under IOPMrootDomain;
-    // only do so if the battery controller publishes an IOResource 
-    // defining battery location. Called from ApplePMU battery driver.
+    // Don't log for dark wake entry
+    if (kSystemTransitionSleep == _systemTransitionType)
+        tracePoint( kIOPMTracePointSleepApplications );
 
-    if(_batteryRegEntry)
-    {
-        OSArray             *batt_info;
-        batt_info = (OSArray *) _batteryRegEntry->getProperty(kIOBatteryInfoKey);
-        if(batt_info)
-            setProperty(kIOBatteryInfoKey, batt_info);
-    }
-#endif
+    return super::tellClientsWithResponse( kIOMessageCanSystemSleep );
 }
 
-
 //******************************************************************************
-// setPMSetting (private)
+// askChangeDownDone
 //
-// Internal helper to relay PM settings changes from user space to individual
-// drivers. Should be called only by IOPMrootDomain::setProperties.
+// Called by PM after all apps have responded to kIOMessageCanSystemSleep.
+// pmconfigd may create a deny sleep assertion before ack'ing.
 //******************************************************************************
 
-IOReturn IOPMrootDomain::setPMSetting(
-    const OSSymbol *type,
-    OSObject *obj)
+void IOPMrootDomain::askChangeDownDone(
+        IOPMPowerChangeFlags * inOutChangeFlags, bool * cancel )
 {
-    OSArray             *arr = NULL;
-    PMSettingObject     *p_obj = NULL;
-    int                 count;
-    int                 i;
+    DLOG("askChangeDownDone(0x%x, %u) type %x, cap %x->%x\n",
+        *inOutChangeFlags, *cancel,
+        _systemTransitionType,
+        _currentCapability, _pendingCapability);
 
-    if(NULL == type) return kIOReturnBadArgument;
+    if ((false == *cancel) && (kSystemTransitionSleep == _systemTransitionType))
+    {
+        // Dark->Sleep transition.
+        // Check if there are any deny sleep assertions.
+        // Full->Dark transition is never cancelled.
 
-    IORecursiveLockLock(settingsCtrlLock);
-    
-    fPMSettingsDict->setObject(type, obj);
+        if (!checkSystemCanSleep(true))
+        {
+            // Cancel dark wake to sleep transition.
+            // Must re-scan assertions upon entering dark wake.
 
-    arr = (OSArray *)settingsCallbacks->getObject(type);
-    if(NULL == arr) goto exit;
-    count = arr->getCount();
-    for(i=0; i<count; i++) {
-        p_obj = (PMSettingObject *)OSDynamicCast(PMSettingObject, arr->getObject(i));
-        if(p_obj) p_obj->setPMSetting(type, obj);
+            *cancel = true;
+            DLOG("cancel dark->sleep\n");
+        }
     }
-
-exit:
-    IORecursiveLockUnlock(settingsCtrlLock);
-    return kIOReturnSuccess;
 }
 
-
 //******************************************************************************
-// copyPMSetting (public)
+// tellNoChangeDown
 //
-// Allows kexts to safely read setting values, without being subscribed to
-// notifications.
-//******************************************************************************
-
-OSObject * IOPMrootDomain::copyPMSetting(
-    OSSymbol *whichSetting)
-{
-    OSObject *obj = NULL;
+// Notify registered applications and kernel clients that we are not dropping
+// power.
+//
+// We override the superclass implementation so we can send a different message
+// type to the client or application being notified.
+//
+// This must be a vetoed idle sleep, since no other power change can be vetoed.
+//******************************************************************************
 
-    if(!whichSetting) return NULL;
+void IOPMrootDomain::tellNoChangeDown( unsigned long stateNum )
+{
+    DLOG("tellNoChangeDown %u->%u\n",
+        (uint32_t) getPowerState(), (uint32_t) stateNum);
 
-    IORecursiveLockLock(settingsCtrlLock);
-    obj = fPMSettingsDict->getObject(whichSetting);
-    if(obj) {
-        obj->retain();
+    if (idleSeconds && !wrangler)
+    {
+        // stay awake for at least idleSeconds
+        startIdleSleepTimer(idleSeconds);
     }
-    IORecursiveLockUnlock(settingsCtrlLock);
-    
-    return obj;
+    return tellClients( kIOMessageSystemWillNotSleep );
 }
 
-
 //******************************************************************************
-// registerPMSettingController (public)
+// tellChangeUp
 //
-// direct wrapper to registerPMSettingController with uint32_t power source arg
+// Notify registered applications and kernel clients that we are raising power.
+//
+// We override the superclass implementation so we can send a different message
+// type to the client or application being notified.
 //******************************************************************************
 
-IOReturn IOPMrootDomain::registerPMSettingController(
-    const OSSymbol *                settings[],
-    IOPMSettingControllerCallback   func,
-    OSObject                        *target,
-    uintptr_t                       refcon,
-    OSObject                        **handle)
+void IOPMrootDomain::tellChangeUp( unsigned long stateNum )
 {
-    return registerPMSettingController( 
-            settings,
-            (kIOPMSupportedOnAC | kIOPMSupportedOnBatt | kIOPMSupportedOnUPS),
-            func, target, refcon, handle);
-}
+    OSData *publishPMStats = NULL;
+
+    DLOG("tellChangeUp %u->%u\n",
+        (uint32_t) getPowerState(), (uint32_t) stateNum);
+
+    ignoreTellChangeDown = false;
+
+    if ( stateNum == ON_STATE )
+    {
+        // Direct callout into OSKext so it can disable kext unloads
+        // during sleep/wake to prevent deadlocks.
+        OSKextSystemSleepOrWake( kIOMessageSystemHasPoweredOn );
+
+        // Notify platform that sleep was cancelled or resumed.
+        getPlatform()->callPlatformFunction(
+                        sleepMessagePEFunction, false,
+                        (void *)(uintptr_t) kIOMessageSystemHasPoweredOn,
+                        NULL, NULL, NULL);
+
+        if (getPowerState() == ON_STATE)
+        {
+            // this is a quick wake from aborted sleep
+            if (idleSeconds && !wrangler)
+            {
+                // stay awake for at least idleSeconds
+                startIdleSleepTimer(idleSeconds);
+            }
+            tellClients( kIOMessageSystemWillPowerOn );
+        }
+
+        tracePoint( kIOPMTracePointWakeApplications );
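+        // Publish the sleep/wake statistics gathered over the completed
+        // cycle, then reset them for the next one.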
+        publishPMStats = OSData::withBytes(&pmStats, sizeof(pmStats));
+        setProperty(kIOPMSleepStatisticsKey, publishPMStats);
+        publishPMStats->release();
+        bzero(&pmStats, sizeof(pmStats));
+
+        if (pmStatsAppResponses) 
+        {
+            setProperty(kIOPMSleepStatisticsAppsKey, pmStatsAppResponses);
+            pmStatsAppResponses->release();
+            pmStatsAppResponses = OSArray::withCapacity(5);
+        }
 
+        tellClients( kIOMessageSystemHasPoweredOn );
+    }
+}
 
 //******************************************************************************
-// registerPMSettingController (public)
+// sysPowerDownHandler
 //
-// Kexts may register for notifications when a particular setting is changed.
-// A list of settings is available in IOPM.h.
-// Arguments:
-//  * settings - An OSArray containing OSSymbols. Caller should populate this
-//          array with a list of settings caller wants notifications from.
-//  * func - A C function callback of the type IOPMSettingControllerCallback
-//  * target - caller may provide an OSObject *, which PM will pass as an 
-//          target to calls to "func"
-//  * refcon - caller may provide an void *, which PM will pass as an 
-//          argument to calls to "func"
-//  * handle - This is a return argument. We will populate this pointer upon
-//          call success. Hold onto this and pass this argument to
-//          IOPMrootDomain::deRegisterPMSettingCallback when unloading your kext
-// Returns:
-//      kIOReturnSuccess on success
+// Perform a vfs sync before system sleep.
 //******************************************************************************
 
-IOReturn IOPMrootDomain::registerPMSettingController(
-    const OSSymbol *                settings[],
-    uint32_t                        supportedPowerSources,
-    IOPMSettingControllerCallback   func,
-    OSObject                        *target,
-    uintptr_t                       refcon,
-    OSObject                        **handle)
+IOReturn IOPMrootDomain::sysPowerDownHandler(
+    void * target, void * refCon,
+    UInt32 messageType, IOService * service,
+    void * messageArgs, vm_size_t argSize )
 {
-    PMSettingObject     *pmso = NULL;
-    OSArray             *list = NULL;
-    IOReturn            ret = kIOReturnSuccess;
-    int                 i;
+    // Initialized so unhandled message types don't return garbage.
+    IOReturn    ret = kIOReturnUnsupported;
 
-    if( NULL == settings ||
-        NULL == func ||
-        NULL == handle)
+    DLOG("sysPowerDownHandler message %s\n", getIOMessageString(messageType));
+
+    if (!gRootDomain)
+        return kIOReturnUnsupported;
+
+    if (messageType == kIOMessageSystemCapabilityChange)
     {
-        return kIOReturnBadArgument;
-    }
+        IOPMSystemCapabilityChangeParameters * params =
+            (IOPMSystemCapabilityChangeParameters *) messageArgs;
+
+        // Interested applications have been notified of an impending power
+        // change and have acked (when applicable).
+        // This is our chance to save whatever state we can before powering
+        // down.
+        // We call sync_internal, defined in xnu/bsd/vfs/vfs_syscalls.c,
+        // via a thread callout.
+
+        DLOG("sysPowerDownHandler cap %x -> %x (flags %x)\n",
+            params->fromCapabilities, params->toCapabilities,
+            params->changeFlags);
+
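+        // The checks below distinguish the entry to sleep (CPU capability
+        // will be lost) from the return from sleep (CPU capability was
+        // just restored).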
+        if ((params->changeFlags & kIOPMSystemCapabilityWillChange) &&
+            (params->fromCapabilities & kIOPMSystemCapabilityCPU) &&
+            (params->toCapabilities & kIOPMSystemCapabilityCPU) == 0)
+        {
+            // We will ack within 20 seconds
+            params->maxWaitForReply = 20 * 1000 * 1000;
+#if	HIBERNATION
+            gRootDomain->evaluateSystemSleepPolicyEarly();
 
-    pmso = PMSettingObject::pmSettingObject(
-                (IOPMrootDomain *)this, func, target, 
-                refcon, supportedPowerSources, settings);
+            // add in time we could spend freeing pages
+            if (gRootDomain->hibernateMode && !gRootDomain->hibernateDisabled)
+            {
+                params->maxWaitForReply = kCapabilityClientMaxWait;
+            }
+            DLOG("sysPowerDownHandler timeout %d s\n", (int) (params->maxWaitForReply / 1000 / 1000));
+#endif
+
+            if ( !OSCompareAndSwap( 0, 1, &gSleepOrShutdownPending ) )
+            {
+                // Purposely delay the ack and hope that shutdown occurs quickly.
+                // Another option is not to schedule the thread and wait for
+                // ack timeout...
+                AbsoluteTime deadline;
+                clock_interval_to_deadline( 30, kSecondScale, &deadline );
+                thread_call_enter1_delayed(
+                    gRootDomain->diskSyncCalloutEntry, 
+                    (thread_call_param_t) params->notifyRef,
+                    deadline );
+            }
+            else
+                thread_call_enter1(
+                    gRootDomain->diskSyncCalloutEntry,
+                    (thread_call_param_t) params->notifyRef);
+        }
+#if	HIBERNATION
+        else
+        if ((params->changeFlags & kIOPMSystemCapabilityDidChange) &&
+            (params->toCapabilities & kIOPMSystemCapabilityCPU) &&
+            (params->fromCapabilities & kIOPMSystemCapabilityCPU) == 0)
+        {
+            // We will ack within 110 seconds
+            params->maxWaitForReply = 110 * 1000 * 1000;
 
-    if(!pmso) {
-        ret = kIOReturnInternalError;
-        goto bail_no_unlock;
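+            // Wake path: run the disk sync callout again for the
+            // post-wake portion of its work.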
+            thread_call_enter1(
+                gRootDomain->diskSyncCalloutEntry,
+                (thread_call_param_t) params->notifyRef);
+        }
+#endif
+        ret = kIOReturnSuccess;
     }
 
-    IORecursiveLockLock(settingsCtrlLock);
-    for(i=0; settings[i]; i++) 
+    return ret;
+}
+
+//******************************************************************************
+// handleQueueSleepWakeUUID
+//
+// Called from IOPMrootDomain when we're initiating a sleep,
+// or indirectly from PM configd when PM decides to clear the UUID.
+// PM clears the UUID several minutes after successful wake from sleep,
+// so that we might associate App spindumps with the immediately previous
+// sleep/wake.
+//
+// @param   obj has a retain on it. We're responsible for releasing that retain.
+//******************************************************************************
+
+void IOPMrootDomain::handleQueueSleepWakeUUID(OSObject *obj)
+{
+    OSString    *str = NULL;
+
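+    // kOSBooleanFalse requests that the current UUID be cleared now;
+    // an OSString carries a new UUID to cache for the next sleep.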
+    if (kOSBooleanFalse == obj) 
     {
-        list = (OSArray *)settingsCallbacks->getObject(settings[i]);
-        if(!list) {
-            // New array of callbacks for this setting
-            list = OSArray::withCapacity(1);
-            settingsCallbacks->setObject(settings[i], list);
-            list->release();
+        handlePublishSleepWakeUUID(NULL);
+    }
+    else if ((str = OSDynamicCast(OSString, obj))) 
+    {
+        // This branch caches the UUID for an upcoming sleep/wake        
+        if (queuedSleepWakeUUIDString) {
+            queuedSleepWakeUUIDString->release();
+            queuedSleepWakeUUIDString = NULL;
         }
+        queuedSleepWakeUUIDString = str;
+        queuedSleepWakeUUIDString->retain();
 
-        // Add caller to the callback list
-        list->setObject(pmso);
+        DLOG("SleepWake UUID queued: %s\n", queuedSleepWakeUUIDString->getCStringNoCopy());
     }
 
-    IORecursiveLockUnlock(settingsCtrlLock);
-    
-    ret = kIOReturnSuccess;
+    if (obj) {
+        obj->release();
+    }
+    return;
+}
+
+//******************************************************************************
+// handlePublishSleepWakeUUID
+//
+// Called on the PM work loop to clear the currently published sleep/wake
+// UUID and, optionally, publish the UUID queued for the next sleep. PM
+// clears the UUID several minutes after a successful wake from sleep, so
+// that App spindumps can be associated with the immediately previous
+// sleep/wake.
+//******************************************************************************
+
+void IOPMrootDomain::handlePublishSleepWakeUUID( bool shouldPublish )
+{
+    ASSERT_GATED();
+
+    /*
+     * Clear the current UUID
+     */
+    if (gSleepWakeUUIDIsSet)
+    {
+        DLOG("SleepWake UUID cleared\n");
+
+        OSString *UUIDstring = NULL;
+        
+        if (timeline && 
+            (UUIDstring = OSDynamicCast(OSString, getProperty(kIOPMSleepWakeUUIDKey)))) 
+        {
+            PMEventDetails *details = PMEventDetails::eventDetails(kIOPMEventTypeUUIDClear, 
+                            UUIDstring->getCStringNoCopy(), NULL, 0);
+            if (details) {
+                timeline->recordSystemPowerEvent( details );
+                details->release();
+            }
+            timeline->setNumEventsLoggedThisPeriod(0); 
+        }
+
+        gSleepWakeUUIDIsSet = false;
+
+        removeProperty(kIOPMSleepWakeUUIDKey);
+        messageClients(kIOPMMessageSleepWakeUUIDChange, kIOPMMessageSleepWakeUUIDCleared);
+    }
+
+    /*
+     * Optionally, publish a new UUID
+     */
+    if (queuedSleepWakeUUIDString && shouldPublish) {
+
+        OSString  *publishThisUUID = NULL;
+
+        publishThisUUID = queuedSleepWakeUUIDString;
+        publishThisUUID->retain();
+
+        if (timeline) {
+            PMEventDetails  *details;
+            details = PMEventDetails::eventDetails(kIOPMEventTypeUUIDSet,
+                              publishThisUUID->getCStringNoCopy(), NULL, 0);
+            if (details) {
+                timeline->recordSystemPowerEvent( details );
+                details->release();
+            }
+        }
+        
+        if (publishThisUUID)
+        {
+            setProperty(kIOPMSleepWakeUUIDKey, publishThisUUID);
+            publishThisUUID->release();
+        }
+        
+        gSleepWakeUUIDIsSet = true;
+        messageClients(kIOPMMessageSleepWakeUUIDChange, kIOPMMessageSleepWakeUUIDSet);
+
+        queuedSleepWakeUUIDString->release();
+        queuedSleepWakeUUIDString = NULL;
+    }
+}
+
+//******************************************************************************
+// changePowerStateTo & changePowerStateToPriv
+//
+// Overrides of these methods. changePowerStateTo() requests from external
+// callers are ignored; changePowerStateToPriv() accepts only ON_STATE and
+// SLEEP_STATE.
+//******************************************************************************
+
+IOReturn IOPMrootDomain::changePowerStateTo( unsigned long ordinal )
+{
+    return kIOReturnUnsupported;    // ignored
+}
+
+IOReturn IOPMrootDomain::changePowerStateToPriv( unsigned long ordinal )
+{
+    DLOG("changePowerStateToPriv(%lu)\n", ordinal);
+
+    if ((ordinal != ON_STATE) && (ordinal != SLEEP_STATE))
+        return kIOReturnUnsupported;
 
-    // Track this instance by its OSData ptr from now on  
-    *handle = pmso;
+    return super::changePowerStateToPriv(ordinal);
+}
+
+//******************************************************************************
+// activity detect
+//
+//******************************************************************************
+
+bool IOPMrootDomain::activitySinceSleep(void)
+{
+    return (userActivityCount != userActivityAtSleep);
+}
+
+bool IOPMrootDomain::abortHibernation(void)
+{
+    bool ret = activitySinceSleep();
 
-bail_no_unlock:
-    if(kIOReturnSuccess != ret) 
+    if (ret && !hibernateAborted)
     {
-        // Error return case
-        if(pmso) pmso->release();
-        if(handle) *handle = NULL;
+        DLOG("activitySinceSleep ABORT [%d, %d]\n", userActivityCount, userActivityAtSleep);
+        hibernateAborted = true;
     }
-    return ret;
+    return (ret);
 }
 
+extern "C" int
+hibernate_should_abort(void)
+{
+    if (gRootDomain)
+        return (gRootDomain->abortHibernation());
+    else
+        return (0);
+}
 
 //******************************************************************************
 // sleepOnClamshellClosed
@@ -2544,13 +2723,13 @@ bail_no_unlock:
 
 bool IOPMrootDomain::shouldSleepOnClamshellClosed( void )
 {
-    DLOG("clamshell state %d, EX %d, IG %d, IW %d, DT %d, AC %d\n",
-        clamshellIsClosed, clamshellExists, ignoringClamshell,
-        ignoringClamshellOnWake, desktopMode, acAdaptorConnected);
+    if (!clamshellExists)
+        return false;
+
+    DLOG("clamshell closed %d, disabled %d, desktopMode %d, ac %d\n",
+        clamshellClosed, clamshellDisabled, desktopMode, acAdaptorConnected);
 
-    return ( !ignoringClamshell 
-          && !ignoringClamshellOnWake 
-          && !(desktopMode && acAdaptorConnected) );
+    return ( !clamshellDisabled && !(desktopMode && acAdaptorConnected) );
 }
 
 void IOPMrootDomain::sendClientClamshellNotification( void )
@@ -2560,7 +2739,7 @@ void IOPMrootDomain::sendClientClamshellNotification( void )
         return;
 
     setProperty(kAppleClamshellStateKey, 
-        clamshellIsClosed ? kOSBooleanTrue : kOSBooleanFalse);
+        clamshellClosed ? kOSBooleanTrue : kOSBooleanFalse);
 
     setProperty(kAppleClamshellCausesSleepKey, 
         shouldSleepOnClamshellClosed() ? kOSBooleanTrue : kOSBooleanFalse);
@@ -2569,28 +2748,566 @@ void IOPMrootDomain::sendClientClamshellNotification( void )
      *      ( kClamshellStateBit | kClamshellSleepBit )
      */
     messageClients(kIOPMMessageClamshellStateChange,
-        (void *) ( (clamshellIsClosed ? kClamshellStateBit : 0)
+        (void *) ( (clamshellClosed ? kClamshellStateBit : 0)
              | ( shouldSleepOnClamshellClosed() ? kClamshellSleepBit : 0)) );
 }
 
-
 //******************************************************************************
-// informCPUStateChange
-//
-// Call into PM CPU code so that CPU power savings may dynamically adjust for
-// running on battery, with the lid closed, etc.
+// getSleepSupported
 //
-// informCPUStateChange is a no-op on non x86 systems
-// only x86 has explicit support in the IntelCPUPowerManagement kext
+// Deprecated
 //******************************************************************************
 
-void IOPMrootDomain::informCPUStateChange(
-    uint32_t type, 
-    uint32_t value )
+IOOptionBits IOPMrootDomain::getSleepSupported( void )
 {
-#if defined(__i386__) || defined(__x86_64__)
+    return( platformSleepSupport );
+}
 
-    pmioctlVariableInfo_t varInfoStruct;                            
+//******************************************************************************
+// setSleepSupported
+//
+// Deprecated
+//******************************************************************************
+
+void IOPMrootDomain::setSleepSupported( IOOptionBits flags )
+{
+    DLOG("setSleepSupported(%x)\n", (uint32_t) flags);
+    OSBitOrAtomic(flags, &platformSleepSupport);
+}
+
+//******************************************************************************
+// wakeFromDoze
+//
+// Deprecated.
+//******************************************************************************
+
+void IOPMrootDomain::wakeFromDoze( void )
+{
+    // Preserve symbol for families (IOUSBFamily and IOGraphics)
+}
+
+// MARK: -
+// MARK: Features
+
+//******************************************************************************
+// publishFeature
+//
+// Adds a new feature to the supported features dictionary
+//******************************************************************************
+
+void IOPMrootDomain::publishFeature( const char * feature )
+{
+    publishFeature(feature, kRD_AllPowerSources, NULL);
+}
+
+//******************************************************************************
+// publishFeature (with supported power source specified)
+//
+// Adds a new feature to the supported features dictionary
+//******************************************************************************
+
+void IOPMrootDomain::publishFeature(
+    const char *feature, 
+    uint32_t supportedWhere,
+    uint32_t *uniqueFeatureID)
+{
+    static uint16_t     next_feature_id = 500;
+
+    OSNumber            *new_feature_data = NULL;
+    OSNumber            *existing_feature = NULL;
+    OSArray             *existing_feature_arr = NULL;
+    OSObject            *osObj = NULL;
+    uint32_t            feature_value = 0;
+
+    supportedWhere &= kRD_AllPowerSources; // mask off any craziness!
+
+    if(!supportedWhere) {
+        // Feature isn't supported anywhere!
+        return;
+    }
+    
+    if(next_feature_id > 5000) {
+        // Far, far too many features!
+        return;
+    }
+
+    if(featuresDictLock) IOLockLock(featuresDictLock);
+
+    OSDictionary *features =
+        (OSDictionary *) getProperty(kRootDomainSupportedFeatures);
+    
+    // Create new features dict if necessary
+    if ( features && OSDynamicCast(OSDictionary, features)) {
+        features = OSDictionary::withDictionary(features);
+    } else {
+        features = OSDictionary::withCapacity(1);
+    }
+    
+    // Create OSNumber to track new feature
+    
+    next_feature_id += 1;
+    if( uniqueFeatureID ) {
+        // We don't really mind if the calling kext didn't give us a place
+        // to stash their unique id. Many kexts don't plan to unload, and thus
+        // have no need to remove themselves later.
+        *uniqueFeatureID = next_feature_id;
+    }
+
+    feature_value = (uint32_t)next_feature_id;
+    feature_value <<= 16;
+    feature_value += supportedWhere;
+
+    new_feature_data = OSNumber::withNumber(
+                                (unsigned long long)feature_value, 32);
+
+    // Does features object already exist?
+    if( (osObj = features->getObject(feature)) )
+    {
+        if(( existing_feature = OSDynamicCast(OSNumber, osObj) ))
+        {
+            // We need to create an OSArray to hold what are now two elements.
+            existing_feature_arr = OSArray::withObjects(
+                            (const OSObject **)&existing_feature, 1, 2);
+        } else if(( existing_feature_arr = OSDynamicCast(OSArray, osObj) ))
+        {
+            // Add object to existing array        
+            existing_feature_arr = OSArray::withArray(
+                            existing_feature_arr,
+                            existing_feature_arr->getCount() + 1);
+        }
+
+        if (existing_feature_arr)
+        {
+            existing_feature_arr->setObject(new_feature_data);
+            features->setObject(feature, existing_feature_arr);
+            existing_feature_arr->release();
+            existing_feature_arr = 0;
+        }
+    } else {
+        // The easy case: no previously existing features listed. We simply
+        // set the OSNumber at key 'feature' and we're on our way.
+        features->setObject(feature, new_feature_data);        
+    }
+    
+    new_feature_data->release();
+
+    setProperty(kRootDomainSupportedFeatures, features);
+
+    features->release();
+
+    if(featuresDictLock) IOLockUnlock(featuresDictLock);    
+
+    // Notify EnergySaver and all those in user space so they might
+    // re-populate their feature-specific UI
+    if(pmPowerStateQueue) {
+        pmPowerStateQueue->submitPowerEvent( kPowerEventFeatureChanged );
+    }
+}
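+
+// Example (illustrative sketch, not part of the build): publishing a feature
+// visible only while on AC power. "MyFancyFeature" is a hypothetical name;
+// the returned 'featureID' can later be used to retract the feature.
+//
+//     uint32_t featureID = 0;
+//     getPMRootDomain()->publishFeature("MyFancyFeature",
+//         kIOPMSupportedOnAC, &featureID);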
+
+//******************************************************************************
+// removePublishedFeature
+//
+// Removes previously published feature
+//******************************************************************************
+
+IOReturn IOPMrootDomain::removePublishedFeature( uint32_t removeFeatureID )
+{
+    IOReturn                ret = kIOReturnError;
+    uint32_t                feature_value = 0;
+    uint16_t                feature_id = 0;
+    bool                    madeAChange = false;
+    
+    OSSymbol                *dictKey = NULL;
+    OSCollectionIterator    *dictIterator = NULL;
+    OSArray                 *arrayMember  = NULL;
+    OSNumber                *numberMember = NULL;
+    OSObject                *osObj        = NULL;
+    OSNumber                *osNum        = NULL;
+    OSArray                 *arrayMemberCopy;
+
+    if(featuresDictLock) IOLockLock(featuresDictLock);
+
+    OSDictionary *features =
+        (OSDictionary *) getProperty(kRootDomainSupportedFeatures);
+    
+    if ( features && OSDynamicCast(OSDictionary, features) )
+    {
+        // Any modifications to the dictionary are made to the copy to prevent
+        // races & crashes with userland clients. The dictionary property is
+        // updated atomically later.
+        features = OSDictionary::withDictionary(features);
+    } else {
+        features = NULL;
+        ret = kIOReturnNotFound;
+        goto exit;
+    }
+    
+    // We iterate the 'features' dictionary looking for an entry tagged
+    // with 'removeFeatureID'. If found, we remove it from our tracking
+    // structures and notify the OS via a general interest message.
+    
+    dictIterator = OSCollectionIterator::withCollection(features);
+    if(!dictIterator) {
+        goto exit;
+    }
+    
+    while( (dictKey = OSDynamicCast(OSSymbol, dictIterator->getNextObject())) )
+    {
+        osObj = features->getObject(dictKey);
+        
+        // Each feature is either tracked by an OSNumber
+        if( osObj && (numberMember = OSDynamicCast(OSNumber, osObj)) )
+        {
+            feature_value = numberMember->unsigned32BitValue();
+            feature_id = (uint16_t)(feature_value >> 16);
+
+            if( feature_id == (uint16_t)removeFeatureID )
+            {
+                // Remove this node
+                features->removeObject(dictKey);
+                madeAChange = true;
+                break;
+            }
+        
+        // Or tracked by an OSArray of OSNumbers
+        } else if( osObj && (arrayMember = OSDynamicCast(OSArray, osObj)) )
+        {
+            unsigned int arrayCount = arrayMember->getCount();
+            
+            for(unsigned int i=0; i<arrayCount; i++)
+            {
+                osNum = OSDynamicCast(OSNumber, arrayMember->getObject(i));
+                if(!osNum) {
+                    continue;
+                }
+                
+                feature_value = osNum->unsigned32BitValue();
+                feature_id = (uint16_t)(feature_value >> 16);
+
+                if( feature_id == (uint16_t)removeFeatureID )
+                {
+                    // Remove this node
+                    if( 1 == arrayCount ) {
+                        // If the array only contains one element, remove
+                        // the whole thing.
+                        features->removeObject(dictKey);
+                    } else {
+                        // Otherwise remove the element from a copy of the array.
+                        arrayMemberCopy = OSArray::withArray(arrayMember);
+                        if (arrayMemberCopy)
+                        {
+                            arrayMemberCopy->removeObject(i);
+                            features->setObject(dictKey, arrayMemberCopy);
+                            arrayMemberCopy->release();
+                        }
+                    }
+
+                    madeAChange = true;
+                    break;
+                }
+            }
+        }    
+    }
+    
+    dictIterator->release();
+    
+    if( madeAChange )
+    {
+        ret = kIOReturnSuccess;    
+
+        setProperty(kRootDomainSupportedFeatures, features);
+    
+        // Notify EnergySaver and all those in user space so they might
+        // re-populate their feature-specific UI
+        if(pmPowerStateQueue) {
+            pmPowerStateQueue->submitPowerEvent( kPowerEventFeatureChanged );
+        }
+    } else {
+        ret = kIOReturnNotFound;
+    }
+    
+exit:
+    if(features)    features->release();
+    if(featuresDictLock) IOLockUnlock(featuresDictLock);    
+    return ret;
+}
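+
+// Example (illustrative sketch): retracting a feature before unload using the
+// uniqueFeatureID saved from publishFeature(). Returns kIOReturnNotFound if
+// the ID was never published or was already removed.
+//
+//     IOReturn ret = getPMRootDomain()->removePublishedFeature(featureID);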
+
+//******************************************************************************
+// setPMSetting (private)
+//
+// Internal helper to relay PM settings changes from user space to individual
+// drivers. Should be called only by IOPMrootDomain::setProperties.
+//******************************************************************************
+
+IOReturn IOPMrootDomain::setPMSetting(
+    const OSSymbol  *type,
+    OSObject        *object )
+{
+    PMSettingCallEntry  *entries = 0;
+    OSArray             *chosen  = 0;
+    const OSArray       *array;
+    PMSettingObject     *pmso;
+    thread_t            thisThread;
+    int                 i, j, count, capacity;
+
+    if (NULL == type)
+        return kIOReturnBadArgument;
+
+    PMSETTING_LOCK();
+
+    // Update settings dict so changes are visible from copyPMSetting().    
+    fPMSettingsDict->setObject(type, object);
+
+    // Prep all PMSetting objects with the given 'type' for callout.
+    array = (const OSArray *) settingsCallbacks->getObject(type);
+    if (!array || ((capacity = array->getCount()) == 0))
+        goto unlock_exit;
+
+    // Array to retain PMSetting objects targeted for callout.
+    chosen = OSArray::withCapacity(capacity);
+    if (!chosen)
+        goto unlock_exit;   // error
+
+    entries = IONew(PMSettingCallEntry, capacity);
+    if (!entries)
+        goto unlock_exit;   // error
+    memset(entries, 0, sizeof(PMSettingCallEntry) * capacity);
+
+    thisThread = current_thread();
+
+    for (i = 0, j = 0; i<capacity; i++)
+    {
+        pmso = (PMSettingObject *) array->getObject(i);
+        if (pmso->disabled)
+            continue;
+        entries[j].thread = thisThread;        
+        queue_enter(&pmso->calloutQueue, &entries[j], PMSettingCallEntry *, link);
+        chosen->setObject(pmso);
+        j++;
+    }
+    count = j;
+    if (!count)
+        goto unlock_exit; 
+
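+    // Drop the settings lock before dispatching callouts. The call entries
+    // queued above let deregisterPMSettingObject() wait for in-flight
+    // callouts to drain before a PMSettingObject is freed.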
+    PMSETTING_UNLOCK();
+
+    // Call each pmso in the chosen array.
+    for (i=0; i<count; i++)
+    {
+        pmso = (PMSettingObject *) chosen->getObject(i);
+        pmso->dispatchPMSetting(type, object);
+    }
+
+    PMSETTING_LOCK();
+    for (i=0; i<count; i++)
+    {
+        pmso = (PMSettingObject *) chosen->getObject(i);
+        queue_remove(&pmso->calloutQueue, &entries[i], PMSettingCallEntry *, link);
+        if (pmso->waitThread)
+        {
+            PMSETTING_WAKEUP(pmso);
+        }
+    }
+unlock_exit:
+    PMSETTING_UNLOCK();
+
+    if (chosen)  chosen->release();
+    if (entries) IODelete(entries, PMSettingCallEntry, capacity);
+
+    return kIOReturnSuccess;
+}
+
+//******************************************************************************
+// copyPMSetting (public)
+//
+// Allows kexts to safely read setting values, without being subscribed to
+// notifications.
+//******************************************************************************
+
+OSObject * IOPMrootDomain::copyPMSetting(
+    OSSymbol *whichSetting)
+{
+    OSObject *obj = NULL;
+
+    if(!whichSetting) return NULL;
+
+    PMSETTING_LOCK();
+    obj = fPMSettingsDict->getObject(whichSetting);
+    if(obj) {
+        obj->retain();
+    }
+    PMSETTING_UNLOCK();
+    
+    return obj;
+}
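+
+// Example (illustrative sketch): reading a setting value. The object comes
+// back with an extra retain that the caller must release.
+//
+//     const OSSymbol *key =
+//         OSSymbol::withCString(kIOPMSettingRestartOnPowerLossKey);
+//     OSObject *val = getPMRootDomain()->copyPMSetting(
+//         const_cast<OSSymbol *>(key));
+//     if (val) val->release();
+//     if (key) key->release();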
+
+//******************************************************************************
+// registerPMSettingController (public)
+//
+// Convenience wrapper that forwards to the registerPMSettingController
+// variant below, enabling all supported power sources.
+//******************************************************************************
+
+IOReturn IOPMrootDomain::registerPMSettingController(
+    const OSSymbol *                settings[],
+    IOPMSettingControllerCallback   func,
+    OSObject                        *target,
+    uintptr_t                       refcon,
+    OSObject                        **handle)
+{
+    return registerPMSettingController( 
+            settings,
+            (kIOPMSupportedOnAC | kIOPMSupportedOnBatt | kIOPMSupportedOnUPS),
+            func, target, refcon, handle);
+}
+
+//******************************************************************************
+// registerPMSettingController (public)
+//
+// Kexts may register for notifications when a particular setting is changed.
+// A list of settings is available in IOPM.h.
+// Arguments:
+//  * settings - A NULL-terminated array of OSSymbol pointers. Caller should
+//          populate this array with the settings it wants notifications for.
+//  * func - A C function callback of the type IOPMSettingControllerCallback
+//  * target - caller may provide an OSObject *, which PM will pass as a
+//          target to calls to "func"
+//  * refcon - caller may provide a uintptr_t, which PM will pass as an
+//          argument to calls to "func"
+//  * handle - This is a return argument. We will populate this pointer upon
+//          call success. Hold onto this and pass this argument to
+//          IOPMrootDomain::deRegisterPMSettingCallback when unloading your kext
+// Returns:
+//      kIOReturnSuccess on success
+//******************************************************************************
+
+IOReturn IOPMrootDomain::registerPMSettingController(
+    const OSSymbol *                settings[],
+    uint32_t                        supportedPowerSources,
+    IOPMSettingControllerCallback   func,
+    OSObject                        *target,
+    uintptr_t                       refcon,
+    OSObject                        **handle)
+{
+    PMSettingObject *pmso = NULL;
+    OSObject        *pmsh = NULL;
+    OSArray         *list = NULL;
+    int             i;
+
+    if (NULL == settings ||
+        NULL == func     ||
+        NULL == handle)
+    {
+        return kIOReturnBadArgument;
+    }
+
+    pmso = PMSettingObject::pmSettingObject(
+                (IOPMrootDomain *) this, func, target, 
+                refcon, supportedPowerSources, settings, &pmsh);
+
+    if (!pmso) {
+        *handle = NULL;
+        return kIOReturnInternalError;
+    }
+
+    PMSETTING_LOCK();
+    for (i=0; settings[i]; i++)
+    {
+        list = (OSArray *) settingsCallbacks->getObject(settings[i]);
+        if (!list) {
+            // New array of callbacks for this setting
+            list = OSArray::withCapacity(1);
+            if (!list)
+                continue;   // allocation failure; skip this setting
+            settingsCallbacks->setObject(settings[i], list);
+            list->release();
+        }
+
+        // Add caller to the callback list
+        list->setObject(pmso);
+    }
+    PMSETTING_UNLOCK();
+
+    // Return handle to the caller, the setting object is private.
+    *handle = pmsh;
+
+    return kIOReturnSuccess;
+}
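+
+// Example (illustrative sketch, not part of the build): a kext subscribing
+// to changes of a single setting. 'mySettingHandler' and 'myDriver' are
+// hypothetical names; the settings array must be NULL-terminated.
+//
+//     static IOReturn mySettingHandler(OSObject *target,
+//         const OSSymbol *type, OSObject *object, uintptr_t refcon)
+//     {
+//         // React to the new value carried in 'object'.
+//         return kIOReturnSuccess;
+//     }
+//
+//     const OSSymbol *settings[] = {
+//         OSSymbol::withCString(kIOPMSettingRestartOnPowerLossKey), NULL };
+//     OSObject *handle = NULL;
+//     getPMRootDomain()->registerPMSettingController(settings,
+//         mySettingHandler, (OSObject *) myDriver, 0, &handle);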
+
+//******************************************************************************
+// deregisterPMSettingObject (private)
+//
+// Only called from PMSettingObject.
+//******************************************************************************
+
+void IOPMrootDomain::deregisterPMSettingObject( PMSettingObject * pmso )
+{
+    thread_t                thisThread = current_thread();
+    PMSettingCallEntry      *callEntry;
+    OSCollectionIterator    *iter;
+    OSSymbol                *sym;
+    OSArray                 *array;
+    int                     index;
+    bool                    wait;
+
+    PMSETTING_LOCK();
+
+    pmso->disabled = true;
+
+    // Wait for all callout threads to finish.
+    do {
+        wait = false;
+        queue_iterate(&pmso->calloutQueue, callEntry, PMSettingCallEntry *, link)
+        {
+            if (callEntry->thread != thisThread)
+            {
+                wait = true;
+                break;
+            }
+        }
+        if (wait)
+        {
+            assert(0 == pmso->waitThread);
+            pmso->waitThread = thisThread;
+            PMSETTING_WAIT(pmso);
+            pmso->waitThread = 0;
+        }
+    } while (wait);
+
+    // Search each PM settings array in the kernel.
+    iter = OSCollectionIterator::withCollection(settingsCallbacks);
+    if (iter) 
+    {
+        while ((sym = OSDynamicCast(OSSymbol, iter->getNextObject())))
+        {
+            array = (OSArray *) settingsCallbacks->getObject(sym);
+            index = array->getNextIndexOfObject(pmso, 0);
+            if (-1 != index) {
+                array->removeObject(index);
+            }
+        }
+        iter->release();
+    }
+
+    PMSETTING_UNLOCK();
+
+    pmso->release();
+}
+
+//******************************************************************************
+// informCPUStateChange
+//
+// Call into PM CPU code so that CPU power savings may dynamically adjust for
+// running on battery, with the lid closed, etc.
+//
+// informCPUStateChange is a no-op on non-x86 systems; only x86 has explicit
+// support in the IntelCPUPowerManagement kext.
+//******************************************************************************
+
+void IOPMrootDomain::informCPUStateChange(
+    uint32_t type, 
+    uint32_t value )
+{
+#if defined(__i386__) || defined(__x86_64__)
+
+    pmioctlVariableInfo_t varInfoStruct;                            
     int                 pmCPUret = 0;
     const char          *varNameStr = NULL;
     int32_t             *varIndex   = NULL;
@@ -2639,6 +3356,8 @@ void IOPMrootDomain::informCPUStateChange(
 #endif /* __i386__ || __x86_64__ */
 }
 
+// MARK: -
+// MARK: Deep Sleep Policy
 
 #if HIBERNATION
 
@@ -2673,7 +3392,7 @@ enum {
     kIOPMSleepFactorUSBExternalDevice       = 0x00000080,
     kIOPMSleepFactorBluetoothHIDDevice      = 0x00000100,
     kIOPMSleepFactorExternalMediaMounted    = 0x00000200,
-    kIOPMSleepFactorDriverAssertBit5        = 0x00000400,
+    kIOPMSleepFactorDriverAssertBit5        = 0x00000400,   /* Reserved for Thunderbolt */
     kIOPMSleepFactorDriverAssertBit6        = 0x00000800,
     kIOPMSleepFactorDriverAssertBit7        = 0x00001000
 };
@@ -2730,18 +3449,15 @@ bool IOPMrootDomain::evaluateSystemSleepPolicy( IOPMSystemSleepParameters * p )
     if (getPMAssertionLevel(kIOPMDriverAssertionExternalMediaMountedBit) !=
         kIOPMDriverAssertionLevelOff)
         currentFactors |= kIOPMSleepFactorExternalMediaMounted;
-    if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit5) !=
+    if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit5) !=    /* AssertionBit5 = Thunderbolt */
         kIOPMDriverAssertionLevelOff)
         currentFactors |= kIOPMSleepFactorDriverAssertBit5;
-    if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit6) !=
-        kIOPMDriverAssertionLevelOff)
-        currentFactors |= kIOPMSleepFactorDriverAssertBit6;
     if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit7) !=
         kIOPMDriverAssertionLevelOff)
         currentFactors |= kIOPMSleepFactorDriverAssertBit7;
     if (0 == deepSleepDelay)
         currentFactors |= kIOPMSleepFactorDeepSleepNoDelay;
-    if (!clamshellIsClosed)
+    if (!clamshellClosed)
         currentFactors |= kIOPMSleepFactorLidOpen;
     if (acAdaptorConnected)
         currentFactors |= kIOPMSleepFactorACPower;
@@ -2897,1768 +3613,2139 @@ bool IOPMrootDomain::getSleepOption( const char * key, uint32_t * option )
         ok = true;
     }
 
-    if (obj)
-        obj->release();
-    if (optionsProp)
-        optionsProp->release();
+    if (obj)
+        obj->release();
+    if (optionsProp)
+        optionsProp->release();
+
+    return true;
+}
+#endif /* HIBERNATION */
+
+// MARK: -
+// MARK: Shutdown and Restart
+
+//******************************************************************************
+// handlePlatformHaltRestart
+//
+//******************************************************************************
+
+struct HaltRestartApplierContext {
+	IOPMrootDomain *	RootDomain;
+	unsigned long		PowerState;
+	IOPMPowerFlags		PowerFlags;
+	UInt32				MessageType;
+	UInt32				Counter;
+};
+
+static void
+platformHaltRestartApplier( OSObject * object, void * context )
+{
+	IOPowerStateChangeNotification	notify;
+	HaltRestartApplierContext *		ctx;
+	AbsoluteTime					startTime;
+	UInt32							deltaTime;
+
+	ctx = (HaltRestartApplierContext *) context;
+	
+	memset(&notify, 0, sizeof(notify));
+    notify.powerRef    = (void *)ctx->Counter;
+    notify.returnValue = 0;
+    notify.stateNumber = ctx->PowerState;
+    notify.stateFlags  = ctx->PowerFlags;
+
+	clock_get_uptime(&startTime);
+    ctx->RootDomain->messageClient( ctx->MessageType, object, (void *)&notify );
+	deltaTime = computeDeltaTimeMS(&startTime);
+
+	if ((deltaTime > kPMHaltTimeoutMS) ||
+        (gIOKitDebug & kIOLogPMRootDomain))
+	{
+		_IOServiceInterestNotifier * notifier;
+		notifier = OSDynamicCast(_IOServiceInterestNotifier, object);
+
+		// IOService children of IOPMrootDomain are not instrumented.
+		// Only IORootParent currently falls under that group.
+
+		if (notifier)
+		{
+			LOG("%s handler %p took %u ms\n",
+				(ctx->MessageType == kIOMessageSystemWillPowerOff) ? "PowerOff" :
+					 (ctx->MessageType == kIOMessageSystemPagingOff) ? "PagingOff" : "Restart",
+				notifier->handler, (uint32_t) deltaTime );
+		}
+	}
+
+	ctx->Counter++;
+}
+
+void IOPMrootDomain::handlePlatformHaltRestart( UInt32 pe_type )
+{
+	HaltRestartApplierContext	ctx;
+	AbsoluteTime				startTime;
+	UInt32						deltaTime;
+
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.RootDomain = this;
+
+	clock_get_uptime(&startTime);
+	switch (pe_type)
+	{
+		case kPEHaltCPU:
+        case kPEUPSDelayHaltCPU:
+			ctx.PowerState  = OFF_STATE;
+			ctx.MessageType = kIOMessageSystemWillPowerOff;
+			break;
+
+		case kPERestartCPU:
+			ctx.PowerState  = RESTART_STATE;
+			ctx.MessageType = kIOMessageSystemWillRestart;
+			break;
+
+		case kPEPagingOff:
+			ctx.PowerState  = ON_STATE;
+			ctx.MessageType = kIOMessageSystemPagingOff;
+			break;
+
+		default:
+			return;
+	}
+
+	// Notify legacy clients
+	applyToInterested(gIOPriorityPowerStateInterest, platformHaltRestartApplier, &ctx);
+
+    // For normal shutdown, turn off File Server Mode.
+    if (kPEHaltCPU == pe_type)
+    {
+        const OSSymbol * setting = OSSymbol::withCString(kIOPMSettingRestartOnPowerLossKey);
+        OSNumber * num = OSNumber::withNumber((unsigned long long) 0, 32);
+        if (setting && num)
+        {
+            setPMSetting(setting, num);
+            setting->release();
+            num->release();
+        }
+    }
+
+	if (kPEPagingOff != pe_type)
+	{
+		// Notify in power tree order
+		notifySystemShutdown(this, ctx.MessageType);
+	}
 
-    return true;
+	deltaTime = computeDeltaTimeMS(&startTime);
+	LOG("%s all drivers took %u ms\n",
+		(ctx.MessageType == kIOMessageSystemWillPowerOff) ? "PowerOff" :
+			 (ctx.MessageType == kIOMessageSystemPagingOff) ? "PagingOff" : "Restart",
+		(uint32_t) deltaTime );
 }
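+
+// Note (illustrative): kernel drivers typically observe these events via
+// registerPrioritySleepWakeInterest(), declared in IOKit/pwr_mgt/RootDomain.h;
+// the registered handler receives kIOMessageSystemWillPowerOff,
+// kIOMessageSystemWillRestart, or kIOMessageSystemPagingOff as sent by the
+// applier above.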
-#endif /* HIBERNATION */
-
 
 //******************************************************************************
-// dispatchPowerEvent
+// shutdownSystem
 //
-// IOPMPowerStateQueue callback function. Running on PM work loop thread.
 //******************************************************************************
 
-void IOPMrootDomain::dispatchPowerEvent(
-    uint32_t event, void * arg0, uint64_t arg1 )
+IOReturn IOPMrootDomain::shutdownSystem( void )
 {
-    DLOG("power event %u args %p 0x%llx\n", event, arg0, arg1);
-    ASSERT_GATED();
-
-    switch (event)
-    {
-        case kPowerEventFeatureChanged:
-            messageClients(kIOPMMessageFeatureChange, this);
-            break;
-
-        case kPowerEventReceivedPowerNotification:
-            handlePowerNotification( (UInt32)(uintptr_t) arg0 );
-            break;
-        
-        case kPowerEventSystemBootCompleted:
-            if (systemBooting)
-            {
-                systemBooting = false;
-                adjustPowerState();
-
-                // If lid is closed, re-send lid closed notification
-                // now that booting is complete.
-                if( clamshellIsClosed )
-                {
-                    handlePowerNotification(kLocalEvalClamshellCommand);
-                }
-            }
-            break;
-        
-        case kPowerEventSystemShutdown:
-            if (kOSBooleanTrue == (OSBoolean *) arg0)
-            {
-                /* We set systemShutdown = true during shutdown
-                   to prevent sleep at unexpected times while loginwindow is trying
-                   to shutdown apps and while the OS is trying to transition to
-                   complete power of.
-                   
-                   Set to true during shutdown, as soon as loginwindow shows
-                   the "shutdown countdown dialog", through individual app
-                   termination, and through black screen kernel shutdown.
-                 */
-                LOG("systemShutdown true\n");
-                systemShutdown = true;
-            } else {
-                /*
-                 A shutdown was initiated, but then the shutdown
-                 was cancelled, clearing systemShutdown to false here.
-                */
-                LOG("systemShutdown false\n");
-                systemShutdown = false;            
-            }
-            break;
-
-        case kPowerEventUserDisabledSleep:
-            userDisabledAllSleep = (kOSBooleanTrue == (OSBoolean *) arg0);
-            break;
-
-#if ROOT_DOMAIN_RUN_STATES
-        case kPowerEventConfigdRegisteredInterest:
-            if (gConfigdNotifier)
-            {
-                gConfigdNotifier->release();
-                gConfigdNotifier = 0;
-            }
-            if (arg0)
-            {
-                gConfigdNotifier = (IONotifier *) arg0;
-            }
-            break;
-#endif
-
-        case kPowerEventAggressivenessChanged:
-            aggressivenessChanged();
-            break;
-
-        case kPowerEventAssertionCreate:
-            if (pmAssertions) {
-                pmAssertions->handleCreateAssertion((OSData *)arg0);
-            }
-            break;
+    return kIOReturnUnsupported;
+}
 
-        case kPowerEventAssertionRelease:
-            if (pmAssertions) {
-                pmAssertions->handleReleaseAssertion(arg1);
-            }
-            break;
+//******************************************************************************
+// restartSystem
+//
+//******************************************************************************
 
-        case kPowerEventAssertionSetLevel:
-            if (pmAssertions) {
-                pmAssertions->handleSetAssertionLevel(arg1, (IOPMDriverAssertionLevel)(uintptr_t)arg0);
-            }
-            break;
-    }
+IOReturn IOPMrootDomain::restartSystem( void )
+{
+    return kIOReturnUnsupported;
 }
 
+// MARK: -
+// MARK: System Capability
 
 //******************************************************************************
-// systemPowerEventOccurred
-//
-// The power controller is notifying us of a hardware-related power management
-// event that we must handle. 
+// tagPowerPlaneService
 //
-// systemPowerEventOccurred covers the same functionality that
-// receivePowerNotification does; it simply provides a richer API for conveying
-// more information.
+// Running on PM work loop thread.
 //******************************************************************************
 
-IOReturn IOPMrootDomain::systemPowerEventOccurred(
-    const OSSymbol *event,
-    uint32_t intValue)
+void IOPMrootDomain::tagPowerPlaneService(
+        IOService *     service,
+        IOPMActions *   actions )
 {
-    IOReturn        attempt = kIOReturnSuccess;
-    OSNumber        *newNumber = NULL;
+    uint32_t    flags = 0;
+    bool        isDisplayWrangler;
 
-    if (!event) 
-        return kIOReturnBadArgument;
-        
-    newNumber = OSNumber::withNumber(intValue, 8*sizeof(intValue));
-    if (!newNumber)
-        return kIOReturnInternalError;
+    memset(actions, 0, sizeof(*actions));
+    actions->target = this;
 
-    attempt = systemPowerEventOccurred(event, (OSObject *)newNumber);
+    if (service == this)
+    {
+        actions->actionPowerChangeStart =
+            OSMemberFunctionCast(
+                IOPMActionPowerChangeStart, this,
+                &IOPMrootDomain::handleOurPowerChangeStart);
 
-    newNumber->release();
+        actions->actionPowerChangeDone =
+            OSMemberFunctionCast(
+                IOPMActionPowerChangeDone, this,
+                &IOPMrootDomain::handleOurPowerChangeDone);
 
-    return attempt;
-}
+        actions->actionPowerChangeOverride =
+            OSMemberFunctionCast(
+                IOPMActionPowerChangeOverride, this,
+                &IOPMrootDomain::overrideOurPowerChange);
+        return;
+    }
 
-IOReturn IOPMrootDomain::systemPowerEventOccurred(
-    const OSSymbol *event,
-    OSObject *value)
-{
-    OSDictionary *thermalsDict = NULL;
-    bool shouldUpdate = true;
-    
-    if (!event || !value) 
-        return kIOReturnBadArgument;
+#if !NO_KERNEL_HID
+    isDisplayWrangler = (0 != service->metaCast("IODisplayWrangler"));
+    if (isDisplayWrangler)
+    {
+        wrangler = service;
+        wranglerConnection = (IOService *) service->getParentEntry(gIOPowerPlane);
+    }
+#else
+    isDisplayWrangler = false;
+#endif
 
-    // LOCK
-    // We reuse featuresDict Lock because it already exists and guards
-    // the very infrequently used publish/remove feature mechanism; so there's zero rsk
-    // of stepping on that lock.
-    if (featuresDictLock) IOLockLock(featuresDictLock);
+#if defined(__i386__) || defined(__x86_64__)
+    if (isDisplayWrangler)
+        flags |= kPMActionsFlagIsDisplayWrangler;
+    if (service->getProperty("IOPMStrictTreeOrder"))
+        flags |= kPMActionsFlagIsGraphicsDevice;
+    if (service->getProperty("IOPMUnattendedWakePowerState"))
+        flags |= kPMActionsFlagIsAudioDevice;
+#endif
 
-    thermalsDict = (OSDictionary *)getProperty(kIOPMRootDomainPowerStatusKey);
-                   
-    if (thermalsDict && OSDynamicCast(OSDictionary, thermalsDict)) {
-        thermalsDict = OSDictionary::withDictionary(thermalsDict);                        
-    } else {
-        thermalsDict = OSDictionary::withCapacity(1);
-    }
+    // Find the power connection object that is a child of the PCI host
+    // bridge, and has a graphics/audio device attached below. Mark the
+    // power branch for delayed child notifications.
 
-    if (!thermalsDict) {
-        shouldUpdate = false;
-        goto exit;
+    if (flags)
+    {
+        IORegistryEntry * child  = service;
+        IORegistryEntry * parent = child->getParentEntry(gIOPowerPlane);
+
+        while (child != this)
+        {
+            if ((parent == pciHostBridgeDriver) ||
+                (parent == this))
+            {
+                if (OSDynamicCast(IOPowerConnection, child))
+                {
+                    IOPowerConnection * conn = (IOPowerConnection *) child;
+                    conn->delayChildNotification = true;
+                }
+                break;
+            }
+            child = parent;
+            parent = child->getParentEntry(gIOPowerPlane);
+        }
     }
 
-    thermalsDict->setObject (event, value);
+    if (flags)
+    {
+        DLOG("%s tag flags %x\n", service->getName(), flags);
+        actions->parameter |= flags;
+        actions->actionPowerChangeOverride =
+            OSMemberFunctionCast(
+                IOPMActionPowerChangeOverride, this,
+                &IOPMrootDomain::overridePowerChangeForUIService);
 
-    setProperty (kIOPMRootDomainPowerStatusKey, thermalsDict);
+        if (flags & kPMActionsFlagIsDisplayWrangler)
+        {
+            actions->actionActivityTickle =
+                OSMemberFunctionCast(
+                    IOPMActionActivityTickle, this,
+                    &IOPMrootDomain::handleActivityTickleForDisplayWrangler);
+        }
+        return;
+    }
 
-    thermalsDict->release();
+    // Locate the first PCI host bridge for PMTrace.
+    if (!pciHostBridgeDevice && service->metaCast("IOPCIBridge"))
+    {
+        IOService * provider = service->getProvider();
+        if (OSDynamicCast(IOPlatformDevice, provider) &&
+            provider->inPlane(gIODTPlane))
+        {
+            pciHostBridgeDevice = provider;
+            pciHostBridgeDriver = service;
+            DLOG("PMTrace found PCI host bridge %s->%s\n",
+                provider->getName(), service->getName());
+        }
+    }
 
-exit:
-    // UNLOCK
-    if (featuresDictLock) IOLockUnlock(featuresDictLock);
+    // Tag top-level PCI devices. The order of PMinit() calls does not
+    // change across boots and is used as the PCI bit number.
+    if (pciHostBridgeDevice && service->metaCast("IOPCIDevice"))
+    {
+        // Would prefer to check built-in property, but tagPowerPlaneService()
+        // is called before pciDevice->registerService().
+        IORegistryEntry * parent = service->getParentEntry(gIODTPlane);
+        if ((parent == pciHostBridgeDevice) && service->getProperty("acpi-device"))
+        {
+            int bit = pmTracer->recordTopLevelPCIDevice( service );
+            if (bit >= 0)
+            {
+                // Save the assigned bit for fast lookup.
+                actions->parameter |= (bit & kPMActionsPCIBitNumberMask);
 
-    if (shouldUpdate)
-        messageClients (kIOPMMessageSystemPowerEventOccurred, (void *)NULL);
+                actions->actionPowerChangeStart =
+                    OSMemberFunctionCast(
+                        IOPMActionPowerChangeStart, this,
+                        &IOPMrootDomain::handlePowerChangeStartForPCIDevice);
 
-    return kIOReturnSuccess;
+                actions->actionPowerChangeDone =
+                    OSMemberFunctionCast(
+                        IOPMActionPowerChangeDone, this,
+                        &IOPMrootDomain::handlePowerChangeDoneForPCIDevice);
+            }
+        }
+    }
 }
 
-
 //******************************************************************************
-// receivePowerNotification
-//
-// The power controller is notifying us of a hardware-related power management
-// event that we must handle. This may be a result of an 'environment' interrupt
-// from the power mgt micro.
+// PM actions for root domain
 //******************************************************************************
 
-IOReturn IOPMrootDomain::receivePowerNotification( UInt32 msg )
+void IOPMrootDomain::overrideOurPowerChange(
+    IOService *     service,
+    IOPMActions *   actions,
+    unsigned long * inOutPowerState,
+    uint32_t *      inOutChangeFlags )
 {
-    pmPowerStateQueue->submitPowerEvent(
-        kPowerEventReceivedPowerNotification, (void *) msg );
-    return kIOReturnSuccess;
+    uint32_t    powerState  = (uint32_t) *inOutPowerState;
+    uint32_t    changeFlags = *inOutChangeFlags;
+    uint32_t    currentPowerState = (uint32_t) getPowerState();
+
+    if ((currentPowerState == powerState) ||
+        (changeFlags & kIOPMParentInitiated))
+    {
+        // FIXME: cancel any parent change (unexpected)
+        // Root parent is permanently pegged at max power,
+        // kIOPMParentInitiated is unexpected.
+        return;
+    }
+
+    if (powerState < currentPowerState)
+    {
+        if ((changeFlags & kIOPMSkipAskPowerDown) == 0)
+        {
+            /* Convenient place to run any code at idle sleep time.
+             * IOPMrootDomain initiates an idle sleep here.
+             *
+             * Set the last sleep cause accordingly.
+             */
+            pmPowerStateQueue->submitPowerEvent(kPowerEventPublishSleepWakeUUID, (void *)true);
+
+            lastSleepReason = kIOPMSleepReasonIdle;
+            setProperty(kRootDomainSleepReasonKey, kIOPMIdleSleepKey);
+        }
+        if (CAP_CURRENT(kIOPMSystemCapabilityGraphics))
+        {
+            // Root domain is dropping power state ON->SLEEP.
+            // If system is in full wake, first drop to dark wake.
+
+            darkWakeToSleepASAP = true;
+
+            // Drop graphics capability.
+            // No transition if system is already in dark wake.
+
+            _desiredCapability &= ~(
+                kIOPMSystemCapabilityGraphics |
+                kIOPMSystemCapabilityAudio    );
+
+            *inOutPowerState = ON_STATE;
+            *inOutChangeFlags |= kIOPMSynchronize;
+
+            // Revert device desire from SLEEP->ON.
+            changePowerStateToPriv(ON_STATE);
+        }
+    }
 }
 
-void IOPMrootDomain::handlePowerNotification( UInt32 msg )
+void IOPMrootDomain::handleOurPowerChangeStart(
+    IOService *     service,
+    IOPMActions *   actions,
+    uint32_t        powerState,
+    uint32_t *      inOutChangeFlags )
 {
-    bool        eval_clamshell = false;
+    uint32_t changeFlags = *inOutChangeFlags;
+    uint32_t currentPowerState = (uint32_t) getPowerState();
 
-    ASSERT_GATED();
+    _systemTransitionType    = kSystemTransitionNone;
+    _systemMessageClientMask = 0;
+    capabilityLoss           = false;
 
-    /*
-     * Local (IOPMrootDomain only) eval clamshell command
-     */
-    if (msg & kLocalEvalClamshellCommand)
+    // 1. Explicit capability change.
+
+    if (changeFlags & kIOPMSynchronize)
     {
-        eval_clamshell = true;
+        if (powerState == ON_STATE)
+        {
+            if (changeFlags & kIOPMSyncNoChildNotify)
+                _systemTransitionType = kSystemTransitionNewCapClient;
+            else
+                _systemTransitionType = kSystemTransitionCapability;
+        }
     }
 
-    /*
-     * Overtemp
-     */
-    if (msg & kIOPMOverTemp)
+    // 2. Going to sleep (cancellation still possible).
+
+    else if (powerState < currentPowerState)
+        _systemTransitionType = kSystemTransitionSleep;
+
+    // 3. Woke from (idle or demand) sleep.
+
+    else if (!systemBooting &&
+             (changeFlags & kIOPMSelfInitiated) &&
+             (powerState > currentPowerState))
     {
-        LOG("PowerManagement emergency overtemp signal. Going to sleep!");
-        privateSleepSystem (kIOPMSleepReasonThermalEmergency);
+        _systemTransitionType = kSystemTransitionWake;
+        _desiredCapability = kIOPMSystemCapabilityCPU |
+                             kIOPMSystemCapabilityNetwork;
+
+        // Check for early HID events (e.g. LID open)
+        if (wranglerTickled)
+        {
+            _desiredCapability |= (
+                kIOPMSystemCapabilityGraphics |
+                kIOPMSystemCapabilityAudio );
+        }
     }
 
-#ifdef __ppc__
-    /*
-     * PMU Processor Speed Change
-     */
-    if (msg & kIOPMProcessorSpeedChange) 
+    // Update pending wake capability at the beginning of every
+    // state transition (including synchronize). This will become
+    // the current capability at the end of the transition.
+
+    if (kSystemTransitionSleep == _systemTransitionType)
     {
-        IOService *pmu = waitForService(serviceMatching("ApplePMU"));
-        pmu->callPlatformFunction("prepareForSleep", false, 0, 0, 0, 0);
-        getPlatform()->sleepKernel();
-        pmu->callPlatformFunction("recoverFromSleep", false, 0, 0, 0, 0);
+        _pendingCapability = 0;
+        capabilityLoss = true;
     }
-#endif
-
-    /*
-     * Sleep Now!
-     */
-    if (msg & kIOPMSleepNow) 
+    else if (kSystemTransitionNewCapClient != _systemTransitionType)
     {
-        privateSleepSystem (kIOPMSleepReasonSoftware);
+        _pendingCapability = _desiredCapability |
+                             kIOPMSystemCapabilityCPU |
+                             kIOPMSystemCapabilityNetwork;
+
+        if (_pendingCapability & kIOPMSystemCapabilityGraphics)
+            _pendingCapability |= kIOPMSystemCapabilityAudio;
+
+        if ((kSystemTransitionCapability == _systemTransitionType) &&
+            (_pendingCapability == _currentCapability))
+        {
+            // Cancel the PM state change.
+            _systemTransitionType = kSystemTransitionNone;
+            *inOutChangeFlags |= kIOPMNotDone;
+        }
+        if (__builtin_popcount(_pendingCapability) <
+            __builtin_popcount(_currentCapability))
+            capabilityLoss = true;
+        if (CAP_LOSS(kIOPMSystemCapabilityGraphics))
+            rejectWranglerTickle = true;
     }
-    
-    /*
-     * Power Emergency
-     */
-    if (msg & kIOPMPowerEmergency) 
+
+    // 1. Capability change.
+
+    if (kSystemTransitionCapability == _systemTransitionType)
     {
-        lowBatteryCondition = true;
-        privateSleepSystem (kIOPMSleepReasonLowPower);
+        // Dark to Full transition.
+        if (CAP_GAIN(kIOPMSystemCapabilityGraphics))
+        {
+            tracePoint( kIOPMTracePointDarkWakeExit );
+            wranglerSleepIgnored = false;
+            sleepTimerMaintenance = false;
+            hibernateNoDefeat = false;
+            _systemMessageClientMask = kSystemMessageClientUser;
+            if ((_highestCapability & kIOPMSystemCapabilityGraphics) == 0)
+                _systemMessageClientMask |= kSystemMessageClientKernel;
+
+            tellClients(kIOMessageSystemWillPowerOn);
+        }
+
+        // Full to Dark transition.
+        if (CAP_LOSS(kIOPMSystemCapabilityGraphics))
+        {
+            tracePoint( kIOPMTracePointDarkWakeEntry );
+            *inOutChangeFlags |= kIOPMSyncTellPowerDown;
+            _systemMessageClientMask = kSystemMessageClientUser;
+        }
     }
 
-    /*
-     * Clamshell OPEN
-     */
-    if (msg & kIOPMClamshellOpened) 
+    // 2. System sleep.
+
+    else if (kSystemTransitionSleep == _systemTransitionType)
     {
-        // Received clamshel open message from clamshell controlling driver
-        // Update our internal state and tell general interest clients
-        clamshellIsClosed = false;
-        clamshellExists = true;
+        // Beginning of a system sleep transition.
+        // Cancellation is still possible.
+        tracePoint( kIOPMTracePointSleepStarted, lastSleepReason );
 
-        if (msg & kIOPMSetValue)
+        _systemMessageClientMask = kSystemMessageClientAll;
+        if ((_currentCapability & kIOPMSystemCapabilityGraphics) == 0)
+            _systemMessageClientMask &= ~kSystemMessageClientApp;
+        if ((_highestCapability & kIOPMSystemCapabilityGraphics) == 0)
+            _systemMessageClientMask &= ~kSystemMessageClientKernel;
+
+        // Optimization to ignore wrangler power down thus skipping
+        // the disk spindown and arming the idle timer for demand sleep.
+
+        if (changeFlags & kIOPMIgnoreChildren)
         {
-            reportUserInput();
+            wranglerSleepIgnored = true;
         }
 
-        // Tell PMCPU
-        informCPUStateChange(kInformLid, 0);
+        logWranglerTickle = false;
+    }
 
-        // Tell general interest clients        
-        sendClientClamshellNotification();
+    // 3. System wake.
 
-        bool aborting =  ((lastSleepReason == kIOPMSleepReasonClamshell)
-                       || (lastSleepReason == kIOPMSleepReasonIdle) 
-                       || (lastSleepReason == kIOPMSleepReasonMaintenance));
-        if (aborting) userActivityCount++;
-        DLOG("clamshell tickled %d lastSleepReason %d\n", userActivityCount, lastSleepReason);
+    else if (kSystemTransitionWake == _systemTransitionType)
+    {
+        wranglerSleepIgnored = false;
+
+        if (_pendingCapability & kIOPMSystemCapabilityGraphics)
+        {
+            _systemMessageClientMask = kSystemMessageClientAll;
+        }
+        else
+        {
+            _systemMessageClientMask = kSystemMessageClientConfigd;
+        }
+
+        tracePoint( kIOPMTracePointWakeWillPowerOnClients );
+        tellClients(kIOMessageSystemWillPowerOn);
     }
 
-    /* 
-     * Clamshell CLOSED
-     * Send the clamshell interest notification since the lid is closing. 
-     */
-    if (msg & kIOPMClamshellClosed)
+    if ((kSystemTransitionNone != _systemTransitionType) &&
+        (kSystemTransitionNewCapClient != _systemTransitionType))
     {
-        // Received clamshel open message from clamshell controlling driver
-        // Update our internal state and tell general interest clients
-        clamshellIsClosed = true;
-        clamshellExists = true;
+        _systemStateGeneration++;
+        systemDarkWake = false;
 
-        // Tell PMCPU
-        informCPUStateChange(kInformLid, 1);
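+        // "dcp %x:%x:%x" below logs the desired, current, and pending
+        // capability masks, in that order.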
+        DLOG("=== START (%u->%u, 0x%x) type %u, gen %u, msg %x, "
+             "dcp %x:%x:%x\n",
+            currentPowerState, powerState, *inOutChangeFlags,
+            _systemTransitionType, _systemStateGeneration,
+            _systemMessageClientMask,
+            _desiredCapability, _currentCapability, _pendingCapability);
+    }
+}
 
-        // Tell general interest clients
-        sendClientClamshellNotification();
-        
-        // And set eval_clamshell = so we can attempt 
-        eval_clamshell = true;
+void IOPMrootDomain::handleOurPowerChangeDone(
+    IOService *     service,
+    IOPMActions *   actions,
+    uint32_t        powerState,
+    uint32_t        changeFlags )
+{
+    if (kSystemTransitionNewCapClient == _systemTransitionType)
+    {
+        _systemTransitionType = kSystemTransitionNone;
+        return;
     }
 
-    /*
-     * Set Desktop mode (sent from graphics)
-     *
-     *  -> reevaluate lid state
-     */
-    if (msg & kIOPMSetDesktopMode) 
+    if (_systemTransitionType != kSystemTransitionNone)
     {
-        desktopMode = (0 != (msg & kIOPMSetValue));
-        msg &= ~(kIOPMSetDesktopMode | kIOPMSetValue);
+        uint32_t currentPowerState = (uint32_t) getPowerState();
 
-        sendClientClamshellNotification();
+        if (changeFlags & kIOPMNotDone)
+        {
+            // Power down was cancelled or vetoed.
+            _pendingCapability = _currentCapability;
+            lastSleepReason = 0;
 
-        // Re-evaluate the lid state
-        if( clamshellIsClosed )
+            if (((_currentCapability & kIOPMSystemCapabilityGraphics) == 0) &&
+                (_currentCapability & kIOPMSystemCapabilityCPU))
+            {
+                pmPowerStateQueue->submitPowerEvent(
+                    kPowerEventPolicyStimulus,
+                    (void *) kStimulusDarkWakeReentry,
+                    _systemStateGeneration );                
+            }
+            
+            // Revert device desire to max.
+            changePowerStateToPriv(ON_STATE);
+        }
+        else
         {
-            eval_clamshell = true;
+            // Send message on dark wake to full wake promotion.
+            // tellChangeUp() handles the normal SLEEP->ON case.
+
+            if (kSystemTransitionCapability == _systemTransitionType)
+            {
+                if (CAP_GAIN(kIOPMSystemCapabilityGraphics))
+                {
+                    tellClients(kIOMessageSystemHasPoweredOn);
+#if DARK_TO_FULL_EVALUATE_CLAMSHELL
+                    // Re-evaluate clamshell state ourselves when graphics
+                    // will not get kIOMessageSystemHasPoweredOn.
+
+                    if (clamshellClosed &&
+                        ((_systemMessageClientMask & kSystemMessageClientKernel) == 0))
+                    {
+                        receivePowerNotification( kLocalEvalClamshellCommand );
+                    }
+#endif
+                }
+                if (CAP_LOSS(kIOPMSystemCapabilityGraphics))
+                    wranglerTickled = false;
+            }
+
+            // Reset state after exiting from dark wake.
+
+            if (CAP_GAIN(kIOPMSystemCapabilityGraphics) ||
+                CAP_LOSS(kIOPMSystemCapabilityCPU))
+            {
+                darkWakeMaintenance = false;
+                darkWakeToSleepASAP = false;
+                pciCantSleepValid   = false;
+                rejectWranglerTickle = false;
+            }
+
+            // Entered dark mode.
+
+            if (((_pendingCapability & kIOPMSystemCapabilityGraphics) == 0) &&
+                 (_pendingCapability & kIOPMSystemCapabilityCPU))
+            {
+                if (((gDarkWakeFlags & kDarkWakeFlagIgnoreDiskIOInDark) == 0) &&
+                    (kSystemTransitionWake == _systemTransitionType) &&
+                    (_debugWakeSeconds == 0))
+                {
+                    OSObject * prop = copyProperty(kIOPMRootDomainWakeTypeKey);
+                    if (prop)
+                    {
+                        OSString * wakeType = OSDynamicCast(OSString, prop);
+                        if (wakeType &&
+                            wakeType->isEqualTo(kIOPMRootDomainWakeTypeNetwork))
+                        {
+                            // Woke from network and entered dark wake.                    
+                            if (darkWakeToSleepASAP)
+                            {
+                                DLOG("cleared darkWakeToSleepASAP\n");
+                                darkWakeToSleepASAP = false;
+                            }
+                        }
+                        prop->release();
+                    }
+                }
+
+                // Queue an evaluation of whether to remain in dark wake,
+                // and for how long. This serves the purpose of draining
+                // any assertions from the queue.
+
+                pmPowerStateQueue->submitPowerEvent(
+                    kPowerEventPolicyStimulus,
+                    (void *) kStimulusDarkWakeEntry,
+                    _systemStateGeneration );
+            }
         }
-    }
-    
-    /*
-     * AC Adaptor connected
-     *
-     *  -> reevaluate lid state
-     */
-    if (msg & kIOPMSetACAdaptorConnected) 
-    {
-        acAdaptorConnected = (0 != (msg & kIOPMSetValue));
-        msg &= ~(kIOPMSetACAdaptorConnected | kIOPMSetValue);
 
-        // Tell CPU PM
-        informCPUStateChange(kInformAC, !acAdaptorConnected);
+        DLOG("=== FINISH (%u->%u, 0x%x) type %u, gen %u, msg %x, "
+             "dcp %x:%x:%x, dbgtimer %u\n",
+            currentPowerState, powerState, changeFlags,
+            _systemTransitionType, _systemStateGeneration,
+            _systemMessageClientMask,
+            _desiredCapability, _currentCapability, _pendingCapability,
+            _debugWakeSeconds);
 
-        // Tell BSD if AC is connected
-        //      0 == external power source; 1 == on battery
-        post_sys_powersource(acAdaptorConnected ? 0:1);
+        // Update current system capability.
 
-        sendClientClamshellNotification();
+        if (_currentCapability != _pendingCapability)
+            _currentCapability = _pendingCapability;
 
-        // Re-evaluate the lid state
-        if( clamshellIsClosed )
+        // Update highest system capability.
+
+        if (!CAP_CURRENT(kIOPMSystemCapabilityCPU))
+            _highestCapability = 0;     // reset at sleep state
+        else
+            _highestCapability |= _currentCapability;
+
+        if (darkWakePostTickle &&
+            (kSystemTransitionWake == _systemTransitionType) &&
+            (gDarkWakeFlags & kDarkWakeFlagHIDTickleMask) ==
+             kDarkWakeFlagHIDTickleLate)
         {
-            eval_clamshell = true;
+            darkWakePostTickle = false;
+            reportUserInput();
         }
-    }
-    
-    /*
-     * Enable Clamshell (external display disappear)
-     *
-     *  -> reevaluate lid state
-     */
-    if (msg & kIOPMEnableClamshell) 
-    {
-        // Re-evaluate the lid state
-        // System should sleep on external display disappearance
-        // in lid closed operation.
-        if( clamshellIsClosed && (true == ignoringClamshell) )        
+
+        // Reset tracepoint at completion of capability change,
+        // completion of wake transition, and aborted sleep transition.
+
+        if ((_systemTransitionType == kSystemTransitionCapability) ||
+            (_systemTransitionType == kSystemTransitionWake) ||
+            ((_systemTransitionType == kSystemTransitionSleep) &&
+             (changeFlags & kIOPMNotDone)))
         {
-            eval_clamshell = true;
+            setProperty(kIOPMSystemCapabilitiesKey, _currentCapability, 64);
+            tracePoint( kIOPMTracePointSystemUp, 0 );
         }
 
-        ignoringClamshell = false;
+        _systemTransitionType = kSystemTransitionNone;
+        _systemMessageClientMask = 0;
 
-        sendClientClamshellNotification();
+        logGraphicsClamp = false;
     }
-    
-    /*
-     * Disable Clamshell (external display appeared)
-     * We don't bother re-evaluating clamshell state. If the system is awake,
-     * the lid is probably open. 
-     */
-    if (msg & kIOPMDisableClamshell) 
-    {
-        ignoringClamshell = true;
+}
 
-        sendClientClamshellNotification();
+//******************************************************************************
+// PM actions for graphics and audio.
+//******************************************************************************
+
+void IOPMrootDomain::overridePowerChangeForUIService(
+    IOService *     service,
+    IOPMActions *   actions,
+    unsigned long * inOutPowerState,
+    uint32_t *      inOutChangeFlags )
+{
+    uint32_t powerState  = (uint32_t) *inOutPowerState;
+    uint32_t changeFlags = (uint32_t) *inOutChangeFlags;
+
+    if (kSystemTransitionNone == _systemTransitionType)
+    {
+        // Not in midst of a system transition.
+        // Do not modify power limit enable state.
     }
+    else if ((actions->parameter & kPMActionsFlagLimitPower) == 0)
+    {
+        // Activate power limiter.
 
-    /*
-     * Evaluate clamshell and SLEEP if appropiate
-     */
-    if ( eval_clamshell && shouldSleepOnClamshellClosed() ) 
+        if ((actions->parameter & kPMActionsFlagIsDisplayWrangler) &&
+            ((_pendingCapability & kIOPMSystemCapabilityGraphics) == 0))
+        {
+            actions->parameter |= kPMActionsFlagLimitPower;
+        }
+        else if ((actions->parameter & kPMActionsFlagIsAudioDevice) &&
+                 ((_pendingCapability & kIOPMSystemCapabilityAudio) == 0))
+        {
+            actions->parameter |= kPMActionsFlagLimitPower;
+        }
+        else if ((actions->parameter & kPMActionsFlagIsGraphicsDevice) &&
+                 (_systemTransitionType == kSystemTransitionSleep))
+        {
+            // For graphics devices, arm the limiter when entering
+            // system sleep. Not when dropping to dark wake.
+            actions->parameter |= kPMActionsFlagLimitPower; 
+        }
+
+        if (actions->parameter & kPMActionsFlagLimitPower)
+        {
+            DLOG("+ plimit %s %p\n",
+                service->getName(), service);
+        }
+    }
+    else
     {
+        // Remove power limit.
 
+        if ((actions->parameter & (
+            kPMActionsFlagIsDisplayWrangler |
+            kPMActionsFlagIsGraphicsDevice )) &&
+            (_pendingCapability & kIOPMSystemCapabilityGraphics))
+        {
+            actions->parameter &= ~kPMActionsFlagLimitPower;
+        }
+        else if ((actions->parameter & kPMActionsFlagIsAudioDevice) &&
+                 (_pendingCapability & kIOPMSystemCapabilityAudio))
+        {
+            actions->parameter &= ~kPMActionsFlagLimitPower;
+        }
 
-        // SLEEP!
-        privateSleepSystem (kIOPMSleepReasonClamshell);
+        if ((actions->parameter & kPMActionsFlagLimitPower) == 0)
+        {
+            DLOG("- plimit %s %p\n",
+                service->getName(), service);
+        }
     }
 
-    /*
-     * Power Button
-     */
-    if (msg & kIOPMPowerButton) 
+    if (actions->parameter & kPMActionsFlagLimitPower)
     {
-        // toggle state of sleep/wake
-        // are we dozing?
-        if ( getPowerState() == DOZE_STATE ) 
+        uint32_t maxPowerState = (uint32_t)(-1);
+
+        if (changeFlags & (kIOPMDomainDidChange | kIOPMDomainWillChange))
         {
-#ifndef __LP64__
-            // yes, tell the tree we're waking
-            systemWake();
-#endif
-            // wake the Display Wrangler
-            reportUserInput();
+            // Enforce limit for system power/cap transitions.
+
+            maxPowerState = 0;
+            if (actions->parameter & kPMActionsFlagIsDisplayWrangler)
+            {
+                // Forces a 3->1 transition sequence
+                if (changeFlags & kIOPMDomainWillChange)
+                    maxPowerState = 3;
+                else
+                    maxPowerState = 1;
+            }
+        }
+        else
+        {
+            // Deny all self-initiated changes when power is limited.
+            // Wrangler tickle should never defeat the limiter.
+
+            maxPowerState = service->getPowerState();
+        }
+
+        if (powerState > maxPowerState)
+        {
+            DLOG("> plimit %s %p (%u->%u, 0x%x)\n",
+                service->getName(), service, powerState, maxPowerState,
+                changeFlags);
+            *inOutPowerState = maxPowerState;
+
+            if (darkWakePostTickle &&
+                (actions->parameter & kPMActionsFlagIsDisplayWrangler) &&
+                (changeFlags & kIOPMDomainWillChange) &&
+                ((gDarkWakeFlags & kDarkWakeFlagHIDTickleMask) ==
+                 kDarkWakeFlagHIDTickleEarly))
+            {
+                darkWakePostTickle = false;
+                reportUserInput();
+            }
         }
-        else {
-            OSString *pbs = OSString::withCString("DisablePowerButtonSleep");
-            // Check that power button sleep is enabled
-            if( pbs ) {
-                if( kOSBooleanTrue != getProperty(pbs))
-                privateSleepSystem (kIOPMSleepReasonPowerButton);
+
+        if (!graphicsSuppressed && (changeFlags & kIOPMDomainDidChange))
+        {
+            if (logGraphicsClamp)
+            {
+                AbsoluteTime    now;
+                uint64_t        nsec;
+
+                clock_get_uptime(&now);
+                SUB_ABSOLUTETIME(&now, &systemWakeTime);
+                absolutetime_to_nanoseconds(now, &nsec);
+                MSG("Graphics suppressed %u ms\n",
+                    ((int)((nsec) / 1000000ULL)));
             }
+            graphicsSuppressed = true;
         }
     }
+}
 
-    /*
-     * Allow Sleep
-     *
-     */
-    if ( (msg & kIOPMAllowSleep) && !allowSleep ) 
+void IOPMrootDomain::handleActivityTickleForDisplayWrangler(
+    IOService *     service,
+    IOPMActions *   actions )
+{
+    // Warning: Not running in PM work loop context - don't modify state !!!
+    // Trap tickle directed to IODisplayWrangler while running with graphics
+    // capability suppressed.
+
+    assert(service == wrangler);
+
+    if (service == wrangler)
     {
-        allowSleep = true;
-        adjustPowerState();
+        bool aborting = ((lastSleepReason == kIOPMSleepReasonIdle) 
+                       || (lastSleepReason == kIOPMSleepReasonMaintenance));
+        if (aborting) {
+            userActivityCount++;
+            DLOG("display wrangler tickled1 %d lastSleepReason %d\n", userActivityCount, lastSleepReason);
+        }
     }
 
-    /*
-     * Prevent Sleep
-     *
-     */
-    if (msg & kIOPMPreventSleep) {
-        allowSleep = false;
-	    // are we dozing?
-        if ( getPowerState() == DOZE_STATE ) {
-#ifndef __LP64__
-            // yes, tell the tree we're waking
-            systemWake();
-#endif
-            adjustPowerState();
-            // wake the Display Wrangler
-            reportUserInput();
-        } else {
-            adjustPowerState();
-            // make sure we have power to clamp
-            patriarch->wakeSystem();
+    if (!wranglerTickled && !lowBatteryCondition &&
+        ((_pendingCapability & kIOPMSystemCapabilityGraphics) == 0))
+    {
+        DLOG("display wrangler tickled\n");
+        if (kIOLogPMRootDomain & gIOKitDebug)
+            OSReportWithBacktrace("Dark wake display tickle");
+        if (pmPowerStateQueue)
+        {
+            pmPowerStateQueue->submitPowerEvent(
+                kPowerEventPolicyStimulus,
+                (void *) kStimulusDarkWakeActivityTickle );
         }
     }
 }
 
-
 //******************************************************************************
-// getSleepSupported
-//
+// Approve usage of delayed child notification by PM.
 //******************************************************************************
 
-IOOptionBits IOPMrootDomain::getSleepSupported( void )
+bool IOPMrootDomain::shouldDelayChildNotification(
+    IOService * service )
 {
-    return( platformSleepSupport );
+    if (((gDarkWakeFlags & kDarkWakeFlagHIDTickleMask) != 0) &&
+        !wranglerTickled &&
+        (kSystemTransitionWake == _systemTransitionType))
+    {
+        DLOG("%s: delay child notify\n", service->getName());
+        return true;
+    }
+    return false;
 }
 
-
 //******************************************************************************
-// setSleepSupported
-//
+// PM actions for PCI device.
 //******************************************************************************
 
-void IOPMrootDomain::setSleepSupported( IOOptionBits flags )
+void IOPMrootDomain::handlePowerChangeStartForPCIDevice(
+    IOService *     service,
+    IOPMActions *   actions, 
+    uint32_t        powerState,
+    uint32_t *      inOutChangeFlags )
 {
-    DLOG("setSleepSupported(%x)\n", (uint32_t) flags);
-    OSBitOrAtomic(flags, &platformSleepSupport);
+    pmTracer->tracePCIPowerChange(
+        PMTraceWorker::kPowerChangeStart,
+        service, *inOutChangeFlags,
+        (actions->parameter & kPMActionsPCIBitNumberMask));
 }
 
+void IOPMrootDomain::handlePowerChangeDoneForPCIDevice(
+    IOService *     service,
+    IOPMActions *   actions, 
+    uint32_t        powerState,
+    uint32_t        changeFlags )
+{
+    pmTracer->tracePCIPowerChange(
+        PMTraceWorker::kPowerChangeCompleted,
+        service, changeFlags,
+        (actions->parameter & kPMActionsPCIBitNumberMask));
+}
 
 //******************************************************************************
-// requestPowerDomainState
-//
-// The root domain intercepts this call to the superclass.
-// Called on the PM work loop thread.
+// registerInterest
 //
-// If the clamp bit is not set in the desire, then the child doesn't need the power
-// state it's requesting; it just wants it. The root ignores desires but not needs.
-// If the clamp bit is not set, the root takes it that the child can tolerate no
-// power and interprets the request accordingly. If all children can thus tolerate
-// no power, we are on our way to idle sleep.
+// Override IOService::registerInterest() to intercept special clients.
 //******************************************************************************
 
-IOReturn IOPMrootDomain::requestPowerDomainState (
-    IOPMPowerFlags      desiredFlags,
-    IOPowerConnection * whichChild,
-    unsigned long       specification )
+IONotifier * IOPMrootDomain::registerInterest(
+                const OSSymbol * typeOfInterest,
+                IOServiceInterestHandler handler,
+                void * target, void * ref )
 {
-    OSIterator          *iter;
-    OSObject            *next;
-    IOPowerConnection   *connection;
-    IOPMPowerFlags      powerRequestFlag = 0;
-    IOPMPowerFlags      editedDesire;
+    IONotifier *    notifier;
+    bool            isSystemCapabilityClient;
+    bool            isKernelCapabilityClient;
 
-    ASSERT_GATED();
+    isSystemCapabilityClient =
+        typeOfInterest &&
+        typeOfInterest->isEqualTo(kIOPMSystemCapabilityInterest);
+
+    isKernelCapabilityClient =
+        typeOfInterest &&
+        typeOfInterest->isEqualTo(gIOPriorityPowerStateInterest);
 
-    if (kIOLogPMRootDomain & gIOKitDebug)
+    if (isSystemCapabilityClient)
+        typeOfInterest = gIOAppPowerStateInterest;
+
+    notifier = super::registerInterest(typeOfInterest, handler, target, ref);
+    if (notifier && pmPowerStateQueue)
     {
-        IOService * powerChild =
-            (IOService *) whichChild->getChildEntry(gIOPowerPlane);
-        DLOG("child %p, flags %lx, spec %lx - %s\n",
-            powerChild, desiredFlags, specification,
-            powerChild ? powerChild->getName() : "?");
+        if (isSystemCapabilityClient)
+        {
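+            // A successfully queued event owns one reference; drop it if submit fails.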
+            notifier->retain();
+            if (pmPowerStateQueue->submitPowerEvent(
+                kPowerEventRegisterSystemCapabilityClient, notifier) == false)
+                notifier->release();
+        }
+
+        if (isKernelCapabilityClient)
+        {
+            notifier->retain();
+            if (pmPowerStateQueue->submitPowerEvent(
+                kPowerEventRegisterKernelCapabilityClient, notifier) == false)
+                notifier->release();
+        }
     }
 
-    // Force the child's input power requirements to 0 unless the prevent
-    // idle-sleep flag is set. No input power flags map to our state 0.
-    // Our power clamp (deviceDesire) keeps the minimum power state at 2.
+    return notifier;
+}
 
-    if (desiredFlags & kIOPMPreventIdleSleep)
-        editedDesire = kIOPMPreventIdleSleep | kIOPMPowerOn;
-    else
-        editedDesire = 0;
+//******************************************************************************
+// systemMessageFilter
+//
+//******************************************************************************
 
-    // Recompute sleep supported flag (doze if not supported)
-    sleepIsSupported = true;
+bool IOPMrootDomain::systemMessageFilter(
+    void * object, void * arg1, void * arg2, void * arg3 )
+{
+    const IOPMInterestContext * context = (const IOPMInterestContext *) arg1;
+    bool  isCapMsg = (context->messageType == kIOMessageSystemCapabilityChange);
+    bool  isCapClient = false;
+    bool  allow = false;
 
-    iter = getChildIterator(gIOPowerPlane);
-    if ( iter ) 
-    {
-        while ( (next = iter->getNextObject()) ) 
+    do {
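+        // Single-pass loop: each break below exits with 'allow' as computed so far.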
+        if ((kSystemTransitionNewCapClient == _systemTransitionType) &&
+            (!isCapMsg || !_joinedCapabilityClients ||
+             !_joinedCapabilityClients->containsObject((OSObject *) object)))
+            break;
+
+        // Capability change message for app and kernel clients.
+
+        if (isCapMsg)
         {
-            if ( (connection = OSDynamicCast(IOPowerConnection, next)) ) 
-            {
-                // Ignore child that are in the process of joining.
-				if (connection->getReadyFlag() == false)
-					continue;
+            if ((context->notifyType == kNotifyPriority) ||
+                (context->notifyType == kNotifyCapabilityChangePriority))
+                isCapClient = true;
 
-                // Is this connection attached to the child that called
-                // requestPowerDomainState()?
+            if ((context->notifyType == kNotifyCapabilityChangeApps) &&
+                (object == (void *) systemCapabilityNotifier))
+                isCapClient = true;
+        }
 
-                if (connection == whichChild) 
-                {
-                    // OR in the child's input power requirements.
-                    powerRequestFlag |= editedDesire;
+        if (isCapClient)
+        {
+            IOPMSystemCapabilityChangeParameters * capArgs =
+                (IOPMSystemCapabilityChangeParameters *) arg2;
 
-                    if ( desiredFlags & kIOPMPreventSystemSleep )
-                        sleepIsSupported = false;
-                }
+            if (kSystemTransitionNewCapClient == _systemTransitionType)
+            {
+                capArgs->fromCapabilities = 0;
+                capArgs->toCapabilities = _currentCapability;
+                capArgs->changeFlags = 0;
+            }
+            else
+            {
+                capArgs->fromCapabilities = _currentCapability;
+                capArgs->toCapabilities = _pendingCapability;
+
+                if (context->isPreChange)
+                    capArgs->changeFlags = kIOPMSystemCapabilityWillChange;
                 else
-                {
-                    if (kIOLogPMRootDomain & gIOKitDebug)
-                    {
-                        IOService * powerChild =
-                            (IOService *) connection->getChildEntry(gIOPowerPlane);
-                        DLOG("child %p, state %ld, noIdle %d, noSleep %d - %s\n",
-                            powerChild,
-                            connection->getDesiredDomainState(),
-                            connection->getPreventIdleSleepFlag(),
-                            connection->getPreventSystemSleepFlag(),
-                            powerChild ? powerChild->getName() : "?");
-                    }
+                    capArgs->changeFlags = kIOPMSystemCapabilityDidChange;
+            }
 
-                    // OR in the child's desired power state (0 or ON_STATE).
-                    powerRequestFlag |= connection->getDesiredDomainState();
+            // Capability change messages only go to the PM configd plugin. 
+            // Wait for response post-change if capability is increasing.
+            // Wait for response pre-change if capability is decreasing.
 
-                    if ( connection->getPreventSystemSleepFlag() )
-                        sleepIsSupported = false;
-                }
+            if ((context->notifyType == kNotifyCapabilityChangeApps) && arg3 &&
+                ( (capabilityLoss && context->isPreChange) ||
+                  (!capabilityLoss && !context->isPreChange) ) )
+            {
+                // app has not replied yet, wait for it
+                *((OSObject **) arg3) = kOSBooleanFalse;
             }
+
+            allow = true;
+            break;
         }
-        iter->release();
-    }
 
-    DLOG("childPowerFlags 0x%lx, extraSleepDelay %ld\n",
-        powerRequestFlag, extraSleepDelay);
+        // Capability client will always see kIOMessageCanSystemSleep,
+        // even for demand sleep.
 
-    if ( !powerRequestFlag && !systemBooting ) 
-    {
-        if (!wrangler)
+        if ((kIOMessageCanSystemSleep == context->messageType) ||
+            (kIOMessageSystemWillNotSleep == context->messageType))
         {
-            sleepASAP = false;
-            changePowerStateToPriv(ON_STATE);
-            if (idleSeconds)
+            if (object == (OSObject *) systemCapabilityNotifier)
             {
-                // stay awake for at least idleSeconds
-                startIdleSleepTimer(idleSeconds);        
+                allow = true;
+                break;
+            }
+            
+            // Not idle sleep, don't ask apps.
+            if (context->changeFlags & kIOPMSkipAskPowerDown)
+            {
+                break;
             }
         }
-        else if (!extraSleepDelay && !idleSleepTimerPending)
+
+        // Reject capability change messages for legacy clients.
+        // Reject legacy system sleep messages for capability client.
+
+        if (isCapMsg || (object == (OSObject *) systemCapabilityNotifier))
         {
-            sleepASAP = true;
+            break;
         }
-    }
-
-    // Drop our power clamp to SLEEP_STATE when all children became idle,
-    // and the system sleep and display sleep values are equal.
 
-    adjustPowerState();
+        // Filter system sleep messages.
 
-    // If our power clamp has already dropped to SLEEP_STATE, and no child
-    // is keeping us at ON_STATE, then this will trigger idle sleep.
+        if ((context->notifyType == kNotifyApps) &&
+            (_systemMessageClientMask & kSystemMessageClientApp))
+        {
+            allow = true;
+        }
+        else if ((context->notifyType == kNotifyPriority) &&
+                 (_systemMessageClientMask & kSystemMessageClientKernel))
+        {
+            allow = true;
+        }
+    }
+    while (false);
 
-    editedDesire |= (desiredFlags & kIOPMPreventSystemSleep);
+    if (allow && isCapMsg && _joinedCapabilityClients)
+    {
+        _joinedCapabilityClients->removeObject((OSObject *) object);
+        if (_joinedCapabilityClients->getCount() == 0)
+        {
+            DLOG("destroyed capability client set %p\n",
+                _joinedCapabilityClients);
+            _joinedCapabilityClients->release();
+            _joinedCapabilityClients = 0;
+        }
+    }
 
-    return super::requestPowerDomainState(
-        editedDesire, whichChild, specification);
+    return allow;
 }
 
-
 //******************************************************************************
-// handlePlatformHaltRestart
+// setMaintenanceWakeCalendar
 //
 //******************************************************************************
 
-struct HaltRestartApplierContext {
-	IOPMrootDomain *	RootDomain;
-	unsigned long		PowerState;
-	IOPMPowerFlags		PowerFlags;
-	UInt32				MessageType;
-	UInt32				Counter;
-};
-
-static void
-platformHaltRestartApplier( OSObject * object, void * context )
+IOReturn IOPMrootDomain::setMaintenanceWakeCalendar(
+    const IOPMCalendarStruct * calendar )
 {
-	IOPowerStateChangeNotification	notify;
-	HaltRestartApplierContext *		ctx;
-	AbsoluteTime					startTime;
-	UInt32							deltaTime;
+    OSData * data;
+    IOReturn ret;
 
-	ctx = (HaltRestartApplierContext *) context;
-	
-	memset(&notify, 0, sizeof(notify));
-    notify.powerRef    = (void *)ctx->Counter;
-    notify.returnValue = 0;
-    notify.stateNumber = ctx->PowerState;
-    notify.stateFlags  = ctx->PowerFlags;
+    if (!calendar)
+        return kIOReturnBadArgument;
+    
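+    // OSData references the caller's bytes here rather than copying them.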
+    data = OSData::withBytesNoCopy((void *) calendar, sizeof(*calendar));
+    if (!data)
+        return kIOReturnNoMemory;
+    
+    ret = setPMSetting(gIOPMSettingMaintenanceWakeCalendarKey, data);
 
-	clock_get_uptime(&startTime);
-    ctx->RootDomain->messageClient( ctx->MessageType, object, (void *)&notify );
-	deltaTime = computeDeltaTimeMS(&startTime);
+    data->release();
+    return ret;
+}
 
-	if ((deltaTime > kPMHaltTimeoutMS) || (gIOKitDebug & kIOLogDebugPower))
-	{
-		_IOServiceInterestNotifier * notifier;
-		notifier = OSDynamicCast(_IOServiceInterestNotifier, object);
+// MARK: -
+// MARK: Display Wrangler
 
-		// IOService children of IOPMrootDomain are not instrumented.
-		// Only IORootParent currently falls under that group.
+//******************************************************************************
+// displayWranglerNotification
+//
+// Handle the notification when the IODisplayWrangler changes power state.
+//******************************************************************************
 
-		if (notifier)
-		{
-			KLOG("%s handler %p took %u ms\n",
-				(ctx->MessageType == kIOMessageSystemWillPowerOff) ?
-					"PowerOff" : "Restart",
-				notifier->handler, (uint32_t) deltaTime );
-		}
-	}
+IOReturn IOPMrootDomain::displayWranglerNotification(
+    void * target, void * refCon,
+    UInt32 messageType, IOService * service,
+    void * messageArgument, vm_size_t argSize )
+{
+#if !NO_KERNEL_HID
+    int                                 displayPowerState;
+    IOPowerStateChangeNotification *    params =
+            (IOPowerStateChangeNotification *) messageArgument;
 
-	ctx->Counter++;
-}
+    if ((messageType != kIOMessageDeviceWillPowerOff) &&
+        (messageType != kIOMessageDeviceHasPoweredOn))
+        return kIOReturnUnsupported;
 
-void IOPMrootDomain::handlePlatformHaltRestart( UInt32 pe_type )
-{
-	HaltRestartApplierContext	ctx;
-	AbsoluteTime				startTime;
-	UInt32						deltaTime;
+    ASSERT_GATED();
+    if (!gRootDomain)
+        return kIOReturnUnsupported;
 
-	memset(&ctx, 0, sizeof(ctx));
-	ctx.RootDomain = this;
+    displayPowerState = params->stateNumber;
+    DLOG("DisplayWrangler message 0x%x, power state %d\n",
+              (uint32_t) messageType, displayPowerState);
 
-	clock_get_uptime(&startTime);
-	switch (pe_type)
-	{
-		case kPEHaltCPU:
-        case kPEUPSDelayHaltCPU:
-			ctx.PowerState  = OFF_STATE;
-			ctx.MessageType = kIOMessageSystemWillPowerOff;
-			break;
+    switch (messageType) {
+       case kIOMessageDeviceWillPowerOff:
 
-		case kPERestartCPU:
-			ctx.PowerState  = RESTART_STATE;
-			ctx.MessageType = kIOMessageSystemWillRestart;
-			break;
+            // Display wrangler has dropped power due to display idle
+            // or forced system sleep.
+            //
+            // 4 Display ON
+            // 3 Display Dim
+            // 2 Display Sleep
+            // 1 Not visible to user
+            // 0 Not visible to user
 
-		default:
-			return;
-	}
+            if (displayPowerState > 2)
+                break;
+
+            gRootDomain->evaluatePolicy( kStimulusDisplayWranglerSleep );
+            break;
 
-	// Notify legacy clients
-	applyToInterested(gIOPriorityPowerStateInterest, platformHaltRestartApplier, &ctx);
+        case kIOMessageDeviceHasPoweredOn:
 
-    // For normal shutdown, turn off File Server Mode.
-    if (kPEHaltCPU == pe_type)
-    {
-        const OSSymbol * setting = OSSymbol::withCString(kIOPMSettingRestartOnPowerLossKey);
-        OSNumber * num = OSNumber::withNumber((unsigned long long) 0, 32);
-        if (setting && num)
-        {
-            setPMSetting(setting, num);
-            setting->release();
-            num->release();
-        }
-    }
+            // Display wrangler has powered on due to user activity 
+            // or wake from sleep.
 
-	// Notify in power tree order
-	notifySystemShutdown(this, ctx.MessageType);
+            if ( 4 != displayPowerState )
+                break;
 
-	deltaTime = computeDeltaTimeMS(&startTime);
-	KLOG("%s all drivers took %u ms\n",
-		(ctx.MessageType == kIOMessageSystemWillPowerOff) ?
-			"PowerOff" : "Restart",
-		(uint32_t) deltaTime );
+            gRootDomain->evaluatePolicy( kStimulusDisplayWranglerWake );
+            break;
+    }
+#endif
+    return kIOReturnUnsupported;
 }
 
-
-//******************************************************************************
-// registerInterest
+//******************************************************************************
+// displayWranglerMatchPublished
 //
+// Receives a notification when the IODisplayWrangler is published.
+// When it's published we install a power state change handler.
 //******************************************************************************
 
-IONotifier * IOPMrootDomain::registerInterest(
-                const OSSymbol * typeOfInterest,
-                IOServiceInterestHandler handler,
-                void * target, void * ref )
+bool IOPMrootDomain::displayWranglerMatchPublished( 
+    void * target, 
+    void * refCon,
+    IOService * newService,
+    IONotifier * notifier __unused)
 {
-    IONotifier *    notifier;
-    bool            isConfigd;
-
-    isConfigd = typeOfInterest &&
-                typeOfInterest->isEqualTo(kIOPMPrivilegedPowerInterest);
-
-    if (isConfigd)
-        typeOfInterest = gIOAppPowerStateInterest;
-
-    notifier = super::registerInterest(typeOfInterest, handler, target, ref);
-
-#if ROOT_DOMAIN_RUN_STATES
-    if (isConfigd && notifier && pmPowerStateQueue)
+#if !NO_KERNEL_HID
+    // found the display wrangler, now install a handler
+    if( !newService->registerInterest( gIOGeneralInterest, 
+                            &displayWranglerNotification, target, 0) ) 
     {
-        notifier->retain();
-        if (pmPowerStateQueue->submitPowerEvent(
-                kPowerEventConfigdRegisteredInterest, notifier) == false)
-            notifier->release();
+        return false;
     }
 #endif
-
-    return notifier;
+    return true;
 }
 
-static bool clientMessageFilter( OSObject * object, void * arg )
+//******************************************************************************
+// reportUserInput
+//
+//******************************************************************************
+
+void IOPMrootDomain::reportUserInput( void )
 {
-#if ROOT_DOMAIN_RUN_STATES
-#if LOG_INTEREST_CLIENTS
-    IOPMInterestContext * context = (IOPMInterestContext *) arg;
-#endif
-    bool    allow = false;
+#if !NO_KERNEL_HID
+    OSIterator * iter;
 
-    switch (gMessageClientType)
+    if(!wrangler) 
     {
-        case kMessageClientNone:
-            allow = false;
-            break;
-        
-        case kMessageClientAll:
-            allow = true;
-            break;
-
-        case kMessageClientConfigd:
-            allow = ((object == (OSObject *) gConfigdNotifier) ||
-                     (object == (OSObject *) gSysPowerDownNotifier));
-            break;
+        iter = getMatchingServices(serviceMatching("IODisplayWrangler"));
+        if(iter) 
+        {
+            wrangler = (IOService *) iter->getNextObject();
+            iter->release();
+        }
     }
 
-#if LOG_INTEREST_CLIENTS
-    if (allow)
-        DLOG("system message %x to %p\n",
-            context->msgType, object);
-#endif
-
-    return allow;
-#else
-    return true;
+    if(wrangler)
+        wrangler->activityTickle(0,0);
 #endif
 }
 
+// MARK: -
+// MARK: Battery
 
 //******************************************************************************
-// tellChangeDown
+// batteryPublished
 //
-// We override the superclass implementation so we can send a different message
-// type to the client or application being notified.
+// Notification on battery class IOPowerSource appearance
 //******************************************************************************
 
-bool IOPMrootDomain::tellChangeDown( unsigned long stateNum )
-{
-    bool    done;
+bool IOPMrootDomain::batteryPublished( 
+    void * target, 
+    void * root_domain,
+    IOService * resourceService,
+    IONotifier * notifier __unused )
+{    
+    // rdar://2936060&4435589    
+    // All laptops have dimmable LCD displays
+    // All laptops have batteries
+    // So if this machine has a battery, publish the fact that the backlight
+    // supports dimming.
+    ((IOPMrootDomain *)root_domain)->publishFeature("DisplayDims");
 
-    DLOG("tellChangeDown %u->%u, R-state %u\n",
-        (uint32_t) getPowerState(), (uint32_t) stateNum, runStateIndex);
+    return (true);
+}
 
-    switch ( stateNum ) {
-        case DOZE_STATE:
-        case SLEEP_STATE:
+// MARK: -
+// MARK: System PM Policy
 
-            if (!ignoreChangeDown)
-            {
-                userActivityAtSleep = userActivityCount;
-                hibernateAborted = false;
-                DLOG("tellChangeDown::userActivityAtSleep %d\n", userActivityAtSleep);
+//******************************************************************************
+// checkSystemCanSleep
+//
+//******************************************************************************
 
-                // Direct callout into OSKext so it can disable kext unloads
-                // during sleep/wake to prevent deadlocks.
-                OSKextSystemSleepOrWake( kIOMessageSystemWillSleep );
+bool IOPMrootDomain::checkSystemCanSleep( IOOptionBits options )
+{
+    int err = 0;
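+    // err identifies the first condition that blocks sleep; zero permits sleep.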
 
-                if ( (SLEEP_STATE == stateNum) && sleepSupportedPEFunction )
-                {
-                    // Reset PCI prevent sleep flag before calling platform driver.
-                    OSBitAndAtomic(~kPCICantSleep, &platformSleepSupport);
+    // Conditions that prevent idle and demand system sleep.
 
-                    // Skip PCI check for maintenance sleep.
-                    if ((runStateFlags & kRStateFlagSuppressPCICheck) == 0)
-                    {
-                        // Determine if the machine supports sleep, or must doze.
-                        getPlatform()->callPlatformFunction(
-                                        sleepSupportedPEFunction, false,
-                                        NULL, NULL, NULL, NULL);
-                    }
+    do {
+        if (userDisabledAllSleep)
+        {
+            err = 1;        // 1. user-space sleep kill switch
+            break;
+        }
 
-                    // If the machine only supports doze, the callPlatformFunction call
-                    // boils down to IOPMrootDomain::setSleepSupported(kPCICantSleep), 
-                    // otherwise nothing.
-                }
+        if (systemBooting || systemShutdown)
+        {
+            err = 2;        // 2. restart or shutdown in progress
+            break;
+        }
 
-                // Notify platform that sleep has begun
-                getPlatform()->callPlatformFunction(
-                                sleepMessagePEFunction, false,
-                                (void *)(uintptr_t) kIOMessageSystemWillSleep,
-                                NULL, NULL, NULL);
+        if (options == 0)
+            break;
 
-                // Update canSleep and kIOSleepSupportedKey property so drivers
-                // can tell if platform is going to sleep versus doze. 
+        // Conditions above peg the system at full wake.
+        // Conditions below prevent system sleep but do not prevent
+        // dark wake, and must be evaluated from a gated context.
 
-#if CONFIG_SLEEP
-                canSleep = true;
-#else
-                canSleep = false;
+#if !CONFIG_SLEEP
+        err = 3;            // 3. config does not support sleep
+        break;
 #endif
-                if (!sleepIsSupported)
-                    canSleep = false;
-                if (platformSleepSupport & kPCICantSleep)
-                    canSleep = false;
-                setProperty(kIOSleepSupportedKey, canSleep);
-                DLOG("canSleep %d\n", canSleep);
-
-                // Publish the new sleep-wake UUID
-                publishSleepWakeUUID(true);
-
-                // Two change downs are sent by IOServicePM. Ignore the 2nd.
-                ignoreChangeDown = true;
-                
-                tracePoint( kIOPMTracePointSystemSleepAppsPhase);
-            }
 
-            DLOG("kIOMessageSystemWillSleep (%d)\n", gMessageClientType);
-            done = super::tellClientsWithResponse(
-                    kIOMessageSystemWillSleep, clientMessageFilter);
-            break;
+        if (lowBatteryCondition)
+        {
+            break;          // always sleep on low battery
+        }
 
-        default:
-            done = super::tellChangeDown(stateNum);
+        if (childPreventSystemSleep)
+        {
+            err = 4;        // 4. child prevent system sleep clamp
             break;
-    }
-    return done;
-}
-
+        }
 
-//******************************************************************************
-// askChangeDown
-//
-// We override the superclass implementation so we can send a different message
-// type to the client or application being notified.
-//
-// This must be idle sleep since we don't ask during any other power change.
-//******************************************************************************
+        if (getPMAssertionLevel( kIOPMDriverAssertionCPUBit ) ==
+            kIOPMDriverAssertionLevelOn)
+        {
+            err = 5;        // 5. CPU assertion
+            break;
+        }
 
-bool IOPMrootDomain::askChangeDown( unsigned long stateNum )
-{
-    DLOG("askChangeDown %u->%u, R-state %u\n",
-        (uint32_t) getPowerState(), (uint32_t) stateNum, runStateIndex);
-    DLOG("kIOMessageCanSystemSleep (%d)\n", gMessageClientType);
+        if (pciCantSleepValid)
+        {
+            if (pciCantSleepFlag)
+                err = 6;    // 6. PCI card does not support PM (cached)
+            break;
+        }
+        else if (sleepSupportedPEFunction &&
+                 CAP_HIGHEST(kIOPMSystemCapabilityGraphics))
+        {            
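+            // First evaluation: probe the platform for PCI sleep support once,
+            // and cache the result in pciCantSleepValid / pciCantSleepFlag.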
+            IOReturn ret;
+            OSBitAndAtomic(~kPCICantSleep, &platformSleepSupport);
+            ret = getPlatform()->callPlatformFunction(
+                                    sleepSupportedPEFunction, false,
+                                    NULL, NULL, NULL, NULL);
+            pciCantSleepValid = true;
+            pciCantSleepFlag  = false;
+            if ((platformSleepSupport & kPCICantSleep) ||
+                ((ret != kIOReturnSuccess) && (ret != kIOReturnUnsupported)))
+            {
+                err = 6;    // 6. PCI card does not support PM
+                pciCantSleepFlag = true;
+                break;
+            }
+        }
+    }
+    while (false);
 
-    return super::tellClientsWithResponse(
-                    kIOMessageCanSystemSleep,
-                    clientMessageFilter);
+    if (err)
+    {
+        DLOG("System sleep prevented by %d\n", err);
+        return false;
+    }
+    return true;
 }
 
-
 //******************************************************************************
-// tellNoChangeDown
-//
-// Notify registered applications and kernel clients that we are not dropping
-// power.
-//
-// We override the superclass implementation so we can send a different message
-// type to the client or application being notified.
+// adjustPowerState
 //
-// This must be a vetoed idle sleep, since no other power change can be vetoed.
+// Conditions that affect our wake/sleep decision have changed.
+// If conditions dictate that the system must remain awake, clamp the power
+// state to the maximum with changePowerStateToPriv(ON_STATE). Otherwise, if
+// sleepASAP is true, remove the power clamp and allow the power state to
+// drop to SLEEP_STATE.
 //******************************************************************************
 
-void IOPMrootDomain::tellNoChangeDown( unsigned long stateNum )
+void IOPMrootDomain::adjustPowerState( bool sleepASAP )
 {
-    DLOG("tellNoChangeDown %u->%u, R-state %u\n",
-        (uint32_t) getPowerState(), (uint32_t) stateNum, runStateIndex);
+    DLOG("adjustPowerState ps %u, asap %d, slider %ld\n",
+        (uint32_t) getPowerState(), sleepASAP, sleepSlider);
 
-	// Sleep canceled, clear the sleep trace point.
-    tracePoint(kIOPMTracePointSystemUp);
+    ASSERT_GATED();
 
-    if (idleSeconds && !wrangler)
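+    // A sleepSlider of zero means idle sleep is disabled; hold the domain at ON.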
+    if ((sleepSlider == 0) || !checkSystemCanSleep())
     {
-        // stay awake for at least idleSeconds
-        sleepASAP = false;
-        startIdleSleepTimer(idleSeconds);
+        changePowerStateToPriv(ON_STATE);
+    }
+    else if ( sleepASAP )
+    {
+        changePowerStateToPriv(SLEEP_STATE);
     }
-    DLOG("kIOMessageSystemWillNotSleep (%d)\n", gMessageClientType);
-    return tellClients(kIOMessageSystemWillNotSleep, clientMessageFilter);
 }
 
-
 //******************************************************************************
-// tellChangeUp
-//
-// Notify registered applications and kernel clients that we are raising power.
+// dispatchPowerEvent
 //
-// We override the superclass implementation so we can send a different message
-// type to the client or application being notified.
+// IOPMPowerStateQueue callback function. Running on PM work loop thread.
 //******************************************************************************
 
-void IOPMrootDomain::tellChangeUp( unsigned long stateNum )
+void IOPMrootDomain::dispatchPowerEvent(
+    uint32_t event, void * arg0, uint64_t arg1 )
 {
-    OSData *publishPMStats = NULL;
-
-    DLOG("tellChangeUp %u->%u, R-state %u\n",
-        (uint32_t) getPowerState(), (uint32_t) stateNum, runStateIndex);
-
-    ignoreChangeDown = false;
+    DLOG("power event %u args %p 0x%llx\n", event, arg0, arg1);
+    ASSERT_GATED();
 
-    if ( stateNum == ON_STATE )
+    switch (event)
     {
-        // Direct callout into OSKext so it can disable kext unloads
-        // during sleep/wake to prevent deadlocks.
-        OSKextSystemSleepOrWake( kIOMessageSystemHasPoweredOn );
-
-        // Notify platform that sleep was cancelled or resumed.
-        getPlatform()->callPlatformFunction(
-                        sleepMessagePEFunction, false,
-                        (void *)(uintptr_t) kIOMessageSystemHasPoweredOn,
-                        NULL, NULL, NULL);
+        case kPowerEventFeatureChanged:
+            messageClients(kIOPMMessageFeatureChange, this);
+            break;
 
-        if (getPowerState() == ON_STATE)
-        {
-            // this is a quick wake from aborted sleep
-            if (idleSeconds && !wrangler)
+        case kPowerEventReceivedPowerNotification:
+            handlePowerNotification( (UInt32)(uintptr_t) arg0 );
+            break;
+        
+        case kPowerEventSystemBootCompleted:
+            if (systemBooting)
             {
-                // stay awake for at least idleSeconds
-                sleepASAP = false;
-                startIdleSleepTimer(idleSeconds);
-            }
-            DLOG("kIOMessageSystemWillPowerOn (%d)\n", gMessageClientType);
-            tellClients(kIOMessageSystemWillPowerOn, clientMessageFilter);
-        }
-#if	HIBERNATION
-        else
-        {
-            IOHibernateSystemPostWake();
-        }
-#endif
- 
-        tracePoint(kIOPMTracePointSystemWakeAppsPhase);
-        publishPMStats = OSData::withBytes(&pmStats, sizeof(pmStats));
-        setProperty(kIOPMSleepStatisticsKey, publishPMStats);
-        publishPMStats->release();
-        bzero(&pmStats, sizeof(pmStats));
+                systemBooting = false;
 
-        if (pmStatsAppResponses) 
-        {
-            setProperty(kIOPMSleepStatisticsAppsKey, pmStatsAppResponses);
-            pmStatsAppResponses->release();
-            pmStatsAppResponses = OSArray::withCapacity(5);
-        }
-        
-        DLOG("kIOMessageSystemHasPoweredOn (%d)\n", gMessageClientType);
-        tellClients(kIOMessageSystemHasPoweredOn, clientMessageFilter);
+                // If lid is closed, re-send lid closed notification
+                // now that booting is complete.
+                if ( clamshellClosed )
+                {
+                    handlePowerNotification(kLocalEvalClamshellCommand);
+                }
+                evaluatePolicy( kStimulusAllowSystemSleepChanged );
+            }
+            break;
 
-        tracePoint(kIOPMTracePointSystemUp);
-    }
-}
+        case kPowerEventSystemShutdown:
+            if (kOSBooleanTrue == (OSBoolean *) arg0)
+            {
+                /* We set systemShutdown = true during shutdown
+                   to prevent sleep at unexpected times while loginwindow is trying
+                   to shut down apps and while the OS is trying to transition to
+                   complete power off.
+
+                   It is set as soon as loginwindow shows the "shutdown
+                   countdown dialog", and stays set through individual app
+                   termination and black screen kernel shutdown.
+                 */
+                systemShutdown = true;
+            } else {
+                /*
+                 A shutdown was initiated but then cancelled;
+                 clear systemShutdown back to false here.
+                */
+                systemShutdown = false;            
+            }
+            break;
 
+        case kPowerEventUserDisabledSleep:
+            userDisabledAllSleep = (kOSBooleanTrue == (OSBoolean *) arg0);
+            break;
 
-//******************************************************************************
-// reportUserInput
-//
-//******************************************************************************
+        case kPowerEventRegisterSystemCapabilityClient:
+            if (systemCapabilityNotifier)
+            {
+                systemCapabilityNotifier->release();
+                systemCapabilityNotifier = 0;
+            }
+            if (arg0)
+            {
+                systemCapabilityNotifier = (IONotifier *) arg0;
+                systemCapabilityNotifier->retain();
+            }
+            /* intentional fall-through */
 
-void IOPMrootDomain::reportUserInput( void )
-{
-#if !NO_KERNEL_HID
-    OSIterator * iter;
+        case kPowerEventRegisterKernelCapabilityClient:
+            if (!_joinedCapabilityClients)
+                _joinedCapabilityClients = OSSet::withCapacity(8);
+            if (arg0)
+            {
+                IONotifier * notify = (IONotifier *) arg0;
+                if (_joinedCapabilityClients)
+                {
+                    _joinedCapabilityClients->setObject(notify);
+                    synchronizePowerTree( kIOPMSyncNoChildNotify );
+                }
+                notify->release();
+            }
+            break;
 
-    if(!wrangler) 
-    {
-        iter = getMatchingServices(serviceMatching("IODisplayWrangler"));
-        if(iter) 
-        {
-            wrangler = (IOService *) iter->getNextObject();
-            iter->release();
-        }
-    }
+        case kPowerEventPolicyStimulus:
+            if (arg0)
+            {
+                int stimulus = (uintptr_t) arg0;
+                evaluatePolicy( stimulus, (uint32_t) arg1 );
+            }
+            break;
 
-    if(wrangler)
-        wrangler->activityTickle(0,0);
-#endif
-}
+        case kPowerEventAssertionCreate:
+            if (pmAssertions) {
+                pmAssertions->handleCreateAssertion((OSData *)arg0);
+            }
+            break;
 
 
-//******************************************************************************
-// setQuickSpinDownTimeout
-//
-//******************************************************************************
+        case kPowerEventAssertionRelease:
+            if (pmAssertions) {
+                pmAssertions->handleReleaseAssertion(arg1);
+            }
+            break;
 
-void IOPMrootDomain::setQuickSpinDownTimeout( void )
-{
-    ASSERT_GATED();
-    setAggressiveness(
-        kPMMinutesToSpinDown, 0, kAggressivesOptionQuickSpindownEnable );
+        case kPowerEventAssertionSetLevel:
+            if (pmAssertions) {
+                pmAssertions->handleSetAssertionLevel(arg1, (IOPMDriverAssertionLevel)(uintptr_t)arg0);
+            }
+            break;
+            
+        case kPowerEventQueueSleepWakeUUID:
+            handleQueueSleepWakeUUID((OSObject *)arg0);
+            break;
+        case kPowerEventPublishSleepWakeUUID:
+            handlePublishSleepWakeUUID((bool)arg0);
+            break;
+    }
 }
 
-
 //******************************************************************************
-// restoreUserSpinDownTimeout
+// systemPowerEventOccurred
+//
+// The power controller is notifying us of a hardware-related power management
+// event that we must handle. 
 //
+// systemPowerEventOccurred covers the same functionality that
+// receivePowerNotification does; it simply provides a richer API for conveying
+// more information.
 //******************************************************************************
 
-void IOPMrootDomain::restoreUserSpinDownTimeout( void )
+IOReturn IOPMrootDomain::systemPowerEventOccurred(
+    const OSSymbol *event,
+    uint32_t intValue)
 {
-    ASSERT_GATED();
-    setAggressiveness(
-        kPMMinutesToSpinDown, 0, kAggressivesOptionQuickSpindownDisable );
-}
+    IOReturn        attempt = kIOReturnSuccess;
+    OSNumber        *newNumber = NULL;
+
+    if (!event) 
+        return kIOReturnBadArgument;
+        
+    newNumber = OSNumber::withNumber(intValue, 8*sizeof(intValue));
+    if (!newNumber)
+        return kIOReturnInternalError;
 
+    attempt = systemPowerEventOccurred(event, (OSObject *)newNumber);
 
-//******************************************************************************
-// changePowerStateTo & changePowerStateToPriv
-//
-// Override of these methods for logging purposes.
-//******************************************************************************
+    newNumber->release();
 
-IOReturn IOPMrootDomain::changePowerStateTo( unsigned long ordinal )
-{
-    return kIOReturnUnsupported;    // ignored
+    return attempt;
 }
 
-IOReturn IOPMrootDomain::changePowerStateToPriv( unsigned long ordinal )
+IOReturn IOPMrootDomain::systemPowerEventOccurred(
+    const OSSymbol *event,
+    OSObject *value)
 {
-    DLOG("changePowerStateToPriv(%lu)\n", ordinal);
+    OSDictionary *thermalsDict = NULL;
+    bool shouldUpdate = true;
+    
+    if (!event || !value) 
+        return kIOReturnBadArgument;
 
-	if ( (getPowerState() == DOZE_STATE) && (ordinal != ON_STATE) )
-	{
-		return kIOReturnSuccess;
-	}
+    // LOCK
+    // We reuse featuresDictLock because it already exists and guards
+    // the very infrequently used publish/remove feature mechanism, so there
+    // is zero risk of stepping on that lock.
+    if (featuresDictLock) IOLockLock(featuresDictLock);
 
-    if ( (userDisabledAllSleep || systemBooting || systemShutdown) &&
-         (ordinal == SLEEP_STATE) )
-    {
-        DLOG("SLEEP rejected, forced to ON state (UD %d, SB %d, SS %d)\n",
-            userDisabledAllSleep, systemBooting, systemShutdown);
+    thermalsDict = (OSDictionary *)getProperty(kIOPMRootDomainPowerStatusKey);
+                   
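+    // Mutate a copy of the published dictionary, then republish it below.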
+    if (thermalsDict && OSDynamicCast(OSDictionary, thermalsDict)) {
+        thermalsDict = OSDictionary::withDictionary(thermalsDict);                        
+    } else {
+        thermalsDict = OSDictionary::withCapacity(1);
+    }
 
-        super::changePowerStateToPriv(ON_STATE);
+    if (!thermalsDict) {
+        shouldUpdate = false;
+        goto exit;
     }
 
-    return super::changePowerStateToPriv(ordinal);
-}
+    thermalsDict->setObject (event, value);
 
-//******************************************************************************
-// activity detect
-//
-//******************************************************************************
+    setProperty (kIOPMRootDomainPowerStatusKey, thermalsDict);
 
-bool IOPMrootDomain::activitySinceSleep(void)
-{
-    return (userActivityCount != userActivityAtSleep);
-}
+    thermalsDict->release();
 
-bool IOPMrootDomain::abortHibernation(void)
-{
-    bool ret = activitySinceSleep();
+exit:
+    // UNLOCK
+    if (featuresDictLock) IOLockUnlock(featuresDictLock);
 
-    if (ret && !hibernateAborted)
-    {
-        DLOG("activitySinceSleep ABORT [%d, %d]\n", userActivityCount, userActivityAtSleep);
-        hibernateAborted = true;
-    }
-    return (ret);
-}
+    if (shouldUpdate)
+        messageClients (kIOPMMessageSystemPowerEventOccurred, (void *)NULL);
 
-extern "C" int
-hibernate_should_abort(void)
-{
-    if (gRootDomain)
-        return (gRootDomain->abortHibernation());
-    else
-        return (0);
+    return kIOReturnSuccess;
 }
 
 //******************************************************************************
-// updateRunState
+// receivePowerNotification
 //
+// The power controller is notifying us of a hardware-related power management
+// event that we must handle. This may be a result of an 'environment' interrupt
+// from the power management microcontroller.
 //******************************************************************************
 
-void IOPMrootDomain::updateRunState( uint32_t inRunState )
+IOReturn IOPMrootDomain::receivePowerNotification( UInt32 msg )
 {
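+    // Not necessarily gated; forward to the PM work loop, where
+    // handlePowerNotification() runs in a gated context.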
-#if ROOT_DOMAIN_RUN_STATES
-    if (inRunState < kRStateCount)
-    {
-        runStateIndex = nextRunStateIndex = inRunState;
-        runStateFlags = gRStateFlags[inRunState];
-
-        setProperty(
-            kIOPMRootDomainRunStateKey,
-            (unsigned long long) inRunState, 32);
-    }
-#endif
+    pmPowerStateQueue->submitPowerEvent(
+        kPowerEventReceivedPowerNotification, (void *) msg );
+    return kIOReturnSuccess;
 }
 
+void IOPMrootDomain::handlePowerNotification( UInt32 msg )
+{
+    bool        eval_clamshell = false;
 
-#if ROOT_DOMAIN_RUN_STATES
-//******************************************************************************
-// tagPowerPlaneService
-//
-// Running on PM work loop thread.
-//******************************************************************************
+    ASSERT_GATED();
 
-void IOPMrootDomain::tagPowerPlaneService(
-        IOService * service,
-        uint32_t *  rdFlags )
-{
-    *rdFlags = 0;
+    /*
+     * Local (IOPMrootDomain only) eval clamshell command
+     */
+    if (msg & kLocalEvalClamshellCommand)
+    {
+        eval_clamshell = true;
+    }
 
-    if (service->getProperty("IOPMStrictTreeOrder") ||
-        service->metaCast("IODisplayWrangler") ||
-        OSDynamicCast(OSNumber,
-            service->getProperty("IOPMUnattendedWakePowerState")))
+    /*
+     * Overtemp
+     */
+    if (msg & kIOPMOverTemp)
     {
-        *rdFlags |= kServiceFlagGraphics;
-        DLOG("tagged device %s %x\n", service->getName(), *rdFlags);
+        MSG("PowerManagement emergency overtemp signal. Going to sleep!");
+        privateSleepSystem (kIOPMSleepReasonThermalEmergency);
     }
 
-    // Locate the first PCI host bridge.
-    if (!pciHostBridgeDevice && service->metaCast("IOPCIBridge"))
+    /*
+     * Sleep Now!
+     */
+    if (msg & kIOPMSleepNow) 
     {
-        IOService * provider = service->getProvider();
-        if (OSDynamicCast(IOPlatformDevice, provider) &&
-            provider->inPlane(gIODTPlane))
-        {
-            pciHostBridgeDevice = provider;
-            DLOG("PMTrace found PCI host bridge %s->%s\n",
-                provider->getName(), service->getName());
-        }
+        privateSleepSystem (kIOPMSleepReasonSoftware);
+    }
+    
+    /*
+     * Power Emergency
+     */
+    if (msg & kIOPMPowerEmergency) 
+    {
+        lowBatteryCondition = true;
+        privateSleepSystem (kIOPMSleepReasonLowPower);
     }
 
-    // Tag top-level PCI devices. The order of PMinit() call does not
-	// change across boots and is used as the PCI bit number.
-    if (pciHostBridgeDevice && service->metaCast("IOPCIDevice"))
+    /*
+     * Clamshell OPEN
+     */
+    if (msg & kIOPMClamshellOpened) 
     {
-        // Would prefer to check built-in property, but tagPowerPlaneService()
-        // is called before pciDevice->registerService().
-        IORegistryEntry * parent = service->getParentEntry(gIODTPlane);
-        if ((parent == pciHostBridgeDevice) && service->getProperty("acpi-device"))
+        // Received clamshell open message from the clamshell controlling driver
+        // Update our internal state and tell general interest clients
+        clamshellClosed = false;
+        clamshellExists = true;
+
+        if (msg & kIOPMSetValue)
         {
-            int bit = pmTracer->recordTopLevelPCIDevice( service );
-            if (bit >= 0)
-            {
-				// Save the assigned bit for fast lookup.
-                bit &= 0xff;
-                *rdFlags |= (kServiceFlagTopLevelPCI | (bit << 8));
-            }
-        }
-    }
-}
+            reportUserInput();
+        }
 
+        // Tell PMCPU
+        informCPUStateChange(kInformLid, 0);
 
-//******************************************************************************
-// handleActivityTickleForService
-//
-// Called by IOService::activityTickle() for a tickle that is requesting the
-// service to raise power state. Called from driver thread.
-//******************************************************************************
+        // Tell general interest clients        
+        sendClientClamshellNotification();
 
-void IOPMrootDomain::handleActivityTickleForService( IOService * service, 
-                                                     unsigned long type,
-                                                     unsigned long currentPowerState,
-                                                     uint32_t activityTickleCount )
-{
-    if ((service == wrangler) 
-)
-    {
-        bool aborting = ((lastSleepReason == kIOPMSleepReasonIdle) 
+        bool aborting =  ((lastSleepReason == kIOPMSleepReasonClamshell)
+                       || (lastSleepReason == kIOPMSleepReasonIdle) 
                        || (lastSleepReason == kIOPMSleepReasonMaintenance));
         if (aborting) userActivityCount++;
-        DLOG("display wrangler tickled1 %d lastSleepReason %d\n", userActivityCount, lastSleepReason);
-    }
-
-    // Tickle directed to IODisplayWrangler while graphics is disabled.
-    // Bring graphics online.
+        DLOG("clamshell tickled %d lastSleepReason %d\n", userActivityCount, lastSleepReason);
+    } 
 
-    if ((!currentPowerState) &&
-        (service == wrangler) &&
-        (runStateIndex > kRStateNormal) &&
-        (false == wranglerTickled) &&
-        (false == lowBatteryCondition))
+    /* 
+     * Clamshell CLOSED
+     * Send the clamshell interest notification since the lid is closing. 
+     */
+    if (msg & kIOPMClamshellClosed)
     {
-        DLOG("display wrangler tickled\n");
-        if (kIOLogPMRootDomain & gIOKitDebug)
-            OSReportWithBacktrace("Display Tickle");
-        wranglerTickled = true;
-        synchronizePowerTree();
-    }
-}
-
-//******************************************************************************
-// handlePowerChangeStartForService
-//
-// Running on PM work loop thread.
-//******************************************************************************
+        // Received clamshell closed message from the clamshell controlling driver
+        // Update our internal state and tell general interest clients
+        clamshellClosed = true;
+        clamshellExists = true;
 
-void IOPMrootDomain::handlePowerChangeStartForService(
-        IOService *     service,
-        uint32_t *      rdFlags,
-        uint32_t        newPowerState,
-        uint32_t        changeFlags )
-{
-    if (service == this)
-    {
-        uint32_t currentPowerState = (uint32_t) getPowerState();
-        uint32_t nextRunStateFlags;
+        // Tell PMCPU
+        informCPUStateChange(kInformLid, 1);
 
-        assert(nextRunStateIndex < kRStateCount);
-        nextRunStateFlags = gRStateFlags[nextRunStateIndex];
+        // Tell general interest clients
+        sendClientClamshellNotification();
+        
+        // Set eval_clamshell so we can attempt system sleep below
+        eval_clamshell = true;
+    }
 
-        gMessageClientType = kMessageClientNone;
+    /*
+     * Set Desktop mode (sent from graphics)
+     *
+     *  -> reevaluate lid state
+     */
+    if (msg & kIOPMSetDesktopMode) 
+    {
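+        // kIOPMSetValue carries the new on/off value; strip both bits once consumed.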
+        desktopMode = (0 != (msg & kIOPMSetValue));
+        msg &= ~(kIOPMSetDesktopMode | kIOPMSetValue);
 
-        // Transition towards or away from ON power state.
+        sendClientClamshellNotification();
 
-        if ((currentPowerState != newPowerState) &&
-            ((ON_STATE == newPowerState) || (ON_STATE == currentPowerState)))
+        // Re-evaluate the lid state
+        if( clamshellClosed )
         {
-            if ((runStateFlags & kRStateFlagSuppressMessages) == 0)
-                gMessageClientType = kMessageClientAll;
-            else
-                gMessageClientType = kMessageClientConfigd;
+            eval_clamshell = true;
         }
+    }
+    
+    /*
+     * AC Adaptor connected
+     *
+     *  -> reevaluate lid state
+     */
+    if (msg & kIOPMSetACAdaptorConnected) 
+    {
+        acAdaptorConnected = (0 != (msg & kIOPMSetValue));
+        msg &= ~(kIOPMSetACAdaptorConnected | kIOPMSetValue);
 
-        // Transition caused by deassertion of system notification suppression.
+        // Tell CPU PM
+        informCPUStateChange(kInformAC, !acAdaptorConnected);
 
-        if ((ON_STATE == newPowerState) &&
-            (ON_STATE == currentPowerState) &&
-            ((runStateFlags ^ nextRunStateFlags) & kRStateFlagSuppressMessages))
-        {
-            gMessageClientType = kMessageClientAll;
-        }
+        // Tell BSD if AC is connected
+        //      0 == external power source; 1 == on battery
+        post_sys_powersource(acAdaptorConnected ? 0:1);
 
-        if (ON_STATE == newPowerState)
-        {
-            DLOG("kIOMessageSystemWillPowerOn (%d)\n",
-                gMessageClientType);
-            tellClients(kIOMessageSystemWillPowerOn, clientMessageFilter);
-        }
-        
-        if (SLEEP_STATE == newPowerState)
+        sendClientClamshellNotification();
+
+        // Re-evaluate the lid state
+        if( clamshellClosed )
         {
-            tracePoint(kIOPMTracePointSleepStarted);
+            eval_clamshell = true;
         }
     }
     
-    if (*rdFlags & kServiceFlagTopLevelPCI)
+    /*
+     * Enable Clamshell (external display disappear)
+     *
+     *  -> reevaluate lid state
+     */
+    if (msg & kIOPMEnableClamshell) 
     {
-        pmTracer->tracePCIPowerChange(
-			PMTraceWorker::kPowerChangeStart,
-			service, changeFlags,
-            (*rdFlags >> 8) & 0xff);
-    }
-}
-
+        // Re-evaluate the lid state. The system should sleep when an
+        // external display disappears while operating with the lid closed.
+        if( clamshellClosed && (true == clamshellDisabled) )        
+        {
+            eval_clamshell = true;
+        }
 
-//******************************************************************************
-// handlePowerChangeDoneForService
-//
-// Running on PM work loop thread.
-//******************************************************************************
+        clamshellDisabled = false;
 
-void IOPMrootDomain::handlePowerChangeDoneForService(
-        IOService *     service,
-        uint32_t *      rdFlags,
-        uint32_t        newPowerState,
-        uint32_t        changeFlags )
-{
-    if (*rdFlags & kServiceFlagTopLevelPCI)
-    {
-        pmTracer->tracePCIPowerChange(
-			PMTraceWorker::kPowerChangeCompleted,
-            service, changeFlags,
-            (*rdFlags >> 8) & 0xff);
+        sendClientClamshellNotification();
     }
-}
-
-
-//******************************************************************************
-// overridePowerStateForService
-//
-// Runs on PM work loop thread.
-//******************************************************************************
+    
+    /*
+     * Disable Clamshell (external display appeared)
+     * We don't bother re-evaluating clamshell state. If the system is awake,
+     * the lid is probably open. 
+     */
+    if (msg & kIOPMDisableClamshell) 
+    {
+        clamshellDisabled = true;
 
-void IOPMrootDomain::overridePowerStateForService(
-        IOService *     service,
-        uint32_t *      rdFlags,
-        unsigned long * powerState,
-        uint32_t        changeFlags )
-{
-    uint32_t inPowerState = (uint32_t) *powerState;
+        sendClientClamshellNotification();
+    }
 
-    if ((service == this) && (inPowerState == ON_STATE) &&
-        (changeFlags & kIOPMSynchronize))
+    /*
+     * Evaluate clamshell and SLEEP if appropriate
+     */
+    if ( eval_clamshell && shouldSleepOnClamshellClosed() ) 
     {
-        DLOG("sync root domain %u->%u\n",
-            (uint32_t) getPowerState(), inPowerState);
 
-        // Root Domain is in a reduced R-state, and a HID tickle has
-        // requested a PM tree sync. Begin R-state transition.
 
-        if (runStateIndex != kRStateNormal)
-        {
-            sleepTimerMaintenance = false;
-            hibernateNoDefeat = false;
-            nextRunStateIndex = kRStateNormal;
-            setProperty(
-                kIOPMRootDomainRunStateKey,
-                (unsigned long long) kRStateNormal, 32);            
-        }
+        // SLEEP!
+        privateSleepSystem (kIOPMSleepReasonClamshell);
     }
-
-    if (*rdFlags & kServiceFlagGraphics)
+    else if ( eval_clamshell )
     {
-        DLOG("graphics device %s %u->%u (flags 0x%x)\n",
-            service->getName(), (uint32_t) service->getPowerState(),
-            inPowerState, changeFlags);
+        evaluatePolicy(kStimulusDarkWakeEvaluate);
+    }
 
-        if (inPowerState == 0)
+    /*
+     * Power Button
+     */
+    if (msg & kIOPMPowerButton) 
+    {
+        if (!wranglerAsleep)
         {
-            // Graphics device is powering down, apply limit preventing
-            // device from powering back up later unless we consent.
-
-            if ((*rdFlags & kServiceFlagNoPowerUp) == 0)
-            {
-                *rdFlags |= kServiceFlagNoPowerUp;
-                DLOG("asserted power limit for %s\n",
-                    service->getName());
+            OSString *pbs = OSString::withCString("DisablePowerButtonSleep");
+            // Sleep on power button press unless the DisablePowerButtonSleep
+            // property has been set to true
+            if( pbs ) {
+                if( kOSBooleanTrue != getProperty(pbs))
+                    privateSleepSystem (kIOPMSleepReasonPowerButton);
+                pbs->release();    // release the OSString created above
+            }
         }
         else
-        {
-            uint32_t nextRunStateFlags;
-
-            assert(nextRunStateIndex < kRStateCount);
-            nextRunStateFlags = gRStateFlags[nextRunStateIndex];
-        
-            // Graphics device is powering up. Release power limit at the
-            // did-change machine state.
-
-            if (changeFlags & kIOPMSynchronize)
-            {
-                if ((runStateFlags & kRStateFlagSuppressGraphics) &&
-                    ((nextRunStateFlags & kRStateFlagSuppressGraphics) == 0) &&
-                    (changeFlags & kIOPMDomainDidChange))
-                {
-                    // Woke up without graphics power, but
-                    // HID event has tickled display wrangler.
-                    *rdFlags &= ~kServiceFlagNoPowerUp;
-                    DLOG("removed power limit for %s\n",
-                        service->getName());
-                }
-            }
-            else if ((runStateFlags & kRStateFlagSuppressGraphics) == 0)
-            {
-                *rdFlags &= ~kServiceFlagNoPowerUp;
-            }
-
-            if (*rdFlags & kServiceFlagNoPowerUp)
-            {
-                DLOG("limited %s to power state 0\n",
-                    service->getName());
-                *powerState = 0;
-            }
-        }
+            reportUserInput();
     }
 }
 
-
-//******************************************************************************
-// setMaintenanceWakeCalendar
-//
-//******************************************************************************
-
-IOReturn IOPMrootDomain::setMaintenanceWakeCalendar(
-    const IOPMCalendarStruct * calendar )
-{
-    OSData * data;
-    IOReturn ret;
-
-    if (!calendar)
-        return kIOReturnBadArgument;
-    
-    data = OSData::withBytesNoCopy((void *) calendar, sizeof(*calendar));
-    if (!data)
-        return kIOReturnNoMemory;
-    
-    ret = setPMSetting(gIOPMSettingMaintenanceWakeCalendarKey, data);
-
-    data->release();
-    return ret;
-}
-#endif /* ROOT_DOMAIN_RUN_STATES */
-
-
 //******************************************************************************
-// sysPowerDownHandler
-//
-// Receives a notification when the RootDomain changes state. 
+// evaluatePolicy
 //
-// Allows us to take action on system sleep, power down, and restart after
-// applications have received their power change notifications and replied,
-// but before drivers have powered down. We perform a vfs sync on power down.
+// Evaluate root-domain policy in response to external changes.
 //******************************************************************************
 
-IOReturn IOPMrootDomain::sysPowerDownHandler( void * target, void * refCon,
-                                    UInt32 messageType, IOService * service,
-                                    void * messageArgument, vm_size_t argSize )
+void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg )
 {
-    IOReturn                             ret;
-    IOPowerStateChangeNotification      *params = (IOPowerStateChangeNotification *) messageArgument;
-    IOPMrootDomain                      *rootDomain = OSDynamicCast(IOPMrootDomain, service);
-
-    DLOG("sysPowerDownHandler message %x\n", (uint32_t) messageType);
-
-    if(!rootDomain)
-        return kIOReturnUnsupported;
-
-    switch (messageType) {
-        case kIOMessageSystemWillSleep:
-            // Interested applications have been notified of an impending power
-            // change and have acked (when applicable).
-            // This is our chance to save whatever state we can before powering
-            // down.
-            // We call sync_internal defined in xnu/bsd/vfs/vfs_syscalls.c,
-            // via callout
-#if	HIBERNATION
-            rootDomain->evaluateSystemSleepPolicyEarly();
-            if (rootDomain->hibernateMode && !rootDomain->hibernateDisabled)
-            {
-                // We will ack within 240 seconds
-                params->returnValue = 240 * 1000 * 1000;
-            }
-            else
-#endif
-            // We will ack within 20 seconds
-            params->returnValue = 20 * 1000 * 1000;
-            DLOG("sysPowerDownHandler timeout %d s\n", (int) (params->returnValue / 1000 / 1000));
-            if ( ! OSCompareAndSwap( 0, 1, &gSleepOrShutdownPending ) )
-            {
-                // Purposely delay the ack and hope that shutdown occurs quickly.
-                // Another option is not to schedule the thread and wait for
-                // ack timeout...
-                AbsoluteTime deadline;
-                clock_interval_to_deadline( 30, kSecondScale, &deadline );
-                thread_call_enter1_delayed( rootDomain->diskSyncCalloutEntry, 
-                                            (thread_call_param_t)params->powerRef,
-                                            deadline );
-            }
-            else
-                thread_call_enter1(rootDomain->diskSyncCalloutEntry, (thread_call_param_t)params->powerRef);
-            ret = kIOReturnSuccess;
-            break;
-
-        case kIOMessageSystemWillPowerOff:
-        case kIOMessageSystemWillRestart:
-            ret = kIOReturnUnsupported;
-            break;
+    union {
+        struct {
+            int idleSleepEnabled    : 1;
+            int idleSleepDisabled   : 1;
+            int displaySleep        : 1;
+            int sleepDelayChanged   : 1;
+            int evaluateDarkWake    : 1;
+        } bit;
+        uint32_t u32;
+    } flags;
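+    // The union lets each stimulus below set individual flags.bit.* fields,
+    // while flags.u32 = 0 clears the entire set in a single store.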
+
+    DLOG("evaluatePolicy( %d, 0x%x )\n", stimulus, arg);
 
-        default:
-            ret = kIOReturnUnsupported;
-            break;
-    }
-    return ret;
-}
+    ASSERT_GATED();
+    flags.u32 = 0;
 
-//******************************************************************************
-// publishSleepWakeUUID
-//
-// 
-//******************************************************************************
-void IOPMrootDomain::publishSleepWakeUUID( bool shouldPublish )
-{
-    if (shouldPublish) 
+    switch (stimulus)
     {
-        if (queuedSleepWakeUUIDString) 
-        {
-            if (OSCompareAndSwap(/*old*/ true, /*new*/ false, &gSleepWakeUUIDIsSet))
+        case kStimulusDisplayWranglerSleep:
+            if (!wranglerAsleep)
             {
-                // Upon wake, it takes some time for userland to invalidate the
-                // UUID. If another sleep is initiated during that period, force
-                // a CLEAR message to balance the upcoming SET message.
-
-                messageClients( kIOPMMessageSleepWakeUUIDChange,
-                                kIOPMMessageSleepWakeUUIDCleared );
-
-                DLOG("SleepWake UUID forced clear\n");
+                wranglerAsleep = true;
+                clock_get_uptime(&wranglerSleepTime);
+                flags.bit.displaySleep = true;
             }
+            break;
 
-            setProperty(kIOPMSleepWakeUUIDKey, queuedSleepWakeUUIDString);
-            DLOG("SleepWake UUID published: %s\n", queuedSleepWakeUUIDString->getCStringNoCopy());
-            queuedSleepWakeUUIDString->release();
-            queuedSleepWakeUUIDString = NULL;
-            messageClients(kIOPMMessageSleepWakeUUIDChange, 
-                            kIOPMMessageSleepWakeUUIDSet);
-        }
-    } else {
-        if (OSCompareAndSwap(/*old*/ true, /*new*/ false, &gSleepWakeUUIDIsSet))
-        {
-            DLOG("SleepWake UUID cleared\n");
-            removeProperty(kIOPMSleepWakeUUIDKey);
-            messageClients(kIOPMMessageSleepWakeUUIDChange, 
-                            kIOPMMessageSleepWakeUUIDCleared);        
-        }
-    }
-}
-
-
-//******************************************************************************
-// displayWranglerNotification
-//
-// Receives a notification when the IODisplayWrangler changes state.
-//
-// Allows us to take action on display dim/undim.
-//
-// When the display sleeps we:
-// - Start the idle sleep timer
-// - set the quick spin down timeout
-//
-// On wake from display sleep:
-// - Cancel the idle sleep timer
-// - restore the user's chosen spindown timer from the "quick" spin down value
-//******************************************************************************
-
-IOReturn IOPMrootDomain::displayWranglerNotification(
-    void * target, void * refCon,
-    UInt32 messageType, IOService * service,
-    void * messageArgument, vm_size_t argSize )
-{
-#if !NO_KERNEL_HID
-    int                                 displayPowerState;
-    IOPowerStateChangeNotification *    params =
-            (IOPowerStateChangeNotification *) messageArgument;
-
-    if ((messageType != kIOMessageDeviceWillPowerOff) &&
-        (messageType != kIOMessageDeviceHasPoweredOn))
-        return kIOReturnUnsupported;
+        case kStimulusDisplayWranglerWake:
+            wranglerAsleep = false;
+            flags.bit.idleSleepDisabled = true;
+            break;
 
-    ASSERT_GATED();
-    if (!gRootDomain)
-        return kIOReturnUnsupported;
+        case kStimulusAggressivenessChanged:
+        {
+            unsigned long   minutesToIdleSleep  = 0;
+            unsigned long   minutesToDisplayDim = 0;
+            unsigned long   minutesDelta        = 0;
 
-    displayPowerState = params->stateNumber;
-    DLOG("DisplayWrangler message 0x%x, new power state %d\n",
-              (uint32_t) messageType, displayPowerState);
+            // Fetch latest display and system sleep slider values.
+            getAggressiveness(kPMMinutesToSleep, &minutesToIdleSleep);
+            getAggressiveness(kPMMinutesToDim,   &minutesToDisplayDim);
+            DLOG("aggressiveness changed: system %u->%u, display %u\n",
+                (uint32_t) sleepSlider,
+                (uint32_t) minutesToIdleSleep,
+                (uint32_t) minutesToDisplayDim);
 
-    switch (messageType) {
-       case kIOMessageDeviceWillPowerOff:
+            DLOG("idle time -> %ld secs (ena %d)\n",
+                idleSeconds, (minutesToIdleSleep != 0));
 
-            // The display wrangler has dropped power because of idle display sleep
-            // or force system sleep.
-            //
-            // 4 Display ON
-            // 3 Display Dim
-            // 2 Display Sleep
-            // 1 Not visible to user
-            // 0 Not visible to user
+            if (0x7fffffff == minutesToIdleSleep)
+                minutesToIdleSleep = idleSeconds;
 
-            if (gRootDomain->wranglerAsleep || (displayPowerState > 2))
-                break;
+            // How long to wait before sleeping the system once
+            // the display turns off is indicated by 'extraSleepDelay'.
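+            // For example (hypothetical slider values): with system sleep
+            // at 10 minutes and display dim at 4, minutesDelta and hence
+            // extraSleepDelay come out to 6 minutes.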
 
-            // Record the time the display wrangler went to sleep.
+            if ( minutesToIdleSleep > minutesToDisplayDim )
+                minutesDelta = minutesToIdleSleep - minutesToDisplayDim;
 
-            gRootDomain->wranglerAsleep = true;
-            clock_get_uptime(&gRootDomain->wranglerSleepTime);
+            if ((sleepSlider == 0) && (minutesToIdleSleep != 0))
+                flags.bit.idleSleepEnabled = true;
 
-            // We start a timer here if the System Sleep timer is greater than the
-            // Display Sleep timer. We kick off this timer when the display sleeps.
-            //
-            // Note that, although Display Dim timings may change adaptively accordingly
-            // to the user's activity patterns, Display Sleep _always_ occurs at the
-            // specified interval since last user activity.
+            if ((sleepSlider != 0) && (minutesToIdleSleep == 0))
+                flags.bit.idleSleepDisabled = true;
+
+            if ((minutesDelta != extraSleepDelay) &&
+                !flags.bit.idleSleepEnabled && !flags.bit.idleSleepDisabled)
+                flags.bit.sleepDelayChanged = true;
 
-            if ( gRootDomain->extraSleepDelay )
+            if (systemDarkWake && !darkWakeToSleepASAP &&
+                (flags.bit.idleSleepEnabled || flags.bit.idleSleepDisabled))
             {
-                gRootDomain->startIdleSleepTimer(gRootDomain->extraSleepDelay * 60);            
+                // Reconsider decision to remain in dark wake
+                flags.bit.evaluateDarkWake = true;
             }
-            else if ( gRootDomain->sleepSlider )
-            {
-                // Accelerate disk spindown if system sleep and display sleep
-                // sliders are set to the same value (e.g. both set to 5 min),
-                // and display is about to go dark. Check that spin down timer
-                // is non-zero (zero = never spin down) and system sleep is
-                // not set to never sleep.
 
-                gRootDomain->setQuickSpinDownTimeout();
-            }
+            sleepSlider = minutesToIdleSleep;
+            extraSleepDelay = minutesDelta;
+        }   break;
 
+        case kStimulusDemandSystemSleep:
+            changePowerStateWithOverrideTo( SLEEP_STATE );
             break;
 
-        case kIOMessageDeviceHasPoweredOn:
+        case kStimulusAllowSystemSleepChanged:
+            // FIXME: de-compose to change flags.
+            adjustPowerState();
+            break;
 
-            // The display wrangler has powered on either because of user activity 
-            // or wake from sleep/doze.
+        case kStimulusDarkWakeActivityTickle:
+            if (false == wranglerTickled)
+            {
+                uint32_t    options = 0;
+                IOService * pciRoot = 0;
 
-            if ( 4 != displayPowerState )
-                break;
+                if (rejectWranglerTickle)
+                {
+                    DLOG("rejected tickle, type %u capability %x:%x\n",
+                        _systemTransitionType,
+                        _currentCapability, _pendingCapability);
+                    break;
+                }
+
+                _desiredCapability |=
+                    (kIOPMSystemCapabilityGraphics |
+                     kIOPMSystemCapabilityAudio);
+                
+                if ((kSystemTransitionWake == _systemTransitionType) &&
+                    !(_pendingCapability & kIOPMSystemCapabilityGraphics) &&
+                    !graphicsSuppressed)
+                {
+                    DLOG("Promoting to full wake\n");
+
+                    // Elevate to full wake while waking up to dark wake.
+                    // PM will hold off notifying the graphics subsystem about
+                    // system wake as late as possible, so if a HID event does
+                    // arrive, we can turn on graphics on this wake cycle, and
+                    // not have to wait till the following cycle. That latency
+                    // can be huge on some systems. However, once any graphics
+                    // suppression has taken effect, it is too late. All other
+                    // graphics devices must be similarly suppressed. But the
+                    // delay till the following cycle should be very short.
+
+                    _pendingCapability |=
+                        (kIOPMSystemCapabilityGraphics |
+                         kIOPMSystemCapabilityAudio);
+
+                    // Immediately bring up audio and graphics.
+                    pciRoot = pciHostBridgeDriver;
+
+                    // Notify clients about full wake.
+                    _systemMessageClientMask = kSystemMessageClientAll;
+                    tellClients(kIOMessageSystemWillPowerOn);
+                }
 
-            gRootDomain->wranglerAsleep = false;
-            gRootDomain->adjustPowerState();
-            gRootDomain->cancelIdleSleepTimer();
+                // Unsafe to cancel once graphics was powered.
+                // If system woke from dark wake, the return to sleep can
+                // be cancelled. But "awake -> dark -> sleep" transition
+                // cannot be cancelled.
+                
+                if (!CAP_HIGHEST(kIOPMSystemCapabilityGraphics)) {
+                    options |= kIOPMSyncCancelPowerDown;                    
+                }
 
-            // Change the spindown value back to the user's selection from our
-            // accelerated setting.
-            gRootDomain->restoreUserSpinDownTimeout();
+                synchronizePowerTree( options, pciRoot );
+                wranglerTickled = true;
+                // IOGraphics doesn't light the display even though graphics
+                // is enabled in the kIOMessageSystemCapabilityChange message
+                // (radar 9502104), so do an explicit activity tickle.
+                if(wrangler)
+                    wrangler->activityTickle(0,0);
 
+                if (logWranglerTickle)
+                {
+                    AbsoluteTime    now;
+                    uint64_t        nsec;
+
+                    clock_get_uptime(&now);
+                    SUB_ABSOLUTETIME(&now, &systemWakeTime);
+                    absolutetime_to_nanoseconds(now, &nsec);
+                    MSG("HID tickle %u ms\n",
+                        ((int)((nsec) / 1000000ULL)));
+                    logWranglerTickle = false;
+                }
+            }
             break;
 
-         default:
-             break;
-     }
-#endif
-     return kIOReturnUnsupported;
-}
-
+        case kStimulusDarkWakeEntry:
+        case kStimulusDarkWakeReentry:
+            // Any system transition since the last dark wake transition
+            // will invalidate the stimulus.
 
-//******************************************************************************
-// displayWranglerPublished
-//
-// Receives a notification when the IODisplayWrangler is published.
-// When it's published we install a power state change handler.
-//******************************************************************************
+            if (arg == _systemStateGeneration)
+            {
+                DLOG("dark wake entry\n");
+                systemDarkWake = true;
+                wranglerAsleep = true;
+                clock_get_uptime(&wranglerSleepTime);
 
-bool IOPMrootDomain::displayWranglerPublished( 
-    void * target, 
-    void * refCon,
-    IOService * newService)
-{
-#if !NO_KERNEL_HID
-    if(!gRootDomain)
-        return false;
+                // Always accelerate disk spindown while in dark wake,
+                // even if system does not support/allow sleep.
 
-    gRootDomain->wrangler = newService;
+                cancelIdleSleepTimer();
+                setQuickSpinDownTimeout();
+                flags.bit.evaluateDarkWake = true;
+            }
+            break;
 
-    // we found the display wrangler, now install a handler
-    if( !gRootDomain->wrangler->registerInterest( gIOGeneralInterest, 
-                            &displayWranglerNotification, target, 0) ) 
-    {
-        return false;
-    }
+        case kStimulusDarkWakeEvaluate:
+            if (systemDarkWake)
+            {
+                flags.bit.evaluateDarkWake = true;
+            }
+#if !DARK_TO_FULL_EVALUATE_CLAMSHELL
+            else
+            {
+                // Not routed through kLocalEvalClamshellCommand, to avoid a loop.
+                if (clamshellClosed && shouldSleepOnClamshellClosed() &&
+                    checkSystemCanSleep(true))
+                {
+                    privateSleepSystem( kIOPMSleepReasonClamshell );
+                }
+            }
 #endif
-    return true;
-}
+            break;
 
+    } /* switch(stimulus) */
 
-//******************************************************************************
-// batteryPublished
-//
-// Notification on battery class IOPowerSource appearance
-//******************************************************************************
+    if (flags.bit.evaluateDarkWake && !wranglerTickled)
+    {
+        if (darkWakeToSleepASAP ||
+            (clamshellClosed && !(desktopMode && acAdaptorConnected)))
+        {
+            // System is currently in dark wake, and no children or
+            // assertions prevent system sleep.
 
-bool IOPMrootDomain::batteryPublished( 
-    void * target, 
-    void * root_domain,
-    IOService * resourceService )
-{    
-    // rdar://2936060&4435589    
-    // All laptops have dimmable LCD displays
-    // All laptops have batteries
-    // So if this machine has a battery, publish the fact that the backlight
-    // supports dimming.
-    ((IOPMrootDomain *)root_domain)->publishFeature("DisplayDims");
+            if (checkSystemCanSleep(true))
+            {
+                if (lowBatteryCondition)
+                {
+                    lastSleepReason = kIOPMSleepReasonLowPower;
+                    setProperty(kRootDomainSleepReasonKey, kIOPMLowPowerSleepKey);
+                }
+                else if (darkWakeMaintenance)
+                {
+                    lastSleepReason = kIOPMSleepReasonMaintenance;
+                    setProperty(kRootDomainSleepReasonKey, kIOPMMaintenanceSleepKey);
+                }
+                changePowerStateWithOverrideTo( SLEEP_STATE );
+            }
+            else
+            {
+                // Parked in dark wake, a tickle will return to full wake
+                rejectWranglerTickle = false;
+            }
+        } else // non-maintenance (network) dark wake
+        {
+            if (checkSystemCanSleep(true))
+            {
+                // Release power clamp, and wait for children idle.
+                adjustPowerState(true);
+            }
+            else
+            {
+                changePowerStateToPriv(ON_STATE);
+            }
+            rejectWranglerTickle = false;
+        }
+    }
 
-    return (true);
-}
+    if (systemDarkWake)
+    {
+        // The rest are irrelevant while system is in dark wake.
+        flags.u32 = 0;
+    }
 
+    if (flags.bit.displaySleep || flags.bit.sleepDelayChanged)
+    {
+        bool cancelQuickSpindown = false;
 
-//******************************************************************************
-// adjustPowerState
-//
-// Some condition that affects our wake/sleep/doze decision has changed.
-//
-// If the sleep slider is in the off position, we cannot sleep or doze.
-// If the enclosure is open, we cannot sleep or doze.
-// If the system is still booting, we cannot sleep or doze.
-//
-// In those circumstances, we prevent sleep and doze by holding power on with
-// changePowerStateToPriv(ON).
-//
-// If the above conditions do not exist, and also the sleep timer has expired,
-// we allow sleep or doze to occur with either changePowerStateToPriv(SLEEP) or
-// changePowerStateToPriv(DOZE) depending on whether or not we already know the
-// platform cannot sleep.
-//
-// In this case, sleep or doze will either occur immediately or at the next time
-// that no children are holding the system out of idle sleep via the 
-// kIOPMPreventIdleSleep flag in their power state arrays.
-//******************************************************************************
+        if (flags.bit.sleepDelayChanged)
+        {
+            DLOG("extra sleep timer changed\n");
+            cancelIdleSleepTimer();
+            cancelQuickSpindown = true;
+        }
+        else
+        {
+            DLOG("display sleep\n");        
+        }
 
-void IOPMrootDomain::adjustPowerState( void )
-{
-    DLOG("adjustPowerState "
-        "PS %u, ASAP %d, SL %ld, AS %d, SB %d, SS %d, UD %d\n",
-        (uint32_t) getPowerState(), sleepASAP, sleepSlider,
-        allowSleep, systemBooting, systemShutdown, userDisabledAllSleep);
+        if (wranglerAsleep && !wranglerSleepIgnored)
+        {
+            if ( extraSleepDelay )
+            {
+                // Start a timer here if the System Sleep timer is greater
+                // than the Display Sleep timer.
 
-    ASSERT_GATED();
+                startIdleSleepTimer( extraSleepDelay * 60 );
+            }
+            else if ( sleepSlider )
+            {
+                // Accelerate disk spindown if system sleep and display sleep
+                // sliders are set to the same value (e.g. both set to 5 min),
+                // and display is about to go dark. Check that system sleep
+                // is not set to never sleep. Disk sleep setting is ignored.
+
+                setQuickSpinDownTimeout();
+                cancelQuickSpindown = false;
+            }
+        }
+        
+        if (cancelQuickSpindown)
+            restoreUserSpinDownTimeout();
+    }
 
-    if ( (sleepSlider == 0) 
-        || !allowSleep 
-        || systemBooting 
-        || systemShutdown
-        || userDisabledAllSleep
-        || (runStateFlags & kRStateFlagDisableIdleSleep) )
+    if (flags.bit.idleSleepEnabled)
     {
-        changePowerStateToPriv(ON_STATE);
-    } else {
-        if ( sleepASAP ) 
+        DLOG("idle sleep timer enabled\n");
+        if (!wrangler)
         {
-            /* Convenient place to run any code at idle sleep time
-             * IOPMrootDomain initiates an idle sleep here
-             *
-             * Set last sleep cause accordingly.
-             */
-            lastSleepReason = kIOPMSleepReasonIdle;
-            setProperty(kRootDomainSleepReasonKey, kIOPMIdleSleepKey);
+            changePowerStateToPriv(ON_STATE);
+            if (idleSeconds)
+            {
+                startIdleSleepTimer( idleSeconds );
+            }
+        }
+        else
+        {
+            // Start idle sleep timer if wrangler went to sleep
+            // while system sleep was disabled. Disk spindown is
+            // accelerated upon timer expiration.
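+            // For example (hypothetical): if the display has already been
+            // asleep for 3 minutes and extraSleepDelay is 5, the idle
+            // timer below is armed for the remaining 2 minutes.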
+
+            if (wranglerAsleep)
+            {
+                AbsoluteTime    now;
+                uint64_t        nanos;
+                uint32_t        minutesSinceDisplaySleep = 0;
+                uint32_t        sleepDelay;
+
+                clock_get_uptime(&now);
+                if (CMP_ABSOLUTETIME(&now, &wranglerSleepTime) > 0)
+                {
+                    SUB_ABSOLUTETIME(&now, &wranglerSleepTime);
+                    absolutetime_to_nanoseconds(now, &nanos);
+                    minutesSinceDisplaySleep = nanos / (60000000000ULL);
+                }
+
+                if (extraSleepDelay > minutesSinceDisplaySleep)
+                {
+                    sleepDelay = extraSleepDelay - minutesSinceDisplaySleep;
+                }
+                else
+                {
+                    sleepDelay = 1; // 1 min
+                }
 
-            sleepASAP = false;
-            changePowerStateToPriv(SLEEP_STATE);
+                startIdleSleepTimer(sleepDelay * 60);
+                DLOG("display slept %u min, set idle timer to %u min\n",
+                    minutesSinceDisplaySleep, sleepDelay);
+            }
         }
     }
+
+    if (flags.bit.idleSleepDisabled)
+    {
+        DLOG("idle sleep timer disabled\n");
+        cancelIdleSleepTimer();
+        restoreUserSpinDownTimeout();
+        adjustPowerState();
+    }
 }
 
+// MARK: -
+// MARK: Statistics
+
+//******************************************************************************
+// pmStats
+//
+//******************************************************************************
+
 void IOPMrootDomain::pmStatsRecordEvent(
     int                 eventIndex,
     AbsoluteTime        timestamp)
@@ -4783,6 +5870,8 @@ void IOPMrootDomain::pmStatsRecordApplicationResponse(
     return;
 }
 
+// MARK: -
+// MARK: PMTraceWorker
 
 //******************************************************************************
 // TracePoint support
@@ -4812,7 +5901,7 @@ IOReturn IOPMrootDomain::callPlatformFunction(
         statusCode = (((uint64_t)tracePointPCI) << 32) | tracePointPhases;
 		if ((tracePointPhases >> 24) != kIOPMTracePointSystemUp)
         {
-            LOG("Sleep failure code 0x%08x 0x%08x\n",
+            MSG("Sleep failure code 0x%08x 0x%08x\n",
                 tracePointPCI, tracePointPhases);
         }
 		setProperty(kIOPMSleepWakeFailureCodeKey, statusCode, 64);
@@ -4827,7 +5916,20 @@ IOReturn IOPMrootDomain::callPlatformFunction(
 
 void IOPMrootDomain::tracePoint( uint8_t point )
 {
-    pmTracer->tracePoint(point);
+    if (!systemBooting)
+        pmTracer->tracePoint(point);
+}
+
+void IOPMrootDomain::tracePoint( uint8_t point, uint8_t data )
+{
+    if (!systemBooting)
+        pmTracer->tracePoint(point, data);
+}
+
+void IOPMrootDomain::traceDetail( uint32_t detail )
+{
+    if (!systemBooting)
+        pmTracer->traceDetail( detail );
 }
 
 //******************************************************************************
@@ -4862,7 +5964,7 @@ PMTraceWorker *PMTraceWorker::tracer(IOPMrootDomain *owner)
     me->pciMappingLock = IOLockAlloc();
     me->tracePhase = kIOPMTracePointSystemUp;
     me->loginWindowPhase = 0;
-    me->pciBusyBitMask = 0;
+    me->traceData32 = 0;
     return me;
 }
 
@@ -4872,13 +5974,11 @@ void PMTraceWorker::RTC_TRACE(void)
 	{
 		uint32_t    wordA;
 
-		wordA = tracePhase;			// destined for bits 24-31
-		wordA <<= 8;
-		wordA |= loginWindowPhase;	// destined for bits 16-23
-		wordA <<= 16;
+        wordA = (tracePhase << 24) | (loginWindowPhase << 16) |
+                (traceData8 << 8);
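+        // Resulting layout: bits 24-31 hold tracePhase, bits 16-23
+        // loginWindowPhase, bits 8-15 traceData8, and bits 0-7 stay zero.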
 
-        tracePointHandler( tracePointTarget, pciBusyBitMask, wordA );
-		DLOG("RTC_TRACE wrote 0x%08x 0x%08x\n", pciBusyBitMask, wordA);
+        tracePointHandler( tracePointTarget, traceData32, wordA );
+		_LOG("RTC_TRACE wrote 0x%08x 0x%08x\n", traceData32, wordA);
 	}
 }
 
@@ -4905,7 +6005,7 @@ int PMTraceWorker::recordTopLevelPCIDevice(IOService * pciDevice)
         pciDeviceBitMappings->setObject(deviceName))
     {
         index = pciDeviceBitMappings->getCount() - 1;
-        DLOG("PMTrace PCI array: set object %s => %d\n",
+        _LOG("PMTrace PCI array: set object %s => %d\n",
             deviceName->getCStringNoCopy(), index);
     }
     if (deviceName)
@@ -4932,9 +6032,37 @@ bool PMTraceWorker::serialize(OSSerialize *s) const
 
 void PMTraceWorker::tracePoint(uint8_t phase)
 {
+    // clear trace detail when phase begins
+    if (tracePhase != phase)
+        traceData32 = 0;
+
+    tracePhase = phase;
+
+    DLOG("trace point 0x%02x\n", tracePhase);
+    RTC_TRACE();
+}
+
+void PMTraceWorker::tracePoint(uint8_t phase, uint8_t data8)
+{
+    // clear trace detail when phase begins
+    if (tracePhase != phase)
+        traceData32 = 0;
+
     tracePhase = phase;
+    traceData8 = data8;
+
+    DLOG("trace point 0x%02x 0x%02x\n", tracePhase, traceData8);
+    RTC_TRACE();
+}
+
+void PMTraceWorker::traceDetail(uint32_t detail)
+{
+    if (kIOPMTracePointSleepPriorityClients != tracePhase)
+        return;
+
+    traceData32 = detail;
+    DLOG("trace point 0x%02x detail 0x%08x\n", tracePhase, traceData32);
 
-    DLOG("IOPMrootDomain: trace point 0x%02x\n", tracePhase);
     RTC_TRACE();
 }
 
@@ -4942,7 +6070,7 @@ void PMTraceWorker::traceLoginWindowPhase(uint8_t phase)
 {
     loginWindowPhase = phase;
 
-    DLOG("IOPMrootDomain: loginwindow tracepoint 0x%02x\n", loginWindowPhase);
+    DLOG("loginwindow tracepoint 0x%02x\n", loginWindowPhase);
     RTC_TRACE();
 }
 
@@ -4953,14 +6081,14 @@ void PMTraceWorker::tracePCIPowerChange(
 	uint32_t	expectedFlag;
 
 	// Ignore PCI changes outside of system sleep/wake.
-    if ((kIOPMTracePointSystemSleepDriversPhase != tracePhase) &&
-        (kIOPMTracePointSystemWakeDriversPhase  != tracePhase))
+    if ((kIOPMTracePointSleepPowerPlaneDrivers != tracePhase) &&
+        (kIOPMTracePointWakePowerPlaneDrivers  != tracePhase))
         return;
 
 	// Only record the WillChange transition when going to sleep,
 	// and the DidChange on the way up.
 	changeFlags &= (kIOPMDomainWillChange | kIOPMDomainDidChange);
-	expectedFlag = (kIOPMTracePointSystemSleepDriversPhase == tracePhase) ?
+	expectedFlag = (kIOPMTracePointSleepPowerPlaneDrivers == tracePhase) ?
 					kIOPMDomainWillChange : kIOPMDomainDidChange;
 	if (changeFlags != expectedFlag)
 		return;
@@ -4972,21 +6100,23 @@ void PMTraceWorker::tracePCIPowerChange(
 
         if (kPowerChangeStart == type)
         {
-            pciBusyBitMask |= bitMask;
-            DLOG("PMTrace: Device %s started  - bit %2d mask 0x%08x => 0x%08x\n",
-                service->getName(), bitNum, bitMask, pciBusyBitMask);
+            traceData32 |= bitMask;
+            _LOG("PMTrace: Device %s started  - bit %2d mask 0x%08x => 0x%08x\n",
+                service->getName(), bitNum, bitMask, traceData32);
         }
         else
         {
-            pciBusyBitMask &= ~bitMask;
-            DLOG("PMTrace: Device %s finished - bit %2d mask 0x%08x => 0x%08x\n",
-                service->getName(), bitNum, bitMask, pciBusyBitMask);
+            traceData32 &= ~bitMask;
+            _LOG("PMTrace: Device %s finished - bit %2d mask 0x%08x => 0x%08x\n",
+                service->getName(), bitNum, bitMask, traceData32);
         }
 
-        RTC_TRACE();        
+        RTC_TRACE();
     }
 }
 
+// MARK: -
+// MARK: PMHaltWorker
 
 //******************************************************************************
 // PMHaltWorker Class
@@ -5138,9 +6268,9 @@ void PMHaltWorker::work( PMHaltWorker * me )
 
 		deltaTime = computeDeltaTimeMS(&startTime);
 		if ((deltaTime > kPMHaltTimeoutMS) || timeout ||
-			(gIOKitDebug & (kIOLogDebugPower | kIOLogPMRootDomain)))
+			(gIOKitDebug & kIOLogPMRootDomain))
 		{
-			KLOG("%s driver %s (%p) took %u ms\n",
+			LOG("%s driver %s (%p) took %u ms\n",
 				(gPMHaltEvent == kIOMessageSystemWillPowerOff) ?
 					"PowerOff" : "Restart",
 				service->getName(), service,
@@ -5173,7 +6303,7 @@ void PMHaltWorker::checkTimeout( PMHaltWorker * me, AbsoluteTime * now )
 		if (nano > 3000000000ULL)
 		{
 			me->timeout = true;
-			LOG("%s still waiting on %s\n",
+			MSG("%s still waiting on %s\n",
 				(gPMHaltEvent == kIOMessageSystemWillPowerOff) ?
 					"PowerOff" : "Restart",
 				me->service->getName());
@@ -5406,18 +6536,90 @@ notifySystemShutdown( IOService * root, unsigned long event )
 	}
 	IOLockUnlock(gPMHaltLock);
 
-	// Release all workers
+	// Release all workers
+
+	for (unsigned int i = 0; i < numWorkers; i++)
+	{
+		if (workers[i])
+			workers[i]->release();
+		// worker also retained by its own thread
+	}
+
+done:
+	DLOG("%s done\n", __FUNCTION__);
+	return;
+}
+
+//*********************************************************************************
+// Sleep/Wake logging
+//
+//*********************************************************************************
+
+IOMemoryDescriptor *IOPMrootDomain::getPMTraceMemoryDescriptor(void)
+{
+    if (timeline)
+        return timeline->getPMTraceMemoryDescriptor();
+    else
+        return NULL;
+}
+
+// Forwards external reports of detailed events to IOPMTimeline
+IOReturn IOPMrootDomain::recordPMEvent(PMEventDetails *details)
+{
+    if (timeline && details) {
+
+        IOReturn rc;
+
+        // Record a detailed driver power change event, or...
+        if (details->eventClassifier == kIOPMEventClassDriverEvent) {
+            rc = timeline->recordDetailedPowerEvent( details );
+        }
+        // Record a system power management event
+        else if (details->eventClassifier == kIOPMEventClassSystemEvent) {
+            rc = timeline->recordSystemPowerEvent( details );
+        }
+        else {
+            return kIOReturnBadArgument;
+        }
+
+        // If we get to record this message, then we've reached the
+        // end of another successful Sleep --> Wake cycle.
+        // At this point, we pat ourselves on the back and allow
+        // our Sleep --> Wake UUID to be published.
+        if (details->eventType == kIOPMEventTypeWakeDone) {
+            timeline->setSleepCycleInProgressFlag(false);
+        }
+
+/*
+        // Check if it's time to clear the timeline buffer
+        if (getProperty(kIOPMSleepWakeUUIDKey)
+            && timeline->isSleepCycleInProgress() == false
+            && timeline->getNumEventsLoggedThisPeriod() > 500) {
+
+            // Clear the old UUID
+            if (pmPowerStateQueue) {
+                pmPowerStateQueue->submitPowerEvent(kPowerEventPublishSleepWakeUUID, (void *)false );
+            }
+        }
+*/
+        return rc;
+    }
+    else
+        return kIOReturnNotReady;
+}
+
+IOReturn IOPMrootDomain::recordAndReleasePMEvent(PMEventDetails *details)
+{
+    IOReturn ret = kIOReturnBadArgument;
 
-	for (unsigned int i = 0; i < numWorkers; i++)
-	{
-		if (workers[i])
-			workers[i]->release();
-		// worker also retained by it's own thread
-	}
+    if (details)
+    {
+        ret = recordPMEvent(details);
+        details->release();
+    }
 
-done:
-	DLOG("%s done\n", __FUNCTION__);
-	return;
+    return ret;
 }
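+
+// A usage sketch for the two helpers above. The PMEventDetails::eventDetails()
+// constructor and its argument order are assumptions inferred from the fields
+// consumed by IOPMTimeline, not a confirmed call site:
+//
+//   PMEventDetails *d = PMEventDetails::eventDetails(
+//       kIOPMEventTypeWakeDone,   // eventType
+//       NULL,                     // uuid; NULL falls back to the posted UUID
+//       0,                        // reason
+//       kIOReturnSuccess);        // result
+//   gRootDomain->recordAndReleasePMEvent(d);   // logs the event, releases d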
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
@@ -5493,16 +6695,30 @@ bool IOPMrootDomain::serializeProperties( OSSerialize * s ) const
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
+// MARK: -
+// MARK: PMSettingHandle
 
-#undef super
-#define super OSObject
-OSDefineMetaClassAndFinalStructors(PMSettingObject, OSObject)
+OSDefineMetaClassAndStructors( PMSettingHandle, OSObject )
 
-void PMSettingObject::setPMSetting(const OSSymbol *type, OSObject *obj)
+void PMSettingHandle::free( void )
 {
-        (*func)(target, type, obj, refcon);
+    if (pmso)
+    {
+        pmso->clientHandleFreed();
+        pmso->release();
+        pmso = 0;
+    }
+
+    OSObject::free();
 }
 
+// MARK: -
+// MARK: PMSettingObject
+
+#undef super
+#define super OSObject
+OSDefineMetaClassAndFinalStructors( PMSettingObject, OSObject )
+
 /* 
  * Static constructor/initializer for PMSettingObject
  */
@@ -5512,92 +6728,445 @@ PMSettingObject *PMSettingObject::pmSettingObject(
     OSObject                            *target_arg,
     uintptr_t                           refcon_arg,
     uint32_t                            supportedPowerSources,
-    const OSSymbol *                    settings[])
+    const OSSymbol *                    settings[],
+    OSObject                            **handle_obj)
 {
-    uint32_t                            objCount = 0;
-    PMSettingObject                     *pmso;
+    uint32_t                            settingCount = 0;
+    PMSettingObject                     *pmso = 0;
+    PMSettingHandle                     *pmsh = 0;
 
-    if( !parent_arg || !handler_arg || !settings ) return NULL;
+    if ( !parent_arg || !handler_arg || !settings || !handle_obj )
+        return NULL;
 
-     // count OSSymbol entries in NULL terminated settings array
-    while( settings[objCount] ) {
-        objCount++;
+    // count OSSymbol entries in NULL terminated settings array
+    while (settings[settingCount]) {
+        settingCount++;
     }
-    if(0 == objCount) return NULL;
+    if (0 == settingCount)
+        return NULL;
 
     pmso = new PMSettingObject;
-    if(!pmso || !pmso->init()) return NULL;
-
-    pmso->parent = parent_arg;
-    pmso->func = handler_arg;
-    pmso->target = target_arg;
-    pmso->refcon = refcon_arg;
-    pmso->releaseAtCount = objCount + 1; // release when it has count+1 retains
- 
-    pmso->publishedFeatureID = (uint32_t *)IOMalloc(sizeof(uint32_t)*objCount);
-    if(pmso->publishedFeatureID) {
-        for(unsigned int i=0; i<objCount; i++) {
+    if (!pmso || !pmso->init())
+        goto fail;
+
+    pmsh = new PMSettingHandle;
+    if (!pmsh || !pmsh->init())
+        goto fail;
+
+    queue_init(&pmso->calloutQueue);
+    pmso->parent       = parent_arg;
+    pmso->func         = handler_arg;
+    pmso->target       = target_arg;
+    pmso->refcon       = refcon_arg;
+    pmso->settingCount = settingCount;
+
+    pmso->retain();     // handle holds a retain on pmso
+    pmsh->pmso = pmso;
+    pmso->pmsh = pmsh;
+
+    pmso->publishedFeatureID = (uint32_t *)IOMalloc(sizeof(uint32_t)*settingCount);
+    if (pmso->publishedFeatureID) {
+        for (unsigned int i=0; i<settingCount; i++) {
             // Since there is now at least one listener to this setting, publish
             // PM root domain support for it.
-            parent_arg->publishFeature( settings[i]->getCStringNoCopy(), 
+            parent_arg->publishFeature( settings[i]->getCStringNoCopy(),
                     supportedPowerSources, &pmso->publishedFeatureID[i] );
         }
     }
-    
+
+    *handle_obj = pmsh;
     return pmso;
+
+fail:
+    if (pmso) pmso->release();
+    if (pmsh) pmsh->release();
+    return NULL;
 }
 
-void PMSettingObject::free(void)
+void PMSettingObject::free( void )
 {
-    OSCollectionIterator    *settings_iter;
-    OSSymbol                *sym;
-    OSArray                 *arr;
-    int                     arr_idx;
-    int                     i;
-    int                     objCount = releaseAtCount - 1;
-    
-    if(publishedFeatureID) {
-        for(i=0; i<objCount; i++) {
-            if(0 != publishedFeatureID[i]) {
+    if (publishedFeatureID) {
+        for (uint32_t i=0; i<settingCount; i++) {
+            if (publishedFeatureID[i]) {
                 parent->removePublishedFeature( publishedFeatureID[i] );
             }
         }
+
+        IOFree(publishedFeatureID, sizeof(uint32_t) * settingCount);
+    }
+
+    super::free();
+}
+
+void PMSettingObject::dispatchPMSetting( const OSSymbol * type, OSObject * object )
+{
+    (*func)(target, type, object, refcon);
+}
+
+void PMSettingObject::clientHandleFreed( void )
+{
+    parent->deregisterPMSettingObject(this);
+}
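+
+// Lifecycle sketch: pmSettingObject() hands the PMSettingHandle back to the
+// caller while the handle holds the only client retain on its PMSettingObject.
+// Releasing the handle runs PMSettingHandle::free(), which invokes
+// clientHandleFreed() so the root domain can deregister and drop the object.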
+
+// MARK: -
+// MARK: IOPMTimeline
+
+#undef super
+#define super OSObject
+
+//*********************************************************************************
+//*********************************************************************************
+//*********************************************************************************
+
+IOPMTimeline *IOPMTimeline::timeline(IOPMrootDomain *root_domain)
+{
+    IOPMTimeline    *myself;
+    
+    if (!root_domain)
+        return NULL;
     
-        IOFree(publishedFeatureID, sizeof(uint32_t) * objCount);
+    myself = new IOPMTimeline;
+ 
+    if (myself) {
+        myself->owner = root_domain;
+        myself->init();
     }
-            
-    IORecursiveLockLock(parent->settingsCtrlLock);        
+ 
+    return myself;
+}
+
+bool IOPMTimeline::init(void)
+{
+    if (!super::init()) {
+        return false;
+    }
+
+    logLock = IOLockAlloc();
     
-    // Search each PM settings array in the kernel.
-    settings_iter = OSCollectionIterator::withCollection(parent->settingsCallbacks);
-    if(settings_iter) 
+    // Fresh timeline, no events logged yet
+    this->numEventsLoggedThisPeriod = 0;
+    this->sleepCycleInProgress = false;
+
+    //this->setEventsRecordingLevel(1);   // TODO
+    this->setEventsTrackedCount(kIOPMDefaultSystemEventsTracked);
+
+    return true;
+}
+
+void IOPMTimeline::free(void)
+{
+    if (pmTraceMemoryDescriptor) {
+        pmTraceMemoryDescriptor->release();
+        pmTraceMemoryDescriptor = NULL;
+    }
+    
+    IOLockFree(logLock);
+
+    super::free();
+}
+
+IOMemoryDescriptor *IOPMTimeline::getPMTraceMemoryDescriptor()
+{
+    return pmTraceMemoryDescriptor;
+}
+
+//*********************************************************************************
+//*********************************************************************************
+//*********************************************************************************
+
+bool IOPMTimeline::setProperties(OSDictionary *d)
+{
+    OSNumber    *n = NULL;
+    OSBoolean   *b = NULL;
+    bool        changed = false;
+
+    /* Changes size of detailed events buffer */
+    n = (OSNumber *)d->getObject(kIOPMTimelineSystemNumberTrackedKey);
+    if (OSDynamicCast(OSNumber, n))
     {
-        while(( sym = OSDynamicCast(OSSymbol, settings_iter->getNextObject()) ))
-        {
-            arr = (OSArray *)parent->settingsCallbacks->getObject(sym);
-            arr_idx = arr->getNextIndexOfObject(this, 0);
-            if(-1 != arr_idx) {
-                // 'this' was found in the array; remove it                
-                arr->removeObject(arr_idx);
-            }
-        }
+        changed = true;
+        this->setEventsTrackedCount(n->unsigned32BitValue());        
+    }
+
+
+    /* enables or disables system events */
+    b = (OSBoolean *)d->getObject(kIOPMTimelineEnabledKey);
+    if (b)
+    {
+        changed = true;
+        this->setEventsRecordingLevel((int)(kOSBooleanTrue == b));        
+    }
+
+    return changed;
+}
+
+//*********************************************************************************
+//*********************************************************************************
+//*********************************************************************************
+
+OSDictionary *IOPMTimeline::copyInfoDictionary(void)
+{
+    OSDictionary *out = NULL;
+    OSNumber     *n   = NULL;
+
+    if (!hdr)
+        return NULL;
+
+    out = OSDictionary::withCapacity(3);
+    if (!out)
+        return NULL;
+
+    n = OSNumber::withNumber(hdr->sizeEntries, 32);
+    out->setObject(kIOPMTimelineSystemNumberTrackedKey, n);
+    n->release();
+    
+    n = OSNumber::withNumber(hdr->sizeBytes, 32);
+    out->setObject(kIOPMTimelineSystemBufferSizeKey, n);
+    n->release();
+
+    // bool
+    out->setObject(kIOPMTimelineEnabledKey, eventsRecordingLevel ? kOSBooleanTrue : kOSBooleanFalse);
+
+    return out;
+}
+
+//*********************************************************************************
+//*********************************************************************************
+//*********************************************************************************
+
+/* IOPMTimeline::recordSystemPowerEvent()
+ *
+ * Expected "type" arguments are listed in IOPMPrivate.h under enum "SystemEventTypes"
+ * Type arguments include "system events", and "Intermediate events"
+ *
+ * - System Events have paired "start" and "stop" events.
+ * - A start event shall be followed by a stop event.
+ * - Any number of Intermediate Events may fall between the 
+ *   start and stop events.
+ * - Intermediate events are meaningless outside the bounds of a system event's
+ *   start & stop events.
+ * - It's invalid to record a Start event without a following Stop event; e.g.
+ *   recording two Start events without an intervening Stop event is invalid.
+ *
+ * Buffer invariants
+ * - The first recorded system event shall be preceded by an entry with type == 0
+ * - IOPMTimeline may choose not to record intermediate events while no
+ *   system event is in progress.
+ */
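+//
+// Sentinel sketch on a hypothetical 4-entry ring: after three records,
+//
+//   [ E0 ][ E1 ][ E2 ][ 0 ]      hdr->index == 2
+//
+// the slot following the latest record (wrapping to slot 0 when the ring
+// is full) keeps eventType == 0 so readers can locate the oldest entry.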
+IOReturn IOPMTimeline::recordSystemPowerEvent( PMEventDetails *details )
+{
+    static bool                 wakeDonePending = true;
+    IOPMSystemEventRecord       *record_to = NULL;
+    OSString                    *swUUIDKey = NULL;
+    uint32_t                    useIndex = 0;
+
+    if (!details)
+        return kIOReturnBadArgument;
+
+    if (!traceBuffer) 
+        return kIOReturnNotReady;
+    
+    if (details->eventType == kIOPMEventTypeWakeDone)
+    {
+        if (!wakeDonePending)
+            return kIOReturnBadArgument;
+    }
+
+    IOLockLock(logLock);
     
-        settings_iter->release();
+    if (details->eventType == kIOPMEventTypeWake) {
+        wakeDonePending = true;
+    } else if (details->eventType == kIOPMEventTypeWakeDone) {
+        wakeDonePending = false;
     }
+
+    systemState = details->eventType;
+   
+    useIndex = _atomicIndexIncrement(&hdr->index, hdr->sizeEntries);
     
-    IORecursiveLockUnlock(parent->settingsCtrlLock);
+    // The entry immediately after the latest entry (and thus
+    //  immediately before the first entry) shall have a type 0.
+    if (useIndex + 1 >= hdr->sizeEntries) {
+        traceBuffer[0].eventType = 0;
+    } else {
+        traceBuffer[useIndex + 1].eventType = 0;
+    }
     
-    super::free();
+    record_to = &traceBuffer[useIndex];
+    bzero(record_to, sizeof(IOPMSystemEventRecord));
+
+    /*****/
+    record_to->eventType    = details->eventType;
+    record_to->eventReason  = details->reason;
+    record_to->eventResult  = details->result;
+    pmEventTimeStamp(&record_to->timestamp);
+
+    // If caller doesn't provide a UUID, we'll use the UUID that's posted
+    // on IOPMrootDomain under key kIOPMSleepWakeUUIDKey
+    if (!details->uuid)  {
+        swUUIDKey = OSDynamicCast(OSString, owner->copyProperty(kIOPMSleepWakeUUIDKey));
+
+        if (swUUIDKey)
+            details->uuid = swUUIDKey->getCStringNoCopy();
+    }
+
+    if (details->uuid)
+        strncpy(record_to->uuid, details->uuid, kMaxPMStringLength);
+
+    if (swUUIDKey) 
+        swUUIDKey->release();
+
+    numEventsLoggedThisPeriod++;
+    /*****/
+
+    IOLockUnlock(logLock);
+    
+    return kIOReturnSuccess;
+
+}
+
+//*********************************************************************************
+//*********************************************************************************
+//*********************************************************************************
+
+IOReturn IOPMTimeline::recordDetailedPowerEvent( PMEventDetails *details )
+{
+    IOPMSystemEventRecord *record_to = NULL;
+    uint32_t                useIndex;
+
+    if (!details->eventType || !details->ownerName) 
+        return kIOReturnBadArgument;
+        
+    IOLockLock(logLock);
+
+    useIndex = _atomicIndexIncrement(&hdr->index, hdr->sizeEntries);
+    
+    record_to = (IOPMSystemEventRecord *)&traceBuffer[useIndex];
+    bzero(record_to, sizeof(IOPMSystemEventRecord));
+
+    /*****/
+    record_to->eventType = details->eventType;
+    if (details->ownerName && (strlen(details->ownerName) > 1)) {
+        strlcpy( record_to->ownerName, 
+                 details->ownerName, 
+                 sizeof(record_to->ownerName));
+    }
+    
+    record_to->ownerDisambiguateID = details->ownerUnique;
+    
+    if (details->interestName && (strlen(details->interestName) > 1)) {
+        strlcpy(record_to->interestName, 
+                details->interestName, 
+                sizeof(record_to->interestName));
+    }
+
+    record_to->oldState      = details->oldState;
+    record_to->newState      = details->newState;
+    record_to->eventResult   = details->result;
+    record_to->elapsedTimeUS = details->elapsedTimeUS;
+    pmEventTimeStamp(&record_to->timestamp);
+
+    numEventsLoggedThisPeriod++;
+    /*****/
+
+    IOLockUnlock(logLock);
+    return kIOReturnSuccess;
+}
+
+uint32_t IOPMTimeline::getNumEventsLoggedThisPeriod() {
+  return this->numEventsLoggedThisPeriod;
+}
+
+void IOPMTimeline::setNumEventsLoggedThisPeriod(uint32_t newCount) {
+  this->numEventsLoggedThisPeriod = newCount;
+}
+
+bool IOPMTimeline::isSleepCycleInProgress() {
+  return this->sleepCycleInProgress;
+}
+
+void IOPMTimeline::setSleepCycleInProgressFlag(bool flag) {
+  this->sleepCycleInProgress = flag;
+}
+//*********************************************************************************
+//*********************************************************************************
+//*********************************************************************************
+    
+void IOPMTimeline::setEventsTrackedCount(uint32_t newTracked)
+{
+    size_t      make_buf_size = 0;
+    
+    make_buf_size = sizeof(IOPMTraceBufferHeader) + (newTracked * sizeof(IOPMSystemEventRecord));
+
+    IOLockLock(logLock);
+
+    if (pmTraceMemoryDescriptor) {
+        pmTraceMemoryDescriptor->release();
+        pmTraceMemoryDescriptor = NULL;
+    }
+
+    hdr = NULL;
+    traceBuffer = NULL;
+
+    if (0 == newTracked)
+    {
+        IOLog("IOPMrootDomain -> erased buffer.\n");
+        goto exit;
+    }
+
+    pmTraceMemoryDescriptor = IOBufferMemoryDescriptor::withOptions(
+                    kIOMemoryKernelUserShared | kIODirectionIn, make_buf_size);
+
+    if (!pmTraceMemoryDescriptor)
+    {
+        IOLog("IOPMRootDomain -> IOBufferMemoryDescriptor(%d) returns NULL\n", (int)make_buf_size);
+        goto exit;
+    }    
+
+    pmTraceMemoryDescriptor->prepare(kIODirectionIn);
+    
+    // Header occupies the first sizeof(IOPMTraceBufferHeader) bytes
+    hdr = (IOPMTraceBufferHeader *)pmTraceMemoryDescriptor->getBytesNoCopy();
+
+    // Recorded events occupy the remaining bulk of the buffer
+    traceBuffer = (IOPMSystemEventRecord *)((uint8_t *)hdr + sizeof(IOPMTraceBufferHeader));
+
+    bzero(hdr, make_buf_size);
+
+    hdr->sizeBytes = make_buf_size;
+    hdr->sizeEntries = newTracked;
+
+    IOLog("IOPMRootDomain -> IOBufferMemoryDescriptor(%d) returns bufferMB with address 0x%08x\n", (int)make_buf_size, (unsigned int)(uintptr_t)traceBuffer);
+
+exit:
+    IOLockUnlock(logLock);
+}
+
+//*********************************************************************************
+//*********************************************************************************
+//*********************************************************************************
+
+void IOPMTimeline::setEventsRecordingLevel(uint32_t eventsTrackedBits)
+{
+
+    // TODO
+
+    return;
+
 }
 
-void PMSettingObject::taggedRelease(const void *tag, const int when) const
-{     
-    // We have n+1 retains - 1 per array that this PMSettingObject is a member
-    // of, and 1 retain to ourself. When we get a release with n+1 retains
-    // remaining, we go ahead and free ourselves, cleaning up array pointers
-    // in free();
+/* static helper to IOPMTimeline 
+ */
+uint32_t IOPMTimeline::_atomicIndexIncrement(uint32_t *index, uint32_t limit)
+{
+    uint32_t    was_index;
+    uint32_t    inc_index;
+    
+    if (!index)
+        return 0;
+    
+    do {
+        was_index = *index;
+        inc_index = (was_index+1)%limit;
+    } while (!OSCompareAndSwap(was_index, inc_index, index));
 
-    super::taggedRelease(tag, releaseAtCount);    
+    return inc_index;
 }
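+
+// For example, with limit == 4 and *index == 3, the loop publishes 0 and
+// returns it, wrapping the ring; if another thread races ahead, the CAS
+// fails and the increment is retried against the freshly observed index.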
 
 // MARK: -
@@ -5676,8 +7245,19 @@ void PMAssertionsTracker::tabulate(void)
 
     if ((assertionsKernel != oldKernel) ||
         (assertionsCombined != oldCombined))
-    {
-        owner->messageClients(kIOPMMessageDriverAssertionsChanged);
+    {    
+        owner->messageClients(kIOPMMessageDriverAssertionsChanged);        
+        
+        if (((assertionsCombined & kIOPMDriverAssertionPreventDisplaySleepBit) != 0) 
+                && ((oldCombined & kIOPMDriverAssertionPreventDisplaySleepBit) == 0))
+        {
+            /* We react to a new PreventDisplaySleep assertion by waking the display
+             * with an activityTickle
+             */
+            owner->evaluatePolicy(kStimulusDarkWakeActivityTickle);
+        } else {
+            owner->evaluatePolicy(kStimulusDarkWakeEvaluate);
+        }
     }
 }
 
@@ -5780,18 +7360,14 @@ IOReturn PMAssertionsTracker::createAssertion(
     PMAssertStruct  track;
 
     // Warning: trillions and trillions of created assertions may overflow the unique ID.
-#ifdef __ppc__
-    track.id = issuingUniqueID++;  // FIXME: need OSIncrementAtomic64() for ppc
-#else
     track.id = OSIncrementAtomic64((SInt64*) &issuingUniqueID);
-#endif
     track.level = level;
     track.assertionBits = which;
     track.ownerString = whoItIs ? OSSymbol::withCString(whoItIs) : 0;
     track.ownerService = serviceID;
     track.modifiedTime = 0;
     pmEventTimeStamp(&track.createdTime);
-
+    
     dataStore = OSData::withBytes(&track, sizeof(PMAssertStruct));
     if (!dataStore)
     {
@@ -6010,6 +7586,7 @@ IOPMDriverAssertionLevel PMAssertionsTracker::getAssertionLevel(
 //*********************************************************************************
 //*********************************************************************************
 
+
 static void pmEventTimeStamp(uint64_t *recordTS)
 {
     clock_sec_t     tsec;
@@ -6031,37 +7608,38 @@ static void pmEventTimeStamp(uint64_t *recordTS)
     return;
 }
 
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+// MARK: -
+// MARK: IORootParent
 
-#undef  super
-#define super IOService
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 OSDefineMetaClassAndFinalStructors(IORootParent, IOService)
 
-// This array exactly parallels the state array for the root domain.
-// Power state changes initiated by a device can be vetoed by a client of the device, and
-// power state changes initiated by the parent of a device cannot be vetoed by a client of the device,
-// so when the root domain wants a power state change that cannot be vetoed (e.g. demand sleep), it asks
-// its parent to make the change.  That is the reason for this complexity.
+// The reason that root domain needs a root parent is to facilitate demand
+// sleep, since a power change from the root parent cannot be vetoed.
+//
+// The above statement is no longer true since root domain now performs
+// demand sleep using overrides. But root parent remains to avoid changing
+// the power tree stacking. Root parent is parked at the max power state.
+
 
-static IOPMPowerState patriarchPowerStates[NUM_POWER_STATES] =
+static IOPMPowerState patriarchPowerStates[2] =
 {
-    {1,0,0,0,0,0,0,0,0,0,0,0},              // off   (not used)
-    {1,0,RESTART_POWER,0,0,0,0,0,0,0,0,0},  // reset (not used)
-    {1,0,SLEEP_POWER,0,0,0,0,0,0,0,0,0},    // sleep
-    {1,0,DOZE_POWER,0,0,0,0,0,0,0,0,0},     // doze
-    {1,0,ON_POWER,0,0,0,0,0,0,0,0,0},       // running
+    {1,0,ON_POWER,0,0,0,0,0,0,0,0,0},
+    {1,0,ON_POWER,0,0,0,0,0,0,0,0,0},
 };
 
+void IORootParent::initialize( void )
+{
+}
+
 bool IORootParent::start( IOService * nub )
 {
-    mostRecentChange = ON_STATE;
-    super::start(nub);
+    IOService::start(nub);
     attachToParent( getRegistryRoot(), gIOPowerPlane );
     PMinit();
-    registerPowerDriver(this, patriarchPowerStates, NUM_POWER_STATES);
-	wakeSystem();
-    powerOverrideOnPriv();	
+    registerPowerDriver(this, patriarchPowerStates, 2);
+    makeUsable();
     return true;
 }
 
@@ -6075,30 +7653,22 @@ void IORootParent::restartSystem( void )
 
 void IORootParent::sleepSystem( void )
 {
-    mostRecentChange = SLEEP_STATE;
-    changePowerStateToPriv(SLEEP_STATE);
 }
 
 void IORootParent::dozeSystem( void )
 {
-    mostRecentChange = DOZE_STATE;
-    changePowerStateToPriv(DOZE_STATE);
 }
 
-// Called in demand sleep when sleep discovered to be impossible after actually attaining that state.
-// This brings the parent to doze, which allows the root to step up from sleep to doze.
-
-// In idle sleep, do nothing because the parent is still on and the root can freely change state.
-
 void IORootParent::sleepToDoze( void )
 {
-    if ( mostRecentChange == SLEEP_STATE ) {
-        changePowerStateToPriv(DOZE_STATE);
-    }
 }
 
 void IORootParent::wakeSystem( void )
 {
-    mostRecentChange = ON_STATE;
-    changePowerStateToPriv(ON_STATE);
 }
+
+OSObject * IORootParent::copyProperty( const char * aKey) const
+{
+    return (IOService::copyProperty(aKey));
+}
+
diff --git a/iokit/Kernel/IOPlatformExpert.cpp b/iokit/Kernel/IOPlatformExpert.cpp
index f00ffd725..7800babda 100644
--- a/iokit/Kernel/IOPlatformExpert.cpp
+++ b/iokit/Kernel/IOPlatformExpert.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -44,6 +44,7 @@
 
 #include <libkern/c++/OSContainers.h>
 #include <libkern/crypto/sha1.h>
+#include <libkern/OSAtomic.h>
 
 extern "C" {
 #include <machine/machine_routines.h>
@@ -77,6 +78,7 @@ OSMetaClassDefineReservedUnused(IOPlatformExpert, 11);
 static IOPlatformExpert * gIOPlatform;
 static OSDictionary * gIOInterruptControllers;
 static IOLock * gIOInterruptControllersLock;
+static IODTNVRAM *gIOOptionsEntry;
 
 OSSymbol * gPlatformInterruptControllerName;
 
@@ -258,7 +260,7 @@ int IOPlatformExpert::haltRestart(unsigned int type)
   // On ARM kPEPanicRestartCPU is supported in the drivers
   if (type == kPEPanicRestartCPU)
 	  type = kPERestartCPU;
-  
+
   if (PE_halt_restart) return (*PE_halt_restart)(type);
   else return -1;
 }
@@ -371,6 +373,8 @@ PMLog(const char *who, unsigned long event,
 {
     UInt32 debugFlags = gIOKitDebug;
     UInt32 traceFlags = gIOKitTrace;
+    uintptr_t   name = 0;
+    UInt32 i = 0;
 
     if (debugFlags & kIOLogPower) {
 
@@ -402,8 +406,11 @@ PMLog(const char *who, unsigned long event,
 		code |= DBG_FUNC_START - sgnevent;
 	    }
 
-	    // Record the timestamp, wish I had a this pointer
-	    IOTimeStampConstant(code, (uintptr_t) who, event, param1, param2);
+        // Pack the first sizeof(uintptr_t) characters of the name
+        // into one word, most significant byte first.
+        while ((i < sizeof(uintptr_t)) && (who[i] != 0)) {
+            ((char *) &name)[sizeof(uintptr_t) - i - 1] = who[i];
+            i++;
+        }
+	    // Record the timestamp.
+	    IOTimeStampConstant(code, name, event, param1, param2);
 	}
     }
 }
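
PMLog above stopped logging the raw `who` pointer, which is meaningless once
the trace leaves the machine, and instead packs the leading bytes of the
driver name into the trace argument so the record decodes as text. The
packing loop in isolation (a sketch; the helper name is illustrative):

    #include <stdint.h>

    /* Sketch of the packing done in PMLog: copy up to sizeof(uintptr_t)
     * leading characters of a C string into one word, first character in
     * the most significant byte. */
    static uintptr_t pack_name(const char *who)
    {
        uintptr_t name = 0;
        uint32_t  i = 0;

        while ((i < sizeof(uintptr_t)) && (who[i] != 0)) {
            ((char *) &name)[sizeof(uintptr_t) - i - 1] = who[i];
            i++;
        }
        return name;
    }
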
@@ -779,12 +786,13 @@ int PEGetPlatformEpoch(void)
 
 int PEHaltRestart(unsigned int type)
 {
-  IOPMrootDomain    *pmRootDomain = IOService::getPMRootDomain();
+  IOPMrootDomain    *pmRootDomain;
   AbsoluteTime      deadline;
   thread_call_t     shutdown_hang;
   
   if(type == kPEHaltCPU || type == kPERestartCPU || type == kPEUPSDelayHaltCPU)
   {
+    pmRootDomain = IOService::getPMRootDomain();
     /* Notify IOKit PM clients of shutdown/restart
        Clients subscribe to this message with a call to
        IOService::registerInterest()
@@ -820,6 +828,115 @@ UInt32 PESavePanicInfo(UInt8 *buffer, UInt32 length)
   else return 0;
 }
 
+
+
+inline static int init_gIOOptionsEntry(void)
+{
+    IORegistryEntry *entry;
+    void *nvram_entry;
+    volatile void **options;
+    int ret = -1;
+
+    if (gIOOptionsEntry) 
+        return 0;
+
+    entry = IORegistryEntry::fromPath( "/options", gIODTPlane );
+    if (!entry)
+        return -1;
+
+    nvram_entry = (void *) OSDynamicCast(IODTNVRAM, entry);
+    if (!nvram_entry) 
+        goto release;
+
+    options = (volatile void **) &gIOOptionsEntry;
+    if (!OSCompareAndSwapPtr(NULL, nvram_entry, options)) {
+        ret = 0;
+        goto release;
+    }
+
+    return 0;
+
+release:
+    entry->release();
+    return ret;
+
+}
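
init_gIOOptionsEntry publishes gIOOptionsEntry at most once: whichever caller
wins the OSCompareAndSwapPtr keeps its reference, and the loser releases its
duplicate registry entry while still reporting success. The publish-or-release
idiom in isolation (lookup() is a hypothetical stand-in for the registry walk):

    /* Sketch of the publish-or-release idiom used above. */
    static void * volatile gShared;

    extern OSObject * lookup(void);     /* hypothetical */

    static int initShared(void)
    {
        if (gShared)
            return 0;                   /* already published */

        OSObject * mine = lookup();
        if (!mine)
            return -1;

        if (!OSCompareAndSwapPtr(NULL, mine, (void * volatile *) &gShared))
            mine->release();            /* lost the race; keep the winner's */

        return 0;
    }
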
+
+/* pass in a NULL value if you just want to figure out the len */
+boolean_t PEReadNVRAMProperty(const char *symbol, void *value,
+                              unsigned int *len)
+{
+    OSObject  *obj;
+    OSData *data;
+    unsigned int vlen;
+
+    if (!symbol || !len)
+        goto err;
+
+    if (init_gIOOptionsEntry() < 0)
+        goto err;
+
+    vlen = *len;
+    *len = 0;
+
+    obj = gIOOptionsEntry->getProperty(symbol);
+    if (!obj)
+        goto err;
+
+    /* convert to data */
+    data = OSDynamicCast(OSData, obj);
+    if (!data) 
+        goto err;
+
+    *len  = data->getLength();
+    vlen  = min(vlen, *len);
+    if (vlen)
+        memcpy((void *) value, data->getBytesNoCopy(), vlen);
+
+    return TRUE;
+
+err:
+    return FALSE;
+}
+
+
+boolean_t PEWriteNVRAMProperty(const char *symbol, const void *value, 
+                               const unsigned int len)
+{
+    const OSSymbol *sym;
+    OSData *data;
+    bool ret = false;
+
+    if (!symbol || !value || !len)
+        goto err;
+
+    if (init_gIOOptionsEntry() < 0)
+        goto err;
+
+    sym = OSSymbol::withCStringNoCopy(symbol);
+    if (!sym)
+        goto err;
+
+    data = OSData::withBytes((void *) value, len);
+    if (!data)
+        goto sym_done;
+
+    ret = gIOOptionsEntry->setProperty(sym, data);
+    data->release();
+
+sym_done:
+    sym->release();
+
+    if (ret == true) {
+        gIOOptionsEntry->sync();
+        return TRUE;
+    }
+
+err:
+    return FALSE;
+}
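
Per the comment on PEReadNVRAMProperty, a NULL value pointer turns the call
into a length probe. The intended two-call sequence, sketched with an
illustrative key name and the usual IOMalloc/IOFree allocator calls:

    /* Sketch: probe for the length, allocate, then read for real. */
    static void read_example_key(void)
    {
        unsigned int len = 0;

        if (PEReadNVRAMProperty("example-key", NULL, &len) && len) {
            void * buf = IOMalloc(len);
            if (buf) {
                if (PEReadNVRAMProperty("example-key", buf, &len)) {
                    /* ... consume len bytes at buf ... */
                }
                IOFree(buf, len);
            }
        }
    }
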
+
+
 long PEGetGMTTimeOfDay(void)
 {
 	long	result = 0;
diff --git a/iokit/Kernel/IORegistryEntry.cpp b/iokit/Kernel/IORegistryEntry.cpp
index e41a94bdc..a299d3fa1 100644
--- a/iokit/Kernel/IORegistryEntry.cpp
+++ b/iokit/Kernel/IORegistryEntry.cpp
@@ -278,7 +278,12 @@ bool IORegistryEntry::init( OSDictionary * dict )
 	bzero(reserved, sizeof(ExpansionData));
     }
     if( dict) {
-	dict->retain();
+	if (OSCollection::kImmutable & dict->setOptions(0, 0)) {
+	    dict = (OSDictionary *) dict->copyCollection();
+	    if (!dict)
+           	return (false);
+	} else
+	    dict->retain();
 	if( fPropertyTable)
 	    fPropertyTable->release();
 	fPropertyTable = dict;
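
IORegistryEntry::init above no longer blindly retains a caller-supplied
dictionary: if the collection has been marked immutable, it takes a mutable
copy instead, so later property updates cannot fail against a frozen table.
The check in isolation (setOptions(0, 0) is a read-only query that returns
the current options without setting any; the helper name is illustrative):

    // Sketch of the copy-on-immutable adoption above.
    static OSDictionary * adopt(OSDictionary * dict)
    {
        if (OSCollection::kImmutable & dict->setOptions(0, 0))
            return (OSDictionary *) dict->copyCollection();  // may be NULL
        dict->retain();
        return dict;
    }
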
diff --git a/iokit/Kernel/IOService.cpp b/iokit/Kernel/IOService.cpp
index 1a28626cf..6ef0b3413 100644
--- a/iokit/Kernel/IOService.cpp
+++ b/iokit/Kernel/IOService.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -50,10 +50,13 @@
 #include <IOKit/assert.h>
 #include <sys/errno.h>
 
+#include <machine/pal_routines.h>
+
 #define LOG kprintf
 //#define LOG IOLog
 
 #include "IOServicePrivate.h"
+#include "IOKitKernelInternal.h"
 
 // take lockForArbitration before LOCKNOTIFY
 
@@ -106,11 +109,16 @@ const OSSymbol *		gIOKitDebugKey;
 
 const OSSymbol *		gIOCommandPoolSizeKey;
 
+const OSSymbol *		gIOConsoleLockedKey;
 const OSSymbol *		gIOConsoleUsersKey;
 const OSSymbol *		gIOConsoleSessionUIDKey;
+const OSSymbol *		gIOConsoleSessionAuditIDKey;
 const OSSymbol *		gIOConsoleUsersSeedKey;
-const OSSymbol *        gIOConsoleSessionOnConsoleKey;
-const OSSymbol *        gIOConsoleSessionSecureInputPIDKey;
+const OSSymbol *		gIOConsoleSessionOnConsoleKey;
+const OSSymbol *		gIOConsoleSessionSecureInputPIDKey;
+const OSSymbol *		gIOConsoleSessionScreenLockedTimeKey;
+
+static clock_sec_t		gIOConsoleLockTime;
 
 static int			gIOResourceGenerationCount;
 
@@ -125,6 +133,7 @@ const OSSymbol *		gIOGeneralInterest;
 const OSSymbol *		gIOBusyInterest;
 const OSSymbol *		gIOAppPowerStateInterest;
 const OSSymbol *		gIOPriorityPowerStateInterest;
+const OSSymbol *		gIOConsoleSecurityInterest;
 
 static OSDictionary * 		gNotifications;
 static IORecursiveLock *	gNotificationLock;
@@ -159,6 +168,9 @@ const OSSymbol *		gIOPlatformActiveActionKey;
 
 const OSSymbol *		gIOPlatformFunctionHandlerSet;
 
+static IOLock *			gIOConsoleUsersLock;
+static thread_call_t		gIOConsoleLockCallout;
+
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 #define LOCKREADNOTIFY()	\
@@ -213,14 +225,6 @@ bool IOService::isInactive( void ) const
     { return( 0 != (kIOServiceInactiveState & getState())); }
 
 
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-
-#define IOServiceTrace(csc, a, b, c, d) {				\
-    if(kIOTraceIOService & gIOKitTrace) {				\
-	KERNEL_DEBUG_CONSTANT(IODBG_IOSERVICE(csc), a, b, c, d, 0);	\
-    }									\
-}
-
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 #if defined(__i386__) || defined(__x86_64__)
@@ -296,6 +300,7 @@ void IOService::initialize( void )
     gIOBusyInterest   		= OSSymbol::withCStringNoCopy( kIOBusyInterest );
     gIOAppPowerStateInterest   	= OSSymbol::withCStringNoCopy( kIOAppPowerStateInterest );
     gIOPriorityPowerStateInterest   	= OSSymbol::withCStringNoCopy( kIOPriorityPowerStateInterest );
+    gIOConsoleSecurityInterest 	= OSSymbol::withCStringNoCopy( kIOConsoleSecurityInterest );
 
     gNotifications		= OSDictionary::withCapacity( 1 );
     gIOPublishNotification	= OSSymbol::withCStringNoCopy(
@@ -310,13 +315,18 @@ void IOService::initialize( void )
 						 kIOTerminatedNotification );
     gIOServiceKey		= OSSymbol::withCStringNoCopy( kIOServiceClass);
 
+    gIOConsoleLockedKey		= OSSymbol::withCStringNoCopy( kIOConsoleLockedKey);
     gIOConsoleUsersKey		= OSSymbol::withCStringNoCopy( kIOConsoleUsersKey);
     gIOConsoleSessionUIDKey	= OSSymbol::withCStringNoCopy( kIOConsoleSessionUIDKey);
-    gIOConsoleUsersSeedKey	= OSSymbol::withCStringNoCopy( kIOConsoleUsersSeedKey);
-    gIOConsoleSessionOnConsoleKey = OSSymbol::withCStringNoCopy( kIOConsoleSessionOnConsoleKey);
-    gIOConsoleSessionSecureInputPIDKey = OSSymbol::withCStringNoCopy( kIOConsoleSessionSecureInputPIDKey);
-    gIOConsoleUsersSeedValue	= OSData::withBytesNoCopy(&gIOConsoleUsersSeed, sizeof(gIOConsoleUsersSeed));
+    gIOConsoleSessionAuditIDKey	= OSSymbol::withCStringNoCopy( kIOConsoleSessionAuditIDKey);
+
+    gIOConsoleUsersSeedKey	         = OSSymbol::withCStringNoCopy(kIOConsoleUsersSeedKey);
+    gIOConsoleSessionOnConsoleKey        = OSSymbol::withCStringNoCopy(kIOConsoleSessionOnConsoleKey);
+    gIOConsoleSessionSecureInputPIDKey   = OSSymbol::withCStringNoCopy(kIOConsoleSessionSecureInputPIDKey);
+    gIOConsoleSessionScreenLockedTimeKey = OSSymbol::withCStringNoCopy(kIOConsoleSessionScreenLockedTimeKey);
 
+    gIOConsoleUsersSeedValue	       = OSData::withBytesNoCopy(&gIOConsoleUsersSeed, sizeof(gIOConsoleUsersSeed));
+	
     gIOPlatformSleepActionKey	= OSSymbol::withCStringNoCopy(kIOPlatformSleepActionKey);
     gIOPlatformWakeActionKey	= OSSymbol::withCStringNoCopy(kIOPlatformWakeActionKey);
     gIOPlatformQuiesceActionKey	= OSSymbol::withCStringNoCopy(kIOPlatformQuiesceActionKey);
@@ -345,9 +355,14 @@ void IOService::initialize( void )
 
     gIOServiceBusyLock = IOLockAlloc();
 
+    gIOConsoleUsersLock = IOLockAlloc();
+
     err = semaphore_create(kernel_task, &gJobsSemaphore, SYNC_POLICY_FIFO, 0);
 
-    assert( gIOServiceBusyLock && gJobs && gJobsLock && (err == KERN_SUCCESS) );
+    gIOConsoleLockCallout = thread_call_allocate(&IOService::consoleLockTimer, NULL);
+
+    assert( gIOServiceBusyLock && gJobs && gJobsLock && gIOConsoleUsersLock
+    		&& gIOConsoleLockCallout && (err == KERN_SUCCESS) );
 
     gIOResources = IOResources::resources();
     assert( gIOResources );
@@ -578,7 +593,6 @@ void IOService::startMatching( IOOptionBits options )
 //			OSKernelStackRemaining(), getName());
 
     if( needConfig) {
-	prevBusy = _adjustBusy( 1 );
         needWake = (0 != (kIOServiceSyncPubState & __state[1]));
     }
 
@@ -591,6 +605,8 @@ void IOService::startMatching( IOOptionBits options )
 
     if( needConfig) {
 
+	prevBusy = _adjustBusy( 1 );
+
         if( needWake) {
             IOLockLock( gIOServiceBusyLock );
             thread_wakeup( (event_t) this/*&__state[1]*/ );
@@ -1470,6 +1486,7 @@ IONotifier * IOService::registerInterest( const OSSymbol * typeOfInterest,
     if( (typeOfInterest != gIOGeneralInterest)
      && (typeOfInterest != gIOBusyInterest)
      && (typeOfInterest != gIOAppPowerStateInterest)
+     && (typeOfInterest != gIOConsoleSecurityInterest)
      && (typeOfInterest != gIOPriorityPowerStateInterest))
         return( 0 );
 
@@ -1541,6 +1558,7 @@ void IOService::unregisterAllInterest( void )
     cleanInterestList( getProperty( gIOBusyInterest ));
     cleanInterestList( getProperty( gIOAppPowerStateInterest ));
     cleanInterestList( getProperty( gIOPriorityPowerStateInterest ));
+    cleanInterestList( getProperty( gIOConsoleSecurityInterest ));
 }
 
 /*
@@ -1583,7 +1601,7 @@ void _IOServiceInterestNotifier::remove()
     LOCKWRITENOTIFY();
 
     if( queue_next( &chain )) {
-	remqueue( 0, &chain);
+	remqueue(&chain);
 	queue_next( &chain) = queue_prev( &chain) = 0;
 	release();
     }
@@ -1631,7 +1649,7 @@ void _IOServiceInterestNotifier::enable( bool was )
 
 #define tailQ(o)		setObject(o)
 #define headQ(o)		setObject(0, o)
-#define TLOG(fmt, args...)  	{ if(kIOLogYield & gIOKitDebug) IOLog(fmt, ## args); }
+#define TLOG(fmt, args...)  	{ if(kIOLogYield & gIOKitDebug) { IOLog("[%llx] ", thread_tid(current_thread())); IOLog(fmt, ## args); }}
 
 static void _workLoopAction( IOWorkLoop::Action action,
                              IOService * service,
@@ -1667,13 +1685,15 @@ bool IOService::requestTerminate( IOService * provider, IOOptionBits options )
 
 bool IOService::terminatePhase1( IOOptionBits options )
 {
-    IOService *		victim;
-    IOService *		client;
-    OSIterator *	iter;
-    OSArray *		makeInactive;
-    bool		ok;
-    bool		didInactive;
-    bool		startPhase2 = false;
+    IOService *  victim;
+    IOService *  client;
+    OSIterator * iter;
+    OSArray *    makeInactive;
+    int          waitResult = THREAD_AWAKENED;
+    bool         wait;
+    bool         ok;
+    bool         didInactive;
+    bool         startPhase2 = false;
 
     TLOG("%s::terminatePhase1(%08llx)\n", getName(), (long long)options);
 
@@ -1701,16 +1721,38 @@ bool IOService::terminatePhase1( IOOptionBits options )
 
     while( victim ) {
 
-	didInactive = victim->lockForArbitration( true );
+		didInactive = victim->lockForArbitration( true );
         if( didInactive) {
             didInactive = (0 == (victim->__state[0] & kIOServiceInactiveState));
             if( didInactive) {
                 victim->__state[0] |= kIOServiceInactiveState;
                 victim->__state[0] &= ~(kIOServiceRegisteredState | kIOServiceMatchedState
                                         | kIOServiceFirstPublishState | kIOServiceFirstMatchState);
+
+				if (victim == this)
+					victim->__state[1] |= kIOServiceTermPhase1State;
+
                 victim->_adjustBusy( 1 );
-            }
-	    victim->unlockForArbitration();
+
+            } else if (victim != this) do {
+
+				IOLockLock(gIOServiceBusyLock);
+				wait = (victim->__state[1] & kIOServiceTermPhase1State);
+				if( wait) {
+				    TLOG("%s::waitPhase1(%s)\n", getName(), victim->getName());
+					victim->__state[1] |= kIOServiceTerm1WaiterState;
+					victim->unlockForArbitration();
+					assert_wait((event_t)&victim->__state[1], THREAD_UNINT);
+				}
+				IOLockUnlock(gIOServiceBusyLock);
+				if( wait) {
+					waitResult = thread_block(THREAD_CONTINUE_NULL);
+				    TLOG("%s::did waitPhase1(%s)\n", getName(), victim->getName());
+					victim->lockForArbitration();
+				}
+			} while( wait && (waitResult != THREAD_TIMED_OUT));
+
+			victim->unlockForArbitration();
         }
         if( victim == this)
             startPhase2 = didInactive;
@@ -1755,8 +1797,21 @@ bool IOService::terminatePhase1( IOOptionBits options )
     makeInactive->release();
 
     if( startPhase2)
-        scheduleTerminatePhase2( options );
+    {
+		lockForArbitration();
+		__state[1] &= ~kIOServiceTermPhase1State;
+		if (kIOServiceTerm1WaiterState & __state[1])
+		{
+			__state[1] &= ~kIOServiceTerm1WaiterState;
+			TLOG("%s::wakePhase1\n", getName());
+			IOLockLock( gIOServiceBusyLock );
+			thread_wakeup( (event_t) &__state[1]);
+			IOLockUnlock( gIOServiceBusyLock );
+		}
+		unlockForArbitration();
 
+        scheduleTerminatePhase2( options );
+    }
     return( true );
 }
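
The new phase-1 logic serializes overlapping terminations with the kernel's
event-address handshake: a waiter sets a waiter bit and blocks on the address
of victim->__state[1]; the thread finishing phase 1 clears the phase bit and
wakes that same address. Condensed from the hunk above (the two wrapper
functions are illustrative framing, not patch code):

    static void waitForPhase1(IOService * victim)
    {
        IOLockLock(gIOServiceBusyLock);
        victim->__state[1] |= kIOServiceTerm1WaiterState;
        assert_wait((event_t) &victim->__state[1], THREAD_UNINT);
        IOLockUnlock(gIOServiceBusyLock);
        thread_block(THREAD_CONTINUE_NULL);  /* sleeps until the wakeup below */
    }

    static void finishPhase1(IOService * me)
    {
        me->__state[1] &= ~kIOServiceTermPhase1State;
        if (kIOServiceTerm1WaiterState & me->__state[1]) {
            me->__state[1] &= ~kIOServiceTerm1WaiterState;
            IOLockLock(gIOServiceBusyLock);
            thread_wakeup((event_t) &me->__state[1]);  /* wake all waiters */
            IOLockUnlock(gIOServiceBusyLock);
        }
    }
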
 
@@ -1917,7 +1972,9 @@ bool IOService::didTerminate( IOService * provider, IOOptionBits options, bool *
 }
 
 void IOService::actionWillTerminate( IOService * victim, IOOptionBits options, 
-                                    OSArray * doPhase2List )
+				     OSArray * doPhase2List,
+				     void *unused2 __unused,
+				     void *unused3 __unused  )
 {
     OSIterator * iter;
     IOService *	 client;
@@ -1945,7 +2002,9 @@ void IOService::actionWillTerminate( IOService * victim, IOOptionBits options,
     }
 }
 
-void IOService::actionDidTerminate( IOService * victim, IOOptionBits options )
+void IOService::actionDidTerminate( IOService * victim, IOOptionBits options,
+			    void *unused1 __unused, void *unused2 __unused,
+			    void *unused3 __unused )
 {
     OSIterator * iter;
     IOService *	 client;
@@ -1977,7 +2036,9 @@ void IOService::actionDidTerminate( IOService * victim, IOOptionBits options )
     }
 }
 
-void IOService::actionFinalize( IOService * victim, IOOptionBits options )
+void IOService::actionFinalize( IOService * victim, IOOptionBits options,
+			    void *unused1 __unused, void *unused2 __unused,
+			    void *unused3 __unused )
 {
     TLOG("%s::finalize(%08llx)\n", victim->getName(), (long long)options);
 
@@ -1991,7 +2052,9 @@ void IOService::actionFinalize( IOService * victim, IOOptionBits options )
     victim->finalize( options );
 }
 
-void IOService::actionStop( IOService * provider, IOService * client )
+void IOService::actionStop( IOService * provider, IOService * client,
+			    void *unused1 __unused, void *unused2 __unused,
+			    void *unused3 __unused )
 {
     TLOG("%s::stop(%s)\n", client->getName(), provider->getName());
 
@@ -3181,8 +3244,10 @@ UInt32 IOService::_adjustBusy( SInt32 delta )
 				     &messageClientsApplier, &context );
 
 #if !NO_KEXTD
-            if( nowQuiet && (next == gIOServiceRoot))
+            if( nowQuiet && (next == gIOServiceRoot)) {
                 OSKext::considerUnloads();
+                IOServiceTrace(IOSERVICE_REGISTRY_QUIET, 0, 0, 0, 0);
+            }
 #endif
         }
 
@@ -3386,7 +3451,7 @@ IOReturn IOService::waitMatchIdle( UInt32 msToWait )
     bool            wait;
     int             waitResult = THREAD_AWAKENED;
     bool            computeDeadline = true;
-    AbsoluteTime    abstime;
+    AbsoluteTime    deadline;
 
     IOLockLock( gJobsLock );
     do {
@@ -3394,14 +3459,12 @@ IOReturn IOService::waitMatchIdle( UInt32 msToWait )
         if( wait) {
             if( msToWait) {
                 if( computeDeadline ) {
-                    clock_interval_to_absolutetime_interval(
-                          msToWait, kMillisecondScale, &abstime );
-                    clock_absolutetime_interval_to_deadline(
-                          abstime, &abstime );
+                    clock_interval_to_deadline(
+                          msToWait, kMillisecondScale, &deadline );
                     computeDeadline = false;
                 }
 			  waitResult = IOLockSleepDeadline( gJobsLock, &gNumConfigThreads,
-								abstime, THREAD_UNINT );
+								deadline, THREAD_UNINT );
 	    	   } else {
 			  waitResult = IOLockSleep( gJobsLock, &gNumConfigThreads,
 								THREAD_UNINT );
@@ -4096,6 +4159,34 @@ IOService * IOResources::resources( void )
     return( inst );
 }
 
+bool IOResources::init( OSDictionary * dictionary )
+{
+    // Do super init first
+    if ( !super::init() )
+        return false;
+
+    // Allow PAL layer to publish a value
+    const char *property_name;
+    int property_value;
+
+    pal_get_resource_property( &property_name, &property_value );
+
+    if( property_name ) {
+	OSNumber *num;
+	const OSSymbol *	sym;
+
+	if( (num = OSNumber::withNumber(property_value, 32)) != 0 ) {
+	    if( (sym = OSSymbol::withCString( property_name)) != 0 ) {
+		this->setProperty( sym, num );
+		sym->release();
+	    }
+	    num->release();
+	}
+    }
+
+    return true;
+}
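
IOResources::init above lets the PAL layer publish one named numeric property
on the resources nub, following the create/set/release discipline for
OSSymbol and OSNumber: the registry retains what it stores, so the creation
references must be dropped. The discipline in isolation, with illustrative
parameters:

    // Sketch: publish a numeric property with balanced retain counts.
    static void publishNumber(IOService * service, const char * name, int value)
    {
        OSNumber * num = OSNumber::withNumber(value, 32);
        if (num) {
            const OSSymbol * sym = OSSymbol::withCString(name);
            if (sym) {
                service->setProperty(sym, num);   // registry retains both
                sym->release();
            }
            num->release();                       // drop our creation ref
        }
    }
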
+
 IOWorkLoop * IOResources::getWorkLoop() const
 {
     // If we are the resource root
@@ -4133,6 +4224,92 @@ bool IOResources::matchPropertyTable( OSDictionary * table )
     return( ok );
 }
 
+void IOService::consoleLockTimer(thread_call_param_t p0, thread_call_param_t p1)
+{
+    IOService::updateConsoleUsers(NULL, 0);
+}
+
+void IOService::updateConsoleUsers(OSArray * consoleUsers, IOMessage systemMessage)
+{
+    IORegistryEntry * regEntry;
+    OSObject *        locked = kOSBooleanFalse;
+    uint32_t          idx;
+    bool              publish;
+    OSDictionary *    user;
+    static IOMessage  sSystemPower;
+
+    regEntry = IORegistryEntry::getRegistryRoot();
+
+    IOLockLock(gIOConsoleUsersLock);
+
+    if (systemMessage)
+    {
+        sSystemPower = systemMessage;
+    }
+    if (consoleUsers)
+    {
+        OSNumber * num = 0;
+	for (idx = 0; 
+	      (!num) && (user = OSDynamicCast(OSDictionary, consoleUsers->getObject(idx))); 
+	      idx++)
+	{
+	    num = OSDynamicCast(OSNumber, user->getObject(gIOConsoleSessionScreenLockedTimeKey));
+	}
+        gIOConsoleLockTime = num ? num->unsigned32BitValue() : 0;
+    }
+
+    if (gIOConsoleLockTime)
+    {
+	if (kIOMessageSystemWillSleep == sSystemPower)
+	    locked = kOSBooleanTrue;
+	else
+	{
+	    clock_sec_t  now;
+	    clock_usec_t microsecs;
+
+	    clock_get_calendar_microtime(&now, &microsecs);
+	    if (gIOConsoleLockTime > now)
+	    {
+		AbsoluteTime deadline;
+		clock_interval_to_deadline(gIOConsoleLockTime - now, kSecondScale, &deadline);
+		thread_call_enter_delayed(gIOConsoleLockCallout, deadline);
+	    }
+	    else
+	    {
+		locked = kOSBooleanTrue;
+	    }
+	}
+    }
+
+    publish = (consoleUsers || (locked != regEntry->getProperty(gIOConsoleLockedKey)));
+    if (publish)
+    {
+	regEntry->setProperty(gIOConsoleLockedKey, locked);
+	if (consoleUsers)
+	{
+	    regEntry->setProperty(gIOConsoleUsersKey, consoleUsers);
+	}
+	OSIncrementAtomic( &gIOConsoleUsersSeed );
+    }
+
+    IOLockUnlock(gIOConsoleUsersLock);
+
+    if (publish)
+    {
+	publishResource( gIOConsoleUsersSeedKey, gIOConsoleUsersSeedValue );
+
+	MessageClientsContext context;
+    
+	context.service  = getServiceRoot();
+	context.type     = kIOMessageConsoleSecurityChange;
+	context.argument = (void *) regEntry;
+	context.argSize  = 0;
+    
+	applyToInterestNotifiers(getServiceRoot(), gIOConsoleSecurityInterest, 
+				 &messageClientsApplier, &context );
+    }
+}
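
When the reported screen-lock time is still in the future, updateConsoleUsers
arms gIOConsoleLockCallout so kIOConsoleLockedKey flips exactly when the grace
period expires; otherwise the console counts as locked immediately. The
deadline arithmetic in isolation (lockTime and callout are illustrative
parameters):

    // Sketch: arm a one-shot callout for a wall-clock lock deadline.
    static bool lockedNow(clock_sec_t lockTime, thread_call_t callout)
    {
        clock_sec_t  now;
        clock_usec_t microsecs;
        AbsoluteTime deadline;

        clock_get_calendar_microtime(&now, &microsecs);
        if (lockTime > now) {
            // Grace period still running: fire exactly when it expires.
            clock_interval_to_deadline(lockTime - now, kSecondScale, &deadline);
            thread_call_enter_delayed(callout, deadline);
            return false;
        }
        return true;    // deadline already passed; console counts as locked
    }
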
+
 IOReturn IOResources::setProperties( OSObject * properties )
 {
     IOReturn			err;
@@ -4152,15 +4329,17 @@ IOReturn IOResources::setProperties( OSObject * properties )
     if( 0 == iter)
 	return( kIOReturnBadArgument);
 
-    while( (key = OSDynamicCast(OSSymbol, iter->getNextObject()))) {
-
-	if (gIOConsoleUsersKey == key)
+    while( (key = OSDynamicCast(OSSymbol, iter->getNextObject())))
+    {
+	if (gIOConsoleUsersKey == key) do
 	{
-	    IORegistryEntry::getRegistryRoot()->setProperty(key, dict->getObject(key));
-	    OSIncrementAtomic( &gIOConsoleUsersSeed );
-	    publishResource( gIOConsoleUsersSeedKey, gIOConsoleUsersSeedValue );
-	    continue;
+	    OSArray * consoleUsers;
+	    consoleUsers = OSDynamicCast(OSArray, dict->getObject(key));
+	    if (!consoleUsers)
+		continue;
+	    IOService::updateConsoleUsers(consoleUsers, 0);
 	}
+	while (false);
 
 	publishResource( key, dict->getObject(key) );
     }
@@ -4461,7 +4640,7 @@ bool IOService::passiveMatch( OSDictionary * table, bool changesOK )
     } while( matchParent && (where = where->getProvider()) );
 
     if( kIOLogMatch & gIOKitDebug)
-        if( where != this)
+        if( where && (where != this) )
             LOG("match parent @ %s = %d\n",
                         where->getName(), match );
 
@@ -5174,22 +5353,3 @@ OSMetaClassDefineReservedUnused(IOService, 44);
 OSMetaClassDefineReservedUnused(IOService, 45);
 OSMetaClassDefineReservedUnused(IOService, 46);
 OSMetaClassDefineReservedUnused(IOService, 47);
-
-#ifdef __ppc__
-OSMetaClassDefineReservedUnused(IOService, 48);
-OSMetaClassDefineReservedUnused(IOService, 49);
-OSMetaClassDefineReservedUnused(IOService, 50);
-OSMetaClassDefineReservedUnused(IOService, 51);
-OSMetaClassDefineReservedUnused(IOService, 52);
-OSMetaClassDefineReservedUnused(IOService, 53);
-OSMetaClassDefineReservedUnused(IOService, 54);
-OSMetaClassDefineReservedUnused(IOService, 55);
-OSMetaClassDefineReservedUnused(IOService, 56);
-OSMetaClassDefineReservedUnused(IOService, 57);
-OSMetaClassDefineReservedUnused(IOService, 58);
-OSMetaClassDefineReservedUnused(IOService, 59);
-OSMetaClassDefineReservedUnused(IOService, 60);
-OSMetaClassDefineReservedUnused(IOService, 61);
-OSMetaClassDefineReservedUnused(IOService, 62);
-OSMetaClassDefineReservedUnused(IOService, 63);
-#endif
diff --git a/iokit/Kernel/IOServicePM.cpp b/iokit/Kernel/IOServicePM.cpp
index fcecfbf00..bd7bcd002 100644
--- a/iokit/Kernel/IOServicePM.cpp
+++ b/iokit/Kernel/IOServicePM.cpp
@@ -26,14 +26,16 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
-//#define IOASSERT 1
+//#undef IOASSERT
+//#define IOASSERT    1
+
 #include <IOKit/assert.h>
 #include <IOKit/IOKitDebug.h>
 #include <IOKit/IOLib.h>
 #include <IOKit/IOMessage.h>
 #include <IOKit/IOPlatformExpert.h>
 #include <IOKit/IOService.h>
-#include <IOKit/IOTimerEventSource.h>
+#include <IOKit/IOEventSource.h>
 #include <IOKit/IOWorkLoop.h>
 #include <IOKit/IOCommand.h>
 
@@ -42,8 +44,10 @@
 #include <IOKit/pwr_mgt/IOPMinformeeList.h>
 #include <IOKit/pwr_mgt/IOPowerConnection.h>
 #include <IOKit/pwr_mgt/RootDomain.h>
+#include <IOKit/pwr_mgt/IOPMPrivate.h>
 
 #include <sys/proc.h>
+#include <libkern/OSDebug.h>
 
 // Required for notification instrumentation
 #include "IOServicePrivate.h"
@@ -51,6 +55,7 @@
 #include "IOKitKernelInternal.h"
 
 static void settle_timer_expired(thread_call_param_t, thread_call_param_t);
+static void idle_timer_expired(thread_call_param_t, thread_call_param_t);
 static void tellKernelClientApplier(OSObject * object, void * arg);
 static void tellAppClientApplier(OSObject * object, void * arg);
 
@@ -69,15 +74,20 @@ static uint64_t computeTimeDeltaNS( const AbsoluteTime * start )
 OSDefineMetaClassAndStructors(IOPMprot, OSObject)
 #endif
 
-//*********************************************************************************
+// Container class for recording system power events
+OSDefineMetaClassAndStructors( PMEventDetails, OSObject );
+
+//******************************************************************************
 // Globals
-//*********************************************************************************
+//******************************************************************************
 
 static bool                  gIOPMInitialized   = false;
 static uint32_t              gIOPMBusyCount     = 0;
+static uint32_t              gIOPMWorkCount     = 0;
 static IOWorkLoop *          gIOPMWorkLoop      = 0;
 static IOPMRequestQueue *    gIOPMRequestQueue  = 0;
 static IOPMRequestQueue *    gIOPMReplyQueue    = 0;
+static IOPMWorkQueue *       gIOPMWorkQueue     = 0;
 static IOPMCompletionQueue * gIOPMFreeQueue     = 0;
 static IOPMRequest *         gIOPMRequest       = 0;
 static IOPlatformExpert *    gPlatform          = 0;
@@ -96,16 +106,31 @@ static uint32_t getPMRequestType( void )
     return type;
 }
 
-//*********************************************************************************
+//******************************************************************************
 // Macros
-//*********************************************************************************
+//******************************************************************************
 
 #define PM_ERROR(x...)              do { kprintf(x); IOLog(x); } while (false)
-#define PM_DEBUG(x...)              do { kprintf(x); } while (false)
-#define PM_TRACE(x...)              do {  \
-	if (kIOLogDebugPower & gIOKitDebug) kprintf(x); } while (false)
+#define PM_LOG(x...)                do { kprintf(x); } while (false)
+
+#define PM_LOG1(x...)               do {  \
+                                    if (kIOLogDebugPower & gIOKitDebug) \
+                                        kprintf(x); } while (false)
+
+#define PM_LOG2(x...)               do {  \
+                                    if (kIOLogDebugPower & gIOKitDebug) \
+                                        kprintf(x); } while (false)
 
-#define PM_CONNECT(x...)
+#if 0
+#define PM_LOG3(x...)               do { kprintf(x); } while (false)
+#else
+#define PM_LOG3(x...)
+#endif
+
+#define RD_LOG(x...)                do { \
+                                    if ((kIOLogPMRootDomain & gIOKitDebug) && \
+                                        (getPMRootDomain() == this)) \
+                                        kprintf("PMRD: " x); } while (false)
 
 #define PM_ASSERT_IN_GATE(x)          \
 do {                                  \
@@ -114,7 +139,7 @@ do {                                  \
 
 #define PM_LOCK()                   IOLockLock(fPMLock)
 #define PM_UNLOCK()                 IOLockUnlock(fPMLock)
-#define PM_LOCK_SLEEP(event)        IOLockSleep(fPMLock, event, THREAD_UNINT)
+#define PM_LOCK_SLEEP(event, dl)    IOLockSleepDeadline(fPMLock, event, dl, THREAD_UNINT)
 #define PM_LOCK_WAKEUP(event)       IOLockWakeup(fPMLock, event, false)
 
 #define ns_per_us                   1000
@@ -128,17 +153,16 @@ do {                                  \
     do { gPlatform->PMLog( fName, t, a, b); } while(0)
 
 #define NS_TO_MS(nsec)              ((int)((nsec) / 1000000ULL))
+#define NS_TO_US(nsec)              ((int)((nsec) / 1000ULL))
 
 #if CONFIG_EMBEDDED
 #define SUPPORT_IDLE_CANCEL         1
 #endif
 
-#define kNotifyWillChange           (true)
-#define kNotifyDidChange            (false)
-
 #define kIOPMPowerStateMax          0xFFFFFFFF  
 
-#define IS_PM_ROOT()                (this == gIOPMRootNode)
+#define IS_PM_ROOT                  (this == gIOPMRootNode)
+#define IS_ROOT_DOMAIN              (getPMRootDomain() == this)
 #define IS_POWER_DROP               (fHeadNotePowerState < fCurrentPowerState)
 #define IS_POWER_RISE               (fHeadNotePowerState > fCurrentPowerState)
 
@@ -149,41 +173,69 @@ do {                                  \
 // use message tracer to log messages longer than (ns):
 #define LOG_APP_RESPONSE_MSG_TRACER (3 * 1000ULL * 1000ULL * 1000ULL)
 
-#define RESERVE_DOMAIN_POWER        1
-
 enum {
     kReserveDomainPower = 1
 };
 
+#define MS_PUSH(n)  \
+    do { assert(kIOPM_BadMachineState == fSavedMachineState); \
+         assert(kIOPM_BadMachineState != n); \
+         fSavedMachineState = n; } while (false)
+
+#define MS_POP()    \
+    do { assert(kIOPM_BadMachineState != fSavedMachineState); \
+         fMachineState = fSavedMachineState; \
+         fSavedMachineState = kIOPM_BadMachineState; } while (false)
+
+#define PM_ACTION_0(a) \
+    do { if (fPMActions.a) { \
+         (fPMActions.a)(fPMActions.target, this, &fPMActions); } \
+         } while (false)
+
+#define PM_ACTION_2(a, x, y) \
+    do { if (fPMActions.a) { \
+         (fPMActions.a)(fPMActions.target, this, &fPMActions, x, y); } \
+         } while (false)
+
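
MS_PUSH and MS_POP give the PM state machine a one-deep return stack: before
diverting into a sub-sequence, the caller saves the state to resume, and pops
it when the sub-sequence retires. The asserts trip on a nested push, which
the single save slot cannot hold. An illustrative sequence (a sketch, not
patch code):

    MS_PUSH(fMachineState);                       // remember where to resume
    fMachineState = kIOPM_DriverThreadCallDone;   // divert to the sub-sequence
    // ... the driver thread call completes asynchronously ...
    MS_POP();                                     // resume the saved state
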
 //*********************************************************************************
 // PM machine states
+//
+// Check kgmacros after modifying machine states.
 //*********************************************************************************
 
 enum {
+    kIOPM_Finished                                      = 0,
+
     kIOPM_OurChangeTellClientsPowerDown                 = 1,
     kIOPM_OurChangeTellPriorityClientsPowerDown         = 2,
     kIOPM_OurChangeNotifyInterestedDriversWillChange    = 3,
     kIOPM_OurChangeSetPowerState                        = 4,
     kIOPM_OurChangeWaitForPowerSettle                   = 5,
     kIOPM_OurChangeNotifyInterestedDriversDidChange     = 6,
-    kIOPM_OurChangeFinish                               = 7,
-    kIOPM_ParentDownTellPriorityClientsPowerDown        = 8,
-    kIOPM_ParentDownNotifyInterestedDriversWillChange   = 9,
-    /* 10 not used */
-    kIOPM_ParentDownNotifyDidChangeAndAcknowledgeChange = 11,
-    kIOPM_ParentDownSetPowerState                       = 12,
-    kIOPM_ParentDownWaitForPowerSettle                  = 13,
-    kIOPM_ParentAcknowledgePowerChange                  = 14,
-    kIOPM_ParentUpSetPowerState                         = 15,
-    /* 16 not used */
-    kIOPM_ParentUpWaitForSettleTime                     = 17,
-    kIOPM_ParentUpNotifyInterestedDriversDidChange      = 18,
-    /* 19 not used */
-    kIOPM_Finished                                      = 20,
-    kIOPM_DriverThreadCallDone                          = 21,
-    kIOPM_NotifyChildrenDone                            = 22,
+    kIOPM_OurChangeTellCapabilityDidChange              = 7,
+    kIOPM_OurChangeFinish                               = 8,
+
+    kIOPM_ParentChangeTellPriorityClientsPowerDown      = 10,
+    kIOPM_ParentChangeNotifyInterestedDriversWillChange = 11,
+    kIOPM_ParentChangeSetPowerState                     = 12,
+    kIOPM_ParentChangeWaitForPowerSettle                = 13,
+    kIOPM_ParentChangeNotifyInterestedDriversDidChange  = 14,
+    kIOPM_ParentChangeTellCapabilityDidChange           = 15,
+    kIOPM_ParentChangeAcknowledgePowerChange            = 16,
+
+    kIOPM_NotifyChildrenStart                           = 17,
+    kIOPM_NotifyChildrenOrdered                         = 18,
+    kIOPM_NotifyChildrenDelayed                         = 19,
+    kIOPM_SyncTellClientsPowerDown                      = 20,
+    kIOPM_SyncTellPriorityClientsPowerDown              = 21,
+    kIOPM_SyncNotifyWillChange                          = 22,
     kIOPM_SyncNotifyDidChange                           = 23,
-    kIOPM_SyncFinish                                    = 24
+    kIOPM_SyncTellCapabilityDidChange                   = 24,
+    kIOPM_SyncFinish                                    = 25,
+    kIOPM_TellCapabilityChangeDone                      = 26,
+    kIOPM_DriverThreadCallDone                          = 27,
+
+    kIOPM_BadMachineState                               = 0xFFFFFFFF
 };
 
 
@@ -366,68 +418,95 @@ void IOService::PMinit ( void )
 		if ( !gIOPMInitialized )
 		{
             gPlatform = getPlatform();
-			gIOPMWorkLoop = IOWorkLoop::workLoop();
-			if (gIOPMWorkLoop)
-			{
-				gIOPMRequestQueue = IOPMRequestQueue::create(
-					this, OSMemberFunctionCast(IOPMRequestQueue::Action,
-						this, &IOService::servicePMRequestQueue));
+            gIOPMWorkLoop = IOWorkLoop::workLoop();
+            if (gIOPMWorkLoop)
+            {
+                gIOPMRequestQueue = IOPMRequestQueue::create(
+                    this, OSMemberFunctionCast(IOPMRequestQueue::Action,
+                        this, &IOService::servicePMRequestQueue));
+
+                gIOPMReplyQueue = IOPMRequestQueue::create(
+                    this, OSMemberFunctionCast(IOPMRequestQueue::Action,
+                        this, &IOService::servicePMReplyQueue));
+
+                gIOPMWorkQueue = IOPMWorkQueue::create(
+                    this,
+                    OSMemberFunctionCast(IOPMWorkQueue::Action, this,
+                        &IOService::servicePMRequest),
+                    OSMemberFunctionCast(IOPMWorkQueue::Action, this,
+                        &IOService::retirePMRequest));
+
+                gIOPMFreeQueue = IOPMCompletionQueue::create(
+                    this, OSMemberFunctionCast(IOPMCompletionQueue::Action,
+                        this, &IOService::servicePMFreeQueue));
+
+                if (gIOPMWorkLoop->addEventSource(gIOPMRequestQueue) !=
+                    kIOReturnSuccess)
+                {
+                    gIOPMRequestQueue->release();
+                    gIOPMRequestQueue = 0;
+                }
 
-				gIOPMReplyQueue = IOPMRequestQueue::create(
-					this, OSMemberFunctionCast(IOPMRequestQueue::Action,
-						this, &IOService::servicePMReplyQueue));
+                if (gIOPMWorkLoop->addEventSource(gIOPMReplyQueue) !=
+                    kIOReturnSuccess)
+                {
+                    gIOPMReplyQueue->release();
+                    gIOPMReplyQueue = 0;
+                }
+
+                if (gIOPMWorkLoop->addEventSource(gIOPMWorkQueue) !=
+                    kIOReturnSuccess)
+                {
+                    gIOPMWorkQueue->release();
+                    gIOPMWorkQueue = 0;
+                }
 
-				gIOPMFreeQueue = IOPMCompletionQueue::create(
-					this, OSMemberFunctionCast(IOPMCompletionQueue::Action,
-						this, &IOService::servicePMFreeQueue));
+                if (gIOPMWorkLoop->addEventSource(gIOPMFreeQueue) !=
+                    kIOReturnSuccess)
+                {
+                    gIOPMFreeQueue->release();
+                    gIOPMFreeQueue = 0;
+                }
 
-				if (gIOPMWorkLoop->addEventSource(gIOPMRequestQueue) !=
-					kIOReturnSuccess)
-				{
-					gIOPMRequestQueue->release();
-					gIOPMRequestQueue = 0;
-				}
+                gIOPMPowerClientDevice =
+                    OSSymbol::withCStringNoCopy( "DevicePowerState" );
 
-				if (gIOPMWorkLoop->addEventSource(gIOPMReplyQueue) !=
-					kIOReturnSuccess)
-				{
-					gIOPMReplyQueue->release();
-					gIOPMReplyQueue = 0;
-				}
+                gIOPMPowerClientDriver =
+                    OSSymbol::withCStringNoCopy( "DriverPowerState" );
 
-				if (gIOPMWorkLoop->addEventSource(gIOPMFreeQueue) !=
-					kIOReturnSuccess)
-				{
-					gIOPMFreeQueue->release();
-					gIOPMFreeQueue = 0;
-				}
+                gIOPMPowerClientChildProxy =
+                    OSSymbol::withCStringNoCopy( "ChildProxyPowerState" );
 
-                gIOPMPowerClientDevice     = OSSymbol::withCStringNoCopy( "DevicePowerState" );
-                gIOPMPowerClientDriver     = OSSymbol::withCStringNoCopy( "DriverPowerState" );
-                gIOPMPowerClientChildProxy = OSSymbol::withCStringNoCopy( "ChildProxyPowerState" );
-                gIOPMPowerClientChildren   = OSSymbol::withCStringNoCopy( "ChildrenPowerState" );
-			}
+                gIOPMPowerClientChildren =
+                    OSSymbol::withCStringNoCopy( "ChildrenPowerState" );
+            }
 
-			if (gIOPMRequestQueue && gIOPMReplyQueue && gIOPMFreeQueue)
-				gIOPMInitialized = true;
-		}
-		if (!gIOPMInitialized)
-			return;
+            if (gIOPMRequestQueue && gIOPMReplyQueue && gIOPMFreeQueue)
+                gIOPMInitialized = true;
+        }
+        if (!gIOPMInitialized)
+            return;
 
         pwrMgt = new IOServicePM;
         pwrMgt->init();
         setProperty(kPwrMgtKey, pwrMgt);
 
+        queue_init(&pwrMgt->WorkChain);
+        queue_init(&pwrMgt->RequestHead);
+        queue_init(&pwrMgt->PMDriverCallQueue);
+
+        fOwner                      = this;
         fPMLock                     = IOLockAlloc();
         fInterestedDrivers          = new IOPMinformeeList;
         fInterestedDrivers->initialize();
         fDesiredPowerState          = 0;
         fDeviceDesire               = 0;
-        fInitialChange              = true;
-        fPreviousRequest            = 0;
-        fDeviceOverrides            = false;
+        fInitialPowerChange         = true;
+        fInitialSetPowerState       = true;
+        fPreviousRequestPowerFlags  = 0;
+        fDeviceOverrideEnabled      = false;
         fMachineState               = kIOPM_Finished;
-        fIdleTimerEventSource       = NULL;
+        fSavedMachineState          = kIOPM_BadMachineState;
         fIdleTimerMinPowerState     = 0;
         fActivityLock               = IOLockAlloc();
         fStrictTreeOrder            = false;
@@ -437,13 +516,12 @@ void IOService::PMinit ( void )
         fNumberOfPowerStates        = 0;
         fCurrentPowerState          = 0;
         fParentsCurrentPowerFlags   = 0;
-        fMaxCapability              = 0;
+        fMaxPowerState              = 0;
         fName                       = getName();
         fParentsKnowState           = false;
         fSerialNumber               = 0;
         fResponseArray              = NULL;
         fNotifyClientArray          = NULL;
-        fDoNotPowerDown             = true;
         fCurrentPowerConsumption    = kIOPMUnknown;
         fOverrideMaxPowerState      = kIOPMPowerStateMax;
 
@@ -457,9 +535,20 @@ void IOService::PMinit ( void )
 			&IOService::ack_timer_expired, (thread_call_param_t)this);
         fSettleTimer = thread_call_allocate(
 			&settle_timer_expired, (thread_call_param_t)this);
-		fDriverCallEntry = thread_call_allocate(
+        fIdleTimer = thread_call_allocate(
+            &idle_timer_expired, (thread_call_param_t)this);
+        fDriverCallEntry = thread_call_allocate(
 			(thread_call_func_t) &IOService::pmDriverCallout, this);
-		assert(fDriverCallEntry);
+        assert(fDriverCallEntry);
+
+        // Check for powerChangeDone override.
+        if (OSMemberFunctionCast(void (*)(void),
+				getResourceService(), &IOService::powerChangeDone) !=
+			  OSMemberFunctionCast(void (*)(void),
+				this, &IOService::powerChangeDone))
+        {
+            fPCDFunctionOverride = true;
+        }
 
 #if PM_VARS_SUPPORT
         IOPMprot * prot = new IOPMprot;
@@ -472,7 +561,7 @@ void IOService::PMinit ( void )
             pm_vars = prot;
 		}
 #else
-        pm_vars = (void *) true;
+        pm_vars = (void *) (uintptr_t) true;
 #endif
 
         initialized = true;
@@ -487,22 +576,18 @@ void IOService::PMinit ( void )
 
 void IOService::PMfree ( void )
 {
-	initialized = false;
+    initialized = false;
     pm_vars = 0;
 
     if ( pwrMgt )
-	{
-		assert(fMachineState == kIOPM_Finished);
-		assert(fInsertInterestSet == NULL);
-		assert(fRemoveInterestSet == NULL);
+    {
+        assert(fMachineState == kIOPM_Finished);
+        assert(fInsertInterestSet == NULL);
+        assert(fRemoveInterestSet == NULL);
         assert(fNotifyChildArray  == NULL);
+        assert(queue_empty(&pwrMgt->RequestHead));
+        assert(queue_empty(&fPMDriverCallQueue));
 
-        if ( fIdleTimerEventSource != NULL ) {
-            fIdleTimerEventSource->disable();
-            gIOPMWorkLoop->removeEventSource(fIdleTimerEventSource);
-            fIdleTimerEventSource->release();
-            fIdleTimerEventSource = NULL;
-        }
         if ( fSettleTimer ) {
             thread_call_cancel(fSettleTimer);
             thread_call_free(fSettleTimer);
@@ -513,6 +598,11 @@ void IOService::PMfree ( void )
             thread_call_free(fAckTimer);
             fAckTimer = NULL;
         }
+        if ( fIdleTimer ) {
+            thread_call_cancel(fIdleTimer);
+            thread_call_free(fIdleTimer);
+            fIdleTimer = NULL;
+        }
         if ( fDriverCallEntry ) {
             thread_call_free(fDriverCallEntry);
             fDriverCallEntry = NULL;
@@ -525,20 +615,15 @@ void IOService::PMfree ( void )
             IOLockFree(fActivityLock);
             fActivityLock = NULL;
         }
-		if ( fInterestedDrivers ) {
-			fInterestedDrivers->release();
-			fInterestedDrivers = NULL;
-		}
-		if ( fPMWorkQueue ) {
-			gIOPMWorkLoop->removeEventSource(fPMWorkQueue);
-			fPMWorkQueue->release();
-			fPMWorkQueue = 0;
-		}
-		if (fDriverCallParamSlots && fDriverCallParamPtr) {
-			IODelete(fDriverCallParamPtr, DriverCallParam, fDriverCallParamSlots);
-			fDriverCallParamPtr = 0;
-			fDriverCallParamSlots = 0;
-		}
+        if ( fInterestedDrivers ) {
+            fInterestedDrivers->release();
+            fInterestedDrivers = NULL;
+        }
+        if (fDriverCallParamSlots && fDriverCallParamPtr) {
+            IODelete(fDriverCallParamPtr, DriverCallParam, fDriverCallParamSlots);
+            fDriverCallParamPtr = 0;
+            fDriverCallParamSlots = 0;
+        }
         if ( fResponseArray ) {
             fResponseArray->release();
             fResponseArray = NULL;
@@ -548,25 +633,25 @@ void IOService::PMfree ( void )
             fNotifyClientArray = NULL;
         }
         if (fPowerStates && fNumberOfPowerStates) {
-            IODelete(fPowerStates, IOPMPowerState, fNumberOfPowerStates);
+            IODelete(fPowerStates, IOPMPSEntry, fNumberOfPowerStates);
             fNumberOfPowerStates = 0;
             fPowerStates = NULL;
         }
-		if (fPowerClients) {
-			fPowerClients->release();
-			fPowerClients = 0;
-		}
+        if (fPowerClients) {
+            fPowerClients->release();
+            fPowerClients = 0;
+        }
 
 #if PM_VARS_SUPPORT
-		if (fPMVars)
-		{
-			fPMVars->release();
-			fPMVars = 0;
-		}
+        if (fPMVars)
+        {
+            fPMVars->release();
+            fPMVars = 0;
+        }
 #endif
 
         pwrMgt->release();
-		pwrMgt = 0;
+        pwrMgt = 0;
     }
 }
 
@@ -614,42 +699,35 @@ IOReturn IOService::youAreRoot ( void )
 
 void IOService::PMstop ( void )
 {
-	IOPMRequest * request;
+    IOPMRequest * request;
 
-	if (!initialized)
-		return;
+    if (!initialized)
+        return;
 
-	// Schedule an async PMstop request, but immediately stop any further
-	// calls to the controlling or interested drivers. This device will
-	// continue to exist in the power plane and participate in power state
-	// changes until the PMstop async request is processed.
+    PM_LOCK();
 
-	PM_LOCK();
-	fLockedFlags.PMStop = true;
-    if (fLockedFlags.DriverCallBusy)
-    {
-        PM_DEBUG("%s: PMstop() driver call busy\n", getName());
-    }
-    while (fThreadAssertionCount != 0)
+    if (fLockedFlags.PMStop)
     {
-        if (current_thread() == fThreadAssertionThread)
-        {
-            PM_ERROR("%s: PMstop() called from PM thread call\n", getName());
-            break;
-        }
-        // Wait for thread assertions to drop to zero.
-        PM_DEBUG("%s: PMstop() wait for %u thread assertion(s)\n",
-            getName(), fThreadAssertionCount);
-        PM_LOCK_SLEEP(&fThreadAssertionCount);
+        PM_LOG2("%s: PMstop() already stopped\n", fName);
+        PM_UNLOCK();
+        return;
     }
+
+    // Inhibit future driver calls.
+    fLockedFlags.PMStop = true;
+
+    // Wait for all prior driver calls to finish.
+    waitForPMDriverCall();
+
     PM_UNLOCK();
 
-	request = acquirePMRequest( this, kIOPMRequestTypePMStop );
-	if (request)
-	{
-		PM_TRACE("%s: %p PMstop\n", getName(), this);
-		submitPMRequest( request );
-	}
+    // The rest of the work is performed async.
+    request = acquirePMRequest( this, kIOPMRequestTypePMStop );
+    if (request)
+    {
+        PM_LOG2("%s: %p PMstop\n", getName(), this);
+        submitPMRequest( request );
+    }
 }
 
 //*********************************************************************************
@@ -660,14 +738,14 @@ void IOService::PMstop ( void )
 
 void IOService::handlePMstop ( IOPMRequest * request )
 {
-    OSIterator *		iter;
+    OSIterator *        iter;
     OSObject *			next;
     IOPowerConnection *	connection;
     IOService *			theChild;
     IOService *			theParent;
 
 	PM_ASSERT_IN_GATE();
-	PM_TRACE("%s: %p %s start\n", getName(), this, __FUNCTION__);
+	PM_LOG2("%s: %p %s start\n", getName(), this, __FUNCTION__);
 
     // remove the property
     removeProperty(kPwrMgtKey);			
@@ -729,23 +807,23 @@ void IOService::handlePMstop ( IOPMRequest * request )
 
     if ( fInterestedDrivers )
     {
-		IOPMinformeeList *	list = fInterestedDrivers;
+        IOPMinformeeList *	list = fInterestedDrivers;
         IOPMinformee *		item;
 
-		PM_LOCK();
-		while ((item = list->firstInList()))
-		{
-			list->removeFromList(item->whatObject);
-		}
-		PM_UNLOCK();
-	}
+        PM_LOCK();
+        while ((item = list->firstInList()))
+        {
+            list->removeFromList(item->whatObject);
+        }
+        PM_UNLOCK();
+    }
 
-	// Tell idleTimerExpired() to ignore idle timer.
-	fIdleTimerPeriod = 0;
-    if (fIdleTimerEventSource)
-        fIdleTimerEventSource->disable();
+    // Tell idleTimerExpired() to ignore idle timer.
+    fIdleTimerPeriod = 0;
+    if (fIdleTimer && thread_call_cancel(fIdleTimer))
+        release();
 
-	PM_TRACE("%s: %p %s done\n", getName(), this, __FUNCTION__);
+    PM_LOG2("%s: %p %s done\n", getName(), this, __FUNCTION__);
 }
 
 //*********************************************************************************
@@ -791,7 +869,7 @@ IOReturn IOService::addPowerChild ( IOService * child )
 		}
 		if (!ok)
 		{
-			PM_DEBUG("%s: %s (%p) is already a child\n",
+			PM_LOG("%s: %s (%p) is already a child\n",
 				getName(), child->getName(), child);
 			break;
 		}
@@ -876,7 +954,7 @@ void IOService::addPowerChild1 ( IOPMRequest * request )
 		tempDesire = fNumberOfPowerStates - 1;
 	}
 
-	if (tempDesire && (IS_PM_ROOT() || (fMaxCapability >= tempDesire)))
+	if (tempDesire && (IS_PM_ROOT || (fMaxPowerState >= tempDesire)))
 	{
 		adjustPowerState(tempDesire);
 	}
@@ -903,7 +981,7 @@ void IOService::addPowerChild2 ( IOPMRequest * request )
 
 	if (!parent || !inPlane(gIOPowerPlane))
 	{
-		PM_DEBUG("%s: addPowerChild2 not in power plane\n", getName());
+		PM_LOG("%s: addPowerChild2 not in power plane\n", getName());
 		return;
 	}
 
@@ -914,7 +992,7 @@ void IOService::addPowerChild2 ( IOPMRequest * request )
 	powerState = parent->fCurrentPowerState;
 
 	if (knowsState)
-		powerFlags = parent->fPowerStates[powerState].outputPowerCharacter;
+		powerFlags = parent->fPowerStates[powerState].outputPowerFlags;
 	else
 		powerFlags = 0;
 
@@ -928,16 +1006,14 @@ void IOService::addPowerChild2 ( IOPMRequest * request )
 
     if ( fControllingDriver && fParentsKnowState )
     {
-        fMaxCapability = fControllingDriver->maxCapabilityForDomainState(fParentsCurrentPowerFlags);
+        fMaxPowerState = fControllingDriver->maxCapabilityForDomainState(fParentsCurrentPowerFlags);
         // initially change into the state we are already in
         tempDesire = fControllingDriver->initialPowerStateForDomainState(fParentsCurrentPowerFlags);
-        fPreviousRequest = 0xffffffff;
+        fPreviousRequestPowerFlags = (IOPMPowerFlags)(-1);
         adjustPowerState(tempDesire);
     }
 
-#if ROOT_DOMAIN_RUN_STATES
-    getPMRootDomain()->tagPowerPlaneService(this, &fRootDomainState);
-#endif
+    getPMRootDomain()->tagPowerPlaneService(this, &fPMActions);
 }
 
 //*********************************************************************************
@@ -960,7 +1036,7 @@ void IOService::addPowerChild3 ( IOPMRequest * request )
 	{
 		if (child->getProperty("IOPMStrictTreeOrder"))
 		{
-			PM_DEBUG("%s: strict PM order enforced\n", getName());
+			PM_LOG1("%s: strict PM order enforced\n", getName());
 			fStrictTreeOrder = true;
 		}
 
@@ -969,7 +1045,7 @@ void IOService::addPowerChild3 ( IOPMRequest * request )
 	}
 	else
 	{
-		PM_DEBUG("%s: addPowerChild3 not in power plane\n", getName());
+		PM_LOG("%s: addPowerChild3 not in power plane\n", getName());
 	}
 
 	connection->release();
@@ -1031,6 +1107,10 @@ IOReturn IOService::removePowerChild ( IOPowerConnection * theNub )
 			if ( fHeadNotePendingAcks == 0 )
 			{
 				stop_ack_timer();
+
+				// Request unblocked, work queue
+				// should re-scan all busy requests.
+				gIOPMWorkQueue->incrementProducerCount();
 			}
 		}
 	}
@@ -1065,8 +1145,8 @@ IOReturn IOService::registerPowerDriver (
 	IOPMPowerState *	powerStates,
 	unsigned long		numberOfStates )
 {
-	IOPMRequest *	 request;
-	IOPMPowerState * powerStatesCopy = 0;
+	IOPMRequest *   request;
+	IOPMPSEntry *   powerStatesCopy = 0;
 
     if (!initialized)
 		return IOPMNotYetInitialized;
@@ -1092,12 +1172,19 @@ IOReturn IOService::registerPowerDriver (
 
 	do {
 		// Make a copy of the supplied power state array.
-		powerStatesCopy = IONew(IOPMPowerState, numberOfStates);
+		powerStatesCopy = IONew(IOPMPSEntry, numberOfStates);
 		if (!powerStatesCopy)
 			break;
 
-		bcopy( powerStates, powerStatesCopy,
-			sizeof(IOPMPowerState) * numberOfStates );
+        for (uint32_t i = 0; i < numberOfStates; i++)
+        {
+            powerStatesCopy[i].capabilityFlags  = powerStates[i].capabilityFlags;
+            powerStatesCopy[i].outputPowerFlags = powerStates[i].outputPowerCharacter;
+            powerStatesCopy[i].inputPowerFlags  = powerStates[i].inputPowerRequirement;
+            powerStatesCopy[i].staticPower      = powerStates[i].staticPower;
+            powerStatesCopy[i].settleUpTime     = powerStates[i].settleUpTime;
+            powerStatesCopy[i].settleDownTime   = powerStates[i].settleDownTime;
+        }
 
 		request = acquirePMRequest( this, kIOPMRequestTypeRegisterPowerDriver );
 		if (!request)
@@ -1114,7 +1201,7 @@ IOReturn IOService::registerPowerDriver (
 	while (false);
 
 	if (powerStatesCopy)
-		IODelete(powerStatesCopy, IOPMPowerState, numberOfStates);
+		IODelete(powerStatesCopy, IOPMPSEntry, numberOfStates);
 	return kIOReturnNoMemory;
 }
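
registerPowerDriver now copies the caller's IOPMPowerState array field by
field into the internal IOPMPSEntry layout instead of bcopy'ing raw bytes:
the two structs no longer share a layout, so a flat copy would smear fields.
A sketch of the general pattern (struct and member names are illustrative,
not the kernel's):

    #include <stdint.h>
    #include <stddef.h>

    struct PublicEntry  { uint32_t version; uint32_t inFlags; uint32_t outFlags; };
    struct PrivateEntry { uint32_t outFlags; uint32_t inFlags; };

    static void marshal(PrivateEntry * dst, const PublicEntry * src, size_t n)
    {
        for (size_t i = 0; i < n; i++) {
            // bcopy()/memcpy() would be wrong here: the members are
            // ordered differently and 'version' is dropped entirely.
            dst[i].inFlags  = src[i].inFlags;
            dst[i].outFlags = src[i].outFlags;
        }
    }
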
 
@@ -1124,12 +1211,12 @@ IOReturn IOService::registerPowerDriver (
 
 void IOService::handleRegisterPowerDriver ( IOPMRequest * request )
 {
-	IOService *			powerDriver    = (IOService *)      request->fArg0;
-	IOPMPowerState *	powerStates    = (IOPMPowerState *) request->fArg1;
-	unsigned long		numberOfStates = (unsigned long)    request->fArg2;
-    unsigned long		i;
-	IOService *			root;
-	OSIterator *		iter;
+	IOService *     powerDriver    = (IOService *)   request->fArg0;
+	IOPMPSEntry *   powerStates    = (IOPMPSEntry *) request->fArg1;
+	unsigned long   numberOfStates = (unsigned long) request->fArg2;
+    unsigned long   i;
+	IOService *     root;
+	OSIterator *    iter;
 
 	PM_ASSERT_IN_GATE();
 	assert(powerStates);
@@ -1140,7 +1227,7 @@ void IOService::handleRegisterPowerDriver ( IOPMRequest * request )
     {
 		OUR_PMLog(kPMLogControllingDriver,
 			(unsigned long) numberOfStates,
-			(unsigned long) powerStates[0].version);
+			(unsigned long) kIOPMPowerStateVersion1);
 
         fPowerStates            = powerStates;
 		fNumberOfPowerStates    = numberOfStates;
@@ -1150,7 +1237,7 @@ void IOService::handleRegisterPowerDriver ( IOPMRequest * request )
 		// make a mask of all the character bits we know about
 		fOutputPowerCharacterFlags = 0;
 		for ( i = 0; i < numberOfStates; i++ ) {
-			fOutputPowerCharacterFlags |= fPowerStates[i].outputPowerCharacter;
+			fOutputPowerCharacterFlags |= fPowerStates[i].outputPowerFlags;
 		}
 
 		// Register powerDriver as interested, unless already done.
@@ -1201,7 +1288,7 @@ void IOService::handleRegisterPowerDriver ( IOPMRequest * request )
 		if ( inPlane(gIOPowerPlane) && fParentsKnowState )
 		{
 			unsigned long tempDesire;
-			fMaxCapability = fControllingDriver->maxCapabilityForDomainState(fParentsCurrentPowerFlags);
+			fMaxPowerState = fControllingDriver->maxCapabilityForDomainState(fParentsCurrentPowerFlags);
 			// initially change into the state we are already in
 			tempDesire = fControllingDriver->initialPowerStateForDomainState(fParentsCurrentPowerFlags);
 			adjustPowerState(tempDesire);
@@ -1210,7 +1297,7 @@ void IOService::handleRegisterPowerDriver ( IOPMRequest * request )
 	else
 	{
 		OUR_PMLog(kPMLogControllingDriverErr2, numberOfStates, 0);
-        IODelete(powerStates, IOPMPowerState, numberOfStates);
+        IODelete(powerStates, IOPMPSEntry, numberOfStates);
 	}
 
 	powerDriver->release();
@@ -1227,29 +1314,33 @@ void IOService::handleRegisterPowerDriver ( IOPMRequest * request )
 
 IOPMPowerFlags IOService::registerInterestedDriver ( IOService * driver )
 {
-	IOPMRequest *	request;
-	bool			signal;
+    IOPMRequest *	request;
+    bool			signal;
 
-	if (!initialized || !fInterestedDrivers)
-		return IOPMNotPowerManaged;
+    if (!driver || !initialized || !fInterestedDrivers)
+        return 0;
 
-	PM_LOCK();
-	signal = (!fInsertInterestSet && !fRemoveInterestSet);
-	if (fInsertInterestSet == NULL)
-		fInsertInterestSet = OSSet::withCapacity(4);
-	if (fInsertInterestSet)
-		fInsertInterestSet->setObject(driver);
-	PM_UNLOCK();
+    PM_LOCK();
+    signal = (!fInsertInterestSet && !fRemoveInterestSet);
+    if (fInsertInterestSet == NULL)
+        fInsertInterestSet = OSSet::withCapacity(4);
+    if (fInsertInterestSet)
+    {
+        fInsertInterestSet->setObject(driver);
+        if (fRemoveInterestSet)
+            fRemoveInterestSet->removeObject(driver);
+    }
+    PM_UNLOCK();
 
-	if (signal)
-	{
-		request = acquirePMRequest( this, kIOPMRequestTypeInterestChanged );
-		if (request)
-			submitPMRequest( request );
-	}
+    if (signal)
+    {
+        request = acquirePMRequest( this, kIOPMRequestTypeInterestChanged );
+        if (request)
+            submitPMRequest( request );
+    }
 
-	// This return value cannot be trusted, but return a value
-	// for those clients that care.
+    // This return value cannot be trusted, but return a value
+    // for those clients that care.
 
     OUR_PMLog(kPMLogInterestedDriver, kIOPMDeviceUsable, 2);
     return kIOPMDeviceUsable;	
@@ -1261,41 +1352,44 @@ IOPMPowerFlags IOService::registerInterestedDriver ( IOService * driver )
 
 IOReturn IOService::deRegisterInterestedDriver ( IOService * driver )
 {
-	IOPMinformeeList *	list;
+    IOPMinformeeList *	list;
     IOPMinformee *		item;
-	IOPMRequest *		request;
-	bool				signal;
+    IOPMRequest *       request;
+    bool                signal;
 
-	if (!initialized || !fInterestedDrivers)
-		return IOPMNotPowerManaged;
+    if (!driver)
+        return kIOReturnBadArgument;
+    if (!initialized || !fInterestedDrivers)
+        return IOPMNotPowerManaged;
 
-	PM_LOCK();
-	signal = (!fRemoveInterestSet && !fInsertInterestSet);
-	if (fRemoveInterestSet == NULL)
-		fRemoveInterestSet = OSSet::withCapacity(4);
-	if (fRemoveInterestSet)
-	{
-		fRemoveInterestSet->setObject(driver);
+    PM_LOCK();
+    signal = (!fRemoveInterestSet && !fInsertInterestSet);
+    if (fRemoveInterestSet == NULL)
+        fRemoveInterestSet = OSSet::withCapacity(4);
+    if (fRemoveInterestSet)
+    {
+        fRemoveInterestSet->setObject(driver);
+        if (fInsertInterestSet)
+            fInsertInterestSet->removeObject(driver);
 
-		list = fInterestedDrivers;
-		item = list->findItem(driver);
-		if (item && item->active)
-		{
-			item->active = false;
-		}
-		if (fLockedFlags.DriverCallBusy)
-            PM_DEBUG("%s::deRegisterInterestedDriver() driver call busy\n", getName());
-	}
-	PM_UNLOCK();
+        list = fInterestedDrivers;
+        item = list->findItem(driver);
+        if (item && item->active)
+        {
+            item->active = false;
+            waitForPMDriverCall( driver );
+        }
+    }
+    PM_UNLOCK();
 
-	if (signal)
-	{
-		request = acquirePMRequest( this, kIOPMRequestTypeInterestChanged );
-		if (request)
-			submitPMRequest( request );
-	}
+    if (signal)
+    {
+        request = acquirePMRequest( this, kIOPMRequestTypeInterestChanged );
+        if (request)
+            submitPMRequest( request );
+    }
 
-	return IOPMNoErr;
+    return IOPMNoErr;
 }
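+
+// Typical client usage (hypothetical interested driver): both calls batch the
+// mutation into the insert/remove sets above, to be applied later on the PM
+// work loop by handleInterestChanged().
+//
+//   provider->registerInterestedDriver(this);
+//   // ... powerStateWillChangeTo()/powerStateDidChangeTo() callbacks arrive ...
+//   provider->deRegisterInterestedDriver(this);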
 
 //*********************************************************************************
@@ -1306,49 +1400,48 @@ IOReturn IOService::deRegisterInterestedDriver ( IOService * driver )
 
 void IOService::handleInterestChanged( IOPMRequest * request )
 {
-	IOService *			driver;
+    IOService *			driver;
     IOPMinformee *		informee;
-	IOPMinformeeList *	list = fInterestedDrivers;
+    IOPMinformeeList *	list = fInterestedDrivers;
 
-	PM_LOCK();
+    PM_LOCK();
 
-	if (fInsertInterestSet)
-	{
-		while ((driver = (IOService *) fInsertInterestSet->getAnyObject()))
-		{
-			if ((list->findItem(driver) == NULL) &&
-				(!fRemoveInterestSet ||
-				 !fRemoveInterestSet->containsObject(driver)))
-			{
-				informee = list->appendNewInformee(driver);
-			}
-			fInsertInterestSet->removeObject(driver);
-		}
-		fInsertInterestSet->release();
-		fInsertInterestSet = 0;
-	}
+    if (fInsertInterestSet)
+    {
+        while ((driver = (IOService *) fInsertInterestSet->getAnyObject()))
+        {
+            if (list->findItem(driver) == NULL)
+            {
+                informee = list->appendNewInformee(driver);
+            }
+            fInsertInterestSet->removeObject(driver);
+        }
+        fInsertInterestSet->release();
+        fInsertInterestSet = 0;
+    }
 
-	if (fRemoveInterestSet)
-	{
-		while ((driver = (IOService *) fRemoveInterestSet->getAnyObject()))
-		{
-			informee = list->findItem(driver);
-			if (informee)
-			{
-				if (fHeadNotePendingAcks && informee->timer)
-				{
-					informee->timer = 0;
-					fHeadNotePendingAcks--;
-				}
-				list->removeFromList(driver);
-			}
-			fRemoveInterestSet->removeObject(driver);
-		}
-		fRemoveInterestSet->release();
-		fRemoveInterestSet = 0;
-	}
+    if (fRemoveInterestSet)
+    {
+        while ((driver = (IOService *) fRemoveInterestSet->getAnyObject()))
+        {
+            informee = list->findItem(driver);
+            if (informee)
+            {
+                // Clean-up async interest acknowledgement
+                if (fHeadNotePendingAcks && informee->timer)
+                {
+                    informee->timer = 0;
+                    fHeadNotePendingAcks--;
+                }
+                list->removeFromList(driver);
+            }
+            fRemoveInterestSet->removeObject(driver);
+        }
+        fRemoveInterestSet->release();
+        fRemoveInterestSet = 0;
+    }
 
-	PM_UNLOCK();
+    PM_UNLOCK();
 }
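+
+// The loops above drain each OSSet with the usual getAnyObject()/removeObject()
+// idiom; a generic sketch:
+//
+//   OSObject * obj;
+//   while ((obj = set->getAnyObject())) {
+//       // ... act on obj ...
+//       set->removeObject(obj);    // drops the set's reference
+//   }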
 
 //*********************************************************************************
@@ -1432,11 +1525,25 @@ bool IOService::handleAcknowledgePowerChange ( IOPMRequest * request )
                 {
                     uint64_t nsec = computeTimeDeltaNS(&informee->startTime);
                     if (nsec > LOG_SETPOWER_TIMES)
-                        PM_DEBUG("%s::powerState%sChangeTo(%p, %s, %lu -> %lu) async took %d ms\n",
+                        PM_LOG("%s::powerState%sChangeTo(%p, %s, %lu -> %lu) async took %d ms\n",
                             informee->whatObject->getName(),
                             (fDriverCallReason == kDriverCallInformPreChange) ? "Will" : "Did",
                             informee->whatObject,
-                            fName, fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec));
+                            fName, fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec));
+
+                    uint16_t logType = (fDriverCallReason == kDriverCallInformPreChange)
+                                            ? kIOPMEventTypePSWillChangeTo
+                                            : kIOPMEventTypePSDidChangeTo;
+
+                    PMEventDetails *details = PMEventDetails::eventDetails(
+                                                logType,
+                                                fName,
+                                                (uintptr_t)this,
+                                                informee->whatObject->getName(),
+                                                0, 0, 0,
+                                                NS_TO_US(nsec));
+
+                    getPMRootDomain()->recordAndReleasePMEventGated( details );
                 }
 #endif
                 // mark it acked
@@ -1523,8 +1630,17 @@ void IOService::adjustPowerState ( uint32_t clamp )
 	computeDesiredState(clamp);
 	if (fControllingDriver && fParentsKnowState && inPlane(gIOPowerPlane))
 	{
+        IOPMPowerChangeFlags changeFlags = kIOPMSelfInitiated;
+
+        // Indicate that children's desires were ignored, and do not ask
+        // apps for permission to drop power. This is used by root domain
+        // for demand sleep.
+
+        if (getPMRequestType() == kIOPMRequestTypeRequestPowerStateOverride)
+            changeFlags |= (kIOPMIgnoreChildren | kIOPMSkipAskPowerDown);
+
 		startPowerChange(
-			 /* flags        */	kIOPMWeInitiated,
+			 /* flags        */	changeFlags,
 			 /* power state  */	fDesiredPowerState,
 			 /* domain flags */	0,
 			 /* connection   */	0,
@@ -1536,9 +1652,11 @@ void IOService::adjustPowerState ( uint32_t clamp )
 // [public] synchronizePowerTree
 //*********************************************************************************
 
-IOReturn IOService::synchronizePowerTree ( void )
+IOReturn IOService::synchronizePowerTree (
+    IOOptionBits    options,
+    IOService *     notifyRoot )
 {
-	IOPMRequest *   request_c;
+	IOPMRequest *   request_c = 0;
     IOPMRequest *   request_s;
 
     if (this != getPMRootDomain())
@@ -1546,15 +1664,30 @@ IOReturn IOService::synchronizePowerTree ( void )
 	if (!initialized)
 		return kIOPMNotYetInitialized;
 
-    request_c = acquirePMRequest( this, kIOPMRequestTypeIdleCancel );
-    request_s = acquirePMRequest( this, kIOPMRequestTypeSynchronizePowerTree );
+    if (notifyRoot)
+    {
+        IOPMRequest * nr;
 
-    if (!request_c || !request_s)
-        goto error_no_memory;
+        // Cancels don't need to be synchronized.
+        nr = acquirePMRequest(notifyRoot, kIOPMRequestTypeChildNotifyDelayCancel);
+        if (nr) submitPMRequest(nr);        
+        nr = acquirePMRequest(getPMRootDomain(), kIOPMRequestTypeChildNotifyDelayCancel);
+        if (nr) submitPMRequest(nr);
+    }
 
-    request_c->attachNextRequest( request_s );
+    request_s = acquirePMRequest( this, kIOPMRequestTypeSynchronizePowerTree );
+    if (!request_s)
+        goto error_no_memory;
 
-    submitPMRequest(request_c);
+    if (options & kIOPMSyncCancelPowerDown)
+        request_c = acquirePMRequest( this, kIOPMRequestTypeIdleCancel );
+    if (request_c)
+    {
+        request_c->attachNextRequest( request_s );
+        submitPMRequest(request_c);
+    }
+    
+    request_s->fArg0 = (void *)(uintptr_t) options;
     submitPMRequest(request_s);
 
     return kIOReturnSuccess;
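+
+// Hypothetical call site (root domain only, per the guard above): synchronize
+// the power tree and cancel any pending idle power-down in one call.
+//
+//   getPMRootDomain()->synchronizePowerTree(kIOPMSyncCancelPowerDown, 0);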
@@ -1569,14 +1702,17 @@ error_no_memory:
 // [private] handleSynchronizePowerTree
 //*********************************************************************************
 
-void IOService::handleSynchronizePowerTree ( IOPMRequest * /*request*/ )
+void IOService::handleSynchronizePowerTree ( IOPMRequest * request )
 {
 	PM_ASSERT_IN_GATE();
 	if (fControllingDriver && fParentsKnowState && inPlane(gIOPowerPlane) &&
         (fCurrentPowerState == fNumberOfPowerStates - 1))
 	{
+        IOOptionBits options = (uintptr_t) request->fArg0;
+
 		startPowerChange(
-			 /* flags        */	kIOPMWeInitiated | kIOPMSynchronize,
+			 /* flags        */	kIOPMSelfInitiated | kIOPMSynchronize |
+                                (options & kIOPMSyncNoChildNotify),
 			 /* power state  */	fCurrentPowerState,
 			 /* domain flags */	0,
 			 /* connection   */	0,
@@ -1610,24 +1746,24 @@ IOReturn IOService::powerDomainWillChangeTo (
 
 void IOService::handlePowerDomainWillChangeTo ( IOPMRequest * request )
 {
-	IOPMPowerFlags		parentPowerFlags = (IOPMPowerFlags) request->fArg0;
-	IOPowerConnection *	whichParent = (IOPowerConnection *) request->fArg1;
-    unsigned long       parentChangeFlags = (unsigned long) request->fArg2;
-    OSIterator *		iter;
-    OSObject *			next;
-    IOPowerConnection *	connection;
-    unsigned long		newPowerState;
-    unsigned long       myChangeFlags;
-    IOPMPowerFlags		combinedPowerFlags;
-	bool				savedParentsKnowState;
-	IOReturn			result = IOPMAckImplied;
+	IOPMPowerFlags		 parentPowerFlags = (IOPMPowerFlags) request->fArg0;
+	IOPowerConnection *	 whichParent = (IOPowerConnection *) request->fArg1;
+    IOPMPowerChangeFlags parentChangeFlags = (IOPMPowerChangeFlags)(uintptr_t) request->fArg2;
+    IOPMPowerChangeFlags myChangeFlags;
+    OSIterator *		 iter;
+    OSObject *			 next;
+    IOPowerConnection *	 connection;
+    IOPMPowerStateIndex  newPowerState;
+    IOPMPowerFlags		 combinedPowerFlags;
+	bool				 savedParentsKnowState;
+	IOReturn			 result = IOPMAckImplied;
 
 	PM_ASSERT_IN_GATE();
     OUR_PMLog(kPMLogWillChange, parentPowerFlags, 0);
 
 	if (!inPlane(gIOPowerPlane) || !whichParent || !whichParent->getAwaitingAck())
 	{
-		PM_DEBUG("%s::%s not in power tree\n", getName(), __FUNCTION__);
+		PM_LOG("%s::%s not in power tree\n", getName(), __FUNCTION__);
         goto exit_no_ack;
 	}
 
@@ -1656,7 +1792,7 @@ void IOService::handlePowerDomainWillChangeTo ( IOPMRequest * request )
     // If our initial change has yet to occur, then defer the power change
     // until after the power domain has completed its power transition.
 
-    if ( fControllingDriver && !fInitialChange )
+    if ( fControllingDriver && !fInitialPowerChange )
     {
 		newPowerState = fControllingDriver->maxCapabilityForDomainState(
 							combinedPowerFlags);
@@ -1729,21 +1865,21 @@ IOReturn IOService::powerDomainDidChangeTo (
 
 void IOService::handlePowerDomainDidChangeTo ( IOPMRequest * request )
 {
-	IOPMPowerFlags		parentPowerFlags = (IOPMPowerFlags) request->fArg0;
-	IOPowerConnection *	whichParent = (IOPowerConnection *) request->fArg1;
-    unsigned long       parentChangeFlags = (unsigned long) request->fArg2;
-    unsigned long		newPowerState;
-    unsigned long       myChangeFlags;
-    unsigned long       initialDesire;
-	bool				savedParentsKnowState;
-	IOReturn			result = IOPMAckImplied;
+	IOPMPowerFlags		 parentPowerFlags = (IOPMPowerFlags) request->fArg0;
+	IOPowerConnection *	 whichParent = (IOPowerConnection *) request->fArg1;
+    IOPMPowerChangeFlags parentChangeFlags = (IOPMPowerChangeFlags)(uintptr_t) request->fArg2;
+    IOPMPowerChangeFlags myChangeFlags;
+    IOPMPowerStateIndex  newPowerState;
+    IOPMPowerStateIndex  initialDesire;
+	bool				 savedParentsKnowState;
+	IOReturn			 result = IOPMAckImplied;
 
 	PM_ASSERT_IN_GATE();
     OUR_PMLog(kPMLogDidChange, parentPowerFlags, 0);
 
 	if (!inPlane(gIOPowerPlane) || !whichParent || !whichParent->getAwaitingAck())
 	{
-		PM_DEBUG("%s::%s not in power tree\n", getName(), __FUNCTION__);
+		PM_LOG("%s::%s not in power tree\n", getName(), __FUNCTION__);
         goto exit_no_ack;
 	}
 
@@ -1756,7 +1892,7 @@ void IOService::handlePowerDomainDidChangeTo ( IOPMRequest * request )
 		newPowerState = fControllingDriver->maxCapabilityForDomainState(
 							fParentsCurrentPowerFlags);
 
-        if (fInitialChange)
+        if (fInitialPowerChange)
         {
             initialDesire = fControllingDriver->initialPowerStateForDomainState(
                             fParentsCurrentPowerFlags);
@@ -1796,7 +1932,7 @@ void IOService::handlePowerDomainDidChangeTo ( IOPMRequest * request )
 
 	if (!savedParentsKnowState && fParentsKnowState)
 	{
-		PM_TRACE("%s::powerDomainDidChangeTo parentsKnowState = true\n",
+		PM_LOG1("%s::powerDomainDidChangeTo parentsKnowState = true\n",
 			getName());
 		requestDomainPower( fDesiredPowerState );
 	}
@@ -1889,7 +2025,7 @@ void IOService::rebuildChildClampBits ( void )
             {
 				if (connection->getReadyFlag() == false)
 				{
-					PM_CONNECT("[%s] %s: connection not ready\n",
+					PM_LOG3("[%s] %s: connection not ready\n",
 						getName(), __FUNCTION__);
 					continue;
 				}
@@ -1919,7 +2055,7 @@ IOReturn IOService::requestPowerDomainState(
     IOPowerConnection * childConnection,
     unsigned long		specification )
 {
-    unsigned long       ps;
+    IOPMPowerStateIndex ps;
 	IOPMPowerFlags		outputPowerFlags;
     IOService *         child;
 	IOPMRequest *       subRequest;
@@ -1931,7 +2067,7 @@ IOReturn IOService::requestPowerDomainState(
 
 	if (gIOPMWorkLoop->onThread() == false)
 	{
-		PM_DEBUG("%s::requestPowerDomainState\n", getName());
+		PM_LOG("%s::requestPowerDomainState\n", getName());
 		return kIOReturnSuccess;
 	}
 
@@ -1941,7 +2077,7 @@ IOReturn IOService::requestPowerDomainState(
 		return kIOReturnNotAttached;
 
     if (!fControllingDriver || !fNumberOfPowerStates)
-        return IOPMNotYetInitialized;
+        return kIOReturnNotReady;
 
 	child = (IOService *) childConnection->getChildEntry(gIOPowerPlane);
 	assert(child);
@@ -1953,10 +2089,10 @@ IOReturn IOService::requestPowerDomainState(
     // Merge in the power flags contributed by this power parent
     // at its current or impending power state. 
 
-    outputPowerFlags = fPowerStates[fCurrentPowerState].outputPowerCharacter;
+    outputPowerFlags = fPowerStates[fCurrentPowerState].outputPowerFlags;
 	if (fMachineState != kIOPM_Finished)
 	{
-		if (IS_POWER_DROP && (getPMRootDomain() != this))
+		if (IS_POWER_DROP && !IS_ROOT_DOMAIN)
 		{
 			// Use the lower power state when dropping power. 
 			// Must be careful since a power drop can be canceled
@@ -1967,7 +2103,7 @@ IOReturn IOService::requestPowerDomainState(
 			// The child must not wait for this parent to raise power
 			// if the power drop was cancelled. The solution is to cancel
 			// the power drop if possible, then schedule an adjustment to
-			// re-evaluate our correct power state.
+			// re-evaluate the parent's power state.
 			//
 			// Root domain is excluded to avoid idle sleep issues, and to permit
 			// root domain children to pop up when the system is going to sleep.
@@ -1977,14 +2113,14 @@ IOReturn IOService::requestPowerDomainState(
 			{
 				fDoNotPowerDown = true;     // cancel power drop
 				adjustPower     = true;     // schedule an adjustment
-				PM_TRACE("%s: power drop cancelled in state %u by %s\n",
+				PM_LOG1("%s: power drop cancelled in state %u by %s\n",
 					getName(), fMachineState, child->getName());
 			}
 			else
 			{
 				// Beyond cancellation point, report the impending state.
 				outputPowerFlags =
-					fPowerStates[fHeadNotePowerState].outputPowerCharacter;
+					fPowerStates[fHeadNotePowerState].outputPowerFlags;
 			}
 		}
 		else if (IS_POWER_RISE)
@@ -2006,7 +2142,7 @@ IOReturn IOService::requestPowerDomainState(
 
     for (ps = 0; ps < fNumberOfPowerStates; ps++)
     {
-        if ((fPowerStates[ps].outputPowerCharacter & childRequestPowerFlags) ==
+        if ((fPowerStates[ps].outputPowerFlags & childRequestPowerFlags) ==
             (fOutputPowerCharacterFlags & childRequestPowerFlags))
             break;
     }
@@ -2028,7 +2164,7 @@ IOReturn IOService::requestPowerDomainState(
 #if ENABLE_DEBUG_LOGS
     if (adjustPower)
     {
-        PM_DEBUG("requestPowerDomainState[%s]: %s, init %d, %u->%u\n",
+        PM_LOG("requestPowerDomainState[%s]: %s, init %d, %u->%u\n",
             getName(), child->getName(),
             !childConnection->childHasRequestedPower(),
             (uint32_t) childConnection->getDesiredDomainState(),
@@ -2049,7 +2185,7 @@ IOReturn IOService::requestPowerDomainState(
 	// adjust power state. Submit a request if one wasn't pending,
 	// or if the current request is part of a call tree.
 
-    if (adjustPower && !fDeviceOverrides &&
+    if (adjustPower && !fDeviceOverrideEnabled &&
         (!fAdjustPowerScheduled || gIOPMRequest->getRootRequest()))
     {
 		subRequest = acquirePMRequest(
@@ -2185,8 +2321,8 @@ IOReturn IOService::changePowerStateWithOverrideTo ( unsigned long ordinal )
 	{
 		fTempClampPowerState = max(fTempClampPowerState, ordinal);
 		fTempClampCount++;
-        fOverrideMaxPowerState = ordinal;
-		request->fArg2 = (void *) true;
+		fOverrideMaxPowerState = ordinal;
+		request->fArg2 = (void *) (uintptr_t) true;
 	}
 
 	submitPMRequest( request );
@@ -2228,7 +2364,7 @@ IOReturn IOService::requestPowerState (
 	{
 		fTempClampPowerState = max(fTempClampPowerState, state);
 		fTempClampCount++;
-		request->fArg2 = (void *) true;
+		request->fArg2 = (void *) (uintptr_t) true;
 	}
 
 	submitPMRequest( request );
@@ -2255,8 +2391,8 @@ void IOService::handleRequestPowerState ( IOPMRequest * request )
 	if (fNumberOfPowerStates && (state >= fNumberOfPowerStates))
 		state = fNumberOfPowerStates - 1;
 
-    // Override from changePowerStateWithOverrideTo() persists until
-    // the next "device" power request, such as changePowerStateToPriv().
+    // The power suppression from changePowerStateWithOverrideTo() expires upon
+    // the next "device" power request, such as changePowerStateToPriv().
 
     if ((getPMRequestType() != kIOPMRequestTypeRequestPowerStateOverride) &&
         (client == gIOPMPowerClientDevice))
@@ -2317,6 +2453,77 @@ uint32_t IOService::getPowerStateForClient( const OSSymbol * client )
     return powerState;
 }
 
+//*********************************************************************************
+// [protected] powerOverrideOnPriv
+//*********************************************************************************
+
+IOReturn IOService::powerOverrideOnPriv ( void )
+{
+	IOPMRequest * request;
+
+    if (!initialized)
+		return IOPMNotYetInitialized;
+
+	if (gIOPMWorkLoop->inGate())
+	{
+		fDeviceOverrideEnabled = true;
+		return IOPMNoErr;
+	}
+
+	request = acquirePMRequest( this, kIOPMRequestTypePowerOverrideOnPriv );
+	if (!request)
+		return kIOReturnNoMemory;
+
+	submitPMRequest( request );
+    return IOPMNoErr;
+}
+
+//*********************************************************************************
+// [protected] powerOverrideOffPriv
+//*********************************************************************************
+
+IOReturn IOService::powerOverrideOffPriv ( void )
+{
+	IOPMRequest * request;
+
+    if (!initialized)
+		return IOPMNotYetInitialized;
+
+	if (gIOPMWorkLoop->inGate())
+	{
+		fDeviceOverrideEnabled = false;
+		return IOPMNoErr;
+	}
+
+	request = acquirePMRequest( this, kIOPMRequestTypePowerOverrideOffPriv );
+	if (!request)
+		return kIOReturnNoMemory;
+
+	submitPMRequest( request );
+    return IOPMNoErr;
+}
+
+//*********************************************************************************
+// [private] handlePowerOverrideChanged
+//*********************************************************************************
+
+void IOService::handlePowerOverrideChanged ( IOPMRequest * request )
+{
+	PM_ASSERT_IN_GATE();
+	if (request->getType() == kIOPMRequestTypePowerOverrideOnPriv)
+	{
+		OUR_PMLog(kPMLogOverrideOn, 0, 0);
+		fDeviceOverrideEnabled = true;
+    }
+	else
+	{
+		OUR_PMLog(kPMLogOverrideOff, 0, 0);
+		fDeviceOverrideEnabled = false;
+	}
+
+	adjustPowerState();
+}
+
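+// Hypothetical usage from a power-managed driver: while the override is on,
+// computeDesiredState() ignores children and driver desires, so only "device"
+// client requests such as changePowerStateToPriv() are honored.
+//
+//   powerOverrideOnPriv();
+//   changePowerStateToPriv(0);     // drop to the lowest state, children ignored
+//   // ...
+//   powerOverrideOffPriv();        // re-evaluates via adjustPowerState()
+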
 //*********************************************************************************
 // [private] computeDesiredState
 //*********************************************************************************
@@ -2335,7 +2542,7 @@ void IOService::computeDesiredState ( unsigned long localClamp )
 	if (!fNumberOfPowerStates)
 	{
         fDesiredPowerState = 0;
-		//PM_DEBUG("%s::%s no controlling driver\n", getName(), __FUNCTION__);
+		//PM_LOG("%s::%s no controlling driver\n", getName(), __FUNCTION__);
 		return;
 	}
 
@@ -2350,7 +2557,7 @@ void IOService::computeDesiredState ( unsigned long localClamp )
             {
                 if (connection->getReadyFlag() == false)
                 {
-                    PM_CONNECT("[%s] %s: connection not ready\n",
+                    PM_LOG3("[%s] %s: connection not ready\n",
                         getName(), __FUNCTION__);
                     continue;
                 }
@@ -2376,7 +2583,7 @@ void IOService::computeDesiredState ( unsigned long localClamp )
         while ((client = (const OSSymbol *) iter->getNextObject()))
         {
 			// Ignore child and driver when override is in effect.
-            if ((fDeviceOverrides ||
+            if ((fDeviceOverrideEnabled ||
                 (getPMRequestType() == kIOPMRequestTypeRequestPowerStateOverride)) &&
                 ((client == gIOPMPowerClientChildren) ||
                  (client == gIOPMPowerClientDriver)))
@@ -2388,7 +2595,7 @@ void IOService::computeDesiredState ( unsigned long localClamp )
 
             desiredState = getPowerStateForClient(client);
             assert(desiredState < fNumberOfPowerStates);			
-			PM_TRACE("  %u %s\n",
+			PM_LOG1("  %u %s\n",
 				desiredState, client->getCStringNoCopy());
 
             newPowerState = max(newPowerState, desiredState);
@@ -2415,7 +2622,7 @@ void IOService::computeDesiredState ( unsigned long localClamp )
 
     fDesiredPowerState = newPowerState;
 
-    PM_TRACE("  temp %u, clamp %u, current %u, new %u\n",
+    PM_LOG1("  temp %u, clamp %u, current %u, new %u\n",
         (uint32_t) localClamp, (uint32_t) fTempClampPowerState,
 		(uint32_t) fCurrentPowerState, newPowerState);
 
@@ -2466,12 +2673,92 @@ IOWorkLoop * IOService::getPMworkloop ( void )
 	return gIOPMWorkLoop;
 }
 
+#if NOT_YET
+
 //*********************************************************************************
-// [public] activityTickle
-//
-// The tickle with parameter kIOPMSuperclassPolicy1 causes the activity
-// flag to be set, and the device state checked.  If the device has been
-// powered down, it is powered up again.
+// Power Parent/Children Applier
+//*********************************************************************************
+
+static void
+applyToPowerChildren( 
+    IOService *               service,
+    IOServiceApplierFunction  applier,
+    void *                    context,
+    IOOptionBits              options )
+{
+	PM_ASSERT_IN_GATE();
+
+    IORegistryEntry *       entry;
+    IORegistryIterator *    iter;
+    IOPowerConnection *     connection;
+    IOService *             child;
+
+    iter = IORegistryIterator::iterateOver(service, gIOPowerPlane, options);
+    if (iter)
+    {
+        while ((entry = iter->getNextObject()))
+        {
+            // Get child of IOPowerConnection objects
+            if ((connection = OSDynamicCast(IOPowerConnection, entry)))
+            {
+                child = (IOService *) connection->copyChildEntry(gIOPowerPlane);
+                if (child)
+                {
+                    (*applier)(child, context);
+                    child->release();
+                }
+            }
+        }
+        iter->release();
+    }
+}
+
+static void
+applyToPowerParent( 
+    IOService *               service,
+    IOServiceApplierFunction  applier,
+    void *                    context,
+    IOOptionBits              options )
+{
+	PM_ASSERT_IN_GATE();
+
+    IORegistryEntry *       entry;
+    IORegistryIterator *    iter;
+    IOPowerConnection *     connection;
+    IOService *             parent;
+
+    iter = IORegistryIterator::iterateOver(service, gIOPowerPlane,
+            options | kIORegistryIterateParents);
+    if (iter)
+    {
+        while ((entry = iter->getNextObject()))
+        {
+            // Get parent of IOPowerConnection objects
+            if ((connection = OSDynamicCast(IOPowerConnection, entry)))
+            {
+                parent = (IOService *) connection->copyParentEntry(gIOPowerPlane);
+                if (parent)
+                {
+                    (*applier)(parent, context);
+                    parent->release();
+                }
+            }
+        }
+        iter->release();
+    }
+}
+
+#endif /* NOT_YET */
+
+// MARK: -
+// MARK: Activity Tickle & Idle Timer
+
+//*********************************************************************************
+// [public] activityTickle
+//
+// The tickle with parameter kIOPMSuperclassPolicy1 causes the activity
+// flag to be set, and the device state checked.  If the device has been
+// powered down, it is powered up again.
 // The tickle with parameter kIOPMSubclassPolicy is ignored here and
 // should be intercepted by a subclass.
 //*********************************************************************************
@@ -2487,14 +2774,11 @@ bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber )
 
 		// Record device activity for the idle timer handler.
 
-        fDeviceActive = true;
+        fDeviceWasActive = true;
         fActivityTickleCount++;
         clock_get_uptime(&fDeviceActiveTimestamp);
 
-#if ROOT_DOMAIN_RUN_STATES
-        getPMRootDomain()->handleActivityTickleForService(this, type,
-                    fCurrentPowerState, fActivityTickleCount);
-#endif
+        PM_ACTION_0(actionActivityTickle);
 
 		// Record the last tickle power state.
 		// This helps to filter out redundant tickles as
@@ -2509,7 +2793,7 @@ bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber )
 			if (request)
 			{
 				request->fArg0 = (void *) stateNumber;	// power state
-				request->fArg1 = (void *) true;			// power rise
+				request->fArg1 = (void *) (uintptr_t) true;	// power rise
 				submitPMRequest(request);
 			}
 		}
@@ -2558,21 +2842,19 @@ void IOService::handleActivityTickle ( IOPMRequest * request )
 	}
 }
 
-//*********************************************************************************
+//******************************************************************************
 // [public] setIdleTimerPeriod
 //
-// A subclass policy-maker is going to use our standard idleness
-// detection service.  Make a command queue and an idle timer and
-// connect them to the power management workloop.  Finally,
-// start the timer.
-//*********************************************************************************
+// A subclass policy-maker is using our standard idleness detection service.
+// Start the idle timer. Period is in seconds.
+//******************************************************************************
 
 IOReturn IOService::setIdleTimerPeriod ( unsigned long period )
 {
     if (!initialized)
 		return IOPMNotYetInitialized;
 
-    OUR_PMLog(kPMLogSetIdleTimerPeriod, period, 0);
+    OUR_PMLog(kPMLogSetIdleTimerPeriod, period, fIdleTimerPeriod);
 
     IOPMRequest * request =
         acquirePMRequest( this, kIOPMRequestTypeSetIdleTimerPeriod );
@@ -2582,7 +2864,7 @@ IOReturn IOService::setIdleTimerPeriod ( unsigned long period )
     request->fArg0 = (void *) period;
     submitPMRequest( request );
 
-    return IOPMNoErr;
+    return kIOReturnSuccess;
 }
 
 //******************************************************************************
@@ -2597,10 +2879,10 @@ SInt32 IOService::nextIdleTimeout(
     AbsoluteTime lastActivity, 
     unsigned int powerState)
 {
-    AbsoluteTime                        delta;
-    UInt64                              delta_ns;
-    SInt32                              delta_secs;
-    SInt32                              delay_secs;
+    AbsoluteTime        delta;
+    UInt64              delta_ns;
+    SInt32              delta_secs;
+    SInt32              delay_secs;
 
     // Calculate time difference using funky macro from clock.h.
     delta = currentTime;
@@ -2619,26 +2901,25 @@ SInt32 IOService::nextIdleTimeout(
     return (SInt32)delay_secs;
 }
 
-//******************************************************************************
+//*********************************************************************************
 // [public] start_PM_idle_timer
-//
-// The parameter is a pointer to us.  Use it to call our timeout method.
-//******************************************************************************
+//*********************************************************************************
 
 void IOService::start_PM_idle_timer ( void )
 {
-    static const int                    maxTimeout = 100000;
-    static const int                    minTimeout = 1;
-    AbsoluteTime                        uptime;
-    SInt32                              idle_in = 0;
+    static const int    maxTimeout = 100000;
+    static const int    minTimeout = 1;
+    AbsoluteTime        uptime, deadline;
+    SInt32              idle_in = 0;
+	boolean_t           pending;
 
-	if (!initialized || !fIdleTimerPeriod || !fIdleTimerEventSource)
+	if (!initialized || !fIdleTimerPeriod)
 		return;
 
     IOLockLock(fActivityLock);
 
     clock_get_uptime(&uptime);
-    
+
     // Subclasses may modify idle sleep algorithm
     idle_in = nextIdleTimeout(uptime, fDeviceActiveTimestamp, fCurrentPowerState);
 
@@ -2655,18 +2936,41 @@ void IOService::start_PM_idle_timer ( void )
 
     IOLockUnlock(fActivityLock);
 
-	fIdleTimerEventSource->setTimeout(idle_in, NSEC_PER_SEC);
+    retain();
+    clock_interval_to_absolutetime_interval(idle_in, kSecondScale, &deadline);
+    ADD_ABSOLUTETIME(&deadline, &uptime);
+    pending = thread_call_enter_delayed(fIdleTimer, deadline);
+    if (pending) release();
+}
+
+//*********************************************************************************
+// idle_timer_expired
+//*********************************************************************************
+
+static void
+idle_timer_expired (
+    thread_call_param_t arg0, thread_call_param_t arg1 )
+{
+	IOService * me = (IOService *) arg0;
+
+	if (gIOPMWorkLoop)
+		gIOPMWorkLoop->runAction(
+            OSMemberFunctionCast(IOWorkLoop::Action, me,
+                &IOService::idleTimerExpired),
+            me);
+
+	me->release();
 }
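+
+// The retain()/release() pairing across start_PM_idle_timer() and this handler
+// is the usual thread_call idiom; a sketch, assuming the handler always ends
+// with owner->release():
+//
+//   owner->retain();                                // ref held for the callout
+//   if (thread_call_enter_delayed(call, deadline))  // TRUE: already pending
+//       owner->release();                           // handler runs once; drop extra ref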
 
 //*********************************************************************************
 // [private] idleTimerExpired
 //
-// The idle timer has expired.  If there has been activity since the last
+// The idle timer has expired. If there has been activity since the last
 // expiration, just restart the timer and return.  If there has not been
 // activity, switch to the next lower power state and restart the timer.
 //*********************************************************************************
 
-void IOService::idleTimerExpired( IOTimerEventSource * )
+void IOService::idleTimerExpired( void )
 {
 	IOPMRequest *	request;
 	bool			restartTimer = true;
@@ -2678,10 +2982,10 @@ void IOService::idleTimerExpired( IOTimerEventSource * )
 
 	// Check for device activity (tickles) over last timer period.
 
-	if (fDeviceActive)
+	if (fDeviceWasActive)
 	{
 		// Device was active - do not drop power, restart timer.
-		fDeviceActive = false;
+		fDeviceWasActive = false;
 	}
 	else
 	{
@@ -2699,7 +3003,7 @@ void IOService::idleTimerExpired( IOTimerEventSource * )
 		if (request)
 		{
 			request->fArg0 = (void *) 0;		// power state (irrelevant)
-			request->fArg1 = (void *) false;	// power drop
+			request->fArg1 = (void *) (uintptr_t) false;	// power drop
 			submitPMRequest( request );
 
 			// Do not restart timer until after the tickle request has been
@@ -2798,7 +3102,7 @@ IOReturn IOService::systemWake ( void )
             {
 				if (connection->getReadyFlag() == false)
 				{
-					PM_CONNECT("[%s] %s: connection not ready\n",
+					PM_LOG3("[%s] %s: connection not ready\n",
 						getName(), __FUNCTION__);
 					continue;
 				}
@@ -2836,7 +3140,7 @@ IOReturn IOService::temperatureCriticalForZone ( IOService * whichZone )
     
     OUR_PMLog(kPMLogCriticalTemp, 0, 0);
 
-    if ( inPlane(gIOPowerPlane) && !IS_PM_ROOT() )
+    if ( inPlane(gIOPowerPlane) && !IS_PM_ROOT )
     {
         theNub = (IOService *)copyParentEntry(gIOPowerPlane);
         if ( theNub )
@@ -2854,87 +3158,21 @@ IOReturn IOService::temperatureCriticalForZone ( IOService * whichZone )
 }
 #endif /* !__LP64__ */
 
-//*********************************************************************************
-// [protected] powerOverrideOnPriv
-//*********************************************************************************
-
-IOReturn IOService::powerOverrideOnPriv ( void )
-{
-	IOPMRequest * request;
-
-    if (!initialized)
-		return IOPMNotYetInitialized;
-
-	if (gIOPMWorkLoop->inGate())
-	{
-		fDeviceOverrides = true;
-		return IOPMNoErr;
-	}
-
-	request = acquirePMRequest( this, kIOPMRequestTypePowerOverrideOnPriv );
-	if (!request)
-		return kIOReturnNoMemory;
-
-	submitPMRequest( request );
-    return IOPMNoErr;
-}
-
-//*********************************************************************************
-// [protected] powerOverrideOffPriv
-//*********************************************************************************
-
-IOReturn IOService::powerOverrideOffPriv ( void )
-{
-	IOPMRequest * request;
-
-    if (!initialized)
-		return IOPMNotYetInitialized;
-
-	if (gIOPMWorkLoop->inGate())
-	{
-		fDeviceOverrides = false;
-		return IOPMNoErr;
-	}
-
-	request = acquirePMRequest( this, kIOPMRequestTypePowerOverrideOffPriv );
-	if (!request)
-		return kIOReturnNoMemory;
-
-	submitPMRequest( request );
-    return IOPMNoErr;
-}
-
-//*********************************************************************************
-// [private] handlePowerOverrideChanged
-//*********************************************************************************
-
-void IOService::handlePowerOverrideChanged ( IOPMRequest * request )
-{
-	PM_ASSERT_IN_GATE();
-	if (request->getType() == kIOPMRequestTypePowerOverrideOnPriv)
-	{
-		OUR_PMLog(kPMLogOverrideOn, 0, 0);
-		fDeviceOverrides = true;
-    }
-	else
-	{
-		OUR_PMLog(kPMLogOverrideOff, 0, 0);
-		fDeviceOverrides = false;
-	}
-
-	adjustPowerState();
-}
+// MARK: -
+// MARK: Power Change (Common)
 
 //*********************************************************************************
 // [private] startPowerChange
+//
+// All power state changes start here.
 //*********************************************************************************
 
-IOReturn IOService::startPowerChange (
-    unsigned long		changeFlags,
-    unsigned long		powerState,
-    unsigned long		domainFlags,
-    IOPowerConnection *	parentConnection,
-    unsigned long		parentFlags )
+IOReturn IOService::startPowerChange(
+    IOPMPowerChangeFlags    changeFlags,
+    IOPMPowerStateIndex     powerState,
+    IOPMPowerFlags          domainFlags,
+    IOPowerConnection *     parentConnection,
+    IOPMPowerFlags          parentFlags )
 {
 	PM_ASSERT_IN_GATE();
 	assert( fMachineState == kIOPM_Finished );
@@ -2943,32 +3181,17 @@ IOReturn IOService::startPowerChange (
     if (powerState >= fNumberOfPowerStates)
         return IOPMAckImplied;
 
-#if ROOT_DOMAIN_RUN_STATES
-    // Root domain can override chosen power state to a lower state.
-    getPMRootDomain()->overridePowerStateForService(
-                        this, &fRootDomainState,
-                        &powerState, changeFlags);
-#endif
-
-    // Invalidate the last recorded tickle power state when a power transition
-    // is about to occur, and not as a result of a tickle request.
+    fIsPreChange = true;
+    PM_ACTION_2(actionPowerChangeOverride, &powerState, &changeFlags);
 
-    if ((getPMRequestType() != kIOPMRequestTypeActivityTickle) &&
-        (fActivityTicklePowerState != -1))
-    {
-        IOLockLock(fActivityLock);
-        fActivityTicklePowerState = -1;
-        IOLockUnlock(fActivityLock);
-    }
-
-	// Initialize the change note.
+	// Forks to either Driver or Parent initiated power change paths.
 
-    fHeadNoteFlags            = changeFlags;
+    fHeadNoteChangeFlags      = changeFlags;
     fHeadNotePowerState       = powerState;
 	fHeadNotePowerArrayEntry  = &fPowerStates[ powerState ];
 	fHeadNoteParentConnection = NULL;
 
-	if (changeFlags & kIOPMWeInitiated)
+	if (changeFlags & kIOPMSelfInitiated)
 	{
         if (changeFlags & kIOPMSynchronize)
             OurSyncStart();
@@ -2992,70 +3215,68 @@ IOReturn IOService::startPowerChange (
 
 bool IOService::notifyInterestedDrivers ( void )
 {
-	IOPMinformee *		informee;
-	IOPMinformeeList *	list = fInterestedDrivers;
-	DriverCallParam *	param;
-	IOItemCount			count;
+    IOPMinformee *		informee;
+    IOPMinformeeList *	list = fInterestedDrivers;
+    DriverCallParam *	param;
+    IOItemCount			count;
 
-	PM_ASSERT_IN_GATE();
-	assert( fDriverCallParamCount == 0 );
-	assert( fHeadNotePendingAcks == 0 );
+    PM_ASSERT_IN_GATE();
+    assert( fDriverCallParamCount == 0 );
+    assert( fHeadNotePendingAcks == 0 );
 
     fHeadNotePendingAcks = 0;
 
-	count = list->numberOfItems();
-	if (!count)
-		goto done;	// no interested drivers
+    count = list->numberOfItems();
+    if (!count)
+        goto done;	// no interested drivers
 
-	// Allocate an array of interested drivers and their return values
-	// for the callout thread. Everything else is still "owned" by the
-	// PM work loop, which can run to process acknowledgePowerChange()
-	// responses.
+    // Allocate an array of interested drivers and their return values
+    // for the callout thread. Everything else is still "owned" by the
+    // PM work loop, which can run to process acknowledgePowerChange()
+    // responses.
 
-	param = (DriverCallParam *) fDriverCallParamPtr;
-	if (count > fDriverCallParamSlots)
-	{
-		if (fDriverCallParamSlots)
-		{
-			assert(fDriverCallParamPtr);
-			IODelete(fDriverCallParamPtr, DriverCallParam, fDriverCallParamSlots);
-			fDriverCallParamPtr = 0;
-			fDriverCallParamSlots = 0;
-		}
+    param = (DriverCallParam *) fDriverCallParamPtr;
+    if (count > fDriverCallParamSlots)
+    {
+        if (fDriverCallParamSlots)
+        {
+            assert(fDriverCallParamPtr);
+            IODelete(fDriverCallParamPtr, DriverCallParam, fDriverCallParamSlots);
+            fDriverCallParamPtr = 0;
+            fDriverCallParamSlots = 0;
+        }
 
-		param = IONew(DriverCallParam, count);
-		if (!param)
-			goto done;	// no memory
+        param = IONew(DriverCallParam, count);
+        if (!param)
+            goto done;	// no memory
 
-		fDriverCallParamPtr   = (void *) param;
-		fDriverCallParamSlots = count;
-	}
+        fDriverCallParamPtr   = (void *) param;
+        fDriverCallParamSlots = count;
+    }
 
-	informee = list->firstInList();
-	assert(informee);
-	for (IOItemCount i = 0; i < count; i++)
-	{
-		informee->timer = -1;
-		param[i].Target = informee;
-		informee->retain();
+    informee = list->firstInList();
+    assert(informee);
+    for (IOItemCount i = 0; i < count; i++)
+    {
+        informee->timer = -1;
+        param[i].Target = informee;
+        informee->retain();
         informee = list->nextInList( informee );
-	}
-
-	fDriverCallParamCount = count;
-	fHeadNotePendingAcks = count;
+    }
 
-	// Machine state will be blocked pending callout thread completion.
+    fDriverCallParamCount = count;
+    fHeadNotePendingAcks  = count;
 
-	PM_LOCK();
-	assert( fLockedFlags.DriverCallBusy == false );
-	fLockedFlags.DriverCallBusy = true;
-	PM_UNLOCK();
-	thread_call_enter( fDriverCallEntry );
-	return true;
+    // Block state machine and wait for callout completion.
+    assert(!fDriverCallBusy);
+    fDriverCallBusy = true;
+    thread_call_enter( fDriverCallEntry );
+    return true;
 
 done:
-	// no interested drivers or did not schedule callout thread due to error.
-	return false;
+    // Return false if there are no interested drivers, or if the callout
+    // thread could not be scheduled due to an error.
+    return false;
 }
 
 //*********************************************************************************
@@ -3064,18 +3285,18 @@ done:
 
 void IOService::notifyInterestedDriversDone ( void )
 {
-	IOPMinformee *		informee;
-	IOItemCount			count;
-	DriverCallParam *	param;
-	IOReturn			result;
+    IOPMinformee *		informee;
+    IOItemCount			count;
+    DriverCallParam *   param;
+    IOReturn            result;
 
 	PM_ASSERT_IN_GATE();
+	assert( fDriverCallBusy == false );
+	assert( fMachineState == kIOPM_DriverThreadCallDone );
+
 	param = (DriverCallParam *) fDriverCallParamPtr;
 	count = fDriverCallParamCount;
 
-	assert( fLockedFlags.DriverCallBusy == false );
-	assert( fMachineState == kIOPM_DriverThreadCallDone );
-
 	if (param && count)
 	{
 		for (IOItemCount i = 0; i < count; i++, param++)
@@ -3128,10 +3349,23 @@ void IOService::notifyInterestedDriversDone ( void )
 		}
 	}
 
-	// Hop back to original machine state path (from notifyAll)
-	fMachineState = fNextMachineState;
+    MS_POP();  // pushed by notifyAll()
+
+    // If interest acks are outstanding, wait for fHeadNotePendingAcks to become
+    // zero before notifying children. This enforces the children-after-interest
+    // ordering even for async interest clients.
 
-	notifyChildren();
+    if (!fHeadNotePendingAcks)
+    {
+        notifyChildren();
+    }
+    else
+    {
+        MS_PUSH(fMachineState);
+        fMachineState = kIOPM_NotifyChildrenStart;
+        PM_LOG2("%s: %u outstanding async interest\n",
+            getName(), fHeadNotePendingAcks);
+    }
 }
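+
+// MS_PUSH()/MS_POP() save and restore fMachineState through a one-deep slot; a
+// sketch of the assumed semantics (fSavedMachineState and the
+// kIOPM_BadMachineState sentinel come from the private header):
+//
+//   #define MS_PUSH(n) do { assert(fSavedMachineState == kIOPM_BadMachineState); \
+//                           fSavedMachineState = (n); } while (false)
+//   #define MS_POP()   do { fMachineState = fSavedMachineState; \
+//                           fSavedMachineState = kIOPM_BadMachineState; } while (false)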
 
 //*********************************************************************************
@@ -3144,6 +3378,17 @@ void IOService::notifyChildren ( void )
     OSObject *			next;
     IOPowerConnection *	connection;
 	OSArray *			children = 0;
+    IOPMrootDomain *    rootDomain;
+    bool                delayNotify = false;
+    
+    if ((fHeadNotePowerState != fCurrentPowerState) &&
+        (IS_POWER_DROP == fIsPreChange) &&
+        ((rootDomain = getPMRootDomain()) == this))
+    {
+        rootDomain->tracePoint( IS_POWER_DROP ?
+            kIOPMTracePointSleepPowerPlaneDrivers :
+            kIOPMTracePointWakePowerPlaneDrivers  );
+    }
 
 	if (fStrictTreeOrder)
 		children = OSArray::withCapacity(8);
@@ -3160,49 +3405,78 @@ void IOService::notifyChildren ( void )
             {
 				if (connection->getReadyFlag() == false)
 				{
-					PM_CONNECT("[%s] %s: connection not ready\n",
+					PM_LOG3("[%s] %s: connection not ready\n",
 						getName(), __FUNCTION__);
 					continue;
 				}
 
-				if (children)
+                // Mechanism to postpone the did-change notification to
+                // certain power children to order those children last.
+                // Cannot be used together with strict tree ordering.
+
+                if (!fIsPreChange &&
+                    (connection->delayChildNotification) &&
+                    getPMRootDomain()->shouldDelayChildNotification(this))
+                {
+                    if (!children)
+                    {
+                        children = OSArray::withCapacity(8);
+                        if (children)
+                            delayNotify = true;
+                    }
+                    if (delayNotify)
+                    {
+                        children->setObject( connection );
+                        continue;
+                    }
+                }
+
+				if (!delayNotify && children)
 					children->setObject( connection );
 				else
-					notifyChild( connection,
-						fDriverCallReason == kDriverCallInformPreChange );
+					notifyChild( connection );
 			}
         }
         iter->release();
     }
 
+    if (children && (children->getCount() == 0))
+    {
+        children->release();
+        children = 0;
+    }
 	if (children)
 	{
-		if (children->getCount() == 0)
-		{
-			children->release();
-			children = 0;
-		}
-		else
-		{
-			assert(fNotifyChildArray == 0);
-			fNotifyChildArray = children;
-			fNextMachineState = fMachineState;
-			fMachineState     = kIOPM_NotifyChildrenDone;
-		}		
+        assert(fNotifyChildArray == 0);
+        fNotifyChildArray = children;        
+        MS_PUSH(fMachineState);
+
+        if (delayNotify)
+        {
+            // Wait for existing child notifications to complete,
+            // before notifying the children in the array.
+            fMachineState = kIOPM_NotifyChildrenDelayed;
+            PM_LOG2("%s: %d children in delayed array\n",
+                getName(), children->getCount());
+        }
+        else
+        {
+            // Notify children in the array one at a time.
+            fMachineState = kIOPM_NotifyChildrenOrdered;
+        }
 	}
 }
 
 //*********************************************************************************
-// [private] notifyChildrenDone
+// [private] notifyChildrenOrdered
 //*********************************************************************************
 
-void IOService::notifyChildrenDone ( void )
+void IOService::notifyChildrenOrdered ( void )
 {
 	PM_ASSERT_IN_GATE();
 	assert(fNotifyChildArray);
-	assert(fMachineState == kIOPM_NotifyChildrenDone);
+	assert(fMachineState == kIOPM_NotifyChildrenOrdered);
 
-	// Interested drivers have all acked (if any), ack timer stopped.
 	// Notify one child, wait for it to ack, then repeat for next child.
 	// This is a workaround for some drivers with multiple instances at
 	// the same branch in the power tree, but the driver is slow to power
@@ -3217,28 +3491,61 @@ void IOService::notifyChildrenDone ( void )
 		IOPowerConnection *	connection;
 		connection = (IOPowerConnection *) fNotifyChildArray->getObject(0);
 		fNotifyChildArray->removeObject(0);
-		notifyChild( connection, fDriverCallReason == kDriverCallInformPreChange );
+		notifyChild( connection );
 	}
 	else
 	{
 		fNotifyChildArray->release();
 		fNotifyChildArray = 0;
-		fMachineState = fNextMachineState;
+
+        MS_POP();   // pushed by notifyChildren()
 	}
 }
 
+//*********************************************************************************
+// [private] notifyChildrenDelayed
+//*********************************************************************************
+
+void IOService::notifyChildrenDelayed ( void )
+{
+    IOPowerConnection *	connection;
+
+	PM_ASSERT_IN_GATE();
+	assert(fNotifyChildArray);
+	assert(fMachineState == kIOPM_NotifyChildrenDelayed);
+
+    // Called after all non-delayed children and interested drivers have acked;
+    // now notify all delayed children. When explicitly cancelled, interest
+    // acks (and ack timer) may still be outstanding.
+
+    for (int i = 0; ; i++)
+    {
+		connection = (IOPowerConnection *) fNotifyChildArray->getObject(i);
+        if (!connection)
+            break;
+
+		notifyChild( connection );
+    }
+
+    PM_LOG2("%s: notified delayed children\n", getName());
+    fNotifyChildArray->release();
+    fNotifyChildArray = 0;
+    
+    MS_POP();   // pushed by notifyChildren()
+}
+
 //*********************************************************************************
 // [private] notifyAll
 //*********************************************************************************
 
-IOReturn IOService::notifyAll ( int nextMachineState, bool is_prechange )
+IOReturn IOService::notifyAll ( uint32_t nextMS )
 {
 	// Save the next machine_state to be restored by notifyInterestedDriversDone()
 
 	PM_ASSERT_IN_GATE();
-	fNextMachineState = nextMachineState;
+    MS_PUSH(nextMS);
 	fMachineState     = kIOPM_DriverThreadCallDone;
-	fDriverCallReason = is_prechange ?
+	fDriverCallReason = fIsPreChange ?
 						kDriverCallInformPreChange : kDriverCallInformPostChange;
 
 	if (!notifyInterestedDrivers())
@@ -3258,16 +3565,15 @@ IOReturn IOService::actionDriverCalloutDone (
 	void * arg0, void * arg1,
 	void * arg2, void * arg3 )
 {
-	IOServicePM * pwrMgt = (IOServicePM *) arg0;
+    IOServicePM * pwrMgt = (IOServicePM *) arg0;
 
-	PM_LOCK();
-	fLockedFlags.DriverCallBusy = false;
-	PM_UNLOCK();
+    assert( fDriverCallBusy );
+    fDriverCallBusy = false;
 
-	if (gIOPMReplyQueue)
-		gIOPMReplyQueue->signalWorkAvailable();
+    assert(gIOPMWorkQueue);
+    gIOPMWorkQueue->signalWorkAvailable();
 
-	return kIOReturnSuccess;
+    return kIOReturnSuccess;
 }
 
 void IOService::pmDriverCallout ( IOService * from )
@@ -3302,27 +3608,35 @@ void IOService::pmDriverCallout ( IOService * from )
 
 void IOService::driverSetPowerState ( void )
 {
-	IOService *			driver;
-	unsigned long		powerState;
-	DriverCallParam *	param;
-	IOReturn			result;
+    IOPMPowerStateIndex powerState;
+    DriverCallParam *	param;
+    IOPMDriverCallEntry callEntry;
     AbsoluteTime        end;
+    IOReturn            result;
+    uint32_t            oldPowerState = getPowerState();
 
-    assert( fLockedFlags.DriverCallBusy == true );
-	param = (DriverCallParam *) fDriverCallParamPtr;
-	assert( param );
-	assert( fDriverCallParamCount == 1 );
+    assert( fDriverCallBusy );
+    assert( fDriverCallParamPtr );
+    assert( fDriverCallParamCount == 1 );
 
-	driver = fControllingDriver;
-	powerState = fHeadNotePowerState;
+    param = (DriverCallParam *) fDriverCallParamPtr;
+    powerState = fHeadNotePowerState;
 
-	if (fLockedFlags.PMStop == false)
-	{
-		OUR_PMLog(          kPMLogProgramHardware, (uintptr_t) this, powerState);
+    if (assertPMDriverCall(&callEntry))
+    {
+        OUR_PMLog(          kPMLogProgramHardware, (uintptr_t) this, powerState);
         clock_get_uptime(&fDriverCallStartTime);
-		result = driver->setPowerState( powerState, this );
+        result = fControllingDriver->setPowerState( powerState, this );
         clock_get_uptime(&end);
-		OUR_PMLog((UInt32) -kPMLogProgramHardware, (uintptr_t) this, (UInt32) result);
+        OUR_PMLog((UInt32) -kPMLogProgramHardware, (uintptr_t) this, (UInt32) result);
+
+        deassertPMDriverCall(&callEntry);
+
+        if (result < 0)
+        {
+            PM_LOG("%s::setPowerState(%p, %lu -> %lu) returned 0x%x\n",
+                fName, this, fCurrentPowerState, powerState, result);
+        }
 
 #if LOG_SETPOWER_TIMES
         if ((result == IOPMAckImplied) || (result < 0))
@@ -3332,15 +3646,27 @@ void IOService::driverSetPowerState ( void )
             SUB_ABSOLUTETIME(&end, &fDriverCallStartTime);
             absolutetime_to_nanoseconds(end, &nsec);
             if (nsec > LOG_SETPOWER_TIMES)
-                PM_DEBUG("%s::setPowerState(%p, %lu -> %lu) took %d ms\n",
+                PM_LOG("%s::setPowerState(%p, %lu -> %lu) took %d ms\n",
                     fName, this, fCurrentPowerState, powerState, NS_TO_MS(nsec));
+
+            PMEventDetails *details = PMEventDetails::eventDetails(
+                                        kIOPMEventTypeSetPowerStateImmediate, // type
+                                        fName,								  // who
+                                        (uintptr_t)this,					  // owner unique
+                                        NULL,								  // interest name
+                                        (uint8_t)oldPowerState,				  // old
+                                        (uint8_t)powerState,				  // new
+                                        0,									  // result
+                                        NS_TO_US(nsec));					  // usec completion time
+
+            getPMRootDomain()->recordAndReleasePMEventGated( details );
         }
 #endif
-	}
-	else
-		result = kIOPMAckImplied;
+    }
+    else
+        result = kIOPMAckImplied;
 
-	param->Result = result;
+    param->Result = result;
 }
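+
+// assertPMDriverCall()/deassertPMDriverCall() bracket every callout into a
+// driver so teardown and deRegisterInterestedDriver() can drain callouts via
+// waitForPMDriverCall(). The guard pattern, as used above:
+//
+//   IOPMDriverCallEntry entry;
+//   if (assertPMDriverCall(&entry)) {
+//       // safe to call into the driver here
+//       deassertPMDriverCall(&entry);
+//   } else {
+//       result = kIOPMAckImplied;   // PM is stopping; skip the call
+//   }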
 
 //*********************************************************************************
@@ -3351,46 +3677,51 @@ void IOService::driverSetPowerState ( void )
 
 void IOService::driverInformPowerChange ( void )
 {
-	IOItemCount			count;
-	IOPMinformee *		informee;
-	IOService *			driver;
-	IOReturn			result;
-	IOPMPowerFlags		powerFlags;
-	unsigned long		powerState;
-	DriverCallParam *	param;
+    IOPMinformee *		informee;
+    IOService *			driver;
+    DriverCallParam *	param;
+    IOPMDriverCallEntry callEntry;
+    IOPMPowerFlags		powerFlags;
+    IOPMPowerStateIndex powerState;
     AbsoluteTime        end;
+    IOReturn            result;
+    IOItemCount			count;
 
-    assert( fLockedFlags.DriverCallBusy == true );
-	param = (DriverCallParam *) fDriverCallParamPtr;
-	count = fDriverCallParamCount;
-	assert( count && param );
+    assert( fDriverCallBusy );
+    assert( fDriverCallParamPtr );
+    assert( fDriverCallParamCount );
 
-	powerFlags = fHeadNotePowerArrayEntry->capabilityFlags;
-	powerState = fHeadNotePowerState;
+    param = (DriverCallParam *) fDriverCallParamPtr;
+    count = fDriverCallParamCount;
 
-	for (IOItemCount i = 0; i < count; i++)
-	{
-		informee = (IOPMinformee *) param->Target;
-		driver   = informee->whatObject;
+    powerFlags = fHeadNotePowerArrayEntry->capabilityFlags;
+    powerState = fHeadNotePowerState;
 
-		if ((fLockedFlags.PMStop == false) && informee->active)
-		{
-			if (fDriverCallReason == kDriverCallInformPreChange)
-			{
-				OUR_PMLog(kPMLogInformDriverPreChange, (uintptr_t) this, powerState);
-                clock_get_uptime(&informee->startTime);
-				result = driver->powerStateWillChangeTo(powerFlags, powerState, this);
-                clock_get_uptime(&end);
-				OUR_PMLog((UInt32)-kPMLogInformDriverPreChange, (uintptr_t) this, result);
-			}
-			else
-			{
-				OUR_PMLog(kPMLogInformDriverPostChange, (uintptr_t) this, powerState);
+    for (IOItemCount i = 0; i < count; i++)
+    {
+        informee = (IOPMinformee *) param->Target;
+        driver   = informee->whatObject;
+
+        if (assertPMDriverCall(&callEntry, 0, informee))
+        {
+            if (fDriverCallReason == kDriverCallInformPreChange)
+            {
+                OUR_PMLog(kPMLogInformDriverPreChange, (uintptr_t) this, powerState);
                 clock_get_uptime(&informee->startTime);
-				result = driver->powerStateDidChangeTo(powerFlags, powerState, this);
+                result = driver->powerStateWillChangeTo(powerFlags, powerState, this);
                 clock_get_uptime(&end);
-				OUR_PMLog((UInt32)-kPMLogInformDriverPostChange, (uintptr_t) this, result);
-			}
+                OUR_PMLog((UInt32)-kPMLogInformDriverPreChange, (uintptr_t) this, result);
+            }
+            else
+            {
+                OUR_PMLog(kPMLogInformDriverPostChange, (uintptr_t) this, powerState);
+                clock_get_uptime(&informee->startTime);
+                result = driver->powerStateDidChangeTo(powerFlags, powerState, this);
+                clock_get_uptime(&end);
+                OUR_PMLog((UInt32)-kPMLogInformDriverPostChange, (uintptr_t) this, result);
+            }
+
+            deassertPMDriverCall(&callEntry);
 
 #if LOG_SETPOWER_TIMES
             if ((result == IOPMAckImplied) || (result < 0))
@@ -3400,19 +3731,35 @@ void IOService::driverInformPowerChange ( void )
                 SUB_ABSOLUTETIME(&end, &informee->startTime);
                 absolutetime_to_nanoseconds(end, &nsec);
                 if (nsec > LOG_SETPOWER_TIMES)
-                    PM_DEBUG("%s::powerState%sChangeTo(%p, %s, %lu -> %lu) took %d ms\n",
+                    PM_LOG("%s::powerState%sChangeTo(%p, %s, %lu -> %lu) took %d ms\n",
                         driver->getName(),
                         (fDriverCallReason == kDriverCallInformPreChange) ? "Will" : "Did",
                         driver, fName, fCurrentPowerState, powerState, NS_TO_MS(nsec));
+
+                uint16_t logType = (fDriverCallReason == kDriverCallInformPreChange)
+                                    ? kIOPMEventTypePSWillChangeTo
+                                    : kIOPMEventTypePSDidChangeTo;
+
+                PMEventDetails *details = PMEventDetails::eventDetails(
+                                            logType,						// type
+                                            fName,							// who
+                                            (uintptr_t)this,				// owner unique
+                                            driver->getName(),				// interest name
+                                            (uint8_t)fCurrentPowerState,	// old
+                                            (uint8_t)fHeadNotePowerState,	// new
+                                            0,								// result
+                                            NS_TO_US(nsec));				// usec completion time
+
+                getPMRootDomain()->recordAndReleasePMEventGated( details );
             }
 #endif
-		}
-		else
-			result = kIOPMAckImplied;
+        }
+        else
+            result = kIOPMAckImplied;
 
-		param->Result = result;
-		param++;
-	}
+        param->Result = result;
+        param++;
+    }
 }
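+
+// Note: assertPMDriverCall()/deassertPMDriverCall() subsume the old explicit
+// checks on fLockedFlags.PMStop and informee->active seen above: the assert
+// refuses the callout once PM is being stopped or the informee is no longer
+// active, and the deassert presumably unblocks a pending PMstop() waiting
+// for outstanding driver calls to drain.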
 
 //*********************************************************************************
@@ -3422,14 +3769,14 @@ void IOService::driverInformPowerChange ( void )
 // If the object acknowledges the current change, we return TRUE.
 //*********************************************************************************
 
-bool IOService::notifyChild ( IOPowerConnection * theNub, bool is_prechange )
+bool IOService::notifyChild ( IOPowerConnection * theNub )
 {
-    IOReturn		ret = IOPMAckImplied;
-    unsigned long	childPower;
-    IOService *		theChild;
-	IOPMRequest *	childRequest;
-    uint32_t        requestArg2;
-	int				requestType;
+    IOReturn                ret = IOPMAckImplied;
+    unsigned long           childPower;
+    IOService *             theChild;
+	IOPMRequest *           childRequest;
+    IOPMPowerChangeFlags    requestArg2;
+	int                     requestType;
 
 	PM_ASSERT_IN_GATE();
     theChild = (IOService *)(theNub->copyChildEntry(gIOPowerPlane));
@@ -3444,11 +3791,11 @@ bool IOService::notifyChild ( IOPowerConnection * theNub, bool is_prechange )
 	fHeadNotePendingAcks++;
     theNub->setAwaitingAck(true);
 
-    requestArg2 = fHeadNoteFlags;
+    requestArg2 = fHeadNoteChangeFlags;
     if (fHeadNotePowerState < fCurrentPowerState)
         requestArg2 |= kIOPMDomainPowerDrop;
 
-    requestType = is_prechange ?
+    requestType = fIsPreChange ?
         kIOPMRequestTypePowerDomainWillChange :
         kIOPMRequestTypePowerDomainDidChange;
 
@@ -3456,7 +3803,7 @@ bool IOService::notifyChild ( IOPowerConnection * theNub, bool is_prechange )
 	if (childRequest)
 	{
         theNub->retain();
-		childRequest->fArg0 = (void *) fHeadNotePowerArrayEntry->outputPowerCharacter;
+		childRequest->fArg0 = (void *) fHeadNotePowerArrayEntry->outputPowerFlags;
 		childRequest->fArg1 = (void *) theNub;
 		childRequest->fArg2 = (void *) requestArg2;
 		theChild->submitPMRequest( childRequest );
@@ -3481,6 +3828,246 @@ bool IOService::notifyChild ( IOPowerConnection * theNub, bool is_prechange )
 	return (IOPMAckImplied == ret);
 }
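+
+// Note: each power child is notified through an async request: fArg0 carries
+// the new domain output flags, fArg1 the IOPowerConnection, and fArg2 the
+// change flags with kIOPMDomainPowerDrop OR'ed in when the domain is dropping.
+// The child later acks through the connection, which decrements
+// fHeadNotePendingAcks on this parent.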
 
+//*********************************************************************************
+// [private] notifyControllingDriver
+//*********************************************************************************
+
+bool IOService::notifyControllingDriver ( void )
+{
+    DriverCallParam *	param;
+
+    PM_ASSERT_IN_GATE();
+    assert( fDriverCallParamCount == 0  );
+    assert( fControllingDriver );
+
+    if (fInitialSetPowerState)
+    {
+        // Driver specified a flag to skip the initial setPowerState()
+        if (fHeadNotePowerArrayEntry->capabilityFlags & kIOPMInitialDeviceState)
+        {
+            return false;
+        }
+        fInitialSetPowerState = false;
+    }
+
+    param = (DriverCallParam *) fDriverCallParamPtr;
+    if (!param)
+    {
+        param = IONew(DriverCallParam, 1);
+        if (!param)
+            return false;	// no memory
+
+        fDriverCallParamPtr   = (void *) param;
+        fDriverCallParamSlots = 1;
+    }
+
+    param->Target = fControllingDriver;
+    fDriverCallParamCount = 1;
+    fDriverTimer = -1;
+
+    // Block state machine and wait for callout completion.
+    assert(!fDriverCallBusy);
+    fDriverCallBusy = true;
+    thread_call_enter( fDriverCallEntry );
+
+    return true;
+}
+
+//*********************************************************************************
+// [private] notifyControllingDriverDone
+//*********************************************************************************
+
+void IOService::notifyControllingDriverDone( void )
+{
+	DriverCallParam *	param;
+	IOReturn			result;
+
+	PM_ASSERT_IN_GATE();
+	param = (DriverCallParam *) fDriverCallParamPtr;
+
+	assert( fDriverCallBusy == false );
+	assert( fMachineState == kIOPM_DriverThreadCallDone );
+
+	if (param && fDriverCallParamCount)
+	{
+		assert(fDriverCallParamCount == 1);
+		
+		// the return value from setPowerState()
+		result = param->Result;
+
+		if ((result == IOPMAckImplied) || (result < 0))
+		{
+            fDriverTimer = 0;
+		}
+		else if (fDriverTimer)
+		{
+            assert(fDriverTimer == -1);
+
+            // Driver has not acked, and has returned a positive result.
+            // Enforce a minimum permissible timeout value.
+            // Make the min value large enough that a timeout is unlikely
+            // even if a driver misinterprets the units of the return
+            // value, and large enough to be noticeable if a driver
+            // neglects to ack.
+
+            if (result < kMinAckTimeoutTicks)
+                result = kMinAckTimeoutTicks;
+
+            fDriverTimer = (result / (ACK_TIMER_PERIOD / ns_per_us)) + 1;
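+            // Worked example (a sketch, assuming ACK_TIMER_PERIOD is 100 ms
+            // in nanoseconds and kMinAckTimeoutTicks is 10 seconds in
+            // microseconds, their values elsewhere in this file): a driver
+            // returning 3000000 (3 s in us) is raised to 10000000, giving
+            //   fDriverTimer = 10000000 / (100000000 / 1000) + 1 = 101
+            // ticks of the 100 ms ack timer.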
+		}
+		// else, driver has already acked and fDriverTimer was reset to 0.
+
+		fDriverCallParamCount = 0;
+
+		if ( fDriverTimer )
+		{
+			OUR_PMLog(kPMLogStartAckTimer, 0, 0);
+			start_ack_timer();
+		}
+	}
+
+    MS_POP();   // pushed by OurChangeSetPowerState() or ParentChangeSetPowerState()
+    fIsPreChange = false;
+}
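+
+// Note: MS_PUSH()/MS_POP() implement a one-slot machine-state stack (a
+// sketch, assuming a saved-state field such as fSavedMachineState and the
+// macros defined earlier in this file):
+//
+//     #define MS_PUSH(n)  (fSavedMachineState = (n))
+//     #define MS_POP()    (fMachineState = fSavedMachineState)
+//
+// OurChangeSetPowerState() and ParentChangeSetPowerState() push their
+// post-settle state before parking in kIOPM_DriverThreadCallDone, and
+// notifyControllingDriverDone() pops it once the driver callout completes.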
+
+//*********************************************************************************
+// [private] all_done
+//
+// A power change is done.
+//*********************************************************************************
+
+void IOService::all_done ( void )
+{
+    IOPMPowerStateIndex     prevPowerState;
+    const IOPMPSEntry *     powerStatePtr;
+    IOPMDriverCallEntry     callEntry;
+    uint32_t                prevMachineState = fMachineState;
+    bool                    callAction = false;
+
+    fMachineState = kIOPM_Finished;
+
+    if ((fHeadNoteChangeFlags & kIOPMSynchronize) &&
+        ((prevMachineState == kIOPM_Finished) ||
+         (prevMachineState == kIOPM_SyncFinish)))
+    {
+        // Sync operation and no power change occurred.
+        // Do not inform driver and clients about this request completion,
+        // except for the originator (root domain).
+
+        PM_ACTION_2(actionPowerChangeDone,
+            fHeadNotePowerState, fHeadNoteChangeFlags);
+
+        if (getPMRequestType() == kIOPMRequestTypeSynchronizePowerTree)
+        {
+            powerChangeDone(fCurrentPowerState);
+        }
+
+        return;
+    }
+
+    // our power change
+    if ( fHeadNoteChangeFlags & kIOPMSelfInitiated )
+    {
+        // could our driver switch to the new state?
+        if ( !( fHeadNoteChangeFlags & kIOPMNotDone) )
+        {
+            // we changed, tell our parent
+            requestDomainPower(fHeadNotePowerState);
+
+            // yes, did power raise?
+            if ( fCurrentPowerState < fHeadNotePowerState )
+            {
+                // yes, inform clients and apps
+                tellChangeUp (fHeadNotePowerState);
+            }
+            prevPowerState = fCurrentPowerState;
+            // either way
+            fCurrentPowerState = fHeadNotePowerState;
+#if PM_VARS_SUPPORT
+            fPMVars->myCurrentState = fCurrentPowerState;
+#endif
+            OUR_PMLog(kPMLogChangeDone, fCurrentPowerState, 0);
+            PM_ACTION_2(actionPowerChangeDone,
+                fHeadNotePowerState, fHeadNoteChangeFlags);
+            callAction = true;
+
+            powerStatePtr = &fPowerStates[fCurrentPowerState];
+            fCurrentCapabilityFlags = powerStatePtr->capabilityFlags;
+            if (fCurrentCapabilityFlags & kIOPMStaticPowerValid)
+                fCurrentPowerConsumption = powerStatePtr->staticPower;
+
+            // inform subclass policy-maker
+            if (fPCDFunctionOverride && fParentsKnowState &&
+                assertPMDriverCall(&callEntry, kIOPMADC_NoInactiveCheck))
+            {
+                powerChangeDone(prevPowerState);
+                deassertPMDriverCall(&callEntry);
+            }
+        }
+        else if (getPMRequestType() == kIOPMRequestTypeRequestPowerStateOverride)
+        {
+            // changePowerStateWithOverrideTo() was cancelled
+            fOverrideMaxPowerState = kIOPMPowerStateMax;
+        }
+    }
+
+    // parent's power change
+    if ( fHeadNoteChangeFlags & kIOPMParentInitiated)
+    {
+        if (((fHeadNoteChangeFlags & kIOPMDomainWillChange) &&
+             (fCurrentPowerState >= fHeadNotePowerState))   ||
+            ((fHeadNoteChangeFlags & kIOPMDomainDidChange)  &&
+             (fCurrentPowerState < fHeadNotePowerState)))
+        {
+            // did power raise?
+            if ( fCurrentPowerState < fHeadNotePowerState )
+            {
+                // yes, inform clients and apps
+                tellChangeUp (fHeadNotePowerState);
+            }
+            // either way
+            prevPowerState = fCurrentPowerState;
+            fCurrentPowerState = fHeadNotePowerState;
+#if PM_VARS_SUPPORT
+            fPMVars->myCurrentState = fCurrentPowerState;
+#endif
+            fMaxPowerState = fControllingDriver->maxCapabilityForDomainState(fHeadNoteDomainFlags);
+
+            OUR_PMLog(kPMLogChangeDone, fCurrentPowerState, 0);
+            PM_ACTION_2(actionPowerChangeDone,
+                fHeadNotePowerState, fHeadNoteChangeFlags);
+            callAction = true;
+
+            powerStatePtr = &fPowerStates[fCurrentPowerState];
+            fCurrentCapabilityFlags = powerStatePtr->capabilityFlags;
+            if (fCurrentCapabilityFlags & kIOPMStaticPowerValid)
+                fCurrentPowerConsumption = powerStatePtr->staticPower;
+
+            // inform subclass policy-maker
+            if (fPCDFunctionOverride && fParentsKnowState &&
+                assertPMDriverCall(&callEntry, kIOPMADC_NoInactiveCheck))
+            {
+                powerChangeDone(prevPowerState);
+                deassertPMDriverCall(&callEntry);
+            }
+        }
+    }
+
+    // When power rises enough to satisfy the tickle's desire for more power,
+    // the condition preventing idle-timer from dropping power is removed.
+
+    if (fCurrentPowerState >= fIdleTimerMinPowerState)
+    {
+        fIdleTimerMinPowerState = 0;
+    }
+
+    if (!callAction)
+    {
+        PM_ACTION_2(actionPowerChangeDone,
+            fHeadNotePowerState, fHeadNoteChangeFlags);
+    }
+}
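+
+// Note on PM_ACTION_2 (a sketch; the real macro lives earlier in this file):
+// it invokes the named IOPMActions callback when one is installed, roughly
+//
+//     if (fPMActions.actionPowerChangeDone)
+//         fPMActions.actionPowerChangeDone(fPMActions.target, this,
+//             &fPMActions, arg1, arg2);
+//
+// The root domain installs these callbacks to observe power changes (and,
+// for actionPowerChangeStart, to edit the change flags passed by address).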
+
 // MARK: -
 // MARK: Power Change Initiated by Driver
 
@@ -3495,13 +4082,13 @@ void IOService::OurChangeStart ( void )
 	PM_ASSERT_IN_GATE();
     OUR_PMLog( kPMLogStartDeviceChange, fHeadNotePowerState, fCurrentPowerState );
 
-	// fMaxCapability is our maximum possible power state based on the current
+	// fMaxPowerState is our maximum possible power state based on the current
 	// power state of our parents.  If we are trying to raise power beyond the
 	// maximum, send an async request for more power to all parents.
 
-    if (!IS_PM_ROOT() && (fMaxCapability < fHeadNotePowerState))
+    if (!IS_PM_ROOT && (fMaxPowerState < fHeadNotePowerState))
     {
-        fHeadNoteFlags |= kIOPMNotDone;
+        fHeadNoteChangeFlags |= kIOPMNotDone;
         requestDomainPower(fHeadNotePowerState);
         OurChangeFinish();
         return;
@@ -3509,36 +4096,29 @@ void IOService::OurChangeStart ( void )
 
 	// A redundant power change skips to the end of the state machine.
 
-    if (!fInitialChange && (fHeadNotePowerState == fCurrentPowerState))
+    if (!fInitialPowerChange && (fHeadNotePowerState == fCurrentPowerState))
 	{
 		OurChangeFinish();
 		return;
     }
-    fInitialChange = false;
+    fInitialPowerChange = false;
 
-#if ROOT_DOMAIN_RUN_STATES
     // Change started, but may not complete...
     // Can be canceled (power drop) or deferred (power rise).
 
-    getPMRootDomain()->handlePowerChangeStartForService(
-                        /* service */       this,
-                        /* RD flags */      &fRootDomainState,
-                        /* new pwr state */ fHeadNotePowerState,
-                        /* change flags */  fHeadNoteFlags );
-#endif
+    PM_ACTION_2(actionPowerChangeStart, fHeadNotePowerState, &fHeadNoteChangeFlags);
 
 	// Two separate paths, depending if power is being raised or lowered.
 	// Lowering power is subject to approval by clients of this service.
 
     if (IS_POWER_DROP)
     {
-		// Next machine state for a power drop.
-        fMachineState = kIOPM_OurChangeTellClientsPowerDown;
         fDoNotPowerDown = false;
 
-        // Ask apps and kernel clients permission to lower power.	
+        // Ask for permission to drop power state
+        fMachineState = kIOPM_OurChangeTellClientsPowerDown;
         fOutOfBandParameter = kNotifyApps;
-		askChangeDown(fHeadNotePowerState);
+        askChangeDown(fHeadNotePowerState);
     }
 	else
 	{
@@ -3554,7 +4134,6 @@ void IOService::OurChangeStart ( void )
         // then the child will signal the parent to adjust power, and the child
         // will defer its power change.
 
-#if RESERVE_DOMAIN_POWER
         IOReturn ret;
 
         // Reserve parent power necessary to achieve fHeadNotePowerState.
@@ -3562,13 +4141,12 @@ void IOService::OurChangeStart ( void )
         if (ret != kIOReturnSuccess)
         {
             // Reservation failed, defer power rise.
-            fHeadNoteFlags |= kIOPMNotDone;
+            fHeadNoteChangeFlags |= kIOPMNotDone;
             OurChangeFinish();
             return;
         }
-#endif
-		// Notify interested drivers and children.
-        notifyAll( kIOPM_OurChangeSetPowerState, kNotifyWillChange );
+
+        OurChangeTellCapabilityWillChange();
     }
 }
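+
+// Machine-state sequencing for a self-initiated change, as wired up by the
+// handlers below:
+//
+//   drop:  OurChangeStart -> OurChangeTellClientsPowerDown
+//          -> OurChangeTellPriorityClientsPowerDown
+//          -> OurChangeNotifyInterestedDriversWillChange
+//          -> OurChangeSetPowerState -> OurChangeWaitForPowerSettle
+//          -> OurChangeNotifyInterestedDriversDidChange
+//          -> OurChangeTellCapabilityDidChange -> OurChangeFinish
+//
+//   rise:  OurChangeStart (reserve domain power)
+//          -> OurChangeTellCapabilityWillChange
+//          -> OurChangeNotifyInterestedDriversWillChange -> ... as above
+//
+// The capability stages are pass-throughs except on the root domain.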
 
@@ -3613,26 +4191,26 @@ requestDomainPowerApplier(
 //*********************************************************************************
 
 IOReturn IOService::requestDomainPower(
-    unsigned long   ourPowerState,
-    IOOptionBits    options )
+    IOPMPowerStateIndex ourPowerState,
+    IOOptionBits        options )
 {
-    const IOPMPowerState *          powerStateEntry;
+    const IOPMPSEntry *             powerStateEntry;
     IOPMPowerFlags                  requestPowerFlags;
-    unsigned long                   maxPowerState;
+    IOPMPowerStateIndex             maxPowerState;
     IOPMRequestDomainPowerContext   context;
 
 	PM_ASSERT_IN_GATE();
     assert(ourPowerState < fNumberOfPowerStates);
     if (ourPowerState >= fNumberOfPowerStates)
         return kIOReturnBadArgument;
-    if (IS_PM_ROOT())
+    if (IS_PM_ROOT)
         return kIOReturnSuccess;
 
     // Fetch the input power flags for the requested power state.
     // Parent request is stated in terms of required power flags.
 
 	powerStateEntry = &fPowerStates[ourPowerState];
-	requestPowerFlags = powerStateEntry->inputPowerRequirement;
+	requestPowerFlags = powerStateEntry->inputPowerFlags;
 
     if (powerStateEntry->capabilityFlags & (kIOPMChildClamp | kIOPMPreventIdleSleep))
         requestPowerFlags |= kIOPMPreventIdleSleep;
@@ -3642,12 +4220,12 @@ IOReturn IOService::requestDomainPower(
     // Disregard the "previous request" for power reservation.
 
     if (((options & kReserveDomainPower) == 0) &&
-        (fPreviousRequest == requestPowerFlags))
+        (fPreviousRequestPowerFlags == requestPowerFlags))
     {
         // skip if domain already knows our requirements
         goto done;
     }
-    fPreviousRequest = requestPowerFlags;
+    fPreviousRequestPowerFlags = requestPowerFlags;
 
     context.child              = this;
     context.requestPowerFlags  = requestPowerFlags;
@@ -3661,7 +4239,7 @@ IOReturn IOService::requestDomainPower(
 
         if (maxPowerState < fHeadNotePowerState)
         {
-            PM_TRACE("%s: power desired %u:0x%x got %u:0x%x\n",
+            PM_LOG1("%s: power desired %u:0x%x got %u:0x%x\n",
                 getName(),
                 (uint32_t) ourPowerState, (uint32_t) requestPowerFlags,
                 (uint32_t) maxPowerState, (uint32_t) fHeadNoteDomainTargetFlags);
@@ -3681,30 +4259,38 @@ void IOService::OurSyncStart ( void )
 {
 	PM_ASSERT_IN_GATE();
 
-    if (fInitialChange)
+    if (fInitialPowerChange)
         return;
 
-#if ROOT_DOMAIN_RUN_STATES
-    getPMRootDomain()->handlePowerChangeStartForService(
-                        /* service */       this,
-                        /* RD flags */      &fRootDomainState,
-                        /* new pwr state */ fHeadNotePowerState,
-                        /* change flags */  fHeadNoteFlags );
-#endif
+    PM_ACTION_2(actionPowerChangeStart, fHeadNotePowerState, &fHeadNoteChangeFlags);
+
+    if (fHeadNoteChangeFlags & kIOPMNotDone)
+    {
+		OurChangeFinish();
+		return;
+    }
 
-    fMachineState     = kIOPM_SyncNotifyDidChange;
-    fDriverCallReason = kDriverCallInformPreChange;
+    if (fHeadNoteChangeFlags & kIOPMSyncTellPowerDown)
+    {
+        fDoNotPowerDown = false;
 
-    notifyChildren();
+        // Ask for permission to drop power state
+        fMachineState = kIOPM_SyncTellClientsPowerDown;
+        fOutOfBandParameter = kNotifyApps;
+        askChangeDown(fHeadNotePowerState);
+    }
+    else
+    {
+        // Only inform capability change apps and kernel clients.
+        tellSystemCapabilityChange( kIOPM_SyncNotifyWillChange );
+    }
 }
 
 //*********************************************************************************
 // [private] OurChangeTellClientsPowerDown
 //
-// All registered applications and kernel clients have positively acknowledged our
-// intention of lowering power.  Here we notify them all that we will definitely
-// lower the power.  If we don't have to wait for any of them to acknowledge, we
-// carry on by notifying interested drivers.  Otherwise, we do wait.
+// All applications and kernel clients have granted permission to drop
+// power. Here we notify them that we will lower power, and wait for acks.
 //*********************************************************************************
 
 void IOService::OurChangeTellClientsPowerDown ( void )
@@ -3716,10 +4302,8 @@ void IOService::OurChangeTellClientsPowerDown ( void )
 //*********************************************************************************
 // [private] OurChangeTellPriorityClientsPowerDown
 //
-// All registered applications and kernel clients have positively acknowledged our
-// intention of lowering power.  Here we notify "priority" clients that we are
-// lowering power.  If we don't have to wait for any of them to acknowledge, we
-// carry on by notifying interested drivers.  Otherwise, we do wait.
+// All applications and kernel clients have acknowledged our intention to drop
+// power.  Here we notify "priority" clients that we are lowering power.
 //*********************************************************************************
 
 void IOService::OurChangeTellPriorityClientsPowerDown ( void )
@@ -3728,80 +4312,123 @@ void IOService::OurChangeTellPriorityClientsPowerDown ( void )
     tellChangeDown2(fHeadNotePowerState);
 }
 
+//*********************************************************************************
+// [private] OurChangeTellCapabilityWillChange
+//
+// Extra stage for root domain to notify apps and drivers about the
+// system capability change when raising power state.
+//*********************************************************************************
+
+void IOService::OurChangeTellCapabilityWillChange ( void )
+{
+    if (!IS_ROOT_DOMAIN)
+        return OurChangeNotifyInterestedDriversWillChange();
+
+    tellSystemCapabilityChange( kIOPM_OurChangeNotifyInterestedDriversWillChange );
+}
+
 //*********************************************************************************
 // [private] OurChangeNotifyInterestedDriversWillChange
 //
-// All registered applications and kernel clients have acknowledged our notification
-// that we are lowering power.  Here we notify interested drivers.  If we don't have
-// to wait for any of them to acknowledge, we instruct our power driver to make the
-// change. Otherwise, we do wait.
+// All applications and kernel clients have acknowledged our power state change.
+// Here we notify interested drivers pre-change.
 //*********************************************************************************
 
 void IOService::OurChangeNotifyInterestedDriversWillChange ( void )
 {
-    IOPMrootDomain  *rootDomain;
+    IOPMrootDomain * rootDomain;
     if ((rootDomain = getPMRootDomain()) == this)
     {
-        rootDomain->tracePoint(kIOPMTracePointSystemSleepDriversPhase);
+        if (IS_POWER_DROP)
+        {
+            rootDomain->tracePoint( kIOPMTracePointSleepWillChangeInterests );
+
+            PMEventDetails *details = PMEventDetails::eventDetails(
+                                        kIOPMEventTypeAppNotificationsFinished,
+                                        NULL,
+                                        100,
+                                        kIOReturnSuccess);
+            rootDomain->recordAndReleasePMEventGated( details );
+        }
+        else
+            rootDomain->tracePoint( kIOPMTracePointWakeWillChangeInterests );
     }
 
-    notifyAll( kIOPM_OurChangeSetPowerState, kNotifyWillChange );
+    notifyAll( kIOPM_OurChangeSetPowerState );
 }
 
 //*********************************************************************************
 // [private] OurChangeSetPowerState
 //
-// All interested drivers have acknowledged our pre-change notification of a power
-// change we initiated.  Here we instruct our controlling driver to make
-// the change to the hardware.  If it does so, we continue processing
-// (waiting for settle and notifying interested parties post-change.)
-// If it doesn't, we have to wait for it to acknowledge and then continue.
+// Instruct our controlling driver to program the hardware for the power state
+// change. Wait for async completions.
 //*********************************************************************************
 
 void IOService::OurChangeSetPowerState ( void )
 {
-    fNextMachineState = kIOPM_OurChangeWaitForPowerSettle;
-	fMachineState     = kIOPM_DriverThreadCallDone;
-	fDriverCallReason = kDriverCallSetPowerState;
+    MS_PUSH( kIOPM_OurChangeWaitForPowerSettle );
+    fMachineState     = kIOPM_DriverThreadCallDone;
+    fDriverCallReason = kDriverCallSetPowerState;
 
-	if (notifyControllingDriver() == false)
-		notifyControllingDriverDone();
+    if (notifyControllingDriver() == false)
+        notifyControllingDriverDone();
 }
 
 //*********************************************************************************
 // [private] OurChangeWaitForPowerSettle
 //
-// Our controlling driver has changed power state on the hardware
-// during a power change we initiated. Wait for the driver specified
-// settle time to expire, before notifying interested parties post-change.
+// Our controlling driver has completed the power state change we initiated.
+// Wait for the driver specified settle time to expire.
 //*********************************************************************************
 
-void IOService::OurChangeWaitForPowerSettle( void )
+void IOService::OurChangeWaitForPowerSettle ( void )
 {
-	fMachineState = kIOPM_OurChangeNotifyInterestedDriversDidChange;
+    fMachineState = kIOPM_OurChangeNotifyInterestedDriversDidChange;
     startSettleTimer();
 }
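+
+// Note: startSettleTimer() (below) arms the settle_timer_expired() callout;
+// when it fires, settleTimerExpired() clears fSettleTimeUS and signals
+// gIOPMWorkQueue so the state machine resumes in the state set above.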
 
 //*********************************************************************************
 // [private] OurChangeNotifyInterestedDriversDidChange
 //
-// Power has settled on a power change we initiated.  Here we notify
-// all our interested parties post-change.  If they all acknowledge, we're
-// done with this change note, and we can start on the next one.
-// Otherwise we have to wait for acknowledgements and finish up later.
+// Power has settled on a power change we initiated. Here we notify
+// all our interested drivers post-change.
 //*********************************************************************************
 
 void IOService::OurChangeNotifyInterestedDriversDidChange ( void )
 {
-    notifyAll( kIOPM_OurChangeFinish, kNotifyDidChange );
+    IOPMrootDomain * rootDomain;
+    if ((rootDomain = getPMRootDomain()) == this)
+    {
+        rootDomain->tracePoint( IS_POWER_DROP ?
+            kIOPMTracePointSleepDidChangeInterests :
+            kIOPMTracePointWakeDidChangeInterests  );
+    }
+
+    notifyAll( kIOPM_OurChangeTellCapabilityDidChange );
+}
+
+//*********************************************************************************
+// [private] OurChangeTellCapabilityDidChange
+//
+// An extra stage for the root domain to notify apps and drivers of the
+// system capability change after the power change completes.
+//*********************************************************************************
+
+void IOService::OurChangeTellCapabilityDidChange ( void )
+{
+    if (!IS_ROOT_DOMAIN)
+        return OurChangeFinish();
+
+    getPMRootDomain()->tracePoint( IS_POWER_DROP ?
+        kIOPMTracePointSleepCapabilityClients :
+        kIOPMTracePointWakeCapabilityClients  );
+
+    tellSystemCapabilityChange( kIOPM_OurChangeFinish );
 }
 
 //*********************************************************************************
 // [private] OurChangeFinish
 //
-// Power has settled on a power change we initiated, and
-// all our interested parties have acknowledged.  We're
-// done with this change note, and we can start on the next one.
+// Done with this self-initiated power state change.
 //*********************************************************************************
 
 void IOService::OurChangeFinish ( void )
@@ -3829,17 +4456,11 @@ IOReturn IOService::ParentChangeStart ( void )
 		// TODO: redundant? See handlePowerDomainWillChangeTo()
 		setParentInfo( fHeadNoteParentFlags, fHeadNoteParentConnection, true );
 
-#if ROOT_DOMAIN_RUN_STATES
-        getPMRootDomain()->handlePowerChangeStartForService(
-                            /* service */       this,
-                            /* RD flags */      &fRootDomainState,
-                            /* new pwr state */ fHeadNotePowerState,
-                            /* change flags */  fHeadNoteFlags );
-#endif
+        PM_ACTION_2(actionPowerChangeStart, fHeadNotePowerState, &fHeadNoteChangeFlags);
 
-    	// tell apps and kernel clients
-    	fInitialChange = false;
-        fMachineState = kIOPM_ParentDownTellPriorityClientsPowerDown;
+    	// Tell apps and kernel clients
+    	fInitialPowerChange = false;
+        fMachineState = kIOPM_ParentChangeTellPriorityClientsPowerDown;
 		tellChangeDown1(fHeadNotePowerState);
         return IOPMWillAckLater;
     }
@@ -3864,24 +4485,19 @@ IOReturn IOService::ParentChangeStart ( void )
         }
     }
 
-    if ( fHeadNoteFlags & kIOPMDomainDidChange )
+    if ( fHeadNoteChangeFlags & kIOPMDomainDidChange )
 	{
         if ( fHeadNotePowerState > fCurrentPowerState )
         {
-#if ROOT_DOMAIN_RUN_STATES
-            getPMRootDomain()->handlePowerChangeStartForService(
-                                /* service */       this,
-                                /* RD flags */      &fRootDomainState,
-                                /* new pwr state */ fHeadNotePowerState,
-                                /* change flags */  fHeadNoteFlags );
-#endif
+            PM_ACTION_2(actionPowerChangeStart,
+                fHeadNotePowerState, &fHeadNoteChangeFlags);
 
             // Parent did change up - start our change up
-            fInitialChange = false;
-            notifyAll( kIOPM_ParentUpSetPowerState, kNotifyWillChange );
+            fInitialPowerChange = false;
+            ParentChangeTellCapabilityWillChange();
             return IOPMWillAckLater;
         }
-        else if (fHeadNoteFlags & kIOPMSynchronize)
+        else if (fHeadNoteChangeFlags & kIOPMSynchronize)
         {
             // We do not need to change power state, but notify
             // children to propagate tree synchronization.
@@ -3897,102 +4513,108 @@ IOReturn IOService::ParentChangeStart ( void )
 }
 
 //*********************************************************************************
-// [private] ParentDownTellPriorityClientsPowerDown
+// [private] ParentChangeTellPriorityClientsPowerDown
 //
-// All applications and kernel clients have been notified of a power lowering
-// initiated by the parent and we had to wait for responses.  Here
-// we notify any priority clients.  If they all ack, we continue with the power change.
-// If at least one doesn't, we have to wait for it to acknowledge and then continue.
+// All applications and kernel clients have acknowledged our intention to drop
+// power.  Here we notify "priority" clients that we are lowering power.
 //*********************************************************************************
 
-void IOService::ParentDownTellPriorityClientsPowerDown ( void )
+void IOService::ParentChangeTellPriorityClientsPowerDown ( void )
 {
-    fMachineState = kIOPM_ParentDownNotifyInterestedDriversWillChange;
+    fMachineState = kIOPM_ParentChangeNotifyInterestedDriversWillChange;
 	tellChangeDown2(fHeadNotePowerState);
 }
 
 //*********************************************************************************
-// [private] ParentDownNotifyInterestedDriversWillChange
+// [private] ParentChangeTellCapabilityWillChange
 //
-// All applications and kernel clients have been notified of a power lowering
-// initiated by the parent and we had to wait for their responses.  Here we notify
-// any interested drivers and power domain children.  If they all ack, we continue
-// with the power change.
-// If at least one doesn't, we have to wait for it to acknowledge and then continue.
+// All (legacy) applications and kernel clients have acknowledged. This is an
+// extra stage for the root domain to notify apps and drivers about the
+// system capability change.
 //*********************************************************************************
 
-void IOService::ParentDownNotifyInterestedDriversWillChange ( void )
+void IOService::ParentChangeTellCapabilityWillChange ( void )
 {
-    IOPMrootDomain  *rootDomain;
-    if ((rootDomain = getPMRootDomain()) == this)
-    {
-        rootDomain->tracePoint(kIOPMTracePointSystemSleepDriversPhase);
-    }
+    if (!IS_ROOT_DOMAIN)
+        return ParentChangeNotifyInterestedDriversWillChange();
 
-	notifyAll( kIOPM_ParentDownSetPowerState, kNotifyWillChange );
+    tellSystemCapabilityChange( kIOPM_ParentChangeNotifyInterestedDriversWillChange );
 }
 
 //*********************************************************************************
-// [private] ParentDownSetPowerState
+// [private] ParentChangeNotifyInterestedDriversWillChange
 //
-// We had to wait for it, but all parties have acknowledged our pre-change
-// notification of a power lowering initiated by the parent.
-// Here we instruct our controlling driver
-// to put the hardware in the state it needs to be in when the domain is
-// lowered.  If it does so, we continue processing
-// (waiting for settle and acknowledging the parent.)
-// If it doesn't, we have to wait for it to acknowledge and then continue.
+// All applications and kernel clients have acknowledged our power state change.
+// Here we notify interested drivers pre-change.
 //*********************************************************************************
 
-void IOService::ParentDownSetPowerState ( void )
+void IOService::ParentChangeNotifyInterestedDriversWillChange ( void )
 {
-	fNextMachineState = kIOPM_ParentDownWaitForPowerSettle;
-	fMachineState     = kIOPM_DriverThreadCallDone;
-	fDriverCallReason = kDriverCallSetPowerState;
+    notifyAll( kIOPM_ParentChangeSetPowerState );
+}
 
-	if (notifyControllingDriver() == false)
-		notifyControllingDriverDone();
+//*********************************************************************************
+// [private] ParentChangeSetPowerState
+//
+// Instruct our controlling driver to program the hardware for the power state
+// change. Wait for async completions.
+//*********************************************************************************
+
+void IOService::ParentChangeSetPowerState ( void )
+{
+    MS_PUSH( kIOPM_ParentChangeWaitForPowerSettle );
+    fMachineState     = kIOPM_DriverThreadCallDone;
+    fDriverCallReason = kDriverCallSetPowerState;
+
+    if (notifyControllingDriver() == false)
+        notifyControllingDriverDone();
 }
 
 //*********************************************************************************
-// [private] ParentDownWaitForPowerSettle
+// [private] ParentChangeWaitForPowerSettle
 //
-// Our controlling driver has changed power state on the hardware
-// during a power change initiated by our parent.  We have had to wait
-// for acknowledgement from interested parties, or we have had to wait
-// for the controlling driver to change the state.  Here we see if we need
-// to wait for power to settle before continuing.  If not, we continue
-// processing (acknowledging our preparedness to the parent).
-// If so, we wait and continue later.
+// Our controlling driver has completed the power state change initiated by our
+// parent. Wait for the driver specified settle time to expire.
 //*********************************************************************************
 
-void IOService::ParentDownWaitForPowerSettle ( void )
+void IOService::ParentChangeWaitForPowerSettle ( void )
 {
-	fMachineState = kIOPM_ParentDownNotifyDidChangeAndAcknowledgeChange;
+	fMachineState = kIOPM_ParentChangeNotifyInterestedDriversDidChange;
     startSettleTimer();
 }
 
 //*********************************************************************************
-// [private] ParentDownNotifyDidChangeAndAcknowledgeChange
+// [private] ParentChangeNotifyInterestedDriversDidChange
+//
+// Power has settled on a power change initiated by our parent. Here we notify
+// all our interested drivers post-change.
+//*********************************************************************************
+
+void IOService::ParentChangeNotifyInterestedDriversDidChange ( void )
+{
+    notifyAll( kIOPM_ParentChangeTellCapabilityDidChange );
+}
+
+//*********************************************************************************
+// [private] ParentChangeTellCapabilityDidChange
 //
-// Power has settled on a power change initiated by our parent.  Here we
-// notify interested parties.
+// An extra stage for the root domain to notify apps and drivers of the
+// system capability change after the parent-initiated change completes.
 //*********************************************************************************
 
-void IOService::ParentDownNotifyDidChangeAndAcknowledgeChange ( void )
+void IOService::ParentChangeTellCapabilityDidChange ( void )
 {
-	notifyAll( kIOPM_ParentAcknowledgePowerChange, kNotifyDidChange );	
+    if (!IS_ROOT_DOMAIN)
+        return ParentChangeAcknowledgePowerChange();
+
+    tellSystemCapabilityChange( kIOPM_ParentChangeAcknowledgePowerChange );
 }
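+
+// Machine-state sequencing for a parent-initiated change, as wired up above:
+//
+//   domain drop:  ParentChangeStart -> ParentChangeTellPriorityClientsPowerDown
+//                 -> ParentChangeNotifyInterestedDriversWillChange
+//                 -> ParentChangeSetPowerState -> ParentChangeWaitForPowerSettle
+//                 -> ParentChangeNotifyInterestedDriversDidChange
+//                 -> ParentChangeTellCapabilityDidChange
+//                 -> ParentChangeAcknowledgePowerChange
+//
+//   domain rise:  ParentChangeStart -> ParentChangeTellCapabilityWillChange
+//                 -> ParentChangeNotifyInterestedDriversWillChange -> ... as above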
 
 //*********************************************************************************
-// [private] ParentAcknowledgePowerChange
+// [private] ParentChangeAcknowledgePowerChange
 //
-// We had to wait for it, but all parties have acknowledged our post-change
-// notification of a power change (either Up or Down) initiated by the parent.
-// Here we acknowledge the parent.
+// Notify our power parent that our power change is done.
 //*********************************************************************************
 
-void IOService::ParentAcknowledgePowerChange ( void )
+void IOService::ParentChangeAcknowledgePowerChange ( void )
 {
     IORegistryEntry *	nub;
     IOService *			parent;
@@ -4009,192 +4631,26 @@ void IOService::ParentAcknowledgePowerChange ( void )
     nub->release();
 }
 
+// MARK: -
+// MARK: Ack and Settle timers
+
 //*********************************************************************************
-// [private] ParentUpSetPowerState
+// [private] settleTimerExpired
 //
-// Our parent has informed us via powerStateDidChange that it has
-// raised the power in our power domain, and we have had to wait
-// for some interested party to acknowledge our notification.
-//   Here we instruct our controlling
-// driver to program the hardware to take advantage of the higher domain
-// power.  If it does so, we continue processing
-// (waiting for settle and notifying interested parties post-change.)
-// If it doesn't, we have to wait for it to acknowledge and then continue.
+// Power has settled after our last change.  Notify interested parties that
+// there is a new power state.
 //*********************************************************************************
 
-void IOService::ParentUpSetPowerState ( void )
+void IOService::settleTimerExpired( void )
 {
-	fNextMachineState = kIOPM_ParentUpWaitForSettleTime;
-	fMachineState     = kIOPM_DriverThreadCallDone;
-	fDriverCallReason = kDriverCallSetPowerState;
-
-	if (notifyControllingDriver() == false)
-		notifyControllingDriverDone();
+	fSettleTimeUS = 0;
+	gIOPMWorkQueue->signalWorkAvailable();
 }
 
 //*********************************************************************************
-// [private] ParentUpWaitForSettleTime
+// settle_timer_expired
 //
-// Our controlling driver has changed power state on the hardware
-// during a power raise initiated by the parent, but we had to wait for it.
-// Here we see if we need to wait for power to settle before continuing.
-// If not, we continue processing  (notifying interested parties post-change).
-// If so, we wait and continue later.
-//*********************************************************************************
-
-void IOService::ParentUpWaitForSettleTime ( void )
-{
-	fMachineState = kIOPM_ParentUpNotifyInterestedDriversDidChange;
-    startSettleTimer();
-}
-
-//*********************************************************************************
-// [private] ParentUpNotifyInterestedDriversDidChange
-//
-// Power has settled on a power raise initiated by the parent.
-// Here we notify all our interested parties post-change.  If they all acknowledge,
-// we're done with this change note, and we can start on the next one.
-// Otherwise we have to wait for acknowledgements and finish up later.
-//*********************************************************************************
-
-void IOService::ParentUpNotifyInterestedDriversDidChange ( void )
-{
-	notifyAll( kIOPM_ParentAcknowledgePowerChange, kNotifyDidChange );	
-}
-
-//*********************************************************************************
-// [private] all_done
-//
-// A power change is complete, and the used post-change note is at
-// the head of the queue.  Remove it and set myCurrentState to the result
-// of the change.  Start up the next change in queue.
-//*********************************************************************************
-
-void IOService::all_done ( void )
-{
-    unsigned long	previous_state;
-
-#if ROOT_DOMAIN_RUN_STATES
-    getPMRootDomain()->handlePowerChangeDoneForService(
-                        /* service */       this,
-                        /* RD flags */      &fRootDomainState,
-                        /* new pwr state */ fHeadNotePowerState,
-                        /* change flags */  fHeadNoteFlags );
-#endif
-
-    if ((fHeadNoteFlags & kIOPMSynchronize) &&
-        ((fMachineState == kIOPM_Finished) || (fMachineState == kIOPM_SyncFinish)))
-    {
-        // Sync operation and no power change occurred.
-        // Do not inform driver and clients about this request completion,
-        // except for the originator (root domain).
-
-        if (getPMRequestType() == kIOPMRequestTypeSynchronizePowerTree)
-        {
-            powerChangeDone(fCurrentPowerState);
-        }
-
-        fMachineState = kIOPM_Finished;
-        return;
-    }
-
-    fMachineState = kIOPM_Finished;
-
-    // our power change
-    if ( fHeadNoteFlags & kIOPMWeInitiated )
-    {
-        // could our driver switch to the new state?
-        if ( !( fHeadNoteFlags & kIOPMNotDone) )
-        {
-			// we changed, tell our parent
-            requestDomainPower(fHeadNotePowerState);
-
-            // yes, did power raise?
-            if ( fCurrentPowerState < fHeadNotePowerState )
-            {
-                // yes, inform clients and apps
-                tellChangeUp (fHeadNotePowerState);
-            }
-            previous_state = fCurrentPowerState;
-            // either way
-            fCurrentPowerState = fHeadNotePowerState;
-#if PM_VARS_SUPPORT
-			fPMVars->myCurrentState = fCurrentPowerState;
-#endif
-            OUR_PMLog(kPMLogChangeDone, fCurrentPowerState, 0);
-			
-            // inform subclass policy-maker
-            if ((fLockedFlags.PMStop == false) && fParentsKnowState)
-                powerChangeDone(previous_state);
-            else
-                PM_DEBUG("%s::powerChangeDone() skipped\n", getName());
-        }
-    }
-
-    // parent's power change
-    if ( fHeadNoteFlags & kIOPMParentInitiated)
-    {
-        if (((fHeadNoteFlags & kIOPMDomainWillChange) && (fCurrentPowerState >= fHeadNotePowerState)) ||
-			((fHeadNoteFlags & kIOPMDomainDidChange) && (fCurrentPowerState < fHeadNotePowerState)))
-        {
-            // did power raise?
-            if ( fCurrentPowerState < fHeadNotePowerState )
-            {
-                // yes, inform clients and apps
-                tellChangeUp (fHeadNotePowerState);
-            }
-            // either way
-            previous_state = fCurrentPowerState;
-            fCurrentPowerState = fHeadNotePowerState;
-#if PM_VARS_SUPPORT
-			fPMVars->myCurrentState = fCurrentPowerState;
-#endif
-            fMaxCapability = fControllingDriver->maxCapabilityForDomainState(fHeadNoteDomainFlags);
-
-            OUR_PMLog(kPMLogChangeDone, fCurrentPowerState, 0);
-
-            // inform subclass policy-maker
-            if ((fLockedFlags.PMStop == false) && fParentsKnowState)
-                powerChangeDone(previous_state);
-            else
-                PM_DEBUG("%s::powerChangeDone() skipped\n", getName());
-        }
-    }
-
-    if (fCurrentPowerState < fNumberOfPowerStates)
-    {
-        const IOPMPowerState * powerStatePtr = &fPowerStates[fCurrentPowerState];
-
-        fCurrentCapabilityFlags = powerStatePtr->capabilityFlags;
-        if (fCurrentCapabilityFlags & kIOPMStaticPowerValid)
-            fCurrentPowerConsumption = powerStatePtr->staticPower;
-    }
-
-    // When power rises enough to satisfy the tickle's desire for more power,
-    // the condition preventing idle-timer from dropping power is removed.
-
-    if (fCurrentPowerState >= fIdleTimerMinPowerState)
-    {
-        fIdleTimerMinPowerState = 0;
-    }
-}
-
-//*********************************************************************************
-// [public] settleTimerExpired
-//
-// Power has settled after our last change.  Notify interested parties that
-// there is a new power state.
-//*********************************************************************************
-
-void IOService::settleTimerExpired ( void )
-{
-	fSettleTimeUS = 0;
-}
-
-//*********************************************************************************
-// settle_timer_expired
-//
-// Holds a retain while the settle timer callout is in flight.
+// Holds a retain while the settle timer callout is in flight.
 //*********************************************************************************
 
 static void
@@ -4202,12 +4658,11 @@ settle_timer_expired( thread_call_param_t arg0, thread_call_param_t arg1 )
 {
 	IOService * me = (IOService *) arg0;
 
-	if (gIOPMWorkLoop && gIOPMReplyQueue)
+	if (gIOPMWorkLoop && gIOPMWorkQueue)
 	{
 		gIOPMWorkLoop->runAction(
             OSMemberFunctionCast(IOWorkLoop::Action, me, &IOService::settleTimerExpired),
             me);
-		gIOPMReplyQueue->signalWorkAvailable();
 	}
 	me->release();
 }
@@ -4221,7 +4676,7 @@ settle_timer_expired( thread_call_param_t arg0, thread_call_param_t arg1 )
 void IOService::startSettleTimer( void )
 {
     AbsoluteTime        deadline;
-    unsigned long       i;
+    IOPMPowerStateIndex i;
     uint32_t            settleTime = 0;
 	boolean_t           pending;
 
@@ -4288,8 +4743,7 @@ bool IOService::ackTimerTick( void )
 	PM_ASSERT_IN_GATE();
     switch (fMachineState) {
         case kIOPM_OurChangeWaitForPowerSettle:
-        case kIOPM_ParentDownWaitForPowerSettle:
-        case kIOPM_ParentUpWaitForSettleTime:
+        case kIOPM_ParentChangeWaitForPowerSettle:
             // are we waiting for controlling driver to acknowledge?
             if ( fDriverTimer > 0 )
             {
@@ -4304,6 +4758,20 @@ bool IOService::ackTimerTick( void )
                     PM_ERROR("%s::setPowerState(%p, %lu -> %lu) timed out after %d ms\n",
                         fName, this, fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec));
 
+#if LOG_SETPOWER_TIMES
+                    PMEventDetails *details = PMEventDetails::eventDetails(
+                                                kIOPMEventTypeSetPowerStateDelayed, // type
+                                                fName,								// who
+                                                (uintptr_t)this,					// owner unique
+                                                NULL,								// interest name
+                                                (uint8_t)getPowerState(),			// old
+                                                0,									// new
+                                                kIOReturnTimeout,					// result
+                                                NS_TO_US(nsec));					// usec completion time
+
+                    getPMRootDomain()->recordAndReleasePMEventGated( details );
+#endif
+
                     if (gIOKitDebug & kIOLogDebugPower)
                     {
                         panic("%s::setPowerState(%p, %lu -> %lu) timed out after %d ms",
@@ -4321,12 +4789,7 @@ bool IOService::ackTimerTick( void )
             }
             break;
 
-        case kIOPM_OurChangeSetPowerState:
-        case kIOPM_OurChangeFinish:
-        case kIOPM_ParentDownSetPowerState:
-        case kIOPM_ParentAcknowledgePowerChange:
-        case kIOPM_ParentUpSetPowerState:
-		case kIOPM_NotifyChildrenDone:
+        case kIOPM_NotifyChildrenStart:
             // are we waiting for interested parties to acknowledge?
             if ( fHeadNotePendingAcks != 0 )
             {
@@ -4350,6 +4813,24 @@ bool IOService::ackTimerTick( void )
                                 nextObject->whatObject, fName, fCurrentPowerState, fHeadNotePowerState,
                                 NS_TO_MS(nsec));
 
+#if LOG_SETPOWER_TIMES
+                            uint16_t logType = (fDriverCallReason == kDriverCallInformPreChange)
+                                                ? kIOPMEventTypePSWillChangeTo
+                                                : kIOPMEventTypePSDidChangeTo;
+
+                            PMEventDetails *details = PMEventDetails::eventDetails(
+                                                        logType,							  // type
+                                                        fName,								  // who
+                                                        (uintptr_t)this,					  // owner unique
+                                                        nextObject->whatObject->getName(),	  // interest name
+                                                        (uint8_t)fCurrentPowerState,		  // old
+                                                        (uint8_t)fHeadNotePowerState,		  // new
+                                                        kIOReturnTimeout,					  // result
+                                                        NS_TO_US(nsec));					  // usec completion time
+
+                            getPMRootDomain()->recordAndReleasePMEventGated( details );
+#endif
+
                             // Pretend driver has acked.
                             fHeadNotePendingAcks--;
                         }
@@ -4369,11 +4850,16 @@ bool IOService::ackTimerTick( void )
             }
             break;
 
-        case kIOPM_ParentDownTellPriorityClientsPowerDown:
-        case kIOPM_ParentDownNotifyInterestedDriversWillChange:
+        // TODO: aggregate these cases
         case kIOPM_OurChangeTellClientsPowerDown:
         case kIOPM_OurChangeTellPriorityClientsPowerDown:
         case kIOPM_OurChangeNotifyInterestedDriversWillChange:
+        case kIOPM_ParentChangeTellPriorityClientsPowerDown:
+        case kIOPM_ParentChangeNotifyInterestedDriversWillChange:
+        case kIOPM_SyncTellClientsPowerDown:
+        case kIOPM_SyncTellPriorityClientsPowerDown:
+        case kIOPM_SyncNotifyWillChange:
+        case kIOPM_TellCapabilityChangeDone:
 			// apps didn't respond in time
             cleanClientResponses(true);
             OUR_PMLog(kPMLogClientTardy, 0, 1);
@@ -4382,7 +4868,7 @@ bool IOService::ackTimerTick( void )
             break;
 
         default:
-            PM_TRACE("%s: unexpected ack timer tick (state = %d)\n",
+            PM_LOG1("%s: unexpected ack timer tick (state = %d)\n",
 				getName(), fMachineState);
             break;
     }
@@ -4441,8 +4927,8 @@ IOService::actionAckTimerExpired (
 	// otherwise no need to signal the work loop.
 
 	done = me->ackTimerTick();
-	if (done && gIOPMReplyQueue)
-		gIOPMReplyQueue->signalWorkAvailable();
+	if (done && gIOPMWorkQueue)
+		gIOPMWorkQueue->signalWorkAvailable();
 
 	return kIOReturnSuccess;
 }
@@ -4465,104 +4951,31 @@ IOService::ack_timer_expired ( thread_call_param_t arg0, thread_call_param_t arg
 	me->release();
 }
 
-//*********************************************************************************
-// [private] notifyControllingDriver
-//*********************************************************************************
-
-bool IOService::notifyControllingDriver ( void )
-{
-	DriverCallParam *	param;
-	unsigned long		powerState;
-
-	PM_ASSERT_IN_GATE();
-	assert( fDriverCallParamCount == 0  );
-	assert( fControllingDriver );
-
-	powerState = fHeadNotePowerState;
-
-	param = (DriverCallParam *) fDriverCallParamPtr;
-	if (!param)
-	{
-		param = IONew(DriverCallParam, 1);
-		if (!param)
-			return false;	// no memory
-
-		fDriverCallParamPtr   = (void *) param;
-		fDriverCallParamSlots = 1;
-	}
-
-	param->Target = fControllingDriver;
-	fDriverCallParamCount = 1;
-
-	fDriverTimer = -1;
-
-	// Machine state for this object will stall waiting for a reply
-	// from the callout thread.
-
-	PM_LOCK();
-	assert( fLockedFlags.DriverCallBusy == false );
-	fLockedFlags.DriverCallBusy = true;
-	PM_UNLOCK();
-	thread_call_enter( fDriverCallEntry );
-	return true;
-}
+// MARK: -
+// MARK: Client Messaging
 
 //*********************************************************************************
-// [private] notifyControllingDriverDone
+// [private] tellSystemCapabilityChange
 //*********************************************************************************
 
-void IOService::notifyControllingDriverDone( void )
+void IOService::tellSystemCapabilityChange( uint32_t nextMS )
 {
-	DriverCallParam *	param;
-	IOReturn			result;
-
-	PM_ASSERT_IN_GATE();
-	param = (DriverCallParam *) fDriverCallParamPtr;
-
-	assert( fLockedFlags.DriverCallBusy == false );
-	assert( fMachineState == kIOPM_DriverThreadCallDone );
+	MS_PUSH( nextMS );
+    fMachineState       = kIOPM_TellCapabilityChangeDone;
+    fOutOfBandMessage   = kIOMessageSystemCapabilityChange;
 
-	if (param)
-	{
-		assert(fDriverCallParamCount == 1);
-		
-		// the return value from setPowerState()
-		result = param->Result;
-
-		if ((result == IOPMAckImplied) || (result < 0))
-		{
-			// child return IOPMAckImplied
-			fDriverTimer = 0;
-		}
-		else if (fDriverTimer)
-		{
-			assert(fDriverTimer == -1);
-
-            // Driver has not acked, and has returned a positive result.
-            // Enforce a minimum permissible timeout value.
-            // Make the min value large enough so timeout is less likely
-            // to occur if a driver misinterpreted that the return value
-            // should be in microsecond units.  And make it large enough
-            // to be noticeable if a driver neglects to ack.
-
-            if (result < kMinAckTimeoutTicks)
-                result = kMinAckTimeoutTicks;
-
-            fDriverTimer = (result / (ACK_TIMER_PERIOD / ns_per_us)) + 1;
-		}
-		// else, child has already acked and driver_timer reset to 0.
-
-		fDriverCallParamCount = 0;
-
-		if ( fDriverTimer )
-		{
-			OUR_PMLog(kPMLogStartAckTimer, 0, 0);
-			start_ack_timer();
-		}
-	}
+    if (fIsPreChange)
+    {
+        // Notify app first on pre-change.
+        fOutOfBandParameter = kNotifyCapabilityChangeApps;
+    }
+    else
+    {
+        // Notify kernel clients first on post-change.
+        fOutOfBandParameter = kNotifyCapabilityChangePriority;
+    }
 
-	// Hop back to original machine state path.
-	fMachineState = fNextMachineState;
+    tellClientsWithResponse( fOutOfBandMessage );
 }
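+
+// Note: MS_PUSH(nextMS) above saves the caller's continuation; the state
+// machine parks in kIOPM_TellCapabilityChangeDone while client responses
+// are collected, then presumably pops and resumes nextMS once the last
+// acknowledgement arrives.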
 
 //*********************************************************************************
@@ -4648,11 +5061,11 @@ static void logAppTimeouts ( OSObject * object, void * arg )
         clientIndex = context->notifyClients->getNextIndexOfObject(object, 0);
 
         if ((clientIndex != (unsigned int) -1) &&
-            (flag = context->responseFlags->getObject(clientIndex)) &&
+            (flag = context->responseArray->getObject(clientIndex)) &&
             (flag != kOSBooleanTrue))
         {
             OSString * clientID = 0;
-            context->us->messageClient(context->msgType, object, &clientID);
+            context->us->messageClient(context->messageType, object, &clientID);
             PM_ERROR(context->errorLog, clientID ? clientID->getCStringNoCopy() : "");
 
             // TODO: record message type if possible
@@ -4669,32 +5082,47 @@ static void logAppTimeouts ( OSObject * object, void * arg )
 
 void IOService::cleanClientResponses ( bool logErrors )
 {
-    IOPMInterestContext context;
-
-    if (logErrors && fResponseArray && fNotifyClientArray) {
-        context.responseFlags    = fResponseArray;
-        context.notifyClients    = fNotifyClientArray;
-        context.serialNumber     = fSerialNumber;
-        context.counter          = 0;
-        context.msgType          = kIOMessageCopyClientID;
-        context.us               = this;
-        context.maxTimeRequested = 0;
-        context.stateNumber      = fHeadNotePowerState;
-        context.stateFlags       = fHeadNotePowerArrayEntry->capabilityFlags;
-        context.errorLog         = "PM notification timeout (%s)\n";
-
+    if (logErrors && fResponseArray)
+    {
         switch ( fOutOfBandParameter ) {
             case kNotifyApps:
-                applyToInterested(gIOAppPowerStateInterest, logAppTimeouts, (void *) &context);
-            case kNotifyPriority:
+            case kNotifyCapabilityChangeApps:
+                if (fNotifyClientArray)
+                {
+                    IOPMInterestContext context;
+
+                    context.responseArray    = fResponseArray;
+                    context.notifyClients    = fNotifyClientArray;
+                    context.serialNumber     = fSerialNumber;
+                    context.messageType      = kIOMessageCopyClientID;
+                    context.notifyType       = kNotifyApps;
+                    context.isPreChange      = fIsPreChange;
+                    context.enableTracing    = false;
+                    context.us               = this;
+                    context.maxTimeRequested = 0;
+                    context.stateNumber      = fHeadNotePowerState;
+                    context.stateFlags       = fHeadNotePowerArrayEntry->capabilityFlags;
+                    context.changeFlags      = fHeadNoteChangeFlags;
+                    context.errorLog         = "PM notification timeout (%s)\n";
+
+                    applyToInterested(gIOAppPowerStateInterest, logAppTimeouts, (void *) &context);
+                }
+                break;
+
             default:
+                // kNotifyPriority, kNotifyCapabilityChangePriority
+                // TODO: identify the priority client that has not acked
+                PM_ERROR("PM priority notification timeout\n");
+                if (gIOKitDebug & kIOLogDebugPower)
+                {
+                    panic("PM priority notification timeout");
+                }
                 break;
         }
     }
 
     if (fResponseArray)
     {
-        // get rid of this stuff
         fResponseArray->release();
         fResponseArray = NULL;
     }
@@ -4703,8 +5131,6 @@ void IOService::cleanClientResponses ( bool logErrors )
         fNotifyClientArray->release();
         fNotifyClientArray = NULL;
     }
-
-    return;
 }
 
 //*********************************************************************************
@@ -4716,53 +5142,95 @@ void IOService::cleanClientResponses ( bool logErrors )
 // Return true if we don't have to wait for acknowledgements
 //*********************************************************************************
 
-bool IOService::tellClientsWithResponse (
-        int messageType )
-{
-    return tellClientsWithResponse( messageType, 0 );
-}
-
-bool IOService::tellClientsWithResponse (
-        int                 messageType,
-        IOPMMessageFilter   filter )
+bool IOService::tellClientsWithResponse ( int messageType )
 {
     IOPMInterestContext     context;
+    bool                    isRootDomain = IS_ROOT_DOMAIN;
 
 	PM_ASSERT_IN_GATE();
     assert( fResponseArray == NULL );
     assert( fNotifyClientArray == NULL );
 
+    RD_LOG("tellClientsWithResponse( %s, %d )\n",
+        getIOMessageString(messageType), fOutOfBandParameter);
+
     fResponseArray = OSArray::withCapacity( 1 );
     if (!fResponseArray)
         goto exit;
 
     fResponseArray->setCapacityIncrement(8);
-    fSerialNumber += 1;
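+    // Skip zero if the serial wraps: it forms the high 16 bits of every
+    // msgRef, and a ref of zero appears to be reserved for "no reply
+    // expected" (pre-acked capability clients get notifyRef = 0).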
+    if (++fSerialNumber == 0)
+        fSerialNumber++;
 
-    context.responseFlags    = fResponseArray;
+    context.responseArray    = fResponseArray;
     context.notifyClients    = 0;
     context.serialNumber     = fSerialNumber;
-    context.counter          = 0;
-    context.msgType          = messageType;
+    context.messageType      = messageType;
+    context.notifyType       = fOutOfBandParameter;
+    context.isPreChange      = fIsPreChange;
+    context.enableTracing    = false;
     context.us               = this;
     context.maxTimeRequested = 0;
     context.stateNumber      = fHeadNotePowerState;
     context.stateFlags       = fHeadNotePowerArrayEntry->capabilityFlags;
-    context.filterFunc       = filter;
+    context.changeFlags      = fHeadNoteChangeFlags;
+    context.messageFilter    = (isRootDomain) ?
+                               OSMemberFunctionCast(
+                                    IOPMMessageFilter,
+                                    this,
+                                    &IOPMrootDomain::systemMessageFilter) : 0;
 
     switch ( fOutOfBandParameter ) {
         case kNotifyApps:
             applyToInterested( gIOAppPowerStateInterest,
 				pmTellAppWithResponse, (void *) &context );
-            fNotifyClientArray = context.notifyClients;
+
+            if (isRootDomain &&
+                (fMachineState != kIOPM_OurChangeTellClientsPowerDown) &&
+                (fMachineState != kIOPM_SyncTellClientsPowerDown))
+            {
+                // Notify capability apps for tellChangeDown1()
+                // but not for askChangeDown().
+                context.notifyType  = kNotifyCapabilityChangeApps;
+                context.messageType = kIOMessageSystemCapabilityChange;
+                applyToInterested( gIOAppPowerStateInterest,
+                    pmTellCapabilityAppWithResponse, (void *) &context );
+                context.notifyType  = fOutOfBandParameter;
+                context.messageType = messageType;
+            }
+            context.maxTimeRequested = k30seconds;
 
             applyToInterested( gIOGeneralInterest,
 				pmTellClientWithResponse, (void *) &context );
+
+            fNotifyClientArray = context.notifyClients;
             break;
 
         case kNotifyPriority:
+            context.enableTracing = isRootDomain;
             applyToInterested( gIOPriorityPowerStateInterest,
 				pmTellClientWithResponse, (void *) &context );
+
+            if (isRootDomain)
+            {
+                // Notify capability clients for tellChangeDown2().
+                context.notifyType  = kNotifyCapabilityChangePriority;
+                context.messageType = kIOMessageSystemCapabilityChange;
+                applyToInterested( gIOPriorityPowerStateInterest,
+                    pmTellCapabilityClientWithResponse, (void *) &context );
+            }
+            break;
+
+        case kNotifyCapabilityChangeApps:
+            applyToInterested( gIOAppPowerStateInterest,
+				pmTellCapabilityAppWithResponse, (void *) &context );
+            fNotifyClientArray = context.notifyClients;
+            context.maxTimeRequested = k30seconds;
+            break;
+
+        case kNotifyCapabilityChangePriority:
+            applyToInterested( gIOPriorityPowerStateInterest,
+				pmTellCapabilityClientWithResponse, (void *) &context );
             break;
     }
 
@@ -4770,6 +5238,8 @@ bool IOService::tellClientsWithResponse (
     if ( !checkForDone() )
     {
         OUR_PMLog(kPMLogStartAckTimer, context.maxTimeRequested, 0);
+        if (context.enableTracing)
+            getPMRootDomain()->traceDetail( context.maxTimeRequested / 1000 );
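+        // context.maxTimeRequested appears to be in microseconds
+        // (cf. k30seconds), hence the /1000 for the millisecond-scale timer.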
 		start_ack_timer( context.maxTimeRequested / 1000, kMillisecondScale );	
         return false;
     }
@@ -4799,143 +5269,361 @@ exit:
 
 void IOService::pmTellAppWithResponse ( OSObject * object, void * arg )
 {
-    IOPMInterestContext *	context = (IOPMInterestContext *) arg;
+    IOPMInterestContext *   context = (IOPMInterestContext *) arg;
     IOServicePM *           pwrMgt = context->us->pwrMgt;
+    uint32_t                msgIndex, msgRef, msgType;
+#if LOG_APP_RESPONSE_TIMES
     AbsoluteTime            now;
-    UInt32                  refcon;
+#endif
 
     if (!OSDynamicCast(_IOServiceInterestNotifier, object))
+        return;
+
+    if (context->messageFilter &&
+        !context->messageFilter(context->us, object, context, 0, 0))
     {
-		// object must be an _IOServiceInterestNotifier.
+        if (kIOLogDebugPower & gIOKitDebug)
+        {
+            // Log client pid/name and client array index.
+            OSString * clientID = 0;
+            context->us->messageClient(kIOMessageCopyClientID, object, &clientID);
+            PM_LOG("%s DROP App %s, %s\n",
+                context->us->getName(),
+                getIOMessageString(context->messageType),
+                clientID ? clientID->getCStringNoCopy() : "");
+            if (clientID) clientID->release();
+        }
         return;
     }
 
-    // Lazily create app clients array.
+    // Create client array (for tracking purposes) only if the service
+    // has app clients. Usually only root domain does.
     if (0 == context->notifyClients)
-    {
         context->notifyClients = OSArray::withCapacity( 32 );
+
+    msgType  = context->messageType;
+    msgIndex = context->responseArray->getCount();
+    msgRef   = ((context->serialNumber & 0xFFFF) << 16) + (msgIndex & 0xFFFF);
+
+    OUR_PMLog(kPMLogAppNotify, msgType, msgRef);
+    if (kIOLogDebugPower & gIOKitDebug)
+    {
+        // Log client pid/name and client array index.
+        OSString * clientID = 0;
+        context->us->messageClient(kIOMessageCopyClientID, object, &clientID);
+        PM_LOG("%s MESG App(%u) %s, %s\n",
+            context->us->getName(),
+            msgIndex, getIOMessageString(msgType),
+            clientID ? clientID->getCStringNoCopy() : "");
+        if (clientID) clientID->release();
     }
 
-    if (context->filterFunc && !context->filterFunc(object, arg))
+#if LOG_APP_RESPONSE_TIMES
+    OSNumber * num;
+    clock_get_uptime(&now);
+    num = OSNumber::withNumber(AbsoluteTime_to_scalar(&now), sizeof(uint64_t) * 8);
+    if (num)
     {
-        // ack - needed to match the counter index at logAppTimeouts().
-        context->responseFlags->setObject(context->counter, kOSBooleanTrue);
-        if (context->notifyClients)
-            context->notifyClients->setObject(context->counter, kOSBooleanTrue);
+        context->responseArray->setObject(msgIndex, num);
+        num->release();
     }
     else
+#endif
+    context->responseArray->setObject(msgIndex, kOSBooleanFalse);
+
+    if (context->notifyClients)
+        context->notifyClients->setObject(msgIndex, object);
+
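+    // The app answers by passing this ref back through allowPowerChange()
+    // or cancelPowerChange(); responseValid() then matches the reply to
+    // this response array slot.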
+    context->us->messageClient(msgType, object, (void *) msgRef);
+}
+
+//*********************************************************************************
+// [static private] pmTellClientWithResponse
+//
+// We send a message to an in-kernel client, and we expect a response,
+// so we compute a cookie we can identify the response with.
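+//
+// Sketch of the cookie layout built below and decoded by responseValid():
+//
+//     msgRef = ((fSerialNumber & 0xFFFF) << 16) | (msgIndex & 0xFFFF)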
+//*********************************************************************************
+
+void IOService::pmTellClientWithResponse ( OSObject * object, void * arg )
+{
+    IOPowerStateChangeNotification  notify;
+    IOPMInterestContext *           context = (IOPMInterestContext *) arg;
+    OSObject *                      replied = kOSBooleanTrue;
+    _IOServiceInterestNotifier *    notifier;
+    uint32_t                        msgIndex, msgRef, msgType;
+    IOReturn                        retCode;
+
+    if (context->messageFilter &&
+        !context->messageFilter(context->us, object, context, 0, 0))
     {
-        refcon = ((context->serialNumber & 0xFFFF)<<16)
-               + (context->counter & 0xFFFF);
-        OUR_PMLog(kPMLogAppNotify, context->msgType, refcon);
+        if ((kIOLogDebugPower & gIOKitDebug) &&
+            (OSDynamicCast(_IOServiceInterestNotifier, object)))
+        {
+            _IOServiceInterestNotifier *n = (_IOServiceInterestNotifier *) object;
+            PM_LOG("%s DROP Client %s, notifier %p, handler %p\n",
+                context->us->getName(),
+                getIOMessageString(context->messageType),
+                object, n->handler);
+        }
+        return;
+    }
 
-        if (gIOKitDebug & kIOLogDebugPower)
+    notifier = OSDynamicCast(_IOServiceInterestNotifier, object);
+    msgType  = context->messageType;
+    msgIndex = context->responseArray->getCount();
+    msgRef   = ((context->serialNumber & 0xFFFF) << 16) + (msgIndex & 0xFFFF);
+
+    IOServicePM * pwrMgt = context->us->pwrMgt;
+    if (gIOKitDebug & kIOLogPower) {
+		OUR_PMLog(kPMLogClientNotify, msgRef, msgType);
+		if (OSDynamicCast(IOService, object)) {
+			const char *who = ((IOService *) object)->getName();
+			gPlatform->PMLog(who, kPMLogClientNotify, (uintptr_t) object, 0);
+		}
+        else if (notifier) {
+			OUR_PMLog(kPMLogClientNotify, (uintptr_t) notifier->handler, 0);
+        }
+    }
+    if ((kIOLogDebugPower & gIOKitDebug) && notifier)
+    {
+        PM_LOG("%s MESG Client %s, notifier %p, handler %p\n",
+            context->us->getName(),
+            getIOMessageString(msgType),
+            object, notifier->handler);
+    }
+
+    notify.powerRef    = (void *)(uintptr_t) msgRef;
+    notify.returnValue = 0;
+    notify.stateNumber = context->stateNumber;
+    notify.stateFlags  = context->stateFlags;
+
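+    // The 32-bit trace detail packs, high to low: the response array index
+    // (8 bits), the low 12 bits of the message type, and the low 12 bits
+    // of the handler address.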
+    if (context->enableTracing && (notifier != 0))
+    {
+        uint32_t detail = ((msgIndex & 0xff) << 24) |
+                          ((msgType & 0xfff) << 12) |
+                          (((uintptr_t) notifier->handler) & 0xfff);
+        getPMRootDomain()->traceDetail( detail );
+    }
+
+    retCode = context->us->messageClient(msgType, object, (void *) &notify);
+    if ( kIOReturnSuccess == retCode )
+    {
+        if ( 0 == notify.returnValue )
         {
-            // Log client pid/name and associated index.
-            OSString * clientID = 0;
-            context->us->messageClient(kIOMessageCopyClientID, object, &clientID);
-            PM_DEBUG("[Notify %u] message 0x%x to %s\n",
-                (uint32_t) context->counter,
-                context->msgType,
-                clientID ? clientID->getCStringNoCopy() : "");
-            if (clientID) clientID->release();
+            // client doesn't want time to respond
+			OUR_PMLog(kPMLogClientAcknowledge, msgRef, (uintptr_t) object);
+        }
+        else
+        {
+            replied = kOSBooleanFalse;
+            if ( notify.returnValue > context->maxTimeRequested )
+            {
+                if (notify.returnValue > kPriorityClientMaxWait)
+                {
+                    context->maxTimeRequested = kPriorityClientMaxWait;
+                    PM_ERROR("%s: client %p returned %llu for %s\n",
+                        context->us->getName(),
+                        notifier ? (void *)  notifier->handler : object,
+                        (uint64_t) notify.returnValue,
+                        getIOMessageString(msgType));
+                }
+                else
+                    context->maxTimeRequested = notify.returnValue;
+            }
         }
+    }
+    else
+    {
+        // not a client of ours
+        // so we won't be waiting for response
+		OUR_PMLog(kPMLogClientAcknowledge, msgRef, 0);
+    }
+
+    context->responseArray->setObject(msgIndex, replied);
+}
+
+//*********************************************************************************
+// [static private] pmTellCapabilityAppWithResponse
+//*********************************************************************************
 
+void IOService::pmTellCapabilityAppWithResponse ( OSObject * object, void * arg )
+{
+    IOPMSystemCapabilityChangeParameters msgArg;
+    IOPMInterestContext *       context = (IOPMInterestContext *) arg;
+    OSObject *                  replied = kOSBooleanTrue;
+    IOServicePM *               pwrMgt = context->us->pwrMgt;
+    uint32_t                    msgIndex, msgRef, msgType;
+#if LOG_APP_RESPONSE_TIMES
+    AbsoluteTime                now;
+#endif
+
+    if (!OSDynamicCast(_IOServiceInterestNotifier, object))
+        return;
+
+    memset(&msgArg, 0, sizeof(msgArg));
+    if (context->messageFilter &&
+        !context->messageFilter(context->us, object, context, &msgArg, &replied))
+    {
+        return;
+    }
+
+    // Create client array (for tracking purposes) only if the service
+    // has app clients. Usually only root domain does.
+    if (0 == context->notifyClients)
+        context->notifyClients = OSArray::withCapacity( 32 );
+
+    msgType  = context->messageType;
+    msgIndex = context->responseArray->getCount();
+    msgRef   = ((context->serialNumber & 0xFFFF) << 16) + (msgIndex & 0xFFFF);
+
+    OUR_PMLog(kPMLogAppNotify, msgType, msgRef);
+    if (kIOLogDebugPower & gIOKitDebug)
+    {
+        // Log client pid/name and client array index.
+        OSString * clientID = 0;
+        context->us->messageClient(kIOMessageCopyClientID, object, &clientID);
+        PM_LOG("%s MESG App(%u) %s, wait %u, %s\n",
+            context->us->getName(),
+            msgIndex, getIOMessageString(msgType),
+            (replied != kOSBooleanTrue),
+            clientID ? clientID->getCStringNoCopy() : "");
+        if (clientID) clientID->release();
+    }
+
+    msgArg.notifyRef = msgRef;
+    msgArg.maxWaitForReply = 0;
+
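+    // Clients the message filter marked as already-replied still get a
+    // placeholder entry, so response array indices stay aligned with the
+    // client array (cf. the index matching in logAppTimeouts).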
+    if (replied == kOSBooleanTrue)
+    {
+        msgArg.notifyRef = 0;
+        context->responseArray->setObject(msgIndex, kOSBooleanTrue);
+        if (context->notifyClients)
+            context->notifyClients->setObject(msgIndex, kOSBooleanTrue);
+    }
+    else
+    {
 #if LOG_APP_RESPONSE_TIMES
         OSNumber * num;
         clock_get_uptime(&now);
         num = OSNumber::withNumber(AbsoluteTime_to_scalar(&now), sizeof(uint64_t) * 8);
         if (num)
         {
-            context->responseFlags->setObject(context->counter, num);
+            context->responseArray->setObject(msgIndex, num);
             num->release();
         }
         else
 #endif
-        context->responseFlags->setObject(context->counter, kOSBooleanFalse);
-        
-        if (context->notifyClients)
-            context->notifyClients->setObject(context->counter, object);
+        context->responseArray->setObject(msgIndex, kOSBooleanFalse);
 
-        context->us->messageClient(context->msgType, object, (void *)refcon);
-        if ( context->maxTimeRequested < k30seconds )
-        {
-            context->maxTimeRequested = k30seconds;
-        }
+        if (context->notifyClients)
+            context->notifyClients->setObject(msgIndex, object);
     }
 
-    context->counter++;
+    context->us->messageClient(msgType, object, (void *) &msgArg, sizeof(msgArg));
 }
 
 //*********************************************************************************
-// [static private] pmTellClientWithResponse
-//
-// We send a message to an in-kernel client, and we expect a response, so we compute a
-// cookie we can identify the response with.
-// If it doesn't understand the notification (it is not power-management savvy)
-// we won't wait for it to prepare for sleep.  If it tells us via a return code
-// in the passed struct that it is currently ready, we won't wait for it to prepare.
-// If it tells us via the return code in the struct that it does need time, we will chill.
+// [static private] pmTellCapabilityClientWithResponse
 //*********************************************************************************
 
-void IOService::pmTellClientWithResponse ( OSObject * object, void * arg )
+void IOService::pmTellCapabilityClientWithResponse(
+    OSObject * object, void * arg )
 {
+    IOPMSystemCapabilityChangeParameters msgArg;
     IOPMInterestContext *           context = (IOPMInterestContext *) arg;
-    IOPowerStateChangeNotification  notify;
-    UInt32                          refcon;
+    OSObject *                      replied = kOSBooleanTrue;
+    _IOServiceInterestNotifier *    notifier;
+    uint32_t                        msgIndex, msgRef, msgType;
     IOReturn                        retCode;
-    OSObject *                      theFlag;
 
-    if (context->filterFunc && !context->filterFunc(object, arg))
+    memset(&msgArg, 0, sizeof(msgArg));
+    if (context->messageFilter &&
+        !context->messageFilter(context->us, object, context, &msgArg, 0))
+    {
+        if ((kIOLogDebugPower & gIOKitDebug) &&
+            (OSDynamicCast(_IOServiceInterestNotifier, object)))
+        {
+            _IOServiceInterestNotifier *n = (_IOServiceInterestNotifier *) object;
+            PM_LOG("%s DROP Client %s, notifier %p, handler %p\n",
+                context->us->getName(),
+                getIOMessageString(context->messageType),
+                object, n->handler);
+        }
         return;
+    }
 
-    refcon = ((context->serialNumber & 0xFFFF)<<16) + (context->counter & 0xFFFF);
-    context->responseFlags->setObject(context->counter, kOSBooleanFalse);
+    notifier = OSDynamicCast(_IOServiceInterestNotifier, object);
+    msgType  = context->messageType;
+    msgIndex = context->responseArray->getCount();
+    msgRef   = ((context->serialNumber & 0xFFFF) << 16) + (msgIndex & 0xFFFF);
 
     IOServicePM * pwrMgt = context->us->pwrMgt;
     if (gIOKitDebug & kIOLogPower) {
-		OUR_PMLog(kPMLogClientNotify, refcon, (UInt32) context->msgType);
+		OUR_PMLog(kPMLogClientNotify, msgRef, msgType);
 		if (OSDynamicCast(IOService, object)) {
 			const char *who = ((IOService *) object)->getName();
-			gPlatform->PMLog(who,
-				kPMLogClientNotify, * (UInt32 *) object, (UInt64) object);
-		} else if (OSDynamicCast(_IOServiceInterestNotifier, object)) {
-			_IOServiceInterestNotifier *n = (_IOServiceInterestNotifier *) object;
-			OUR_PMLog(kPMLogClientNotify, (UInt64) n->handler, 0);
+			gPlatform->PMLog(who, kPMLogClientNotify, (uintptr_t) object, 0);
+		}
+        else if (notifier) {
+			OUR_PMLog(kPMLogClientNotify, (uintptr_t) notifier->handler, 0);
 		}
     }
+    if ((kIOLogDebugPower & gIOKitDebug) && notifier)
+    {
+        PM_LOG("%s MESG Client %s, notifier %p, handler %p\n",
+            context->us->getName(),
+            getIOMessageString(msgType),
+            object, notifier->handler);
+    }
 
-    notify.powerRef = (void *)refcon;
-    notify.returnValue = 0;
-    notify.stateNumber = context->stateNumber;
-    notify.stateFlags = context->stateFlags;
-    retCode = context->us->messageClient(context->msgType,object,(void *)&notify);
-    if ( retCode == kIOReturnSuccess )
+    msgArg.notifyRef = msgRef;
+    msgArg.maxWaitForReply = 0;
+
+    if (context->enableTracing && (notifier != 0))
+    {
+        uint32_t detail = ((msgIndex & 0xff) << 24) |
+                          ((msgType & 0xfff) << 12) |
+                          (((uintptr_t) notifier->handler) & 0xfff);
+        getPMRootDomain()->traceDetail( detail );
+    }
+
+    retCode = context->us->messageClient(
+        msgType, object, (void *) &msgArg, sizeof(msgArg));
+
+    if ( kIOReturnSuccess == retCode )
     {
-        if ( notify.returnValue == 0 )
+        if ( 0 == msgArg.maxWaitForReply )
         {
             // client doesn't want time to respond
-            context->responseFlags->replaceObject(context->counter, kOSBooleanTrue);
-			OUR_PMLog(kPMLogClientAcknowledge, refcon, (UInt64) object);
-        } else {
-            // it does want time, and it hasn't responded yet
-            theFlag = context->responseFlags->getObject(context->counter);
-            if ( kOSBooleanTrue != theFlag ) 
+			OUR_PMLog(kPMLogClientAcknowledge, msgRef, (uintptr_t) object);
+        }
+        else
+        {
+            replied = kOSBooleanFalse;
+            if ( msgArg.maxWaitForReply > context->maxTimeRequested )
             {
-                // so note its time requirement
-                if ( context->maxTimeRequested < notify.returnValue ) 
+                if (msgArg.maxWaitForReply > kCapabilityClientMaxWait)
                 {
-                    context->maxTimeRequested = notify.returnValue;
+                    context->maxTimeRequested = kCapabilityClientMaxWait;
+                    PM_ERROR("%s: client %p returned %u for %s\n",
+                        context->us->getName(),
+                        notifier ? (void *) notifier->handler : object,
+                        msgArg.maxWaitForReply,
+                        getIOMessageString(msgType));
                 }
+                else
+                    context->maxTimeRequested = msgArg.maxWaitForReply;
             }
         }
-    } else {
-		OUR_PMLog(kPMLogClientAcknowledge, refcon, 0);
+    }
+    else
+    {
         // not a client of ours
         // so we won't be waiting for response
-        context->responseFlags->replaceObject(context->counter, kOSBooleanTrue);
+		OUR_PMLog(kPMLogClientAcknowledge, msgRef, 0);
     }
-    context->counter++;
+
+    context->responseArray->setObject(msgIndex, replied);
 }
 
 //*********************************************************************************
@@ -4974,23 +5662,29 @@ void IOService::tellChangeUp ( unsigned long )
 //*********************************************************************************
 
 void IOService::tellClients ( int messageType )
-{
-    tellClients( messageType, 0 );
-}
-
-void IOService::tellClients ( int messageType, IOPMMessageFilter filter )
 {
     IOPMInterestContext     context;
 
-    context.msgType     = messageType;
-    context.us          = this;
-    context.stateNumber = fHeadNotePowerState;
-    context.stateFlags  = fHeadNotePowerArrayEntry->capabilityFlags;
-    context.filterFunc  = filter;
-
+    RD_LOG("tellClients( %s )\n", getIOMessageString(messageType));
+
+    memset(&context, 0, sizeof(context));
+    context.messageType   = messageType;
+    context.isPreChange   = fIsPreChange;
+    context.us            = this;
+    context.stateNumber   = fHeadNotePowerState;
+    context.stateFlags    = fHeadNotePowerArrayEntry->capabilityFlags;
+    context.changeFlags   = fHeadNoteChangeFlags;
+    context.messageFilter = (IS_ROOT_DOMAIN) ?
+                            OSMemberFunctionCast(
+                                IOPMMessageFilter,
+                                this,
+                                &IOPMrootDomain::systemMessageFilter) : 0;
+
+    context.notifyType    = kNotifyPriority;
     applyToInterested( gIOPriorityPowerStateInterest,
         tellKernelClientApplier, (void *) &context );
-    
+
+    context.notifyType    = kNotifyApps;
     applyToInterested( gIOAppPowerStateInterest,
         tellAppClientApplier, (void *) &context );
 
@@ -5006,18 +5700,40 @@ void IOService::tellClients ( int messageType, IOPMMessageFilter filter )
 
 static void tellKernelClientApplier ( OSObject * object, void * arg )
 {
-    IOPMInterestContext *           context = (IOPMInterestContext *) arg;
     IOPowerStateChangeNotification	notify;
+    IOPMInterestContext *           context = (IOPMInterestContext *) arg;
 
-    if (context->filterFunc && !context->filterFunc(object, arg))
+    if (context->messageFilter &&
+        !context->messageFilter(context->us, object, context, 0, 0))
+    {
+        if ((kIOLogDebugPower & gIOKitDebug) &&
+            (OSDynamicCast(_IOServiceInterestNotifier, object)))
+        {
+            _IOServiceInterestNotifier *n = (_IOServiceInterestNotifier *) object;
+            PM_LOG("%s DROP Client %s, notifier %p, handler %p\n",
+                context->us->getName(),
+                IOService::getIOMessageString(context->messageType),
+                object, n->handler);
+        }
         return;
+    }
 
     notify.powerRef     = (void *) 0;
     notify.returnValue	= 0;
     notify.stateNumber	= context->stateNumber;
     notify.stateFlags	= context->stateFlags;
 
-    context->us->messageClient(context->msgType, object, &notify);
+    context->us->messageClient(context->messageType, object, &notify);
+
+    if ((kIOLogDebugPower & gIOKitDebug) &&
+        (OSDynamicCast(_IOServiceInterestNotifier, object)))
+    {
+        _IOServiceInterestNotifier *n = (_IOServiceInterestNotifier *) object;
+        PM_LOG("%s MESG Client %s, notifier %p, handler %p\n",
+            context->us->getName(),
+            IOService::getIOMessageString(context->messageType),
+            object, n->handler);
+    }
 }
 
 //*********************************************************************************
@@ -5028,12 +5744,38 @@ static void tellKernelClientApplier ( OSObject * object, void * arg )
 
 static void tellAppClientApplier ( OSObject * object, void * arg )
 {
-    IOPMInterestContext *   context = (IOPMInterestContext *) arg;
+    IOPMInterestContext * context = (IOPMInterestContext *) arg;
 
-    if (context->filterFunc && !context->filterFunc(object, arg))
+    if (context->messageFilter &&
+        !context->messageFilter(context->us, object, context, 0, 0))
+    {
+        if (kIOLogDebugPower & gIOKitDebug)
+        {
+            // Log client pid/name and client array index.
+            OSString * clientID = 0;
+            context->us->messageClient(kIOMessageCopyClientID, object, &clientID);
+            PM_LOG("%s DROP App %s, %s\n",
+                context->us->getName(),
+                IOService::getIOMessageString(context->messageType),
+                clientID ? clientID->getCStringNoCopy() : "");
+            if (clientID) clientID->release();
+        }
         return;
+    }
+
+    if (kIOLogDebugPower & gIOKitDebug)
+    {
+        // Log client pid/name and client array index.
+        OSString * clientID = 0;
+        context->us->messageClient(kIOMessageCopyClientID, object, &clientID);
+        PM_LOG("%s MESG App %s, %s\n",
+            context->us->getName(),
+            IOService::getIOMessageString(context->messageType),
+            clientID ? clientID->getCStringNoCopy() : "");
+        if (clientID) clientID->release();
+    }
 
-    context->us->messageClient(context->msgType, object, 0);
+    context->us->messageClient(context->messageType, object, 0);
 }
 
 //*********************************************************************************
@@ -5069,12 +5811,11 @@ bool IOService::checkForDone ( void )
 // [public] responseValid
 //*********************************************************************************
 
-bool IOService::responseValid ( unsigned long x, int pid )
+bool IOService::responseValid ( uint32_t refcon, int pid )
 {
     UInt16			serialComponent;
     UInt16			ordinalComponent;
     OSObject *		theFlag;
-    unsigned long	refcon = (unsigned long) x;
 
     serialComponent  = (refcon >> 16) & 0xFFFF;
     ordinalComponent = (refcon & 0xFFFF);
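+
+    // Mirrors the msgRef encoding on the send side: serial number in the
+    // high 16 bits, response array index in the low 16 bits.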
@@ -5102,18 +5843,37 @@ bool IOService::responseValid ( unsigned long x, int pid )
 #if LOG_APP_RESPONSE_TIMES
         AbsoluteTime	now;
         AbsoluteTime	start;
-        uint64_t	nsec;
+        uint64_t        nsec;
+        OSString        *name = IOCopyLogNameForPID(pid);
 
         clock_get_uptime(&now);
         AbsoluteTime_to_scalar(&start) = num->unsigned64BitValue();
         SUB_ABSOLUTETIME(&now, &start);
         absolutetime_to_nanoseconds(now, &nsec);
+
+        PMEventDetails *details = PMEventDetails::eventDetails(
+                                    kIOPMEventTypeAppResponse,				// type
+                                    name ? name->getCStringNoCopy() : "",   // who
+                                    (uintptr_t)pid,							// owner unique
+                                    NULL,									// interest name
+                                    0,										// old
+                                    0,										// new
+                                    0,										// result
+                                    NS_TO_US(nsec));						// usec completion time
+		
+        getPMRootDomain()->recordAndReleasePMEventGated( details );
+
+        if (kIOLogDebugPower & gIOKitDebug)
+        {
+            PM_LOG("Ack(%u) %u ms\n",
+                (uint32_t) ordinalComponent,
+                NS_TO_MS(nsec));
+        }
 
         // > 100 ms
         if (nsec > LOG_APP_RESPONSE_TIMES)
         {
-            OSString * name = IOCopyLogNameForPID(pid);
-            PM_DEBUG("PM response took %d ms (%s)\n", NS_TO_MS(nsec),
+            PM_LOG("PM response took %d ms (%s)\n", NS_TO_MS(nsec),
                 name ? name->getCStringNoCopy() : "");
 
             if (nsec > LOG_APP_RESPONSE_MSG_TRACER)
@@ -5123,22 +5883,17 @@ bool IOService::responseValid ( unsigned long x, int pid )
                     gIOPMStatsApplicationResponseSlow, 
                     name ? name->getCStringNoCopy() : "", 0,
                     NS_TO_MS(nsec), pid);
-            }
-            
-            if (name)
-            name->release();
+            }
         }
+
+        if (name)
+            name->release();
 #endif
         theFlag = kOSBooleanFalse;
     }
 
     if ( kOSBooleanFalse == theFlag ) 
     {
-        if ((gIOKitDebug & kIOLogDebugPower) &&
-            (fOutOfBandParameter == kNotifyApps))
-        {
-            PM_DEBUG("[Notify %u] acked\n", (uint32_t) ordinalComponent);
-        }
         fResponseArray->replaceObject(ordinalComponent, kOSBooleanTrue);
     }
     
@@ -5151,9 +5906,6 @@ bool IOService::responseValid ( unsigned long x, int pid )
 // Our power state is about to lower, and we have notified applications
 // and kernel clients, and one of them has acknowledged.  If this is the last to do
 // so, and all acknowledgements are positive, we continue with the power change.
-//
-// We serialize this processing with timer expiration with a command gate on the
-// power management workloop, which the timer expiration is command gated to as well.
 //*********************************************************************************
 
 IOReturn IOService::allowPowerChange ( unsigned long refcon )
@@ -5172,7 +5924,7 @@ IOReturn IOService::allowPowerChange ( unsigned long refcon )
 
 	request->fArg0 = (void *) refcon;
 	request->fArg1 = (void *) proc_selfpid();
-    request->fArg2 = (void *) 0;
+	request->fArg2 = (void *) 0;
 	submitPMRequest( request );
 
 	return kIOReturnSuccess;
@@ -5192,9 +5944,6 @@ IOReturn IOService::serializedAllowPowerChange2 ( unsigned long refcon )
 // Our power state is about to lower, and we have notified applications
 // and kernel clients, and one of them has vetoed the change.  If this is the last
 // client to respond, we abandon the power change.
-//
-// We serialize this processing with timer expiration with a command gate on the
-// power management workloop, which the timer expiration is command gated to as well.
 //*********************************************************************************
 
 IOReturn IOService::cancelPowerChange ( unsigned long refcon )
@@ -5255,6 +6004,9 @@ void IOService::clampPowerOn ( unsigned long duration )
 }
 #endif /* !__LP64__ */
 
+// MARK: -
+// MARK: Driver Overrides
+
 //*********************************************************************************
 // [public] setPowerState
 //
@@ -5285,8 +6037,8 @@ unsigned long IOService::maxCapabilityForDomainState ( IOPMPowerFlags domainStat
    }
    for ( i = fNumberOfPowerStates - 1; i >= 0; i-- )
    {
-       if ( (domainState & fPowerStates[i].inputPowerRequirement) ==
-			               fPowerStates[i].inputPowerRequirement )
+       if ( (domainState & fPowerStates[i].inputPowerFlags) ==
+			               fPowerStates[i].inputPowerFlags )
        {
            return i;
        }
@@ -5312,8 +6064,8 @@ unsigned long IOService::initialPowerStateForDomainState ( IOPMPowerFlags domain
     }
     for ( i = fNumberOfPowerStates - 1; i >= 0; i-- )
     {
-        if ( (domainState & fPowerStates[i].inputPowerRequirement) ==
-			fPowerStates[i].inputPowerRequirement )
+        if ( (domainState & fPowerStates[i].inputPowerFlags) ==
+			fPowerStates[i].inputPowerFlags )
         {
             return i;
         }
@@ -5339,8 +6091,8 @@ unsigned long IOService::powerStateForDomainState ( IOPMPowerFlags domainState )
     }
     for ( i = fNumberOfPowerStates - 1; i >= 0; i-- )
     {
-        if ( (domainState & fPowerStates[i].inputPowerRequirement) ==
-			fPowerStates[i].inputPowerRequirement )
+        if ( (domainState & fPowerStates[i].inputPowerFlags) ==
+			fPowerStates[i].inputPowerFlags )
         {
             return i;
         }
@@ -5420,6 +6172,9 @@ void IOService::systemWillShutdown( IOOptionBits specifier )
 		rootDomain->acknowledgeSystemWillShutdown( this );
 }
 
+// MARK: -
+// MARK: PM State Machine
+
 //*********************************************************************************
 // [private static] acquirePMRequest
 //*********************************************************************************
@@ -5473,7 +6228,7 @@ void IOService::submitPMRequest( IOPMRequest * request )
 	assert( gIOPMReplyQueue );
 	assert( gIOPMRequestQueue );
 
-	PM_TRACE("[+ %02lx] %p [%p %s] %p %p %p\n",
+	PM_LOG1("[+ %02lx] %p [%p %s] %p %p %p\n",
 		(long)request->getType(), request,
 		request->getTarget(), request->getTarget()->getName(),
 		request->fArg0, request->fArg1, request->fArg2);
@@ -5493,7 +6248,7 @@ void IOService::submitPMRequest( IOPMRequest ** requests, IOItemCount count )
 	for (IOItemCount i = 0; i < count; i++)
 	{
 		IOPMRequest * req = requests[i];
-		PM_TRACE("[+ %02lx] %p [%p %s] %p %p %p\n",
+		PM_LOG1("[+ %02lx] %p [%p %s] %p %p %p\n",
 			(long)req->getType(), req,
 			req->getTarget(), req->getTarget()->getName(),
 			req->fArg0, req->fArg1, req->fArg2);
@@ -5504,66 +6259,39 @@ void IOService::submitPMRequest( IOPMRequest ** requests, IOItemCount count )
 
 //*********************************************************************************
 // [private] servicePMRequestQueue
+//
+// Called from IOPMRequestQueue::checkForWork().
 //*********************************************************************************
 
 bool IOService::servicePMRequestQueue(
 	IOPMRequest *		request,
 	IOPMRequestQueue *	queue )
 {
-	// Calling PM methods without PMinit() is not allowed, fail the requests.
-
-	if (!initialized)
-	{
-		PM_DEBUG("%s: PM not initialized\n", getName());
-        goto done;
-	}
-
-	// Create an IOPMWorkQueue on demand, when the initial PM request is
-	// received.
+    bool more;
 
-	if (!fPMWorkQueue)
-	{
-		// Allocate and attach an IOPMWorkQueue on demand to avoid taking
-		// the work loop lock in PMinit(), which may deadlock with certain
-		// drivers / families.
-
-		fPMWorkQueue = IOPMWorkQueue::create(
-			/* target */	this,
-			/* Work */		OSMemberFunctionCast(IOPMWorkQueue::Action, this,
-								&IOService::servicePMRequest),
-			/* Done */		OSMemberFunctionCast(IOPMWorkQueue::Action, this,
-								&IOService::retirePMRequest)
-			);
-
-		if (fPMWorkQueue &&
-			(gIOPMWorkLoop->addEventSource(fPMWorkQueue) != kIOReturnSuccess))
-		{
-			PM_ERROR("%s: add PM work queue failed\n", getName());
-			fPMWorkQueue->release();
-			fPMWorkQueue = 0;
-		}
+    if (initialized)
+    {
+        // Work queue will immediately execute the queued request if possible.
+        // If execution blocks, the work queue will wait for a producer signal.
+        // Only need to signal more when completing attached requests.
 
-		if (!fPMWorkQueue)
-		{
-			PM_ERROR("%s: no PM work queue (type %02lx)\n",
-				getName(), (long)request->getType());
-			goto done;
-		}
-	}
+        more = gIOPMWorkQueue->queuePMRequest(request, pwrMgt);
+        return more;
+    }
 
-	fPMWorkQueue->queuePMRequest(request);
-	return false;	// do not signal more
+    // Calling PM without PMinit() is not allowed; fail the request.
 
-done:
+    PM_LOG("%s: PM not initialized\n", getName());
 	fAdjustPowerScheduled = false;
-	gIOPMFreeQueue->queuePMRequest(request);
-	return false;	// do not signal more
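+	// When the completion queue asks to be signaled, nudge the work
+	// queue's producer count so it rescans for requests unblocked by
+	// this completion (same pattern as servicePMFreeQueue below).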
+	more = gIOPMFreeQueue->queuePMRequest(request);
+	if (more) gIOPMWorkQueue->incrementProducerCount();
+	return more;
 }
 
 //*********************************************************************************
 // [private] servicePMFreeQueue
 //
-// Called by the request completion to recycle a completed request.
+// Called from IOPMCompletionQueue::checkForWork().
 //*********************************************************************************
 
 bool IOService::servicePMFreeQueue(
@@ -5575,26 +6303,8 @@ bool IOService::servicePMFreeQueue(
 
     if (root && (root != request))
         more = true;
-
-    if (fLockedFlags.PMStop && fPMWorkQueue && fPMWorkQueue->isEmpty())
-    {
-        // Driver PMstop'ed and the work queue is empty.
-        // Detach and destroy the work queue to avoid the similar cleanup by
-        // PMfree(), which is deadlock prone. After PMstop() if driver calls PM,
-        // or a request from power parent or child arrives, it is possible to
-        // create/cleanup work queue more than once. Should be rare.
-
-        gIOPMWorkLoop->removeEventSource(fPMWorkQueue);
-        fPMWorkQueue->release();
-        fPMWorkQueue = 0;
-
-        if ( fIdleTimerEventSource != NULL ) {
-            fIdleTimerEventSource->disable();
-            gIOPMWorkLoop->removeEventSource(fIdleTimerEventSource);
-            fIdleTimerEventSource->release();
-            fIdleTimerEventSource = NULL;
-        }		
-    }
+    if (more)
+        gIOPMWorkQueue->incrementProducerCount();
 
 	releasePMRequest( request );
 	return more;
@@ -5610,14 +6320,14 @@ bool IOService::retirePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
 {
 	assert(request && queue);
 
-	PM_TRACE("[- %02x] %p [%p %s] State %d, Busy %d\n",
+	PM_LOG1("[- %02x] %p [%p %s] state %d, busy %d\n",
 		request->getType(), request, this, getName(),
 		fMachineState, gIOPMBusyCount);
 
 	// Catch requests created by idleTimerExpired().
 
 	if ((request->getType() == kIOPMRequestTypeActivityTickle) &&
-		(request->fArg1 == (void *) false))
+	    (request->fArg1 == (void *) (uintptr_t) false))
 	{
 		// Idle timer power drop request completed.
 		// Restart the idle timer if deviceDesire can go lower, otherwise set
@@ -5633,8 +6343,10 @@ bool IOService::retirePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
 			fIdleTimerStopped = true;
 	}
 
-	gIOPMFreeQueue->queuePMRequest( request );
-	return true;
+    // If the request is linked, the work queue has already incremented its
+    // producer count.
+
+	return (gIOPMFreeQueue->queuePMRequest( request ));
 }
 
 //*********************************************************************************
@@ -5656,7 +6368,8 @@ bool IOService::isPMBlocked ( IOPMRequest * request, int count )
             // 5 = kDriverCallInformPreChange
             // 6 = kDriverCallInformPostChange
             // 7 = kDriverCallSetPowerState
-			if (fLockedFlags.DriverCallBusy) reason = 5 + fDriverCallReason;
+			if (fDriverCallBusy)
+                reason = 5 + fDriverCallReason;
 			break;
 		}
 
@@ -5691,7 +6404,7 @@ bool IOService::isPMBlocked ( IOPMRequest * request, int count )
 	{
 		if (count)
 		{
-			PM_TRACE("[B %02x] %p [%p %s] State %d, Reason %d\n",
+			PM_LOG1("[B %02x] %p [%p %s] state %d, reason %d\n",
 				request->getType(), request, this, getName(),
 				fMachineState, reason);
 		}
@@ -5717,10 +6430,11 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
 
 	while (isPMBlocked(request, loop++) == false)
 	{
-		PM_TRACE("[W %02x] %p [%p %s] State %d\n",
+		PM_LOG1("[W %02x] %p [%p %s] state %d\n",
 			request->getType(), request, this, getName(), fMachineState);
 
 		gIOPMRequest = request;
+        gIOPMWorkCount++;
 
 		// Every PM machine state must be handled in one of the cases below.
 
@@ -5731,40 +6445,87 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
 				break;
 
 			case kIOPM_OurChangeTellClientsPowerDown:
-				// our change, was it vetoed?
+                // Root domain might self-cancel due to assertions.
+                if (IS_ROOT_DOMAIN)
+                {
+                    bool cancel = (bool) fDoNotPowerDown;
+                    getPMRootDomain()->askChangeDownDone(
+                        &fHeadNoteChangeFlags, &cancel);
+                    fDoNotPowerDown = cancel;
+                }
+
+                // askChangeDown() done, was it vetoed?
 				if (!fDoNotPowerDown)
 				{
+                    if (IS_ROOT_DOMAIN) {
+                        PMEventDetails *details = PMEventDetails::eventDetails(
+                                                    kIOPMEventTypeAppNotificationsFinished,
+                                                    NULL,
+                                                    0,
+                                                    0);
+						
+						getPMRootDomain()->recordAndReleasePMEventGated( details );
+                    }
+
 					// no, we can continue
 					OurChangeTellClientsPowerDown();
 				}
 				else
 				{
+                    if (IS_ROOT_DOMAIN) {
+                        PMEventDetails *details = PMEventDetails::eventDetails(
+                                                    kIOPMEventTypeSleepDone,
+                                                    NULL,
+                                                    1, /* reason: 1 == Ask clients succeeded */
+                                                    kIOReturnAborted); /* result */
+			  
+                        getPMRootDomain()->recordAndReleasePMEventGated( details );
+                    }
+
 					OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState);
 					PM_ERROR("%s: idle cancel\n", fName);
 					// yes, rescind the warning
 					tellNoChangeDown(fHeadNotePowerState);
 					// mark the change note un-actioned
-					fHeadNoteFlags |= kIOPMNotDone;
+					fHeadNoteChangeFlags |= kIOPMNotDone;
 					// and we're done
-					all_done();
+					OurChangeFinish();
 				}
 				break;
 
 			case kIOPM_OurChangeTellPriorityClientsPowerDown:
-				// our change, should it be acted on still?
+				// tellChangeDown(kNotifyApps) done, was it cancelled?
 				if (fDoNotPowerDown)
 				{
+                    if (IS_ROOT_DOMAIN) {
+						PMEventDetails *details = PMEventDetails::eventDetails(
+                                                    kIOPMEventTypeSleepDone,
+                                                    NULL,
+                                                    2, /* reason: 2 == Client cancelled wake */
+                                                    kIOReturnAborted); /* result */
+						
+						getPMRootDomain()->recordAndReleasePMEventGated( details );
+                    }
 					OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState);
 					PM_ERROR("%s: idle revert\n", fName);
 					// no, tell clients we're back in the old state
 					tellChangeUp(fCurrentPowerState);
 					// mark the change note un-actioned
-					fHeadNoteFlags |= kIOPMNotDone;
+					fHeadNoteChangeFlags |= kIOPMNotDone;
 					// and we're done
-					all_done();
+					OurChangeFinish();
 				}
 				else
 				{
+                    if (IS_ROOT_DOMAIN) {
+						PMEventDetails *details = PMEventDetails::eventDetails(
+                                                    kIOPMEventTypeAppNotificationsFinished,
+                                                    NULL,
+                                                    2, /* reason: 2 == TellPriorityClientsDone */
+                                                    kIOReturnSuccess); /* result */
+						
+						getPMRootDomain()->recordAndReleasePMEventGated( details );
+                    }
 					// yes, we can continue
 					OurChangeTellPriorityClientsPowerDown();  
 				}
@@ -5786,44 +6547,40 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
 				OurChangeNotifyInterestedDriversDidChange();
 				break;
 
+            case kIOPM_OurChangeTellCapabilityDidChange:
+                OurChangeTellCapabilityDidChange();
+                break;
+
 			case kIOPM_OurChangeFinish:
 				OurChangeFinish();
 				break;
 
-			case kIOPM_ParentDownTellPriorityClientsPowerDown:
-				ParentDownTellPriorityClientsPowerDown();
+			case kIOPM_ParentChangeTellPriorityClientsPowerDown:
+				ParentChangeTellPriorityClientsPowerDown();
 				break;
 
-			case kIOPM_ParentDownNotifyInterestedDriversWillChange:
-				ParentDownNotifyInterestedDriversWillChange();
+			case kIOPM_ParentChangeNotifyInterestedDriversWillChange:
+				ParentChangeNotifyInterestedDriversWillChange();
 				break;
 
-			case kIOPM_ParentDownNotifyDidChangeAndAcknowledgeChange:
-				ParentDownNotifyDidChangeAndAcknowledgeChange();
+			case kIOPM_ParentChangeSetPowerState:
+				ParentChangeSetPowerState();
 				break;
 
-			case kIOPM_ParentDownSetPowerState:
-				ParentDownSetPowerState();	
+			case kIOPM_ParentChangeWaitForPowerSettle:
+				ParentChangeWaitForPowerSettle();
 				break;
 
-			case kIOPM_ParentDownWaitForPowerSettle:
-				ParentDownWaitForPowerSettle();
+			case kIOPM_ParentChangeNotifyInterestedDriversDidChange:
+				ParentChangeNotifyInterestedDriversDidChange();
 				break;
 
-			case kIOPM_ParentAcknowledgePowerChange:
-				ParentAcknowledgePowerChange();
-				break;
-
-			case kIOPM_ParentUpSetPowerState:
-				ParentUpSetPowerState();
-				break;
-
-			case kIOPM_ParentUpWaitForSettleTime:
-				ParentUpWaitForSettleTime();
-				break;
+            case kIOPM_ParentChangeTellCapabilityDidChange:
+                ParentChangeTellCapabilityDidChange();
+                break;
 
-			case kIOPM_ParentUpNotifyInterestedDriversDidChange:
-				ParentUpNotifyInterestedDriversDidChange();
+			case kIOPM_ParentChangeAcknowledgePowerChange:
+				ParentChangeAcknowledgePowerChange();
 				break;
 
 			case kIOPM_DriverThreadCallDone:
@@ -5833,21 +6590,116 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
 					notifyInterestedDriversDone();
 				break;
 
-			case kIOPM_NotifyChildrenDone:
-				notifyChildrenDone();
+			case kIOPM_NotifyChildrenOrdered:
+				notifyChildrenOrdered();
+				break;
+
+			case kIOPM_NotifyChildrenDelayed:
+				notifyChildrenDelayed();
+				break;
+
+            case kIOPM_NotifyChildrenStart:
+                PM_LOG2("%s: kIOPM_NotifyChildrenStart done\n", getName());
+                MS_POP();   // from notifyInterestedDriversDone()
+                notifyChildren();
+                break;
+
+            case kIOPM_SyncTellClientsPowerDown:
+                // Root domain might self-cancel due to assertions.
+                if (IS_ROOT_DOMAIN)
+                {
+                    bool cancel = (bool) fDoNotPowerDown;
+                    getPMRootDomain()->askChangeDownDone(
+                        &fHeadNoteChangeFlags, &cancel);
+                    fDoNotPowerDown = cancel;
+                }
+				if (!fDoNotPowerDown)
+				{
+                    fMachineState = kIOPM_SyncTellPriorityClientsPowerDown;
+                    fOutOfBandParameter = kNotifyApps;
+                    tellChangeDown(fHeadNotePowerState);
+				}
+				else
+				{
+					OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState);
+					PM_ERROR("%s: idle cancel\n", fName);
+					tellNoChangeDown(fHeadNotePowerState);
+					fHeadNoteChangeFlags |= kIOPMNotDone;
+					OurChangeFinish();
+				}
+                break;
+
+            case kIOPM_SyncTellPriorityClientsPowerDown:
+				if (!fDoNotPowerDown)
+				{
+                    fMachineState = kIOPM_SyncNotifyWillChange;
+                    fOutOfBandParameter = kNotifyPriority;
+                    tellChangeDown(fHeadNotePowerState);
+                }
+                else
+                {
+					OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState);
+					PM_ERROR("%s: idle revert\n", fName);
+					tellChangeUp(fCurrentPowerState);
+					fHeadNoteChangeFlags |= kIOPMNotDone;
+					OurChangeFinish();
+				}
 				break;
 
+            case kIOPM_SyncNotifyWillChange:
+                if (kIOPMSyncNoChildNotify & fHeadNoteChangeFlags)
+                {
+                    fMachineState = kIOPM_SyncFinish;
+                    continue;
+                }
+                fMachineState     = kIOPM_SyncNotifyDidChange;
+                fDriverCallReason = kDriverCallInformPreChange;
+                notifyChildren();
+                break;
+
             case kIOPM_SyncNotifyDidChange:
-                fMachineState     = kIOPM_SyncFinish;
+                fIsPreChange = false;
+
+                if (fHeadNoteChangeFlags & kIOPMParentInitiated)
+                    fMachineState = kIOPM_SyncFinish;
+                else
+                    fMachineState = kIOPM_SyncTellCapabilityDidChange;
+
                 fDriverCallReason = kDriverCallInformPostChange;
                 notifyChildren();
                 break;
 
+            case kIOPM_SyncTellCapabilityDidChange:
+                tellSystemCapabilityChange( kIOPM_SyncFinish );
+                break;
+
             case kIOPM_SyncFinish:
-                if (fHeadNoteFlags & kIOPMParentInitiated)
-                    ParentAcknowledgePowerChange();
+                if (fHeadNoteChangeFlags & kIOPMParentInitiated)
+                    ParentChangeAcknowledgePowerChange();
                 else
                     OurChangeFinish();
+                break;
+
+            case kIOPM_TellCapabilityChangeDone:
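+                // Capability notifications ping-pong between the two client
+                // groups: pre-change tells apps then priority clients,
+                // post-change tells priority clients then apps. Once the
+                // second group has responded, pop the machine state pushed
+                // by tellSystemCapabilityChange().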
+                if (fIsPreChange)
+                {
+                    if (fOutOfBandParameter == kNotifyCapabilityChangePriority)
+                    {
+                        MS_POP();   // tellSystemCapabilityChange()
+                        continue;
+                    }
+                    fOutOfBandParameter = kNotifyCapabilityChangePriority;
+                }
+                else
+                {
+                    if (fOutOfBandParameter == kNotifyCapabilityChangeApps)
+                    {
+                        MS_POP();   // tellSystemCapabilityChange()
+                        continue;
+                    }
+                    fOutOfBandParameter = kNotifyCapabilityChangeApps;
+                }
+                tellClientsWithResponse( fOutOfBandMessage );
                 break;
 
 			default:
@@ -5859,8 +6711,6 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
 
 		if (fMachineState == kIOPM_Finished)
 		{
-			//PM_TRACE("[%s] PM   End: Request %p (type %02lx)\n",
-			//	getName(), request, request->getType());
 			done = true;
 			break;
 		}
@@ -5910,6 +6760,7 @@ void IOService::executePMRequest( IOPMRequest * request )
 			break;
 
 		case kIOPMRequestTypePowerDomainDidChange:
 			handlePowerDomainDidChangeTo( request );
 			break;
 
@@ -5933,29 +6784,10 @@ void IOService::executePMRequest( IOPMRequest * request )
 
         case kIOPMRequestTypeSetIdleTimerPeriod:
             {
-                IOWorkLoop * wl  = gIOPMWorkLoop;
                 fIdleTimerPeriod = (uintptr_t) request->fArg0;
 
-                if (wl && (false == fLockedFlags.PMStop) && (fIdleTimerPeriod > 0))
+                if ((false == fLockedFlags.PMStop) && (fIdleTimerPeriod > 0))
                 {
-                    if ( NULL == fIdleTimerEventSource )
-                    {
-                        IOTimerEventSource * timerSrc;
-
-                        timerSrc = IOTimerEventSource::timerEventSource(
-                            this,
-                            OSMemberFunctionCast(IOTimerEventSource::Action,
-                                this, &IOService::idleTimerExpired));
-                        
-                        if (timerSrc && (wl->addEventSource(timerSrc) != kIOReturnSuccess))
-                        {
-                            timerSrc->release();
-                            timerSrc = 0;
-                        }
-
-                        fIdleTimerEventSource = timerSrc;
-                    }
-
                     fActivityTickleCount = 0;
                     clock_get_uptime(&fIdleTimerStartTime);
                     start_PM_idle_timer();
@@ -5979,7 +6811,7 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q
 	assert( request && queue );
 	assert( request->isReplyType() );
 
-	PM_TRACE("[A %02x] %p [%p %s] State %d\n",
+	PM_LOG1("[A %02x] %p [%p %s] state %d\n",
 		request->getType(), request, this, getName(), fMachineState);
 
 	switch ( request->getType() )
@@ -5987,32 +6819,30 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q
 		case kIOPMRequestTypeAllowPowerChange:
 		case kIOPMRequestTypeCancelPowerChange:
 			// Check if we are expecting this response.
-			if (responseValid((unsigned long) request->fArg0, (int)(long) request->fArg1))
+			if (responseValid((uint32_t)(uintptr_t) request->fArg0,
+                              (int)(uintptr_t) request->fArg1))
 			{
 				if (kIOPMRequestTypeCancelPowerChange == request->getType())
                 {
-                    OSString * name = (OSString *) request->fArg2;
-                    getPMRootDomain()->pmStatsRecordApplicationResponse(
-                        gIOPMStatsApplicationResponseCancel,
-                        name ? name->getCStringNoCopy() : "", 0,
-                        0, (int)(uintptr_t) request->fArg1);
+                    // Clients are not allowed to cancel when kIOPMSkipAskPowerDown
+                    // flag is set. Only root domain will set this flag.
+
+                    if ((fHeadNoteChangeFlags & kIOPMSkipAskPowerDown) == 0)
+                    {
+                        fDoNotPowerDown = true;
 
-					fDoNotPowerDown = true;
+                        OSString * name = (OSString *) request->fArg2;
+                        getPMRootDomain()->pmStatsRecordApplicationResponse(
+                            gIOPMStatsApplicationResponseCancel,
+                            name ? name->getCStringNoCopy() : "", 0,
+                            0, (int)(uintptr_t) request->fArg1);
+                    }
                 }
 
 				if (checkForDone())
 				{
 					stop_ack_timer();
-					if ( fResponseArray )
-					{
-						fResponseArray->release();
-						fResponseArray = NULL;
-					}
-                    if ( fNotifyClientArray )
-                    {
-                        fNotifyClientArray->release();
-                        fNotifyClientArray = NULL;
-                    }
+                    cleanClientResponses(false);
 					more = true;
 				}
 			}
@@ -6045,8 +6875,20 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q
 #if LOG_SETPOWER_TIMES
                 uint64_t nsec = computeTimeDeltaNS(&fDriverCallStartTime);
                 if (nsec > LOG_SETPOWER_TIMES)
-                    PM_DEBUG("%s::setPowerState(%p, %lu -> %lu) async took %d ms\n",
+                    PM_LOG("%s::setPowerState(%p, %lu -> %lu) async took %d ms\n",
                         fName, this, fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec));
+				
+				PMEventDetails *details = PMEventDetails::eventDetails(
+                                            kIOPMEventTypeSetPowerStateDelayed,		// type
+                                            fName,									// who
+                                            (uintptr_t)this,						// owner unique
+                                            NULL,									// interest name
+                                            (uint8_t)getPowerState(),				// old
+                                            (uint8_t)fHeadNotePowerState,			// new
+                                            0,										// result
+                                            NS_TO_US(nsec));						// usec completion time
+				
+				getPMRootDomain()->recordAndReleasePMEventGated( details );
 #endif
 				OUR_PMLog(kPMLogDriverAcknowledgeSet, (uintptr_t) this, fDriverTimer);
 				fDriverTimer = 0;
@@ -6066,68 +6908,187 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q
 
 		case kIOPMRequestTypeIdleCancel:
 			if ((fMachineState == kIOPM_OurChangeTellClientsPowerDown) 
-			 || (fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown))
+             || (fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown) 
+             || (fMachineState == kIOPM_SyncTellClientsPowerDown)
+             || (fMachineState == kIOPM_SyncTellPriorityClientsPowerDown))
 			{
-				OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, 0);
+				OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState);
+                PM_LOG2("%s: cancel from machine state %d\n",
+                    getName(), fMachineState);
 				fDoNotPowerDown = true;
-				if (fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown)
+                // Stop waiting for app replies.
+				if ((fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown) ||
+                    (fMachineState == kIOPM_SyncTellPriorityClientsPowerDown))
 					cleanClientResponses(false);
 				more = true;
 			}
 			break;
 
+        case kIOPMRequestTypeChildNotifyDelayCancel:
+            if (fMachineState == kIOPM_NotifyChildrenDelayed)
+            {
+                PM_LOG2("%s: delay notify cancelled\n", getName());
+                notifyChildrenDelayed();
+            }
+            break;
+
 		default:
 			panic("servicePMReplyQueue: unknown reply type %x",
                 request->getType());
 	}
 
-	releasePMRequest( request );
+	more |= gIOPMFreeQueue->queuePMRequest(request);
+    if (more)
+        gIOPMWorkQueue->incrementProducerCount();
+
 	return more;
 }
 
 //*********************************************************************************
-// [private] assertPMThreadCall / deassertPMThreadCall
+// [private] assertPMDriverCall / deassertPMDriverCall
 //*********************************************************************************
 
-bool IOService::assertPMThreadCall( void )
+bool IOService::assertPMDriverCall(
+    IOPMDriverCallEntry *   entry,
+    IOOptionBits            options,
+    IOPMinformee *          inform )
 {
+    IOService * target = 0;
+    bool        ok = false;
+
     if (!initialized)
         return false;
 
-    // PMfree() should only be called from IOService::free().
-    // That makes it safe to touch IOServicePM state here.
-    // Caller holds a retain and has checked target is on PM plane.
-
     PM_LOCK();
+
     if (fLockedFlags.PMStop)
     {
-        // PMstop() already issued - fail the assertion.
-        PM_UNLOCK();
-        return false;
+        goto fail;
+    }
+    
+    if (((options & kIOPMADC_NoInactiveCheck) == 0) && isInactive())
+    {
+        goto fail;
+    }
+
+    if (inform)
+    {
+        if (!inform->active)
+        {
+            goto fail;
+        }
+        target = inform->whatObject;
+        if (target->isInactive())
+        {
+            goto fail;
+        }
     }
 
-    // Increment assertion count to block PMstop(), and return true.
-    fThreadAssertionCount++;
-    fThreadAssertionThread = current_thread();  // only 1 caller
+    entry->thread = current_thread();
+    entry->target = target;
+    queue_enter(&fPMDriverCallQueue, entry, IOPMDriverCallEntry *, link);
+    ok = true;
+
+fail:
     PM_UNLOCK();
 
-    return true;
+    return ok;
 }
 
-void IOService::deassertPMThreadCall( void )
+void IOService::deassertPMDriverCall( IOPMDriverCallEntry * entry )
 {
+    bool wakeup = false;
+
     PM_LOCK();
-    assert(fThreadAssertionCount > 0);
-    if (fThreadAssertionCount)
-        fThreadAssertionCount--;
-    if (current_thread() == fThreadAssertionThread)
-        fThreadAssertionThread = 0;
-    if ((fThreadAssertionCount == 0) && fLockedFlags.PMStop)
+
+    assert( !queue_empty(&fPMDriverCallQueue) );
+    queue_remove(&fPMDriverCallQueue, entry, IOPMDriverCallEntry *, link);
+    if (fLockedFlags.PMDriverCallWait)
     {
-        // PMstop() is blocked waiting for assertion count to drop to zero.
-        PM_LOCK_WAKEUP(&fThreadAssertionCount);
+        wakeup = true;
     }
+
     PM_UNLOCK();
+
+    if (wakeup)
+        PM_LOCK_WAKEUP(&fPMDriverCallQueue);
+}
+
+void IOService::waitForPMDriverCall( IOService * target )
+{
+    const IOPMDriverCallEntry * entry;
+    thread_t                    thread = current_thread();
+    AbsoluteTime                deadline;
+    int                         waitResult;
+    bool                        log = true;
+    bool                        wait;
+
+    do {
+        wait = false;
+        queue_iterate(&fPMDriverCallQueue, entry, const IOPMDriverCallEntry *, link)
+        {
+            // When a target is given, skip entries for other targets.
+            if (target && (target != entry->target))
+                continue;
+
+            if (entry->thread == thread)
+            {
+                if (log)
+                {
+                    PM_LOG("%s: %s(%s) on PM thread\n",
+                        fName, __FUNCTION__, target ? target->getName() : "");
+                    OSReportWithBacktrace("%s: %s(%s) on PM thread\n",
+                        fName, __FUNCTION__, target ? target->getName() : "");
+                    log = false;
+                }
+                continue;
+            }
+            
+            wait = true;
+            break;
+        }
+
+        if (wait)
+        {
+            fLockedFlags.PMDriverCallWait = true;
+            clock_interval_to_deadline(15, kSecondScale, &deadline);
+            waitResult = PM_LOCK_SLEEP(&fPMDriverCallQueue, deadline);
+            fLockedFlags.PMDriverCallWait = false;
+            if (THREAD_TIMED_OUT == waitResult)
+            {
+                PM_ERROR("%s: waitForPMDriverCall timeout\n", fName);
+                wait = false;
+            }
+        }
+    } while (wait);
+}
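
The three routines above form one mechanism: assertPMDriverCall() registers the calling thread and target on fPMDriverCallQueue, deassertPMDriverCall() unlinks the entry and wakes any waiter, and waitForPMDriverCall() blocks, with a 15-second timeout, until no other thread has a matching call in flight. A minimal user-space sketch of the same pattern, substituting standard C++ primitives for PM_LOCK/PM_LOCK_SLEEP/PM_LOCK_WAKEUP (all names below are illustrative, not kernel API):

    #include <chrono>
    #include <condition_variable>
    #include <list>
    #include <mutex>
    #include <thread>

    struct CallEntry { std::thread::id thread; const void * target; };

    class DriverCallGate {
        std::mutex              lock;              // plays the role of PM_LOCK
        std::condition_variable wake;              // PM_LOCK_SLEEP / PM_LOCK_WAKEUP
        std::list<CallEntry *>  active;            // models fPMDriverCallQueue
        bool                    stopping = false;  // models fLockedFlags.PMStop

    public:
        bool assertCall(CallEntry * e, const void * target) {
            std::lock_guard<std::mutex> g(lock);
            if (stopping)
                return false;                      // PMstop() already issued
            e->thread = std::this_thread::get_id();
            e->target = target;
            active.push_back(e);
            return true;
        }

        void deassertCall(CallEntry * e) {
            { std::lock_guard<std::mutex> g(lock); active.remove(e); }
            wake.notify_all();                     // wake any waitForCalls()
        }

        // Block until no other thread has a call in flight (optionally
        // filtered by target), giving up after 15 seconds as above.
        void waitForCalls(const void * target) {
            std::unique_lock<std::mutex> g(lock);
            auto busy = [&] {
                for (CallEntry * e : active)
                    if ((!target || e->target == target) &&
                        e->thread != std::this_thread::get_id())
                        return true;               // someone else is mid-call
                return false;
            };
            while (busy())
                if (wake.wait_for(g, std::chrono::seconds(15)) ==
                    std::cv_status::timeout)
                    break;                         // timed out: give up
        }
    };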
+
+//*********************************************************************************
+// [private] Debug helpers
+//*********************************************************************************
+
+const char * IOService::getIOMessageString( uint32_t msg )
+{
+#define MSG_ENTRY(x)    {x, #x}
+
+    static const IONamedValue msgNames[] = { 
+        MSG_ENTRY( kIOMessageCanDevicePowerOff      ),
+        MSG_ENTRY( kIOMessageDeviceWillPowerOff     ),
+        MSG_ENTRY( kIOMessageDeviceWillNotPowerOff  ),
+        MSG_ENTRY( kIOMessageDeviceHasPoweredOn     ),
+        MSG_ENTRY( kIOMessageCanSystemPowerOff      ),
+        MSG_ENTRY( kIOMessageSystemWillPowerOff     ),
+        MSG_ENTRY( kIOMessageSystemWillNotPowerOff  ),
+        MSG_ENTRY( kIOMessageCanSystemSleep         ),
+        MSG_ENTRY( kIOMessageSystemWillSleep        ),
+        MSG_ENTRY( kIOMessageSystemWillNotSleep     ),
+        MSG_ENTRY( kIOMessageSystemHasPoweredOn     ),
+        MSG_ENTRY( kIOMessageSystemWillRestart      ),
+        MSG_ENTRY( kIOMessageSystemWillPowerOn      ),
+        MSG_ENTRY( kIOMessageSystemCapabilityChange ),
+        { 0, NULL }     // terminator expected by IOFindNameForValue()
+    };
+
+    return IOFindNameForValue(msg, msgNames);
 }
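
getIOMessageString() is a thin wrapper over IOFindNameForValue(), which walks an IONamedValue table until it reaches the NULL-name terminator. A standalone sketch of that lookup pattern (illustrative only; the real IOFindNameForValue lives in IOLib):

    #include <cstdio>

    struct NamedValue { int value; const char * name; };

    // Scan a { value, name } table terminated by a NULL name, in the
    // style of IOFindNameForValue(); fall back to a placeholder on a miss.
    static const char * findNameForValue(int value, const NamedValue * table)
    {
        for (; table->name; table++)
            if (table->value == value)
                return table->name;
        return "(unknown)";
    }

    // Usage:
    //   static const NamedValue msgs[] = { {1, "kMsgOne"}, {0, NULL} };
    //   printf("%s\n", findNameForValue(1, msgs));   // prints kMsgOne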
 
 // MARK: -
@@ -6189,8 +7150,10 @@ void IOPMRequest::reset( void )
 	}	
 }
 
-void IOPMRequest::attachNextRequest( IOPMRequest * next )
+bool IOPMRequest::attachNextRequest( IOPMRequest * next )
 {
+    bool ok = false;
+
     if (!fRequestNext)
     {
         // Postpone the execution of the next request after
@@ -6204,11 +7167,15 @@ void IOPMRequest::attachNextRequest( IOPMRequest * next )
             (uint32_t) fRequestNext->fWorkWaitCount,
             fTarget->getName());
 #endif
+        ok = true;
     }
+    return ok;
 }
 
-void IOPMRequest::detachNextRequest( void )
+bool IOPMRequest::detachNextRequest( void )
 {
+    bool ok = false;
+
     if (fRequestNext)
     {
         assert(fRequestNext->fWorkWaitCount);
@@ -6222,11 +7189,15 @@ void IOPMRequest::detachNextRequest( void )
             fTarget->getName());
 #endif
         fRequestNext = 0;
+        ok = true;
     }
+    return ok;
 }
 
-void IOPMRequest::attachRootRequest( IOPMRequest * root )
+bool IOPMRequest::attachRootRequest( IOPMRequest * root )
 {
+    bool ok = false;
+
     if (!fRequestRoot)
     {
         // Delay the completion of the root request after
@@ -6240,11 +7211,15 @@ void IOPMRequest::attachRootRequest( IOPMRequest * root )
             (uint32_t) fRequestRoot->fFreeWaitCount,
             fTarget->getName());
 #endif
+        ok = true;
     }
+    return ok;
 }
 
-void IOPMRequest::detachRootRequest( void )
+bool IOPMRequest::detachRootRequest( void )
 {
+    bool ok = false;
+
     if (fRequestRoot)
     {
         assert(fRequestRoot->fFreeWaitCount);
@@ -6258,7 +7233,9 @@ void IOPMRequest::detachRootRequest( void )
             fTarget->getName());
 #endif
         fRequestRoot = 0;
+        ok = true;
     }
+    return ok;
 }
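
The attach/detach routines above now report whether a dependency edge was actually added or removed, so callers such as IOPMCompletionQueue::queuePMRequest() can tell when a blocked request may have become runnable. A compact model of the wait-count bookkeeping (field names mirror fRequestNext/fWorkWaitCount; this is a sketch, not kernel code):

    struct Request {
        Request * next     = nullptr;  // models fRequestNext
        unsigned  workWait = 0;        // models fWorkWaitCount

        // Postpone 'n' until this request retires; fails if already chained.
        bool attachNext(Request * n) {
            if (next) return false;
            next = n;
            next->workWait++;
            return true;
        }

        // Drop the dependency; a 'true' return tells the caller that the
        // released request may now be runnable and the queue needs a kick.
        bool detachNext() {
            if (!next) return false;
            if (next->workWait) next->workWait--;
            next = nullptr;
            return true;
        }

        bool isWorkBlocked() const { return workWait != 0; }
    };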
 
 // MARK: -
@@ -6267,8 +7244,7 @@ void IOPMRequest::detachRootRequest( void )
 //*********************************************************************************
 // IOPMRequestQueue Class
 //
-// Global queues. As PM-aware drivers load and unload, their IOPMWorkQueue's are
-// created and deallocated. IOPMRequestQueue are created once and never released.
+// Global queues. Queues are created once and never released.
 //*********************************************************************************
 
 OSDefineMetaClassAndStructors( IOPMRequestQueue, IOEventSource );
@@ -6353,19 +7329,13 @@ bool IOPMRequestQueue::checkForWork( void )
 	return more;
 }
 
-void IOPMRequestQueue::signalWorkAvailable( void )
-{
-	IOEventSource::signalWorkAvailable();
-}
-
 // MARK: -
 // MARK: IOPMWorkQueue
 
 //*********************************************************************************
 // IOPMWorkQueue Class
 //
-// Every object in the power plane that has handled a PM request, will have an
-// instance of IOPMWorkQueue allocated for it.
+// Queue of IOServicePM objects with busy IOPMRequest(s).
 //*********************************************************************************
 
 OSDefineMetaClassAndStructors( IOPMWorkQueue, IOEventSource );
@@ -6390,43 +7360,160 @@ bool IOPMWorkQueue::init( IOService * inOwner, Action work, Action retire )
 
 	queue_init(&fWorkQueue);
 
-	fWorkAction   = work;
-	fRetireAction = retire;
+	fWorkAction    = work;
+	fRetireAction  = retire;
+    fConsumerCount = fProducerCount = 0;
 
 	return true;
 }
 
-void IOPMWorkQueue::queuePMRequest( IOPMRequest * request )
+bool IOPMWorkQueue::queuePMRequest( IOPMRequest * request, IOServicePM * pwrMgt )
 {
+    bool more = false;
+    bool empty;
+
 	assert( request );
+    assert( pwrMgt );
 	assert( onThread() );
+    assert( queue_next(&request->fCommandChain) ==
+            queue_prev(&request->fCommandChain) );
 
 	gIOPMBusyCount++;
-	queue_enter(&fWorkQueue, request, IOPMRequest *, fCommandChain);
-	checkForWork();
+
+    // Add the new request to the tail of the per-service request queue.
+    // If the queue was empty, check it immediately to minimize latency.
+
+    empty = queue_empty(&pwrMgt->RequestHead);
+	queue_enter(&pwrMgt->RequestHead, request, IOPMRequest *, fCommandChain);
+    if (empty)
+    {
+        more = checkRequestQueue(&pwrMgt->RequestHead, &empty);
+        if (!empty)
+        {
+            // The new request is blocked; add the IOServicePM to the work queue.
+            assert( queue_next(&pwrMgt->WorkChain) ==
+                    queue_prev(&pwrMgt->WorkChain) );
+
+            queue_enter(&fWorkQueue, pwrMgt, IOServicePM *, WorkChain);
+            fQueueLength++;
+            PM_LOG3("IOPMWorkQueue: [%u] added %s@%p to queue\n",
+                fQueueLength, pwrMgt->Name, pwrMgt);
+        }
+    }
+
+    return more;
 }
 
-bool IOPMWorkQueue::checkForWork( void )
+bool IOPMWorkQueue::checkRequestQueue( queue_head_t * queue, bool * empty )
 {
 	IOPMRequest *	request;
-	IOService *		target = (IOService *) owner;
-	bool			done;
+	IOService *		target;
+    bool            more = false;
+	bool			done = false;
 
-	while (!queue_empty(&fWorkQueue))
-	{
-		request = (IOPMRequest *) queue_first(&fWorkQueue);
-		assert(request->getTarget() == target);
-		if (request->isWorkBlocked()) break;
-		done = (*fWorkAction)( target, request, this );
-		if (!done) break;
-
-		assert(gIOPMBusyCount > 0);
-		if (gIOPMBusyCount) gIOPMBusyCount--;
-		queue_remove_first(&fWorkQueue, request, IOPMRequest *, fCommandChain);
-		(*fRetireAction)( target, request, this );
-	}
+    assert(!queue_empty(queue));
+    do {
+		request = (IOPMRequest *) queue_first(queue);
+		if (request->isWorkBlocked())
+            break;  // cannot start, blocked on attached request
 
-	return false;
+		target = request->getTarget();
+        done = (*fWorkAction)( target, request, this );
+		if (!done)
+            break;  // work started, blocked on PM state machine
+
+        assert(gIOPMBusyCount > 0);
+		if (gIOPMBusyCount)
+            gIOPMBusyCount--;
+
+        queue_remove_first(queue, request, IOPMRequest *, fCommandChain);
+        more |= (*fRetireAction)( target, request, this );
+        done = queue_empty(queue);
+    } while (!done);
+
+    *empty = done;
+
+    if (more)
+    {
+        // A retired request was blocking another request. Since the
+        // newly unblocked request may reside in the work queue, bump
+        // the producer count to avoid a work stall.
+        fProducerCount++;
+    }
+
+    return more;
+}
+
+bool IOPMWorkQueue::checkForWork( void )
+{
+	IOServicePM *   entry;
+	IOServicePM *   next;
+    bool			more = false;
+    bool            empty;
+
+#if WORK_QUEUE_STATS
+    fStatCheckForWork++;
+#endif
+
+    // Each producer signal triggers a full iteration over
+    // all IOServicePM entries in the work queue.
+
+    while (fConsumerCount != fProducerCount)
+    {
+        PM_LOG3("IOPMWorkQueue: checkForWork %u %u\n",
+            fProducerCount, fConsumerCount);
+
+        fConsumerCount = fProducerCount;
+
+#if WORK_QUEUE_STATS
+        if (queue_empty(&fWorkQueue))
+        {
+            fStatQueueEmpty++;
+            break;
+        }
+        fStatScanEntries++;
+        uint32_t cachedWorkCount = gIOPMWorkCount;
+#endif
+
+        entry = (IOServicePM *) queue_first(&fWorkQueue);
+        while (!queue_end(&fWorkQueue, (queue_entry_t) entry))
+        {
+            more |= checkRequestQueue(&entry->RequestHead, &empty);
+
+            // Get the next entry (points back to the head if this entry is last).
+            next = (IOServicePM *) queue_next(&entry->WorkChain);
+
+            // If the request queue is now empty, remove the
+            // IOServicePM from the work queue.
+            if (empty)
+            {
+                assert(fQueueLength);
+                if (fQueueLength) fQueueLength--;
+                PM_LOG3("IOPMWorkQueue: [%u] removed %s@%p from queue\n",
+                    fQueueLength, entry->Name, entry);
+                queue_remove(&fWorkQueue, entry, IOServicePM *, WorkChain);
+            }
+            entry = next;
+        }
+
+#if WORK_QUEUE_STATS
+        if (cachedWorkCount == gIOPMWorkCount)
+            fStatNoWorkDone++;
+#endif
+    }
+
+    return more;
+}
+
+void IOPMWorkQueue::signalWorkAvailable( void )
+{
+    fProducerCount++;
+	IOEventSource::signalWorkAvailable();
+}
+
+void IOPMWorkQueue::incrementProducerCount( void )
+{
+    fProducerCount++;
 }
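
Taken together, queuePMRequest(), checkRequestQueue() and checkForWork() implement a two-level scheme: each IOServicePM keeps its own FIFO of requests, the work queue holds only services whose head request is blocked, and the producer/consumer counters guarantee that every signal forces at least one more full scan, so a retirement that unblocks work on another service is never missed. A condensed single-threaded model of that scan loop (illustrative names and containers; the real code runs on the PM work loop over intrusive queue_t lists):

    #include <deque>
    #include <iterator>
    #include <list>

    struct Service {                       // models IOServicePM
        std::deque<int> requests;          // per-service FIFO (RequestHead)
        bool blocked = false;              // head request cannot run yet
    };

    struct WorkQueue {                     // models IOPMWorkQueue
        std::list<Service *> services;     // only services with queued work
        unsigned producer = 0, consumer = 0;

        // Drain one service's FIFO until it empties or its head blocks.
        // Returns true if a retirement unblocked a request elsewhere
        // (the kernel learns this from fRetireAction; stubbed false here).
        bool drain(Service * s, bool * empty) {
            while (!s->requests.empty() && !s->blocked)
                s->requests.pop_front();   // "work" then "retire"
            *empty = s->requests.empty();
            return false;
        }

        void checkForWork() {
            while (consumer != producer) { // one full scan per signal
                consumer = producer;
                for (auto it = services.begin(); it != services.end(); ) {
                    bool empty;
                    if (drain(*it, &empty))
                        producer++;        // retirement unblocked other work
                    it = empty ? services.erase(it) : std::next(it);
                }
            }
        }
    };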
 
 // MARK: -
@@ -6438,7 +7525,8 @@ bool IOPMWorkQueue::checkForWork( void )
 
 OSDefineMetaClassAndStructors( IOPMCompletionQueue, IOEventSource );
 
-IOPMCompletionQueue * IOPMCompletionQueue::create( IOService * inOwner, Action inAction )
+IOPMCompletionQueue *
+IOPMCompletionQueue::create( IOService * inOwner, Action inAction )
 {
 	IOPMCompletionQueue * me = OSTypeAlloc(IOPMCompletionQueue);
 	if (me && !me->init(inOwner, inAction))
@@ -6458,39 +7546,40 @@ bool IOPMCompletionQueue::init( IOService * inOwner, Action inAction )
 	return true;
 }
 
-void IOPMCompletionQueue::queuePMRequest( IOPMRequest * request )
+bool IOPMCompletionQueue::queuePMRequest( IOPMRequest * request )
 {
+    bool more;
+
 	assert(request);
-    request->detachNextRequest();   // unblocks next request
+    // Unblock the dependent request, if any.
+    more = request->detachNextRequest();
 	queue_enter(&fQueue, request, IOPMRequest *, fCommandChain);
-	if (workLoop) signalWorkAvailable();
+    return more;
 }
 
 bool IOPMCompletionQueue::checkForWork( void )
 {
     Action			dqAction = (Action) action;
 	IOPMRequest *	request;
+	IOPMRequest *   next;
 	IOService *		target;
 	bool			more = false;
-	queue_head_t	tmpQueue;
-
-	queue_init(&tmpQueue);
 
-	while (!queue_empty(&fQueue))
-	{
-		queue_remove_first( &fQueue, request, IOPMRequest *, fCommandChain );
-		if (request->isFreeBlocked())
-		{
-			queue_enter(&tmpQueue, request, IOPMRequest *, fCommandChain);
-			continue;
-		}
-		target = request->getTarget();
-		assert(target);
-		more |= (*dqAction)( target, request, this );
-	}
+    request = (IOPMRequest *) queue_first(&fQueue);
+    while (!queue_end(&fQueue, (queue_entry_t) request))
+    {
+        next = (IOPMRequest *) queue_next(&request->fCommandChain);
+		if (!request->isFreeBlocked())
+        {
+            queue_remove(&fQueue, request, IOPMRequest *, fCommandChain);
+            target = request->getTarget();
+            assert(target);
+            more |= (*dqAction)( target, request, this );
+        }
+        request = next;
+    }
 
-	queue_new_head(&tmpQueue, &fQueue, IOPMRequest *, fCommandChain);
-	return more;
+    return more;
 }
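
The rewritten checkForWork() above removes entries in place rather than shuttling blocked requests through a temporary queue; the key detail is capturing the successor before any removal so the iteration survives the unlink. The same pattern on a std::list (illustrative; the kernel uses the intrusive queue_t macros):

    #include <iterator>
    #include <list>

    // Complete and unlink every entry that is not free-blocked; report
    // whether any completion produced more work.
    template <typename T, typename BlockedFn, typename CompleteFn>
    bool drainUnblocked(std::list<T> & q, BlockedFn isFreeBlocked,
                        CompleteFn complete)
    {
        bool more = false;
        for (auto it = q.begin(); it != q.end(); ) {
            auto next = std::next(it);   // grab successor before removal
            if (!isFreeBlocked(*it)) {
                more |= complete(*it);
                q.erase(it);             // safe: 'next' is still valid
            }
            it = next;
        }
        return more;
    }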
 
 // MARK: -
@@ -6519,11 +7608,16 @@ IOReturn IOServicePM::gatedSerialize( OSSerialize * s  )
 {
 	OSDictionary *	dict;
 	bool			ok = false;
-	int				dictSize = 4;
+	int				dictSize = 5;
 
 	if (IdleTimerPeriod)
 		dictSize += 4;
 
+#if WORK_QUEUE_STATS
+    if (gIOPMRootNode == ControllingDriver)
+        dictSize += 4;
+#endif
+
     if (PowerClients)
         dict = OSDictionary::withDictionary(
             PowerClients, PowerClients->getCount() + dictSize);
@@ -6533,11 +7627,13 @@ IOReturn IOServicePM::gatedSerialize( OSSerialize * s  )
 	if (dict)
 	{
         setPMProperty(dict, "CurrentPowerState", CurrentPowerState);
+        if (NumberOfPowerStates)
+            setPMProperty(dict, "MaxPowerState", NumberOfPowerStates-1);
         if (DesiredPowerState != CurrentPowerState)
             setPMProperty(dict, "DesiredPowerState", DesiredPowerState);
         if (kIOPM_Finished != MachineState)
             setPMProperty(dict, "MachineState", MachineState);
-        if (DeviceOverrides)
+        if (DeviceOverrideEnabled)
             dict->setObject("PowerOverrideOn", kOSBooleanTrue);
 
 		if (IdleTimerPeriod)
@@ -6560,7 +7656,7 @@ IOReturn IOServicePM::gatedSerialize( OSSerialize * s  )
                 delta = now;
                 SUB_ABSOLUTETIME(&delta, &DeviceActiveTimestamp);
                 absolutetime_to_nanoseconds(delta, &nsecs);
-                setPMProperty(dict, "TimeSinceActivityTickle", NS_TO_MS(nsecs));
+                setPMProperty(dict, "TimeSinceLastTickle", NS_TO_MS(nsecs));
             }
 
             if (AbsoluteTime_to_scalar(&IdleTimerStartTime))
@@ -6573,6 +7669,20 @@ IOReturn IOServicePM::gatedSerialize( OSSerialize * s  )
             }
 		}
 
+#if WORK_QUEUE_STATS
+        if (gIOPMRootNode == Owner)
+        {
+            setPMProperty(dict, "WQ-CheckForWork",
+                gIOPMWorkQueue->fStatCheckForWork);
+            setPMProperty(dict, "WQ-ScanEntries",
+                gIOPMWorkQueue->fStatScanEntries);
+            setPMProperty(dict, "WQ-QueueEmpty",
+                gIOPMWorkQueue->fStatQueueEmpty);
+            setPMProperty(dict, "WQ-NoWorkDone",
+                gIOPMWorkQueue->fStatNoWorkDone);
+        }
+#endif
+
 		ok = dict->serialize(s);
 		dict->release();
 	}
@@ -6593,3 +7703,53 @@ bool IOServicePM::serialize( OSSerialize * s ) const
 
     return (kIOReturnSuccess == ret);
 }
+
+PMEventDetails* PMEventDetails::eventDetails(uint32_t   type,
+                                             const char *ownerName,
+                                             uintptr_t  ownerUnique,
+                                             const char *interestName,
+                                             uint8_t    oldState,
+                                             uint8_t    newState,
+                                             uint32_t   result,
+                                             uint32_t   elapsedTimeUS) {
+	
+	PMEventDetails *myself;
+	myself  = new PMEventDetails;
+	
+	if(myself) {
+		myself->eventType     = type;
+		myself->ownerName     = ownerName;
+		myself->ownerUnique   = ownerUnique;
+		myself->interestName  = interestName;
+		myself->oldState      = oldState;
+		myself->newState      = newState;
+		myself->result        = result;
+		myself->elapsedTimeUS = elapsedTimeUS;
+		
+		myself->eventClassifier = kIOPMEventClassDriverEvent;
+	}
+	
+	return myself;
+}
+
+
+PMEventDetails* PMEventDetails::eventDetails(uint32_t   type,
+                                             const char *uuid,
+                                             uint32_t   reason,
+                                             uint32_t   result) {
+	
+	PMEventDetails *myself;
+	myself  = new PMEventDetails;
+	
+	if(myself) {
+		myself->eventType     = type;
+		myself->uuid          = uuid;
+		myself->reason        = reason;
+		myself->result        = result;
+		
+		myself->eventClassifier = kIOPMEventClassSystemEvent;
+	}
+	
+	return myself;
+}
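
For reference, the driver-event variant is exercised earlier in this patch when an asynchronous setPowerState() acknowledgement is logged under LOG_SETPOWER_TIMES; the call shape, condensed from the servicePMReplyQueue hunk above (nsec comes from computeTimeDeltaNS()), is:

    PMEventDetails * details = PMEventDetails::eventDetails(
        kIOPMEventTypeSetPowerStateDelayed,   // type
        fName,                                // who
        (uintptr_t) this,                     // owner unique
        NULL,                                 // interest name
        (uint8_t) getPowerState(),            // old state
        (uint8_t) fHeadNotePowerState,        // new state
        0,                                    // result
        NS_TO_US(nsec));                      // completion time, usec

    getPMRootDomain()->recordAndReleasePMEventGated( details );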
+
diff --git a/iokit/Kernel/IOServicePMPrivate.h b/iokit/Kernel/IOServicePMPrivate.h
index 818285f8e..96e5bfacc 100644
--- a/iokit/Kernel/IOServicePMPrivate.h
+++ b/iokit/Kernel/IOServicePMPrivate.h
@@ -29,17 +29,163 @@
 #ifndef _IOKIT_IOSERVICEPMPRIVATE_H
 #define _IOKIT_IOSERVICEPMPRIVATE_H
 
-/*! @class IOServicePM
-    @abstract Power management class.
-*/
+#include <IOKit/IOCommand.h>
+#include <IOKit/IOEventSource.h>
+
+//******************************************************************************
+// PM command types
+//******************************************************************************
+
+enum {
+    /* Command Types */
+    kIOPMRequestTypeInvalid                     = 0x00,
+    kIOPMRequestTypePMStop                      = 0x01,
+    kIOPMRequestTypeAddPowerChild1              = 0x02,
+    kIOPMRequestTypeAddPowerChild2              = 0x03,
+    kIOPMRequestTypeAddPowerChild3              = 0x04,
+    kIOPMRequestTypeRegisterPowerDriver         = 0x05,
+    kIOPMRequestTypeAdjustPowerState            = 0x06,
+    kIOPMRequestTypePowerDomainWillChange       = 0x07,
+    kIOPMRequestTypePowerDomainDidChange        = 0x08,
+    kIOPMRequestTypePowerOverrideOnPriv         = 0x09,
+    kIOPMRequestTypePowerOverrideOffPriv        = 0x0A,
+    kIOPMRequestTypeActivityTickle              = 0x0B,
+    kIOPMRequestTypeRequestPowerState           = 0x0C,
+    kIOPMRequestTypeSynchronizePowerTree        = 0x0D,
+    kIOPMRequestTypeRequestPowerStateOverride   = 0x0E,
+    kIOPMRequestTypeSetIdleTimerPeriod          = 0x0F,
+    
+    /* Reply Types */
+    kIOPMRequestTypeReplyStart                  = 0x80,
+    kIOPMRequestTypeAckPowerChange              = 0x81,
+    kIOPMRequestTypeAckSetPowerState            = 0x82,
+    kIOPMRequestTypeAllowPowerChange            = 0x83,
+    kIOPMRequestTypeCancelPowerChange           = 0x84,
+    kIOPMRequestTypeInterestChanged             = 0x85,
+    kIOPMRequestTypeIdleCancel                  = 0x86,
+    kIOPMRequestTypeChildNotifyDelayCancel      = 0x87
+};
+
+//******************************************************************************
+// PM actions - For root domain only
+//******************************************************************************
+
+struct IOPMActions;
+
+typedef void
+(*IOPMActionPowerChangeStart)(
+    void *          target,
+    IOService *     service,
+    IOPMActions *   actions, 
+    uint32_t        powerState,
+    uint32_t *      changeFlags );
+
+typedef void
+(*IOPMActionPowerChangeDone)(
+    void *          target,
+    IOService *     service,
+    IOPMActions *   actions, 
+    uint32_t        powerState,
+    uint32_t        changeFlags );
+
+typedef void
+(*IOPMActionPowerChangeOverride)(
+    void *          target,
+    IOService *     service,
+    IOPMActions *   actions, 
+    unsigned long * powerState,
+    uint32_t *      changeFlags );
+
+typedef void
+(*IOPMActionActivityTickle)(
+    void *          target,
+    IOService *     service,
+    IOPMActions *   actions );
+
+struct IOPMActions {
+    void *                          target;
+    uint32_t                        parameter;
+    IOPMActionPowerChangeStart      actionPowerChangeStart;
+    IOPMActionPowerChangeDone       actionPowerChangeDone;
+    IOPMActionPowerChangeOverride   actionPowerChangeOverride;
+    IOPMActionActivityTickle        actionActivityTickle;
+};
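
IOPMActions lets the root domain hook power-change processing for selected services without subclassing them. A hypothetical callback installation (illustrative only; the actual hooks, and the code that installs them, live in IOPMrootDomain):

    // Hypothetical hook matching IOPMActionPowerChangeStart: tag changes
    // to the off state so the ask-apps phase is skipped
    // (kIOPMSkipAskPowerDown is defined later in this header).
    static void
    examplePowerChangeStart( void * target, IOService * service,
                             IOPMActions * actions,
                             uint32_t powerState, uint32_t * changeFlags )
    {
        if (powerState == 0)
            *changeFlags |= kIOPMSkipAskPowerDown;
    }

    // ...wherever the service's IOServicePM is configured:
    //   IOPMActions actions = { };
    //   actions.target                 = this;
    //   actions.actionPowerChangeStart = &examplePowerChangeStart;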
+
+//******************************************************************************
+
+enum {
+	kIOPMEventClassSystemEvent			= 0x00,
+	kIOPMEventClassDriverEvent			= 0x01
+};
+
+class PMEventDetails : public OSObject 
+{
+    OSDeclareDefaultStructors( PMEventDetails );
+    friend class IOServicePM;
+    friend class IOPMrootDomain;
+    friend class IOPMTimeline;
+public:  
+  static PMEventDetails *eventDetails(uint32_t   type,
+                                      const char *ownerName,
+                                      uintptr_t  ownerUnique,
+                                      const char *interestName,
+                                      uint8_t    oldState,
+                                      uint8_t    newState,
+                                      uint32_t   result,
+                                      uint32_t   elapsedTimeUS);
+
+  static PMEventDetails *eventDetails(uint32_t   type,
+                                      const char *uuid,
+                                      uint32_t   reason,
+                                      uint32_t   result);
+private:
+  uint8_t		  eventClassifier;
+  uint32_t        eventType;
+  const char      *ownerName;
+  uintptr_t       ownerUnique;
+  const char      *interestName;
+  uint8_t         oldState;
+  uint8_t         newState;
+  uint32_t        result;
+  uint32_t        elapsedTimeUS;
+  
+  const char      *uuid;
+  uint32_t        reason;
+};
+
+// Internal concise representation of IOPMPowerState
+struct IOPMPSEntry
+{
+    IOPMPowerFlags	capabilityFlags;
+    IOPMPowerFlags	outputPowerFlags;
+    IOPMPowerFlags	inputPowerFlags;
+    uint32_t        staticPower;
+    uint32_t        settleUpTime;
+    uint32_t        settleDownTime;
+};
+
+//******************************************************************************
+// IOServicePM
+//******************************************************************************
+
 class IOServicePM : public OSObject
 {
     friend class IOService;
+    friend class IOPMWorkQueue;
 
     OSDeclareDefaultStructors( IOServicePM )
 
 private:
-    // List of interested drivers.
+    // Link IOServicePM objects on IOPMWorkQueue.
+    queue_chain_t           WorkChain;
+    
+    // Queue of IOPMRequest objects.
+    queue_head_t            RequestHead;
+
+    // IOService creator and owner.
+    IOService *             Owner;
+
+    // List of interested drivers (protected by PMLock).
     IOPMinformeeList *      InterestedDrivers;
 
     // How long to wait for controlling driver to acknowledge.
@@ -50,21 +196,22 @@ private:
 
     thread_call_t           AckTimer;
     thread_call_t           SettleTimer;
+    thread_call_t           IdleTimer;
 
     // Settle time after changing power state.
-    unsigned long           SettleTimeUS;
+    uint32_t                SettleTimeUS;
 
     // The flags describing current change note.
-    unsigned long           HeadNoteFlags;
+    IOPMPowerChangeFlags    HeadNoteChangeFlags;
 
     // The new power state number being changed to.
-    unsigned long           HeadNotePowerState;
+    IOPMPowerStateIndex     HeadNotePowerState;
 
     // Points to the entry in the power state array.
-    IOPMPowerState *        HeadNotePowerArrayEntry;
+    IOPMPSEntry *           HeadNotePowerArrayEntry;
 
     // Power flags supplied by all parents (domain).
-    unsigned long           HeadNoteDomainFlags;
+    IOPMPowerFlags          HeadNoteDomainFlags;
 
     // Power flags supplied by domain accounting for parent changes.
     IOPMPowerFlags          HeadNoteDomainTargetFlags;
@@ -73,32 +220,26 @@ private:
     IOPowerConnection *     HeadNoteParentConnection;
     
     // Power flags supplied by the changing parent.
-    unsigned long           HeadNoteParentFlags;
+    IOPMPowerFlags          HeadNoteParentFlags;
 
     // Number of acks still outstanding.
-    unsigned long           HeadNotePendingAcks;
+    uint32_t                HeadNotePendingAcks;
 
     // PM state lock.
-    IOLock	*               PMLock;
-
-    // Initialized to true, then set to false after the initial power change.
-    bool                    InitialChange;
-
-    // Ignore children and driver desires if true.
-    bool                    DeviceOverrides;
-
-    // True if device was active since last idle timer expiration.
-    bool                    DeviceActive;
-
-    // Keeps track of any negative responses from notified apps and clients.
-    bool					DoNotPowerDown;
-
-    // True if all our parents know the state of their power domain.
-    bool					ParentsKnowState;
-
-    bool					StrictTreeOrder;
-    bool					IdleTimerStopped;
-	bool					AdjustPowerScheduled;
+    IOLock *                PMLock;
+
+    unsigned int            InitialPowerChange:1;
+    unsigned int            InitialSetPowerState:1;
+    unsigned int            DeviceOverrideEnabled:1;
+    unsigned int            DeviceWasActive:1;
+    unsigned int            DoNotPowerDown:1;
+    unsigned int            ParentsKnowState:1;
+    unsigned int            StrictTreeOrder:1;
+    unsigned int            IdleTimerStopped:1;
+    unsigned int            AdjustPowerScheduled:1;
+    unsigned int            IsPreChange:1;
+    unsigned int            DriverCallBusy:1;
+    unsigned int            PCDFunctionOverride:1;
 
     // Time of last device activity.
     AbsoluteTime            DeviceActiveTimestamp;
@@ -106,105 +247,106 @@ private:
     // Used to protect activity flag.
     IOLock *                ActivityLock;
 
-    // Idle timer event source.
-    IOTimerEventSource *    IdleTimerEventSource;
-
     // Idle timer's period in seconds.
     unsigned long           IdleTimerPeriod;
     unsigned long           IdleTimerMinPowerState;
     AbsoluteTime            IdleTimerStartTime;
 
     // Power state desired by a subclassed device object.
-    unsigned long           DeviceDesire;
+    IOPMPowerStateIndex     DeviceDesire;
 
     // This is the power state we desire currently.
-    unsigned long           DesiredPowerState;
+    IOPMPowerStateIndex     DesiredPowerState;
 
     // This is what our parent thinks our need is.
-    unsigned long           PreviousRequest;
+    IOPMPowerFlags          PreviousRequestPowerFlags;
 
     // Cache result from getName(), used in logging.
     const char *            Name;
 
     // Number of power states in the power array.
-    unsigned long           NumberOfPowerStates;
+    IOPMPowerStateIndex     NumberOfPowerStates;
 
     // Power state array.
-    IOPMPowerState *		PowerStates;
+    IOPMPSEntry *           PowerStates;
 
     // The controlling driver.
-    IOService *				ControllingDriver;
+    IOService *             ControllingDriver;
 
     // Our current power state.
-    unsigned long			CurrentPowerState;
+    IOPMPowerStateIndex     CurrentPowerState;
 
     // Logical OR of power flags for each power domain parent.
-    IOPMPowerFlags			ParentsCurrentPowerFlags;
+    IOPMPowerFlags          ParentsCurrentPowerFlags;
 
     // The highest power state we can achieve in current power domain.
-    unsigned long			MaxCapability;
+    IOPMPowerStateIndex     MaxPowerState;
 
     // Logical OR of all output power character flags in the array.
-    IOPMPowerFlags			OutputPowerCharacterFlags;
+    IOPMPowerFlags          OutputPowerCharacterFlags;
 
     // OSArray which manages responses from notified apps and clients.
-    OSArray *				ResponseArray;
+    OSArray *               ResponseArray;
     OSArray *               NotifyClientArray;
 
     // Used to uniquely identify power management notification to apps and clients.
-    UInt16					SerialNumber;
+    UInt16                  SerialNumber;
 
     // Used to communicate desired function to tellClientsWithResponse().
     // This is used because it avoids changing the signatures of the affected virtual methods.
-    int						OutOfBandParameter;
+    int                     OutOfBandParameter;
 
     AbsoluteTime            DriverCallStartTime;
     IOPMPowerFlags          CurrentCapabilityFlags;
     long                    ActivityTicklePowerState;
     unsigned long           CurrentPowerConsumption;
-    unsigned long           TempClampPowerState;
-    IOPMWorkQueue *			PMWorkQueue;
-    OSSet *					InsertInterestSet;
-    OSSet *					RemoveInterestSet;
-    OSArray *				NotifyChildArray;
+    IOPMPowerStateIndex     TempClampPowerState;
+    OSArray *               NotifyChildArray;
     OSDictionary *          PowerClients;
-    thread_call_t			DriverCallEntry;
-    void *					DriverCallParamPtr;
-    IOItemCount				DriverCallParamCount;
-    IOItemCount				DriverCallParamSlots;
+    thread_call_t           DriverCallEntry;
+    void *                  DriverCallParamPtr;
+    IOItemCount             DriverCallParamCount;
+    IOItemCount             DriverCallParamSlots;
     uint32_t                DriverCallReason;
+    uint32_t                OutOfBandMessage;
     uint32_t                TempClampCount;
     uint32_t                OverrideMaxPowerState;
     uint32_t                ActivityTickleCount;
     uint32_t                WaitReason;
-    uint32_t                NextMachineState;
+    uint32_t                SavedMachineState;
     uint32_t                RootDomainState;
-    uint32_t                ThreadAssertionCount;
 
-    // Protected by PMLock
+    // Protected by PMLock - BEGIN
     struct {
-        uint32_t            DriverCallBusy : 1;
-        uint32_t            PMStop         : 1;
+        uint32_t            PMStop              : 1;
+        uint32_t            PMDriverCallWait    : 1;
     } LockedFlags;
 
-    thread_t                ThreadAssertionThread;
+    queue_head_t            PMDriverCallQueue;
+    OSSet *                 InsertInterestSet;
+    OSSet *                 RemoveInterestSet;
+    // Protected by PMLock - END
 
 #if PM_VARS_SUPPORT
-    IOPMprot *				PMVars;
+    IOPMprot *              PMVars;
 #endif
 
+    IOPMActions             PMActions;
+
     // Serialize IOServicePM state for debug output.
     IOReturn gatedSerialize( OSSerialize * s );
     virtual bool serialize( OSSerialize * s ) const;
 };
 
+#define fOwner                      pwrMgt->Owner
 #define fInterestedDrivers          pwrMgt->InterestedDrivers
 #define fDriverTimer                pwrMgt->DriverTimer
+#define fMachineState               pwrMgt->MachineState
 #define fAckTimer                   pwrMgt->AckTimer
 #define fSettleTimer                pwrMgt->SettleTimer
-#define fMachineState               pwrMgt->MachineState
+#define fIdleTimer                  pwrMgt->IdleTimer
 #define fSettleTimeUS               pwrMgt->SettleTimeUS
-#define fHeadNoteFlags              pwrMgt->HeadNoteFlags
+#define fHeadNoteChangeFlags        pwrMgt->HeadNoteChangeFlags
 #define fHeadNotePowerState         pwrMgt->HeadNotePowerState
 #define fHeadNotePowerArrayEntry    pwrMgt->HeadNotePowerArrayEntry
 #define fHeadNoteDomainFlags        pwrMgt->HeadNoteDomainFlags
@@ -213,63 +355,63 @@ private:
 #define fHeadNoteParentFlags        pwrMgt->HeadNoteParentFlags
 #define fHeadNotePendingAcks        pwrMgt->HeadNotePendingAcks
 #define fPMLock                     pwrMgt->PMLock
-#define fInitialChange              pwrMgt->InitialChange
-#define fDeviceOverrides            pwrMgt->DeviceOverrides
+#define fInitialPowerChange         pwrMgt->InitialPowerChange
+#define fInitialSetPowerState       pwrMgt->InitialSetPowerState
+#define fDeviceOverrideEnabled      pwrMgt->DeviceOverrideEnabled
+#define fDeviceWasActive            pwrMgt->DeviceWasActive
+#define fDoNotPowerDown             pwrMgt->DoNotPowerDown
+#define fParentsKnowState           pwrMgt->ParentsKnowState
+#define fStrictTreeOrder            pwrMgt->StrictTreeOrder
+#define fIdleTimerStopped           pwrMgt->IdleTimerStopped
+#define fAdjustPowerScheduled       pwrMgt->AdjustPowerScheduled
+#define fIsPreChange                pwrMgt->IsPreChange
+#define fDriverCallBusy             pwrMgt->DriverCallBusy
+#define fPCDFunctionOverride        pwrMgt->PCDFunctionOverride
+#define fDeviceActiveTimestamp      pwrMgt->DeviceActiveTimestamp
 #define fActivityLock               pwrMgt->ActivityLock
-#define fIdleTimerEventSource       pwrMgt->IdleTimerEventSource
 #define fIdleTimerPeriod            pwrMgt->IdleTimerPeriod
 #define fIdleTimerMinPowerState     pwrMgt->IdleTimerMinPowerState
-#define fDeviceActive               pwrMgt->DeviceActive
 #define fIdleTimerStartTime         pwrMgt->IdleTimerStartTime
-#define fDeviceActiveTimestamp      pwrMgt->DeviceActiveTimestamp
-#define fActivityTickleCount        pwrMgt->ActivityTickleCount
 #define fDeviceDesire               pwrMgt->DeviceDesire
 #define fDesiredPowerState          pwrMgt->DesiredPowerState
-#define fPreviousRequest            pwrMgt->PreviousRequest
+#define fPreviousRequestPowerFlags  pwrMgt->PreviousRequestPowerFlags
 #define fName                       pwrMgt->Name
 #define fNumberOfPowerStates        pwrMgt->NumberOfPowerStates
 #define fPowerStates                pwrMgt->PowerStates
 #define fControllingDriver          pwrMgt->ControllingDriver
-#define fAggressivenessValue        pwrMgt->AggressivenessValue
-#define fAggressivenessValid        pwrMgt->AggressivenessValid
 #define fCurrentPowerState          pwrMgt->CurrentPowerState
-#define fParentsKnowState           pwrMgt->ParentsKnowState
 #define fParentsCurrentPowerFlags   pwrMgt->ParentsCurrentPowerFlags
-#define fMaxCapability              pwrMgt->MaxCapability
+#define fMaxPowerState              pwrMgt->MaxPowerState
 #define fOutputPowerCharacterFlags  pwrMgt->OutputPowerCharacterFlags
-#define fSerialNumber               pwrMgt->SerialNumber
 #define fResponseArray              pwrMgt->ResponseArray
 #define fNotifyClientArray          pwrMgt->NotifyClientArray
-#define fDoNotPowerDown             pwrMgt->DoNotPowerDown
+#define fSerialNumber               pwrMgt->SerialNumber
 #define fOutOfBandParameter         pwrMgt->OutOfBandParameter
 #define fDriverCallStartTime        pwrMgt->DriverCallStartTime
 #define fCurrentCapabilityFlags     pwrMgt->CurrentCapabilityFlags
+#define fActivityTicklePowerState   pwrMgt->ActivityTicklePowerState
 #define fCurrentPowerConsumption    pwrMgt->CurrentPowerConsumption
 #define fTempClampPowerState        pwrMgt->TempClampPowerState
-#define fTempClampCount             pwrMgt->TempClampCount
-#define fOverrideMaxPowerState      pwrMgt->OverrideMaxPowerState
-#define fPMWorkQueue                pwrMgt->PMWorkQueue
-#define fWaitReason                 pwrMgt->WaitReason
-#define fNextMachineState           pwrMgt->NextMachineState
-#define fDriverCallReason           pwrMgt->DriverCallReason
+#define fNotifyChildArray           pwrMgt->NotifyChildArray
+#define fPowerClients               pwrMgt->PowerClients
 #define fDriverCallEntry            pwrMgt->DriverCallEntry
 #define fDriverCallParamPtr         pwrMgt->DriverCallParamPtr
 #define fDriverCallParamCount       pwrMgt->DriverCallParamCount
 #define fDriverCallParamSlots       pwrMgt->DriverCallParamSlots
-#define fActivityTickled            pwrMgt->ActivityTickled
+#define fDriverCallReason           pwrMgt->DriverCallReason
+#define fOutOfBandMessage           pwrMgt->OutOfBandMessage
+#define fTempClampCount             pwrMgt->TempClampCount
+#define fOverrideMaxPowerState      pwrMgt->OverrideMaxPowerState
+#define fActivityTickleCount        pwrMgt->ActivityTickleCount
+#define fWaitReason                 pwrMgt->WaitReason
+#define fSavedMachineState          pwrMgt->SavedMachineState
+#define fRootDomainState            pwrMgt->RootDomainState
+#define fLockedFlags                pwrMgt->LockedFlags
+#define fPMDriverCallQueue          pwrMgt->PMDriverCallQueue
 #define fInsertInterestSet          pwrMgt->InsertInterestSet
 #define fRemoveInterestSet          pwrMgt->RemoveInterestSet
-#define fStrictTreeOrder            pwrMgt->StrictTreeOrder
-#define fNotifyChildArray           pwrMgt->NotifyChildArray
-#define fIdleTimerStopped           pwrMgt->IdleTimerStopped
-#define fAdjustPowerScheduled       pwrMgt->AdjustPowerScheduled
-#define fActivityTicklePowerState   pwrMgt->ActivityTicklePowerState
 #define fPMVars                     pwrMgt->PMVars
-#define fPowerClients               pwrMgt->PowerClients
-#define fRootDomainState            pwrMgt->RootDomainState
-#define fThreadAssertionCount       pwrMgt->ThreadAssertionCount
-#define fThreadAssertionThread      pwrMgt->ThreadAssertionThread
-#define fLockedFlags                pwrMgt->LockedFlags
+#define fPMActions                  pwrMgt->PMActions
 
 /*
 When an IOService is waiting for acknowledgement to a power change
@@ -279,13 +421,27 @@ the ack timer is ticking every tenth of a second.
 */
 #define ACK_TIMER_PERIOD            100000000
 
-#define kIOPMParentInitiated        0x01    // this power change initiated by our  parent
-#define kIOPMWeInitiated            0x02    // this power change initiated by this device
-#define kIOPMNotDone                0x04    // we couldn't make this change
-#define kIOPMDomainWillChange       0x08    // change started by PowerDomainWillChangeTo
-#define kIOPMDomainDidChange        0x10    // change started by PowerDomainDidChangeTo
-#define kIOPMDomainPowerDrop        0x20    // Domain is lowering power
-#define kIOPMSynchronize            0x40    // change triggered by power tree re-sync
+// Maximum wait time in microseconds for kernel priority and capability
+// clients with asynchronous message handlers to acknowledge.
+//
+#define kPriorityClientMaxWait      (90 * 1000 * 1000)
+#define kCapabilityClientMaxWait    (240 * 1000 * 1000)
+
+// Attributes describing a power state change.
+// See IOPMPowerChangeFlags data type.
+//
+#define kIOPMParentInitiated        0x0001  // this power change initiated by our parent
+#define kIOPMSelfInitiated          0x0002  // this power change initiated by this device
+#define kIOPMNotDone                0x0004  // we couldn't make this change
+#define kIOPMDomainWillChange       0x0008  // change started by PowerDomainWillChangeTo
+#define kIOPMDomainDidChange        0x0010  // change started by PowerDomainDidChangeTo
+#define kIOPMDomainPowerDrop        0x0020  // Domain is lowering power
+#define kIOPMIgnoreChildren         0x0040  // Ignore children and driver power desires
+#define kIOPMSkipAskPowerDown       0x0080  // skip the ask app phase
+#define kIOPMSynchronize            0x0100  // change triggered by power tree re-sync
+#define kIOPMSyncNoChildNotify      0x0200  // sync root domain only, not entire tree
+#define kIOPMSyncTellPowerDown      0x0400  // send the ask/will power off messages
+#define kIOPMSyncCancelPowerDown    0x0800  // sleep cancel for maintenance wake
 
 enum {
     kDriverCallInformPreChange,
@@ -298,73 +454,51 @@ struct DriverCallParam {
     IOReturn    Result;
 };
 
-// values of outofbandparameter
+// Values of OutOfBandParameter
 enum {
     kNotifyApps,
-    kNotifyPriority
+    kNotifyPriority,
+    kNotifyCapabilityChangeApps,
+    kNotifyCapabilityChangePriority
 };
 
-typedef bool (*IOPMMessageFilter)(OSObject * object, void * context);
+typedef bool (*IOPMMessageFilter)(
+        void * target, void * object, void * arg1, void * arg2, void * arg3 );
 
 // used for applyToInterested
 struct IOPMInterestContext {
-    OSArray *           responseFlags;
-    OSArray *           notifyClients;
-    UInt16              serialNumber;
-    UInt16              counter;
-    UInt32              maxTimeRequested;
-    int                 msgType;
-    IOService *         us;
-    unsigned long       stateNumber;
-    IOPMPowerFlags      stateFlags;
-    const char *        errorLog;
-    IOPMMessageFilter   filterFunc;
+    OSArray *               responseArray;
+    OSArray *               notifyClients;
+    uint16_t                serialNumber;
+    uint8_t                 isPreChange;
+    uint8_t                 enableTracing;
+    uint32_t                maxTimeRequested;
+    uint32_t                messageType;
+    uint32_t                notifyType;
+    IOService *             us;
+    IOPMPowerStateIndex     stateNumber;
+    IOPMPowerFlags          stateFlags;
+    IOPMPowerChangeFlags    changeFlags;
+    const char *            errorLog;
+    IOPMMessageFilter       messageFilter;
 };
 
-//*********************************************************************************
+// assertPMDriverCall() options
+enum {
+    kIOPMADC_NoInactiveCheck = 1
+};
+
+//******************************************************************************
 // PM Statistics & Diagnostics
-//*********************************************************************************
+//******************************************************************************
 
 extern const OSSymbol *gIOPMStatsApplicationResponseTimedOut;
 extern const OSSymbol *gIOPMStatsApplicationResponseCancel;
 extern const OSSymbol *gIOPMStatsApplicationResponseSlow;
 
-//*********************************************************************************
-// PM command types
-//*********************************************************************************
-
-enum {
-    /* Command Types */
-    kIOPMRequestTypeInvalid                     = 0x00,
-    kIOPMRequestTypePMStop                      = 0x01,
-    kIOPMRequestTypeAddPowerChild1              = 0x02,
-    kIOPMRequestTypeAddPowerChild2              = 0x03,
-    kIOPMRequestTypeAddPowerChild3              = 0x04,
-    kIOPMRequestTypeRegisterPowerDriver         = 0x05,
-    kIOPMRequestTypeAdjustPowerState            = 0x06,
-    kIOPMRequestTypePowerDomainWillChange       = 0x07,
-    kIOPMRequestTypePowerDomainDidChange        = 0x08,
-    kIOPMRequestTypePowerOverrideOnPriv         = 0x09,
-    kIOPMRequestTypePowerOverrideOffPriv        = 0x0A,
-    kIOPMRequestTypeActivityTickle              = 0x0B,
-    kIOPMRequestTypeRequestPowerState           = 0x0C,
-    kIOPMRequestTypeSynchronizePowerTree        = 0x0D,
-    kIOPMRequestTypeRequestPowerStateOverride   = 0x0E,
-    kIOPMRequestTypeSetIdleTimerPeriod          = 0x0F,
-
-    /* Reply Types */
-    kIOPMRequestTypeReplyStart                  = 0x80,
-    kIOPMRequestTypeAckPowerChange              = 0x81,
-    kIOPMRequestTypeAckSetPowerState            = 0x82,
-    kIOPMRequestTypeAllowPowerChange            = 0x83,
-    kIOPMRequestTypeCancelPowerChange           = 0x84,
-    kIOPMRequestTypeInterestChanged             = 0x85,
-    kIOPMRequestTypeIdleCancel                  = 0x86
-};
-
-//*********************************************************************************
-// IOServicePM internal helper classes
-//*********************************************************************************
+//******************************************************************************
+// IOPMRequest
+//******************************************************************************
 
 typedef void (*IOPMCompletionAction)(void * target, void * param, IOReturn status);
 
@@ -445,12 +579,16 @@ public:
     static IOPMRequest * create( void );
     bool   init( IOService * owner, IOOptionBits type );
     void   reset( void );
-    void   attachNextRequest( IOPMRequest * next );
-    void   detachNextRequest( void );
-    void   attachRootRequest( IOPMRequest * root );
-    void   detachRootRequest( void );
+    bool   attachNextRequest( IOPMRequest * next );
+    bool   detachNextRequest( void );
+    bool   attachRootRequest( IOPMRequest * root );
+    bool   detachRootRequest( void );
 };
 
+//******************************************************************************
+// IOPMRequestQueue
+//******************************************************************************
+
 class IOPMRequestQueue : public IOEventSource
 {
     OSDeclareDefaultStructors( IOPMRequestQueue )
@@ -470,9 +608,14 @@ public:
     static  IOPMRequestQueue * create( IOService * inOwner, Action inAction );
     void    queuePMRequest( IOPMRequest * request );
     void    queuePMRequestChain( IOPMRequest ** requests, IOItemCount count );
-    void    signalWorkAvailable( void );
 };
 
+//******************************************************************************
+// IOPMWorkQueue
+//******************************************************************************
+
+#define WORK_QUEUE_STATS    1
+
 class IOPMWorkQueue : public IOEventSource
 {
     OSDeclareDefaultStructors( IOPMWorkQueue )
@@ -480,24 +623,36 @@ class IOPMWorkQueue : public IOEventSource
 public:
     typedef bool (*Action)( IOService *, IOPMRequest *, IOPMWorkQueue * );
 
+#if WORK_QUEUE_STATS
+    uint64_t            fStatCheckForWork;
+    uint64_t            fStatScanEntries;
+    uint64_t            fStatQueueEmpty;
+    uint64_t            fStatNoWorkDone;
+#endif
+
 protected:
-    queue_head_t    fWorkQueue;
-    Action          fWorkAction;
-    Action          fRetireAction;
+    queue_head_t        fWorkQueue;
+    Action              fWorkAction;
+    Action              fRetireAction;
+    uint32_t            fQueueLength;
+    uint32_t            fConsumerCount;
+    volatile uint32_t   fProducerCount;
 
     virtual bool checkForWork( void );
     virtual bool init( IOService * inOwner, Action work, Action retire );
+    bool    checkRequestQueue( queue_head_t * queue, bool * empty );
 
 public:
     static  IOPMWorkQueue * create( IOService * inOwner, Action work, Action retire );
-    void    queuePMRequest( IOPMRequest * request );
-
-    inline boolean_t isEmpty( void )
-    {
-        return queue_empty(&fWorkQueue);
-    }
+    bool    queuePMRequest( IOPMRequest * request, IOServicePM * pwrMgt );
+    void    signalWorkAvailable( void );
+    void    incrementProducerCount( void );
 };
 
+//******************************************************************************
+// IOPMCompletionQueue
+//******************************************************************************
+
 class IOPMCompletionQueue : public IOEventSource
 {
     OSDeclareDefaultStructors( IOPMCompletionQueue )
@@ -513,7 +668,7 @@ protected:
 
 public:
     static  IOPMCompletionQueue * create( IOService * inOwner, Action inAction );
-    void    queuePMRequest( IOPMRequest * request );
+    bool    queuePMRequest( IOPMRequest * request );
 };
 
 #endif /* !_IOKIT_IOSERVICEPMPRIVATE_H */
diff --git a/iokit/Kernel/IOServicePrivate.h b/iokit/Kernel/IOServicePrivate.h
index 1d455fbee..873d47660 100644
--- a/iokit/Kernel/IOServicePrivate.h
+++ b/iokit/Kernel/IOServicePrivate.h
@@ -55,6 +55,8 @@ enum {
     kIOServiceConfigState	= 0x04000000,
     kIOServiceTermPhase2State	= 0x01000000,
     kIOServiceTermPhase3State	= 0x00800000,
+    kIOServiceTermPhase1State	= 0x00400000,
+    kIOServiceTerm1WaiterState	= 0x00200000
 };
 
 // options for terminate()
@@ -167,6 +169,7 @@ class IOResources : public IOService
 
 public:
     static IOService * resources( void );
+    virtual bool init( OSDictionary * dictionary = 0 );
     virtual IOWorkLoop * getWorkLoop( ) const;
     virtual bool matchPropertyTable( OSDictionary * table );
     virtual IOReturn setProperties( OSObject * properties );
@@ -195,6 +198,7 @@ public:
 
 extern const OSSymbol * gIOConsoleUsersKey;
 extern const OSSymbol * gIOConsoleSessionUIDKey;
+extern const OSSymbol *	gIOConsoleSessionAuditIDKey;
 extern const OSSymbol * gIOConsoleSessionOnConsoleKey;
 extern const OSSymbol * gIOConsoleSessionSecureInputPIDKey;
 
diff --git a/iokit/Kernel/IOStartIOKit.cpp b/iokit/Kernel/IOStartIOKit.cpp
index 7b70541d6..49397d4cb 100644
--- a/iokit/Kernel/IOStartIOKit.cpp
+++ b/iokit/Kernel/IOStartIOKit.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -38,7 +38,10 @@
 #include <IOKit/IOLib.h>
 #include <IOKit/IOKitKeys.h>
 #include <IOKit/IOKitDebug.h>
+#include <IOKit/pwr_mgt/RootDomain.h>
 #include <IOKit/pwr_mgt/IOPMinformeeList.h>
+#include <IOKit/IOStatisticsPrivate.h>
+#include <IOKit/IOKitKeysPrivate.h>
 
 #include <IOKit/assert.h>
 
@@ -61,7 +64,7 @@ void IOKitInitializeTime( void )
 	t.tv_nsec = 0;
 	IOService::waitForService(
 		IOService::resourceMatching("IORTC"), &t );
-#ifdef ppc
+#if defined(__i386__) || defined(__x86_64__)
 	IOService::waitForService(
 		IOService::resourceMatching("IONVRAM"), &t );
 #endif
@@ -79,6 +82,8 @@ void IOKitResetTime( void )
     clock_get_calendar_microtime(&secs, &microsecs);
     gIOLastWakeTime.tv_sec  = secs;
     gIOLastWakeTime.tv_usec = microsecs;
+
+    IOService::updateConsoleUsers(NULL, kIOMessageSystemHasPoweredOn);
 }
 
 void iokit_post_constructor_init(void)
@@ -90,8 +95,11 @@ void iokit_post_constructor_init(void)
     assert( root );
     IOService::initialize();
     IOCatalogue::initialize();
+    IOStatistics::initialize();
+    OSKext::initialize();
     IOUserClient::initialize();
     IOMemoryDescriptor::initialize();
+    IORootParent::initialize();
 
     // Initializes IOPMinformeeList class-wide shared lock
     IOPMinformeeList::getSharedRecursiveLock();
@@ -107,7 +115,6 @@ void iokit_post_constructor_init(void)
         root->setProperty( kIOKitDiagnosticsKey, obj );
 	obj->release();
     }
-
 }
 
 // From <osfmk/kern/debug.c>
@@ -122,24 +129,20 @@ void StartIOKit( void * p1, void * p2, void * p3, void * p4 )
 {
     IOPlatformExpertDevice *	rootNub;
     int				debugFlags;
-	uint32_t		intThreshold;
 
     if( PE_parse_boot_argn( "io", &debugFlags, sizeof (debugFlags) ))
-	gIOKitDebug = debugFlags;
-
+		gIOKitDebug = debugFlags;
+	
     if( PE_parse_boot_argn( "iotrace", &debugFlags, sizeof (debugFlags) ))
 		gIOKitTrace = debugFlags;
 	
 	// Compat for boot-args
 	gIOKitTrace |= (gIOKitDebug & kIOTraceCompatBootArgs);
-
-    if( PE_parse_boot_argn( "iointthreshold", &intThreshold, sizeof (intThreshold) ))
-		gIOInterruptThresholdNS = intThreshold * 1000;
 	
     // Check for the log synchronous bit set in io
     if (gIOKitDebug & kIOLogSynchronous)
         debug_mode = true;
-	
+
     //
     // Have to start IOKit environment before we attempt to start
     // the C++ runtime environment.  At some stage we have to clean up
diff --git a/iokit/Kernel/IOStatistics.cpp b/iokit/Kernel/IOStatistics.cpp
new file mode 100644
index 000000000..9235b293d
--- /dev/null
+++ b/iokit/Kernel/IOStatistics.cpp
@@ -0,0 +1,1318 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <sys/sysctl.h>
+#include <kern/host.h>
+
+#include <IOKit/system.h>
+#include <libkern/c++/OSKext.h>
+#include <libkern/OSAtomic.h>
+
+#include <IOKit/IOStatisticsPrivate.h>
+#include <IOKit/IOUserClient.h>
+#include <IOKit/IOEventSource.h>
+#include <IOKit/IOKitDebug.h>
+
+#if IOKITSTATS
+
+bool IOStatistics::enabled = false;
+
+uint32_t IOStatistics::sequenceID = 0;
+
+uint32_t IOStatistics::lastClassIndex = 0;
+uint32_t IOStatistics::lastKextIndex = 0;
+
+uint32_t IOStatistics::loadedKexts = 0;
+uint32_t IOStatistics::registeredClasses = 0;
+uint32_t IOStatistics::registeredCounters = 0;
+uint32_t IOStatistics::registeredWorkloops = 0;
+
+uint32_t IOStatistics::attachedEventSources = 0;
+
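+/*
+ * One dependency node is kept pre-allocated: attachWorkLoopEventSource()
+ * consumes it under the lock and then allocates a fresh spare, bailing
+ * out early on a later call if that allocation ever failed.
+ */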
+IOWorkLoopDependency *IOStatistics::nextWorkLoopDependency = NULL;
+
+/* Logging */
+
+#define LOG_LEVEL 0
+
+#define LOG(level, format, ...) \
+do { \
+	if (level <= LOG_LEVEL) \
+		printf(format, ##__VA_ARGS__); \
+} while (0)
+
+/* Locks */
+
+IORWLock *IOStatistics::lock = NULL;
+
+/* Kext tree */
+
+KextNode *IOStatistics::kextHint = NULL;
+
+IOStatistics::KextTreeHead IOStatistics::kextHead = RB_INITIALIZER(&IOStatistics::kextHead);
+
+int IOStatistics::kextNodeCompare(KextNode *e1, KextNode *e2) 
+{
+    if (e1->kext < e2->kext)
+        return -1;
+    else if (e1->kext > e2->kext)
+        return 1;
+    else
+        return 0;
+}
+
+RB_GENERATE(IOStatistics::KextTree, KextNode, link, kextNodeCompare);
+
+/* Kext tree ordered by address */
+
+IOStatistics::KextAddressTreeHead IOStatistics::kextAddressHead = RB_INITIALIZER(&IOStatistics::kextAddressHead);
+
+int IOStatistics::kextAddressNodeCompare(KextNode *e1, KextNode *e2) 
+{
+    if (e1->address < e2->address)
+        return -1;
+    else if (e1->address > e2->address)
+        return 1; 
+    else
+        return 0;
+}
+
+RB_GENERATE(IOStatistics::KextAddressTree, KextNode, addressLink, kextAddressNodeCompare);
+
+/* Class tree */
+
+IOStatistics::ClassTreeHead IOStatistics::classHead = RB_INITIALIZER(&IOStatistics::classHead);
+
+int IOStatistics::classNodeCompare(ClassNode *e1, ClassNode *e2) {
+    if (e1->metaClass < e2->metaClass)
+        return -1;
+    else if (e1->metaClass > e2->metaClass)
+        return 1;
+    else
+        return 0;
+}
+
+RB_GENERATE(IOStatistics::ClassTree, ClassNode, tLink, classNodeCompare);
+
+/* Workloop dependencies */
+
+int IOWorkLoopCounter::loadTagCompare(IOWorkLoopDependency *e1, IOWorkLoopDependency *e2) {
+    if (e1->loadTag < e2->loadTag)
+        return -1;
+    else if (e1->loadTag > e2->loadTag)
+        return 1;
+    else
+        return 0;
+}
+
+RB_GENERATE(IOWorkLoopCounter::DependencyTree, IOWorkLoopDependency, link, IOWorkLoopCounter::loadTagCompare);
+
+/* sysctl stuff */
+
+static int 
+oid_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, int arg2, struct sysctl_req *req)
+{
+	int error = EINVAL;
+	uint32_t request = arg2;
+
+	switch (request)
+	{
+		case kIOStatisticsGeneral:
+			error = IOStatistics::getStatistics(req);
+			break;
+		case kIOStatisticsWorkLoop:
+			error = IOStatistics::getWorkLoopStatistics(req);
+			break;
+		case kIOStatisticsUserClient:
+			error = IOStatistics::getUserClientStatistics(req);
+			break;		
+		default:
+			break;
+	}
+
+	return error;
+}
+ 
+SYSCTL_NODE(_debug, OID_AUTO, iokit_statistics, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IOStatistics");
+
+static SYSCTL_PROC(_debug_iokit_statistics, OID_AUTO, general,
+	    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+	    0, kIOStatisticsGeneral, oid_sysctl, "S", "");
+
+static SYSCTL_PROC(_debug_iokit_statistics, OID_AUTO, workloop,
+	    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+	    0, kIOStatisticsWorkLoop, oid_sysctl, "S", "");
+
+static SYSCTL_PROC(_debug_iokit_statistics, OID_AUTO, userclient,
+	    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED,
+	    0, kIOStatisticsUserClient, oid_sysctl, "S", "");
+
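+/*
+ * These OIDs appear under debug.iokit_statistics.*. A userspace reader
+ * would use the usual two-step sysctl pattern (a usage sketch, not part
+ * of this change): call sysctlbyname("debug.iokit_statistics.general",
+ * NULL, &len, NULL, 0) to learn the required size, then call again with
+ * a buffer of that size to receive the IOStatisticsHeader and the
+ * sections that follow it.
+ */
+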
+void IOStatistics::initialize()
+{
+	if (enabled) {
+		return;
+	}
+
+#if DEVELOPMENT || DEBUG
+	/* Always enabled in development and debug builds. */
+#else
+	/* Only enabled in release builds if the boot argument is set. */
+	if (!(kIOStatistics & gIOKitDebug)) {
+		return;
+	}
+#endif	
+	
+	sysctl_register_oid(&sysctl__debug_iokit_statistics_general);
+	sysctl_register_oid(&sysctl__debug_iokit_statistics_workloop);
+	sysctl_register_oid(&sysctl__debug_iokit_statistics_userclient);
+	
+	lock = IORWLockAlloc();
+	if (!lock) {
+		return;
+	}
+	
+	nextWorkLoopDependency = (IOWorkLoopDependency*)kalloc(sizeof(IOWorkLoopDependency));
+	if (!nextWorkLoopDependency) {
+		return;
+	}
+	
+	enabled = true;
+}
+
+void IOStatistics::onKextLoad(OSKext *kext, kmod_info_t *kmod_info) 
+{
+	KextNode *ke;
+
+	assert(kext && kmod_info);
+
+	if (!enabled) {
+		return;
+	}
+
+	LOG(1, "IOStatistics::onKextLoad: %s, tag %d, address 0x%llx, address end 0x%llx\n",
+		kext->getIdentifierCString(), kmod_info->id, (uint64_t)kmod_info->address, (uint64_t)(kmod_info->address + kmod_info->size));
+
+	ke = (KextNode *)kalloc(sizeof(KextNode));
+	if (!ke) {
+		return;
+	}
+
+	memset(ke, 0, sizeof(KextNode));
+	
+	ke->kext = kext;
+	ke->loadTag = kmod_info->id;
+	ke->address = kmod_info->address;
+	ke->address_end = kmod_info->address + kmod_info->size;
+
+	SLIST_INIT(&ke->classList);
+	TAILQ_INIT(&ke->userClientCallList);
+
+	IORWLockWrite(lock);
+
+	RB_INSERT(KextTree, &kextHead, ke);
+	RB_INSERT(KextAddressTree, &kextAddressHead, ke);
+	
+	sequenceID++;
+	loadedKexts++;
+	lastKextIndex++;
+	
+	IORWLockUnlock(lock);
+}
+
+void IOStatistics::onKextUnload(OSKext *kext) 
+{
+	KextNode sought, *found;
+	
+	assert(kext);
+	
+	if (!enabled) {
+		return;
+	}
+
+	LOG(1, "IOStatistics::onKextUnload: %s\n", kext->getIdentifierCString());
+	
+	IORWLockWrite(lock);
+
+	sought.kext = kext;
+	found = RB_FIND(KextTree, &kextHead, &sought);
+	if (found) {
+		IOWorkLoopCounter *wlc;
+		IOUserClientProcessEntry *uce;
+
+		/* Free up the list of counters */
+		while ((wlc = SLIST_FIRST(&found->workLoopList))) {
+			SLIST_REMOVE_HEAD(&found->workLoopList, link);
+			kfree(wlc, sizeof(IOWorkLoopCounter));
+		}
+
+		/* Free up the user client list */
+		while ((uce = TAILQ_FIRST(&found->userClientCallList))) {
+			TAILQ_REMOVE(&found->userClientCallList, uce, link);
+			kfree(uce, sizeof(IOUserClientProcessEntry));
+		}
+
+		/* Remove from kext trees */
+		RB_REMOVE(KextTree, &kextHead, found);
+		RB_REMOVE(KextAddressTree, &kextAddressHead, found);
+
+		/*
+		 * Clear a matching kextHint to avoid use after free in
+		 * onClassAdded() for a class add after a KEXT unload.
+		 */
+		if (found == kextHint) {
+			kextHint = NULL;
+		}
+		
+		/* Finally, free the class node */
+		kfree(found, sizeof(KextNode));
+		
+		sequenceID++;
+		loadedKexts--;
+	}
+	else {
+		panic("IOStatistics::onKextUnload: cannot find kext: %s", kext->getIdentifierCString());
+	}
+
+	IORWLockUnlock(lock);
+}
+
+void IOStatistics::onClassAdded(OSKext *parentKext, OSMetaClass *metaClass) 
+{
+	ClassNode *ce;
+	KextNode soughtKext, *foundKext = NULL;
+
+	assert(parentKext && metaClass);
+
+	if (!enabled) {
+		return;
+	}
+
+	LOG(1, "IOStatistics::onClassAdded: %s\n", metaClass->getClassName());
+
+	ce = (ClassNode *)kalloc(sizeof(ClassNode));
+	if (!ce) {
+		return;	
+	}
+
+	memset(ce, 0, sizeof(ClassNode));
+
+	IORWLockWrite(lock);
+
+	/* Hinted? */
+	if (kextHint && kextHint->kext == parentKext) {
+		foundKext = kextHint;
+	}
+	else {
+		soughtKext.kext = parentKext;
+		foundKext = RB_FIND(KextTree, &kextHead, &soughtKext);
+	}
+
+	if (foundKext) {
+		ClassNode soughtClass, *foundClass = NULL;
+		const OSMetaClass *superClass;
+
+		ce->metaClass = metaClass;
+		ce->classID = lastClassIndex++;
+		ce->parentKext = foundKext;
+		
+		/* Has superclass? */
+	 	superClass = ce->metaClass->getSuperClass();
+		if (superClass) {
+			soughtClass.metaClass = superClass;
+			foundClass = RB_FIND(ClassTree, &classHead, &soughtClass);
+		}
+		ce->superClassID = foundClass ? foundClass->classID : (uint32_t)(-1);
+
+		SLIST_INIT(&ce->counterList);
+		SLIST_INIT(&ce->userClientList);
+		
+		RB_INSERT(ClassTree, &classHead, ce);
+		SLIST_INSERT_HEAD(&foundKext->classList, ce, lLink);
+		
+		foundKext->classes++;
+		
+		kextHint = foundKext;
+		
+		sequenceID++;	
+		registeredClasses++;
+	}
+	else {
+		panic("IOStatistics::onClassAdded: cannot find parent kext: %s", parentKext->getIdentifierCString());
+	}
+	
+	IORWLockUnlock(lock);
+}
+
+void IOStatistics::onClassRemoved(OSKext *parentKext, OSMetaClass *metaClass) 
+{
+	ClassNode sought, *found;
+
+	assert(parentKext && metaClass);
+
+	if (!enabled) {
+		return;
+	}
+
+	LOG(1, "IOStatistics::onClassRemoved: %s\n", metaClass->getClassName());
+
+	IORWLockWrite(lock);
+
+	sought.metaClass = metaClass;
+	found = RB_FIND(ClassTree, &classHead, &sought);
+	if (found) {
+		IOEventSourceCounter *esc;
+		IOUserClientCounter *ucc;
+		
+		/* Free up the list of counters */
+		while ((esc = SLIST_FIRST(&found->counterList))) {
+			SLIST_REMOVE_HEAD(&found->counterList, link);
+			kfree(esc, sizeof(IOEventSourceCounter));
+		}
+
+		/* Free up the user client list */
+		while ((ucc = SLIST_FIRST(&found->userClientList))) {
+			SLIST_REMOVE_HEAD(&found->userClientList, link);
+			kfree(ucc, sizeof(IOUserClientCounter));
+		}
+
+		/* Remove from class tree */
+		RB_REMOVE(ClassTree, &classHead, found);
+		
+		/* Remove from parent */
+		SLIST_REMOVE(&found->parentKext->classList, found, ClassNode, lLink);
+		
+		/* Finally, free the class node */
+		kfree(found, sizeof(ClassNode));
+		
+		sequenceID++;
+		registeredClasses--;
+	}
+	else {
+		panic("IOStatistics::onClassRemoved: cannot find class: %s", metaClass->getClassName());
+	}
+
+	IORWLockUnlock(lock);
+}
+
+IOEventSourceCounter *IOStatistics::registerEventSource(OSObject *inOwner)
+{
+	IOEventSourceCounter *counter = NULL;
+	ClassNode sought, *found = NULL;
+	boolean_t createDummyCounter = FALSE;
+	
+	assert(inOwner);
+
+	if (!enabled) {
+		return NULL;
+	}
+
+	counter = (IOEventSourceCounter*)kalloc(sizeof(IOEventSourceCounter));
+	if (!counter) {
+		return NULL;
+	}
+	
+	memset(counter, 0, sizeof(IOEventSourceCounter));
+
+	IORWLockWrite(lock);
+
+	/* Workaround for <rdar://problem/7158117> - create a dummy counter when inOwner is bad.
+	 * We use retainCount here as our best indication that the pointer is awry.
+	 */
+	if (inOwner->retainCount > 0xFFFFFF) {
+		kprintf("IOStatistics::registerEventSource - bad metaclass %p\n", inOwner);
+		createDummyCounter = TRUE;
+	}
+	else {
+		sought.metaClass = inOwner->getMetaClass();
+		found = RB_FIND(ClassTree, &classHead, &sought);
+	}
+
+	if (found) {
+		counter->parentClass = found;
+		SLIST_INSERT_HEAD(&found->counterList, counter, link);
+		registeredCounters++;
+	}
+
+	if (!(createDummyCounter || found)) {
+		panic("IOStatistics::registerEventSource: cannot find parent class: %s", inOwner->getMetaClass()->getClassName());
+	}
+	
+	IORWLockUnlock(lock);
+	
+	return counter;
+}
+
+void IOStatistics::unregisterEventSource(IOEventSourceCounter *counter)
+{
+	if (!counter) {
+		return;
+	}
+
+	IORWLockWrite(lock);
+
+	if (counter->parentClass) {
+		SLIST_REMOVE(&counter->parentClass->counterList, counter, IOEventSourceCounter, link);
+		registeredCounters--;
+	}
+	kfree(counter, sizeof(IOEventSourceCounter));
+	
+	IORWLockUnlock(lock);
+}
+
+IOWorkLoopCounter* IOStatistics::registerWorkLoop(IOWorkLoop *workLoop)
+{
+	IOWorkLoopCounter *counter = NULL;
+	KextNode *found;
+
+	assert(workLoop);
+
+	if (!enabled) {
+		return NULL;
+	}
+
+	counter = (IOWorkLoopCounter*)kalloc(sizeof(IOWorkLoopCounter));
+	if (!counter) {
+		return NULL;
+	}
+    
+	memset(counter, 0, sizeof(IOWorkLoopCounter));
+
+	found = getKextNodeFromBacktrace(TRUE);
+	if (!found) {
+		panic("IOStatistics::registerWorkLoop: cannot find parent kext");
+	}
+
+	counter->parentKext = found;
+	counter->workLoop = workLoop;
+	RB_INIT(&counter->dependencyHead);
+	SLIST_INSERT_HEAD(&found->workLoopList, counter, link);
+	registeredWorkloops++;
+
+	releaseKextNode(found);
+
+	return counter;
+}
+
+void IOStatistics::unregisterWorkLoop(IOWorkLoopCounter *counter)
+{
+	if (!counter) {
+		return;
+	}
+	
+	IORWLockWrite(lock);
+
+	SLIST_REMOVE(&counter->parentKext->workLoopList, counter, IOWorkLoopCounter, link);
+	kfree(counter, sizeof(IOWorkLoopCounter));
+	registeredWorkloops--;
+	
+	IORWLockUnlock(lock);
+}
+
+IOUserClientCounter *IOStatistics::registerUserClient(IOUserClient *userClient)
+{
+	ClassNode sought, *found;
+	IOUserClientCounter *counter = NULL;
+
+	assert(userClient);
+
+	if (!enabled) {
+		return NULL;
+	}
+
+	counter = (IOUserClientCounter*)kalloc(sizeof(IOUserClientCounter));
+	if (!counter) {
+		return NULL;
+	}
+	
+	memset(counter, 0, sizeof(IOUserClientCounter));
+
+	IORWLockWrite(lock);
+
+	sought.metaClass = userClient->getMetaClass();
+
+	found = RB_FIND(ClassTree, &classHead, &sought);
+	if (found) {
+		counter->parentClass = found;
+		SLIST_INSERT_HEAD(&found->userClientList, counter, link);
+	}
+	else {
+		panic("IOStatistics::registerUserClient: cannot find parent class: %s", sought.metaClass->getClassName());
+	}
+
+	IORWLockUnlock(lock);
+
+	return counter;
+}
+
+void IOStatistics::unregisterUserClient(IOUserClientCounter *counter)
+{
+	if (!counter) {
+		return;
+	}
+	
+	IORWLockWrite(lock);
+	
+	SLIST_REMOVE(&counter->parentClass->userClientList, counter, IOUserClientCounter, link);
+	kfree(counter, sizeof(IOUserClientCounter));
+
+	IORWLockUnlock(lock);
+}
+
+void IOStatistics::attachWorkLoopEventSource(IOWorkLoopCounter *wlc, IOEventSourceCounter *esc) 
+{
+	if (!wlc) {
+		return;
+	}
+    
+	IORWLockWrite(lock);
+	
+	if (!nextWorkLoopDependency) {
+		IORWLockUnlock(lock);
+		return;
+	}
+	
+	attachedEventSources++;
+	wlc->attachedEventSources++;
+	
+	/* Track the kext dependency */
+	nextWorkLoopDependency->loadTag = esc->parentClass->parentKext->loadTag;
+	if (NULL == RB_INSERT(IOWorkLoopCounter::DependencyTree, &wlc->dependencyHead, nextWorkLoopDependency)) {
+		nextWorkLoopDependency = (IOWorkLoopDependency*)kalloc(sizeof(IOWorkLoopDependency));
+	}
+    
+	IORWLockUnlock(lock);
+}
+
+void IOStatistics::detachWorkLoopEventSource(IOWorkLoopCounter *wlc, IOEventSourceCounter *esc) 
+{
+	IOWorkLoopDependency sought, *found;
+    
+	if (!wlc) {
+		return;
+	}
+    
+	IORWLockWrite(lock);
+
+	attachedEventSources--;
+	wlc->attachedEventSources--;
+	
+	sought.loadTag = esc->parentClass->parentKext->loadTag;
+
+	found = RB_FIND(IOWorkLoopCounter::DependencyTree, &wlc->dependencyHead, &sought);
+	if (found) {
+		RB_REMOVE(IOWorkLoopCounter::DependencyTree, &wlc->dependencyHead, found);
+		kfree(found, sizeof(IOWorkLoopDependency));
+	}
+
+	IORWLockUnlock(lock);
+}
+
+int IOStatistics::getStatistics(sysctl_req *req)
+{
+	int error;
+	uint32_t calculatedSize, size;
+	char *buffer, *ptr;
+	IOStatisticsHeader *header;
+
+	assert(IOStatistics::enabled && req);
+    
+	IORWLockRead(IOStatistics::lock);
+
+	/* Work out how much we need to allocate. IOStatisticsKext is of variable size. */
+	calculatedSize = sizeof(IOStatisticsHeader) + 
+					 sizeof(IOStatisticsGlobal) +
+					(sizeof(IOStatisticsKext) * loadedKexts) + (sizeof(uint32_t) * registeredClasses) + 
+					(sizeof(IOStatisticsMemory) * loadedKexts) +
+					(sizeof(IOStatisticsClass) * registeredClasses) +
+					(sizeof(IOStatisticsCounter) * registeredClasses) +
+					(sizeof(IOStatisticsKextIdentifier) * loadedKexts) +
+					(sizeof(IOStatisticsClassName) * registeredClasses);
+
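+	/*
+	 * Layout of the output buffer, in order: header | global | kext
+	 * records (each trailed by its class index array) | memory | class |
+	 * counter | kext identifier | class name sections; each section
+	 * offset is recorded in the header as the buffer is filled below.
+	 */
+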
+	/* Size request? */
+	if (req->oldptr == USER_ADDR_NULL) {
+		error = SYSCTL_OUT(req, NULL, calculatedSize);
+		goto exit;
+	}
+	
+	/* Read only */
+	if (req->newptr != USER_ADDR_NULL) {
+		error = EPERM;
+		goto exit;
+	}
+
+	buffer = (char*)kalloc(calculatedSize);
+	if (!buffer) {
+		error = ENOMEM;
+		goto exit;
+	}
+
+	memset(buffer, 0, calculatedSize);
+	
+	ptr = buffer;
+	
+	header = (IOStatisticsHeader*)((void*)ptr);
+
+	header->sig = IOSTATISTICS_SIG;
+	header->ver = IOSTATISTICS_VER;
+
+	header->seq = sequenceID;
+
+	ptr += sizeof(IOStatisticsHeader);
+
+	/* Global data - seq, timers, interrupts, etc. */
+	header->globalStatsOffset = sizeof(IOStatisticsHeader);
+	size = copyGlobalStatistics((IOStatisticsGlobal*)((void*)ptr));
+	ptr += size;
+
+	/* Kext statistics */
+	header->kextStatsOffset = header->globalStatsOffset + size;
+	size = copyKextStatistics((IOStatisticsKext*)((void*)ptr));
+	ptr += size;
+
+	/* Memory allocation info */
+	header->memoryStatsOffset = header->kextStatsOffset + size;
+	size = copyMemoryStatistics((IOStatisticsMemory*)((void*)ptr));
+	ptr += size;
+	
+	/* Class statistics */
+	header->classStatsOffset = header->memoryStatsOffset + size;
+	size = copyClassStatistics((IOStatisticsClass*)((void*)ptr));
+	ptr += size;
+	
+	/* Dynamic class counter data */
+	header->counterStatsOffset = header->classStatsOffset + size;
+	size = copyCounterStatistics((IOStatisticsCounter*)((void*)ptr));
+	ptr += size;
+	
+	/* Kext identifiers */
+	header->kextIdentifiersOffset = header->counterStatsOffset + size;
+	size = copyKextIdentifiers((IOStatisticsKextIdentifier*)((void*)ptr));
+	ptr += size;
+
+	/* Class names */
+	header->classNamesOffset = header->kextIdentifiersOffset + size;
+	size = copyClassNames((IOStatisticsClassName*)ptr);
+	ptr += size;
+	
+	LOG(2, "IOStatistics::getStatistics - calculatedSize 0x%x, kexts 0x%x, classes 0x%x.\n",
+	 	calculatedSize, loadedKexts, registeredClasses);
+
+	assert( (uint32_t)(ptr - buffer) == calculatedSize );
+
+	error = SYSCTL_OUT(req, buffer, calculatedSize);
+
+	kfree(buffer, calculatedSize);
+
+exit:
+	IORWLockUnlock(IOStatistics::lock);
+	return error;
+}
+
+int IOStatistics::getWorkLoopStatistics(sysctl_req *req)
+{
+	int error;
+	uint32_t calculatedSize, size;
+	char *buffer;
+	IOStatisticsWorkLoopHeader *header;
+
+	assert(IOStatistics::enabled && req);
+
+	IORWLockRead(IOStatistics::lock);
+
+	/* Approximate how much we need to allocate (worst-case estimate) */
+	calculatedSize = sizeof(IOStatisticsWorkLoop) * registeredWorkloops +
+					 sizeof(uint32_t) * attachedEventSources;
+
+	/* Size request? */
+	if (req->oldptr == USER_ADDR_NULL) {
+		error = SYSCTL_OUT(req, NULL, calculatedSize);
+		goto exit;
+	}
+	
+	/* Read only */
+	if (req->newptr != USER_ADDR_NULL) {
+		error = EPERM;
+		goto exit;
+	}
+
+	buffer = (char*)kalloc(calculatedSize);
+	if (!buffer) {
+		error = ENOMEM;
+		goto exit;
+	}
+
+	header = (IOStatisticsWorkLoopHeader*)((void*)buffer);
+	
+	header->sig = IOSTATISTICS_SIG_WORKLOOP;
+	header->ver = IOSTATISTICS_VER;
+
+	header->seq = sequenceID;
+	
+	header->workloopCount = registeredWorkloops;
+
+	size = copyWorkLoopStatistics(&header->workLoopStats);
+
+	LOG(2, "IOStatistics::getWorkLoopStatistics: calculatedSize %d, size %d\n", calculatedSize, size);
+
+	assert( size <= calculatedSize );
+
+	error = SYSCTL_OUT(req, buffer, size);
+
+	kfree(buffer, calculatedSize);
+
+exit:
+	IORWLockUnlock(IOStatistics::lock);
+	return error;
+}
+
+int IOStatistics::getUserClientStatistics(sysctl_req *req)
+{	
+	int error;
+	uint32_t calculatedSize, size;
+	char *buffer;
+	uint32_t requestedLoadTag = 0;
+	IOStatisticsUserClientHeader *header;
+
+	assert(IOStatistics::enabled && req);
+
+	IORWLockRead(IOStatistics::lock);
+
+	/* Work out how much we need to allocate */
+	calculatedSize = sizeof(IOStatisticsUserClientHeader) + 
+					 sizeof(IOStatisticsUserClientCall) * IOKIT_STATISTICS_RECORDED_USERCLIENT_PROCS * loadedKexts;
+	
+	/* Size request? */
+	if (req->oldptr == USER_ADDR_NULL) {
+		error = SYSCTL_OUT(req, NULL, calculatedSize);
+		goto exit;
+	}
+
+	/* Kext request (potentially) valid? */
+	if (!req->newptr || req->newlen < sizeof(requestedLoadTag)) {
+		error = EINVAL;
+		goto exit;
+	}
+
+	SYSCTL_IN(req, &requestedLoadTag, sizeof(requestedLoadTag));
+	
+	LOG(2, "IOStatistics::getUserClientStatistics - requesting kext w/load tag: %d\n", requestedLoadTag);
+
+	buffer = (char*)kalloc(calculatedSize);
+	if (!buffer) {
+		error = ENOMEM;
+		goto exit;
+	}
+
+	header = (IOStatisticsUserClientHeader*)((void*)buffer);
+
+	header->sig = IOSTATISTICS_SIG_USERCLIENT;
+	header->ver = IOSTATISTICS_VER;
+	
+	header->seq = sequenceID;
+
+	header->processes = 0;
+
+	size = copyUserClientStatistics(header, requestedLoadTag);
+	
+	assert((sizeof(IOStatisticsUserClientHeader) + size) <= calculatedSize);
+	
+	if (size) {
+		error = SYSCTL_OUT(req, buffer, sizeof(IOStatisticsUserClientHeader) + size);
+	}
+	else {
+		error = EINVAL;
+	}
+
+	kfree(buffer, calculatedSize);
+
+exit:
+	IORWLockUnlock(IOStatistics::lock);
+	return error;
+}
+
+uint32_t IOStatistics::copyGlobalStatistics(IOStatisticsGlobal *stats)
+{
+	stats->kextCount = loadedKexts;
+	stats->classCount = registeredClasses;
+	stats->workloops = registeredWorkloops;
+	
+	return sizeof(IOStatisticsGlobal);
+}
+
+uint32_t IOStatistics::copyKextStatistics(IOStatisticsKext *stats)
+{
+	KextNode *ke;
+	ClassNode *ce;
+	uint32_t index = 0;
+
+	RB_FOREACH(ke, KextTree, &kextHead) {
+		stats->loadTag = ke->loadTag;
+		ke->kext->getSizeInfo(&stats->loadSize, &stats->wiredSize);
+
+		stats->classes = ke->classes;
+
+		/* Append indices of owned classes */
+		SLIST_FOREACH(ce, &ke->classList, lLink) {
+			stats->classIndexes[index++] = ce->classID;
+		}
+		
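+		/* Kext records are variable length: step over the trailing class index array. */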
+		stats = (IOStatisticsKext *)((void*)((char*)stats + sizeof(IOStatisticsKext) + (ke->classes * sizeof(uint32_t))));
+	}
+
+	return (sizeof(IOStatisticsKext) * loadedKexts + sizeof(uint32_t) * registeredClasses);
+}
+
+uint32_t IOStatistics::copyMemoryStatistics(IOStatisticsMemory *stats)
+{
+	KextNode *ke;
+
+	RB_FOREACH(ke, KextTree, &kextHead) {
+		stats->allocatedSize = ke->memoryCounters[kIOStatisticsMalloc];
+		stats->freedSize = ke->memoryCounters[kIOStatisticsFree]; 
+		stats->allocatedAlignedSize = ke->memoryCounters[kIOStatisticsMallocAligned];
+		stats->freedAlignedSize = ke->memoryCounters[kIOStatisticsFreeAligned];
+		stats->allocatedContiguousSize = ke->memoryCounters[kIOStatisticsMallocContiguous];
+		stats->freedContiguousSize = ke->memoryCounters[kIOStatisticsFreeContiguous];
+		stats->allocatedPageableSize = ke->memoryCounters[kIOStatisticsMallocPageable];
+		stats->freedPageableSize = ke->memoryCounters[kIOStatisticsFreePageable];
+		stats++;
+	}
+	
+	return (sizeof(IOStatisticsMemory) * loadedKexts);
+}
+
+uint32_t IOStatistics::copyClassStatistics(IOStatisticsClass *stats)
+{
+	KextNode *ke;
+	ClassNode *ce;
+
+	RB_FOREACH(ke, KextTree, &kextHead) {
+		SLIST_FOREACH(ce, &ke->classList, lLink) {
+			stats->classID = ce->classID;
+			stats->superClassID = ce->superClassID;		
+			stats->classSize = ce->metaClass->getClassSize();
+
+			stats++;
+		}
+	}
+
+	return sizeof(IOStatisticsClass) * registeredClasses;
+}
+
+uint32_t IOStatistics::copyCounterStatistics(IOStatisticsCounter *stats)
+{
+	KextNode *ke;
+	ClassNode *ce;
+
+	RB_FOREACH(ke, KextTree, &kextHead) {
+		SLIST_FOREACH(ce, &ke->classList, lLink) {
+			IOUserClientCounter *userClientCounter;
+			IOEventSourceCounter *counter;
+
+			stats->classID = ce->classID;
+			stats->classInstanceCount = ce->metaClass->getInstanceCount();
+
+			IOStatisticsUserClients *uc = &stats->userClientStatistics;
+
+			/* User client counters */
+			SLIST_FOREACH(userClientCounter, &ce->userClientList, link) {
+				uc->clientCalls += userClientCounter->clientCalls;
+				uc->created++;
+			}
+
+			IOStatisticsInterruptEventSources *iec = &stats->interruptEventSourceStatistics;
+			IOStatisticsInterruptEventSources *fiec = &stats->filterInterruptEventSourceStatistics;
+			IOStatisticsTimerEventSources *tec = &stats->timerEventSourceStatistics;
+			IOStatisticsCommandGates *cgc = &stats->commandGateStatistics;
+			IOStatisticsCommandQueues *cqc = &stats->commandQueueStatistics;
+			IOStatisticsDerivedEventSources *dec = &stats->derivedEventSourceStatistics;
+
+			/* Event source counters */
+			SLIST_FOREACH(counter, &ce->counterList, link) {
+				switch (counter->type) {	
+					case kIOStatisticsInterruptEventSourceCounter:
+						iec->created++;
+						iec->produced += counter->u.interrupt.produced;
+						iec->checksForWork += counter->u.interrupt.checksForWork;
+						break;
+					case kIOStatisticsFilterInterruptEventSourceCounter:
+						fiec->created++;
+						fiec->produced += counter->u.filter.produced;
+						fiec->checksForWork += counter->u.filter.checksForWork;
+						break;
+					case kIOStatisticsTimerEventSourceCounter:
+						tec->created++;
+						tec->timeouts += counter->u.timer.timeouts;
+						tec->checksForWork += counter->u.timer.checksForWork;
+						tec->timeOnGate += counter->timeOnGate;
+						tec->closeGateCalls += counter->closeGateCalls;
+						tec->openGateCalls += counter->openGateCalls;
+						break;
+					case kIOStatisticsCommandGateCounter:
+						cgc->created++;
+						cgc->timeOnGate += counter->timeOnGate;
+						cgc->actionCalls += counter->u.commandGate.actionCalls;
+						break;
+					case kIOStatisticsCommandQueueCounter:
+						cqc->created++;
+						cqc->actionCalls += counter->u.commandQueue.actionCalls;
+						break;
+					case kIOStatisticsDerivedEventSourceCounter:
+						dec->created++;
+						dec->timeOnGate += counter->timeOnGate;
+						dec->closeGateCalls += counter->closeGateCalls;
+						dec->openGateCalls += counter->openGateCalls;
+						break;
+					default:
+						break;
+				}
+			}
+		
+			stats++;
+		}
+	}
+
+	return sizeof(IOStatisticsCounter) * registeredClasses;
+}
+
+uint32_t IOStatistics::copyKextIdentifiers(IOStatisticsKextIdentifier *kextIDs)
+{
+	KextNode *ke;
+
+	RB_FOREACH(ke, KextTree, &kextHead) {
+		strncpy(kextIDs->identifier, ke->kext->getIdentifierCString(), kIOStatisticsDriverNameLength);
+		kextIDs++;
+	}
+
+	return (sizeof(IOStatisticsKextIdentifier) * loadedKexts);
+}
+
+uint32_t IOStatistics::copyClassNames(IOStatisticsClassName *classNames)
+{
+	KextNode *ke;
+	ClassNode *ce;
+
+	RB_FOREACH(ke, KextTree, &kextHead) {
+		SLIST_FOREACH(ce, &ke->classList, lLink) {
+			strncpy(classNames->name, ce->metaClass->getClassName(), kIOStatisticsClassNameLength);
+			classNames++;
+		}
+	}
+		
+	return (sizeof(IOStatisticsClassName) * registeredClasses);
+}
+
+uint32_t IOStatistics::copyWorkLoopStatistics(IOStatisticsWorkLoop *stats) 
+{
+	KextNode *ke;
+	IOWorkLoopCounter *wlc;
+	IOWorkLoopDependency *dependentNode;
+	uint32_t size, accumulatedSize = 0;
+
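+	/*
+	 * IOStatisticsWorkLoop records are variable length: the fixed struct
+	 * is followed by one load tag per dependent kext, so advance by the
+	 * computed byte size rather than by struct-typed pointer arithmetic.
+	 */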
+	RB_FOREACH(ke, KextTree, &kextHead) {
+		SLIST_FOREACH(wlc, &ke->workLoopList, link) {
+			stats->kextLoadTag = ke->loadTag;
+			stats->attachedEventSources = wlc->attachedEventSources;
+			stats->timeOnGate = wlc->timeOnGate;
+			stats->dependentKexts = 0;
+			RB_FOREACH(dependentNode, IOWorkLoopCounter::DependencyTree, &wlc->dependencyHead) {
+				stats->dependentKextLoadTags[stats->dependentKexts] = dependentNode->loadTag;
+				stats->dependentKexts++;
+			}
+			
+			size = sizeof(IOStatisticsWorkLoop) + (sizeof(uint32_t) * stats->dependentKexts);
+			
+			accumulatedSize += size;
+			stats = (IOStatisticsWorkLoop*)((void*)((char*)stats + size));
+		}
+	}
+
+	return accumulatedSize;
+}
+
+uint32_t IOStatistics::copyUserClientStatistics(IOStatisticsUserClientHeader *stats, uint32_t loadTag) 
+{
+	KextNode *sought, *found = NULL;
+	uint32_t procs = 0;
+	IOUserClientProcessEntry *processEntry;
+
+	RB_FOREACH(sought, KextTree, &kextHead) {
+		if (sought->loadTag == loadTag) {
+			found = sought;
+			break;
+		}
+	}
+	
+	if (!found) {
+		return 0;
+	}
+
+	TAILQ_FOREACH(processEntry, &found->userClientCallList, link) {
+		strncpy(stats->userClientCalls[procs].processName, processEntry->processName, kIOStatisticsProcessNameLength);
+		stats->userClientCalls[procs].pid = processEntry->pid;
+		stats->userClientCalls[procs].calls = processEntry->calls;
+		stats->processes++;
+		procs++;
+	}
+
+	return sizeof(IOStatisticsUserClientCall) * stats->processes;
+}
+
+void IOStatistics::storeUserClientCallInfo(IOUserClient *userClient, IOUserClientCounter *counter)
+{	
+	OSString *ossUserClientCreator = NULL;
+	int32_t pid = -1;
+	KextNode *parentKext;
+	IOUserClientProcessEntry *entry, *nextEntry, *prevEntry = NULL;
+	uint32_t count = 0;
+	const char *ptr = NULL;
+	OSObject *obj;
+	
+	/* TODO: see if this can be more efficient */
+	obj = userClient->copyProperty("IOUserClientCreator",
+					gIOServicePlane,
+					kIORegistryIterateRecursively | kIORegistryIterateParents);
+
+	if (!obj)
+		goto err_nounlock;
+
+	ossUserClientCreator = OSDynamicCast(OSString, obj);
+
+	if (ossUserClientCreator) {
+		uint32_t len, lenIter = 0; 
+		
+		ptr = ossUserClientCreator->getCStringNoCopy();
+		len = ossUserClientCreator->getLength();
+		
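+		/*
+		 * The IOUserClientCreator property is expected to look like
+		 * "pid <pid>, <name>": skip to the first space, accumulate the
+		 * decimal pid up to the comma, then step past ", " to the name.
+		 */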
+		while ((*ptr != ' ') && (lenIter < len)) {
+			ptr++;
+			lenIter++;
+		}
+		
+		if (lenIter < len) {
+			ptr++; // Skip the space
+			lenIter++;
+			pid = 0;
+			while ( (*ptr != ',') && (lenIter < len)) {
+				pid = pid*10 + (*ptr - '0');
+				ptr++;
+				lenIter++;
+			}
+			
+			if (lenIter == len) {
+				pid = -1;
+			} else {
+				ptr += 2;
+			}
+		}
+	}
+	
+	if (-1 == pid)
+		goto err_nounlock;
+	
+	IORWLockWrite(lock);
+
+	parentKext = counter->parentClass->parentKext;
+
+	TAILQ_FOREACH(entry, &parentKext->userClientCallList, link) {
+		if (entry->pid == pid) {
+			/* Found: bump the call count; unless already at the head, re-insert in sorted order below */
+			entry->calls++;
+			if (count) {
+				TAILQ_REMOVE(&parentKext->userClientCallList, entry, link);
+				break;
+			}
+			else {
+				/* At the head already, so increment and return */
+				goto err_unlock;
+			}
+		}
+		
+		count++;
+	}
+
+	if (!entry) {
+		if (count == IOKIT_STATISTICS_RECORDED_USERCLIENT_PROCS) {
+			/* Max elements hit, so reuse the last */
+			entry = TAILQ_LAST(&parentKext->userClientCallList, ProcessEntryList);
+			TAILQ_REMOVE(&parentKext->userClientCallList, entry, link);
+		}
+		else {
+			/* Otherwise, allocate a new entry */
+			entry = (IOUserClientProcessEntry*)kalloc(sizeof(IOUserClientProcessEntry));
+			if (!entry) {
+			    IORWLockUnlock(lock);
+				return;
+			}
+		}
+
+		strncpy(entry->processName, ptr, kIOStatisticsProcessNameLength);
+		entry->pid = pid;
+		entry->calls = 1;
+	}
+	
+	TAILQ_FOREACH(nextEntry, &parentKext->userClientCallList, link) {
+		if (nextEntry->calls <= entry->calls)
+			break;
+			
+		prevEntry = nextEntry;
+	}
+	
+	if (!prevEntry)
+		TAILQ_INSERT_HEAD(&parentKext->userClientCallList, entry, link);
+	else
+		TAILQ_INSERT_AFTER(&parentKext->userClientCallList, prevEntry, entry, link);
+	
+err_unlock:
+	IORWLockUnlock(lock);
+        
+err_nounlock:
+	if (obj)
+		obj->release();
+}
+
+void IOStatistics::countUserClientCall(IOUserClient *client) {
+	IOUserClient::ExpansionData *data;
+	IOUserClientCounter *counter;
+    
+	/* Guard against an uninitialized client object - <rdar://problem/8577946> */
+	if (!(data = client->reserved)) {
+		return;
+	}
+    
+	if ((counter = data->counter)) {
+		storeUserClientCallInfo(client, counter);
+		OSIncrementAtomic(&counter->clientCalls);
+	}
+}
+
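+/*
+ * Resolve the calling kext by scanning the backtrace against the
+ * address-ordered kext tree. On success the statistics lock (write or
+ * read, per the argument) is left held so the node cannot be freed;
+ * callers must balance with releaseKextNode().
+ */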
+KextNode *IOStatistics::getKextNodeFromBacktrace(boolean_t write) {
+	const uint32_t btMin = 3;
+
+	void *bt[16];
+	unsigned btCount = sizeof(bt) / sizeof(bt[0]);
+	vm_offset_t *scanAddr = NULL;
+	uint32_t i;
+	KextNode *found = NULL, *ke = NULL;
+
+	btCount = OSBacktrace(bt, btCount);
+
+	if (write) {
+		IORWLockWrite(lock);
+	} else {
+		IORWLockRead(lock);
+	}
+
+	/* Ignore first levels */
+	scanAddr = (vm_offset_t *)&bt[btMin - 1];
+
+	for (i = btMin - 1; i < btCount; i++, scanAddr++) {
+		ke = RB_ROOT(&kextAddressHead);
+		while (ke) {
+			if (*scanAddr < ke->address) {
+				ke = RB_LEFT(ke, addressLink);
+			}
+			else {
+				if ((*scanAddr < ke->address_end) && (*scanAddr >= ke->address)) {
+ 					if (!ke->kext->isKernelComponent()) {
+ 						return ke;
+					} else {
+						found = ke;
+					}
+				}
+				ke = RB_RIGHT(ke, addressLink);
+			}
+		}
+	}
+
+	if (!found) {
+		IORWLockUnlock(lock);
+	}
+  
+	return found;
+}
+  
+void IOStatistics::releaseKextNode(KextNode *node) {
+#pragma unused(node)
+	IORWLockUnlock(lock);
+}
+
+/* IOLib allocations */
+void IOStatistics::countAlloc(uint32_t index, vm_size_t size) {
+	KextNode *ke;
+  
+	if (!enabled) {
+		return;
+	}
+
+	ke = getKextNodeFromBacktrace(FALSE);
+	if (ke) {
+		OSAddAtomic(size, &ke->memoryCounters[index]);
+		releaseKextNode(ke);
+	}
+}
+
+#endif /* IOKITSTATS */
diff --git a/iokit/Kernel/IOTimerEventSource.cpp b/iokit/Kernel/IOTimerEventSource.cpp
index 112deeee7..c71feccf0 100644
--- a/iokit/Kernel/IOTimerEventSource.cpp
+++ b/iokit/Kernel/IOTimerEventSource.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2000, 2009-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -53,13 +53,42 @@ OSMetaClassDefineReservedUnused(IOTimerEventSource, 5);
 OSMetaClassDefineReservedUnused(IOTimerEventSource, 6);
 OSMetaClassDefineReservedUnused(IOTimerEventSource, 7);
 
+#if IOKITSTATS
+
+#define IOStatisticsInitializeCounter() \
+do { \
+	IOStatistics::setCounterType(IOEventSource::reserved->counter, kIOStatisticsTimerEventSourceCounter); \
+} while (0)
+
+#define IOStatisticsOpenGate() \
+do { \
+	IOStatistics::countOpenGate(me->IOEventSource::reserved->counter); \
+} while (0)
+
+#define IOStatisticsCloseGate() \
+do { \
+	IOStatistics::countCloseGate(me->IOEventSource::reserved->counter); \
+} while (0)
+
+#define IOStatisticsTimeout() \
+do { \
+	IOStatistics::countTimerTimeout(me->IOEventSource::reserved->counter); \
+} while (0)
+
+#else
+
+#define IOStatisticsInitializeCounter()
+#define IOStatisticsOpenGate()
+#define IOStatisticsCloseGate()
+#define IOStatisticsTimeout()
+
+#endif /* IOKITSTATS */
+
 // 
 // reserved != 0 means IOTimerEventSource::timeoutAndRelease is being used,
 // not a subclassed implementation. 
 //
 
-bool IOTimerEventSource::checkForWork() { return false; }
-
 // Timeout handler function. This function is called by the kernel when
 // the timeout interval expires.
 //
@@ -67,6 +96,8 @@ void IOTimerEventSource::timeout(void *self)
 {
     IOTimerEventSource *me = (IOTimerEventSource *) self;
 
+    IOStatisticsTimeout();
+
     if (me->enabled && me->action)
     {
         IOWorkLoop *
@@ -75,6 +106,7 @@ void IOTimerEventSource::timeout(void *self)
         {
             Action doit;
             wl->closeGate();
+            IOStatisticsCloseGate();
             doit = (Action) me->action;
             if (doit && me->enabled && AbsoluteTime_to_scalar(&me->abstime))
             {
@@ -82,7 +114,7 @@ void IOTimerEventSource::timeout(void *self)
             	
             	if (trace)
                 	IOTimeStampStartConstant(IODBG_TIMES(IOTIMES_ACTION),
-                                    (uintptr_t) doit, (uintptr_t) me->owner);
+											 (uintptr_t) doit, (uintptr_t) me->owner);
 				
                 (*doit)(me->owner, me);
                 
@@ -90,6 +122,7 @@ void IOTimerEventSource::timeout(void *self)
                 	IOTimeStampEndConstant(IODBG_TIMES(IOTIMES_ACTION),
 										   (uintptr_t) doit, (uintptr_t) me->owner);
             }
+            IOStatisticsOpenGate();
             wl->openGate();
         }
     }
@@ -102,6 +135,8 @@ void IOTimerEventSource::timeoutAndRelease(void * self, void * c)
 	   must be cast to "long" before, in order to tell GCC we're not truncating a pointer. */
 	SInt32 count = (SInt32) (long) c;
 
+    IOStatisticsTimeout();
+	
     if (me->enabled && me->action)
     {
         IOWorkLoop *
@@ -110,6 +145,7 @@ void IOTimerEventSource::timeoutAndRelease(void * self, void * c)
         {
             Action doit;
             wl->closeGate();
+            IOStatisticsCloseGate();
             doit = (Action) me->action;
             if (doit && (me->reserved->calloutGeneration == count))
             {
@@ -117,7 +153,7 @@ void IOTimerEventSource::timeoutAndRelease(void * self, void * c)
             	
             	if (trace)
                 	IOTimeStampStartConstant(IODBG_TIMES(IOTIMES_ACTION),
-                                    (uintptr_t) doit, (uintptr_t) me->owner);
+											 (uintptr_t) doit, (uintptr_t) me->owner);
 				
                 (*doit)(me->owner, me);
                 
@@ -125,6 +161,7 @@ void IOTimerEventSource::timeoutAndRelease(void * self, void * c)
                 	IOTimeStampEndConstant(IODBG_TIMES(IOTIMES_ACTION),
 										   (uintptr_t) doit, (uintptr_t) me->owner);
             }
+            IOStatisticsOpenGate();
             wl->openGate();
         }
     }
@@ -151,6 +188,8 @@ bool IOTimerEventSource::init(OSObject *inOwner, Action inAction)
     if (!calloutEntry)
         return false;
 
+    IOStatisticsInitializeCounter();
+
     return true;
 }
 
diff --git a/iokit/Kernel/IOUserClient.cpp b/iokit/Kernel/IOUserClient.cpp
index 084471c3c..f031afd66 100644
--- a/iokit/Kernel/IOUserClient.cpp
+++ b/iokit/Kernel/IOUserClient.cpp
@@ -37,8 +37,22 @@
 #include <IOKit/IOMemoryDescriptor.h>
 #include <IOKit/IOBufferMemoryDescriptor.h>
 #include <IOKit/IOLib.h>
+#include <IOKit/IOStatisticsPrivate.h>
+#include <IOKit/IOTimeStamp.h>
 #include <libkern/OSDebug.h>
 #include <sys/proc.h>
+#include <sys/kauth.h>
+
+#if CONFIG_MACF
+
+extern "C" {
+#include <security/mac_framework.h>
+};
+#include <sys/kauth.h>
+
+#define IOMACF_LOG 0
+
+#endif /* CONFIG_MACF */
 
 #include <IOKit/assert.h>
 
@@ -57,6 +71,32 @@ enum
     kIOUCAsync64Flag = 1ULL
 };
 
+#if IOKITSTATS
+
+#define IOStatisticsRegisterCounter() \
+do { \
+	reserved->counter = IOStatistics::registerUserClient(this); \
+} while (0)
+
+#define IOStatisticsUnregisterCounter() \
+do { \
+	if (reserved) \
+		IOStatistics::unregisterUserClient(reserved->counter); \
+} while (0)
+
+#define IOStatisticsClientCall() \
+do { \
+	IOStatistics::countUserClientCall(client); \
+} while (0)
+
+#else
+
+#define IOStatisticsRegisterCounter()
+#define IOStatisticsUnregisterCounter()
+#define IOStatisticsClientCall()
+
+#endif /* IOKITSTATS */
+
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 // definitions we should get from osfmk
@@ -383,9 +423,11 @@ iokit_client_died( io_object_t obj, ipc_port_t /* port */,
 
     if( IKOT_IOKIT_CONNECT == type)
     {
-	if( (client = OSDynamicCast( IOUserClient, obj )))
+	if( (client = OSDynamicCast( IOUserClient, obj ))) {
+		IOStatisticsClientCall();
 	    client->clientDied();
     }
+    }
     else if( IKOT_IOKIT_OBJECT == type)
     {
 	if( (map = OSDynamicCast( IOMemoryMap, obj )))
@@ -851,21 +893,27 @@ static OSDictionary * CopyConsoleUser(UInt32 uid)
     return user;
 }
 
-static bool IOUCIsBackgroundTask(task_t task, bool * isBg)
+static OSDictionary * CopyUserOnConsole(void)
 {
-    kern_return_t               kr;
-    task_category_policy_data_t info;
-    mach_msg_type_number_t      count = TASK_CATEGORY_POLICY_COUNT;
-    boolean_t                   get_default = false;
-
-    kr = task_policy_get(current_task(),
-                         TASK_CATEGORY_POLICY,
-                         (task_policy_t) &info,
-                         &count,
-                         &get_default);
-
-    *isBg = ((KERN_SUCCESS == kr) && (info.role == TASK_THROTTLE_APPLICATION));
-    return (kr);
+    OSArray * array;
+    OSDictionary * user = 0; 
+    
+    if ((array = OSDynamicCast(OSArray,
+	IORegistryEntry::getRegistryRoot()->copyProperty(gIOConsoleUsersKey))))
+    {
+	for (unsigned int idx = 0;
+		(user = OSDynamicCast(OSDictionary, array->getObject(idx)));
+		idx++)
+	{
+	    if (kOSBooleanTrue == user->getObject(gIOConsoleSessionOnConsoleKey))
+	    {
+		user->retain();
+		break;
+	    }
+	}
+	array->release();
+    }
+    return (user);
 }
 
 IOReturn IOUserClient::clientHasPrivilege( void * securityToken,
@@ -882,12 +930,41 @@ IOReturn IOUserClient::clientHasPrivilege( void * securityToken,
     if (!strncmp(privilegeName, kIOClientPrivilegeForeground, 
                 sizeof(kIOClientPrivilegeForeground)))
     {
-        bool isBg;
-        kern_return_t kr = IOUCIsBackgroundTask(current_task(), &isBg);
+	/* is graphics access denied for current task? */
+	if (proc_get_task_selfgpuacc_deny() != 0) 
+		return (kIOReturnNotPrivileged);
+	else 
+		return (kIOReturnSuccess);
+    }
 
-        if (KERN_SUCCESS != kr)
-            return (kr);
-        return (isBg ? kIOReturnNotPrivileged : kIOReturnSuccess);
+    if (!strncmp(privilegeName, kIOClientPrivilegeConsoleSession,
+                                sizeof(kIOClientPrivilegeConsoleSession)))
+    {
+	kauth_cred_t cred;
+	proc_t       p;
+
+        task = (task_t) securityToken;
+	if (!task)
+	    task = current_task();
+	p = (proc_t) get_bsdtask_info(task);
+	kr = kIOReturnNotPrivileged;
+
+	if (p && (cred = kauth_cred_proc_ref(p)))
+	{
+	    user = CopyUserOnConsole();
+	    if (user)
+	    {
+		OSNumber * num;
+		if ((num = OSDynamicCast(OSNumber, user->getObject(gIOConsoleSessionAuditIDKey)))
+		  && (cred->cr_audit.as_aia_p->ai_asid == (au_asid_t) num->unsigned32BitValue()))
+		{
+		    kr = kIOReturnSuccess;
+		}
+		user->release();
+	    }
+	    kauth_cred_unref(&cred);
+	}
+	return (kr);
     }
 
     if ((secureConsole = !strncmp(privilegeName, kIOClientPrivilegeSecureConsoleProcess,
@@ -895,7 +972,7 @@ IOReturn IOUserClient::clientHasPrivilege( void * securityToken,
         task = (task_t)((IOUCProcessToken *)securityToken)->token;
     else
         task = (task_t)securityToken;
-    
+
     count = TASK_SECURITY_TOKEN_COUNT;
     kr = task_info( task, TASK_SECURITY_TOKEN, (task_info_t) &token, &count );
 
@@ -935,28 +1012,28 @@ IOReturn IOUserClient::clientHasPrivilege( void * securityToken,
 
 bool IOUserClient::init()
 {
-    if( getPropertyTable())
-        return true;
-    else
-        return super::init();
+	if (getPropertyTable() || super::init())
+		return reserve();
+	
+	return false;
 }
 
 bool IOUserClient::init(OSDictionary * dictionary)
 {
-    if( getPropertyTable())
-        return true;
-    else
-        return super::init(dictionary);
+	if (getPropertyTable() || super::init(dictionary))
+		return reserve();
+	
+	return false;
 }
 
 bool IOUserClient::initWithTask(task_t owningTask,
                                 void * securityID,
                                 UInt32 type )
-{
-    if( getPropertyTable())
-        return true;
-    else
-        return super::init();
+{	
+	if (getPropertyTable() || super::init())
+		return reserve();
+	
+	return false;
 }
 
 bool IOUserClient::initWithTask(task_t owningTask,
@@ -972,11 +1049,30 @@ bool IOUserClient::initWithTask(task_t owningTask,
     return( ok );
 }
 
+bool IOUserClient::reserve()
+{		
+	if (!reserved) {
+		reserved = IONew(ExpansionData, 1);
+		if (!reserved) {
+			return false;
+		}
+	}
+
+	IOStatisticsRegisterCounter();
+	
+	return true;
+}
+
 void IOUserClient::free()
 {
     if( mappings)
         mappings->release();
+		
+    IOStatisticsUnregisterCounter();
 
+    if (reserved)
+        IODelete(reserved, ExpansionData, 1);
+		
     super::free();
 }
 
@@ -1802,6 +1898,7 @@ kern_return_t is_io_connect_get_notification_semaphore(
 {
     CHECK( IOUserClient, connection, client );
 
+    IOStatisticsClientCall();
     return( client->getNotificationSemaphore( (UInt32) notification_type,
                                               semaphore ));
 }
@@ -2211,11 +2308,17 @@ kern_return_t is_io_registry_entry_set_properties
         obj = OSUnserializeXML( (const char *) data );
 	vm_deallocate( kernel_map, data, propertiesCnt );
 
-        if( obj) {
+	if (!obj)
+	    res = kIOReturnBadArgument;
+#if CONFIG_MACF
+	else if (0 != mac_iokit_check_set_properties(kauth_cred_get(),
+	    registry_entry, obj))
+	    res = kIOReturnNotPermitted;
+#endif
+	else
             res = entry->setProperties( obj );
-            obj->release();
-        } else
-            res = kIOReturnBadArgument;
+	if (obj)
+	    obj->release();
     } else
         res = err;
 
@@ -2305,30 +2408,6 @@ kern_return_t is_io_service_request_probe(
     return( service->requestProbe( options ));
 }
 
-
-/* Routine io_service_open */
-kern_return_t is_io_service_open(
-	io_object_t _service,
-	task_t owningTask,
-	uint32_t connect_type,
-	io_object_t *connection )
-{
-    IOUserClient	*	client;
-    IOReturn 			err;
-
-    CHECK( IOService, _service, service );
-
-    err = service->newUserClient( owningTask, (void *) owningTask,
-		connect_type, 0, &client );
-
-    if( err == kIOReturnSuccess) {
-	assert( OSDynamicCast(IOUserClient, client) );
-	*connection = client;
-    }
-
-    return( err);
-}
-
 /* Routine io_service_open_ndr */
 kern_return_t is_io_service_open_extended(
 	io_object_t _service,
@@ -2404,13 +2483,17 @@ kern_return_t is_io_service_open_extended(
 	    disallowAccess = (crossEndian
 		&& (kOSBooleanTrue != service->getProperty(kIOUserClientCrossEndianCompatibleKey))
 		&& (kOSBooleanTrue != client->getProperty(kIOUserClientCrossEndianCompatibleKey)));
-
-	    if (disallowAccess)
+            if (disallowAccess) res = kIOReturnUnsupported;
+#if CONFIG_MACF
+	    else if (0 != mac_iokit_check_open(kauth_cred_get(), client, connect_type))
+		res = kIOReturnNotPermitted;
+#endif
+	    if (kIOReturnSuccess != res)
 	    {
+		IOStatisticsClientCall();
 		client->clientClose();
 		client->release();
 		client = 0;
-		res = kIOReturnUnsupported;
 		break;
 	    }
 	    client->sharedInstance = (0 != client->getProperty(kIOUserClientSharedInstanceKey));
@@ -2440,6 +2523,7 @@ kern_return_t is_io_service_close(
 
     CHECK( IOUserClient, connection, client );
 
+    IOStatisticsClientCall();
     client->clientClose();
 
     return( kIOReturnSuccess );
@@ -2472,6 +2556,7 @@ kern_return_t is_io_connect_set_notification_port(
 {
     CHECK( IOUserClient, connection, client );
 
+    IOStatisticsClientCall();
     return( client->registerNotificationPort( port, notification_type,
 						(io_user_reference_t) reference ));
 }
@@ -2485,6 +2570,7 @@ kern_return_t is_io_connect_set_notification_port_64(
 {
     CHECK( IOUserClient, connection, client );
 
+    IOStatisticsClientCall();
     return( client->registerNotificationPort( port, notification_type,
 						reference ));
 }
@@ -2505,6 +2591,7 @@ kern_return_t is_io_connect_map_memory_into_task
 
     CHECK( IOUserClient, connection, client );
 
+    IOStatisticsClientCall();
     map = client->mapClientMemory64( memory_type, into_task, flags, *address );
 
     if( map) {
@@ -2563,6 +2650,8 @@ kern_return_t is_io_connect_map_memory(
     return (err);
 }
 
+} /* extern "C" */
+
 IOMemoryMap * IOUserClient::removeMappingForDescriptor(IOMemoryDescriptor * mem)
 {
     OSIterator *  iter;
@@ -2590,6 +2679,8 @@ IOMemoryMap * IOUserClient::removeMappingForDescriptor(IOMemoryDescriptor * mem)
     return (map);
 }
 
+extern "C" {
+
 /* Routine io_connect_unmap_memory_from_task */
 kern_return_t is_io_connect_unmap_memory_from_task
 (
@@ -2605,6 +2696,7 @@ kern_return_t is_io_connect_unmap_memory_from_task
 
     CHECK( IOUserClient, connection, client );
 
+    IOStatisticsClientCall();
     err = client->clientMemoryForType( (UInt32) memory_type, &options, &memory );
 
     if( memory && (kIOReturnSuccess == err)) {
@@ -2667,6 +2759,7 @@ kern_return_t is_io_connect_add_client(
     CHECK( IOUserClient, connection, client );
     CHECK( IOUserClient, connect_to, to );
 
+    IOStatisticsClientCall();
     return( client->connectClient( to ) );
 }
 
@@ -2693,10 +2786,10 @@ kern_return_t is_io_connect_method
 	mach_msg_type_number_t inband_inputCnt,
 	mach_vm_address_t ool_input,
 	mach_vm_size_t ool_input_size,
-	io_scalar_inband64_t scalar_output,
-	mach_msg_type_number_t *scalar_outputCnt,
 	io_struct_inband_t inband_output,
 	mach_msg_type_number_t *inband_outputCnt,
+	io_scalar_inband64_t scalar_output,
+	mach_msg_type_number_t *scalar_outputCnt,
 	mach_vm_address_t ool_output,
 	mach_vm_size_t * ool_output_size
 )
@@ -2742,6 +2835,7 @@ kern_return_t is_io_connect_method
     args.structureOutputDescriptor = outputMD;
     args.structureOutputDescriptorSize = *ool_output_size;
 
+	IOStatisticsClientCall();
     ret = client->externalMethod( selector, &args );
 
     *scalar_outputCnt = args.scalarOutputCount;
@@ -2770,10 +2864,10 @@ kern_return_t is_io_connect_async_method
 	mach_msg_type_number_t inband_inputCnt,
 	mach_vm_address_t ool_input,
 	mach_vm_size_t ool_input_size,
-	io_scalar_inband64_t scalar_output,
-	mach_msg_type_number_t *scalar_outputCnt,
 	io_struct_inband_t inband_output,
 	mach_msg_type_number_t *inband_outputCnt,
+	io_scalar_inband64_t scalar_output,
+	mach_msg_type_number_t *scalar_outputCnt,
 	mach_vm_address_t ool_output,
 	mach_vm_size_t * ool_output_size
 )
@@ -2823,6 +2917,7 @@ kern_return_t is_io_connect_async_method
     args.structureOutputDescriptor = outputMD;
     args.structureOutputDescriptorSize = *ool_output_size;
 
+    IOStatisticsClientCall();
     ret = client->externalMethod( selector, &args );
 
     *inband_outputCnt = args.structureOutputSize;
@@ -2860,8 +2955,8 @@ kern_return_t is_io_connect_method_scalarI_scalarO(
 		    _input, inputCount, 
 		    NULL, 0,
 		    0, 0,
-		    _output, outputCount,
 		    NULL, &struct_outputCnt,
+		    _output, outputCount,
 		    0, &ool_output_size);
 
     for (i = 0; i < *outputCount; i++)
@@ -2979,8 +3074,8 @@ kern_return_t is_io_async_method_scalarI_scalarO(
 		    _input, inputCount, 
 		    NULL, 0,
 		    0, 0,
-		    _output, outputCount,
 		    NULL, &struct_outputCnt,
+		    _output, outputCount,
 		    0, &ool_output_size);
 
     for (i = 0; i < *outputCount; i++)
@@ -3019,8 +3114,8 @@ kern_return_t is_io_async_method_scalarI_structureO(
 		    _input, inputCount, 
 		    NULL, 0,
 		    0, 0,
-		    NULL, &scalar_outputCnt,
 		    output, outputCount,
+		    NULL, &scalar_outputCnt,
 		    0, &ool_output_size));
 }
 
@@ -3056,8 +3151,8 @@ kern_return_t is_io_async_method_scalarI_structureI(
 		    _input, inputCount, 
 		    inputStruct, inputStructCount,
 		    0, 0,
-		    NULL, &scalar_outputCnt,
 		    NULL, &inband_outputCnt,
+		    NULL, &scalar_outputCnt,
 		    0, &ool_output_size));
 }
 
@@ -3087,8 +3182,8 @@ kern_return_t is_io_async_method_structureI_structureO(
 		    NULL, 0, 
 		    input, inputCount,
 		    0, 0,
-		    NULL, &scalar_outputCnt,
 		    output, outputCount,
+		    NULL, &scalar_outputCnt,
 		    0, &ool_output_size));
 }
 
@@ -3207,8 +3302,8 @@ kern_return_t is_io_connect_method_scalarI_structureO(
 		    _input, inputCount, 
 		    NULL, 0,
 		    0, 0,
-		    NULL, &scalar_outputCnt,
 		    output, outputCount,
+		    NULL, &scalar_outputCnt,
 		    0, &ool_output_size));
 }
 
@@ -3380,8 +3475,8 @@ kern_return_t is_io_connect_method_scalarI_structureI(
 		    _input, inputCount, 
 		    inputStruct, inputStructCount,
 		    0, 0,
-		    NULL, &scalar_outputCnt,
 		    NULL, &inband_outputCnt,
+		    NULL, &scalar_outputCnt,
 		    0, &ool_output_size));
 }
 
@@ -3553,8 +3648,8 @@ kern_return_t is_io_connect_method_structureI_structureO(
 		    NULL, 0, 
 		    input, inputCount,
 		    0, 0,
-		    NULL, &scalar_outputCnt,
 		    output, outputCount,
+		    NULL, &scalar_outputCnt,
 		    0, &ool_output_size));
 }
 
@@ -3772,6 +3867,23 @@ kern_return_t is_io_catalog_send_data(
     }
 
     switch ( flag ) {
+        case kIOCatalogResetDrivers:
+        case kIOCatalogResetDriversNoMatch: {
+                OSArray * array;
+
+                array = OSDynamicCast(OSArray, obj);
+                if (array) {
+                    if ( !gIOCatalogue->resetAndAddDrivers(array, 
+                        flag == kIOCatalogResetDrivers) ) {
+
+                        kr = kIOReturnError;
+                    }
+                } else {
+                    kr = kIOReturnBadArgument;
+                }
+            }
+            break;
+
         case kIOCatalogAddDrivers: 
         case kIOCatalogAddDriversNoMatch: {
                 OSArray * array;
@@ -3827,6 +3939,7 @@ kern_return_t is_io_catalog_send_data(
 
         case kIOCatalogKextdActive:
 #if !NO_KEXTD
+            IOServiceTrace(IOSERVICE_KEXTD_ALIVE, 0, 0, 0, 0);
             OSKext::setKextdActive();
 
            /* Dump all nonloaded startup extensions; kextd will now send them
@@ -3844,6 +3957,7 @@ kern_return_t is_io_catalog_send_data(
                 if (!clearedBusy) {
                     IOService * serviceRoot = IOService::getServiceRoot();
                     if (serviceRoot) {
+                        IOServiceTrace(IOSERVICE_KEXTD_READY, 0, 0, 0, 0);
                         serviceRoot->adjustBusy(-1);
                         clearedBusy = true;
                     }
@@ -3881,6 +3995,7 @@ kern_return_t is_io_catalog_terminate(
         return( kr );
 
     switch ( flag ) {
+#if !defined(SECURE_KERNEL)
         case kIOCatalogServiceTerminate:
             OSIterator *	iter;
             IOService *		service;
@@ -3910,6 +4025,7 @@ kern_return_t is_io_catalog_terminate(
             kr = gIOCatalogue->terminateDriversForModule(name,
                                         flag == kIOCatalogModuleUnload);
             break;
+#endif
 
         default:
             kr = kIOReturnBadArgument;
@@ -4048,6 +4164,8 @@ kern_return_t iokit_user_client_trap(struct iokit_user_client_trap_args *args)
     return result;
 }
 
+} /* extern "C" */
+
 IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArguments * args,
 					IOExternalMethodDispatch * dispatch, OSObject * target, void * reference )
 {
@@ -4113,10 +4231,8 @@ IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArgume
 
     if (kIOUCForegroundOnly & method->flags)
     {
-        bool isBg;
-        kern_return_t kr = IOUCIsBackgroundTask(current_task(), &isBg);
-    
-        if ((KERN_SUCCESS == kr) && isBg)
+	/* is graphics access denied for current task? */
+	if (proc_get_task_selfgpuacc_deny() != 0) 
             return (kIOReturnNotPermitted);
     }
 
@@ -4165,11 +4281,10 @@ IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArgume
 
     if (kIOUCForegroundOnly & method->flags)
     {
-        bool isBg;
-        kern_return_t kr = IOUCIsBackgroundTask(current_task(), &isBg);
-    
-        if ((KERN_SUCCESS == kr) && isBg)
+	/* is graphics access denied for current task? */
+	if (proc_get_task_selfgpuacc_deny() != 0) 
             return (kIOReturnNotPermitted);
+    
     }
 
 	switch (method->flags & kIOUCTypeMask)
@@ -4211,8 +4326,6 @@ IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArgume
 }
 
 
-};	/* extern "C" */
-
 #if __LP64__
 OSMetaClassDefineReservedUnused(IOUserClient, 0);
 OSMetaClassDefineReservedUnused(IOUserClient, 1);
diff --git a/iokit/Kernel/IOWorkLoop.cpp b/iokit/Kernel/IOWorkLoop.cpp
index c32a565f6..51045a234 100644
--- a/iokit/Kernel/IOWorkLoop.cpp
+++ b/iokit/Kernel/IOWorkLoop.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -56,31 +56,81 @@ OSMetaClassDefineReservedUnused(IOWorkLoop, 6);
 OSMetaClassDefineReservedUnused(IOWorkLoop, 7);
 
 enum IOWorkLoopState { kLoopRestart = 0x1, kLoopTerminate = 0x2 };
-#ifdef __ppc__
-static inline void SETP(void *addr, unsigned int flag)
-    { unsigned int *num = (unsigned int *) addr; *num |= flag; }
-static inline void CLRP(void *addr, unsigned int flag)
-    { unsigned int *num = (unsigned int *) addr; *num &= ~flag; }
-static inline bool ISSETP(void *addr, unsigned int flag)
-    { unsigned int *num = (unsigned int *) addr; return (*num & flag) != 0; }
-#else
 static inline void SETP(void *addr, unsigned int flag)
     { unsigned char *num = (unsigned char *) addr; *num |= flag; }
 static inline void CLRP(void *addr, unsigned int flag)
     { unsigned char *num = (unsigned char *) addr; *num &= ~flag; }
 static inline bool ISSETP(void *addr, unsigned int flag)
     { unsigned char *num = (unsigned char *) addr; return (*num & flag) != 0; }
-#endif
 
 #define fFlags loopRestart
 
+#define passiveEventChain	reserved->passiveEventChain
+
+#if IOKITSTATS
+
+#define IOStatisticsRegisterCounter() \
+do { \
+	reserved->counter = IOStatistics::registerWorkLoop(this); \
+} while(0)
+
+#define IOStatisticsUnregisterCounter() \
+do { \
+	if (reserved) \
+		IOStatistics::unregisterWorkLoop(reserved->counter); \
+} while(0)
+
+#define IOStatisticsOpenGate() \
+do { \
+	IOStatistics::countWorkLoopOpenGate(reserved->counter); \
+} while(0)
+
+#define IOStatisticsCloseGate() \
+do { \
+	IOStatistics::countWorkLoopCloseGate(reserved->counter); \
+} while(0)
+
+#define IOStatisticsAttachEventSource() \
+do { \
+	IOStatistics::attachWorkLoopEventSource(reserved->counter, inEvent->reserved->counter); \
+} while(0)
+
+#define IOStatisticsDetachEventSource() \
+do { \
+	IOStatistics::detachWorkLoopEventSource(reserved->counter, inEvent->reserved->counter); \
+} while(0)
+
+#else
+
+#define IOStatisticsRegisterCounter()
+#define IOStatisticsUnregisterCounter()
+#define IOStatisticsOpenGate()
+#define IOStatisticsCloseGate()
+#define IOStatisticsAttachEventSource()
+#define IOStatisticsDetachEventSource()
+
+#endif /* IOKITSTATS */
 
 bool IOWorkLoop::init()
 {
-    // The super init and gateLock allocation MUST be done first
+    // The super init and gateLock allocation MUST be done first.
     if ( !super::init() )
         return false;
 	
+	// Allocate our ExpansionData if it hasn't been allocated already.
+	if ( !reserved )
+	{
+		reserved = IONew(ExpansionData,1);
+		if ( !reserved )
+			return false;
+		
+		bzero(reserved,sizeof(ExpansionData));
+	}
+	
+#if DEBUG
+	OSBacktrace ( reserved->allocationBacktrace, sizeof ( reserved->allocationBacktrace ) / sizeof ( reserved->allocationBacktrace[0] ) );
+#endif
+	
     if ( gateLock == NULL ) {
         if ( !( gateLock = IORecursiveLockAlloc()) )
             return false;
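
The IOKITSTATS block above is the usual compile-time instrumentation pattern: each macro body is wrapped in do { ... } while(0) so the expansion behaves as a single statement (safe under an unbraced if/else), and the #else branch supplies empty definitions so call sites compile away entirely when statistics are disabled. A minimal sketch of the idiom, with a hypothetical MY_STATS option and counter field:

#if MY_STATS	/* hypothetical config option */
/* do { ... } while (0) makes the expansion one statement */
#define MyCountEvent(c) \
do { \
	(c)->events++; \
} while (0)
#else
#define MyCountEvent(c)	/* expands to nothing; zero cost */
#endif
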
@@ -93,6 +143,13 @@ bool IOWorkLoop::init()
         workToDo = false;
     }
 
+    if (!reserved) {
+        reserved = IONew(ExpansionData, 1);
+        reserved->options = 0;
+    }
+	
+    IOStatisticsRegisterCounter();
+
     if ( controlG == NULL ) {
         controlG = IOCommandGate::commandGate(
             this,
@@ -132,23 +189,24 @@ IOWorkLoop::workLoop()
 IOWorkLoop *
 IOWorkLoop::workLoopWithOptions(IOOptionBits options)
 {
-    IOWorkLoop *me = new IOWorkLoop;
-
-    if (me && options) {
-	me->reserved = IONew(ExpansionData, 1);
-	if (!me->reserved) {
-	    me->release();
-	    return 0;
+	IOWorkLoop *me = new IOWorkLoop;
+	
+	if (me && options) {
+		me->reserved = IONew(ExpansionData,1);
+		if (!me->reserved) {
+			me->release();
+			return 0;
+		}
+		bzero(me->reserved,sizeof(ExpansionData));
+		me->reserved->options = options;
 	}
-	me->reserved->options = options;
-    }
-
-    if (me && !me->init()) {
-        me->release();
-        return 0;
-    }
-
-    return me;
+	
+	if (me && !me->init()) {
+		me->release();
+		return 0;
+	}
+	
+	return me;
 }
 
 // Free is called twice:
@@ -187,6 +245,14 @@ void IOWorkLoop::free()
         }
         eventChain = 0;
 
+        for (event = passiveEventChain; event; event = next) {
+            next = event->getNext();
+            event->setWorkLoop(0);
+            event->setNext(0);
+            event->release();
+        }
+        passiveEventChain = 0;
+
 	// Either we have a partial initialization to clean up
 	// or the workThread itself is performing hari-kari.
 	// Either way clean up all of our resources and return.
@@ -205,6 +271,9 @@ void IOWorkLoop::free()
 	    IORecursiveLockFree(gateLock);
 	    gateLock = 0;
 	}
+	
+	IOStatisticsUnregisterCounter();
+	
 	if (reserved) {
 	    IODelete(reserved, ExpansionData, 1);
 	    reserved = 0;
@@ -230,6 +299,9 @@ void IOWorkLoop::enableAllEventSources() const
 
     for (event = eventChain; event; event = event->getNext())
         event->enable();
+
+    for (event = passiveEventChain; event; event = event->getNext())
+        event->enable();
 }
 
 void IOWorkLoop::disableAllEventSources() const
@@ -237,6 +309,10 @@ void IOWorkLoop::disableAllEventSources() const
     IOEventSource *event;
 
     for (event = eventChain; event; event = event->getNext())
+		event->disable();
+	
+	/* NOTE: controlG is in passiveEventChain since it's an IOCommandGate */
+    for (event = passiveEventChain; event; event = event->getNext())
         if (event != controlG)	// Don't disable the control gate
             event->disable();
 }
@@ -244,7 +320,7 @@ void IOWorkLoop::disableAllEventSources() const
 void IOWorkLoop::enableAllInterrupts() const
 {
     IOEventSource *event;
-
+	
     for (event = eventChain; event; event = event->getNext())
         if (OSDynamicCast(IOInterruptEventSource, event))
             event->enable();
@@ -253,43 +329,12 @@ void IOWorkLoop::enableAllInterrupts() const
 void IOWorkLoop::disableAllInterrupts() const
 {
     IOEventSource *event;
-
+	
     for (event = eventChain; event; event = event->getNext())
         if (OSDynamicCast(IOInterruptEventSource, event))
             event->disable();
 }
 
-#if KDEBUG
-#define IOTimeClientS()							\
-do {									\
-    IOTimeStampStart(IODBG_WORKLOOP(IOWL_CLIENT),			\
-                     (unsigned int) this, (unsigned int) event);	\
-} while(0)
-
-#define IOTimeClientE()							\
-do {									\
-    IOTimeStampEnd(IODBG_WORKLOOP(IOWL_CLIENT),				\
-                   (unsigned int) this, (unsigned int) event);		\
-} while(0)
-
-#define IOTimeWorkS()							\
-do {									\
-    IOTimeStampStart(IODBG_WORKLOOP(IOWL_WORK),	(unsigned int) this);	\
-} while(0)
-
-#define IOTimeWorkE()							\
-do {									\
-    IOTimeStampEnd(IODBG_WORKLOOP(IOWL_WORK),(unsigned int) this);	\
-} while(0)
-
-#else /* !KDEBUG */
-
-#define IOTimeClientS()
-#define IOTimeClientE()
-#define IOTimeWorkS()
-#define IOTimeWorkE()
-
-#endif /* KDEBUG */
 
 /* virtual */ bool IOWorkLoop::runEventSources()
 {
@@ -299,42 +344,43 @@ do {									\
     
     closeGate();
     if (ISSETP(&fFlags, kLoopTerminate))
-	goto abort;
-
+		goto abort;
+	
     if (traceWL)
     	IOTimeStampStartConstant(IODBG_WORKLOOP(IOWL_WORK), (uintptr_t) this);
 	
     bool more;
     do {
-	CLRP(&fFlags, kLoopRestart);
-	more = false;
-	IOInterruptState is = IOSimpleLockLockDisableInterrupt(workToDoLock);
-	workToDo = false;
-	IOSimpleLockUnlockEnableInterrupt(workToDoLock, is);
-	for (IOEventSource *evnt = eventChain; evnt; evnt = evnt->getNext()) {
-
-		if (traceES)
-			IOTimeStampStartConstant(IODBG_WORKLOOP(IOWL_CLIENT), (uintptr_t) this, (uintptr_t) evnt);
+		CLRP(&fFlags, kLoopRestart);
+		more = false;
+		IOInterruptState is = IOSimpleLockLockDisableInterrupt(workToDoLock);
+		workToDo = false;
+		IOSimpleLockUnlockEnableInterrupt(workToDoLock, is);
+		/* NOTE: only loop over event sources in eventChain. Bypass "passive" event sources for performance */
+		for (IOEventSource *evnt = eventChain; evnt; evnt = evnt->getNext()) {
 			
-	    more |= evnt->checkForWork();
+			if (traceES)
+				IOTimeStampStartConstant(IODBG_WORKLOOP(IOWL_CLIENT), (uintptr_t) this, (uintptr_t) evnt);
 			
-		if (traceES)
-			IOTimeStampEndConstant(IODBG_WORKLOOP(IOWL_CLIENT), (uintptr_t) this, (uintptr_t) evnt);
-
-	    if (ISSETP(&fFlags, kLoopTerminate))
-		goto abort;
-	    else if (fFlags & kLoopRestart) {
-		more = true;
-		break;
-	    }
-	}
+			more |= evnt->checkForWork();
+			
+			if (traceES)
+				IOTimeStampEndConstant(IODBG_WORKLOOP(IOWL_CLIENT), (uintptr_t) this, (uintptr_t) evnt);
+			
+			if (ISSETP(&fFlags, kLoopTerminate))
+				goto abort;
+			else if (fFlags & kLoopRestart) {
+				more = true;
+				break;
+			}
+		}
     } while (more);
-
+	
     res = true;
 	
     if (traceWL)
     	IOTimeStampEndConstant(IODBG_WORKLOOP(IOWL_WORK), (uintptr_t) this);
-
+	
 abort:
     openGate();
     return res;
@@ -403,27 +449,41 @@ void IOWorkLoop::signalWorkAvailable()
 
 void IOWorkLoop::openGate()
 {
+    IOStatisticsOpenGate();
     IORecursiveLockUnlock(gateLock);
 }
 
 void IOWorkLoop::closeGate()
 {
     IORecursiveLockLock(gateLock);
+    IOStatisticsCloseGate();
 }
 
 bool IOWorkLoop::tryCloseGate()
 {
-    return IORecursiveLockTryLock(gateLock) != 0;
+    bool res = (IORecursiveLockTryLock(gateLock) != 0);
+    if (res) {
+        IOStatisticsCloseGate();
+    }
+    return res;
 }
 
 int IOWorkLoop::sleepGate(void *event, UInt32 interuptibleType)
 {
-    return IORecursiveLockSleep(gateLock, event, interuptibleType);
+    int res; 
+    IOStatisticsOpenGate();
+    res = IORecursiveLockSleep(gateLock, event, interuptibleType);
+    IOStatisticsCloseGate();
+    return res;
 }
 
 int IOWorkLoop::sleepGate(void *event, AbsoluteTime deadline, UInt32 interuptibleType)
 {
-    return IORecursiveLockSleepDeadline(gateLock, event, deadline, interuptibleType);
+    int res; 
+    IOStatisticsOpenGate();
+    res = IORecursiveLockSleepDeadline(gateLock, event, deadline, interuptibleType);
+    IOStatisticsCloseGate();
+    return res;
 }
 
 void IOWorkLoop::wakeupGate(void *event, bool oneThread)
@@ -460,41 +520,82 @@ IOReturn IOWorkLoop::_maintRequest(void *inC, void *inD, void *, void *)
             inEvent->retain();
             inEvent->setWorkLoop(this);
             inEvent->setNext(0);
+
+    		/* Check if this is a passive or active event source being added */
+    		if (eventSourcePerformsWork(inEvent)) {
+    		
+	            if (!eventChain)
+    	            eventChain = inEvent;
+        	    else {
+            	    IOEventSource *event, *next;
     
-            if (!eventChain)
-                eventChain = inEvent;
+                	for (event = eventChain; (next = event->getNext()); event = next)
+                    	;
+                	event->setNext(inEvent);
+                	
+            	}
+            	
+            }
             else {
-                IOEventSource *event, *next;
+    		
+	            if (!passiveEventChain)
+    	            passiveEventChain = inEvent;
+        	    else {
+            	    IOEventSource *event, *next;
     
-                for (event = eventChain; (next = event->getNext()); event = next)
-                    ;
-                event->setNext(inEvent);
+                	for (event = passiveEventChain; (next = event->getNext()); event = next)
+                    	;
+                	event->setNext(inEvent);
+                	
+            	}
+            	
             }
+            IOStatisticsAttachEventSource();
         }
         break;
 
     case mRemoveEvent:
         if (inEvent->getWorkLoop()) {
-            if (eventChain == inEvent)
-                eventChain = inEvent->getNext();
-            else {
-                IOEventSource *event, *next;
-    
-                event = eventChain;
-                while ((next = event->getNext()) && next != inEvent)
-                    event = next;
-    
-                if (!next) {
-                    res = kIOReturnBadArgument;
-                    break;
-                }
-                event->setNext(inEvent->getNext());
-            }
-    
+        	if (eventSourcePerformsWork(inEvent)) {
+				if (eventChain == inEvent)
+					eventChain = inEvent->getNext();
+				else {
+					IOEventSource *event, *next;
+		
+					event = eventChain;
+					while ((next = event->getNext()) && next != inEvent)
+						event = next;
+		
+					if (!next) {
+						res = kIOReturnBadArgument;
+						break;
+					}
+					event->setNext(inEvent->getNext());
+				}
+    		}
+    		else {
+				if (passiveEventChain == inEvent)
+					passiveEventChain = inEvent->getNext();
+				else {
+					IOEventSource *event, *next;
+		
+					event = passiveEventChain;
+					while ((next = event->getNext()) && next != inEvent)
+						event = next;
+		
+					if (!next) {
+						res = kIOReturnBadArgument;
+						break;
+					}
+					event->setNext(inEvent->getNext());
+				}
+    		}
+    		
             inEvent->setWorkLoop(0);
             inEvent->setNext(0);
             inEvent->release();
             SETP(&fFlags, kLoopRestart);
+            IOStatisticsDetachEventSource();
         }
         break;
 
@@ -504,3 +605,40 @@ IOReturn IOWorkLoop::_maintRequest(void *inC, void *inD, void *, void *)
 
     return res;
 }
+
+bool
+IOWorkLoop::eventSourcePerformsWork(IOEventSource *inEventSource)
+{
+	bool	result = true;
+
+	/*
+	 * The idea here is to see if the subclass of IOEventSource has overridden checkForWork().
+	 * The assumption is that if you override checkForWork(), you need to be
+	 * active and not passive.
+	 *
+	 * We picked a known quantity controlG that does not override
+	 * IOEventSource::checkForWork(), namely the IOCommandGate associated with
+	 * the workloop to which this event source is getting attached.
+	 * 
+	 * We do a pointer comparison on the vtable slot for checkForWork() in
+	 * inEventSource against the same slot in controlG. This works because
+	 * IOCommandGate's slot for checkForWork() has the address of
+	 * IOEventSource::checkForWork() in it.
+	 * 
+	 * Think of OSMemberFunctionCast yielding the value at the vtable offset for
+	 * checkForWork() here. We're just testing to see if it's the same or not.
+	 *
+	 */
+	if (controlG) {
+		void *	ptr1;
+		void *	ptr2;
+		
+		ptr1 = OSMemberFunctionCast(void*, inEventSource, &IOEventSource::checkForWork);
+		ptr2 = OSMemberFunctionCast(void*, controlG, &IOEventSource::checkForWork);
+		
+		if (ptr1 == ptr2)
+			result = false;
+	}
+	
+    return result;
+}
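
For reference, the override test above can be read in isolation. This is a minimal sketch of the same idiom (the helper name is hypothetical; OSMemberFunctionCast and IOEventSource::checkForWork are the real symbols used by eventSourcePerformsWork):

// Returns true when the dynamic class of 'es' overrides checkForWork(),
// i.e. when its vtable slot for the method differs from the reference
// object's slot. 'ref' is assumed not to override checkForWork().
static bool overridesCheckForWork(IOEventSource *es, IOEventSource *ref)
{
    void *p1 = OSMemberFunctionCast(void *, es,  &IOEventSource::checkForWork);
    void *p2 = OSMemberFunctionCast(void *, ref, &IOEventSource::checkForWork);
    return (p1 != p2);	// equal => es inherits the base implementation
}
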
diff --git a/iokit/Kernel/RootDomainUserClient.cpp b/iokit/Kernel/RootDomainUserClient.cpp
index 69c0dfa1a..29c90deef 100644
--- a/iokit/Kernel/RootDomainUserClient.cpp
+++ b/iokit/Kernel/RootDomainUserClient.cpp
@@ -73,23 +73,14 @@ bool RootDomainUserClient::start( IOService * provider )
 
 IOReturn RootDomainUserClient::secureSleepSystem( uint32_t *return_code )
 {
-    IOByteCount     return_code_size = 1;
-
-    return secureSleepSystemOptions( NULL,      // inOptions
-                                     (void *)return_code, // returnCode
-                                     (void *)0,     // inSize
-                                     (void *)&return_code_size, // returnSize
-                                     NULL, NULL);
+    return secureSleepSystemOptions(NULL, 0, return_code);
 }
 
 IOReturn RootDomainUserClient::secureSleepSystemOptions( 
-    void * p1, void * p2, void * p3,
-    void * p4, void * p5, void * p6 )
+    const void      *inOptions, 
+    IOByteCount     inOptionsSize __unused,
+    uint32_t        *returnCode)
 {
-    void            *inOptions = (void *)p1;
-    uint32_t        *returnCode = (uint32_t *)p2;
-//  IOByteCount     inOptionsSize = (uintptr_t)p3;
-    IOByteCount     *returnCodeSize = (IOByteCount *)p4;
 
     int             local_priv = 0;
     int             admin_priv = 0;
@@ -103,7 +94,6 @@ IOReturn RootDomainUserClient::secureSleepSystemOptions(
     ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeAdministrator);
     admin_priv = (kIOReturnSuccess == ret);
     
-    *returnCodeSize = sizeof(uint32_t);
     
     if (inOptions)
     {
@@ -159,43 +149,32 @@ IOReturn RootDomainUserClient::secureSetAggressiveness(
 
     if((local_priv || admin_priv) && fOwner) {
         *return_code = fOwner->setAggressiveness(type, newLevel);
-        return kIOReturnSuccess;
     } else {
         *return_code = kIOReturnNotPrivileged;
-        return kIOReturnSuccess;
     }
+    return kIOReturnSuccess;
 }
 
-IOReturn RootDomainUserClient::secureSetMaintenanceWakeCalendar( 
-    void * p1, void * p2, void * p3,
-    void * p4, void * p5, void * p6 )
+IOReturn RootDomainUserClient::secureSetMaintenanceWakeCalendar(
+    IOPMCalendarStruct      *inCalendar,
+    uint32_t                *returnCode)
 {
-#if ROOT_DOMAIN_RUN_STATES
-    IOPMCalendarStruct *    inCalendar = (IOPMCalendarStruct *) p1;
-    uint32_t *              returnCode = (uint32_t *) p2;
-    IOByteCount *           returnCodeSize = (IOByteCount *) p4;
     int                     admin_priv = 0;
     IOReturn                ret = kIOReturnNotPrivileged;
     
     ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeAdministrator);
     admin_priv = (kIOReturnSuccess == ret);
 
-    *returnCodeSize = sizeof(uint32_t);
-
     if (admin_priv && fOwner) {
         *returnCode = fOwner->setMaintenanceWakeCalendar(inCalendar);
-        return kIOReturnSuccess;
     } else {
         *returnCode = kIOReturnNotPrivileged;
-        return kIOReturnSuccess;
     }
-#else
-    return kIOReturnUnsupported;
-#endif
+    return kIOReturnSuccess;
 }
 
 IOReturn RootDomainUserClient::secureSetUserAssertionLevels(
-    uint32_t assertBits )
+    uint32_t    assertionBitfield)
 {
     int                     admin_priv = 0;
     IOReturn                ret = kIOReturnNotPrivileged;
@@ -204,7 +183,7 @@ IOReturn RootDomainUserClient::secureSetUserAssertionLevels(
     admin_priv = (kIOReturnSuccess == ret);
 
     if (admin_priv && fOwner) {
-        ret = fOwner->setPMAssertionUserLevels(assertBits);
+        ret = fOwner->setPMAssertionUserLevels(assertionBitfield);
     } else {
         ret = kIOReturnNotPrivileged;
     }
@@ -223,103 +202,185 @@ IOReturn RootDomainUserClient::clientClose( void )
     return kIOReturnSuccess;
 }
 
-IOExternalMethod *
-RootDomainUserClient::getTargetAndMethodForIndex( IOService ** targetP, UInt32 index )
+IOReturn RootDomainUserClient::clientMemoryForType(
+    UInt32 type,
+    IOOptionBits *options,
+    IOMemoryDescriptor ** memory)
 {
-    static const IOExternalMethod sMethods[] = {
-        {   // kPMSetAggressiveness, 0
-            (IOService *)1, (IOMethod)&RootDomainUserClient::secureSetAggressiveness, kIOUCScalarIScalarO, 2, 1
-        },
-        {   // kPMGetAggressiveness, 1
-            0, (IOMethod)&IOPMrootDomain::getAggressiveness, kIOUCScalarIScalarO, 1, 1
-        },
-        {   // kPMSleepSystem, 2
-            (IOService *)1, (IOMethod)&RootDomainUserClient::secureSleepSystem, kIOUCScalarIScalarO, 0, 1
-        },
-        {   // kPMAllowPowerChange, 3
-            0, (IOMethod)&IOPMrootDomain::allowPowerChange, kIOUCScalarIScalarO, 1, 0
-        },
-        {   // kPMCancelPowerChange, 4
-            0, (IOMethod)&IOPMrootDomain::cancelPowerChange, kIOUCScalarIScalarO, 1, 0
-        },
-        {   // kPMShutdownSystem, 5
-            0, (IOMethod)&IOPMrootDomain::shutdownSystem, kIOUCScalarIScalarO, 0, 0
-        },
-        {   // kPMRestartSystem, 6
-            0, (IOMethod)&IOPMrootDomain::restartSystem, kIOUCScalarIScalarO, 0, 0
-        },
-        {   // kPMSleepSystemOptions, 7
-            (IOService *)1, (IOMethod)&RootDomainUserClient::secureSleepSystemOptions, 
-            kIOUCStructIStructO, kIOUCVariableStructureSize, sizeof(uint32_t)
-        },
-        {   // kPMSetMaintenanceWakeCalendar, 8
-            (IOService *)1, (IOMethod)&RootDomainUserClient::secureSetMaintenanceWakeCalendar,
-            kIOUCStructIStructO, sizeof(IOPMCalendarStruct), sizeof(uint32_t)
-        },
-        {   // kPMSetUserAssertionLevels, 9
-            (IOService *)1, (IOMethod)&RootDomainUserClient::secureSetUserAssertionLevels,
-            kIOUCScalarIScalarO, 1, 0
+    if (!fOwner)
+        return kIOReturnNotReady;
+
+    if (kPMRootDomainMapTraceBuffer == type)
+    {
+        *memory = fOwner->getPMTraceMemoryDescriptor();
+        if (*memory) {
+            (*memory)->retain();
+            *options = 0;
+            return kIOReturnSuccess;
+        } else {
+            return kIOReturnNotFound;
         }
-    };
-    
-    if(index >= kNumPMMethods)
-    	return NULL;
-    else {
-        if (sMethods[index].object)
-            *targetP = this;
-        else
-            *targetP = fOwner;
-
-        return (IOExternalMethod *)&sMethods[index];
+
     }
+    return kIOReturnUnsupported;
 }
 
-#if 0
-IOReturn RootDomainUserClient::externalMethod( uint32_t selector, IOExternalMethodArguments * args,
-						IOExternalMethodDispatch * dispatch, OSObject * target, void * reference )
+IOReturn RootDomainUserClient::externalMethod(
+    uint32_t selector, 
+    IOExternalMethodArguments * arguments,
+    IOExternalMethodDispatch * dispatch __unused, 
+    OSObject * target __unused,
+    void * reference __unused )
 {
-    static const IOExternalMethodDispatch sMethods[] = {
-        { // kPMSetAggressiveness, 0
-            (IOService *)1, (IOMethod)&RootDomainUserClient::secureSetAggressiveness, kIOUCScalarIScalarO, 2, 1
-        },
-        { // kPMGetAggressiveness, 1
-            0, (IOMethod)&IOPMrootDomain::getAggressiveness, kIOUCScalarIScalarO, 1, 1
-        },
-        { // kPMSleepSystem, 2
-            (IOService *)1, (IOMethod)&RootDomainUserClient::secureSleepSystem, kIOUCScalarIScalarO, 0, 1
-        },
-        { // kPMAllowPowerChange, 3
-            0, (IOMethod)&IOPMrootDomain::allowPowerChange, kIOUCScalarIScalarO, 1, 0
-        },
-        { // kPMCancelPowerChange, 4
-            0, (IOMethod)&IOPMrootDomain::cancelPowerChange, kIOUCScalarIScalarO, 1, 0
-        },
-        { // kPMShutdownSystem, 5
-            0, (IOMethod)&IOPMrootDomain::shutdownSystem, kIOUCScalarIScalarO, 0, 0
-        },
-        { // kPMRestartSystem, 6
-            0, (IOMethod)&IOPMrootDomain::restartSystem, kIOUCScalarIScalarO, 0, 0
-        },
-        { // kPMSetPreventative, 7
-            (IOService *)1, (IOMethod)&RootDomainUserClient::setPreventative, kIOUCScalarIScalarO, 2, 0
-        },
-    };
-
-    if (selector > (sizeof(sMethods) / sizeof(sMethods[0])))
-	return (kIOReturnBadArgument);
-
-    if ((1 << selector) & ((1 << 0) | (1 << 7))
-	target = this;
-    else
-	target = fOwner;
-
-    return (super::externalMethod(selector, args, &sMethods[selector], target, 0));
+    IOReturn    ret = kIOReturnBadArgument;    
+    
+    switch (selector)
+    {
+        case kPMSetAggressiveness:
+            if ((2 == arguments->scalarInputCount)
+                && (1 == arguments->scalarOutputCount))
+            {
+                ret = this->secureSetAggressiveness(
+                                (unsigned long)arguments->scalarInput[0],
+                                (unsigned long)arguments->scalarInput[1],
+                                (int *)&arguments->scalarOutput[0]);
+            }
+            break;
+        
+        case kPMGetAggressiveness:
+            if ((1 == arguments->scalarInputCount)
+                && (1 == arguments->scalarOutputCount))
+            {
+                ret = fOwner->getAggressiveness(
+                                (unsigned long)arguments->scalarInput[0],
+                                (unsigned long *)&arguments->scalarOutput[0]);
+            }
+            break;
+        
+        case kPMSleepSystem:
+            if (1 == arguments->scalarOutputCount)
+            {
+                ret = this->secureSleepSystem(
+                                (uint32_t *)&arguments->scalarOutput[0]);            
+            }
+            break;
+
+        case kPMAllowPowerChange:
+            if (1 == arguments->scalarInputCount)
+            {
+                ret = fOwner->allowPowerChange(
+                                arguments->scalarInput[0]);
+            }
+            break;
+
+        case kPMCancelPowerChange:
+            if (1 == arguments->scalarInputCount)
+            {
+                ret = fOwner->cancelPowerChange(
+                                arguments->scalarInput[0]);
+            }
+            break;
+
+        case kPMShutdownSystem:
+            // deprecated interface
+            ret = kIOReturnUnsupported;
+            break;
+
+        case kPMRestartSystem:
+            // deprecated interface
+            ret = kIOReturnUnsupported;
+            break;
+
+        case kPMSleepSystemOptions:
+            ret = this->secureSleepSystemOptions(
+                    arguments->structureInput,
+                    arguments->structureInputSize,
+                    (uint32_t *)&arguments->scalarOutput[0]);
+            break;
+        case kPMSetMaintenanceWakeCalendar:
+            ret = this->secureSetMaintenanceWakeCalendar(
+                    (IOPMCalendarStruct *)arguments->structureInput, 
+                    (uint32_t *)&arguments->structureOutput);
+            arguments->structureOutputSize = sizeof(uint32_t);
+            break;
+            
+        case kPMSetUserAssertionLevels:
+            ret = this->secureSetUserAssertionLevels(
+                        (uint32_t)arguments->scalarInput[0]);
+            break;
+            
+/*
+        case kPMMethodCopySystemTimeline:
+            // intentional fallthrough
+        case kPMMethodCopyDetailedTimeline:
+
+            if (!arguments->structureOutputDescriptor)
+            {
+                // TODO: Force IOKit.framework to always send this data out
+                // of line, so I don't have to create a MemoryDescriptor here.
+                mem_size = arguments->structureOutputSize;
+                mem = IOMemoryDescriptor::withAddressRange(
+                                    (mach_vm_address_t)arguments->structureOutput, 
+                                    (mach_vm_size_t)mem_size,
+                                    kIODirectionIn, current_task());
+            } else {
+                mem_size = arguments->structureOutputDescriptorSize;
+                if (( mem = arguments->structureOutputDescriptor ))
+                    mem->retain();   
+            }
+            
+            if (mem)
+            {
+                mem->prepare(kIODirectionNone);
+    
+                if (kPMMethodCopySystemTimeline == selector) {
+                    arguments->scalarOutput[0] = fOwner->copySystemTimeline(
+                                    mem, &mem_size);
+                } 
+                else
+                if (kPMMethodCopyDetailedTimeline == selector) {
+                    arguments->scalarOutput[0] = fOwner->copyDetailedTimeline(
+                                    mem, &mem_size);
+                }
+            
+                if (arguments->structureOutputDescriptor) {
+                    arguments->structureOutputDescriptorSize = mem_size;
+                } else {
+                    arguments->structureOutputSize = mem_size;
+                }
+            
+                mem->release();
+
+                ret = kIOReturnSuccess;
+            } else {
+                ret = kIOReturnCannotWire;
+            }
+            
+            break;
+*/
+        default:
+            // bad selector
+            return kIOReturnBadArgument;
+    }
+
+    return ret;
 }
-#endif
 
-void 
-RootDomainUserClient::setPreventative(UInt32 on_off, UInt32 types_of_sleep)
+/* getTargetAndMethodForIndex
+ * Not used. We prefer to use externalMethod() for user client invocations.
+ * We maintain getTargetAndMethodForIndex since it's an exported symbol,
+ * and only for that reason.
+ */
+IOExternalMethod * RootDomainUserClient::getTargetAndMethodForIndex( 
+    IOService ** targetP, UInt32 index )
 {
-    return;
+    // DO NOT EDIT
+    return super::getTargetAndMethodForIndex(targetP, index);
 }
 
+/* setPreventative
+ * Does nothing. Exists only for exported symbol compatibility.
+ */
+void 
+RootDomainUserClient::setPreventative(UInt32 on_off, UInt32 types_of_sleep)
+{ return; } // DO NOT EDIT
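
From user space, the selectors above are reached through IOKit.framework. A hedged sketch of a caller for kPMSleepSystem (selector 2, per the comments in the table removed above); the wrapper name is hypothetical and `connect` is assumed to be an io_connect_t already opened against IOPMrootDomain:

#include <IOKit/IOKitLib.h>

// Invokes the kPMSleepSystem path in RootDomainUserClient::externalMethod(),
// which requires no scalar inputs and exactly one scalar output.
kern_return_t sleepSystemSketch(io_connect_t connect, uint32_t *returnCode)
{
    uint64_t out    = 0;
    uint32_t outCnt = 1;	// must satisfy scalarOutputCount == 1
    kern_return_t kr = IOConnectCallScalarMethod(connect, 2 /* kPMSleepSystem */,
                                                 NULL, 0, &out, &outCnt);
    if (kr == KERN_SUCCESS)
        *returnCode = (uint32_t) out;
    return kr;
}
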
diff --git a/iokit/Kernel/RootDomainUserClient.h b/iokit/Kernel/RootDomainUserClient.h
index 4a277749c..9e6be4003 100644
--- a/iokit/Kernel/RootDomainUserClient.h
+++ b/iokit/Kernel/RootDomainUserClient.h
@@ -46,39 +46,50 @@ class RootDomainUserClient : public IOUserClient
 {
     OSDeclareDefaultStructors(RootDomainUserClient)
 
+    friend class IOPMrootDomain;
 private:
     IOPMrootDomain *	fOwner;
     task_t              fOwningTask;
 
     IOReturn            secureSleepSystem( uint32_t *return_code );
     
-    IOReturn            secureSleepSystemOptions( void *p1, void *p2, void *p3,
-                                                  void *p4, void *p5, void *p6 );
+    IOReturn            secureSleepSystemOptions( const void  *inOptions, 
+                                                  IOByteCount  inOptionsSize,
+                                                  uint32_t  *returnCode);
 
     IOReturn            secureSetAggressiveness( unsigned long type, 
                                                  unsigned long newLevel, 
                                                  int *return_code );
 
     IOReturn            secureSetMaintenanceWakeCalendar(
-                                void * p1, void * p2, void * p3,
-                                void * p4, void * p5, void * p6 );
-
-    IOReturn            secureSetUserAssertionLevels(
-                                uint32_t assertBits );
+                                                IOPMCalendarStruct  *inCalendar,
+                                                uint32_t            *returnCode);
+                                                
+    IOReturn            secureSetUserAssertionLevels(uint32_t    assertionBitfield);
 
 public:
 
     virtual IOReturn clientClose( void );
     
-    virtual IOExternalMethod * getTargetAndMethodForIndex( IOService ** targetP, UInt32 index );
+    virtual IOReturn clientMemoryForType( UInt32 type, IOOptionBits *options, IOMemoryDescriptor **memory);
+    
+    virtual IOReturn externalMethod( uint32_t selector, 
+                    IOExternalMethodArguments * arguments,
+					IOExternalMethodDispatch * dispatch, 
+					OSObject * target, 
+					void * reference );
 
     virtual bool start( IOService * provider );
 
     virtual bool initWithTask(task_t owningTask, void *security_id, 
 					UInt32 type, OSDictionary * properties);
 
+    // Unused - retained for symbol compatibility
     void setPreventative(UInt32 on_off, UInt32 types_of_sleep);
 
+    // Unused - retained for symbol compatibility
+    virtual IOExternalMethod * getTargetAndMethodForIndex( IOService ** targetP, UInt32 index );
+
 };
 
 #endif /* ! _IOKIT_ROOTDOMAINUSERCLIENT_H */
diff --git a/iokit/Kernel/i386/IOKeyStoreHelper.cpp b/iokit/Kernel/i386/IOKeyStoreHelper.cpp
new file mode 100644
index 000000000..fb09d9c2e
--- /dev/null
+++ b/iokit/Kernel/i386/IOKeyStoreHelper.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <sys/cdefs.h>
+#include <stdbool.h>
+
+#include <IOKit/assert.h>
+#include <IOKit/system.h>
+#include <IOKit/IOLib.h>
+#include <IOKit/IOMemoryDescriptor.h>
+#include <IOKit/IOKitDebug.h>
+
+__BEGIN_DECLS
+
+#include <pexpert/pexpert.h>
+
+static volatile UInt32 alreadyFetched = 0;
+static IOMemoryDescriptor * newData;
+
+IOMemoryDescriptor *
+IOGetBootKeyStoreData(void);
+void
+IOSetKeyStoreData(IOMemoryDescriptor * data);
+
+__END_DECLS
+
+#if 1
+#define DEBG(fmt, args...)  	{ kprintf(fmt, ## args); }
+#else
+#define DEBG(fmt, args...)  	{}
+#endif
+
+void
+IOSetKeyStoreData(IOMemoryDescriptor * data)
+{
+    newData = data;
+    alreadyFetched = 0;
+}
+
+IOMemoryDescriptor *
+IOGetBootKeyStoreData(void)
+{
+  IOMemoryDescriptor *memoryDescriptor;
+  boot_args *args = (boot_args *)PE_state.bootArgs;
+  IOOptionBits options;
+  IOAddressRange ranges;
+
+  if (!OSCompareAndSwap(0, 1, &alreadyFetched))
+    return (NULL);
+
+  if (newData)
+  {
+    IOMemoryDescriptor * data = newData;
+    newData = NULL;
+    return (data);
+  }  
+
+  DEBG("%s: data at address %u size %u\n", __func__,
+       args->keyStoreDataStart,
+       args->keyStoreDataSize);
+
+  if (args->keyStoreDataStart == 0)
+    return (NULL);
+
+  ranges.address = args->keyStoreDataStart;
+  ranges.length = args->keyStoreDataSize;
+
+  options = kIODirectionInOut | kIOMemoryTypePhysical64;
+  
+  memoryDescriptor = IOMemoryDescriptor::withOptions(&ranges,
+						     1,
+						     0,
+						     NULL,
+						     options);
+
+  DEBG("%s: memory descriptor %p\n", __func__, memoryDescriptor);
+
+  return memoryDescriptor;
+}
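
IOGetBootKeyStoreData() above uses OSCompareAndSwap to make the key-store data consumable exactly once: the first caller atomically flips alreadyFetched from 0 to 1 and receives the descriptor; every later caller gets NULL. A minimal sketch of the idiom with hypothetical names:

// Only the thread that wins the 0 -> 1 swap takes the payload;
// OSCompareAndSwap returns true only for that thread.
static volatile UInt32 consumed = 0;

static void * takeOnce(void *payload)
{
    if (!OSCompareAndSwap(0, 1, &consumed))
        return NULL;	// already handed out
    return payload;
}
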
diff --git a/iokit/Kernel/i386/IOSharedLock.s b/iokit/Kernel/i386/IOSharedLock.s
index 69183e016..9360dce09 100644
--- a/iokit/Kernel/i386/IOSharedLock.s
+++ b/iokit/Kernel/i386/IOSharedLock.s
@@ -1,19 +1,14 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2010 Apple Computer, Inc. All rights reserved.
  *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * @APPLE_LICENSE_HEADER_START@
  * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
  * 
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
@@ -23,7 +18,47 @@
  * Please see the License for the specific language governing rights and
  * limitations under the License.
  * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <architecture/i386/asm_help.h>
+
+	TEXT
+
+/*
+ * void
+ * OSSpinLockUnlock(p)
+ *	int *p;
+ *
+ * Unlock the lock pointed to by p.
+ */
+
+LEAF(_OSSpinLockUnlock, 0)
+LEAF(_IOSpinUnlock, 0)
+LEAF(_ev_unlock, 0)
+	movl		4(%esp), %ecx
+	movl		$0, (%ecx)
+END(_OSSpinLockUnlock)
+
+
+/*
+ * int
+ * OSSpinLockTry(p)
+ *	int *p;
+ *
+ * Try to lock p.  Return zero if not successful.
  */
-#include <IOKit/machine/IOSharedLockImp.h>
 
+LEAF(_OSSpinLockTry, 0)
+LEAF(_IOTrySpinLock, 0)
+LEAF(_ev_try_lock, 0)
+        movl            4(%esp), %ecx 
+	xorl		%eax, %eax
+        lock
+        cmpxchgl        %ecx, (%ecx)
+	jne	1f
+	movl	$1, %eax		/* yes */
+	ret
+1:
+	xorl	%eax, %eax		/* no */
+END(_OSSpinLockTry)
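
The assembly above (and its x86_64 counterpart later in this patch) implements a classic compare-and-swap try-lock: the lock word is 0 when free, and try-lock atomically replaces 0 with a nonzero value (the lock's own address on i386, -1 on x86_64). A C-level sketch of the semantics, using a GCC atomic builtin rather than the exported assembly entry points:

/* Illustrative only -- the kernel exports the assembly routines above. */
static inline int ev_try_lock_sketch(volatile int *p)
{
    /* 1 on success (the 0 -> nonzero swap won), 0 if the lock was held */
    return __sync_bool_compare_and_swap(p, 0, -1);
}

static inline void ev_unlock_sketch(volatile int *p)
{
    *p = 0;	/* a plain store releases the lock */
}
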
diff --git a/iokit/Kernel/ppc/IOAsmSupport.s b/iokit/Kernel/ppc/IOAsmSupport.s
deleted file mode 100644
index 56e068cc2..000000000
--- a/iokit/Kernel/ppc/IOAsmSupport.s
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
- /*
- * Copyright (c) 1997-1998 Apple Computer, Inc.
- *
- *
- * HISTORY
- *
- * sdouglas  22 Oct 97 - first checked in from DriverServices
- * sdouglas  28 Jul 98 - start IOKit
- */
-
-#include <architecture/ppc/asm_help.h>
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;
-; ENTRY		functionName
-;
-; Assembly directives to begin an exported function.
-;
-; Takes: functionName - name of the exported function
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-.macro ENTRY
-	.text
-	.align		2
-	.globl		$0
-$0:
-.endmacro
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-/*
-
-OSStatus SynchronizeIO( void )
-
-*/
-
-	ENTRY	__eSynchronizeIO
-
-	li	r0,	0
-	eieio
-	li	r3,	0
-	blr
-
-/*
-
-OSStatus CallTVector_NoRecover(
-	    void * p1, void * p2, void * p3, void * p4, void * p5, void * p6,	// r3-8
-	    LogicalAddress entry )						// r9
-
-*/
-
-#define PARAM_SIZE	24
-#define FM_SIZE		64
-#define FM_LR_SAVE	8
-#define FM_TOC_SAVE	20
-
-	ENTRY	_CallTVector
-
-#if 1
-	stw	r2,	FM_TOC_SAVE(r1)
-    	lwz	r0,	0(r9)
-	lwz	r2,	4(r9)
-	mtspr	ctr,	r0
-	bctr
-
-#else
-	mflr	r0
-	stw	r0,	FM_LR_SAVE(r1)
-	stw	r2,	FM_TOC_SAVE(r1)
-
-	stwu	r1,	-(PARAM_SIZE+FM_SIZE)(r1)
-	
-	lwz	r2,	4(r9)
-	lwz	r0,	0(r9)
-	mtspr	lr,	r0
-	mfspr	r12,	lr
-	blrl
-
-	addi	r1,	r1,(PARAM_SIZE+FM_SIZE)
-	lwz	r2,	FM_TOC_SAVE(r1)
-	lwz	r0,	FM_LR_SAVE(r1)
-	mtlr	r0
-	blr
-#endif
-
-/*
- * Seemingly unused references from cpp statically initialized objects.
- */
-
-.globl .constructors_used
-.constructors_used = 0
-.globl .destructors_used
-.destructors_used = 0
diff --git a/iokit/Kernel/ppc/IODBDMA.cpp b/iokit/Kernel/ppc/IODBDMA.cpp
deleted file mode 100644
index f706e2809..000000000
--- a/iokit/Kernel/ppc/IODBDMA.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1997 Apple Computer, Inc.
- *
- *
- * HISTORY
- *
- * Simon Douglas  10 Nov 97
- * - first checked in, mostly from machdep/ppc/dbdma.c
- *
- */
-
-
-#include <IOKit/ppc/IODBDMA.h>
-#include <IOKit/IOLib.h>
-
-void
-IODBDMAStart( volatile IODBDMAChannelRegisters *registers, volatile IODBDMADescriptor *physicalDescPtr)
-{
-
-    if( ((int) physicalDescPtr) & 0xf)
-	panic("IODBDMAStart: unaligned IODBDMADescriptor");
-
-    eieio();
-    IOSetDBDMAInterruptSelect(registers, 0xff000000);		// clear out interrupts
-
-    IOSetDBDMAChannelControl( registers,
-	IOClearDBDMAChannelControlBits( kdbdmaRun | kdbdmaPause | kdbdmaFlush | kdbdmaWake | kdbdmaDead | kdbdmaActive ));
-
-    while( IOGetDBDMAChannelStatus( registers) & kdbdmaActive)
-	eieio();
-
-    IOSetDBDMACommandPtr( registers, (unsigned int) physicalDescPtr);
-
-    IOSetDBDMAChannelControl( registers,
-	IOSetDBDMAChannelControlBits( kdbdmaRun | kdbdmaWake ));
-
-}
-
-void
-IODBDMAStop( volatile IODBDMAChannelRegisters *registers)
-{
-
-    IOSetDBDMAChannelControl( registers,
-	  IOClearDBDMAChannelControlBits( kdbdmaRun )
-	| IOSetDBDMAChannelControlBits(  kdbdmaFlush ));
-
-    while( IOGetDBDMAChannelStatus( registers) & ( kdbdmaActive | kdbdmaFlush))
-	eieio();
-
-}
-
-void
-IODBDMAFlush( volatile IODBDMAChannelRegisters *registers)
-{
-
-    IOSetDBDMAChannelControl( registers,
-	 IOSetDBDMAChannelControlBits(  kdbdmaFlush ));
-
-    while( IOGetDBDMAChannelStatus( registers) & kdbdmaFlush)
-	eieio();
-
-}
-
-void
-IODBDMAReset( volatile IODBDMAChannelRegisters *registers)
-{
-
-    IOSetDBDMAChannelControl( registers,
-	IOClearDBDMAChannelControlBits( kdbdmaRun | kdbdmaPause | kdbdmaFlush | kdbdmaWake | kdbdmaDead | kdbdmaActive ));
-
-    while( IOGetDBDMAChannelStatus( registers) & kdbdmaActive)
-	eieio();
-
-}
-
-void
-IODBDMAContinue( volatile IODBDMAChannelRegisters *registers)
-{
-
-    IOSetDBDMAChannelControl( registers,
-	  IOClearDBDMAChannelControlBits( kdbdmaPause | kdbdmaDead )
-	| IOSetDBDMAChannelControlBits(  kdbdmaRun | kdbdmaWake ));
-
-}
-
-void
-IODBDMAPause( volatile IODBDMAChannelRegisters *registers)
-{
-
-    IOSetDBDMAChannelControl( registers,
-	 IOSetDBDMAChannelControlBits(  kdbdmaPause ));
-
-    while( IOGetDBDMAChannelStatus( registers) & kdbdmaActive)
-	eieio();
-
-}
-
-IOReturn
-IOAllocatePhysicallyContiguousMemory(
-		unsigned int /* size */, unsigned int /* options */,
-		IOVirtualAddress * /* logical */,
-		IOPhysicalAddress * /* physical */ )
-{
-#if 0
-    IOReturn		err;
-    vm_offset_t		mem;
-
-    if( (size > 4096) || (options))
-	return( kIOReturnUnsupported);
-
-    mem = (vm_offset_t) IOMalloc( size);
-    *logical = (IOVirtualAddress) mem;
-
-    if( mem) {
-	err = IOPhysicalFromVirtual( IOVmTaskSelf(), mem, (vm_offset_t *) physical);
-	if( err)
-	    IOFree( (char *)mem, size);
-
-    } else {
-	err = kIOReturnNoMemory;
-	*physical = 0;
-    }
-
-    return( err);
-#endif /* 0 */
-	return (kIOReturnUnsupported);
-}
-
-IOReturn
-IOFreePhysicallyContiguousMemory( IOVirtualAddress * logical, unsigned int size)
-{
-    IOFree( logical, size);
-    return( kIOReturnSuccess);
-}
diff --git a/iokit/Kernel/x86_64/IOSharedLock.s b/iokit/Kernel/x86_64/IOSharedLock.s
index 69183e016..d5e5ecabc 100644
--- a/iokit/Kernel/x86_64/IOSharedLock.s
+++ b/iokit/Kernel/x86_64/IOSharedLock.s
@@ -1,19 +1,14 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2010 Apple Computer, Inc. All rights reserved.
  *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * @APPLE_LICENSE_HEADER_START@
  * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
  * 
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
@@ -23,7 +18,43 @@
  * Please see the License for the specific language governing rights and
  * limitations under the License.
  * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <architecture/i386/asm_help.h>
+
+	TEXT
+
+/*
+ * void
+ * OSSpinLockUnlock(p)
+ *	int *p;
+ *
+ * Unlock the lock pointed to by p.
+ */
+
+LEAF(_OSSpinLockUnlock, 0)
+LEAF(_IOSpinUnlock, 0)
+LEAF(_ev_unlock, 0)
+	movl		$0, (%rdi)
+END(_OSSpinLockUnlock)
+
+
+/*
+ * int
+ * OSSpinLockTry(p)
+ *	int *p;
+ *
+ * Try to lock p.  Return zero if not successful.
  */
-#include <IOKit/machine/IOSharedLockImp.h>
 
+LEAF(_OSSpinLockTry, 0)
+LEAF(_IOTrySpinLock, 0)
+LEAF(_ev_try_lock, 0)
+	xorl		%eax, %eax
+	orl		$-1, %edx
+	lock
+	cmpxchgl	%edx, (%rdi)
+	setz		%dl
+	movzbl		%dl, %eax
+END(_OSSpinLockTry)
diff --git a/iokit/KernelConfigTables.cpp b/iokit/KernelConfigTables.cpp
index b8c590d06..ee06e47e5 100644
--- a/iokit/KernelConfigTables.cpp
+++ b/iokit/KernelConfigTables.cpp
@@ -37,35 +37,15 @@ const char * gIOKernelConfigTables =
 "   {"
 "     'IOClass'         = IOPanicPlatform;"
 "     'IOProviderClass' = IOPlatformExpertDevice;"
-"     'IOProbeScore'    = '-1';"
+"     'IOProbeScore'    = 0:32;"
 "   }"
-#ifdef PPC
-"   ,"
-"   {"
-"       'IOClass'               = AppleCPU;"
-"       'IOProviderClass'       = IOPlatformDevice;"
-"       'IONameMatch'           = 'cpu';"
-"       'IOProbeScore'          = 100:32;"
-"   },"
-"   {"
-"       'IOClass'              = AppleNMI;"
-"       'IOProviderClass'      = AppleMacIODevice;"
-"       'IONameMatch'          = 'programmer-switch';"
-"   },"
-"   {"
-"       'IOClass'               = AppleNVRAM;"
-"       'IOProviderClass'       = AppleMacIODevice;"
-"       'IONameMatch'           = nvram;"
-"   }"
-#endif /* PPC */
 ")";
 
-
 /* This stuff is no longer used at all but was exported in prior
  * releases, so we'll keep them around for PPC/i386 only.
  * See libkern's OSKext.cpp for other symbols, which have been moved
  * there for sanity.
  */
-#if __ppc__ || __i386__
+#if __i386__
 const char * gIOKernelKmods = "";
-#endif /* __ppc__ || __i386__ */
+#endif /* __i386__ */
diff --git a/iokit/Makefile b/iokit/Makefile
index fee3c6fe1..498a2540a 100644
--- a/iokit/Makefile
+++ b/iokit/Makefile
@@ -8,20 +8,17 @@ include $(MakeInc_cmd)
 include $(MakeInc_def)
 
 INSTINC_SUBDIRS = IOKit
-INSTINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS}
 INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS}
 INSTINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS}
 INSTINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS}
 
-
 EXPINC_SUBDIRS = IOKit
-EXPINC_SUBDIRS_PPC = ${EXPINC_SUBDIRS}
 EXPINC_SUBDIRS_I386 = ${EXPINC_SUBDIRS}
 EXPINC_SUBDIRS_X86_64 = ${EXPINC_SUBDIRS}
 EXPINC_SUBDIRS_ARM = ${EXPINC_SUBDIRS}
 
 
-SETUP_SUBDIRS = conf
+SETUP_SUBDIRS =
 
 COMP_SUBDIRS = conf
 
diff --git a/iokit/bsddev/DINetBootHook.cpp b/iokit/bsddev/DINetBootHook.cpp
index ebf591eb0..8e83da5c1 100644
--- a/iokit/bsddev/DINetBootHook.cpp
+++ b/iokit/bsddev/DINetBootHook.cpp
@@ -95,6 +95,40 @@
 #define	kDIRootImageResultKey		"di-root-image-result"
 #define	kDIRootImageDevNameKey		"di-root-image-devname"
 #define	kDIRootImageDevTKey			"di-root-image-devt"
+#define kDIRootRamFileKey           "di-root-ram-file"
+
+static IOService *
+di_load_controller( void )
+{
+	OSIterator *	controllerIterator 	= 0;
+	OSDictionary *	matchDictionary 	= 0;
+	IOService *     controller			= 0;
+
+    do {
+        IOService::getResourceService()->publishResource("com.apple.AppleDiskImageController.load", kOSBooleanTrue);
+        IOService::getResourceService()->waitQuiet();
+
+        // first find IOHDIXController
+        matchDictionary = IOService::serviceMatching(kIOHDIXControllerClassName);
+        if (!matchDictionary)
+            break;
+
+        controllerIterator = IOService::getMatchingServices(matchDictionary);
+        if (!controllerIterator)
+            break;
+
+        controller = OSDynamicCast(IOService, controllerIterator->getNextObject());
+        if (!controller)
+            break;
+
+        controller->retain();
+    } while (false);
+
+	if (matchDictionary)	matchDictionary->release();
+	if (controllerIterator)	controllerIterator->release();
+
+    return controller;
+}
 
 extern "C" {
 /*
@@ -108,8 +142,6 @@ extern "C" {
 int di_root_image(const char *path, char devname[], dev_t *dev_p)
 {
 	IOReturn			res 				= 0;
-	OSIterator		*	controllerIterator 	= 0;
-	OSDictionary 	*	matchDictionary 	= 0;
 	IOService		*	controller			= 0;
 	OSString		*	pathString			= 0;
 	OSNumber		*	myResult			= 0;
@@ -124,24 +156,7 @@ int di_root_image(const char *path, char devname[], dev_t *dev_p)
 	if (!devname) 		return kIOReturnBadArgument;
 	if (!dev_p) 		return kIOReturnBadArgument;
 
-	(void)IOService::getResourceService()->publishResource("com.apple.AppleDiskImageController.load", kOSBooleanTrue);
-	IOService::getResourceService()->waitQuiet();
-
-	// first find IOHDIXController
-	matchDictionary = IOService::serviceMatching(kIOHDIXControllerClassName);
-	if (!matchDictionary) {
-		res = kIOReturnNoMemory;
-		goto serviceMatching_FAILED;
-	}
-	
-	controllerIterator = IOService::getMatchingServices(matchDictionary);
-	if (!controllerIterator) {
-		res = kIOReturnNoMemory;
-		goto getMatchingServices_FAILED;
-	}
-
-	// use the "setProperty" method of IOHDIXController to trigger the desired behaviour
-	controller = OSDynamicCast(IOService, controllerIterator->getNextObject());
+    controller = di_load_controller();
 	if (!controller) {
 		res = kIOReturnNotFound;
 		goto NoIOHDIXController;
@@ -191,16 +206,85 @@ int di_root_image(const char *path, char devname[], dev_t *dev_p)
 
 di_root_image_FAILED:
 CannotCreatePathOSString:
-serviceMatching_FAILED:
 NoIOHDIXController:
-getMatchingServices_FAILED:
 
 	// clean up memory allocations
 	if (pathString)			pathString->release();
-	if (matchDictionary)	matchDictionary->release();
-	if (controllerIterator)	controllerIterator->release();
+    if (controller)         controller->release();
 
 	return res;
 }
 
+void di_root_ramfile( IORegistryEntry * entry )
+{
+    OSData *                data;
+    IOMemoryDescriptor *    mem;
+    uint64_t                dmgSize;
+    uint64_t                remain, length;
+    OSData *                extentData = 0;
+    IOAddressRange *        extentList;
+    uint64_t                extentSize;
+    uint32_t                extentCount;
+
+    do {
+        data = OSDynamicCast(OSData, entry->getProperty("boot-ramdmg-size"));
+        if (!data || (data->getLength() != sizeof(uint64_t)))
+            break;  // bad disk image size
+
+        dmgSize = *(uint64_t *) data->getBytesNoCopy();
+        if (!dmgSize)
+            break;
+
+        data = OSDynamicCast(OSData, entry->getProperty("boot-ramdmg-extents"));
+        if (!data || (data->getLength() == 0) ||
+            ((data->getLength() & (sizeof(IOAddressRange)-1)) != 0))
+            break;  // bad extents
+
+        // make modifications to local copy
+        extentData  = OSData::withData(data);
+        assert(extentData);
+
+        extentList  = (IOAddressRange *) extentData->getBytesNoCopy();
+        extentCount = extentData->getLength() / sizeof(IOAddressRange);
+        extentSize  = 0;
+        remain = dmgSize;
+
+        // truncate extent length to enclosing disk image
+        for (uint32_t i = 0; i < extentCount; i++)
+        {
+            length = extentList[i].length;
+            if (!length) break;
+            
+            extentSize += length;
+            if (length >= remain)
+            {
+                extentList[i].length = remain;
+                extentCount = i + 1;
+                break;
+            }
+            remain -= length;
+        }
+        if (extentSize < dmgSize)
+            break;  // not enough extent bytes for enclosing disk image
+
+        mem = IOMemoryDescriptor::withAddressRanges(
+                extentList, extentCount,
+                kIODirectionOut | kIOMemoryMapperNone, NULL);
+        
+        if (mem)
+        {
+            IOService * controller = di_load_controller();
+            if (controller)
+            {
+                controller->setProperty(kDIRootRamFileKey, mem);
+                controller->release();
+            }
+            mem->release();
+        }
+    } while (false);
+    
+    if (extentData)
+        extentData->release();
+}
+
 };
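
A worked example of the extent-truncation loop in di_root_ramfile(), with illustrative numbers:

// dmgSize = 120, extents = { length 100, length 50 }
//   i = 0: extentSize = 100; 100 < remain (120), so remain becomes 20
//   i = 1: extentSize = 150; 50 >= remain (20), so extent[1].length is
//          truncated to 20 and extentCount stays 2
// extentSize (150) >= dmgSize (120), so the final check passes and the
// memory descriptor covers exactly 100 + 20 = 120 bytes of the disk image.
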
diff --git a/iokit/bsddev/IOKitBSDInit.cpp b/iokit/bsddev/IOKitBSDInit.cpp
index 6568afd57..feffd1a9e 100644
--- a/iokit/bsddev/IOKitBSDInit.cpp
+++ b/iokit/bsddev/IOKitBSDInit.cpp
@@ -46,9 +46,10 @@ extern "C" {
 #define ROOTDEVICETIMEOUT       60
 #endif
 
-extern dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys);
+extern dev_t mdevadd(int devid, uint64_t base, unsigned int size, int phys);
 extern dev_t mdevlookup(int devid);
 extern void mdevremoveall(void);
+extern void di_root_ramfile(IORegistryEntry * entry);
 
 kern_return_t
 IOKitBSDInit( void )
@@ -542,7 +543,7 @@ kern_return_t IOFindBSDRoot( char * rootName, unsigned int rootNameSize,
     UInt32		flags = 0;
     int			mnr, mjr;
     bool		findHFSChild = false;
-    char *              mediaProperty = 0;
+    const char *        mediaProperty = 0;
     char *		rdBootVar;
     enum {		kMaxPathBuf = 512, kMaxBootVar = 128 };
     char *		str;
@@ -571,6 +572,7 @@ kern_return_t IOFindBSDRoot( char * rootName, unsigned int rootNameSize,
 
     do {
 	if( (regEntry = IORegistryEntry::fromPath( "/chosen", gIODTPlane ))) {
+	    di_root_ramfile(regEntry);
             data = OSDynamicCast(OSData, regEntry->getProperty( "root-matching" ));
             if (data) {
                matching = OSDynamicCast(OSDictionary, OSUnserializeXML((char *)data->getBytesNoCopy()));
@@ -917,7 +919,7 @@ kern_return_t IOBSDGetPlatformUUID( uuid_t uuid, mach_timespec_t timeout )
     IOService * resources;
     OSString *  string;
 
-    resources = IOService::waitForService( IOService::resourceMatching( kIOPlatformUUIDKey ), &timeout );
+    resources = IOService::waitForService( IOService::resourceMatching( kIOPlatformUUIDKey ), ( timeout.tv_sec || timeout.tv_nsec ) ? &timeout : 0 );
     if ( resources == 0 ) return KERN_OPERATION_TIMED_OUT;
 
     string = ( OSString * ) IOService::getPlatform( )->getProvider( )->getProperty( kIOPlatformUUIDKey );
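
Note the subtle semantic change above: an all-zero mach_timespec_t now selects the no-timeout form of waitForService(), so the call blocks until the platform UUID resource is published, while a nonzero timespec still bounds the wait. A hedged kernel-side sketch of the two caller behaviors:

// Hypothetical callers of IOBSDGetPlatformUUID() after this change:
uuid_t uuid;
mach_timespec_t never   = { 0, 0 };	// passes 0: wait indefinitely
mach_timespec_t bounded = { 30, 0 };	// passes &bounded: 30-second cap
(void) IOBSDGetPlatformUUID(uuid, bounded);
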
diff --git a/iokit/conf/MASTER b/iokit/conf/MASTER
index 0cb4dbb61..f1d0f0648 100644
--- a/iokit/conf/MASTER
+++ b/iokit/conf/MASTER
@@ -60,6 +60,7 @@ ident		IOKIT
 options		HIBERNATION	# system hibernation	# <hibernation>
 options		KERNOBJC	# Objective-C implementation	# <kernobjc>
 options		IOKITCPP	# C++ implementation		# <iokitcpp>
+options		IOKITSTATS	# IOKit statistics		# <iokitstats>
 options		KDEBUG		# kernel tracing		# <kdebug>
 options		NETWORKING	# kernel networking		# <networking>
 options		CRYPTO		# want crypto code		# <crypto>
@@ -67,7 +68,6 @@ options		CONFIG_DTRACE	# enable dtrace			# <config_dtrace>
 
 options		CONFIG_SLEEP	#				# <config_sleep>
 
-
 #makeoptions	LIBDRIVER = "libDriver_kern.o"			# <libdriver>
 #makeoptions	LIBOBJC   = "libkobjc.o"			# <kernobjc>
 
@@ -93,4 +93,15 @@ options   CONFIG_NO_KPRINTF_STRINGS		# <no_kprintf_str>
 #
 options   CONFIG_EMBEDDED                       # <config_embedded>
 
+# secure_kernel - secure kernel from user programs
+options   SECURE_KERNEL                       # <secure_kernel>
+
 options   MACH_ASSERT				# <mach_assert>
+
+#
+# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and 
+# security/conf MASTER files.
+#
+options		CONFIG_MACF			# Mandatory Access Control Framework
+
+options   DEVELOPMENT                           # <development>
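For context, config turns each options line above into a generated header
that defines the named symbol, and kernel sources then gate optional code
with the preprocessor. A hedged illustration of how an option such as
SECURE_KERNEL is typically consumed; the guarded policy macro below is
invented for illustration, not xnu code:

    /* SECURE_KERNEL comes from the generated config header. */
    #if SECURE_KERNEL
    #define ALLOW_KERNEL_DEBUGGER 0   /* hardened configs refuse attach */
    #else
    #define ALLOW_KERNEL_DEBUGGER 1
    #endif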
diff --git a/iokit/conf/MASTER.i386 b/iokit/conf/MASTER.i386
index 3574359ef..ab7ff3360 100644
--- a/iokit/conf/MASTER.i386
+++ b/iokit/conf/MASTER.i386
@@ -3,13 +3,12 @@
 #  Standard Apple Mac OS Configurations:
 #  -------- ----- ------ ---------------
 #
-#  RELEASE	= [ intel mach iokitcpp hibernation medium crypto config_dtrace config_sleep ]
+#  RELEASE	= [ intel mach iokitcpp hibernation medium crypto config_dtrace config_sleep iokitstats ]
 #  PROFILE	= [ RELEASE profile ]
 #  DEBUG	= [ RELEASE debug ]
 #
-#
 #  EMBEDDED	= [ intel mach iokitcpp hibernation no_kextd bsmall crypto ]
-#  DEVELOPMENT	= [ EMBEDDED config_dtrace ]
+#  DEVELOPMENT	= [ EMBEDDED config_dtrace development ]
 #
 ######################################################################
 
diff --git a/iokit/conf/MASTER.ppc b/iokit/conf/MASTER.ppc
deleted file mode 100644
index 2318d5561..000000000
--- a/iokit/conf/MASTER.ppc
+++ /dev/null
@@ -1,18 +0,0 @@
-#
-######################################################################
-#  
-#  Standard Apple MacOS X Configurations:
-#  -------- ---- -------- ---------------
-#
-#  RELEASE = [ppc mach iokitcpp hibernation medium crypto config_dtrace]
-#  DEVELOPMENT = [ RELEASE ]
-#  PROFILE = [ RELEASE profile ]
-#  DEBUG = [ RELEASE debug]
-#  RELEASE_TRACE = [ RELEASE kdebug ]
-#  DEBUG_TRACE   = [ DEBUG kdebug ]
-#
-######################################################################
-
-machine		"ppc"					# <ppc>
-cpu		"ppc"					# <ppc>
-
diff --git a/iokit/conf/MASTER.x86_64 b/iokit/conf/MASTER.x86_64
index 857357c71..781ce8c7c 100644
--- a/iokit/conf/MASTER.x86_64
+++ b/iokit/conf/MASTER.x86_64
@@ -3,13 +3,12 @@
 #  Standard Apple Mac OS Configurations:
 #  -------- ----- ------ ---------------
 #
-#  RELEASE	= [ intel mach iokitcpp hibernation medium crypto config_dtrace config_sleep ]
+#  RELEASE	= [ intel mach iokitcpp hibernation medium crypto config_dtrace config_sleep iokitstats ]
 #  PROFILE	= [ RELEASE profile ]
 #  DEBUG	= [ RELEASE debug ]
 #
-#
 #  EMBEDDED	= [ intel mach iokitcpp hibernation no_kextd bsmall crypto ]
-#  DEVELOPMENT	= [ EMBEDDED ]
+#  DEVELOPMENT	= [ EMBEDDED development ]
 #
 ######################################################################
 
diff --git a/iokit/conf/Makefile b/iokit/conf/Makefile
index 750aadb65..7b37a4736 100644
--- a/iokit/conf/Makefile
+++ b/iokit/conf/Makefile
@@ -7,8 +7,7 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
-SETUP_SUBDIRS = \
-	tools
+SETUP_SUBDIRS =
 
 COMP_SUBDIRS = 
 
@@ -24,30 +23,24 @@ else
 export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)
 endif
 
-$(COMPOBJROOT)/doconf:
-	@make build_setup 
+MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(ARCH_CONFIG_LC).$(MACHINE_CONFIG_LC)
 
 $(COMPOBJROOT)/$(IOKIT_KERNEL_CONFIG)/Makefile :  $(SOURCE)/MASTER  \
 	$(SOURCE)/MASTER.$(ARCH_CONFIG_LC) \
 	$(SOURCE)/Makefile.template  \
 	$(SOURCE)/Makefile.$(ARCH_CONFIG_LC)  \
 	$(SOURCE)/files \
-	$(SOURCE)/files.$(ARCH_CONFIG_LC) \
-	$(COMPOBJROOT)/doconf
+	$(SOURCE)/files.$(ARCH_CONFIG_LC)
 	$(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \
 	$(MKDIR) $${doconf_target}; \
 	cd $${doconf_target}; \
 	rm -f $(notdir $?); \
 	cp  $? $${doconf_target}; \
-	$(COMPOBJROOT)/doconf -c -cpu $(ARCH_CONFIG_LC) -d  $(TARGET)/$(IOKIT_KERNEL_CONFIG) $(IOKIT_KERNEL_CONFIG); \
+	if [ -f $(MASTER_CPU_PER_SOC) ]; then cp $(MASTER_CPU_PER_SOC) $${doconf_target}; fi; \
+	$(SRCROOT)/SETUP/config/doconf -c -cpu $(ARCH_CONFIG_LC) -soc $(MACHINE_CONFIG_LC) -d  $(TARGET)/$(IOKIT_KERNEL_CONFIG) $(IOKIT_KERNEL_CONFIG); \
 	);
 
-.ORDER: $(COMPOBJROOT)/$(IOKIT_KERNEL_CONFIG)/Makefile 
-
-do_setup_conf: $(COMPOBJROOT)/doconf \
-		$(COMPOBJROOT)/$(IOKIT_KERNEL_CONFIG)/Makefile 
-
-do_all: do_setup_conf
+do_all: $(COMPOBJROOT)/$(IOKIT_KERNEL_CONFIG)/Makefile
 	$(_v)next_source=$(subst conf/,,$(SOURCE));			\
 	${MAKE} -C $(COMPOBJROOT)/$(IOKIT_KERNEL_CONFIG)	\
 		MAKEFILES=$(TARGET)/$(IOKIT_KERNEL_CONFIG)/Makefile	\
diff --git a/iokit/conf/Makefile.i386 b/iokit/conf/Makefile.i386
index 43a6e5b4f..8842b32d7 100644
--- a/iokit/conf/Makefile.i386
+++ b/iokit/conf/Makefile.i386
@@ -2,21 +2,9 @@
 #BEGIN  Machine dependent Makefile fragment for i386
 ######################################################################
  
-# Enable -Werror for i386 builds
-CFLAGS+= $(WERROR)
-CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD))
-
-# Objects that don't compile cleanly:
-#OBJS_NO_WERROR=		\
-
-OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS))
-
-$(OBJS_WERROR):		WERROR=-Werror
-
 # Files that must go in the __HIB segment:
 UNCONFIGURED_HIB_FILES=	\
-			IOHibernateRestoreKernel.o \
-			WKdmDecompress.o
+			IOHibernateRestoreKernel.o
 
 HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS))
 
diff --git a/iokit/conf/Makefile.ppc b/iokit/conf/Makefile.ppc
deleted file mode 100644
index c794da174..000000000
--- a/iokit/conf/Makefile.ppc
+++ /dev/null
@@ -1,27 +0,0 @@
-######################################################################
-#BEGIN  Machine dependent Makefile fragment for ppc
-######################################################################
- 
-# Enable -Werror for ppc builds
-CFLAGS+= $(WERROR)
-CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD))
-
-# Objects that don't compile cleanly:
-OBJS_NO_WERROR=		\
-	AppleMacIO.cpo
-
-OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS))
-
-$(OBJS_WERROR):		WERROR=-Werror
-
-# Files that must go in the __HIB segment:
-UNCONFIGURED_HIB_FILES=	\
-			IOHibernateRestoreKernel.o \
-			WKdmDecompress.o
-
-HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS))
-
-######################################################################
-#END    Machine dependent Makefile fragment for ppc
-######################################################################
-
diff --git a/iokit/conf/Makefile.template b/iokit/conf/Makefile.template
index 55d99d413..96fe217a1 100644
--- a/iokit/conf/Makefile.template
+++ b/iokit/conf/Makefile.template
@@ -26,15 +26,13 @@ include $(MakeInc_def)
 #
 # XXX: CFLAGS
 #
-CFLAGS+= -imacros meta_features.h -DKERNEL -DDRIVER_PRIVATE				\
-	-Wall -fno-common					\
+CFLAGS+= -include meta_features.h -DDRIVER_PRIVATE		\
 	-DIOMATCHDEBUG=1 -DIOALLOCDEBUG=1			\
-	-imacros meta_features.h $(CFLAGS_INLINE_CONFIG)
+	-include meta_features.h $(CFLAGS_INLINE_CONFIG)
 #-DIOKITDEBUG=-1
 
-CWARNFLAGS   += -Wno-unused-parameter -Wno-redundant-decls -Wno-nested-externs -Wno-write-strings
-MWARNFLAGS   += -Wno-unused-parameter -Wno-redundant-decls -Wno-nested-externs -Wno-write-strings
-CXXWARNFLAGS += -Wno-unused-parameter -Wno-redundant-decls -Wno-write-strings -Wno-cast-qual -Wno-shadow
+CWARNFLAGS   = $(CWARNFLAGS_STD) -Wno-unused-parameter
+CXXWARNFLAGS = $(CXXWARNFLAGS_STD) -Wno-unused-parameter -Wno-cast-qual -Wno-shadow
 
 CFLAGS_RELEASE += -DIOASSERT=0
 CFLAGS_DEBUG += -DIOASSERT=1
@@ -85,22 +83,24 @@ ${OBJS}: ${OBJSDEPS}
 
 LDOBJS = $(OBJS)
 
-$(COMPONENT).o: $(LDOBJS)
-	$(_v)for hib_file in ${HIB_FILES};		\
+$(COMPONENT).filelist: $(LDOBJS)
+	$(_v)if [ $(BUILD_MACHO_OBJ) -eq 1 ]; then \
+	for hib_file in ${HIB_FILES};		\
 	do	\
                 $(SEG_HACK) __HIB $${hib_file} -o $${hib_file}__; \
                 mv $${hib_file}__ $${hib_file} ; \
-	done;
+	done; \
+	fi
 	@echo LDFILELIST $(COMPONENT)
 	$(_v)( for obj in ${LDOBJS}; do	\
 		 echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \
-	done; ) > $(COMPONENT).o
+	done; ) > $(COMPONENT).filelist
 
 do_depend: do_all
 	$(_v)${MD} -u Makedep -f -d `ls *.d`
 	
 	
-do_all: $(COMPONENT).o
+do_all: $(COMPONENT).filelist
 
 do_build_all: do_depend
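The filelist rule above runs SEG_HACK over each object named in HIB_FILES so
that its code and data are retagged into the __HIB segment, which must be
self-contained: it executes while the rest of the kernel has not yet been
restored from the hibernation image. On Mach-O the per-symbol equivalent is
a section attribute; a hedged sketch (the helper name is hypothetical):

    /* Source-level placement into __HIB; the build here instead retags
     * whole objects after compilation via SEG_HACK. */
    __attribute__((section("__HIB, __text")))
    void hibernate_restore_helper(void)
    {
        /* May only reference other __HIB-resident code and data: normal
         * kernel text/data is not valid when this runs. */
    }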
 
diff --git a/iokit/conf/Makefile.x86_64 b/iokit/conf/Makefile.x86_64
index 09b0c0b71..463de5a20 100644
--- a/iokit/conf/Makefile.x86_64
+++ b/iokit/conf/Makefile.x86_64
@@ -1,22 +1,10 @@
 ######################################################################
 #BEGIN  Machine dependent Makefile fragment for x86_64
 ######################################################################
- 
-# Enable -Werror for x86_64 builds
-CFLAGS+= $(WERROR)
-CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD))
-
-# Objects that don't compile cleanly:
-#OBJS_NO_WERROR=			\
- 
-OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS))
-
-$(OBJS_WERROR):		WERROR=-Werror
 
 # Files that must go in the __HIB segment:
 UNCONFIGURED_HIB_FILES=	\
-			IOHibernateRestoreKernel.o \
-			WKdmDecompress.o
+			IOHibernateRestoreKernel.o
 
 HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS))
 
diff --git a/iokit/conf/files b/iokit/conf/files
index 18d44275a..532732d3b 100644
--- a/iokit/conf/files
+++ b/iokit/conf/files
@@ -10,8 +10,6 @@ OPTIONS/mach_assert				optional mach_assert
 
 # libIOKit
 
-iokit/Kernel/WKdmCompress.c				optional hibernation
-iokit/Kernel/WKdmDecompress.c				optional hibernation
 iokit/Kernel/IOHibernateIO.cpp				optional hibernation
 iokit/Kernel/IOHibernateRestoreKernel.c			optional hibernation
 
@@ -78,6 +76,8 @@ iokit/Kernel/IOSharedDataQueue.cpp			optional iokitcpp
 # iokit/Tests/TestContainers.cpp			optional iokitcpp
 # iokit/Tests/TestCollections.cpp			optional iokitcpp
 
+iokit/Kernel/IOStatistics.cpp			optional iokitcpp
+
 iokit/Kernel/IOStringFuncs.c				standard
 
 # Property tables for kernel-linked objects
diff --git a/iokit/conf/files.i386 b/iokit/conf/files.i386
index 2193ae37a..17c544f86 100644
--- a/iokit/conf/files.i386
+++ b/iokit/conf/files.i386
@@ -5,4 +5,7 @@ iokit/Kernel/i386/IOSharedLock.s                            standard
 iokit/Kernel/i386/IOAsmSupport.s                            standard
 
 # Power Domains
-iokit/Kernel/IOPMrootDomain.cpp    				optional iokitcpp
+iokit/Kernel/IOPMrootDomain.cpp    			    optional iokitcpp
+
+# Key Store helper
+iokit/Kernel/i386/IOKeyStoreHelper.cpp			    standard
diff --git a/iokit/conf/files.ppc b/iokit/conf/files.ppc
deleted file mode 100644
index 8d60fc863..000000000
--- a/iokit/conf/files.ppc
+++ /dev/null
@@ -1,20 +0,0 @@
-
-iokit/Kernel/ppc/IOAsmSupport.s					standard
-iokit/Kernel/ppc/IODBDMA.cpp					optional iokitcpp
-iokit/Kernel/ppc/IOSharedLock.s					standard
-
-iokit/Families/IONVRAM/IONVRAMController.cpp			optional iokitcpp
-iokit/Drivers/platform/drvAppleNVRAM/AppleNVRAM.cpp		optional iokitcpp
-
-# Apple Platform Expert
-iokit/Drivers/platform/drvApplePlatformExpert/ApplePlatformExpert.cpp	optional iokitcpp
-iokit/Drivers/platform/drvApplePlatformExpert/AppleCPU.cpp		optional iokitcpp
-
-# Power Domains
-iokit/Kernel/IOPMrootDomain.cpp					optional iokitcpp
-
-# Apple Mac-IO driver
-iokit/Drivers/platform/drvAppleMacIO/AppleMacIO.cpp		optional iokitcpp
-
-# Apple NMI driver
-iokit/Drivers/platform/drvAppleNMI/AppleNMI.cpp			optional iokitcpp
diff --git a/iokit/conf/files.x86_64 b/iokit/conf/files.x86_64
index c81cf1178..9d6ca13ee 100644
--- a/iokit/conf/files.x86_64
+++ b/iokit/conf/files.x86_64
@@ -5,4 +5,7 @@ iokit/Kernel/x86_64/IOSharedLock.s                            standard
 iokit/Kernel/x86_64/IOAsmSupport.s                            standard
 
 # Power Domains
-iokit/Kernel/IOPMrootDomain.cpp    				optional iokitcpp
+iokit/Kernel/IOPMrootDomain.cpp    			    optional iokitcpp
+
+# Key Store helper
+iokit/Kernel/i386/IOKeyStoreHelper.cpp			    standard
diff --git a/iokit/conf/tools/Makefile b/iokit/conf/tools/Makefile
deleted file mode 100644
index 4f9ccd553..000000000
--- a/iokit/conf/tools/Makefile
+++ /dev/null
@@ -1,32 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-SETUP_SUBDIRS = doconf
-
-COMP_SUBDIRS = doconf
-
-INST_SUBDIRS = \
-
-
-setup_build_all:
-	@echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-do_build_all:
-	@echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-setup_build_install:
-	@echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-do_build_install:
-	@echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/iokit/conf/tools/doconf/Makefile b/iokit/conf/tools/doconf/Makefile
deleted file mode 100644
index aa55a9419..000000000
--- a/iokit/conf/tools/doconf/Makefile
+++ /dev/null
@@ -1,47 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-COMP_SUBDIRS = \
-
-INST_SUBDIRS = \
-
-
-#
-# Who and where
-#
-BINDIR= 
-ifneq ($(MACHINE_CONFIG), DEFAULT)
-DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)/)
-else
-DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)/)
-endif
-PROGRAM= $(DSTDIR)doconf
-
-# 
-# How to install it
-#
-IFLAGS= -c -m 555
-
-$(PROGRAM): $(DSTDIR)% : $(SOURCE)%.csh
-	@-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS 
-	@sed -e "s/#PROGRAM.*/#`vers_string $(notdir $(PROGRAM))`/" \
-		< $< >$(notdir $(PROGRAM)).VERS;
-	@install $(IFLAGS) $(notdir $(PROGRAM)).VERS $(PROGRAM);
-	@-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS;
-
-do_build_setup: $(PROGRAM)
-
-do_build_all:
-
-setup_build_install:
-
-do_build_install:
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
diff --git a/iokit/conf/tools/doconf/doconf.csh b/iokit/conf/tools/doconf/doconf.csh
deleted file mode 100755
index 6fedb4786..000000000
--- a/iokit/conf/tools/doconf/doconf.csh
+++ /dev/null
@@ -1,321 +0,0 @@
-#!/bin/csh -f
-set path = ($path .)
-######################################################################
-# HISTORY
-#  1-Dec-87  Michael Young (mwyoung) at Carnegie-Mellon University
-#	Added "-verbose" switch, so this script produces no output
-#	in the normal case.
-#
-# 10-Oct-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Flushed cmu_*.h and spin_locks.h
-#	[ V5.1(XF18) ]
-#
-#  6-Apr-87  Avadis Tevanian (avie) at Carnegie-Mellon University
-#	Use MASTER.local and MASTER.<machine>.local for generation of
-#	configuration files in addition to MASTER and MASTER.<machine>.
-#
-# 25-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Removed use of obsolete wb_*.h files when building the feature
-#	list;  modified to save the previous configuration file and
-#	display the differences between it and the new file.
-#	[ V5.1(F8) ]
-#
-# 25-Mar-87  Avadis Tevanian (avie) at Carnegie-Mellon University
-#	If there is no /etc/machine just print out a message telling
-#	user to use the -cpu option.  I thought this script was supposed
-#	to work even without a /etc/machine, but it doesn't... and this
-#	is the easiest way out.
-#
-# 13-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added "romp_fpa.h" file to extra features for the RT.
-#	[ V5.1(F7) ]
-#
-# 11-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Updated to maintain the appropriate configuration features file
-#	in the "machine" directory whenever the corresponding
-#	configuration is generated.  This replaces the old mechanism of
-#	storing this directly in the <sys/features.h> file since it was
-#	machine dependent and also precluded building programs for more
-#	than one configuration from the same set of sources.
-#	[ V5.1(F6) ]
-#
-# 21-Feb-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Fixed to require wired-in cpu type names for only those
-#	machines where the kernel name differs from that provided by
-#	/etc/machine (i.e. IBMRT => ca and SUN => sun3);  updated to
-#	permit configuration descriptions in both machine indepedent
-#	and dependent master configuration files so that attributes can
-#	be grouped accordingly.
-#	[ V5.1(F3) ]
-#
-# 17-Jan-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Updated to work from any directory at the same level as
-#	"conf"; generate configuration from both MASTER and
-#	MASTER.<machine-type> files; added -cpu switch.
-#	[ V5.1(F1) ]
-#
-# 18-Aug-86  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added -make switch and changed meaning of -config;  upgraded to
-#	allow multiple attributes per configuration and to define
-#	configurations in terms of these attributes within MASTER.
-#
-# 14-Apr-83  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added -config switch to only run /etc/config without 
-#	"make depend" and "make".
-#
-######################################################################
-
-set prog=$0
-set prog=$prog:t
-set nonomatch
-set OBJDIR=../BUILD
-if ("`/usr/bin/uname`" == "Rhapsody" ) then
-set CONFIG_DIR=/usr/local/bin
-else
-set CONFIG_DIR=/usr/bin
-endif
-
-unset domake
-unset doconfig
-unset beverbose
-unset MACHINE
-unset profile
-
-while ($#argv >= 1)
-    if ("$argv[1]" =~ -*) then
-        switch ("$argv[1]")
-	case "-c":
-	case "-config":
-	    set doconfig
-	    breaksw
-	case "-m":
-	case "-make":
-	    set domake
-	    breaksw
-	case "-cpu":
-	    if ($#argv < 2) then
-		echo "${prog}: missing argument to ${argv[1]}"
-		exit 1
-	    endif
-	    set MACHINE="$argv[2]"
-	    shift
-	    breaksw
-	case "-d":
-	    if ($#argv < 2) then
-		echo "${prog}: missing argument to ${argv[1]}"
-		exit 1
-	    endif
-	    set OBJDIR="$argv[2]"
-	    shift
-	    breaksw
-	case "-verbose":
-	    set beverbose
-	    breaksw
-	case "-p":
-	case "-profile":
-	    set profile
-	    breaksw
-	default:
-	    echo "${prog}: ${argv[1]}: unknown switch"
-	    exit 1
-	    breaksw
-	endsw
-	shift
-    else
-	break
-    endif
-end
-
-if ($#argv == 0) set argv=(GENERIC)
-
-if (! $?MACHINE) then
-    if (-d /NextApps) then
-	set MACHINE=`hostinfo | awk '/MC680x0/ { printf("m68k") } /MC880x0/ { printf("m88k") }'`
-    endif
-endif
-
-if (! $?MACHINE) then
-    if (-f /etc/machine) then
-	    set MACHINE="`/etc/machine`"
-    else
-	    echo "${prog}: no /etc/machine, specify machine type with -cpu"
-	    echo "${prog}: e.g. ${prog} -cpu VAX CONFIGURATION"
-	    exit 1
-    endif
-endif
-
-set FEATURES_EXTRA=
-
-switch ("$MACHINE")
-    case IBMRT:
-	set cpu=ca
-	set ID=RT
-	set FEATURES_EXTRA="romp_dualcall.h romp_fpa.h"
-	breaksw
-    case SUN:
-	set cpu=sun3
-	set ID=SUN3
-	breaksw
-    default:
-	set cpu=`echo $MACHINE | tr A-Z a-z`
-	set ID=`echo $MACHINE | tr a-z A-Z`
-	breaksw
-endsw
-set FEATURES=../h/features.h
-set FEATURES_H=(cs_*.h mach_*.h net_*.h\
-	        cputypes.h cpus.h vice.h\
-	        $FEATURES_EXTRA)
-set MASTER_DIR=../conf
-set MASTER =   ${MASTER_DIR}/MASTER
-set MASTER_CPU=${MASTER}.${cpu}
-
-set MASTER_LOCAL = ${MASTER}.local
-set MASTER_CPU_LOCAL = ${MASTER_CPU}.local
-if (! -f $MASTER_LOCAL) set MASTER_LOCAL = ""
-if (! -f $MASTER_CPU_LOCAL) set MASTER_CPU_LOCAL = ""
-
-if (! -d $OBJDIR) then
-    if ($?beverbose) then
-        echo "[ creating $OBJDIR ]"
-    endif
-    mkdir -p $OBJDIR
-endif
-
-foreach SYS ($argv)
-    set SYSID=${SYS}_${ID}
-    set SYSCONF=$OBJDIR/config.$SYSID
-    set BLDDIR=$OBJDIR
-    if ($?beverbose) then
-	echo "[ generating $SYSID from $MASTER_DIR/MASTER{,.$cpu}{,.local} ]"
-    endif
-    echo +$SYS \
-    | \
-    cat $MASTER $MASTER_LOCAL $MASTER_CPU $MASTER_CPU_LOCAL - \
-        $MASTER $MASTER_LOCAL $MASTER_CPU $MASTER_CPU_LOCAL \
-    | \
-    sed -n \
-	-e "/^+/{" \
-	   -e "s;[-+];#&;gp" \
-	      -e 't loop' \
-	   -e ': loop' \
-           -e 'n' \
-	   -e '/^#/b loop' \
-	   -e '/^$/b loop' \
-	   -e 's;^\([^#]*\).*#[ 	]*<\(.*\)>[ 	]*$;\2#\1;' \
-	      -e 't not' \
-	   -e 's;\([^#]*\).*;#\1;' \
-	      -e 't not' \
-	   -e ': not' \
-	   -e 's;[ 	]*$;;' \
-	   -e 's;^\!\(.*\);\1#\!;' \
-	   -e 'p' \
-	      -e 't loop' \
-           -e 'b loop' \
-	-e '}' \
-	-e "/^[^#]/d" \
-	-e 's;	; ;g' \
-	-e "s;^# *\([^ ]*\)[ ]*=[ ]*\[\(.*\)\].*;\1#\2;p" \
-    | \
-    awk '-F#' '\
-part == 0 && $1 != "" {\
-	m[$1]=m[$1] " " $2;\
-	next;\
-}\
-part == 0 && $1 == "" {\
-	for (i=NF;i>1;i--){\
-		s=substr($i,2);\
-		c[++na]=substr($i,1,1);\
-		a[na]=s;\
-	}\
-	while (na > 0){\
-		s=a[na];\
-		d=c[na--];\
-		if (m[s] == "") {\
-			f[s]=d;\
-		} else {\
-			nx=split(m[s],x," ");\
-			for (j=nx;j>0;j--) {\
-				z=x[j];\
-				a[++na]=z;\
-				c[na]=d;\
-			}\
-		}\
-	}\
-	part=1;\
-	next;\
-}\
-part != 0 {\
-	if ($1 != "") {\
-		n=split($1,x,",");\
-		ok=0;\
-		for (i=1;i<=n;i++) {\
-			if (f[x[i]] == "+") {\
-				ok=1;\
-			}\
-		}\
-		if (NF > 2 && ok == 0 || NF <= 2 && ok != 0) {\
-			print $2; \
-		}\
-	} else { \
-		print $2; \
-	}\
-}\
-' >$SYSCONF.new
-    if (-z $SYSCONF.new) then
-	echo "${prog}: ${$SYSID}: no such configuration in $MASTER_DIR/MASTER{,.$cpu}"
-	rm -f $SYSCONF.new
-    endif
-    if (! -d $BLDDIR) then
-        if ($?beverbose) then
-	    echo "[ creating $BLDDIR ]"
-        endif
-	mkdir -p $BLDDIR
-    endif
-#
-# These paths are used by config.
-#
-# "builddir" is the name of the directory where kernel binaries
-# are put.  It is a single path element, never absolute, and is
-# always relative to "objectdir".  "builddir" is used by config
-# solely to determine where to put files created by "config" (e.g.
-# the created Makefile and *.h's.)
-#
-# "objectdir" is the name of the directory which will hold "builddir".
-# It is a path; if relative, it is relative to the current directory
-# where config is run.  It's sole use is to be prepended to "builddir"
-# to indicate where config-created files are to be placed (see above).
-#
-# "sourcedir" is the location of the sources used to build the kernel.
-# It is a path; if relative, it is relative to the directory specified
-# by the concatenation of "objectdir" and "builddir" (i.e. where the
-# kernel binaries are put).
-#
-    echo 'builddir	"."'			>> $SYSCONF.new
-    set OBJRELDIR=`$RELPATH $OBJROOT $OBJDIR`
-    echo 'objectdir	"'$OBJROOT'/'$OBJRELDIR'"'		>> $SYSCONF.new
-    set SRCDIR=`dirname $SOURCE`
-    echo 'sourcedir	"'$SRCROOT'"'		>> $SYSCONF.new
-    if (-f $SYSCONF) then
-	diff $SYSCONF $SYSCONF.new
-	rm -f $SYSCONF.old
-	mv $SYSCONF $SYSCONF.old
-    endif
-    rm -f $SYSCONF
-    mv $SYSCONF.new $SYSCONF
-    if ($?doconfig) then
-        if ($?beverbose) then
-	    echo "[ configuring $SYSID ]"
-        endif
-	if ($?profile) then
-	    $CONFIG_DIR/config -c $MASTER_DIR -p $SYSCONF
-	else
-	    $CONFIG_DIR/config -c $MASTER_DIR $SYSCONF
-	endif
-    endif
-    if ($?domake) then
-        if ($?beverbose) then
-            echo "[ making $SYSID ]"
-        endif
-        (cd $BLDDIR; make)
-    endif
-end
diff --git a/kgmacros b/kgmacros
index 5c2205e24..edb1e35db 100644
--- a/kgmacros
+++ b/kgmacros
@@ -1,3 +1,4 @@
+
 # Kernel gdb macros
 #
 #  These gdb macros should be useful during kernel development in
@@ -38,9 +39,12 @@ document kgm
 |     showallvm      Display a summary listing of all the vm maps
 |     showallvme     Display a summary listing of all the vm map entries
 |     showallipc     Display a summary listing of all the ipc spaces
+|     showipcsummary     Display a summary listing of the ipc spaces of all tasks
 |     showallrights  Display a summary listing of all the ipc rights
-|     showallkmods   Display a summary listing of all the kernel modules
-|     showallbusyports     Display a listing of all ports with unread messages
+|     showallkexts   Display a summary listing of all loaded kexts (alias: showallkmods)
+|     showallknownkexts   Display a summary listing of all kexts, loaded or not
+|     showallbusyports    Display a listing of all ports with unread messages
+|     showallprocessors   Display a listing of all psets and processors
 |
 |     showallclasses    Display info about all OSObject subclasses in the system
 |     showobject        Show info about an OSObject - its vtable ptr and retain count, & more info for simple container classes.
@@ -82,13 +86,14 @@ document kgm
 |     showprocfiles  Given a proc_t pointer, display the list of open file descriptors
 |     showproclocks  Given a proc_t pointer, display the list of advisory file locks
 |     zombproc       Print out all procs in the zombie list
+|     showproctree   Show all the processes in a hierarchical tree form
 |     allproc        Print out all process in the system not in the zombie list
 |     zombstacks     Print out all stacks of tasks that are exiting
 |
 |     showinitchild  Print out all processes in the system which are children of init process
 |
-|     showkmod	     Display info about a kernel module
-|     showkmodaddr   Given an address, display the kernel module and offset
+|     showkext	     Display info about a kext (alias: showkmod)
+|     showkextaddr   Given an address, display the kext and offset (alias: showkmodaddr)
 |
 |     dumpcallqueue  Dump out all the entries given a queue head
 |
@@ -103,6 +108,9 @@ document kgm
 |     switchtoctx    Switch to different context
 |     showuserstack  Display numeric backtrace of the user stack for an 
 |     		     activation
+|     showtaskuserstacks     Display user stacks for a specified task
+|     showuserregisters      Display user registers for the specified thread
+|     showtaskuserregisters  Display user registers for the specified task
 |
 |     switchtouserthread Switch to the user context of the specified thread
 |     resetstacks    Return to the original kernel context
@@ -145,6 +153,21 @@ document kgm
 |
 |     inifa_showdbg  Print the debug information of an IPv4 interface address
 |     in6ifa_showdbg Print the debug information of an IPv6 interface address
+|     inm_showdbg    Print the debug information of an IPv4 multicast address
+|     ifma_showdbg   Print the debug information of a link multicast address
+|     ifpref_showdbg Print the debug information of an interface ref count
+|
+|     ndpr_showdbg   Print the debug information of a nd_prefix structure
+|     nddr_showdbg   Print the debug information of a nd_defrouter structure
+|
+|     imo_showdbg    Print the debug information of a ip_moptions structure
+|     im6o_showdbg   Print the debug information of a ip6_moptions structure
+|
+|     inifa_trash    Walk the list of trash in_ifaddr entries
+|     in6ifa_trash   Walk the list of trash in6_ifaddr entries
+|     inm_trash      Walk the list of trash in_multi entries
+|     in6m_trash     Walk the list of trash in6_multi entries
+|     ifma_trash     Walk the list of trash ifmultiaddr entries
 |
 |     mbuf_walkpkt   Walk the mbuf packet chain (m_nextpkt)
 |     mbuf_walk      Walk the mbuf chain (m_next)
@@ -157,6 +180,9 @@ document kgm
 |     mbuf_slabs     Print all slabs in the group
 |     mbuf_slabstbl  Print slabs table
 |     mbuf_stat      Print extended mbuf allocator statistics
+|     mbuf_countchain   Count the length of an mbuf chain
+|     mbuf_topleak   Print the top suspected mbuf leakers
+|     mbuf_traceleak Print the leak information for a given leak address
 |
 |     mcache_walkobj     Walk the mcache object chain (obj_next)
 |     mcache_stat        Print all mcaches in the system
@@ -188,8 +214,11 @@ document kgm
 |     shownewvnodes      Print the new vnode list
 |
 |     ifconfig           display ifconfig-like output
+|     showifnets         show the list of attached and detached interfaces
 |     showifaddrs        show the list of addresses for the given ifp
 |     showifmultiaddrs   show the list of multicast addresses for the given ifp
+|     showinmultiaddrs   show the list of IPv4 multicast address records
+|     showin6multiaddrs  show the list of IPv6 multicast address records
 |
 |     showsocket         Display information about a socket
 |     showprocsockets    Given a proc_t pointer, display information about its sockets
@@ -202,7 +231,7 @@ document kgm
 |     show_rt_inet       Display the IPv4 routing table
 |     show_rt_inet6      Display the IPv6 routing table
 |
-|     showallpmworkqueues   Display info about all IOPMWorkQueue objects
+|     showpmworkqueue       Display the IOPMWorkQueue object
 |     showregistrypmstate   Display power management state for all IOPower registry entries
 |     showioservicepm       Display the IOServicePM object
 |     showstacksaftertask   showallstacks starting after a given task
@@ -214,19 +243,39 @@ document kgm
 |     showallgdbcorestacks Corefile equivalent of "showallgdbstacks"
 |     kdp-reenter	Schedule reentry into the debugger and continue.
 |     kdp-reboot	Restart remote target
-|     kdp-version	Get KDP version number
-|     kdp-connect	"shorthand" connection macro
+|     kdp-version       Get KDP version number
 |
 |     zstack		Print zalloc caller stack (zone leak debugging)
 |     findoldest	Find oldest zone leak debugging record
 |     countpcs		Print how often a pc occurs in the zone leak log
 |
+|     showtopztrace     Print the ztrace with the most outstanding allocated memory
+|     showztrace        Print a backtrace record given its index
+|     showzalloc        Print an allocation record + stacktrace at index
+|     showztraceaddr    Print a backtrace record given its address
+|     showztracesabove  Print all the backtrace records with a size bigger than X
+|     showzstacktrace   Symbolicate and print a stored OSBacktrace
+|
+|     showztraces       Finds all in-use traces in the ztraces table
+|     showzallocs       Finds all in-use allocations in the zallocs table
+|     showzstats        Shows the statistics gathered about the hash tables
+|
+|     showzallocsfortrace   Print all the allocations that refer to a trace
+|     showztracehistogram   Prints a histogram of the ztraces table
+|     showzallochistogram   Prints a histogram of the zallocs table
+|
 |     pmap_walk     Perform a page-table walk
 |     pmap_vtop     Translate a virtual address to physical address
 |
-|     showuserlibraries      Show binary images known by dyld in the target task
+|     showuserdyldinfo       Show dyld information and error messages
+|                            in the target task
+|     showuserlibraries      Show binary images known by dyld in the
+|                            target task
+|     showallvmstats     Prints a summary of vm statistics in a table format
+|     memstats           Displays memory statistics in a table format
 |
-|     showthreadfortid	Displays the address of the thread structure for a given thread_id value. 
+|     showthreadfortid	Displays the address of the thread structure
+|                       for a given thread_id value. 
 |     
 |     strcmp_nomalloc   A version of strcmp that avoids the use of malloc
 |                       through the use of encoded strings created via
@@ -252,6 +301,12 @@ document kgm
 |     ioapic_write32    Write IOAPIC entry
 |     ioapic_dump       Dump IOAPIC entries
 |
+|     showallproviders  Display summary listing of all dtrace_providers
+|     showallmodctls    Display summary listing of all dtrace modctls
+|     showmodctl        Display info about a dtrace modctl
+|     showfbtprobe      Display info about an fbt probe given an id (traverses fbt_probetab)
+|     processortimers   Display all processor timers, noting any inconsistencies
+|
 | Type "help <macro>" for more specific help on a particular macro.
 | Type "show user <macro>" to see what the macro is really doing.
 end
@@ -304,6 +359,10 @@ set $kgm_kdp_pkt_input_off   = $kgm_kdp_pkt_data_len + 4
 set $kgm_kdp_pkt_hostreboot = 0x13
 set $kgm_kdp_pkt_hdr_size   = 8
 
+
+set $kgm_readphys_force_kdp     = 0
+set $kgm_readphys_force_physmap = 0
+
 set $kgm_lcpu_self      = 0xFFFE
 
 set $kgm_reg_depth = 0
@@ -366,8 +425,18 @@ define showptrhdrpad
     end
 end
 
+# Print a userspace pointer, using $kgm_tasp
+define showuserptr
+    set $kgm_userptr_task_64 = ( $kgm_taskp->taskFeatures[0] & 0x80000000)
+    if $kgm_userptr_task_64
+        printf "0x%016llx", $arg0
+    else
+        printf "0x%08x", $arg0
+    end
+end
+
 define showkmodheader
-    printf   "kmod      "
+    printf   "kmod_info "
     showptrhdrpad
     printf "  address   "
     showptrhdrpad
@@ -424,7 +493,6 @@ end
 define showkmodaddr
     showkmodaddrint $arg0
 end
-
 document showkmodaddr
 Syntax: (gdb) showkmodaddr <addr>
 | Given an address, print the offset and name for the kmod containing it
@@ -436,7 +504,15 @@ define showkmod
 end
 document showkmod
 Syntax: (gdb) showkmod <kmod>
-| Routine to print info about a kernel module
+| Routine to print info about a kext
+end
+
+define showkext
+    showkmod $arg0
+end
+document showkext
+Syntax: (gdb) showkext <kmod_info_address>
+| Routine to print info about a kext
 end
 
 define showallkmods
@@ -449,7 +525,108 @@ define showallkmods
 end
 document showallkmods
 Syntax: (gdb) showallkmods
-| Routine to print a summary listing of all the kernel modules
+| Routine to print a summary listing of all loaded kexts
+end
+
+define showallkexts
+    showallkmods
+end
+document showallkexts
+Syntax: (gdb) showallkexts
+| Routine to print a summary listing of all loaded kexts
+end
+
+# See OSKextVersion.c for the C code this is based on
+#
+set $KGM_OSKEXT_VERS_MAJ_MULT   = 100000000
+set $KGM_OSKEXT_VERS_MIN_MULT   = 1000000
+set $KGM_OSKEXT_VERS_REV_MULT   = 10000
+set $KGM_OSKEXT_VERS_STAGE_MULT = 1000
+
+define printoskextversion
+    set $vers_scratch = $arg0
+
+    if ($vers_scratch == -1)
+        printf "(invalid)"
+    else
+     
+        set $vers_major =  $vers_scratch / $KGM_OSKEXT_VERS_MAJ_MULT
+
+        set $vers_scratch = $vers_scratch - ($vers_major * $KGM_OSKEXT_VERS_MAJ_MULT)
+        set $vers_minor = $vers_scratch / $KGM_OSKEXT_VERS_MIN_MULT
+
+        set $vers_scratch = $vers_scratch - ( $vers_minor * $KGM_OSKEXT_VERS_MIN_MULT)
+        set $vers_revision =  $vers_scratch / $KGM_OSKEXT_VERS_REV_MULT
+
+        set $vers_scratch = $vers_scratch - ( $vers_revision * $KGM_OSKEXT_VERS_REV_MULT)
+        set $vers_stage =  $vers_scratch / $KGM_OSKEXT_VERS_STAGE_MULT
+
+        set $vers_scratch = $vers_scratch - ( $vers_stage * $KGM_OSKEXT_VERS_STAGE_MULT)
+        set $vers_stagelevel =  $vers_scratch
+
+        printf "%d.%d", $vers_major, $vers_minor
+        if ($vers_revision > 0)
+            printf ".%d", $vers_revision
+        end
+        
+        if ($vers_stage == 1)
+            printf "d"
+        end
+        if ($vers_stage == 3)
+            printf "a"
+        end
+        if ($vers_stage == 5)
+            printf "b"
+        end
+        if ($vers_stage == 7)
+            printf "fc"
+        end
+        if ($vers_stage == 1 || $vers_stage == 3 || $vers_stage == 5 || $vers_stage == 7)
+            printf "%d", $vers_stagelevel
+        end
+    end
+end
+
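printoskextversion unpacks the packed kext version used by OSKext:
major*100000000 + minor*1000000 + revision*10000 + stage*1000 + stage-level,
where stages 1/3/5/7 print as d/a/b/fc. The same decode in C, simplified
from the macro (OSKextVersion.c remains the authoritative reference):

    #include <inttypes.h>
    #include <stdio.h>

    static void print_oskext_version(int64_t vers)
    {
        if (vers == -1) { printf("(invalid)"); return; }
        int64_t major    = vers / 100000000;  vers %= 100000000;
        int64_t minor    = vers / 1000000;    vers %= 1000000;
        int64_t revision = vers / 10000;      vers %= 10000;
        int64_t stage    = vers / 1000;       vers %= 1000;

        printf("%" PRId64 ".%" PRId64, major, minor);
        if (revision > 0)
            printf(".%" PRId64, revision);
        const char *tag = (stage == 1) ? "d" : (stage == 3) ? "a"
                        : (stage == 5) ? "b" : (stage == 7) ? "fc" : NULL;
        if (tag)  /* vers now holds the stage level */
            printf("%s%" PRId64, tag, vers);
    }

For example, 102037004 decodes to "1.2.3fc4".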
+define showallknownkexts
+   set $kext_count = sKextsByID->count
+   set $kext_index = 0
+   printf "%d kexts in sKextsByID:\n", $kext_count
+
+   printf "OSKext *    "
+   showptrhdrpad
+   printf "load_addr   "
+   showptrhdrpad
+   
+   printf " id  name (version)\n"
+
+   while $kext_index < $kext_count
+       set $kext_id = sKextsByID->dictionary[$kext_index].key->string
+       set $oskext = (OSKext *)sKextsByID->dictionary[$kext_index].value
+
+       showptr $oskext
+       printf "  "
+
+       if ($oskext->flags.loaded)
+           showptr $oskext->kmod_info
+           printf "  "
+           printf "%3d", $oskext->loadTag
+       else
+           showptrhdrpad
+           printf " -------- "
+           printf "  "
+           printf " --"
+       end
+       printf "  "
+
+       printf "%.64s (", $kext_id
+       printoskextversion (uint64_t)$oskext->version
+       printf ")\n"
+       set $kext_index = $kext_index + 1
+   end
+end
+document showallknownkexts
+Syntax: (gdb) showallknownkexts
+| Routine to print a summary listing of all kexts, loaded or not
 end
 
 define showactheader
@@ -477,7 +654,7 @@ define showactint
 	else
 	   printf "    "
 	end
-	printf "  %7ld   ", $kgm_thread.thread_id
+	printf "  0x%llx   ", $kgm_thread.thread_id
 	showptr $kgm_thread.last_processor
 	printf "   %3d ", $kgm_thread.sched_pri
 	if ($kgm_thread.uthread != 0)
@@ -488,18 +665,36 @@ define showactint
 	   else
 	      printf "     "
 	   end
-	   if ($kgm_uthread->uu_iopol_disk == 1)
-	      printf "NORM    "
-	      set $kgm_printed = 1
+	   set $diskpolicy = 0
+	   if ($kgm_thread->ext_actionstate.hw_disk != 0)
+		set $diskpolicy = $kgm_thread->ext_actionstate.hw_disk
+	   else 
+		if ($kgm_thread->actionstate.hw_disk != 0)
+			set $diskpolicy = $kgm_thread->actionstate.hw_disk
+		end
+	   end
+	   if ($kgm_thread->ext_actionstate.hw_bg != 0)
+		set $diskpolicy = 5
 	   end
-	   if ($kgm_uthread->uu_iopol_disk == 2)
+	   if ($kgm_thread->actionstate.hw_bg != 0)
+		set $diskpolicy = 4
+	   end
+	   if ($diskpolicy == 2)
 	      printf "PASS    "
 	      set $kgm_printed = 1
            end
-	   if ($kgm_uthread->uu_iopol_disk == 3)
+	   if ($diskpolicy == 3)
 	      printf "THROT   "
 	      set $kgm_printed = 1
 	   end
+	   if ($diskpolicy == 4)
+	      printf "BG_THRT "
+	      set $kgm_printed = 1
+           end
+	   if ($diskpolicy == 5)
+	      printf "EBG_THRT"
+	      set $kgm_printed = 1
+           end
 	   if ($kgm_printed == 0)
 	      printf "        "
 	   end
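The block above derives an effective per-thread disk I/O policy: an
externally imposed setting (ext_actionstate) takes precedence over the
thread's own, and the background bits then force the throttled-background
codes. A C transcription of the precedence:

    /* Effective disk-policy derivation used by showactint/showprocint.
     * ext_* fields are externally imposed; plain fields are self-set.
     * Codes: 2=PASS, 3=THROT, 4=BG_THRT (self), 5=EBG_THRT (external). */
    static int effective_disk_policy(int ext_hw_disk, int hw_disk,
                                     int ext_hw_bg, int hw_bg)
    {
        int policy = ext_hw_disk ? ext_hw_disk : hw_disk;
        if (ext_hw_bg)
            policy = 5;   /* external background state */
        if (hw_bg)
            policy = 4;   /* self-set background state, applied last */
        return policy;
    }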
@@ -544,8 +739,25 @@ define showactint
 		    end
 		end
 	end
+	if ($kgm_thread.uthread != 0)
+	   set $kgm_uthread = (struct uthread *)$kgm_thread.uthread
+	   if ($kgm_uthread->pth_name && $kgm_uthread->pth_name[0])
+	   	   printf "\n\t\tThread Name: %s", $kgm_uthread->pth_name
+	   end
+	end
 	if $arg1 != 0
 	    if ($kgm_thread.kernel_stack != 0)
+		if ($kgm_thread.uthread != 0)
+			printf "\n          "
+			set $kgm_uthread = (struct uthread *)$kgm_thread.uthread
+			if ($kgm_uthread->uu_kwe.kwe_kwqqueue != 0)
+				set $kwq = (ksyn_wait_queue_t)$kgm_uthread->uu_kwe.kwe_kwqqueue
+				printf "              kwq_lockcount:0x%x; kwq_retval:0x%x", $kgm_uthread->uu_kwe.kwe_lockseq, $kgm_uthread->uu_kwe.kwe_psynchretval
+				printf "\n                "
+				show_kwq $kwq
+				printf "          "
+			end
+		end
 		if ($kgm_thread.reserved_stack != 0)
 			printf "\n          "
 			showptrhdrpad
@@ -683,10 +895,17 @@ Syntax: (gdb) showallthreads
 | Routine to print out info about all threads in the system.
 end
 
+define showprocessorint
+    set $kgm_processor_int = (struct processor *)$arg0
+    printf "Processor "
+    showptr $kgm_processor_int
+    printf " State %d (cpu_id 0x%x)\n", ($kgm_processor_int)->state, ($kgm_processor_int)->cpu_id
+end
+
 define showcurrentthreads
-set $kgm_prp = (struct processor *)processor_list
+    set $kgm_prp = (struct processor *)processor_list
     while $kgm_prp != 0
-    	printf "Processor 0x%08x State %d (cpu_id %x)\n", $kgm_prp, ($kgm_prp)->state, ($kgm_prp)->cpu_id
+        showprocessorint $kgm_prp
 	if ($kgm_prp)->active_thread != 0
 	    set $kgm_actp = ($kgm_prp)->active_thread
 	    showtaskheader
@@ -703,15 +922,192 @@ Syntax: (gdb) showcurrentthreads
 | Routine to print out info about the thread running on each cpu.
 end
 
+
+define _showrunqint
+	set $kgm_runq = (struct run_queue *)$arg0
+	
+	printf "    Priority Run Queue Info: Count %d\n", $kgm_runq->count
+	set $kgm_runq_queue_i = 0
+	set $kgm_runq_queue_count = sizeof($kgm_runq->queues)/sizeof($kgm_runq->queues[0])
+	while $kgm_runq->count && $kgm_runq_queue_i < $kgm_runq_queue_count
+		set $kgm_runq_queue_head = &$kgm_runq->queues[$kgm_runq_queue_i]
+		set $kgm_runq_queue_p = $kgm_runq_queue_head->next
+		if $kgm_runq_queue_p != $kgm_runq_queue_head
+			set $kgm_runq_queue_this_count = 0
+			while $kgm_runq_queue_p != $kgm_runq_queue_head
+				set $kgm_runq_queue_this_count = $kgm_runq_queue_this_count + 1
+				showtask ((thread_t)$kgm_runq_queue_p)->task
+				showactstack $kgm_runq_queue_p
+				set $kgm_runq_queue_p = $kgm_runq_queue_p->next
+			end
+			printf "      Queue Priority %3d [", $kgm_runq_queue_i
+			showptr $kgm_runq_queue_head
+			printf "] Count %d\n", $kgm_runq_queue_this_count
+		end
+		set $kgm_runq_queue_i = $kgm_runq_queue_i + 1
+	end
+
+end
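The run queues walked by _showrunqint are arrays of circular doubly linked
queues, one per priority: an empty queue's head points at itself, so a walk
starts at head->next and stops when it returns to the head. The idiom in C:

    struct queue_entry { struct queue_entry *next, *prev; };

    /* Count the elements on one circular queue; 0 when the head links to
     * itself, mirroring the inner while loop of _showrunqint. */
    static unsigned queue_length(const struct queue_entry *head)
    {
        unsigned n = 0;
        for (const struct queue_entry *e = head->next; e != head; e = e->next)
            n++;
        return n;
    }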
+
+define _showgrrrint
+	set $kgm_grrr_runq = $arg0
+	
+	printf "    GRRR Info: Count %d Weight %d Current Group ", $kgm_grrr_runq->count, $kgm_grrr_runq->weight
+	showptr $kgm_grrr_runq->current_group
+	printf "\n"
+	set $kgm_grrr_group_i = 0
+	set $kgm_grrr_group_count = sizeof($kgm_grrr_runq->groups)/sizeof($kgm_grrr_runq->groups[0])
+	while $kgm_grrr_runq->count && $kgm_grrr_group_i < $kgm_grrr_group_count
+		set $kgm_grrr_group = &$kgm_grrr_runq->groups[$kgm_grrr_group_i]
+		if $kgm_grrr_group->count > 0
+			printf "      Group %3d [", $kgm_grrr_group->index
+			showptr $kgm_grrr_group
+			printf "] Count %d Weight %d\n", $kgm_grrr_group->count, $kgm_grrr_group->weight
+			set $kgm_grrr_group_client_head = &$kgm_grrr_group->clients
+			set $kgm_grrr_group_client = $kgm_grrr_group_client_head->next
+			while $kgm_grrr_group_client != $kgm_grrr_group_client_head
+				# showtask ((thread_t)$kgm_grrr_group_client)->task
+				# showactstack $kgm_grrr_group_client
+				set $kgm_grrr_group_client = $kgm_grrr_group_client->next
+			end
+		end
+		set $kgm_grrr_group_i = $kgm_grrr_group_i + 1
+	end
+end
+
+define showallprocessors
+	set $kgm_pset = &pset0
+
+	set $kgm_show_grrr = 0
+	set $kgm_show_priority_runq = 0
+	set $kgm_show_priority_pset_runq = 0
+	set $kgm_show_fairshare_grrr = 0
+	set $kgm_show_fairshare_list = 0
+
+	if _sched_enum == 1
+		set $kgm_show_priority_runq = 1
+		set $kgm_show_fairshare_list = 1
+	end
+	if _sched_enum == 2
+		set $kgm_show_priority_pset_runq = 1
+		set $kgm_show_fairshare_list = 1
+	end
+	if _sched_enum == 4
+		set $kgm_show_grrr = 1
+		set $kgm_show_fairshare_grrr = 1
+	end
+	if _sched_enum == 5
+		set $kgm_show_priority_runq = 1
+		set $kgm_show_fairshare_list = 1
+	end
+	if _sched_enum == 6
+		set $kgm_show_priority_pset_runq = 1
+		set $kgm_show_fairshare_list = 1
+	end
+
+	while $kgm_pset != 0
+		printf "Processor Set "
+		showptr $kgm_pset
+		printf " Count %d (cpu_id 0x%x-0x%x)\n", ($kgm_pset)->cpu_set_count, ($kgm_pset)->cpu_set_low, ($kgm_pset)->cpu_set_hi
+		printf "  Active Processors:\n"
+		set $kgm_active_queue_head = &($kgm_pset)->active_queue
+		set $kgm_active_elt = $kgm_active_queue_head->next
+		while $kgm_active_elt != $kgm_active_queue_head
+			set $kgm_processor = (processor_t)$kgm_active_elt
+			printf "    "
+			showprocessorint $kgm_processor
+
+			if $kgm_show_priority_runq
+				set $kgm_runq = &$kgm_processor->runq
+				_showrunqint $kgm_runq
+			end
+			if $kgm_show_grrr
+				set $kgm_grrr_runq = &$kgm_processor->grrr_runq
+				_showgrrrint $kgm_grrr_runq
+			end
+			
+			if $kgm_processor->processor_meta != 0 && $kgm_processor->processor_meta->primary == $kgm_processor
+				set $kgm_processor_meta_idle_head = &$kgm_processor->processor_meta->idle_queue
+				set $kgm_processor_meta_idle = $kgm_processor_meta_idle_head->next
+				while $kgm_processor_meta_idle != $kgm_processor_meta_idle_head
+					printf "      Idle Meta Processor: "
+					showprocessorint $kgm_processor_meta_idle
+					set $kgm_processor_meta_idle = $kgm_processor_meta_idle->next
+				end
+			end
+			
+			set $kgm_active_elt = $kgm_active_elt->next
+		end
+		printf "  Idle Processors:\n"
+		set $kgm_idle_queue_head = &($kgm_pset)->idle_queue
+		set $kgm_idle_elt = $kgm_idle_queue_head->next
+		while $kgm_idle_elt != $kgm_idle_queue_head
+			set $kgm_processor = (processor_t)$kgm_idle_elt
+			printf "    "
+			showprocessorint $kgm_processor
+			
+			if $kgm_processor->processor_meta != 0 && $kgm_processor->processor_meta->primary == $kgm_processor
+				set $kgm_processor_meta_idle_head = &$kgm_processor->processor_meta->idle_queue
+				set $kgm_processor_meta_idle = $kgm_processor_meta_idle_head->next
+				while $kgm_processor_meta_idle != $kgm_processor_meta_idle_head
+					printf "      Idle Meta Processor: "
+					showprocessorint $kgm_processor_meta_idle
+					set $kgm_processor_meta_idle = $kgm_processor_meta_idle->next
+				end
+			end
+
+			set $kgm_idle_elt = $kgm_idle_elt->next
+		end
+
+		if $kgm_show_priority_pset_runq
+			set $kgm_runq = &$kgm_pset->pset_runq
+			printf "\n"
+			_showrunqint $kgm_runq
+		end
+		set $kgm_pset = ($kgm_pset)->pset_list
+	end
+	
+	printf "\n"
+	printf "Realtime Queue Count %d\n", rt_runq.count
+	set $kgm_rt_runq_head = &rt_runq.queue
+	set $kgm_rt_runq = $kgm_rt_runq_head->next
+	while $kgm_rt_runq != $kgm_rt_runq_head
+		showtask ((thread_t)$kgm_rt_runq)->task
+		showact $kgm_rt_runq
+		set $kgm_rt_runq = $kgm_rt_runq->next
+	end
+	
+	printf "\n"
+	if $kgm_show_fairshare_list
+		printf "Fair Share Queue Count %d\n", fs_runq.count
+		set $kgm_fs_runq_head = &fs_runq.queue
+		set $kgm_fs_runq = $kgm_fs_runq_head->next
+		while $kgm_fs_runq != $kgm_fs_runq_head
+			showtask ((thread_t)$kgm_fs_runq)->task
+			showact $kgm_fs_runq
+			set $kgm_fs_runq = $kgm_fs_runq->next
+		end
+	end
+	if $kgm_show_fairshare_grrr
+		printf "Fair Share Queue Count %d\n", fs_grrr_runq.count
+		set $kgm_fs_grrr = &fs_grrr_runq
+		_showgrrrint $kgm_fs_grrr
+	end
+end
+document showallprocessors
+Syntax: (gdb) showallprocessors
+| Routine to print out info about all psets and processors
+end
+
 set $decode_wait_events = 0
 define showallstacks
     set $kgm_head_taskp = &tasks
     set $kgm_taskp = (struct task *)($kgm_head_taskp->next)
     while $kgm_taskp != $kgm_head_taskp
-        showtaskheader
+	showtaskheader
 	showtaskint $kgm_taskp
 	set $kgm_head_actp = &($kgm_taskp->threads)
-        set $kgm_actp = (struct thread *)($kgm_taskp->threads.next)
+	set $kgm_actp = (struct thread *)($kgm_taskp->threads.next)
 	while $kgm_actp != $kgm_head_actp
 	    showactheader
 	    if ($decode_wait_events > 0)
@@ -719,11 +1115,14 @@ define showallstacks
 	    else
 	       showactint $kgm_actp 2
 	    end
-  	    set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next)
-        end
+	    set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next)
+	end
 	printf "\n"
-    	set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next)
+	set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next)
     end
+
+    printf "\nZombie Processes:\n" 
+    zombstacks
 end
 
 document showallstacks
@@ -735,9 +1134,9 @@ Syntax: (gdb) showallstacks
 end
 
 define showcurrentstacks
-set $kgm_prp = processor_list
+    set $kgm_prp = processor_list
     while $kgm_prp != 0
-    	printf "Processor 0x%08x State %d (cpu_id %x)\n", $kgm_prp, ($kgm_prp)->state, ($kgm_prp)->cpu_id
+    	showprocessorint $kgm_prp
 	if ($kgm_prp)->active_thread != 0
 	    set $kgm_actp = ($kgm_prp)->active_thread
 	    showtaskheader
@@ -784,7 +1183,7 @@ define showwaitqwaitercount
     set $kgm_wc_wqe = (WaitQueueElement *)$kgm_wc_linksp->next
     set $kgm_wc_count = 0
     while ( (queue_entry_t)$kgm_wc_wqe != (queue_entry_t)$kgm_wc_linksp)
-	if ($kgm_wc_wqe->wqe_type != &_wait_queue_link) && ($kgm_wc_wqe->wqe_type != &_wait_queue_link_noalloc)
+	if ($kgm_wc_wqe->wqe_type != &_wait_queue_link)
         	set $kgm_wc_count = $kgm_wc_count + 1
 	end
         set $kgm_wc_wqe = (WaitQueueElement *)$kgm_wc_wqe->wqe_links.next
@@ -793,7 +1192,7 @@ define showwaitqwaitercount
 end
 
 define showwaitqmembercount
-    set $kgm_mc_waitqsetp = (struct wait_queue_set *)$arg0
+    set $kgm_mc_waitqsetp = (WaitQueueSet*)$arg0
     set $kgm_mc_setlinksp = &($kgm_mc_waitqsetp->wqs_setlinks)
     set $kgm_mc_wql = (WaitQueueLink *)$kgm_mc_setlinksp->next
     set $kgm_mc_count = 0
@@ -855,7 +1254,7 @@ define showwaitqmemberof
 end
 
 define showwaitqmembers
-    set $kgm_ms_waitqsetp = (struct wait_queue_set *)$arg0
+    set $kgm_ms_waitqsetp = (WaitQueueSet*)$arg0
     set $kgm_ms_setlinksp = &($kgm_ms_waitqsetp->wqs_setlinks)
     set $kgm_ms_wql = (WaitQueueLink *)$kgm_ms_setlinksp->next
     set $kgm_ms_found = 0
@@ -871,15 +1270,15 @@ define showwaitqmembers
 end
 
 define showwaitqheader
-    printf "wait_queue  prepostq    interlock   "
+    printf "wait_queue  ref_count   interlock   "
     printf "pol  type   member_cnt  waiter_cnt\n"
 end
 
 define showwaitqint
-    set $kgm_waitqp = (WaitQueue *)$arg0
+    set $kgm_waitqp = (WaitQueue*)$arg0
     printf "0x%08x  ", $kgm_waitqp
     if ($kgm_waitqp->wq_type == 0xf1d1)
-	printf "0x%08x  ", &((struct wait_queue_set *)$kgm_waitqp)->wqs_preposts
+	printf "0x%08x  ", ((WaitQueueSet*)$kgm_waitqp)->wqs_refcount
     else
 	printf "0x00000000  "
     end
@@ -1138,20 +1537,29 @@ define showipcheader
     showptrhdrpad
     printf "  table_next"
     showptrhdrpad
-    printf " flags tsize  splaytree   splaybase\n"
+    printf " flags ports  splaysize   "
+    showptrhdrpad
+    printf "splaybase\n"
 end
 
 define showipceheader
-    printf "            name        object    "
+    printf "            "
     showptrhdrpad
-    printf "  rite urefs  destname    destination\n"
+    printf "object      "
+    showptrhdrpad
+    showptrhdrpad
+    printf "name        rite urefs  destname    "
+    showptrhdrpad
+    printf "destination\n"
 end
 
 define showipceint
     set $kgm_ie = *(ipc_entry_t)$arg0
-    printf "            0x%08x  ", $arg1
+    printf "            "
+    showptrhdrpad
     showptr $kgm_ie.ie_object
-    printf "  "
+    showptrhdrpad
+    printf "  0x%08x  ", $arg1
     if $kgm_ie.ie_bits & 0x00100000
         printf "Dead "
         printf "%5d\n", $kgm_ie.ie_bits & 0xffff
@@ -1175,10 +1583,29 @@ define showipceint
                 printf "  O"
             end
             if $kgm_ie.index.request
-                printf "n"
+                set $kgm_port = (ipc_port_t)$kgm_ie.ie_object
+		set $kgm_requests = $kgm_port->ip_requests
+		set $kgm_req_soright = $kgm_requests[$kgm_ie.index.request].notify.port
+		if $kgm_req_soright
+#                   Armed send-possible notification?
+		    if (uintptr_t)$kgm_req_soright & 0x1
+		        printf "s"
+                    else
+#                       Delayed send-possible notification?
+		        if (uintptr_t)$kgm_req_soright & 0x2
+			    printf "d"
+			else
+#                           Dead-name notification
+			    printf "n"
+			end
+		    end     
+		else
+		    printf " "
+         	end
             else
                 printf " "
             end
+#           Collision (with tree)?
             if $kgm_ie.ie_bits & 0x00800000
                 printf "c"
     	    else
@@ -1214,8 +1641,9 @@ define showipcint
     else
         printf "   "
     end
-    printf "%5d  ", $kgm_is.is_table_size
-    printf "0x%08x  ", $kgm_is.is_tree_total
+    printf "%5d  ", $kgm_is.is_table_size + $kgm_is.is_tree_total
+    showptr $kgm_is.is_tree_total
+    printf "  "
     showptr &$kgm_isp->is_tree
     printf "\n"
     if $arg1 != 0
@@ -1228,12 +1656,12 @@ define showipcint
             if $kgm_ie.ie_bits & 0x001f0000
                 set $kgm_name = (($kgm_iindex << 8)|($kgm_ie.ie_bits >> 24))
                 showipceint $kgm_iep $kgm_name
-                if $arg2 != 0 && ipc_portbt != 0
-		    if $kgm_ie.ie_object != 0 && ($kgm_ie.ie_bits & 0x00070000) && ((ipc_port_t) $kgm_ie.ie_object)->ip_callstack[0] != 0
-                        printf "              user bt: "
-                        showportbt $kgm_ie.ie_object $kgm_is.is_task
-                    end
-                end
+                if $arg2 != 0
+		   if $kgm_ie.ie_object != 0 && ($kgm_ie.ie_bits & 0x00070000) && ((ipc_port_t) $kgm_ie.ie_object)->ip_callstack[0] != 0
+                   	printf "              user bt: "
+                   	showportbt $kgm_ie.ie_object $kgm_is.is_task
+                   end
+		end
             end
             set $kgm_iindex = $kgm_iindex + 1
             set $kgm_iep = &($kgm_is.is_table[$kgm_iindex])
@@ -1270,8 +1698,8 @@ end
 define showtaskipc
 	set $kgm_taskp = (task_t)$arg0
 	showtaskheader
-    showipcheader
 	showtaskint $kgm_taskp
+        showipcheader
 	showipcint $kgm_taskp->itk_space 0 0
 end
 document showtaskipc
@@ -1283,8 +1711,8 @@ end
 define showtaskrights
 	set $kgm_taskp = (task_t)$arg0
 	showtaskheader
-    showipcheader
 	showtaskint $kgm_taskp
+	showipcheader
 	showipcint $kgm_taskp->itk_space 1 0
 end
 document showtaskrights
@@ -1295,8 +1723,8 @@ end
 define showtaskrightsbt
 	set $kgm_taskp = (task_t)$arg0
 	showtaskheader
-    showipcheader
 	showtaskint $kgm_taskp
+  	showipcheader
 	showipcint $kgm_taskp->itk_space 1 1
 end
 document showtaskrightsbt
@@ -1309,8 +1737,8 @@ define showallipc
     set $kgm_cur_taskp = (struct task *)($kgm_head_taskp->next)
     while $kgm_cur_taskp != $kgm_head_taskp
         showtaskheader
-        showipcheader
         showtaskint $kgm_cur_taskp
+        showipcheader
         showipcint $kgm_cur_taskp->itk_space 0 0
     	set $kgm_cur_taskp = (struct task *)($kgm_cur_taskp->tasks.next)
     end
@@ -1320,14 +1748,49 @@ Syntax: (gdb) showallipc
 | Routine to print a summary listing of all the ipc spaces
 end
 
+define showipcsumheader
+    printf "task         "
+    showptrhdrpad
+    printf " pid         "
+    printf " #acts         "
+    printf " tsize "
+    printf "command\n"
+end
+
+define showipcsummaryint
+    set $kgm_taskp = (struct task *)$arg0
+    showptr $arg0
+    printf "%7d", ((struct proc *)$kgm_taskp->bsd_info)->p_pid
+    printf "%15d", $kgm_taskp->thread_count
+    printf "%15d", $kgm_taskp->itk_space.is_table_size
+    printf " %s\n", ((struct proc *)$kgm_taskp->bsd_info)->p_comm
+end
+
+define showipcsummary
+    showipcsumheader
+    set $kgm_head_taskp = &tasks
+    set $kgm_cur_taskp = (struct task *)($kgm_head_taskp->next)
+    while $kgm_cur_taskp != $kgm_head_taskp
+        showipcsummaryint $kgm_cur_taskp
+    	set $kgm_cur_taskp = (struct task *)($kgm_cur_taskp->tasks.next)
+    end
+end
+
+document showipcsummary
+Syntax: (gdb) showipcsummary 
+| Summarizes the IPC state of all tasks. This is a convenient way to dump
+| some basic clues about IPC messaging. You can use the output to determine
+| tasks that are candidates for further investigation.
+end
+
 
 define showallrights
     set $kgm_head_taskp = &tasks
     set $kgm_cur_taskp = (struct task *)($kgm_head_taskp->next)
     while $kgm_cur_taskp != $kgm_head_taskp
         showtaskheader
-        showipcheader
         showtaskint $kgm_cur_taskp
+        showipcheader
         showipcint $kgm_cur_taskp->itk_space 1 0
     	set $kgm_cur_taskp = (struct task *)($kgm_cur_taskp->tasks.next)
     end
@@ -1353,8 +1816,8 @@ end
 define showtaskvme
 	set $kgm_taskp = (task_t)$arg0
 	showtaskheader
-	showmapheader
 	showtaskint $kgm_taskp
+	showmapheader
 	showvmint $kgm_taskp->map 1
 end
 document showtaskvme
@@ -1431,6 +1894,28 @@ Syntax: (gdb) showtaskstacks <task>
 | Routine to print out the stack for each thread in a task.
 end
 
+define showqueue_elems
+	set $queue_head = (struct queue_entry *)($arg0)
+	set $queue = (struct queue_entry *)($queue_head->next)
+    while $queue != $queue_head
+		showptr $queue
+		printf " "
+		set $thread = (struct thread *)$queue
+		set $task = (struct task *)$thread->task
+		set $bsd = (struct proc *)$task->bsd_info
+		set $guy = (char *)$bsd->p_comm
+		showptr $thread
+		printf " "
+		showptr $task
+		printf " "
+		showptr $bsd
+		printf " "
+		showptr $guy
+		#printf "  %s\n", $kgm_procp->p_comm
+		printf "\n"
+    	set $queue = (struct queue_entry *)($queue->next)
+    end
+end
 
 define showalltasks
     showtaskheader
@@ -1453,9 +1938,9 @@ Syntax: (gdb) showalltasks
 end
 
 define showprocheader
-    printf "  pid  process     io_policy    wq_state"
+    printf "  pid  process     "
     showptrhdrpad
-    printf "   command\n"
+    printf "io_policy    wq_state   command\n"
 end
 
 define showprocint
@@ -1469,17 +1954,36 @@ define showprocint
 	else
 	   printf "       "
 	end
-	if ($kgm_procp->p_iopol_disk == 1)
-	   printf "NORM  "
-	   set $kgm_printed = 1
+	set $ptask = (struct task *)$kgm_procp->task
+	set $diskpolicy = 0
+	if ($ptask->ext_actionstate.hw_disk != 0)
+		set $diskpolicy = $ptask->ext_actionstate.hw_disk
+	else 
+		if ($ptask->actionstate.hw_disk != 0)
+			set $diskpolicy = $ptask->actionstate.hw_disk
+		end
+	end
+	if ($ptask->ext_actionstate.hw_bg != 0)
+		set $diskpolicy = 5
 	end
-	if ($kgm_procp->p_iopol_disk == 2)
-	   printf "PASS  "
-	   set $kgm_printed = 1
-        end
-	if ($kgm_procp->p_iopol_disk == 3)
-	   printf "THROT "
-	   set $kgm_printed = 1
+	if ($ptask->actionstate.hw_bg != 0)
+		set $diskpolicy = 4
+	end
+	if ($diskpolicy == 2)
+		printf "PASS    "
+		set $kgm_printed = 1
+	end
+	if ($diskpolicy == 3)
+		printf "THROT   "
+		set $kgm_printed = 1
+	end
+	if ($diskpolicy == 4)
+		printf "BG_THRT "
+		set $kgm_printed = 1
+	end
+	if ($diskpolicy == 5)
+		printf "EBG_THRT"
+		set $kgm_printed = 1
 	end
 	if ($kgm_printed == 0)
 	   printf "      "
@@ -1537,29 +2041,95 @@ document kdb
 end
 
 define showpsetheader
-    printf "portset     waitqueue   recvname    "
-    printf "flags refs  recvname    process\n"
+    printf "portset     "
+    showptrhdrpad
+    printf "waitqueue   "
+    showptrhdrpad
+    showptrhdrpad
+    printf "recvname    flags refs  recvname    "
+    showptrhdrpad
+    printf "process\n"
 end
 
 define showportheader
-    printf "port        mqueue      recvname    "
-    printf "flags refs  recvname    process\n"
+    printf "port        "
+    showptrhdrpad
+    printf "mqueue      "
+    showptrhdrpad
+    showptrhdrpad
+    printf "recvname    flags refs  recvname    "
+    showptrhdrpad
+    printf "dest\n"
 end
 
 define showportmemberheader
-    printf "members     port        recvname    "
-    printf "flags refs  mqueue      msgcount\n"
+    printf "members     "
+    showptrhdrpad
+    printf "port        "
+    showptrhdrpad
+    showptrhdrpad
+    printf "recvname    "
+    printf "flags refs  mqueue      "
+    showptrhdrpad
+    printf "msgcount\n"
 end
 
 define showkmsgheader
-    printf "messages    kmsg        size        "
-    printf "disp msgid  remote-port local-port\n"
+    printf "dest-port   "
+    showptrhdrpad
+    printf "kmsg        "
+    showptrhdrpad
+    showptrhdrpad
+    printf "msgid       "
+    printf "disp  size  "
+    printf "reply-port  "
+    showptrhdrpad
+    printf "source\n"
+end
+
+define showkmsgsrcint
+    set $kgm_kmsgsrchp = ((ipc_kmsg_t)$arg0)->ikm_header
+#    set $kgm_kmsgsrctp = (mach_msg_audit_trailer_t *)((uintptr_t)$kgm_kmsgsrchp + $kgm_kmsgsrchp->msgh_size)
+#    set $kgm_kmsgpid = $kgm_kmsgsrctp->msgh_audit.val[5]
+    set $kgm_kmsgpid = (pid_t)((uint *)((uintptr_t)$kgm_kmsgsrchp + $kgm_kmsgsrchp->msgh_size))[10]
+# pid 0 is the kernel_task; otherwise reuse the cached lookup when possible,
+# since walking the task list is slow
+    if ($kgm_kmsgpid == 0)
+       set $kgm_kmsgsrcpid = (pid_t)0
+       set $kgm_kmsgsrcprocp = (struct proc *)kernel_task->bsd_info
+    else 
+       if ($kgm_kmsgpid != $kgm_kmsgsrcpid)
+	  set $kgm_kmsgsrchead_taskp = &tasks
+          set $kgm_kmsgsrctaskp = (struct task *)($kgm_kmsgsrchead_taskp->next)
+          while $kgm_kmsgsrctaskp != $kgm_kmsgsrchead_taskp
+	      set $kgm_kmsgsrcprocp = (struct proc *)$kgm_kmsgsrctaskp->bsd_info
+	      set $kgm_kmsgsrcpid = $kgm_kmsgsrcprocp->p_pid
+	      if (($kgm_kmsgsrcprocp != 0) && ($kgm_kmsgsrcprocp->p_pid == $kgm_kmsgpid))
+	          set $kgm_kmsgsrctaskp = $kgm_kmsgsrchead_taskp
+	      else
+    	          set $kgm_kmsgsrctaskp = (struct task *)($kgm_kmsgsrctaskp->tasks.next)
+	      end
+          end
+       end
+    end
+    if ($kgm_kmsgsrcprocp->p_pid == $kgm_kmsgpid)
+    	printf "%s(%d)\n", $kgm_kmsgsrcprocp->p_comm, $kgm_kmsgpid
+    else
+        printf "unknown(%d)\n", $kgm_kmsgpid
+    end
 end
 
 define showkmsgint
-    printf "            0x%08x  ", $arg0
-    set $kgm_kmsgh = ((ipc_kmsg_t)$arg0)->ikm_header
-    printf "0x%08x  ", $kgm_kmsgh.msgh_size
+    set $kgm_kmsghp = ((ipc_kmsg_t)$arg0)->ikm_header
+    set $kgm_kmsgh = *$kgm_kmsghp
+    if ($arg1 != 0)
+        printf "            "
+        showptrhdrpad
+    else
+        showptr $kgm_kmsgh.msgh_remote_port
+    end
+    showptr $arg0
+    showptrhdrpad
+    printf "  0x%08x  ", $kgm_kmsgh.msgh_id
     if (($kgm_kmsgh.msgh_bits & 0xff) == 19)
 	printf "rC"
     else
@@ -1575,12 +2145,16 @@ define showkmsgint
     else
 	printf "s"
     end
-    printf "%5d  ", $kgm_kmsgh.msgh_id
-    printf "0x%08x  ", $kgm_kmsgh.msgh_remote_port
-    printf "0x%08x\n", $kgm_kmsgh.msgh_local_port
+    printf "%5d  ", $kgm_kmsgh.msgh_size
+    showptr $kgm_kmsgh.msgh_local_port
+    printf "  "
+    set $kgm_kmsgsrcpid = (pid_t)0
+    showkmsgsrcint $arg0
 end
 
-
+define showkmsg
+    showkmsgint $arg0 0
+end
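+
+document showkmsg
+Syntax: (gdb) showkmsg <ipc_kmsg>
+| Print a one-line summary of an ipc_kmsg: destination port, kmsg address,
+| msgh_id, disposition bits, msgh_size, reply port, and the name/pid of the
+| sender recorded in the audit trailer.
+end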
 
 define showkobject
     set $kgm_portp = (struct ipc_port *)$arg0
@@ -1715,7 +2289,7 @@ end
 define showportdest
     set $kgm_portp = (struct ipc_port *)$arg0
     set $kgm_spacep = $kgm_portp->data.receiver
-    if ($kgm_spacep == ipc_space_kernel)
+    if ((uintptr_t)$kgm_spacep == (uintptr_t)ipc_space_kernel)
 	showkobject $kgm_portp
     else
 	if ($kgm_portp->ip_object.io_bits & 0x80000000)
@@ -1730,9 +2304,12 @@ define showportdest
 end
 
 define showportmember
-    printf "            0x%08x  ", $arg0
+    printf "            "
+    showptrhdrpad
+    showptr $arg0
+    showptrhdrpad
     set $kgm_portp = (struct ipc_port *)$arg0
-    printf "0x%08x  ", $kgm_portp->ip_messages.data.port.receiver_name
+    printf "  0x%08x  ", $kgm_portp->ip_messages.data.port.receiver_name
     if ($kgm_portp->ip_object.io_bits & 0x80000000)
 	printf "A"
     else
@@ -1740,8 +2317,8 @@ define showportmember
     end
     printf "Port"
     printf "%5d  ", $kgm_portp->ip_object.io_references
-    printf "0x%08x  ", &($kgm_portp->ip_messages)
-    printf "0x%08x\n", $kgm_portp->ip_messages.data.port.msgcount
+    showptr &($kgm_portp->ip_messages)
+    printf "  0x%08x\n", $kgm_portp->ip_messages.data.port.msgcount
 end
 
 define showportbt
@@ -1764,10 +2341,12 @@ define showportbt
 end
 
 define showportint
-    printf "0x%08x  ", $arg0
+    showptr $arg0
+    printf "  "
     set $kgm_portp = (struct ipc_port *)$arg0
-    printf "0x%08x  ", &($kgm_portp->ip_messages)
-    printf "0x%08x  ", $kgm_portp->ip_messages.data.port.receiver_name
+    showptr &($kgm_portp->ip_messages)
+    showptrhdrpad
+    printf "  0x%08x  ", $kgm_portp->ip_messages.data.port.receiver_name
     if ($kgm_portp->ip_object.io_bits & 0x80000000)
 	printf "A"
     else
@@ -1780,21 +2359,23 @@ define showportint
     set $kgm_kmsgp = (ipc_kmsg_t)$kgm_portp->ip_messages.data.port.messages.ikmq_base
     if $arg1 && $kgm_kmsgp
 	showkmsgheader
-	showkmsgint $kgm_kmsgp
+	showkmsgint $kgm_kmsgp 1
 	set $kgm_kmsgheadp = $kgm_kmsgp
 	set $kgm_kmsgp = $kgm_kmsgp->ikm_next
 	while $kgm_kmsgp != $kgm_kmsgheadp
-	    showkmsgint $kgm_kmsgp
+	    showkmsgint $kgm_kmsgp 1
 	    set $kgm_kmsgp = $kgm_kmsgp->ikm_next
         end
     end
 end
 
 define showpsetint
-    printf "0x%08x  ", $arg0
+    showptr $arg0
+    printf "  "
     set $kgm_psetp = (struct ipc_pset *)$arg0
-    printf "0x%08x  ", &($kgm_psetp->ips_messages)
-    printf "0x%08x  ", $kgm_psetp->ips_messages.data.pset.local_name
+    showptr &($kgm_psetp->ips_messages)
+    showptrhdrpad
+    printf "  0x%08x  ", $kgm_psetp->ips_messages.data.pset.local_name
     if ($kgm_psetp->ips_object.io_bits & 0x80000000)
 	printf "A"
     else
@@ -1802,12 +2383,13 @@ define showpsetint
     end
     printf "Set "
     printf "%5d  ", $kgm_psetp->ips_object.io_references
-    printf "0x%08x  ", $kgm_psetp->ips_messages.data.pset.local_name
+    showptr $kgm_psetp->ips_messages.data.pset.local_name
+    printf "  "
     set $kgm_setlinksp = &($kgm_psetp->ips_messages.data.pset.set_queue.wqs_setlinks)
     set $kgm_wql = (WaitQueueLink *)$kgm_setlinksp->next
     set $kgm_found = 0
     while ( (queue_entry_t)$kgm_wql != (queue_entry_t)$kgm_setlinksp)
-        set $kgm_portp = (struct ipc_port *)((uintptr_t)$kgm_wql->wql_element.wqe_queue - $kgm_portoff)
+        set $kgm_portp = (struct ipc_port *)((uintptr_t)($kgm_wql->wql_element->wqe_queue) - (uintptr_t)$kgm_portoff)
 	if !$kgm_found  
 	    set $kgm_destspacep = (struct ipc_space *)0
 	    showportdestproc $kgm_portp
@@ -1823,6 +2405,7 @@ define showpsetint
 end
 
 define showpset
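+    # $kgm_portoff is offsetof(struct ipc_port, ip_messages), computed with
+    # the classic null-pointer cast trick; showpsetint uses it to map each
+    # wait-queue link back to its owning port.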
+    set $kgm_portoff = &(((struct ipc_port *)0)->ip_messages)
     showpsetheader
     showpsetint $arg0 1
 end
@@ -1833,8 +2416,9 @@ define showport
 end
 
 define showipcobject
-    set $kgm_object = (ipc_object_t)$arg0
+    set $kgm_objectp = (ipc_object_t)$arg0
     if ($kgm_objectp->io_bits & 0x7fff0000)
+        set $kgm_portoff = &(((struct ipc_port *)0)->ip_messages)
 	showpset $kgm_objectp
     else
 	showport $kgm_objectp
@@ -1843,17 +2427,17 @@ end
 
 define showmqueue
     set $kgm_mqueue = *(struct ipc_mqueue *)$arg0
-    set $kgm_psetoff = (uintptr_t)&(((struct ipc_pset *)0)->ips_messages)
-    set $kgm_portoff = (uintptr_t)&(((struct ipc_port *)0)->ip_messages)
     if ($kgm_mqueue.data.pset.set_queue.wqs_wait_queue.wq_type == 0xf1d1)
-	set $kgm_psetp = (struct ipc_pset *)(((uintptr_t)$arg0) - $kgm_psetoff)
+	set $kgm_psetoff = &(((struct ipc_pset *)0)->ips_messages)
+	set $kgm_pset = (((long)$arg0) - ((long)$kgm_psetoff))
         showpsetheader
-	showpsetint $kgm_psetp 1
+	showpsetint $kgm_pset 1
     end
     if ($kgm_mqueue.data.pset.set_queue.wqs_wait_queue.wq_type == 0xf1d0)
-	set $kgm_portp = (struct ipc_port *)(((uintptr_t)$arg0) - $kgm_portoff)
+        set $kgm_portoff = &(((struct ipc_port *)0)->ip_messages)
+	set $kgm_port = (((long)$arg0) - ((long)$kgm_portoff))
 	showportheader
-	showportint $kgm_portp 1
+	showportint $kgm_port 1
     end
 end
 
@@ -1866,6 +2450,8 @@ define zprint_one
     printf "%8x ",$kgm_zone->max_size
     printf "%6d ",$kgm_zone->elem_size
     printf "%8x ",$kgm_zone->alloc_size
+	printf " %8d ",$kgm_zone->num_allocs
+	printf "%8d ",$kgm_zone->num_frees
     printf "%s ",$kgm_zone->zone_name
 
     if ($kgm_zone->exhaustible)
@@ -1878,7 +2464,7 @@ define zprint_one
         printf "X"
     end
     if ($kgm_zone->noencrypt)
-       printf "$"
+        printf "$"
     end
     printf "\n"
 end
@@ -1887,7 +2473,7 @@ end
 define zprint
     printf "ZONE      "
     showptrhdrpad
-    printf "   COUNT   TOT_SZ   MAX_SZ ELT_SZ ALLOC_SZ NAME\n"
+    printf "   COUNT   TOT_SZ   MAX_SZ ELT_SZ ALLOC_SZ TOT_ALLOC TOT_FREE NAME\n"
     set $kgm_zone_ptr = (struct zone *)first_zone
     while ($kgm_zone_ptr != 0)
         zprint_one $kgm_zone_ptr
@@ -1965,6 +2551,7 @@ Syntax: (gdb) showallrwlck
 end
 
 set $kdp_act_counter = 0
+set $kdp_arm_act_counter = 0
 
 set $r0_save	= 0
 set $r1_save	= 0
@@ -1998,115 +2585,118 @@ define switchtoact
 		output/a (unsigned) $newact.continuation
 		echo \n
 	else
+		if ($kgm_mtype == $kgm_mtype_ppc)
+			if ($kdp_act_counter == 0)
+				set $kdpstate = (struct savearea *) kdp.saved_state
+			end
+			set $kdp_act_counter = $kdp_act_counter + 1
+			set (struct savearea *) kdp.saved_state=$newact->machine->pcb
+			flushregs
+			flushstack
+			set $pc=$newact->machine->pcb.save_srr0
+			update
+		end
+		if ($kgm_mtype == $kgm_mtype_i386)
+			set $kdpstatep = (struct x86_saved_state32 *) kdp.saved_state
+			if ($kdp_act_counter == 0)
+			   set $kdpstate = *($kdpstatep)
+			end	
+			set $kdp_act_counter = $kdp_act_counter + 1
+	
+			set $kgm_statep = (struct x86_kernel_state *) \
+						($newact->kernel_stack + kernel_stack_size \
+						 - sizeof(struct x86_kernel_state))
+			set $kdpstatep->ebx = $kgm_statep->k_ebx 
+			set $kdpstatep->ebp = $kgm_statep->k_ebp 
+			set $kdpstatep->edi = $kgm_statep->k_edi 
+			set $kdpstatep->esi = $kgm_statep->k_esi 
+			set $kdpstatep->eip = $kgm_statep->k_eip 
+			flushregs
+			flushstack
+			set $pc = $kgm_statep->k_eip
+			update
+		end
+		if ($kgm_mtype == $kgm_mtype_x86_64)
+			set $kdpstatep = (struct x86_saved_state64 *) kdp.saved_state
+			if ($kdp_act_counter == 0)
+			   set $kdpstate = *($kdpstatep)
+			end	
+			set $kdp_act_counter = $kdp_act_counter + 1
+	
+			set $kgm_statep = (struct x86_kernel_state *) \
+						($newact->kernel_stack + kernel_stack_size \
+						 - sizeof(struct x86_kernel_state))
+			set $kdpstatep->rbx = $kgm_statep->k_rbx 
+			set $kdpstatep->rbp = $kgm_statep->k_rbp 
+			set $kdpstatep->r12 = $kgm_statep->k_r12 
+			set $kdpstatep->r13 = $kgm_statep->k_r13 
+			set $kdpstatep->r14 = $kgm_statep->k_r14 
+			set $kdpstatep->r15 = $kgm_statep->k_r15 
+			set $kdpstatep->isf.rsp = $kgm_statep->k_rsp 
+			flushregs
+			flushstack
+			set $pc = $kgm_statep->k_rip
+			update
+		end
+		if ($kgm_mtype == $kgm_mtype_arm)
+			set $kdp_arm_act_counter = $kdp_arm_act_counter + 1
+			if ($kdp_arm_act_counter == 1)
+				set $r0_save   = $r0
+				set $r1_save   = $r1
+				set $r2_save   = $r2
+				set $r3_save   = $r3
+				set $r4_save   = $r4
+				set $r5_save   = $r5
+				set $r6_save   = $r6
+				set $r7_save   = $r7
+				set $r8_save   = $r8
+				set $r9_save   = $r9
+				set $r10_save  = $r10
+				set $r11_save  = $r11
+				set $r12_save  = $r12
+				set $sp_save   = $sp
+				set $lr_save   = $lr
+				set $pc_save   = $pc
+			end
+			set $pc_ctx = load_reg+8
+			set $kgm_statep = (struct arm_saved_state *)((struct thread*)$arg0)->machine.kstackptr
+			set $r0 =  $kgm_statep->r[0]
+			set $r1 =  $kgm_statep->r[1]
+			set $r2 =  $kgm_statep->r[2]
+			set $r3 =  $kgm_statep->r[3]
+			set $r4 =  $kgm_statep->r[4]
+			set $r5 =  $kgm_statep->r[5]
+			set $r6 =  $kgm_statep->r[6]
+			set $r8 =  $kgm_statep->r[8]
+			set $r9 =  $kgm_statep->r[9]
+			set $r10 = $kgm_statep->r[10]
+			set $r11 = $kgm_statep->r[11]
+			set $r12 = $kgm_statep->r[12]
+			set $sp = $kgm_statep->sp
+			set $lr = $kgm_statep->lr
+			set $pc = $pc_ctx
+			set $r7 =  $kgm_statep->r[7]
+			flushregs
+			flushstack
+		end
+	end
+	showcontext_int
+end
+
+document switchtoact  
+Syntax: switchtoact <address of activation>
+| This command allows gdb to examine the execution context and call
+| stack for the specified activation. For example, to view the backtrace
+| for an activation, issue "switchtoact <address>" followed by "bt".
+| Before resuming execution, issue a "resetctx" command to
+| return to the original execution context.
+end     
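+
+# Typical flow (address is illustrative):
+#   (gdb) switchtoact 0xffffff8012345678
+#   (gdb) bt
+#   (gdb) resetctx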
+
+define switchtoctx
+	select 0
 	if ($kgm_mtype == $kgm_mtype_ppc)
 		if ($kdp_act_counter == 0)
-			set $kdpstate = (struct savearea *) kdp.saved_state
-		end
-		set $kdp_act_counter = $kdp_act_counter + 1
-		set (struct savearea *) kdp.saved_state=$newact->machine->pcb
-		flushregs
-		flushstack
-		set $pc=$newact->machine->pcb.save_srr0
-		update
-	end
-	if ($kgm_mtype == $kgm_mtype_i386)
-		set $kdpstatep = (struct x86_saved_state32 *) kdp.saved_state
-		if ($kdp_act_counter == 0)
-		   set $kdpstate = *($kdpstatep)
-		end	
-		set $kdp_act_counter = $kdp_act_counter + 1
-
-		set $kgm_statep = (struct x86_kernel_state *) \
-					($newact->kernel_stack + kernel_stack_size \
-					 - sizeof(struct x86_kernel_state))
-	   	set $kdpstatep->ebx = $kgm_statep->k_ebx 
-		set $kdpstatep->ebp = $kgm_statep->k_ebp 
-		set $kdpstatep->edi = $kgm_statep->k_edi 
-		set $kdpstatep->esi = $kgm_statep->k_esi 
-	   	set $kdpstatep->eip = $kgm_statep->k_eip 
-		flushregs
-		flushstack
-		set $pc = $kgm_statep->k_eip
-		update
-	end
-	if ($kgm_mtype == $kgm_mtype_x86_64)
-		set $kdpstatep = (struct x86_saved_state64 *) kdp.saved_state
-		if ($kdp_act_counter == 0)
-		   set $kdpstate = *($kdpstatep)
-		end	
-		set $kdp_act_counter = $kdp_act_counter + 1
-
-		set $kgm_statep = (struct x86_kernel_state *) \
-					($newact->kernel_stack + kernel_stack_size \
-					 - sizeof(struct x86_kernel_state))
-	   	set $kdpstatep->rbx = $kgm_statep->k_rbx 
-	   	set $kdpstatep->rbp = $kgm_statep->k_rbp 
-	   	set $kdpstatep->r12 = $kgm_statep->k_r12 
-	   	set $kdpstatep->r13 = $kgm_statep->k_r13 
-	   	set $kdpstatep->r14 = $kgm_statep->k_r14 
-	   	set $kdpstatep->r15 = $kgm_statep->k_r15 
-	   	set $kdpstatep->isf.rsp = $kgm_statep->k_rsp 
-		flushregs
-		flushstack
-		set $pc = $kgm_statep->k_rip
-		update
-	end
-	if ($kgm_mtype == $kgm_mtype_arm)
-		set $r0_save   = $r0
-		set $r1_save   = $r1
-		set $r2_save   = $r2
-		set $r3_save   = $r3
-		set $r4_save   = $r4
-		set $r5_save   = $r5
-		set $r6_save   = $r6
-		set $r7_save   = $r7
-		set $r8_save   = $r8
-		set $r9_save   = $r9
-		set $r10_save  = $r10
-		set $r11_save  = $r11
-		set $r12_save  = $r12
-		set $sp_save   = $sp
-		set $lr_save   = $lr
-		set $pc_save   = $pc
-		set $pc_ctx = load_reg+8
-		set $kgm_statep = (struct arm_saved_state *)((struct thread*)$arg0)->machine.kstackptr
-		set $r0 =  $kgm_statep->r[0]
-		set $r1 =  $kgm_statep->r[1]
-		set $r2 =  $kgm_statep->r[2]
-		set $r3 =  $kgm_statep->r[3]
-		set $r4 =  $kgm_statep->r[4]
-		set $r5 =  $kgm_statep->r[5]
-		set $r6 =  $kgm_statep->r[6]
-		set $r8 =  $kgm_statep->r[8]
-		set $r9 =  $kgm_statep->r[9]
-		set $r10 = $kgm_statep->r[10]
-		set $r11 = $kgm_statep->r[11]
-		set $r12 = $kgm_statep->r[12]
-		set $sp = $kgm_statep->sp
-		set $lr = $kgm_statep->lr
-		set $pc = $pc_ctx
-		set $r7 =  $kgm_statep->r[7]
-		flushregs
-		flushstack
-		end
-	end
-	showcontext_int
-end
-
-document switchtoact  
-Syntax: switchtoact <address of activation>
-| This command allows gdb to examine the execution context and call
-| stack for the specified activation. For example, to view the backtrace
-| for an activation issue "switchtoact <address>", followed by "bt".
-| Before resuming execution, issue a "resetctx" command, to
-| return to the original execution context.
-end     
-
-define switchtoctx
-	select 0
-	if ($kgm_mtype == $kgm_mtype_ppc)
-		if ($kdp_act_counter == 0)
-		   set $kdpstate = (struct savearea *) kdp.saved_state
+		   set $kdpstate = (struct savearea *) kdp.saved_state
 		end
 		set $kdp_act_counter = $kdp_act_counter + 1
 		set (struct savearea *) kdp.saved_state=(struct savearea *) $arg0
@@ -2116,24 +2706,26 @@ define switchtoctx
 		update
 	else
 	if ($kgm_mtype == $kgm_mtype_arm)
-		set arm disassembler std
-		select-frame 0
-		set $r0_save   = $r0
-		set $r1_save   = $r1
-		set $r2_save   = $r2
-		set $r3_save   = $r3
-		set $r4_save   = $r4
-		set $r5_save   = $r5
-		set $r6_save   = $r6
-		set $r7_save   = $r7
-		set $r8_save   = $r8
-		set $r9_save   = $r9
-		set $r10_save  = $r10
-		set $r11_save  = $r11
-		set $r12_save  = $r12
-		set $sp_save   = $sp
-		set $lr_save   = $lr
-		set $pc_save   = $pc
+		select 0
+		set $kdp_arm_act_counter = $kdp_arm_act_counter + 1
+		if ($kdp_arm_act_counter == 1)
+			set $r0_save   = $r0
+			set $r1_save   = $r1
+			set $r2_save   = $r2
+			set $r3_save   = $r3
+			set $r4_save   = $r4
+			set $r5_save   = $r5
+			set $r6_save   = $r6
+			set $r7_save   = $r7
+			set $r8_save   = $r8
+			set $r9_save   = $r9
+			set $r10_save  = $r10
+			set $r11_save  = $r11
+			set $r12_save  = $r12
+			set $sp_save   = $sp
+			set $lr_save   = $lr
+			set $pc_save   = $pc
+		end
 		set $kgm_statep = (struct arm_saved_state *)$arg0
 		set $r0 =  $kgm_statep->r[0]
 		set $r1 =  $kgm_statep->r[1]
@@ -2170,33 +2762,36 @@ end
 define resetctx
 	select 0
 	if ($kdp_act_counter != 0)
-	if ($kgm_mtype == $kgm_mtype_ppc)
-		set (struct savearea *)kdp.saved_state=$kdpstate
-		flushregs
-		flushstack
-		set $pc=((struct savearea *) kdp.saved_state)->save_srr0
-		update
-		set $kdp_act_counter = 0
-	end
-	if ($kgm_mtype == $kgm_mtype_i386)
-		set $kdpstatep = (struct x86_saved_state32 *) kdp.saved_state
-		set *($kdpstatep)=$kdpstate
-		flushregs
-		flushstack
-		set $pc=$kdpstatep->eip
-		update
-		set $kdp_act_counter = 0
-	end
-	if ($kgm_mtype == $kgm_mtype_x86_64)
-		set $kdpstatep = (struct x86_saved_state64 *) kdp.saved_state
-		set *($kdpstatep)=$kdpstate
-		flushregs
-		flushstack
-		set $pc=$kdpstatep->isf.rip
-		update
-		set $kdp_act_counter = 0
-	end
-	if ($kgm_mtype == $kgm_mtype_arm)
+		if ($kgm_mtype == $kgm_mtype_ppc)
+			set (struct savearea *)kdp.saved_state=$kdpstate
+			flushregs
+			flushstack
+			set $pc=((struct savearea *) kdp.saved_state)->save_srr0
+			update
+			set $kdp_act_counter = 0
+		end
+		if ($kgm_mtype == $kgm_mtype_i386)
+			set $kdpstatep = (struct x86_saved_state32 *) kdp.saved_state
+			set *($kdpstatep)=$kdpstate
+			flushregs
+			flushstack
+			set $pc=$kdpstatep->eip
+			update
+			set $kdp_act_counter = 0
+		end
+		if ($kgm_mtype == $kgm_mtype_x86_64)
+			set $kdpstatep = (struct x86_saved_state64 *) kdp.saved_state
+			set *($kdpstatep)=$kdpstate
+			flushregs
+			flushstack
+			set $pc=$kdpstatep->isf.rip
+			update
+			set $kdp_act_counter = 0
+		end
+		showcontext_int
+	end
+	if ($kgm_mtype == $kgm_mtype_arm && $kdp_arm_act_counter != 0)
+		echo Restoring context\n
 		set $r0  = $r0_save
 		flushregs
 		set $r1  = $r1_save
@@ -2229,8 +2824,9 @@ define resetctx
 		flushregs
 		set $r7  = $r7_save
 		flushregs
-	end
-	showcontext_int
+		flushstack
+		update
+		set $kdp_arm_act_counter = 0
 	end
 end     
         
@@ -2462,7 +3058,7 @@ define showx86backtrace
 	_loadfrom ($kgm_tmp_frame+$kgm_ret_off)
 	set $kgm_prev_pc = $kgm_loadval
 	set $kgm_frameno = 1
-	while $kgm_prev_frame != 0
+	while ($kgm_prev_frame != 0) && ($kgm_prev_frame != 0x0000000800000008)
 		printf "%d: Saved frame: 0x%016llx Saved PC: 0x%016llx\n", $kgm_frameno, $kgm_prev_frame, $kgm_prev_pc
 		if (!(($kgm_x86_abi == 0xf) && ($kgm_mtype == $kgm_mtype_i386)))
 		   x/i $kgm_prev_pc
@@ -2517,7 +3113,7 @@ define showuserstack
 		else
 		if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any)
 			set $newact = (struct thread *) $arg0
-			set $newiss = (x86_saved_state_t *) ($newact->machine.pcb->iss)
+			set $newiss = (x86_saved_state_t *) ($newact->machine.iss)
 			set $kgm_x86_abi = $newiss.flavor
 			if ($newiss.flavor == 0xf) 
 	   			set $checkpc = $newiss.uss.ss_64.isf.rip
@@ -2534,19 +3130,47 @@ define showuserstack
 			else
 			set $kgm_cur_frame = $checkframe
 			set $kgm_cur_pc = $checkpc
-			printf "You may now issue the showx86backtrace command to see the user space backtrace for this thread ("
-			showptr $arg0
-			printf "); you can also examine memory locations in this address space (pmap "
-			showptr $newact->task->map->pmap
-			printf ") before issuing the backtrace. This two-step process is necessary to work around various bugs in x86 gdb, which cause it to stop memory evaluation on spurious memory read errors. Additionally, you may need to issue a set kdp_pmap = 0 command after the showx86backtrace completes, to resume reading from the kernel address space.\n"
+# When more than one argument is present, don't print usage
+			if ( $argc == 1 )
+				printf "You may now issue the showx86backtrace command to see the user space backtrace for this thread ("
+				showptr $arg0
+				printf "); you can also examine memory locations in this address space (pmap "
+				showptr $newact->task->map->pmap
+				printf ") before issuing the backtrace. This two-step process is necessary to work around various bugs in x86 gdb, which cause it to stop memory evaluation on spurious memory read errors. Additionally, you may need to issue a set kdp_pmap = 0 command after the showx86backtrace completes, to resume reading from the kernel address space.\n"
+			end
 			set kdp_pmap = $newact->task->map->pmap
 			_kgm_flush_loop
 			_kgm_update_loop
 			end			
+		else
+		if ($kgm_mtype == $kgm_mtype_arm)
+			if (kdp->is_conn > 0)
+				set $kgm_threadp = (struct thread *)$arg0
+				set $kgm_saved_pmap = kdp_pmap
+				showactheader
+				showactint $kgm_threadp 0
+				set $kgm_thread_pmap = $kgm_threadp->task->map->pmap
+				set $kgm_thread_sp = $kgm_threadp.machine->PcbData.r[7]
+				set kdp_pmap = $kgm_thread_pmap
+				while ($kgm_thread_sp != 0)
+    				set $link_register = *($kgm_thread_sp + 4)
+    				showptrhdrpad
+					printf "                  "
+					showptr $kgm_thread_sp
+					printf "  "
+    				showptr $link_register
+    				printf "\n"
+    				set $kgm_thread_sp = *$kgm_thread_sp
+  				end
+				set kdp_pmap = $kgm_saved_pmap
+			else
+				echo You must be connected via nanokdp to use this macro\n
+			end
 		else
 			echo showuserstack not supported on this architecture\n
 		end
 		end
+		end
 end
 document showuserstack
 Syntax: showuserstack <address of thread activation>
@@ -2560,6 +3184,216 @@ Syntax: showuserstack <address of thread activation>
 |macro in some cases.
 end
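+
+# Example of the two-step x86 flow described above (address illustrative):
+#   (gdb) showuserstack 0xffffff8012345678
+#   (gdb) showx86backtrace
+#   (gdb) set kdp_pmap = 0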
 
+define showtaskuserstacks
+    set $kgm_taskp = (struct task *)$arg0
+    set $kgm_head_actp = &($kgm_taskp->threads)
+    set $kgm_actp = (struct thread *)($kgm_taskp->threads.next)
+    while $kgm_actp != $kgm_head_actp
+    	printf "For thread "
+	showptr $kgm_actp
+	printf "\n"
+	showuserstack $kgm_actp quiet
+	if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any)
+		showx86backtrace
+	end
+	set kdp_pmap=0
+    	set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next)
+    	printf "\n"
+    end
+    showuserlibraries $kgm_taskp
+end
+document showtaskuserstacks
+Syntax: (gdb) showtaskuserstacks <task>
+| Print out the user stack for each thread in a task, followed by the user libraries.
+end
+
+
+define showuserregisters
+	set $kgm_threadp = (struct thread *)$arg0
+	set $kgm_taskp = $kgm_threadp->task
+	if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any)
+		set $newiss = (x86_saved_state_t *) ($kgm_threadp->machine.iss)
+		set $kgm_x86_abi = $newiss.flavor
+		if ($newiss.flavor == 0xf)
+			printf "X86 Thread State (64-bit):\n"
+	   		set $kgm_ss64 = $newiss.uss.ss_64
+
+			printf "  rax: "
+			showuserptr $kgm_ss64.rax
+			printf "  rbx: "
+			showuserptr $kgm_ss64.rbx
+			printf "  rcx: "
+			showuserptr $kgm_ss64.rcx
+			printf "  rdx: "
+			showuserptr $kgm_ss64.rdx
+			printf "\n"
+
+			printf "  rdi: "
+			showuserptr $kgm_ss64.rdi
+			printf "  rsi: "
+			showuserptr $kgm_ss64.rsi
+			printf "  rbp: "
+			showuserptr $kgm_ss64.rbp
+			printf "  rsp: "
+			showuserptr $kgm_ss64.isf.rsp
+			printf "\n"
+
+			printf "   r8: "
+			showuserptr $kgm_ss64.r8
+			printf "   r9: "
+			showuserptr $kgm_ss64.r9
+			printf "  r10: "
+			showuserptr $kgm_ss64.r10
+			printf "  r11: "
+			showuserptr $kgm_ss64.r11
+			printf "\n"
+
+			printf "  r12: "
+			showuserptr $kgm_ss64.r12
+			printf "  r13: "
+			showuserptr $kgm_ss64.r13
+			printf "  r14: "
+			showuserptr $kgm_ss64.r14
+			printf "  r15: "
+			showuserptr $kgm_ss64.r15
+			printf "\n"
+
+			printf "  rip: "
+			showuserptr $kgm_ss64.isf.rip
+			printf "  rfl: "
+			showuserptr $kgm_ss64.isf.rflags
+			printf "  cr2: "
+			showuserptr $kgm_ss64.cr2
+			printf "\n"
+		else
+			printf "X86 Thread State (32-bit):\n"
+	   		set $kgm_ss32 = $newiss.uss.ss_32
+
+			printf "  eax: "
+			showuserptr $kgm_ss32.eax
+			printf "  ebx: "
+			showuserptr $kgm_ss32.ebx
+			printf "  ecx: "
+			showuserptr $kgm_ss32.ecx
+			printf "  edx: "
+			showuserptr $kgm_ss32.edx
+			printf "\n"
+
+			printf "  edi: "
+			showuserptr $kgm_ss32.edi
+			printf "  esi: "
+			showuserptr $kgm_ss32.esi
+			printf "  ebp: "
+			showuserptr $kgm_ss32.ebp
+			printf "  esp: "
+			showuserptr $kgm_ss32.uesp
+			printf "\n"
+
+			printf "   ss: "
+			showuserptr $kgm_ss32.ss
+			printf "  efl: "
+			showuserptr $kgm_ss32.efl
+			printf "  eip: "
+			showuserptr $kgm_ss32.eip
+			printf "   cs: "
+			showuserptr $kgm_ss32.cs
+			printf "\n"
+
+			printf "   ds: "
+			showuserptr $kgm_ss32.ds
+			printf "   es: "
+			showuserptr $kgm_ss32.es
+			printf "   fs: "
+			showuserptr $kgm_ss32.fs
+			printf "   gs: "
+			showuserptr $kgm_ss32.gs
+			printf "\n"
+
+			printf "  cr2: "
+			showuserptr $kgm_ss32.cr2
+			printf "\n"
+		end
+	else
+	if ($kgm_mtype == $kgm_mtype_arm)
+		printf "ARM Thread State:\n"
+		set $kgm_pcb = (arm_saved_state_t *) ($kgm_threadp->machine.upcb)
+
+		printf "    r0: "
+		showuserptr $kgm_pcb.r[0]
+		printf "    r1: "
+		showuserptr $kgm_pcb.r[1]
+		printf "    r2: "
+		showuserptr $kgm_pcb.r[2]
+		printf "    r3: "
+		showuserptr $kgm_pcb.r[3]
+		printf "\n"
+
+		printf "    r4: "
+		showuserptr $kgm_pcb.r[4]
+		printf "    r5: "
+		showuserptr $kgm_pcb.r[5]
+		printf "    r6: "
+		showuserptr $kgm_pcb.r[6]
+		printf "    r7: "
+		showuserptr $kgm_pcb.r[7]
+		printf "\n"
+
+		printf "    r8: "
+		showuserptr $kgm_pcb.r[8]
+		printf "    r9: "
+		showuserptr $kgm_pcb.r[9]
+		printf "   r10: "
+		showuserptr $kgm_pcb.r[10]
+		printf "   r11: "
+		showuserptr $kgm_pcb.r[11]
+		printf "\n"
+
+		printf "    ip: "
+		showuserptr $kgm_pcb.r[12]
+		printf "    sp: "
+		showuserptr $kgm_pcb.sp
+		printf "    lr: "
+		showuserptr $kgm_pcb.lr
+		printf "    pc: "
+		showuserptr $kgm_pcb.pc
+		printf "\n"
+
+		printf "  cpsr: "
+		showuserptr $kgm_pcb.cpsr
+		printf "\n"
+	else
+		echo showuserregisters not supported on this architecture\n
+	end
+	end
+end
+document showuserregisters
+Syntax: showuserregisters <address of thread>
+|This command displays the last known user register state
+|for the thread. This may not be correct for cases where
+|the thread is currently executing in userspace. However,
+|for threads that have entered the kernel (either explicitly
+|with a system call or implicitly with a fault), it should
+|be accurate.
+end
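+
+# Example (address is illustrative):
+#   (gdb) showuserregisters 0xffffff8012345678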
+
+define showtaskuserregisters
+    set $kgm_taskp = (struct task *)$arg0
+    set $kgm_head_actp = &($kgm_taskp->threads)
+    set $kgm_actp = (struct thread *)($kgm_taskp->threads.next)
+    while $kgm_actp != $kgm_head_actp
+    	printf "For thread "
+	showptr $kgm_actp
+	printf "\n"
+	showuserregisters $kgm_actp
+    	set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next)
+    	printf "\n"
+    end
+end
+document showtaskuserregisters
+Syntax: (gdb) showtaskuserregisters <task>
+| Print out the user registers for each thread in a task
+end
+
 define kdp-reboot
 # Alternatively, set *(*(unsigned **) 0x2498) = 1 
 # (or 0x5498 on PPC, 0xffffff8000002928 on x86_64, 0xffff049c on arm)
@@ -2605,7 +3439,7 @@ define dumpinfoint
        set manual_pkt.input        = 0
 
        set manual_pkt.len          = sizeof(kdp_dumpinfo_req_t)
-       set $kgm_pkt                = (kdp_dumpinfo_req_t *)manual_pkt.data
+       set $kgm_pkt                = (kdp_dumpinfo_req_t *)&manual_pkt.data
        set $kgm_pkt->hdr.request  = KDP_DUMPINFO
        set $kgm_pkt->hdr.len      = sizeof(kdp_dumpinfo_req_t)
        set $kgm_pkt->hdr.is_reply = 0
@@ -2697,7 +3531,7 @@ define getdumpinfo
        dumpinfoint KDP_DUMPINFO_GETINFO
        set $kgm_dumpinfo = (kdp_dumpinfo_reply_t *) manual_pkt.data
        if $kgm_dumpinfo->type & KDP_DUMPINFO_REBOOT
-       	  printf "System will reboot after kernel info gets dumped.\n"
+       	  printf "Sysem will reboot after kernel info gets dumped.\n"
        else
        	  printf "Sysem will not reboot after kernel info gets dumped.\n"
        end
@@ -3257,6 +4091,28 @@ define showregdictionary
 end
 
 
+define showorderedsetarrayint
+    set $kgm$arg0_array = (_Element *)$arg1
+    set $kgm$arg0_count = $arg2
+
+    set $kgm$arg0_idx = 0
+    while ($kgm$arg0_idx < $kgm$arg0_count)
+        set $kgm_obj = $kgm$arg0_array[$kgm$arg0_idx++]
+        showobjectint _$arg0 $kgm_obj
+        if ($kgm$arg0_idx < $kgm$arg0_count)
+	    printf ","
+        end
+    end
+end
+
+define showorderedsetint
+    set $kgm_array = ((OSOrderedSet *)$arg1)->array
+    set $count = ((OSOrderedSet *)$arg1)->count
+    printf "["
+    showorderedsetarrayint $arg0 $kgm_array $count
+    printf "]"
+end
+
 define showarraysetint
     set $kgm$arg0_array = (OSArray *)$arg1
 
@@ -3341,6 +4197,10 @@ define showobjectint
         showsetint _$arg0 $arg1
         set $kgm_shown = 1
     end
+    if ($kgm_vt == &_ZTV12OSOrderedSet)
+        showorderedsetint _$arg0 $arg1
+        set $kgm_shown = 1
+    end
     
     if ($kgm_shown != 1)
         if ($kgm_show_object_addrs == 0)
@@ -3435,7 +4295,7 @@ define findregistryentryrecurse
 	     print $kgm_re
 	  end
 
-	  # if we want to show everything, then don't populate $kgm_registry_entry
+	  # don't populate $kgm_registry_entry if we want to show everything
 	  if !$kgm_findregistry_continue
        	     set $kgm_registry_entry = $kgm_re
 	  end
@@ -3845,38 +4705,84 @@ Syntax: (gdb) showosobjecttracking
 | Set gOSObjectTrackThread to 1 or a thread_t to capture new OSObjects allocated by a thread or all threads.
 end
 
+# $kgm_readphys_force_kdp and $kgm_readphys_force_physmap
+# can respectively cause physical memory access to use
+# a KDP manual packet or the physical memory mapping
+# even if the default behavior would be otherwise.
 define readphysint
-       set $kgm_readphysint_result = 0xBAD10AD
-       # set up the manual KDP packet
-       set manual_pkt.input = 0
-       set manual_pkt.len = sizeof(kdp_readphysmem64_req_t)
-       set $kgm_pkt = (kdp_readphysmem64_req_t *)&manual_pkt.data
-       set $kgm_pkt->hdr.request  = KDP_READPHYSMEM64
-       set $kgm_pkt->hdr.len      = sizeof(kdp_readphysmem64_req_t)
-       set $kgm_pkt->hdr.is_reply = 0
-       set $kgm_pkt->hdr.seq      = 0
-       set $kgm_pkt->hdr.key      = 0
-       set $kgm_pkt->address      = (uint64_t)$arg0
-       set $kgm_pkt->nbytes       = $arg1 >> 3
-       set $kgm_pkt->lcpu         = $arg2
-       set manual_pkt.input       = 1
-       # dummy to make sure manual packet is executed
-       set $kgm_dummy = &_mh_execute_header
-       set $kgm_pkt = (kdp_readphysmem64_reply_t *)&manual_pkt.data
-       if ($kgm_pkt->error == 0)
-       	  if $arg1 == 8
-       	     set $kgm_readphysint_result = *((uint8_t *)$kgm_pkt->data)
-	  end
-       	  if $arg1 == 16
-       	     set $kgm_readphysint_result = *((uint16_t *)$kgm_pkt->data)
-	  end
-       	  if $arg1 == 32
-       	     set $kgm_readphysint_result = *((uint32_t *)$kgm_pkt->data)
-	  end
-       	  if $arg1 == 64
-       	     set $kgm_readphysint_result = *((uint64_t *)$kgm_pkt->data)
-	  end
-       end
+    set $kgm_readphysint_result = 0xBAD10AD
+
+    if ($kgm_readphys_force_kdp != 0)
+        set $kgm_readphys_use_kdp = 1
+    else
+        if ($kgm_readphys_force_physmap)
+            set $kgm_readphys_use_kdp = 0
+        else
+            set $kgm_readphys_use_kdp = ( kdp->is_conn > 0 )
+        end
+    end
+
+    if ($kgm_readphys_use_kdp)
+
+        # set up the manual KDP packet
+        set manual_pkt.input = 0
+        set manual_pkt.len = sizeof(kdp_readphysmem64_req_t)
+        set $kgm_pkt = (kdp_readphysmem64_req_t *)&manual_pkt.data
+        set $kgm_pkt->hdr.request  = KDP_READPHYSMEM64
+        set $kgm_pkt->hdr.len      = sizeof(kdp_readphysmem64_req_t)
+        set $kgm_pkt->hdr.is_reply = 0
+        set $kgm_pkt->hdr.seq      = 0
+        set $kgm_pkt->hdr.key      = 0
+        set $kgm_pkt->address      = (uint64_t)$arg0
+        set $kgm_pkt->nbytes       = $arg1 >> 3
+        set $kgm_pkt->lcpu         = $arg2
+        set manual_pkt.input       = 1
+        # dummy to make sure manual packet is executed
+        set $kgm_dummy = &_mh_execute_header
+        set $kgm_pkt = (kdp_readphysmem64_reply_t *)&manual_pkt.data
+        if ($kgm_pkt->error == 0)
+            if $arg1 == 8
+                set $kgm_readphysint_result = *((uint8_t *)$kgm_pkt->data)
+            end
+            if $arg1 == 16
+                set $kgm_readphysint_result = *((uint16_t *)$kgm_pkt->data)
+            end
+            if $arg1 == 32
+                set $kgm_readphysint_result = *((uint32_t *)$kgm_pkt->data)
+            end
+            if $arg1 == 64
+                set $kgm_readphysint_result = *((uint64_t *)$kgm_pkt->data)
+            end
+        end
+
+    else
+        # No KDP. Attempt to use physical memory mapping
+
+        if ($kgm_mtype == $kgm_mtype_x86_64)
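+            # x86_64 maps all physical memory in the kernel physmap; this
+            # forms its base KVA (PML4 slot 509 in the sign-extended upper
+            # half) and adds the physical address.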
+            set $kgm_readphys_paddr_in_kva = (unsigned long long)$arg0 + (((unsigned long long)-1 << 47) | ((unsigned long long)509 << 39))
+        else
+            if ($kgm_mtype == $kgm_mtype_arm)
+                set $kgm_readphys_paddr_in_kva = (unsigned long long)$arg0 - gPhysBase + gVirtBase
+            else
+                printf "readphys not available for current architecture.\n"
+                set $kgm_readphys_paddr_in_kva = 0
+            end
+        end
+        if $kgm_readphys_paddr_in_kva
+            if $arg1 == 8
+                set $kgm_readphysint_result = *((uint8_t *)$kgm_readphys_paddr_in_kva)
+            end
+            if $arg1 == 16
+                set $kgm_readphysint_result = *((uint16_t *)$kgm_readphys_paddr_in_kva)
+            end
+            if $arg1 == 32
+                set $kgm_readphysint_result = *((uint32_t *)$kgm_readphys_paddr_in_kva)
+            end
+            if $arg1 == 64
+                set $kgm_readphysint_result = *((uint64_t *)$kgm_readphys_paddr_in_kva)
+            end
+        end
+    end
 end
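+
+# Example: force physmap-based access instead of KDP packets, then read a
+# 32-bit word from a (hypothetical) physical address:
+#   set $kgm_readphys_force_kdp = 0
+#   set $kgm_readphys_force_physmap = 1
+#   readphys32 0x12345000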
 
 define readphys8
@@ -3994,8 +4900,10 @@ document writephys64
 end
 
 define addkextsyms
-        shell ls $arg0/* | xargs -n 1 echo add-symbol-file > /tmp/gdb-syms
-        source /tmp/gdb-syms
+	shell echo cd `pwd` > /tmp/gdb-cd
+	cd $arg0
+	source kcbmacros
+	source /tmp/gdb-cd
 	set $kgm_show_kmod_syms = 1
 end
 
@@ -4176,7 +5084,7 @@ define showprocinfo
     # decode part of credential
     set $kgm_spi_cred = $kgm_spi_proc->p_ucred
     if ($kgm_spi_cred != 0)
-	printf "Cred: euid %d ruid %d svuid %d\n", $kgm_spi_cred->cr_uid, $kgm_spi_cred->cr_ruid, $kgm_spi_cred->cr_svuid
+	printf "Cred: euid %d ruid %d svuid %d\n", $kgm_spi_cred->cr_posix.cr_uid, $kgm_spi_cred->cr_posix.cr_ruid, $kgm_spi_cred->cr_posix.cr_svuid
     else
     	printf "Cred: (null)\n"
     end
@@ -4366,7 +5274,138 @@ Syntax: (gdb) allproc
 | Routine to print out all process in the system 
 | which are not in the zombie list
 end
+define showprocsiblingint
+    set $kgm_sibling_ptr = (struct proc *)$arg0
+    set $kgm_lx = $arg1
+    while $kgm_lx
+        printf "|  "
+        set $kgm_lx = $kgm_lx - 3
+    end
+    printf "|--%d    %s    [ 0x%llx ]\n", $kgm_sibling_ptr->p_pid, $kgm_sibling_ptr->p_comm, $kgm_sibling_ptr
+end
+define showproctreeint
+#Initialize all the set variables used in this macro
+    set $kgm_basep1 = 0
+    set $kgm_sibling_ptr = 0
+    set $kgm_lx = 0
+    set $kgm_tmp_base = 0
+    set $kgm_head_ptr = 0
+    set $kgm_search_pid = 0 
+    set $kgm_rev = 0
+    set $kgm_x = 0
+
+    set $kgm_basep1 = (struct proc *)allproc->lh_first
+    if ($arg0 == 0)
+        set $kgm_head_ptr = (struct proc *)initproc
+    end       
+    if ($arg0 > 0)
+        set $kgm_tmp_base = (struct proc *)allproc->lh_first
+        set $kgm_search_pid = $arg0 
+        while $kgm_tmp_base
+            if ( $kgm_tmp_base->p_pid == $kgm_search_pid)
+               if ($kgm_tmp_base->p_childrencnt > 0)
+                    set $kgm_head_ptr = $kgm_tmp_base->p_children.lh_first
+               else
+                    set $kgm_head_ptr = 0
+                    printf "No children present for PID=%d", $kgm_search_pid
+               end
+               loop_break
+            end
+            set $kgm_tmp_base = $kgm_tmp_base->p_list.le_next
+        end
+    end
+    set $kgm_rev = 0
+    set $kgm_x = 0
+    if ($kgm_head_ptr)
+        printf "PID   PROCESS       POINTER]\n"
+        printf "===   =======       =======\n"
+        printf "%d    %s      [ 0x%llx ]\n", $kgm_head_ptr->p_ppid, $kgm_head_ptr->p_pptr->p_comm, $kgm_head_ptr
+        printf "|--%d    %s      [ 0x%llx ]\n", $kgm_head_ptr->p_pid, $kgm_head_ptr->p_comm, $kgm_head_ptr
+    end
+    while ($kgm_head_ptr)
+       #Is childrencnt = 0?       YES  {=> no children}
+        if ($kgm_head_ptr->p_childrencnt == 0)
+            # Does it have sibling? 
+            if($kgm_head_ptr->p_sibling.le_next == 0)
+                #No, it does not have sibling, so go back to its parent which will go to its sibling
+                if($kgm_head_ptr == $kgm_head_ptr->p_pptr)
+                    loop_break
+                end
+                set $kgm_head_ptr = $kgm_head_ptr->p_pptr
+                if ($kgm_head_ptr == $kgm_tmp_base)
+                    loop_break
+                end
+                if ($kgm_x > 3)
+                    set $kgm_x = $kgm_x - 3
+                end
+                set $kgm_rev = 1
+            end
+            if($kgm_head_ptr->p_sibling.le_next != 0)
+                # Yes, it has sibling. So print sibling
+                set $kgm_rev = 0
+                showprocsiblingint $kgm_head_ptr->p_sibling.le_next $kgm_x
+                set $kgm_head_ptr = $kgm_head_ptr->p_sibling.le_next
+            end
+        # childrencnt != 0  {=> it has children}
+        else
+            if ($kgm_rev == 1)
+                if($kgm_head_ptr->p_sibling.le_next == 0)
+                    #No, it does not have sibling, so go back to its parent which will go to its sibling
+                    if($kgm_head_ptr == $kgm_head_ptr->p_pptr)
+                        loop_break
+                    end
+                    set $kgm_head_ptr = $kgm_head_ptr->p_pptr
+                    if ($kgm_head_ptr == $kgm_tmp_base)
+                        loop_break
+                    end
 
+                    if ($kgm_x > 3)
+                        set $kgm_x = $kgm_x - 3
+                    end
+                    set $kgm_rev = 1
+                end
+                if($kgm_head_ptr->p_sibling.le_next != 0)
+                    set $kgm_rev = 0
+                    # Yes, it has sibling. So print sibling
+                    showprocsiblingint $kgm_head_ptr->p_sibling.le_next $kgm_x
+                    set $kgm_head_ptr = $kgm_head_ptr->p_sibling.le_next
+                end
+            else
+                set $kgm_head_ptr = $kgm_head_ptr->p_children.lh_first
+                set $kgm_x = $kgm_x + 3
+                set $kgm_lx = $kgm_x
+                while $kgm_lx
+                    printf "|  "
+                    set $kgm_lx = $kgm_lx-3
+                end
+                printf "|--%d    %s      [ 0x%llx ] \n", $kgm_head_ptr->p_pid, $kgm_head_ptr->p_comm, $kgm_head_ptr
+            end
+        end
+    end
+    printf "\n"
+#Unset all the set variables used in this macro
+    set $kgm_basep1 = 0
+    set $kgm_sibling_ptr = 0
+    set $kgm_lx = 0
+    set $kgm_tmp_base = 0
+    set $kgm_head_ptr = 0
+    set $kgm_search_pid = 0
+    set $kgm_rev = 0
+    set $kgm_x = 0
+end
+define showproctree
+    if ($argc > 0)
+        showproctreeint $arg0
+    else
+        showproctreeint 0
+    end
+end
+document showproctree
+Syntax: (gdb) showproctree <pid>
+| Routine to print the processes in the system in a hierarchical tree form. This routine does not print zombie processes.
+| If no argument is given, showproctree will print all the processes in the system.
+| If pid is specified, showproctree prints all the descendants of the indicated process.
+end
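+
+# Example: "showproctree" (or "showproctree 0") prints the whole tree from
+# initproc down; "showproctree <pid>" prints only that pid's descendants.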
 
 
 define print_vnode
@@ -4644,9 +5683,13 @@ end
 define mbuf_buf2slab
 	set $addr = $arg0
 	set $gix = ((char *)$addr - (char *)mbutl) >> 20
-	set $ix = ((char *)$addr - (char *)mbutl) >> 11
+	set $ix = ((char *)$addr - (char *)slabstbl[$gix].slg_slab[0].sl_base) >> 12
 	set $slab = &slabstbl[$gix].slg_slab[$ix]
-	printf "%p", $slab
+	if $kgm_lp64
+		printf "0x%-16llx", $slab
+	else
+		printf "0x%-8x", $slab
+	end
 end
 
 document mbuf_buf2slab
@@ -4655,11 +5698,15 @@ end
 
 define mbuf_buf2mca
 	set $addr = $arg0
-	set $ix = ((char *)$addr - (char *)mbutl) >> 11
-	set $clbase = ((union mcluster *)(mbutl + $ix))
+	set $ix = ((char *)$addr - (char *)mbutl) >> 12
+	set $clbase = ((union mbigcluster *)mbutl) + $ix
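+	# mbufs are audited per 4KB big cluster, 16 256-byte slots each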
 	set $mclidx = (((char *)$addr - (char *)$clbase) >> 8)
 	set $mca = mclaudit[$ix].cl_audit[$mclidx]
-	printf "mca: %p", $mca
+	if $kgm_lp64
+		printf "mca: 0x%-16llx", $mca
+	else
+		printf "mca: 0x%-8x", $mca
+	end
 end
 
 document mbuf_buf2mca
@@ -4677,11 +5724,11 @@ define mbuf_showmca
 	mbuf_mca_ctype $mca 1
 	printf "\ncontrolling mcache:\t%p (%s)\n", $mca->mca_cache, $cp->mc_name
 	if $mca->mca_uflags & $MB_SCVALID
-		set $ix = ((char *)$mca->mca_addr - (char *)mbutl) >> 11
-		set $clbase = ((union mcluster *)(mbutl + $ix))
+		set $ix = ((char *)$mca->mca_addr - (char *)mbutl) >> 12
+		set $clbase = ((union mbigcluster *)mbutl) + $ix
 		set $mclidx = (((char *)$mca->mca_addr - (char *)$clbase) >> 8)
 		printf "mbuf obj:\t\t%p\n", $mca->mca_addr
-		printf "mbuf index:\t\t%d (out of 8) in cluster base %p\n", \
+		printf "mbuf index:\t\t%d (out of 16) in cluster base %p\n", \
 		    $mclidx + 1, $clbase
 		if $mca->mca_uptr != 0
 			set $peer_mca = (mcache_audit_t *)$mca->mca_uptr
@@ -4729,10 +5776,56 @@ Syntax: (gdb) mbuf_showmca <addr>
 | records including the stack trace of the last buffer transaction.
 end
 
-set $MCF_NOCPUCACHE = 0x10
-
-define mcache_stat
-	set $head = (mcache_t *)mcache_head
+define mbuf_topleak
+	set language c
+	set $topcnt = 0
+	if $arg0 < 5
+		set $maxcnt = $arg0
+	else
+		set $maxcnt = 5
+	end
+	while $topcnt < $maxcnt
+		mbuf_traceleak mleak_top_trace[$topcnt]
+		set $topcnt = $topcnt + 1
+	end
+	set language auto
+end
+
+document mbuf_topleak
+Syntax: (gdb) mbuf_topleak <num>
+| Prints information about the top <num> suspected mbuf leakers
+| where <num> is a value <= 5
+end
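+
+# Example: show the five largest suspected leakers (assumes mbuf leak
+# tracing is enabled so that mleak_top_trace[] is populated):
+#   (gdb) mbuf_topleak 5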
+
+define mbuf_traceleak
+	set language c
+	set $trace = (struct mtrace *) $arg0
+	if $trace->allocs != 0
+		printf "%p:%d outstanding allocs\n", $trace, $trace->allocs
+		printf "backtrace saved %d deep:\n", $trace->depth
+		if $trace->depth != 0
+			set $cnt = 0
+			while $cnt < $trace->depth
+		                printf "%4d: ", $cnt + 1
+				pcprint $trace->addr[$cnt]
+				printf "\n"
+				set $cnt = $cnt + 1
+			end
+		end
+	end
+	set language auto
+end
+
+document mbuf_traceleak
+Syntax: (gdb) mbuf_traceleak <addr>
+| Given an mbuf leak trace (mtrace) structure address, print out the
+| information stored with that trace.
+end
+
+set $MCF_NOCPUCACHE = 0x10
+
+define mcache_stat
+	set $head = (mcache_t *)mcache_head
 	set $mc = $head
 
     if $kgm_lp64
@@ -4868,16 +5961,26 @@ set $NSLABSPMB = sizeof(mcl_slabg_t)/sizeof(mcl_slab_t)
 define mbuf_slabstbl
 	set $x = 0
 
-	printf "slot addr       slabs range\n"
-	printf "---- ---------- -----------------------\n"
+	if $kgm_lp64
+		printf "slot slabg              slabs range\n"
+		printf "---- ------------------ -------------------------------------------\n"
+	else
+		printf "slot slabg      slabs range\n"
+		printf "---- ---------- ---------------------------\n"
+	end
 	while $x < maxslabgrp
 		set $slg = slabstbl[$x]
 		printf "%3d: ", $x
 		if $slg == 0
 			printf "-\n"
 		else
-			printf "%p [%p-%p]\n", $slg, &$slg->slg_slab[0], \
-			    &$slg->slg_slab[$NSLABSPMB-1]
+			if $kgm_lp64
+				printf "0x%-16llx [ 0x%-16llx - 0x%-16llx ]\n", $slg, &$slg->slg_slab[0], \
+				    &$slg->slg_slab[$NSLABSPMB-1]
+			else
+				printf "0x%-8x [ 0x%-8x - 0x%-8x ]\n", $slg, &$slg->slg_slab[0], \
+				    &$slg->slg_slab[$NSLABSPMB-1]
+			end
 		end
 		set $x += 1
 	end
@@ -4895,19 +5998,36 @@ define mbuf_slabs
 	set $slg = (mcl_slabg_t *)$arg0
 	set $x = 0
 
-    if $kgm_lp64
-		printf "slot addr               next               base               C  R  N   size flags\n"
-		printf "---- ------------------ ------------------ ------------------ -- -- -- ------ -----\n"
+	if $kgm_lp64
+		printf "slot slab               next               obj                mca                 C  R  N   size flags\n"
+		printf "---- ------------------ ------------------ ------------------ ------------------ -- -- -- ------ -----\n"
 	else
-		printf "slot addr       next       base        C  R  N   size flags\n"
-		printf "---- ---------- ---------- ---------- -- -- -- ------ -----\n"
+		printf "slot slab       next       obj        mca         C  R  N   size flags\n"
+		printf "---- ---------- ---------- ---------- ---------- -- -- -- ------ -----\n"
 	end
 	while $x < $NSLABSPMB
 		set $sl = &$slg->slg_slab[$x]
-		printf "%3d: %p %p %p %2d %2d %2d %6d 0x%04x ", \
-		    $x + 1, $sl, $sl->sl_next, $sl->sl_base, $sl->sl_class, \
-		    $sl->sl_refcnt, $sl->sl_chunks, $sl->sl_len, \
-		    $sl->sl_flags
+		set $mca = 0
+		set $obj = $sl->sl_base
+
+		if mclaudit != 0
+			set $ix = ((char *)$obj - (char *)mbutl) >> 12
+			set $clbase = ((union mbigcluster *)mbutl) + $ix
+			set $mclidx = (((char *)$obj - (char *)$clbase) >> 8)
+			set $mca = mclaudit[$ix].cl_audit[$mclidx]
+		end
+
+		if $kgm_lp64
+			printf "%3d: 0x%-16llx 0x%-16llx 0x%-16llx 0x%-16llx %2d %2d %2d %6d 0x%04x ", \
+			    $x + 1, $sl, $sl->sl_next, $obj, $mca, $sl->sl_class, \
+			    $sl->sl_refcnt, $sl->sl_chunks, $sl->sl_len, \
+			    $sl->sl_flags
+		else
+			printf "%3d: 0x%-8x 0x%-8x 0x%-8x 0x%-8x %2d %2d %2d %6d 0x%04x ", \
+			    $x + 1, $sl, $sl->sl_next, $obj, $mca, $sl->sl_class, \
+			    $sl->sl_refcnt, $sl->sl_chunks, $sl->sl_len, \
+			    $sl->sl_flags
+		end
 		if $sl->sl_flags != 0
 			printf "<"
 			if $sl->sl_flags & $SLF_MAPPED
@@ -4922,6 +6042,31 @@ define mbuf_slabs
 			printf ">"
 		end
 		printf "\n"
+
+		if $sl->sl_chunks > 1
+			set $z = 1
+			set $c = $sl->sl_len / $sl->sl_chunks
+
+			while $z < $sl->sl_chunks
+				set $obj = $sl->sl_base + ($c * $z)
+				set $mca = 0
+
+				if mclaudit != 0
+					set $ix = ((char *)$obj - (char *)mbutl) >> 12
+					set $clbase = ((union mbigcluster *)mbutl) + $ix
+					set $mclidx = (((char *)$obj - (char *)$clbase) >> 8)
+					set $mca = mclaudit[$ix].cl_audit[$mclidx]
+				end
+
+				if $kgm_lp64
+					printf "                                           0x%-16llx 0x%-16llx\n", $obj, $mca
+				else
+					printf "                           0x%-8x 0x%-8x\n", $obj, $mca
+				end
+				set $z += 1
+			end
+		end
+
 		set $x += 1
 	end
 end
@@ -5183,7 +6328,7 @@ define mbuf_walkallslabs
 	end
 	printf "objects; this may take a while ...)\n\n"
 
-    if $kgm_lp64
+	if $kgm_lp64
 		printf "                        slab                mca                obj        allocation\n"
 		printf "slot idx             address            address            address type        state\n"
 		printf "---- ---- ------------------ ------------------ ------------------ ----- -----------\n"
@@ -5200,8 +6345,8 @@ define mbuf_walkallslabs
 		while $y < $NSLABSPMB && $stop == 0
 			set $sl = &$slg->slg_slab[$y]
 			set $base = (char *)$sl->sl_base
-			set $ix = ($base - (char *)mbutl) >> 11
-			set $clbase = ((union mcluster *)(mbutl + $ix))
+			set $ix = ($base - (char *)mbutl) >> 12
+			set $clbase = ((union mbigcluster *)mbutl) + $ix
 			set $mclidx = ($base - (char *)$clbase) >> 8
 			set $mca = mclaudit[$ix].cl_audit[$mclidx]
 			set $first = 1
@@ -5218,7 +6363,11 @@ define mbuf_walkallslabs
 
 				if $printmca != 0
 					if $first == 1
-						printf "%4d %4d %p ", $x, $y, $sl
+						if $kgm_lp64
+							printf "%4d %4d 0x%-16llx ", $x, $y, $sl
+						else
+							printf "%4d %4d 0x%-8x ", $x, $y, $sl
+						end
 					else
 					    if $kgm_lp64
 							printf "                             "
@@ -5227,7 +6376,12 @@ define mbuf_walkallslabs
 					    end
 					end
 
-					printf "%p %p ", $mca, $mca->mca_addr
+					if $kgm_lp64
+						printf "0x%-16llx 0x%-16llx ", $mca, $mca->mca_addr
+					else
+						printf "0x%-8x 0x%-8x ", $mca, $mca->mca_addr
+					end
+
 					mbuf_mca_ctype $mca 0
 					if $mca->mca_uflags & ($MB_INUSE|$MB_COMP_INUSE)
 						printf "active      "
@@ -5276,6 +6430,38 @@ document mbuf_walkallslabs
 | parameter.  This is a backend routine for mbuf_show{active,inactive,all}.
 end
 
+define mbuf_countchain
+	set $mp = (struct mbuf *)$arg0
+	
+	set $pkt = 0
+	set $nxt = 0
+
+	while $mp != 0
+		set $pkt = $pkt + 1
+	
+		set $mn = (struct mbuf *)$mp->m_hdr.mh_next
+		while $mn != 0
+			set $nxt = $nxt + 1
+			
+			set $mn = (struct mbuf *)$mn->m_hdr.mh_next
+		end
+		
+		set $mp = $mp->m_hdr.mh_nextpkt
+
+		if (($pkt + $nxt) % 50) == 0
+			printf "... %d\n", $pkt + $nxt
+		end
+	end
+
+	printf "\ntotal: %d (via m_next: %d)\n", $pkt + $nxt, $nxt
+end
+
+document mbuf_countchain
+Syntax: mbuf_countchain <addr>
+| Count the total number of mbufs chained from the given mbuf address.
+| The routine follows both the m_next pointers and m_nextpkt pointers.
+end
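+
+# Example (address is illustrative):
+#   (gdb) mbuf_countchain 0xffffff8012345678
+# A running count is printed every 50 mbufs, then the final total.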
+
 set $RTF_UP          = 0x1
 set $RTF_GATEWAY     = 0x2
 set $RTF_HOST        = 0x4
@@ -5659,6 +6845,8 @@ Syntax: (gdb) rtentry_showdbg <addr>
 | parameter.
 end
 
+set $INIFA_TRACE_HIST_SIZE = inifa_trace_hist_size
+
 define inifa_showdbg
 	set $inifa = (struct in_ifaddr_dbg *)$arg0
 	set $cnt = 0
@@ -5694,7 +6882,7 @@ define inifa_showdbg
 		end
 		set $ix = $ix + 1
 	end
-	while $cnt < $CTRACE_HIST_SIZE
+	while $cnt < $INIFA_TRACE_HIST_SIZE
 		set $ix = 0
 		while $ix < $CTRACE_STACK_SIZE
 			set $kgm_pc = $inifa->inifa_refhold[$cnt].pc[$ix]
@@ -5712,7 +6900,7 @@ define inifa_showdbg
 		set $cnt = $cnt + 1
 	end
 	set $cnt = 0
-	while $cnt < $CTRACE_HIST_SIZE
+	while $cnt < $INIFA_TRACE_HIST_SIZE
 		set $ix = 0
 		while $ix < $CTRACE_STACK_SIZE
 			set $kgm_pc = $inifa->inifa_refrele[$cnt].pc[$ix]
@@ -5739,6 +6927,8 @@ Syntax: (gdb) inifa_showdbg <addr>
 | parameter.
 end
 
+set $IN6IFA_TRACE_HIST_SIZE = in6ifa_trace_hist_size
+
 define in6ifa_showdbg
 	set $in6ifa = (struct in6_ifaddr_dbg *)$arg0
 	set $cnt = 0
@@ -5774,7 +6964,7 @@ define in6ifa_showdbg
 		end
 		set $ix = $ix + 1
 	end
-	while $cnt < $CTRACE_HIST_SIZE
+	while $cnt < $IN6IFA_TRACE_HIST_SIZE
 		set $ix = 0
 		while $ix < $CTRACE_STACK_SIZE
 			set $kgm_pc = $in6ifa->in6ifa_refhold[$cnt].pc[$ix]
@@ -5792,31 +6982,570 @@ define in6ifa_showdbg
 		set $cnt = $cnt + 1
 	end
 	set $cnt = 0
-	while $cnt < $CTRACE_HIST_SIZE
-		set $ix = 0
-		while $ix < $CTRACE_STACK_SIZE
-			set $kgm_pc = $in6ifa->in6ifa_refrele[$cnt].pc[$ix]
-			if $kgm_pc != 0
-				if $ix == 0
-					printf "\nRelease [%d] (thread %p):\n",\
-					  $cnt, $in6ifa->in6ifa_refrele[$cnt].th
-				end
-				printf "%4d: ", $ix + 1
-				pcprint $kgm_pc
-				printf "\n"
+	while $cnt < $IN6IFA_TRACE_HIST_SIZE
+		set $ix = 0
+		while $ix < $CTRACE_STACK_SIZE
+			set $kgm_pc = $in6ifa->in6ifa_refrele[$cnt].pc[$ix]
+			if $kgm_pc != 0
+				if $ix == 0
+					printf "\nRelease [%d] (thread %p):\n",\
+					  $cnt, $in6ifa->in6ifa_refrele[$cnt].th
+				end
+				printf "%4d: ", $ix + 1
+				pcprint $kgm_pc
+				printf "\n"
+			end
+			set $ix = $ix + 1
+		end
+		set $cnt = $cnt + 1
+	end
+end
+
+document in6ifa_showdbg
+Syntax: (gdb) in6ifa_showdbg <addr>
+| Given an IPv6 interface structure address, print the debug information
+| related to it.  This requires interface address debugging to be turned
+| on, by setting the appropriate flags to the "ifa_debug" boot-args
+| parameter.
+end
+
+set $IFMA_TRACE_HIST_SIZE = ifma_trace_hist_size
+
+define ifma_showdbg
+	set $ifma = (struct ifmultiaddr_dbg *)$arg0
+	set $cnt = 0
+
+	printf "Total holds:\t%d\n", $ifma->ifma_refhold_cnt
+	printf "Total releases:\t%d\n", $ifma->ifma_refrele_cnt
+
+	while $cnt < $IFMA_TRACE_HIST_SIZE
+		set $ix = 0
+		while $ix < $CTRACE_STACK_SIZE
+			set $kgm_pc = $ifma->ifma_refhold[$cnt].pc[$ix]
+			if $kgm_pc != 0
+				if $ix == 0
+					printf "\nHold [%d] (thread %p):\n", \
+					  $cnt, $ifma->ifma_refhold[$cnt].th
+				end
+				printf "%4d: ", $ix + 1
+				pcprint $kgm_pc
+				printf "\n"
+			end
+			set $ix = $ix + 1
+		end
+		set $cnt = $cnt + 1
+	end
+	set $cnt = 0
+	while $cnt < $IFMA_TRACE_HIST_SIZE
+		set $ix = 0
+		while $ix < $CTRACE_STACK_SIZE
+			set $kgm_pc = $ifma->ifma_refrele[$cnt].pc[$ix]
+			if $kgm_pc != 0
+				if $ix == 0
+					printf "\nRelease [%d] (thread %p):\n",\
+					  $cnt, $ifma->ifma_refrele[$cnt].th
+				end
+				printf "%4d: ", $ix + 1
+				pcprint $kgm_pc
+				printf "\n"
+			end
+			set $ix = $ix + 1
+		end
+		set $cnt = $cnt + 1
+	end
+end
+
+document ifma_showdbg
+Syntax: (gdb) ifma_showdbg <addr>
+| Given a link multicast structure address, print the debug information
+| related to it.  This requires interface address debugging to be turned
+| on, by setting the appropriate flags to the "ifa_debug" boot-args
+| parameter.
+end
+
+set $INM_TRACE_HIST_SIZE = inm_trace_hist_size
+
+define inm_showdbg
+	set $inm = (struct in_multi_dbg *)$arg0
+	set $cnt = 0
+
+	printf "Total holds:\t%d\n", $inm->inm_refhold_cnt
+	printf "Total releases:\t%d\n", $inm->inm_refrele_cnt
+
+	while $cnt < $INM_TRACE_HIST_SIZE
+		set $ix = 0
+		while $ix < $CTRACE_STACK_SIZE
+			set $kgm_pc = $inm->inm_refhold[$cnt].pc[$ix]
+			if $kgm_pc != 0
+				if $ix == 0
+					printf "\nHold [%d] (thread %p):\n", \
+					  $cnt, $inm->inm_refhold[$cnt].th
+				end
+				printf "%4d: ", $ix + 1
+				pcprint $kgm_pc
+				printf "\n"
+			end
+			set $ix = $ix + 1
+		end
+		set $cnt = $cnt + 1
+	end
+	set $cnt = 0
+	while $cnt < $INM_TRACE_HIST_SIZE
+		set $ix = 0
+		while $ix < $CTRACE_STACK_SIZE
+			set $kgm_pc = $inm->inm_refrele[$cnt].pc[$ix]
+			if $kgm_pc != 0
+				if $ix == 0
+					printf "\nRelease [%d] (thread %p):\n",\
+					  $cnt, $inm->inm_refrele[$cnt].th
+				end
+				printf "%4d: ", $ix + 1
+				pcprint $kgm_pc
+				printf "\n"
+			end
+			set $ix = $ix + 1
+		end
+		set $cnt = $cnt + 1
+	end
+end
+
+document inm_showdbg
+Syntax: (gdb) inm_showdbg <addr>
+| Given an IPv4 multicast structure address, print the debug information
+| related to it.  This requires interface address debugging to be turned
+| on, by setting the appropriate flags to the "ifa_debug" boot-args
+| parameter.
+end
+
+set $IF_REF_TRACE_HIST_SIZE = if_ref_trace_hist_size
+
+define ifpref_showdbg
+	set $dl_if = (struct dlil_ifnet_dbg *)$arg0
+	set $cnt = 0
+
+	printf "Total references:\t%d\n", $dl_if->dldbg_if_refhold_cnt
+	printf "Total releases:\t\t%d\n", $dl_if->dldbg_if_refrele_cnt
+
+	while $cnt < $IF_REF_TRACE_HIST_SIZE
+		set $ix = 0
+		while $ix < $CTRACE_STACK_SIZE
+			set $kgm_pc = $dl_if->dldbg_if_refhold[$cnt].pc[$ix]
+			if $kgm_pc != 0
+				if $ix == 0
+					printf "\nHold [%d] (thread %p):\n", \
+					    $cnt, \
+					    $dl_if->dldbg_if_refhold[$cnt].th
+				end
+				printf "%4d: ", $ix + 1
+				pcprint $kgm_pc
+				printf "\n"
+			end
+			set $ix = $ix + 1
+		end
+		set $cnt = $cnt + 1
+	end
+	set $cnt = 0
+	while $cnt < $IF_REF_TRACE_HIST_SIZE
+		set $ix = 0
+		while $ix < $CTRACE_STACK_SIZE
+			set $kgm_pc = $dl_if->dldbg_if_refrele[$cnt].pc[$ix]
+			if $kgm_pc != 0
+				if $ix == 0
+					printf "\nRelease [%d] (thread %p):\n",\
+					    $cnt, \
+					    $dl_if->dldbg_if_refrele[$cnt].th
+				end
+				printf "%4d: ", $ix + 1
+				pcprint $kgm_pc
+				printf "\n"
+			end
+			set $ix = $ix + 1
+		end
+		set $cnt = $cnt + 1
+	end
+end
+
+document ifpref_showdbg
+Syntax: (gdb) ifpref_showdbg <addr>
+| Given an ifnet structure address, print the debug information
+| related to its refcnt.  This requires ifnet debugging to be turned
+| on, by setting the appropriate flags to the "ifnet_debug" boot-args
+| parameter.
+end
+
+define in6ifa_trash
+	set $ifa = (struct in6_ifaddr_dbg *)in6ifa_trash_head.tqh_first
+	set $cnt = 0
+	while $ifa != 0
+		if $cnt == 0
+			if $kgm_lp64
+				printf "                in6_ifa ref   hold   rele\n"
+				printf "      ----------------- --- ------ ------\n"
+			else
+				printf "        in6_ifa ref   hold   rele\n"
+				printf "      --------- --- ------ ------\n"
+			end
+		end
+		printf "%4d: %p %3d %6d %6d ", $cnt + 1, $ifa, \
+		    $ifa->in6ifa_refhold_cnt - $ifa->in6ifa_refrele_cnt, \
+		    $ifa->in6ifa_refhold_cnt, $ifa->in6ifa_refrele_cnt
+		showsockaddr_in6 $ifa->in6ifa.ia_ifa.ifa_addr
+		printf "\n"
+		set $ifa = $ifa->in6ifa_trash_link.tqe_next
+		set $cnt = $cnt + 1
+	end
+end
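+
+# In the listing above, "ref" is holds minus releases; a non-zero value
+# for an entry already on the trash (freed) list suggests a refcount
+# imbalance on that address.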
+
+set $NDPR_TRACE_HIST_SIZE = ndpr_trace_hist_size
+
+define ndpr_showdbg
+	set $ndpr = (struct nd_prefix_dbg *)$arg0
+	set $cnt = 0
+
+	printf "Total references:\t%d\n", $ndpr->ndpr_refhold_cnt
+	printf "Total releases:\t\t%d\n", $ndpr->ndpr_refrele_cnt
+
+	while $cnt < $NDPR_TRACE_HIST_SIZE
+		set $ix = 0
+		while $ix < $CTRACE_STACK_SIZE
+			set $kgm_pc = $ndpr->ndpr_refhold[$cnt].pc[$ix]
+			if $kgm_pc != 0
+				if $ix == 0
+					printf "\nHold [%d] (thread %p):\n", \
+					    $cnt, \
+					    $ndpr->ndpr_refhold[$cnt].th
+				end
+				printf "%4d: ", $ix + 1
+				pcprint $kgm_pc
+				printf "\n"
+			end
+			set $ix = $ix + 1
+		end
+		set $cnt = $cnt + 1
+	end
+	set $cnt = 0
+	while $cnt < $NDPR_TRACE_HIST_SIZE
+		set $ix = 0
+		while $ix < $CTRACE_STACK_SIZE
+			set $kgm_pc = $ndpr->ndpr_refrele[$cnt].pc[$ix]
+			if $kgm_pc != 0
+				if $ix == 0
+					printf "\nRelease [%d] (thread %p):\n",\
+					    $cnt, \
+					    $ndpr->ndpr_refrele[$cnt].th
+				end
+				printf "%4d: ", $ix + 1
+				pcprint $kgm_pc
+				printf "\n"
+			end
+			set $ix = $ix + 1
+		end
+		set $cnt = $cnt + 1
+	end
+end
+
+document ndpr_showdbg
+Syntax: (gdb) ndpr_showdbg <addr>
+| Given an nd_prefix structure address, print the debug information
+| related to its refcnt.  This requires the interface address debugging
+| to be turned on, by setting the appropriate flags to the "ifa_debug"
+| boot-args parameter.
+end
+
+set $NDDR_TRACE_HIST_SIZE = nddr_trace_hist_size
+
+define nddr_showdbg
+	set $nddr = (struct nd_defrouter_dbg *)$arg0
+	set $cnt = 0
+
+	printf "Total references:\t%d\n", $nddr->nddr_refhold_cnt
+	printf "Total releases:\t\t%d\n", $nddr->nddr_refrele_cnt
+
+	while $cnt < $NDDR_TRACE_HIST_SIZE
+		set $ix = 0
+		while $ix < $CTRACE_STACK_SIZE
+			set $kgm_pc = $nddr->nddr_refhold[$cnt].pc[$ix]
+			if $kgm_pc != 0
+				if $ix == 0
+					printf "\nHold [%d] (thread %p):\n", \
+					    $cnt, \
+					    $nddr->nddr_refhold[$cnt].th
+				end
+				printf "%4d: ", $ix + 1
+				pcprint $kgm_pc
+				printf "\n"
+			end
+			set $ix = $ix + 1
+		end
+		set $cnt = $cnt + 1
+	end
+	set $cnt = 0
+	while $cnt < $NDDR_TRACE_HIST_SIZE
+		set $ix = 0
+		while $ix < $CTRACE_STACK_SIZE
+			set $kgm_pc = $nddr->nddr_refrele[$cnt].pc[$ix]
+			if $kgm_pc != 0
+				if $ix == 0
+					printf "\nRelease [%d] (thread %p):\n",\
+					    $cnt, \
+					    $nddr->nddr_refrele[$cnt].th
+				end
+				printf "%4d: ", $ix + 1
+				pcprint $kgm_pc
+				printf "\n"
+			end
+			set $ix = $ix + 1
+		end
+		set $cnt = $cnt + 1
+	end
+end
+
+document nddr_showdbg
+Syntax: (gdb) nddr_showdbg <addr>
+| Given an nd_defrouter structure address, print the debug information
+| related to its refcnt.  This requires the interface address debugging
+| to be turned on, by setting the appropriate flags to the "ifa_debug"
+| boot-args parameter.
+end
+
+set $IMO_TRACE_HIST_SIZE = imo_trace_hist_size
+
+define imo_showdbg
+	set $imo = (struct ip_moptions_dbg *)$arg0
+	set $cnt = 0
+
+	printf "Total references:\t%d\n", $imo->imo_refhold_cnt
+	printf "Total releases:\t\t%d\n", $imo->imo_refrele_cnt
+
+	while $cnt < $IMO_TRACE_HIST_SIZE
+		set $ix = 0
+		while $ix < $CTRACE_STACK_SIZE
+			set $kgm_pc = $imo->imo_refhold[$cnt].pc[$ix]
+			if $kgm_pc != 0
+				if $ix == 0
+					printf "\nHold [%d] (thread %p):\n", \
+					    $cnt, \
+					    $imo->imo_refhold[$cnt].th
+				end
+				printf "%4d: ", $ix + 1
+				pcprint $kgm_pc
+				printf "\n"
+			end
+			set $ix = $ix + 1
+		end
+		set $cnt = $cnt + 1
+	end
+	set $cnt = 0
+	while $cnt < $IMO_TRACE_HIST_SIZE
+		set $ix = 0
+		while $ix < $CTRACE_STACK_SIZE
+			set $kgm_pc = $imo->imo_refrele[$cnt].pc[$ix]
+			if $kgm_pc != 0
+				if $ix == 0
+					printf "\nRelease [%d] (thread %p):\n",\
+					    $cnt, \
+					    $imo->imo_refrele[$cnt].th
+				end
+				printf "%4d: ", $ix + 1
+				pcprint $kgm_pc
+				printf "\n"
+			end
+			set $ix = $ix + 1
+		end
+		set $cnt = $cnt + 1
+	end
+end
+
+document imo_showdbg
+Syntax: (gdb) imo_showdbg <addr>
+| Given an ip_moptions structure address, print the debug information
+| related to its refcnt.  This requires the interface address debugging
+| to be turned on, by setting the appropriate flags to the "ifa_debug"
+| boot-args parameter.
+end
+
+set $IM6O_TRACE_HIST_SIZE = im6o_trace_hist_size
+
+define im6o_showdbg
+	set $im6o = (struct ip6_moptions_dbg *)$arg0
+	set $cnt = 0
+
+	printf "Total references:\t%d\n", $im6o->im6o_refhold_cnt
+	printf "Total releases:\t\t%d\n", $im6o->im6o_refrele_cnt
+
+	while $cnt < $IM6O_TRACE_HIST_SIZE
+		set $ix = 0
+		while $ix < $CTRACE_STACK_SIZE
+			set $kgm_pc = $im6o->im6o_refhold[$cnt].pc[$ix]
+			if $kgm_pc != 0
+				if $ix == 0
+					printf "\nHold [%d] (thread %p):\n", \
+					    $cnt, \
+					    $im6o->im6o_refhold[$cnt].th
+				end
+				printf "%4d: ", $ix + 1
+				pcprint $kgm_pc
+				printf "\n"
+			end
+			set $ix = $ix + 1
+		end
+		set $cnt = $cnt + 1
+	end
+	set $cnt = 0
+	while $cnt < $IM6O_TRACE_HIST_SIZE
+		set $ix = 0
+		while $ix < $CTRACE_STACK_SIZE
+			set $kgm_pc = $im6o->im6o_refrele[$cnt].pc[$ix]
+			if $kgm_pc != 0
+				if $ix == 0
+					printf "\nRelease [%d] (thread %p):\n",\
+					    $cnt, \
+					    $im6o->im6o_refrele[$cnt].th
+				end
+				printf "%4d: ", $ix + 1
+				pcprint $kgm_pc
+				printf "\n"
+			end
+			set $ix = $ix + 1
+		end
+		set $cnt = $cnt + 1
+	end
+end
+
+document im6o_showdbg
+Syntax: (gdb) im6o_showdbg <addr>
+| Given an ip6_moptions structure address, print the debug information
+| related to its refcnt.  This requires the interface address debugging
+| to be turned on, by setting the appropriate flags to the "ifa_debug"
+| boot-args parameter.
+end
+
+document in6ifa_trash
+Syntax: (gdb) in6ifa_trash
+| Walk the list of trash in6_ifaddr entries; this requires interface
+| address debugging to be turned on, by setting the appropriate flags
+| to the "ifa_debug" boot-args parameter.
+end
+
+define inifa_trash
+	set $ifa = (struct in_ifaddr_dbg *)inifa_trash_head.tqh_first
+	set $cnt = 0
+	while $ifa != 0
+		if $cnt == 0
+			if $kgm_lp64
+				printf "                 in_ifa ref   hold   rele\n"
+				printf "      ----------------- --- ------ ------\n"
+			else
+				printf "         in_ifa ref   hold   rele\n"
+				printf "      --------- --- ------ ------\n"
+			end
+		end
+		printf "%4d: %p %3d %6d %6d ", $cnt + 1, $ifa, \
+		    $ifa->inifa_refhold_cnt - $ifa->inifa_refrele_cnt, \
+		    $ifa->inifa_refhold_cnt, $ifa->inifa_refrele_cnt
+		showsockaddr_in $ifa->inifa.ia_ifa.ifa_addr
+		printf "\n"
+		set $ifa = $ifa->inifa_trash_link.tqe_next
+		set $cnt = $cnt + 1
+	end
+end
+
+document inifa_trash
+Syntax: (gdb) inifa_trash
+| Walk the list of trash in_ifaddr entries; this requires interface
+| address debugging to be turned on, by setting the appropriate flags
+| to the "ifa_debug" boot-args parameter.
+end
+
+define ifma_trash
+	set $ifma = (struct ifmultiaddr_dbg *)ifma_trash_head.tqh_first
+	set $cnt = 0
+	while $ifma != 0
+		if $cnt == 0
+			if $kgm_lp64
+				printf "                   ifma ref   hold   rele\n"
+				printf "      ----------------- --- ------ ------\n"
+			else
+				printf "           ifma ref   hold   rele\n"
+				printf "      --------- --- ------ ------\n"
+			end
+		end
+		printf "%4d: %p %3d %6d %6d ", $cnt + 1, $ifma, \
+		    $ifma->ifma_refhold_cnt - $ifma->ifma_refrele_cnt, \
+		    $ifma->ifma_refhold_cnt, $ifma->ifma_refrele_cnt
+		showsockaddr $ifma->ifma.ifma_addr
+		printf " @ %s%d", $ifma->ifma.ifma_ifp->if_name, \
+		    $ifma->ifma.ifma_ifp->if_unit
+		printf "\n"
+		set $ifma = $ifma->ifma_trash_link.tqe_next
+		set $cnt = $cnt + 1
+	end
+end
+
+document ifma_trash
+Syntax: (gdb) ifma_trash
+| Walk the list of trash ifmultiaddr entries; this requires interface
+| address debugging to be turned on, by setting the appropriate flags
+| to the "ifa_debug" boot-args parameter.
+end
+
+define inm_trash
+	set $inm = (struct in_multi_dbg *)inm_trash_head.tqh_first
+	set $cnt = 0
+	while $inm != 0
+		if $cnt == 0
+			if $kgm_lp64
+				printf "                    inm ref   hold   rele\n"
+				printf "      ----------------- --- ------ ------\n"
+			else
+				printf "            inm ref   hold   rele\n"
+				printf "      --------- --- ------ ------\n"
+			end
+		end
+		printf "%4d: %p %3d %6d %6d ", $cnt + 1, $inm, \
+		    $inm->inm_refhold_cnt - $inm->inm_refrele_cnt, \
+		    $inm->inm_refhold_cnt, $inm->inm_refrele_cnt
+		show_in_addr &($inm->inm.inm_addr)
+		printf "\n"
+		set $inm = $inm->inm_trash_link.tqe_next
+		set $cnt = $cnt + 1
+	end
+end
+
+document inm_trash
+Syntax: (gdb) inm_trash
+| Walk the list of trash in_multi entries; this requires interface
+| address debugging to be turned on, by setting the appropriate flags
+| to the "ifa_debug" boot-args parameter.
+end
+
+define in6m_trash
+	set $in6m = (struct in6_multi_dbg *)in6m_trash_head.tqh_first
+	set $cnt = 0
+	while $in6m != 0
+		if $cnt == 0
+			if $kgm_lp64
+				printf "                   in6m ref   hold   rele\n"
+				printf "      ----------------- --- ------ ------\n"
+			else
+				printf "           in6m ref   hold   rele\n"
+				printf "      --------- --- ------ ------\n"
 			end
-			set $ix = $ix + 1
 		end
+		printf "%4d: %p %3d %6d %6d ", $cnt + 1, $in6m, \
+		    $in6m->in6m_refhold_cnt - $in6m->in6m_refrele_cnt, \
+		    $in6m->in6m_refhold_cnt, $in6m->in6m_refrele_cnt
+		show_in6_addr &($in6m->in6m.in6m_addr)
+		printf "\n"
+		set $in6m = $in6m->in6m_trash_link.tqe_next
 		set $cnt = $cnt + 1
 	end
 end
 
-document in6ifa_showdbg
-Syntax: (gdb) in6ifa_showdbg <addr>
-| Given an IPv6 interface structure address, print the debug information
-| related to it.  This requires interface address debugging to be turned
-| on, by setting the appropriate flags to the "ifa_debug" boot-args
-| parameter.
+document in6m_trash
+Syntax: (gdb) in6m_trash
+| Walk the list of trash in6_multi entries; this requires interface
+| address debugging to be turned on, by setting the appropriate flags
+| to the "ifa_debug" boot-args parameter.
 end
 
 #
@@ -5835,11 +7564,11 @@ end
 
 define showosmalloc 
 printf "TAG          COUNT     STATE     ATTR     NAME\n"
-set $kgm_tagheadp = (OSMallocTag)&OSMalloc_tag_list
-    set $kgm_tagptr = (OSMallocTag )($kgm_tagheadp->OSMT_link.next)
+set $kgm_tagheadp = (struct _OSMallocTag_ *)&OSMalloc_tag_list
+    set $kgm_tagptr = (struct _OSMallocTag_ * )($kgm_tagheadp->OSMT_link.next)
     while $kgm_tagptr != $kgm_tagheadp
 	ostag_print $kgm_tagptr
-	set $kgm_tagptr = (OSMallocTag)$kgm_tagptr->OSMT_link.next
+	set $kgm_tagptr = (struct _OSMallocTag_ *)$kgm_tagptr->OSMT_link.next
     end
 	printf "\n"
 end
@@ -5850,7 +7579,8 @@ end
 
 
 define systemlog
-    if msgbufp->msg_bufc[msgbufp->msg_bufx] == 0
+    if msgbufp->msg_bufc[msgbufp->msg_bufx] == 0 \
+       && msgbufp->msg_bufc[0] != 0
         # The buffer hasn't wrapped, so take the easy (and fast!) path
         printf "%s", msgbufp->msg_bufc
     else
@@ -5878,7 +7608,9 @@ define systemlog
         set $kgm_i = 0
         while $kgm_i < $kgm_syslog_bufend
             set $kgm_syslog_char = $kgm_msgbuf.msg_bufc[$kgm_i]
-            printf "%c", $kgm_syslog_char
+            if $kgm_syslog_char != 0
+                printf "%c", $kgm_syslog_char
+            end
             set $kgm_i = $kgm_i + 1
         end
     end
@@ -5970,17 +7702,27 @@ define showsockaddr_at
     printcolonhex $addr $count
 end
 
+define show_in_addr
+    set $ia = (unsigned char *)$arg0
+    printf "%3u.%03u.%03u.%03u", $ia[0], $ia[1], $ia[2], $ia[3]
+end
+
 define showsockaddr_in
     set $sin = (struct sockaddr_in *)$arg0
     set $sa_bytes = (unsigned char *)&($sin->sin_addr)
-    printf "%3u.%03u.%03u.%03u", $sa_bytes[0], $sa_bytes[1], $sa_bytes[2], $sa_bytes[3]
+    show_in_addr $sa_bytes
+end
+
+define show_in6_addr
+    set $ia = (unsigned char *)$arg0
+    printf "%2x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x", \
+    	$ia[0], $ia[1], $ia[2], $ia[3], $ia[4], $ia[5], $ia[6], $ia[7], $ia[8], $ia[9], $ia[10], $ia[11], $ia[12], $ia[13], $ia[14], $ia[15]
 end
 
 define showsockaddr_in6
     set $sin6 = (struct sockaddr_in6 *)$arg0
     set $sa_bytes = $sin6->sin6_addr.__u6_addr.__u6_addr8
-    printf "%2x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x", \
-    	$sa_bytes[0], $sa_bytes[1], $sa_bytes[2], $sa_bytes[3], $sa_bytes[4], $sa_bytes[5], $sa_bytes[6], $sa_bytes[7], $sa_bytes[8], $sa_bytes[9], $sa_bytes[10], $sa_bytes[11], $sa_bytes[12], $sa_bytes[13], $sa_bytes[14], $sa_bytes[15]
+    show_in6_addr $sa_bytes
 end
 
 define showsockaddr_un
@@ -6002,7 +7744,7 @@ define showifmultiaddrs
    set $mymulti = $if_multi
    set $myi = 0
    while ($mymulti != 0)
-	printf "%2d. ", $myi
+	printf "%2d. %p ", $myi, $mymulti
 	set $sa_family = $mymulti->ifma_addr.sa_family
 	if ($sa_family == 2)
 	    if ($mymulti->ifma_ll != 0)
@@ -6036,6 +7778,48 @@ Syntax showifmultiaddrs <ifp>
 | show the (struct ifnet).if_multiaddrs list of multicast addresses for the given ifp
 end
 
+define showinmultiaddrs
+   set $in_multi = (struct in_multi *)(in_multihead->lh_first)
+   set $mymulti = $in_multi
+   set $myi = 0
+   while ($mymulti != 0)
+	set $ifp = (struct ifnet *)$mymulti->inm_ifp
+	printf "%2d. %p ", $myi, $mymulti
+	show_in_addr &($mymulti->inm_addr)
+	printf " (ifp %p [%s%d] ifma %p) ", $ifp, $ifp->if_name, \
+	    $ifp->if_unit, $mymulti->inm_ifma
+	printf "\n"
+	set $mymulti = $mymulti->inm_link.le_next
+	set $myi = $myi + 1
+   end
+end
+
+document showinmultiaddrs
+Syntax: (gdb) showinmultiaddrs
+| show the contents of IPv4 multicast address records
+end
+
+define showin6multiaddrs
+   set $in6_multi = (struct in6_multi *)(in6_multihead->lh_first)
+   set $mymulti = $in6_multi
+   set $myi = 0
+   while ($mymulti != 0)
+	set $ifp = (struct ifnet *)$mymulti->in6m_ifp
+	printf "%2d. %p ", $myi, $mymulti
+	show_in6_addr &($mymulti->in6m_addr)
+	printf " (ifp %p [%s%d] ifma %p) ", $ifp, $ifp->if_name, \
+	    $ifp->if_unit, $mymulti->in6m_ifma
+	printf "\n"
+	set $mymulti = $mymulti->in6m_entry.le_next
+	set $myi = $myi + 1
+   end
+end
+
+document showin6multiaddrs
+Syntax: (gdb) showin6multiaddrs
+| show the contents of IPv6 multicast address records
+end
+
 define showsockaddr
     set $mysock = (struct sockaddr *)$arg0
     set $showsockaddr_handled = 0
@@ -6116,10 +7900,10 @@ define showifflags
 	    end
 	    printf "POINTTOPOINT"
 	end
-#	if ($flags & 0x20)
-#	    if ($first == 1)
+##	if ($flags & 0x20)
+##	    if ($first == 1)
 #		set $first = 0
-#	    else
+##	    else
 #	    	printf ","
 #	    end
 #	    printf "NOTRAILERS"
@@ -6212,7 +7996,7 @@ define showifaddrs
    set $myifaddr = (struct ifaddr *)$ifp->if_addrhead->tqh_first
    set $myi = 0
    while ($myifaddr != 0)
-	printf "\t%d. ", $myi
+	printf "\t%d. %p ", $myi, $myifaddr
 	showsockaddr $myifaddr->ifa_addr
 	printf " [%d]\n", $myifaddr->ifa_refcnt
 	set $myifaddr = $myifaddr->ifa_link->tqe_next
@@ -6230,7 +8014,7 @@ define ifconfig
    if ($argc == 1)
 	set $ifconfig_all = 1
    end
-   set $ifp = (struct ifnet *)(ifnet->tqh_first)
+   set $ifp = (struct ifnet *)(ifnet_head->tqh_first)
    while ($ifp != 0)
 	printf "%s%d: flags=%hx", $ifp->if_name, $ifp->if_unit, (u_short)$ifp->if_flags
 	showifflags $ifp->if_flags
@@ -6250,6 +8034,44 @@ Syntax: (gdb) ifconfig
 | display ifconfig-like output, and print the (struct ifnet *) pointers for further inspection
 end
 
+set $DLIF_INUSE	= 0x1
+set $DLIF_REUSE	= 0x2
+
+define showifnets
+	set $all = 0
+	if ($argc == 1)
+		set $all = 1
+	end
+	set $dlifp = (struct dlil_ifnet *)(dlil_ifnet_head->tqh_first)
+	while ($dlifp != 0)
+		set $ifp = (struct ifnet *)$dlifp
+		if ($dlifp->dl_if_flags & $DLIF_REUSE)
+			printf "*"
+		end
+		if ($dlifp->dl_if_flags & $DLIF_INUSE)
+			printf "%s%d: ", $ifp->if_name, $ifp->if_unit
+		else
+			printf "[%s%d]: ", $ifp->if_name, $ifp->if_unit
+		end
+		printf "flags=%hx", (u_short)$ifp->if_flags
+		showifflags $ifp->if_flags
+		printf " index %d", $ifp->if_index
+		printf " mtu %d\n", $ifp->if_data.ifi_mtu
+		printf "\t(struct ifnet *)"
+		showptr $ifp
+		printf "\n"
+		if ($all == 1) 
+			showifaddrs $ifp
+		end
+		set $dlifp = $dlifp->dl_if_link->tqe_next
+	end
+end
+
+document showifnets
+Syntax: (gdb) showifnets
+| Display ifconfig-like output for all attached and detached interfaces
+end
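+
+# Reading the output: a leading "*" marks a dlil_ifnet with DLIF_REUSE
+# set, and a bracketed name marks one that is allocated but not in use
+# (DLIF_INUSE clear), i.e. a detached interface.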
+
 define _show_unix_domain_socket
 	set $so = (struct socket *)$arg0
 	set $pcb = (struct unpcb *)$so->so_pcb
@@ -6663,7 +8485,9 @@ set $UDBHASHSIZE=16
 
 define _dump_pcbinfo
 	set $snd_cc = 0
+	set $snd_buf = (unsigned int)0
 	set $rcv_cc = 0
+	set $rcv_buf = (unsigned int)0
 	set $pcbseen = 0
 	set $pcbi = (struct inpcbinfo *)$arg0
 	printf "lastport %d lastlow %d lasthi %d\n", \
@@ -6691,7 +8515,23 @@ define _dump_pcbinfo
 				set $so = (struct socket *)$pcb->inp_socket
 				if $so != 0
 					set $snd_cc += $so->so_snd.sb_cc
-					set $rcv_cc += $so-> so_rcv.sb_cc
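+					# also account for buffer space:
+					# each mbuf is 256 (MSIZE) bytes and
+					# an M_EXT (0x01) mbuf additionally
+					# owns a cluster of ext_size bytes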
+					set $mp = $so->so_snd.sb_mb
+					while $mp
+						set $snd_buf += 256
+						if ($mp->m_hdr.mh_flags & 0x01)
+							set $snd_buf += $mp->M_dat.MH.MH_dat.MH_ext.ext_size
+						end
+						set $mp = $mp->m_hdr.mh_next
+					end
+					set $rcv_cc += $so->so_rcv.sb_cc
+					set $mp = $so->so_rcv.sb_mb
+					while $mp
+						set $rcv_buf += 256
+						if ($mp->m_hdr.mh_flags & 0x01)
+							set $rcv_buf += $mp->M_dat.MH.MH_dat.MH_ext.ext_size
+						end
+						set $mp = $mp->m_hdr.mh_next
+					end
 				end
 				set $pcb0 = $pcb0->inp_hash.le_next
 				printf "\n"
@@ -6702,6 +8542,7 @@ define _dump_pcbinfo
 		set $head = *(uintptr_t *)$hashbase
 	end
 	printf "total seen %ld snd_cc %ld rcv_cc %ld\n", $pcbseen, $snd_cc, $rcv_cc
+	printf "total snd_buf %u rcv_buf %u \n", (unsigned int)$snd_buf, (unsigned int)$rcv_buf
 	printf "port hash base is %p\n", $pcbi->porthashbase
 	set $i = 0
 	set $hashbase = $pcbi->porthashbase
@@ -6922,10 +8763,16 @@ define showvnodeint
 
 	showptr $kgm_vnode->v_parent
     printf "  "
-	if $kgm_vnode->v_name != 0
+	if ($kgm_vnode->v_name != 0)
 		printf "%s\n", $kgm_vnode->v_name
-	else
-		printf "\n"
+	else 
+		# If this is HFS vnode, get name from the cnode
+		if ($kgm_vnode->v_tag == 16) 
+			set $kgm_cnode = (struct cnode *)$kgm_vnode->v_data
+			printf "hfs: %s\n", (char *)$kgm_cnode->c_desc->cd_nameptr
+		else 
+			printf "\n"
+		end
 	end
 end
 
@@ -7195,24 +9042,25 @@ end
 
 
 define showstacksaftertask
-    set $kgm_head_taskp = &default_pset.tasks
+    set $kgm_head_taskp = &tasks
     set $kgm_taskp = (struct task *)$arg0
+    set $kgm_taskp = (struct task *)$kgm_taskp->tasks.next
     while $kgm_taskp != $kgm_head_taskp
-        showtaskheader
-        showtaskint $kgm_taskp
-        set $kgm_head_actp = &($kgm_taskp->threads)
-        set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) 
-        while $kgm_actp != $kgm_head_actp
-            showactheader
-            if ($decode_wait_events > 0)
-               showactint $kgm_actp 1
-            else
-               showactint $kgm_actp 2
-            end
-            set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next)
-        end
-        printf "\n"
-        set $kgm_taskp = (struct task *)($kgm_taskp->pset_tasks.next)
+	showtaskheader
+	showtaskint $kgm_taskp
+	set $kgm_head_actp = &($kgm_taskp->threads)
+	set $kgm_actp = (struct thread *)($kgm_taskp->threads.next)
+	while $kgm_actp != $kgm_head_actp
+	    showactheader
+	    if ($decode_wait_events > 0)
+	       showactint $kgm_actp 1
+	    else
+	       showactint $kgm_actp 2
+	    end
+	    set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next)
+	end
+	printf "\n"
+	set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next)
     end
 end
 document showstacksaftertask
@@ -7222,20 +9070,19 @@ Syntax: (gdb) showstacksaftertask <task>
 end
 
 define showpmworkqueueint
-    set $kgm_pm_wq = (IOPMWorkQueue *)$arg0
-    set $kgm_pm_node = (IOService *)$kgm_pm_wq->owner
-    showptr $kgm_pm_wq
-    printf "  "
-    showptr $kgm_pm_node
-    printf "  "
-    printf "%02d  ", $kgm_pm_node->pwrMgt->CurrentPowerState
-    printf "%02d  ", $kgm_pm_node->pwrMgt->MachineState
-    printf "%02d  ", $kgm_pm_node->pwrMgt->WaitReason
-    printf "%s\n", $kgm_pm_node->pwrMgt->Name
-    set $kgm_pm_queue = &($kgm_pm_wq->fWorkQueue)
-    set $kgm_pm_req = (IOPMRequest *)$kgm_pm_queue->next
-    if ((queue_entry_t) $kgm_pm_req != (queue_entry_t) $kgm_pm_queue)
-        printf "\n"
+    set $kgm_pm_workqueue = (IOPMWorkQueue *)$arg0
+    set $kgm_pm_wq = &($kgm_pm_workqueue->fWorkQueue)
+    set $kgm_pm_wqe = (IOServicePM *)$kgm_pm_wq->next
+    while ((queue_entry_t) $kgm_pm_wqe != (queue_entry_t) $kgm_pm_wq)
+        printf "service   "
+        showptrhdrpad
+        printf "  ps  ms  wr  name\n"
+        showptr $kgm_pm_wqe->Owner
+        printf "  "
+        printf "%02d  ", $kgm_pm_wqe->CurrentPowerState
+        printf "%02d  ", $kgm_pm_wqe->MachineState
+        printf "%02d  ", $kgm_pm_wqe->WaitReason
+        printf "%s\n", $kgm_pm_wqe->Name
         printf "request   "
         showptrhdrpad
         printf "  type  next      "
@@ -7243,158 +9090,179 @@ define showpmworkqueueint
         printf "  root      "
         showptrhdrpad
         printf "  work_wait   free_wait\n"
-        while ((queue_entry_t) $kgm_pm_req != (queue_entry_t) $kgm_pm_queue)
-            showptr $kgm_pm_req
-            printf "  0x%02x  ", $kgm_pm_req->fType
-            showptr $kgm_pm_req->fRequestNext
+        set $kgm_pm_rq = &($kgm_pm_wqe->RequestHead)
+        set $kgm_pm_rqe = (IOPMRequest *)$kgm_pm_rq->next
+        while ((queue_entry_t) $kgm_pm_rqe != (queue_entry_t) $kgm_pm_rq)
+            showptr $kgm_pm_rqe
+            printf "  0x%02x  ", $kgm_pm_rqe->fType
+            showptr $kgm_pm_rqe->fRequestNext
             printf "  "
-            showptr $kgm_pm_req->fRequestRoot
-            printf "  0x%08x  0x%08x\n", $kgm_pm_req->fWorkWaitCount, $kgm_pm_req->fFreeWaitCount
+            showptr $kgm_pm_rqe->fRequestRoot
+            printf "  0x%08x  0x%08x\n", $kgm_pm_rqe->fWorkWaitCount, $kgm_pm_rqe->fFreeWaitCount
             showptrhdrpad
             printf "            args  "
-            showptr $kgm_pm_req->fArg0
+            showptr $kgm_pm_rqe->fArg0
             printf "  "
-            showptr $kgm_pm_req->fArg1
+            showptr $kgm_pm_rqe->fArg1
             printf "  "
-            showptr $kgm_pm_req->fArg2
+            showptr $kgm_pm_rqe->fArg2
             printf "\n"
-            set $kgm_pm_req = (IOPMRequest *)$kgm_pm_req->fCommandChain.next
+            set $kgm_pm_rqe = (IOPMRequest *)$kgm_pm_rqe->fCommandChain.next
         end
         printf "\n"
+        set $kgm_pm_wqe = (IOServicePM *)$kgm_pm_wqe->WorkChain.next
     end
 end
 
-define showallpmworkqueues
-    set $kgm_pm_next = gIOPMWorkLoop->eventChain
-    printf "queue     "
-    showptrhdrpad
-    printf "  owner     "
-    showptrhdrpad
-    printf "  ps  ms  wr  name\n"
-    while ( $kgm_pm_next )
-        set $kgm_vt = *((void **) $kgm_pm_next)
-        if ($kgm_lp64 || $kgm_mtype == $kgm_mtype_arm)
-            set $kgm_vt = $kgm_vt -  2 * sizeof(void *)
-        end
-        if ($kgm_vt == &_ZTV13IOPMWorkQueue)
-            showpmworkqueueint $kgm_pm_next
-        end
-        set $kgm_pm_next = $kgm_pm_next->eventChainNext
+define showpmworkqueue
+    printf "IOPMWorkQueue "
+    showptr gIOPMWorkQueue
+    printf " length "
+    printf "%u", gIOPMWorkQueue->fQueueLength
+    printf "\n"
+    if (gIOPMWorkQueue->fQueueLength > 0)
+        showpmworkqueueint gIOPMWorkQueue
     end
 end
 
-document showallpmworkqueues
-Syntax: (gdb) showallpmworkqueues
-| Display info about all IOPMWorkQueue objects
+document showpmworkqueue
+Syntax: (gdb) showpmworkqueue
+| Display the IOPMWorkQueue object
 end
 
 define showioservicepm
-	set $kgm_iopmpriv = (IOServicePM *)$arg0
-	printf "{ "
-	printf "MachineState = %d (", $kgm_iopmpriv->MachineState
-	if ( $kgm_iopmpriv->MachineState == 1 )
-		printf "kIOPM_OurChangeTellClientsPowerDown"
-	else
-		if ( $kgm_iopmpriv->MachineState == 2 )
-			printf "kIOPM_OurChangeTellPriorityClientsPowerDown"
-		else
-			if ( $kgm_iopmpriv->MachineState == 3 )
-				printf "kIOPM_OurChangeNotifyInterestedDriversWillChange"
-			else
-				if ( $kgm_iopmpriv->MachineState == 4 )
-					printf "kIOPM_OurChangeSetPowerState"
-				else
-					if ( $kgm_iopmpriv->MachineState == 5 )
-						printf "kIOPM_OurChangeWaitForPowerSettle"
-					else
-						if ( $kgm_iopmpriv->MachineState == 6 )
-							printf "kIOPM_OurChangeNotifyInterestedDriversDidChange"
-						else
-							if ( $kgm_iopmpriv->MachineState == 7 )
-								printf "kIOPM_OurChangeFinish"
-							else
-								if ( $kgm_iopmpriv->MachineState == 8 )
-									printf "kIOPM_ParentDownTellPriorityClientsPowerDown"
-								else
-									if ( $kgm_iopmpriv->MachineState == 9 )
-										printf "kIOPM_ParentDownNotifyInterestedDriversWillChange"
-									else
-										if ( $kgm_iopmpriv->MachineState == 10 )
-											printf "Unused_MachineState_10"
-										else
-											if ( $kgm_iopmpriv->MachineState == 11 )
-												printf "kIOPM_ParentDownNotifyDidChangeAndAcknowledgeChange"
-											else
-												if ( $kgm_iopmpriv->MachineState == 12 )
-													printf "kIOPM_ParentDownSetPowerState"
-												else
-													if ( $kgm_iopmpriv->MachineState == 13 )
-														printf "kIOPM_ParentDownWaitForPowerSettle"
-													else
-														if ( $kgm_iopmpriv->MachineState == 14 )
-															printf "kIOPM_ParentDownAcknowledgeChange"
-														else
-															if ( $kgm_iopmpriv->MachineState == 15)
-																printf "kIOPM_ParentUpSetPowerState"
-															else
-																if ( $kgm_iopmpriv->MachineState == 16)
-																	printf "Unused_MachineState_16"
-																else
-																	if ( $kgm_iopmpriv->MachineState == 17)
-																		printf "kIOPM_ParentUpWaitForSettleTime"
-																	else
-																		if ( $kgm_iopmpriv->MachineState == 18)
-																			printf "kIOPM_ParentUpNotifyInterestedDriversDidChange"
-																		else
-																			if ( $kgm_iopmpriv->MachineState == 19)
-																				printf "kIOPM_ParentUpAcknowledgePowerChange"
-																			else
-																				if ( $kgm_iopmpriv->MachineState == 20)
-																					printf "kIOPM_Finished"
-                                                                                else
-                                                                                    if ( $kgm_iopmpriv->MachineState == 21)
-                                                                                        printf "kIOPM_DriverThreadCallDone"
-                                                                                    else
-                                                                                        if ( $kgm_iopmpriv->MachineState == 22)
-                                                                                            printf "kIOPM_NotifyChildrenDone"
-                                                                                        end
-                                                                                    end
-																				end
-																			end
-																		end
-																	end
-																end
-															end
-														end
-													end
-												end
-											end
-										end
-									end
-								end
-							end
-						end
-					end
-				end
-			end
-		end
+    set $kgm_iopmpriv = (IOServicePM *)$arg0
+    printf "{ "
+    printf "MachineState = %d (", $kgm_iopmpriv->MachineState
+    if ( $kgm_iopmpriv->MachineState == 0 )
+        printf "kIOPM_Finished"
+    else
+    if ( $kgm_iopmpriv->MachineState == 1 )
+        printf "kIOPM_OurChangeTellClientsPowerDown"
+    else
+    if ( $kgm_iopmpriv->MachineState == 2 )
+        printf "kIOPM_OurChangeTellPriorityClientsPowerDown"
+    else
+    if ( $kgm_iopmpriv->MachineState == 3 )
+        printf "kIOPM_OurChangeNotifyInterestedDriversWillChange"
+    else
+    if ( $kgm_iopmpriv->MachineState == 4 )
+        printf "kIOPM_OurChangeSetPowerState"
+    else
+    if ( $kgm_iopmpriv->MachineState == 5 )
+        printf "kIOPM_OurChangeWaitForPowerSettle"
+    else
+    if ( $kgm_iopmpriv->MachineState == 6 )
+        printf "kIOPM_OurChangeNotifyInterestedDriversDidChange"
+    else
+    if ( $kgm_iopmpriv->MachineState == 7 )
+        printf "kIOPM_OurChangeTellCapabilityDidChange"
+    else
+    if ( $kgm_iopmpriv->MachineState == 8 )
+        printf "kIOPM_OurChangeFinish"
+    else
+    if ( $kgm_iopmpriv->MachineState == 9 )
+        printf "Unused_MachineState_9"
+    else
+    if ( $kgm_iopmpriv->MachineState == 10 )
+        printf "kIOPM_ParentChangeTellPriorityClientsPowerDown"
+    else
+    if ( $kgm_iopmpriv->MachineState == 11 )
+        printf "kIOPM_ParentChangeNotifyInterestedDriversWillChange"
+    else
+    if ( $kgm_iopmpriv->MachineState == 12 )
+        printf "kIOPM_ParentChangeSetPowerState"
+    else
+    if ( $kgm_iopmpriv->MachineState == 13 )
+        printf "kIOPM_ParentChangeWaitForPowerSettle"
+    else
+    if ( $kgm_iopmpriv->MachineState == 14)
+        printf "kIOPM_ParentChangeNotifyInterestedDriversDidChange"
+    else
+    if ( $kgm_iopmpriv->MachineState == 15)
+        printf "kIOPM_ParentChangeTellCapabilityDidChange"
+    else
+    if ( $kgm_iopmpriv->MachineState == 16)
+        printf "kIOPM_ParentChangeAcknowledgePowerChange"
+    else
+    if ( $kgm_iopmpriv->MachineState == 17)
+        printf "kIOPM_NotifyChildrenStart"
+    else
+    if ( $kgm_iopmpriv->MachineState == 18)
+        printf "kIOPM_NotifyChildrenOrdered"
+    else
+    if ( $kgm_iopmpriv->MachineState == 19)
+        printf "kIOPM_NotifyChildrenDelayed"
+    else
+    if ( $kgm_iopmpriv->MachineState == 20)
+        printf "kIOPM_SyncTellClientsPowerDown"
+    else
+    if ( $kgm_iopmpriv->MachineState == 21)
+        printf "kIOPM_SyncTellPriorityClientsPowerDown"
+    else
+    if ( $kgm_iopmpriv->MachineState == 22)
+        printf "kIOPM_SyncNotifyWillChange"
+    else
+    if ( $kgm_iopmpriv->MachineState == 23)
+        printf "kIOPM_SyncNotifyDidChange"
+    else
+    if ( $kgm_iopmpriv->MachineState == 24)
+        printf "kIOPM_SyncTellCapabilityDidChange"
+    else
+    if ( $kgm_iopmpriv->MachineState == 25)
+        printf "kIOPM_SyncFinish"
+    else
+    if ( $kgm_iopmpriv->MachineState == 26)
+        printf "kIOPM_TellCapabilityChangeDone"
+    else
+    if ( $kgm_iopmpriv->MachineState == 27)
+        printf "kIOPM_DriverThreadCallDone"
+    else
+        printf "Unknown_MachineState"
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
+    end
 	end
 	printf "), "
 	
-	if ( $kgm_iopmpriv->MachineState != 20 )
+	# (kIOPM_Finished is 0 in the renumbered machine states above)
+	if ( $kgm_iopmpriv->MachineState != 0 )
         printf "DriverTimer = %d, ",(unsigned int)$kgm_iopmpriv->DriverTimer
         printf "SettleTime  = %d, ",(unsigned int)$kgm_iopmpriv->SettleTimeUS
-        printf "HeadNoteFlags = %08x, ",(unsigned int)$kgm_iopmpriv->HeadNoteFlags
+        printf "HeadNoteFlags = %08x, ",(unsigned int)$kgm_iopmpriv->HeadNoteChangeFlags
         printf "HeadNotePendingAcks = %x, ",(unsigned int)$kgm_iopmpriv->HeadNotePendingAcks
 	end
 
-    if ( $kgm_iopmpriv->DeviceOverrides != 0 )
+    if ( $kgm_iopmpriv->DeviceOverrideEnabled != 0 )
         printf"DeviceOverrides, "
     end
 	
     printf "DeviceDesire = %d, ",(unsigned int)$kgm_iopmpriv->DeviceDesire
     printf "DesiredPowerState = %d, ",(unsigned int)$kgm_iopmpriv->DesiredPowerState
-    printf "PreviousRequest = %d }\n",(unsigned int)$kgm_iopmpriv->PreviousRequest
+    printf "PreviousRequest = %d }\n",(unsigned int)$kgm_iopmpriv->PreviousRequestPowerFlags
 end
 
 document showioservicepm
@@ -7506,25 +9374,29 @@ Syntax: (gdb) showregistrypmstate
 end
 
 define showstacksafterthread
-    set $kgm_head_taskp = &default_pset.tasks
+    set $kgm_head_taskp = &tasks
     set $kgm_actp = (struct thread *)$arg0
     set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next)
     set $kgm_taskp = (struct task *)$kgm_actp->task
     while $kgm_taskp != $kgm_head_taskp
-        showtaskheader
-        showtaskint $kgm_taskp
-        set $kgm_head_actp = &($kgm_taskp->threads)
-        while $kgm_actp != $kgm_head_actp
-            showactheader
-            if ($decode_wait_events > 0)
-               showactint $kgm_actp 1
-            else
-               showactint $kgm_actp 2
-            end
-            set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next)
-        end
-        printf "\n"
-        set $kgm_taskp = (struct task *)($kgm_taskp->pset_tasks.next) 
+	showtaskheader
+	showtaskint $kgm_taskp
+	set $kgm_head_actp = &($kgm_taskp->threads)
+	if $kgm_actp == 0
+	    set $kgm_actp = (struct thread *)($kgm_taskp->threads.next)
+	end
+	while $kgm_actp != $kgm_head_actp
+	    showactheader
+	    if ($decode_wait_events > 0)
+	       showactint $kgm_actp 1
+	    else
+	       showactint $kgm_actp 2
+	    end
+	    set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next)
+	end
+	printf "\n"
+	set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next)
+	set $kgm_actp = 0
     end
 end
 
@@ -7617,17 +9489,17 @@ define _pt_step
     set $kgm_entryp = $kgm_pt_paddr + 8*$kgm_pt_index
     readphysint $kgm_entryp 64 $kgm_lcpu_self
     set $entry = $kgm_readphysint_result
-    if $kgm_pt_verbose == 2
+    if $kgm_pt_verbose >= 3
         set $kgm_pte_loop = 0
-	while $kgm_pte_loop < 512
-	    set $kgm_pt_paddr_tmp = $kgm_pt_paddr + $kgm_pte_loop*8
-	    readphys64 $kgm_pt_paddr_tmp
-	    set $kgm_pte_loop = $kgm_pte_loop + 1
-	end
+        while $kgm_pte_loop < 512
+            set $kgm_pt_paddr_tmp = $kgm_pt_paddr + $kgm_pte_loop*8
+            readphys64 $kgm_pt_paddr_tmp
+            set $kgm_pte_loop = $kgm_pte_loop + 1
+        end
     end
     set $kgm_paddr_mask = ~((0xfffULL<<52) | 0xfffULL)
     set $kgm_paddr_largemask = ~((0xfffULL<<52) | 0x1fffffULL)
-    if $kgm_pt_verbose == 0
+    if $kgm_pt_verbose < 2
         if $entry & (0x1 << 0)
             set $kgm_pt_valid = 1
             if $entry & (0x1 << 7)
@@ -7636,7 +9508,7 @@ define _pt_step
             else
                 set $kgm_pt_large = 0
                 set $kgm_pt_paddr = $entry & $kgm_paddr_mask
-            end	    
+            end     
         else
             set $kgm_pt_valid = 0
             set $kgm_pt_large = 0
@@ -7645,7 +9517,7 @@ define _pt_step
     else
         printf "0x%016llx:\n\t0x%016llx\n\t", $kgm_entryp, $entry
         if $entry & (0x1 << 0)
-            printf "valid"	
+            printf "valid"      
             set $kgm_pt_paddr = $entry & $kgm_paddr_mask
             set $kgm_pt_valid = 1
         else
@@ -7689,17 +9561,16 @@ define _pt_step
         if $entry & (0x3 << 9)
             printf " avail:0x%x", ($entry >> 9) & 0x3
         end
-        if $entry & (0x1 << 63)
+        if $entry & (0x1ULL << 63)
             printf " noexec" 
         end
         printf "\n"
     end
 end
 
-define _pmap_walk
-    set $kgm_pmap = (pmap_t) $arg0
+define _pml4_walk
+    set $kgm_pt_paddr = $arg0
     set $kgm_vaddr = $arg1
-    set $kgm_pt_paddr = $kgm_pmap->pm_cr3
     set $kgm_pt_valid = $kgm_pt_paddr != 0
     set $kgm_pt_large = 0
     set $kgm_pframe_offset = 0
@@ -7707,7 +9578,7 @@ define _pmap_walk
         # Look up bits 47:39 of the linear address in PML4T
         set $kgm_pt_index = ($kgm_vaddr >> 39) & 0x1ffULL
         set $kgm_pframe_offset = $kgm_vaddr & 0x7fffffffffULL
-        if $kgm_pt_verbose
+        if $kgm_pt_verbose >= 2
             printf "pml4 (index %d):\n", $kgm_pt_index
         end
         _pt_step
@@ -7716,7 +9587,7 @@ define _pmap_walk
         # Look up bits 38:30 of the linear address in PDPT
         set $kgm_pt_index = ($kgm_vaddr >> 30) & 0x1ffULL
         set $kgm_pframe_offset = $kgm_vaddr & 0x3fffffffULL
-        if $kgm_pt_verbose
+        if $kgm_pt_verbose >= 2
             printf "pdpt (index %d):\n", $kgm_pt_index
         end
         _pt_step
@@ -7725,7 +9596,7 @@ define _pmap_walk
         # Look up bits 29:21 of the linear address in PDT
         set $kgm_pt_index = ($kgm_vaddr >> 21) & 0x1ffULL
         set $kgm_pframe_offset = $kgm_vaddr & 0x1fffffULL
-        if $kgm_pt_verbose
+        if $kgm_pt_verbose >= 2
             printf "pdt (index %d):\n", $kgm_pt_index
         end
         _pt_step
@@ -7734,37 +9605,60 @@ define _pmap_walk
         # Look up bits 20:21 of the linear address in PT
         set $kgm_pt_index = ($kgm_vaddr >> 12) & 0x1ffULL
         set $kgm_pframe_offset = $kgm_vaddr & 0xfffULL
-        if $kgm_pt_verbose
+        if $kgm_pt_verbose >= 2
             printf "pt (index %d):\n", $kgm_pt_index
         end
         _pt_step
     end
+
     if $kgm_pt_valid
         set $kgm_paddr = $kgm_pt_paddr + $kgm_pframe_offset
-        readphysint $kgm_paddr 32 $kgm_lcpu_self
-        set $kgm_value = $kgm_readphysint_result
-        printf "phys 0x%016llx: 0x%08x\n", $kgm_paddr, $kgm_value
+        set $kgm_paddr_isvalid = 1
     else
         set $kgm_paddr = 0
-        printf "(no translation)\n"
+        set $kgm_paddr_isvalid = 0
+    end
+
+    if $kgm_pt_verbose >= 1
+        if $kgm_paddr_isvalid
+            readphysint $kgm_paddr 32 $kgm_lcpu_self
+            set $kgm_value = $kgm_readphysint_result
+            printf "phys 0x%016llx: 0x%08x\n", $kgm_paddr, $kgm_value
+        else
+            printf "(no translation)\n"
+        end
     end
 end
 
+define _pmap_walk_x86
+    set $kgm_pmap = (pmap_t) $arg0
+    _pml4_walk $kgm_pmap->pm_cr3 $arg1
+end
+
+define _pmap_walk_arm
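+    # no pagetable walker for ARM here; report "no translation" so
+    # callers fall back gracefully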
+    set $kgm_paddr = 0
+    set $kgm_paddr_isvalid = 0
+end
+
 define pmap_walk
-    if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any)
-        printf "Not available for current architecture.\n"
+    if $argc != 2
+        printf "pmap_walk <pmap> <vaddr>\n"
     else
-        if $argc != 2
-            printf "pmap_walk <pmap> <vaddr>\n"
+        if !$kgm_pt_verbose
+            set $kgm_pt_verbose = 2
+        else
+            if $kgm_pt_verbose > 3
+                set $kgm_pt_verbose = 2
+            end
+        end
+        if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any)
+            _pmap_walk_x86 $arg0 $arg1
         else
-            if !$kgm_pt_verbose
-                set $kgm_pt_verbose = 1
+            if ($kgm_mtype == $kgm_mtype_arm)
+                _pmap_walk_arm $arg0 $arg1
             else
-                if $kgm_pt_verbose != 2
-                    set $kgm_pt_verbose = 1
-                end
+                printf "Not available for current architecture.\n"
             end
-            _pmap_walk $arg0 $arg1
         end
     end
 end
@@ -7772,18 +9666,27 @@ end
 document pmap_walk
 Syntax: (gdb) pmap_walk <pmap> <virtual_address>
 | Perform a page-table walk in <pmap> for <virtual_address>.
-| Set $kgm_pt_verbose=2 for full hex dump of page tables.
+| Set:
+|     $kgm_pt_verbose=0 for no output, $kgm_paddr will be set
+|                       if $kgm_paddr_isvalid is 1
+|     $kgm_pt_verbose=1 for final physical address
+|     $kgm_pt_verbose=2 for dump of page table entry.
+|     $kgm_pt_verbose=3 for full hex dump of page tables.
 end
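+
+# A minimal usage sketch (the virtual address is illustrative;
+# kernel_pmap is the kernel's own pmap):
+#   (gdb) set $kgm_pt_verbose = 3
+#   (gdb) pmap_walk kernel_pmap 0xffffff8000200000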
 
 define pmap_vtop
-    if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any)
-        printf "Not available for current architecture.\n"
+    if $argc != 2
+        printf "pmap_vtop <pmap> <vaddr>\n"
     else
-        if $argc != 2
-            printf "pmap_vtop <pamp> <vaddr>\n"
+        set $kgm_pt_verbose = 1
+        if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any)
+            _pmap_walk_x86 $arg0 $arg1
         else
-            set $kgm_pt_verbose = 0
-            _pmap_walk $arg0 $arg1
+            if ($kgm_mtype == $kgm_mtype_arm)
+                _pmap_walk_arm $arg0 $arg1
+            else
+                printf "Not available for current architecture.\n"
+            end
         end
     end
 end
@@ -7971,7 +9874,12 @@ end
 # in the kernel's address space and use that instead. Don't rely on
 # kdp_pmap between invocations of map/unmap. Since the shadow
 # codepath uses a manual KDP packet, request no more than 128 bytes.
-# Uses $kgm_lp64 for kernel address space size
+# Uses $kgm_lp64 for kernel address space size, and
+# $kgm_readphys_force_kdp/$kgm_readphys_force_physmap to override
+# how the user pages are accessed ($kgm_readphys_force_physmap
+# implies walking the user task's pagetables to get a physical
+# address and then shadowing data from there using the
+# physical mapping of memory).
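+# For example, to force the pagetable-walk/physical-copy path:
+#   (gdb) set $kgm_readphys_force_physmap = 1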
 define _map_user_data_from_task
     set $kgm_map_user_taskp = (task_t)$arg0
     set $kgm_map_user_map = $kgm_map_user_taskp->map
@@ -7980,47 +9888,117 @@ define _map_user_data_from_task
     set $kgm_map_user_window = 0
     set $kgm_map_switch_map = 0
     
-    if $kgm_lp64
-        set $kgm_map_switch_map = 1
+    if ($kgm_readphys_force_kdp != 0)
+        set $kgm_readphys_use_kdp = 1
     else
-        if !$kgm_map_user_task_64
-            set $kgm_map_switch_map = 1
+        if ($kgm_readphys_force_physmap)
+            set $kgm_readphys_use_kdp = 0
+        else
+            set $kgm_readphys_use_kdp = ( kdp->is_conn > 0 )
         end
     end
-    
-    if ($kgm_map_switch_map)
-        # switch the map safely
-        set $kgm_map_user_window = $arg1
-        set kdp_pmap = $kgm_map_user_pmap
-    else
-        # requires shadowing/copying
 
-        # set up the manual KDP packet
-        set manual_pkt.input = 0
-        set manual_pkt.len = sizeof(kdp_readmem64_req_t)
-        set $kgm_pkt = (kdp_readmem64_req_t *)&manual_pkt.data
-        set $kgm_pkt->hdr.request = KDP_READMEM64
-        set $kgm_pkt->hdr.len = sizeof(kdp_readmem64_req_t)
-        set $kgm_pkt->hdr.is_reply = 0
-        set $kgm_pkt->hdr.seq = 0
-        set $kgm_pkt->hdr.key = 0
-        set $kgm_pkt->address = (uint64_t)$arg1
-        set $kgm_pkt->nbytes = (uint32_t)$arg2
+    if ($kgm_readphys_use_kdp)
 
-        set kdp_pmap = $kgm_map_user_pmap
-        set manual_pkt.input = 1
-        # dummy to make sure manual packet is executed
-        set $kgm_dummy = &_mh_execute_header
-        # Go back to kernel map so that we can access buffer directly
-        set kdp_pmap = 0
+        if $kgm_lp64
+            set $kgm_map_switch_map = 1
+        else
+            if !$kgm_map_user_task_64
+                set $kgm_map_switch_map = 1
+            end
+        end
+    
+        if ($kgm_map_switch_map)
+            # switch the map safely
+            set $kgm_map_user_window = $arg1
+            set kdp_pmap = $kgm_map_user_pmap
+        else
+            # requires shadowing/copying
+
+            # set up the manual KDP packet
+            set manual_pkt.input = 0
+            set manual_pkt.len = sizeof(kdp_readmem64_req_t)
+            set $kgm_pkt = (kdp_readmem64_req_t *)&manual_pkt.data
+            set $kgm_pkt->hdr.request = KDP_READMEM64
+            set $kgm_pkt->hdr.len = sizeof(kdp_readmem64_req_t)
+            set $kgm_pkt->hdr.is_reply = 0
+            set $kgm_pkt->hdr.seq = 0
+            set $kgm_pkt->hdr.key = 0
+            set $kgm_pkt->address = (uint64_t)$arg1
+            set $kgm_pkt->nbytes = (uint32_t)$arg2
+
+            set kdp_pmap = $kgm_map_user_pmap
+            set manual_pkt.input = 1
+            # dummy to make sure manual packet is executed
+            set $kgm_dummy = &_mh_execute_header
+            # Go back to kernel map so that we can access buffer directly
+            set kdp_pmap = 0
+
+            set $kgm_pkt = (kdp_readmem64_reply_t *)&manual_pkt.data
+            if ($kgm_pkt->error == 0)
+                set $kgm_map_user_window = $kgm_pkt->data
+            else
+                set $kgm_map_user_window = 0
+            end
+        end
 
-        set $kgm_pkt = (kdp_readmem64_reply_t *)&manual_pkt.data
-        if ($kgm_pkt->error == 0)
-            set $kgm_map_user_window = $kgm_pkt->data
+    else
+        # without the benefit of a KDP stub on the target, try to
+        # find the user task's physical mapping and memcpy the data.
+        # If it straddles a page boundary, copy in two passes
+        set $kgm_vaddr_range1_start = (unsigned long long)$arg1
+        set $kgm_vaddr_range1_count = (unsigned long long)$arg2
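+        # (e.g. a 32-byte read at page offset 0xff0 splits into 16
+        # bytes there and 16 at the next page boundary)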
+        if (($kgm_vaddr_range1_start + $kgm_vaddr_range1_count) & 0xFFF) < $kgm_vaddr_range1_count
+            set $kgm_vaddr_range2_start = ($kgm_vaddr_range1_start + $kgm_vaddr_range1_count) & ~((unsigned long long)0xFFF)
+            set $kgm_vaddr_range2_count = $kgm_vaddr_range1_start + $kgm_vaddr_range1_count - $kgm_vaddr_range2_start
+            set $kgm_vaddr_range1_count = $kgm_vaddr_range2_start - $kgm_vaddr_range1_start
         else
-            set $kgm_map_user_window = 0
+            set $kgm_vaddr_range2_start = 0
+            set $kgm_vaddr_range2_count = 0
         end
+        set $kgm_paddr_range1_in_kva = 0
+        set $kgm_paddr_range2_in_kva = 0
 
+        if ($kgm_mtype == $kgm_mtype_x86_64)
+            set $kgm_pt_verbose = 0
+            _pmap_walk_x86 $kgm_map_user_pmap $kgm_vaddr_range1_start
+            if $kgm_paddr_isvalid
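+                # map the physical address through the kernel physmap
+                # window: PML4 slot 509 in the upper half, i.e. base
+                # 0xfffffe8000000000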
+                set $kgm_paddr_range1_in_kva = $kgm_paddr + (((unsigned long long)-1 << 47) | ((unsigned long long)509 << 39))
+            end
+            if $kgm_vaddr_range2_start
+                _pmap_walk_x86 $kgm_map_user_pmap $kgm_vaddr_range2_start
+                if $kgm_paddr_isvalid
+                    set $kgm_paddr_range2_in_kva = $kgm_paddr + (((unsigned long long)-1 << 47) | ((unsigned long long)509 << 39))
+                end
+            end
+        else
+            if ($kgm_mtype == $kgm_mtype_arm)
+                set $kgm_pt_verbose = 0
+                _pmap_walk_arm $kgm_map_user_pmap $kgm_vaddr_range1_start
+                if $kgm_paddr_isvalid
+                   set $kgm_paddr_range1_in_kva = $kgm_paddr - gPhysBase + gVirtBase
+                end
+                if $kgm_vaddr_range2_start
+                    _pmap_walk_arm $kgm_map_user_pmap $kgm_vaddr_range2_start
+                    if $kgm_paddr_isvalid
+                        set $kgm_paddr_range2_in_kva = $kgm_paddr - gPhysBase + gVirtBase
+                    end
+                end
+            else
+                printf "Not available for current architecture.\n"
+                set $kgm_paddr_isvalid = 0
+            end
+        end
+        if $kgm_paddr_range1_in_kva
+            set $kgm_pkt = (kdp_readmem64_reply_t *)&manual_pkt.data
+            memcpy $kgm_pkt->data $kgm_paddr_range1_in_kva $kgm_vaddr_range1_count
+            if $kgm_paddr_range2_in_kva
+                memcpy &$kgm_pkt->data[$kgm_vaddr_range1_count] $kgm_paddr_range2_in_kva $kgm_vaddr_range2_count
+            end
+            set $kgm_map_user_window  = $kgm_pkt->data
+        else
+            set $kgm_map_user_window  = 0
+        end
     end
 end
 
@@ -8032,6 +10010,10 @@ end
 define _print_path_for_image
     set $kgm_print_path_address = (unsigned long long)$arg0
     set $kgm_path_str_notdone = 1
+
+    if ($kgm_print_path_address == 0)
+       set $kgm_path_str_notdone = 0
+    end
     
     while $kgm_path_str_notdone
         _map_user_data_from_task $kgm_taskp $kgm_print_path_address 32
@@ -8045,7 +10027,7 @@ define _print_path_for_image
         
         _unmap_user_data_from_task $kgm_taskp
         
-        # if we terminated on NUL, break out
+        # break out if we terminated on NUL
         if $kgm_path_i < 32
             set $kgm_path_str_notdone = 0
         else
@@ -8054,7 +10036,7 @@ define _print_path_for_image
     end
 end
 
-# uses $kgm_taskp and $kgm_task_64
+# uses $kgm_taskp and $kgm_task_64. May modify $kgm_dyld_load_path
 define _print_image_info
     set $kgm_mh_image_address = (unsigned long long)$arg0
     set $kgm_mh_path_address = (unsigned long long)$arg1
@@ -8135,6 +10117,10 @@ define _print_image_info
 
             loop_break
         else
+            if $kgm_lc_cmd == 0xe
+                set $kgm_load_dylinker_data = $kgm_lc_data
+                set $kgm_dyld_load_path = $kgm_lc_address + *((unsigned int *)$kgm_load_dylinker_data)
+            end
             _unmap_user_data_from_task $kgm_taskp
         end
 
@@ -8184,20 +10170,24 @@ define _print_images_for_dyld_image_info
     set $kgm_task_64 = $arg1
     set $kgm_dyld_all_image_infos_address = (unsigned long long)$arg2
 
-    _map_user_data_from_task $kgm_taskp $kgm_dyld_all_image_infos_address 16
+    _map_user_data_from_task $kgm_taskp $kgm_dyld_all_image_infos_address 112
 
     set $kgm_dyld_all_image_infos = (unsigned int *)$kgm_map_user_window    
-    if ($kgm_dyld_all_image_infos[0] != 6)
-        printf "Invalid version number %d\n", $kgm_dyld_all_image_infos[0]
+    set $kgm_dyld_all_image_infos_version = $kgm_dyld_all_image_infos[0]
+    if ($kgm_dyld_all_image_infos_version > 12)
+        printf "Unknown dyld all_image_infos version number %d\n", $kgm_dyld_all_image_infos_version
     end
     set $kgm_image_info_count = $kgm_dyld_all_image_infos[1]
-    
+
+    set $kgm_dyld_load_path = 0    
     if $kgm_task_64
         set $kgm_image_info_size = 24
         set $kgm_image_info_array_address = ((unsigned long long *)$kgm_dyld_all_image_infos)[1]
+        set $kgm_dyld_load_address = ((unsigned long long *)$kgm_dyld_all_image_infos)[4]
     else
         set $kgm_image_info_size = 12
         set $kgm_image_info_array_address = ((unsigned int *)$kgm_dyld_all_image_infos)[2]
+        set $kgm_dyld_load_address = ((unsigned int *)$kgm_dyld_all_image_infos)[5]
     end
 
     _unmap_user_data_from_task $kgm_taskp
@@ -8222,28 +10212,33 @@ define _print_images_for_dyld_image_info
 
         set $kgm_image_info_i = $kgm_image_info_i + 1
     end
+
+    # $kgm_dyld_load_path may get set when the main executable is processed
+    # printf "[dyld] = image address %llx path address %llx\n", $kgm_dyld_load_address, $kgm_dyld_load_path
+    _print_image_info $kgm_dyld_load_address $kgm_dyld_load_path
+
 end
 
 define showuserlibraries
-    set $kgm_taskp = (task_t)$arg0
-    set $kgm_dyld_image_info = $kgm_taskp->all_image_info_addr
+	set $kgm_taskp = (task_t)$arg0
+	set $kgm_dyld_image_info = $kgm_taskp->all_image_info_addr
 
-    set $kgm_map = $kgm_taskp->map
-    set $kgm_task_64 = ( $kgm_taskp->taskFeatures[0] & 0x80000000)
+	set $kgm_map = $kgm_taskp->map
+	set $kgm_task_64 = ( $kgm_taskp->taskFeatures[0] & 0x80000000)
 
-    if ($kgm_dyld_image_info != 0)
-        printf "address   "
-        if $kgm_task_64
-            printf "        "
-        end
-        printf "  type       "
-        printf "  uuid                                  "
-        printf "path\n"
+	if ($kgm_dyld_image_info != 0)
+		printf "address   "
+		if $kgm_task_64
+			printf "        "
+		end
+		printf "  type       "
+		printf "  uuid                                  "
+		printf "path\n"
 
-        _print_images_for_dyld_image_info $kgm_taskp $kgm_task_64 $kgm_dyld_image_info
-    else
-        printf "No dyld shared library information available for task\n"
-    end
+		_print_images_for_dyld_image_info $kgm_taskp $kgm_task_64 $kgm_dyld_image_info
+	else
+		printf "No dyld shared library information available for task\n"
+	end
 end
 document showuserlibraries
 Syntax: (gdb) showuserlibraries <task_t>
@@ -8251,6 +10246,191 @@ Syntax: (gdb) showuserlibraries <task_t>
 | information about all Mach-O images.
 end
 
+define showuserdyldinfo
+	set $kgm_taskp = (task_t)$arg0
+	set $kgm_dyld_all_image_infos_address = (unsigned long long)$kgm_taskp->all_image_info_addr
+
+	set $kgm_map = $kgm_taskp->map
+	set $kgm_task_64 = ( $kgm_taskp->taskFeatures[0] & 0x80000000)
+
+	if ($kgm_dyld_all_image_infos_address != 0)
+
+	   _map_user_data_from_task $kgm_taskp $kgm_dyld_all_image_infos_address 112
+
+	   set $kgm_dyld_all_image_infos = (unsigned char *)$kgm_map_user_window
+	   set $kgm_dyld_all_image_infos_version = ((unsigned int *)$kgm_dyld_all_image_infos)[0]
+	   if ($kgm_dyld_all_image_infos_version > 12)
+		  printf "Unknown dyld all_image_infos version number %d\n", $kgm_dyld_all_image_infos_version
+	   end
+
+	   # Find fields by byte offset. We assume at least version 9 is supported
+	   if $kgm_task_64
+		  set $kgm_dyld_all_image_infos_infoArrayCount = *(unsigned int *)(&$kgm_dyld_all_image_infos[4])
+		  set $kgm_dyld_all_image_infos_infoArray = *(unsigned long long *)(&$kgm_dyld_all_image_infos[8])
+		  set $kgm_dyld_all_image_infos_notification = *(unsigned long long *)(&$kgm_dyld_all_image_infos[16])
+		  set $kgm_dyld_all_image_infos_processDetachedFromSharedRegion = *(unsigned char *)(&$kgm_dyld_all_image_infos[24])
+		  set $kgm_dyld_all_image_infos_libSystemInitialized = *(unsigned char *)(&$kgm_dyld_all_image_infos[25])
+		  set $kgm_dyld_all_image_infos_dyldImageLoadAddress = *(unsigned long long *)(&$kgm_dyld_all_image_infos[32])
+		  set $kgm_dyld_all_image_infos_jitInfo = *(unsigned long long *)(&$kgm_dyld_all_image_infos[40])
+		  set $kgm_dyld_all_image_infos_dyldVersion = *(unsigned long long *)(&$kgm_dyld_all_image_infos[48])
+		  set $kgm_dyld_all_image_infos_errorMessage = *(unsigned long long *)(&$kgm_dyld_all_image_infos[56])
+		  set $kgm_dyld_all_image_infos_terminationFlags = *(unsigned long long *)(&$kgm_dyld_all_image_infos[64])
+		  set $kgm_dyld_all_image_infos_coreSymbolicationShmPage = *(unsigned long long *)(&$kgm_dyld_all_image_infos[72])
+		  set $kgm_dyld_all_image_infos_systemOrderFlag = *(unsigned long long *)(&$kgm_dyld_all_image_infos[80])
+		  set $kgm_dyld_all_image_infos_uuidArrayCount = *(unsigned long long *)(&$kgm_dyld_all_image_infos[88])
+		  set $kgm_dyld_all_image_infos_uuidArray = *(unsigned long long *)(&$kgm_dyld_all_image_infos[96])
+		  set $kgm_dyld_all_image_infos_dyldAllImageInfosAddress = *(unsigned long long *)(&$kgm_dyld_all_image_infos[104])
+	   else
+		  set $kgm_dyld_all_image_infos_infoArrayCount = *(unsigned int *)(&$kgm_dyld_all_image_infos[4])
+		  set $kgm_dyld_all_image_infos_infoArray = *(unsigned int *)(&$kgm_dyld_all_image_infos[8])
+		  set $kgm_dyld_all_image_infos_notification = *(unsigned int *)(&$kgm_dyld_all_image_infos[12])
+		  set $kgm_dyld_all_image_infos_processDetachedFromSharedRegion = *(unsigned char *)(&$kgm_dyld_all_image_infos[16])
+		  set $kgm_dyld_all_image_infos_libSystemInitialized = *(unsigned char *)(&$kgm_dyld_all_image_infos[17])
+		  set $kgm_dyld_all_image_infos_dyldImageLoadAddress = *(unsigned int *)(&$kgm_dyld_all_image_infos[20])
+		  set $kgm_dyld_all_image_infos_jitInfo = *(unsigned int *)(&$kgm_dyld_all_image_infos[24])
+		  set $kgm_dyld_all_image_infos_dyldVersion = *(unsigned int *)(&$kgm_dyld_all_image_infos[28])
+		  set $kgm_dyld_all_image_infos_errorMessage = *(unsigned int *)(&$kgm_dyld_all_image_infos[32])
+		  set $kgm_dyld_all_image_infos_terminationFlags = *(unsigned int *)(&$kgm_dyld_all_image_infos[36])
+		  set $kgm_dyld_all_image_infos_coreSymbolicationShmPage = *(unsigned int *)(&$kgm_dyld_all_image_infos[40])
+		  set $kgm_dyld_all_image_infos_systemOrderFlag = *(unsigned int *)(&$kgm_dyld_all_image_infos[44])
+		  set $kgm_dyld_all_image_infos_uuidArrayCount = *(unsigned int *)(&$kgm_dyld_all_image_infos[48])
+		  set $kgm_dyld_all_image_infos_uuidArray = *(unsigned int *)(&$kgm_dyld_all_image_infos[52])
+		  set $kgm_dyld_all_image_infos_dyldAllImageInfosAddress = *(unsigned int *)(&$kgm_dyld_all_image_infos[56])
+	   end
+
+	   _unmap_user_data_from_task $kgm_taskp
+
+	   printf "                        version %u\n", $kgm_dyld_all_image_infos_version
+	   printf "                 infoArrayCount %u\n", $kgm_dyld_all_image_infos_infoArrayCount
+	   printf "                      infoArray "
+	   showuserptr $kgm_dyld_all_image_infos_infoArray
+	   printf "\n"
+	   printf "                   notification "
+	   showuserptr $kgm_dyld_all_image_infos_notification
+	   printf "\n"
+	   printf "processDetachedFromSharedRegion %d\n", $kgm_dyld_all_image_infos_processDetachedFromSharedRegion
+	   printf "           libSystemInitialized %d\n", $kgm_dyld_all_image_infos_libSystemInitialized
+	   printf "           dyldImageLoadAddress "
+	   showuserptr $kgm_dyld_all_image_infos_dyldImageLoadAddress
+	   printf "\n"
+	   printf "                        jitInfo "
+	   showuserptr $kgm_dyld_all_image_infos_jitInfo
+	   printf "\n"
+	   printf "                    dyldVersion "
+	   showuserptr $kgm_dyld_all_image_infos_dyldVersion
+	   printf "\n"
+	   printf "                                "
+	   _print_path_for_image $kgm_dyld_all_image_infos_dyldVersion
+	   printf "\n"
+
+	   printf "                   errorMessage "
+	   showuserptr $kgm_dyld_all_image_infos_errorMessage
+	   printf "\n"
+	   if $kgm_dyld_all_image_infos_errorMessage != 0
+		  printf "                                "
+		  _print_path_for_image $kgm_dyld_all_image_infos_errorMessage
+		  printf "\n"
+	   end
+
+	   printf "               terminationFlags "
+	   showuserptr $kgm_dyld_all_image_infos_terminationFlags
+	   printf "\n"
+	   printf "       coreSymbolicationShmPage "
+	   showuserptr $kgm_dyld_all_image_infos_coreSymbolicationShmPage
+	   printf "\n"
+	   printf "                systemOrderFlag "
+	   showuserptr $kgm_dyld_all_image_infos_systemOrderFlag
+	   printf "\n"
+	   printf "                 uuidArrayCount "
+	   showuserptr $kgm_dyld_all_image_infos_uuidArrayCount
+	   printf "\n"
+	   printf "                      uuidArray "
+	   showuserptr $kgm_dyld_all_image_infos_uuidArray
+	   printf "\n"
+	   printf "       dyldAllImageInfosAddress "
+	   showuserptr $kgm_dyld_all_image_infos_dyldAllImageInfosAddress
+	   printf "\n"
+	   printf "                                (currently "
+	   showuserptr $kgm_dyld_all_image_infos_address
+	   printf ")\n"
+
+	   if $kgm_task_64
+		  set $kgm_dyld_all_image_infos_address = $kgm_dyld_all_image_infos_address + 112
+		  _map_user_data_from_task $kgm_taskp $kgm_dyld_all_image_infos_address 64
+		  set $kgm_dyld_all_image_infos_v10 = (unsigned char *)$kgm_map_user_window
+		  set $kgm_dyld_all_image_infos_initialImageCount = *(unsigned long long *)(&$kgm_dyld_all_image_infos_v10[112-112])
+		  set $kgm_dyld_all_image_infos_errorKind = *(unsigned long long *)(&$kgm_dyld_all_image_infos_v10[120-112])
+		  set $kgm_dyld_all_image_infos_errorClientOfDylibPath = *(unsigned long long *)(&$kgm_dyld_all_image_infos_v10[128-112])
+		  set $kgm_dyld_all_image_infos_errorTargetDylibPath = *(unsigned long long *)(&$kgm_dyld_all_image_infos_v10[136-112])
+		  set $kgm_dyld_all_image_infos_errorSymbol = *(unsigned long long *)(&$kgm_dyld_all_image_infos_v10[144-112])
+		  set $kgm_dyld_all_image_infos_sharedCacheSlide = *(unsigned long long *)(&$kgm_dyld_all_image_infos_v10[152-112])
+
+		  _unmap_user_data_from_task $kgm_taskp
+	   else
+		  set $kgm_dyld_all_image_infos_address = $kgm_dyld_all_image_infos_address + 60
+		  _map_user_data_from_task $kgm_taskp $kgm_dyld_all_image_infos_address 64
+		  set $kgm_dyld_all_image_infos_v10 = (unsigned char *)$kgm_map_user_window
+		  set $kgm_dyld_all_image_infos_initialImageCount = *(unsigned int *)(&$kgm_dyld_all_image_infos_v10[60-60])
+		  set $kgm_dyld_all_image_infos_errorKind = *(unsigned int *)(&$kgm_dyld_all_image_infos_v10[64-60])
+		  set $kgm_dyld_all_image_infos_errorClientOfDylibPath = *(unsigned int *)(&$kgm_dyld_all_image_infos_v10[68-60])
+		  set $kgm_dyld_all_image_infos_errorTargetDylibPath = *(unsigned int *)(&$kgm_dyld_all_image_infos_v10[72-60])
+		  set $kgm_dyld_all_image_infos_errorSymbol = *(unsigned int *)(&$kgm_dyld_all_image_infos_v10[76-60])
+		  set $kgm_dyld_all_image_infos_sharedCacheSlide = *(unsigned int *)(&$kgm_dyld_all_image_infos_v10[80-60])
+		  _unmap_user_data_from_task $kgm_taskp
+	   end
+
+	   if $kgm_dyld_all_image_infos_version >= 10
+		  printf "              initialImageCount "
+		  showuserptr $kgm_dyld_all_image_infos_initialImageCount
+		  printf "\n"
+	   end
+
+	   if $kgm_dyld_all_image_infos_version >= 11
+		  printf "                      errorKind "
+		  showuserptr $kgm_dyld_all_image_infos_errorKind
+		  printf "\n"
+		  printf "         errorClientOfDylibPath "
+		  showuserptr $kgm_dyld_all_image_infos_errorClientOfDylibPath
+		  printf "\n"
+		  if $kgm_dyld_all_image_infos_errorClientOfDylibPath != 0
+			 printf "                                "
+			 _print_path_for_image $kgm_dyld_all_image_infos_errorClientOfDylibPath
+			 printf "\n"
+		  end
+		  printf "           errorTargetDylibPath "
+		  showuserptr $kgm_dyld_all_image_infos_errorTargetDylibPath
+		  printf "\n"
+		  if $kgm_dyld_all_image_infos_errorTargetDylibPath != 0
+			 printf "                                "
+			 _print_path_for_image $kgm_dyld_all_image_infos_errorTargetDylibPath
+			 printf "\n"
+		  end
+		  printf "                    errorSymbol "
+		  showuserptr $kgm_dyld_all_image_infos_errorSymbol
+		  printf "\n"
+		  if $kgm_dyld_all_image_infos_errorSymbol != 0
+			 printf "                                "
+			 _print_path_for_image $kgm_dyld_all_image_infos_errorSymbol
+			 printf "\n"
+		  end
+	   end
+
+	   if $kgm_dyld_all_image_infos_version >= 12
+		  printf "               sharedCacheSlide "
+		  showuserptr $kgm_dyld_all_image_infos_sharedCacheSlide
+		  printf "\n"
+	   end
+
+	else
+		printf "No dyld information available for task\n"
+	end
+end
+document showuserdyldinfo
+Syntax: (gdb) showuserdyldinfo <task_t>
+| For a given user task, inspect the dyld global info and print
+| out all fields, including error messages.
+end
+
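+# The byte offsets decoded by showuserdyldinfo above correspond to this
+# assumed 64-bit layout (a sketch inferred from the offsets the macro uses,
+# not an authoritative copy of dyld's headers; the 32-bit branch reads the
+# same fields as 32-bit words at the offsets shown in its else branch):
+#
+#   struct dyld_all_image_infos_64 {               /* offset */
+#       uint32_t version;                          /*    0  */
+#       uint32_t infoArrayCount;                   /*    4  */
+#       uint64_t infoArray;                        /*    8  */
+#       uint64_t notification;                     /*   16  */
+#       uint8_t  processDetachedFromSharedRegion;  /*   24  */
+#       uint8_t  libSystemInitialized;             /*   25  */
+#       uint64_t dyldImageLoadAddress;             /*   32  */
+#       uint64_t jitInfo;                          /*   40  */
+#       uint64_t dyldVersion;                      /*   48  */
+#       uint64_t errorMessage;                     /*   56  */
+#       uint64_t terminationFlags;                 /*   64  */
+#       uint64_t coreSymbolicationShmPage;         /*   72  */
+#       uint64_t systemOrderFlag;                  /*   80  */
+#       uint64_t uuidArrayCount;                   /*   88  */
+#       uint64_t uuidArray;                        /*   96  */
+#       uint64_t dyldAllImageInfosAddress;         /*  104  */
+#   };
+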
 define showkerneldebugheader
 	printf "kd_buf     "
 	showptrhdrpad
@@ -8546,8 +10726,7 @@ define showkerneldebugbuffercpu
 	set $kgm_entry_count = (int) $arg1
 	set $kgm_debugentriesfound = 0
 	
-	#if kdebug_flags & KDBG_BFINIT
-	if (kdebug_flags & 0x80000000)	
+	if (kdebug_flags & 0x80000000)	# 0x80000000 == KDBG_BFINIT
 		showkerneldebugheader
 		
 		if $kgm_entry_count == 0
@@ -8584,8 +10763,7 @@ end
 
 define showkerneldebugbuffer
 	
-	#if kdebug_flags & KDBG_BFINIT
-	if (kdebug_flags & 0x80000000)	
+	if (kdebug_flags & 0x80000000)	# 0x80000000 == KDBG_BFINIT
 	
 		set $kgm_entrycount = (int) $arg0
 	
@@ -8629,19 +10807,30 @@ Syntax: showallvmstats
 | prints a summary of vm statistics in a table format
 end
 
+define memstats
+	if ($kgm_mtype == $kgm_mtype_arm)
+		printf "kern_memorystatus_level:  %8d\n", kern_memorystatus_level
+	end	
+	printf "vm_page_throttled_count:  %8d\n", vm_page_throttled_count 
+	printf "vm_page_active_count:     %8d\n", vm_page_active_count
+	printf "vm_page_inactive_count:   %8d\n", vm_page_inactive_count
+	printf "vm_page_wire_count:       %8d\n", vm_page_wire_count
+	printf "vm_page_free_count:       %8d\n", vm_page_free_count
+	printf "vm_page_purgeable_count:  %8d\n", vm_page_purgeable_count
+	printf "vm_page_inactive_target:  %8d\n", vm_page_inactive_target
+	printf "vm_page_free_target:      %8d\n", vm_page_free_target
+	printf "inuse_ptepages_count:     %8d\n", inuse_ptepages_count
+	printf "vm_page_free_reserved:    %8d\n", vm_page_free_reserved
+end
+
+document memstats
+Syntax: (gdb) memstats
+| Prints out a summary of various memory statistics. In particular, vm_page_wire_count should
+| be greater than 2K; otherwise you are under memory pressure.
+end
+
 define show_user_registers
-	if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any)
-		set $kgm_thread = (thread_t)$arg0
-		if ((*(thread_t)$kgm_thread)->machine.xxx_pcb.iss.flavor == 15)
-			p/x ($kgm_thread)->machine.xxx_pcb.iss->uss.ss_64
-		else
-			p/x ($kgm_thread)->machine.xxx_pcb.iss->uss.ss_32
-		end
-	end
-	if ($kgm_mtype == $kgm_mtype_ppc)
-		set $kgm_thread = (thread_t)$arg0
-		p/x *($kgm_thread)->machine.pcb
-	end
+	showuserregisters $arg0
 end
 
 document show_user_registers
@@ -8786,6 +10975,35 @@ Syntax: strcmp_nomalloc <string> <a> [b] [c] [d] [e] [f] [g] [h] [i]
 | strcmp_nomalloc version $kgm_strcmp_arg
 end
 
+define memcpy
+    set $kgm_dst = (unsigned char *)$arg0
+    set $kgm_src = (unsigned char *)$arg1
+    set $kgm_count = $arg2
+
+    # printf "src %p dst %p len %d\n", $kgm_src, $kgm_dst, $kgm_count
+
+    while ($kgm_count >= 8)
+        set *(unsigned long long *)$kgm_dst = *(unsigned long long *)$kgm_src
+
+        set $kgm_dst = $kgm_dst + 8
+        set $kgm_src = $kgm_src + 8
+        set $kgm_count = $kgm_count - 8
+    end
+    while ($kgm_count > 0)
+        set *$kgm_dst = *$kgm_src
+
+        set $kgm_dst = $kgm_dst + 1
+        set $kgm_src = $kgm_src + 1
+        set $kgm_count = $kgm_count - 1
+    end
+end
+
+document memcpy
+Syntax: memcpy <dst> <src> <n>
+| Given two addresses that are accessible by the debugger, perform
+| a memory copy of <n> bytes from <src> to <dst>.
+end
+
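+# Example usage (the addresses are hypothetical): copy 16 bytes between two
+# debugger-accessible buffers; the macro moves 8-byte words first and then
+# the remaining bytes one at a time:
+#
+#   (gdb) memcpy 0xffffff8000200000 0xffffff8000100000 16
+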
 # _pci_cfg_addr_value $addr $size
 define _pci_cfg_addr_value
    readphysint $arg0 $arg1 $kgm_lcpu_self
@@ -8825,7 +11043,7 @@ define _pci_cfg_init
 	  end
        end
 
-       # if the above fails, search for 0:0:0 in likely places.
+       # search for 0:0:0 in likely places if the above fails
        if $kgm_pci_cfg_init == 0
        	  set $kgm_pci_cfg_base = 0xF0000000
 	  while $kgm_pci_cfg_init == 0 && $kgm_pci_cfg_base > 0xA0000000
@@ -10125,6 +12343,31 @@ Syntax: (gdb) showeventsourceobject <prefix> <object>
 | Routine to display information about an IOEventSource subclass.
 end
 
+define showworkloopallocator
+	set $kgm_workloop = (struct IOWorkLoop*)$arg0
+	set $kgm_bt = (void**)$kgm_workloop->reserved->allocationBacktrace
+	set $kgm_bt_count = 0
+	while $kgm_bt_count != (sizeof(IOWorkLoop::ExpansionData.allocationBacktrace) / sizeof(IOWorkLoop::ExpansionData.allocationBacktrace[0]))
+		set $kgm_frame_address = (void*)$kgm_bt[$kgm_bt_count]
+		if $kgm_frame_address != 0
+			if (((unsigned long) $kgm_frame_address < (unsigned long) &_mh_execute_header || \
+			     (unsigned long) $kgm_frame_address >= (unsigned long) &last_kernel_symbol ) \
+			    && ($kgm_show_kmod_syms == 0))
+				showkmodaddr $kgm_frame_address
+			else
+				output /a $kgm_frame_address
+			end
+			printf "\n"
+		end
+		set $kgm_bt_count = $kgm_bt_count + 1
+	end
+end
+document showworkloopallocator
+Syntax: (gdb) showworkloopallocator <workloop>
+| Routine to display the backtrace of the thread that allocated the workloop in question. Only
+| valid on DEBUG kernels.
+end
+
 define showworkloopeventsources
 	set $kgm_eventsource = (struct IOEventSource*)$arg0
     while $kgm_eventsource != 0
@@ -10204,10 +12447,27 @@ define showworkloop
 	end
 	printf "\t\t"
 	set $kgm_gateLock = ( struct _IORecursiveLock *)$kgm_workloop->gateLock
-	set $kgm_lockGroup = (struct _lck_grp_*)($kgm_gateLock->group)
-	printf "%s", $kgm_lockGroup->lck_grp_name
-	printf "\n"
-	showworkloopeventsources $kgm_workloop->eventChain
+	if $kgm_gateLock != 0
+		set $kgm_lockGroup = (struct _lck_grp_*)($kgm_gateLock->group)
+		printf "%s", $kgm_lockGroup->lck_grp_name
+	else
+		printf "No WorkLoop Lock found"
+	end
+	printf "\n\n"
+	
+	# The allocation backtrace is only valid on DEBUG kernels.
+	#printf "Allocation path:\n\n"
+	#showworkloopallocator $kgm_workloop
+	#printf "\n\n"
+	
+	if $kgm_workloop->eventChain != 0
+		printf "Active event sources:\n\n"
+		showworkloopeventsources $kgm_workloop->eventChain
+	end
+	if $kgm_workloop->reserved->passiveEventChain != 0
+		printf "Passive event sources:\n"
+		showworkloopeventsources $kgm_workloop->reserved->passiveEventChain
+	end
 end
 document showworkloop
 Syntax: (gdb) showworkloop <thread> <workloop>
@@ -10293,7 +12553,7 @@ Syntax:  showthreadfortid  <thread_id>
 |corresponding to a given thread_id.
 end
 
-define showtaskbusyports
+define showtaskbusyportsint
     set $kgm_isp = ((task_t)$arg0)->itk_space
 	set $kgm_iindex = 0
 	while ( $kgm_iindex < $kgm_isp->is_table_size )
@@ -10308,6 +12568,10 @@ define showtaskbusyports
 	end
 end
 
+define showtaskbusyports
+    showtaskbusyportsint $arg0
+end
+
 document showtaskbusyports
 Syntax:  showtaskbusyports <task>
 |Routine to print information about receive rights belonging to this task that
@@ -10318,7 +12582,7 @@ define showallbusyports
     set $kgm_head_taskp = &tasks
     set $kgm_cur_taskp = (struct task *)($kgm_head_taskp->next)
     while $kgm_cur_taskp != $kgm_head_taskp
-		showtaskbusyports $kgm_cur_taskp
+		showtaskbusyportsint $kgm_cur_taskp
     	set $kgm_cur_taskp = (struct task *)($kgm_cur_taskp->tasks.next)
     end
 end
@@ -10329,16 +12593,689 @@ Syntax:  showallbusyports
 |have enqueued messages.
 end
 
-define kdp-connect
-    if $argc > 0
-	kdp-reattach $arg0
+define showallproviders
+    set $kgm_providerp = dtrace_provider
+    while $kgm_providerp
+        p *(dtrace_provider_t *)$kgm_providerp
+        printf "\n"
+        set $kgm_providerp = (dtrace_provider_t *)($kgm_providerp->dtpv_next)
+    end
+end
+
+document showallproviders
+Syntax: showallproviders
+| Display a summary listing of all registered dtrace providers
+end
+
+define showmodctlheader
+    printf   "modctl    "
+    showptrhdrpad
+    printf "  stale     "
+    showptrhdrpad
+    printf "  symbols   "
+    showptrhdrpad
+    printf "  address   "
+    showptrhdrpad
+    printf "  size      "
+    showptrhdrpad
+    printf "  loadid    loaded  nenabled  flags      name\n"
+end
+
+define showmodctlint
+    set $kgm_modctlp = (struct modctl *)$arg0
+    showptr $kgm_modctlp
+    printf "  "
+    showptr $kgm_modctlp->mod_stale
+    printf "  "
+    showptr $kgm_modctlp->mod_user_symbols
+    printf "  "
+    showptr $kgm_modctlp->mod_address
+    printf "  "
+    showptr $kgm_modctlp->mod_size
+    printf "  "
+    printf "%6d  ", $kgm_modctlp->mod_loadcnt
+    printf "%6d  ", $kgm_modctlp->mod_loaded
+    printf "%6d  ", $kgm_modctlp->mod_nenabled
+    printf "    0x%x  ", $kgm_modctlp->mod_flags
+    printf "%s\n", $kgm_modctlp->mod_modname
+end
+
+define showmodctl
+    showmodctlheader
+    showmodctlint $arg0
+end
+document showmodctl
+Syntax: (gdb) showmodctl <addr>
+| Display info about a dtrace modctl
+end
+
+define showallmodctls
+    showmodctlheader
+    set $kgm_modctlp = (struct modctl *)dtrace_modctl_list
+    while $kgm_modctlp
+        showmodctlint $kgm_modctlp
+        set $kgm_modctlp = $kgm_modctlp->mod_next
+    end
+end
+document showallmodctls
+Syntax: (gdb) showallmodctls
+| Display a summary listing of all dtrace modctls
+end
+
+define showfbtprobe
+  printf "Be very patient, this traverses a large list \n"
+  set $kgm_indx = 0
+  set $kgm_found = 0
+  set $kgm_depth = 0
+  while $kgm_indx < fbt_probetab_size && !$kgm_found
+    set $kgm_fbt_probep = (struct fbt_probe *)fbt_probetab[$kgm_indx]
+    set $kgm_depth = 0
+    if $kgm_fbt_probep
+      set $kgm_probeid = (struct fbt_probe *)$kgm_fbt_probep->fbtp_id
+      if $kgm_probeid == $arg0
+        set $kgm_found = 1
+        loop_break
+      else
+	set $kgm_fbt_probep = $kgm_fbt_probep->fbtp_hashnext
+	while $kgm_fbt_probep
+	  set $kgm_depth++
+	  set $kgm_probeid = (struct fbt_probe *)$kgm_fbt_probep->fbtp_id
+	  if $kgm_probeid == $arg0
+	    set $kgm_found = 1
+	    loop_break
+	  else
+	    set $kgm_fbt_probep = $kgm_fbt_probep->fbtp_hashnext
+	  end
+        end
+      end
+    end
+    if !$kgm_found
+      set $kgm_indx++
     else
-    	printf "Attempting to attach to localhost...\n"
-    	kdp-reattach localhost
+      printf "fbt_probetab[index=%d], depth=%d, 0x%x\n", $kgm_indx, $kgm_depth, $kgm_fbt_probep
+      printf "(gdb) p *(struct fbt_probe *)0x%x\n", $kgm_fbt_probep
+      p *(struct fbt_probe *)$kgm_fbt_probep
+      set $kgm_fbtp_ctl = (struct fbt_probe *)$kgm_fbt_probep->fbtp_ctl
+      showmodctl $kgm_fbtp_ctl
+      loop_break
+    end
+  end
+end
+document showfbtprobe
+Syntax: (gdb) showfbtprobe <id>
+| Display info about an fbt probe given an id.
+| Traverses fbt_probetab and matches <id> with fbtp_id.
+| The <id> can be found using 'dtrace -l'.
+end
+
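+# Roughly, showfbtprobe performs this lookup (a C sketch of the traversal the
+# macro scripts over the probe hash table):
+#
+#   for (i = 0; i < fbt_probetab_size; i++)
+#       for (p = fbt_probetab[i]; p != NULL; p = p->fbtp_hashnext)
+#           if (p->fbtp_id == id)
+#               return p;   /* found: print the probe and its modctl */
+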
+define showzstacktrace
+	set $kgm_trace = (void*)$arg0
+	if ($argc == 1)
+		set $kgm_trace_size = 15
+	end
+	if ($argc == 2)
+		set $kgm_trace_size = $arg1
+	end
+	set $kgm_trace_current = 0
+	while ($kgm_trace_current < $kgm_trace_size)
+		set $kgm_trace_addr = (void**)$kgm_trace + $kgm_trace_current
+		set $kgm_trace_value = *((void**)$kgm_trace_addr) 
+		#printf "\t\t"
+		output /a $kgm_trace_value
+		set $kgm_trace_current = $kgm_trace_current + 1
+		printf "\n"
+	end
+end
+
+document showzstacktrace
+Syntax:  showzstacktrace <saved stacktrace> [size]
+| Routine to print a stacktrace stored by OSBacktrace.
+| [size] is optional and defaults to 15.
+end
+
+define showzalloc
+	set $kgm_zallocation = zallocations[$arg0]
+	print $kgm_zallocation
+	showztrace $kgm_zallocation->za_trace_index
+end
+
+document showzalloc
+Syntax:  showzalloc <index>
+| Prints a zallocation from the zallocations array based on its index,
+| and prints the associated symbolicated backtrace.
+end
+
+define showztrace
+	set $kgm_ztrace = &ztraces[$arg0]
+	showztraceaddr $kgm_ztrace
+end
+
+document showztrace
+Syntax:  showztrace <trace index>
+| Prints the backtrace from the ztraces array at the given index
+end
+
+define showztraceaddr
+	print *$arg0
+	showzstacktrace $arg0->zt_stack ($arg0)->zt_depth
+end
+
+document showztraceaddr
+Syntax:  showztraceaddr <trace address>
+| Prints the struct ztrace passed in
+end
+
+#TODO: Iterate through the hash table, or make top_ztrace accurate in the face of deallocations (better idea).
+define showtopztrace
+	set $kgm_top_ztrace = top_ztrace
+	printf "Index: %d\n", (top_ztrace - ztraces)
+	showztraceaddr $kgm_top_ztrace
+end
+
+document showtopztrace
+Syntax:  showtopztrace
+| Shows the ztrace with the biggest size, according to top_ztrace (it does not iterate through the hash table)
+end
+
+define showzallocs
+	set $kgm_zallocation_current_index = 0
+	set $kgm_zallocations_count = 0
+	set $kgm_max_zallocation = zleak_alloc_buckets
+	printf "INDEX  ADDRESS     "
+	if $kgm_lp64
+        printf "         "
+    end
+	printf "TRACE   SIZE\n"
+	while ($kgm_zallocation_current_index < $kgm_max_zallocation)
+		set $kgm_zallocation_current = zallocations[$kgm_zallocation_current_index]
+		if ($kgm_zallocation_current->za_element != 0)
+			printf "%5d  %p   ", $kgm_zallocation_current_index, $kgm_zallocation_current->za_element
+			printf "%5d %6lu\n", $kgm_zallocation_current->za_trace_index, $kgm_zallocation_current->za_size
+			set $kgm_zallocations_count = $kgm_zallocations_count + 1
+		end
+		set $kgm_zallocation_current_index = $kgm_zallocation_current_index + 1
+	end
+	printf "Total allocations: %d\n", $kgm_zallocations_count
+end
+
+document showzallocs
+Syntax:  showzallocs
+| Prints all allocations in the zallocations table
+end
+
+define showzallocsfortrace
+	set $kgm_zallocation_current_index = 0
+	set $kgm_zallocations_count = 0
+	set $kgm_max_zallocation = zleak_alloc_buckets
+	printf "INDEX  ADDRESS     "
+	if $kgm_lp64
+        printf "         "
+    end
+	printf "SIZE\n"
+	while ($kgm_zallocation_current_index < $kgm_max_zallocation)
+		set $kgm_zallocation_current = zallocations[$kgm_zallocation_current_index]
+		if ($kgm_zallocation_current->za_element != 0 && $kgm_zallocation_current->za_trace_index == $arg0)
+			printf "%5d  %p ", $kgm_zallocation_current_index, $kgm_zallocation_current->za_element
+			printf "%6lu\n", $kgm_zallocation_current->size
+			set $kgm_zallocations_count = $kgm_zallocations_count + 1
+		end
+		set $kgm_zallocation_current_index = $kgm_zallocation_current_index + 1
+	end
+	printf "Total allocations: %d\n", $kgm_zallocations_count
+end
+
+document showzallocsfortrace
+Syntax:  showzallocsfortrace <trace index>
+| Prints all allocations in the zallocations table whose trace index matches the given index into ztraces
+end
+
+define showztraces
+	showztracesabove 0
+end
+
+document showztraces
+Syntax:  showztraces
+| Prints all traces with size > 0
+end
+
+define showztracesabove
+	set $kgm_ztrace_current_index = 0
+	set $kgm_ztrace_count = 0
+	set $kgm_max_ztrace = zleak_trace_buckets
+	printf "INDEX    SIZE\n"
+	while ($kgm_ztrace_current_index < $kgm_max_ztrace)
+		set $kgm_ztrace_current = ztraces[$kgm_ztrace_current_index]
+		if ($kgm_ztrace_current->zt_size > $arg0)
+			printf "%5d  %6lu\n", $kgm_ztrace_current_index, $kgm_ztrace_current->zt_size
+			set $kgm_ztrace_count = $kgm_ztrace_count + 1
+		end
+		set $kgm_ztrace_current_index = $kgm_ztrace_current_index + 1
+	end
+	printf "Total traces: %d\n", $kgm_ztrace_count
+end
+
+document showztracesabove
+Syntax:  showztracesabove <size>
+| Prints all traces with size greater than <size>
+end
+
+define showztracehistogram
+	set $kgm_ztrace_current_index = 0
+	set $kgm_ztrace_count = 0
+	set $kgm_max_ztrace = zleak_trace_buckets
+	printf "INDEX  HIT_COUNT  COLLISIONS\n"
+	while ($kgm_ztrace_current_index < $kgm_max_ztrace)
+		set $kgm_ztrace_current = ztraces[$kgm_ztrace_current_index]
+		if ($kgm_ztrace_current->zt_hit_count != 0)
+			printf "%5d      %5d    %5d\n", $kgm_ztrace_current_index, $kgm_ztrace_current->zt_hit_count, $kgm_ztrace_current->zt_collisions
+			set $kgm_ztrace_count = $kgm_ztrace_count + 1
+		end
+		set $kgm_ztrace_current_index = $kgm_ztrace_current_index + 1
+	end
+	printf "Total traces: %d\n", $kgm_ztrace_count
+end
+
+document showztracehistogram
+Syntax:  showztracehistogram
+| Prints the histogram of the ztrace table
+end
+
+define showzallochistogram
+	set $kgm_zallocation_current_index = 0
+	set $kgm_zallocations_count = 0
+	set $kgm_max_zallocation = zleak_alloc_buckets
+	printf "INDEX  HIT_COUNT\n"
+	while ($kgm_zallocation_current_index < $kgm_max_zallocation)
+		set $kgm_zallocation_current = zallocations[$kgm_zallocation_current_index]
+		if ($kgm_zallocation_current->za_hit_count != 0)
+			printf "%5d      %5d\n", $kgm_zallocation_current_index, $kgm_zallocation_current->za_hit_count
+			set $kgm_zallocations_count = $kgm_zallocations_count + 1
+		end
+		set $kgm_zallocation_current_index = $kgm_zallocation_current_index + 1
+	end
+	printf "Total allocations: %d\n", $kgm_zallocations_count
+end
+
+document showzallochistogram
+Syntax:  showzallochistogram
+| Prints the histogram for the zallocations table
+end
+
+define showzstats
+	printf "z_alloc_collisions: %u, z_trace_collisions: %u\n", z_alloc_collisions, z_trace_collisions
+	printf "z_alloc_overwrites: %u, z_trace_overwrites: %u\n", z_alloc_overwrites, z_trace_overwrites
+	printf "z_alloc_recorded: %u, z_trace_recorded: %u\n", z_alloc_recorded, z_trace_recorded
+end
+
+document showzstats
+Syntax:  showzstats
+| Prints the zone leak detection stats
+end
+
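+# The zleak macros above assume per-record layouts along these lines (field
+# names are taken from the accesses in the macros; exact types and ordering
+# are illustrative, not authoritative):
+#
+#   struct ztrace {
+#       vm_size_t zt_size;           /* outstanding bytes for this trace */
+#       uint32_t  zt_depth;          /* frames valid in zt_stack */
+#       void     *zt_stack[/*MAX*/]; /* saved backtrace */
+#       uint32_t  zt_collisions;
+#       uint32_t  zt_hit_count;
+#   };
+#   struct zallocation {
+#       uintptr_t za_element;        /* address of the live allocation */
+#       uint32_t  za_trace_index;    /* index into ztraces */
+#       vm_size_t za_size;
+#       uint32_t  za_hit_count;
+#   };
+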
+
+set $kgm_au_sentry_hash_table_size = 97
+
+define showsession1
+  set $p = (struct au_sentry *)$arg0
+  showptr $p
+  printf "  0x%08x  0x%08x  0x%016x", $p->se_auinfo.ai_asid, $p->se_auinfo.ai_auid, $p->se_auinfo.ai_flags
+  printf "  %3ld  %3ld", $p->se_refcnt, $p->se_procnt
+  printf "\n"
+end
+
+define showsessionhdr
+  printf "au_sentry "
+  showptrhdrpad
+  printf "  ASID        AUID        FLAGS               C    P\n"
+end
+
+define showsession
+  showsessionhdr
+  showsession1 $arg0
+end
+
+document showsession
+Syntax:  showsession <au_sentry address>
+| Display info about a specified audit session
+end
+
+define showallsessions
+  showsessionhdr
+  set $kgm_au_sentry_hash_table = au_sentry_bucket
+  set $i = $kgm_au_sentry_hash_table_size - 1
+  while $i >= 0
+    set $p = $kgm_au_sentry_hash_table[$i].lh_first
+    while $p != 0
+      showsession1 $p
+      set $p = $p->se_link.le_next
+    end
+    set $i = $i - 1
+  end
+end
+
+document showallsessions
+Syntax:  showallsessions
+| Prints the audit sessions in the global hash table
+end
+
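+# In C terms, showallsessions walks a <sys/queue.h> LIST-based hash table
+# (a sketch matching the lh_first/le_next accesses above):
+#
+#   for (int i = kgm_au_sentry_hash_table_size - 1; i >= 0; i--)
+#       for (struct au_sentry *se = au_sentry_bucket[i].lh_first;
+#            se != NULL; se = se->se_link.le_next)
+#           /* print one row per session, as showsession1 does */;
+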
+define showauhistorystack
+  set $ii = $arg0
+  set $pp = (void **)$arg1
+  while $ii > 0
+    printf "  "
+    x/i $pp[$ii-1]
+    set $ii = $ii - 1
+  end
+end
+
+define showauhistory1
+  set $p = (struct au_history *)$arg0
+  set $stack_depth = $p->stack_depth
+  set $stack = $p->stack
+  showptr $p->ptr
+  if $p->event == 1
+    printf "    REF"
+  end
+  if $p->event == 2
+    printf "  UNREF"
+  end
+  if $p->event == 3
+    printf "  BIRTH"
+  end
+  if $p->event == 4
+    printf "  DEATH"
+  end
+  if $p->event == 5
+    printf "  FIND"
+  end
+  set $p = &$p->se
+  printf "  0x%08x  0x%08x  0x%016x", $p->se_auinfo.ai_asid, $p->se_auinfo.ai_auid, $p->se_auinfo.ai_flags
+  printf "  %3ld  %3ld", $p->se_refcnt, $p->se_procnt
+  printf "\n"
+  showauhistorystack $stack_depth $stack
+end
+
+define showauhistory
+  set $i = (au_history_index-1) % au_history_size
+  if au_history_index >= au_history_size
+    set $n = au_history_size
+  else
+    set $n = au_history_index
+  end
+  while $n > 0
+    if au_history[$i].ptr != 0 && (0 == $arg0 || au_history[$i].ptr == $arg0)
+      printf "[% 4d]  ", $i
+      showauhistory1 &au_history[$i]
+    end
+    set $n = $n - 1
+    set $i = ($i - 1) % au_history_size
+  end
+end
+
+define showallauhistory
+  showauhistory 0
+end
+
+define showkwqheader
+	printf "        kwq     "
+    showptrhdrpad
+	printf "    kwqaddr     "
+    showptrhdrpad
+    printf "  inqueue  fakecount  highseq  lowseq   flags   lastunlock    p_rwwc"
+    printf "\n          "
+end
+
+define showkwqint
+	printf "              "
+    	set $kgm_kwq = (ksyn_wait_queue_t)$arg0
+	showptr $kgm_kwq
+	printf "  "
+	showptr $kgm_kwq->kw_addr
+	printf "   "
+	printf "  %d      ", $kgm_kwq->kw_inqueue
+	printf "    %d  ", $kgm_kwq->kw_fakecount
+	printf "     0x%x  ", $kgm_kwq->kw_highseq
+	printf "  0x%x  ", $kgm_kwq->kw_lowseq
+	printf "  0x%x  ", $kgm_kwq->kw_flags
+	printf "  0x%x  ", $kgm_kwq->kw_lastunlockseq
+	printf "    0x%x  ", $kgm_kwq->kw_pre_rwwc
+	printf "\n"
+end
+
+define show_kwq
+	showkwqheader
+	showkwqint $arg0
+end
+
+document show_kwq
+Syntax: (gdb) show_kwq <kwq>
+| Display info about one ksyn_wait_queue
+end
+
+# Internal routine used by "showpthread_mutex" to abstract possible loads from
+# user space
+define _loadfrommutex
+        if (kdp_pmap == 0)
+                set $kgm_loadval = *(uintptr_t *)$arg0
+        else
+        if ($kgm_x86_abi == 0xe)
+              set $kgm_loadval = *(uint32_t *)$arg0
+        else
+        if ($kgm_x86_abi == 0xf)
+            if ($kgm_mtype == $kgm_mtype_i386)
+                    _loadk32m64 $arg0
+                    set $kgm_loadval = $kgm_k32read64  
+            else
+                    set $kgm_loadval = *(uint32_t *)$arg0
+            end
+        end
+        end
+end
+end
+
+define show_pthreadmutex
+	set $newact = (struct thread *) $arg0
+	set $ourtask = (struct task *)($newact->task)
+    	set $our_user_is64 = ($ourtask->taskFeatures[0] & 0x80000000)
+	_kgm_flush_loop
+	set $mutex = (void *)$arg1
+	set kdp_pmap = $newact->task->map->pmap
+	_kgm_flush_loop
+	_kgm_update_loop
+	set $newiss = (x86_saved_state_t *) ($newact->machine.pcb->iss)
+	set $kgm_x86_abi = $newiss.flavor
+	if ($our_user_is64 != 0)
+		printf "\tUser 64Bit\n "
+		printf "\tSignature: "
+		set $nextval = $mutex
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+		printf "\tflags: "
+		set $nextval = $mutex + 12
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+		printf "\tSeqs: "
+		set $nextval = $mutex + 20
+		_loadfrommutex $nextval
+		printf "0x%x   ",$kgm_loadval
+		set $nextval = $mutex + 24
+		_loadfrommutex $nextval
+		printf "0x%x   ",$kgm_loadval
+		set $nextval = $mutex + 28
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+		printf "\ttid[0]: "
+		set $nextval = $mutex + 32
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+		printf "\ttid[1]: "
+		set $nextval = $mutex + 36
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+	else
+		printf "\tUser 32Bit\n "
+		printf "\tSignature: "
+		set $nextval = $mutex
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+		printf "\tflags: "
+		set $nextval = $mutex + 8
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+		printf "\tSeqs: "
+		set $nextval = $mutex + 16
+		_loadfrommutex $nextval
+		printf "0x%x   ",$kgm_loadval
+		set $nextval = $mutex + 20
+		_loadfrommutex $nextval
+		printf "0x%x   ",$kgm_loadval
+		set $nextval = $mutex + 24
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+		printf "\ttid[0]: "
+		set $nextval = $mutex + 32
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+		printf "\ttid[1]: "
+		set $nextval = $mutex + 36
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+	end
+	printf "\n"
+	resetstacks
+end
+
+
+document show_pthreadmutex
+Syntax: (gdb) show_pthreadmutex <thread> <user_mutexaddr>
+| Display the mutex contents from userspace.
+end
+
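+# The offsets probed by show_pthreadmutex above imply this rough user-space
+# mutex layout (inferred from the macro itself, not from the pthread headers;
+# every field is read and printed as a 32-bit value):
+#
+#                        64-bit process     32-bit process
+#   signature                 +0                 +0
+#   flags                    +12                 +8
+#   sequence words     +20 / +24 / +28    +16 / +20 / +24
+#   tid[0] / tid[1]        +32 / +36          +32 / +36
+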
+
+define show_pthreadcondition
+	set $newact = (struct thread *) $arg0
+	set $ourtask = (struct task *)($newact->task)
+    	set $our_user_is64 = ($ourtask->taskFeatures[0] & 0x80000000)
+	_kgm_flush_loop
+	set $cond = (void *)$arg1
+	set kdp_pmap = $newact->task->map->pmap
+	_kgm_flush_loop
+	_kgm_update_loop
+	set $newiss = (x86_saved_state_t *) ($newact->machine.pcb->iss)
+	set $kgm_x86_abi = $newiss.flavor
+	if ($our_user_is64 != 0)
+		printf "\tUser 64Bit\n "
+		printf "\tSignature: "
+		set $nextval = $cond
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+		printf "\tflags: "
+		set $nextval = $cond + 12
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+		printf "\tSeqs: "
+		set $nextval = $cond + 24
+		_loadfrommutex $nextval
+		printf "0x%x   ",$kgm_loadval
+		set $nextval = $cond + 28
+		_loadfrommutex $nextval
+		printf "0x%x   ",$kgm_loadval
+		set $nextval = $cond + 32
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+		printf "\tMutex lowaddr: "
+		set $nextval = $cond + 16
+		_loadfrommutex $nextval
+		printf "0x%08x\n",$kgm_loadval
+		printf "\tMutex highaddr: "
+		set $nextval = $cond + 20
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+	else
+		printf "\tUser 32Bit\n "
+		printf "\tSignature: "
+		set $nextval = $cond
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+		printf "\tflags: "
+		set $nextval = $cond + 8
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+		printf "\tSeqs: "
+		set $nextval = $cond + 16
+		_loadfrommutex $nextval
+		printf "0x%x   ",$kgm_loadval
+		set $nextval = $cond + 20
+		_loadfrommutex $nextval
+		printf "0x%x   ",$kgm_loadval
+		set $nextval = $cond + 24
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+		printf "\tMutex addr: "
+		set $nextval = $cond + 12
+		_loadfrommutex $nextval
+		printf "0x%x\n",$kgm_loadval
+	end
+	printf "\n"
+	resetstacks
+end
+
+
+document show_pthreadcondition
+Syntax: (gdb) show_pthreadcondition <thread> <user_cvaddr>
+| Display the condition variable contents from userspace.
+end
+
+define processortimers
+    set $kgm_p = processor_list
+    printf "Processor\t\t\t Last dispatch\t\t Next deadline\t\t difference\n"
+    while $kgm_p
+        printf "Processor %d: %p\t", $kgm_p->cpu_id, $kgm_p
+        printf " 0x%016llx\t", $kgm_p->last_dispatch
+        set $kgm_rt_timer = &(cpu_data_ptr[$kgm_p->cpu_id].rtclock_timer)
+        printf " 0x%016llx    \t", $kgm_rt_timer->deadline
+        set $kgm_rt_diff =  ((long long)$kgm_p->last_dispatch) - ((long long)$kgm_rt_timer->deadline)
+        printf " 0x%016llx  ", $kgm_rt_diff
+# Normally $kgm_rt_diff will be close to the last dispatch time, or negative.
+# When it isn't, mark the result as bad. This is a suggestion, not an absolute.
+        if ( ($kgm_rt_diff > 0) && ((long long)$kgm_p->last_dispatch) - ($kgm_rt_diff + 1) > 0 )
+            printf "probably BAD\n"
+        else
+            printf "(ok)\n"
+        end
+        # dump the call entries (Intel only)
+        if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any)
+            printf "Next deadline set at: 0x%016llx. Timer call list:", $kgm_rt_timer->when_set
+            set $kgm_entry = (queue_t *)$kgm_rt_timer->queue
+            if ($kgm_entry == $kgm_rt_timer)
+                printf " (empty)\n"
+            else
+                printf "\n entry:      "
+                showptrhdrpad
+                printf "deadline           soft_deadline      delta      (*func)(param0,param1)\n"
+                while $kgm_entry != $kgm_rt_timer
+                    set $kgm_timer_call = (timer_call_t) $kgm_entry
+                    set $kgm_call_entry = (struct call_entry *) $kgm_entry
+                    printf " "
+                    showptr $kgm_entry
+                    printf ": 0x%016llx 0x%016llx 0x%08x (%p)(%p,%p)\n", \
+                        $kgm_call_entry->deadline, \
+                        $kgm_timer_call->soft_deadline, \
+                        ($kgm_call_entry->deadline - $kgm_timer_call->soft_deadline), \
+                        $kgm_call_entry->func, \
+                        $kgm_call_entry->param0, $kgm_call_entry->param1
+                    set $kgm_entry = $kgm_entry->next
+                end
+            end
+        end
+        set $kgm_p = $kgm_p->processor_list
     end
+    printf "\n"
 end
 
-document kdp-connect
-Syntax: (gdb) kdpconnect <address-of-remote-host>
-| Attach to the machine with given hostname or IP address, or 'localhost' if blank 
+document processortimers
+Syntax: (gdb) processortimers
+| Print details of processor timers, noting any timer that might be suspicious
 end
+
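+# The "probably BAD" heuristic above, in C terms (a sketch of the macro's
+# test; values are mach absolute-time counts cast to signed 64-bit):
+#
+#   int64_t diff = (int64_t)last_dispatch - (int64_t)deadline;
+#   if (diff > 0 && ((int64_t)last_dispatch - (diff + 1)) > 0)
+#       /* deadline already passed, and not merely by rounding: suspicious */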
+
diff --git a/libkern/Makefile b/libkern/Makefile
index 583dcb221..ff3bbec5f 100644
--- a/libkern/Makefile
+++ b/libkern/Makefile
@@ -9,37 +9,24 @@ include $(MakeInc_def)
 INSTINC_SUBDIRS = \
 	libkern \
 	uuid
-
-INSTINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS}
-
 INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS}
-
 INSTINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS}
-
 INSTINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS}
 
 EXPINC_SUBDIRS = \
 	libkern \
 	uuid
-
-EXPINC_SUBDIRS_PPC = ${EXPINC_SUBDIRS}
-
 EXPINC_SUBDIRS_I386 = ${EXPINC_SUBDIRS}
-
 EXPINC_SUBDIRS_X86_64 = ${EXPINC_SUBDIRS}
-
 EXPINC_SUBDIRS_ARM = ${EXPINC_SUBDIRS}
 
-SETUP_SUBDIRS = conf
-
-COMP_SUBDIRS_PPC = conf kmod
+SETUP_SUBDIRS =
 
 COMP_SUBDIRS_I386 = conf kmod
-
 COMP_SUBDIRS_X86_64 = conf kmod
-
 COMP_SUBDIRS_ARM = conf kmod
 
+
 INST_SUBDIRS = kmod
 
 include $(MakeInc_rule)
diff --git a/libkern/OSKextLib.cpp b/libkern/OSKextLib.cpp
index 4876839af..c782a830f 100644
--- a/libkern/OSKextLib.cpp
+++ b/libkern/OSKextLib.cpp
@@ -29,10 +29,6 @@
 extern "C" {
 #include <libkern/OSKextLibPrivate.h>
 #include <libkern/mkext.h>
-
-#include <mach/host_special_ports.h>
-#include <kextd/kextd_mach.h>
-#include <kern/host.h>
 };
 
 #include <libkern/c++/OSContainers.h>
@@ -185,45 +181,6 @@ OSReturn OSKextCancelRequest(
 #if PRAGMA_MARK
 #pragma mark MIG Functions & Wrappers
 #endif
-/*********************************************************************
-* This function is for use only by OSKextLib.cpp and OSKext.cpp.
-*
-* xxx - can we cache the kextd port or do we have to get it each time
-* xxx - in case it relaunches?
-*********************************************************************/
-extern void ipc_port_release_send(ipc_port_t);
-
-kern_return_t OSKextPingKextd(void)
-{
-    kern_return_t result     = KERN_FAILURE;
-    mach_port_t   kextd_port = IPC_PORT_NULL;
-
-    result = host_get_kextd_port(host_priv_self(), &kextd_port);
-    if (result != KERN_SUCCESS || !IPC_PORT_VALID(kextd_port)) {
-	OSKextLog(/* kext */ NULL,
-            kOSKextLogErrorLevel |
-            kOSKextLogIPCFlag,
-            "Can't get kextd port.");
-        goto finish;
-    }
-
-    result = kextd_ping(kextd_port);
-    if (result != KERN_SUCCESS) {
-	OSKextLog(/* kext */ NULL,
-            kOSKextLogErrorLevel |
-            kOSKextLogIPCFlag,
-            "kextd ping failed (0x%x).", (int)result);
-        goto finish;
-    }
-
-finish:
-    if (IPC_PORT_VALID(kextd_port)) {
-        ipc_port_release_send(kextd_port);
-    }
-
-    return result;
-}
-
 /*********************************************************************
 * IMPORTANT: Once we have done the vm_map_copyout(), we *must* return
 * KERN_SUCCESS or the kernel map gets messed up (reason as yet
@@ -442,6 +399,16 @@ void OSKextRemoveKextBootstrap(void)
     return;
 }
 
+#if CONFIG_DTRACE
+/*********************************************************************
+*********************************************************************/
+void OSKextRegisterKextsWithDTrace(void)
+{
+    OSKext::registerKextsWithDTrace();
+    return;
+}
+#endif /* CONFIG_DTRACE */
+
 /*********************************************************************
 *********************************************************************/
 void kext_dump_panic_lists(int (*printf_func)(const char * fmt, ...))
@@ -491,7 +458,7 @@ kmod_dump_log(
 * Compatibility implementation for kmod_get_info() host_priv routine.
 * Only supported on old 32-bit architectures.
 *********************************************************************/
-#if __ppc__ || __i386__
+#if __i386__
 kern_return_t
 kext_get_kmod_info(
     kmod_info_array_t      * kmod_list,
@@ -499,6 +466,16 @@ kext_get_kmod_info(
 {
     return OSKext::getKmodInfo(kmod_list, kmodCount);
 }
-#endif /* __ppc__ || __i386__ */
+#endif /* __i386__ */
+
+#if PRAGMA_MARK
+#pragma mark Loaded Kext Summary
+#endif
+
+void 
+OSKextLoadedKextSummariesUpdated(void)
+{
+    // Do nothing.
+}
 
 };
diff --git a/libkern/OSKextVersion.c b/libkern/OSKextVersion.c
index bc1cc253c..f9013853c 100644
--- a/libkern/OSKextVersion.c
+++ b/libkern/OSKextVersion.c
@@ -388,6 +388,7 @@ finish:
 }
 
 /*********************************************************************
+* This function must be safe to call in panic context.
 *********************************************************************/
 Boolean OSKextVersionGetString(
     OSKextVersion   aVersion,
diff --git a/libkern/c++/OSKext.cpp b/libkern/c++/OSKext.cpp
index acc3e3e98..14f0643c2 100644
--- a/libkern/c++/OSKext.cpp
+++ b/libkern/c++/OSKext.cpp
@@ -38,8 +38,13 @@ extern "C" {
 #include <libkern/prelink.h>
 #include <libkern/version.h>
 #include <libkern/zlib.h>
+#include <mach/host_special_ports.h>
 #include <mach/mach_vm.h>
+#include <mach/mach_time.h>
 #include <sys/sysctl.h>
+#include <uuid/uuid.h>
+// 04/18/11 - gab: <rdar://problem/9236163>
+#include <sys/random.h>
 };
 
 #include <libkern/OSKextLibPrivate.h>
@@ -51,15 +56,14 @@ extern "C" {
 #include <IOKit/IORegistryEntry.h>
 #include <IOKit/IOService.h>
 
+#include <IOKit/IOStatisticsPrivate.h>
+
 #if PRAGMA_MARK
 #pragma mark External & Internal Function Protos
 #endif
 /*********************************************************************
 *********************************************************************/
 extern "C" {
-// in libkern/OSKextLib.cpp, not in header for a reason.
-extern kern_return_t OSKextPingKextd(void);
-
 extern int  IODTGetLoaderInfo(const char * key, void ** infoAddr, int * infoSize);
 extern void IODTFreeLoaderInfo(const char * key, void * infoAddr, int infoSize);
 extern void OSRuntimeUnloadCPPForSegment(kernel_segment_command_t * segment);
@@ -84,6 +88,11 @@ static OSReturn _OSDictionarySetCStringValue(
     OSDictionary * dict,
     const char   * key,
     const char   * value);
+    
+// We really should add containsObject() & containsCString to OSCollection & subclasses.
+// So few pad slots, though....
+static bool _OSArrayContainsCString(OSArray * array, const char * cString);
+
 #if CONFIG_MACF_KEXT
 static void * MACFCopyModuleDataForKext(
     OSKext                 * theKext,
@@ -177,18 +186,32 @@ typedef struct MkextEntryRef {
 
 static  bool                sPrelinkBoot               = false;
 static  bool                sSafeBoot                  = false;
+static  bool                sKeepSymbols               = false;
 
-/******
-* sKextLock is the principal lock for OSKext. Below, there is also an
-* sKextInnerLock used to guard access to data accessed on in-calls from
-* IOService. This 2nd lock is required to prevent a deadlock
-* with IOService calling back into OSKext::considerUnloads()
-* on a separate thread during a kext load operation.
+/*********************************************************************
+* sKextLock is the principal lock for OSKext, and guards all static
+* and global variables not owned by other locks (declared further
+* below). It must be taken by any entry-point method or function,
+* including internal functions called on scheduled threads.
+*
+* sKextLock and sKextInnerLock are recursive due to multiple functions
+* that are called both externally and internally. The other locks are
+* nonrecursive.
+*
+* Which locks are taken depends on what they protect, but if more than
+* one must be taken, they must always be locked in this order
+* (and unlocked in reverse order) to prevent deadlocks:
+*
+*    1. sKextLock
+*    2. sKextInnerLock
+*    3. sKextSummariesLock
+*    4. sKextLoggingLock
 */
 static IORecursiveLock    * sKextLock                  = NULL;
 
 static OSDictionary       * sKextsByID                 = NULL;
 static OSArray            * sLoadedKexts               = NULL;
+static OSArray            * sUnloadedPrelinkedKexts    = NULL;
 
 // Requests to kextd waiting to be picked up.
 static OSArray            * sKernelRequests            = NULL;
@@ -207,7 +230,11 @@ static bool                 sKextdActive               = false;
 static bool                 sDeferredLoadSucceeded     = false;
 static bool                 sConsiderUnloadsExecuted   = false;
 
+#if NO_KEXTD
+static bool                 sKernelRequestsEnabled     = false;
+#else
 static bool                 sKernelRequestsEnabled     = true;
+#endif
 static bool                 sLoadEnabled               = true;
 static bool                 sUnloadEnabled             = true;
 
@@ -252,32 +279,26 @@ kmod_info_t * kmod = NULL;
 
 #define KEXT_PANICLIST_SIZE  (2 * PAGE_SIZE)
 
-static char     * unloaded_kext_paniclist        = NULL;
-static uint32_t   unloaded_kext_paniclist_size   = 0;
-static uint32_t   unloaded_kext_paniclist_length = 0;
+
+static char     * loaded_kext_paniclist         = NULL;
+static uint32_t   loaded_kext_paniclist_size    = 0;
+static uint32_t   loaded_kext_paniclist_length  = 0;
+
 AbsoluteTime      last_loaded_timestamp;
+static char       last_loaded_str[2*KMOD_MAX_NAME];
+static u_long     last_loaded_strlen            = 0;
+static void     * last_loaded_address           = NULL;
+static u_long     last_loaded_size              = 0;
 
-static char     * loaded_kext_paniclist          = NULL;
-static uint32_t   loaded_kext_paniclist_size     = 0;
-static uint32_t   loaded_kext_paniclist_length   = 0;
 AbsoluteTime      last_unloaded_timestamp;
-static void     * last_unloaded_address          = NULL;
-#if __LP64__
-static uint64_t   last_unloaded_size             = 0;
-#else
-static uint32_t   last_unloaded_size             = 0;
-#endif /* __LP64__ */
-
-};
+static char       last_unloaded_str[2*KMOD_MAX_NAME];
+static u_long     last_unloaded_strlen          = 0;
+static void     * last_unloaded_address         = NULL;
+static u_long     last_unloaded_size            = 0;
 
 /*********************************************************************
-* Because we can start IOService matching from OSKext (via IOCatalogue)
-* and IOService can call into OSKext, there is potential for cross-lock
-* contention, so OSKext needs two locks. The regular sKextLock above
-* guards most OSKext class/static variables, and sKextInnerLock guards
-* variables that can be accessed on in-calls from IOService, currently:
-*
-*   * OSKext::considerUnloads()
+* sKextInnerLock protects against cross-calls with IOService and
+* IOCatalogue, and owns the variables declared immediately below.
 *
 * Note that sConsiderUnloadsExecuted above belongs to sKextLock!
 *
@@ -286,9 +307,6 @@ static uint32_t   last_unloaded_size             = 0;
 * locks in an entry point to OSKext; if you need to do so, you must
 * spawn an independent thread to avoid potential deadlocks for threads
 * calling into OSKext.
-*
-* All static variables from here to the closing comment block fall
-* under sKextInnerLock.
 **********/
 static IORecursiveLock *    sKextInnerLock             = NULL;
 
@@ -301,11 +319,33 @@ static thread_call_t        sUnloadCallout             = 0;
 static thread_call_t        sDestroyLinkContextThread  = 0;      // one-shot, one-at-a-time thread
 static bool                 sSystemSleep               = false;  // true when system going to sleep
 
+/*********************************************************************
+* Backtraces can be printed at various times so we need a tight lock
+* on data used for that. sKextSummariesLock protects the variables
+* declared immediately below.
+*
+* gLoadedKextSummaries is accessed by other modules, but only during
+* a panic so the lock isn't needed then.
+**********/
+static IOLock                 * sKextSummariesLock                = NULL;
+
+void (*sLoadedKextSummariesUpdated)(void) = OSKextLoadedKextSummariesUpdated;
+OSKextLoadedKextSummaryHeader * gLoadedKextSummaries = NULL;
+static size_t sLoadedKextSummariesAllocSize = 0;
+OSKextLoadedKextSummaryHeader * sPrevLoadedKextSummaries = NULL;
+static size_t sPrevLoadedKextSummariesAllocSize = 0;
+};
+
+/*********************************************************************
+* sKextLoggingLock protects the logging variables declared immediately below.
+**********/
+static IOLock             * sKextLoggingLock           = NULL;
+
 static  const OSKextLogSpec kDefaultKernelLogFilter    = kOSKextLogBasicLevel |
                                                          kOSKextLogVerboseFlagsMask;
 static  OSKextLogSpec       sKernelLogFilter           = kDefaultKernelLogFilter;
 static  bool                sBootArgLogFilterFound     = false;
-SYSCTL_INT(_debug, OID_AUTO, kextlog, CTLFLAG_RW, &sKernelLogFilter,
+SYSCTL_INT(_debug, OID_AUTO, kextlog, CTLFLAG_RW | CTLFLAG_LOCKED, &sKernelLogFilter,
     sKernelLogFilter, "kernel kext logging");
 
 static  OSKextLogSpec       sUserSpaceKextLogFilter    = kOSKextLogSilentFilter;
@@ -338,6 +378,12 @@ void osdata_vm_deallocate(void * ptr, unsigned int length)
     (void)vm_deallocate(kernel_map, (vm_offset_t)ptr, length);
     return;
 }
+
+void osdata_kext_free(void * ptr, unsigned int length)
+{
+    (void)kext_free((vm_offset_t)ptr, length);
+}
+
 };
 
 #if PRAGMA_MARK
@@ -370,9 +416,6 @@ kern_allocate(
     }
 
    /* Create an OSData wrapper for the allocated buffer.
-    * Note that we do not set a dealloc function on it here.
-    * We have to call vm_map_unwire() on it in OSKext::unload()
-    * and an OSData dealloc function can't take all those parameters.
     */
     linkBuffer = OSData::withBytesNoCopy((void *)result, roundSize);
     if (!linkBuffer) {
@@ -383,6 +426,7 @@ kern_allocate(
             theKext->getIdentifierCString());
         goto finish;
     }
+    linkBuffer->setDeallocFunction(osdata_kext_free);
 
     OSKextLog(theKext,
         kOSKextLogProgressLevel |
@@ -453,6 +497,41 @@ kxld_log_callback(
     OSKextVLog(theKext, logSpec, format, argList);
 }
 
+#if PRAGMA_MARK
+#pragma mark IOStatistics defines
+#endif
+
+#if IOKITSTATS
+
+#define notifyKextLoadObservers(kext, kmod_info) \
+do { \
+	IOStatistics::onKextLoad(kext, kmod_info); \
+} while (0)
+
+#define notifyKextUnloadObservers(kext) \
+do { \
+	IOStatistics::onKextUnload(kext); \
+} while (0)
+
+#define notifyAddClassObservers(kext, addedClass, flags) \
+do { \
+	IOStatistics::onClassAdded(kext, addedClass); \
+} while (0)
+
+#define notifyRemoveClassObservers(kext, removedClass, flags) \
+do { \
+	IOStatistics::onClassRemoved(kext, removedClass); \
+} while (0)
+
+#else
+
+#define notifyKextLoadObservers(kext, kmod_info)
+#define notifyKextUnloadObservers(kext)
+#define notifyAddClassObservers(kext, addedClass, flags)
+#define notifyRemoveClassObservers(kext, removedClass, flags)
+
+#endif /* IOKITSTATS */
+
 #if PRAGMA_MARK
 #pragma mark Module Config (Startup & Shutdown)
 #endif
@@ -484,18 +563,23 @@ OSKext::initialize(void)
     */
     sKextLock = IORecursiveLockAlloc();
     sKextInnerLock = IORecursiveLockAlloc();
+    sKextSummariesLock = IOLockAlloc();
+    sKextLoggingLock = IOLockAlloc();
     assert(sKextLock);
     assert(sKextInnerLock);
+    assert(sKextSummariesLock);
+    assert(sKextLoggingLock);
 
     sKextsByID = OSDictionary::withCapacity(kOSKextTypicalLoadCount);
     sLoadedKexts = OSArray::withCapacity(kOSKextTypicalLoadCount);
+    sUnloadedPrelinkedKexts = OSArray::withCapacity(kOSKextTypicalLoadCount / 10);
     sKernelRequests = OSArray::withCapacity(0);
     sPostedKextLoadIdentifiers = OSSet::withCapacity(0);
     sAllKextLoadIdentifiers = OSSet::withCapacity(kOSKextTypicalLoadCount);
     sRequestCallbackRecords = OSArray::withCapacity(0);
     assert(sKextsByID && sLoadedKexts && sKernelRequests &&
         sPostedKextLoadIdentifiers && sAllKextLoadIdentifiers &&
-        sRequestCallbackRecords);
+        sRequestCallbackRecords && sUnloadedPrelinkedKexts);
 
    /* Read the log flag boot-args and set the log flags.
     */
@@ -521,6 +605,8 @@ OSKext::initialize(void)
             "only valid OSBundleRequired kexts will be loaded.");
     }
 
+    PE_parse_boot_argn("keepsyms", &sKeepSymbols, sizeof(sKeepSymbols));
+
    /* Set up an OSKext instance to represent the kernel itself.
     */
     sKernelKext = new OSKext;
@@ -538,7 +624,6 @@ OSKext::initialize(void)
     sKernelKext->version = OSKextParseVersionString(osrelease);
     sKernelKext->compatibleVersion = sKernelKext->version;
     sKernelKext->linkedExecutable = kernelExecutable;
-    // linkState will be set first time we do a link
     
     sKernelKext->flags.hasAllDependencies = 1;
     sKernelKext->flags.kernelComponent = 1;
@@ -614,6 +699,8 @@ OSKext::initialize(void)
         kOSKextLogGeneralFlag,
         "Kext system initialized.");
 
+    notifyKextLoadObservers(sKernelKext, sKernelKext->kmod_info);
+	
     return;
 }
 
@@ -628,7 +715,6 @@ OSKext::removeKextBootstrap(void)
     OSReturn                   result                = kOSReturnError;
     
     static bool                alreadyDone           = false;
-    boolean_t                  keepsyms              = FALSE;
 
     const char               * dt_kernel_header_name = "Kernel-__HEADER";
     const char               * dt_kernel_symtab_name = "Kernel-__SYMTAB";
@@ -639,11 +725,6 @@ OSKext::removeKextBootstrap(void)
     int                        dt_result             = 0;
 
     kernel_segment_command_t * seg_to_remove         = NULL;
-#if __ppc__ || __arm__
-    const char               * dt_segment_name       = NULL;
-    void                     * segment_paddress      = NULL;
-    int                        segment_size          = 0;
-#endif
 
    /* This must be the very first thing done by this function.
     */
@@ -661,8 +742,6 @@ OSKext::removeKextBootstrap(void)
         kOSKextLogGeneralFlag,
         "Jettisoning kext bootstrap segments.");
 
-    PE_parse_boot_argn("keepsyms", &keepsyms, sizeof(keepsyms));
- 
    /*****
     * Dispose of unnecessary stuff that the booter didn't need to load.
     */
@@ -688,21 +767,17 @@ OSKext::removeKextBootstrap(void)
         OSRuntimeUnloadCPPForSegment(seg_to_remove);
     }
 
-#if __ppc__ || __arm__
-   /* Free the memory that was set up by bootx.
-    */
-    dt_segment_name = "Kernel-__KLD";
-    if (0 == IODTGetLoaderInfo(dt_segment_name, &segment_paddress, &segment_size)) {
-        IODTFreeLoaderInfo(dt_segment_name, (void *)segment_paddress,
-            (int)segment_size);
-    }
-#elif __i386__ || __x86_64__
+#if   __i386__ || __x86_64__
    /* On x86, use the mapping data from the segment load command to
     * unload KLD directly.
     * This may invalidate any assumptions about  "avail_start"
     * defining the lower bound for valid physical addresses.
     */
     if (seg_to_remove && seg_to_remove->vmaddr && seg_to_remove->vmsize) {
+    	// 04/18/11 - gab: <rdar://problem/9236163>
+    	// overwrite memory occupied by KLD segment with random data before
+    	// releasing it.
+    	read_random((void *) seg_to_remove->vmaddr, seg_to_remove->vmsize);
         ml_static_mfree(seg_to_remove->vmaddr, seg_to_remove->vmsize);
     }
 #else
@@ -711,7 +786,7 @@ OSKext::removeKextBootstrap(void)
 
     seg_to_remove = NULL;
 
-   /*****
+    /*****
     * Prelinked kernel's symtab (if there is one).
     */
     kernel_section_t * sect;
@@ -720,36 +795,101 @@ OSKext::removeKextBootstrap(void)
         ml_static_mfree(sect->addr, sect->size);
     }
 
-   /*****
-    * Dump the LINKEDIT segment, unless keepsyms is set.
-    */
-    if (!keepsyms) {
-        seg_to_remove = (kernel_segment_command_t *)getsegbyname("__LINKEDIT");
-        if (seg_to_remove) {
-            OSRuntimeUnloadCPPForSegment(seg_to_remove);
+    seg_to_remove = (kernel_segment_command_t *)getsegbyname("__LINKEDIT");
+
+    /* kxld always needs the kernel's __LINKEDIT segment, but we can make it
+     * pageable, unless keepsyms is set.  To do that, we have to copy it from
+     * its booter-allocated memory, free the booter memory, reallocate proper
+     * managed memory, then copy the segment back in.
+     */
+#if CONFIG_KXLD
+    if (!sKeepSymbols) {
+        kern_return_t mem_result;
+        void *seg_copy = NULL;
+        void *seg_data = NULL;
+        vm_map_offset_t seg_offset = 0;
+        vm_map_offset_t seg_copy_offset = 0;
+        vm_map_size_t seg_length = 0;
+
+        seg_data = (void *) seg_to_remove->vmaddr;
+        seg_offset = (vm_map_offset_t) seg_to_remove->vmaddr;
+        seg_length = (vm_map_size_t) seg_to_remove->vmsize;
+
+       /* Allocate space for the LINKEDIT copy.
+        */
+        mem_result = kmem_alloc(kernel_map, (vm_offset_t *) &seg_copy,
+            seg_length);
+        if (mem_result != KERN_SUCCESS) {
+            OSKextLog(/* kext */ NULL,
+                kOSKextLogErrorLevel |
+                kOSKextLogGeneralFlag | kOSKextLogArchiveFlag,
+                "Can't copy __LINKEDIT segment for VM reassign.");
+            goto finish;
         }
+        seg_copy_offset = (vm_map_offset_t) seg_copy;
 
-#if __ppc__ || __arm__
-        dt_segment_name = "Kernel-__LINKEDIT";
-        if (0 == IODTGetLoaderInfo(dt_segment_name,
-            &segment_paddress, &segment_size)) {
+       /* Copy it out.
+        */
+        memcpy(seg_copy, seg_data, seg_length);
+        
+       /* Dump the booter memory.
+        */
+        ml_static_mfree(seg_offset, seg_length);
 
-            IODTFreeLoaderInfo(dt_segment_name, (void *)segment_paddress,
-                (int)segment_size);
+       /* Set up the VM region.
+        */
+        mem_result = vm_map_enter_mem_object(
+            kernel_map,
+            &seg_offset,
+            seg_length, /* mask */ 0, 
+            VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, 
+            (ipc_port_t)NULL,
+            (vm_object_offset_t) 0,
+            /* copy */ FALSE,
+            /* cur_protection */ VM_PROT_ALL,
+            /* max_protection */ VM_PROT_ALL,
+            /* inheritance */ VM_INHERIT_DEFAULT);
+        if ((mem_result != KERN_SUCCESS) || 
+            (seg_offset != (vm_map_offset_t) seg_data))
+        {
+            OSKextLog(/* kext */ NULL,
+                kOSKextLogErrorLevel |
+                kOSKextLogGeneralFlag | kOSKextLogArchiveFlag,
+                "Can't create __LINKEDIT VM entry at %p, length 0x%llx (error 0x%x).",
+                seg_data, seg_length, mem_result);
+            goto finish;
         }
-#elif __i386__ || __x86_64__
+
+       /* And copy it back.
+        */
+        memcpy(seg_data, seg_copy, seg_length);
+
+       /* Free the copy.
+        */
+        kmem_free(kernel_map, seg_copy_offset, seg_length);
+    }
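+
+   /* Illustrative note, not part of this change: the sequence above is a
+    * "move into managed memory" idiom (copy the segment out, free the
+    * booter pages, map pageable VM at the same address, copy the segment
+    * back in), so pointers into __LINKEDIT stay valid while the pages
+    * become eligible for pageout.
+    */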
+#else /* !CONFIG_KXLD */
+
+    /*****
+    * Dump the LINKEDIT segment, unless keepsyms is set.
+    */
+    if (!sKeepSymbols) {
+#if   __i386__ || __x86_64__
         if (seg_to_remove && seg_to_remove->vmaddr && seg_to_remove->vmsize) {
             ml_static_mfree(seg_to_remove->vmaddr, seg_to_remove->vmsize);
         }
-#else
+#else /* !(__i386__ || __x86_64__) */
+
 #error arch
-#endif
+#endif /* __i386__ || __x86_64__ */
+
     } else {
         OSKextLog(/* kext */ NULL,
-            kOSKextLogBasicLevel |
-            kOSKextLogGeneralFlag,
-            "keepsyms boot arg specified; keeping linkedit segment for symbols.");
+           kOSKextLogBasicLevel |
+           kOSKextLogGeneralFlag,
+           "keepsyms boot arg specified; keeping linkedit segment for symbols.");
     }
+#endif /* CONFIG_KXLD */
 
     seg_to_remove = NULL;
 
@@ -866,13 +1006,61 @@ OSKext::setKextdActive(Boolean active)
     IORecursiveLockLock(sKextLock);
     sKextdActive = active;
     if (sKernelRequests->getCount()) {
-        OSKextPingKextd();
+        OSKext::pingKextd();
     }
     IORecursiveLockUnlock(sKextLock);
 
     return;
 }
 
+/*********************************************************************
+* OSKextLib.cpp might need access to this someday but for now it's
+* private.
+*********************************************************************/
+extern "C" {
+extern void ipc_port_release_send(ipc_port_t);
+};
+
+/* static */
+OSReturn
+OSKext::pingKextd(void)
+{
+    OSReturn    result     = kOSReturnError;
+#if !NO_KEXTD
+    mach_port_t kextd_port = IPC_PORT_NULL;
+
+    if (!sKextdActive) {
+        result = kOSKextReturnDisabled;  // kextd not running; treat as unavailable
+        goto finish;
+    }
+
+    result = host_get_kextd_port(host_priv_self(), &kextd_port);
+    if (result != KERN_SUCCESS || !IPC_PORT_VALID(kextd_port)) {
+        OSKextLog(/* kext */ NULL,
+            kOSKextLogErrorLevel |
+            kOSKextLogIPCFlag,
+            "Can't get kextd port.");
+        goto finish;
+    }
+
+    result = kextd_ping(kextd_port);
+    if (result != KERN_SUCCESS) {
+        OSKextLog(/* kext */ NULL,
+            kOSKextLogErrorLevel |
+            kOSKextLogIPCFlag,
+            "kextd ping failed (0x%x).", (int)result);
+        goto finish;
+    }
+
+finish:
+    if (IPC_PORT_VALID(kextd_port)) {
+        ipc_port_release_send(kextd_port);
+    }
+#endif
+
+    return result;
+}
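+
+/* Illustrative sketch, not part of this change: callers treat pingKextd()
+ * as best-effort, as loadKextWithIdentifier() does below. A hypothetical
+ * call site might look like:
+ *
+ *     OSReturn ping = OSKext::pingKextd();
+ *     if (ping == kOSKextReturnDisabled) {
+ *         // kextd isn't running yet (early boot) or is disabled; queued
+ *         // requests stay in sKernelRequests until it comes up.
+ *     }
+ */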
+
 /*********************************************************************
 *********************************************************************/
 /* static */
@@ -893,7 +1081,9 @@ OSKext::setDeferredLoadSucceeded(Boolean succeeded)
 void
 OSKext::willShutdown(void)
 {
+#if !NO_KEXTD
     OSReturn       checkResult = kOSReturnError;
+#endif
     OSDictionary * exitRequest = NULL;  // must release
 
     IORecursiveLockLock(sKextLock);
@@ -903,6 +1093,7 @@ OSKext::willShutdown(void)
     OSKext::setAutounloadsEnabled(false);
     OSKext::setKernelRequestsEnabled(false);
 
+#if !NO_KEXTD
     OSKextLog(/* kext */ NULL,
         kOSKextLogProgressLevel |
         kOSKextLogGeneralFlag,
@@ -917,9 +1108,11 @@ OSKext::willShutdown(void)
         goto finish;
     }
 
-    OSKextPingKextd();
+    OSKext::pingKextd();
 
 finish:
+#endif
+
     IORecursiveLockUnlock(sKextLock);
 
     OSSafeRelease(exitRequest);
@@ -1129,15 +1322,12 @@ OSKext::initWithPrelinkedInfoDict(
     OSDictionary * anInfoDict)
 {
     bool            result              = false;
-    kern_return_t   alloc_result        = KERN_SUCCESS;
     OSString      * kextPath            = NULL;  // do not release
     OSNumber      * addressNum          = NULL;  // reused; do not release
     OSNumber      * lengthNum           = NULL;  // reused; do not release
     void          * data                = NULL;  // do not free
     void          * srcData             = NULL;  // do not free
     OSData        * prelinkedExecutable = NULL;  // must release
-    void          * linkStateCopy       = NULL;  // kmem_free on error
-    uint32_t        linkStateLength     = 0;
     uint32_t        length              = 0;     // reused
 
     if (!super::init()) {
@@ -1153,62 +1343,19 @@ OSKext::initWithPrelinkedInfoDict(
         goto finish;
     }
 
-   /* Don't need the path to be in the info dictionary any more.
+   /* Also get the executable's bundle-relative path if present.
+    * Don't look for an arch-specific path property.
     */
-    anInfoDict->removeObject(kPrelinkBundlePathKey);
+    executableRelPath = OSDynamicCast(OSString,
+        anInfoDict->getObject(kPrelinkExecutableRelativePathKey));
+    if (executableRelPath) {
+        executableRelPath->retain();
+    }
 
-   /* If we have a link state, create an OSData wrapper for it.
+   /* Don't need the paths to be in the info dictionary any more.
     */
-    addressNum = OSDynamicCast(OSNumber,
-        anInfoDict->getObject(kPrelinkLinkStateKey));
-    if (addressNum) {
-        lengthNum = OSDynamicCast(OSNumber, 
-            anInfoDict->getObject(kPrelinkLinkStateSizeKey));
-        if (!lengthNum) {
-            OSKextLog(this,
-                kOSKextLogErrorLevel |
-                kOSKextLogArchiveFlag,
-                "Kext %s can't find prelinked kext link state size.",
-                getIdentifierCString());
-            goto finish;
-        }
-
-        data = (void *) (intptr_t) (addressNum->unsigned64BitValue());
-        linkStateLength = (uint32_t) (lengthNum->unsigned32BitValue());
-
-        anInfoDict->removeObject(kPrelinkLinkStateKey);
-        anInfoDict->removeObject(kPrelinkLinkStateSizeKey);
-
-       /* Copy the link state out of the booter-provided memory so it is in
-        * the VM system and we can page it out.
-        */
-        alloc_result = kmem_alloc_pageable(kernel_map,
-            (vm_offset_t *)&linkStateCopy, linkStateLength);
-        if (alloc_result != KERN_SUCCESS) {
-            OSKextLog(this,
-                kOSKextLogErrorLevel |
-                kOSKextLogArchiveFlag,
-                "Kext %s failed to copy prelinked link state.",
-                getIdentifierCString());
-            goto finish;
-        }
-        memcpy(linkStateCopy, data, linkStateLength);
-
-        linkState = OSData::withBytesNoCopy(linkStateCopy, linkStateLength);
-        if (!linkState) {
-            OSKextLog(this,
-                kOSKextLogErrorLevel |
-                kOSKextLogArchiveFlag,
-                "Kext %s failed to create link state wrapper.",
-                getIdentifierCString());
-            goto finish;
-        }
-        linkState->setDeallocFunction(osdata_kmem_free);
-
-       /* Clear linkStateCopy; the OSData owns it now so we mustn't free it.
-        */
-        linkStateCopy = NULL;
-    }
+    anInfoDict->removeObject(kPrelinkBundlePathKey);
+    anInfoDict->removeObject(kPrelinkExecutableRelativePathKey);
 
    /* Create an OSData wrapper around the linked executable.
     */
@@ -1241,6 +1388,8 @@ OSKext::initWithPrelinkedInfoDict(
 
             if (data != srcData) {
 #if __LP64__
+                kern_return_t alloc_result;
+
                 alloc_result = kext_alloc((vm_offset_t *)&data, length, /* fixed */ TRUE);
                 if (alloc_result != KERN_SUCCESS) {
                     OSKextLog(this,
@@ -1263,11 +1412,6 @@ OSKext::initWithPrelinkedInfoDict(
             anInfoDict->removeObject(kPrelinkExecutableSourceKey);
         }
 
-       /* We don't need to set a dealloc function for the linked executable
-        * because it is freed separately in OSKext::unload(), which must unwire
-        * part of the memory.
-        * xxx - do we *have* to do it that way?
-        */
         prelinkedExecutable = OSData::withBytesNoCopy(data, length);
         if (!prelinkedExecutable) {
             OSKextLog(this,
@@ -1277,6 +1421,7 @@ OSKext::initWithPrelinkedInfoDict(
                 getIdentifierCString());
             goto finish;
         }
+        prelinkedExecutable->setDeallocFunction(osdata_kext_free);
         setLinkedExecutable(prelinkedExecutable);
 
         addressNum = OSDynamicCast(OSNumber,
@@ -1316,13 +1461,6 @@ OSKext::initWithPrelinkedInfoDict(
     result = registerIdentifier();
 
 finish:
-
-   /* If we didn't hand linkStateCopy off to an OSData, free it.
-    */
-    if (linkStateCopy) {
-        kmem_free(kernel_map, (vm_offset_t)linkStateCopy, linkStateLength);
-    }
-    
     OSSafeRelease(prelinkedExecutable);
 
     return result;
@@ -1367,7 +1505,7 @@ OSKext::initWithBooterData(
     void                * executableAddr = NULL;  // do not free
     char                * bundlePathAddr = NULL;  // do not free
 
-    OSObject            * parsedXML = NULL;  // must release
+    OSObject            * parsedXML      = NULL;  // must release
     OSDictionary        * theInfoDict    = NULL;  // do not release
     OSString            * kextPath       = NULL;  // must release
     OSString            * errorString    = NULL;  // must release
@@ -1527,6 +1665,8 @@ OSKext::registerIdentifier(void)
     OSData        * newUUID             = NULL;  // must release
     OSData        * existingUUID        = NULL;  // must release
 
+    IORecursiveLockLock(sKextLock);
+
    /* Get the new kext's version for checks & log messages.
     */
     newVersion = getVersion();
@@ -1691,6 +1831,8 @@ OSKext::registerIdentifier(void)
 
 finish:
 
+    IORecursiveLockUnlock(sKextLock);
+
     if (result) {
         OSKextLog(this,
             kOSKextLogStepLevel |
@@ -1708,21 +1850,20 @@ finish:
 /*********************************************************************
 * Does the bare minimum validation to look up a kext.
 * All other validation is done on the spot as needed.
-*
-* No need for lock, only called from init
 **********************************************************************/
 bool
 OSKext::setInfoDictionaryAndPath(
     OSDictionary * aDictionary,
     OSString     * aPath)
 {
-    bool          result                   = false;
-    OSString    * bundleIDString           = NULL;  // do not release
-    OSString    * versionString            = NULL;  // do not release
-    OSString    * compatibleVersionString  = NULL;  // do not release
-    const char  * versionCString           = NULL;  // do not free
-    const char  * compatibleVersionCString = NULL;  // do not free
-    OSBoolean   * scratchBool              = NULL;  // do not release
+    bool           result                   = false;
+    OSString     * bundleIDString           = NULL;  // do not release
+    OSString     * versionString            = NULL;  // do not release
+    OSString     * compatibleVersionString  = NULL;  // do not release
+    const char   * versionCString           = NULL;  // do not free
+    const char   * compatibleVersionCString = NULL;  // do not free
+    OSBoolean    * scratchBool              = NULL;  // do not release
+    OSDictionary * scratchDict              = NULL;  // do not release
 
     if (infoDict) {
         panic("Attempt to set info dictionary on a kext "
@@ -1845,13 +1986,13 @@ OSKext::setInfoDictionaryAndPath(
     */
     scratchBool = OSDynamicCast(OSBoolean,
         getPropertyForHostArch(kOSBundleIsInterfaceKey));
-    if (scratchBool && scratchBool->isTrue()) {
+    if (scratchBool == kOSBooleanTrue) {
         flags.interface = 1;
     }
     
     scratchBool = OSDynamicCast(OSBoolean,
         getPropertyForHostArch(kOSKernelResourceKey));
-    if (scratchBool && scratchBool->isTrue()) {
+    if (scratchBool == kOSBooleanTrue) {
         flags.kernelComponent = 1;
         flags.interface = 1;  // xxx - hm. the kernel itself isn't an interface...
         flags.started = 1;
@@ -1861,6 +2002,14 @@ OSKext::setInfoDictionaryAndPath(
         flags.hasAllDependencies = 1;
     }
 
+   /* Make sure common string values in personalities are uniqued to OSSymbols.
+    */
+    scratchDict = OSDynamicCast(OSDictionary, 
+        getPropertyForHostArch(kIOKitPersonalitiesKey));
+    if (scratchDict) {
+        uniquePersonalityProperties(scratchDict);
+    }
+
     result = true;
 
 finish:
@@ -1917,6 +2066,95 @@ finish:
     return result;
 }
 
+/*********************************************************************
+*********************************************************************/
+static void
+uniqueStringPlistProperty(OSDictionary * dict, const char * key)
+{
+    OSString       * stringValue = NULL;  // do not release
+    const OSSymbol * symbolValue = NULL;  // must release
+
+    stringValue = OSDynamicCast(OSString, dict->getObject(key));
+    if (!stringValue) {
+        goto finish;
+    }
+    
+    symbolValue = OSSymbol::withString(stringValue);
+    if (!symbolValue) {
+        goto finish;
+    }
+
+    dict->setObject(key, symbolValue);
+    
+finish:
+    if (symbolValue) symbolValue->release();
+
+    return;
+}
+
+/*********************************************************************
+*********************************************************************/
+static void
+uniqueStringPlistProperty(OSDictionary * dict, const OSString * key)
+{
+    OSString       * stringValue = NULL;  // do not release
+    const OSSymbol * symbolValue = NULL;  // must release
+
+    stringValue = OSDynamicCast(OSString, dict->getObject(key));
+    if (!stringValue) {
+        goto finish;
+    }
+    
+    symbolValue = OSSymbol::withString(stringValue);
+    if (!symbolValue) {
+        goto finish;
+    }
+
+    dict->setObject(key, symbolValue);
+    
+finish:
+    if (symbolValue) symbolValue->release();
+
+    return;
+}
+
+/*********************************************************************
+* Replace common personality property values with uniqued instances
+* to save on wired memory.
+*********************************************************************/
+/* static */
+void
+OSKext::uniquePersonalityProperties(OSDictionary * personalityDict)
+{
+   /* Properties every personality has.
+    */
+    uniqueStringPlistProperty(personalityDict, kCFBundleIdentifierKey);
+    uniqueStringPlistProperty(personalityDict, kIOProviderClassKey);
+    uniqueStringPlistProperty(personalityDict, gIOClassKey);
+    
+   /* Other commonly used properties.
+    */
+    uniqueStringPlistProperty(personalityDict, gIOMatchCategoryKey);
+    uniqueStringPlistProperty(personalityDict, gIOResourceMatchKey);
+    uniqueStringPlistProperty(personalityDict, gIOUserClientClassKey);
+
+    uniqueStringPlistProperty(personalityDict, "HIDDefaultBehavior");
+    uniqueStringPlistProperty(personalityDict, "HIDPointerAccelerationType");
+    uniqueStringPlistProperty(personalityDict, "HIDRemoteControlType");
+    uniqueStringPlistProperty(personalityDict, "HIDScrollAccelerationType");
+    uniqueStringPlistProperty(personalityDict, "IOPersonalityPublisher"); 
+    uniqueStringPlistProperty(personalityDict, "Physical Interconnect");
+    uniqueStringPlistProperty(personalityDict, "Physical Interconnect Location");
+    uniqueStringPlistProperty(personalityDict, "Vendor");
+    uniqueStringPlistProperty(personalityDict, "Vendor Identification");
+    uniqueStringPlistProperty(personalityDict, "Vendor Name");
+    uniqueStringPlistProperty(personalityDict, "bConfigurationValue");
+    uniqueStringPlistProperty(personalityDict, "bInterfaceNumber");
+    uniqueStringPlistProperty(personalityDict, "idProduct");
+
+    return;
+}
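+
+/* Illustrative sketch, not part of this change: OSSymbol instances are
+ * uniqued by the symbol pool, so replacing repeated OSString values with
+ * OSSymbols collapses duplicates to a single wired allocation.
+ * Hypothetically:
+ *
+ *     const OSSymbol * a = OSSymbol::withCString("IOResources");
+ *     const OSSymbol * b = OSSymbol::withCString("IOResources");
+ *     assert(a == b);   // same uniqued object; only one copy in memory
+ *     a->release();
+ *     b->release();
+ */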
+
 /*********************************************************************
 *********************************************************************/
 void
@@ -1929,8 +2167,8 @@ OSKext::free(void)
     OSSafeRelease(infoDict);
     OSSafeRelease(bundleID);
     OSSafeRelease(path);
+    OSSafeRelease(executableRelPath);
     OSSafeRelease(dependencies);
-    OSSafeRelease(linkState);
     OSSafeRelease(linkedExecutable);
     OSSafeRelease(metaClasses);
     OSSafeRelease(interfaceUUID);
@@ -2032,7 +2270,7 @@ OSKext::readMkext1Archive(
     uint32_t        numKexts            = 0;
     
     OSData        * infoDictDataObject  = NULL;  // must release
-    OSObject      * parsedXML      = NULL;  // must release
+    OSObject      * parsedXML           = NULL;  // must release
     OSDictionary  * infoDict            = NULL;  // do not release
     OSString      * errorString         = NULL;  // must release
     OSData        * mkextExecutableInfo = NULL;  // must release
@@ -2361,7 +2599,7 @@ OSKext::readMkext2Archive(
     OSString      * errorString                = NULL;  // must release
     OSData        * mkextPlistUncompressedData = NULL;  // must release
     const char    * mkextPlistDataBuffer       = NULL;  // do not free
-    OSObject      * parsedXML           = NULL;  // must release
+    OSObject      * parsedXML                  = NULL;  // must release
     OSDictionary  * mkextPlist                 = NULL;  // do not release
     OSArray       * mkextInfoDictArray         = NULL;  // do not release
     uint32_t        count, i;
@@ -2550,9 +2788,18 @@ OSKext::initWithMkext2Info(
         goto finish;
     }
 
-   /* Don't need the path to be in the info dictionary any more.
+   /* If we have a path to the executable, save it.
+    */
+    executableRelPath = OSDynamicCast(OSString,
+        anInfoDict->getObject(kMKEXTExecutableRelativePathKey));
+    if (executableRelPath) {
+        executableRelPath->retain();
+    }
+
+   /* Don't need the paths to be in the info dictionary any more.
     */
     anInfoDict->removeObject(kMKEXTBundlePathKey);
+    anInfoDict->removeObject(kMKEXTExecutableRelativePathKey);
 
     executableOffsetNum = OSDynamicCast(OSNumber,
         infoDict->getObject(kMKEXTExecutableKey));
@@ -2688,7 +2935,7 @@ OSKext::extractMkext2FileData(
        /* How's this for cheesy? The kernel is only asked to extract
         * kext plists so we tailor the log messages.
         */
-        if (this == sKernelKext) {
+        if (isKernel()) {
             OSKextLog(this,
                 kOSKextLogErrorLevel |
                 kOSKextLogArchiveFlag,
@@ -2705,7 +2952,7 @@ OSKext::extractMkext2FileData(
     }
     uncompressedData = OSData::withBytesNoCopy(uncompressedDataBuffer, fullSize);
     if (!uncompressedData) {
-        if (this == sKernelKext) {
+        if (isKernel()) {
             OSKextLog(this,
                 kOSKextLogErrorLevel |
                 kOSKextLogArchiveFlag,
@@ -2721,7 +2968,7 @@ OSKext::extractMkext2FileData(
     }
     uncompressedData->setDeallocFunction(&osdata_kmem_free);
 
-    if (this == sKernelKext) {
+    if (isKernel()) {
         OSKextLog(this,
             kOSKextLogDetailLevel |
             kOSKextLogArchiveFlag,
@@ -2747,7 +2994,7 @@ OSKext::extractMkext2FileData(
 
     zlib_result = inflateInit(&zstream);
     if (Z_OK != zlib_result) {
-        if (this == sKernelKext) {
+        if (isKernel()) {
             OSKextLog(this,
                 kOSKextLogErrorLevel |
                 kOSKextLogArchiveFlag,
@@ -2770,7 +3017,7 @@ OSKext::extractMkext2FileData(
     if (zlib_result == Z_STREAM_END || zlib_result == Z_OK) {
         uncompressedSize = zstream.total_out;
     } else {
-        if (this == sKernelKext) {
+        if (isKernel()) {
             OSKextLog(this,
                 kOSKextLogErrorLevel |
                 kOSKextLogArchiveFlag,
@@ -2793,7 +3040,7 @@ OSKext::extractMkext2FileData(
     }
 
     if (uncompressedSize != fullSize) {
-        if (this == sKernelKext) {
+        if (isKernel()) {
             OSKextLog(this,
                 kOSKextLogErrorLevel |
                 kOSKextLogArchiveFlag,
@@ -2954,7 +3201,7 @@ OSKext::loadFromMkext(
     }
 
     startKextExcludeNum = OSDynamicCast(OSNumber,
-        requestArgs->getObject(kKextKextRequestArgumentStartExcludeKey));
+        requestArgs->getObject(kKextRequestArgumentStartExcludeKey));
     startMatchingExcludeNum = OSDynamicCast(OSNumber,
         requestArgs->getObject(kKextRequestArgumentStartMatchingExcludeKey));
     delayAutounloadBool = OSDynamicCast(OSBoolean,
@@ -3243,6 +3490,7 @@ OSKext::removeKext(
     }
 
     if (aKext->isLoaded()) {
+
        /* If we are terminating, send the request to the IOCatalogue
         * (which will actually call us right back but that's ok we have
         * a recursive lock don't you know) but do not ask the IOCatalogue
@@ -3253,7 +3501,7 @@ OSKext::removeKext(
                 aKext->getIdentifierCString(), /* unload */ false);
             if (result != kOSReturnSuccess) {
                 OSKextLog(aKext,
-                    kOSKextLogProgressLevel |
+                    kOSKextLogErrorLevel |
                     kOSKextLogKextBookkeepingFlag,
                     "Can't remove kext %s; services failed to terminate - 0x%x.",
                     aKext->getIdentifierCString(), result);
@@ -3417,6 +3665,14 @@ OSKext::getCompatibleVersion(void)
     return compatibleVersion;
 }
 
+/*********************************************************************
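+* A kext is a library if it declares a compatible version, meaning other
+* kexts are allowed to link against it.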
+*********************************************************************/
+bool
+OSKext::isLibrary(void)
+{
+    return (getCompatibleVersion() > 0);
+}
+
 /*********************************************************************
 *********************************************************************/
 bool
@@ -3434,10 +3690,7 @@ OSKext::isCompatibleWithVersion(OSKextVersion aVersion)
 bool
 OSKext::declaresExecutable(void)
 {
-    if (getPropertyForHostArch(kCFBundleExecutableKey)) {
-        return true;
-     }
-     return false;
+    return (getPropertyForHostArch(kCFBundleExecutableKey) != NULL);
 }
 
 /*********************************************************************
@@ -3512,6 +3765,14 @@ OSKext::isInterface(void)
     return flags.interface;
 }
 
+/*********************************************************************
+*********************************************************************/
+bool
+OSKext::isKernel(void)
+{
+    return (this == sKernelKext);
+}
+
 /*********************************************************************
 *********************************************************************/
 bool
@@ -3520,6 +3781,14 @@ OSKext::isKernelComponent(void)
     return flags.kernelComponent ? true : false;
 }
 
+/*********************************************************************
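+* A kext has an executable of its own if it isn't the kernel, isn't an
+* interface to the kernel, and declares a CFBundleExecutable.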
+*********************************************************************/
+bool
+OSKext::isExecutable(void)
+{
+    return (!isKernel() && !isInterface() && declaresExecutable());
+}
+
 /*********************************************************************
 * We might want to check this recursively for all dependencies,
 * since a subtree of dependencies could get loaded before we hit
@@ -3538,6 +3807,10 @@ OSKext::isLoadableInSafeBoot(void)
     bool       result   = false;
     OSString * required = NULL;  // do not release
     
+    if (isKernel()) {
+        result = true;
+        goto finish;
+    }
     
     required = OSDynamicCast(OSString,
         getPropertyForHostArch(kOSBundleRequiredKey));
@@ -3604,6 +3877,28 @@ OSKext::getLoadTag(void)
     return loadTag;
 }
 
+/*********************************************************************
+ *********************************************************************/
+void OSKext::getSizeInfo(uint32_t *loadSize, uint32_t *wiredSize)
+{
+    if (linkedExecutable) {
+        *loadSize = linkedExecutable->getLength();
+
+       /* If we have a kmod_info struct, calculate the wired size
+        * from that. Otherwise it's the full load size.
+        */
+        if (kmod_info) {
+            *wiredSize = *loadSize - kmod_info->hdr_size;
+        } else {
+            *wiredSize = *loadSize;
+        }
+    } else {
+        *wiredSize = 0;
+        *loadSize = 0;
+    }
+}
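+
+/* Illustrative sketch, not part of this change: a hypothetical caller
+ * reporting per-kext memory use might do:
+ *
+ *     uint32_t loadSize = 0, wiredSize = 0;
+ *     aKext->getSizeInfo(&loadSize, &wiredSize);
+ *     // wiredSize excludes the kmod_info->hdr_size bytes of Mach-O
+ *     // headers when a kmod_info struct is present.
+ */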
+
 /*********************************************************************
 *********************************************************************/
 OSData *
@@ -3654,9 +3949,7 @@ finish:
 
 /*********************************************************************
 *********************************************************************/
-#if defined (__ppc__)
-#define ARCHNAME "ppc"
-#elif defined (__i386__)
+#if defined (__i386__)
 #define ARCHNAME "i386"
 #elif defined (__x86_64__)
 #define ARCHNAME "x86_64"
@@ -3734,6 +4027,7 @@ finish:
 #endif
 /*********************************************************************
 *********************************************************************/
+/* static */
 OSReturn
 OSKext::loadKextWithIdentifier(
     const char       * kextIdentifierCString,
@@ -3760,7 +4054,6 @@ finish:
     return result;
 }
 
-
 /*********************************************************************
 *********************************************************************/
 OSReturn
@@ -3773,6 +4066,7 @@ OSKext::loadKextWithIdentifier(
     OSArray           * personalityNames)
 {
     OSReturn          result               = kOSReturnError;
+    OSReturn          pingResult           = kOSReturnError;
     OSKext          * theKext              = NULL;  // do not release
     OSDictionary    * loadRequest          = NULL;  // must release
     const OSSymbol  * kextIdentifierSymbol = NULL;  // must release
@@ -3840,13 +4134,12 @@ OSKext::loadKextWithIdentifier(
                 kextIdentifier->getCStringNoCopy());
         }
 
-        if (sKextdActive) {
-            OSKextPingKextd();
-        } else {
+        pingResult = OSKext::pingKextd();
+        if (pingResult == kOSKextReturnDisabled) {
             OSKextLog(/* kext */ NULL,
                 ((sPrelinkBoot) ? kOSKextLogDebugLevel : kOSKextLogErrorLevel) |
                 kOSKextLogLoadFlag,
-                "Not loading kext %s - not found and kextd not available in early boot.",
+                "Kext %s might not load - kextd is currently unavailable.",
                 kextIdentifier->getCStringNoCopy());
         }
 
@@ -4106,7 +4399,7 @@ OSKext::load(
 
    /* Keep the kernel itself out of the kmod list.
     */
-    if (lastLoadedKext == sKernelKext) {
+    if (lastLoadedKext->isKernel()) {
         lastLoadedKext = NULL;
     }
 
@@ -4114,6 +4407,8 @@ OSKext::load(
         kmod_info->next = lastLoadedKext->kmod_info;
     }
 
+    notifyKextLoadObservers(this, kmod_info);
+
    /* Make the global kmod list point at the just-loaded kext. Note that the
     * __kernel__ kext isn't in this list, as it wasn't before SnowLeopard,
     * although we do report it in kextstat these days by using the newer
@@ -4127,19 +4422,30 @@ OSKext::load(
 
    /* Save the list of loaded kexts in case we panic.
     */
-    clock_get_uptime(&last_loaded_timestamp);
     OSKext::saveLoadedKextPanicList();
 
-loaded:
+    if (isExecutable()) {
+        OSKext::updateLoadedKextSummaries();
+        savePanicString(/* isLoading */ true);
 
-    if (declaresExecutable() && (startOpt == kOSKextExcludeNone)) {
-        result = start();
-        if (result != kOSReturnSuccess) {
-            OSKextLog(this,
-                kOSKextLogErrorLevel | kOSKextLogLoadFlag,
-                "Kext %s start failed (result 0x%x).",
-                getIdentifierCString(), result);
-            result = kOSKextReturnStartStopError;
+#if CONFIG_DTRACE
+        registerWithDTrace();
+#else
+        jettisonLinkeditSegment();
+#endif /* CONFIG_DTRACE */
+    }
+
+loaded:
+    if (isExecutable() && !flags.started) {
+        if (startOpt == kOSKextExcludeNone) {
+            result = start();
+            if (result != kOSReturnSuccess) {
+                OSKextLog(this,
+                    kOSKextLogErrorLevel | kOSKextLogLoadFlag,
+                    "Kext %s start failed (result 0x%x).",
+                    getIdentifierCString(), result);
+                result = kOSKextReturnStartStopError;
+            }
         }
     }
     
@@ -4151,6 +4457,7 @@ loaded:
     if (result == kOSReturnSuccess && startMatchingOpt == kOSKextExcludeNone) {
         result = sendPersonalitiesToCatalog(true, personalityNames);
     }
+
 finish:
 
    /* More hack! If the kext doesn't declare an executable, even if we
@@ -4183,10 +4490,37 @@ finish:
             kOSKextLogLoadFlag,
             "Kext %s loaded.",
             getIdentifierCString());
+
+        queueKextNotification(kKextRequestPredicateLoadNotification,
+            OSDynamicCast(OSString, bundleID));
     }
     return result;
 }
 
+/*********************************************************************
+* kalloc()-based strdup() for kernel use; callers free the result with kfree().
+*********************************************************************/
+static char * strdup(const char * string)
+{
+    char * result = NULL;
+    size_t size;
+    
+    if (!string) {
+        goto finish;
+    }
+    
+    size = 1 + strlen(string);
+    result = (char *)kalloc(size);
+    if (!result) {
+        goto finish;
+    }
+    
+    memcpy(result, string, size);
+
+finish:
+    return result;
+}
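+
+/* Illustrative note, not part of this change: kalloc()/kfree() pair on an
+ * explicit size, so a string duplicated here must be freed with the same
+ * length it was allocated with, e.g. hypothetically:
+ *
+ *     char * name = strdup("com.example.driver");
+ *     if (name) {
+ *         kfree(name, 1 + strlen(name));   // matches the kalloc'd size
+ *     }
+ *
+ * which is exactly how loadExecutable() releases the kext_name and
+ * interface_name strings below.
+ */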
+
 /*********************************************************************
 * called only by load()
 *********************************************************************/
@@ -4195,13 +4529,11 @@ OSKext::loadExecutable()
 {
     OSReturn              result             = kOSReturnError;
     kern_return_t         kxldResult;
-    u_char            **  kxlddeps           = NULL;  // must kfree
+    KXLDDependency     *  kxlddeps           = NULL;  // must kfree
     uint32_t              num_kxlddeps       = 0;
+    OSArray            *  linkDependencies   = NULL;  // must release
+    uint32_t              numDirectDependencies   = 0;
     uint32_t              num_kmod_refs      = 0;
-    u_char              * linkStateBytes     = NULL;  // do not free
-    u_long                linkStateLength    = 0;
-    u_char             ** linkStateBytesPtr  = NULL;  // do not free
-    u_long              * linkStateLengthPtr = NULL;  // do not free
     struct mach_header ** kxldHeaderPtr      = NULL;  // do not free
     struct mach_header  * kxld_header        = NULL;  // xxx - need to free here?
     OSData              * theExecutable      = NULL;  // do not release
@@ -4221,6 +4553,7 @@ OSKext::loadExecutable()
 
     if (isKernelComponent()) {
        if (STRING_HAS_PREFIX(versCString, KERNEL_LIB_PREFIX)) {
+
            if (strncmp(versCString, KERNEL6_VERSION, strlen(KERNEL6_VERSION))) {
                 OSKextLog(this,
                     kOSKextLogErrorLevel |
@@ -4263,11 +4596,37 @@ OSKext::loadExecutable()
         goto register_kmod;
     }
 
-    if (isKernelComponent()) {
-        num_kxlddeps = 1; // the kernel itself
-    } else {
-        num_kxlddeps = getNumDependencies();
+    if (isInterface()) {
+        OSData *executableCopy = OSData::withData(theExecutable);
+        setLinkedExecutable(executableCopy);
+        executableCopy->release();
+        goto register_kmod;
     }
+
+    numDirectDependencies = getNumDependencies();
+
+    if (flags.hasBleedthrough) {
+        linkDependencies = dependencies;
+        linkDependencies->retain();
+    } else {
+        linkDependencies = OSArray::withArray(dependencies);
+        if (!linkDependencies) {
+            OSKextLog(this,
+                kOSKextLogErrorLevel |
+                kOSKextLogLoadFlag | kOSKextLogLinkFlag,
+                "Can't allocate link dependencies to load kext %s.",
+                getIdentifierCString());
+            goto finish;
+        }
+
+        for (i = 0; i < numDirectDependencies; ++i) {
+            OSKext * dependencyKext = OSDynamicCast(OSKext,
+                dependencies->getObject(i));
+            dependencyKext->addBleedthroughDependencies(linkDependencies);
+        }
+    } 
+
+    num_kxlddeps = linkDependencies->getCount();
     if (!num_kxlddeps) {
         OSKextLog(this,
             kOSKextLogErrorLevel |
@@ -4276,7 +4635,8 @@ OSKext::loadExecutable()
             getIdentifierCString());
         goto finish;
     }
-    kxlddeps = (u_char **)kalloc(num_kxlddeps * sizeof(*kxlddeps));
+
+    kxlddeps = (KXLDDependency *)kalloc(num_kxlddeps * sizeof(*kxlddeps));
     if (!kxlddeps) {
         OSKextLog(this,
             kOSKextLogErrorLevel |
@@ -4285,37 +4645,51 @@ OSKext::loadExecutable()
             getIdentifierCString());
         goto finish;
     }
-    
-    if (isKernelComponent()) {
-        OSData * kernelLinkState = OSKext::getKernelLinkState();
-        kxlddeps[0] = (u_char *)kernelLinkState->getBytesNoCopy();
-    } else for (i = 0; i < num_kxlddeps; i++) {
-        OSKext * dependency = OSDynamicCast(OSKext, dependencies->getObject(i));
-        if (!dependency->linkState) {
-            // xxx - maybe we should panic here
-            OSKextLog(this,
-                kOSKextLogErrorLevel |
-                kOSKextLogLoadFlag | kOSKextLogLinkFlag,
-                "Can't load kext %s - link state missing.",
-                getIdentifierCString());
-            goto finish;
+    bzero(kxlddeps, num_kxlddeps * sizeof(*kxlddeps));
+
+    for (i = 0; i < num_kxlddeps; ++i ) {
+        OSKext * dependency = OSDynamicCast(OSKext, linkDependencies->getObject(i));
+
+        if (dependency->isInterface()) {
+            OSKext *interfaceTargetKext = NULL;
+            OSData * interfaceTarget = NULL;
+
+            if (dependency->isKernelComponent()) {
+                interfaceTargetKext = sKernelKext;
+                interfaceTarget = sKernelKext->linkedExecutable;
+            } else {
+                interfaceTargetKext = OSDynamicCast(OSKext, 
+                    dependency->dependencies->getObject(0));
+
+                interfaceTarget = interfaceTargetKext->linkedExecutable;
+            }
+
+            if (!interfaceTarget) {
+                // panic?
+                goto finish;
+            }
+
+           /* The names set here aren't actually logged yet <rdar://problem/7941514>,
+            * but they will be useful to have in the debugger.
+            * strdup() failing isn't critical here, so we don't check for it.
+            */
+            kxlddeps[i].kext = (u_char *) interfaceTarget->getBytesNoCopy();
+            kxlddeps[i].kext_size = interfaceTarget->getLength();
+            kxlddeps[i].kext_name = strdup(interfaceTargetKext->getIdentifierCString());
+
+            kxlddeps[i].interface = (u_char *) dependency->linkedExecutable->getBytesNoCopy();
+            kxlddeps[i].interface_size = dependency->linkedExecutable->getLength();
+            kxlddeps[i].interface_name = strdup(dependency->getIdentifierCString());
+        } else {
+            kxlddeps[i].kext = (u_char *) dependency->linkedExecutable->getBytesNoCopy();
+            kxlddeps[i].kext_size = dependency->linkedExecutable->getLength();
+            kxlddeps[i].kext_name = strdup(dependency->getIdentifierCString());
         }
-        kxlddeps[i] = (u_char *)dependency->linkState->getBytesNoCopy();
-        assert(kxlddeps[i]);
-    }
 
-   /* We only need link state for a library kext.
-    */
-    if (compatibleVersion > -1 && (declaresExecutable() || isKernelComponent())) {
-        linkStateBytesPtr = &linkStateBytes;
-        linkStateLengthPtr = &linkStateLength;
+        kxlddeps[i].is_direct_dependency = (i < numDirectDependencies);
     }
 
-   /* We only need the linked executable for a real kext.
-    */
-    if (!isInterface()) {
-        kxldHeaderPtr = &kxld_header;
-    }
+    kxldHeaderPtr = &kxld_header;
 
 #if DEBUG
     OSKextLog(this,
@@ -4326,13 +4700,11 @@ OSKext::loadExecutable()
         "    executable: %p    executable_length: %d\n"
         "    user_data: %p\n"
         "    kxld_dependencies: %p    num_dependencies: %d\n"
-        "    kxld_header_ptr: %p    kmod_info_ptr: %p\n"
-        "    link_state_ptr: %p    link_state_length_ptr: %p",
-        getIdentifierCString(), kxldContext,
+        "    kxld_header_ptr: %p    kmod_info_ptr: %p\n",
+        getIdentifierCString(), sKxldContext,
         theExecutable->getBytesNoCopy(), theExecutable->getLength(),
         this, kxlddeps, num_kxlddeps,
-        kxldHeaderPtr, kernelKmodInfoPtr,
-        linkStateBytesPtr, linkStateLengthPtr);
+        kxldHeaderPtr, &kmod_info);
 #endif
 
    /* After this call, the linkedExecutable instance variable
@@ -4342,9 +4714,7 @@ OSKext::loadExecutable()
         (u_char *)theExecutable->getBytesNoCopy(),
         theExecutable->getLength(),
         getIdentifierCString(), this, kxlddeps, num_kxlddeps,
-        (u_char **)kxldHeaderPtr, (kxld_addr_t *)&kmod_info,
-        linkStateBytesPtr, linkStateLengthPtr,
-        /* symbolFile */ NULL, /* symbolFileSize */ NULL);
+        (u_char **)kxldHeaderPtr, (kxld_addr_t *)&kmod_info);
 
     if (kxldResult != KERN_SUCCESS) {
         // xxx - add kxldResult here?
@@ -4356,24 +4726,15 @@ OSKext::loadExecutable()
         result = kOSKextReturnLinkError;
         goto finish;
     }
-
-   /* If we got a link state, wrap it in an OSData and keep it
-    * around for later use linking other kexts that depend on this kext.
-    */
-    if (linkStateBytes && linkStateLength > 0) {
-        linkState = OSData::withBytesNoCopy(linkStateBytes, linkStateLength);
-        assert(linkState);
-        linkState->setDeallocFunction(&osdata_kmem_free);
-    }
     
-   /* If this isn't an interface, We've written data & instructions into kernel 
-    * memory, so flush the data cache and invalidate the instruction cache.
+   /* We've written data & instructions into kernel memory, so flush the data
+    * cache and invalidate the instruction cache.
+    * (The I and D caches are coherent on x86, so this is unnecessary there.)
     */
-    if (!isInterface()) {
-        flush_dcache(kmod_info->address, kmod_info->size, false);
-        invalidate_icache(kmod_info->address, kmod_info->size, false);
-    }
-
+#if	!defined(__i386__) && !defined(__x86_64__)
+    flush_dcache(kmod_info->address, kmod_info->size, false);
+    invalidate_icache(kmod_info->address, kmod_info->size, false);
+#endif
 register_kmod:
 
     if (isInterface()) {
@@ -4454,7 +4815,23 @@ register_kmod:
     result = kOSReturnSuccess;
 
 finish:
-    if (kxlddeps) kfree(kxlddeps, (num_kxlddeps * sizeof(void *)));
+    OSSafeRelease(linkDependencies);
+
+   /* Clean up the locally allocated dependency info.
+    */
+    for (i = 0; i < num_kxlddeps; ++i ) {
+        size_t size;
+
+        if (kxlddeps[i].kext_name) {
+            size = 1 + strlen(kxlddeps[i].kext_name);
+            kfree(kxlddeps[i].kext_name, size);
+        }
+        if (kxlddeps[i].interface_name) {
+            size = 1 + strlen(kxlddeps[i].interface_name);
+            kfree(kxlddeps[i].interface_name, size);
+        }
+    }
+    if (kxlddeps) kfree(kxlddeps, (num_kxlddeps * sizeof(*kxlddeps)));
 
    /* We no longer need the unrelocated executable (which the linker
     * has altered anyhow).
@@ -4486,7 +4863,131 @@ finish:
 }
 
 /*********************************************************************
-* xxx - initWithPrelinkedInfoDict doesn't use this
+* The linkedit segment is used by the kext linker for dependency
+* resolution and by dtrace for probe initialization. Once dtrace has
+* been initialized, we can free it for non-library kexts, since by
+* definition no kext depends on a non-library kext.
+*********************************************************************/
+void
+OSKext::jettisonLinkeditSegment(void)
+{
+    kernel_mach_header_t     * machhdr = (kernel_mach_header_t *)kmod_info->address;
+    kernel_segment_command_t * linkedit = NULL;
+    vm_size_t                  linkeditsize, kextsize;
+    OSData                   * data = NULL;
+
+    if (sKeepSymbols || isLibrary() || !isExecutable() || !linkedExecutable) {
+        goto finish;
+    }
+
+   /* Find the linkedit segment.  If it's not the last segment, then freeing
+    * it will fragment the kext into multiple VM regions, which OSKext is not
+    * designed to handle, so we'll have to skip it.
+    */
+    linkedit = getsegbynamefromheader(machhdr, SEG_LINKEDIT);
+    if (!linkedit) {
+        goto finish;
+    }
+
+    if (round_page(kmod_info->address + kmod_info->size) !=
+        round_page(linkedit->vmaddr + linkedit->vmsize))
+    {
+        goto finish;
+    }
+
+   /* Create a new OSData for the smaller kext object.
+    */
+    linkeditsize = round_page(linkedit->vmsize);
+    kextsize = kmod_info->size - linkeditsize;
+
+    data = OSData::withBytesNoCopy((void *)kmod_info->address, kextsize);
+    if (!data) {
+        goto finish;
+    }
+    data->setDeallocFunction(osdata_kext_free);
+
+   /* Rewrite the Mach-O headers.
+    */
+    if (KERN_SUCCESS != removeLinkeditHeaders(linkedit)) {
+        goto finish;
+    }
+
+   /* Fix the kmod info and linkedExecutable.
+    */
+    kmod_info->size = kextsize;
+    linkedExecutable->setDeallocFunction(NULL);
+    linkedExecutable->release();
+    linkedExecutable = data;
+
+   /* Free the linkedit segment.
+    */
+    kext_free(linkedit->vmaddr, linkeditsize);
+
+finish:
+    return;
+}
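+
+/* Illustrative note, not part of this change: the round_page() comparison
+ * above is what enforces the "last segment" restriction. For a kext
+ * occupying [address, address + size), the tail can only be returned to
+ * the VM system when
+ *
+ *     round_page(kmod_info->address + kmod_info->size) ==
+ *     round_page(linkedit->vmaddr + linkedit->vmsize)
+ *
+ * i.e. when __LINKEDIT ends exactly where the kext's single allocation
+ * ends; otherwise freeing it would split the kext into two VM regions.
+ */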
+
+/*********************************************************************
+*********************************************************************/
+OSReturn
+OSKext::removeLinkeditHeaders(kernel_segment_command_t *linkedit)
+{
+    OSReturn               result  = KERN_FAILURE;
+    kernel_mach_header_t * machhdr = (kernel_mach_header_t *)kmod_info->address;
+    vm_map_t               kext_map;
+    u_char               * src, * dst;
+    uint32_t               cmdsize, ncmds;
+    u_int                  i = 0;
+
+    kext_map = kext_get_vm_map(kmod_info);
+    if (!kext_map) {
+        result = KERN_MEMORY_ERROR;
+        goto finish;
+    }
+
+    result = vm_map_protect(kext_map, kmod_info->address,
+        kmod_info->address + kmod_info->hdr_size, VM_PROT_DEFAULT, TRUE);
+    if (result != KERN_SUCCESS) {
+        goto finish;
+    }
+
+    ncmds = machhdr->ncmds;
+    src = dst = (u_char *)(kmod_info->address + sizeof(*machhdr));
+
+    for (i = 0; i < ncmds; ++i, src += cmdsize) {
+        struct load_command * lc = (struct load_command *) src;
+        cmdsize = lc->cmdsize;
+
+        switch (lc->cmd) {
+        case LC_SEGMENT:
+        case LC_SEGMENT_64:
+            if (src != (u_char *)linkedit) break;
+            /* FALLTHROUGH */
+        case LC_SYMTAB:
+        case LC_DYSYMTAB:
+            bzero(src, cmdsize);
+            machhdr->ncmds--;
+            machhdr->sizeofcmds -= cmdsize;
+            continue;
+        }
+
+        memmove(dst, src, cmdsize);
+        dst += cmdsize;
+    }
+
+    result = vm_map_protect(kext_map, kmod_info->address,
+        kmod_info->address + kmod_info->hdr_size, VM_PROT_READ, TRUE);
+    if (result != KERN_SUCCESS) {
+        goto finish;
+    }
+
+    result = KERN_SUCCESS;
+
+finish:
+    return result;
+}
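+
+/* Illustrative note, not part of this change: the loop above compacts the
+ * load-command list in place. Commands that reference __LINKEDIT data
+ * (the segment itself, LC_SYMTAB, LC_DYSYMTAB) are zeroed and removed
+ * from ncmds/sizeofcmds; each surviving command is slid down with
+ * memmove() so the header stays contiguous.
+ */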
+
+/*********************************************************************
 *********************************************************************/
 void
 OSKext::setLinkedExecutable(OSData * anExecutable)
@@ -4501,6 +5002,73 @@ OSKext::setLinkedExecutable(OSData * anExecutable)
     return;
 }
 
+#if CONFIG_DTRACE
+/*********************************************************************
+* Go through all loaded kexts and tell them to register with dtrace.
+* The instance method only registers if necessary.
+*********************************************************************/
+/* static */
+void
+OSKext::registerKextsWithDTrace(void)
+{
+    uint32_t count = sLoadedKexts->getCount();
+    uint32_t i;
+
+    IORecursiveLockLock(sKextLock);
+
+    for (i = 0; i < count; i++) {
+        OSKext   * thisKext     = NULL;  // do not release
+
+        thisKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i));
+        if (!thisKext || !thisKext->isExecutable()) {
+            continue;
+        }
+
+        thisKext->registerWithDTrace();
+    }
+
+    IORecursiveLockUnlock(sKextLock);
+
+    return;
+}
+
+extern "C" {
+    extern int (*dtrace_modload)(struct kmod_info *);
+    extern int (*dtrace_modunload)(struct kmod_info *);
+};
+
+/*********************************************************************
+*********************************************************************/
+void
+OSKext::registerWithDTrace(void)
+{
+   /* Register kext with dtrace. A dtrace_modload failure should not
+    * prevent a kext from loading, so we ignore the return code.
+    */
+    if (!flags.dtraceInitialized && (dtrace_modload != NULL)) {
+        (void)(*dtrace_modload)(kmod_info);
+        flags.dtraceInitialized = true;
+        jettisonLinkeditSegment();
+    }
+    return;
+}
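+
+/* Illustrative note, not part of this change: the linkedit segment is
+ * jettisoned here, after dtrace_modload(), because dtrace uses
+ * __LINKEDIT for probe initialization (see the jettisonLinkeditSegment()
+ * block comment); only once dtrace has registered the kext can the
+ * segment be freed safely.
+ */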
+/*********************************************************************
+*********************************************************************/
+void
+OSKext::unregisterWithDTrace(void)
+{
+   /* Unregister kext with dtrace. A dtrace_modunload failure should not
+    * prevent a kext from unloading, so we ignore the return code.
+    */
+    if (flags.dtraceInitialized && (dtrace_modunload != NULL)) {
+        (void)(*dtrace_modunload)(kmod_info);
+        flags.dtraceInitialized = false;
+    }
+    return;
+}
+#endif /* CONFIG_DTRACE */
+
+
 /*********************************************************************
 * called only by loadExecutable()
 *********************************************************************/
@@ -4566,9 +5134,11 @@ OSKext::setVMProtections(void)
             goto finish;
         }
 
-        result = vm_map_wire(kext_map, start, end, seg->initprot, FALSE);
-        if (result != KERN_SUCCESS) {
-            goto finish;
+        if (segmentShouldBeWired(seg)) {
+            result = vm_map_wire(kext_map, start, end, seg->initprot, FALSE);
+            if (result != KERN_SUCCESS) {
+                goto finish;
+            }
         }
 
         seg = nextsegfromheader((kernel_mach_header_t *) kmod_info->address, seg);
@@ -4578,6 +5148,14 @@ finish:
     return result;
 }
 
+/*********************************************************************
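+* Wire every segment except __LINKEDIT, which stays pageable unless the
+* keepsyms boot-arg was given (a nonzero strncmp() result here means the
+* segment is not __LINKEDIT).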
+*********************************************************************/
+boolean_t 
+OSKext::segmentShouldBeWired(kernel_segment_command_t *seg)
+{
+    return (sKeepSymbols || strncmp(seg->segname, SEG_LINKEDIT, sizeof(seg->segname)));
+}
+
 /*********************************************************************
 *********************************************************************/
 OSReturn
@@ -4587,6 +5165,7 @@ OSKext::validateKextMapping(bool startFlag)
     const char                          * whichOp = startFlag ? "start" : "stop";
     kern_return_t                         kern_result = 0;
     vm_map_t                              kext_map    = NULL;
+    kernel_segment_command_t            * seg         = NULL;
     mach_vm_address_t                     address     = 0;
     mach_vm_size_t                        size        = 0;
     uint32_t                              depth       = 0;
@@ -4682,23 +5261,18 @@ OSKext::validateKextMapping(bool startFlag)
             goto finish;
         }
 
-       /* Verify that the kext is backed by physical memory.
+       /* Verify that the kext's segments are backed by physical memory.
         */
-        for (address = kmod_info->address;
-             address < round_page(kmod_info->address + kmod_info->size);
-             address += PAGE_SIZE)
-        {
-            if (!pmap_find_phys(kernel_pmap, (vm_offset_t)address)) {
-                OSKextLog(this,
-                    kOSKextLogErrorLevel |
-                    kOSKextLogLoadFlag,
-                    "Kext %s - page %p is not backed by physical memory.",
-                    getIdentifierCString(), 
-                    (void *)address);
+        seg = firstsegfromheader((kernel_mach_header_t *)kmod_info->address);
+        while (seg) {
+            if (!verifySegmentMapping(seg)) {
                 result = kOSKextReturnBadData;
                 goto finish;
             }
+
+            seg = nextsegfromheader((kernel_mach_header_t *) kmod_info->address, seg);
         }
+
     }
 
     result = kOSReturnSuccess;
@@ -4706,6 +5280,33 @@ finish:
     return result;
 }
 
+/*********************************************************************
+*********************************************************************/
+boolean_t
+OSKext::verifySegmentMapping(kernel_segment_command_t *seg)
+{
+    mach_vm_address_t address = 0;
+
+    if (!segmentShouldBeWired(seg)) return true;
+
+    for (address = seg->vmaddr;
+         address < round_page(seg->vmaddr + seg->vmsize);
+         address += PAGE_SIZE)
+    {
+        if (!pmap_find_phys(kernel_pmap, (vm_offset_t)address)) {
+            OSKextLog(this,
+                kOSKextLogErrorLevel |
+                kOSKextLogLoadFlag,
+                "Kext %s - page %p is not backed by physical memory.",
+                getIdentifierCString(), 
+                (void *)address);
+            return false;
+        }
+    }
+
+    return true;
+}
+
 /*********************************************************************
 *********************************************************************/
 OSReturn
@@ -4805,14 +5406,14 @@ OSKext::start(bool startDependenciesFlag)
 
     flags.starting = 1;
 
-#if !__i386__ && !__ppc__
+#if !CONFIG_STATIC_CPPINIT
     result = OSRuntimeInitializeCPP(kmod_info, NULL);
     if (result == KERN_SUCCESS) {
 #endif
 
         result = startfunc(kmod_info, kmodStartData);
 
-#if !__i386__ && !__ppc__
+#if !CONFIG_STATIC_CPPINIT
         if (result != KERN_SUCCESS) {
             (void) OSRuntimeFinalizeCPP(kmod_info, NULL);
         }
@@ -4951,10 +5552,6 @@ OSKext::stop(void)
         goto finish;
     }
 
-   /* Save the list of loaded kexts in case we panic.
-    */
-    OSKext::saveUnloadedKextPanicList(this);
-
     stopfunc = kmod_info->stop;
     if (stopfunc) {
         OSKextLog(this,
@@ -4966,7 +5563,7 @@ OSKext::stop(void)
         flags.stopping = 1;
 
         result = stopfunc(kmod_info, /* userData */ NULL);
-#if !__i386__ && !__ppc__
+#if !CONFIG_STATIC_CPPINIT
         if (result == KERN_SUCCESS) {
             result = OSRuntimeFinalizeCPP(kmod_info, NULL);
         }
@@ -5030,7 +5627,6 @@ OSKext::unload(void)
         goto finish;
     }
 
-
     if (hasOSMetaClassInstances()) {
         OSKextLog(this,
             kOSKextLogErrorLevel |
@@ -5059,6 +5655,10 @@ OSKext::unload(void)
     * of unloading.
     */
     flags.unloading = 1;
+
+   /* Update the string describing the last kext to unload in case we panic.
+    */
+    savePanicString(/* isLoading */ false);
     
     if (isStarted()) {
         result = stop();
@@ -5113,7 +5713,7 @@ OSKext::unload(void)
         }
 
         OSKext * lastKext = OSDynamicCast(OSKext, sLoadedKexts->getLastObject());
-        if (lastKext && lastKext != sKernelKext) {
+        if (lastKext && !lastKext->isKernel()) {
             kmod = lastKext->kmod_info;
         } else {
             kmod = NULL;  // clear the global kmod variable
@@ -5134,35 +5734,56 @@ OSKext::unload(void)
             num_kmod_refs * sizeof(kmod_reference_t));
     }
 
-   /* If we have a linked executable, release & clear it, and then
-    * unwire & deallocate the buffer the OSData wrapped.
-    */
+#if CONFIG_DTRACE
+    unregisterWithDTrace();
+#endif /* CONFIG_DTRACE */
+
+    notifyKextUnloadObservers(this);
+
+    /* Unwire and free the linked executable.
+     */
     if (linkedExecutable) {
-        vm_map_t kext_map;
+        if (!isInterface()) {
+            kernel_segment_command_t *seg = NULL;
+            vm_map_t kext_map = kext_get_vm_map(kmod_info);
 
-       /* linkedExecutable is just a wrapper for the executable and doesn't
-        * free it.
-        */
-        linkedExecutable->release();
-        linkedExecutable = NULL;
+            if (!kext_map) {
+                OSKextLog(this,
+                    kOSKextLogErrorLevel |
+                    kOSKextLogLoadFlag,
+                    "Failed to free kext %s; couldn't find the kext map.",
+                    getIdentifierCString());
+                result = kOSKextReturnInternalError;
+                goto finish;
+            }
 
-        OSKextLog(this,
-            kOSKextLogProgressLevel |
-            kOSKextLogLoadFlag,
-            "Kext %s unwiring and unmapping linked executable.",
-            getIdentifierCString());
+            OSKextLog(this,
+                kOSKextLogProgressLevel |
+                kOSKextLogLoadFlag,
+                "Kext %s unwiring and unmapping linked executable.",
+                getIdentifierCString());
+
+            seg = firstsegfromheader((kernel_mach_header_t *)kmod_info->address);
+            while (seg) {
+                if (segmentShouldBeWired(seg)) {
+                    result = vm_map_unwire(kext_map, seg->vmaddr, 
+                        seg->vmaddr + seg->vmsize, FALSE);
+                    if (result != KERN_SUCCESS) {
+                        OSKextLog(this,
+                            kOSKextLogErrorLevel |
+                            kOSKextLogLoadFlag,
+                            "Failed to unwire kext %s.",
+                            getIdentifierCString());
+                        result = kOSKextReturnInternalError;
+                        goto finish;
+                    }
+                }
 
-        kext_map = kext_get_vm_map(kmod_info);
-        if (kext_map) {
-            // xxx - do we have to do this before freeing? Why can't we just free it?
-            // xxx - we should be able to set a dealloc func on the linkedExecutable
-            result = vm_map_unwire(kext_map,
-                kmod_info->address + kmod_info->hdr_size, 
-                kmod_info->address + kmod_info->size, FALSE);
-            if (result == KERN_SUCCESS) {
-                kext_free(kmod_info->address, kmod_info->size);
+                seg = nextsegfromheader((kernel_mach_header_t *) kmod_info->address, seg);
             }
         }
+
+        OSSafeReleaseNULL(linkedExecutable);
     }
 
    /* An interface kext has a fake kmod_info that was allocated,
@@ -5177,17 +5798,80 @@ OSKext::unload(void)
     flags.loaded = false;
     flushDependencies();
 
+    /* Save a copy of the bundle ID to check when deciding whether to
+     * rebuild the kernel cache file. If a kext was already in the kernel
+     * cache, was unloaded, and is later loaded again, we do not need to
+     * rebuild the kernel cache. <rdar://problem/9055303>
+     */
+    if (isPrelinked()) {
+        sUnloadedPrelinkedKexts->setObject(bundleID);
+    }
+
     OSKextLog(this,
         kOSKextLogProgressLevel | kOSKextLogLoadFlag,
         "Kext %s unloaded.", getIdentifierCString());
 
+    queueKextNotification(kKextRequestPredicateUnloadNotification,
+        OSDynamicCast(OSString, bundleID));
+
 finish:
     OSKext::saveLoadedKextPanicList();
+    OSKext::updateLoadedKextSummaries();
 
     flags.unloading = 0;
     return result;
 }
 
+/*********************************************************************
+* Assumes sKextLock is held.
+*********************************************************************/
+/* static */
+OSReturn
+OSKext::queueKextNotification(
+    const char * notificationName,
+    OSString   * kextIdentifier)
+{
+    OSReturn          result               = kOSReturnError;
+    OSDictionary    * loadRequest          = NULL;  // must release
+
+    if (!kextIdentifier) {
+        result = kOSKextReturnInvalidArgument;
+        goto finish;
+    }
+
+   /* Create a new request unless one is already sitting
+    * in sKernelRequests for this bundle identifier.
+    */
+    result = _OSKextCreateRequest(notificationName, &loadRequest);
+    if (result != kOSReturnSuccess) {
+        goto finish;
+    }
+    if (!_OSKextSetRequestArgument(loadRequest,
+        kKextRequestArgumentBundleIdentifierKey, kextIdentifier)) {
+        
+        result = kOSKextReturnNoMemory;
+        goto finish;
+    }
+    if (!sKernelRequests->setObject(loadRequest)) {
+        result = kOSKextReturnNoMemory;
+        goto finish;
+    }
+
+   /* We might want to only queue the notification if kextd is active,
+    * but that wouldn't work for embedded. Note that we don't care if
+    * the ping immediately succeeds here so don't do anything with the
+    * result of this call.
+    */
+    OSKext::pingKextd();
+
+    result = kOSReturnSuccess;
+
+finish:
+    OSSafeRelease(loadRequest);
+
+    return result;
+}
+
 /*********************************************************************
 *********************************************************************/
 static void
@@ -5195,8 +5879,7 @@ _OSKextConsiderDestroyingLinkContext(
     __unused thread_call_param_t p0,
     __unused thread_call_param_t p1)
 {
-   /* Once both recursive locks are taken in correct order, we shouldn't
-    * have to worry about further recursive lock takes.
+   /* Take multiple locks in the correct order.
     */
     IORecursiveLockLock(sKextLock);
     IORecursiveLockLock(sKextInnerLock);
@@ -5235,9 +5918,8 @@ _OSKextConsiderDestroyingLinkContext(
 * to avoid deadlocks with IOService, with which OSKext has a reciprocal
 * call relationship.
 *
-* Do not call any function that takes sKextLock here! This function
-* can be invoked with sKextInnerLock, and the two must always
-* be taken in the order: sKextLock -> sKextInnerLock.
+* This function must be invoked with sKextInnerLock held.
+* Do not call any function that takes sKextLock here!
 *********************************************************************/
 /* static */
 void
@@ -5264,56 +5946,11 @@ OSKext::considerDestroyingLinkContext(void)
         goto finish;
     }
 
-    thread_call_enter(sDestroyLinkContextThread);
-
-finish:
-    IORecursiveLockUnlock(sKextInnerLock);
-    return;
-}
-
-/*********************************************************************
-*********************************************************************/
-OSData *
-OSKext::getKernelLinkState()
-{
-    kern_return_t   kxldResult;
-    u_char        * kernel          = NULL;
-    size_t          kernelLength;
-    u_char        * linkStateBytes  = NULL;
-    u_long          linkStateLength;
-    OSData        * linkState       = NULL;
-
-    if (sKernelKext && sKernelKext->linkState) {
-        goto finish;
-    }
-
-    kernel = (u_char *)&_mh_execute_header;
-    kernelLength = getlastaddr() - (vm_offset_t)kernel;
-
-    kxldResult = kxld_link_file(sKxldContext,
-        kernel,
-        kernelLength,
-        kOSKextKernelIdentifier,
-        /* callbackData */ NULL,
-        /* dependencies */ NULL,
-        /* numDependencies */ 0,
-        /* linkedObjectOut */ NULL,
-        /* kmod_info_kern out */ NULL,
-        &linkStateBytes,
-        &linkStateLength,
-        /* symbolFile */ NULL,
-        /* symbolFileSize */ NULL);
-    if (kxldResult) {
-        panic("Can't generate kernel link state; no kexts can be loaded.");
-        goto finish;
-    }
-
-    linkState = OSData::withBytesNoCopy(linkStateBytes, linkStateLength);
-    linkState->setDeallocFunction(&osdata_kmem_free);
-    sKernelKext->linkState = linkState;
+    thread_call_enter(sDestroyLinkContextThread);
 
 finish:
-    return sKernelKext->linkState;
+    IORecursiveLockUnlock(sKextInnerLock);
+    return;
 }
 
 #if PRAGMA_MARK
@@ -5323,6 +5960,7 @@ finish:
 * This is a static method because the kext will be deallocated if it
 * does unload!
 *********************************************************************/
+/* static */
 OSReturn
 OSKext::autounloadKext(OSKext * aKext)
 {
@@ -5373,14 +6011,27 @@ _OSKextConsiderUnloads(
     bool         didUnload = false;
     unsigned int count, i;
 
-   /* Once both recursive locks are taken in correct order, we shouldn't
-    * have to worry about further recursive lock takes.
+   /* Take multiple locks in the correct order
+    * (note also sKextSummariesLock further down).
     */
     IORecursiveLockLock(sKextLock);
     IORecursiveLockLock(sKextInnerLock);
 
     OSKext::flushNonloadedKexts(/* flushPrelinkedKexts */ true);
 
+    IOLockLock(sKextSummariesLock);
+
+   /* If there is an old kext summary, free that now.
+    */
+    if (sPrevLoadedKextSummaries) {
+        kmem_free(kernel_map, (vm_offset_t)sPrevLoadedKextSummaries,
+            sPrevLoadedKextSummariesAllocSize);
+        sPrevLoadedKextSummaries = NULL;
+        sPrevLoadedKextSummariesAllocSize = 0;
+    }
+
+    IOLockUnlock(sKextSummariesLock);
+
    /* If the system is powering down, don't try to unload anything.
     */
     if (sSystemSleep) {
@@ -5405,7 +6056,7 @@ _OSKextConsiderUnloads(
             OSBoolean * stale = OSDynamicCast(OSBoolean,
                 callbackRecord->getObject(kKextRequestStaleKey));
             
-            if (stale && stale->isTrue()) {
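+            /* OSBoolean instances are singletons, so a pointer comparison
+             * against kOSBooleanTrue suffices (and handles NULL).
+             */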
+            if (stale == kOSBooleanTrue) {
                 OSKext::invokeRequestCallback(callbackRecord,
                     kOSKextReturnTimeout);
             } else {
@@ -5442,8 +6093,8 @@ finish:
     sConsiderUnloadsPending = false;
     sConsiderUnloadsExecuted = true;
 
-    (void) OSKext::considerRebuildOfPrelinkedKernel();
-
+    (void) OSKext::considerRebuildOfPrelinkedKernel(NULL);
+
     IORecursiveLockUnlock(sKextInnerLock);
     IORecursiveLockUnlock(sKextLock);
 
@@ -5463,6 +6114,9 @@ void OSKext::considerUnloads(Boolean rescheduleOnlyFlag)
         sUnloadCallout = thread_call_allocate(&_OSKextConsiderUnloads, 0);
     }
 
+    /* We only reset the delay value for unloading if we already have
+     * something pending; rescheduleOnlyFlag should not start the countdown.
+     */
     if (rescheduleOnlyFlag && !sConsiderUnloadsPending) {
         goto finish;
     }
@@ -5540,13 +6194,40 @@ IOReturn OSKextSystemSleepOrWake(UInt32 messageType)
 *********************************************************************/
 /* static */
 void
-OSKext::considerRebuildOfPrelinkedKernel(void)
+OSKext::considerRebuildOfPrelinkedKernel(OSString * moduleName)
 {
     OSReturn       checkResult      = kOSReturnError;
     static bool    requestedPrelink = false;
     OSDictionary * prelinkRequest   = NULL;  // must release
 
     IORecursiveLockLock(sKextLock);
+    
+    /* moduleName is only passed when we see a load come in.  We are only 
+     * interested in rebuilding the kernel cache if the kext we are loading
+     * is not already in the original kernel cache.  9055303
+     */
+    if (moduleName) {
+        int         count = sUnloadedPrelinkedKexts->getCount();
+        int         i;
+
+        for (i = 0; i < count; i++) {
+            const OSSymbol *    myBundleID;     // do not release
+
+            myBundleID = OSDynamicCast(OSSymbol, sUnloadedPrelinkedKexts->getObject(i));
+            if (!myBundleID) continue;
+            if (moduleName->isEqualTo(myBundleID->getCStringNoCopy())) {
+                OSKextLog(/* kext */ NULL,
+                          kOSKextLogDetailLevel |
+                          kOSKextLogArchiveFlag,
+                          "bundleID %s already in cache skipping rebuild.",
+                          myBundleID->getCStringNoCopy());
+
+                /* no need to rebuild, already in kernel cache  */
+                goto finish;
+            }
+        }
+        (void) OSKext::setDeferredLoadSucceeded();
+    }
 
     if (!sDeferredLoadSucceeded || !sConsiderUnloadsExecuted || 
         sSafeBoot || requestedPrelink) 
@@ -5569,7 +6250,7 @@ OSKext::considerRebuildOfPrelinkedKernel(void)
         goto finish;
     }
 
-    OSKextPingKextd();
+    OSKext::pingKextd();
     requestedPrelink = true;
 
 finish:
@@ -5745,7 +6426,24 @@ OSKext::resolveDependencies(
                 libraryVersion->getCStringNoCopy());
             goto finish;
         }
-        
+
+       /* If a nonprelinked library somehow got into the mix for a
+        * prelinked kext, at any point in the chain, we must fail
+        * because the prelinked relocs for the library will be all wrong.
+        */
+        if (this->isPrelinked() &&
+            libraryKext->declaresExecutable() &&
+            !libraryKext->isPrelinked()) {
+
+            OSKextLog(this,
+                kOSKextLogErrorLevel |
+                kOSKextLogDependenciesFlag,
+                "Kext %s (prelinked) - library kext %s (v%s) not prelinked.",
+                getIdentifierCString(), library_id,
+                libraryVersion->getCStringNoCopy());
+            goto finish;
+        }
+
         if (!libraryKext->resolveDependencies(loopStack)) {
             goto finish;
         }
@@ -5815,8 +6513,16 @@ OSKext::resolveDependencies(
         }
     }
     
+    if (hasRawKernelDependency) {
+        OSKextLog(this,
+            kOSKextLogErrorLevel |
+            kOSKextLogValidationFlag | kOSKextLogDependenciesFlag,
+            "Error - kext %s declares a dependency on %s, which is not permitted.",
+            getIdentifierCString(), KERNEL_LIB);
+        goto finish;
+    }
 #if __LP64__
-    if (hasRawKernelDependency || hasKernelDependency) {
+    if (hasKernelDependency) {
         OSKextLog(this,
             kOSKextLogErrorLevel |
             kOSKextLogValidationFlag | kOSKextLogDependenciesFlag,
@@ -5838,17 +6544,7 @@ OSKext::resolveDependencies(
     // xxx - is it invalid to do both "com.apple.kernel" and any
     // xxx - "com.apple.kernel.*"?
 
-    if (hasRawKernelDependency && hasKernelDependency) {
-        OSKextLog(this,
-            kOSKextLogErrorLevel |
-            kOSKextLogValidationFlag | kOSKextLogDependenciesFlag,
-            "Error - kext %s declares dependencies on both "
-            "%s and %s.",
-            getIdentifierCString(), KERNEL_LIB, KERNEL6_LIB);
-        goto finish;
-    }
-    
-    if ((hasRawKernelDependency || hasKernelDependency) && hasKPIDependency) {
+    if (hasKernelDependency && hasKPIDependency) {
         OSKextLog(this,
             kOSKextLogWarningLevel |
             kOSKextLogDependenciesFlag,
@@ -5857,7 +6553,7 @@ OSKext::resolveDependencies(
             getIdentifierCString(), KERNEL_LIB, KPI_LIB_PREFIX);
     }
 
-    if (!hasRawKernelDependency && !hasKernelDependency && !hasKPIDependency) {
+    if (!hasKernelDependency && !hasKPIDependency) {
         // xxx - do we want to use validation flag for these too?
         OSKextLog(this,
             kOSKextLogWarningLevel |
@@ -5881,9 +6577,11 @@ OSKext::resolveDependencies(
     * its indirect dependencies to simulate old-style linking.  XXX - Should
     * check for duplicates.
     */
-    if (!hasRawKernelDependency && !hasKPIDependency) {
+    if (!hasKPIDependency) {
         unsigned int i;
 
+        flags.hasBleedthrough = true;
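+        /* hasBleedthrough: dependencies of this kext's libraries are
+         * added to its own list (see the comment above), as with
+         * old-style linking.
+         */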
+
         count = getNumDependencies();
         
        /* We add to the dependencies array in this loop, but do not iterate
@@ -6107,6 +6805,8 @@ OSKext::addClass(
         }
     }
 
+    notifyAddClassObservers(this, aClass, flags);
+
     result = kOSReturnSuccess;
 
 finish:
@@ -6154,6 +6854,8 @@ OSKext::removeClass(
 
     metaClasses->removeObject(aClass);
     
+    notifyRemoveClassObservers(this, aClass, flags);
+
     result = kOSReturnSuccess;
 
 finish:
@@ -6285,12 +6987,12 @@ OSKext::handleRequest(
     char         * response           = NULL;  // returned by reference
     uint32_t       responseLength     = 0;
 
-    OSObject     * parsedXML      = NULL;  // must release
+    OSObject     * parsedXML          = NULL;  // must release
     OSDictionary * requestDict        = NULL;  // do not release
     OSString     * errorString        = NULL;  // must release
 
     OSData       * responseData       = NULL;  // must release
-    OSObject     * responseObject = NULL;  // must release
+    OSObject     * responseObject     = NULL;  // must release
     
     OSSerialize  * serializer         = NULL;  // must release
 
@@ -6365,7 +7067,7 @@ OSKext::handleRequest(
     result = kOSKextReturnNotPrivileged;
     if (hostPriv == HOST_PRIV_NULL) {
         if (!predicate->isEqualTo(kKextRequestPredicateGetLoaded) &&
-            !predicate->isEqualTo(kKextRequestPredicateGetKernelLinkState) &&
+            !predicate->isEqualTo(kKextRequestPredicateGetKernelImage) &&
             !predicate->isEqualTo(kKextRequestPredicateGetKernelLoadAddress)) {
 
             goto finish;
@@ -6444,7 +7146,10 @@ OSKext::handleRequest(
         result = OSKext::dispatchResource(requestDict);
 
     } else if (predicate->isEqualTo(kKextRequestPredicateGetLoaded)) {
-        OSBoolean * delayAutounloadBool = NULL;
+        OSBoolean    * delayAutounloadBool = NULL;
+        OSObject     * infoKeysRaw         = NULL;
+        OSArray      * infoKeys            = NULL;
+        uint32_t       infoKeysCount       = 0;
         
         delayAutounloadBool = OSDynamicCast(OSBoolean,
             _OSKextGetRequestArgument(requestDict,
@@ -6457,8 +7162,31 @@ OSKext::handleRequest(
             OSKext::considerUnloads(/* rescheduleOnly? */ true);
         }
 
-        responseObject = OSDynamicCast(OSObject,
-            OSKext::copyLoadedKextInfo(kextIdentifiers));
+        infoKeysRaw = _OSKextGetRequestArgument(requestDict,
+                kKextRequestArgumentInfoKeysKey);
+        infoKeys = OSDynamicCast(OSArray, infoKeysRaw);
+        if (infoKeysRaw && !infoKeys) {
+            OSKextLog(/* kext */ NULL,
+                kOSKextLogErrorLevel |
+                kOSKextLogIPCFlag,
+                "Invalid arguments to kext info request.");
+            goto finish;
+        }
+        
+        if (infoKeys) {
+            infoKeysCount = infoKeys->getCount();
+            for (uint32_t i = 0; i < infoKeysCount; i++) {
+                if (!OSDynamicCast(OSString, infoKeys->getObject(i))) {
+                    OSKextLog(/* kext */ NULL,
+                        kOSKextLogErrorLevel |
+                        kOSKextLogIPCFlag,
+                        "Invalid arguments to kext info request.");
+                    goto finish;
+                }
+            }
+        }
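+
+        /* infoKeys is now either NULL (return all info) or an array of
+         * OSStrings naming the keys to return, e.g. (hypothetical)
+         * "OSBundleLoadAddress", "OSBundleLoadSize".
+         */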
+
+        responseObject = OSKext::copyLoadedKextInfo(kextIdentifiers, infoKeys);
         if (!responseObject) {
             result = kOSKextReturnInternalError;
         } else {
@@ -6489,16 +7217,15 @@ OSKext::handleRequest(
             (unsigned long long)textseg->vmaddr);
         addressNum = OSNumber::withNumber((long long unsigned int)textseg->vmaddr,
             8 * sizeof(long long unsigned int));
-        responseObject = OSDynamicCast(OSObject, addressNum);
+        responseObject = addressNum;
         result = kOSReturnSuccess;
 
-    } else if (predicate->isEqualTo(kKextRequestPredicateGetKernelLinkState)) {
+    } else if (predicate->isEqualTo(kKextRequestPredicateGetKernelImage)) {
         OSKextLog(/* kext */ NULL,
             kOSKextLogDebugLevel |
             kOSKextLogIPCFlag,
-            "Returning kernel link state.");
-        responseData = sKernelKext->linkState;
-        responseData->retain();
+            "Returning kernel image.");
+        responseData = OSKext::copySanitizedKernelImage();
         result = kOSReturnSuccess;
 
     } else if (predicate->isEqualTo(kKextRequestPredicateGetKernelRequests)) {
@@ -6506,7 +7233,7 @@ OSKext::handleRequest(
        /* Hand the current sKernelRequests array to the caller
         * (who must release it), and make a new one.
         */
-        responseObject = OSDynamicCast(OSObject, sKernelRequests);
+        responseObject = sKernelRequests;
         sKernelRequests = OSArray::withCapacity(0);
         sPostedKextLoadIdentifiers->flushCollection();
         OSKextLog(/* kext */ NULL,
@@ -6518,7 +7245,7 @@ OSKext::handleRequest(
     } else if (predicate->isEqualTo(kKextRequestPredicateGetAllLoadRequests)) {
         
         /* Return the set of all requested bundle identifiers */
-        responseObject = OSDynamicCast(OSObject, sAllKextLoadIdentifiers);
+        responseObject = sAllKextLoadIdentifiers;
         responseObject->retain();
         OSKextLog(/* kext */ NULL,
             kOSKextLogDebugLevel |
@@ -6613,7 +7340,7 @@ finish:
 
     IORecursiveLockUnlock(sKextLock);
 
-    OSSafeRelease(requestDict);
+    OSSafeRelease(parsedXML);
     OSSafeRelease(errorString);
     OSSafeRelease(responseData);
     OSSafeRelease(responseObject);
@@ -6626,10 +7353,12 @@ finish:
 /*********************************************************************
 *********************************************************************/
 /* static */
-OSArray *
-OSKext::copyLoadedKextInfo(OSArray * kextIdentifiers)
+OSDictionary *
+OSKext::copyLoadedKextInfo(
+    OSArray * kextIdentifiers,
+    OSArray * infoKeys)
 {
-    OSArray      * result = NULL;
+    OSDictionary * result = NULL;
     OSDictionary * kextInfo = NULL;  // must release
     uint32_t       count, i;
     uint32_t       idCount = 0;
@@ -6645,8 +7374,14 @@ OSKext::copyLoadedKextInfo(OSArray * kextIdentifiers)
         idCount = kextIdentifiers->getCount();
     }
 
+   /* Same for keys.
+    */
+    if (infoKeys && !infoKeys->getCount()) {
+        infoKeys = NULL;
+    }
+
     count = sLoadedKexts->getCount();
-    result = OSArray::withCapacity(count);
+    result = OSDictionary::withCapacity(count);
     if (!result) {
         goto finish;
     }
@@ -6685,8 +7420,10 @@ OSKext::copyLoadedKextInfo(OSArray * kextIdentifiers)
             continue;
         }
 
-        kextInfo = thisKext->copyInfo();
-        result->setObject(kextInfo);
+        kextInfo = thisKext->copyInfo(infoKeys);
+        if (kextInfo) {
+            result->setObject(thisKext->getIdentifier(), kextInfo);
+        }
     }
     
 finish:
@@ -6698,38 +7435,33 @@ finish:
 }
 
 /*********************************************************************
-Load Tag
-Bundle ID
-Bundle Version
-Path
-Load Address
-Load Size
-Wired Size
-Version
-Dependency Load Tags
-# Dependent References
-UUID
-RetainCount
+* Any info that requires allocation must goto finish on allocation
+* failure. Info that is just a lookup should simply not set the object
+* if the info does not exist.
 *********************************************************************/
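+/* Usage sketch (hypothetical caller): fetch only the load address and
+* load size for a kext. Keys are plain OSStrings matched by content;
+* releases are omitted for brevity.
+*
+*   OSArray * keys = OSArray::withCapacity(2);
+*   keys->setObject(OSString::withCString(kOSBundleLoadAddressKey));
+*   keys->setObject(OSString::withCString(kOSBundleLoadSizeKey));
+*   OSDictionary * info = aKext->copyInfo(keys);  // caller must release
+*/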
 #define _OSKextLoadInfoDictCapacity   (12)
 
 OSDictionary *
-OSKext::copyInfo(void)
-{
-    OSDictionary         * result             = NULL;
-    bool                   success            = false;
-    OSNumber             * cpuTypeNumber      = NULL;  // must release
-    OSNumber             * cpuSubtypeNumber   = NULL;  // must release
-    OSString             * versionString      = NULL;  // do not release
-    OSData               * uuid               = NULL;  // must release
-    OSNumber             * scratchNumber      = NULL;  // must release
-    OSArray              * dependencyLoadTags = NULL;  // must release
-    OSCollectionIterator * metaClassIterator  = NULL;  // must release
-    OSArray              * metaClassInfo      = NULL;  // must release
-    OSDictionary         * metaClassDict      = NULL;  // must release
-    OSMetaClass          * thisMetaClass      = NULL;  // do not release
-    OSString             * metaClassName      = NULL;  // must release
-    OSString             * superclassName     = NULL;  // must release
+OSKext::copyInfo(OSArray * infoKeys)
+{
+    OSDictionary         * result                      = NULL;
+    bool                   success                     = false;
+    OSData               * headerData                  = NULL;  // must release
+    OSNumber             * cpuTypeNumber               = NULL;  // must release
+    OSNumber             * cpuSubtypeNumber            = NULL;  // must release
+    OSString             * versionString               = NULL;  // do not release
+    uint32_t               executablePathCStringSize   = 0;
+    char                 * executablePathCString       = NULL;  // must release
+    OSString             * executablePathString        = NULL;  // must release
+    OSData               * uuid                        = NULL;  // must release
+    OSNumber             * scratchNumber               = NULL;  // must release
+    OSArray              * dependencyLoadTags          = NULL;  // must release
+    OSCollectionIterator * metaClassIterator           = NULL;  // must release
+    OSArray              * metaClassInfo               = NULL;  // must release
+    OSDictionary         * metaClassDict               = NULL;  // must release
+    OSMetaClass          * thisMetaClass               = NULL;  // do not release
+    OSString             * metaClassName               = NULL;  // must release
+    OSString             * superclassName              = NULL;  // must release
     uint32_t               count, i;
 
     result = OSDictionary::withCapacity(_OSKextLoadInfoDictCapacity);
@@ -6737,232 +7469,340 @@ OSKext::copyInfo(void)
         goto finish;
     }
 
-   /* CPU Type & Subtype.
-    * Use the CPU type of the kernel for all (loaded) kexts.
-    * xxx - should we not include this for the kernel components,
-    * xxx - or for any interface? they have mach-o files, they're just weird.
+    
+   /* Empty keys means no keys, but NULL is quicker to check.
+    */
+    if (infoKeys && !infoKeys->getCount()) {
+        infoKeys = NULL;
+    }
+
+   /* Headers, CPU type, and CPU subtype.
     */
-    if (linkedExecutable || (this == sKernelKext)) {
+    if (!infoKeys ||
+        _OSArrayContainsCString(infoKeys, kOSBundleMachOHeadersKey) ||
+        _OSArrayContainsCString(infoKeys, kOSBundleCPUTypeKey) ||
+        _OSArrayContainsCString(infoKeys, kOSBundleCPUSubtypeKey))
+    {
+
+        if (linkedExecutable && !isInterface()) {
+
+            kernel_mach_header_t *kext_mach_hdr = (kernel_mach_header_t *)
+                linkedExecutable->getBytesNoCopy();
+
+            if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleMachOHeadersKey)) {
+                headerData = OSData::withBytes(kext_mach_hdr,
+                    (u_int) (sizeof(*kext_mach_hdr) + kext_mach_hdr->sizeofcmds));
+                if (!headerData) {
+                    goto finish;
+                }
+                result->setObject(kOSBundleMachOHeadersKey, headerData);
+            }
+
+            if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleCPUTypeKey)) {
+                cpuTypeNumber = OSNumber::withNumber(
+                    (uint64_t) kext_mach_hdr->cputype,
+                    8 * sizeof(kext_mach_hdr->cputype));
+                if (!cpuTypeNumber) {
+                    goto finish;
+                }
+                result->setObject(kOSBundleCPUTypeKey, cpuTypeNumber);
+            }
 
-        cpuTypeNumber = OSNumber::withNumber(
-            (long long unsigned int)_mh_execute_header.cputype,
-            8 * sizeof(_mh_execute_header.cputype));
-        if (cpuTypeNumber) {
-            result->setObject(kOSBundleCPUTypeKey, cpuTypeNumber);
+            if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleCPUSubtypeKey)) {
+                cpuSubtypeNumber = OSNumber::withNumber(
+                    (uint64_t) kext_mach_hdr->cpusubtype,
+                    8 * sizeof(kext_mach_hdr->cpusubtype));
+                if (!cpuSubtypeNumber) {
+                    goto finish;
+                }
+                result->setObject(kOSBundleCPUSubtypeKey, cpuSubtypeNumber);
+            }
         }
     }
     
-    // I don't want to rely on a mach header for nonkernel kexts, yet
-    if (this == sKernelKext) {
-        cpuSubtypeNumber = OSNumber::withNumber(
-            (long long unsigned int)_mh_execute_header.cputype,
-            8 * sizeof(_mh_execute_header.cputype));
-        if (cpuSubtypeNumber) {
-            result->setObject(kOSBundleCPUSubtypeKey, cpuSubtypeNumber);
-        }
-    }
-
-   /* CFBundleIdentifier.
+   /* CFBundleIdentifier. We set this unconditionally, regardless of
+    * infoKeys, since every caller needs it.
     */
     result->setObject(kCFBundleIdentifierKey, bundleID);
 
    /* CFBundleVersion.
     */
-    versionString = OSDynamicCast(OSString,
-        getPropertyForHostArch(kCFBundleVersionKey));
-    if (versionString) {
-        result->setObject(kCFBundleVersionKey, versionString);
+    if (!infoKeys || _OSArrayContainsCString(infoKeys, kCFBundleVersionKey)) {
+        versionString = OSDynamicCast(OSString,
+            getPropertyForHostArch(kCFBundleVersionKey));
+        if (versionString) {
+            result->setObject(kCFBundleVersionKey, versionString);
+        }
     }
 
    /* OSBundleCompatibleVersion.
     */
-    versionString = OSDynamicCast(OSString,
-        getPropertyForHostArch(kOSBundleCompatibleVersionKey));
-    if (versionString) {
-        result->setObject(kOSBundleCompatibleVersionKey, versionString);
+    if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleCompatibleVersionKey)) {
+        versionString = OSDynamicCast(OSString,
+            getPropertyForHostArch(kOSBundleCompatibleVersionKey));
+        if (versionString) {
+            result->setObject(kOSBundleCompatibleVersionKey, versionString);
+        }
     }
 
    /* Path.
     */
-    if (path) {
-        result->setObject(kOSBundlePathKey, path);
+    if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundlePathKey)) {
+        if (path) {
+            result->setObject(kOSBundlePathKey, path);
+        }
     }
 
-   /* UUID.
+
+   /* OSBundleExecutablePath.
     */
-    uuid = copyUUID();
-    if (uuid) {
-        result->setObject(kOSBundleUUIDKey, uuid);
+    if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleExecutablePathKey)) {
+         if (path && executableRelPath) {
+
+            uint32_t pathLength = path->getLength();  // gets incremented below
+
+            // +1 for slash, +1 for \0
+            executablePathCStringSize = pathLength + executableRelPath->getLength() + 2;
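+
+            // The result is "<path>/<executableRelPath>", e.g. (hypothetical)
+            // "/System/Library/Extensions/Foo.kext/Contents/MacOS/Foo".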
+
+            executablePathCString = (char *)kalloc(executablePathCStringSize *
+                sizeof(char)); // size already includes the slash and '\0'
+            if (!executablePathCString) {
+                goto finish;
+            }
+            strlcpy(executablePathCString, path->getCStringNoCopy(),
+                executablePathCStringSize);
+            executablePathCString[pathLength++] = '/';
+            executablePathCString[pathLength++] = '\0';
+            strlcat(executablePathCString, executableRelPath->getCStringNoCopy(),
+                executablePathCStringSize);
+
+            executablePathString = OSString::withCString(executablePathCString);
+
+            if (!executablePathString) {
+                goto finish;
+            }
+
+            result->setObject(kOSBundleExecutablePathKey, executablePathString);
+        }
+    }
+
+   /* UUID, if the kext has one.
+    */
+    if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleUUIDKey)) {
+        uuid = copyUUID();
+        if (uuid) {
+            result->setObject(kOSBundleUUIDKey, uuid);
+        }
     }
     
    /*****
     * OSKernelResource, OSBundleIsInterface, OSBundlePrelinked, OSBundleStarted.
     */
-    result->setObject(kOSKernelResourceKey,
-        isKernelComponent() ? kOSBooleanTrue : kOSBooleanFalse);
+    if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSKernelResourceKey)) {
+        result->setObject(kOSKernelResourceKey,
+            isKernelComponent() ? kOSBooleanTrue : kOSBooleanFalse);
+    }
     
-    result->setObject(kOSBundleIsInterfaceKey,
-        isInterface() ? kOSBooleanTrue : kOSBooleanFalse);
+    if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleIsInterfaceKey)) {
+        result->setObject(kOSBundleIsInterfaceKey,
+            isInterface() ? kOSBooleanTrue : kOSBooleanFalse);
+    }
     
-    result->setObject(kOSBundlePrelinkedKey,
-        isPrelinked() ? kOSBooleanTrue : kOSBooleanFalse);
+    if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundlePrelinkedKey)) {
+        result->setObject(kOSBundlePrelinkedKey,
+            isPrelinked() ? kOSBooleanTrue : kOSBooleanFalse);
+    }
     
-    result->setObject(kOSBundleStartedKey,
-        isStarted() ? kOSBooleanTrue : kOSBooleanFalse);
+    if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleStartedKey)) {
+        result->setObject(kOSBundleStartedKey,
+            isStarted() ? kOSBooleanTrue : kOSBooleanFalse);
+    }
 
    /* LoadTag (Index).
     */
-    scratchNumber = OSNumber::withNumber((unsigned long long)loadTag,
-        /* numBits */ 8 * sizeof(loadTag));
-    if (scratchNumber) {
+    if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleLoadTagKey)) {
+        scratchNumber = OSNumber::withNumber((unsigned long long)loadTag,
+            /* numBits */ 8 * sizeof(loadTag));
+        if (!scratchNumber) {
+            goto finish;
+        }
         result->setObject(kOSBundleLoadTagKey, scratchNumber);
         OSSafeReleaseNULL(scratchNumber);
     }
     
    /* LoadAddress, LoadSize.
     */
-    if (isInterface() || linkedExecutable) {
-       /* These go to userspace via serialization, so we don't want any doubts
-        * about their size.
-        */
-        uint64_t    loadAddress = 0;
-        uint32_t    loadSize    = 0;
-        uint32_t    wiredSize   = 0;
-
-       /* Interfaces always report 0 load address & size.
-        * Just the way they roll.
-        *
-        * xxx - leaving in # when we have a linkedExecutable...a kernelcomp
-        * xxx - shouldn't have one!
-        */
-        if (linkedExecutable /* && !isInterface() */) {
-            loadAddress = (uint64_t)linkedExecutable->getBytesNoCopy();
-            loadSize = linkedExecutable->getLength();
-            
-           /* If we have a kmod_info struct, calculated the wired size
-            * from that. Otherwise it's the full load size.
+    if (!infoKeys ||
+        _OSArrayContainsCString(infoKeys, kOSBundleLoadAddressKey) ||
+        _OSArrayContainsCString(infoKeys, kOSBundleLoadSizeKey) ||
+        _OSArrayContainsCString(infoKeys, kOSBundleWiredSizeKey))
+    {
+        if (isInterface() || linkedExecutable) {
+           /* These go to userspace via serialization, so we don't want any doubts
+            * about their size.
             */
-            if (kmod_info) {
-                wiredSize = loadSize - kmod_info->hdr_size;
-            } else {
-                wiredSize = loadSize;
+            uint64_t    loadAddress = 0;
+            uint32_t    loadSize    = 0;
+            uint32_t    wiredSize   = 0;
+
+           /* Interfaces always report a load address & size of 0.
+            * Just the way they roll.
+            *
+            * xxx - leaving in the numbers when we have a linkedExecutable...
+            * xxx - a kernel component shouldn't have one!
+            */
+            if (linkedExecutable /* && !isInterface() */) {
+                loadAddress = (uint64_t)linkedExecutable->getBytesNoCopy();
+                loadSize = linkedExecutable->getLength();
+                
+               /* If we have a kmod_info struct, calculate the wired size
+                * from that. Otherwise it's the full load size.
+                */
+                if (kmod_info) {
+                    wiredSize = loadSize - kmod_info->hdr_size;
+                } else {
+                    wiredSize = loadSize;
+                }
             }
-        }
 
-        scratchNumber = OSNumber::withNumber(
-            (unsigned long long)(loadAddress),
-            /* numBits */ 8 * sizeof(loadAddress));
-        if (scratchNumber) {
-            result->setObject(kOSBundleLoadAddressKey, scratchNumber);
-            OSSafeReleaseNULL(scratchNumber);
-        }
-        scratchNumber = OSNumber::withNumber(
-            (unsigned long long)(loadSize),
-            /* numBits */ 8 * sizeof(loadSize));
-        if (scratchNumber) {
-            result->setObject(kOSBundleLoadSizeKey, scratchNumber);
-            OSSafeReleaseNULL(scratchNumber);
-        }
-        scratchNumber = OSNumber::withNumber(
-            (unsigned long long)(wiredSize),
-            /* numBits */ 8 * sizeof(wiredSize));
-        if (scratchNumber) {
-            result->setObject(kOSBundleWiredSizeKey, scratchNumber);
-            OSSafeReleaseNULL(scratchNumber);
+            if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleLoadAddressKey)) {
+                scratchNumber = OSNumber::withNumber(
+                    (unsigned long long)(loadAddress),
+                    /* numBits */ 8 * sizeof(loadAddress));
+                if (!scratchNumber) {
+                    goto finish;
+                }
+                result->setObject(kOSBundleLoadAddressKey, scratchNumber);
+                OSSafeReleaseNULL(scratchNumber);
+            }
+            if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleLoadSizeKey)) {
+                scratchNumber = OSNumber::withNumber(
+                    (unsigned long long)(loadSize),
+                    /* numBits */ 8 * sizeof(loadSize));
+                if (!scratchNumber) {
+                    goto finish;
+                }
+                result->setObject(kOSBundleLoadSizeKey, scratchNumber);
+                OSSafeReleaseNULL(scratchNumber);
+            }
+            if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleWiredSizeKey)) {
+                scratchNumber = OSNumber::withNumber(
+                    (unsigned long long)(wiredSize),
+                    /* numBits */ 8 * sizeof(wiredSize));
+                if (!scratchNumber) {
+                    goto finish;
+                }
+                result->setObject(kOSBundleWiredSizeKey, scratchNumber);
+                OSSafeReleaseNULL(scratchNumber);
+            }
         }
     }
-    
+
    /* OSBundleDependencies. In descending order for
     * easy compatibility with kextstat(8).
     */
-    if ((count = getNumDependencies())) {
-        dependencyLoadTags = OSArray::withCapacity(count);
-        result->setObject(kOSBundleDependenciesKey, dependencyLoadTags);
+    if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleDependenciesKey)) {
+        if ((count = getNumDependencies())) {
+            dependencyLoadTags = OSArray::withCapacity(count);
+            result->setObject(kOSBundleDependenciesKey, dependencyLoadTags);
 
-        i = count - 1;
-        do {
-            OSKext * dependency = OSDynamicCast(OSKext,
-                dependencies->getObject(i));
+            i = count - 1;
+            do {
+                OSKext * dependency = OSDynamicCast(OSKext,
+                    dependencies->getObject(i));
 
-            OSSafeReleaseNULL(scratchNumber);
-            
-            if (!dependency) {
-                continue;
-            }
-            scratchNumber = OSNumber::withNumber(
-                (unsigned long long)dependency->getLoadTag(),
-                /* numBits*/ 8 * sizeof(loadTag));
-            if (scratchNumber) {
+                OSSafeReleaseNULL(scratchNumber);
+                
+                if (!dependency) {
+                    continue;
+                }
+                scratchNumber = OSNumber::withNumber(
+                    (unsigned long long)dependency->getLoadTag(),
+                    /* numBits*/ 8 * sizeof(loadTag));
+                if (!scratchNumber) {
+                    goto finish;
+                }
                 dependencyLoadTags->setObject(scratchNumber);
-            }
-        } while (i--);
+            } while (i--);
+        }
     }
 
     OSSafeReleaseNULL(scratchNumber);
 
    /* OSBundleMetaClasses.
     */
-    if (metaClasses && metaClasses->getCount()) {
-        metaClassIterator = OSCollectionIterator::withCollection(metaClasses);
-        metaClassInfo = OSArray::withCapacity(metaClasses->getCount());
-        if (!metaClassIterator || !metaClassInfo) {
-            goto finish;
-        }
-        result->setObject(kOSBundleClassesKey, metaClassInfo);
+    if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleClassesKey)) {
+        if (metaClasses && metaClasses->getCount()) {
+            metaClassIterator = OSCollectionIterator::withCollection(metaClasses);
+            metaClassInfo = OSArray::withCapacity(metaClasses->getCount());
+            if (!metaClassIterator || !metaClassInfo) {
+                goto finish;
+            }
+            result->setObject(kOSBundleClassesKey, metaClassInfo);
 
-        while ( (thisMetaClass = OSDynamicCast(OSMetaClass,
-            metaClassIterator->getNextObject())) ) {
+            while ( (thisMetaClass = OSDynamicCast(OSMetaClass,
+                metaClassIterator->getNextObject())) ) {
 
-            OSSafeReleaseNULL(metaClassDict);
-            OSSafeReleaseNULL(metaClassName);
-            OSSafeReleaseNULL(superclassName);
-            OSSafeReleaseNULL(scratchNumber);
+                OSSafeReleaseNULL(metaClassDict);
+                OSSafeReleaseNULL(scratchNumber);
+                OSSafeReleaseNULL(metaClassName);
+                OSSafeReleaseNULL(superclassName);
 
-            metaClassDict = OSDictionary::withCapacity(3);
-            if (!metaClassDict) {
-                goto finish;
-            }
+                metaClassDict = OSDictionary::withCapacity(3);
+                if (!metaClassDict) {
+                    goto finish;
+                }
 
-            metaClassName = OSString::withCString(thisMetaClass->getClassName());
-            if (thisMetaClass->getSuperClass()) {
-                superclassName = OSString::withCString(
-                    thisMetaClass->getSuperClass()->getClassName());
-            }
-            scratchNumber = OSNumber::withNumber(thisMetaClass->getInstanceCount(),
-                8 * sizeof(unsigned int));
-            if (!metaClassDict || !metaClassName || !superclassName ||
-                !scratchNumber) {
+                metaClassName = OSString::withCString(thisMetaClass->getClassName());
+                if (thisMetaClass->getSuperClass()) {
+                    superclassName = OSString::withCString(
+                        thisMetaClass->getSuperClass()->getClassName());
+                }
+                scratchNumber = OSNumber::withNumber(thisMetaClass->getInstanceCount(),
+                    8 * sizeof(unsigned int));
+                    
+               /* Bail if any of the essentials is missing. The root class lacks a superclass,
+                * of course.
+                */
+                if (!metaClassDict || !metaClassName || !scratchNumber) {
+                    goto finish;
+                }
 
-                goto finish;
+                metaClassInfo->setObject(metaClassDict);
+                metaClassDict->setObject(kOSMetaClassNameKey, metaClassName);
+                if (superclassName) {
+                    metaClassDict->setObject(kOSMetaClassSuperclassNameKey, superclassName);
+                }
+                metaClassDict->setObject(kOSMetaClassTrackingCountKey, scratchNumber);
             }
-
-            metaClassInfo->setObject(metaClassDict);
-            metaClassDict->setObject(kOSMetaClassNameKey, metaClassName);
-            metaClassDict->setObject(kOSMetaClassSuperclassNameKey, superclassName);
-            metaClassDict->setObject(kOSMetaClassTrackingCountKey, scratchNumber);
         }
     }
     
    /* OSBundleRetainCount.
     */
-    OSSafeReleaseNULL(scratchNumber);
-    {
-        int extRetainCount = getRetainCount() - 1;
-        if (isLoaded()) {
-            extRetainCount--;
-        }
-        scratchNumber = OSNumber::withNumber(
-            (int)extRetainCount,
-            /* numBits*/ 8 * sizeof(int));
-        if (scratchNumber) {
-            result->setObject(kOSBundleRetainCountKey, scratchNumber);
+    if (!infoKeys || _OSArrayContainsCString(infoKeys, kOSBundleRetainCountKey)) {
+        OSSafeReleaseNULL(scratchNumber);
+        {
+            int kextRetainCount = getRetainCount() - 1;
+            if (isLoaded()) {
+                kextRetainCount--;
+            }
+            scratchNumber = OSNumber::withNumber(
+                (int)kextRetainCount,
+                /* numBits*/ 8 * sizeof(int));
+            if (scratchNumber) {
+                result->setObject(kOSBundleRetainCountKey, scratchNumber);
+            }
         }
     }
 
     success = true;
+
 finish:
+    OSSafeRelease(headerData);
     OSSafeRelease(cpuTypeNumber);
     OSSafeRelease(cpuSubtypeNumber);
+    OSSafeRelease(executablePathString);
+    if (executablePathCString) kfree(executablePathCString, executablePathCStringSize);
     OSSafeRelease(uuid);
     OSSafeRelease(scratchNumber);
     OSSafeRelease(dependencyLoadTags);
@@ -6977,6 +7817,216 @@ finish:
     return result;
 }
 
+/********************************************************************/
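+/* The kernel image begins with a mach_header followed immediately by
+* ncmds variable-length load commands:
+*
+*   [mach_header][load_command 0][load_command 1]...[load_command ncmds-1]
+*
+* Every load command starts with { uint32_t cmd; uint32_t cmdsize; },
+* so we can walk the list by advancing cmdsize bytes each step.
+*/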
+static struct symtab_command * getKernelSymtab(void)
+{
+    struct symtab_command * result   = NULL;
+    struct load_command   * load_cmd = NULL;
+    unsigned long i;
+
+    load_cmd = (struct load_command *)
+        ((uintptr_t)&_mh_execute_header + sizeof(_mh_execute_header));
+    for (i = 0; i < _mh_execute_header.ncmds; i++) {
+        if (load_cmd->cmd == LC_SYMTAB) {
+            result = (struct symtab_command *)load_cmd;
+            goto finish;
+        }
+        load_cmd = (struct load_command *)
+            ((uintptr_t)load_cmd + load_cmd->cmdsize);
+    }
+
+finish:
+    return result;
+}
+
+/*********************************************************************
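+* Builds a pared-down Mach-O image of the running kernel for user-space
+* linking: just the mach header, the __TEXT/__DATA/__LINKEDIT segment
+* commands and LC_SYMTAB, the two __const sections' contents, and the
+* __LINKEDIT contents. All other section data is dropped.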
+*********************************************************************/
+/* static */
+OSData *
+OSKext::copySanitizedKernelImage(void)
+{
+    OSData                   * result            = NULL;
+
+    kernel_mach_header_t     * kernelHeader      = NULL;
+    uint32_t                   sizeofcmds        = 0;
+
+   /* These start out pointing to running kernel but
+    * after copying point to the copied info.
+    */
+    kernel_segment_command_t * text_seg          = NULL;
+    kernel_segment_command_t * data_seg          = NULL;
+    kernel_segment_command_t * linkedit_seg      = NULL;
+    struct symtab_command    * symtab_cmd        = NULL;
+    kernel_section_t         * text_const_sect   = NULL;
+    kernel_section_t         * data_const_sect   = NULL;
+
+    kern_return_t              kern_result       = 0;
+    u_long                     kernelCopyLength  = 0;
+    vm_offset_t                kernelCopyAddr    = 0;
+    u_char                   * kernelCopy        = NULL;
+    
+    vm_offset_t                contentOffset     = 0;
+    struct load_command      * scan_cmd          = NULL;
+    kernel_section_t         * scan_sect         = NULL;
+    int64_t                    stroff_shift      = 0;
+    
+    uint32_t                   i;
+
+    text_seg = getsegbyname("__TEXT");
+    data_seg = getsegbyname("__DATA");
+    linkedit_seg = getsegbyname("__LINKEDIT");
+    symtab_cmd = getKernelSymtab();
+    
+    text_const_sect = getsectbyname("__TEXT", "__const");
+    data_const_sect = getsectbyname("__DATA", "__const");
+    
+    if (!text_seg || !data_seg || !linkedit_seg || !symtab_cmd ||
+        !text_const_sect || !data_const_sect) {
+
+        OSKextLog(/* kext */ NULL, 
+            kOSKextLogErrorLevel | kOSKextLogIPCFlag,
+            "Can't provide kernel image for linking; missing component.");
+        goto finish;
+    }
+
+   /* Figure the size of the kernel image to build. We don't use the sizes of
+    * the __TEXT & __DATA segments overall as we only use the __const sections,
+    * so add those in manually. We're going to round each part to page size
+    * multiples too, just to be extra cautious.
+    */
+    sizeofcmds = text_seg->cmdsize + data_seg->cmdsize +
+        linkedit_seg->cmdsize + symtab_cmd->cmdsize;
+    kernelCopyLength = round_page(sizeof(_mh_execute_header) + sizeofcmds) +
+        round_page(text_const_sect->size) +
+        round_page(data_const_sect->size) +
+        round_page(linkedit_seg->filesize);
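+
+   /* The resulting image is laid out with each piece page-aligned:
+    *   [mach header + load cmds][__TEXT,__const][__DATA,__const][__LINKEDIT]
+    */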
+
+    kern_result = kmem_alloc(kernel_map, &kernelCopyAddr, kernelCopyLength);
+    if (kern_result != KERN_SUCCESS) {
+        goto finish;
+    }
+
+    kernelCopy = (u_char *)kernelCopyAddr;
+    bzero(kernelCopy, kernelCopyLength);  // keeps uninitialized page-rounding gaps from leaking stale kernel memory
+
+   /*****
+    * Copy the kernel Mach header and the load commands we want.
+    */
+    memcpy(kernelCopy, &_mh_execute_header, sizeof(_mh_execute_header));
+    kernelHeader = (kernel_mach_header_t *)kernelCopy;
+    kernelHeader->ncmds = 0;
+    kernelHeader->sizeofcmds = sizeofcmds;
+    contentOffset = round_page(sizeof(_mh_execute_header) + sizeofcmds);
+
+   /* __TEXT segment load command and sections.
+    * Note that the __TEXT segment's 'offset' and 'filesize' include
+    * the data from the beginning of the mach header.
+    *
+    * Don't muck with the __TEXT segment's vmsize here;
+    * user-space linking requires it to match what is in the running kernel.
+    * We'll just have to live with it not being accurate
+    * (not like we can run the sanitized image after all).
+    */
+    scan_cmd = (struct load_command *)&kernelHeader[1]; // just past mach header
+    memcpy(scan_cmd, text_seg, text_seg->cmdsize);
+    kernelHeader->ncmds++;
+    text_seg = (kernel_segment_command_t *)scan_cmd;  // retarget to constructed segment
+    text_seg->fileoff = 0;
+    text_seg->filesize = round_page(sizeof(_mh_execute_header) + sizeofcmds);
+
+    scan_sect = (kernel_section_t *)(text_seg + 1);
+    for (i = 0; i < text_seg->nsects; i++, scan_sect++) {
+        if (0 == strncmp("__const", scan_sect->sectname, sizeof("__const"))) {
+            text_const_sect   = scan_sect;  // retarget to constructed section
+
+            text_seg->filesize += scan_sect->size;
+
+            scan_sect->offset = contentOffset;
+            contentOffset    += scan_sect->size;
+
+            memcpy(kernelCopy + scan_sect->offset, (void *)(uintptr_t)scan_sect->addr,
+                scan_sect->size);
+        } else {
+            scan_sect->addr  = 0;
+            scan_sect->size  = 0;
+            scan_sect->offset = contentOffset;
+            scan_sect->nreloc = 0;
+        }
+    }
+
+    contentOffset = round_page(contentOffset);
+
+   /* __DATA segment load command and sections.
+    * Leave the vmsize as in the running kernel here, too.
+    */
+    scan_cmd = (struct load_command *)((uintptr_t)scan_cmd + scan_cmd->cmdsize);
+    memcpy(scan_cmd, data_seg, data_seg->cmdsize);
+    kernelHeader->ncmds++;
+    data_seg = (kernel_segment_command_t *)scan_cmd;  // retarget to constructed segment
+    data_seg->fileoff = contentOffset;
+    data_seg->filesize = 0;
+
+    scan_sect = (kernel_section_t *)(data_seg + 1);
+    for (i = 0; i < data_seg->nsects; i++, scan_sect++) {
+        if (0 == strncmp("__const", scan_sect->sectname, sizeof("__const"))) {
+            data_const_sect   = scan_sect;  // retarget to constructed section
+
+            data_seg->filesize += scan_sect->size;
+
+            scan_sect->offset = contentOffset;
+            contentOffset    += scan_sect->size;
+
+            memcpy(kernelCopy + scan_sect->offset, (void *)(uintptr_t)scan_sect->addr,
+                scan_sect->size);
+        } else {
+            scan_sect->addr  = 0;
+            scan_sect->size  = 0;
+            scan_sect->offset = contentOffset;
+            scan_sect->nreloc = 0;
+        }
+    }
+
+    contentOffset = round_page(contentOffset);
+
+   /* __LINKEDIT segment load command.
+    * Leave the vmsize as in the running kernel here, too.
+    */
+    scan_cmd = (struct load_command *)((uintptr_t)scan_cmd + scan_cmd->cmdsize);
+    memcpy(scan_cmd, linkedit_seg, linkedit_seg->cmdsize);
+    kernelHeader->ncmds++;
+    linkedit_seg = (kernel_segment_command_t *)scan_cmd;  // retarget to constructed segment
+    linkedit_seg->fileoff = contentOffset;
+    linkedit_seg->filesize = linkedit_seg->vmsize;
+
+    contentOffset += round_page(linkedit_seg->vmsize);
+
+    memcpy(kernelCopy + linkedit_seg->fileoff, (void *)(uintptr_t)linkedit_seg->vmaddr,
+        linkedit_seg->vmsize);
+
+   /* LC_SYMTAB load command (contents shared with __LINKEDIT).
+    */
+    scan_cmd = (struct load_command *)((uintptr_t)scan_cmd + scan_cmd->cmdsize);
+    memcpy(scan_cmd, symtab_cmd, symtab_cmd->cmdsize);
+    kernelHeader->ncmds++;
+    symtab_cmd = (struct symtab_command *)scan_cmd;  // retarget to constructed cmd
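+   /* symoff and stroff were offsets into the original kernel file; keep
+    * their relative distance so the string table stays reachable from
+    * the copied __LINKEDIT data at its new file offset.
+    */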
+    stroff_shift = symtab_cmd->stroff - symtab_cmd->symoff;
+    symtab_cmd->symoff = linkedit_seg->fileoff;
+    symtab_cmd->stroff = symtab_cmd->symoff + stroff_shift;
+
+   /* Wrap the thing up in an OSData.
+    */
+    result = OSData::withBytesNoCopy(kernelCopy, kernelCopyLength);
+    if (result) {
+        result->setDeallocFunction(osdata_kmem_free);
+        kernelCopy = NULL;
+    }
+    
+finish:
+    if (kernelCopy) kmem_free(kernel_map, kernelCopyAddr, kernelCopyLength);
+
+    return result;
+}
+
 /*********************************************************************
 *********************************************************************/
 /* static */
@@ -7009,6 +8059,17 @@ OSKext::requestResource(
         *requestTagOut = kOSKextRequestTagInvalid;
     }
 
+    /* If requests to user space are disabled, don't go any further */
+    if (!sKernelRequestsEnabled) {
+        OSKextLog(/* kext */ NULL, 
+            kOSKextLogErrorLevel | kOSKextLogIPCFlag,
+            "Can't request resource %s for %s - requests to user space are disabled.",
+            resourceNameCString,
+            kextIdentifierCString);
+        result = kOSKextReturnDisabled;
+        goto finish;
+    }
+
     if (!kextIdentifierCString || !resourceNameCString || !callback) {
         result = kOSKextReturnInvalidArgument;
         goto finish;
@@ -7109,7 +8170,7 @@ OSKext::requestResource(
         goto finish;
     }
 
-    OSKextPingKextd();
+    OSKext::pingKextd();
 
     result = kOSReturnSuccess;
     if (requestTagOut) {
@@ -7153,6 +8214,7 @@ finish:
 }
 
 /*********************************************************************
+* Assumes sKextLock is held.
 *********************************************************************/
 /* static */
 OSReturn
@@ -7179,6 +8241,7 @@ finish:
 }
 
 /*********************************************************************
+* Assumes sKextLock is held.
 *********************************************************************/
 /* static */
 OSReturn
@@ -7191,8 +8254,6 @@ OSKext::dequeueCallbackForRequestTag(
     OSNumber      * callbackTagNum  = NULL;  // do not release
     unsigned int    count, i;
 
-    IORecursiveLockLock(sKextLock);
-
     result = kOSReturnError;
     count = sRequestCallbackRecords->getCount();
     for (i = 0; i < count; i++) {
@@ -7227,11 +8288,11 @@ OSKext::dequeueCallbackForRequestTag(
     result = kOSKextReturnNotFound;
 
 finish:
-    IORecursiveLockUnlock(sKextLock);
     return result;
 }
 
 /*********************************************************************
+* Assumes sKextLock is held.
 *********************************************************************/
 /* static */
 OSReturn
@@ -7250,8 +8311,6 @@ OSKext::dispatchResource(OSDictionary * requestDict)
     void                          * context         = NULL;  // do not free
     OSKext                        * callbackKext    = NULL;  // must release (looked up)
 
-    IORecursiveLockLock(sKextLock);
-
    /* Get the args from the request. Right now we need the tag
     * to look up the callback record, and the result for invoking the callback.
     */
@@ -7329,7 +8388,6 @@ finish:
     if (callbackKext)   callbackKext->release();
     if (callbackRecord) callbackRecord->release();
 
-    IORecursiveLockUnlock(sKextLock);
     return result;
 }
 
@@ -7372,6 +8430,7 @@ finish:
 }
 
 /*********************************************************************
+* Assumes sKextLock is held.
 *********************************************************************/
 /* static */
 OSReturn
@@ -7383,9 +8442,11 @@ OSKext::cancelRequest(
     OSDictionary * callbackRecord = NULL;  // must release
     OSData       * contextWrapper = NULL;  // do not release
 
+    IORecursiveLockLock(sKextLock);
     result = OSKext::dequeueCallbackForRequestTag(requestTag,
         &callbackRecord);
-        
+    IORecursiveLockUnlock(sKextLock);
+
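+   /* The dequeued callbackRecord comes back retained (note "must
+    * release" above), so it remains valid after dropping sKextLock.
+    */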
     if (result == kOSReturnSuccess && contextOut) {
         contextWrapper = OSDynamicCast(OSData,
             _OSKextGetRequestArgument(callbackRecord,
@@ -7399,6 +8460,7 @@ OSKext::cancelRequest(
 }
 
 /*********************************************************************
+* Assumes sKextLock is held.
 *********************************************************************/
 void
 OSKext::invokeOrCancelRequestCallbacks(
@@ -7407,8 +8469,6 @@ OSKext::invokeOrCancelRequestCallbacks(
 {
     unsigned int count, i;
     
-    IORecursiveLockLock(sKextLock);
-
     count = sRequestCallbackRecords->getCount();
     if (!count) {
         goto finish;
@@ -7448,11 +8508,11 @@ OSKext::invokeOrCancelRequestCallbacks(
     } while (i--);
 
 finish:
-    IORecursiveLockUnlock(sKextLock);
     return;
 }
 
 /*********************************************************************
+* Assumes sKextLock is held.
 *********************************************************************/
 uint32_t
 OSKext::countRequestCallbacks(void)
@@ -7460,8 +8520,6 @@ OSKext::countRequestCallbacks(void)
     uint32_t     result = 0;
     unsigned int count, i;
     
-    IORecursiveLockLock(sKextLock);
-
     count = sRequestCallbackRecords->getCount();
     if (!count) {
         goto finish;
@@ -7494,7 +8552,6 @@ OSKext::countRequestCallbacks(void)
     } while (i--);
 
 finish:
-    IORecursiveLockUnlock(sKextLock);
     return result;
 }
 
@@ -7619,6 +8676,39 @@ finish:
     return result;
 }
 
+/*********************************************************************
+*********************************************************************/
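+/* Membership test used by copyInfo() to honor per-key info requests.
+* The cString is interned as an OSSymbol; isEqualTo() also matches
+* plain OSString entries by content, which is what user requests carry.
+*/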
+static bool _OSArrayContainsCString(
+    OSArray    * array,
+    const char * cString)
+{
+    bool             result = false;
+    const OSSymbol * symbol = NULL;
+    uint32_t         count, i;
+    
+    if (!array || !cString) {
+        goto finish;
+    }
+
+    symbol = OSSymbol::withCStringNoCopy(cString);
+    if (!symbol) {
+        goto finish;
+    }
+
+    count = array->getCount();
+    for (i = 0; i < count; i++) {
+        OSObject * thisObject = array->getObject(i);
+        if (symbol->isEqualTo(thisObject)) {
+            result = true;
+            goto finish;
+        }
+    }
+
+finish:
+    if (symbol) symbol->release();
+    return result;
+}
+
 #if PRAGMA_MARK
 #pragma mark Personalities (IOKit Drivers)
 #endif
@@ -7906,29 +8996,21 @@ OSKext::removePersonalitiesFromCatalog(void)
 /* static */
 OSKextLogSpec
 OSKext::setUserSpaceLogFilter(
-    OSKextLogSpec   userLogFilter,
+    OSKextLogSpec   newUserLogFilter,
     bool            captureFlag)
 {
     OSKextLogSpec result;
+    bool          allocError = false;
 
-    IORecursiveLockLock(sKextInnerLock);
+   /* Do not call any function that takes sKextLoggingLock inside
+    * this critical section; do any logging after dropping the lock.
+    */
+    IOLockLock(sKextLoggingLock);
 
     result = sUserSpaceKextLogFilter;
-    sUserSpaceKextLogFilter = userLogFilter;
-
-   /* If the config flag itself is changing, log the state change
-    * going both ways, before setting up the user-space log arrays,
-    * so that this is only logged in the kernel.
-    */
-    if (sUserSpaceKextLogFilter != result) {
-        OSKextLog(/* kext */ NULL,
-            kOSKextLogDebugLevel |
-            kOSKextLogGeneralFlag,
-            "User-space log flags changed from 0x%x to 0x%x.",
-            result, sUserSpaceKextLogFilter);
-    }
+    sUserSpaceKextLogFilter = newUserLogFilter;
 
-    if (userLogFilter && captureFlag &&
+    if (newUserLogFilter && captureFlag &&
         !sUserSpaceLogSpecArray && !sUserSpaceLogMessageArray) {
 
         // xxx - do some measurements for a good initial capacity?
@@ -7936,16 +9018,31 @@ OSKext::setUserSpaceLogFilter(
         sUserSpaceLogMessageArray = OSArray::withCapacity(0);
         
         if (!sUserSpaceLogSpecArray || !sUserSpaceLogMessageArray) {
-            OSKextLog(/* kext */ NULL,
-                kOSKextLogErrorLevel |
-                kOSKextLogGeneralFlag,
-                "Failed to allocate user-space log message arrays.");
             OSSafeReleaseNULL(sUserSpaceLogSpecArray);
             OSSafeReleaseNULL(sUserSpaceLogMessageArray);
+            allocError = true;
         }
     }
 
-    IORecursiveLockUnlock(sKextInnerLock);
+    IOLockUnlock(sKextLoggingLock);
+
+   /* If the config flag itself is changing, log the state change
+    * going both ways, now that the lock has been dropped.
+    */
+    if (result != newUserLogFilter) {
+        OSKextLog(/* kext */ NULL,
+            kOSKextLogDebugLevel |
+            kOSKextLogGeneralFlag,
+            "User-space log flags changed from 0x%x to 0x%x.",
+            result, newUserLogFilter);
+    }
+    if (allocError) {
+        OSKextLog(/* kext */ NULL,
+            kOSKextLogErrorLevel |
+            kOSKextLogGeneralFlag,
+            "Failed to allocate user-space log message arrays.");
+    }
 
     return result;
 }
@@ -7957,10 +9054,14 @@ OSKext::setUserSpaceLogFilter(
 OSArray *
 OSKext::clearUserSpaceLogFilter(void)
 {
-    OSArray        * result        = NULL;
+    OSArray       * result       = NULL;
     OSKextLogSpec   oldLogFilter;
+    OSKextLogSpec   newLogFilter = kOSKextLogSilentFilter;
 
-    IORecursiveLockLock(sKextInnerLock);
+   /* Do not call any function that takes sKextLoggingLock inside
+    * this critical section; do any logging after dropping the lock.
+    */
+    IOLockLock(sKextLoggingLock);
 
     result = OSArray::withCapacity(2);
     if (result) {
@@ -7971,25 +9072,26 @@ OSKext::clearUserSpaceLogFilter(void)
     OSSafeReleaseNULL(sUserSpaceLogMessageArray);
 
     oldLogFilter = sUserSpaceKextLogFilter;
-    sUserSpaceKextLogFilter = kOSKextLogSilentFilter;
+    sUserSpaceKextLogFilter = newLogFilter;
+
+    IOLockUnlock(sKextLoggingLock);
 
    /* If the config flag itself is changing, log the state change
     * going both ways, after tearing down the user-space log
     * arrays, so this is only logged within the kernel.
     */
-    if (oldLogFilter != sUserSpaceKextLogFilter) {
+    if (oldLogFilter != newLogFilter) {
         OSKextLog(/* kext */ NULL,
             kOSKextLogDebugLevel |
             kOSKextLogGeneralFlag,
             "User-space log flags changed from 0x%x to 0x%x.",
-            oldLogFilter, sUserSpaceKextLogFilter);
+            oldLogFilter, newLogFilter);
     }
 
-    IORecursiveLockUnlock(sKextInnerLock);
-
     return result;
 }
 
+
 /*********************************************************************
 * Do not call any function that takes sKextLock here!
 *********************************************************************/
@@ -7999,9 +9101,9 @@ OSKext::getUserSpaceLogFilter(void)
 {
     OSKextLogSpec result;
 
-    IORecursiveLockLock(sKextInnerLock);
+    IOLockLock(sKextLoggingLock);
     result = sUserSpaceKextLogFilter;
-    IORecursiveLockUnlock(sKextInnerLock);
+    IOLockUnlock(sKextLoggingLock);
 
     return result;
 }
@@ -8120,7 +9222,7 @@ OSKextVLog(
     OSKext         * aKext,
     OSKextLogSpec    msgLogSpec,
     const char     * format,
-    va_list    srcArgList)
+    va_list          srcArgList)
 {
     extern int       disableConsoleOutput;
 
@@ -8134,7 +9236,7 @@ OSKextVLog(
     OSString       * logString         = NULL;         // must release
     char           * buffer            = stackBuffer;  // do not free
 
-    IORecursiveLockLock(sKextInnerLock);
+    IOLockLock(sKextLoggingLock);
 
    /* Set the kext/global bit in the message spec if we have no
     * kext or if the kext requests logging.
@@ -8203,12 +9305,13 @@ OSKextVLog(
     }
 
 finish:
+    IOLockUnlock(sKextLoggingLock);
+
     if (allocBuffer) {
         kfree(allocBuffer, (length + 1) * sizeof(char));
     }
     OSSafeRelease(logString);
     OSSafeRelease(logSpecNum);
-    IORecursiveLockUnlock(sKextInnerLock);
     return;
 }
 
@@ -8218,6 +9321,7 @@ finish:
 #pragma mark Backtrace Dump & kmod_get_info() support
 #endif
 /*********************************************************************
+* This function must be safe to call in panic context.
 *********************************************************************/
 /* static */
 void
@@ -8227,74 +9331,164 @@ OSKext::printKextsInBacktrace(
     int         (* printf_func)(const char *fmt, ...),
     bool           lockFlag)
 {
-    vm_offset_t      * kscan_addr = NULL;
-    kmod_info_t      * k = NULL;
-    kmod_reference_t * r = NULL;
-    unsigned int       i;
-    int                found_kmod = 0;
+    addr64_t    summary_page = 0;
+    addr64_t    last_summary_page = 0;
+    bool        found_kmod = false;
+    u_int       i = 0;
 
     if (lockFlag) {
-        IORecursiveLockLock(sKextLock);
+        IOLockLock(sKextSummariesLock);
     }
 
-    for (k = kmod; k; k = k->next) {
-        if (pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)k)) == 0) {
-            (*printf_func)("         kmod scan stopped due to missing "
-                "kmod page: %p\n", k);
-            break;
+    if (!gLoadedKextSummaries) {
+        (*printf_func)("         can't perform kext scan: no kext summary");
+        goto finish;
+    }
+
+    summary_page = trunc_page((addr64_t)(uintptr_t)gLoadedKextSummaries);
+    last_summary_page = round_page(summary_page + sLoadedKextSummariesAllocSize);
+    for (; summary_page < last_summary_page; summary_page += PAGE_SIZE) {
+        if (pmap_find_phys(kernel_pmap, summary_page) == 0) {
+            (*printf_func)("         can't perform kext scan: "
+                "missing kext summary page %p", summary_page);
+            goto finish;
+        }
+    }
+
+    for (i = 0; i < gLoadedKextSummaries->numSummaries; ++i) {
+        OSKextLoadedKextSummary * summary;
+        
+        summary = gLoadedKextSummaries->summaries + i;
+        if (!summary->address) {
+            continue;
         }
-        if (!k->address) {
-            continue; // skip fake entries for built-in kernel components
+
+        if (!summaryIsInBacktrace(summary, addr, cnt)) {
+            continue;
         }
-        for (i = 0, kscan_addr = addr; i < cnt; i++, kscan_addr++) {
-            if ((*kscan_addr >= k->address) &&
-                (*kscan_addr < (k->address + k->size))) {
 
-                if (!found_kmod) {
-                    (*printf_func)("      Kernel Extensions in backtrace "
-                        "(with dependencies):\n");
-                }
-                found_kmod = 1;
-                (*printf_func)("         %s(%s)@%p->%p\n",
-                    k->name, k->version, k->address, k->address + k->size - 1);
+        if (!found_kmod) {
+            (*printf_func)("      Kernel Extensions in backtrace:\n");
+            found_kmod = true;
+        }
 
-                for (r = k->reference_list; r; r = r->next) {
-                    kmod_info_t * rinfo;
+        printSummary(summary, printf_func);
+    }
 
-                    if (pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)r)) == 0) {
-                        (*printf_func)("            kmod dependency scan stopped "
-                            "due to missing dependency page: %p\n", r);
-                        break;
-                    }
+finish:
+    if (lockFlag) {
+        IOLockUnlock(sKextSummariesLock);
+    }
 
-                    rinfo = r->info;
+    return;
+}
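printKextsInBacktrace() now probes every page of the summary buffer with pmap_find_phys() before reading it, because in panic context an unmapped page would turn a diagnostic dump into a second fault. A simplified sketch of the page-probing walk (PAGE_SIZE, trunc_page, and round_page are recreated here for a fixed 4 KiB page, and probe_page is a trivial stand-in for pmap_find_phys):

    #include <stdint.h>
    #include <stdbool.h>

    #define PAGE_SIZE 4096ULL
    #define trunc_page(x) ((x) & ~(PAGE_SIZE - 1))
    #define round_page(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    /* Trivial stand-in for pmap_find_phys(); the kernel returns the
     * physical page number, zero meaning "not resident". */
    static bool probe_page(uint64_t page) { (void)page; return true; }

    /* True only if every page covering [buf, buf+len) is resident, so
     * a later walk over the buffer cannot fault in panic context. */
    static bool range_is_safe(uint64_t buf, uint64_t len)
    {
        uint64_t page = trunc_page(buf);
        uint64_t last = round_page(buf + len);

        for (; page < last; page += PAGE_SIZE) {
            if (!probe_page(page))
                return false;
        }
        return true;
    }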
 
-                    if (pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)rinfo)) == 0) {
-                        (*printf_func)("            kmod dependency scan stopped "
-                            "due to missing kmod page: %p\n", rinfo);
-                        break;
-                    }
+/*********************************************************************
+* This function must be safe to call in panic context.
+*********************************************************************/
+/* static */
+boolean_t
+OSKext::summaryIsInBacktrace(
+    OSKextLoadedKextSummary   * summary,
+    vm_offset_t               * addr,
+    unsigned int                cnt)
+{
+    u_int i = 0;
+
+    for (i = 0; i < cnt; i++) {
+        vm_offset_t kscan_addr = addr[i];
+        if ((kscan_addr >= summary->address) &&
+            (kscan_addr < (summary->address + summary->size))) {
+            return TRUE;
+        }
+    }
 
-                    if (!rinfo->address) {
-                        continue; // skip fake entries for built-ins
-                    }
+    return FALSE;
+}
 
-                    (*printf_func)("            dependency: %s(%s)@%p\n",
-                        rinfo->name, rinfo->version, rinfo->address);
-                }
+/*********************************************************************
+ * Scan the list of loaded kext summaries for a matching load tag and,
+ * if found, return the kext's UUID as a C string.  If no summary
+ * matches, set the output to the empty string.
+ *********************************************************************/
+static void findSummaryUUID(
+                            uint32_t        tag_ID,
+                            uuid_string_t   uuid);
 
-                break;  // only report this kmod for one backtrace address
-            }
+static void findSummaryUUID(
+                            uint32_t        tag_ID, 
+                            uuid_string_t   uuid)
+{
+    u_int     i;
+    
+    uuid[0] = 0x00; // default to no UUID
+    
+    for (i = 0; i < gLoadedKextSummaries->numSummaries; ++i) {
+        OSKextLoadedKextSummary * summary;
+        
+        summary = gLoadedKextSummaries->summaries + i;
+        
+        if (summary->loadTag == tag_ID) {
+            (void) uuid_unparse(summary->uuid, uuid);
+            break;
         }
     }
+    return;
+}
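findSummaryUUID() maps a kmod load tag to a printable UUID string. The same lookup, sketched against ordinary libuuid in userspace (kext_summary and find_summary_uuid are illustrative stand-ins; uuid_unparse() is the standard <uuid/uuid.h> routine, and 37 bytes is the size of Apple's uuid_string_t):

    #include <uuid/uuid.h>
    #include <stdint.h>

    typedef struct {
        uint32_t loadTag;
        uuid_t   uuid;        /* 16 raw bytes */
    } kext_summary;

    /* Copy the UUID of the summary whose loadTag matches 'tag' into
     * 'out' (at least 37 bytes); leave it empty when nothing matches. */
    static void find_summary_uuid(const kext_summary *summaries,
                                  unsigned count, uint32_t tag,
                                  char out[37])
    {
        out[0] = '\0';                          /* default: no UUID */
        for (unsigned i = 0; i < count; ++i) {
            if (summaries[i].loadTag == tag) {
                uuid_unparse(summaries[i].uuid, out);
                break;
            }
        }
    }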
 
-    if (lockFlag) {
-        IORecursiveLockUnlock(sKextLock);
+/*********************************************************************
+* This function must be safe to call in panic context.
+*********************************************************************/
+void OSKext::printSummary(
+    OSKextLoadedKextSummary * summary,
+    int                    (* printf_func)(const char *fmt, ...))
+{
+    kmod_reference_t * kmod_ref = NULL;
+    uuid_string_t uuid;
+    char version[kOSKextVersionMaxLength];
+
+    if (!OSKextVersionGetString(summary->version, version, sizeof(version))) {
+        strlcpy(version, "unknown version", sizeof(version));
     }
+    (void) uuid_unparse(summary->uuid, uuid);
 
+    (*printf_func)("         %s(%s)[%s]@0x%llx->0x%llx\n",
+        summary->name, version, uuid,
+        summary->address, summary->address + summary->size - 1);
+    
+    /* print dependency info */
+    for (kmod_ref = (kmod_reference_t *) summary->reference_list; 
+         kmod_ref; 
+         kmod_ref = kmod_ref->next) {
+        kmod_info_t * rinfo;
+        
+        if (pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)kmod_ref)) == 0) {
+            (*printf_func)("            kmod dependency scan stopped "
+                           "due to missing dependency page: %p\n", kmod_ref);
+            break;
+        }
+        rinfo = kmod_ref->info;
+        
+        if (pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)rinfo)) == 0) {
+            (*printf_func)("            kmod dependency scan stopped "
+                           "due to missing kmod page: %p\n", rinfo);
+            break;
+        }
+        
+        if (!rinfo->address) {
+            continue; // skip fake entries for built-ins
+        }
+        
+        /* locate UUID in gLoadedKextSummaries */
+        findSummaryUUID(rinfo->id, uuid);
+        
+        (*printf_func)("            dependency: %s(%s)[%s]@%p\n",
+                       rinfo->name, rinfo->version, uuid, rinfo->address);
+    }
     return;
 }
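printSummary() validates every kmod_reference_t with pmap_find_phys() before dereferencing it, so a stale dependency list cannot fault during the dump. A hedged sketch of that guarded list walk (node, ptr_is_mapped, and print_deps are illustrative; a real implementation would test page residency rather than NULL):

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct node {
        struct node *next;
        const char  *name;
    } node;

    /* Trivial stand-in for pmap_find_phys(); the kernel instead checks
     * that the page holding 'p' is resident before touching it. */
    static bool ptr_is_mapped(const void *p) { return p != NULL; }

    /* Walk a dependency list, validating each node before touching it,
     * the way printSummary() probes every kmod_reference_t. */
    static void print_deps(const node *head)
    {
        for (const node *n = head; n; n = n->next) {
            if (!ptr_is_mapped(n)) {
                printf("    scan stopped at unmapped node %p\n",
                       (const void *)n);
                break;
            }
            printf("    dependency: %s\n", n->name);
        }
    }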
 
+
 /*******************************************************************************
 * substitute() looks at an input string (a pointer within a larger buffer)
 * for a match to a substring, and on match it writes the marker & substitution
@@ -8442,10 +9636,8 @@ assemble_identifier_and_version(
 }
 
 /*******************************************************************************
+* Assumes sKextLock is held.
 *******************************************************************************/
-#define LAST_LOADED " - last loaded "
-#define LAST_LOADED_TS_WIDTH  (16)
-
 /* static */
 uint32_t
 OSKext::saveLoadedKextPanicListTyped(
@@ -8467,12 +9659,23 @@ OSKext::saveLoadedKextPanicListTyped(
 
     i = count - 1;
     do {
-        OSKext      * theKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i));
-        kmod_info_t * kmod_info = theKext->kmod_info;
+        OSObject    * rawKext = sLoadedKexts->getObject(i);
+        OSKext      * theKext = OSDynamicCast(OSKext, rawKext);
         int           match;
         char          identPlusVers[2*KMOD_MAX_NAME];
         uint32_t      identPlusVersLength;
-        char          timestampBuffer[17]; // enough for a uint64_t
+
+        if (!rawKext) {
+            printf("OSKext::saveLoadedKextPanicListTyped - "
+                "NULL kext in loaded kext list; continuing\n");
+            continue;
+        }
+
+        if (!theKext) {
+            printf("OSKext::saveLoadedKextPanicListTyped - "
+                "Kext type cast failed in loaded kext list; continuing\n");
+            continue;
+        }
 
        /* Skip all built-in kexts.
         */
@@ -8480,6 +9683,8 @@ OSKext::saveLoadedKextPanicListTyped(
             continue;
         }
 
+        kmod_info_t * kmod_info = theKext->kmod_info;
+
        /* Filter for kmod name (bundle identifier).
         */
         match = !strncmp(kmod_info->name, prefix, strnlen(prefix, KMOD_MAX_NAME));
@@ -8511,15 +9716,6 @@ OSKext::saveLoadedKextPanicListTyped(
             goto finish;
         }
 
-       /* We're going to note the last-loaded kext in the list.
-        */
-        if (i + 1 == count) {
-            snprintf(timestampBuffer, sizeof(timestampBuffer), "%llu",
-                AbsoluteTime_to_scalar(&last_loaded_timestamp));
-            identPlusVersLength += sizeof(LAST_LOADED) - 1 +
-                strnlen(timestampBuffer, sizeof(timestampBuffer));
-        }
-
        /* Adding 1 for the newline.
         */
         if (*list_length_ptr + identPlusVersLength + 1 >= list_size) {
@@ -8527,10 +9723,6 @@ OSKext::saveLoadedKextPanicListTyped(
         }
         
         *list_length_ptr = strlcat(paniclist, identPlusVers, list_size);
-        if (i + 1 == count) {
-            *list_length_ptr = strlcat(paniclist, LAST_LOADED, list_size);
-            *list_length_ptr = strlcat(paniclist, timestampBuffer, list_size);
-        }
         *list_length_ptr = strlcat(paniclist, "\n", list_size);
         
     } while (i--);
@@ -8555,8 +9747,6 @@ OSKext::saveLoadedKextPanicList(void)
     uint32_t   newlist_size   = 0;
     uint32_t   newlist_length = 0;
 
-    IORecursiveLockLock(sKextLock);
-
     newlist_length = 0;
     newlist_size = KEXT_PANICLIST_SIZE;
     newlist = (char *)kalloc(newlist_size);
@@ -8597,90 +9787,64 @@ OSKext::saveLoadedKextPanicList(void)
     loaded_kext_paniclist_length = newlist_length;
 
 finish:
-    IORecursiveLockUnlock(sKextLock);
     return;
 }
 
 /*********************************************************************
+* Assumes sKextLock is held.
 *********************************************************************/
-/* static */
 void
-OSKext::saveUnloadedKextPanicList(OSKext * aKext)
+OSKext::savePanicString(bool isLoading)
 {
-    char     * newlist        = NULL;
-    uint32_t   newlist_size   = 0;
-    uint32_t   newlist_length = 0;
-    char       identPlusVers[2*KMOD_MAX_NAME];
-    uint32_t   identPlusVersLength;
+    u_long len;
 
-    if (!aKext->kmod_info) {
+    if (!kmod_info) {
         return;  // do not goto finish here b/c of lock
     }
 
-    IORecursiveLockLock(sKextLock);
-
-    clock_get_uptime(&last_unloaded_timestamp);
-    last_unloaded_address = (void *)aKext->kmod_info->address;
-    last_unloaded_size = aKext->kmod_info->size;
-
-
-    identPlusVersLength = assemble_identifier_and_version(aKext->kmod_info,
-        identPlusVers);
-    if (!identPlusVersLength) {
+    len = assemble_identifier_and_version(kmod_info,
+        (isLoading) ? last_loaded_str : last_unloaded_str);
+    if (!len) {
         printf("error saving unloaded kext info\n");
         goto finish;
     }
 
-    newlist_length = identPlusVersLength;
-    newlist_size = newlist_length + 1;
-    newlist = (char *)kalloc(newlist_size);
-    
-    if (!newlist) {
-        printf("couldn't allocate kext panic log buffer\n");
-        goto finish;
-    }
-    
-    newlist[0] = '\0';
-
-    strlcpy(newlist, identPlusVers, newlist_size);
-
-    if (unloaded_kext_paniclist) {
-        kfree(unloaded_kext_paniclist, unloaded_kext_paniclist_size);
+    if (isLoading) {
+        last_loaded_strlen = len;
+        last_loaded_address = (void *)kmod_info->address;
+        last_loaded_size = kmod_info->size;
+        clock_get_uptime(&last_loaded_timestamp);
+    } else {
+        last_unloaded_strlen = len;
+        last_unloaded_address = (void *)kmod_info->address;
+        last_unloaded_size = kmod_info->size;
+        clock_get_uptime(&last_unloaded_timestamp);
     }
-    unloaded_kext_paniclist = newlist;
-    unloaded_kext_paniclist_size = newlist_size;
-    unloaded_kext_paniclist_length = newlist_length;
 
 finish:
-    IORecursiveLockUnlock(sKextLock);
     return;
 }
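savePanicString() replaces the old kalloc'd unloaded-kext panic list with static buffers that are filled at load/unload time, so the panic path never allocates and never follows a freed pointer. A rough userspace sketch of that design (the buffer size and names are illustrative):

    #include <stdio.h>
    #include <string.h>
    #include <stdbool.h>

    #define PANIC_STR_MAX 256

    static char   last_loaded_str[PANIC_STR_MAX];
    static char   last_unloaded_str[PANIC_STR_MAX];
    static size_t last_loaded_strlen, last_unloaded_strlen;

    /* Record "name (version)" into the proper static slot at (un)load
     * time; nothing is allocated, so a later panic dump cannot fail. */
    static void save_panic_string(const char *name, const char *version,
                                  bool isLoading)
    {
        char  *dst = isLoading ? last_loaded_str : last_unloaded_str;
        size_t len = (size_t)snprintf(dst, PANIC_STR_MAX, "%s (%s)",
                                      name, version);

        if (isLoading) last_loaded_strlen = len;
        else           last_unloaded_strlen = len;
    }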
 
 /*********************************************************************
 *********************************************************************/
-#if __LP64__
-#define __kLoadSizeEscape  "0x%lld"
-#else
-#define __kLoadSizeEscape  "0x%ld"
-#endif /* __LP64__ */
-
 /* static */
 void
 OSKext::printKextPanicLists(int (*printf_func)(const char *fmt, ...))
 {
-    printf_func("unloaded kexts:\n");
-    if (unloaded_kext_paniclist &&
-        pmap_find_phys(kernel_pmap, (addr64_t) (uintptr_t) unloaded_kext_paniclist) &&
-        unloaded_kext_paniclist[0]) {
+    if (last_loaded_strlen) {
+        printf_func("last loaded kext at %llu: %.*s (addr %p, size %lu)\n",
+            AbsoluteTime_to_scalar(&last_loaded_timestamp),
+            last_loaded_strlen, last_loaded_str,
+            last_loaded_address, last_loaded_size);
+    }
 
-        printf_func(
-            "%.*s (addr %p, size " __kLoadSizeEscape ") - last unloaded %llu\n",
-            unloaded_kext_paniclist_length, unloaded_kext_paniclist,
-            last_unloaded_address, last_unloaded_size,
-            AbsoluteTime_to_scalar(&last_unloaded_timestamp));
-    } else {
-        printf_func("(none)\n");
+    if (last_unloaded_strlen) {
+        printf_func("last unloaded kext at %llu: %.*s (addr %p, size %lu)\n",
+            AbsoluteTime_to_scalar(&last_unloaded_timestamp),
+            last_unloaded_strlen, last_unloaded_str,
+            last_unloaded_address, last_unloaded_size);
     }
+
     printf_func("loaded kexts:\n");
     if (loaded_kext_paniclist &&
         pmap_find_phys(kernel_pmap, (addr64_t) (uintptr_t) loaded_kext_paniclist) &&
@@ -8693,21 +9857,163 @@ OSKext::printKextPanicLists(int (*printf_func)(const char *fmt, ...))
     return;
 }
 
+/*********************************************************************
+* Assumes sKextLock is held.
+*********************************************************************/
+/* static */
+void
+OSKext::updateLoadedKextSummaries(void)
+{
+    kern_return_t result = KERN_FAILURE;
+    OSKextLoadedKextSummaryHeader *summaryHeader = NULL;
+    OSKextLoadedKextSummaryHeader *summaryHeaderAlloc = NULL;
+    OSKext *aKext;
+    vm_map_offset_t start, end;
+    size_t summarySize = 0;
+    size_t size;
+    u_int count;
+    u_int numKexts;
+    u_int i, j;
+
+    IOLockLock(sKextSummariesLock);
+
+    count = sLoadedKexts->getCount();
+    for (i = 0, numKexts = 0; i < count; ++i) {
+        aKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i));
+        numKexts += (aKext && aKext->isExecutable());
+    }
+
+    if (!numKexts) goto finish;
+
+   /* Calculate the size needed for the new summary headers.
+    */
+    
+    size = sizeof(*gLoadedKextSummaries);
+    size += numKexts * sizeof(*gLoadedKextSummaries->summaries);
+    size = round_page(size);
+
+   /* If the previous summary is large enough, use it (and be sure to make
+    * it writable). If it's too small, free it and allocate a new buffer.
+    */
+
+    if (sPrevLoadedKextSummariesAllocSize < size) {
+        if (sPrevLoadedKextSummaries) {
+            kmem_free(kernel_map, (vm_offset_t)sPrevLoadedKextSummaries,
+                sPrevLoadedKextSummariesAllocSize);
+            sPrevLoadedKextSummaries = NULL;
+            sPrevLoadedKextSummariesAllocSize = 0;
+        }
+
+        result = kmem_alloc(kernel_map, 
+            (vm_offset_t*)&summaryHeaderAlloc, size);
+        if (result != KERN_SUCCESS) goto finish;
+
+        summaryHeader = summaryHeaderAlloc;
+        summarySize = size;
+    } else {
+        summaryHeader = sPrevLoadedKextSummaries;
+        summarySize = sPrevLoadedKextSummariesAllocSize;
+
+        start = (vm_map_offset_t) summaryHeader;
+        end = start + summarySize;
+        result = vm_map_protect(kernel_map, start, end, VM_PROT_DEFAULT, FALSE);
+        if (result != KERN_SUCCESS) goto finish;
+    }
+
+   /* Populate the summary header.
+    */
+
+    bzero(summaryHeader, summarySize);
+    summaryHeader->version = kOSKextLoadedKextSummaryVersion;
+    summaryHeader->entry_size = sizeof(OSKextLoadedKextSummary);
+    summaryHeader->numSummaries = numKexts;
+
+   /* Populate each kext summary.
+    */
+
+    count = sLoadedKexts->getCount();
+    for (i = 0, j = 0; i < count; ++i) {
+        aKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i));
+        if (!aKext || !aKext->isExecutable()) continue;
+
+        aKext->updateLoadedKextSummary(&summaryHeader->summaries[j++]);
+    }
+
+   /* Write protect the buffer and move it into place.
+    */
+
+    start = (vm_map_offset_t) summaryHeader;
+    end = start + summarySize;
+    result = vm_map_protect(kernel_map, start, end, VM_PROT_READ, FALSE);
+    if (result != KERN_SUCCESS) goto finish;
+
+    sPrevLoadedKextSummaries = gLoadedKextSummaries;
+    sPrevLoadedKextSummariesAllocSize = sLoadedKextSummariesAllocSize;
+
+    gLoadedKextSummaries = summaryHeader;
+    sLoadedKextSummariesAllocSize = summarySize;
+
+    summaryHeaderAlloc = NULL;
+
+   /* Call the magic breakpoint function through a static function pointer so
+    * the compiler can't optimize the function away.
+    */
+    if (sLoadedKextSummariesUpdated) (*sLoadedKextSummariesUpdated)();
+
+finish:
+    IOLockUnlock(sKextSummariesLock);
+
+   /* If we had to allocate a new buffer but failed to generate the summaries,
+    * free that now.
+    */
+    if (summaryHeaderAlloc) {
+        kmem_free(kernel_map, (vm_offset_t)summaryHeaderAlloc, summarySize);
+    }
+
+    return;
+}
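updateLoadedKextSummaries() double-buffers the summary table: the new copy is built writable, sealed read-only with vm_map_protect(), and only then published, so observers always see either the old or the new complete, write-protected table. The same build/seal/publish sequence, sketched with POSIX mmap/mprotect standing in for kmem_alloc/vm_map_protect (a simplified sketch, not the kernel API):

    #include <sys/mman.h>
    #include <string.h>
    #include <stddef.h>

    static void  *g_published;      /* read-only table seen by readers */
    static size_t g_published_size;

    /* Build a new table, write-protect it, then swap it into place.
     * Returns 0 on success, -1 on failure (old table stays published). */
    static int publish_table(const void *data, size_t len)
    {
        size_t size = (len + 4095) & ~(size_t)4095;  /* round to pages */
        void *buf = mmap(NULL, size, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (buf == MAP_FAILED)
            return -1;

        memcpy(buf, data, len);               /* populate while writable */
        if (mprotect(buf, size, PROT_READ) != 0) {  /* seal first */
            munmap(buf, size);
            return -1;
        }

        if (g_published)                      /* retire the old table */
            munmap(g_published, g_published_size);
        g_published = buf;
        g_published_size = size;
        return 0;
    }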
+
+/*********************************************************************
+*********************************************************************/
+void
+OSKext::updateLoadedKextSummary(OSKextLoadedKextSummary *summary)
+{
+    OSData *uuid;
+
+    strlcpy(summary->name, getIdentifierCString(), 
+        sizeof(summary->name));
+
+    uuid = copyUUID();
+    if (uuid) {
+        memcpy(summary->uuid, uuid->getBytesNoCopy(), sizeof(summary->uuid));
+        OSSafeRelease(uuid);
+    }
+
+    summary->address = kmod_info->address;
+    summary->size = kmod_info->size;
+    summary->version = getVersion();
+    summary->loadTag = kmod_info->id;
+    summary->flags = 0;
+    summary->reference_list = (uint64_t) kmod_info->reference_list;
+
+    return;
+}
+
 /*********************************************************************
 *********************************************************************/
-#if __ppc__ || __i386__
+#if __i386__
 /* static */
 kern_return_t
 OSKext::getKmodInfo(
     kmod_info_array_t      * kmodList,
     mach_msg_type_number_t * kmodCount)
 {
-    kern_return_t result = KERN_FAILURE;
-    vm_offset_t data;
-    kmod_info_t * k, * kmod_info_scan_ptr;
+    kern_return_t      result = KERN_FAILURE;
+    vm_offset_t        data   = 0;
+    kmod_info_t      * k, * kmod_info_scan_ptr;
     kmod_reference_t * r, * ref_scan_ptr;
-    int ref_count;
-    unsigned size = 0;
+    int                ref_count;
+    unsigned           size   = 0;
 
     *kmodList = (kmod_info_t *)0;
     *kmodCount = 0;
@@ -8796,7 +10102,7 @@ finish:
     }
     return result;
 }
-#endif /* __ppc__ || __i386__ */
+#endif /* __i386__ */
 #if PRAGMA_MARK
 #pragma mark MAC Framework Support
 #endif
@@ -8849,7 +10155,7 @@ MACFLengthForObject(OSObject * obj)
         len = sizeof("4294967295");    /* UINT32_MAX */
     } else if (typeID == OSTypeID(OSBoolean)) {
         OSBoolean * boolObj = OSDynamicCast(OSBoolean, obj);
-        len = boolObj->isTrue() ? sizeof("true") : sizeof("false");
+        len = (boolObj == kOSBooleanTrue) ? sizeof("true") : sizeof("false");
     } else if (typeID == OSTypeID(OSData)) {
         OSData * dataObj = OSDynamicCast(OSData, obj);
         len = dataObj->getLength();
@@ -8885,7 +10191,7 @@ MACFInitElementFromObject(
     } else if (typeID == OSTypeID(OSBoolean)) {
         OSBoolean * boolObj = OSDynamicCast(OSBoolean, value);
         element->value_type = MAC_DATA_TYPE_PRIMITIVE;
-        if (boolObj->isTrue()) {
+        if (boolObj == kOSBooleanTrue) {
             strcpy(element->value, "true");
             element->value_size = 5;
         } else {
diff --git a/libkern/c++/OSMetaClass.cpp b/libkern/c++/OSMetaClass.cpp
index bc67307d2..0696e2b02 100644
--- a/libkern/c++/OSMetaClass.cpp
+++ b/libkern/c++/OSMetaClass.cpp
@@ -491,6 +491,7 @@ int  OSMetaClass::getRetainCount() const { return 0; }
 const char *
 OSMetaClass::getClassName() const
 {
+    if (!className) return NULL;
     return className->getCStringNoCopy();
 }
 
diff --git a/libkern/c++/OSObject.cpp b/libkern/c++/OSObject.cpp
index 814fbbdf6..7da83e9a1 100644
--- a/libkern/c++/OSObject.cpp
+++ b/libkern/c++/OSObject.cpp
@@ -87,25 +87,6 @@ OSMetaClassDefineReservedUnused(OSObject, 13);
 OSMetaClassDefineReservedUnused(OSObject, 14);
 OSMetaClassDefineReservedUnused(OSObject, 15);
 
-#ifdef __ppc__
-OSMetaClassDefineReservedUnused(OSObject, 16);
-OSMetaClassDefineReservedUnused(OSObject, 17);
-OSMetaClassDefineReservedUnused(OSObject, 18);
-OSMetaClassDefineReservedUnused(OSObject, 19);
-OSMetaClassDefineReservedUnused(OSObject, 20);
-OSMetaClassDefineReservedUnused(OSObject, 21);
-OSMetaClassDefineReservedUnused(OSObject, 22);
-OSMetaClassDefineReservedUnused(OSObject, 23);
-OSMetaClassDefineReservedUnused(OSObject, 24);
-OSMetaClassDefineReservedUnused(OSObject, 25);
-OSMetaClassDefineReservedUnused(OSObject, 26);
-OSMetaClassDefineReservedUnused(OSObject, 27);
-OSMetaClassDefineReservedUnused(OSObject, 28);
-OSMetaClassDefineReservedUnused(OSObject, 29);
-OSMetaClassDefineReservedUnused(OSObject, 30);
-OSMetaClassDefineReservedUnused(OSObject, 31);
-#endif
-
 static const char *getClassName(const OSObject *obj)
 {
     const OSMetaClass *meta = obj->getMetaClass();
@@ -115,12 +96,6 @@ static const char *getClassName(const OSObject *obj)
 bool OSObject::init()
     { return true; }
 
-#if (!__ppc__) || (__GNUC__ < 3)
-
-// Implemented in assembler in post gcc 3.x systems as we have a problem
-// where the destructor in gcc2.95 gets 2 arguments.  The second argument
-// appears to be a flag argument.  I have copied the assembler from Puma xnu
-// to OSRuntimeSupport.c  So for 2.95 builds use the C 
 void OSObject::free()
 {
     const OSMetaClass *meta = getMetaClass();
@@ -129,7 +104,6 @@ void OSObject::free()
 	meta->instanceDestructed();
     delete this;
 }
-#endif /* (!__ppc__) || (__GNUC__ < 3) */
 
 int OSObject::getRetainCount() const
 {
diff --git a/libkern/c++/OSObjectAsm.s b/libkern/c++/OSObjectAsm.s
deleted file mode 100644
index eba1bc781..000000000
--- a/libkern/c++/OSObjectAsm.s
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#if __GNUC__ >= 3
-
-;
-; This function was generated by disassembling the 'OSObject::free(void)'
-; function of the Panther7B7 kernel in gdb.
-;
-; Then add the 'li r4,3' flag taken fropm the Puma kernel 'OSObject::free'
-;
-    .text
-
-    .align  5
-    .globl  __ZN8OSObject4freeEv
-
-__ZN8OSObject4freeEv:
-    ; function prologue
-    stw     r31,-4(r1)
-    mflr    r0
-    stw     r0,8(r1)
-    mr      r31,r3
-    stwu    r1,-80(r1)
-
-    ; const OSMetaClass *meta = getMetaClass();
-    lwz     r9,0(r3)
-    lwz     r12,32(r9)
-    mtctr   r12
-    bctrl
-
-    ; if (meta)
-    ;   meta->instanceDestructed();
-    cmpwi   r3,0
-    beq     delete_this
-    bl      __ZNK11OSMetaClass18instanceDestructedEv
-
-delete_this:
-    ; delete this;
-    lwz     r9,0(r31)
-    mr      r3,r31
-    li      r4,0        ; Load up some sort of flags, for 2.95 destructors?
-    lwz     r0,88(r1)
-    addi    r1,r1,80
-    lwz     r12,8(r9)
-    mtlr    r0
-    lwz     r31,-4(r1)
-    mtctr   r12
-    bctr
-
-#endif /* __GNUC__ >= 3 */
diff --git a/libkern/c++/OSOrderedSet.cpp b/libkern/c++/OSOrderedSet.cpp
index 47d5f4970..5a5fb83e4 100644
--- a/libkern/c++/OSOrderedSet.cpp
+++ b/libkern/c++/OSOrderedSet.cpp
@@ -226,9 +226,9 @@ void OSOrderedSet::removeObject(const OSMetaClassBase *anObject)
 
     for (i = 0; i < count; i++) {
 
-        if( deleted)
+        if (deleted)
             array[i-1] = array[i];
-        else if( (array[i].obj == anObject)) {
+        else if (array[i].obj == anObject) {
             deleted = true;
 	    haveUpdated();	// Pity we can't flush the log
             array[i].obj->taggedRelease(OSTypeID(OSCollection));
diff --git a/libkern/c++/OSRuntime.cpp b/libkern/c++/OSRuntime.cpp
index 88df070d0..ae8faf0ef 100644
--- a/libkern/c++/OSRuntime.cpp
+++ b/libkern/c++/OSRuntime.cpp
@@ -224,6 +224,41 @@ void __pure_virtual( void )        { panic("%s", __FUNCTION__); }
 
 typedef void (*structor_t)(void);
 
+/*********************************************************************
+*********************************************************************/
+static boolean_t
+sectionIsDestructor(kernel_section_t * section)
+{
+    boolean_t result;
+
+    result = !strncmp(section->sectname, SECT_MODTERMFUNC,
+        sizeof(SECT_MODTERMFUNC) - 1);
+#if !__LP64__
+    result = result || !strncmp(section->sectname, SECT_DESTRUCTOR, 
+        sizeof(SECT_DESTRUCTOR) - 1);
+#endif
+
+    return result;
+}
+
+/*********************************************************************
+*********************************************************************/
+static boolean_t
+sectionIsConstructor(kernel_section_t * section)
+{
+    boolean_t result;
+
+    result = !strncmp(section->sectname, SECT_MODINITFUNC,
+        sizeof(SECT_MODINITFUNC) - 1);
+#if !__LP64__
+    result = result || !strncmp(section->sectname, SECT_CONSTRUCTOR, 
+        sizeof(SECT_CONSTRUCTOR) - 1);
+#endif
+
+    return result;
+}
+
+
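Both helpers compare sizeof(LITERAL) - 1 bytes, a prefix match that ignores the literal's trailing NUL; the inline code they replace compared sizeof(SECT_DESTRUCTOR) bytes, which additionally demands a NUL inside the fixed 16-byte Mach-O section-name field. A small standalone demonstration of the difference (the 16-character name is contrived for the demo):

    #include <stdio.h>
    #include <string.h>

    #define SECTNAME "__mod_term_func"    /* 15 chars + trailing NUL */

    int main(void)
    {
        /* Mach-O section names sit in a fixed 16-byte field and carry
         * a NUL terminator only when shorter than the field. */
        char field[16];
        memcpy(field, "__mod_term_funcX", 16);   /* field full, no NUL */

        /* sizeof(SECTNAME)-1 compares just the 15 name bytes: match. */
        printf("%d\n", !strncmp(field, SECTNAME, sizeof(SECTNAME) - 1)); /* 1 */

        /* sizeof(SECTNAME) also compares the literal's NUL: no match. */
        printf("%d\n", !strncmp(field, SECTNAME, sizeof(SECTNAME)));     /* 0 */
        return 0;
    }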
 /*********************************************************************
 * OSRuntimeUnloadCPPForSegment()
 *
@@ -249,9 +284,7 @@ OSRuntimeUnloadCPPForSegmentInKmod(
          section != 0;
          section = nextsect(segment, section)) {
 
-        if (strncmp(section->sectname, SECT_DESTRUCTOR, 
-            sizeof(SECT_DESTRUCTOR)) == 0) {
-
+        if (sectionIsDestructor(section)) {
             structor_t * destructors = (structor_t *)section->addr;
 
             if (destructors) {
@@ -422,9 +455,7 @@ OSRuntimeInitializeCPP(
              section != NULL;
              section = nextsect(segment, section)) {
 
-            if (strncmp(section->sectname, SECT_CONSTRUCTOR, 
-                sizeof(SECT_CONSTRUCTOR)) == 0) {
-
+            if (sectionIsConstructor(section)) {
                 structor_t * constructors = (structor_t *)section->addr;
 
                 if (constructors) {
diff --git a/libkern/c++/OSSet.cpp b/libkern/c++/OSSet.cpp
index a97158413..f2d5c3e8c 100644
--- a/libkern/c++/OSSet.cpp
+++ b/libkern/c++/OSSet.cpp
@@ -196,29 +196,36 @@ void OSSet::flushCollection()
 
 bool OSSet::setObject(const OSMetaClassBase *anObject)
 {
-    if (containsObject(anObject))
+    if (containsObject(anObject)) {
         return false;
-    else {
+    } else {
         haveUpdated();
         return members->setObject(anObject);
     }
 }
 
-bool OSSet::merge(const OSArray *array)
+bool OSSet::merge(const OSArray * array)
 {
-    const OSMetaClassBase *anObject;
-    bool retVal = false;
+    const OSMetaClassBase * anObject = 0;
+    bool                    result   = true;
 
-// xx-review: if any setObject fails due to memory allocation failure,
-// xx-review: this function should return false
-    for (int i = 0; (anObject = array->getObject(i)); i++)
-        if (setObject(anObject))
-            retVal = true;
+    for (int i = 0; (anObject = array->getObject(i)); i++) {
 
-    return retVal;
+       /* setObject() returns false if the object is already in the set,
+        * so we have to check beforehand here with containsObject().
+        */
+        if (containsObject(anObject)) {
+            continue;
+        }
+        if (!setObject(anObject)) {
+            result = false;
+        }
+    }
+
+    return result;
 }
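After the rewrite, OSSet::merge() returns false only when setObject() genuinely fails (for example on allocation failure), while duplicates are skipped silently. A toy model of those semantics in C (int_set, set_contains, and set_insert are illustrative stand-ins for the OSSet machinery):

    #include <stdbool.h>
    #include <stddef.h>

    typedef struct { int *items; size_t count, capacity; } int_set;

    static bool set_contains(const int_set *s, int v)
    {
        for (size_t i = 0; i < s->count; i++)
            if (s->items[i] == v) return true;
        return false;
    }

    /* Returns false only if 'v' could not be stored (set full here,
     * allocation failure in the kernel version). */
    static bool set_insert(int_set *s, int v)
    {
        if (s->count == s->capacity) return false;
        s->items[s->count++] = v;
        return true;
    }

    /* Mirror of OSSet::merge(): duplicates are skipped silently; the
     * result is false only when a genuine insert fails. */
    static bool set_merge(int_set *dst, const int *src, size_t n)
    {
        bool result = true;
        for (size_t i = 0; i < n; i++) {
            if (set_contains(dst, src[i]))
                continue;               /* duplicate: not an error */
            if (!set_insert(dst, src[i]))
                result = false;         /* real failure: report it */
        }
        return result;
    }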
 
-bool OSSet::merge(const OSSet *set)
+bool OSSet::merge(const OSSet * set)
 {
     return merge(set->members);
 }
diff --git a/libkern/c++/OSSymbol.cpp b/libkern/c++/OSSymbol.cpp
index 1d6e6c2f0..5f9fad84e 100644
--- a/libkern/c++/OSSymbol.cpp
+++ b/libkern/c++/OSSymbol.cpp
@@ -361,8 +361,11 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym)
     j = thisBucket->count;
     list = thisBucket->symbolP;
 
-    if (!j)
+    if (!j) {
+        // couldn't find the symbol; probably means string hash changed
+        panic("removeSymbol");
         return;
+    }
 
     if (j == 1) {
         probeSymbol = (OSSymbol *) list;
@@ -374,6 +377,8 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym)
             SHRINK_POOL();
             return;
         }
+        // couldn't find the symbol; probably means string hash changed
+        panic("removeSymbol");
         return;
     }
 
@@ -399,6 +404,8 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym)
             SHRINK_POOL();
             return;
         }
+        // couldn't find the symbol; probably means string hash changed
+        panic("removeSymbol");
         return;
     }
 
@@ -424,6 +431,8 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym)
             return;
         }
     }
+    // couldn't find the symbol; probably means string hash changed
+    panic("removeSymbol");
 }
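Each added panic() converts a silent lookup miss into a loud invariant violation: the caller guarantees the symbol is in the pool, so failing to find it means the backing string's hash changed while it sat in the table. The general idiom, sketched over a plain array (PANIC and remove_key are illustrative):

    #include <stdio.h>
    #include <stdlib.h>

    #define PANIC(msg) \
        do { fprintf(stderr, "panic: %s\n", msg); abort(); } while (0)

    /* Remove 'key' from an array the caller guarantees contains it;
     * not finding it therefore means the container is corrupted, and
     * limping on would only corrupt memory further. */
    static void remove_key(int *a, int *count, int key)
    {
        for (int i = 0; i < *count; i++) {
            if (a[i] == key) {
                for (int j = i + 1; j < *count; j++)
                    a[j - 1] = a[j];
                (*count)--;
                return;
            }
        }
        PANIC("remove_key: key not found");
    }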
 
 /*
diff --git a/libkern/c++/Tests/TestSerialization/test1/test1_main.cpp b/libkern/c++/Tests/TestSerialization/test1/test1_main.cpp
old mode 100644
new mode 100755
diff --git a/libkern/conf/MASTER b/libkern/conf/MASTER
index b303c3603..783f1af08 100644
--- a/libkern/conf/MASTER
+++ b/libkern/conf/MASTER
@@ -51,6 +51,7 @@
 #
 ident		LIBKERN
 
+options         HIBERNATION     # system hibernation    # <hibernation>
 options		KDEBUG			# kernel tracing		# <kdebug>
 options		GPROF			# kernel profiling		# <profile>
 options		LIBKERNCPP		# C++ implementation	# <libkerncpp>
@@ -58,6 +59,7 @@ options		NETWORKING		# kernel networking		# <networking>
 options     CONFIG_DTRACE 	# dtrace support		# <config_dtrace>
 options		CRYPTO			# cryptographic routines	# <ipsec,crypto>
 options		ZLIB		# zlib support			# <zlib>
+options		IOKITSTATS		# IOKit statistics		# <iokitstats>
 
 options		CONFIG_NO_PANIC_STRINGS			# <no_panic_str>
 options		CONFIG_NO_PRINTF_STRINGS		# <no_printf_str>
@@ -67,6 +69,11 @@ options		IPSEC			# IP security	# <ipsec>
 
 options		CONFIG_KXLD		# kxld/runtime linking of kexts # <config_kxld>
 
+# Note that when adding this config option to an architecture, one MUST
+# add the architecture to the preprocessor test at the beginning of
+# libkern/kmod/cplus_{start.c,stop.c}.
+options         CONFIG_STATIC_CPPINIT   # Static library initializes kext cpp runtime # <config_static_cppinit>
+
 # secure_kernel - secure kernel from user programs
 options     SECURE_KERNEL       # <secure_kernel> 
 
diff --git a/libkern/conf/MASTER.i386 b/libkern/conf/MASTER.i386
index 2bf2e22da..46f20d9ec 100644
--- a/libkern/conf/MASTER.i386
+++ b/libkern/conf/MASTER.i386
@@ -1,11 +1,10 @@
 ######################################################################
 #
-#  RELEASE	= [ intel mach libkerncpp networking config_dtrace crypto zlib config_kxld ]
+#  RELEASE	= [ intel mach libkerncpp hibernation networking config_dtrace crypto zlib config_kxld config_static_cppinit iokitstats ]
 #  PROFILE	= [ RELEASE profile ]
 #  DEBUG	= [ RELEASE debug mach_kdb ]
 #
-#
-#  EMBEDDED	= [ intel mach libkerncpp networking crypto zlib ]
+#  EMBEDDED	= [ intel mach libkerncpp hibernation networking crypto zlib ]
 #  DEVELOPMENT	= [ EMBEDDED config_dtrace ]
 #
 ######################################################################
diff --git a/libkern/conf/MASTER.ppc b/libkern/conf/MASTER.ppc
deleted file mode 100644
index 21e317660..000000000
--- a/libkern/conf/MASTER.ppc
+++ /dev/null
@@ -1,19 +0,0 @@
-#
-######################################################################
-#  
-#  Standard Apple MacOS X Configurations:
-#  -------- ---- -------- ---------------
-#
-#  RELEASE = [ppc mach libkerncpp networking config_dtrace crypto zlib config_kxld ]
-#  DEVELOPMENT = [ RELEASE ]
-#  PROFILE = [RELEASE profile]
-#  DEBUG = [RELEASE debug mach_kdb ]
-#  RELEASE_TRACE = [ RELEASE kdebug ]
-#  DEBUG_TRACE   = [ DEBUG kdebug ]
-#
-######################################################################
-
-machine		"ppc"					# <ppc>
-cpu		"ppc"					# <ppc>
-
-options		MACH_KDB	#				# <mach_kdb>
diff --git a/libkern/conf/MASTER.x86_64 b/libkern/conf/MASTER.x86_64
index da71fbe23..a9fd68364 100644
--- a/libkern/conf/MASTER.x86_64
+++ b/libkern/conf/MASTER.x86_64
@@ -1,11 +1,10 @@
 ######################################################################
 #
-#  RELEASE	= [ intel mach libkerncpp networking config_dtrace crypto zlib config_kxld ]
+#  RELEASE	= [ intel mach libkerncpp hibernation networking config_dtrace crypto zlib config_kxld iokitstats ]
 #  PROFILE	= [ RELEASE profile ]
 #  DEBUG	= [ RELEASE debug mach_kdb ]
 #
-#
-#  EMBEDDED	= [ intel mach libkerncpp networking crypto zlib ]
+#  EMBEDDED	= [ intel mach libkerncpp hibernation networking crypto zlib ]
 #  DEVELOPMENT	= [ EMBEDDED ]
 #
 ######################################################################
diff --git a/libkern/conf/Makefile b/libkern/conf/Makefile
index 8f54b1af4..f0cf53e3d 100644
--- a/libkern/conf/Makefile
+++ b/libkern/conf/Makefile
@@ -7,8 +7,7 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
-SETUP_SUBDIRS = \
-	tools
+SETUP_SUBDIRS =
 
 COMP_SUBDIRS = 
 
@@ -24,30 +23,24 @@ else
 export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)
 endif
 
-$(COMPOBJROOT)/doconf:
-	@make build_setup 
+MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(ARCH_CONFIG_LC).$(MACHINE_CONFIG_LC)
 
 $(COMPOBJROOT)/$(LIBKERN_KERNEL_CONFIG)/Makefile :  $(SOURCE)/MASTER  \
 	$(SOURCE)/MASTER.$(ARCH_CONFIG_LC) \
 	$(SOURCE)/Makefile.template  \
 	$(SOURCE)/Makefile.$(ARCH_CONFIG_LC)  \
 	$(SOURCE)/files \
-	$(SOURCE)/files.$(ARCH_CONFIG_LC) \
-	$(COMPOBJROOT)/doconf
+	$(SOURCE)/files.$(ARCH_CONFIG_LC)
 	$(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \
 	$(MKDIR) $${doconf_target}; \
 	cd $${doconf_target}; \
 	rm -f $(notdir $?); \
 	cp  $? $${doconf_target}; \
-	$(COMPOBJROOT)/doconf -c -cpu $(ARCH_CONFIG_LC) -d  $(TARGET)/$(LIBKERN_KERNEL_CONFIG) $(LIBKERN_KERNEL_CONFIG); \
+	if [ -f $(MASTER_CPU_PER_SOC) ]; then cp $(MASTER_CPU_PER_SOC) $${doconf_target}; fi; \
+	$(SRCROOT)/SETUP/config/doconf -c -cpu $(ARCH_CONFIG_LC) -soc $(MACHINE_CONFIG_LC) -d  $(TARGET)/$(LIBKERN_KERNEL_CONFIG) $(LIBKERN_KERNEL_CONFIG); \
 	);
 
-.ORDER: $(COMPOBJROOT)/$(LIBKERN_KERNEL_CONFIG)/Makefile
-
-do_setup_conf: $(COMPOBJROOT)/doconf \
-		$(COMPOBJROOT)/$(LIBKERN_KERNEL_CONFIG)/Makefile
-
-do_all: do_setup_conf
+do_all: $(COMPOBJROOT)/$(LIBKERN_KERNEL_CONFIG)/Makefile
 	$(_v)next_source=$(subst conf/,,$(SOURCE));			\
 	${MAKE} -C $(COMPOBJROOT)/$(LIBKERN_KERNEL_CONFIG)	\
 		MAKEFILES=$(TARGET)/$(LIBKERN_KERNEL_CONFIG)/Makefile	\
diff --git a/libkern/conf/Makefile.i386 b/libkern/conf/Makefile.i386
index 3695a666c..f28e7a459 100644
--- a/libkern/conf/Makefile.i386
+++ b/libkern/conf/Makefile.i386
@@ -2,6 +2,12 @@
 #BEGIN	Machine dependent Makefile fragment for i386
 ######################################################################
 
+# Files that must go in the __HIB segment:
+UNCONFIGURED_HIB_FILES= \
+			WKdmDecompress.o
+
+HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS))
+
 ######################################################################
 #END	Machine dependent Makefile fragment for i386
 ######################################################################
diff --git a/libkern/conf/Makefile.ppc b/libkern/conf/Makefile.ppc
deleted file mode 100644
index cd79f229a..000000000
--- a/libkern/conf/Makefile.ppc
+++ /dev/null
@@ -1,7 +0,0 @@
-######################################################################
-#BEGIN	Machine dependent Makefile fragment for ppc
-######################################################################
-
-######################################################################
-#END	Machine dependent Makefile fragment for ppc
-######################################################################
diff --git a/libkern/conf/Makefile.template b/libkern/conf/Makefile.template
index 005aa9ca3..9dad4c816 100644
--- a/libkern/conf/Makefile.template
+++ b/libkern/conf/Makefile.template
@@ -26,8 +26,8 @@ include $(MakeInc_def)
 #
 # CFLAGS
 #
-CFLAGS+= -imacros meta_features.h -DLIBKERN_KERNEL_PRIVATE -DOSALLOCDEBUG=1 \
-         -Werror $(CFLAGS_INLINE_CONFIG)
+CFLAGS+= -include meta_features.h -DLIBKERN_KERNEL_PRIVATE -DOSALLOCDEBUG=1 \
+	$(CFLAGS_INLINE_CONFIG)
 
 # zlib is 3rd party source
 compress.o_CWARNFLAGS_ADD = -Wno-cast-qual 
@@ -85,13 +85,20 @@ ${OBJS}: ${OBJSDEPS}
 
 LDOBJS = $(OBJS)
 
-$(COMPONENT).o: $(LDOBJS)
+$(COMPONENT).filelist: $(LDOBJS)
+	$(_v)if [ $(BUILD_MACHO_OBJ) -eq 1 ]; then \
+	for hib_file in ${HIB_FILES};		\
+	do	\
+                $(SEG_HACK) __HIB $${hib_file} -o $${hib_file}__; \
+                mv $${hib_file}__ $${hib_file} ; \
+	done; \
+	fi
 	@echo LDFILELIST $(COMPONENT)
 	$(_v)( for obj in ${LDOBJS}; do	\
 		 echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \
-	done; ) > $(COMPONENT).o
+	done; ) > $(COMPONENT).filelist
 
-do_all: $(COMPONENT).o
+do_all: $(COMPONENT).filelist
 
 do_depend: do_all
 	$(_v)${MD} -u Makedep -f -d `ls *.d`
diff --git a/libkern/conf/Makefile.x86_64 b/libkern/conf/Makefile.x86_64
index 7b0de925d..a7fda56ca 100644
--- a/libkern/conf/Makefile.x86_64
+++ b/libkern/conf/Makefile.x86_64
@@ -2,6 +2,12 @@
 #BEGIN	Machine dependent Makefile fragment for x86_64
 ######################################################################
 
+# Files that must go in the __HIB segment:
+UNCONFIGURED_HIB_FILES= \
+                        WKdmDecompress.o
+
+HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS))
+
 ######################################################################
 #END	Machine dependent Makefile fragment for x86_64
 ######################################################################
diff --git a/libkern/conf/files b/libkern/conf/files
index 15f992d67..6f3d432ac 100644
--- a/libkern/conf/files
+++ b/libkern/conf/files
@@ -4,6 +4,7 @@ OPTIONS/libkerncpp					optional libkerncpp
 OPTIONS/kdebug						optional kdebug
 OPTIONS/gprof						optional gprof
 OPTIONS/config_dtrace				optional config_dtrace
+OPTIONS/hibernation				optional hibernation
 OPTIONS/networking					optional networking
 OPTIONS/crypto						optional crypto
 OPTIONS/zlib						optional zlib
@@ -72,9 +73,9 @@ libkern/kxld/kxld_demangle.c    optional config_kxld
 libkern/kxld/kxld_dict.c        optional config_kxld
 libkern/kxld/kxld_kext.c        optional config_kxld
 libkern/kxld/kxld_reloc.c       optional config_kxld
+libkern/kxld/kxld_object.c      optional config_kxld
 libkern/kxld/kxld_sect.c        optional config_kxld
 libkern/kxld/kxld_seg.c         optional config_kxld
-libkern/kxld/kxld_state.c       optional config_kxld
 libkern/kxld/kxld_sym.c         optional config_kxld
 libkern/kxld/kxld_symtab.c      optional config_kxld
 libkern/kxld/kxld_util.c        optional config_kxld
diff --git a/libkern/conf/files.i386 b/libkern/conf/files.i386
index 70f37ed51..18edb6e7d 100644
--- a/libkern/conf/files.i386
+++ b/libkern/conf/files.i386
@@ -1 +1,8 @@
 libkern/i386/OSAtomic.s			standard
+libkern/zlib/intel/inffastS.s	optional zlib
+libkern/zlib/intel/adler32vec.s	optional zlib
+libkern/crypto/intel/sha1edp.s	optional crypto	
+
+# Optimized WKdm compressor
+libkern/kxld/i386/WKdmCompress.s                         optional hibernation
+libkern/kxld/i386/WKdmDecompress.s                       optional hibernation
diff --git a/libkern/conf/files.ppc b/libkern/conf/files.ppc
deleted file mode 100644
index 0e495aa18..000000000
--- a/libkern/conf/files.ppc
+++ /dev/null
@@ -1,6 +0,0 @@
-libkern/ppc/OSAtomic.s			standard
-libkern/ppc/bcmp.s			standard
-libkern/ppc/memcmp.s			standard
-libkern/ppc/strlen.s			standard
-libkern/c++/OSObjectAsm.s               optional libkerncpp
-
diff --git a/libkern/conf/files.x86_64 b/libkern/conf/files.x86_64
index bcf047445..bc32a4846 100644
--- a/libkern/conf/files.x86_64
+++ b/libkern/conf/files.x86_64
@@ -1 +1,8 @@
 libkern/x86_64/OSAtomic.s			standard
+libkern/zlib/intel/inffastS.s		optional zlib
+libkern/zlib/intel/adler32vec.s		optional zlib
+libkern/crypto/intel/sha1edp.s		optional crypto
+
+# Optimized WKdm compressor
+libkern/kxld/i386/WKdmCompress.s                         optional hibernation
+libkern/kxld/i386/WKdmDecompress.s                       optional hibernation
diff --git a/libkern/conf/tools/Makefile b/libkern/conf/tools/Makefile
deleted file mode 100644
index 4f9ccd553..000000000
--- a/libkern/conf/tools/Makefile
+++ /dev/null
@@ -1,32 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-SETUP_SUBDIRS = doconf
-
-COMP_SUBDIRS = doconf
-
-INST_SUBDIRS = \
-
-
-setup_build_all:
-	@echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-do_build_all:
-	@echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-setup_build_install:
-	@echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-do_build_install:
-	@echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/libkern/conf/tools/doconf/Makefile b/libkern/conf/tools/doconf/Makefile
deleted file mode 100644
index aa55a9419..000000000
--- a/libkern/conf/tools/doconf/Makefile
+++ /dev/null
@@ -1,47 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-COMP_SUBDIRS = \
-
-INST_SUBDIRS = \
-
-
-#
-# Who and where
-#
-BINDIR= 
-ifneq ($(MACHINE_CONFIG), DEFAULT)
-DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)/)
-else
-DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)/)
-endif
-PROGRAM= $(DSTDIR)doconf
-
-# 
-# How to install it
-#
-IFLAGS= -c -m 555
-
-$(PROGRAM): $(DSTDIR)% : $(SOURCE)%.csh
-	@-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS 
-	@sed -e "s/#PROGRAM.*/#`vers_string $(notdir $(PROGRAM))`/" \
-		< $< >$(notdir $(PROGRAM)).VERS;
-	@install $(IFLAGS) $(notdir $(PROGRAM)).VERS $(PROGRAM);
-	@-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS;
-
-do_build_setup: $(PROGRAM)
-
-do_build_all:
-
-setup_build_install:
-
-do_build_install:
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
diff --git a/libkern/conf/tools/doconf/doconf.csh b/libkern/conf/tools/doconf/doconf.csh
deleted file mode 100755
index 6fedb4786..000000000
--- a/libkern/conf/tools/doconf/doconf.csh
+++ /dev/null
@@ -1,321 +0,0 @@
-#!/bin/csh -f
-set path = ($path .)
-######################################################################
-# HISTORY
-#  1-Dec-87  Michael Young (mwyoung) at Carnegie-Mellon University
-#	Added "-verbose" switch, so this script produces no output
-#	in the normal case.
-#
-# 10-Oct-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Flushed cmu_*.h and spin_locks.h
-#	[ V5.1(XF18) ]
-#
-#  6-Apr-87  Avadis Tevanian (avie) at Carnegie-Mellon University
-#	Use MASTER.local and MASTER.<machine>.local for generation of
-#	configuration files in addition to MASTER and MASTER.<machine>.
-#
-# 25-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Removed use of obsolete wb_*.h files when building the feature
-#	list;  modified to save the previous configuration file and
-#	display the differences between it and the new file.
-#	[ V5.1(F8) ]
-#
-# 25-Mar-87  Avadis Tevanian (avie) at Carnegie-Mellon University
-#	If there is no /etc/machine just print out a message telling
-#	user to use the -cpu option.  I thought this script was supposed
-#	to work even without a /etc/machine, but it doesn't... and this
-#	is the easiest way out.
-#
-# 13-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added "romp_fpa.h" file to extra features for the RT.
-#	[ V5.1(F7) ]
-#
-# 11-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Updated to maintain the appropriate configuration features file
-#	in the "machine" directory whenever the corresponding
-#	configuration is generated.  This replaces the old mechanism of
-#	storing this directly in the <sys/features.h> file since it was
-#	machine dependent and also precluded building programs for more
-#	than one configuration from the same set of sources.
-#	[ V5.1(F6) ]
-#
-# 21-Feb-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Fixed to require wired-in cpu type names for only those
-#	machines where the kernel name differs from that provided by
-#	/etc/machine (i.e. IBMRT => ca and SUN => sun3);  updated to
-#	permit configuration descriptions in both machine indepedent
-#	and dependent master configuration files so that attributes can
-#	be grouped accordingly.
-#	[ V5.1(F3) ]
-#
-# 17-Jan-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Updated to work from any directory at the same level as
-#	"conf"; generate configuration from both MASTER and
-#	MASTER.<machine-type> files; added -cpu switch.
-#	[ V5.1(F1) ]
-#
-# 18-Aug-86  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added -make switch and changed meaning of -config;  upgraded to
-#	allow multiple attributes per configuration and to define
-#	configurations in terms of these attributes within MASTER.
-#
-# 14-Apr-83  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added -config switch to only run /etc/config without 
-#	"make depend" and "make".
-#
-######################################################################
-
-set prog=$0
-set prog=$prog:t
-set nonomatch
-set OBJDIR=../BUILD
-if ("`/usr/bin/uname`" == "Rhapsody" ) then
-set CONFIG_DIR=/usr/local/bin
-else
-set CONFIG_DIR=/usr/bin
-endif
-
-unset domake
-unset doconfig
-unset beverbose
-unset MACHINE
-unset profile
-
-while ($#argv >= 1)
-    if ("$argv[1]" =~ -*) then
-        switch ("$argv[1]")
-	case "-c":
-	case "-config":
-	    set doconfig
-	    breaksw
-	case "-m":
-	case "-make":
-	    set domake
-	    breaksw
-	case "-cpu":
-	    if ($#argv < 2) then
-		echo "${prog}: missing argument to ${argv[1]}"
-		exit 1
-	    endif
-	    set MACHINE="$argv[2]"
-	    shift
-	    breaksw
-	case "-d":
-	    if ($#argv < 2) then
-		echo "${prog}: missing argument to ${argv[1]}"
-		exit 1
-	    endif
-	    set OBJDIR="$argv[2]"
-	    shift
-	    breaksw
-	case "-verbose":
-	    set beverbose
-	    breaksw
-	case "-p":
-	case "-profile":
-	    set profile
-	    breaksw
-	default:
-	    echo "${prog}: ${argv[1]}: unknown switch"
-	    exit 1
-	    breaksw
-	endsw
-	shift
-    else
-	break
-    endif
-end
-
-if ($#argv == 0) set argv=(GENERIC)
-
-if (! $?MACHINE) then
-    if (-d /NextApps) then
-	set MACHINE=`hostinfo | awk '/MC680x0/ { printf("m68k") } /MC880x0/ { printf("m88k") }'`
-    endif
-endif
-
-if (! $?MACHINE) then
-    if (-f /etc/machine) then
-	    set MACHINE="`/etc/machine`"
-    else
-	    echo "${prog}: no /etc/machine, specify machine type with -cpu"
-	    echo "${prog}: e.g. ${prog} -cpu VAX CONFIGURATION"
-	    exit 1
-    endif
-endif
-
-set FEATURES_EXTRA=
-
-switch ("$MACHINE")
-    case IBMRT:
-	set cpu=ca
-	set ID=RT
-	set FEATURES_EXTRA="romp_dualcall.h romp_fpa.h"
-	breaksw
-    case SUN:
-	set cpu=sun3
-	set ID=SUN3
-	breaksw
-    default:
-	set cpu=`echo $MACHINE | tr A-Z a-z`
-	set ID=`echo $MACHINE | tr a-z A-Z`
-	breaksw
-endsw
-set FEATURES=../h/features.h
-set FEATURES_H=(cs_*.h mach_*.h net_*.h\
-	        cputypes.h cpus.h vice.h\
-	        $FEATURES_EXTRA)
-set MASTER_DIR=../conf
-set MASTER =   ${MASTER_DIR}/MASTER
-set MASTER_CPU=${MASTER}.${cpu}
-
-set MASTER_LOCAL = ${MASTER}.local
-set MASTER_CPU_LOCAL = ${MASTER_CPU}.local
-if (! -f $MASTER_LOCAL) set MASTER_LOCAL = ""
-if (! -f $MASTER_CPU_LOCAL) set MASTER_CPU_LOCAL = ""
-
-if (! -d $OBJDIR) then
-    if ($?beverbose) then
-        echo "[ creating $OBJDIR ]"
-    endif
-    mkdir -p $OBJDIR
-endif
-
-foreach SYS ($argv)
-    set SYSID=${SYS}_${ID}
-    set SYSCONF=$OBJDIR/config.$SYSID
-    set BLDDIR=$OBJDIR
-    if ($?beverbose) then
-	echo "[ generating $SYSID from $MASTER_DIR/MASTER{,.$cpu}{,.local} ]"
-    endif
-    echo +$SYS \
-    | \
-    cat $MASTER $MASTER_LOCAL $MASTER_CPU $MASTER_CPU_LOCAL - \
-        $MASTER $MASTER_LOCAL $MASTER_CPU $MASTER_CPU_LOCAL \
-    | \
-    sed -n \
-	-e "/^+/{" \
-	   -e "s;[-+];#&;gp" \
-	      -e 't loop' \
-	   -e ': loop' \
-           -e 'n' \
-	   -e '/^#/b loop' \
-	   -e '/^$/b loop' \
-	   -e 's;^\([^#]*\).*#[ 	]*<\(.*\)>[ 	]*$;\2#\1;' \
-	      -e 't not' \
-	   -e 's;\([^#]*\).*;#\1;' \
-	      -e 't not' \
-	   -e ': not' \
-	   -e 's;[ 	]*$;;' \
-	   -e 's;^\!\(.*\);\1#\!;' \
-	   -e 'p' \
-	      -e 't loop' \
-           -e 'b loop' \
-	-e '}' \
-	-e "/^[^#]/d" \
-	-e 's;	; ;g' \
-	-e "s;^# *\([^ ]*\)[ ]*=[ ]*\[\(.*\)\].*;\1#\2;p" \
-    | \
-    awk '-F#' '\
-part == 0 && $1 != "" {\
-	m[$1]=m[$1] " " $2;\
-	next;\
-}\
-part == 0 && $1 == "" {\
-	for (i=NF;i>1;i--){\
-		s=substr($i,2);\
-		c[++na]=substr($i,1,1);\
-		a[na]=s;\
-	}\
-	while (na > 0){\
-		s=a[na];\
-		d=c[na--];\
-		if (m[s] == "") {\
-			f[s]=d;\
-		} else {\
-			nx=split(m[s],x," ");\
-			for (j=nx;j>0;j--) {\
-				z=x[j];\
-				a[++na]=z;\
-				c[na]=d;\
-			}\
-		}\
-	}\
-	part=1;\
-	next;\
-}\
-part != 0 {\
-	if ($1 != "") {\
-		n=split($1,x,",");\
-		ok=0;\
-		for (i=1;i<=n;i++) {\
-			if (f[x[i]] == "+") {\
-				ok=1;\
-			}\
-		}\
-		if (NF > 2 && ok == 0 || NF <= 2 && ok != 0) {\
-			print $2; \
-		}\
-	} else { \
-		print $2; \
-	}\
-}\
-' >$SYSCONF.new
-    if (-z $SYSCONF.new) then
-	echo "${prog}: ${$SYSID}: no such configuration in $MASTER_DIR/MASTER{,.$cpu}"
-	rm -f $SYSCONF.new
-    endif
-    if (! -d $BLDDIR) then
-        if ($?beverbose) then
-	    echo "[ creating $BLDDIR ]"
-        endif
-	mkdir -p $BLDDIR
-    endif
-#
-# These paths are used by config.
-#
-# "builddir" is the name of the directory where kernel binaries
-# are put.  It is a single path element, never absolute, and is
-# always relative to "objectdir".  "builddir" is used by config
-# solely to determine where to put files created by "config" (e.g.
-# the created Makefile and *.h's.)
-#
-# "objectdir" is the name of the directory which will hold "builddir".
-# It is a path; if relative, it is relative to the current directory
-# where config is run.  It's sole use is to be prepended to "builddir"
-# to indicate where config-created files are to be placed (see above).
-#
-# "sourcedir" is the location of the sources used to build the kernel.
-# It is a path; if relative, it is relative to the directory specified
-# by the concatenation of "objectdir" and "builddir" (i.e. where the
-# kernel binaries are put).
-#
-    echo 'builddir	"."'			>> $SYSCONF.new
-    set OBJRELDIR=`$RELPATH $OBJROOT $OBJDIR`
-    echo 'objectdir	"'$OBJROOT'/'$OBJRELDIR'"'		>> $SYSCONF.new
-    set SRCDIR=`dirname $SOURCE`
-    echo 'sourcedir	"'$SRCROOT'"'		>> $SYSCONF.new
-    if (-f $SYSCONF) then
-	diff $SYSCONF $SYSCONF.new
-	rm -f $SYSCONF.old
-	mv $SYSCONF $SYSCONF.old
-    endif
-    rm -f $SYSCONF
-    mv $SYSCONF.new $SYSCONF
-    if ($?doconfig) then
-        if ($?beverbose) then
-	    echo "[ configuring $SYSID ]"
-        endif
-	if ($?profile) then
-	    $CONFIG_DIR/config -c $MASTER_DIR -p $SYSCONF
-	else
-	    $CONFIG_DIR/config -c $MASTER_DIR $SYSCONF
-	endif
-    endif
-    if ($?domake) then
-        if ($?beverbose) then
-            echo "[ making $SYSID ]"
-        endif
-        (cd $BLDDIR; make)
-    endif
-end
diff --git a/libkern/crypto/intel/sha1edp.h b/libkern/crypto/intel/sha1edp.h
new file mode 100644
index 000000000..ba90122fd
--- /dev/null
+++ b/libkern/crypto/intel/sha1edp.h
@@ -0,0 +1,51 @@
+#if !defined sha1edp_h
+#define	sha1edp_h
+
+
+/*	This file is included in sha1edpLittleEndian.s and sha1edpBigEndian.s to
+	define the symbols below for use in assembly code.
+
+	It is also included in sha1_locl.h and compiled in C to test that the
+	hard-coded values here match the values used in C.  CC_SHA1_BLOCK_BYTES
+	is defined in another header, so an error will be generated if its
+	definition here conflicts.  The other symbols are tested below, with
+	the CheckAssertion definition.
+*/
+
+
+// Number of bytes in a SHA-1 block.
+#define	CC_SHA1_BLOCK_BYTES	64
+
+// Offset of h0 to h4 members in SHA-1 context structure.
+#define	Contexth0	(0*4)
+#define	Contexth1	(1*4)
+#define	Contexth2	(2*4)
+#define	Contexth3	(3*4)
+#define	Contexth4	(4*4)
+
+
+#if !defined __ASSEMBLER__
+
+	#include <stddef.h>	// Get offsetof macro.
+
+	/*	Declare CheckAssertion so that if any of the declarations below
+		differ from it, the compiler will report an error.
+	*/
+	extern char CheckAssertion[1];
+
+	/*	Ensure that Contexth0 through Contexth4 are the byte offsets of the
+		h0 through h4 members of the SHA-1 context structure.
+	*/
+	extern char CheckAssertion[Contexth0 == offsetof(SHA_CTX, h0)];
+	extern char CheckAssertion[Contexth1 == offsetof(SHA_CTX, h1)];
+	extern char CheckAssertion[Contexth2 == offsetof(SHA_CTX, h2)];
+	extern char CheckAssertion[Contexth3 == offsetof(SHA_CTX, h3)];
+	extern char CheckAssertion[Contexth4 == offsetof(SHA_CTX, h4)];
+		/*	If these assertions fail, change the definitions of Contexth0 to
+			Contexth4 to match the offsets of the members.
+		*/
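+
+	/*	A sketch of how the idiom fires, with hypothetical values: if
+		Contexth1 were defined as 8 while offsetof(SHA_CTX, h1) is 4, the
+		corresponding declaration above would expand to
+
+			extern char CheckAssertion[0];
+
+		which conflicts with the earlier CheckAssertion[1] declaration, so
+		the compiler reports an error on exactly the mismatched line.
+	*/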
+
+#endif	// !defined __ASSEMBLER__
+
+
+#endif	// !defined sha1edp_h
diff --git a/libkern/crypto/intel/sha1edp.s b/libkern/crypto/intel/sha1edp.s
new file mode 100644
index 000000000..80da81a62
--- /dev/null
+++ b/libkern/crypto/intel/sha1edp.s
@@ -0,0 +1,1481 @@
+/* 	sha1edp.s : this file provides optimized x86_64 and i386 implementations of the sha1 function
+	CoreOS - vector and numerics group
+	cclee	6-21-10
+	
+	The implementation is based on the principle described in an Intel online article
+	"Improving the Performance of the Secure Hash Algorithm (SHA-1)"
+	http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1/
+
+
+	Updating HASH[] by processing one 64-byte block in MESSAGE[] can be represented by the following C function:
+
+void SHA1( int HASH[], int MESSAGE[] )
+{
+    int A[81], B[81], C[81], D[81], E[81];
+    int W[80];
+
+    int i, FN;
+
+    A[0] = HASH[0]; 
+    B[0] = HASH[1];
+    C[0] = HASH[2];
+    D[0] = HASH[3];
+    E[0] = HASH[4];
+
+    for ( i=0; i<80; ++i )
+    {
+        if ( i < 16 )
+            W[i] = BIG_ENDIAN_LOAD( MESSAGE[i] );
+        else
+            W[i] = ROTATE_LEFT( W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1 );
+
+        FN = F( i, B[i], C[i], D[i] );
+
+        A[i+1] = FN + E[i] + ROTATE_LEFT( A[i], 5 ) + W[i] + K(i);
+        B[i+1] = A[i];
+        C[i+1] = ROTATE_LEFT( B[i], 30 );
+        D[i+1] = C[i];
+        E[i+1] = D[i];
+    }
+
+    HASH[0] += A[80];
+    HASH[1] += B[80];
+    HASH[2] += C[80];
+    HASH[3] += D[80];
+    HASH[4] += E[80];
+} 
+
+	For i=0:15, W[i] is simply the big-endian load of MESSAGE[i]. For i=16:79, W[i] is updated according to W[i] = ROTATE_LEFT( W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1 );
+
+	The approach (by Dean Gaudet) can be used to vectorize the computation of W[i] for i=16:79,
+
+	1. done on 4 consecutive W[i] values in a single XMM register
+    W[i  ] = (W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16]) rol 1
+    W[i+1] = (W[i-2] ^ W[i-7] ^ W[i-13] ^ W[i-15]) rol 1
+    W[i+2] = (W[i-1] ^ W[i-6] ^ W[i-12] ^ W[i-14]) rol 1
+    W[i+3] = (   0   ^ W[i-5] ^ W[i-11] ^ W[i-13]) rol 1
+
+    2. a fix-up of the last lane is then required, because W[i+3] depends on W[i] computed in the same vector
+    W[i+3] ^= W[i] rol 1
+
+    3. once we have 4 W[i] values in XMM we can also add four K values with one instruction
+    W[i:i+3] += {K,K,K,K}
+
+	Let W0 = {W[i] W[i+1] W[i+2] W[i+3]} be the current W-vector to be computed, W4 = {W[i-4] W[i-3] W[i-2] W[i-1]} be the previous vector, and so on
+	The Dean Gaudet approach can be expressed as
+
+	1. W0 = rotate_left(left_shift(W4,32) ^ W8 ^ left_shift(concatenate(W16,W12),64) ^ W16,1);
+	2. W[i+3] ^= W[i] rol 1
+	3. W0 += {K,K,K,K}
+
+	For i>=32, the Intel online article suggests that (using a basic identity (X rol 1) rol 1 = X rol 2) the update equation is equivalent to
+
+	1. W0 = rotate_left(left_shift(concatenate(W8,W4),64) ^ W16 ^ W28 ^ W32, 2); 
+
+	Note:
+	1. In total, we need 8 16-byte registers or memory for W0,W4,...,W28. W0 and W32 can be the same register or memory.
+	2. The registers are used in a circular buffering mode. For example, we start with W28,W24,...,W0 (with W0 indicating the most recent 16-byte)
+		i=0, W28,W24,...,W0
+		i=4, W24,W20,...,W28
+		i=8, W20,W16,...,W24
+		.
+		.
+		and so forth.
+	3. 2 ssse3 instructions are used in the Intel article, pshufb and palignr.
+		a. pshufb is used to simplify the BIG_ENDIAN_LOAD operation 
+		b. palignr is used to simplify the computation of left_shift(concatenate(W12,W8),64)
+	4. we probe __cpu_capabilities to detect ssse3 support and dispatch the ssse3 code when available.
+	   If ssse3 is not supported, suboptimal code (with pshufb and palignr worked around) is dispatched.
+
+*/
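+
+/*	Both the step-2 fix-up and the rol-2 form for i>=32 rest on rotation
+	distributing over XOR: rol(a ^ b, n) == rol(a, n) ^ rol(b, n).  A minimal
+	standalone C check of the two identities (illustrative only, not part of
+	this build; the sample values are arbitrary):
+
+	#include <assert.h>
+	#include <stdint.h>
+
+	static uint32_t rol(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }
+
+	int main(void)
+	{
+		uint32_t w3 = 0x12345678, w8 = 0x9abcdef0, w14 = 0x0f1e2d3c, w16 = 0x5a5a5a5a;
+
+		// direct form: W[i] = (W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16]) rol 1
+		uint32_t direct = rol(w3 ^ w8 ^ w14 ^ w16, 1);
+
+		// lane-wise form: compute with 0 in the missing slot, then XOR in rol(w3, 1)
+		uint32_t fixedup = rol(0 ^ w8 ^ w14 ^ w16, 1) ^ rol(w3, 1);
+		assert(direct == fixedup);
+
+		// (x rol 1) rol 1 == x rol 2, which merges the i>=32 update into a single rol 2
+		assert(rol(rol(w16, 1), 1) == rol(w16, 2));
+		return 0;
+	}
+*/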
+
+/* the code can be compiled into a single-block (64 bytes) per call mode by setting Multiple_Blocks to 0 */
+#define	Multiple_Blocks	1
+
+#if defined (__x86_64__) || defined(__i386__)		// x86_64 or i386 architectures
+
+#if defined(__x86_64__)
+
+	// set up for x86_64
+#define	stack_size	(8+16*11+16*4)					// 8 (alignment) + xmm0-xmm10 + 4 128-bit slots for intermediate WK(t) storage
+#define	sp			%rsp							// unifying architectural stack pointer representation
+#define	ctx			%rdi							// 1st input argument, will move to HASH_PTR (%r9)
+#define	buf			%rsi							// 2nd input argument, will move to BUFFER_PTR (%r10) 
+#define	cnt			%r11							// will copy from the 3rd input argument (%rdx)
+#define K_BASE		%r8								// aligned pointer to the table of K values (and, at 0x40, the pshufb byte-swap mask)
+#define HASH_PTR	%r9								// pointer to Hash values (A,B,C,D,E)
+#define BUFFER_PTR	%r10							// pointer to input blocks 
+
+#else	// !__x86_64__
+
+	// set up for i386 
+#define stack_size	(12+16*2+16*11+16*4)			// 12 bytes (alignment) + 16-byte slots: 2 (spare) + 3 (W24/W28/XMM_SHUFB_BSWAP) + 8 (xmm0-xmm7) + 4 (WK(t))
+#define	sp			%esp							// unifying architectural stack pointer representation
+#define HASH_PTR	stack_size+16+4(sp)				// use 1st input argument from caller function, 16 for (esi/edi/ebx/ebp)
+#define BUFFER_PTR	stack_size+16+8(sp)				// use 2nd input argument from caller function 
+#define cnt			stack_size+16+12(sp)			// use 3rd input argument from caller function
+#define K_BASE		stack_size-4(sp)				// use for K_BASE
+
+#endif	// __x86_64__
+
+// map the algorithmic variables W0,W4,...,W28, plus W_TMP, W_TMP2, and XMM_SHUFB_BSWAP (the last used only with ssse3), onto registers or stack memory
+
+#define W_TMP  	%xmm0
+#define W_TMP2 	%xmm1
+#define W0  	%xmm2
+#define W4  	%xmm3
+#define W8  	%xmm4
+#define W12 	%xmm5
+#define W16 	%xmm6
+#define W20 	%xmm7
+#if defined(__x86_64__)
+#define W24 	%xmm8
+#define W28 	%xmm9
+#define XMM_SHUFB_BSWAP %xmm10				// used only when ssse3 is supported
+#else	// defined (__i386__)
+#define W24     12*16(sp)
+#define W28     13*16(sp)
+#define XMM_SHUFB_BSWAP 14*16(sp)			// used only when ssse3 is supported 
+#endif
+
+#define	xmov	movaps						// aligned 16-byte move
+#define	xmovu	movups						// unaligned 16-byte move
+
+// intermediate hash variables
+#define A %ecx
+#define B %esi
+#define C %edi
+#define D %ebp
+#define E %edx
+
+// temp variables
+#define T1 %eax
+#define T2 %ebx
+
+#define	WK(t)	(t&15)*4(sp)
+
+	// int F1(int B, int C, int D) { return (D ^ (B & (C ^ D))); }
+	// result in T1
+	.macro	F1
+	mov	$1, T1				// T1 = C
+	xor	$2, T1				// T1 = C ^ D
+	and	$0, T1				// T1 = B & (C ^ D)
+	xor	$2, T1				// T1 = D ^ (B & (C ^ D))
+	.endm
+
+	// int F2(int B, int C, int D) { return (D ^ B ^ C); }
+	// result in T1
+	.macro	F2
+	mov	$2, T1				// T1 = D
+	xor	$1, T1				// T1 = D ^ C
+	xor	$0, T1				// T1 = D ^ C ^ B
+	.endm
+
+	// int F3(int B, int C, int D) { return (B & C) | (D & (B ^ C)); }
+	// result in T1
+	.macro	F3
+	mov $1, T1				// T1 = C
+	mov $0, T2				// T2 = B
+	or  $0, T1				// T1 = B | C
+	and $1, T2				// T2 = B & C
+	and $2, T1				// T1 = D & (B | C)
+	or  T2, T1				// T1 = (B & C) | (D & (B | C)), i.e. Maj(B,C,D)
+	.endm
+
+	// for i=60:79, F4 is identical to F2
+	#define	F4	F2
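+
+	/*	F1 is the textbook choice function Ch(B,C,D) = (B & C) | (~B & D)
+		rewritten to need one fewer operation, and F3 is the majority
+		function.  A small illustrative C check of both identities (not part
+		of the build; the sample values are arbitrary):
+
+		#include <assert.h>
+		#include <stdint.h>
+		int main(void)
+		{
+			uint32_t b = 0xdeadbeef, c = 0x01234567, d = 0x89abcdef;
+			assert((d ^ (b & (c ^ d))) == ((b & c) | (~b & d)));                // F1 == Ch
+			assert(((b & c) | (d & (b ^ c))) == ((b & c) | (b & d) | (c & d))); // F3 == Maj
+			return 0;
+		}
+	*/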
+
+
+	/*
+		i=0:15, W[i] = BIG_ENDIAN_LOAD(MESSAGE[i]);
+
+		with ssse3 support, this is achieved via
+		for (i=0;i<16;i+=4) {
+			1. W_TMP = new 16 bytes from MESSAGE[]
+			2. W_TMP = pshufb(W_TMP, XMM_SHUFB_BSWAP); save to W circular buffer for updating W 
+			3. W_TMP += {K,K,K,K};
+			4. save quadruple W[i]+K[i] = W_TMP in the stack memory;
+		}
+
+		each step is represented in one of the following 4 macro definitions
+
+	*/
+
+	.macro	W_PRECALC_00_15_0_ssse3			// input argument $0 : 0/4/8/12
+#if defined (__x86_64__)					// BUFFER_PTR is already an address register in x86_64
+	xmovu	$0*4(BUFFER_PTR), W_TMP			// read 16-bytes into W_TMP, BUFFER_PTR possibly not 16-byte aligned
+#else										// BUFFER_PTR is from the argument set up in the caller
+	mov     BUFFER_PTR, T1					// T1 = BUFFER_PTR
+    xmovu  $0*4(T1), W_TMP					// read 16-bytes into W_TMP, BUFFER_PTR possibly not 16-byte aligned
+#endif
+	.endm
+
+	.macro	W_PRECALC_00_15_1_ssse3			// input argument $0 : current 16-bytes in the circular buffer, one of W0,W4,W8,...,W28
+	pshufb	XMM_SHUFB_BSWAP, W_TMP			// convert W_TMP from little-endian into big-endian
+	xmov	W_TMP, $0						// save W_TMP in the circular buffer
+	.endm
+
+	.macro	W_PRECALC_00_15_2				// K_BASE points to the current K quadruple.
+#if defined (__x86_64__)					// K_BASE is already an address register in x86_64
+	paddd	(K_BASE), W_TMP					// W_TMP += {K,K,K,K};
+#else										// K_BASE is previously set up in the stack memory
+	mov     K_BASE, T1						// T1 = K_BASE
+    paddd   (T1), W_TMP						// W_TMP += {K,K,K,K};
+#endif
+	.endm
+
+	.macro	W_PRECALC_00_15_3
+	xmov	W_TMP, WK($0&~3)				// save quadruple W[i]+K in the stack memory, which would be used later for updating the hashes A/B/C/D/E
+	.endm
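+
+	/*	Net effect of the four 00_15 macros above for one quadruple
+		i,i+1,i+2,i+3, written scalar-style with the pseudo-functions from
+		the header comment (a sketch):
+
+		for (j = 0; j < 4; j++) {
+			W[i+j]  = BIG_ENDIAN_LOAD( MESSAGE[i+j] );
+			WK[i+j] = W[i+j] + K(i);
+		}
+	*/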
+
+	/*
+		without ssse3 support, steps 1 and 2 need to be modified
+		1. sequentially load 4 words into T1, bswap T1, and save it to 4-bytes in the stack space
+		2. load the 16-bytes from the aligned stack memory into W_TMP
+	*/
+
+	.macro	W_PRECALC_00_15_0_nossse3		// input argument $0 : 0/4/8/12
+
+#if	defined (__x86_64__)
+	#define	BUFFERP	BUFFER_PTR
+#else
+	mov		BUFFER_PTR, T2					// copy BUFFER_PTR (from caller 2nd argument) to T2
+	#define	BUFFERP	T2
+#endif
+
+	// load 1st word, bswap it, save it to stack
+	mov		$0*4(BUFFERP), T1
+	bswap	T1
+	mov		T1, 14*16(sp)
+
+	// load 2nd word, bswap it, save it to stack
+	mov		4+$0*4(BUFFERP), T1
+	bswap	T1
+	mov		T1, 4+14*16(sp)
+
+	// load 3rd word, bswap it, save it to stack
+	mov		8+$0*4(BUFFERP), T1
+	bswap	T1
+	mov		T1, 8+14*16(sp)
+
+	// load 4th word, bswap it, save it to stack
+	mov		12+$0*4(BUFFERP), T1
+	bswap	T1
+	mov		T1, 12+14*16(sp)
+	.endm
+
+	.macro	W_PRECALC_00_15_1_nossse3 		// input argument $0 : current 16-bytes in the circular buffer, one of W0,W4,W8,...,W28
+	xmov	14*16(sp), W_TMP				// load the bswapped 16-bytes from the aligned stack memory
+	xmov	W_TMP, $0						// save W = W_TMP in the circular buffer
+	.endm
+
+	// rounds 16-31 compute the W vectors using the vectorization approach by Dean Gaudet
+	/*
+	W[i  ] = (W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16]) rol 1
+    W[i+1] = (W[i-2] ^ W[i-7] ^ W[i-13] ^ W[i-15]) rol 1
+    W[i+2] = (W[i-1] ^ W[i-6] ^ W[i-12] ^ W[i-14]) rol 1
+    W[i+3] = (   0   ^ W[i-5] ^ W[i-11] ^ W[i-13]) rol 1
+
+	W[i+3] ^= W[i] rol 1;	// this W[i] has already been rotated left by 1; if taken from the initial W before the rol 1, it should be rotated by 2 instead
+
+	The operation (updating W and W+K) is scheduled and divided into 4 steps
+
+	0. W_TMP = W3; W = W14 ^ W8
+	1. W = W3 ^ W8 ^ W14 ^ W16; W_TMP = W; W_TMP2 = (W[i] 0 0 0);
+	2. W_TMP = (W3 ^ W8 ^ W14 ^ W16) rol 1; split (W[i] 0 0 0) rol 2 in W_TMP2 and W
+	3. W = W_TMP = W_TMP ^ W_TMP2 ^ W = (W3 ^ W8 ^ W14 ^ W16) rol 1 ^ (W[i] 0 0 0) rol 2; WK = W_TMP + K;
+
+	*/
+
+	.macro	W_PRECALC_16_31_0_ssse3	// input arguments : W16,W12,W8,W4,W
+	xmov	$1, $4					// W = W12
+	palignr	$$8, $0, $4				// W = W14
+	xmov	$3, W_TMP				// W_TMP = W4
+	psrldq	$$4, W_TMP				// W_TMP = W3
+	pxor	$2, $4					// W = W8 ^ W14	
+	.endm
+
+	.macro	W_PRECALC_16_31_1		// input arguments : W16,W
+	pxor	$0, W_TMP				// W_TMP = W3 ^ W16
+	pxor	W_TMP, $1				// W = W3 ^ W16 ^ W8 ^ W14
+	xmov	$1, W_TMP2				// W_TMP2 = W3 ^ W16 ^ W8 ^ W14 
+	xmov	$1, W_TMP				// W_TMP = W3 ^ W16 ^ W8 ^ W14
+	pslldq	$$12, W_TMP2			// W_TMP2 = (W[i] 0 0 0)
+	.endm
+
+	.macro	W_PRECALC_16_31_2		// input argument : W
+	psrld	$$31, $0				// (W3 ^ W16 ^ W8 ^ W14)>>31
+	pslld	$$1, W_TMP				// (W3 ^ W16 ^ W8 ^ W14)<<1
+	por		$0, W_TMP				// W_TMP = (W3 ^ W16 ^ W8 ^ W14) rol 1
+	xmov	W_TMP2, $0				// copy W[i] at location of W[i+3]
+	psrld	$$30, W_TMP2			// W_TMP2 = W[i] lower 2 bits after rol 2
+	pslld	$$2, $0					// W = W[i] higher 30 bits after rol 2
+	.endm
+
+	.macro	W_PRECALC_16_31_3		// input arguments: W, i, K_XMM
+#if defined (__i386__)
+	mov     K_BASE, T1				// K_BASE is stored in the stack memory for i386
+#endif
+	pxor	$0, W_TMP
+	pxor	W_TMP2, W_TMP			// W_TMP = (W3 ^ W16 ^ W8 ^ W14) rol 1 ^ (W[i] 0 0 0) rol 2
+	xmov	W_TMP, $0				// save W = W_TMP in the W circular buffer
+#if defined (__x86_64__)
+	paddd	$2(K_BASE), W_TMP		// W+K
+#else
+    paddd   $2(T1), W_TMP			// W+K
+#endif
+	xmov	W_TMP, WK($1&~3)		// save WK = W+K for later update of the hashes A/B/C/D/E
+	.endm
+
+	// the following is a variant of W_PRECALC_16_31_0_ssse3 to be used for systems without ssse3; palignr is replaced with 4 instructions
+
+	.macro	W_PRECALC_16_31_0_nossse3	// input arguments : W16,W12,W8,W4,W
+	xmov	$1, $4						// W = W12 = (w9 w10 w11 w12)
+
+	// the following is a workaround for palignr
+	xmov	$0, W_TMP					// W16 = (w13 w14 w15 w16)
+	pslldq	$$8, $4						// shift left to make (w11 w12 0 0)
+	psrldq	$$8, W_TMP					// shift right to make (0 0 w13 w14)
+	por		W_TMP, $4					// W = W14 = (w11 w12 w13 w14)
+
+	xmov	$3, W_TMP					// W_TMP = W4 = (w1 w2 w3 w4)
+	psrldq	$$4, W_TMP					// W_TMP = W3 = (0 w1 w2 w3)
+	pxor	$2, $4						// W = W8 ^ W14	
+	.endm
+
+	/* rounds 32-79 compute W and W+K using the vectorization approach from the Intel article
+
+		W = rotate_left(left_shift(concatenate(W8,W4),64) ^ W16 ^ W28 ^ W32, 2);
+
+		where left_shift(concatenate(W8,W4),64) is equivalent to W6. Note also that W32 and W use the same register.
+
+
+	0. W_TMP = W6; W = W28 ^ W32;
+	1. W = W_TMP = W6 ^ W16 ^ W28 ^ W32;
+	2. W_TMP = (W6 ^ W16 ^ W28 ^ W32) rol 2;
+	3. W = W_TMP; WK = W_TMP + K;
+
+	*/
+
+
+	.macro	W_PRECALC_32_79_0_ssse3		// input arguments : W28,W8,W4,W
+	xmov	$2, W_TMP					// (w1 w2 w3 w4)
+	pxor	$0, $3						// W = W28 ^ W32;
+	palignr	$$8, $1, W_TMP				// W_tmp = (w3 w4 w5 w6) = W6;
+	.endm
+
+	// the following is a variant that will be used for systems without ssse3 support
+	.macro	W_PRECALC_32_79_0_nossse3	// input arguments : W28,W8,W4,W
+	xmov	$2, W_TMP					// (w1 w2 w3 w4)
+	xmov    $1, W_TMP2					// (w5 w6 w7 w8)
+	pxor	$0, $3						// W = W28 ^ W32
+	pslldq	$$8, W_TMP					// (w3 w4 0 0)
+	psrldq	$$8, W_TMP2					// (0 0 w5 w6)
+	por		W_TMP2, W_TMP				// W_tmp = (w3 w4 w5 w6) = W6
+	.endm
+
+	// this is a variant of W_PRECALC_32_79_0_ssse3 for i386 (as W24/W28 are stored in memory, not in registers)
+	.macro  W_PRECALC_32_79_0_i386_ssse3	// input arguments : W28,W8,W4,W
+    xmov    $3, W_TMP						// W32
+    pxor    $0, W_TMP						// W28 ^ W32
+    xmov    W_TMP, $3						// W = W28 ^ W32;
+    xmov    $2, W_TMP						// W4
+    palignr $$8, $1, W_TMP					// W_tmp = (w3 w4 w5 w6) = W6;
+    .endm
+
+	// this is a variant of W_PRECALC_32_79_0_nossse3 for i386 (as W24/W28 are stored in memory, not in registers)
+	.macro  W_PRECALC_32_79_0_i386_nossse3  // input arguments : W28,W8,W4,W
+    xmov    $3, W_TMP						// W32
+    pxor    $0, W_TMP						// W28 ^ W32
+    xmov    W_TMP, $3						// W = W28 ^ W32
+    xmov    $2, W_TMP						// W4 = (w1 w2 w3 w4)
+	xmov    $1, W_TMP2						// W8 = (w5 w6 w7 w8)
+	pslldq	$$8, W_TMP						// (w3 w4 0 0)
+	psrldq	$$8, W_TMP2						// (0 0 w5 w6)
+	por		W_TMP2, W_TMP					// W_tmp = (w3 w4 w5 w6) = W6
+    .endm
+
+	.macro	W_PRECALC_32_79_1			// input arguments : W16,W
+	pxor	$0, W_TMP					// W_tmp = W6 ^ W16
+	pxor	$1, W_TMP					// W_tmp = W6 ^ W16 ^ W28 ^ W32
+	xmov	W_TMP, $1					// W = W_tmp = W6 ^ W16 ^ W28 ^ W32
+	.endm
+
+	.macro	W_PRECALC_32_79_2			// input argument : W
+	psrld	$$30, $0					// W >> 30
+	pslld	$$2, W_TMP					// W << 2
+	por		$0, W_TMP					// W_tmp = (W6 ^ W16 ^ W28 ^ W32) rol 2
+	.endm
+
+	// this is a variant of W_PRECALC_32_79_2 for i386 (as W24/W28 are stored in memory, not in registers)
+	// this should be used when the input is either W24 or W28 on i386 architecture
+    .macro  W_PRECALC_32_79_2_i386  	// input argument : W
+    xmov    $0, W_TMP2					// W
+    psrld   $$30, W_TMP2				// W >> 30
+    xmov    W_TMP2, $0					// save (W >> 30) at W
+    pslld   $$2, W_TMP					// W_tmp << 2
+    por     $0, W_TMP					// W_tmp = (W6 ^ W16 ^ W28 ^ W32) rol 2
+    .endm
+
+	.macro	W_PRECALC_32_79_3			// input argument W, i, K_XMM
+#if defined (__x86_64__)
+	xmov	W_TMP, $0					// W = (W6 ^ W16 ^ W28 ^ W32) rol 2
+	paddd	$2(K_BASE), W_TMP			// W + K
+	xmov	W_TMP, WK($1&~3)			// write W+K
+#else
+    mov     K_BASE, T1					// T1 = K_BASE (which is in the caller argument)
+    xmov    W_TMP, $0					// W = (W6 ^ W16 ^ W28 ^ W32) rol 2
+    paddd   $2(T1), W_TMP				// W_tmp = W + K
+    xmov    W_TMP, WK($1&~3)			// write WK
+#endif
+	.endm
+
+
+	/* The hash update operation is completed by the following statements.
+
+		A[i+1] = FN + E[i] + ROTATE_LEFT( A[i], 5 ) + WK(i);
+        B[i+1] = A[i];
+        C[i+1] = ROTATE_LEFT( B[i], 30 );
+        D[i+1] = C[i];
+        E[i+1] = D[i];
+
+		Suppose we start with A0,B0,C0,D0,E0. The 1st iteration can be expressed as follows:
+
+		A1 = FN + E0 + rol(A0,5) + WK;
+		B1 = A0;
+		C1 = rol(B0, 30);
+		D1 = C0;
+		E1 = D0;
+
+		to avoid excessive memory movement between registers, 
+			1. A1 = FN + E0 + rol(A0,5) + WK; can be temporarily saved in E0, 
+			2. C1 = rol(B0,30) can be temporarily saved in B0. 
+
+		Therefore, ignoring the time index, the update operation is equivalent to
+			1. E = FN(B,C,D) + E + rol(A,5) + WK(i)
+			2. B = rol(B,30)
+			3. the hashes are now stored in the order of E,A,B,C,D
+
+
+		To pack 2 hash update operations in 1 iteration, starting with A,B,C,D,E
+		1. E = FN(B,C,D) + E + rol(A,5) + WK(i)
+		2. B = rol(B,30)
+		// now the hashes are in the order of E,A,B,C,D
+		3. D = FN(A,B,C) + D + rol(E,5) + WK(i+1)
+		4. A = rol(A,30)
+		// now the hashes are in the order of D,E,A,B,C
+	
+		These operations are distributed into the following 2 macro definitions RR0 and RR1.	
+
+	*/
+
+	.macro	RR0				// input arguments : FN, A, B, C, D, E, i
+	$0		$2, $3, $4		// T1 = FN(B,C,D)
+	add		WK($6), $5		// E + WK(i)
+	rol		$$30, $2		// B = rol(B,30)
+	mov		$1, T2			// T2 = A
+	add		WK($6+1), $4	// D + WK(i+1)
+	rol		$$5, T2			// rol(A,5)
+	add		T1, $5			// E = FN(B,C,D) + E + WK(i)
+	.endm
+
+	.macro	RR1				// input arguments : FN, A, B, C, D, E, i
+	add		$5, T2			// T2 = FN(B,C,D) + E + rol(A,5) + WK(i)
+	mov		T2, $5			// E = FN(B,C,D) + E + rol(A,5) + WK(i)
+	rol		$$5, T2			// rol(E,5)
+	add		T2, $4			// D + WK(i+1) + rol(E,5)
+	$0		$1, $2, $3		// FN(A,B,C)
+	add		T1, $4			// D = FN(A,B,C) + D + rol(E,5) + WK(i+1)
+	rol		$$30, $1		// A = rol(A,30)
+	.endm
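+
+	/*	In C terms, one RR0/RR1 pair performs two rounds by rotating the
+		variable roles instead of moving values (a sketch; FN is the round
+		function and wk[] the precomputed W+K values in the stack):
+
+		e += rol(a, 5) + FN(b, c, d) + wk[i];	b = rol(b, 30);
+		d += rol(e, 5) + FN(a, b, c) + wk[i+1];	a = rol(a, 30);
+
+		leaving the digest in the order (d, e, a, b, c) for the next pair.
+	*/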
+
+
+
+	/*
+
+		The following macro definitions are used to expand code for the per-block sha1 operation.
+
+			INITIAL_W_PRECALC_ssse3	: BIG_ENDIAN_LOAD(64-byte block) into W (i=0:15) and store W+K into the stack memory
+			INTERNAL_ssse3 : update W (i=16:79) and the digests A/B/C/D/E (i=0:63, based on W+K stored in the stack memory)
+			ENDING : finish updating the digests A/B/C/D/E (i=64:79)
+
+		For multiple-block sha1 operation (Multiple_Blocks = 1), INITIAL_W_PRECALC_ssse3 and ENDING are combined
+		into 1 macro definition for software pipelining.
+
+			SOFTWARE_PIPELINING_ssse3 : BIG_ENDIAN_LOAD(64-byte block) into W (i=0:15) and store W+K into the stack, and finish updating the digests A/B/C/D/E (i=64:79)
+
+		assuming cnt (the number of blocks) >= 1, the main code body looks like:
+
+		INITIAL_W_PRECALC_ssse3				// W = big_endian_load and pre-compute W+K (i=0:15)
+		for (;;) {
+			INTERNAL_ssse3					// update W(i=16:79), and update hash digests A/B/C/D/E (i=0:63)
+			cnt--;
+			if (cnt==0) break;
+			BUFFER_PTR += 64;
+			SOFTWARE_PIPELINING_ssse3;		// update hash digests A/B/C/D/E (i=64:79) + W = big_endian_load and pre-compute W+K (i=0:15)
+		}
+		ENDING								// update hash digests A/B/C/D/E (i=64:79)
+
+	*/
+
+	#define	W_PRECALC_00_15_0	W_PRECALC_00_15_0_ssse3
+	#define	W_PRECALC_00_15_1	W_PRECALC_00_15_1_ssse3
+	#define	W_PRECALC_16_31_0	W_PRECALC_16_31_0_ssse3
+	#define	W_PRECALC_32_79_0	W_PRECALC_32_79_0_ssse3
+	#define	W_PRECALC_32_79_0_i386	W_PRECALC_32_79_0_i386_ssse3
+
+
+	.macro	INITIAL_W_PRECALC_ssse3			// BIG_ENDIAN_LOAD(64 bytes block) into W (i=0:15) and store W+K into the stack memory	
+
+	// i=0 	: W28,W24,W20,W16,W12,W8,W4,W0
+	W_PRECALC_00_15_0	0					// W_TMP = (BUFFER_PTR)
+	W_PRECALC_00_15_1	W0					// convert W_TMP to big-endian, and save W0 = W_TMP	
+	W_PRECALC_00_15_2						// W_TMP = W0 + K
+	W_PRECALC_00_15_3	3					// (sp) = W_TMP = W0 + K
+
+	// i=4	: W24,W20,W16,W12,W8,W4,W0,W28
+	W_PRECALC_00_15_0	4					// W_TMP = 16(BUFFER_PTR)
+	W_PRECALC_00_15_1	W28					// convert W_TMP to big-endian, and save W28 = W_TMP	
+	W_PRECALC_00_15_2						// W_TMP = W28 + K
+	W_PRECALC_00_15_3	7					// 16(sp) = W_TMP = W28 + K
+
+	// i=8  : W20,W16,W12,W8,W4,W0,W28,W24
+	W_PRECALC_00_15_0	8					// W_TMP = 32(BUFFER_PTR)
+	W_PRECALC_00_15_1	W24					// convert W_TMP to big-endian, and save W24 = W_TMP
+	W_PRECALC_00_15_2						// W_TMP = W24 + K
+	W_PRECALC_00_15_3	11					// 32(sp) = W_TMP = W24 + K
+
+	// i=12 : W16,W12,W8,W4,W0,W28,W24,W20
+	W_PRECALC_00_15_0	12					// W_TMP = 48(BUFFER_PTR)
+	W_PRECALC_00_15_1	W20					// convert W_TMP to big-endian, and save W20 = W_TMP	
+	W_PRECALC_00_15_2						// W_TMP = W20 + K
+	W_PRECALC_00_15_3	15					// 48(sp) = W_TMP = W20 + K
+
+	.endm
+
+
+	.macro	INTERNAL_ssse3					// update W (i=16:79) and the digests A/B/C/D/E (i=0:63, based on W+K stored in the stack memory)
+
+	// i=16 : W12,W8,W4,W0,W28,W24,W20,W16
+	W_PRECALC_16_31_0	W0,W28,W24,W20,W16
+	RR0					F1,A,B,C,D,E,0
+	W_PRECALC_16_31_1	W0,W16
+	RR1					F1,A,B,C,D,E,0
+	W_PRECALC_16_31_2	W16
+	RR0					F1,D,E,A,B,C,2
+	W_PRECALC_16_31_3	W16, 2, 0
+	RR1					F1,D,E,A,B,C,2
+
+	// i=20 : W8,W4,W0,W28,W24,W20,W16,W12
+	W_PRECALC_16_31_0	W28,W24,W20,W16,W12
+	RR0					F1,B,C,D,E,A,4
+	W_PRECALC_16_31_1	W28,W12
+	RR1					F1,B,C,D,E,A,4
+	W_PRECALC_16_31_2	W12
+	RR0					F1,E,A,B,C,D,6
+	W_PRECALC_16_31_3	W12, 6, 16
+	RR1					F1,E,A,B,C,D,6
+
+	// i=24 : W4,W0,W28,W24,W20,W16,W12,W8
+	W_PRECALC_16_31_0	W24,W20,W16,W12,W8
+	RR0					F1,C,D,E,A,B,8
+	W_PRECALC_16_31_1	W24,W8
+	RR1					F1,C,D,E,A,B,8
+	W_PRECALC_16_31_2	W8
+	RR0					F1,A,B,C,D,E,10
+	W_PRECALC_16_31_3	W8,10,16
+	RR1					F1,A,B,C,D,E,10
+
+	// i=28 : W0,W28,W24,W20,W16,W12,W8,W4
+	W_PRECALC_16_31_0	W20,W16,W12,W8,W4
+	RR0					F1,D,E,A,B,C,12
+	W_PRECALC_16_31_1	W20,W4
+	RR1					F1,D,E,A,B,C,12
+	W_PRECALC_16_31_2	W4
+	RR0					F1,B,C,D,E,A,14
+	W_PRECALC_16_31_3	W4,14,16
+	RR1					F1,B,C,D,E,A,14
+
+	// i=32 : W28,W24,W20,W16,W12,W8,W4,W0
+	W_PRECALC_32_79_0	W28,W8,W4,W0
+	RR0					F1,E,A,B,C,D,16
+	W_PRECALC_32_79_1	W16,W0
+	RR1					F1,E,A,B,C,D,16
+	W_PRECALC_32_79_2	W0
+	RR0					F1,C,D,E,A,B,18
+	W_PRECALC_32_79_3	W0,18,16
+	RR1					F1,C,D,E,A,B,18
+
+	// start using F2
+
+	// i=36 : W24,W20,W16,W12,W8,W4,W0,W28
+#if defined (__x86_64__)
+	W_PRECALC_32_79_0	W24,W4,W0,W28
+#else
+	W_PRECALC_32_79_0_i386	W24,W4,W0,W28
+#endif
+	RR0					F2,A,B,C,D,E,20
+	W_PRECALC_32_79_1	W12,W28
+	RR1					F2,A,B,C,D,E,20
+#if defined (__x86_64__)
+	W_PRECALC_32_79_2	W28
+#else
+	W_PRECALC_32_79_2_i386	W28
+#endif
+	RR0					F2,D,E,A,B,C,22
+	W_PRECALC_32_79_3	W28,22,16
+	RR1					F2,D,E,A,B,C,22
+
+	// i=40 : W20,W16,W12,W8,W4,W0,W28,W24	
+	#undef  K_XMM
+    #define K_XMM   32
+#if defined (__x86_64__)
+	W_PRECALC_32_79_0	W20,W0,W28,W24
+#else
+	W_PRECALC_32_79_0_i386	W20,W0,W28,W24
+#endif
+	RR0					F2,B,C,D,E,A,24
+	W_PRECALC_32_79_1	W8,W24
+	RR1					F2,B,C,D,E,A,24
+#if defined (__x86_64__)
+	W_PRECALC_32_79_2	W24
+#else
+	W_PRECALC_32_79_2_i386	W24
+#endif
+	RR0					F2,E,A,B,C,D,26
+	W_PRECALC_32_79_3	W24,26,K_XMM
+	RR1					F2,E,A,B,C,D,26
+
+	// i=44 : W16,W12,W8,W4,W0,W28,W24,W20
+	W_PRECALC_32_79_0	W16,W28,W24,W20
+	RR0					F2,C,D,E,A,B,28
+	W_PRECALC_32_79_1	W4,W20
+	RR1					F2,C,D,E,A,B,28
+	W_PRECALC_32_79_2	W20
+	RR0					F2,A,B,C,D,E,30
+	W_PRECALC_32_79_3	W20,30,K_XMM
+	RR1					F2,A,B,C,D,E,30
+
+	// i=48 : W12,W8,W4,W0,W28,W24,W20,W16
+	W_PRECALC_32_79_0	W12,W24,W20,W16
+	RR0					F2,D,E,A,B,C,32
+	W_PRECALC_32_79_1	W0,W16
+	RR1					F2,D,E,A,B,C,32
+	W_PRECALC_32_79_2	W16
+	RR0					F2,B,C,D,E,A,34
+	W_PRECALC_32_79_3	W16,34,K_XMM
+	RR1					F2,B,C,D,E,A,34
+
+	// i=52 : W8,W4,W0,W28,W24,W20,W16,W12
+	W_PRECALC_32_79_0	W8,W20,W16,W12
+	RR0					F2,E,A,B,C,D,36
+	W_PRECALC_32_79_1	W28,W12
+	RR1					F2,E,A,B,C,D,36
+	W_PRECALC_32_79_2	W12
+	RR0					F2,C,D,E,A,B,38
+	W_PRECALC_32_79_3	W12,38,K_XMM
+	RR1					F2,C,D,E,A,B,38
+
+	// start using F3
+
+	// i=56 : W4,W0,W28,W24,W20,W16,W12,W8
+	W_PRECALC_32_79_0	W4,W16,W12,W8
+	RR0					F3,A,B,C,D,E,40
+	W_PRECALC_32_79_1	W24,W8
+	RR1					F3,A,B,C,D,E,40
+	W_PRECALC_32_79_2	W8
+	RR0					F3,D,E,A,B,C,42
+	W_PRECALC_32_79_3	W8,42,K_XMM
+	RR1					F3,D,E,A,B,C,42
+
+	// i=60 : W0,W28,W24,W20,W16,W12,W8,W4
+	#undef	K_XMM
+	#define	K_XMM	48
+	W_PRECALC_32_79_0	W0,W12,W8,W4
+	RR0					F3,B,C,D,E,A,44
+	W_PRECALC_32_79_1	W20,W4
+	RR1					F3,B,C,D,E,A,44
+	W_PRECALC_32_79_2	W4
+	RR0					F3,E,A,B,C,D,46
+	W_PRECALC_32_79_3	W4,46,K_XMM
+	RR1					F3,E,A,B,C,D,46
+
+	// i=64 : W28,W24,W20,W16,W12,W8,W4,W0
+	W_PRECALC_32_79_0	W28,W8,W4,W0
+	RR0					F3,C,D,E,A,B,48
+	W_PRECALC_32_79_1	W16,W0
+	RR1					F3,C,D,E,A,B,48
+	W_PRECALC_32_79_2	W0
+	RR0					F3,A,B,C,D,E,50
+	W_PRECALC_32_79_3	W0,50,K_XMM
+	RR1					F3,A,B,C,D,E,50
+
+	// i=68 : W24,W20,W16,W12,W8,W4,W0,W28
+#if defined (__x86_64__)
+	W_PRECALC_32_79_0	W24,W4,W0,W28
+#else
+	W_PRECALC_32_79_0_i386	W24,W4,W0,W28
+#endif
+	RR0					F3,D,E,A,B,C,52
+	W_PRECALC_32_79_1	W12,W28
+	RR1					F3,D,E,A,B,C,52
+#if defined (__x86_64__)
+	W_PRECALC_32_79_2	W28
+#else
+	W_PRECALC_32_79_2_i386	W28
+#endif
+	RR0					F3,B,C,D,E,A,54
+	W_PRECALC_32_79_3	W28,54,K_XMM
+	RR1					F3,B,C,D,E,A,54
+
+	// i=72 : W20,W16,W12,W8,W4,W0,W28,W24
+#if defined (__x86_64__)
+	W_PRECALC_32_79_0	W20,W0,W28,W24
+#else
+	W_PRECALC_32_79_0_i386	W20,W0,W28,W24
+#endif
+	RR0					F3,E,A,B,C,D,56
+	W_PRECALC_32_79_1	W8,W24
+	RR1					F3,E,A,B,C,D,56
+#if defined (__x86_64__)
+	W_PRECALC_32_79_2	W24
+#else
+	W_PRECALC_32_79_2_i386	W24
+#endif
+	RR0					F3,C,D,E,A,B,58
+	W_PRECALC_32_79_3	W24,58,K_XMM
+	RR1					F3,C,D,E,A,B,58
+
+	// start using F4
+
+	// i=76 : W16,W12,W8,W4,W0,W28,W24,W20
+	W_PRECALC_32_79_0	W16,W28,W24,W20
+	RR0					F4,A,B,C,D,E,60
+	W_PRECALC_32_79_1	W4,W20
+	RR1					F4,A,B,C,D,E,60
+	W_PRECALC_32_79_2	W20
+	RR0					F4,D,E,A,B,C,62
+	W_PRECALC_32_79_3	W20,62,K_XMM
+	RR1					F4,D,E,A,B,C,62
+
+	.endm
+
+	.macro	SOFTWARE_PIPELINING_ssse3
+	// i=0  : W28,W24,W20,W16,W12,W8,W4,W0
+	W_PRECALC_00_15_0	0					// W_TMP = (BUFFER_PTR)
+	RR0					F4,B,C,D,E,A,64
+	W_PRECALC_00_15_1	W0					// convert W_TMP to big-endian, and save W0 = W_TMP	
+	RR1					F4,B,C,D,E,A,64
+	W_PRECALC_00_15_2						// W_TMP = W0 + K
+	RR0					F4,E,A,B,C,D,66
+	W_PRECALC_00_15_3	3					// (sp) = W_TMP = W0 + K
+	RR1					F4,E,A,B,C,D,66
+
+	// i=4  : W24,W20,W16,W12,W8,W4,W0,W28
+	W_PRECALC_00_15_0	4					// W_TMP = 16(BUFFER_PTR)
+	RR0					F4,C,D,E,A,B,68
+	W_PRECALC_00_15_1	W28					// convert W_TMP to big-endian, and save W28 = W_TMP	
+	RR1					F4,C,D,E,A,B,68
+	W_PRECALC_00_15_2						// W_TMP = W28 + K
+	RR0					F4,A,B,C,D,E,70
+	W_PRECALC_00_15_3	7					// 16(sp) = W_TMP = W28 + K
+	RR1					F4,A,B,C,D,E,70
+
+	// i=8  : W20,W16,W12,W8,W4,W0,W28,W24
+	W_PRECALC_00_15_0	8					// W_TMP = 32(BUFFER_PTR)
+	RR0					F4,D,E,A,B,C,72
+	W_PRECALC_00_15_1	W24					// convert W_TMP to big-endian, and save W24 = W_TMP
+	RR1					F4,D,E,A,B,C,72
+	W_PRECALC_00_15_2						// W_TMP = W24 + K
+	RR0					F4,B,C,D,E,A,74
+	W_PRECALC_00_15_3	11					// 32(sp) = W_TMP = W24 + K
+	RR1					F4,B,C,D,E,A,74
+
+	// i=12 : W16,W12,W8,W4,W0,W28,W24,W20
+	W_PRECALC_00_15_0	12					// W_TMP = 48(BUFFER_PTR)
+	RR0					F4,E,A,B,C,D,76
+	W_PRECALC_00_15_1	W20					// convert W_TMP to big-endian, and save W20 = W_TMP	
+	RR1					F4,E,A,B,C,D,76
+	W_PRECALC_00_15_2						// W_TMP = W20 + K
+	RR0					F4,C,D,E,A,B,78
+	W_PRECALC_00_15_3	15					// 48(sp) = W_TMP = W20 + K
+	RR1					F4,C,D,E,A,B,78
+	.endm
+
+
+	#undef	W_PRECALC_00_15_0
+	#undef	W_PRECALC_00_15_1
+	#undef	W_PRECALC_16_31_0
+	#undef	W_PRECALC_32_79_0
+	#undef	W_PRECALC_32_79_0_i386
+
+
+
+	/* 
+
+		The following are 3 macro definitions that are no-ssse3 variants of the previous 3 macro definitions.
+
+		INITIAL_W_PRECALC_nossse3
+		INTERNAL_nossse3
+		SOFTWARE_PIPELINING_nossse3
+		
+		They are used in the sha1 main body definition for systems without ssse3 support.
+
+	*/
+
+	#define	W_PRECALC_00_15_0	W_PRECALC_00_15_0_nossse3
+	#define	W_PRECALC_00_15_1	W_PRECALC_00_15_1_nossse3
+	#define	W_PRECALC_16_31_0	W_PRECALC_16_31_0_nossse3
+	#define	W_PRECALC_32_79_0	W_PRECALC_32_79_0_nossse3
+	#define	W_PRECALC_32_79_0_i386	W_PRECALC_32_79_0_i386_nossse3
+
+
+	.macro	INITIAL_W_PRECALC_nossse3
+
+	// i=0 	: W28,W24,W20,W16,W12,W8,W4,W0
+	W_PRECALC_00_15_0	0					// W_TMP = (BUFFER_PTR)
+	W_PRECALC_00_15_1	W0					// convert W_TMP to big-endian, and save W0 = W_TMP	
+	W_PRECALC_00_15_2						// W_TMP = W0 + K
+	W_PRECALC_00_15_3	3					// (sp) = W_TMP = W0 + K
+
+	// i=4	: W24,W20,W16,W12,W8,W4,W0,W28
+	W_PRECALC_00_15_0	4					// W_TMP = 16(BUFFER_PTR)
+	W_PRECALC_00_15_1	W28					// convert W_TMP to big-endian, and save W28 = W_TMP	
+	W_PRECALC_00_15_2						// W_TMP = W28 + K
+	W_PRECALC_00_15_3	7					// 16(sp) = W_TMP = W28 + K
+
+	// i=8  : W20,W16,W12,W8,W4,W0,W28,W24
+	W_PRECALC_00_15_0	8					// W_TMP = 32(BUFFER_PTR)
+	W_PRECALC_00_15_1	W24					// convert W_TMP to big-endian, and save W24 = W_TMP
+	W_PRECALC_00_15_2						// W_TMP = W24 + K
+	W_PRECALC_00_15_3	11					// 32(sp) = W_TMP = W24 + K
+
+	// i=12 : W16,W12,W8,W4,W0,W28,W24,W20
+	W_PRECALC_00_15_0	12					// W_TMP = 48(BUFFER_PTR)
+	W_PRECALC_00_15_1	W20					// convert W_TMP to big-endian, and save W20 = W_TMP	
+	W_PRECALC_00_15_2						// W_TMP = W20 + K
+	W_PRECALC_00_15_3	15					// 48(sp) = W_TMP = W20 + K
+
+	.endm
+
+
+	.macro	INTERNAL_nossse3
+	// i=16
+	// circular buffer : W12,W8,W4,W0,W28,W24,W20,W16
+	W_PRECALC_16_31_0	W0,W28,W24,W20,W16
+	RR0					F1,A,B,C,D,E,0
+	W_PRECALC_16_31_1	W0,W16
+	RR1					F1,A,B,C,D,E,0
+	W_PRECALC_16_31_2	W16
+	RR0					F1,D,E,A,B,C,2
+	W_PRECALC_16_31_3	W16, 2, 0
+	RR1					F1,D,E,A,B,C,2
+
+	// i=20,
+	// W8,W4,W0,W28,W24,W20,W16,W12
+	W_PRECALC_16_31_0	W28,W24,W20,W16,W12
+	RR0					F1,B,C,D,E,A,4
+	W_PRECALC_16_31_1	W28,W12
+	RR1					F1,B,C,D,E,A,4
+
+	W_PRECALC_16_31_2	W12
+	RR0					F1,E,A,B,C,D,6
+	W_PRECALC_16_31_3	W12, 6, 16
+	RR1					F1,E,A,B,C,D,6
+
+	// i=24,
+	// W4,W0,W28,W24,W20,W16,W12,W8
+	W_PRECALC_16_31_0	W24,W20,W16,W12,W8
+	RR0					F1,C,D,E,A,B,8
+	W_PRECALC_16_31_1	W24,W8
+	RR1					F1,C,D,E,A,B,8
+
+	W_PRECALC_16_31_2	W8
+	RR0					F1,A,B,C,D,E,10
+	W_PRECALC_16_31_3	W8,10,16
+	RR1					F1,A,B,C,D,E,10
+
+	// i=28
+	// W0,W28,W24,W20,W16,W12,W8,W4
+	W_PRECALC_16_31_0	W20,W16,W12,W8,W4
+	RR0					F1,D,E,A,B,C,12
+	W_PRECALC_16_31_1	W20,W4
+	RR1					F1,D,E,A,B,C,12
+
+	W_PRECALC_16_31_2	W4
+	RR0					F1,B,C,D,E,A,14
+	W_PRECALC_16_31_3	W4,14,16
+	RR1					F1,B,C,D,E,A,14
+
+	//i=32
+	// W28,W24,W20,W16,W12,W8,W4,W0
+	W_PRECALC_32_79_0	W28,W8,W4,W0
+	RR0					F1,E,A,B,C,D,16
+	W_PRECALC_32_79_1	W16,W0
+	RR1					F1,E,A,B,C,D,16
+	W_PRECALC_32_79_2	W0
+	RR0					F1,C,D,E,A,B,18
+	W_PRECALC_32_79_3	W0,18,16
+	RR1					F1,C,D,E,A,B,18
+
+	//i=36
+	// W24,W20,W16,W12,W8,W4,W0,W28
+#if defined (__x86_64__)
+	W_PRECALC_32_79_0	W24,W4,W0,W28
+#else
+	W_PRECALC_32_79_0_i386	W24,W4,W0,W28
+#endif
+	RR0					F2,A,B,C,D,E,20
+	W_PRECALC_32_79_1	W12,W28
+	RR1					F2,A,B,C,D,E,20
+#if defined (__x86_64__)
+	W_PRECALC_32_79_2	W28
+#else
+	W_PRECALC_32_79_2_i386	W28
+#endif
+	RR0					F2,D,E,A,B,C,22
+	W_PRECALC_32_79_3	W28,22,16
+	RR1					F2,D,E,A,B,C,22
+
+	//i=40
+	#undef  K_XMM
+    #define K_XMM   32
+	// W20,W16,W12,W8,W4,W0,W28,W24	
+#if defined (__x86_64__)
+	W_PRECALC_32_79_0	W20,W0,W28,W24
+#else
+	W_PRECALC_32_79_0_i386	W20,W0,W28,W24
+#endif
+	RR0					F2,B,C,D,E,A,24
+	W_PRECALC_32_79_1	W8,W24
+	RR1					F2,B,C,D,E,A,24
+#if defined (__x86_64__)
+	W_PRECALC_32_79_2	W24
+#else
+	W_PRECALC_32_79_2_i386	W24
+#endif
+	RR0					F2,E,A,B,C,D,26
+	W_PRECALC_32_79_3	W24,26,K_XMM
+	RR1					F2,E,A,B,C,D,26
+
+	//i=44
+	// W16,W12,W8,W4,W0,W28,W24,W20
+	W_PRECALC_32_79_0	W16,W28,W24,W20
+	RR0					F2,C,D,E,A,B,28
+	W_PRECALC_32_79_1	W4,W20
+	RR1					F2,C,D,E,A,B,28
+	W_PRECALC_32_79_2	W20
+	RR0					F2,A,B,C,D,E,30
+	W_PRECALC_32_79_3	W20,30,K_XMM
+	RR1					F2,A,B,C,D,E,30
+
+	//i=48
+	// W12,W8,W4,W0,W28,W24,W20,W16
+	W_PRECALC_32_79_0	W12,W24,W20,W16
+	RR0					F2,D,E,A,B,C,32
+	W_PRECALC_32_79_1	W0,W16
+	RR1					F2,D,E,A,B,C,32
+	W_PRECALC_32_79_2	W16
+	RR0					F2,B,C,D,E,A,34
+	W_PRECALC_32_79_3	W16,34,K_XMM
+	RR1					F2,B,C,D,E,A,34
+
+	//i=52
+	// W8,W4,W0,W28,W24,W20,W16,W12
+	W_PRECALC_32_79_0	W8,W20,W16,W12
+	RR0					F2,E,A,B,C,D,36
+	W_PRECALC_32_79_1	W28,W12
+	RR1					F2,E,A,B,C,D,36
+	W_PRECALC_32_79_2	W12
+	RR0					F2,C,D,E,A,B,38
+	W_PRECALC_32_79_3	W12,38,K_XMM
+	RR1					F2,C,D,E,A,B,38
+
+	//i=56
+	// W4,W0,W28,W24,W20,W16,W12,W8
+	W_PRECALC_32_79_0	W4,W16,W12,W8
+	RR0					F3,A,B,C,D,E,40
+	W_PRECALC_32_79_1	W24,W8
+	RR1					F3,A,B,C,D,E,40
+	W_PRECALC_32_79_2	W8
+	RR0					F3,D,E,A,B,C,42
+	W_PRECALC_32_79_3	W8,42,K_XMM
+	RR1					F3,D,E,A,B,C,42
+
+	//i=60
+	#undef	K_XMM
+	#define	K_XMM	48
+	// W0,W28,W24,W20,W16,W12,W8,W4
+	W_PRECALC_32_79_0	W0,W12,W8,W4
+	RR0					F3,B,C,D,E,A,44
+	W_PRECALC_32_79_1	W20,W4
+	RR1					F3,B,C,D,E,A,44
+	W_PRECALC_32_79_2	W4
+	RR0					F3,E,A,B,C,D,46
+	W_PRECALC_32_79_3	W4,46,K_XMM
+	RR1					F3,E,A,B,C,D,46
+
+	//i=64
+	// W28,W24,W20,W16,W12,W8,W4,W0
+	W_PRECALC_32_79_0	W28,W8,W4,W0
+	RR0					F3,C,D,E,A,B,48
+	W_PRECALC_32_79_1	W16,W0
+	RR1					F3,C,D,E,A,B,48
+	W_PRECALC_32_79_2	W0
+	RR0					F3,A,B,C,D,E,50
+	W_PRECALC_32_79_3	W0,50,K_XMM
+	RR1					F3,A,B,C,D,E,50
+
+	//i=68
+	// W24,W20,W16,W12,W8,W4,W0,W28
+#if defined (__x86_64__)
+	W_PRECALC_32_79_0	W24,W4,W0,W28
+#else
+	W_PRECALC_32_79_0_i386	W24,W4,W0,W28
+#endif
+	RR0					F3,D,E,A,B,C,52
+	W_PRECALC_32_79_1	W12,W28
+	RR1					F3,D,E,A,B,C,52
+#if defined (__x86_64__)
+	W_PRECALC_32_79_2	W28
+#else
+	W_PRECALC_32_79_2_i386	W28
+#endif
+	RR0					F3,B,C,D,E,A,54
+	W_PRECALC_32_79_3	W28,54,K_XMM
+	RR1					F3,B,C,D,E,A,54
+
+	//i=72
+	// W20,W16,W12,W8,W4,W0,W28,W24
+#if defined (__x86_64__)
+	W_PRECALC_32_79_0	W20,W0,W28,W24
+#else
+	W_PRECALC_32_79_0_i386	W20,W0,W28,W24
+#endif
+	RR0					F3,E,A,B,C,D,56
+	W_PRECALC_32_79_1	W8,W24
+	RR1					F3,E,A,B,C,D,56
+#if defined (__x86_64__)
+	W_PRECALC_32_79_2	W24
+#else
+	W_PRECALC_32_79_2_i386	W24
+#endif
+	RR0					F3,C,D,E,A,B,58
+	W_PRECALC_32_79_3	W24,58,K_XMM
+	RR1					F3,C,D,E,A,B,58
+
+	// start using F4
+
+	//i=76
+	// W16,W12,W8,W4,W0,W28,W24,W20
+	W_PRECALC_32_79_0	W16,W28,W24,W20
+	RR0					F4,A,B,C,D,E,60
+	W_PRECALC_32_79_1	W4,W20
+	RR1					F4,A,B,C,D,E,60
+	W_PRECALC_32_79_2	W20
+	RR0					F4,D,E,A,B,C,62
+	W_PRECALC_32_79_3	W20,62,K_XMM
+	RR1					F4,D,E,A,B,C,62
+
+	.endm
+
+	.macro	SOFTWARE_PIPELINING_nossse3
+	// i=0  : W28,W24,W20,W16,W12,W8,W4,W0
+	W_PRECALC_00_15_0	0					// W_TMP = (BUFFER_PTR)
+	RR0					F4,B,C,D,E,A,64
+	W_PRECALC_00_15_1	W0					// convert W_TMP to big-endian, and save W0 = W_TMP	
+	RR1					F4,B,C,D,E,A,64
+	W_PRECALC_00_15_2						// W_TMP = W0 + K
+	RR0					F4,E,A,B,C,D,66
+	W_PRECALC_00_15_3	3					// (sp) = W_TMP = W0 + K
+	RR1					F4,E,A,B,C,D,66
+
+	// i=4  : W24,W20,W16,W12,W8,W4,W0,W28
+	W_PRECALC_00_15_0	4					// W_TMP = 16(BUFFER_PTR)
+	RR0					F4,C,D,E,A,B,68
+	W_PRECALC_00_15_1	W28					// convert W_TMP to big-endian, and save W28 = W_TMP	
+	RR1					F4,C,D,E,A,B,68
+	W_PRECALC_00_15_2						// W_TMP = W28 + K
+	RR0					F4,A,B,C,D,E,70
+	W_PRECALC_00_15_3	7					// 16(sp) = W_TMP = W28 + K
+	RR1					F4,A,B,C,D,E,70
+
+	// i=8  : W20,W16,W12,W8,W4,W0,W28,W24
+	W_PRECALC_00_15_0	8					// W_TMP = 32(BUFFER_PTR)
+	RR0					F4,D,E,A,B,C,72
+	W_PRECALC_00_15_1	W24					// convert W_TMP to big-endian, and save W24 = W_TMP
+	RR1					F4,D,E,A,B,C,72
+	W_PRECALC_00_15_2						// W_TMP = W24 + K
+	RR0					F4,B,C,D,E,A,74
+	W_PRECALC_00_15_3	11					// 32(sp) = W_TMP = W24 + K
+	RR1					F4,B,C,D,E,A,74
+
+	// i=12 : W16,W12,W8,W4,W0,W28,W24,W20
+	W_PRECALC_00_15_0	12					// W_TMP = 48(BUFFER_PTR)
+	RR0					F4,E,A,B,C,D,76
+	W_PRECALC_00_15_1	W20					// convert W_TMP to big-endian, and save W20 = W_TMP	
+	RR1					F4,E,A,B,C,D,76
+	W_PRECALC_00_15_2						// W_TMP = W20 + K
+	RR0					F4,C,D,E,A,B,78
+	W_PRECALC_00_15_3	15					// 48(sp) = W_TMP = W20 + K
+	RR1					F4,C,D,E,A,B,78
+	.endm
+
+	.macro	ENDING		// finish up updating hash digests (i=64:79)
+	//i=80
+	RR0					F4,B,C,D,E,A,64
+	RR1					F4,B,C,D,E,A,64
+	RR0					F4,E,A,B,C,D,66
+	RR1					F4,E,A,B,C,D,66
+
+	//i=84
+	RR0					F4,C,D,E,A,B,68
+	RR1					F4,C,D,E,A,B,68
+	RR0					F4,A,B,C,D,E,70
+	RR1					F4,A,B,C,D,E,70
+
+	//i=88
+	RR0					F4,D,E,A,B,C,72
+	RR1					F4,D,E,A,B,C,72
+	RR0					F4,B,C,D,E,A,74
+	RR1					F4,B,C,D,E,A,74
+
+	//i=92
+	RR0					F4,E,A,B,C,D,76
+	RR1					F4,E,A,B,C,D,76
+	RR0					F4,C,D,E,A,B,78
+	RR1					F4,C,D,E,A,B,78
+	.endm
+
+	// load hash digests A,B,C,D,E from memory into registers
+	.macro	LOAD_HASH
+#if defined (__x86_64__)
+	mov			(HASH_PTR), A
+	mov			4(HASH_PTR), B
+	mov			8(HASH_PTR), C
+	mov			12(HASH_PTR), D
+	mov			16(HASH_PTR), E
+#else
+    mov         HASH_PTR, T1
+    mov         (T1), A
+    mov         4(T1), B
+    mov         8(T1), C
+    mov         12(T1), D
+    mov         16(T1), E
+#endif
+	.endm
+
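+	// UPDATE_HASH adds a working register into the stored digest word and
+	// reloads the register from it; in C terms (a sketch): *hash += reg; reg = *hash;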
+	.macro	UPDATE_HASH
+	add		$0, $1
+	mov		$1, $0
+	.endm
+
+	.macro UPDATE_ALL_HASH	
+#if defined (__x86_64__)
+	UPDATE_HASH		(HASH_PTR), A
+	UPDATE_HASH		4(HASH_PTR), B
+	UPDATE_HASH		8(HASH_PTR), C
+	UPDATE_HASH		12(HASH_PTR), D
+	UPDATE_HASH		16(HASH_PTR), E
+#else
+    mov             HASH_PTR, T1
+    UPDATE_HASH     (T1), A
+    UPDATE_HASH     4(T1), B
+    UPDATE_HASH     8(T1), C
+    UPDATE_HASH     12(T1), D
+    UPDATE_HASH     16(T1), E
+#endif
+	.endm
+
+
+	/*
+		 main sha1 code for systems without ssse3 support
+	*/
+
+	.macro  SHA1_PIPELINED_MAIN_BODY_nossse3
+	LOAD_HASH						// load initial hashes into A,B,C,D,E (registers)
+	INITIAL_W_PRECALC_nossse3		// big_endian_load(W) and W+K (i=0:15)
+	.align	4,0x90
+0:
+	INTERNAL_nossse3				// update W (i=16:79) and update ABCDE (i=0:63)	
+#if Multiple_Blocks
+	add	$$64, BUFFER_PTR			// BUFFER_PTR+=64;
+	sub	$$1, cnt					// pre-decrement cnt by 1
+	jbe	1f							// if cnt <= 0, branch to finish off
+	SOFTWARE_PIPELINING_nossse3		// update ABCDE (i=64:79) || big_endian_load(W) and W+K (i=0:15)
+	UPDATE_ALL_HASH					// update output hashes
+	jmp	0b							// repeat for next block
+	.align	4,0x90
+1:
+#endif
+	ENDING							// update ABCDE (i=64:79)
+	UPDATE_ALL_HASH					// update output hashes
+	.endm
+
+	/*
+		 main sha1 code for systems with ssse3 support
+	*/
+
+	.macro  SHA1_PIPELINED_MAIN_BODY_ssse3
+	LOAD_HASH						// load initial hashes into A,B,C,D,E
+	INITIAL_W_PRECALC_ssse3			// big_endian_load(W) and W+K (i=0:15)
+	.align	4,0x90
+0:
+	INTERNAL_ssse3					// update W (i=16:79) and update ABCDE (i=0:63)
+#if Multiple_Blocks
+	add	$$64, BUFFER_PTR			// BUFFER_PTR+=64;
+	sub	$$1, cnt					// pre-decrement cnt by 1
+	jbe	1f							// if cnt <= 0, branch to finish off
+	SOFTWARE_PIPELINING_ssse3		// update ABCDE (i=64:79) || big_endian_load(W) and W+K (i=0:15)
+	UPDATE_ALL_HASH					// update output hashes
+	jmp	0b							// repeat for next block
+	.align	4,0x90
+1:
+#endif
+	ENDING							// update ABCDE (i=64:79)
+	UPDATE_ALL_HASH					// update output hashes
+	.endm
+
+#include <i386/cpu_capabilities.h>
+
+	.text
+
+	.globl _SHA1Transform
+	.private_extern	_SHA1Transform	
+_SHA1Transform:
+
+	// detect SSSE3 and dispatch appropriate code branch
+	#if defined __x86_64__
+    	movq    __cpu_capabilities@GOTPCREL(%rip), %rax         // %rax -> __cpu_capabilities
+    	mov     (%rax), %eax                                    // %eax = __cpu_capabilities
+	#else       // i386
+		#if defined KERNEL
+    		leal    __cpu_capabilities, %eax                    // %eax -> __cpu_capabilities
+    		mov     (%eax), %eax                                // %eax = __cpu_capabilities
+		#else
+    		mov    _COMM_PAGE_CPU_CAPABILITIES, %eax
+		#endif
+	#endif
+    test    $(kHasSupplementalSSE3), %eax
+    je      _SHA1Transform_nossse3                    			// branch to no-ssse3 code
+
+
+	// start the sha1 code with ssse3 support
+
+	// save callee-save registers
+#if defined (__x86_64__)
+	push	%rbx
+	push	%rbp
+#else
+    push    %ebx
+    push    %ebp
+    push    %esi
+    push    %edi
+#endif
+
+	sub		$stack_size, sp					// allocate stack memory for use
+
+	// save used xmm registers if this is for kernel
+#if	KERNEL
+	xmov	%xmm0, 4*16(sp)
+	xmov	%xmm1, 5*16(sp)
+	xmov	%xmm2, 6*16(sp)
+	xmov	%xmm3, 7*16(sp)
+	xmov	%xmm4, 8*16(sp)
+	xmov	%xmm5, 9*16(sp)
+	xmov	%xmm6, 10*16(sp)
+	xmov	%xmm7, 11*16(sp)
+#if defined (__x86_64__)
+	xmov	%xmm8, 12*16(sp)
+	xmov	%xmm9, 13*16(sp)
+	xmov	%xmm10, 14*16(sp)
+#endif
+#endif
+
+#if defined (__x86_64__)
+
+	// set up registers to free %edx/%edi/%esi for other use (ABCDE)
+	mov		ctx, HASH_PTR
+	mov		buf, BUFFER_PTR
+#if Multiple_Blocks
+	mov		%rdx, cnt
+#endif
+	lea		K_XMM_AR(%rip), K_BASE
+	xmov	0x40(K_BASE), XMM_SHUFB_BSWAP
+
+#else	// __i386__
+
+#if	KERNEL
+    lea     K_XMM_AR, %eax
+#else
+	// i386 PIC: compute the address of K_XMM_AR relative to the program counter
+	call	0f							// push program counter onto stack
+0:	pop		%eax						// %eax = program counter (address of label 0)
+	lea	K_XMM_AR-0b(%eax), %eax
+#endif
+    mov     %eax, K_BASE
+    xmov    0x40(%eax), %xmm0
+    xmov    %xmm0, XMM_SHUFB_BSWAP
+
+#endif
+
+	SHA1_PIPELINED_MAIN_BODY_ssse3
+
+	// restore used xmm registers if this is for kernel
+#if	KERNEL
+	xmov	4*16(sp), %xmm0
+	xmov	5*16(sp), %xmm1
+	xmov	6*16(sp), %xmm2
+	xmov	7*16(sp), %xmm3
+	xmov	8*16(sp), %xmm4
+	xmov	9*16(sp), %xmm5
+	xmov	10*16(sp), %xmm6
+	xmov	11*16(sp), %xmm7
+#if defined (__x86_64__)
+	xmov	12*16(sp), %xmm8
+	xmov	13*16(sp), %xmm9
+	xmov	14*16(sp), %xmm10
+#endif
+#endif
+
+	add		$stack_size, sp		// deallocate stack memory
+
+	// restore callee-save registers
+#if defined (__x86_64__)
+	pop		%rbp
+	pop		%rbx
+#else
+    pop     %edi
+    pop     %esi
+    pop     %ebp
+    pop     %ebx
+#endif
+
+	ret							// return
+
+	// this is equivalent to the above function _SHA1Transform, but it does not use ssse3 instructions
+
+	.globl _SHA1Transform_nossse3
+	.private_extern	_SHA1Transform_nossse3
+_SHA1Transform_nossse3:
+
+	// push callee-save registers
+#if defined (__x86_64__)
+	push	%rbx
+	push	%rbp
+#else
+    push    %ebx
+    push    %ebp
+    push    %esi
+    push    %edi
+#endif
+
+	sub		$stack_size, sp			// allocate stack memory for local use
+
+	// save used xmm registers if this is for kernel
+#if	KERNEL
+	xmov	%xmm0, 4*16(sp)
+	xmov	%xmm1, 5*16(sp)
+	xmov	%xmm2, 6*16(sp)
+	xmov	%xmm3, 7*16(sp)
+	xmov	%xmm4, 8*16(sp)
+	xmov	%xmm5, 9*16(sp)
+	xmov	%xmm6, 10*16(sp)
+	xmov	%xmm7, 11*16(sp)
+#if defined (__x86_64__)
+	xmov	%xmm8, 12*16(sp)
+	xmov	%xmm9, 13*16(sp)
+#endif
+#endif
+
+#if defined (__x86_64__)
+
+	// set up registers to free %edx/%edi/%esi for other use (ABCDE)
+	mov		ctx, HASH_PTR
+	mov		buf, BUFFER_PTR
+#if Multiple_Blocks
+	mov		%rdx, cnt
+#endif
+	lea		K_XMM_AR(%rip), K_BASE
+
+#else	// __i386__
+
+#if	KERNEL
+    lea     K_XMM_AR, %eax
+#else
+	// i386 PIC: compute the address of K_XMM_AR relative to the program counter
+	call	0f							// push program counter onto stack
+0:	pop		%eax						// %eax = program counter (address of label 0)
+	lea	K_XMM_AR-0b(%eax), %eax
+#endif
+    mov     %eax, K_BASE
+
+#endif
+
+	SHA1_PIPELINED_MAIN_BODY_nossse3
+
+	// restore used xmm registers if this is for kernel
+#if	KERNEL
+	xmov	4*16(sp), %xmm0
+	xmov	5*16(sp), %xmm1
+	xmov	6*16(sp), %xmm2
+	xmov	7*16(sp), %xmm3
+	xmov	8*16(sp), %xmm4
+	xmov	9*16(sp), %xmm5
+	xmov	10*16(sp), %xmm6
+	xmov	11*16(sp), %xmm7
+#if defined (__x86_64__)
+	xmov	12*16(sp), %xmm8
+	xmov	13*16(sp), %xmm9
+#endif
+#endif
+
+	add		$stack_size, sp		// deallocate stack memory
+
+	// restore callee-save registers
+#if defined (__x86_64__)
+	pop		%rbp
+	pop		%rbx
+#else
+    pop     %edi
+    pop     %esi
+    pop     %ebp
+    pop     %ebx
+#endif
+
+	ret							// return
+
+	.const
+	.align	4, 0x90	
+
+#define K1 0x5a827999
+#define K2 0x6ed9eba1
+#define K3 0x8f1bbcdc
+#define K4 0xca62c1d6
+
+K_XMM_AR:
+    .long	K1
+	.long	K1
+	.long	K1
+	.long	K1
+    .long	K2
+	.long	K2
+	.long	K2
+	.long	K2
+    .long	K3
+	.long	K3
+	.long	K3
+	.long	K3
+    .long	K4
+	.long	K4
+	.long	K4
+	.long	K4
+// bswap_shufb_ctl:	invoked thru 0x40(K_XMM_AR)
+    .long	0x00010203
+    .long	0x04050607
+    .long	0x08090a0b
+    .long	0x0c0d0e0f
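+//	stored little-endian, the mask bytes read 3,2,1,0,7,6,5,4,... in memory,
+//	so pshufb with this mask reverses the bytes within each 32-bit word,
+//	implementing the big-endian load of message words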
+
+
+
+#endif	// architecture x86_64 or i386
diff --git a/libkern/crypto/sha1.c b/libkern/crypto/sha1.c
index 69e9eec42..b85cbec96 100644
--- a/libkern/crypto/sha1.c
+++ b/libkern/crypto/sha1.c
@@ -57,6 +57,11 @@
 #include <sys/systm.h>
 #include <libkern/OSAtomic.h>
 #include <libkern/crypto/sha1.h>
+#define	SHA1_TIMER	0		// change to nonzero to emit kdebug timing stamps for profiling SHA1Transform
+
+#if SHA1_TIMER
+#include <sys/kdebug.h>
+#endif
 
 #define	memset(x, y, z)	bzero(x, z);
 #define	memcpy(x, y, z)	bcopy(y, x, z)
@@ -139,8 +144,11 @@ static unsigned char PADDING[64] = { 0x80, /* zeros */ };
 	(p) = ROTATE_LEFT(p, 1);	\
 }
 
-static void SHA1Transform(u_int32_t, u_int32_t, u_int32_t, u_int32_t,
-    u_int32_t, const u_int8_t *, SHA1_CTX *);
+#if (defined (__x86_64__) || defined (__i386__)) 
+extern void SHA1Transform(SHA1_CTX *, const u_int8_t *, u_int32_t Nblocks);
+#else
+static void SHA1Transform(SHA1_CTX *, const u_int8_t *);
+#endif
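+
+/*
+ * On x86 the assembly SHA1Transform takes a block count, so one call can
+ * hash any number of complete 64-byte blocks, e.g. (a sketch):
+ *
+ *	SHA1Transform(context, data, nblocks);	// hashes data[0 .. 64*nblocks-1]
+ */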
 
 void _SHA1Update(SHA1_CTX *context, const void *inpp, size_t inputLen);
 
@@ -199,19 +207,36 @@ _SHA1Update(SHA1_CTX *context, const void *inpp, size_t inputLen)
 	if (inputLen >= partLen) {
 		if (index != 0) {
 			memcpy(&context->buffer[index], input, partLen);
-			SHA1Transform(context->state[0], context->state[1],
-			    context->state[2], context->state[3],
-			    context->state[4], context->buffer, context);
+#if (defined (__x86_64__) || defined (__i386__)) 
+			SHA1Transform(context, context->buffer, 1);
+#else
+			SHA1Transform(context, context->buffer);
+#endif
 			i = partLen;
 		}
 
+#if SHA1_TIMER
+		KERNEL_DEBUG_CONSTANT(0xaa800004 | DBG_FUNC_START, 0, 0, 0, 0, 0);
+#endif
+#if (defined (__x86_64__) || defined (__i386__)) 
+		{
+			int kk = (inputLen - i) >> 6;
+			if (kk > 0) {
+				SHA1Transform(context, &input[i], kk);
+				i += (kk << 6);
+			}
+		}
+#else
 		for (; i + 63 < inputLen; i += 64)
-			SHA1Transform(context->state[0], context->state[1],
-			    context->state[2], context->state[3],
-			    context->state[4], &input[i], context);
+			SHA1Transform(context, &input[i]);
+#endif
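+		/*
+		 * Worked example (a sketch): with i == 0 and inputLen == 200,
+		 * kk == 200>>6 == 3, so three 64-byte blocks are hashed in a
+		 * single call and i advances to 192; the trailing 8 bytes are
+		 * buffered in context->buffer for a later update or the final.
+		 */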
 
-		if (inputLen == i)
+		if (inputLen == i) {
+#if SHA1_TIMER
+			KERNEL_DEBUG_CONSTANT(0xaa800004 | DBG_FUNC_END, 0, 0, 0, 0, 0);
+#endif
 			return;
+		}
 
 		index = 0;
 	}
@@ -358,14 +383,21 @@ SHA1Final(void *digest, SHA1_CTX *context)
 /*
  * SHA1 basic transformation. Transforms state based on block.
  */
+#if !(defined (__x86_64__) || defined (__i386__)) 
 static void
-SHA1Transform(u_int32_t a, u_int32_t b, u_int32_t c, u_int32_t d,
-    u_int32_t e, const u_int8_t block[64], SHA1_CTX *context)
+SHA1Transform(SHA1_CTX *context, const u_int8_t block[64])
 {
 	/* Register (instead of array) is a win in most cases */
+	register u_int32_t a, b, c, d, e;
 	register u_int32_t w0, w1, w2, w3, w4, w5, w6, w7;
 	register u_int32_t w8, w9, w10, w11, w12, w13, w14, w15;
 
+	a = context->state[0];
+	b = context->state[1];
+	c = context->state[2];
+	d = context->state[3];
+	e = context->state[4];
+
 	w15 = FETCH_32(block + 60);
 	w14 = FETCH_32(block + 56);
 	w13 = FETCH_32(block + 52);
@@ -480,3 +512,4 @@ SHA1Transform(u_int32_t a, u_int32_t b, u_int32_t c, u_int32_t d,
 	w15 = w14 = w13 = w12 = w11 = w10 = w9 = w8 = 0;
 	w7 = w6 = w5 = w4 = w3 = w2 = w1 = w0 = 0;
 }
+#endif
diff --git a/libkern/gen/OSAtomicOperations.c b/libkern/gen/OSAtomicOperations.c
index aeeb09364..3484791d5 100644
--- a/libkern/gen/OSAtomicOperations.c
+++ b/libkern/gen/OSAtomicOperations.c
@@ -54,8 +54,8 @@ enum {
  * Like standards, there are a lot of atomic ops to choose from!
  */
 
-#if !defined(__ppc__) && !defined(__i386__) && !defined(__x86_64__)
-/* Implemented in assembly for ppc and i386 and x86_64 */
+#if !defined(__i386__) && !defined(__x86_64__)
+/* Implemented in assembly for i386 and x86_64 */
 #undef OSAddAtomic
 SInt32
 OSAddAtomic(SInt32 amount, volatile SInt32 * value)
@@ -72,6 +72,7 @@ OSAddAtomic(SInt32 amount, volatile SInt32 * value)
 	return oldValue;
 }
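+
+/*
+ * The generic OSAddAtomic above retries with OSCompareAndSwap until the
+ * update wins, i.e. the usual lock-free read-modify-write pattern
+ * (a sketch, not the verbatim body):
+ *
+ *	do {
+ *		oldValue = *value;
+ *	} while (!OSCompareAndSwap(oldValue, oldValue + amount,
+ *	    (volatile UInt32 *)value));
+ */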
 
+#undef OSAddAtomicLong
 long
 OSAddAtomicLong(long theAmount, volatile long *address)
 {
@@ -82,7 +83,7 @@ OSAddAtomicLong(long theAmount, volatile long *address)
 #endif
 }
 
-/* Implemented as an assembly alias for i386 and linker alias for ppc */
+/* Implemented as an assembly alias for i386 */
 #undef OSCompareAndSwapPtr
 Boolean OSCompareAndSwapPtr(void *oldValue, void *newValue,
 			    void * volatile *address)
@@ -97,9 +98,6 @@ Boolean OSCompareAndSwapPtr(void *oldValue, void *newValue,
 }
 #endif
 
-#ifndef __ppc__
-/* Implemented as assembly for ppc */
-
 #undef OSIncrementAtomic
 SInt32	OSIncrementAtomic(volatile SInt32 * value)
 {
@@ -111,7 +109,6 @@ SInt32	OSDecrementAtomic(volatile SInt32 * value)
 {
 	return OSAddAtomic(-1, value);
 }
-#endif	/* !__ppc__ */
 
 static UInt32	OSBitwiseAtomic(UInt32 and_mask, UInt32 or_mask, UInt32 xor_mask, volatile UInt32 * value)
 {
diff --git a/libkern/gen/OSDebug.cpp b/libkern/gen/OSDebug.cpp
index de1d99372..3e67cfff8 100644
--- a/libkern/gen/OSDebug.cpp
+++ b/libkern/gen/OSDebug.cpp
@@ -28,6 +28,7 @@
 
 // NOTE:  This file is only c++ so I can get static initialisers going
 #include <libkern/OSDebug.h>
+#include <IOKit/IOLib.h>
 
 #include <sys/cdefs.h>
 
@@ -179,41 +180,23 @@ x86_64_validate_stackptr(vm_offset_t stackptr)
 }
 #endif
 
+void
+OSPrintBacktrace(void)
+{
+	void * btbuf[20];
+	int tmp = OSBacktrace(btbuf, 20);
+	int i;
+
+	for (i = 0; i < tmp; i++) {
+		kprintf("bt[%.2d] = %p\n", i, btbuf[i]);
+	}
+}
 
 unsigned OSBacktrace(void **bt, unsigned maxAddrs)
 {
     unsigned frame;
 
-#if __ppc__
-    vm_offset_t stackptr, stackptr_prev;
-    const vm_offset_t * const mem = (vm_offset_t *) 0;
-    unsigned i = 0;
-
-    __asm__ volatile("mflr %0" : "=r" (stackptr)); 
-    bt[i++] = (void *) stackptr;
-
-    __asm__ volatile("mr %0,r1" : "=r" (stackptr)); 
-    for ( ; i < maxAddrs; i++) {
-	// Validate we have a reasonable stackptr
-	if ( !(minstackaddr <= stackptr && stackptr < maxstackaddr)
-	|| (stackptr & 3))
-	    break;
-
-	stackptr_prev = stackptr;
-	stackptr = mem[stackptr_prev >> 2];
-	if ((stackptr - stackptr_prev) > 8 * 1024)	// Sanity check
-	    break;
-
-	vm_offset_t addr = mem[(stackptr >> 2) + 2]; 
-	if ((addr & 3) || (addr < 0x8000))	// More sanity checks
-	    break;
-	bt[i] = (void *) addr;
-    }
-    frame = i;
-
-    for ( ; i < maxAddrs; i++)
-	    bt[i] = (void *) 0;
-#elif __i386__
+#if __i386__
 #define SANE_i386_FRAME_SIZE (kernel_stack_size >> 1)
     vm_offset_t stackptr, stackptr_prev, raddr;
     unsigned frame_index = 0;
diff --git a/libkern/kernel_mach_header.c b/libkern/kernel_mach_header.c
index e0830d99d..0edc6b64d 100644
--- a/libkern/kernel_mach_header.c
+++ b/libkern/kernel_mach_header.c
@@ -68,6 +68,33 @@ getlastaddr(void)
 	return last_addr;
 }
 
+/*
+ * Find the LC_UUID load command in the Mach-O header, returning the
+ * address of the UUID blob and storing its size in "*size". If the
+ * Mach-O image has no UUID load command, NULL is returned.
+ */
+void *
+getuuidfromheader(kernel_mach_header_t *mhp, unsigned long *size)
+{
+	struct uuid_command *uuidp;
+	unsigned long i;
+
+	uuidp = (struct uuid_command *)
+		((uintptr_t)mhp + sizeof(kernel_mach_header_t));
+	for (i = 0; i < mhp->ncmds; i++) {
+		if (uuidp->cmd == LC_UUID) {
+			if (size)
+				*size = sizeof(uuidp->uuid);
+
+			return (void *)uuidp->uuid;
+		}
+
+		uuidp = (struct uuid_command *)((uintptr_t)uuidp + uuidp->cmdsize);
+	}
+
+	return NULL;
+}
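+
+/*
+ * Typical use (a sketch; &_mh_execute_header, the kernel's own mach
+ * header, is one valid argument):
+ *
+ *	unsigned long uuid_size;
+ *	void *uuid = getuuidfromheader(&_mh_execute_header, &uuid_size);
+ *	if (uuid != NULL)
+ *		// the 16-byte UUID is at "uuid"
+ */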
+
 /*
  * This routine returns a pointer to the data for the named section in the
  * named segment if it exists in the mach header passed to it.  Also it returns
diff --git a/libkern/kmod/Makefile.kmod b/libkern/kmod/Makefile.kmod
index 0c35bdf4d..62ffd893b 100644
--- a/libkern/kmod/Makefile.kmod
+++ b/libkern/kmod/Makefile.kmod
@@ -18,7 +18,8 @@ LIB_INSTALL_FLAGS = 	-p -m 444
 
 # -mkernel implies -mlong-branch/-mlong-calls/-mno-red-zone as needed for
 # code linked into kexts
-CFLAGS_KMOD = $(filter-out -O0 -O1 -O2 -O3 -O4 -Os -Oz,$(CFLAGS)) \
+# -fno-stack-protector is necessary for the kernel, but not for kexts
+CFLAGS_KMOD = $(filter-out -O0 -O1 -O2 -O3 -O4 -Os -Oz -freorder-blocks -flto -fno-stack-protector,$(CFLAGS)) \
 	    -Os -mkernel -Wall
 
 ifneq ($(MACHINE_CONFIG), DEFAULT)
@@ -39,8 +40,8 @@ KMODCPP_OFILES = $(KMODCPP_CFILES:.c=.o)
 ALL_OFILES = $(KMOD_OFILES) $(KMODCPP_OFILES)
 
 $(ALL_OFILES): %.o : %.c
-	@echo CC $@
-	$(_v)${KCC} -c ${CFLAGS_KMOD} ${${join $@,_CFLAGS}} ${INCFLAGS} ${${join $@,_INCFLAGS}} -o $(COMPOBJROOT)/$(*F).o $<
+	@echo LIBKMOD_CC $@
+	$(_v)${LIBKMOD_CC} -c ${CFLAGS_KMOD} ${${join $@,_CFLAGS}} ${INCFLAGS} ${${join $@,_INCFLAGS}} -o $(COMPOBJROOT)/$(*F).o $<
 
 $(COMPOBJROOT)/$(KMOD_NAME).a: $(KMOD_OFILES)
 	@echo LIBTOOL $(notdir $@)
@@ -59,7 +60,11 @@ $(INSTALL_DIR)/%.a: $(INSTOBJROOT)/%.a
 	if [ $(MACHINE_CONFIG) = DEFAULT ]; then			\
 		allarchs="";						\
 		for onearch in $(INSTALL_ARCHS); do			\
-			archdir=$(OBJROOT)/$(KERNEL_CONFIG)_$${onearch}/$(COMPONENT); \
+			if [ $${onearch} = ARM ] ; then			\
+				archdir=$(OBJROOT)/$(KERNEL_CONFIG)_$${onearch}_$(DEFAULT_ARM_MACHINE_CONFIG)/$(COMPONENT); \
+			else						\
+				archdir=$(OBJROOT)/$(KERNEL_CONFIG)_$${onearch}/$(COMPONENT); \
+			fi;						\
 			if [ -e $${archdir}/kmod/$(*F).a ]; then	\
 				allarchs="$${allarchs} $${archdir}/kmod/$(*F).a"; \
 			fi;						\
@@ -79,6 +84,11 @@ $(INSTALL_DIR)/%.a: $(INSTOBJROOT)/%.a
 				my_aconfig=$${my_config};		\
 			else						\
 				my_counter=1;				\
+				if [ $${my_aconfig} = ARM ] ; then	\
+					if [ $${my_config} = DEFAULT ] ; then	\
+						my_config=$(DEFAULT_ARM_MACHINE_CONFIG);	\
+					fi;		    		\
+				fi;					\
 				inputfile=$(OBJROOT)/$${my_kconfig}_$${my_aconfig}_$${my_config}/$(COMPONENT)/kmod/$(*F).a; \
 				if [ -e $${inputfile} ]; then		\
 					if [ $${my_innercounter} -eq 1 ]; then \
diff --git a/libkern/kmod/cplus_start.c b/libkern/kmod/cplus_start.c
index 8ae7a0193..1a2f3b9a0 100644
--- a/libkern/kmod/cplus_start.c
+++ b/libkern/kmod/cplus_start.c
@@ -42,7 +42,12 @@
     The linkline must look like this.
         *.o -lkmodc++ kmod_info.o -lkmod
  */
-#if __i386__ || __ppc__
+
+/* The following preprocessor test must match exactly with the architectures
+ * that define the CONFIG_STATIC_CPPINIT config option.
+ */
+#if __i386__
+
 #include <mach/mach_types.h>
 #include <libkern/OSKextLib.h>
 
diff --git a/libkern/kmod/cplus_stop.c b/libkern/kmod/cplus_stop.c
index b4ce5236a..2b2bdc688 100644
--- a/libkern/kmod/cplus_stop.c
+++ b/libkern/kmod/cplus_stop.c
@@ -42,7 +42,12 @@
     The linkline must look like this.
         *.o -lkmodc++ kmod_info.o -lkmod
  */
-#if __i386__ || __ppc__
+
+/* The following preprocessor test must match exactly with the architectures
+ * that define the CONFIG_STATIC_CPPINIT config option.
+ */
+#if __i386__
+
 #include <mach/mach_types.h>
 
 asm(".destructors_used = 0");
diff --git a/libkern/kxld/Makefile b/libkern/kxld/Makefile
index 9bc3566c6..0e962487b 100644
--- a/libkern/kxld/Makefile
+++ b/libkern/kxld/Makefile
@@ -36,15 +36,16 @@ endif
 PRODUCT_TYPE ?= DYLIB
 
 HDRDST=$(DSTROOT)/usr/local/include
-LIBDST=$(DSTROOT)/usr/lib/system
+DYLIBDST=$(DSTROOT)/usr/lib/system
 ARCHIVEDST=$(DSTROOT)/usr/local/lib
-LIBOBJ=$(OBJROOT)/libkxld.o
 LIBKXLD_DYLIB=libkxld.dylib
 LIBKXLD_ARCHIVE=libkxld.a
-LIBKXLDNAME=/usr/lib/system/$(LIBKXLD_DYLIB)
+LIBKXLD_INSTALLNAME=/usr/lib/system/$(LIBKXLD_DYLIB)
 LIBKXLDOBJ_DYLIB=$(OBJROOT)/$(LIBKXLD_DYLIB)
 LIBKXLDOBJ_ARCHIVE=$(OBJROOT)/$(LIBKXLD_ARCHIVE)
-LIBKXLDDST_DYLIB=$(LIBDST)/$(LIBKXLD_DYLIB)
+LIBKXLDSYM_DYLIB=$(SYMROOT)/$(LIBKXLD_DYLIB)
+LIBKXLDSYM_ARCHIVE=$(SYMROOT)/$(LIBKXLD_ARCHIVE)
+LIBKXLDDST_DYLIB=$(DYLIBDST)/$(LIBKXLD_DYLIB)
 LIBKXLDDST_ARCHIVE=$(ARCHIVEDST)/$(LIBKXLD_ARCHIVE)
 TESTSRC=$(SRCROOT)/tests
 TESTDST=./BUILD/tests
@@ -55,15 +56,20 @@ CFLAGS=-std=c99 -Wall -Wextra -Werror -pedantic -Wformat=2 -Wcast-align \
 	-Wwrite-strings -Wshorten-64-to-32 -Wshadow -Winit-self -Wpointer-arith \
 	-Wno-format-y2k -W -Wstrict-prototypes -Wmissing-prototypes -Wreturn-type \
 	-Wcast-qual -Wwrite-strings -Wswitch -Wcast-align -Wbad-function-cast \
-	-Wchar-subscripts -Winline -Wnested-externs -Wredundant-decls -g \
-	-isysroot $(SDKROOT)
-LDFLAGS=$(ARCHS) -dynamiclib -install_name $(LIBKXLDNAME) \
+	-Wchar-subscripts -Winline -Wnested-externs -Wredundant-decls -g
+LDFLAGS=$(ARCHS) -dynamiclib -install_name $(LIBKXLD_INSTALLNAME) \
 	-compatibility_version $(COMPATIBILITY_VERSION) \
-	-current_version $(CURRENT_VERSION) -isysroot $(SDKROOT) -lstdc++
-INCLUDES=-I$(HDRSRC) $(INCFLAGS_EXTERN)
+	-current_version $(CURRENT_VERSION) -lstdc++
+INCLUDES=-I$(HDRSRC)
+
+ifneq ($(SDKROOT),/)
+	CFLAGS += -isysroot $(SDKROOT)
+	LDFLAGS += -isysroot $(SDKROOT)
+endif
 
 # Tools
 CC = xcrun -sdk $(SDKROOT) cc
+CLANG_ANALYZER = clang --analyze
 LIBTOOL = xcrun -sdk $(SDKROOT) libtool
 STRIP = xcrun -sdk $(SDKROOT) strip
 
@@ -73,10 +79,11 @@ CFLAGS+=-Wno-cast-align
 endif
 
 # Files
-HDR_NAMES=kxld.h kxld_types.h
+HDR_NAMES=kxld.h kxld_types.h WKdm.h
 OBJ_NAMES=kxld.o kxld_array.o kxld_copyright.o kxld_demangle.o kxld_dict.o \
-	kxld_kext.o kxld_reloc.o kxld_sect.o kxld_seg.o kxld_sym.o kxld_state.o \
-	kxld_symtab.o kxld_util.o kxld_uuid.o kxld_vtable.o
+	kxld_kext.o kxld_object.o kxld_reloc.o kxld_sect.o kxld_seg.o 		\
+	kxld_sym.o kxld_symtab.o kxld_util.o kxld_uuid.o kxld_vtable.o \
+	WKdmCompress.o WKdmDecompress.o
 HDRS=$(addprefix $(HDRSRC)/, $(HDR_NAMES))
 OBJS=$(addprefix $(OBJROOT)/, $(OBJ_NAMES))
 
@@ -108,11 +115,17 @@ profile: OPTIM=-Os -pg -dynamic
 profile: build
 
 tests: OPTIM=-O0 -DDEBUG
-tests: kxld_dict_test copyrighttest
+tests: kxld_array_test kxld_dict_test copyrighttest
+
+build: $(LIBKXLDSYM_$(PRODUCT_TYPE))
 
-build: $(LIBKXLDOBJ_$(PRODUCT_TYPE))
-	@[ -d $(SYMROOT) ] || mkdir -p $(SYMROOT)
-	install -c -m 644 $< $(SYMROOT)
+$(LIBKXLDSYM_DYLIB): $(LIBKXLDOBJ_DYLIB)
+	@mkdir -p $(SYMROOT)
+	install -c -m 644 $< $@
+
+$(LIBKXLDSYM_ARCHIVE): $(LIBKXLDOBJ_ARCHIVE)
+	@mkdir -p $(SYMROOT)
+	install -c -m 644 $< $@
 
 $(LIBKXLDOBJ_DYLIB): $(OBJS)
 	$(CC) $(LDFLAGS) -o $@ $^
@@ -121,39 +134,56 @@ $(LIBKXLDOBJ_ARCHIVE): $(OBJS)
 	$(LIBTOOL) -static -o $@ $^
 
 installhdrs:
-	@[ -d $(HDRDST) ] || mkdir -p $(HDRDST)
+	@mkdir -p $(HDRDST)
 	install -o 0 -g 0 -c -m 444 $(HDRS) $(HDRDST)
 
 install: release installhdrs $(LIBKXLDDST_$(PRODUCT_TYPE))
 
-$(LIBKXLDDST_DYLIB):
-	@[ -d $(LIBDST) ] || mkdir -p $(LIBDST)
-	install -o 0 -g 0 -c -m 555 $(SYMROOT)/$(LIBKXLD_DYLIB) $(LIBDST)
+$(LIBKXLDDST_DYLIB): $(LIBKXLDSYM_DYLIB)
+	@mkdir -p $(DYLIBDST)
+	install -o 0 -g 0 -c -m 555 $< $@
 	$(STRIP) -S -x $@
 
-$(LIBKXLDDST_ARCHIVE):
-	@[ -d $(ARCHIVEDST) ] || mkdir -p $(ARCHIVEDST)
-	install -o 0 -g 0 -c -m 555 $(SYMROOT)/$(LIBKXLD_ARCHIVE) $(ARCHIVEDST)
+$(LIBKXLDDST_ARCHIVE): $(LIBKXLDSYM_ARCHIVE)
+	@mkdir -p $(ARCHIVEDST)
+	install -o 0 -g 0 -c -m 555 $< $@
 
 
 KEXTCOPYOBJS=$(OBJROOT)/kextcopyright.o $(OBJROOT)/kxld_copyright.o $(OBJROOT)/kxld_util.o 
-kextcopyright: $(KEXTCOPYOBJS) $(TESTDST)
+kextcopyright: $(TESTDST)/kextcopyright
+$(TESTDST)/kextcopyright: $(KEXTCOPYOBJS)
+	@mkdir -p $(TESTDST)
 	$(CC) $(ARCHS) $(KEXTCOPYOBJS) -framework CoreFoundation -o $(OBJROOT)/kextcopyright
-	install -c -m 755 $(OBJROOT)/kextcopyright $(TESTDST)
+	install -c -m 755 $(OBJROOT)/kextcopyright $@
+
+TESTOBJS=$(OBJROOT)/kxld_test.o $(OBJROOT)/kxld_util.o
+
+ARRAYOBJS=$(OBJROOT)/kxld_array_test.o $(OBJROOT)/kxld_array.o $(TESTOBJS)
+kxld_array_test: $(TESTDST)/kxld_array_test
+$(TESTDST)/kxld_array_test: $(ARRAYOBJS)
+	@mkdir -p $(TESTDST)
+	$(CC) $(ARCHS) $(ARRAYOBJS) -o $(OBJROOT)/kxld_array_test
+	install -c -m 755 $(OBJROOT)/kxld_array_test $@
 
-DICTOBJS=$(OBJROOT)/kxld_dict_test.o $(OBJROOT)/kxld_dict.o $(OBJROOT)/kxld_array.o $(OBJROOT)/kxld_util.o
-kxld_dict_test: $(DICTOBJS) $(TESTDST)
+DICTOBJS=$(OBJROOT)/kxld_dict_test.o $(OBJROOT)/kxld_dict.o $(OBJROOT)/kxld_array.o $(TESTOBJS)
+kxld_dict_test: $(TESTDST)/kxld_dict_test
+$(TESTDST)/kxld_dict_test: $(DICTOBJS)
+	@mkdir -p $(TESTDST)
 	$(CC) $(ARCHS) $(DICTOBJS) -o $(OBJROOT)/kxld_dict_test
-	install -c -m 755 $(OBJROOT)/kxld_dict_test $(TESTDST)
+	install -c -m 755 $(OBJROOT)/kxld_dict_test $@
 
 COPYTESTOBJS=$(OBJROOT)/kxld_copyright.o $(OBJROOT)/kxld_util.o
 copyrighttest: OPTIM+=-DTEST
-copyrighttest: $(KEXTCOPYOBJS) $(TESTDST)
+copyrighttest: $(TESTDST)/copyrighttest
+$(TESTDST)/copyrighttest: $(COPYTESTOBJS)
+	@mkdir -p $(TESTDST)
 	$(CC) $(ARCHS) $(COPYTESTOBJS) -framework CoreFoundation -framework IOKit -o $(OBJROOT)/copyrighttest
-	install -c -m 755 $(OBJROOT)/copyrighttest $(TESTDST)
+	install -c -m 755 $(OBJROOT)/copyrighttest $@
 
-$(TESTDST):
-	@[ -d $(TESTDST) ] || mkdir -p $(TESTDST)
+analyze:
+	@$(CLANG_ANALYZER) *.c
+	@$(CLANG_ANALYZER) -I. tests/*.c
+	@rm -f *.plist
 
 clean: 
 	@rm -rf $(OBJROOT)/*
diff --git a/iokit/Kernel/WKdmCompress.c b/libkern/kxld/WKdmCompress.c
similarity index 97%
rename from iokit/Kernel/WKdmCompress.c
rename to libkern/kxld/WKdmCompress.c
index c58477371..db2c5c05b 100644
--- a/iokit/Kernel/WKdmCompress.c
+++ b/libkern/kxld/WKdmCompress.c
@@ -245,7 +245,7 @@ WKdm_compress (WK_word* src_buf,
    */
 
   {
-    unsigned int num_bytes_to_pack = next_qp - (char *) tempQPosArray;
+    unsigned int num_bytes_to_pack = (unsigned int)(next_qp - (char *) tempQPosArray);
     unsigned int num_packed_words = (num_bytes_to_pack + 7) >> 3; // ceil((double) num_bytes_to_pack / 8);
     unsigned int num_source_words = num_packed_words * 2;
     WK_word* endQPosArray = tempQPosArray + num_source_words;
@@ -296,7 +296,7 @@ WKdm_compress (WK_word* src_buf,
 
   {
     unsigned int num_tenbits_to_pack =
-      next_low_bits - tempLowBitsArray;
+      (unsigned int)(next_low_bits - tempLowBitsArray);
     unsigned int num_packed_words = (num_tenbits_to_pack + 2) / 3; //ceil((double) num_tenbits_to_pack / 3);
     unsigned int num_source_words = num_packed_words * 3;
     WK_word* endLowBitsArray = tempLowBitsArray + num_source_words;
@@ -324,5 +324,5 @@ WKdm_compress (WK_word* src_buf,
 
   }
 
-  return ((char *) boundary_tmp - (char *) dest_buf);
+  return (unsigned int)((char *) boundary_tmp - (char *) dest_buf);
 } 
diff --git a/iokit/Kernel/WKdmDecompress.c b/libkern/kxld/WKdmDecompress.c
similarity index 100%
rename from iokit/Kernel/WKdmDecompress.c
rename to libkern/kxld/WKdmDecompress.c
diff --git a/libkern/kxld/i386/WKdmCompress.s b/libkern/kxld/i386/WKdmCompress.s
new file mode 100644
index 000000000..f7d98440c
--- /dev/null
+++ b/libkern/kxld/i386/WKdmCompress.s
@@ -0,0 +1,597 @@
+// $Id: WKdmCompress.intel.s,v 1.1 2010/01/28 22:33:24 cclee Exp cclee $
+//
+// This file contains the i386 and x86_64 (no SSE) optimized implementations of the WKdm Compressor. The function prototype is
+// 
+// unsigned int WKdm_compress (WK_word* src_buf, WK_word* dest_buf, unsigned int num_input_words);
+// 
+// The implementation assumes the input buffer is a memory page (4096 bytes, i.e. 1024 words), or anything smaller than 4KB.
+//
+// WKdm Compression algorithm is briefly stated as follows:
+// 
+// There is a dynamically updated dictionary of 16 words, each initialized with "1".
+//
+// the dictionary is indexed as follows:
+//	0, x = input_word
+//  1, hash_index = (x>>10)&255
+//  2, dict_location = &dictionary[hash_index]
+//  3, dict_word = *dict_location
+//
+// Each input word is sequentially classified/tagged into one of 4 classes
+//	0 : the input word is 0
+//  1 : the higher 22 bits of the input word are identical to the higher bits of the dictionary word (hash table indexed)
+//  2 : the above partial-match condition is not met -- a dictionary miss
+//  3 : the input word exactly matches the word from the dictionary (hash table indexed)
+//
+// after each input word is classified, its tag is represented by 2 bits. Furthermore, for each class
+//	0 : no further info is needed
+//  1 : the hash_index is represented by 4 bits (8 packed into a word),
+//		and the lower 10 bits are sent to the decompressor (3 packed into a word)
+//  2 : the 32-bit word is sent to the decompressor
+//  3 : the hash_index is represented by 4 bits (8 packed into a word)
+//
+// for classes 1 and 2, the input word is used to update the dictionary after it is classified/tagged
+//
+// the following implementation was started by compiling (gcc -O3) the original C code (WKdmCompress.c)
+// and then subsequently improved and documented.
+// For i386, it is ~ 1.5 times faster
+// For x86_64, it is ~ 1.3 times faster
+//
+// cclee, 1/28/10
+
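+// A minimal C sketch of the per-word classification above (names assumed
+// from the original WKdmCompress.c):
+//
+//	hash_index    = (input_word >> 10) & 255;
+//	dict_location = (WK_word *)((char *)dictionary + hashLookupTable[hash_index]);
+//	dict_word     = *dict_location;
+//	if (input_word == 0) {
+//		tag = 0;					// ZERO
+//	} else if (input_word == dict_word) {
+//		tag = 3;					// EXACT: emit 4-bit dictionary index
+//		*next_qp++ = dict_location - dictionary;
+//	} else if ((input_word >> 10) == (dict_word >> 10)) {
+//		tag = 1;					// PARTIAL: emit index + low 10 bits
+//		*next_qp++ = dict_location - dictionary;
+//		*next_low_bits++ = input_word & 0x3FF;
+//		*dict_location = input_word;
+//	} else {
+//		tag = 2;					// MISS: emit the full 32-bit word
+//		*next_full_patt++ = input_word;
+//		*dict_location = input_word;
+//	}
+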
+#if !(defined __i386__ || defined __x86_64__)
+
+typedef char DummyDefinition;
+
+#else		// i386 or x86_64 architectures
+
+#if defined	__i386__			// 32-bit implementation
+
+	.text
+	.align 4,0x90
+
+.globl _WKdm_compress
+_WKdm_compress:
+
+	pushl	%ebp
+	movl	%esp, %ebp
+
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+
+	// allocate stack memory for local variables
+
+	subl	$6316, %esp
+
+	leal	_hashLookupTable, %ebx			        // hashTable
+
+	movl	8(%ebp), %edx					// %edx = src_buf
+	movl	12(%ebp), %esi					// %esi = dest_buf
+	movl	16(%ebp), %eax					// %eax = num_input_words
+
+	leal	-1112(%ebp), %ecx				// tempTagsArray
+	movl	%ecx, -6272(%ebp)				// a copy of char* next_tag = (char *) tempTagsArray;
+
+	leal	-2136(%ebp), %ecx				// tempQPosArray
+	movl	%ecx, -6264(%ebp)				// char* next_qp = (char *) tempQPosArray;
+	movl	%ecx, -6252(%ebp)
+
+	leal	(%edx,%eax,4), %ecx				// src_buf + num_input_words*4
+	movl	%ecx, -6244(%ebp)				// end_of_input = src_buf + num_input_words;
+
+	// PRELOAD_DICTIONARY;
+	movl	$1, -88(%ebp)
+	movl	$1, -84(%ebp)
+	movl	$1, -80(%ebp)
+	movl	$1, -76(%ebp)
+	movl	$1, -72(%ebp)
+	movl	$1, -68(%ebp)
+	movl	$1, -64(%ebp)
+	movl	$1, -60(%ebp)
+	movl	$1, -56(%ebp)
+	movl	$1, -52(%ebp)
+	movl	$1, -48(%ebp)
+	movl	$1, -44(%ebp)
+	movl	$1, -40(%ebp)
+	movl	$1, -36(%ebp)
+	movl	$1, -32(%ebp)
+	movl	$1, -28(%ebp)
+
+	shrl	$4, %eax						// (num_input_words / 16)
+	leal	16(%esi,%eax,4), %eax			// dest_buf + [TAGS_AREA_OFFSET + (num_input_words / 16)]*4
+	movl	%eax, -6256(%ebp)				// next_full_patt = dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16);
+
+	leal	-6232(%ebp), %eax				// &tempLowBitsArray[0]
+	movl	%eax, -6260(%ebp)				// save a copy of &tempLowBitsArray[0]
+	movl	%eax, -6248(%ebp)				// save a copy of &tempLowBitsArray[0]
+
+	cmpl	%ecx, %edx						// next_input_word (%edx) vs end_of_input (%ecx) 
+	jae		L_done_search					// if (next_input_word >= end_of_input) skip the following search loop
+
+	leal	-1111(%ebp), %esi				// &next_tag[1]
+	leal	-88(%ebp), %ebp					// dictionary 
+
+	movl	%edx, %edi						// next_input_word
+
+	#define		next_input_word		%edi
+	#define		dictionary			%ebp
+	#define		next_tag			%esi
+
+	jmp		L5
+
+	.align 4,0x90
+L_RECORD_ZERO:
+	movb	$0, -1(next_tag)				// *next_tag = ZERO;
+L8:
+	addl	$4, next_input_word				// next_input_word++; 
+	incl	next_tag						// next_tag++
+	cmpl	next_input_word, 84(%esp)		// end_of_input vs next_input_word
+	jbe		L_done_search					// if (next_input_word>=end_of_input), skip to L_done_search 
+L5:
+	movl	(next_input_word), %ecx			// input_word = *next_input_word;
+	movl	%ecx, %eax						// a copy of input_word
+	testl	%ecx, %ecx						// input_word
+	je		L_RECORD_ZERO					// if (input_word==0) RECORD_ZERO
+	shrl	$10, %eax						// input_high_bits = HIGH_BITS(input_word);
+	movl	%eax, (%esp)					// save a copy of input_high_bits;
+	andl	$255, %eax						// 8 bits index to Hash Table
+	movsbl	(%ebx,%eax),%edx				// HASH_TO_DICT_BYTE_OFFSET(input_word)
+	addl	dictionary, %edx				// ((char*) dictionary) + HASH_TO_DICT_BYTE_OFFSET(input_word));
+	movl	(%edx), %eax					// dict_word = *dict_location;
+	cmpl	%eax, %ecx						// cmp input_word vs dict_word
+	je		L_RECORD_EXACT
+	shrl	$10, %eax						// HIGH_BITS(dict_word)
+	cmpl	%eax, (%esp)					// input_high_bits vs HIGH_BITS(dict_word)
+	je		L_RECORD_PARTIAL				// if (input_high_bits == HIGH_BITS(dict_word)) RECORD_PARTIAL 
+
+L_RECORD_MISS:
+	movb	$2, -1(next_tag)				// *next_tag = 2 for miss
+	movl	72(%esp), %eax					// next_full_patt
+	movl	%ecx, (%eax)					// *next_full_patt = input_word;
+	addl	$4, %eax						// next_full_patt++;
+	movl	%eax, 72(%esp)					// save next_full_patt
+	movl	%ecx, (%edx)					// *dict_location = input_word
+	jmp		L8
+
+	.align 4,0x90
+L_RECORD_EXACT:
+	movb	$3, -1(next_tag)				// *next_tag = 3 for exact
+	subl	dictionary, %edx				// dict_location - dictionary 
+	sarl	$2, %edx						// divide by 4 for word offset 
+	movl	76(%esp), %eax					// next_qp
+	movb	%dl, (%eax)						// *next_qp = word offset (4-bit)
+	incl	%eax							// next_qp++
+	movl	%eax, 76(%esp)					// save next_qp
+	jmp		L8
+
+L_done_search:
+
+	// restore %ebp as normal use (was used as dictionary)
+	movl	%esp, %ebp						
+	addl	$6328, %ebp
+
+	// SET_QPOS_AREA_START(dest_buf,next_full_patt);
+	movl	-6256(%ebp), %edi				// next_full_patt
+	subl	12(%ebp), %edi					// next_full_patt - dest_buf
+	movl	%edi, %eax						// next_full_patt - dest_buf
+	sarl	$2, %eax						// in 4-byte words
+	movl	%eax, -6240(%ebp)				// save (next_full_patt - dest_buf) in words
+	movl	12(%ebp), %edx					// dest_buf
+	movl	%eax, 4(%edx)					// dest_buf[1] = next_full_patt - dest_buf
+
+	movl	-6272(%ebp), %ecx				// &tempTagsArray[0]
+	decl	next_tag
+	cmpl	next_tag, %ecx					// next_tag vs &tempTagsArray[0]
+	jae		L13								// if &tempTagsArray[0] >= next_tag, skip the following WK_pack_2bits
+
+	movl	%edx, %ebx						// a copy of dest_buf
+
+	// boundary_tmp = WK_pack_2bits(tempTagsArray, (WK_word *) next_tag, dest_buf + HEADER_SIZE_IN_WORDS);	
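+	// (a minimal C sketch, assumed from the original WKdmCompress.c: each
+	//  iteration folds four byte-wide 2-bit tags w0..w3 into one word,
+	//      *dest++ = w0 | (w1 << 2) | (w2 << 4) | (w3 << 6);
+	//  with the source advancing 16 bytes per packed destination word)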
+
+	.align 4,0x90
+L_WK_pack_2bits:
+	movl	4(%ecx), %eax					// w1
+	sall	$2, %eax						// w1 << 2		
+	movl	8(%ecx), %edx					// w2
+	sall	$4, %edx						// w2 << 4
+	orl		%edx, %eax						// (w1<<2) | (w2<<4)
+	orl		(%ecx), %eax					// (w0) | (w1<<2) | (w2<<4)
+	movl	12(%ecx), %edx					// w3
+	sall	$6, %edx						// (w3<<6)
+	orl		%edx, %eax						// (w0) | (w1<<2) | (w2<<4) | (w3<<6)
+	movl	%eax, 16(%ebx)					// save at *(dest_buf + HEADER_SIZE_IN_WORDS)
+	addl	$16, %ecx						// tempTagsArray += 16;
+	addl	$4, %ebx						// dest_buf += 4;
+	cmpl    %ecx, next_tag					// cmp next_tag vs dest_buf
+	ja		L_WK_pack_2bits					// if (next_tag > dest_buf) repeat L_WK_pack_2bits
+
+	/* Pack the queue positions into the area just after the full words. */
+L13:
+	movl	-6252(%ebp), %eax				// next_qp
+	movl	-6264(%ebp), %ecx				// (char *) tempQPosArray
+	movl	%eax, %esi						// next_qp
+	subl	%ecx, %eax						// num_bytes_to_pack = next_qp - (char *) tempQPosArray;
+	addl	$7, %eax						// num_bytes_to_pack + 7
+	andl	$-8, %eax						// clear lower 3 bits, (num_packed_words<<3)
+	addl	%eax, %ecx						// endQPosArray = tempQPosArray + num_source_words;
+	cmpl	%ecx, %esi						// next_qp vs endQPosArray
+	jae		L16
+	.align 4,0x90
+L30:
+	movb	$0, (%esi)						// *next_qp = 0;
+	incl	%esi							// next_qp++
+	cmpl	%ecx, %esi						// next_qp vs endQPosArray
+	jne		L30								// 
+
+L16:
+	movl	-6256(%ebp), %ebx				// next_full_patt
+	cmpl	-6264(%ebp), %ecx				// endQPosArray vs tempQPosArray
+	jbe		L20								// if (endQPosArray<=tempQPosArray) skip L_WK_pack_4bits
+	movl	-6264(%ebp), %edx				// tempQPosArray
+
+
+	// boundary_tmp = WK_pack_4bits(tempQPosArray, endQPosArray, next_full_patt);
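+	// (a minimal C sketch, assumed from the original WKdmCompress.c: each
+	//  iteration folds two source words, each holding four byte-wide qpos
+	//  values, into one destination word,
+	//      *dest++ = src[0] | (src[1] << 4);  src += 2;)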
+
+	.align 4,0x90
+L21:
+	movl	4(%edx), %eax					// src_next[1]
+	sall	$4, %eax						// (src_next[1] << 4)
+	orl		(%edx), %eax					// temp = src_next[0] | (src_next[1] << 4)
+	movl	%eax, (%ebx)					// dest_next[0] = temp; 
+	addl	$4, %ebx						// dest_next++;
+	addl	$8, %edx						// src_next += 2;
+	cmpl	%edx, %ecx						// source_end vs src_next
+	ja		L21								// while (src_next < source_end) repeat the loop
+
+	movl	%ebx, %edi						// boundary_tmp
+
+	subl	12(%ebp), %edi					// boundary_tmp - dest_buf
+	movl	%edi, %eax						// boundary_tmp - dest_buf
+	sarl	$2, %eax						// translate into word offset
+
+	movl	%eax, -6240(%ebp)				// save (next_full_patt - dest_buf) in words 
+
+L20:
+	// SET_LOW_BITS_AREA_START(dest_buf,boundary_tmp);
+	movl	-6240(%ebp), %ecx				// boundary_tmp - dest_buf 
+	movl	12(%ebp), %edx					// dest_buf
+	movl	%ecx, 8(%edx)					// dest_buf[2] = boundary_tmp - dest_buf
+
+	movl	-6260(%ebp), %ecx				// tempLowBitsArray
+	movl	-6248(%ebp), %edx				// next_low_bits 
+	subl	%ecx, %edx						// next_low_bits - tempLowBitsArray
+	sarl	$2, %edx						// num_tenbits_to_pack 
+
+	subl	$3, %edx						// pre-decrement num_tenbits_to_pack by 3
+	jl		1f								// if num_tenbits_to_pack < 3, skip the following loop
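+	// (a minimal C sketch, assumed from the original WKdmCompress.c: the loop
+	//  below packs three 10-bit values per destination word,
+	//      *dest++ = w0 | (w1 << 10) | (w2 << 20);)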
+	.align	4,0x90
+0:
+	movl	4(%ecx), %eax					// w1
+	sall	$10, %eax						// w1<<10
+	movl	8(%ecx), %esi					// w2
+	sall	$20, %esi						// w2<<20
+	orl		%esi, %eax						// (w1<<10) | (w2<<20)
+	orl		(%ecx), %eax					// (w0) | (w1<<10) | (w2<<20)
+	movl	%eax, (%ebx)					// pack w0,w1,w2 into 1 dest_buf word
+	addl	$4, %ebx						// dest_buf++
+	addl	$12, %ecx						// next w0/w1/w2 triplet
+	subl	$3, %edx						// num_tenbits_to_pack-=3 
+	jge		0b								// if no less than 3 elements, back to loop head
+
+1:	addl	$3, %edx						// post-increment num_tenbits_to_pack by 3
+	je		3f								// if num_tenbits_to_pack is a multiple of 3, skip the following
+	movl	(%ecx), %eax					// w0
+	subl	$1, %edx						// num_tenbits_to_pack --
+	je		2f								// 
+	movl    4(%ecx), %esi					// w1
+	sall	$10, %esi						// w1<<10
+	orl		%esi, %eax
+2:
+	movl	%eax, (%ebx)					// write the final dest_buf word
+	addl	$4, %ebx						// dest_buf++
+3:
+	movl	%ebx, %eax						// boundary_tmp
+	subl	12(%ebp), %eax					// boundary_tmp - dest_buf
+	sarl	$2, %eax						// boundary_tmp - dest_buf in terms of words
+	movl	12(%ebp), %esi					// dest_buf
+	movl	%eax, 12(%esi)					// SET_LOW_BITS_AREA_END(dest_buf,boundary_tmp);
+	sall	$2, %eax						// boundary_tmp - dest_buf in terms of bytes
+	addl	$6316, %esp						// pop out stack memory
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	leave
+	ret
+
+	.align 4,0x90
+
+L_RECORD_PARTIAL:
+	movb	$1, -1(next_tag)						// *next_tag = 1 for partial matched
+	movl	%edx, %eax								// dict_location
+	subl	dictionary, %eax						// %eax = dict_location - dictionary
+	movl	%ecx, (%edx)							// *dict_location = input_word;
+	sarl	$2, %eax								// offset in 32-bit word
+	movl	76(%esp), %edx							// next_qp
+	movb	%al, (%edx)								// update *next_qp
+	incl	%edx									// next_qp++
+	movl	%edx, 76(%esp)							// save next_qp
+	movl	%ecx, %eax								// a copy of input_word
+	andl	$1023, %eax								// lower 10 bits
+	movl	80(%esp), %edx							// next_low_bits
+	movl	%eax, (%edx)							// EMIT_WORD(next_low_bits,(low_bits_pattern))
+	addl	$4, %edx								// next_low_bits++
+	movl	%edx, 80(%esp)							// save next_low_bits
+	jmp		L8
+
+#endif		// i386 architectures
+
+#if defined __x86_64__			// 64-bit implementation	
+	.text
+	.align 4,0x90
+
+.globl _WKdm_compress
+_WKdm_compress:
+	pushq	%rbp
+	movq	%rsp, %rbp
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	subq	$6112, %rsp
+
+	#define	tempTagsArray	-6264(%rbp)
+	#define	tempLowBitsArray	-6272(%rbp)
+	#define	next_tag			%r8
+	#define	next_input_word		%rdi
+	#define	end_of_input		%r13
+	#define	next_full_patt		%rbx
+	#define	dict_location		%rcx
+	#define	next_qp				%r10
+	#define	dictionary			%r11
+	#define	dest_buf			%r12
+	#define	hashTable			%r14
+	#define tempQPosArray		%r15
+	#define	next_low_bits		%rsi
+
+	movq	%rsi, %r12						// dest_buf
+
+	leaq	-1136(%rbp), %rax				// &tempTagsArray[0]
+	movq	%rax, tempTagsArray 
+	leaq	1(%rax), next_tag				// next_tag always points to the one following the current tag 
+
+	leaq	-2160(%rbp), %r15				// &tempQPosArray[0]
+	movq	%r15, next_qp					// next_qp
+
+	mov		%edx, %eax						// num_input_words
+	leaq	(%rdi,%rax,4), end_of_input		// end_of_input = src_buf + num_input_words
+
+	// PRELOAD_DICTIONARY;
+	movl	$1, -112(%rbp)
+	movl	$1, -108(%rbp)
+	movl	$1, -104(%rbp)
+	movl	$1, -100(%rbp)
+	movl	$1, -96(%rbp)
+	movl	$1, -92(%rbp)
+	movl	$1, -88(%rbp)
+	movl	$1, -84(%rbp)
+	movl	$1, -80(%rbp)
+	movl	$1, -76(%rbp)
+	movl	$1, -72(%rbp)
+	movl	$1, -68(%rbp)
+	movl	$1, -64(%rbp)
+	movl	$1, -60(%rbp)
+	movl	$1, -56(%rbp)
+	movl	$1, -52(%rbp)
+
+	shrl	$4, %edx						// (num_input_words / 16)
+	mov		%edx, %edx						// zero extension into quad word
+	leaq	16(%rsi,%rdx,4), %rbx			// dest_buf + [TAGS_AREA_OFFSET + (num_input_words / 16)]*4
+
+	leaq	-6256(%rbp), %rax				// &tempLowBitsArray[0]
+	movq	%rax, tempLowBitsArray			// save for later reference
+	movq	%rax, next_low_bits				// next_low_bits	
+
+	cmpq	end_of_input, next_input_word	// next_input_word vs end_of_input
+	jae		L_done_search					// if (next_input_word>=end_of_input) no work to do in search
+	leaq	-112(%rbp), dictionary			// dictionary
+	leaq	_hashLookupTable(%rip), hashTable	// hash look up table
+	jmp	L5
+
+	.align 4,0x90
+L_RECORD_ZERO:
+	movb	$0, -1(next_tag)						// *next_tag = ZERO;
+L8:
+	addq	$4, next_input_word 					// next_input_word++;
+	incq	next_tag								// next_tag++
+	cmpq	next_input_word, end_of_input 			// end_of_input vs next_input_word
+	jbe		L_done_search
+L5:
+	movl	(next_input_word), %edx					// input_word = *next_input_word;
+	movl	%edx, %r9d								// a copy of input_word
+	testl	%edx, %edx								// input_word
+	je		L_RECORD_ZERO							// if (input_word==0) RECORD_ZERO
+	shrl	$10, %r9d								// input_high_bits = HIGH_BITS(input_word);
+	movzbl	%r9b, %eax								// 8-bit index to the Hash Table
+	movsbq	(hashTable,%rax),%rax					// HASH_TO_DICT_BYTE_OFFSET(input_word)
+	leaq	(dictionary, %rax), dict_location		// ((char*) dictionary) + HASH_TO_DICT_BYTE_OFFSET(input_word));
+	movl	(dict_location), %eax					// dict_word = *dict_location;
+	cmpl	%eax, %edx								// dict_word vs input_word
+	je		L_RECORD_EXACT							// if identical, RECORD_EXACT
+	shrl	$10, %eax								// HIGH_BITS(dict_word)
+	cmpl	%eax, %r9d								// input_high_bits vs HIGH_BITS(dict_word)
+	je		L_RECORD_PARTIAL						// if identical, RECORD_PARTIAL
+
+L_RECORD_MISS:
+	movb	$2, -1(next_tag)						// *next_tag = 2 for miss
+	movl	%edx, (next_full_patt)					// *next_full_patt = input_word;
+	addq	$4, next_full_patt						// next_full_patt++ 
+	movl	%edx, (dict_location)					// *dict_location = input_word
+	addq	$4, next_input_word						// next_input_word++
+	incq	next_tag								// next_tag++
+	cmpq	next_input_word, end_of_input			// end_of_input vs next_input_word	
+	ja		L5										// if (end_of_input>next_input_word) repeat from L5
+
+L_done_search:
+
+	// SET_QPOS_AREA_START(dest_buf,next_full_patt);
+	//movq	next_full_patt, %r11					// next_full_patt
+	movq	next_full_patt, %rax					// next_full_patt
+	subq	dest_buf, %rax							// next_full_patt - dest_buf								
+	sarq	$2, %rax								// offset in 4-bytes
+	movl	%eax, %r13d								// r13d = (next_full_patt - dest_buf)
+	movl	%eax, 4(dest_buf)						// dest_buf[1] = next_full_patt - dest_buf
+
+	decq	next_tag
+	cmpq	next_tag, tempTagsArray					// &tempTagsArray[0] vs next_tag
+	jae		L13										// if (&tempTagsArray[0] >= next_tag), skip the following
+
+	// boundary_tmp = WK_pack_2bits(tempTagsArray, (WK_word *) next_tag, dest_buf + HEADER_SIZE_IN_WORDS);
+
+	movq	dest_buf, %rdi							// dest_buf
+	movq	tempTagsArray, %rcx						// &tempTagsArray[0]
+
+	.align 4,0x90
+L_pack_2bits:
+	movl	4(%rcx), %eax							// w1
+	sall	$2, %eax								// w1 << 2
+	movl	8(%rcx), %edx							// w2
+	sall	$4, %edx								// w2 << 4
+	orl		%edx, %eax								// (w1<<2) | (w2<<4)
+	orl		(%rcx), %eax							// (w0) | (w1<<2) | (w2<<4)
+	movl	12(%rcx), %edx							// w3
+	sall	$6, %edx								// w3 << 6
+	orl		%edx, %eax								// (w0) | (w1<<2) | (w2<<4) | (w3<<6)
+	movl	%eax, 16(%rdi)							// save at *(dest_buf + HEADER_SIZE_IN_WORDS)
+	addq	$16, %rcx								// tempTagsArray += 16;
+	addq	$4, %rdi								// dest_buf += 4;
+	cmpq	%rcx, next_tag							// cmp next_tag vs dest_buf
+	ja		L_pack_2bits							// if (next_tag > dest_buf) repeat L_pack_2bits
+
+	/* Pack the queue positions into the area just after the full words. */
+
+L13:
+	movl	%r10d, %eax								// next_qp
+	subl	%r15d, %eax								// num_bytes_to_pack = next_qp - (char *) tempQPosArray; 
+	addl	$7, %eax								// num_bytes_to_pack+7
+	shrl	$3, %eax								// num_packed_words = (num_bytes_to_pack + 7) >> 3
+	addl	%eax, %eax								// num_source_words = num_packed_words * 2;
+	mov		%eax, %eax								// zero-extend num_source_words into %rax
+	leaq	(tempQPosArray,%rax,4), %rcx			// endQPosArray = tempQPosArray + num_source_words
+	cmpq	%rcx, %r10								// next_qp vs endQPosArray
+	jae		L16										// if (next_qp >= endQPosArray) skip the following zero paddings
+	.align 4,0x90
+L30:
+	movb	$0, (next_qp)							// *next_qp = 0
+	incq	next_qp									// next_qp++							
+	cmpq	%rcx, next_qp							// next_qp vs endQPosArray								
+	jne		L30										// repeat while next_qp < endQPosArray
+L16:
+	movq	%rbx, %rdi								// next_full_patt
+	cmpq	tempQPosArray, %rcx						// endQPosArray vs tempQPosArray
+	jbe		L20										// if (endQPosArray <= tempQPosArray) skip the following
+	movq	tempQPosArray, %rdx						// tempQPosArray
+
+	.align 4,0x90
+L_pack_4bits:
+	movl	4(%rdx), %eax							// src_next[1]
+	sall	$4, %eax								// (src_next[1] << 4)
+	orl		(%rdx), %eax							// temp = src_next[0] | (src_next[1] << 4)
+	movl	%eax, (%rdi)							// dest_next[0] = temp;
+	addq	$4, %rdi								// dest_next++;
+	addq	$8, %rdx								// src_next += 2;
+	cmpq	%rdx, %rcx								// source_end vs src_next
+	ja		L_pack_4bits							// while (src_next < source_end) repeat the loop
+
+	// SET_LOW_BITS_AREA_START(dest_buf,boundary_tmp);
+	//movq	%rdi, %r11								// boundary_tmp
+	movq	%rdi, %rax								// boundary_tmp
+	subq	dest_buf, %rax							// boundary_tmp - dest_buf
+	movq	%rax, %r13								// boundary_tmp - dest_buf
+	shrq	$2, %r13								// boundary_tmp - dest_buf in words
+L20:
+	movl	%r13d, 8(dest_buf)						// dest_buf[2] = boundary_tmp - dest_buf
+
+	movq	tempLowBitsArray, %rcx					// tempLowBitsArray
+	movq	next_low_bits, %rbx						// next_low_bits
+	subq	%rcx, %rbx								// next_low_bits - tempLowBitsArray (in bytes)
+	sarq	$2, %rbx								// num_tenbits_to_pack (in words)
+
+	#define	size	%ebx
+
+	subl	$3, size								// pre-decrement num_tenbits_to_pack by 3
+	jl		1f										// if num_tenbits_to_pack < 3, skip the following loop
+
+	.align	4,0x90
+0:
+	movl	4(%rcx), %eax							// w1
+	sall	$10, %eax								// w1 << 10
+	movl	8(%rcx), %edx							// w2	
+	sall	$20, %edx								// w2 << 20
+	orl		%edx, %eax								// (w1<<10) | (w2<<20)
+	orl		(%rcx), %eax							// (w0) | (w1<<10) | (w2<<20)
+	movl	%eax, (%rdi)							// pack w0,w1,w2 into 1 dest_buf word
+	addq	$4, %rdi								// dest_buf++
+	addq	$12, %rcx								// next w0/w1/w2 triplet
+	subl	$3, size								// num_tenbits_to_pack-=3
+	jge		0b										// if no less than 3 elements, back to loop head
+
+1: 	addl	$3, size								// post-increment num_tenbits_to_pack by 3
+	je		3f										// if num_tenbits_to_pack is a multiple of 3, skip the following
+	movl	(%rcx), %eax							// w0
+	subl	$1, size								// num_tenbits_to_pack--
+	je		2f										//
+	movl	4(%rcx), %edx							// w1
+	sall	$10, %edx								// w1 << 10
+	orl		%edx, %eax								// w0 | (w1<<10)
+
+2:	movl	%eax, (%rdi)							// write the final dest_buf word
+	addq	$4, %rdi								// dest_buf++
+
+3:	movq	%rdi, %rax								// boundary_tmp
+	subq	dest_buf, %rax							// boundary_tmp - dest_buf
+	shrq	$2, %rax								// boundary_tmp - dest_buf in terms of words
+	movl	%eax, 12(dest_buf)						// SET_LOW_BITS_AREA_END(dest_buf,boundary_tmp)
+	shlq	$2, %rax								// boundary_tmp - dest_buf in terms of bytes
+
+	// restore registers and return
+	addq	$6112, %rsp
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	leave
+	ret
+
+	.align 4,0x90
+L_RECORD_EXACT:
+	movb	$3, -1(next_tag)					// *next_tag = 3 for exact
+	subq	dictionary, %rcx					// dict_location - dictionary
+	sarq	$2, %rcx							// divide by 4 for word offset
+	movb	%cl, (next_qp)						// *next_qp = word offset (4-bit)
+	incq	next_qp								// next_qp++
+	jmp		L8
+
+	.align 4,0x90
+L_RECORD_PARTIAL:
+	movb	$1, -1(next_tag)					// *next_tag = 1 for partial matched
+	movq	%rcx, %rax							// dict_location
+	subq	dictionary, %rax					// dict_location - dictionary
+	movl	%edx, (%rcx)						// *dict_location = input_word;
+	sarq	$2, %rax							// offset in 32-bit word
+	movb	%al, (next_qp)						// update *next_qp
+	incq	next_qp								// next_qp++
+	andl	$1023, %edx							// lower 10 bits
+	movl	%edx, (next_low_bits)				// save next_low_bits
+	addq	$4, next_low_bits					// next_low_bits++
+	jmp	L8
+
+	// for some reason, keeping the following never-executed code yields better performance
+L41:
+	leaq	-6256(%rbp), %rax
+	movq	%rax, -6272(%rbp)
+	movq	%rax, %rsi
+	jmp		L_done_search
+#endif		// x86_64 architectures
+#endif		// i386 or x86_64 architectures
diff --git a/libkern/kxld/i386/WKdmDecompress.s b/libkern/kxld/i386/WKdmDecompress.s
new file mode 100644
index 000000000..c2e6e9345
--- /dev/null
+++ b/libkern/kxld/i386/WKdmDecompress.s
@@ -0,0 +1,675 @@
+// $Id: WKdmDecompress.intel.s,v 1.1 2010/01/30 00:39:21 cclee Exp cclee $
+
+// This file contains the i386 and x86_64 (no SSE) optimized implementations of the WKdm Decompressor.
+// The implementation was derived by compiling (gcc -O3) the original C code (WKdmDecompress.c)
+// and then hand-tweaking the compiled assembly code.
+// cclee, 1/29/10
+
+#if defined __i386__
+	.text
+	.align 4,0x90
+
+	.globl _WKdm_decompress
+_WKdm_decompress:
+
+	// save registers, set up base pointer %ebp, and allocate stack memory for local variables
+
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+	subl	$7324, %esp
+
+	// PRELOAD_DICTIONARY; dictionary starting address : -88(%ebp)
+	movl	$1, -88(%ebp)
+	movl	$1, -84(%ebp)
+	movl	$1, -80(%ebp)
+	movl	$1, -76(%ebp)
+	movl	$1, -72(%ebp)
+	movl	$1, -68(%ebp)
+	movl	$1, -64(%ebp)
+	movl	$1, -60(%ebp)
+	movl	$1, -56(%ebp)
+	movl	$1, -52(%ebp)
+	movl	$1, -48(%ebp)
+	movl	$1, -44(%ebp)
+	movl	$1, -40(%ebp)
+	movl	$1, -36(%ebp)
+	movl	$1, -32(%ebp)
+	movl	$1, -28(%ebp)
+
+	#define	dictionary_addr			-88(%ebp)
+	#define	TAGS_AREA_END -7292(%ebp)
+	#define	tempTagsArray	-7300(%ebp)
+	#define	tempQPosArray	-2488(%ebp)
+	#define	tempLowBitsArray	-7288(%ebp)
+	#define	next_low_bits		-7296(%ebp)
+	#define	dictionary		-7308(%ebp)
+	#define	tag_area_end	-7304(%ebp)
+
+	// WK_unpack_2bits(TAGS_AREA_START(src_buf), TAGS_AREA_END(src_buf), tempTagsArray);
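+	// (a minimal C sketch, assumed from the original WKdmDecompress.c: each
+	//  packed source word w expands into 16 byte-wide tags, four at a time,
+	//      dest[i] = (w >> (2*i)) & 0x03030303, for i = 0..3)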
+
+	movl	8(%ebp), %eax						// src_buf
+	addl	$272, %eax							// src_buf + 16 (WKdm Header) + 256 (Tags)
+	movl	%eax, TAGS_AREA_END					// TAGS_AREA_END(src_buf)
+	movl	8(%ebp), %eax						// src_buf
+	movl	%eax, %edi							// src_buf
+	addl	$16, %eax							// TAGS_AREA_START(src_buf) = src_buf + 16 (WKdm Header)
+	leal	-1288(%ebp), %edx					// tempTagsArray
+	movl	%edx, tempTagsArray					// save a copy of tempTagsArray[] at the said location
+	cmpl	%eax, TAGS_AREA_END					// TAGS_AREA_END vs TAGS_AREA_START
+	jbe		1f									// if TAGS_AREA_END<=TAGS_AREA_START, no need for WK_unpack_2bits
+	movl	%edx, %ecx							// %ecx -> tempTagsArray[0]
+	xorl	%esi, %esi							// i=0
+	movl	$50529027, %ebx						// 0x03030303, mask to extract 4 2-bit tags
+	.align 4,0x90
+L_WK_unpack_2bits:
+	movl	16(%edi,%esi,4), %edx				// src_buf[i] for 16 tags, 16 (WKdm header)
+	movl	%edx, %eax							// w = src_buf[i]
+	andl	%ebx, %eax							// 1st 4 tags, each in bytes	
+	movl	%eax, (%ecx)						// save 1st 4 tags
+	movl	%edx, %eax							// w = src_buf[i]
+	shrl	$2, %eax							// shift down 2 bits
+	andl	%ebx, %eax							// 2nd 4 tags, each in bytes
+	movl	%eax, 4(%ecx)						// save 2nd 4 tags
+	shrl	$4, %edx							// shift down w by 4 bits
+	movl	%edx, %eax							// w>>4
+	andl	%ebx, %eax							// 3rd 4 tags
+	movl	%eax, 8(%ecx)						// save 3rd 4 tags
+	shrl	$2, %edx							// w>>6
+	andl	%ebx, %edx							// 4th 4 tags
+	movl	%edx, 12(%ecx)						// save 4th 4 tags
+	addl	$16, %ecx							// point to next tempTagsArray[i*16]
+	incl	%esi								// i++
+	cmpl	$64, %esi							// i vs 64
+	jne		L_WK_unpack_2bits					// repeat the loop until i==64	
+1:
+
+	// WK_unpack_4bits(QPOS_AREA_START(src_buf), QPOS_AREA_END(src_buf), tempQPosArray);
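+	// (a minimal C sketch, assumed from the original WKdmDecompress.c: each
+	//  packed source word w expands into eight byte-wide qpos values,
+	//      dest[0] = w & 0x0F0F0F0F;  dest[1] = (w >> 4) & 0x0F0F0F0F;)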
+
+	movl	8(%edi), %eax						// WKdm header qpos end
+	leal	(%edi,%eax,4), %esi					// QPOS_AREA_END
+	movl	4(%edi), %eax						// WKdm header qpos start
+	leal	(%edi,%eax,4), %ecx					// QPOS_AREA_START
+	cmpl	%ecx, %esi							// QPOS_AREA_END vs QPOS_AREA_START
+	jbe		1f									// if QPOS_AREA_END <= QPOS_AREA_START, skip WK_unpack_4bits
+	leal	tempQPosArray, %edi					// tempQPosArray
+	movl	$252645135, %ebx					// 0x0f0f0f0f : mask to extract 4 4-bit qpos
+L_WK_unpack_4bits:
+	movl	(%ecx), %eax						// w
+	movl	%eax, %edx							// w
+	andl	%ebx, %edx							// 1st 4 qpos
+	movl	%edx, (%edi)						// save 1st 4 qpos
+	shrl	$4, %eax							// w>>4
+	andl	%ebx, %eax							// 2nd 4 qpos
+	movl	%eax, 4(%edi)						// save 2nd 4 qpos
+	addl	$4, %ecx							// point to next word w
+	addl	$8, %edi							// qpos += 8
+	cmpl	%ecx, %esi							// QPOS_AREA_END vs qpos_pointer
+	ja		L_WK_unpack_4bits					// repeat until qpos_pointer >= QPOS_AREA_END	
+
+	// WK_unpack_3_tenbits(LOW_BITS_AREA_START(src_buf), LOW_BITS_AREA_END(src_buf), tempLowBitsArray);
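+	// (a minimal C sketch, assumed from the original WKdmDecompress.c: each
+	//  source word w carries three 10-bit fields,
+	//      t0 = w & 0x3FF;  t1 = (w >> 10) & 0x3FF;  t2 = w >> 20;)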
+
+1:
+	movl	8(%ebp), %edx						// src_buf
+	movl	12(%edx), %eax 						// LOW_BITS_AREA_END offset
+	leal	(%edx,%eax,4), %edi					// LOW_BITS_AREA_END 
+	cmpl	%edi, %esi							// LOW_BITS_AREA_START(=QPOS_AREA_END) vs LOW_BITS_AREA_END	
+	jae		1f									// if (LOW_BITS_AREA_START>=LOW_BITS_AREA_END) skip unpack_3_tenbits
+	leal	tempLowBitsArray, %ecx				// tempLowBitsArray
+	movl	$1023, %ebx							// 0x03ff to extract the lower 10 bits
+
+	.align 4,0x90
+L_WK_unpack_3_tenbits:
+	movl	(%esi), %eax						// w = *next_low_bits
+	movl	%eax, %edx							// w
+	andl	%ebx, %edx							// 1st 10-bit
+	movl	%edx, (%ecx)						// save 1st 10-bit
+	shrl	$10, %eax							// (w>>10)
+	movl	%eax, %edx							// (w>>10)
+	andl	%ebx, %edx							// 2nd 10-bit
+	movl	%edx, 4(%ecx)						// save 2nd 10-bit
+	shrl	$10, %eax							// (w>>20), no need to and with mask, the top 2 bits should be zero
+	movl	%eax, 8(%ecx)						// save 3rd 10-bits
+	addl	$4, %esi							// point to next w
+	addl	$12, %ecx							// tempLowBitsArray += 3;
+	cmpl	%esi, %edi							// LOW_BITS_AREA_END vs next_low_bits
+	ja		L_WK_unpack_3_tenbits				// repeat until next_low_bits>=LOW_BITS_AREA_END	
+1:
+	call	Lhash								// call/pop idiom: load EIP into %ebx
+Lhash:
+	popl	%ebx								// set up %ebx for PC-relative Hash Table lookup
+
+	#define	next_tag	%esi
+	#define	next_qpos	%edi
+
+	movl	tempTagsArray, next_tag				// next_tag = tempTagsArray
+	leal	tempQPosArray, next_qpos			// next_qpos = tempQPosArray
+	movl	12(%ebp), %ecx						// dest_buf
+	addl	$4, %ecx							// for some reason, performance is better if we point to the next one
+	leal	tempLowBitsArray, %eax				// tempLowBitsArray
+	movl	%eax, next_low_bits					// next_low_bits = next_low_bits;
+	leal	-264(%ebp), %edx
+	movl	%edx, tag_area_end					// tag_area_end
+	leal	dictionary_addr, %eax				// dictionary starting address
+	movl	%eax, dictionary					// dictionary
+	jmp		L11
+	.align 4,0x90
+L29:
+	jle		L_ZERO_TAG
+	cmpb	$2, %al								// MISS_TAG
+	je		L_MISS_TAG
+L_EXACT_TAG:
+	movsbl	(next_qpos),%eax					// qpos = *next_qpos
+	incl	next_qpos							// next_qpos++
+	movl	dictionary, %edx					// dictionary
+	movl	(%edx,%eax,4), %eax					// w = dictionary[qpos]
+	movl	%eax, -4(%ecx)						// *dest_buf = w
+	.align 4,0x90
+L_next:
+	incl	next_tag							// next_tag++
+	addl	$4, %ecx							// dest_buf++
+	cmpl	tag_area_end, next_tag				// next_tag vs tag_area_end
+	jae		L_done								// if (next_tag>=tag_area_end)
+L11:
+	movzbl	(next_tag), %eax					// tag = *next_tag
+	cmpb	$1, %al								// Partial match?
+	jne		L29
+L_PARTIAL_TAG:
+	movsbl	(next_qpos),%edx					// qpos = *next_qpos
+	movl	dictionary, %eax					// dictionary
+	leal	(%eax,%edx,4), %edx					// dict_location = &dictionary[qpos]
+	movl	%edx, -7324(%ebp)					// save dict_location to release %edx
+	incl	next_qpos							// next_qpos++
+	movl	(%edx), %eax						// read dictionary word
+	andl	$-1024, %eax						// keep only higher 22-bits
+	movl	next_low_bits, %edx					// low_bits = *next_low_bits
+	orl		(%edx), %eax						// construct the new partially matched word
+	addl	$4, %edx							// 
+	movl	%edx, next_low_bits					// next_low_bits++
+	movl	-7324(%ebp), %edx					// dict_location
+	movl	%eax, (%edx)						// update *dict_location with the newly constructed word
+	movl	%eax, -4(%ecx)						// *dest_buf = the newly constructed word
+	incl	next_tag							// next_tag++
+	addl	$4, %ecx							// dest_buf++
+	cmpl	tag_area_end, next_tag				// next_tag vs tag_area_end
+	jb		L11									// if next_tag < tag_area_end, repeat the loop
+L_done:
+
+	// release stack memory, restore registers, and return
+	addl	$7324, %esp
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	leave
+	ret
+
+	#define	next_full_patt	-7292(%ebp) /* next_full_patt starts with initial value of TAGS_AREA_END */
+
+	.align 4,0x90
+L_MISS_TAG:
+	movl	next_full_patt, %edx					// next_full_patt
+	movl	(%edx), %eax							// word = *next_full_patt
+	addl	$4, %edx								// next_full_patt++
+	movl	%edx, next_full_patt					// save next_full_patt
+	movl	%eax, %edx								// word
+	shrl	$10, %edx								// word>>10
+	andl	$255, %edx								// 8-bit hash table index
+	movsbl	_hashLookupTable-Lhash(%ebx,%edx),%edx	// qpos
+	movl	%eax, -88(%ebp,%edx)					// dictionary[qpos] = word
+	movl	%eax, -4(%ecx)							// *dest_buf = word
+	jmp		L_next									// repeat the loop
+
+	.align 4,0x90
+L_ZERO_TAG:
+	movl	$0, -4(%ecx)							// *dest_buf = 0
+	jmp		L_next									// repeat the loop
+
+#endif	// __i386__
+
+#if defined __x86_64__
+
+
+	.text
+	.align 4,0x90
+
+	.globl _WKdm_decompress
+_WKdm_decompress:
+
+	// save registers, and allocate stack memory for local variables
+
+	pushq	%rbp
+	movq	%rsp, %rbp
+	pushq	%r12
+	pushq	%rbx
+	subq	$7144, %rsp
+
+	movq	%rsi, %r12					// dest_buf
+
+	// PRELOAD_DICTIONARY; dictionary starting address : -80(%rbp)
+	movl	$1, -80(%rbp)
+	movl	$1, -76(%rbp)
+	movl	$1, -72(%rbp)
+	movl	$1, -68(%rbp)
+	movl	$1, -64(%rbp)
+	movl	$1, -60(%rbp)
+	movl	$1, -56(%rbp)
+	movl	$1, -52(%rbp)
+	movl	$1, -48(%rbp)
+	movl	$1, -44(%rbp)
+	movl	$1, -40(%rbp)
+	movl	$1, -36(%rbp)
+	movl	$1, -32(%rbp)
+	movl	$1, -28(%rbp)
+	movl	$1, -24(%rbp)
+	movl	$1, -20(%rbp)
+
+	// WK_unpack_2bits(TAGS_AREA_START(src_buf), TAGS_AREA_END(src_buf), tempTagsArray);
+	leaq	272(%rdi), %r10				// TAGS_AREA_END
+	leaq	16(%rdi), %rax				// TAGS_AREA_START 
+	leaq	-1280(%rbp), %rsi			// tempTagsArray
+	cmpq	%rax, %r10					// TAGS_AREA_END vs TAGS_AREA_START
+	jbe		1f							// if TAGS_AREA_END <= TAGS_AREA_START, skip L_WK_unpack_2bits
+	movq	%rsi, %rcx					// next_word
+	xorl	%r8d, %r8d					// i = 0
+	.align 4,0x90
+L_WK_unpack_2bits:
+	movl	16(%rdi,%r8,4), %edx		// w = *next_word
+	movl	%edx, %eax					// w
+	andl	$50529027, %eax				// 1st 4 tags
+	movl	%eax, (%rcx)				// write 1st 4 tags
+	movl	%edx, %eax					// w
+	shrl	$2, %eax					// w>>2
+	andl	$50529027, %eax				// 2nd 4 tags
+	movl	%eax, 4(%rcx)				// write 2nd 4 tags
+	shrl	$4, %edx					// w>>4
+	movl	%edx, %eax					// w>>4
+	andl	$50529027, %eax				// 3rd 4 tags
+	movl	%eax, 8(%rcx)				// write 3rd 4 tags
+	shrl	$2, %edx					// w>>6
+	andl	$50529027, %edx				// 4th 4 tags
+	movl	%edx, 12(%rcx)				// write 4th 4 tags
+	addq	$16, %rcx					// next_tags += 16
+	incq	%r8							// i++
+	cmpq	$64, %r8					// i vs 64
+	jne		L_WK_unpack_2bits			// repeat loop until i==64
+1:
+
+	// WK_unpack_4bits(QPOS_AREA_START(src_buf), QPOS_AREA_END(src_buf), tempQPosArray);
+
+	mov		8(%rdi), %eax				// WKdm header qpos end
+	leaq	(%rdi,%rax,4), %r9			// QPOS_AREA_END
+	mov		4(%rdi), %eax				// WKdm header qpos start
+	leaq	(%rdi,%rax,4), %r8			// QPOS_AREA_START
+	leaq	-2480(%rbp), %rbx			// tempQPosArray
+	cmpq	%r8, %r9					// QPOS_AREA_END vs QPOS_AREA_START
+	jbe		1f							// if QPOS_AREA_END <= QPOS_AREA_START, skip L_WK_unpack_4bits
+	leaq	8(%rbx), %rcx				// next_qpos
+L_WK_unpack_4bits:
+	movl	(%r8), %eax					// w = *next_word
+	movl	%eax, %edx					// w
+	andl	$252645135, %edx			// 1st 4 qpos
+	movl	%edx, -8(%rcx)				// write 1st 4 qpos
+	shrl	$4, %eax					// w>>4
+	andl	$252645135, %eax			// 2nd 4 qpos
+	movl	%eax, -4(%rcx)				// write 2nd 4 qpos
+	addq	$4, %r8						// next_word++
+	addq	$8, %rcx					// next_qpos+=8
+	cmpq	%r8, %r9					// QPOS_AREA_END vs next_word
+	ja		L_WK_unpack_4bits			// repeat loop while next_word < QPOS_AREA_END
+1:
+
+	// WK_unpack_3_tenbits(LOW_BITS_AREA_START(src_buf), LOW_BITS_AREA_END(src_buf), tempLowBitsArray);
+
+	mov		12(%rdi), %eax				// LOW_BITS_AREA_END offset
+	leaq	(%rdi,%rax,4), %rdi			// LOW_BITS_AREA_END
+	leaq	-7280(%rbp), %r11			// tempLowBitsArray
+	cmpq	%rdi, %r9					// LOW_BITS_AREA_START vs LOW_BITS_AREA_END
+	jae		1f							// if START>=END, skip L_WK_unpack_3_tenbits
+	leaq	12(%r11), %rcx				// next_low_bits
+L_WK_unpack_3_tenbits:
+	movl	(%r9), %eax					// w = *next_word
+	movl	%eax, %edx					// w
+	andl	$1023, %edx					// 1st tenbits
+	movl	%edx, -12(%rcx)				// write 1st tenbits
+	shrl	$10, %eax					// w >> 10
+	movl	%eax, %edx					// w >> 10
+	andl	$1023, %edx					// 2nd tenbits
+	movl	%edx, -8(%rcx)				// write 2nd tenbits
+	shrl	$10, %eax					// w >> 20, 3rd tenbits
+	movl	%eax, -4(%rcx)				// write 3rd tenbits
+	addq	$4, %r9						// next_word++
+	addq	$12, %rcx					// next_low_bits += 3
+	cmpq	%r9, %rdi					// LOW_BITS_AREA_END vs next_word
+	ja		L_WK_unpack_3_tenbits		// repeat loop if LOW_BITS_AREA_END > next_word
+1:
+	movq	%rsi, %rdi						// next_tag
+	movq	%rbx, %r8						// next_qpos
+	leaq	4(%r12), %rcx					// dest_buf
+	movq	%r11, %r9						// next_low_bits
+	leaq	-80(%rbp), %r11					// dictionary
+	leaq	_hashLookupTable(%rip), %rbx	// hash look up table
+	leaq	1024(%rsi), %rsi				// tag_area_end
+
+	jmp	L11
+	.align 4,0x90
+L31:
+	jle		L_ZERO_TAG
+	cmpb	$2, %al							// MISS_TAG
+	je		L_MISS_TAG
+L_EXACT_TAG:
+	movsbq	(%r8),%rax						// qpos = *next_qpos
+	incq	%r8								// next_qpos++
+	movl	(%r11,%rax,4), %eax				// w = dictionary[qpos]
+	movl	%eax, -4(%rcx)					// *dest_buf = w
+	.align 4,0x90
+L_next:
+	incq	%rdi							// next_tag++
+	addq	$4, %rcx						// dest_buf++
+	cmpq	%rsi, %rdi						// next_tag vs tag_area_end
+	jae		L_done							// if next_tag >= tag_area_end, we're done
+L11:
+	movzbl	(%rdi), %eax					// tag = *next_tag
+	cmpb	$1, %al							// partial match tag ?
+	jne		L31
+L_PARTIAL_TAG:
+	movsbq	(%r8),%rdx						// qpos = *next_qpos
+	leaq	(%r11,%rdx,4), %rdx				// dict_location = &dictionary[qpos]
+	incq	%r8								// next_qpos++
+	movl	(%rdx), %eax					// read dictionary word
+	andl	$-1024, %eax					// clear lower 10 bits
+	orl		(%r9), %eax						// pad the lower 10-bits from *next_low_bits
+	addq	$4, %r9							// next_low_bits++
+	movl	%eax, (%rdx)					// *dict_location = newly formed word 
+	movl	%eax, -4(%rcx)					// *dest_buf = newly formed word
+	cmpq	%rsi, %rdi						// compare next_tag vs tag_area_end
+	jne		L_next							// repeat loop until next_tag==tag_area_end
+L_done:
+
+	// release stack memory, restore registers, and return
+	addq	$7144, %rsp
+	popq	%rbx
+	popq	%r12
+	leave
+	ret
+
+	.align 4,0x90
+L_MISS_TAG:
+	movl	(%r10), %eax					// w = *next_full_patt
+	addq	$4, %r10						// next_full_patt++
+	movl	%eax, %edx						// w 
+	shrl	$10, %edx						// w>>10
+	movzbl	%dl, %edx						// 8-bit hash table index
+	movsbq	(%rbx,%rdx),%rdx				// qpos
+	movl	%eax, -80(%rbp,%rdx)			// dictionary[qpos] = word
+	movl	%eax, -4(%rcx)					// *dest_buf = word
+	jmp		L_next							// repeat the loop
+
+	.align 4,0x90
+L_ZERO_TAG:
+	movl	$0, -4(%rcx)					// *dest_buf = 0
+	jmp		L_next							// repeat the loop
+
+#endif	// __x86_64__
+
+.globl _hashLookupTable
+	.const
+	.align 5
+_hashLookupTable:
+	.byte	0
+	.byte	52
+	.byte	8
+	.byte	56
+	.byte	16
+	.byte	12
+	.byte	28
+	.byte	20
+	.byte	4
+	.byte	36
+	.byte	48
+	.byte	24
+	.byte	44
+	.byte	40
+	.byte	32
+	.byte	60
+	.byte	8
+	.byte	12
+	.byte	28
+	.byte	20
+	.byte	4
+	.byte	60
+	.byte	16
+	.byte	36
+	.byte	24
+	.byte	48
+	.byte	44
+	.byte	32
+	.byte	52
+	.byte	56
+	.byte	40
+	.byte	12
+	.byte	8
+	.byte	48
+	.byte	16
+	.byte	52
+	.byte	60
+	.byte	28
+	.byte	56
+	.byte	32
+	.byte	20
+	.byte	24
+	.byte	36
+	.byte	40
+	.byte	44
+	.byte	4
+	.byte	8
+	.byte	40
+	.byte	60
+	.byte	32
+	.byte	20
+	.byte	44
+	.byte	4
+	.byte	36
+	.byte	52
+	.byte	24
+	.byte	16
+	.byte	56
+	.byte	48
+	.byte	12
+	.byte	28
+	.byte	16
+	.byte	8
+	.byte	40
+	.byte	36
+	.byte	28
+	.byte	32
+	.byte	12
+	.byte	4
+	.byte	44
+	.byte	52
+	.byte	20
+	.byte	24
+	.byte	48
+	.byte	60
+	.byte	56
+	.byte	40
+	.byte	48
+	.byte	8
+	.byte	32
+	.byte	28
+	.byte	36
+	.byte	4
+	.byte	44
+	.byte	20
+	.byte	56
+	.byte	60
+	.byte	24
+	.byte	52
+	.byte	16
+	.byte	12
+	.byte	12
+	.byte	4
+	.byte	48
+	.byte	20
+	.byte	8
+	.byte	52
+	.byte	16
+	.byte	60
+	.byte	24
+	.byte	36
+	.byte	44
+	.byte	28
+	.byte	56
+	.byte	40
+	.byte	32
+	.byte	36
+	.byte	20
+	.byte	24
+	.byte	60
+	.byte	40
+	.byte	44
+	.byte	52
+	.byte	16
+	.byte	32
+	.byte	4
+	.byte	48
+	.byte	8
+	.byte	28
+	.byte	56
+	.byte	12
+	.byte	28
+	.byte	32
+	.byte	40
+	.byte	52
+	.byte	36
+	.byte	16
+	.byte	20
+	.byte	48
+	.byte	8
+	.byte	4
+	.byte	60
+	.byte	24
+	.byte	56
+	.byte	44
+	.byte	12
+	.byte	8
+	.byte	36
+	.byte	24
+	.byte	28
+	.byte	16
+	.byte	60
+	.byte	20
+	.byte	56
+	.byte	32
+	.byte	40
+	.byte	48
+	.byte	12
+	.byte	4
+	.byte	44
+	.byte	52
+	.byte	44
+	.byte	40
+	.byte	12
+	.byte	56
+	.byte	8
+	.byte	36
+	.byte	24
+	.byte	60
+	.byte	28
+	.byte	48
+	.byte	4
+	.byte	32
+	.byte	20
+	.byte	16
+	.byte	52
+	.byte	60
+	.byte	12
+	.byte	24
+	.byte	36
+	.byte	8
+	.byte	4
+	.byte	16
+	.byte	56
+	.byte	48
+	.byte	44
+	.byte	40
+	.byte	52
+	.byte	32
+	.byte	20
+	.byte	28
+	.byte	32
+	.byte	12
+	.byte	36
+	.byte	28
+	.byte	24
+	.byte	56
+	.byte	40
+	.byte	16
+	.byte	52
+	.byte	44
+	.byte	4
+	.byte	20
+	.byte	60
+	.byte	8
+	.byte	48
+	.byte	48
+	.byte	52
+	.byte	12
+	.byte	20
+	.byte	32
+	.byte	44
+	.byte	36
+	.byte	28
+	.byte	4
+	.byte	40
+	.byte	24
+	.byte	8
+	.byte	56
+	.byte	60
+	.byte	16
+	.byte	36
+	.byte	32
+	.byte	8
+	.byte	40
+	.byte	4
+	.byte	52
+	.byte	24
+	.byte	44
+	.byte	20
+	.byte	12
+	.byte	28
+	.byte	48
+	.byte	56
+	.byte	16
+	.byte	60
+	.byte	4
+	.byte	52
+	.byte	60
+	.byte	48
+	.byte	20
+	.byte	16
+	.byte	56
+	.byte	44
+	.byte	24
+	.byte	8
+	.byte	40
+	.byte	12
+	.byte	32
+	.byte	28
+	.byte	36
+	.byte	24
+	.byte	32
+	.byte	12
+	.byte	4
+	.byte	20
+	.byte	16
+	.byte	60
+	.byte	36
+	.byte	28
+	.byte	8
+	.byte	52
+	.byte	40
+	.byte	48
+	.byte	44
+	.byte	56
diff --git a/libkern/kxld/kxld.c b/libkern/kxld/kxld.c
index 3d9de9588..ada1cf3cf 100644
--- a/libkern/kxld/kxld.c
+++ b/libkern/kxld/kxld.c
@@ -43,7 +43,7 @@
 #include "kxld_array.h"
 #include "kxld_dict.h"
 #include "kxld_kext.h"
-#include "kxld_state.h"
+#include "kxld_object.h"
 #include "kxld_sym.h"
 #include "kxld_symtab.h"
 #include "kxld_util.h"
@@ -54,11 +54,12 @@ struct kxld_vtable;
 struct kxld_context {
     KXLDKext *kext;
     KXLDArray *section_order;
-    KXLDArray deps;
-    KXLDArray tmps;
-    KXLDDict defined_symbols;
-    KXLDDict obsolete_symbols;
-    KXLDDict vtables;
+    KXLDArray objects;
+    KXLDArray dependencies;
+    KXLDDict defined_symbols_by_name;
+    KXLDDict defined_cxx_symbols_by_value;
+    KXLDDict obsolete_symbols_by_name;
+    KXLDDict vtables_by_name;
     KXLDFlags flags;
     KXLDAllocateCallback allocate_callback;
     cpu_type_t cputype;
@@ -88,6 +89,14 @@ static KXLDDict *s_order_dict;
 * Prototypes
 *******************************************************************************/
 
+static kern_return_t init_context(KXLDContext *context, u_int ndependencies);
+static kern_return_t init_kext_objects(KXLDContext *context, u_char *file, 
+    u_long size, const char *name, KXLDDependency *dependencies, 
+    u_int ndependencies);
+static KXLDObject * get_object_for_file(KXLDContext *context, 
+    u_char *file, u_long size, const char *name);
+static u_char * allocate_kext(KXLDContext *context, void *callback_data,
+    kxld_addr_t *vmaddr, u_long *vmsize, u_char **linked_object_alloc_out);
 static void clear_context(KXLDContext *context);
 
 /*******************************************************************************
@@ -98,10 +107,10 @@ kxld_create_context(KXLDContext **_context,
     KXLDFlags flags, cpu_type_t cputype, cpu_subtype_t cpusubtype)
 {
     kern_return_t rval = KERN_FAILURE;
-    KXLDContext *context = NULL;
-    KXLDArray *section_order = NULL;
+    KXLDContext       * context         = NULL;
+    KXLDArray         * section_order   = NULL;
 #if !KERNEL
-    cpu_type_t *cputype_p = NULL;
+    cpu_type_t        * cputype_p       = NULL;
 #endif
 
     check(_context);
@@ -177,7 +186,7 @@ kxld_create_context(KXLDContext **_context,
     context = NULL;
 
 finish:
-    if (context) kxld_free(context, sizeof(*context));
+    if (context) kxld_destroy_context(context);
     if (section_order) kxld_free(section_order, sizeof(*section_order));
 #if !KERNEL
     if (cputype_p) kxld_free(cputype_p, sizeof(*cputype_p));
@@ -191,24 +200,30 @@ finish:
 void
 kxld_destroy_context(KXLDContext *context)
 {
-    KXLDState *dep = NULL;
+    KXLDObject *object = NULL;
+    KXLDKext *dep = NULL;
     u_int i = 0;
 
     check(context);
 
     kxld_kext_deinit(context->kext);
 
-    for (i = 0; i < context->deps.maxitems; ++i) {
-        dep = kxld_array_get_slot(&context->deps, i);
-        kxld_state_deinit(dep);
+    for (i = 0; i < context->objects.maxitems; ++i) {
+        object = kxld_array_get_slot(&context->objects, i);
+        kxld_object_deinit(object);
     }
+    kxld_array_deinit(&context->objects);
 
-    kxld_array_deinit(&context->deps);
-    kxld_array_deinit(&context->tmps);
+    for (i = 0; i < context->dependencies.maxitems; ++i) {
+        dep = kxld_array_get_slot(&context->dependencies, i);
+        kxld_kext_deinit(dep);
+    }
+    kxld_array_deinit(&context->dependencies);
 
-    kxld_dict_deinit(&context->defined_symbols);
-    kxld_dict_deinit(&context->obsolete_symbols);
-    kxld_dict_deinit(&context->vtables);
+    kxld_dict_deinit(&context->defined_symbols_by_name);
+    kxld_dict_deinit(&context->defined_cxx_symbols_by_value);
+    kxld_dict_deinit(&context->obsolete_symbols_by_name);
+    kxld_dict_deinit(&context->vtables_by_name);
 
     kxld_free(context->kext, kxld_kext_sizeof());
     kxld_free(context, sizeof(*context));
@@ -220,229 +235,262 @@ kxld_destroy_context(KXLDContext *context)
 *******************************************************************************/
 kern_return_t
 kxld_link_file(
-    KXLDContext *context,
-    u_char *file,
-    u_long size,
-    const char *name,
-    void *callback_data,
-    u_char **deps,
-    u_int ndeps,
-    u_char **_linked_object,
-    kxld_addr_t *kmod_info_kern,
-    u_char **_link_state,
-    u_long *_link_state_size,
-    u_char **_symbol_file __unused,
-    u_long *_symbol_file_size __unused)
+    KXLDContext       * context,
+    u_char            * file,
+    u_long              size,
+    const char        * name,
+    void              * callback_data,
+    KXLDDependency    * dependencies,
+    u_int               ndependencies,
+    u_char           ** linked_object_out,
+    kxld_addr_t       * kmod_info_kern)
 {
-    kern_return_t rval = KERN_FAILURE;
-    KXLDState *state = NULL;
-    KXLDAllocateFlags flags = 0;
-    kxld_addr_t vmaddr = 0;
-    u_long header_size = 0;
-    u_long vmsize = 0;
-    u_int nsyms = 0;
-    u_int nvtables = 0;
-    u_int i = 0;
-    u_char *linked_object = NULL;
-    u_char *linked_object_alloc = NULL;
-    u_char *link_state = NULL;
-    u_char *symbol_file = NULL;
-    u_long link_state_size = 0;
-    u_long symbol_file_size = 0;
+    kern_return_t       rval                    = KERN_FAILURE;
+    kxld_addr_t         vmaddr                  = 0;
+    u_long              vmsize                  = 0;
+    u_char            * linked_object           = NULL;
+    u_char            * linked_object_alloc     = NULL;
 
     kxld_set_logging_callback_data(name, callback_data);
 
+    kxld_log(kKxldLogLinking, kKxldLogBasic, "Linking kext %s", name);
+
     require_action(context, finish, rval=KERN_INVALID_ARGUMENT);
     require_action(file, finish, rval=KERN_INVALID_ARGUMENT);
     require_action(size, finish, rval=KERN_INVALID_ARGUMENT);
+    require_action(dependencies, finish, rval=KERN_INVALID_ARGUMENT);
+    require_action(ndependencies, finish, rval=KERN_INVALID_ARGUMENT);
+    require_action(linked_object_out, finish, rval=KERN_INVALID_ARGUMENT);
+    require_action(kmod_info_kern, finish, rval=KERN_INVALID_ARGUMENT);
 
-    rval = kxld_array_init(&context->deps, sizeof(struct kxld_state), ndeps);
+    rval = init_context(context, ndependencies);
     require_noerr(rval, finish);
 
-    if (deps) {
-        /* Initialize the dependencies */
-        for (i = 0; i < ndeps; ++i) {
-            state = kxld_array_get_item(&context->deps, i);
-
-            rval = kxld_state_init_from_file(state, deps[i], 
-                context->section_order);
-            require_noerr(rval, finish);
-        }
-    }
-
-    rval = kxld_kext_init(context->kext, file, size, name, 
-        context->flags, (deps == 0) /* is_kernel */, context->section_order, 
-        context->cputype, context->cpusubtype);
+    rval = init_kext_objects(context, file, size, name, 
+        dependencies, ndependencies);
     require_noerr(rval, finish);
 
-    if (deps) {
+    linked_object = allocate_kext(context, callback_data, 
+        &vmaddr, &vmsize, &linked_object_alloc);
+    require_action(linked_object, finish, rval=KERN_RESOURCE_SHORTAGE);
 
-        /* Calculate the base number of symbols and vtables in the kext */
+    rval = kxld_kext_relocate(context->kext, vmaddr, 
+        &context->vtables_by_name, 
+        &context->defined_symbols_by_name, 
+        &context->obsolete_symbols_by_name,
+        &context->defined_cxx_symbols_by_value);
+    require_noerr(rval, finish);
 
-        nsyms += kxld_kext_get_num_symbols(context->kext);
-        nvtables += kxld_kext_get_num_vtables(context->kext);
+    rval = kxld_kext_export_linked_object(context->kext, 
+        linked_object, kmod_info_kern);
+    require_noerr(rval, finish);
 
-        /* Extract the symbol and vtable counts from the dependencies.
-         */
+    *linked_object_out = linked_object;
+    linked_object_alloc = NULL;
 
-        for (i = 0; i < ndeps; ++i) {
-            cpu_type_t cputype; 
-            cpu_subtype_t cpusubtype; 
+    rval = KERN_SUCCESS;
+finish:
+    if (linked_object_alloc) {
+        kxld_page_free_untracked(linked_object_alloc, vmsize);
+    }
 
-            state = kxld_array_get_item(&context->deps, i);
+    clear_context(context);
+    kxld_set_logging_callback_data(NULL, NULL);
 
-            kxld_state_get_cputype(state, &cputype, &cpusubtype);
+    return rval;
+}
 
-            rval = kxld_kext_validate_cputype(context->kext, 
-                cputype, cpusubtype);
-            require_noerr(rval, finish);
+/*******************************************************************************
+*******************************************************************************/
+static kern_return_t
+init_context(KXLDContext *context, u_int ndependencies)
+{
+    kern_return_t rval = KERN_FAILURE;
 
-            nsyms += kxld_state_get_num_symbols(state);
-            nvtables += kxld_state_get_num_vtables(state);
-        }
+    /* Create an array of objects large enough to hold an object
+     * for every dependency, an interface for each dependency, and a kext. */
+    rval = kxld_array_init(&context->objects,
+        kxld_object_sizeof(), 2 * ndependencies + 1);
+    require_noerr(rval, finish);
 
-        /* Create the global symbol and vtable tables */
+    rval = kxld_array_init(&context->dependencies, 
+        kxld_kext_sizeof(), ndependencies);
+    require_noerr(rval, finish);
 
-        rval = kxld_dict_init(&context->defined_symbols, kxld_dict_string_hash,
-            kxld_dict_string_cmp, nsyms);
-        require_noerr(rval, finish);
+    rval = kxld_dict_init(&context->defined_symbols_by_name, 
+        kxld_dict_string_hash, kxld_dict_string_cmp, 0);
+    require_noerr(rval, finish);
 
-        rval = kxld_dict_init(&context->obsolete_symbols, kxld_dict_string_hash,
-            kxld_dict_string_cmp, 0);
-        require_noerr(rval, finish);
+    rval = kxld_dict_init(&context->defined_cxx_symbols_by_value, 
+        kxld_dict_kxldaddr_hash, kxld_dict_kxldaddr_cmp, 0);
+    require_noerr(rval, finish);
 
-        rval = kxld_dict_init(&context->vtables, kxld_dict_string_hash,
-            kxld_dict_string_cmp, nvtables);
-        require_noerr(rval, finish);
+    rval = kxld_dict_init(&context->obsolete_symbols_by_name, 
+        kxld_dict_string_hash, kxld_dict_string_cmp, 0);
+    require_noerr(rval, finish);
 
-        /* Populate the global tables */
+    rval = kxld_dict_init(&context->vtables_by_name, kxld_dict_string_hash,
+        kxld_dict_string_cmp, 0);
+    require_noerr(rval, finish);
 
-        for (i = 0; i < ndeps; ++i) {
-            state = kxld_array_get_item(&context->deps, i);
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
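Editor's note: init_context() sizes each dictionary at 0 so the tables grow on demand. As a hedged sketch of the string-keyed KXLDDict round trip those tables support (kxld_dict_find() and kxld_dict_deinit() are assumed from libkern/kxld/kxld_dict.h; the key and value are placeholders):

    /* Insert a value under a string key and look it up again. */
    static void *
    dict_roundtrip_example(void *value)
    {
        KXLDDict dict;
        void *found = NULL;

        /* KERN_SUCCESS is 0, so any nonzero return means init failed */
        if (kxld_dict_init(&dict, kxld_dict_string_hash,
                kxld_dict_string_cmp, 0 /* grow on demand */)) {
            return NULL;
        }

        if (!kxld_dict_insert(&dict, "_printf", value)) {
            found = kxld_dict_find(&dict, "_printf");
        }

        kxld_dict_deinit(&dict);
        return found;
    }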
 
-            rval = kxld_state_get_symbols(state, &context->defined_symbols,
-                &context->obsolete_symbols);
-            require_noerr(rval, finish);
+/*******************************************************************************
+*******************************************************************************/
+static kern_return_t 
+init_kext_objects(KXLDContext *context, u_char *file, u_long size, 
+    const char *name, KXLDDependency *dependencies, u_int ndependencies)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDKext *kext = NULL;
+    KXLDObject *kext_object = NULL;
+    KXLDObject *interface_object = NULL;
+    u_int i = 0;
 
-            rval = kxld_state_get_vtables(state, &context->vtables);
-            require_noerr(rval, finish);
+    /* Create a kext object for each dependency.  If it's a direct dependency,
+     * export its symbols by name and by value.  If it's indirect, just export
+     * the C++ symbols by value.
+     */
+    for (i = 0; i < ndependencies; ++i) {
+        kext = kxld_array_get_item(&context->dependencies, i);
+        kext_object = NULL;
+        interface_object = NULL;
+
+        kext_object = get_object_for_file(context, dependencies[i].kext,
+            dependencies[i].kext_size, dependencies[i].kext_name);
+        require_action(kext_object, finish, rval=KERN_FAILURE);
+
+        if (dependencies[i].interface) {
+            interface_object = get_object_for_file(context, 
+                dependencies[i].interface, dependencies[i].interface_size,
+                dependencies[i].interface_name);
+            require_action(interface_object, finish, rval=KERN_FAILURE);
         }
 
-        if (kxld_kext_is_true_kext(context->kext)) {
-
-            /* Allocate the kext object */
-
-            kxld_kext_get_vmsize(context->kext, &header_size, &vmsize);
-            vmaddr = context->allocate_callback(vmsize, &flags, callback_data);
-            require_action(!(vmaddr & (PAGE_SIZE-1)), finish, rval=KERN_FAILURE;
-                kxld_log(kKxldLogLinking, kKxldLogErr,
-                    "Load address %p is not page-aligned.",
-                    (void *) (uintptr_t) vmaddr));
-
-            if (flags & kKxldAllocateWritable) {
-                linked_object = (u_char *) (u_long) vmaddr;
-            } else {
-                linked_object_alloc = kxld_page_alloc_untracked(vmsize);
-                require_action(linked_object_alloc, finish, rval=KERN_RESOURCE_SHORTAGE);
-                linked_object = linked_object_alloc;
-            }
-
-            /* Zero out the memory before we fill it.  We fill this buffer in a
-             * sparse fashion, and it's simpler to clear it now rather than
-             * track and zero any pieces we didn't touch after we've written
-             * all of the sections to memory.
-             */
-            bzero(linked_object, vmsize);
-
-            /* Relocate to the new link address */
+        rval = kxld_kext_init(kext, kext_object, interface_object);
+        require_noerr(rval, finish);
 
-            rval = kxld_kext_relocate(context->kext, vmaddr, &context->vtables, 
-                &context->defined_symbols, &context->obsolete_symbols);
+        if (dependencies[i].is_direct_dependency) {
+            rval = kxld_kext_export_symbols(kext,
+                &context->defined_symbols_by_name, 
+                &context->obsolete_symbols_by_name,
+                &context->defined_cxx_symbols_by_value);
             require_noerr(rval, finish);
-
-            /* Generate linked object if requested */
-
-            if (_linked_object) {
-                check(kmod_info_kern);
-                *_linked_object = NULL;
-                *kmod_info_kern = 0;
-
-                rval = kxld_kext_export_linked_object(context->kext, linked_object, 
-                    kmod_info_kern);
-                require_noerr(rval, finish);
-            }
-
-        } else  {
-            /* Resolve the pseudokext's symbols */
-
-            rval = kxld_kext_resolve(context->kext, &context->vtables, 
-                &context->defined_symbols);
+        } else {
+            rval = kxld_kext_export_symbols(kext, 
+                /* defined_symbols */ NULL, /* obsolete_symbols */ NULL, 
+                &context->defined_cxx_symbols_by_value);
             require_noerr(rval, finish);
         }
     }
 
-    /* Generate link state if requested */
-
-    if (_link_state) {
-        check(_link_state_size);
-        *_link_state = NULL;
-        *_link_state_size = 0;
+    /* Export the vtables for all of the dependencies. */
+    for (i = 0; i < context->dependencies.nitems; ++i) {
+        kext = kxld_array_get_item(&context->dependencies, i);
 
-        kxld_dict_clear(&context->defined_symbols);
-        rval = kxld_state_export_kext_to_file(context->kext, &link_state,
-            &link_state_size, &context->defined_symbols, &context->tmps);
+        rval = kxld_kext_export_vtables(kext,
+            &context->defined_cxx_symbols_by_value,
+            &context->defined_symbols_by_name,
+            &context->vtables_by_name);
         require_noerr(rval, finish);
     }
 
-#if !KERNEL
-    /* Generate symbol file if requested */
-
-    if (_symbol_file) {
-        check(_symbol_file_size);
-        *_symbol_file = NULL;
-        *_symbol_file_size = 0;
+    /* Create a kext object for the kext we're linking and export its locally
+     * defined C++ symbols. 
+     */
+    kext_object = get_object_for_file(context, file, size, name);
+    require_action(kext_object, finish, rval=KERN_FAILURE);
 
-        rval = kxld_kext_export_symbol_file(context->kext, &symbol_file,
-            &symbol_file_size);
-        require_noerr(rval, finish);
-    }
-#endif /* !KERNEL */
+    rval = kxld_kext_init(context->kext, kext_object, /* interface */ NULL);
+    require_noerr(rval, finish);
 
-    /* Commit output to return variables */
+    rval = kxld_kext_export_symbols(context->kext,
+        /* defined_symbols */ NULL, /* obsolete_symbols */ NULL, 
+        &context->defined_cxx_symbols_by_value);
+    require_noerr(rval, finish);
 
-    if (_linked_object) {
-        *_linked_object = linked_object;
-        linked_object = NULL;
-        linked_object_alloc = NULL;
-    }
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
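Editor's note: for reference, a hypothetical caller-side setup of the dependency list this function consumes. The KXLDDependency field names are taken from the usage above (the struct itself lives in libkern/kxld/kxld_types.h); the buffers and bundle identifiers are placeholders:

    #include <string.h>
    #include <sys/types.h>
    #include <mach/boolean.h>

    static void
    fill_dependencies_example(KXLDDependency deps[2],
        u_char *kernel_file, u_long kernel_size,
        u_char *libkern_file, u_long libkern_size)
    {
        memset(deps, 0, 2 * sizeof(*deps));

        deps[0].kext = kernel_file;           /* Mach-O providing symbols    */
        deps[0].kext_size = kernel_size;
        deps[0].kext_name = "mach_kernel";
        deps[0].is_direct_dependency = TRUE;  /* exported by name and value  */

        deps[1].kext = libkern_file;          /* indirect: C++ symbols only  */
        deps[1].kext_size = libkern_size;
        deps[1].kext_name = "com.apple.kpi.libkern";
        deps[1].is_direct_dependency = FALSE;
    }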
 
-    if (_link_state) {
-        *_link_state = link_state;
-        *_link_state_size = link_state_size;
-        link_state = NULL;
-    }
+/*******************************************************************************
+*******************************************************************************/
+static KXLDObject *
+get_object_for_file(KXLDContext *context, u_char *file, u_long size,
+    const char *name)
+{
+    KXLDObject *rval = NULL;
+    KXLDObject *object = NULL;
+    kern_return_t result = 0;
+    u_int i = 0;
 
-#if !KERNEL
-    if (_symbol_file) {
-        *_symbol_file = symbol_file;
-        *_symbol_file_size = symbol_file_size;
-        symbol_file = NULL;
-    }
-#endif
+    for (i = 0; i < context->objects.nitems; ++i) {
+        object = kxld_array_get_item(&context->objects, i);
 
-    rval = KERN_SUCCESS;
+        if (!kxld_object_get_file(object)) {
+            result = kxld_object_init_from_macho(object, file, size, name,
+                context->section_order, context->cputype, context->cpusubtype);
+            require_noerr(result, finish);
 
-finish:
+            rval = object;
+            break;
+        }
 
-    if (linked_object_alloc) kxld_page_free_untracked(linked_object_alloc, vmsize);
-    if (link_state) kxld_page_free_untracked(link_state, link_state_size);
-    if (symbol_file) kxld_page_free_untracked(symbol_file, symbol_file_size);
+        if (kxld_object_get_file(object) == file) {
+            rval = object;
+            break;
+        }
+    }
 
-    clear_context(context);
+finish:
+    return rval;
+}
+ 
+/*******************************************************************************
+*******************************************************************************/
+static u_char *
+allocate_kext(KXLDContext *context, void *callback_data,
+    kxld_addr_t *vmaddr_out, u_long *vmsize_out, 
+    u_char **linked_object_alloc_out)
+{
+    KXLDAllocateFlags   flags                   = 0;
+    kxld_addr_t         vmaddr                  = 0;
+    u_long              vmsize                  = 0;
+    u_long              header_size             = 0;
+    u_char            * linked_object           = NULL;
+
+    *linked_object_alloc_out = NULL;
+
+    kxld_kext_get_vmsize(context->kext, &header_size, &vmsize);
+    vmaddr = context->allocate_callback(vmsize, &flags, callback_data);
+    require_action(!(vmaddr & (PAGE_SIZE-1)), finish,
+        kxld_log(kKxldLogLinking, kKxldLogErr,
+            "Load address %p is not page-aligned.",
+            (void *) (uintptr_t) vmaddr));
+
+    if (flags & kKxldAllocateWritable) {
+        linked_object = (u_char *) (u_long) vmaddr;
+    } else {
+        linked_object = kxld_page_alloc_untracked(vmsize);
+        require(linked_object, finish);
+
+        *linked_object_alloc_out = linked_object;
+    }
 
-    kxld_set_logging_callback_data(NULL, NULL);
+    /* Zero out the memory before we fill it.  We fill this buffer in a
+     * sparse fashion, and it's simpler to clear it now rather than
+     * track and zero any pieces we didn't touch after we've written
+     * all of the sections to memory.
+     */
+    bzero(linked_object, vmsize);
+    *vmaddr_out = vmaddr;
+    *vmsize_out = vmsize;
 
-    return rval;
+finish:
+    return linked_object;
 }
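Editor's note: allocate_kext() above requires only that the callback return a page-aligned address and report, via flags, whether that memory is directly writable. A user-space sketch under those rules; the callback shape is assumed from the KXLDAllocateCallback typedef in kxld_types.h, and valloc() is used because it returns page-aligned memory:

    #include <stdint.h>
    #include <stdlib.h>
    #include <sys/types.h>

    static kxld_addr_t
    allocate_callback_example(u_long size, KXLDAllocateFlags *flags,
        void *user_data __unused)
    {
        void *buf = valloc(size);  /* page-aligned, per the check above */

        if (!buf) return 0;

        /* Writable: kxld links directly into this buffer instead of
         * copying through an untracked allocation. */
        *flags = kKxldAllocateWritable;
        return (kxld_addr_t) (uintptr_t) buf;
    }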
 
 /*******************************************************************************
@@ -450,21 +498,29 @@ finish:
 static void
 clear_context(KXLDContext *context)
 {
-    KXLDState *state = NULL;
+    KXLDObject * object = NULL;
+    KXLDKext   * dep    = NULL;
     u_int i = 0;
 
     check(context);
 
     kxld_kext_clear(context->kext);
-    for (i = 0; i < context->deps.nitems; ++i) {
-        state = kxld_array_get_item(&context->deps, i);
-        kxld_state_clear(state);
+    
+    for (i = 0; i < context->objects.nitems; ++i) {
+        object = kxld_array_get_item(&context->objects, i);
+        kxld_object_clear(object);
+    }
+    kxld_array_reset(&context->objects);
+
+    for (i = 0; i < context->dependencies.nitems; ++i) {
+        dep = kxld_array_get_item(&context->dependencies, i);
+        kxld_kext_clear(dep);
     }
-    kxld_array_reset(&context->deps);
+    kxld_array_reset(&context->dependencies);
 
-    kxld_array_clear(&context->tmps);
-    kxld_dict_clear(&context->defined_symbols);
-    kxld_dict_clear(&context->obsolete_symbols);
-    kxld_dict_clear(&context->vtables);
+    kxld_dict_clear(&context->defined_symbols_by_name);
+    kxld_dict_clear(&context->defined_cxx_symbols_by_value);
+    kxld_dict_clear(&context->obsolete_symbols_by_name);
+    kxld_dict_clear(&context->vtables_by_name);
 }
 
diff --git a/libkern/kxld/kxld_array.c b/libkern/kxld/kxld_array.c
index 9720f3d08..55d009ba4 100644
--- a/libkern/kxld/kxld_array.c
+++ b/libkern/kxld/kxld_array.c
@@ -139,6 +139,9 @@ array_init(KXLDArray *array, size_t itemsize, u_int nitems)
 {
     kern_return_t rval = KERN_FAILURE;
     KXLDArrayPool *pool = NULL;
+    
+    require_action(itemsize, finish, rval=KERN_INVALID_ARGUMENT);
+    require_action(array->npools < 2, finish, rval=KERN_INVALID_ARGUMENT);
  
     array->itemsize = itemsize;
 
diff --git a/libkern/kxld/kxld_copyright.c b/libkern/kxld/kxld_copyright.c
index 9b70348e8..e1f13c257 100644
--- a/libkern/kxld/kxld_copyright.c
+++ b/libkern/kxld/kxld_copyright.c
@@ -1,3 +1,31 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
 #include <string.h>
 #include <sys/types.h>
 #include <AssertMacros.h>
@@ -186,7 +214,7 @@ dates_are_valid(const char *str, const u_long len)
         if (is_token_break(token_ptr)) {
             if (!token_index) continue;
 
-            token_buffer[token_index++] = '\0';
+            token_buffer[token_index] = '\0';
 
             if (!token_is_year(token_buffer) && 
                 !token_is_yearRange(token_buffer)) 
@@ -230,7 +258,7 @@ kxld_validate_copyright_string(const char *str)
     str = copyright + const_strlen(kCopyrightToken);
 
     len = rights - str;
-    date_str = kxld_alloc(len);
+    date_str = kxld_alloc(len+1);
     if (!date_str) goto finish;
 
     strncpy(date_str, str, len);
@@ -240,7 +268,7 @@ kxld_validate_copyright_string(const char *str)
 
     result = TRUE;
 finish:
-    if (date_str) kxld_free(date_str, len);
+    if (date_str) kxld_free(date_str, len+1);
     return result;
 }
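Editor's note: the two len+1 changes above pair with the token_buffer fix. strncpy() does not NUL-terminate when the source holds at least len bytes, so the terminator needs its own slot. A sketch of the corrected pattern; the explicit termination happens outside this hunk, so that line is an assumption here:

    char *date_str = kxld_alloc(len + 1);  /* +1 reserves the NUL slot  */
    if (date_str) {
        strncpy(date_str, str, len);       /* copies len bytes, no NUL  */
        date_str[len] = '\0';              /* terminate explicitly      */
    }
    /* ... and the matching release must use the same size: */
    if (date_str) kxld_free(date_str, len + 1);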
 
diff --git a/libkern/kxld/kxld_demangle.c b/libkern/kxld/kxld_demangle.c
index 98ca4d55a..c0bb5e276 100644
--- a/libkern/kxld/kxld_demangle.c
+++ b/libkern/kxld/kxld_demangle.c
@@ -1,3 +1,31 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
 #if !KERNEL
 
 #include <stdlib.h>
diff --git a/libkern/kxld/kxld_demangle.h b/libkern/kxld/kxld_demangle.h
index 1fee33193..5c38abc8f 100644
--- a/libkern/kxld/kxld_demangle.h
+++ b/libkern/kxld/kxld_demangle.h
@@ -1,3 +1,31 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
 #ifndef _KXLD_DEMANGLE_H_
 #define _KXLD_DEMANGLE_H_
 
diff --git a/libkern/kxld/kxld_kext.c b/libkern/kxld/kxld_kext.c
index a5520711e..b2be1535a 100644
--- a/libkern/kxld/kxld_kext.c
+++ b/libkern/kxld/kxld_kext.c
@@ -54,10 +54,10 @@
 #include "kxld_demangle.h"
 #include "kxld_dict.h"
 #include "kxld_kext.h"
+#include "kxld_object.h"
 #include "kxld_reloc.h"
 #include "kxld_sect.h"
 #include "kxld_seg.h"
-#include "kxld_state.h"
 #include "kxld_symtab.h"
 #include "kxld_util.h"
 #include "kxld_uuid.h"
@@ -65,125 +65,44 @@
 
 struct symtab_command;
 
-enum kxld_link_type {
-    KXLD_LINK_KERNEL,
-    KXLD_LINK_PSEUDO_KEXT,
-    KXLD_LINK_KEXT,
-    KXLD_LINK_UNKNOWN
-};
-
-typedef enum kxld_link_type KXLDLinkType;
-
 struct kxld_kext {
-    u_char *file;
-    u_long size;
-    const char *name;
-    uint32_t filetype;
-    KXLDArray segs;
-    KXLDArray sects;
+    KXLDObject *kext;
+    KXLDObject *interface;
     KXLDArray vtables;
-    KXLDArray extrelocs;
-    KXLDArray locrelocs;
     KXLDDict vtable_index;
-    KXLDRelocator relocator;
-    KXLDuuid uuid;
-    KXLDSymtab *symtab;
-    kxld_addr_t link_addr;
-    kmod_info_t *kmod_info;
-    kxld_addr_t kmod_link_addr;
-    cpu_type_t cputype;
-    cpu_subtype_t cpusubtype;
-    KXLDLinkType link_type;
-    KXLDFlags flags;
-    boolean_t is_final_image;
-    boolean_t got_is_created;
-    struct dysymtab_command *dysymtab_hdr;
-#if KXLD_USER_OR_OBJECT
-    KXLDArray *section_order;
-#endif
-#if !KERNEL
-    enum NXByteOrder host_order;
-    enum NXByteOrder target_order;
-#endif
+    boolean_t vtables_created;
+    boolean_t vtable_index_created;
 };
 
 /*******************************************************************************
 * Prototypes
 *******************************************************************************/
 
-static kern_return_t get_target_machine_info(KXLDKext *kext, cpu_type_t cputype, 
-    cpu_subtype_t cpusubtype);
-static kern_return_t get_file_for_arch(KXLDKext *kext, u_char *file, u_long size);
-
-static u_long get_macho_header_size(const KXLDKext *kext);
-static u_long get_macho_data_size(const KXLDKext *kext);
-static kern_return_t export_macho_header(const KXLDKext *kext, u_char *buf, 
-    u_int ncmds, u_long *header_offset, u_long header_size);
-
-static kern_return_t init_from_execute(KXLDKext *kext);
-static kern_return_t init_from_final_linked_image(KXLDKext *kext, u_int *filetype_out,
-    struct symtab_command **symtab_hdr_out);
-
-static boolean_t target_supports_protected_segments(const KXLDKext *kext)
-    __attribute__((pure));
-
-#if KXLD_USER_OR_OBJECT
-static boolean_t target_supports_object(const KXLDKext *kext) __attribute((pure));
-static kern_return_t init_from_object(KXLDKext *kext);
-static kern_return_t process_relocs_from_sections(KXLDKext *kext);
-#endif /* KXLD_USER_OR_OBJECT */
-
-#if KXLD_USER_OR_BUNDLE
-static boolean_t target_supports_bundle(const KXLDKext *kext) __attribute((pure));
-static kern_return_t init_from_bundle(KXLDKext *kext);
-static kern_return_t process_relocs_from_tables(KXLDKext *kext);
-static kern_return_t process_symbol_pointers(KXLDKext *kext);
-static void add_to_ptr(u_char *symptr, kxld_addr_t val, boolean_t is_32_bit);
-#endif /* KXLD_USER_OR_BUNDLE */
-
-static kern_return_t get_metaclass_symbol_from_super_meta_class_pointer_symbol(
-    KXLDKext *kext, KXLDSym *super_metaclass_pointer_sym, KXLDSym **meta_class);
-
-static kern_return_t resolve_symbols(KXLDKext *kext, KXLDDict *defined_symbols,
-    KXLDDict *obsolete_symbols);
+static kern_return_t export_symbols_through_interface(
+    const KXLDObject *kext, const KXLDObject *interface, 
+    KXLDDict *defined_symbols_by_name,
+    KXLDDict *obsolete_symbols_by_name,
+    KXLDDict *defined_cxx_symbols_by_value);
+static kern_return_t export_symbols(const KXLDObject *kext,
+    KXLDDict *defined_symbols_by_name, 
+    KXLDDict *defined_cxx_symbols_by_value);
+
+static kern_return_t create_vtables(KXLDKext *kext,
+    const KXLDDict *defined_cxx_symbols, const KXLDDict *defined_symbols);
+static kern_return_t get_vtable_syms_from_smcp(KXLDKext *kext, 
+    const KXLDDict *defined_symbols, KXLDSym *super_metaclass_ptr_sym, 
+    KXLDSym **vtable_sym_out, KXLDSym **meta_vtable_sym_out);
+
+static kern_return_t resolve_symbols(KXLDKext *kext, 
+    const KXLDDict *defined_symbols, const KXLDDict *obsolete_symbols);
+
 static kern_return_t patch_vtables(KXLDKext *kext, KXLDDict *patched_vtables,
-    KXLDDict *defined_symbols);
+    const KXLDDict *defined_symbols);
+static const KXLDSym *get_metaclass_symbol_from_super_meta_class_pointer_symbol(
+    KXLDKext *kext, KXLDSym *super_metaclass_pointer_sym);
+static kern_return_t create_vtable_index(KXLDKext *kext);
+
 static kern_return_t validate_symbols(KXLDKext *kext);
-static kern_return_t populate_kmod_info(KXLDKext *kext);
-static kern_return_t copy_vtables(KXLDKext *kext, const KXLDDict *patched_vtables);
-static kern_return_t create_vtables(KXLDKext *kext);
-static void restrict_private_symbols(KXLDKext *kext);
-
-#if KXLD_USER_OR_GOT || KXLD_USER_OR_COMMON
-static kern_return_t add_section(KXLDKext *kext, KXLDSect **sect);
-#endif /* KXLD_USER_OR_GOT || KXLD_USER_OR_COMMON */
-
-#if KXLD_USER_OR_GOT
-static boolean_t target_has_got(const KXLDKext *kext) __attribute__((pure));
-static kern_return_t create_got(KXLDKext *kext);
-static kern_return_t populate_got(KXLDKext *kext);
-#endif /* KXLD_USER_OR_GOT */
-
-static boolean_t target_supports_common(const KXLDKext *kext) __attribute((pure));
-#if KXLD_USER_OR_COMMON
-static kern_return_t resolve_common_symbols(KXLDKext *kext);
-#endif /* KXLD_USER_OR_COMMON */
-
-static boolean_t target_supports_strict_patching(KXLDKext *kext)
-    __attribute__((pure));
-
-#if KXLD_USER_OR_ILP32
-static u_long get_macho_cmd_data_32(u_char *file, u_long offset, 
-    u_int *filetype, u_int *ncmds);
-static kern_return_t export_macho_header_32(const KXLDKext *kext, u_char *buf, 
-    u_int ncmds, u_long *header_offset, u_long header_size);
-#endif /* KXLD_USER_OR_ILP32 */
-#if KXLD_USER_OR_LP64
-static u_long get_macho_cmd_data_64(u_char *file, u_long offset,
-    u_int *filetype, u_int *ncmds);
-static kern_return_t export_macho_header_64(const KXLDKext *kext, u_char *buf, 
-    u_int ncmds, u_long *header_offset, u_long header_size);
-#endif /* KXLD_USER_OR_LP64 */
 
 /*******************************************************************************
 *******************************************************************************/
@@ -196,109 +115,93 @@ kxld_kext_sizeof(void)
 /*******************************************************************************
 *******************************************************************************/
 kern_return_t
-kxld_kext_init(KXLDKext *kext, u_char *file, u_long size,
-    const char *name, KXLDFlags flags, boolean_t is_kernel,
-    KXLDArray *section_order __unused, 
-    cpu_type_t cputype, cpu_subtype_t cpusubtype)
+kxld_kext_init(KXLDKext *kext, KXLDObject *kext_object, 
+    KXLDObject *interface_object)
 {
     kern_return_t rval = KERN_FAILURE;
-    KXLDSeg *seg = NULL;
-    u_int i = 0;
 
     check(kext);
-    check(file);
-    check(size);
+    check(kext_object);
 
-    kext->name = name;
-    kext->flags = flags;
-#if KXLD_USER_OR_OBJECT
-    kext->section_order = section_order;
-#endif
+    kext->kext = kext_object;
 
-    /* Find the local architecture */
+    if (interface_object) {
+        kext->interface = interface_object;
 
-    rval = get_target_machine_info(kext, cputype, cpusubtype);
-    require_noerr(rval, finish);
-
-    /* Find the Mach-O file for the target architecture */
+        rval = kxld_object_index_symbols_by_name(kext->kext);
+        require_noerr(rval, finish);
+    }
+    
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
 
-    rval = get_file_for_arch(kext, file, size);
-    require_noerr(rval, finish);
+/*******************************************************************************
+*******************************************************************************/
+void
+kxld_kext_clear(KXLDKext *kext)
+{
+    KXLDVTable *vtable = NULL;
+    u_int i;
 
-    /* Build the relocator */
+    check(kext);
 
-    rval = kxld_relocator_init(&kext->relocator, kext->cputype, 
-        kext->cpusubtype, kxld_kext_target_needs_swap(kext));
-    require_noerr(rval, finish);
+    for (i = 0; i < kext->vtables.nitems; ++i) {
+        vtable = kxld_array_get_item(&kext->vtables, i);
+        kxld_vtable_clear(vtable);
+    }
+    kxld_array_reset(&kext->vtables);
+    kxld_dict_clear(&kext->vtable_index);
 
-    /* Allocate the symbol table */
+    kext->kext = NULL;
+    kext->interface = NULL;
+    kext->vtables_created = FALSE;
+    kext->vtable_index_created = FALSE;
+}
 
-    if (!kext->symtab) {
-        kext->symtab = kxld_alloc(kxld_symtab_sizeof());
-        require_action(kext->symtab, finish, rval=KERN_RESOURCE_SHORTAGE);
-        bzero(kext->symtab, kxld_symtab_sizeof());
-    }
 
-    if (is_kernel) {
-        kext->link_type = KXLD_LINK_KERNEL;
-    } else {
-        kext->link_type = KXLD_LINK_UNKNOWN;
-    }
+/*******************************************************************************
+*******************************************************************************/
+void 
+kxld_kext_deinit(KXLDKext *kext)
+{
+    KXLDVTable *vtable = NULL;
+    u_int i;
 
-    /* There are four types of Mach-O files that we can support:
-     *   1) 32-bit MH_OBJECT      - All pre-SnowLeopard systems
-     *   2) 32-bit MH_KEXT_BUNDLE - Not supported
-     *   3) 64-bit MH_OBJECT      - Needed for K64 bringup
-     *   4) 64-bit MH_KEXT_BUNDLE - The likely 64-bit kext filetype
-     */
+    check(kext);
 
-    if (kxld_kext_is_32_bit(kext)) {
-        struct mach_header *mach_hdr = (struct mach_header *) kext->file;
-        kext->filetype = mach_hdr->filetype;
-    } else {
-        struct mach_header_64 *mach_hdr = (struct mach_header_64 *) kext->file;
-        kext->filetype = mach_hdr->filetype;
+    for (i = 0; i < kext->vtables.maxitems; ++i) {
+        vtable = kxld_array_get_slot(&kext->vtables, i);
+        kxld_vtable_deinit(vtable);
     }
+    kxld_array_deinit(&kext->vtables);
+    kxld_dict_deinit(&kext->vtable_index);
 
-    switch (kext->filetype) {
-#if KXLD_USER_OR_OBJECT
-    case MH_OBJECT:
-        rval = init_from_object(kext);
-        require_noerr(rval, finish);
-        break;
-#endif /* KXLD_USER_OR_OBJECT */
-#if KXLD_USER_OR_BUNDLE
-    case MH_KEXT_BUNDLE:
-        rval = init_from_bundle(kext);
-        require_noerr(rval, finish);
-        break;
-#endif /* KXLD_USER_OR_BUNDLE */
-    case MH_EXECUTE:
-        rval = init_from_execute(kext);
-        require_noerr(rval, finish);
-        break;
-    default:
-        rval = KERN_FAILURE;
-        kxld_log(kKxldLogLinking, kKxldLogErr,
-            kKxldLogFiletypeNotSupported, kext->filetype);
-        goto finish;
-    }
+    bzero(kext, sizeof(*kext));
+}
 
-    for (i = 0; i < kext->segs.nitems; ++i) {
-        seg = kxld_array_get_item(&kext->segs, i);
-        kxld_seg_set_vm_protections(seg, target_supports_protected_segments(kext));
-    }
+/*******************************************************************************
+*******************************************************************************/
+kern_return_t 
+kxld_kext_export_symbols(const KXLDKext *kext, 
+    struct kxld_dict *defined_symbols_by_name,
+    struct kxld_dict *obsolete_symbols_by_name,
+    struct kxld_dict *defined_cxx_symbols_by_value)
+{
+    kern_return_t rval = KERN_FAILURE;
+
+    check(kext);
 
-    switch (kext->link_type) {
-    case KXLD_LINK_KEXT:
-        (void) restrict_private_symbols(kext);
-        /* Fallthrough */
-    case KXLD_LINK_KERNEL:
-        rval = create_vtables(kext);
+    if (kext->interface) {
+        rval = export_symbols_through_interface(kext->kext, kext->interface, 
+            defined_symbols_by_name, obsolete_symbols_by_name,
+            defined_cxx_symbols_by_value);
+        require_noerr(rval, finish);
+    } else {
+        rval = export_symbols(kext->kext, defined_symbols_by_name,
+            defined_cxx_symbols_by_value);
         require_noerr(rval, finish);
-        break;
-    default:
-        break;
     }
 
     rval = KERN_SUCCESS;
@@ -309,189 +212,114 @@ finish:
 /*******************************************************************************
 *******************************************************************************/
 kern_return_t
-get_target_machine_info(KXLDKext *kext, cpu_type_t cputype __unused, 
-    cpu_subtype_t cpusubtype __unused)
+export_symbols_through_interface(const KXLDObject *kext,
+    const KXLDObject *interface, KXLDDict *defined_symbols_by_name,
+    KXLDDict *obsolete_symbols_by_name, KXLDDict *defined_cxx_symbols_by_value)
 {
-#if KERNEL
-
-    /* Because the kernel can only link for its own architecture, we know what
-     * the host and target architectures are at compile time, so we can use
-     * a vastly simplified version of this function.
-     */ 
-
-    check(kext);
-
-#if defined(__i386__)
-    kext->cputype = CPU_TYPE_I386;
-    kext->cpusubtype = CPU_SUBTYPE_I386_ALL;
-    return KERN_SUCCESS;
-#elif defined(__ppc__)
-    kext->cputype = CPU_TYPE_POWERPC;
-    kext->cpusubtype = CPU_SUBTYPE_POWERPC_ALL;
-    return KERN_SUCCESS;
-#elif defined(__x86_64__)
-    kext->cputype = CPU_TYPE_X86_64;
-    kext->cpusubtype = CPU_SUBTYPE_X86_64_ALL;
-    return KERN_SUCCESS;
-#else 
-    kxld_log(kKxldLogLinking, kKxldLogErr, 
-        kKxldLogArchNotSupported, _mh_execute_header->cputype);
-    return KERN_NOT_SUPPORTED;
-#endif /* Supported architecture defines */
-
-
-#else /* !KERNEL */
-
-    /* User-space must look up the architecture it's running on and the target
-     * architecture at run-time.
-     */
-
     kern_return_t rval = KERN_FAILURE;
-    const NXArchInfo *host_arch = NULL;
+    KXLDSymtabIterator iter;
+    const KXLDSymtab *kext_symtab = NULL;
+    const KXLDSymtab *interface_symtab = NULL;
+    KXLDSym *kext_sym = NULL;
+    const KXLDSym *interface_sym = NULL;
 
     check(kext);
+    check(interface);
+
+    kext_symtab = kxld_object_get_symtab(kext);
+    interface_symtab = kxld_object_get_symtab(interface);
+
+    if (defined_symbols_by_name) {
+        /* Add exported symbols */
+        (void) kxld_symtab_iterator_init(&iter, interface_symtab, 
+            kxld_sym_is_undefined, FALSE);
+        while ((interface_sym = kxld_symtab_iterator_get_next(&iter))) {
+            kext_sym = kxld_symtab_get_locally_defined_symbol_by_name(kext_symtab, 
+                interface_sym->name);
+            if (!kext_sym) {
+                kxld_log(kKxldLogLinking, kKxldLogWarn,
+                    "In interface %s of %s, couldn't find symbol %s\n", 
+                    kxld_object_get_name(interface), kxld_object_get_name(kext),
+                    interface_sym->name);
+                continue;
+            }
 
-    host_arch = NXGetLocalArchInfo();
-    require_action(host_arch, finish, rval=KERN_FAILURE);
-
-    kext->host_order = host_arch->byteorder;
+            rval = kxld_dict_insert(defined_symbols_by_name, 
+                kext_sym->name, kext_sym);
+            require_noerr(rval, finish);
+        }
 
-    /* If the user did not specify a cputype, use the local architecture.
-     */
+        /* Add indirect symbols */
+        (void) kxld_symtab_iterator_init(&iter, interface_symtab, 
+            kxld_sym_is_indirect, FALSE);
+        while ((interface_sym = kxld_symtab_iterator_get_next(&iter))) {
+            kext_sym = kxld_symtab_get_locally_defined_symbol_by_name(kext_symtab, 
+                interface_sym->alias);
+            if (!kext_sym) {
+                kxld_log(kKxldLogLinking, kKxldLogWarn,
+                    "In interface %s of %s, couldn't find indirect symbol %s (%s)\n", 
+                    kxld_object_get_name(interface), kxld_object_get_name(kext),
+                    interface_sym->alias, interface_sym->name);
+                continue;
+            }
 
-    if (cputype) {
-        kext->cputype = cputype;
-        kext->cpusubtype = cpusubtype;
-    } else {
-        kext->cputype = host_arch->cputype;
-        kext->target_order = kext->host_order;
-
-        switch (kext->cputype) {
-        case CPU_TYPE_I386:
-            kext->cpusubtype = CPU_SUBTYPE_I386_ALL;
-            break;
-        case CPU_TYPE_POWERPC:
-            kext->cpusubtype = CPU_SUBTYPE_POWERPC_ALL;
-            break;
-        case CPU_TYPE_X86_64:
-            kext->cpusubtype = CPU_SUBTYPE_X86_64_ALL;
-            break;
-        case CPU_TYPE_ARM:
-            kext->cpusubtype = CPU_SUBTYPE_ARM_ALL;
-            break;
-        default:
-            kext->cpusubtype = 0;
+            rval = kxld_dict_insert(defined_symbols_by_name, 
+                interface_sym->name, kext_sym);
+            require_noerr(rval, finish);
         }
     }
 
-    /* Validate that we support the target architecture and record its 
-     * endianness.
-     */
+    /* Add obsolete symbols */
+    if (obsolete_symbols_by_name) {
+        (void) kxld_symtab_iterator_init(&iter, interface_symtab, 
+            kxld_sym_is_obsolete, FALSE);
+        while ((kext_sym = kxld_symtab_iterator_get_next(&iter))) {
+            rval = kxld_dict_insert(obsolete_symbols_by_name, 
+                kext_sym->name, kext_sym);
+            require_noerr(rval, finish);
+        }
+    }
 
-    switch(kext->cputype) {
-    case CPU_TYPE_ARM:
-    case CPU_TYPE_I386:
-    case CPU_TYPE_X86_64:
-        kext->target_order = NX_LittleEndian;
-        break;
-    case CPU_TYPE_POWERPC:
-        kext->target_order = NX_BigEndian;
-        break;
-    default:
-        rval = KERN_NOT_SUPPORTED;
-        kxld_log(kKxldLogLinking, kKxldLogErr, 
-            kKxldLogArchNotSupported, kext->cputype);
-        goto finish;
+    /* Add C++ symbols */
+    if (defined_cxx_symbols_by_value) {
+        (void) kxld_symtab_iterator_init(&iter, kext_symtab,
+            kxld_sym_is_cxx, FALSE);
+        while ((kext_sym = kxld_symtab_iterator_get_next(&iter))) {
+            rval = kxld_dict_insert(defined_cxx_symbols_by_value,
+                &kext_sym->link_addr, kext_sym);
+            require_noerr(rval, finish);
+        }
     }
 
     rval = KERN_SUCCESS;
-
 finish:
     return rval;
-#endif /* KERNEL */
 }
 
 /*******************************************************************************
 *******************************************************************************/
-static kern_return_t
-get_file_for_arch(KXLDKext *kext, u_char *file, u_long size)
+kern_return_t
+export_symbols(const KXLDObject *kext, KXLDDict *defined_symbols_by_name,
+    KXLDDict *defined_cxx_symbols_by_value)
 {
     kern_return_t rval = KERN_FAILURE;
-    struct mach_header *mach_hdr = NULL;
-#if !KERNEL
-    struct fat_header *fat = (struct fat_header *) file;
-    struct fat_arch *archs = (struct fat_arch *) &fat[1];
-    boolean_t swap = FALSE;
-#endif /* KERNEL */
-
-    check(kext);
-    check(file);
-    check(size);
-
-    kext->file = file;
-    kext->size = size;
-
-    /* We are assuming that we will never receive a fat file in the kernel */
-
-#if !KERNEL
-    require_action(size >= sizeof(*fat), finish, 
-        rval=KERN_FAILURE;
-        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO));
-
-    /* The fat header is always big endian, so swap if necessary */
-    if (fat->magic == FAT_CIGAM) {
-        (void) swap_fat_header(fat, kext->host_order);
-        swap = TRUE;
-    }
-
-    if (fat->magic == FAT_MAGIC) {
-        struct fat_arch *arch = NULL;
-
-        require_action(size >= (sizeof(*fat) + (fat->nfat_arch * sizeof(*archs))),
-            finish, 
-            rval=KERN_FAILURE;
-            kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO));
+    KXLDSymtabIterator iter;
+    KXLDSym *sym = NULL;
 
-        /* Swap the fat_arch structures if necessary */
-        if (swap) {
-            (void) swap_fat_arch(archs, fat->nfat_arch, kext->host_order);
+    (void) kxld_symtab_iterator_init(&iter, kxld_object_get_symtab(kext), 
+        kxld_sym_is_exported, FALSE);
+    while ((sym = kxld_symtab_iterator_get_next(&iter))) {
+        if (defined_symbols_by_name) {
+            rval = kxld_dict_insert(defined_symbols_by_name, sym->name, sym);
+            require_noerr(rval, finish);
         }
 
-        /* Locate the Mach-O for the requested architecture */
-
-        arch = NXFindBestFatArch(kext->cputype, kext->cpusubtype, archs, 
-            fat->nfat_arch);
-        require_action(arch, finish, rval=KERN_FAILURE;
-            kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogArchNotFound));
-        require_action(size >= arch->offset + arch->size, finish, 
-            rval=KERN_FAILURE;
-            kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO));
-
-        kext->file = file + arch->offset;
-        kext->size = arch->size;
-    }
-#endif /* !KERNEL */
-
-    /* Swap the Mach-O's headers to this architecture if necessary */
-    if (kxld_kext_is_32_bit(kext)) {
-        rval = validate_and_swap_macho_32(kext->file, kext->size
-#if !KERNEL
-            , kext->host_order
-#endif /* !KERNEL */
-            );
-    } else {
-        rval = validate_and_swap_macho_64(kext->file, kext->size
-#if !KERNEL
-            , kext->host_order
-#endif /* !KERNEL */
-            );
+        if (kxld_sym_is_cxx(sym) && defined_cxx_symbols_by_value) {
+            rval = kxld_dict_insert(defined_cxx_symbols_by_value,
+                &sym->link_addr, sym);
+            require_noerr(rval, finish);
+        }
     }
-    require_noerr(rval, finish);
-
-    mach_hdr = (struct mach_header *) kext->file;
-    require_action(kext->cputype == mach_hdr->cputype, finish,
-        rval=KERN_FAILURE;
-        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO));
 
     rval = KERN_SUCCESS;
 finish:
@@ -500,395 +328,204 @@ finish:
 
 /*******************************************************************************
 *******************************************************************************/
-boolean_t
-kxld_kext_is_32_bit(const KXLDKext *kext)
+kern_return_t 
+kxld_kext_export_vtables(KXLDKext *kext, const KXLDDict *defined_cxx_symbols,
+    const KXLDDict *defined_symbols, KXLDDict *vtables)
 {
+    kern_return_t rval = KERN_FAILURE;
+    KXLDVTable *vtable = NULL;
+    u_int i = 0;
+
     check(kext);
+    check(defined_symbols);
+    check(defined_cxx_symbols);
+    check(vtables);
 
-    return kxld_is_32_bit(kext->cputype);
-}
+    rval = create_vtables(kext, defined_cxx_symbols, defined_symbols);
+    require_noerr(rval, finish);
 
-/*******************************************************************************
-*******************************************************************************/
-void
-kxld_kext_get_cputype(const KXLDKext *kext, cpu_type_t *cputype,
-    cpu_subtype_t *cpusubtype)
-{
-    check(kext);
-    check(cputype);
-    check(cpusubtype);
+    for (i = 0; i < kext->vtables.nitems; ++i) {
+        vtable = kxld_array_get_item(&kext->vtables, i);
 
-    *cputype = kext->cputype;
-    *cpusubtype = kext->cpusubtype;
+        rval = kxld_dict_insert(vtables, vtable->name, vtable);
+        require_noerr(rval, finish);
+    }
+    
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
 }
 
 /*******************************************************************************
 *******************************************************************************/
-kern_return_t
-kxld_kext_validate_cputype(const KXLDKext *kext, cpu_type_t cputype,
-    cpu_subtype_t cpusubtype __unused)
+void 
+kxld_kext_get_vmsize(const KXLDKext *kext, 
+    u_long *header_size, u_long *vmsize)
 {
-    if (kext->cputype != cputype) return KERN_FAILURE;
-    return KERN_SUCCESS;
+    (void) kxld_object_get_vmsize(kext->kext, header_size, vmsize);
 }
-
+    
 /*******************************************************************************
 *******************************************************************************/
-static boolean_t
-target_supports_protected_segments(const KXLDKext *kext)
+kern_return_t 
+kxld_kext_export_linked_object(const KXLDKext *kext, 
+    u_char *linked_object, kxld_addr_t *kmod_info)
 {
-    return (kext->is_final_image && 
-            kext->cputype == CPU_TYPE_X86_64);
-}
+    kern_return_t rval = KERN_FAILURE;
+    const KXLDSym *kmodsym = NULL;
 
-#if KXLD_USER_OR_OBJECT
-/*******************************************************************************
-*******************************************************************************/
-static boolean_t target_supports_object(const KXLDKext *kext)
-{
-    return (kext->cputype == CPU_TYPE_POWERPC ||
-            kext->cputype == CPU_TYPE_I386    ||
-            kext->cputype == CPU_TYPE_ARM);
+    kmodsym = kxld_symtab_get_locally_defined_symbol_by_name(
+        kxld_object_get_symtab(kext->kext), KXLD_KMOD_INFO_SYMBOL);
+    require_action(kmodsym, finish, rval=KERN_FAILURE;
+        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogNoKmodInfo));
+ 
+    *kmod_info = kmodsym->link_addr;
+
+    rval = kxld_object_export_linked_object(kext->kext, linked_object);
+finish:
+    return rval;
 }
 
 /*******************************************************************************
 *******************************************************************************/
-static kern_return_t 
-init_from_object(KXLDKext *kext)
+kern_return_t
+kxld_kext_relocate(KXLDKext *kext, kxld_addr_t link_address,
+    KXLDDict *patched_vtables, const KXLDDict *defined_symbols, 
+    const KXLDDict *obsolete_symbols, const KXLDDict *defined_cxx_symbols)
 {
     kern_return_t rval = KERN_FAILURE;
-    struct load_command *cmd_hdr = NULL;
-    struct symtab_command *symtab_hdr = NULL;
-    struct uuid_command *uuid_hdr = NULL;
-    KXLDSect *sect = NULL;
-    u_long offset = 0;
-    u_long sect_offset = 0;
-    u_int filetype = 0;
-    u_int ncmds = 0;
-    u_int nsects = 0;
-    u_int i = 0;
-    boolean_t has_segment = FALSE;
 
     check(kext);
-
-    require_action(target_supports_object(kext),
-        finish, rval=KERN_FAILURE;
-        kxld_log(kKxldLogLinking, kKxldLogErr,
-            kKxldLogFiletypeNotSupported, MH_OBJECT));
-
-    KXLD_3264_FUNC(kxld_kext_is_32_bit(kext), offset,
-        get_macho_cmd_data_32, get_macho_cmd_data_64,
-        kext->file, offset, &filetype, &ncmds);
-
-    require_action(filetype == MH_OBJECT, finish, rval=KERN_FAILURE);
-
-    /* MH_OBJECTs use one unnamed segment to contain all of the sections.  We
-     * loop over all of the load commands to initialize the structures we
-     * expect.  Then, we'll use the unnamed segment to get to all of the
-     * sections, and then use those sections to create the actual segments.
+    check(patched_vtables);
+    check(defined_symbols);
+    check(obsolete_symbols);
+
+    /* Kexts that are being relocated need symbols indexed by value for vtable
+     * creation and patching. Note that we don't need to index by value for
+     * dependencies that have already been linked because their symbols are
+     * already in the global cxx value table. It's important to index the
+     * symbols by value before we relocate the symbols because the vtable
+     * entries will still have unrelocated values.
      */
+    rval = kxld_object_index_cxx_symbols_by_value(kext->kext);
+    require_noerr(rval, finish);
 
-    for (; i < ncmds; ++i, offset += cmd_hdr->cmdsize) {
-        cmd_hdr = (struct load_command *) (kext->file + offset);
-
-        switch(cmd_hdr->cmd) {
-#if KXLD_USER_OR_ILP32
-        case LC_SEGMENT:
-            {
-                struct segment_command *seg_hdr = 
-                    (struct segment_command *) cmd_hdr;
-
-                /* Ignore segments with no vm size */
-                if (!seg_hdr->vmsize) continue;
-
-                require_action(kxld_kext_is_32_bit(kext), finish, rval=KERN_FAILURE;
-                    kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
-                        "LC_SEGMENT in 64-bit kext."));
-                require_action(!has_segment, finish, rval=KERN_FAILURE;
-                    kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
-                        "Multiple segments in an MH_OBJECT kext."));
-
-                nsects = seg_hdr->nsects;
-                sect_offset = offset + sizeof(*seg_hdr);
-                has_segment = TRUE;
-            }
-            break;
-#endif /* KXLD_USER_OR_ILP32 */
-#if KXLD_USER_OR_LP64
-        case LC_SEGMENT_64:
-            {
-                struct segment_command_64 *seg_hdr =
-                    (struct segment_command_64 *) cmd_hdr;
-
-                /* Ignore segments with no vm size */
-                if (!seg_hdr->vmsize) continue;
-
-                require_action(!kxld_kext_is_32_bit(kext), finish, rval=KERN_FAILURE;
-                    kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
-                        "LC_SEGMENT_64 in a 32-bit kext."));
-                require_action(!has_segment, finish, rval=KERN_FAILURE;
-                    kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
-                        "Multiple segments in an MH_OBJECT kext."));
-
-                nsects = seg_hdr->nsects;
-                sect_offset = offset + sizeof(*seg_hdr);
-                has_segment = TRUE;
-            }
-            break;
-#endif /* KXLD_USER_OR_LP64 */
-        case LC_SYMTAB:
-            symtab_hdr = (struct symtab_command *) cmd_hdr;
-
-            KXLD_3264_FUNC(kxld_kext_is_32_bit(kext), rval,
-                kxld_symtab_init_from_macho_32, kxld_symtab_init_from_macho_64,
-                kext->symtab, kext->file, symtab_hdr, 0);
-            require_noerr(rval, finish);
-            break;
-        case LC_UUID:
-            uuid_hdr = (struct uuid_command *) cmd_hdr;
-            kxld_uuid_init_from_macho(&kext->uuid, uuid_hdr);
-            break;
-        case LC_UNIXTHREAD:
-            /* Don't need to do anything with UNIXTHREAD */
-            break;
-        default:
-            rval = KERN_FAILURE;
-            kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
-                "Invalid segment type in MH_OBJECT kext: %u.", cmd_hdr->cmd);
-            goto finish;
-        }
-    }
-
-    if (has_segment) {
-
-        /* Get the number of sections from the segment and build the section index */
-
-        rval = kxld_array_init(&kext->sects, sizeof(KXLDSect), nsects);
-        require_noerr(rval, finish);
-
-        /* Loop over all of the sections to initialize the section index */
-
-        for (i = 0; i < nsects; ++i) {
-            sect = kxld_array_get_item(&kext->sects, i);
-            KXLD_3264_FUNC(kxld_kext_is_32_bit(kext), rval,
-                kxld_sect_init_from_macho_32, kxld_sect_init_from_macho_64,
-                sect, kext->file, &sect_offset, i, &kext->relocator); 
-            require_noerr(rval, finish);
-        }
-
-        /* Create special sections */
-
-#if KXLD_USER_OR_GOT
-        rval = create_got(kext);
-        require_noerr(rval, finish);
-#endif /* KXLD_USER_OR_GOT */
+    rval = kxld_object_index_symbols_by_name(kext->kext);
+    require_noerr(rval, finish);
 
-#if KXLD_USER_OR_COMMON
-        rval = resolve_common_symbols(kext);
-        require_noerr(rval, finish);
-#endif /* KXLD_USER_OR_COMMON */
+    rval = kxld_object_relocate(kext->kext, link_address);
+    require_noerr(rval, finish);
 
-        /* Create the segments from the section index */
+    rval = resolve_symbols(kext, defined_symbols, obsolete_symbols);
+    require_noerr(rval, finish);
 
-        rval = kxld_seg_create_seg_from_sections(&kext->segs, &kext->sects);
-        require_noerr(rval, finish);
+    rval = create_vtables(kext, defined_cxx_symbols, /* defined_symbols */ NULL);
+    require_noerr(rval, finish);
 
-        rval = kxld_seg_finalize_object_segment(&kext->segs, 
-            kext->section_order, get_macho_header_size(kext));
-        require_noerr(rval, finish);
+    rval = patch_vtables(kext, patched_vtables, defined_symbols);
+    require_noerr(rval, finish);
+    
+    rval = validate_symbols(kext);
+    require_noerr(rval, finish);
 
-        kext->link_type = KXLD_LINK_KEXT;
-    } else {
-        kext->link_type = KXLD_LINK_PSEUDO_KEXT;
-    }
+    rval = kxld_object_process_relocations(kext->kext, patched_vtables);
+    require_noerr(rval, finish);
 
     rval = KERN_SUCCESS;
 finish:
     return rval;
 }
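Editor's note: the comment at the top of kxld_kext_relocate() pins an ordering constraint: vtable entries hold pre-relocation addresses, so the by-value symbol index must be built before the slide is applied. A toy, self-contained illustration of why (all names and values hypothetical):

    #include <assert.h>
    #include <stdint.h>

    int
    main(void)
    {
        const uint64_t slide = 0x1000;
        uint64_t sym_value = 0x100;          /* symbol's unrelocated value */
        const uint64_t vtable_entry = 0x100; /* entry still holds old value */

        uint64_t key = sym_value;  /* index by value *before* relocating */
        sym_value += slide;        /* now apply the slide */

        assert(key == vtable_entry);       /* pre-slide key matches entry */
        assert(sym_value != vtable_entry); /* a post-slide key would miss */
        return 0;
    }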
-#endif /* KXLD_USER_OR_OBJECT */
 
 /*******************************************************************************
+* The defined symbols argument is optional.  When supplied, create_vtables()
+* will look for vtable symbols in the defined_symbols dictionary.  Otherwise,
+* it will look in the kext's symbol table for vtable symbols.
+*
+* We do this because there are two types of KXLDKext objects that call
+* create_vtables(): those that have been linked, and those that haven't.  The
+* linked kexts export their symbols into the global symbol table that is used
+* for symbol resolution, so we can look there for vtable symbols without
+* having to index their local symbol table separately.
+* 
+* Unlinked kexts haven't yet had their symbols exported into the global table,
+* so we have to index their local symbol table separately.
 *******************************************************************************/
-static kern_return_t
-init_from_final_linked_image(KXLDKext *kext, u_int *filetype_out,
-    struct symtab_command **symtab_hdr_out)
+static kern_return_t 
+create_vtables(KXLDKext *kext, const KXLDDict *defined_cxx_symbols,
+    const KXLDDict *defined_symbols)
 {
     kern_return_t rval = KERN_FAILURE;
-    KXLDSeg *seg = NULL;
-    KXLDSect *sect = NULL;
-    struct load_command *cmd_hdr = NULL;
-    struct symtab_command *symtab_hdr = NULL;
-    struct uuid_command *uuid_hdr = NULL;
-    u_long base_offset = 0;
-    u_long offset = 0;
-    u_long sect_offset = 0;
-    u_int filetype = 0;
+    const KXLDSymtab *symtab = NULL;
+    KXLDSymtabIterator iter;
+    KXLDSym *sym = NULL;
+    KXLDSym *vtable_sym = NULL;
+    KXLDSym *meta_vtable_sym = NULL;
+    KXLDVTable *vtable = NULL;
+    KXLDVTable *meta_vtable = NULL;
     u_int i = 0;
-    u_int j = 0;
-    u_int segi = 0;
-    u_int secti = 0;
-    u_int nsegs = 0;
-    u_int nsects = 0;
-    u_int ncmds = 0;
-
-    KXLD_3264_FUNC(kxld_kext_is_32_bit(kext), base_offset,
-        get_macho_cmd_data_32, get_macho_cmd_data_64,
-        kext->file, offset, &filetype, &ncmds);
-
-    /* First pass to count segments and sections */
-
-    offset = base_offset;
-    for (i = 0; i < ncmds; ++i, offset += cmd_hdr->cmdsize) {
-        cmd_hdr = (struct load_command *) (kext->file + offset);
-
-        switch(cmd_hdr->cmd) {
-#if KXLD_USER_OR_ILP32
-        case LC_SEGMENT:
-            {
-                struct segment_command *seg_hdr = 
-                    (struct segment_command *) cmd_hdr;
-
-                /* Ignore segments with no vm size */
-                if (!seg_hdr->vmsize) continue;
-
-                ++nsegs;
-                nsects += seg_hdr->nsects;
-            }
-            break;
-#endif /* KXLD_USER_OR_ILP32 */
-#if KXLD_USER_OR_LP64
-        case LC_SEGMENT_64:
-            {
-                struct segment_command_64 *seg_hdr = 
-                    (struct segment_command_64 *) cmd_hdr;
-
-                /* Ignore segments with no vm size */
-                if (!seg_hdr->vmsize) continue;
-
-                ++nsegs;
-                nsects += seg_hdr->nsects;
-            }
-            break;
-#endif /* KXLD_USER_OR_LP64 */
-        default:
-            continue;
-        }
-    }
-
-    /* Allocate the segments and sections */
-
-    if (nsegs) {
-        rval = kxld_array_init(&kext->segs, sizeof(KXLDSeg), nsegs);
-        require_noerr(rval, finish);
+    u_int nvtables = 0;
 
-        rval = kxld_array_init(&kext->sects, sizeof(KXLDSect), nsects);
-        require_noerr(rval, finish);
+    if (kext->vtables_created) {
+        rval = KERN_SUCCESS;
+        goto finish;
     }
 
-    /* Initialize the segments and sections */
+    symtab = kxld_object_get_symtab(kext->kext);
 
-    offset = base_offset;
-    for (i = 0; i < ncmds; ++i, offset += cmd_hdr->cmdsize) {
-        cmd_hdr = (struct load_command *) (kext->file + offset); 
-        seg = NULL;
+    if (kxld_object_is_linked(kext->kext)) {
+        /* Create a vtable object for every vtable symbol */
+        kxld_symtab_iterator_init(&iter, symtab, kxld_sym_is_vtable, FALSE);
+        nvtables = kxld_symtab_iterator_get_num_remaining(&iter);
+    } else {
+        /* We walk over the super metaclass pointer symbols because classes
+         * with them are the only ones that need patching.  Then we double the
+         * number of vtables we're expecting, because every pointer will have a
+         * class vtable and a MetaClass vtable.
+         */
+        kxld_symtab_iterator_init(&iter, symtab, 
+            kxld_sym_is_super_metaclass_pointer, FALSE);
+        nvtables = kxld_symtab_iterator_get_num_remaining(&iter) * 2;
+    }
 
-        switch(cmd_hdr->cmd) {
-#if KXLD_USER_OR_ILP32
-        case LC_SEGMENT:
-            {
-                struct segment_command *seg_hdr =
-                    (struct segment_command *) cmd_hdr;
+    rval = kxld_array_init(&kext->vtables, sizeof(KXLDVTable), nvtables);
+    require_noerr(rval, finish);
 
-                /* Ignore segments with no vm size */
-                if (!seg_hdr->vmsize) continue;
+    while ((sym = kxld_symtab_iterator_get_next(&iter))) {
+        if (kxld_object_is_linked(kext->kext)) {
+            vtable_sym = sym;
+            meta_vtable_sym = NULL;
+            meta_vtable = NULL;
+        } else {
+            rval = get_vtable_syms_from_smcp(kext, defined_symbols, sym,
+                &vtable_sym, &meta_vtable_sym);
+            require_noerr(rval, finish);
+        }
 
-                seg = kxld_array_get_item(&kext->segs, segi++);
+        vtable = kxld_array_get_item(&kext->vtables, i++);
+        rval = kxld_vtable_init(vtable, vtable_sym, kext->kext, 
+            defined_cxx_symbols);
+        require_noerr(rval, finish);
 
-                rval = kxld_seg_init_from_macho_32(seg, seg_hdr);
+        /* meta_vtable_sym will be null when we don't support strict
+         * patching and can't find the metaclass vtable. If that's the
+         * case, we just reduce the expected number of vtables by 1.
+         */
+        if (!kxld_object_is_linked(kext->kext)) {
+            if (meta_vtable_sym) {
+                meta_vtable = kxld_array_get_item(&kext->vtables, i++);
+                rval = kxld_vtable_init(meta_vtable, meta_vtable_sym, 
+                    kext->kext, defined_cxx_symbols);
                 require_noerr(rval, finish);
-
-                sect_offset = offset + sizeof(*seg_hdr);
+            } else {
+                kxld_array_resize(&kext->vtables, --nvtables);
+                meta_vtable = NULL;
             }
-            break;
-#endif /* KXLD_USER_OR_ILP32 */
-#if KXLD_USER_OR_LP64
-        case LC_SEGMENT_64:
-            {
-                struct segment_command_64 *seg_hdr = 
-                    (struct segment_command_64 *) cmd_hdr;
-
-                /* Ignore segments with no vm size */
-                if (!seg_hdr->vmsize) continue;
+        }
+    }
+    require_action(i == kext->vtables.nitems, finish, 
+        rval=KERN_FAILURE);
 
-                seg = kxld_array_get_item(&kext->segs, segi++);
-
-                rval = kxld_seg_init_from_macho_64(seg, seg_hdr);
-                require_noerr(rval, finish);
-
-                sect_offset = offset + sizeof(*seg_hdr);
-            }
-            break;
-#endif /* KXLD_USER_OR_LP64 */
-        case LC_SYMTAB:
-            symtab_hdr = (struct symtab_command *) cmd_hdr;
-            break;
-        case LC_UUID:
-            uuid_hdr = (struct uuid_command *) cmd_hdr;
-            kxld_uuid_init_from_macho(&kext->uuid, uuid_hdr);
-            break;
-        case LC_DYSYMTAB:
-            kext->dysymtab_hdr = (struct dysymtab_command *) cmd_hdr;            
-
-            rval = kxld_reloc_create_macho(&kext->extrelocs, &kext->relocator,
-                (struct relocation_info *) (kext->file + kext->dysymtab_hdr->extreloff), 
-                kext->dysymtab_hdr->nextrel);
-            require_noerr(rval, finish);
-
-            rval = kxld_reloc_create_macho(&kext->locrelocs, &kext->relocator,
-                (struct relocation_info *) (kext->file + kext->dysymtab_hdr->locreloff), 
-                kext->dysymtab_hdr->nlocrel);
-            require_noerr(rval, finish);
-
-            break;
-        case LC_UNIXTHREAD:
-            /* Don't need to do anything with UNIXTHREAD for the kernel */
-            require_action(kext->link_type == KXLD_LINK_KERNEL, finish, 
-                rval=KERN_FAILURE;
-                kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
-                    "LC_UNIXTHREAD segment is not valid in a kext."));
-            break;
-        default:
-            rval=KERN_FAILURE;
-            kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
-                "Invalid segment type in MH_KEXT_BUNDLE kext: %u.", cmd_hdr->cmd);
-            goto finish;
-        }
-
-        if (seg) {
-
-            /* Initialize the sections */
-            for (j = 0; j < seg->sects.nitems; ++j, ++secti) {
-                sect = kxld_array_get_item(&kext->sects, secti);
-                KXLD_3264_FUNC(kxld_kext_is_32_bit(kext), rval,
-                    kxld_sect_init_from_macho_32, kxld_sect_init_from_macho_64,
-                    sect, kext->file, &sect_offset, secti, &kext->relocator);
-                require_noerr(rval, finish);
-
-                /* Add the section to the segment.  This will also make sure
-                 * that the sections and segments have the same segname.
-                 */
-                rval = kxld_seg_add_section(seg, sect);
-                require_noerr(rval, finish);
-            }
-            rval = kxld_seg_finish_init(seg);
-            require_noerr(rval, finish);
-        }
-    }
-
-    if (filetype_out) *filetype_out = filetype;
-    if (symtab_hdr_out) *symtab_hdr_out = symtab_hdr;
-    kext->is_final_image = TRUE;
+    kext->vtables_created = TRUE;
     rval = KERN_SUCCESS;
 finish:
     return rval;
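The allocation pattern in the rewritten create_vtables() above is worth calling out: count the expected items with an exact first pass, allocate the array once, fill it, and shrink it with kxld_array_resize() when an expected slot (a missing meta vtable) never materializes. A self-contained sketch of the same count-then-fill idiom in plain C, with hypothetical names throughout:

    #include <stdlib.h>

    typedef struct { int initialized; } Slot;

    /* Allocate exactly 'expected' slots, initialize only those that turn
     * out to exist, and report how many were used so the caller can
     * shrink the array, as create_vtables() does with --nvtables. */
    static Slot *
    count_then_fill(int expected, int (*have_item)(int idx), int *nused)
    {
        Slot *arr = calloc((size_t)expected, sizeof(*arr));
        int i, n = 0;

        if (!arr) return NULL;
        for (i = 0; i < expected; ++i) {
            if (!have_item(i)) continue;   /* like a missing meta vtable */
            arr[n++].initialized = 1;
        }
        *nused = n;                        /* n <= expected */
        return arr;
    }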
@@ -897,1433 +534,82 @@ finish:
 /*******************************************************************************
 *******************************************************************************/
 static kern_return_t
-init_from_execute(KXLDKext *kext)
-{
-    kern_return_t rval = KERN_FAILURE;
-    struct symtab_command *symtab_hdr = NULL;
-    kxld_addr_t linkedit_offset = 0;
-    u_int filetype = 0;
-#if KERNEL
-    KXLDSeg *textseg = NULL;
-    KXLDSeg *linkeditseg = NULL;
-#endif /* KERNEL */
-#if KXLD_USER_OR_OBJECT
-    KXLDSeg *seg = NULL;
-    KXLDSect *sect = NULL;
-    KXLDSectionName *sname = NULL;
-    u_int i = 0, j = 0, k = 0;
-#endif /* KXLD_USER_OR_OBJECT */
-
-    check(kext);
-
-    require_action(kext->link_type == KXLD_LINK_KERNEL, finish,
-        rval=KERN_FAILURE);
-
-    rval = init_from_final_linked_image(kext, &filetype, &symtab_hdr);
-    require_noerr(rval, finish);
-
-    require_action(filetype == MH_EXECUTE, finish, rval=KERN_FAILURE;
-        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO 
-            "The kernel file is not of type MH_EXECUTE."));
-
-#if KERNEL
-    /* When we're in the kernel, the symbol table can no longer be found by the
-     * symtab_command alone because the command specifies offsets for the file
-     * on disk, not the file mapped into memory.  We can find the additional
-     * offset necessary by finding the difference between the linkedit segment's
-     * vm address and the text segment's vm address.
-     */
-
-    textseg = kxld_kext_get_seg_by_name(kext, SEG_TEXT);
-    require_action(textseg, finish, rval=KERN_FAILURE;
-        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO));
-
-    linkeditseg = kxld_kext_get_seg_by_name(kext, SEG_LINKEDIT);
-    require_action(linkeditseg, finish, rval=KERN_FAILURE;
-        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO));
-
-    linkedit_offset = linkeditseg->base_addr - textseg->base_addr - 
-        linkeditseg->fileoff;
-#endif /* KERNEL */
-
-    /* Initialize the symbol table */
-
-    KXLD_3264_FUNC(kxld_kext_is_32_bit(kext), rval,
-        kxld_symtab_init_from_macho_32, kxld_symtab_init_from_macho_64,
-        kext->symtab, kext->file, symtab_hdr, linkedit_offset);
-    require_noerr(rval, finish);
-
-#if KXLD_USER_OR_OBJECT
-    /* Save off the order of section names so that we can lay out kext 
-     * sections for MH_OBJECT-based systems.
-     */
-    if (target_supports_object(kext)) {
-
-        rval = kxld_array_init(kext->section_order, sizeof(KXLDSectionName), 
-            kext->sects.nitems);
-        require_noerr(rval, finish);
-
-        /* Copy the section names into the section_order array for future kext
-         * section ordering.
-         */
-        for (i = 0, k = 0; i < kext->segs.nitems; ++i) {
-            seg = kxld_array_get_item(&kext->segs, i);
-
-            for (j = 0; j < seg->sects.nitems; ++j, ++k) {
-                sect = *(KXLDSect **) kxld_array_get_item(&seg->sects, j);
-                sname = kxld_array_get_item(kext->section_order, k);
-
-                strlcpy(sname->segname, sect->segname, sizeof(sname->segname));
-                strlcpy(sname->sectname, sect->sectname, sizeof(sname->sectname));
-            }
-        }
-    }
-#endif /* KXLD_USER_OR_OBJECT */
-
-    rval = KERN_SUCCESS;
-finish:
-    return rval;
-}
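To make the linkedit adjustment above concrete, here is a worked example with made-up addresses (high bits omitted for readability):

    /*
     * __TEXT      vmaddr = 0x200000   fileoff = 0
     * __LINKEDIT  vmaddr = 0xa00000   fileoff = 0x700000
     *
     * linkedit_offset = 0xa00000 - 0x200000 - 0x700000 = 0x100000
     *
     * A symbol table at file offset 0x700100 is then found in memory at
     * text vmaddr + 0x700100 + linkedit_offset = 0xa00100, i.e. 0x100
     * bytes into the mapped __LINKEDIT segment, exactly where it sits
     * in the file relative to the segment's start.
     */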
-
-#if KXLD_USER_OR_BUNDLE
-/*******************************************************************************
-*******************************************************************************/
-static boolean_t
-target_supports_bundle(const KXLDKext *kext)
-{
-    return (kext->cputype == CPU_TYPE_X86_64);
-}
-
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t 
-init_from_bundle(KXLDKext *kext)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDSeg *seg = NULL;
-    struct symtab_command *symtab_hdr = NULL;
-    u_int filetype = 0;
-    u_int idx = 0;
-
-    check(kext);
-
-    require_action(target_supports_bundle(kext), finish,
-        rval=KERN_FAILURE;
-        kxld_log(kKxldLogLinking, kKxldLogErr,
-            kKxldLogFiletypeNotSupported, MH_KEXT_BUNDLE));
-
-    rval = init_from_final_linked_image(kext, &filetype, &symtab_hdr);
-    require_noerr(rval, finish);
-
-    require_action(filetype == MH_KEXT_BUNDLE, finish, 
-        rval=KERN_FAILURE);
-
-    KXLD_3264_FUNC(kxld_kext_is_32_bit(kext), rval,
-        kxld_symtab_init_from_macho_32, kxld_symtab_init_from_macho_64,
-        kext->symtab, kext->file, symtab_hdr, /* linkedit offset */ 0);
-    require_noerr(rval, finish);
-
-    if (kext->segs.nitems) {
-        /* Remove the __LINKEDIT segment, since we never keep the symbol
-         * table around in memory for kexts.
-         */
-        seg = kxld_kext_get_seg_by_name(kext, SEG_LINKEDIT);
-        if (seg) {
-            rval = kxld_array_get_index(&kext->segs, seg, &idx);
-            require_noerr(rval, finish);
-
-            kxld_seg_deinit(seg);
-
-            rval = kxld_array_remove(&kext->segs, idx);
-            require_noerr(rval, finish);
-        }
-
-        kext->link_type = KXLD_LINK_KEXT;
-    } else {
-        kext->link_type = KXLD_LINK_PSEUDO_KEXT;
-    }
-
-    rval = KERN_SUCCESS;
-finish:
-    return rval;
-}
-#endif /* KXLD_USER_OR_BUNDLE */
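In short, init_from_bundle() classifies its input by what survives in the segment list: a bundle with segments is a true kext (KXLD_LINK_KEXT), while one with no segments carries only symbols and becomes a pseudokext (KXLD_LINK_PSEUDO_KEXT), which, as kxld_kext_resolve() below notes, exists to re-export its dependencies' symbols and vtables rather than to be relocated itself.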
-
-#if KXLD_USER_OR_ILP32
-/*******************************************************************************
-*******************************************************************************/
-static u_long
-get_macho_cmd_data_32(u_char *file, u_long offset, u_int *filetype, u_int *ncmds)
-{
-    struct mach_header *mach_hdr = (struct mach_header *) (file + offset);
-
-    if (filetype) *filetype = mach_hdr->filetype;
-    if (ncmds) *ncmds = mach_hdr->ncmds;
-
-    return sizeof(*mach_hdr);
-}
-
-#endif /* KXLD_USER_OR_ILP32 */
-
-#if KXLD_USER_OR_LP64
-/*******************************************************************************
-*******************************************************************************/
-static u_long
-get_macho_cmd_data_64(u_char *file, u_long offset, u_int *filetype,  u_int *ncmds)
-{
-    struct mach_header_64 *mach_hdr = (struct mach_header_64 *) (file + offset);
-
-    if (filetype) *filetype = mach_hdr->filetype;
-    if (ncmds) *ncmds = mach_hdr->ncmds;
-
-    return sizeof(*mach_hdr);
-}
-#endif /* KXLD_USER_OR_LP64 */
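Both helpers return the size of the Mach header, which is exactly where the load commands begin. A minimal, self-contained sketch of the walk they support (mirroring the command loop deleted above; visit is a hypothetical callback):

    #include <mach-o/loader.h>

    static void
    walk_load_commands(unsigned char *file, unsigned long offset,
        unsigned int ncmds, void (*visit)(struct load_command *))
    {
        struct load_command *lc = NULL;
        unsigned int i;

        /* Each command records its own length, so the cursor advances
         * by lc->cmdsize after each command is visited. */
        for (i = 0; i < ncmds; ++i, offset += lc->cmdsize) {
            lc = (struct load_command *) (file + offset);
            visit(lc);
        }
    }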
-
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t 
-create_vtables(KXLDKext *kext)
+get_vtable_syms_from_smcp(KXLDKext *kext, const KXLDDict *defined_symbols,
+    KXLDSym *super_metaclass_ptr_sym, KXLDSym **vtable_sym_out, 
+    KXLDSym **meta_vtable_sym_out)
 {
     kern_return_t rval = KERN_FAILURE;
-    KXLDSymtabIterator iter;
-    KXLDSym *sym = NULL;
+    const KXLDSymtab *symtab = NULL;
     KXLDSym *vtable_sym = NULL;
     KXLDSym *meta_vtable_sym = NULL;
-    KXLDSect *vtable_sect = NULL;
-    KXLDSect *meta_vtable_sect = NULL;
-    KXLDVTable *vtable = NULL;
-    KXLDVTable *meta_vtable = NULL;
     char class_name[KXLD_MAX_NAME_LEN];
     char vtable_name[KXLD_MAX_NAME_LEN];
     char meta_vtable_name[KXLD_MAX_NAME_LEN];
-    char *demangled_name1 = NULL;
-    char *demangled_name2 = NULL;
-    size_t demangled_length1 = 0;
-    size_t demangled_length2 = 0;
-    u_int i = 0;
-    u_int nvtables = 0;
-
-    if (kext->link_type == KXLD_LINK_KERNEL) {
-        /* Create a vtable object for every vtable symbol */
-        kxld_symtab_iterator_init(&iter, kext->symtab, kxld_sym_is_vtable, FALSE);
-        nvtables = kxld_symtab_iterator_get_num_remaining(&iter);
-    } else {
-        /* We walk over the super metaclass pointer symbols, because classes
-         * with them are the only ones that need patching.  Then we double the
-         * number of vtables we're expecting, because every pointer will have a
-         * class vtable and a MetaClass vtable.
-         */
-        kxld_symtab_iterator_init(&iter, kext->symtab, 
-            kxld_sym_is_super_metaclass_pointer, FALSE);
-        nvtables = kxld_symtab_iterator_get_num_remaining(&iter) * 2;
-    }
-
-    /* Allocate the array of vtable objects.
-     */
-    rval = kxld_array_init(&kext->vtables, sizeof(KXLDVTable), nvtables);
-    require_noerr(rval, finish);
-
-    /* Initialize from each vtable symbol */
-    while ((sym = kxld_symtab_iterator_get_next(&iter))) {
-
-        if (kext->link_type == KXLD_LINK_KERNEL) {
-            vtable_sym = sym;
-        } else {
-            /* Get the class name from the smc pointer */
-            rval = kxld_sym_get_class_name_from_super_metaclass_pointer(
-                sym, class_name, sizeof(class_name));
-            require_noerr(rval, finish);
-
-            /* Get the vtable name from the class name */
-            rval = kxld_sym_get_vtable_name_from_class_name(class_name,
-                vtable_name, sizeof(vtable_name));
-            require_noerr(rval, finish);
-
-            /* Get the vtable symbol */
-            vtable_sym = kxld_symtab_get_symbol_by_name(kext->symtab, vtable_name);
-            require_action(vtable_sym, finish, rval=KERN_FAILURE;
-                kxld_log(kKxldLogPatching, kKxldLogErr, kKxldLogMissingVtable,
-                vtable_name, class_name));
-
-            /* Get the meta vtable name from the class name */
-            rval = kxld_sym_get_meta_vtable_name_from_class_name(class_name,
-                meta_vtable_name, sizeof(meta_vtable_name));
-            require_noerr(rval, finish);
-
-            /* Get the meta vtable symbol */
-            meta_vtable_sym = kxld_symtab_get_symbol_by_name(kext->symtab,
-                meta_vtable_name);
-            if (!meta_vtable_sym) {
-                /* If we don't support strict patching and we can't find the vtable,
-                 * log a warning and reduce the expected number of vtables by 1.
-                 */
-                if (target_supports_strict_patching(kext)) {
-                    kxld_log(kKxldLogPatching, kKxldLogErr, kKxldLogMissingVtable, 
-                        meta_vtable_name, class_name);
-                    rval = KERN_FAILURE;
-                    goto finish;
-                } else {
-                    kxld_log(kKxldLogPatching, kKxldLogErr, 
-                        "Warning: " kKxldLogMissingVtable, 
-                        kxld_demangle(meta_vtable_name, &demangled_name1, 
-                            &demangled_length1), 
-                        kxld_demangle(class_name, &demangled_name2, 
-                            &demangled_length2));
-                    kxld_array_resize(&kext->vtables, --nvtables);
-                }
-            }
-        }
-
-        /* Get the vtable's section */
-        vtable_sect = kxld_array_get_item(&kext->sects, vtable_sym->sectnum);
-        require_action(vtable_sect, finish, rval=KERN_FAILURE);
-
-        vtable = kxld_array_get_item(&kext->vtables, i++);
-
-        if (kext->link_type == KXLD_LINK_KERNEL) {
-            /* Initialize the kernel vtable */
-            rval = kxld_vtable_init_from_kernel_macho(vtable, vtable_sym, 
-                vtable_sect, kext->symtab, &kext->relocator);
-            require_noerr(rval, finish);
-        } else {
-            /* Initialize the class vtable */
-            if (kext->is_final_image) {
-                rval = kxld_vtable_init_from_final_macho(vtable, vtable_sym, 
-                    vtable_sect, kext->symtab, &kext->relocator, &kext->extrelocs);
-                require_noerr(rval, finish);
-            } else {
-                rval = kxld_vtable_init_from_object_macho(vtable, vtable_sym, 
-                    vtable_sect, kext->symtab, &kext->relocator);
-                require_noerr(rval, finish);
-            }
-
-            /* meta_vtable_sym will be null when we don't support strict patching
-             * and can't find the metaclass vtable.
-             */
-            if (meta_vtable_sym) {
-                /* Get the vtable's section */
-                meta_vtable_sect = kxld_array_get_item(&kext->sects, 
-                    meta_vtable_sym->sectnum);
-                require_action(meta_vtable_sect, finish, rval=KERN_FAILURE);
-               
-                meta_vtable = kxld_array_get_item(&kext->vtables, i++);
-                
-                /* Initialize the metaclass vtable */
-                if (kext->is_final_image) {
-                    rval = kxld_vtable_init_from_final_macho(meta_vtable, meta_vtable_sym, 
-                        meta_vtable_sect, kext->symtab, &kext->relocator, &kext->extrelocs);
-                    require_noerr(rval, finish);
-                } else {
-                    rval = kxld_vtable_init_from_object_macho(meta_vtable, meta_vtable_sym, 
-                        meta_vtable_sect, kext->symtab, &kext->relocator);
-                    require_noerr(rval, finish);
-                }
-            }
-        }
-    }
-    require_action(i == kext->vtables.nitems, finish, 
-        rval=KERN_FAILURE);
-
-    /* Map vtable names to the vtable structures */
-    rval = kxld_dict_init(&kext->vtable_index, kxld_dict_string_hash, 
-        kxld_dict_string_cmp, kext->vtables.nitems);
-    require_noerr(rval, finish);
-
-    for (i = 0; i < kext->vtables.nitems; ++i) {
-        vtable = kxld_array_get_item(&kext->vtables, i);
-        rval = kxld_dict_insert(&kext->vtable_index, vtable->name, vtable);
-        require_noerr(rval, finish);
-    }
-
-    rval = KERN_SUCCESS;
-
-finish:
-
-    if (demangled_name1) kxld_free(demangled_name1, demangled_length1);
-    if (demangled_name2) kxld_free(demangled_name2, demangled_length2);
-
-    return rval;
-}
-
-/*******************************************************************************
-* Temporary workaround for PR-6668105 
-* new, new[], delete, and delete[] may be overridden globally in a kext.
-* We should do this with some sort of weak symbols, but we'll use a whitelist 
-* for now to minimize risk.  
-*******************************************************************************/
-static void
-restrict_private_symbols(KXLDKext *kext)
-{
-    const char *private_symbols[] = {
-        KXLD_KMOD_INFO_SYMBOL,
-        KXLD_OPERATOR_NEW_SYMBOL,
-        KXLD_OPERATOR_NEW_ARRAY_SYMBOL,
-        KXLD_OPERATOR_DELETE_SYMBOL,
-        KXLD_OPERATOR_DELETE_ARRAY_SYMBOL
-    };
-    KXLDSymtabIterator iter;
-    KXLDSym *sym = NULL;
-    const char *name = NULL;
-    u_int i = 0;
-
-    kxld_symtab_iterator_init(&iter, kext->symtab, kxld_sym_is_exported, FALSE);
-    while ((sym = kxld_symtab_iterator_get_next(&iter))) {
-        for (i = 0; i < const_array_len(private_symbols); ++i) {
-            name = private_symbols[i];
-            if (!streq(sym->name, name)) {
-                continue;
-            }
-
-            kxld_sym_mark_private(sym);
-        }
-    }
-}
-
-/*******************************************************************************
-*******************************************************************************/
-void
-kxld_kext_clear(KXLDKext *kext)
-{
-    KXLDSeg *seg = NULL;
-    KXLDSect *sect = NULL;
-    KXLDVTable *vtable = NULL;
-    u_int i;
-
-    check(kext);
-
-#if !KERNEL
-    if (kext->link_type == KXLD_LINK_KERNEL) {
-        unswap_macho(kext->file, kext->host_order, kext->target_order);
-    }
-#endif /* !KERNEL */
-
-    for (i = 0; i < kext->segs.nitems; ++i) {
-        seg = kxld_array_get_item(&kext->segs, i);
-        kxld_seg_clear(seg);
-    }
-    kxld_array_reset(&kext->segs);
-
-    for (i = 0; i < kext->sects.nitems; ++i) {
-        sect = kxld_array_get_item(&kext->sects, i);
-        kxld_sect_clear(sect);
-    }
-    kxld_array_reset(&kext->sects);
-
-    for (i = 0; i < kext->vtables.nitems; ++i) {
-        vtable = kxld_array_get_item(&kext->vtables, i);
-        kxld_vtable_clear(vtable);
-    }
-    kxld_array_reset(&kext->vtables);
-
-    kxld_array_reset(&kext->extrelocs);
-    kxld_array_reset(&kext->locrelocs);
-    kxld_dict_clear(&kext->vtable_index);
-    kxld_relocator_clear(&kext->relocator);
-    kxld_uuid_clear(&kext->uuid);
-
-    if (kext->symtab) kxld_symtab_clear(kext->symtab);
-
-    kext->link_addr = 0;
-    kext->kmod_link_addr = 0;
-    kext->cputype = 0;
-    kext->cpusubtype = 0;
-    kext->link_type = KXLD_LINK_UNKNOWN;
-    kext->is_final_image = FALSE;
-    kext->got_is_created = FALSE;
-}
-
-
-
-/*******************************************************************************
-*******************************************************************************/
-void 
-kxld_kext_deinit(KXLDKext *kext)
-{
-    KXLDSeg *seg = NULL;
-    KXLDSect *sect = NULL;
-    KXLDVTable *vtable = NULL;
-    u_int i;
-
-    check(kext);
-
-#if !KERNEL
-    if (kext->link_type == KXLD_LINK_KERNEL) {
-        unswap_macho(kext->file, kext->host_order, kext->target_order);
-    }
-#endif /* !KERNEL */
-
-    for (i = 0; i < kext->segs.maxitems; ++i) {
-        seg = kxld_array_get_slot(&kext->segs, i);
-        kxld_seg_deinit(seg);
-    }
-    kxld_array_deinit(&kext->segs);
-
-    for (i = 0; i < kext->sects.maxitems; ++i) {
-        sect = kxld_array_get_slot(&kext->sects, i);
-        kxld_sect_deinit(sect);
-    }
-    kxld_array_deinit(&kext->sects);
-
-    for (i = 0; i < kext->vtables.maxitems; ++i) {
-        vtable = kxld_array_get_slot(&kext->vtables, i);
-        kxld_vtable_deinit(vtable);
-    }
-    kxld_array_deinit(&kext->vtables);
-
-    kxld_array_deinit(&kext->extrelocs);
-    kxld_array_deinit(&kext->locrelocs);
-    kxld_dict_deinit(&kext->vtable_index);
-
-    if (kext->symtab) {
-        kxld_symtab_deinit(kext->symtab);
-        kxld_free(kext->symtab, kxld_symtab_sizeof());
-    }
-
-    bzero(kext, sizeof(*kext));
-}
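The split between the two teardown paths above is deliberate: kxld_kext_clear() walks only the nitems currently in use and resets them, keeping the arrays' backing storage so the KXLDKext can be reused for another link, while kxld_kext_deinit() walks every allocated slot (maxitems), frees the storage, and bzeroes the struct. A hypothetical caller pairs them like this:

    kxld_kext_clear(kext);    /* between links: reset, keep allocations */
    kxld_kext_deinit(kext);   /* at teardown: release everything */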
-
-/*******************************************************************************
-*******************************************************************************/
-boolean_t
-kxld_kext_is_true_kext(const KXLDKext *kext)
-{
-    return (kext->link_type == KXLD_LINK_KEXT);
-}
-
-/*******************************************************************************
-*******************************************************************************/
-void
-kxld_kext_get_vmsize(const KXLDKext *kext, u_long *header_size, u_long *vmsize)
-{
-    check(kext);
-    check(header_size);
-    check(vmsize);
-    *header_size = 0;
-    *vmsize = 0;
-
-    /* vmsize is the padded header page(s) + segment vmsizes */
-
-    *header_size = (kext->is_final_image) ?
-        0 : round_page(get_macho_header_size(kext));
-    *vmsize = *header_size + get_macho_data_size(kext);
-
-}
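A worked example of the sizing rule above, with hypothetical numbers and 4 KB pages: an object-file kext whose generated Mach-O headers occupy 0x2a8 bytes and whose segments total 0x5000 bytes of vmsize reports

    header_size = round_page(0x2a8) = 0x1000
    vmsize      = 0x1000 + 0x5000   = 0x6000

For a final linked image (is_final_image), the headers already live inside the first segment's pages, so header_size is reported as 0 and vmsize is just the sum of the segment vmsizes.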
-
-/*******************************************************************************
-*******************************************************************************/
-const struct kxld_symtab * 
-kxld_kext_get_symtab(const KXLDKext *kext)
-{
-    check(kext);
-
-    return kext->symtab;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-u_int
-kxld_kext_get_num_symbols(const KXLDKext *kext)
-{
-    check(kext);
-
-    return kxld_symtab_get_num_symbols(kext->symtab);
-}
-
-/*******************************************************************************
-*******************************************************************************/
-void 
-kxld_kext_get_vtables(KXLDKext *kext, const KXLDArray **vtables)
-{
-    check(kext);
-    check(vtables);
-
-    *vtables = &kext->vtables;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-u_int
-kxld_kext_get_num_vtables(const KXLDKext *kext)
-{
-    check(kext);
-
-    return kext->vtables.nitems;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-KXLDSeg *
-kxld_kext_get_seg_by_name(const KXLDKext *kext, const char *segname)
-{
-    KXLDSeg *seg = NULL;
-    u_int i = 0;
-
-    for (i = 0; i < kext->segs.nitems; ++i) {
-        seg = kxld_array_get_item(&kext->segs, i);
-
-        if (streq(segname, seg->segname)) break;
-
-        seg = NULL;
-    }
-
-    return seg;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-KXLDSect *
-kxld_kext_get_sect_by_name(const KXLDKext *kext, const char *segname, 
-    const char *sectname)
-{
-    KXLDSect *sect = NULL;
-    u_int i = 0;
-
-    for (i = 0; i < kext->sects.nitems; ++i) {
-        sect = kxld_array_get_item(&kext->sects, i);
-
-        if (streq(segname, sect->segname) && streq(sectname, sect->sectname)) {
-            break;
-        }
-
-        sect = NULL;
-    }
-
-    return sect;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-int
-kxld_kext_get_sectnum_for_sect(const KXLDKext *kext, const KXLDSect *sect)
-{
-    kern_return_t rval = KERN_FAILURE;
-    u_int idx = -1;
-
-    rval = kxld_array_get_index(&kext->sects, sect, &idx);
-    if (rval) idx = -1;
-
-    return idx;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-const KXLDArray * 
-kxld_kext_get_section_order(const KXLDKext *kext __unused)
-{
-#if KXLD_USER_OR_OBJECT
-    if (kext->link_type == KXLD_LINK_KERNEL && target_supports_object(kext)) {
-        return kext->section_order;
-    }
-#endif /* KXLD_USER_OR_OBJECT */
-
-    return NULL;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-static u_long
-get_macho_header_size(const KXLDKext *kext)
-{
-    KXLDSeg *seg = NULL;
-    u_long header_size = 0;
-    u_int i = 0;
-
-    check(kext);
-
-    /* Mach, segment, and UUID headers */
-
-    if (kxld_kext_is_32_bit(kext)) {
-        header_size += sizeof(struct mach_header);
-    } else {
-        header_size += sizeof(struct mach_header_64);
-    }
-
-    for (i = 0; i < kext->segs.nitems; ++i) {
-        seg = kxld_array_get_item(&kext->segs, i);
-        header_size += kxld_seg_get_macho_header_size(seg, kxld_kext_is_32_bit(kext));
-    }
-
-    if (kext->uuid.has_uuid) {
-        header_size += kxld_uuid_get_macho_header_size();
-    }
-
-    return header_size;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-static u_long
-get_macho_data_size(const KXLDKext *kext)
-{
-    KXLDSeg *seg = NULL;
-    u_long data_size = 0;
-    u_int i = 0;
-
-    check(kext);
-
-    for (i = 0; i < kext->segs.nitems; ++i) {
-        seg = kxld_array_get_item(&kext->segs, i);
-        data_size += (u_long) kxld_seg_get_vmsize(seg);
-    }
-
-    return data_size;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-kern_return_t kxld_kext_export_linked_object(const KXLDKext *kext,
-    u_char *linked_object, kxld_addr_t *kmod_info_kern)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDSeg *seg = NULL;
-    u_long size = 0;
-    u_long header_size = 0;
-    u_long header_offset = 0;
-    u_long data_offset = 0;
-    u_int ncmds = 0;
-    u_int i = 0;
-
-    check(kext);
-    check(linked_object);
-    check(kmod_info_kern);
-    *kmod_info_kern = 0;
-
-    /* Calculate the size of the headers and data */
-
-    header_size = get_macho_header_size(kext);
-    data_offset = (kext->is_final_image) ? header_size : round_page(header_size);
-    size = data_offset + get_macho_data_size(kext);
-
-    /* Copy data to the file */
-
-    ncmds = kext->segs.nitems + (kext->uuid.has_uuid == TRUE);
-
-    rval = export_macho_header(kext, linked_object, ncmds, 
-        &header_offset, header_size);
-    require_noerr(rval, finish);
-
-    for (i = 0; i < kext->segs.nitems; ++i) {
-        seg = kxld_array_get_item(&kext->segs, i);
-
-        rval = kxld_seg_export_macho_to_vm(seg, linked_object, &header_offset, 
-            header_size, size, kext->link_addr, kxld_kext_is_32_bit(kext));
-        require_noerr(rval, finish);
-    }
-
-    if (kext->uuid.has_uuid) {
-        rval = kxld_uuid_export_macho(&kext->uuid, linked_object, 
-            &header_offset, header_size);
-        require_noerr(rval, finish);
-    }
-
-    *kmod_info_kern = kext->kmod_link_addr;
-
-#if !KERNEL
-    unswap_macho(linked_object, kext->host_order, kext->target_order);
-#endif /* !KERNEL */
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
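The layout that kxld_kext_export_linked_object() produces for an object-file input can be sketched as follows (for final images, data_offset equals header_size, since the headers already occupy the front of the first segment):

    /*
     * +---------------------------------------------+  offset 0
     * | mach_header + segment and UUID load commands |
     * | zero padding to round_page(header_size)      |
     * +---------------------------------------------+  data_offset
     * | segment 1 contents (vmsize bytes)            |
     * | segment 2 contents (vmsize bytes)            |
     * | ...                                          |
     * +---------------------------------------------+  size
     */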
-
-#if !KERNEL
-/*******************************************************************************
-*******************************************************************************/
-kern_return_t
-kxld_kext_export_symbol_file(const KXLDKext *kext, 
-    u_char **_symbol_file, u_long *_filesize)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDSeg *seg = NULL;
-    u_char *file = NULL;
-    u_long size = 0;
-    u_long header_size = 0;
-    u_long header_offset = 0;
-    u_long data_offset = 0;
-    u_int ncmds = 0;
-    u_int i = 0;
-
-    check(kext);
-    check(_symbol_file);
-    *_symbol_file = NULL;
-
-    /* Calculate the size of the file */
-
-    if (kxld_kext_is_32_bit(kext)) {
-        header_size += sizeof(struct mach_header);
-    } else {
-        header_size += sizeof(struct mach_header_64);
-    }
-
-    for (i = 0; i < kext->segs.nitems; ++i) {
-        seg = kxld_array_get_item(&kext->segs, i);
-        header_size += kxld_seg_get_macho_header_size(seg, kxld_kext_is_32_bit(kext));
-        size += kxld_seg_get_macho_data_size(seg);
-    }
-
-    header_size += kxld_symtab_get_macho_header_size();
-    size += kxld_symtab_get_macho_data_size(kext->symtab, FALSE, 
-        kxld_kext_is_32_bit(kext));
-
-    if (kext->uuid.has_uuid) {
-        header_size += kxld_uuid_get_macho_header_size();
-    }
-
-    data_offset = round_page(header_size);
-    size += data_offset;
-
-    /* Allocate the symbol file */
-
-    file = kxld_page_alloc_untracked(size);
-    require_action(file, finish, rval=KERN_RESOURCE_SHORTAGE);
-    bzero(file, size);
-
-    /* Copy data to the file */
-
-    ncmds = kext->segs.nitems + (kext->uuid.has_uuid == TRUE) + 1; /* +1 for symtab */
-    rval = export_macho_header(kext, file, ncmds, &header_offset, header_size);
-    require_noerr(rval, finish);
-
-    for (i = 0; i < kext->segs.nitems; ++i) {
-        seg = kxld_array_get_item(&kext->segs, i);
-        rval = kxld_seg_export_macho_to_file_buffer(seg, file, &header_offset, 
-            header_size, &data_offset, size, kxld_kext_is_32_bit(kext));
-        require_noerr(rval, finish);
-    }
-
-    rval = kxld_symtab_export_macho(kext->symtab, file, &header_offset,
-        header_size, &data_offset, size, FALSE, kxld_kext_is_32_bit(kext));
-    require_noerr(rval, finish);
-
-    if (kext->uuid.has_uuid) {
-        rval = kxld_uuid_export_macho(&kext->uuid, file, &header_offset, 
-            header_size);
-        require_noerr(rval, finish);
-    }
-
-    header_offset = header_size;
-
-    /* Commit */
-
-    unswap_macho(file, kext->host_order, kext->target_order);
-
-    *_filesize = size;
-    *_symbol_file = file;
-    file = NULL;
-    rval = KERN_SUCCESS;
-
-finish:
-
-    if (file) {
-        kxld_page_free_untracked(file, size);
-        file = NULL;
-    }
-
-    check(!file);
-    check((!rval) ^ (!*_symbol_file));
-
-    return rval;
-}
-#endif
-
-/*******************************************************************************
-*******************************************************************************/
-boolean_t 
-kxld_kext_target_needs_swap(const KXLDKext *kext __unused)
-{
-#if KERNEL
-    return FALSE;
-#else
-    return (kext->target_order != kext->host_order);
-#endif /* KERNEL */
-}
-
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-export_macho_header(const KXLDKext *kext, u_char *buf, u_int ncmds,
-    u_long *header_offset, u_long header_size)
-{
-    kern_return_t rval = KERN_FAILURE;
-
-    check(kext);
-    check(buf);
-    check(header_offset);
-
-    KXLD_3264_FUNC(kxld_kext_is_32_bit(kext), rval, 
-        export_macho_header_32, export_macho_header_64, 
-        kext, buf, ncmds, header_offset, header_size);
-    require_noerr(rval, finish);
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-
-#if KXLD_USER_OR_ILP32
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-export_macho_header_32(const KXLDKext *kext, u_char *buf, u_int ncmds,
-    u_long *header_offset, u_long header_size)
-{
-    kern_return_t rval = KERN_FAILURE;
-    struct mach_header *mach = NULL;
-
-    check(kext);
-    check(buf);
-    check(header_offset);
-
-    require_action(sizeof(*mach) <= header_size - *header_offset, finish,
-        rval=KERN_FAILURE);
-    mach = (struct mach_header *) (buf + *header_offset);
-
-    mach->magic = MH_MAGIC;
-    mach->cputype = kext->cputype;
-    mach->filetype = kext->filetype;
-    mach->ncmds = ncmds;
-    mach->sizeofcmds = (uint32_t) (header_size - sizeof(*mach));
-    mach->flags = MH_NOUNDEFS;
-
-    *header_offset += sizeof(*mach);
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-#endif /* KXLD_USER_OR_ILP32 */
-
-#if KXLD_USER_OR_LP64
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-export_macho_header_64(const KXLDKext *kext, u_char *buf, u_int ncmds,
-    u_long *header_offset, u_long header_size)
-{
-    kern_return_t rval = KERN_FAILURE;
-    struct mach_header_64 *mach = NULL;
-
-    check(kext);
-    check(buf);
-    check(header_offset);
-    
-    require_action(sizeof(*mach) <= header_size - *header_offset, finish,
-        rval=KERN_FAILURE);
-    mach = (struct mach_header_64 *) (buf + *header_offset);
-    
-    mach->magic = MH_MAGIC_64;
-    mach->cputype = kext->cputype;
-    mach->cpusubtype = kext->cpusubtype;
-    mach->filetype = kext->filetype;
-    mach->ncmds = ncmds;
-    mach->sizeofcmds = (uint32_t) (header_size - sizeof(*mach));
-    mach->flags = MH_NOUNDEFS;
-
-    *header_offset += sizeof(*mach);
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-#endif /* KXLD_USER_OR_LP64 */
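The sizeofcmds arithmetic above is easy to check by hand. For a hypothetical 64-bit kext with two sectionless segments and a UUID, the header region is:

    sizeof(struct mach_header_64)             0x20
    2 * sizeof(struct segment_command_64)     0x90   (0x48 each)
    sizeof(struct uuid_command)               0x18
    header_size                               0xc8

    mach->sizeofcmds = header_size - sizeof(*mach) = 0xc8 - 0x20 = 0xa8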
-
-/*******************************************************************************
-*******************************************************************************/
-kern_return_t 
-kxld_kext_resolve(KXLDKext *kext, struct kxld_dict *patched_vtables,
-    struct kxld_dict *defined_symbols)
-{
-    kern_return_t rval = KERN_FAILURE;
-
-    require_action(kext->link_type == KXLD_LINK_PSEUDO_KEXT, finish,
-        rval=KERN_FAILURE);
-
-    /* Resolve symbols */
-    rval = resolve_symbols(kext, defined_symbols, NULL);
-    require_noerr(rval, finish);
-
-    /* Validate symbols */
-    rval = validate_symbols(kext);
-    require_noerr(rval, finish);
-
-    /* Pseudokexts re-export their dependencies' vtables */
-    rval = copy_vtables(kext, patched_vtables);
-    require_noerr(rval, finish);
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-kern_return_t
-kxld_kext_relocate(KXLDKext *kext, kxld_addr_t link_address,
-    KXLDDict *patched_vtables, KXLDDict *defined_symbols, 
-    KXLDDict *obsolete_symbols)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDSeg *seg = NULL;
-    u_int i = 0;
-
-    check(kext);
-    check(patched_vtables);
-    check(defined_symbols);
-
-    require_action(kext->link_type == KXLD_LINK_KEXT, finish, rval=KERN_FAILURE);
-
-    kext->link_addr = link_address;
-
-    /* Relocate segments (which relocates the sections) */
-    for (i = 0; i < kext->segs.nitems; ++i) {
-        seg = kxld_array_get_item(&kext->segs, i);
-        kxld_seg_relocate(seg, link_address);
-    }
-
-    /* Relocate symbols */
-    rval = kxld_symtab_relocate(kext->symtab, &kext->sects);
-    require_noerr(rval, finish);
- 
-    /* Populate kmod info structure */
-    rval = populate_kmod_info(kext);
-    require_noerr(rval, finish);
-   
-    /* Resolve symbols */
-    rval = resolve_symbols(kext, defined_symbols, obsolete_symbols);
-    require_noerr(rval, finish);
-  
-    /* Patch vtables */
-    rval = patch_vtables(kext, patched_vtables, defined_symbols);
-    require_noerr(rval, finish);
-    
-    /* Validate symbols */
-    rval = validate_symbols(kext);
-    require_noerr(rval, finish);
-
-    /* Process relocation entries and populate the global offset table.
-     *
-     * For final linked images: the relocation entries are contained in a couple
-     * of tables hanging off the end of the symbol table.  The GOT has its own
-     * section created by the linker; we simply need to fill it.
-     *
-     * For object files: the relocation entries are bound to each section.
-     * The GOT, if it exists for the target architecture, is created by kxld,
-     * and we must populate it according to our internal structures.
-     */
-    if (kext->is_final_image) {
-#if KXLD_USER_OR_BUNDLE
-        rval = process_symbol_pointers(kext);
-        require_noerr(rval, finish);
-
-        rval = process_relocs_from_tables(kext);
-        require_noerr(rval, finish);
-#else
-        require_action(FALSE, finish, rval=KERN_FAILURE);
-#endif /* KXLD_USER_OR_BUNDLE */
-    } else {
-#if KXLD_USER_OR_GOT
-        /* Populate GOT */
-        rval = populate_got(kext);
-        require_noerr(rval, finish);
-#endif /* KXLD_USER_OR_GOT */
-#if KXLD_USER_OR_OBJECT
-        rval = process_relocs_from_sections(kext);
-        require_noerr(rval, finish);
-#else
-        require_action(FALSE, finish, rval=KERN_FAILURE);
-#endif /* KXLD_USER_OR_OBJECT */
-    }
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-resolve_symbols(KXLDKext *kext, KXLDDict *defined_symbols, 
-    KXLDDict *obsolete_symbols)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDSymtabIterator iter;
-    KXLDSym *sym = NULL;
-    void *addrp = NULL;
-    kxld_addr_t addr = 0;
-    const char *name = NULL;
-    boolean_t tests_for_weak = FALSE;
-    boolean_t error = FALSE;
-    boolean_t warning = FALSE;
-    char *demangled_name = NULL;
-    size_t demangled_length = 0;
-
-    check(kext);
-    check(defined_symbols);
-
-    /* Check if the kext tests for weak symbols */
-    sym = kxld_symtab_get_symbol_by_name(kext->symtab, KXLD_WEAK_TEST_SYMBOL);
-    tests_for_weak = (sym != NULL);
-
-    /* Check for duplicate symbols */
-    kxld_symtab_iterator_init(&iter, kext->symtab, kxld_sym_is_exported, FALSE);
-    while ((sym = kxld_symtab_iterator_get_next(&iter))) {
-        addrp = kxld_dict_find(defined_symbols, sym->name);
-        if (addrp) { 
-            /* Convert to a kxld_addr_t */
-            if (kxld_kext_is_32_bit(kext)) {
-                addr = (kxld_addr_t) (*(uint32_t*)addrp);
-            } else {
-                addr = (kxld_addr_t) (*(uint64_t*)addrp);
-            }
-
-            /* Not a problem if the symbols have the same address */
-            if (addr == sym->link_addr) {
-                continue;
-            }
-
-            if (!error) {
-                error = TRUE;
-                kxld_log(kKxldLogLinking, kKxldLogErr,
-                    "The following symbols were defined more than once:");
-            }
-
-            kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s: %p - %p", 
-                kxld_demangle(sym->name, &demangled_name, &demangled_length),
-                (void *) (uintptr_t) sym->link_addr, 
-                (void *) (uintptr_t) addr);
-        }
-    }
-    require_noerr_action(error, finish, rval=KERN_FAILURE);
-
-    /* Resolve undefined and indirect symbols */
-
-    /* Iterate over all unresolved symbols */
-    kxld_symtab_iterator_init(&iter, kext->symtab, 
-        kxld_sym_is_unresolved, FALSE);
-    while ((sym = kxld_symtab_iterator_get_next(&iter))) {
-
-        /* Common symbols are not supported */
-        if (kxld_sym_is_common(sym)) {
-
-            if (!error) {
-                error = TRUE;
-                if (target_supports_common(kext)) {
-                    kxld_log(kKxldLogLinking, kKxldLogErr, 
-                        "The following common symbols were not resolved:");
-                } else {
-                    kxld_log(kKxldLogLinking, kKxldLogErr, 
-                        "Common symbols are not supported in kernel extensions. " 
-                         "Use -fno-common to build your kext. "
-                         "The following are common symbols:");
-                }
-            }
-            kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", 
-                kxld_demangle(sym->name, &demangled_name, &demangled_length));
-
-        } else {
-
-            /* Find the address of the defined symbol */
-            if (kxld_sym_is_undefined(sym)) {
-                name = sym->name;
-            } else {
-                name = sym->alias;
-            }
-            addrp = kxld_dict_find(defined_symbols, name);
-            
-            /* Resolve the symbol.  If a definition cannot be found, then:
-             * 1) Pseudokexts log a warning and proceed
-             * 2) Actual kexts delay the error until validation in case vtable
-             *    patching replaces the undefined symbol.
-             */
-
-            if (addrp) {
-
-                /* Convert to a kxld_addr_t */
-                if (kxld_kext_is_32_bit(kext)) {
-                    addr = (kxld_addr_t) (*(uint32_t*)addrp);
-                } else {
-                    addr = (kxld_addr_t) (*(uint64_t*)addrp);
-                }
-
-                boolean_t is_exported = (kext->link_type == KXLD_LINK_PSEUDO_KEXT);
-
-                rval = kxld_sym_resolve(sym, addr, is_exported);
-                require_noerr(rval, finish);
-
-                if (obsolete_symbols && kxld_dict_find(obsolete_symbols, name)) {
-                    kxld_log(kKxldLogLinking, kKxldLogWarn, 
-                        "This kext uses obsolete symbol %s.", 
-                        kxld_demangle(name, &demangled_name, &demangled_length));
-                }
-
-            } else if (kext->link_type == KXLD_LINK_PSEUDO_KEXT) {
-                /* Pseudokexts ignore undefined symbols, because any actual
-                 * kexts that need those symbols will fail to link anyway, so
-                 * there's no need to block well-behaved kexts.
-                 */
-                if (!warning) {
-                    kxld_log(kKxldLogLinking, kKxldLogWarn, 
-                        "This symbol set has the following unresolved symbols:");
-                    warning = TRUE;
-                }
-                kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", 
-                    kxld_demangle(sym->name, &demangled_name, &demangled_length));
-                kxld_sym_delete(sym);
-
-            } else if (kxld_sym_is_weak(sym)) {
-                /* Make sure that the kext has referenced gOSKextUnresolved.
-                 */
-                require_action(tests_for_weak, finish, 
-                   rval=KERN_FAILURE;
-                   kxld_log(kKxldLogLinking, kKxldLogErr, 
-                      "This kext has weak references but does not test for "
-                      "them. Test for weak references with "
-                      "OSKextIsSymbolResolved()."));
-
-#if KERNEL
-                /* Get the address of the default weak address.
-                 */
-                addr = (kxld_addr_t) &kext_weak_symbol_referenced;
-#else  
-                /* This is run during symbol generation only, so we only 
-                 * need a filler value here.
-                 */
-                addr = kext->link_addr;
-#endif /* KERNEL */
-
-                rval = kxld_sym_resolve(sym, addr, /* exported */ FALSE);
-                require_noerr(rval, finish);
-            }
-        }
-    }
-    require_noerr_action(error, finish, rval=KERN_FAILURE);
-
-    rval = KERN_SUCCESS;
-
-finish:
-    if (demangled_name) kxld_free(demangled_name, demangled_length);
-
-    return rval;
-}
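The weak-reference rule enforced above means a kext that links against any weak symbol must also reference KXLD_WEAK_TEST_SYMBOL and guard its call sites. A sketch of the pattern the error message asks for, using the OSKextIsSymbolResolved() check it names (some_weak_function is a hypothetical weak import):

    if (OSKextIsSymbolResolved(some_weak_function)) {
        some_weak_function();
    } else {
        /* The symbol was bound to the kernel's default weak address
         * (kext_weak_symbol_referenced above), so fall back here. */
    }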
-
-/*******************************************************************************
-*******************************************************************************/
-static boolean_t
-target_supports_strict_patching(KXLDKext *kext)
-{
-    check(kext);
-
-    return (kext->cputype != CPU_TYPE_I386 && 
-            kext->cputype != CPU_TYPE_POWERPC);
-}
-
-/*******************************************************************************
-* We must patch vtables to ensure binary compatibility, and to perform that
-* patching, we have to determine the vtables' inheritance relationships.  The
-* MetaClass system gives us a way to do that:
-*   1) Iterate over all of the super MetaClass pointer symbols.  Every class
-*      that inherits from OSObject will have a pointer in its MetaClass that
-*      points to the MetaClass's super MetaClass.
-*   2) Derive the name of the class from the super MetaClass pointer.
-*   3) Derive the name of the class's vtable from the name of the class
-*   4) Follow the super MetaClass pointer to get the address of the super
-*      MetaClass's symbol
-*   5) Look up the super MetaClass symbol by address
-*   6) Derive the super class's name from the super MetaClass name
-*   7) Derive the super class's vtable from the super class's name
-* This procedure will allow us to find all of the OSObject-derived classes and
-* their super classes, and thus patch all of the vtables.
-*
-* We also have to take care to patch up the MetaClass's vtables.  The
-* MetaClasses follow a parallel hierarchy to the classes, so once we have the
-* class name and super class name, we can also derive the MetaClass name and
-* the super MetaClass name, and thus find and patch their vtables as well.
-*******************************************************************************/
-
-#define kOSMetaClassVTableName "__ZTV11OSMetaClass"
-
-static kern_return_t
-patch_vtables(KXLDKext *kext, KXLDDict *patched_vtables, 
-    KXLDDict *defined_symbols)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDSymtabIterator iter;
-    KXLDSym *metaclass = NULL;
-    KXLDSym *super_metaclass_pointer = NULL;
-    KXLDSym *final_sym = NULL;
-    KXLDVTable *vtable = NULL;
-    KXLDVTable *super_vtable = NULL;
-    char class_name[KXLD_MAX_NAME_LEN];
-    char super_class_name[KXLD_MAX_NAME_LEN];
-    char vtable_name[KXLD_MAX_NAME_LEN];
-    char super_vtable_name[KXLD_MAX_NAME_LEN];
-    char final_sym_name[KXLD_MAX_NAME_LEN];
-    char *demangled_name1 = NULL;
-    char *demangled_name2 = NULL;
-    size_t demangled_length1 = 0;;
-    size_t demangled_length2 = 0;
-    size_t len = 0;
-    u_int nvtables = 0;
-    u_int npatched = 0;
-    u_int nprogress = 0;
-    boolean_t failure = FALSE;
-
-    check(kext);
-    check(patched_vtables);
-
-    /* Find each super meta class pointer symbol */
-
-    kxld_symtab_iterator_init(&iter, kext->symtab, 
-        kxld_sym_is_super_metaclass_pointer, FALSE);
-    nvtables = kxld_symtab_iterator_get_num_remaining(&iter);
-
-    while (npatched < nvtables) {
-        npatched = 0;
-        nprogress = 0;
-        kxld_symtab_iterator_reset(&iter);
-        while((super_metaclass_pointer = kxld_symtab_iterator_get_next(&iter))) 
-        {
-            /* Get the class name from the smc pointer */
-            rval = kxld_sym_get_class_name_from_super_metaclass_pointer(
-                super_metaclass_pointer, class_name, sizeof(class_name));
-            require_noerr(rval, finish);
-
-            /* Get the vtable name from the class name */
-            rval = kxld_sym_get_vtable_name_from_class_name(class_name,
-                vtable_name, sizeof(vtable_name));
-            require_noerr(rval, finish);
-
-            /* Get the vtable and make sure it hasn't been patched */
-            vtable = kxld_dict_find(&kext->vtable_index, vtable_name);
-            require_action(vtable, finish, rval=KERN_FAILURE;
-                kxld_log(kKxldLogPatching, kKxldLogErr, kKxldLogMissingVtable,
-                    vtable_name, class_name));
-
-            if (!vtable->is_patched) {
-
-                /* Find the SMCP's meta class symbol */
-                rval = get_metaclass_symbol_from_super_meta_class_pointer_symbol(
-                    kext, super_metaclass_pointer, &metaclass);
-                require_noerr(rval, finish);
-
-                /* Get the super class name from the super metaclass */
-                rval = kxld_sym_get_class_name_from_metaclass(metaclass,
-                    super_class_name, sizeof(super_class_name));
-                require_noerr(rval, finish);
-
-                /* Get the super vtable name from the class name */
-                rval = kxld_sym_get_vtable_name_from_class_name(super_class_name,
-                    super_vtable_name, sizeof(super_vtable_name));
-                require_noerr(rval, finish);
-
-                if (failure) {
-                    kxld_log(kKxldLogPatching, kKxldLogErr, 
-                        "\t'%s' (super vtable '%s')", 
-                        kxld_demangle(vtable_name, &demangled_name1, 
-                            &demangled_length1), 
-                        kxld_demangle(super_vtable_name, &demangled_name2, 
-                            &demangled_length2));
-                    continue;
-                }
-
-                /* Get the super vtable if it's been patched */
-                super_vtable = kxld_dict_find(patched_vtables, super_vtable_name);
-                if (!super_vtable) continue;
-
-                /* Get the final symbol's name from the super vtable */
-                rval = kxld_sym_get_final_sym_name_from_class_name(super_class_name, 
-                    final_sym_name, sizeof(final_sym_name));
-                require_noerr(rval, finish);
-
-                /* Verify that the final symbol does not exist.  First check
-                 * all the externally defined symbols, then check locally.
-                 */
-                final_sym = kxld_dict_find(defined_symbols, final_sym_name);
-                if (!final_sym) {
-                    final_sym = kxld_symtab_get_symbol_by_name(kext->symtab, 
-                        final_sym_name);
-                }
-                require_action(!final_sym, finish, 
-                    rval=KERN_FAILURE;
-                    kxld_log(kKxldLogPatching, kKxldLogErr, 
-                        "Class '%s' is a subclass of final class '%s'.",
-                        kxld_demangle(class_name, &demangled_name1, 
-                            &demangled_length1), 
-                        kxld_demangle(super_class_name, &demangled_name2, 
-                            &demangled_length2)));
-
-                /* Patch the class's vtable */
-                rval = kxld_vtable_patch(vtable, super_vtable, kext->symtab,
-                    target_supports_strict_patching(kext));
-                require_noerr(rval, finish);
+    char *demangled_name1 = NULL;
+    char *demangled_name2 = NULL;
+    size_t demangled_length1 = 0;
+    size_t demangled_length2 = 0;
 
-                /* Add the class's vtable to the set of patched vtables */
-                rval = kxld_dict_insert(patched_vtables, vtable->name, vtable);
-                require_noerr(rval, finish);
+    check(kext);
+    check(vtable_sym_out);
+    check(meta_vtable_sym_out);
 
-                /* Get the meta vtable name from the class name */
-                rval = kxld_sym_get_meta_vtable_name_from_class_name(class_name,
-                    vtable_name, sizeof(vtable_name));
-                require_noerr(rval, finish);
+    require(!kxld_object_is_kernel(kext->kext), finish);
 
-                /* Get the meta vtable.  Whether or not it should exist has already
-                 * been tested in create_vtables(), so if it doesn't exist and we're
-                 * still running, we can safely skip it.
-                 */
-                vtable = kxld_dict_find(&kext->vtable_index, vtable_name);
-                if (!vtable) {
-                    ++nprogress;
-                    ++npatched;
-                    continue;
-                }
-                require_action(!vtable->is_patched, finish, rval=KERN_FAILURE);
+    symtab = kxld_object_get_symtab(kext->kext);
 
-                /* There is no way to look up a metaclass vtable at runtime, but
-                 * we know that every class's metaclass inherits directly from 
-                 * OSMetaClass, so we just hardcode that vtable name here.
-                 */
-                len = strlcpy(super_vtable_name, kOSMetaClassVTableName,
-                    sizeof(super_vtable_name));
-                require_action(len == const_strlen(kOSMetaClassVTableName),
-                    finish, rval=KERN_FAILURE);
-                       
-                /* Get the super meta vtable */
-                super_vtable = kxld_dict_find(patched_vtables, super_vtable_name);
-                require_action(super_vtable && super_vtable->is_patched, 
-                    finish, rval=KERN_FAILURE);
+    /* Get the class name from the smc pointer */
+    rval = kxld_sym_get_class_name_from_super_metaclass_pointer(
+        super_metaclass_ptr_sym, class_name, sizeof(class_name));
+    require_noerr(rval, finish);
 
-                /* Patch the meta class's vtable */
-                rval = kxld_vtable_patch(vtable, super_vtable,
-                    kext->symtab, target_supports_strict_patching(kext));
-                require_noerr(rval, finish);
+    /* Get the vtable name from the class name */
+    rval = kxld_sym_get_vtable_name_from_class_name(class_name,
+        vtable_name, sizeof(vtable_name));
+    require_noerr(rval, finish);
 
-                /* Add the MetaClass's vtable to the set of patched vtables */
-                rval = kxld_dict_insert(patched_vtables, vtable->name, vtable);
-                require_noerr(rval, finish);
-                
-                ++nprogress;
-            }
+    /* Get the vtable symbol */
+    if (defined_symbols) {
+        vtable_sym = kxld_dict_find(defined_symbols, vtable_name);
+    } else {
+        vtable_sym = kxld_symtab_get_locally_defined_symbol_by_name(symtab, 
+            vtable_name);
+    }
+    require_action(vtable_sym, finish, rval=KERN_FAILURE;
+        kxld_log(kKxldLogPatching, kKxldLogErr, kKxldLogMissingVtable,
+        vtable_name, class_name));
 
-            ++npatched;
-        }
+    /* Get the meta vtable name from the class name */
+    rval = kxld_sym_get_meta_vtable_name_from_class_name(class_name,
+        meta_vtable_name, sizeof(meta_vtable_name));
+    require_noerr(rval, finish);
 
-        require_action(!failure, finish, rval=KERN_FAILURE);
-        if (!nprogress) {
-            failure = TRUE;
+    /* Get the meta vtable symbol */
+    if (defined_symbols) {
+        meta_vtable_sym = kxld_dict_find(defined_symbols, meta_vtable_name);
+    } else {
+        meta_vtable_sym = kxld_symtab_get_locally_defined_symbol_by_name(symtab,
+            meta_vtable_name);
+    }
+    if (!meta_vtable_sym) {
+        if (kxld_object_target_supports_strict_patching(kext->kext)) {
+            kxld_log(kKxldLogPatching, kKxldLogErr, 
+                kKxldLogMissingVtable, 
+                meta_vtable_name, class_name);
+            rval = KERN_FAILURE;
+            goto finish;
+        } else {
             kxld_log(kKxldLogPatching, kKxldLogErr, 
-                "The following vtables were unpatchable because each one's " 
-                "parent vtable either was not found or also was not patchable:");
+                "Warning: " kKxldLogMissingVtable, 
+                kxld_demangle(meta_vtable_name, &demangled_name1, 
+                    &demangled_length1), 
+                kxld_demangle(class_name, &demangled_name2, 
+                    &demangled_length2));
         }
     }
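+    /* On strict-patching targets a missing MetaClass vtable is fatal; on
+     * other targets we warn and hand back a NULL meta vtable symbol, which
+     * the vtable-patching pass then skips.
+     */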
-
+    
+    *vtable_sym_out = vtable_sym;
+    *meta_vtable_sym_out = meta_vtable_sym;
     rval = KERN_SUCCESS;
 finish:
     if (demangled_name1) kxld_free(demangled_name1, demangled_length1);
@@ -2335,570 +621,454 @@ finish:
 /*******************************************************************************
 *******************************************************************************/
 static kern_return_t
-validate_symbols(KXLDKext *kext)
+resolve_symbols(KXLDKext *kext, const KXLDDict *defined_symbols, 
+    const KXLDDict *obsolete_symbols)
 {
     kern_return_t rval = KERN_FAILURE;
+    const KXLDSymtab *symtab = NULL;
     KXLDSymtabIterator iter;
     KXLDSym *sym = NULL;
-    u_int error = FALSE;
+    KXLDSym *defined_sym = NULL;
+    const char *name = NULL;
+    boolean_t tests_for_weak = FALSE;
+    boolean_t error = FALSE;
     char *demangled_name = NULL;
     size_t demangled_length = 0;
-    
-    /* Check for any unresolved symbols */
-    kxld_symtab_iterator_init(&iter, kext->symtab, kxld_sym_is_unresolved, FALSE);
-    while ((sym = kxld_symtab_iterator_get_next(&iter))) {
-        if (!error) {
-            error = TRUE;
-            kxld_log(kKxldLogLinking, kKxldLogErr, 
-                "The following symbols are unresolved for this kext:");
-        }
-        kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", 
-            kxld_demangle(sym->name, &demangled_name, &demangled_length));
-    }
-    require_noerr_action(error, finish, rval=KERN_FAILURE);
 
-    rval = KERN_SUCCESS;
+    check(kext->kext);
+    check(defined_symbols);
+    check(obsolete_symbols);
 
-finish:
-    if (demangled_name) kxld_free(demangled_name, demangled_length);
-    return rval;
-}
+    symtab = kxld_object_get_symtab(kext->kext);
 
-#if KXLD_USER_OR_GOT || KXLD_USER_OR_COMMON
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-add_section(KXLDKext *kext, KXLDSect **sect)
-{
-    kern_return_t rval = KERN_FAILURE;
-    u_int nsects = kext->sects.nitems;
+    /* Check if the kext tests for weak symbols */
+    sym = kxld_symtab_get_symbol_by_name(symtab, KXLD_WEAK_TEST_SYMBOL);
+    tests_for_weak = (sym != NULL);
 
-    rval = kxld_array_resize(&kext->sects, nsects + 1);
-    require_noerr(rval, finish);
+    /* Check for duplicate symbols */
+    kxld_symtab_iterator_init(&iter, symtab, kxld_sym_is_exported, FALSE);
+    while ((sym = kxld_symtab_iterator_get_next(&iter))) {
+        defined_sym = kxld_dict_find(defined_symbols, sym->name);
+        if (defined_sym) { 
+            /* Not a problem if the symbols have the same address */
+            if (defined_sym->link_addr == sym->link_addr) {
+                continue;
+            }
 
-    *sect = kxld_array_get_item(&kext->sects, nsects);
+            if (!error) {
+                error = TRUE;
+                kxld_log(kKxldLogLinking, kKxldLogErr,
+                    "The following symbols were defined more than once:");
+            }
 
-    rval = KERN_SUCCESS;
+            kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s: %p - %p", 
+                kxld_demangle(sym->name, &demangled_name, &demangled_length),
+                (void *) (uintptr_t) sym->link_addr, 
+                (void *) (uintptr_t) defined_sym->link_addr);
+        }
+    }
+    require_noerr_action(error, finish, rval=KERN_FAILURE);
 
-finish:
-    return rval;
-}
-#endif /* KXLD_USER_OR_GOT || KXLD_USER_OR_COMMON */
+    /* Resolve undefined and indirect symbols */
 
-#if KXLD_USER_OR_GOT
-/*******************************************************************************
-*******************************************************************************/
-static boolean_t
-target_has_got(const KXLDKext *kext)
-{
-    return FALSE:
-}
+    /* Iterate over all unresolved symbols */
+    kxld_symtab_iterator_init(&iter, symtab, 
+        kxld_sym_is_unresolved, FALSE);
+    while ((sym = kxld_symtab_iterator_get_next(&iter))) {
 
-/*******************************************************************************
-* Create and initialize the Global Offset Table
-*******************************************************************************/
-static kern_return_t
-create_got(KXLDKext *kext)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDSect *sect = NULL;
-    u_int ngots = 0;
-    u_int i = 0;
+        /* Common symbols are not supported */
+        if (kxld_sym_is_common(sym)) {
 
-    if (!target_has_got(kext)) {
-        rval = KERN_SUCCESS;
-        goto finish;
-    }
+            if (!error) {
+                error = TRUE;
+                if (kxld_object_target_supports_common_symbols(kext->kext)) {
+                    kxld_log(kKxldLogLinking, kKxldLogErr, 
+                        "The following common symbols were not resolved:");
+                } else {
+                    kxld_log(kKxldLogLinking, kKxldLogErr, 
+                        "Common symbols are not supported in kernel extensions. " 
+                         "Use -fno-common to build your kext. "
+                         "The following are common symbols:");
+                }
+            }
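+            /* For example (illustrative): an uninitialized file-scope
+             * "int foo;" built into two translation units yields a common
+             * symbol; with -fno-common each becomes a real definition, so
+             * all but one must be rewritten as "extern int foo;".
+             */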
+            kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", 
+                kxld_demangle(sym->name, &demangled_name, &demangled_length));
 
-    for (i = 0; i < kext->sects.nitems; ++i) {
-        sect = kxld_array_get_item(&kext->sects, i);
-        ngots += kxld_sect_get_ngots(sect, &kext->relocator, 
-            kext->symtab);
-    }
+        } else {
 
-    rval = add_section(kext, &sect);
-    require_noerr(rval, finish);
+            /* Find the address of the defined symbol */
+            if (kxld_sym_is_undefined(sym)) {
+                name = sym->name;
+            } else {
+                name = sym->alias;
+            }
+            defined_sym = kxld_dict_find(defined_symbols, name);
+            
+            /* Resolve the symbol.  If a definition cannot be found, then:
+             * 1) Pseudokexts log a warning and proceed
+             * 2) Actual kexts delay the error until validation in case vtable
+             *    patching replaces the undefined symbol.
+             */
 
-    rval = kxld_sect_init_got(sect, ngots);
-    require_noerr(rval, finish);
+            if (defined_sym) {
 
-    kext->got_is_created = TRUE;
-    rval = KERN_SUCCESS;
+                rval = kxld_sym_resolve(sym, defined_sym->link_addr);
+                require_noerr(rval, finish);
 
-finish:
-    return rval;
-}
+                if (obsolete_symbols && kxld_dict_find(obsolete_symbols, name)) {
+                    kxld_log(kKxldLogLinking, kKxldLogWarn, 
+                        "This kext uses obsolete symbol %s.", 
+                        kxld_demangle(name, &demangled_name, &demangled_length));
+                }
 
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-populate_got(KXLDKext *kext)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDSect *sect = NULL;
-    u_int i = 0;
+            } else if (kxld_sym_is_weak(sym)) {
+                kxld_addr_t addr = 0;
 
-    if (!target_has_got(kext) || !kext->got_is_created) {
-        rval = KERN_SUCCESS;
-        goto finish;
-    }
+                /* Make sure that the kext has referenced gOSKextUnresolved.
+                 */
+                require_action(tests_for_weak, finish, 
+                   rval=KERN_FAILURE;
+                   kxld_log(kKxldLogLinking, kKxldLogErr, 
+                      "This kext has weak references but does not test for "
+                      "them. Test for weak references with "
+                      "OSKextIsSymbolResolved()."));
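+                /* Illustrative kext-side pattern (names hypothetical):
+                 *
+                 *     extern void OptionalFunc(void) __attribute__((weak_import));
+                 *     if (OSKextIsSymbolResolved(OptionalFunc)) OptionalFunc();
+                 *
+                 * The test references gOSKextUnresolved, which satisfies
+                 * this check.
+                 */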
 
-    for (i = 0; i < kext->sects.nitems; ++i) {
-        sect = kxld_array_get_item(&kext->sects, i);
-        if (streq_safe(sect->segname, KXLD_SEG_GOT, sizeof(KXLD_SEG_GOT)) &&
-            streq_safe(sect->sectname, KXLD_SECT_GOT, sizeof(KXLD_SECT_GOT)))
-        {
-            kxld_sect_populate_got(sect, kext->symtab,
-                kxld_kext_target_needs_swap(kext));
-            break;
+#if KERNEL
+                /* Get the address of the default weak address.
+                 */
+                addr = (kxld_addr_t) &kext_weak_symbol_referenced;
+#else  
+                /* This is run during symbol generation only, so we only 
+                 * need a filler value here.
+                 */
+                addr = 0xF00DD00D;
+#endif /* KERNEL */
+
+                rval = kxld_sym_resolve(sym, addr);
+                require_noerr(rval, finish);
+            }
         }
     }
-
-    require_action(i < kext->sects.nitems, finish, rval=KXLD_MISSING_GOT);
+    require_noerr_action(error, finish, rval=KERN_FAILURE);
 
     rval = KERN_SUCCESS;
 
 finish:
+    if (demangled_name) kxld_free(demangled_name, demangled_length);
+
     return rval;
 }
-#endif /* KXLD_USER_OR_GOT */
 
 /*******************************************************************************
+* We must patch vtables to ensure binary compatibility, and to perform that
+* patching, we have to determine the vtables' inheritance relationships.  The
+* MetaClass system gives us a way to do that:
+*   1) Iterate over all of the super MetaClass pointer symbols.  Every class
+*      that inherits from OSObject will have a pointer in its MetaClass that
+*      points to the MetaClass's super MetaClass.
+*   2) Derive the name of the class from the super MetaClass pointer.
+*   3) Derive the name of the class's vtable from the name of the class.
+*   4) Follow the super MetaClass pointer to get the address of the super
+*      MetaClass's symbol.
+*   5) Look up the super MetaClass symbol by address.
+*   6) Derive the super class's name from the super MetaClass name.
+*   7) Derive the name of the super class's vtable from the super class's
+*      name.
+* This procedure will allow us to find all of the OSObject-derived classes and
+* their super classes, and thus patch all of the vtables.
+*
+* We also have to take care to patch up the MetaClass's vtables.  The
+* MetaClasses follow a parallel hierarchy to the classes, so once we have the
+* class name and super class name, we can also derive the MetaClass name and
+* the super MetaClass name, and thus find and patch their vtables as well.
 *******************************************************************************/
-static boolean_t
-target_supports_common(const KXLDKext *kext)
-{
-    check(kext);
-    return (kext->cputype == CPU_TYPE_I386 || 
-            kext->cputype == CPU_TYPE_POWERPC);
-}
 
-#if KXLD_USER_OR_COMMON
-/*******************************************************************************
-* If there are common symbols, calculate how much space they'll need
-* and create/grow the __DATA __common section to accommodate them.
-* Then, resolve them against that section.
-*******************************************************************************/
+#define kOSMetaClassVTableName "__ZTV11OSMetaClass"
+
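+/* Illustrative of the derivations described above, for a hypothetical class
+ * Foo (the exact strings follow the Itanium C++ mangling that the
+ * kxld_sym_get_* helpers implement):
+ *     class vtable:               __ZTV3Foo
+ *     MetaClass vtable:           __ZTVN3Foo9MetaClassE
+ *     super MetaClass pointer:    __ZN3Foo10superClassE
+ */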
 static kern_return_t
-resolve_common_symbols(KXLDKext *kext)
+patch_vtables(KXLDKext *kext, KXLDDict *patched_vtables, 
+    const KXLDDict *defined_symbols)
 {
     kern_return_t rval = KERN_FAILURE;
     KXLDSymtabIterator iter;
-    KXLDSym *sym = NULL;
-    KXLDSect *sect = NULL;
-    kxld_addr_t base_addr = 0;
-    kxld_size_t size = 0;
-    kxld_size_t total_size = 0;
-    u_int align = 0;
-    u_int max_align = 0;
-    u_int sectnum = 0;
-
-    if (!target_supports_common(kext)) {
-        rval = KERN_SUCCESS;
-        goto finish;
-    }
-
-    /* Iterate over the common symbols to calculate their total aligned size */
-    kxld_symtab_iterator_init(&iter, kext->symtab, kxld_sym_is_common, FALSE);
-    while ((sym = kxld_symtab_iterator_get_next(&iter))) {
-        align = kxld_sym_get_common_align(sym);
-        size = kxld_sym_get_common_size(sym);
-
-        if (align > max_align) max_align = align;
-
-        total_size = kxld_align_address(total_size, align) + size;
-    }
+    const KXLDSymtab *symtab = NULL;
+    const KXLDSym *metaclass = NULL;
+    KXLDSym *super_metaclass_pointer = NULL;
+    KXLDSym *final_sym = NULL;
+    KXLDVTable *vtable = NULL;
+    KXLDVTable *super_vtable = NULL;
+    char class_name[KXLD_MAX_NAME_LEN];
+    char super_class_name[KXLD_MAX_NAME_LEN];
+    char vtable_name[KXLD_MAX_NAME_LEN];
+    char super_vtable_name[KXLD_MAX_NAME_LEN];
+    char final_sym_name[KXLD_MAX_NAME_LEN];
+    char *demangled_name1 = NULL;
+    char *demangled_name2 = NULL;
+    size_t demangled_length1 = 0;
+    size_t demangled_length2 = 0;
+    size_t len = 0;
+    u_int nvtables = 0;
+    u_int npatched = 0;
+    u_int nprogress = 0;
+    boolean_t failure = FALSE;
 
-    /* If there are common symbols, grow or create the __DATA __common section
-     * to hold them.
-     */
-    if (total_size) {
-        sect = kxld_kext_get_sect_by_name(kext, SEG_DATA, SECT_COMMON);
-        if (sect) {
-            base_addr = sect->base_addr + sect->size;
+    check(kext);
+    check(patched_vtables);
 
-            kxld_sect_grow(sect, total_size, max_align);
-        } else {
-            base_addr = 0;
+    symtab = kxld_object_get_symtab(kext->kext);
 
-            rval = add_section(kext, &sect);
-            require_noerr(rval, finish);
+    rval = create_vtable_index(kext);
+    require_noerr(rval, finish);
 
-            kxld_sect_init_zerofill(sect, SEG_DATA, SECT_COMMON, 
-                total_size, max_align);
-        }
+    /* Find each super meta class pointer symbol */
 
-        /* Resolve the common symbols against the new section */
-        rval = kxld_array_get_index(&kext->sects, sect, &sectnum);
-        require_noerr(rval, finish);
+    kxld_symtab_iterator_init(&iter, symtab, 
+        kxld_sym_is_super_metaclass_pointer, FALSE);
+    nvtables = kxld_symtab_iterator_get_num_remaining(&iter);
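+    /* Patch to a fixed point: each pass patches every vtable whose parent
+     * vtable has already been patched.  If a full pass makes no progress,
+     * the remaining vtables are unpatchable; one final pass runs with
+     * 'failure' set so that each of them is logged before we bail out.
+     */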
 
+    while (npatched < nvtables) {
+        npatched = 0;
+        nprogress = 0;
         kxld_symtab_iterator_reset(&iter);
-        while ((sym = kxld_symtab_iterator_get_next(&iter))) {
-            align = kxld_sym_get_common_align(sym);
-            size = kxld_sym_get_common_size(sym);
+        while ((super_metaclass_pointer = kxld_symtab_iterator_get_next(&iter)))
+        {
+            /* Get the class name from the smc pointer */
+            rval = kxld_sym_get_class_name_from_super_metaclass_pointer(
+                super_metaclass_pointer, class_name, sizeof(class_name));
+            require_noerr(rval, finish);
 
-            base_addr = kxld_align_address(base_addr, align);
-            kxld_sym_resolve_common(sym, sectnum, base_addr);
+            /* Get the vtable name from the class name */
+            rval = kxld_sym_get_vtable_name_from_class_name(class_name,
+                vtable_name, sizeof(vtable_name));
+            require_noerr(rval, finish);
 
-            base_addr += size;
-        }
-    }
+            /* Get the vtable and make sure it hasn't been patched */
+            vtable = kxld_dict_find(&kext->vtable_index, vtable_name);
+            require_action(vtable, finish, rval=KERN_FAILURE;
+                kxld_log(kKxldLogPatching, kKxldLogErr, kKxldLogMissingVtable,
+                    vtable_name, class_name));
 
-    rval = KERN_SUCCESS;
+            if (!vtable->is_patched) {
 
-finish:
-    return rval;
-}
-#endif /* KXLD_USER_OR_COMMON */
+                /* Find the SMCP's meta class symbol */
+                metaclass = get_metaclass_symbol_from_super_meta_class_pointer_symbol(
+                    kext, super_metaclass_pointer);
+                require_action(metaclass, finish, rval=KERN_FAILURE);
 
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-get_metaclass_symbol_from_super_meta_class_pointer_symbol(KXLDKext *kext,
-    KXLDSym *super_metaclass_pointer_sym, KXLDSym **metaclass)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDSect *sect = NULL;
-    KXLDReloc *reloc = NULL;
-    uint32_t offset = 0;
-    
-    check(kext);
-    check(super_metaclass_pointer_sym);
-    check(metaclass);
-    *metaclass = NULL;
+                /* Get the super class name from the super metaclass */
+                rval = kxld_sym_get_class_name_from_metaclass(metaclass,
+                    super_class_name, sizeof(super_class_name));
+                require_noerr(rval, finish);
 
-    sect = kxld_array_get_item(&kext->sects, super_metaclass_pointer_sym->sectnum);
-    require_action(sect, finish, rval=KERN_FAILURE);
+                /* Get the super vtable name from the class name */
+                rval = kxld_sym_get_vtable_name_from_class_name(super_class_name,
+                    super_vtable_name, sizeof(super_vtable_name));
+                require_noerr(rval, finish);
 
-    /* Find the relocation entry for the super metaclass pointer and get the 
-     * symbol associated with that relocation entry 
-     */
+                /* Get the super vtable if it's been patched */
+                super_vtable = kxld_dict_find(patched_vtables, super_vtable_name);
 
-    if (kext->is_final_image) {
-        /* The relocation entry could be in either the external or local
-         * relocation entries.  kxld_reloc_get_symbol() can handle either
-         * type.
-         */
-        reloc = kxld_reloc_get_reloc_by_offset(&kext->extrelocs, 
-            super_metaclass_pointer_sym->base_addr);
-        if (!reloc) {
-            reloc = kxld_reloc_get_reloc_by_offset(&kext->locrelocs,
-                super_metaclass_pointer_sym->base_addr);
-        }
-        require_action(reloc, finish, rval=KERN_FAILURE);
+                if (failure) {
+                    const KXLDVTable *unpatched_super_vtable;
+                    unpatched_super_vtable = kxld_dict_find(&kext->vtable_index,
+                        super_vtable_name);
+
+                    /* If the parent's vtable hasn't been patched, warn that
+                     * this vtable is unpatchable because of the parent.
+                     */
+                    if (!super_vtable) {
+                        kxld_log(kKxldLogPatching, kKxldLogErr, 
+                            "The %s was not patched because its parent, "
+                            "the %s, was not %s.",
+                            kxld_demangle(vtable_name, &demangled_name1, 
+                                &demangled_length1), 
+                            kxld_demangle(super_vtable_name, &demangled_name2, 
+                                &demangled_length2),
+                            (unpatched_super_vtable) ? "patchable" : "found");
+                    }
+                    continue;
+                }
 
-        *metaclass = kxld_reloc_get_symbol(&kext->relocator, reloc, kext->file,
-            kext->symtab);
-    } else {
-        offset = kxld_sym_get_section_offset(super_metaclass_pointer_sym, sect);
+                if (!super_vtable) continue;
 
-        reloc = kxld_reloc_get_reloc_by_offset(&sect->relocs, offset);
-        require_action(reloc, finish, rval=KERN_FAILURE);
+                /* Get the final symbol's name from the super class's name */
+                rval = kxld_sym_get_final_sym_name_from_class_name(super_class_name, 
+                    final_sym_name, sizeof(final_sym_name));
+                require_noerr(rval, finish);
 
-        *metaclass = kxld_reloc_get_symbol(&kext->relocator, reloc, sect->data,
-            kext->symtab);
-    }
-    require_action(*metaclass, finish, rval=KERN_FAILURE);
+                /* Verify that the final symbol does not exist.  First check
+                 * all the externally defined symbols, then check locally.
+                 */
+                final_sym = kxld_dict_find(defined_symbols, final_sym_name);
+                if (!final_sym) {
+                    final_sym = kxld_symtab_get_locally_defined_symbol_by_name(
+                        symtab, final_sym_name);
+                }
+                if (final_sym) {
+                    kxld_log(kKxldLogPatching, kKxldLogErr, 
+                        "Class '%s' is a subclass of final class '%s'.",
+                        kxld_demangle(class_name, &demangled_name1, 
+                            &demangled_length1), 
+                        kxld_demangle(super_class_name, &demangled_name2, 
+                            &demangled_length2));
+                    continue;
+                }
 
-    rval = KERN_SUCCESS;
+                /* Patch the class's vtable */
+                rval = kxld_vtable_patch(vtable, super_vtable, kext->kext);
+                if (rval) continue;
 
-finish:
-    return rval;
-}
+                /* Add the class's vtable to the set of patched vtables */
+                rval = kxld_dict_insert(patched_vtables, vtable->name, vtable);
+                require_noerr(rval, finish);
 
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-copy_vtables(KXLDKext *kext, const KXLDDict *patched_vtables)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDSymtabIterator iter;
-    KXLDSym *sym = NULL;
-    KXLDVTable *vtable = NULL, *src = NULL;
-    u_int i = 0;
-    u_int nvtables = 0;
-    char class_name[KXLD_MAX_NAME_LEN];
-    char meta_vtable_name[KXLD_MAX_NAME_LEN];
+                /* Get the meta vtable name from the class name */
+                rval = kxld_sym_get_meta_vtable_name_from_class_name(class_name,
+                    vtable_name, sizeof(vtable_name));
+                require_noerr(rval, finish);
 
-    kxld_symtab_iterator_init(&iter, kext->symtab, 
-        kxld_sym_is_class_vtable, FALSE);
-    
-    /* The iterator tracks all the class vtables, so we double the number of
-     * vtables we're expecting because we use the class vtables to find the
-     * MetaClass vtables.
-     */
-    nvtables = kxld_symtab_iterator_get_num_remaining(&iter) * 2;
-    rval = kxld_array_init(&kext->vtables, sizeof(KXLDVTable), nvtables);
-    require_noerr(rval, finish);
-    
-    while ((sym = kxld_symtab_iterator_get_next(&iter))) {
-        src = kxld_dict_find(patched_vtables, sym->name);
-        require_action(src, finish, rval=KERN_FAILURE);
+                /* Get the meta vtable.  Whether or not it should exist has already
+                 * been tested in create_vtables(), so if it doesn't exist and we're
+                 * still running, we can safely skip it.
+                 */
+                vtable = kxld_dict_find(&kext->vtable_index, vtable_name);
+                if (!vtable) {
+                    ++nprogress;
+                    ++npatched;
+                    continue;
+                }
+                require_action(!vtable->is_patched, finish, rval=KERN_FAILURE);
 
-        vtable = kxld_array_get_item(&kext->vtables, i++);
-        rval = kxld_vtable_copy(vtable, src);
-        require_noerr(rval, finish);
+                /* There is no way to look up a metaclass vtable at runtime, but
+                 * we know that every class's metaclass inherits directly from 
+                 * OSMetaClass, so we just hardcode that vtable name here.
+                 */
+                len = strlcpy(super_vtable_name, kOSMetaClassVTableName,
+                    sizeof(super_vtable_name));
+                require_action(len == const_strlen(kOSMetaClassVTableName),
+                    finish, rval=KERN_FAILURE);
+                       
+                /* Get the super meta vtable */
+                super_vtable = kxld_dict_find(patched_vtables, super_vtable_name);
+                require_action(super_vtable && super_vtable->is_patched, 
+                    finish, rval=KERN_FAILURE);
 
-        rval = kxld_sym_get_class_name_from_vtable(sym, 
-            class_name, sizeof(class_name));
-        require_noerr(rval, finish);
+                /* Patch the meta class's vtable */
+                rval = kxld_vtable_patch(vtable, super_vtable, kext->kext);
+                require_noerr(rval, finish);
 
-        rval = kxld_sym_get_meta_vtable_name_from_class_name(class_name,
-            meta_vtable_name, sizeof(meta_vtable_name));
-        require_noerr(rval, finish);
+                /* Add the MetaClass's vtable to the set of patched vtables */
+                rval = kxld_dict_insert(patched_vtables, vtable->name, vtable);
+                require_noerr(rval, finish);
+                
+                ++nprogress;
+            }
 
-        /* Some classes don't have a MetaClass, so when we run across one
-         * of those, we shrink the vtable array by 1.
-         */
-        src = kxld_dict_find(patched_vtables, meta_vtable_name);
-        if (src) {
-            vtable = kxld_array_get_item(&kext->vtables, i++);
-            rval = kxld_vtable_copy(vtable, src);
-            require_noerr(rval, finish);
-        } else {
-            kxld_array_resize(&kext->vtables, kext->vtables.nitems - 1);
+            ++npatched;
         }
-    }
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-
-#if KXLD_USER_OR_OBJECT
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-process_relocs_from_sections(KXLDKext *kext)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDSect *sect = NULL;
-    u_int i = 0;
 
-    for (i = 0; i < kext->sects.nitems; ++i) {
-        sect = kxld_array_get_item(&kext->sects, i);
-        rval = kxld_sect_process_relocs(sect, &kext->relocator,
-            &kext->sects, kext->symtab);
-        require_noerr_action(rval, finish,
-            kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogInvalidSectReloc,
-                i, sect->segname, sect->sectname));
+        require_action(!failure, finish, rval=KERN_FAILURE);
+        failure = (nprogress == 0);
     }
 
     rval = KERN_SUCCESS;
-
 finish:
+    if (demangled_name1) kxld_free(demangled_name1, demangled_length1);
+    if (demangled_name2) kxld_free(demangled_name2, demangled_length2);
+
     return rval;
 }
-#endif /* KXLD_USER_OR_OBJECT */
 
-#if KXLD_USER_OR_BUNDLE
 /*******************************************************************************
 *******************************************************************************/
 static kern_return_t
-process_relocs_from_tables(KXLDKext *kext)
+create_vtable_index(KXLDKext *kext)
 {
     kern_return_t rval = KERN_FAILURE;
-    KXLDReloc *reloc = NULL;
-    KXLDSeg *seg = NULL;
+    KXLDVTable *vtable = NULL;
     u_int i = 0;
 
-    /* Offsets for relocations in relocation tables are based on the vm
-     * address of the first segment.
-     */
-    seg = kxld_array_get_item(&kext->segs, 0);
-
-    /* Process external relocations */
-    for (i = 0; i < kext->extrelocs.nitems; ++i) {
-        reloc = kxld_array_get_item(&kext->extrelocs, i);
-
-        rval = kxld_relocator_process_table_reloc(&kext->relocator, reloc, seg, 
-            kext->file, &kext->sects, kext->symtab);
-        require_noerr_action(rval, finish,
-            kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogInvalidExtReloc, i));
+    if (kext->vtable_index_created) {
+        rval = KERN_SUCCESS;
+        goto finish;
     }
 
-    /* Process local relocations */
-    for (i = 0; i < kext->locrelocs.nitems; ++i) {
-        reloc = kxld_array_get_item(&kext->locrelocs, i);
+    /* Map vtable names to the vtable structures */
+    rval = kxld_dict_init(&kext->vtable_index, kxld_dict_string_hash, 
+        kxld_dict_string_cmp, kext->vtables.nitems);
+    require_noerr(rval, finish);
 
-        rval = kxld_relocator_process_table_reloc(&kext->relocator, reloc, seg, 
-            kext->file, &kext->sects, kext->symtab);
-        require_noerr_action(rval, finish,
-            kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogInvalidIntReloc, i));
+    for (i = 0; i < kext->vtables.nitems; ++i) {
+        vtable = kxld_array_get_item(&kext->vtables, i);
+        rval = kxld_dict_insert(&kext->vtable_index, vtable->name, vtable);
+        require_noerr(rval, finish);
     }
 
+    kext->vtable_index_created = TRUE;
     rval = KERN_SUCCESS;
-
 finish:
     return rval;
 }
 
 /*******************************************************************************
 *******************************************************************************/
-static void
-add_to_ptr(u_char *symptr, kxld_addr_t val, boolean_t is_32_bit)
-{
-    if (is_32_bit) {
-        uint32_t *ptr = (uint32_t *) symptr;
-        *ptr += (uint32_t) val;
-    } else {
-        uint64_t *ptr = (uint64_t *) symptr;
-        *ptr += (uint64_t) val;
-    }
-}
-
-#define SECT_SYM_PTRS "__nl_symbol_ptr"
-
-/*******************************************************************************
-* Final linked images create an __nl_symbol_ptr section for the global offset
-* table and for symbol pointer lookups in general.  Rather than use relocation
-* entries, the linker creates an "indirect symbol table" which stores indexes
-* into the symbol table corresponding to the entries of this section.  This
-* function populates the section with the relocated addresses of those symbols.
-*******************************************************************************/
-static kern_return_t
-process_symbol_pointers(KXLDKext *kext)
+static const KXLDSym *
+get_metaclass_symbol_from_super_meta_class_pointer_symbol(KXLDKext *kext,
+    KXLDSym *super_metaclass_pointer_sym)
 {
     kern_return_t rval = KERN_FAILURE;
-    KXLDSect *sect = NULL;
-    KXLDSym *sym = NULL;
-    int32_t *symidx = NULL;
-    u_char *symptr = NULL;
-    u_long symptrsize = 0;
-    u_int nsyms = 0;
-    u_int firstsym = 0;
-    u_int i = 0;
-
+    const KXLDReloc *reloc = NULL;
+    const KXLDSect *sect = NULL;
+    const KXLDSym *metaclass = NULL;
+    
     check(kext);
+    check(super_metaclass_pointer_sym);
 
-    require_action(kext->is_final_image && kext->dysymtab_hdr, 
-        finish, rval=KERN_FAILURE);
-
-    /* Get the __DATA,__nl_symbol_ptr section.  If it doesn't exist, we have
-     * nothing to do.
-     */
-
-    sect = kxld_kext_get_sect_by_name(kext, SEG_DATA, SECT_SYM_PTRS);
-    if (!sect) {
-        rval = KERN_SUCCESS;
-        goto finish;
-    }
-
-    require_action(sect->flags & S_NON_LAZY_SYMBOL_POINTERS,
-        finish, rval=KERN_FAILURE;
-        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO 
-            "Section %s,%s does not have S_NON_LAZY_SYMBOL_POINTERS flag.",
-            SEG_DATA, SECT_SYM_PTRS));
-
-    /* Calculate the table offset and number of entries in the section */
-
-    if (kxld_kext_is_32_bit(kext)) {
-        symptrsize = sizeof(uint32_t);
-    } else {
-        symptrsize = sizeof(uint64_t);
-    }
-
-    nsyms = (u_int) (sect->size / symptrsize);
-    firstsym = sect->reserved1;
-
-    require_action(firstsym + nsyms <= kext->dysymtab_hdr->nindirectsyms,
-        finish, rval=KERN_FAILURE;
-        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO));
-
-    /* Iterate through the indirect symbol table and fill in the section of
-     * symbol pointers.  There are three cases:
-     *   1) A normal symbol - put its value directly in the table
-     *   2) An INDIRECT_SYMBOL_LOCAL - symbols that are local and already have
-     *      their offset from the start of the file in the section.  Simply
-     *      add the file's link address to fill this entry.
-     *   3) An INDIRECT_SYMBOL_ABS - prepopulated absolute symbols.  No
-     *      action is required.
-     */
-
-    symidx = (int32_t *) (kext->file + kext->dysymtab_hdr->indirectsymoff);
-    symidx += firstsym;
-    symptr = sect->data;
-    for (i = 0; i < nsyms; ++i, ++symidx, symptr+=symptrsize) {
-        if (*symidx & INDIRECT_SYMBOL_LOCAL) {
-            if (*symidx & INDIRECT_SYMBOL_ABS) continue;
-
-            add_to_ptr(symptr, kext->link_addr, kxld_kext_is_32_bit(kext));
-        } else {
-            sym = kxld_symtab_get_symbol_by_index(kext->symtab, *symidx);
-            require_action(sym, finish, rval=KERN_FAILURE);
+    /* Get the relocation entry that fills in the super metaclass pointer. */
+    reloc = kxld_object_get_reloc_at_symbol(kext->kext,
+        super_metaclass_pointer_sym);
+    require_action(reloc, finish, rval=KERN_FAILURE);
 
-            add_to_ptr(symptr, sym->link_addr, kxld_kext_is_32_bit(kext));
-        }
-    }
+    /* Get the section of the super metaclass pointer. */
+    sect = kxld_object_get_section_by_index(kext->kext,
+        super_metaclass_pointer_sym->sectnum);
+    require_action(sect, finish, rval=KERN_FAILURE);
 
-    rval = KERN_SUCCESS;
+    /* Get the symbol that will be filled into the super metaclass pointer. */
+    metaclass = kxld_object_get_symbol_of_reloc(kext->kext, reloc, sect);
 finish:
-    return rval;
+    return metaclass;
 }
-#endif /* KXLD_USER_OR_BUNDLE */
 
 /*******************************************************************************
 *******************************************************************************/
 static kern_return_t
-populate_kmod_info(KXLDKext *kext)
+validate_symbols(KXLDKext *kext)
 {
     kern_return_t rval = KERN_FAILURE;
-    KXLDSect *kmodsect = NULL;
-    KXLDSym *kmodsym = NULL;
-    u_long kmod_offset = 0;
-    u_long header_size;
-    u_long size;
-
-    if (kext->link_type != KXLD_LINK_KEXT) {
-        rval = KERN_SUCCESS;
-        goto finish;
-    }
-
-    kxld_kext_get_vmsize(kext, &header_size, &size);
-
-    kmodsym = kxld_symtab_get_symbol_by_name(kext->symtab, KXLD_KMOD_INFO_SYMBOL);
-    require_action(kmodsym, finish, rval=KERN_FAILURE;
-        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogNoKmodInfo));
- 
-    kmodsect = kxld_array_get_item(&kext->sects, kmodsym->sectnum);
-    kmod_offset = (u_long) (kmodsym->base_addr -  kmodsect->base_addr);
-
-    kext->kmod_info = (kmod_info_t *) (kmodsect->data + kmod_offset);
-    kext->kmod_link_addr = kmodsym->link_addr;
-
-    if (kxld_kext_is_32_bit(kext)) {
-        kmod_info_32_v1_t *kmod = (kmod_info_32_v1_t *) (kext->kmod_info);
-        kmod->address = (uint32_t) kext->link_addr;
-        kmod->size = (uint32_t) size;
-        kmod->hdr_size = (uint32_t) header_size;
-
-#if !KERNEL
-        if (kxld_kext_target_needs_swap(kext)) {
-            kmod->address = OSSwapInt32(kmod->address);
-            kmod->size = OSSwapInt32(kmod->size);
-            kmod->hdr_size = OSSwapInt32(kmod->hdr_size);
-        }
-#endif /* !KERNEL */
-    } else {
-        kmod_info_64_v1_t *kmod = (kmod_info_64_v1_t *) (kext->kmod_info);
-        kmod->address = kext->link_addr;
-        kmod->size = size;
-        kmod->hdr_size = header_size;
-
-#if !KERNEL
-        if (kxld_kext_target_needs_swap(kext)) {
-            kmod->address = OSSwapInt64(kmod->address);
-            kmod->size = OSSwapInt64(kmod->size);
-            kmod->hdr_size = OSSwapInt64(kmod->hdr_size);
+    KXLDSymtabIterator iter;
+    KXLDSym *sym = NULL;
+    u_int error = FALSE;
+    char *demangled_name = NULL;
+    size_t demangled_length = 0;
+    
+    /* Check for any unresolved symbols */
+    kxld_symtab_iterator_init(&iter, kxld_object_get_symtab(kext->kext), 
+        kxld_sym_is_unresolved, FALSE);
+    while ((sym = kxld_symtab_iterator_get_next(&iter))) {
+        if (!error) {
+            error = TRUE;
+            kxld_log(kKxldLogLinking, kKxldLogErr, 
+                "The following symbols are unresolved for this kext:");
         }
-#endif /* !KERNEL */
+        kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", 
+            kxld_demangle(sym->name, &demangled_name, &demangled_length));
     }
-
+    require_noerr_action(error, finish, rval=KERN_FAILURE);
 
     rval = KERN_SUCCESS;
 
 finish:
+    if (demangled_name) kxld_free(demangled_name, demangled_length);
     return rval;
 }
 
diff --git a/libkern/kxld/kxld_kext.h b/libkern/kxld/kxld_kext.h
index 20eeaf501..f2b80c0f6 100644
--- a/libkern/kxld/kxld_kext.h
+++ b/libkern/kxld/kxld_kext.h
@@ -39,6 +39,7 @@
 struct kxld_array;
 struct kxld_kext;
 struct kxld_dict;
+struct kxld_object;
 struct kxld_sect;
 struct kxld_seg;
 struct kxld_symtab;
@@ -50,12 +51,11 @@ typedef struct kxld_kext KXLDKext;
 *******************************************************************************/
 
 size_t kxld_kext_sizeof(void)
-    __attribute__((const, nonnull, visibility("hidden")));
+    __attribute__((const, visibility("hidden")));
 
-kern_return_t kxld_kext_init(KXLDKext *kext, u_char *file, u_long size,
-    const char *name, KXLDFlags flags, boolean_t is_kernel, KXLDArray *seg_order, 
-    cpu_type_t cputype, cpu_subtype_t cpusubtype)
-    __attribute__((nonnull(1,2,4), visibility("hidden")));
+kern_return_t kxld_kext_init(KXLDKext *kext, struct kxld_object *kext_object,
+    struct kxld_object *interface_object)
+    __attribute__((nonnull(1,2), visibility("hidden")));
 
 void kxld_kext_clear(KXLDKext *kext)
     __attribute__((nonnull, visibility("hidden")));
@@ -67,76 +67,33 @@ void kxld_kext_deinit(KXLDKext *kext)
 * Accessors
 *******************************************************************************/
 
-boolean_t kxld_kext_is_true_kext(const KXLDKext *kext) 
-    __attribute__((pure, nonnull, visibility("hidden")));
+kern_return_t kxld_kext_export_symbols(const KXLDKext *kext, 
+    struct kxld_dict *defined_symbols_by_name,
+    struct kxld_dict *obsolete_symbols_by_name,
+    struct kxld_dict *defined_cxx_symbols_by_value)
+    __attribute__((nonnull(1), visibility("hidden")));
 
-boolean_t kxld_kext_is_32_bit(const KXLDKext *kext)
-    __attribute__((pure, nonnull, visibility("hidden")));
-
-void kxld_kext_get_cputype(const KXLDKext *kext, cpu_type_t *cputype,
-    cpu_subtype_t *cpusubtype)
-    __attribute__((nonnull, visibility("hidden")));
-
-kern_return_t kxld_kext_validate_cputype(const KXLDKext *kext, cpu_type_t cputype,
-    cpu_subtype_t cpusubtype)
-    __attribute__((pure, nonnull, visibility("hidden")));
-
-void kxld_kext_get_vmsize(const KXLDKext *kext, u_long *header_size, 
-    u_long *vmsize)
-    __attribute__((nonnull, visibility("hidden")));
-
-const struct kxld_symtab * kxld_kext_get_symtab(const KXLDKext *kext)
-    __attribute__((pure, nonnull, visibility("hidden")));
-
-u_int kxld_kext_get_num_symbols(const KXLDKext *kext)
-    __attribute__((pure, nonnull, visibility("hidden")));
-
-void kxld_kext_get_vtables(KXLDKext *kext, const struct kxld_array **vtables)
+void kxld_kext_get_vmsize(const KXLDKext *kext, 
+    u_long *header_size, u_long *vmsize)
     __attribute__((nonnull, visibility("hidden")));
-
-u_int kxld_kext_get_num_vtables(const KXLDKext *kext)
-    __attribute__((pure, nonnull, visibility("hidden")));
-
-struct kxld_seg * kxld_kext_get_seg_by_name(const KXLDKext *kext, 
-    const char *segname)
-    __attribute__((pure, nonnull, visibility("hidden")));
-
-struct kxld_sect * kxld_kext_get_sect_by_name(const KXLDKext *kext, 
-    const char *segname, const char *sectname)
-    __attribute__((pure, nonnull, visibility("hidden")));
-
-int kxld_kext_get_sectnum_for_sect(const KXLDKext *kext, 
-    const struct kxld_sect *sect)
-    __attribute__((pure, nonnull, visibility("hidden")));
-
-const struct kxld_array * kxld_kext_get_section_order(const KXLDKext *kext)
-    __attribute__((pure, nonnull, visibility("hidden")));
-
-/* This will be the same size as kxld_kext_get_vmsize */
+    
 kern_return_t kxld_kext_export_linked_object(const KXLDKext *kext,
-    u_char *linked_object, kxld_addr_t *kmod_info_kern)
+    u_char *linked_object, kxld_addr_t *kmod_info)
     __attribute__((nonnull, visibility("hidden")));
 
-#if !KERNEL
-kern_return_t kxld_kext_export_symbol_file(const KXLDKext *kext, 
-    u_char **symbol_file, u_long *filesize)
-    __attribute__((nonnull, visibility("hidden")));
-#endif
-
-boolean_t kxld_kext_target_needs_swap(const KXLDKext *kext)
-    __attribute__((pure, nonnull, visibility("hidden")));
-
 /*******************************************************************************
 * Modifiers
 *******************************************************************************/
-
-kern_return_t kxld_kext_resolve(KXLDKext *kext, struct kxld_dict *patched_vtables,
-    struct kxld_dict *defined_symbols)
+kern_return_t kxld_kext_export_vtables(KXLDKext *kext, 
+    const struct kxld_dict *defined_cxx_symbols, 
+    const struct kxld_dict *defined_symbols,
+    struct kxld_dict *vtables)
     __attribute__((nonnull, visibility("hidden")));
 
 kern_return_t kxld_kext_relocate(KXLDKext *kext, kxld_addr_t link_address,
-    struct kxld_dict *patched_vtables, struct kxld_dict *defined_symbols,
-    struct kxld_dict *obsolete_symbols)
+    struct kxld_dict *patched_vtables, const struct kxld_dict *defined_symbols,
+    const struct kxld_dict *obsolete_symbols, 
+    const struct kxld_dict *defined_cxx_symbols)
     __attribute__((nonnull(1,3,4), visibility("hidden")));
 
 #endif /* _KXLD_KEXT_H_ */
diff --git a/libkern/kxld/kxld_object.c b/libkern/kxld/kxld_object.c
new file mode 100644
index 000000000..24b589912
--- /dev/null
+++ b/libkern/kxld/kxld_object.c
@@ -0,0 +1,2185 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <string.h>
+
+#include <mach-o/loader.h>
+#include <mach-o/nlist.h>
+#include <mach-o/reloc.h>
+#include <sys/types.h>
+
+#if KERNEL
+    #include <libkern/kernel_mach_header.h>
+    #include <mach/vm_param.h>
+    #include <mach-o/fat.h>
+#else /* !KERNEL */
+    #include <architecture/byte_order.h>
+    #include <mach/mach_init.h>
+    #include <mach-o/arch.h>
+    #include <mach-o/swap.h>
+#endif /* KERNEL */
+
+#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
+#include <AssertMacros.h>
+
+#include "kxld_demangle.h"
+#include "kxld_dict.h"
+#include "kxld_reloc.h"
+#include "kxld_sect.h"
+#include "kxld_seg.h"
+#include "kxld_symtab.h"
+#include "kxld_util.h"
+#include "kxld_uuid.h"
+#include "kxld_vtable.h"
+
+#include "kxld_object.h"
+
+/*******************************************************************************
+* Data structures
+*******************************************************************************/
+
+struct kxld_object {
+    u_char *file;
+    u_long size;
+    const char *name;
+    uint32_t filetype;
+    cpu_type_t cputype;
+    cpu_subtype_t cpusubtype;
+    KXLDArray segs;
+    KXLDArray sects;
+    KXLDArray extrelocs;
+    KXLDArray locrelocs;
+    KXLDRelocator relocator;
+    KXLDuuid uuid;
+    KXLDSymtab *symtab;
+    struct dysymtab_command *dysymtab_hdr;
+    kxld_addr_t link_addr;
+    boolean_t is_kernel;
+    boolean_t is_final_image;
+    boolean_t is_linked;
+    boolean_t got_is_created;
+#if KXLD_USER_OR_OBJECT
+    KXLDArray *section_order;
+#endif
+#if !KERNEL
+    enum NXByteOrder host_order;
+    enum NXByteOrder target_order;
+#endif
+};
+
+/*******************************************************************************
+* Prototypes
+*******************************************************************************/
+
+static kern_return_t get_target_machine_info(KXLDObject *object, 
+    cpu_type_t cputype, cpu_subtype_t cpusubtype);
+static kern_return_t get_macho_slice_for_arch(KXLDObject *object, 
+    u_char *file, u_long size);
+
+static u_long get_macho_header_size(const KXLDObject *object);
+static u_long get_macho_data_size(const KXLDObject *object) __unused;
+
+static kern_return_t init_from_execute(KXLDObject *object);
+static kern_return_t init_from_final_linked_image(KXLDObject *object, 
+    u_int *filetype_out, struct symtab_command **symtab_hdr_out);
+
+static boolean_t target_supports_protected_segments(const KXLDObject *object)
+    __attribute__((pure));
+static void set_is_object_linked(KXLDObject *object);
+
+#if KXLD_USER_OR_BUNDLE
+static boolean_t target_supports_bundle(const KXLDObject *object) 
+    __attribute__((pure));
+static kern_return_t init_from_bundle(KXLDObject *object);
+static kern_return_t process_relocs_from_tables(KXLDObject *object);
+static KXLDSeg *get_seg_by_base_addr(KXLDObject *object,
+    kxld_addr_t base_addr);
+static kern_return_t process_symbol_pointers(KXLDObject *object);
+static void add_to_ptr(u_char *symptr, kxld_addr_t val, boolean_t is_32_bit);
+#endif /* KXLD_USER_OR_BUNDLE */
+
+#if KXLD_USER_OR_OBJECT
+static boolean_t target_supports_object(const KXLDObject *object) 
+    __attribute__((pure));
+static kern_return_t init_from_object(KXLDObject *object);
+static kern_return_t process_relocs_from_sections(KXLDObject *object);
+#endif /* KXLD_USER_OR_OBJECT */
+
+static kern_return_t export_macho_header(const KXLDObject *object, u_char *buf, 
+    u_int ncmds, u_long *header_offset, u_long header_size);
+#if KXLD_USER_OR_ILP32
+static u_long get_macho_cmd_data_32(u_char *file, u_long offset, 
+    u_int *filetype, u_int *ncmds);
+static kern_return_t export_macho_header_32(const KXLDObject *object, 
+    u_char *buf, u_int ncmds, u_long *header_offset, u_long header_size);
+#endif /* KXLD_USER_OR_ILP32 */
+#if KXLD_USER_OR_LP64
+static u_long get_macho_cmd_data_64(u_char *file, u_long offset,
+    u_int *filetype, u_int *ncmds);
+static kern_return_t export_macho_header_64(const KXLDObject *object, 
+    u_char *buf, u_int ncmds, u_long *header_offset, u_long header_size);
+#endif /* KXLD_USER_OR_LP64 */
+
+#if KXLD_USER_OR_GOT || KXLD_USER_OR_COMMON
+static kern_return_t add_section(KXLDObject *object, KXLDSect **sect);
+#endif /* KXLD_USER_OR_GOT || KXLD_USER_OR_COMMON */
+
+#if KXLD_USER_OR_COMMON
+static kern_return_t resolve_common_symbols(KXLDObject *object);
+#endif /* KXLD_USER_OR_COMMON */
+
+#if KXLD_USER_OR_GOT
+static boolean_t target_has_got(const KXLDObject *object) __attribute__((pure));
+static kern_return_t create_got(KXLDObject *object);
+static kern_return_t populate_got(KXLDObject *object);
+#endif /* KXLD_USER_OR_GOT */
+
+static KXLDSym *get_mutable_sym(const KXLDObject *object, const KXLDSym *sym);
+
+static kern_return_t populate_kmod_info(KXLDObject *object);
+
+/*******************************************************************************
+* Prototypes that may need to be exported
+*******************************************************************************/
+static boolean_t kxld_object_target_needs_swap(const KXLDObject *object __unused);
+static KXLDSeg * kxld_object_get_seg_by_name(const KXLDObject *object, const char *segname);
+static KXLDSect * kxld_object_get_sect_by_name(const KXLDObject *object, const char *segname, 
+    const char *sectname);
+
+/*******************************************************************************
+*******************************************************************************/
+size_t 
+kxld_object_sizeof(void)
+{
+    return sizeof(KXLDObject);
+}
+
+/*******************************************************************************
+*******************************************************************************/
+kern_return_t 
+kxld_object_init_from_macho(KXLDObject *object, u_char *file, u_long size,
+    const char *name, KXLDArray *section_order __unused, 
+    cpu_type_t cputype, cpu_subtype_t cpusubtype)
+{
+    kern_return_t       rval    = KERN_FAILURE;
+    KXLDSeg           * seg     = NULL;
+    u_int               i       = 0;
+
+    check(object);
+    check(file);
+    check(name);
+
+    object->name = name;
+
+#if KXLD_USER_OR_OBJECT
+    object->section_order = section_order;
+#endif
+    /* Find the local architecture */
+
+    rval = get_target_machine_info(object, cputype, cpusubtype);
+    require_noerr(rval, finish);
+
+    /* Find the Mach-O slice for the target architecture */
+
+    rval = get_macho_slice_for_arch(object, file, size);
+    require_noerr(rval, finish);
+
+    /* Allocate the symbol table */
+
+    if (!object->symtab) {
+        object->symtab = kxld_alloc(kxld_symtab_sizeof());
+        require_action(object->symtab, finish, rval=KERN_RESOURCE_SHORTAGE);
+        bzero(object->symtab, kxld_symtab_sizeof());
+    }
+
+    /* Build the relocator */
+
+    rval = kxld_relocator_init(&object->relocator, object->file,
+        object->symtab, &object->sects, object->cputype, 
+        object->cpusubtype, kxld_object_target_needs_swap(object));
+    require_noerr(rval, finish);
+
+    /* There are four types of Mach-O files that we can support:
+     *   1) 32-bit MH_OBJECT      - Snow Leopard and earlier
+     *   2) 32-bit MH_KEXT_BUNDLE - Lion and Later
+     *   3) 64-bit MH_OBJECT      - Unsupported
+     *   4) 64-bit MH_KEXT_BUNDLE - Snow Leopard and Later
+     */
+
+    if (kxld_object_is_32_bit(object)) {
+        struct mach_header *mach_hdr = (struct mach_header *) object->file;
+        object->filetype = mach_hdr->filetype;
+    } else {
+        struct mach_header_64 *mach_hdr = (struct mach_header_64 *) object->file;
+        object->filetype = mach_hdr->filetype;
+    }
+
+    switch (object->filetype) {
+#if KXLD_USER_OR_BUNDLE
+    case MH_KEXT_BUNDLE:
+        rval = init_from_bundle(object);
+        require_noerr(rval, finish);
+        break;
+#endif /* KXLD_USER_OR_BUNDLE */
+#if KXLD_USER_OR_OBJECT
+    case MH_OBJECT:
+        rval = init_from_object(object);
+        require_noerr(rval, finish);
+        break;
+#endif /* KXLD_USER_OR_OBJECT */
+    case MH_EXECUTE:
+        object->is_kernel = TRUE;
+        rval = init_from_execute(object);
+        require_noerr(rval, finish);
+        break;
+    default:
+        rval = KERN_FAILURE;
+        kxld_log(kKxldLogLinking, kKxldLogErr,
+            kKxldLogFiletypeNotSupported, object->filetype);
+        goto finish;
+    }
+
+    if (!kxld_object_is_kernel(object)) {
+        for (i = 0; i < object->segs.nitems; ++i) {
+            seg = kxld_array_get_item(&object->segs, i);
+            kxld_seg_set_vm_protections(seg, 
+                target_supports_protected_segments(object));
+        }
+
+        seg = kxld_object_get_seg_by_name(object, SEG_LINKEDIT);
+        if (seg) {
+            (void) kxld_seg_populate_linkedit(seg, object->symtab,
+                kxld_object_is_32_bit(object));
+        }
+    }
+
+    (void) set_is_object_linked(object);
+
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
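+/* Typical use (illustrative; error handling elided).  The caller owns both
+ * the object's storage and the Mach-O buffer:
+ *
+ *     KXLDObject *object = kxld_alloc(kxld_object_sizeof());
+ *     bzero(object, kxld_object_sizeof());
+ *     rval = kxld_object_init_from_macho(object, file, size,
+ *         "com.example.driver", NULL, CPU_TYPE_X86_64,
+ *         CPU_SUBTYPE_X86_64_ALL);
+ */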
+
+/*******************************************************************************
+*******************************************************************************/
+kern_return_t
+get_target_machine_info(KXLDObject *object, cpu_type_t cputype __unused, 
+    cpu_subtype_t cpusubtype __unused)
+{
+#if KERNEL
+
+    /* Because the kernel can only link for its own architecture, we know what
+     * the host and target architectures are at compile time, so we can use
+     * a vastly simplified version of this function.
+     */ 
+
+    check(object);
+
+#if defined(__i386__)
+    object->cputype = CPU_TYPE_I386;
+    object->cpusubtype = CPU_SUBTYPE_I386_ALL;
+    return KERN_SUCCESS;
+#elif defined(__x86_64__)
+    object->cputype = CPU_TYPE_X86_64;
+    object->cpusubtype = CPU_SUBTYPE_X86_64_ALL;
+    return KERN_SUCCESS;
+#else 
+    kxld_log(kKxldLogLinking, kKxldLogErr, 
+        kKxldLogArchNotSupported, _mh_execute_header->cputype);
+    return KERN_NOT_SUPPORTED;
+#endif /* Supported architecture defines */
+
+
+#else /* !KERNEL */
+
+    /* User-space must look up the architecture it's running on and the target
+     * architecture at run-time.
+     */
+
+    kern_return_t rval = KERN_FAILURE;
+    const NXArchInfo *host_arch = NULL;
+
+    check(object);
+
+    host_arch = NXGetLocalArchInfo();
+    require_action(host_arch, finish, rval=KERN_FAILURE);
+
+    object->host_order = host_arch->byteorder;
+
+    /* If the user did not specify a cputype, use the local architecture.
+     */
+
+    if (cputype) {
+        object->cputype = cputype;
+        object->cpusubtype = cpusubtype;
+    } else {
+        object->cputype = host_arch->cputype;
+        object->target_order = object->host_order;
+
+        switch (object->cputype) {
+        case CPU_TYPE_I386:
+            object->cpusubtype = CPU_SUBTYPE_I386_ALL;
+            break;
+        case CPU_TYPE_POWERPC:
+            object->cpusubtype = CPU_SUBTYPE_POWERPC_ALL;
+            break;
+        case CPU_TYPE_X86_64:
+            object->cpusubtype = CPU_SUBTYPE_X86_64_ALL;
+            break;
+        case CPU_TYPE_ARM:
+            object->cpusubtype = CPU_SUBTYPE_ARM_ALL;
+            break;
+        default:
+            object->cpusubtype = 0;
+        }
+    }
+
+    /* Validate that we support the target architecture and record its 
+     * endianness.
+     */
+
+    switch(object->cputype) {
+    case CPU_TYPE_ARM:
+    case CPU_TYPE_I386:
+    case CPU_TYPE_X86_64:
+        object->target_order = NX_LittleEndian;
+        break;
+    case CPU_TYPE_POWERPC:
+        object->target_order = NX_BigEndian;
+        break;
+    default:
+        rval = KERN_NOT_SUPPORTED;
+        kxld_log(kKxldLogLinking, kKxldLogErr, 
+            kKxldLogArchNotSupported, object->cputype);
+        goto finish;
+    }
+
+    rval = KERN_SUCCESS;
+
+finish:
+    return rval;
+#endif /* KERNEL */
+}
+
+/*******************************************************************************
+*******************************************************************************/
+static kern_return_t
+get_macho_slice_for_arch(KXLDObject *object, u_char *file, u_long size)
+{
+    kern_return_t rval = KERN_FAILURE;
+    struct mach_header *mach_hdr = NULL;
+#if !KERNEL
+    struct fat_header *fat = (struct fat_header *) file;
+    struct fat_arch *archs = (struct fat_arch *) &fat[1];
+    boolean_t swap = FALSE;
+#endif /* KERNEL */
+
+    check(object);
+    check(file);
+    check(size);
+
+    object->file = file;
+    object->size = size;
+
+    /* We are assuming that we will never receive a fat file in the kernel */
+
+#if !KERNEL
+    require_action(size >= sizeof(*fat), finish, 
+        rval=KERN_FAILURE;
+        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO));
+
+    /* The fat header is always big endian, so swap if necessary */
+    if (fat->magic == FAT_CIGAM) {
+        (void) swap_fat_header(fat, object->host_order);
+        swap = TRUE;
+    }
+
+    if (fat->magic == FAT_MAGIC) {
+        struct fat_arch *arch = NULL;
+
+        require_action(size >= (sizeof(*fat) + (fat->nfat_arch * sizeof(*archs))),
+            finish, 
+            rval=KERN_FAILURE;
+            kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO));
+
+        /* Swap the fat_arch structures if necessary */
+        if (swap) {
+            (void) swap_fat_arch(archs, fat->nfat_arch, object->host_order);
+        }
+
+        /* Locate the Mach-O for the requested architecture */
+
+        arch = NXFindBestFatArch(object->cputype, object->cpusubtype, archs, 
+            fat->nfat_arch);
+        require_action(arch, finish, rval=KERN_FAILURE;
+            kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogArchNotFound));
+        require_action(size >= arch->offset + arch->size, finish, 
+            rval=KERN_FAILURE;
+            kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO));
+
+        object->file = file + arch->offset;
+        object->size = arch->size;
+    }
+#endif /* !KERNEL */
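+
+    /* Whether or not the input was fat, object->file and object->size now
+     * describe a single thin Mach-O slice.
+     */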
+
+    /* Swap the Mach-O's headers to this architecture if necessary */
+    if (kxld_object_is_32_bit(object)) {
+        rval = validate_and_swap_macho_32(object->file, object->size
+#if !KERNEL
+            , object->host_order
+#endif /* !KERNEL */
+            );
+    } else {
+        rval = validate_and_swap_macho_64(object->file, object->size
+#if !KERNEL
+            , object->host_order
+#endif /* !KERNEL */
+            );
+    }
+    require_noerr(rval, finish);
+
+    mach_hdr = (struct mach_header *) object->file;
+    require_action(object->cputype == mach_hdr->cputype, finish,
+        rval=KERN_FAILURE;
+        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogArchNotFound));
+
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+static kern_return_t
+init_from_final_linked_image(KXLDObject *object, u_int *filetype_out,
+    struct symtab_command **symtab_hdr_out)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDSeg *seg = NULL;
+    KXLDSect *sect = NULL;
+    struct load_command *cmd_hdr = NULL;
+    struct symtab_command *symtab_hdr = NULL;
+    struct uuid_command *uuid_hdr = NULL;
+    u_long base_offset = 0;
+    u_long offset = 0;
+    u_long sect_offset = 0;
+    u_int filetype = 0;
+    u_int i = 0;
+    u_int j = 0;
+    u_int segi = 0;
+    u_int secti = 0;
+    u_int nsegs = 0;
+    u_int nsects = 0;
+    u_int ncmds = 0;
+
+    KXLD_3264_FUNC(kxld_object_is_32_bit(object), base_offset,
+        get_macho_cmd_data_32, get_macho_cmd_data_64,
+        object->file, offset, &filetype, &ncmds);
+
+    /* First pass to count segments and sections */
+
+    offset = base_offset;
+    for (i = 0; i < ncmds; ++i, offset += cmd_hdr->cmdsize) {
+        cmd_hdr = (struct load_command *) (object->file + offset);
+
+        switch(cmd_hdr->cmd) {
+#if KXLD_USER_OR_ILP32
+        case LC_SEGMENT:
+            {
+                struct segment_command *seg_hdr = 
+                    (struct segment_command *) cmd_hdr;
+
+                /* Ignore segments with no vm size */
+                if (!seg_hdr->vmsize) continue;
+
+                ++nsegs;
+                nsects += seg_hdr->nsects;
+            }
+            break;
+#endif /* KXLD_USER_OR_ILP32 */
+#if KXLD_USER_OR_LP64
+        case LC_SEGMENT_64:
+            {
+                struct segment_command_64 *seg_hdr = 
+                    (struct segment_command_64 *) cmd_hdr;
+
+                /* Ignore segments with no vm size */
+                if (!seg_hdr->vmsize) continue;
+
+                ++nsegs;
+                nsects += seg_hdr->nsects;
+            }
+            break;
+#endif /* KXLD_USER_OR_LP64 */
+        default:
+            continue;
+        }
+    }
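+
+    /* The first pass above exists only to size the arrays: counting segments
+     * and sections up front lets kxld_array_init allocate exactly once
+     * instead of growing the arrays during parsing.
+     */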
+
+    /* Allocate the segments and sections */
+
+    if (nsegs) {
+        rval = kxld_array_init(&object->segs, sizeof(KXLDSeg), nsegs);
+        require_noerr(rval, finish);
+
+        rval = kxld_array_init(&object->sects, sizeof(KXLDSect), nsects);
+        require_noerr(rval, finish);
+    }
+
+    /* Initialize the segments and sections */
+
+    offset = base_offset;
+    for (i = 0; i < ncmds; ++i, offset += cmd_hdr->cmdsize) {
+        cmd_hdr = (struct load_command *) (object->file + offset); 
+        seg = NULL;
+
+        switch(cmd_hdr->cmd) {
+#if KXLD_USER_OR_ILP32
+        case LC_SEGMENT:
+            {
+                struct segment_command *seg_hdr =
+                    (struct segment_command *) cmd_hdr;
+
+                /* Ignore segments with no vm size */
+                if (!seg_hdr->vmsize) continue;
+
+                seg = kxld_array_get_item(&object->segs, segi++);
+
+                rval = kxld_seg_init_from_macho_32(seg, seg_hdr);
+                require_noerr(rval, finish);
+
+                sect_offset = offset + sizeof(*seg_hdr);
+            }
+            break;
+#endif /* KXLD_USER_OR_ILP32 */
+#if KXLD_USER_OR_LP64
+        case LC_SEGMENT_64:
+            {
+                struct segment_command_64 *seg_hdr = 
+                    (struct segment_command_64 *) cmd_hdr;
+
+                /* Ignore segments with no vm size */
+                if (!seg_hdr->vmsize) continue;
+
+                seg = kxld_array_get_item(&object->segs, segi++);
+
+                rval = kxld_seg_init_from_macho_64(seg, seg_hdr);
+                require_noerr(rval, finish);
+
+                sect_offset = offset + sizeof(*seg_hdr);
+            }
+            break;
+#endif /* KXLD_USER_OR_LP64 */
+        case LC_SYMTAB:
+            symtab_hdr = (struct symtab_command *) cmd_hdr;
+            break;
+        case LC_UUID:
+            uuid_hdr = (struct uuid_command *) cmd_hdr;
+            kxld_uuid_init_from_macho(&object->uuid, uuid_hdr);
+            break;
+        case LC_DYSYMTAB:
+            object->dysymtab_hdr = (struct dysymtab_command *) cmd_hdr;            
+
+            rval = kxld_reloc_create_macho(&object->extrelocs, &object->relocator,
+                (struct relocation_info *) (object->file + object->dysymtab_hdr->extreloff), 
+                object->dysymtab_hdr->nextrel);
+            require_noerr(rval, finish);
+
+            rval = kxld_reloc_create_macho(&object->locrelocs, &object->relocator,
+                (struct relocation_info *) (object->file + object->dysymtab_hdr->locreloff), 
+                object->dysymtab_hdr->nlocrel);
+            require_noerr(rval, finish);
+
+            break;
+        case LC_UNIXTHREAD:
+            /* Don't need to do anything with UNIXTHREAD for the kernel */
+            require_action(kxld_object_is_kernel(object), 
+                finish, rval=KERN_FAILURE;
+                kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
+                    "LC_UNIXTHREAD segment is not valid in a kext."));
+            break;
+        default:
+            rval=KERN_FAILURE;
+            kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
+                "Invalid segment type in MH_KEXT_BUNDLE kext: %u.", cmd_hdr->cmd);
+            goto finish;
+        }
+
+        if (seg) {
+
+            /* Initialize the sections */
+            for (j = 0; j < seg->sects.nitems; ++j, ++secti) {
+                sect = kxld_array_get_item(&object->sects, secti);
+                KXLD_3264_FUNC(kxld_object_is_32_bit(object), rval,
+                    kxld_sect_init_from_macho_32, kxld_sect_init_from_macho_64,
+                    sect, object->file, &sect_offset, secti, &object->relocator);
+                require_noerr(rval, finish);
+
+                /* Add the section to the segment.  This will also make sure
+                 * that the sections and segments have the same segname.
+                 */
+                rval = kxld_seg_add_section(seg, sect);
+                require_noerr(rval, finish);
+            }
+            rval = kxld_seg_finish_init(seg);
+            require_noerr(rval, finish);
+        }
+    }
+
+    if (filetype_out) *filetype_out = filetype;
+    if (symtab_hdr_out) *symtab_hdr_out = symtab_hdr;
+    object->is_final_image = TRUE;
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+static kern_return_t
+init_from_execute(KXLDObject *object)
+{
+    kern_return_t rval = KERN_FAILURE;
+    struct symtab_command *symtab_hdr = NULL;
+    u_int filetype = 0;
+    KXLDSeg * kernel_linkedit_seg = NULL;  // used if running kernel
+#if KXLD_USER_OR_OBJECT
+    KXLDSeg *seg = NULL;
+    KXLDSect *sect = NULL;
+    KXLDSectionName *sname = NULL;
+    u_int i = 0, j = 0, k = 0;
+#endif /* KXLD_USER_OR_OBJECT */
+
+    check(object);
+
+    require_action(kxld_object_is_kernel(object), finish, rval=KERN_FAILURE);
+
+    rval = init_from_final_linked_image(object, &filetype, &symtab_hdr);
+    require_noerr(rval, finish);
+
+    require_action(filetype == MH_EXECUTE, finish, rval=KERN_FAILURE;
+        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO 
+            "The kernel file is not of type MH_EXECUTE."));
+
+    /* Initialize the symbol table.  If this is the running kernel, we will
+     * work from the in-memory linkedit segment; otherwise we work from the
+     * whole Mach-O image.
+     */
+#if KERNEL
+    kernel_linkedit_seg = kxld_object_get_seg_by_name(object, SEG_LINKEDIT);
+    require_action(kernel_linkedit_seg, finish, rval=KERN_FAILURE;
+        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO));
+#endif
+
+    KXLD_3264_FUNC(kxld_object_is_32_bit(object), rval,
+        kxld_symtab_init_from_macho_32, kxld_symtab_init_from_macho_64,
+        object->symtab, symtab_hdr, object->file, kernel_linkedit_seg);
+    require_noerr(rval, finish);
+
+#if KXLD_USER_OR_OBJECT
+    /* Save off the order of section names so that we can lay out kext 
+     * sections for MH_OBJECT-based systems.
+     */
+    if (target_supports_object(object)) {
+
+        rval = kxld_array_init(object->section_order, sizeof(KXLDSectionName), 
+            object->sects.nitems);
+        require_noerr(rval, finish);
+
+        /* Copy the section names into the section_order array for future kext
+         * section ordering.
+         */
+        for (i = 0, k = 0; i < object->segs.nitems; ++i) {
+            seg = kxld_array_get_item(&object->segs, i);
+
+            for (j = 0; j < seg->sects.nitems; ++j, ++k) {
+                sect = *(KXLDSect **) kxld_array_get_item(&seg->sects, j);
+                sname = kxld_array_get_item(object->section_order, k);
+
+                strlcpy(sname->segname, sect->segname, sizeof(sname->segname));
+                strlcpy(sname->sectname, sect->sectname, sizeof(sname->sectname));
+            }
+        }
+    }
+#endif /* KXLD_USER_OR_OBJECT */
+
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
+
+#if KXLD_USER_OR_BUNDLE
+/*******************************************************************************
+*******************************************************************************/
+static boolean_t
+target_supports_bundle(const KXLDObject *object)
+{
+    return (object->cputype == CPU_TYPE_I386    ||
+            object->cputype == CPU_TYPE_X86_64  ||
+            object->cputype == CPU_TYPE_ARM);
+}
+
+/*******************************************************************************
+*******************************************************************************/
+static kern_return_t 
+init_from_bundle(KXLDObject *object)
+{
+    kern_return_t rval = KERN_FAILURE;
+    struct symtab_command *symtab_hdr = NULL;
+    u_int filetype = 0;
+
+    check(object);
+
+    require_action(target_supports_bundle(object), finish,
+        rval=KERN_FAILURE;
+        kxld_log(kKxldLogLinking, kKxldLogErr,
+            kKxldLogFiletypeNotSupported, MH_KEXT_BUNDLE));
+
+    rval = init_from_final_linked_image(object, &filetype, &symtab_hdr);
+    require_noerr(rval, finish);
+
+    require_action(filetype == MH_KEXT_BUNDLE, finish, 
+        rval=KERN_FAILURE);
+
+    KXLD_3264_FUNC(kxld_object_is_32_bit(object), rval,
+        kxld_symtab_init_from_macho_32, kxld_symtab_init_from_macho_64,
+        object->symtab, symtab_hdr, object->file,
+        /* kernel_linkedit_seg */ NULL);
+    require_noerr(rval, finish);
+
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
+#endif /* KXLD_USER_OR_BUNDLE */
+
+#if KXLD_USER_OR_OBJECT
+/*******************************************************************************
+*******************************************************************************/
+static boolean_t target_supports_object(const KXLDObject *object)
+{
+    return (object->cputype == CPU_TYPE_POWERPC ||
+            object->cputype == CPU_TYPE_I386    ||
+            object->cputype == CPU_TYPE_ARM);
+}
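+
+/* Note that CPU_TYPE_X86_64 is absent: x86_64 kexts are expected to arrive as
+ * final linked MH_KEXT_BUNDLE images rather than MH_OBJECT files.
+ */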
+
+/*******************************************************************************
+*******************************************************************************/
+static kern_return_t 
+init_from_object(KXLDObject *object)
+{
+    kern_return_t rval = KERN_FAILURE;
+    struct load_command *cmd_hdr = NULL;
+    struct symtab_command *symtab_hdr = NULL;
+    struct uuid_command *uuid_hdr = NULL;
+    KXLDSect *sect = NULL;
+    u_long offset = 0;
+    u_long sect_offset = 0;
+    u_int filetype = 0;
+    u_int ncmds = 0;
+    u_int nsects = 0;
+    u_int i = 0;
+    boolean_t has_segment = FALSE;
+
+    check(object);
+
+    require_action(target_supports_object(object),
+        finish, rval=KERN_FAILURE;
+        kxld_log(kKxldLogLinking, kKxldLogErr,
+            kKxldLogFiletypeNotSupported, MH_OBJECT));
+
+    KXLD_3264_FUNC(kxld_object_is_32_bit(object), offset,
+        get_macho_cmd_data_32, get_macho_cmd_data_64,
+        object->file, offset, &filetype, &ncmds);
+
+    require_action(filetype == MH_OBJECT, finish, rval=KERN_FAILURE);
+
+    /* MH_OBJECT files use a single unnamed segment to contain all of the
+     * sections.  We loop over the load commands to initialize the structures
+     * we expect, then walk that unnamed segment's sections and use them to
+     * create the actual segments.
+     */
+
+    for (; i < ncmds; ++i, offset += cmd_hdr->cmdsize) {
+        cmd_hdr = (struct load_command *) (object->file + offset);
+
+        switch(cmd_hdr->cmd) {
+#if KXLD_USER_OR_ILP32
+        case LC_SEGMENT:
+            {
+                struct segment_command *seg_hdr = 
+                    (struct segment_command *) cmd_hdr;
+
+                /* Ignore segments with no vm size */
+                if (!seg_hdr->vmsize) continue;
+
+                /* Ignore LINKEDIT segments */
+                if (streq_safe(seg_hdr->segname, SEG_LINKEDIT, 
+                        const_strlen(SEG_LINKEDIT))) 
+                {
+                    continue;
+                }
+
+                require_action(kxld_object_is_32_bit(object), finish, rval=KERN_FAILURE;
+                    kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
+                        "LC_SEGMENT in 64-bit kext."));
+                require_action(!has_segment, finish, rval=KERN_FAILURE;
+                    kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
+                        "Multiple segments in an MH_OBJECT kext."));
+
+                nsects = seg_hdr->nsects;
+                sect_offset = offset + sizeof(*seg_hdr);
+                has_segment = TRUE;
+            }
+            break;
+#endif /* KXLD_USER_OR_ILP32 */
+#if KXLD_USER_OR_LP64
+        case LC_SEGMENT_64:
+            {
+                struct segment_command_64 *seg_hdr =
+                    (struct segment_command_64 *) cmd_hdr;
+
+                /* Ignore segments with no vm size */
+                if (!seg_hdr->vmsize) continue;
+
+                /* Ignore LINKEDIT segments */
+                if (streq_safe(seg_hdr->segname, SEG_LINKEDIT, 
+                        const_strlen(SEG_LINKEDIT))) 
+                {
+                    continue;
+                }
+
+                require_action(!kxld_object_is_32_bit(object), finish, rval=KERN_FAILURE;
+                    kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
+                        "LC_SEGMENT_64 in a 32-bit kext."));
+                require_action(!has_segment, finish, rval=KERN_FAILURE;
+                    kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
+                        "Multiple segments in an MH_OBJECT kext."));
+
+                nsects = seg_hdr->nsects;
+                sect_offset = offset + sizeof(*seg_hdr);
+                has_segment = TRUE;
+            }
+            break;
+#endif /* KXLD_USER_OR_LP64 */
+        case LC_SYMTAB:
+            symtab_hdr = (struct symtab_command *) cmd_hdr;
+
+            KXLD_3264_FUNC(kxld_object_is_32_bit(object), rval,
+                kxld_symtab_init_from_macho_32, kxld_symtab_init_from_macho_64,
+                object->symtab, symtab_hdr, object->file,
+                /* kernel_linkedit_seg */ NULL);
+            require_noerr(rval, finish);
+            break;
+        case LC_UUID:
+            uuid_hdr = (struct uuid_command *) cmd_hdr;
+            kxld_uuid_init_from_macho(&object->uuid, uuid_hdr);
+            break;
+        case LC_UNIXTHREAD:
+            /* Don't need to do anything with UNIXTHREAD */
+            break;
+        default:
+            rval = KERN_FAILURE;
+            kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
+                "Invalid segment type in MH_OBJECT kext: %u.", cmd_hdr->cmd);
+            goto finish;
+        }
+    }
+
+    if (has_segment) {
+
+        /* Get the number of sections from the segment and build the section index */
+
+        rval = kxld_array_init(&object->sects, sizeof(KXLDSect), nsects);
+        require_noerr(rval, finish);
+
+        /* Loop over all of the sections to initialize the section index */
+
+        for (i = 0; i < nsects; ++i) {
+            sect = kxld_array_get_item(&object->sects, i);
+            KXLD_3264_FUNC(kxld_object_is_32_bit(object), rval,
+                kxld_sect_init_from_macho_32, kxld_sect_init_from_macho_64,
+                sect, object->file, &sect_offset, i, &object->relocator); 
+            require_noerr(rval, finish);
+        }
+
+        /* Create special sections */
+
+#if KXLD_USER_OR_GOT
+        rval = create_got(object);
+        require_noerr(rval, finish);
+#endif /* KXLD_USER_OR_GOT */
+
+#if KXLD_USER_OR_COMMON
+        rval = resolve_common_symbols(object);
+        require_noerr(rval, finish);
+#endif /* KXLD_USER_OR_COMMON */
+
+        /* Create the segments from the section index */
+
+        rval = kxld_seg_create_seg_from_sections(&object->segs, &object->sects);
+        require_noerr(rval, finish);
+
+        rval = kxld_seg_finalize_object_segment(&object->segs, 
+            object->section_order, get_macho_header_size(object));
+        require_noerr(rval, finish);
+
+        rval = kxld_seg_init_linkedit(&object->segs);
+        require_noerr(rval, finish);
+    }
+
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
+#endif /* KXLD_USER_OR_OBJECT */
+
+#if KXLD_USER_OR_ILP32
+/*******************************************************************************
+*******************************************************************************/
+static u_long
+get_macho_cmd_data_32(u_char *file, u_long offset, u_int *filetype, u_int *ncmds)
+{
+    struct mach_header *mach_hdr = (struct mach_header *) (file + offset);
+
+    if (filetype) *filetype = mach_hdr->filetype;
+    if (ncmds) *ncmds = mach_hdr->ncmds;
+
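+    /* The size of the mach header doubles as the offset of the first load
+     * command, which is how callers use this return value.
+     */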
+    return sizeof(*mach_hdr);
+}
+
+#endif /* KXLD_USER_OR_ILP32 */
+
+#if KXLD_USER_OR_LP64
+/*******************************************************************************
+*******************************************************************************/
+static u_long
+get_macho_cmd_data_64(u_char *file, u_long offset, u_int *filetype, u_int *ncmds)
+{
+    struct mach_header_64 *mach_hdr = (struct mach_header_64 *) (file + offset);
+
+    if (filetype) *filetype = mach_hdr->filetype;
+    if (ncmds) *ncmds = mach_hdr->ncmds;
+
+    return sizeof(*mach_hdr);
+}
+#endif /* KXLD_USER_OR_LP64 */
+
+/*******************************************************************************
+*******************************************************************************/
+static u_long
+get_macho_header_size(const KXLDObject *object)
+{
+    KXLDSeg *seg = NULL;
+    u_long header_size = 0;
+    u_int i = 0;
+
+    check(object);
+
+    /* Mach, segment, symtab, and UUID headers */
+
+    if (kxld_object_is_32_bit(object)) {
+        header_size += sizeof(struct mach_header);
+    } else {
+        header_size += sizeof(struct mach_header_64);
+    }
+
+    for (i = 0; i < object->segs.nitems; ++i) {
+        seg = kxld_array_get_item(&object->segs, i);
+        header_size += kxld_seg_get_macho_header_size(seg, kxld_object_is_32_bit(object));
+    }
+
+    header_size += kxld_symtab_get_macho_header_size();
+
+    if (object->uuid.has_uuid) {
+        header_size += kxld_uuid_get_macho_header_size();
+    }
+
+    return header_size;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+static u_long
+get_macho_data_size(const KXLDObject *object)
+{
+    KXLDSeg *seg = NULL;
+    u_long data_size = 0;
+    u_int i = 0;
+
+    check(object);
+
+    for (i = 0; i < object->segs.nitems; ++i) {
+        seg = kxld_array_get_item(&object->segs, i);
+        data_size += (u_long) kxld_seg_get_vmsize(seg);
+    }
+
+    return data_size;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+boolean_t 
+kxld_object_target_needs_swap(const KXLDObject *object __unused)
+{
+#if KERNEL
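+    /* In-kernel linking always targets the architecture the kernel itself is
+     * running on, so the target never needs to be swapped. */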
+    return FALSE;
+#else
+    return (object->target_order != object->host_order);
+#endif /* KERNEL */
+}
+
+/*******************************************************************************
+*******************************************************************************/
+KXLDSeg *
+kxld_object_get_seg_by_name(const KXLDObject *object, const char *segname)
+{
+    KXLDSeg *seg = NULL;
+    u_int i = 0;
+
+    for (i = 0; i < object->segs.nitems; ++i) {
+        seg = kxld_array_get_item(&object->segs, i);
+
+        if (streq_safe(segname, seg->segname, sizeof(seg->segname))) break;
+
+        seg = NULL;
+    }
+
+    return seg;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+const KXLDRelocator * 
+kxld_object_get_relocator(const KXLDObject * object)
+{
+    check(object);
+
+    return &object->relocator;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+KXLDSect *
+kxld_object_get_sect_by_name(const KXLDObject *object, const char *segname, 
+    const char *sectname)
+{
+    KXLDSect *sect = NULL;
+    u_int i = 0;
+
+    for (i = 0; i < object->sects.nitems; ++i) {
+        sect = kxld_array_get_item(&object->sects, i);
+
+        if (streq_safe(segname, sect->segname, sizeof(sect->segname)) && 
+            streq_safe(sectname, sect->sectname, sizeof(sect->sectname))) 
+        {
+            break;
+        }
+
+        sect = NULL;
+    }
+
+    return sect;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+const KXLDReloc * 
+kxld_object_get_reloc_at_symbol(const KXLDObject *object, const KXLDSym *sym)
+{
+    const KXLDReloc *reloc = NULL;
+    const KXLDSect *sect = NULL;
+    uint32_t offset = 0;
+
+    check(object);
+    check(sym);
+
+    sect = kxld_object_get_section_by_index(object, sym->sectnum);
+    require(sect, finish);
+
+    if (kxld_object_is_final_image(object)) {
+        reloc = kxld_reloc_get_reloc_by_offset(&object->extrelocs, 
+            sym->base_addr);
+        if (!reloc) {
+            reloc = kxld_reloc_get_reloc_by_offset(&object->locrelocs, 
+                sym->base_addr);
+        }
+    } else {
+        offset = kxld_sym_get_section_offset(sym, sect);
+        reloc = kxld_reloc_get_reloc_by_offset(&sect->relocs, offset);
+    }
+
+finish:
+    return reloc;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+const KXLDSym * 
+kxld_object_get_symbol_of_reloc(const KXLDObject *object, 
+    const KXLDReloc *reloc, const KXLDSect *sect)
+{
+    const KXLDSym *sym = NULL;
+
+    if (kxld_object_is_final_image(object)) {
+        sym = kxld_reloc_get_symbol(&object->relocator, reloc, object->file);
+    } else {
+        sym = kxld_reloc_get_symbol(&object->relocator, reloc, sect->data);
+    }
+
+    return sym;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+const KXLDSect * 
+kxld_object_get_section_by_index(const KXLDObject *object, u_int sectnum)
+{
+    KXLDSect *sect = NULL;
+    
+    check(object);
+
+    if (sectnum < object->sects.nitems) {
+        sect = kxld_array_get_item(&object->sects, sectnum);
+    }
+
+    return sect;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+const KXLDArray  * 
+kxld_object_get_extrelocs(const KXLDObject *object)
+{
+    const KXLDArray *rval = NULL;
+    
+    check(object);
+
+    if (kxld_object_is_final_image(object)) {
+        rval = &object->extrelocs;
+    }
+
+    return rval;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+const KXLDSymtab *
+kxld_object_get_symtab(const KXLDObject *object)
+{
+    check(object);
+
+    return object->symtab;
+}
+
+#if KXLD_USER_OR_GOT || KXLD_USER_OR_COMMON
+/*******************************************************************************
+*******************************************************************************/
+static kern_return_t
+add_section(KXLDObject *object, KXLDSect **sect)
+{
+    kern_return_t rval = KERN_FAILURE;
+    u_int nsects = object->sects.nitems;
+
+    rval = kxld_array_resize(&object->sects, nsects + 1);
+    require_noerr(rval, finish);
+
+    *sect = kxld_array_get_item(&object->sects, nsects);
+
+    rval = KERN_SUCCESS;
+
+finish:
+    return rval;
+}
+#endif /* KXLD_USER_OR_GOT || KXLD_USER_OR_COMMON */
+
+#if KXLD_USER_OR_COMMON
+/*******************************************************************************
+* If there are common symbols, calculate how much space they'll need
+* and create/grow the __DATA __common section to accommodate them.
+* Then, resolve them against that section.
+*******************************************************************************/
+static kern_return_t
+resolve_common_symbols(KXLDObject *object)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDSymtabIterator iter;
+    KXLDSym *sym = NULL;
+    KXLDSect *sect = NULL;
+    kxld_addr_t base_addr = 0;
+    kxld_size_t size = 0;
+    kxld_size_t total_size = 0;
+    u_int align = 0;
+    u_int max_align = 0;
+    u_int sectnum = 0;
+
+    if (!kxld_object_target_supports_common_symbols(object)) {
+        rval = KERN_SUCCESS;
+        goto finish;
+    }
+
+    /* Iterate over the common symbols to calculate their total aligned size */
+    kxld_symtab_iterator_init(&iter, object->symtab, kxld_sym_is_common, FALSE);
+    while ((sym = kxld_symtab_iterator_get_next(&iter))) {
+        align = kxld_sym_get_common_align(sym);
+        size = kxld_sym_get_common_size(sym);
+
+        if (align > max_align) max_align = align;
+
+        total_size = kxld_align_address(total_size, align) + size;
+    }
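+
+    /* Worked example (taking align as a log2 exponent, per the Mach-O common
+     * symbol convention): commons of (size, align) = (4, 2), (8, 3), (2, 1)
+     * pack at offsets 0, 8, and 16, so total_size = 18 and max_align = 3.
+     */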
+
+    /* If there are common symbols, grow or create the __DATA __common section
+     * to hold them.
+     */
+    if (total_size) {
+        sect = kxld_object_get_sect_by_name(object, SEG_DATA, SECT_COMMON);
+        if (sect) {
+            base_addr = sect->base_addr + sect->size;
+
+            kxld_sect_grow(sect, total_size, max_align);
+        } else {
+            base_addr = 0;
+
+            rval = add_section(object, &sect);
+            require_noerr(rval, finish);
+
+            kxld_sect_init_zerofill(sect, SEG_DATA, SECT_COMMON, 
+                total_size, max_align);
+        }
+
+        /* Resolve the common symbols against the new section */
+        rval = kxld_array_get_index(&object->sects, sect, &sectnum);
+        require_noerr(rval, finish);
+
+        kxld_symtab_iterator_reset(&iter);
+        while ((sym = kxld_symtab_iterator_get_next(&iter))) {
+            align = kxld_sym_get_common_align(sym);
+            size = kxld_sym_get_common_size(sym);
+
+            base_addr = kxld_align_address(base_addr, align);
+            kxld_sym_resolve_common(sym, sectnum, base_addr);
+
+            base_addr += size;
+        }
+    }
+
+    rval = KERN_SUCCESS;
+
+finish:
+    return rval;
+}
+#endif /* KXLD_USER_OR_COMMON */
+
+#if KXLD_USER_OR_GOT
+/*******************************************************************************
+*******************************************************************************/
+static boolean_t
+target_has_got(const KXLDObject *object)
+{
+    return FALSE;
+}
+
+/*******************************************************************************
+* Create and initialize the Global Offset Table
+*******************************************************************************/
+static kern_return_t
+create_got(KXLDObject *object)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDSect *sect = NULL;
+    u_int ngots = 0;
+    u_int i = 0;
+
+    if (!target_has_got(object)) {
+        rval = KERN_SUCCESS;
+        goto finish;
+    }
+
+    for (i = 0; i < object->sects.nitems; ++i) {
+        sect = kxld_array_get_item(&object->sects, i);
+        ngots += kxld_sect_get_ngots(sect, &object->relocator, 
+            object->symtab);
+    }
+
+    rval = add_section(object, &sect);
+    require_noerr(rval, finish);
+
+    rval = kxld_sect_init_got(sect, ngots);
+    require_noerr(rval, finish);
+
+    object->got_is_created = TRUE;
+    rval = KERN_SUCCESS;
+
+finish:
+    return rval;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+static kern_return_t
+populate_got(KXLDObject *object)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDSect *sect = NULL;
+    u_int i = 0;
+
+    if (!target_has_got(object) || !object->got_is_created) {
+        rval = KERN_SUCCESS;
+        goto finish;
+    }
+
+    for (i = 0; i < object->sects.nitems; ++i) {
+        sect = kxld_array_get_item(&object->sects, i);
+        if (streq_safe(sect->segname, KXLD_SEG_GOT, sizeof(KXLD_SEG_GOT)) &&
+            streq_safe(sect->sectname, KXLD_SECT_GOT, sizeof(KXLD_SECT_GOT)))
+        {
+            kxld_sect_populate_got(sect, object->symtab,
+                kxld_object_target_needs_swap(object));
+            break;
+        }
+    }
+
+    require_action(i < object->sects.nitems, finish, rval=KXLD_MISSING_GOT);
+
+    rval = KERN_SUCCESS;
+
+finish:
+    return rval;
+}
+#endif /* KXLD_USER_OR_GOT */
+
+/*******************************************************************************
+*******************************************************************************/
+static boolean_t
+target_supports_protected_segments(const KXLDObject *object)
+{
+    return (object->is_final_image && 
+            object->cputype == CPU_TYPE_X86_64);
+}
+
+/*******************************************************************************
+*******************************************************************************/
+static void
+set_is_object_linked(KXLDObject *object)
+{
+    u_int i = 0;
+
+    if (kxld_object_is_kernel(object)) {
+        object->is_linked = TRUE;
+        return;
+    }
+
+    if (object->is_final_image) {
+        object->is_linked = !object->extrelocs.nitems && !object->locrelocs.nitems;
+        return;
+    }
+
+    object->is_linked = TRUE;
+    for (i = 0; i < object->sects.nitems; ++i) {
+        KXLDSect *sect = kxld_array_get_item(&object->sects, i);
+        if (sect->relocs.nitems) {
+            object->is_linked = FALSE;
+            break;
+        }
+    }
+}
+
+
+/*******************************************************************************
+*******************************************************************************/
+void kxld_object_clear(KXLDObject *object __unused)
+{
+    KXLDSeg *seg = NULL;
+    KXLDSect *sect = NULL;
+    u_int i;
+
+    check(object);
+
+#if !KERNEL
+    if (kxld_object_is_kernel(object)) {
+        unswap_macho(object->file, object->host_order, object->target_order);
+    }
+#endif /* !KERNEL */
+
+    for (i = 0; i < object->segs.nitems; ++i) {
+        seg = kxld_array_get_item(&object->segs, i);
+        kxld_seg_clear(seg);
+    }
+    kxld_array_reset(&object->segs);
+
+    for (i = 0; i < object->sects.nitems; ++i) {
+        sect = kxld_array_get_item(&object->sects, i);
+        kxld_sect_clear(sect);
+    }
+    kxld_array_reset(&object->sects);
+
+    kxld_array_reset(&object->extrelocs);
+    kxld_array_reset(&object->locrelocs);
+    kxld_relocator_clear(&object->relocator);
+    kxld_uuid_clear(&object->uuid);
+
+    if (object->symtab) kxld_symtab_clear(object->symtab);
+
+    object->file = NULL;
+    object->size = 0;
+    object->filetype = 0;
+    object->cputype = 0;
+    object->cpusubtype = 0;
+    object->is_kernel = FALSE;
+    object->is_final_image = FALSE;
+    object->is_linked = FALSE;
+    object->got_is_created = FALSE;
+
+#if KXLD_USER_OR_OBJECT
+    object->section_order = NULL;
+#endif
+#if !KERNEL
+    object->host_order = 0;
+    object->target_order = 0;
+#endif
+}
+
+/*******************************************************************************
+*******************************************************************************/
+void kxld_object_deinit(KXLDObject *object __unused)
+{
+    KXLDSeg *seg = NULL;
+    KXLDSect *sect = NULL;
+    u_int i;
+
+    check(object);
+
+#if !KERNEL
+    if (object->file && kxld_object_is_kernel(object)) {
+        unswap_macho(object->file, object->host_order, object->target_order);
+    }
+#endif /* !KERNEL */
+
+    for (i = 0; i < object->segs.maxitems; ++i) {
+        seg = kxld_array_get_slot(&object->segs, i);
+        kxld_seg_deinit(seg);
+    }
+    kxld_array_deinit(&object->segs);
+
+    for (i = 0; i < object->sects.maxitems; ++i) {
+        sect = kxld_array_get_slot(&object->sects, i);
+        kxld_sect_deinit(sect);
+    }
+    kxld_array_deinit(&object->sects);
+
+    kxld_array_deinit(&object->extrelocs);
+    kxld_array_deinit(&object->locrelocs);
+
+    if (object->symtab) {
+        kxld_symtab_deinit(object->symtab);
+        kxld_free(object->symtab, kxld_symtab_sizeof());
+    }
+
+    bzero(object, sizeof(*object));
+}
+
+/*******************************************************************************
+*******************************************************************************/
+const u_char *
+kxld_object_get_file(const KXLDObject *object)
+{
+    check(object);
+
+    return object->file;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+const char *
+kxld_object_get_name(const KXLDObject *object)
+{
+    check(object);
+
+    return object->name;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+boolean_t 
+kxld_object_is_32_bit(const KXLDObject *object)
+{
+    check(object);
+
+    return kxld_is_32_bit(object->cputype);
+}
+
+/*******************************************************************************
+*******************************************************************************/
+boolean_t 
+kxld_object_is_final_image(const KXLDObject *object)
+{
+    check(object);
+
+    return object->is_final_image;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+boolean_t 
+kxld_object_is_kernel(const KXLDObject *object)
+{
+    check(object);
+
+    return object->is_kernel;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+boolean_t 
+kxld_object_is_linked(const KXLDObject *object)
+{
+    check(object);
+
+    return object->is_linked;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+boolean_t
+kxld_object_target_supports_strict_patching(const KXLDObject *object)
+{
+    check(object);
+
+    return (object->cputype != CPU_TYPE_I386 && 
+            object->cputype != CPU_TYPE_POWERPC);
+}
+
+/*******************************************************************************
+*******************************************************************************/
+boolean_t
+kxld_object_target_supports_common_symbols(const KXLDObject *object)
+{
+    check(object);
+
+    return (object->cputype == CPU_TYPE_I386 || 
+            object->cputype == CPU_TYPE_POWERPC);
+}
+
+/*******************************************************************************
+*******************************************************************************/
+void
+kxld_object_get_vmsize(const KXLDObject *object, u_long *header_size, 
+    u_long *vmsize)
+{
+    check(object);
+    check(header_size);
+    check(vmsize);
+    *header_size = 0;
+    *vmsize = 0;
+
+    /* vmsize is the padded header page(s) + segment vmsizes */
+
+    *header_size = (object->is_final_image) ?
+        0 : round_page(get_macho_header_size(object));
+    *vmsize = *header_size + get_macho_data_size(object);
+}
+
+/*******************************************************************************
+*******************************************************************************/
+kern_return_t 
+kxld_object_export_linked_object(const KXLDObject *object, 
+    u_char *linked_object)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDSeg *seg = NULL;
+    u_long size = 0;
+    u_long header_size = 0;
+    u_long header_offset = 0;
+    u_long data_offset = 0;
+    u_int ncmds = 0;
+    u_int i = 0;
+
+    check(object);
+    check(linked_object);
+
+    /* Calculate the size of the headers and data */
+
+    header_size = get_macho_header_size(object);
+    data_offset = (object->is_final_image) ? header_size : round_page(header_size);
+    size = data_offset + get_macho_data_size(object);
+
+    /* Copy data to the file */
+
+    ncmds = object->segs.nitems + (object->uuid.has_uuid == TRUE) + 1 /* linkedit */;
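+    /* One LC_SEGMENT{,_64} per segment, an LC_UUID when present, and the
+     * final +1 for the LC_SYMTAB command that describes the __LINKEDIT data
+     * exported below.
+     */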
+
+    rval = export_macho_header(object, linked_object, ncmds, 
+        &header_offset, header_size);
+    require_noerr(rval, finish);
+
+    for (i = 0; i < object->segs.nitems; ++i) {
+        seg = kxld_array_get_item(&object->segs, i);
+
+        rval = kxld_seg_export_macho_to_vm(seg, linked_object, &header_offset, 
+            header_size, size, object->link_addr, kxld_object_is_32_bit(object));
+        require_noerr(rval, finish);
+    }
+
+    seg = kxld_object_get_seg_by_name(object, SEG_LINKEDIT);
+    data_offset = (u_long) (seg->link_addr - object->link_addr);
+    rval = kxld_symtab_export_macho(object->symtab, linked_object, &header_offset,
+        header_size, &data_offset, size, kxld_object_is_32_bit(object));
+    require_noerr(rval, finish);
+
+    if (object->uuid.has_uuid) {
+        rval = kxld_uuid_export_macho(&object->uuid, linked_object, 
+            &header_offset, header_size);
+        require_noerr(rval, finish);
+    }
+
+#if !KERNEL
+    unswap_macho(linked_object, object->host_order, object->target_order);
+#endif /* !KERNEL */
+
+    rval = KERN_SUCCESS;
+
+finish:
+    return rval;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+static kern_return_t
+export_macho_header(const KXLDObject *object, u_char *buf, u_int ncmds,
+    u_long *header_offset, u_long header_size)
+{
+    kern_return_t rval = KERN_FAILURE;
+
+    check(object);
+    check(buf);
+    check(header_offset);
+
+    KXLD_3264_FUNC(kxld_object_is_32_bit(object), rval, 
+        export_macho_header_32, export_macho_header_64, 
+        object, buf, ncmds, header_offset, header_size);
+    require_noerr(rval, finish);
+
+    rval = KERN_SUCCESS;
+
+finish:
+    return rval;
+}
+
+#if KXLD_USER_OR_ILP32
+/*******************************************************************************
+*******************************************************************************/
+static kern_return_t
+export_macho_header_32(const KXLDObject *object, u_char *buf, u_int ncmds,
+    u_long *header_offset, u_long header_size)
+{
+    kern_return_t rval = KERN_FAILURE;
+    struct mach_header *mach = NULL;
+
+    check(object);
+    check(buf);
+    check(header_offset);
+
+    require_action(sizeof(*mach) <= header_size - *header_offset, finish,
+        rval=KERN_FAILURE);
+    mach = (struct mach_header *) (buf + *header_offset);
+
+    mach->magic = MH_MAGIC;
+    mach->cputype = object->cputype;
+    mach->cpusubtype = object->cpusubtype;
+    mach->filetype = object->filetype;
+    mach->ncmds = ncmds;
+    mach->sizeofcmds = (uint32_t) (header_size - sizeof(*mach));
+    mach->flags = MH_NOUNDEFS;
+
+    *header_offset += sizeof(*mach);
+
+    rval = KERN_SUCCESS;
+
+finish:
+    return rval;
+}
+#endif /* KXLD_USER_OR_ILP32 */
+
+#if KXLD_USER_OR_LP64
+/*******************************************************************************
+*******************************************************************************/
+static kern_return_t
+export_macho_header_64(const KXLDObject *object, u_char *buf, u_int ncmds,
+    u_long *header_offset, u_long header_size)
+{
+    kern_return_t rval = KERN_FAILURE;
+    struct mach_header_64 *mach = NULL;
+
+    check(object);
+    check(buf);
+    check(header_offset);
+    
+    require_action(sizeof(*mach) <= header_size - *header_offset, finish,
+        rval=KERN_FAILURE);
+    mach = (struct mach_header_64 *) (buf + *header_offset);
+    
+    mach->magic = MH_MAGIC_64;
+    mach->cputype = object->cputype;
+    mach->cpusubtype = object->cpusubtype;
+    mach->filetype = object->filetype;
+    mach->ncmds = ncmds;
+    mach->sizeofcmds = (uint32_t) (header_size - sizeof(*mach));
+    mach->flags = MH_NOUNDEFS;
+
+    *header_offset += sizeof(*mach);
+
+    rval = KERN_SUCCESS;
+
+finish:
+    return rval;
+}
+#endif /* KXLD_USER_OR_LP64 */
+
+/*******************************************************************************
+*******************************************************************************/
+kern_return_t 
+kxld_object_index_symbols_by_name(KXLDObject *object)
+{
+    return kxld_symtab_index_symbols_by_name(object->symtab);
+}
+
+/*******************************************************************************
+*******************************************************************************/
+kern_return_t 
+kxld_object_index_cxx_symbols_by_value(KXLDObject *object)
+{
+    return kxld_symtab_index_cxx_symbols_by_value(object->symtab);
+}
+
+/*******************************************************************************
+*******************************************************************************/
+kern_return_t 
+kxld_object_relocate(KXLDObject *object, kxld_addr_t link_address)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDSeg *seg = NULL;
+    u_int i = 0;
+
+    check(object);
+
+    object->link_addr = link_address;
+
+    /* Relocate segments (which relocates the sections) */
+    for (i = 0; i < object->segs.nitems; ++i) {
+        seg = kxld_array_get_item(&object->segs, i);
+        kxld_seg_relocate(seg, link_address);
+    }
+
+    /* Relocate symbols */
+    rval = kxld_symtab_relocate(object->symtab, &object->sects);
+    require_noerr(rval, finish);
+
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+static KXLDSym *
+get_mutable_sym(const KXLDObject *object, const KXLDSym *sym)
+{
+    KXLDSym *rval = NULL;
+    kern_return_t result = KERN_FAILURE;
+    u_int i = 0;
+
+    result = kxld_symtab_get_sym_index(object->symtab, sym, &i);
+    require_noerr(result, finish);
+
+    rval = kxld_symtab_get_symbol_by_index(object->symtab, i);
+    require_action(rval == sym, finish, rval=NULL);
+
+finish:
+    return rval;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+kern_return_t 
+kxld_object_resolve_symbol(KXLDObject *object, 
+    const KXLDSym *sym, kxld_addr_t addr)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDSym *resolved_sym = NULL;
+
+    resolved_sym = get_mutable_sym(object, sym);
+    require_action(resolved_sym, finish, rval=KERN_FAILURE);
+
+    rval = kxld_sym_resolve(resolved_sym, addr);
+    require_noerr(rval, finish);
+
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+kern_return_t 
+kxld_object_patch_symbol(KXLDObject *object, const struct kxld_sym *sym)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDSym *patched_sym = NULL;
+
+    patched_sym = get_mutable_sym(object, sym);
+    require_action(patched_sym, finish, rval=KERN_FAILURE);
+
+    (void) kxld_sym_patch(patched_sym);
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+kern_return_t 
+kxld_object_add_symbol(KXLDObject *object, char *name, kxld_addr_t link_addr, 
+    const KXLDSym **sym_out)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDSym *sym = NULL;
+
+    rval = kxld_symtab_add_symbol(object->symtab, name, link_addr, &sym);
+    require_noerr(rval, finish);
+
+    *sym_out = sym;
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+kern_return_t 
+kxld_object_process_relocations(KXLDObject *object, 
+    const KXLDDict *patched_vtables)
+{
+    kern_return_t rval = KERN_FAILURE;
+
+    (void) kxld_relocator_set_vtables(&object->relocator, patched_vtables);
+
+    /* Process relocation entries and populate the global offset table.
+     *
+     * For final linked images: the relocation entries are contained in a couple
+     * of tables hanging off the end of the symbol table.  The GOT has its own
+     * section created by the linker; we simply need to fill it.
+     *
+     * For object files: the relocation entries are bound to each section.
+     * The GOT, if it exists for the target architecture, is created by kxld,
+     * and we must populate it according to our internal structures.
+     */
+    if (object->is_final_image) {
+#if KXLD_USER_OR_BUNDLE
+        rval = process_symbol_pointers(object);
+        require_noerr(rval, finish);
+
+        rval = process_relocs_from_tables(object);
+        require_noerr(rval, finish);
+#else
+        require_action(FALSE, finish, rval=KERN_FAILURE);
+#endif /* KXLD_USER_OR_BUNDLE */
+    } else {
+#if KXLD_USER_OR_GOT
+        /* Populate GOT */
+        rval = populate_got(object);
+        require_noerr(rval, finish);
+#endif /* KXLD_USER_OR_GOT */
+#if KXLD_USER_OR_OBJECT
+        rval = process_relocs_from_sections(object);
+        require_noerr(rval, finish);
+#else
+        require_action(FALSE, finish, rval=KERN_FAILURE);
+#endif /* KXLD_USER_OR_OBJECT */
+    }
+
+    /* Populate kmod info structure */
+    rval = populate_kmod_info(object);
+    require_noerr(rval, finish);
+ 
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
+
+#if KXLD_USER_OR_BUNDLE
+
+#define SECT_SYM_PTRS "__nl_symbol_ptr"
+
+/*******************************************************************************
+* Final linked images create an __nl_symbol_ptr section for the global offset
+* table and for symbol pointer lookups in general.  Rather than use relocation
+* entries, the linker creates an "indirect symbol table" which stores indexes
+* into the symbol table corresponding to the entries of this section.  This
+* function populates the section with the relocated addresses of those symbols.
+*******************************************************************************/
+static kern_return_t
+process_symbol_pointers(KXLDObject *object)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDSect *sect = NULL;
+    KXLDSym *sym = NULL;
+    int32_t *symidx = NULL;
+    u_char *symptr = NULL;
+    u_long symptrsize = 0;
+    u_int nsyms = 0;
+    u_int firstsym = 0;
+    u_int i = 0;
+
+    check(object);
+
+    require_action(object->is_final_image && object->dysymtab_hdr, 
+        finish, rval=KERN_FAILURE);
+
+    /* Get the __DATA,__nl_symbol_ptr section.  If it doesn't exist, we have
+     * nothing to do.
+     */
+
+    sect = kxld_object_get_sect_by_name(object, SEG_DATA, SECT_SYM_PTRS);
+    if (!sect) {
+        rval = KERN_SUCCESS;
+        goto finish;
+    }
+
+    require_action(sect->flags & S_NON_LAZY_SYMBOL_POINTERS,
+        finish, rval=KERN_FAILURE;
+        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO 
+            "Section %s,%s does not have S_NON_LAZY_SYMBOL_POINTERS flag.",
+            SEG_DATA, SECT_SYM_PTRS));
+
+    /* Calculate the table offset and number of entries in the section */
+
+    if (kxld_object_is_32_bit(object)) {
+        symptrsize = sizeof(uint32_t);
+    } else {
+        symptrsize = sizeof(uint64_t);
+    }
+
+    nsyms = (u_int) (sect->size / symptrsize);
+    firstsym = sect->reserved1;
+
+    require_action(firstsym + nsyms <= object->dysymtab_hdr->nindirectsyms,
+        finish, rval=KERN_FAILURE;
+        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO));
+
+    /* Iterate through the indirect symbol table and fill in the section of
+     * symbol pointers.  There are three cases:
+     *   1) A normal symbol - put its value directly in the table
+     *   2) An INDIRECT_SYMBOL_LOCAL - a local symbol whose section entry
+     *      already holds its offset from the start of the file.  Simply
+     *      add the file's link address to fill in this entry.
+     *   3) An INDIRECT_SYMBOL_ABS - prepopulated absolute symbols.  No
+     *      action is required.
+     */
+
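+    /* INDIRECT_SYMBOL_LOCAL (0x80000000) and INDIRECT_SYMBOL_ABS (0x40000000)
+     * are flag bits rather than real symbol table indexes, which is why they
+     * are tested with '&' below.
+     */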
+    symidx = (int32_t *) (object->file + object->dysymtab_hdr->indirectsymoff);
+    symidx += firstsym;
+    symptr = sect->data;
+    for (i = 0; i < nsyms; ++i, ++symidx, symptr+=symptrsize) {
+        if (*symidx & INDIRECT_SYMBOL_LOCAL) {
+            if (*symidx & INDIRECT_SYMBOL_ABS) continue;
+
+            add_to_ptr(symptr, object->link_addr, kxld_object_is_32_bit(object));
+        } else {
+            sym = kxld_symtab_get_symbol_by_index(object->symtab, *symidx);
+            require_action(sym, finish, rval=KERN_FAILURE);
+
+            add_to_ptr(symptr, sym->link_addr, kxld_object_is_32_bit(object));
+        }
+    }
+
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+static KXLDSeg *
+get_seg_by_base_addr(KXLDObject *object, kxld_addr_t base_addr)
+{
+    KXLDSeg *seg = NULL;
+    kxld_addr_t start = 0;
+    kxld_addr_t end = 0;
+    u_int i = 0;
+
+    for (i = 0; i < object->segs.nitems; ++i) {
+        seg = kxld_array_get_item(&object->segs, i);
+        start = seg->base_addr;
+        end = seg->base_addr + seg->vmsize;
+
+        if (start <= base_addr && base_addr < end) return seg;
+    }
+
+    return NULL;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+static kern_return_t
+process_relocs_from_tables(KXLDObject *object)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDReloc *reloc = NULL;
+    KXLDSeg *seg = NULL;
+    u_int i = 0;
+
+    /* Process external relocations */
+    for (i = 0; i < object->extrelocs.nitems; ++i) {
+        reloc = kxld_array_get_item(&object->extrelocs, i);
+
+        seg = get_seg_by_base_addr(object, reloc->address);
+        require_action(seg, finish, rval=KERN_FAILURE);
+
+        rval = kxld_relocator_process_table_reloc(&object->relocator, reloc,
+            seg, object->link_addr);
+        require_noerr(rval, finish);
+    }
+
+    /* Process local relocations */
+    for (i = 0; i < object->locrelocs.nitems; ++i) {
+        reloc = kxld_array_get_item(&object->locrelocs, i);
+
+        seg = get_seg_by_base_addr(object, reloc->address);
+        require_action(seg, finish, rval=KERN_FAILURE);
+
+        rval = kxld_relocator_process_table_reloc(&object->relocator, reloc,
+            seg, object->link_addr);
+        require_noerr(rval, finish);
+    }
+
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+static void
+add_to_ptr(u_char *symptr, kxld_addr_t val, boolean_t is_32_bit)
+{
+    if (is_32_bit) {
+        uint32_t *ptr = (uint32_t *) symptr;
+        *ptr += (uint32_t) val;
+    } else {
+        uint64_t *ptr = (uint64_t *) symptr;
+        *ptr += (uint64_t) val;
+    }
+}
+#endif /* KXLD_USER_OR_BUNDLE */
+
+#if KXLD_USER_OR_OBJECT
+/*******************************************************************************
+*******************************************************************************/
+static kern_return_t
+process_relocs_from_sections(KXLDObject *object)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDSect *sect = NULL;
+    u_int i = 0;
+
+    for (i = 0; i < object->sects.nitems; ++i) {
+        sect = kxld_array_get_item(&object->sects, i);
+        rval = kxld_sect_process_relocs(sect, &object->relocator);
+        require_noerr(rval, finish);
+    }
+
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
+#endif /* KXLD_USER_OR_OBJECT */
+
+/*******************************************************************************
+*******************************************************************************/
+static kern_return_t
+populate_kmod_info(KXLDObject *object)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDSect *kmodsect = NULL;
+    KXLDSym *kmodsym = NULL;
+    kmod_info_t *kmod_info = NULL;
+    u_long kmod_offset = 0;
+    u_long header_size;
+    u_long size;
+
+    if (kxld_object_is_kernel(object)) {
+        rval = KERN_SUCCESS;
+        goto finish;
+    }
+
+    kxld_object_get_vmsize(object, &header_size, &size);
+
+    kmodsym = kxld_symtab_get_locally_defined_symbol_by_name(object->symtab, 
+        KXLD_KMOD_INFO_SYMBOL);
+    require_action(kmodsym, finish, rval=KERN_FAILURE;
+        kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogNoKmodInfo));
+ 
+    kmodsect = kxld_array_get_item(&object->sects, kmodsym->sectnum);
+    kmod_offset = (u_long) (kmodsym->base_addr - kmodsect->base_addr);
+    kmod_info = (kmod_info_t *) (kmodsect->data + kmod_offset);
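+    /* kmod_info lives inside one of the kext's own sections, so the fields
+     * below are patched in place; they record the final link address and
+     * sizes for the kernel's kmod bookkeeping.
+     */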
+
+    if (kxld_object_is_32_bit(object)) {
+        kmod_info_32_v1_t *kmod = (kmod_info_32_v1_t *) (kmod_info);
+        kmod->address = (uint32_t) object->link_addr;
+        kmod->size = (uint32_t) size;
+        kmod->hdr_size = (uint32_t) header_size;
+
+#if !KERNEL
+        if (kxld_object_target_needs_swap(object)) {
+            kmod->address = OSSwapInt32(kmod->address);
+            kmod->size = OSSwapInt32(kmod->size);
+            kmod->hdr_size = OSSwapInt32(kmod->hdr_size);
+        }
+#endif /* !KERNEL */
+    } else {
+        kmod_info_64_v1_t *kmod = (kmod_info_64_v1_t *) (kmod_info);
+        kmod->address = object->link_addr;
+        kmod->size = size;
+        kmod->hdr_size = header_size;
+
+#if !KERNEL
+        if (kxld_object_target_needs_swap(object)) {
+            kmod->address = OSSwapInt64(kmod->address);
+            kmod->size = OSSwapInt64(kmod->size);
+            kmod->hdr_size = OSSwapInt64(kmod->hdr_size);
+        }
+#endif /* !KERNEL */
+    }
+
+
+    rval = KERN_SUCCESS;
+
+finish:
+    return rval;
+}
+
diff --git a/libkern/kxld/kxld_object.h b/libkern/kxld/kxld_object.h
new file mode 100644
index 000000000..5b6b5064d
--- /dev/null
+++ b/libkern/kxld/kxld_object.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef _KXLD_OBJECT_H_
+#define _KXLD_OBJECT_H_
+
+#include <mach/machine.h>
+#include <sys/types.h>
+#if KERNEL
+    #include <libkern/kxld_types.h>
+#else
+    #include "kxld_types.h"
+#endif
+
+struct kxld_array;
+struct kxld_dict;
+struct kxld_reloc;
+struct kxld_relocator;
+struct kxld_sect;
+struct kxld_sym;
+struct kxld_symtab;
+
+typedef struct kxld_object KXLDObject; 
+
+/*******************************************************************************
+* Constructors and destructors
+*******************************************************************************/
+
+size_t kxld_object_sizeof(void)
+    __attribute__((const, visibility("hidden")));
+    
+kern_return_t kxld_object_init_from_macho(KXLDObject *object, 
+    u_char *file, u_long size, const char *name,
+    struct kxld_array *section_order,
+    cpu_type_t cputype, cpu_subtype_t cpusubtype)
+    __attribute__((nonnull(1,2,4), visibility("hidden")));
+
+void kxld_object_clear(KXLDObject *object)
+    __attribute__((nonnull, visibility("hidden")));
+
+void kxld_object_deinit(KXLDObject *object)
+    __attribute__((nonnull, visibility("hidden")));
+
+/*******************************************************************************
+* Accessors
+*******************************************************************************/
+
+const u_char * kxld_object_get_file(const KXLDObject *object)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+const char * kxld_object_get_name(const KXLDObject *object)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+boolean_t kxld_object_is_32_bit(const KXLDObject *object)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+boolean_t kxld_object_is_final_image(const KXLDObject *object)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+boolean_t kxld_object_is_kernel(const KXLDObject *object)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+boolean_t kxld_object_is_linked(const KXLDObject *object)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+boolean_t kxld_object_target_supports_strict_patching(const KXLDObject *object)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+boolean_t kxld_object_target_supports_common_symbols(const KXLDObject *object)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+const struct kxld_relocator * kxld_object_get_relocator(
+    const KXLDObject * object)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+const struct kxld_reloc * kxld_object_get_reloc_at_symbol(
+    const KXLDObject *object, const struct kxld_sym *sym)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+const struct kxld_sym * kxld_object_get_symbol_of_reloc(
+    const KXLDObject *object, const struct kxld_reloc *reloc,
+    const struct kxld_sect *sect)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+const struct kxld_sect * kxld_object_get_section_by_index(
+    const KXLDObject *object, u_int sectnum)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+const struct kxld_array * kxld_object_get_extrelocs(
+    const KXLDObject *object)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+const struct kxld_symtab * kxld_object_get_symtab(const KXLDObject *object)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+void kxld_object_get_vmsize(const KXLDObject *object, u_long *header_size, 
+    u_long *vmsize)
+    __attribute__((nonnull, visibility("hidden")));
+
+/* The linked_object buffer must be at least as large as the vmsize returned
+ * by kxld_object_get_vmsize(). */
+kern_return_t kxld_object_export_linked_object(const KXLDObject *object,
+    u_char *linked_object)
+    __attribute__((nonnull, visibility("hidden")));
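+
+/* A minimal usage sketch (hedged: kxld_page_alloc is assumed to come from
+ * kxld_util.h; error handling omitted):
+ *
+ *     u_long header_size = 0;
+ *     u_long vmsize = 0;
+ *     u_char *buf = NULL;
+ *
+ *     kxld_object_get_vmsize(object, &header_size, &vmsize);
+ *     buf = kxld_page_alloc(vmsize);
+ *     if (buf) (void) kxld_object_export_linked_object(object, buf);
+ */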
+
+/*******************************************************************************
+* Modifiers
+*******************************************************************************/
+
+kern_return_t kxld_object_index_symbols_by_name(KXLDObject *object)
+    __attribute__((nonnull, visibility("hidden")));
+
+kern_return_t kxld_object_index_cxx_symbols_by_value(KXLDObject *object)
+    __attribute__((nonnull, visibility("hidden")));
+
+kern_return_t kxld_object_relocate(KXLDObject *object, kxld_addr_t link_address)
+    __attribute__((nonnull, visibility("hidden")));
+
+kern_return_t kxld_object_resolve_symbol(KXLDObject *object, 
+    const struct kxld_sym *sym, kxld_addr_t addr)
+    __attribute__((nonnull, visibility("hidden")));
+
+kern_return_t kxld_object_patch_symbol(KXLDObject *object,
+    const struct kxld_sym *sym)
+    __attribute__((nonnull, visibility("hidden")));
+
+kern_return_t kxld_object_add_symbol(KXLDObject *object, char *name, 
+    kxld_addr_t link_addr, const struct kxld_sym **sym_out)
+    __attribute__((nonnull, visibility("hidden")));
+
+kern_return_t kxld_object_process_relocations(KXLDObject *object, 
+    const struct kxld_dict *patched_vtables)
+    __attribute__((nonnull, visibility("hidden")));
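+
+/* A rough sketch of the call order for one link, inferred from the
+ * declarations above rather than any documented contract:
+ *
+ *     kxld_object_init_from_macho(object, file, size, name, order, cpu, sub);
+ *     kxld_object_relocate(object, link_address);
+ *     (resolve each undefined symbol with kxld_object_resolve_symbol)
+ *     (patch C++ symbols with kxld_object_patch_symbol)
+ *     kxld_object_process_relocations(object, patched_vtables);
+ *     kxld_object_export_linked_object(object, linked_object);
+ */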
+
+#endif /* _KXLD_OBJECT_H_ */
+
diff --git a/libkern/kxld/kxld_reloc.c b/libkern/kxld/kxld_reloc.c
index c781d6dc2..4867c8c78 100644
--- a/libkern/kxld/kxld_reloc.c
+++ b/libkern/kxld/kxld_reloc.c
@@ -41,12 +41,15 @@
 #include <AssertMacros.h>
 
 #include "kxld_array.h"
+#include "kxld_demangle.h"
+#include "kxld_dict.h"
 #include "kxld_reloc.h"
 #include "kxld_sect.h"
 #include "kxld_seg.h"
 #include "kxld_sym.h"
 #include "kxld_symtab.h"
 #include "kxld_util.h"
+#include "kxld_vtable.h"
 
 /* include target-specific relocation prototypes */
 #include <mach-o/reloc.h>
@@ -102,9 +105,10 @@ static boolean_t generic_reloc_is_pair(u_int _type, u_int _prev_type)
     __attribute__((const));
 static boolean_t generic_reloc_has_got(u_int _type)
     __attribute__((const));
-static kern_return_t generic_process_reloc(u_char *instruction, u_int length, 
-    u_int pcrel, kxld_addr_t base_pc, kxld_addr_t link_pc, kxld_addr_t link_disp,
-    u_int type, kxld_addr_t target, kxld_addr_t pair_target, boolean_t swap);
+static kern_return_t generic_process_reloc(const KXLDRelocator *relocator,
+    u_char *instruction, u_int length, u_int pcrel, kxld_addr_t base_pc, 
+    kxld_addr_t link_pc, kxld_addr_t link_disp, u_int type, kxld_addr_t target, 
+    kxld_addr_t pair_target, boolean_t swap);
 #endif /* KXLD_USER_OR_I386 */
 
 #if KXLD_USER_OR_PPC 
@@ -114,9 +118,10 @@ static boolean_t ppc_reloc_is_pair(u_int _type, u_int _prev_type)
     __attribute__((const));
 static boolean_t ppc_reloc_has_got(u_int _type)
     __attribute__((const));
-static kern_return_t ppc_process_reloc(u_char *instruction, u_int length, 
-    u_int pcrel, kxld_addr_t base_pc, kxld_addr_t link_pc, kxld_addr_t link_disp,
-    u_int type, kxld_addr_t target, kxld_addr_t pair_target, boolean_t swap);
+static kern_return_t ppc_process_reloc(const KXLDRelocator *relocator, 
+    u_char *instruction, u_int length, u_int pcrel, kxld_addr_t base_pc, 
+    kxld_addr_t link_pc, kxld_addr_t link_disp, u_int type, kxld_addr_t target, 
+    kxld_addr_t pair_target, boolean_t swap);
 #endif /* KXLD_USER_OR_PPC */
 
 #if KXLD_USER_OR_X86_64 
@@ -126,9 +131,10 @@ static boolean_t x86_64_reloc_is_pair(u_int _type, u_int _prev_type)
     __attribute__((const));
 static boolean_t x86_64_reloc_has_got(u_int _type)
     __attribute__((const));
-static kern_return_t x86_64_process_reloc(u_char *instruction, u_int length, 
-    u_int pcrel, kxld_addr_t base_pc, kxld_addr_t link_pc, kxld_addr_t link_disp,
-    u_int type, kxld_addr_t target, kxld_addr_t pair_target, boolean_t swap);
+static kern_return_t x86_64_process_reloc(const KXLDRelocator *relocator, 
+    u_char *instruction, u_int length, u_int pcrel, kxld_addr_t base_pc, 
+    kxld_addr_t link_pc, kxld_addr_t link_disp, u_int type, kxld_addr_t target, 
+    kxld_addr_t pair_target, boolean_t swap);
 static kern_return_t calculate_displacement_x86_64(uint64_t target, 
     uint64_t adjustment, int32_t *instr32);
 #endif /* KXLD_USER_OR_X86_64 */
@@ -140,19 +146,20 @@ static boolean_t arm_reloc_is_pair(u_int _type, u_int _prev_type)
     __attribute__((const));
 static boolean_t arm_reloc_has_got(u_int _type)
     __attribute__((const));
-static kern_return_t arm_process_reloc(u_char *instruction, u_int length, 
-    u_int pcrel, kxld_addr_t base_pc, kxld_addr_t link_pc, kxld_addr_t link_disp,
-    u_int type, kxld_addr_t target, kxld_addr_t pair_target, boolean_t swap);
+static kern_return_t arm_process_reloc(const KXLDRelocator *relocator, 
+    u_char *instruction, u_int length, u_int pcrel, kxld_addr_t base_pc, 
+    kxld_addr_t link_pc, kxld_addr_t link_disp, u_int type, kxld_addr_t target, 
+    kxld_addr_t pair_target, boolean_t swap);
 #endif /* KXLD_USER_OR_ARM */
 
 #if KXLD_USER_OR_ILP32
-static kxld_addr_t get_pointer_at_addr_32(u_char *data, u_long offset,
-    const KXLDRelocator *relocator __unused)
+static kxld_addr_t get_pointer_at_addr_32(const KXLDRelocator *relocator, 
+    const u_char *data, u_long offset)
     __attribute__((pure, nonnull));
 #endif /* KXLD_USER_OR_ILP32 */
 #if KXLD_USER_OR_LP64
-static kxld_addr_t get_pointer_at_addr_64(u_char *data, u_long offset,
-    const KXLDRelocator *relocator __unused)
+static kxld_addr_t get_pointer_at_addr_64(const KXLDRelocator *relocator, 
+    const u_char *data, u_long offset)
     __attribute__((pure, nonnull));
 #endif /* KXLD_USER_OR_LP64 */
 
@@ -160,16 +167,23 @@ static u_int count_relocatable_relocs(const KXLDRelocator *relocator,
     const struct relocation_info *relocs, u_int nrelocs)
     __attribute__((pure));
 
-static kern_return_t calculate_targets(kxld_addr_t *_target, 
-    kxld_addr_t *_pair_target, const KXLDReloc *reloc, 
-    const KXLDArray *sectarray, const KXLDSymtab *symtab);
+static kern_return_t calculate_targets(KXLDRelocator *relocator, 
+    kxld_addr_t *_target, kxld_addr_t *_pair_target, const KXLDReloc *reloc);
+
+static kxld_addr_t align_raw_function_address(const KXLDRelocator *relocator, 
+    kxld_addr_t value);
+
 static kern_return_t get_target_by_address_lookup(kxld_addr_t *target, 
     kxld_addr_t addr, const KXLDArray *sectarray);
 
+static kern_return_t check_for_direct_pure_virtual_call(
+    const KXLDRelocator *relocator, u_long offset);
+
 /*******************************************************************************
 *******************************************************************************/
 kern_return_t 
-kxld_relocator_init(KXLDRelocator *relocator, cpu_type_t cputype, 
+kxld_relocator_init(KXLDRelocator *relocator, u_char *file,
+    const KXLDSymtab *symtab, const KXLDArray *sectarray, cpu_type_t cputype, 
     cpu_subtype_t cpusubtype __unused, boolean_t swap)
 {
     kern_return_t rval = KERN_FAILURE;
@@ -183,6 +197,7 @@ kxld_relocator_init(KXLDRelocator *relocator, cpu_type_t cputype,
         relocator->reloc_is_pair = generic_reloc_is_pair;
         relocator->reloc_has_got = generic_reloc_has_got;
         relocator->process_reloc = generic_process_reloc;
+        relocator->function_align = 0;
         relocator->is_32_bit = TRUE;
         break;
 #endif /* KXLD_USER_OR_I386 */
@@ -192,6 +207,7 @@ kxld_relocator_init(KXLDRelocator *relocator, cpu_type_t cputype,
         relocator->reloc_is_pair = ppc_reloc_is_pair;
         relocator->reloc_has_got = ppc_reloc_has_got;
         relocator->process_reloc = ppc_process_reloc;
+        relocator->function_align = 0;
         relocator->is_32_bit = TRUE;
         break;
 #endif /* KXLD_USER_OR_PPC */
@@ -201,6 +217,7 @@ kxld_relocator_init(KXLDRelocator *relocator, cpu_type_t cputype,
         relocator->reloc_is_pair = x86_64_reloc_is_pair;
         relocator->reloc_has_got = x86_64_reloc_has_got;
         relocator->process_reloc = x86_64_process_reloc;
+        relocator->function_align = 0;
         relocator->is_32_bit = FALSE;
         break;
 #endif /* KXLD_USER_OR_X86_64 */
@@ -210,6 +227,7 @@ kxld_relocator_init(KXLDRelocator *relocator, cpu_type_t cputype,
         relocator->reloc_is_pair = arm_reloc_is_pair;
         relocator->reloc_has_got = arm_reloc_has_got;
         relocator->process_reloc = arm_process_reloc;
+        relocator->function_align = 1;
         relocator->is_32_bit = TRUE;
         break;
 #endif /* KXLD_USER_OR_ARM */
@@ -220,6 +238,9 @@ kxld_relocator_init(KXLDRelocator *relocator, cpu_type_t cputype,
         goto finish;
     }
 
+    relocator->file = file;
+    relocator->symtab = symtab;
+    relocator->sectarray = sectarray;
     relocator->is_32_bit = kxld_is_32_bit(cputype);
     relocator->swap = swap;
 
@@ -238,8 +259,8 @@ kxld_reloc_create_macho(KXLDArray *relocarray, const KXLDRelocator *relocator,
     kern_return_t rval = KERN_FAILURE;
     KXLDReloc *reloc = NULL;
     u_int nrelocs = 0;
-    const struct relocation_info *src = NULL, *prev_src = NULL;
-    const struct scattered_relocation_info *scatsrc = NULL, *prev_scatsrc = NULL;
+    const struct relocation_info *src = NULL;
+    const struct scattered_relocation_info *scatsrc = NULL;
     u_int i = 0;
     u_int reloc_index = 0;
 
@@ -313,9 +334,7 @@ kxld_reloc_create_macho(KXLDArray *relocarray, const KXLDRelocator *relocator,
                 ++i;
                 require_action(i < nsrcs, finish, rval=KERN_FAILURE);
 
-                prev_src = src;
                 src = srcs + i;
-                prev_scatsrc = (const struct scattered_relocation_info *) prev_src;
                 scatsrc = (const struct scattered_relocation_info *) src;
                  
                 if (src->r_address & R_SCATTERED) {
@@ -447,25 +466,23 @@ kxld_relocator_has_got(const KXLDRelocator *relocator, u_int r_type)
 /*******************************************************************************
 *******************************************************************************/
 KXLDSym *
-kxld_reloc_get_symbol(const KXLDRelocator *relocator, const KXLDReloc *reloc, 
-    u_char *data, const KXLDSymtab *symtab)
+kxld_reloc_get_symbol(const KXLDRelocator *relocator, const KXLDReloc *reloc,
+    const u_char *data)
 {
     KXLDSym *sym = NULL;
     kxld_addr_t value = 0;
 
     check(reloc);
-    check(symtab);
 
     switch (reloc->target_type) {
     case KXLD_TARGET_SYMBOLNUM:
-        sym = kxld_symtab_get_symbol_by_index(symtab, reloc->target);
+        sym = kxld_symtab_get_symbol_by_index(relocator->symtab, reloc->target);
         break;
     case KXLD_TARGET_SECTNUM:
-        if (data) {
-            KXLD_3264_FUNC(relocator->is_32_bit, value,
-                get_pointer_at_addr_32, get_pointer_at_addr_64,
-                data, reloc->address, relocator);
-            sym = kxld_symtab_get_cxx_symbol_by_value(symtab, value);           
+        if (data) { 
+            value = kxld_relocator_get_pointer_at_addr(relocator, data, 
+                reloc->address);
+            sym = kxld_symtab_get_cxx_symbol_by_value(relocator->symtab, value);           
         }
         break;
     default:
@@ -521,26 +538,40 @@ finish:
     return reloc;
 }
 
+/*******************************************************************************
+*******************************************************************************/
+kxld_addr_t
+kxld_relocator_get_pointer_at_addr(const KXLDRelocator *relocator,
+    const u_char *data, u_long offset)
+{
+    kxld_addr_t value;
+
+    KXLD_3264_FUNC(relocator->is_32_bit, value,
+        get_pointer_at_addr_32, get_pointer_at_addr_64,
+        relocator, data, offset);
+
+    return value;
+}
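+
+/*******************************************************************************
+* KXLD_3264_FUNC (from kxld_util.h) is assumed to reduce to a simple width
+* dispatch, roughly:
+*
+*     if (relocator->is_32_bit) {
+*         value = get_pointer_at_addr_32(relocator, data, offset);
+*     } else {
+*         value = get_pointer_at_addr_64(relocator, data, offset);
+*     }
+*******************************************************************************/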
+
 #if KXLD_USER_OR_ILP32
 /*******************************************************************************
 *******************************************************************************/
 static kxld_addr_t
-get_pointer_at_addr_32(u_char *data, u_long offset,
-    const KXLDRelocator *relocator __unused)
+get_pointer_at_addr_32(const KXLDRelocator *relocator, 
+    const u_char *data, u_long offset)
 {
     uint32_t addr = 0;
     
     check(relocator);
-    check(data);
 
-    addr = *(uint32_t *) (data + offset);
+    addr = *(const uint32_t *) (data + offset);
 #if !KERNEL
     if (relocator->swap) {
         addr = OSSwapInt32(addr);
     }
 #endif
 
-    return (kxld_addr_t) addr;
+    return align_raw_function_address(relocator, addr);
 }
 #endif /* KXLD_USER_OR_ILP32 */
 
@@ -548,31 +579,55 @@ get_pointer_at_addr_32(u_char *data, u_long offset,
 /*******************************************************************************
 *******************************************************************************/
 static kxld_addr_t
-get_pointer_at_addr_64(u_char *data, u_long offset,
-    const KXLDRelocator *relocator __unused)
+get_pointer_at_addr_64(const KXLDRelocator *relocator, 
+    const u_char *data, u_long offset)
 {
     uint64_t addr = 0;
     
     check(relocator);
-    check(data);
 
-    addr = *(uint64_t *) (data + offset);
+    addr = *(const uint64_t *) (data + offset);
 #if !KERNEL
     if (relocator->swap) {
         addr = OSSwapInt64(addr);
     }
 #endif
 
-    return (kxld_addr_t) addr;
+    return align_raw_function_address(relocator, addr);
 }
 #endif /* KXLD_USER_OR_LP64 */
 
+/*******************************************************************************
+*******************************************************************************/
+void 
+kxld_relocator_set_vtables(KXLDRelocator *relocator, 
+    const struct kxld_dict *vtables)
+{
+    relocator->vtables = vtables;
+}
+
+/*******************************************************************************
+* When we're inspecting the raw binary and not the symbol table, value may
+* hold a THUMB address (with bit 0 set to 1), but the symbol index will have
+* the real address (bit 0 cleared). So if bit 0 is set here, we clear it. This only
+* impacts ARM for now, but it's implemented as a generic function alignment
+* mask.
+*******************************************************************************/
+static kxld_addr_t
+align_raw_function_address(const KXLDRelocator *relocator, kxld_addr_t value)
+{
+    if (relocator->function_align) { 
+        value &= ~((1ULL << relocator->function_align) - 1); 
+    }
+
+    return value; 
+}
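+
+/*******************************************************************************
+* Worked example (hypothetical address): on ARM, function_align is 1, so the
+* mask is ~((1ULL << 1) - 1) = ~1.  A Thumb address such as 0x2001 becomes
+* 0x2000, matching the address recorded for the function itself.
+*******************************************************************************/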
+
 /*******************************************************************************
 *******************************************************************************/
 kern_return_t 
-kxld_relocator_process_sect_reloc(const KXLDRelocator *relocator,
-    const KXLDReloc *reloc, const struct kxld_sect *sect,
-    const KXLDArray *sectarray, const struct kxld_symtab *symtab)
+kxld_relocator_process_sect_reloc(KXLDRelocator *relocator,
+    const KXLDReloc *reloc, const struct kxld_sect *sect)
 {
     kern_return_t rval = KERN_FAILURE;
     u_char *instruction = NULL;
@@ -585,8 +640,6 @@ kxld_relocator_process_sect_reloc(const KXLDRelocator *relocator,
     check(relocator);
     check(reloc);
     check(sect);
-    check(sectarray);
-    check(symtab);
 
     /* Find the instruction */
 
@@ -594,7 +647,7 @@ kxld_relocator_process_sect_reloc(const KXLDRelocator *relocator,
 
     /* Calculate the target */
 
-    rval = calculate_targets(&target, &pair_target, reloc, sectarray, symtab);
+    rval = calculate_targets(relocator, &target, &pair_target, reloc);
     require_noerr(rval, finish);
 
     base_pc = reloc->address;
@@ -603,13 +656,14 @@ kxld_relocator_process_sect_reloc(const KXLDRelocator *relocator,
 
     /* Relocate */
 
-    rval = relocator->process_reloc(instruction, reloc->length, reloc->pcrel,
-        base_pc, link_pc, link_disp, reloc->reloc_type, target, pair_target, 
-        relocator->swap);
+    rval = relocator->process_reloc(relocator, instruction, reloc->length, 
+        reloc->pcrel, base_pc, link_pc, link_disp, reloc->reloc_type, target, 
+        pair_target, relocator->swap);
     require_noerr(rval, finish);
     
     /* Return */
 
+    relocator->current_vtable = NULL;
     rval = KERN_SUCCESS;
 
 finish:
@@ -637,9 +691,8 @@ finish:
 /*******************************************************************************
 *******************************************************************************/
 kern_return_t 
-kxld_relocator_process_table_reloc(const KXLDRelocator *relocator,
-    const KXLDReloc *reloc, const KXLDSeg *seg, u_char *file, 
-    const struct kxld_array *sectarray, const struct kxld_symtab *symtab)
+kxld_relocator_process_table_reloc(KXLDRelocator *relocator,
+    const KXLDReloc *reloc, const KXLDSeg *seg, kxld_addr_t link_addr)
 {
     kern_return_t rval = KERN_FAILURE;
     u_char *instruction = NULL;
@@ -647,36 +700,34 @@ kxld_relocator_process_table_reloc(const KXLDRelocator *relocator,
     kxld_addr_t pair_target = 0;
     kxld_addr_t base_pc = 0;
     kxld_addr_t link_pc = 0;
-    kxld_addr_t link_disp = 0;
+    u_long offset = 0;
 
     check(relocator);
     check(reloc);
-    check(file);
-    check(sectarray);
-    check(symtab);
 
     /* Find the instruction */
 
-    instruction = file + seg->fileoff + reloc->address;
+    offset = (u_long)(seg->fileoff + (reloc->address - seg->base_addr));
+    instruction = relocator->file + offset;
 
     /* Calculate the target */
 
-    rval = calculate_targets(&target, &pair_target, reloc, sectarray, symtab);
+    rval = calculate_targets(relocator, &target, &pair_target, reloc);
     require_noerr(rval, finish);
 
     base_pc = reloc->address;
-    link_pc = base_pc + seg->link_addr;
-    link_disp = seg->link_addr - seg->base_addr;
+    link_pc = base_pc + link_addr;
 
     /* Relocate */
 
-    rval = relocator->process_reloc(instruction, reloc->length, reloc->pcrel,
-        base_pc, link_pc, link_disp, reloc->reloc_type, target, pair_target, 
-        relocator->swap);
+    rval = relocator->process_reloc(relocator, instruction, reloc->length, 
+        reloc->pcrel, base_pc, link_pc, link_addr, reloc->reloc_type, target,
+        pair_target, relocator->swap);
     require_noerr(rval, finish);
     
     /* Return */
 
+    relocator->current_vtable = NULL;
     rval = KERN_SUCCESS;
 
 finish:
@@ -686,19 +737,19 @@ finish:
 /*******************************************************************************
 *******************************************************************************/
 static kern_return_t
-calculate_targets(kxld_addr_t *_target, kxld_addr_t *_pair_target,
-    const KXLDReloc *reloc, const KXLDArray *sectarray, const KXLDSymtab *symtab)
+calculate_targets(KXLDRelocator *relocator, kxld_addr_t *_target, 
+    kxld_addr_t *_pair_target, const KXLDReloc *reloc)
 {
     kern_return_t rval = KERN_FAILURE;
     const KXLDSect *sect = NULL;
     const KXLDSym *sym = NULL;
     kxld_addr_t target = 0;
     kxld_addr_t pair_target = 0;
+    char *demangled_name = NULL;
+    size_t demangled_length = 0;
 
     check(_target);
     check(_pair_target);
-    check(sectarray);
-    check(symtab);
     *_target = 0;
     *_pair_target = 0;
 
@@ -711,12 +762,13 @@ calculate_targets(kxld_addr_t *_target, kxld_addr_t *_pair_target,
             reloc->pair_target_type == KXLD_TARGET_VALUE,
             finish, rval=KERN_FAILURE);
 
-        rval = get_target_by_address_lookup(&target, reloc->target, sectarray);
+        rval = get_target_by_address_lookup(&target, reloc->target, 
+            relocator->sectarray);
         require_noerr(rval, finish);
 
         if (reloc->pair_target_type == KXLD_TARGET_LOOKUP) {
             rval = get_target_by_address_lookup(&pair_target,
-                reloc->pair_target, sectarray);
+                reloc->pair_target, relocator->sectarray);
             require_noerr(rval, finish);
         } else if (reloc->pair_target_type == KXLD_TARGET_VALUE) {
             pair_target = reloc->pair_target;
@@ -728,7 +780,7 @@ calculate_targets(kxld_addr_t *_target, kxld_addr_t *_pair_target,
             finish, rval=KERN_FAILURE);
 
         /* Get the target's section by section number */
-        sect = kxld_array_get_item(sectarray, reloc->target);
+        sect = kxld_array_get_item(relocator->sectarray, reloc->target);
         require_action(sect, finish, rval=KERN_FAILURE);
 
         /* target is the change in the section's address */
@@ -751,10 +803,27 @@ calculate_targets(kxld_addr_t *_target, kxld_addr_t *_pair_target,
             rval=KERN_FAILURE);
 
         /* Get the target's symbol by symbol number */
-        sym = kxld_symtab_get_symbol_by_index(symtab, reloc->target);
+        sym = kxld_symtab_get_symbol_by_index(relocator->symtab, reloc->target);
         require_action(sym, finish, rval=KERN_FAILURE);
+
+        /* If this symbol is a padslot that has already been replaced, then the
+         * only way a relocation entry can still reference it is if there is a
+         * vtable that has not been patched.  The vtable patcher uses the
+         * MetaClass structure to find classes for patching, so an unpatched
+         * vtable means that there is an OSObject-derived class that is missing
+         * its OSDeclare/OSDefine macros.
+         */
+        require_action(!kxld_sym_is_padslot(sym) || !kxld_sym_is_replaced(sym), 
+            finish, rval=KERN_FAILURE;
+            kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogRelocatingPatchedSym,
+                kxld_demangle(sym->name, &demangled_name, &demangled_length)));
+
         target = sym->link_addr;
 
+        if (kxld_sym_is_vtable(sym)) {
+            relocator->current_vtable = kxld_dict_find(relocator->vtables, sym->name);
+        }
+
         /* Some relocation types need the GOT entry address instead of the
          * symbol's actual address.  These types don't have pair relocation
          * entries, so we store the GOT entry address as the pair target.
@@ -762,7 +831,8 @@ calculate_targets(kxld_addr_t *_target, kxld_addr_t *_pair_target,
         if (reloc->pair_target_type == KXLD_TARGET_VALUE) {
             pair_target = reloc->pair_target;
         } else if (reloc->pair_target_type == KXLD_TARGET_SYMBOLNUM ) {
-            sym = kxld_symtab_get_symbol_by_index(symtab, reloc->pair_target);
+            sym = kxld_symtab_get_symbol_by_index(relocator->symtab, 
+                reloc->pair_target);
             require_action(sym, finish, rval=KERN_FAILURE);
             pair_target = sym->link_addr;
         } else if (reloc->pair_target_type == KXLD_TARGET_GOT) {
@@ -779,6 +849,7 @@ calculate_targets(kxld_addr_t *_target, kxld_addr_t *_pair_target,
     rval = KERN_SUCCESS;
 
 finish:
+    if (demangled_name) kxld_free(demangled_name, demangled_length);
     return rval;
 }
 
@@ -804,9 +875,10 @@ get_target_by_address_lookup(kxld_addr_t *target, kxld_addr_t addr,
         end = start + sect->size;
 
         if (start <= addr && addr < end) break;
+        
+        sect = NULL;
     }
-    require_action(i < sectarray->nitems, finish, 
-        rval=KERN_FAILURE);
+    require_action(sect, finish, rval=KERN_FAILURE);
 
     *target = sect->link_addr - sect->base_addr;
     rval = KERN_SUCCESS;
@@ -815,6 +887,29 @@ finish:
     return rval;
 }
 
+/*******************************************************************************
+*******************************************************************************/
+static kern_return_t
+check_for_direct_pure_virtual_call(const KXLDRelocator *relocator, u_long offset)
+{
+    kern_return_t rval = KERN_FAILURE;
+    const KXLDVTableEntry *entry = NULL;
+
+    if (relocator->current_vtable) {
+        entry = kxld_vtable_get_entry_for_offset(relocator->current_vtable, 
+            offset, relocator->is_32_bit);
+        require_action(!entry || !entry->patched.name ||
+            !kxld_sym_name_is_pure_virtual(entry->patched.name),
+            finish, rval=KERN_FAILURE;
+            kxld_log(kKxldLogLinking, kKxldLogErr, 
+                kKxldLogDirectPureVirtualCall));
+    }
+
+    rval = KERN_SUCCESS;
+finish:
+    return rval;
+}
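+
+/*******************************************************************************
+* Sketch of what the check above catches (offsets hypothetical): when the
+* current relocation targets a vtable, the instruction's immediate is an
+* offset into that vtable.  Assuming entries are pointer-sized, an offset of
+* 0x30 on a 64-bit target selects entry 0x30 / 8 = 6; if that patched entry
+* is ___cxa_pure_virtual, the kext would be calling a pure virtual function
+* directly, and the link fails with kKxldLogDirectPureVirtualCall.
+*******************************************************************************/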
+
 #if KXLD_USER_OR_I386 
 /*******************************************************************************
 *******************************************************************************/
@@ -847,10 +942,10 @@ static boolean_t generic_reloc_has_got(u_int _type __unused)
 /*******************************************************************************
 *******************************************************************************/
 static kern_return_t 
-generic_process_reloc(u_char *instruction, u_int length, u_int pcrel,
-    kxld_addr_t _base_pc, kxld_addr_t _link_pc, kxld_addr_t _link_disp __unused, 
-    u_int _type, kxld_addr_t _target, kxld_addr_t _pair_target, 
-    boolean_t swap __unused)
+generic_process_reloc(const KXLDRelocator *relocator, u_char *instruction, 
+    u_int length, u_int pcrel, kxld_addr_t _base_pc, kxld_addr_t _link_pc, 
+    kxld_addr_t _link_disp __unused, u_int _type, kxld_addr_t _target, 
+    kxld_addr_t _pair_target, boolean_t swap __unused)
 {
     kern_return_t rval = KERN_FAILURE;
     uint32_t base_pc = (uint32_t) _base_pc;
@@ -873,6 +968,9 @@ generic_process_reloc(u_char *instruction, u_int length, u_int pcrel,
     if (swap) instr_data = OSSwapInt32(instr_data);
 #endif
 
+    rval = check_for_direct_pure_virtual_call(relocator, instr_data);
+    require_noerr(rval, finish);
+
     switch (type) {
     case GENERIC_RELOC_VANILLA:
         instr_data += target;
@@ -944,10 +1042,10 @@ static boolean_t ppc_reloc_has_got(u_int _type __unused)
 /*******************************************************************************
 *******************************************************************************/
 static kern_return_t
-ppc_process_reloc(u_char *instruction, u_int length, u_int pcrel,
-    kxld_addr_t _base_pc, kxld_addr_t _link_pc, kxld_addr_t _link_disp __unused,
-    u_int _type, kxld_addr_t _target, kxld_addr_t _pair_target __unused,
-    boolean_t swap __unused)
+ppc_process_reloc(const KXLDRelocator *relocator __unused, u_char *instruction, 
+    u_int length, u_int pcrel, kxld_addr_t _base_pc, kxld_addr_t _link_pc, 
+    kxld_addr_t _link_disp __unused, u_int _type, kxld_addr_t _target, 
+    kxld_addr_t _pair_target __unused, boolean_t swap __unused)
 {
     kern_return_t rval = KERN_FAILURE;
     uint32_t *instr_addr = NULL;
@@ -975,6 +1073,9 @@ ppc_process_reloc(u_char *instruction, u_int length, u_int pcrel,
     if (swap) instr_data = OSSwapInt32(instr_data);
 #endif
 
+    rval = check_for_direct_pure_virtual_call(relocator, instr_data);
+    require_noerr(rval, finish);
+
     switch (type) {
     case PPC_RELOC_VANILLA:
         require_action(!pcrel, finish, rval=KERN_FAILURE);
@@ -1123,10 +1224,10 @@ x86_64_reloc_has_got(u_int _type)
 /*******************************************************************************
 *******************************************************************************/
 static kern_return_t 
-x86_64_process_reloc(u_char *instruction, u_int length, u_int pcrel,
-    kxld_addr_t _base_pc __unused, kxld_addr_t _link_pc, kxld_addr_t _link_disp,
-    u_int _type, kxld_addr_t _target, kxld_addr_t _pair_target, 
-    boolean_t swap __unused)
+x86_64_process_reloc(const KXLDRelocator *relocator __unused, u_char *instruction, 
+    u_int length, u_int pcrel, kxld_addr_t _base_pc __unused, 
+    kxld_addr_t _link_pc, kxld_addr_t _link_disp, u_int _type, 
+    kxld_addr_t _target, kxld_addr_t _pair_target, boolean_t swap __unused)
 {
     kern_return_t rval = KERN_FAILURE;
     enum reloc_type_x86_64 type = _type;
@@ -1152,6 +1253,9 @@ x86_64_process_reloc(u_char *instruction, u_int length, u_int pcrel,
         if (swap) instr32 = OSSwapInt32(instr32);
 #endif
 
+        rval = check_for_direct_pure_virtual_call(relocator, instr32);
+        require_noerr(rval, finish);
+
         /* There are a number of different small adjustments for pc-relative
          * relocation entries.  The general case is to subtract the size of the
          * relocation (represented by the length parameter), and it applies to
@@ -1251,6 +1355,9 @@ x86_64_process_reloc(u_char *instruction, u_int length, u_int pcrel,
         if (swap) instr64 = OSSwapInt64(instr64);
 #endif
 
+        rval = check_for_direct_pure_virtual_call(relocator, (u_long) instr64);
+        require_noerr(rval, finish);
+
         switch (type) {
         case X86_64_RELOC_UNSIGNED:
             require_action(!pcrel, finish, rval=KERN_FAILURE);
@@ -1349,10 +1456,11 @@ arm_reloc_has_got(u_int _type __unused)
 /*******************************************************************************
 *******************************************************************************/
 static kern_return_t 
-arm_process_reloc(u_char *instruction, u_int length, u_int pcrel,
-    kxld_addr_t _base_pc __unused, kxld_addr_t _link_pc __unused, kxld_addr_t _link_disp __unused,
-    u_int _type __unused, kxld_addr_t _target __unused, kxld_addr_t _pair_target __unused, 
-    boolean_t swap __unused)
+arm_process_reloc(const KXLDRelocator *relocator __unused, u_char *instruction, 
+    u_int length, u_int pcrel, kxld_addr_t _base_pc __unused, 
+    kxld_addr_t _link_pc __unused, kxld_addr_t _link_disp __unused,
+    u_int _type __unused, kxld_addr_t _target __unused, 
+    kxld_addr_t _pair_target __unused, boolean_t swap __unused)
 {
     kern_return_t rval = KERN_FAILURE;
     uint32_t *instr_addr = NULL;
@@ -1375,9 +1483,11 @@ arm_process_reloc(u_char *instruction, u_int length, u_int pcrel,
     if (swap) instr_data = OSSwapInt32(instr_data);
 #endif
 
+    rval = check_for_direct_pure_virtual_call(relocator, instr_data);
+    require_noerr(rval, finish);
+
     switch (type) {
     case ARM_RELOC_VANILLA:
-        require_action(!pcrel, finish, rval=KERN_FAILURE);
         instr_data += target;
         break;
 
diff --git a/libkern/kxld/kxld_reloc.h b/libkern/kxld/kxld_reloc.h
index 679a95870..40a610d1a 100644
--- a/libkern/kxld/kxld_reloc.h
+++ b/libkern/kxld/kxld_reloc.h
@@ -37,16 +37,23 @@
 #endif
 
 struct kxld_array;
+struct kxld_dict;
+struct kxld_sect;
+struct kxld_seg;
 struct kxld_sym;
 struct kxld_symtab;
+struct kxld_vtable;
+struct relocation_info;
+
 typedef struct kxld_relocator KXLDRelocator;
 typedef struct kxld_reloc KXLDReloc;
 
 typedef boolean_t (*RelocHasPair)(u_int r_type);
 typedef boolean_t (*RelocIsPair)(u_int r_type, u_int prev_r_type);
 typedef boolean_t (*RelocHasGot)(u_int r_type);
-typedef kern_return_t(*ProcessReloc)(u_char *instruction, u_int length, u_int pcrel, 
-    kxld_addr_t base_pc, kxld_addr_t link_pc, kxld_addr_t link_disp, u_int type, 
+typedef kern_return_t(*ProcessReloc)(const KXLDRelocator *relocator, 
+    u_char *instruction, u_int length, u_int pcrel, kxld_addr_t base_pc, 
+    kxld_addr_t link_pc, kxld_addr_t link_disp, u_int type, 
     kxld_addr_t target, kxld_addr_t pair_target, boolean_t swap);
 
 struct kxld_relocator {
@@ -54,6 +61,12 @@ struct kxld_relocator {
     RelocIsPair reloc_is_pair;
     RelocHasGot reloc_has_got;
     ProcessReloc process_reloc;
+    const struct kxld_symtab *symtab;
+    const struct kxld_array *sectarray;
+    const struct kxld_dict *vtables;
+    const struct kxld_vtable *current_vtable;
+    u_char *file;
+    u_int function_align; /* Log2 of the function alignment (e.g. 1 on ARM) */
     boolean_t is_32_bit;
     boolean_t swap;
 };
@@ -69,18 +82,12 @@ struct kxld_reloc {
     u_int pcrel:1;
 };
 
-struct kxld_array;
-struct kxld_sect;
-struct kxld_seg;
-struct kxld_symtab;
-struct relocation_info;
-
 /*******************************************************************************
 * Constructors and Destructors
 *******************************************************************************/
-
-kern_return_t kxld_relocator_init(KXLDRelocator *relocator, cpu_type_t cputype,
-    cpu_subtype_t cpusubtype, boolean_t swap)
+kern_return_t kxld_relocator_init(KXLDRelocator *relocator, u_char *file,
+    const struct kxld_symtab *symtab, const struct kxld_array *sectarray, 
+    cpu_type_t cputype, cpu_subtype_t cpusubtype, boolean_t swap)
     __attribute__((nonnull,visibility("hidden")));
 
 kern_return_t kxld_reloc_create_macho(struct kxld_array *relocarray,
@@ -104,10 +111,13 @@ boolean_t kxld_relocator_is_pair(const KXLDRelocator *relocator, u_int r_type,
 boolean_t kxld_relocator_has_got(const KXLDRelocator *relocator, u_int r_type)
     __attribute__((pure, nonnull,visibility("hidden")));
 
+kxld_addr_t kxld_relocator_get_pointer_at_addr(const KXLDRelocator *relocator,
+    const u_char *data, u_long offset)
+    __attribute__((pure, nonnull,visibility("hidden")));
+
 struct kxld_sym * kxld_reloc_get_symbol(const KXLDRelocator *relocator,
-    const KXLDReloc *reloc, u_char *data, 
-    const struct kxld_symtab *symtab)
-    __attribute__((pure, nonnull(1,2,4), visibility("hidden")));
+    const KXLDReloc *reloc, const u_char *data)
+    __attribute__((pure, nonnull(1,2), visibility("hidden")));
 
 kern_return_t kxld_reloc_get_reloc_index_by_offset(const struct kxld_array *relocs, 
     kxld_size_t offset, u_int *idx)
@@ -124,16 +134,18 @@ KXLDReloc * kxld_reloc_get_reloc_by_offset(const struct kxld_array *relocs,
 kern_return_t kxld_reloc_update_symindex(KXLDReloc *reloc, u_int symindex)
     __attribute__((nonnull,visibility("hidden")));
 
-kern_return_t kxld_relocator_process_sect_reloc(const KXLDRelocator *relocator,
-    const KXLDReloc *reloc, const struct kxld_sect *sect,
-    const struct kxld_array *sectarray, const struct kxld_symtab *symtab)
+void kxld_relocator_set_vtables(KXLDRelocator *relocator, 
+    const struct kxld_dict *vtables)
+    __attribute__((nonnull,visibility("hidden")));
+
+kern_return_t kxld_relocator_process_sect_reloc(KXLDRelocator *relocator,
+    const KXLDReloc *reloc, const struct kxld_sect *sect)
     __attribute__((nonnull,visibility("hidden")));
 
-kern_return_t kxld_relocator_process_table_reloc(const KXLDRelocator *relocator,
-    const KXLDReloc *reloc, const struct kxld_seg *seg, u_char *file, 
-    const struct kxld_array *sectarray, 
-    const struct kxld_symtab *symtab)
+kern_return_t kxld_relocator_process_table_reloc(KXLDRelocator *relocator,
+    const KXLDReloc *reloc, const struct kxld_seg *seg, kxld_addr_t link_addr)
     __attribute__((nonnull,visibility("hidden")));
 
 #endif /* _KXLD_RELOC_H */
 
+
diff --git a/libkern/kxld/kxld_sect.c b/libkern/kxld/kxld_sect.c
index 0c286b5b6..d00d6596d 100644
--- a/libkern/kxld/kxld_sect.c
+++ b/libkern/kxld/kxld_sect.c
@@ -40,7 +40,7 @@
 #include "kxld_util.h"
 
 static kern_return_t export_macho(const KXLDSect *sect, u_char *buf, u_long offset, 
-    u_long bufsize, boolean_t is_32_bit);
+    u_long bufsize);
 #if KXLD_USER_OR_ILP32
 static kern_return_t sect_export_macho_header_32(const KXLDSect *sect, u_char *buf, 
     u_long *header_offset, u_long header_size, u_long data_offset);
@@ -326,7 +326,7 @@ kxld_sect_align_address(const KXLDSect *sect, kxld_addr_t address)
 kern_return_t
 kxld_sect_export_macho_to_file_buffer(const KXLDSect *sect, u_char *buf,
     u_long *header_offset, u_long header_size, u_long *data_offset, 
-    u_long data_size, boolean_t is_32_bit)
+    u_long data_size, boolean_t is_32_bit __unused)
 {
     kern_return_t rval = KERN_FAILURE;
 
@@ -351,7 +351,7 @@ kxld_sect_export_macho_to_file_buffer(const KXLDSect *sect, u_char *buf,
             sect, buf, header_offset, header_size, *data_offset);
         require_noerr(rval, finish);
 
-        rval = export_macho(sect, buf, *data_offset, data_size, is_32_bit);
+        rval = export_macho(sect, buf, *data_offset, data_size);
         require_noerr(rval, finish);
 
         *data_offset += (u_long) sect->size;
@@ -369,7 +369,7 @@ kern_return_t
 kxld_sect_export_macho_to_vm(const KXLDSect *sect, u_char *buf, 
     u_long *header_offset, u_long header_size, 
     kxld_addr_t link_addr, u_long data_size, 
-    boolean_t is_32_bit)
+    boolean_t is_32_bit __unused)
 {
     kern_return_t rval = KERN_FAILURE;
     u_long data_offset = (u_long) (sect->link_addr - link_addr);
@@ -383,7 +383,7 @@ kxld_sect_export_macho_to_vm(const KXLDSect *sect, u_char *buf,
         sect, buf, header_offset, header_size, data_offset);
     require_noerr(rval, finish);
 
-    rval = export_macho(sect, buf, data_offset, data_size, is_32_bit);
+    rval = export_macho(sect, buf, data_offset, data_size);
     require_noerr(rval, finish);
 
     rval = KERN_SUCCESS;
@@ -395,8 +395,7 @@ finish:
 /*******************************************************************************
 *******************************************************************************/
 static kern_return_t
-export_macho(const KXLDSect *sect, u_char *buf, u_long offset, u_long bufsize,
-    boolean_t is_32_bit)
+export_macho(const KXLDSect *sect, u_char *buf, u_long offset, u_long bufsize)
 {
     kern_return_t rval = KERN_FAILURE;
 
@@ -424,11 +423,6 @@ export_macho(const KXLDSect *sect, u_char *buf, u_long offset, u_long bufsize,
     case S_NON_LAZY_SYMBOL_POINTERS:
     case S_MOD_INIT_FUNC_POINTERS:
     case S_MOD_TERM_FUNC_POINTERS:
-        require_action(!is_32_bit, finish, rval=KERN_FAILURE;
-            kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
-                "Invalid section type in 32-bit kext: %u.", 
-                sect->flags & SECTION_TYPE));
-        /* Fall through */
     case S_REGULAR:
     case S_CSTRING_LITERALS:
     case S_4BYTE_LITERALS:
@@ -607,8 +601,7 @@ finish:
 /*******************************************************************************
 *******************************************************************************/
 kern_return_t
-kxld_sect_process_relocs(KXLDSect *sect, const KXLDRelocator *relocator,
-    const KXLDArray *sectarray, const KXLDSymtab *symtab)
+kxld_sect_process_relocs(KXLDSect *sect, KXLDRelocator *relocator)
 {
     kern_return_t rval = KERN_FAILURE;
     KXLDReloc *reloc = NULL;
@@ -616,13 +609,11 @@ kxld_sect_process_relocs(KXLDSect *sect, const KXLDRelocator *relocator,
 
     for (i = 0; i < sect->relocs.nitems; ++i) {
         reloc = kxld_array_get_item(&sect->relocs, i);
-        rval = kxld_relocator_process_sect_reloc(relocator, reloc, sect, 
-            sectarray, symtab);
+        rval = kxld_relocator_process_sect_reloc(relocator, reloc, sect);
         require_noerr(rval, finish);
     }
 
     rval = KERN_SUCCESS;
-
 finish:
     return rval;
 }
diff --git a/libkern/kxld/kxld_sect.h b/libkern/kxld/kxld_sect.h
index cf79fde75..2f655b4af 100644
--- a/libkern/kxld/kxld_sect.h
+++ b/libkern/kxld/kxld_sect.h
@@ -125,7 +125,7 @@ kxld_addr_t kxld_sect_align_address(const KXLDSect *sect, kxld_addr_t address)
 
 /* Returns the space required by the exported Mach-O header */
 u_long kxld_sect_get_macho_header_size(boolean_t is_32_bit)
-    __attribute__((const, nonnull, visibility("hidden")));
+    __attribute__((const, visibility("hidden")));
 
 /* Returns the space required by the exported Mach-O data */
 u_long kxld_sect_get_macho_data_size(const KXLDSect *sect)
@@ -176,8 +176,7 @@ kern_return_t kxld_sect_populate_got(KXLDSect *sect, struct kxld_symtab *symtab,
 
 /* Processes all of a section's relocation entries */
 kern_return_t kxld_sect_process_relocs(KXLDSect *sect, 
-    const struct kxld_relocator *relocator, const KXLDArray *sectarray,
-    const struct kxld_symtab *symtab)
+    struct kxld_relocator *relocator)
     __attribute__((nonnull, visibility("hidden")));
 
 #endif /* _KXLD_SECT_H_ */
diff --git a/libkern/kxld/kxld_seg.c b/libkern/kxld/kxld_seg.c
index 5c11a1f9a..ba14b4917 100644
--- a/libkern/kxld/kxld_seg.c
+++ b/libkern/kxld/kxld_seg.c
@@ -41,6 +41,7 @@
 
 #include "kxld_sect.h"
 #include "kxld_seg.h"
+#include "kxld_symtab.h"
 #include "kxld_util.h"
 
 #define MAX_SEGS 20
@@ -402,6 +403,32 @@ reorder_section(KXLDArray *sects, u_int *sect_reorder_index,
 
     ++(*sect_reorder_index);
 }
+
+/*******************************************************************************
+*******************************************************************************/
+kern_return_t
+kxld_seg_init_linkedit(KXLDArray *segs)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDSeg *seg = NULL;
+    KXLDSeg *le = NULL;
+    
+    rval = kxld_array_resize(segs, 2);
+    require_noerr(rval, finish);
+
+    seg = kxld_array_get_item(segs, 0);
+    le = kxld_array_get_item(segs, 1);
+
+    strlcpy(le->segname, SEG_LINKEDIT, sizeof(le->segname));
+    le->link_addr = round_page(seg->link_addr + seg->vmsize);
+    le->maxprot = VM_PROT_ALL;
+    le->initprot = VM_PROT_DEFAULT;
+
+    rval = KERN_SUCCESS;
+
+finish:
+    return rval;
+}
 #endif /* KXLD_USER_OR_OBJECT */
 
 /*******************************************************************************
@@ -742,8 +769,14 @@ finish:
 void
 kxld_seg_set_vm_protections(KXLDSeg *seg, boolean_t strict_protections)
 {
+    /* This is unnecessary except to make the clang analyzer happy.  When
+     * the analyzer no longer ignores nonnull attributes for if statements,
+     * we can remove this line.
+     */
+    if (!seg) return;
+
     if (strict_protections) {
-        if (streq_safe(seg->segname, SEG_TEXT, sizeof(SEG_TEXT))) {
+        if (streq_safe(seg->segname, SEG_TEXT, const_strlen(SEG_TEXT))) {
             seg->initprot = TEXT_SEG_PROT;
             seg->maxprot = VM_PROT_ALL;
         } else {
@@ -771,3 +804,12 @@ kxld_seg_relocate(KXLDSeg *seg, kxld_addr_t link_addr)
     }
 }
 
+/*******************************************************************************
+*******************************************************************************/
+void 
+kxld_seg_populate_linkedit(KXLDSeg *seg,
+    const KXLDSymtab *symtab, boolean_t is_32_bit)
+{
+    seg->vmsize = round_page(kxld_symtab_get_macho_data_size(symtab, is_32_bit));
+}
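+
+/*******************************************************************************
+* Worked example (hypothetical size): with 4K pages, a symbol table whose
+* Mach-O data occupies 0x1234 bytes yields round_page(0x1234) = 0x2000, so
+* the LINKEDIT segment spans two pages.
+*******************************************************************************/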
+
diff --git a/libkern/kxld/kxld_seg.h b/libkern/kxld/kxld_seg.h
index e6484bf1a..ab5abcdc6 100644
--- a/libkern/kxld/kxld_seg.h
+++ b/libkern/kxld/kxld_seg.h
@@ -39,6 +39,7 @@
 #include "kxld_array.h"
 
 struct kxld_sect;
+struct kxld_symtab;
 struct segment_command;
 struct segment_command_64;
 typedef struct kxld_seg KXLDSeg;
@@ -77,6 +78,9 @@ kern_return_t kxld_seg_create_seg_from_sections(KXLDArray *segarray,
 kern_return_t kxld_seg_finalize_object_segment(KXLDArray *segarray,
     KXLDArray *section_order, u_long hdrsize)
     __attribute__((nonnull, visibility("hidden")));
+
+kern_return_t kxld_seg_init_linkedit(KXLDArray *segs)
+    __attribute__((nonnull, visibility("hidden")));
 #endif /* KXLD_USER_OR_OBJECT */
 
 void kxld_seg_clear(KXLDSeg *seg)
@@ -127,7 +131,11 @@ kern_return_t kxld_seg_finish_init(KXLDSeg *seg)
 void kxld_seg_set_vm_protections(KXLDSeg *seg, boolean_t strict_protections)
     __attribute__((nonnull, visibility("hidden")));
 
-void kxld_seg_relocate(KXLDSeg *Seg, kxld_addr_t link_addr)
+void kxld_seg_relocate(KXLDSeg *seg, kxld_addr_t link_addr)
+    __attribute__((nonnull, visibility("hidden")));
+
+void kxld_seg_populate_linkedit(KXLDSeg *seg,
+    const struct kxld_symtab *symtab, boolean_t is_32_bit)
     __attribute__((nonnull, visibility("hidden")));
 
 #endif /* _KXLD_SEG_H_ */
diff --git a/libkern/kxld/kxld_state.c b/libkern/kxld/kxld_state.c
deleted file mode 100644
index d3a06da19..000000000
--- a/libkern/kxld/kxld_state.c
+++ /dev/null
@@ -1,1072 +0,0 @@
-/*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#include <string.h>
-
-#if !KERNEL
-    #include <libkern/OSByteOrder.h>
-#endif
-
-#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
-#include <AssertMacros.h>
-
-#include "kxld_array.h"
-#include "kxld_dict.h"
-#include "kxld_kext.h"
-#include "kxld_state.h"
-#include "kxld_sym.h"
-#include "kxld_symtab.h"
-#include "kxld_util.h"
-#include "kxld_vtable.h"
-
-#define LINK_STATE_MAGIC 0xF00DD00D
-#define CIGAM_ETATS_KNIL 0x0DD00DF0
-
-#define LINK_STATE_MAGIC_64 0xCAFEF00D
-#define CIGAM_ETATS_KNIL_64 0x0DF0FECA
-
-#define LINK_STATE_VERSION 1
-
-static kern_return_t init_string_index(KXLDDict *strings, KXLDArray *tmps, 
-    KXLDSymtabIterator *iter, const KXLDArray *vtables, u_int nsymentries, 
-    u_long *strsize);
-static kern_return_t add_string_to_index(KXLDDict *strings, const char *str, 
-    KXLDArray *tmps, u_int *tmpi, u_long *stroff);
-static kern_return_t create_link_state(u_char **_file, u_long *_filesize, 
-    const KXLDKext *kext,  KXLDSymtabIterator *iter, const KXLDArray *vtables, 
-    KXLDDict *strings, u_int nsyms, u_int nsymentries, u_long strsize);
-static boolean_t state_is_32_bit(KXLDLinkStateHdr *state);
-
-#if KXLD_USER_OR_ILP32
-static kern_return_t get_symbols_32(KXLDState *state, KXLDDict *defined_symbols,
-    KXLDDict *obsolete_symbols);
-static kern_return_t copy_symbols_32(u_char *file, u_long *data_offset, 
-    KXLDSymtabIterator *iter, const KXLDDict *strings);
-static kern_return_t copy_vtables_32(u_char *file, u_long *header_offset, 
-    u_long *data_offset, const KXLDArray *vtables, const KXLDDict *strings);
-#endif /* KXLD_USER_OR_ILP32*/
-#if KXLD_USER_OR_LP64
-static kern_return_t get_symbols_64(KXLDState *state, KXLDDict *defined_symbols,
-    KXLDDict *obsolete_symbols);
-static kern_return_t copy_symbols_64(u_char *file, u_long *data_offset, 
-    KXLDSymtabIterator *iter, const KXLDDict *strings);
-static kern_return_t copy_vtables_64(u_char *file, u_long *header_offset, 
-    u_long *data_offset, const KXLDArray *vtables, const KXLDDict *strings);
-#endif /* KXLD_USER_OR_ILP64 */
-
-#if !KERNEL
-static boolean_t swap_link_state(u_char *state);
-static void swap_link_state_32(u_char *state);
-static void swap_link_state_64(u_char *state);
-static boolean_t unswap_link_state(u_char *state);
-static void unswap_link_state_32(u_char *state);
-static void unswap_link_state_64(u_char *state);
-static void swap_state_hdr(KXLDLinkStateHdr *state_hdr);
-static void swap_vtable_hdr(KXLDVTableHdr *vtable_hdr);
-static void swap_sym_entry_32(KXLDSymEntry32 *entry);
-static void swap_sym_entry_64(KXLDSymEntry64 *entry);
-#endif
-
-/*******************************************************************************
-*******************************************************************************/
-kern_return_t
-kxld_state_init_from_file(KXLDState *state, u_char *file, 
-    KXLDArray *section_order __unused)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDLinkStateHdr *hdr = (KXLDLinkStateHdr *) file;
-#if KXLD_USER_OR_OBJECT
-    KXLDSectionName *dstname = NULL;
-    KXLDSectionName *srcname = NULL;
-#endif
-    KXLDVTableHdr *vhdr = NULL;
-    KXLDVTable *vtable = NULL;
-    u_int i = 0;
-
-    check(state);
-    check(file);
-
-#if !KERNEL
-    /* Swap the link state file to host byte order for as long this kxld_state
-     * object owns the file.
-     */
-    state->swap = swap_link_state(file);
-#endif
-    require_action(hdr->magic == LINK_STATE_MAGIC || 
-        hdr->magic == LINK_STATE_MAGIC_64,
-        finish, rval=KERN_FAILURE);
-
-    state->file = file;
-
-#if KXLD_USER_OR_OBJECT
-    if (section_order && !section_order->nitems && hdr->nsects) {
-        rval = kxld_array_init(section_order, sizeof(*dstname), hdr->nsects);
-        require_noerr(rval, finish);
-
-        srcname = (KXLDSectionName *) (file + hdr->sectoff);
-        for (i = 0; i < hdr->nsects; ++i, ++srcname) {
-            dstname = kxld_array_get_item(section_order, i);
-            memcpy(dstname, srcname, sizeof(*srcname));
-        }
-    }
-#endif
-
-    rval = kxld_array_init(&state->vtables, sizeof(*vtable), hdr->nvtables);
-    require_noerr(rval, finish);
-    
-    vhdr = (KXLDVTableHdr *) (file + hdr->voff);
-    for (i = 0; i < hdr->nvtables; ++i, ++vhdr) {
-        vtable = kxld_array_get_item(&state->vtables, i);
-        KXLD_3264_FUNC(kxld_is_32_bit(hdr->cputype), rval,
-            kxld_vtable_init_from_link_state_32,
-            kxld_vtable_init_from_link_state_64,
-            vtable, file, vhdr);
-        require_noerr(rval, finish);
-    }
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-void 
-kxld_state_clear(KXLDState *state)
-{
-    KXLDVTable *vtable = NULL;
-    u_int i = 0;
-
-    check(state);
-
-#if !KERNEL
-    /* We use kxld_state objects to wrap the link state files.  Whenever the
-     * file is wrapped by a kxld_state object, the file is kept in host byte
-     * order.  Once we are done, we must return it to target byte order.
-     */
-    if (state->swap) (void)unswap_link_state(state->file);
-#endif
-
-    state->file = NULL;
-    state->swap = FALSE;
-    for (i = 0; i < state->vtables.nitems; ++i) {
-        vtable = kxld_array_get_item(&state->vtables, i);
-        kxld_vtable_clear(vtable);
-    }
-    kxld_array_reset(&state->vtables);
-}
-
-/*******************************************************************************
-*******************************************************************************/
-void 
-kxld_state_deinit(KXLDState *state)
-{
-    KXLDVTable *vtable = NULL;
-    u_int i = 0;
-
-    check(state);
-
-#if !KERNEL
-    if (state->file && state->swap) (void)unswap_link_state(state->file);
-#endif
-   
-    for (i = 0; i < state->vtables.maxitems; ++i) {
-        vtable = kxld_array_get_slot(&state->vtables, i);
-        kxld_vtable_deinit(vtable);
-    }
-    kxld_array_deinit(&state->vtables);
-    bzero(state, sizeof(*state));
-}
-
-/*******************************************************************************
-*******************************************************************************/
-u_int 
-kxld_state_get_num_symbols(KXLDState *state)
-{
-    KXLDLinkStateHdr *hdr = (KXLDLinkStateHdr *) state->file;
-
-    return hdr->nsyms;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-kern_return_t 
-kxld_state_get_symbols(KXLDState *state, KXLDDict *defined_symbols,
-    KXLDDict *obsolete_symbols)
-{
-    KXLDLinkStateHdr * hdr = (KXLDLinkStateHdr *) state->file;
-    kern_return_t rval = KERN_FAILURE;
-
-    check(state);
-    check(defined_symbols);
-    check(obsolete_symbols);
-
-    require_action(hdr->magic == LINK_STATE_MAGIC || 
-        hdr->magic == LINK_STATE_MAGIC_64,
-        finish, rval=KERN_FAILURE);
-
-    KXLD_3264_FUNC(state_is_32_bit(hdr), rval,
-        get_symbols_32, get_symbols_64,
-        state, defined_symbols, obsolete_symbols);
-    require_noerr(rval, finish);
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-
-#if KXLD_USER_OR_ILP32
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-get_symbols_32(KXLDState *state, KXLDDict *defined_symbols,
-    KXLDDict *obsolete_symbols)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDLinkStateHdr *hdr = (KXLDLinkStateHdr *) state->file;
-    KXLDSymEntry32 *entry = NULL;
-    const char *name = NULL;
-    u_int i = 0;
-
-    entry = (KXLDSymEntry32 *) (state->file + hdr->symoff);
-    for (i = 0; i < hdr->nsyms; ++i, ++entry) {
-        name = (const char *) (state->file + entry->nameoff);
-        rval = kxld_dict_insert(defined_symbols, name, &entry->addr);
-        require_noerr(rval, finish);
-
-        if (entry->flags & KXLD_SYM_OBSOLETE) {
-            rval = kxld_dict_insert(obsolete_symbols, name, &entry->addr);
-            require_noerr(rval, finish);
-        }
-    }
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-#endif /* KXLD_USER_OR_ILP32 */
-
-#if KXLD_USER_OR_LP64
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-get_symbols_64(KXLDState *state, KXLDDict *defined_symbols,
-    KXLDDict *obsolete_symbols)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDLinkStateHdr *hdr = (KXLDLinkStateHdr *) state->file;
-    KXLDSymEntry64 *entry = NULL;
-    const char *name = NULL;
-    u_int i = 0;
-
-    entry = (KXLDSymEntry64 *) (state->file + hdr->symoff);
-    for (i = 0; i < hdr->nsyms; ++i, ++entry) {
-        name = (const char *) (state->file + entry->nameoff);
-        rval = kxld_dict_insert(defined_symbols, name, &entry->addr);
-        require_noerr(rval, finish);
-
-        if (entry->flags & KXLD_SYM_OBSOLETE) {
-            rval = kxld_dict_insert(obsolete_symbols, name, &entry->addr);
-            require_noerr(rval, finish);
-        }
-    }
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-#endif /* KXLD_USER_OR_LP64 */
-
-/*******************************************************************************
-*******************************************************************************/
-u_int 
-kxld_state_get_num_vtables(KXLDState *state)
-{
-    return state->vtables.nitems;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-kern_return_t 
-kxld_state_get_vtables(KXLDState *state, KXLDDict *patched_vtables)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDVTable *vtable = NULL;
-    u_int i = 0;
-
-    check(state);
-    check(patched_vtables);
-
-    for (i = 0; i < state->vtables.nitems; ++i) {
-        vtable = kxld_array_get_item(&state->vtables, i);
-        rval = kxld_dict_insert(patched_vtables, vtable->name, vtable);
-        require_noerr(rval, finish);
-    }
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-void
-kxld_state_get_cputype(const KXLDState *state, cpu_type_t *cputype, 
-    cpu_subtype_t *cpusubtype)
-{
-    KXLDLinkStateHdr *hdr = (KXLDLinkStateHdr *) state->file;
-    
-    check(state);
-    check(cputype);
-    check(cpusubtype);
-
-    *cputype = hdr->cputype;
-    *cpusubtype = hdr->cpusubtype;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-kern_return_t 
-kxld_state_export_kext_to_file(KXLDKext *kext, u_char **file, u_long *filesize, 
-    KXLDDict *strings, KXLDArray *tmps)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDSymtabIterator iter;
-    const KXLDSymtab *symtab = NULL;
-    const KXLDArray *vtables = NULL;
-    const KXLDVTable *vtable = NULL;
-    u_int nsyms = 0;
-    u_int nsymentries = 0;
-    u_int i = 0;
-    u_long strsize = 0;
-
-    check(kext);
-    check(file);
-    check(tmps);
-
-    bzero(&iter, sizeof(iter));
-
-    /* Get the vtables and symbol table from the kext */
-
-    kxld_kext_get_vtables(kext, &vtables);
-    symtab = kxld_kext_get_symtab(kext);
-    require_action(symtab, finish, rval=KERN_FAILURE);
-
-    /* Count the number of symentries we'll need in the link state */
-
-    kxld_symtab_iterator_init(&iter, symtab, kxld_sym_is_exported, FALSE);
-
-    nsyms = kxld_symtab_iterator_get_num_remaining(&iter);
-    nsymentries = nsyms;
-    for (i = 0; i < vtables->nitems; ++i) {
-        vtable = kxld_array_get_item(vtables, i);
-        nsymentries += vtable->entries.nitems;
-    }
-
-    /* Initialize the string index */
-
-    rval = init_string_index(strings, tmps, &iter, vtables, nsymentries, 
-        &strsize);
-    require_noerr(rval, finish);
-
-    /* Create the link state file */
-
-    rval = create_link_state(file, filesize, kext, &iter, vtables, 
-        strings, nsyms, nsymentries, strsize);
-    require_noerr(rval, finish);
-
-    /* Swap if necessary */
-
-#if !KERNEL
-    if (kxld_kext_target_needs_swap(kext)) unswap_link_state(*file);
-#endif /* !KERNEL */
-    
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-init_string_index(KXLDDict *strings, KXLDArray *tmps, KXLDSymtabIterator *iter,
-    const KXLDArray *vtables, u_int nsymentries, u_long *_strsize)
-{
-    kern_return_t rval = KERN_SUCCESS;
-    const KXLDSym *sym = NULL;
-    const KXLDVTable *vtable = NULL;
-    const KXLDVTableEntry *ventry = NULL;
-    u_long strsize = 0;
-    u_int tmpi = 0;
-    u_int i = 0;
-    u_int j = 0;
-
-    check(strings);
-    check(tmps);
-    check(iter);
-    check(vtables);
-    check(_strsize);
-
-    *_strsize = 0;
-
-    /* Initialize the string dictionary and string offset array */
-    
-    rval = kxld_dict_init(strings, kxld_dict_string_hash, kxld_dict_string_cmp,
-        nsymentries);
-    require_noerr(rval, finish);
-
-    rval = kxld_array_init(tmps, sizeof(u_long), nsymentries);
-    require_noerr(rval, finish);
-
-    /* Add all of the strings from the symbol table to the dictionary */
-
-    kxld_symtab_iterator_reset(iter);
-    while ((sym = kxld_symtab_iterator_get_next(iter))) {
-        rval = add_string_to_index(strings, sym->name, tmps, &tmpi, &strsize);
-        require_noerr(rval, finish);
-    }
-
-    /* Add all of the strings from the vtables entries to the dictionary */
-
-    for (i = 0; i < vtables->nitems; ++i) {
-        vtable = kxld_array_get_item(vtables, i);
-        rval = add_string_to_index(strings, vtable->name, tmps, &tmpi, &strsize);
-        require_noerr(rval, finish);
-
-        for (j = 0; j < vtable->entries.nitems; ++j) {
-            ventry = kxld_array_get_item(&vtable->entries, j);
-            if (ventry->patched.name) {
-                rval = add_string_to_index(strings, ventry->patched.name, tmps, 
-                    &tmpi, &strsize);
-                require_noerr(rval, finish);
-            }
-        }
-    }
-
-    *_strsize = strsize;
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-add_string_to_index(KXLDDict *strings, const char *str, KXLDArray *tmps,
-    u_int *tmpi, u_long *stroff)
-{
-    kern_return_t rval = KERN_FAILURE;
-    u_long *tmpp = NULL;
-
-    if (!kxld_dict_find(strings, str)) {
-        tmpp = kxld_array_get_item(tmps, (*tmpi)++);
-        *tmpp = *stroff;
-        
-        rval = kxld_dict_insert(strings, str, tmpp);
-        require_noerr(rval, finish);
-    
-        *stroff += strlen(str) + 1;
-    }
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-
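-/* Worked example for the indexing above (names invented for illustration):
- * inserting "foo", "bar", "foo" yields dictionary entries "foo" -> 0 and
- * "bar" -> 4, with *stroff left at 8, since each unique string reserves
- * strlen(str) + 1 bytes and the kxld_dict_find() check skips duplicates.
- */
-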
-/*******************************************************************************
-*******************************************************************************/
-static boolean_t
-state_is_32_bit(KXLDLinkStateHdr *state)
-{
-    return kxld_is_32_bit(state->cputype);
-}
-
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-create_link_state(u_char **_file, u_long *_filesize, const KXLDKext *kext,
-    KXLDSymtabIterator *iter, const KXLDArray *vtables, KXLDDict *strings, 
-    u_int nsyms, u_int nsymentries, u_long strsize)
-{
-    kern_return_t rval = KERN_SUCCESS;
-    u_char *file = NULL;
-    KXLDLinkStateHdr *hdr = NULL;
-    KXLDDictIterator striter;
-#if KXLD_USER_OR_OBJECT
-    KXLDSectionName *dstsectname = NULL;
-    KXLDSectionName *srcsectname = NULL;
-    const KXLDArray *section_order = NULL;
-    u_int i = 0;
-#endif
-    const char *name = NULL;
-    char *dstname = NULL;
-    u_long *stridx = NULL;
-    u_long hsize = 0;
-    u_long dsize = 0;
-    u_long filesize = 0;
-    u_long hoff = 0;
-    u_long doff = 0;
-    u_long stroff = 0;
-
-    check(_file);
-    check(iter);
-    check(vtables);
-    check(strings);
-
-    *_file = NULL;
-    *_filesize = 0;
-
-#if KXLD_USER_OR_OBJECT
-    section_order = kxld_kext_get_section_order(kext);
-#endif
-
-    /* Calculate header and data size */
-
-    hsize = sizeof(KXLDLinkStateHdr);
-    hsize += vtables->nitems * sizeof(KXLDVTableHdr);
-#if KXLD_USER_OR_OBJECT
-    if (section_order) {
-        hsize += section_order->nitems * sizeof(KXLDSectionName);
-    }
-#endif
-
-    if (kxld_kext_is_32_bit(kext)) {
-        dsize = nsymentries * sizeof(KXLDSymEntry32);
-    } else {
-        dsize = nsymentries * sizeof(KXLDSymEntry64);
-    }
-
-    filesize = hsize + dsize + strsize;
-
-    hoff = 0;
-    doff = hsize;
-    stroff = hsize + dsize;
-
-    /* Allocate the link state */
-
-    file = kxld_alloc_pageable(filesize);
-    require_action(file, finish, rval=KERN_RESOURCE_SHORTAGE);
-
-    /* Initialize link state header */
-
-    hdr = (KXLDLinkStateHdr *) file;
-    hoff += sizeof(*hdr); 
-
-    /* The header's cputype has not been filled in yet, so the kext, not the
-     * header, must be queried to select the magic number.
-     */
-    if (kxld_kext_is_32_bit(kext)) {
-        hdr->magic = LINK_STATE_MAGIC;
-    } else {
-        hdr->magic = LINK_STATE_MAGIC_64;
-    }
-    hdr->version = LINK_STATE_VERSION;
-    kxld_kext_get_cputype(kext, &hdr->cputype, &hdr->cpusubtype);
-    hdr->nsects = 0;
-    hdr->nvtables = vtables->nitems;
-    hdr->nsyms = nsyms;
-
-#if KXLD_USER_OR_OBJECT
-    if (section_order) {
-        hdr->nsects = section_order->nitems;
-        hdr->sectoff = (uint32_t) hoff;
-
-        dstsectname = (KXLDSectionName *) (file + hoff);
-        hoff += section_order->nitems * sizeof(*dstsectname);
-
-        for (i = 0; i < section_order->nitems; ++i, ++dstsectname) {
-            srcsectname = kxld_array_get_item(section_order, i);
-            memcpy(dstsectname, srcsectname, sizeof(*srcsectname));
-        }
-    }
-#endif
-
-    hdr->voff = (uint32_t) hoff;
-    hdr->symoff = (uint32_t) doff;
-
-    /* Copy strings */
-    
-    kxld_dict_iterator_init(&striter, strings);
-    kxld_dict_iterator_get_next(&striter, (const void **) &name, (void **) &stridx);
-    while (name) {
-        *stridx += stroff;
-        dstname = (char *) (file + *stridx);
-        strlcpy(dstname, name, filesize - *stridx);
-        kxld_dict_iterator_get_next(&striter, (const void **) &name, (void **) &stridx);
-    }
-
-    /* Copy symbols */
-
-    KXLD_3264_FUNC(state_is_32_bit(hdr), rval,
-        copy_symbols_32, copy_symbols_64,
-        file, &doff, iter, strings);
-    require_noerr(rval, finish);
-
-    /* Copy vtables */
-
-    KXLD_3264_FUNC(state_is_32_bit(hdr), rval,
-        copy_vtables_32, copy_vtables_64,
-        file, &hoff, &doff, vtables, strings);
-    require_noerr(rval, finish);
-
-    *_file = file;
-    *_filesize = filesize;
-    file = NULL;
-    rval = KERN_SUCCESS;
-
-finish:
-
-    if (file) {
-        kxld_page_free(file, filesize);
-        file = NULL;
-    }
-
-    return rval;
-}
-
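-/* Layout sketch for the size computation above (illustrative case): with
- * two vtables, no section order, and a 32-bit kext, the file is
- *
- *   [0, hsize)              header + 2 * sizeof(KXLDVTableHdr)
- *   [hsize, hsize + dsize)  nsymentries * sizeof(KXLDSymEntry32)
- *   [hsize + dsize, end)    strsize bytes of NUL-terminated strings
- *
- * which is exactly how hoff, doff, and stroff are seeded above.
- */
-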
-#if KXLD_USER_OR_ILP32
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-copy_symbols_32(u_char *file, u_long *data_offset, KXLDSymtabIterator *iter, 
-    const KXLDDict *strings)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDSymEntry32 *symentry = NULL;
-    const KXLDSym *sym = NULL;
-    u_long *stridx = NULL;
-
-    kxld_symtab_iterator_reset(iter);
-    while ((sym = kxld_symtab_iterator_get_next(iter))) {
-        symentry = (KXLDSymEntry32 *) (file + *data_offset);
-        stridx = kxld_dict_find(strings, sym->name);
-        require_action(stridx, finish, rval=KERN_FAILURE);
-
-        /* Initialize the symentry */
-
-        symentry->nameoff = (uint32_t) *stridx;
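-        /* ARM convention: odd addresses mark Thumb entry points, so the
-         * low bit is OR'd into the exported address for Thumb symbols.
-         */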
-        if (sym->predicates.is_thumb) {
-            symentry->addr = (uint32_t) sym->link_addr | 1;
-        } else {
-            symentry->addr = (uint32_t) sym->link_addr;
-        }
-        symentry->flags = 0;
-
-        /* Set any flags */
-
-        symentry->flags |= (kxld_sym_is_obsolete(sym)) ? KXLD_SYM_OBSOLETE : 0;
-
-        *data_offset += sizeof(*symentry);
-    }
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-#endif /* KXLD_USER_OR_ILP32 */
-
-#if KXLD_USER_OR_LP64
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-copy_symbols_64(u_char *file, u_long *data_offset, KXLDSymtabIterator *iter, 
-    const KXLDDict *strings)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDSymEntry64 *symentry = NULL;
-    const KXLDSym *sym = NULL;
-    u_long *stridx = NULL;
-
-    kxld_symtab_iterator_reset(iter);
-    while ((sym = kxld_symtab_iterator_get_next(iter))) {
-        symentry = (KXLDSymEntry64 *) (file + *data_offset);
-        stridx = kxld_dict_find(strings, sym->name);
-        require_action(stridx, finish, rval=KERN_FAILURE);
-
-        /* Initialize the symentry */
-
-        symentry->nameoff = (uint32_t) *stridx;
-        symentry->addr = (uint64_t) sym->link_addr;
-        symentry->flags = 0;
-
-        /* Set any flags */
-
-        symentry->flags |= (kxld_sym_is_obsolete(sym)) ? KXLD_SYM_OBSOLETE : 0;
-
-        *data_offset += sizeof(*symentry);
-    }
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-#endif /* KXLD_USER_OR_LP64 */
-
-#if KXLD_USER_OR_ILP32
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-copy_vtables_32(u_char *file, u_long *header_offset, u_long *data_offset,
-    const KXLDArray *vtables, const KXLDDict *strings)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDVTable *vtable = NULL;
-    KXLDVTableHdr *vhdr = NULL;
-    KXLDVTableEntry *ventry = NULL;
-    KXLDSymEntry32 *symentry = NULL;
-    u_long *stridx = NULL;
-    u_int i = 0;
-    u_int j = 0;
-
-    for (i = 0; i < vtables->nitems; ++i) {
-        vtable = kxld_array_get_item(vtables, i);
-        stridx = kxld_dict_find(strings, vtable->name);
-        require_action(stridx, finish, rval=KERN_FAILURE);
-
-        vhdr = (KXLDVTableHdr *) (file + *header_offset);
-        vhdr->nameoff = (uint32_t) *stridx;
-        vhdr->nentries = vtable->entries.nitems;
-        vhdr->vtableoff = (uint32_t) (*data_offset);
-
-        *header_offset += sizeof(*vhdr);
-
-        for (j = 0; j < vtable->entries.nitems; ++j) {
-
-            ventry = kxld_array_get_item(&vtable->entries, j);
-            symentry = (KXLDSymEntry32 *) (file + *data_offset);
-            
-            if (ventry->patched.name) {
-                stridx = kxld_dict_find(strings, ventry->patched.name);
-                require_action(stridx, finish, rval=KERN_FAILURE);
-
-                symentry->nameoff = (uint32_t) *stridx;
-                symentry->addr = (uint32_t) ventry->patched.addr;
-            } else {
-                symentry->nameoff = 0;
-                symentry->addr = 0;
-            }
-
-            *data_offset += sizeof(*symentry);
-        }
-    }
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-#endif /* KXLD_USER_OR_ILP32 */
-
-#if KXLD_USER_OR_LP64
-/*******************************************************************************
-*******************************************************************************/
-static kern_return_t
-copy_vtables_64(u_char *file, u_long *header_offset, u_long *data_offset,
-    const KXLDArray *vtables, const KXLDDict *strings)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDVTable *vtable = NULL;
-    KXLDVTableHdr *vhdr = NULL;
-    KXLDVTableEntry *ventry = NULL;
-    KXLDSymEntry64 *symentry = NULL;
-    u_long *stridx = NULL;
-    u_int i = 0;
-    u_int j = 0;
-
-    for (i = 0; i < vtables->nitems; ++i) {
-        vtable = kxld_array_get_item(vtables, i);
-        stridx = kxld_dict_find(strings, vtable->name);
-        require_action(stridx, finish, rval=KERN_FAILURE);
-
-        vhdr = (KXLDVTableHdr *) (file + *header_offset);
-        vhdr->nameoff = (uint32_t) *stridx;
-        vhdr->nentries = vtable->entries.nitems;
-        vhdr->vtableoff = (uint32_t) (*data_offset);
-
-        *header_offset += sizeof(*vhdr);
-
-        for (j = 0; j < vtable->entries.nitems; ++j) {
-
-            ventry = kxld_array_get_item(&vtable->entries, j);
-            symentry = (KXLDSymEntry64 *) (file + *data_offset);
-            
-            if (ventry->patched.name) {
-                stridx = kxld_dict_find(strings, ventry->patched.name);
-                require_action(stridx, finish, rval=KERN_FAILURE);
-
-                symentry->nameoff = (uint32_t) *stridx;
-                symentry->addr = (uint64_t) ventry->patched.addr;
-            } else {
-                symentry->nameoff = 0;
-                symentry->addr = 0;
-            }
-
-            *data_offset += sizeof(*symentry);
-        }
-    }
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-#endif /* KXLD_USER_OR_LP64 */
-
-#if !KERNEL
-/*******************************************************************************
-*******************************************************************************/
-static boolean_t
-swap_link_state(u_char *state)
-{
-    KXLDLinkStateHdr *state_hdr = (KXLDLinkStateHdr *) state;
-
-    if (state_hdr->magic == CIGAM_ETATS_KNIL) {
-        swap_link_state_32(state);
-        return TRUE;
-    } else if (state_hdr->magic == CIGAM_ETATS_KNIL_64) {
-        swap_link_state_64(state);
-        return TRUE;
-    }
-
-    return FALSE;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-static void
-swap_link_state_32(u_char *state)
-{
-    KXLDLinkStateHdr *state_hdr = NULL;
-    KXLDVTableHdr *vtable_hdr = NULL;
-    KXLDSymEntry32 *entry = NULL;
-    u_int i = 0;
-    u_int j = 0;
-    
-    state_hdr = (KXLDLinkStateHdr *) state;
-
-    if (state_hdr->magic != CIGAM_ETATS_KNIL) return;
-
-    /* Swap the header */
-    swap_state_hdr(state_hdr);
-
-    /* Swap the symbols */
-    entry = (KXLDSymEntry32 *) (state + state_hdr->symoff);
-    for (i = 0; i < state_hdr->nsyms; ++i, ++entry) {
-        swap_sym_entry_32(entry);
-    }
-
-    /* Swap the vtable headers and entries */
-    vtable_hdr = (KXLDVTableHdr *) (state + state_hdr->voff);
-    for (i = 0; i < state_hdr->nvtables; ++i, ++vtable_hdr) {
-        swap_vtable_hdr(vtable_hdr);
-
-        entry = (KXLDSymEntry32 *) (state + vtable_hdr->vtableoff);
-        for (j = 0; j < vtable_hdr->nentries; ++j, ++entry) {
-            swap_sym_entry_32(entry);
-        }
-    }
-}
-
-/*******************************************************************************
-*******************************************************************************/
-static void
-swap_link_state_64(u_char *state)
-{
-    KXLDLinkStateHdr *state_hdr = NULL;
-    KXLDVTableHdr *vtable_hdr = NULL;
-    KXLDSymEntry64 *entry = NULL;
-    u_int i = 0;
-    u_int j = 0;
-    
-    state_hdr = (KXLDLinkStateHdr *) state;
-
-    if (state_hdr->magic != CIGAM_ETATS_KNIL_64) return;
-
-    /* Swap the header */
-    swap_state_hdr(state_hdr);
-
-    /* Swap the symbols */
-    entry = (KXLDSymEntry64 *) (state + state_hdr->symoff);
-    for (i = 0; i < state_hdr->nsyms; ++i, ++entry) {
-        swap_sym_entry_64(entry);
-    }
-
-    /* Swap the vtable headers and entries */
-    vtable_hdr = (KXLDVTableHdr *) (state + state_hdr->voff);
-    for (i = 0; i < state_hdr->nvtables; ++i, ++vtable_hdr) {
-        swap_vtable_hdr(vtable_hdr);
-
-        entry = (KXLDSymEntry64 *) (state + vtable_hdr->vtableoff);
-        for (j = 0; j < vtable_hdr->nentries; ++j, ++entry) {
-            swap_sym_entry_64(entry);
-        }
-    }
-}
-
-/*******************************************************************************
-*******************************************************************************/
-static boolean_t
-unswap_link_state(u_char *state)
-{
-    KXLDLinkStateHdr *state_hdr = (KXLDLinkStateHdr *) state;
-
-    if (state_hdr->magic == LINK_STATE_MAGIC) {
-        unswap_link_state_32(state);
-        return TRUE;
-    } else if (state_hdr->magic == LINK_STATE_MAGIC_64) {
-        unswap_link_state_64(state);
-        return TRUE;
-    }
-
-    return FALSE;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-static void
-unswap_link_state_32(u_char *state)
-{
-    KXLDLinkStateHdr *state_hdr = NULL;
-    KXLDVTableHdr *vtable_hdr = NULL;
-    KXLDSymEntry32 *entry = NULL;
-    u_int i = 0;
-    u_int j = 0;
-    
-    state_hdr = (KXLDLinkStateHdr *) state;
-
-    if (state_hdr->magic != LINK_STATE_MAGIC) return;
-
-    /* Unswap the vtables and their headers */
-    vtable_hdr = (KXLDVTableHdr *) (state + state_hdr->voff);
-    for (i = 0; i < state_hdr->nvtables; ++i, ++vtable_hdr) {
-        entry = (KXLDSymEntry32 *) (state + vtable_hdr->vtableoff);
-        for (j = 0; j < vtable_hdr->nentries; ++j, ++entry) {
-            swap_sym_entry_32(entry);
-        }
-
-        swap_vtable_hdr(vtable_hdr);
-    }
-
-    /* Unswap the symbols themselves */
-    entry = (KXLDSymEntry32 *) (state + state_hdr->symoff);
-    for (i = 0; i < state_hdr->nsyms; ++i, ++entry) {
-        swap_sym_entry_32(entry);
-    }
-
-    /* Unswap the header */
-    swap_state_hdr(state_hdr);
-}
-
-/*******************************************************************************
-*******************************************************************************/
-static void
-unswap_link_state_64(u_char *state)
-{
-    KXLDLinkStateHdr *state_hdr = NULL;
-    KXLDVTableHdr *vtable_hdr = NULL;
-    KXLDSymEntry64 *entry = NULL;
-    u_int i = 0;
-    u_int j = 0;
-    
-    state_hdr = (KXLDLinkStateHdr *) state;
-
-    if (state_hdr->magic != LINK_STATE_MAGIC_64) return;
-
-    /* Unswap the vtables and their headers */
-    vtable_hdr = (KXLDVTableHdr *) (state + state_hdr->voff);
-    for (i = 0; i < state_hdr->nvtables; ++i, ++vtable_hdr) {
-        entry = (KXLDSymEntry64 *) (state + vtable_hdr->vtableoff);
-        for (j = 0; j < vtable_hdr->nentries; ++j, ++entry) {
-            swap_sym_entry_64(entry);
-        }
-
-        swap_vtable_hdr(vtable_hdr);
-    }
-
-    /* Unswap the symbols themselves */
-    entry = (KXLDSymEntry64 *) (state + state_hdr->symoff);
-    for (i = 0; i < state_hdr->nsyms; ++i, ++entry) {
-        swap_sym_entry_64(entry);
-    }
-
-    /* Unswap the header */
-    swap_state_hdr(state_hdr);
-}
-
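-/* Ordering note: swap_link_state_* converts the header first so that voff,
- * symoff, and the counts can be read in host byte order during traversal;
- * unswap_link_state_* walks the tables first and converts the header last,
- * for the same reason in reverse.
- */
-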
-/*******************************************************************************
-*******************************************************************************/
-static void
-swap_state_hdr(KXLDLinkStateHdr *state_hdr)
-{
-    state_hdr->magic = OSSwapInt32(state_hdr->magic);
-    state_hdr->version = OSSwapInt32(state_hdr->version);
-    state_hdr->cputype = OSSwapInt32(state_hdr->cputype);
-    state_hdr->cpusubtype = OSSwapInt32(state_hdr->cpusubtype);
-    state_hdr->nsects = OSSwapInt32(state_hdr->nsects);
-    state_hdr->sectoff = OSSwapInt32(state_hdr->sectoff);
-    state_hdr->nvtables = OSSwapInt32(state_hdr->nvtables);
-    state_hdr->voff = OSSwapInt32(state_hdr->voff);
-    state_hdr->nsyms = OSSwapInt32(state_hdr->nsyms);
-    state_hdr->symoff = OSSwapInt32(state_hdr->symoff);
-}
-
-/*******************************************************************************
-*******************************************************************************/
-static void
-swap_vtable_hdr(KXLDVTableHdr *vtable_hdr)
-{
-    vtable_hdr->nameoff = OSSwapInt32(vtable_hdr->nameoff);
-    vtable_hdr->vtableoff = OSSwapInt32(vtable_hdr->vtableoff);
-    vtable_hdr->nentries = OSSwapInt32(vtable_hdr->nentries);
-}
-
-/*******************************************************************************
-*******************************************************************************/
-static void
-swap_sym_entry_32(KXLDSymEntry32 *entry)
-{
-    entry->nameoff = OSSwapInt32(entry->nameoff);
-    entry->addr = OSSwapInt32(entry->addr);
-}
-
-/*******************************************************************************
-*******************************************************************************/
-static void
-swap_sym_entry_64(KXLDSymEntry64 *entry)
-{
-    entry->nameoff = OSSwapInt32(entry->nameoff);
-    entry->addr = OSSwapInt64(entry->addr);
-}
-#endif /* !KERNEL */
-
diff --git a/libkern/kxld/kxld_state.h b/libkern/kxld/kxld_state.h
deleted file mode 100644
index 22878159c..000000000
--- a/libkern/kxld/kxld_state.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#ifndef _KXLD_STATE_H_
-#define _KXLD_STATE_H_
-
-#include <sys/types.h>
-#if KERNEL
-    #include <libkern/kxld_types.h>
-#else 
-    #include "kxld_types.h"
-#endif
-
-#include "kxld_array.h"
-#include "kxld_util.h"
-
-struct kxld_dict;
-struct kxld_kext;
-struct kxld_link_state_hdr;
-typedef struct kxld_state KXLDState;
-typedef struct kxld_link_state_hdr KXLDLinkStateHdr;
-typedef struct kxld_vtable_hdr KXLDVTableHdr;
-typedef struct kxld_sym_entry_32 KXLDSymEntry32;
-typedef struct kxld_sym_entry_64 KXLDSymEntry64;
-
-struct kxld_state {
-    u_char *file;
-    KXLDArray vtables;
-    boolean_t swap;
-};
-
-/* 
- * The format of the link state object is as follows:
- 
-   *      Field            ***       Type           *
-   **************************************************
-   * Link state header     *** KXLDLinkStateHdr     *
-   **************************************************
-   * Section order entries *** KXLDSectionName      *
-   **************************************************
-   * Vtable headers        *** KXLDVTableHdr        *
-   **************************************************
-   * VTables               *** KXLDSymEntry[32|64]  *
-   **************************************************
-   * Exported symbols      *** KXLDSymEntry[32|64]  *
-   **************************************************
-   * String table          *** char[]               *
-   **************************************************
-   
- */
-
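-/* A walking sketch under the layout above (illustrative; assumes a 32-bit
- * link state already swapped to host byte order):
- *
- *   KXLDLinkStateHdr *hdr = (KXLDLinkStateHdr *) file;
- *   KXLDVTableHdr *vhdr = (KXLDVTableHdr *) (file + hdr->voff);
- *   KXLDSymEntry32 *syms = (KXLDSymEntry32 *) (file + hdr->symoff);
- *   const char *name = (const char *) (file + syms[0].nameoff);
- */
-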
-struct kxld_link_state_hdr {
-    uint32_t magic;
-    uint32_t version;
-    cpu_type_t cputype;
-    cpu_subtype_t cpusubtype;
-    uint32_t nsects;
-    uint32_t sectoff;
-    uint32_t nvtables;
-    uint32_t voff;
-    uint32_t nsyms;
-    uint32_t symoff;
-};
-
-struct kxld_vtable_hdr {
-    uint32_t nameoff;
-    uint32_t vtableoff;
-    uint32_t nentries;
-};
-
-struct kxld_sym_entry_32 {
-    uint32_t addr;
-    uint32_t nameoff;
-    uint32_t flags;
-};
-
-struct kxld_sym_entry_64 {
-    uint64_t addr;
-    uint32_t nameoff;
-    uint32_t flags;
-} __attribute__((aligned(16)));
-
-#define KXLD_SYM_OBSOLETE 0x1
-
-/*******************************************************************************
-* Constructors and destructors
-*******************************************************************************/
-
-kern_return_t kxld_state_init_from_file(KXLDState *state, u_char *file,
-    KXLDArray *section_order)
-    __attribute__((nonnull(1,2), visibility("hidden")));
-
-void kxld_state_clear(KXLDState *state)
-    __attribute__((nonnull, visibility("hidden")));
-
-void kxld_state_deinit(KXLDState *state)
-    __attribute__((nonnull, visibility("hidden")));
-
-/*******************************************************************************
-* Accessors
-*******************************************************************************/
-
-u_int kxld_state_get_num_symbols(KXLDState *state)
-    __attribute__((pure, nonnull, visibility("hidden")));
-
-kern_return_t kxld_state_get_symbols(KXLDState *state, 
-    struct kxld_dict *defined_symbols,
-    struct kxld_dict *obsolete_symbols)
-    __attribute__((nonnull, visibility("hidden")));
-
-u_int kxld_state_get_num_vtables(KXLDState *state)
-    __attribute__((pure, nonnull, visibility("hidden")));
-
-kern_return_t kxld_state_get_vtables(KXLDState *state,
-    struct kxld_dict *patched_vtables)
-    __attribute__((nonnull, visibility("hidden")));
-
-void kxld_state_get_cputype(const KXLDState *state,
-    cpu_type_t *cputype, cpu_subtype_t *cpusubtype)
-    __attribute__((nonnull, visibility("hidden")));
-
-/*******************************************************************************
-* Exporters
-*******************************************************************************/
-
-kern_return_t kxld_state_export_kext_to_file(struct kxld_kext *kext, u_char **file,
-    u_long *filesize, struct kxld_dict *tmpdict, KXLDArray *tmps)
-    __attribute__((nonnull, visibility("hidden")));
-
-#endif /* _KXLD_STATE_H_ */
-
diff --git a/libkern/kxld/kxld_stubs.c b/libkern/kxld/kxld_stubs.c
index 511e82a10..2b10ce687 100644
--- a/libkern/kxld/kxld_stubs.c
+++ b/libkern/kxld/kxld_stubs.c
@@ -30,6 +30,7 @@
  * These kxld stubs panic if the kernel is built without kxld support but
  * something tries to use it anyway.
  */
+#if KERNEL
 
 #if !CONFIG_KXLD
 
@@ -56,19 +57,15 @@ kxld_destroy_context(KXLDContext *context __unused)
 
 kern_return_t
 kxld_link_file(
-    KXLDContext *context __unused,
-    u_char *file __unused,
-    u_long size __unused,
-    const char *name,
-    void *callback_data __unused,
-    u_char **deps __unused,
-    u_int ndeps __unused,
-    u_char **_linked_object __unused,
-    kxld_addr_t *kmod_info_kern __unused,
-    u_char **_link_state __unused,
-    u_long *_link_state_size __unused,
-    u_char **_symbol_file __unused,
-    u_long *_symbol_file_size __unused)
+    KXLDContext       * context                 __unused,
+    u_char            * file                    __unused,
+    u_long              size                    __unused,
+    const char        * name                    __unused,
+    void              * callback_data           __unused,
+    KXLDDependency    * dependencies            __unused,
+    u_int               ndependencies           __unused,
+    u_char           ** linked_object_out       __unused,
+    kxld_addr_t       * kmod_info_kern          __unused)
 {
     panic("%s (%s) called in kernel without kxld support", __PRETTY_FUNCTION__, name);
     return KERN_SUCCESS;
@@ -81,3 +78,5 @@ kxld_validate_copyright_string(const char *str __unused)
 }
 
 #endif
+
+#endif /* KERNEL */
diff --git a/libkern/kxld/kxld_sym.c b/libkern/kxld/kxld_sym.c
index 5dbd6b860..2e9cb16e9 100644
--- a/libkern/kxld/kxld_sym.c
+++ b/libkern/kxld/kxld_sym.c
@@ -2,7 +2,7 @@
  * Copyright (c) 2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
+ * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
@@ -11,10 +11,10 @@
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- *
+ * 
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
+ * 
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- *
+ * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 #include <string.h>
@@ -146,7 +146,7 @@ kxld_sym_init_absolute(KXLDSym *sym, char *name, kxld_addr_t link_addr)
     sym->sectnum = NO_SECT;
 
     init_predicates(sym, N_ABS | N_EXT, 0);
-    sym->predicates.is_resolved = TRUE;
+    sym->is_resolved = TRUE;
 }
 
 /*******************************************************************************
@@ -160,7 +160,7 @@ init_predicates(KXLDSym *sym, u_char n_type, u_short n_desc)
 
     /* The type field is interpreted differently for normal symbols and stabs */
     if (n_type & N_STAB) {
-        sym->predicates.is_stab = 1;
+        sym->is_stab = 1;
 
         switch (n_type) {
         /* Labeled as NO_SECT in stab.h */
@@ -180,7 +180,7 @@ init_predicates(KXLDSym *sym, u_char n_type, u_short n_desc)
         case N_LENG:
         case N_OPT:
         case N_OSO:
-            sym->predicates.is_absolute = 1;
+            sym->is_absolute = 1;
             break;
         /* Labeled as n_sect in stab.h */
         case N_FUN:
@@ -198,9 +198,9 @@ init_predicates(KXLDSym *sym, u_char n_type, u_short n_desc)
          * section-based on OS X.  We must mark them as such so they get
          * relocated.
          */
-        case N_LBRAC:
         case N_RBRAC:
-            sym->predicates.is_section = 1;
+        case N_LBRAC:
+            sym->is_section = 1;
             break;
         default:
             rval = KERN_FAILURE;
@@ -214,42 +214,25 @@ init_predicates(KXLDSym *sym, u_char n_type, u_short n_desc)
     } else {
         u_char type = n_type & N_TYPE;
 
-        /* Set the type-independent fields */
-        if ((n_type & N_EXT) && !(n_type & N_PEXT)) {
-            sym->predicates.is_external = 1;
-        }
-
-        if (n_desc & N_DESC_DISCARDED) {
-            sym->predicates.is_obsolete = 1;
-        }
-
-        if (n_desc & N_WEAK_REF) {
-            sym->predicates.is_weak = 1;
-        }
-
-        if (n_desc & N_ARM_THUMB_DEF) {
-            sym->predicates.is_thumb = 1;
-        }
-
         /* The first set of type fields are mutually exclusive, so they can be
          * set with a switch statement.
          */
         switch (type) {
         case N_ABS:
-            sym->predicates.is_absolute = 1;
+            sym->is_absolute = 1;
             break;
         case N_SECT:
-            sym->predicates.is_section = 1;
+            sym->is_section = 1;
             break;
         case N_UNDF:
             if (sym->base_addr) {
-                sym->predicates.is_common = 1;
+                sym->is_common = 1;
             } else {
-                sym->predicates.is_undefined = 1;
+                sym->is_undefined = 1;
             }
             break;
         case N_INDR:
-            sym->predicates.is_indirect = 1;
+            sym->is_indirect = 1;
             break;
         default:
             rval = KERN_FAILURE;
@@ -258,28 +241,47 @@ init_predicates(KXLDSym *sym, u_char n_type, u_short n_desc)
             goto finish;
         }
 
+        /* Set the type-independent fields */
+        if ((n_type & N_EXT) && !(n_type & N_PEXT)) {
+            sym->is_external = 1;
+        }
+
+        if (n_desc & N_DESC_DISCARDED) {
+            sym->is_obsolete = 1;
+        }
+
+        if (n_desc & N_WEAK_REF) {
+            sym->is_weak = 1;
+        }
+
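+        /* ARM/Thumb interworking: the low address bit marks Thumb code, so
+         * it is carried through base_addr and link_addr during relocation
+         * and masked off again when the symbol is exported to an nlist.
+         */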
+        if (n_desc & N_ARM_THUMB_DEF) {
+            sym->is_thumb = 1;
+            sym->base_addr |= 1;
+            sym->link_addr |= 1;
+        }
+
         /* Set the C++-specific fields */
-        if ((0 == strncmp(CXX_PREFIX, sym->name, const_strlen(CXX_PREFIX)))) {
-            sym->predicates.is_cxx = 1;
+        if ((streq_safe(CXX_PREFIX, sym->name, const_strlen(CXX_PREFIX)))) {
+            sym->is_cxx = 1;
 
-            if (0 == strncmp(sym->name, METACLASS_VTABLE_PREFIX, 
+            if (streq_safe(sym->name, METACLASS_VTABLE_PREFIX, 
                 const_strlen(METACLASS_VTABLE_PREFIX)))
             {
-                sym->predicates.is_meta_vtable = 1;
-            } else if (0 == strncmp(sym->name, VTABLE_PREFIX, 
+                sym->is_meta_vtable = 1;
+            } else if (streq_safe(sym->name, VTABLE_PREFIX, 
                 const_strlen(VTABLE_PREFIX))) 
             {
-                sym->predicates.is_class_vtable = 1;
+                sym->is_class_vtable = 1;
             } else if (kxld_strstr(sym->name, RESERVED_TOKEN)) {
-                sym->predicates.is_padslot = 1;
+                sym->is_padslot = 1;
             } else if (kxld_strstr(sym->name, METACLASS_TOKEN)) {
-                sym->predicates.is_metaclass = 1;
+                sym->is_metaclass = 1;
             } else if (kxld_strstr(sym->name, SUPER_METACLASS_POINTER_TOKEN)) {
-                sym->predicates.is_super_metaclass_pointer = 1;
+                sym->is_super_metaclass_pointer = 1;
             }
-        } else if (streq_safe(CXX_PURE_VIRTUAL, sym->name, sizeof(CXX_PURE_VIRTUAL))) {
-            sym->predicates.is_cxx = 1;
-            sym->predicates.is_pure_virtual = 1;
+        } else if (kxld_sym_name_is_pure_virtual(sym->name)) {
+            sym->is_cxx = 1;
+            sym->is_pure_virtual = 1;
         }
     }
 
@@ -305,8 +307,8 @@ init_sym_sectnum(KXLDSym *sym, u_int n_sect)
             /* Convert the section number to an index into the section index */
             sym->sectnum = n_sect - 1;
         } else {
-            sym->predicates.is_absolute = 1;
-            sym->predicates.is_section = 0;
+            sym->is_absolute = 1;
+            sym->is_section = 0;
         }
     }
 
@@ -338,7 +340,7 @@ kxld_sym_is_absolute(const KXLDSym *sym)
 {
     check(sym);
 
-    return (0 != sym->predicates.is_absolute);
+    return (0 != sym->is_absolute);
 }
 
 /*******************************************************************************
@@ -348,7 +350,7 @@ kxld_sym_is_section(const KXLDSym *sym)
 {
     check(sym);
 
-    return (0 != sym->predicates.is_section);
+    return (0 != sym->is_section);
 }
 
 /*******************************************************************************
@@ -359,7 +361,7 @@ kxld_sym_is_defined(const KXLDSym *sym)
     check(sym);
 
     return ((kxld_sym_is_absolute(sym) || kxld_sym_is_section(sym)) && 
-        !sym->predicates.is_replaced);
+        !kxld_sym_is_replaced(sym));
 }
 
 
@@ -370,7 +372,7 @@ kxld_sym_is_defined_locally(const KXLDSym *sym)
 {
     check(sym);
 
-    return (kxld_sym_is_defined(sym) && !sym->predicates.is_resolved);
+    return (kxld_sym_is_defined(sym) && !sym->is_resolved);
 }
 
 /*******************************************************************************
@@ -380,7 +382,7 @@ kxld_sym_is_external(const KXLDSym *sym)
 {
     check(sym);
 
-    return (0 != sym->predicates.is_external);
+    return (0 != sym->is_external);
 }
 
 /*******************************************************************************
@@ -400,7 +402,7 @@ kxld_sym_is_undefined(const KXLDSym *sym)
 {
     check(sym);
 
-    return (0 != sym->predicates.is_undefined);
+    return (0 != sym->is_undefined);
 }
 
 /*******************************************************************************
@@ -410,7 +412,17 @@ kxld_sym_is_indirect(const KXLDSym *sym)
 {
     check(sym);
 
-    return (0 != sym->predicates.is_indirect);
+    return (0 != sym->is_indirect);
+}
+
+/*******************************************************************************
+*******************************************************************************/
+boolean_t
+kxld_sym_is_replaced(const KXLDSym *sym)
+{
+    check(sym);
+
+    return (0 != sym->is_replaced);
 }
 
 /*******************************************************************************
@@ -420,7 +432,7 @@ kxld_sym_is_common(const KXLDSym *sym)
 {
     check(sym);
 
-    return (0 != sym->predicates.is_common);
+    return (0 != sym->is_common);
 }
 
 /*******************************************************************************
@@ -428,7 +440,7 @@ kxld_sym_is_common(const KXLDSym *sym)
 boolean_t
 kxld_sym_is_unresolved(const KXLDSym *sym)
 {
-    return ((kxld_sym_is_undefined(sym) && !sym->predicates.is_replaced) ||
+    return ((kxld_sym_is_undefined(sym) && !kxld_sym_is_replaced(sym)) ||
             kxld_sym_is_indirect(sym) || kxld_sym_is_common(sym));
 }
 
@@ -437,7 +449,7 @@ kxld_sym_is_unresolved(const KXLDSym *sym)
 boolean_t
 kxld_sym_is_obsolete(const KXLDSym *sym)
 {
-    return (0 != sym->predicates.is_obsolete);
+    return (0 != sym->is_obsolete);
 }
 
 #if KXLD_USER_OR_GOT
@@ -448,7 +460,7 @@ kxld_sym_is_got(const KXLDSym *sym)
 {
     check(sym);
 
-    return (0 != sym->predicates.is_got);
+    return (0 != sym->is_got);
 }
 #endif /* KXLD_USER_OR_GOT */
 
@@ -459,7 +471,7 @@ kxld_sym_is_stab(const KXLDSym *sym)
 {
     check(sym);
 
-    return (0 != sym->predicates.is_stab);
+    return (0 != sym->is_stab);
 }
 
 /*******************************************************************************
@@ -469,7 +481,7 @@ kxld_sym_is_weak(const KXLDSym *sym)
 {
     check(sym);
 
-    return (0 != sym->predicates.is_weak);
+    return (0 != sym->is_weak);
 }
 
 /*******************************************************************************
@@ -479,7 +491,7 @@ kxld_sym_is_cxx(const KXLDSym *sym)
 {
     check(sym);
 
-    return (0 != sym->predicates.is_cxx);
+    return (0 != sym->is_cxx);
 }
 
 /*******************************************************************************
@@ -487,7 +499,7 @@ kxld_sym_is_cxx(const KXLDSym *sym)
 boolean_t
 kxld_sym_is_pure_virtual(const KXLDSym *sym)
 {
-    return (0 != sym->predicates.is_pure_virtual);
+    return (0 != sym->is_pure_virtual);
 }
 
 /*******************************************************************************
@@ -507,7 +519,7 @@ kxld_sym_is_class_vtable(const KXLDSym *sym)
 {
     check(sym);
 
-    return (0 != sym->predicates.is_class_vtable);
+    return (0 != sym->is_class_vtable);
 }
 
 /*******************************************************************************
@@ -517,7 +529,7 @@ kxld_sym_is_metaclass_vtable(const KXLDSym *sym)
 {
     check(sym);
 
-    return (0 != sym->predicates.is_meta_vtable);
+    return (0 != sym->is_meta_vtable);
 }
 
 /*******************************************************************************
@@ -527,7 +539,7 @@ kxld_sym_is_padslot(const KXLDSym *sym)
 {
     check(sym);
 
-    return (0 != sym->predicates.is_padslot);
+    return (0 != sym->is_padslot);
 }
 
 /*******************************************************************************
@@ -537,7 +549,7 @@ kxld_sym_is_metaclass(const KXLDSym *sym)
 {
     check(sym);
 
-    return (0 != sym->predicates.is_metaclass);
+    return (0 != sym->is_metaclass);
 }
 
 /*******************************************************************************
@@ -547,7 +559,15 @@ kxld_sym_is_super_metaclass_pointer(const KXLDSym *sym)
 {
     check(sym);
 
-    return (0 != sym->predicates.is_super_metaclass_pointer);
+    return (0 != sym->is_super_metaclass_pointer);
+}
+
+/*******************************************************************************
+*******************************************************************************/
+boolean_t
+kxld_sym_name_is_pure_virtual(const char *name)
+{
+    return streq_safe(CXX_PURE_VIRTUAL, name, sizeof(CXX_PURE_VIRTUAL));
 }
 
 /*******************************************************************************
@@ -813,7 +833,7 @@ finish:
 void
 kxld_sym_set_got(KXLDSym *sym)
 {
-    sym->predicates.is_got = 1;
+    sym->is_got = 1;
 }
 #endif /* KXLD_USER_OR_GOT */
 
@@ -833,7 +853,7 @@ kxld_sym_relocate(KXLDSym *sym, const KXLDSect *sect)
 *******************************************************************************/
 kern_return_t
 kxld_sym_export_macho_32(const KXLDSym *sym, u_char *_nl, char *strtab, 
-    u_long *stroff, u_long strsize, boolean_t is_link_state)
+    u_long *stroff, u_long strsize)
 {
     kern_return_t rval = KERN_FAILURE;
     struct nlist *nl = (struct nlist *) _nl;
@@ -849,17 +869,14 @@ kxld_sym_export_macho_32(const KXLDSym *sym, u_char *_nl, char *strtab,
     require_action((u_long)bytes <= strsize - *stroff, finish,
         rval = KERN_FAILURE);
 
-    if (is_link_state) {
-        nl->n_type = N_ABS | N_EXT;
-        nl->n_sect = NO_SECT;
-        nl->n_desc = 0;
-    } else {
-        nl->n_type = sym->type;
-        nl->n_sect = (kxld_sym_is_section(sym)) ? sym->relocated_sectnum + 1 : 0;
-        nl->n_desc = sym->desc;
-    }
+    nl->n_type = sym->type;
+    nl->n_sect = (kxld_sym_is_section(sym)) ? sym->relocated_sectnum + 1 : 0;
+    nl->n_desc = sym->desc;
     nl->n_un.n_strx = (uint32_t) *stroff;
     nl->n_value = (uint32_t) sym->link_addr;
+    if (sym->is_thumb) {
+        nl->n_value &= ~0x1U;
+    }
 
     str = (char *) (strtab + *stroff);
     strlcpy(str, sym->name, strsize - *stroff);
@@ -877,7 +894,7 @@ finish:
 *******************************************************************************/
 kern_return_t
 kxld_sym_export_macho_64(const KXLDSym *sym, u_char *_nl, char *strtab,
-    u_long *stroff, u_long strsize, boolean_t is_link_state)
+    u_long *stroff, u_long strsize)
 {
     kern_return_t rval = KERN_FAILURE;
     struct nlist_64 *nl = (struct nlist_64 *) _nl;
@@ -893,17 +910,14 @@ kxld_sym_export_macho_64(const KXLDSym *sym, u_char *_nl, char *strtab,
     require_action((u_long)bytes <= strsize - *stroff, finish,
         rval = KERN_FAILURE);
 
-    if (is_link_state) {
-        nl->n_type = N_ABS | N_EXT;
-        nl->n_sect = NO_SECT;
-        nl->n_desc = 0;
-    } else {
-        nl->n_type = sym->type;
-        nl->n_sect = (kxld_sym_is_section(sym)) ? sym->relocated_sectnum + 1 : 0;
-        nl->n_desc = sym->desc;
-    }
+    nl->n_type = sym->type;
+    nl->n_sect = (kxld_sym_is_section(sym)) ? sym->relocated_sectnum + 1 : 0;
+    nl->n_desc = sym->desc;
     nl->n_un.n_strx = (uint32_t) *stroff;
     nl->n_value = (uint64_t) sym->link_addr;
+    if (sym->is_thumb) {
+        nl->n_value &= ~0x1ULL;
+    }
 
     str = (char *) (strtab + *stroff);
     strlcpy(str, sym->name, strsize - *stroff);
@@ -919,7 +933,7 @@ finish:
 /*******************************************************************************
 *******************************************************************************/
 kern_return_t
-kxld_sym_resolve(KXLDSym *sym, kxld_addr_t addr, boolean_t export_sym) 
+kxld_sym_resolve(KXLDSym *sym, kxld_addr_t addr) 
 {
     kern_return_t rval = KERN_FAILURE;
 
@@ -934,18 +948,16 @@ kxld_sym_resolve(KXLDSym *sym, kxld_addr_t addr, boolean_t export_sym)
     sym->type = N_ABS | N_EXT;
     sym->sectnum = NO_SECT;
  
-    /* Set the predicate bits for an externally resolved symbol.  We re-export
-     * indirect symbols and any symbols that the caller wants re-exported (for
-     * example, symbols from a pseudo-kext). */
+    /* Set the predicate bits for an externally resolved symbol. */
     
-    sym->predicates.is_external = TRUE;
-    sym->predicates.is_absolute = TRUE;
-    sym->predicates.is_resolved = !(kxld_sym_is_indirect(sym) || export_sym);
-  
+    sym->is_external = TRUE;
+    sym->is_absolute = TRUE;
+    sym->is_resolved = TRUE;
+
     /* Clear the predicate bits for types that can be resolved */
 
-    sym->predicates.is_undefined = FALSE;
-    sym->predicates.is_indirect = FALSE;
+    sym->is_undefined = FALSE;
+    sym->is_indirect = FALSE;
 
     rval = KERN_SUCCESS;
 
@@ -973,12 +985,12 @@ kxld_sym_resolve_common(KXLDSym *sym, u_int sectnum, kxld_addr_t base_addr)
     sym->sectnum = sectnum;
     sym->desc = 0;
 
-    sym->predicates.is_absolute = FALSE;
-    sym->predicates.is_section = TRUE;
-    sym->predicates.is_undefined = FALSE;
-    sym->predicates.is_indirect = FALSE;
-    sym->predicates.is_common = FALSE;
-    sym->predicates.is_external = TRUE;
+    sym->is_absolute = FALSE;
+    sym->is_section = TRUE;
+    sym->is_undefined = FALSE;
+    sym->is_indirect = FALSE;
+    sym->is_common = FALSE;
+    sym->is_external = TRUE;
 
     rval = KERN_SUCCESS;
 
@@ -996,7 +1008,7 @@ kxld_sym_delete(KXLDSym *sym)
     check(sym);
 
     bzero(sym, sizeof(*sym));
-    sym->predicates.is_replaced = TRUE;
+    sym->is_replaced = TRUE;
 }
 
 
@@ -1007,7 +1019,7 @@ kxld_sym_patch(KXLDSym *sym)
 {
     check(sym);
 
-    sym->predicates.is_replaced = TRUE;
+    sym->is_replaced = TRUE;
 }
 
 /*******************************************************************************
@@ -1018,6 +1030,6 @@ kxld_sym_mark_private(KXLDSym *sym)
     check(sym);
 
     sym->type |= N_PEXT;
-    sym->predicates.is_external = FALSE;
+    sym->is_external = FALSE;
 }
 
diff --git a/libkern/kxld/kxld_sym.h b/libkern/kxld/kxld_sym.h
index 237586263..69cb8cbf7 100644
--- a/libkern/kxld/kxld_sym.h
+++ b/libkern/kxld/kxld_sym.h
@@ -48,12 +48,11 @@ struct kxld_sym {
     kxld_addr_t base_addr;            // The symbol's base address
     kxld_addr_t link_addr;            // The relocated address
     kxld_addr_t got_addr;             // The address of this symbol's GOT entry
+    uint16_t desc;
     uint8_t type;
     uint8_t sectnum;                  // The symbol's section number
     uint8_t relocated_sectnum;
-    uint16_t desc;
-    struct {
-        u_int is_absolute:1,          // Set for absolute symbols
+    u_int is_absolute:1,              // Set for absolute symbols
         is_section:1,                 // Set for section symbols
         is_undefined:1,               // Set for undefined symbols
         is_indirect:1,                // Set for indirect symbols
@@ -74,7 +73,6 @@ struct kxld_sym {
         is_metaclass:1,               // Set for metaclass symbols
         is_super_metaclass_pointer:1, // Set for super metaclass pointer syms
         is_thumb:1;                   // Set for thumb symbols (ARM only)
-    } predicates;
 };
 
 /*******************************************************************************
@@ -128,6 +126,9 @@ boolean_t kxld_sym_is_undefined(const KXLDSym *sym)
 boolean_t kxld_sym_is_indirect(const KXLDSym *sym)
     __attribute__((pure, nonnull, visibility("hidden")));
 
+boolean_t kxld_sym_is_replaced(const KXLDSym *sym)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
 /* We don't wrap this in KXLD_USER_OR_COMMON because even though common symbols
  * aren't always supported, we always need to be able to detect them.
  */
@@ -175,6 +176,9 @@ boolean_t kxld_sym_is_metaclass(const KXLDSym *sym)
 boolean_t kxld_sym_is_super_metaclass_pointer(const KXLDSym *sym)
     __attribute__((pure, nonnull, visibility("hidden")));
 
+boolean_t kxld_sym_name_is_pure_virtual(const char *name)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
 boolean_t kxld_sym_name_is_padslot(const char *name)
     __attribute__((pure, nonnull, visibility("hidden")));
 
@@ -224,13 +228,13 @@ u_long kxld_sym_get_function_prefix_from_class_name(const char *class_name,
 
 #if KXLD_USER_OR_ILP32
 kern_return_t kxld_sym_export_macho_32(const KXLDSym *sym, u_char *nl, 
-    char *strtab, u_long *stroff, u_long strsize, boolean_t is_link_state)
+    char *strtab, u_long *stroff, u_long strsize)
     __attribute__((nonnull, visibility("hidden")));
 #endif
 
 #if KXLD_USER_OR_LP64
 kern_return_t kxld_sym_export_macho_64(const KXLDSym *sym, u_char *nl, 
-    char *strtab, u_long *stroff, u_long strsize, boolean_t is_link_state)
+    char *strtab, u_long *stroff, u_long strsize)
     __attribute__((nonnull, visibility("hidden")));
 #endif
 
@@ -246,8 +250,7 @@ void kxld_sym_set_got(KXLDSym *sym)
     __attribute__((nonnull, visibility("hidden")));
 #endif /* KXLD_USER_OR_GOT */
 
-kern_return_t kxld_sym_resolve(KXLDSym *sym, const kxld_addr_t addr,
-    boolean_t export_sym)
+kern_return_t kxld_sym_resolve(KXLDSym *sym, const kxld_addr_t addr)
     __attribute__((nonnull, visibility("hidden")));
 
 #if KXLD_USER_OR_COMMON
diff --git a/libkern/kxld/kxld_symtab.c b/libkern/kxld/kxld_symtab.c
index 569bd1bbe..6700774f4 100644
--- a/libkern/kxld/kxld_symtab.c
+++ b/libkern/kxld/kxld_symtab.c
@@ -47,33 +47,33 @@ struct kxld_symtab {
     KXLDDict name_index;
     char *strings;
     u_int strsize;
+    boolean_t cxx_index_initialized;
+    boolean_t name_index_initialized;
 };
 
 /*******************************************************************************
 * Prototypes
 *******************************************************************************/
 
-static kern_return_t init_macho(KXLDSymtab *symtab, u_char *macho, 
-    struct symtab_command *src, kxld_addr_t linkedit_offset, boolean_t is_32_bit)
-    __attribute__((nonnull));
+static kern_return_t init_macho(KXLDSymtab *symtab, struct symtab_command *src,
+    u_char *macho, KXLDSeg * kernel_linkedit_seg,
+    boolean_t is_32_bit)
+    __attribute__((nonnull(1,2)));
 
 #if KXLD_USER_OR_ILP32
-static kern_return_t init_syms_32(KXLDSymtab *symtab, u_char *macho, u_long offset, 
+static kern_return_t init_syms_32(KXLDSymtab *symtab, u_char *macho, u_long offset,
     u_int nsyms);
 #endif
 #if KXLD_USER_OR_LP64
-static kern_return_t init_syms_64(KXLDSymtab *symtab, u_char *macho, u_long offset, 
+static kern_return_t init_syms_64(KXLDSymtab *symtab, u_char *macho, u_long offset,
     u_int nsyms);
 #endif
 
-static kern_return_t make_cxx_index(KXLDSymtab *symtab)
+static void restrict_private_symbols(KXLDSymtab *symtab)
     __attribute__((nonnull));
 static boolean_t sym_is_defined_cxx(const KXLDSym *sym);
-static kern_return_t make_name_index(KXLDSymtab *symtab)
-    __attribute__((nonnull));
 static boolean_t sym_is_name_indexed(const KXLDSym *sym);
 
-
 /*******************************************************************************
 *******************************************************************************/
 size_t
@@ -86,10 +86,11 @@ kxld_symtab_sizeof()
 /*******************************************************************************
 *******************************************************************************/
 kern_return_t
-kxld_symtab_init_from_macho_32(KXLDSymtab *symtab, u_char *macho, 
-    struct symtab_command *src, kxld_addr_t linkedit_offset)
+kxld_symtab_init_from_macho_32(KXLDSymtab *symtab, struct symtab_command *src,
+    u_char *macho, KXLDSeg * kernel_linkedit_seg)
 {
-    return init_macho(symtab, macho, src, linkedit_offset, TRUE);
+    return init_macho(symtab, src, macho, kernel_linkedit_seg,
+        /* is_32_bit */ TRUE);
 }
 #endif /* KXLD_USER_ILP32 */
 
@@ -97,24 +98,28 @@ kxld_symtab_init_from_macho_32(KXLDSymtab *symtab, u_char *macho,
 /*******************************************************************************
 *******************************************************************************/
 kern_return_t
-kxld_symtab_init_from_macho_64(KXLDSymtab *symtab, u_char *macho, 
-    struct symtab_command *src, kxld_addr_t linkedit_offset)
+kxld_symtab_init_from_macho_64(KXLDSymtab *symtab, struct symtab_command *src,
+    u_char *macho, KXLDSeg * kernel_linkedit_seg)
 {
-    return init_macho(symtab, macho, src, linkedit_offset, FALSE);
+    return init_macho(symtab, src, macho, kernel_linkedit_seg,
+        /* is_32_bit */ FALSE);
 }
 #endif /* KXLD_USER_OR_LP64 */
 
 /*******************************************************************************
 *******************************************************************************/
 static kern_return_t
-init_macho(KXLDSymtab *symtab, u_char *macho, struct symtab_command *src,
-    kxld_addr_t linkedit_offset, boolean_t is_32_bit __unused)
+init_macho(KXLDSymtab *symtab, struct symtab_command *src,
+    u_char *macho, KXLDSeg * kernel_linkedit_seg,
+    boolean_t is_32_bit __unused)
 {
     kern_return_t rval = KERN_FAILURE;
+    u_long symoff;
+    u_char * macho_or_linkedit = macho;
 
     check(symtab);
-    check(macho);
     check(src);
+    check(macho);
 
     /* Initialize the symbol array */
 
@@ -123,26 +128,45 @@ init_macho(KXLDSymtab *symtab, u_char *macho, struct symtab_command *src,
 
     /* Initialize the string table */
 
-    symtab->strings = (char *) (macho + src->stroff + linkedit_offset);
+    if (kernel_linkedit_seg) {
+
+        /* If initializing from the kernel file in memory, we can't trust the
+         * symtab offsets directly, because the kernel has been mapped into
+         * memory and the mach-o offsets are disk-based.
+         *
+         * The symoff is an offset relative to the linkedit segment, so we
+         * just subtract the linkedit segment's file offset to get its
+         * relative start.
+         *
+         * The string table is an actual pointer, so we calculate that from
+         * the linkedit's vmaddr.
+         *
+         * Further, the init_syms_... functions need an adjusted base
+         * pointer instead of the beginning of the macho, so we substitute
+         * the base of the linkedit segment.
+         */
+
+        symoff = (u_long)(src->symoff - kernel_linkedit_seg->fileoff);
+        symtab->strings = (char *)(uintptr_t)kernel_linkedit_seg->base_addr +
+            src->stroff - kernel_linkedit_seg->fileoff;
+        macho_or_linkedit = (u_char *)(uintptr_t)kernel_linkedit_seg->base_addr;
+    } else {
+        symoff = (u_long)src->symoff;
+        symtab->strings = (char *) (macho + src->stroff);
+    }
+
     symtab->strsize = src->strsize;
 
     /* Initialize the symbols */
 
     KXLD_3264_FUNC(is_32_bit, rval,
         init_syms_32, init_syms_64,
-        symtab, macho, (u_long) (src->symoff + linkedit_offset), src->nsyms);
-    require_noerr(rval, finish);
-       
-    /* Create the C++ index */
-
-    rval = make_cxx_index(symtab);
-    require_noerr(rval, finish);
-
-    /* Create the name index */
-
-    rval = make_name_index(symtab);
+        symtab, macho_or_linkedit, symoff, src->nsyms);
     require_noerr(rval, finish);
 
+    /* Some symbols must be forced private for compatibility */
+    (void) restrict_private_symbols(symtab);
+       
     /* Save the output */
 
     rval = KERN_SUCCESS;
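
To make the rebasing arithmetic in init_macho concrete, here is a minimal standalone C sketch; the struct layouts and all numeric values are hypothetical stand-ins for KXLDSeg and struct symtab_command, not the real definitions:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for the fields init_macho reads. */
    struct linkedit_seg { uint64_t fileoff; uint64_t base_addr; };
    struct symtab_cmd   { uint64_t symoff; uint64_t stroff; };

    int
    main(void)
    {
        /* Made-up offsets: the symtab and strings live inside __LINKEDIT. */
        struct linkedit_seg seg = { .fileoff = 0x200000, .base_addr = 0xa00000 };
        struct symtab_cmd   src = { .symoff = 0x200100, .stroff = 0x210000 };

        /* symoff becomes relative to the start of the mapped linkedit segment. */
        unsigned long symoff = (unsigned long)(src.symoff - seg.fileoff);

        /* The string table becomes a real pointer off the linkedit base. */
        char *strings = (char *)(uintptr_t)(seg.base_addr + src.stroff - seg.fileoff);

        printf("symoff  = 0x%lx\n", symoff);        /* prints 0x100 */
        printf("strings = %p\n", (void *)strings);  /* linkedit base + 0x10000 */
        return 0;
    }
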
@@ -153,6 +177,7 @@ finish:
 
 #if KXLD_USER_OR_ILP32
 /*******************************************************************************
+* In the running kernel, 'macho' is actually the start of the linkedit segment.
 *******************************************************************************/
 static kern_return_t
 init_syms_32(KXLDSymtab *symtab, u_char *macho, u_long offset, u_int nsyms)
@@ -179,6 +204,7 @@ finish:
 
 #if KXLD_USER_OR_LP64
 /*******************************************************************************
+* In the running kernel, 'macho' is actually the start of the linkedit segment.
 *******************************************************************************/
 static kern_return_t
 init_syms_64(KXLDSymtab *symtab, u_char *macho, u_long offset, u_int nsyms)
@@ -203,6 +229,41 @@ finish:
 }
 #endif /* KXLD_USER_OR_LP64 */
 
+/*******************************************************************************
+* Temporary workaround for PR-6668105
+* new, new[], delete, and delete[] may be overridden globally in a kext.
+* Ideally we would support this with weak symbols, but for now we use a
+* whitelist to minimize risk.
+*******************************************************************************/
+static void
+restrict_private_symbols(KXLDSymtab *symtab)
+{
+    const char *private_symbols[] = {
+        KXLD_KMOD_INFO_SYMBOL,
+        KXLD_OPERATOR_NEW_SYMBOL,
+        KXLD_OPERATOR_NEW_ARRAY_SYMBOL,
+        KXLD_OPERATOR_DELETE_SYMBOL,
+        KXLD_OPERATOR_DELETE_ARRAY_SYMBOL
+    };
+    KXLDSymtabIterator iter;
+    KXLDSym *sym = NULL;
+    const char *name = NULL;
+    u_int i = 0;
+
+    kxld_symtab_iterator_init(&iter, symtab, kxld_sym_is_exported, FALSE);
+    while ((sym = kxld_symtab_iterator_get_next(&iter))) {
+        for (i = 0; i < const_array_len(private_symbols); ++i) {
+            name = private_symbols[i];
+            if (!streq(sym->name, name)) {
+                continue;
+            }
+
+            kxld_sym_mark_private(sym);
+        }
+    }
+}
+
 /*******************************************************************************
 *******************************************************************************/
 void
@@ -229,6 +290,10 @@ kxld_symtab_clear(KXLDSymtab *symtab)
     kxld_array_clear(&symtab->syms);
     kxld_dict_clear(&symtab->cxx_index);
     kxld_dict_clear(&symtab->name_index);
+    symtab->strings = NULL;
+    symtab->strsize = 0;
+    symtab->cxx_index_initialized = 0;
+    symtab->name_index_initialized = 0;
 }
 
 /*******************************************************************************
@@ -241,6 +306,7 @@ kxld_symtab_deinit(KXLDSymtab *symtab)
     kxld_array_deinit(&symtab->syms);
     kxld_dict_deinit(&symtab->cxx_index);
     kxld_dict_deinit(&symtab->name_index);
+    bzero(symtab, sizeof(*symtab));
 }
 
 /*******************************************************************************
@@ -265,8 +331,28 @@ kxld_symtab_get_symbol_by_index(const KXLDSymtab *symtab, u_int idx)
 
 /*******************************************************************************
 *******************************************************************************/
-KXLDSym *
+KXLDSym * 
 kxld_symtab_get_symbol_by_name(const KXLDSymtab *symtab, const char *name)
+{
+    KXLDSym *sym = NULL;
+    u_int i = 0;
+
+    for (i = 0; i < symtab->syms.nitems; ++i) {
+        sym = kxld_array_get_item(&symtab->syms, i);
+
+        if (streq(sym->name, name)) {
+            return sym;
+        }
+    }
+    
+    return NULL;
+}
+
+/*******************************************************************************
+*******************************************************************************/
+KXLDSym *
+kxld_symtab_get_locally_defined_symbol_by_name(const KXLDSymtab *symtab, 
+    const char *name)
 {
     check(symtab);
     check(name);
@@ -281,14 +367,7 @@ kxld_symtab_get_cxx_symbol_by_value(const KXLDSymtab *symtab, kxld_addr_t value)
 {
     check(symtab);
 
-    /*
-     * value may hold a THUMB address (with bit 0 set to 1) but the index will
-     * have the real address (bit 0 set to 0).  So if bit 0 is set here,
-     * we clear it (should impact no architectures but ARM).
-     */
-    kxld_addr_t v = value & ~1;
-
-    return kxld_dict_find(&symtab->cxx_index, &v);
+    return kxld_dict_find(&symtab->cxx_index, &value);
 }
 
 /*******************************************************************************
@@ -319,8 +398,7 @@ kxld_symtab_get_macho_header_size(void)
 /*******************************************************************************
 *******************************************************************************/
 u_long 
-kxld_symtab_get_macho_data_size(const KXLDSymtab *symtab, 
-    boolean_t is_link_state, boolean_t is_32_bit)
+kxld_symtab_get_macho_data_size(const KXLDSymtab *symtab, boolean_t is_32_bit)
 {
     KXLDSymtabIterator iter;
     KXLDSym *sym = NULL;
@@ -329,12 +407,8 @@ kxld_symtab_get_macho_data_size(const KXLDSymtab *symtab,
     
     check(symtab); 
 
-    if (is_link_state) {
-        kxld_symtab_iterator_init(&iter, symtab, kxld_sym_is_exported, FALSE);
-    } else {
-        kxld_symtab_iterator_init(&iter, symtab, 
-            kxld_sym_is_defined_locally, FALSE);
-    }
+    kxld_symtab_iterator_init(&iter, symtab, 
+        kxld_sym_is_defined_locally, FALSE);
 
     while ((sym = kxld_symtab_iterator_get_next(&iter))) {
         size += strlen(sym->name) + 1;
@@ -356,7 +430,7 @@ kern_return_t
 kxld_symtab_export_macho(const KXLDSymtab *symtab, u_char *buf, 
     u_long *header_offset, u_long header_size,
     u_long *data_offset, u_long data_size,
-    boolean_t is_link_state, boolean_t is_32_bit)
+    boolean_t is_32_bit)
 {
     kern_return_t rval = KERN_FAILURE;
     KXLDSymtabIterator iter;
@@ -386,12 +460,8 @@ kxld_symtab_export_macho(const KXLDSymtab *symtab, u_char *buf,
     
     /* Find the size of the symbol and string tables */
 
-    if (is_link_state) {
-        kxld_symtab_iterator_init(&iter, symtab, kxld_sym_is_exported, FALSE);
-    } else {
-        kxld_symtab_iterator_init(&iter, symtab, 
-            kxld_sym_is_defined_locally, FALSE);
-    }
+    kxld_symtab_iterator_init(&iter, symtab, 
+        kxld_sym_is_defined_locally, FALSE);
 
     while ((sym = kxld_symtab_iterator_get_next(&iter))) {
         symtabhdr->nsyms++;
@@ -421,7 +491,7 @@ kxld_symtab_export_macho(const KXLDSymtab *symtab, u_char *buf,
 
         KXLD_3264_FUNC(is_32_bit, rval,
             kxld_sym_export_macho_32, kxld_sym_export_macho_64,
-            sym, nl, strtab, &stroff, symtabhdr->strsize, is_link_state);
+            sym, nl, strtab, &stroff, symtabhdr->strsize);
         require_noerr(rval, finish);
 
         nl += nlistsize;
@@ -447,8 +517,6 @@ kxld_symtab_iterator_get_num_remaining(const KXLDSymtabIterator *iter)
 
     check(iter);
 
-    idx = iter->idx;
-
     for (idx = iter->idx; idx < iter->symtab->syms.nitems; ++idx) {
         count += iter->test(kxld_array_get_item(&iter->symtab->syms, idx));
     }
@@ -458,8 +526,8 @@ kxld_symtab_iterator_get_num_remaining(const KXLDSymtabIterator *iter)
 
 /*******************************************************************************
 *******************************************************************************/
-static kern_return_t
-make_cxx_index(KXLDSymtab *symtab)
+kern_return_t
+kxld_symtab_index_cxx_symbols_by_value(KXLDSymtab *symtab)
 {
     kern_return_t rval = KERN_FAILURE;
     KXLDSymtabIterator iter;
@@ -468,6 +536,11 @@ make_cxx_index(KXLDSymtab *symtab)
 
     check(symtab);
 
+    if (symtab->cxx_index_initialized) {
+        rval = KERN_SUCCESS;
+        goto finish;
+    }
+
     /* Count the number of C++ symbols */
     kxld_symtab_iterator_init(&iter, symtab, sym_is_defined_cxx, FALSE);
     nsyms = kxld_symtab_iterator_get_num_remaining(&iter);
@@ -483,10 +556,9 @@ make_cxx_index(KXLDSymtab *symtab)
         require_noerr(rval, finish);
     }
 
+    symtab->cxx_index_initialized = TRUE;
     rval = KERN_SUCCESS;
-
 finish:
-
     return rval;
 }
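
The builder above is now lazily idempotent, and its by-name twin below follows the same shape: a boolean guard makes every call after the first essentially free. The pattern in miniature (a standalone sketch with toy names):

    #include <stdio.h>

    static int index_initialized = 0;

    static int
    build_index(void)
    {
        if (index_initialized) return 0;   /* already built: nothing to do */

        /* ... the expensive one-time indexing work would go here ... */

        index_initialized = 1;
        return 0;
    }

    int
    main(void)
    {
        build_index();
        build_index();                     /* second call is a no-op */
        printf("initialized: %d\n", index_initialized);
        return 0;
    }
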
 
@@ -500,8 +572,8 @@ sym_is_defined_cxx(const KXLDSym *sym)
 
 /*******************************************************************************
 *******************************************************************************/
-static kern_return_t
-make_name_index(KXLDSymtab *symtab)
+kern_return_t
+kxld_symtab_index_symbols_by_name(KXLDSymtab *symtab)
 {
     kern_return_t rval = KERN_FAILURE;
     KXLDSymtabIterator iter;
@@ -510,6 +582,11 @@ make_name_index(KXLDSymtab *symtab)
 
     check(symtab);
 
+    if (symtab->name_index_initialized) {
+        rval = KERN_SUCCESS;
+        goto finish;
+    }
+
     /* Count the number of symbols we need to index by name */
     kxld_symtab_iterator_init(&iter, symtab, sym_is_name_indexed, FALSE);
     nsyms = kxld_symtab_iterator_get_num_remaining(&iter);
@@ -525,23 +602,18 @@ make_name_index(KXLDSymtab *symtab)
         require_noerr(rval, finish);
     }
 
+    symtab->name_index_initialized = TRUE;
     rval = KERN_SUCCESS;
-
 finish:
 
     return rval;
 }
-
 /*******************************************************************************
 *******************************************************************************/
 static boolean_t
 sym_is_name_indexed(const KXLDSym *sym)
 {
-    return (kxld_sym_is_vtable(sym)                     ||
-        streq_safe(sym->name, KXLD_KMOD_INFO_SYMBOL, 
-            const_strlen(KXLD_KMOD_INFO_SYMBOL))        ||
-        streq_safe(sym->name, KXLD_WEAK_TEST_SYMBOL,
-            const_strlen(KXLD_WEAK_TEST_SYMBOL)));
+    return (kxld_sym_is_defined_locally(sym) && !kxld_sym_is_stab(sym));
 }
 
 /*******************************************************************************
diff --git a/libkern/kxld/kxld_symtab.h b/libkern/kxld/kxld_symtab.h
index cc2d91cec..a5a038756 100644
--- a/libkern/kxld/kxld_symtab.h
+++ b/libkern/kxld/kxld_symtab.h
@@ -37,6 +37,7 @@
 #endif
 
 #include "kxld_sym.h"
+#include "kxld_seg.h"
 
 struct kxld_array;
 struct symtab_command;
@@ -55,18 +56,18 @@ struct kxld_symtab_iterator {
 *******************************************************************************/
 
 size_t kxld_symtab_sizeof(void)
-    __attribute__((const, nonnull, visibility("hidden")));
+    __attribute__((const, visibility("hidden")));
 
 #if KXLD_USER_OR_ILP32
-kern_return_t kxld_symtab_init_from_macho_32(KXLDSymtab *symtab, u_char *macho,
-    struct symtab_command *src, kxld_addr_t linkedit_offset) 
-    __attribute__((nonnull, visibility("hidden")));
+kern_return_t kxld_symtab_init_from_macho_32(KXLDSymtab *symtab,
+    struct symtab_command *src, u_char *macho, KXLDSeg * kernel_linkedit_seg)
+    __attribute__((nonnull(1,2), visibility("hidden")));
 #endif /* KXLD_USER_OR_ILP32 */
 
 #if KXLD_USER_OR_LP64
-kern_return_t kxld_symtab_init_from_macho_64(KXLDSymtab *symtab, u_char *macho,
-    struct symtab_command *src, kxld_addr_t linkedit_offset) 
-    __attribute__((nonnull, visibility("hidden")));
+kern_return_t kxld_symtab_init_from_macho_64(KXLDSymtab *symtab,
+    struct symtab_command *src, u_char *macho, KXLDSeg * kernel_linkedit_seg)
+    __attribute__((nonnull(1,2), visibility("hidden")));
 #endif /* KXLD_USER_OR_ILP64 */
 
 void kxld_symtab_iterator_init(KXLDSymtabIterator *iter, 
@@ -89,30 +90,34 @@ u_int kxld_symtab_get_num_symbols(const KXLDSymtab *symtab)
 KXLDSym * kxld_symtab_get_symbol_by_index(const KXLDSymtab *symtab, u_int idx)
     __attribute__((pure, nonnull, visibility("hidden")));
 
-KXLDSym * kxld_symtab_get_symbol_by_name(const KXLDSymtab *symtab,
+KXLDSym * kxld_symtab_get_symbol_by_name(const KXLDSymtab *symtab, 
     const char *name)
     __attribute__((pure, nonnull, visibility("hidden")));
 
+KXLDSym * kxld_symtab_get_locally_defined_symbol_by_name(
+    const KXLDSymtab *symtab, const char *name)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
 KXLDSym * kxld_symtab_get_cxx_symbol_by_value(const KXLDSymtab *symtab,
     kxld_addr_t value)
     __attribute__((pure, nonnull, visibility("hidden")));
     
 kern_return_t kxld_symtab_get_sym_index(const KXLDSymtab *symtab, 
     const KXLDSym * sym, u_int *idx)
-    __attribute__((pure, nonnull, visibility("hidden")));
+    __attribute__((nonnull, visibility("hidden")));
 
 u_long kxld_symtab_get_macho_header_size(void)
     __attribute__((pure, visibility("hidden")));
 
 u_long kxld_symtab_get_macho_data_size(const KXLDSymtab *symtab, 
-    boolean_t is_link_state, boolean_t is_32_bit)
+    boolean_t is_32_bit)
     __attribute__((pure, nonnull, visibility("hidden")));
 
 kern_return_t
 kxld_symtab_export_macho(const KXLDSymtab *symtab, u_char *buf, 
     u_long *header_offset, u_long header_size,
     u_long *data_offset, u_long data_size, 
-    boolean_t is_link_state, boolean_t is_32_bit)
+    boolean_t is_32_bit)
     __attribute__((nonnull, visibility("hidden")));
     
 u_int kxld_symtab_iterator_get_num_remaining(const KXLDSymtabIterator *iter)
@@ -122,6 +127,12 @@ u_int kxld_symtab_iterator_get_num_remaining(const KXLDSymtabIterator *iter)
 * Modifiers 
 *******************************************************************************/
 
+kern_return_t kxld_symtab_index_symbols_by_name(KXLDSymtab *symtab)
+    __attribute__((nonnull, visibility("hidden")));
+
+kern_return_t kxld_symtab_index_cxx_symbols_by_value(KXLDSymtab *symtab)
+    __attribute__((nonnull, visibility("hidden")));
+
 kern_return_t kxld_symtab_relocate(KXLDSymtab *symtab,
     const struct kxld_array *sectarray)
     __attribute__((nonnull, visibility("hidden")));
diff --git a/libkern/kxld/kxld_util.c b/libkern/kxld/kxld_util.c
index 35dc1066b..2f7a10643 100644
--- a/libkern/kxld/kxld_util.c
+++ b/libkern/kxld/kxld_util.c
@@ -105,8 +105,8 @@ kxld_log(KXLDLogSubsystem subsystem, KXLDLogLevel level,
             alloc_buffer = kxld_alloc(length);
             if (!alloc_buffer) return;
 
-            snprintf(alloc_buffer, sizeof(alloc_buffer), "kxld[%s]: %s",
-                name, format);
+            snprintf(alloc_buffer, length, "kxld[%s]: %s",
+                name, in_format);
             format = alloc_buffer;
         }
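
The one-line fix above deserves a gloss: sizeof applied to a heap pointer yields the size of the pointer itself (typically 8 bytes), not of the allocation, so the old call silently truncated the log prefix. A standalone sketch of the pitfall (the bundle name is made up):

    #include <stdio.h>
    #include <stdlib.h>

    int
    main(void)
    {
        size_t length = 64;
        char *buf = malloc(length);
        if (!buf) return 1;

        /* WRONG: sizeof(buf) is sizeof(char *), so at most 7 chars + NUL fit. */
        snprintf(buf, sizeof(buf), "kxld[%s]: %s", "com.example.driver", "hi");
        printf("truncated: '%s'\n", buf);

        /* RIGHT: pass the allocated length, as the patched code now does. */
        snprintf(buf, length, "kxld[%s]: %s", "com.example.driver", "hi");
        printf("correct:   '%s'\n", buf);

        free(buf);
        return 0;
    }
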
 
@@ -310,6 +310,17 @@ validate_and_swap_macho_32(u_char *file, u_long size
         kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
             "Invalid magic number: 0x%x.", mach_hdr->magic));
 
+    /* If in the running kernel, and asked to validate the kernel
+     * (which is the only file of type MH_EXECUTE we should ever see),
+     * then just assume it's ok or we wouldn't be running to begin with.
+     */
+#if KERNEL
+    if (mach_hdr->filetype == MH_EXECUTE) {
+        rval = KERN_SUCCESS;
+        goto finish;
+    }
+#endif /* KERNEL */
+
     /* Validate and potentially swap the load commands */
     for(i = 0; i < mach_hdr->ncmds; ++i, offset += cmdsize) {
 
@@ -470,6 +481,17 @@ validate_and_swap_macho_64(u_char *file, u_long size
         kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO
             "Invalid magic number: 0x%x.", mach_hdr->magic));
 
+    /* If in the running kernel, and asked to validate the kernel
+     * (which is the only file of type MH_EXECUTE we should ever see),
+     * then just assume it's ok or we wouldn't be running to begin with.
+     */
+#if KERNEL
+    if (mach_hdr->filetype == MH_EXECUTE) {
+        rval = KERN_SUCCESS;
+        goto finish;
+    }
+#endif /* KERNEL */
+
     /* Validate and potentially swap the load commands */
     for(i = 0; i < mach_hdr->ncmds; ++i, offset += cmdsize) {
         /* Get the load command and size */
@@ -717,6 +739,8 @@ kxld_align_address(kxld_addr_t address, u_int align)
     kxld_addr_t alignment = (1 << align);
     kxld_addr_t low_bits = 0;
 
+    if (!align) return address;
+
     low_bits = (address) & (alignment - 1);
     if (low_bits) {
         address += (alignment - low_bits);
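
For reference, kxld_align_address rounds an address up to the next (1 << align)-byte boundary, and the new guard returns align == 0 inputs untouched. The same arithmetic in a standalone sketch with sample values (the helper name is a stand-in):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t
    align_address(uint64_t address, unsigned int align)
    {
        uint64_t alignment = (1ULL << align);
        uint64_t low_bits;

        if (!align) return address;

        low_bits = address & (alignment - 1);
        if (low_bits) {
            address += (alignment - low_bits);
        }
        return address;
    }

    int
    main(void)
    {
        /* align == 12 means a 4096-byte (page) boundary. */
        printf("0x%llx\n", (unsigned long long)align_address(0x1234, 12)); /* 0x2000 */
        printf("0x%llx\n", (unsigned long long)align_address(0x2000, 12)); /* 0x2000 */
        return 0;
    }
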
diff --git a/libkern/kxld/kxld_util.h b/libkern/kxld/kxld_util.h
index 3392b4a74..9d5720f04 100644
--- a/libkern/kxld/kxld_util.h
+++ b/libkern/kxld/kxld_util.h
@@ -119,13 +119,15 @@ void kxld_log(KXLDLogSubsystem subsystem, KXLDLogLevel level,
 #define kKxldLogMalformedMachO          "The Mach-O file is malformed: "
 #define kKxldLogMalformedVTable         "The vtable '%s' is malformed. Make sure your kext has been built against the correct headers."
 #define kKxldLogMissingVtable           "Cannot find the vtable '%s' for class '%s'. This vtable symbol is required for binary compatibility, and it may have been stripped."
+#define kKxldLogDirectPureVirtualCall   "This kext calls a pure virtual function. Make sure your kext's OSObject-derived classes implement all pure virtual functions."
 #define kKxldLogParentOutOfDate         "The super class vtable '%s' for vtable '%s' is out of date. Make sure your kext has been built against the correct headers."
 #define kKxldLogNoKmodInfo              "The kext is missing its kmod_info structure."
-#define kKxldLogInvalidSectReloc        "Relocation entry %u from section %s,%s cannot be processed."
-#define kKxldLogInvalidExtReloc         "External relocation entry %u cannot be processed."
-#define kKxldLogInvalidIntReloc         "Internal relocation entry %u cannot be processed."
 #define kKxldLogRelocationOverflow      "A relocation entry has overflowed. The kext may be too far from one " \
                                         "of its dependencies. Check your kext's load address."
+#define kKxldLogRelocatingPatchedSym    "Relocation failed because some class in this kext "     \
+    "didn't use the OSDeclareDefaultStructors and OSDefineMetaClassAndStructors macros, so it "  \
+    "still references %s, which has been patched with another symbol for binary compatibility. " \
+    "Please make sure all classes that inherit from OSObject use these macros."
 
 /*******************************************************************************
 * Allocators 
@@ -183,10 +185,10 @@ void unswap_macho(u_char *file, enum NXByteOrder host_order,
 *******************************************************************************/
 
 kxld_addr_t kxld_align_address(kxld_addr_t address, u_int align)
-    __attribute__((const, nonnull, visibility("hidden")));
+    __attribute__((const, visibility("hidden")));
 
 boolean_t kxld_is_32_bit(cpu_type_t)
-    __attribute__((const, nonnull, visibility("hidden")));
+    __attribute__((const, visibility("hidden")));
 
 const char * kxld_strstr(const char *s, const char *find)
     __attribute__((pure, nonnull, visibility("hidden")));
diff --git a/libkern/kxld/kxld_vtable.c b/libkern/kxld/kxld_vtable.c
index 208c030d9..e792d3842 100644
--- a/libkern/kxld/kxld_vtable.c
+++ b/libkern/kxld/kxld_vtable.c
@@ -29,13 +29,24 @@
 #include <mach-o/loader.h>
 #include <sys/types.h>
 
+#if KERNEL
+    #ifdef MACH_ASSERT
+        #undef MACH_ASSERT
+    #endif
+    #define MACH_ASSERT 1
+    #include <kern/assert.h>
+#else
+    #include <assert.h>
+#endif
+
 #define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
 #include <AssertMacros.h>
 
 #include "kxld_demangle.h"
+#include "kxld_dict.h"
+#include "kxld_object.h"
 #include "kxld_reloc.h"
 #include "kxld_sect.h"
-#include "kxld_state.h"
 #include "kxld_sym.h"
 #include "kxld_symtab.h"
 #include "kxld_util.h"
@@ -49,235 +60,102 @@
 #define VTABLE_HEADER_LEN_64 2
 #define VTABLE_HEADER_SIZE_64 (VTABLE_HEADER_LEN_64 * VTABLE_ENTRY_SIZE_64)
 
-static kern_return_t init_by_relocs(KXLDVTable *vtable, const KXLDSym *sym,
-    const KXLDSect *sect, const KXLDSymtab *symtab, 
-    const KXLDRelocator *relocator);
-
-static kern_return_t init_by_entries_and_relocs(KXLDVTable *vtable, 
-    const KXLDSym *sym, const KXLDSymtab *symtab, 
-    const KXLDRelocator *relocator, const KXLDArray *relocs);
-
-static kxld_addr_t get_entry_value(u_char *entry, const KXLDRelocator *relocator)
-    __attribute__((pure));
-#if !KERNEL
-static kxld_addr_t swap_entry_value(kxld_addr_t entry_value, 
-    const KXLDRelocator *relocator) __attribute__((const));
-#endif /* !KERNEL */
-static kern_return_t init_by_entries(KXLDVTable *vtable, const KXLDSymtab *symtab,
-    const KXLDRelocator *relocator);
+static void get_vtable_base_sizes(boolean_t is_32_bit, u_int *vtable_entry_size,
+    u_int *vtable_header_size);
 
-/*******************************************************************************
-*******************************************************************************/
-kern_return_t
-kxld_vtable_init_from_kernel_macho(KXLDVTable *vtable, const KXLDSym *sym, 
-    const KXLDSect *sect, const KXLDSymtab *symtab, 
-    const KXLDRelocator *relocator)
-{
-    kern_return_t rval = KERN_FAILURE;
-    char *demangled_name = NULL;
-    size_t demangled_length = 0;
-
-    check(vtable);
-    check(sym);
-    check(sect);
-    check(symtab);
+static kern_return_t init_by_relocs(KXLDVTable *vtable, const KXLDSym *vtable_sym,
+    const KXLDSect *sect, const KXLDRelocator *relocator);
 
-    vtable->name = sym->name;
-    vtable->vtable = sect->data + kxld_sym_get_section_offset(sym, sect);
-    vtable->is_patched = FALSE;
-
-    require_action(kxld_sect_get_num_relocs(sect) == 0, finish,
-        rval=KERN_FAILURE;
-        kxld_log(kKxldLogPatching, kKxldLogErr, 
-            kKxldLogMalformedVTable,
-            kxld_demangle(vtable->name, &demangled_name, &demangled_length)));
-
-    rval = init_by_entries(vtable, symtab, relocator);
-    require_noerr(rval, finish);
-
-    vtable->is_patched = TRUE;
-
-    rval = KERN_SUCCESS;
-
-finish:
-    if (rval) kxld_vtable_deinit(vtable);
-    if (demangled_name) kxld_free(demangled_name, demangled_length);
+static kern_return_t init_by_entries_and_relocs(KXLDVTable *vtable, 
+    const KXLDSym *vtable_sym, const KXLDRelocator *relocator, 
+    const KXLDArray *relocs, const KXLDDict *defined_cxx_symbols);
 
-    return rval;
-}
+static kern_return_t init_by_entries(KXLDVTable *vtable,
+    const KXLDRelocator *relocator, const KXLDDict *defined_cxx_symbols);
 
 /*******************************************************************************
 *******************************************************************************/
-kern_return_t
-kxld_vtable_init_from_object_macho(KXLDVTable *vtable, const KXLDSym *sym, 
-    const KXLDSect *sect, const KXLDSymtab *symtab, 
-    const KXLDRelocator *relocator)
+kern_return_t 
+kxld_vtable_init(KXLDVTable *vtable, const KXLDSym *vtable_sym, 
+    const KXLDObject *object, const KXLDDict *defined_cxx_symbols)
 {
     kern_return_t rval = KERN_FAILURE;
+    const KXLDArray *extrelocs = NULL;
+    const KXLDRelocator *relocator = NULL;
+    const KXLDSect *vtable_sect = NULL;
     char *demangled_name = NULL;
     size_t demangled_length = 0;
 
     check(vtable);
-    check(sym);
-    check(sect);
-    check(symtab);
+    check(vtable_sym);
+    check(object);
 
-    vtable->name = sym->name;
-    vtable->vtable = sect->data + kxld_sym_get_section_offset(sym, sect);
-    vtable->is_patched = FALSE;
+    relocator = kxld_object_get_relocator(object);
 
-    require_action(kxld_sect_get_num_relocs(sect) > 0, finish,
-        rval=KERN_FAILURE;
-        kxld_log(kKxldLogPatching, kKxldLogErr, 
-            kKxldLogMalformedVTable, 
-            kxld_demangle(vtable->name, &demangled_name, &demangled_length)));
-
-    rval = init_by_relocs(vtable, sym, sect, symtab, relocator);
-    require_noerr(rval, finish);
+    vtable_sect = kxld_object_get_section_by_index(object, 
+        vtable_sym->sectnum);
+    require_action(vtable_sect, finish, rval=KERN_FAILURE);
 
-    rval = KERN_SUCCESS;
-
-finish:
-    if (rval) kxld_vtable_deinit(vtable);
-    if (demangled_name) kxld_free(demangled_name, demangled_length);
-
-    return rval;
-}
-
-/*******************************************************************************
-*******************************************************************************/
-kern_return_t
-kxld_vtable_init_from_final_macho(KXLDVTable *vtable, const KXLDSym *sym, 
-    const KXLDSect *sect, const KXLDSymtab *symtab, 
-    const KXLDRelocator *relocator, const KXLDArray *relocs)
-{
-    kern_return_t rval = KERN_FAILURE;
-    char *demangled_name = NULL;
-    size_t demangled_length = 0;
+    vtable->name = vtable_sym->name;
+    vtable->vtable = vtable_sect->data + 
+        kxld_sym_get_section_offset(vtable_sym, vtable_sect);
 
-    check(vtable);
-    check(sym);
-    check(sect);
-    check(symtab);
+    if (kxld_object_is_linked(object)) {
+        rval = init_by_entries(vtable, relocator, defined_cxx_symbols);
+        require_noerr(rval, finish);
 
-    vtable->name = sym->name;
-    vtable->vtable = sect->data + kxld_sym_get_section_offset(sym, sect);
-    vtable->is_patched = FALSE;
+        vtable->is_patched = TRUE;
+    } else {
+        if (kxld_object_is_final_image(object)) {
+            extrelocs = kxld_object_get_extrelocs(object);
+            require_action(extrelocs, finish,
+                rval=KERN_FAILURE;
+                kxld_log(kKxldLogPatching, kKxldLogErr, 
+                    kKxldLogMalformedVTable, 
+                    kxld_demangle(vtable->name, 
+                        &demangled_name, &demangled_length)));
 
-    require_action(kxld_sect_get_num_relocs(sect) == 0, finish,
-        rval=KERN_FAILURE;
-        kxld_log(kKxldLogPatching, kKxldLogErr, 
-            kKxldLogMalformedVTable, 
-            kxld_demangle(vtable->name, &demangled_name, &demangled_length)));
+            rval = init_by_entries_and_relocs(vtable, vtable_sym, 
+                relocator, extrelocs, defined_cxx_symbols);
+            require_noerr(rval, finish);
+        } else {
+            require_action(kxld_sect_get_num_relocs(vtable_sect) > 0, finish,
+                rval=KERN_FAILURE;
+                kxld_log(kKxldLogPatching, kKxldLogErr, 
+                    kKxldLogMalformedVTable, 
+                    kxld_demangle(vtable->name, 
+                        &demangled_name, &demangled_length)));
 
-    rval = init_by_entries_and_relocs(vtable, sym, symtab,
-        relocator, relocs);
-    require_noerr(rval, finish);
+            rval = init_by_relocs(vtable, vtable_sym, vtable_sect, relocator);
+            require_noerr(rval, finish);
+        }
+        
+        vtable->is_patched = FALSE;
+    }
 
     rval = KERN_SUCCESS;
-
 finish:
-    if (rval) kxld_vtable_deinit(vtable);
     if (demangled_name) kxld_free(demangled_name, demangled_length);
 
     return rval;
 }
 
-#if KXLD_USER_OR_ILP32
-/*******************************************************************************
-*******************************************************************************/
-kern_return_t 
-kxld_vtable_init_from_link_state_32(KXLDVTable *vtable, u_char *file, 
-    KXLDVTableHdr *hdr)
-{
-    kern_return_t rval = KERN_FAILURE;
-    KXLDSymEntry32 *sym = NULL;
-    KXLDVTableEntry *entry = NULL;
-    u_int i = 0;
-
-    check(vtable);
-    check(file);
-    check(hdr);
-
-    vtable->name = (char *) (file + hdr->nameoff);
-    vtable->is_patched = TRUE;
-
-    rval = kxld_array_init(&vtable->entries, sizeof(KXLDVTableEntry), 
-        hdr->nentries);
-    require_noerr(rval, finish);
-    
-    sym = (KXLDSymEntry32 *) (file + hdr->vtableoff);
-    for (i = 0; i < vtable->entries.nitems; ++i, ++sym) {
-        entry = kxld_array_get_item(&vtable->entries, i);
-        entry->patched.name = (char *) (file + sym->nameoff);
-        entry->patched.addr = sym->addr;
-    }
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-#endif /* KXLD_USER_OR_ILP32 */
-
-#if KXLD_USER_OR_LP64
 /*******************************************************************************
 *******************************************************************************/
-kern_return_t 
-kxld_vtable_init_from_link_state_64(KXLDVTable *vtable, u_char *file, 
-    KXLDVTableHdr *hdr)
+static void 
+get_vtable_base_sizes(boolean_t is_32_bit, u_int *vtable_entry_size,
+    u_int *vtable_header_size)
 {
-    kern_return_t rval = KERN_FAILURE;
-    KXLDSymEntry64 *sym = NULL;
-    KXLDVTableEntry *entry = NULL;
-    u_int i = 0;
-
-    check(vtable);
-    check(file);
-    check(hdr);
-
-    vtable->name = (char *) (file + hdr->nameoff);
-    vtable->is_patched = TRUE;
+    check(vtable_entry_size);
+    check(vtable_header_size);
 
-    rval = kxld_array_init(&vtable->entries, sizeof(KXLDVTableEntry), 
-        hdr->nentries);
-    require_noerr(rval, finish);
-    
-    sym = (KXLDSymEntry64 *) (file + hdr->vtableoff);
-    for (i = 0; i < vtable->entries.nitems; ++i, ++sym) {
-        entry = kxld_array_get_item(&vtable->entries, i);
-        entry->patched.name = (char *) (file + sym->nameoff);
-        entry->patched.addr = sym->addr;
+    if (is_32_bit) {
+        *vtable_entry_size = VTABLE_ENTRY_SIZE_32;
+        *vtable_header_size = VTABLE_HEADER_SIZE_32;
+    } else {
+        *vtable_entry_size = VTABLE_ENTRY_SIZE_64;
+        *vtable_header_size = VTABLE_HEADER_SIZE_64;
     }
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
-}
-#endif /* KXLD_USER_OR_LP64 */
-
-/*******************************************************************************
-*******************************************************************************/
-kern_return_t 
-kxld_vtable_copy(KXLDVTable *vtable, const KXLDVTable *src)
-{
-    kern_return_t rval = KERN_FAILURE;
-
-    check(vtable);
-    check(src);
-    
-    vtable->vtable = src->vtable;
-    vtable->name = src->name;
-    vtable->is_patched = src->is_patched;
-
-    rval = kxld_array_copy(&vtable->entries, &src->entries);
-    require_noerr(rval, finish);
-
-    rval = KERN_SUCCESS;
-
-finish:
-    return rval;
 }
 
 /*******************************************************************************
@@ -285,38 +163,35 @@ finish:
 * entries and finding the corresponding symbols.
 *******************************************************************************/
 static kern_return_t
-init_by_relocs(KXLDVTable *vtable, const KXLDSym *sym, const KXLDSect *sect, 
-    const KXLDSymtab *symtab, const KXLDRelocator *relocator)
+init_by_relocs(KXLDVTable *vtable, const KXLDSym *vtable_sym, 
+    const KXLDSect *sect, const KXLDRelocator *relocator)
 {
     kern_return_t rval = KERN_FAILURE;
     KXLDReloc *reloc = NULL;
     KXLDVTableEntry *entry = NULL;
-    KXLDSym *tmpsym = NULL;
+    KXLDSym *sym = NULL;
     kxld_addr_t vtable_base_offset = 0;
     kxld_addr_t entry_offset = 0;
     u_int i = 0;
     u_int nentries = 0;
     u_int vtable_entry_size = 0;
+    u_int vtable_header_size = 0;
     u_int base_reloc_index = 0;
     u_int reloc_index = 0;
 
     check(vtable);
-    check(sym);
+    check(vtable_sym);
     check(sect);
-    check(symtab);
     check(relocator);
 
     /* Find the first entry past the vtable padding */
 
-    vtable_base_offset = kxld_sym_get_section_offset(sym, sect);
-    if (relocator->is_32_bit) {
-        vtable_entry_size = VTABLE_ENTRY_SIZE_32;
-        vtable_base_offset += VTABLE_HEADER_SIZE_32;
-    } else {
-        vtable_entry_size = VTABLE_ENTRY_SIZE_64;
-        vtable_base_offset += VTABLE_HEADER_SIZE_64;
-    }
+    (void) get_vtable_base_sizes(relocator->is_32_bit, 
+        &vtable_entry_size, &vtable_header_size);
 
+    vtable_base_offset = kxld_sym_get_section_offset(vtable_sym, sect) +
+        vtable_header_size;
+
     /* Find the relocation entry at the start of the vtable */
 
     rval = kxld_reloc_get_reloc_index_by_offset(&sect->relocs, 
@@ -359,9 +234,9 @@ init_by_relocs(KXLDVTable *vtable, const KXLDSym *sym, const KXLDSect *sect,
          * skip it.  We won't be able to patch subclasses with this symbol,
          * but there isn't much we can do about that.
          */
-        tmpsym = kxld_reloc_get_symbol(relocator, reloc, sect->data, symtab);
+        sym = kxld_reloc_get_symbol(relocator, reloc, sect->data);
 
-        entry->unpatched.sym = tmpsym;
+        entry->unpatched.sym = sym;
         entry->unpatched.reloc = reloc;
     }
 
@@ -370,77 +245,42 @@ finish:
     return rval;
 }
 
-/*******************************************************************************
-*******************************************************************************/
-static kxld_addr_t
-get_entry_value(u_char *entry, const KXLDRelocator *relocator)
-{
-    kxld_addr_t entry_value;
-
-    if (relocator->is_32_bit) {
-        entry_value = *(uint32_t *)entry;
-    } else {
-        entry_value = *(uint64_t *)entry;
-    }
-
-    return entry_value;
-}
-
-#if !KERNEL
-/*******************************************************************************
-*******************************************************************************/
-static kxld_addr_t
-swap_entry_value(kxld_addr_t entry_value, const KXLDRelocator *relocator)
-{
-    if (relocator->is_32_bit) {
-        entry_value = OSSwapInt32((uint32_t) entry_value);
-    } else {
-        entry_value = OSSwapInt64((uint64_t) entry_value);
-    }
-
-    return entry_value;
-}
-#endif /* KERNEL */
-
 /*******************************************************************************
 * Initializes a vtable object by reading the symbol values out of the vtable
 * entries and performing reverse symbol lookups on those values.
 *******************************************************************************/
 static kern_return_t
-init_by_entries(KXLDVTable *vtable, const KXLDSymtab *symtab, 
-    const KXLDRelocator *relocator)
+init_by_entries(KXLDVTable *vtable, const KXLDRelocator *relocator,
+    const KXLDDict *defined_cxx_symbols)
 {
     kern_return_t rval = KERN_FAILURE;
     KXLDVTableEntry *tmpentry = NULL;
     KXLDSym *sym = NULL;
-    u_char *base_entry = NULL;
-    u_char *entry = NULL;
     kxld_addr_t entry_value = 0;
+    u_long entry_offset;
     u_int vtable_entry_size = 0;
     u_int vtable_header_size = 0;
     u_int nentries = 0;
     u_int i = 0;
 
-    if (relocator->is_32_bit) {
-        vtable_entry_size = VTABLE_ENTRY_SIZE_32;
-        vtable_header_size = VTABLE_HEADER_SIZE_32;
-    } else {
-        vtable_entry_size = VTABLE_ENTRY_SIZE_64;
-        vtable_header_size = VTABLE_HEADER_SIZE_64;
-    }
+    check(vtable);
+    check(relocator);
 
-    base_entry = vtable->vtable + vtable_header_size;
+    (void) get_vtable_base_sizes(relocator->is_32_bit, 
+        &vtable_entry_size, &vtable_header_size);
 
     /* Count the number of entries (the vtable is null-terminated) */
 
-    entry = base_entry;
-    entry_value = get_entry_value(entry, relocator);
-    while (entry_value) {
+    entry_offset = vtable_header_size;
+    while (1) {
+        entry_value = kxld_relocator_get_pointer_at_addr(relocator,
+            vtable->vtable, entry_offset);
+        if (!entry_value) break;
+
+        entry_offset += vtable_entry_size;
         ++nentries;
-        entry += vtable_entry_size;
-        entry_value = get_entry_value(entry, relocator);
     }
-    
+
     /* Allocate the symbol index */
 
     rval = kxld_array_init(&vtable->entries, sizeof(KXLDVTableEntry), nentries);
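
The counting loop above leans on the vtable's entry list being null-terminated, reading one pointer-sized slot at a time (via kxld_relocator_get_pointer_at_addr in the real code). In miniature, with a toy 64-bit vtable image and made-up addresses:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        /* Two reserved header slots, three entries, then the terminator. */
        uint64_t image[] = { 0, 0, 0x1000, 0x1008, 0x1010, 0 };
        unsigned int i = 2;                /* first slot past the header */
        unsigned int nentries = 0;

        while (image[i]) {
            ++nentries;
            ++i;
        }
        printf("%u entries\n", nentries);  /* prints 3 */
        return 0;
    }
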
@@ -448,24 +288,19 @@ init_by_entries(KXLDVTable *vtable, const KXLDSymtab *symtab,
 
     /* Look up the symbols for each entry */
 
-    entry = base_entry;
-    rval = KERN_SUCCESS;
-    for (i = 0; i < vtable->entries.nitems; ++i) {
-        entry = base_entry + (i * vtable_entry_size);
-        entry_value = get_entry_value(entry, relocator);
+    for (i = 0, entry_offset = vtable_header_size; 
+         i < vtable->entries.nitems; 
+         ++i, entry_offset += vtable_entry_size) 
+    {
+        entry_value = kxld_relocator_get_pointer_at_addr(relocator,
+            vtable->vtable, entry_offset);
 
-#if !KERNEL
-        if (relocator->swap) {
-            entry_value = swap_entry_value(entry_value, relocator);
-        }
-#endif /* !KERNEL */
-        
         /* If we can't find the symbol, it means that the virtual function was
          * defined inline.  There's not much I can do about this; it just means
          * I can't patch this function.
          */
         tmpentry = kxld_array_get_item(&vtable->entries, i);
-        sym = kxld_symtab_get_cxx_symbol_by_value(symtab, entry_value);
+        sym = kxld_dict_find(defined_cxx_symbols, &entry_value);
 
         if (sym) {
             tmpentry->patched.name = sym->name;
@@ -477,7 +312,6 @@ init_by_entries(KXLDVTable *vtable, const KXLDSymtab *symtab,
     }
 
     rval = KERN_SUCCESS;
-
 finish:
     return rval;
 }
@@ -493,63 +327,49 @@ finish:
 * external symbols.
 *******************************************************************************/
 static kern_return_t
-init_by_entries_and_relocs(KXLDVTable *vtable, const KXLDSym *sym, 
-    const KXLDSymtab *symtab, const KXLDRelocator *relocator, 
-    const KXLDArray *relocs)
+init_by_entries_and_relocs(KXLDVTable *vtable, const KXLDSym *vtable_sym, 
+    const KXLDRelocator *relocator, const KXLDArray *relocs,
+    const KXLDDict *defined_cxx_symbols)
 {
     kern_return_t rval = KERN_FAILURE;
     KXLDReloc *reloc = NULL;
     KXLDVTableEntry *tmpentry = NULL;
-    KXLDSym *tmpsym = NULL;
+    KXLDSym *sym = NULL;
     u_int vtable_entry_size = 0;
     u_int vtable_header_size = 0;
-    u_char *base_entry = NULL;
-    u_char *entry = NULL;
     kxld_addr_t entry_value = 0;
-    kxld_addr_t base_entry_offset = 0;
-    kxld_addr_t entry_offset = 0;
+    u_long entry_offset = 0;
     u_int nentries = 0;
     u_int i = 0;
     char *demangled_name1 = NULL;
     size_t demangled_length1 = 0;
 
     check(vtable);
-    check(sym);
-    check(symtab);
+    check(vtable_sym);
+    check(relocator);
     check(relocs);
 
     /* Find the first entry and its offset past the vtable padding */
 
-    if (relocator->is_32_bit) {
-        vtable_entry_size = VTABLE_ENTRY_SIZE_32;
-        vtable_header_size = VTABLE_HEADER_SIZE_32;
-    } else {
-        vtable_entry_size = VTABLE_ENTRY_SIZE_64;
-        vtable_header_size = VTABLE_HEADER_SIZE_64;
-    }
-
-    base_entry = vtable->vtable + vtable_header_size;
-
-    base_entry_offset = sym->base_addr;
-    base_entry_offset += vtable_header_size;
+    (void) get_vtable_base_sizes(relocator->is_32_bit, 
+        &vtable_entry_size, &vtable_header_size);
 
     /* In a final linked image, a vtable slot is valid if it is nonzero
-     * (meaning the userspace linker has already resolved it, or if it has
+     * (meaning the userspace linker has already resolved it) or if it has
      * a relocation entry.  We'll know the end of the vtable when we find a
      * slot that meets neither of these conditions.
      */
-    entry = base_entry;
-    entry_value = get_entry_value(entry, relocator);
-    entry_offset = base_entry_offset;
+    entry_offset = vtable_header_size;
     while (1) {
-        entry_value = get_entry_value(entry, relocator);
+        entry_value = kxld_relocator_get_pointer_at_addr(relocator,
+            vtable->vtable, entry_offset);
         if (!entry_value) {
-            reloc = kxld_reloc_get_reloc_by_offset(relocs, entry_offset);
+            reloc = kxld_reloc_get_reloc_by_offset(relocs, 
+                vtable_sym->base_addr + entry_offset);
             if (!reloc) break;
         }
 
         ++nentries;
-        entry += vtable_entry_size;
         entry_offset += vtable_entry_size;
     }
 
@@ -560,11 +380,12 @@ init_by_entries_and_relocs(KXLDVTable *vtable, const KXLDSym *sym,
 
     /* Find the symbols for each vtable entry */
 
-    entry = base_entry;
-    entry_value = get_entry_value(entry, relocator);
-    entry_offset = base_entry_offset;
-    for (i = 0; i < vtable->entries.nitems; ++i) {
-        entry_value = get_entry_value(entry, relocator);
+    for (i = 0, entry_offset = vtable_header_size; 
+         i < vtable->entries.nitems; 
+         ++i, entry_offset += vtable_entry_size) 
+    {
+        entry_value = kxld_relocator_get_pointer_at_addr(relocator,
+            vtable->vtable, entry_offset);
 
         /* If we can't find a symbol, it means it is a locally-defined,
          * non-external symbol that has been stripped.  We don't patch over
@@ -573,16 +394,11 @@ init_by_entries_and_relocs(KXLDVTable *vtable, const KXLDSym *sym,
          * but there isn't much we can do about that.
          */
         if (entry_value) {
-#if !KERNEL
-            if (relocator->swap) {
-                entry_value = swap_entry_value(entry_value, relocator);
-            }
-#endif /* !KERNEL */
-
             reloc = NULL;
-            tmpsym = kxld_symtab_get_cxx_symbol_by_value(symtab, entry_value);
+            sym = kxld_dict_find(defined_cxx_symbols, &entry_value);
         } else {
-            reloc = kxld_reloc_get_reloc_by_offset(relocs, entry_offset);
+            reloc = kxld_reloc_get_reloc_by_offset(relocs,
+                vtable_sym->base_addr + entry_offset);
             require_action(reloc, finish,
                 rval=KERN_FAILURE;
                 kxld_log(kKxldLogPatching, kKxldLogErr, 
@@ -590,20 +406,15 @@ init_by_entries_and_relocs(KXLDVTable *vtable, const KXLDSym *sym,
                     kxld_demangle(vtable->name, &demangled_name1, 
                         &demangled_length1)));
         
-            tmpsym = kxld_reloc_get_symbol(relocator, reloc, 
-                /* data */ NULL, symtab);
+            sym = kxld_reloc_get_symbol(relocator, reloc, /* data */ NULL);
         }
- 
+
         tmpentry = kxld_array_get_item(&vtable->entries, i);
         tmpentry->unpatched.reloc = reloc;
-        tmpentry->unpatched.sym = tmpsym;
-
-        entry += vtable_entry_size;
-        entry_offset += vtable_entry_size;
+        tmpentry->unpatched.sym = sym;
     }
 
     rval = KERN_SUCCESS;
-
 finish:
     return rval;
 }
@@ -632,17 +443,42 @@ kxld_vtable_deinit(KXLDVTable *vtable)
     bzero(vtable, sizeof(*vtable));
 }
 
+/*******************************************************************************
+*******************************************************************************/
+KXLDVTableEntry * 
+kxld_vtable_get_entry_for_offset(const KXLDVTable *vtable, u_long offset, 
+    boolean_t is_32_bit)
+{
+    KXLDVTableEntry *rval = NULL;
+    u_int vtable_entry_size = 0;
+    u_int vtable_header_size = 0;
+    u_int vtable_entry_idx = 0;
+
+    (void) get_vtable_base_sizes(is_32_bit, 
+        &vtable_entry_size, &vtable_header_size);
+
+    if (offset % vtable_entry_size) {
+        goto finish;
+    }
+
+    vtable_entry_idx = (u_int) ((offset - vtable_header_size) / vtable_entry_size);
+    rval = kxld_array_get_item(&vtable->entries, vtable_entry_idx);
+finish:
+    return rval;
+}
+
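
The new accessor maps a byte offset within a vtable to an entry index by stripping the header and dividing by the entry size. A hypothetical worked example for a 64-bit target (8-byte entries, two-slot header):

    #include <stdio.h>

    #define ENTRY_SIZE  8                  /* VTABLE_ENTRY_SIZE_64 */
    #define HEADER_SIZE (2 * ENTRY_SIZE)   /* VTABLE_HEADER_SIZE_64 */

    int
    main(void)
    {
        unsigned long offset = 0x28;       /* a reloc 40 bytes into the vtable */

        if (offset % ENTRY_SIZE == 0) {
            unsigned int idx =
                (unsigned int)((offset - HEADER_SIZE) / ENTRY_SIZE);
            printf("offset 0x%lx -> entry %u\n", offset, idx);  /* entry 3 */
        }
        return 0;
    }
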
 /*******************************************************************************
 * Patching vtables allows us to preserve binary compatibility across releases.
 *******************************************************************************/
 kern_return_t
 kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
-    KXLDSymtab *symtab, boolean_t strict_patching __unused)
+    KXLDObject *object)
 {
     kern_return_t rval = KERN_FAILURE;
+    const KXLDSymtab *symtab = NULL;
+    const KXLDSym *sym = NULL;
     KXLDVTableEntry *child_entry = NULL;
     KXLDVTableEntry *parent_entry = NULL;
-    KXLDSym *sym = NULL;
     u_int symindex = 0;
     u_int i = 0;
     char *demangled_name1 = NULL;
@@ -651,10 +487,13 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
     size_t demangled_length1 = 0;
     size_t demangled_length2 = 0;
     size_t demangled_length3 = 0;
+    boolean_t failure = FALSE;
 
     check(vtable);
     check(super_vtable);
 
+    symtab = kxld_object_get_symtab(object);
+
     require_action(!vtable->is_patched, finish, rval=KERN_SUCCESS);
     require_action(vtable->entries.nitems >= super_vtable->entries.nitems, finish,
         rval=KERN_FAILURE;
@@ -679,7 +518,7 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
          */
 
         if (!parent_entry->patched.name) continue;
-
+        
         /* 1) If the symbol is defined locally, do not patch */
 
         if (kxld_sym_is_defined_locally(child_entry->unpatched.sym)) continue;
@@ -726,7 +565,8 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
          * should not patch it.
          */
 
-        if (strict_patching && !kxld_sym_is_defined(child_entry->unpatched.sym))
+        if (kxld_object_target_supports_strict_patching(object) && 
+            !kxld_sym_is_defined(child_entry->unpatched.sym))
         {
             char class_name[KXLD_MAX_NAME_LEN];
             char function_prefix[KXLD_MAX_NAME_LEN];
@@ -744,6 +584,14 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
             if (!strncmp(child_entry->unpatched.sym->name, 
                     function_prefix, function_prefix_len)) 
             {
+                failure = TRUE;
+                kxld_log(kKxldLogPatching, kKxldLogErr,
+                    "The %s is unpatchable because its class declares the "
+                    "method '%s' without providing an implementation.",
+                    kxld_demangle(vtable->name,
+                        &demangled_name1, &demangled_length1),
+                    kxld_demangle(child_entry->unpatched.sym->name,
+                        &demangled_name2, &demangled_length2));
                 continue;
             }
         }
@@ -758,9 +606,10 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
          * that.
          */
 
-        sym = kxld_symtab_get_symbol_by_name(symtab, parent_entry->patched.name);
+        sym = kxld_symtab_get_locally_defined_symbol_by_name(symtab, 
+            parent_entry->patched.name);
         if (!sym) {
-            rval = kxld_symtab_add_symbol(symtab, parent_entry->patched.name,
+            rval = kxld_object_add_symbol(object, parent_entry->patched.name,
                 parent_entry->patched.addr, &sym);
             require_noerr(rval, finish);
         }
@@ -771,7 +620,6 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
 
         rval = kxld_reloc_update_symindex(child_entry->unpatched.reloc, symindex);
         require_noerr(rval, finish);
-
         kxld_log(kKxldLogPatching, kKxldLogDetail,
             "In vtable '%s', patching '%s' with '%s'.", 
             kxld_demangle(vtable->name, &demangled_name1, &demangled_length1),
@@ -779,13 +627,28 @@ kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
                 &demangled_name2, &demangled_length2), 
             kxld_demangle(sym->name, &demangled_name3, &demangled_length3));
 
-        kxld_sym_patch(child_entry->unpatched.sym);
+        rval = kxld_object_patch_symbol(object, child_entry->unpatched.sym);
+        require_noerr(rval, finish);
+
         child_entry->unpatched.sym = sym;
+
+        /*
+         * The C++ ABI requires that functions be aligned on a 2-byte boundary:
+         * http://www.codesourcery.com/public/cxx-abi/abi.html#member-pointers
+         * If the LSB of any virtual function's link address is 1, then the
+         * compiler has violated that part of the ABI, and we're going to panic
+         * in _ptmf2ptf() (in OSMetaClass.h). Better to panic here with some
+         * context.
+         */
+        assert(kxld_sym_is_pure_virtual(sym) || !(sym->link_addr & 1)); 
     }
 
+    require_action(!failure, finish, rval=KERN_FAILURE);
+
     /* Change the vtable representation from the unpatched layout to the
      * patched layout.
      */
+
     for (i = 0; i < vtable->entries.nitems; ++i) {
         char *name;
         kxld_addr_t addr;
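
The ABI note above is the key invariant: in the member-pointer encoding the comment cites, bit 0 of a function's address doubles as the virtual/non-virtual discriminator, so a virtual function linked at an odd address would later be misdecoded by _ptmf2ptf(). A minimal sketch of the check the new assert performs (the address is made up):

    #include <assert.h>
    #include <stdint.h>

    int
    main(void)
    {
        /* Hypothetical link address of a patched virtual function. */
        uint64_t link_addr = 0xffffff8000402a10ULL;

        /* Function code must be 2-byte aligned; an odd address would be
         * taken for a vtable offset by member-pointer decoding. */
        assert((link_addr & 1) == 0);
        return 0;
    }
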
diff --git a/libkern/kxld/kxld_vtable.h b/libkern/kxld/kxld_vtable.h
index 124756994..4dd304a76 100644
--- a/libkern/kxld/kxld_vtable.h
+++ b/libkern/kxld/kxld_vtable.h
@@ -38,6 +38,7 @@
 #include "kxld_array.h"
 
 struct kxld_array;
+struct kxld_object;
 struct kxld_reloc;
 struct kxld_relocator;
 struct kxld_sect;
@@ -62,7 +63,7 @@ struct kxld_vtable_patched_entry {
 };
 
 struct kxld_vtable_unpatched_entry {
-    struct kxld_sym *sym;
+    const struct kxld_sym *sym;
     struct kxld_reloc *reloc;
 };
 
@@ -75,31 +76,9 @@ union kxld_vtable_entry {
 * Constructors and destructors
 *******************************************************************************/
 
-kern_return_t kxld_vtable_init_from_kernel_macho(KXLDVTable *vtable,
-    const struct kxld_sym *sym, const struct kxld_sect *sect, 
-    const struct kxld_symtab *symtab, const struct kxld_relocator *relocator)
-    __attribute__((nonnull, visibility("hidden")));
-
-kern_return_t kxld_vtable_init_from_final_macho(KXLDVTable *vtable,
-    const struct kxld_sym *sym, const struct kxld_sect *sect, 
-    const struct kxld_symtab *symtab, const struct kxld_relocator *relocator,
-    const struct kxld_array *relocs)
-    __attribute__((nonnull, visibility("hidden")));
-
-kern_return_t kxld_vtable_init_from_object_macho(KXLDVTable *vtable,
-    const struct kxld_sym *sym, const struct kxld_sect *sect, 
-    const struct kxld_symtab *symtab, const struct kxld_relocator *relocator)
-    __attribute__((nonnull, visibility("hidden")));
-
-kern_return_t kxld_vtable_init_from_link_state_32(KXLDVTable *vtable, u_char *state,
-    struct kxld_vtable_hdr *hdr)
-    __attribute__((nonnull, visibility("hidden")));
-
-kern_return_t kxld_vtable_init_from_link_state_64(KXLDVTable *vtable, u_char *state,
-    struct kxld_vtable_hdr *hdr)
-    __attribute__((nonnull, visibility("hidden")));
-
-kern_return_t kxld_vtable_copy(KXLDVTable *vtable, const KXLDVTable *src)
+kern_return_t kxld_vtable_init(KXLDVTable *vtable, 
+    const struct kxld_sym *vtable_sym, const struct kxld_object *object,
+    const struct kxld_dict *defined_cxx_symbols)
     __attribute__((nonnull, visibility("hidden")));
 
 void kxld_vtable_clear(KXLDVTable *vtable)
@@ -108,13 +87,21 @@ void kxld_vtable_clear(KXLDVTable *vtable)
 void kxld_vtable_deinit(KXLDVTable *vtable)
     __attribute__((visibility("hidden")));
 
+/*******************************************************************************
+* Accessors
+*******************************************************************************/
+
+KXLDVTableEntry * kxld_vtable_get_entry_for_offset(const KXLDVTable *vtable,
+    u_long offset, boolean_t is_32_bit)
+    __attribute__((pure,nonnull,visibility("hidden")));
+
 /*******************************************************************************
 * Modifiers
 *******************************************************************************/
 
 /* With strict patching, the vtable patcher will only patch pad slots */
 kern_return_t kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable,
-    struct kxld_symtab *symtab, boolean_t strict_patching)
+    struct kxld_object *object)
     __attribute__((nonnull, visibility("hidden")));
 
 #endif /* _KXLD_VTABLE_H_ */
diff --git a/libkern/kxld/tests/kextcopyright.c b/libkern/kxld/tests/kextcopyright.c
index dffbdbc22..7e545d328 100644
--- a/libkern/kxld/tests/kextcopyright.c
+++ b/libkern/kxld/tests/kextcopyright.c
@@ -1,3 +1,31 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
 #include <stdio.h>
 
 #include <CoreFoundation/CoreFoundation.h>
@@ -84,6 +112,7 @@ convert_cfstring(CFStringRef the_string)
 
     result = converted_string;
 finish:
+    CFRelease(the_data);
     return result;
 }
 
diff --git a/libkern/kxld/tests/kxld_array_test.c b/libkern/kxld/tests/kxld_array_test.c
new file mode 100644
index 000000000..4791712e1
--- /dev/null
+++ b/libkern/kxld/tests/kxld_array_test.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <assert.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#include <mach/mach_init.h>
+
+#include "kxld_array.h"
+#include "kxld_test.h"
+#include "kxld_util.h"
+
+#define kNumStorageTestItems (u_int) (4 * PAGE_SIZE / sizeof(u_int))
+
+int
+main(int argc __unused, char *argv[] __unused)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDArray array;
+    u_int *item = 0;
+    u_int test_num = 0;
+    u_int idx = 0;
+    u_int titems = 0;
+    u_int storageTestItems[kNumStorageTestItems];
+    u_int i = 0;
+    
+    bzero(&array, sizeof(array));
+
+    kxld_set_logging_callback(kxld_test_log);
+    kxld_set_logging_callback_data("kxld_array_test", NULL);
+
+    kxld_log(0, 0, "%d: Initialize", ++test_num);
+
+    titems = PAGE_SIZE / sizeof(u_int);
+    rval = kxld_array_init(&array, sizeof(u_int), titems);
+    assert(rval == KERN_SUCCESS);
+    assert(array.nitems == titems);
+
+    kxld_log(0, 0, "%d: Get item", ++test_num);
+    idx = 0;
+    item = kxld_array_get_item(&array, idx);
+    assert(item);
+    assert(item == kxld_array_get_slot(&array, idx));
+
+    idx = titems - 1;
+    item = kxld_array_get_item(&array, idx);
+    assert(item);
+    assert(item == kxld_array_get_slot(&array, idx));
+
+    idx = titems;
+    item = kxld_array_get_item(&array, idx);
+    assert(!item);
+    /* We allocated the max number of items that could be stored in a page,
+     * so get_slot() and get_item() are equivalent.
+     */
+    assert(item == kxld_array_get_slot(&array, idx));
+
+    kxld_log(0, 0, "%d: Resize", ++test_num);
+
+    titems = 2 * PAGE_SIZE / sizeof(u_int) + 100;
+    rval = kxld_array_resize(&array, titems);
+    assert(rval == KERN_SUCCESS);
+    assert(array.nitems == titems);
+
+    kxld_log(0, 0, "%d: Get more items", ++test_num);
+    idx = 0;
+    item = kxld_array_get_item(&array, idx);
+    assert(item);
+    assert(item == kxld_array_get_slot(&array, idx));
+
+    idx = titems - 1;
+    item = kxld_array_get_item(&array, idx);
+    assert(item);
+    assert(item == kxld_array_get_slot(&array, idx));
+
+    idx = titems;
+    item = kxld_array_get_item(&array, idx);
+    assert(!item);
+    /* The resized item count does not fill the page-rounded allocation, so
+     * get_slot() will return items even when get_item() does not.  See below
+     * for details.
+     */
+    assert(item != kxld_array_get_slot(&array, idx));
+
+    kxld_log(0, 0, "%d: Clear and attempt to get an item", ++test_num);
+    (void) kxld_array_clear(&array);
+    item = kxld_array_get_item(&array, 0);
+    assert(!item);
+
+    kxld_log(0, 0, "%d: Get slot", ++test_num);
+    /* The array allocates its internal storage in pages. Because get_slot()
+     * fetches items based on the allocated size, not the logical size, we
+     * calculate the max items get_slot() can retrieve based on page size.
+     */
+    titems = (u_int) (round_page(titems * sizeof(u_int)) / sizeof(u_int));
+    assert(!item);
+    item = kxld_array_get_slot(&array, 0);
+    assert(item);
+    item = kxld_array_get_slot(&array, titems - 1);
+    assert(item);
+    item = kxld_array_get_slot(&array, titems);
+    assert(!item);
+
+    kxld_log(0, 0, "%d: Reinitialize", ++test_num);
+
+    titems = kNumStorageTestItems;
+    rval = kxld_array_init(&array, sizeof(u_int), titems);
+    assert(rval == KERN_SUCCESS);
+    assert(array.nitems == titems);
+
+    kxld_log(0, 0, "%d: Storage test - %d insertions and finds", 
+        ++test_num, kNumStorageTestItems);
+    for (i = 0; i < titems; ++i) {
+        item = kxld_array_get_item(&array, i);
+        assert(item);
+
+        *item = (u_int) (random() % UINT_MAX);
+        storageTestItems[i] = *item;
+    }
+
+    for (i = 0; i < titems; ++i) {
+        item = kxld_array_get_item(&array, i);
+        assert(item);
+        assert(*item == storageTestItems[i]);
+    }
+
+    (void) kxld_array_deinit(&array);
+
+    kxld_log(0, 0, " ");
+    kxld_log(0, 0, "All tests passed!  Now check for memory leaks...");
+    
+    kxld_print_memory_report();
+
+    return 0;
+}
diff --git a/libkern/kxld/tests/kxld_dict_test.c b/libkern/kxld/tests/kxld_dict_test.c
index d831a44ed..a9b2f5f23 100644
--- a/libkern/kxld/tests/kxld_dict_test.c
+++ b/libkern/kxld/tests/kxld_dict_test.c
@@ -27,11 +27,10 @@
  */
 #include <assert.h>
 #include <stdlib.h>
-#include <stdio.h>
 #include <strings.h>
 
 #include "kxld_dict.h"
-#include "kxld_util.h"
+#include "kxld_test.h"
 
 #define KEYLEN 40
 #define STRESSNUM 10000
@@ -41,22 +40,6 @@ typedef struct {
     int * value;
 } Stress;
 
-
-void kxld_test_log(KXLDLogSubsystem sys, KXLDLogLevel level,
-    const char *format, va_list ap, void *user_data);
-
-void 
-kxld_test_log(KXLDLogSubsystem sys __unused, KXLDLogLevel level __unused,
-    const char *format, va_list ap, void *user_data __unused)
-{
-    va_list args;
-
-    va_copy(args, ap);
-    vfprintf(stderr, format, args);
-    fprintf(stderr, "\n");
-    va_end(args);
-}
-
 int 
 main(int argc __unused, char *argv[] __unused)
 {
@@ -69,20 +52,21 @@ main(int argc __unused, char *argv[] __unused)
     Stress stress_test[STRESSNUM];
 
     kxld_set_logging_callback(kxld_test_log);
+    kxld_set_logging_callback_data("kxld_dict_test", NULL);
 
     bzero(&dict, sizeof(dict));
     
-    fprintf(stderr, "%d: Initialize\n", ++test_num);
+    kxld_log(0, 0, "%d: Initialize", ++test_num);
     result = kxld_dict_init(&dict, kxld_dict_string_hash, kxld_dict_string_cmp, 10);
     assert(result == KERN_SUCCESS);
     size = kxld_dict_get_num_entries(&dict);
     assert(size == 0);
     
-    fprintf(stderr, "%d: Find nonexistant key\n", ++test_num);
+    kxld_log(0, 0, "%d: Find nonexistant key", ++test_num);
     b = kxld_dict_find(&dict, "hi");
     assert(b == NULL);
     
-    fprintf(stderr, "%d: Insert and find\n", ++test_num);
+    kxld_log(0, 0, "%d: Insert and find", ++test_num);
     result = kxld_dict_insert(&dict, "hi", &a1);
     assert(result == KERN_SUCCESS);
     b = kxld_dict_find(&dict, "hi");
@@ -90,7 +74,7 @@ main(int argc __unused, char *argv[] __unused)
     size = kxld_dict_get_num_entries(&dict);
     assert(size == 1);
     
-    fprintf(stderr, "%d: Insert same key with different values\n", ++test_num);
+    kxld_log(0, 0, "%d: Insert same key with different values", ++test_num);
     result = kxld_dict_insert(&dict, "hi", &a2);
     assert(result == KERN_SUCCESS);
     b = kxld_dict_find(&dict, "hi");
@@ -98,15 +82,16 @@ main(int argc __unused, char *argv[] __unused)
     size = kxld_dict_get_num_entries(&dict);
     assert(size == 1);
     
-    fprintf(stderr, "%d: Clear and find of nonexistant key\n", ++test_num);
+    kxld_log(0, 0, "%d: Clear and find of nonexistant key", ++test_num);
     kxld_dict_clear(&dict);
     result = kxld_dict_init(&dict, kxld_dict_string_hash, kxld_dict_string_cmp, 10);
+    assert(result == KERN_SUCCESS);
     b = kxld_dict_find(&dict, "hi");
     assert(b == NULL);
     size = kxld_dict_get_num_entries(&dict);
     assert(size == 0);
     
-    fprintf(stderr, "%d: Insert multiple keys\n", ++test_num);
+    kxld_log(0, 0, "%d: Insert multiple keys", ++test_num);
     result = kxld_dict_insert(&dict, "hi", &a1);
     assert(result == KERN_SUCCESS);
     result = kxld_dict_insert(&dict, "hello", &a2);
@@ -119,7 +104,7 @@ main(int argc __unused, char *argv[] __unused)
     size = kxld_dict_get_num_entries(&dict);
     assert(size == 2);
     
-    fprintf(stderr, "%d: Remove keys\n", ++test_num);
+    kxld_log(0, 0, "%d: Remove keys", ++test_num);
     kxld_dict_remove(&dict, "hi", &b);
     assert(b && *(int*)b == a1);
     b = kxld_dict_find(&dict, "hi");
@@ -129,17 +114,18 @@ main(int argc __unused, char *argv[] __unused)
     size = kxld_dict_get_num_entries(&dict);
     assert(size == 1);
     
-    fprintf(stderr, "%d: Stress test - %d insertions and finds\n", ++test_num, STRESSNUM);
+    kxld_log(0, 0, "%d: Stress test - %d insertions and finds", ++test_num, STRESSNUM);
 
     kxld_dict_clear(&dict);
     result = kxld_dict_init(&dict, kxld_dict_string_hash, kxld_dict_string_cmp, 10);
+    assert(result == KERN_SUCCESS);
     for (i = 0; i < STRESSNUM; ++i) {
         int * tmp_value = kxld_alloc(sizeof(int));
         char * tmp_key = kxld_alloc(sizeof(char) * (KEYLEN + 1));
         
         *tmp_value = i;
         for (j = 0; j < KEYLEN; ++j) {
-            tmp_key[j] = (rand() % 26) + 'a';
+            tmp_key[j] = (random() % 26) + 'a';
         }
         tmp_key[KEYLEN] = '\0';
         
@@ -161,10 +147,10 @@ main(int argc __unused, char *argv[] __unused)
         kxld_free(stress_test[i].value, sizeof(int));
     }
 
-    fprintf(stderr, "%d: Destroy\n", ++test_num);
+    kxld_log(0, 0, "%d: Destroy", ++test_num);
     kxld_dict_deinit(&dict);
     
-    fprintf(stderr, "\nAll tests passed!  Now check for memory leaks...\n");
+    kxld_log(0, 0, "\nAll tests passed!  Now check for memory leaks...");
     
     kxld_print_memory_report();
     
diff --git a/bsd/ppc/psl.h b/libkern/kxld/tests/kxld_test.c
similarity index 76%
rename from bsd/ppc/psl.h
rename to libkern/kxld/tests/kxld_test.c
index 14abec125..d802cc7a1 100644
--- a/bsd/ppc/psl.h
+++ b/libkern/kxld/tests/kxld_test.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,19 +25,21 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/* Copyright (c) 1993 NeXT Computer, Inc.  All rights reserved.
- *
- *	File:	bsd/ppc/psl.h
- *
- */
+#include <stdio.h>
 
-#if	KERNEL_PRIVATE
+#include "kxld_test.h"
+#include "kxld_util.h"
 
-#ifndef _BSD_PPC_PSL_H_
-#define _BSD_PPC_PSL_H_
+void 
+kxld_test_log(KXLDLogSubsystem sys __unused, KXLDLogLevel level __unused,
+    const char *format, va_list ap, void *user_data __unused)
+{
+    va_list args;
 
-/* empty */
+    va_copy(args, ap);
+    vfprintf(stderr, format, args);
+    fprintf(stderr, "\n");
+    va_end(args);
+}
 
-#endif /* _BSD_PPC_PSL_H_ */
 
-#endif	/* KERNEL_PRIVATE */
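Both test programs route their output through this shared shim: they register
kxld_test_log() with kxld's logging hooks and then call kxld_log() directly.
A minimal harness sketch using only the calls shown in the tests above
("my_test" is a placeholder name):

    #include "kxld_test.h"
    #include "kxld_util.h"

    int
    main(int argc __unused, char *argv[] __unused)
    {
        kxld_set_logging_callback(kxld_test_log);
        kxld_set_logging_callback_data("my_test", NULL);

        /* Subsystem and level 0, 0 match the tests' usage. */
        kxld_log(0, 0, "%d: Example step", 1);
        return 0;
    }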
diff --git a/osfmk/ppc/machine_rpc.h b/libkern/kxld/tests/kxld_test.h
similarity index 86%
rename from osfmk/ppc/machine_rpc.h
rename to libkern/kxld/tests/kxld_test.h
index ffbf6c762..98e05c778 100644
--- a/osfmk/ppc/machine_rpc.h
+++ b/libkern/kxld/tests/kxld_test.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002,2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,14 +25,9 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * @OSF_COPYRIGHT@
- * 
- */
-
-#ifndef _MACHINE_RPC_H_
-#define _MACHINE_RPC_H_
 
-#endif /* _MACHINE_RPC_H_ */
+#include "kxld_util.h"
 
+void kxld_test_log(KXLDLogSubsystem sys, KXLDLogLevel level,
+    const char *format, va_list ap, void *user_data);
 
diff --git a/libkern/kxld/tests/loadtest.py b/libkern/kxld/tests/loadtest.py
index def56cfed..ce7a95d96 100644
--- a/libkern/kxld/tests/loadtest.py
+++ b/libkern/kxld/tests/loadtest.py
@@ -1,3 +1,31 @@
 #!/usr/bin/env python
+
+##
+# Copyright (c) 2009 Apple Inc. All rights reserved.
+#
+# @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+# 
+# This file contains Original Code and/or Modifications of Original Code
+# as defined in and that are subject to the Apple Public Source License
+# Version 2.0 (the 'License'). You may not use this file except in
+# compliance with the License. The rights granted to you under the License
+# may not be used to create, or enable the creation or redistribution of,
+# unlawful or unlicensed copies of an Apple operating system, or to
+# circumvent, violate, or enable the circumvention or violation of, any
+# terms of an Apple operating system software license agreement.
+# 
+# Please obtain a copy of the License at
+# http://www.opensource.apple.com/apsl/ and read it before using this file.
+# 
+# The Original Code and all software distributed under the License are
+# distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+# EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+# INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+# Please see the License for the specific language governing rights and
+# limitations under the License.
+# 
+# @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+##
 
 import sys
diff --git a/libkern/libkern/Makefile b/libkern/libkern/Makefile
index 76e4d9f99..2d86d6882 100644
--- a/libkern/libkern/Makefile
+++ b/libkern/libkern/Makefile
@@ -11,21 +11,14 @@ INSTINC_SUBDIRS = \
         machine \
 	c++ \
 	crypto
-
-INSTINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS} \
-        ppc
-
 INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS} \
         i386
-
 INSTINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS} \
         i386
-
 INSTINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS} \
         arm
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
-EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC}
 EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
 EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM}
@@ -42,15 +35,16 @@ DATAFILES = \
         OSTypes.h	\
 	locks.h		\
 	sysctl.h	\
+	tree.h		\
 	zconf.h		\
 	zlib.h
 
 PRIVATE_DATAFILES =         \
-	OSKextLibPrivate.h  \
 	OSKextLibPrivate.h  \
 	kext_request_keys.h \
 	mkext.h		    \
-	prelink.h
+	prelink.h           \
+	WKdm.h
 
 INSTALL_MI_LIST	=       \
 	OSByteOrder.h   \
@@ -65,6 +59,7 @@ INSTALL_MI_DIR = libkern
 INSTALL_MI_LCL_LIST =        \
 	${INSTALL_MI_LIST}   \
 	${PRIVATE_DATAFILES} \
+	tree.h		     \
 	kext_panic_report.h  \
 	OSCrossEndian.h
 
diff --git a/libkern/libkern/OSAtomic.h b/libkern/libkern/OSAtomic.h
index 36d9127e2..72ff30594 100644
--- a/libkern/libkern/OSAtomic.h
+++ b/libkern/libkern/OSAtomic.h
@@ -41,6 +41,22 @@
 extern "C" {
 #endif
 
+#ifdef XNU_KERNEL_PRIVATE
+/*
+ * The macro SAFE_CAST_PTR() casts one type of pointer to another type, making sure
+ * the data the pointer is referencing is the same size. If it is not, it will cause
+ * a division by zero compiler warning. This is to work around "SInt32" being defined
+ * as "long" on ILP32 and as "int" on LP64, which would require an explicit cast to
+ * "SInt32*" when for instance passing an "int*" to OSAddAtomic() - which masks size
+ * mismatches.
+ * -- var is used twice, but sizeof does not evaluate the
+ *    argument, i.e. we're safe against "++" etc. in var --
+ */
+#define __SAFE_CAST_PTR(type, var) (((type)(var))+(0/(sizeof(*var) == sizeof(*(type)0) ? 1 : 0)))
+#else
+#define __SAFE_CAST_PTR(type, var) ((type)(var))
+#endif
+
 /*!
  * @header
  *
@@ -64,6 +80,8 @@ extern Boolean OSCompareAndSwap64(
     UInt64            oldValue,
     UInt64            newValue,
     volatile UInt64 * address);
+#define OSCompareAndSwap64(a, b, c) \
+	(OSCompareAndSwap64(a, b, __SAFE_CAST_PTR(volatile UInt64*,c)))
 
 #endif /* defined(__i386__) || defined(__x86_64__) */
 
@@ -81,6 +99,8 @@ extern Boolean OSCompareAndSwap64(
 extern SInt64 OSAddAtomic64(
     SInt64            theAmount,
     volatile SInt64 * address);
+#define OSAddAtomic64(a, b) \
+	(OSAddAtomic64(a, __SAFE_CAST_PTR(volatile SInt64*,b)))
 
 /*!
  * @function OSIncrementAtomic64
@@ -126,6 +146,8 @@ inline static SInt64 OSDecrementAtomic64(volatile SInt64 * address)
 extern long OSAddAtomicLong(
     long            theAmount,
     volatile long * address);
+#define OSAddAtomicLong(a, b) \
+	(OSAddAtomicLong(a, __SAFE_CAST_PTR(volatile long*,b)))
 
 /* Not to be included in headerdoc.
  *
@@ -156,22 +178,6 @@ inline static long OSDecrementAtomicLong(volatile long * address)
 }
 #endif /* XNU_KERNEL_PRIVATE */
 
-/*
- * The macro SAFE_CAST_PTR() casts one type of pointer to another type, making sure
- * the data the pointer is referencing is the same size. If it is not, it will cause
- * a division by zero compiler warning. This is to work around "SInt32" being defined
- * as "long" on ILP32 and as "int" on LP64, which would require an explicit cast to
- * "SInt32*" when for instance passing an "int*" to OSAddAtomic() - which masks size
- * mismatches.
- * -- var is used twice, but sizeof does not evaluate the
- *    argument, i.e. we're safe against "++" etc. in var --
- */
-#ifdef XNU_KERNEL_PRIVATE
-#define SAFE_CAST_PTR(type, var) (((type)(var))+(0/(sizeof(*var) == sizeof(*(type)0) ? 1 : 0)))
-#else
-#define SAFE_CAST_PTR(type, var) ((type)(var))
-#endif
-
 /*!
  * @function OSCompareAndSwap
  *
@@ -193,7 +199,7 @@ extern Boolean OSCompareAndSwap(
     UInt32            newValue,
     volatile UInt32 * address);
 #define OSCompareAndSwap(a, b, c) \
-	(OSCompareAndSwap(a, b, SAFE_CAST_PTR(volatile UInt32*,c)))
+	(OSCompareAndSwap(a, b, __SAFE_CAST_PTR(volatile UInt32*,c)))
 
 /*!
  * @function OSCompareAndSwapPtr
@@ -215,7 +221,7 @@ extern Boolean OSCompareAndSwapPtr(
     void            * newValue,
     void * volatile * address);
 #define OSCompareAndSwapPtr(a, b, c) \
-	(OSCompareAndSwapPtr(a, b, SAFE_CAST_PTR(void * volatile *,c)))
+	(OSCompareAndSwapPtr(a, b, __SAFE_CAST_PTR(void * volatile *,c)))
 
 /*!
  * @function OSAddAtomic
@@ -235,7 +241,7 @@ extern SInt32 OSAddAtomic(
     SInt32            amount,
     volatile SInt32 * address);
 #define OSAddAtomic(a, b) \
-	(OSAddAtomic(a, SAFE_CAST_PTR(volatile SInt32*,b)))
+	(OSAddAtomic(a, __SAFE_CAST_PTR(volatile SInt32*,b)))
 
 /*!
  * @function OSAddAtomic16
@@ -288,7 +294,7 @@ extern SInt8 OSAddAtomic8(
  */
 extern SInt32 OSIncrementAtomic(volatile SInt32 * address);
 #define OSIncrementAtomic(a) \
-	(OSIncrementAtomic(SAFE_CAST_PTR(volatile SInt32*,a)))
+	(OSIncrementAtomic(__SAFE_CAST_PTR(volatile SInt32*,a)))
 
 /*!
  * @function OSIncrementAtomic16
@@ -335,7 +341,7 @@ extern SInt8 OSIncrementAtomic8(volatile SInt8 * address);
  */
 extern SInt32 OSDecrementAtomic(volatile SInt32 * address);
 #define OSDecrementAtomic(a) \
-	(OSDecrementAtomic(SAFE_CAST_PTR(volatile SInt32*,a)))
+	(OSDecrementAtomic(__SAFE_CAST_PTR(volatile SInt32*,a)))
 
 /*!
  * @function OSDecrementAtomic16
@@ -385,7 +391,7 @@ extern UInt32 OSBitAndAtomic(
     UInt32            mask,
     volatile UInt32 * address);
 #define OSBitAndAtomic(a, b) \
-	(OSBitAndAtomic(a, SAFE_CAST_PTR(volatile UInt32*,b)))
+	(OSBitAndAtomic(a, __SAFE_CAST_PTR(volatile UInt32*,b)))
 
 /*!
  * @function OSBitAndAtomic16
@@ -441,7 +447,7 @@ extern UInt32 OSBitOrAtomic(
     UInt32            mask,
     volatile UInt32 * address);
 #define OSBitOrAtomic(a, b) \
-	(OSBitOrAtomic(a, SAFE_CAST_PTR(volatile UInt32*,b)))
+	(OSBitOrAtomic(a, __SAFE_CAST_PTR(volatile UInt32*,b)))
 
 /*!
  * @function OSBitOrAtomic16
@@ -497,7 +503,7 @@ extern UInt32 OSBitXorAtomic(
     UInt32            mask,
     volatile UInt32 * address);
 #define OSBitXorAtomic(a, b) \
-	(OSBitXorAtomic(a, SAFE_CAST_PTR(volatile UInt32*,b)))
+	(OSBitXorAtomic(a, __SAFE_CAST_PTR(volatile UInt32*,b)))
 
 /*!
  * @function OSBitXorAtomic16
@@ -571,44 +577,54 @@ extern Boolean OSTestAndClear(
     UInt32           bit,
     volatile UInt8 * startAddress);
 
-#ifdef __ppc__
 /*!
- * @function OSEnqueueAtomic
+ * @defined OS_SPINLOCK_INIT
  *
  * @abstract
- * Singly linked list head insertion, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
+ * The default value for an OSSpinLock.
  *
  * @discussion
- * The OSEnqueueAtomic function places an element at the head of a single linked list, which is specified with the address of a head pointer, listHead. The element structure has a next field whose offset is specified.
+ * The convention is that unlocked is zero, locked is nonzero.
+ */
+#define	OS_SPINLOCK_INIT 0
+
+/*! 
+ * @typedef OSSpinLock
  *
- * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures.
- * @param listHead The address of a head pointer for the list .
- * @param element The list element to insert at the head of the list.
- * @param elementNextFieldOffset The byte offset into the element where a pointer to the next element in the list is stored.
+ * @abstract
+ * Data type for a spinlock.
+ *
+ * @discussion
+ * You should always initialize a spinlock to OS_SPINLOCK_INIT before using it.
  */
-extern void OSEnqueueAtomic(
-    void * volatile * listHead,
-    void            * element,
-    SInt32            elementNextFieldOffset);
+typedef SInt32 OSSpinLock;
 
+#ifdef PRIVATE
 /*!
- * @function OSDequeueAtomic
+ * @function OSSpinLockTry
  *
  * @abstract
- * Singly linked list element head removal, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
+ * Locks a spinlock if it would not block.
  *
  * @discussion
- * The OSDequeueAtomic function removes an element from the head of a single linked list, which is specified with the address of a head pointer, listHead. The element structure has a next field whose offset is specified.
+ * Multiprocessor locks used within the shared memory area between the kernel and event system.  These must work in both user and kernel mode.
  *
- * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures.
- * @param listHead The address of a head pointer for the list .
- * @param elementNextFieldOffset The byte offset into the element where a pointer to the next element in the list is stored.
- * @result A removed element, or zero if the list is empty.
+ * @result
+ * Returns false if the lock was already held by another thread, true if it took the lock successfully. 
+ */
+extern Boolean OSSpinLockTry(volatile OSSpinLock * lock);
+
+/*!
+ * @function OSSpinLockUnlock
+ *
+ * @abstract
+ * Unlocks a spinlock.
+ *
+ * @discussion
+ * Unlocks a spinlock.
  */
-extern void * OSDequeueAtomic(
-    void * volatile * listHead,
-    SInt32            elementNextFieldOffset);
-#endif /* __ppc__ */
+extern void OSSpinLockUnlock(volatile OSSpinLock * lock);
+#endif /* PRIVATE */
 
 /*!
  * @function OSSynchronizeIO
@@ -621,9 +637,6 @@ extern void * OSDequeueAtomic(
  */
 static __inline__ void OSSynchronizeIO(void)
 {
-#if defined(__ppc__)
-        __asm__ ("eieio");
-#endif
 }
 
 #if defined(__cplusplus)
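The __SAFE_CAST_PTR() macro relocated above rejects mismatched pointee sizes
by forming a division by zero, while compiling away entirely when the sizes
agree. A hedged illustration (LP64 assumed, where SInt32 is int):

    int   counter = 0;
    short narrow  = 0;

    OSAddAtomic(1, &counter);  /* sizeof(int) == sizeof(SInt32): compiles */
    OSAddAtomic(1, &narrow);   /* sizeof(short) != sizeof(SInt32): the
                                * 0/(...) term becomes 0/0 and triggers the
                                * division-by-zero compiler warning */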
diff --git a/libkern/libkern/OSAtomic.h.save b/libkern/libkern/OSAtomic.h.save
deleted file mode 100644
index 1870272b5..000000000
--- a/libkern/libkern/OSAtomic.h.save
+++ /dev/null
@@ -1,305 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_LICENSE_HEADER_START@
- * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
- * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
- * 
- * @APPLE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1999 Apple Computer, Inc.  All rights reserved.
- *
- * HISTORY
- *
- */
-
-#ifndef _OS_OSATOMIC_H
-#define _OS_OSATOMIC_H
-
-#include <libkern/OSBase.h>
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-/*! @function OSCompareAndSwap
-    @abstract Compare and swap operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSCompareAndSwap function compares the value at the specified address with oldVal. The value of newValue is written to the address only if oldValue and the value at the address are equal. OSCompareAndSwap returns true if newValue is written to the address; otherwise, it returns false.
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param oldValue The value to compare at address.
-    @param newValue The value to write to address if oldValue compares true.
-    @param address The 4-byte aligned address of the data to update atomically.
-    @result true if newValue was written to the address. */
-
-extern Boolean OSCompareAndSwap( UInt32 oldValue, UInt32 newValue, UInt32 * address );
-
-/*! @function OSAddAtomic
-    @abstract 32-bit add operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSAddAtomic function adds the specified amount to the value at the specified address and returns the original value.
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param amount The amount to add.
-    @param address The 4-byte aligned address of the value to update atomically.
-    @result The value before the addition */
-
-extern SInt32	OSAddAtomic(SInt32 amount, SInt32 * address);
-
-/*! @function OSAddAtomic16
-    @abstract 16-bit add operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSAddAtomic16 function adds the specified amount to the value at the specified address and returns the original value.
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param amount The amount to add.
-    @param address The 2-byte aligned address of the value to update atomically.
-    @result The value before the addition */
-
-extern SInt16	OSAddAtomic16(SInt32 amount, SInt16 * address);
-
-/*! @function OSAddAtomic8
-    @abstract 8-bit add operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSAddAtomic8 function adds the specified amount to the value at the specified address and returns the original value.
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param amount The amount to add.
-    @param address The address of the value to update atomically.
-    @result The value before the addition */
-
-extern SInt8	OSAddAtomic8(SInt32 amount, SInt8 * address);
-
-/*! @function OSIncrementAtomic
-    @abstract 32-bit increment operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSIncrementAtomic function increments the value at the specified address by one and returns the original value. 
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param address The 4-byte aligned address of the value to update atomically.
-    @result The value before the increment. */
-
-extern SInt32	OSIncrementAtomic(SInt32 * address);
-
-/*! @function OSIncrementAtomic16
-    @abstract 16-bit increment operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSIncrementAtomic16 function increments the value at the specified address by one and returns the original value. 
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param address The 2-byte aligned address of the value to update atomically.
-    @result The value before the increment. */
-
-extern SInt16	OSIncrementAtomic16(SInt16 * address);
-
-/*! @function OSIncrementAtomic8
-    @abstract 8-bit increment operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSIncrementAtomic8 function increments the value at the specified address by one and returns the original value. 
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param address The address of the value to update atomically.
-    @result The value before the increment. */
-
-extern SInt8	OSIncrementAtomic8(SInt8 * address);
-
-/*! @function OSDecrementAtomic
-    @abstract 32-bit decrement operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSDecrementAtomic function decrements the value at the specified address by one and returns the original value. 
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param address The 4-byte aligned address of the value to update atomically.
-    @result The value before the decrement. */
-
-extern SInt32	OSDecrementAtomic(SInt32 * address);
-
-/*! @function OSDecrementAtomic16
-    @abstract 16-bit decrement operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSDecrementAtomic16 function decrements the value at the specified address by one and returns the original value. 
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param address The 2-byte aligned address of the value to update atomically.
-    @result The value before the decrement. */
-
-extern SInt16	OSDecrementAtomic16(SInt16 * address);
-
-/*! @function OSDecrementAtomic8
-    @abstract 8-bit decrement operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSDecrementAtomic8 function decrements the value at the specified address by one and returns the original value. 
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param address The address of the value to update atomically.
-    @result The value before the decrement. */
-
-extern SInt8	OSDecrementAtomic8(SInt8 * address);
-
-/*! @function OSBitAndAtomic
-    @abstract 32-bit logical and operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSBitAndAtomic function logically ands the bits of the specified mask into the value at the specified address and returns the original value.
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param mask The mask to logically and with the value.
-    @param address The 4-byte aligned address of the value to update atomically.
-    @result The value before the bitwise operation */
-
-extern UInt32	OSBitAndAtomic(UInt32 mask, UInt32 * address);
-
-/*! @function OSBitAndAtomic16
-    @abstract 16-bit logical and operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSBitAndAtomic16 function logically ands the bits of the specified mask into the value at the specified address and returns the original value.
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param mask The mask to logically and with the value.
-    @param address The 2-byte aligned address of the value to update atomically.
-    @result The value before the bitwise operation. */
-
-extern UInt16	OSBitAndAtomic16(UInt32 mask, UInt16 * address);
-
-/*! @function OSBitAndAtomic8
-    @abstract 8-bit logical and operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSBitAndAtomic8 function logically ands the bits of the specified mask into the value at the specified address and returns the original value.
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param mask The mask to logically and with the value.
-    @param address The address of the value to update atomically.
-    @result The value before the bitwise operation. */
-
-extern UInt8	OSBitAndAtomic8(UInt32 mask, UInt8 * address);
-
-/*! @function OSBitOrAtomic
-    @abstract 32-bit logical or operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSBitOrAtomic function logically ors the bits of the specified mask into the value at the specified address and returns the original value.
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param mask The mask to logically or with the value.
-    @param address The 4-byte aligned address of the value to update atomically.
-    @result The value before the bitwise operation. */
-
-extern UInt32	OSBitOrAtomic(UInt32 mask, UInt32 * address);
-
-/*! @function OSBitOrAtomic16
-    @abstract 16-bit logical or operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSBitOrAtomic16 function logically ors the bits of the specified mask into the value at the specified address and returns the original value.
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param mask The mask to logically or with the value.
-    @param address The 2-byte aligned address of the value to update atomically.
-    @result The value before the bitwise operation. */
-
-extern UInt16	OSBitOrAtomic16(UInt32 mask, UInt16 * address);
-
-/*! @function OSBitOrAtomic8
-    @abstract 8-bit logical or operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @discussion The OSBitOrAtomic8 function logically ors the bits of the specified mask into the value at the specified address and returns the original value.
-    @param mask The mask to logically or with the value.
-    @param address The address of the value to update atomically.
-    @result The value before the bitwise operation. */
-
-extern UInt8	OSBitOrAtomic8(UInt32 mask, UInt8 * address);
-
-/*! @function OSBitXorAtomic
-    @abstract 32-bit logical xor operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @discussion The OSBitXorAtomic function logically xors the bits of the specified mask into the value at the specified address and returns the original value.
-    @param mask The mask to logically or with the value.
-    @param address The 4-byte aligned address of the value to update atomically.
-    @result The value before the bitwise operation. */
-
-extern UInt32	OSBitXorAtomic(UInt32 mask, UInt32 * address);
-
-/*! @function OSBitXorAtomic16
-    @abstract 16-bit logical xor operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSBitXorAtomic16 function logically xors the bits of the specified mask into the value at the specified address and returns the original value.
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param mask The mask to logically or with the value.
-    @param address The 2-byte aligned address of the value to update atomically.
-    @result The value before the bitwise operation. */
-
-extern UInt16	OSBitXorAtomic16(UInt32 mask, UInt16 * address);
-
-/*! @function OSBitXorAtomic8
-    @abstract 8-bit logical xor operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @discussion The OSBitXorAtomic8 function logically xors the bits of the specified mask into the value at the specified address and returns the original value.
-    @param mask The mask to logically or with the value.
-    @param address The address of the value to update atomically.
-    @result The value before the bitwise operation. */
-
-extern UInt8	OSBitXorAtomic8(UInt32 mask, UInt8 * address);
-
-/*! @function OSTestAndSet
-    @abstract Bit test and set operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @discussion The OSTestAndSet function sets a single bit in a byte at a specified address. It returns true if the bit was already set, false otherwise.
-    @param bit The bit number in the range 0 through 7.
-    @param address The address of the byte to update atomically.
-    @result true if the bit was already set, false otherwise. */
-
-extern Boolean	OSTestAndSet(UInt32 bit, UInt8 * startAddress);
-
-/*! @function OSTestAndClear
-    @abstract Bit test and clear operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSTestAndClear function clears a single bit in a byte at a specified address. It returns true if the bit was already clear, false otherwise.
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param bit The bit number in the range 0 through 7.
-    @param address The address of the byte to update atomically.
-    @result true if the bit was already clear, false otherwise. */
-
-extern Boolean	OSTestAndClear(UInt32 bit, UInt8 * startAddress);
-
-#ifdef __ppc__
-/*! @function OSEnqueueAtomic
-    @abstract Singly linked list head insertion, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSEnqueueAtomic function places an element at the head of a single linked list, which is specified with the address of a head pointer, listHead. The element structure has a next field whose offset is specified.
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param listHead The address of a head pointer for the list .
-    @param element The list element to insert at the head of the list.
-    @param elementNextFieldOffset The byte offset into the element where a pointer to the next element in the list is stored. */
-
-extern void	OSEnqueueAtomic(void ** listHead, void * element,
-				SInt32 elementNextFieldOffset);
-
-/*! @function OSDequeueAtomic
-    @abstract Singly linked list element head removal, performed atomically with respect to all devices that participate in the coherency architecture of the platform.
-    @discussion The OSDequeueAtomic function removes an element from the head of a single linked list, which is specified with the address of a head pointer, listHead. The element structure has a next field whose offset is specified.
-
-    This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
-    @param listHead The address of a head pointer for the list .
-    @param elementNextFieldOffset The byte offset into the element where a pointer to the next element in the list is stored.
-    @result A removed element, or zero if the list is empty. */
-
-extern void *	OSDequeueAtomic(void ** listHead,
-				SInt32 elementNextFieldOffset);
-#endif /* __ppc__ */
-
-/*! @function OSSynchronizeIO
-    @abstract The OSSynchronizeIO routine ensures orderly load and store operations to noncached memory mapped I/O devices.
-    @discussion The OSSynchronizeIO routine ensures orderly load and store operations to noncached memory mapped I/O devices. It executes the eieio instruction on PowerPC processors. */
-
-static __inline__ void OSSynchronizeIO(void)
-{
-#if defined(__ppc__)
-        __asm__ ("eieio");
-#endif
-}
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif /* ! _OS_OSATOMIC_H */
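The deleted OSAtomic.h.save draft documented the same compare-and-swap
semantics the live header retains. The canonical pattern built on
OSCompareAndSwap() is a read-modify-retry loop; a sketch (the helper name is
illustrative):

    static void
    atomic_or_mask(volatile UInt32 *value, UInt32 mask)
    {
        UInt32 oldval, newval;

        do {
            oldval = *value;
            newval = oldval | mask;
            /* Retry if another CPU changed *value between the load and
             * the swap. */
        } while (!OSCompareAndSwap(oldval, newval, value));
    }

In practice OSBitOrAtomic() provides this particular operation directly; the
loop shows the underlying technique for updates with no dedicated primitive.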
diff --git a/libkern/libkern/OSByteOrder.h b/libkern/libkern/OSByteOrder.h
index cb12cb31a..8ae2c33b8 100644
--- a/libkern/libkern/OSByteOrder.h
+++ b/libkern/libkern/OSByteOrder.h
@@ -39,9 +39,7 @@
 
 #if defined(__GNUC__)
 
-#if (defined(__ppc__) || defined(__ppc64__))
-#include <libkern/ppc/OSByteOrder.h>
-#elif (defined(__i386__) || defined(__x86_64__))
+#if (defined(__i386__) || defined(__x86_64__))
 #include <libkern/i386/OSByteOrder.h>
 #else
 #include <libkern/machine/OSByteOrder.h>
diff --git a/libkern/libkern/OSCrossEndian.h b/libkern/libkern/OSCrossEndian.h
index 0bbbf58e3..6038319ef 100644
--- a/libkern/libkern/OSCrossEndian.h
+++ b/libkern/libkern/OSCrossEndian.h
@@ -60,29 +60,8 @@
 
 #include <sys/sysctl.h>
 
-#if __ppc__
-
-static __inline__ int
-_OSRosettaCheck(void)
-{
-	int isCrossEndian = 0;
-	int val = 0;
-	size_t size = sizeof val;
-
-	if (sysctlbyname("sysctl.proc_native", &val, &size, NULL, 0) == -1)
-		isCrossEndian = 0;
-	else
-		isCrossEndian = val ? 0 : 1;
-
-	return isCrossEndian;
-}
-
-#else /* __ppc__ */
-
 static __inline__ int _OSRosettaCheck(void) { return 0; }
 
-#endif /* __ppc__ */
-
 #define IF_ROSETTA() if (__builtin_expect(_OSRosettaCheck(), 0) )
 
 #define ROSETTA_ONLY(exprs)	\
diff --git a/libkern/libkern/OSDebug.h b/libkern/libkern/OSDebug.h
index eaeefc129..84611f320 100644
--- a/libkern/libkern/OSDebug.h
+++ b/libkern/libkern/OSDebug.h
@@ -48,6 +48,9 @@ extern void trace_backtrace(unsigned int debugid, unsigned int debugid2, unsigne
 extern void OSReportWithBacktrace(const char *str, ...);
 extern unsigned OSBacktrace(void **bt, unsigned maxAddrs);
 
+/* Simple dump of 20 backtrace entries */
+extern void OSPrintBacktrace(void);
+
 /*! @function OSKernelStackRemaining
     @abstract Returns bytes available below the current stack frame.
     @discussion Returns bytes available below the current stack frame. Safe for interrupt or thread context.
diff --git a/libkern/libkern/OSKextLib.h b/libkern/libkern/OSKextLib.h
index 9842b7546..6ecc3548d 100644
--- a/libkern/libkern/OSKextLib.h
+++ b/libkern/libkern/OSKextLib.h
@@ -671,6 +671,11 @@ OSReturn OSKextReleaseKextWithLoadTag(OSKextLoadTag loadTag);
 #pragma mark Kext Requests
 /********************************************************************/
 #endif
+/*!
+ * @group Kext Requests to User Space
+ * Functions for making requests to kextd in user space.
+ */
+
 /*!
  * @typedef OSKextRequestTag
  *
@@ -679,9 +684,15 @@ OSReturn OSKextReleaseKextWithLoadTag(OSKextLoadTag loadTag);
  */
 typedef uint32_t OSKextRequestTag;
 
+/*!
+ * @define kOSKextRequestTagInvalid
+ *
+ * @abstract
+ * A request tag value that will never be used for a kext request;
+ * indicates failure to create/queue the request.
+ */
 #define kOSKextRequestTagInvalid  ((OSKextRequestTag)-1)
 
-
 /*!
  * @typedef OSKextRequestResourceCallback
  *
@@ -732,7 +743,10 @@ typedef void (* OSKextRequestResourceCallback)(
  *                         when it is invoked. May be <code>NULL</code>.
  * @param  requestTagOut   If non-<code>NULL</code>,
  *                         filled on success with a tag identifying the
- *                         pending request; can be used with
+ *                         pending request
+ *                         (or on failure with <code>@link kOSKextRequestTagInvalid
+ *                         kOSKextRequestTagInvalid@/link</code>);
+ *                         can be used with
  *                         <code>@link OSKextCancelRequest
  *                         OSKextCancelRequest@/link</code>.
  *
@@ -748,12 +762,23 @@ typedef void (* OSKextRequestResourceCallback)(
  * Other <code>OSKextReturn...</code> errors are possible.
  *
  * @discussion
- * This function queues a request to the user-space kext daemon
+ * This function queues an asynchronous request to the user-space kext daemon
  * <code>@link //apple_ref/doc/man/8/kextd kextd(8)@/link</code>;
  * requests for resources early in system startup
  * will not be fulfilled until that daemon starts.
- * Note also that the localization context of the kext daemon
- * (namely tha tof the superuser)
+ * Requests made by a kext while that kext is loading
+ * (specifically in the kext's module start routine)
+ * will not be fulfilled until after the start routine returns and
+ * the kext is completely loaded.
+ * Kexts requesting resources should be sure to perform appropriate locking
+ * in the callback function.
+ *
+ * Kext resources are stored in the kext's on-disk bundle under the
+ * Resources subdirectory.
+ * See {@linkdoc //apple_ref/doc/uid/10000123i Bundle Programming Guide}
+ * for an overview of bundle structure.
+ * The localization context of the kext daemon
+ * (namely that of the superuser)
  * will be used in retrieving resources;
  * kext resources intended for use in the kernel
  * should generally not be localized.
@@ -828,15 +853,12 @@ OSReturn OSKextCancelRequest(
     void             ** contextOut);
 
 
-#if (__x86_64__)
-
 #if PRAGMA_MARK
 #pragma mark -
 /********************************************************************/
 #pragma mark Weak linking
 /********************************************************************/
 #endif
-
 /*!
  * @group Weak Linking
  * Support for weak references to symbols in kexts.
@@ -894,8 +916,6 @@ extern const void * gOSKextUnresolved;
 #define OSKextSymbolIsResolved(weak_sym)        \
     (&(weak_sym) != gOSKextUnresolved)
 
-#endif /* (__x86_64__) */
-
 #endif /* KERNEL */
 
 __END_DECLS
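With the __x86_64__ guard removed, the weak-linking check is available on all
architectures. A sketch of the intended use, per the OSKextSymbolIsResolved()
macro above (foo_func is a hypothetical weakly linked symbol from a
dependency kext):

    extern void foo_func(void) __attribute__((weak_import));

    void
    maybe_call_foo(void)
    {
        /* A kext built against a newer dependency may run with an older
         * one; an unresolved symbol compares equal to gOSKextUnresolved. */
        if (OSKextSymbolIsResolved(foo_func)) {
            foo_func();
        }
    }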
diff --git a/libkern/libkern/OSKextLibPrivate.h b/libkern/libkern/OSKextLibPrivate.h
index cc4f3aa29..53fbc3921 100644
--- a/libkern/libkern/OSKextLibPrivate.h
+++ b/libkern/libkern/OSKextLibPrivate.h
@@ -30,6 +30,7 @@
 #define _LIBKERN_OSKEXTLIBPRIVATE_H
 
 #include <sys/cdefs.h>
+#include <uuid/uuid.h>
 
 __BEGIN_DECLS
 #ifdef KERNEL
@@ -38,6 +39,7 @@ __BEGIN_DECLS
 #include <mach/vm_types.h>
 #else
 #include <CoreFoundation/CoreFoundation.h>
+#include <System/mach/kmod.h>
 #endif /* KERNEL */
 __END_DECLS
 
@@ -85,15 +87,17 @@ typedef uint8_t OSKextExcludeLevel;
 #endif
 /*********************************************************************
 * In addition to the keys defined here, you will find:
-*   CFBundleIdentifier
-*   CFBundleVersion
-*   OSBundleCompatibleVersion
-*   OSKernelResource
-*   OSBundleInterface
+*   kCFBundleIdentifierKey
+*   kCFBundleVersionKey
+*   kOSBundleCompatibleVersionKey
+*   kOSBundleIsInterfaceKey
+*   kOSKernelResourceKey
 *********************************************************************/
+#define kOSBundleMachOHeadersKey                "OSBundleMachOHeaders"
 #define kOSBundleCPUTypeKey                     "OSBundleCPUType"
 #define kOSBundleCPUSubtypeKey                  "OSBundleCPUSubtype"
 #define kOSBundlePathKey                        "OSBundlePath"
+#define kOSBundleExecutablePathKey              "OSBundleExecutablePath"
 #define kOSBundleUUIDKey                        "OSBundleUUID"
 #define kOSBundleStartedKey                     "OSBundleStarted"
 #define kOSBundlePrelinkedKey                   "OSBundlePrelinked"
@@ -104,8 +108,13 @@ typedef uint8_t OSKextExcludeLevel;
 #define kOSBundleDependenciesKey                "OSBundleDependencies"
 #define kOSBundleRetainCountKey                 "OSBundleRetainCount"
 
+/* Dictionary of metaclass info keyed by classname.
+ */
 #define kOSBundleClassesKey                     "OSBundleClasses"
 
+/* These are contained in kOSBundleClassesKey. kOSMetaClassSuperclassNameKey
+ * may be absent (for the root class).
+ */
 #define kOSMetaClassNameKey                     "OSMetaClassName"
 #define kOSMetaClassSuperclassNameKey           "OSMetaClassSuperclassName"
 #define kOSMetaClassTrackingCountKey            "OSMetaClassTrackingCount"
@@ -653,15 +662,6 @@ Boolean OSKextVersionGetString(
 void kext_weak_symbol_referenced(void);
 #endif /* XNU_KERNEL_PRIVATE */
 
-#if !(__x86_64__)
-
-extern const void *gOSKextUnresolved;
-
-#define OSKextSymbolIsResolved(weak_sym)        \
-    (&(weak_sym) != gOSKextUnresolved)
-
-#endif /* !(__x86_64__) */
-
 #if PRAGMA_MARK
 #pragma mark -
 /********************************************************************/
@@ -681,6 +681,17 @@ vm_map_t kext_get_vm_map(kmod_info_t * info);
 
 #ifdef XNU_KERNEL_PRIVATE
 
+#if CONFIG_DTRACE
+/*!
+ * @function OSKextRegisterKextsWithDTrace
+ * @abstract
+ * DTrace calls this once when it has started up so that the kext system
+ * will register any already-loaded kexts with it.
+ */
+void OSKextRegisterKextsWithDTrace(void);
+
+#endif /* CONFIG_DTRACE */
+
 /*!
  * @function kext_dump_panic_lists
  * @abstract Prints compacted lists of last unloaded & all loaded kexts
@@ -787,6 +798,89 @@ OSReturn OSKextUnloadKextWithLoadTag(uint32_t loadTag);
 
 #endif /* KERNEL */
 
+#if PRAGMA_MARK
+#pragma mark -
+/********************************************************************/
+#pragma mark Loaded Kext Summary
+/********************************************************************/
+#endif
+
+/*!
+ * @define kOSKextLoadedKextSummaryVersion
+ * @abstract The current version of the loaded kext summary headers.
+ */
+#define kOSKextLoadedKextSummaryVersion 2
+
+/*!
+ * @typedef OSKextLoadedKextSummary
+ * @abstract A structure that describes a loaded kext.
+ *
+ * @field name The kext's bundle identifier.
+ * @field uuid The kext's UUID.
+ * @field address The kext's load address.
+ * @field size The kext's load size.
+ * @field version The binary format (OSKextVersion) version of the kext.
+ * @field loadTag The kext's load tag.
+ * @field flags Internal tracking flags.
+ * @field reference_list The list of kexts this kext references (links against).
+ * 
+ * @discussion
+ * The OSKextLoadedKextSummary structure contains a basic set of information
+ * about the kext to facilitate kext debugging and panic debug log output.
+ */
+typedef struct _loaded_kext_summary {
+    char        name[KMOD_MAX_NAME];
+    uuid_t      uuid;
+    uint64_t    address;
+    uint64_t    size;
+    uint64_t    version;
+    uint32_t    loadTag;
+    uint32_t    flags;
+    uint64_t    reference_list;
+} OSKextLoadedKextSummary;
+
+/*!
+ * @typedef OSKextLoadedKextSummaryHeader
+ * @abstract A structure that describes the set of loaded kext summaries.
+ *
+ * @field version The version of the loaded kext summary structures.
+ * @field entry_size The size of each entry in summaries.
+ * @field numSummaries The number of OSKextLoadedKextSummary structures
+ *        following the header.
+ * @field summaries A convenience pointer to the array of summaries following
+ *        the header.
+ *
+ * @discussion
+ * The OSKextLoadedKextSummaryHeader describes the set of loaded kext summaries
+ * available for use by the debugger or panic log routine.
+ * The array of summaries contains one OSKextLoadedKextSummary for every kext
+ * that declares an executable and is not an interface to the kernel.
+ */
+typedef struct _loaded_kext_summary_header {
+    uint32_t version;
+    uint32_t entry_size;
+    uint32_t numSummaries;
+    uint32_t reserved; /* explicit alignment for gdb */
+    OSKextLoadedKextSummary summaries[0];
+} OSKextLoadedKextSummaryHeader;
+
+/*!
+ * @var gLoadedKextSummaries
+ * @abstract The global pointer to the current set of loaded kext summaries.
+ */
+extern OSKextLoadedKextSummaryHeader * gLoadedKextSummaries;
+
+/*!
+ * @function OSKextLoadedKextSummariesUpdated
+ * @abstract Called when gLoadedKextSummaries has been updated.
+ *
+ * @discussion
+ * gLoadedKextSummaries is updated when a kext is loaded or unloaded.
+ * When the update is complete, OSKextLoadedKextSummariesUpdated is called.
+ * gdb can set a breakpoint on this function to detect kext loads and unloads.
+ */
+void OSKextLoadedKextSummariesUpdated(void);
+
 __END_DECLS
 
 #endif /* ! _LIBKERN_OSKEXTLIBPRIVATE_H */
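
To make the summary layout above concrete, here is a minimal sketch of walking
gLoadedKextSummaries the way a panic-log or debugger routine would (kernel-private
context assumed; the helper name is illustrative). Note that entries are stepped by
entry_size rather than sizeof(OSKextLoadedKextSummary), which is exactly why the
header carries that field:

#include <libkern/OSKextLibPrivate.h>

static void
print_loaded_kext_summaries(int (*printf_func)(const char * fmt, ...))
{
    if (!gLoadedKextSummaries ||
        gLoadedKextSummaries->version != kOSKextLoadedKextSummaryVersion) {
        return;
    }

    for (uint32_t i = 0; i < gLoadedKextSummaries->numSummaries; i++) {
        /* Step by entry_size so the walk survives future, larger summaries. */
        OSKextLoadedKextSummary * s = (OSKextLoadedKextSummary *)
            ((uintptr_t)gLoadedKextSummaries->summaries +
             (uintptr_t)i * gLoadedKextSummaries->entry_size);

        printf_func("%s (tag %u) @ 0x%llx, %llu bytes\n",
            s->name, s->loadTag, s->address, s->size);
    }
}
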
diff --git a/iokit/Kernel/WKdm.h b/libkern/libkern/WKdm.h
similarity index 97%
rename from iokit/Kernel/WKdm.h
rename to libkern/libkern/WKdm.h
index fc73454ae..f88b9971b 100644
--- a/iokit/Kernel/WKdm.h
+++ b/libkern/libkern/WKdm.h
@@ -68,11 +68,11 @@ typedef unsigned int WK_word;
 
 /* the next few are used during compression to write the header */
 #define SET_QPOS_AREA_START(compr_dest_buf,qpos_start_addr)  \
-        (compr_dest_buf[1] = qpos_start_addr - compr_dest_buf)
+        (compr_dest_buf[1] = (unsigned int)(qpos_start_addr - compr_dest_buf))
 #define SET_LOW_BITS_AREA_START(compr_dest_buf,lb_start_addr) \
-        (compr_dest_buf[2] = lb_start_addr - compr_dest_buf)
+        (compr_dest_buf[2] = (unsigned int)(lb_start_addr - compr_dest_buf))
 #define SET_LOW_BITS_AREA_END(compr_dest_buf,lb_end_addr) \
-        (compr_dest_buf[3] = lb_end_addr - compr_dest_buf)
+        (compr_dest_buf[3] = (unsigned int)(lb_end_addr - compr_dest_buf))
 
 /* the next few are only use during decompression to read the header */
 #define TAGS_AREA_START(decomp_src_buf)       \
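
The casts added above make an LP64 narrowing explicit: the pointer subtraction
yields a 64-bit ptrdiff_t, while each header slot is a 32-bit WK_word. A
user-space model of the same store (names illustrative, not from WKdm itself):

#include <stddef.h>
#include <stdio.h>

typedef unsigned int WK_word;

int
main(void)
{
    WK_word   page[16] = { 0 };
    WK_word * qpos_start = page + 10;

    ptrdiff_t words = qpos_start - page;   /* 64-bit on LP64 */
    page[1] = (unsigned int)words;         /* what SET_QPOS_AREA_START now does */

    printf("%u\n", page[1]);               /* prints 10 */
    return 0;
}
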
diff --git a/libkern/libkern/_OSByteOrder.h b/libkern/libkern/_OSByteOrder.h
index f01425b02..3ceec32eb 100644
--- a/libkern/libkern/_OSByteOrder.h
+++ b/libkern/libkern/_OSByteOrder.h
@@ -69,7 +69,7 @@
 
 
 #define __DARWIN_OSSwapInt16(x) \
-    (__builtin_constant_p(x) ? __DARWIN_OSSwapConstInt16(x) : _OSSwapInt16(x))
+    ((__uint16_t)(__builtin_constant_p(x) ? __DARWIN_OSSwapConstInt16(x) : _OSSwapInt16(x)))
 
 #define __DARWIN_OSSwapInt32(x) \
     (__builtin_constant_p(x) ? __DARWIN_OSSwapConstInt32(x) : _OSSwapInt32(x))
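
The new outer __uint16_t cast pins down the type of the whole ternary, which
would otherwise promote to int. A simplified user-space model (not Apple's
exact macros, which also mask the operands) shows both the size and the value
effect:

#include <stdint.h>
#include <stdio.h>

#define SWAP16_NOCAST(x) (((x) << 8) | ((x) >> 8))            /* promotes to int */
#define SWAP16_CAST(x)   ((uint16_t)(((x) << 8) | ((x) >> 8))) /* stays 16-bit   */

int
main(void)
{
    uint16_t v = 0x1234;

    /* Without the cast: 4 bytes wide, and the shifted-out high byte
     * survives (0x123412 instead of 0x3412). */
    printf("%zu 0x%x\n", sizeof(SWAP16_NOCAST(v)), (unsigned)SWAP16_NOCAST(v));

    /* With the cast: a proper uint16_t, 2 bytes, value 0x3412. */
    printf("%zu 0x%x\n", sizeof(SWAP16_CAST(v)), (unsigned)SWAP16_CAST(v));
    return 0;
}
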
diff --git a/libkern/libkern/c++/Makefile b/libkern/libkern/c++/Makefile
index 4d2eb7d29..8045763a1 100644
--- a/libkern/libkern/c++/Makefile
+++ b/libkern/libkern/c++/Makefile
@@ -8,8 +8,6 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = 
 
-INSTINC_SUBDIRS_PPC = 
-
 INSTINC_SUBDIRS_I386 = 
 
 INSTINC_SUBDIRS_X86_64 = 
@@ -18,8 +16,6 @@ INSTINC_SUBDIRS_ARM =
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
 
-EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC}
-
 EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
diff --git a/libkern/libkern/c++/OSKext.h b/libkern/libkern/c++/OSKext.h
index 312d53993..d3f0fa232 100644
--- a/libkern/libkern/c++/OSKext.h
+++ b/libkern/libkern/c++/OSKext.h
@@ -32,6 +32,7 @@
 extern "C" {
 #include <kern/thread_call.h>
 #include <libkern/OSKextLibPrivate.h>
+#include <libkern/kernel_mach_header.h>
 #include <libkern/kxld.h>
 #include <mach/kmod.h>
 
@@ -96,11 +97,11 @@ kern_return_t is_io_catalog_send_data(
 
 void kmod_dump_log(vm_offset_t*, unsigned int);
 
-#if __ppc__ || __i386__
+#if __i386__
 kern_return_t kext_get_kmod_info(
     kmod_info_array_t      * kmod_list,
     mach_msg_type_number_t * kmodCount);
-#endif /* __ppc__ || __i386__ */
+#endif /* __i386__ */
 
 #endif /* XNU_KERNEL_PRIVATE */
 };
@@ -123,7 +124,6 @@ class OSKext : public OSObject
 /**************************************/
 #endif
     friend class IOCatalogue;
-    friend class IOPMrootDomain;
     friend class KLDBootstrap;
     friend class OSMetaClass;
 
@@ -183,11 +183,11 @@ class OSKext : public OSObject
     friend void kmod_dump_log(vm_offset_t*, unsigned int);
     friend void kext_dump_panic_lists(int (*printf_func)(const char * fmt, ...));
 
-#if __ppc__ || __i386__
+#if __i386__
     friend kern_return_t kext_get_kmod_info(
         kmod_info_array_t      * kmod_list,
         mach_msg_type_number_t * kmodCount);
-#endif /* __ppc__ || __i386__ */
+#endif /* __i386__ */
 
 #endif /* XNU_KERNEL_PRIVATE */
 
@@ -200,6 +200,7 @@ private:
 
     const OSSymbol * bundleID;
     OSString       * path;               // not necessarily correct :-/
+    OSString       * executableRelPath;  // relative to bundle
 
     OSKextVersion    version;            // parsed
     OSKextVersion    compatibleVersion;  // parsed
@@ -213,14 +214,13 @@ private:
 
     OSArray        * dependencies;       // kernel resource does not have any;
                                          // links directly to kernel
-    OSData         * linkState;          // only kept for libraries
 
    /* Only real kexts have these; interface kexts do not.
     */
     OSData         * linkedExecutable;
     OSSet          * metaClasses;           // for C++/OSMetaClass kexts
     
-   /* Only interface kexts have these; interface kexts can get at them
+   /* Only interface kexts have these; non-interface kexts can get at them
     * in the linked Executable.
     */
     OSData         * interfaceUUID;
@@ -229,11 +229,13 @@ private:
         unsigned int loggingEnabled:1;
 
         unsigned int hasAllDependencies:1;
+        unsigned int hasBleedthrough:1;
 
         unsigned int interface:1;
         unsigned int kernelComponent:1;
         unsigned int prelinked:1;
         unsigned int loaded:1;
+        unsigned int dtraceInitialized:1;
         unsigned int starting:1;
         unsigned int started:1;
         unsigned int stopping:1;
@@ -250,15 +252,18 @@ private:
 #pragma mark Private Functions
 /**************************************/
 #endif
-private:
 
+#ifdef XNU_KERNEL_PRIVATE
    /* Startup/shutdown phases.
     */
+public:
     static void           initialize(void);
     static OSDictionary * copyKexts(void);
     static OSReturn       removeKextBootstrap(void);
     static void           willShutdown(void);  // called by IOPMrootDomain on shutdown
+#endif /* XNU_KERNEL_PRIVATE */
 
+private:
    /* Called by power management at sleep/shutdown.
     */
     static bool setLoadEnabled(bool flag);
@@ -338,7 +343,6 @@ private:
         const void * mkextFileBase,
         const void * entry);
 
-
    /* Dependencies.
     */
     virtual bool resolveDependencies(
@@ -377,20 +381,33 @@ private:
         OSKextExcludeLevel   startMatchingOpt = kOSKextExcludeAll,
         OSArray            * personalityNames = NULL); // priv/prot
     virtual OSReturn unload(void);
+    virtual OSReturn queueKextNotification(
+        const char * notificationName,
+        OSString   * kextIdentifier);
 
     static void recordIdentifierRequest(
         OSString * kextIdentifier);
 
     virtual OSReturn loadExecutable(void);
+    virtual void     jettisonLinkeditSegment(void);
+    virtual OSReturn removeLinkeditHeaders(kernel_segment_command_t *linkedit);
     static  void     considerDestroyingLinkContext(void);
-    static  OSData  * getKernelLinkState(void);
     virtual OSData * getExecutable(void);
     virtual void     setLinkedExecutable(OSData * anExecutable);
+    
+#if CONFIG_DTRACE
+    friend  void OSKextRegisterKextsWithDTrace(void);
+    static  void registerKextsWithDTrace(void);
+    virtual void registerWithDTrace(void);
+    virtual void unregisterWithDTrace(void);
+#endif /* CONFIG_DTRACE */
 
     virtual OSReturn start(bool startDependenciesFlag = true);
     virtual OSReturn stop(void);
     virtual OSReturn setVMProtections(void);
+    virtual boolean_t segmentShouldBeWired(kernel_segment_command_t *seg);
     virtual OSReturn validateKextMapping(bool startFlag);
+    virtual boolean_t verifySegmentMapping(kernel_segment_command_t *seg);
 
     static OSArray * copyAllKextPersonalities(
         bool filterSafeBootFlag = false);
@@ -409,10 +426,18 @@ private:
 
     static OSReturn autounloadKext(OSKext * aKext);
 
+   /* Sync with user space.
+    */
+    static OSReturn pingKextd(void);
+
    /* Getting info about loaded kexts (kextstat).
     */
-    static  OSArray      * copyLoadedKextInfo(OSArray * kextIdentifiers);
-    virtual OSDictionary * copyInfo(void);
+    static  OSDictionary * copyLoadedKextInfo(
+        OSArray * kextIdentifiers = NULL,
+        OSArray * keys = NULL);
+    virtual OSDictionary * copyInfo(OSArray * keys = NULL);
+
+    static  OSData       * copySanitizedKernelImage(void);
 
    /* Logging to user space.
     */
@@ -437,6 +462,8 @@ private:
     virtual void reportOSMetaClassInstances(
         OSKextLogSpec msgLogSpec);
 
+   /* Resource requests and other callback stuff.
+    */
     static OSReturn dispatchResource(OSDictionary * requestDict);
 
     static OSReturn dequeueCallbackForRequestTag(
@@ -460,6 +487,14 @@ private:
         unsigned int    cnt,
         int          (* printf_func)(const char *fmt, ...),
         bool            lockFlag);
+    static boolean_t summaryIsInBacktrace(
+        OSKextLoadedKextSummary * summary,
+        vm_offset_t             * addr,
+        unsigned int              cnt);
+    static void printSummary(
+        OSKextLoadedKextSummary * summary,
+        int                    (* printf_func)(const char *fmt, ...));
+
     static uint32_t saveLoadedKextPanicListTyped(
         const char * prefix,
         int          invertFlag,
@@ -468,21 +503,25 @@ private:
         uint32_t     list_size,
         uint32_t   * list_length_ptr);
     static void saveLoadedKextPanicList(void);
-    static void saveUnloadedKextPanicList(OSKext * aKext);
+    void savePanicString(bool isLoading);
     static void printKextPanicLists(int (*printf_func)(const char *fmt, ...));
 
+   /* Kext summary support.
+    */
+    static void updateLoadedKextSummaries(void);
+    void updateLoadedKextSummary(OSKextLoadedKextSummary *summary);
+
     /* C++ Initialization.
      */
-
     virtual void               setCPPInitialized(bool initialized=true);
 
-#if __ppc__ || __i386__
+#if __i386__
    /* Backward compatibility for kmod_get_info() MIG call.
     */
     static kern_return_t getKmodInfo(
         kmod_info_array_t      * kmodList,
         mach_msg_type_number_t * kmodCount);
-#endif /* __ppc__ || __i386__ */
+#endif /* __i386__ */
 
 
 #if PRAGMA_MARK
@@ -530,29 +569,41 @@ public:
         OSKextRequestTag    requestTag,
         void             ** contextOut); 
 
-    static void considerUnloads(Boolean rescheduleOnlyFlag = false);
-    static void flushNonloadedKexts(Boolean flushPrelinkedKexts);
-    static void setKextdActive(Boolean active = true);
-    static void setDeferredLoadSucceeded(Boolean succeeded = true);
-    static void considerRebuildOfPrelinkedKernel(void);
+    static void     considerUnloads(Boolean rescheduleOnlyFlag = false);
+    static void     flushNonloadedKexts(Boolean flushPrelinkedKexts);
+    static void     setKextdActive(Boolean active = true);
+    static void     setDeferredLoadSucceeded(Boolean succeeded = true);
+    static void     considerRebuildOfPrelinkedKernel(OSString * moduleName);
 
-    virtual bool setAutounloadEnabled(bool flag);
+    virtual bool    setAutounloadEnabled(bool flag);
 
     virtual const OSSymbol   * getIdentifier(void);
     virtual const char       * getIdentifierCString(void);
     virtual OSKextVersion      getVersion(void);
     virtual OSKextVersion      getCompatibleVersion(void);
+    virtual bool               isLibrary(void);
     virtual bool               isCompatibleWithVersion(OSKextVersion aVersion);
     virtual OSObject         * getPropertyForHostArch(const char * key);
         
     virtual OSKextLoadTag      getLoadTag(void);
+    virtual void               getSizeInfo(uint32_t *loadSize, uint32_t *wiredSize);
     virtual OSData           * copyUUID(void);
     virtual OSArray          * copyPersonalitiesArray(void);
+    
+   /* This removes only the personalities that name this kext by
+    * CFBundleIdentifier, not all personalities defined by the kext
+    * (whether named via IOPersonalityPublisher or CFBundleIdentifier).
+    */
     virtual void               removePersonalitiesFromCatalog(void);
 
+   /* Converts common string-valued properties to OSSymbols for lower memory consumption.
+    */
+    static void uniquePersonalityProperties(OSDictionary * personalityDict);
+
     virtual bool               declaresExecutable(void);     // might be missing
     virtual bool               isInterface(void);
+    virtual bool               isKernel(void);
     virtual bool               isKernelComponent(void);
+    virtual bool               isExecutable(void);
     virtual bool               isLoadableInSafeBoot(void);
     virtual bool               isPrelinked(void);
     virtual bool               isLoaded(void);
diff --git a/libkern/libkern/c++/OSMetaClass.h b/libkern/libkern/c++/OSMetaClass.h
index 85f9553e0..662021550 100644
--- a/libkern/libkern/c++/OSMetaClass.h
+++ b/libkern/libkern/c++/OSMetaClass.h
@@ -51,12 +51,8 @@ class OSSerialize;
  */
  
  
-#if !defined(__ppc__) || __GNUC__ < 3
 /*! @parseOnly */
 #define APPLE_KEXT_COMPATIBILITY
-#else
-#define APPLE_KEXT_COMPATIBILITY __attribute__ ((apple_kext_compatibility))
-#endif
 
 /*! @parseOnly */
 #define APPLE_KEXT_VTABLE_PADDING   1
@@ -846,6 +842,9 @@ private:
 class OSMetaClass : private OSMetaClassBase
 {
     friend class OSKext;
+#if IOKITSTATS
+	friend class IOStatistics;
+#endif
 
 private:
     // Can never be allocated must be created at compile time
@@ -862,7 +861,7 @@ private:
    /* className OSSymbol of the class' name. */
     const OSSymbol *className;
 
-   /* classSize How big is a single instancde of this class. */
+   /* classSize How big is a single instance of this class. */
     unsigned int classSize;
 
    /* instanceCount Roughly number of instances of the object,
diff --git a/libkern/libkern/c++/OSObject.h b/libkern/libkern/c++/OSObject.h
index cfd75269c..b33ed3c47 100644
--- a/libkern/libkern/c++/OSObject.h
+++ b/libkern/libkern/c++/OSObject.h
@@ -164,6 +164,9 @@ class OSString;
 class OSObject : public OSMetaClassBase
 {
     OSDeclareAbstractStructors(OSObject)
+#if IOKITSTATS
+	friend class IOStatistics;
+#endif
 
 private:
    /* Not to be included in headerdoc.
@@ -435,24 +438,6 @@ public:
     OSMetaClassDeclareReservedUnused(OSObject, 14);
     OSMetaClassDeclareReservedUnused(OSObject, 15);
 
-#ifdef __ppc__
-    OSMetaClassDeclareReservedUnused(OSObject, 16);
-    OSMetaClassDeclareReservedUnused(OSObject, 17);
-    OSMetaClassDeclareReservedUnused(OSObject, 18);
-    OSMetaClassDeclareReservedUnused(OSObject, 19);
-    OSMetaClassDeclareReservedUnused(OSObject, 20);
-    OSMetaClassDeclareReservedUnused(OSObject, 21);
-    OSMetaClassDeclareReservedUnused(OSObject, 22);
-    OSMetaClassDeclareReservedUnused(OSObject, 23);
-    OSMetaClassDeclareReservedUnused(OSObject, 24);
-    OSMetaClassDeclareReservedUnused(OSObject, 25);
-    OSMetaClassDeclareReservedUnused(OSObject, 26);
-    OSMetaClassDeclareReservedUnused(OSObject, 27);
-    OSMetaClassDeclareReservedUnused(OSObject, 28);
-    OSMetaClassDeclareReservedUnused(OSObject, 29);
-    OSMetaClassDeclareReservedUnused(OSObject, 30);
-    OSMetaClassDeclareReservedUnused(OSObject, 31);
-#endif
 };
 
 #endif /* !_LIBKERN_OSOBJECT_H */
diff --git a/libkern/libkern/c++/OSOrderedSet.h b/libkern/libkern/c++/OSOrderedSet.h
index 64609d863..8819f9332 100644
--- a/libkern/libkern/c++/OSOrderedSet.h
+++ b/libkern/libkern/c++/OSOrderedSet.h
@@ -111,8 +111,8 @@ public:
     * @result
     * A comparison result of the object:
     * <ul>
-    *   <li>a positive value if obj2 should precede obj1,</li>
-    *   <li>a negative value if obj1 should precede obj2,</li>
+    *   <li>a negative value if obj2 should precede obj1,</li>
+    *   <li>a positive value if obj1 should precede obj2,</li>
     *   <li>and 0 if obj1 and obj2 have an equivalent ordering.</li>
     * </ul>
     */
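
The corrected polarity is easy to get backwards, so here is a minimal
kernel-side sketch of an ordering callback (assuming the OSOrderFunction
signature and the withCapacity(capacity, orderFunc, context) factory from this
header):

#include <libkern/c++/OSOrderedSet.h>
#include <libkern/c++/OSNumber.h>

/* Orders OSNumbers ascending: positive when obj1 should precede obj2,
 * negative when obj2 should precede obj1, 0 when equivalent. */
static SInt32
orderAscending(const OSMetaClassBase * obj1,
               const OSMetaClassBase * obj2,
               void * context)
{
    (void)context;
    OSNumber * n1 = OSDynamicCast(OSNumber, obj1);
    OSNumber * n2 = OSDynamicCast(OSNumber, obj2);
    if (!n1 || !n2) {
        return 0;                 /* treat non-numbers as equivalent */
    }
    if (n1->unsigned64BitValue() < n2->unsigned64BitValue()) {
        return 1;                 /* obj1 precedes obj2 */
    }
    if (n1->unsigned64BitValue() > n2->unsigned64BitValue()) {
        return -1;                /* obj2 precedes obj1 */
    }
    return 0;                     /* equivalent ordering */
}

/* Usage sketch: OSOrderedSet * set = OSOrderedSet::withCapacity(8, orderAscending); */
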
diff --git a/libkern/libkern/c++/OSSet.h b/libkern/libkern/c++/OSSet.h
index 65fd45d6e..0e82f7a87 100644
--- a/libkern/libkern/c++/OSSet.h
+++ b/libkern/libkern/c++/OSSet.h
@@ -537,23 +537,21 @@ public:
     * @param array  The OSArray object containing the objects to be added.
     *
     * @result
-    * <code>true</code> if any object from <code>array</code>
-    * was successfully added the receiver,
+    * <code>true</code> if all objects from <code>array</code>
+    * are successfully added to the receiver (or were already present),
     * <code>false</code> otherwise.
     *
     * @discussion
     * This functions adds to the receiving set
     * all objects from <code>array</code>
-    * that are not already in the set.
-    * Objects successfully added to the receiver are retained.
+    * that are not already in the receiving set.
+    * Objects added to the receiver are retained.
     *
-    * A <code>false</code> return value can mean either
-    * that all the objects in <code>array</code> are already present in the set,
-    * or that a memory allocation failure occurred.
-    * If you need to know whether the objects
-    * are already present, use
-    * <code>@link containsObject containsObject@/link</code>
-    * for each object.
+    * In releases prior to 10.7, this function would return <code>false</code>
+    * if an object from <code>array</code> was already present in the set,
+    * or if <code>array</code> was empty.
+    * This is no longer the case, so this function correctly returns
+    * <code>true</code> whenever the merge semantics are satisfied.
     */
     virtual bool merge(const OSArray * array);
 
@@ -568,22 +566,20 @@ public:
     *
     * @result
     * <code>true</code> if any object from <code>set</code>
-    * was successfully added the receiver,
+    * is successfully added to the receiver (or was already present),
     * <code>false</code> otherwise.
     *
     * @discussion
     * This functions adds to the receiving set
     * all objects from <code>set</code>
     * that are not already in the receiving set.
-    * Objects successfully added to the receiver are retained.
+    * Objects added to the receiver are retained.
     *
-    * A <code>false</code> return value can mean either
-    * that all the objects in <code>array</code> are already present in the set,
-    * or that a memory allocation failure occurred.
-    * If you need to know whether the objects
-    * are already present, use
-    * <code>@link containsObject containsObject@/link</code>
-    * for each object.
+    * In releases prior to 10.7, this function would return <code>false</code>
+    * if an object from <code>set</code> was already present in the set,
+    * or if <code>set</code> was empty.
+    * This is no longer the case, so this function correctly returns
+    * <code>true</code> whenever the merge semantics are satisfied.
     */
     virtual bool merge(const OSSet * set);
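
A short sketch of the revised merge() contract (kernel C++; error handling
elided):

#include <libkern/c++/OSSet.h>
#include <libkern/c++/OSArray.h>
#include <libkern/c++/OSString.h>

static void
mergeExample(void)
{
    OSSet    * set = OSSet::withCapacity(2);
    OSArray  * arr = OSArray::withCapacity(2);
    OSString * s   = OSString::withCString("com.example.kext");

    set->setObject(s);
    arr->setObject(s);

    /* Every object in arr is already present, so the merge is trivially
     * satisfied: prior to 10.7 this returned false, now it returns true. */
    bool ok = set->merge(arr);
    (void)ok;

    s->release();
    arr->release();
    set->release();
}
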
 
diff --git a/libkern/libkern/crypto/Makefile b/libkern/libkern/crypto/Makefile
index 5c8103efa..38aaa055e 100644
--- a/libkern/libkern/crypto/Makefile
+++ b/libkern/libkern/crypto/Makefile
@@ -8,14 +8,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS =
 
-INSTINC_SUBDIRS_PPC =
-
 INSTINC_SUBDIRS_I386 =
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
 
-EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC}
-
 EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 
 DATAFILES = md5.h sha1.h
diff --git a/libkern/libkern/crypto/sha1.h b/libkern/libkern/crypto/sha1.h
index 9acc46b91..ad6b798f9 100644
--- a/libkern/libkern/crypto/sha1.h
+++ b/libkern/libkern/crypto/sha1.h
@@ -60,7 +60,9 @@ typedef struct sha1_ctxt {
 
 extern void SHA1Init(SHA1_CTX *);
 extern void SHA1Update(SHA1_CTX *, const void *, size_t);
+#ifdef XNU_KERNEL_PRIVATE
 extern void SHA1UpdateUsePhysicalAddress(SHA1_CTX *, const void *, size_t);
+#endif
 extern void SHA1Final(void *, SHA1_CTX *);
 
 #ifdef  __cplusplus
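
For reference, the public portion of this API follows the classic
init/update/final pattern; a minimal sketch assuming kernel context (a SHA-1
digest is 20 bytes):

#include <libkern/crypto/sha1.h>

static void
digest_buffer(const void * data, size_t len, unsigned char digest[20])
{
    SHA1_CTX ctx;

    SHA1Init(&ctx);
    SHA1Update(&ctx, data, len);
    SHA1Final(digest, &ctx);
}
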
diff --git a/libkern/libkern/kernel_mach_header.h b/libkern/libkern/kernel_mach_header.h
index 03e95969f..6588b9b09 100644
--- a/libkern/libkern/kernel_mach_header.h
+++ b/libkern/libkern/kernel_mach_header.h
@@ -57,8 +57,6 @@ typedef struct segment_command_64 kernel_segment_command_t;
 typedef struct section_64		kernel_section_t;
 
 #define LC_SEGMENT_KERNEL		LC_SEGMENT_64
-#define SECT_CONSTRUCTOR		"__mod_init_func"
-#define SECT_DESTRUCTOR			"__mod_term_func"
 
 #else
 
@@ -72,6 +70,9 @@ typedef struct section			kernel_section_t;
 
 #endif
 
+#define SECT_MODINITFUNC		"__mod_init_func"
+#define SECT_MODTERMFUNC		"__mod_term_func"
+
 extern kernel_mach_header_t _mh_execute_header;
 
 vm_offset_t getlastaddr(void);
@@ -94,6 +95,7 @@ kernel_section_t *getsectbynamefromheader(
 void *getsectdatafromheader(kernel_mach_header_t *, const char *, const char *, unsigned long *);
 kernel_section_t *firstsect(kernel_segment_command_t *sgp);
 kernel_section_t *nextsect(kernel_segment_command_t *sgp, kernel_section_t *sp);
+void *getuuidfromheader(kernel_mach_header_t *, unsigned long *);
 
 #if MACH_KDB
 boolean_t getsymtab(kernel_mach_header_t *header,
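
A hedged sketch of the newly declared getuuidfromheader(), fetching the running
kernel's LC_UUID payload (kernel context and the kernel printf assumed; an
LC_UUID payload is a 16-byte uuid_t):

#include <libkern/kernel_mach_header.h>

static void
log_kernel_uuid(void)
{
    unsigned long size = 0;
    void * uuid = getuuidfromheader(&_mh_execute_header, &size);

    if (uuid && size == 16) {
        const unsigned char * u = (const unsigned char *)uuid;
        printf("kernel UUID begins %02x%02x%02x%02x...\n",
               u[0], u[1], u[2], u[3]);
    }
}
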
diff --git a/libkern/libkern/kext_request_keys.h b/libkern/libkern/kext_request_keys.h
index aa5c1da4a..6b908f133 100644
--- a/libkern/libkern/kext_request_keys.h
+++ b/libkern/libkern/kext_request_keys.h
@@ -88,19 +88,15 @@ extern "C" {
  */
 #define kKextRequestPredicateGetLoaded             "Get Loaded Kext Info"
 
-/* Predicate: Get Kernel Link State
+/* Predicate: Get Kernel Image
  * Argument:  None
- * Response:  Raw bytes + length containing the link state of the kernel.
+ * Response:  Raw bytes + length containing the sanitized image of the kernel.
  * Op result: OSReturn indicating any errors in processing (see OSKextLib.h)
  *
- * Retrieves the link state of the running kernel for use in generating
+ * Retrieves a sanitized image of the running kernel for use in generating
  * debug symbols in user space.
- *
- * xxx - Should this allow retrieval of any kext's link state (maybe for
- * xxx - debugging)? Could rename "Get Kext Link State" and take a bundle ID
- * xxx - or none for kernel, just like with "Get Kext UUID".
  */
-#define kKextRequestPredicateGetKernelLinkState    "Get Kernel Link State"
+#define kKextRequestPredicateGetKernelImage    "Get Kernel Image"
 
 /* Predicate: Get Kernel Load Address
  * Argument:  None
@@ -223,6 +219,26 @@ extern "C" {
  */
 #define kKextRequestPredicateRequestLoad           "Kext Load Request"
 
+/* Predicate: Kext Load Notification
+ * Argument:  kext identifier
+ * Response:  None
+ * Op result: OSReturn indicating result (see OSKextLib.h)
+ *
+ * Informs kextd that the kernel has successfully loaded and started
+ * a kext.
+ */
+#define kKextRequestPredicateLoadNotification      "Kext Load Notification"
+
+/* Predicate: Kext Unload Notification
+ * Argument:  kext identifier
+ * Response:  None
+ * Op result: OSReturn indicating result (see OSKextLib.h)
+ *
+ * Informs kextd that the kernel has successfully stopped and unloaded
+ * a kext.
+ */
+#define kKextRequestPredicateUnloadNotification    "Kext Unload Notification"
+
 /* Predicate: Prelinked Kernel Request
  * Argument:  None
  * Response:  None
@@ -281,6 +297,14 @@ extern "C" {
  */
 #define kKextRequestArgumentBundleIdentifierKey    "CFBundleIdentifier"
 
+/* Argument:  Kext Request Info Keys
+ * Type:      Array of strings
+ * Used by:   OSKext::copyInfo()
+ *
+ * Used to specify a subset of all possible info to be returned.
+ */
+#define kKextRequestArgumentInfoKeysKey          "Kext Request Info Keys"
+
 /* Argument:  OSReturn
  * Type:      Number (OSReturn)
  * Used by:   several
@@ -358,7 +382,7 @@ extern "C" {
  * either the primary kext, or the whole load list (any that weren't
  * already loaded & started).
  */
-#define kKextKextRequestArgumentStartExcludeKey        "Start Exclude Level"
+#define kKextRequestArgumentStartExcludeKey        "Start Exclude Level"
 
 /* Argument:  Start Matching Exclude Level
  * Type:      Integer, corresponding to OSKextExcludeLevel
diff --git a/libkern/libkern/kxld.h b/libkern/libkern/kxld.h
index 6fa11e422..4fa1e9021 100644
--- a/libkern/libkern/kxld.h
+++ b/libkern/libkern/kxld.h
@@ -78,45 +78,28 @@ void kxld_destroy_context(
 *   size                The size of the kext in memory.  Must be nonzero.
 *   name                The name, usually the bundle identifier, of the kext
 *   callback_data       Data that is to be passed to the callback functions.
-*   deps                An array of pointers to the link state of kexts upon 
-*                       which this kext is dependent.
-*   ndeps               Number of entries in the 'deps' array.
-*   linked_object       If this is not null, it will be set to the address of 
-*                       the linked kext object.  If the address provided by the 
-*                       kxld_alloc_callback is considered writable, this pointer 
-*                       will be set to that address.  Otherwise, the linked
-*                       object will be written to a temporary buffer that should
-*                       be freed by the caller.
+*   dependencies        An array of pointers to the kexts upon which this kext 
+*                       is dependent.
+*   num_dependencies    Number of entries in the 'dependencies' array.
+*   linked_object       This will be set to the address of the linked kext 
+*                       object. If the address provided by the 
+*                       kxld_alloc_callback is considered writable, this 
+*                       pointer will be set to that address.  Otherwise, the 
+*                       linked object will be written to a temporary buffer 
+*                       that should be freed by the caller.
 *   kmod_info_kern      Kernel address of the kmod_info_t structure.
-*   link_state          If this is not null, it will be set to the address of a
-*                       block of memory that contains state generated by the
-*                       linking process for use by links of dependent kexts.  
-*                       The link state object is serialized and can be written
-*                       directly to disk.  This memory should be freed by the 
-*                       caller when no longer needed.
-*   link_state_size     The size of the returned link state buffer.
-*   symbol_file         If this is not null, it will be set to the address of a
-*                       buffer containing a Mach-O symbol file that may be
-*                       written to disk.  This should be freed by the caller
-*                       when no longer needed.
-*                       Note: symbol files are never generated in the kernel
-*   symbol_file_size    The size of the returned symbol file buffer.
-*******************************************************************************/
+******************************************************************************/
 kern_return_t kxld_link_file(
     KXLDContext *context,
     u_char *file,
     u_long size,
     const char *name,
     void *callback_data,
-    u_char **deps,
-    u_int ndeps,
+    KXLDDependency *dependencies,
+    u_int num_dependencies,
     u_char **linked_object,
-    kxld_addr_t *kmod_info_kern,
-    u_char **link_state,
-    u_long *link_state_size,
-    u_char **symbol_file,
-    u_long *symbol_file_size)
-    __attribute__((nonnull(1, 2), visibility("default")));
+    kxld_addr_t *kmod_info_kern)
+    __attribute__((nonnull(1,2,4,6,8,9), visibility("default")));
 
 /*******************************************************************************
 *******************************************************************************/
diff --git a/libkern/libkern/kxld_types.h b/libkern/libkern/kxld_types.h
index cd7153c8b..1578b5859 100644
--- a/libkern/libkern/kxld_types.h
+++ b/libkern/libkern/kxld_types.h
@@ -30,6 +30,7 @@
 
 #include <stdarg.h>
 #include <stdint.h>
+#include <mach/boolean.h>       // boolean_t
 #include <mach/kern_return.h>
 
 /*******************************************************************************
@@ -82,7 +83,7 @@
 #endif
 
 /* For linking code specific to architectures that use MH_KEXT_BUNDLE */
-#if (!KERNEL || __x86_64__)
+#if (!KERNEL || __i386__ || __x86_64__ || __arm__)
     #define KXLD_USER_OR_BUNDLE 1
 #endif
 
@@ -115,14 +116,14 @@ typedef uint64_t kxld_size_t;
 
 /* Flags for general linker behavior */
 enum kxld_flags {
-    kKxldFlagDefault = 0x0
+    kKxldFlagDefault = 0x0,
 };
 typedef enum kxld_flags KXLDFlags;
 
 /* Flags for the allocation callback */
 enum kxld_allocate_flags {
     kKxldAllocateDefault = 0x0,
-    kKxldAllocateWritable = 0x1  /* kxld may write into the allocated memory */
+    kKxldAllocateWritable = 0x1,        /* kxld may write into the allocated memory */
 };
 typedef enum kxld_allocate_flags KXLDAllocateFlags;
 
@@ -149,6 +150,25 @@ typedef enum kxld_log_level {
     kKxldLogDebug = 0x5
 } KXLDLogLevel;
 
+/* This structure is used to describe a dependency kext. The kext field
+ * is a pointer to the binary executable of the dependency. The interface
+ * field is a pointer to an optional interface kext that restricts the
+ * symbols that may be accessed in the dependency kext.
+ * 
+ * For example, to use this structure with the KPIs, set the kext field
+ * to point to the kernel's Mach-O binary, and set interface to point
+ * to the KPI's Mach-O binary.
+ */
+typedef struct kxld_dependency {
+    u_char      * kext;
+    u_long        kext_size;
+    char        * kext_name;
+    u_char      * interface;
+    u_long        interface_size;
+    char        * interface_name;
+    boolean_t     is_direct_dependency;
+} KXLDDependency;
+
 typedef void (*KXLDLoggingCallback) (KXLDLogSubsystem sys, KXLDLogLevel level, 
     const char *format, va_list ap, void *user_data);
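
Putting the new types together, a sketch of filling a KXLDDependency and
calling the revised kxld_link_file() (the KXLDContext, buffers, and sizes are
assumed to exist already; the "__kernel__" name is illustrative, and creation
of the context is not shown):

#include <libkern/kxld.h>
#include <libkern/kxld_types.h>

static kern_return_t
link_against_kernel(KXLDContext * ctx,
    u_char * kext_file, u_long kext_size,
    u_char * kernel_file, u_long kernel_size)
{
    KXLDDependency dep;
    u_char      * linked_object  = NULL;
    kxld_addr_t   kmod_info_kern = 0;

    dep.kext                 = kernel_file;     /* the dependency binary   */
    dep.kext_size            = kernel_size;
    dep.kext_name            = (char *)"__kernel__";
    dep.interface            = NULL;            /* or a KPI Mach-O to      */
    dep.interface_size       = 0;               /* restrict symbol access  */
    dep.interface_name       = NULL;
    dep.is_direct_dependency = TRUE;

    return kxld_link_file(ctx, kext_file, kext_size,
        "com.example.driver", NULL /* callback_data */,
        &dep, 1, &linked_object, &kmod_info_kern);
}
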
 
diff --git a/libkern/libkern/machine/Makefile b/libkern/libkern/machine/Makefile
index 0a072f9f9..e4d4ce152 100644
--- a/libkern/libkern/machine/Makefile
+++ b/libkern/libkern/machine/Makefile
@@ -8,8 +8,6 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS =
 
-INSTINC_SUBDIRS_PPC =
-
 INSTINC_SUBDIRS_I386 =
 
 INSTINC_SUBDIRS_X86_64 =
@@ -18,8 +16,6 @@ INSTINC_SUBDIRS_ARM =
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
 
-EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC}
-
 EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
diff --git a/libkern/libkern/mkext.h b/libkern/libkern/mkext.h
index 6b43a6b82..0024e84a0 100644
--- a/libkern/libkern/mkext.h
+++ b/libkern/libkern/mkext.h
@@ -149,6 +149,7 @@ typedef struct mkext_basic_header {
 #define kMKEXTInfoDictionariesKey             "_MKEXTInfoDictionaries"
 
 #define kMKEXTBundlePathKey                   "_MKEXTBundlePath"
+#define kMKEXTExecutableRelativePathKey       "_MKEXTExecutableRelativePath"
 #define kMKEXTExecutableKey                   "_MKEXTExecutable"
 
 #define kMKEXTLoadRequestKey                  "_MKEXTLoadRequest"
diff --git a/libkern/libkern/ppc/Makefile b/libkern/libkern/ppc/Makefile
deleted file mode 100644
index e892ce42f..000000000
--- a/libkern/libkern/ppc/Makefile
+++ /dev/null
@@ -1,31 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-INSTINC_SUBDIRS = 
-
-INSTINC_SUBDIRS_PPC = 
-
-EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
-
-EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC}
-
-DATAFILES = \
-          OSByteOrder.h
-
-INSTALL_MD_LIST	= ${DATAFILES}
-
-INSTALL_MD_DIR = libkern/ppc
-
-EXPORT_MD_LIST	= ${DATAFILES}
-
-EXPORT_MD_DIR = libkern/ppc
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/libkern/libkern/ppc/OSByteOrder.h b/libkern/libkern/ppc/OSByteOrder.h
deleted file mode 100644
index c6666859d..000000000
--- a/libkern/libkern/ppc/OSByteOrder.h
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#ifndef _OS_OSBYTEORDERPPC_H
-#define _OS_OSBYTEORDERPPC_H
-
-#include <stdint.h>
-
-#if !defined(OS_INLINE)
-# if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
-#        define OS_INLINE static inline
-# elif defined(__MWERKS__) || defined(__cplusplus)
-#        define OS_INLINE static inline
-# else
-#        define OS_INLINE static __inline__
-# endif
-#endif
-
-/* Functions for byte reversed loads. */
-
-OS_INLINE
-uint16_t
-OSReadSwapInt16(
-    const volatile void * base,
-    uintptr_t             byteOffset
-)
-{
-    uint16_t result;
-    volatile uint16_t *addr = (volatile uint16_t *)((uintptr_t)base + byteOffset);
-
-#if defined(__llvm__)
-    result = *addr;
-    result = ((result << 8) | (result >> 8));
-#else
-    __asm__ ("lhbrx %0, %2, %1"
-             : "=r" (result)
-             : "r"  (base), "bO" (byteOffset), "m" (*addr));
-#endif
-
-    return result;
-}
-
-OS_INLINE
-uint32_t
-OSReadSwapInt32(
-    const volatile void * base,
-    uintptr_t             byteOffset
-)
-{
-    uint32_t result;
-    volatile uint32_t *addr = (volatile uint32_t *)((uintptr_t)base + byteOffset);
-
-#if defined(__llvm__)
-    result = __builtin_bswap32(*addr);
-#else
-    __asm__ ("lwbrx %0, %2, %1"
-             : "=r" (result)
-             : "r"  (base), "bO" (byteOffset), "m" (*addr));
-#endif
-
-    return result;
-}
-
-OS_INLINE
-uint64_t
-OSReadSwapInt64(
-    const volatile void * base,
-    uintptr_t             byteOffset
-)
-{
-    volatile uint64_t *addr = (volatile uint64_t *)((uintptr_t)base + byteOffset);
-    union {
-        uint64_t u64;
-        uint32_t u32[2];
-    } u;
-
-#if defined(__llvm__)
-    u.u64 = __builtin_bswap64(*addr);
-#else
-    __asm__ ("lwbrx %0, %3, %2\n\t"
-             "lwbrx %1, %4, %2"
-             : "=&r" (u.u32[1]), "=r" (u.u32[0])
-             : "r"  (base), "bO" (byteOffset), "b" (byteOffset + 4), "m" (*addr));
-#endif
-
-    return u.u64;
-}
-
-/* Functions for byte reversed stores. */
-
-OS_INLINE
-void
-OSWriteSwapInt16(
-    volatile void * base,
-    uintptr_t       byteOffset,
-    uint16_t        data
-)
-{
-    volatile uint16_t *addr = (volatile uint16_t *)((uintptr_t)base + byteOffset);
-
-#if defined(__llvm__)
-    *addr = ((data >> 8) | (data << 8));
-#else
-    __asm__ ("sthbrx %1, %3, %2"
-             : "=m" (*addr)
-             : "r" (data), "r" (base), "bO" (byteOffset));
-#endif
-}
-
-OS_INLINE
-void
-OSWriteSwapInt32(
-    volatile void * base,
-    uintptr_t       byteOffset,
-    uint32_t        data
-)
-{
-    volatile uint32_t *addr = (volatile uint32_t *)((uintptr_t)base + byteOffset);
-
-#if defined(__llvm__)
-    *addr = __builtin_bswap32(data);
-#else
-    __asm__ ("stwbrx %1, %3, %2"
-             : "=m" (*addr)
-             : "r" (data), "r" (base), "bO" (byteOffset));
-#endif
-}
-
-OS_INLINE
-void
-OSWriteSwapInt64(
-    volatile void * base,
-    uintptr_t       byteOffset,
-    uint64_t        data
-)
-{
-    volatile uint64_t *addr = (volatile uint64_t *)((uintptr_t)base + byteOffset);
-
-#if defined(__llvm__)
-    *addr = __builtin_bswap64(data);
-#else
-    uint32_t hi = (uint32_t)(data >> 32);
-    uint32_t lo = (uint32_t)(data & 0xffffffff);
-
-    __asm__ ("stwbrx %1, %4, %3\n\t"
-             "stwbrx %2, %5, %3"
-             : "=m" (*addr)
-             : "r" (lo), "r" (hi), "r" (base), "bO" (byteOffset), "b" (byteOffset + 4));
-#endif
-}
-
-/* Generic byte swapping functions. */
-
-OS_INLINE
-uint16_t
-_OSSwapInt16(
-    uint16_t data
-)
-{
-    return OSReadSwapInt16(&data, 0);
-}
-
-OS_INLINE
-uint32_t
-_OSSwapInt32(
-    uint32_t data
-)
-{
-    return OSReadSwapInt32(&data, 0);
-}
-
-OS_INLINE
-uint64_t
-_OSSwapInt64(
-    uint64_t data
-)
-{
-    return OSReadSwapInt64(&data, 0);
-}
-
-#endif /* ! _OS_OSBYTEORDERPPC_H */
diff --git a/libkern/libkern/prelink.h b/libkern/libkern/prelink.h
index e8f37e1f0..59aefd3a2 100644
--- a/libkern/libkern/prelink.h
+++ b/libkern/libkern/prelink.h
@@ -40,6 +40,7 @@
 #define kPrelinkInfoSection                "__info"
 
 #define kPrelinkBundlePathKey              "_PrelinkBundlePath"
+#define kPrelinkExecutableRelativePathKey  "_PrelinkExecutableRelativePath"
 #define kPrelinkExecutableLoadKey          "_PrelinkExecutableLoadAddr"
 #define kPrelinkExecutableSourceKey        "_PrelinkExecutableSourceAddr"
 #define kPrelinkExecutableSizeKey          "_PrelinkExecutableSize"
@@ -48,7 +49,6 @@
 #define kPrelinkKmodInfoKey                "_PrelinkKmodInfo"
 #define kPrelinkLinkStateKey               "_PrelinkLinkState"
 #define kPrelinkLinkStateSizeKey           "_PrelinkLinkStateSize"
-#define kPrelinkPersonalitiesKey           "_PrelinkPersonalities"
 
 #endif /* _PRELINK_H_ */
 
diff --git a/libkern/libkern/tree.h b/libkern/libkern/tree.h
new file mode 100644
index 000000000..3a26162bd
--- /dev/null
+++ b/libkern/libkern/tree.h
@@ -0,0 +1,802 @@
+/*
+ * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*	$NetBSD: tree.h,v 1.13 2006/08/27 22:32:38 christos Exp $	*/
+/*	$OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $	*/
+/*
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef	_LIBKERN_TREE_H_
+#define	_LIBKERN_TREE_H_
+
+/*
+ * This file defines data structures for different types of trees:
+ * splay trees and red-black trees.
+ *
+ * A splay tree is a self-organizing data structure.  Every operation
+ * on the tree causes a splay to happen.  The splay moves the requested
+ * node to the root of the tree and partly rebalances it.
+ *
+ * This has the benefit that request locality causes faster lookups as
+ * the requested nodes move to the top of the tree.  On the other hand,
+ * every lookup causes memory writes.
+ *
+ * The Balance Theorem bounds the total access time for m operations
+ * and n inserts on an initially empty tree as O((m + n)lg n).  The
+ * amortized cost for a sequence of m accesses to a splay tree is O(lg n).
+ *
+ * A red-black tree is a binary search tree with the node color as an
+ * extra attribute.  It fulfills a set of conditions:
+ *	- every search path from the root to a leaf consists of the
+ *	  same number of black nodes,
+ *	- each red node (except for the root) has a black parent,
+ *	- each leaf node is black.
+ *
+ * Every operation on a red-black tree is bounded as O(lg n).
+ * The maximum height of a red-black tree is 2lg (n+1).
+ */
+
+#define SPLAY_HEAD(name, type)						\
+struct name {								\
+	struct type *sph_root; /* root of the tree */			\
+}
+
+#define SPLAY_INITIALIZER(root)						\
+	{ NULL }
+
+#define SPLAY_INIT(root) do {						\
+	(root)->sph_root = NULL;					\
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_ENTRY(type)						\
+struct {								\
+	struct type *spe_left; /* left element */			\
+	struct type *spe_right; /* right element */			\
+}
+
+#define SPLAY_LEFT(elm, field)		(elm)->field.spe_left
+#define SPLAY_RIGHT(elm, field)		(elm)->field.spe_right
+#define SPLAY_ROOT(head)		(head)->sph_root
+#define SPLAY_EMPTY(head)		(SPLAY_ROOT(head) == NULL)
+
+/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */
+#define SPLAY_ROTATE_RIGHT(head, tmp, field) do {			\
+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field);	\
+	SPLAY_RIGHT(tmp, field) = (head)->sph_root;			\
+	(head)->sph_root = tmp;						\
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_ROTATE_LEFT(head, tmp, field) do {			\
+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field);	\
+	SPLAY_LEFT(tmp, field) = (head)->sph_root;			\
+	(head)->sph_root = tmp;						\
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKLEFT(head, tmp, field) do {				\
+	SPLAY_LEFT(tmp, field) = (head)->sph_root;			\
+	tmp = (head)->sph_root;						\
+	(head)->sph_root = SPLAY_LEFT((head)->sph_root, field);		\
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_LINKRIGHT(head, tmp, field) do {				\
+	SPLAY_RIGHT(tmp, field) = (head)->sph_root;			\
+	tmp = (head)->sph_root;						\
+	(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);	\
+} while (/*CONSTCOND*/ 0)
+
+#define SPLAY_ASSEMBLE(head, node, left, right, field) do {		\
+	SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field);	\
+	SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\
+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field);	\
+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field);	\
+} while (/*CONSTCOND*/ 0)
+
+/* Generates prototypes and inline functions */
+
+#define SPLAY_PROTOTYPE(name, type, field, cmp)				\
+void name##_SPLAY(struct name *, struct type *);			\
+void name##_SPLAY_MINMAX(struct name *, int);				\
+struct type *name##_SPLAY_INSERT(struct name *, struct type *);		\
+struct type *name##_SPLAY_REMOVE(struct name *, struct type *);		\
+									\
+/* Finds the node with the same key as elm */				\
+static __inline struct type *						\
+name##_SPLAY_FIND(struct name *head, struct type *elm)			\
+{									\
+	if (SPLAY_EMPTY(head))						\
+		return(NULL);						\
+	name##_SPLAY(head, elm);					\
+	if ((cmp)(elm, (head)->sph_root) == 0)				\
+		return (head->sph_root);				\
+	return (NULL);							\
+}									\
+									\
+static __inline struct type *						\
+name##_SPLAY_NEXT(struct name *head, struct type *elm)			\
+{									\
+	name##_SPLAY(head, elm);					\
+	if (SPLAY_RIGHT(elm, field) != NULL) {				\
+		elm = SPLAY_RIGHT(elm, field);				\
+		while (SPLAY_LEFT(elm, field) != NULL) {		\
+			elm = SPLAY_LEFT(elm, field);			\
+		}							\
+	} else								\
+		elm = NULL;						\
+	return (elm);							\
+}									\
+									\
+static __inline struct type *						\
+name##_SPLAY_MIN_MAX(struct name *head, int val)			\
+{									\
+	name##_SPLAY_MINMAX(head, val);					\
+        return (SPLAY_ROOT(head));					\
+}
+
+/* Main splay operation.
+ * Moves node close to the key of elm to top
+ */
+#define SPLAY_GENERATE(name, type, field, cmp)				\
+struct type *								\
+name##_SPLAY_INSERT(struct name *head, struct type *elm)		\
+{									\
+    if (SPLAY_EMPTY(head)) {						\
+	    SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL;	\
+    } else {								\
+	    int __comp;							\
+	    name##_SPLAY(head, elm);					\
+	    __comp = (cmp)(elm, (head)->sph_root);			\
+	    if(__comp < 0) {						\
+		    SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\
+		    SPLAY_RIGHT(elm, field) = (head)->sph_root;		\
+		    SPLAY_LEFT((head)->sph_root, field) = NULL;		\
+	    } else if (__comp > 0) {					\
+		    SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\
+		    SPLAY_LEFT(elm, field) = (head)->sph_root;		\
+		    SPLAY_RIGHT((head)->sph_root, field) = NULL;	\
+	    } else							\
+		    return ((head)->sph_root);				\
+    }									\
+    (head)->sph_root = (elm);						\
+    return (NULL);							\
+}									\
+									\
+struct type *								\
+name##_SPLAY_REMOVE(struct name *head, struct type *elm)		\
+{									\
+	struct type *__tmp;						\
+	if (SPLAY_EMPTY(head))						\
+		return (NULL);						\
+	name##_SPLAY(head, elm);					\
+	if ((cmp)(elm, (head)->sph_root) == 0) {			\
+		if (SPLAY_LEFT((head)->sph_root, field) == NULL) {	\
+			(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\
+		} else {						\
+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
+			(head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\
+			name##_SPLAY(head, elm);			\
+			SPLAY_RIGHT((head)->sph_root, field) = __tmp;	\
+		}							\
+		return (elm);						\
+	}								\
+	return (NULL);							\
+}									\
+									\
+void									\
+name##_SPLAY(struct name *head, struct type *elm)			\
+{									\
+	struct type __node, *__left, *__right, *__tmp;			\
+	int __comp;							\
+\
+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+	__left = __right = &__node;					\
+\
+	while ((__comp = (cmp)(elm, (head)->sph_root)) != 0) {		\
+		if (__comp < 0) {					\
+			__tmp = SPLAY_LEFT((head)->sph_root, field);	\
+			if (__tmp == NULL)				\
+				break;					\
+			if ((cmp)(elm, __tmp) < 0){			\
+				SPLAY_ROTATE_RIGHT(head, __tmp, field);	\
+				if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+					break;				\
+			}						\
+			SPLAY_LINKLEFT(head, __right, field);		\
+		} else if (__comp > 0) {				\
+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
+			if (__tmp == NULL)				\
+				break;					\
+			if ((cmp)(elm, __tmp) > 0){			\
+				SPLAY_ROTATE_LEFT(head, __tmp, field);	\
+				if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+					break;				\
+			}						\
+			SPLAY_LINKRIGHT(head, __left, field);		\
+		}							\
+	}								\
+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field);		\
+}									\
+									\
+/* Splay with either the minimum or the maximum element			\
+ * Used to find minimum or maximum element in tree.			\
+ */									\
+void name##_SPLAY_MINMAX(struct name *head, int __comp) \
+{									\
+	struct type __node, *__left, *__right, *__tmp;			\
+\
+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+	__left = __right = &__node;					\
+\
+	while (1) {							\
+		if (__comp < 0) {					\
+			__tmp = SPLAY_LEFT((head)->sph_root, field);	\
+			if (__tmp == NULL)				\
+				break;					\
+			if (__comp < 0){				\
+				SPLAY_ROTATE_RIGHT(head, __tmp, field);	\
+				if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+					break;				\
+			}						\
+			SPLAY_LINKLEFT(head, __right, field);		\
+		} else if (__comp > 0) {				\
+			__tmp = SPLAY_RIGHT((head)->sph_root, field);	\
+			if (__tmp == NULL)				\
+				break;					\
+			if (__comp > 0) {				\
+				SPLAY_ROTATE_LEFT(head, __tmp, field);	\
+				if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+					break;				\
+			}						\
+			SPLAY_LINKRIGHT(head, __left, field);		\
+		}							\
+	}								\
+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field);		\
+}
+
+#define SPLAY_NEGINF	-1
+#define SPLAY_INF	1
+
+#define SPLAY_INSERT(name, x, y)	name##_SPLAY_INSERT(x, y)
+#define SPLAY_REMOVE(name, x, y)	name##_SPLAY_REMOVE(x, y)
+#define SPLAY_FIND(name, x, y)		name##_SPLAY_FIND(x, y)
+#define SPLAY_NEXT(name, x, y)		name##_SPLAY_NEXT(x, y)
+#define SPLAY_MIN(name, x)		(SPLAY_EMPTY(x) ? NULL	\
+					: name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
+#define SPLAY_MAX(name, x)		(SPLAY_EMPTY(x) ? NULL	\
+					: name##_SPLAY_MIN_MAX(x, SPLAY_INF))
+
+#define SPLAY_FOREACH(x, name, head)					\
+	for ((x) = SPLAY_MIN(name, head);				\
+	     (x) != NULL;						\
+	     (x) = SPLAY_NEXT(name, head, x))
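
The splay macros above follow the usual BSD <sys/tree.h> pattern; a user-space
sketch using that identical macro set (the "node"/"node_tree" names are
illustrative):

#include <stdio.h>
#include <sys/tree.h>   /* same SPLAY_* macros as libkern/tree.h */

struct node {
    int key;
    SPLAY_ENTRY(node) link;
};

SPLAY_HEAD(node_tree, node);

/* Comparator: negative, zero, or positive, strcmp-style. */
static int
node_cmp(struct node * a, struct node * b)
{
    return (a->key < b->key) ? -1 : (a->key > b->key);
}

SPLAY_PROTOTYPE(node_tree, node, link, node_cmp)
SPLAY_GENERATE(node_tree, node, link, node_cmp)

int
main(void)
{
    struct node_tree head = SPLAY_INITIALIZER(&head);
    struct node a = { 2, { NULL, NULL } };
    struct node b = { 1, { NULL, NULL } };
    struct node c = { 3, { NULL, NULL } };
    struct node * np;

    SPLAY_INSERT(node_tree, &head, &a);
    SPLAY_INSERT(node_tree, &head, &b);
    SPLAY_INSERT(node_tree, &head, &c);

    SPLAY_FOREACH(np, node_tree, &head)
        printf("%d\n", np->key);       /* prints 1, 2, 3 */
    return 0;
}
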
+
+/* Macros that define a red-black tree */
+#define RB_HEAD(name, type)						\
+struct name {								\
+	struct type *rbh_root; /* root of the tree */			\
+}
+
+#define RB_INITIALIZER(root)						\
+	{ NULL }
+
+#define RB_INIT(root) do {						\
+	(root)->rbh_root = NULL;					\
+} while (/*CONSTCOND*/ 0)
+
+#define RB_BLACK	0
+#define RB_RED		1
+#define RB_PLACEHOLDER	NULL
+#define RB_ENTRY(type)							\
+struct {								\
+	struct type *rbe_left;		/* left element */		\
+	struct type *rbe_right;		/* right element */		\
+	struct type *rbe_parent;	/* parent element */		\
+}
+
+#define RB_COLOR_MASK			(uintptr_t)0x1
+#define RB_LEFT(elm, field)		(elm)->field.rbe_left
+#define RB_RIGHT(elm, field)		(elm)->field.rbe_right
+#define _RB_PARENT(elm, field)		(elm)->field.rbe_parent
+#define RB_ROOT(head)			(head)->rbh_root
+#define RB_EMPTY(head)			(RB_ROOT(head) == NULL)
+
+#define RB_SET(name, elm, parent, field) do {					\
+	name##_RB_SETPARENT(elm, parent);					\
+	RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL;		\
+	name##_RB_SETCOLOR(elm, RB_RED);				\
+} while (/*CONSTCOND*/ 0)
+
+#define RB_SET_BLACKRED(name, black, red, field) do {				\
+	name##_RB_SETCOLOR(black,  RB_BLACK);				\
+	name##_RB_SETCOLOR(red, RB_RED);					\
+} while (/*CONSTCOND*/ 0)
+
+#ifndef RB_AUGMENT
+#define RB_AUGMENT(x) (void)(x)
+#endif
+
+#define RB_ROTATE_LEFT(name, head, elm, tmp, field) do {			\
+	(tmp) = RB_RIGHT(elm, field);					\
+	if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != NULL) {	\
+		name##_RB_SETPARENT(RB_LEFT(tmp, field),(elm));		\
+	}								\
+	RB_AUGMENT(elm);						\
+	if (name##_RB_SETPARENT(tmp, name##_RB_GETPARENT(elm)) != NULL) {	\
+		if ((elm) == RB_LEFT(name##_RB_GETPARENT(elm), field))	\
+			RB_LEFT(name##_RB_GETPARENT(elm), field) = (tmp);	\
+		else							\
+			RB_RIGHT(name##_RB_GETPARENT(elm), field) = (tmp);	\
+	} else								\
+		(head)->rbh_root = (tmp);				\
+	RB_LEFT(tmp, field) = (elm);					\
+	name##_RB_SETPARENT(elm, (tmp));					\
+	RB_AUGMENT(tmp);						\
+	if ((name##_RB_GETPARENT(tmp)))					\
+		RB_AUGMENT(name##_RB_GETPARENT(tmp));			\
+} while (/*CONSTCOND*/ 0)
+
+#define RB_ROTATE_RIGHT(name, head, elm, tmp, field) do {			\
+	(tmp) = RB_LEFT(elm, field);					\
+	if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != NULL) {	\
+		name##_RB_SETPARENT(RB_RIGHT(tmp, field), (elm));		\
+	}								\
+	RB_AUGMENT(elm);						\
+	if (name##_RB_SETPARENT(tmp, name##_RB_GETPARENT(elm)) != NULL) {	\
+		if ((elm) == RB_LEFT(name##_RB_GETPARENT(elm), field))	\
+			RB_LEFT(name##_RB_GETPARENT(elm), field) = (tmp);	\
+		else							\
+			RB_RIGHT(name##_RB_GETPARENT(elm), field) = (tmp);	\
+	} else								\
+		(head)->rbh_root = (tmp);				\
+	RB_RIGHT(tmp, field) = (elm);					\
+	name##_RB_SETPARENT(elm, tmp);					\
+	RB_AUGMENT(tmp);						\
+	if ((name##_RB_GETPARENT(tmp)))					\
+		RB_AUGMENT(name##_RB_GETPARENT(tmp));			\
+} while (/*CONSTCOND*/ 0)
+
+/* Generates prototypes and inline functions */
+#define RB_PROTOTYPE(name, type, field, cmp)				\
+void name##_RB_INSERT_COLOR(struct name *, struct type *);	\
+void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\
+struct type *name##_RB_REMOVE(struct name *, struct type *);		\
+struct type *name##_RB_INSERT(struct name *, struct type *);		\
+struct type *name##_RB_FIND(struct name *, struct type *);		\
+struct type *name##_RB_NEXT(struct type *);				\
+struct type *name##_RB_MINMAX(struct name *, int);			\
+struct type *name##_RB_GETPARENT(struct type*);				\
+struct type *name##_RB_SETPARENT(struct type*, struct type*);		\
+int name##_RB_GETCOLOR(struct type*);					\
+void name##_RB_SETCOLOR(struct type*,int);				
+
+/* Generates prototypes (with storage class) and inline functions */
+#define RB_PROTOTYPE_SC(_sc_, name, type, field, cmp)			\
+_sc_ void name##_RB_INSERT_COLOR(struct name *, struct type *);		\
+_sc_ void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *); \
+_sc_ struct type *name##_RB_REMOVE(struct name *, struct type *);	\
+_sc_ struct type *name##_RB_INSERT(struct name *, struct type *);	\
+_sc_ struct type *name##_RB_FIND(struct name *, struct type *);		\
+_sc_ struct type *name##_RB_NEXT(struct type *);			\
+_sc_ struct type *name##_RB_MINMAX(struct name *, int);			\
+_sc_ struct type *name##_RB_GETPARENT(struct type*);			\
+_sc_ struct type *name##_RB_SETPARENT(struct type*, struct type*);			\
+_sc_ int name##_RB_GETCOLOR(struct type*);			\
+_sc_ void name##_RB_SETCOLOR(struct type*,int);
+
+
+/* Main rb operation.
+ * Moves node close to the key of elm to top
+ */
+#define RB_GENERATE(name, type, field, cmp)				\
+struct type *name##_RB_GETPARENT(struct type *elm) {				\
+	struct type *parent = _RB_PARENT(elm, field);			\
+	if( parent != NULL) {						\
+		parent = (struct type*)((uintptr_t)parent & ~RB_COLOR_MASK);\
+		return( (struct type*) ( (parent == (struct type*) RB_PLACEHOLDER) ? NULL: parent));\
+	}								\
+	return((struct type*)NULL);					\
+}									\
+int name##_RB_GETCOLOR(struct type *elm) {					\
+	int color = 0;							\
+	color = (int)((uintptr_t)_RB_PARENT(elm,field) & RB_COLOR_MASK);\
+	return(color);							\
+}									\
+void name##_RB_SETCOLOR(struct type *elm,int color) {				\
+	struct type *parent = name##_RB_GETPARENT(elm);			\
+	if(parent == (struct type*)NULL) 				\
+		parent = (struct type*) RB_PLACEHOLDER;			\
+	_RB_PARENT(elm, field) = (struct type*)((uintptr_t)parent | (unsigned int)color);\
+}									\
+struct type *name##_RB_SETPARENT(struct type *elm, struct type *parent) {	\
+	int color = name##_RB_GETCOLOR(elm);					\
+	_RB_PARENT(elm, field) = parent;				\
+	if(color) name##_RB_SETCOLOR(elm, color);				\
+	return(name##_RB_GETPARENT(elm));					\
+}									\
+									\
+void									\
+name##_RB_INSERT_COLOR(struct name *head, struct type *elm)		\
+{									\
+	struct type *parent, *gparent, *tmp;				\
+	while ((parent = name##_RB_GETPARENT(elm)) != NULL &&		\
+	    name##_RB_GETCOLOR(parent) == RB_RED) {			\
+		gparent = name##_RB_GETPARENT(parent);			\
+		if (parent == RB_LEFT(gparent, field)) {		\
+			tmp = RB_RIGHT(gparent, field);			\
+			if (tmp && name##_RB_GETCOLOR(tmp) == RB_RED) {	\
+				name##_RB_SETCOLOR(tmp,  RB_BLACK);	\
+				RB_SET_BLACKRED(name, parent, gparent, field);\
+				elm = gparent;				\
+				continue;				\
+			}						\
+			if (RB_RIGHT(parent, field) == elm) {		\
+				RB_ROTATE_LEFT(name, head, parent, tmp, field);\
+				tmp = parent;				\
+				parent = elm;				\
+				elm = tmp;				\
+			}						\
+			RB_SET_BLACKRED(name, parent, gparent, field);	\
+			RB_ROTATE_RIGHT(name,head, gparent, tmp, field);	\
+		} else {						\
+			tmp = RB_LEFT(gparent, field);			\
+			if (tmp && name##_RB_GETCOLOR(tmp) == RB_RED) {	\
+				name##_RB_SETCOLOR(tmp,  RB_BLACK);	\
+				RB_SET_BLACKRED(name, parent, gparent, field);\
+				elm = gparent;				\
+				continue;				\
+			}						\
+			if (RB_LEFT(parent, field) == elm) {		\
+				RB_ROTATE_RIGHT(name, head, parent, tmp, field);\
+				tmp = parent;				\
+				parent = elm;				\
+				elm = tmp;				\
+			}						\
+			RB_SET_BLACKRED(name, parent, gparent, field);	\
+			RB_ROTATE_LEFT(name, head, gparent, tmp, field);	\
+		}							\
+	}								\
+	name##_RB_SETCOLOR(head->rbh_root,  RB_BLACK);			\
+}									\
+									\
+void									\
+name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \
+{									\
+	struct type *tmp;						\
+	while ((elm == NULL || name##_RB_GETCOLOR(elm) == RB_BLACK) &&	\
+	    elm != RB_ROOT(head)) {					\
+		if (RB_LEFT(parent, field) == elm) {			\
+			tmp = RB_RIGHT(parent, field);			\
+			if (name##_RB_GETCOLOR(tmp) == RB_RED) {		\
+				RB_SET_BLACKRED(name, tmp, parent, field);	\
+				RB_ROTATE_LEFT(name, head, parent, tmp, field);\
+				tmp = RB_RIGHT(parent, field);		\
+			}						\
+			if ((RB_LEFT(tmp, field) == NULL ||		\
+			    name##_RB_GETCOLOR(RB_LEFT(tmp, field)) == RB_BLACK) &&\
+			    (RB_RIGHT(tmp, field) == NULL ||		\
+			    name##_RB_GETCOLOR(RB_RIGHT(tmp, field)) == RB_BLACK)) {\
+				name##_RB_SETCOLOR(tmp,  RB_RED);		\
+				elm = parent;				\
+				parent = name##_RB_GETPARENT(elm);		\
+			} else {					\
+				if (RB_RIGHT(tmp, field) == NULL ||	\
+				    name##_RB_GETCOLOR(RB_RIGHT(tmp, field)) == RB_BLACK) {\
+					struct type *oleft;		\
+					if ((oleft = RB_LEFT(tmp, field)) \
+					    != NULL)			\
+						name##_RB_SETCOLOR(oleft,  RB_BLACK);\
+					name##_RB_SETCOLOR(tmp, RB_RED);	\
+					RB_ROTATE_RIGHT(name, head, tmp, oleft, field);\
+					tmp = RB_RIGHT(parent, field);	\
+				}					\
+				name##_RB_SETCOLOR(tmp, (name##_RB_GETCOLOR(parent)));\
+				name##_RB_SETCOLOR(parent, RB_BLACK);	\
+				if (RB_RIGHT(tmp, field))		\
+					name##_RB_SETCOLOR(RB_RIGHT(tmp, field),RB_BLACK);\
+				RB_ROTATE_LEFT(name, head, parent, tmp, field);\
+				elm = RB_ROOT(head);			\
+				break;					\
+			}						\
+		} else {						\
+			tmp = RB_LEFT(parent, field);			\
+			if (name##_RB_GETCOLOR(tmp) == RB_RED) {		\
+				RB_SET_BLACKRED(name, tmp, parent, field);	\
+				RB_ROTATE_RIGHT(name, head, parent, tmp, field);\
+				tmp = RB_LEFT(parent, field);		\
+			}						\
+			if ((RB_LEFT(tmp, field) == NULL ||		\
+			    name##_RB_GETCOLOR(RB_LEFT(tmp, field)) == RB_BLACK) &&\
+			    (RB_RIGHT(tmp, field) == NULL ||		\
+			    name##_RB_GETCOLOR(RB_RIGHT(tmp, field)) == RB_BLACK)) {\
+				name##_RB_SETCOLOR(tmp, RB_RED);		\
+				elm = parent;				\
+				parent = name##_RB_GETPARENT(elm);		\
+			} else {					\
+				if (RB_LEFT(tmp, field) == NULL ||	\
+				    name##_RB_GETCOLOR(RB_LEFT(tmp, field)) == RB_BLACK) {\
+					struct type *oright;		\
+					if ((oright = RB_RIGHT(tmp, field)) \
+					    != NULL)			\
+						name##_RB_SETCOLOR(oright,  RB_BLACK);\
+					name##_RB_SETCOLOR(tmp,  RB_RED);	\
+					RB_ROTATE_LEFT(name, head, tmp, oright, field);\
+					tmp = RB_LEFT(parent, field);	\
+				}					\
+				name##_RB_SETCOLOR(tmp,(name##_RB_GETCOLOR(parent)));\
+				name##_RB_SETCOLOR(parent, RB_BLACK);	\
+				if (RB_LEFT(tmp, field))		\
+					name##_RB_SETCOLOR(RB_LEFT(tmp, field), RB_BLACK);\
+				RB_ROTATE_RIGHT(name, head, parent, tmp, field);\
+				elm = RB_ROOT(head);			\
+				break;					\
+			}						\
+		}							\
+	}								\
+	if (elm)							\
+		name##_RB_SETCOLOR(elm,  RB_BLACK);			\
+}									\
+									\
+struct type *								\
+name##_RB_REMOVE(struct name *head, struct type *elm)			\
+{									\
+	struct type *child, *parent, *old = elm;			\
+	int color;							\
+	if (RB_LEFT(elm, field) == NULL)				\
+		child = RB_RIGHT(elm, field);				\
+	else if (RB_RIGHT(elm, field) == NULL)				\
+		child = RB_LEFT(elm, field);				\
+	else {								\
+		struct type *left;					\
+		elm = RB_RIGHT(elm, field);				\
+		while ((left = RB_LEFT(elm, field)) != NULL)		\
+			elm = left;					\
+		child = RB_RIGHT(elm, field);				\
+		parent = name##_RB_GETPARENT(elm);				\
+		color = name##_RB_GETCOLOR(elm);				\
+		if (child)						\
+			name##_RB_SETPARENT(child, parent);		\
+		if (parent) {						\
+			if (RB_LEFT(parent, field) == elm)		\
+				RB_LEFT(parent, field) = child;		\
+			else						\
+				RB_RIGHT(parent, field) = child;	\
+			RB_AUGMENT(parent);				\
+		} else							\
+			RB_ROOT(head) = child;				\
+		if (name##_RB_GETPARENT(elm) == old)			\
+			parent = elm;					\
+		(elm)->field = (old)->field;				\
+		if (name##_RB_GETPARENT(old)) {				\
+			if (RB_LEFT(name##_RB_GETPARENT(old), field) == old)\
+				RB_LEFT(name##_RB_GETPARENT(old), field) = elm;\
+			else						\
+				RB_RIGHT(name##_RB_GETPARENT(old), field) = elm;\
+			RB_AUGMENT(name##_RB_GETPARENT(old));		\
+		} else							\
+			RB_ROOT(head) = elm;				\
+		name##_RB_SETPARENT(RB_LEFT(old, field), elm);		\
+		if (RB_RIGHT(old, field))				\
+			name##_RB_SETPARENT(RB_RIGHT(old, field), elm);	\
+		if (parent) {						\
+			left = parent;					\
+			do {						\
+				RB_AUGMENT(left);			\
+			} while ((left = name##_RB_GETPARENT(left)) != NULL); \
+		}							\
+		goto color;						\
+	}								\
+	parent = name##_RB_GETPARENT(elm);					\
+	color = name##_RB_GETCOLOR(elm);					\
+	if (child)							\
+		name##_RB_SETPARENT(child, parent);			\
+	if (parent) {							\
+		if (RB_LEFT(parent, field) == elm)			\
+			RB_LEFT(parent, field) = child;			\
+		else							\
+			RB_RIGHT(parent, field) = child;		\
+		RB_AUGMENT(parent);					\
+	} else								\
+		RB_ROOT(head) = child;					\
+color:									\
+	if (color == RB_BLACK)						\
+		name##_RB_REMOVE_COLOR(head, parent, child);		\
+	return (old);							\
+}									\
+									\
+/* Inserts a node into the RB tree */					\
+struct type *								\
+name##_RB_INSERT(struct name *head, struct type *elm)			\
+{									\
+	struct type *tmp;						\
+	struct type *parent = NULL;					\
+	int comp = 0;							\
+	tmp = RB_ROOT(head);						\
+	while (tmp) {							\
+		parent = tmp;						\
+		comp = (cmp)(elm, parent);				\
+		if (comp < 0)						\
+			tmp = RB_LEFT(tmp, field);			\
+		else if (comp > 0)					\
+			tmp = RB_RIGHT(tmp, field);			\
+		else							\
+			return (tmp);					\
+	}								\
+	RB_SET(name, elm, parent, field);					\
+	if (parent != NULL) {						\
+		if (comp < 0)						\
+			RB_LEFT(parent, field) = elm;			\
+		else							\
+			RB_RIGHT(parent, field) = elm;			\
+		RB_AUGMENT(parent);					\
+	} else								\
+		RB_ROOT(head) = elm;					\
+	name##_RB_INSERT_COLOR(head, elm);				\
+	return (NULL);							\
+}									\
+									\
+/* Finds the node with the same key as elm */				\
+struct type *								\
+name##_RB_FIND(struct name *head, struct type *elm)			\
+{									\
+	struct type *tmp = RB_ROOT(head);				\
+	int comp;							\
+	while (tmp) {							\
+		comp = cmp(elm, tmp);					\
+		if (comp < 0)						\
+			tmp = RB_LEFT(tmp, field);			\
+		else if (comp > 0)					\
+			tmp = RB_RIGHT(tmp, field);			\
+		else							\
+			return (tmp);					\
+	}								\
+	return (NULL);							\
+}									\
+									\
+/* ARGSUSED */								\
+struct type *								\
+name##_RB_NEXT(struct type *elm)					\
+{									\
+	if (RB_RIGHT(elm, field)) {					\
+		elm = RB_RIGHT(elm, field);				\
+		while (RB_LEFT(elm, field))				\
+			elm = RB_LEFT(elm, field);			\
+	} else {							\
+		if (name##_RB_GETPARENT(elm) &&				\
+		    (elm == RB_LEFT(name##_RB_GETPARENT(elm), field)))	\
+			elm = name##_RB_GETPARENT(elm);			\
+		else {							\
+			while (name##_RB_GETPARENT(elm) &&			\
+			    (elm == RB_RIGHT(name##_RB_GETPARENT(elm), field)))\
+				elm = name##_RB_GETPARENT(elm);		\
+			elm = name##_RB_GETPARENT(elm);			\
+		}							\
+	}								\
+	return (elm);							\
+}									\
+									\
+struct type *								\
+name##_RB_MINMAX(struct name *head, int val)				\
+{									\
+	struct type *tmp = RB_ROOT(head);				\
+	struct type *parent = NULL;					\
+	while (tmp) {							\
+		parent = tmp;						\
+		if (val < 0)						\
+			tmp = RB_LEFT(tmp, field);			\
+		else							\
+			tmp = RB_RIGHT(tmp, field);			\
+	}								\
+	return (parent);						\
+}
+
+
+#define RB_PROTOTYPE_PREV(name, type, field, cmp)			\
+	RB_PROTOTYPE(name, type, field, cmp)				\
+struct type *name##_RB_PREV(struct type *);
+
+
+#define RB_PROTOTYPE_SC_PREV(_sc_, name, type, field, cmp)		\
+	RB_PROTOTYPE_SC(_sc_, name, type, field, cmp)			\
+_sc_ struct type *name##_RB_PREV(struct type *);
+
+#define RB_GENERATE_PREV(name, type, field, cmp)			\
+	RB_GENERATE(name, type, field, cmp)				\
+struct type *								\
+name##_RB_PREV(struct type *elm)					\
+{									\
+	if (RB_LEFT(elm, field)) {					\
+		elm = RB_LEFT(elm, field);				\
+		while (RB_RIGHT(elm, field))				\
+			elm = RB_RIGHT(elm, field);			\
+	} else {							\
+		if (name##_RB_GETPARENT(elm) &&				\
+		    (elm == RB_RIGHT(name##_RB_GETPARENT(elm), field)))	\
+			elm = name##_RB_GETPARENT(elm);			\
+		else {							\
+			while (name##_RB_GETPARENT(elm) &&		\
+			    (elm == RB_LEFT(name##_RB_GETPARENT(elm), field)))\
+				elm = name##_RB_GETPARENT(elm);		\
+			elm = name##_RB_GETPARENT(elm);			\
+		}							\
+	}								\
+	return (elm);							\
+}									\
+
+#define RB_NEGINF	-1
+#define RB_INF	1
+
+#define RB_INSERT(name, x, y)	name##_RB_INSERT(x, y)
+#define RB_REMOVE(name, x, y)	name##_RB_REMOVE(x, y)
+#define RB_FIND(name, x, y)	name##_RB_FIND(x, y)
+#define RB_NEXT(name, x, y)	name##_RB_NEXT(y)
+#define RB_PREV(name, x, y)	name##_RB_PREV(y)
+#define RB_MIN(name, x)		name##_RB_MINMAX(x, RB_NEGINF)
+#define RB_MAX(name, x)		name##_RB_MINMAX(x, RB_INF)
+
+#define RB_FOREACH(x, name, head)					\
+	for ((x) = RB_MIN(name, head);					\
+	     (x) != NULL;						\
+	     (x) = name##_RB_NEXT(x))
+
+#define RB_FOREACH_FROM(x, name, y)                                     \
+	for ((x) = (y);                                                 \
+	    ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL);    \
+            (x) = (y))
+
+#define RB_FOREACH_REVERSE_FROM(x, name, y)				\
+	for ((x) = (y);							\
+	    ((x) != NULL) && ((y) = name##_RB_PREV(x), (x) != NULL);	\
+	     (x) = (y))
+
+#define RB_FOREACH_SAFE(x, name, head, y)				\
+	for ((x) = RB_MIN(name, head);					\
+	    ((x) != NULL) && ((y) = name##_RB_NEXT(x), (x) != NULL);	\
+	     (x) = (y))
+
+#endif	/* _LIBKERN_TREE_H_ */
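
For orientation, here is a minimal usage sketch of the API generated above. The node type, comparator, and tree name are hypothetical, and RB_HEAD, RB_ENTRY, and RB_INITIALIZER are the companion macros defined earlier in this header:

#include <libkern/tree.h>

struct knode {
	RB_ENTRY(knode) link;		/* embedded left/right/parent links */
	int key;
};

static int
knode_cmp(struct knode *a, struct knode *b)
{
	/* must return <0, 0, >0; RB_INSERT and RB_FIND branch on the sign */
	return (a->key < b->key) ? -1 : (a->key > b->key);
}

RB_HEAD(ktree, knode);
RB_GENERATE(ktree, knode, link, knode_cmp)

static struct ktree head = RB_INITIALIZER(&head);

static void
ktree_demo(struct knode *n)
{
	struct knode probe, *found, *it;

	(void)RB_INSERT(ktree, &head, n);	/* NULL, or the colliding node */
	probe.key = n->key;
	found = RB_FIND(ktree, &head, &probe);
	RB_FOREACH(it, ktree, &head) {
		/* visits nodes in ascending knode_cmp order */
	}
	if (found != NULL)
		(void)RB_REMOVE(ktree, &head, found);
}
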
diff --git a/libkern/libkern/version.h.template b/libkern/libkern/version.h.template
index 32793fe63..57b97d48a 100644
--- a/libkern/libkern/version.h.template
+++ b/libkern/libkern/version.h.template
@@ -23,10 +23,6 @@
 #ifndef	LIBKERN_VERSION_H
 #define LIBKERN_VERSION_H
 
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
 /* Kernel versions conform to kext version strings, as described in:
  * http://developer.apple.com/technotes/tn/tn1132.html
  */
@@ -35,25 +31,21 @@ extern "C" {
  * of the kernel
  */
 #define VERSION_MAJOR		###KERNEL_VERSION_MAJOR###
-extern const int version_major;
 
 /* VERSION_MINOR, version_minor is an integer that represents the minor version
  * of the kernel
  */
 #define VERSION_MINOR		###KERNEL_VERSION_MINOR###
-extern const int version_minor;
 
 /* VERSION_VARIANT, version_variant is a string that contains the revision,
  * stage, and prerelease level of the kernel
  */
 #define VERSION_VARIANT		"###KERNEL_VERSION_VARIANT###"
-extern const char version_variant[];
 
 /* VERSION_REVISION, version_revision is an integer that represents the revision
  * of the kernel
  */
 #define VERSION_REVISION	###KERNEL_VERSION_REVISION###
-extern const int version_revision;
 
 /* VERSION_STAGE, version_stage, is an integer set to one of the following: */
 #define VERSION_STAGE_DEV	0x20
@@ -61,20 +53,46 @@ extern const int version_revision;
 #define VERSION_STAGE_BETA	0x60
 #define VERSION_STAGE_RELEASE	0x80
 #define VERSION_STAGE		###KERNEL_VERSION_STAGE###
-extern const int version_stage;
 
 /* VERSION_PRERELEASE_LEVEL, version_prerelease_level, is an integer sequence
  * number to distinguish between pre-release builds
  */
 #define VERSION_PRERELEASE_LEVEL	###KERNEL_VERSION_PRERELEASE_LEVEL###
-extern const int version_prerelease_level;
 
 /* OSTYPE, ostype, is a string as returned by uname -s */
 #define	OSTYPE		"Darwin"
-extern const char ostype[];
 
 /* OSRELEASE, osrelease, is a string as returned by uname -r */
 #define OSRELEASE	"###KERNEL_VERSION_LONG###"
+
+#ifndef ASSEMBLER
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Build-time value of VERSION_MAJOR */
+extern const int version_major;
+
+/* Build-time value of VERSION_MINOR */
+extern const int version_minor;
+
+/* Build-time value of VERSION_VARIANT */
+extern const char version_variant[];
+
+/* Build-time value of VERSION_REVISION */
+extern const int version_revision;
+
+/* Build-time value of VERSION_STAGE */
+extern const int version_stage;
+
+/* Build-time value of VERSION_PRERELEASE_LEVEL */
+extern const int version_prerelease_level;
+
+/* Build-time value of OSTYPE */
+extern const char ostype[];
+
+/* Build-time value of OSRELEASE */
 extern const char osrelease[];
 
 /* osbuilder is a string identifying who built the kernel */
@@ -94,4 +112,6 @@ extern char osversion[];
 }
 #endif
 
+#endif /* !ASSEMBLER */
+
 #endif	/* LIBKERN_VERSION_H */
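
The point of the new !ASSEMBLER guard is that assembly sources can now include the generated header for its #defines without tripping over the C declarations, while C consumers still see the externs. A hypothetical C consumer (printf as provided by libkern):

#include <libkern/version.h>

/* sketch: log the build-time constants declared above */
static void
log_kernel_version(void)
{
	printf("%s %s (version %d.%d.%d)\n", ostype, osrelease,
	    version_major, version_minor, version_revision);
}
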
diff --git a/libkern/ppc/OSAtomic.s b/libkern/ppc/OSAtomic.s
deleted file mode 100644
index 82b1f3c99..000000000
--- a/libkern/ppc/OSAtomic.s
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
- /*
- * Copyright (c) 1997-1998 Apple Computer, Inc.
- *
- *
- * HISTORY
- *
- * sdouglas  22 Oct 97 - first checked in from DriverServices
- * sdouglas  28 Jul 98 - start IOKit
- */
-
-#include <architecture/ppc/asm_help.h>
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;
-; ENTRY		functionName
-;
-; Assembly directives to begin an exported function.
-;
-; Takes: functionName - name of the exported function
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-.macro ENTRY
-    .text
-    .align	2
-    .globl	$0
-$0:
-.endmacro
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-/*
-int     OSCompareAndSwap( UInt32 oldVal, UInt32 newVal, UInt32 * addr )
-    This is now an alias to hw_compare_and_store, see xnu/libkern/Makefile.
-
-void *	OSDequeueAtomic(void ** inList, SInt32 inOffset)
-    This is also aliased, to hw_dequeue_atomic.
-    
-void	OSEnqueueAtomic(void ** inList, void * inNewLink, SInt32 inOffset)
-    This is aliased to hw_queue_atomic.
-*/
-
-/*
-Note:  We cannot use the hw_atomic routines provided by osfmk/ppc, as
-they return the result of the addition, not the original value.
-*/
-/*
-SInt32	OSDecrementAtomic(SInt32 * value)
-*/
-    ENTRY	_OSDecrementAtomic
-    mr		r4, r3
-    li		r3, -1
-    b		_OSAddAtomic
-
-/*
-SInt32	OSIncrementAtomic(SInt32 * value)
-*/
-
-    .align	5
-
-    ENTRY	_OSIncrementAtomic
-    mr		r4, r3
-    li		r3, 1
-
-/*
-SInt32	OSAddAtomic(SInt32 amount, SInt32 * value)
-*/
-
-    ENTRY	_OSAddAtomic
-    ENTRY	_OSAddAtomicLong
-
-    mr		r5,r3		/* Save the increment */
-.L_AAretry:
-    lwarx	r3, 0, r4	/* Grab the area value */
-    add		r6, r3, r5	/* Add the value */
-    stwcx.	r6, 0, r4	/* Try to save the new value */
-    bne-	.L_AAretry	/* Didn't get it, try again... */
-    blr				/* Return the original value */
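
The deleted lwarx/stwcx. loop implements a fetch-and-add that returns the original value, which is why the osfmk hw_atomic routines (which return the sum) could not simply be aliased. A portable C sketch of that contract, using a compiler builtin rather than the PPC sequence:

#include <stdint.h>

/* sketch of the OSAddAtomic contract: atomically add, return the old value */
static int32_t
os_add_atomic_sketch(int32_t amount, volatile int32_t *value)
{
	return __sync_fetch_and_add(value, amount);
}
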
diff --git a/libkern/ppc/bcmp.s b/libkern/ppc/bcmp.s
deleted file mode 100644
index 901850379..000000000
--- a/libkern/ppc/bcmp.s
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-;
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-;
-; int	bcmp(const void *LHS, const void *RHS, size_t len);
-;
-; Because bcmp returns zero if equal and nonzero otherwise, it is slightly
-; faster than memcmp, which returns the difference between the first different
-; bytes.
-; 	r3 - LHS
-; 	r4 - RHS
-; 	r5 - len
-
-	.align	5
-	.globl	EXT(bcmp)
-LEXT(bcmp)
-
-	cmpwi	cr1,r5,6		; six chars long?
-	mr	r6,r3			; copy LHS ptr so we can use r3 as result
-	mr.	r3,r5			; test length and move to r3
-	bgt	cr1,Llong		; more than 6 chars long
-	blt	cr1,Lshort		; less than 6
-
-	; most common operand length is 6 chars (enet addrs)
-
-	lwz	r8,0(r6)		; first 4 bytes of LHS
-	lwz	r7,0(r4)		; and RHS
-	lhz	r9,4(r6)		; next 2 of LHS
-	sub.	r3,r8,r7		; compare first 4
-	bnelr				; first 4 differed (r3!=0)
-	lhz	r10,4(r4)		; next 2 of RHS
-	sub	r3,r9,r10		; compare last 2
-	blr				; done, result in r3
-
-	; handle long strings
-Llong:
-	srwi	r0,r5,2			; r0 = word len
-	mtctr	r0			; set up for loop
-Llongloop:
-	lwz	r8,0(r6)		; next 4 bytes from LHS
-	addi	r6,r6,4
-	lwz	r7,0(r4)		; next 4 from RHS
-	addi	r4,r4,4
-	sub.	r3,r8,r7		; compare next 4 bytes
-	bdnzt+	eq,Llongloop		; loop if ctr!=0 and cr0_eq
-	bnelr				; done if not equal (r3!=0)
-
-	andi.	r5,r5,3			; more to go?
-
-	; compare short strings (0-5 bytes long)
-	;  	r5 = length remaining
-	;	cr0= set on length
-	;	r3 = zero if length is zero
-Lshort:
-	beqlr				; done (r3=0)
-	mtctr	r5
-Lshortloop:
-	lbz	r8,0(r6)		; get next byte from LHS
-	addi	r6,r6,1
-	lbz	r7,0(r4)		; and next byte from RHS
-	addi	r4,r4,1
-	sub.	r3,r8,r7		; compare
-	bdnzt+	eq,Lshortloop		; loop if ctr!=0 and cr0_eq
-	blr				; done, r3 set correctly by the subtract
-
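
bcmp only promises zero iff equal, which is what lets the word loop above return the raw subtraction of the first unequal words instead of isolating the differing bytes. A reference C statement of that contract (illustrative, not the deleted implementation):

#include <stddef.h>

/* reference semantics: nonzero iff the buffers differ over len bytes */
static int
bcmp_sketch(const void *lhs, const void *rhs, size_t len)
{
	const unsigned char *a = lhs, *b = rhs;

	while (len--)
		if (*a++ != *b++)
			return 1;	/* any nonzero value is a legal result */
	return 0;
}
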
diff --git a/libkern/ppc/memcmp.s b/libkern/ppc/memcmp.s
deleted file mode 100644
index 9968bf6f4..000000000
--- a/libkern/ppc/memcmp.s
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-;
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-;
-; int	memcmp(const void *LHS, const void *RHS, size_t len);
-;
-; Memcmp returns the difference between the first two different bytes, 
-; or 0 if the two strings are equal.  Because we compare a word at a
-; time, this requires a little additional processing once we find a
-; difference.
-; 	r3 - LHS
-; 	r4 - RHS
-; 	r5 - len
-
-	.align	5
-	.globl	EXT(memcmp)
-LEXT(memcmp)
-
-	cmpwi	cr1,r5,6		; six is the most common length
-	mr	r6,r3			; we want to use r3 for compare result
-	mr.	r3,r5			; test length for 0
-	bgt	cr1,Llong		; handle long strings
-	blt	cr1,Lshort		; and short strings
-
-	; six char strings are special cased because they are the most common
-Lsix:
-	lwz	r8,0(r6)		; first 4 bytes of LHS
-	lwz	r7,0(r4)		; and RHS
-	xor.	r3,r8,r7		; compare first 4
-	bne	Ldifferent		; first 4 differed
-	lhz	r8,4(r6)		; last 2 of LHS
-	lhz	r7,4(r4)		; last 2 of RHS
-	xor.	r3,r8,r7		; compare last 2
-	beqlr				; done if equal
-
-	; strings differ, so we must compute difference between first two
-	; differing bytes.
-	;	r8 = LHS bytes
-	;	r7 = RHS bytes
-	;	r3 = r8 xor r7 (r3!=0)
-Ldifferent:
-	cntlzw	r9,r3			; count leading 0s in xor
-	rlwinm	r10,r9,0,0,28		; mask off low 3 bits, so r10 = 0, 8, 16, or 24
-	subfic	r6,r10,24		; r6 := (24 - r10)
-	srw	r4,r8,r6		; r4 = LHS differing byte
-	srw	r5,r7,r6		; r5 = RHS differing byte
-	sub	r3,r4,r5		; r3 = difference
-	blr
-
-	; handle long strings
-Llong:
-	srwi	r0,r5,2			; r0 = word length
-	mtctr	r0			; set up for loop
-Llongloop:
-	lwz	r8,0(r6)		; next 4 bytes from LHS
-	addi	r6,r6,4
-	lwz	r7,0(r4)		; next 4 from RHS
-	addi	r4,r4,4
-	xor.	r3,r8,r7		; compare next 4 bytes
-	bdnzt+	eq,Llongloop		; loop if ctr!=0 and cr0_eq
-	bne	Ldifferent		; these 4 bytes not equal
-	
-	andi.	r5,r5,3			; more to go?
-
-	; compare short strings (0-5 bytes long)
-	;	r5 = length (0-5)
-	;	cr0= set on length
-	;	r3 = if r5=0, then r3=0
-Lshort:
-	beqlr				; 0-length strings are defined to be equal (r3=0)
-	mtctr	r5
-Lshortloop:
-	lbz	r8,0(r6)		; get next byte from LHS
-	addi	r6,r6,1
-	lbz	r7,0(r4)		; and next byte from RHS
-	addi	r4,r4,1
-	sub.	r3,r8,r7		; compare
-	bdnzt+	eq,Lshortloop		; loop if ctr!=0 and cr0_eq
-	blr				; done, r3 set correctly by the subtract
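
The Ldifferent path recovers the first differing byte pair from a 32-bit xor by counting leading zeros and masking down to a byte boundary. A C sketch of that step, assuming big-endian word order as on PPC:

#include <stdint.h>

/* given two unequal big-endian words, return the difference of the
 * first differing bytes (mirrors the cntlzw/rlwinm/subfic/srw sequence) */
static int
first_byte_diff_sketch(uint32_t lhs, uint32_t rhs)
{
	uint32_t x = lhs ^ rhs;				/* nonzero by assumption */
	int shift = 24 - (__builtin_clz(x) & ~7);	/* 24, 16, 8, or 0 */

	return (int)((lhs >> shift) & 0xff) - (int)((rhs >> shift) & 0xff);
}
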
diff --git a/libkern/ppc/strlen.s b/libkern/ppc/strlen.s
deleted file mode 100644
index 0bb80cf99..000000000
--- a/libkern/ppc/strlen.s
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-;
-;
-; Strlen, optimized for PPC.  The routine we use is 2-3x faster
-; than the simple loop which checks each byte for zero.
-; For 0- and 1-byte strings, the simple routine is faster, but
-; only by a few cycles.  The algorithm used was adapted from the
-; Mac OS 9 stdCLib strcopy routine, which was originally
-; written by Gary Davidian.  It relies on the following rather
-; inobvious but very efficient test: 
-; 
-;	y =  dataWord + 0xFEFEFEFF
-;	z = ~dataWord & 0x80808080
-;	if ( y & z ) == 0 then all bytes in dataWord are non-zero
-;
-; The test maps any non-zero byte to zeros and any zero byte to 0x80,
-; with one exception: 0x01 bytes preceding the first zero are also
-; mapped to 0x80.
-;
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-;
-; int	strlen(ptr)
-;
-;
-
-	.align	5
-	.globl	EXT(strlen)
-LEXT(strlen)
-
-	andi.	r4,r3,0x03		; test alignment first
-	mr	r9,r3			; store the original address for later use....
-	bne	LalignSource		; align the source addr if not already aligned
-Llentry:
-	lis	r5,hi16(0xFEFEFEFF)
-	lis	r6,hi16(0x80808080)
-	subi	r3,r3,0x04		; pre-decrement r3 for the lwzu
-	ori	r5,r5,lo16(0xFEFEFEFF)	; r5=0xFEFEFEFF
-	ori	r6,r6,lo16(0x80808080)	; r6=0x80808080
-
-LLoop:
-	lwzu	r8,4(r3)		; get the first 4 bytes and increment address
-	add	r4,r5,r8		; r4= data + 0xFEFEFEFF
-	andc	r7,r6,r8		; r7= ~data & 0x80808080
-	and.	r4,r4,r7		; r4= r4 & r7
-	beq	LLoop			; if r4 is zero, then all bytes are non-zero
-
-; Now we know one of the bytes in r8 is zero,
-; we just have to figure out which one. 
-; We have mapped 0 bytes to 0x80, and nonzero bytes to 0x00,
-; with one exception:
-; 0x01 bytes preceding the first zero are also mapped to 0x80.
-; So we have to mask out the 0x80s caused by 0x01s before
-; counting leading zeroes to get the byte count in the last word.
-
-	rlwinm	r5,r8,7,0,31		; move 0x01 bits to 0x80 position
-	subf	r3,r9,r3		; start to compute string length
-	andc	r4,r4,r5		; turn off false hits from 0x0100 worst case
-	cntlzw	r7,r4			; now we can count leading 0s
-	srwi	r7,r7,3			; convert 0,8,16,24 to 0,1,2,3
-	add	r3,r3,r7		; add in nonzero bytes in last word
-	blr
-
-; We must align the source address for two reasons: to avoid spurious page
-; faults, and for speed.  
-;	r4 = low 2 bits of address (1,2, or 3)
-;	r3 = address
-;	r9 = original address (still same as r3)
-
-LalignSource:
-	lbz	r5,0(r3)		; get the first byte...
-	subic.	r4,r4,2			; test for 1, 2 or 3 bytes
-	addi	r3,r3,1			; increment address
-	addi	r6,r9,1			; now r6==r3
-	cmpwi	cr1,r5,0		; zero?
-	beq	cr1,Lreturn		; if it's zero, return zero
-	bgt	Llentry			; address is aligned now if low bits were 3
-
-	lbz	r5,0(r3)		; get the next byte...
-	addi	r3,r3,1			; increment address
-	cmpwi	cr1,r5,0		; zero?
-	beq	cr1,Lreturn		; if it's zero, return one
-	beq	Llentry			; addr is aligned now if low bits were 2
-
-	lbz	r5,0(r3)		; get the next byte...
-	addi	r3,r3,1			; increment address
-	cmpwi	cr1,r5,0		; zero?
-	bne	cr1,Llentry		; not zero, continue check (now aligned)
-Lreturn:
-	sub	r3,r3,r6		; get string length (0, 1, or 2)
-	blr
-
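
The screening test from the header comment is easy to restate in C. Detection is exact (0xFEFEFEFF is -0x01010101 modulo 2^32, so this is the classic has-zero-byte test); only locating the zero needs the 0x01 fixup performed after the loop:

#include <stdbool.h>
#include <stdint.h>

/* sketch of the per-word screen: true iff w contains a zero byte;
 * spurious 0x80 bits appear only in words that already contain a zero */
static bool
word_has_zero_byte(uint32_t w)
{
	uint32_t y = w + 0xFEFEFEFFu;	/* i.e. w - 0x01010101 */
	uint32_t z = ~w & 0x80808080u;	/* high bit set where byte < 0x80 */

	return (y & z) != 0;
}
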
diff --git a/libkern/uuid/Makefile b/libkern/uuid/Makefile
index c7c467538..13a3f1969 100644
--- a/libkern/uuid/Makefile
+++ b/libkern/uuid/Makefile
@@ -9,8 +9,6 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC = \
-
 INSTINC_SUBDIRS_I386 = \
 
 INSTINC_SUBDIRS_X86_64 = \
@@ -19,8 +17,6 @@ INSTINC_SUBDIRS_ARM = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS_X86_64 = \
diff --git a/libkern/uuid/uuid.c b/libkern/uuid/uuid.c
index ac9db3f84..ffc5c8059 100644
--- a/libkern/uuid/uuid.c
+++ b/libkern/uuid/uuid.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
  *
  * %Begin-Header%
  * Redistribution and use in source and binary forms, with or without
@@ -51,19 +51,22 @@ read_node(uint8_t *node)
 {
 #if NETWORKING
 	struct ifnet *ifp;
-	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
 
 	ifnet_head_lock_shared();
 	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
-		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
-			sdl = (struct sockaddr_dl *)ifa->ifa_addr;
-			if (sdl && sdl->sdl_family == AF_LINK && sdl->sdl_type == IFT_ETHER) {
-				memcpy(node, LLADDR(sdl), 6);
-				ifnet_head_done();
-				return;
-			}
+		ifnet_lock_shared(ifp);
+		IFA_LOCK_SPIN(ifp->if_lladdr);
+		sdl = (struct sockaddr_dl *)ifp->if_lladdr->ifa_addr;
+		if (sdl->sdl_type == IFT_ETHER) {
+			memcpy(node, LLADDR(sdl), 6);
+			IFA_UNLOCK(ifp->if_lladdr);
+			ifnet_lock_done(ifp);
+			ifnet_head_done();
+			return;
 		}
+		IFA_UNLOCK(ifp->if_lladdr);
+		ifnet_lock_done(ifp);
 	}
 	ifnet_head_done();
 #endif /* NETWORKING */
diff --git a/libkern/x86_64/OSAtomic.s b/libkern/x86_64/OSAtomic.s
index b3b26164a..30713ef3d 100644
--- a/libkern/x86_64/OSAtomic.s
+++ b/libkern/x86_64/OSAtomic.s
@@ -31,13 +31,11 @@
 #;***************************************************************************
 
 	.globl _OSCompareAndSwap
-
-// TODO FIXME!!
 _OSCompareAndSwap: #;oldValue, newValue, ptr
 	movl		 %edi, %eax
 	lock
-	cmpxchgl	%esi, 0(%rdx)	#; CAS (eax is an implicit operand)
-	sete		%al				#; did CAS succeed? (TZ=1)
+	cmpxchgl	%esi, (%rdx)	#; CAS (eax is an implicit operand)
+	sete		%al			#; did CAS succeed? (ZF=1)
 	movzbq		%al, %rax		#; clear out the high bytes
 	ret
 
@@ -50,10 +48,10 @@ _OSCompareAndSwap: #;oldValue, newValue, ptr
 
 _OSCompareAndSwap64:
 _OSCompareAndSwapPtr: #;oldValue, newValue, ptr
-	movq		 %rdi, %rax
+	movq		%rdi, %rax
 	lock
-	cmpxchgq	%rsi, 0(%rdx)	#; CAS (eax is an implicit operand)
-	sete		%al				#; did CAS succeed? (TZ=1)
+	cmpxchgq	%rsi, (%rdx)	#; CAS (rax is an implicit operand)
+	sete		%al			#; did CAS succeed? (ZF=1)
 	movzbq		%al, %rax		#; clear out the high bytes
 	ret
 
@@ -66,7 +64,7 @@ _OSCompareAndSwapPtr: #;oldValue, newValue, ptr
 _OSAddAtomic64:
 _OSAddAtomicLong:
 	lock
-	xaddq	%rdi, 0(%rsi)		#; Atomic exchange and add
+	xaddq	%rdi, (%rsi)		#; Atomic exchange and add
 	movq	%rdi, %rax;
 	ret
 
@@ -78,6 +76,6 @@ _OSAddAtomicLong:
 	.globl	_OSAddAtomic
 _OSAddAtomic:
 	lock
-	xaddl	%edi, 0(%rsi)		#; Atomic exchange and add
+	xaddl	%edi, (%rsi)		#; Atomic exchange and add
 	movl	%edi, %eax;
 	ret
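
For reference, the contract being tidied here (the sete materializes ZF after the lock cmpxchg) corresponds to this C sketch, using a compiler builtin:

#include <stdbool.h>
#include <stdint.h>

/* sketch of OSCompareAndSwap: swap iff *ptr == oldValue; true on success */
static bool
os_cas32_sketch(uint32_t oldValue, uint32_t newValue, volatile uint32_t *ptr)
{
	return __sync_bool_compare_and_swap(ptr, oldValue, newValue);
}
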
diff --git a/libkern/zlib/adler32.c b/libkern/zlib/adler32.c
index bf0d9723a..00214cd2e 100644
--- a/libkern/zlib/adler32.c
+++ b/libkern/zlib/adler32.c
@@ -32,8 +32,6 @@
 
 /* @(#) $Id$ */
 
-#include <stdint.h> // For uintptr_t.
-
 
 #define ZLIB_INTERNAL
 #if KERNEL
@@ -42,8 +40,9 @@
     #include "zlib.h"
 #endif /* KERNEL */
 
-#if defined _ARM_ARCH_6
-	extern uLong adler32_vec(uLong adler, uLong sum2, const Bytef *buf, uInt len);
+#if defined __x86_64__ || defined __i386__ || defined _ARM_ARCH_6
+#include <stdint.h> // For uintptr_t.
+    extern uLong adler32_vec(uLong adler, uLong sum2, const Bytef *buf, uInt len);
 #endif
 
 #define BASE 65521UL    /* largest prime smaller than 65536 */
@@ -98,9 +97,7 @@ uLong ZEXPORT adler32(adler, buf, len)
     uInt len;
 {
     unsigned long sum2;
-#if !defined _ARM_ARCH_6
     unsigned n;
-#endif
 
     /* split Adler-32 into component sums */
     sum2 = (adler >> 16) & 0xffff;
@@ -133,8 +130,10 @@ uLong ZEXPORT adler32(adler, buf, len)
         return adler | (sum2 << 16);
     }
 
-#if defined _ARM_ARCH_6
-    /* align buf to 16-byte boundary */
+#if defined __x86_64__ || defined __i386__ || defined _ARM_ARCH_6
+
+	if (len>=32000) {	/* use vector code only if len is large enough to amortize the vector register save/restore */
+	/* align buf to 16-byte boundary */
     while (((uintptr_t)buf)&15) { /* not on a 16-byte boundary */
         len--;
         adler += *buf++;
@@ -143,9 +142,10 @@ uLong ZEXPORT adler32(adler, buf, len)
         MOD4(sum2);             /* only added so many BASE's */
     }
 
-    return adler32_vec(adler, sum2, buf, len);      // armv7 neon vectorized implementation
+    return adler32_vec(adler, sum2, buf, len);      // x86_64, i386 (up to SSE3), or armv6 and up
+	}
 
-#else   //  _ARM_ARCH_6
+#endif	// defined __x86_64__ || defined __i386__ || defined _ARM_ARCH_6
 
     /* do length NMAX blocks -- requires just one modulo operation */
     while (len >= NMAX) {
@@ -176,8 +176,6 @@ uLong ZEXPORT adler32(adler, buf, len)
 
     /* return recombined sums */
     return adler | (sum2 << 16);
-
-#endif  // _ARM_ARCH_6
 }
 
 /* ========================================================================= */
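
The scalar path kept above is the textbook split-sum recurrence; as a self-contained reference sketch (the real code batches NMAX bytes per modulo instead of reducing on every byte):

#include <stddef.h>
#include <stdint.h>

#define SKETCH_BASE 65521u	/* largest prime smaller than 65536 */

/* reference Adler-32 update over buf[0..len) */
static uint32_t
adler32_sketch(uint32_t adler, const uint8_t *buf, size_t len)
{
	uint32_t a = adler & 0xffff;
	uint32_t sum2 = (adler >> 16) & 0xffff;

	while (len--) {
		a = (a + *buf++) % SKETCH_BASE;
		sum2 = (sum2 + a) % SKETCH_BASE;
	}
	return a | (sum2 << 16);
}
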
diff --git a/libkern/zlib/arm/adler32vec.s b/libkern/zlib/arm/adler32vec.s
deleted file mode 100644
index 3af072caa..000000000
--- a/libkern/zlib/arm/adler32vec.s
+++ /dev/null
@@ -1,428 +0,0 @@
-#include <arm/arch.h>
-
-#define BASE 65521	    /* largest prime smaller than 65536 */
-#define NMAX 5552 		/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
-
-// Note: buf should have been 16-byte aligned in the caller function,
-
-// uLong adler32_vec(unsigned int adler, unsigned int sum2, const Bytef* buf, int len) {
-//    unsigned n;
-//    while (len >= NMAX) {
-//        len -= NMAX;
-//        n = NMAX / 16;          /* NMAX is divisible by 16 */
-//        do {
-//            DO16(buf);          /* 16 sums unrolled */
-//            buf += 16;
-//        } while (--n);
-//        MOD(adler);
-//        MOD(sum2);
-//    }
-//    if (len) {                  /* avoid modulos if none remaining */
-//        while (len >= 16) {
-//            len -= 16;
-//            DO16(buf);
-//            buf += 16;
-//        }
-//        while (len--) {
-//            adler += *buf++;
-//            sum2 += adler;
-//        }
-//        MOD(adler);
-//        MOD(sum2);
-//    }
-//    return adler | (sum2 << 16); 		/* return recombined sums */
-// }
-
-
-/* 
-	DO16 vectorization:
-	given initial unsigned int sum2 and adler, and a new set of 16 input bytes (x[0:15]), it can be shown that
-	sum2  += (16*adler + 16*x[0] + 15*x[1] + ... + 1*x[15]);
-	adler += (x[0] + x[1] + ... + x[15]);
-
-	therefore, this is what can be done to vectorize the above computation
-	1. 16-byte aligned vector load into q2 (x[0:x15])
-	2. sum2 += (adler<<4);
-	3. vmull.u8 (q9,q8),q2,d2 where d2 = (1,1,1,1...,1), (q9,q8) : 16 16-bit elements x[0:15]
-	4. vmull.u8 (q11,q10),q2,q0 where q0 = (1,2,3,4...,16), (q11,q10) : 16 16-bit elements (16:1)*x[0:15]
-	5. parallel add (with one expansion to 32-bit) (q9,q8) and (q11,q10) all the way to accumulate into adler and sum2
-
-	In this revision, whenever possible, 2 DO16 loops are combined into a DO32 loop.
-	1. 32-byte aligned vector load into q2,q14 (x[0:x31])
-    2. sum2 += (adler<<5);
-    3. vmull.u8 (4 q registers),(q2,q14),d2 where d2 = (1,1,1,1...,1), (4 q registers) : 32 16-bit elements x[0:31]
-	4. vmull.u8 (4 q registers),(q2,q14),(q0,q15) where q0 = (1,...,32), (4 q regs) : 32 16-bit elements (32:1)*x[0:31]
-    5. parallel add (with one expansion to 32-bit) the pair of (4 q regs) all the way to accumulate into adler and sum2
-
-	This change improves the performance by ~ 0.55 cycle/uncompress byte on ARM Cortex-A8.
-
-*/
-
-/*
-	MOD implementation:
-	adler%BASE = adler - floor(adler*(1/BASE))*BASE; where (1/BASE) = 0x80078071 in Q47
-	1. vmull.u32   q2,(adler,sum2),(1/BASE)		// *(1/BASE) in Q47
-    2. vshr.u64    q2,q2,#47					// floor function
-    3. vpadd.u32   d4,d4,d5						// merge into a double word in d4
-    4. vmls.u32    (adler,sum2),d4,d3[0]        // (adler,sum2) -= floor[(adler,sum2)/BASE]*BASE
-	 
-*/
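
Both identities above can be spot-checked in C (illustrative names; 0x80078071 is ceil(2^47/65521), and the reduction is exact for any 32-bit operand, which covers the values adler and sum2 take here):

#include <assert.h>
#include <stdint.h>

/* DO16 closed form: sum2 += 16*adler + sum((16-i)*x[i]); adler += sum(x[i]) */
static void
do16_identity_check(uint32_t adler, uint32_t sum2, const uint8_t x[16])
{
	uint32_t a = adler, s = sum2, bsum = 0, wsum = 0;
	int i;

	for (i = 0; i < 16; i++) {	/* scalar reference */
		a += x[i];
		s += a;
	}
	for (i = 0; i < 16; i++) {	/* closed form the vector code computes */
		bsum += x[i];
		wsum += (16 - i) * x[i];
	}
	assert(a == adler + bsum);
	assert(s == sum2 + 16 * adler + wsum);
}

/* Q47 reciprocal modulo: v % 65521 without a divide */
static uint32_t
mod_base_q47(uint32_t v)
{
	uint32_t q = (uint32_t)(((uint64_t)v * 0x80078071u) >> 47);

	return v - q * 65521u;
}
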
-
-#if defined _ARM_ARCH_6			// this file would be used only for armv6 or above
-
-
-	.text
-	.align 2
-	.globl _adler32_vec
-_adler32_vec:
- 
-#if (!KERNEL_SUPPORT_NEON) || (!defined _ARM_ARCH_7)	// for armv6 or armv7 without neon support
-
-
-	#define	adler			r0
-	#define	sum2			r1
-	#define	buf				r2
-	#define	len				r3	
-	#define	one_by_base		r4
-	#define	base			r5
-	#define nmax			r6
-	#define	t				r12
-	#define	vecs			lr
-	#define	x0				r8
-	#define	x1				r10
-	#define	x2				r11
-	#define	x3				r12
-	#define	zero			r9
-
-	// this macro performs adler/sum2 update for 4 input bytes
-
-	.macro DO4
-	add		sum2, adler, lsl #2				// sum2 += 4*adler;
-	ldr		x0,[buf]						// 4 bytes in 1 32-bit word
-	usada8	adler, x0, zero, adler			// adler += sum(x0:x3)
-	ldrb	x0,[buf], #4					// x0
-	ldrb	x2,[buf,#-2]					// x2
-	ldrb	x1,[buf,#-3]					// x1
-	ldrb	x3,[buf,#-1]					// x3
-	add		sum2, x0, lsl #2				// sum2 += 4*x0
-	add		x3, x3, x1, lsl #1				// x3+2*x1
-	add		sum2, x2, lsl #1				// sum2 += 2*x2
-	add		x3, x1							// x3+3*x1
-	add		sum2, x3						// sum2 += x3+3*x1
-	.endm
-
-	// the following macro cascades 4 DO4 into a adler/sum2 update for 16 bytes
-	.macro DO16
-	DO4										// adler/sum2 update for 4 input bytes
-	DO4										// adler/sum2 update for 4 input bytes
-	DO4										// adler/sum2 update for 4 input bytes
-	DO4										// adler/sum2 update for 4 input bytes
-	.endm
-
-	// the following macro performs adler sum2 modulo BASE
-	.macro	modulo_base
-	umull	x0,x1,adler,one_by_base			// adler/BASE in Q47
-	umull	x2,x3,sum2,one_by_base			// sum2/BASE in Q47
-	lsr		x1, #15							// x1 >> 15 = floor(adler/BASE)
-	lsr		x3, #15							// x3 >> 15 = floor(sum2/BASE)
-	mla		adler, x1, base, adler			// adler %= base;
-	mla		sum2, x3, base, sum2			// sum2 %= base;
-	.endm
-
-	adr		t, coeffs	
-	push	{r4-r6, r8-r11, lr}
-	ldmia	t, {one_by_base, base, nmax}	// load up coefficients
-
-	subs        len, nmax                   // pre-subtract len by NMAX
-	eor			zero, zero					// a dummy zero register to use usada8 instruction
-    blt         len_lessthan_NMAX           // if (len < NMAX) skip the while loop     
-
-while_lengenmax_loop:						// do {
-    lsr         vecs, nmax, #4              // vecs = NMAX/16;
-
-len16_loop:									// do {
-
-	DO16
-
-	subs	vecs, #1						// vecs--;
-	bgt			len16_loop					// } while (vec>0);	
-
-	modulo_base								// adler sum2 modulo BASE
-
-	subs		len, nmax					// len -= NMAX
-	bge			while_lengenmax_loop		// } while (len >= NMAX);
-
-len_lessthan_NMAX:
-	adds		len, nmax					// post-subtract len by NMAX
-
-	subs		len, #16					// pre-decrement len by 16
-	blt			len_lessthan_16
-
-len16_loop2:
-
-	DO16
-
-	subs		len, #16
-	bge			len16_loop2
-
-len_lessthan_16:
-	adds		len, #16					// post-increment len by 16
-	beq			len_is_zero
-
-remaining_buf:
-	ldrb		x0, [buf], #1
-	subs		len, #1
-	add			adler, x0
-	add			sum2, adler
-	bgt			remaining_buf
-
-len_is_zero:
-
-	modulo_base 							// adler sum2 modulo BASE
-
-	add		r0, adler, sum2, lsl #16		// to return sum2<<16 | adler 
-
-	pop		{r4-r6, r8-r11, pc}
-
-	.align 2
-coeffs:
-	.long	-2146992015
-	.long	-BASE
-	.long	NMAX
-
-#else	// KERNEL_SUPPORT_NEON
-
-
-
-	#define	adler	r0
-	#define	sum2	r1
-	#define	buf		r2
-	#define	len		r3	
-	#define	nmax	r4
-	#define	vecs	lr				// vecs = NMAX/16
-	#define	n		r5
-
-	#define	t		r12
-
-	#define	sum2_coeff		q0
-	#define	sum2_coeff0		d0
-	#define	sum2_coeff1		d1
-	#define	alder_coeff		q1
-	#define	ones			d2
-	#define	x0_x15			q2
-	#define	x0_x7			d4
-	#define	x8_x15			d5
-	#define	adlersum2		d6
-	#define	adler16			d25
-
-#if defined _ARM_ARCH_7 
-
-	adr			t, vec_table				// address to vec_table[]
-	stmfd		sp!, {r4, r5, lr}
-
-	vld1.32		{q0-q1},[t,:128]!			// loading up coefficients for adler/sum2 computation
-	vld1.32		{q15},[t,:128]!				// for sum2 computation
-	ldr			nmax, [t]					// NMAX
-
-	vmov		adlersum2, sum2, adler		// pack up adler/sum2 into a double register 
-
-	cmp			len, nmax					// len vs NMAX
-	lsr			vecs, nmax, #4				// vecs = NMAX/16;
-	blt			len_lessthan_NMAX			// if (len < NMAX) skip the while loop		
-
-	sub			len, nmax					// pre-decrement len by NMAX
-
-while_len_ge_NMAX_loop: 					// while (len>=NMAX) {
-
-	mov			n, vecs, lsr #1			// n = NMAX/32; each do_loop pass consumes 32 bytes
-
-do_loop:									// do {
-
-	vshll.u32	q12, adlersum2, #5			// d25 = (0,32*adler) to be added into (adler,sum2)
-	vld1.32		{x0_x15},[buf,:128]!		// 16-byte input x0:x15
-	vmull.u8	q8, x0_x7, ones				// 16-bit x0-x7
-	vld1.32		{q14}, [buf,:128]!			// x16:x31
-	vmull.u8	q9, x8_x15, ones			// 16-bit x8-x15
-	vadd.u32	adlersum2,adler16			// sum2 += old adler*32;
-	vmull.u8	q12, d28, ones				// 16-bit x16-x23
-	vmull.u8	q13, d29, ones				// 16-bit x24-x31
-	vmull.u8	q10, d28, sum2_coeff0		// 16-bit x16*16, x17*15, ..., x23*9
-	vmull.u8	q11, d29, sum2_coeff1		// 16-bit x24*8, x25*7, ..., x31*1	
-	vadd.u16	q8, q8, q9					// q8 = (x0+x8):(x7+x15) 8 16-bit elements for adler
-	vmull.u8	q9, x0_x7, d30				// 16-bit x0*32,...,x7*25
-	vmull.u8	q14, x8_x15, d31			// 16-bit x8*24,...,x15*17
-	vadd.u16	q12, q12, q13				// q12 = (x16+x24):(x23+x31) 8 16-bit elements for adler
-	vadd.u16	q10, q11					// 8 16-bit elements for sum2
-	vadd.u16	q8, q12						// 8 16-bit elements for adler
-	vadd.u16	q9, q14						// 8 16-bit elements for sum2 
-	vadd.u16	q10, q9						// 8 16-bit elements for sum2
-	vpaddl.u16	q8, q8						// 4 32-bit elements for adler
-	vpaddl.u16	q10, q10					// 4 32-bit elements for sum2
-	vpadd.u32	d16,d16,d17					// 2 32-bit elements for adler
-	vpadd.u32	d17,d20,d21					// 2 32-bit elements for sum2
-	subs		n, #1						//  --n 
-	vpadd.u32	d4,d17,d16					// s8 : 32-bit elements for sum2, s9 : 32-bit element for adler
-	vadd.u32	adlersum2,d4				// update adler/sum2 with the new 16 bytes input
-
-	bgt			do_loop						// } while (--n);
-
-	vshll.u32	q12, adlersum2, #4			// d25 = (0,16*adler) to be added into (adler,sum2)
-
-	vld1.32		{x0_x15},[buf,:128]!		// 	16-byte input
-
-	vmull.u8	q8, x0_x7, ones				// 16-bit x0-x7
-	vmull.u8	q9, x8_x15, ones			// 16-bit x8-x15
-	vmull.u8	q10, x0_x7, sum2_coeff0		// 16-bit x0*16, x1*15, ..., x7*9
-	vmull.u8	q11, x8_x15, sum2_coeff1	// 16-bit x8*8, x9*7, ..., x15*1	
-
-	vadd.u16	q8, q8, q9					// 8 16-bit elements for adler
-	vadd.u16	q10, q10, q11				// 8 16-bit elements for sum2
-	vpaddl.u16	q8, q8						// 4 32-bit elements for adler
-	vpaddl.u16	q10, q10					// 4 32-bit elements for sum2
-	vpadd.u32	d16,d16,d17					// 2 32-bit elements for adler
-	vpadd.u32	d17,d20,d21					// 2 32-bit elements for sum2
-	vadd.u32	adlersum2,adler16			// sum2 += old adler;
-	vpadd.u32	d4,d17,d16					// s8 : 32-bit elements for sum2, s9 : 32-bit element for adler
-	vadd.u32	adlersum2,d4				// update adler/sum2 with the new 16 bytes input
-
-	// mod(adler,BASE); mod(sum2,BASE);
-	vmull.u32	q2,adlersum2,d3[1]			// adler/BASE, sum2/BASE in Q47
-	vshr.u64	q2,q2,#47					// take the integer part
-	vpadd.u32	d4,d4,d5					// merge into a double word in d4
-	vmls.u32	adlersum2,d4,d3[0]			// (adler,sum2) -= floor[(adler,sum2)/BASE]*BASE
-
-	subs		len, nmax					// len -= NMAX;
-	bge			while_len_ge_NMAX_loop		// repeat while len >= NMAX
-
-	add			len, nmax					// post-increment len by NMAX
-
-len_lessthan_NMAX:
-
-	cmp			len, #0
-	beq			len_is_zero					// if len==0, branch to skip the following
-
-
-	subs		len, #32					// pre-decrement len by 32
-	blt			len_lessthan_32				// if len < 32, branch to len16_loop 
-
-len32_loop:
-
-	vshll.u32	q12, adlersum2, #5			// d25 = (0,32*adler) to be added into (adler,sum2)
-	vld1.32		{x0_x15},[buf,:128]!		// 16-byte input x0:x15
-	vmull.u8	q8, x0_x7, ones				// 16-bit x0-x7
-	vld1.32		{q14}, [buf,:128]!			// x16:x31
-	vmull.u8	q9, x8_x15, ones			// 16-bit x8-x15
-	vadd.u32	adlersum2,adler16			// sum2 += old adler*32;
-	vmull.u8	q12, d28, ones				// 16-bit x16-x23
-	vmull.u8	q13, d29, ones				// 16-bit x24-x31
-	vmull.u8	q10, d28, sum2_coeff0		// 16-bit x16*16, x17*15, ..., x23*9
-	vmull.u8	q11, d29, sum2_coeff1		// 16-bit x24*8, x25*7, ..., x31*1	
-	vadd.u16	q8, q8, q9					// q8 = (x0+x8):(x7+x15) 8 16-bit elements for adler
-	vmull.u8	q9, x0_x7, d30				// 16-bit x0*32,...,x7*25
-	vmull.u8	q14, x8_x15, d31			// 16-bit x8*24,...,x15*17
-	vadd.u16	q12, q12, q13				// q12 = (x16+x24):(x23+x31) 8 16-bit elements for adler
-	vadd.u16	q10, q11					// 8 16-bit elements for sum2
-	vadd.u16	q8, q12						// 8 16-bit elements for adler
-	vadd.u16	q9, q14						// 8 16-bit elements for sum2 
-	vadd.u16	q10, q9						// 8 16-bit elements for sum2
-	vpaddl.u16	q8, q8						// 4 32-bit elements for adler
-	vpaddl.u16	q10, q10					// 4 32-bit elements for sum2
-	vpadd.u32	d16,d16,d17					// 2 32-bit elements for adler
-	vpadd.u32	d17,d20,d21					// 2 32-bit elements for sum2
-	subs		len, #32					// len -= 32; 
-	vpadd.u32	d4,d17,d16					// s8 : 32-bit elements for sum2, s9 : 32-bit element for adler
-	vadd.u32	adlersum2,d4				// update adler/sum2 with the new 16 bytes input
-
-	bge			len32_loop
-
-len_lessthan_32:
-
-	adds		len, #(32-16)				// post-increment len by 32, then pre-decrement by 16
-	blt			len_lessthan_16				// if len < 16, branch to len_lessthan_16
-
-	vshll.u32	q12, adlersum2, #4			// d25 = (0,16*adler) to be added into (adler,sum2)
-
-	vld1.32		{x0_x15},[buf,:128]!		// 	16-byte input
-
-
-	vmull.u8	q8, x0_x7, ones				// 16-bit x0-x7
-	vmull.u8	q9, x8_x15, ones			// 16-bit x8-x15
-	vmull.u8	q10, x0_x7, sum2_coeff0		// 16-bit x0*16, x1*15, ..., x7*9
-	vmull.u8	q11, x8_x15, sum2_coeff1	// 16-bit x8*8, x9*7, ..., x15*1	
-
-	vadd.u16	q8, q8, q9					// 8 16-bit elements for adler
-	vadd.u16	q10, q10, q11				// 8 16-bit elements for sum2
-	vpaddl.u16	q8, q8						// 4 32-bit elements for adler
-	vpaddl.u16	q10, q10					// 4 32-bit elements for sum2
-	vpadd.u32	d16,d16,d17					// 2 32-bit elements for adler
-	vpadd.u32	d17,d20,d21					// 2 32-bit elements for sum2
-	subs		len, #16					// decrement len by 16
-	vadd.u32	adlersum2,adler16			// sum2 += old adler;
-	vpadd.u32	d4,d17,d16					// s8 : 32-bit elements for sum2, s9 : 32-bit element for adler
-	vadd.u32	adlersum2,d4				// update adler/sum2 with the new 16 bytes input
-
-len_lessthan_16:
-	adds		len, #16					// post-increment len by 16
-	beq			len_is_zero_internal		// if len==0, branch to len_is_zero_internal
-
-	// restore adler/sum2 into general registers for remaining (<16) bytes
-
-	vmov		sum2, adler, adlersum2
-remaining_len_loop:
-	ldrb		t, [buf], #1				// *buf++;
-	subs		len, #1						// len--;
-	add			adler,t						// adler += *buf
-	add			sum2,adler					// sum2 += adler
-	bgt			remaining_len_loop			// break if len<=0
-
-	vmov		adlersum2, sum2, adler		// move to double register for modulo operation
-
-len_is_zero_internal:
-
-	// mod(adler,BASE); mod(sum2,BASE);
-
-	vmull.u32	q2,adlersum2,d3[1]			// adler/BASE, sum2/BASE in Q47
-	vshr.u64	q2,q2,#47					// take the integer part
-	vpadd.u32	d4,d4,d5					// merge into a double word in d4
-	vmls.u32	adlersum2,d4,d3[0]			// (adler,sum2) -= floor[(adler,sum2)/BASE]*BASE
-
-len_is_zero:
-
-	vmov        sum2, adler, adlersum2		// restore adler/sum2 from (s12=sum2, s13=adler)
-	add			r0, adler, sum2, lsl #16	// to return adler | (sum2 << 16);
-	ldmfd       sp!, {r4, r5, pc}			// restore registers and return 
-
-
-	// constants to be loaded into q registers
-	.align	4		// 16 byte aligned
-
-vec_table:
-
-	// coefficients for computing sum2
-	.long	0x0d0e0f10		// s0
-	.long	0x090a0b0c		// s1
-	.long	0x05060708		// s2
-	.long	0x01020304		// s3
-
-	// coefficients for computing adler
-	.long	0x01010101		// s4/d2
-	.long	0x01010101		// s5
-
-	.long	BASE			// s6 : BASE 
-	.long	0x80078071		// s7 : 1/BASE in Q47
-
-	// q15 : d30.d31
-	.long	0x1d1e1f20		// s0
-	.long	0x191a1b1c		// s1
-	.long	0x15161718		// s2
-	.long	0x11121314		// s3
-
-NMAX_loc:
-	.long	NMAX			// NMAX
-	
-#endif		// _ARM_ARCH_7
-
-#endif		//  (!KERNEL_SUPPORT_NEON) || (!defined _ARM_ARCH_7)
-
-#endif		// _ARM_ARCH_6
-
diff --git a/libkern/zlib/arm/inffastS.s b/libkern/zlib/arm/inffastS.s
deleted file mode 100644
index fcf018e82..000000000
--- a/libkern/zlib/arm/inffastS.s
+++ /dev/null
@@ -1,565 +0,0 @@
-#include <arm/arch.h>
-
-// the following assembly code is hard-wired for POSTINC not being defined.
-
-#if 0			// #ifdef POSTINC
-#  define OFF 0
-#  define PUP(a) *(a)++
-#else
-#  define OFF 1
-#  define PUP(a) *++(a)
-#endif
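
In C terms: with POSTINC disabled, every buffer pointer trails the next datum by OFF and is pre-incremented on use, which is why the prologue below subtracts OFF from next_in and next_out. A one-line illustrative sketch:

/* PUP(a) is *++(a): advance first, then read */
static unsigned char
pup_sketch(const unsigned char **pp)
{
	return *++(*pp);
}
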
-
-// the code uses r9; therefore it does not meet the register protocol for armv5 and below
-// the code can only be used for armv6 and above
-
-#if defined _ARM_ARCH_6 
-
-	.cstring
-	.align 2
-LC0:
-	.ascii "invalid distance too far back\0"
-	.align 2
-LC1:
-	.ascii "invalid distance code\0"
-	.align 2
-LC2:
-	.ascii "invalid literal/length code\0"
-
-	// renaming the register and stack memory use
-
-	#define		out			r0
-	#define		strm		r10
-	#define		state		r5
-	#define		in			r11
-	#define		write		r9
-	#define		distcode	r8
-	#define		bits		lr
-	#define		hold		r4
-
-	// stack memory allocation
-
-	#define		window_loc	[sp,#0]
-	#define		last_loc	[sp,#4]
-	#define		beg_loc		[sp,#8]
-	#define		end_loc		[sp,#12]
-	#define		wsize_loc	[sp,#16]
-	#define		whave_loc	[sp,#20]
-	#define		windowm1_loc	[sp,#28]
-	#define		lmask_loc	[sp,#32]
-	#define		dmask_loc	[sp,#36]
-	#define		op_loc		[sp,#44]
-	#define		dist_loc	[sp,#48]
-
-	#define		local_size	52
-
-	// the following defines the variable offset in the inflate_state structure	(in inflate.h)
-
-	#define		state_mode		[state, #0]
-	#define		state_last		[state, #4]
-	#define		state_wrap		[state, #8]
-	#define		state_havedict	[state, #12]
-	#define		state_flags		[state, #16]
-	#define		state_dmax		[state, #20]
-	#define		state_wbits		[state, #36]
-	#define		state_wsize		[state, #40]
-	#define		state_whave		[state, #44]
-	#define		state_write		[state, #48]
-	#define		state_window	[state, #52]
-	#define		state_hold		[state, #56]
-	#define		state_bits		[state, #60]
-	#define		state_lencode	[state, #76]
-	#define		state_distcode	[state, #80]
-	#define		state_lenbits	[state, #84]
-	#define		state_distbits	[state, #88]
-
-
-// void inflate_fast(z_streamp strm, unsigned start)
-// input : 	
-//			r0 = strm, (move to r10) 
-//			r1 = start	
-
-	.text
-	.align 2
-	.globl _inflate_fast
-_inflate_fast:
-
-	stmfd	sp!, {r4-r6,r8-r11,lr}
-	sub		sp, sp, #local_size
-
-#if defined(_ARM_ARCH_5)
-	ldrd	r2,r3,[r0, #0]			// r2 = strm->next_in, r3 = strm->avail_in
-#else
-	ldmia	r0, {r2-r3}
-#endif
-
-	sub		in, r2, #OFF			// in = strm->next_in - OFF; 
-	sub		r2, #(OFF+5)			// next_in -= (OFF+5);
-	ldr		state, [r0, #28]		// state = (struct inflate_state FAR *)strm->state;
-	add		r3, r3, r2				// last = next_in - OFF + (avail_in - 5);	next_in already updated
-	mov		strm, r0
-	str		r3, last_loc			// store last to release r3
-
-	ldr		r3, [r0, #12]			// next_out
-	ldr		r2, [strm, #16]			// avail_out
-
-	sub		out, r3, #OFF			// out = strm->next_out - OFF; r0 is used as out from this point on
-
-	sub		r3, r3, #256			// next_out - 256
-	rsb		r1, r2, r1				// start - avail_out
-	sub		r3, r3, #(1+OFF)		// next_out-OFF-257 
-	add		r3, r3, r2				// r3 = end = avail_out + (next_out-OFF) - 257 = avail_out + out - 257
-	rsb		r2, r1, out				// r2 = beg = out - (start - avail_out);
-#if defined(_ARM_ARCH_5)
-	strd	r2,r3, beg_loc			// store beg/end
-	ldrd	r2,r3, state_wsize		// wsize/whave
-	strd	r2,r3, wsize_loc		// store wsize/whave
-	//ldrd	r6,hold, state_window	// window/hold, hold use r7
-	ldr		r6, state_window		// state->window
-	ldr		hold, state_hold		// state->hold
-	nop
-#else
-	// for architecture < armv5, ldrd/strd is not available
-	str		r2, beg_loc				// store beg
-	str		r3, end_loc				// store end
-	ldr		r2, state_wsize			// state->wsize
-	ldr		r3, state_whave			// state->whave
-	str		r2, wsize_loc			// store wsize
-	str		r3, whave_loc			// store whave
-	ldr		r6, state_window		// state->window
-	ldr		hold, state_hold		// state->hold
-#endif
-
-	ldr		ip, state_lencode		// lencode
-	mov		r3, #1					// used to derive lmask and dmask
-	ldr		write, state_write		// write (r9 from this point on) : window write index
-	nop
-	str		ip, [sp, #40]			// save lencode
-	sub		ip, r6, #1				// window-1
-	str		r6, window_loc			// store window
-	str		ip, windowm1_loc		// store window-1
-	ldr		r2, state_lenbits		// lenbits
-	ldr		bits, state_bits		// bits, use lr from this point on
-	ldr		distcode, state_distcode// distcode, use r8
-	mov		r2, r3, asl r2			// (1<<lenbits)
-	ldr		r12, state_distbits		// distbits
-	sub		r2, r2, #1				// lmask = (1U << state->lenbits) - 1;
-	mov		r3, r3, asl r12			// (1U << state->distbits)
-	sub		r3, r3, #1				// dmask = (1U << state->distbits) - 1;
-
-#if defined(_ARM_ARCH_5)
-	strd	r2, r3, lmask_loc		// store lmask/dmask
-#else
-	str		r2, lmask_loc			// lmask
-	str		r3, dmask_loc			// dmask
-#endif
-
-	// start the do loop decoding literals and length/distances 
-	// until end-of-block or not enough input data or output space
-
-do_loop:
-	cmp		bits, #15				// bits vs 15
-	ldr		r1, lmask_loc			// lmask
-	bge		bitsge15				// if bits >= 15, skip loading new 16 bits	
-
-	// this is a shortcut that relies on the processor reading data in little-endian mode
-	ldrh	r3, [in,#1]					// read 2 bytes 
-	add		in, #2						// in pointer += 2
-	add		hold, hold, r3, asl bits	// deposit the new 2 bytes into hold
-	add		bits, #16					// bits count += 16
-
-bitsge15:
-	ldr		ip, [sp, #40]			// restore lencode
-	and		r3, hold, r1				// r3 = hold & lmask
-	b		dolen
-
-op_not_zero:
-
-	tst	r2, #16							// if (op&16)
-	bne	length_base						// 		branch to length_base
-
-	tst	r2, #64							// else if (op&64) 
-	bne	end_of_block					// 		branch to end_of_block processing 
-
-	// 2nd-level length code, this is the part where if ((op & 64) == 0) { ... }
-
-	// this.val + (hold & ((1U << op) - 1)); 
-	// r3 = r1 + hold & ((1<<r2)-1);
-
-	rsb		r12, r2, #32				// r12 = (32-op)
-	ror 	r3, hold, r2				// rotate the op least significant bits of hold to MSB
-	add		r3, r1, r3, lsr r12			// r3 = r1 + (op LSBs in hold) = r1 + hold & ((1<<r2)-1); 
-
-	ldr		ip, [sp, #40]			// restore lencode
-
-dolen:
-
-	// code -> 8-bit code, 8-bit bits, 16-bit val
-	ldrb	r2, [ip,r3,asl #2]		// op = (unsigned)(this.bits);
-	add		r3, ip, r3, asl #2		// r3 = this
-	ldrb	ip, [r3, #1]				// ip = this.bits
-	ldrh	r1, [r3, #2]				// r1 = this.value
-	cmp		r2, #0						// op == 0 ?
-
-	mov		hold, hold, lsr ip			// hold >>= this.bits
-	rsb		bits, ip, bits				// bits -= this.bits
-	bne		op_not_zero					// branch to op_not_zero if this.op != 0
-
-	strb	r1, [out, #1]!				// PUP(out) = (unsigned char)(this.val);
-
-do_loop_while:
-	ldr		r1, last_loc				// last
-	ldr		r2, end_loc					// end
-	cmp		in, r1						// compare in vs last 
-	cmpcc	out, r2						// if in < last, compare out vs end
-	bcc		do_loop						// if (in < last && out < end) go back to do_loop
-
-update_state_and_return:
-
-	sub		r2, in, bits, lsr #3		// r2 = in - (bits>>3)
-
-	add		r3, r2, #OFF				// r3 = (in - (bits>>3)) + OFF
-	str		r3, [strm, #0]				// strm->next_in = in + OFF;
-
-	add		r3, out, #OFF				// out + OFF
-	str		r3, [strm, #12]				// strm->next_out = out + OFF;
-
-	ldr		r3, last_loc				// r3 = last
-	ldr		ip, end_loc					// ip = end
-
-	cmp		r3, r2						// compare last vs in
-	addhi	r3, r3, #5					// if last > in, last +=5
-	movls	r6, r3						// o.w., r6 = last
-	rsbls	r3, r6, r2					//       r3 = in-last
-	rsbhi	r3, r2, r3					// r3 = (last+5) - in
-	rsbls	r3, r3, #5					// r3 = 5 - (in-last);
-	cmp		out, ip						// compare out vs end
-	str		r3, [strm, #4]				// strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
-	movcs	r2, ip						// if out>=end, r2=end
-	addcc	r3, ip, #256				// if out<end, r3 = end+256
-	rsbcs	r3, r2, out					// if out>=end, r3 = out-end
-	addcc	r3, r3, #1					// if out<end, r3 = end+257
-	rsbcs	r3, r3, #256				// if out>=end, r3 = 256-(out-end) = 256 + (end-out)
-	and		bits, #7					// this is equivalent to bits -= (bits>>3) << 3;
-	rsbcc	r3, out, r3					// if out<end, r3 = 257+end-out
-	addcs	r3, r3, #1					// if out>=end, r3 = 257 + (end-out)
-	str		r3, [strm, #16]				// strm->avail_out = (unsigned)(out < end ?  257 + (end - out) : 257 - (out - end)); 
-
-	// hold &= (1U << bits) - 1;
-
-	rsb		ip, bits, #32				// 32-bits
-    ror 	hold, hold, bits			// this is equivalent to hold<<(32-bits)
-    lsr 	hold, hold, ip				// logical shift right by (32-bits), hold now only keeps the bits LSBs
-
-	str		bits, state_bits			// state->bits = bits;
-	str		hold, state_hold			// state->hold = hold;
-
-	add		sp, #local_size				// pop out stack memory
-	ldmfd	sp!,{r4-r6,r8-r11,pc}				// restore registers and return
-
-length_base:							// r2=op, r1=lmask
-	ands	r2, r2, #15					// op&=15;
-	mov		r6, r1						// len = (unsigned) this.val;
-	beq		op_is_zero					// if op==0, branch to op_is_zero
-	cmp		r2, bits					// op vs bits
-	ldrhib	r3, [in, #1]!				// if (op>bits) r3 = (PUP(in));
-	addhi	hold, hold, r3, asl bits	// if (op>bits) hold += (unsigned long)(PUP(in)) << bits;
-
-	rsb		ip, r2, #32					// 32-op
-    ror 	r3, hold, r2				// (hold<<(32-op))
-	add		r6, r1, r3, lsr ip			// len += (unsigned)hold & ((1U << op) - 1);
-
-	addhi	bits, bits, #8				// if (op>bits) bits += 8;
-
-	mov		hold, hold, lsr r2			// hold >>= op;
-	rsb		bits, r2, bits				// bits -= op;
-
-op_is_zero:
-	cmp		bits, #14
-	ldrh    r3,[in,#1]                  // if (bits < 15) read 2 bytes (two PUP(in)); read unconditionally for better performance
-    addls   in, #2                      // 	in += 2;
-    addls   hold, hold, r3, asl bits    // 	hold += (unsigned long)(two new bytes) << bits;
-    addls   bits, #16                   // 	bits += 16; }
-
-dodist:
-
-	ldr		r2, dmask_loc				// r2 = dmask
-	and		r3, hold, r2				// r3 = hold & dmask
-	mov		r2, r3, asl #2
-	add		r3, r2, distcode			// &dcode[hold&dmask];
-	ldrb	ip, [r2, distcode]			// op
-	ldrh	r1, [r3, #2]				// dist = (unsigned)(this.val);
-	tst		ip, #16						// op vs 16
-	ldrb	r3, [r3, #1]				// this.bits
-	mov		hold, hold, lsr r3			// hold >>= this.bits;
-	rsb		bits, r3, bits				// bits -= this.bits;
-	bne		distance_base				// if (op&16) { distance base processing  }	
-	tst		ip, #64						// 
-	beq		second_distance_code		// else if ((op&64)==0) branch to 2nd level distance code
-
-	b		invalide_distance_code
-
-check_2nd_level_distance_code:
-
-	tst		r2, #64						// check for else if ((op & 64) == 0) for 2nd-level distance code
-	bne		invalide_distance_code
-
-second_distance_code:
-
-	rsb		r2, ip, #32					// 32-op
-	ror		r3, hold, ip				// hold<<(32-op)
-	add		r3, r1, r3, lsr r2			// this.val + (hold & ((1U << op) - 1))
-
-	mov		r2, r3, asl #2
-	add		r3, r2, distcode			// this = dcode[this.val + (hold & ((1U << op) - 1))];
-	ldrb	r2, [r2, distcode]			// this.op
-	ldrh	r1, [r3, #2]				// this.val
-
-	tst		r2, #16						// op&16
-	ldrb	r3, [r3, #1]				// this.bits
-	mov		ip, r2						// op
-	mov		hold, hold, lsr r3			// hold >>= this.bits
-	rsb		bits, r3, bits				// bits -= this.bits
-	beq		check_2nd_level_distance_code
-
-distance_base:			// this is invoked from if ((op&16)!=0)
-
-	and		r2, ip, #15					// op &= 15;
-	cmp		r2, bits					// op vs bits
-	ldrhib	r3, [in, #1]!				// if (op > bits) (PUP(in))
-	addhi	hold, hold, r3, asl bits	// 		hold += (unsigned long)(PUP(in)) << bits;
-	addhi	bits, bits, #8				//		bits += 8;	
-	cmphi	r2, bits					// 		inner (bits < op) check
-	ldrhib	r3, [in, #1]!				//		if (op > bits) (PUP(in))
-	addhi	hold, hold, r3, asl bits	//			hold += (unsigned long)(PUP(in)) << bits;
-	addhi	bits, bits, #8				//			bits += 8
-
-	rsb		ip, r2, #32					// (32-op)
-	ror		r3, hold, r2				// hold<<(32-op)
-	add		r3, r1, r3, lsr ip			// dist += (unsigned)hold & ((1U << op) - 1);
-	str		r3, dist_loc				// save dist
-
-
-#ifdef INFLATE_STRICT
-	ldr     r1, state_dmax						// r1 = dmax
-	cmp		r3, r1								// dist vs dmax	
-	bgt		invalid_distance_too_far_back		// if dist > dmax, set up msg/mode = bad and break
-#endif
-
-	mov		hold, hold, lsr r2			// hold >>= op ;
-	rsb		bits, r2, bits				// bits -= op;
-
-	ldr		ip, beg_loc					// beg
-	ldr		r1, dist_loc				// dist
-	rsb		r3, ip, out					// (out - beg);
-
-	cmp		r1, r3						// dist vs (out - beg) 
-
-	rsbls	r2, r1, out					// if (dist<=op) r2 = from = out-dist
-	bls		copy_direct_from_output		// if (dist<=op) branch to copy_direct_from_output
-
-	ldr		r2, whave_loc					// whave
-	rsb		r1, r3, r1						// op = dist-op
-	cmp		r2, r1							// whave vs op
-	str		r1, op_loc						// save a copy of op
-	bcc		invalid_distance_too_far_back	// if whave < op,  message invalid distance too far back, and break
-
-	cmp		write, #0						// write
-	bne		non_very_common_case			// if (write != 0) branch to non_very_common_case
-
-	// the following : if (write == 0) { /* very common case */ }
-	ldr		r1, op_loc						// restore op in r1
-	ldr		ip, wsize_loc					// wsize
-	cmp		r6, r1							// len vs op 
-	rsb		r3, r1, ip						// wsize - op
-	ldr		ip, windowm1_loc				// window - 1
-	add		r2, ip, r3						// from = window - 1 + wsize - op : setup for using PUP(from)
-	//movhi	r3, r1							// if len > op, r3 = op
-	//movhi	r1, out							// if len > op, r1 = out
-	bhi		some_from_window				// if (len > op), branch to some_from_window
-
-finish_copy:
-
-	//	while (len > 2) { 
-	//		PUP(out) = PUP(from); 
-	//		PUP(out) = PUP(from); 
-	//		PUP(out) = PUP(from); 
-	//		len -= 3; 
-	//	} 
-	//	if (len) { 
-	//		PUP(out) = PUP(from); 
-	//		if (len > 1) 
-	//		PUP(out) = PUP(from); 
-	//	}
-
-	cmp		r6, #2							// len > 2 ?
-	movls	r1, r6							// if (len<=2) r1 = len
-	bls		lenle2							// if (len<=2) branch to lenle2
-	mov		r1, r6
-fcopy_per3bytes:
-	ldrb	r3, [r2, #1]					// 1st PUP(from)
-	sub		r1, r1, #3						// len-=3
-	cmp		r1, #2							// len > 2 ?
-	strb	r3, [out, #1]					// 1st PUP(out) = PUP(from);
-	ldrb	r3, [r2, #2]					// 2nd PUP(from)
-	add		r2, r2, #3						// from+=3
-	strb	r3, [out, #2]					// 2nd PUP(out) = PUP(from);
-	ldrb	r3, [r2, #0]					// 3rd PUP(from)
-	add		out, out, #3					// out+=3
-	strb	r3, [out, #0]					// 3rd PUP(out) = PUP(from);
-	bgt		fcopy_per3bytes					// while (len>2) back to loop head	
-lenle2:
-	cmp		r1, #0							// len
-	beq		do_loop_while					// back to while loop head if len==0	
-	ldrb	r3, [r2, #1]					// PUP(from)
-	cmp		r1, #2							// check whether len==2
-	strb	r3, [out, #1]!					// PUP(out) = PUP(from);
-	bne		do_loop_while					// back to while loop head if len==1 
-	ldrb	r3, [r2, #2]					// 2nd PUP(from)
-	strb	r3, [out, #1]!					// 2nd PUP(out) = PUP(from);
-	b		do_loop_while					// back to while loop head
-
-end_of_block:
-	tst		r2, #32						// if (op&32)
-	movne	r3, #11						//   TYPE?
-	strne	r3, state_mode				// state-mode = TYPE
-	bne		update_state_and_return		// break the do loop and branch to get ready to return
-	ldr		r3, messages				// "invalid literal/length code" message
-L75:
-	add		r3, pc, r3
-	str		r3, [strm, #24]				// strm->msg = (char *)"invalid literal/length code";
-	mov		r3, #27						// BAD?
-	str		r3, state_mode				// state->mode = BAD;
-	b		update_state_and_return		// break the do loop and branch to get ready to return
-
-//Read_2_bytes:
-//	ldrh	r3,[in,#1]					// 2 (PUP(in)) together
-//	add		in, #2						// 2 in++
-//	add		hold, hold, r3, asl bits	// twice hold += (unsigned long)(PUP(in)) << bits;
-//	add		bits, #16					// 2 bits += 8;
-//	b		dodist						// branch to dodist 
-	nop									// a dummy pad instruction to give better performance
-
-copy_direct_from_output:				// r2 = from = out - dist ;
-
-										// do {
-	ldrb	r3, [r2, #1]				// 	1st PUP(from)
-	sub		r6, r6, #3					// 	len-=3
-	cmp		r6, #2						// 	len vs 2
-	strb	r3, [out, #1]				// 	1st PUP(out) = PUP(from);
-	ldrb	r3, [r2, #2]				// 	2nd PUP(from)
-	add		r2, r2, #3					// 	update from+=3
-	strb	r3, [out, #2]				// 	2nd PUP(out) = PUP(from);
-	ldrb	r3, [r2, #0]				// 	3rd PUP(from);
-	add		out, out, #3				// 	update out+=3
-	strb	r3, [out, #0]				// 	3rd PUP(out) = PUP(from);
-	bhi		copy_direct_from_output		// while (len>2);
-
-	// len in r6 can now be 0 1 or 2
-
-	subs    r6,#1						// len--;
-    ldrb    r3, [r2, #1]				// PUP(from)
-    blt     do_loop_while				// if len<0 back to while loop head
-    strb    r3, [out, #1]!				// PUP(out) = PUP(from);
-    subs    r6, #1						// len--;
-    ldrb    r3, [r2, #2]				// 2nd PUP(from)
-    blt     do_loop_while				// if len<0 back to while loop head
-    strb    r3, [out, #1]!				// 2nd PUP(out) = PUP(from);
-    b       do_loop_while				// back to while loop head
-
-
-invalide_distance_code:
-	ldr		r3, messages+4				// "invalid distance code"
-L72:
-	add		r3, pc, r3
-	str		r3, [strm, #24]				// strm->msg = (char *)"invalid distance code";
-	mov		r3, #27
-	str		r3, state_mode				// state->mode = BAD;
-	b		update_state_and_return		// break, restore registers, and return
-
-
-some_from_window:
-	ldr		r3, dist_loc				// dist
-	rsb		r6, r1, r6					// len -= op 
-some_from_window_loop:					// do {
-	ldrb	ip, [r2, #1]!				// 		PUP(from);
-	subs	r1, #1						//		--op	
-	strb	ip, [out, #1]!				//		PUP(out) = PUP(from);
-	bne		some_from_window_loop		// } while(op);
-	rsb		r2, r3, out					// from = out - dist;
-	b		finish_copy
-
-non_very_common_case:
-	ldr		r1, op_loc					// restore op in r1
-	cmp		write, r1					// write vs op
-	bcs		contiguous_in_window 		// if (write >= op) branch to contiguous_in_window
-
-	/* wrap around window */
-
-	ldr		r2, wsize_loc				// wsize
-	ldr		ip, windowm1_loc			// window-1
-	add		r3, write, r2				// r3 = wsize+write
-	rsb		r3, r1, r3					// r3 = wsize+write-op
-	add		r2, ip, r3					// r2 = from = wsize+write-op+window-1;
-	rsb		r1, write, r1				// op -= write;
-
-	cmp		r6, r1						// len vs op
-	bls		finish_copy					// if (len <= op) branch to finish_copy
-	rsb		r6, r1, r6					// len -= op
-waw_loop:								// do {
-	ldrb	r3, [r2, #1]!				// 	PUP(from)
-	subs	r1, r1, #1					//  --op; 
-	strb	r3, [out, #1]!				//  PUP(out) = PUP(from);
-	bne		waw_loop					// } while (op); 
-
-	cmp		write, r6					// write vs len
-	ldr		r2, windowm1_loc			// if (write>=len) r2 = from = window-1;
-	bcs		finish_copy					// if (write>=len) branch to finish_copy
-
-	// some from start of window
-
-	mov		r1, write				// op = write
-	sub		r6, write				// len -= op
-sow_loop:							// do { 
-	ldrb	r3,[r2, #1]!			// 	PUP(from)
-	subs	r1, #1					//  --op;
-	strb	r3, [out,#1]!			//  PUP(out) = PUP(from);
-	bne		sow_loop				// } while (op);
-
-	ldr		r2, dist_loc			// dist
-	rsb		r2, r2, out				// r2 = from = out-dist
-	b		finish_copy				// continue to finish_copy
-
-
-contiguous_in_window:
-	ldr		ip, windowm1_loc		// window-1
-	cmp		r6, r1					// len vs op
-	rsb		r3, r1, write			// r3 = write-op
-	add		r2, ip, r3				// r2 = from = window+write-op-1
-	bls		finish_copy				// if (len <= op) branch to finish_copy
-	rsb		r6, r1, r6				// len -= op 
-	ldr		r3, dist_loc			// dist
-ciw_loop:
-	ldrb	ip, [r2, #1]!			// PUP(from)
-	subs	r1, r1, #1				// op--
-	strb	ip, [out, #1]!			// PUP(out) = PUP(from);
-	bne		ciw_loop				// while (--op); 
-	rsb		r2, r3, out				// from = out - dist;
-	b		finish_copy
-
-invalid_distance_too_far_back:
-	ldr		r3, messages+8					// "invalid distance too far back"
-L42:
-	add		r3, pc, r3
-	str		r3, [strm, #24]					// strm->msg = (char *)"invalid distance too far back";
-	mov		r3, #27
-	str		r3, state_mode					// state->mode = BAD;
-	b		update_state_and_return			// break, restore registers, and return
-
-	.align 2
-messages:
-	.long	LC2-8-(L75)
-	.long	LC1-8-(L72)
-	.long	LC0-8-(L42)
-
-#endif // defined _ARM_ARCH_6
diff --git a/libkern/zlib/inffast.c b/libkern/zlib/inffast.c
index 54f0ee815..8be51094c 100644
--- a/libkern/zlib/inffast.c
+++ b/libkern/zlib/inffast.c
@@ -31,13 +31,14 @@
  */
 
 
-#if defined _ARM_ARCH_6
+#if defined __x86_64__ || defined __i386__ || defined _ARM_ARCH_6
 
-	// dummy definition, for armv6 or above, compile code from inffastS.s
-	typedef char DummyDefinition;
+	// dummy definition; for x86_64, i386, or armv6 and up, code is compiled from the assembly version (inffastS.s)
+    typedef char DummyDefinition;
 
 #else	// architecture
 
+
 #include "zutil.h"
 #include "inftrees.h"
 #include "inflate.h"
@@ -352,4 +353,4 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
 
 #endif /* !ASMINF */
 
-#endif	// architecture
+#endif 	// architecture
diff --git a/libkern/zlib/intel/adler32vec.s b/libkern/zlib/intel/adler32vec.s
new file mode 100644
index 000000000..df9dcf328
--- /dev/null
+++ b/libkern/zlib/intel/adler32vec.s
@@ -0,0 +1,1050 @@
+/* Apple Copyright 2009
+   CoreOS - vector & Numerics, cclee 10-22-09
+
+	The following source code implements a vectorized version of the adler32 computation defined in zlib.
+	The target architectures are x86_64 and i386.
+
+	Given two unsigned 32-bit values adler and sum2 (both already reduced modulo BASE=65521) and a sequence of
+	input bytes x[0],...,x[N-1], the adler-sum2 pair is updated according to
+
+		for (i=0;i<N;i++) {
+			adler = (adler+x[i])%BASE;
+			sum2 = (sum2+adler)%BASE;
+		}
+
+	To reduce the number of modulo operations, it can be shown that if the initial adler and sum2 are less than
+	BASE (=65521), adler and sum2 (in 32-bit representation) will never overflow for the next NMAX=5552 bytes.
+	This simplifies the algorithm to 
+
+		for (i=0;i<N;i+=NMAX) {
+			for (k=0;k<NMAX;k++) {
+				adler+=x[i+k];
+				sum2+=adler;
+			}
+			adler%=BASE;
+			sum2%=BASE;
+		}
+
+	Hand optimization of this function therefore reduces to 
+
+			for (k=0;k<NMAX;k++) {
+                adler+=x[k];
+                sum2+=adler;
+            }
+
+	This subtask turns out to be highly vectorizable. Suppose we perform the adler/sum2 update once per K bytes,
+
+			for (k=0;k<K;k++) {
+                adler+=x[k];
+                sum2+=adler;
+            }
+
+	It can be shown that the sum2-adler pair can be updated according to
+
+		sum2 += adler*K;
+		adler += (x[0] + x[1] + ... + x[K-1]); 
+		sum2 += (x[0]*K + x[1]*(K-1) + ... + x[K-1]*1);
+
+	The last two equations show that the adler-sum2 pair update can be sped up with vector processing.
+	The input vector is [ x[0] x[1] ... x[K-1] ], and we need two coefficient vectors:
+		[ 1 1 1 ... 1 ] for the adler update,
+		[ K K-1 ... 1 ] for the sum2 update.
+
+	The implementation below reads vector (K=16,32,48,64) into xmm registers, and sets up coefficient vectors in xmm
+	registers. It then uses SSE instructions to perform the aforementioned vector computation.
+
+	For i386, NMAX/16 = 347. Whenever a full NMAX-byte block is available, the code invokes the macro DO32 (K=32)
+	173 times, followed by a single DO16 (K=16), before performing one modulo operation on adler and sum2.
+
+	For x86_64 (where more xmm registers are available), NMAX/64 = 86. Whenever a full NMAX-byte block is
+	available, it invokes the macro DO64 (K=64) 86 times, followed by a single DO48 (K=48),
+	before performing one modulo operation on adler and sum2.
+
+*/
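+
+/* As a minimal C model of the per-block update above (illustrative only; the
+   function name is ours, not part of the shipped code):
+
+	#include <stdint.h>
+
+	static void update_K_bytes(uint32_t *adler, uint32_t *sum2,
+	                           const unsigned char *x, unsigned K)
+	{
+		uint32_t s = 0, ws = 0;
+		unsigned i;
+		*sum2 += *adler * K;                 // sum2 += adler*K
+		for (i = 0; i < K; i++) {
+			s  += x[i];                      // x[0] + x[1] + ... + x[K-1]
+			ws += (K - i) * x[i];            // x[0]*K + x[1]*(K-1) + ... + x[K-1]*1
+		}
+		*adler += s;
+		*sum2  += ws;
+	}
+
+   With K=16, s maps onto psadbw and ws onto pmaddubsw/pmaddwd in the macros below.
+*/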
+
+/* cpu_capabilities is probed for kHasSupplementalSSE3 to branch into code with or without Supplemental SSE3.
+
+	Previously, the ssse3 code was intentionally turned off, because Yonah does not support ssse3.
+	Code was added here to probe cpu_capabilities for ssse3 support:
+		if ssse3 is supported, branch to the ssse3-based code; otherwise use the original code.
+
+	cclee 5-3-10
+*/
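+
+/* In C, the dispatch just described looks roughly like this (a sketch only:
+   the probe below is done directly in assembly, the two helper names are
+   hypothetical, and _get_cpu_capabilities() is assumed to be the kernel's
+   accessor for the __cpu_capabilities word):
+
+	#include <i386/cpu_capabilities.h>
+
+	extern uLong adler32_vec_ssse3(uLong adler, uLong sum2, const Bytef *buf, int len);
+	extern uLong adler32_vec_basic(uLong adler, uLong sum2, const Bytef *buf, int len);
+
+	uLong adler32_vec(uLong adler, uLong sum2, const Bytef *buf, int len)
+	{
+		if (_get_cpu_capabilities() & kHasSupplementalSSE3)
+			return adler32_vec_ssse3(adler, sum2, buf, len);
+		return adler32_vec_basic(adler, sum2, buf, len);
+	}
+*/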
+
+#define BASE 65521  /* largest prime smaller than 65536 */
+#define NMAX 5552 	/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+
+// uLong	adler32_vec(unsigned int adler, unsigned int sum2, const Bytef *buf, int len) {
+//    unsigned n;
+//    while (len >= NMAX) {
+//        len -= NMAX;
+//        n = NMAX / 16;          /* NMAX is divisible by 16 */
+//        do {
+//            DO16(buf);          /* 16 sums unrolled */
+//            buf += 16;
+//        } while (--n);
+//        MOD(adler);
+//        MOD(sum2);
+//    }
+//    if (len) {                  /* avoid modulos if none remaining */
+//        while (len >= 16) {
+//            len -= 16;
+//            DO16(buf);
+//            buf += 16;
+//        }
+//        while (len--) {
+//            adler += *buf++;
+//            sum2 += adler;
+//        }
+//        MOD(adler);
+//        MOD(sum2);
+//    }
+//    return adler | (sum2 << 16);
+// }
+
+#if (defined __i386__ || defined __x86_64__)
+
+#include <i386/cpu_capabilities.h>
+
+	.text
+	.align 4,0x90
+.globl _adler32_vec
+_adler32_vec:
+
+#if (defined __i386__)
+
+	pushl	%ebp
+	movl	%esp, %ebp
+
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+
+#ifdef	KERNEL 						// if this is for kernel, need to save xmm registers
+	subl	$140, %esp				// to save %xmm0-%xmm7 into stack, extra 12 to align %esp to 16-byte boundary
+	movaps	%xmm0, 0(%esp)		// save xmm0, offset -12 for ebx/edi/esi
+	movaps	%xmm1, 16(%esp)		// save xmm1
+	movaps	%xmm2, 32(%esp)		// save xmm2
+	movaps	%xmm3, 48(%esp)		// save xmm3
+	movaps	%xmm4, 64(%esp)		// save xmm4
+	movaps	%xmm5, 80(%esp)		// save xmm5
+	movaps	%xmm6, 96(%esp)		// save xmm6
+	movaps	%xmm7, 112(%esp)		// save xmm7, if this is for SSSE3 or above
+#endif
+
+	#define	adler	%edi				// 8(%ebp)
+	#define	sum2	%esi				// 12(%ebp)
+	#define	buf		%ecx				// 16(%ebp)
+	#define	len		%ebx				// 20(%ebp)
+	#define	zero	%xmm0
+	#define ones	%xmm5
+
+	movl	8(%ebp), adler
+	movl	12(%ebp), sum2
+	movl	16(%ebp), buf			// use ecx as buf pointer
+	movl	20(%ebp), len
+
+	.macro		modulo_BASE
+	movl		$$-2146992015, %eax		// 1/BASE in Q47
+	mull		adler					// edx:eax = adler divided by BASE in Q47
+	shrl		$$15, %edx				// edx is now floor(adler/BASE)
+	imull		$$BASE, %edx, %edx		// edx * BASE
+	subl		%edx, adler				// adler -= edx*BASE
+	movl		$$-2146992015, %eax		// 1/BASE in Q47
+	mull		sum2					// edx:eax = sum2 divided by BASE in Q47
+	shrl		$$15, %edx				// edx is now floor(sum2/BASE)
+	imull		$$BASE, %edx, %eax		// eax = edx * BASE
+	subl		%eax, sum2				// sum2 -= edx*BASE
+	.endmacro
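+
+	// C model of the reciprocal trick above (illustrative only):
+	// -2146992015 is 2147975281 unsigned, i.e. ceil(2^47/BASE), so a
+	// multiply-high plus a 47-bit shift yields floor(x/BASE) for any 32-bit x:
+	//
+	//	uint32_t q = (uint32_t)(((uint64_t)x * 2147975281u) >> 47);
+	//	x -= q * 65521u;			/* x is now x % BASE */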
+
+	// update adler/sum2 according to a new 16-byte vector
+	.macro		DO16
+	movaps		(buf), %xmm1			// 16 bytes vector, in xmm1
+	movaps		%xmm1, %xmm3			// a copy of the vector, used for unsigned byte in the destination of pmaddubsw
+	addl		$$16, buf				// buf -> next vector
+	psadbw		zero, %xmm1				// 2 16-bit words to be added for adler in xmm1
+	pmaddubsw	%xmm4, %xmm3			// 8 16-bit words to be added for sum2 in xmm3
+	imull		$$16, adler, %edx		// edx = 16*adler;
+	movhlps		%xmm1, %xmm2			// higher 16-bit word (for adler) in xmm2 	
+	pmaddwd		ones, %xmm3				// 4 32-bit elements to be added for sum2 in xmm3
+	paddq		%xmm2, %xmm1			// xmm1 lower 32-bit to be added to adler
+	addl		%edx, sum2				// sum2 += adler*16;
+	movhlps		%xmm3, %xmm2			// 2 higher 32-bit elements of xmm3 to be added to lower 2 32-bit elements
+	movd		%xmm1, %edx				// to be added to adler
+	paddd		%xmm2, %xmm3			// 2 32-bits elements in xmm3 to be added to sum2
+	addl		%edx, adler				// update adler
+	movd		%xmm3, %edx				// to be added to sum2
+	psrlq		$$32, %xmm3				// another 32-bit to be added to sum2
+	addl		%edx, sum2				// sum2 += 1st half of update
+	movd		%xmm3, %edx				// to be added to sum2
+	addl		%edx, sum2				// sum2 += 2nd half of update
+	.endm
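+
+	// DO16 modeled with SSE/SSSE3 intrinsics (a readability sketch, not the
+	// shipped code; coeff is the 16:1 byte table loaded into xmm4 below and
+	// ones the all-ones word table in xmm5):
+	//
+	//	#include <tmmintrin.h>
+	//	__m128i v   = _mm_load_si128((const __m128i *)buf);
+	//	__m128i sad = _mm_sad_epu8(v, _mm_setzero_si128());	// two 8-byte sums
+	//	__m128i w   = _mm_maddubs_epi16(v, coeff);		// x[i]*(16-i), paired
+	//	__m128i s   = _mm_madd_epi16(w, ones);			// 4 32-bit partial sums
+	//	sum2  += 16 * adler;
+	//	adler += (uint32_t)_mm_cvtsi128_si32(sad) + _mm_extract_epi16(sad, 4);
+	//	s = _mm_add_epi32(s, _mm_srli_si128(s, 8));
+	//	s = _mm_add_epi32(s, _mm_srli_si128(s, 4));
+	//	sum2  += (uint32_t)_mm_cvtsi128_si32(s);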
+
+	// update adler/sum2 according to a new 32-byte vector
+	.macro		DO32
+	imull		$$32, adler, %edx		// edx = 32*adler
+	movaps		(buf), %xmm1			// 1st 16 bytes vector
+	movaps		16(buf), %xmm7			// 2nd 16 bytes vector
+	movaps		%xmm1, %xmm3			// a copy of 1st vector, used for unsigned byte in the destination of pmaddubsw
+	movaps		%xmm7, %xmm2			// a copy of 2nd vector, used for unsigned byte in the destination of pmaddubsw
+	psadbw		zero, %xmm1				// 2 16-bit words to be added for adler in xmm1
+	psadbw		zero, %xmm7				// 2 16-bit words to be added for adler in xmm7
+	addl		%edx, sum2				// sum2 += adler*32;
+	pmaddubsw	%xmm6, %xmm3			// 8 16-bit words to be added for sum2 in xmm3
+	pmaddubsw	%xmm4, %xmm2			// 8 16-bit words to be added for sum2 in xmm2
+	paddd		%xmm7, %xmm1			// 2 16-bit words to be added for adler in xmm1
+	paddd		%xmm2, %xmm3			// 8 16-bit words to be added for sum2 in xmm3
+	addl		$$32, buf				// buf -> vector for next iteration
+	movhlps		%xmm1, %xmm2			// higher 16-bit word (for adler) in xmm2 	
+	pmaddwd		ones, %xmm3				// 4 32-bit elements to be added for sum2 in xmm3
+	paddq		%xmm2, %xmm1			// xmm1 lower 32-bit to be added to adler
+	movhlps		%xmm3, %xmm2			// 2 higher 32-bit elements of xmm3 to be added to lower 2 32-bit elements
+	movd		%xmm1, %edx				// to be added to adler
+	paddd		%xmm2, %xmm3			// 2 32-bits elements in xmm3 to be added to sum2
+	addl		%edx, adler				// update adler
+	movd		%xmm3, %edx				// to be added to sum2
+	psrlq		$$32, %xmm3				// another 32-bit to be added to sum2
+	addl		%edx, sum2				// sum2 += 1st half of update
+	movd		%xmm3, %edx				// to be added to sum2
+	addl		%edx, sum2				// sum2 += 2nd half of update
+	.endm
+
+	// this defines the macro DO16 for SSSE3 not supported
+    .macro      DO16_nossse3
+    movaps      (buf), %xmm1            // 16 bytes vector
+    movaps      %xmm1, %xmm3            // a copy of the vector, the lower 8 bytes to be shuffled into 8 words
+    movaps      %xmm1, %xmm2            // a copy of the vector, the higher 8 bytes to be shuffled into 8 words
+    psrldq      $$8, %xmm2              // shift down 8 bytes, to reuse the shuffle vector
+    punpcklbw   zero, %xmm3             // convert lower 8 bytes into 8 words
+    punpcklbw   zero, %xmm2             // convert higher 8 bytes into 8 words
+    pmullw      %xmm6, %xmm3            // lower 8 words * 16:9
+    pmullw      %xmm4, %xmm2            // higher 8 words * 8:1
+    addl        $$16, buf               // buf -> next vector
+    psadbw      zero, %xmm1             // 2 16-bit words to be added for adler in xmm1
+    paddw       %xmm2, %xmm3            // 8 16-bit words to be added for sum2 in xmm3
+    imull       $$16, adler, %edx       // edx = 16*adler;
+    movhlps     %xmm1, %xmm2            // higher 16-bit word (for adler) in xmm2   
+    pmaddwd     ones, %xmm3             // 4 32-bit elements to be added for sum2 in xmm3
+    paddq       %xmm2, %xmm1            // xmm1 lower 32-bit to be added to adler
+    addl        %edx, sum2              // sum2 += adler*16;
+    movhlps     %xmm3, %xmm2            // 2 higher 32-bit elements of xmm3 to be added to lower 2 32-bit elements
+    movd        %xmm1, %edx             // to be added to adler
+    paddd       %xmm2, %xmm3            // 2 32-bits elements in xmm3 to be added to sum2
+    addl        %edx, adler             // update adler
+    movd        %xmm3, %edx             // to be added to sum2
+    psrlq       $$32, %xmm3             // another 32-bit to be added to sum2
+    addl        %edx, sum2              // sum2 += 1st half of update
+    movd        %xmm3, %edx             // to be added to sum2
+    addl        %edx, sum2              // sum2 += 2nd half of update
+    .endm
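+
+    // The same update without SSSE3's pmaddubsw, modeled with SSE2 intrinsics
+    // (sketch; coeff16to9/coeff8to1 stand for the word tables loaded below):
+    //
+    //	__m128i v  = _mm_load_si128((const __m128i *)buf);
+    //	__m128i z  = _mm_setzero_si128();
+    //	__m128i lo = _mm_unpacklo_epi8(v, z);			// bytes 0..7 as words
+    //	__m128i hi = _mm_unpacklo_epi8(_mm_srli_si128(v, 8), z);	// bytes 8..15
+    //	__m128i w  = _mm_add_epi16(_mm_mullo_epi16(lo, coeff16to9),
+    //	                           _mm_mullo_epi16(hi, coeff8to1));
+    //	// then the same psadbw/pmaddwd reduction as in DO16 above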
+
+#ifdef  KERNEL
+    leal    __cpu_capabilities, %eax                        // %eax -> __cpu_capabilities
+    mov     (%eax), %eax                                    // %eax = __cpu_capabilities
+#else
+    mov    _COMM_PAGE_CPU_CAPABILITIES, %eax
+#endif
+    test    $(kHasSupplementalSSE3), %eax 					// __cpu_capabilities & kHasSupplementalSSE3
+	je		L_no_ssse3
+
+	// i386 adler32 with ssse3
+
+	// need to fill up xmm4/xmm5/xmm6 only if len>=16
+	cmpl	$16, len
+	jl		L_skip_loading_tables
+
+	// set up table starting address to %eax
+	leal	sum2_coefficients, %eax
+
+	// reading coefficients
+	pxor	zero, zero
+	movaps	(%eax), %xmm6			// coefficients for computing sum2 : pmaddubsw 32:17
+	movaps	16(%eax), %xmm4			// coefficients for computing sum2 : pmaddubsw 16:1
+	movaps	32(%eax), ones			// coefficients for computing sum2 : pmaddwd 1,1,...,1
+
+L_skip_loading_tables:
+
+	cmpl	$NMAX, len				// len vs NMAX
+	jl		len_lessthan_NMAX		// if (len < NMAX), skip the following NMAX batches processing
+
+len_ge_NMAX_loop:					// while (len>=NMAX) {
+
+	subl	$NMAX, len				// 		len -= NMAX
+	movl	$(NMAX/32), %eax		// 		n = NMAX/32
+
+n_loop:								// 		do {
+	DO32							// 			update adler/sum2 for a 32-byte input
+	decl 	%eax					// 			n--;
+	jg		n_loop					//  	} while (n);
+	DO16							//  	update adler/sum2 for a 16-byte input
+	modulo_BASE						// 		(adler/sum2) modulo BASE;
+	cmpl	$NMAX, len				//  
+	jge		len_ge_NMAX_loop		// }	/* len>=NMAX */
+
+len_lessthan_NMAX:
+
+	subl	$32, len				// pre-decrement len by 32
+	jl		len_lessthan_32			// if len < 32, skip the 32-vector code
+len32_loop:							// while (len>=32) {
+	DO32							//   update adler/sum2 for a 32-byte input
+	subl	$32, len				//   len -= 32;
+	jge		len32_loop				// } 
+
+len_lessthan_32:
+
+	addl	$(32-16), len			// post-increment by 32 + pre-decrement by 16 on len
+	jl		L_len_lessthan_16			// if len < 16, skip the 16-vector code
+	DO16							// update adler/sum2 for a 16-byte input
+	subl	$16, len				// len -= 16;
+
+L_len_lessthan_16:
+	addl	$16, len				// post-increment len by 16
+	jz		len_is_zero				// if len==0, branch over scalar processing
+
+0:									// while (len) {
+	movzbl	(buf), %edx				// 	new input byte
+	incl	buf						// 	buf++
+	addl	%edx, adler				// 	adler += *buf
+	addl	adler, sum2				// 	sum2 += adler
+	subl	$1, len					// 	len--
+	jg		0b						// }
+
+len_is_zero:
+
+	modulo_BASE						// (adler/sum2) modulo BASE;
+
+	// construct 32-bit (sum2<<16 | adler) to be returned
+
+	sall	$16, sum2				// sum2 <<16
+	movl	adler, %eax				// adler		
+	orl		sum2, %eax				// sum2<<16 | adler
+
+
+#ifdef	KERNEL 					// if this is for kernel code, need to restore xmm registers
+	movaps	(%esp), %xmm0		// restore xmm0, offset -12 for ebx/edi/esi
+	movaps	16(%esp), %xmm1		// restore xmm1
+	movaps	32(%esp), %xmm2		// restore xmm2
+	movaps	48(%esp), %xmm3		// restore xmm3
+	movaps	64(%esp), %xmm4		// restore xmm4
+	movaps	80(%esp), %xmm5		// restore xmm5
+	movaps	96(%esp), %xmm6		// restore xmm6
+	movaps	112(%esp), %xmm7	// restore xmm7, if this is for SSSE3 or above
+	addl	$140, %esp			// we've already restored %xmm0-%xmm7 from stack
+#endif
+
+    popl   %esi
+    popl   %edi
+	popl   %ebx
+	leave						// pop ebp out from stack
+	ret
+
+
+L_no_ssse3:
+
+	// i386 adler32 without ssse3
+
+	// need to fill up xmm4/xmm5/xmm6 only if len>=16
+	cmpl	$16, len
+	jl		2f
+
+	// set up table starting address to %eax
+	leal	sum2_coefficients, %eax
+
+	// reading coefficients
+	pxor	zero, zero
+	movaps  48(%eax), %xmm6         // coefficients for computing sum2 : pmaddubsw 16:9
+    movaps  64(%eax), %xmm4         // coefficients for computing sum2 : pmaddubsw 8:1
+    movaps  80(%eax), ones          // coefficients for computing sum2 : pmaddwd 1,1,...,1
+
+2:
+
+	cmpl	$NMAX, len				// len vs NMAX
+	jl		3f						// if (len < NMAX), skip the following NMAX batches processing
+
+0:									// while (len>=NMAX) {
+
+	subl	$NMAX, len				// 		len -= NMAX
+	movl	$(NMAX/16), %eax		// 		n = NMAX/16
+
+1:									// 		do {
+	DO16_nossse3					//			update adler/sum2 for a 16-byte input
+	decl 	%eax					// 			n--;
+	jg		1b						//  	} while (n);
+
+	modulo_BASE						// 		(adler/sum2) modulo BASE;
+
+	cmpl	$NMAX, len				//  
+	jge		0b						// }	/* len>=NMAX */
+
+3:
+
+	subl	$16, len				// pre-decrement len by 16
+	jl		L_len_lessthan_16		// if len < 16, skip the 16-vector code
+	DO16_nossse3					// update adler/sum2 for a 16-byte input
+	subl	$16, len				// len -= 16;
+	jmp		L_len_lessthan_16
+
+
+	.const
+	.align	4
+sum2_coefficients:	// used for vectorizing adler32 computation
+
+	.byte	32
+	.byte	31
+	.byte	30
+	.byte	29
+	.byte	28
+	.byte	27
+	.byte	26
+	.byte	25
+	.byte	24
+	.byte	23
+	.byte	22
+	.byte	21
+	.byte	20
+	.byte	19
+	.byte	18
+	.byte	17
+	.byte	16
+	.byte	15
+	.byte	14
+	.byte	13
+	.byte	12
+	.byte	11
+	.byte	10
+	.byte	9
+	.byte	8
+	.byte	7
+	.byte	6
+	.byte	5
+	.byte	4
+	.byte	3
+	.byte	2
+	.byte	1
+
+	// coefficients for pmaddwd, to combine into 4 32-bit elements for sum2
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+
+
+	// data for without ssse3
+
+	.word   16
+    .word   15
+    .word   14
+    .word   13
+    .word   12
+    .word   11
+    .word   10
+    .word   9
+    .word   8
+    .word   7
+    .word   6
+    .word   5
+    .word   4
+    .word   3
+    .word   2
+    .word   1
+
+	// coefficients for pmaddwd, to combine into 4 32-bit elements for sum2
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+
+#else	// (defined __x86_64__)
+
+	movq    __cpu_capabilities@GOTPCREL(%rip), %rax         // %rax -> __cpu_capabilities
+	mov     (%rax), %eax                                    // %eax = __cpu_capabilities
+	test    $(kHasSupplementalSSE3), %eax                   // __cpu_capabilities & kHasSupplementalSSE3
+    jne      L_has_ssse3
+
+	// ----------------------------------------------------------------------------------
+	// the following is added for x86_64 without SSSE3 support
+	// it is essentially a translated copy of the i386 code without SSSE3 code
+	// ----------------------------------------------------------------------------------
+
+	// input :
+	//		 adler : rdi
+	//		 sum2  : rsi
+	// 		 buf   : rdx
+	//		 len   : rcx
+
+	pushq	%rbp
+	movq	%rsp, %rbp
+	pushq	%rbx
+
+#ifdef	KERNEL			// if for kernel, save %xmm0-%xmm6
+	subq	$200, %rsp	// allocate room to save %xmm0-%xmm6, keeping %rsp 16-byte aligned
+	movaps	%xmm0, -32(%rbp)
+	movaps	%xmm1, -48(%rbp)
+	movaps	%xmm2, -64(%rbp)
+	movaps	%xmm3, -80(%rbp)
+	movaps	%xmm4, -96(%rbp)
+	movaps	%xmm5, -112(%rbp)
+	movaps	%xmm6, -128(%rbp)
+#endif
+
+	#define	adler	%rdi				// 1st argument
+	#define	sum2	%rsi				// 2nd argument
+	#define	buf		%rcx				// copied from %rdx (3rd argument)
+	#define	len		%rbx				// copied from %rcx (4th argument)
+	#define	zero	%xmm0
+	#define ones	%xmm5
+
+	movq	%rcx, len
+	movq	%rdx, buf
+
+	.macro		modulo_BASE
+	movl		$$-2146992015, %eax		// 1/BASE in Q47
+	mull		%edi					// edx:eax = adler divided by BASE in Q47
+	shrl		$$15, %edx				// edx is now floor(adler/BASE)
+	imull		$$BASE, %edx, %edx		// edx * BASE
+	subq		%rdx, adler				// adler -= edx*BASE
+	movl		$$-2146992015, %eax		// 1/BASE in Q47
+	mull		%esi					// edx:eax = sum2 divided by BASE in Q47
+	shrl		$$15, %edx				// edx is now floor(sum2/BASE)
+	imull		$$BASE, %edx, %eax		// eax = edx * BASE
+	subq		%rax, sum2				// sum2 -= edx*BASE
+	.endmacro
+
+	// update adler/sum2 according to a new 16-byte vector, no ssse3
+	.macro		DO16_nossse3
+    movaps      (buf), %xmm1            // 16 bytes vector
+    movaps      %xmm1, %xmm3            // a copy of the vector, the lower 8 bytes to be shuffled into 8 words
+    movaps      %xmm1, %xmm2            // a copy of the vector, the higher 8 bytes to be shuffled into 8 words
+    psrldq      $$8, %xmm2              // shift down 8 bytes, to reuse the shuffle vector
+    punpcklbw   zero, %xmm3             // convert lower 8 bytes into 8 words
+    punpcklbw   zero, %xmm2             // convert higher 8 bytes into 8 words
+    pmullw      %xmm6, %xmm3            // lower 8 words * 16:9
+    pmullw      %xmm4, %xmm2            // higher 8 words * 8:1
+    add	        $$16, buf               // buf -> next vector
+    psadbw      zero, %xmm1             // 2 16-bit words to be added for adler in xmm1
+    paddw       %xmm2, %xmm3            // 8 16-bit words to be added for sum2 in xmm3
+    imulq       $$16, adler, %rdx       // edx = 16*adler;
+    movhlps     %xmm1, %xmm2            // higher 16-bit word (for adler) in xmm2   
+    pmaddwd     ones, %xmm3             // 4 32-bit elements to be added for sum2 in xmm3
+    paddq       %xmm2, %xmm1            // xmm1 lower 32-bit to be added to adler
+    add         %rdx, sum2              // sum2 += adler*16;
+    movhlps     %xmm3, %xmm2            // 2 higher 32-bit elements of xmm3 to be added to lower 2 32-bit elements
+    movd        %xmm1, %edx             // to be added to adler
+    paddd       %xmm2, %xmm3            // 2 32-bits elements in xmm3 to be added to sum2
+    addq        %rdx, adler             // update adler
+    movd        %xmm3, %edx             // to be added to sum2
+    psrlq       $$32, %xmm3             // another 32-bit to be added to sum2
+    addq        %rdx, sum2              // sum2 += 1st half of update
+    movd        %xmm3, %edx             // to be added to sum2
+    addq        %rdx, sum2              // sum2 += 2nd half of update
+	.endm
+
+	// need to fill up xmm4/xmm5/xmm6 only if len>=16
+	cmpq	$16, len
+	jl		0f
+
+	// set up table starting address in %rax
+	leaq    sum2_coefficients_nossse3(%rip), %rax
+
+	// reading coefficients
+	pxor	zero, zero
+	movaps  (%rax), %xmm6           // coefficients for computing sum2 : pmaddubsw 16:9
+    movaps  16(%rax), %xmm4         // coefficients for computing sum2 : pmaddubsw 8:1
+    movaps  32(%rax), ones          // coefficients for computing sum2 : pmaddwd 1,1,...,1
+0:
+
+	cmp		$NMAX, len				// len vs NMAX
+	jl		3f						// if (len < NMAX), skip the following NMAX batches processing
+
+0:									// while (len>=NMAX) {
+
+	sub		$NMAX, len				// 		len -= NMAX
+	mov		$(NMAX/16), %eax		// 		n = NMAX/16
+
+1:									// 		do {
+	DO16_nossse3					//			update adler/sum2 for a 16-byte input
+	decl 	%eax					// 			n--;
+	jg		1b						//  	} while (n);
+
+	modulo_BASE						// 		(adler/sum2) modulo BASE;
+
+	cmp		$NMAX, len				//  
+	jge		0b						// }	/* len>=NMAX */
+
+3:
+
+	sub		$16, len				// pre-decrement len by 16
+	jl		2f						// if len < 16, skip the 16-vector code
+	DO16_nossse3					// update adler/sum2 for a 16-byte input
+	sub		$16, len				// len -= 16;
+
+2:
+	add		$16, len				// post-increment len by 16
+	jz		1f						// if len==0, branch over scalar processing
+
+0:									// while (len) {
+	movzbq	(buf), %rdx				// 	new input byte
+	incq	buf						// 	buf++
+	addq	%rdx, adler				// 	adler += *buf
+	addq	adler, sum2				// 	sum2 += adler
+	decq	len						// 	len--
+	jg		0b						// }
+
+1:
+
+	modulo_BASE						// (adler/sum2) modulo BASE;
+
+	// construct 32-bit (sum2<<16 | adler) to be returned
+
+	salq	$16, sum2				// sum2 <<16
+	movq	adler, %rax				// adler		
+	orq		sum2, %rax				// sum2<<16 | adler
+
+#ifdef	KERNEL 					// if this is for kernel code, need to restore xmm registers
+	movaps	-32(%rbp), %xmm0
+	movaps	-48(%rbp), %xmm1
+	movaps	-64(%rbp), %xmm2
+	movaps	-80(%rbp), %xmm3
+	movaps	-96(%rbp), %xmm4
+	movaps	-112(%rbp), %xmm5
+	movaps	-128(%rbp), %xmm6
+	addq	$200, %rsp	// release the stack area used to save %xmm0-%xmm6 (already restored)
+#endif
+
+	popq   %rbx
+	leave
+	ret
+
+
+
+	.const
+	.align	4
+sum2_coefficients_nossse3:	// used for vectorizing adler32 computation
+
+	// data for without ssse3
+
+	.word   16
+    .word   15
+    .word   14
+    .word   13
+    .word   12
+    .word   11
+    .word   10
+    .word   9
+    .word   8
+    .word   7
+    .word   6
+    .word   5
+    .word   4
+    .word   3
+    .word   2
+    .word   1
+
+	// coefficients for pmaddwd, to combine into 4 32-bit elements for sum2
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+
+
+	.text
+
+	// ----------------------------------------------------------------------------------
+	// the following is the original x86_64 adler32_vec code that uses SSSE3 instructions
+	// ----------------------------------------------------------------------------------
+
+L_has_ssse3:
+
+	// input :
+	//		 adler : rdi
+	//		 sum2  : rsi
+	// 		 buf   : rdx
+	//		 len   : rcx
+
+	pushq	%rbp
+	movq	%rsp, %rbp
+	pushq	%rbx
+
+#ifdef	KERNEL			// if for kernel, save %xmm0-%xmm11
+	subq	$200, %rsp	// allocate for %xmm0-%xmm11 (192 bytes), extra 8 to align %rsp to 16-byte boundary
+	movaps	%xmm0, -32(%rbp)
+	movaps	%xmm1, -48(%rbp)
+	movaps	%xmm2, -64(%rbp)
+	movaps	%xmm3, -80(%rbp)
+	movaps	%xmm4, -96(%rbp)
+	movaps	%xmm5, -112(%rbp)
+	movaps	%xmm6, -128(%rbp)
+	movaps	%xmm7, -144(%rbp)
+	movaps	%xmm8, -160(%rbp)
+	movaps	%xmm9, -176(%rbp)
+	movaps	%xmm10, -192(%rbp)
+	movaps	%xmm11, -208(%rbp)
+#endif
+
+	#define	adler	%rdi				// 1st argument
+	#define	sum2	%rsi				// 2nd argument
+	#define	buf		%rcx				// copied from %rdx (3rd argument)
+	#define	len		%rbx				// copied from %rcx (4th argument)
+	#define	zero	%xmm0
+	#define ones	%xmm5
+
+	movq	%rcx, len
+	movq	%rdx, buf
+
+	// update adler/sum2 according to a new 16-byte vector
+	.macro		DO16
+	movaps		(buf), %xmm1			// 16 bytes vector
+	movaps		%xmm1, %xmm3			// a copy of the vector, used for unsigned byte in the destination of pmaddubsw
+	addq		$$16, buf				// buf -> next vector
+	psadbw		zero, %xmm1				// 2 16-bit words to be added for adler in xmm1
+	pmaddubsw	%xmm4, %xmm3			// 8 16-bit words to be added for sum2 in xmm3
+	imulq		$$16, adler, %rdx		// edx = 16*adler;
+	movhlps		%xmm1, %xmm2			// higher 16-bit word (for adler) in xmm2 	
+	pmaddwd		ones, %xmm3				// 4 32-bit elements to be added for sum2 in xmm3
+	paddq		%xmm2, %xmm1			// xmm1 lower 32-bit to be added to adler
+	addq		%rdx, sum2				// sum2 += adler*16;
+	movhlps		%xmm3, %xmm2			// 2 higher 32-bit elements of xmm3 to be added to lower 2 32-bit elements
+	movd		%xmm1, %edx				// to be added to adler
+	paddd		%xmm2, %xmm3			// 2 32-bits elements in xmm3 to be added to sum2
+	addq		%rdx, adler				// update adler
+	movd		%xmm3, %edx				// to be added to sum2
+	psrlq		$$32, %xmm3				// another 32-bit to be added to sum2
+	addq		%rdx, sum2				// sum2 += 1st half of update
+	movd		%xmm3, %edx				// to be added to sum2
+	addq		%rdx, sum2				// sum2 += 2nd half of update
+	.endm
+
+	// update adler/sum2 according to a new 32-byte vector
+	.macro		DO32
+	imulq		$$32, adler, %rdx		// edx = 32*adler
+	movaps		(buf), %xmm1			// 1st 16 bytes vector
+	movaps		16(buf), %xmm7			// 2nd 16 bytes vector
+	movaps		%xmm1, %xmm3			// a copy of 1st vector, used for unsigned byte in the destination of pmaddubsw
+	movaps		%xmm7, %xmm2			// a copy of 2nd vector, used for unsigned byte in the destination of pmaddubsw
+	psadbw		zero, %xmm1				// 2 16-bit words to be added for adler in xmm1
+	psadbw		zero, %xmm7				// 2 16-bit words to be added for adler in xmm7
+	addq		%rdx, sum2				// sum2 += adler*32;
+	pmaddubsw	%xmm6, %xmm3			// 8 16-bit words to be added for sum2 in xmm3
+	pmaddubsw	%xmm4, %xmm2			// 8 16-bit words to be added for sum2 in xmm2
+	paddd		%xmm7, %xmm1			// 2 16-bit words to be added for adler in xmm1
+	paddw		%xmm2, %xmm3			// 8 16-bit words to be added for sum2 in xmm3
+	addq		$$32, buf				// buf -> vector for next iteration
+	movhlps		%xmm1, %xmm2			// higher 16-bit word (for adler) in xmm2 	
+	pmaddwd		ones, %xmm3				// 4 32-bit elements to be added for sum2 in xmm3
+	paddq		%xmm2, %xmm1			// xmm1 lower 32-bit to be added to adler
+	movhlps		%xmm3, %xmm2			// 2 higher 32-bit elements of xmm3 to be added to lower 2 32-bit elements
+	movd		%xmm1, %edx				// to be added to adler
+	paddd		%xmm2, %xmm3			// 2 32-bits elements in xmm3 to be added to sum2
+	addq		%rdx, adler				// update adler
+	movd		%xmm3, %edx				// to be added to sum2
+	psrlq		$$32, %xmm3				// another 32-bit to be added to sum2
+	addq		%rdx, sum2				// sum2 += 1st half of update
+	movd		%xmm3, %edx				// to be added to sum2
+	addq		%rdx, sum2				// sum2 += 2nd half of update
+	.endm
+
+	// update adler/sum2 according to a new 48-byte vector
+
+	.macro		DO48
+	imulq		$$48, adler, %rdx		// edx = 48*adler
+
+	movaps		(buf), %xmm7			// 1st 16 bytes vector
+	movaps		16(buf), %xmm10			// 2nd 16 bytes vector
+	movaps		32(buf), %xmm11			// 3rd 16 bytes vector
+
+	movaps		%xmm7, %xmm1			// 1st vector
+	movaps		%xmm10, %xmm2			// 2nd vector
+	movaps		%xmm11, %xmm3			// 3rd vector
+
+	psadbw		zero, %xmm7				// 1st vector for adler
+	psadbw		zero, %xmm10			// 2nd vector for adler
+	psadbw		zero, %xmm11			// 3rd vector for adler
+
+	addq		%rdx, sum2				// sum2 += adler*48;
+
+	pmaddubsw	%xmm9, %xmm1			// 8 16-bit words to be added for sum2 : 1st vector
+	pmaddubsw	%xmm6, %xmm2			// 8 16-bit words to be added for sum2 : 2nd vector
+	pmaddubsw	%xmm4, %xmm3			// 8 16-bit words to be added for sum2 : 3rd vector
+
+	pmaddwd		ones, %xmm1				// 4 32-bit elements to be added for sum2 in xmm1
+	pmaddwd		ones, %xmm2				// 4 32-bit elements to be added for sum2 in xmm2
+	pmaddwd		ones, %xmm3				// 4 32-bit elements to be added for sum2 in xmm3
+
+	paddd		%xmm10, %xmm7			// 2 16-bit words to be added for adler 
+	paddd		%xmm11, %xmm7			// 2 16-bit words to be added for adler
+
+	paddd		%xmm1, %xmm3			// 4 32-bit elements to be added for sum2
+	paddd		%xmm2, %xmm3			// 4 32-bit elements to be added for sum2
+
+	addq		$$48, buf				// buf -> vector for next iteration
+
+	movhlps		%xmm7, %xmm2			// higher 16-bit word (for adler) in xmm2 	
+	paddq		%xmm2, %xmm7			// xmm7 lower 32-bit to be added to adler
+
+	movhlps		%xmm3, %xmm2			// 2 higher 32-bit elements of xmm3 to be added to lower 2 32-bit elements
+	movd		%xmm7, %edx				// to be added to adler
+	paddd		%xmm2, %xmm3			// 2 32-bits elements in xmm3 to be added to sum2
+	addq		%rdx, adler				// update adler
+	movd		%xmm3, %edx				// to be added to sum2
+	psrlq		$$32, %xmm3				// another 32-bit to be added to sum2
+	addq		%rdx, sum2				// sum2 += 1st half of update
+	movd		%xmm3, %edx				// to be added to sum2
+	addq		%rdx, sum2				// sum2 += 2nd half of update
+	.endm
+
+	// update adler/sum2 according to a new 64-byte vector
+	.macro		DO64
+	imulq		$$64, adler, %rdx		// edx = 64*adler
+
+	movaps		(buf), %xmm1			// 1st 16 bytes vector
+	movaps		16(buf), %xmm7			// 2nd 16 bytes vector
+	movaps		32(buf), %xmm10			// 3rd 16 bytes vector
+	movaps		48(buf), %xmm11			// 4th 16 bytes vector
+
+	movaps		%xmm1, %xmm3			// 1st vector
+	movaps		%xmm11, %xmm2			// 4th vector
+	psadbw		zero, %xmm1				// 1st vector for adler
+	psadbw		zero, %xmm11			// 4th vector for adler
+
+	addq		%rdx, sum2				// sum2 += adler*64;
+
+	pmaddubsw	%xmm8, %xmm3			// 8 16-bit words to be added for sum2 : 1st vector
+	pmaddubsw	%xmm4, %xmm2			// 8 16-bit words to be added for sum2 : 4th vector
+	pmaddwd		ones, %xmm3				// 4 32-bit elements to be added for sum2 in xmm3
+	pmaddwd		ones, %xmm2				// 4 32-bit elements to be added for sum2 in xmm2
+
+	paddd		%xmm11, %xmm1			// 2 16-bit words to be added for adler in xmm1
+	paddd		%xmm2, %xmm3			// 4 32-bit elements to be added for sum2 in xmm3 
+
+	movaps		%xmm7, %xmm2			// 2nd vector
+	movaps		%xmm10, %xmm11			// 3rd vector
+
+	psadbw		zero, %xmm7				// 2nd vector for adler
+	psadbw		zero, %xmm10			// 3rd vector for adler
+
+	pmaddubsw	%xmm9, %xmm2			// 8 16-bit words to be added for sum2 : 2nd vector
+	pmaddubsw	%xmm6, %xmm11			// 8 16-bit words to be added for sum2 : 3rd vector 
+	pmaddwd		ones, %xmm2				// 4 32-bit elements to be added for sum2 in xmm2
+	pmaddwd		ones, %xmm11			// 4 32-bit elements to be added for sum2 in xmm11
+
+	paddd		%xmm7, %xmm1			// 2 16-bit words to be added for adler in xmm1
+	paddd		%xmm10, %xmm1			// 2 16-bit words to be added for adler in xmm1
+
+	paddd		%xmm2, %xmm3			// 4 32-bit elements to be added for sum2 in xmm3
+	paddd		%xmm11, %xmm3			// 4 32-bit elements to be added for sum2 in xmm3
+
+	addq		$$64, buf				// buf -> vector for next iteration
+
+	movhlps		%xmm1, %xmm2			// higher 16-bit word (for adler) in xmm2 	
+	paddq		%xmm2, %xmm1			// xmm1 lower 32-bit to be added to adler
+	movhlps		%xmm3, %xmm2			// 2 higher 32-bit elements of xmm3 to be added to lower 2 32-bit elements
+	movd		%xmm1, %edx				// to be added to adler
+	paddd		%xmm2, %xmm3			// 2 32-bits elements in xmm3 to be added to sum2
+	addq		%rdx, adler				// update adler
+	movd		%xmm3, %edx				// to be added to sum2
+	psrlq		$$32, %xmm3				// another 32-bit to be added to sum2
+	addq		%rdx, sum2				// sum2 += 1st half of update
+	movd		%xmm3, %edx				// to be added to sum2
+	addq		%rdx, sum2				// sum2 += 2nd half of update
+	.endm
+
+	// need to fill up xmm4/xmm5/xmm6 only if len>=16
+	cmpq	$16, len
+	jl		skip_loading_tables
+
+	// set up table starting address in %rax
+	leaq    sum2_coefficients(%rip), %rax
+
+	// reading coefficients
+	pxor	zero, zero
+	movaps	(%rax), %xmm8			// coefficients for computing sum2 : pmaddubsw 64:49
+	movaps	16(%rax), %xmm9			// coefficients for computing sum2 : pmaddubsw 48:33
+	movaps	32(%rax), %xmm6			// coefficients for computing sum2 : pmaddubsw 32:17
+	movaps	48(%rax), %xmm4			// coefficients for computing sum2 : pmaddubsw 16:1
+	movaps	64(%rax), ones			// coefficients for computing sum2 : pmaddwd 1,1,...,1
+
+skip_loading_tables:
+
+
+	cmpq	$NMAX, len				// len vs NMAX
+	jl		len_lessthan_NMAX		// if (len < NMAX), skip the following NMAX batches processing
+
+len_ge_NMAX_loop:					// while (len>=NMAX) {
+
+	subq	$NMAX, len				// 		len -= NMAX
+	movq	$(NMAX/64), %rax		// 		n = NMAX/64
+
+n_loop:								// 		do {
+	DO64							// 			update adler/sum2 for a 64-byte input
+	decq 	%rax					// 			n--;
+	jg		n_loop					//  	} while (n);
+
+	DO48							//		update adler/sum2 for a 48-byte input
+
+	modulo_BASE						// 		(adler/sum2) modulo BASE;
+
+	cmpq	$NMAX, len				//  
+	jge		len_ge_NMAX_loop		// }	/* len>=NMAX */
+
+len_lessthan_NMAX:
+
+	subq	$64, len				// pre-decrement len by 64
+	jl		len_lessthan_64			// if len < 64, skip the 64-vector code
+len64_loop:							// while (len>=64) {
+	DO64							//   update adler/sum2 for a 64-byte input
+	subq	$64, len				//   len -= 64;
+	jge		len64_loop				// } 
+
+len_lessthan_64:
+	addq	$(64-32), len			// post-increment 64 + pre-decrement 32 of len
+	jl		len_lessthan_32			// if len < 32, skip the 32-vector code
+	DO32							//   update adler/sum2 for a 32-byte input
+	subq	$32, len				//   len -= 32;
+
+len_lessthan_32:
+
+	addq	$(32-16), len			// post-increment by 32 + pre-decrement by 16 on len
+	jl		len_lessthan_16			// if len < 16, skip the 16-vector code
+	DO16							// update adler/sum2 for a 16-byte input
+	subq	$16, len				// len -= 16;
+
+len_lessthan_16:
+	addq	$16, len				// post-increment len by 16
+	jz		len_is_zero				// if len==0, branch over scalar processing
+
+scalar_loop:						// while (len) {
+	movzbq	(buf), %rdx				// 	new input byte
+	incq	buf						// 	buf++
+	addq	%rdx, adler				// 	adler += *buf
+	addq	adler, sum2				// 	sum2 += adler
+	decq	len						// 	len--
+	jg		scalar_loop				// }
+
+len_is_zero:
+
+	modulo_BASE						// (adler/sum2) modulo BASE;
+
+	// construct 32-bit (sum2<<16 | adler) to be returned
+
+	salq	$16, sum2				// sum2 <<16
+	movq	adler, %rax				// adler		
+	orq		sum2, %rax				// sum2<<16 | adler
+
+
+#ifdef	KERNEL			// if for kernel, restore %xmm0-%xmm11
+	movaps	-32(%rbp), %xmm0
+	movaps	-48(%rbp), %xmm1
+	movaps	-64(%rbp), %xmm2
+	movaps	-80(%rbp), %xmm3
+	movaps	-96(%rbp), %xmm4
+	movaps	-112(%rbp), %xmm5
+	movaps	-128(%rbp), %xmm6
+	movaps	-144(%rbp), %xmm7
+	movaps	-160(%rbp), %xmm8
+	movaps	-176(%rbp), %xmm9
+	movaps	-192(%rbp), %xmm10
+	movaps	-208(%rbp), %xmm11
+	addq	$200, %rsp	// we've already restored %xmm0-%xmm11 from stack
+#endif
+
+	popq   %rbx
+	leave							// pop ebp out from stack
+	ret
+
+
+	.const
+	.align	4
+sum2_coefficients:	// used for vectorizing adler32 computation
+
+	// coefficients for pmaddubsw instruction, used to generate 16-bit elements for sum2
+
+	.byte	64
+	.byte	63
+	.byte	62
+	.byte	61
+	.byte	60
+	.byte	59
+	.byte	58
+	.byte	57
+	.byte	56
+	.byte	55
+	.byte	54
+	.byte	53
+	.byte	52
+	.byte	51
+	.byte	50
+	.byte	49
+	.byte	48
+	.byte	47
+	.byte	46
+	.byte	45
+	.byte	44
+	.byte	43
+	.byte	42
+	.byte	41
+	.byte	40
+	.byte	39
+	.byte	38
+	.byte	37
+	.byte	36
+	.byte	35
+	.byte	34
+	.byte	33
+	.byte	32
+	.byte	31
+	.byte	30
+	.byte	29
+	.byte	28
+	.byte	27
+	.byte	26
+	.byte	25
+	.byte	24
+	.byte	23
+	.byte	22
+	.byte	21
+	.byte	20
+	.byte	19
+	.byte	18
+	.byte	17
+	.byte	16
+	.byte	15
+	.byte	14
+	.byte	13
+	.byte	12
+	.byte	11
+	.byte	10
+	.byte	9
+	.byte	8
+	.byte	7
+	.byte	6
+	.byte	5
+	.byte	4
+	.byte	3
+	.byte	2
+	.byte	1
+
+	// coefficients for pmaddwd, to combine into 4 32-bit elements for sum2
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+	.word	1
+
+#endif	// (defined __i386__)
+
+#endif	// (defined __i386__ || defined __x86_64__)
diff --git a/libkern/zlib/intel/inffastS.s b/libkern/zlib/intel/inffastS.s
new file mode 100644
index 000000000..4252121bf
--- /dev/null
+++ b/libkern/zlib/intel/inffastS.s
@@ -0,0 +1,1179 @@
+#if (defined __i386__)
+
+/* this assembly was first compiled from inffast.c (assuming POSTINC defined, OFF=0) and then hand-optimized */
+
+	.cstring
+LC0:
+	.ascii "invalid distance too far back\0"
+LC1:
+	.ascii "invalid distance code\0"
+LC2:
+	.ascii "invalid literal/length code\0"
+	.text
+	.align 4,0x90
+
+
+#ifdef  INFLATE_STRICT
+	.byte 0
+	.byte 0
+	.byte 0
+	.byte 0
+	.byte 0
+	.byte 0
+	.byte 0
+	.byte 0
+	.byte 0
+	.byte 0
+#endif
+.globl _inflate_fast
+_inflate_fast:
+
+	// set up ebp to refer to arguments strm and start
+	pushl	%ebp
+	movl	%esp, %ebp
+
+	// push edi/esi/ebx into stack
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+
+	// allocate for local variables 92-12=80, + 12 to align %esp to 16-byte boundary
+	subl	$92, %esp
+	movl	8(%ebp), %ebx					
+
+	/* definitions to help code readability */
+
+	#define	bits	%edi
+	#define	strm	%ebx
+	#define	state	28(strm)		// state = (struct inflate_state FAR *)strm->state;	
+	#define	in		-84(%ebp)		// in = strm->next_in - OFF; OFF=0
+	#define	last	-80(%ebp)		// last = in + (strm->avail_in - 5);
+	#define	out		-28(%ebp)		// out = strm->next_out - OFF;
+	#define	beg		-76(%ebp)		// beg = out - (start - strm->avail_out);
+	#define	end		-72(%ebp)		// end = out + (strm->avail_out - 257);
+	#define	wsize	-68(%ebp)		// wsize = state->wsize;
+	#define whave	-64(%ebp)		// whave = state->whave;
+	#define write	-60(%ebp)		// write = state->write;
+	#define window	-56(%ebp)		// window = state->window;
+	#define	hold	-52(%ebp)		// hold = state->hold;
+	#define	lcode	-48(%ebp)		// lcode = state->lencode;
+	#define	dcode	-44(%ebp)		// dcode = state->distcode;
+	#define	lmask	-40(%ebp)		// lmask = (1U << state->lenbits) - 1; 
+	#define	dmask	-36(%ebp)		// dmask = (1U << state->distbits) - 1; 
+	#define	len		-32(%ebp)
+	#define dmax	-20(%ebp)		
+	#define	dist	-16(%ebp)		// dist
+	#define	write_wsize	-24(%ebp)	// write+wsize
+	#define	write_1		-88(%ebp)	// write-1
+	#define	op		-92(%ebp)		// op
+
+	movl	(strm), %eax			// strm->next_in
+	movl	%eax, in				// in = strm->next_in - OFF; OFF=0
+
+	subl	$5, %eax				// in - 5;
+	movl	4(strm), %ecx			// strm->avail_in
+	addl	%ecx, %eax				// in + (strm->avail_in - 5);
+	movl	%eax, last				// last = in + (strm->avail_in - 5);
+
+	movl	12(strm), %esi			// strm->next_out
+	movl	%esi, out				// out = strm->next_out - OFF;
+
+	movl	16(strm), %ecx			// strm->avail_out
+	movl	%esi, %eax				// out		
+	subl	12(%ebp), %eax			// out - start
+	addl	%ecx, %eax				// out - (start - strm->avail_out);
+	movl	%eax, beg				// beg = out - (start - strm->avail_out);
+
+	leal	-257(%esi,%ecx), %ecx	// out + (strm->avail_out - 257);
+	movl	%ecx, end				// end = out + (strm->avail_out - 257);
+
+	movl	state, %edx
+
+#ifdef	INFLATE_STRICT
+	movl	20(%edx), %ecx			// state->dmax
+	movl	%ecx, dmax				// dmax = state->dmax;
+#endif
+
+	movl	40(%edx), %ecx			// state->wsize
+	movl	%ecx, wsize				// wsize = state->wsize;
+
+	movl	44(%edx), %ecx			// state->whave
+	movl	%ecx, whave				// whave = state->whave;
+
+	movl	48(%edx), %esi			// state->write
+	movl	%esi, write				// write = state->write;
+
+	movl	52(%edx), %eax			// state->window
+	movl	%eax, window			// window = state->window;
+
+
+	movl	56(%edx), %ecx			// state->hold
+	movl	%ecx, hold				// hold = state->hold
+
+	movl	60(%edx), bits			// bits = state->bits;
+
+	movl	76(%edx), %esi			// state->lencode
+	movl	%esi, lcode				// lcode = state->lencode;
+
+	movl	80(%edx), %eax			// state->distcode
+	movl	%eax, dcode				// dcode = state->distcode;
+
+	movl	84(%edx), %ecx			// state->lenbits
+	movl	$1, %eax
+	movl	%eax, %esi				// a copy of 1
+	sall	%cl, %esi				// 1 << state->lenbits
+	decl	%esi					// (1U << state->lenbits) - 1;
+	movl	%esi, lmask				// lmask = (1U << state->lenbits) - 1;
+
+	movl	88(%edx), %ecx			// state->distbits
+	sall	%cl, %eax				// 1 << state->distbits
+	decl	%eax					// (1U << state->distbits) - 1;
+	movl	%eax, dmask				// dmask = (1U << state->distbits) - 1;
+
+
+	// these 2 might be used often, precomputed and saved in stack	
+	movl	write, %eax
+	addl	wsize, %eax
+	movl	%eax, write_wsize		// write+wsize
+
+	movl	write, %edx
+	decl	%edx
+	movl	%edx, write_1			// write-1
+
+
+L_do_while_loop:						// do {
+
+	cmpl	$15, bits
+	jae		bits_ge_15					//		if (bits < 15) {
+#if 0
+	leal	8(bits), %esi				// esi = bits+8
+	movl	in, %eax					// eax = in
+	movzbl	(%eax), %edx				// edx = *in++
+	movl	bits, %ecx					// cl = bits
+	sall	%cl, %edx					// 1st *in << bits
+	addl	hold, %edx					// hold += 1st *in << bits
+	movzbl	1(%eax), %eax				// 2nd *in
+	movl	%esi, %ecx					// cl = bits+8
+	sall	%cl, %eax					// 2nd *in << (bits+8)
+	addl	%eax, %edx					// hold += 2nd *in << (bits+8) 
+	movl	%edx, hold					// update hold
+	addl	$2, in						// in += 2
+	addl	$16, bits					// bits += 16;
+#else
+	/* from simulation, this code segment performs better than the disabled variant above,
+		possibly because we more often hit aligned memory accesses */
+	movl	in, %ecx					//			unsigned short *inp = (unsigned short *) (in+OFF);
+	movzwl	(%ecx), %eax				// 			*((unsigned short *) in);
+	movl	bits, %ecx					//			bits
+	sall	%cl, %eax					// 			*((unsigned short *) in) << bits
+	addl	%eax, hold					// 			hold += (unsigned long) *((unsigned short *) in) << bits;
+	addl	$2, in						// 			in += 2;
+	addl	$16, bits					// 			bits += 16;
+#endif
+
+bits_ge_15:								// 		}	/* bits < 15 */
+
+	movl	hold, %eax					// 		hold
+	andl	lmask, %eax					// 		hold & lmask;
+	movl	lcode, %esi					// 		lcode[] : 4-byte aligned
+	movl	(%esi,%eax,4), %eax			// 		this = lcode[hold&lmask];
+	jmp		dolen
+	.align 4,0x90
+op_nonzero:
+	movzbl	%al, %ecx					// a copy of op to cl
+	testb	$16, %cl					// if op&16
+	jne		Llength_base				// 		branch to length_base
+
+	testb	$64, %cl					// elif op&64
+	jne		length_2nd_level_else		//		branch to 2nd level length code else conditions
+
+	// 2nd level length code
+
+	movl	$1, %eax
+	sall	%cl, %eax					// 1 << op
+	decl	%eax						// ((1<<op) - 1)
+	andl	hold, %eax					// hold & ((1U << op) - 1)
+	movzwl	%si, %ecx					// this.val
+	addl	%ecx, %eax					// this.val + (hold & ((1U << op) - 1))
+
+	movl	lcode, %ecx					// lcode[] : 4-byte aligned
+	movl	(%ecx,%eax,4), %eax			// this = lcode[this.val + (hold & ((1U << op) - 1))];
+										// goto dolen (compiler rearranged the order of code)
+dolen:
+	movl	%eax, %esi					// make a copy of this (val 16-bit, bits 8-bit, op 8-bit)
+	shrl	$16, %esi					// %esi = this.val;
+	movzbl	%ah, %ecx					// op = (unsigned)(this.bits); 
+	shrl	%cl, hold					// hold >>= op; 
+	subl	%ecx, bits					// bits -= op;
+	testb	%al, %al					// op = (unsigned)(this.op);
+	jne		op_nonzero					// if op!=0, branch to op_nonzero 
+
+	movl	%esi, %ecx					// this.val;
+	movl	out, %eax					// out
+	movb	%cl, (%eax)					// PUP(out) = (unsigned char)(this.val);
+	incl	%eax						// out++;
+	movl	%eax, out					// save out
+
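+	// The 4-byte table entries unpacked above use zlib's layout from
+	// inftrees.h: { unsigned char op, bits; unsigned short val; }.  In C,
+	// the decode step this implements is (cf. inffast.c):
+	//
+	//	this = lcode[hold & lmask];
+	//  dolen:
+	//	hold >>= this.bits;  bits -= this.bits;
+	//	if (this.op == 0) {				/* literal */
+	//		*out++ = (unsigned char)(this.val);
+	//	} else if (this.op & 16) {			/* length base: Llength_base below */
+	//	} else if ((this.op & 64) == 0) {		/* 2nd-level table */
+	//		this = lcode[this.val + (hold & ((1U << this.op) - 1))];
+	//		goto dolen;
+	//	}						/* else end-of-block or error */
+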
+L_tst_do_while_loop_end:
+	movl	last, %eax					// last
+	cmpl	%eax, in					// in vs last
+	jae		return_unused_bytes 		// branch to return_unused_bytes if in >= last
+	movl	end, %edx					// end
+	cmpl	%edx, out					// out vs end
+	jb		L_do_while_loop				// branch to do loop if out < end
+
+return_unused_bytes:
+
+	movl	bits, %eax					// bits
+	shrl	$3, %eax					// len = bits >> 3
+	movl	in, %edx					// in
+	subl	%eax, %edx					// in -= len
+	sall	$3, %eax					// len << 3
+	movl	bits, %ecx					// bits
+	subl	%eax, %ecx					// bits -= len << 3
+
+	movl	%edx, (strm)				// strm->next_in = in + OFF;
+	movl	out, %eax
+	movl	%eax, 12(strm)				// strm->next_out = out + OFF;
+
+	cmpl	%edx, last					// last vs in
+	jbe		L67							// if (last <= in) branch to L67 and return to L69
+	movl	last, %eax					// last
+	addl	$5, %eax					// 5 + last
+	subl	%edx, %eax					// 5 + last - in	
+L69:
+	movl	%eax, 4(strm)				// update strm->avail_in
+
+	movl	end, %eax
+	cmpl	%eax, out					// out vs end
+	jae		L70							// if (out>=end) branch to L70, and return to L72
+	addl	$257, %eax					// 257 + end
+	subl	out, %eax					// 257 + end - out
+L72:
+	movl	%eax, 16(strm)				// update strm->avail_out
+
+	movl	$1, %eax
+	sall	%cl, %eax					// 1 << bits
+	decl	%eax						// (1 << bits) -1
+	andl	hold, %eax					// hold &= (1U << bits) - 1;
+	movl	state, %esi
+	movl	%eax, 56(%esi)				// state->hold = hold;
+	movl	%ecx, 60(%esi)				// state->bits = bits;
+
+	addl	$92, %esp					// pop out local from stack
+
+	// restore saved registers and return
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	leave
+	ret
+
+	// this code segment is branched in from op_nonzero, with op in cl and this.value in esi
+Llength_base:
+	movzwl	%si, %esi			// this instruction might not be needed; it pads here to give better performance
+	movl	%esi, len			// len = (unsigned)(this.val);
+ 
+	movl	%ecx, %esi			// leave a copy of op at ecx
+	andl	$15, %esi			// op&=15;
+	je		Lop_is_zero			// if (op) {
+	cmpl	bits, %esi			//		op vs bits
+	jbe		Lop_be_bits			//		if (bits < op) {
+	movl	in, %edx			//			in
+	movzbl	(%edx), %eax		//			*in
+	movl	bits, %ecx			//			bits
+	sall	%cl, %eax			//			*in << bits
+	addl	%eax, hold			// 			hold += (unsigned long)(PUP(in)) << bits;
+	incl	%edx				//			in++
+	movl	%edx, in			//			update in
+	addl	$8, bits			//			bits += 8
+Lop_be_bits:					//		}
+	movl	$1, %eax			//		1
+	movl	%esi, %ecx			//		op
+	sall	%cl, %eax			//		1 << op
+	decl	%eax				// 		(1<<op)-1	
+	andl	hold, %eax			//		hold & ((1U << op) - 1)
+	addl	%eax, len			//		len += (unsigned)hold & ((1U << op) - 1);
+	shrl	%cl, hold			//		hold >>= op;
+	subl	%esi, bits			//		bits -= op;
+Lop_is_zero:					// }
+	cmpl	$14, bits			// if (bits < 15) {
+	jbe		bits_le_14			//		branch to refill 16 bits into hold, then branch back to L19
+L19:							// }
+	movl	hold, %eax			// hold
+	andl	dmask, %eax			// hold&dmask
+	movl	dcode, %esi			// dcode[] : 4-byte aligned
+	movl	(%esi,%eax,4), %eax	// this = dcode[hold & dmask];
+	jmp		dodist
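+
+	// C sketch of the length-base step above (following zlib's inflate_fast;
+	// names are those used in the comments in this file):
+	//     len = this.val;  op &= 15;
+	//     if (op) {
+	//         if (bits < op) { hold += (unsigned long)(*in++) << bits; bits += 8; }
+	//         len += (unsigned)hold & ((1U << op) - 1);  hold >>= op;  bits -= op;
+	//     }
+	//     if (bits < 15) { /* refill 16 bits (bits_le_14) */ }
+	//     this = dcode[hold & dmask];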
+
+Lop_16_zero:
+	testb	$64, %cl					// op&64
+	jne		Linvalid_distance_code		// if (op&64)!=0, branch to invalid distance code
+	movl	$1, %eax					// 1
+	sall	%cl, %eax					// (1<<op)
+	decl	%eax						// (1<<op)-1 
+	andl	hold, %eax					// (hold & ((1U << op) - 1))
+	movzwl	%dx, %edx					// this.val
+	addl	%edx, %eax					// this.val + (hold & ((1U << op) - 1))
+	movl	dcode, %edx					// dcode[] : 4-byte aligned
+	movl	(%edx,%eax,4), %eax			// this = dcode[this.val + (hold & ((1U << op) - 1))];
+dodist:
+	movl	%eax, %edx					// this : (val 16-bit, bits 8-bit, op 8-bit)
+	shrl	$16, %edx					// edx = this.val
+	movzbl	%ah, %ecx					// op = (unsigned)(this.bits); 
+	shrl	%cl, hold					// hold >>= op;
+	subl	%ecx, bits					// bits -= op;
+	movzbl	%al, %ecx					// op = (unsigned)(this.op);
+	testb	$16, %cl					// op & 16
+	je		Lop_16_zero					// if (op&16)==0 goto test op&64
+
+Ldistance_base:							// if (op&16) {		/* distance base */
+	andl	$15, %ecx					//	  op &= 15; edx = dist = this.val;
+	movl	%ecx, op					// 		save a copy of op
+	cmpl	bits, %ecx					//		op vs bits
+	jbe		0f							//		if (bits < op) {
+	movl	in, %ecx					//			in
+	movzbl	(%ecx), %eax				//			*in
+	movl	bits, %ecx					//			bits
+	sall	%cl, %eax					//			*in << bits
+	addl	%eax, hold					//			hold += (unsigned long)(PUP(in)) << bits;
+	incl	in							//			in++
+	addl	$8, bits					//			bits += 8
+	cmpl	bits, op					//			op vs bits
+	jbe		0f							//			if (bits < op) {
+	movl	in, %esi					//				in
+	movzbl	(%esi), %eax				// 				*in
+	movl	bits, %ecx					//				cl = bits
+	sall	%cl, %eax					//				*in << bits
+	addl	%eax, hold					//				hold += (unsigned long)(PUP(in)) << bits;
+	incl	%esi						//				in++
+	movl	%esi, in					//				update in
+	addl	$8, bits					//				bits += 8
+0:										// }		}
+
+	movzwl	%dx, %edx					// dist = (unsigned)(this.val); 
+	movl	$1, %eax					// 1
+	movzbl	op, %ecx					// cl = op
+	sall	%cl, %eax					// 1 << op
+	decl	%eax						// ((1U << op) - 1)
+	andl	hold, %eax					// (unsigned)hold & ((1U << op) - 1)
+	addl	%edx, %eax					// dist += (unsigned)hold & ((1U << op) - 1);
+
+#ifdef INFLATE_STRICT
+
+	cmpl	dmax, %eax						// dist vs dmax
+	ja		Linvalid_distance_too_far_back	// if (dist > dmax) break for invalid distance too far back	
+
+#endif
+
+	movl	%eax, dist						// save a copy of dist in stack
+	shrl	%cl, hold						// hold >>= op; 
+	subl	%ecx, bits						// bits -= op;
+
+	movl	out, %eax
+	subl	beg, %eax						// eax = op = out - beg
+	cmpl	%eax, dist						// dist vs op
+	jbe		Lcopy_direct_from_output		// if (dist <= op) branch to copy direct from output	
+
+											// if (dist > op) {
+	movl	dist, %ecx						//	dist
+	subl	%eax, %ecx						//	esi = op = dist - op;
+	cmpl	%ecx, whave						//  whave vs op
+	jb		Linvalid_distance_too_far_back	//  if (op > whave) break for error;
+
+	movl	write, %edx
+	testl	%edx, %edx
+	jne		Lwrite_non_zero					// if (write==0) {
+	movl	wsize, %eax						//		wsize
+	subl	%ecx, %eax						//		wsize-op
+	movl	window, %esi					//		from=window-OFF
+	addl	%eax, %esi						//		from += wsize-op
+	movl	out, %edx						//		out
+	cmpl	%ecx, len						//		len vs op
+	jbe		L38								// 		if (len <= op), skip to copy from output
+    subl    %ecx, len						// len - op
+0:											// do {
+	movzbl  (%esi), %eax					//
+    movb    %al, (%edx)						//	
+    incl    %edx							//
+    incl    %esi							//  	PUP(out) = PUP(from);
+    decl    %ecx							//		--op;
+    jne     0b								// } while (op);
+
+    movl    %edx, out						// update out
+    movl    %edx, %esi						// out 
+    subl    dist, %esi						// esi = from = out - dist;
+
+L38:			/* copy from output */
+
+			//		while (len > 2) {
+            //            PUP(out) = PUP(from);
+            //            PUP(out) = PUP(from);
+            //            PUP(out) = PUP(from);
+            //            len -= 3;
+            //        }
+            //        if (len) {
+            //            PUP(out) = PUP(from);
+            //            if (len > 1)
+            //                PUP(out) = PUP(from);
+            //       }
+
+	movl	len, %ecx						// len
+	movl	out, %edx						// out
+	subl	$3, %ecx						// pre-decrement len by 3
+	jl		1f								// if len < 3, branch to 1f for remaining processing
+0:											// while (len>2) {
+	movzbl	(%esi), %eax
+	movb	%al, (%edx)						// 		PUP(out) = PUP(from);
+	movzbl	1(%esi), %eax
+	movb	%al, 1(%edx)					//		PUP(out) = PUP(from);
+	movzbl	2(%esi), %eax
+	movb	%al, 2(%edx)					//		PUP(out) = PUP(from);
+	addl	$3, %esi						//		from += 3;
+	addl	$3, %edx						//		out += 3;
+	subl	$3, %ecx						//		len -= 3;
+	jge		0b								// }
+	movl	%edx, out						// update out, in case len == 0
+1:
+	addl	$3, %ecx						// post-increment len by 3
+	je		L_tst_do_while_loop_end			// if (len) {
+	movzbl	(%esi), %eax					//
+	movb	%al, (%edx)						//		PUP(out) = PUP(from);
+	incl	%edx							//		out++
+	movl	%edx, out						//		update out, in case len == 1
+	cmpl	$2, %ecx						//
+	jne		L_tst_do_while_loop_end			//		if len==1, break
+	movzbl	1(%esi), %eax
+	movb	%al, (%edx)						//		PUP(out) = PUP(from);
+	incl	%edx							//		out++
+	movl	%edx, out						//		update out
+	jmp		L_tst_do_while_loop_end			//	}
+	
+	.align 4,0x90
+length_2nd_level_else:
+	andl	$32, %ecx						// test end-of-block
+	je		invalid_literal_length_code		// if (op&32)==0, branch for invalid literal/length code break
+	movl	state, %edx						// if (op&32), end-of-block is detected
+	movl	$11, (%edx)						// state->mode = TYPE
+	jmp		return_unused_bytes
+
+L70:
+	movl	out, %edx						// out
+	subl	%edx, end						// (end-out)
+	movl	end, %esi						// %esi = (end-out) = -(out - end);
+	leal	257(%esi), %eax					// %eax = 257 + %esi = 257 - (out -end)
+	jmp		L72								// return to update state and return
+
+L67:										// %edx = in, to return 5 - (in - last) in %eax
+	subl	%edx, last						// last - in 
+	movl	last, %edx						// %edx = last - in = - (in - last);
+	leal	5(%edx), %eax					// %eax = 5 + %edx = 5 - (in - last);
+	jmp		L69								// return to update state and return
+
+bits_le_14:
+#if 1
+	leal	8(bits), %esi				// esi = bits+8
+	movl	in, %eax					// eax = in
+	movzbl	(%eax), %edx				// edx = *in++
+	movl	bits, %ecx					// cl = bits
+	sall	%cl, %edx					// 1st *in << bits
+	addl	hold, %edx					// hold += 1st *in << bits
+	movzbl	1(%eax), %eax				// 2nd *in
+	movl	%esi, %ecx					// cl = bits+8
+	sall	%cl, %eax					// 2nd *in << (bits+8)
+	addl	%eax, %edx					// hold += 2nd *in << (bits+8) 
+	movl	%edx, hold					// update hold
+	addl	$2, in						// in += 2
+	addl	$16, bits					// bits += 16;
+	jmp	L19
+#else
+	/* this code segment does not run as fast as the original code segment above, possibly because
+		the processor needs extra time to handle unaligned short accesses */
+	movl    in, %edx                    //          unsigned short *inp = (unsigned short *) (in+OFF);
+    movzwl  (%edx), %eax                //          *((unsigned short *) in);
+    movl    bits, %ecx                  //          bits
+    sall    %cl, %eax                   //          *((unsigned short *) in) << bits
+    addl    %eax, hold                  //          hold += (unsigned long) *((unsigned short *) in) << bits;
+    addl    $2, %edx                    //          in += 2;
+    addl    $16, %ecx                   //          bits += 16;
+	movl	%edx, in
+	movl	%ecx, bits
+	jmp	L19
+#endif
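+
+	// both halves of the #if above implement the same C statement (sketch):
+	//     hold += (unsigned long)(*(unsigned short *)in) << bits;  in += 2;  bits += 16;
+	// the byte-at-a-time form is preferred here because the unaligned 16-bit
+	// load appears slower on this target (see the comment above)
+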
+invalid_literal_length_code:
+    call    0f
+0:	popl    %eax
+	leal	LC2-0b(%eax), %eax
+	movl	%eax, 24(strm)
+	movl	state, %esi
+	movl	$27, (%esi)
+	jmp		return_unused_bytes
+Linvalid_distance_code:
+    call    0f
+0:	popl    %eax
+	leal	LC1-0b(%eax), %eax
+	movl	%eax, 24(strm)
+	movl	state, %eax
+	movl	$27, (%eax)
+	jmp		return_unused_bytes
+
+#ifdef	INFLATE_STRICT
+	.align	4,0x90
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+#endif
+Lcopy_direct_from_output:
+	movl	out, %edx							// out
+	subl	dist, %edx							// from = out - dist
+	movl	out, %ecx							// out
+	movl	len, %esi							// len
+	subl	$3, %esi							// pre-decrement len by 3
+0:												// do {
+	movzbl	(%edx), %eax
+	movb	%al, (%ecx)							// 	PUP(out) = PUP(from);
+	movzbl	1(%edx), %eax
+	movb	%al, 1(%ecx)						// 	PUP(out) = PUP(from);
+	movzbl	2(%edx), %eax
+	movb	%al, 2(%ecx)						// 	PUP(out) = PUP(from);
+	addl	$3, %edx							// 	from += 3
+	addl	$3, %ecx							// 	out += 3	
+	subl	$3, %esi							// 	len -= 3
+	jge		0b									// } while (len > 2);
+	movl	%ecx, out							// update out in case len == 0
+	addl	$3, %esi							// post-increment len by 3
+	je		L_tst_do_while_loop_end				// if (len) {
+	movzbl	(%edx), %eax
+	movb	%al, (%ecx)							//		PUP(out) = PUP(from);
+	incl	%ecx
+	movl	%ecx, out							//		out++
+	cmpl	$2, %esi							//
+	jne		L_tst_do_while_loop_end				//		if len==1, break
+	movzbl	1(%edx), %eax
+	movb	%al, (%ecx)							//			PUP(out) = PUP(from);
+	incl	%ecx
+	movl	%ecx, out							//			out++
+	jmp		L_tst_do_while_loop_end				// }
+
+	.align 4,0x90
+Lwrite_non_zero:								// %edx = write, %ecx = op
+	movl	window, %esi						// from = window - OFF;
+	cmp		%ecx, %edx							// write vs op, test for wrap around window or contiguous in window
+	jae		Lcontiguous_in_window				// if (write >= op) branch to contiguous in window 
+
+Lwrap_around_window: 							// wrap around window
+	addl	write_wsize, %esi					// from += write+wsize
+	subl	%ecx, %esi							// from += wsize + write - op;		
+	subl	%edx, %ecx							// op -= write
+	cmpl	%ecx, len							// len vs op
+	jbe		L38									// if (len <= op) break to copy from output
+	subl	%ecx, len							// len -= op;
+	movl	out, %edx							// out
+0:												// do {
+	movzbl	(%esi), %eax						// 	*from
+	movb	%al, (%edx)							// 	*out
+	incl	%esi								// 	from++
+	incl	%edx								// 	out++	
+	decl	%ecx								// 	--op
+	jne		0b									// } while (op);
+
+	movl	%edx, out							// save out in case we need to break to L38
+	movl	window, %esi						// from = window - OFF;
+	movl	len, %eax							// len
+	cmpl	%eax, write							// write vs len
+	jae		L38									// if (write >= len) break to L38 
+
+	movl	write, %ecx							// op = write
+	subl	%ecx, len							// len -= op;
+0:												// do {
+	movzbl	(%esi), %eax						//	*from
+	movb	%al, (%edx)							//  *out
+	incl	%esi								//  from++
+	incl	%edx								//	out++
+	decl	%ecx								//  --op
+	jne		0b									// } while (op);
+
+	movl	%edx, %esi							// from = out
+	movl	%edx, out							// save a copy of out
+	subl	dist, %esi							// from = out - dist;
+	jmp		L38									// break to copy from output
+
+Lcontiguous_in_window:								// contiguous in window, edx = write, %ecx = op
+	subl	%ecx, %edx								// write - op
+	addl	%edx, %esi								// from += write - op;
+	cmpl	%ecx, len								// len vs op
+	jbe		L38										// if (len <= op) break to copy from output 
+	movl	out, %edx								// out
+	subl	%ecx, len								// len -= op;
+
+0:													// do {
+	movzbl	(%esi), %eax							// 	*from
+	movb	%al, (%edx)								// 	*out
+	incl	%esi									// 	from++
+	incl	%edx									// 	out++
+	decl	%ecx									// 	op-- 
+	jne		0b										// } while (op); 
+
+	movl	%edx, out								// update out
+	movl	%edx, %esi								// from = out
+	subl	dist, %esi								// from = out - dist;
+	jmp		L38
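+
+	// summary of the window-copy cases above, in C (a sketch of zlib's
+	// logic, with op = dist - (out - beg) computed at entry):
+	//     if (write == 0)        from = window + wsize - op;          /* whole window valid   */
+	//     else if (write >= op)  from = window + write - op;          /* contiguous in window */
+	//     else                   from = window + wsize + write - op;  /* wrap around window   */
+	// up to op bytes are copied from the window; any remaining len bytes
+	// are then copied from the output itself (from = out - dist)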
+
+Linvalid_distance_too_far_back:
+    call    0f
+0:	popl    %eax
+	leal	LC0-0b(%eax), %eax
+	movl	%eax, 24(strm)
+	movl	state, %ecx
+	movl	$27, (%ecx)
+	jmp		return_unused_bytes
+
+#endif
+
+#if (defined __x86_64__)
+	.cstring
+LC0:
+	.ascii "invalid distance too far back\0"
+LC1:
+	.ascii "invalid distance code\0"
+LC2:
+	.ascii "invalid literal/length code\0"
+	.text
+	.align 4,0x90
+
+#ifdef  INFLATE_STRICT
+	.byte 0
+	.byte 0
+	.byte 0
+	.byte 0
+	.byte 0
+	.byte 0
+	.byte 0
+	.byte 0
+	.byte 0
+	.byte 0
+	.byte 0
+	.byte 0
+#endif
+
+.globl _inflate_fast
+_inflate_fast:
+
+	// set up rbp
+	pushq	%rbp
+	movq	%rsp, %rbp
+
+	// save registers in stack
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+
+	#define	strm		%r13
+	#define	state		%rdi
+	#define	in			%r12
+	#define	in_d		%r12d
+	#define	out			%r10
+	#define	out_d		%r10d
+	#define	write		%r15d
+	#define hold		%r9
+	#define holdd		%r9d
+	#define	bits		%r8d
+	#define	lcode		%r14
+	#define	len			%ebx
+	#define from		%rcx
+	#define	dmax		%r11d
+
+	#define	last		-104(%rbp)
+	#define	beg			-96(%rbp)
+	#define	end			-88(%rbp)
+	#define	wsize		-80(%rbp)
+	#define	whave		-76(%rbp)
+	#define	window		-72(%rbp)
+	#define	dcode		-64(%rbp)
+	#define	lmask		-56(%rbp)
+	#define	dmask		-112(%rbp)
+	#define	wsize_write	-116(%rbp)
+	#define	write_1		-128(%rbp)
+	#define	dist		-44(%rbp)
+
+	// reserve stack memory for local variables 128-40=88
+	subq	$88, %rsp
+
+	movq	%rdi, strm
+	movq	56(%rdi), state						// state = (struct inflate_state FAR *)strm->state;	
+	movq	(strm), in							// in = strm->next_in - OFF;
+	movl	8(strm), %eax						// strm->avail_in
+	subl	$5, %eax							// (strm->avail_in - 5)
+	addq	in, %rax							// in + (strm->avail_in - 5)
+	movq	%rax, last							// last = in + (strm->avail_in - 5)
+	movq	24(strm), out						// out = strm->next_out
+	movl	32(strm), %eax						// strm->avail_out
+	subl	%eax, %esi							// (start - strm->avail_out);
+	movq	out, %rdx							// strm->next_out
+	subq	%rsi, %rdx							// out - (start - strm->avail_out); 
+	movq	%rdx, beg							// beg = out - (start - strm->avail_out);
+	subl	$257, %eax							// (strm->avail_out - 257)
+	addq	out, %rax							// out + (strm->avail_out - 257); 
+	movq	%rax, end							// end = out + (strm->avail_out - 257);
+
+#ifdef INFLATE_STRICT
+	movl	20(state), dmax						// dmax = state->dmax;
+#endif
+
+	movl	52(state), %ecx						// state->wsize
+	movl	%ecx, wsize							// wsize = state->wsize;
+	movl	56(state), %ebx						// state->whave;
+	movl	%ebx, whave							// whave = state->whave;
+	movl	60(state), write					// write = state->write;
+	movq	64(state), %rax						// state->window
+	movq	%rax, window						// window = state->window;
+	movq	72(state), hold						// hold = state->hold;
+	movl	80(state), bits						// bits = state->bits;
+
+	movq	96(state), lcode					// lcode = state->lencode;
+	movq	104(state), %rdx					// state->distcode;
+	movq	%rdx, dcode							// dcode = state->distcode;
+
+	movl	116(state), %ecx					// state->distbits
+	movl	$1, %eax
+	movl	%eax, %edx							// 1
+	sall	%cl, %edx							// (1U << state->distbits)
+	movl	112(state), %ecx					// state->lenbits
+	sall	%cl, %eax							// (1U << state->lenbits)
+	decl	%eax								// (1U << state->lenbits) - 1
+	movq	%rax, lmask							// lmask = (1U << state->lenbits) - 1
+	decl	%edx								// (1U << state->distbits) - 1
+	movq	%rdx, dmask							// dmask = (1U << state->distbits) - 1
+
+	movl	wsize, %ecx							// wsize
+	addl	write, %ecx							// wsize + write
+	movl	%ecx, wsize_write					// wsize_write = wsize + write
+
+	leal	-1(%r15), %ebx						// write - 1
+	movq	%rbx, write_1						// write_1 = write - 1
+
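+	// main decode loop; in C this is the skeleton of zlib's inflate_fast
+	// (a sketch -- the labels below map onto it):
+	//     do {
+	//         if (bits < 15) { /* refill: load 16 bits from in */ }
+	//         this = lcode[hold & lmask];                    /* -> Ldolen  */
+	//         /* emit literal, or decode length+distance and copy */
+	//     } while (in < last && out < end);                  /* L_do_while_loop_check */
+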
+L_do_while_loop:
+	cmpl	$14, bits							// bits vs 14
+	ja		0f									// if (bits < 15) {
+	movzwl	(in), %eax							//		read 2 bytes from in
+	movl	bits, %ecx							//		set up cl = bits
+	salq	%cl, %rax							//		(*in) << bits	
+	addq	%rax, hold							// 		hold += (*in) << bits
+	addq	$2, in								//		in += 2
+	addl	$16, bits							//		bits += 16
+0:												// }
+	movq	lmask, %rax							//	lmask
+	andq	hold, %rax							//	hold & lmask
+	jmp		1f
+	.align 4,0x90
+Lop_nonzero:
+	movzbl	%al, %ecx							// op in al and cl 
+	testb	$16, %cl							// check for length base processing (op&16)
+	jne		L_length_base						// if (op&16) branch to length base processing	
+	testb	$64, %cl							// check for 2nd level length code (op&64==0)
+	jne		L_end_of_block						// if (op&64)!=0, branch for end-of-block processing
+
+	/* 2nd level length code : (op&64) == 0*/
+L_2nd_level_length_code:
+	movl	$1, %eax							// 1
+	sall	%cl, %eax							// 1 << op
+	decl	%eax								// ((1U << op) - 1)
+	andq	hold, %rax							// (hold & ((1U << op) - 1))
+	movzwl	%dx, %edx							// this.val
+	addq	%rdx, %rax							// this = lcode[this.val + (hold & ((1U << op) - 1))];
+1:	
+	movl	(lcode,%rax,4), %eax				// this = lcode[hold & lmask];
+Ldolen:
+	movl	%eax, %edx							// a copy of this
+	shrl	$16, %edx							// edx = this.val;
+	movzbl	%ah, %ecx							// op = this.bits
+	shrq	%cl, hold							// hold >>= op; 
+	subl	%ecx, bits							// bits -= op;
+	testb	%al, %al							// op = (unsigned)(this.op);
+	jne		Lop_nonzero							// if (op != 0), branch to Lop_nonzero
+L_literal:
+	movb	%dl, (out)							// *out = this.val
+	incq	out									// out ++
+L_do_while_loop_check:
+	cmpq	last, in							// in vs last
+	jae		L_return_unused_byte				// if in >= last, break to return unused byte processing
+	cmpq	end, out							// out vs end
+	jb		L_do_while_loop						// back to do_while_loop if out < end
+
+	/* return unused bytes (on entry, bits < 8, so in won't go too far back) */
+
+L_return_unused_byte:
+	movl	out_d, %esi
+	jmp		L34
+
+L_length_base:				/* al = cl = op, edx = this.val, op&16 = 16 */ 
+	movzwl	%dx, len							// len = (unsigned)(this.val);
+	movl	%ecx, %edx							// op
+	andl	$15, %edx							// op &= 15;
+	je		1f									// if (op) {
+	cmpl	bits, %edx							//		op vs bits
+	jbe		0f									//		if (bits < op) {
+	movzbl	(in), %eax							//			*in
+	movl	bits, %ecx							//			cl = bits
+	salq	%cl, %rax							//			*in << bits
+	addq	%rax, hold							//			hold += (unsigned long)(PUP(in)) << bits;
+	incq	in									//			in++
+	addl	$8, bits							//			bits += 8
+0:												//		}
+	movl	$1, %eax							//		1
+	movl	%edx, %ecx							//		cl = op
+	sall	%cl, %eax							//		1 << op
+	decl	%eax								//		(1 << op) - 1
+	andl	holdd, %eax							//		 (unsigned)hold & ((1U << op) - 1);
+	addl	%eax, len							//		len += (unsigned)hold & ((1U << op) - 1);
+	shrq	%cl, hold							//		hold >>= op;
+	subl	%edx, bits							//		bits -= op;
+1:												// }
+	cmpl	$14, bits							// bits vs 14
+	jbe		L99									// if (bits < 15), branch to refill 16 bits into hold, then return to L19
+L19:												// }
+	movq	dmask, %rax							// dmask
+	andq	hold, %rax							// hold & dmask
+	movq	dcode, %rdx							// dcode[]
+	movl	(%rdx,%rax,4), %eax					// this = dcode[hold & dmask];
+	jmp		L_dodist
+	.align 4,0x90
+0:												// op&16 == 0, test (op&64)==0 for 2nd level distance code
+	testb	$64, %cl							// op&64	
+	jne		L_invalid_distance_code				// if ((op&64)==0) { /* 2nd level distance code */
+	movl	$1, %eax							//	1
+	sall	%cl, %eax							//  1 << op 
+	decl	%eax								// (1 << op) - 1
+	andq	hold, %rax							// (hold & ((1U << op) - 1))
+	movzwl	%dx, %edx							// this.val	
+	addq	%rdx, %rax							// this.val + (hold & ((1U << op) - 1))
+	movq	dcode, %rcx							// dcode[]
+	movl	(%rcx,%rax,4), %eax					// this = dcode[this.val + (hold & ((1U << op) - 1))];
+L_dodist:
+	movl	%eax, %edx							// this
+	shrl	$16, %edx							// dist = (unsigned)(this.val);
+	movzbl	%ah, %ecx							// cl = op = this.bits
+	shrq	%cl, hold							// hold >>= op;
+	subl	%ecx, bits							// bits -= op;
+	movzbl	%al, %ecx							// op = (unsigned)(this.op);
+	testb	$16, %cl							// (op & 16)	test for distance base
+	je		0b									// if (op&16) == 0, branch to check for 2nd level distance code
+
+L_distance_base:								/* distance base */
+
+	movl	%ecx, %esi							// op
+	andl	$15, %esi							// op&=15
+	cmpl	bits, %esi							// op vs bits
+	jbe		1f									// if (bits < op) {
+	movzbl	(in), %eax							//		*in
+	movl	bits, %ecx							//		cl = bits
+	salq	%cl, %rax							//		*in << bits
+	addq	%rax, hold							//		hold += (unsigned long)(PUP(in)) << bits;
+	incq	in									//		in++
+	addl	$8, bits							//		bits += 8
+	cmpl	bits, %esi							//		op vs bits
+	jbe		1f									//		if (bits < op) {
+	movzbl	(in), %eax							//			*in
+	movl	bits, %ecx							//			cl = bits
+	salq	%cl, %rax							//			*in << bits
+	addq	%rax, hold							//			hold += (unsigned long)(PUP(in)) << bits;
+	incq	in									//			in++
+	addl	$8, bits							//			bits += 8
+1:												// }	}
+
+	movzwl	%dx, %edx							// dist
+	movl	$1, %eax							// 1
+	movl	%esi, %ecx							// cl = op
+	sall	%cl, %eax							// (1 << op)
+	decl	%eax								// (1 << op) - 1
+	andl	holdd, %eax							// (unsigned)hold & ((1U << op) - 1)
+	addl	%edx, %eax							// dist += (unsigned)hold & ((1U << op) - 1);
+	movl	%eax, dist							// save a copy of dist in stack
+
+#ifdef INFLATE_STRICT
+	cmp		%eax, dmax							// dmax vs dist 
+	jb		L_invalid_distance_too_far_back		// if (dmax < dist) break for invalid distance too far back
+#endif
+
+	shrq	%cl, hold							// hold >>= op;
+	subl	%esi, bits							// bits -= op;
+	movl	out_d, %esi							// out
+	movl	out_d, %eax							// out
+	subl	beg, %eax							// op = out - beg
+	cmpl	%eax, dist							// dist vs op,  /* see if copy from window */
+	jbe		L_copy_direct_from_output			// if (dist <= op) branch to copy direct from output
+
+L_distance_back_in_window:			
+
+	movl	dist, %edx							// dist
+	subl	%eax, %edx							// op = dist - op;	/* distance back in window */
+
+	cmpl	%edx, whave							// whave vs op
+	jb		L_invalid_distance_too_far_back		// if (op > whave), break for invalid distance too far back
+
+	testl	write, write						// if (write!=0)
+	jne		L_wrap_around_window				//		branch to wrap around window
+
+L_very_common_case:
+
+	movl	wsize, %eax							//	wsize
+	subl	%edx, %eax							//	wsize - op
+	movq	window, from						//	from = window - OFF;
+	addq	%rax, from							//	from += wsize - op;
+
+	movl	%edx, %esi							//  op
+	cmpl	%edx, len							//  len vs op
+	ja		L_some_from_window					//  if (len > op), branch for aligned code block L_some_from_window
+L38:
+	subl	$3, len								// pre-decrement len by 3
+	jge		0f									// if len >= 3, branch to the aligned code block 
+1:	addl	$3, len								// post-increment len by 3
+	je		L_do_while_loop_check				// if (len==0) break to L_do_while_loop_check
+	movzbl	(from), %eax						// *from
+	movb	%al, (out)							// *out
+	incq	out									// out++
+	cmpl	$2, len								// len vs 2
+	jne		L_do_while_loop_check				// if len!=2 break to L_do_while_loop_check
+	movzbl	1(from), %eax						// *from
+	movb	%al, (out)							// *out
+	incq	out									// out++
+	jmp		L_do_while_loop_check				// break to L_do_while_loop_check
+
+	.align 4,0x90
+0:												// do {				
+	movzbl	(from), %eax						//		*from
+	movb	%al, (out)							//		*out
+	movzbl	1(from), %eax						//		*from
+	movb	%al, 1(out)							//		*out
+	movzbl	2(from), %eax						//		*from
+	movb	%al, 2(out)							//		*out
+	addq	$3, out								//		out += 3
+	addq	$3, from							//		from += 3
+	subl	$3, len								//		len -= 3
+	jge		0b									// } while (len>=0);
+	jmp		1b									// branch back to the possibly unaligned code
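+
+	// C sketch of the copy above (the same loop shown in comments in the
+	// i386 version, with PUP expanded):
+	//     while (len > 2) { *out++ = *from++; *out++ = *from++; *out++ = *from++; len -= 3; }
+	//     if (len) { *out++ = *from++; if (len > 1) *out++ = *from++; }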
+
+	.align 4,0x90
+L_end_of_block:
+	andl	$32, %ecx							// op & 32
+	jne		L101								// if (op&32) branch to end-of-block break
+	leaq	LC2(%rip), from
+	movq	from, 48(strm)						// strm->msg = error message
+	movl	$27, (state)						// state->mode = BAD;
+	movl	out_d, %esi
+
+L34:
+	movl	bits, %eax							// bits
+	shrl	$3, %eax							// len = bits >> 3;
+	mov		%eax, %edx							// len
+	subq	%rdx, in							// in -= len
+	sall	$3, %eax							// len << 3
+	movl	bits, %ecx							// bits
+	subl	%eax, %ecx							// bits -= len << 3
+	movq	in, (strm)							// strm->next_in = in + OFF;
+	movq	out, 24(strm)						// strm->next_out = out + OFF;
+	cmpq	in, last							// last vs in
+	jbe		L67									// if (last <= in) branch to L67 and return to L69
+	movl	last, %eax							// last
+	addl	$5, %eax							// last + 5
+	subl	in_d, %eax							// 5 + last - in
+L69:
+	movl	%eax, 8(strm)						// update strm->avail_in
+
+	cmpq	end, out							// out vs end
+	jae		L70									// if (out >= end) branch to L70, and return to L72
+	movl	end, %eax							// end
+	addl	$257, %eax							// 257 + end
+	subl	%esi, %eax							// 257 + end - out;
+L72:
+	movl	%eax, 32(strm)						// update strm->avail_out
+
+	movl	$1, %eax							// 1
+	sall	%cl, %eax							// 1 << bits
+	decl	%eax								// (1U << bits) - 1
+	andq	hold, %rax							// hold &= (1U << bits) - 1;
+	movq	%rax, 72(state)						// state->hold = hold;
+	movl	%ecx, 80(state)						// state->bits = bits;
+
+	// clear stack memory for local variables
+	addq	$88, %rsp
+
+	// restore registers from stack 
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+
+	// return to caller
+	leave
+	ret
+
+	.align 4,0x90
+L99:
+	leal	8(bits), %esi						//		esi = bits+8
+	movzbl	(in), %edx							//		1st *in	
+	movl	bits, %ecx							//		cl = bits
+	salq	%cl, %rdx							//		1st *in << bits
+	addq	%rdx, hold							// 		1st hold += (unsigned long)(PUP(in)) << bits;
+	movzbl	1(in), %eax							//		2nd *in
+	movl	%esi, %ecx							//		cl = bits + 8
+	salq	%cl, %rax							//		2nd *in << (bits+8)
+	addq	%rax, hold							// 		2nd hold += (unsigned long)(PUP(in)) << (bits+8);
+	addq	$2, in								//		in += 2
+	addl	$16, bits							//		bits += 16
+	jmp		L19
+
+L101:
+	movl	$11, (state)
+	movl	out_d, %esi
+	jmp	L34
+	.align 4,0x90
+L70:
+	movl	end, %eax							// end
+	subl	%esi, %eax							// end - out
+	addl	$257, %eax							// 257 + end - out
+	jmp		L72
+	.align 4,0x90
+L67:
+	movl	last, %eax							// last
+	subl	in_d, %eax							// last - in
+	addl	$5, %eax							// 5 + last - in
+	jmp		L69
+
+
+	.align 4,0x90
+
+	// pad the following 4 bytes to align the major loop to a 16-byte boundary for better performance
+	.byte 0
+	.byte 0
+	.byte 0
+	.byte 0
+L_copy_direct_from_output:
+	mov		dist, %eax						// dist
+	movq	out, %rdx						// out
+	subq	%rax, %rdx						// from = out - dist;
+	subl	$3, len							// pre-decrement len by 3
+											// do {
+0:	movzbl	(%rdx), %eax					// 	*from
+	movb	%al, (out)						//	*out
+	movzbl	1(%rdx), %eax					//	*from
+	movb	%al, 1(out)						//	*out
+	movzbl	2(%rdx), %eax					//	*from
+	movb	%al, 2(out)						//	*out
+	addq	$3, out							//	out+=3
+	addq	$3, %rdx						//  from+=3
+	subl	$3, len							//  len-=3
+	jge		0b								// } while (len>=0);
+1:	addl	$3, len							// post-increment len by 3
+	je		L_do_while_loop_check			// if len==0, branch to do_while_loop_check
+
+	movzbl	(%rdx), %eax					// *from
+	movb	%al, (out)						// *out
+	incq	out								// out++
+	cmpl	$2, len							// len == 2 ?
+	jne		L_do_while_loop_check			// if len==1, branch to do_while_loop_check
+
+	movzbl	1(%rdx), %eax					// *from
+	movb	%al, (out)						// *out
+	incq	out								// out++
+	jmp	L_do_while_loop_check				// branch to do_while_loop_check
+
+	.align 4,0x90
+L_some_from_window:		// on entry: from and out are set up, %esi = %edx = op
+									// do {
+	movzbl	(from), %eax			// 	*from
+	movb	%al, (out)				// 	*out
+	incq	from					// 	from++
+	incq	out						// 	out++
+	decl	%esi					// 	--op
+	jne		L_some_from_window		// } while (op);
+	subl	%edx, len				// len -= op;
+	mov		dist, %eax				// dist
+	movq	out, from				// out
+	subq	%rax, from				// from = out - dist;
+	jmp		L38						// copy from output
+
+	.align 4,0x90
+L_wrap_around_window:
+	cmpl	%edx, write					// write vs op
+	jae		L_contiguous_in_window		// if (write >= op) branch to contiguous in window
+	movl	wsize_write, %eax			// wsize+write
+	subl	%edx, %eax					// wsize+write-op
+	movq	window, from				// from = window - OFF
+	addq	%rax, from					// from += wsize+write-op
+	subl	write, %edx					// op -= write
+	cmpl	%edx, len					// len vs op
+	jbe		L38							// if (len<=op) branch to copy from output
+ 
+	subl	%edx, len					// len -= op;
+0:										// do {
+	movzbl	(from), %eax				//		*from
+	movb	%al, (out)					//		*out
+	incq	from						//		from++
+	incq	out							//		out++
+	decl	%edx						//		op--
+	jne		0b							// } while (op);
+	movq	window, from
+
+	cmpl	len, write					// write vs len
+	jae		L38							// if (write >= len) branch to copy from output
+	movl	write, %esi					// op = write
+	subl	write, len					// len -= op
+1:										// do {
+	movzbl	(from), %eax				//		*from	
+	movb	%al, (out)					//		*out
+	incq	from						//		from++
+	incq	out							//		out++
+	decl	%esi						//		op--
+	jne		1b							// } while (op);
+	mov		dist, %eax					// dist
+	movq	out, from					// out
+	subq	%rax, from					// from = out - dist;
+	jmp		L38
+
+	.align 4,0x90
+L_contiguous_in_window:
+	movl	write, %eax					// write
+	subl	%edx, %eax					// write - op
+	movq	window, from				// from = window - OFF
+	addq	%rax, from					// from += write - op
+	cmpl	%edx, len					// len vs op
+	jbe		L38							// if (len <= op) branch to copy from output
+	subl    %edx, len					// len -= op;
+2:										// do {
+	movzbl	(from), %eax				// 	*from
+	movb	%al, (out)					// 	*out
+	incq	from						// 	from++
+	incq	out							// 	out++
+	decl	%edx						// 	op--
+	jne		2b							// } while (op);
+
+	mov		dist, %eax					// dist
+	movq	out, from					// out
+	subq	%rax, from					// from = out - dist;
+	jmp		L38							// copy from output
+
+	.align 4,0x90
+L_invalid_distance_code:
+	leaq	LC1(%rip), %rdx
+	movq	%rdx, 48(strm)
+	movl	$27, (state)
+	movl	out_d, %esi
+	jmp		L34
+
+L_invalid_distance_too_far_back:
+	leaq	LC0(%rip), %rbx
+	movq	%rbx, 48(strm)				// error message
+	movl	$27, (state)				// state->mode = BAD
+	jmp		L34
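+
+	// each error exit above is, in C (sketch):
+	//     strm->msg = (char *)"invalid ...";  /* LC0/LC1/LC2 */
+	//     state->mode = BAD;                  /* BAD == 27 here */
+	//     /* fall into L34 to save state and return */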
+
+#endif
diff --git a/libsa/Makefile b/libsa/Makefile
index eea21bddd..3815c667f 100644
--- a/libsa/Makefile
+++ b/libsa/Makefile
@@ -8,20 +8,18 @@ include $(MakeInc_cmd)
 include $(MakeInc_def)
 
 INSTINC_SUBDIRS = libsa
-INSTINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS}
 INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS}
 INSTINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS}
 INSTINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS}
 
 
 EXPINC_SUBDIRS = libsa
-EXPINC_SUBDIRS_PPC = ${EXPINC_SUBDIRS}
 EXPINC_SUBDIRS_I386 = ${EXPINC_SUBDIRS}
 EXPINC_SUBDIRS_X86_64 = ${EXPINC_SUBDIRS}
 EXPINC_SUBDIRS_ARM = ${EXPINC_SUBDIRS}
 
 
-SETUP_SUBDIRS = conf
+SETUP_SUBDIRS =
 
 COMP_SUBDIRS = conf
 
diff --git a/libsa/bootstrap.cpp b/libsa/bootstrap.cpp
index 286255265..9ad023c1a 100644
--- a/libsa/bootstrap.cpp
+++ b/libsa/bootstrap.cpp
@@ -36,7 +36,7 @@ extern "C" {
 #include <libkern/OSKextLibPrivate.h>
 #include <libkern/c++/OSKext.h>
 #include <IOKit/IOLib.h>
-#include <IOKit/IORegistryEntry.h>
+#include <IOKit/IOService.h>
 #include <IOKit/IODeviceTreeSupport.h>
 #include <IOKit/IOCatalogue.h>
 
@@ -66,6 +66,7 @@ extern "C" {
 static void bootstrapRecordStartupExtensions(void);
 static void bootstrapLoadSecurityExtensions(void);
 
+
 #if PRAGMA_MARK
 #pragma mark Macros
 #endif
@@ -100,7 +101,7 @@ static const char * sKernelComponentNames[] = {
    "com.apple.iokit.IOSystemManagementFamily",
    "com.apple.iokit.ApplePlatformFamily",
    
-#if defined(__ppc__) || defined(__i386__) || defined(__arm__)
+#if defined(__i386__) || defined(__arm__)
    /* These ones are not supported on x86_64 or any newer platforms.
     * They must be version 7.9.9; check by "com.apple.kernel.", with
     * the trailing period; "com.apple.kernel" always represents the
@@ -163,7 +164,6 @@ KLDBootstrap::KLDBootstrap(void)
     }
     record_startup_extensions_function = &bootstrapRecordStartupExtensions;
     load_security_extensions_function = &bootstrapLoadSecurityExtensions;
-    OSKext::initialize();
 }
 
 /*********************************************************************
@@ -175,6 +175,8 @@ KLDBootstrap::~KLDBootstrap(void)
     if (this != &sBootstrapObject) {
         panic("Attempt to access bootstrap segment.");
     }
+
+
     record_startup_extensions_function = 0;
     load_security_extensions_function = 0;
 }
@@ -218,16 +220,11 @@ KLDBootstrap::readPrelinkedExtensions(
     kernel_section_t * prelinkInfoSect)
 {
     OSArray                   * infoDictArray           = NULL;  // do not release
-    OSArray                   * personalitiesArray      = NULL;  // do not release
     OSObject                  * parsedXML       = NULL;  // must release
     OSDictionary              * prelinkInfoDict         = NULL;  // do not release
     OSString                  * errorString             = NULL;  // must release
     OSKext                    * theKernel               = NULL;  // must release
 
-#if CONFIG_KXLD
-    kernel_section_t          * kernelLinkStateSection  = NULL;  // see code
-#endif
-    kernel_segment_command_t  * prelinkLinkStateSegment = NULL;  // see code
     kernel_segment_command_t  * prelinkTextSegment      = NULL;  // see code
     kernel_segment_command_t  * prelinkInfoSegment      = NULL;  // see code
 
@@ -235,13 +232,13 @@ KLDBootstrap::readPrelinkedExtensions(
     * going to fail the boot, so these won't be cleaned up on error.
     */
     void                      * prelinkData             = NULL;  // see code
-    void                      * prelinkCopy             = NULL;  // see code
     vm_size_t                   prelinkLength           = 0;
+
 #if !__LP64__ && !defined(__arm__)
     vm_map_offset_t             prelinkDataMapOffset    = 0;
-#endif
-
+    void                      * prelinkCopy             = NULL;  // see code
     kern_return_t               mem_result              = KERN_SUCCESS;
+#endif
 
     OSDictionary              * infoDict                = NULL;  // do not release
 
@@ -255,57 +252,6 @@ KLDBootstrap::readPrelinkedExtensions(
         kOSKextLogDirectoryScanFlag | kOSKextLogArchiveFlag,
         "Starting from prelinked kernel.");
 
-   /*****
-    * Wrap the kernel link state in-place in an OSData.
-    * This is unnecessary (and the link state may not be present) if the kernel
-    * does not have kxld support because this information is only used for
-    * runtime linking.
-    */
-#if CONFIG_KXLD
-    kernelLinkStateSection = getsectbyname(kPrelinkLinkStateSegment,
-        kPrelinkKernelLinkStateSection);
-    if (!kernelLinkStateSection) {
-        OSKextLog(/* kext */ NULL,
-            kOSKextLogErrorLevel |
-            kOSKextLogArchiveFlag,
-            "Can't find prelinked kernel link state.");
-        goto finish;
-    }
-
-    theKernel = OSKext::lookupKextWithIdentifier(kOSKextKernelIdentifier);
-    if (!theKernel) {
-        OSKextLog(/* kext */ NULL,
-            kOSKextLogErrorLevel |
-            kOSKextLogArchiveFlag,
-            "Can't find kernel kext object in prelinked kernel.");
-        goto finish;
-    }
-
-    prelinkData = (void *) kernelLinkStateSection->addr;
-    prelinkLength = kernelLinkStateSection->size;
-
-    mem_result = kmem_alloc_pageable(kernel_map,
-        (vm_offset_t *) &prelinkCopy, prelinkLength);
-    if (mem_result != KERN_SUCCESS) {
-        OSKextLog(/* kext */ NULL,
-            kOSKextLogErrorLevel |
-            kOSKextLogGeneralFlag | kOSKextLogArchiveFlag,
-            "Can't copy prelinked kernel link state.");
-        goto finish;
-    }
-    memcpy(prelinkCopy, prelinkData, prelinkLength);
-
-    theKernel->linkState = OSData::withBytesNoCopy(prelinkCopy, prelinkLength);
-    if (!theKernel->linkState) {
-        OSKextLog(/* kext */ NULL,
-            kOSKextLogErrorLevel |
-            kOSKextLogGeneralFlag | kOSKextLogArchiveFlag,
-            "Can't create prelinked kernel link state wrapper.");
-        goto finish;
-    }
-    theKernel->linkState->setDeallocFunction(osdata_kmem_free);
-#endif
-
     prelinkTextSegment = getsegbyname(kPrelinkTextSegment);
     if (!prelinkTextSegment) {
         OSKextLog(/* kext */ NULL,
@@ -318,7 +264,9 @@ KLDBootstrap::readPrelinkedExtensions(
     prelinkData = (void *) prelinkTextSegment->vmaddr;
     prelinkLength = prelinkTextSegment->vmsize;
 
-#if !__LP64__
+#if !__LP64__ && !__arm__
+    /* XXX: arm's pmap implementation doesn't seem to let us do this */
+
     /* To enable paging and write/execute protections on the kext
      * executables, we need to copy them out of the booter-created
      * memory, reallocate that space with VM, then prelinkCopy them back in.
@@ -375,7 +323,7 @@ KLDBootstrap::readPrelinkedExtensions(
     memcpy(prelinkData, prelinkCopy, prelinkLength);
 
     kmem_free(kernel_map, (vm_offset_t)prelinkCopy, prelinkLength);
-#endif /* !__LP64__ */
+#endif /* !__LP64__ && !__arm__ */
 
    /* Unserialize the info dictionary from the prelink info section.
     */
@@ -425,21 +373,6 @@ KLDBootstrap::readPrelinkedExtensions(
         OSSafeReleaseNULL(newKext);
     }
     
-    /* Get all of the personalities for kexts that were not prelinked and
-     * add them to the catalogue.
-     */
-    personalitiesArray = OSDynamicCast(OSArray,
-        prelinkInfoDict->getObject(kPrelinkPersonalitiesKey));
-    if (!personalitiesArray) {
-        OSKextLog(/* kext */ NULL, kOSKextLogErrorLevel | kOSKextLogArchiveFlag,
-            "The prelinked kernel has no personalities array");
-        goto finish;
-    }
-
-    if (personalitiesArray->getCount()) {
-        gIOCatalogue->addDrivers(personalitiesArray);
-    }
-
    /* Store the number of prelinked kexts in the registry so we can tell
     * when the system has been started from a prelinked kernel.
     */
@@ -454,21 +387,12 @@ KLDBootstrap::readPrelinkedExtensions(
         registryRoot->setProperty(kOSPrelinkKextCountKey, prelinkCountObj);
     }
 
-    OSSafeReleaseNULL(prelinkCountObj);
-    prelinkCountObj = OSNumber::withNumber(
-        (unsigned long long)personalitiesArray->getCount(),
-        8 * sizeof(uint32_t));
-    assert(prelinkCountObj);
-    if (prelinkCountObj) {
-        registryRoot->setProperty(kOSPrelinkPersonalityCountKey, prelinkCountObj);
-    }
-
     OSKextLog(/* kext */ NULL,
         kOSKextLogProgressLevel |
         kOSKextLogGeneralFlag | kOSKextLogKextBookkeepingFlag |
         kOSKextLogDirectoryScanFlag | kOSKextLogArchiveFlag,
-        "%u prelinked kexts, and %u additional personalities.", 
-        infoDictArray->getCount(), personalitiesArray->getCount());
+        "%u prelinked kexts", 
+        infoDictArray->getCount());
 
 #if __LP64__
         /* On LP64 systems, kexts are copied to their own special VM region
@@ -477,14 +401,6 @@ KLDBootstrap::readPrelinkedExtensions(
         ml_static_mfree((vm_offset_t) prelinkData, prelinkLength);
 #endif /* __LP64__ */
 
-   /* Free the link state segment, kexts have copied out what they need.
-    */
-    prelinkLinkStateSegment = getsegbyname(kPrelinkLinkStateSegment);
-    if (prelinkLinkStateSegment) {
-        ml_static_mfree((vm_offset_t)prelinkLinkStateSegment->vmaddr,
-            (vm_size_t)prelinkLinkStateSegment->vmsize);
-    }
-
    /* Free the prelink info segment, we're done with it.
     */
     prelinkInfoSegment = getsegbyname(kPrelinkInfoSegment);
@@ -946,3 +862,4 @@ static void bootstrapLoadSecurityExtensions(void)
     sBootstrapObject.loadSecurityExtensions();
     return;
 }
+
diff --git a/libsa/conf/MASTER b/libsa/conf/MASTER
index c2690d207..99865aa3e 100644
--- a/libsa/conf/MASTER
+++ b/libsa/conf/MASTER
@@ -64,4 +64,3 @@ options		CONFIG_NO_PRINTF_STRINGS			# <no_printf_str>
 options		CONFIG_NO_KPRINTF_STRINGS			# <no_kprintf_str>
 
 options		CONFIG_KXLD		# kxld/runtime linking of kexts # <config_kxld>
-
diff --git a/libsa/conf/MASTER.i386 b/libsa/conf/MASTER.i386
index 66fe402b0..448133126 100644
--- a/libsa/conf/MASTER.i386
+++ b/libsa/conf/MASTER.i386
@@ -4,7 +4,6 @@
 #  PROFILE	= [ RELEASE profile ]
 #  DEBUG	= [ RELEASE debug ]
 #
-#
 #  EMBEDDED	= [ intel mach libkerncpp ]
 #  DEVELOPMENT	= [ EMBEDDED config_dtrace ]
 #
diff --git a/libsa/conf/MASTER.ppc b/libsa/conf/MASTER.ppc
deleted file mode 100644
index 5119c4062..000000000
--- a/libsa/conf/MASTER.ppc
+++ /dev/null
@@ -1,18 +0,0 @@
-#
-######################################################################
-#  
-#  Standard Apple MacOS X Configurations:
-#  -------- ---- -------- ---------------
-#
-#  RELEASE		= [ppc mach libkerncpp config_dtrace config_kxld ]
-#  DEVELOPMENT	= [ RELEASE ]
-#  PROFILE = [RELEASE profile]
-#  DEBUG = [ppc mach libkerncpp debug]
-#  RELEASE_TRACE = [ RELEASE kdebug ]
-#  DEBUG_TRACE   = [ DEBUG kdebug ]
-#
-######################################################################
-
-machine		"ppc"					# <ppc>
-cpu		"ppc"					# <ppc>
-
diff --git a/libsa/conf/MASTER.x86_64 b/libsa/conf/MASTER.x86_64
index 68218d47d..89c745125 100644
--- a/libsa/conf/MASTER.x86_64
+++ b/libsa/conf/MASTER.x86_64
@@ -4,7 +4,6 @@
 #  PROFILE	= [ RELEASE profile ]
 #  DEBUG	= [ RELEASE debug ]
 #
-#
 #  EMBEDDED	= [ intel mach libkerncpp ]
 #  DEVELOPMENT	= [ EMBEDDED ]
 #
diff --git a/libsa/conf/Makefile b/libsa/conf/Makefile
index f2daf7618..b463b2528 100644
--- a/libsa/conf/Makefile
+++ b/libsa/conf/Makefile
@@ -7,8 +7,7 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
-SETUP_SUBDIRS = \
-	tools
+SETUP_SUBDIRS =
 
 COMP_SUBDIRS = 
 
@@ -24,30 +23,24 @@ else
 export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)
 endif
 
-$(COMPOBJROOT)/doconf:
-	@make build_setup 
+MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(ARCH_CONFIG_LC).$(MACHINE_CONFIG_LC)
 
 $(COMPOBJROOT)/$(LIBSA_KERNEL_CONFIG)/Makefile :  $(SOURCE)/MASTER  \
 	$(SOURCE)/MASTER.$(ARCH_CONFIG_LC) \
 	$(SOURCE)/Makefile.template  \
 	$(SOURCE)/Makefile.$(ARCH_CONFIG_LC)  \
 	$(SOURCE)/files \
-	$(SOURCE)/files.$(ARCH_CONFIG_LC) \
-	$(COMPOBJROOT)/doconf
+	$(SOURCE)/files.$(ARCH_CONFIG_LC)
 	$(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \
 	$(MKDIR) $${doconf_target}; \
 	cd $${doconf_target}; \
 	rm -f $(notdir $?); \
 	cp  $? $${doconf_target}; \
-	$(COMPOBJROOT)/doconf -c -cpu $(ARCH_CONFIG_LC) -d  $(TARGET)/$(LIBSA_KERNEL_CONFIG) $(LIBSA_KERNEL_CONFIG); \
+	if [ -f $(MASTER_CPU_PER_SOC) ]; then cp $(MASTER_CPU_PER_SOC) $${doconf_target}; fi; \
+	$(SRCROOT)/SETUP/config/doconf -c -cpu $(ARCH_CONFIG_LC) -soc $(MACHINE_CONFIG_LC) -d  $(TARGET)/$(LIBSA_KERNEL_CONFIG) $(LIBSA_KERNEL_CONFIG); \
 	);
 
-.ORDER: $(COMPOBJROOT)/$(LIBSA_KERNEL_CONFIG)/Makefile
-
-do_setup_conf: $(COMPOBJROOT)/doconf \
-		$(COMPOBJROOT)/$(LIBSA_KERNEL_CONFIG)/Makefile
-
-do_all: do_setup_conf
+do_all: $(COMPOBJROOT)/$(LIBSA_KERNEL_CONFIG)/Makefile
 	$(_v)next_source=$(subst conf/,,$(SOURCE));			\
 	${MAKE} -C $(COMPOBJROOT)/$(LIBSA_KERNEL_CONFIG)	\
 		MAKEFILES=$(TARGET)/$(LIBSA_KERNEL_CONFIG)/Makefile	\
diff --git a/libsa/conf/Makefile.i386 b/libsa/conf/Makefile.i386
index 3695a666c..b89fdd145 100644
--- a/libsa/conf/Makefile.i386
+++ b/libsa/conf/Makefile.i386
@@ -2,6 +2,7 @@
 #BEGIN	Machine dependent Makefile fragment for i386
 ######################################################################
 
+
 ######################################################################
 #END	Machine dependent Makefile fragment for i386
 ######################################################################
diff --git a/libsa/conf/Makefile.ppc b/libsa/conf/Makefile.ppc
deleted file mode 100644
index cd79f229a..000000000
--- a/libsa/conf/Makefile.ppc
+++ /dev/null
@@ -1,7 +0,0 @@
-######################################################################
-#BEGIN	Machine dependent Makefile fragment for ppc
-######################################################################
-
-######################################################################
-#END	Machine dependent Makefile fragment for ppc
-######################################################################
diff --git a/libsa/conf/Makefile.template b/libsa/conf/Makefile.template
index a975da2a5..26aede6b2 100644
--- a/libsa/conf/Makefile.template
+++ b/libsa/conf/Makefile.template
@@ -27,8 +27,8 @@ include $(MakeInc_def)
 #
 # CFLAGS
 #
-CFLAGS+= -imacros meta_features.h -DLIBSA_KERNEL_PRIVATE \
-         -Werror $(CFLAGS_INLINE_CONFIG)
+CFLAGS+= -include meta_features.h -DLIBSA_KERNEL_PRIVATE \
+         $(CFLAGS_INLINE_CONFIG)
 
 #
 # Directories for mig generated files
@@ -74,23 +74,26 @@ ${OBJS}: ${OBJSDEPS}
 
 LDOBJS = $(OBJS)
 
-$(COMPONENT).o: $(LDOBJS)
-	$(_v)for kld_file in ${LDOBJS}; do      \
+$(COMPONENT).filelist: $(LDOBJS)
+	$(_v)if [ $(BUILD_MACHO_OBJ) -eq 1 ]; then \
+	for kld_file in ${LDOBJS}; do      \
 		$(SEG_HACK) __KLD $${kld_file} -o $${kld_file}__; \
 		mv $${kld_file}__ $${kld_file} ; \
-	done;
+	done; \
+	fi
 	@echo LDFILELIST $(COMPONENT)
 	$(_v)( for obj in ${LDOBJS}; do	\
 		 echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \
-	done; ) > $(COMPONENT).o
+	done; ) > $(COMPONENT).filelist
 
-do_all: $(COMPONENT).o
+do_all: $(COMPONENT).filelist
 
 do_depend: do_all
 	$(_v)${MD} -u Makedep -f -d `ls *.d`
 	
 do_build_all: do_depend 
 
+
 %RULES
 
 include $(MakeInc_rule)
diff --git a/libsa/conf/Makefile.x86_64 b/libsa/conf/Makefile.x86_64
index 7b0de925d..d7024f6c7 100644
--- a/libsa/conf/Makefile.x86_64
+++ b/libsa/conf/Makefile.x86_64
@@ -2,6 +2,7 @@
 #BEGIN	Machine dependent Makefile fragment for x86_64
 ######################################################################
 
+
 ######################################################################
 #END	Machine dependent Makefile fragment for x86_64
 ######################################################################
diff --git a/libsa/conf/files.ppc b/libsa/conf/files.ppc
deleted file mode 100644
index 8b1378917..000000000
--- a/libsa/conf/files.ppc
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/libsa/conf/tools/Makefile b/libsa/conf/tools/Makefile
deleted file mode 100644
index 4f9ccd553..000000000
--- a/libsa/conf/tools/Makefile
+++ /dev/null
@@ -1,32 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-SETUP_SUBDIRS = doconf
-
-COMP_SUBDIRS = doconf
-
-INST_SUBDIRS = \
-
-
-setup_build_all:
-	@echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-do_build_all:
-	@echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-setup_build_install:
-	@echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-do_build_install:
-	@echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/libsa/conf/tools/doconf/Makefile b/libsa/conf/tools/doconf/Makefile
deleted file mode 100644
index aa55a9419..000000000
--- a/libsa/conf/tools/doconf/Makefile
+++ /dev/null
@@ -1,47 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-COMP_SUBDIRS = \
-
-INST_SUBDIRS = \
-
-
-#
-# Who and where
-#
-BINDIR= 
-ifneq ($(MACHINE_CONFIG), DEFAULT)
-DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)/)
-else
-DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)/)
-endif
-PROGRAM= $(DSTDIR)doconf
-
-# 
-# How to install it
-#
-IFLAGS= -c -m 555
-
-$(PROGRAM): $(DSTDIR)% : $(SOURCE)%.csh
-	@-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS 
-	@sed -e "s/#PROGRAM.*/#`vers_string $(notdir $(PROGRAM))`/" \
-		< $< >$(notdir $(PROGRAM)).VERS;
-	@install $(IFLAGS) $(notdir $(PROGRAM)).VERS $(PROGRAM);
-	@-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS;
-
-do_build_setup: $(PROGRAM)
-
-do_build_all:
-
-setup_build_install:
-
-do_build_install:
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
diff --git a/libsa/conf/tools/doconf/doconf.csh b/libsa/conf/tools/doconf/doconf.csh
deleted file mode 100755
index 6fedb4786..000000000
--- a/libsa/conf/tools/doconf/doconf.csh
+++ /dev/null
@@ -1,321 +0,0 @@
-#!/bin/csh -f
-set path = ($path .)
-######################################################################
-# HISTORY
-#  1-Dec-87  Michael Young (mwyoung) at Carnegie-Mellon University
-#	Added "-verbose" switch, so this script produces no output
-#	in the normal case.
-#
-# 10-Oct-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Flushed cmu_*.h and spin_locks.h
-#	[ V5.1(XF18) ]
-#
-#  6-Apr-87  Avadis Tevanian (avie) at Carnegie-Mellon University
-#	Use MASTER.local and MASTER.<machine>.local for generation of
-#	configuration files in addition to MASTER and MASTER.<machine>.
-#
-# 25-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Removed use of obsolete wb_*.h files when building the feature
-#	list;  modified to save the previous configuration file and
-#	display the differences between it and the new file.
-#	[ V5.1(F8) ]
-#
-# 25-Mar-87  Avadis Tevanian (avie) at Carnegie-Mellon University
-#	If there is no /etc/machine just print out a message telling
-#	user to use the -cpu option.  I thought this script was supposed
-#	to work even without a /etc/machine, but it doesn't... and this
-#	is the easiest way out.
-#
-# 13-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added "romp_fpa.h" file to extra features for the RT.
-#	[ V5.1(F7) ]
-#
-# 11-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Updated to maintain the appropriate configuration features file
-#	in the "machine" directory whenever the corresponding
-#	configuration is generated.  This replaces the old mechanism of
-#	storing this directly in the <sys/features.h> file since it was
-#	machine dependent and also precluded building programs for more
-#	than one configuration from the same set of sources.
-#	[ V5.1(F6) ]
-#
-# 21-Feb-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Fixed to require wired-in cpu type names for only those
-#	machines where the kernel name differs from that provided by
-#	/etc/machine (i.e. IBMRT => ca and SUN => sun3);  updated to
-#	permit configuration descriptions in both machine indepedent
-#	and dependent master configuration files so that attributes can
-#	be grouped accordingly.
-#	[ V5.1(F3) ]
-#
-# 17-Jan-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Updated to work from any directory at the same level as
-#	"conf"; generate configuration from both MASTER and
-#	MASTER.<machine-type> files; added -cpu switch.
-#	[ V5.1(F1) ]
-#
-# 18-Aug-86  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added -make switch and changed meaning of -config;  upgraded to
-#	allow multiple attributes per configuration and to define
-#	configurations in terms of these attributes within MASTER.
-#
-# 14-Apr-83  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added -config switch to only run /etc/config without 
-#	"make depend" and "make".
-#
-######################################################################
-
-set prog=$0
-set prog=$prog:t
-set nonomatch
-set OBJDIR=../BUILD
-if ("`/usr/bin/uname`" == "Rhapsody" ) then
-set CONFIG_DIR=/usr/local/bin
-else
-set CONFIG_DIR=/usr/bin
-endif
-
-unset domake
-unset doconfig
-unset beverbose
-unset MACHINE
-unset profile
-
-while ($#argv >= 1)
-    if ("$argv[1]" =~ -*) then
-        switch ("$argv[1]")
-	case "-c":
-	case "-config":
-	    set doconfig
-	    breaksw
-	case "-m":
-	case "-make":
-	    set domake
-	    breaksw
-	case "-cpu":
-	    if ($#argv < 2) then
-		echo "${prog}: missing argument to ${argv[1]}"
-		exit 1
-	    endif
-	    set MACHINE="$argv[2]"
-	    shift
-	    breaksw
-	case "-d":
-	    if ($#argv < 2) then
-		echo "${prog}: missing argument to ${argv[1]}"
-		exit 1
-	    endif
-	    set OBJDIR="$argv[2]"
-	    shift
-	    breaksw
-	case "-verbose":
-	    set beverbose
-	    breaksw
-	case "-p":
-	case "-profile":
-	    set profile
-	    breaksw
-	default:
-	    echo "${prog}: ${argv[1]}: unknown switch"
-	    exit 1
-	    breaksw
-	endsw
-	shift
-    else
-	break
-    endif
-end
-
-if ($#argv == 0) set argv=(GENERIC)
-
-if (! $?MACHINE) then
-    if (-d /NextApps) then
-	set MACHINE=`hostinfo | awk '/MC680x0/ { printf("m68k") } /MC880x0/ { printf("m88k") }'`
-    endif
-endif
-
-if (! $?MACHINE) then
-    if (-f /etc/machine) then
-	    set MACHINE="`/etc/machine`"
-    else
-	    echo "${prog}: no /etc/machine, specify machine type with -cpu"
-	    echo "${prog}: e.g. ${prog} -cpu VAX CONFIGURATION"
-	    exit 1
-    endif
-endif
-
-set FEATURES_EXTRA=
-
-switch ("$MACHINE")
-    case IBMRT:
-	set cpu=ca
-	set ID=RT
-	set FEATURES_EXTRA="romp_dualcall.h romp_fpa.h"
-	breaksw
-    case SUN:
-	set cpu=sun3
-	set ID=SUN3
-	breaksw
-    default:
-	set cpu=`echo $MACHINE | tr A-Z a-z`
-	set ID=`echo $MACHINE | tr a-z A-Z`
-	breaksw
-endsw
-set FEATURES=../h/features.h
-set FEATURES_H=(cs_*.h mach_*.h net_*.h\
-	        cputypes.h cpus.h vice.h\
-	        $FEATURES_EXTRA)
-set MASTER_DIR=../conf
-set MASTER =   ${MASTER_DIR}/MASTER
-set MASTER_CPU=${MASTER}.${cpu}
-
-set MASTER_LOCAL = ${MASTER}.local
-set MASTER_CPU_LOCAL = ${MASTER_CPU}.local
-if (! -f $MASTER_LOCAL) set MASTER_LOCAL = ""
-if (! -f $MASTER_CPU_LOCAL) set MASTER_CPU_LOCAL = ""
-
-if (! -d $OBJDIR) then
-    if ($?beverbose) then
-        echo "[ creating $OBJDIR ]"
-    endif
-    mkdir -p $OBJDIR
-endif
-
-foreach SYS ($argv)
-    set SYSID=${SYS}_${ID}
-    set SYSCONF=$OBJDIR/config.$SYSID
-    set BLDDIR=$OBJDIR
-    if ($?beverbose) then
-	echo "[ generating $SYSID from $MASTER_DIR/MASTER{,.$cpu}{,.local} ]"
-    endif
-    echo +$SYS \
-    | \
-    cat $MASTER $MASTER_LOCAL $MASTER_CPU $MASTER_CPU_LOCAL - \
-        $MASTER $MASTER_LOCAL $MASTER_CPU $MASTER_CPU_LOCAL \
-    | \
-    sed -n \
-	-e "/^+/{" \
-	   -e "s;[-+];#&;gp" \
-	      -e 't loop' \
-	   -e ': loop' \
-           -e 'n' \
-	   -e '/^#/b loop' \
-	   -e '/^$/b loop' \
-	   -e 's;^\([^#]*\).*#[ 	]*<\(.*\)>[ 	]*$;\2#\1;' \
-	      -e 't not' \
-	   -e 's;\([^#]*\).*;#\1;' \
-	      -e 't not' \
-	   -e ': not' \
-	   -e 's;[ 	]*$;;' \
-	   -e 's;^\!\(.*\);\1#\!;' \
-	   -e 'p' \
-	      -e 't loop' \
-           -e 'b loop' \
-	-e '}' \
-	-e "/^[^#]/d" \
-	-e 's;	; ;g' \
-	-e "s;^# *\([^ ]*\)[ ]*=[ ]*\[\(.*\)\].*;\1#\2;p" \
-    | \
-    awk '-F#' '\
-part == 0 && $1 != "" {\
-	m[$1]=m[$1] " " $2;\
-	next;\
-}\
-part == 0 && $1 == "" {\
-	for (i=NF;i>1;i--){\
-		s=substr($i,2);\
-		c[++na]=substr($i,1,1);\
-		a[na]=s;\
-	}\
-	while (na > 0){\
-		s=a[na];\
-		d=c[na--];\
-		if (m[s] == "") {\
-			f[s]=d;\
-		} else {\
-			nx=split(m[s],x," ");\
-			for (j=nx;j>0;j--) {\
-				z=x[j];\
-				a[++na]=z;\
-				c[na]=d;\
-			}\
-		}\
-	}\
-	part=1;\
-	next;\
-}\
-part != 0 {\
-	if ($1 != "") {\
-		n=split($1,x,",");\
-		ok=0;\
-		for (i=1;i<=n;i++) {\
-			if (f[x[i]] == "+") {\
-				ok=1;\
-			}\
-		}\
-		if (NF > 2 && ok == 0 || NF <= 2 && ok != 0) {\
-			print $2; \
-		}\
-	} else { \
-		print $2; \
-	}\
-}\
-' >$SYSCONF.new
-    if (-z $SYSCONF.new) then
-	echo "${prog}: ${$SYSID}: no such configuration in $MASTER_DIR/MASTER{,.$cpu}"
-	rm -f $SYSCONF.new
-    endif
-    if (! -d $BLDDIR) then
-        if ($?beverbose) then
-	    echo "[ creating $BLDDIR ]"
-        endif
-	mkdir -p $BLDDIR
-    endif
-#
-# These paths are used by config.
-#
-# "builddir" is the name of the directory where kernel binaries
-# are put.  It is a single path element, never absolute, and is
-# always relative to "objectdir".  "builddir" is used by config
-# solely to determine where to put files created by "config" (e.g.
-# the created Makefile and *.h's.)
-#
-# "objectdir" is the name of the directory which will hold "builddir".
-# It is a path; if relative, it is relative to the current directory
-# where config is run.  It's sole use is to be prepended to "builddir"
-# to indicate where config-created files are to be placed (see above).
-#
-# "sourcedir" is the location of the sources used to build the kernel.
-# It is a path; if relative, it is relative to the directory specified
-# by the concatenation of "objectdir" and "builddir" (i.e. where the
-# kernel binaries are put).
-#
-    echo 'builddir	"."'			>> $SYSCONF.new
-    set OBJRELDIR=`$RELPATH $OBJROOT $OBJDIR`
-    echo 'objectdir	"'$OBJROOT'/'$OBJRELDIR'"'		>> $SYSCONF.new
-    set SRCDIR=`dirname $SOURCE`
-    echo 'sourcedir	"'$SRCROOT'"'		>> $SYSCONF.new
-    if (-f $SYSCONF) then
-	diff $SYSCONF $SYSCONF.new
-	rm -f $SYSCONF.old
-	mv $SYSCONF $SYSCONF.old
-    endif
-    rm -f $SYSCONF
-    mv $SYSCONF.new $SYSCONF
-    if ($?doconfig) then
-        if ($?beverbose) then
-	    echo "[ configuring $SYSID ]"
-        endif
-	if ($?profile) then
-	    $CONFIG_DIR/config -c $MASTER_DIR -p $SYSCONF
-	else
-	    $CONFIG_DIR/config -c $MASTER_DIR $SYSCONF
-	endif
-    endif
-    if ($?domake) then
-        if ($?beverbose) then
-            echo "[ making $SYSID ]"
-        endif
-        (cd $BLDDIR; make)
-    endif
-end
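
The comment block in the deleted doconf script above documents the three path settings it emits for config: "builddir" is a single relative path element, "objectdir" anchors it, and a relative "sourcedir" is resolved against their concatenation. A minimal C sketch of that composition, with hypothetical values standing in for $OBJROOT/$OBJRELDIR and $SRCROOT:

#include <stdio.h>

int main(void)
{
    /* Hypothetical values; the deleted script emits these from
     * $OBJROOT/$OBJRELDIR, the literal ".", and $SRCROOT. */
    const char *objectdir = "/tmp/objroot/RELEASE_X86_64";
    const char *builddir  = ".";   /* single path element, never absolute */
    const char *sourcedir = "/tmp/xnu";

    char outdir[1024];
    /* config places its generated Makefile and headers at objectdir/builddir */
    snprintf(outdir, sizeof outdir, "%s/%s", objectdir, builddir);
    printf("config output dir: %s\n", outdir);
    /* a relative sourcedir would be resolved against outdir */
    printf("kernel sources:    %s\n", sourcedir);
    return 0;
}
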
diff --git a/libsa/lastkernelconstructor.c b/libsa/lastkernelconstructor.c
index 97980f080..5b62f3fe6 100644
--- a/libsa/lastkernelconstructor.c
+++ b/libsa/lastkernelconstructor.c
@@ -26,10 +26,10 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
-static void last_kernel_constructor(void) __attribute__ ((constructor,section("__TEXT, initcode")));
-
 extern void iokit_post_constructor_init(void);
 
+static void last_kernel_constructor(void) __attribute__ ((constructor,section("__TEXT, initcode")));
+
 static void last_kernel_constructor(void)
 {
     iokit_post_constructor_init();
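
The hunk above only swaps the order of the extern declaration and the constructor prototype; the constructor semantics are untouched. A self-contained userland sketch of the GCC/Clang attribute involved (hypothetical names, and without the "__TEXT, initcode" section placement, which is specific to the xnu link layout):

#include <stdio.h>

/* Runs automatically before main(), analogous to last_kernel_constructor
 * running at the tail end of kernel constructor initialization. */
static void demo_constructor(void) __attribute__((constructor));

static void demo_constructor(void)
{
    puts("constructor ran first");
}

int main(void)
{
    puts("main ran second");
    return 0;
}
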
diff --git a/libsa/libsa/Makefile b/libsa/libsa/Makefile
index c96349565..4554d46ba 100644
--- a/libsa/libsa/Makefile
+++ b/libsa/libsa/Makefile
@@ -8,13 +8,11 @@ include $(MakeInc_cmd)
 include $(MakeInc_def)
 
 INSTINC_SUBDIRS = 
-INSTINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS}
 INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS}
 INSTINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS}
 INSTINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS}
 
 EXPINC_SUBDIRS = 
-EXPINC_SUBDIRS_PPC = ${EXPINC_SUBDIRS}
 EXPINC_SUBDIRS_I386 = ${EXPINC_SUBDIRS}
 EXPINC_SUBDIRS_X86_64 = ${EXPINC_SUBDIRS}
 EXPINC_SUBDIRS_ARM = ${EXPINC_SUBDIRS}
diff --git a/libsyscall/BSDmakefile b/libsyscall/BSDmakefile
deleted file mode 100644
index 57eda28ba..000000000
--- a/libsyscall/BSDmakefile
+++ /dev/null
@@ -1,141 +0,0 @@
-.ifndef DSTROOT
-DSTROOT != x=`pwd`/DSTROOT && mkdir -p $$x && echo $$x
-.endif
-.ifndef OBJROOT
-OBJROOT != x=`pwd`/OBJROOT && mkdir -p $$x && echo $$x
-.endif
-.ifndef SRCROOT
-SRCROOT != dirname `pwd`
-.endif
-.ifndef SYMROOT
-SYMROOT != x=`pwd`/SYMROOT && mkdir -p $$x && echo $$x
-.endif
-ARCH != arch
-.ifndef RC_ARCHS
-RC_ARCHS = $(ARCH)
-RC_$(RC_ARCHS) = 1
-.endif
-SDKROOT ?= /
-NARCHS != echo $(RC_ARCHS) | wc -w
-LIBSYS = $(SDKROOT)/usr/local/lib/system
-NJOBS != perl -e '$$n = `/usr/sbin/sysctl -n hw.ncpu`; printf "%d\n", $$n < 2 ? 2 : ($$n * 1.5)'
-BSDMAKE = bsdmake -f Makefile
-BSDMAKEJ = $(BSDMAKE) -j $(NJOBS)
-
-# These variables guarantee that the left-hand side of an expression is
-# always a variable
-dynamic = dynamic
-static = static
-
-# Remove the arch stuff, since we know better here.
-LOCAL_CFLAGS != echo $(RC_CFLAGS) | sed 's/ *-arch [^ ][^ ]*//g'
-
-FORMS := debug dynamic profile static
-
-all: setup build
-
-build: build-debug build-dynamic build-profile build-static
-
-# These are the non-B&I defaults
-.ifndef RC_ProjectName
-install: installhdrs install-all
-
-.else # RC_ProjectName
-
-install: setup
-.for F in $(FORMS)
-install: BI-install-$(F)
-.endfor # FORMS
-install:
-	install -c -m 444 $(OBJROOT)/sys/libsyscall.list $(DSTROOT)/usr/local/lib/system
-.endif # RC_ProjectName
-
-.for F in $(FORMS)
-.if $(dynamic) == $(F)
-SUFFIX$(F) =
-.else
-SUFFIX$(F) = _$(F)
-.endif
-LIPOARGS$(F) != perl -e 'printf "%s\n", join(" ", map(qq(-arch $$_ \"$(OBJROOT)/obj.$$_/libsyscall$(SUFFIX$(F)).a\"), qw($(RC_ARCHS))))'
-
-.for A in $(RC_ARCHS)
-build-$(F): build-$(A)-$(F)
-.endfor # RC_ARCHS
-build-$(F):
-	mkdir -p $(SYMROOT)
-.if $(NARCHS) == 1
-	cp -p "$(OBJROOT)/obj.$(RC_ARCHS)/libsyscall$(SUFFIX$(F)).a" "$(SYMROOT)"
-.else
-	xcrun -sdk $(SDKROOT) lipo -create $(LIPOARGS$(F)) -output $(SYMROOT)/libsyscall$(SUFFIX$(F)).a
-.endif
-
-.for A in $(RC_ARCHS)
-build-$(A)-$(F):
-	mkdir -p $(OBJROOT)/obj.$(A) && \
-	MAKEOBJDIR="$(OBJROOT)/obj.$(A)" MACHINE_ARCH="$(A)" \
-		DSTROOT='$(DSTROOT)' OBJROOT='$(OBJROOT)' SYMROOT='$(SYMROOT)' \
-		MAKEFLAGS="" MIGDEFINES="" CFLAGS="-arch $(A) $(LOCAL_CFLAGS)" $(BSDMAKEJ) libsyscall$(SUFFIX$(F)).a
-.endfor # RC_ARCHS
-.endfor # FORMS
-
-installhdrs:
-	MAKEOBJDIR="$(OBJROOT)" DESTDIR="$(DSTROOT)" MAKEFLAGS="" \
-		DSTROOT='$(DSTROOT)' OBJROOT='$(OBJROOT)' SYMROOT='$(SYMROOT)' \
-		MIGDEFINES="-DLIBSYSCALL_INTERFACE=1" \
-		$(BSDMAKE) installhdrs
-.for A in $(RC_ARCHS)
-	mkdir -p "$(OBJROOT)/obj.$(A)" && \
-	MAKEOBJDIR="$(OBJROOT)/obj.$(A)" MACHINE_ARCH="$(A)" \
-		DSTROOT='$(DSTROOT)' OBJROOT='$(OBJROOT)' SYMROOT='$(SYMROOT)' \
-		MAKEFLAGS="" MIGDEFINES="" $(BSDMAKE) installhdrs-md
-.endfor # RC_ARCHS
-
-.for F in $(FORMS)
-BI-install-$(F): build-$(F)
-	mkdir -p $(DSTROOT)/usr/local/lib/system
-	if [ -f "$(SYMROOT)/libsyscall$(SUFFIX$(F)).a" ]; then \
-		echo "Installing libsyscall$(SUFFIX$(F)).a" && \
-		install -c -m 644 "$(SYMROOT)/libsyscall$(SUFFIX$(F)).a" \
-			$(DSTROOT)/usr/local/lib/system && \
-		ranlib "$(DSTROOT)/usr/local/lib/system/libsyscall$(SUFFIX$(F)).a"; \
-		chmod 444 "$(DSTROOT)/usr/local/lib/system/libsyscall$(SUFFIX$(F)).a"; \
-	fi
-.endfor # FORMS
-
-install-man:
-	mkdir -p $(DSTROOT)/usr/share/man/man2
-	MAKEOBJDIR="$(OBJROOT)" DESTDIR="$(DSTROOT)" \
-		DSTROOT='$(DSTROOT)' OBJROOT='$(OBJROOT)' SYMROOT='$(SYMROOT)' \
-		MACHINE_ARCH="$(ARCH)" MAKEFLAGS="" $(BSDMAKE) all-man maninstall
-
-install-all: setup build install-man
-.for F in $(FORMS)
-install-all: BI-install-$(F)
-.endfor # FORMS
-
-clean:
-.for F in $(FORMS)
-	rm -f $(OBJROOT)/libsyscall$(SUFFIX$(F)).a
-.endfor # FORMS
-.for A in $(RC_ARCHS)
-	rm -rf $(OBJROOT)/obj.$(A)
-.endfor # RC_ARCHS
-
-INCLUDEDIR = $(OBJROOT)/include
-SYSDIR = $(OBJROOT)/sys
-
-setup: $(INCLUDEDIR) $(SYSDIR)
-
-USR-INCLUDE = /usr/include
-MOD-HEADERS = architecture/ppc/mode_independent_asm.h architecture/i386/asm_help.h
-
-$(INCLUDEDIR):
-	mkdir -p $(INCLUDEDIR)
-.for h in $(MOD-HEADERS)
-	mkdir -p $(INCLUDEDIR)/$(h:H)
-	sed 's/\.globl/.private_extern/g' $(USR-INCLUDE)/$(h) > $(INCLUDEDIR)/$(h)
-.endfor # MOD-HEADERS
-
-$(SYSDIR):
-	mkdir -p $(SYSDIR)
-	$(SRCROOT)/libsyscall/create-syscalls.pl $(SRCROOT)/bsd/kern/syscalls.master $(SRCROOT)/libsyscall/custom $(SYSDIR)
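
One detail worth noting in the deleted BSDmakefile is the NJOBS heuristic: run at least two parallel jobs, otherwise 1.5 jobs per CPU, truncated to an integer. A hedged C equivalent of that arithmetic, using sysconf() as a stand-in for the hw.ncpu sysctl the perl one-liner shells out to:

#include <stdio.h>
#include <unistd.h>

int main(void)
{
    long ncpu = sysconf(_SC_NPROCESSORS_ONLN);  /* stand-in for hw.ncpu */
    if (ncpu < 1)
        ncpu = 1;
    /* floor of ncpu * 1.5, but never fewer than 2 jobs */
    long njobs = (ncpu < 2) ? 2 : (long)(ncpu * 1.5);
    printf("njobs = %ld\n", njobs);
    return 0;
}
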
diff --git a/libsyscall/GNUmakefile b/libsyscall/GNUmakefile
deleted file mode 100644
index 6965e8628..000000000
--- a/libsyscall/GNUmakefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# This GNUmakefile is only used when running "make" by hand; it is not
-# used by buildit or XBS
-
-all:
-	@bsdmake
-
-.DEFAULT:
-	@bsdmake $@
diff --git a/libsyscall/Libsyscall.xcconfig b/libsyscall/Libsyscall.xcconfig
new file mode 100644
index 000000000..8881d5028
--- /dev/null
+++ b/libsyscall/Libsyscall.xcconfig
@@ -0,0 +1,31 @@
+#include "<DEVELOPER_DIR>/Makefiles/CoreOS/Xcode/BSD.xcconfig"
+BUILD_VARIANTS = normal
+ONLY_ACTIVE_ARCH = NO
+DEBUG_INFORMATION_FORMAT = dwarf-with-dsym
+INSTALL_PATH = /usr/lib/system
+INSTALL_PATH[sdk=iphoneos*] = /usr/lib/system
+INSTALL_PATH[sdk=iphonesimulator*] = $(SDKROOT)/usr/lib/system
+INSTALL_PATH[sdk=macosx*] = /usr/lib/system
+PUBLIC_HEADERS_FOLDER_PATH = /usr/include/mach
+PUBLIC_HEADERS_FOLDER_PATH[sdk=iphoneos*] = /usr/include/mach
+PUBLIC_HEADERS_FOLDER_PATH[sdk=iphonesimulator*] = $(SDKROOT)/usr/include/mach
+PUBLIC_HEADERS_FOLDER_PATH[sdk=macosx*] = /usr/include/mach
+EXECUTABLE_PREFIX = libsystem_
+PRODUCT_NAME = kernel
+ALWAYS_SEARCH_USER_PATHS = NO
+OTHER_CFLAGS = -fdollars-in-identifiers -no-cpp-precomp -fno-common -fno-stack-protector -pipe -DLIBSYSCALL_INTERFACE -D__DARWIN_VERS_1050=1
+OTHER_CFLAGS[sdk=macosx*] = $(inherited) -DSYSCALL_PRE1050
+OTHER_CFLAGS[sdk=macosx*][arch=x86_64] = $(inherited) -DNO_SYSCALL_LEGACY
+OTHER_CFLAGS[sdk=iphoneos*] = $(inherited) -DNO_SYSCALL_LEGACY
+GCC_PREPROCESSOR_DEFINITIONS = CF_OPEN_SOURCE CF_EXCLUDE_CSTD_HEADERS DEBUG _FORTIFY_SOURCE=0
+HEADER_SEARCH_PATHS = /System/Library/Frameworks/System.framework/PrivateHeaders $(PROJECT_DIR)/mach $(PROJECT_DIR)/wrappers
+WARNING_CFLAGS = -Wmost
+GCC_TREAT_WARNINGS_AS_ERRORS = YES
+GCC_WARN_ABOUT_MISSING_NEWLINE = YES
+CODE_SIGN_IDENTITY = -
+DYLIB_CURRENT_VERSION = $(RC_ProjectSourceVersion)
+OTHER_LDFLAGS = 
+INSTALLHDRS_SCRIPT_PHASE = YES
+USE_HEADERMAP = NO
+LINK_WITH_STANDARD_LIBRARIES = NO
+ALWAYS_SEARCH_USER_PATHS = YES
diff --git a/libsyscall/Libsyscall.xcodeproj/project.pbxproj b/libsyscall/Libsyscall.xcodeproj/project.pbxproj
new file mode 100644
index 000000000..6310cd437
--- /dev/null
+++ b/libsyscall/Libsyscall.xcodeproj/project.pbxproj
@@ -0,0 +1,1029 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 45;
+	objects = {
+
+/* Begin PBXAggregateTarget section */
+		24614EF311E7C98600E78584 /* Syscalls */ = {
+			isa = PBXAggregateTarget;
+			buildConfigurationList = 24614EFD11E7C9B900E78584 /* Build configuration list for PBXAggregateTarget "Syscalls" */;
+			buildPhases = (
+				24614EF211E7C98600E78584 /* Generate Syscalls */,
+				24614EF611E7C9A000E78584 /* Compile Syscalls */,
+			);
+			dependencies = (
+			);
+			name = Syscalls;
+			productName = Syscalls;
+		};
+		249C61101194755D00ED73F3 /* Build */ = {
+			isa = PBXAggregateTarget;
+			buildConfigurationList = 249C61191194756B00ED73F3 /* Build configuration list for PBXAggregateTarget "Build" */;
+			buildPhases = (
+			);
+			dependencies = (
+				249C61151194756A00ED73F3 /* PBXTargetDependency */,
+			);
+			name = Build;
+			productName = Build;
+		};
+/* End PBXAggregateTarget section */
+
+/* Begin PBXBuildFile section */
+		240BAC4C1214770F000A1719 /* memcpy.c in Sources */ = {isa = PBXBuildFile; fileRef = 24B028D511FF4FBB00CA64A9 /* memcpy.c */; };
+		2419382B12135FF6003CDE41 /* chmod.c in Sources */ = {isa = PBXBuildFile; fileRef = 2419382A12135FF6003CDE41 /* chmod.c */; };
+		242AB66611EBDC1200107336 /* errno.c in Sources */ = {isa = PBXBuildFile; fileRef = 242AB66511EBDC1200107336 /* errno.c */; };
+		24484A7511F6178E00E10CD2 /* string.c in Sources */ = {isa = PBXBuildFile; fileRef = 24484A7411F51E9800E10CD2 /* string.c */; };
+		24484A9411F61D2B00E10CD2 /* mig_reply_port.c in Sources */ = {isa = PBXBuildFile; fileRef = 24484A9311F61D1900E10CD2 /* mig_reply_port.c */; };
+		24614F0411E7CB5B00E78584 /* syscalls.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 24614F0311E7CB5B00E78584 /* syscalls.a */; };
+		247A08C211F8BDC900E4693F /* _libkernel_init.c in Sources */ = {isa = PBXBuildFile; fileRef = 247A08B311F8B05900E4693F /* _libkernel_init.c */; };
+		247A090011F8E18000E4693F /* abort.h in Headers */ = {isa = PBXBuildFile; fileRef = 247A08FF11F8E18000E4693F /* abort.h */; };
+		247A091711F8E7A800E4693F /* exc_catcher.h in Headers */ = {isa = PBXBuildFile; fileRef = 247A091611F8E7A800E4693F /* exc_catcher.h */; };
+		2485235511582D8F0051B413 /* mach_legacy.c in Sources */ = {isa = PBXBuildFile; fileRef = 2485235411582D8F0051B413 /* mach_legacy.c */; };
+		248AA963122C7B2A0085F5B1 /* unlink.c in Sources */ = {isa = PBXBuildFile; fileRef = 248AA962122C7B2A0085F5B1 /* unlink.c */; };
+		248AA965122C7C330085F5B1 /* rmdir.c in Sources */ = {isa = PBXBuildFile; fileRef = 248AA964122C7C330085F5B1 /* rmdir.c */; };
+		248AA967122C7CDA0085F5B1 /* rename.c in Sources */ = {isa = PBXBuildFile; fileRef = 248AA966122C7CDA0085F5B1 /* rename.c */; };
+		248BA01D121C56BF008C073F /* connect.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA01C121C56BF008C073F /* connect.c */; };
+		248BA01F121C607E008C073F /* fchmod.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA01E121C607E008C073F /* fchmod.c */; };
+		248BA04F121C8F06008C073F /* fcntl.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA04E121C8F06008C073F /* fcntl.c */; };
+		248BA05C121C9649008C073F /* fcntl-cancel.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA051121C8FE2008C073F /* fcntl-cancel.c */; };
+		248BA069121D9E27008C073F /* getrlimit.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA068121D9E27008C073F /* getrlimit.c */; };
+		248BA080121DA36B008C073F /* ioctl.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA07F121DA36B008C073F /* ioctl.c */; };
+		248BA082121DA4F3008C073F /* kill.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA081121DA4F3008C073F /* kill.c */; };
+		248BA085121DA5E4008C073F /* kill.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA084121DA5E4008C073F /* kill.c */; };
+		248BA087121DA72D008C073F /* mmap.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA086121DA72D008C073F /* mmap.c */; };
+		248BA089121DA8E0008C073F /* mprotect.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA088121DA8E0008C073F /* mprotect.c */; };
+		248BA08B121DAC86008C073F /* msync.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA08A121DAC86008C073F /* msync.c */; };
+		248BA08D121DB0E7008C073F /* munmap.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA08C121DB0E7008C073F /* munmap.c */; };
+		248BA08F121DC545008C073F /* open.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA08E121DC545008C073F /* open.c */; };
+		248BA093121DE369008C073F /* select.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA092121DE369008C073F /* select.c */; };
+		248BA095121DE565008C073F /* select-pre1050.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA094121DE565008C073F /* select-pre1050.c */; };
+		248BA0B3121DE760008C073F /* select-cancel.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA0B2121DE760008C073F /* select-cancel.c */; };
+		248BA0BE121DE902008C073F /* select.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA0BC121DE902008C073F /* select.c */; };
+		248BA0CD121DEBEF008C073F /* setrlimit.c in Sources */ = {isa = PBXBuildFile; fileRef = 248BA0CC121DEBEF008C073F /* setrlimit.c */; };
+		249C610B1194750E00ED73F3 /* libsystem_kernel.a in Frameworks */ = {isa = PBXBuildFile; fileRef = D2AAC0630554660B00DB518D /* libsystem_kernel.a */; };
+		249C612F1194828600ED73F3 /* dylib_link.c in Sources */ = {isa = PBXBuildFile; fileRef = 249C612C1194827D00ED73F3 /* dylib_link.c */; };
+		24A7C5BC11FF8DA6007669EB /* accept.c in Sources */ = {isa = PBXBuildFile; fileRef = 24A7C5AE11FF8DA6007669EB /* accept.c */; };
+		24A7C5BD11FF8DA6007669EB /* bind.c in Sources */ = {isa = PBXBuildFile; fileRef = 24A7C5AF11FF8DA6007669EB /* bind.c */; };
+		24A7C5BF11FF8DA6007669EB /* getattrlist.c in Sources */ = {isa = PBXBuildFile; fileRef = 24A7C5B111FF8DA6007669EB /* getattrlist.c */; };
+		24A7C5C011FF8DA6007669EB /* getpeername.c in Sources */ = {isa = PBXBuildFile; fileRef = 24A7C5B211FF8DA6007669EB /* getpeername.c */; };
+		24A7C5C111FF8DA6007669EB /* getsockname.c in Sources */ = {isa = PBXBuildFile; fileRef = 24A7C5B311FF8DA6007669EB /* getsockname.c */; };
+		24A7C5C211FF8DA6007669EB /* lchown.c in Sources */ = {isa = PBXBuildFile; fileRef = 24A7C5B411FF8DA6007669EB /* lchown.c */; };
+		24A7C5C311FF8DA6007669EB /* listen.c in Sources */ = {isa = PBXBuildFile; fileRef = 24A7C5B511FF8DA6007669EB /* listen.c */; };
+		24A7C5C411FF8DA6007669EB /* recvfrom.c in Sources */ = {isa = PBXBuildFile; fileRef = 24A7C5B611FF8DA6007669EB /* recvfrom.c */; };
+		24A7C5C511FF8DA6007669EB /* recvmsg.c in Sources */ = {isa = PBXBuildFile; fileRef = 24A7C5B711FF8DA6007669EB /* recvmsg.c */; };
+		24A7C5C611FF8DA6007669EB /* sendmsg.c in Sources */ = {isa = PBXBuildFile; fileRef = 24A7C5B811FF8DA6007669EB /* sendmsg.c */; };
+		24A7C5C711FF8DA6007669EB /* sendto.c in Sources */ = {isa = PBXBuildFile; fileRef = 24A7C5B911FF8DA6007669EB /* sendto.c */; };
+		24A7C5C811FF8DA6007669EB /* setattrlist.c in Sources */ = {isa = PBXBuildFile; fileRef = 24A7C5BA11FF8DA6007669EB /* setattrlist.c */; };
+		24A7C5C911FF8DA6007669EB /* socketpair.c in Sources */ = {isa = PBXBuildFile; fileRef = 24A7C5BB11FF8DA6007669EB /* socketpair.c */; };
+		24B028F511FF5C3500CA64A9 /* _libkernel_init.h in Headers */ = {isa = PBXBuildFile; fileRef = 247A08B211F8B05900E4693F /* _libkernel_init.h */; settings = {ATTRIBUTES = (Private, ); }; };
+		24B223B0121DFD36007DAEDE /* sigsuspend.c in Sources */ = {isa = PBXBuildFile; fileRef = 24B223AF121DFD36007DAEDE /* sigsuspend.c */; };
+		24B223B2121DFE6D007DAEDE /* sigsuspend-cancel.c in Sources */ = {isa = PBXBuildFile; fileRef = 24B223B1121DFE6D007DAEDE /* sigsuspend-cancel.c */; };
+		24B223B5121DFF29007DAEDE /* sigsuspend.c in Sources */ = {isa = PBXBuildFile; fileRef = 24B223B4121DFF29007DAEDE /* sigsuspend.c */; };
+		24B8C2621237F53900D36CC3 /* remove-counter.c in Sources */ = {isa = PBXBuildFile; fileRef = 24B8C2611237F53900D36CC3 /* remove-counter.c */; };
+		24D1158311E671B20063D54D /* SYS.h in Headers */ = {isa = PBXBuildFile; fileRef = 24D1157411E671B20063D54D /* SYS.h */; };
+		24E4782712088267009A384D /* _libc_funcptr.c in Sources */ = {isa = PBXBuildFile; fileRef = 24E47824120881DF009A384D /* _libc_funcptr.c */; };
+		C99A4F501305B2BD0054B7B7 /* __get_cpu_capabilities.s in Sources */ = {isa = PBXBuildFile; fileRef = C99A4F4E1305B1B70054B7B7 /* __get_cpu_capabilities.s */; };
+		C99A4F531305B43F0054B7B7 /* init_cpu_capabilities.c in Sources */ = {isa = PBXBuildFile; fileRef = C99A4F511305B43F0054B7B7 /* init_cpu_capabilities.c */; };
+		C9D9BD17114B00600000D8B9 /* vm_map_compat.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCC2114B00600000D8B9 /* vm_map_compat.c */; };
+		C9D9BD19114B00600000D8B9 /* clock_priv.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCC5114B00600000D8B9 /* clock_priv.defs */; };
+		C9D9BD1A114B00600000D8B9 /* clock_reply.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCC6114B00600000D8B9 /* clock_reply.defs */; };
+		C9D9BD1B114B00600000D8B9 /* clock_sleep.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCC7114B00600000D8B9 /* clock_sleep.c */; };
+		C9D9BD1C114B00600000D8B9 /* clock.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCC8114B00600000D8B9 /* clock.defs */; };
+		C9D9BD1D114B00600000D8B9 /* error_codes.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCD0114B00600000D8B9 /* error_codes.c */; };
+		C9D9BD1E114B00600000D8B9 /* errorlib.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCD1114B00600000D8B9 /* errorlib.h */; };
+		C9D9BD1F114B00600000D8B9 /* exc_catcher_state_identity.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCD2114B00600000D8B9 /* exc_catcher_state_identity.c */; };
+		C9D9BD20114B00600000D8B9 /* exc_catcher_state.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCD3114B00600000D8B9 /* exc_catcher_state.c */; };
+		C9D9BD21114B00600000D8B9 /* exc_catcher.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCD4114B00600000D8B9 /* exc_catcher.c */; };
+		C9D9BD22114B00600000D8B9 /* exc.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCD5114B00600000D8B9 /* exc.defs */; settings = {ATTRIBUTES = (Client, Server, ); }; };
+		C9D9BD23114B00600000D8B9 /* externs.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCD6114B00600000D8B9 /* externs.h */; };
+		C9D9BD24114B00600000D8B9 /* fprintf_stderr.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCD7114B00600000D8B9 /* fprintf_stderr.c */; };
+		C9D9BD25114B00600000D8B9 /* errorlib.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCD9114B00600000D8B9 /* errorlib.h */; };
+		C9D9BD26114B00600000D8B9 /* mach.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDA114B00600000D8B9 /* mach.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		C9D9BD27114B00600000D8B9 /* mach_error.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDB114B00600000D8B9 /* mach_error.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		C9D9BD28114B00600000D8B9 /* mach_init.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDC114B00600000D8B9 /* mach_init.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		C9D9BD29114B00600000D8B9 /* mach_interface.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDD114B00600000D8B9 /* mach_interface.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		C9D9BD2B114B00600000D8B9 /* port_obj.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDF114B00600000D8B9 /* port_obj.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		C9D9BD2C114B00600000D8B9 /* sync.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCE0114B00600000D8B9 /* sync.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		C9D9BD2D114B00600000D8B9 /* task.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCE1114B00600000D8B9 /* task.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		C9D9BD2E114B00600000D8B9 /* thread_act.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCE2114B00600000D8B9 /* thread_act.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		C9D9BD2F114B00600000D8B9 /* vm_task.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCE3114B00600000D8B9 /* vm_task.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		C9D9BD30114B00600000D8B9 /* host_priv.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCE4114B00600000D8B9 /* host_priv.defs */; };
+		C9D9BD31114B00600000D8B9 /* host_security.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCE5114B00600000D8B9 /* host_security.defs */; };
+		C9D9BD34114B00600000D8B9 /* ledger.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCE9114B00600000D8B9 /* ledger.defs */; };
+		C9D9BD35114B00600000D8B9 /* lock_set.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCEA114B00600000D8B9 /* lock_set.defs */; };
+		C9D9BD36114B00600000D8B9 /* mach_error_string.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCEB114B00600000D8B9 /* mach_error_string.c */; };
+		C9D9BD37114B00600000D8B9 /* mach_error.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCEC114B00600000D8B9 /* mach_error.c */; };
+		C9D9BD38114B00600000D8B9 /* mach_host.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCED114B00600000D8B9 /* mach_host.defs */; };
+		C9D9BD3B114B00600000D8B9 /* mach_init.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCF0114B00600000D8B9 /* mach_init.c */; };
+		C9D9BD3C114B00600000D8B9 /* mach_msg.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCF1114B00600000D8B9 /* mach_msg.c */; };
+		C9D9BD3D114B00600000D8B9 /* mach_port.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCF2114B00600000D8B9 /* mach_port.defs */; };
+		C9D9BD3E114B00600000D8B9 /* mach_traps.s in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCF3114B00600000D8B9 /* mach_traps.s */; };
+		C9D9BD3F114B00600000D8B9 /* mach_vm.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCF4114B00600000D8B9 /* mach_vm.defs */; };
+		C9D9BD41114B00600000D8B9 /* mig_allocate.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCF6114B00600000D8B9 /* mig_allocate.c */; };
+		C9D9BD42114B00600000D8B9 /* mig_deallocate.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCF7114B00600000D8B9 /* mig_deallocate.c */; };
+		C9D9BD43114B00600000D8B9 /* mig_reply_setup.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCF8114B00600000D8B9 /* mig_reply_setup.c */; };
+		C9D9BD44114B00600000D8B9 /* mig_strncpy.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCF9114B00600000D8B9 /* mig_strncpy.c */; };
+		C9D9BD45114B00600000D8B9 /* ms_thread_switch.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCFA114B00600000D8B9 /* ms_thread_switch.c */; };
+		C9D9BD46114B00600000D8B9 /* notify.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCFB114B00600000D8B9 /* notify.defs */; };
+		C9D9BD47114B00600000D8B9 /* panic.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCFC114B00600000D8B9 /* panic.c */; };
+		C9D9BD48114B00600000D8B9 /* port_obj.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCFD114B00600000D8B9 /* port_obj.c */; };
+		C9D9BD4C114B00600000D8B9 /* processor_set.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BD03114B00600000D8B9 /* processor_set.defs */; };
+		C9D9BD4D114B00600000D8B9 /* processor.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BD04114B00600000D8B9 /* processor.defs */; };
+		C9D9BD4F114B00600000D8B9 /* semaphore.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BD06114B00600000D8B9 /* semaphore.c */; };
+		C9D9BD50114B00600000D8B9 /* key_defs.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BD08114B00600000D8B9 /* key_defs.h */; };
+		C9D9BD51114B00600000D8B9 /* ls_defs.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BD09114B00600000D8B9 /* ls_defs.h */; };
+		C9D9BD53114B00600000D8B9 /* netname.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BD0B114B00600000D8B9 /* netname.defs */; };
+		C9D9BD54114B00600000D8B9 /* netname_defs.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BD0C114B00600000D8B9 /* netname_defs.h */; };
+		C9D9BD55114B00600000D8B9 /* nm_defs.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BD0D114B00600000D8B9 /* nm_defs.h */; };
+		C9D9BD56114B00600000D8B9 /* slot_name.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BD0E114B00600000D8B9 /* slot_name.c */; };
+		C9D9BD57114B00600000D8B9 /* task.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BD0F114B00600000D8B9 /* task.defs */; };
+		C9D9BD58114B00600000D8B9 /* thread_act.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BD10114B00600000D8B9 /* thread_act.defs */; };
+		C9D9BD59114B00600000D8B9 /* vm_map.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BD11114B00600000D8B9 /* vm_map.defs */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXContainerItemProxy section */
+		242AB67811ED03ED00107336 /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 24614EF311E7C98600E78584;
+			remoteInfo = Syscalls;
+		};
+		249C61091194750700ED73F3 /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = D2AAC0620554660B00DB518D;
+			remoteInfo = "Libmach Static";
+		};
+		249C61141194756A00ED73F3 /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 08FB7793FE84155DC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 249C60FE1194747600ED73F3;
+			remoteInfo = Libmach;
+		};
+/* End PBXContainerItemProxy section */
+
+/* Begin PBXFileReference section */
+		240D716711933ED300556E97 /* mach_install_mig.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = mach_install_mig.sh; sourceTree = "<group>"; };
+		2419382A12135FF6003CDE41 /* chmod.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = chmod.c; sourceTree = "<group>"; };
+		2427FA821200BCF800EF7A1F /* compat-symlinks.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = "compat-symlinks.sh"; sourceTree = "<group>"; };
+		242AB66511EBDC1200107336 /* errno.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = errno.c; sourceTree = "<group>"; };
+		24484A7311F51E9800E10CD2 /* string.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = string.h; sourceTree = "<group>"; };
+		24484A7411F51E9800E10CD2 /* string.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = string.c; sourceTree = "<group>"; };
+		24484A9211F61D1900E10CD2 /* mig_reply_port.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mig_reply_port.h; sourceTree = "<group>"; };
+		24484A9311F61D1900E10CD2 /* mig_reply_port.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mig_reply_port.c; sourceTree = "<group>"; };
+		24614EA111E7A2ED00E78584 /* compile-syscalls.pl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.perl; path = "compile-syscalls.pl"; sourceTree = "<group>"; };
+		24614F0311E7CB5B00E78584 /* syscalls.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = syscalls.a; path = $BUILD_ROOT/syscalls.a; sourceTree = "<absolute>"; };
+		247A08B211F8B05900E4693F /* _libkernel_init.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = _libkernel_init.h; sourceTree = "<group>"; };
+		247A08B311F8B05900E4693F /* _libkernel_init.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = _libkernel_init.c; sourceTree = "<group>"; };
+		247A08FF11F8E18000E4693F /* abort.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = abort.h; sourceTree = "<group>"; };
+		247A091611F8E7A800E4693F /* exc_catcher.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = exc_catcher.h; sourceTree = "<group>"; };
+		2485235411582D8F0051B413 /* mach_legacy.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mach_legacy.c; sourceTree = "<group>"; };
+		248AA962122C7B2A0085F5B1 /* unlink.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = unlink.c; sourceTree = "<group>"; };
+		248AA964122C7C330085F5B1 /* rmdir.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = rmdir.c; sourceTree = "<group>"; };
+		248AA966122C7CDA0085F5B1 /* rename.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = rename.c; sourceTree = "<group>"; };
+		248BA01C121C56BF008C073F /* connect.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = connect.c; sourceTree = "<group>"; };
+		248BA01E121C607E008C073F /* fchmod.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = fchmod.c; sourceTree = "<group>"; };
+		248BA04B121C8EE4008C073F /* fcntl-base.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "fcntl-base.c"; sourceTree = "<group>"; };
+		248BA04E121C8F06008C073F /* fcntl.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = fcntl.c; sourceTree = "<group>"; };
+		248BA051121C8FE2008C073F /* fcntl-cancel.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "fcntl-cancel.c"; sourceTree = "<group>"; };
+		248BA068121D9E27008C073F /* getrlimit.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = getrlimit.c; sourceTree = "<group>"; };
+		248BA07F121DA36B008C073F /* ioctl.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = ioctl.c; sourceTree = "<group>"; };
+		248BA081121DA4F3008C073F /* kill.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = kill.c; sourceTree = "<group>"; };
+		248BA084121DA5E4008C073F /* kill.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = kill.c; sourceTree = "<group>"; };
+		248BA086121DA72D008C073F /* mmap.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mmap.c; sourceTree = "<group>"; };
+		248BA088121DA8E0008C073F /* mprotect.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mprotect.c; sourceTree = "<group>"; };
+		248BA08A121DAC86008C073F /* msync.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = msync.c; sourceTree = "<group>"; };
+		248BA08C121DB0E7008C073F /* munmap.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = munmap.c; sourceTree = "<group>"; };
+		248BA08E121DC545008C073F /* open.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = open.c; sourceTree = "<group>"; };
+		248BA090121DDD7F008C073F /* select-base.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "select-base.c"; sourceTree = "<group>"; };
+		248BA092121DE369008C073F /* select.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = select.c; sourceTree = "<group>"; };
+		248BA094121DE565008C073F /* select-pre1050.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "select-pre1050.c"; sourceTree = "<group>"; };
+		248BA0B2121DE760008C073F /* select-cancel.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "select-cancel.c"; sourceTree = "<group>"; };
+		248BA0BC121DE902008C073F /* select.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = select.c; sourceTree = "<group>"; };
+		248BA0CC121DEBEF008C073F /* setrlimit.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = setrlimit.c; sourceTree = "<group>"; };
+		249C60FF1194747600ED73F3 /* libsystem_kernel.dylib */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.dylib"; includeInIndex = 0; path = libsystem_kernel.dylib; sourceTree = BUILT_PRODUCTS_DIR; };
+		249C612C1194827D00ED73F3 /* dylib_link.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dylib_link.c; sourceTree = "<group>"; };
+		24A7C5AE11FF8DA6007669EB /* accept.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = accept.c; sourceTree = "<group>"; };
+		24A7C5AF11FF8DA6007669EB /* bind.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = bind.c; sourceTree = "<group>"; };
+		24A7C5B111FF8DA6007669EB /* getattrlist.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = getattrlist.c; sourceTree = "<group>"; };
+		24A7C5B211FF8DA6007669EB /* getpeername.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = getpeername.c; sourceTree = "<group>"; };
+		24A7C5B311FF8DA6007669EB /* getsockname.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = getsockname.c; sourceTree = "<group>"; };
+		24A7C5B411FF8DA6007669EB /* lchown.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = lchown.c; sourceTree = "<group>"; };
+		24A7C5B511FF8DA6007669EB /* listen.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = listen.c; sourceTree = "<group>"; };
+		24A7C5B611FF8DA6007669EB /* recvfrom.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = recvfrom.c; sourceTree = "<group>"; };
+		24A7C5B711FF8DA6007669EB /* recvmsg.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = recvmsg.c; sourceTree = "<group>"; };
+		24A7C5B811FF8DA6007669EB /* sendmsg.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = sendmsg.c; sourceTree = "<group>"; };
+		24A7C5B911FF8DA6007669EB /* sendto.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = sendto.c; sourceTree = "<group>"; };
+		24A7C5BA11FF8DA6007669EB /* setattrlist.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = setattrlist.c; sourceTree = "<group>"; };
+		24A7C5BB11FF8DA6007669EB /* socketpair.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = socketpair.c; sourceTree = "<group>"; };
+		24A7C5CB11FF973C007669EB /* _errno.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = _errno.h; sourceTree = "<group>"; };
+		24B028D511FF4FBB00CA64A9 /* memcpy.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = memcpy.c; sourceTree = "<group>"; };
+		24B223AF121DFD36007DAEDE /* sigsuspend.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = sigsuspend.c; sourceTree = "<group>"; };
+		24B223B1121DFE6D007DAEDE /* sigsuspend-cancel.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "sigsuspend-cancel.c"; sourceTree = "<group>"; };
+		24B223B3121DFF12007DAEDE /* sigsuspend-base.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "sigsuspend-base.c"; sourceTree = "<group>"; };
+		24B223B4121DFF29007DAEDE /* sigsuspend.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = sigsuspend.c; sourceTree = "<group>"; };
+		24B8C2611237F53900D36CC3 /* remove-counter.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "remove-counter.c"; sourceTree = "<group>"; };
+		24D1156611E671B20063D54D /* __fork.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __fork.s; sourceTree = "<group>"; };
+		24D1156711E671B20063D54D /* __getpid.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __getpid.s; sourceTree = "<group>"; };
+		24D1156811E671B20063D54D /* __gettimeofday.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __gettimeofday.s; sourceTree = "<group>"; };
+		24D1156911E671B20063D54D /* __lseek.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __lseek.s; sourceTree = "<group>"; };
+		24D1156A11E671B20063D54D /* __pipe.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __pipe.s; sourceTree = "<group>"; };
+		24D1156B11E671B20063D54D /* __psynch_cvbroad.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __psynch_cvbroad.s; sourceTree = "<group>"; };
+		24D1156C11E671B20063D54D /* __psynch_cvwait.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __psynch_cvwait.s; sourceTree = "<group>"; };
+		24D1156D11E671B20063D54D /* __ptrace.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __ptrace.s; sourceTree = "<group>"; };
+		24D1156E11E671B20063D54D /* __sigaltstack.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __sigaltstack.s; sourceTree = "<group>"; };
+		24D1156F11E671B20063D54D /* __sigreturn.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __sigreturn.s; sourceTree = "<group>"; };
+		24D1157011E671B20063D54D /* __syscall.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __syscall.s; sourceTree = "<group>"; };
+		24D1157111E671B20063D54D /* __thread_selfid.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __thread_selfid.s; sourceTree = "<group>"; };
+		24D1157211E671B20063D54D /* __vfork.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __vfork.s; sourceTree = "<group>"; };
+		24D1157311E671B20063D54D /* custom.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = custom.s; sourceTree = "<group>"; };
+		24D1157411E671B20063D54D /* SYS.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SYS.h; sourceTree = "<group>"; };
+		24D1158C11E672270063D54D /* syscall.map */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = syscall.map; sourceTree = "<group>"; };
+		24D1158F11E672270063D54D /* syscall.map */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = syscall.map; sourceTree = "<group>"; };
+		24D1159111E672270063D54D /* syscall.map */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = syscall.map; sourceTree = "<group>"; };
+		24D1159711E672270063D54D /* syscall.map */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = syscall.map; sourceTree = "<group>"; };
+		24D1159811E672270063D54D /* syscall.map */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = syscall.map; sourceTree = "<group>"; };
+		24D1159911E6723E0063D54D /* create-syscalls.pl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.perl; path = "create-syscalls.pl"; sourceTree = "<group>"; };
+		24E47824120881DF009A384D /* _libc_funcptr.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = _libc_funcptr.c; sourceTree = "<group>"; };
+		C99A4F4E1305B1B70054B7B7 /* __get_cpu_capabilities.s */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.asm; path = __get_cpu_capabilities.s; sourceTree = "<group>"; };
+		C99A4F511305B43F0054B7B7 /* init_cpu_capabilities.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = init_cpu_capabilities.c; sourceTree = "<group>"; };
+		C9D9BCBF114B00600000D8B9 /* .open_source_exclude */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .open_source_exclude; sourceTree = "<group>"; };
+		C9D9BCC2114B00600000D8B9 /* vm_map_compat.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = vm_map_compat.c; sourceTree = "<group>"; };
+		C9D9BCC5114B00600000D8B9 /* clock_priv.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = clock_priv.defs; sourceTree = "<group>"; };
+		C9D9BCC6114B00600000D8B9 /* clock_reply.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = clock_reply.defs; sourceTree = "<group>"; };
+		C9D9BCC7114B00600000D8B9 /* clock_sleep.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = clock_sleep.c; sourceTree = "<group>"; };
+		C9D9BCC8114B00600000D8B9 /* clock.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = clock.defs; sourceTree = "<group>"; };
+		C9D9BCC9114B00600000D8B9 /* err_iokit.sub */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = err_iokit.sub; sourceTree = "<group>"; };
+		C9D9BCCA114B00600000D8B9 /* err_ipc.sub */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = err_ipc.sub; sourceTree = "<group>"; };
+		C9D9BCCB114B00600000D8B9 /* err_kern.sub */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = err_kern.sub; sourceTree = "<group>"; };
+		C9D9BCCC114B00600000D8B9 /* err_libkern.sub */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = err_libkern.sub; sourceTree = "<group>"; };
+		C9D9BCCD114B00600000D8B9 /* err_mach_ipc.sub */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = err_mach_ipc.sub; sourceTree = "<group>"; };
+		C9D9BCCE114B00600000D8B9 /* err_server.sub */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = err_server.sub; sourceTree = "<group>"; };
+		C9D9BCCF114B00600000D8B9 /* err_us.sub */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = err_us.sub; sourceTree = "<group>"; };
+		C9D9BCD0114B00600000D8B9 /* error_codes.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = error_codes.c; sourceTree = "<group>"; };
+		C9D9BCD1114B00600000D8B9 /* errorlib.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = errorlib.h; sourceTree = "<group>"; };
+		C9D9BCD2114B00600000D8B9 /* exc_catcher_state_identity.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = exc_catcher_state_identity.c; sourceTree = "<group>"; };
+		C9D9BCD3114B00600000D8B9 /* exc_catcher_state.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = exc_catcher_state.c; sourceTree = "<group>"; };
+		C9D9BCD4114B00600000D8B9 /* exc_catcher.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = exc_catcher.c; sourceTree = "<group>"; };
+		C9D9BCD5114B00600000D8B9 /* exc.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = exc.defs; sourceTree = "<group>"; };
+		C9D9BCD6114B00600000D8B9 /* externs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = externs.h; sourceTree = "<group>"; };
+		C9D9BCD7114B00600000D8B9 /* fprintf_stderr.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = fprintf_stderr.c; sourceTree = "<group>"; };
+		C9D9BCD9114B00600000D8B9 /* errorlib.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = errorlib.h; sourceTree = "<group>"; };
+		C9D9BCDA114B00600000D8B9 /* mach.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mach.h; sourceTree = "<group>"; };
+		C9D9BCDB114B00600000D8B9 /* mach_error.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mach_error.h; sourceTree = "<group>"; };
+		C9D9BCDC114B00600000D8B9 /* mach_init.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mach_init.h; sourceTree = "<group>"; };
+		C9D9BCDD114B00600000D8B9 /* mach_interface.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mach_interface.h; sourceTree = "<group>"; };
+		C9D9BCDF114B00600000D8B9 /* port_obj.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = port_obj.h; sourceTree = "<group>"; };
+		C9D9BCE0114B00600000D8B9 /* sync.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sync.h; sourceTree = "<group>"; };
+		C9D9BCE1114B00600000D8B9 /* task.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = task.h; sourceTree = "<group>"; };
+		C9D9BCE2114B00600000D8B9 /* thread_act.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = thread_act.h; sourceTree = "<group>"; };
+		C9D9BCE3114B00600000D8B9 /* vm_task.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = vm_task.h; sourceTree = "<group>"; };
+		C9D9BCE4114B00600000D8B9 /* host_priv.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = host_priv.defs; sourceTree = "<group>"; };
+		C9D9BCE5114B00600000D8B9 /* host_security.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = host_security.defs; sourceTree = "<group>"; };
+		C9D9BCE9114B00600000D8B9 /* ledger.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = ledger.defs; sourceTree = "<group>"; };
+		C9D9BCEA114B00600000D8B9 /* lock_set.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = lock_set.defs; sourceTree = "<group>"; };
+		C9D9BCEB114B00600000D8B9 /* mach_error_string.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mach_error_string.c; sourceTree = "<group>"; };
+		C9D9BCEC114B00600000D8B9 /* mach_error.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mach_error.c; sourceTree = "<group>"; };
+		C9D9BCED114B00600000D8B9 /* mach_host.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = mach_host.defs; sourceTree = "<group>"; };
+		C9D9BCF0114B00600000D8B9 /* mach_init.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mach_init.c; sourceTree = "<group>"; };
+		C9D9BCF1114B00600000D8B9 /* mach_msg.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mach_msg.c; sourceTree = "<group>"; };
+		C9D9BCF2114B00600000D8B9 /* mach_port.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = mach_port.defs; sourceTree = "<group>"; };
+		C9D9BCF3114B00600000D8B9 /* mach_traps.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = mach_traps.s; sourceTree = "<group>"; };
+		C9D9BCF4114B00600000D8B9 /* mach_vm.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = mach_vm.defs; sourceTree = "<group>"; };
+		C9D9BCF6114B00600000D8B9 /* mig_allocate.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mig_allocate.c; sourceTree = "<group>"; };
+		C9D9BCF7114B00600000D8B9 /* mig_deallocate.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mig_deallocate.c; sourceTree = "<group>"; };
+		C9D9BCF8114B00600000D8B9 /* mig_reply_setup.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mig_reply_setup.c; sourceTree = "<group>"; };
+		C9D9BCF9114B00600000D8B9 /* mig_strncpy.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mig_strncpy.c; sourceTree = "<group>"; };
+		C9D9BCFA114B00600000D8B9 /* ms_thread_switch.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = ms_thread_switch.c; sourceTree = "<group>"; };
+		C9D9BCFB114B00600000D8B9 /* notify.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = notify.defs; sourceTree = "<group>"; };
+		C9D9BCFC114B00600000D8B9 /* panic.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = panic.c; sourceTree = "<group>"; };
+		C9D9BCFD114B00600000D8B9 /* port_obj.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = port_obj.c; sourceTree = "<group>"; };
+		C9D9BD03114B00600000D8B9 /* processor_set.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = processor_set.defs; sourceTree = "<group>"; };
+		C9D9BD04114B00600000D8B9 /* processor.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = processor.defs; sourceTree = "<group>"; };
+		C9D9BD06114B00600000D8B9 /* semaphore.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = semaphore.c; sourceTree = "<group>"; };
+		C9D9BD08114B00600000D8B9 /* key_defs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = key_defs.h; sourceTree = "<group>"; };
+		C9D9BD09114B00600000D8B9 /* ls_defs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ls_defs.h; sourceTree = "<group>"; };
+		C9D9BD0B114B00600000D8B9 /* netname.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = netname.defs; sourceTree = "<group>"; };
+		C9D9BD0C114B00600000D8B9 /* netname_defs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = netname_defs.h; sourceTree = "<group>"; };
+		C9D9BD0D114B00600000D8B9 /* nm_defs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = nm_defs.h; sourceTree = "<group>"; };
+		C9D9BD0E114B00600000D8B9 /* slot_name.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = slot_name.c; sourceTree = "<group>"; };
+		C9D9BD0F114B00600000D8B9 /* task.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = task.defs; sourceTree = "<group>"; };
+		C9D9BD10114B00600000D8B9 /* thread_act.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = thread_act.defs; sourceTree = "<group>"; };
+		C9D9BD11114B00600000D8B9 /* vm_map.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = vm_map.defs; sourceTree = "<group>"; };
+		C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = Libsyscall.xcconfig; sourceTree = "<group>"; };
+		D2AAC0630554660B00DB518D /* libsystem_kernel.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libsystem_kernel.a; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		249C60FD1194747600ED73F3 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				249C610B1194750E00ED73F3 /* libsystem_kernel.a in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		D289988505E68E00004EDB86 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				24614F0411E7CB5B00E78584 /* syscalls.a in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		08FB7794FE84155DC02AAC07 /* mach */ = {
+			isa = PBXGroup;
+			children = (
+				C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */,
+				24D1158911E672270063D54D /* Platforms */,
+				24D1156511E671B20063D54D /* custom */,
+				08FB7795FE84155DC02AAC07 /* mach */,
+				247A08B011F8AF1700E4693F /* wrappers */,
+				240D716611933ED300556E97 /* xcodescripts */,
+				1AB674ADFE9D54B511CA2CBB /* Products */,
+			);
+			name = mach;
+			sourceTree = "<group>";
+		};
+		08FB7795FE84155DC02AAC07 /* mach */ = {
+			isa = PBXGroup;
+			children = (
+				C9D9BCBE114B00600000D8B9 /* arm */,
+				247A08FF11F8E18000E4693F /* abort.h */,
+				C9D9BCC5114B00600000D8B9 /* clock_priv.defs */,
+				C9D9BCC6114B00600000D8B9 /* clock_reply.defs */,
+				C9D9BCC7114B00600000D8B9 /* clock_sleep.c */,
+				C9D9BCC8114B00600000D8B9 /* clock.defs */,
+				C9D9BCC9114B00600000D8B9 /* err_iokit.sub */,
+				C9D9BCCA114B00600000D8B9 /* err_ipc.sub */,
+				C9D9BCCB114B00600000D8B9 /* err_kern.sub */,
+				C9D9BCCC114B00600000D8B9 /* err_libkern.sub */,
+				C9D9BCCD114B00600000D8B9 /* err_mach_ipc.sub */,
+				C9D9BCCE114B00600000D8B9 /* err_server.sub */,
+				C9D9BCCF114B00600000D8B9 /* err_us.sub */,
+				C9D9BCD0114B00600000D8B9 /* error_codes.c */,
+				C9D9BCD1114B00600000D8B9 /* errorlib.h */,
+				247A091611F8E7A800E4693F /* exc_catcher.h */,
+				C9D9BCD2114B00600000D8B9 /* exc_catcher_state_identity.c */,
+				C9D9BCD3114B00600000D8B9 /* exc_catcher_state.c */,
+				C9D9BCD4114B00600000D8B9 /* exc_catcher.c */,
+				C9D9BCD5114B00600000D8B9 /* exc.defs */,
+				C9D9BCD6114B00600000D8B9 /* externs.h */,
+				C9D9BCD7114B00600000D8B9 /* fprintf_stderr.c */,
+				C9D9BCD8114B00600000D8B9 /* mach */,
+				C9D9BCE4114B00600000D8B9 /* host_priv.defs */,
+				C9D9BCE5114B00600000D8B9 /* host_security.defs */,
+				C9D9BCE9114B00600000D8B9 /* ledger.defs */,
+				C9D9BCEA114B00600000D8B9 /* lock_set.defs */,
+				C9D9BCEB114B00600000D8B9 /* mach_error_string.c */,
+				C9D9BCEC114B00600000D8B9 /* mach_error.c */,
+				C9D9BCED114B00600000D8B9 /* mach_host.defs */,
+				C9D9BCF0114B00600000D8B9 /* mach_init.c */,
+				2485235411582D8F0051B413 /* mach_legacy.c */,
+				C9D9BCF1114B00600000D8B9 /* mach_msg.c */,
+				C9D9BCF2114B00600000D8B9 /* mach_port.defs */,
+				C9D9BCF3114B00600000D8B9 /* mach_traps.s */,
+				C9D9BCF4114B00600000D8B9 /* mach_vm.defs */,
+				C9D9BCF6114B00600000D8B9 /* mig_allocate.c */,
+				C9D9BCF7114B00600000D8B9 /* mig_deallocate.c */,
+				C9D9BCF8114B00600000D8B9 /* mig_reply_setup.c */,
+				C9D9BCF9114B00600000D8B9 /* mig_strncpy.c */,
+				24484A9211F61D1900E10CD2 /* mig_reply_port.h */,
+				24484A9311F61D1900E10CD2 /* mig_reply_port.c */,
+				C9D9BCFA114B00600000D8B9 /* ms_thread_switch.c */,
+				C9D9BCFB114B00600000D8B9 /* notify.defs */,
+				C9D9BCFC114B00600000D8B9 /* panic.c */,
+				C9D9BCFD114B00600000D8B9 /* port_obj.c */,
+				C9D9BD03114B00600000D8B9 /* processor_set.defs */,
+				C9D9BD04114B00600000D8B9 /* processor.defs */,
+				C9D9BD06114B00600000D8B9 /* semaphore.c */,
+				C9D9BD07114B00600000D8B9 /* servers */,
+				C9D9BD0E114B00600000D8B9 /* slot_name.c */,
+				24484A7311F51E9800E10CD2 /* string.h */,
+				24484A7411F51E9800E10CD2 /* string.c */,
+				C9D9BD0F114B00600000D8B9 /* task.defs */,
+				C9D9BD10114B00600000D8B9 /* thread_act.defs */,
+				C9D9BD11114B00600000D8B9 /* vm_map.defs */,
+				249C612C1194827D00ED73F3 /* dylib_link.c */,
+			);
+			path = mach;
+			sourceTree = "<group>";
+		};
+		1AB674ADFE9D54B511CA2CBB /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				24614F0311E7CB5B00E78584 /* syscalls.a */,
+				D2AAC0630554660B00DB518D /* libsystem_kernel.a */,
+				249C60FF1194747600ED73F3 /* libsystem_kernel.dylib */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		240D716611933ED300556E97 /* xcodescripts */ = {
+			isa = PBXGroup;
+			children = (
+				24D1159911E6723E0063D54D /* create-syscalls.pl */,
+				24614EA111E7A2ED00E78584 /* compile-syscalls.pl */,
+				240D716711933ED300556E97 /* mach_install_mig.sh */,
+				2427FA821200BCF800EF7A1F /* compat-symlinks.sh */,
+			);
+			path = xcodescripts;
+			sourceTree = "<group>";
+		};
+		2419382912135FE1003CDE41 /* unix03 */ = {
+			isa = PBXGroup;
+			children = (
+				2419382A12135FF6003CDE41 /* chmod.c */,
+				248BA01E121C607E008C073F /* fchmod.c */,
+				248BA068121D9E27008C073F /* getrlimit.c */,
+				248BA086121DA72D008C073F /* mmap.c */,
+				248BA0CC121DEBEF008C073F /* setrlimit.c */,
+			);
+			path = unix03;
+			sourceTree = "<group>";
+		};
+		247A08B011F8AF1700E4693F /* wrappers */ = {
+			isa = PBXGroup;
+			children = (
+				248BA04A121C8EE4008C073F /* cancelable */,
+				2419382912135FE1003CDE41 /* unix03 */,
+				24A7C6951200AF8A007669EB /* legacy */,
+				C99A4F4E1305B1B70054B7B7 /* __get_cpu_capabilities.s */,
+				247A08B211F8B05900E4693F /* _libkernel_init.h */,
+				247A08B311F8B05900E4693F /* _libkernel_init.c */,
+				24E47824120881DF009A384D /* _libc_funcptr.c */,
+				24A7C5CB11FF973C007669EB /* _errno.h */,
+				C99A4F511305B43F0054B7B7 /* init_cpu_capabilities.c */,
+				248BA07F121DA36B008C073F /* ioctl.c */,
+				248BA081121DA4F3008C073F /* kill.c */,
+				24B028D511FF4FBB00CA64A9 /* memcpy.c */,
+				24B8C2611237F53900D36CC3 /* remove-counter.c */,
+				248AA966122C7CDA0085F5B1 /* rename.c */,
+				248AA964122C7C330085F5B1 /* rmdir.c */,
+				248BA090121DDD7F008C073F /* select-base.c */,
+				24B223B3121DFF12007DAEDE /* sigsuspend-base.c */,
+				248AA962122C7B2A0085F5B1 /* unlink.c */,
+			);
+			path = wrappers;
+			sourceTree = "<group>";
+		};
+		248BA04A121C8EE4008C073F /* cancelable */ = {
+			isa = PBXGroup;
+			children = (
+				248BA04B121C8EE4008C073F /* fcntl-base.c */,
+				248BA04E121C8F06008C073F /* fcntl.c */,
+				248BA051121C8FE2008C073F /* fcntl-cancel.c */,
+				248BA0BC121DE902008C073F /* select.c */,
+				248BA0B2121DE760008C073F /* select-cancel.c */,
+				24B223AF121DFD36007DAEDE /* sigsuspend.c */,
+				24B223B1121DFE6D007DAEDE /* sigsuspend-cancel.c */,
+			);
+			path = cancelable;
+			sourceTree = "<group>";
+		};
+		24A7C6951200AF8A007669EB /* legacy */ = {
+			isa = PBXGroup;
+			children = (
+				24A7C5AE11FF8DA6007669EB /* accept.c */,
+				24A7C5AF11FF8DA6007669EB /* bind.c */,
+				248BA01C121C56BF008C073F /* connect.c */,
+				24A7C5B111FF8DA6007669EB /* getattrlist.c */,
+				24A7C5B211FF8DA6007669EB /* getpeername.c */,
+				24A7C5B311FF8DA6007669EB /* getsockname.c */,
+				24A7C5B411FF8DA6007669EB /* lchown.c */,
+				24A7C5B511FF8DA6007669EB /* listen.c */,
+				248BA084121DA5E4008C073F /* kill.c */,
+				248BA088121DA8E0008C073F /* mprotect.c */,
+				248BA08A121DAC86008C073F /* msync.c */,
+				248BA08C121DB0E7008C073F /* munmap.c */,
+				248BA08E121DC545008C073F /* open.c */,
+				24A7C5B611FF8DA6007669EB /* recvfrom.c */,
+				24A7C5B711FF8DA6007669EB /* recvmsg.c */,
+				248BA092121DE369008C073F /* select.c */,
+				248BA094121DE565008C073F /* select-pre1050.c */,
+				24A7C5B811FF8DA6007669EB /* sendmsg.c */,
+				24A7C5B911FF8DA6007669EB /* sendto.c */,
+				24A7C5BA11FF8DA6007669EB /* setattrlist.c */,
+				24A7C5BB11FF8DA6007669EB /* socketpair.c */,
+				24B223B4121DFF29007DAEDE /* sigsuspend.c */,
+			);
+			path = legacy;
+			sourceTree = "<group>";
+		};
+		24D1156511E671B20063D54D /* custom */ = {
+			isa = PBXGroup;
+			children = (
+				24D1156611E671B20063D54D /* __fork.s */,
+				24D1156711E671B20063D54D /* __getpid.s */,
+				24D1156811E671B20063D54D /* __gettimeofday.s */,
+				24D1156911E671B20063D54D /* __lseek.s */,
+				24D1156A11E671B20063D54D /* __pipe.s */,
+				24D1156B11E671B20063D54D /* __psynch_cvbroad.s */,
+				24D1156C11E671B20063D54D /* __psynch_cvwait.s */,
+				24D1156D11E671B20063D54D /* __ptrace.s */,
+				24D1156E11E671B20063D54D /* __sigaltstack.s */,
+				24D1156F11E671B20063D54D /* __sigreturn.s */,
+				24D1157011E671B20063D54D /* __syscall.s */,
+				24D1157111E671B20063D54D /* __thread_selfid.s */,
+				24D1157211E671B20063D54D /* __vfork.s */,
+				24D1157311E671B20063D54D /* custom.s */,
+				24D1157411E671B20063D54D /* SYS.h */,
+				242AB66511EBDC1200107336 /* errno.c */,
+			);
+			path = custom;
+			sourceTree = "<group>";
+		};
+		24D1158911E672270063D54D /* Platforms */ = {
+			isa = PBXGroup;
+			children = (
+				24D1158A11E672270063D54D /* iPhoneOS */,
+				24D1158D11E672270063D54D /* MacOSX */,
+				24D1159811E672270063D54D /* syscall.map */,
+			);
+			path = Platforms;
+			sourceTree = "<group>";
+		};
+		24D1158A11E672270063D54D /* iPhoneOS */ = {
+			isa = PBXGroup;
+			children = (
+				24D1158B11E672270063D54D /* arm */,
+			);
+			path = iPhoneOS;
+			sourceTree = "<group>";
+		};
+		24D1158B11E672270063D54D /* arm */ = {
+			isa = PBXGroup;
+			children = (
+				24D1158C11E672270063D54D /* syscall.map */,
+			);
+			path = arm;
+			sourceTree = "<group>";
+		};
+		24D1158D11E672270063D54D /* MacOSX */ = {
+			isa = PBXGroup;
+			children = (
+				24D1158E11E672270063D54D /* arm */,
+				24D1159011E672270063D54D /* i386 */,
+				24D1159611E672270063D54D /* x86_64 */,
+			);
+			path = MacOSX;
+			sourceTree = "<group>";
+		};
+		24D1158E11E672270063D54D /* arm */ = {
+			isa = PBXGroup;
+			children = (
+				24D1158F11E672270063D54D /* syscall.map */,
+			);
+			path = arm;
+			sourceTree = "<group>";
+		};
+		24D1159011E672270063D54D /* i386 */ = {
+			isa = PBXGroup;
+			children = (
+				24D1159111E672270063D54D /* syscall.map */,
+			);
+			path = i386;
+			sourceTree = "<group>";
+		};
+		24D1159611E672270063D54D /* x86_64 */ = {
+			isa = PBXGroup;
+			children = (
+				24D1159711E672270063D54D /* syscall.map */,
+			);
+			path = x86_64;
+			sourceTree = "<group>";
+		};
+		C9D9BCBE114B00600000D8B9 /* arm */ = {
+			isa = PBXGroup;
+			children = (
+				C9D9BCBF114B00600000D8B9 /* .open_source_exclude */,
+				C9D9BCC2114B00600000D8B9 /* vm_map_compat.c */,
+			);
+			path = arm;
+			sourceTree = "<group>";
+		};
+		C9D9BCD8114B00600000D8B9 /* mach */ = {
+			isa = PBXGroup;
+			children = (
+				C9D9BCD9114B00600000D8B9 /* errorlib.h */,
+				C9D9BCDA114B00600000D8B9 /* mach.h */,
+				C9D9BCDB114B00600000D8B9 /* mach_error.h */,
+				C9D9BCDC114B00600000D8B9 /* mach_init.h */,
+				C9D9BCDD114B00600000D8B9 /* mach_interface.h */,
+				C9D9BCDF114B00600000D8B9 /* port_obj.h */,
+				C9D9BCE0114B00600000D8B9 /* sync.h */,
+				C9D9BCE1114B00600000D8B9 /* task.h */,
+				C9D9BCE2114B00600000D8B9 /* thread_act.h */,
+				C9D9BCE3114B00600000D8B9 /* vm_task.h */,
+			);
+			path = mach;
+			sourceTree = "<group>";
+		};
+		C9D9BD07114B00600000D8B9 /* servers */ = {
+			isa = PBXGroup;
+			children = (
+				C9D9BD08114B00600000D8B9 /* key_defs.h */,
+				C9D9BD09114B00600000D8B9 /* ls_defs.h */,
+				C9D9BD0B114B00600000D8B9 /* netname.defs */,
+				C9D9BD0C114B00600000D8B9 /* netname_defs.h */,
+				C9D9BD0D114B00600000D8B9 /* nm_defs.h */,
+			);
+			path = servers;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+		D2AAC0600554660B00DB518D /* Headers */ = {
+			isa = PBXHeadersBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				C9D9BD1E114B00600000D8B9 /* errorlib.h in Headers */,
+				C9D9BD23114B00600000D8B9 /* externs.h in Headers */,
+				C9D9BD25114B00600000D8B9 /* errorlib.h in Headers */,
+				C9D9BD26114B00600000D8B9 /* mach.h in Headers */,
+				C9D9BD27114B00600000D8B9 /* mach_error.h in Headers */,
+				C9D9BD28114B00600000D8B9 /* mach_init.h in Headers */,
+				C9D9BD29114B00600000D8B9 /* mach_interface.h in Headers */,
+				C9D9BD2B114B00600000D8B9 /* port_obj.h in Headers */,
+				C9D9BD2C114B00600000D8B9 /* sync.h in Headers */,
+				C9D9BD2D114B00600000D8B9 /* task.h in Headers */,
+				C9D9BD2E114B00600000D8B9 /* thread_act.h in Headers */,
+				C9D9BD2F114B00600000D8B9 /* vm_task.h in Headers */,
+				C9D9BD50114B00600000D8B9 /* key_defs.h in Headers */,
+				C9D9BD51114B00600000D8B9 /* ls_defs.h in Headers */,
+				C9D9BD54114B00600000D8B9 /* netname_defs.h in Headers */,
+				C9D9BD55114B00600000D8B9 /* nm_defs.h in Headers */,
+				24D1158311E671B20063D54D /* SYS.h in Headers */,
+				247A090011F8E18000E4693F /* abort.h in Headers */,
+				247A091711F8E7A800E4693F /* exc_catcher.h in Headers */,
+				24B028F511FF5C3500CA64A9 /* _libkernel_init.h in Headers */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+		249C60FE1194747600ED73F3 /* Libmach Dynamic */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 249C6102119474D700ED73F3 /* Build configuration list for PBXNativeTarget "Libmach Dynamic" */;
+			buildPhases = (
+				249C61281194815000ED73F3 /* Sources */,
+				249C60FD1194747600ED73F3 /* Frameworks */,
+				2427FA811200BCDA00EF7A1F /* Compat Symlinks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+				249C610A1194750700ED73F3 /* PBXTargetDependency */,
+			);
+			name = "Libmach Dynamic";
+			productName = Libmach;
+			productReference = 249C60FF1194747600ED73F3 /* libsystem_kernel.dylib */;
+			productType = "com.apple.product-type.library.dynamic";
+		};
+		D2AAC0620554660B00DB518D /* Libmach */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 1DEB914A08733D8E0010E9CD /* Build configuration list for PBXNativeTarget "Libmach" */;
+			buildPhases = (
+				D2AAC0600554660B00DB518D /* Headers */,
+				D2AAC0610554660B00DB518D /* Sources */,
+				D289988505E68E00004EDB86 /* Frameworks */,
+				2487545E11629934000975E0 /* Install Headers */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+				242AB67911ED03ED00107336 /* PBXTargetDependency */,
+			);
+			name = Libmach;
+			productName = mach;
+			productReference = D2AAC0630554660B00DB518D /* libsystem_kernel.a */;
+			productType = "com.apple.product-type.library.static";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		08FB7793FE84155DC02AAC07 /* Project object */ = {
+			isa = PBXProject;
+			buildConfigurationList = 1DEB914E08733D8E0010E9CD /* Build configuration list for PBXProject "Libsyscall" */;
+			compatibilityVersion = "Xcode 3.1";
+			developmentRegion = English;
+			hasScannedForEncodings = 1;
+			knownRegions = (
+				English,
+				Japanese,
+				French,
+				German,
+			);
+			mainGroup = 08FB7794FE84155DC02AAC07 /* mach */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				249C61101194755D00ED73F3 /* Build */,
+				24614EF311E7C98600E78584 /* Syscalls */,
+				D2AAC0620554660B00DB518D /* Libmach */,
+				249C60FE1194747600ED73F3 /* Libmach Dynamic */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXShellScriptBuildPhase section */
+		2427FA811200BCDA00EF7A1F /* Compat Symlinks */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 8;
+			files = (
+			);
+			inputPaths = (
+			);
+			name = "Compat Symlinks";
+			outputPaths = (
+			);
+			runOnlyForDeploymentPostprocessing = 1;
+			shellPath = /bin/sh;
+			shellScript = "\"$PROJECT_DIR\"/xcodescripts/compat-symlinks.sh";
+		};
+		24614EF211E7C98600E78584 /* Generate Syscalls */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+			);
+			name = "Generate Syscalls";
+			outputPaths = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "set -x\n\nmkdir -p $OBJROOT/sys\n\n$SRCROOT/xcodescripts/create-syscalls.pl \\\n\t$SRCROOT/../bsd/kern/syscalls.master \\\n\t$SRCROOT/custom \\\n\t$SRCROOT/Platforms \\\n\t$MAP_PLATFORM \\\n\t$OBJROOT/sys\n";
+		};
+		24614EF611E7C9A000E78584 /* Compile Syscalls */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+			);
+			name = "Compile Syscalls";
+			outputPaths = (
+				"$(BUILD_ROOT)/syscalls.a",
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "set -x\n\nmkdir -p $OBJROOT/UninstalledProducts\n\n$SRCROOT/xcodescripts/compile-syscalls.pl \\\n\t$OBJROOT/sys/stubs.list \\\n\t$BUILD_ROOT/syscalls.a";
+		};
+		2487545E11629934000975E0 /* Install Headers */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+			);
+			name = "Install Headers";
+			outputPaths = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "\"$PROJECT_DIR\"/xcodescripts/mach_install_mig.sh";
+		};
+/* End PBXShellScriptBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+		249C61281194815000ED73F3 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				24E4782712088267009A384D /* _libc_funcptr.c in Sources */,
+				240BAC4C1214770F000A1719 /* memcpy.c in Sources */,
+				249C612F1194828600ED73F3 /* dylib_link.c in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		D2AAC0610554660B00DB518D /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				C9D9BD19114B00600000D8B9 /* clock_priv.defs in Sources */,
+				C9D9BD1A114B00600000D8B9 /* clock_reply.defs in Sources */,
+				C9D9BD1C114B00600000D8B9 /* clock.defs in Sources */,
+				C9D9BD22114B00600000D8B9 /* exc.defs in Sources */,
+				C9D9BD30114B00600000D8B9 /* host_priv.defs in Sources */,
+				C9D9BD31114B00600000D8B9 /* host_security.defs in Sources */,
+				C9D9BD34114B00600000D8B9 /* ledger.defs in Sources */,
+				C9D9BD35114B00600000D8B9 /* lock_set.defs in Sources */,
+				C9D9BD38114B00600000D8B9 /* mach_host.defs in Sources */,
+				C9D9BD3D114B00600000D8B9 /* mach_port.defs in Sources */,
+				C9D9BD3F114B00600000D8B9 /* mach_vm.defs in Sources */,
+				C9D9BD46114B00600000D8B9 /* notify.defs in Sources */,
+				C9D9BD4C114B00600000D8B9 /* processor_set.defs in Sources */,
+				C9D9BD4D114B00600000D8B9 /* processor.defs in Sources */,
+				C9D9BD53114B00600000D8B9 /* netname.defs in Sources */,
+				C9D9BD57114B00600000D8B9 /* task.defs in Sources */,
+				C9D9BD58114B00600000D8B9 /* thread_act.defs in Sources */,
+				C9D9BD59114B00600000D8B9 /* vm_map.defs in Sources */,
+				C9D9BD1B114B00600000D8B9 /* clock_sleep.c in Sources */,
+				C9D9BD1D114B00600000D8B9 /* error_codes.c in Sources */,
+				C9D9BD1F114B00600000D8B9 /* exc_catcher_state_identity.c in Sources */,
+				C9D9BD20114B00600000D8B9 /* exc_catcher_state.c in Sources */,
+				C9D9BD21114B00600000D8B9 /* exc_catcher.c in Sources */,
+				C9D9BD24114B00600000D8B9 /* fprintf_stderr.c in Sources */,
+				C9D9BD36114B00600000D8B9 /* mach_error_string.c in Sources */,
+				C9D9BD37114B00600000D8B9 /* mach_error.c in Sources */,
+				C9D9BD3B114B00600000D8B9 /* mach_init.c in Sources */,
+				C9D9BD3C114B00600000D8B9 /* mach_msg.c in Sources */,
+				C9D9BD3E114B00600000D8B9 /* mach_traps.s in Sources */,
+				C9D9BD41114B00600000D8B9 /* mig_allocate.c in Sources */,
+				C9D9BD42114B00600000D8B9 /* mig_deallocate.c in Sources */,
+				C9D9BD43114B00600000D8B9 /* mig_reply_setup.c in Sources */,
+				24484A9411F61D2B00E10CD2 /* mig_reply_port.c in Sources */,
+				C9D9BD44114B00600000D8B9 /* mig_strncpy.c in Sources */,
+				C9D9BD45114B00600000D8B9 /* ms_thread_switch.c in Sources */,
+				C9D9BD47114B00600000D8B9 /* panic.c in Sources */,
+				C9D9BD48114B00600000D8B9 /* port_obj.c in Sources */,
+				C9D9BD4F114B00600000D8B9 /* semaphore.c in Sources */,
+				C9D9BD56114B00600000D8B9 /* slot_name.c in Sources */,
+				24484A7511F6178E00E10CD2 /* string.c in Sources */,
+				2485235511582D8F0051B413 /* mach_legacy.c in Sources */,
+				C9D9BD17114B00600000D8B9 /* vm_map_compat.c in Sources */,
+				242AB66611EBDC1200107336 /* errno.c in Sources */,
+				247A08C211F8BDC900E4693F /* _libkernel_init.c in Sources */,
+				24A7C5BC11FF8DA6007669EB /* accept.c in Sources */,
+				24A7C5BD11FF8DA6007669EB /* bind.c in Sources */,
+				24A7C5BF11FF8DA6007669EB /* getattrlist.c in Sources */,
+				24A7C5C011FF8DA6007669EB /* getpeername.c in Sources */,
+				24A7C5C111FF8DA6007669EB /* getsockname.c in Sources */,
+				24A7C5C211FF8DA6007669EB /* lchown.c in Sources */,
+				24A7C5C311FF8DA6007669EB /* listen.c in Sources */,
+				24A7C5C411FF8DA6007669EB /* recvfrom.c in Sources */,
+				24A7C5C511FF8DA6007669EB /* recvmsg.c in Sources */,
+				24A7C5C611FF8DA6007669EB /* sendmsg.c in Sources */,
+				24A7C5C711FF8DA6007669EB /* sendto.c in Sources */,
+				24A7C5C811FF8DA6007669EB /* setattrlist.c in Sources */,
+				24A7C5C911FF8DA6007669EB /* socketpair.c in Sources */,
+				2419382B12135FF6003CDE41 /* chmod.c in Sources */,
+				248BA01D121C56BF008C073F /* connect.c in Sources */,
+				248BA01F121C607E008C073F /* fchmod.c in Sources */,
+				248BA04F121C8F06008C073F /* fcntl.c in Sources */,
+				248BA05C121C9649008C073F /* fcntl-cancel.c in Sources */,
+				248BA069121D9E27008C073F /* getrlimit.c in Sources */,
+				248BA080121DA36B008C073F /* ioctl.c in Sources */,
+				248BA082121DA4F3008C073F /* kill.c in Sources */,
+				248BA085121DA5E4008C073F /* kill.c in Sources */,
+				248BA087121DA72D008C073F /* mmap.c in Sources */,
+				248BA089121DA8E0008C073F /* mprotect.c in Sources */,
+				248BA08B121DAC86008C073F /* msync.c in Sources */,
+				248BA08D121DB0E7008C073F /* munmap.c in Sources */,
+				248BA08F121DC545008C073F /* open.c in Sources */,
+				248BA093121DE369008C073F /* select.c in Sources */,
+				248BA095121DE565008C073F /* select-pre1050.c in Sources */,
+				248BA0B3121DE760008C073F /* select-cancel.c in Sources */,
+				248BA0BE121DE902008C073F /* select.c in Sources */,
+				248BA0CD121DEBEF008C073F /* setrlimit.c in Sources */,
+				24B223B0121DFD36007DAEDE /* sigsuspend.c in Sources */,
+				24B223B2121DFE6D007DAEDE /* sigsuspend-cancel.c in Sources */,
+				24B223B5121DFF29007DAEDE /* sigsuspend.c in Sources */,
+				248AA963122C7B2A0085F5B1 /* unlink.c in Sources */,
+				248AA965122C7C330085F5B1 /* rmdir.c in Sources */,
+				248AA967122C7CDA0085F5B1 /* rename.c in Sources */,
+				24B8C2621237F53900D36CC3 /* remove-counter.c in Sources */,
+				C99A4F501305B2BD0054B7B7 /* __get_cpu_capabilities.s in Sources */,
+				C99A4F531305B43F0054B7B7 /* init_cpu_capabilities.c in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXTargetDependency section */
+		242AB67911ED03ED00107336 /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 24614EF311E7C98600E78584 /* Syscalls */;
+			targetProxy = 242AB67811ED03ED00107336 /* PBXContainerItemProxy */;
+		};
+		249C610A1194750700ED73F3 /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = D2AAC0620554660B00DB518D /* Libmach */;
+			targetProxy = 249C61091194750700ED73F3 /* PBXContainerItemProxy */;
+		};
+		249C61151194756A00ED73F3 /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 249C60FE1194747600ED73F3 /* Libmach Dynamic */;
+			targetProxy = 249C61141194756A00ED73F3 /* PBXContainerItemProxy */;
+		};
+/* End PBXTargetDependency section */
+
+/* Begin XCBuildConfiguration section */
+		1DEB914C08733D8E0010E9CD /* Release */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */;
+			buildSettings = {
+				COPY_PHASE_STRIP = NO;
+				INSTALL_PATH = /usr/local/lib/dyld;
+				"INSTALL_PATH[sdk=iphoneos*]" = /usr/local/lib/dyld;
+				"INSTALL_PATH[sdk=iphonesimulator*]" = "$(SDKROOT)/usr/local/lib/dyld";
+				"INSTALL_PATH[sdk=macosx*]" = /usr/local/lib/dyld;
+			};
+			name = Release;
+		};
+		1DEB915008733D8E0010E9CD /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_C_LANGUAGE_STANDARD = gnu99;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				PREBINDING = NO;
+			};
+			name = Release;
+		};
+		24614EF411E7C98600E78584 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ARCHS = "$(ARCHS_STANDARD_32_64_BIT)";
+				COPY_PHASE_STRIP = YES;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				GCC_ENABLE_FIX_AND_CONTINUE = NO;
+				MAP_PLATFORM = "$(MAP_PLATFORM_$(PLATFORM_NAME))";
+				MAP_PLATFORM_iphoneos = iPhoneOS;
+				MAP_PLATFORM_macosx = MacOSX;
+				PRODUCT_NAME = Syscalls;
+				ZERO_LINK = NO;
+			};
+			name = Release;
+		};
+		249C61001194747600ED73F3 /* Release */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */;
+			buildSettings = {
+				OTHER_LDFLAGS = (
+					"-umbrella",
+					System,
+					"-all_load",
+				);
+				VERSION_INFO_PREFIX = "___";
+			};
+			name = Release;
+		};
+		249C61111194755E00ED73F3 /* Release */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */;
+			buildSettings = {
+				PRODUCT_NAME = Build;
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		1DEB914A08733D8E0010E9CD /* Build configuration list for PBXNativeTarget "Libmach" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1DEB914C08733D8E0010E9CD /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		1DEB914E08733D8E0010E9CD /* Build configuration list for PBXProject "Libsyscall" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				1DEB915008733D8E0010E9CD /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		24614EFD11E7C9B900E78584 /* Build configuration list for PBXAggregateTarget "Syscalls" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				24614EF411E7C98600E78584 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		249C6102119474D700ED73F3 /* Build configuration list for PBXNativeTarget "Libmach Dynamic" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				249C61001194747600ED73F3 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		249C61191194756B00ED73F3 /* Build configuration list for PBXAggregateTarget "Build" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				249C61111194755E00ED73F3 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
+}
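The "Generate Syscalls" and "Compile Syscalls" build phases above carry their shell scripts as escaped one-line pbxproj strings. Unescaped, they read as follows (a direct transcription of the two shellScript values, nothing added):

    # Generate Syscalls
    set -x

    mkdir -p $OBJROOT/sys

    $SRCROOT/xcodescripts/create-syscalls.pl \
    	$SRCROOT/../bsd/kern/syscalls.master \
    	$SRCROOT/custom \
    	$SRCROOT/Platforms \
    	$MAP_PLATFORM \
    	$OBJROOT/sys

    # Compile Syscalls
    set -x

    mkdir -p $OBJROOT/UninstalledProducts

    $SRCROOT/xcodescripts/compile-syscalls.pl \
    	$OBJROOT/sys/stubs.list \
    	$BUILD_ROOT/syscalls.a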
diff --git a/libsyscall/Makefile b/libsyscall/Makefile
deleted file mode 100644
index 33f3e99ff..000000000
--- a/libsyscall/Makefile
+++ /dev/null
@@ -1,65 +0,0 @@
-#	@(#)Makefile	8.2 (Berkeley) 2/3/94
-# $FreeBSD: src/lib/libc/Makefile,v 1.31 2001/08/13 21:48:43 peter Exp $
-#
-# All library objects contain rcsid strings by default; they may be
-# excluded as a space-saving measure.  To produce a library that does
-# not contain these strings, delete -DLIBC_RCS and -DSYSLIBC_RCS
-# from CFLAGS below.  To remove these strings from just the system call
-# stubs, remove just -DSYSLIBC_RCS from CFLAGS.
-#
-# Yes, we build everything with -g, and strip it out later...
-#
-LIB=syscall
-SHLIB_MAJOR= 1
-SHLIB_MINOR= 0
-.if (${MACHINE_ARCH} == unknown)
-.ifdef RC_ARCHS
-MACHINE_ARCH != echo $(RC_ARCHS) | cut -f 1 -d " "
-.else
-MACHINE_ARCH != /usr/bin/arch
-.endif
-.endif
-.if !empty $(MACHINE_ARCH:M*64)
-LP64 = 1
-.endif
-SDKROOT ?= /
-CC = xcrun -sdk $(SDKROOT) gcc
-MIG = xcrun -sdk $(SDKROOT) mig
-MIGCC != xcrun -find -sdk $(SDKROOT) gcc
-.ifdef ALTFRAMEWORKSPATH
-PRIVINC = -F${ALTFRAMEWORKSPATH} -I${ALTFRAMEWORKSPATH}/System.framework/PrivateHeaders
-.else
-PRIVINC = -I${SDKROOT}/System/Library/Frameworks/System.framework/PrivateHeaders
-.endif
-CFLAGS += ${PRIVINC}
-CFLAGS += -no-cpp-precomp
-CFLAGS += -fno-common -pipe -Wmost -g
-CFLAGS += -DCF_EXCLUDE_CSTD_HEADERS -DCF_OPEN_SOURCE
-CFLAGS += -isysroot ${SDKROOT}
-AINC= -no-cpp-precomp
-AINC+= -arch ${MACHINE_ARCH} -g
-MIGDEFINES ?=
-CLEANFILES+=tags
-INSTALL_PIC_ARCHIVE=	yes
-PRECIOUSLIB=	yes
-
-# workaround for 3649783
-AINC += -fdollars-in-identifiers
-
-# If these aren't set give it expected defaults
-DESTDIR ?= ${DSTROOT}
-MAKEOBJDIR ?= ${OBJROOT}
-
-# add version string
-SRCS += libsyscall_version.c
-libsyscall_version.c:
-	${SDKROOT}/Developer/Makefiles/bin/version.pl Libsyscall > $@
-
-CFLAGS += -I${SYMROOT}
-.include "${.CURDIR}/Makefile.inc"
-.PATH: ${SYMROOT}
-.include "Makefile.xbs"
-.if exists(/usr/share/mk/bsd.init.mk)
-.include <bsd.init.mk>
-.endif
-.include <bsd.man.mk>
diff --git a/libsyscall/Makefile.inc b/libsyscall/Makefile.inc
deleted file mode 100644
index 0bc95b71c..000000000
--- a/libsyscall/Makefile.inc
+++ /dev/null
@@ -1,52 +0,0 @@
-# $FreeBSD: src/lib/libc/Makefile.inc,v 1.7 2001/04/04 18:17:25 tmm Exp $
-#
-# This file contains make rules that are shared by libc and libc_r.
-#
-# Define (empty) variables so that make doesn't give substitution
-# errors if the included makefiles don't change these:
-MDSRCS=
-MISRCS=
-MDASM=
-MIASM=
-NOASM=
-
-# SUPPRESSSRCS is used to prevent machine-independent files from being
-# built when a machine-dependent file defines multiple symbols.
-# Use MDSRCS to block one file, and SUPPRESSSRCS to block the others.
-SUPPRESSSRCS=
-
-# set object file suffix
-.if make(lib${LIB}_static.a)
-OBJSUFFIX = o
-.endif
-.if make(lib${LIB}_profile.a)
-OBJSUFFIX = po
-.endif
-.if make(lib${LIB}_debug.a)
-OBJSUFFIX = do
-.endif
-.if make(lib${LIB}.a)
-OBJSUFFIX = So
-.endif
-
-.if exists(${OBJROOT}/sys/Makefile.inc)
-.include "${OBJROOT}/sys/Makefile.inc"
-.endif
-.include "${.CURDIR}/include/Makefile.inc"
-.include "${.CURDIR}/mach/Makefile.inc"
-
-# If there are no machine dependent sources, append all the
-# machine-independent sources:
-.if empty(MDSRCS)
-SRCS+=	${MISRCS}
-.else
-# Append machine-dependent sources, then append machine-independent sources
-# for which there is no machine-dependent variant, and not being suppressed.
-SRCS+=	${MDSRCS}
-_SUPPRESS= ${MDSRCS} ${SUPPRESSSRCS}
-.for _src in ${MISRCS}
-.if ${_SUPPRESS:R:M${_src:R}} == ""
-SRCS+=	${_src}
-.endif
-.endfor
-.endif
diff --git a/libsyscall/Makefile.xbs b/libsyscall/Makefile.xbs
deleted file mode 100644
index 556597fef..000000000
--- a/libsyscall/Makefile.xbs
+++ /dev/null
@@ -1,130 +0,0 @@
-BSDMAKE = bsdmake -f Makefile
-
-.PATH: .
-.MAIN: all
-all: lib${LIB}.a lib${LIB}_static.a lib${LIB}_debug.a lib${LIB}_profile.a
-install: installhdrs install_lib${LIB}.a install_lib${LIB}_static.a \
-	 install_lib${LIB}_profile.a install_lib${LIB}_debug.a maninstall
-
-.SUFFIXES:
-.SUFFIXES: .o .po .So .do
-.SUFFIXES: .S .s .c .cc .cpp .cxx .m .C
-.SUFFIXES: .defs .h
-.SUFFIXES: User.c User.o User.po User.So User.do
-.SUFFIXES: Server.c Server.o Server.po Server.So Server.do
-
-OBJS+=  ${SRCS:N*.h:R:S/$/.o/g}
-DOBJS+= ${OBJS:.o=.do}
-POBJS+= ${OBJS:.o=.po} ${STATICOBJS:.o=.po}
-SOBJS+= ${OBJS:.o=.So}
-
-#### Standard C Rules #################################################
-.c.o User.cUser.o Server.cServer.o:
-	${CC} -static ${PRECFLAGS-${.IMPSRC:T}} ${CFLAGS} \
-	    ${CFLAGS-${.IMPSRC:T}} -Os ${OPTIMIZE-${.IMPSRC:T}} \
-	    -c ${.IMPSRC} -o ${.TARGET}
-.c.po User.cUser.po Server.cServer.po:
-	${CC} -pg ${PRECFLAGS-${.IMPSRC:T}} -DPROFILE ${CFLAGS} \
-	    ${CFLAGS-${.IMPSRC:T}} -Os ${OPTIMIZE-${.IMPSRC:T}} \
-	    -c ${.IMPSRC} -o ${.TARGET}
-.c.So User.cUser.So Server.cServer.So: 
-	${CC} ${PRECFLAGS-${.IMPSRC:T}} ${CFLAGS} ${CFLAGS-${.IMPSRC:T}} \
-	    -Os ${OPTIMIZE-${.IMPSRC:T}} -c ${.IMPSRC} -o ${.TARGET}
-.c.do User.cUser.do Server.cServer.do:
-	${CC} -g ${PRECFLAGS-${.IMPSRC:T}} -DDEBUG ${CFLAGS} \
-	    ${CFLAGS-${.IMPSRC:T}} -c ${.IMPSRC} -o ${.TARGET}
-
-#### Standard Assembler Rules #########################################
-.s.o .S.o:
-	${CC} -static -x assembler-with-cpp ${PRECFLAGS-${.IMPSRC:T}} \
-	    ${AINC} ${CFLAGS:M-[BIDFU]*} ${CFLAGS-${.IMPSRC:T}:M-[BIDFU]*} \
-	    -Os ${OPTIMIZE-${.IMPSRC:T}} -c ${.IMPSRC} -o ${.TARGET}
-.s.po .S.po:
-	${CC} -pg -x assembler-with-cpp ${PRECFLAGS-${.IMPSRC:T}} -DPROFILE \
-	    ${AINC} ${CFLAGS:M-[BIDFU]*} ${CFLAGS-${.IMPSRC:T}:M-[BIDFU]*} \
-	    -Os ${OPTIMIZE-${.IMPSRC:T}} -c ${.IMPSRC} -o ${.TARGET}
-.s.So .S.So:
-	${CC} -x assembler-with-cpp ${PRECFLAGS-${.IMPSRC:T}} \
-	    ${AINC} ${CFLAGS:M-[BIDFU]*} ${CFLAGS-${.IMPSRC:T}:M-[BIDFU]*} \
-	    -Os ${OPTIMIZE-${.IMPSRC:T}} -c ${.IMPSRC} -o ${.TARGET}
-.s.do .S.do:
-	${CC} -g -x assembler-with-cpp ${PRECFLAGS-${.IMPSRC:T}} -DDEBUG \
-	    ${AINC} ${CFLAGS:M-[BIDFU]*} ${CFLAGS-${.IMPSRC:T}:M-[BIDFU]*} \
-	    -c ${.IMPSRC} -o ${.TARGET}
-
-#### mig Rules ########################################################
-.defs.h .defsUser.c .defsServer.c:
-	$(MIG) ${PRIVINC} ${MIGDEFINES} -arch ${MACHINE_ARCH} -cc ${MIGCC} -user ${.PREFIX}User.c -server ${.PREFIX}Server.c -header ${.PREFIX}.h ${.IMPSRC}
-
-gen_mig_defs: ${SRVMIGHDRS} ${MIGHDRS}
-gen_md_mig_defs: ${MD_MIGHDRS}
-
-#### Library Rules ####################################################
-lib${LIB}_static.a:: ${OBJS} ${STATICOBJS} 
-	@${ECHO} building static ${LIB} library
-	@rm -f lib${LIB}_static.a
-	@${AR} cq lib${LIB}_static.a `lorder ${OBJS} ${STATICOBJS} | tsort -q` ${ARADD}
-	${RANLIB} lib${LIB}_static.a
-
-lib${LIB}_profile.a:: ${POBJS} ${POBJS2}
-	@${ECHO} building profiled ${LIB} library
-	@rm -f lib${LIB}_profile.a
-	@${AR} cq lib${LIB}_profile.a `lorder ${POBJS} | tsort -q` ${ARADD}
-	${RANLIB} lib${LIB}_profile.a
-
-lib${LIB}_debug.a:: ${DOBJS} ${DOBJS2}
-	@${ECHO} building debug ${LIB} library
-	@rm -f lib${LIB}_debug.a
-	@${AR} cq lib${LIB}_debug.a `lorder ${DOBJS} | tsort -q` ${ARADD}
-	${RANLIB} lib${LIB}_debug.a
-
-lib${LIB}.a:: ${SOBJS} ${SOBJS2}
-	@${ECHO} building standard ${LIB} library
-	@rm -f lib${LIB}.a
-	@${AR} cq lib${LIB}.a `lorder ${SOBJS} | tsort -q` ${ARADD}
-	${RANLIB} lib${LIB}.a
-
-CLEANFILES += ${DOBJS} lib${LIB}_static.a lib${LIB}_profile.a lib${LIB}_debug.a
-
-INCDIR = ${DESTDIR}/usr/include
-LOCINCDIR = ${DESTDIR}/usr/local/include
-SYSTEMFRAMEWORK = ${DESTDIR}/System/Library/Frameworks/System.framework
-PRIVHDRS = ${SYSTEMFRAMEWORK}/Versions/B/PrivateHeaders
-PRIVHDRSPPC = ${PRIVHDRS}/architecture/ppc
-KERNELFRAMEWORK = ${DESTDIR}/System/Library/Frameworks/Kernel.framework
-PRIVKERNELHDRS = ${KERNELFRAMEWORK}/Versions/A/PrivateHeaders
-
-ARCHDIR = ${MACHINE_ARCH:C/^armv.*$/arm/}
-
-installhdrs-md: gen_md_mig_defs
-	mkdir -p ${INCDIR}/mach/${ARCHDIR}
-	${INSTALL} -o 0 -c -m 444 ${MD_MIGHDRS} ${INCDIR}/mach/${ARCHDIR}
-	mkdir -p ${PRIVHDRSPPC}
-	${INSTALL} -c -m 444 ${PRIVHDRSPPCHDRS} ${PRIVHDRSPPC}
-
-installhdrs: gen_mig_defs
-	mkdir -p ${INCDIR}/mach
-	mkdir -p ${INCDIR}/servers
-	${INSTALL} -o 0 -c -m 444 ${MACH_INSTHDRS} ${INCDIR}/mach
-	${INSTALL} -o 0 -c -m 444 ${SRVHDRS} ${INCDIR}/servers
-	@for i in `find ${DESTDIR}/usr/include/mach ${DESTDIR}/usr/include/servers -name \*.h`; do \
-	    x=`fgrep '<mach/mig.h>' $$i | uniq -d`; \
-	    if [ -n "$$x" ]; then \
-		echo patching $$i; \
-		ed - $$i < ${SRCROOT}/libsyscall/fixdups.ed; \
-	    fi; \
-	done
-
-install_lib${LIB}_static.a:
-	${INSTALL} -c -m 444 lib${LIB}_static.a ${DESTDIR}/usr/local/lib/system/
-install_lib${LIB}_profile.a:
-	${INSTALL} -c -m 444 lib${LIB}_profile.a ${DESTDIR}/usr/local/lib/system
-install_lib${LIB}_debug.a:
-	${INSTALL} -c -m 444 lib${LIB}_debug.a ${DESTDIR}/usr/local/lib/system/
-install_lib${LIB}.a:
-	${INSTALL} -c -m 444 lib${LIB}.a ${DESTDIR}/usr/local/lib/system/
-
-clean:
-	rm -f ${OBJS} ${POBJS} ${DOBJS} ${SOBJS} ${CLEANFILES}
-	rm -f lib${LIB}.a lib${LIB}_static.a lib${LIB}_profile.a \
-		lib${LIB}_debug.a 
diff --git a/libsyscall/Platforms/MacOSX/i386/syscall.map b/libsyscall/Platforms/MacOSX/i386/syscall.map
new file mode 100644
index 000000000..bdfa11aac
--- /dev/null
+++ b/libsyscall/Platforms/MacOSX/i386/syscall.map
@@ -0,0 +1,93 @@
+_accept$NOCANCEL$UNIX2003	___accept_nocancel
+_accept$UNIX2003	___accept
+_aio_suspend	___aio_suspend_nocancel
+_aio_suspend$NOCANCEL$UNIX2003	___aio_suspend_nocancel
+_aio_suspend$UNIX2003	___aio_suspend
+_bind$UNIX2003	___bind
+_close	___close_nocancel
+_close$NOCANCEL$UNIX2003	___close_nocancel
+_close$UNIX2003	___close
+_chmod ___chmod
+_connect$NOCANCEL$UNIX2003	___connect_nocancel
+_connect$UNIX2003	___connect
+_fcntl	___fcntl_nocancel
+_fcntl$NOCANCEL$UNIX2003	___fcntl_nocancel
+_fcntl$UNIX2003	___fcntl
+_fstat$INODE64	___fstat64
+_fstatfs$INODE64	___fstatfs64
+_fsync	___fsync_nocancel
+_fsync$NOCANCEL$UNIX2003	___fsync_nocancel
+_fsync$UNIX2003	___fsync
+_getattrlist$UNIX2003	___getattrlist
+_getfsstat$INODE64	___getfsstat64
+_getpeername$UNIX2003	___getpeername
+_getsockname$UNIX2003	___getsockname
+_lchown$UNIX2003	___lchown
+_listen$UNIX2003	___listen
+_lstat$INODE64	___lstat64
+_mmap	___mmap
+_mprotect$UNIX2003	___mprotect
+_msgctl$UNIX2003	___msgctl
+_msgrcv	___msgrcv_nocancel
+_msgrcv$NOCANCEL$UNIX2003	___msgrcv_nocancel
+_msgrcv$UNIX2003	___msgrcv
+_msgsnd	___msgsnd_nocancel
+_msgsnd$NOCANCEL$UNIX2003	___msgsnd_nocancel
+_msgsnd$UNIX2003	___msgsnd
+_msync$NOCANCEL$UNIX2003	___msync_nocancel
+_msync$UNIX2003	___msync
+_munmap$UNIX2003	___munmap
+_open$NOCANCEL$UNIX2003	___open_nocancel
+_open$UNIX2003	___open
+_poll	___poll_nocancel
+_poll$NOCANCEL$UNIX2003	___poll_nocancel
+_poll$UNIX2003	___poll
+_pread	___pread_nocancel
+_pread$NOCANCEL$UNIX2003	___pread_nocancel
+_pread$UNIX2003	___pread
+_pwrite	___pwrite_nocancel
+_pwrite$NOCANCEL$UNIX2003	___pwrite_nocancel
+_pwrite$UNIX2003	___pwrite
+_read	___read_nocancel
+_read$NOCANCEL$UNIX2003	___read_nocancel
+_read$UNIX2003	___read
+_readv	___readv_nocancel
+_readv$NOCANCEL$UNIX2003	___readv_nocancel
+_readv$UNIX2003	___readv
+_recvfrom$NOCANCEL$UNIX2003	___recvfrom_nocancel
+_recvfrom$UNIX2003	___recvfrom
+_recvmsg$NOCANCEL$UNIX2003	___recvmsg_nocancel
+_recvmsg$UNIX2003	___recvmsg
+_select$DARWIN_EXTSN	___select
+_select$DARWIN_EXTSN$NOCANCEL	___select_nocancel
+_sem_wait	___sem_wait_nocancel
+_sem_wait$NOCANCEL$UNIX2003	___sem_wait_nocancel
+_sem_wait$UNIX2003	___sem_wait
+_semctl$UNIX2003	___semctl
+_sendmsg$NOCANCEL$UNIX2003	___sendmsg_nocancel
+_sendmsg$UNIX2003	___sendmsg
+_sendto$NOCANCEL$UNIX2003	___sendto_nocancel
+_sendto$UNIX2003	___sendto
+_setattrlist$UNIX2003	___setattrlist
+_setpgrp	___setpgid
+_setregid$UNIX2003	___setregid
+_setreuid$UNIX2003	___setreuid
+_shmctl$UNIX2003	___shmctl
+_socketpair$UNIX2003	___socketpair
+_stat$INODE64	___stat64
+_statfs$INODE64	___statfs64
+_waitid	___waitid_nocancel
+_waitid$NOCANCEL$UNIX2003	___waitid_nocancel
+_waitid$UNIX2003	___waitid
+_write	___write_nocancel
+_write$NOCANCEL$UNIX2003	___write_nocancel
+_write$UNIX2003	___write
+_writev	___writev_nocancel
+_writev$NOCANCEL$UNIX2003	___writev_nocancel
+_writev$UNIX2003	___writev
+
+_ioctl  ___ioctl
+_sigaltstack ___sigaltstack
+_fchmod ___fchmod
+_setrlimit ___setrlimit
+_getrlimit ___getrlimit
diff --git a/libsyscall/Platforms/MacOSX/x86_64/syscall.map b/libsyscall/Platforms/MacOSX/x86_64/syscall.map
new file mode 100644
index 000000000..b8cb6b1e1
--- /dev/null
+++ b/libsyscall/Platforms/MacOSX/x86_64/syscall.map
@@ -0,0 +1,54 @@
+_accept$NOCANCEL	___accept_nocancel
+_aio_suspend$NOCANCEL	___aio_suspend_nocancel
+_close$NOCANCEL	___close_nocancel
+_connect$NOCANCEL	___connect_nocancel
+_fstat$INODE64	___fstat64
+_fstatfs$INODE64	___fstatfs64
+_fsync$NOCANCEL	___fsync_nocancel
+_getfsstat$INODE64	___getfsstat64
+_lstat$INODE64	___lstat64
+_msgrcv$NOCANCEL	___msgrcv_nocancel
+_msgsnd$NOCANCEL	___msgsnd_nocancel
+_msync$NOCANCEL	___msync_nocancel
+_open$NOCANCEL	___open_nocancel
+_poll$NOCANCEL	___poll_nocancel
+_pread$NOCANCEL	___pread_nocancel
+_pwrite$NOCANCEL	___pwrite_nocancel
+_read$NOCANCEL	___read_nocancel
+_readv$NOCANCEL	___readv_nocancel
+_recvfrom$NOCANCEL	___recvfrom_nocancel
+_recvmsg$NOCANCEL	___recvmsg_nocancel
+_select$DARWIN_EXTSN	___select
+_select$DARWIN_EXTSN$NOCANCEL	___select_nocancel
+_sem_wait$NOCANCEL	___sem_wait_nocancel
+_sendmsg$NOCANCEL	___sendmsg_nocancel
+_sendto$NOCANCEL	___sendto_nocancel
+_stat$INODE64	___stat64
+_statfs$INODE64	___statfs64
+_waitid$NOCANCEL	___waitid_nocancel
+_write$NOCANCEL	___write_nocancel
+_writev$NOCANCEL	___writev_nocancel
+
+_accept	___accept
+_bind	___bind
+_connect	___connect
+_getattrlist	___getattrlist
+_getpeername	___getpeername
+_getsockname	___getsockname
+_lchown	___lchown
+_listen	___listen
+_mprotect	___mprotect
+_msgctl	___msgctl
+_msync	___msync
+_munmap	___munmap
+_open	___open
+_recvfrom	___recvfrom
+_recvmsg	___recvmsg
+_semctl	___semctl
+_sendmsg	___sendmsg
+_sendto	___sendto
+_setattrlist	___setattrlist
+_setregid	___setregid
+_setreuid	___setreuid
+_shmctl	___shmctl
+_socketpair	___socketpair
diff --git a/libsyscall/Platforms/syscall.map b/libsyscall/Platforms/syscall.map
new file mode 100644
index 000000000..3c24170a5
--- /dev/null
+++ b/libsyscall/Platforms/syscall.map
@@ -0,0 +1,16 @@
+___sandbox_me	___mac_execve
+___sandbox_mm	___mac_mount
+___sandbox_ms	___mac_syscall
+___sandbox_msp	___mac_set_proc
+__exit	___exit
+_accessx_np	___access_extended
+_getsgroups_np	___getsgroups
+_getwgroups_np	___getwgroups
+# initgroups wrapper is defined in Libinfo
+_initgroups
+_posix_madvise	___madvise
+_pthread_getugid_np	___gettid
+_pthread_setugid_np	___settid
+_setsgroups_np	___setsgroups
+_setwgroups_np	___setwgroups
+_wait4	___wait4_nocancel
diff --git a/libsyscall/create-syscalls.pl b/libsyscall/create-syscalls.pl
deleted file mode 100755
index 285a170a0..000000000
--- a/libsyscall/create-syscalls.pl
+++ /dev/null
@@ -1,266 +0,0 @@
-#!/usr/bin/perl
-#
-# Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
-#
-# @APPLE_OSREFERENCE_LICENSE_HEADER_START@
-# 
-# This file contains Original Code and/or Modifications of Original Code
-# as defined in and that are subject to the Apple Public Source License
-# Version 2.0 (the 'License'). You may not use this file except in
-# compliance with the License. Please obtain a copy of the License at
-# http://www.opensource.apple.com/apsl/ and read it before using this
-# file.
-# 
-# The Original Code and all software distributed under the License are
-# distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
-# EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
-# INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
-# Please see the License for the specific language governing rights and
-# limitations under the License.
-# 
-# @APPLE_OSREFERENCE_LICENSE_HEADER_END@
-#
-##########################################################################
-#
-# % create-syscalls.pl syscalls.master custom-directory out-directory
-#
-# This script fills the out-directory with a Makefile.inc and *.s
-# files to create the double-underbar syscall stubs.  It reads the
-# syscall.master file to get the symbol names and number of arguments,
-# and whether Libsystem should automatically create the (non-double-underbar)
-# stubs if Libc doesn't provide a wrapper.  Which system calls will get
-# the automatic treatment is written to the libsyscall.list file, also
-# written to the out-directory.
-#
-# The custom-directory contains:
-# 1. SYS.h - used by the automatically created *.s and custom files
-# 2. custom.s - contains architecture-specific additional system calls and
-#    auxiliary routines (like cerror)
-# 3. special case double-underbar stub files - which are copied into
-#    the out-directory
-#
-# The BSDmakefile copies /usr/include/architecture/ppc/mode_independent_asm.h
-# and /usr/include/architecture/i386/asm_help.h to $(OBJDIR)/include,
-# replacing .globl with .private_extern.  These headers, along with SYS.h,
-# make the double-underbar syscall stubs private_extern, so that they become
-# static in the resulting libSystem.dylib.
-#
-##########################################################################
-
-use strict;
-use File::Basename ();
-use File::Copy ();
-use File::Spec;
-use IO::File;
-
-my $MyName = File::Basename::basename($0);
-
-my @CustomSrc = qw(custom.s);
-
-my @Copy = (qw(SYS.h), @CustomSrc);
-my $CustomDir;
-my %NoStub;
-my $OutDir;
-my %Stub = (
-    quota => [4, 0],	# unimplemented
-    setquota => [2, 0],	# unimplemented
-    syscall => [0, 0],	# custom/__syscall.s will be used
-);
-my $StubFile = 'libsyscall.list';
-# size in bytes of known types (only used for i386)
-my %TypeBytes = (
-    'au_asid_t'		=> 4,
-    'caddr_t'		=> 4,
-    'gid_t'		=> 4,
-    'id_t'		=> 4,
-    'idtype_t'		=> 4,
-    'int'		=> 4,
-    'int32_t'		=> 4,
-    'int64_t'		=> 8,
-    'key_t'		=> 4,
-    'long'		=> 4,
-    'mach_port_name_t'	=> 4,
-    'mode_t'		=> 4,
-    'off_t'		=> 8,
-    'pid_t'		=> 4,
-    'semun_t'		=> 4,
-    'sigset_t'		=> 4,
-    'size_t'		=> 4,
-    'socklen_t'		=> 4,
-    'ssize_t'		=> 4,
-    'u_int'		=> 4,
-    'u_long'		=> 4,
-    'uid_t'		=> 4,
-    'uint32_t'		=> 4,
-    'uint64_t'		=> 8,
-    'user_addr_t'	=> 4,
-    'user_long_t'	=> 4,
-    'user_size_t'	=> 4,
-    'user_ssize_t'	=> 4,
-    'user_ulong_t'	=> 4,
-);
-
-##########################################################################
-# Make a __xxx.s file: if it exists in the $CustomDir, just copy it, otherwise
-# create one.  We define the macro __SYSCALL_32BIT_ARG_BYTES so that SYS.h can
-# use it to define __SYSCALL depending on the arguments' total size.
-##########################################################################
-sub make_s {
-    my($name, $args, $bytes) = @_;
-    local $_;
-    my $pseudo = $name;
-    $pseudo = '__' . $pseudo unless $pseudo =~ /^__/;
-    my $file = $pseudo . '.s';
-    my $custom = File::Spec->join($CustomDir, $file);
-    my $path = File::Spec->join($OutDir, $file);
-    if(-f $custom) {
-	File::Copy::copy($custom, $path) || die "$MyName: copy($custom, $path): $!\n";
-	print "Copying $path\n";
-    } else {
-	my $f = IO::File->new($path, 'w');
-	die "$MyName: $path: $!\n" unless defined($f);
-	print $f "#define __SYSCALL_32BIT_ARG_BYTES $bytes\n\n";
-	print $f "#include \"SYS.h\"\n\n";
-	print $f "__SYSCALL($pseudo, $name, $args)\n";
-	print "Creating $path\n";
-    }
-    return $file;
-}
-
-sub usage {
-    die "Usage: $MyName syscalls.master custom-directory out-directory\n";
-}
-
-##########################################################################
-# Read the syscall.master file and collect the system call names and number
-# of arguments.  It looks for the NO_SYSCALL_STUB qualifier following the
-# prototype to determine if no automatic stub should be created by Libsystem.
-# System call names that are already prefixed with double-underbar are treated as
-# if the NO_SYSCALL_STUB qualifier were specified (whether it is or not).
-#
-# For the #if lines in syscall.master, all macros are assumed to be defined,
-# except COMPAT_GETFSSTAT (assumed undefined).
-##########################################################################
-sub readmaster {
-    my $file = shift;
-    local $_;
-    my $f = IO::File->new($file, 'r');
-    die "$MyName: $file: $!\n" unless defined($f);
-    my $line = 0;
-    my $skip = 0;
-    while(<$f>) {
-	$line++;
-	if(/^#\s*endif/) {
-	    $skip = 0;
-	    next;
-	}
-	if(/^#\s*else/) {
-	    $skip = -$skip;
-	    next;
-	}
-	chomp;
-	if(/^#\s*if\s+(\S+)$/) {
-	    $skip = ($1 eq 'COMPAT_GETFSSTAT') ? -1 : 1;
-	    next;
-	}
-	next if $skip < 0;
-	next unless /^\d/;
-	s/^[^{]*{\s*//;
-	s/\s*}.*$//; # }
-	die "$MyName: no function prototype on line $line\n" unless length($_) > 0 && /;$/;
-	my $no_syscall_stub = /\)\s*NO_SYSCALL_STUB\s*;/;
-	my($name, $args) = /\s(\S+)\s*\(([^)]*)\)/;
-	next if $name =~ /e?nosys/;
-	$args =~ s/^\s+//;
-	$args =~ s/\s+$//;
-	my $argbytes = 0;
-	my $nargs = 0;
-	if($args ne '' && $args ne 'void') {
-	    my @a = split(',', $args);
-	    $nargs = scalar(@a);
-	    # Calculate the size of all the arguments (only used for i386)
-	    for my $type (@a) {
-		$type =~ s/\s*\w+$//; # remove the argument name
-		if($type =~ /\*$/) {
-		    $argbytes += 4; # a pointer type
-		} else {
-		    $type =~ s/^.*\s//; # remove any type qualifier, like unsigned
-		    my $b = $TypeBytes{$type};
-		    die "$MyName: $name: unknown type '$type'\n" unless defined($b);
-		    $argbytes += $b;
-		}
-	    }
-	}
-	if($no_syscall_stub || $name =~ /^__/) {
-	    $NoStub{$name} = [$nargs, $argbytes];
-	} else {
-	    $Stub{$name} = [$nargs, $argbytes];
-	}
-    }
-}
-
-usage() unless scalar(@ARGV) == 3;
-$CustomDir = $ARGV[1];
-die "$MyName: $CustomDir: No such directory\n" unless -d $CustomDir;
-$OutDir = $ARGV[2];
-die "$MyName: $OutDir: No such directory\n" unless -d $OutDir;
-
-readmaster($ARGV[0]);
-
-##########################################################################
-# copy the files specified in @Copy from the $CustomDir to $OutDir
-##########################################################################
-for(@Copy) {
-    my $custom = File::Spec->join($CustomDir, $_);
-    my $path = File::Spec->join($OutDir, $_);
-    File::Copy::copy($custom, $path) || die "$MyName: copy($custom, $path): $!\n";
-}
-
-##########################################################################
-# make all the *.s files
-##########################################################################
-my @src;
-my($k, $v);
-while(($k, $v) = each(%Stub)) {
-    push(@src, make_s($k, @$v));
-}
-while(($k, $v) = each(%NoStub)) {
-    push(@src, make_s($k, @$v));
-}
-
-##########################################################################
-# create the Makefile.inc file from the list of files in @src and @CustomSrc
-##########################################################################
-my $path = File::Spec->join($OutDir, 'Makefile.inc');
-my $f = IO::File->new($path, 'w');
-die "$MyName: $path: $!\n" unless defined($f);
-print $f ".PATH: $OutDir\n\n";
-print $f "SYSCALLSRCS= " . join(" \\\n\t", sort(@src, @CustomSrc)) . "\n\n";
-print $f "MDSRCS+= \$(SYSCALLSRCS)\n\n";
-print $f ".for S in \$(SYSCALLSRCS)\n";
-print $f "PRECFLAGS-\$(S)+= -I\$(OBJROOT)/include\n";
-print $f ".endfor\n";
-undef $f;
-
-##########################################################################
-# create the libsyscall.list file for Libsystem to use.  For the ones that
-# should not have auto-generated stubs, the line begins with #.
-##########################################################################
-$path = File::Spec->join($OutDir, $StubFile);
-$f = IO::File->new($path, 'w');
-die "$MyName: $path: $!\n" unless defined($f);
-# Add the %NoStub entries to %Stub, appending '#' to the name, so we can sort
-while(($k, $v) = each(%NoStub)) {
-    $k =~ s/^__//;
-    $Stub{"$k#"} = $v;
-}
-for(sort(keys(%Stub))) {
-    $k = $_;
-    if($k =~ s/#$//) {
-	printf $f "#___%s\t%s\n", $k, $Stub{$_}->[0];
-    } else {
-	printf $f "___%s\t%s\n", $_, $Stub{$_}->[0];
-    }
-}
-undef $f;
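For the record, a worked example of the make_s()/%TypeBytes machinery in the script removed above (hypothetical output, assuming syscalls.master declares read as read(int fd, user_addr_t cbuf, user_size_t nbyte) and no custom/__read.s exists to copy): the i386 argument bytes come to 4 + 4 + 4 = 12, so the script would emit a __read.s containing exactly:

    #define __SYSCALL_32BIT_ARG_BYTES 12

    #include "SYS.h"

    __SYSCALL(__read, read, 3)

Because read carries no NO_SYSCALL_STUB qualifier and is not double-underbar prefixed, it lands in %Stub and appears uncommented in libsyscall.list.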
diff --git a/libsyscall/custom/SYS.h b/libsyscall/custom/SYS.h
index a4eb976a2..a16f358d8 100644
--- a/libsyscall/custom/SYS.h
+++ b/libsyscall/custom/SYS.h
@@ -55,46 +55,7 @@
 #define SYS_quota	149
 #endif
 
-#if defined(__ppc__) || defined(__ppc64__)
-
-#include <architecture/ppc/mode_independent_asm.h>
-
-/*
- * Macros.
- */
-
-#define	SYSCALL(name, nargs)			\
-	.globl	cerror				@\
-	MI_ENTRY_POINT(_##name)     @\
-	li	r0,SYS_##name			@\
-	sc                          @\
-	b	1f                      @\
-	blr                         @\
-1:	MI_BRANCH_EXTERNAL(cerror)
-
-
-#define	SYSCALL_NONAME(name, nargs)		\
-	.globl	cerror				@\
-	li	r0,SYS_##name			@\
-	sc                          @\
-	b	1f                      @\
-	b	2f                      @\
-1:	MI_BRANCH_EXTERNAL(cerror)  @\
-2:
-
-
-#define	PSEUDO(pseudo, name, nargs)		\
-    .private_extern  _##pseudo           @\
-    .text                       @\
-    .align  2                   @\
-_##pseudo:                      @\
-	SYSCALL_NONAME(name, nargs)
-
-#define __SYSCALL(pseudo, name, nargs)	\
-    PSEUDO(pseudo, name, nargs)	@\
-    blr
-
-#elif defined(__i386__)
+#if defined(__i386__)
 
 #include <architecture/i386/asm_help.h>
 #include <mach/i386/syscall_sw.h>
@@ -150,11 +111,11 @@ LEAF(_##name, 0)					;\
 2:
 
 #define PSEUDO(pseudo, name, nargs)			\
-LEAF(_##pseudo, 0)					;\
+LEAF(pseudo, 0)					;\
 	UNIX_SYSCALL_NONAME(name, nargs)
 
 #define PSEUDO_INT(pseudo, name, nargs)			\
-LEAF(_##pseudo, 0)					;\
+LEAF(pseudo, 0)					;\
 	UNIX_SYSCALL_INT_NONAME(name, nargs)
 
 #define __SYSCALL(pseudo, name, nargs)			\
@@ -192,7 +153,7 @@ LEAF(_##name, 0)					;\
 2:
 
 #define PSEUDO(pseudo, name, nargs)			\
-LEAF(_##pseudo, 0)					;\
+LEAF(pseudo, 0)					;\
 	UNIX_SYSCALL_NONAME(name, nargs)
 
 #define __SYSCALL(pseudo, name, nargs)			\
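Note what the PSEUDO/PSEUDO_INT edits above actually change: the macros no longer paste a leading underscore onto the pseudo name, so callers now spell out the full symbol themselves. The emitted symbol is identical either way; only the call sites move to the triple-underscore form, as the stub diffs below show. An illustrative sketch of the i386 expansion:

    /* old: PSEUDO(__lseek, lseek, 3)  expanded LEAF(_##__lseek, 0) -> LEAF(___lseek, 0) */
    /* new: PSEUDO(___lseek, lseek, 3) expands to LEAF(___lseek, 0) -> same symbol       */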
diff --git a/libsyscall/custom/__fork.s b/libsyscall/custom/__fork.s
index baff6eb82..2de3a9a28 100644
--- a/libsyscall/custom/__fork.s
+++ b/libsyscall/custom/__fork.s
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -36,104 +36,26 @@
  * 11-Jan-92  Peter King (king@next.com)
  *	Created from M68K sources
  */
-
-#include "SYS.h"
-
-#if defined(__ppc__) || defined(__ppc64__)
-
-/* We use mode-independent "g" opcodes such as "srgi".  These expand
- * into word operations when targeting __ppc__, and into doubleword
- * operations when targeting __ppc64__.
- */
-#include <architecture/ppc/mode_independent_asm.h>
-
-MI_ENTRY_POINT(___fork)
-    MI_PUSH_STACK_FRAME
-    
-    MI_CALL_EXTERNAL(__cthread_fork_prepare)
-    
-	li      r0,SYS_fork
-	sc                      // do the fork
-	b       Lbotch			// error return
-
-	cmpwi	r4,0            // parent (r4==0) or child (r4==1) ?
-	beq     Lparent         // parent, since r4==0
-
-                            
-/* Here if we are the child.  */
-
-#if defined(__DYNAMIC__)
-    .cstring
-LC3:
-	.ascii	"__dyld_fork_child\0"
-    .text
-	.align 2
-	mflr	r0
-	bcl     20,31,1f
-1:	mflr	r3
-	mtlr	r0
-	addis	r3,r3,ha16(LC3-1b)
-	addi	r3,r3,lo16(LC3-1b)
-	addi 	r4,r1,SF_LOCAL1
-	bl      __dyld_func_lookup
-	lg      r3,SF_LOCAL1(r1)
-	mtspr 	ctr,r3
-	bctrl	
-#endif
-
-    li      r9,0
-    MI_GET_ADDRESS(r8,__current_pid)
-    stw     r9,0(r8)            // clear cached pid in child
-    
-	MI_CALL_EXTERNAL(__cthread_fork_child)
-    
-	li	r3,0        // flag for "we are the child"
-	b	Lreturn
-
-
-/* Here if we are the parent, with:
- *  r3 = child's pid
+ 
+/*
+ * All of the asm stubs in this file have been adjusted so the pre/post
+ * fork handlers and dyld fixup are done in C inside Libc. As such, Libc
+ * expects the __fork asm to fix up the return code to be -1, 0, or the
+ * child's pid, and to set errno if needed.
  */
-Lparent:
-	stg     r3,SF_LOCAL2(r1)	// save child pid in stack
-    
-    b       Lparent_return      // clean up and return child's pid
-
-
-/* Here if the fork() syscall failed.  We're still the parent.  */
-
-Lbotch:	
 
-	MI_CALL_EXTERNAL(cerror)
-    li      r3,-1               // get an error return code
-	stg     r3,SF_LOCAL2(r1)	// save return code in stack
-    
-	/*
-	 * We use cthread_fork_parent() to clean up after a fork error
-	 * (unlock cthreads and malloc packages) so the parent
-	 * process can malloc() after fork() errors without
-	 * deadlocking.
-	 */
-     
-Lparent_return:
-	MI_CALL_EXTERNAL(__cthread_fork_parent)
-	lg      r3,SF_LOCAL2(r1)    // return -1 on error, child's pid on success
-    
-Lreturn:
-    MI_POP_STACK_FRAME_AND_RETURN
+#include "SYS.h"
 
-#elif defined(__i386__)
+#if defined(__i386__)
 
 LEAF(___fork, 0)
 	subl  $28, %esp   // Align the stack, with 16 bytes of extra padding that we'll need
-	CALL_EXTERN(__cthread_fork_prepare)
 
 	movl 	$ SYS_fork,%eax; 	// code for fork -> eax
 	UNIX_SYSCALL_TRAP		// do the system call
 	jnc	L1			// jump if CF==0
 
 	CALL_EXTERN(cerror)
-	CALL_EXTERN(__cthread_fork_parent)
 	movl	$-1,%eax
 	addl	$28, %esp   // restore the stack
 	ret
@@ -143,60 +65,23 @@ L1:
 	jz	L2		// parent, since r1 == 0 in parent, 1 in child
 	
 	//child here...
-#if defined(__DYNAMIC__)
-// Here on the child side of the fork we need to tell the dynamic linker that
-// we have forked.  To do this we call __dyld_fork_child in the dynamic
-// linker.  But since we can't dynamically bind anything until this is done we
-// do this by using the private extern __dyld_func_lookup() function to get the
-// address of __dyld_fork_child (the 'C' code equivalent):
-//
-//	_dyld_func_lookup("__dyld_fork_child", &address);
-//	address();
-//
-.cstring
-LC0:
-	.ascii "__dyld_fork_child\0"
-
-.text
-	leal	0x8(%esp),%eax		// get the address where we're going to store the pointer
-	movl	%eax, 0x4(%esp)		// copy the address of the pointer
-	call	1f
-1:	popl	%eax
-	leal	LC0-1b(%eax),%eax
-	movl 	%eax, 0x0(%esp)		// copy the name of the function to look up
-	call 	__dyld_func_lookup
-	movl	0x8(%esp),%eax		// move the value returned in address parameter
-	call	*%eax		// call __dyld_fork_child indirectly
-#endif
-	xorl	%eax, %eax
-	REG_TO_EXTERN(%eax, __current_pid)
-	CALL_EXTERN(__cthread_fork_child)
-
 	xorl	%eax,%eax	// zero eax
-	addl	$28, %esp   // restore the stack
-	ret
-
-	//parent here...
+	REG_TO_EXTERN(%eax, __current_pid);
 L2:
-	movl	%eax, 0xc(%esp)		// save pid
-
-	CALL_EXTERN_AGAIN(__cthread_fork_parent)
-	movl	0xc(%esp), %eax		// return pid
 	addl	$28, %esp   // restore the stack
-	ret		
+	// parent ends up here, skipping the child portion
+	ret
 
 #elif defined(__x86_64__)
 
 LEAF(___fork, 0)
 	subq  $24, %rsp   // Align the stack, plus room for local storage
-	CALL_EXTERN(__cthread_fork_prepare)
 
 	movl 	$ SYSCALL_CONSTRUCT_UNIX(SYS_fork),%eax; // code for fork -> rax
 	UNIX_SYSCALL_TRAP		// do the system call
 	jnc	L1			// jump if CF==0
 
 	CALL_EXTERN(cerror)
-	CALL_EXTERN(__cthread_fork_parent)
 	movq	$-1, %rax
 	addq	$24, %rsp   // restore the stack
 	ret
@@ -206,42 +91,13 @@ L1:
 	jz	L2		// parent, since r1 == 0 in parent, 1 in child
 	
 	//child here...
-#if defined(__DYNAMIC__)
-// Here on the child side of the fork we need to tell the dynamic linker that
-// we have forked.  To do this we call __dyld_fork_child in the dynamic
-// linker.  But since we can't dynamically bind anything until this is done we
-// do this by using the private extern __dyld_func_lookup() function to get the
-// address of __dyld_fork_child (the 'C' code equivalent):
-//
-//	_dyld_func_lookup("__dyld_fork_child", &address);
-//	address();
-//
-.cstring
-LC0:
-	.ascii "__dyld_fork_child\0"
-
-.text
-	leaq	8(%rsp),%rsi		// get the address where we're going to store the pointer
-	leaq 	LC0(%rip), %rdi		// copy the name of the function to look up
-	call 	__dyld_func_lookup
-	call	*8(%rsp)		// call __dyld_fork_child indirectly
-#endif
 	xorq	%rax, %rax
-	REG_TO_EXTERN(%rax, __current_pid)
-	CALL_EXTERN(__cthread_fork_child)
-
-	xorq	%rax,%rax	// zero rax
-	addq	$24, %rsp   // restore the stack
-	ret
-
-	//parent here...
+	PICIFY(__current_pid)
+	movl	%eax,(%r11)
 L2:
-	movl	%eax, 16(%rsp)		// save pid
-
-	CALL_EXTERN_AGAIN(__cthread_fork_parent)
-	movl	16(%rsp), %eax		// return pid
+	// parent ends up here, skipping the child portion
 	addq	$24, %rsp   // restore the stack
-	ret		
+	ret
 
 #else
 #error Unsupported architecture
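With the cthread and dyld work hoisted out of these stubs, __fork now only normalizes the kernel's return value. A minimal C sketch of how a Libc-side wrapper might drive it, per the comment added above (the hook names here are illustrative assumptions, not Libc's actual symbols):

    #include <sys/types.h>

    extern pid_t __fork(void);   /* asm stub above: -1 with errno set, 0 in child, pid in parent */

    /* hypothetical hooks standing in for Libc's real prepare/parent/child handling */
    extern void _fork_prepare(void);
    extern void _fork_parent(void);
    extern void _fork_child(void);   /* would also perform the dyld fixup noted above */

    pid_t
    fork(void)
    {
        pid_t pid;

        _fork_prepare();         /* run prepare handlers before the trap */
        pid = __fork();          /* return value already normalized by the asm */
        if (pid == 0)
            _fork_child();       /* child: reset cached state, fix up dyld */
        else
            _fork_parent();      /* parent path, including pid == -1 errors */
        return pid;
    }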
diff --git a/libsyscall/custom/__getpid.s b/libsyscall/custom/__getpid.s
index 1299a1645..48c85313c 100644
--- a/libsyscall/custom/__getpid.s
+++ b/libsyscall/custom/__getpid.s
@@ -28,45 +28,7 @@
 
 #include "SYS.h"
 
-#if defined(__ppc__) || defined(__ppc64__)
-
-        .data
-        .globl  __current_pid
-        .align  2
-__current_pid:
-        .long 0
-
-MI_ENTRY_POINT(___getpid)
-#if defined(__DYNAMIC__)
-        mflr    r0              // note we cannot use MI_GET_ADDRESS...
-        bcl    20,31,1f         // ...because we define __current_pid
-1:
-        mflr    r5
-        mtlr    r0
-        addis   r5, r5, ha16(__current_pid - 1b)
-        addi    r5, r5, lo16(__current_pid - 1b)
-#else
-	lis	r5,hi16(__current_pid)
-	ori	r5,r5,lo16(__current_pid)
-#endif
-        lwz     r3,0(r5)		// get the cached pid
-        cmpwi 	r3,0			// if positive,
-        bgtlr++                 // return it
-	
-        SYSCALL_NONAME(getpid, 0)
-
-        lwarx	r4,0,r5			// see if we can cache it
-        cmpwi	r4,0			// we can't if there are any...
-        blt--	1f              // ...vforks in progress
-
-        stwcx.	r3,0,r5			// ignore cache conflicts
-        blr
-1:
-        li      r6,-4           // on 970, cancel the reservation using red zone...
-        stwcx.  r3,r6,r1        // ...to avoid an errata
-        blr
-
-#elif defined(__i386__)
+#if defined(__i386__)
 
 	.data
 	.private_extern __current_pid
diff --git a/libsyscall/custom/__gettimeofday.s b/libsyscall/custom/__gettimeofday.s
index c43ee761e..1dbf19c77 100644
--- a/libsyscall/custom/__gettimeofday.s
+++ b/libsyscall/custom/__gettimeofday.s
@@ -29,21 +29,7 @@
 
 #include "SYS.h"
 
-#if defined(__ppc__) || defined(__ppc64__)
-
-/* This syscall is special cased: the timeval is returned in r3/r4.
- * Note also that the "seconds" field of the timeval is a long, so
- * its size is mode dependent.
- */
-MI_ENTRY_POINT(___gettimeofday)
-    mr      r12,r3              // save ptr to timeval
-    SYSCALL_NONAME(gettimeofday,0)
-	stg     r3,0(r12)           // "stw" in 32-bit mode, "std" in 64-bit mode
-	stw     r4,GPR_BYTES(r12)
-	li      r3,0
-	blr
-
-#elif defined(__i386__)
+#if defined(__i386__)
 
 /*
  *	This syscall is special cased: the timeval is returned in eax/edx.
diff --git a/libsyscall/custom/__lseek.s b/libsyscall/custom/__lseek.s
index 909443b17..b051cc5a4 100644
--- a/libsyscall/custom/__lseek.s
+++ b/libsyscall/custom/__lseek.s
@@ -28,13 +28,13 @@
 
 #include "SYS.h"
 
-#if defined(__ppc__) || defined(__ppc64__) || defined(__x86_64__)
+#if defined(__x86_64__)
 
-__SYSCALL(__lseek, lseek, 3)
+__SYSCALL(___lseek, lseek, 3)
 
 #elif defined(__i386__)
 
-__SYSCALL_INT(__lseek, lseek, 3)
+__SYSCALL_INT(___lseek, lseek, 3)
 
 #else
 #error Unsupported architecture
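/*
 * On the extra leading underscore in this and the following stubs:
 * Mach-O prefixes every C-level symbol with "_", so the assembly label
 * ___lseek is the C function __lseek. Renaming __lseek to ___lseek
 * therefore moves the exported C symbol from _lseek to the reserved
 * __lseek name, so a higher-level lseek() wrapper can be provided
 * elsewhere (in Libc).
 */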
diff --git a/libsyscall/custom/__pipe.s b/libsyscall/custom/__pipe.s
index 107a37799..0131d476d 100644
--- a/libsyscall/custom/__pipe.s
+++ b/libsyscall/custom/__pipe.s
@@ -29,19 +29,9 @@
 
 #include "SYS.h"
 
-#if defined(__ppc__) || defined(__ppc64__)
+#if defined(__i386__)
 
-MI_ENTRY_POINT(___pipe)
-    mr      r12,r3              // save fildes across syscall
-	SYSCALL_NONAME(pipe, 0)
-	stw     r3,0(r12)
-	stw     r4,4(r12)
-	li      r3,0
-	blr
-
-#elif defined(__i386__)
-
-PSEUDO_INT(__pipe, pipe, 0)
+PSEUDO_INT(___pipe, pipe, 0)
 	movl	4(%esp),%ecx
 	movl	%eax,(%ecx)
 	movl	%edx,4(%ecx)
@@ -50,7 +40,7 @@ PSEUDO_INT(__pipe, pipe, 0)
 
 #elif defined(__x86_64__)
 
-PSEUDO(__pipe, pipe, 0)
+PSEUDO(___pipe, pipe, 0)
 	movl	%eax, (%rdi)
 	movl	%edx, 4(%rdi)
 	xorl	%eax, %eax
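/*
 * Sketch of the two-register return convention handled above: the
 * kernel hands back both descriptors in eax/edx (i386) or rax/rdx
 * (x86-64) instead of writing them to memory, so the stub stores them
 * through the caller's pointer itself, roughly:
 *
 *	int fds[2];
 *	pipe(fds);		// stub: fds[0] = eax; fds[1] = edx; return 0;
 */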
diff --git a/libsyscall/custom/__psynch_cvbroad.s b/libsyscall/custom/__psynch_cvbroad.s
index 86d9d8024..037fcfc07 100644
--- a/libsyscall/custom/__psynch_cvbroad.s
+++ b/libsyscall/custom/__psynch_cvbroad.s
@@ -31,9 +31,9 @@
 
 #define __SYSCALL_32BIT_ARG_BYTES 36
 
-#if defined(__i386__) || defined(__x86_64__) || defined(__ppc__)
+#if defined(__i386__) || defined(__x86_64__)
 
-__SYSCALL(__psynch_cvbroad, psynch_cvbroad, 8)
+__SYSCALL(___psynch_cvbroad, psynch_cvbroad, 8)
 
 #else
 #error Unsupported architecture
diff --git a/libsyscall/custom/__psynch_cvwait.s b/libsyscall/custom/__psynch_cvwait.s
index f29bceab4..c5d69ce8c 100644
--- a/libsyscall/custom/__psynch_cvwait.s
+++ b/libsyscall/custom/__psynch_cvwait.s
@@ -31,9 +31,9 @@
 
 #define __SYSCALL_32BIT_ARG_BYTES 40
 
-#if defined(__i386__) || defined(__x86_64__) || defined(__ppc__)
+#if defined(__i386__) || defined(__x86_64__)
 
-__SYSCALL(__psynch_cvwait, psynch_cvwait, 8)
+__SYSCALL(___psynch_cvwait, psynch_cvwait, 8)
 
 #else
 #error Unsupported architecture
diff --git a/libsyscall/custom/__ptrace.s b/libsyscall/custom/__ptrace.s
index 2fd53b460..9eae221f2 100644
--- a/libsyscall/custom/__ptrace.s
+++ b/libsyscall/custom/__ptrace.s
@@ -29,16 +29,7 @@
 
 #include "SYS.h"
 
-#if defined(__ppc__) || defined(__ppc64__)
-
-MI_ENTRY_POINT(___ptrace)
-	li      r7,0
-    MI_GET_ADDRESS(r8,_errno)
-    stw     r7,0(r8)
-    SYSCALL_NONAME(ptrace, 4)
-	blr
-
-#elif defined(__i386__)
+#if defined(__i386__)
 
 	.globl	_errno
 
@@ -54,7 +45,8 @@ UNIX_SYSCALL_NONAME(ptrace, 4)
 
 LEAF(___ptrace, 0)
 	xorq	%rax,%rax
-	REG_TO_EXTERN(%rax,_errno)
+	PICIFY(_errno)
+	movl	%eax,(%r11)
 UNIX_SYSCALL_NONAME(ptrace, 4)
 	ret
 
diff --git a/libsyscall/custom/__sigaltstack.s b/libsyscall/custom/__sigaltstack.s
index 514822ba2..d5f1803ff 100644
--- a/libsyscall/custom/__sigaltstack.s
+++ b/libsyscall/custom/__sigaltstack.s
@@ -28,13 +28,13 @@
 
 #include "SYS.h"
 
-#if defined(__ppc__) || defined(__ppc64__) || defined(__x86_64__)
+#if defined(__x86_64__)
 
-__SYSCALL(__sigaltstack, sigaltstack, 3)
+__SYSCALL(___sigaltstack, sigaltstack, 3)
 
 #elif defined(__i386__)
 
-__SYSCALL_INT(__sigaltstack, sigaltstack, 3)
+__SYSCALL_INT(___sigaltstack, sigaltstack, 3)
 
 #else
 #error Unsupported architecture
diff --git a/libsyscall/custom/__sigreturn.s b/libsyscall/custom/__sigreturn.s
index 776351abb..16d5be4fc 100644
--- a/libsyscall/custom/__sigreturn.s
+++ b/libsyscall/custom/__sigreturn.s
@@ -28,13 +28,13 @@
 
 #include "SYS.h"
 
-#if defined(__ppc__) || defined(__ppc64__) || defined(__x86_64__)
+#if defined(__x86_64__)
 
-__SYSCALL(__sigreturn, sigreturn, 2)
+__SYSCALL(___sigreturn, sigreturn, 2)
 
 #elif defined(__i386__)
 
-__SYSCALL_INT(__sigreturn, sigreturn, 2)
+__SYSCALL_INT(___sigreturn, sigreturn, 2)
 
 #else
 #error Unsupported architecture
diff --git a/libsyscall/custom/__syscall.s b/libsyscall/custom/__syscall.s
index dae18a831..73735bd4b 100644
--- a/libsyscall/custom/__syscall.s
+++ b/libsyscall/custom/__syscall.s
@@ -29,11 +29,7 @@
 
 #include "SYS.h"
 
-#if defined(__ppc__) || defined(__ppc64__)
-
-__SYSCALL(__syscall, syscall, 7)
-
-#elif defined(__i386__)
+#if defined(__i386__)
 
 LEAF(___syscall, 0)
 	popl	%ecx		// ret addr
@@ -52,7 +48,7 @@ END(___syscall)
 // For x86-64, the kernel slides the argument list for us.
 // The number of arguments here is variable, but our macros ignore
 // that value anyway.
-__SYSCALL(__syscall, syscall, 0);
+__SYSCALL(___syscall, syscall, 0);
 
 #else
 #error Unsupported architecture
diff --git a/libsyscall/custom/__thread_selfid.s b/libsyscall/custom/__thread_selfid.s
index 5e70787cf..2c4dd934c 100644
--- a/libsyscall/custom/__thread_selfid.s
+++ b/libsyscall/custom/__thread_selfid.s
@@ -30,10 +30,10 @@
 
 #if defined(__x86_64__)
 
-__SYSCALL(__thread_selfid, thread_selfid, 1)
+__SYSCALL(___thread_selfid, thread_selfid, 1)
 
 #elif defined(__i386__)
 
-__SYSCALL_INT(__thread_selfid, thread_selfid, 1)
+__SYSCALL_INT(___thread_selfid, thread_selfid, 1)
 
 #endif
diff --git a/libsyscall/custom/__vfork.s b/libsyscall/custom/__vfork.s
index 073b90840..91408f9c3 100644
--- a/libsyscall/custom/__vfork.s
+++ b/libsyscall/custom/__vfork.s
@@ -37,56 +37,7 @@
 
 #include "SYS.h"
 
-#if defined(__ppc__) || defined(__ppc64__)
-
-/* We use mode-independent "g" opcodes such as "srgi", and/or
- * mode-independent macros such as MI_GET_ADDRESS.  These expand
- * into word operations when targeting __ppc__, and into doubleword
- * operations when targeting __ppc64__.
- */
-#include <architecture/ppc/mode_independent_asm.h>
-
-/* In vfork(), the child runs in parent's address space.  */
-
-
-MI_ENTRY_POINT(___vfork)
-    MI_GET_ADDRESS(r5,__current_pid)  // get address of __current_pid in r5
-2:
-	lwarx	r6,0,r5			// don't cache pid across vfork
-	cmpwi	r6,0
-	ble--	3f              // if another vfork is in progress
-	li      r6,0			// if not, erase the stored pid
-3:	
-	addi	r6,r6,-1		// count the parallel vforks in
-	stwcx.	r6,0,r5			// negative cached pid values
-	bne--	2b
-	
-	li      r0,SYS_vfork
-	sc
-	b       Lbotch			// error return
-
-	cmpwi	r4,0
-	beq     Lparent			// parent, since a1 == 0 in parent,
-
-	li      r3,0			// child
-	blr
-
-Lparent:                    // r3 == child's pid
-	lwarx	r6,0,r5			// we're back, decrement vfork count
-	addi	r6,r6,1
-	stwcx.	r6,0,r5
-	bne--	Lparent
-	blr                     // return pid
-
-Lbotch:
-	lwarx	r6,0,r5			// never went, decrement vfork count
-	addi	r6,r6,1
-	stwcx.	r6,0,r5
-	bne--	Lbotch
-
-	MI_BRANCH_EXTERNAL(cerror)
-
-#elif defined(__i386__)
+#if defined(__i386__)
 
 #if defined(__DYNAMIC__)
 #define GET_CURRENT_PID	PICIFY(__current_pid)
diff --git a/libsyscall/custom/custom.s b/libsyscall/custom/custom.s
index 5f34a7434..b9d46ba13 100644
--- a/libsyscall/custom/custom.s
+++ b/libsyscall/custom/custom.s
@@ -30,35 +30,7 @@
 
 #include "SYS.h"
 
-#if defined(__ppc__) || defined(__ppc64__)
-
-/* We use mode-independent "g" opcodes such as "srgi", and/or
- * mode-independent macros such as MI_GET_ADDRESS.  These expand
- * into word operations when targeting __ppc__, and into doubleword
- * operations when targeting __ppc64__.
- */
-#include <architecture/ppc/mode_independent_asm.h>
-
-    .globl  _errno
-
-MI_ENTRY_POINT(cerror)
-    MI_PUSH_STACK_FRAME
-    MI_GET_ADDRESS(r12,_errno)
-    stw     r3,0(r12)               /* save syscall return code in global */
-    MI_CALL_EXTERNAL(_cthread_set_errno_self)
-    li      r3,-1                   /* then bug return value */
-    li      r4,-1                   /* in case we're returning a long-long in 32-bit mode, etc */
-    MI_POP_STACK_FRAME_AND_RETURN
-
-
-    .globl _processor_facilities_used
-    .align 2
-_processor_facilities_used:
-    li	r0,0x7FF3
-    sc
-    blr
-
-#elif defined(__i386__)
+#if defined(__i386__)
 
 	.globl	_errno
 
@@ -75,9 +47,7 @@ LABEL(cerror)
 	movl	$-1,%edx /* in case a 64-bit value is returned */
 	ret
 
-	.private_extern __sysenter_trap
-	ALIGN
-__sysenter_trap:
+LABEL(__sysenter_trap)
 	popl %edx
 	movl %esp, %ecx
 	sysenter
@@ -87,8 +57,9 @@ __sysenter_trap:
 	.globl	_errno
 
 LABEL(cerror)
-	REG_TO_EXTERN(%rax, _errno)
-	mov		%rsp,%rdx
+	PICIFY(_errno) /* address -> %r11 */
+	movl	%eax,(%r11)
+	mov 	%rsp,%rdx
 	andq	$-16,%rsp
 	subq	$16,%rsp
 	// Preserve the original stack
diff --git a/osfmk/chud/ppc/chud_xnu_glue.h b/libsyscall/custom/errno.c
similarity index 95%
rename from osfmk/chud/ppc/chud_xnu_glue.h
rename to libsyscall/custom/errno.c
index 7145052d0..58da2c114 100644
--- a/osfmk/chud/ppc/chud_xnu_glue.h
+++ b/libsyscall/custom/errno.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -26,3 +26,4 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
+int errno;
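/*
 * Context for this one-line file: libsyscall no longer links against
 * Libc, so it must carry its own errno storage; the x86-64 cerror stub
 * above fills it in via the same PIC pattern, roughly:
 *
 *	PICIFY(_errno)		// &errno -> %r11 (asm name _errno)
 *	movl	%eax, (%r11)	// errno = error code from the syscall
 */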
diff --git a/libsyscall/include/Makefile.inc b/libsyscall/include/Makefile.inc
deleted file mode 100644
index 7bf41dc30..000000000
--- a/libsyscall/include/Makefile.inc
+++ /dev/null
@@ -1 +0,0 @@
-PRIVHDRSPPCHDRS += ${.CURDIR}/include/processor_facilities.h
diff --git a/libsyscall/include/processor_facilities.h b/libsyscall/include/processor_facilities.h
deleted file mode 100644
index 7ba9747bc..000000000
--- a/libsyscall/include/processor_facilities.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/* Does the current CPU have Altivec support? */
-extern int _cpu_has_altivec;
-
-/* What processor facilities is the current thread using? */
-#define floatUsed	0x40000000
-#define vectorUsed	0x20000000
-
-extern int processor_facilities_used(void);
diff --git a/libsyscall/mach/Makefile.inc b/libsyscall/mach/Makefile.inc
deleted file mode 100644
index 516300d2a..000000000
--- a/libsyscall/mach/Makefile.inc
+++ /dev/null
@@ -1,74 +0,0 @@
-# machine-dependent mach sources
-ARCHDIR = ${MACHINE_ARCH:C/^armv.*$/arm/}
-.if exists(${.CURDIR}/mach/${ARCHDIR}/Makefile.inc)
-.include "${.CURDIR}/mach/${ARCHDIR}/Makefile.inc"
-.endif
-
-.PATH: ${.CURDIR}/mach
-
-.include "${.CURDIR}/mach/headers/Makefile.inc"
-.include "${.CURDIR}/mach/servers/Makefile.inc"
-
-MD_MIGDEFS += task.defs \
-	thread_act.defs 
-
-MD_MIGHDRS += ${MD_MIGDEFS:.defs=.h}
-
-MIGDEFS += \
-	clock.defs \
-	clock_priv.defs \
-	exc.defs \
-	host_priv.defs \
-	host_security.defs \
-	ledger.defs \
-	lock_set.defs \
-	mach_port.defs \
-	mach_host.defs \
-	mach_vm.defs \
-	processor.defs \
-	processor_set.defs \
-	vm_map.defs
-
-MIGHDRS = ${MIGDEFS:.defs=.h}
-MIGHDRS += clock_reply.h
-MACH_INSTHDRS += ${MIGHDRS}
-
-# These files are generated from the .defs files
-MIGSRCS = ${MIGDEFS:.defs=User.c} ${MD_MIGDEFS:.defs=User.c}
-
-MISRCS += ${MIGSRCS} \
-	bootstrap_ports.c \
-	clock_sleep.c \
-	error_codes.c \
-	excServer.c \
-	excUser.c \
-	exc_catcher.c \
-	exc_catcher_state.c \
-	exc_catcher_state_identity.c \
-	fprintf_stderr.c \
-	mig_allocate.c \
-	mig_deallocate.c \
-	mig_reply_setup.c  \
-	mig_strncpy.c \
-	mach_error.c \
-	mach_error_string.c \
-	mach_init.c \
-	mach_init_libSystem.c \
-	mach_init_ports.c \
-	mach_msg.c \
-	mach_traps.s \
-	ms_thread_switch.c  \
-	notifyUser.c \
-	panic.c  \
-	port_obj.c  \
-	sbrk.c  \
-	semaphore.c  \
-	slot_name.c 
-
-CLEANFILES += ${MIGHDRS} ${MIGSRCS} ${MD_MIGDEFS:.defs=Server.c} \
-		${MIGDEFS:.defs=Server.c} exc.h excUser.c excServer.c \
-		notify.h notifyUser.c notifyServer.c
-
-MAN2 += brk.2
-
-MLINKS += brk.2 sbrk.2
diff --git a/osfmk/ppc/hardclock_entries.h b/libsyscall/mach/abort.h
similarity index 83%
rename from osfmk/ppc/hardclock_entries.h
rename to libsyscall/mach/abort.h
index 3a804ea9e..7b99c1cf0 100644
--- a/osfmk/ppc/hardclock_entries.h
+++ b/libsyscall/mach/abort.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,13 +25,12 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * @OSF_COPYRIGHT@
- */
 
-#ifndef _HARDCLOCK_ENTRIES_H_
-#define _HARDCLOCK_ENTRIES_H_
+// Defined because we don't have Libc
+#define __SIGABRT 6
+#define __STDERR_FILENO 2
 
-extern void hardclock(struct ppc_saved_state*);
+int __getpid(void);
+int __kill(int pid, int signum, int posix);
 
-#endif /* _HARDCLOCK_ENTRIES_H_ */
+#define abort()	__kill(__getpid(), __SIGABRT, 0)
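/*
 * With the definitions above, a call such as
 *
 *	abort();
 *
 * expands to __kill(__getpid(), __SIGABRT, 0): the process sends
 * itself signal 6 (SIGABRT) through the raw syscall stubs, with the
 * posix flag clear, since Libc's abort() is unavailable here.
 */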
diff --git a/libsyscall/mach/bootstrap_ports.c b/libsyscall/mach/bootstrap_ports.c
deleted file mode 100644
index 33399332b..000000000
--- a/libsyscall/mach/bootstrap_ports.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-
-#include <mach/mach.h>
-
-/*
- * Stub out the old bootstrap_ports() API, as some applications need
- * it to exist.  We do not publish a prototype for this, and the stub
- * WILL disappear in a future release.
- */
-kern_return_t
-bootstrap_ports(
-	mach_port_t	bootstrap,
-	mach_port_t	*priv_host,
-	mach_port_t	*device_master,
-	mach_port_t	*wired_ledger,
-	mach_port_t	*paged_ledger,
-	mach_port_t	*host_security)
-{
-	return KERN_FAILURE;
-}
-
diff --git a/libsyscall/mach/brk.2 b/libsyscall/mach/brk.2
deleted file mode 100644
index 9ea4f61c2..000000000
--- a/libsyscall/mach/brk.2
+++ /dev/null
@@ -1,150 +0,0 @@
-.\"	$NetBSD: brk.2,v 1.7 1995/02/27 12:31:57 cgd Exp $
-.\"
-.\" Copyright (c) 1980, 1991, 1993
-.\"	The Regents of the University of California.  All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\"    notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\"    notice, this list of conditions and the following disclaimer in the
-.\"    documentation and/or other materials provided with the distribution.
-.\" 3. All advertising materials mentioning features or use of this software
-.\"    must display the following acknowledgement:
-.\"	This product includes software developed by the University of
-.\"	California, Berkeley and its contributors.
-.\" 4. Neither the name of the University nor the names of its contributors
-.\"    may be used to endorse or promote products derived from this software
-.\"    without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\"     @(#)brk.2	8.2 (Berkeley) 12/11/93
-.\"
-.Dd December 11, 1993
-.Dt BRK 2
-.Os BSD 4
-.Sh NAME
-.Nm brk ,
-.Nm sbrk
-.Nd change data segment size
-.Sh SYNOPSIS
-.Fd #include <unistd.h>
-.Ft void *
-.Fn brk "const void *addr"
-.Ft void *
-.Fn sbrk "int incr"
-.Sh DESCRIPTION
-.Bf -symbolic
-The brk and sbrk functions are historical curiosities
-left over from earlier days before the advent of virtual memory management.
-.Ef
-The
-.Fn brk
-function
-sets the break or lowest address
-of a process's data segment (uninitialized data) to
-.Fa addr
-(immediately above bss).
-Data addressing is restricted between
-.Fa addr
-and the lowest stack pointer to the stack segment.
-Memory is allocated by
-.Fa brk
-in page size pieces;
-if
-.Fa addr
-is not evenly divisible by the system page size, it is
-increased to the next page boundary.
-.Pp
-.\" The
-.\" .Nm sbrk
-.\" function
-.\" allocates chunks of
-.\" .Fa incr
-.\" bytes
-.\" to the process's data space
-.\" and returns an address pointer.
-.\" The
-.\" .Xr malloc 3
-.\" function utilizes
-.\" .Nm sbrk .
-.\" .Pp
-The current value of the program break is reliably returned by
-.Dq Li sbrk(0)
-(see also 
-.Xr end 3 ) .
-The
-.Xr getrlimit 2
-system call may be used to determine
-the maximum permissible size of the
-.Em data
-segment;
-it will not be possible to set the break
-beyond the
-.Em rlim_max
-value returned from a call to
-.Xr getrlimit ,
-e.g.
-.Dq etext + rlp\(->rlim_max.
-(see
-.Xr end 3
-for the definition of
-.Em etext ) .
-.Sh RETURN VALUES
-.Nm Brk
-returns a pointer to the new end of memory if successful;
-otherwise -1 with
-.Va errno
-set to indicate why the allocation failed.
-The
-.Nm sbrk
-function returns a pointer to the base of the new storage if successful;
-otherwise -1 with
-.Va errno
-set to indicate why the allocation failed.
-.Sh ERRORS
-.Xr Sbrk
-will fail and no additional memory will be allocated if
-one of the following is true:
-.Bl -tag -width Er
-.It Bq Er ENOMEM
-The limit, as set by
-.Xr setrlimit 2 ,
-was exceeded.
-.It Bq Er ENOMEM
-The maximum possible size of a data segment (compiled into the
-system) was exceeded.
-.It Bq Er ENOMEM
-Insufficient space existed in the swap area
-to support the expansion.
-.El
-.Sh SEE ALSO
-.Xr execve 2 ,
-.Xr getrlimit 2 ,
-.Xr malloc 3 ,
-.Xr mmap 2 ,
-.Xr end 3
-.Sh BUGS
-Setting the break may fail due to a temporary lack of
-swap space.  It is not possible to distinguish this
-from a failure caused by exceeding the maximum size of
-the data segment without consulting 
-.Xr getrlimit .
-.Sh HISTORY
-A
-.Fn brk
-function call appeared in 
-.At v7 .
diff --git a/libsyscall/mach/clock_sleep.c b/libsyscall/mach/clock_sleep.c
index 6470f2713..dbcca39d2 100644
--- a/libsyscall/mach/clock_sleep.c
+++ b/libsyscall/mach/clock_sleep.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -29,10 +29,11 @@
 #include <mach/mach_syscalls.h>
 #include <mach/clock_types.h>
 
-kern_return_t clock_sleep(mach_port_t clock_name,
-                          sleep_type_t clock_type,
-                          mach_timespec_t sleep_time,
-                          mach_timespec_t *wake_time) {
-
+kern_return_t
+clock_sleep(mach_port_t clock_name,
+	sleep_type_t clock_type,
+	mach_timespec_t sleep_time,
+	mach_timespec_t *wake_time)
+{
     return clock_sleep_trap(clock_name, clock_type, sleep_time.tv_sec, sleep_time.tv_nsec, wake_time);
 }
diff --git a/iokit/Kernel/ppc/IOSharedLock.s b/libsyscall/mach/dylib_link.c
similarity index 90%
rename from iokit/Kernel/ppc/IOSharedLock.s
rename to libsyscall/mach/dylib_link.c
index 69183e016..5aa27f230 100644
--- a/iokit/Kernel/ppc/IOSharedLock.s
+++ b/libsyscall/mach/dylib_link.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,5 +25,5 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-#include <IOKit/machine/IOSharedLockImp.h>
 
+/* This empty file is here to force the dylib target to actually link */
diff --git a/libsyscall/mach/err_iokit.sub b/libsyscall/mach/err_iokit.sub
index 02e657aa8..b5361b8be 100755
--- a/libsyscall/mach/err_iokit.sub
+++ b/libsyscall/mach/err_iokit.sub
@@ -34,12 +34,12 @@
 #include <IOKit/firewire/IOFireWireLib.h>
 #endif
 
-static struct error_sparse_map err_codes_iokit_common_map[] = {
+static const struct error_sparse_map err_codes_iokit_common_map[] = {
     err_code_map_entry(kIOReturnInvalid, kIOReturnInvalid ),
     err_code_map_entry(kIOReturnError,   kIOReturnNotFound),
 };
 
-static const char * err_codes_iokit_common[] = {
+static const char * const err_codes_iokit_common[] = {
     "(iokit/common) invalid - should never be seen",			// 0x001
 
     "(iokit/common) general error",					// 0x2bc
@@ -98,7 +98,7 @@ static const char * err_codes_iokit_common[] = {
 };
 
 #if !TARGET_OS_EMBEDDED
-static struct error_sparse_map err_codes_iokit_usb_map[] = {
+static const struct error_sparse_map err_codes_iokit_usb_map[] = {
     err_code_map_entry(kIOUSBCRCErr, kIOUSBDataToggleErr),
     err_code_map_entry(kIOUSBPIDCheckErr, kIOUSBWrongPIDErr),
     err_code_map_entry(kIOUSBReserved1Err, kIOUSBLinkErr),
@@ -108,7 +108,7 @@ static struct error_sparse_map err_codes_iokit_usb_map[] = {
 };
 
 // error codes with in 0xe0004000
-static const char * err_codes_iokit_usb[] = {
+static const char * const err_codes_iokit_usb[] = {
     "(iokit/usb) USB Controller Error: bad CRC received",		// 0x001
     "(iokit/usb) USB Controller Error:  bitstuffing",			// 0x002
     "(iokit/usb) USB Controller Error: Bad data toggle",		// 0x003
@@ -144,14 +144,14 @@ static const char * err_codes_iokit_usb[] = {
     "(iokit/usb) pipe ref not recognized",				// 0x061
 };
 
-static struct error_sparse_map err_codes_iokit_fw_map[] = {
+static const struct error_sparse_map err_codes_iokit_fw_map[] = {
     err_code_map_entry(kIOConfigNoEntry, kIOFireWireBusReset),
     err_code_map_entry(kIOFireWireBogusDCLProgram, kIOFireWireCompleting),
     err_code_map_entry(kIOFWMessageServiceIsRequestingClose, kIOFWMessageTopologyChanged),
 };
 
 // error codes with in 0xe0008000
-static const char * err_codes_iokit_fw[] = {
+static const char * const err_codes_iokit_fw[] = {
     "(iokit/firewire) can't find requested entry in config ROM",	// 001
     "(iokit/firewire) command pending (internal)",			// 002
     "(iokit/firewire) DCL callback is final callback (internal)",	// 003
@@ -196,7 +196,7 @@ static const char * err_codes_iokit_fw[] = {
 };
 
 // error codes with in 0xe0020000
-static const char * err_codes_iokit_bluetooth[] = {
+static const char * const err_codes_iokit_bluetooth[] = {
     "(iokit/bluetooth) unknown error",					// 000
     "(iokit/bluetooth) interrupted operation, hardware reset",		// 001
     "(iokit/bluetooth) connection to device already exists",		// 002
@@ -211,7 +211,7 @@ static const struct error_sparse_map err_iokit_sub_map[] = {
 };
 
 #define err_iokit_null_sub	{ "(iokit/?", 0 }
-static struct error_subsystem err_iokit_sub[] =
+static const struct error_subsystem err_iokit_sub[] =
 {
     /* 0 */ {
 	"(iokit/common)",			// 0xe0000000
diff --git a/libsyscall/mach/err_ipc.sub b/libsyscall/mach/err_ipc.sub
index f5c0905f4..35cbce0c6 100644
--- a/libsyscall/mach/err_ipc.sub
+++ b/libsyscall/mach/err_ipc.sub
@@ -36,7 +36,7 @@
  *	Definitions of error strings for original IPC
  */
 
-static const char * err_codes_send[] = {
+static const char * const err_codes_send[] = {
 	"(ipc/send) unknown error",		/* -100 */
 	"(ipc/send) invalid memory",		/* -101 */
 	"(ipc/send) invalid port",		/* -102 */
@@ -51,7 +51,7 @@ static const char * err_codes_send[] = {
 	"(ipc/send) message size changed while being copied",	/* -111 */
 };
 
-static const char * err_codes_rcv[] = {
+static const char * const err_codes_rcv[] = {
 	"(ipc/rcv) unknown error",			/* -200 */
 	"(ipc/rcv) invalid memory",			/* -201 */
 	"(ipc/rcv) invalid port",			/* -202 */
@@ -63,7 +63,7 @@ static const char * err_codes_rcv[] = {
 	"(ipc/rcv) port receiver changed or port became enabled", /* -208 */
 };
 
-static const char 	* err_codes_mig[] = {
+static const char * const err_codes_mig[] = {
 	"(ipc/mig) type check failure in message interface",	/* 0 (-300) */
 	"(ipc/mig) wrong return message ID",			/* 1 */
 	"(ipc/mig) server detected error",			/* 2 */
diff --git a/libsyscall/mach/err_kern.sub b/libsyscall/mach/err_kern.sub
index f00943599..bc059a5dd 100644
--- a/libsyscall/mach/err_kern.sub
+++ b/libsyscall/mach/err_kern.sub
@@ -36,7 +36,7 @@
  *	error codes for Mach and Unix kernels
  */
 
-static const char * err_codes_kern[] = {
+static const char * const err_codes_kern[] = {
 	"(os/kern) successful",			/* 0 */
 	"(os/kern) invalid address",
 	"(os/kern) protection failure",
@@ -87,7 +87,7 @@ static const char * err_codes_kern[] = {
 	"(os/kern) remote node down",
 };
 
-static const char * err_codes_unix[] = {
+static const char * const err_codes_unix[] = {
 	NO_SUCH_ERROR,
 	"(os/unix) no rights to object",
 	"(os/unix) file or directory does not exist",
diff --git a/libsyscall/mach/err_libkern.sub b/libsyscall/mach/err_libkern.sub
index a9a9c27c2..f419d04fa 100644
--- a/libsyscall/mach/err_libkern.sub
+++ b/libsyscall/mach/err_libkern.sub
@@ -34,14 +34,14 @@
 
 /* These codes are specified in decimal in OSReturn.h.
  */
-static const char * err_codes_libkern_common[] = {
+static const char * const err_codes_libkern_common[] = {
 	NO_SUCH_ERROR,
 	"(libkern/common) general/unspecified error",    /* 1 */
 };
 
 /* These codes are specified in decimal in OSReturn.h.
  */
-static const char * err_codes_libkern_metaclass[] = {
+static const char * const err_codes_libkern_metaclass[] = {
 	NO_SUCH_ERROR,
 	"(libkern/metaclass) runtime internal error",                                       /* 1 */
 	"(libkern/metaclass) class has instances",                                          /* 2 */
@@ -58,7 +58,7 @@ static const char * err_codes_libkern_metaclass[] = {
 
 /* These codes are specified in hexadecimal in OSKextLib.h.
  */
-static const char * err_codes_libkern_kext[] = {
+static const char * const err_codes_libkern_kext[] = {
 	NO_SUCH_ERROR,
 	"(libkern/kext) internal error",                                      /* 0x1 */
 	"(libkern/kext) allocation failure",                                  /* 0x2 */
diff --git a/libsyscall/mach/err_mach_ipc.sub b/libsyscall/mach/err_mach_ipc.sub
index c9e6a79c9..d1e542fae 100644
--- a/libsyscall/mach/err_mach_ipc.sub
+++ b/libsyscall/mach/err_mach_ipc.sub
@@ -36,7 +36,7 @@
  *	Error string definitions for the new Mach IPC
  */
 
-static const char * err_codes_mach_send[] = {
+static const char * const err_codes_mach_send[] = {
 	/* 0 */	"(ipc/send) no error",
 	/* 1 */	"(ipc/send) send in progress",
 	/* 2 */	"(ipc/send) invalid data",
@@ -61,7 +61,7 @@ static const char * err_codes_mach_send[] = {
        /* 21 */ "(ipc/send) out-of-line buffer too large",
 };
 
-static const char * err_codes_mach_rcv[] = {
+static const char * const err_codes_mach_rcv[] = {
 	/* 0 */	"(ipc/rcv) no error",
 	/* 1 */	"(ipc/rcv) receive in progress",
 	/* 2 */	"(ipc/rcv) invalid name",
@@ -81,7 +81,7 @@ static const char * err_codes_mach_rcv[] = {
        /* 16 */ "(ipc/rcv) DIPC transport error",
 };
 
-static const char 	* err_codes_mach_mig[] = {
+static const char * const err_codes_mach_mig[] = {
 	/* 0 */	"(ipc/mig) client type check failure",
 	/* 1 */	"(ipc/mig) wrong reply message ID",
 	/* 2 */	"(ipc/mig) server detected error",
diff --git a/libsyscall/mach/err_server.sub b/libsyscall/mach/err_server.sub
index acac59a0e..3fed18fe1 100644
--- a/libsyscall/mach/err_server.sub
+++ b/libsyscall/mach/err_server.sub
@@ -36,13 +36,13 @@
  *	Definitions of Servers error strings
  */
 
-static const char * err_codes_netname[] = {			/* 0 */
+static const char * const err_codes_netname[] = {			/* 0 */
 		"(server/netname) name is not yours",
 		"(server/netname) name not checked in",
 		"(server/netname) no such host",
 		"(server/netname) host not found",
 };
-static const char * err_codes_env_mgr[] = {			/* 1 */
+static const char * const err_codes_env_mgr[] = {			/* 1 */
 		NO_SUCH_ERROR,
 		"(server/env_mgr) variable not found",
 		"(server/env_mgr) wrong type of variable",
@@ -52,23 +52,23 @@ static const char * err_codes_env_mgr[] = {			/* 1 */
 		"(server/env_mgr) port table full",
 		"(server/env_mgr) attempting to enter a null port ",
 };
-static const char * err_codes_execd[] = {			/* 2 */
+static const char * const err_codes_execd[] = {			/* 2 */
 		NO_SUCH_ERROR,
 		"(server/execd) could not find file to run",
 		"(server/execd) userid or password incorrect",
 		"(server/execd) fork failed",
 };
-static const char * err_codes_netmemory[] = {			/* 3 */
+static const char * const err_codes_netmemory[] = {			/* 3 */
 		"(server/netmemory) successful",
 		"(server/netmemory) invalid argument",
 		"(server/netmemory) resource shortage",
 };
-static const char * err_codes_ufs[] = {				/* 4 */
+static const char * const err_codes_ufs[] = {				/* 4 */
 		NO_SUCH_ERROR,
 /* XXX		"(server/ufs) invalid port", */
 };
 
-static const char * err_codes_task_master[] = {			/* 5 */
+static const char * const err_codes_task_master[] = {			/* 5 */
 		"(server/task_master) GENERIC ERROR",
 		"(server/task_master) invalid tm_task port",
 		"(server/task_master) invalid task id",
@@ -77,7 +77,7 @@ static const char * err_codes_task_master[] = {			/* 5 */
 		"(server/task_master) invalid action",
 };
 
-static const char * err_codes_ns[] = {				/* 6 */
+static const char * const err_codes_ns[] = {				/* 6 */
 		"(server/ns) GENERIC ERROR",
 		"(server/ns) invalid handle",
 		"(server/ns) name not found",
@@ -98,7 +98,7 @@ static const char * err_codes_ns[] = {				/* 6 */
 		"(server/ns) entry not reserved",
 };
 
-static const char 	* err_codes_io[] = {			/* 7 */
+static const char * const err_codes_io[] = {			/* 7 */
 		"(server/io) GENERIC ERROR",
 		"(server/io) invalid offset",
 		"(server/io) invalid size",
@@ -107,7 +107,7 @@ static const char 	* err_codes_io[] = {			/* 7 */
 		"(server/io) operation rejected under current I/O strategy",
 };
 
-static const char * err_codes_auth[] = {			/* 8 */
+static const char * const err_codes_auth[] = {			/* 8 */
 		"(server/auth) GENERIC ERROR",
 		"(server/auth) bad private port",
 		"(server/auth) bad name",
@@ -119,7 +119,7 @@ static const char * err_codes_auth[] = {			/* 8 */
 		"(server/auth) not secondary",
 };
 
-static const char * err_codes_us[] = {				/* 9 */
+static const char * const err_codes_us[] = {				/* 9 */
 		"(server/us) GENERIC ERROR",
 		"(server/us) unknown error",
 		"(server/us) object not found",
@@ -146,7 +146,7 @@ static const char * err_codes_us[] = {				/* 9 */
 		"(server/us) internal error",
 };
 
-static const char * err_codes_sunrpc[] = {			/* 10 */
+static const char * const err_codes_sunrpc[] = {			/* 10 */
 		"(server/sunrpc) GENERIC ERROR",
 		"(server/sunrpc) cannot encode arguments",
 		"(server/sunrpc) cannot decode results",
@@ -167,7 +167,7 @@ static const char * err_codes_sunrpc[] = {			/* 10 */
 		"(server/sunrpc) unknown protocol",
 };
 
-static const char	* err_codes_machobj[] = {		/* 11 */
+static const char * const err_codes_machobj[] = {		/* 11 */
 		"(server/object system) GENERIC ERROR",
 		"(server/object system) object not found",
 		"(server/object system) no such operation",
@@ -176,7 +176,7 @@ static const char	* err_codes_machobj[] = {		/* 11 */
 		"(server/object system) bad ipc message format",
 };
 
-static const char 	* err_codes_loader[] = {		/* 12 */
+static const char * const err_codes_loader[] = {		/* 12 */
 		"(server/loader) GENERIC ERROR",
 		"(server/loader) object file not relocated",
 		"(server/loader) unknown file type",
@@ -186,7 +186,7 @@ static const char 	* err_codes_loader[] = {		/* 12 */
 };
 
 
-static const char	* err_codes_exception[] = {		/* 13 */
+static const char * const err_codes_exception[] = {		/* 13 */
 	"(server/exception) GENERIC ERROR",
 	"(server/exception) invalid access",
 	"(server/exception) invalid instruction",
@@ -196,7 +196,7 @@ static const char	* err_codes_exception[] = {		/* 13 */
 	"(server/exception) breakpoint exception",
 };
 
-static const char	* err_codes_ux_signal[] = {		/* 14 */
+static const char * const err_codes_ux_signal[] = {		/* 14 */
 	"(server/unix-signal) GENERIC ERROR",
 	"(server/unix-signal) hangup",
 	"(server/unix-signal) interrupt",
@@ -229,7 +229,7 @@ static const char	* err_codes_ux_signal[] = {		/* 14 */
 	"(server/unix-signal) user-defined signal 2",
 };
 
-static const char	* err_codes_xkernel[] = {		/* 15 */
+static const char * const err_codes_xkernel[] = {		/* 15 */
 	"(server/xkernel) GENERIC ERROR",
 	"(server/xkernel) map full",
 	"(server/xkernel) inconsistent bind",
diff --git a/libsyscall/mach/error_codes.c b/libsyscall/mach/error_codes.c
index c87e18b8b..085f468dc 100644
--- a/libsyscall/mach/error_codes.c
+++ b/libsyscall/mach/error_codes.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -72,7 +72,7 @@
 #include "err_server.sub"
 #include "err_us.sub"
 
-__private_extern__ struct error_system _mach_errors[err_max_system+1] = {
+const struct error_system _mach_errors[err_max_system+1] = {
 	/* 0; err_kern */
 	{
 		errlib_count(err_os_sub),
diff --git a/libsyscall/mach/errorlib.h b/libsyscall/mach/errorlib.h
index 0ed4a27c8..931184372 100644
--- a/libsyscall/mach/errorlib.h
+++ b/libsyscall/mach/errorlib.h
@@ -91,9 +91,9 @@ struct error_sparse_map {
 #define err_code_map_entry(start, end) { err_get_code(start), err_get_code(end) }
 
 struct error_subsystem {
-	const char			* subsys_name;
+	const char			*subsys_name;
 	int				max_code;
-	const char			* * codes;
+	const char * const		*codes;
 	const struct error_sparse_map	*map_table;
 	int				map_count;
 };
@@ -101,15 +101,14 @@ struct error_subsystem {
 
 struct error_system {
 	int				max_sub;
-	const char			* bad_sub;
-	const struct error_subsystem	* subsystem;
-	const struct error_sparse_map	* map_table;
+	const char			*bad_sub;
+	const struct error_subsystem	*subsystem;
+	const struct error_sparse_map	*map_table;
 	int				map_count;
 };
 #define errorlib_sub_null	{ NULL, 0, NULL, NULL, 0 }
 
-__private_extern__ struct error_system 	_mach_errors[err_max_system+1];
-
-__private_extern__ char *mach_error_string_int(mach_error_t, boolean_t *);
+extern const struct error_system 	_mach_errors[err_max_system+1];
+char *mach_error_string_int(mach_error_t, boolean_t *);
 
 #define	errlib_count(s)		(sizeof(s)/sizeof(s[0]))
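/*
 * errlib_count() is the standard sizeof array-length idiom, which is
 * what lets the error tables above be indexed safely, e.g.
 *
 *	static const char * const err_codes_kern[] = { "(os/kern) successful", ... };
 *	int n = errlib_count(err_codes_kern);	// number of entries
 *
 * Note it only works on true arrays, not on pointers to them.
 */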
diff --git a/libsyscall/mach/exc_catcher.c b/libsyscall/mach/exc_catcher.c
index a85086519..9915eb2a0 100644
--- a/libsyscall/mach/exc_catcher.c
+++ b/libsyscall/mach/exc_catcher.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -35,10 +35,12 @@
 #include <mach/message.h>
 #include <mach/exception.h>
 #include <mach/mig_errors.h>
-#include <dlfcn.h>
-#include <stdlib.h>
 
-__private_extern__ kern_return_t internal_catch_exception_raise (
+#include "abort.h"
+#include "exc_catcher.h"
+
+__private_extern__ kern_return_t
+internal_catch_exception_raise(
     mach_port_t exception_port,
     mach_port_t thread,
     mach_port_t task,
@@ -47,21 +49,19 @@ __private_extern__ kern_return_t internal_catch_exception_raise (
     mach_msg_type_number_t codeCnt)
 {
 #if defined(__DYNAMIC__)
-    static int checkForFunction = 0;
-    /* This will be non-zero if the user has defined this function */
-    static kern_return_t (*func)(mach_port_t, mach_port_t, mach_port_t, exception_type_t, exception_data_t, mach_msg_type_number_t);
-    if (checkForFunction == 0) {
-        checkForFunction = 1;
-		func = dlsym(RTLD_DEFAULT, "catch_exception_raise");
-    }
-    if (func == 0) {
-        /* The user hasn't defined catch_exception_raise in their binary */
-        abort();
-    }
-    return (*func)(exception_port, thread, task, exception, code, codeCnt);
+	static _libkernel_exc_raise_func_t exc_raise_func = (void*)-1;
+	
+	if (exc_raise_func == ((void*)-1)) {
+		exc_raise_func = _dlsym(RTLD_DEFAULT, "catch_exception_raise");
+	}
+	if (exc_raise_func == 0) {
+		/* The user hasn't defined catch_exception_raise in their binary */
+		abort();
+	}
+	return (*exc_raise_func)(exception_port, thread, task, exception, code, codeCnt);
 #else
-    extern kern_return_t catch_exception_raise(mach_port_t, mach_port_t, mach_port_t, exception_type_t, exception_data_t, mach_msg_type_number_t);
-    return catch_exception_raise(exception_port, thread, task, exception, code, codeCnt);
+	extern kern_return_t catch_exception_raise(mach_port_t, mach_port_t, mach_port_t, exception_type_t, exception_data_t, mach_msg_type_number_t);
+	return catch_exception_raise(exception_port, thread, task, exception, code, codeCnt);
 #endif
 }
 
diff --git a/libsyscall/mach/exc_catcher.h b/libsyscall/mach/exc_catcher.h
new file mode 100644
index 000000000..28aac2508
--- /dev/null
+++ b/libsyscall/mach/exc_catcher.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef __EXC_CATCHER_H
+#define __EXC_CATCHER_H
+
+#include "_libkernel_init.h"
+
+typedef kern_return_t (*_libkernel_exc_raise_func_t)(mach_port_t,
+	mach_port_t,
+	mach_port_t,
+	exception_type_t,
+	exception_data_t,
+	mach_msg_type_number_t);
+
+typedef kern_return_t (*_libkernel_exc_raise_state_func_t)(mach_port_t,
+	exception_type_t,
+	exception_data_t,
+	mach_msg_type_number_t,
+	int *,
+	thread_state_t,
+	mach_msg_type_number_t,
+	thread_state_t,
+	mach_msg_type_number_t *);
+
+typedef kern_return_t (*_libkernel_exec_raise_state_identity_t)(mach_port_t,
+	mach_port_t, mach_port_t,
+	exception_type_t,
+	exception_data_t,
+	mach_msg_type_number_t,
+	int *, thread_state_t,
+	mach_msg_type_number_t,
+	thread_state_t,
+	mach_msg_type_number_t *);
+
+#define	RTLD_DEFAULT	((void *) -2)
+extern void* (*_dlsym)(void*, const char*);
+
+#endif // __EXC_CATCHER_H
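/*
 * A condensed sketch of the lookup pattern the three exc_catcher*.c
 * files share, assuming _dlsym behaves like dlsym(3): (void *)-1
 * means "not yet resolved", NULL means "the user never defined the
 * callback in their binary".
 *
 *	static _libkernel_exc_raise_func_t fn = (void *)-1;
 *
 *	if (fn == (void *)-1)		// first call: resolve once
 *		fn = _dlsym(RTLD_DEFAULT, "catch_exception_raise");
 *	if (fn == 0)			// symbol absent: nothing to call
 *		abort();
 *	return (*fn)(exception_port, thread, task, exception, code, codeCnt);
 */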
diff --git a/libsyscall/mach/exc_catcher_state.c b/libsyscall/mach/exc_catcher_state.c
index efcb5344c..c6674d56c 100644
--- a/libsyscall/mach/exc_catcher_state.c
+++ b/libsyscall/mach/exc_catcher_state.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -36,9 +36,12 @@
 #include <mach/exception.h>
 #include <mach/mig_errors.h>
 #include <dlfcn.h>
-#include <stdlib.h>
 
-__private_extern__ kern_return_t internal_catch_exception_raise_state (
+#include "abort.h"
+#include "exc_catcher.h"
+
+__private_extern__ kern_return_t
+internal_catch_exception_raise_state(
         mach_port_t exception_port,
         exception_type_t exception,
         exception_data_t code,
@@ -50,21 +53,19 @@ __private_extern__ kern_return_t internal_catch_exception_raise_state (
         mach_msg_type_number_t *new_stateCnt)
 {
 #if defined(__DYNAMIC__)
-    static int checkForFunction = 0;
-    /* This will be non-zero if the user has defined this function */
-    static kern_return_t (*func)(mach_port_t, exception_type_t, exception_data_t, mach_msg_type_number_t, int *, thread_state_t, mach_msg_type_number_t, thread_state_t, mach_msg_type_number_t *);
-    if (checkForFunction == 0) {
-        checkForFunction = 1;
-		func = dlsym(RTLD_DEFAULT, "catch_exception_raise_state");
-    }
-    if (func == 0) {
-        /* The user hasn't defined catch_exception_raise in their binary */
-        abort();
-    }
-    return (*func)(exception_port, exception, code, codeCnt, flavor, old_state, old_stateCnt, new_state, new_stateCnt);
+	static _libkernel_exc_raise_state_func_t exc_raise_state_func = (void*)-1;
+	
+	if (exc_raise_state_func == ((void*)-1)) {
+		exc_raise_state_func = _dlsym(RTLD_DEFAULT, "catch_exception_raise_state");
+	}
+	if (exc_raise_state_func == 0) {
+		/* The user hasn't defined catch_exception_raise_state in their binary */
+		abort();
+	}
+	return (*exc_raise_state_func)(exception_port, exception, code, codeCnt, flavor, old_state, old_stateCnt, new_state, new_stateCnt);
 #else
-    extern kern_return_t catch_exception_raise_state(mach_port_t, exception_type_t, exception_data_t, mach_msg_type_number_t, int *, thread_state_t, mach_msg_type_number_t, thread_state_t, mach_msg_type_number_t *);
-    return catch_exception_raise_state(exception_port, exception, code, codeCnt, flavor, old_state, old_stateCnt, new_state, new_stateCnt);
+	extern kern_return_t catch_exception_raise_state(mach_port_t, exception_type_t, exception_data_t, mach_msg_type_number_t, int *, thread_state_t, mach_msg_type_number_t, thread_state_t, mach_msg_type_number_t *);
+	return catch_exception_raise_state(exception_port, exception, code, codeCnt, flavor, old_state, old_stateCnt, new_state, new_stateCnt);
 #endif
 }
 
diff --git a/libsyscall/mach/exc_catcher_state_identity.c b/libsyscall/mach/exc_catcher_state_identity.c
index 1e0c5c0df..b92f5892e 100644
--- a/libsyscall/mach/exc_catcher_state_identity.c
+++ b/libsyscall/mach/exc_catcher_state_identity.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -36,9 +36,12 @@
 #include <mach/exception.h>
 #include <mach/mig_errors.h>
 #include <dlfcn.h>
-#include <stdlib.h>
 
-__private_extern__ kern_return_t internal_catch_exception_raise_state_identity (
+#include "abort.h"
+#include "exc_catcher.h"
+
+__private_extern__ kern_return_t
+internal_catch_exception_raise_state_identity(
         mach_port_t exception_port,
         mach_port_t thread,
         mach_port_t task,
@@ -52,21 +55,19 @@ __private_extern__ kern_return_t internal_catch_exception_raise_state_identity (
         mach_msg_type_number_t *new_stateCnt)
 {
 #if defined(__DYNAMIC__)
-    static int checkForFunction = 0;
-    /* This will be non-zero if the user has defined this function */
-    static kern_return_t (*func)(mach_port_t, mach_port_t, mach_port_t, exception_type_t, exception_data_t, mach_msg_type_number_t, int *, thread_state_t, mach_msg_type_number_t, thread_state_t, mach_msg_type_number_t *);
-    if (checkForFunction == 0) {
-        checkForFunction = 1;
-		func = dlsym(RTLD_DEFAULT, "catch_exception_raise_state_identity");
-    }
-    if (func == 0) {
-        /* The user hasn't defined catch_exception_raise in their binary */
-        abort();
-    }
-    return (*func)(exception_port, thread, task, exception, code, codeCnt, flavor, old_state, old_stateCnt, new_state, new_stateCnt);
+	static _libkernel_exec_raise_state_identity_t exc_raise_state_identity_func = (void*)-1;
+	
+	if (exc_raise_state_identity_func == ((void*)-1)) {
+		exc_raise_state_identity_func = _dlsym(RTLD_DEFAULT, "catch_exception_raise_state_identity");
+	}
+	if (exc_raise_state_identity_func == 0) {
+		/* The user hasn't defined catch_exception_raise_state_identity in their binary */
+		abort();
+	}
+	return (*exc_raise_state_identity_func)(exception_port, thread, task, exception, code, codeCnt, flavor, old_state, old_stateCnt, new_state, new_stateCnt);
 #else
-    extern kern_return_t catch_exception_raise_state_identity(mach_port_t, mach_port_t, mach_port_t, exception_type_t, exception_data_t, mach_msg_type_number_t, int *, thread_state_t, mach_msg_type_number_t, thread_state_t, mach_msg_type_number_t *);
-    return catch_exception_raise_state_identity(exception_port, thread, task, exception, code, codeCnt, flavor, old_state, old_stateCnt, new_state, new_stateCnt);
+	extern kern_return_t catch_exception_raise_state_identity(mach_port_t, mach_port_t, mach_port_t, exception_type_t, exception_data_t, mach_msg_type_number_t, int *, thread_state_t, mach_msg_type_number_t, thread_state_t, mach_msg_type_number_t *);
+	return catch_exception_raise_state_identity(exception_port, thread, task, exception, code, codeCnt, flavor, old_state, old_stateCnt, new_state, new_stateCnt);
 #endif
 }
 
diff --git a/libsyscall/mach/fprintf_stderr.c b/libsyscall/mach/fprintf_stderr.c
index e89df1136..4d92bfc1c 100644
--- a/libsyscall/mach/fprintf_stderr.c
+++ b/libsyscall/mach/fprintf_stderr.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2003-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -32,30 +32,36 @@
 
 #include <mach/mach.h>
 #include <mach/mach_init.h>
-#include <stdio.h>
 #include <stdarg.h>
+#include "string.h"
 
 int (*vprintf_stderr_func)(const char *format, va_list ap);
 
+#define __STDERR_FILENO	2
+int write(int fd, const char* cbuf, int nbyte);
 
 /* This function allows the writing of a mach error message to an
  * application-controllable output method, the default being to
  * use printf if no other method is specified by the application.
  *
- * To override, set the global (static) function pointer vprintf_stderr to
+ * To override, set the global function pointer vprintf_stderr to
  * a function which takes the same parameters as vprintf.
  */
 
-int fprintf_stderr(const char *format, ...)
+__private_extern__ int
+fprintf_stderr(const char *format, ...)
 {
-        va_list args;
+	va_list args;
 	int retval;
 
 	va_start(args, format);
-	if (vprintf_stderr_func == NULL)
-		retval = vprintf(format, args);
-	else
+	if (vprintf_stderr_func == NULL) {
+		char buffer[1024];
+		retval = _mach_vsnprintf(buffer, sizeof(buffer), format, args);
+		write(__STDERR_FILENO, buffer, retval);
+	} else {
 		retval = (*vprintf_stderr_func)(format, args);
+	}
 	va_end(args);
 
 	return retval;
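/*
 * Sketch of the fallback path added above: without Libc's stdio, the
 * message is formatted into a fixed 1 KiB stack buffer and written
 * straight to file descriptor 2; anything longer than the buffer is
 * truncated by _mach_vsnprintf. For example:
 *
 *	fprintf_stderr("mach error %d\n", err);	// -> write(2, buf, len)
 */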
diff --git a/libsyscall/mach/headers/Makefile.inc b/libsyscall/mach/headers/Makefile.inc
deleted file mode 100644
index f747c3d76..000000000
--- a/libsyscall/mach/headers/Makefile.inc
+++ /dev/null
@@ -1,10 +0,0 @@
-MACH_INSTHDRS += mach.h \
-                 mach_error.h \
-                 mach_init.h \
-                 mach_interface.h \
-                 port_obj.h \
-                 sync.h \
-                 task.h \
-                 thread_act.h \
-                 vm_task.h
-MACH_INSTHDRS := ${MACH_INSTHDRS:S/^/${.CURDIR}\/mach\/headers\//}
diff --git a/libsyscall/mach/i386/Makefile.inc b/libsyscall/mach/i386/Makefile.inc
deleted file mode 100644
index 4afb1ae3a..000000000
--- a/libsyscall/mach/i386/Makefile.inc
+++ /dev/null
@@ -1,3 +0,0 @@
-.PATH: ${.CURDIR}/mach/i386
-
-MDSRCS += mach_absolute_time.S
diff --git a/libsyscall/mach/headers/errorlib.h b/libsyscall/mach/mach/errorlib.h
similarity index 94%
rename from libsyscall/mach/headers/errorlib.h
rename to libsyscall/mach/mach/errorlib.h
index 0c3cc64c2..a5b6daf32 100644
--- a/libsyscall/mach/headers/errorlib.h
+++ b/libsyscall/mach/mach/errorlib.h
@@ -81,21 +81,21 @@
 #define	NO_SUCH_ERROR		"unknown error code"
 
 struct error_subsystem {
-	char			* subsys_name;
+	const char		*subsys_name;
 	int			max_code;
-	char			* * codes;
+	const char * const	*codes;
 };
 
 struct error_system {
-	int			max_sub;
-	char			* bad_sub;
-	struct error_subsystem	* subsystem;
+	int				max_sub;
+	const char			*bad_sub;
+	const struct error_subsystem	*subsystem;
 };
 
 #include <sys/cdefs.h>
 
 __BEGIN_DECLS
-extern	struct error_system 	errors[err_max_system+1];
+extern const struct error_system 	errors[err_max_system+1];
 __END_DECLS
 
 #define	errlib_count(s)		(sizeof(s)/sizeof(s[0]))
diff --git a/libsyscall/mach/headers/mach.h b/libsyscall/mach/mach/mach.h
similarity index 100%
rename from libsyscall/mach/headers/mach.h
rename to libsyscall/mach/mach/mach.h
diff --git a/libsyscall/mach/headers/mach_error.h b/libsyscall/mach/mach/mach_error.h
similarity index 100%
rename from libsyscall/mach/headers/mach_error.h
rename to libsyscall/mach/mach/mach_error.h
diff --git a/libsyscall/mach/headers/mach_init.h b/libsyscall/mach/mach/mach_init.h
similarity index 95%
rename from libsyscall/mach/headers/mach_init.h
rename to libsyscall/mach/mach/mach_init.h
index 36a47fac1..9816f1138 100644
--- a/libsyscall/mach/headers/mach_init.h
+++ b/libsyscall/mach/mach/mach_init.h
@@ -68,12 +68,11 @@
  */
 
 __BEGIN_DECLS
-extern mach_port_t mach_task_self(void);
 extern mach_port_t mach_host_self(void);
 extern mach_port_t mach_thread_self(void);
 extern kern_return_t host_page_size(host_t, vm_size_t *);
 
-extern	mach_port_t	mach_task_self_;
+extern mach_port_t	mach_task_self_;
 #define	mach_task_self() mach_task_self_
 #define	current_task()	mach_task_self()
 
@@ -86,9 +85,6 @@ __BEGIN_DECLS
  */
 
 extern	mach_port_t	bootstrap_port;
-extern	mach_port_t	name_server_port;
-extern	mach_port_t	environment_port;
-extern	mach_port_t	service_port;
 
 /*
  *	Where these ports occur in the "mach_ports_register"
@@ -125,6 +121,7 @@ extern	int		vm_page_shift;
  *	application to point to a user-specified output function
  */
 extern int (*vprintf_stderr_func)(const char *format, va_list ap);
+
 __END_DECLS
 
 #endif	/* _MACH_INIT_ */
diff --git a/libsyscall/mach/headers/mach_interface.h b/libsyscall/mach/mach/mach_interface.h
similarity index 100%
rename from libsyscall/mach/headers/mach_interface.h
rename to libsyscall/mach/mach/mach_interface.h
diff --git a/libsyscall/mach/headers/port_obj.h b/libsyscall/mach/mach/port_obj.h
similarity index 100%
rename from libsyscall/mach/headers/port_obj.h
rename to libsyscall/mach/mach/port_obj.h
diff --git a/libsyscall/mach/headers/sync.h b/libsyscall/mach/mach/sync.h
similarity index 100%
rename from libsyscall/mach/headers/sync.h
rename to libsyscall/mach/mach/sync.h
diff --git a/libsyscall/mach/headers/task.h b/libsyscall/mach/mach/task.h
similarity index 93%
rename from libsyscall/mach/headers/task.h
rename to libsyscall/mach/mach/task.h
index a919ee664..6cef51794 100644
--- a/libsyscall/mach/headers/task.h
+++ b/libsyscall/mach/mach/task.h
@@ -29,10 +29,6 @@
 #include <mach/i386/task.h>
 #elif defined(__x86_64__)
 #include <mach/x86_64/task.h>
-#elif defined(__ppc__)
-#include <mach/ppc/task.h>
-#elif defined(__ppc64__)
-#include <mach/ppc64/task.h>
 #else
 #error unknown architecture
 #endif 
diff --git a/libsyscall/mach/headers/thread_act.h b/libsyscall/mach/mach/thread_act.h
similarity index 92%
rename from libsyscall/mach/headers/thread_act.h
rename to libsyscall/mach/mach/thread_act.h
index 2696b626d..b413f7a7e 100644
--- a/libsyscall/mach/headers/thread_act.h
+++ b/libsyscall/mach/mach/thread_act.h
@@ -29,10 +29,6 @@
 #include <mach/i386/thread_act.h>
 #elif defined(__x86_64__)
 #include <mach/x86_64/thread_act.h>
-#elif defined(__ppc__)
-#include <mach/ppc/thread_act.h>
-#elif defined(__ppc64__)
-#include <mach/ppc64/thread_act.h>
 #else
 #error unknown architecture
 #endif 
diff --git a/libsyscall/mach/headers/vm_task.h b/libsyscall/mach/mach/vm_task.h
similarity index 100%
rename from libsyscall/mach/headers/vm_task.h
rename to libsyscall/mach/mach/vm_task.h
diff --git a/libsyscall/mach/mach_error.c b/libsyscall/mach/mach_error.c
index b87c0adbf..4b9542726 100644
--- a/libsyscall/mach/mach_error.c
+++ b/libsyscall/mach/mach_error.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -60,26 +60,24 @@
  *      or returns a descriptive string.
  */
 
-#include <stdio.h>
 #include <mach/mach_error.h>
 #include <mach/boolean.h>
 #include "errorlib.h"
+#include "string.h"
 
 int fprintf_stderr(const char *format, ...);
 
 void
-mach_error( str, err )
-	const char *str; 
-	mach_error_t err;
+mach_error(const char *str, mach_error_t err)
 {
-	char * err_str;
+	char *err_str;
 	char buf[1024];
 	boolean_t diag;
 
-	err_str=mach_error_string_int(err, &diag);
+	err_str = mach_error_string_int(err, &diag);
 
-	if ( diag ) {
-		sprintf( buf, "%s %s (%x)", mach_error_type(err), err_str, err );
+	if (diag) {
+		_mach_snprintf(buf, sizeof(buf), "%s %s (%x)", mach_error_type(err), err_str, err);
 		err_str = buf;
 	}
 
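For reference, a minimal caller of the rewritten mach_error() could look like the sketch below (hedged: the failing call is only there to produce a nonzero kern_return_t).

#include <mach/mach.h>
#include <mach/mach_error.h>

void
report_example(void)
{
	/* Provoke an error: modifying rights on a dead name fails. */
	kern_return_t kr = mach_port_mod_refs(mach_task_self(),
	    MACH_PORT_DEAD, MACH_PORT_RIGHT_RECEIVE, -1);

	if (kr != KERN_SUCCESS)
		mach_error("mach_port_mod_refs:", kr);	/* formats and prints the error */
}
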
diff --git a/libsyscall/mach/mach_error_string.c b/libsyscall/mach/mach_error_string.c
index 9240629d9..82dc4da99 100644
--- a/libsyscall/mach/mach_error_string.c
+++ b/libsyscall/mach/mach_error_string.c
@@ -173,7 +173,6 @@ mach_error_string(mach_error_t err)
 	boolean_t diag;
 
 	return mach_error_string_int( err, &diag );
-
 }
 
 /* vim: set ts=4: */
diff --git a/libsyscall/mach/mach_init.c b/libsyscall/mach/mach_init.c
index ce2eeed8c..c2702539e 100644
--- a/libsyscall/mach/mach_init.c
+++ b/libsyscall/mach/mach_init.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -58,32 +58,26 @@
 #include <mach/mach_host.h>
 #include <mach/mach_init.h>
 #include <mach/vm_param.h>
+#include <stdbool.h>
 #include "externs.h"
+#include "mig_reply_port.h"
 
 mach_port_t	mach_task_self_ = MACH_PORT_NULL;
-mach_port_t     mach_host_self_ = MACH_PORT_NULL;
+#ifdef __i386__
+mach_port_t	mach_host_self_ = MACH_PORT_NULL;
+#endif
 
-__private_extern__ kern_return_t _host_mach_msg_trap_return_;
+vm_size_t vm_page_size	= PAGE_SIZE;
+vm_size_t vm_page_mask	= PAGE_MASK;
+int vm_page_shift		= PAGE_SHIFT;
 
-vm_size_t	vm_page_size;
-vm_size_t	vm_page_mask;
-int		vm_page_shift;
+int mach_init(void);
+int _mach_fork_child(void);
 
-/*
- * Forward internal declarations for automatic mach_init during
- * fork() implementation.
- */
-/* fork() calls through atfork_child_routine */
-void (*_atfork_child_routine)(void);
+static int mach_init_doit(bool forkchild);
 
-static void mach_atfork_child_routine(void);
-static boolean_t first = TRUE;
-static void (*previous_atfork_child_routine)(void);
-static boolean_t mach_init_inited = FALSE;
-extern int mach_init(void);
 extern void _pthread_set_self(void *);
 extern void cthread_set_self(void *);
-extern void __libc_init(void); /* Libc initialization routine */
 
 kern_return_t
 host_page_size(__unused host_t host, vm_size_t *out_page_size)
@@ -92,114 +86,74 @@ host_page_size(__unused host_t host, vm_size_t *out_page_size)
 	return KERN_SUCCESS;
 }
 
-static void mach_atfork_child_routine(void)
+/* 
+ * mach_init() must be called explicitly in static executables (including dyld).
+ * In dynamic executables it is called by libSystem_initializer().
+ */
+int
+mach_init(void)
 {
-	/*
-	 * If an (*_atfork_child_routine)() was registered when
-	 * mach_init was first called, then call that routine
-	 * prior to performing our re-initialization. This ensures
-	 * that the post-fork handlers are called in exactly the
-	 * same order as the crt0 (exec) handlers. Any library 
-	 * that makes use of the _atfork_child_routine must follow
-	 * the same technique.
-	 */
-	if (previous_atfork_child_routine) {
-		(*previous_atfork_child_routine)();
+	static bool mach_init_inited = false;
+
+	if (mach_init_inited) {
+		return 0;
 	}
-	mach_init_inited = FALSE;
-	mach_init();
+	mach_init_inited = true;
+	
+	return mach_init_doit(false);
 }
 
-mach_port_t
-mach_host_self(void)
+// called by libSystem_atfork_child()
+int
+_mach_fork_child(void)
 {
-        return(host_self_trap());
+	return mach_init_doit(true);
 }
 
-int mach_init_doit(int forkchild)
+static int
+mach_init_doit(bool forkchild)
 {
-	host_t host;
-
 	/*
 	 *	Get the important ports into the cached values,
 	 *	as required by "mach_init.h".
 	 */
-	 
 	mach_task_self_ = task_self_trap();
-	host = host_self_trap();
-
-
-	if (!forkchild) {
-		/*
-		 * Set up the post-fork child handler in the libc stub
-		 * to invoke this routine if this process forks. Save the
-		 * previous value in order that we can call that handler
-		 * prior to performing our postfork work.
-		 */
-            
-		first = FALSE;
-		previous_atfork_child_routine = _atfork_child_routine;
-		_atfork_child_routine = mach_atfork_child_routine;
-                _pthread_set_self(0);
-                cthread_set_self(0);
-	}
-
+	
 	/*
 	 *	Initialize the single mig reply port
 	 */
 
-	mig_init(0);
-
-	/*
-	 *	Cache some other valuable system constants
-	 */
-
-	(void)host_page_size(host, &vm_page_size);
-	vm_page_mask = vm_page_size - 1;
-	if (vm_page_size == 0) {
-		/* guard against unlikely craziness */
-		vm_page_shift = 0;
-	} else {
-		/*
-		 * Unfortunately there's no kernel interface to get the
-		 * vm_page_shift, but it's easy enough to calculate.
-		 */
-		for (vm_page_shift = 0;
-		     (vm_page_size & (1 << vm_page_shift)) == 0;
-		     vm_page_shift++)
-			continue;
-	}
-
-	mach_port_deallocate(mach_task_self_, host);
-
-	mach_init_ports();
+	_pthread_set_self(0);
+	_mig_init(0);
 
 #if WE_REALLY_NEED_THIS_GDB_HACK
 	/*
 	 * Check to see if GDB wants us to stop
 	 */
 	{
-	  task_user_data_data_t	user_data;
-	  mach_msg_type_number_t	user_data_count = TASK_USER_DATA_COUNT;
+	task_user_data_data_t	user_data;
+	mach_msg_type_number_t	user_data_count = TASK_USER_DATA_COUNT;
 	  
 	user_data.user_data = 0;
 	(void)task_info(mach_task_self_, TASK_USER_DATA,
 		(task_info_t)&user_data, &user_data_count);
 #define MACH_GDB_RUN_MAGIC_NUMBER 1
 #ifdef	MACH_GDB_RUN_MAGIC_NUMBER	
-	  /* This magic number is set in mach-aware gdb 
-	   *  for RUN command to allow us to suspend user's
-	   *  executable (linked with this libmach!) 
-	   *  with the code below.
-	* This hack should disappear when gdb improves.
-	*/
+	/* This magic number is set in mach-aware gdb 
+	 *  for RUN command to allow us to suspend user's
+	 *  executable (linked with this libmach!) 
+	 *  with the code below.
+	 * This hack should disappear when gdb improves.
+	 */
 	if ((int)user_data.user_data == MACH_GDB_RUN_MAGIC_NUMBER) {
 	    kern_return_t ret;
 	    user_data.user_data = 0;
 	    
-	    ret = task_suspend (mach_task_self_);
+	    ret = task_suspend(mach_task_self_);
 	    if (ret != KERN_SUCCESS) {
-		while(1) (void)task_terminate(mach_task_self_);
+			while (1) {
+				(void)task_terminate(mach_task_self_);
+			}
 	    }
 	}
 #undef MACH_GDB_RUN_MAGIC_NUMBER  
@@ -207,62 +161,5 @@ int mach_init_doit(int forkchild)
 	}
 #endif /* WE_REALLY_NEED_THIS_GDB_HACK */
 
-	/*
-         * Reserve page 0 so that the program doesn't get it as
-	 * the result of a vm_allocate() or whatever.
-	 */
-	{
-		vm_offset_t zero_page_start;
-
-		zero_page_start = 0;
-		(void)vm_map(mach_task_self_, &zero_page_start, vm_page_size,
-			     0, FALSE, MEMORY_OBJECT_NULL, 0, TRUE,
-			     VM_PROT_NONE, VM_PROT_NONE, VM_INHERIT_COPY);
-		/* ignore result, we don't care if it failed */
-	}
-
-	return(0);
-}
-
-
-
-
-/* 
- * mach_init() is called explicitly in static executables (including dyld)
- * It is called implicitly by libSystem_initializer() in dynamic executables
- */
-int mach_init(void)
-{
-	int ret;
-
-	if (mach_init_inited)
-		return(0);
-	mach_init_inited = TRUE;
-	ret = mach_init_doit(0);
-
-	return ret;
-}
-
-
-
-
-/* called by _cthread_fork_child() */
-int fork_mach_init(void)
-{
-	/* called only from child */
-	return(mach_init_doit(1));
-}
-
-#undef	mach_task_self
-
-mach_port_t
-mach_task_self(void)
-{
-	return(task_self_trap());
-}
-
-mach_port_t
-mach_thread_self(void)
-{
-	return(thread_self_trap());
+	return 0;
 }
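
In a static executable (or dyld) the bootstrap above is the program's responsibility; a sketch of the expected call pattern:

extern int mach_init(void);

int
main(void)
{
	/* Static binaries must initialize Mach state by hand before
	 * touching any Mach API. */
	mach_init();

	/* mach_task_self_ and the cached MIG reply port are now valid. */
	return 0;
}
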
diff --git a/libsyscall/mach/mach_init_libSystem.c b/libsyscall/mach/mach_init_libSystem.c
deleted file mode 100644
index 86ca46aca..000000000
--- a/libsyscall/mach/mach_init_libSystem.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#ifdef __DYNAMIC__
-struct ProgramVars; /* forward reference */
-
-extern void pthread_init(void);				// from libc.a
-extern void __libc_init(const struct ProgramVars* vars);	// from libc.a
-extern void __keymgr_initializer(void);		// from libkeymgr.a
-extern void _dyld_initializer(void);		// from libdyld.a
-extern void libdispatch_init(void);		// from libdispatch.a
-
-/*
- * libsyscall_initializer() initializes all of libSystem.dylib <rdar://problem/4892197>
- */
-static __attribute__((constructor)) 
-void libSystem_initializer(int argc, const char* argv[], const char* envp[], const char* apple[], const struct ProgramVars* vars)
-{
-	mach_init();
-	pthread_init();
-	__libc_init(vars);
-	__keymgr_initializer();
-	_dyld_initializer();
-	libdispatch_init();
-}
-
-/*  
- *  Old crt1.o glue used to call through mach_init_routine which was used to initialize libSystem.
- *  LibSystem now auto-initializes but mach_init_routine is left for binary compatibility.
- */
-static void mach_init_old() {}
-void (*mach_init_routine)(void) = &mach_init_old;
-
-#endif /* __DYNAMIC__ */
diff --git a/libsyscall/mach/mach_init_ports.c b/libsyscall/mach/mach_init_ports.c
deleted file mode 100644
index fcb6d2227..000000000
--- a/libsyscall/mach/mach_init_ports.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-
-#include <mach/mach.h>
-#include <stdlib.h>
-#include "externs.h"
-
-mach_port_t	bootstrap_port = MACH_PORT_NULL;
-mach_port_t	name_server_port = MACH_PORT_NULL;
-mach_port_t	environment_port = MACH_PORT_NULL;
-mach_port_t	service_port = MACH_PORT_NULL;
-semaphore_t	clock_sem = MACH_PORT_NULL;
-mach_port_t	clock_port = MACH_PORT_NULL;
-mach_port_t thread_recycle_port = MACH_PORT_NULL;
-
-void
-mach_init_ports(void)
-{
-	mach_port_array_t	ports;
-	mach_msg_type_number_t	ports_count;
-	kern_return_t		kr;
-	host_t			host;
-
-	/*
-	 *	Find those ports important to every task.
-	 */
-	kr = task_get_special_port(mach_task_self(),
-				   TASK_BOOTSTRAP_PORT,
-				   &bootstrap_port);
-	if (kr != KERN_SUCCESS)
-	    return;
-
-        /* Get the clock service port for nanosleep */
-	host = mach_host_self();
-        kr = host_get_clock_service(host, SYSTEM_CLOCK, &clock_port);
-        if (kr != KERN_SUCCESS) {
-            abort();
-	}
-        kr = semaphore_create(mach_task_self(), &clock_sem, SYNC_POLICY_FIFO, 0);
-        if (kr != KERN_SUCCESS) {
-            abort();
-        }
-	mach_port_deallocate(mach_task_self(), host);
-        kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &thread_recycle_port);
-        if (kr != KERN_SUCCESS) {
-            abort();
-        }
-
-	/*
-	 *	Find the options service ports.
-	 *	XXX - Don't need these on Darwin, should go away.
-	 */
-	kr = mach_ports_lookup(mach_task_self(), &ports,
-			       &ports_count);
-	if (kr == KERN_SUCCESS) {
-		if (ports_count >= MACH_PORTS_SLOTS_USED) {
-			name_server_port = ports[NAME_SERVER_SLOT];
-			environment_port = ports[ENVIRONMENT_SLOT];
-			service_port     = ports[SERVICE_SLOT];
-		}
-
-		/* get rid of out-of-line data */
-		(void) vm_deallocate(mach_task_self(),
-			     (vm_offset_t) ports,
-			     (vm_size_t) (ports_count * sizeof *ports));
-	}
-}
-
-#ifdef notdef
-/* will have problems with dylib build --> not needed anyway */
-#ifndef	lint
-/*
- *	Routines which our library must suck in, to avoid
- *	a later library from referencing them and getting
- *	the wrong version.
- */
-extern void _replacements(void);
-
-void
-_replacements(void)
-{
-	(void)sbrk(0);			/* Pull in our sbrk/brk */
-	(void)malloc(0);		/* Pull in our malloc package */
-}
-#endif	/* lint */
-#endif /* notdef */
diff --git a/pexpert/ppc/pe_bootargs.c b/libsyscall/mach/mach_legacy.c
similarity index 80%
rename from pexpert/ppc/pe_bootargs.c
rename to libsyscall/mach/mach_legacy.c
index a0d2b2a08..f425d78f8 100644
--- a/pexpert/ppc/pe_bootargs.c
+++ b/libsyscall/mach/mach_legacy.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,12 +25,27 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-#include <pexpert/pexpert.h>
-#include <pexpert/boot.h>
 
-char *
-PE_boot_args(
-	void)
+#include <mach/mach.h>
+
+#undef mach_host_self
+
+mach_port_t
+mach_host_self(void)
+{
+	return host_self_trap();
+}
+
+#undef mach_task_self
+
+mach_port_t
+mach_task_self(void)
+{
+	return task_self_trap();
+}
+
+mach_port_t
+mach_thread_self(void)
 {
-	return((char *)((boot_args*)PE_state.bootArgs)->CommandLine);
+	return thread_self_trap();
 }
diff --git a/libsyscall/mach/mach_msg.c b/libsyscall/mach/mach_msg.c
index 644313d6b..d8b094119 100644
--- a/libsyscall/mach/mach_msg.c
+++ b/libsyscall/mach/mach_msg.c
@@ -210,28 +210,28 @@ mach_msg_destroy_port(mach_port_t port, mach_msg_type_name_t type)
       case MACH_MSG_TYPE_MOVE_SEND:
       case MACH_MSG_TYPE_MOVE_SEND_ONCE:
 	/* destroy the send/send-once right */
-	(void) mach_port_deallocate(mach_task_self(), port);
+	(void) mach_port_deallocate(mach_task_self_, port);
 	break;
 
       case MACH_MSG_TYPE_MOVE_RECEIVE:
 	/* destroy the receive right */
-	(void) mach_port_mod_refs(mach_task_self(), port,
+	(void) mach_port_mod_refs(mach_task_self_, port,
 				  MACH_PORT_RIGHT_RECEIVE, -1);
 	break;
 
       case MACH_MSG_TYPE_MAKE_SEND:
 	/* create a send right and then destroy it */
-	(void) mach_port_insert_right(mach_task_self(), port,
+	(void) mach_port_insert_right(mach_task_self_, port,
 				      port, MACH_MSG_TYPE_MAKE_SEND);
-	(void) mach_port_deallocate(mach_task_self(), port);
+	(void) mach_port_deallocate(mach_task_self_, port);
 	break;
 
       case MACH_MSG_TYPE_MAKE_SEND_ONCE:
 	/* create a send-once right and then destroy it */
-	(void) mach_port_extract_right(mach_task_self(), port,
+	(void) mach_port_extract_right(mach_task_self_, port,
 				       MACH_MSG_TYPE_MAKE_SEND_ONCE,
 				       &port, &type);
-	(void) mach_port_deallocate(mach_task_self(), port);
+	(void) mach_port_deallocate(mach_task_self_, port);
 	break;
     }
 }
@@ -240,7 +240,7 @@ static void
 mach_msg_destroy_memory(vm_offset_t addr, vm_size_t size)
 {
     if (size != 0)
-	(void) vm_deallocate(mach_task_self(), addr, size);
+	(void) vm_deallocate(mach_task_self_, addr, size);
 }
 
 
@@ -273,43 +273,56 @@ mach_msg_destroy(mach_msg_header_t *msg)
     mach_msg_destroy_port(msg->msgh_remote_port, MACH_MSGH_BITS_REMOTE(mbits));
 
     if (mbits & MACH_MSGH_BITS_COMPLEX) {
-	mach_msg_body_t		*body;
-	mach_msg_descriptor_t	*saddr, *eaddr;
+	mach_msg_base_t		*base;
+	mach_msg_type_number_t	count, i;
+	mach_msg_descriptor_t	*daddr;
 	
-    	body = (mach_msg_body_t *) (msg + 1);
-    	saddr = (mach_msg_descriptor_t *) 
-			((mach_msg_base_t *) msg + 1);
-    	eaddr =  saddr + body->msgh_descriptor_count;
+    	base = (mach_msg_base_t *) msg;
+	count = base->body.msgh_descriptor_count;
 
-	for  ( ; saddr < eaddr; saddr++) {
-	    switch (saddr->type.type) {
+    	daddr = (mach_msg_descriptor_t *) (base + 1);
+	for (i = 0; i < count; i++) {
+
+	    switch (daddr->type.type) {
 	    
-	        case MACH_MSG_PORT_DESCRIPTOR: {
+	    case MACH_MSG_PORT_DESCRIPTOR: {
 		    mach_msg_port_descriptor_t *dsc;
 
 		    /* 
 		     * Destroy port rights carried in the message 
 		     */
-		    dsc = &saddr->port;
-		    mach_msg_destroy_port(dsc->name, dsc->disposition);		
+		    dsc = &daddr->port;
+		    mach_msg_destroy_port(dsc->name, dsc->disposition);
+		    daddr = (mach_msg_descriptor_t *)(dsc + 1);
 		    break;
-	        }
+	    }
 
-	        case MACH_MSG_OOL_DESCRIPTOR : {
+	    case MACH_MSG_OOL_DESCRIPTOR: {
 		    mach_msg_ool_descriptor_t *dsc;
 
 		    /* 
 		     * Destroy memory carried in the message 
 		     */
-		    dsc = &saddr->out_of_line;
+		    dsc = &daddr->out_of_line;
 		    if (dsc->deallocate) {
 		        mach_msg_destroy_memory((vm_offset_t)dsc->address,
 						dsc->size);
 		    }
+		    daddr = (mach_msg_descriptor_t *)(dsc + 1);
 		    break;
-	        }
+	    }
 
-	        case MACH_MSG_OOL_PORTS_DESCRIPTOR : {
+	    case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: {
+		    mach_msg_ool_descriptor_t *dsc;
+
+		    /*
+		     * Just skip over it: a volatile OOL descriptor carries
+		     * nothing the receiver owns or must destroy.
+		     */
+		    dsc = &daddr->out_of_line;
+		    daddr = (mach_msg_descriptor_t *)(dsc + 1);
+		    break;
+	    }
+
+	    case MACH_MSG_OOL_PORTS_DESCRIPTOR: {
 		    mach_port_t             		*ports;
 		    mach_msg_ool_ports_descriptor_t	*dsc;
 		    mach_msg_type_number_t   		j;
@@ -317,7 +330,7 @@ mach_msg_destroy(mach_msg_header_t *msg)
 		    /*
 		     * Destroy port rights carried in the message 
 		     */
-		    dsc = &saddr->ool_ports;
+		    dsc = &daddr->ool_ports;
 		    ports = (mach_port_t *) dsc->address;
 		    for (j = 0; j < dsc->count; j++, ports++)  {
 		        mach_msg_destroy_port(*ports, dsc->disposition);
@@ -330,8 +343,9 @@ mach_msg_destroy(mach_msg_header_t *msg)
 		        mach_msg_destroy_memory((vm_offset_t)dsc->address, 
 					dsc->count * sizeof(mach_port_t));
 		    }
+		    daddr = (mach_msg_descriptor_t *)(dsc + 1);
 		    break;
-	        }
+	    }
 	    }
 	}
     }
@@ -362,7 +376,7 @@ mach_msg_server_once(
 	mach_msg_size_t reply_alloc;
 	mach_msg_return_t mr;
 	kern_return_t kr;
-	mach_port_t self = mach_task_self();
+	mach_port_t self = mach_task_self_;
 
 	options &= ~(MACH_SEND_MSG|MACH_RCV_MSG);
 
@@ -487,7 +501,7 @@ mach_msg_server(
 	mach_msg_size_t reply_alloc;
 	mach_msg_return_t mr;
 	kern_return_t kr;
-	mach_port_t self = mach_task_self();
+	mach_port_t self = mach_task_self_;
 
 	options &= ~(MACH_SEND_MSG|MACH_RCV_MSG|MACH_RCV_OVERWRITE);
 
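The destroy loop above replaces fixed-stride pointer arithmetic with a cursor that advances by the size of each descriptor it actually sees. The same walking pattern, reduced to a standalone sketch that merely counts port descriptors, looks like this (assumption: non-port descriptors are treated as OOL-sized, which is all this sketch needs):

#include <mach/mach.h>

static int
count_port_descriptors(mach_msg_header_t *msg)
{
	mach_msg_base_t *base = (mach_msg_base_t *)msg;
	mach_msg_descriptor_t *daddr;
	mach_msg_type_number_t i, count;
	int nports = 0;

	if (!(msg->msgh_bits & MACH_MSGH_BITS_COMPLEX))
		return 0;

	count = base->body.msgh_descriptor_count;
	daddr = (mach_msg_descriptor_t *)(base + 1);
	for (i = 0; i < count; i++) {
		switch (daddr->type.type) {
		case MACH_MSG_PORT_DESCRIPTOR:
			nports++;
			daddr = (mach_msg_descriptor_t *)(&daddr->port + 1);
			break;
		default:
			/* Advance past an OOL-sized descriptor. */
			daddr = (mach_msg_descriptor_t *)(&daddr->out_of_line + 1);
			break;
		}
	}
	return nports;
}
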
diff --git a/libsyscall/mach/mig_allocate.c b/libsyscall/mach/mig_allocate.c
index 14b8a2933..ed1288662 100644
--- a/libsyscall/mach/mig_allocate.c
+++ b/libsyscall/mach/mig_allocate.c
@@ -59,7 +59,7 @@
 void
 mig_allocate(vm_address_t *addr_p, vm_size_t size)
 {
-	if (vm_allocate(mach_task_self(),
+	if (vm_allocate(mach_task_self_,
 			addr_p,
 			size,
 			VM_MAKE_TAG(VM_MEMORY_MACH_MSG)|TRUE)
diff --git a/libsyscall/mach/mig_deallocate.c b/libsyscall/mach/mig_deallocate.c
index bbcf15e6b..2b58e2c70 100644
--- a/libsyscall/mach/mig_deallocate.c
+++ b/libsyscall/mach/mig_deallocate.c
@@ -59,7 +59,7 @@
 void
 mig_deallocate(vm_address_t addr, vm_size_t size)
 {
-	(void) vm_deallocate(mach_task_self(),
+	(void) vm_deallocate(mach_task_self_,
 			addr,
 			size);
 }
diff --git a/libsyscall/mach/mig_reply_port.c b/libsyscall/mach/mig_reply_port.c
new file mode 100644
index 000000000..aa2890ac6
--- /dev/null
+++ b/libsyscall/mach/mig_reply_port.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <mach/mach.h>
+#include <mach/mach_init.h>
+
+//extern mach_port_t _pthread_reply_port(pthread_t);
+static mach_port_t _task_reply_port = MACH_PORT_NULL;
+
+extern mach_port_t _mig_get_reply_port(void);
+extern void _mig_set_reply_port(mach_port_t port);
+
+/*
+ * Called by mach_init with init_done == 0; historically it was called
+ * again with 1 at the end of cthread_init.
+ */
+void
+_mig_init(int init_done)
+{
+	if (init_done == 0) {
+		_task_reply_port = mach_reply_port();
+	}
+}
+
+/*
+ * Called by mig interface code whenever a reply port is needed.
+ * Tracing is masked during this call; otherwise, a call to printf()
+ * can result in a call to malloc() which eventually reenters
+ * mig_get_reply_port() and deadlocks.
+ */
+mach_port_t
+mig_get_reply_port(void)
+{
+	mach_port_t port = _mig_get_reply_port();
+	if (port == MACH_PORT_NULL) {
+		port = mach_reply_port();
+		_mig_set_reply_port(port);
+	}
+	return port;
+}
+
+/*
+ * Called by mig interface code after a timeout on the reply port.
+ * May also be called by the user; newer MIG stubs pass the port in explicitly.
+ */
+void
+mig_dealloc_reply_port(mach_port_t migport)
+{
+	mach_port_t port;
+	
+	port = _mig_get_reply_port();
+	if (port != MACH_PORT_NULL && port != _task_reply_port) {
+		_mig_set_reply_port(_task_reply_port);
+		(void) mach_port_mod_refs(mach_task_self(), port, MACH_PORT_RIGHT_RECEIVE, -1);
+		if (migport != port) {
+			(void) mach_port_deallocate(mach_task_self(), migport);
+		}
+		_mig_set_reply_port(MACH_PORT_NULL);
+	}
+}
+
+/*************************************************************
+ *  Called by mig interfaces after each RPC.
+ *  Could be called by user.
+ ***********************************************************/
+
+void
+mig_put_reply_port(mach_port_t reply_port)
+{
+}
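
For context, a hand-rolled RPC obtains its reply port exactly the way a generated MIG stub does; a hedged sketch (the request header is assumed to be otherwise fully initialized by the caller):

#include <mach/mach.h>

static mach_msg_return_t
send_and_wait(mach_msg_header_t *request, mach_msg_size_t rcv_size)
{
	mach_port_t reply = mig_get_reply_port();

	/* Have the kernel mint a send-once right to the cached reply port. */
	request->msgh_local_port = reply;
	request->msgh_bits |= MACH_MSGH_BITS(0, MACH_MSG_TYPE_MAKE_SEND_ONCE);

	return mach_msg(request, MACH_SEND_MSG | MACH_RCV_MSG,
	    request->msgh_size, rcv_size, reply,
	    MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
}
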
diff --git a/osfmk/mach/ppc/rpc.h b/libsyscall/mach/mig_reply_port.h
similarity index 85%
rename from osfmk/mach/ppc/rpc.h
rename to libsyscall/mach/mig_reply_port.h
index b3a274a2d..54e27879c 100644
--- a/osfmk/mach/ppc/rpc.h
+++ b/libsyscall/mach/mig_reply_port.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002,2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,11 +25,10 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * @OSF_COPYRIGHT@
- */
 
-#ifndef	_MACH_PPC_RPC_H_
-#define	_MACH_PPC_RPC_H_
+#include <mach/mach.h>
+
+void _mig_fork_child(void);
+void _mig_init(int init_done);
 
-#endif	/* _MACH_PPC_RPC_H_ */
+void _mig_reply_port_callbacks(mach_port_t (*get)(void), void (*set)(mach_port_t));
diff --git a/libsyscall/mach/mig_strncpy.c b/libsyscall/mach/mig_strncpy.c
index 4366563fa..ed17aaff2 100644
--- a/libsyscall/mach/mig_strncpy.c
+++ b/libsyscall/mach/mig_strncpy.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -73,18 +73,21 @@
 
 int
 mig_strncpy(
-    register char *dest,
-    register const char *src,
-    register int len)
+    char *dest,
+    const char *src,
+    int len)
 {
-    register int i;
+    int i;
 
-    if (len <= 0)
-	return 0;
+    if (len <= 0) {
+		return 0;
+	}
 
-    for (i=1; i<len; i++)
-	if (! (*dest++ = *src++))
-	    return i;
+    for (i = 1; i < len; i++) {
+		if (!(*dest++ = *src++)) {
+			return i;
+		}
+	}
 
     *dest = '\0';
     return i;
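
Note the convention: unlike strncpy(3), mig_strncpy() always NUL-terminates and returns the number of bytes placed in the destination, counting the terminator. For example:

	char buf[8];
	int n;

	n = mig_strncpy(buf, "mach", sizeof(buf));		/* buf = "mach",    n == 5 */
	n = mig_strncpy(buf, "mach_message", sizeof(buf));	/* buf = "mach_ms", n == 8 */
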
diff --git a/libsyscall/mach/ms_thread_switch.c b/libsyscall/mach/ms_thread_switch.c
index e7769c11a..2d1f16fc8 100644
--- a/libsyscall/mach/ms_thread_switch.c
+++ b/libsyscall/mach/ms_thread_switch.c
@@ -65,12 +65,12 @@ extern kern_return_t syscall_thread_switch(mach_port_name_t, int, mach_msg_timeo
 
 kern_return_t
 thread_switch(
-	mach_port_t		thread,
-	int			option,
-	mach_msg_timeout_t	option_time)
+	mach_port_t thread,
+	int option,
+	mach_msg_timeout_t option_time)
 {
 	kern_return_t result;
 	
-		result = syscall_thread_switch(thread, option, option_time);
-	return (result);
+	result = syscall_thread_switch(thread, option, option_time);
+	return result;
 }
diff --git a/libsyscall/mach/panic.c b/libsyscall/mach/panic.c
index d0a658643..3992225f1 100644
--- a/libsyscall/mach/panic.c
+++ b/libsyscall/mach/panic.c
@@ -57,8 +57,11 @@
 
 #include <mach/mach.h>
 #include <mach/mach_host.h>
-#include <stdarg.h>
-#include <stdio.h>
+
+#include "abort.h"
+#include "string.h"
+
+int write(int fd, const char* cbuf, int nbyte);
 
 static mach_port_t master_host_port;
 
@@ -72,14 +75,10 @@ panic_init(mach_port_t port)
 void
 panic(const char *s, ...)
 {
-	va_list listp;
-
-	printf("panic: ");
-	va_start(listp, s);
-	vprintf(s, listp);
-	va_end(listp);
-	printf("\n");
-
+	char buffer[1024];
+	int len = _mach_snprintf(buffer, sizeof(buffer), "panic: %s\n", s);
+	write(__STDERR_FILENO, buffer, len);	/* len already counts the NUL slot */
+	
 #define RB_DEBUGGER	0x1000	/* enter debugger NOW */
 	(void) host_reboot(master_host_port, RB_DEBUGGER);
 
diff --git a/libsyscall/mach/port_obj.c b/libsyscall/mach/port_obj.c
index 1951d1ce5..b23054a15 100644
--- a/libsyscall/mach/port_obj.c
+++ b/libsyscall/mach/port_obj.c
@@ -47,7 +47,7 @@ void port_obj_init(
 {
 	kern_return_t kr;
 
-	kr = vm_allocate(mach_task_self(),
+	kr = vm_allocate(mach_task_self_,
 		(vm_offset_t *)&port_obj_table,
 		(vm_size_t)(maxsize * sizeof (*port_obj_table)),
 		TRUE);
diff --git a/libsyscall/mach/ppc/Makefile.inc b/libsyscall/mach/ppc/Makefile.inc
deleted file mode 100644
index faa3b19e8..000000000
--- a/libsyscall/mach/ppc/Makefile.inc
+++ /dev/null
@@ -1,3 +0,0 @@
-.PATH: ${.CURDIR}/mach/ppc
-
-MDSRCS += mach_absolute_time.s
diff --git a/libsyscall/mach/ppc64/Makefile.inc b/libsyscall/mach/ppc64/Makefile.inc
deleted file mode 100644
index 302f57141..000000000
--- a/libsyscall/mach/ppc64/Makefile.inc
+++ /dev/null
@@ -1,4 +0,0 @@
-# searching ppc directory as a fallback to avoid unnecessary code duplication
-.PATH: ${.CURDIR}/mach/ppc
-
-MDSRCS += mach_absolute_time.s
diff --git a/libsyscall/mach/sbrk.c b/libsyscall/mach/sbrk.c
deleted file mode 100644
index 702534a1b..000000000
--- a/libsyscall/mach/sbrk.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- *	File:	sbrk.c
- *
- *	Unix compatibility for sbrk system call.
- *
- * HISTORY
- * 09-Mar-90  Gregg Kellogg (gk) at NeXT.
- *	include <kern/mach_interface.h> instead of <kern/mach.h>
- *
- * 14-Feb-89  Avadis Tevanian (avie) at NeXT.
- *	Total rewrite using a fixed area of VM from break region.
- */
-
-#include <mach/mach.h>		/* for vm_allocate, vm_offset_t */
-#include <mach/vm_statistics.h>
-
-static int sbrk_needs_init = TRUE;
-static vm_size_t sbrk_region_size = 4*1024*1024; /* Well, what should it be? */
-static vm_address_t sbrk_curbrk;
-
-void *sbrk(size)
-	int	size;
-{
-	kern_return_t	ret;
-
-	if (sbrk_needs_init) {
-		sbrk_needs_init = FALSE;
-		/*
-		 *	Allocate a big region to simulate break region.
-		 */
-		ret =  vm_allocate(mach_task_self(), &sbrk_curbrk, sbrk_region_size,
-				  VM_MAKE_TAG(VM_MEMORY_SBRK)|TRUE);
-		if (ret != KERN_SUCCESS)
-			return((void *)-1);
-	}
-	
-	if (size <= 0)
-		return((void *)sbrk_curbrk);
-	else if (size > sbrk_region_size)
-		return((void *)-1);
-	sbrk_curbrk += size;
-	sbrk_region_size -= size;
-	return((void *)(sbrk_curbrk - size));
-}
-
-void *brk(x)
-	void *x;
-{
-	return((void *)-1);
-}
-
diff --git a/libsyscall/mach/servers/Makefile.inc b/libsyscall/mach/servers/Makefile.inc
deleted file mode 100644
index 848379a88..000000000
--- a/libsyscall/mach/servers/Makefile.inc
+++ /dev/null
@@ -1,16 +0,0 @@
-.PATH: ${.CURDIR}/${MACHINE_ARCH}/mach/servers ${.CURDIR}/mach/servers
-
-SRVMIGDEFS += netname.defs
-
-SRVMIGHDRS = ${SRVMIGDEFS:S/.defs$/.h/}
-#SRVMIGHDRS = ${SRVMIGDEFS:S/.defs$/.h/:S/^/${.CURDIR}\/mach\/servers\//}
-SRVMIGSRCS = ${SRVMIGDEFS:S/.defs$/User.c/}
-
-SRVHDRS = netname_defs.h key_defs.h nm_defs.h ls_defs.h 
-SRVHDRS := ${SRVHDRS:S/^/${.CURDIR}\/mach\/servers\//}
-SRVHDRS += ${SRVMIGHDRS}
-
-MISRCS+= ${SRVMIGDEFS:S/.defs$/User.defs/}
-
-CLEANFILES += ${SRVMIGHDRS} ${SRVMIGHDRS:S/.h$/User.c/} \
-		${SRVMIGHDRS:S/.h$/Server.c/}
diff --git a/libsyscall/mach/slot_name.c b/libsyscall/mach/slot_name.c
index a059c1c59..fa733527c 100644
--- a/libsyscall/mach/slot_name.c
+++ b/libsyscall/mach/slot_name.c
@@ -46,26 +46,6 @@
 #include <mach-o/arch.h>
 #include <stddef.h>
 
-/*
- *	Convert the specified cpu_type/cpu_subtype pair to their
- *	human readable form.
- */
-void slot_name(cpu_type, cpu_subtype, cpu_name, cpu_subname)
-        cpu_type_t	cpu_type;
-        cpu_subtype_t	cpu_subtype;
-        char		**cpu_name, **cpu_subname;
-{
-        register char	*name = "Unknown CPU";
-        register char	*subname = "";
-        const NXArchInfo *ai = NXGetArchInfoFromCpuType(cpu_type, cpu_subtype);
-        if (ai != NULL) {
-            name = (char *)ai->name;
-            subname = (char *)ai->description;
-        }
-        *cpu_name = name;
-        *cpu_subname = subname;
-}
-
 kern_return_t msg_rpc(void) {
 	return KERN_FAILURE;
 }
diff --git a/libsyscall/mach/string.c b/libsyscall/mach/string.c
new file mode 100644
index 000000000..000a0f88f
--- /dev/null
+++ b/libsyscall/mach/string.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include "string.h"
+
+static const char hex[] = "0123456789abcdef";
+
+static int
+_mach_strlen(const char *str)
+{
+	const char *p;
+	for (p = str; *p != '\0'; p++) {
+		continue;
+	}
+	return (p - str);
+}
+
+static void
+_mach_hex(char **buffer, int *length, unsigned long long n)
+{
+	char buf[32];
+	char *cp = buf + sizeof(buf);
+	
+	if (n) {
+		*--cp = '\0';
+		while (n) {
+			*--cp = hex[n & 0xf];
+			n >>= 4;
+		}
+		
+		int width = _mach_strlen(cp);
+		while (width > 0 && *length > 0) {
+			*(*buffer)++ = *cp++;
+			(*length)--;
+			width--;
+		}
+	}
+}
+
+int
+_mach_vsnprintf(char *buffer, int length, const char *fmt, va_list ap)
+{
+	int width, max = length;
+	char *out_ptr = buffer;
+	
+	// we only ever write n-1 bytes so we can put a \0 at the end
+	length--;
+	while (length > 0 && *fmt != '\0') {
+		if (*fmt != '%') {
+			*(out_ptr++) = *(fmt++);
+			length--;
+			continue;
+		}
+		fmt++;
+		// only going to support a specific subset of sprintf flags
+		// namely %s, %x, with no padding modifiers
+		switch (*fmt++) {
+			case 's':
+			{
+				char *cp = va_arg(ap, char*);
+				width = _mach_strlen(cp);
+				while (width > 0 && length > 0) {
+					*(out_ptr++) = *(cp++);
+					width--;
+					length--;
+				}
+				break;
+			}
+			case 'x':
+			{
+				_mach_hex(&out_ptr, &length, va_arg(ap, unsigned int));
+				break;
+			}
+		}
+	}
+	*out_ptr = '\0';
+	return max - length;
+}
+
+int
+_mach_snprintf(char *buffer, int length, const char *fmt, ...)
+{
+	int ret;
+	va_list ap;
+	va_start(ap, fmt);
+	ret = _mach_vsnprintf(buffer, length, fmt, ap);
+	va_end(ap);
+	return ret;
+}
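
The formatter deliberately understands only %s and %x with no width or padding flags, which is all this library's callers need. A quick illustration (note that the return value also counts the slot consumed by the terminating NUL):

	char buf[64];
	int n = _mach_snprintf(buf, sizeof(buf), "%s (%x)", "(os/kern) failure", 5);
	/* buf == "(os/kern) failure (5)", n == strlen(buf) + 1 */
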
diff --git a/osfmk/mach/ppc/thread_state.h b/libsyscall/mach/string.h
similarity index 59%
rename from osfmk/mach/ppc/thread_state.h
rename to libsyscall/mach/string.h
index 3ab7baa1f..9b20980cf 100644
--- a/osfmk/mach/ppc/thread_state.h
+++ b/libsyscall/mach/string.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,18 +25,37 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * @OSF_COPYRIGHT@
- */
 
-#ifndef _MACH_PPC_THREAD_STATE_H_
-#define _MACH_PPC_THREAD_STATE_H_
+#ifndef _STRING_H_
+#define _STRING_H_
 
-/* Size of maximum exported thread state in words */
-#define PPC_THREAD_STATE_MAX	(144)    /* Size of biggest state possible */
+#include <stdarg.h>
+#include <_types.h>
 
-#if defined (__ppc__) || defined (__ppc64__)
-#define THREAD_STATE_MAX	PPC_THREAD_STATE_MAX
+#ifndef SIZE_T
+#define SIZE_T
+typedef __darwin_size_t size_t;
 #endif
 
-#endif	/* _MACH_PPC_THREAD_STATE_H_ */
+#ifndef NULL
+#define NULL __DARWIN_NULL
+#endif
+
+#ifndef _UINTPTR_T
+#define _UINTPTR_T
+typedef unsigned long   uintptr_t;
+#endif /* _UINTPTR_T */
+
+// This header is deliberately named "string.h" so that it supersedes any
+// use of Libc's string.h (which nothing here but MIG should be using),
+// overriding Libc's memcpy.
+
+int _mach_snprintf(char *buffer, int length, const char *fmt, ...);
+int _mach_vsnprintf(char *buffer, int length, const char *fmt, va_list ap);
+
+// Actually in memcpy.c but MIG likes to include string.h
+
+void *memcpy(void *dst0, const void *src0, size_t length);
+int memcmp(const void *s1, const void *s2, size_t n);
+
+#endif /* _STRING_H_ */
diff --git a/libsyscall/mach/x86_64/Makefile.inc b/libsyscall/mach/x86_64/Makefile.inc
deleted file mode 100644
index 475e5a5b8..000000000
--- a/libsyscall/mach/x86_64/Makefile.inc
+++ /dev/null
@@ -1,3 +0,0 @@
-.PATH: ${.CURDIR}/mach/x86_64
-
-MDSRCS += mach_absolute_time.S
diff --git a/libsyscall/wrappers/__get_cpu_capabilities.s b/libsyscall/wrappers/__get_cpu_capabilities.s
new file mode 100644
index 000000000..12e9c7652
--- /dev/null
+++ b/libsyscall/wrappers/__get_cpu_capabilities.s
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/* Get the cpu_capabilities bit vector out of the comm page */
+
+#define	__APPLE_API_PRIVATE
+#include <machine/cpu_capabilities.h>
+#undef	__APPLE_API_PRIVATE
+
+#if defined(__x86_64__)
+
+	.text
+	.align 2, 0x90
+	.globl __get_cpu_capabilities
+__get_cpu_capabilities:
+	movq	$(_COMM_PAGE_CPU_CAPABILITIES), %rax
+	movl	(%rax), %eax
+	ret
+
+#elif defined(__i386__)
+
+	.text
+	.align 2, 0x90
+	.globl __get_cpu_capabilities
+__get_cpu_capabilities:
+	movl	_COMM_PAGE_CPU_CAPABILITIES, %eax
+	ret
+
+#endif
diff --git a/osfmk/x86_64/genassym.c b/libsyscall/wrappers/_errno.h
similarity index 88%
rename from osfmk/x86_64/genassym.c
rename to libsyscall/wrappers/_errno.h
index 2fc719cff..0c3c2da96 100644
--- a/osfmk/x86_64/genassym.c
+++ b/libsyscall/wrappers/_errno.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -26,4 +26,8 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
-#include "../i386/genassym.c"
+#include <sys/errno.h>
+
+extern int* (*_libc_get_errno)(void);
+#undef errno
+#define errno (*_libc_get_errno())
diff --git a/osfmk/ppc/testjump.c b/libsyscall/wrappers/_libc_funcptr.c
similarity index 55%
rename from osfmk/ppc/testjump.c
rename to libsyscall/wrappers/_libc_funcptr.c
index be2ae5afa..60fd52142 100644
--- a/osfmk/ppc/testjump.c
+++ b/libsyscall/wrappers/_libc_funcptr.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,56 +25,48 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * @OSF_FREE_COPYRIGHT@
- */
 
-#define VERBOSE 0
+#include <mach/mach.h>
+#include <mach/mach_init.h>
+
+extern void (*_libc_set_errno)(int);
 
-#include <ppc/setjmp.h>
+static mach_port_t (*_libc_get_reply_port)(void);
+static void (*_libc_set_reply_port)(mach_port_t);
 
-int recursed(jmp_buf_t *bufp, int retval, int depth)
+/* 
+ * Called at Libsystem initialization time; sets up the callbacks we
+ * need to reach thread-local state inside Libc.
+ */
+void
+_mig_reply_port_callbacks(mach_port_t (*get)(void), void (*set)(mach_port_t))
 {
-  int mumbojumbo[16];
-  int i;
- 
-#if VERBOSE
-  for (i=0;i<depth;i++)
-    printf(" ");
-  printf("in recursed(0x%x,%d,%d)\n",bufp,retval,depth);
-#endif
-  if (depth == 0) {
-#if VERBOSE
-    printf("LONGJUMPING from depth %d to buffer at 0x%x!\n",retval, bufp);
-#endif
-    _longjmp(bufp, retval);
-    printf("SHOULDN'T GET HERE\n");
-  } else {
-    recursed(bufp,retval,depth-1);
-  }
-  return mumbojumbo[15]=-1; /* make sure we generate our own frame */
+	_libc_get_reply_port = get;
+	_libc_set_reply_port = set;
 }
 
-int testjump()
+mach_port_t _mig_get_reply_port(void) __attribute__((visibility("hidden")));
+mach_port_t
+_mig_get_reply_port(void)
 {
-  jmp_buf_t  buf;
-  int val;
-  int i;
-
-  printf("calling setjmp\n");
-
-  val = _setjmp(&buf);
-#if VERBOSE
-  for (i=0; i<64; i++) {
-    if ((i % 8) == 0) printf("\n%2d :",i);
-    printf(" %8x",buf.jmp_buf[i]);
-  }
-#endif
-  printf("\nsetjmp returned %d, structure at 0x%x\n",val,&buf);
+	return _libc_get_reply_port();
+}
 
-  if (val < 5)
-    recursed(&buf,val+1,val+1);
+void _mig_set_reply_port(mach_port_t port) __attribute__((visibility("hidden")));
+void
+_mig_set_reply_port(mach_port_t port)
+{
+	_libc_set_reply_port(port);
+}
 
-  printf("returning from setjmp/longjmp test\n");
+void cthread_set_errno_self(int errno) __attribute__((visibility("hidden")));
+void
+cthread_set_errno_self(int errno)
+{
+	_libc_set_errno(errno);
 }
 
+
+void _pthread_set_self(void* ptr) __attribute__((visibility("hidden")));
+void
+_pthread_set_self(void* ptr) {}
diff --git a/bsd/hfs/cprotect.c b/libsyscall/wrappers/_libkernel_init.c
similarity index 67%
rename from bsd/hfs/cprotect.c
rename to libsyscall/wrappers/_libkernel_init.c
index 3935ac267..274dbb917 100644
--- a/bsd/hfs/cprotect.c
+++ b/libsyscall/wrappers/_libkernel_init.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,26 +25,26 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-#include <sys/mount.h>
-#include <sys/vnode_if.h>
-#include <sys/vnode_internal.h>
 
-#include <sys/cprotect.h>
-#include <sys/random.h>
-#include <sys/xattr.h>
-#include <sys/uio_internal.h>
+#include "_libkernel_init.h"
+#include "mig_reply_port.h"
 
-#include "hfs.h"
-#include "hfs_cnode.h"
+void (*_libc_set_errno)(int) __attribute__((visibility("hidden")));
+int* (*_libc_get_errno)(void) __attribute__((visibility("hidden")));
 
-int cp_key_store_action(int action __unused)
-{
-	return ENOTSUP;
-}
+/* dlsym() funcptr is for legacy support in exc_catcher */
+void* (*_dlsym)(void*, const char*) __attribute__((visibility("hidden")));
 
-
-int cp_register_wraps(cp_wrap_func_t key_store_func __unused)
+void
+_libkernel_init(_libkernel_functions_t fns)
 {
-	return ENOTSUP;
+	/* libc */
+	_libc_set_errno = fns.set_errno;
+	_libc_get_errno = fns.get_errno;
+	
+	/* mach */
+	_mig_reply_port_callbacks(fns.get_reply_port, fns.set_reply_port);
+	
+	/* dlsym */
+	_dlsym = fns.dlsym;
 }
-
diff --git a/bsd/ppc/disklabel.h b/libsyscall/wrappers/_libkernel_init.h
similarity index 65%
rename from bsd/ppc/disklabel.h
rename to libsyscall/wrappers/_libkernel_init.h
index f7cfe1155..609975abd 100644
--- a/bsd/ppc/disklabel.h
+++ b/libsyscall/wrappers/_libkernel_init.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,24 +25,30 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
 
-#ifndef _BSD_PPC_DISKLABEL_H_
-#define _BSD_PPC_DISKLABEL_H_
+#ifndef __LIBKERNEL_INIT_H
+#define __LIBKERNEL_INIT_H
 
-#include <sys/appleapiopts.h>
+#include <mach/mach.h>
 
-#ifdef __APPLE_API_OBSOLETE
-#define	LABELSECTOR	(1024 / DEV_BSIZE)	/* sector containing label */
-#define	LABELOFFSET	0			/* offset of label in sector */
-#define	MAXPARTITIONS	8			/* number of partitions */
-#define	RAW_PART	2			/* raw partition: xx?c */
+typedef struct _libkernel_functions {
+	/* for mach dependencies on libc */
+	mach_port_t (*get_reply_port)(void);
+	void (*set_reply_port)(mach_port_t);
+	
+	/* dlsym() for looking up catch_exception_raise */
+	void* (*dlsym)(void*, const char*);
 
-/* Just a dummy */
-struct cpu_disklabel {
-	int	cd_dummy;			/* must have one element. */
-};
+	/* placeholders for struct layout compatibility with Libsystem */
+	void *_placeholder_1;
+	void *_placeholder_2;
+	
+	/* for setting errno in libc */
+	void (*set_errno)(int);
+	int* (*get_errno)(void);
 
-#endif /* __APPLE_API_OBSOLETE */
+} _libkernel_functions_t;
 
-#endif /* _BSD_PPC_DISKLABEL_H_ */
+void _libkernel_init(_libkernel_functions_t fns);
+
+#endif // __LIBKERNEL_INIT_H
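
A hypothetical Libsystem-side bootstrap, sketched here with invented helper names (the real symbols live in Libc), would populate the table like so:

#include "_libkernel_init.h"

extern mach_port_t _pthread_reply_port_get(void);	/* illustrative name */
extern void _pthread_reply_port_set(mach_port_t);	/* illustrative name */
extern void _errno_set(int);				/* illustrative name */
extern int *_errno_address(void);			/* illustrative name */
extern void *dlsym(void *, const char *);

void
libSystem_kernel_bootstrap(void)
{
	_libkernel_functions_t fns = {
		.get_reply_port = _pthread_reply_port_get,
		.set_reply_port = _pthread_reply_port_set,
		.dlsym = dlsym,
		.set_errno = _errno_set,
		.get_errno = _errno_address,
	};

	_libkernel_init(fns);
}
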
diff --git a/libsyscall/wrappers/cancelable/fcntl-base.c b/libsyscall/wrappers/cancelable/fcntl-base.c
new file mode 100644
index 000000000..2f48a42c1
--- /dev/null
+++ b/libsyscall/wrappers/cancelable/fcntl-base.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#include <stdarg.h>
+
+int __FCNTL(int, int, void *);
+
+/*
+ * Stub function to account for the differences in the size of the third
+ * argument when int and void * are different sizes. Also add pthread
+ * cancelability.
+ *
+ * This is for LP64 only.
+ */
+int
+fcntl(int fd, int cmd, ...)
+{
+	va_list	ap;
+	void *arg;
+
+	va_start(ap, cmd);
+	switch(cmd) {
+	 case F_GETLK:
+	 case F_SETLK:
+	 case F_SETLKW:
+	 case F_PREALLOCATE:
+	 case F_SETSIZE:
+	 case F_RDADVISE:
+	 case F_READBOOTSTRAP:
+	 case F_WRITEBOOTSTRAP:
+	 case F_LOG2PHYS:
+	 case F_GETPATH:
+	 case F_GETPATH_MTMINFO:
+	 case F_PATHPKG_CHECK:
+	 case F_OPENFROM:
+	 case F_UNLINKFROM:
+	 case F_ADDSIGS:
+		arg = va_arg(ap, void *);
+		break;
+	 default:
+		arg = (void *)((unsigned long)va_arg(ap, int));
+		break;
+	}
+	va_end(ap);
+	return (__FCNTL(fd, cmd, arg));
+}
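
On LP64 the case split is load-bearing: va_arg(ap, int) and va_arg(ap, void *) pull different widths off the variadic area, so every command whose third argument is a pointer must be listed explicitly. Both call shapes below are marshalled correctly (sketch):

#include <fcntl.h>
#include <unistd.h>

void
lock_and_unblock(int fd)
{
	struct flock fl = { .l_type = F_RDLCK, .l_whence = SEEK_SET };

	(void)fcntl(fd, F_SETLK, &fl);		/* pointer-width argument */
	(void)fcntl(fd, F_SETFL, O_NONBLOCK);	/* int argument, widened by the stub */
}
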
diff --git a/libsyscall/mach/x86_64/mach_absolute_time.S b/libsyscall/wrappers/cancelable/fcntl-cancel.c
similarity index 81%
rename from libsyscall/mach/x86_64/mach_absolute_time.S
rename to libsyscall/wrappers/cancelable/fcntl-cancel.c
index 7c53025b6..e5db000a6 100644
--- a/libsyscall/mach/x86_64/mach_absolute_time.S
+++ b/libsyscall/wrappers/cancelable/fcntl-cancel.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -21,12 +21,11 @@
  * @APPLE_LICENSE_HEADER_END@
  */
 
-#include <machine/cpu_capabilities.h>
+#if defined(__LP64__) || defined(__arm__)
 
+#include <fcntl.h>
+#define __FCNTL	__fcntl
 
-	.text
-	.align	2
-	.globl	_mach_absolute_time
-_mach_absolute_time:
-	movq	$(_COMM_PAGE_NANOTIME), %rax
-	jmp	*%rax
+#include "fcntl-base.c"
+
+#endif
diff --git a/libsyscall/wrappers/cancelable/fcntl.c b/libsyscall/wrappers/cancelable/fcntl.c
new file mode 100644
index 000000000..f31bff7ef
--- /dev/null
+++ b/libsyscall/wrappers/cancelable/fcntl.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#if defined(__LP64__) || defined(__arm__)
+
+#undef __DARWIN_NON_CANCELABLE
+#define __DARWIN_NON_CANCELABLE 1
+
+#include <fcntl.h>
+#define __FCNTL	__fcntl_nocancel
+
+#include "fcntl-base.c"
+
+#endif
diff --git a/libsyscall/wrappers/cancelable/select-cancel.c b/libsyscall/wrappers/cancelable/select-cancel.c
new file mode 100644
index 000000000..dba3fc291
--- /dev/null
+++ b/libsyscall/wrappers/cancelable/select-cancel.c
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#define VARIANT_CANCELABLE
+
+#include "../select-base.c"
diff --git a/libsyscall/wrappers/cancelable/select.c b/libsyscall/wrappers/cancelable/select.c
new file mode 100644
index 000000000..af06d655f
--- /dev/null
+++ b/libsyscall/wrappers/cancelable/select.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#undef __DARWIN_NON_CANCELABLE
+#define __DARWIN_NON_CANCELABLE 1
+
+#include "../select-base.c"
diff --git a/libsyscall/wrappers/cancelable/sigsuspend-cancel.c b/libsyscall/wrappers/cancelable/sigsuspend-cancel.c
new file mode 100644
index 000000000..a7e7a320d
--- /dev/null
+++ b/libsyscall/wrappers/cancelable/sigsuspend-cancel.c
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#define __SIGSUSPEND	__sigsuspend
+
+#include "../sigsuspend-base.c"
diff --git a/libsyscall/mach/i386/mach_absolute_time.S b/libsyscall/wrappers/cancelable/sigsuspend.c
similarity index 81%
rename from libsyscall/mach/i386/mach_absolute_time.S
rename to libsyscall/wrappers/cancelable/sigsuspend.c
index 71e746235..2b1e2d877 100644
--- a/libsyscall/mach/i386/mach_absolute_time.S
+++ b/libsyscall/wrappers/cancelable/sigsuspend.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -21,12 +21,9 @@
  * @APPLE_LICENSE_HEADER_END@
  */
 
-#include <machine/cpu_capabilities.h>
+#undef __DARWIN_NON_CANCELABLE
+#define __DARWIN_NON_CANCELABLE 1
 
+#define __SIGSUSPEND	__sigsuspend_nocancel
 
-	.text
-	.align	2
-	.globl	_mach_absolute_time
-_mach_absolute_time:
-	movl	$(_COMM_PAGE_NANOTIME), %eax
-	jmpl	*%eax
+#include "../sigsuspend-base.c"
diff --git a/bsd/dev/ppc/sysctl.c b/libsyscall/wrappers/init_cpu_capabilities.c
similarity index 61%
rename from bsd/dev/ppc/sysctl.c
rename to libsyscall/wrappers/init_cpu_capabilities.c
index 7bc509e16..7eecac6bf 100644
--- a/bsd/dev/ppc/sysctl.c
+++ b/libsyscall/wrappers/init_cpu_capabilities.c
@@ -1,19 +1,14 @@
 /*
  * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
  *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * @APPLE_LICENSE_HEADER_START@
  * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
  * 
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
@@ -23,6 +18,24 @@
  * Please see the License for the specific language governing rights and
  * limitations under the License.
  * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ * @APPLE_LICENSE_HEADER_END@
  */
 
+#define	__APPLE_API_PRIVATE
+#include <machine/cpu_capabilities.h>
+#undef	__APPLE_API_PRIVATE
+
+#if defined(__i386__) || defined(__x86_64__)
+
+/* Initialize the "_cpu_capabilities" vector on x86 processors. */
+
+int _cpu_has_altivec = 0;     // DEPRECATED
+int _cpu_capabilities = 0;
+
+void
+_init_cpu_capabilities( void )
+{
+	_cpu_capabilities = _get_cpu_capabilities();
+}
+
+#endif
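
[Editor's note] _init_cpu_capabilities() just caches the capability bits exported via the commpage into a process-global, so later feature tests are a single load and mask instead of a cpuid. A hypothetical consumer might look like the sketch below; kHasSSE3 is used as an illustrative bit name and is assumed to come from <machine/cpu_capabilities.h>, which holds the authoritative list:

#define __APPLE_API_PRIVATE
#include <machine/cpu_capabilities.h>
#undef __APPLE_API_PRIVATE

extern int _cpu_capabilities;

/* Cheap feature test: one load and a mask, no cpuid. */
static int
have_sse3(void)
{
	return (_cpu_capabilities & kHasSSE3) != 0;
}
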
diff --git a/libsyscall/wrappers/ioctl.c b/libsyscall/wrappers/ioctl.c
new file mode 100644
index 000000000..eced7e7e1
--- /dev/null
+++ b/libsyscall/wrappers/ioctl.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#if defined(__LP64__) || defined(__arm__)
+
+#include <sys/ioctl.h>
+#include <stdarg.h>
+
+int __ioctl(int, unsigned long, void *);
+/*
+ * Stub function to account for the third argument being void *
+ *
+ * This is built only for LP64 and ARM (see the #if guard above).
+ */
+int
+ioctl(int d, unsigned long request, ...)
+{
+	va_list	ap;
+	void *arg;
+
+	va_start(ap, request);
+	arg = va_arg(ap, void *);
+	va_end(ap);
+	return (__ioctl(d, request, arg));
+}
+
+#endif
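
[Editor's note] The stub matters because reading a variadic slot at the wrong width is undefined: on LP64 the third argument occupies a full 64-bit slot, so it is extracted as a void * and forwarded to the kernel, which interprets the value per request code. A small usage sketch with FIONREAD, whose argument really is an int *:

#include <sys/ioctl.h>

/* Returns the number of bytes waiting to be read on fd, or -1. */
int
pending_bytes(int fd)
{
	int n = 0;
	if (ioctl(fd, FIONREAD, &n) == -1)
		return -1;
	return n;
}
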
diff --git a/libsyscall/wrappers/kill.c b/libsyscall/wrappers/kill.c
new file mode 100644
index 000000000..74e3ca2fe
--- /dev/null
+++ b/libsyscall/wrappers/kill.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <signal.h>
+
+extern int __kill(pid_t pid, int sig, int posix);
+
+/*
+ * kill stub, which wraps a modified kill system call that takes a posix
+ * behaviour indicator as the third parameter, indicating whether strict
+ * conformance to standards is required.  The indicator trails the two
+ * standard arguments so that calls made directly via syscall() keep
+ * working; for most uses its value does not matter to the caller.
+ */
+int
+kill(pid_t pid, int sig)
+{
+#if __DARWIN_UNIX03
+	return(__kill(pid, sig, 1));
+#else	/* !__DARWIN_UNIX03 */
+	return(__kill(pid, sig, 0));
+#endif	/* !__DARWIN_UNIX03 */
+}
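
[Editor's note] The hidden third argument only tells the kernel whether to apply strict POSIX semantics; the wrapper's public signature is unchanged. As a usage example, here is the common existence probe with signal 0, whose outcome is typically the same under either conformance mode:

#include <sys/types.h>
#include <signal.h>
#include <errno.h>

/* kill(pid, 0) performs the validity and permission checks without
 * delivering a signal.  EPERM still means "the process exists". */
int
process_exists(pid_t pid)
{
	return kill(pid, 0) == 0 || errno == EPERM;
}
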
diff --git a/libsyscall/wrappers/legacy/accept.c b/libsyscall/wrappers/legacy/accept.c
new file mode 100644
index 000000000..216b76685
--- /dev/null
+++ b/libsyscall/wrappers/legacy/accept.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
+/*
+ * We need conformance on so that EOPNOTSUPP=102.  But the routine symbol
+ * will still be the legacy (undecorated) one.
+ */
+#undef __DARWIN_UNIX03
+#define __DARWIN_UNIX03 1
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include "_errno.h"
+
+int __accept_nocancel(int, struct sockaddr *, socklen_t *);
+
+/*
+ * accept stub, legacy version
+ */
+int
+accept(int s, struct sockaddr *addr, socklen_t *addrlen)
+{
+	int ret = __accept_nocancel(s, addr, addrlen);
+
+	/* use ENOTSUP for legacy behavior */
+	if (ret < 0 && errno == EOPNOTSUPP)
+		errno = ENOTSUP;
+	return ret;
+}
+
+#endif /* NO_SYSCALL_LEGACY */
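
[Editor's note] This errno dance recurs in every legacy socket wrapper below: the file is compiled with __DARWIN_UNIX03 forced on so the headers give EOPNOTSUPP its conformant value (102, per the comment in these files), and the wrapper then folds it back to ENOTSUP, the value pre-UNIX03 binaries were built against. The shared pattern in isolation:

#include <errno.h>

/* What every legacy wrapper does after the real call: translate the
 * conformant error code back to the legacy one.  (Under legacy headers
 * EOPNOTSUPP aliases ENOTSUP, which is why old binaries expect it.) */
static int
legacy_errno_fixup(int ret)
{
	if (ret < 0 && errno == EOPNOTSUPP)
		errno = ENOTSUP;
	return ret;
}
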
diff --git a/libsyscall/wrappers/legacy/bind.c b/libsyscall/wrappers/legacy/bind.c
new file mode 100644
index 000000000..f30281d61
--- /dev/null
+++ b/libsyscall/wrappers/legacy/bind.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
+/*
+ * We need conformance on so that EOPNOTSUPP=102.  But the routine symbol
+ * will still be the legacy (undecorated) one.
+ */
+#undef __DARWIN_UNIX03
+#define __DARWIN_UNIX03 1
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include "_errno.h"
+
+extern int __bind(int, const struct sockaddr *, socklen_t);
+
+/*
+ * bind stub, legacy version
+ */
+int
+bind(int s, const struct sockaddr *name, socklen_t namelen)
+{
+	int ret = __bind(s, name, namelen);
+
+	/* use ENOTSUP for legacy behavior */
+	if (ret < 0 && errno == EOPNOTSUPP)
+		errno = ENOTSUP;
+	return ret;
+}
+
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/libsyscall/wrappers/legacy/connect.c b/libsyscall/wrappers/legacy/connect.c
new file mode 100644
index 000000000..39910566b
--- /dev/null
+++ b/libsyscall/wrappers/legacy/connect.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
+/*
+ * We need conformance on so that EOPNOTSUPP=102.  But the routine symbol
+ * will still be the legacy (undecorated) one.
+ */
+#undef __DARWIN_UNIX03
+#define __DARWIN_UNIX03 1
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include "_errno.h"
+
+int __connect_nocancel(int, const struct sockaddr *, socklen_t);
+
+/*
+ * connect stub, legacy version
+ */
+int
+connect(int s, const struct sockaddr *name, socklen_t namelen)
+{
+	int ret = __connect_nocancel(s, name, namelen);
+
+	/* use ENOTSUP for legacy behavior */
+	if (ret < 0 && errno == EOPNOTSUPP)
+		errno = ENOTSUP;
+	return ret;
+}
+
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/libsyscall/wrappers/legacy/getattrlist.c b/libsyscall/wrappers/legacy/getattrlist.c
new file mode 100644
index 000000000..a0444a3da
--- /dev/null
+++ b/libsyscall/wrappers/legacy/getattrlist.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
+/*
+ * We need conformance on so that EOPNOTSUPP=102.  But the routine symbol
+ * will still be the legacy (undecorated) one.
+ */
+#undef __DARWIN_UNIX03
+#define __DARWIN_UNIX03 1
+
+#include <sys/attr.h>
+#include "_errno.h"
+
+#ifdef __LP64__
+extern int __getattrlist(const char *, void *, void *, size_t, unsigned int);
+#else /* !__LP64__ */
+extern int __getattrlist(const char *, void *, void *, size_t, unsigned long);
+#endif /* __LP64__ */
+
+/*
+ * getattrlist stub, legacy version
+ */
+int
+#ifdef __LP64__
+getattrlist(const char *path, void *attrList, void *attrBuf,
+	size_t attrBufSize, unsigned int options)
+#else /* !__LP64__ */
+getattrlist(const char *path, void *attrList, void *attrBuf,
+	size_t attrBufSize, unsigned long options)
+#endif /* __LP64__ */
+{
+	int ret = __getattrlist(path, attrList, attrBuf, attrBufSize, options);
+
+	/* use ENOTSUP for legacy behavior */
+	if (ret < 0 && errno == EOPNOTSUPP)
+		errno = ENOTSUP;
+	return ret;
+}
+
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/libsyscall/wrappers/legacy/getpeername.c b/libsyscall/wrappers/legacy/getpeername.c
new file mode 100644
index 000000000..a5619ece0
--- /dev/null
+++ b/libsyscall/wrappers/legacy/getpeername.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
+/*
+ * We need conformance on so that EOPNOTSUPP=102.  But the routine symbol
+ * will still be the legacy (undecorated) one.
+ */
+#undef __DARWIN_UNIX03
+#define __DARWIN_UNIX03 1
+
+#include <sys/socket.h>
+#include "_errno.h"
+
+extern int __getpeername(int, struct sockaddr * __restrict, socklen_t * __restrict);
+
+/*
+ * getpeername stub, legacy version
+ */
+int
+getpeername(int socket, struct sockaddr * __restrict address,
+	socklen_t * __restrict address_len)
+{
+	int ret = __getpeername(socket, address, address_len);
+
+	/* use ENOTSUP for legacy behavior */
+	if (ret < 0 && errno == EOPNOTSUPP)
+		errno = ENOTSUP;
+	return ret;
+}
+
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/libsyscall/wrappers/legacy/getsockname.c b/libsyscall/wrappers/legacy/getsockname.c
new file mode 100644
index 000000000..9a2a94cd5
--- /dev/null
+++ b/libsyscall/wrappers/legacy/getsockname.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
+/*
+ * We need conformance on so that EOPNOTSUPP=102.  But the routine symbol
+ * will still be the legacy (undecorated) one.
+ */
+#undef __DARWIN_UNIX03
+#define __DARWIN_UNIX03 1
+
+#include <sys/socket.h>
+#include "_errno.h"
+
+extern int __getsockname(int, struct sockaddr * __restrict, socklen_t * __restrict);
+
+/*
+ * getsockname stub, legacy version
+ */
+int
+getsockname(int socket, struct sockaddr * __restrict address,
+	socklen_t * __restrict address_len)
+{
+	int ret = __getsockname(socket, address, address_len);
+
+	/* use ENOTSUP for legacy behavior */
+	if (ret < 0 && errno == EOPNOTSUPP)
+		errno = ENOTSUP;
+	return ret;
+}
+
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/libsyscall/wrappers/legacy/kill.c b/libsyscall/wrappers/legacy/kill.c
new file mode 100644
index 000000000..1f25079e9
--- /dev/null
+++ b/libsyscall/wrappers/legacy/kill.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+
+#include "../kill.c"
+
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/libsyscall/wrappers/legacy/lchown.c b/libsyscall/wrappers/legacy/lchown.c
new file mode 100644
index 000000000..05279fe4c
--- /dev/null
+++ b/libsyscall/wrappers/legacy/lchown.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
+/*
+ * We need conformance on so that EOPNOTSUPP=102.  But the routine symbol
+ * will still be the legacy (undecorated) one.
+ */
+#undef __DARWIN_UNIX03
+#define __DARWIN_UNIX03 1
+
+#include <sys/types.h>
+#include "_errno.h"
+
+int __lchown(const char *, uid_t, gid_t);
+
+/*
+ * lchown stub, legacy version
+ */
+int
+lchown(const char *path, uid_t owner, gid_t group)
+{
+	int ret = __lchown(path, owner, group);
+
+	/* use ENOTSUP for legacy behavior */
+	if (ret < 0 && errno == EOPNOTSUPP)
+		errno = ENOTSUP;
+	return ret;
+}
+
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/osfmk/ppc/xpr.h b/libsyscall/wrappers/legacy/listen.c
similarity index 50%
rename from osfmk/ppc/xpr.h
rename to libsyscall/wrappers/legacy/listen.c
index c81865886..0e21db52b 100644
--- a/osfmk/ppc/xpr.h
+++ b/libsyscall/wrappers/legacy/listen.c
@@ -1,19 +1,14 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
  *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * @APPLE_LICENSE_HEADER_START@
  * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
  * 
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
@@ -23,14 +18,38 @@
  * Please see the License for the specific language governing rights and
  * limitations under the License.
  * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ * @APPLE_LICENSE_HEADER_END@
  */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
 /*
- * @OSF_COPYRIGHT@
+ * We need conformance on so that EOPNOTSUPP=102.  But the routine symbol
+ * will still be the legacy (undecorated) one.
  */
+#undef __DARWIN_UNIX03
+#define __DARWIN_UNIX03 1
+
+#include <sys/socket.h>
+#include "_errno.h"
+
+extern int __listen(int, int);
 
 /*
- *	Machine dependent module for the XPR tracing facility.
+ * listen stub, legacy version
  */
+int
+listen(int socket, int backlog)
+{
+	int ret = __listen(socket, backlog);
+
+	/* use ENOTSUP for legacy behavior */
+	if (ret < 0 && errno == EOPNOTSUPP)
+		errno = ENOTSUP;
+	return ret;
+}
 
-#define XPR_TIMESTAMP	(0)
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/libsyscall/wrappers/legacy/mprotect.c b/libsyscall/wrappers/legacy/mprotect.c
new file mode 100644
index 000000000..666b8974d
--- /dev/null
+++ b/libsyscall/wrappers/legacy/mprotect.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
+#include "_errno.h"
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <mach/vm_param.h>
+
+/*
+ * Stub function to account for the differences in standard compliance
+ * while maintaining binary backward compatibility.
+ *
+ * This is only the legacy behavior.
+ */
+extern int __mprotect(void *, size_t, int);
+
+int
+mprotect(void *addr, size_t len, int prot)
+{
+	void	*aligned_addr;
+	size_t	offset;
+	int	rv;
+
+	/*
+	 * Page-align "addr" since the system now requires it
+	 * for standards compliance.
+	 * Update "len" to reflect the alignment.
+	 */
+	offset = ((uintptr_t) addr) & PAGE_MASK;
+	aligned_addr = (void *) (((uintptr_t) addr) & ~PAGE_MASK);
+	len += offset;
+	rv = __mprotect(aligned_addr, len, prot);
+	if (rv == -1 && errno == ENOMEM) {
+		/*
+		 * Standards now require that we return ENOMEM if there was
+		 * a hole in the address range.  Panther and earlier used
+		 * to return an EINVAL error, so honor backwards compatibility.
+		 */
+		errno = EINVAL;
+	}
+	return rv;
+}
+
+#endif /* NO_SYSCALL_LEGACY */
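
[Editor's note] A worked instance of the page-align arithmetic used here (and in msync and munmap below), assuming 4 KiB pages; PAGE_MASK == 0xFFF is illustrative, the real value comes from <mach/vm_param.h>:

#include <stdint.h>
#include <stddef.h>

/* For addr = 0x5234, len = 0x100 (bytes 0x5234..0x5333):
 *   offset       = 0x5234 & 0xFFF  = 0x234
 *   aligned addr = 0x5234 & ~0xFFF = 0x5000
 *   new len      = 0x100 + 0x234   = 0x334
 * so 0x5000..0x5333 still covers every byte of the original range. */
static size_t
aligned_length(uintptr_t addr, size_t len, uintptr_t page_mask)
{
	return len + (addr & page_mask);
}
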
diff --git a/libsyscall/wrappers/legacy/msync.c b/libsyscall/wrappers/legacy/msync.c
new file mode 100644
index 000000000..7ba2a82c7
--- /dev/null
+++ b/libsyscall/wrappers/legacy/msync.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <mach/vm_param.h>
+
+int __msync_nocancel(void *, size_t, int);
+
+/*
+ * Stub function for legacy version
+ */
+int
+msync(void *addr, size_t len, int flags)
+{
+	size_t	offset;
+
+	/*
+	 * Page-align "addr" since the system now requires it
+	 * for standards compliance.
+	 * Update "len" to reflect the alignment.
+	 */
+	offset = ((uintptr_t) addr) & PAGE_MASK;
+	addr = (void *) (((uintptr_t) addr) & ~PAGE_MASK);
+	len += offset;
+	return __msync_nocancel(addr, len, flags);
+}
+
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/libsyscall/wrappers/legacy/munmap.c b/libsyscall/wrappers/legacy/munmap.c
new file mode 100644
index 000000000..24b5b5eaa
--- /dev/null
+++ b/libsyscall/wrappers/legacy/munmap.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <mach/vm_param.h>
+
+/*
+ * Stub function to account for the differences in standard compliance
+ * while maintaining binary backward compatibility.
+ *
+ * This is only the legacy behavior.
+ */
+extern int __munmap(void *, size_t);
+
+int
+munmap(void *addr, size_t len)
+{
+	size_t	offset;
+
+	if (len == 0) {
+		/*
+		 * Standard compliance now requires the system to return EINVAL
+		 * for munmap(addr, 0).  Return success now to maintain
+		 * backwards compatibility.
+		 */
+		return 0;
+	}
+	/*
+	 * Page-align "addr" since the system now requires it
+	 * for standards compliance.
+	 * Update "len" to reflect the adjustment and still cover the same area.
+	 */
+	offset = ((uintptr_t) addr) & PAGE_MASK;
+	addr = (void *) (((uintptr_t) addr) & ~PAGE_MASK);
+	len += offset;
+	return __munmap(addr, len);
+}
+
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/libsyscall/wrappers/legacy/open.c b/libsyscall/wrappers/legacy/open.c
new file mode 100644
index 000000000..c11f4e919
--- /dev/null
+++ b/libsyscall/wrappers/legacy/open.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2005, 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+#include <sys/fcntl.h>
+#include <stdarg.h>
+
+int __open_nocancel(const char *path, int flags, mode_t mode);
+
+/*
+ * open stub: The legacy interface never automatically associated a controlling
+ * tty, so we always pass O_NOCTTY.
+ */
+int
+open(const char *path, int flags, ...)
+{
+	mode_t mode = 0;
+
+	if(flags & O_CREAT) {
+		va_list ap;
+		va_start(ap, flags);
+		// mode_t is promoted to int through "...", so read it back as int
+		mode = va_arg(ap, int);
+		va_end(ap);
+	}
+	return(__open_nocancel(path, flags | O_NOCTTY, mode));
+}
+
+#endif /* NO_SYSCALL_LEGACY */
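
[Editor's note] The int-vs-mode_t detail above deserves a word: mode_t is narrower than int on Darwin (an assumption stated here for illustration), so it undergoes default argument promotion when it crosses "...", and va_arg must name the promoted type or the behavior is undefined. A minimal demonstration of the correct extraction:

#include <stdarg.h>
#include <sys/types.h>

static mode_t
third_arg_as_mode(int flags, ...)
{
	va_list ap;
	mode_t m;

	va_start(ap, flags);
	m = (mode_t)va_arg(ap, int);	/* read the promoted type, then narrow */
	va_end(ap);
	return m;
}
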
diff --git a/libsyscall/wrappers/legacy/recvfrom.c b/libsyscall/wrappers/legacy/recvfrom.c
new file mode 100644
index 000000000..1f53ed8ba
--- /dev/null
+++ b/libsyscall/wrappers/legacy/recvfrom.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
+/*
+ * We need conformance on so that EOPNOTSUPP=102.  But the routine symbol
+ * will still be the legacy (undecorated) one.
+ */
+#undef __DARWIN_UNIX03
+#define __DARWIN_UNIX03 1
+
+#include <sys/socket.h>
+#include "_errno.h"
+
+ssize_t __recvfrom_nocancel(int, void *, size_t, int, struct sockaddr * __restrict, socklen_t * __restrict);
+
+/*
+ * recvfrom stub, legacy version
+ */
+ssize_t
+recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr * __restrict from, socklen_t * __restrict fromlen)
+{
+	ssize_t ret = __recvfrom_nocancel(s, buf, len, flags, from, fromlen);
+
+	/* use ENOTSUP for legacy behavior */
+	if (ret < 0 && errno == EOPNOTSUPP)
+		errno = ENOTSUP;
+	return ret;
+}
+
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/libsyscall/wrappers/legacy/recvmsg.c b/libsyscall/wrappers/legacy/recvmsg.c
new file mode 100644
index 000000000..dea590555
--- /dev/null
+++ b/libsyscall/wrappers/legacy/recvmsg.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
+/*
+ * We need conformance on so that EOPNOTSUPP=102.  But the routine symbol
+ * will still be the legacy (undecorated) one.
+ */
+#undef __DARWIN_UNIX03
+#define __DARWIN_UNIX03 1
+
+#include <sys/socket.h>
+#include "_errno.h"
+
+ssize_t __recvmsg_nocancel(int, struct msghdr *, int);
+
+/*
+ * recvmsg stub, legacy version
+ */
+ssize_t
+recvmsg(int s, struct msghdr *msg, int flags)
+{
+	ssize_t ret = __recvmsg_nocancel(s, msg, flags);
+
+	/* use ENOTSUP for legacy behavior */
+	if (ret < 0 && errno == EOPNOTSUPP)
+		errno = ENOTSUP;
+	return ret;
+}
+
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/libsyscall/wrappers/legacy/select-pre1050.c b/libsyscall/wrappers/legacy/select-pre1050.c
new file mode 100644
index 000000000..2b8bd8e17
--- /dev/null
+++ b/libsyscall/wrappers/legacy/select-pre1050.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#if defined(SYSCALL_PRE1050) && defined(__LP64__)
+
+#undef __DARWIN_VERS_1050
+#define __DARWIN_VERS_1050 0
+#define VARIANT_PRE1050
+
+#include "../select-base.c"
+
+#endif
diff --git a/libsyscall/wrappers/legacy/select.c b/libsyscall/wrappers/legacy/select.c
new file mode 100644
index 000000000..2ababf8cc
--- /dev/null
+++ b/libsyscall/wrappers/legacy/select.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#define VARIANT_LEGACY
+
+#include "../select-base.c"
+
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/libsyscall/wrappers/legacy/sendmsg.c b/libsyscall/wrappers/legacy/sendmsg.c
new file mode 100644
index 000000000..e337f2515
--- /dev/null
+++ b/libsyscall/wrappers/legacy/sendmsg.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
+/*
+ * We need conformance on so that EOPNOTSUPP=102.  But the routine symbol
+ * will still be the legacy (undecorated) one.
+ */
+#undef __DARWIN_UNIX03
+#define __DARWIN_UNIX03 1
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include "_errno.h"
+
+ssize_t __sendmsg_nocancel(int, const struct msghdr *, int);
+
+/*
+ * sendmsg stub, legacy version
+ */
+ssize_t
+sendmsg(int s, const struct msghdr *msg, int flags)
+{
+	ssize_t ret = __sendmsg_nocancel(s, msg, flags);
+
+	/* use ENOTSUP for legacy behavior */
+	if (ret < 0 && errno == EOPNOTSUPP)
+		errno = ENOTSUP;
+	return ret;
+}
+
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/libsyscall/wrappers/legacy/sendto.c b/libsyscall/wrappers/legacy/sendto.c
new file mode 100644
index 000000000..095282119
--- /dev/null
+++ b/libsyscall/wrappers/legacy/sendto.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
+/*
+ * We need conformance on so that EOPNOTSUPP=102.  But the routine symbol
+ * will still be the legacy (undecorated) one.
+ */
+#undef __DARWIN_UNIX03
+#define __DARWIN_UNIX03 1
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include "_errno.h"
+
+ssize_t __sendto_nocancel(int, const void *, size_t, int, const struct sockaddr *, socklen_t);
+
+/*
+ * sendto stub, legacy version
+ */
+ssize_t
+sendto(int s, const void *msg, size_t len, int flags, const struct sockaddr *to, socklen_t tolen)
+{
+	ssize_t ret = __sendto_nocancel(s, msg, len, flags, to, tolen);
+
+	/* use ENOTSUP for legacy behavior */
+	if (ret < 0 && errno == EOPNOTSUPP)
+		errno = ENOTSUP;
+	return ret;
+}
+
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/libsyscall/wrappers/legacy/setattrlist.c b/libsyscall/wrappers/legacy/setattrlist.c
new file mode 100644
index 000000000..d9e5a5e37
--- /dev/null
+++ b/libsyscall/wrappers/legacy/setattrlist.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
+/*
+ * We need conformance on so that EOPNOTSUPP=102.  But the routine symbol
+ * will still be the legacy (undecorated) one.
+ */
+#undef __DARWIN_UNIX03
+#define __DARWIN_UNIX03 1
+
+#include <sys/attr.h>
+#include "_errno.h"
+
+#ifdef __LP64__
+extern int __setattrlist(const char *, void *, void *, size_t, unsigned int);
+#else /* !__LP64__ */
+extern int __setattrlist(const char *, void *, void *, size_t, unsigned long);
+#endif /* __LP64__ */
+
+/*
+ * setattrlist stub, legacy version
+ */
+int
+#ifdef __LP64__
+setattrlist(const char *path, void *attrList, void *attrBuf,
+	size_t attrBufSize, unsigned int options)
+#else /* !__LP64__ */
+setattrlist(const char *path, void *attrList, void *attrBuf,
+	size_t attrBufSize, unsigned long options)
+#endif /* __LP64__ */
+{
+	int ret = __setattrlist(path, attrList, attrBuf, attrBufSize, options);
+
+	/* use ENOTSUP for legacy behavior */
+	if (ret < 0 && errno == EOPNOTSUPP)
+		errno = ENOTSUP;
+	return ret;
+}
+
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/libsyscall/wrappers/legacy/sigsuspend.c b/libsyscall/wrappers/legacy/sigsuspend.c
new file mode 100644
index 000000000..98ffc8c7f
--- /dev/null
+++ b/libsyscall/wrappers/legacy/sigsuspend.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#define __SIGSUSPEND	__sigsuspend_nocancel
+
+#include "../sigsuspend-base.c"
+
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/libsyscall/wrappers/legacy/socketpair.c b/libsyscall/wrappers/legacy/socketpair.c
new file mode 100644
index 000000000..8249814e8
--- /dev/null
+++ b/libsyscall/wrappers/legacy/socketpair.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2006-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef NO_SYSCALL_LEGACY
+
+#define _NONSTD_SOURCE
+#include <sys/cdefs.h>
+
+/*
+ * We need conformance on so that EOPNOTSUPP=102.  But the routine symbol
+ * will still be the legacy (undecorated) one.
+ */
+#undef __DARWIN_UNIX03
+#define __DARWIN_UNIX03 1
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "_errno.h"
+
+extern int __socketpair(int, int, int, int [2]);
+
+/*
+ * socketpair stub, legacy version
+ */
+int
+socketpair(int domain, int type, int protocol, int socket_vector[2])
+{
+	int ret = __socketpair(domain, type, protocol, socket_vector);
+
+	/* use ENOTSUP for legacy behavior */
+	if (ret < 0 && errno == EOPNOTSUPP)
+		errno = ENOTSUP;
+	return ret;
+}
+
+#endif /* NO_SYSCALL_LEGACY */
diff --git a/libsyscall/wrappers/memcpy.c b/libsyscall/wrappers/memcpy.c
new file mode 100644
index 000000000..c9af35506
--- /dev/null
+++ b/libsyscall/wrappers/memcpy.c
@@ -0,0 +1,143 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "string.h"
+
+/*
+ * sizeof(word) MUST BE A POWER OF TWO
+ * SO THAT wmask BELOW IS ALL ONES
+ */
+typedef	int word;		/* "word" used for optimal copy speed */
+
+#define	wsize	sizeof(word)
+#define	wmask	(wsize - 1)
+
+/*
+ * Copy a block of memory, handling overlap.
+ * This is the routine that actually implements
+ * (the portable versions of) bcopy, memcpy, and memmove.
+ */
+
+__private_extern__
+void * memcpy(void *dst0, const void *src0, size_t length)
+{
+	char *dst = dst0;
+	const char *src = src0;
+	size_t t;
+	
+	if (length == 0 || dst == src)		/* nothing to do */
+		goto done;
+	
+	/*
+	 * Macros: loop-t-times; and loop-t-times, t>0
+	 */
+#define	TLOOP(s) if (t) TLOOP1(s)
+#define	TLOOP1(s) do { s; } while (--t)
+	
+	if ((unsigned long)dst < (unsigned long)src) {
+		/*
+		 * Copy forward.
+		 */
+		t = (uintptr_t)src;	/* only need low bits */
+		if ((t | (uintptr_t)dst) & wmask) {
+			/*
+			 * Try to align operands.  This cannot be done
+			 * unless the low bits match.
+			 */
+			if ((t ^ (uintptr_t)dst) & wmask || length < wsize)
+				t = length;
+			else
+				t = wsize - (t & wmask);
+			length -= t;
+			TLOOP1(*dst++ = *src++);
+		}
+		/*
+		 * Copy whole words, then mop up any trailing bytes.
+		 */
+		t = length / wsize;
+		TLOOP(*(word *)dst = *(word *)src; src += wsize; dst += wsize);
+		t = length & wmask;
+		TLOOP(*dst++ = *src++);
+	} else {
+		/*
+		 * Copy backwards.  Otherwise essentially the same.
+		 * Alignment works as before, except that it takes
+		 * (t&wmask) bytes to align, not wsize-(t&wmask).
+		 */
+		src += length;
+		dst += length;
+		t = (uintptr_t)src;
+		if ((t | (uintptr_t)dst) & wmask) {
+			if ((t ^ (uintptr_t)dst) & wmask || length <= wsize)
+				t = length;
+			else
+				t &= wmask;
+			length -= t;
+			TLOOP1(*--dst = *--src);
+		}
+		t = length / wsize;
+		TLOOP(src -= wsize; dst -= wsize; *(word *)dst = *(word *)src);
+		t = length & wmask;
+		TLOOP(*--dst = *--src);
+	}
+done:
+	return (dst0);
+}
+
+__private_extern__ void *
+memmove(void *s1, const void *s2, size_t n)
+{
+	return memcpy(s1, s2, n);
+}
+
+__private_extern__ void
+bcopy(const void *s1, void *s2, size_t n)
+{
+	memcpy(s2, s1, n);
+}
+
+/*
+ * Compare memory regions.
+ */
+__private_extern__ int
+memcmp(const void *s1, const void *s2, size_t n)
+{
+	if (n != 0) {
+		const unsigned char *p1 = s1, *p2 = s2;
+
+		do {
+			if (*p1++ != *p2++)
+				return (*--p1 - *--p2);
+		} while (--n != 0);
+	}
+	return (0);
+}
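
[Editor's note] Since memmove() here simply delegates to this overlap-aware memcpy(), both shift directions are safe: dst below src copies forward, dst above src copies backward. A tiny standalone self-check of that property (it exercises the libc symbols rather than these __private_extern__ copies):

#include <assert.h>
#include <string.h>
#include <stdio.h>

int
main(void)
{
	char buf[16] = "abcdefghijklmno";

	memmove(buf + 2, buf, 13);	/* overlapping, dst > src */
	assert(memcmp(buf, "ababcdefghijklm", 15) == 0);
	memmove(buf, buf + 2, 13);	/* overlapping, dst < src */
	assert(memcmp(buf, "abcdefghijklm", 13) == 0);
	puts("ok");
	return 0;
}
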
diff --git a/libsyscall/wrappers/remove-counter.c b/libsyscall/wrappers/remove-counter.c
new file mode 100644
index 000000000..d6a2846d8
--- /dev/null
+++ b/libsyscall/wrappers/remove-counter.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <sys/types.h>
+
+#if defined(__ppc64__) || defined(__i386__) || defined(__x86_64__)
+static int64_t __remove_counter = 0;
+#else
+static int32_t __remove_counter = 0;
+#endif
+
+__uint64_t
+__get_remove_counter(void) {
+#if defined(__arm__) && !defined(_ARM_ARCH_6)
+	return __remove_counter;
+#else
+	return __sync_add_and_fetch(&__remove_counter, 0);
+#endif
+}
+
+void
+__inc_remove_counter(void)
+{
+#if defined(__arm__) && !defined(_ARM_ARCH_6)
+	__remove_counter++;
+#else
+	__sync_add_and_fetch(&__remove_counter, 1);
+#endif
+}
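
[Editor's note] The counter is bumped by the rename() and rmdir() wrappers that follow; the pre-ARMv6 branch drops atomicity because those CPUs lack the primitives __sync_add_and_fetch needs. These are private symbols, so the consumer below is only an assumption about the intended use: snapshot the counter and treat any change as "a directory entry may have disappeared since":

#include <sys/types.h>

__uint64_t __get_remove_counter(void);

struct dir_cache {
	__uint64_t stamp;	/* value of the counter when cached */
	/* ... cached entries ... */
};

static int
dir_cache_valid(const struct dir_cache *c)
{
	return c->stamp == __get_remove_counter();
}
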
diff --git a/libsyscall/wrappers/rename.c b/libsyscall/wrappers/rename.c
new file mode 100644
index 000000000..a73c0ec82
--- /dev/null
+++ b/libsyscall/wrappers/rename.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+void __inc_remove_counter(void);
+int __rename(const char *old, const char *new);
+
+int
+rename(const char *old, const char *new)
+{
+	int res = __rename(old, new);
+	if (res == 0) __inc_remove_counter();
+	return res;
+}
diff --git a/libsyscall/wrappers/rmdir.c b/libsyscall/wrappers/rmdir.c
new file mode 100644
index 000000000..07bfb9588
--- /dev/null
+++ b/libsyscall/wrappers/rmdir.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+void __inc_remove_counter(void);
+int __rmdir(const char *path);
+
+int
+rmdir(const char *path)
+{
+	int res = __rmdir(path);
+	if (res == 0) __inc_remove_counter();
+	return res;
+}
diff --git a/libsyscall/wrappers/select-base.c b/libsyscall/wrappers/select-base.c
new file mode 100644
index 000000000..09f8816f6
--- /dev/null
+++ b/libsyscall/wrappers/select-base.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2005, 2007 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#if defined(__LP64__) && (defined(VARIANT_CANCELABLE) || defined(VARIANT_PRE1050))
+#undef __DARWIN_NON_CANCELABLE
+#define __DARWIN_NON_CANCELABLE 0
+#endif /* __LP64__ && (VARIANT_CANCELABLE || VARIANT_PRE1050) */
+
+#include <sys/select.h>
+#include "_errno.h"
+
+#if defined(VARIANT_CANCELABLE) || defined(VARIANT_PRE1050)
+extern int __select(int, fd_set * __restrict, fd_set * __restrict,
+	fd_set * __restrict, struct timeval * __restrict);
+#else /* !VARIANT_CANCELABLE && !VARIANT_PRE1050 */
+extern int __select_nocancel(int, fd_set * __restrict, fd_set * __restrict,
+	fd_set * __restrict, struct timeval * __restrict);
+#endif /* VARIANT_CANCELABLE || VARIANT_PRE1050 */
+
+/*
+ * select stub: returns an error if nfds > FD_SETSIZE, and adds pthread
+ * cancelability, as mandated for conformance.
+ *
+ * This is only for (non-DARWINEXTSN) UNIX03 (both cancelable and
+ * non-cancelable) and for legacy.
+ */
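+/*
+ * (Sketch of the build scheme, inferred from the #ifdefs in this file:
+ * it is compiled several times with different combinations of
+ * VARIANT_CANCELABLE, VARIANT_PRE1050, and VARIANT_LEGACY defined,
+ * producing each conformance variant of select() from one source.)
+ */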
+int
+select(int nfds, fd_set * __restrict readfds, fd_set * __restrict writefds,
+	fd_set * __restrict exceptfds, struct timeval * __restrict
+#if defined(VARIANT_LEGACY) || defined(VARIANT_PRE1050)
+	intimeout
+#else /* !VARIANT_LEGACY && !VARIANT_PRE1050 */
+	timeout
+#endif /* VARIANT_LEGACY || VARIANT_PRE1050 */
+	)
+{
+
+#if defined(VARIANT_LEGACY) || defined(VARIANT_PRE1050)
+	struct timeval tb, *timeout;
+
+	/*
+	 * Legacy select behavior is minimum 10 msec when tv_usec is non-zero
+	 */
+	if (intimeout && intimeout->tv_sec == 0 && intimeout->tv_usec > 0 && intimeout->tv_usec < 10000) {
+		tb.tv_sec = 0;
+		tb.tv_usec = 10000;
+		timeout = &tb;
+	} else
+		timeout = intimeout;
+#else /* !VARIANT_LEGACY && !VARIANT_PRE1050 */
+	if (nfds > FD_SETSIZE) {
+		errno = EINVAL;
+		return -1;
+	}
+#endif /* VARIANT_LEGACY || VARIANT_PRE1050 */
+#if defined(VARIANT_CANCELABLE) || defined(VARIANT_PRE1050)
+	return __select(nfds, readfds, writefds, exceptfds, timeout);
+#else /* !VARIANT_CANCELABLE && !VARIANT_PRE1050 */
+	return __select_nocancel(nfds, readfds, writefds, exceptfds, timeout);
+#endif /* VARIANT_CANCELABLE || VARIANT_PRE1050 */
+}
diff --git a/libsyscall/wrappers/sigsuspend-base.c b/libsyscall/wrappers/sigsuspend-base.c
new file mode 100644
index 000000000..98f3fc3d0
--- /dev/null
+++ b/libsyscall/wrappers/sigsuspend-base.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/*	@(#)sigsuspend.c	1.0	9/22/95	(c) 1995 NeXT	*/
+
+#include <signal.h>
+#include <unistd.h>
+
+int __SIGSUSPEND(const sigset_t);
+
+int
+sigsuspend (const sigset_t *sigmask_p)
+{
+    sigset_t	mask;
+
+    if (sigmask_p)
+        mask = *sigmask_p;
+    else
+        sigemptyset(&mask);
+    return __SIGSUSPEND(mask);
+}
+
diff --git a/libsyscall/wrappers/unix03/chmod.c b/libsyscall/wrappers/unix03/chmod.c
new file mode 100644
index 000000000..ca5077780
--- /dev/null
+++ b/libsyscall/wrappers/unix03/chmod.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <sys/cdefs.h>
+
+#if __DARWIN_UNIX03
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "_errno.h"
+
+extern int __chmod(const char *path, mode_t mode);
+
+/*
+ * chmod stub: on EPERM, retry without S_ISUID and/or S_ISGID, as
+ * mandated for conformance.
+ *
+ * This is for UNIX03 only.
+ */
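+/*
+ * Illustrative walk-through (assumed mode, for clarity): with
+ * mode == 06755 and every attempt failing with EPERM, the calls are
+ * __chmod(path, 06755), then 04755 (S_ISGID cleared), then 02755
+ * (S_ISUID cleared), then 0755 (both cleared); the first result that
+ * is not an EPERM failure is returned immediately.
+ */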
+int
+chmod(const char *path, mode_t mode)
+{
+	int res = __chmod(path, mode);
+
+	if (res >= 0 || errno != EPERM || (mode & (S_ISUID | S_ISGID)) == 0)
+		return res;
+	if (mode & S_ISGID) {
+		res = __chmod(path, mode ^ S_ISGID);
+		if (res >= 0 || errno != EPERM)
+			return res;
+	}
+	if (mode & S_ISUID) {
+		res = __chmod(path, mode ^ S_ISUID);
+		if (res >= 0 || errno != EPERM)
+			return res;
+	}
+	if ((mode & (S_ISUID | S_ISGID)) == (S_ISUID | S_ISGID))
+		res = __chmod(path, mode ^ (S_ISUID | S_ISGID));
+	return res;
+}
+
+#endif /* __DARWIN_UNIX03 */
diff --git a/libsyscall/wrappers/unix03/fchmod.c b/libsyscall/wrappers/unix03/fchmod.c
new file mode 100644
index 000000000..648c53edc
--- /dev/null
+++ b/libsyscall/wrappers/unix03/fchmod.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <sys/cdefs.h>
+
+#if __DARWIN_UNIX03
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "_errno.h"
+
+extern int __fchmod(int fd, mode_t mode);
+
+/*
+ * fchmod stub: on EPERM, retry without S_ISUID and/or S_ISGID, as
+ * mandated for conformance.
+ *
+ * This is for UNIX03 only.
+ */
+int
+fchmod(int fd, mode_t mode)
+{
+	int res = __fchmod(fd, mode);
+
+	if (res >= 0 || errno != EPERM || (mode & (S_ISUID | S_ISGID)) == 0)
+		return res;
+	if (mode & S_ISGID) {
+		res = __fchmod(fd, mode ^ S_ISGID);
+		if (res >= 0 || errno != EPERM)
+			return res;
+	}
+	if (mode & S_ISUID) {
+		res = __fchmod(fd, mode ^ S_ISUID);
+		if (res >= 0 || errno != EPERM)
+			return res;
+	}
+	if ((mode & (S_ISUID | S_ISGID)) == (S_ISUID | S_ISGID))
+		res = __fchmod(fd, mode ^ (S_ISUID | S_ISGID));
+	return res;
+}
+
+#endif /* __DARWIN_UNIX03 */
diff --git a/libsyscall/wrappers/unix03/getrlimit.c b/libsyscall/wrappers/unix03/getrlimit.c
new file mode 100644
index 000000000..ab38b7170
--- /dev/null
+++ b/libsyscall/wrappers/unix03/getrlimit.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <sys/cdefs.h>
+
+#if __DARWIN_UNIX03
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+extern int __getrlimit(int resource, struct rlimit *rlp);
+
+/*
+ * getrlimit stub, for conformance, OR in _RLIMIT_POSIX_FLAG
+ *
+ * This is for UNIX03 only.
+ */
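+/*
+ * (_RLIMIT_POSIX_FLAG tells the kernel to apply the POSIX-conformant
+ * semantics for the given resource; without it, the traditional BSD
+ * behavior is used.)
+ */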
+int
+getrlimit(int resource, struct rlimit *rlp)
+{
+	resource |= _RLIMIT_POSIX_FLAG;
+	return(__getrlimit(resource, rlp));
+}
+
+#endif /* __DARWIN_UNIX03 */
diff --git a/libsyscall/wrappers/unix03/mmap.c b/libsyscall/wrappers/unix03/mmap.c
new file mode 100644
index 000000000..60c6bba32
--- /dev/null
+++ b/libsyscall/wrappers/unix03/mmap.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <sys/cdefs.h>
+
+#if __DARWIN_UNIX03
+
+#include <sys/mman.h>
+#include <mach/vm_param.h>
+#include <errno.h>
+
+void *__mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off);
+
+/*
+ * mmap stub, with peremptory failures due to the extra parameter
+ * checking mandated for conformance.
+ *
+ * This is for UNIX03 only.
+ */
+void *
+mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off)
+{
+	/*
+	 * Peremptory failures:
+	 * 
+	 * o	off is not a multiple of the page size
+	 * o	flags does not contain either MAP_PRIVATE or MAP_SHARED
+	 * o	len is zero
+	 */
+	extern void cthread_set_errno_self(int);
+	if ((off & PAGE_MASK) ||
+	    (((flags & MAP_PRIVATE) != MAP_PRIVATE) &&
+	     ((flags & MAP_SHARED) != MAP_SHARED)) ||
+	    (len == 0)) {
+		cthread_set_errno_self(EINVAL);
+		return(MAP_FAILED);
+	}
+
+	return(__mmap(addr, len, prot, flags, fildes, off));
+}
+
+#endif /* __DARWIN_UNIX03 */
diff --git a/libsyscall/wrappers/unix03/setrlimit.c b/libsyscall/wrappers/unix03/setrlimit.c
new file mode 100644
index 000000000..ebc872deb
--- /dev/null
+++ b/libsyscall/wrappers/unix03/setrlimit.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <sys/cdefs.h>
+
+#if __DARWIN_UNIX03
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+extern int __setrlimit(int resource, const struct rlimit *rlp);
+
+/*
+ * setrlimit stub, for conformance, OR in _RLIMIT_POSIX_FLAG
+ *
+ * This is for UNIX03 only.
+ */
+int
+setrlimit(int resource, const struct rlimit *rlp)
+{
+	resource |= _RLIMIT_POSIX_FLAG;
+	return(__setrlimit(resource, rlp));
+}
+
+#endif /* __DARWIN_UNIX03 */
diff --git a/libsyscall/wrappers/unlink.c b/libsyscall/wrappers/unlink.c
new file mode 100644
index 000000000..8f2144a82
--- /dev/null
+++ b/libsyscall/wrappers/unlink.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+void __inc_remove_counter(void);
+int __unlink(const char *path);
+
+int
+unlink(const char *path)
+{
+	int res = __unlink(path);
+	if (res == 0) __inc_remove_counter();
+	return res;
+}
diff --git a/libsyscall/xcodescripts/compat-symlinks.sh b/libsyscall/xcodescripts/compat-symlinks.sh
new file mode 100755
index 000000000..78b504777
--- /dev/null
+++ b/libsyscall/xcodescripts/compat-symlinks.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+#
+# Copyright (c) 2010 Apple Inc. All rights reserved.
+#
+# @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+# 
+# This file contains Original Code and/or Modifications of Original Code
+# as defined in and that are subject to the Apple Public Source License
+# Version 2.0 (the 'License'). You may not use this file except in
+# compliance with the License. The rights granted to you under the License
+# may not be used to create, or enable the creation or redistribution of,
+# unlawful or unlicensed copies of an Apple operating system, or to
+# circumvent, violate, or enable the circumvention or violation of, any
+# terms of an Apple operating system software license agreement.
+# 
+# Please obtain a copy of the License at
+# http://www.opensource.apple.com/apsl/ and read it before using this file.
+# 
+# The Original Code and all software distributed under the License are
+# distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+# EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+# INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+# Please see the License for the specific language governing rights and
+# limitations under the License.
+# 
+# @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+#
+
+if [ "x$ACTION" != "xinstallhdrs" ]; then
+	ln -sf libsystem_kernel.a "$DSTROOT/usr/local/lib/dyld/libsystem_mach.a"
+fi
diff --git a/libsyscall/xcodescripts/compile-syscalls.pl b/libsyscall/xcodescripts/compile-syscalls.pl
new file mode 100755
index 000000000..a75737992
--- /dev/null
+++ b/libsyscall/xcodescripts/compile-syscalls.pl
@@ -0,0 +1,130 @@
+#!/usr/bin/perl
+#
+# Copyright (c) 2010 Apple Inc. All rights reserved.
+#
+# @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+# 
+# This file contains Original Code and/or Modifications of Original Code
+# as defined in and that are subject to the Apple Public Source License
+# Version 2.0 (the 'License'). You may not use this file except in
+# compliance with the License. The rights granted to you under the License
+# may not be used to create, or enable the creation or redistribution of,
+# unlawful or unlicensed copies of an Apple operating system, or to
+# circumvent, violate, or enable the circumvention or violation of, any
+# terms of an Apple operating system software license agreement.
+# 
+# Please obtain a copy of the License at
+# http://www.opensource.apple.com/apsl/ and read it before using this file.
+# 
+# The Original Code and all software distributed under the License are
+# distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+# EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+# INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+# Please see the License for the specific language governing rights and
+# limitations under the License.
+# 
+# @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+#
+
+use warnings;
+use strict;
+
+use Data::Dumper;
+use File::Spec;
+use IO::File;
+use File::Basename ();
+
+my $basename = File::Basename::basename($0);
+
+sub usage {
+	print "$basename: <source list> <output archive>\n";
+	exit 1;
+}
+
+usage unless scalar(@ARGV) == 2;
+
+my $sourceList = $ARGV[0];
+my $outputFile = $ARGV[1];
+
+my $f = IO::File->new($sourceList, 'r');
+die "$basename: $sourceList: $!\n" unless defined($f);
+
+my @objects;
+my @archs = split / /, $ENV{"ARCHS"};
+my @sources = <$f>;
+chomp @sources;
+
+undef $f;
+
+# compiler options
+chomp(my $CC = `xcrun -sdk "$ENV{'SDKROOT'}" -find cc`);
+my @CFLAGS = (
+	"-x assembler-with-cpp",
+	"-c",
+	"-I".$ENV{"SDKROOT"}."/System/Library/Frameworks/System.framework/PrivateHeaders",
+);
+
+chomp(my $LIBTOOL = `xcrun -sdk "$ENV{'SDKROOT'}" -find libtool`);
+my @LIBTOOLFLAGS = (
+	"-static",
+);
+
+# architectures
+for my $arch (@archs) {
+	push(@CFLAGS, "-arch $arch");
+}
+
+# do each compile
+my $jobs = `sysctl -n hw.ncpu` + 2;
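+# Throttle to roughly ncpu+2 concurrent compiles: each fork() below
+# consumes one slot, and once the slots are exhausted we wait for a
+# child (checking its exit status) before forking the next.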
+
+for my $src (@sources) {
+	if ($jobs == 0) {
+		if (wait != -1) {
+			$jobs++;
+		} else {
+			printf "wait returned -1 (no children) although all job slots were in use. Exiting.\n";
+			exit 1;
+		}
+		
+		if ($? != 0) {
+			printf "$CC exited with value %d\n", $? >> 8;
+			exit 1;
+		}
+	}
+	
+	(my $o = $src) =~ s/\.s$/\.o/;
+	my $compileCommand = "$CC " . join(' ', @CFLAGS) . " -o $o $src";
+	print $compileCommand . "\n";	# print, not printf: the command may contain '%'
+
+	$jobs--;
+	my $pid = fork();
+	if ($pid == 0) {
+		exec($compileCommand);
+	}
+	push(@objects, $o);
+}
+
+while (wait != -1) {
+	if ($? != 0) {
+		printf "$CC exited with value %d\n", $? >> 8;
+		exit 1;
+	}
+}
+
+printf "Finished assembly, beginning link.\n";
+
+# final link
+
+if (-f $outputFile) {
+	unlink($outputFile);
+}
+
+my $linkCommand = "$LIBTOOL " . join(' ', @LIBTOOLFLAGS) . " -o $outputFile " . join(' ', @objects);
+
+print $linkCommand . "\n";
+system($linkCommand);
+if ($? != 0) {
+	printf "$LIBTOOL exited with value %d\n", $? >> 8;
+	exit 1;
+}
diff --git a/libsyscall/xcodescripts/create-syscalls.pl b/libsyscall/xcodescripts/create-syscalls.pl
new file mode 100755
index 000000000..68366de86
--- /dev/null
+++ b/libsyscall/xcodescripts/create-syscalls.pl
@@ -0,0 +1,403 @@
+#!/usr/bin/perl
+#
+# Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+#
+# @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+# 
+# This file contains Original Code and/or Modifications of Original Code
+# as defined in and that are subject to the Apple Public Source License
+# Version 2.0 (the 'License'). You may not use this file except in
+# compliance with the License. Please obtain a copy of the License at
+# http://www.opensource.apple.com/apsl/ and read it before using this
+# file.
+# 
+# The Original Code and all software distributed under the License are
+# distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+# EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+# INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+# Please see the License for the specific language governing rights and
+# limitations under the License.
+# 
+# @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+#
+##########################################################################
+#
+# % create-syscalls.pl syscalls.master custom-directory platforms-directory platform-name out-directory
+#
+# This script fills the out-directory with the *.s files used to create
+# the double-underbar syscall stubs.  It reads the syscall.master file
+# to get the symbol names and number of arguments, and whether Libsystem
+# should automatically create the (non-double-underbar) stubs if Libc
+# doesn't provide a wrapper.  The list of generated stub files is
+# written to the stubs.list file, also placed in the out-directory.
+#
+# The custom-directory contains:
+# 1. SYS.h - used by the automatically created *.s and custom files
+# 2. custom.s - contains architecture-specific additional system calls and
+#    auxiliary routines (like cerror)
+# 3. special case double-underbar stub files - which are copied into
+#    the out-directory
+#
+##########################################################################
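+# As an illustration (hypothetical entry, not quoted from syscall.master),
+# a line such as
+#
+#     10  AUE_UNLINK  ALL  { int unlink(user_addr_t path) NO_SYSCALL_STUB; }
+#
+# would yield a private stub ___unlink.s containing roughly:
+#
+#     #define __SYSCALL_32BIT_ARG_BYTES 4
+#     #include "SYS.h"
+#     __SYSCALL(___unlink, unlink, 1)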
+
+use strict;
+use File::Basename ();
+use File::Copy ();
+use File::Spec;
+use IO::File;
+
+my $MyName = File::Basename::basename($0);
+
+my @CustomSrc = qw(custom.s);
+
+my @Architectures = split /\s/, $ENV{"ARCHS"};
+my @Copy = (qw(SYS.h), @CustomSrc);
+my $CustomDir;
+my $PlatformsDir;
+my $PlatformName;
+my $OutDir;
+# size in bytes of known types (only used for i386)
+my %TypeBytes = (
+    'au_asid_t'		=> 4,
+    'caddr_t'		=> 4,
+    'gid_t'		=> 4,
+    'id_t'		=> 4,
+    'idtype_t'		=> 4,
+    'int'		=> 4,
+    'int32_t'		=> 4,
+    'int64_t'		=> 8,
+    'key_t'		=> 4,
+    'long'		=> 4,
+    'mach_port_name_t'	=> 4,
+    'mode_t'		=> 4,
+    'off_t'		=> 8,
+    'pid_t'		=> 4,
+    'semun_t'		=> 4,
+    'sigset_t'		=> 4,
+    'size_t'		=> 4,
+    'socklen_t'		=> 4,
+    'ssize_t'		=> 4,
+    'u_int'		=> 4,
+    'u_long'		=> 4,
+    'uid_t'		=> 4,
+    'uint32_t'		=> 4,
+    'uint64_t'		=> 8,
+    'user_addr_t'	=> 4,
+    'user_long_t'	=> 4,
+    'user_size_t'	=> 4,
+    'user_ssize_t'	=> 4,
+    'user_ulong_t'	=> 4,
+);
+
+# We are moving toward storing all data in this hash, so that we always
+# know whether a symbol is aliased or promoted.
+my %Symbols = (
+    "quota" => {
+        c_sym => "quota",
+        syscall => "quota",
+        asm_sym => "_quota",
+        is_private => undef,
+        is_custom => undef,
+        nargs => 4,
+        bytes => 0,
+        aliases => {},
+    },
+    "setquota" => {
+        c_sym => "setquota",
+        syscall => "setquota",
+        asm_sym => "_setquota",
+        is_private => undef,
+        is_custom => undef,
+        nargs => 2,
+        bytes => 0,
+        aliases => {},
+    },
+    "syscall" => {
+        c_sym => "syscall",
+        syscall => "syscall",
+        asm_sym => "_syscall",
+        is_private => undef,
+        is_custom => undef,
+        nargs => 0,
+        bytes => 0,
+        aliases => {},
+    },
+);
+
+sub usage {
+    die "Usage: $MyName syscalls.master custom-directory platforms-directory out-directory\n";
+}
+
+##########################################################################
+# Read the syscall.master file and collect the system call names and number
+# of arguments.  It looks for the NO_SYSCALL_STUB qualifier following the
+# prototype to determine if no automatic stub should be created by Libsystem.
+# System call names that are already prefixed with double-underbar are treated
+# as if the NO_SYSCALL_STUB qualifier were specified (whether it is or not).
+#
+# For the #if lines in syscall.master, all macros are assumed to be defined,
+# except COMPAT_GETFSSTAT (assumed undefined).
+##########################################################################
+sub readMaster {
+    my $file = shift;
+    local $_;
+    my $f = IO::File->new($file, 'r');
+    die "$MyName: $file: $!\n" unless defined($f);
+    my $line = 0;
+    my $skip = 0;
+    while(<$f>) {
+        $line++;
+        if(/^#\s*endif/) {
+            $skip = 0;
+            next;
+        }
+        if(/^#\s*else/) {
+            $skip = -$skip;
+            next;
+        }
+        chomp;
+        if(/^#\s*if\s+(\S+)$/) {
+            $skip = ($1 eq 'COMPAT_GETFSSTAT') ? -1 : 1;
+            next;
+        }
+        next if $skip < 0;
+        next unless /^\d/;
+        s/^[^{]*{\s*//;
+        s/\s*}.*$//; # }
+        die "$MyName: no function prototype on line $line\n" unless length($_) > 0 && /;$/;
+        my $no_syscall_stub = /\)\s*NO_SYSCALL_STUB\s*;/;
+        my($name, $args) = /\s(\S+)\s*\(([^)]*)\)/;
+        next if $name =~ /e?nosys/;
+        $args =~ s/^\s+//;
+        $args =~ s/\s+$//;
+        my $argbytes = 0;
+        my $nargs = 0;
+        if($args ne '' && $args ne 'void') {
+            my @a = split(',', $args);
+            $nargs = scalar(@a);
+            # Calculate the size of all the arguments (only used for i386)
+            for my $type (@a) {
+                $type =~ s/\s*\w+$//; # remove the argument name
+                if($type =~ /\*$/) {
+                    $argbytes += 4; # a pointer type
+                } else {
+                    $type =~ s/^.*\s//; # remove any type qualifier, like unsigned
+                    my $b = $TypeBytes{$type};
+                    die "$MyName: $name: unknown type '$type'\n" unless defined($b);
+                    $argbytes += $b;
+                }
+            }
+        }
+        $Symbols{$name} = {
+            c_sym => $name,
+            syscall => $name,
+            asm_sym => $no_syscall_stub ? "___$name" : "_$name",
+            is_private => $no_syscall_stub,
+            is_custom => undef,
+            nargs => $nargs,
+            bytes => $argbytes,
+            aliases => {},
+            except => [],
+        };
+    }
+}
+
+sub checkForCustomStubs {
+    my ($dir) = @_;
+    
+    my ($c_sym_name, $sym);
+    while (($c_sym_name, $sym) = each %Symbols) {
+        my $source = "__".$$sym{c_sym}.".s";
+        my $custom = File::Spec->join($dir, $source);
+        next unless -f $custom;
+
+        $$sym{is_custom} = $source;
+        if (!$$sym{is_private}) {
+            foreach my $subarch (@Architectures) {
+                (my $arch = $subarch) =~ s/arm(.*)/arm/;
+                $$sym{aliases}{$arch} = [] unless $$sym{aliases}{$arch};
+                push(@{$$sym{aliases}{$arch}}, $$sym{asm_sym});
+            }
+            $$sym{asm_sym} = "__".$$sym{asm_sym};
+            $$sym{is_private} = 1;
+        }
+    }    
+}
+
+sub readAliases {
+    my ($platformDir, $platformName) = @_;
+    my $genericMap = File::Spec->join($platformDir, "syscall.map");
+    
+    my %sym_to_c;
+    foreach my $k (keys %Symbols) {
+        $sym_to_c{$Symbols{$k}{asm_sym}} = $k;
+    }
+    
+    my @a = ();
+    for my $arch (@Architectures) {
+        (my $new_arch = $arch) =~ s/arm(.*)/arm/g;
+        push(@a, $new_arch) unless grep { $_ eq $new_arch } @a;
+    }
+    
+    foreach my $arch (@a) {
+        my $syscallFile = File::Spec->join($platformDir, $platformName, $arch, "syscall.map");
+        
+        my @files = ();
+        push(@files, IO::File->new($syscallFile, 'r'));
+        die "$MyName: $syscallFile: $!\n" unless defined($files[$#files]);
+        push(@files, IO::File->new($genericMap, 'r'));
+        die "$MyName: $genericMap: $!\n" unless defined($files[$#files]);
+        
+        foreach my $f (@files) {
+            while (<$f>) {
+                next if /^#/;
+                chomp;
+                
+                my ($alias, $target_symbol) = split;
+                if (defined($target_symbol)) {
+                    foreach my $sym (values %Symbols) {
+                        # Most of the ugliness is gone from this script, except
+                        # for the need to try stripping underbars here.
+                        if ($$sym{is_private}) {
+                            next unless $$sym{asm_sym} eq $target_symbol;
+                        } else {
+                            (my $target = $target_symbol) =~ s/^__//;
+                            next unless ($$sym{asm_sym} eq $target || $$sym{asm_sym} eq $target_symbol);
+                        }
+                        $$sym{aliases}{$arch} = [] unless $$sym{aliases}{$arch};
+                        
+                        die "$MyName: $arch $$sym{asm_sym} -> $alias: Duplicate alias.\n" if grep { $_ eq $alias } @{$$sym{aliases}{$arch}};
+                        push(@{$$sym{aliases}{$arch}}, $alias);
+                        
+                        # Finally, if we aliased over a first-class symbol,
+                        # we need to mark it.
+                        my $c = $sym_to_c{$alias};
+                        if ($Symbols{$c}) {
+                            push(@{$Symbols{$c}{except}}, $arch);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+##########################################################################
+# Make a __xxx.s file: if it exists in the $CustomDir, just copy it; otherwise
+# create one.  We define the macro __SYSCALL_32BIT_ARG_BYTES so that SYS.h can
+# use it to define __SYSCALL depending on the arguments' total size.
+##########################################################################
+sub writeStubForSymbol {
+    my ($f, $symbol) = @_;
+    
+    my @conditions;
+    for my $subarch (@Architectures) {
+        (my $arch = $subarch) =~ s/arm(.*)/arm/;
+        push(@conditions, "defined(__${arch}__)") unless grep { $_ eq $arch } @{$$symbol{except}};
+    }
+    
+    print $f "#define __SYSCALL_32BIT_ARG_BYTES $$symbol{bytes}\n";
+    print $f "#include \"SYS.h\"\n\n";
+    if (scalar(@conditions)) {
+        printf $f "#if " . join(" || ", @conditions) . "\n";
+        printf $f "__SYSCALL(%s, %s, %d)\n", $$symbol{asm_sym}, $$symbol{syscall}, $$symbol{nargs};
+        if (!$$symbol{is_private} && (scalar(@conditions) < scalar(@Architectures))) {
+            printf $f "#else\n";
+            printf $f "__SYSCALL(%s, %s, %d)\n", "__".$$symbol{asm_sym}, $$symbol{syscall}, $$symbol{nargs};
+        }
+        printf $f "#endif\n\n";
+    } else {
+        # This isn't an inconsistency: the kernel can expose whatever it
+        # wants, but if all of our arches override a stub we need to
+        # honour that.
+    }
+}
+
+sub writeAliasesForSymbol {
+    my ($f, $symbol) = @_;
+    
+    foreach my $subarch (@Architectures) {
+        (my $arch = $subarch) =~ s/arm(.*)/arm/;
+        
+        next unless scalar($$symbol{aliases}{$arch});
+        
+        printf $f "#if defined(__${arch}__)\n";
+        foreach my $alias_sym (@{$$symbol{aliases}{$arch}}) {
+            my $sym = (grep { $_ eq $arch } @{$$symbol{except}}) ? "__".$$symbol{asm_sym} : $$symbol{asm_sym};
+
+            printf $f "\t.globl\t$alias_sym\n";
+            printf $f "\t.set\t$alias_sym, $sym\n";
+        }
+        printf $f "#endif\n\n";
+    }
+}
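+# For example (hypothetical symbols): a syscall.map line aliasing
+# _select$UNIX2003 to ___select makes writeAliasesForSymbol emit
+#
+#     #if defined(__i386__)
+#     .globl  _select$UNIX2003
+#     .set    _select$UNIX2003, ___select
+#     #endif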
+
+usage() unless scalar(@ARGV) == 5;
+$CustomDir = $ARGV[1];
+die "$MyName: $CustomDir: No such directory\n" unless -d $CustomDir;
+$PlatformsDir = $ARGV[2];
+die "$MyName: $PlatformsDir: No such directory\n" unless -d $PlatformsDir;
+$PlatformName = $ARGV[3];
+die "$MyName: $PlatformsDir/$PlatformName: No such directory\n" unless -d "$PlatformsDir/$PlatformName";
+$OutDir = $ARGV[4];
+die "$MyName: $OutDir: No such directory\n" unless -d $OutDir;
+
+readMaster($ARGV[0]);
+checkForCustomStubs($CustomDir);
+readAliases($PlatformsDir, $PlatformName);
+
+##########################################################################
+# copy the files specified in @Copy from the $CustomDir to $OutDir
+##########################################################################
+for(@Copy) {
+    my $custom = File::Spec->join($CustomDir, $_);
+    my $path = File::Spec->join($OutDir, $_);
+    print "Copy $custom -> $path\n";
+    File::Copy::copy($custom, $path) || die "$MyName: copy($custom, $path): $!\n";
+}
+
+##########################################################################
+# make all the *.s files
+##########################################################################
+my @src;
+my($k, $sym);
+while (($k, $sym) = each %Symbols)
+{
+	my $srcname = $$sym{asm_sym} . ".s";
+	my $outpath = File::Spec->join($OutDir, $srcname);
+
+	if ($$sym{is_custom}) {
+		my $custom = File::Spec->join($CustomDir, $$sym{is_custom});
+		File::Copy::copy($custom, $outpath);
+		print "Copied $outpath\n";
+		
+		print "Writing aliases for $srcname\n";
+		my $f = IO::File->new($outpath, 'a');
+		die "$MyName: $outpath: $!\n" unless defined($f);
+		writeAliasesForSymbol($f, $sym);
+		undef $f;
+	} else {
+		my $f = IO::File->new($outpath, 'w');
+		die "$MyName: $outpath: $!\n" unless defined($f);
+		
+		print "Creating $outpath\n";
+		writeStubForSymbol($f, $sym);
+		writeAliasesForSymbol($f, $sym);
+		undef $f;
+	}
+	push(@src, $srcname);
+}
+
+##########################################################################
+# create the stubs.list file from the list of files in @src and @CustomSrc
+##########################################################################
+my $path = File::Spec->join($OutDir, 'stubs.list');
+my $f = IO::File->new($path, 'w');
+my @sources = sort(@src, @CustomSrc);
+for my $s (@sources) {
+	print $f File::Spec->join($OutDir, $s) . "\n";
+}
+undef $f;
+undef $path;
+
diff --git a/libsyscall/xcodescripts/mach_install_mig.sh b/libsyscall/xcodescripts/mach_install_mig.sh
new file mode 100755
index 000000000..068bc30ad
--- /dev/null
+++ b/libsyscall/xcodescripts/mach_install_mig.sh
@@ -0,0 +1,97 @@
+#!/bin/sh
+#
+# Copyright (c) 2010 Apple Inc. All rights reserved.
+#
+# @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+# 
+# This file contains Original Code and/or Modifications of Original Code
+# as defined in and that are subject to the Apple Public Source License
+# Version 2.0 (the 'License'). You may not use this file except in
+# compliance with the License. The rights granted to you under the License
+# may not be used to create, or enable the creation or redistribution of,
+# unlawful or unlicensed copies of an Apple operating system, or to
+# circumvent, violate, or enable the circumvention or violation of, any
+# terms of an Apple operating system software license agreement.
+# 
+# Please obtain a copy of the License at
+# http://www.opensource.apple.com/apsl/ and read it before using this file.
+# 
+# The Original Code and all software distributed under the License are
+# distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+# EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+# INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+# Please see the License for the specific language governing rights and
+# limitations under the License.
+# 
+# @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+#
+
+# build inside OBJROOT
+cd "$OBJROOT"
+
+# check if we're building for the simulator
+[ "$RC_ProjectName" = "Libmach_Sim" ] && DSTROOT="$DSTROOT$SDKROOT"
+
+MIG=`xcrun -sdk "$SDKROOT" -find mig`
+MIGCC=`xcrun -sdk "$SDKROOT" -find cc`
+export MIGCC
+MIG_DEFINES="-DLIBSYSCALL_INTERFACE"
+MIG_HEADER_DST="$DSTROOT/usr/include/mach"
+SERVER_HEADER_DST="$DSTROOT/usr/include/servers"
+# from old Libsystem makefiles
+MACHINE_ARCH=`echo $ARCHS | cut -d' ' -f 1`
+SRC="$SRCROOT/mach"
+
+MIGS="clock.defs
+	clock_priv.defs
+	clock_reply.defs
+	exc.defs
+	host_priv.defs
+	host_security.defs
+	ledger.defs
+	lock_set.defs
+	mach_port.defs
+	mach_host.defs
+	mach_vm.defs
+	processor.defs
+	processor_set.defs
+	vm_map.defs"
+
+MIGS_ARCH="thread_act.defs
+	task.defs"
+
+SERVER_HDRS="key_defs.h
+	ls_defs.h
+	netname_defs.h
+	nm_defs.h"
+
+# install /usr/include/servers headers
+mkdir -p $SERVER_HEADER_DST
+for hdr in $SERVER_HDRS; do
+	install -o 0 -c -m 444 $SRC/servers/$hdr $SERVER_HEADER_DST
+done
+
+# special case because we only have one to do here
+$MIG -arch $MACHINE_ARCH -header "$SERVER_HEADER_DST/netname.h" $SRC/servers/netname.defs
+
+# install /usr/include/mach mig headers
+
+mkdir -p $MIG_HEADER_DST
+
+for mig in $MIGS; do
+	MIG_NAME=`basename $mig .defs`
+	$MIG -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_HEADER_DST/$MIG_NAME.h" $MIG_DEFINES $SRC/$mig
+done
+
+ARCHS=`echo $ARCHS | sed -e 's/armv./arm/g'`
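+# (the sed above collapses armv6/armv7/... to plain "arm" so the
+# per-architecture headers land in a single arm directory)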
+for arch in $ARCHS; do
+	MIG_ARCH_DST="$MIG_HEADER_DST/$arch"
+
+	mkdir -p $MIG_ARCH_DST
+
+	for mig in $MIGS_ARCH; do
+		MIG_NAME=`basename $mig .defs`
+		$MIG -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_ARCH_DST/$MIG_NAME.h" $MIG_DEFINES $SRC/$mig
+	done	
+done
diff --git a/makedefs/MakeInc.cmd b/makedefs/MakeInc.cmd
index 06457b6f2..5fea21d30 100644
--- a/makedefs/MakeInc.cmd
+++ b/makedefs/MakeInc.cmd
@@ -24,21 +24,88 @@ else
 endif
 
 SDKROOT ?= /
+HOST_SDKROOT ?= /
 
-CC := $(XCRUN) -sdk $(SDKROOT) cc
-CXX := $(XCRUN) -sdk $(SDKROOT) g++
-MIG := $(XCRUN) -sdk $(SDKROOT) mig
+ifeq ($(PLATFORM),)
+	export PLATFORM := $(shell xcodebuild -sdk $(SDKROOT) -version PlatformPath | head -1 | sed 's,^.*/\([^/]*\)\.platform$$,\1,')
+	ifeq ($(PLATFORM),)
+		export PLATFORM := MacOSX
+	endif
+endif
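+# (e.g. "xcodebuild -sdk iphoneos -version PlatformPath" prints a path
+# ending in ".../iPhoneOS.platform", which the sed above reduces to
+# "iPhoneOS"; invocation shown for illustration)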
+
+# CC/CXX get defined by make(1) by default, so we can't check them
+# against the empty string to see if they haven't been set
+ifeq ($(origin CC),default)
+ifneq ($(findstring iPhone,$(PLATFORM)),)
+	export CC := $(shell $(XCRUN) -sdk $(SDKROOT) -find gcc-4.2)
+else
+	export CC := $(shell $(XCRUN) -sdk $(SDKROOT) -find cc)
+endif
+endif
+ifeq ($(origin CXX),default)
+ifneq ($(findstring iPhone,$(PLATFORM)),)
+	export CXX := $(shell $(XCRUN) -sdk $(SDKROOT) -find g++-4.2)
+else
+	export CXX := $(shell $(XCRUN) -sdk $(SDKROOT) -find c++)
+endif
+endif
+ifeq ($(MIG),)
+	export MIG := $(shell $(XCRUN) -sdk $(SDKROOT) -find mig)
+endif
 ifeq ($(MIGCC),)
-	export MIGCC := $(shell $(XCRUN) -sdk $(SDKROOT) -find cc)
+	export MIGCC := $(CC)
 endif
 ifeq ($(RELPATH),)
 	export RELPATH := $(shell $(XCRUN) -sdk $(SDKROOT) -find relpath)
 endif
-SEG_HACK := $(XCRUN) -sdk $(SDKROOT) setsegname
-KEXT_CREATE_SYMBOL_SET := $(XCRUN) -sdk $(SDKROOT) kextsymboltool
+ifeq ($(STRIP),)
+	export STRIP := $(shell $(XCRUN) -sdk $(SDKROOT) -find strip)
+endif
+ifeq ($(LIPO),)
+	export LIPO := $(shell $(XCRUN) -sdk $(SDKROOT) -find lipo)
+endif
+ifeq ($(LIBTOOL),)
+	export LIBTOOL := $(shell $(XCRUN) -sdk $(SDKROOT) -find libtool)
+endif
+ifeq ($(NM),)
+	export NM := $(shell $(XCRUN) -sdk $(SDKROOT) -find nm)
+endif
+ifeq ($(UNIFDEF),)
+	export UNIFDEF := $(shell $(XCRUN) -sdk $(SDKROOT) -find unifdef)
+endif
+ifeq ($(DECOMMENT),)
+	export DECOMMENT := $(shell $(XCRUN) -sdk $(SDKROOT) -find decomment)
+endif
+ifeq ($(DSYMUTIL),)
+	export DSYMUTIL := $(shell $(XCRUN) -sdk $(SDKROOT) -find dsymutil)
+endif
+ifeq ($(CTFCONVERT),)
+	export CTFCONVERT := $(shell $(XCRUN) -sdk $(SDKROOT) -find ctfconvert)
+endif
+ifeq ($(CTFMERGE),)
+	export CTFMERGE :=  $(shell $(XCRUN) -sdk $(SDKROOT) -find ctfmerge)
+endif
+ifeq ($(CTFSCRUB),)
+	export CTFSCRUB := $(shell $(XCRUN) -sdk $(SDKROOT) -find ctfdump) -r
+endif
+ifeq ($(NMEDIT),)
+	export NMEDIT := $(shell $(XCRUN) -sdk $(SDKROOT) -find nmedit)
+endif
 
-MD = /usr/bin/md
+# Platform-specific tools
+ifneq ($(findstring iPhone,$(PRODUCT)),)
+ifeq ($(IPHONEOS_OPTIMIZE),)
+	export IPHONEOS_OPTIMIZE := $(shell $(XCRUN) -sdk $(SDKROOT) -find iphoneos-optimize)
+endif
+endif
+
+# Scripts or tools we build ourselves
+SEG_HACK := $(OBJROOT)/SETUP/setsegname/setsegname
+KEXT_CREATE_SYMBOL_SET := $(OBJROOT)/SETUP/kextsymboltool/kextsymboltool
+NEWVERS = $(SRCROOT)/config/newvers.pl
 
+# Standard BSD tools
+MD = /usr/bin/md
 RM = /bin/rm -f
 CP = /bin/cp
 MV = /bin/mv
@@ -47,23 +114,43 @@ CAT = /bin/cat
 MKDIR = /bin/mkdir -p
 FIND = /usr/bin/find
 INSTALL = /usr/bin/install
-
 TAR = /usr/bin/gnutar
-STRIP = $(XCRUN) -sdk $(SDKROOT) strip
-LIPO = $(XCRUN) -sdk $(SDKROOT) lipo
-LIBTOOL = $(XCRUN) -sdk $(SDKROOT) libtool
-NM = $(XCRUN) -sdk $(SDKROOT) nm
-
 BASENAME = /usr/bin/basename
 TR = /usr/bin/tr
 
-UNIFDEF   = $(XCRUN) -sdk $(SDKROOT) unifdef
-DECOMMENT = /usr/local/bin/decomment
-NEWVERS = $(SRCROOT)/config/newvers.pl
+# Platform-specific tools
+ifeq (iPhoneOS,$(PLATFORM))
+ifeq ($(IPHONEOS_OPTIMIZE),)
+	export IPHONEOS_OPTIMIZE := $(shell $(XCRUN) -sdk $(SDKROOT) -find iphoneos-optimize || echo /usr/bin/true)
+endif
+endif
+
+CTFINSERT = $(XCRUN) -sdk $(SDKROOT) ctf_insert
 
-DSYMUTIL = $(XCRUN) -sdk $(SDKROOT) dsymutil
-CTFCONVERT = $(XCRUN) -sdk $(SDKROOT) ctfconvert
-CTFMERGE =  $(XCRUN) -sdk $(SDKROOT) ctfmerge
-CTFSCRUB = $(XCRUN) -sdk $(SDKROOT) ctfdump -r
+#
+# Command to generate host binaries. Intentionally not
+# $(CC), which controls the target compiler
+#
+ifeq ($(HOST_CC),)
+	export HOST_CC		:= $(shell $(XCRUN) -sdk $(HOST_SDKROOT) -find cc)
+endif
+ifeq ($(HOST_FLEX),)
+	export HOST_FLEX	:= $(shell $(XCRUN) -sdk $(HOST_SDKROOT) -find flex)
+endif
+ifeq ($(HOST_BISON),)
+	export HOST_BISON	:= $(shell $(XCRUN) -sdk $(HOST_SDKROOT) -find bison)
+endif
+ifeq ($(HOST_CODESIGN),)
+	export HOST_CODESIGN	:= $(shell $(XCRUN) -sdk $(HOST_SDKROOT) -find codesign)
+endif
+
+#
+# Command to build libkmod.a/libkmodc++.a, which are
+# linked into kext binaries, and should be built as if
+# they followed system-wide policies
+#
+ifeq ($(LIBKMOD_CC),)
+	export LIBKMOD_CC	:= $(shell $(XCRUN) -sdk $(SDKROOT) -find cc)
+endif
 
 # vim: set ft=make:
diff --git a/makedefs/MakeInc.def b/makedefs/MakeInc.def
index 984c4f316..92d80379f 100644
--- a/makedefs/MakeInc.def
+++ b/makedefs/MakeInc.def
@@ -37,14 +37,14 @@ endif
 # Architecture options
 #
 ifndef SUPPORTED_ARCH_CONFIGS
-export SUPPORTED_ARCH_CONFIGS = PPC I386 X86_64 ARM
+export SUPPORTED_ARCH_CONFIGS = I386 X86_64
 endif
 
 ifndef ARCH_CONFIGS
 ifdef RC_ARCHS
-export ARCH_CONFIGS 	:= $(shell printf "%s" "$(RC_ARCHS)" | $(TR) a-z A-Z)
+export ARCH_CONFIGS 	:= $(shell printf "%s" "$(RC_ARCHS)" | $(TR) a-z A-Z | sed -e 's/ARMV./ARM/g')
 else
-export ARCH_CONFIGS 	:= $(shell arch | $(TR) a-z A-Z)
+export ARCH_CONFIGS 	:= $(shell arch | $(TR) a-z A-Z | sed -e 's/ARMV./ARM/g')
 endif
 endif
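+# (e.g. RC_ARCHS="armv7 x86_64" yields ARCH_CONFIGS="ARM X86_64")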
 ifdef	ARCH_CONFIG
@@ -53,6 +53,16 @@ export ARCH_CONFIG_LC 	:= $(shell printf "%s" "$(ARCH_CONFIG)" | $(TR) A-Z a-z)
 endif
 endif
 
+#
+# Platform options
+#
+ifndef SUPPORTED_PLATFORMS
+export SUPPORTED_PLATFORMS = MacOSX iPhoneOS iPhoneSimulator
+endif
+
+# PLATFORM is set earlier in MakeInc.cmd, closer to where decisions about
+# platform tools are made
+
 #
 # Kernel Configuration options  
 #
@@ -94,6 +104,11 @@ export MACHINE_CONFIG 	= DEFAULT
 endif
 
 
+
+ifndef SUPPORTED_MACHINE_CONFIGS
+export SUPPORTED_MACHINE_CONFIGS = DEFAULT
+endif
+
 #
 # Target configuration options.  NOTE - target configurations will 
 # override ARCH_CONFIGS and KERNEL_CONFIGS.
@@ -105,7 +120,7 @@ endif
 # separated by whitespace.
 #
 # Example:
-#	TARGET_CONFIGS="release ppc default debug i386 default release arm MX31ADS"
+#	TARGET_CONFIGS="release x86_64 default debug i386 default release arm MX31ADS"
 # Parameters may be in upper or lower case (they are converted to upper).
 #
 # "default" parameter is a special case.  It means use the default value for 
@@ -113,10 +128,9 @@ endif
 #
 # default kernel configuration = DEFAULT_KERNEL_CONFIG
 # default architecture configuration = system architecture where you are running make.
-# default machine configuration for ppc = none at this time.
 # default machine configuration for i386 = none at this time.
 # default machine configuration for x86_64 = none at this time.
-# default machine configuration for arm = "S5L8900X".
+# default machine configuration for arm = "S5L8920X".
 #
 ifndef TARGET_CONFIGS_UC
 ifdef TARGET_CONFIGS
@@ -136,24 +150,55 @@ endif
 export MACHINE_CONFIG_LC := $(shell printf "%s" "$(MACHINE_CONFIG)" | $(TR) A-Z a-z)
 export KERNEL_CONFIG_LC := $(shell printf "%s" "$(KERNEL_CONFIG)" | $(TR) A-Z a-z)
 
+#
+# Validate configuration options
+#
+ifneq ($(ARCH_CONFIG),)
+ifeq ($(filter $(ARCH_CONFIG),$(SUPPORTED_ARCH_CONFIGS)),)
+$(error Unsupported ARCH_CONFIG $(ARCH_CONFIG))
+endif
+endif
+
+ifneq ($(KERNEL_CONFIG),)
+ifeq ($(filter $(KERNEL_CONFIG),$(SUPPORTED_KERNEL_CONFIGS)),)
+$(error Unsupported KERNEL_CONFIG $(KERNEL_CONFIG))
+endif
+endif
+
+ifneq ($(MACHINE_CONFIG),)
+ifeq ($(filter $(MACHINE_CONFIG),$(SUPPORTED_MACHINE_CONFIGS)),)
+$(error Unsupported MACHINE_CONFIG $(MACHINE_CONFIG))
+endif
+endif
+
+ifneq ($(PLATFORM),)
+ifeq ($(filter $(PLATFORM),$(SUPPORTED_PLATFORMS)),)
+$(error Unsupported PLATFORM $(PLATFORM))
+endif
+endif
+
 #
 # Kernel Configuration to install
 #
-#  supported install architecture : PPC I386 X86_64 ARM
+#  supported install architecture : I386 X86_64 ARM
 #
 export INSTALL_TYPE 	= $(DEFAULT_KERNEL_CONFIG)
 
 ifndef INSTALL_ARCHS
-export INSTALL_ARCHS 	= $(strip $(foreach my_config, $(SUPPORTED_ARCH_CONFIGS), $(findstring $(my_config), $(TARGET_CONFIGS_UC))))
+export INSTALL_ARCHS 	= $(strip $(foreach my_config, $(SUPPORTED_ARCH_CONFIGS), $(filter $(TARGET_CONFIGS_UC),$(my_config))))
 export INSTALL_ARCHS_LC := $(shell printf "%s" "$(INSTALL_ARCHS)" | $(TR) A-Z a-z)
 endif
 
 export INSTALL_ARCH_DEFAULT	= $(firstword $(INSTALL_ARCHS))
+ifeq ($(INSTALL_ARCH_DEFAULT),)
+$(error Could not determine INSTALL_ARCH_DEFAULT)
+endif
 
 #
 # Standard defines list
 #
-export DEFINES = -DAPPLE -DKERNEL -DKERNEL_PRIVATE -DXNU_KERNEL_PRIVATE -DPRIVATE -D__MACHO__=1 -Dvolatile=__volatile $(IDENT)
+export DEFINES = -DAPPLE -DKERNEL -DKERNEL_PRIVATE -DXNU_KERNEL_PRIVATE \
+       -DPRIVATE -D__MACHO__=1 -Dvolatile=__volatile $(IDENT)
 
 #
 # Compiler command
@@ -166,20 +211,35 @@ KC++ := $(CXX)
 #
 
 CWARNFLAGS_STD = \
-	-Wall -Wno-format-y2k -W -Wstrict-prototypes -Wmissing-prototypes \
-	-Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch \
-	-Wshadow -Wcast-align -Wchar-subscripts -Winline \
-	-Wnested-externs -Wredundant-decls
+	-Wall -Werror -Wno-format-y2k -Wextra -Wstrict-prototypes \
+	-Wmissing-prototypes -Wpointer-arith -Wreturn-type -Wcast-qual \
+	-Wwrite-strings -Wswitch -Wshadow -Wcast-align -Wchar-subscripts \
+	-Winline -Wnested-externs -Wredundant-decls -Wextra-tokens
+
+# Certain warnings are non-fatal (8474835)
+CWARNFLAGS_STD += -Wno-error=cast-align
 
+# Can be overridden in Makefile.template or Makefile.$arch
 export CWARNFLAGS ?= $(CWARNFLAGS_STD)
 
+define add_perfile_cflags
+$(1)_CWARNFLAGS_ADD += $2
+endef
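+# Hypothetical per-file use, assuming the build rules consult the
+# per-object $(<file>_CWARNFLAGS_ADD) variable:
+#   $(eval $(call add_perfile_cflags,vm_map.o,-Wno-shadow))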
+
 CXXWARNFLAGS_STD = \
-	-Wall -Wno-format-y2k -W \
-	-Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch \
-	-Wcast-align -Wchar-subscripts -Wredundant-decls
+	-Wall -Werror -Wno-format-y2k -Wextra -Wpointer-arith -Wreturn-type \
+	-Wcast-qual -Wwrite-strings -Wswitch -Wcast-align -Wchar-subscripts \
+	-Wredundant-decls -Wextra-tokens
+
+# Certain warnings are non-fatal (8474835)
+CXXWARNFLAGS_STD += -Wno-error=cast-align
 
+# Can be overridden in Makefile.template or Makefile.$arch
 export CXXWARNFLAGS ?= $(CXXWARNFLAGS_STD)
 
+define add_perfile_cxxflags
+$(1)_CXXWARNFLAGS_ADD += $2
+endef
 
 #
 # Setup for parallel sub-makes based on 2 times number of logical CPUs
@@ -191,12 +251,10 @@ endif
 #
 # Default ARCH_FLAGS, for use with compiler/linker/assembler/mig drivers
 
-ARCH_FLAGS_PPC		  = -arch ppc
 ARCH_FLAGS_I386		  = -arch i386
 ARCH_FLAGS_X86_64	  = -arch x86_64
 ARCH_FLAGS_ARM		  = $($(addsuffix $(MACHINE_CONFIG),ARCH_FLAGS_ARM_))
 
-ARCH_FLAGS_ALL_PPC		= $(ARCH_FLAGS_PPC)
 ARCH_FLAGS_ALL_I386		= $(ARCH_FLAGS_I386)
 ARCH_FLAGS_ALL_X86_64		= $(ARCH_FLAGS_X86_64)
 ARCH_FLAGS_ALL_ARM		= -arch arm
@@ -209,14 +267,16 @@ ifdef RC_CFLAGS
 export OTHER_CFLAGS	= $(subst $(addprefix -arch ,$(RC_ARCHS)),,$(RC_CFLAGS))
 endif
 
+export DSYMRESDIR   = ./Contents/Resources/
 export DSYMBUILDDIR = ./Contents/Resources/DWARF/
 
 #
 # We must not use -fno-keep-inline-functions, or it will remove the dtrace
 # probes from the kernel.
 #
-export CFLAGS_GEN = -static $(DEBUG_CFLAGS) -nostdinc -nostdlib \
-	-fno-builtin -finline -fno-common -msoft-float \
+export CFLAGS_GEN = -static $(DEBUG_CFLAGS) -nostdinc \
+	-freorder-blocks				      \
+	-fno-builtin -fno-common -msoft-float \
 	-fsigned-bitfields -fno-stack-protector $(OTHER_CFLAGS)
 
 ifeq ($(BUILD_STABS),1)
@@ -234,15 +294,14 @@ export CFLAGS_DEVELOPMENT 	=
 export CFLAGS_DEBUG 	= 
 export CFLAGS_PROFILE 	=  -pg
 
-export CFLAGS_PPC 	= -Dppc -DPPC -D__PPC__ -DPAGE_SIZE_FIXED \
-				-mno-altivec -force_cpusubtype_ALL
 export CFLAGS_I386 	= -Di386 -DI386 -D__I386__ \
-				-DPAGE_SIZE_FIXED -force_cpusubtype_ALL
+				-DPAGE_SIZE_FIXED
 export CFLAGS_X86_64	= -Dx86_64 -DX86_64 -D__X86_64__ -DLP64 \
 				-DPAGE_SIZE_FIXED -mkernel
 export CFLAGS_ARM 	= -Darm -DARM -D__ARM__ -DPAGE_SIZE_FIXED \
 				-fno-strict-aliasing -fno-keep-inline-functions
 
+
 ifeq (-arch armv7,$(ARCH_FLAGS_ARM))
 CFLAGS_ARM		+= -mthumb
 endif
@@ -256,21 +315,16 @@ ifeq (-arch xscale,$(ARCH_FLAGS_ARM))
 CFLAGS_ARM		+= -mthumb
 endif
 
-export CFLAGS_RELEASEPPC = -O2 -mcpu=750 -mmultiple
-export CFLAGS_DEVELOPMENTPPC = -O2 -mcpu=750 -mmultiple
-export CFLAGS_DEBUGPPC = -O2 -mcpu=750 -mmultiple
-export CFLAGS_PROFILEPPC = -O2 -mcpu=750 -mmultiple
+export CFLAGS_RELEASEI386 = -O2
+export CFLAGS_DEVELOPMENTI386 = -O2
+export CFLAGS_DEBUGI386 = -O0
+export CFLAGS_PROFILEI386 = -O2
 
-export CFLAGS_RELEASEI386 = -Os
-export CFLAGS_DEVELOPMENTI386 = -Os
-export CFLAGS_DEBUGI386 = -Os
-export CFLAGS_PROFILEI386 = -Os
-
-export CFLAGS_RELEASEX86_64 = -Os
-export CFLAGS_DEVELOPMENTX86_64 = -Os
+export CFLAGS_RELEASEX86_64 = -O2
+export CFLAGS_DEVELOPMENTX86_64 = -O2
 # No space optimization for the DEBUG kernel for the benefit of gdb:
 export CFLAGS_DEBUGX86_64 = -O0
-export CFLAGS_PROFILEX86_64 = -Os
+export CFLAGS_PROFILEX86_64 = -O2
 
 export CFLAGS_RELEASEARM = -O2
 export CFLAGS_DEVELOPMENTARM = -O2
@@ -285,14 +339,35 @@ export CFLAGS 	= $(CFLAGS_GEN) \
 		  $($(addsuffix $(ARCH_CONFIG), $(addsuffix $(KERNEL_CONFIG),CFLAGS_))) \
 		  $(DEFINES)
 
+#
 # Default C++ flags
 #
-CXXFLAGS_GEN  = -fno-rtti -fno-exceptions -fcheck-new -fapple-kext
+
+OTHER_CXXFLAGS	=
+
+CXXFLAGS_GEN  = -fno-rtti -fno-exceptions -fcheck-new -fapple-kext \
+			$(OTHER_CXXFLAGS)
 
 CXXFLAGS      = $(CXXFLAGS_GEN) \
 		  $($(addsuffix $(ARCH_CONFIG),CXXFLAGS_)) \
 		  $($(addsuffix $(KERNEL_CONFIG),CXXFLAGS_))
 
+
+#
+# Support for LLVM Link Time Optimization (LTO)
+#
+
+ifeq ($(BUILD_LTO),1)
+export CFLAGS_GEN	+= -flto
+export CXXFLAGS_GEN	+= -flto
+export BUILD_MACHO_OBJ	= 0
+export BUILD_LTO	= 1
+else
+export BUILD_MACHO_OBJ	= 1
+export BUILD_LTO	= 0
+endif
+
+
 #
 # Assembler command
 #
@@ -309,7 +384,6 @@ export SFLAGS_DEVELOPMENT 	=
 export SFLAGS_DEBUG 	= 
 export SFLAGS_PROFILE 	= 
 
-export SFLAGS_PPC 	= $(CFLAGS_PPC) -force_cpusubtype_ALL
 export SFLAGS_I386	= $(CFLAGS_I386)
 export SFLAGS_ARM	= $(CFLAGS_ARM)
 export SFLAGS_X86_64 	= $(CFLAGS_X86_64)
@@ -332,6 +406,7 @@ LD	= $(KC++) -nostdlib
 
 export LDFLAGS_KERNEL_GEN = \
 	-static \
+	-nostdlib \
 	-fapple-kext \
 	-Wl,-e,__start \
 	-Wl,-sectalign,__TEXT,__text,0x1000 \
@@ -342,10 +417,12 @@ export LDFLAGS_KERNEL_GEN = \
         -Wl,-sectcreate,__PRELINK_STATE,__kexts,/dev/null \
 	-Wl,-sectcreate,__PRELINK_INFO,__info,/dev/null
 
-# Availability of DWARF allows DTrace CTF (compressed type format) to be constructed
+# Availability of DWARF allows DTrace CTF (compressed type format) to be constructed.
+# ctf_insert creates the CTF section.  It needs reserved padding in the
+# headers for the load command segment and the CTF section structures.
 ifeq ($(BUILD_DWARF),1)
 export LDFLAGS_KERNEL_GEN += \
-    -Wl,-sectcreate,__CTF,__ctf,/dev/null
+    -Wl,-headerpad,152
 endif
 
 export LDFLAGS_KERNEL_RELEASE 	=
@@ -353,14 +430,6 @@ export LDFLAGS_KERNEL_DEVELOPMENT 	=
 export LDFLAGS_KERNEL_DEBUG 	= 
 export LDFLAGS_KERNEL_PROFILE 	= 
 
-export LDFLAGS_KERNEL_PPC      = \
-	-force_cpusubtype_ALL \
-	-Wl,-new_linker \
-	-Wl,-pagezero_size,0x0 \
-	-Wl,-segaddr,__VECTORS,0x0 \
-	-Wl,-segaddr,__HIB,0x7000  \
-	-Wl,-segaddr,__TEXT,0xe000
- 
 export LDFLAGS_KERNEL_RELEASEI386     = \
 	-Wl,-new_linker \
 	-Wl,-pagezero_size,0x0 \
@@ -393,13 +462,13 @@ export LDFLAGS_KERNEL_PROFILEX86_64 = $(LDFLAGS_KERNEL_RELEASEX86_64)
 export LDFLAGS_KERNEL_RELEASEARM     = \
 	-Wl,-new_linker \
 	-Wl,-pagezero_size,0x0 \
-	-Wl,-image_base,0xC0001000 \
+	-Wl,-image_base,0x80001000 \
 	-Wl,-exported_symbols_list,$(TARGET)/kernel-kpi.exp
 
 export LDFLAGS_KERNEL_DEVELOPMENTARM     = \
 	-Wl,-new_linker \
 	-Wl,-pagezero_size,0x0 \
-	-Wl,-image_base,0xC0001000
+	-Wl,-image_base,0x80001000
 
 export LDFLAGS_KERNEL_DEBUGARM = $(LDFLAGS_KERNEL_DEVELOPMENTARM)
 
@@ -417,11 +486,6 @@ export LDFLAGS_KERNEL	= $(LDFLAGS_KERNEL_GEN) \
 #
 export LD_KERNEL_LIBS	= -lcc_kext
 
-#
-# Command to generate host binaries. Intentionally not
-# $(CC), which controls the target compiler
-#
-HOST_CC			= cc
 
 #
 # Default INCFLAGS
@@ -462,12 +526,25 @@ DATA_INSTALL_FLAGS = -c -m 0644
 #
 # Header file destinations
 #
-FRAMEDIR = System/Library/Frameworks
+ifeq ($(RC_ProjectName),xnu_headers_Sim)
+	HEADER_INSTALL_PREFIX = $(SDKROOT)
+else
+	HEADER_INSTALL_PREFIX = 
+endif
+
+FRAMEDIR = $(HEADER_INSTALL_PREFIX)/System/Library/Frameworks
+
+SINCVERS = B
+SINCFRAME = $(FRAMEDIR)/System.framework
+SINCDIR = $(SINCFRAME)/Versions/$(SINCVERS)/Headers
+SPINCDIR = $(SINCFRAME)/Versions/$(SINCVERS)/PrivateHeaders
+SRESDIR = $(SINCFRAME)/Versions/$(SINCVERS)/Resources
+
 ifndef INCDIR
-    INCDIR = /usr/include
+    INCDIR = $(HEADER_INSTALL_PREFIX)/usr/include
 endif
 ifndef LCLDIR
-    LCLDIR = $(FRAMEDIR)/System.framework/Versions/B/PrivateHeaders
+    LCLDIR = $(SPINCDIR)
 endif
 
 KINCVERS = A
@@ -478,10 +555,12 @@ KRESDIR = $(KINCFRAME)/Versions/$(KINCVERS)/Resources
 
 XNU_PRIVATE_UNIFDEF = -UMACH_KERNEL_PRIVATE -UBSD_KERNEL_PRIVATE -UIOKIT_KERNEL_PRIVATE -ULIBKERN_KERNEL_PRIVATE -ULIBSA_KERNEL_PRIVATE -UPEXPERT_KERNEL_PRIVATE -UXNU_KERNEL_PRIVATE
 
-SPINCFRAME_UNIFDEF = $(XNU_PRIVATE_UNIFDEF) -UKERNEL_PRIVATE -UKERNEL -DPRIVATE -U_OPEN_SOURCE_
-SINCFRAME_UNIFDEF  = $(XNU_PRIVATE_UNIFDEF) -UKERNEL_PRIVATE -UKERNEL -UPRIVATE -D_OPEN_SOURCE_
-KPINCFRAME_UNIFDEF = $(XNU_PRIVATE_UNIFDEF) -DKERNEL_PRIVATE -DPRIVATE -DKERNEL -U_OPEN_SOURCE_
-KINCFRAME_UNIFDEF  = $(XNU_PRIVATE_UNIFDEF) -UKERNEL_PRIVATE -UPRIVATE -DKERNEL -D_OPEN_SOURCE_
+PLATFORM_UNIFDEF = $(foreach x,$(SUPPORTED_PLATFORMS),$(if $(filter $(PLATFORM),$(x)),-DPLATFORM_$(x),-UPLATFORM_$(x)))
+
+SPINCFRAME_UNIFDEF = $(PLATFORM_UNIFDEF) $(XNU_PRIVATE_UNIFDEF) -UKERNEL_PRIVATE -UKERNEL -DPRIVATE -U_OPEN_SOURCE_
+SINCFRAME_UNIFDEF  = $(PLATFORM_UNIFDEF) $(XNU_PRIVATE_UNIFDEF) -UKERNEL_PRIVATE -UKERNEL -UPRIVATE -D_OPEN_SOURCE_
+KPINCFRAME_UNIFDEF = $(PLATFORM_UNIFDEF) $(XNU_PRIVATE_UNIFDEF) -DKERNEL_PRIVATE -DPRIVATE -DKERNEL -U_OPEN_SOURCE_
+KINCFRAME_UNIFDEF  = $(PLATFORM_UNIFDEF) $(XNU_PRIVATE_UNIFDEF) -UKERNEL_PRIVATE -UPRIVATE -DKERNEL -D_OPEN_SOURCE_
 
 
 #
@@ -504,7 +583,6 @@ export STRIP_FLAGS 	= $($(addsuffix $(KERNEL_CONFIG),STRIP_FLAGS_))
 #
 export DSYMUTIL_FLAGS_I386 = --arch=i386
 export DSYMUTIL_FLAGS_X86_64 = --arch=x86_64
-export DSYMUTIL_FLAGS_PPC = --arch=ppc
 export DSYMUTIL_FLAGS_ARM = --arch=arm
 
 export DSYMUTIL_FLAGS = $($(addsuffix $(ARCH_CONFIG),DSYMUTIL_FLAGS_))
diff --git a/makedefs/MakeInc.dir b/makedefs/MakeInc.dir
index 7f98650e6..b4b594cd6 100644
--- a/makedefs/MakeInc.dir
+++ b/makedefs/MakeInc.dir
@@ -1,15 +1,25 @@
 #
 # Install kernel header files
 #
+.PHONY: installhdrs
+
 ifeq ($(RC_ProjectName),Libsyscall)
 installhdrs:
-	bsdmake -C libsyscall installhdrs
+	cd libsyscall ; \
+		sdk="$(SDKROOT)" ;								\
+		if [ $${sdk} = / ] ; then						\
+			sdk="" ;									\
+		fi;												\
+		xcrun -sdk "$(SDKROOT)" xcodebuild installhdrs 	\
+			"SRCROOT=$(SRCROOT)/libsyscall" 				\
+			"OBJROOT=$(OBJROOT)" 						\
+			"SYMROOT=$(SYMROOT)" 						\
+			"DSTROOT=$(DSTROOT)"						\
+			"SDKROOT=$${sdk}"
 else ifeq ($(findstring libkxld,$(RC_ProjectName)),libkxld)
 installhdrs:
 	make -C libkern/kxld/ installhdrs
-else ifeq ($(RC_ProjectName),xnu_debug)
-installhdrs:
-else # xnu
+else # xnu, xnu_debug, or xnu_headers_Sim
 installhdrs: exporthdrs installhdrs_mi installhdrs_md 
 	@echo "[ $(SRCROOT) ] make installhdrs installing Kernel.framework"
 	$(_v)kincpath=$(DSTROOT)/$(KINCDIR);					\
@@ -28,28 +38,52 @@ installhdrs: exporthdrs installhdrs_mi installhdrs_md
 	[ -d $(DSTROOT)/$(KPINCDIR) ] || $(MKDIR) $(DSTROOT)/$(KPINCDIR);	\
 	cd $$kframepath; [ -L PrivateHeaders ] ||		 	\
 	    $(LN) Versions/Current/PrivateHeaders PrivateHeaders;
+ifeq ($(RC_ProjectName),xnu_headers_Sim)
+	@echo "[ $(SRCROOT) ] make installhdrs installing System.framework"
+	$(_v)spincpath=$(DSTROOT)/$(SPINCDIR);				\
+	sframepath=$(DSTROOT)/$(SINCFRAME);				\
+	[ -d $$spincpath ] || $(MKDIR) $$spincpath;			\
+	cd $$sframepath/Versions;					\
+	    [ -L Current ] || $(LN) $(SINCVERS) Current;		\
+	cd $$sframepath; [ -L PrivateHeaders ] ||		 	\
+	    $(LN) Versions/Current/PrivateHeaders PrivateHeaders;
+endif
+ifeq (iPhoneOS,$(PLATFORM))
+	$(_v)$(IPHONEOS_OPTIMIZE) $(DSTROOT)/$(KRESDIR)/Info.plist
+endif
 endif
 
 #
 # Install header files order
 #
 .ORDER: installhdrs_mi installhdrs_md
+.PHONY: installhdrs_mi installhdrs_md
 
 #
 # Install machine independent header files
 #
 installhdrs_mi:
-	$(_v)rel_path=$(shell $(RELPATH) $(SRCROOT) $(SOURCE));			\
+	$(_v)rel_path=$(shell $(RELPATH) $(SRCROOT) $(SOURCE));		\
 	kernel_config=$(INSTALL_TYPE);					\
-	arch_config=$(INSTALL_ARCH_DEFAULT);					\
-	installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path};	\
-	[ -d $${installinc_dir} ] ||$(MKDIR) $${installinc_dir};	\
+	machine_config=$(MACHINE_CONFIG);				\
+	arch_config=$(INSTALL_ARCH_DEFAULT);				\
+	if [ $${arch_config} = ARM ] ; then				\
+		if [ $${machine_config} = DEFAULT ] ; then		\
+			machine_config=$(DEFAULT_ARM_MACHINE_CONFIG);	\
+		fi;							\
+	fi;								\
+	if [ $${machine_config} = DEFAULT ] ; then			\
+		installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path};	\
+	else								\
+		installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/$${rel_path};	\
+	fi;								\
+	[ -d $${installinc_dir} ] || $(MKDIR) $${installinc_dir};	\
 	${MAKE} ${MAKEJOBS} -C $${installinc_dir}			\
 		KERNEL_CONFIG=$${kernel_config}				\
 		ARCH_CONFIG=$${arch_config}				\
 		MAKEFILES=${SOURCE}/Makefile				\
 		SOURCE=${SOURCE}/					\
-		TARGET=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path}/	\
+		TARGET=$${installinc_dir}/				\
 		build_installhdrs_mi;					\
 
 #
@@ -67,17 +101,17 @@ installhdrs_md:
 		fi;		\
 	fi;		\
 	if [ $${machine_config} = DEFAULT ] ; then	\
-		objpath=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path};	\
+		installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path};	\
 	else		\
-		objpath=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/$${rel_path};	\
+		installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/$${rel_path};	\
 	fi;		\
-	[ -d $${objpath} ] || $(MKDIR) $${objpath};	\
-	${MAKE} ${MAKEJOBS} -C $${objpath}				\
+	[ -d $${installinc_dir} ] || $(MKDIR) $${installinc_dir};	\
+	${MAKE} ${MAKEJOBS} -C $${installinc_dir}			\
 		KERNEL_CONFIG=$${kernel_config}				\
 		ARCH_CONFIG=$${arch_config}				\
 		MAKEFILES=${SOURCE}/Makefile				\
 		SOURCE=${SOURCE}/					\
-		TARGET=$${objpath}/			\
+		TARGET=$${installinc_dir}/				\
 		build_installhdrs_md;					\
 	done;
 
@@ -126,12 +160,15 @@ build_installhdrs_md: $(BUILD_INSTALLHDRS_MD_SUBDIRS_TARGETS)
 #
 # Install kernel header files
 #
+.PHONY: exporthdrs
+
 exporthdrs: exporthdrs_mi exporthdrs_md 
 
 #
 # Install header files order
 #
 .ORDER: exporthdrs_mi exporthdrs_md
+.PHONY: exporthdrs_mi exporthdrs_md
 
 #
 # Install machine independent header files
@@ -139,11 +176,21 @@ exporthdrs: exporthdrs_mi exporthdrs_md
 do_exporthdrs_mi:
 
 exporthdrs_mi:
-	$(_v)rel_path=$(shell $(RELPATH) $(SRCROOT) $(SOURCE));			\
+	$(_v)rel_path=$(shell $(RELPATH) $(SRCROOT) $(SOURCE));		\
 	kernel_config=$(INSTALL_TYPE);					\
-	arch_config=$(INSTALL_ARCH_DEFAULT);					\
-	exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path};	\
-	[ -d $${exportinc_dir} ] || $(MKDIR) $${exportinc_dir};					\
+	machine_config=$(MACHINE_CONFIG);				\
+	arch_config=$(INSTALL_ARCH_DEFAULT);				\
+	if [ $${arch_config} = ARM ] ; then				\
+		if [ $${machine_config} = DEFAULT ] ; then		\
+			machine_config=$(DEFAULT_ARM_MACHINE_CONFIG);	\
+		fi;							\
+	fi;								\
+	if [ $${machine_config} = DEFAULT ] ; then			\
+		exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path};	\
+	else								\
+		exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/$${rel_path};	\
+	fi;								\
+	[ -d $${exportinc_dir} ] || $(MKDIR) $${exportinc_dir};		\
 	${MAKE} ${MAKEJOBS} -C $${exportinc_dir}			\
 		KERNEL_CONFIG=$${kernel_config}				\
 		ARCH_CONFIG=$${arch_config}				\
@@ -253,23 +300,21 @@ build_exporthdrs_md: $(BUILD_EXPORTHDRS_MD_SUBDIRS_TARGETS)
 #
 # Setup pass for  all architectures for all Configuration/Architecture options
 #
+.PHONY: setup
+
 setup:
-	$(_v)rel_path=$(shell $(RELPATH) $(SRCROOT) $(SOURCE));			\
-	for kernel_config in $(KERNEL_CONFIGS);				\
-	do								\
-	for arch_config in $(ARCH_CONFIGS);				\
-	do								\
-	setup_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path}; \
-	[ -d $${setup_subdir} ] || $(MKDIR) $${setup_subdir};					\
-	${MAKE} -C $${setup_subdir}					\
+	$(_v)rel_path=$(shell $(RELPATH) $(SRCROOT) $(SOURCE));		\
+	kernel_config=$(INSTALL_TYPE);					\
+	arch_config=$(INSTALL_ARCH_DEFAULT);				\
+	setup_subdir=${OBJROOT}/$${rel_path}; 				\
+	[ -d $${setup_subdir} ] || $(MKDIR) $${setup_subdir};		\
+	${MAKE} ${MAKEJOBS} -C $${setup_subdir}				\
 		KERNEL_CONFIG=$${kernel_config}				\
 		ARCH_CONFIG=$${arch_config}				\
 		MAKEFILES=${SOURCE}/Makefile				\
 		SOURCE=${SOURCE}/					\
 		TARGET=$${setup_subdir}/				\
-	build_setup;							\
-	done;								\
-	done;
+	build_setup;
 
 do_build_setup:
 
@@ -297,15 +342,32 @@ build_setup: $(BUILD_SETUP_SUBDIRS_TARGETS)
 # item in the set is the kernel configuration.  The second item in the set is the architecture  
 # and the third item is the machine configuration.  There may be multiple sets to build.
 #
+.PHONY: all
+
 ifeq ($(RC_ProjectName),Libsyscall)
 all:
-	bsdmake -C libsyscall install
-else ifeq ($(findstring libkxld,$(RC_ProjectName)),libkxld)
+	cd libsyscall ; \
+		sdk="$(SDKROOT)" ;								\
+		if [ $${sdk} = / ] ; then						\
+			sdk="" ;									\
+		fi;												\
+		xcrun -sdk "$(SDKROOT)" xcodebuild install	 	\
+			"SRCROOT=$(SRCROOT)/libsyscall"					\
+			"OBJROOT=$(OBJROOT)" 						\
+			"SYMROOT=$(SYMROOT)" 						\
+			"DSTROOT=$(DSTROOT)"						\
+			"SDKROOT=$${sdk}"
+else ifeq ($(RC_ProjectName),libkxld)
 all:
 	make -C libkern/kxld/ install
+else ifeq ($(RC_ProjectName),libkxld_host)
+all:
+	make -C libkern/kxld/ install PRODUCT_TYPE=ARCHIVE
+else ifeq ($(RC_ProjectName),xnu_headers_Sim)
+all: exporthdrs
 else # xnu or xnu_debug
 ifeq ($(COMPONENT), .)
-all: exporthdrs
+all: exporthdrs setup
 else
 all:
 endif
@@ -462,6 +524,8 @@ install: installhdrs all installman installmachinekernels
 ifeq ($(RC_ProjectName),Libsyscall)
 # nothing to do
 else ifeq ($(findstring libkxld,$(RC_ProjectName)),libkxld)
+# nothing to do, work performed in "all" action
+else ifeq ($(RC_ProjectName),xnu_headers_Sim)
 # nothing to do
 else # xnu or xnu_debug
 	$(_v)rel_path=$(shell $(RELPATH) $(SRCROOT) $(SOURCE));			\
@@ -523,10 +587,10 @@ installmachinekernels:
 		machine_config=$${my_config};		\
 		if [ $${machine_config} != DEFAULT ] ; then	\
 			build_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config};	\
-			install_file_list=mach.`printf "%s" "$${kernel_config}" | $(TR) A-Z a-z`.`printf "%s" "$${machine_config}" | $(TR) A-Z a-z`;	\
+			install_kernel_file=mach.`printf "%s" "$${kernel_config}" | $(TR) A-Z a-z`.`printf "%s" "$${machine_config}" | $(TR) A-Z a-z`;	\
 			[ -d $${build_subdir} ] || $(MKDIR) $${build_subdir};					\
 			${MAKE} ${MAKEJOBS} -C $${build_subdir}					\
-				INSTALL_FILE_LIST=$${install_file_list}			\
+				INSTALL_KERNEL_FILE=$${install_kernel_file}		\
 				KERNEL_CONFIG=$${kernel_config}				\
 				ARCH_CONFIG=$${arch_config}				\
 				MACHINE_CONFIG=$${machine_config}			\
@@ -578,13 +642,17 @@ build_install: $(BUILD_INSTALL_SUBDIRS_TARGETS)
 #
 # Install source tree
 #
+.PHONY: installsrc
+
 installsrc:
-	$(_v)($(TAR) -c --mode go=r,+X --no-ignore-case --exclude .svn --exclude cscope.\* --exclude BUILD --exclude \*~ -f - .) | (cd $(SRCROOT) && $(TAR) --no-same-owner -xf -)
+	$(_v)($(TAR) -c --mode go=r,+X --no-ignore-case --exclude .svn --exclude .git --exclude cscope.\* --exclude BUILD --exclude \*~ -f - .) | (cd $(SRCROOT) && $(TAR) --no-same-owner -xf -)
 
 
 #
 # Clean up source tree
 #
+.PHONY: clean
+
 clean:
 
 #
@@ -626,12 +694,17 @@ TAGS: cscope.files
 #
 # Install Man Pages
 #
+.PHONY: installman
+
 installman:
 ifeq ($(RC_ProjectName),Libsyscall)
-	bsdmake -C libsyscall install-man
+# nothing to do
 else ifeq ($(findstring libkxld,$(RC_ProjectName)),libkxld)
 # nothing to do
-else # xnu or xnu_debug
+else ifeq ($(findstring xnu_,$(RC_ProjectName)),xnu_)
+installman:
+# nothing to do
+else # xnu
 	@echo "[ $(SRCROOT) ] Installing man pages"
 	$(_v)manpath=$(DSTROOT)/$(MANDIR);   \
 	[ -d $$manpath ] || $(MKDIR) $$manpath;             \
diff --git a/makedefs/MakeInc.rule b/makedefs/MakeInc.rule
index 3ba713083..b2d7e3af3 100644
--- a/makedefs/MakeInc.rule
+++ b/makedefs/MakeInc.rule
@@ -523,7 +523,9 @@ S_RULE_3=
 C_RULE_1A=$(_v)${KCC} -c ${filter-out ${$@_CFLAGS_RM}, ${CFLAGS} ${CWARNFLAGS}} -MD ${$@_CFLAGS_ADD} ${$@_CWARNFLAGS_ADD} ${INCFLAGS} ${$@_INCFLAGS} 
 C_RULE_1B=$*.c
 C_RULE_2=@echo CC $@
-ifeq    ($(BUILD_STABS),1)
+ifeq ($(BUILD_MACHO_OBJ),0)
+C_RULE_3=
+else ifeq ($(BUILD_STABS),1)
 C_RULE_3=
 else
 C_RULE_3=$(_v)${CTFCONVERT} -l xnu -v -o $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$@.ctf $@ > /dev/null && $(CTFSCRUB) `cat $(SRCROOT)/config/DtraceIgnored.symbols` $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$@.ctf || true;
@@ -547,7 +549,9 @@ P_RULE_1A=$(_v)${KC++} -o $@ -c ${CXXFLAGS} ${filter-out ${$@_CFLAGS_RM}, ${CFLA
 P_RULE_1B=$(<F)
 P_RULE_2=$(_v)sed 's/.c.o: /.cpo: /' $(@:.cpo=.d) > $(@:.cpo=.d~) && mv $(@:.cpo=.d~) $(@:.cpo=.d)
 P_RULE_3=@echo C++ $@
-ifeq    ($(BUILD_STABS),1)
+ifeq ($(BUILD_MACHO_OBJ),0)
+P_RULE_4=
+else ifeq ($(BUILD_STABS),1)
 P_RULE_4=
 else
 P_RULE_4=$(_v)${CTFCONVERT} -l xnu -v -o $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$@.ctf $@ > /dev/null && $(CTFSCRUB) `cat $(SRCROOT)/config/DtraceIgnored.symbols` $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$@.ctf || true;
@@ -568,25 +572,37 @@ STATIC_KMODS =  $(SRCROOT)/kmods.a
 
 do_build_mach_kernel: $(TARGET)/kgmacros $(TARGET)/mach_kernel
 
-$(TARGET)/mach_kernel: $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST), $(addprefix $(component)/$(firstword $($(addsuffix _KERNEL_CONFIG, $(shell printf $(component) | tr a-z A-Z))) $(KERNEL_CONFIG))/, $(addsuffix .o, $(component))))) lastkernelconstructor.o
+$(TARGET)/mach_kernel: $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST), $(addprefix $(component)/$(firstword $($(addsuffix _KERNEL_CONFIG, $(shell printf $(component) | tr a-z A-Z))) $(KERNEL_CONFIG))/, $(addsuffix .filelist, $(component))))) lastkernelconstructor.o
 	$(_v)${MAKE} version.o
 	$(_v)${MAKE} build_mach_kernel_exports
 	@echo LD mach_kernel.sys
-	$(_v)$(CAT) $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST), $(addprefix $(component)/$(firstword $($(addsuffix _KERNEL_CONFIG, $(shell printf $(component) | tr a-z A-Z))) $(KERNEL_CONFIG))/, $(addsuffix .o, $(component))))) > mach_kernel.filelist
-	$(_v)$(LD) $(LDFLAGS_KERNEL) -filelist mach_kernel.filelist version.o lastkernelconstructor.o `if [ -e $(STATIC_KMODS) ]; then echo $(STATIC_KMODS); fi` \
+	$(_v)$(CAT) $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST), $(addprefix $(component)/$(firstword $($(addsuffix _KERNEL_CONFIG, $(shell printf $(component) | tr a-z A-Z))) $(KERNEL_CONFIG))/, $(addsuffix .filelist, $(component))))) < /dev/null > link.filelist
+	$(_v)$(LD) $(LDFLAGS_KERNEL) -filelist link.filelist version.o lastkernelconstructor.o `if [ -e $(STATIC_KMODS) ]; then echo $(STATIC_KMODS); fi` \
 		-o $(TARGET)/mach_kernel.sys $(LD_KERNEL_LIBS)
-	@echo DSYMUTIL mach_kernel.sys
 	$(_v)if [ $(BUILD_DWARF)  -eq  1 ]; then \
+		echo DSYMUTIL mach_kernel.sys; \
 		$(DSYMUTIL) $(DSYMUTIL_FLAGS) $(TARGET)/mach_kernel.sys -o $(TARGET)/mach_kernel.sys.dSYM > /dev/null; \
+		$(INSTALL) $(INSTALL_FLAGS) $(SRCROOT)/kgmacros $(TARGET)/mach_kernel.sys.dSYM/$(DSYMRESDIR)/kgmacros; \
+	fi;
+	$(_v)if [ $(MACHINE_CONFIG) != DEFAULT ] ; then     \
+		kernel_file_name=mach.`printf "%s" "$(KERNEL_CONFIG)" | $(TR) A-Z a-z`.`printf "%s" "$(MACHINE_CONFIG)" | $(TR) A-Z a-z`; \
+		echo  kernel_file_name $${kernel_file_name}; \
+		[ -h  ${OBJROOT}/$${kernel_file_name} ] || $(LN) $(TARGET)/mach_kernel ${OBJROOT}/$${kernel_file_name};	\
 	fi;
 	@echo STRIP mach_kernel
 	$(_v)$(STRIP) $(STRIP_FLAGS) $(TARGET)/mach_kernel.sys -o $(TARGET)/mach_kernel
-	@echo CTFMERGE mach_kernel
-	$(_v)if [ $(BUILD_DWARF) -eq 1 ]; then			\
+	$(_v)if [ $(BUILD_MACHO_OBJ) -eq 1 -a $(BUILD_DWARF) -eq 1 ]; then			\
+		echo CTFMERGE mach_kernel;   		     	\
 		$(FIND) $(OBJPATH)/ -name \*.ctf -size 0	\
 			-exec $(RM) -rf {} \;	;		\
-		$(CTFMERGE) -l xnu -o $(TARGET)/mach_kernel 		\
-			$(OBJPATH)/*/$(KERNEL_CONFIG)/*.*o.ctf || true;	\
+		$(CTFMERGE) -l xnu -o $(TARGET)/mach_kernel     \
+			    -Z $(TARGET)/mach_kernel.ctfdata	\
+			    $(OBJPATH)/*/$(KERNEL_CONFIG)/*.*o.ctf || true; 	\
+		echo CTFINSERT mach_kernel;   		     	\
+		$(CTFINSERT) $(TARGET)/mach_kernel		\
+			 $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) $(TARGET)/mach_kernel.ctfdata \
+			 -o $(TARGET)/mach_kernel || true;	\
+			 $(RM) -f $(TARGET)/mach_kernel.ctfdata > /dev/null || true; \
 	fi;							\
 
 version.o: $(OBJPATH)/version.c
@@ -599,9 +615,13 @@ $(OBJPATH)/version.c: $(SRCROOT)/config/version.c $(NEWVERS) $(SRCROOT)/config/M
 	$(_v)$(CP) $< $@
 	$(_v)$(NEWVERS) $(OBJPATH)/version.c > /dev/null;
 
+# The leading "/" in "/libsa" is needed because TARGET already ends in "/."
+lastkernelconstructor.o: COMP_OBJ_DIR=/libsa
 lastkernelconstructor.o: $(SRCROOT)/libsa/lastkernelconstructor.c
+	$(_v)$(MKDIR) $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)
 	${C_RULE_1A}$<
 	${C_RULE_2}
+	${C_RULE_3}
 	${C_RULE_4}
 
 $(TARGET)/kgmacros: $(SRCROOT)/kgmacros
@@ -615,65 +635,19 @@ build_mach_kernel_exports:
 		TARGET=$${TARGET}			\
 	build_mach_kernel_exports;
 
-# Special rules to install machine configuration variants
-
-$(DSTROOT)$(INSTALL_FILE_DIR)mach.$(KERNEL_CONFIG_LC).$(MACHINE_CONFIG_LC): $(TARGET)/mach_kernel force_file_install
-	@echo Installing $< in $@;
-	$(_v)if [ ! -e $(DSTROOT)$(INSTALL_FILE_DIR) ]; then	\
-		$(MKDIR) $(DSTROOT)$(INSTALL_FILE_DIR);		\
-	fi;							\
-	if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then	\
-		$(RM) $(RMFLAGS) $@;				\
-		$(INSTALL) $(FILE_INSTALL_FLAGS) $< $@;		\
-	else							\
-		if [ ! -e $@ ]; then				\
-			print "" >empty_file_$(notdir $@);			\
-			lipo_arg="$(foreach lipo_arch,$(INSTALL_ARCHS),$(ARCH_FLAGS_$(lipo_arch)) empty_file_$(notdir $@))"; \
-			$(LIPO) $${lipo_arg} -create -output $@;	\
-			$(RM) $(RMFLAGS) empty_file_$(notdir $@);		\
-		fi;							\
-		$(LIPO) $@ -replace $(subst -arch,,$(ARCH_FLAGS_$(ARCH_CONFIG))) $< -o $@;	\
-	fi
-
-$(SYMROOT)$(INSTALL_FILE_DIR)mach.$(KERNEL_CONFIG_LC).$(MACHINE_CONFIG_LC): $(TARGET)/mach_kernel.sys force_file_install
-	@echo Installing $< in $@;
-	$(_v)if [ ! -e $(SYMROOT)$(INSTALL_FILE_DIR) ]; then	\
-		$(MKDIR) $(SYMROOT)$(INSTALL_FILE_DIR);		\
-	fi;							\
-	if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then	\
-		$(RM) $(RMFLAGS) $@;				\
-		$(INSTALL) $(FILE_INSTALL_FLAGS) $< $@;		\
-		if [ $(BUILD_DWARF) -eq 1 ]; then			\
-			$(RM) -rf $@.dSYM;				\
-			$(MKDIR) -p -m 0755 $@.dSYM/$(DSYMBUILDDIR);	\
-			$(INSTALL) $(INSTALL_FLAGS)			\
-				$<.dSYM/$(DSYMBUILDDIR)/$(notdir $<)	\
-				$@.dSYM/$(DSYMBUILDDIR)/$(notdir $@);	\
-		fi;							\
-	else							\
-		if [ ! -e $@ ]; then				\
-			printf "" >empty_file_$(notdir $@);			\
-			lipo_arg="$(foreach lipo_arch,$(INSTALL_ARCHS),$(ARCH_FLAGS_$(lipo_arch)) empty_file_$(notdir $@))"; \
-			$(LIPO) $${lipo_arg} -create -output $@;	\
-			$(RM) $(RMFLAGS) empty_file_$(notdir $@);		\
-		fi;							\
-		$(LIPO) $@ -replace $(subst -arch,,$(ARCH_FLAGS_$(ARCH_CONFIG))) $< -o $@;	\
-	fi
-
 endif # mach_kernel-specific build rules
 
 #
-# Generic Install rules
+# Kernel Install rules
 #
-INSTALL_FILE_FILES = $(addprefix $(DSTROOT)$(INSTALL_FILE_DIR), $(INSTALL_FILE_LIST))
-INSTALL_FILE_FILES_GENERIC = $(filter-out $(DSTROOT)$(INSTALL_FILE_DIR)mach.$(KERNEL_CONFIG_LC).$(MACHINE_CONFIG_LC), $(INSTALL_FILE_FILES))
+INSTALL_KERNEL_FILE_FILES = $(addprefix $(DSTROOT)$(INSTALL_KERNEL_DIR), $(INSTALL_KERNEL_FILE))
 
-force_file_install:
+force_kernel_file_install:
 
-$(INSTALL_FILE_FILES_GENERIC): $(DSTROOT)$(INSTALL_FILE_DIR)% : $(TARGET)/% force_file_install
+$(INSTALL_KERNEL_FILE_FILES): $(TARGET)/mach_kernel force_kernel_file_install
 	@echo Installing $< in $@;
-	$(_v)if [ ! -e $(DSTROOT)$(INSTALL_FILE_DIR) ]; then		\
-		$(MKDIR) $(DSTROOT)$(INSTALL_FILE_DIR);			\
+	$(_v)if [ ! -e $(DSTROOT)$(INSTALL_KERNEL_DIR) ]; then		\
+		$(MKDIR) $(DSTROOT)$(INSTALL_KERNEL_DIR);		\
 	fi;								\
 	if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then		\
 		$(RM) $(RMFLAGS) $@;					\
@@ -688,28 +662,27 @@ $(INSTALL_FILE_FILES_GENERIC): $(DSTROOT)$(INSTALL_FILE_DIR)% : $(TARGET)/% forc
 		$(LIPO) $@ -replace $(subst -arch,,$(ARCH_FLAGS_$(ARCH_CONFIG))) $< -o $@;	\
 	fi
 
-INSTALL_FILESYS_FILES = $(addprefix $(SYMROOT)$(INSTALL_FILE_DIR), $(INSTALL_FILE_LIST))
-INSTALL_FILESYS_FILES_GENERIC = $(filter-out $(SYMROOT)$(INSTALL_FILE_DIR)mach.$(KERNEL_CONFIG_LC).$(MACHINE_CONFIG_LC), $(INSTALL_FILESYS_FILES))
+INSTALL_KERNEL_FILESYS_FILES = $(addprefix $(SYMROOT)$(INSTALL_KERNEL_DIR), $(INSTALL_KERNEL_FILE))
 
-force_filesys_install:
+force_kernel_filesys_install:
 
-$(INSTALL_FILESYS_FILES_GENERIC): $(SYMROOT)$(INSTALL_FILE_DIR)% : $(TARGET)/%.sys force_filesys_install
+$(INSTALL_KERNEL_FILESYS_FILES): $(TARGET)/mach_kernel.sys force_kernel_filesys_install
 	@echo Installing $< in $@;
-	$(_v)if [ ! -e $(SYMROOT)$(INSTALL_FILE_DIR) ]; then	\
-		$(MKDIR) $(SYMROOT)$(INSTALL_FILE_DIR);		\
+	$(_v)if [ ! -e $(SYMROOT)$(INSTALL_KERNEL_DIR) ]; then	\
+		$(MKDIR) $(SYMROOT)$(INSTALL_KERNEL_DIR);		\
 	fi;							\
 	if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then	\
 		$(RM) $(RMFLAGS) $@;				\
 		$(INSTALL) $(INSTALL_FLAGS) $< $@;			\
 		if [ $(BUILD_DWARF) -eq 1 ]; then			\
-			$(DSYMUTIL) $(DSYMUTIL_FLAGS)			\
-				$(TARGET)/mach_kernel.sys		\
-				-o $(TARGET)/mach_kernel.sys.dSYM;	\
 			$(RM) -rf $@.dSYM;				\
 			$(MKDIR) -p -m 0755 $@.dSYM/$(DSYMBUILDDIR);	\
 			$(INSTALL) $(INSTALL_FLAGS)			\
 				$<.dSYM/$(DSYMBUILDDIR)/$(notdir $<)	\
 				$@.dSYM/$(DSYMBUILDDIR)/$(notdir $@);	\
+			$(INSTALL) $(INSTALL_FLAGS)			\
+				$<.dSYM/$(DSYMRESDIR)/kgmacros		\
+				$@.dSYM/$(DSYMRESDIR)/kgmacros;		\
 		fi;							\
 	else							\
 		if [ ! -e $@ ]; then				\
@@ -730,29 +703,21 @@ $(INSTALL_FILESYS_FILES_GENERIC): $(SYMROOT)$(INSTALL_FILE_DIR)% : $(TARGET)/%.s
 					$@.dSYM/$(DSYMBUILDDIR)/$(notdir $@);  \
 				$(RM) $(RMFLAGS) empty_filesys_$(notdir $@);   \
 			fi;						       \
-			$(DSYMUTIL) $(DSYMUTIL_FLAGS)			\
-				$(TARGET)/mach_kernel.sys		\
-				-o $(TARGET)/mach_kernel.sys.dSYM;	\
 			$(LIPO) $@.dSYM/$(DSYMBUILDDIR)/$(notdir $@)	       \
 				-replace $(subst -arch,,$(ARCH_FLAGS_$(ARCH_CONFIG)))		       \
 				$<.dSYM/$(DSYMBUILDDIR)/$(notdir $<)	       \
 				-o $@.dSYM/$(DSYMBUILDDIR)/$(notdir $@);       \
+			$(INSTALL) $(INSTALL_FLAGS)			\
+				$<.dSYM/$(DSYMRESDIR)/kgmacros		\
+				$@.dSYM/$(DSYMRESDIR)/kgmacros;		\
 		fi;							       \
 	fi
 	$(INSTALL) $(INSTALL_FLAGS) $(SOURCE)kgmacros $(SYMROOT)$(INSTALL_FILE_DIR)
 
-INSTALL_DATA_FILES = $(addprefix $(DSTROOT)$(INSTALL_DATA_DIR), $(INSTALL_DATA_LIST))
-
-$(INSTALL_DATA_FILES): $(DSTROOT)$(INSTALL_DATA_DIR)% : $(SOURCE)/%
-	@echo Installing $< in $@;
-	$(_v)[ -d $(dir $@) ] ||$(MKDIR) $(dir $@);		\
-	$(RM) $(RMFLAGS) $@;		\
-	$(INSTALL) $(DATA_INSTALL_FLAGS) $< $(dir $@);
-
 setup_build_install:
 	@echo "[ $(SOURCE) ] make setup_build_install $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
 
-do_build_install: $(INSTALL_FILESYS_FILES) $(INSTALL_FILE_FILES) $(INSTALL_DATA_FILES)
+do_build_install: $(INSTALL_KERNEL_FILESYS_FILES) $(INSTALL_KERNEL_FILE_FILES)
 	@echo "[ $(SOURCE) ] make do_build_install $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
 
 INSTALL_MAN_FILES = $(addprefix $(DSTROOT)/$(MANDIR)/$(INSTALL_MAN_DIR)/, $(INSTALL_MAN_LIST))
diff --git a/osfmk/Makefile b/osfmk/Makefile
index f07b7e1f3..7b609b5ae 100644
--- a/osfmk/Makefile
+++ b/osfmk/Makefile
@@ -22,20 +22,13 @@ INSTINC_SUBDIRS = \
 	libsa \
 	kdp \
 	pmc
-
-INSTINC_SUBDIRS_PPC = \
-	mach \
-	ppc
-
 INSTINC_SUBDIRS_I386 = \
 	mach	\
 	i386
-
 INSTINC_SUBDIRS_X86_64 = \
 	mach	\
 	i386	\
 	x86_64
-
 INSTINC_SUBDIRS_ARM = \
 	mach	\
 	arm
@@ -58,25 +51,18 @@ EXPINC_SUBDIRS = \
 	kdp \
 	pmc
 
-EXPINC_SUBDIRS_PPC = \
-	mach \
-	ppc
-
 EXPINC_SUBDIRS_I386 = \
 	mach	\
 	i386
-
 EXPINC_SUBDIRS_X86_64 = \
 	mach	\
 	i386	\
 	x86_64
-
 EXPINC_SUBDIRS_ARM = \
 	mach	\
 	arm
 
-SETUP_SUBDIRS = 	\
-	conf
+SETUP_SUBDIRS =
 
 COMP_SUBDIRS = 	\
 	conf
diff --git a/osfmk/UserNotification/Makefile b/osfmk/UserNotification/Makefile
index edc1f17b4..98a5377e8 100644
--- a/osfmk/UserNotification/Makefile
+++ b/osfmk/UserNotification/Makefile
@@ -8,16 +8,12 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS =
 
-INSTINC_SUBDIRS_PPC = 
-
 INSTINC_SUBDIRS_I386 =
 
 INSTINC_SUBDIRS_ARM =
 
 EXPINC_SUBDIRS =
 
-EXPINC_SUBDIRS_PPC =
-
 EXPINC_SUBDIRS_I386 =
 
 EXPINC_SUBDIRS_ARM =
diff --git a/osfmk/UserNotification/UNDRequest.defs b/osfmk/UserNotification/UNDRequest.defs
index 9a3a7c954..bfe925c80 100644
--- a/osfmk/UserNotification/UNDRequest.defs
+++ b/osfmk/UserNotification/UNDRequest.defs
@@ -74,10 +74,7 @@ simpleroutine UNDDisplayCustomFromBundle_rpc(
 			in	messageKey: UNDKey;
 			in	tokenKey: UNDPath);
 
-simpleroutine UNDDisplayCustomFromDictionary_rpc(
-				server: UNDServerRef;
-			in	reply: UNDReplyRef;
-			in	data: xmlData);
+skip; /* was UNDDisplayCustomFromDictionary_rpc */
 
 simpleroutine UNDCancelNotification_rpc(
 				server: UNDServerRef;
diff --git a/osfmk/chud/chud_cpu.c b/osfmk/chud/chud_cpu.c
index 19b639cd7..c21a40eb2 100644
--- a/osfmk/chud/chud_cpu.c
+++ b/osfmk/chud/chud_cpu.c
@@ -73,24 +73,12 @@ chudxnu_cpu_number(void)
 #pragma mark **** interrupts enable/disable ****
 #endif
 
-__private_extern__ boolean_t
-chudxnu_get_interrupts_enabled(void)
-{
-    return ml_get_interrupts_enabled();
-}
-
 __private_extern__ boolean_t
 chudxnu_set_interrupts_enabled(boolean_t enable)
 {
     return ml_set_interrupts_enabled(enable);
 }
 
-__private_extern__ boolean_t
-chudxnu_at_interrupt_context(void)
-{
-    return ml_at_interrupt_context();
-}
-
 __private_extern__ void
 chudxnu_cause_interrupt(void)
 {
diff --git a/osfmk/chud/chud_thread.c b/osfmk/chud/chud_thread.c
index 0f955bb6e..97c07757b 100644
--- a/osfmk/chud/chud_thread.c
+++ b/osfmk/chud/chud_thread.c
@@ -46,8 +46,6 @@
 // include the correct file to find real_ncpus
 #if defined(__i386__) || defined(__x86_64__)
 #	include <i386/mp.h>	
-#elif defined(__ppc__) || defined(__ppc64__)
-#	include <ppc/cpu_internal.h>
 #else
 // fall back on declaring it extern.  The linker will sort us out.
 extern unsigned int real_ncpus;
@@ -124,6 +122,51 @@ chudxnu_thread_get_idle(thread_t thread) {
 	return ((thread->state & TH_IDLE) == TH_IDLE);
 }
 
+__private_extern__ int
+chudxnu_thread_get_scheduler_state(thread_t thread) {
+	/* 
+	 * Instantaneous snapshot of the scheduler state of
+	 * a given thread.
+	 *
+	 * MUST ONLY be called on an interrupted or 
+	 * locked thread, to avoid a race.
+	 */
+	
+	int state = 0;
+	int schedulerState = (volatile int)(thread->state);
+	processor_t lastProcessor = (volatile processor_t)(thread->last_processor);
+	
+	if ((PROCESSOR_NULL != lastProcessor) && (thread == lastProcessor->active_thread)) {
+		state |= CHUDXNU_TS_RUNNING;
+	}
+		
+	if (schedulerState & TH_RUN) {
+		state |= CHUDXNU_TS_RUNNABLE;
+	}
+	
+	if (schedulerState & TH_WAIT) {
+		state |= CHUDXNU_TS_WAIT;
+	}
+	
+	if (schedulerState & TH_UNINT) {
+		state |= CHUDXNU_TS_UNINT;
+	}
+	
+	if (schedulerState & TH_SUSP) {
+		state |= CHUDXNU_TS_SUSP;
+	}
+	
+	if (schedulerState & TH_TERMINATE) {
+		state |= CHUDXNU_TS_TERMINATE;
+	}	
+	
+	if (schedulerState & TH_IDLE) {
+		state |= CHUDXNU_TS_IDLE;
+	}
+	
+	return state;
+}
+
 #if 0
 #pragma mark **** task and thread info ****
 #endif
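
The chudxnu_thread_get_scheduler_state() routine added above folds the
thread's TH_* scheduler bits, plus a running check against its last
processor, into the CHUDXNU_TS_* mask that the chud_xnu.h hunk below
defines.  Several bits can be set at once (a runnable thread may also be
suspended, for instance), so callers should test each flag individually.
A self-contained sketch of such a decoder; the helper is illustrative,
not part of the CHUD API, and the constants are copied from this patch:

    #include <stdio.h>

    /* Values as added to osfmk/chud/chud_xnu.h by this patch. */
    enum {
        CHUDXNU_TS_RUNNING   = 0x1,
        CHUDXNU_TS_RUNNABLE  = 0x2,
        CHUDXNU_TS_WAIT      = 0x4,
        CHUDXNU_TS_UNINT     = 0x8,
        CHUDXNU_TS_SUSP      = 0x10,
        CHUDXNU_TS_TERMINATE = 0x20,
        CHUDXNU_TS_IDLE      = 0x40
    };

    /* Print every flag present in a scheduler-state mask. */
    static void print_sched_state(int state)
    {
        if (state & CHUDXNU_TS_RUNNING)   printf("running ");
        if (state & CHUDXNU_TS_RUNNABLE)  printf("runnable ");
        if (state & CHUDXNU_TS_WAIT)      printf("wait ");
        if (state & CHUDXNU_TS_UNINT)     printf("uninterruptible ");
        if (state & CHUDXNU_TS_SUSP)      printf("suspended ");
        if (state & CHUDXNU_TS_TERMINATE) printf("terminating ");
        if (state & CHUDXNU_TS_IDLE)      printf("idle ");
        printf("\n");
    }

    int main(void)
    {
        /* A thread can be runnable and suspended at the same time. */
        print_sched_state(CHUDXNU_TS_RUNNABLE | CHUDXNU_TS_SUSP);
        return 0;
    }
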
diff --git a/osfmk/chud/chud_xnu.h b/osfmk/chud/chud_xnu.h
index 91465bd61..2e8168577 100644
--- a/osfmk/chud/chud_xnu.h
+++ b/osfmk/chud/chud_xnu.h
@@ -71,7 +71,6 @@ extern kern_return_t chudxnu_unbind_thread(thread_t thread, int options);
 
 extern kern_return_t chudxnu_thread_get_state(thread_t thread, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t *count, boolean_t user_only);
 extern kern_return_t chudxnu_thread_set_state(thread_t thread, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count, boolean_t user_only);
-extern kern_return_t chudxnu_thread_user_state_available(thread_t thread);
 
 extern kern_return_t chudxnu_thread_get_callstack64(thread_t thread, uint64_t *callStack, mach_msg_type_number_t *count, boolean_t user_only);
 
@@ -84,12 +83,22 @@ extern kern_return_t chudxnu_free_thread_list(thread_array_t *thread_list, mach_
 
 extern kern_return_t chudxnu_thread_info(  thread_t thread, thread_flavor_t flavor, thread_info_t thread_info_out, mach_msg_type_number_t *thread_info_count);
 
-extern kern_return_t chudxnu_thread_last_context_switch(thread_t thread, uint64_t *timestamp);
-
 extern boolean_t chudxnu_thread_set_marked(thread_t thread, boolean_t marked);
 extern boolean_t chudxnu_thread_get_marked(thread_t thread);
 extern boolean_t chudxnu_thread_get_idle(thread_t thread);
 
+enum {
+	CHUDXNU_TS_RUNNING = 0x1,
+	CHUDXNU_TS_RUNNABLE = 0x2,
+	CHUDXNU_TS_WAIT = 0x4,
+	CHUDXNU_TS_UNINT = 0x8,
+	CHUDXNU_TS_SUSP = 0x10,
+	CHUDXNU_TS_TERMINATE = 0x20,
+	CHUDXNU_TS_IDLE = 0x40
+};
+
+extern int chudxnu_thread_get_scheduler_state(thread_t thread);
+
 #if 0
 #pragma mark **** memory ****
 #endif
@@ -114,9 +123,7 @@ extern int chudxnu_cpu_number(void);
 
 extern kern_return_t chudxnu_enable_cpu(int cpu, boolean_t enable);
 
-extern boolean_t chudxnu_get_interrupts_enabled(void);
 extern boolean_t chudxnu_set_interrupts_enabled(boolean_t enable);
-extern boolean_t chudxnu_at_interrupt_context(void);
 extern void chudxnu_cause_interrupt(void);
 
 extern void chudxnu_enable_preemption(void);
@@ -246,15 +253,9 @@ extern kern_return_t chudxnu_set_shadowed_spr64(int cpu, int spr, uint64_t val);
 extern kern_return_t chudxnu_enable_cpu_nap(int cpu, boolean_t enable);
 extern boolean_t chudxnu_cpu_nap_enabled(int cpu);
 
-extern uint32_t chudxnu_get_orig_cpu_l2cr(int cpu);
-extern uint32_t chudxnu_get_orig_cpu_l3cr(int cpu);
-
 extern kern_return_t chudxnu_read_spr(int cpu, int spr, uint32_t *val_p);
 extern kern_return_t chudxnu_read_spr64(int cpu, int spr, uint64_t *val_p);
 extern kern_return_t chudxnu_write_spr(int cpu, int spr, uint32_t val);
 extern kern_return_t chudxnu_write_spr64(int cpu, int spr, uint64_t val);
 
-extern void chudxnu_flush_caches(void);
-extern void chudxnu_enable_caches(boolean_t enable);
-
 #endif /* _CHUD_XNU_H_ */
diff --git a/osfmk/chud/chud_xnu_glue.h b/osfmk/chud/chud_xnu_glue.h
index 20626c064..b2ac2189c 100644
--- a/osfmk/chud/chud_xnu_glue.h
+++ b/osfmk/chud/chud_xnu_glue.h
@@ -26,9 +26,7 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
-#if defined (__ppc__)
-#include "ppc/chud_xnu_glue.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/chud_xnu_glue.h"
 #else
 #error architecture not supported
diff --git a/osfmk/chud/chud_xnu_private.h b/osfmk/chud/chud_xnu_private.h
index 0932a6497..56b6eb22c 100644
--- a/osfmk/chud/chud_xnu_private.h
+++ b/osfmk/chud/chud_xnu_private.h
@@ -33,9 +33,7 @@
 #include <mach/boolean.h>
 #include <mach/mach_types.h>
 
-#if defined (__ppc__)
-#include "chud/ppc/chud_xnu_private.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "chud/i386/chud_xnu_private.h"
 #else
 #error architecture not supported
diff --git a/osfmk/chud/i386/chud_osfmk_callback_i386.c b/osfmk/chud/i386/chud_osfmk_callback_i386.c
index b3fc4d685..aa576cbc7 100644
--- a/osfmk/chud/i386/chud_osfmk_callback_i386.c
+++ b/osfmk/chud/i386/chud_osfmk_callback_i386.c
@@ -73,6 +73,11 @@ void chudxnu_cancel_all_callbacks(void)
 	chudxnu_dtrace_callback_cancel();
 }
 
+static lck_grp_t	chud_request_lck_grp;
+static lck_grp_attr_t	chud_request_lck_grp_attr;
+static lck_attr_t	chud_request_lck_attr;
+
+
 static chudcpu_data_t chudcpu_boot_cpu;
 void *
 chudxnu_cpu_alloc(boolean_t boot_processor)
@@ -81,6 +86,11 @@ chudxnu_cpu_alloc(boolean_t boot_processor)
 
 	if (boot_processor) {
 		chud_proc_info = &chudcpu_boot_cpu;
+
+		lck_attr_setdefault(&chud_request_lck_attr);
+		lck_grp_attr_setdefault(&chud_request_lck_grp_attr);
+		lck_grp_init(&chud_request_lck_grp, "chud_request", &chud_request_lck_grp_attr);
+
 	} else {
 		chud_proc_info = (chudcpu_data_t *)
 					kalloc(sizeof(chudcpu_data_t));
@@ -90,7 +100,8 @@ chudxnu_cpu_alloc(boolean_t boot_processor)
 	}
 	bzero((char *)chud_proc_info, sizeof(chudcpu_data_t));
 	chud_proc_info->t_deadline = 0xFFFFFFFFFFFFFFFFULL;
-	mpqueue_init(&chud_proc_info->cpu_request_queue);
+
+	mpqueue_init(&chud_proc_info->cpu_request_queue, &chud_request_lck_grp, &chud_request_lck_attr);
 
 
 	return (void *)chud_proc_info;
@@ -161,7 +172,8 @@ chudxnu_cpu_timer_callback_enter(
 	timer_call_setup(&(chud_proc_info->cpu_timer_call),
 			 chudxnu_private_cpu_timer_callback, NULL);
 	timer_call_enter(&(chud_proc_info->cpu_timer_call),
-			 chud_proc_info->t_deadline);
+			 chud_proc_info->t_deadline,
+			 TIMER_CALL_CRITICAL|TIMER_CALL_LOCAL);
 
 	ml_set_interrupts_enabled(oldlevel);
 	return KERN_SUCCESS;
@@ -316,46 +328,40 @@ static kern_return_t chud_null_ast(thread_flavor_t flavor __unused,
 }
 
 static kern_return_t
-chudxnu_private_chud_ast_callback(
-	int			trapno,
-	void			*regs,
-	int			unused1,
-	int			unused2)
-{
-#pragma unused (trapno)
-#pragma unused (regs)
-#pragma unused (unused1)
-#pragma unused (unused2)
-	boolean_t	oldlevel = ml_set_interrupts_enabled(FALSE);
-	ast_t		*myast = ast_pending();
-	kern_return_t	retval = KERN_FAILURE;
+chudxnu_private_chud_ast_callback(ast_t reasons, ast_t *myast)
+{	
+	boolean_t oldlevel = ml_set_interrupts_enabled(FALSE);
+	kern_return_t retval = KERN_FAILURE;
 	chudxnu_perfmon_ast_callback_func_t fn = perfmon_ast_callback_fn;
-    
-	if (*myast & AST_CHUD_URGENT) {
-		*myast &= ~(AST_CHUD_URGENT | AST_CHUD);
-		if ((*myast & AST_PREEMPTION) != AST_PREEMPTION)
-			*myast &= ~(AST_URGENT);
-		retval = KERN_SUCCESS;
-	} else if (*myast & AST_CHUD) {
-		*myast &= ~(AST_CHUD);
-		retval = KERN_SUCCESS;
-	}
-
+	
 	if (fn) {
-		x86_thread_state_t state;
-		mach_msg_type_number_t count;
-		count = x86_THREAD_STATE_COUNT;
-
-		if (chudxnu_thread_get_state(
-			current_thread(),
-			x86_THREAD_STATE,
-			(thread_state_t) &state, &count,
-			TRUE) == KERN_SUCCESS) {
-
-			(fn)(
-				x86_THREAD_STATE,
-				(thread_state_t) &state,
-				count);
+		if ((*myast & AST_CHUD_URGENT) && (reasons & (AST_URGENT | AST_CHUD_URGENT))) { // Only execute urgent callbacks if reasons specifies an urgent context.
+			*myast &= ~AST_CHUD_URGENT;
+			
+			if (AST_URGENT == *myast) { // Clear AST_URGENT only when it is the last flag standing; if reasons carried other bits, someone else may still need it, and ast_taken will clear it in due course.
+				*myast = AST_NONE;
+			}
+			
+			retval = KERN_SUCCESS;
+		}
+		
+		if ((*myast & AST_CHUD) && (reasons & AST_CHUD)) { // Run non-urgent callbacks only when reasons explicitly includes AST_CHUD, i.e. ast_taken was called with AST_CHUD itself or with AST_ALL.
+			*myast &= ~AST_CHUD;
+			retval = KERN_SUCCESS;
+		}
+	
+		if (KERN_SUCCESS == retval) {
+			x86_thread_state_t state;
+			mach_msg_type_number_t count = x86_THREAD_STATE_COUNT;
+			thread_t thread = current_thread();
+			
+			if (KERN_SUCCESS == chudxnu_thread_get_state(thread,
+														 x86_THREAD_STATE,
+														 (thread_state_t)&state,
+														 &count,
+														 (thread->task != kernel_task))) {
+				(fn)(x86_THREAD_STATE, (thread_state_t)&state, count);
+			}
 		}
 	}
     
@@ -425,6 +431,9 @@ static kern_return_t chud_null_int(uint32_t trapentry __unused, thread_flavor_t
 	return KERN_FAILURE;
 }
 
+static void
+chudxnu_private_interrupt_callback(void *foo) __attribute__((used));
+
 static void
 chudxnu_private_interrupt_callback(void *foo)
 {
@@ -460,7 +469,6 @@ chudxnu_interrupt_callback_enter(chudxnu_interrupt_callback_func_t func)
 	if(OSCompareAndSwapPtr(chud_null_int, func, 
 		(void * volatile *)&interrupt_callback_fn)) {
 		lapic_set_pmi_func((i386_intr_func_t)chudxnu_private_interrupt_callback);
-
 		return KERN_SUCCESS;
 	}
     return KERN_FAILURE;
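
chudxnu_interrupt_callback_enter() above installs a handler only when the
slot still holds the chud_null_int placeholder, so a racing second
registration fails rather than silently replacing the first.  A user-space
analogue of that compare-and-swap registration pattern, sketched with C11
atomics in place of the kernel's OSCompareAndSwapPtr(); the names here are
illustrative:

    #include <stdatomic.h>
    #include <stdio.h>

    typedef void (*callback_fn)(void);

    static void null_callback(void) { }   /* placeholder, like chud_null_int */

    static _Atomic(callback_fn) callback_slot = null_callback;

    /* Install fn only if the slot still holds the placeholder. */
    static int callback_enter(callback_fn fn)
    {
        callback_fn expected = null_callback;
        return atomic_compare_exchange_strong(&callback_slot, &expected, fn)
            ? 0 : -1;   /* 0 ~ KERN_SUCCESS, -1 ~ KERN_FAILURE */
    }

    static void my_handler(void) { printf("handler invoked\n"); }

    int main(void)
    {
        printf("first register:  %d\n", callback_enter(my_handler));  /* 0  */
        printf("second register: %d\n", callback_enter(my_handler));  /* -1 */
        atomic_load(&callback_slot)();   /* runs my_handler */
        return 0;
    }
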
diff --git a/osfmk/chud/i386/chud_thread_i386.c b/osfmk/chud/i386/chud_thread_i386.c
index f5c992fef..a8edff8fa 100644
--- a/osfmk/chud/i386/chud_thread_i386.c
+++ b/osfmk/chud/i386/chud_thread_i386.c
@@ -49,13 +49,6 @@
 #pragma mark **** thread state ****
 #endif
 
-__private_extern__ kern_return_t
-chudxnu_thread_user_state_available(thread_t thread)
-{
-#pragma unused (thread)
-	return KERN_SUCCESS;
-}
-
 __private_extern__ kern_return_t
 chudxnu_thread_get_state(
 						 thread_t	 	thread, 
diff --git a/osfmk/chud/ppc/chud_cpu_asm.h b/osfmk/chud/ppc/chud_cpu_asm.h
deleted file mode 100644
index a385f7664..000000000
--- a/osfmk/chud/ppc/chud_cpu_asm.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#ifndef _CHUD_CPU_ASM_H_
-#define _CHUD_CPU_ASM_H_
-
-kern_return_t mfspr64(uint64_t *val, int spr);
-kern_return_t mfmsr64(uint64_t *val);
-
-kern_return_t mtspr64(int spr, uint64_t *val);
-kern_return_t mtmsr64(uint64_t *val);
-
-#endif // _CHUD_CPU_ASM_H_
diff --git a/osfmk/chud/ppc/chud_cpu_asm.s b/osfmk/chud/ppc/chud_cpu_asm.s
deleted file mode 100644
index 81482361a..000000000
--- a/osfmk/chud/ppc/chud_cpu_asm.s
+++ /dev/null
@@ -1,593 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
- 
-#include <chud/ppc/chud_spr.h>
-#include <ppc/asm.h>
-#include <mach/kern_return.h>
-
-/*
- * kern_return_t mfspr64(uint64_t *val, int spr);
- * 
- * r3: address to store value in
- * r4: spr to read from
- *
- */
- 
-;           Force a line boundry here
-            .align  5
-            .globl  EXT(mfspr64)
-
-EXT(mfspr64):
-            ;; generic PPC 64-bit wide SPRs
-            cmpwi	r4,chud_ppc_srr0
-            beq		mfspr64_srr0
-            cmpwi	r4,chud_ppc_srr1
-            beq		mfspr64_srr1
-            cmpwi	r4,chud_ppc_dar
-            beq		mfspr64_dar
-            cmpwi	r4,chud_ppc_sdr1
-            beq		mfspr64_sdr1
-            cmpwi	r4,chud_ppc_sprg0
-            beq		mfspr64_sprg0       
-            cmpwi	r4,chud_ppc_sprg1
-            beq		mfspr64_sprg1  
-            cmpwi	r4,chud_ppc_sprg2
-            beq		mfspr64_sprg2  
-            cmpwi	r4,chud_ppc_sprg3
-            beq		mfspr64_sprg3
-            cmpwi	r4,chud_ppc64_asr
-            beq		mfspr64_asr
-            cmpwi	r4,chud_ppc_dabr
-            beq		mfspr64_dabr
-            
-            ;; GPUL specific 64-bit wide SPRs
-            cmpwi	r4,chud_970_hid0
-            beq		mfspr64_hid0
-            cmpwi	r4,chud_970_hid1
-            beq		mfspr64_hid1
-            cmpwi	r4,chud_970_hid4
-            beq		mfspr64_hid4
-            cmpwi	r4,chud_970_hid5
-            beq		mfspr64_hid5       
-            cmpwi	r4,chud_970_mmcr0
-            beq		mfspr64_mmcr0            
-            cmpwi	r4,chud_970_mmcr1
-            beq		mfspr64_mmcr1
-            cmpwi	r4,chud_970_mmcra
-            beq		mfspr64_mmcra
-            cmpwi	r4,chud_970_siar
-            beq		mfspr64_siar
-            cmpwi	r4,chud_970_sdar
-            beq		mfspr64_sdar
-            cmpwi	r4,chud_970_imc
-            beq		mfspr64_imc
-            cmpwi	r4,chud_970_rmor
-            beq		mfspr64_rmor
-            cmpwi	r4,chud_970_hrmor
-            beq		mfspr64_hrmor
-            cmpwi	r4,chud_970_hior
-            beq		mfspr64_hior
-            cmpwi	r4,chud_970_lpidr
-            beq		mfspr64_lpidr
-            cmpwi	r4,chud_970_lpcr
-            beq		mfspr64_lpcr
-            cmpwi	r4,chud_970_dabrx
-            beq		mfspr64_dabrx
-            cmpwi	r4,chud_970_hsprg0
-            beq		mfspr64_hsprg0
-            cmpwi	r4,chud_970_hsprg1
-            beq		mfspr64_hsprg1
-            cmpwi	r4,chud_970_hsrr0
-            beq		mfspr64_hsrr0
-            cmpwi	r4,chud_970_hsrr1
-            beq		mfspr64_hsrr1
-            cmpwi	r4,chud_970_hdec
-            beq		mfspr64_hdec
-            cmpwi	r4,chud_970_trig0
-            beq		mfspr64_trig0
-            cmpwi	r4,chud_970_trig1
-            beq		mfspr64_trig1
-            cmpwi	r4,chud_970_trig2
-            beq		mfspr64_trig2
-            cmpwi	r4,chud_ppc64_accr
-            beq		mfspr64_accr
-            cmpwi	r4,chud_970_scomc
-            beq		mfspr64_scomc
-            cmpwi	r4,chud_970_scomd
-            beq		mfspr64_scomd
-                                                                                                                                                            
-            b		mfspr64_failure
-            
-mfspr64_srr0:
-            mfspr	r5,chud_ppc_srr0
-            std		r5,0(r3)
-            b		mfspr64_success
-mfspr64_srr1:
-            mfspr	r5,chud_ppc_srr1
-            std		r5,0(r3)
-            b		mfspr64_success
-mfspr64_dar:
-            mfspr	r5,chud_ppc_dar
-            std		r5,0(r3)
-            b		mfspr64_success          
-mfspr64_sdr1:
-            mfspr	r5,chud_ppc_sdr1
-            std		r5,0(r3)
-            b		mfspr64_success 
-mfspr64_sprg0:
-            mfspr	r5,chud_ppc_sprg0
-            std		r5,0(r3)
-            b		mfspr64_success 
-mfspr64_sprg1:
-            mfspr	r5,chud_ppc_sprg1
-            std		r5,0(r3)
-            b		mfspr64_success 
-mfspr64_sprg2:
-            mfspr	r5,chud_ppc_sprg2
-            std		r5,0(r3)
-            b		mfspr64_success 
-mfspr64_sprg3:
-            mfspr	r5,chud_ppc_sprg3
-            std		r5,0(r3)
-            b		mfspr64_success 
-mfspr64_asr:
-            mfspr	r5,chud_ppc64_asr
-            std		r5,0(r3)
-            b		mfspr64_success             
-mfspr64_dabr:
-            mfspr	r5,chud_ppc_dabr
-            std		r5,0(r3)
-            b		mfspr64_success 
-mfspr64_hid0:
-            mfspr	r5,chud_970_hid0
-            std		r5,0(r3)
-            b		mfspr64_success            
-mfspr64_hid1:
-            mfspr	r5,chud_970_hid1
-            std		r5,0(r3)
-            b		mfspr64_success     
-mfspr64_hid4:
-            mfspr	r5,chud_970_hid4
-            std		r5,0(r3)
-            b		mfspr64_success             
-mfspr64_hid5:
-            mfspr	r5,chud_970_hid5
-            std		r5,0(r3)
-            b		mfspr64_success
-mfspr64_mmcr0:
-            mfspr	r5,chud_970_mmcr0
-            std		r5,0(r3)
-            b		mfspr64_success
-mfspr64_mmcr1:
-            mfspr	r5,chud_970_mmcr1
-            std		r5,0(r3)
-            b		mfspr64_success
-mfspr64_mmcra:
-            mfspr	r5,chud_970_mmcra
-            std		r5,0(r3)
-            b		mfspr64_success
-mfspr64_siar:
-            mfspr	r5,chud_970_siar
-            std		r5,0(r3)
-            b		mfspr64_success            
-mfspr64_sdar:
-            mfspr	r5,chud_970_sdar
-            std		r5,0(r3)
-            b		mfspr64_success              
-mfspr64_imc:
-            mfspr	r5,chud_970_imc
-            std		r5,0(r3)
-            b		mfspr64_success                          
-mfspr64_rmor:
-            mfspr	r5,chud_970_rmor
-            std		r5,0(r3)
-            b		mfspr64_success              
-mfspr64_hrmor:
-            mfspr	r5,chud_970_hrmor
-            std		r5,0(r3)
-            b		mfspr64_success  
-mfspr64_hior:
-            mfspr	r5,chud_970_hior
-            std		r5,0(r3)
-            b		mfspr64_success  
-mfspr64_lpidr:
-            mfspr	r5,chud_970_lpidr
-            std		r5,0(r3)
-            b		mfspr64_success   
-mfspr64_lpcr:
-            mfspr	r5,chud_970_lpcr
-            std		r5,0(r3)
-            b		mfspr64_success  
-mfspr64_dabrx:
-            mfspr	r5,chud_970_dabrx
-            std		r5,0(r3)
-            b		mfspr64_success  
-mfspr64_hsprg0:
-            mfspr	r5,chud_970_hsprg0
-            std		r5,0(r3)
-            b		mfspr64_success   
-mfspr64_hsprg1:
-            mfspr	r5,chud_970_hsprg1
-            std		r5,0(r3)
-            b		mfspr64_success 
-mfspr64_hsrr0:
-            mfspr	r5,chud_970_hsrr0
-            std		r5,0(r3)
-            b		mfspr64_success 
-mfspr64_hsrr1:
-            mfspr	r5,chud_970_hsrr1
-            std		r5,0(r3)
-            b		mfspr64_success 
-mfspr64_hdec:
-            mfspr	r5,chud_970_hdec
-            std		r5,0(r3)
-            b		mfspr64_success             
-mfspr64_trig0:
-            mfspr	r5,chud_970_trig0
-            std		r5,0(r3)
-            b		mfspr64_success 
-mfspr64_trig1:
-            mfspr	r5,chud_970_trig1
-            std		r5,0(r3)
-            b		mfspr64_success
-mfspr64_trig2:
-            mfspr	r5,chud_970_trig2
-            std		r5,0(r3)
-            b		mfspr64_success
-mfspr64_accr:
-            mfspr	r5,chud_ppc64_accr
-            std		r5,0(r3)
-            b		mfspr64_success
-mfspr64_scomc:
-            mfspr	r5,chud_970_scomc
-            std		r5,0(r3)
-            b		mfspr64_success
-mfspr64_scomd:
-            mfspr	r5,chud_970_scomd
-            std		r5,0(r3)
-            b		mfspr64_success
-            
-mfspr64_failure:
-            li		r3,KERN_FAILURE
-            blr
-            
-mfspr64_success:
-            li		r3,KERN_SUCCESS
-            blr
-
-
-/*
- * kern_return_t mtspr64(int spr, uint64_t *val);
- * 
- * r3: spr to write to
- * r4: address to get value from
- *
- */
- 
-;           Force a line boundry here
-            .align  5
-            .globl  EXT(mtspr64)
-
-EXT(mtspr64):
-            ;; generic PPC 64-bit wide SPRs
-            cmpwi	r3,chud_ppc_srr0
-            beq		mtspr64_srr0
-            cmpwi	r3,chud_ppc_srr1
-            beq		mtspr64_srr1
-            cmpwi	r3,chud_ppc_dar
-            beq		mtspr64_dar
-            cmpwi	r3,chud_ppc_sdr1
-            beq		mtspr64_sdr1
-            cmpwi	r3,chud_ppc_sprg0
-            beq		mtspr64_sprg0       
-            cmpwi	r3,chud_ppc_sprg1
-            beq		mtspr64_sprg1  
-            cmpwi	r3,chud_ppc_sprg2
-            beq		mtspr64_sprg2  
-            cmpwi	r3,chud_ppc_sprg3
-            beq		mtspr64_sprg3
-            cmpwi	r3,chud_ppc64_asr
-            beq		mtspr64_asr
-            cmpwi	r3,chud_ppc_dabr
-            beq		mtspr64_dabr
-            
-            ;; GPUL specific 64-bit wide SPRs
-            cmpwi	r3,chud_970_hid0
-            beq		mtspr64_hid0
-            cmpwi	r3,chud_970_hid1
-            beq		mtspr64_hid1
-            cmpwi	r3,chud_970_hid4
-            beq		mtspr64_hid4
-            cmpwi	r3,chud_970_hid5
-            beq		mtspr64_hid5       
-            cmpwi	r3,chud_970_mmcr0
-            beq		mtspr64_mmcr0            
-            cmpwi	r3,chud_970_mmcr1
-            beq		mtspr64_mmcr1
-            cmpwi	r3,chud_970_mmcra
-            beq		mtspr64_mmcra
-            cmpwi	r3,chud_970_siar
-            beq		mtspr64_siar
-            cmpwi	r3,chud_970_sdar
-            beq		mtspr64_sdar
-            cmpwi	r3,chud_970_imc
-            beq		mtspr64_imc
-            cmpwi	r3,chud_970_rmor
-            beq		mtspr64_rmor
-            cmpwi	r3,chud_970_hrmor
-            beq		mtspr64_hrmor
-            cmpwi	r3,chud_970_hior
-            beq		mtspr64_hior
-            cmpwi	r3,chud_970_lpidr
-            beq		mtspr64_lpidr
-            cmpwi	r3,chud_970_lpcr
-            beq		mtspr64_lpcr
-            cmpwi	r3,chud_970_dabrx
-            beq		mtspr64_dabrx       
-            cmpwi	r3,chud_970_hsprg0
-            beq		mtspr64_hsprg0
-            cmpwi	r3,chud_970_hsprg1
-            beq		mtspr64_hsprg1
-            cmpwi	r3,chud_970_hsrr0
-            beq		mtspr64_hsrr0
-            cmpwi	r3,chud_970_hsrr1
-            beq		mtspr64_hsrr1
-            cmpwi	r3,chud_970_hdec
-            beq		mtspr64_hdec
-            cmpwi	r3,chud_970_trig0
-            beq		mtspr64_trig0
-            cmpwi	r3,chud_970_trig1
-            beq		mtspr64_trig1
-            cmpwi	r3,chud_970_trig2
-            beq		mtspr64_trig2
-            cmpwi	r3,chud_ppc64_accr
-            beq		mtspr64_accr
-            cmpwi	r3,chud_970_scomc
-            beq		mtspr64_scomc
-            cmpwi	r3,chud_970_scomd
-            beq		mtspr64_scomd
-                                                                                                                                                            
-            b		mtspr64_failure
-            
-mtspr64_srr0:
-            ld		r5,0(r4)
-            mtspr	chud_ppc_srr0,r5
-            b		mtspr64_success
-mtspr64_srr1:
-            ld		r5,0(r4)
-            mtspr	chud_ppc_srr1,r5
-            b		mtspr64_success
-mtspr64_dar:
-            ld		r5,0(r4)
-            mtspr	chud_ppc_dar,r5
-            b		mtspr64_success          
-mtspr64_sdr1:
-            ld		r5,0(r4)
-            mtspr	chud_ppc_sdr1,r5
-            b		mtspr64_success 
-mtspr64_sprg0:
-            ld		r5,0(r4)
-            mtspr	chud_ppc_sprg0,r5
-            b		mtspr64_success
-mtspr64_sprg1:
-            ld		r5,0(r4)
-            mtspr	chud_ppc_sprg1,r5
-            b		mtspr64_success 
-mtspr64_sprg2:
-            ld		r5,0(r4)
-            mtspr	chud_ppc_sprg2,r5
-            b		mtspr64_success 
-mtspr64_sprg3:
-            ld		r5,0(r4)
-            mtspr	chud_ppc_sprg3,r5
-            b		mtspr64_success 
-mtspr64_asr:
-            ld		r5,0(r4)
-            mtspr	chud_ppc64_asr,r5
-            b		mtspr64_success             
-mtspr64_dabr:
-            ld		r5,0(r4)
-            mtspr	chud_ppc_dabr,r5
-            b		mtspr64_success 
-mtspr64_hid0:
-            ld		r5,0(r4)
-            sync
-            mtspr	chud_970_hid0,r5
-            mfspr	r5,chud_970_hid0	/* syncronization requirements */
-            mfspr	r5,chud_970_hid0
-            mfspr	r5,chud_970_hid0
-            mfspr	r5,chud_970_hid0
-            mfspr	r5,chud_970_hid0
-            mfspr	r5,chud_970_hid0
-            b		mtspr64_success            
-mtspr64_hid1:
-            ld		r5,0(r4)
-            mtspr	chud_970_hid1,r5	/* tell you twice */
-            mtspr	chud_970_hid1,r5
-            isync
-            b		mtspr64_success     
-mtspr64_hid4:
-            ld		r5,0(r4)
-            sync				/* syncronization requirements */
-            mtspr	chud_970_hid4,r5
-            isync
-            b		mtspr64_success             
-mtspr64_hid5:
-            ld		r5,0(r4)
-            mtspr	chud_970_hid5,r5
-            b		mtspr64_success
-mtspr64_mmcr0:
-            ld		r5,0(r4)
-            mtspr	chud_970_mmcr0,r5
-            b		mtspr64_success
-mtspr64_mmcr1:
-            ld		r5,0(r4)
-            mtspr	chud_970_mmcr1,r5
-            b		mtspr64_success
-mtspr64_mmcra:
-            ld		r5,0(r4)
-            mtspr	chud_970_mmcra,r5
-            b		mtspr64_success
-mtspr64_siar:
-            ld		r5,0(r4)
-            mtspr	chud_970_siar,r5
-            b		mtspr64_success            
-mtspr64_sdar:
-            ld		r5,0(r4)
-            mtspr	chud_970_sdar,r5
-            b		mtspr64_success              
-mtspr64_imc:
-            ld		r5,0(r4)
-            mtspr	chud_970_imc,r5
-            b		mtspr64_success                          
-mtspr64_rmor:
-            ld		r5,0(r4)
-            mtspr	chud_970_rmor,r5
-            b		mtspr64_success              
-mtspr64_hrmor:
-            ld		r5,0(r4)
-            mtspr	chud_970_hrmor,r5
-            b		mtspr64_success  
-mtspr64_hior:
-            ld		r5,0(r4)
-            mtspr	chud_970_hior,r5
-            b		mtspr64_success  
-mtspr64_lpidr:
-            ld		r5,0(r4)
-            mtspr	chud_970_lpidr,r5
-            b		mtspr64_success   
-mtspr64_lpcr:
-            ld		r5,0(r4)
-            mtspr	chud_970_lpcr,r5
-            b		mtspr64_success    
-mtspr64_dabrx:
-            ld		r5,0(r4)
-            mtspr	chud_970_dabrx,r5
-            b		mtspr64_success    
-mtspr64_hsprg0:
-            ld		r5,0(r4)
-            mtspr	chud_970_hsprg0,r5
-            b		mtspr64_success   
-mtspr64_hsprg1:
-            ld		r5,0(r4)
-            mtspr	chud_970_hsprg1,r5
-            b		mtspr64_success 
-mtspr64_hsrr0:
-            ld		r5,0(r4)
-            mtspr	chud_970_hsrr0,r5
-            b		mtspr64_success 
-mtspr64_hsrr1:
-            ld		r5,0(r4)
-            mtspr	chud_970_hsrr1,r5
-            b		mtspr64_success 
-mtspr64_hdec:
-            ld		r5,0(r4)
-            mtspr	chud_970_hdec,r5
-            b		mtspr64_success             
-mtspr64_trig0:
-            ld		r5,0(r4)
-            mtspr	chud_970_trig0,r5
-            b		mtspr64_success 
-mtspr64_trig1:
-            ld		r5,0(r4)
-            mtspr	chud_970_trig1,r5
-            b		mtspr64_success
-mtspr64_trig2:
-            ld		r5,0(r4)
-            mtspr	chud_970_trig2,r5
-            b		mtspr64_success
-mtspr64_accr:
-            ld		r5,0(r4)
-            mtspr	chud_ppc64_accr,r5
-            b		mtspr64_success
-mtspr64_scomc:
-            ld		r5,0(r4)
-            mtspr	chud_970_scomc,r5
-            b		mtspr64_success
-mtspr64_scomd:
-            ld		r5,0(r4)
-            mtspr	chud_970_scomd,r5
-            b		mtspr64_success
-            
-mtspr64_failure:
-            li		r3,KERN_FAILURE
-            blr
-            
-mtspr64_success:
-            li		r3,KERN_SUCCESS
-            blr
-
-
-/*
- * kern_return_t mfmsr64(uint64_t *val);
- * 
- * r3: address to store value in
- *
- */
- 
-;           Force a line boundry here
-            .align  5
-            .globl  EXT(mfmsr64)
-
-EXT(mfmsr64):            
-            mfmsr	r5
-            std		r5,0(r3)
-mfmsr64_success:
-            li		r3,KERN_SUCCESS
-            blr
-
-mfmsr64_failure:
-            li		r3,KERN_FAILURE
-            blr
-
-
-/*
- * kern_return_t mtmsr64(uint64_t *val);
- * 
- * r3: address to load value from
- *
- */
- 
-;           Force a line boundry here
-            .align  5
-            .globl  EXT(mtmsr64)
-
-EXT(mtmsr64):            
-            ld		r5,0(r3)
-            mtmsrd	r5
-            b		mtmsr64_success
-            
-mtmsr64_success:
-            li		r3,KERN_SUCCESS
-            blr
-
-mtmsr64_failure:
-            li		r3,KERN_FAILURE
-            blr
-
-.L_end:
diff --git a/osfmk/chud/ppc/chud_cpu_ppc.c b/osfmk/chud/ppc/chud_cpu_ppc.c
deleted file mode 100644
index 60f279c3f..000000000
--- a/osfmk/chud/ppc/chud_cpu_ppc.c
+++ /dev/null
@@ -1,1182 +0,0 @@
-/*
- * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <mach/mach_types.h>
-#include <mach/mach_host.h>
-
-#include <kern/host.h>
-#include <kern/processor.h>
-
-#include <chud/chud_xnu.h>
-#include <chud/ppc/chud_spr.h>
-#include <chud/ppc/chud_cpu_asm.h>
-#include <ppc/machine_routines.h>
-#include <ppc/exception.h>
-#include <ppc/hw_perfmon.h>
-#include <ppc/Diagnostics.h>
-
-// The macros in proc_reg.h fail here with "expression must be absolute", so they are redefined below.
-
-#undef mtsprg
-#undef mfsprg
-#define mtsprg(n, reg)  __asm__ volatile("mtsprg  " # n ", %0" : : "r" (reg))
-#define mfsprg(reg, n)  __asm__ volatile("mfsprg  %0, " # n : "=r" (reg))
-
-#undef mtspr
-#undef mfspr
-#define mtspr(spr, reg)  __asm__ volatile("mtspr %0, %1" : : "n" (spr), "r" (reg))
-#define mfspr(reg, spr)  __asm__ volatile("mfspr %0, %1" : "=r" (reg) : "n" (spr))
-
-#undef mtsr
-#undef mfsr
-#define mtsr(sr, reg)  __asm__ volatile("sync" "@" "mtsr sr%0, %1 " "@" "isync" : : "i" (sr), "r" (reg))
-#define mfsr(reg, sr)  __asm__ volatile("mfsr %0, sr%1" : "=r" (reg) : "i" (sr))
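-
-/*
- * Usage sketch (illustrative only): the "n"/"i" constraints require the SPR
- * or segment-register number to be a compile-time constant, e.g.
- *
- *     uint32_t v;
- *     mfspr(v, chud_ppc_pvr);     // read the processor version register
- *     mtspr(chud_ppc_sprg3, v);   // hypothetical write-back to SPRG3
- */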
-
-#if 0
-#pragma mark **** cpu enable/disable ****
-#endif
-
-extern kern_return_t processor_start(processor_t processor); // osfmk/kern/processor.c
-extern kern_return_t processor_exit(processor_t processor);  // osfmk/kern/processor.c
-
-__private_extern__
-kern_return_t chudxnu_enable_cpu(int cpu, boolean_t enable)
-{
-    chudxnu_unbind_thread(current_thread(), 0);
-
-    if(cpu<0 || cpu>=chudxnu_phys_cpu_count()) { // check sanity of cpu argument
-        return KERN_FAILURE;
-    }
-
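-    /* the master (boot) cpu can never be started or stopped */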
-    if((PerProcTable[cpu].ppe_vaddr != (struct per_proc_info *)NULL)
-        && cpu != master_cpu) {
-		processor_t		processor = cpu_to_processor(cpu);
-
-        if(enable) {
-            return processor_start(processor);
-        } else {
-            return processor_exit(processor);
-        }
-    }
-    return KERN_FAILURE;
-}
-
-#if 0
-#pragma mark **** nap ****
-#endif
-
-__private_extern__
-kern_return_t chudxnu_enable_cpu_nap(int cpu, boolean_t enable)
-{
-    if(cpu<0 || cpu>=chudxnu_phys_cpu_count()) { // check sanity of cpu argument
-        return KERN_FAILURE;
-    }
-
-    if(PerProcTable[cpu].ppe_vaddr != (struct per_proc_info *)NULL) {
-        ml_enable_nap(cpu, enable);
-        return KERN_SUCCESS;
-    }
-
-    return KERN_FAILURE;
-}
-
-__private_extern__
-boolean_t chudxnu_cpu_nap_enabled(int cpu)
-{
-    boolean_t prev;
-
-    if(cpu<0 || cpu>=chudxnu_phys_cpu_count()) { // check sanity of cpu argument
-        cpu = 0;
-    }
-
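-    /* ml_enable_nap() returns the previous setting: probe by enabling, then restore it */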
-    prev = ml_enable_nap(cpu, TRUE);
-    ml_enable_nap(cpu, prev);
-
-    return prev;
-}
-
-#if 0
-#pragma mark **** shadowed spr ****
-#endif
-
-__private_extern__
-kern_return_t chudxnu_set_shadowed_spr(int cpu, int spr, uint32_t val)
-{
-    cpu_subtype_t target_cpu_subtype;
-    uint32_t available;
-    kern_return_t retval = KERN_FAILURE;
-    struct per_proc_info *per_proc;
-    boolean_t didBind = FALSE;
-
-    if(cpu>=chudxnu_phys_cpu_count()) { // check sanity of cpu argument
-        return KERN_FAILURE;
-    }
-
-    if(cpu<0) { // cpu<0 means don't bind (current cpu)
-        cpu = chudxnu_cpu_number();
-        didBind = FALSE;
-    } else {
-        chudxnu_bind_thread(current_thread(), cpu, 0);
-        didBind = TRUE;
-    }
-
-    per_proc = PerProcTable[cpu].ppe_vaddr;
-    available = per_proc->pf.Available;
-    target_cpu_subtype = per_proc->cpu_subtype;
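-    /* pf.Available holds per-CPU feature bits (pfL2, pfL3) tested below */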
-
-    if(spr==chud_750_l2cr) {
-        switch(target_cpu_subtype) {
-        case CPU_SUBTYPE_POWERPC_750:
-        case CPU_SUBTYPE_POWERPC_7400:
-        case CPU_SUBTYPE_POWERPC_7450:
-            if(available & pfL2) {
-//               int enable = (val & 0x80000000) ? TRUE : FALSE;
-//               if(enable) {
-//                 per_proc->pf.l2cr = val;
-//              } else {
-//                 per_proc->pf.l2cr = 0;
-//              }
-                per_proc->pf.l2cr = val;
-                cacheInit();
- //             mtspr(l2cr, per_proc->pf.l2cr); // XXXXXXX why is this necessary? XXXXXXX
-                retval = KERN_SUCCESS;
-            } else {
-                retval = KERN_FAILURE;
-            }
-            break;
-        default:
-            retval = KERN_INVALID_ARGUMENT;
-            break;
-        }
-    }
-    else if(spr==chud_7450_l3cr) {
-        switch(target_cpu_subtype) {
-        case CPU_SUBTYPE_POWERPC_7450:
-            if(available & pfL3) {
-                int enable = (val & 0x80000000) ? TRUE : FALSE;
-                if(enable) {
-                    per_proc->pf.l3cr = val;
-                } else {
-                    per_proc->pf.l3cr = 0;
-                }
-                cacheInit();
-                retval = KERN_SUCCESS;
-            } else {
-                retval = KERN_FAILURE;
-            }
-            break;
-        default:
-            retval = KERN_INVALID_ARGUMENT;
-            break;
-        }
-    }
-    else if(spr==chud_750_hid0) {
-        switch(target_cpu_subtype) {
-        case CPU_SUBTYPE_POWERPC_750:
-            cacheInit();
-            cacheDisable(); /* disable caches */
-	    mtspr(chud_750_hid0, val);
-            per_proc->pf.pfHID0 = val;
-            cacheInit(); /* reenable caches */
-            retval = KERN_SUCCESS;
-            break;
-        case CPU_SUBTYPE_POWERPC_7400:
-        case CPU_SUBTYPE_POWERPC_7450:
-	    mtspr(chud_750_hid0, val);
-            per_proc->pf.pfHID0 = val;
-            retval = KERN_SUCCESS;
-            break;
-        default:
-            retval = KERN_INVALID_ARGUMENT;
-            break;
-        }
-    }
-    else if(spr==chud_750_hid1) {
-        switch(target_cpu_subtype) {
-        case CPU_SUBTYPE_POWERPC_750:
-        case CPU_SUBTYPE_POWERPC_7400:
-        case CPU_SUBTYPE_POWERPC_7450:
-	    mtspr(chud_750_hid1, val);
-            per_proc->pf.pfHID1 = val;
-            retval = KERN_SUCCESS;
-            break;
-        default:
-            retval = KERN_INVALID_ARGUMENT;
-            break;
-        }
-    }
-    else if(spr==chud_750fx_hid2 && target_cpu_subtype==CPU_SUBTYPE_POWERPC_750) {
-	mtspr(chud_750fx_hid2, val);
-        per_proc->pf.pfHID2 = val;
-        retval = KERN_SUCCESS;
-    }
-    else if(spr==chud_7400_msscr0 && (target_cpu_subtype==CPU_SUBTYPE_POWERPC_7400 || target_cpu_subtype==CPU_SUBTYPE_POWERPC_7450)) {
-	mtspr(chud_7400_msscr0, val);
-        per_proc->pf.pfMSSCR0 = val;
-        retval = KERN_SUCCESS;
-    }
-    else if(spr==chud_7400_msscr1 && (target_cpu_subtype==CPU_SUBTYPE_POWERPC_7400 || target_cpu_subtype==CPU_SUBTYPE_POWERPC_7450)) { // called msssr0 on 7450
-	mtspr(chud_7400_msscr1, val);
-        per_proc->pf.pfMSSCR1 = val;
-        retval = KERN_SUCCESS;
-    }
-    else if(spr==chud_7450_ldstcr && target_cpu_subtype==CPU_SUBTYPE_POWERPC_7450) {
-	mtspr(chud_7450_ldstcr, val);
-        per_proc->pf.pfLDSTCR = val;
-        retval = KERN_SUCCESS;
-    }
-    else if(spr==chud_7450_ictrl && target_cpu_subtype==CPU_SUBTYPE_POWERPC_7450) {
-	mtspr(chud_7450_ictrl, val);
-        per_proc->pf.pfICTRL = val;
-        retval = KERN_SUCCESS;
-    } else {
-        retval = KERN_INVALID_ARGUMENT;
-    }
-
-    if(didBind) {
-        chudxnu_unbind_thread(current_thread(), 0);
-    }
-    
-    return retval;
-}
-
-__private_extern__
-kern_return_t chudxnu_set_shadowed_spr64(int cpu, int spr, uint64_t val)
-{
-    cpu_subtype_t target_cpu_subtype;
-    kern_return_t retval = KERN_FAILURE;
-    struct per_proc_info *per_proc;
-    boolean_t didBind = FALSE;
-
-    if(cpu>=chudxnu_phys_cpu_count()) { // check sanity of cpu argument
-        return KERN_FAILURE;
-    }
-
-    if(cpu<0) { // cpu<0 means don't bind (current cpu)
-        cpu = chudxnu_cpu_number();
-        didBind = FALSE;
-    } else {
-        chudxnu_bind_thread(current_thread(), cpu, 0);
-        didBind = TRUE;
-    }
-
-    per_proc = PerProcTable[cpu].ppe_vaddr;
-    target_cpu_subtype = per_proc->cpu_subtype;
-
-    if(spr==chud_970_hid0) {
-        switch(target_cpu_subtype) {
-        case CPU_SUBTYPE_POWERPC_970:
-            mtspr64(chud_970_hid0, &val);
-            per_proc->pf.pfHID0 = val;
-            retval = KERN_SUCCESS;
-            break;
-        default:
-            retval = KERN_INVALID_ARGUMENT;
-            break;
-        }
-    }
-    else if(spr==chud_970_hid1) {
-        switch(target_cpu_subtype) {
-        case CPU_SUBTYPE_POWERPC_970:
-            mtspr64(chud_970_hid1, &val);
-            per_proc->pf.pfHID1 = val;
-            retval = KERN_SUCCESS;
-            break;
-        default:
-            retval = KERN_INVALID_ARGUMENT;
-            break;
-        }
-    }
-    else if(spr==chud_970_hid4) {
-        switch(target_cpu_subtype) {
-        case CPU_SUBTYPE_POWERPC_970:
-            mtspr64(chud_970_hid4, &val);
-            per_proc->pf.pfHID4 = val;
-            retval = KERN_SUCCESS;
-            break;
-        default:
-            retval = KERN_INVALID_ARGUMENT;
-            break;
-        }
-    }
-    else if(spr==chud_970_hid5) {
-        switch(target_cpu_subtype) {
-        case CPU_SUBTYPE_POWERPC_970:
-            mtspr64(chud_970_hid5, &val);
-            per_proc->pf.pfHID5 = val;
-            retval = KERN_SUCCESS;
-            break;
-        default:
-            retval = KERN_INVALID_ARGUMENT;
-            break;
-        }
-    } else {
-        retval = KERN_INVALID_ARGUMENT;
-    }
-
-    if(didBind) {
-        chudxnu_unbind_thread(current_thread(), 0);
-    }
-
-    return retval;
-}
-
-__private_extern__
-uint32_t chudxnu_get_orig_cpu_l2cr(int cpu)
-{
-    if(cpu<0 || cpu>=chudxnu_phys_cpu_count()) { // check sanity of cpu argument
-        cpu = 0;
-    }
-    return PerProcTable[cpu].ppe_vaddr->pf.l2crOriginal;
-}
-
-__private_extern__
-uint32_t chudxnu_get_orig_cpu_l3cr(int cpu)
-{
-    if(cpu<0 || cpu>=chudxnu_phys_cpu_count()) { // check sanity of cpu argument
-        cpu = 0;
-    }
-    return PerProcTable[cpu].ppe_vaddr->pf.l3crOriginal;
-}
-
-#if 0
-#pragma mark **** spr ****
-#endif
-
-__private_extern__
-kern_return_t chudxnu_read_spr(int cpu, int spr, uint32_t *val_p)
-{
-    kern_return_t retval = KERN_SUCCESS;
-    boolean_t oldlevel;
-    uint32_t val = 0xFFFFFFFF;
-
-    /* bind to requested CPU */
-    if(cpu>=0 && !(ml_at_interrupt_context() && cpu_number() == cpu)) { // cpu<0 means don't bind
-		if(chudxnu_bind_thread(current_thread(), cpu, 0)!=KERN_SUCCESS) {
-			return KERN_INVALID_ARGUMENT;
-		}
-    }
-  
-    oldlevel = chudxnu_set_interrupts_enabled(FALSE); /* disable interrupts */
-
-    do {
-        /* PPC SPRs - 32-bit and 64-bit implementations */
-        if(spr==chud_ppc_srr0) { mfspr(val, chud_ppc_srr0); break; }
-        if(spr==chud_ppc_srr1) { mfspr(val, chud_ppc_srr1); break; }
-        if(spr==chud_ppc_dsisr) { mfspr(val, chud_ppc_dsisr); break; }
-        if(spr==chud_ppc_dar) { mfspr(val, chud_ppc_dar); break; }
-        if(spr==chud_ppc_dec) { mfspr(val, chud_ppc_dec); break; }
-        if(spr==chud_ppc_sdr1) { mfspr(val, chud_ppc_sdr1); break; }
-        if(spr==chud_ppc_sprg0) { mfspr(val, chud_ppc_sprg0); break; }
-        if(spr==chud_ppc_sprg1) { mfspr(val, chud_ppc_sprg1); break; }
-        if(spr==chud_ppc_sprg2) { mfspr(val, chud_ppc_sprg2); break; }
-        if(spr==chud_ppc_sprg3) { mfspr(val, chud_ppc_sprg3); break; }
-        if(spr==chud_ppc_ear) { mfspr(val, chud_ppc_ear); break; }
-        if(spr==chud_ppc_tbl) { mfspr(val, 268); break; } /* the timebase has separate read (268/269) and write (284/285) SPR numbers */
-        if(spr==chud_ppc_tbu) { mfspr(val, 269); break; }
-        if(spr==chud_ppc_pvr) { mfspr(val, chud_ppc_pvr); break; }
-        if(spr==chud_ppc_ibat0u) { mfspr(val, chud_ppc_ibat0u); break; }
-        if(spr==chud_ppc_ibat0l) { mfspr(val, chud_ppc_ibat0l); break; }
-        if(spr==chud_ppc_ibat1u) { mfspr(val, chud_ppc_ibat1u); break; }
-        if(spr==chud_ppc_ibat1l) { mfspr(val, chud_ppc_ibat1l); break; }
-        if(spr==chud_ppc_ibat2u) { mfspr(val, chud_ppc_ibat2u); break; }
-        if(spr==chud_ppc_ibat2l) { mfspr(val, chud_ppc_ibat2l); break; }
-        if(spr==chud_ppc_ibat3u) { mfspr(val, chud_ppc_ibat3u); break; }
-        if(spr==chud_ppc_ibat3l) { mfspr(val, chud_ppc_ibat3l); break; }
-        if(spr==chud_ppc_dbat0u) { mfspr(val, chud_ppc_dbat0u); break; }
-        if(spr==chud_ppc_dbat0l) { mfspr(val, chud_ppc_dbat0l); break; }
-        if(spr==chud_ppc_dbat1u) { mfspr(val, chud_ppc_dbat1u); break; }
-        if(spr==chud_ppc_dbat1l) { mfspr(val, chud_ppc_dbat1l); break; }
-        if(spr==chud_ppc_dbat2u) { mfspr(val, chud_ppc_dbat2u); break; }
-        if(spr==chud_ppc_dbat2l) { mfspr(val, chud_ppc_dbat2l); break; }
-        if(spr==chud_ppc_dbat3u) { mfspr(val, chud_ppc_dbat3u); break; }
-        if(spr==chud_ppc_dbat3l) { mfspr(val, chud_ppc_dbat3l); break; }
-        if(spr==chud_ppc_dabr) { mfspr(val, chud_ppc_dabr); break; }
-        if(spr==chud_ppc_msr) { /* this is the MSR for the calling process */
-            struct ppc_thread_state64 state;
-            mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT;
-            kern_return_t kr;
-            kr = chudxnu_thread_get_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, &count, TRUE /* user only */);
-            if(KERN_SUCCESS==kr) {
-                val = state.srr1;
-            } else {
-                retval = KERN_FAILURE;
-            }
-            break;
-        }
-        
-        /* PPC SPRs - 32-bit implementations */
-        if(spr==chud_ppc32_sr0) { mfsr(val, 0); break; }
-        if(spr==chud_ppc32_sr1) { mfsr(val, 1); break; }
-        if(spr==chud_ppc32_sr2) { mfsr(val, 2); break; }
-        if(spr==chud_ppc32_sr3) { mfsr(val, 3); break; }
-        if(spr==chud_ppc32_sr4) { mfsr(val, 4); break; }
-        if(spr==chud_ppc32_sr5) { mfsr(val, 5); break; }
-        if(spr==chud_ppc32_sr6) { mfsr(val, 6); break; }
-        if(spr==chud_ppc32_sr7) { mfsr(val, 7); break; }
-        if(spr==chud_ppc32_sr8) { mfsr(val, 8); break; }
-        if(spr==chud_ppc32_sr9) { mfsr(val, 9); break; }
-        if(spr==chud_ppc32_sr10) { mfsr(val, 10); break; }
-        if(spr==chud_ppc32_sr11) { mfsr(val, 11); break; }
-        if(spr==chud_ppc32_sr12) { mfsr(val, 12); break; }
-        if(spr==chud_ppc32_sr13) { mfsr(val, 13); break; }
-        if(spr==chud_ppc32_sr14) { mfsr(val, 14); break; }
-        if(spr==chud_ppc32_sr15) { mfsr(val, 15); break; }
-        
-        /* PPC SPRs - 64-bit implementations */
-        if(spr==chud_ppc64_ctrl) { mfspr(val, chud_ppc64_ctrl); break; }
-        
-        /* Implementation Specific SPRs */
-        if(cpu_subtype()==CPU_SUBTYPE_POWERPC_750) {
-            if(spr==chud_750_mmcr0) { mfspr(val, chud_750_mmcr0); break; }
-            if(spr==chud_750_pmc1) { mfspr(val, chud_750_pmc1); break; }
-            if(spr==chud_750_pmc2) { mfspr(val, chud_750_pmc2); break; }
-            if(spr==chud_750_sia) { mfspr(val, chud_750_sia); break; }
-            if(spr==chud_750_mmcr1) { mfspr(val, chud_750_mmcr1); break; }
-            if(spr==chud_750_pmc3) { mfspr(val, chud_750_pmc3); break; }
-            if(spr==chud_750_pmc4) { mfspr(val, chud_750_pmc4); break; }
-            if(spr==chud_750_hid0) { mfspr(val, chud_750_hid0); break; }
-            if(spr==chud_750_hid1) { mfspr(val, chud_750_hid1); break; }
-            if(spr==chud_750_iabr) { mfspr(val, chud_750_iabr); break; }
-            if(spr==chud_750_ictc) { mfspr(val, chud_750_ictc); break; }
-            if(spr==chud_750_thrm1) { mfspr(val, chud_750_thrm1); break; }
-            if(spr==chud_750_thrm2) { mfspr(val, chud_750_thrm2); break; }
-            if(spr==chud_750_thrm3) { mfspr(val, chud_750_thrm3); break; }
-            if(spr==chud_750_l2cr) { mfspr(val, chud_750_l2cr); break; }
-
-	    // 750FX only
-            if(spr==chud_750fx_ibat4u) { mfspr(val, chud_750fx_ibat4u); break; }
-            if(spr==chud_750fx_ibat4l) { mfspr(val, chud_750fx_ibat4l); break; }
-            if(spr==chud_750fx_ibat5u) { mfspr(val, chud_750fx_ibat5u); break; }
-            if(spr==chud_750fx_ibat5l) { mfspr(val, chud_750fx_ibat5l); break; }
-            if(spr==chud_750fx_ibat6u) { mfspr(val, chud_750fx_ibat6u); break; }
-            if(spr==chud_750fx_ibat6l) { mfspr(val, chud_750fx_ibat6l); break; }
-            if(spr==chud_750fx_ibat7u) { mfspr(val, chud_750fx_ibat7u); break; }
-            if(spr==chud_750fx_ibat7l) { mfspr(val, chud_750fx_ibat7l); break; }
-            if(spr==chud_750fx_dbat4u) { mfspr(val, chud_750fx_dbat4u); break; }
-            if(spr==chud_750fx_dbat4l) { mfspr(val, chud_750fx_dbat4l); break; }
-            if(spr==chud_750fx_dbat5u) { mfspr(val, chud_750fx_dbat5u); break; }
-            if(spr==chud_750fx_dbat5l) { mfspr(val, chud_750fx_dbat5l); break; }
-            if(spr==chud_750fx_dbat6u) { mfspr(val, chud_750fx_dbat6u); break; }
-            if(spr==chud_750fx_dbat6l) { mfspr(val, chud_750fx_dbat6l); break; }
-            if(spr==chud_750fx_dbat7u) { mfspr(val, chud_750fx_dbat7u); break; }
-            if(spr==chud_750fx_dbat7l) { mfspr(val, chud_750fx_dbat7l); break; }
-
-	    // 750FX >= DDR2.x only
-	    if(spr==chud_750fx_hid2) { mfspr(val, chud_750fx_hid2); break; }
-        }
-        
-        if(cpu_subtype()==CPU_SUBTYPE_POWERPC_7400) {
-            if(spr==chud_7400_mmcr2) { mfspr(val, chud_7400_mmcr2); break; }
-            if(spr==chud_7400_bamr) { mfspr(val, chud_7400_bamr); break; }
-            if(spr==chud_7400_mmcr0) { mfspr(val, chud_7400_mmcr0); break; }
-            if(spr==chud_7400_pmc1) { mfspr(val, chud_7400_pmc1); break; }
-            if(spr==chud_7400_pmc2) { mfspr(val, chud_7400_pmc2); break; }
-            if(spr==chud_7400_siar) { mfspr(val, chud_7400_siar); break; }
-            if(spr==chud_7400_mmcr1) { mfspr(val, chud_7400_mmcr1); break; }
-            if(spr==chud_7400_pmc3) { mfspr(val, chud_7400_pmc3); break; }
-            if(spr==chud_7400_pmc4) { mfspr(val, chud_7400_pmc4); break; }
-            if(spr==chud_7400_hid0) { mfspr(val, chud_7400_hid0); break; }
-            if(spr==chud_7400_hid1) { mfspr(val, chud_7400_hid1); break; }
-            if(spr==chud_7400_iabr) { mfspr(val, chud_7400_iabr); break; }
-            if(spr==chud_7400_msscr0) { mfspr(val, chud_7400_msscr0); break; }
-            if(spr==chud_7400_msscr1) { mfspr(val, chud_7400_msscr1); break; } /* private */
-            if(spr==chud_7400_ictc) { mfspr(val, chud_7400_ictc); break; }
-            if(spr==chud_7400_thrm1) { mfspr(val, chud_7400_thrm1); break; }
-            if(spr==chud_7400_thrm2) { mfspr(val, chud_7400_thrm2); break; }
-            if(spr==chud_7400_thrm3) { mfspr(val, chud_7400_thrm3); break; }
-            if(spr==chud_7400_pir) { mfspr(val, chud_7400_pir); break; }
-            if(spr==chud_7400_l2cr) { mfspr(val, chud_7400_l2cr); break; }
-	    
-	    // 7410 only
-            if(spr==chud_7410_l2pmcr) { mfspr(val, chud_7410_l2pmcr); break; }
-        }
-
-        if(cpu_subtype()==CPU_SUBTYPE_POWERPC_7450) {
-            if(spr==chud_7450_mmcr2) { mfspr(val, chud_7450_mmcr2); break; }
-            if(spr==chud_7450_pmc5) { mfspr(val, chud_7450_pmc5); break; }
-            if(spr==chud_7450_pmc6) { mfspr(val, chud_7450_pmc6); break; }
-            if(spr==chud_7450_bamr) { mfspr(val, chud_7450_bamr); break; }
-            if(spr==chud_7450_mmcr0) { mfspr(val, chud_7450_mmcr0); break; }
-            if(spr==chud_7450_pmc1) { mfspr(val, chud_7450_pmc1); break; }
-            if(spr==chud_7450_pmc2) { mfspr(val, chud_7450_pmc2); break; }
-            if(spr==chud_7450_siar) { mfspr(val, chud_7450_siar); break; }
-            if(spr==chud_7450_mmcr1) { mfspr(val, chud_7450_mmcr1); break; }
-            if(spr==chud_7450_pmc3) { mfspr(val, chud_7450_pmc3); break; }
-            if(spr==chud_7450_pmc4) { mfspr(val, chud_7450_pmc4); break; }
-            if(spr==chud_7450_tlbmiss) { mfspr(val, chud_7450_tlbmiss); break; }
-            if(spr==chud_7450_ptehi) { mfspr(val, chud_7450_ptehi); break; }
-            if(spr==chud_7450_ptelo) { mfspr(val, chud_7450_ptelo); break; }
-            if(spr==chud_7450_l3pm) { mfspr(val, chud_7450_l3pm); break; }
-            if(spr==chud_7450_hid0) { mfspr(val, chud_7450_hid0); break; }
-            if(spr==chud_7450_hid1) { mfspr(val, chud_7450_hid1); break; }
-            if(spr==chud_7450_iabr) { mfspr(val, chud_7450_iabr); break; }
-            if(spr==chud_7450_ldstdb) { mfspr(val, chud_7450_ldstdb); break; }
-            if(spr==chud_7450_msscr0) { mfspr(val, chud_7450_msscr0); break; }
-            if(spr==chud_7450_msssr0) { mfspr(val, chud_7450_msssr0); break; }
-            if(spr==chud_7450_ldstcr) { mfspr(val, chud_7450_ldstcr); break; }
-            if(spr==chud_7450_ictc) { mfspr(val, chud_7450_ictc); break; }
-            if(spr==chud_7450_ictrl) { mfspr(val, chud_7450_ictrl); break; }
-            if(spr==chud_7450_thrm1) { mfspr(val, chud_7450_thrm1); break; }
-            if(spr==chud_7450_thrm2) { mfspr(val, chud_7450_thrm2); break; }
-            if(spr==chud_7450_thrm3) { mfspr(val, chud_7450_thrm3); break; }
-            if(spr==chud_7450_pir) { mfspr(val, chud_7450_pir); break; }
-            if(spr==chud_7450_l2cr) { mfspr(val, chud_7450_l2cr); break; }
-            if(spr==chud_7450_l3cr) { mfspr(val, chud_7450_l3cr); break; }
-	    
-	    // 7455/7457 only
-            if(spr==chud_7455_sprg4) { mfspr(val, chud_7455_sprg4); break; }
-            if(spr==chud_7455_sprg5) { mfspr(val, chud_7455_sprg5); break; }
-            if(spr==chud_7455_sprg6) { mfspr(val, chud_7455_sprg6); break; }
-            if(spr==chud_7455_sprg7) { mfspr(val, chud_7455_sprg7); break; }
-            if(spr==chud_7455_ibat4u) { mfspr(val, chud_7455_ibat4u); break; }
-            if(spr==chud_7455_ibat4l) { mfspr(val, chud_7455_ibat4l); break; }
-            if(spr==chud_7455_ibat5u) { mfspr(val, chud_7455_ibat5u); break; }
-            if(spr==chud_7455_ibat5l) { mfspr(val, chud_7455_ibat5l); break; }
-            if(spr==chud_7455_ibat6u) { mfspr(val, chud_7455_ibat6u); break; }
-            if(spr==chud_7455_ibat6l) { mfspr(val, chud_7455_ibat6l); break; }
-            if(spr==chud_7455_ibat7u) { mfspr(val, chud_7455_ibat7u); break; }
-            if(spr==chud_7455_ibat7l) { mfspr(val, chud_7455_ibat7l); break; }
-            if(spr==chud_7455_dbat4u) { mfspr(val, chud_7455_dbat4u); break; }
-            if(spr==chud_7455_dbat4l) { mfspr(val, chud_7455_dbat4l); break; }
-            if(spr==chud_7455_dbat5u) { mfspr(val, chud_7455_dbat5u); break; }
-            if(spr==chud_7455_dbat5l) { mfspr(val, chud_7455_dbat5l); break; }
-            if(spr==chud_7455_dbat6u) { mfspr(val, chud_7455_dbat6u); break; }
-            if(spr==chud_7455_dbat6l) { mfspr(val, chud_7455_dbat6l); break; }
-            if(spr==chud_7455_dbat7u) { mfspr(val, chud_7455_dbat7u); break; }
-            if(spr==chud_7455_dbat7l) { mfspr(val, chud_7455_dbat7l); break; }
-        }
-        
-        if(cpu_subtype()==CPU_SUBTYPE_POWERPC_970) {
-	    if(spr==chud_970_pir) { mfspr(val, chud_970_pir); break; }
-	    if(spr==chud_970_pmc1) { mfspr(val, chud_970_pmc1); break; }
-	    if(spr==chud_970_pmc2) { mfspr(val, chud_970_pmc2); break; }
-	    if(spr==chud_970_pmc3) { mfspr(val, chud_970_pmc3); break; }
-	    if(spr==chud_970_pmc4) { mfspr(val, chud_970_pmc4); break; }
-	    if(spr==chud_970_pmc5) { mfspr(val, chud_970_pmc5); break; }
-	    if(spr==chud_970_pmc6) { mfspr(val, chud_970_pmc6); break; }
-	    if(spr==chud_970_pmc7) { mfspr(val, chud_970_pmc7); break; }
-	    if(spr==chud_970_pmc8) { mfspr(val, chud_970_pmc8); break; }
-	    if(spr==chud_970_hdec) { mfspr(val, chud_970_hdec); break; }
-        }
-
-        /* we only get here if none of the above cases qualify */
-        retval = KERN_INVALID_ARGUMENT;
-    } while(0);
-    
-    chudxnu_set_interrupts_enabled(oldlevel); /* enable interrupts */
-
-    if(cpu>=0) { // cpu<0 means don't bind
-		chudxnu_unbind_thread(current_thread(), 0);
-    }
-
-    *val_p = val;
-
-    return retval;
-}
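-
-/*
- * Usage sketch (illustrative): read the current CPU's PVR; cpu<0 targets
- * the CPU the caller is already running on.
- *
- *     uint32_t pvr;
- *     if(chudxnu_read_spr(-1, chud_ppc_pvr, &pvr) == KERN_SUCCESS) {
- *         // pvr now holds the processor version register
- *     }
- */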
-
-__private_extern__
-kern_return_t chudxnu_read_spr64(int cpu, int spr, uint64_t *val_p)
-{
-    kern_return_t retval = KERN_SUCCESS;
-    boolean_t oldlevel;
-
-    /* bind to requested CPU */
-    if(cpu>=0 && !(ml_at_interrupt_context() && cpu_number() == cpu)) { // cpu<0 means don't bind
-		if(chudxnu_bind_thread(current_thread(), cpu, 0)!=KERN_SUCCESS) {
-			return KERN_INVALID_ARGUMENT;
-		}
-    }
-  
-    oldlevel = chudxnu_set_interrupts_enabled(FALSE); /* disable interrupts */
-
-    do {
-        /* PPC SPRs - 32-bit and 64-bit implementations */
-        if(spr==chud_ppc_srr0) { retval = mfspr64(val_p, chud_ppc_srr0); break; }
-        if(spr==chud_ppc_srr1) { retval = mfspr64(val_p, chud_ppc_srr1); break; }
-        if(spr==chud_ppc_dar) { retval = mfspr64(val_p, chud_ppc_dar); break; }
-        if(spr==chud_ppc_dsisr) { retval = mfspr64(val_p, chud_ppc_dsisr); break; }
-        if(spr==chud_ppc_sdr1) { retval = mfspr64(val_p, chud_ppc_sdr1); break; }
-        if(spr==chud_ppc_sprg0) { retval = mfspr64(val_p, chud_ppc_sprg0); break; }
-        if(spr==chud_ppc_sprg1) { retval = mfspr64(val_p, chud_ppc_sprg1); break; }
-        if(spr==chud_ppc_sprg2) { retval = mfspr64(val_p, chud_ppc_sprg2); break; }
-        if(spr==chud_ppc_sprg3) { retval = mfspr64(val_p, chud_ppc_sprg3); break; }
-        if(spr==chud_ppc_dabr) { retval = mfspr64(val_p, chud_ppc_dabr); break; }
-        if(spr==chud_ppc_msr) { /* this is the MSR for the calling process */
-            struct ppc_thread_state64 state;
-            mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT;
-            kern_return_t kr;
-            kr = chudxnu_thread_get_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, &count, TRUE /* user only */);
-            if(KERN_SUCCESS==kr) {
-                *val_p = state.srr1;
-            } else {
-                retval = KERN_FAILURE;
-            }
-            break;
-        }
-        
-        /* PPC SPRs - 64-bit implementations */
-        if(spr==chud_ppc64_asr) { retval = mfspr64(val_p, chud_ppc64_asr); break; }
-        if(spr==chud_ppc64_accr) { retval = mfspr64(val_p, chud_ppc64_accr); break; }        
-        
-        /* Implementation Specific SPRs */
-        if(cpu_subtype()==CPU_SUBTYPE_POWERPC_970) {
-            if(spr==chud_970_hid0) { retval = mfspr64(val_p, chud_970_hid0); break; }
-            if(spr==chud_970_hid1) { retval = mfspr64(val_p, chud_970_hid1); break; }
-            if(spr==chud_970_hid4) { retval = mfspr64(val_p, chud_970_hid4); break; }
-            if(spr==chud_970_hid5) { retval = mfspr64(val_p, chud_970_hid5); break; }
-            if(spr==chud_970_mmcr0) { retval = mfspr64(val_p, chud_970_mmcr0); break; }
-            if(spr==chud_970_mmcr1) { retval = mfspr64(val_p, chud_970_mmcr1); break; }
-            if(spr==chud_970_mmcra) { retval = mfspr64(val_p, chud_970_mmcra); break; }
-            if(spr==chud_970_siar) { retval = mfspr64(val_p, chud_970_siar); break; }
-            if(spr==chud_970_sdar) { retval = mfspr64(val_p, chud_970_sdar); break; }
-            if(spr==chud_970_imc) { retval = mfspr64(val_p, chud_970_imc); break; }
-            if(spr==chud_970_rmor) { retval = mfspr64(val_p, chud_970_rmor); break; }
-            if(spr==chud_970_hrmor) { retval = mfspr64(val_p, chud_970_hrmor); break; }
-            if(spr==chud_970_hior) { retval = mfspr64(val_p, chud_970_hior); break; }
-            if(spr==chud_970_lpidr) { retval = mfspr64(val_p, chud_970_lpidr); break; }
-            if(spr==chud_970_lpcr) { retval = mfspr64(val_p, chud_970_lpcr); break; }
-            if(spr==chud_970_dabrx) { retval = mfspr64(val_p, chud_970_dabrx); break; }
-            if(spr==chud_970_hsprg0) { retval = mfspr64(val_p, chud_970_hsprg0); break; }
-            if(spr==chud_970_hsprg1) { retval = mfspr64(val_p, chud_970_hsprg1); break; }
-            if(spr==chud_970_hsrr0) { retval = mfspr64(val_p, chud_970_hsrr0); break; }
-            if(spr==chud_970_hsrr1) { retval = mfspr64(val_p, chud_970_hsrr1); break; }
-            if(spr==chud_970_hdec) { retval = mfspr64(val_p, chud_970_hdec); break; }
-            if(spr==chud_970_trig0) { retval = mfspr64(val_p, chud_970_trig0); break; }
-            if(spr==chud_970_trig1) { retval = mfspr64(val_p, chud_970_trig1); break; }
-            if(spr==chud_970_trig2) { retval = mfspr64(val_p, chud_970_trig2); break; }
-            if(spr==chud_970_scomc) { retval = mfspr64(val_p, chud_970_scomc); break; }
-            if(spr==chud_970_scomd) { retval = mfspr64(val_p, chud_970_scomd); break; }
-        }
-
-        /* we only get here if none of the above cases qualify */
-        *val_p = 0xFFFFFFFFFFFFFFFFULL;
-        retval = KERN_INVALID_ARGUMENT;
-    } while(0);
-    
-    chudxnu_set_interrupts_enabled(oldlevel); /* enable interrupts */
-
-    if(cpu>=0) { // cpu<0 means don't bind
-		chudxnu_unbind_thread(current_thread(), 0);
-    }
-
-    return retval;
-}
-
-__private_extern__
-kern_return_t chudxnu_write_spr(int cpu, int spr, uint32_t val)
-{
-    kern_return_t retval = KERN_SUCCESS;
-    boolean_t oldlevel;
-
-    /* bind to requested CPU */
-    if(cpu>=0 && !(ml_at_interrupt_context() && cpu_number() == cpu)) { // cpu<0 means don't bind
-		if(chudxnu_bind_thread(current_thread(), cpu, 0)!=KERN_SUCCESS) {
-			return KERN_INVALID_ARGUMENT;
-		}
-    }
-
-    oldlevel = chudxnu_set_interrupts_enabled(FALSE); /* disable interrupts */
-
-    do {          
-        /* PPC SPRs - 32-bit and 64-bit implementations */
-        if(spr==chud_ppc_srr0) { mtspr(chud_ppc_srr0, val); break; }
-        if(spr==chud_ppc_srr1) { mtspr(chud_ppc_srr1, val); break; }
-        if(spr==chud_ppc_dsisr) { mtspr(chud_ppc_dsisr, val); break; }
-        if(spr==chud_ppc_dar) { mtspr(chud_ppc_dar, val); break; }
-        if(spr==chud_ppc_dec) { mtspr(chud_ppc_dec, val); break; }
-        if(spr==chud_ppc_sdr1) { mtspr(chud_ppc_sdr1, val); break; }
-        if(spr==chud_ppc_sprg0) { mtspr(chud_ppc_sprg0, val); break; }
-        if(spr==chud_ppc_sprg1) { mtspr(chud_ppc_sprg1, val); break; }
-        if(spr==chud_ppc_sprg2) { mtspr(chud_ppc_sprg2, val); break; }
-        if(spr==chud_ppc_sprg3) { mtspr(chud_ppc_sprg3, val); break; }
-        if(spr==chud_ppc_ear) { mtspr(chud_ppc_ear, val); break; }
-        if(spr==chud_ppc_tbl) { mtspr(284, val); break; } /* the timebase has separate read (268/269) and write (284/285) SPR numbers */
-        if(spr==chud_ppc_tbu) { mtspr(285, val); break; }
-        if(spr==chud_ppc_pvr) { mtspr(chud_ppc_pvr, val); break; }
-        if(spr==chud_ppc_ibat0u) { mtspr(chud_ppc_ibat0u, val); break; }
-        if(spr==chud_ppc_ibat0l) { mtspr(chud_ppc_ibat0l, val); break; }
-        if(spr==chud_ppc_ibat1u) { mtspr(chud_ppc_ibat1u, val); break; }
-        if(spr==chud_ppc_ibat1l) { mtspr(chud_ppc_ibat1l, val); break; }
-        if(spr==chud_ppc_ibat2u) { mtspr(chud_ppc_ibat2u, val); break; }
-        if(spr==chud_ppc_ibat2l) { mtspr(chud_ppc_ibat2l, val); break; }
-        if(spr==chud_ppc_ibat3u) { mtspr(chud_ppc_ibat3u, val); break; }
-        if(spr==chud_ppc_ibat3l) { mtspr(chud_ppc_ibat3l, val); break; }
-        if(spr==chud_ppc_dbat0u) { mtspr(chud_ppc_dbat0u, val); break; }
-        if(spr==chud_ppc_dbat0l) { mtspr(chud_ppc_dbat0l, val); break; }
-        if(spr==chud_ppc_dbat1u) { mtspr(chud_ppc_dbat1u, val); break; }
-        if(spr==chud_ppc_dbat1l) { mtspr(chud_ppc_dbat1l, val); break; }
-        if(spr==chud_ppc_dbat2u) { mtspr(chud_ppc_dbat2u, val); break; }
-        if(spr==chud_ppc_dbat2l) { mtspr(chud_ppc_dbat2l, val); break; }
-        if(spr==chud_ppc_dbat3u) { mtspr(chud_ppc_dbat3u, val); break; }
-        if(spr==chud_ppc_dbat3l) { mtspr(chud_ppc_dbat3l, val); break; }
-        if(spr==chud_ppc_dabr) { mtspr(chud_ppc_dabr, val); break; }
-        if(spr==chud_ppc_msr) { /* this is the MSR for the calling process */
-            struct ppc_thread_state64 state;
-            mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT;
-            kern_return_t kr;
-            kr = chudxnu_thread_get_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, &count, TRUE /* user only */);
-            if(KERN_SUCCESS==kr) {
-                state.srr1 = val;
-                kr = chudxnu_thread_set_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, count, TRUE /* user only */);
-                if(KERN_SUCCESS!=kr) {
-                    retval = KERN_FAILURE;
-                }
-            } else {
-                retval = KERN_FAILURE;
-            }
-            break;
-        }
-        
-        /* PPC SPRs - 32-bit implementations */
-        if(spr==chud_ppc32_sr0) { mtsr(0, val); break; }
-        if(spr==chud_ppc32_sr1) { mtsr(1, val); break; }
-        if(spr==chud_ppc32_sr2) { mtsr(2, val); break; }
-        if(spr==chud_ppc32_sr3) { mtsr(3, val); break; }
-        if(spr==chud_ppc32_sr4) { mtsr(4, val); break; }
-        if(spr==chud_ppc32_sr5) { mtsr(5, val); break; }
-        if(spr==chud_ppc32_sr6) { mtsr(6, val); break; }
-        if(spr==chud_ppc32_sr7) { mtsr(7, val); break; }
-        if(spr==chud_ppc32_sr8) { mtsr(8, val); break; }
-        if(spr==chud_ppc32_sr9) { mtsr(9, val); break; }
-        if(spr==chud_ppc32_sr10) { mtsr(10, val); break; }
-        if(spr==chud_ppc32_sr11) { mtsr(11, val); break; }
-        if(spr==chud_ppc32_sr12) { mtsr(12, val); break; }
-        if(spr==chud_ppc32_sr13) { mtsr(13, val); break; }
-        if(spr==chud_ppc32_sr14) { mtsr(14, val); break; }
-        if(spr==chud_ppc32_sr15) { mtsr(15, val); break; }
-        
-        /* Implementation Specific SPRs */
-        if(cpu_subtype()==CPU_SUBTYPE_POWERPC_750) {
-            if(spr==chud_750_mmcr0) { mtspr(chud_750_mmcr0, val); break; }
-            if(spr==chud_750_pmc1) { mtspr(chud_750_pmc1, val); break; }
-            if(spr==chud_750_pmc2) { mtspr(chud_750_pmc2, val); break; }
-            if(spr==chud_750_sia) { mtspr(chud_750_sia, val); break; }
-            if(spr==chud_750_mmcr1) { mtspr(chud_750_mmcr1, val); break; }
-            if(spr==chud_750_pmc3) { mtspr(chud_750_pmc3, val); break; }
-            if(spr==chud_750_pmc4) { mtspr(chud_750_pmc4, val); break; }
-            if(spr==chud_750_iabr) { mtspr(chud_750_iabr, val); break; }
-            if(spr==chud_750_ictc) { mtspr(chud_750_ictc, val); break; }
-            if(spr==chud_750_thrm1) { mtspr(chud_750_thrm1, val); break; }
-            if(spr==chud_750_thrm2) { mtspr(chud_750_thrm2, val); break; }
-            if(spr==chud_750_thrm3) { mtspr(chud_750_thrm3, val); break; }
-            if(spr==chud_750_l2cr) { 
-		retval = chudxnu_set_shadowed_spr(cpu, spr, val);
-		break;
-            }
-            if(spr==chud_750_hid0) {
-		retval = chudxnu_set_shadowed_spr(cpu, spr, val);
-		break;
-            }
-            if(spr==chud_750_hid1) {
-		retval = chudxnu_set_shadowed_spr(cpu, spr, val);
-		break;
-            }
-
-	    // 750FX only
-            if(spr==chud_750fx_ibat4u) { mtspr(chud_750fx_ibat4u, val); break; }
-            if(spr==chud_750fx_ibat4l) { mtspr(chud_750fx_ibat4l, val); break; }
-            if(spr==chud_750fx_ibat5u) { mtspr(chud_750fx_ibat5u, val); break; }
-            if(spr==chud_750fx_ibat5l) { mtspr(chud_750fx_ibat5l, val); break; }
-            if(spr==chud_750fx_ibat6u) { mtspr(chud_750fx_ibat6u, val); break; }
-            if(spr==chud_750fx_ibat6l) { mtspr(chud_750fx_ibat6l, val); break; }
-            if(spr==chud_750fx_ibat7u) { mtspr(chud_750fx_ibat7u, val); break; }
-            if(spr==chud_750fx_ibat7l) { mtspr(chud_750fx_ibat7l, val); break; }
-            if(spr==chud_750fx_dbat4u) { mtspr(chud_750fx_dbat4u, val); break; }
-            if(spr==chud_750fx_dbat4l) { mtspr(chud_750fx_dbat4l, val); break; }
-            if(spr==chud_750fx_dbat5u) { mtspr(chud_750fx_dbat5u, val); break; }
-            if(spr==chud_750fx_dbat5l) { mtspr(chud_750fx_dbat5l, val); break; }
-            if(spr==chud_750fx_dbat6u) { mtspr(chud_750fx_dbat6u, val); break; }
-            if(spr==chud_750fx_dbat6l) { mtspr(chud_750fx_dbat6l, val); break; }
-            if(spr==chud_750fx_dbat7u) { mtspr(chud_750fx_dbat7u, val); break; }
-            if(spr==chud_750fx_dbat7l) { mtspr(chud_750fx_dbat7l, val); break; }
-	    
-	    // 750FX >= DDR2.x
-	    if(spr==chud_750fx_hid2) { mtspr(chud_750fx_hid2, val); break; }
-        }
-        
-        if(cpu_subtype()==CPU_SUBTYPE_POWERPC_7400) {
-            if(spr==chud_7400_mmcr2) { mtspr(chud_7400_mmcr2, val); break; }
-            if(spr==chud_7400_bamr) { mtspr(chud_7400_bamr, val); break; }
-            if(spr==chud_7400_mmcr0) { mtspr(chud_7400_mmcr0, val); break; }
-            if(spr==chud_7400_pmc1) { mtspr(chud_7400_pmc1, val); break; }
-            if(spr==chud_7400_pmc2) { mtspr(chud_7400_pmc2, val); break; }
-            if(spr==chud_7400_siar) { mtspr(chud_7400_siar, val); break; }
-            if(spr==chud_7400_mmcr1) { mtspr(chud_7400_mmcr1, val); break; }
-            if(spr==chud_7400_pmc3) { mtspr(chud_7400_pmc3, val); break; }
-            if(spr==chud_7400_pmc4) { mtspr(chud_7400_pmc4, val); break; }
-            if(spr==chud_7400_iabr) { mtspr(chud_7400_iabr, val); break; }
-            if(spr==chud_7400_ictc) { mtspr(chud_7400_ictc, val); break; }
-            if(spr==chud_7400_thrm1) { mtspr(chud_7400_thrm1, val); break; }
-            if(spr==chud_7400_thrm2) { mtspr(chud_7400_thrm2, val); break; }
-            if(spr==chud_7400_thrm3) { mtspr(chud_7400_thrm3, val); break; }
-            if(spr==chud_7400_pir) { mtspr(chud_7400_pir, val); break; }
-            
-            if(spr==chud_7400_l2cr) {
-                retval = chudxnu_set_shadowed_spr(cpu, spr, val);
-		break;
-            }
-            if(spr==chud_7400_hid0) {
-                retval = chudxnu_set_shadowed_spr(cpu, spr, val);
-		break;
-            }
-            if(spr==chud_7400_hid1) {
-                retval = chudxnu_set_shadowed_spr(cpu, spr, val);
-		break;
-            }
-            if(spr==chud_7400_msscr0) {
-                retval = chudxnu_set_shadowed_spr(cpu, spr, val);
-		break;
-            }
-            if(spr==chud_7400_msscr1) { /* private */
-                retval = chudxnu_set_shadowed_spr(cpu, spr, val);
-		break;
-            }
-
-	    // 7410 only
-            if(spr==chud_7410_l2pmcr) { mtspr(chud_7410_l2pmcr, val); break; }
-        }
-
-        if(cpu_subtype()==CPU_SUBTYPE_POWERPC_7450) {
-            if(spr==chud_7450_mmcr2) { mtspr(chud_7450_mmcr2, val); break; }
-            if(spr==chud_7450_pmc5) { mtspr(chud_7450_pmc5, val); break; }
-            if(spr==chud_7450_pmc6) { mtspr(chud_7450_pmc6, val); break; }
-            if(spr==chud_7450_bamr) { mtspr(chud_7450_bamr, val); break; }
-            if(spr==chud_7450_mmcr0) { mtspr(chud_7450_mmcr0, val); break; }
-            if(spr==chud_7450_pmc1) { mtspr(chud_7450_pmc1, val); break; }
-            if(spr==chud_7450_pmc2) { mtspr(chud_7450_pmc2, val); break; }
-            if(spr==chud_7450_siar) { mtspr(chud_7450_siar, val); break; }
-            if(spr==chud_7450_mmcr1) { mtspr(chud_7450_mmcr1, val); break; }
-            if(spr==chud_7450_pmc3) { mtspr(chud_7450_pmc3, val); break; }
-            if(spr==chud_7450_pmc4) { mtspr(chud_7450_pmc4, val); break; }
-            if(spr==chud_7450_tlbmiss) { mtspr(chud_7450_tlbmiss, val); break; }
-            if(spr==chud_7450_ptehi) { mtspr(chud_7450_ptehi, val); break; }
-            if(spr==chud_7450_ptelo) { mtspr(chud_7450_ptelo, val); break; }
-            if(spr==chud_7450_l3pm) { mtspr(chud_7450_l3pm, val); break; }
-            if(spr==chud_7450_iabr) { mtspr(chud_7450_iabr, val); break; }
-            if(spr==chud_7450_ldstdb) { mtspr(chud_7450_ldstdb, val); break; }
-            if(spr==chud_7450_ictc) { mtspr(chud_7450_ictc, val); break; }
-            if(spr==chud_7450_thrm1) { mtspr(chud_7450_thrm1, val); break; }
-            if(spr==chud_7450_thrm2) { mtspr(chud_7450_thrm2, val); break; }
-            if(spr==chud_7450_thrm3) { mtspr(chud_7450_thrm3, val); break; }
-            if(spr==chud_7450_pir) { mtspr(chud_7450_pir, val); break; }
-
-            if(spr==chud_7450_l2cr) {
-                retval = chudxnu_set_shadowed_spr(cpu, spr, val);
-		break;
-            }
-            
-            if(spr==chud_7450_l3cr) {
-                retval = chudxnu_set_shadowed_spr(cpu, spr, val);
-		break;
-            }
-            if(spr==chud_7450_ldstcr) {
-                retval = chudxnu_set_shadowed_spr(cpu, spr, val);
-		break;
-            }
-            if(spr==chud_7450_hid0) {
-                retval = chudxnu_set_shadowed_spr(cpu, spr, val);
-		break;
-            }
-            if(spr==chud_7450_hid1) {
-                retval = chudxnu_set_shadowed_spr(cpu, spr, val);
-		break;
-            }
-            if(spr==chud_7450_msscr0) {
-                retval = chudxnu_set_shadowed_spr(cpu, spr, val);
-		break;
-            }
-            if(spr==chud_7450_msssr0) {
-                retval = chudxnu_set_shadowed_spr(cpu, spr, val);
-		break;
-            }
-            if(spr==chud_7450_ictrl) {
-                retval = chudxnu_set_shadowed_spr(cpu, spr, val);
-		break;
-            }
-
-	    // 7455/7457 only
-            if(spr==chud_7455_sprg4) { mtspr(chud_7455_sprg4, val); break; }
-            if(spr==chud_7455_sprg5) { mtspr(chud_7455_sprg5, val); break; }
-            if(spr==chud_7455_sprg6) { mtspr(chud_7455_sprg6, val); break; }
-            if(spr==chud_7455_sprg7) { mtspr(chud_7455_sprg7, val); break; }
-            if(spr==chud_7455_ibat4u) { mtspr(chud_7455_ibat4u, val); break; }
-            if(spr==chud_7455_ibat4l) { mtspr(chud_7455_ibat4l, val); break; }
-            if(spr==chud_7455_ibat5u) { mtspr(chud_7455_ibat5u, val); break; }
-            if(spr==chud_7455_ibat5l) { mtspr(chud_7455_ibat5l, val); break; }
-            if(spr==chud_7455_ibat6u) { mtspr(chud_7455_ibat6u, val); break; }
-            if(spr==chud_7455_ibat6l) { mtspr(chud_7455_ibat6l, val); break; }
-            if(spr==chud_7455_ibat7u) { mtspr(chud_7455_ibat7u, val); break; }
-            if(spr==chud_7455_ibat7l) { mtspr(chud_7455_ibat7l, val); break; }
-            if(spr==chud_7455_dbat4u) { mtspr(chud_7455_dbat4u, val); break; }
-            if(spr==chud_7455_dbat4l) { mtspr(chud_7455_dbat4l, val); break; }
-            if(spr==chud_7455_dbat5u) { mtspr(chud_7455_dbat5u, val); break; }
-            if(spr==chud_7455_dbat5l) { mtspr(chud_7455_dbat5l, val); break; }
-            if(spr==chud_7455_dbat6u) { mtspr(chud_7455_dbat6u, val); break; }
-            if(spr==chud_7455_dbat6l) { mtspr(chud_7455_dbat6l, val); break; }
-            if(spr==chud_7455_dbat7u) { mtspr(chud_7455_dbat7u, val); break; }
-            if(spr==chud_7455_dbat7l) { mtspr(chud_7455_dbat7l, val); break; }
-        }
-        
-        if(cpu_subtype()==CPU_SUBTYPE_POWERPC_970) {
-            if(spr==chud_970_pir) { mtspr(chud_970_pir, val); break; }
-            if(spr==chud_970_pmc1) { mtspr(chud_970_pmc1, val); break; }
-            if(spr==chud_970_pmc2) { mtspr(chud_970_pmc2, val); break; }
-            if(spr==chud_970_pmc3) { mtspr(chud_970_pmc3, val); break; }
-            if(spr==chud_970_pmc4) { mtspr(chud_970_pmc4, val); break; }
-            if(spr==chud_970_pmc5) { mtspr(chud_970_pmc5, val); break; }
-            if(spr==chud_970_pmc6) { mtspr(chud_970_pmc6, val); break; }
-            if(spr==chud_970_pmc7) { mtspr(chud_970_pmc7, val); break; }
-            if(spr==chud_970_pmc8) { mtspr(chud_970_pmc8, val); break; }
-            if(spr==chud_970_hdec) { mtspr(chud_970_hdec, val); break; }
-        }
-        
-        /* we only get here if none of the above cases qualify */
-        retval = KERN_INVALID_ARGUMENT;
-    } while(0);
-
-    chudxnu_set_interrupts_enabled(oldlevel); /* re-enable interrupts */
-	
-    if(cpu>=0) { // cpu<0 means don't bind
-		chudxnu_unbind_thread(current_thread(), 0);
-    }
-  
-    return retval;
-}
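-
-/*
- * Usage sketch (illustrative): clear a 7450 performance counter on cpu 1.
- *
- *     kern_return_t kr = chudxnu_write_spr(1, chud_7450_pmc1, 0);
- */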
-
-__private_extern__
-kern_return_t chudxnu_write_spr64(int cpu, int spr, uint64_t val)
-{
-    kern_return_t retval = KERN_SUCCESS;
-    boolean_t oldlevel;
-    uint64_t *val_p = &val;
-
-    /* bind to requested CPU */
-    if(cpu>=0 && !(ml_at_interrupt_context() && cpu_number() == cpu)) { // cpu<0 means don't bind
-		if(chudxnu_bind_thread(current_thread(), cpu, 0)!=KERN_SUCCESS) {
-			return KERN_INVALID_ARGUMENT;
-		}
-    }
-
-    oldlevel = chudxnu_set_interrupts_enabled(FALSE); /* disable interrupts */
-
-    do {
-        /* PPC SPRs - 32-bit and 64-bit implementations */
-        if(spr==chud_ppc_srr0) { retval = mtspr64(chud_ppc_srr0, val_p); break; }
-        if(spr==chud_ppc_srr1) { retval = mtspr64(chud_ppc_srr1, val_p); break; }
-        if(spr==chud_ppc_dar) { retval = mtspr64(chud_ppc_dar, val_p); break; }
-        if(spr==chud_ppc_dsisr) { retval = mtspr64(chud_ppc_dsisr, val_p); break; }
-        if(spr==chud_ppc_sdr1) { retval = mtspr64(chud_ppc_sdr1, val_p); break; }
-        if(spr==chud_ppc_sprg0) { retval = mtspr64(chud_ppc_sprg0, val_p); break; }
-        if(spr==chud_ppc_sprg1) { retval = mtspr64(chud_ppc_sprg1, val_p); break; }
-        if(spr==chud_ppc_sprg2) { retval = mtspr64(chud_ppc_sprg2, val_p); break; }
-        if(spr==chud_ppc_sprg3) { retval = mtspr64(chud_ppc_sprg3, val_p); break; }
-        if(spr==chud_ppc_dabr) { retval = mtspr64(chud_ppc_dabr, val_p); break; }
-        if(spr==chud_ppc_msr) { /* this is the MSR for the calling process */
-            struct ppc_thread_state64 state;
-            mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT;
-            kern_return_t kr;
-            kr = chudxnu_thread_get_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, &count, TRUE /* user only */);
-            if(KERN_SUCCESS==kr) {
-                state.srr1 = val;
-                kr = chudxnu_thread_set_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, count, TRUE /* user only */);
-                if(KERN_SUCCESS!=kr) {
-                    retval = KERN_FAILURE;
-                }
-            } else {
-                retval = KERN_FAILURE;
-            }
-            break;
-        }
-        
-        /* PPC SPRs - 64-bit implementations */
-        if(spr==chud_ppc64_asr) { retval = mtspr64(chud_ppc64_asr, val_p); break; }
-        if(spr==chud_ppc64_accr) { retval = mtspr64(chud_ppc64_accr, val_p); break; }
-        if(spr==chud_ppc64_ctrl) { retval = mtspr64(chud_ppc64_ctrl, val_p); break; }
-        
-        /* Implementation Specific SPRs */
-        if(cpu_subtype()==CPU_SUBTYPE_POWERPC_970) {
-            /* HID0/1/4/5 are shadowed in per_proc; route them through
-               chudxnu_set_shadowed_spr64() below (the raw mtspr64() cases
-               formerly here made that shadowed path unreachable) */
-            if(spr==chud_970_mmcr0) { retval = mtspr64(chud_970_mmcr0, val_p); break; }
-            if(spr==chud_970_mmcr1) { retval = mtspr64(chud_970_mmcr1, val_p); break; }
-            if(spr==chud_970_mmcra) { retval = mtspr64(chud_970_mmcra, val_p); break; }
-            if(spr==chud_970_siar) { retval = mtspr64(chud_970_siar, val_p); break; }
-            if(spr==chud_970_sdar) { retval = mtspr64(chud_970_sdar, val_p); break; }
-            if(spr==chud_970_imc) { retval = mtspr64(chud_970_imc, val_p); break; }
-
-            if(spr==chud_970_rmor) { retval = mtspr64(chud_970_rmor, val_p); break; }
-            if(spr==chud_970_hrmor) { retval = mtspr64(chud_970_hrmor, val_p); break; }
-            if(spr==chud_970_hior) { retval = mtspr64(chud_970_hior, val_p); break; }
-            if(spr==chud_970_lpidr) { retval = mtspr64(chud_970_lpidr, val_p); break; }
-            if(spr==chud_970_lpcr) { retval = mtspr64(chud_970_lpcr, val_p); break; }
-            if(spr==chud_970_dabrx) { retval = mtspr64(chud_970_dabrx, val_p); break; }
-            
-            if(spr==chud_970_hsprg0) { retval = mtspr64(chud_970_hsprg0, val_p); break; }
-            if(spr==chud_970_hsprg1) { retval = mtspr64(chud_970_hsprg1, val_p); break; }
-            if(spr==chud_970_hsrr0) { retval = mtspr64(chud_970_hsrr0, val_p); break; }
-            if(spr==chud_970_hsrr1) { retval = mtspr64(chud_970_hsrr1, val_p); break; }
-            if(spr==chud_970_hdec) { retval = mtspr64(chud_970_hdec, val_p); break; }
-            if(spr==chud_970_trig0) { retval = mtspr64(chud_970_trig0, val_p); break; }
-            if(spr==chud_970_trig1) { retval = mtspr64(chud_970_trig1, val_p); break; }
-            if(spr==chud_970_trig2) { retval = mtspr64(chud_970_trig2, val_p); break; }
-            if(spr==chud_970_scomc) { retval = mtspr64(chud_970_scomc, val_p); break; }
-            if(spr==chud_970_scomd) { retval = mtspr64(chud_970_scomd, val_p); break; }
-            
-            if(spr==chud_970_hid0) {
-                retval = chudxnu_set_shadowed_spr64(cpu, spr, val);
-                break;
-            }
-
-            if(spr==chud_970_hid1) {
-                retval = chudxnu_set_shadowed_spr64(cpu, spr, val);
-                break;
-            }
-
-            if(spr==chud_970_hid4) {
-                retval = chudxnu_set_shadowed_spr64(cpu, spr, val);
-                break;
-            }
-            
-            if(spr==chud_970_hid5) {
-                retval = chudxnu_set_shadowed_spr64(cpu, spr, val);
-                break;
-            }
-            
-        }
-
-        /* we only get here if none of the above cases qualify */
-        retval = KERN_INVALID_ARGUMENT;
-    } while(0);
-
-    chudxnu_set_interrupts_enabled(oldlevel); /* re-enable interrupts */
-
-    if(cpu>=0) { // cpu<0 means don't bind
-		chudxnu_unbind_thread(current_thread(), 0);
-    }
- 
-    return retval;
-}
-
-#if 0
-#pragma mark **** perfmon facility ****
-#endif
-
-__private_extern__
-kern_return_t chudxnu_perfmon_acquire_facility(task_t task)
-{
-    return perfmon_acquire_facility(task);
-}
-
-__private_extern__
-kern_return_t chudxnu_perfmon_release_facility(task_t task)
-{
-    return perfmon_release_facility(task);
-}
-
-#if 0
-#pragma mark **** rupt counters ****
-#endif
-
-__private_extern__
-kern_return_t chudxnu_get_cpu_interrupt_counters(int cpu, interrupt_counters_t *rupts)
-{
-    if(cpu<0 || cpu>=chudxnu_phys_cpu_count()) { // check sanity of cpu argument
-        return KERN_FAILURE;
-    }
-
-    if(rupts) {
-        boolean_t oldlevel = ml_set_interrupts_enabled(FALSE);
-        struct per_proc_info *per_proc;
-
-        per_proc = PerProcTable[cpu].ppe_vaddr;
-        rupts->hwResets = per_proc->hwCtr.hwResets;
-        rupts->hwMachineChecks = per_proc->hwCtr.hwMachineChecks;
-        rupts->hwDSIs = per_proc->hwCtr.hwDSIs;
-        rupts->hwISIs = per_proc->hwCtr.hwISIs;
-        rupts->hwExternals = per_proc->hwCtr.hwExternals;
-        rupts->hwAlignments = per_proc->hwCtr.hwAlignments;
-        rupts->hwPrograms = per_proc->hwCtr.hwPrograms;
-        rupts->hwFloatPointUnavailable = per_proc->hwCtr.hwFloatPointUnavailable;
-        rupts->hwDecrementers = per_proc->hwCtr.hwDecrementers;
-        rupts->hwIOErrors = per_proc->hwCtr.hwIOErrors;
-        rupts->hwSystemCalls = per_proc->hwCtr.hwSystemCalls;
-        rupts->hwTraces = per_proc->hwCtr.hwTraces;
-        rupts->hwFloatingPointAssists = per_proc->hwCtr.hwFloatingPointAssists;
-        rupts->hwPerformanceMonitors = per_proc->hwCtr.hwPerformanceMonitors;
-        rupts->hwAltivecs = per_proc->hwCtr.hwAltivecs;
-        rupts->hwInstBreakpoints = per_proc->hwCtr.hwInstBreakpoints;
-        rupts->hwSystemManagements = per_proc->hwCtr.hwSystemManagements;
-        rupts->hwAltivecAssists = per_proc->hwCtr.hwAltivecAssists;
-        rupts->hwThermal = per_proc->hwCtr.hwThermal;
-        rupts->hwSoftPatches = per_proc->hwCtr.hwSoftPatches;
-        rupts->hwMaintenances = per_proc->hwCtr.hwMaintenances;
-        rupts->hwInstrumentations = per_proc->hwCtr.hwInstrumentations;
-
-        ml_set_interrupts_enabled(oldlevel);
-        return KERN_SUCCESS;
-    } else {
-        return KERN_FAILURE;
-    }
-}
-
-__private_extern__
-kern_return_t chudxnu_clear_cpu_interrupt_counters(int cpu)
-{
-    if(cpu<0 || cpu>=chudxnu_phys_cpu_count()) { // check sanity of cpu argument
-        return KERN_FAILURE;
-    }
-
-    bzero((char *)&(PerProcTable[cpu].ppe_vaddr->hwCtr), sizeof(struct hwCtrs));
-    return KERN_SUCCESS;
-}
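-
-/*
- * Usage sketch (illustrative): snapshot and reset cpu 0's counters.
- *
- *     interrupt_counters_t rupts;
- *     if(chudxnu_get_cpu_interrupt_counters(0, &rupts) == KERN_SUCCESS) {
- *         // examine rupts.hwDecrementers, rupts.hwSystemCalls, ...
- *         chudxnu_clear_cpu_interrupt_counters(0);
- *     }
- */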
-
-#if 0
-#pragma mark *** deprecated ***
-#endif
-
-//DEPRECATED
-__private_extern__
-void chudxnu_flush_caches(void)
-{
-	cacheInit();
-}
-
-//DEPRECATED
-__private_extern__
-void chudxnu_enable_caches(boolean_t enable)
-{
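-	/* per the shadowed-spr code above: cacheInit() flushes and re-enables the caches; cacheDisable() then leaves them off */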
-	if(!enable) {
-		cacheInit();
-		cacheDisable();
-	} else {
-		cacheInit();
-	}
-}
diff --git a/osfmk/chud/ppc/chud_osfmk_callback_ppc.c b/osfmk/chud/ppc/chud_osfmk_callback_ppc.c
deleted file mode 100644
index 3077f07c3..000000000
--- a/osfmk/chud/ppc/chud_osfmk_callback_ppc.c
+++ /dev/null
@@ -1,549 +0,0 @@
-/*
- * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <stdint.h>
-#include <mach/boolean.h>
-#include <mach/mach_types.h>
-
-#include <kern/kern_types.h>
-#include <kern/processor.h>
-#include <kern/thread_call.h>
-#include <kern/kalloc.h>
-#include <kern/thread.h>
-
-#include <libkern/OSAtomic.h>
-
-#include <ppc/machine_routines.h>
-#include <ppc/cpu_data.h>
-#include <ppc/cpu_internal.h>
-#include <ppc/exception.h>
-#include <ppc/thread.h>
-#include <ppc/trap.h>
-
-#include <chud/chud_xnu.h>
-#include <chud/chud_xnu_private.h>
-
-__private_extern__
-void chudxnu_cancel_all_callbacks(void)
-{
-    chudxnu_cpu_timer_callback_cancel_all();
-    chudxnu_trap_callback_cancel();
-    chudxnu_interrupt_callback_cancel();
-    chudxnu_perfmon_ast_callback_cancel();
-    chudxnu_cpusig_callback_cancel();
-    chudxnu_kdebug_callback_cancel();
-    chudxnu_syscall_callback_cancel();
-    chudxnu_dtrace_callback_cancel();
-}
-
-static chudcpu_data_t chudcpu_boot_cpu;
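-/* The boot processor's chud data is static, presumably because it is
-   allocated before kalloc() is available (assumption). */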
-
-void *chudxnu_per_proc_alloc(boolean_t boot_processor)
-{
-	chudcpu_data_t	*chud_proc_info;
-
-	if (boot_processor) {
-		chud_proc_info = &chudcpu_boot_cpu;
-	} else {
-		chud_proc_info = (chudcpu_data_t *)kalloc(sizeof(chudcpu_data_t));
-		if (chud_proc_info == (chudcpu_data_t *)NULL) {
-			return (void *)NULL;
-		}
-	}
-	bzero((char *)chud_proc_info, sizeof(chudcpu_data_t));
-	chud_proc_info->t_deadline = 0xFFFFFFFFFFFFFFFFULL;
-	return (void *)chud_proc_info;
-}
-
-void chudxnu_per_proc_free(void *per_proc_chud)
-{
-	if (per_proc_chud == (void *)&chudcpu_boot_cpu) {
-		return;
-	} else {
-		kfree(per_proc_chud,sizeof(chudcpu_data_t));
-	}
-}
-
-static void
-chudxnu_private_cpu_timer_callback(__unused timer_call_param_t param0,
-				   __unused timer_call_param_t param1)
-{
-    chudcpu_data_t	*chud_proc_info;
-    boolean_t oldlevel;
-    struct ppc_thread_state64 state;
-    mach_msg_type_number_t count;
-    chudxnu_cpu_timer_callback_func_t fn = NULL;
-
-    oldlevel = ml_set_interrupts_enabled(FALSE);
-    chud_proc_info = (chudcpu_data_t *)(getPerProc()->pp_chud);
-
-    count = PPC_THREAD_STATE64_COUNT;
-    if(chudxnu_thread_get_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, &count, FALSE)==KERN_SUCCESS) {
-        fn = chud_proc_info->cpu_timer_callback_fn;
-        if(fn) {
-            (fn)(PPC_THREAD_STATE64, (thread_state_t)&state, count);
-        }
-    }
-
-    ml_set_interrupts_enabled(oldlevel);
-}
-
-__private_extern__
-kern_return_t chudxnu_cpu_timer_callback_enter(chudxnu_cpu_timer_callback_func_t func, uint32_t time, uint32_t units)
-{
-    chudcpu_data_t	*chud_proc_info;
-    boolean_t oldlevel;
-
-    oldlevel = ml_set_interrupts_enabled(FALSE);
-    chud_proc_info = (chudcpu_data_t *)(getPerProc()->pp_chud);
-
-    timer_call_cancel(&(chud_proc_info->cpu_timer_call)); // cancel any existing callback for this cpu
-
-    chud_proc_info->cpu_timer_callback_fn = func;
-
-    clock_interval_to_deadline(time, units, &(chud_proc_info->t_deadline));
-    timer_call_setup(&(chud_proc_info->cpu_timer_call), chudxnu_private_cpu_timer_callback, NULL);
-    timer_call_enter(&(chud_proc_info->cpu_timer_call), chud_proc_info->t_deadline);
-
-    ml_set_interrupts_enabled(oldlevel);
-    return KERN_SUCCESS;
-}
-
-__private_extern__
-kern_return_t chudxnu_cpu_timer_callback_cancel(void)
-{
-    chudcpu_data_t	*chud_proc_info;
-    boolean_t oldlevel;
-
-    oldlevel = ml_set_interrupts_enabled(FALSE);
-    chud_proc_info = (chudcpu_data_t *)(getPerProc()->pp_chud);
-
-    timer_call_cancel(&(chud_proc_info->cpu_timer_call));
-    chud_proc_info->t_deadline = chud_proc_info->t_deadline | ~(chud_proc_info->t_deadline); // set to max value
-    chud_proc_info->cpu_timer_callback_fn = NULL;
-
-    ml_set_interrupts_enabled(oldlevel);
-    return KERN_SUCCESS;
-}
-
-__private_extern__
-kern_return_t chudxnu_cpu_timer_callback_cancel_all(void)
-{
-    unsigned int cpu;
-    chudcpu_data_t	*chud_proc_info;
-
-    for(cpu=0; cpu<real_ncpus; cpu++) {
-    	if ((PerProcTable[cpu].ppe_vaddr == 0)
-    	    || (PerProcTable[cpu].ppe_vaddr->pp_chud == 0))
-			continue;
-    	chud_proc_info = (chudcpu_data_t *)PerProcTable[cpu].ppe_vaddr->pp_chud;
-        timer_call_cancel(&(chud_proc_info->cpu_timer_call));
-        chud_proc_info->t_deadline = chud_proc_info->t_deadline | ~(chud_proc_info->t_deadline); // set to max value
-        chud_proc_info->cpu_timer_callback_fn = NULL;
-    }
-    return KERN_SUCCESS;
-}
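
Both cancel paths clear the deadline with t_deadline | ~(t_deadline). For any unsigned value x, x | ~x is all-ones, so this is simply a branch-free way of restoring the 0xFFFFFFFFFFFFFFFFULL "no deadline" sentinel that chudxnu_per_proc_alloc installs. A short demonstration:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t t = 0x1234;                 /* any starting value */
        t = t | ~t;                          /* the idiom used above */
        assert(t == UINT64_MAX);             /* always all-ones */
        return 0;
    }
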
-
-#if 0
-#pragma mark **** trap ****
-#endif
-static kern_return_t chud_null_trap(uint32_t trapentry, thread_flavor_t flavor,
-	thread_state_t tstate,  mach_msg_type_number_t count);
-static chudxnu_trap_callback_func_t trap_callback_fn = chud_null_trap;
-
-static kern_return_t chud_null_trap(uint32_t trapentry __unused, thread_flavor_t flavor __unused,
-	thread_state_t tstate __unused,  mach_msg_type_number_t count __unused) {
-	return KERN_FAILURE;
-}
-
-
-#define TRAP_ENTRY_POINT(t) ((t==T_RESET) ? 0x100 : \
-                             (t==T_MACHINE_CHECK) ? 0x200 : \
-                             (t==T_DATA_ACCESS) ? 0x300 : \
-                             (t==T_DATA_SEGMENT) ? 0x380 : \
-                             (t==T_INSTRUCTION_ACCESS) ? 0x400 : \
-                             (t==T_INSTRUCTION_SEGMENT) ? 0x480 : \
-                             (t==T_INTERRUPT) ? 0x500 : \
-                             (t==T_ALIGNMENT) ? 0x600 : \
-                             (t==T_PROGRAM) ? 0x700 : \
-                             (t==T_FP_UNAVAILABLE) ? 0x800 : \
-                             (t==T_DECREMENTER) ? 0x900 : \
-                             (t==T_IO_ERROR) ? 0xa00 : \
-                             (t==T_RESERVED) ? 0xb00 : \
-                             (t==T_SYSTEM_CALL) ? 0xc00 : \
-                             (t==T_TRACE) ? 0xd00 : \
-                             (t==T_FP_ASSIST) ? 0xe00 : \
-                             (t==T_PERF_MON) ? 0xf00 : \
-                             (t==T_VMX) ? 0xf20 : \
-                             (t==T_INVALID_EXCP0) ? 0x1000 : \
-                             (t==T_INVALID_EXCP1) ? 0x1100 : \
-                             (t==T_INVALID_EXCP2) ? 0x1200 : \
-                             (t==T_INSTRUCTION_BKPT) ? 0x1300 : \
-                             (t==T_SYSTEM_MANAGEMENT) ? 0x1400 : \
-                             (t==T_SOFT_PATCH) ? 0x1500 : \
-                             (t==T_ALTIVEC_ASSIST) ? 0x1600 : \
-                             (t==T_THERMAL) ? 0x1700 : \
-                             (t==T_ARCHDEP0) ? 0x1800 : \
-                             (t==T_INSTRUMENTATION) ? 0x2000 : \
-                             0x0)
-
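TRAP_ENTRY_POINT maps XNU's internal PPC trap numbers onto the architectural exception-vector offsets (0x100 system reset, 0x300 data access, 0x900 decrementer, and so on), yielding 0x0 for anything it does not recognize, which is why callers below test the result against 0x0 before dispatching. For example:

    uint32_t entry = TRAP_ENTRY_POINT(T_DECREMENTER);   /* evaluates to 0x900 */
    if (entry != 0x0) {
        /* recognized trap: entry is the vector offset to report */
    }
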
-static kern_return_t
-chudxnu_private_trap_callback(int trapno, struct savearea *ssp,
-			      __unused unsigned int dsisr,
-			      __unused addr64_t dar)
-{
-    boolean_t oldlevel = ml_set_interrupts_enabled(FALSE);
-    kern_return_t retval = KERN_FAILURE;
-    uint32_t trapentry = TRAP_ENTRY_POINT(trapno);
-    chudxnu_trap_callback_func_t fn = trap_callback_fn;
-
-    if(trapentry!=0x0) {
-        if(fn) {
-            struct ppc_thread_state64 state;
-            mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT;
-            chudxnu_copy_savearea_to_threadstate(PPC_THREAD_STATE64, (thread_state_t)&state, &count, ssp);
-            retval = (fn)(trapentry, PPC_THREAD_STATE64, (thread_state_t)&state, count);
-        }
-    }
-
-    ml_set_interrupts_enabled(oldlevel);
-
-    return retval;
-}
-
-__private_extern__ kern_return_t
-chudxnu_trap_callback_enter(chudxnu_trap_callback_func_t func)
-{
-	if(OSCompareAndSwapPtr(NULL, chudxnu_private_trap_callback, 
-		(void * volatile *)&perfTrapHook)) {
-
-		chudxnu_trap_callback_func_t old = trap_callback_fn;
-		while(!OSCompareAndSwapPtr(old, func, 
-			(void * volatile *)&trap_callback_fn)) {
-			old = trap_callback_fn;
-		}
-	
-		return KERN_SUCCESS;
-	}
-	return KERN_FAILURE;
-}
-
-__private_extern__ kern_return_t
-chudxnu_trap_callback_cancel(void)
-{
-	if(OSCompareAndSwapPtr(chudxnu_private_trap_callback,  NULL,
-		(void * volatile *)&perfTrapHook)) {
-
-		chudxnu_trap_callback_func_t old = trap_callback_fn;
-		while(!OSCompareAndSwapPtr(old, chud_null_trap, 
-			(void * volatile *)&trap_callback_fn)) {
-			old = trap_callback_fn;
-		}
-		
-		return KERN_SUCCESS;
-	}
-	return KERN_FAILURE;
-}
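
The enter/cancel pair above is the registration protocol repeated for every hook in this file: a single compare-and-swap claims (or releases) the global dispatch slot (here perfTrapHook), and a CAS loop then publishes the client callback, so a racing second client gets KERN_FAILURE rather than silently displacing the first. A self-contained C11 sketch of the same two-slot protocol, with hypothetical names:

    #include <stdatomic.h>
    #include <stddef.h>

    typedef void (*hook_fn_t)(void);

    static _Atomic(hook_fn_t) hook_slot;   /* the dispatch pointer the kernel calls  */
    static _Atomic(hook_fn_t) client_fn;   /* the callback the dispatch stub invokes */

    static void dispatch_stub(void)
    {
        hook_fn_t fn = atomic_load(&client_fn);
        if (fn) fn();
    }

    int hook_register(hook_fn_t fn)
    {
        hook_fn_t expected = NULL;
        /* claim the slot only if no one else holds it */
        if (!atomic_compare_exchange_strong(&hook_slot, &expected, dispatch_stub))
            return -1;                     /* already registered: fail, don't displace */
        atomic_store(&client_fn, fn);      /* then publish the client */
        return 0;
    }

    int hook_unregister(void)
    {
        hook_fn_t expected = dispatch_stub;
        if (!atomic_compare_exchange_strong(&hook_slot, &expected, NULL))
            return -1;                     /* we were not the registered owner */
        atomic_store(&client_fn, NULL);
        return 0;
    }
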
-
-#if 0
-#pragma mark **** ast ****
-#endif
-static kern_return_t chud_null_ast(thread_flavor_t flavor, thread_state_t tstate,  
-	mach_msg_type_number_t count);
-static chudxnu_perfmon_ast_callback_func_t perfmon_ast_callback_fn = chud_null_ast;
-
-static kern_return_t chud_null_ast(thread_flavor_t flavor __unused,
-	thread_state_t tstate __unused,  mach_msg_type_number_t count __unused) {
-	return KERN_FAILURE;
-}
-
-
-static kern_return_t
-chudxnu_private_chud_ast_callback(__unused int trapno,
-				  __unused struct savearea *ssp,
-				  __unused unsigned int dsisr,
-				  __unused addr64_t dar)
-{
-    boolean_t oldlevel = ml_set_interrupts_enabled(FALSE);
-    ast_t *myast = ast_pending();
-    kern_return_t retval = KERN_FAILURE;
-    chudxnu_perfmon_ast_callback_func_t fn = perfmon_ast_callback_fn;
-    
-	if(*myast & AST_CHUD_URGENT) {
-		*myast &= ~(AST_CHUD_URGENT | AST_CHUD);
-		if((*myast & AST_PREEMPTION) != AST_PREEMPTION) *myast &= ~(AST_URGENT);
-		retval = KERN_SUCCESS;
-	} else if(*myast & AST_CHUD) {
-		*myast &= ~(AST_CHUD);
-		retval = KERN_SUCCESS;
-	}
-
-    if(fn) {
-		struct ppc_thread_state64 state;
-		mach_msg_type_number_t count;
-		count = PPC_THREAD_STATE64_COUNT;
-		
-		if(chudxnu_thread_get_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, &count, FALSE)==KERN_SUCCESS) {
-			(fn)(PPC_THREAD_STATE64, (thread_state_t)&state, count);
-		}
-    }
-    
-#if 0
-    // ASTs from ihandler go through thandler and are made to look like traps
-    // always handle AST_CHUD_URGENT if there's a callback
-    // only handle AST_CHUD if it's the only AST pending
-    if(perfmon_ast_callback_fn && ((*myast & AST_CHUD_URGENT) || ((*myast & AST_CHUD) && !(*myast & AST_URGENT)))) {
-        struct ppc_thread_state64 state;
-        mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT;
-        chudxnu_copy_savearea_to_threadstate(PPC_THREAD_STATE64, (thread_state_t)&state, &count, ssp);
-        if(*myast & AST_CHUD_URGENT) {
-            *myast &= ~(AST_CHUD_URGENT | AST_CHUD);
-            if((*myast & AST_PREEMPTION) != AST_PREEMPTION) *myast &= ~(AST_URGENT);
-			retval = KERN_SUCCESS;
-        } else if(*myast & AST_CHUD) {
-            *myast &= ~(AST_CHUD);
-			retval = KERN_SUCCESS;
-        }
-        (perfmon_ast_callback_fn)(PPC_THREAD_STATE64, (thread_state_t)&state, count);
-    }
-#endif
-
-    ml_set_interrupts_enabled(oldlevel);
-	return retval;
-}
-
-__private_extern__ kern_return_t
-chudxnu_perfmon_ast_callback_enter(chudxnu_perfmon_ast_callback_func_t func)
-{
-	if(OSCompareAndSwapPtr(NULL, chudxnu_private_chud_ast_callback,
-		(void * volatile *)&perfASTHook)) {
-		chudxnu_perfmon_ast_callback_func_t old = perfmon_ast_callback_fn;
-
-		while(!OSCompareAndSwapPtr(old, func,
-			(void * volatile *)&perfmon_ast_callback_fn)) {
-			old = perfmon_ast_callback_fn;
-		}
-
-		return KERN_SUCCESS;
-	}
-	return KERN_FAILURE;
-}
-
-__private_extern__ kern_return_t
-chudxnu_perfmon_ast_callback_cancel(void)
-{
-	if(OSCompareAndSwapPtr(chudxnu_private_chud_ast_callback, NULL,
-		(void * volatile *)&perfASTHook)) {
-		chudxnu_perfmon_ast_callback_func_t old = perfmon_ast_callback_fn;
-
-		while(!OSCompareAndSwapPtr(old, chud_null_ast,
-			(void * volatile *)&perfmon_ast_callback_fn)) {
-			old = perfmon_ast_callback_fn;
-		}
-
-		return KERN_SUCCESS;
-	}
-	return KERN_FAILURE;
-}
-
-__private_extern__
-kern_return_t chudxnu_perfmon_ast_send_urgent(boolean_t urgent)
-{
-    boolean_t oldlevel = ml_set_interrupts_enabled(FALSE);
-	ast_t *myast = ast_pending();
-
-    if(urgent) {
-        *myast |= (AST_CHUD_URGENT | AST_URGENT);
-    } else {
-        *myast |= (AST_CHUD);
-    }
-
-    ml_set_interrupts_enabled(oldlevel);
-    return KERN_SUCCESS;
-}
-
-#if 0
-#pragma mark **** interrupt ****
-#endif
-static kern_return_t chud_null_int(uint32_t trapentry, thread_flavor_t flavor, 
-	thread_state_t tstate,  mach_msg_type_number_t count);
-static chudxnu_interrupt_callback_func_t interrupt_callback_fn = chud_null_int;
-
-static kern_return_t chud_null_int(uint32_t trapentry __unused, thread_flavor_t flavor __unused,
-	thread_state_t tstate __unused,  mach_msg_type_number_t count __unused) {
-	return KERN_FAILURE;
-}
-
-
-static kern_return_t
-chudxnu_private_interrupt_callback(int trapno, struct savearea *ssp,
-				   __unused unsigned int dsisr,
-				   __unused addr64_t dar)
-{
-    chudxnu_interrupt_callback_func_t fn = interrupt_callback_fn;
-    
-    if(fn) {
-        struct ppc_thread_state64 state;
-        mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT;
-        chudxnu_copy_savearea_to_threadstate(PPC_THREAD_STATE64, (thread_state_t)&state, &count, ssp);
-        return (fn)(TRAP_ENTRY_POINT(trapno), PPC_THREAD_STATE64, (thread_state_t)&state, count);
-    } else {
-        return KERN_FAILURE;
-    }
-}
-
-__private_extern__
-kern_return_t chudxnu_interrupt_callback_enter(chudxnu_interrupt_callback_func_t func)
-{
-	if(OSCompareAndSwapPtr(NULL, chudxnu_private_interrupt_callback,
-		(void * volatile *)&perfIntHook)) {
-		chudxnu_interrupt_callback_func_t old = interrupt_callback_fn;
-
-		while(!OSCompareAndSwapPtr(old, func,
-			(void * volatile *)&interrupt_callback_fn)) {
-			old = interrupt_callback_fn;
-		}
-
-		return KERN_SUCCESS;
-	}
-	return KERN_FAILURE;
-}
-
-__private_extern__
-kern_return_t chudxnu_interrupt_callback_cancel(void)
-{
-	if(OSCompareAndSwapPtr(chudxnu_private_interrupt_callback, NULL, 
-		(void * volatile *)&perfIntHook)) {
-		chudxnu_interrupt_callback_func_t old = interrupt_callback_fn;
-
-		while(!OSCompareAndSwapPtr(old, chud_null_int,
-			(void * volatile *)&interrupt_callback_fn)) {
-			old = interrupt_callback_fn;
-		}
-
-		return KERN_SUCCESS;
-	}
-	return KERN_FAILURE;
-}
-
-#if 0
-#pragma mark **** cpu signal ****
-#endif
-static chudxnu_cpusig_callback_func_t cpusig_callback_fn = NULL;
-extern perfCallback perfCpuSigHook; /* function hook into cpu_signal_handler() */
-
-static kern_return_t
-chudxnu_private_cpu_signal_handler(int request, struct savearea *ssp,
-				   __unused unsigned int arg0,
-				   __unused addr64_t arg1)
-{
-    chudxnu_cpusig_callback_func_t fn = cpusig_callback_fn;
-    
-    if(fn) {
-        struct ppc_thread_state64 state;
-        mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT;
-        chudxnu_copy_savearea_to_threadstate(PPC_THREAD_STATE64, (thread_state_t)&state, &count, ssp);
-        (fn)(request, PPC_THREAD_STATE64, (thread_state_t)&state, count);
-    }
-    return KERN_SUCCESS; // ignored
-}
-
-__private_extern__
-kern_return_t chudxnu_cpusig_callback_enter(chudxnu_cpusig_callback_func_t func)
-{
-	if(OSCompareAndSwapPtr(NULL, chudxnu_private_cpu_signal_handler,
-		(void * volatile *)&perfCpuSigHook)) {
-		chudxnu_cpusig_callback_func_t old = cpusig_callback_fn;
-
-		while(!OSCompareAndSwapPtr(old, func,
-			(void * volatile *)&cpusig_callback_fn)) {
-			old = cpusig_callback_fn;
-		}
-
-		return KERN_SUCCESS;
-	}
-	return KERN_FAILURE;
-}
-
-__private_extern__
-kern_return_t chudxnu_cpusig_callback_cancel(void)
-{
-    if(OSCompareAndSwapPtr(chudxnu_private_cpu_signal_handler, NULL,
-		(void * volatile *)&perfCpuSigHook)) {
-		chudxnu_cpusig_callback_func_t old = cpusig_callback_fn;
-
-		while(!OSCompareAndSwapPtr(old, NULL,
-			(void * volatile *)&cpusig_callback_fn)) {
-			old = cpusig_callback_fn;
-		}
-
-		return KERN_SUCCESS;
-	}
-	return KERN_FAILURE;
-}
-
-__private_extern__
-kern_return_t chudxnu_cpusig_send(int otherCPU, uint32_t request)
-{
-    int thisCPU;
-    kern_return_t retval = KERN_FAILURE;
-    int retries = 0;
-    boolean_t oldlevel;
-    uint32_t temp[2];
-
-    oldlevel = ml_set_interrupts_enabled(FALSE);
-    thisCPU = cpu_number();
-
-    if(thisCPU!=otherCPU) {
-        temp[0] = 0xFFFFFFFF;		/* set sync flag */
-        temp[1] = request;			/* set request */
-        __asm__ volatile("eieio");	/* force order */
-        __asm__ volatile("sync");	/* force to memory */
-
-        do {
-            retval=cpu_signal(otherCPU, SIGPcpureq, CPRQchud, (uint32_t)&temp);
-        } while(retval!=KERN_SUCCESS && (retries++)<16);
-    
-        if(retries>=16) {
-            retval = KERN_FAILURE;
-        } else {
-            retval = hw_cpu_sync(temp, LockTimeOut); /* wait for the other processor */
-            if(!retval) {
-                retval = KERN_FAILURE;
-            } else {
-                retval = KERN_SUCCESS;
-            }
-        }
-    } else {
-        retval = KERN_INVALID_ARGUMENT;
-    }
-
-    ml_set_interrupts_enabled(oldlevel);
-    return retval;
-}
-
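chudxnu_cpusig_send above is a small cross-CPU request protocol: write a sync flag and the request word, force ordering with eieio/sync so the target observes them before the signal arrives, retry cpu_signal() up to 16 times, then wait in hw_cpu_sync() for the target to acknowledge (bounded by LockTimeOut). The bounded-retry shape in isolation, with a hypothetical try_send() standing in for cpu_signal():

    #include <assert.h>

    #define MAX_RETRIES 16

    /* stand-in for cpu_signal(); fails twice to exercise the retry path */
    static int try_send(int cpu, unsigned int req)
    {
        static int calls;
        (void)cpu; (void)req;
        return (++calls < 3) ? -1 : 0;
    }

    static int send_with_retries(int cpu, unsigned int req)
    {
        int tries = 0, rc;
        do {
            rc = try_send(cpu, req);
        } while (rc != 0 && ++tries < MAX_RETRIES);
        return rc;             /* 0 on success, nonzero once the budget is spent */
    }

    int main(void)
    {
        assert(send_with_retries(1, 0x42) == 0);
        return 0;
    }
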
diff --git a/osfmk/chud/ppc/chud_spr.h b/osfmk/chud/ppc/chud_spr.h
deleted file mode 100644
index 479f664be..000000000
--- a/osfmk/chud/ppc/chud_spr.h
+++ /dev/null
@@ -1,273 +0,0 @@
-/*
- * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#ifndef _CHUD_SPR_H_
-#define _CHUD_SPR_H_
-
-/* PPC SPRs - 32-bit and 64-bit implementations */
-#define chud_ppc_srr0		26
-#define chud_ppc_srr1		27
-#define chud_ppc_dsisr		18
-#define chud_ppc_dar		19
-#define chud_ppc_dec		22
-#define chud_ppc_sdr1		25
-#define chud_ppc_sprg0		272
-#define chud_ppc_sprg1		273
-#define chud_ppc_sprg2		274
-#define chud_ppc_sprg3		275
-#define chud_ppc_ear		282
-#define chud_ppc_tbl		284
-#define chud_ppc_tbu		285
-#define chud_ppc_pvr		287
-#define chud_ppc_ibat0u		528
-#define chud_ppc_ibat0l		529
-#define chud_ppc_ibat1u		530
-#define chud_ppc_ibat1l		531
-#define chud_ppc_ibat2u		532
-#define chud_ppc_ibat2l		533
-#define chud_ppc_ibat3u		534
-#define chud_ppc_ibat3l		535
-#define chud_ppc_dbat0u		536
-#define chud_ppc_dbat0l		537
-#define chud_ppc_dbat1u		538
-#define chud_ppc_dbat1l		539
-#define chud_ppc_dbat2u		540
-#define chud_ppc_dbat2l		541
-#define chud_ppc_dbat3u		542
-#define chud_ppc_dbat3l		543
-#define chud_ppc_dabr		1013
-#define chud_ppc_msr		10000	/* FAKE */
-
-/* PPC SPRs - 32-bit implementations */
-#define chud_ppc32_sr0		20000	/* FAKE */
-#define chud_ppc32_sr1		20001	/* FAKE */
-#define chud_ppc32_sr2		20002	/* FAKE */
-#define chud_ppc32_sr3		20003	/* FAKE */
-#define chud_ppc32_sr4		20004	/* FAKE */
-#define chud_ppc32_sr5		20005	/* FAKE */
-#define chud_ppc32_sr6		20006	/* FAKE */
-#define chud_ppc32_sr7		20007	/* FAKE */
-#define chud_ppc32_sr8		20008	/* FAKE */
-#define chud_ppc32_sr9		20009	/* FAKE */
-#define chud_ppc32_sr10		20010	/* FAKE */
-#define chud_ppc32_sr11		20011	/* FAKE */
-#define chud_ppc32_sr12		20012	/* FAKE */
-#define chud_ppc32_sr13		20013	/* FAKE */
-#define chud_ppc32_sr14		20014	/* FAKE */
-#define chud_ppc32_sr15		20015	/* FAKE */
-
-/* PPC SPRs - 64-bit implementations */
-#define chud_ppc64_asr		280
-
-/* PPC SPRs - 750/750CX/750CXe/750FX Specific */
-#define chud_750_upmc1		937
-#define chud_750_upmc2		938
-#define chud_750_upmc3		941
-#define chud_750_upmc4		942
-#define chud_750_mmcr0		952
-#define chud_750_pmc1		953
-#define chud_750_pmc2		954
-#define chud_750_sia		955
-#define chud_750_mmcr1		956
-#define chud_750_pmc3		957
-#define chud_750_pmc4		958
-#define chud_750_hid0		1008
-#define chud_750_hid1		1009
-#define chud_750_iabr		1010
-#define chud_750_l2cr		1017
-#define chud_750_ictc		1019
-#define chud_750_thrm1		1020
-#define chud_750_thrm2		1021
-#define chud_750_thrm3		1022
-#define chud_750fx_ibat4u	560 /* 750FX only */
-#define chud_750fx_ibat4l	561 /* 750FX only */
-#define chud_750fx_ibat5u	562 /* 750FX only */
-#define chud_750fx_ibat5l	563 /* 750FX only */
-#define chud_750fx_ibat6u	564 /* 750FX only */
-#define chud_750fx_ibat6l	565 /* 750FX only */
-#define chud_750fx_ibat7u	566 /* 750FX only */
-#define chud_750fx_ibat7l	567 /* 750FX only */
-#define chud_750fx_dbat4u	568 /* 750FX only */
-#define chud_750fx_dbat4l	569 /* 750FX only */
-#define chud_750fx_dbat5u	570 /* 750FX only */
-#define chud_750fx_dbat5l	571 /* 750FX only */
-#define chud_750fx_dbat6u	572 /* 750FX only */
-#define chud_750fx_dbat6l	573 /* 750FX only */
-#define chud_750fx_dbat7u	574 /* 750FX only */
-#define chud_750fx_dbat7l	575 /* 750FX only */
-#define chud_750fx_hid2		1016  /* 750FX only */
-
-/* PPC SPRs - 7400/7410 Specific */
-#define chud_7400_upmc1		937
-#define chud_7400_upmc2		938
-#define chud_7400_upmc3		941
-#define chud_7400_upmc4		942
-#define chud_7400_mmcr2		944
-#define chud_7400_bamr		951
-#define chud_7400_mmcr0		952
-#define chud_7400_pmc1		953
-#define chud_7400_pmc2		954
-#define chud_7400_siar		955 
-#define chud_7400_mmcr1		956
-#define chud_7400_pmc3		957
-#define chud_7400_pmc4		958
-#define chud_7400_sda		959
-#define chud_7400_hid0		1008
-#define chud_7400_hid1		1009
-#define chud_7400_iabr		1010
-#define chud_7400_msscr0	1014
-#define chud_7410_l2pmcr	1016 /* 7410 only */
-#define chud_7400_l2cr		1017
-#define chud_7400_ictc		1019
-#define chud_7400_thrm1		1020
-#define chud_7400_thrm2		1021
-#define chud_7400_thrm3		1022
-#define chud_7400_pir		1023
-
-/* PPC SPRs - 7450/7455 Specific */
-#define chud_7455_sprg4		276 /* 7455 only */
-#define chud_7455_sprg5		277 /* 7455 only */
-#define chud_7455_sprg6		278 /* 7455 only */
-#define chud_7455_sprg7		279 /* 7455 only */
-#define chud_7455_ibat4u	560 /* 7455 only */
-#define chud_7455_ibat4l	561 /* 7455 only */
-#define chud_7455_ibat5u	562 /* 7455 only */
-#define chud_7455_ibat5l	563 /* 7455 only */
-#define chud_7455_ibat6u	564 /* 7455 only */
-#define chud_7455_ibat6l	565 /* 7455 only */
-#define chud_7455_ibat7u	566 /* 7455 only */
-#define chud_7455_ibat7l	567 /* 7455 only */
-#define chud_7455_dbat4u	568 /* 7455 only */
-#define chud_7455_dbat4l	569 /* 7455 only */
-#define chud_7455_dbat5u	570 /* 7455 only */
-#define chud_7455_dbat5l	571 /* 7455 only */
-#define chud_7455_dbat6u	572 /* 7455 only */
-#define chud_7455_dbat6l	573 /* 7455 only */
-#define chud_7455_dbat7u	574 /* 7455 only */
-#define chud_7455_dbat7l	575 /* 7455 only */
-#define chud_7450_upmc5		929
-#define chud_7450_upmc6		930
-#define chud_7450_upmc1		937
-#define chud_7450_upmc2		938
-#define chud_7450_upmc3		941
-#define chud_7450_upmc4		942
-#define chud_7450_mmcr2		944
-#define chud_7450_pmc5		945
-#define chud_7450_pmc6		946
-#define chud_7450_bamr		951
-#define chud_7450_mmcr0		952
-#define chud_7450_pmc1		953
-#define chud_7450_pmc2		954
-#define chud_7450_siar		955 
-#define chud_7450_mmcr1		956
-#define chud_7450_pmc3		957
-#define chud_7450_pmc4		958
-#define chud_7450_tlbmiss	980
-#define chud_7450_ptehi		981
-#define chud_7450_ptelo		982
-#define chud_7450_l3pm		983
-#define chud_7450_hid0		1008
-#define chud_7450_hid1		1009
-#define chud_7450_iabr		1010
-#define chud_7450_ldstdb	1012
-#define chud_7450_msscr0	1014
-#define chud_7450_msssr0	1015
-#define chud_7450_ldstcr	1016
-#define chud_7450_l2cr		1017
-#define chud_7450_l3cr		1018
-#define chud_7450_ictc		1019
-#define chud_7450_ictrl		1011
-#define chud_7450_thrm1		1020
-#define chud_7450_thrm2		1021
-#define chud_7450_thrm3		1022
-#define chud_7450_pir		1023
-
-/* PPC SPRs - 970 Specific */
-#define chud_970_vrsave		256
-#define chud_970_ummcra		770
-#define chud_970_upmc1		771
-#define chud_970_upmc2		772
-#define chud_970_upmc3		773
-#define chud_970_upmc4		774
-#define chud_970_upmc5		775
-#define chud_970_upmc6		776
-#define chud_970_upmc7		777
-#define chud_970_upmc8		778
-#define chud_970_ummcr0		779
-#define chud_970_usiar		780
-#define chud_970_usdar		781
-#define chud_970_ummcr1		782
-#define chud_970_uimc		783
-#define chud_970_mmcra		786
-#define chud_970_pmc1		787
-#define chud_970_pmc2		788
-#define chud_970_pmc3		789
-#define chud_970_pmc4		790
-#define chud_970_pmc5		791
-#define chud_970_pmc6		792
-#define chud_970_pmc7		793
-#define chud_970_pmc8		794
-#define chud_970_mmcr0		795
-#define chud_970_siar		796
-#define chud_970_sdar		797
-#define chud_970_mmcr1		798
-#define chud_970_imc		799
-
-/* PPC SPRs - 7400/7410 Specific, Private */
-#define chud_7400_msscr1	1015
-
-/* PPC SPRs - 64-bit implementations,  Private */
-#define chud_ppc64_accr		29
-#define chud_ppc64_ctrl		152
-
-/* PPC SPRs - 970 Specific, Private */
-#define chud_970_scomc		276
-#define chud_970_scomd		277
-#define chud_970_hsprg0		304
-#define chud_970_hsprg1		305
-#define chud_970_hdec		310
-#define chud_970_hior		311
-#define chud_970_rmor		312
-#define chud_970_hrmor		313
-#define chud_970_hsrr0		314
-#define chud_970_hsrr1		315
-#define chud_970_lpcr		318
-#define chud_970_lpidr		319
-#define chud_970_trig0		976
-#define chud_970_trig1		977
-#define chud_970_trig2		978
-#define chud_970_hid0		1008
-#define chud_970_hid1		1009
-#define chud_970_hid4		1012
-#define chud_970_hid5		1014
-#define chud_970_dabrx		1015
-#define chud_970_trace		1022
-#define chud_970_pir		1023
-
-#endif // _CHUD_SPR_H_
-
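The values above are SPR numbers as encoded in the PowerPC mfspr/mtspr instructions; entries marked FAKE (the MSR and the sixteen segment registers) are not real SPRs and had to be handled through other instructions (mfmsr, mfsr) in the CHUD read/write paths. On a PPC target, a genuine entry from this table is read like so, in a GCC extended-asm sketch (note the SPR number must be an immediate):

    /* Read the processor version register (chud_ppc_pvr == 287). */
    static inline unsigned long read_pvr(void)
    {
        unsigned long val;
        __asm__ volatile("mfspr %0, 287" : "=r"(val));
        return val;
    }
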
diff --git a/osfmk/chud/ppc/chud_thread_ppc.c b/osfmk/chud/ppc/chud_thread_ppc.c
deleted file mode 100644
index 0bca0ac92..000000000
--- a/osfmk/chud/ppc/chud_thread_ppc.c
+++ /dev/null
@@ -1,586 +0,0 @@
-/*
- * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <mach/mach_types.h>
-#include <mach/task.h>
-#include <mach/thread_act.h>
-
-#include <kern/kern_types.h>
-#include <kern/processor.h>
-#include <kern/thread.h>
-#include <kern/ipc_tt.h>
-
-#include <vm/vm_map.h>
-#include <vm/pmap.h>
-
-#include <chud/chud_xnu.h>
-#include <chud/chud_xnu_private.h>
-
-#include <ppc/misc_protos.h>
-#include <ppc/proc_reg.h>
-#include <ppc/machine_routines.h>
-#include <ppc/fpu_protos.h>
-
-#if 0
-#pragma mark **** thread state ****
-#endif
-
-__private_extern__
-kern_return_t chudxnu_copy_savearea_to_threadstate(thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t *count, struct savearea *sv)
-{
-    struct ppc_thread_state *ts;
-    struct ppc_thread_state64 *xts;
-
-    switch(flavor) {
-    case PPC_THREAD_STATE:
-        if(*count < PPC_THREAD_STATE_COUNT) { /* Is the count ok? */
-            *count = 0;
-            return KERN_INVALID_ARGUMENT;
-        }
-        ts = (struct ppc_thread_state *) tstate;
-        if(sv) {
-            ts->r0	= (unsigned int)sv->save_r0;
-            ts->r1	= (unsigned int)sv->save_r1;
-            ts->r2	= (unsigned int)sv->save_r2;
-            ts->r3	= (unsigned int)sv->save_r3;
-            ts->r4	= (unsigned int)sv->save_r4;
-            ts->r5	= (unsigned int)sv->save_r5;
-            ts->r6	= (unsigned int)sv->save_r6;
-            ts->r7	= (unsigned int)sv->save_r7;
-            ts->r8	= (unsigned int)sv->save_r8;
-            ts->r9	= (unsigned int)sv->save_r9;
-            ts->r10	= (unsigned int)sv->save_r10;
-            ts->r11	= (unsigned int)sv->save_r11;
-            ts->r12	= (unsigned int)sv->save_r12;
-            ts->r13	= (unsigned int)sv->save_r13;
-            ts->r14	= (unsigned int)sv->save_r14;
-            ts->r15	= (unsigned int)sv->save_r15;
-            ts->r16	= (unsigned int)sv->save_r16;
-            ts->r17	= (unsigned int)sv->save_r17;
-            ts->r18	= (unsigned int)sv->save_r18;
-            ts->r19	= (unsigned int)sv->save_r19;
-            ts->r20	= (unsigned int)sv->save_r20;
-            ts->r21	= (unsigned int)sv->save_r21;
-            ts->r22	= (unsigned int)sv->save_r22;
-            ts->r23	= (unsigned int)sv->save_r23;
-            ts->r24	= (unsigned int)sv->save_r24;
-            ts->r25	= (unsigned int)sv->save_r25;
-            ts->r26	= (unsigned int)sv->save_r26;
-            ts->r27	= (unsigned int)sv->save_r27;
-            ts->r28	= (unsigned int)sv->save_r28;
-            ts->r29	= (unsigned int)sv->save_r29;
-            ts->r30	= (unsigned int)sv->save_r30;
-            ts->r31	= (unsigned int)sv->save_r31;
-            ts->cr	= (unsigned int)sv->save_cr;
-            ts->xer	= (unsigned int)sv->save_xer;
-            ts->lr	= (unsigned int)sv->save_lr;
-            ts->ctr	= (unsigned int)sv->save_ctr;
-            ts->srr0 	= (unsigned int)sv->save_srr0;
-            ts->srr1 	= (unsigned int)sv->save_srr1;
-            ts->mq	= 0;
-            ts->vrsave	= (unsigned int)sv->save_vrsave;
-        } else {
-            bzero((void *)ts, sizeof(struct ppc_thread_state));
-        }
-            *count = PPC_THREAD_STATE_COUNT; /* Pass back the amount we actually copied */
-        return KERN_SUCCESS;
-        break;
-    case PPC_THREAD_STATE64:
-        if(*count < PPC_THREAD_STATE64_COUNT) { /* Is the count ok? */
-            return KERN_INVALID_ARGUMENT;
-        }
-        xts = (struct ppc_thread_state64 *) tstate;
-        if(sv) {
-            xts->r0	= sv->save_r0;
-            xts->r1	= sv->save_r1;
-            xts->r2	= sv->save_r2;
-            xts->r3	= sv->save_r3;
-            xts->r4	= sv->save_r4;
-            xts->r5	= sv->save_r5;
-            xts->r6	= sv->save_r6;
-            xts->r7	= sv->save_r7;
-            xts->r8	= sv->save_r8;
-            xts->r9	= sv->save_r9;
-            xts->r10	= sv->save_r10;
-            xts->r11	= sv->save_r11;
-            xts->r12	= sv->save_r12;
-            xts->r13	= sv->save_r13;
-            xts->r14	= sv->save_r14;
-            xts->r15	= sv->save_r15;
-            xts->r16	= sv->save_r16;
-            xts->r17	= sv->save_r17;
-            xts->r18	= sv->save_r18;
-            xts->r19	= sv->save_r19;
-            xts->r20	= sv->save_r20;
-            xts->r21	= sv->save_r21;
-            xts->r22	= sv->save_r22;
-            xts->r23	= sv->save_r23;
-            xts->r24	= sv->save_r24;
-            xts->r25	= sv->save_r25;
-            xts->r26	= sv->save_r26;
-            xts->r27	= sv->save_r27;
-            xts->r28	= sv->save_r28;
-            xts->r29	= sv->save_r29;
-            xts->r30	= sv->save_r30;
-            xts->r31	= sv->save_r31;
-            xts->cr	= sv->save_cr;
-            xts->xer	= sv->save_xer;
-            xts->lr	= sv->save_lr;
-            xts->ctr	= sv->save_ctr;
-            xts->srr0 	= sv->save_srr0;
-            xts->srr1 	= sv->save_srr1;
-            xts->vrsave	= sv->save_vrsave;
-        } else {
-            bzero((void *)xts, sizeof(struct ppc_thread_state64));
-        }
-        *count = PPC_THREAD_STATE64_COUNT; /* Pass back the amount we actually copied */
-        return KERN_SUCCESS;
-        break;
-    default:
-        *count = 0;
-        return KERN_INVALID_ARGUMENT;
-        break;
-    }
-}
-
-__private_extern__
-kern_return_t chudxnu_copy_threadstate_to_savearea(struct savearea *sv, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t *count)
-{
-    struct ppc_thread_state *ts;
-    struct ppc_thread_state64 *xts;
-
-    switch(flavor) {
-    case PPC_THREAD_STATE:
-        if(*count < PPC_THREAD_STATE_COUNT) { /* Is the count ok? */
-            return KERN_INVALID_ARGUMENT;
-        }
-        ts = (struct ppc_thread_state *) tstate;
-        if(sv) {
-            sv->save_r0		= (uint64_t)ts->r0;
-            sv->save_r1		= (uint64_t)ts->r1;
-            sv->save_r2		= (uint64_t)ts->r2;
-            sv->save_r3		= (uint64_t)ts->r3;
-            sv->save_r4		= (uint64_t)ts->r4;
-            sv->save_r5		= (uint64_t)ts->r5;
-            sv->save_r6		= (uint64_t)ts->r6;
-            sv->save_r7		= (uint64_t)ts->r7;
-            sv->save_r8		= (uint64_t)ts->r8;
-            sv->save_r9		= (uint64_t)ts->r9;
-            sv->save_r10	= (uint64_t)ts->r10;
-            sv->save_r11	= (uint64_t)ts->r11;
-            sv->save_r12	= (uint64_t)ts->r12;
-            sv->save_r13	= (uint64_t)ts->r13;
-            sv->save_r14	= (uint64_t)ts->r14;
-            sv->save_r15	= (uint64_t)ts->r15;
-            sv->save_r16	= (uint64_t)ts->r16;
-            sv->save_r17	= (uint64_t)ts->r17;
-            sv->save_r18	= (uint64_t)ts->r18;
-            sv->save_r19	= (uint64_t)ts->r19;
-            sv->save_r20	= (uint64_t)ts->r20;
-            sv->save_r21	= (uint64_t)ts->r21;
-            sv->save_r22	= (uint64_t)ts->r22;
-            sv->save_r23	= (uint64_t)ts->r23;
-            sv->save_r24	= (uint64_t)ts->r24;
-            sv->save_r25	= (uint64_t)ts->r25;
-            sv->save_r26	= (uint64_t)ts->r26;
-            sv->save_r27	= (uint64_t)ts->r27;
-            sv->save_r28	= (uint64_t)ts->r28;
-            sv->save_r29	= (uint64_t)ts->r29;
-            sv->save_r30	= (uint64_t)ts->r30;
-            sv->save_r31	= (uint64_t)ts->r31;
-            sv->save_cr		= ts->cr;
-            sv->save_xer	= (uint64_t)ts->xer;
-            sv->save_lr		= (uint64_t)ts->lr;
-            sv->save_ctr	= (uint64_t)ts->ctr;
-            sv->save_srr0	= (uint64_t)ts->srr0;
-            sv->save_srr1	= (uint64_t)ts->srr1;
-            sv->save_vrsave	= ts->vrsave;
-            return KERN_SUCCESS;
-        }
-            break;
-    case PPC_THREAD_STATE64:
-        if(*count < PPC_THREAD_STATE64_COUNT) { /* Is the count ok? */
-            return KERN_INVALID_ARGUMENT;
-        }
-        xts = (struct ppc_thread_state64 *) tstate;
-        if(sv) {
-            sv->save_r0		= xts->r0;
-            sv->save_r1		= xts->r1;
-            sv->save_r2		= xts->r2;
-            sv->save_r3		= xts->r3;
-            sv->save_r4		= xts->r4;
-            sv->save_r5		= xts->r5;
-            sv->save_r6		= xts->r6;
-            sv->save_r7		= xts->r7;
-            sv->save_r8		= xts->r8;
-            sv->save_r9		= xts->r9;
-            sv->save_r10	= xts->r10;
-            sv->save_r11	= xts->r11;
-            sv->save_r12	= xts->r12;
-            sv->save_r13	= xts->r13;
-            sv->save_r14	= xts->r14;
-            sv->save_r15	= xts->r15;
-            sv->save_r16	= xts->r16;
-            sv->save_r17	= xts->r17;
-            sv->save_r18	= xts->r18;
-            sv->save_r19	= xts->r19;
-            sv->save_r20	= xts->r20;
-            sv->save_r21	= xts->r21;
-            sv->save_r22	= xts->r22;
-            sv->save_r23	= xts->r23;
-            sv->save_r24	= xts->r24;
-            sv->save_r25	= xts->r25;
-            sv->save_r26	= xts->r26;
-            sv->save_r27	= xts->r27;
-            sv->save_r28	= xts->r28;
-            sv->save_r29	= xts->r29;
-            sv->save_r30	= xts->r30;
-            sv->save_r31	= xts->r31;
-            sv->save_cr		= xts->cr;
-            sv->save_xer	= xts->xer;
-            sv->save_lr		= xts->lr;
-            sv->save_ctr	= xts->ctr;
-            sv->save_srr0	= xts->srr0;
-            sv->save_srr1	= xts->srr1;
-            sv->save_vrsave	= xts->vrsave;
-            return KERN_SUCCESS;
-        }
-    }
-    return KERN_FAILURE;
-}
-
-__private_extern__
-kern_return_t chudxnu_thread_user_state_available(thread_t thread)
-{
-    if(find_user_regs(thread)) {
-	return KERN_SUCCESS;
-    } else {
-	return KERN_FAILURE;
-    }
-}
-
-__private_extern__
-kern_return_t chudxnu_thread_get_state(thread_t thread, 
-				    thread_flavor_t flavor,
-                                    thread_state_t tstate,
-                                    mach_msg_type_number_t *count,
-                                    boolean_t user_only)
-{
-    if(flavor==PPC_THREAD_STATE || flavor==PPC_THREAD_STATE64) { // machine_thread_get_state filters out some bits
-		struct savearea *sv;
-		if(user_only) {
-			sv = find_user_regs(thread);
-		} else {
-			sv = find_kern_regs(thread);
-		}
-		return chudxnu_copy_savearea_to_threadstate(flavor, tstate, count, sv);
-    } else {
-		if(user_only) {
-			return machine_thread_get_state(thread, flavor, tstate, count);
-		} else {
-			// doesn't do FP or VMX
-			return machine_thread_get_kern_state(thread, flavor, tstate, count);
-		}    
-    }
-}
-
-__private_extern__
-kern_return_t chudxnu_thread_set_state(thread_t thread, 
-					thread_flavor_t flavor,
-					thread_state_t tstate,
-					mach_msg_type_number_t count,
-					boolean_t user_only)
-{
-    if(flavor==PPC_THREAD_STATE || flavor==PPC_THREAD_STATE64) { // machine_thread_set_state filters out some bits
-		struct savearea *sv;
-		if(user_only) {
-			sv = find_user_regs(thread);
-		} else {
-			sv = find_kern_regs(thread);
-		}
-		return chudxnu_copy_threadstate_to_savearea(sv, flavor, tstate, &count);
-    } else {
-		return machine_thread_set_state(thread, flavor, tstate, count); // always user
-    }
-}
-
-#if 0
-#pragma mark **** task memory read/write ****
-#endif
-    
-__private_extern__
-kern_return_t chudxnu_task_read(task_t task, void *kernaddr, uint64_t usraddr, vm_size_t size)
-{
-    kern_return_t ret = KERN_SUCCESS;
-
-	if(ml_at_interrupt_context()) {
-		// can't do this on an interrupt stack
-		return KERN_FAILURE;
-	}
-    
-	if(!chudxnu_is_64bit_task(task)) { // clear any cruft out of upper 32-bits for 32-bit tasks
-		usraddr &= 0x00000000FFFFFFFFULL;
-	}
-
-    if(current_task()==task) {
-		thread_t      cur_thr = current_thread();
-		vm_offset_t   recover_handler = cur_thr->recover; 
-		
-		if(copyin(usraddr, kernaddr, size)) {
-			ret = KERN_FAILURE;
-		}
-
-		cur_thr->recover = recover_handler;
-    } else {
-		
-		vm_map_t map = get_task_map(task);
-		ret = vm_map_read_user(map, usraddr, kernaddr, size);
-    }
-    
-    return ret;
-}
-			
-__private_extern__
-kern_return_t chudxnu_task_write(task_t task, uint64_t useraddr, void *kernaddr, vm_size_t size)
-{
-    kern_return_t ret = KERN_SUCCESS;
- 
-	if(ml_at_interrupt_context()) {
-		// can't do this on an interrupt stack
-		return KERN_FAILURE;
-	}
-
-	if(!chudxnu_is_64bit_task(task)) { // clear any cruft out of upper 32-bits for 32-bit tasks
-		useraddr &= 0x00000000FFFFFFFFULL;
-	}
-
-    if(current_task()==task) {    
-		thread_t      cur_thr = current_thread();
-		vm_offset_t   recover_handler = cur_thr->recover; 
-					
-		if(copyout(kernaddr, useraddr, size)) {
-			ret = KERN_FAILURE;
-		}
-		cur_thr->recover = recover_handler;
-    } else {
-		
-		vm_map_t map = get_task_map(task);
-		ret = vm_map_write_user(map, kernaddr, useraddr, size);
-    }		
-    
-    return ret;
-}
-
-__private_extern__
-kern_return_t chudxnu_kern_read(void *dstaddr, vm_offset_t srcaddr, vm_size_t size)
-{
-	return (ml_nofault_copy(srcaddr, (vm_offset_t) dstaddr, size) == size ?
-	    KERN_SUCCESS: KERN_FAILURE);
-}
-
-__private_extern__
-kern_return_t chudxnu_kern_write(vm_offset_t dstaddr, void *srcaddr, vm_size_t	size)
-{
-	return (ml_nofault_copy((vm_offset_t) srcaddr, dstaddr, size) == size ?
-	    KERN_SUCCESS: KERN_FAILURE);
-}
-
-// chudxnu_thread_get_callstack gathers a raw callstack along with any information needed to
-// fix it up later (in case we stopped the program while it was saving values into the previous stack frame, etc.)
-// after sampling has finished.
-//
-// For an N-entry callstack:
-//
-// [0]      current pc
-// [1..N-3] stack frames (including current one)
-// [N-2]    current LR (return value if we're in a leaf function)
-// [N-1]    current r0 (in case we've saved LR in r0)
-//
-
-#define FP_LINK_OFFSET 			2
-#define STACK_ALIGNMENT_MASK	0xF // PPC stack frames are supposed to be 16-byte aligned
-#define INST_ALIGNMENT_MASK		0x3 // Instructions are always 4-bytes wide
-
-#ifndef USER_MODE
-#define USER_MODE(msr) ((msr) & MASK(MSR_PR) ? TRUE : FALSE)
-#endif
-
-#ifndef SUPERVISOR_MODE
-#define SUPERVISOR_MODE(msr) ((msr) & MASK(MSR_PR) ? FALSE : TRUE)
-#endif
-
-#define VALID_STACK_ADDRESS(addr)   (addr>=0x1000ULL &&			 \
-				     (addr&STACK_ALIGNMENT_MASK)==0x0 && \
-				     (supervisor ?			 \
-					 (addr>=kernStackMin &&		 \
-					  addr<=kernStackMax) :		 \
-					 TRUE))
-
-
-__private_extern__
-kern_return_t chudxnu_thread_get_callstack64(	thread_t thread,
-						uint64_t *callStack,
-						mach_msg_type_number_t *count,
-						boolean_t user_only)
-{
-    kern_return_t kr;
-    task_t task = get_threadtask(thread);
-    uint64_t nextFramePointer = 0;
-    uint64_t currPC, currLR, currR0;
-    uint64_t framePointer;
-    uint64_t prevPC = 0;
-    uint64_t kernStackMin = thread->kernel_stack;
-    uint64_t kernStackMax = kernStackMin + kernel_stack_size;
-    uint64_t *buffer = callStack;
-    uint32_t tmpWord;
-    int bufferIndex = 0;
-    int bufferMaxIndex = *count;
-    boolean_t supervisor;
-    boolean_t is64Bit;
-    struct savearea *sv;
-
-    if(user_only) {
-        sv = find_user_regs(thread);
-    } else {
-        sv = find_kern_regs(thread);
-    }
-
-    if(!sv) {
-        *count = 0;
-        return KERN_FAILURE;
-    }
-
-    supervisor = SUPERVISOR_MODE(sv->save_srr1);
-    if(supervisor) {
-		is64Bit = FALSE; /* XXX assuming task is always 32-bit */
-    } else {
-		is64Bit = chudxnu_is_64bit_task(task);
-    }
-
-    bufferMaxIndex = bufferMaxIndex - 2; // reserve the last two buffer slots for the saved LR and R0.
-    if(bufferMaxIndex<2) {
-        *count = 0;
-        return KERN_RESOURCE_SHORTAGE;
-    }
-
-    currPC = sv->save_srr0;
-    framePointer = sv->save_r1; /* r1 is the stack pointer (no FP on PPC)  */
-    currLR = sv->save_lr;
-    currR0 = sv->save_r0;
-
-    bufferIndex = 0;  // start with a stack of size zero
-    buffer[bufferIndex++] = currPC; // save PC in position 0.
-
-    // Now, fill buffer with stack backtraces.
-    while(bufferIndex<bufferMaxIndex && VALID_STACK_ADDRESS(framePointer)) {
-        uint64_t pc = 0;
-        // Above the stack pointer, the following values are saved:
-        // saved LR
-        // saved CR
-        // saved SP
-        //-> SP
-        // Here, we'll get the lr from the stack.
-        uint64_t fp_link;
-
-		if(is64Bit) {
-			fp_link = framePointer + FP_LINK_OFFSET*sizeof(uint64_t);
-		} else {
-			fp_link = framePointer + FP_LINK_OFFSET*sizeof(uint32_t);
-		}
-
-        // Note that we read the pc even for the first stack frame (which, in theory,
-        // is always empty because the callee fills it in just before it lowers the
-        // stack).  However, if we catch the program between filling in the return
-        // address and lowering the stack, we still want a valid backtrace.
-        // FixupStack correctly disregards this value if necessary.
-
-        if(supervisor) {
-			if(is64Bit) {
-				kr = chudxnu_kern_read(&pc, fp_link, sizeof(uint64_t));
-			} else {
-				kr = chudxnu_kern_read(&tmpWord, fp_link, sizeof(uint32_t));
-				pc = tmpWord;
-			}    
-        } else {
-			if(is64Bit) {
-				kr = chudxnu_task_read(task, &pc, fp_link, sizeof(uint64_t));
-			} else {
-				kr = chudxnu_task_read(task, &tmpWord, fp_link, sizeof(uint32_t));
-				pc = tmpWord;
-	    	}
-		}
-        if(kr!=KERN_SUCCESS) {
-            pc = 0;
-            break;
-        }
-
-        // retrieve the contents of the frame pointer and advance to the next stack frame if it's valid
-        if(supervisor) {
-			if(is64Bit) {
-				kr = chudxnu_kern_read(&nextFramePointer, framePointer, sizeof(uint64_t));
-			} else {
-				kr = chudxnu_kern_read(&tmpWord, framePointer, sizeof(uint32_t));
-				nextFramePointer = tmpWord;
-			}  
-        } else {
-			if(is64Bit) {
-				kr = chudxnu_task_read(task, &nextFramePointer, framePointer, sizeof(uint64_t));
-			} else {
-				kr = chudxnu_task_read(task, &tmpWord, framePointer, sizeof(uint32_t));
-				nextFramePointer = tmpWord;
-			}
-		}
-        if(kr!=KERN_SUCCESS) {
-            nextFramePointer = 0;
-        }
-
-        if(nextFramePointer) {
-            buffer[bufferIndex++] = pc;
-            prevPC = pc;
-        }
-    
-        if(nextFramePointer<framePointer) {
-            break;
-        } else {
-	    	framePointer = nextFramePointer;
-		}
-    }
-
-    if(bufferIndex>=bufferMaxIndex) {
-        *count = 0;
-        return KERN_RESOURCE_SHORTAGE;
-    }
-
-    // Save link register and R0 at bottom of stack (used for later fixup).
-    buffer[bufferIndex++] = currLR;
-    buffer[bufferIndex++] = currR0;
-
-    *count = bufferIndex;
-    return KERN_SUCCESS;
-}
-
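Given the buffer layout documented above, a caller of chudxnu_thread_get_callstack64 peels the live LR and R0 off the end before treating the rest as an ordinary backtrace. Roughly, assuming kernel context and a thread_t in hand:

    uint64_t stack[32];
    mach_msg_type_number_t n = 32;

    if (chudxnu_thread_get_callstack64(thread, stack, &n, FALSE) == KERN_SUCCESS
        && n >= 3) {
        uint64_t pc = stack[0];        /* current PC                         */
        uint64_t lr = stack[n - 2];    /* live LR (return address in a leaf) */
        uint64_t r0 = stack[n - 1];    /* live r0 (may hold a saved LR)      */
        /* stack[1 .. n-3] are the walked frame return addresses */
    }
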
diff --git a/osfmk/chud/ppc/chud_xnu_private.h b/osfmk/chud/ppc/chud_xnu_private.h
deleted file mode 100644
index 72b2ed663..000000000
--- a/osfmk/chud/ppc/chud_xnu_private.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#ifndef _PPC_CHUD_XNU_PRIVATE_H_
-#define _PPC_CHUD_XNU_PRIVATE_H_
-
-#if 0
-#pragma mark **** thread ****
-#endif
-// *****************************************************************************
-// thread
-// *****************************************************************************
-extern kern_return_t chudxnu_copy_savearea_to_threadstate(
-			thread_flavor_t flavor, 
-			thread_state_t tstate,
-			mach_msg_type_number_t *count,
-			struct savearea *sv);
-							    
-extern kern_return_t chudxnu_copy_threadstate_to_savearea(
-			struct savearea *sv,
-			thread_flavor_t flavor,
-			thread_state_t tstate,
-			mach_msg_type_number_t *count);
-
-#if 0
-#pragma mark **** cpu timer ****
-#endif
-typedef struct {
-	timer_call_data_t			cpu_timer_call;
-	uint64_t				t_deadline;
-	chudxnu_cpu_timer_callback_func_t	cpu_timer_callback_fn;
-} chudcpu_data_t;
-
-#endif /* _PPC_CHUD_XNU_PRIVATE_H_ */
diff --git a/osfmk/conf/MASTER b/osfmk/conf/MASTER
index cadb1a976..e34f671cb 100644
--- a/osfmk/conf/MASTER
+++ b/osfmk/conf/MASTER
@@ -65,7 +65,7 @@ ident		MACH
 #	option should be on.
 #
 options		MACH_KERNEL
-options		MACH_PAGEMAP
+options		MACH_PAGEMAP	#	<mach_pagemap>
 options		MACH_LOAD
 options		MACH_RT
 options		TASK_SWAPPER	#	<task_swapper_disabled>
@@ -108,6 +108,9 @@ options		MACH_MP_DEBUG	#		# <debug>
 #	operations on each element.
 #
 options		ZONE_DEBUG	#		# <debug>
+
+options		CONFIG_ZLEAKS	# Live zone leak debugging	# <zleaks>
+
 #
 options		ZONE_ALIAS_ADDR	#		# <zone_alias_addr>
 # 
@@ -141,7 +144,6 @@ options		CONFIG_DTRACE	#		# <config_dtrace>
 # 
 options		MACH_COUNTERS	#		# <debug>
 
-
 ##########################################################
 #
 # This defines configuration options that are normally used only during
@@ -207,9 +209,6 @@ options   CONFIG_ZONE_MAP_MIN=12582912	# <medium,large,xlarge>
 options   CONFIG_ZONE_MAP_MIN=6291456	# <small,xsmall>
 options   CONFIG_ZONE_MAP_MIN=1048576	# <bsmall>
 
-options   CONFIG_TOKEN_QUEUE_SMALL=1		# <bsmall>
-options   CONFIG_TOKEN_QUEUE_SMALL=0		# <xsmall,small,medium,large,xlarge>
-
 #
 #  configurable kernel - use these options to strip strings from panic
 #  and printf calls.
@@ -250,3 +249,23 @@ options		CONFIG_CODE_DECRYPTION	# <config_embedded>
 # Context switched counters 
 #
 options		CONFIG_COUNTERS			# <config_counters>
+
+#
+# Timeshare scheduler implementations
+#
+options		CONFIG_SCHED_TRADITIONAL	# <config_sched_traditional>
+options		CONFIG_SCHED_PROTO		# <config_sched_proto>
+options		CONFIG_SCHED_GRRR		# <config_sched_grrr>
+options		CONFIG_SCHED_FIXEDPRIORITY	# <config_sched_fixedpriority>
+options		CONFIG_SCHED_GRRR_CORE		# <config_sched_grrr,config_sched_fixedpriority>
+
+options		CONFIG_SCHED_IDLE_IN_PLACE		# <config_sched_idle_in_place>
+
+#
+# freeze - support app hibernation, used on embedded
+#
+options		CONFIG_FREEZE	# <freeze>
+
+
+options		CHECK_CS_VALIDATION_BITMAP	# <config_cs_validation_bitmap>
+
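Each "options NAME # <tag>" line above turns into a preprocessor define in the config header that doconf generates whenever a kernel configuration names the tag, so sources gate the feature at compile time. For instance, code conditional on the new zone-leak option would take this shape (the init call is hypothetical; the #if guard is the point):

    #if CONFIG_ZLEAKS
        zleaks_init();    /* built only when the config pulled in <zleaks> */
    #endif
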
diff --git a/osfmk/conf/MASTER.i386 b/osfmk/conf/MASTER.i386
index b8cd08e05..42b4294e1 100644
--- a/osfmk/conf/MASTER.i386
+++ b/osfmk/conf/MASTER.i386
@@ -9,11 +9,10 @@
 #  Standard Apple MacOS X Configurations:
 #  -------- ---- -------- ---------------
 #
-#  RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto config_dtrace config_mca config_vmx config_counters ]
+#  RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto config_dtrace config_mca config_vmx config_mtrr config_lapic config_counters zleaks config_sched_traditional config_sched_proto config_sched_grrr config_sched_fixedpriority mach_pagemap config_sched_idle_in_place ]
 #  DEBUG= [ RELEASE osf_debug debug mach_kdb mach_assert]
 #  PROFILE = [ RELEASE profile ]
 #
-#
 #  EMBEDDED_BASE = [ bsmall intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto ]
 #  EMBEDDED = [ EMBEDDED_BASE no_printf_str no_kprintf_str no_kdebug ]
 #  DEVELOPMENT = [ EMBEDDED_BASE mach_assert config_dtrace config_counters ]
@@ -58,6 +57,8 @@ options		CONFIG_SERIAL_KDP	# KDP over serial				# <config_serial_kdp>
 options		PAE
 options		X86_64
 options		DISPATCH_COUNTS
+options		PAL_I386
+options		CONFIG_YONAH	# 32-bit Yonah support		# <config_yonah>
 
 #
 # Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and 
@@ -75,5 +76,6 @@ options		CONFIG_CODE_DECRYPTION
 
 options	    CONFIG_MCA			    # Machine Check Architecture	# <config_mca>
 options	    CONFIG_VMX			    # Virtual Machine Extensions	# <config_vmx>
+options	    CONFIG_MTRR			    # Memory Type Range Registers	# <config_mtrr>
 
 options     NO_NESTED_PMAP                  # <no_nested_pmap>
diff --git a/osfmk/conf/MASTER.ppc b/osfmk/conf/MASTER.ppc
deleted file mode 100644
index 98036b366..000000000
--- a/osfmk/conf/MASTER.ppc
+++ /dev/null
@@ -1,67 +0,0 @@
-#
-# Mach Operating System
-# Copyright (c) 1986 Carnegie-Mellon University
-# All rights reserved.  The CMU software License Agreement
-# specifies the terms and conditions for use and redistribution.
-#  
-######################################################################
-#
-#  Standard Apple MacOS X Configurations:
-#  -------- ---- -------- ---------------
-#
-#  RELEASE       = [ medium mach_bsd mach_kdp iokit mach_pe ppc mach hibernation crypto config_dtrace config_counters ]
-#  DEVELOPMENT	 = [ RELEASE ]
-#  RELEASE_TRACE = [ RELEASE kdebug ]
-#  DEBUG         = [ RELEASE mach_kdb debug mach_assert ]
-#  DEBUG_TRACE   = [ DEBUG kdebug ]
-#  PROFILE       = [ RELEASE profile ]
-#
-######################################################################
-#
-##############################################################################
-#
-# MACH_PROF enables code for mach profiling.
-#
-options         MACH_PROF	#		# <mach_prof>
-##############################################################################
-#
-# Debug
-#
-options		DEBUG		#		# <debug>
-
-options		PROFILE		# kernel profiling	# <profile>
-
-machine		"ppc"
-cpu		"ppc"
-pseudo-device	scc		1
-pseudo-device	vc		1
-
-options		MACHINE_TIMER_ROUTINES
-
-# Disabled by default, since mklinux does not need this
-# unless running multiserver - the atalk stack at time of
-# writing inserts a null filter!
-#options	NET_FILTER_COMPILER
-
-# Turn on the serial console by uncommenting the this:
-#options	SERIAL_CONSOLE_DEFAULT
-
-options		MACH_KDP	#		# <mach_kdp>
-options		MACH_KDB	#		# <mach_kdb>
-options		MACH_BSD	#		# <mach_bsd>
-options		IOKIT		#		# <iokit>
-options		MACH_PE		#		# <mach_pe>
-
-# XXX for bringup, turns on mac disklabels, 
-# and some other nice stuff for the diskshim
-options		POWERMAC
-
-options		DISPATCH_COUNTS
-
-#
-# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and 
-# security/conf MASTER files.
-#
-options		CONFIG_MACF		# Mandatory Access Control Framework
-#options	CONFIG_MACF_MACH	# MACF applied to Mach services
-options		CONFIG_AUDIT		# Kernel auditing
diff --git a/osfmk/conf/MASTER.x86_64 b/osfmk/conf/MASTER.x86_64
index a3f336c06..993fa17ab 100644
--- a/osfmk/conf/MASTER.x86_64
+++ b/osfmk/conf/MASTER.x86_64
@@ -9,10 +9,8 @@
 #  Standard Apple MacOS X Configurations:
 #  -------- ---- -------- ---------------
 #
-#  RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto config_dtrace config_mca config_vmx config_counters ]
-#  DEBUG= [ RELEASE osf_debug debug mach_assert ]
-#  PROFILE = [ RELEASE profile ]
-#
+#  RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto config_dtrace config_mca config_vmx config_mtrr config_lapic config_counters zleaks config_sched_traditional config_sched_proto config_sched_grrr config_sched_fixedpriority mach_pagemap config_sched_idle_in_place ]
+#  DEBUG = [ RELEASE osf_debug debug mach_assert ]
 #
 #  EMBEDDED_BASE = [ bsmall intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto ]
 #  EMBEDDED = [ EMBEDDED_BASE no_printf_str no_kprintf_str no_kdebug ]
@@ -20,8 +18,8 @@
 #
 ######################################################################
 #
-machine		"x86_64"						# <intel>
-cpu		"x86_64"						# <intel>
+machine		"x86_64"					# <intel>
+cpu		"x86_64"					# <intel>
 
 pseudo-device	com		2
 pseudo-device	vc		1
@@ -56,6 +54,7 @@ options		CONFIG_SERIAL_KDP	# KDP over serial				# <config_serial_kdp>
 options		PAE
 options		X86_64
 options		DISPATCH_COUNTS
+options		PAL_I386
 
 #
 # Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and 
@@ -73,6 +72,7 @@ options		CONFIG_CODE_DECRYPTION
 
 options	    CONFIG_MCA			    # Machine Check Architecture	# <config_mca>
 options	    CONFIG_VMX			    # Virtual Machine Extensions	# <config_vmx>
+options	    CONFIG_MTRR			    # Memory Type Range Registers	# <config_mtrr>
 
 options     NO_NESTED_PMAP                  # <no_nested_pmap>
 options     CONFIG_NO_NESTED_PMAP           # <no_nested_pmap>
diff --git a/osfmk/conf/Makefile b/osfmk/conf/Makefile
index 4010dbcba..330f94ab6 100644
--- a/osfmk/conf/Makefile
+++ b/osfmk/conf/Makefile
@@ -6,8 +6,7 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
-SETUP_SUBDIRS = \
-	tools
+SETUP_SUBDIRS =
 
 COMP_SUBDIRS = 
 
@@ -23,22 +22,21 @@ else
 export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)
 endif
 
-$(COMPOBJROOT)/doconf:
-	@make build_setup 
+MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(ARCH_CONFIG_LC).$(MACHINE_CONFIG_LC)
 
 $(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)/Makefile:  $(SOURCE)/MASTER  \
 	$(SOURCE)/MASTER.$(ARCH_CONFIG_LC) \
 	$(SOURCE)/Makefile.template  \
 	$(SOURCE)/Makefile.$(ARCH_CONFIG_LC)  \
 	$(SOURCE)/files \
-	$(SOURCE)/files.$(ARCH_CONFIG_LC) \
-	$(COMPOBJROOT)/doconf
+	$(SOURCE)/files.$(ARCH_CONFIG_LC)
 	$(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \
 	$(MKDIR) $${doconf_target}; \
 	cd $${doconf_target}; \
 	rm -f $(notdir $?); \
 	cp  $? $${doconf_target}; \
-	$(COMPOBJROOT)/doconf -c -cpu $(ARCH_CONFIG_LC) -d  $(TARGET)/$(OSFMK_KERNEL_CONFIG) $(OSFMK_KERNEL_CONFIG); \
+	if [ -f $(MASTER_CPU_PER_SOC) ]; then cp $(MASTER_CPU_PER_SOC) $${doconf_target}; fi; \
+	$(SRCROOT)/SETUP/config/doconf -c -cpu $(ARCH_CONFIG_LC) -soc $(MACHINE_CONFIG_LC) -d  $(TARGET)/$(OSFMK_KERNEL_CONFIG) $(OSFMK_KERNEL_CONFIG); \
 	);
 
 $(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)/platforms.h: $(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)/Makefile
@@ -47,11 +45,8 @@ $(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)/platforms.h: $(COMPOBJROOT)/$(OSFMK_KERNEL
 	${LN} cputypes.h $@;					\
 	)
 
-do_setup_conf: $(COMPOBJROOT)/doconf \
-		$(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)/Makefile \
+do_all: $(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)/Makefile \
 		$(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)/platforms.h 
-
-do_all: do_setup_conf
 	$(_v)next_source=$(subst conf/,,$(SOURCE));			\
 	${MAKE} -C $(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)	\
 		MAKEFILES=$(TARGET)/$(OSFMK_KERNEL_CONFIG)/Makefile	\
diff --git a/osfmk/conf/Makefile.i386 b/osfmk/conf/Makefile.i386
index 387d4aafb..e232c0e32 100644
--- a/osfmk/conf/Makefile.i386
+++ b/osfmk/conf/Makefile.i386
@@ -2,28 +2,19 @@
 #BEGIN	Machine dependent Makefile fragment for i386
 ######################################################################
 
-CFLAGS+= -DAT386=1
-SFLAGS+= -DAT386=1
-
-# Enable -Werror for i386 builds
-CFLAGS+= $(WERROR) -Wshorten-64-to-32
-CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD))
+CWARNFLAGS = $(CWARNFLAGS_STD) -Wshorten-64-to-32
 
 # Objects that don't compile cleanly:
 OBJS_NO_WERROR=				\
-	UNDRequest.o			\
 	db_macro.o			\
 	db_print.o			\
 	db_sym.o			\
 	db_variables.o			\
 	db_disasm.o			\
 	db_interface.o			\
-	db_trace.o			\
-	gssd_mach.o
-
-OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS))
+	db_trace.o
 
-$(OBJS_WERROR):		WERROR=-Werror
+$(foreach file,$(OBJS_NO_WERROR),$(eval $(call add_perfile_cflags,$(file),-Wno-error)))
 
 # Files that must go in the __HIB segment:
 UNCONFIGURED_HIB_FILES=					\
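
Note: the rewritten fragment keeps -Wshorten-64-to-32 in CWARNFLAGS and, instead of filtering objects out of a -Werror list, attaches -Wno-error per file through add_perfile_cflags. For reference, a hedged example of the implicit 64-to-32-bit truncation that warning catches (illustrative names):

    #include <stdint.h>

    uint32_t
    page_index(uint64_t byte_offset)
    {
        return byte_offset >> 12;               /* implicit uint64_t -> uint32_t: warns */
    }

    uint32_t
    page_index_explicit(uint64_t byte_offset)
    {
        return (uint32_t)(byte_offset >> 12);   /* explicit cast: no warning */
    }
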
diff --git a/osfmk/conf/Makefile.ppc b/osfmk/conf/Makefile.ppc
deleted file mode 100644
index 35d7f0dd2..000000000
--- a/osfmk/conf/Makefile.ppc
+++ /dev/null
@@ -1,76 +0,0 @@
-######################################################################
-#BEGIN	Machine dependent Makefile fragment for ppc
-######################################################################
-
-#
-# ppc should be (mostly) warning free
-#
-CFLAGS+= $(WERROR)
-CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD))
-
-# Objects that don't compile cleanly:
-OBJS_NO_WERROR=                         \
-	UNDRequest.o			\
-	machine_routines.o		\
-	db_examine.o			\
-	db_macro.o			\
-	db_print.o			\
-	db_sym.o			\
-	db_variables.o			\
-	ppc_disasm.o			\
-	db_disasm.o			\
-	db_trace.o			\
-	db_low_trace.o			\
-	gssd_mach.o			\
-	kdp_machdep.o
-
-OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS))
-
-$(OBJS_WERROR):         WERROR=-Werror
-
-export bsd_vm.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export device_vm.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export memory_object.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export vm32_user.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export vm_apple_protect.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export vm_debug.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export vm_external.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export vm_fault.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export vm_init.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export vm_kern.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export vm_map.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export vm_object.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export vm_pageout.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export vm_purgeable.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export vm_resident.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export vm_shared_region.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export vm_swapfile_pager.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export vm_user.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-
-export default_pager.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export dp_backing_store.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-export dp_memory_object.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32
-
-#
-# KDB support
-#
-
-makedis: $(SRCROOT)/osfmk/ddb/makedis.c
-	$(HOST_CC) -Werror -Wall -o $@ $<
-
-ppc_disasm.o_CFLAGS_ADD = -Dperror=db_printf -Dexit=db_error -Dmalloc=db_disasm_malloc
-
-ppc_disasm.c ppc_disasm.h : $(SRCROOT)/osfmk/ppc/ppc_disasm.i makedis
-	./makedis -w -h ./ppc_disasm.h $(SOURCE_DIR)/osfmk/ppc/ppc_disasm.i > ./ppc_disasm.c
-
-
-db_disasm.o : ppc_disasm.h
-
-# Files that must go in the __HIB segment:
-UNCONFIGURED_HIB_FILES=                              \
-           hibernate_restore.o
-HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS))
-
-######################################################################
-#END	Machine dependent Makefile fragment for ppc
-######################################################################
diff --git a/osfmk/conf/Makefile.template b/osfmk/conf/Makefile.template
index 75f1c7f31..c39e844cf 100644
--- a/osfmk/conf/Makefile.template
+++ b/osfmk/conf/Makefile.template
@@ -26,7 +26,7 @@ include $(MakeInc_def)
 #
 # XXX: CFLAGS
 #
-CFLAGS+= -imacros meta_features.h -DMACH_KERNEL_PRIVATE $(CFLAGS_INLINE_CONFIG)
+CFLAGS+= -include meta_features.h -DMACH_KERNEL_PRIVATE $(CFLAGS_INLINE_CONFIG)
 
 #
 # Directories for mig generated files
@@ -86,18 +86,20 @@ ${OBJS}: ${OBJSDEPS}
 
 LDOBJS = $(OBJS)
 
-$(COMPONENT).o: $(LDOBJS) assym.s
-	$(_v)for hib_file in ${HIB_FILES};		\
+$(COMPONENT).filelist: $(LDOBJS) assym.s
+	$(_v)if [ $(BUILD_MACHO_OBJ) -eq 1 ]; then \
+	for hib_file in ${HIB_FILES};		\
 	do	\
                 $(SEG_HACK) __HIB $${hib_file} -o $${hib_file}__; \
                 mv $${hib_file}__ $${hib_file} ; \
-	done;
+	done; \
+	fi
 	@echo LDFILELIST $(COMPONENT)
 	$(_v)( for obj in ${LDOBJS}; do	\
 		 echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \
-	done; ) > $(COMPONENT).o
+	done; ) > $(COMPONENT).filelist
 
-do_all: $(COMPONENT).o
+do_all: $(COMPONENT).filelist
 
 do_depend: do_all
 	$(_v)${MD} -u Makedep -f -d `ls *.d`;
@@ -108,8 +110,14 @@ do_build_all: do_depend
 # we name it genassym.o to help with the automatic
 # dependency generation
 
-genassym.o: $(SOURCE_DIR)/$(COMPONENT)/$(ARCH_CONFIG_LC)/genassym.c
-	$(_v)${KCC} ${CFLAGS} -MD ${_HOST_EXTRA_CFLAGS} -S -o ${@} -c ${INCFLAGS} $<
+GENASSYM_LOCATION = $(ARCH_CONFIG_LC)
+
+ifeq ($(ARCH_CONFIG_LC),x86_64)
+GENASSYM_LOCATION = i386
+endif
+
+genassym.o: $(SOURCE_DIR)/$(COMPONENT)/$(GENASSYM_LOCATION)/genassym.c
+	$(_v)${KCC} $(subst -flto,,${CFLAGS}) -MD ${_HOST_EXTRA_CFLAGS} -S -o ${@} -c ${INCFLAGS} $<
 
 assym.s: genassym.o
 	$(_v)sed -e '/#DEFINITION#/!d' -e 's/^.*#DEFINITION#//' -e 's/\$$//' -e 'p' -e 's/#//2' -e 's/[^A-Za-z0-9_]*\([A-Za-z0-9_]*\)/ \1_NUM/2' genassym.o > ${@}
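
Note: the genassym rule above compiles genassym.c to assembly (now with any -flto stripped, since LTO would not emit the textual assembly the sed script scans) and the assym.s rule extracts lines tagged #DEFINITION#. A hedged sketch of the underlying technique, assuming the usual inline-asm trick; the macro, struct, and symbol names are illustrative, and the real sed expects a specific line format:

    #include <stddef.h>

    /* Smuggle a compile-time constant into the .s output for sed to find. */
    #define DECLARE(name, value) \
        __asm__ volatile("\n#DEFINITION#\t.set\t" name ",\t%0" : : "i" (value))

    struct thread { void *kernel_stack; int state; };

    void
    generate_offsets(void)
    {
        DECLARE("TH_KSTACK", offsetof(struct thread, kernel_stack));
        DECLARE("TH_STATE",  offsetof(struct thread, state));
    }
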
diff --git a/osfmk/conf/Makefile.x86_64 b/osfmk/conf/Makefile.x86_64
index d24ace3bf..768a50845 100644
--- a/osfmk/conf/Makefile.x86_64
+++ b/osfmk/conf/Makefile.x86_64
@@ -2,32 +2,7 @@
 #BEGIN	Machine dependent Makefile fragment for x86_64
 ######################################################################
 
-CFLAGS+= -DAT386=1
-SFLAGS+= -DAT386=1
-
-CFLAGS+= $(WERROR) -Wshorten-64-to-32
-CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD))
-
-# Objects that don't compile cleanly:
-OBJS_NO_WERROR=				\
-	UNDRequest.o			\
-	db_examine.o			\
-	db_macro.o			\
-	db_print.o			\
-	db_sym.o			\
-	db_variables.o			\
-	db_disasm.o			\
-	db_interface.o			\
-	db_trace.o			\
-	host_priv_server.o \
-	mach_host_server.o \
-	security_server.o \
-	device_server.o \
-	gssd_mach.o \
-
-OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS))
-
-$(OBJS_WERROR):		WERROR=-Werror
+CWARNFLAGS = $(CWARNFLAGS_STD) -Wshorten-64-to-32
 
 # Files that must go in the __HIB segment:
 UNCONFIGURED_HIB_FILES=					\
diff --git a/osfmk/conf/files b/osfmk/conf/files
index 40e2d16d2..7a97e71c6 100644
--- a/osfmk/conf/files
+++ b/osfmk/conf/files
@@ -72,6 +72,8 @@ OPTIONS/stack_usage		optional stack_usage
 OPTIONS/config_dtrace		optional config_dtrace
 OPTIONS/config_counters		optional config_counters
 
+OPTIONS/no_kextd		optional no_kextd
+
 # Default pager and system pager files, to be moved to separate component
  
 osfmk/default_pager/default_pager.c	standard
@@ -93,7 +95,7 @@ osfmk/default_pager/dp_memory_object.c	standard
 #
 # kextd files
 #
-./kextd/kextd_mach.c				standard
+./kextd/kextd_mach.c				optional not no_kextd
 
 #
 # UserNotification files
@@ -152,13 +154,14 @@ osfmk/kern/clock_oldops.c		standard
 osfmk/kern/counters.c			standard
 osfmk/kern/debug.c			standard
 osfmk/kern/exception.c		standard
+osfmk/kern/extmod_statistics.c		standard
 osfmk/kern/host.c			standard
 osfmk/kern/host_notify.c		standard
 osfmk/kern/ipc_clock.c		standard
 osfmk/kern/ipc_host.c			standard
 osfmk/kern/ipc_kobject.c		standard
 osfmk/kern/ipc_mig.c			standard
-osfmk/kern/ipc_misc.c			optional config_embedded
+osfmk/kern/ipc_misc.c			standard
 osfmk/kern/ipc_sync.c			standard
 osfmk/kern/ipc_tt.c			standard
 osfmk/kern/kalloc.c			standard
@@ -176,6 +179,9 @@ osfmk/kern/processor_data.c		standard
 osfmk/kern/queue.c			standard
 osfmk/kern/sched_average.c		standard
 osfmk/kern/sched_prim.c		standard
+osfmk/kern/sched_proto.c	optional config_sched_proto
+osfmk/kern/sched_grrr.c	optional config_sched_grrr_core
+osfmk/kern/sched_fixedpriority.c	optional config_sched_fixedpriority
 osfmk/kern/security.c		optional config_macf
 osfmk/kern/stack.c			standard
 osfmk/kern/startup.c			standard
@@ -235,6 +241,7 @@ osfmk/pmc/pmc.c				standard
 ./mach/security_server.c		optional config_macf
 
 osfmk/vm/bsd_vm.c			optional mach_bsd
+osfmk/vm/default_freezer.c		optional config_freeze
 osfmk/vm/device_vm.c			standard
 osfmk/vm/memory_object.c		standard
 osfmk/vm/vm_debug.c			standard
@@ -243,6 +250,9 @@ osfmk/vm/vm_fault.c			standard
 osfmk/vm/vm_init.c			standard
 osfmk/vm/vm_kern.c			standard
 osfmk/vm/vm_map.c			standard
+osfmk/vm/vm_map_store.c			standard
+osfmk/vm/vm_map_store_ll.c		standard
+osfmk/vm/vm_map_store_rb.c		standard
 osfmk/vm/vm_object.c			standard
 osfmk/vm/vm_pageout.c			standard
 osfmk/vm/vm_purgeable.c			standard
@@ -280,4 +290,3 @@ osfmk/chud/chud_osfmk_callback.c	standard
 osfmk/chud/chud_thread.c		standard
 
 osfmk/console/serial_general.c	standard
-
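
Note: two things stand out in this files hunk: the `optional not no_kextd` syntax builds kextd_mach.c only when the no_kextd attribute is absent, and vm_map entry lookup is split into vm_map_store.c plus interchangeable linked-list (_ll) and red-black-tree (_rb) backends. Purely as a hypothetical illustration of that split (this is not the actual xnu API):

    struct map;         /* opaque for the sketch */
    struct entry;

    struct vm_map_store_ops {
        struct entry *(*lookup)(struct map *m, unsigned long addr);
        void          (*insert)(struct map *m, struct entry *e);
        void          (*remove)(struct map *m, struct entry *e);
    };

    /* vm_map_store.c would dispatch to one of these implementations: */
    extern const struct vm_map_store_ops store_ll;  /* linear list walk */
    extern const struct vm_map_store_ops store_rb;  /* red-black tree   */
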
diff --git a/osfmk/conf/files.i386 b/osfmk/conf/files.i386
index 9fe585040..8c2864527 100644
--- a/osfmk/conf/files.i386
+++ b/osfmk/conf/files.i386
@@ -22,11 +22,15 @@ osfmk/vm/vm_apple_protect.c	 standard
 
 osfmk/i386/pmap.c		standard
 osfmk/i386/pmap_x86_common.c	standard
+osfmk/i386/pmap_common.c	standard
 
+osfmk/i386/pal_routines.c	optional pal_i386
+osfmk/i386/pal_routines_asm.s	optional pal_i386
 
 osfmk/ddb/db_aout.c		optional mach_kdb
 
 osfmk/i386/bsd_i386.c		optional mach_bsd
+osfmk/i386/bsd_i386_native.c	optional mach_bsd
 osfmk/i386/machdep_call.c	optional mach_bsd
 
 osfmk/i386/_setjmp.s		standard
@@ -50,7 +54,8 @@ osfmk/i386/idt.s		standard
 osfmk/i386/io_map.c		standard
 osfmk/i386/ktss.c		standard
 osfmk/i386/ldt.c		standard
-osfmk/i386/loose_ends.c	standard
+osfmk/i386/loose_ends.c		standard
+osfmk/i386/copyio.c		standard
 osfmk/i386/locks_i386.c	standard
 osfmk/i386/locore.s	standard
 osfmk/i386/start.s	standard
@@ -64,9 +69,12 @@ osfmk/i386/mcount.s		optional profile
 osfmk/i386/mp_desc.c		standard
 #osfmk/i386/ntoh.s		standard
 osfmk/i386/pcb.c		standard
+osfmk/i386/pcb_native.c		standard
 osfmk/i386/phys.c		standard
 osfmk/i386/rtclock.c		standard
+osfmk/i386/rtclock_native.c	standard
 osfmk/i386/trap.c		standard
+osfmk/i386/trap_native.c	standard
 osfmk/i386/user_ldt.c		standard
 osfmk/i386/Diagnostics.c	standard
 osfmk/i386/pmCPU.c		standard
@@ -74,41 +82,21 @@ osfmk/i386/tsc.c		standard
 
 osfmk/i386/commpage/commpage.c	standard
 osfmk/i386/commpage/commpage_asm.s	standard
-osfmk/i386/commpage/atomic.s	standard
-osfmk/i386/commpage/cpu_number.s	standard
-osfmk/i386/commpage/commpage_mach_absolute_time.s	standard
-osfmk/i386/commpage/spinlocks.s	standard
 osfmk/i386/commpage/pthreads.s	standard
-osfmk/i386/commpage/cacheflush.s	standard
-osfmk/i386/commpage/commpage_gettimeofday.s	standard
-osfmk/i386/commpage/bcopy_scalar.s	standard
-osfmk/i386/commpage/bcopy_sse2.s	standard
-osfmk/i386/commpage/bcopy_sse3x.s	standard
-osfmk/i386/commpage/bcopy_sse3x_64.s	standard
-osfmk/i386/commpage/bcopy_sse42.s	standard
-osfmk/i386/commpage/bcopy_sse42_64.s	standard
-osfmk/i386/commpage/bzero_scalar.s	standard
-osfmk/i386/commpage/bzero_sse2.s	standard
-osfmk/i386/commpage/bzero_sse2_64.s	standard
-osfmk/i386/commpage/bzero_sse42.s	standard
-osfmk/i386/commpage/bzero_sse42_64.s	standard
-osfmk/i386/commpage/memset_pattern_sse2.s	standard
-osfmk/i386/commpage/memset_pattern_sse2_64.s	standard
-osfmk/i386/commpage/longcopy_sse3x.s	standard
-osfmk/i386/commpage/longcopy_sse3x_64.s	standard
-osfmk/i386/commpage/commpage_sigs.c	standard
 osfmk/i386/commpage/fifo_queues.s	standard
 
 osfmk/i386/AT386/conf.c		standard
 osfmk/i386/AT386/model_dep.c	standard
 
 osfmk/i386/lapic.c		standard
+osfmk/i386/lapic_native.c	standard
 osfmk/i386/mp.c			standard
+osfmk/i386/mp_native.c		standard
 
 osfmk/i386/acpi.c		standard
 osfmk/i386/acpi_wakeup.s	standard
 
-osfmk/i386/mtrr.c		standard
+osfmk/i386/mtrr.c		optional    config_mtrr
 
 osfmk/console/i386/serial_console.c  optional  com device-driver
 
@@ -134,6 +122,8 @@ osfmk/chud/i386/chud_osfmk_callback_i386.c	standard
 osfmk/chud/i386/chud_cpu_i386.c			standard
 osfmk/chud/i386/chud_thread_i386.c		standard
 
+osfmk/i386/ucode.c				standard
+
 osfmk/i386/vmx/vmx_cpu.c			optional config_vmx
 osfmk/i386/vmx/vmx_shims.c			optional config_vmx
 
@@ -145,6 +135,6 @@ osfmk/i386/vmx/vmx_shims.c			optional config_vmx
 #osfmk/OPTIONS/hi_res_clock	optional hi_res_clock
 
 
-osfmk/i386/startup64.c			optional x86_64
-osfmk/i386/start64.s			optional x86_64
-osfmk/i386/idt64.s			optional x86_64
+osfmk/i386/startup64.c			standard
+osfmk/i386/start64.s			standard
+osfmk/i386/idt64.s			standard
diff --git a/osfmk/conf/files.ppc b/osfmk/conf/files.ppc
deleted file mode 100644
index 2866dd820..000000000
--- a/osfmk/conf/files.ppc
+++ /dev/null
@@ -1,120 +0,0 @@
-# @OSF_COPYRIGHT@
-# 
-
-OPTIONS/db_machine_commands	optional db_machine_commands
-OPTIONS/gprof			optional gprof
-OPTIONS/fpe			optional fpe
-OPTIONS/fddi			optional fddi
-OPTIONS/serial_console_default	optional serial_console_default
-OPTIONS/mp			optional mp
-
-# lowmem_vectors.s must be at head of link line.
-# template.mk treats this as a special case and makes sure
-# that the file is placed at the front of the line
-
-
-osfmk/ddb/db_aout.c			optional mach_kdb
-./ppc_disasm.c				optional mach_kdb
-osfmk/ppc/db_disasm.c		optional mach_kdb
-osfmk/ppc/db_interface.c	optional mach_kdb
-osfmk/ppc/db_trace.c		optional mach_kdb
-osfmk/ppc/db_low_trace.c	optional mach_kdb
-osfmk/ppc/bcopytest.c		optional mach_kdb
-
-osfmk/ppc/lowmem_vectors.s	standard
-osfmk/ppc/start.s			standard
-osfmk/ppc/_setjmp.s			standard
-osfmk/ppc/mcount.s			optional profile
-
-osfmk/ppc/cpu.c 			standard
-osfmk/ppc/ppc_init.c		standard
-osfmk/ppc/ppc_vm_init.c		standard
-osfmk/ppc/model_dep.c		standard
-osfmk/ppc/locks_ppc.c			standard
-osfmk/ppc/pmap.c			standard
-osfmk/ppc/mappings.c		standard
-osfmk/ppc/savearea.c		standard
-osfmk/ppc/savearea_asm.s	standard
-osfmk/ppc/hw_vm.s			standard
-osfmk/ppc/skiplists.s		standard
-osfmk/ppc/hw_lock.s			standard
-osfmk/ppc/misc_asm.s		standard
-osfmk/ppc/status.c			standard
-osfmk/ppc/io_map.c			standard
-osfmk/ppc/trap.c			standard
-osfmk/ppc/pcb.c				standard
-osfmk/ppc/bits.s			standard
-osfmk/ppc/cswtch.s			standard
-osfmk/ppc/cache.s			standard
-osfmk/ppc/movc.s			standard
-osfmk/ppc/hw_exception.s	standard
-osfmk/ppc/bzero.s			standard
-osfmk/ppc/bcopy.s			standard
-osfmk/ppc/atomic_switch.s	standard
-osfmk/ppc/PseudoKernel.c	standard
-osfmk/ppc/interrupt.c		standard
-osfmk/ppc/machine_routines.c		standard
-osfmk/ppc/machine_routines_asm.s		standard
-osfmk/ppc/machine_task.c	standard
-osfmk/ppc/Emulate.s			standard
-osfmk/ppc/Emulate64.s		standard
-osfmk/ppc/AltiAssist.s		standard
-osfmk/ppc/conf.c			standard
-osfmk/ppc/etimer.c		standard
-osfmk/ppc/rtclock.c			standard
-osfmk/ppc/Diagnostics.c		standard
-osfmk/ppc/PPCcalls.c		standard
-osfmk/ppc/vmachmon.c		standard
-osfmk/ppc/vmachmon_asm.s	standard
-osfmk/ppc/pms.c			standard
-osfmk/ppc/pmsCPU.c		standard
-
-osfmk/ppc/Firmware.s		standard
-osfmk/ppc/FirmwareC.c		standard
-
-osfmk/ppc/aligned_data.s	standard
-
-osfmk/ppc/hw_perfmon.c		standard
-
-osfmk/ppc/commpage/commpage.c	standard
-osfmk/ppc/commpage/commpage_asm.s	standard
-osfmk/ppc/commpage/bcopy_g3.s	standard
-osfmk/ppc/commpage/bcopy_g4.s	standard
-osfmk/ppc/commpage/bcopy_970.s	standard
-osfmk/ppc/commpage/bcopy_64.s	standard
-osfmk/ppc/commpage/bzero_32.s	standard
-osfmk/ppc/commpage/bzero_128.s	standard
-osfmk/ppc/commpage/cacheflush.s	standard
-osfmk/ppc/commpage/gettimeofday.s	standard
-osfmk/ppc/commpage/mach_absolute_time.s	standard
-osfmk/ppc/commpage/pthread.s	standard
-osfmk/ppc/commpage/spinlocks.s	standard
-osfmk/ppc/commpage/bigcopy_970.s	standard
-osfmk/ppc/commpage/atomic.s		standard
-osfmk/ppc/commpage/memset_64.s  standard
-osfmk/ppc/commpage/memset_g3.s  standard
-osfmk/ppc/commpage/memset_g4.s  standard
-osfmk/ppc/commpage/memset_g5.s  standard
-
-osfmk/chud/ppc/chud_cpu_asm.s			standard
-osfmk/chud/ppc/chud_cpu_ppc.c			standard
-osfmk/chud/ppc/chud_osfmk_callback_ppc.c	standard
-osfmk/chud/ppc/chud_thread_ppc.c		standard
-
-osfmk/kdp/ml/ppc/kdp_machdep.c        optional mach_kdp
-osfmk/kdp/ml/ppc/kdp_vm.c             optional mach_kdp
-osfmk/kdp/ml/ppc/kdp_misc.s           optional mach_kdp
-
-osfmk/console/ppc/serial_console.c	optional	scc device-driver
-osfmk/ppc/serial_io.c			optional	scc device-driver
-
-osfmk/console/panic_dialog.c	optional	vc device-driver
-osfmk/console/video_console.c	optional	vc device-driver
-osfmk/console/ppc/video_scroll.s	optional	vc device-driver
-
-osfmk/ppc/hibernate_ppc.c			optional hibernation
-osfmk/ppc/hibernate_restore.s		optional hibernation
-
-# DUMMIES TO FORCE GENERATION OF .h FILES
-OPTIONS/bm			optional bm
-OPTIONS/debug			optional debug
diff --git a/osfmk/conf/files.x86_64 b/osfmk/conf/files.x86_64
index fbdaf097a..a147f68de 100644
--- a/osfmk/conf/files.x86_64
+++ b/osfmk/conf/files.x86_64
@@ -24,9 +24,14 @@ osfmk/vm/vm_apple_protect.c	 standard
 
 osfmk/x86_64/pmap.c		standard
 osfmk/i386/pmap_x86_common.c	standard
+osfmk/i386/pmap_common.c	standard
+osfmk/x86_64/pmap_pcid.c	standard
 
+osfmk/i386/pal_routines.c	optional pal_i386
+osfmk/x86_64/pal_routines_asm.s	optional pal_i386
 
 osfmk/i386/bsd_i386.c		optional mach_bsd
+osfmk/i386/bsd_i386_native.c	optional mach_bsd
 osfmk/i386/machdep_call.c	optional mach_bsd
 
 osfmk/x86_64/bcopy.s		standard
@@ -46,6 +51,7 @@ osfmk/i386/io_map.c		standard
 osfmk/i386/ktss.c		standard
 osfmk/i386/ldt.c		standard
 osfmk/x86_64/loose_ends.c	standard
+osfmk/x86_64/copyio.c		standard
 osfmk/i386/locks_i386.c	standard
 osfmk/x86_64/locore.s	standard
 osfmk/x86_64/start.s	standard
@@ -59,9 +65,12 @@ osfmk/x86_64/mcount.s		optional profile
 osfmk/i386/mp_desc.c		standard
 #osfmk/x86_64/ntoh.s		standard
 osfmk/i386/pcb.c		standard
+osfmk/i386/pcb_native.c		standard
 osfmk/i386/phys.c		standard
 osfmk/i386/rtclock.c		standard
+osfmk/i386/rtclock_native.c	standard
 osfmk/i386/trap.c		standard
+osfmk/i386/trap_native.c	standard
 osfmk/i386/user_ldt.c		standard
 osfmk/i386/Diagnostics.c	standard
 osfmk/i386/pmCPU.c		standard
@@ -69,40 +78,20 @@ osfmk/i386/tsc.c		standard
 
 osfmk/i386/commpage/commpage.c	standard
 osfmk/i386/commpage/commpage_asm.s	standard
-osfmk/i386/commpage/atomic.s	standard
-osfmk/i386/commpage/cpu_number.s	standard
-osfmk/i386/commpage/commpage_mach_absolute_time.s	standard
-osfmk/i386/commpage/spinlocks.s	standard
 osfmk/i386/commpage/pthreads.s	standard
-osfmk/i386/commpage/cacheflush.s	standard
-osfmk/i386/commpage/commpage_gettimeofday.s	standard
-osfmk/i386/commpage/bcopy_scalar.s	standard
-osfmk/i386/commpage/bcopy_sse2.s	standard
-osfmk/i386/commpage/bcopy_sse3x.s	standard
-osfmk/i386/commpage/bcopy_sse3x_64.s	standard
-osfmk/i386/commpage/bcopy_sse42.s	standard
-osfmk/i386/commpage/bcopy_sse42_64.s	standard
-osfmk/i386/commpage/bzero_scalar.s	standard
-osfmk/i386/commpage/bzero_sse2.s	standard
-osfmk/i386/commpage/bzero_sse2_64.s	standard
-osfmk/i386/commpage/bzero_sse42.s	standard
-osfmk/i386/commpage/bzero_sse42_64.s	standard
-osfmk/i386/commpage/memset_pattern_sse2.s	standard
-osfmk/i386/commpage/memset_pattern_sse2_64.s	standard
-osfmk/i386/commpage/longcopy_sse3x.s	standard
-osfmk/i386/commpage/longcopy_sse3x_64.s	standard
-osfmk/i386/commpage/commpage_sigs.c	standard
 osfmk/i386/commpage/fifo_queues.s	standard
 
 osfmk/i386/AT386/conf.c		standard
 osfmk/i386/AT386/model_dep.c	standard
 
 osfmk/i386/lapic.c		standard
+osfmk/i386/lapic_native.c	standard
 osfmk/i386/mp.c			standard
+osfmk/i386/mp_native.c		standard
 
 osfmk/i386/acpi.c		standard
 
-osfmk/i386/mtrr.c		standard
+osfmk/i386/mtrr.c		optional    config_mtrr
 
 osfmk/console/i386/serial_console.c  optional  com device-driver
 
@@ -128,6 +117,8 @@ osfmk/chud/i386/chud_osfmk_callback_i386.c	standard
 osfmk/chud/i386/chud_cpu_i386.c			standard
 osfmk/chud/i386/chud_thread_i386.c		standard
 
+osfmk/i386/ucode.c				standard
+
 osfmk/i386/vmx/vmx_cpu.c			optional config_vmx
 osfmk/i386/vmx/vmx_shims.c			optional config_vmx
 
@@ -139,5 +130,5 @@ osfmk/i386/vmx/vmx_shims.c			optional config_vmx
 #osfmk/OPTIONS/hi_res_clock	optional hi_res_clock
 
 
-osfmk/i386/startup64.c			optional x86_64
-osfmk/x86_64/idt64.s			optional x86_64
+osfmk/i386/startup64.c		standard
+osfmk/x86_64/idt64.s		standard
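
Note: the new pal_routines.c / *_native.c pairs suggest a platform abstraction layer: generic i386/x86_64 code calls a pal_ hook that native hardware implements trivially and that a paravirtualized platform can override. A hypothetical sketch of the pattern only; none of these names are the real PAL interface:

    struct pal_serial_ops {
        void (*putc)(int c);
        int  (*getc)(void);
    };

    static struct pal_serial_ops pal_serial;    /* installed at boot */

    static void
    serial_out(int c)
    {
        if (pal_serial.putc != 0)
            pal_serial.putc(c);     /* platform override takes precedence */
    }
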
diff --git a/osfmk/conf/tools/Makefile b/osfmk/conf/tools/Makefile
deleted file mode 100644
index 4f9ccd553..000000000
--- a/osfmk/conf/tools/Makefile
+++ /dev/null
@@ -1,32 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-SETUP_SUBDIRS = doconf
-
-COMP_SUBDIRS = doconf
-
-INST_SUBDIRS = \
-
-
-setup_build_all:
-	@echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-do_build_all:
-	@echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-setup_build_install:
-	@echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-do_build_install:
-	@echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/osfmk/conf/tools/doconf/Makefile b/osfmk/conf/tools/doconf/Makefile
deleted file mode 100644
index aa55a9419..000000000
--- a/osfmk/conf/tools/doconf/Makefile
+++ /dev/null
@@ -1,47 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-COMP_SUBDIRS = \
-
-INST_SUBDIRS = \
-
-
-#
-# Who and where
-#
-BINDIR= 
-ifneq ($(MACHINE_CONFIG), DEFAULT)
-DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)/)
-else
-DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)/)
-endif
-PROGRAM= $(DSTDIR)doconf
-
-# 
-# How to install it
-#
-IFLAGS= -c -m 555
-
-$(PROGRAM): $(DSTDIR)% : $(SOURCE)%.csh
-	@-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS 
-	@sed -e "s/#PROGRAM.*/#`vers_string $(notdir $(PROGRAM))`/" \
-		< $< >$(notdir $(PROGRAM)).VERS;
-	@install $(IFLAGS) $(notdir $(PROGRAM)).VERS $(PROGRAM);
-	@-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS;
-
-do_build_setup: $(PROGRAM)
-
-do_build_all:
-
-setup_build_install:
-
-do_build_install:
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
diff --git a/osfmk/conf/tools/doconf/doconf.csh b/osfmk/conf/tools/doconf/doconf.csh
deleted file mode 100755
index 6fedb4786..000000000
--- a/osfmk/conf/tools/doconf/doconf.csh
+++ /dev/null
@@ -1,321 +0,0 @@
-#!/bin/csh -f
-set path = ($path .)
-######################################################################
-# HISTORY
-#  1-Dec-87  Michael Young (mwyoung) at Carnegie-Mellon University
-#	Added "-verbose" switch, so this script produces no output
-#	in the normal case.
-#
-# 10-Oct-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Flushed cmu_*.h and spin_locks.h
-#	[ V5.1(XF18) ]
-#
-#  6-Apr-87  Avadis Tevanian (avie) at Carnegie-Mellon University
-#	Use MASTER.local and MASTER.<machine>.local for generation of
-#	configuration files in addition to MASTER and MASTER.<machine>.
-#
-# 25-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Removed use of obsolete wb_*.h files when building the feature
-#	list;  modified to save the previous configuration file and
-#	display the differences between it and the new file.
-#	[ V5.1(F8) ]
-#
-# 25-Mar-87  Avadis Tevanian (avie) at Carnegie-Mellon University
-#	If there is no /etc/machine just print out a message telling
-#	user to use the -cpu option.  I thought this script was supposed
-#	to work even without a /etc/machine, but it doesn't... and this
-#	is the easiest way out.
-#
-# 13-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added "romp_fpa.h" file to extra features for the RT.
-#	[ V5.1(F7) ]
-#
-# 11-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Updated to maintain the appropriate configuration features file
-#	in the "machine" directory whenever the corresponding
-#	configuration is generated.  This replaces the old mechanism of
-#	storing this directly in the <sys/features.h> file since it was
-#	machine dependent and also precluded building programs for more
-#	than one configuration from the same set of sources.
-#	[ V5.1(F6) ]
-#
-# 21-Feb-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Fixed to require wired-in cpu type names for only those
-#	machines where the kernel name differs from that provided by
-#	/etc/machine (i.e. IBMRT => ca and SUN => sun3);  updated to
-#	permit configuration descriptions in both machine indepedent
-#	and dependent master configuration files so that attributes can
-#	be grouped accordingly.
-#	[ V5.1(F3) ]
-#
-# 17-Jan-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Updated to work from any directory at the same level as
-#	"conf"; generate configuration from both MASTER and
-#	MASTER.<machine-type> files; added -cpu switch.
-#	[ V5.1(F1) ]
-#
-# 18-Aug-86  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added -make switch and changed meaning of -config;  upgraded to
-#	allow multiple attributes per configuration and to define
-#	configurations in terms of these attributes within MASTER.
-#
-# 14-Apr-83  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added -config switch to only run /etc/config without 
-#	"make depend" and "make".
-#
-######################################################################
-
-set prog=$0
-set prog=$prog:t
-set nonomatch
-set OBJDIR=../BUILD
-if ("`/usr/bin/uname`" == "Rhapsody" ) then
-set CONFIG_DIR=/usr/local/bin
-else
-set CONFIG_DIR=/usr/bin
-endif
-
-unset domake
-unset doconfig
-unset beverbose
-unset MACHINE
-unset profile
-
-while ($#argv >= 1)
-    if ("$argv[1]" =~ -*) then
-        switch ("$argv[1]")
-	case "-c":
-	case "-config":
-	    set doconfig
-	    breaksw
-	case "-m":
-	case "-make":
-	    set domake
-	    breaksw
-	case "-cpu":
-	    if ($#argv < 2) then
-		echo "${prog}: missing argument to ${argv[1]}"
-		exit 1
-	    endif
-	    set MACHINE="$argv[2]"
-	    shift
-	    breaksw
-	case "-d":
-	    if ($#argv < 2) then
-		echo "${prog}: missing argument to ${argv[1]}"
-		exit 1
-	    endif
-	    set OBJDIR="$argv[2]"
-	    shift
-	    breaksw
-	case "-verbose":
-	    set beverbose
-	    breaksw
-	case "-p":
-	case "-profile":
-	    set profile
-	    breaksw
-	default:
-	    echo "${prog}: ${argv[1]}: unknown switch"
-	    exit 1
-	    breaksw
-	endsw
-	shift
-    else
-	break
-    endif
-end
-
-if ($#argv == 0) set argv=(GENERIC)
-
-if (! $?MACHINE) then
-    if (-d /NextApps) then
-	set MACHINE=`hostinfo | awk '/MC680x0/ { printf("m68k") } /MC880x0/ { printf("m88k") }'`
-    endif
-endif
-
-if (! $?MACHINE) then
-    if (-f /etc/machine) then
-	    set MACHINE="`/etc/machine`"
-    else
-	    echo "${prog}: no /etc/machine, specify machine type with -cpu"
-	    echo "${prog}: e.g. ${prog} -cpu VAX CONFIGURATION"
-	    exit 1
-    endif
-endif
-
-set FEATURES_EXTRA=
-
-switch ("$MACHINE")
-    case IBMRT:
-	set cpu=ca
-	set ID=RT
-	set FEATURES_EXTRA="romp_dualcall.h romp_fpa.h"
-	breaksw
-    case SUN:
-	set cpu=sun3
-	set ID=SUN3
-	breaksw
-    default:
-	set cpu=`echo $MACHINE | tr A-Z a-z`
-	set ID=`echo $MACHINE | tr a-z A-Z`
-	breaksw
-endsw
-set FEATURES=../h/features.h
-set FEATURES_H=(cs_*.h mach_*.h net_*.h\
-	        cputypes.h cpus.h vice.h\
-	        $FEATURES_EXTRA)
-set MASTER_DIR=../conf
-set MASTER =   ${MASTER_DIR}/MASTER
-set MASTER_CPU=${MASTER}.${cpu}
-
-set MASTER_LOCAL = ${MASTER}.local
-set MASTER_CPU_LOCAL = ${MASTER_CPU}.local
-if (! -f $MASTER_LOCAL) set MASTER_LOCAL = ""
-if (! -f $MASTER_CPU_LOCAL) set MASTER_CPU_LOCAL = ""
-
-if (! -d $OBJDIR) then
-    if ($?beverbose) then
-        echo "[ creating $OBJDIR ]"
-    endif
-    mkdir -p $OBJDIR
-endif
-
-foreach SYS ($argv)
-    set SYSID=${SYS}_${ID}
-    set SYSCONF=$OBJDIR/config.$SYSID
-    set BLDDIR=$OBJDIR
-    if ($?beverbose) then
-	echo "[ generating $SYSID from $MASTER_DIR/MASTER{,.$cpu}{,.local} ]"
-    endif
-    echo +$SYS \
-    | \
-    cat $MASTER $MASTER_LOCAL $MASTER_CPU $MASTER_CPU_LOCAL - \
-        $MASTER $MASTER_LOCAL $MASTER_CPU $MASTER_CPU_LOCAL \
-    | \
-    sed -n \
-	-e "/^+/{" \
-	   -e "s;[-+];#&;gp" \
-	      -e 't loop' \
-	   -e ': loop' \
-           -e 'n' \
-	   -e '/^#/b loop' \
-	   -e '/^$/b loop' \
-	   -e 's;^\([^#]*\).*#[ 	]*<\(.*\)>[ 	]*$;\2#\1;' \
-	      -e 't not' \
-	   -e 's;\([^#]*\).*;#\1;' \
-	      -e 't not' \
-	   -e ': not' \
-	   -e 's;[ 	]*$;;' \
-	   -e 's;^\!\(.*\);\1#\!;' \
-	   -e 'p' \
-	      -e 't loop' \
-           -e 'b loop' \
-	-e '}' \
-	-e "/^[^#]/d" \
-	-e 's;	; ;g' \
-	-e "s;^# *\([^ ]*\)[ ]*=[ ]*\[\(.*\)\].*;\1#\2;p" \
-    | \
-    awk '-F#' '\
-part == 0 && $1 != "" {\
-	m[$1]=m[$1] " " $2;\
-	next;\
-}\
-part == 0 && $1 == "" {\
-	for (i=NF;i>1;i--){\
-		s=substr($i,2);\
-		c[++na]=substr($i,1,1);\
-		a[na]=s;\
-	}\
-	while (na > 0){\
-		s=a[na];\
-		d=c[na--];\
-		if (m[s] == "") {\
-			f[s]=d;\
-		} else {\
-			nx=split(m[s],x," ");\
-			for (j=nx;j>0;j--) {\
-				z=x[j];\
-				a[++na]=z;\
-				c[na]=d;\
-			}\
-		}\
-	}\
-	part=1;\
-	next;\
-}\
-part != 0 {\
-	if ($1 != "") {\
-		n=split($1,x,",");\
-		ok=0;\
-		for (i=1;i<=n;i++) {\
-			if (f[x[i]] == "+") {\
-				ok=1;\
-			}\
-		}\
-		if (NF > 2 && ok == 0 || NF <= 2 && ok != 0) {\
-			print $2; \
-		}\
-	} else { \
-		print $2; \
-	}\
-}\
-' >$SYSCONF.new
-    if (-z $SYSCONF.new) then
-	echo "${prog}: ${$SYSID}: no such configuration in $MASTER_DIR/MASTER{,.$cpu}"
-	rm -f $SYSCONF.new
-    endif
-    if (! -d $BLDDIR) then
-        if ($?beverbose) then
-	    echo "[ creating $BLDDIR ]"
-        endif
-	mkdir -p $BLDDIR
-    endif
-#
-# These paths are used by config.
-#
-# "builddir" is the name of the directory where kernel binaries
-# are put.  It is a single path element, never absolute, and is
-# always relative to "objectdir".  "builddir" is used by config
-# solely to determine where to put files created by "config" (e.g.
-# the created Makefile and *.h's.)
-#
-# "objectdir" is the name of the directory which will hold "builddir".
-# It is a path; if relative, it is relative to the current directory
-# where config is run.  It's sole use is to be prepended to "builddir"
-# to indicate where config-created files are to be placed (see above).
-#
-# "sourcedir" is the location of the sources used to build the kernel.
-# It is a path; if relative, it is relative to the directory specified
-# by the concatenation of "objectdir" and "builddir" (i.e. where the
-# kernel binaries are put).
-#
-    echo 'builddir	"."'			>> $SYSCONF.new
-    set OBJRELDIR=`$RELPATH $OBJROOT $OBJDIR`
-    echo 'objectdir	"'$OBJROOT'/'$OBJRELDIR'"'		>> $SYSCONF.new
-    set SRCDIR=`dirname $SOURCE`
-    echo 'sourcedir	"'$SRCROOT'"'		>> $SYSCONF.new
-    if (-f $SYSCONF) then
-	diff $SYSCONF $SYSCONF.new
-	rm -f $SYSCONF.old
-	mv $SYSCONF $SYSCONF.old
-    endif
-    rm -f $SYSCONF
-    mv $SYSCONF.new $SYSCONF
-    if ($?doconfig) then
-        if ($?beverbose) then
-	    echo "[ configuring $SYSID ]"
-        endif
-	if ($?profile) then
-	    $CONFIG_DIR/config -c $MASTER_DIR -p $SYSCONF
-	else
-	    $CONFIG_DIR/config -c $MASTER_DIR $SYSCONF
-	endif
-    endif
-    if ($?domake) then
-        if ($?beverbose) then
-            echo "[ making $SYSID ]"
-        endif
-        (cd $BLDDIR; make)
-    endif
-end
diff --git a/osfmk/console/i386/serial_console.c b/osfmk/console/i386/serial_console.c
index 234a022b8..2af1a9553 100644
--- a/osfmk/console/i386/serial_console.c
+++ b/osfmk/console/i386/serial_console.c
@@ -58,9 +58,6 @@ typedef struct console_buf {
 	char	buf[CPU_BUFFER_LEN];
 } console_buf_t;
 
-extern int serial_getc(void);
-extern void serial_putc(int);
-
 static void _serial_putc(int, int, int);
 
 struct console_ops cons_ops[] = {
@@ -138,6 +135,13 @@ console_cpu_free(void *buf)
 		kfree((void *) buf, sizeof(console_buf_t));
 }
 
+/* So we can re-write the serial device functions at boot-time */
+void
+console_set_serial_ops( struct console_ops *newops )
+{
+	cons_ops[SERIAL_CONS_OPS] = *newops;
+}
+
 static inline int
 console_ring_space(void)
 {
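
Note: console_set_serial_ops() added above lets a boot-time serial driver replace the SERIAL_CONS_OPS entry wholesale. A minimal usage sketch; my_putc/my_getc are hypothetical, and the struct console_ops layout follows the serial_protos.h hunk later in this patch:

    static void
    my_putc(int unit, int line, int c)
    {
        /* drive the UART here */
        (void)unit; (void)line; (void)c;
    }

    static int
    my_getc(int unit, int line, boolean_t wait, boolean_t raw)
    {
        (void)unit; (void)line; (void)wait; (void)raw;
        return -1;      /* nothing pending */
    }

    static struct console_ops my_serial_ops = {
        .putc = my_putc,
        .getc = my_getc,
    };

    void
    my_serial_attach(void)
    {
        console_set_serial_ops(&my_serial_ops);
    }
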
diff --git a/osfmk/console/ppc/serial_console.c b/osfmk/console/ppc/serial_console.c
deleted file mode 100644
index 648ea791e..000000000
--- a/osfmk/console/ppc/serial_console.c
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-
-#include <mach_kdb.h>
-#include <platforms.h>
-#include <serial_console_default.h>
-
-#include <kern/spl.h>
-#include <machine/machparam.h>		/* spl definitions */
-#include <types.h>
-#include <console/video_console.h>
-#include <console/serial_protos.h>
-#include <kern/kalloc.h>
-#include <kern/thread.h>
-#include <ppc/misc_protos.h>
-#include <ppc/serial_io.h>
-#include <kern/cpu_number.h>
-#include <ppc/Firmware.h>
-#include <ppc/proc_reg.h>
-#include <ppc/cpu_internal.h>
-#include <ppc/exception.h>
-#include <pexpert/pexpert.h>
-
-/*
- * A machine MUST have a console.  In our case
- * things are a little complicated by the graphic
- * display: people expect it to be their "console",
- * but we'd like to be able to live without it.
- * This is not to be confused with the "rconsole" thing:
- * that just duplicates the console I/O to
- * another place (for debugging/logging purposes).
- */
-
-const int console_unit = 0;
-const uint32_t console_chan_default = CONSOLE_PORT;
-#define console_chan (console_chan_default) /* ^ cpu_number()) */
-
-#define MP_SAFE_CONSOLE 1	/* Set this to 1 to allow more than 1 processor to print at once */
-#if MP_SAFE_CONSOLE
-struct ppcbfr {													/* Controls multiple processor output */
-	unsigned int 	pos;										/* Current position in buffer */
-	unsigned int	noprompt;									/* Set if we skip the prompt */
-	unsigned int	echo;										/* Control character echoing */
-	char			buffer[256];								/* Fairly big buffer */	
-};
-typedef struct ppcbfr ppcbfr_t;
-
-ppcbfr_t cbfr_boot_cpu;											/* Get one for boot cpu */
-volatile unsigned int cbfpend;									/* A buffer is pending output */
-volatile unsigned int sconowner=-1;								/* Mark who's actually writing */
-#endif /* MP_SAFE_CONSOLE */
-
-struct console_ops cons_ops[] = {
-	{
-		.putc = scc_putc,
-		.getc = scc_getc,
-	},
-	{
-		.putc = vcputc,
-		.getc = vcgetc,
-	},
-};
-
-uint32_t nconsops = (sizeof cons_ops / sizeof cons_ops[0]);
-
-uint32_t cons_ops_index = VC_CONS_OPS;
-
-unsigned int killprint = 0;
-unsigned int debcnputc = 0;
-extern unsigned int	mappingdeb0;
-extern int debugger_cpu;
-
-void *console_per_proc_alloc(boolean_t boot_processor)
-{
-	ppcbfr_t  *cbfr_cpu;
-
-	if (boot_processor)
-		cbfr_cpu = &cbfr_boot_cpu;
-	else {
-		cbfr_cpu = (ppcbfr_t *)kalloc(sizeof(ppcbfr_t));
-		if (cbfr_cpu == (ppcbfr_t *)NULL)
-			return (void *)NULL;
-	}
-	bzero((char *)cbfr_cpu, sizeof(ppcbfr_t));
-	return (void *)cbfr_cpu;
-}
-
-void console_per_proc_free(void *per_proc_cbfr)
-{
-	if (per_proc_cbfr == (void *)&cbfr_boot_cpu)
-		return;
-	else
-		kfree(per_proc_cbfr, sizeof(ppcbfr_t));
-}
-
-static void _cnputc(char c)
-{
-	cons_ops[cons_ops_index].putc(console_unit, console_chan, c);
-}
-
-void cnputc_unbuffered(char c) {
-	_cnputc(c);
-}
-
-void cnputcusr(char c) {										/* Echo input character directly */
-	struct per_proc_info	*procinfo;
-	spl_t 					s;
-	
-	s=splhigh();
-	procinfo = getPerProc();
-
-	(void)hw_atomic_add(&(procinfo->debugger_holdoff), 1); /* Don't allow debugger entry just now (this is a HACK) */
-
-	_cnputc( c);												/* Echo the character */
-	if(c=='\n') _cnputc( '\r');									/* Add a return if we had a new line */
-
-	(void)hw_atomic_sub(&(procinfo->debugger_holdoff), 1); /* Don't allow debugger entry just now (this is a HACK) */
-	splx(s);
-	return;
-}
-
-void
-cnputc(char c)
-{
-	unsigned int				oldpend, i, cpu, ourbit, sccpu;
-	struct per_proc_info		*procinfo;
-	ppcbfr_t					*cbfr, *cbfr_cpu;
-	spl_t 						s;
-		
-#if MP_SAFE_CONSOLE
-
-/*
- *		Handle multiple CPU console output.
- *		Note: this thing has gotten god-awful complicated.  We need a better way.
- */
- 	
-
-	if(killprint) {		
-		return;													/* If printing is disabled, bail... */
-	}	
-	
-	s=splhigh();												/* Don't bother me */
-	procinfo = getPerProc();
-	cpu = procinfo->cpu_number;
-	cbfr = procinfo->pp_cbfr;
-
-	(void)hw_atomic_add(&(procinfo->debugger_holdoff), 1); /* Don't allow debugger entry just now (this is a HACK) */
-
-	ourbit = 1 << cpu;											/* Make a mask for just us */
-	if(debugger_cpu != -1) {									/* Are we in the debugger with empty buffers? */
-	
-		while(sconowner != cpu) {								/* Anyone but us? */
-			hw_compare_and_store(-1, cpu, &sconowner);	/* Try to mark it for us if idle */
-		}
-	
-		_cnputc( c);											/* Yeah, just write it */
-		if(c=='\n')												/* Did we just write a new line? */
-			_cnputc( '\r');										/* Yeah, just add a return */
-			
-		sconowner=-1;											/* Mark it idle */	
-		(void)hw_atomic_sub(&(procinfo->debugger_holdoff), 1); /* Don't allow debugger entry just now (this is a HACK) */
-		
-		splx(s);
-		return;													/* Leave... */
-	}
-
-	
-	while(ourbit&cbfpend);										/* We aren't "double buffered," so we'll just wait until the buffers are written */
-	isync();													/* Just in case we had to wait */
-	
-	if(c) {														/* If the character is not null */
-		cbfr->buffer[cbfr->pos]=c;							/* Fill in the buffer for our CPU */
-		cbfr->pos++;										/* Up the count */
-		if(cbfr->pos > 253) {								/* Is the buffer full? */
-			cbfr->buffer[254]='\n';							/* Yeah, set the second to last as a LF */
-			cbfr->buffer[255]='\r';							/* And the last to a CR */
-			cbfr->pos=256;									/* Push the buffer to the end */
-			c='\r';												/* Set character to a CR */
-		}
-	}
-	
-	if(c == '\n') {												/* Are we finishing a line? */
-		cbfr->buffer[cbfr->pos]='\r';							/* And the last to a CR */
-		cbfr->pos++;											/* Up the count */
-		c='\r';													/* Set character to a CR */
-	}
-
-#if 1
-	if(cbfr->echo == 1) {										/* Did we hit an escape last time? */
-		if(c == 'K') {											/* Is it a partial clear? */
-			cbfr->echo = 2;										/* Yes, enter echo mode */
-		}
-		else cbfr->echo = 0;									/* Otherwise reset escape */
-	}
-	else if(cbfr->echo == 0) {									/* Not in escape sequence, see if we should enter */
-		cbfr->echo = 1;											/* Set that we are in escape sequence */
-	}
-#endif
-
-	if((c == 0x00) || (c == '\r') || (cbfr->echo == 2)) {		/* Try to push out all buffers if we see CR or null */
-				
-		while(1) {												/* Loop until we see who's doing this */
-			oldpend=cbfpend;									/* Get the currentest pending buffer flags */
-			if(hw_compare_and_store(oldpend, oldpend|ourbit, &cbfpend))	/* Swap ours on if no change */
-				break;											/* Bail the loop if it worked */
-		}
-		
-		if(!hw_compare_and_store(-1, cpu, &sconowner)) {	/* See if someone else has this, and take it if not */
-			procinfo->debugger_holdoff = 0;						/* Allow debugger entry (this is a HACK) */
-			splx(s);											/* Let's take some 'rupts now */
-			return;												/* We leave here, 'cause another processor is already writing the buffers */
-		}
-				
-		while(1) {												/* Loop to dump out all of the finished buffers */
-			oldpend=cbfpend;									/* Get the most current finished buffers */
-			for(sccpu=0; sccpu<real_ncpus; sccpu++) {				/* Cycle through all CPUs buffers */
-				if ((PerProcTable[sccpu].ppe_vaddr == 0)
-				    || (PerProcTable[sccpu].ppe_vaddr->pp_cbfr == 0))
-					continue;
-
-				cbfr_cpu = PerProcTable[sccpu].ppe_vaddr->pp_cbfr;
-				
-				if(oldpend&(1<<sccpu)) {						/* Does this guy have a buffer to do? */
-
-#if 0
-					if(!cbfr_cpu->noprompt) {					/* Don't prompt if there was not CR before */
-						_cnputc( '{');	/* Mark CPU number */
-						_cnputc( '0'+sccpu);	/* Mark CPU number */
-						_cnputc( '.');	/* (TEST/DEBUG) */
-						_cnputc( '0'+cpu);	/* (TEST/DEBUG) */
-						_cnputc( '}');	/* Mark CPU number */
-						_cnputc( ' ');	/* Mark CPU number */
-					}
-#endif
-					
-					for(i=0; i<cbfr_cpu->pos; i++) {				/* Do the whole buffer */
-						_cnputc(cbfr_cpu->buffer[i]);	 			/* Write it */
-					}
-					
-					if(cbfr_cpu->buffer[cbfr_cpu->pos-1]!='\r') {	/* Was the last character a return? */
-						cbfr_cpu->noprompt = 1;						/* Remember not to prompt */
-					}
-					else {											/* Last was a return */
-						cbfr_cpu->noprompt = 0;						/* Otherwise remember to prompt */
-						cbfr_cpu->echo = 0;							/* And clear echo */
-					}
-						
-					cbfr_cpu->pos=0;								/* Reset the buffer pointer */
-		
-					while(!hw_compare_and_store(cbfpend, cbfpend&~(1<<sccpu), &cbfpend));	/* Swap it off */
-				}
-			}
-			sconowner=-1;										/* Set the writer to idle */
-			sync();												/* Insure that everything's done */
-			if(hw_compare_and_store(0, 0, &cbfpend)) break;	/* If there are no new buffers, we are done... */
-			if(!hw_compare_and_store(-1, cpu, &sconowner)) break;	/* If this isn't idle anymore, we're done */
-	
-		}
-	}
-	(void)hw_atomic_sub(&(procinfo->debugger_holdoff), 1); /* Don't allow debugger entry just now (this is a HACK) */
-	splx(s);													/* Let's take some 'rupts now */
-
-#else  /* MP_SAFE_CONSOLE */
-	_cnputc( c);
-	if (c == '\n')
-		_cnputc('\r');
-#endif  /* MP_SAFE_CONSOLE */
-
-}
-
-int
-cngetc(void)
-{
-	return cons_ops[cons_ops_index].getc(console_unit, console_chan,
-					     TRUE, FALSE);
-}
-
-int
-cnmaygetc(void)
-{
-	return cons_ops[cons_ops_index].getc(console_unit, console_chan,
-					     FALSE, FALSE);
-}
-
-
-int
-vcgetc(__unused int l, 
-       __unused int u, 
-       __unused boolean_t wait, 
-       __unused boolean_t raw)
-{
-	char c;
-
-	if( 0 == (*PE_poll_input)( 0, &c))
-		return( c);
-	else
-		return( 0);
-}
diff --git a/osfmk/console/ppc/video_scroll.s b/osfmk/console/ppc/video_scroll.s
deleted file mode 100644
index 77e4ffcfc..000000000
--- a/osfmk/console/ppc/video_scroll.s
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_FREE_COPYRIGHT@
- * 
- */
-
-/* Routines to perform high-speed scrolling, assuming that the memory is
- * non-cached, and that the amount of memory to be scrolled is a multiple
- * of (at least) 16.
- */
-
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-
-/*
- * void video_scroll_up(unsigned long start,
- *		        unsigned long end,
- *		        unsigned long dest)
- */
-
-ENTRY(video_scroll_up, TAG_NO_FRAME_USED)
-
-			mfmsr	r0									/* Get the MSR */
-			mflr	r6									/* Get the LR */
-			ori		r7,r0,1<<(31-MSR_FP_BIT)			/* Turn on floating point */
-			stwu	r1,-(FM_SIZE+16)(r1)				/* Get space for a couple of registers on stack */
-			rlwinm	r7,r7,0,MSR_EE_BIT+1,MSR_EE_BIT-1	/* Turn off interrupts */
-			stw		r6,(FM_SIZE+16+FM_LR_SAVE)(r1)		/* Save the return */
-			
-			mtmsr	r7									/* Turn on FPU */
-			isync										/* Wait for it */
-			
-vsufpuon1:	stfd	f0,(FM_SIZE+0)(r1)					/* Save one register */
-			stfd	f1,(FM_SIZE+8)(r1)					/* and the second */
-
-/* ok, now we can use the FPU registers to do some fast copying
- */
-
-.L_vscr_up_loop:
-			lfd	f0,	0(r3)
-			lfd	f1,	8(r3)
-		
-			addi	r3,	r3,	16
-			
-			stfd	f0,	0(r5)
-		
-			cmpl	cr0,	r3,	r4
-		
-			stfd	f1,	8(r5)
-		
-			addi	r5,	r5,	16
-		
-			blt+	cr0,	.L_vscr_up_loop
-
-			lfd		f0,(FM_SIZE+0)(r1)					/* Load back one register */
-			lfd		f1,(FM_SIZE+8)(r1)					/* and the second */
-			lwz		r1,0(r1)							/* Pop the stack */
-		
-			mtmsr	r0									/* Turn off FPU again */
-			isync										/* Wait for it */
-			blr											/* Go away, don't bother me... */
-
-
-/*
- * void video_scroll_down(unsigned long start,   HIGH address to scroll from
- *		          unsigned long end,     LOW address 
- *		          unsigned long dest)    HIGH address
- */
-
-ENTRY(video_scroll_down, TAG_NO_FRAME_USED)
-
-	/* Save off the link register, we want to call fpu_save.
-	 */
-	
-
-			mfmsr	r0									/* Get the MSR */
-			mflr	r6									/* Get the LR */
-			ori		r7,r0,1<<(31-MSR_FP_BIT)			/* Turn on floating point */
-			stwu	r1,-(FM_SIZE+16)(r1)				/* Get space for a couple of registers on stack */
-			rlwinm	r7,r7,0,MSR_EE_BIT+1,MSR_EE_BIT-1	/* Turn off interrupts */
-			stw		r6,(FM_SIZE+16+FM_LR_SAVE)(r1)		/* Save the return */
-			
-			mtmsr	r7									/* Turn on FPU */
-			isync										/* Wait for it */
-			
-vsdfpuon1:	stfd	f0,(FM_SIZE+0)(r1)					/* Save one register */
-			stfd	f1,(FM_SIZE+8)(r1)					/* and the second */
-
-/* ok, now we can use the FPU registers to do some fast copying	 */
-
-.L_vscr_down_loop:
-			lfd	f0,	-16(r3)
-			lfd	f1,	-8(r3)
-		
-			subi	r3,	r3,	16
-			
-			stfd	f0,	-16(r5)
-		
-			cmpl	cr0,	r3,	r4
-		
-			stfd	f1,	-8(r5)
-		
-			subi	r5,	r5,	16
-		
-			bgt+	cr0,	.L_vscr_down_loop
-		
-
-			lfd		f0,(FM_SIZE+0)(r1)					/* Load back one register */
-			lfd		f1,(FM_SIZE+8)(r1)					/* and the second */
-			lwz		r1,0(r1)							/* Pop the stack */
-		
-			mtmsr	r0									/* Turn off FPU again */
-			isync										/* Wait for it */
-			blr											/* Go away, don't bother me... */
-
diff --git a/osfmk/console/serial_general.c b/osfmk/console/serial_general.c
index a2d78e79a..d51e98dab 100644
--- a/osfmk/console/serial_general.c
+++ b/osfmk/console/serial_general.c
@@ -80,7 +80,6 @@ serial_keyboard_poll(void)
 	int chr;
 	uint64_t next;
 
-
 	while(1) {
 		chr = _serial_getc(0, 1, 0, 1);	/* Get a character if there is one */
 		if(chr < 0) /* The serial buffer is empty */
diff --git a/osfmk/console/serial_protos.h b/osfmk/console/serial_protos.h
index 90b691f1d..99da75451 100644
--- a/osfmk/console/serial_protos.h
+++ b/osfmk/console/serial_protos.h
@@ -46,16 +46,17 @@ extern unsigned int disable_serial_output;
 
 int _serial_getc(int unit, int line, boolean_t wait, boolean_t raw);
 
-boolean_t console_is_serial(void);
-int switch_to_serial_console(void);
-int switch_to_video_console(void);
-void switch_to_old_console(int old_console);
-
 struct console_ops {
 	void	(*putc)(int, int, int);
 	int	(*getc)(int, int, boolean_t, boolean_t);
 };
 
+boolean_t console_is_serial(void);
+int switch_to_serial_console(void);
+int switch_to_video_console(void);
+void switch_to_old_console(int old_console);
+void console_set_serial_ops( struct console_ops *newops );
+
 #define SERIAL_CONS_OPS 0
 #define VC_CONS_OPS 1
 
diff --git a/osfmk/console/video_console.c b/osfmk/console/video_console.c
index 4b088aa41..9c5460016 100644
--- a/osfmk/console/video_console.c
+++ b/osfmk/console/video_console.c
@@ -187,7 +187,7 @@ MACRO_END
 
 #define VCPUTC_LOCK_LOCK()				\
 MACRO_BEGIN						\
-	if (!hw_lock_to(&vcputc_lock, LockTimeOut*10))	\
+	if (!hw_lock_to(&vcputc_lock, hwLockTimeOut*10))\
 	{						\
 		panic("VCPUTC_LOCK_LOCK");		\
 	}						\
@@ -1274,7 +1274,7 @@ gc_update_color(int color, boolean_t fore)
 void
 vcputc(__unused int l, __unused int u, int c)
 {
-	if ( gc_enabled || debug_mode )
+	if ( gc_initialized && ( gc_enabled || debug_mode ) )
 	{
 		spl_t s;
 
@@ -2444,7 +2444,7 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op)
 
 #if defined(__x86_64__)
 		// Adjust the video buffer pointer to point to where it is in high virtual (above the hole)
-		new_vinfo.v_baseaddr |= VM_MIN_KERNEL_ADDRESS;
+		new_vinfo.v_baseaddr |= (VM_MIN_KERNEL_ADDRESS & ~LOW_4GB_MASK);
 #endif
 
 		/* Update the vinfo structure atomically with respect to the vc_progress task if running */
@@ -2632,6 +2632,10 @@ vcattach(void)
 
 		for ( index = 0 ; index < msgbufp->msg_bufx ; index++ )
 		{
+			if (msgbufp->msg_bufc[index] == '\0') {
+				continue;
+			}
+
 			vcputc( 0, 0, msgbufp->msg_bufc[index] );
 
 			if ( msgbufp->msg_bufc[index] == '\n' )
diff --git a/osfmk/ddb/db_command.c b/osfmk/ddb/db_command.c
index 13815b525..7e21b12b4 100644
--- a/osfmk/ddb/db_command.c
+++ b/osfmk/ddb/db_command.c
@@ -64,9 +64,6 @@
  * Command dispatcher.
  */
 #include <norma_vm.h>
-#ifdef	AT386
-#include <norma_scsi.h>
-#endif	/* AT386 */
 
 #include <mach/boolean.h>
 #include <string.h>
@@ -86,9 +83,6 @@
 #include <ddb/db_cond.h>
 #include <ddb/db_examine.h>
 #include <ddb/db_expr.h>
-#if defined(__ppc__)
-#include <ppc/db_low_trace.h>
-#endif
 #include <ddb/db_macro.h>
 #include <ddb/db_print.h>
 #include <ddb/db_run.h>
@@ -741,7 +735,6 @@ struct db_command db_command_table[] = {
 		.name = "reboot",
 		(db_func)db_reboot,
 	},
-#if !defined(__ppc__)	
 	{
 		.name = "ms",
 		.fcn = db_msr,
@@ -757,69 +750,6 @@ struct db_command db_command_table[] = {
 		.fcn = db_apic,
 		.flag = CS_MORE,
 	},
-#endif /* !__ppc__ */
-#if defined(__ppc__)	
-	{
-		.name = "lt",
-		.fcn = db_low_trace,
-		.flag = CS_MORE|CS_SET_DOT,
-	},
-	{
-		.name = "dl",
-		.fcn = db_display_long,
-		.flag = CS_MORE|CS_SET_DOT,
-	},
-	{
-		.name = "dc",
-		.fcn = db_display_char,
-		.flag = CS_MORE|CS_SET_DOT,
-	},
-	{
-		.name = "dv",
-		.fcn = db_display_virtual,
-		.flag = CS_MORE|CS_SET_DOT,
-	},
-	{
-		.name = "dm",
-		.fcn = db_display_mappings,
-		.flag = CS_MORE|CS_SET_DOT,
-	},
-	{
-		.name = "dh",
-		.fcn = db_display_hash,
-		.flag = CS_MORE|CS_SET_DOT,
-	},
-	{
-		.name = "dp",
-		.fcn = db_display_pmap,
-		.flag = CS_MORE,
-	},
-	{
-		.name = "ds",
-		.fcn = db_display_save,
-		.flag = CS_MORE|CS_SET_DOT,
-	},
-	{
-		.name = "dx",
-		.fcn = db_display_xregs,
-		.flag = CS_MORE|CS_SET_DOT,
-	},
-	{
-		.name = "gs",
-		.fcn = db_gsnoop,
-		.flag = CS_MORE,
-	},
-	{
-		.name = "cm",
-		.fcn = db_check_mappings,
-		.flag = CS_MORE,
-	},
-	{
-		.name = "cp",
-		.fcn = db_check_pmaps,
-		.flag = CS_MORE,
-	},
-#endif /* __ppc__ */
 	{
 		.name = (const char *)NULL,
 	},
diff --git a/osfmk/ddb/db_print.c b/osfmk/ddb/db_print.c
index 1a6fac39b..d773823d0 100644
--- a/osfmk/ddb/db_print.c
+++ b/osfmk/ddb/db_print.c
@@ -278,7 +278,7 @@ db_print_act(
 		    db_printf("%s ID:   ACT     STAT  SW STACK    SHUTTLE", indent);
 		    db_printf("  SUS  PRI  WAIT_FUNC\n");
 		}
-		policy = ((athread && (athread->sched_mode&TH_MODE_TIMESHARE))? 1: 2);
+		policy = ((athread && (athread->sched_mode == TH_MODE_TIMESHARE))? 1: 2);
 		db_printf("%s%3d%c %0*X %s %s %0*X %0*X %3d %3d/%s ",
 		    indent, act_id,
 		    (thr_act == current_thread())? '#': ':',
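
Note: this is a semantic fix, not a cleanup: sched_mode used to be a bit field tested with `&`, but in this release it is an enumerated mode, so only an equality compare is correct. Hedged sketch with illustrative enumerator values:

    typedef enum {
        TH_MODE_NONE = 0,
        TH_MODE_REALTIME,
        TH_MODE_FIXED,
        TH_MODE_TIMESHARE,
    } sched_mode_t;

    int
    is_timeshare(sched_mode_t m)
    {
        /* (m & TH_MODE_TIMESHARE) would also match any enumerator that
           happens to share bits with TH_MODE_TIMESHARE; compare exactly. */
        return m == TH_MODE_TIMESHARE;
    }
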
diff --git a/osfmk/ddb/db_sym.c b/osfmk/ddb/db_sym.c
index b99f9674f..1e118054e 100644
--- a/osfmk/ddb/db_sym.c
+++ b/osfmk/ddb/db_sym.c
@@ -185,7 +185,7 @@ db_add_symbol_table(
 		st->map_pointer = 0;
 	else
 		st->map_pointer = map_pointer;
-	strcpy(st->name, name);
+	strlcpy(st->name, name, sizeof (st->name));
 	st->minsym = minsym;
 	st->maxsym = maxsym;
 	if (maxsym == 0)
@@ -1329,7 +1329,7 @@ db_clone_symtabXXX(
 	}
 
 	*st = *st_src;			/* bulk copy src -> dest	*/
-	strcpy(st->name, cloner);	/* new name			*/
+	strlcpy(st->name, cloner, sizeof (st->name));	/* new name	*/
 	st->private = memp;		/* copy symbols			*/
 	bcopy((const char *)st_src->private, st->private, size);
 	st->start = memp + sizeof(int);	/* fixup pointers to symtab	*/
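
Note: the strcpy-to-strlcpy conversions here and in db_variables.c follow the BSD idiom the kernel provides: bound the copy by the destination size, rely on guaranteed NUL termination, and detect truncation from the return value. A small self-contained example:

    #include <string.h>     /* strlcpy is the BSD API, not ISO C */

    static char st_name[32];

    void
    set_name(const char *src)
    {
        if (strlcpy(st_name, src, sizeof st_name) >= sizeof st_name) {
            /* src was truncated; st_name holds a NUL-terminated prefix */
        }
    }
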
diff --git a/osfmk/ddb/db_trap.c b/osfmk/ddb/db_trap.c
index 41acbd325..759649b82 100644
--- a/osfmk/ddb/db_trap.c
+++ b/osfmk/ddb/db_trap.c
@@ -132,11 +132,7 @@ db_task_trap(__unused int type, __unused int code, boolean_t user_space)
 		db_printf("\n\t");
 		db_print_inst(db_dot, task_space);
 #else /* !defined(__alpha) */
-#if defined(__ppc__)
-		db_print_loc_and_inst(db_dot, task_space);
-#else	/* __ppc__ */
 		db_print_loc_and_inst(db_dot, task);
-#endif	/* __ppc__ */
 #endif /* defined(__alpha) */
 	    } else
 		db_printf("Trouble printing location %#llX.\n", (unsigned long long)db_dot);
diff --git a/osfmk/ddb/db_variables.c b/osfmk/ddb/db_variables.c
index 0fe14d1e6..f30e5cad4 100644
--- a/osfmk/ddb/db_variables.c
+++ b/osfmk/ddb/db_variables.c
@@ -197,7 +197,7 @@ db_cmp_variable_name(struct db_variable *vp, const char *name,
 	    || (level > 0 && (ap->suffix[0] < vp->low 
 		  	      || (vp->high >= 0 && ap->suffix[0] > vp->high))))
 	    return(FALSE);
-	strcpy(ap->modif, (*np)? np+1: "");
+	strlcpy(ap->modif, (*np)? np+1: "", TOK_STRING_SIZE);
 	ap->thr_act = (db_option(ap->modif, 't')?db_default_act: THREAD_NULL);
 	ap->level = level;
 	ap->hidden_level = -1;
@@ -451,7 +451,7 @@ db_show_one_variable(void)
 		return;
 	    }
 
-	    strcpy(aux_param.modif, *p ? p + 1 : "");
+	    strlcpy(aux_param.modif, *p ? p + 1 : "", TOK_STRING_SIZE);
 	    aux_param.thr_act = (db_option(aux_param.modif, 't') ?
 			db_default_act : THREAD_NULL);
 	}
diff --git a/osfmk/ddb/db_variables.h b/osfmk/ddb/db_variables.h
index 8d0b1c817..3ff52cf16 100644
--- a/osfmk/ddb/db_variables.h
+++ b/osfmk/ddb/db_variables.h
@@ -171,7 +171,7 @@
  * auxiliary parameters passed to a variable handler
  */
 struct db_var_aux_param {
-	char		*modif;			/* option strings */
+	char		*modif;			/* option strings, must be TOK_STRING_SIZE */
 	short		level;			/* number of levels */
 	short		hidden_level;		/* hidden level */
 	short		suffix[DB_VAR_LEVEL];	/* suffix */
diff --git a/osfmk/ddb/makedis.c b/osfmk/ddb/makedis.c
index 59afa5290..a33bf216e 100644
--- a/osfmk/ddb/makedis.c
+++ b/osfmk/ddb/makedis.c
@@ -2371,9 +2371,10 @@ void *xmalloc(size_t size) {
 
 void *xstrdup(char *s) {
     char *p;
+    size_t i = strlen(s) + 1;
 
-    p = xmalloc(strlen(s) + 1);
-    strcpy(p, s);
+    p = xmalloc(i);
+    strlcpy(p, s, i);
     return p;
 }
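Editor's note: since xstrdup() now computes the length up front, the copy size is fully known; a memcpy() of i bytes (string plus its NUL) would be an equivalent alternative that avoids rescanning the string inside strlcpy(). A stand-alone sketch of that variant (name and malloc-based error handling are assumptions):

    #include <stdlib.h>
    #include <string.h>

    static char *xstrdup_alt(const char *s)
    {
        size_t i = strlen(s) + 1;   /* bytes including the trailing NUL */
        char *p = malloc(i);

        if (p == NULL)
            return NULL;            /* caller decides how to fail */
        memcpy(p, s, i);            /* length known: no second scan */
        return p;
    }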
 
diff --git a/osfmk/default_pager/default_pager.c b/osfmk/default_pager/default_pager.c
index 20f3e361a..5b2ee7b41 100644
--- a/osfmk/default_pager/default_pager.c
+++ b/osfmk/default_pager/default_pager.c
@@ -396,6 +396,7 @@ default_pager_initialize(void)
 	vstruct_zone = zinit(sizeof(struct vstruct),
 			     10000 * sizeof(struct vstruct),
 			     8192, "vstruct zone");
+	zone_change(vstruct_zone, Z_CALLERACCT, FALSE);
 	zone_change(vstruct_zone, Z_NOENCRYPT, TRUE);
 
 	VSL_LOCK_INIT();
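Editor's note: the added zone_change() tags the vstruct zone with Z_CALLERACCT set to FALSE, so allocations from it are not billed to the calling task. A kernel-context sketch of the setup pairing (not stand-alone user code; assumes the zinit()/zone_change() interfaces exactly as used above):

    /* Kernel-context sketch only. */
    zone_t example_zone;

    example_zone = zinit(sizeof (struct vstruct),         /* element size */
                         10000 * sizeof (struct vstruct), /* max memory   */
                         8192,                            /* alloc chunk  */
                         "example zone");
    zone_change(example_zone, Z_CALLERACCT, FALSE);  /* don't bill callers   */
    zone_change(example_zone, Z_NOENCRYPT, TRUE);    /* skip swap encryption */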
diff --git a/osfmk/default_pager/default_pager_internal.h b/osfmk/default_pager/default_pager_internal.h
index d1c4883e1..904643741 100644
--- a/osfmk/default_pager/default_pager_internal.h
+++ b/osfmk/default_pager/default_pager_internal.h
@@ -87,12 +87,14 @@
 
 #define MACH_PORT_FACE mach_port_t
 
-#if 0
-#ifndef	USE_PRECIOUS
-#define	USE_PRECIOUS	TRUE
-#endif
+#if CONFIG_FREEZE
+#define	RECLAIM_SWAP	1
+#else
+#define	RECLAIM_SWAP	0
 #endif
 
+#define	USE_PRECIOUS	0
+
 #ifdef	USER_PAGER
 #define UP(stuff)	stuff
 #else	/* USER_PAGER */
@@ -737,6 +739,9 @@ extern boolean_t	bs_add_device(char *,
 				      MACH_PORT_FACE);
 extern vstruct_t	ps_vstruct_create(dp_size_t);
 extern void		ps_vstruct_dealloc(vstruct_t);
+extern void		ps_vstruct_reclaim(vstruct_t,
+					   boolean_t,
+					   boolean_t);
 extern kern_return_t	pvs_cluster_read(vstruct_t,
 					 dp_offset_t,
 					 dp_size_t,
diff --git a/osfmk/default_pager/dp_backing_store.c b/osfmk/default_pager/dp_backing_store.c
index ceda0a902..9fcf6a2bd 100644
--- a/osfmk/default_pager/dp_backing_store.c
+++ b/osfmk/default_pager/dp_backing_store.c
@@ -163,11 +163,15 @@ unsigned int	maximum_pages_free = 0;
 ipc_port_t	min_pages_trigger_port = NULL;
 ipc_port_t	max_pages_trigger_port = NULL;
 
+#if CONFIG_FREEZE
+boolean_t	use_emergency_swap_file_first = TRUE;
+#else
 boolean_t	use_emergency_swap_file_first = FALSE;
+#endif
 boolean_t	bs_low = FALSE;
 int		backing_store_release_trigger_disable = 0;
 boolean_t	backing_store_stop_compaction = FALSE;
-
+boolean_t	backing_store_abort_compaction = FALSE;
 
 /* Have we decided if swap needs to be encrypted yet ? */
 boolean_t	dp_encryption_inited = FALSE;
@@ -176,7 +180,6 @@ boolean_t	dp_encryption = FALSE;
 
 boolean_t	dp_isssd = FALSE;
 
-
 /*
  * Object sizes are rounded up to the next power of 2,
  * unless they are bigger than a given maximum size.
@@ -205,6 +208,15 @@ unsigned  int	dp_pages_free = 0;
 unsigned  int	dp_pages_reserve = 0;
 unsigned  int	cluster_transfer_minimum = 100;
 
+/*
+ * Trim state
+ */
+struct ps_vnode_trim_data {
+	struct vnode *vp;
+	dp_offset_t   offset;
+	dp_size_t     length;
+};
+
 /* forward declarations */
 kern_return_t ps_write_file(paging_segment_t, upl_t, upl_offset_t, dp_offset_t, unsigned int, int);	/* forward */
 kern_return_t ps_read_file (paging_segment_t, upl_t, upl_offset_t, dp_offset_t, unsigned int, unsigned int *, int);	/* forward */
@@ -227,6 +239,10 @@ vs_map_t vs_get_map_entry(
 kern_return_t
 default_pager_backing_store_delete_internal( MACH_PORT_FACE );
 
+static inline void ps_vnode_trim_init(struct ps_vnode_trim_data *data);
+static inline void ps_vnode_trim_now(struct ps_vnode_trim_data *data);
+static inline void ps_vnode_trim_more(struct ps_vnode_trim_data *data, struct vs_map *map, unsigned int shift, dp_size_t length);
+
 default_pager_thread_t *
 get_read_buffer( void )
 {
@@ -441,7 +457,7 @@ backing_store_lookup(
 
 	if ((port == MACH_PORT_NULL) || port_is_vs(port))
 */
-	if ((port == MACH_PORT_NULL))
+	if (port == MACH_PORT_NULL)
 		return BACKING_STORE_NULL;
 
 	BSL_LOCK();
@@ -714,6 +730,10 @@ ps_delete(
 	if ((vs_count != 0) && (vs != NULL))
 		vs->vs_async_pending += 1;  /* hold parties calling  */
 					    /* vs_async_wait */
+
+	if (bs_low == FALSE)
+		backing_store_abort_compaction = FALSE;
+
 	VS_UNLOCK(vs);
 	VSL_UNLOCK();
 	while((vs_count != 0) && (vs != NULL)) {
@@ -736,13 +756,19 @@ ps_delete(
 			vm_object_t	transfer_object;
 			unsigned int	count;
 			upl_t		upl;
+			int		upl_flags;
 
 			transfer_object = vm_object_allocate((vm_object_size_t)VM_SUPER_CLUSTER);
 			count = 0;
+			upl_flags = (UPL_NO_SYNC | UPL_CLEAN_IN_PLACE |
+				     UPL_SET_LITE | UPL_SET_INTERNAL);
+			if (dp_encryption) {
+				/* mark the pages as "encrypted" when they come in */
+				upl_flags |= UPL_ENCRYPT;
+			}
 			error = vm_object_upl_request(transfer_object, 
 				(vm_object_offset_t)0, VM_SUPER_CLUSTER,
-				&upl, NULL, &count,
-				UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_LITE | UPL_SET_INTERNAL);
+				&upl, NULL, &count, upl_flags);
 
 			if(error == KERN_SUCCESS) {
 				error = ps_vstruct_transfer_from_segment(
@@ -754,7 +780,7 @@ ps_delete(
 			}
 			vm_object_deallocate(transfer_object);
 		}
-		if(error || current_thread_aborted() || backing_store_stop_compaction) {
+		if(error || current_thread_aborted()) {
 			VS_LOCK(vs);
 			vs->vs_async_pending -= 1;  /* release vs_async_wait */
 			if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
@@ -1408,6 +1434,7 @@ ps_select_segment(
 					trigger = min_pages_trigger_port;
 					min_pages_trigger_port = NULL;
 					bs_low = TRUE;
+					backing_store_abort_compaction = TRUE;
 				}
 				lps = ps;
 			} 
@@ -1428,6 +1455,8 @@ ps_select_segment(
 		PSL_UNLOCK();
 
 		if (trigger != IP_NULL) {
+			dprintf(("ps_select_segment - send HI_WAT_ALERT\n"));
+
 			default_pager_space_alert(trigger, HI_WAT_ALERT);
 			ipc_port_release_send(trigger);
 		}
@@ -1497,6 +1526,8 @@ ps_select_segment(
 							minimum_pages_remaining)) {
 							trigger = min_pages_trigger_port;
 							min_pages_trigger_port = NULL;
+							bs_low = TRUE;
+							backing_store_abort_compaction = TRUE;
 						}
 						PS_UNLOCK(ps);
 						/*
@@ -1506,6 +1537,8 @@ ps_select_segment(
 						PSL_UNLOCK();
 						
 						if (trigger != IP_NULL) {
+							dprintf(("ps_select_segment - send HI_WAT_ALERT\n"));
+
 							default_pager_space_alert(
 								trigger,
 								HI_WAT_ALERT);
@@ -1592,10 +1625,14 @@ retry:
 				(dp_pages_free < minimum_pages_remaining)) {
 			trigger = min_pages_trigger_port;
 			min_pages_trigger_port = NULL;
+			bs_low = TRUE;
+			backing_store_abort_compaction = TRUE;
 		}
 		PSL_UNLOCK();
 		PS_UNLOCK(ps);
 		if (trigger != IP_NULL) {
+			dprintf(("ps_allocate_cluster - send HI_WAT_ALERT\n"));
+
 			default_pager_space_alert(trigger, HI_WAT_ALERT);
 			ipc_port_release_send(trigger);
 		}
@@ -1688,9 +1725,12 @@ retry:
 			trigger = min_pages_trigger_port;
 			min_pages_trigger_port = NULL;
 			bs_low = TRUE;
+			backing_store_abort_compaction = TRUE;
 		}
 		PSL_UNLOCK();
 		if (trigger != IP_NULL) {
+			dprintf(("ps_allocate_cluster - send HI_WAT_ALERT\n"));
+
 			default_pager_space_alert(trigger, HI_WAT_ALERT);
 			ipc_port_release_send(trigger);
 		}
@@ -1780,10 +1820,23 @@ ps_dealloc_vsmap(
 	dp_size_t	size)
 {
 	unsigned int i;
-	for (i = 0; i < size; i++)
-		if (!VSM_ISCLR(vsmap[i]) && !VSM_ISERR(vsmap[i]))
+	struct ps_vnode_trim_data trim_data;
+
+	ps_vnode_trim_init(&trim_data);
+
+	for (i = 0; i < size; i++) {
+		if (!VSM_ISCLR(vsmap[i]) && !VSM_ISERR(vsmap[i])) {
+			ps_vnode_trim_more(&trim_data,
+					      &vsmap[i],
+					      VSM_PS(vsmap[i])->ps_clshift,
+					      vm_page_size << VSM_PS(vsmap[i])->ps_clshift);
 			ps_deallocate_cluster(VSM_PS(vsmap[i]),
 					      VSM_CLOFF(vsmap[i]));
+		} else {
+			ps_vnode_trim_now(&trim_data);
+		}
+	}
+	ps_vnode_trim_now(&trim_data);
 }
 
 void
@@ -1826,6 +1879,134 @@ ps_vstruct_dealloc(
 	zfree(vstruct_zone, vs);
 }
 
+void
+ps_vstruct_reclaim(
+	vstruct_t vs,
+	boolean_t return_to_vm,
+	boolean_t reclaim_backing_store)
+{
+	unsigned int	i, j;
+//	spl_t	s;
+	unsigned int	request_flags;
+	struct vs_map	*vsmap;
+	boolean_t	vsmap_all_clear, vsimap_all_clear;
+	struct vm_object_fault_info fault_info;
+	int		clmap_off;
+	unsigned int	vsmap_size;
+	kern_return_t	kr;
+
+	request_flags = UPL_NO_SYNC | UPL_RET_ONLY_ABSENT | UPL_SET_LITE;
+	if (reclaim_backing_store) {
+#if USE_PRECIOUS
+		request_flags |= UPL_PRECIOUS | UPL_CLEAN_IN_PLACE;
+#else	/* USE_PRECIOUS */
+		request_flags |= UPL_REQUEST_SET_DIRTY;
+#endif	/* USE_PRECIOUS */
+	}
+
+	VS_MAP_LOCK(vs);
+
+	fault_info.cluster_size = VM_SUPER_CLUSTER;
+	fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
+	fault_info.user_tag = 0;
+	fault_info.lo_offset = 0;
+	fault_info.hi_offset = ptoa_32(vs->vs_size << vs->vs_clshift);
+	fault_info.io_sync = reclaim_backing_store;
+
+	/*
+	 * If this is an indirect structure, then we walk through the valid
+	 * (non-zero) indirect pointers and deallocate the clusters
+	 * associated with each used map entry (via ps_dealloc_vsmap).
+	 * When all of the clusters in an indirect block have been
+	 * freed, we deallocate the block.  When all of the indirect
+	 * blocks have been deallocated we deallocate the memory
+	 * holding the indirect pointers.
+	 */
+	if (vs->vs_indirect) {
+		vsimap_all_clear = TRUE;
+		for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
+			vsmap = vs->vs_imap[i];
+			if (vsmap == NULL)
+				continue;
+			/* loop on clusters in this indirect map */
+			clmap_off = (vm_page_size * CLMAP_ENTRIES *
+				     VSCLSIZE(vs) * i);
+			if (i+1 == INDIRECT_CLMAP_ENTRIES(vs->vs_size))
+				vsmap_size = vs->vs_size - (CLMAP_ENTRIES * i);
+			else
+				vsmap_size = CLMAP_ENTRIES;
+			vsmap_all_clear = TRUE;
+			if (return_to_vm) {
+				for (j = 0; j < vsmap_size;) {
+					if (VSM_ISCLR(vsmap[j]) ||
+					    VSM_ISERR(vsmap[j])) {
+						j++;
+						clmap_off += vm_page_size * VSCLSIZE(vs);
+						continue;
+					}
+					VS_MAP_UNLOCK(vs);
+					kr = pvs_cluster_read(
+						vs,
+						clmap_off,
+						(dp_size_t) -1, /* read whole cluster */
+						&fault_info);
+					VS_MAP_LOCK(vs); /* XXX what if it changed ? */
+					if (kr != KERN_SUCCESS) {
+						vsmap_all_clear = FALSE;
+						vsimap_all_clear = FALSE;
+					}
+				}
+			}
+			if (vsmap_all_clear) {
+				ps_dealloc_vsmap(vsmap, CLMAP_ENTRIES);
+				kfree(vsmap, CLMAP_THRESHOLD);
+				vs->vs_imap[i] = NULL;
+			}
+		}
+		if (vsimap_all_clear) {
+//			kfree(vs->vs_imap, INDIRECT_CLMAP_SIZE(vs->vs_size));
+		}
+	} else {
+		/*
+		 * Direct map.  Free used clusters, then memory.
+		 */
+		vsmap = vs->vs_dmap;
+		if (vsmap == NULL) {
+			goto out;
+		}
+		vsmap_all_clear = TRUE;
+		/* loop on clusters in the direct map */
+		if (return_to_vm) {
+			for (j = 0; j < vs->vs_size;) {
+				if (VSM_ISCLR(vsmap[j]) ||
+				    VSM_ISERR(vsmap[j])) {
+					j++;
+					continue;
+				}
+				clmap_off = vm_page_size * (j << vs->vs_clshift);
+				VS_MAP_UNLOCK(vs);
+				kr = pvs_cluster_read(
+					vs,
+					clmap_off,
+					(dp_size_t) -1, /* read whole cluster */
+					&fault_info);
+				VS_MAP_LOCK(vs); /* XXX what if it changed ? */
+				if (kr != KERN_SUCCESS) {
+					vsmap_all_clear = FALSE;
+				} else {
+//					VSM_CLR(vsmap[j]);
+				}
+			}
+		}
+		if (vsmap_all_clear) {
+			ps_dealloc_vsmap(vs->vs_dmap, vs->vs_size);
+//			kfree(vs->vs_dmap, CLMAP_SIZE(vs->vs_size));
+		}
+	}
+out:
+	VS_MAP_UNLOCK(vs);
+}
+
 int ps_map_extend(vstruct_t, unsigned int);	/* forward */
 
 int ps_map_extend(
@@ -2156,6 +2337,9 @@ ps_clunmap(
 {
 	dp_offset_t		cluster; /* The cluster number of offset */
 	struct vs_map		*vsmap;
+	struct ps_vnode_trim_data trim_data;
+
+	ps_vnode_trim_init(&trim_data);
 
 	VS_MAP_LOCK(vs);
 
@@ -2173,11 +2357,13 @@ ps_clunmap(
 		else
 			vsmap = vs->vs_dmap;
 		if (vsmap == NULL) {
+			ps_vnode_trim_now(&trim_data);
 			VS_MAP_UNLOCK(vs);
 			return;
 		}
 		vsmap += cluster%CLMAP_ENTRIES;
 		if (VSM_ISCLR(*vsmap)) {
+			ps_vnode_trim_now(&trim_data);
 			length -= vm_page_size;
 			offset += vm_page_size;
 			continue;
@@ -2206,12 +2392,19 @@ ps_clunmap(
 		/*
 		 * If map entry is empty, clear and deallocate cluster.
 		 */
-		if (!VSM_ALLOC(*vsmap)) {
+		if (!VSM_BMAP(*vsmap)) {
+			ps_vnode_trim_more(&trim_data, 
+					      vsmap,
+					      vs->vs_clshift,
+					      VSCLSIZE(vs) * vm_page_size);
 			ps_deallocate_cluster(VSM_PS(*vsmap),
 					      VSM_CLOFF(*vsmap));
 			VSM_CLR(*vsmap);
+		} else {
+			ps_vnode_trim_now(&trim_data);
 		}
 	}
+	ps_vnode_trim_now(&trim_data);
 
 	VS_MAP_UNLOCK(vs);
 }
@@ -2670,16 +2863,31 @@ pvs_object_data_provided(
 	ASSERT(size > 0);
 	GSTAT(global_stats.gs_pages_in += atop_32(size));
 
-
-#if	USE_PRECIOUS
-	ps_clunmap(vs, offset, size);
-#endif	/* USE_PRECIOUS */
+/* check upl iosync flag instead of using RECLAIM_SWAP */
+#if	RECLAIM_SWAP
+	if (size != upl->size) {
+		upl_abort(upl, UPL_ABORT_ERROR);
+		upl_deallocate(upl);
+	} else {
+		ps_clunmap(vs, offset, size);
+		upl_commit(upl, NULL, 0);
+		upl_deallocate(upl);
+	}
+#endif	/* RECLAIM_SWAP */
 
 }
 
 static memory_object_offset_t   last_start;
 static vm_size_t		last_length;
 
+/*
+ * A "cnt" of 0 means that the caller just wants to check if the page at
+ * offset "vs_offset" exists in the backing store.  That page hasn't been
+ * prepared, so no need to release it.
+ *
+ * A "cnt" of -1 means that the caller wants to bring back from the backing
+ * store all existing pages in the cluster containing "vs_offset".
+ */
 kern_return_t
 pvs_cluster_read(
 	vstruct_t	vs,
@@ -2707,16 +2915,32 @@ pvs_cluster_read(
 	memory_object_offset_t	cluster_start;
 	vm_size_t		cluster_length;
 	uint32_t		io_streaming;
+	int			i;
+	boolean_t		io_sync = FALSE;
 
 	pages_in_cl = 1 << vs->vs_clshift;
 	cl_size = pages_in_cl * vm_page_size;
 	cl_mask = cl_size - 1;
 
-#if	USE_PRECIOUS
-	request_flags = UPL_NO_SYNC |  UPL_CLEAN_IN_PLACE | UPL_PRECIOUS | UPL_RET_ONLY_ABSENT | UPL_SET_LITE;
-#else
-	request_flags = UPL_NO_SYNC |  UPL_CLEAN_IN_PLACE | UPL_RET_ONLY_ABSENT | UPL_SET_LITE;
-#endif
+	request_flags = UPL_NO_SYNC | UPL_RET_ONLY_ABSENT | UPL_SET_LITE;
+
+	if (cnt == (dp_size_t) -1) {
+		/*
+		 * We've been called from ps_vstruct_reclaim() to move all
+		 * the object's swapped pages back to VM pages.
+		 * This can put memory pressure on the system, so we do want
+		 * to wait for free pages, to avoid getting in the way of the
+		 * vm_pageout_scan() thread.
+		 * Let's not use UPL_NOBLOCK in this case.
+		 */
+		vs_offset &= ~cl_mask;
+		i = pages_in_cl;
+	} else {
+		i = 1;
+		request_flags |= UPL_NOBLOCK;
+	}
+
+again:
 	cl_index = (vs_offset & cl_mask) / vm_page_size;
 
         if ((ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0) == (dp_offset_t)-1) ||
@@ -2735,6 +2959,16 @@ pvs_cluster_read(
 			 */
 			return KERN_FAILURE;
 		}
+		if (cnt == (dp_size_t) -1) {
+			i--;
+			if (i == 0) {
+				/* no more pages in this cluster */
+				return KERN_FAILURE;
+			}
+			/* try the next page in this cluster */
+			vs_offset += vm_page_size;
+			goto again;
+		}
 
 		page_list_count = 0;
 
@@ -2762,6 +2996,24 @@ pvs_cluster_read(
 		return KERN_SUCCESS;
 	}
 		
+	if(((vm_object_fault_info_t)fault_info)->io_sync == TRUE ) {
+		io_sync = TRUE;
+	} else {
+#if RECLAIM_SWAP
+		io_sync = TRUE;
+#endif	/* RECLAIM_SWAP */
+	}
+
+	if( io_sync == TRUE ) {
+
+		io_flags |= UPL_IOSYNC | UPL_NOCOMMIT;
+#if USE_PRECIOUS
+		request_flags |= UPL_PRECIOUS | UPL_CLEAN_IN_PLACE;
+#else	/* USE_PRECIOUS */
+		request_flags |= UPL_REQUEST_SET_DIRTY;
+#endif	/* USE_PRECIOUS */
+	}
+
 	assert(dp_encryption_inited);
 	if (dp_encryption) {
 		/*
@@ -2770,6 +3022,7 @@ pvs_cluster_read(
 		 * decryption.
 		 */
 		request_flags |= UPL_ENCRYPT;
+		io_flags |= UPL_PAGING_ENCRYPTED;
 	}
 	orig_vs_offset = vs_offset;
 
@@ -2970,7 +3223,7 @@ pvs_cluster_read(
 			memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)vs_offset,
 							xfer_size, xfer_size, 
 							&upl, NULL, &page_list_count,
-							request_flags | UPL_SET_INTERNAL | UPL_NOBLOCK);
+							request_flags | UPL_SET_INTERNAL);
 
 			error = ps_read_file(psp[beg_pseg], 
 					     upl, (upl_offset_t) 0, 
@@ -3091,15 +3344,33 @@ vs_cluster_write(
 	boolean_t	minimal_clustering = FALSE;
 	boolean_t	found_dirty;
 
+	if (!dp_encryption_inited) {
+		/*
+		 * ENCRYPTED SWAP:
+		 * Once we've started using swap, we
+		 * can't change our mind on whether
+		 * it needs to be encrypted or
+		 * not.
+		 */
+		dp_encryption_inited = TRUE;
+	}
+	if (dp_encryption) {
+		/*
+		 * ENCRYPTED SWAP:
+		 * the UPL will need to be encrypted...
+		 */
+		flags |= UPL_PAGING_ENCRYPTED;
+	}
+
 	pages_in_cl = 1 << vs->vs_clshift;
 	cl_size = pages_in_cl * vm_page_size;
 	
 #if CONFIG_FREEZE
 	minimal_clustering = TRUE;
-#endif
+#else
 	if (dp_isssd == TRUE)
 		minimal_clustering = TRUE;
-
+#endif
 	if (!dp_internal) {
 		unsigned int page_list_count;
 		int	     request_flags;
@@ -3124,16 +3395,6 @@ vs_cluster_write(
 			        UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | 
 				UPL_NO_SYNC | UPL_SET_INTERNAL | UPL_SET_LITE;
 
-		if (!dp_encryption_inited) {
-			/*
-			 * ENCRYPTED SWAP:
-			 * Once we've started using swap, we
-			 * can't change our mind on whether
-			 * it needs to be encrypted or
-			 * not.
-			 */
-			dp_encryption_inited = TRUE;
-		}
 		if (dp_encryption) {
 			/*
 			 * ENCRYPTED SWAP:
@@ -3143,6 +3404,7 @@ vs_cluster_write(
 			request_flags |= UPL_ENCRYPT;
 			flags |= UPL_PAGING_ENCRYPTED;
 		}
+
 		page_list_count = 0;
 		memory_object_super_upl_request(vs->vs_control,
 				(memory_object_offset_t)offset,
@@ -3168,6 +3430,7 @@ vs_cluster_write(
 		found_dirty = TRUE;
 
 		for (seg_index = 0, transfer_size = upl->size; transfer_size > 0; ) {
+
 			unsigned int	seg_pgcnt;
 
 			seg_pgcnt = seg_size / PAGE_SIZE;
@@ -3208,7 +3471,7 @@ vs_cluster_write(
 				page_index += seg_pgcnt;
 			        transfer_size -= seg_size;
 				upl_offset_aligned += cl_size;
-				seg_size    = cl_size;
+				seg_size = cl_size;
 				seg_index++;
 			} else
 			        transfer_size = 0;
@@ -3588,6 +3851,14 @@ vs_changed:
 				vs->vs_xfer_pending = FALSE;
 				VS_UNLOCK(vs);
 				vs_finish_write(vs);
+
+				if (backing_store_abort_compaction || backing_store_stop_compaction) {
+					backing_store_abort_compaction = FALSE;
+					dprintf(("ps_vstruct_transfer_from_segment - ABORTED\n"));
+					return KERN_FAILURE;
+				}
+				vnode_pager_throttle();
+
 				VS_LOCK(vs);
 				vs->vs_xfer_pending = TRUE;
 				vs_wait_for_sync_writers(vs);
@@ -3810,7 +4081,7 @@ vs_cluster_transfer(
 			/* NEED TO ISSUE WITH SYNC & NO COMMIT */
 			error = ps_read_file(ps, upl, (upl_offset_t) 0, actual_offset, 
 					size, &residual, 
-					(UPL_IOSYNC | UPL_NOCOMMIT));
+					(UPL_IOSYNC | UPL_NOCOMMIT | (dp_encryption ? UPL_PAGING_ENCRYPTED : 0)));
 		}
 
 		read_vsmap = *vsmap_ptr;
@@ -4028,12 +4299,17 @@ default_pager_add_file(
 	 * emergency segment will be back to its original state of
 	 * online but not activated (till it's needed the next time).
 	 */
-	ps = paging_segments[EMERGENCY_PSEG_INDEX];
-	if(IS_PS_EMERGENCY_SEGMENT(ps) && IS_PS_OK_TO_USE(ps)) {
-		if(default_pager_backing_store_delete(emergency_segment_backing_store)) {
-			dprintf(("Failed to recover emergency paging segment\n"));
-		} else {
-			dprintf(("Recovered emergency paging segment\n"));
+#if CONFIG_FREEZE
+	if (!vm_freeze_enabled)
+#endif
+	{
+		ps = paging_segments[EMERGENCY_PSEG_INDEX];
+		if(IS_PS_EMERGENCY_SEGMENT(ps) && IS_PS_OK_TO_USE(ps)) {
+			if(default_pager_backing_store_delete(emergency_segment_backing_store)) {
+				dprintf(("Failed to recover emergency paging segment\n"));
+			} else {
+				dprintf(("Recovered emergency paging segment\n"));
+			}
 		}
 	}
 	
@@ -4123,6 +4399,49 @@ ps_write_file(
 	return result;
 }
 
+static inline void ps_vnode_trim_init(struct ps_vnode_trim_data *data)
+{
+#if CONFIG_EMBEDDED
+	data->vp = NULL;
+	data->offset = 0;
+	data->length = 0;
+#else
+#pragma unused(data)
+#endif
+}
+
+static inline void ps_vnode_trim_now(struct ps_vnode_trim_data *data)
+{
+#if CONFIG_EMBEDDED
+	if ((data->vp) != NULL) {
+		vnode_trim(data->vp,
+				   data->offset,
+				   data->length);
+		ps_vnode_trim_init(data); 
+	}
+#else
+#pragma unused(data)
+#endif
+}
+
+static inline void ps_vnode_trim_more(struct ps_vnode_trim_data *data, struct vs_map *map, unsigned int shift, dp_size_t length)
+{
+#if CONFIG_EMBEDDED
+	struct vnode *vp = VSM_PS(*map)->ps_vnode;
+	dp_offset_t offset = ptoa_32(VSM_CLOFF(*map)) << shift;
+
+	if ((vp != data->vp) || (offset) != (data->offset + data->length)) {
+		ps_vnode_trim_now(data);
+		data->vp = vp;
+		data->offset = offset;
+		data->length = 0;
+	}
+	data->length += (length);
+#else
+#pragma unused(data, map, shift, length)
+#endif
+}
+
 kern_return_t
 default_pager_triggers( __unused MACH_PORT_FACE default_pager,
 	int		hi_wat,
@@ -4130,7 +4449,7 @@ default_pager_triggers( __unused MACH_PORT_FACE default_pager,
 	int		flags,
 	MACH_PORT_FACE  trigger_port)
 {
-	MACH_PORT_FACE release;
+	MACH_PORT_FACE release = IPC_PORT_NULL;
 	kern_return_t kr;
 	clock_sec_t now;
 	clock_nsec_t nanoseconds_dummy;
@@ -4159,15 +4478,42 @@ default_pager_triggers( __unused MACH_PORT_FACE default_pager,
 		}
 	} else if (flags == HI_WAT_ALERT) {
 		release = min_pages_trigger_port;
-		min_pages_trigger_port = trigger_port;
-		minimum_pages_remaining = hi_wat/vm_page_size;
-		bs_low = FALSE;
-		kr = KERN_SUCCESS;
+#if CONFIG_FREEZE
+		/* High and low water signals aren't applicable when freeze is */
+		/* enabled, so release the trigger ports here and return       */
+		/* KERN_FAILURE.                                               */
+		if (vm_freeze_enabled) {
+			if (IP_VALID( trigger_port )){
+				ipc_port_release_send( trigger_port );
+			}
+			min_pages_trigger_port = IPC_PORT_NULL;
+			kr = KERN_FAILURE;
+		}
+		else
+#endif
+		{
+			min_pages_trigger_port = trigger_port;
+			minimum_pages_remaining = hi_wat/vm_page_size;
+			bs_low = FALSE;
+			kr = KERN_SUCCESS;
+		}
 	} else if (flags ==  LO_WAT_ALERT) {
 		release = max_pages_trigger_port;
-		max_pages_trigger_port = trigger_port;
-		maximum_pages_free = lo_wat/vm_page_size;
-		kr = KERN_SUCCESS;
+#if CONFIG_FREEZE
+		if (vm_freeze_enabled) {
+			if (IP_VALID( trigger_port )){
+				ipc_port_release_send( trigger_port );
+			}
+			max_pages_trigger_port = IPC_PORT_NULL;
+			kr = KERN_FAILURE;
+		}
+		else
+#endif
+		{
+			max_pages_trigger_port = trigger_port;
+			maximum_pages_free = lo_wat/vm_page_size;
+			kr = KERN_SUCCESS;
+		}
 	} else if (flags == USE_EMERGENCY_SWAP_FILE_FIRST) {
 		use_emergency_swap_file_first = TRUE;
 		release = trigger_port;
@@ -4259,6 +4605,8 @@ default_pager_backing_store_monitor(__unused thread_call_param_t p1,
 		} else {
 			VSL_UNLOCK();
 		}
+		dprintf(("default_pager_backing_store_monitor - send LO_WAT_ALERT\n"));
+
 		default_pager_space_alert(trigger, LO_WAT_ALERT);
 		ipc_port_release_send(trigger);
 		dp_pages_free_low_count = 0;
@@ -4267,3 +4615,9 @@ default_pager_backing_store_monitor(__unused thread_call_param_t p1,
 	clock_interval_to_deadline(PF_INTERVAL, NSEC_PER_SEC, &deadline);
 	thread_call_enter_delayed(default_pager_backing_store_monitor_callout, deadline);
 }
+
+#if CONFIG_FREEZE
+unsigned int default_pager_swap_pages_free(void) {
+	return dp_pages_free;
+}
+#endif
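Editor's note: the ps_vnode_trim_* helpers above batch TRIM requests. ps_vnode_trim_more() extends a pending run while successive clusters are contiguous on the same vnode, and ps_vnode_trim_now() flushes the accumulated run in a single vnode_trim() call. The coalescing logic, reduced to plain integers (names hypothetical, printf stands in for the trim):

    #include <stdio.h>

    struct trim_run {
        unsigned long off;
        unsigned long len;
    };

    static void trim_flush(struct trim_run *r)
    {
        if (r->len != 0)
            printf("trim [%lu, %lu)\n", r->off, r->off + r->len);
        r->off = 0;
        r->len = 0;
    }

    static void trim_more(struct trim_run *r, unsigned long off,
                          unsigned long len)
    {
        /* A gap (or, in the real code, a different vnode) ends the run. */
        if (r->len != 0 && off != r->off + r->len)
            trim_flush(r);
        if (r->len == 0)
            r->off = off;
        r->len += len;                  /* extend the pending run */
    }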
diff --git a/osfmk/default_pager/dp_memory_object.c b/osfmk/default_pager/dp_memory_object.c
index c85278056..e122e7711 100644
--- a/osfmk/default_pager/dp_memory_object.c
+++ b/osfmk/default_pager/dp_memory_object.c
@@ -369,6 +369,7 @@ const struct memory_object_pager_ops default_pager_ops = {
 	dp_memory_object_synchronize,
 	dp_memory_object_map,
 	dp_memory_object_last_unmap,
+	dp_memory_object_data_reclaim,
 	"default pager"
 };
 
@@ -431,6 +432,33 @@ dp_memory_object_last_unmap(
 	return KERN_FAILURE;
 }
 
+kern_return_t
+dp_memory_object_data_reclaim(
+	memory_object_t		mem_obj,
+	boolean_t		reclaim_backing_store)
+{
+	vstruct_t		vs;
+
+	vs_lookup(mem_obj, vs);
+	for (;;) {
+		vs_lock(vs);
+		vs_async_wait(vs);
+		if (!vs->vs_xfer_pending) {
+			break;
+		}
+	}
+	vs->vs_xfer_pending = TRUE;
+	vs_unlock(vs);
+
+	ps_vstruct_reclaim(vs, TRUE, reclaim_backing_store);
+
+	vs_lock(vs);
+	vs->vs_xfer_pending = FALSE;
+	vs_unlock(vs);
+
+	return KERN_SUCCESS;
+}
+
 kern_return_t
 dp_memory_object_terminate(
 	memory_object_t		mem_obj)
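Editor's note: dp_memory_object_data_reclaim() above guards the reclaim with the vs_xfer_pending flag; it waits until no transfer is in flight, claims the vstruct, does the work unlocked, then clears the flag. The same guard pattern in a reduced pthread sketch (the mutex/condvar stand-ins for vs_lock()/vs_async_wait() are assumptions):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  idle = PTHREAD_COND_INITIALIZER;
    static bool xfer_pending = false;

    static void reclaim(void)
    {
        pthread_mutex_lock(&lock);
        while (xfer_pending)            /* wait out concurrent transfers */
            pthread_cond_wait(&idle, &lock);
        xfer_pending = true;            /* claim the object */
        pthread_mutex_unlock(&lock);

        /* ... the actual reclaim work runs without the lock held ... */

        pthread_mutex_lock(&lock);
        xfer_pending = false;           /* release the object */
        pthread_cond_broadcast(&idle);
        pthread_mutex_unlock(&lock);
    }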
diff --git a/osfmk/device/device.defs b/osfmk/device/device.defs
index 5410b050e..2e39dc559 100644
--- a/osfmk/device/device.defs
+++ b/osfmk/device/device.defs
@@ -215,12 +215,14 @@ routine io_registry_entry_get_parent_iterator(
 	out iterator		: io_object_t
 	);
 
-routine io_service_open(
+skip;
+/* was routine io_service_open
 	    service		: io_object_t;
 	in  owningTask		: task_t;
 	in  connect_type	: uint32_t;
 	out connection		: io_connect_t
 	);
+*/
 
 routine io_service_close(
 	    connection		: io_connect_t
@@ -599,8 +601,8 @@ routine io_connect_method(
 	in  ool_input		: mach_vm_address_t;
 	in  ool_input_size	: mach_vm_size_t;
 
-	out scalar_output	: io_scalar_inband64_t, CountInOut;
 	out inband_output	: io_struct_inband_t, CountInOut;
+	out scalar_output	: io_scalar_inband64_t, CountInOut;
 	in  ool_output		: mach_vm_address_t;
 	inout ool_output_size	: mach_vm_size_t
 	);
@@ -616,8 +618,8 @@ routine io_connect_async_method(
 	in  ool_input		: mach_vm_address_t;
 	in  ool_input_size	: mach_vm_size_t;
 
-	out scalar_output	: io_scalar_inband64_t, CountInOut;
 	out inband_output	: io_struct_inband_t, CountInOut;
+	out scalar_output	: io_scalar_inband64_t, CountInOut;
 	in  ool_output		: mach_vm_address_t;
 	inout  ool_output_size	: mach_vm_size_t
 	);
diff --git a/osfmk/device/iokit_rpc.c b/osfmk/device/iokit_rpc.c
index 5990a3e5c..5c5f8b742 100644
--- a/osfmk/device/iokit_rpc.c
+++ b/osfmk/device/iokit_rpc.c
@@ -63,9 +63,6 @@
 
 #include <machine/machparam.h>
 
-#ifdef __ppc__
-#include <ppc/mappings.h>
-#endif
 #if defined(__i386__) || defined(__x86_64__)
 #include <i386/pmap.h>
 #endif
@@ -449,13 +446,16 @@ unsigned int IODefaultCacheBits(addr64_t pa)
 kern_return_t IOMapPages(vm_map_t map, mach_vm_address_t va, mach_vm_address_t pa,
 			mach_vm_size_t length, unsigned int options)
 {
-    vm_prot_t	prot;
+    vm_prot_t	 prot;
     unsigned int flags;
+    ppnum_t	 pagenum;
     pmap_t 	 pmap = map->pmap;
 
     prot = (options & kIOMapReadOnly)
 		? VM_PROT_READ : (VM_PROT_READ|VM_PROT_WRITE);
 
+    pagenum = (ppnum_t)atop_64(pa);
+
     switch(options & kIOMapCacheMask ) {			/* What cache mode do we need? */
 
 	case kIOMapDefaultCache:
@@ -480,8 +480,13 @@ kern_return_t IOMapPages(vm_map_t map, mach_vm_address_t va, mach_vm_address_t p
 	    break;
     }
 
+    pmap_set_cache_attributes(pagenum, flags);
+
+    vm_map_set_cache_attr(map, (vm_map_offset_t)va);
+
+
     // Set up a block mapped area
-    pmap_map_block(pmap, va, (ppnum_t)atop_64(pa), (uint32_t) atop_64(round_page_64(length)), prot, flags, 0);
+    pmap_map_block(pmap, va, pagenum, (uint32_t) atop_64(round_page_64(length)), prot, 0, 0);
 
     return( KERN_SUCCESS );
 }
@@ -498,10 +503,6 @@ kern_return_t IOUnmapPages(vm_map_t map, mach_vm_address_t va, mach_vm_size_t le
 kern_return_t IOProtectCacheMode(vm_map_t __unused map, mach_vm_address_t __unused va,
 					mach_vm_size_t __unused length, unsigned int __unused options)
 {
-#if __ppc__
-    // can't remap block mappings, but ppc doesn't speculatively read from WC
-#else
-
     mach_vm_size_t off;
     vm_prot_t	   prot;
     unsigned int   flags;
@@ -542,31 +543,25 @@ kern_return_t IOProtectCacheMode(vm_map_t __unused map, mach_vm_address_t __unus
 	    pmap_enter(pmap, va + off, ppnum, prot, flags, TRUE);
     }
 
-#endif
-
     return (KERN_SUCCESS);
 }
 
 ppnum_t IOGetLastPageNumber(void)
 {
-    ppnum_t	 lastPage, highest = 0;
-    unsigned int idx;
-
-#if __ppc__
-    for (idx = 0; idx < pmap_mem_regions_count; idx++)
-    {
-	lastPage = pmap_mem_regions[idx].mrEnd;
-#elif __i386__ || __x86_64__
-    for (idx = 0; idx < pmap_memory_region_count; idx++)
-    {
-	lastPage = pmap_memory_regions[idx].end - 1;
+#if __i386__ || __x86_64__
+	ppnum_t	 lastPage, highest = 0;
+	unsigned int idx;
+
+	for (idx = 0; idx < pmap_memory_region_count; idx++)
+	{
+		lastPage = pmap_memory_regions[idx].end - 1;
+		if (lastPage > highest)
+			highest = lastPage;
+	}
+	return (highest);
 #else
-#error arch
+#error unknown arch
 #endif
-	if (lastPage > highest)
-	    highest = lastPage;
-    }
-    return (highest);
 }
 
 
diff --git a/osfmk/device/subrs.c b/osfmk/device/subrs.c
index b9aafe509..105edff0f 100644
--- a/osfmk/device/subrs.c
+++ b/osfmk/device/subrs.c
@@ -261,7 +261,7 @@ strncasecmp(const char *s1, const char *s2, size_t n)
  * Deprecation Warning: 
  *	strcpy() is being deprecated. Please use strlcpy() instead.
  */
-
+#if !CONFIG_EMBEDDED
 char *
 strcpy(
         char *to,
@@ -274,7 +274,7 @@ strcpy(
 
         return ret;
 }
-
+#endif
 
 /*
  * Abstract:
@@ -428,6 +428,7 @@ itoa(
  * Deprecation Warning:
  *	strcat() is being deprecated. Please use strlcat() instead.
  */
+#if !CONFIG_EMBEDDED
 char *
 strcat(
 	char *dest,
@@ -441,6 +442,7 @@ strcat(
 		;
 	return (old);
 }
+#endif
 
 /*
  * Appends src to string dst of size siz (unlike strncat, siz is the
@@ -535,7 +537,7 @@ strlcpy(char *dst, const char *src, size_t siz)
  *              one should use FREE() with the allocated buffer.
  *
  */
-inline char *
+char *
 STRDUP(const char *string, int type)
 {
 	size_t len;
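Editor's note: with strcpy() and strcat() compiled out on CONFIG_EMBEDDED builds per the hunks above, callers on those configurations must use the bounded variants. A minimal user-space usage sketch:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char buf[16];

        /* Both calls take the full destination size and always
         * NUL-terminate; oversize input is truncated, never overflowed. */
        strlcpy(buf, "usr", sizeof (buf));
        strlcat(buf, "/local", sizeof (buf));
        printf("%s\n", buf);            /* prints "usr/local" */
        return 0;
    }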
diff --git a/osfmk/gssd/Makefile b/osfmk/gssd/Makefile
index 2f7167424..bda924f4b 100644
--- a/osfmk/gssd/Makefile
+++ b/osfmk/gssd/Makefile
@@ -8,14 +8,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS =
 
-INSTINC_SUBDIRS_PPC = 
-
 INSTINC_SUBDIRS_I386 =
 
 EXPINC_SUBDIRS =
 
-EXPINC_SUBDIRS_PPC =
-
 EXPINC_SUBDIRS_I386 =
 
 MIG_DEFS = gssd_mach.defs
diff --git a/osfmk/gssd/gssd_mach.defs b/osfmk/gssd/gssd_mach.defs
index 014785f3a..abe5ffe08 100644
--- a/osfmk/gssd/gssd_mach.defs
+++ b/osfmk/gssd/gssd_mach.defs
@@ -32,16 +32,18 @@
 #ifdef KERNEL
 import <gssd/gssd_mach_types.h>;
 #else
-import <gssd_mach_types.h>;
+import <System/gssd/gssd_mach_types.h>;
 #endif
 
-type mechtype = int32_t;
-type string_t = c_string[*:1024];
-type byte_buffer = array [] of uint8_t;
+type gssd_mechtype = int32_t;
+type gssd_nametype = int32_t;
+type gssd_string = c_string[*:1024]; /* MAX_PRINC_STR must be < 1024 */
+type gssd_dstring = c_string[*:128]; /* MAX_DISPLAY_STR must be < 128 */
+type gssd_byte_buffer = array [] of uint8_t;
 type gssd_verifier =  uint64_t;
-type gid_list = array [*:16] of uint32_t;
-type gss_ctx = uint64_t;
-type gss_cred = uint64_t;
+type gssd_gid_list = array [*:16] of uint32_t;
+type gssd_ctx = uint64_t;
+type gssd_cred = uint64_t;
 
 subsystem
 #if KERNEL_USER
@@ -53,43 +55,99 @@ serverprefix svc_;
 
 routine mach_gss_init_sec_context(
 	server			: mach_port_t;
-	in  mech		: mechtype;
-	in  intoken		: byte_buffer;
+	in  mech		: gssd_mechtype;
+	in  intoken		: gssd_byte_buffer;
 	in  uid			: uint32_t;
-	in  princ_namestr	: string_t;
-	in  svc_namestr		: string_t;
+	in  princ_namestr	: gssd_string;
+	in  svc_namestr		: gssd_string;
 	in  flags		: uint32_t;
 	in  gssd_flags		: uint32_t;
-	inout context		: gss_ctx;
-	inout cred_handle	: gss_cred;
+	inout context		: gssd_ctx;
+	inout cred_handle	: gssd_cred;
 	out  ret_flags		: uint32_t;
-	out  key		: byte_buffer, dealloc;
-	out outtoken		: byte_buffer, dealloc;
+	out  key		: gssd_byte_buffer, dealloc;
+	out outtoken		: gssd_byte_buffer, dealloc;
 	out major_stat		: uint32_t;
 	out minor_stat		: uint32_t
 );
 
 routine mach_gss_accept_sec_context(
 	server			: mach_port_t;
-	in  intoken		: byte_buffer;
-	in  svc_namestr		: string_t;
+	in  intoken		: gssd_byte_buffer;
+	in  svc_namestr		: gssd_string;
 	in  gssd_flags		: uint32_t;
-	inout context		: gss_ctx;
-	inout cred_handle	: gss_cred;
+	inout context		: gssd_ctx;
+	inout cred_handle	: gssd_cred;
 	out flags		: uint32_t;
 	out uid			: uint32_t;
-	out gids		: gid_list;
-	out  key		: byte_buffer, dealloc;
-	out outtoken		: byte_buffer, dealloc;
+	out gids		: gssd_gid_list;
+	out key			: gssd_byte_buffer, dealloc;
+	out outtoken		: gssd_byte_buffer, dealloc;
 	out major_stat		: uint32_t;
 	out minor_stat		: uint32_t
 );
 
 simpleroutine mach_gss_log_error(
 	server			: mach_port_t;
-	in  mnt			: string_t;
+	in  mnt			: gssd_string;
 	in  uid			: uint32_t;
-	in  source		: string_t;
+	in  source		: gssd_string;
 	in  major_stat		: uint32_t;
 	in  minor_stat		: uint32_t
 );
+
+routine mach_gss_init_sec_context_v2(
+	server			: mach_port_t;
+	in  mech		: gssd_mechtype;
+	in  intoken		: gssd_byte_buffer;
+	in  uid			: uint32_t;
+	in  clnt_nt		: gssd_nametype;
+	in  clnt_princ		: gssd_byte_buffer;
+	in  svc_nt		: gssd_nametype;
+	in  svc_princ		: gssd_byte_buffer;
+	in  flags		: uint32_t;
+	inout gssd_flags	: uint32_t;
+	inout context		: gssd_ctx;
+	inout cred_handle	: gssd_cred;
+	out  ret_flags		: uint32_t;
+	out  key		: gssd_byte_buffer, dealloc;
+	out outtoken		: gssd_byte_buffer, dealloc;
+	out displayname		: gssd_dstring;
+	out major_stat		: uint32_t;
+	out minor_stat		: uint32_t
+);
+
+routine mach_gss_accept_sec_context_v2(
+	server			: mach_port_t;
+	in  intoken		: gssd_byte_buffer;
+	in  svc_nt		: gssd_nametype;
+	in  svc_princ		: gssd_byte_buffer;
+	inout gssd_flags	: uint32_t;
+	inout context		: gssd_ctx;
+	inout cred_handle	: gssd_cred;
+	out flags		: uint32_t;
+	out uid			: uint32_t;
+	out gids		: gssd_gid_list;
+	out key			: gssd_byte_buffer, dealloc;
+	out outtoken		: gssd_byte_buffer, dealloc;
+	out major_stat		: uint32_t;
+	out minor_stat		: uint32_t
+);
+
+routine mach_gss_hold_cred(
+	server			: mach_port_t;
+	in  mech		: gssd_mechtype;
+	in  nt			: gssd_nametype;
+	in  princ		: gssd_byte_buffer;
+	out major_stat		: uint32_t;
+	out minor_stat		: uint32_t
+);
+
+routine mach_gss_unhold_cred(
+	server			: mach_port_t;
+	in  mech		: gssd_mechtype;
+	in  nt			: gssd_nametype;
+	in  princ		: gssd_byte_buffer;
+	out major_stat		: uint32_t;
+	out minor_stat		: uint32_t
+);
diff --git a/osfmk/gssd/gssd_mach_types.h b/osfmk/gssd/gssd_mach_types.h
index c091cc3ef..e3bde951a 100644
--- a/osfmk/gssd/gssd_mach_types.h
+++ b/osfmk/gssd/gssd_mach_types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2006, 2008, 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -29,34 +29,41 @@
 #ifndef _GSSD_MACH_TYPES_H_
 #define _GSSD_MACH_TYPES_H_
 
-typedef enum mechtype { DEFAULT_MECH = 0, KRB5_MECH = 0, SPNEGO_MECH } mechtype;
-typedef char *string_t;
-typedef uint8_t *byte_buffer;
-typedef uint32_t *gid_list;
-typedef uint64_t gss_ctx;
-typedef uint64_t gss_cred;
+#define MAX_DISPLAY_STR 128
+#define MAX_PRINC_STR 1024
+
+typedef enum gssd_mechtype { GSSD_NO_MECH = -1, GSSD_KRB5_MECH = 0,
+				GSSD_SPNEGO_MECH, GSSD_NTLM_MECH } gssd_mechtype;
+typedef enum gssd_nametype { GSSD_STRING_NAME = 0, GSSD_EXPORT, 
+				GSSD_ANONYMOUS, GSSD_HOSTBASED, GSSD_USER, GSSD_MACHINE_UID,
+				GSSD_STRING_UID, GSSD_KRB5_PRINCIPAL, GSSD_KRB5_REFERRAL, 
+				GSSD_NTLM_PRINCIPAL, GSSD_NTLM_BLOB} gssd_nametype;
+typedef char *gssd_string;
+typedef char *gssd_dstring;
+typedef uint8_t *gssd_byte_buffer;
+typedef uint32_t *gssd_gid_list;
+typedef uint64_t gssd_ctx;
+typedef uint64_t gssd_cred;
 
-#define GSSD_GSS_FLAGS_MASK	0x1FF
 /* The following need to correspond to GSS_C_*_FLAG in gssapi.h */
 #define GSSD_DELEG_FLAG		1
-#define GSSD_MUTUAL_FLAG		2
-#define GSSD_REPLAY_FLAG		4
+#define GSSD_MUTUAL_FLAG	2
+#define GSSD_REPLAY_FLAG	4
 #define GSSD_SEQUENCE_FLAG	8
 #define GSSD_CONF_FLAG		16
 #define GSSD_INTEG_FLAG		32
 #define GSSD_ANON_FLAG		64
 #define GSSD_PROT_FLAG		128
 #define GSSD_TRANS_FLAG		256
-#define GSSD_C_DELEG_POLICY_FLAG 32768
+#define GSSD_DELEG_POLICY_FLAG	32768
 
-#define GSSD_FLAGS_SHIFT		0
-#define GSSD_NO_DEFAULT		(1 << GSSD_FLAGS_SHIFT) // Only use principal from uid
-#define GSSD_NO_CANON		(2 << GSSD_FLAGS_SHIFT) // Don't canononicalize host names
-#define GSSD_HOME_ACCESS_OK	(4 << GSSD_FLAGS_SHIFT) // OK to access home directory
-#define GSSD_UI_OK		(8 << GSSD_FLAGS_SHIFT) // OK to bring up UI
-#define GSSD_RESTART		(16 << GSSD_FLAGS_SHIFT) // Destroy the supplied context and start over
-#define GSSD_NFS_1DES		(64 << GSSD_FLAGS_SHIFT) // Only get single DES session keys
-#define GSSD_WIN2K_HACK		(128 << GSSD_FLAGS_SHIFT) // Hack for Win2K
+#define GSSD_NO_DEFAULT		1  // Only use the supplied principal, do not fall back to the default.
+#define GSSD_NO_CANON		2  // Don't canonicalize host names
+#define GSSD_HOME_ACCESS_OK	4  // OK to access home directory
+#define GSSD_GUEST_ONLY		8  // NTLM Server is forcing guest access
+#define GSSD_RESTART		16 // Destroy the supplied context and start over
+#define GSSD_NFS_1DES		64 // Only get single DES session keys
+#define GSSD_WIN2K_HACK		128 // Hack for Win2K
 
 
 #endif /* _GSSD_MACH_TYPES_H_ */
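Editor's note: the renamed gssd flag macros above drop the GSSD_FLAGS_SHIFT indirection, leaving plain single-bit values, so callers combine them with | and test them with &. A trivial sketch:

    #include <stdint.h>

    #define GSSD_HOME_ACCESS_OK 4
    #define GSSD_GUEST_ONLY     8

    static int home_access_ok(uint32_t gssd_flags)
    {
        /* Independent single-bit flags: test with bitwise AND. */
        return (gssd_flags & GSSD_HOME_ACCESS_OK) != 0;
    }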
diff --git a/osfmk/i386/AT386/model_dep.c b/osfmk/i386/AT386/model_dep.c
index 6727bae26..c21012552 100644
--- a/osfmk/i386/AT386/model_dep.c
+++ b/osfmk/i386/AT386/model_dep.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -93,8 +93,12 @@
 #include <i386/mp.h>		/* mp_rendezvous_break_lock */
 #include <i386/cpuid.h>
 #include <i386/fpu.h>
-#include <i386/ipl.h>
+#include <i386/machine_cpu.h>
+#include <i386/pmap.h>
+#if CONFIG_MTRR
 #include <i386/mtrr.h>
+#endif
+#include <i386/ucode.h>
 #include <i386/pmCPU.h>
 #include <architecture/i386/pio.h> /* inb() */
 #include <pexpert/i386/boot.h>
@@ -116,6 +120,10 @@
 #include <mach-o/nlist.h>
 
 #include <libkern/kernel_mach_header.h>
+#include <libkern/OSKextLibPrivate.h>
+
+#define DPRINTF(x...)
+//#define DPRINTF(x...)	kprintf(x)
 
 static void machine_conf(void);
 
@@ -130,6 +138,8 @@ volatile int pbtcpu = -1;
 hw_lock_data_t pbtlock;		/* backtrace print lock */
 uint32_t pbtcnt = 0;
 
+volatile int panic_double_fault_cpu = -1;
+
 #if defined (__i386__)
 #define PRINT_ARGS_FROM_STACK_FRAME	1
 #elif defined (__x86_64__)
@@ -168,10 +178,10 @@ machine_startup(void)
 #endif
 
 	if (PE_parse_boot_argn("debug", &debug_boot_arg, sizeof (debug_boot_arg))) {
+		panicDebugging = TRUE;
 		if (debug_boot_arg & DB_HALT) halt_in_debugger=1;
 		if (debug_boot_arg & DB_PRT) disable_debug_output=FALSE; 
 		if (debug_boot_arg & DB_SLOG) systemLogDiags=TRUE; 
-		if (debug_boot_arg & DB_NMI) panicDebugging=TRUE; 
 		if (debug_boot_arg & DB_LOG_PI_SCRN) logPanicDataToScreen=TRUE;
 	} else {
 		debug_boot_arg = 0;
@@ -369,8 +379,14 @@ efi_set_tables_64(EFI_SYSTEM_TABLE_64 * system_table)
     uint32_t hdr_cksum;
     uint32_t cksum;
 
-    kprintf("Processing 64-bit EFI tables at %p\n", system_table);
+    DPRINTF("Processing 64-bit EFI tables at %p\n", system_table);
     do {
+	DPRINTF("Header:\n");
+	DPRINTF("  Signature:   0x%016llx\n", system_table->Hdr.Signature);
+	DPRINTF("  Revision:    0x%08x\n", system_table->Hdr.Revision);
+	DPRINTF("  HeaderSize:  0x%08x\n", system_table->Hdr.HeaderSize);
+	DPRINTF("  CRC32:       0x%08x\n", system_table->Hdr.CRC32);
+	DPRINTF("RuntimeServices: 0x%016llx\n", system_table->RuntimeServices);
         if (system_table->Hdr.Signature != EFI_SYSTEM_TABLE_SIGNATURE) {
 	    kprintf("Bad EFI system table signature\n");
             break;
@@ -380,7 +396,7 @@ efi_set_tables_64(EFI_SYSTEM_TABLE_64 * system_table)
         system_table->Hdr.CRC32 = 0;
         cksum = crc32(0L, system_table, system_table->Hdr.HeaderSize);
 
-        //kprintf("System table calculated CRC32 = 0x%x, header = 0x%x\n", cksum, hdr_cksum);
+        DPRINTF("System table calculated CRC32 = 0x%x, header = 0x%x\n", cksum, hdr_cksum);
         system_table->Hdr.CRC32 = hdr_cksum;
         if (cksum != hdr_cksum) {
             kprintf("Bad EFI system table checksum\n");
@@ -389,7 +405,6 @@ efi_set_tables_64(EFI_SYSTEM_TABLE_64 * system_table)
 
         gPEEFISystemTable     = system_table;
 
-
         if (!cpu_mode_is64bit()) {
             kprintf("Skipping 64-bit EFI runtime services for 32-bit legacy mode\n");			
             break;
@@ -399,10 +414,10 @@ efi_set_tables_64(EFI_SYSTEM_TABLE_64 * system_table)
             kprintf("No runtime table present\n");
             break;
         }
-        kprintf("RuntimeServices table at 0x%qx\n", system_table->RuntimeServices);
+        DPRINTF("RuntimeServices table at 0x%qx\n", system_table->RuntimeServices);
         // 64-bit virtual address is OK for 64-bit EFI and 64/32-bit kernel.
         runtime = (EFI_RUNTIME_SERVICES_64 *) (uintptr_t)system_table->RuntimeServices;
-        kprintf("Checking runtime services table %p\n", runtime);
+        DPRINTF("Checking runtime services table %p\n", runtime);
         if (runtime->Hdr.Signature != EFI_RUNTIME_SERVICES_SIGNATURE) {
             kprintf("Bad EFI runtime table signature\n");
             break;
@@ -413,7 +428,7 @@ efi_set_tables_64(EFI_SYSTEM_TABLE_64 * system_table)
 	runtime->Hdr.CRC32 = 0;
 	cksum = crc32(0L, runtime, runtime->Hdr.HeaderSize);
 
-	//kprintf("Runtime table calculated CRC32 = 0x%x, header = 0x%x\n", cksum, hdr_cksum);
+	DPRINTF("Runtime table calculated CRC32 = 0x%x, header = 0x%x\n", cksum, hdr_cksum);
 	runtime->Hdr.CRC32 = hdr_cksum;
 	if (cksum != hdr_cksum) {
 	    kprintf("Bad EFI runtime table checksum\n");
@@ -432,8 +447,14 @@ efi_set_tables_32(EFI_SYSTEM_TABLE_32 * system_table)
     uint32_t hdr_cksum;
     uint32_t cksum;
 
-    kprintf("Processing 32-bit EFI tables at %p\n", system_table);
+    DPRINTF("Processing 32-bit EFI tables at %p\n", system_table);
     do {
+	DPRINTF("Header:\n");
+	DPRINTF("  Signature:   0x%016llx\n", system_table->Hdr.Signature);
+	DPRINTF("  Revision:    0x%08x\n", system_table->Hdr.Revision);
+	DPRINTF("  HeaderSize:  0x%08x\n", system_table->Hdr.HeaderSize);
+	DPRINTF("  CRC32:       0x%08x\n", system_table->Hdr.CRC32);
+	DPRINTF("RuntimeServices: 0x%08x\n", system_table->RuntimeServices);
         if (system_table->Hdr.Signature != EFI_SYSTEM_TABLE_SIGNATURE) {
             kprintf("Bad EFI system table signature\n");
             break;
@@ -441,9 +462,10 @@ efi_set_tables_32(EFI_SYSTEM_TABLE_32 * system_table)
         // Verify signature of the system table
         hdr_cksum = system_table->Hdr.CRC32;
         system_table->Hdr.CRC32 = 0;
+        DPRINTF("System table at %p HeaderSize 0x%x\n", system_table, system_table->Hdr.HeaderSize);
         cksum = crc32(0L, system_table, system_table->Hdr.HeaderSize);
 
-        //kprintf("System table calculated CRC32 = 0x%x, header = 0x%x\n", cksum, hdr_cksum);
+        DPRINTF("System table calculated CRC32 = 0x%x, header = 0x%x\n", cksum, hdr_cksum);
         system_table->Hdr.CRC32 = hdr_cksum;
         if (cksum != hdr_cksum) {
             kprintf("Bad EFI system table checksum\n");
@@ -452,15 +474,20 @@ efi_set_tables_32(EFI_SYSTEM_TABLE_32 * system_table)
 
         gPEEFISystemTable     = system_table;
 
-
         if(system_table->RuntimeServices == 0) {
             kprintf("No runtime table present\n");
             break;
         }
-        kprintf("RuntimeServices table at 0x%x\n", system_table->RuntimeServices);
+        DPRINTF("RuntimeServices table at 0x%x\n", system_table->RuntimeServices);
         // 32-bit virtual address is OK for 32-bit EFI and 32-bit kernel.
-        // For a 64-bit kernel, booter will ensure pointer is zeroed out
-        runtime = (EFI_RUNTIME_SERVICES_32 *) (intptr_t)system_table->RuntimeServices;
+        // For a 64-bit kernel, booter provides a virtual address mod 4G
+        runtime = (EFI_RUNTIME_SERVICES_32 *)
+#ifdef __x86_64__
+			(system_table->RuntimeServices | VM_MIN_KERNEL_ADDRESS);
+#else
+			system_table->RuntimeServices;
+#endif
+	DPRINTF("Runtime table addressed at %p\n", runtime);
         if (runtime->Hdr.Signature != EFI_RUNTIME_SERVICES_SIGNATURE) {
             kprintf("Bad EFI runtime table signature\n");
             break;
@@ -471,13 +498,26 @@ efi_set_tables_32(EFI_SYSTEM_TABLE_32 * system_table)
 	runtime->Hdr.CRC32 = 0;
 	cksum = crc32(0L, runtime, runtime->Hdr.HeaderSize);
 
-	//kprintf("Runtime table calculated CRC32 = 0x%x, header = 0x%x\n", cksum, hdr_cksum);
+	DPRINTF("Runtime table calculated CRC32 = 0x%x, header = 0x%x\n", cksum, hdr_cksum);
 	runtime->Hdr.CRC32 = hdr_cksum;
 	if (cksum != hdr_cksum) {
 	    kprintf("Bad EFI runtime table checksum\n");
 	    break;
 	}
 
+	DPRINTF("Runtime functions\n");
+	DPRINTF("  GetTime                  : 0x%x\n", runtime->GetTime);
+	DPRINTF("  SetTime                  : 0x%x\n", runtime->SetTime);
+	DPRINTF("  GetWakeupTime            : 0x%x\n", runtime->GetWakeupTime);
+	DPRINTF("  SetWakeupTime            : 0x%x\n", runtime->SetWakeupTime);
+	DPRINTF("  SetVirtualAddressMap     : 0x%x\n", runtime->SetVirtualAddressMap);
+	DPRINTF("  ConvertPointer           : 0x%x\n", runtime->ConvertPointer);
+	DPRINTF("  GetVariable              : 0x%x\n", runtime->GetVariable);
+	DPRINTF("  GetNextVariableName      : 0x%x\n", runtime->GetNextVariableName);
+	DPRINTF("  SetVariable              : 0x%x\n", runtime->SetVariable);
+	DPRINTF("  GetNextHighMonotonicCount: 0x%x\n", runtime->GetNextHighMonotonicCount);
+	DPRINTF("  ResetSystem              : 0x%x\n", runtime->ResetSystem);
+
 	gPEEFIRuntimeServices = runtime;
     }
     while (FALSE);
@@ -503,24 +543,41 @@ efi_init(void)
 	msize = args->MemoryMapDescriptorSize;
 	mcount = args->MemoryMapSize / msize;
 
+	DPRINTF("efi_init() kernel base: 0x%x size: 0x%x\n",
+		args->kaddr, args->ksize);
+	DPRINTF("           efiSystemTable physical: 0x%x virtual: %p\n",
+		args->efiSystemTable,
+		(void *) ml_static_ptovirt(args->efiSystemTable));
+	DPRINTF("           efiRuntimeServicesPageStart: 0x%x\n",
+		args->efiRuntimeServicesPageStart);
+	DPRINTF("           efiRuntimeServicesPageCount: 0x%x\n",
+		args->efiRuntimeServicesPageCount);
+	DPRINTF("           efiRuntimeServicesVirtualPageStart: 0x%016llx\n",
+		args->efiRuntimeServicesVirtualPageStart);
 	mptr = (EfiMemoryRange *)ml_static_ptovirt(args->MemoryMap);
 	for (i=0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) {
 	    if (((mptr->Attribute & EFI_MEMORY_RUNTIME) == EFI_MEMORY_RUNTIME) ) {
 		vm_size = (vm_offset_t)i386_ptob((uint32_t)mptr->NumberOfPages);
 		vm_addr =   (vm_offset_t) mptr->VirtualStart;
-		phys_addr = (vm_map_offset_t) mptr->PhysicalStart;
-#if defined(__i386__)
-		pmap_map
-#elif defined(__x86_64__)
-		pmap_map_bd /* K64todo resolve pmap layer inconsistency */
+#ifdef __x86_64__
+		/* For K64 on EFI32, shadow-map into high KVA */
+		if (vm_addr < VM_MIN_KERNEL_ADDRESS)
+			vm_addr |= VM_MIN_KERNEL_ADDRESS;
 #endif
-			(vm_addr, phys_addr, phys_addr + round_page(vm_size),
+		phys_addr = (vm_map_offset_t) mptr->PhysicalStart;
+		DPRINTF(" Type: %x phys: %p EFIv: %p kv: %p size: %p\n",
+			mptr->Type,
+			(void *) (uintptr_t) phys_addr,
+			(void *) (uintptr_t) mptr->VirtualStart,
+			(void *) vm_addr,
+			(void *) vm_size);
+		pmap_map(vm_addr, phys_addr, phys_addr + round_page(vm_size),
 		     (mptr->Type == kEfiRuntimeServicesCode) ? VM_PROT_READ | VM_PROT_EXECUTE : VM_PROT_READ|VM_PROT_WRITE,
 		     (mptr->Type == EfiMemoryMappedIO)       ? VM_WIMG_IO   : VM_WIMG_USE_DEFAULT);
 	    }
 	}
 
-        if ((args->Version != kBootArgsVersion1) || (args->Version == kBootArgsVersion1 && args->Revision < kBootArgsRevision1_5 ))
+        if (args->Version != kBootArgsVersion2)
             panic("Incompatible boot args version %d revision %d\n", args->Version, args->Revision);
 
         kprintf("Boot args version %d revision %d mode %d\n", args->Version, args->Revision, args->efiMode);
@@ -543,8 +600,6 @@ hibernate_newruntime_map(void * map, vm_size_t map_size, uint32_t system_table_o
 
     kprintf("Reinitializing EFI runtime services\n");
 
-    if (args->Version != kBootArgsVersion1)
-	return;
     do
     {
         vm_offset_t vm_size, vm_addr;
@@ -572,6 +627,11 @@ hibernate_newruntime_map(void * map, vm_size_t map_size, uint32_t system_table_o
 
 		vm_size = (vm_offset_t)i386_ptob((uint32_t)mptr->NumberOfPages);
 		vm_addr =   (vm_offset_t) mptr->VirtualStart;
+#ifdef __x86_64__
+		/* K64 on EFI32 */
+		if (vm_addr < VM_MIN_KERNEL_ADDRESS)
+			vm_addr |= VM_MIN_KERNEL_ADDRESS;
+#endif
 		phys_addr = (vm_map_offset_t) mptr->PhysicalStart;
 
 		kprintf("mapping[%u] %qx @ %lx, %llu\n", mptr->Type, phys_addr, (unsigned long)vm_addr, mptr->NumberOfPages);
@@ -590,22 +650,21 @@ hibernate_newruntime_map(void * map, vm_size_t map_size, uint32_t system_table_o
 
 		vm_size = (vm_offset_t)i386_ptob((uint32_t)mptr->NumberOfPages);
 		vm_addr =   (vm_offset_t) mptr->VirtualStart;
+#ifdef __x86_64__
+		if (vm_addr < VM_MIN_KERNEL_ADDRESS)
+			vm_addr |= VM_MIN_KERNEL_ADDRESS;
+#endif
 		phys_addr = (vm_map_offset_t) mptr->PhysicalStart;
 
 		kprintf("mapping[%u] %qx @ %lx, %llu\n", mptr->Type, phys_addr, (unsigned long)vm_addr, mptr->NumberOfPages);
 
-#if defined(__i386__)
-		pmap_map
-#elif defined(__x86_64__)
-		pmap_map_bd /* K64todo resolve pmap layer inconsistency */
-#endif
-			(vm_addr, phys_addr, phys_addr + round_page(vm_size),
+		pmap_map(vm_addr, phys_addr, phys_addr + round_page(vm_size),
 			 (mptr->Type == kEfiRuntimeServicesCode) ? VM_PROT_READ | VM_PROT_EXECUTE : VM_PROT_READ|VM_PROT_WRITE,
 			 (mptr->Type == EfiMemoryMappedIO)       ? VM_WIMG_IO   : VM_WIMG_USE_DEFAULT);
 	    }
 	}
 
-        if ((args->Version != kBootArgsVersion1) || (args->Version == kBootArgsVersion1 && args->Revision < kBootArgsRevision1_5 ))
+        if (args->Version != kBootArgsVersion2)
             panic("Incompatible boot args version %d revision %d\n", args->Version, args->Revision);
 
         kprintf("Boot args version %d revision %d mode %d\n", args->Version, args->Revision, args->efiMode);
@@ -655,6 +714,7 @@ machine_init(void)
 	 */
 	clock_config();
 
+#if CONFIG_MTRR
 	/*
 	 * Initialize MTRR from boot processor.
 	 */
@@ -664,6 +724,7 @@ machine_init(void)
 	 * Set up PAT for boot processor.
 	 */
 	pat_init();
+#endif
 
 	/*
 	 * Free lowmem pages and complete other setup
@@ -712,9 +773,25 @@ panic_io_port_read(void) {
 /* For use with the MP rendezvous mechanism
  */
 
+uint64_t panic_restart_timeout = ~(0ULL);
+
 static void
-machine_halt_cpu(__unused void *arg) {
+machine_halt_cpu(void) {
 	panic_io_port_read();
+
+	if (panic_restart_timeout != ~(0ULL)) {
+		uint64_t deadline = mach_absolute_time() + panic_restart_timeout;
+		while (mach_absolute_time() < deadline) {
+			cpu_pause();
+		}
+		kprintf("Invoking PE_halt_restart\n");
+		/* Attempt restart via ACPI RESET_REG; at the time of this
+	 * writing, this routine is chained through AppleSMC->
+		 * AppleACPIPlatform
+		 */
+		if (PE_halt_restart)
+			(*PE_halt_restart)(kPERestartCPU);
+	}
 	pmCPUHalt(PM_HALT_DEBUG);
 }
 
@@ -724,6 +801,7 @@ Debugger(
 {
 	unsigned long pi_size = 0;
 	void *stackptr;
+	int cn = cpu_number();
 
 	hw_atomic_add(&debug_mode, 1);   
 	if (!panic_is_inited) {
@@ -731,7 +809,6 @@ Debugger(
 		asm("hlt");
 	}
 
-
 	printf("Debugger called: <%s>\n", message);
 	kprintf("Debugger called: <%s>\n", message);
 
@@ -758,7 +835,7 @@ Debugger(
 #endif
 
 		/* Print backtrace - callee is internally synchronized */
-		panic_i386_backtrace(stackptr, 64, NULL, FALSE, NULL);
+		panic_i386_backtrace(stackptr, ((panic_double_fault_cpu == cn) ? 80: 48), NULL, FALSE, NULL);
 
 		/* everything should be printed now so copy to NVRAM
 		 */
@@ -794,7 +871,7 @@ Debugger(
 			 * since we can subsequently halt the system.
 			 */
 
-			kprintf("Attempting to commit panic log to NVRAM\n");
+
 /* The following sequence is a workaround for:
  * <rdar://problem/5915669> SnowLeopard10A67: AppleEFINVRAM should not invoke
  * any routines that use floating point (MMX in this case) when saving panic
@@ -802,10 +879,12 @@ Debugger(
  */
 			cr0 = get_cr0();
 			clear_ts();
-			
+
+			kprintf("Attempting to commit panic log to NVRAM\n");
 			pi_size = PESavePanicInfo((unsigned char *)debug_buf,
 					(uint32_t)pi_size );
 			set_cr0(cr0);
+
 			/* Uncompress in-place, to permit examination of
 			 * the panic log by debuggers.
 			 */
@@ -823,20 +902,27 @@ Debugger(
 			draw_panic_dialog();
 
 		if (!panicDebugging) {
+			unsigned cnum;
 			/* Clear the MP rendezvous function lock, in the event
 			 * that a panic occurred while in that codepath.
 			 */
 			mp_rendezvous_break_lock();
 			if (PE_reboot_on_panic()) {
-				PEHaltRestart(kPEPanicRestartCPU);
+				if (PE_halt_restart)
+					(*PE_halt_restart)(kPERestartCPU);
 			}
 
-			/* Force all CPUs to disable interrupts and HLT.
-			 * We've panicked, and shouldn't depend on the
-			 * PEHaltRestart() mechanism, which relies on several
-			 * bits of infrastructure.
+			/* Non-maskably interrupt all other processors
+			 * If a restart timeout is specified, this processor
+			 * will attempt a restart.
 			 */
-			mp_rendezvous_no_intrs(machine_halt_cpu, NULL);
+			kprintf("Invoking machine_halt_cpu on CPU %d\n", cn);
+			for (cnum = 0; cnum < real_ncpus; cnum++) {
+				if (cnum != (unsigned) cn) {
+					cpu_NMI_interrupt(cnum);
+				}
+			}
+			machine_halt_cpu();
 			/* NOT REACHED */
 		}
         }
@@ -852,26 +938,12 @@ machine_boot_info(char *buf, __unused vm_size_t size)
 	return buf;
 }
 
-
-struct pasc {
-    unsigned a: 7;
-    unsigned b: 7;
-    unsigned c: 7;
-    unsigned d: 7;
-    unsigned e: 7;
-    unsigned f: 7;
-    unsigned g: 7;
-    unsigned h: 7;
-}  __attribute__((packed));
-
-typedef struct pasc pasc_t;
-
 /* Routines for address - symbol translation. Not called unless the "keepsyms"
  * boot-arg is supplied.
  */
 
 static int
-panic_print_macho_symbol_name(kernel_mach_header_t *mh, vm_address_t search)
+panic_print_macho_symbol_name(kernel_mach_header_t *mh, vm_address_t search, const char *module_name)
 {
     kernel_nlist_t	*sym = NULL;
     struct load_command		*cmd;
@@ -896,7 +968,7 @@ panic_print_macho_symbol_name(kernel_mach_header_t *mh, vm_address_t search)
                 orig_le = orig_sg;
             else if (strncmp("", orig_sg->segname,
 				    sizeof(orig_sg->segname)) == 0)
-                orig_ts = orig_sg; /* kexts have a single unnamed segment */
+                orig_ts = orig_sg; /* pre-Barolo i386 kexts have a single unnamed segment */
         }
         else if (cmd->cmd == LC_SYMTAB)
             orig_st = (struct symtab_command *) cmd;
@@ -907,12 +979,6 @@ panic_print_macho_symbol_name(kernel_mach_header_t *mh, vm_address_t search)
     if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
         return 0;
     
-    /* kexts don't have a LINKEDIT segment for now, so we'll never get this far for kexts */
-    
-    vm_offset_t slide = ((vm_address_t)mh) - orig_ts->vmaddr;
-    if (slide != 0)
-        search -= slide; /* adjusting search since the binary has slid */
-    
     if ((search < orig_ts->vmaddr) ||
         (search >= orig_ts->vmaddr + orig_ts->vmsize)) {
         /* search out of range for this mach header */
@@ -938,9 +1004,9 @@ panic_print_macho_symbol_name(kernel_mach_header_t *mh, vm_address_t search)
     
     if (bestsym != NULL) {
         if (diff != 0) {
-            kdb_printf("%s + 0x%lx", bestsym, (unsigned long)diff);
+            kdb_printf("%s : %s + 0x%lx", module_name, bestsym, (unsigned long)diff);
         } else {
-            kdb_printf("%s", bestsym);
+            kdb_printf("%s : %s", module_name, bestsym);
         }
         return 1;
     }
@@ -952,17 +1018,22 @@ extern kmod_info_t * kmod; /* the list of modules */
 static void
 panic_print_kmod_symbol_name(vm_address_t search)
 {
-    kmod_info_t *			current_kmod = kmod;
-    
-    while (current_kmod != NULL) {
-        if ((current_kmod->address <= search) &&
-            (current_kmod->address + current_kmod->size > search))
+    u_int i;
+
+    if (gLoadedKextSummaries == NULL)
+	    return;
+    for (i = 0; i < gLoadedKextSummaries->numSummaries; ++i) {
+        OSKextLoadedKextSummary *summary = gLoadedKextSummaries->summaries + i;
+
+        if ((search >= summary->address) &&
+            (search < (summary->address + summary->size)))
+        {
+            kernel_mach_header_t *header = (kernel_mach_header_t *)(uintptr_t) summary->address;
+            if (panic_print_macho_symbol_name(header, search, summary->name) == 0) {
+                kdb_printf("%s + %llu", summary->name, (unsigned long)search - summary->address);
+            }
             break;
-        current_kmod = current_kmod->next;
-    }
-    if (current_kmod != NULL) {
-        /* if kexts had symbol table loaded, we'd call search_symbol_name again; alas, they don't */
-      kdb_printf("%s + %lu \n", current_kmod->name, (unsigned long)search - current_kmod->address);
+        }
     }
 }
 
@@ -970,7 +1041,7 @@ static void
 panic_print_symbol_name(vm_address_t search)
 {
     /* try searching in the kernel */
-    if (panic_print_macho_symbol_name(&_mh_execute_header, search) == 0) {
+    if (panic_print_macho_symbol_name(&_mh_execute_header, search, "mach_kernel") == 0) {
         /* that failed, now try to search for the right kext */
         panic_print_kmod_symbol_name(search);
     }
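
A minimal sketch of the lookup the new panic_print_kmod_symbol_name()
performs (simplified types; the name/address/size fields follow the diff,
the helper itself is hypothetical):

    typedef struct {
        char     name[64];     /* kext bundle identifier         */
        uint64_t address;      /* load address of the kext image */
        uint64_t size;         /* VM size of the kext image      */
    } summary_t;

    /* Return the summary whose [address, address + size) range contains
     * pc, or NULL if pc does not fall inside any loaded kext. */
    static const summary_t *
    find_kext_summary(uint64_t pc, const summary_t *s, unsigned n)
    {
        for (unsigned i = 0; i < n; i++) {
            if (pc >= s[i].address && pc < s[i].address + s[i].size)
                return &s[i];
        }
        return NULL;
    }

On a hit, the Mach-O header at summary->address is handed to
panic_print_macho_symbol_name() together with the kext name, so panic logs
now read "module : symbol + offset" rather than a bare symbol.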
@@ -994,14 +1065,15 @@ panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdu
 	volatile uint32_t *ppbtcnt = &pbtcnt;
 	uint64_t bt_tsc_timeout;
 	boolean_t keepsyms = FALSE;
+	int cn = cpu_number();
 
-	if(pbtcpu != cpu_number()) {
+	if(pbtcpu != cn) {
 		hw_atomic_add(&pbtcnt, 1);
 		/* Spin on print backtrace lock, which serializes output
 		 * Continue anyway if a timeout occurs.
 		 */
-		hw_lock_to(&pbtlock, LockTimeOutTSC);
-		pbtcpu = cpu_number();
+		hw_lock_to(&pbtlock, LockTimeOutTSC*2);
+		pbtcpu = cn;
 	}
 
 	PE_parse_boot_argn("keepsyms", &keepsyms, sizeof (keepsyms));
@@ -1041,9 +1113,9 @@ panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdu
 
 	kdb_printf("Backtrace (CPU %d), "
 #if PRINT_ARGS_FROM_STACK_FRAME
-	"Frame : Return Address (4 potential args on stack)\n", cpu_number());
+	"Frame : Return Address (4 potential args on stack)\n", cn);
 #else
-	"Frame : Return Address\n", cpu_number());
+	"Frame : Return Address\n", cn);
 #endif
 
 	for (frame_index = 0; frame_index < nframes; frame_index++) {
@@ -1058,7 +1130,7 @@ panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdu
 		}
 
 		if (!kvtophys(curframep) ||
-		    !kvtophys(curframep + sizeof(cframe_t))) {
+		    !kvtophys(curframep + sizeof(cframe_t) - 1)) {
 			kdb_printf("No mapping exists for frame pointer\n");
 			goto invalid;
 		}
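
The probe change above is an off-by-one fix: curframep + sizeof(cframe_t)
is the first byte past the frame, so the old code tested an address outside
the object and could reject a frame that ends flush against the last mapped
page. A kernel-context sketch of the corrected check:

    /* A small object [p, p + len) spanning at most two pages is fully
     * mapped iff its first and last bytes both translate. */
    static boolean_t
    range_mapped(vm_offset_t p, vm_size_t len)
    {
        return kvtophys(p) && kvtophys(p + len - 1);
    }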
@@ -1119,5 +1191,3 @@ out:
 	bt_tsc_timeout = rdtsc64() + PBT_TIMEOUT_CYCLES;
 	while(*ppbtcnt && (rdtsc64() < bt_tsc_timeout));
 }
-
-void *apic_table = NULL;
diff --git a/osfmk/i386/Diagnostics.h b/osfmk/i386/Diagnostics.h
index c8d385c7b..f5281c604 100644
--- a/osfmk/i386/Diagnostics.h
+++ b/osfmk/i386/Diagnostics.h
@@ -42,8 +42,8 @@
 #ifndef _DIAGNOSTICS_H_
 #define _DIAGNOSTICS_H_
 
-#ifdef __ppc__
-#error This file is not useful on PowerPC.
+#if !(defined(__i386__) || defined(__x86_64__))
+#error This file is not useful on non-Intel platforms
 #endif
 
 int diagCall(x86_saved_state_t *regs);
diff --git a/osfmk/i386/Makefile b/osfmk/i386/Makefile
index d07d32aac..270006bdb 100644
--- a/osfmk/i386/Makefile
+++ b/osfmk/i386/Makefile
@@ -26,14 +26,18 @@ EXPORT_ONLY_FILES = 	\
 		    mp.h \
 		    mp_desc.h \
 		    mp_events.h \
+		    pal_native.h \
+		    pal_routines.h \
+		    pal_hibernate.h \
 		    pmCPU.h \
 		    pmap.h \
 		    proc_reg.h \
-		    rtclock.h \
+		    rtclock_protos.h \
 		    seg.h \
 		    simple_lock.h \
 		    tsc.h \
 		    tss.h \
+		    ucode.h \
 		    vmx.h
 
 INSTALL_MD_DIR = i386
@@ -44,7 +48,7 @@ INSTALL_MD_LCL_LIST = cpu_capabilities.h
 
 INSTALL_KF_MD_LIST = asm.h cpuid.h eflags.h locks.h machine_routines.h proc_reg.h vmx.h
 
-INSTALL_KF_MD_LCL_LIST = $(filter-out cpu_data.h, $(EXPORT_ONLY_FILES))
+INSTALL_KF_MD_LCL_LIST = $(filter-out cpu_data.h pal_i386.h, $(EXPORT_ONLY_FILES))
 
 EXPORT_MD_LIST = ${EXPORT_ONLY_FILES}
 
diff --git a/osfmk/i386/acpi.c b/osfmk/i386/acpi.c
index fb2cbe334..f13561244 100644
--- a/osfmk/i386/acpi.c
+++ b/osfmk/i386/acpi.c
@@ -32,10 +32,13 @@
 #include <i386/misc_protos.h>
 #include <i386/mp.h>
 #include <i386/cpu_data.h>
+#if CONFIG_MTRR
 #include <i386/mtrr.h>
+#endif
 #if CONFIG_VMX
 #include <i386/vmx/vmx_cpu.h>
 #endif
+#include <i386/ucode.h>
 #include <i386/acpi.h>
 #include <i386/fpu.h>
 #include <i386/lapic.h>
@@ -51,6 +54,7 @@
 
 #include <kern/cpu_data.h>
 #include <console/serial_protos.h>
+#include <machine/pal_routines.h>
 #include <vm/vm_page.h>
 
 #if HIBERNATION
@@ -103,7 +107,6 @@ acpi_hibernate(void *refcon)
 #if defined(__i386__)
 		cpu_IA32e_enable(current_cpu_datap());
 #endif
-
 		mode = hibernate_write_image();
 
 		if( mode == kIOHibernatePostWriteHalt )
@@ -145,7 +148,8 @@ acpi_hibernate(void *refcon)
 #endif /* CONFIG_SLEEP */
 #endif /* HIBERNATION */
 
-extern void		slave_pstart(void);
+extern void			slave_pstart(void);
+
 
 void
 acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
@@ -161,8 +165,8 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
 	uint64_t	my_tsc;
 	uint64_t	my_abs;
 
-	kprintf("acpi_sleep_kernel hib=%d\n",
-			current_cpu_datap()->cpu_hibernate);
+	kprintf("acpi_sleep_kernel hib=%d, cpu=%d\n",
+			current_cpu_datap()->cpu_hibernate, cpu_number());
 
     	/* Get all CPUs to be in the "off" state */
     	my_cpu = cpu_number();
@@ -175,7 +179,7 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
 			      rc, cpu);
 	}
 
-	/* shutdown local APIC before passing control to BIOS */
+	/* shutdown local APIC before passing control to firmware */
 	lapic_shutdown();
 
 #if HIBERNATION
@@ -238,7 +242,7 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
 	 */
 
 	if (FALSE == disable_serial_output)
-		serial_init();
+		pal_serial_init();
 
 #if HIBERNATION
 	if (current_cpu_datap()->cpu_hibernate) {
@@ -263,8 +267,13 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
 	mca_cpu_init();
 #endif
 
+#if CONFIG_MTRR
 	/* restore MTRR settings */
 	mtrr_update_cpu();
+#endif
+
+	/* update CPU microcode */
+	ucode_update_wake();
 
 #if CONFIG_VMX
 	/* 
@@ -273,8 +282,10 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
 	vmx_resume();
 #endif
 
+#if CONFIG_MTRR
 	/* set up PAT following boot processor power up */
 	pat_init();
+#endif
 
 	/*
 	 * Go through all of the CPUs and mark them as requiring
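
The serial_init() to pal_serial_init() change is part of the platform
abstraction layer (PAL) threaded through this release. Purely as an
illustration of the indirection (the names and the runtime ops table here
are hypothetical; the real dispatch lives behind pal_routines.h and is
selected per platform):

    #include <stdio.h>

    typedef struct {
        void (*serial_init)(void);   /* bring up the console UART */
    } pal_ops_t;

    static void native_serial_init(void) { puts("native serial up"); }

    static pal_ops_t pal_ops = { .serial_init = native_serial_init };

    void pal_serial_init(void) { pal_ops.serial_init(); }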
diff --git a/osfmk/i386/asm.h b/osfmk/i386/asm.h
index 02a5620ae..a51f8ae92 100644
--- a/osfmk/i386/asm.h
+++ b/osfmk/i386/asm.h
@@ -305,4 +305,104 @@
 #endif /* __NO_UNDERSCORES__ */
 #endif /* ASSEMBLER */
 
+/*
+ * The following macros make calls into C code.
+ * They dynamically align the stack to 16 bytes.
+ */
+#if defined(__i386__)
+/*
+ * Arguments are moved (not pushed) onto the correctly aligned stack.
+ * NOTE: ESI is destroyed in the process, and hence cannot
+ * be directly used as a parameter. Users of this macro must
+ * independently preserve ESI (a non-volatile) if the routine is
+ * intended to be called from C, for instance.
+ */
+
+#define CCALL(fn)			\
+	movl	%esp, %esi		;\
+	andl	$0xFFFFFFF0, %esp	;\
+	call	EXT(fn)			;\
+	movl	%esi, %esp
+
+#define CCALL1(fn, arg1)		\
+	movl	%esp, %esi		;\
+	subl	$4, %esp		;\
+	andl	$0xFFFFFFF0, %esp	;\
+	movl	arg1, (%esp)		;\
+	call	EXT(fn)			;\
+	movl	%esi, %esp
+
+#define CCALL2(fn, arg1, arg2)		\
+	movl	%esp, %esi		;\
+	subl	$8, %esp		;\
+	andl	$0xFFFFFFF0, %esp	;\
+	movl	arg2, 4(%esp)		;\
+	movl	arg1, (%esp)		;\
+	call	EXT(fn)			;\
+	movl	%esi, %esp
+
+/* This variant exists to permit adjustment of the stack by "dtrace" */
+#define CCALL1WITHSP(fn, arg1)		\
+	movl	%esp, %esi		;\
+	subl	$12, %esp		;\
+	andl	$0xFFFFFFF0, %esp	;\
+	movl	%esi, 8(%esp)		;\
+	leal	8(%esp), %esi		;\
+	movl	%esi, 4(%esp)		;\
+	movl	arg1, (%esp)		;\
+	call	EXT(fn)			;\
+	movl	8(%esp), %esp
+
+/*
+ * CCALL5 is used for callee functions with 3 arguments but
+ * where arg2 (a3:a2) and arg3 (a5:a4) are 64-bit values.
+ */
+#define CCALL5(fn, a1, a2, a3, a4, a5)	\
+	movl	%esp, %esi		;\
+	subl	$20, %esp		;\
+	andl	$0xFFFFFFF0, %esp	;\
+	movl	a5, 16(%esp)		;\
+	movl	a4, 12(%esp)		;\
+	movl	a3,  8(%esp)		;\
+	movl	a2,  4(%esp)		;\
+	movl	a1,  (%esp)		;\
+	call	EXT(fn)			;\
+	movl	%esi, %esp
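+
+/*
+ * Illustration: for a callee void fn(uint32_t x, uint64_t y, uint64_t z),
+ * y is passed as a3:a2 (a2 = low word, a3 = high word) and z as a5:a4,
+ * since the i386 convention splits a 64-bit argument across two
+ * consecutive 32-bit stack slots.
+ */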
+
+#elif defined(__x86_64__)
+
+/* This variant exists to permit adjustment of the stack by "dtrace" */
+#define CCALLWITHSP(fn)				 \
+	mov	%rsp, %r12			;\
+	sub	$8, %rsp			;\
+	and	$0xFFFFFFFFFFFFFFF0, %rsp	;\
+	mov	%r12, (%rsp)			;\
+	leaq	(%rsp), %rsi			;\
+	call	EXT(fn)				;\
+	mov	(%rsp), %rsp
+	
+#define CCALL(fn)				 \
+	mov	%rsp, %r12			;\
+	and	$0xFFFFFFFFFFFFFFF0, %rsp	;\
+	call	EXT(fn)				;\
+	mov	%r12, %rsp
+
+#define CCALL1(fn, arg1) 			 \
+	mov	arg1, %rdi 			;\
+	CCALL(fn)
+
+#define CCALL2(fn, arg1, arg2)		 	 \
+	mov	arg1, %rdi 			;\
+	mov	arg2, %rsi 			;\
+	CCALL(fn)
+
+#define CCALL3(fn, arg1, arg2, arg3) 		 \
+	mov	arg1, %rdi 			;\
+	mov	arg2, %rsi 			;\
+	mov	arg3, %rdx 			;\
+	CCALL(fn)
+
+#else
+#error unsupported architecture
+#endif
+
 #endif /* _I386_ASM_H_ */
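
The alignment step shared by all of these macros is a round-down to a
16-byte boundary, the alignment the ABI guarantees at call sites and that
SSE spill code relies on. A tiny user-space illustration of the arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    /* What "andl $0xFFFFFFF0, %esp" (or the 64-bit "and" above) does:
     * clear the low four bits, rounding the stack pointer down to the
     * next 16-byte boundary. */
    static uintptr_t align16_down(uintptr_t sp)
    {
        return sp & ~(uintptr_t)0xF;
    }

    int main(void)
    {
        printf("%#lx\n", (unsigned long)align16_down(0x1000c)); /* 0x10000 */
        return 0;
    }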
diff --git a/osfmk/i386/bsd_i386.c b/osfmk/i386/bsd_i386.c
index 4b933d763..57b222a14 100644
--- a/osfmk/i386/bsd_i386.c
+++ b/osfmk/i386/bsd_i386.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -73,25 +73,6 @@
 extern void	mach_kauth_cred_uthread_update(void);
 #endif
 
-kern_return_t
-thread_userstack(
-    thread_t,
-    int,
-    thread_state_t,
-    unsigned int,
-    mach_vm_offset_t *,
-	int *
-);
-
-kern_return_t
-thread_entrypoint(
-    thread_t,
-    int,
-    thread_state_t,
-    unsigned int,
-    mach_vm_offset_t *
-); 
-
 void * find_user_regs(thread_t);
 
 unsigned int get_msr_exportmask(void);
@@ -100,8 +81,7 @@ unsigned int get_msr_nbits(void);
 
 unsigned int get_msr_rbits(void);
 
-extern void throttle_lowpri_io(boolean_t);
-
+extern void throttle_lowpri_io(int);
 
 /*
  * thread_userstack:
@@ -115,7 +95,7 @@ thread_userstack(
     int                 flavor,
     thread_state_t      tstate,
     __unused unsigned int        count,
-    user_addr_t    *user_stack,
+    mach_vm_offset_t    *user_stack,
 	int					*customstack
 )
 {
@@ -129,14 +109,15 @@ thread_userstack(
 
 			state25 = (x86_thread_state32_t *) tstate;
 
-			if (state25->esp)
+			if (state25->esp) {
 				*user_stack = state25->esp;
-			else 
+				if (customstack)
+					*customstack = 1;
+			} else {
 				*user_stack = VM_USRSTACK32;
-			if (customstack && state25->esp)
-				*customstack = 1;
-			else
-				*customstack = 0;
+				if (customstack)
+					*customstack = 0;
+			}
 			break;
 		}
 
@@ -146,14 +127,15 @@ thread_userstack(
 
 			state25 = (x86_thread_state64_t *) tstate;
 
-			if (state25->rsp)
+			if (state25->rsp) {
 				*user_stack = state25->rsp;
-			else 
+				if (customstack)
+					*customstack = 1;
+			} else {
 				*user_stack = VM_USRSTACK64;
-			if (customstack && state25->rsp)
-				*customstack = 1;
-			else
-				*customstack = 0;
+				if (customstack)
+					*customstack = 0;
+			}
 			break;
 		}
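
Both arms of the rewritten thread_userstack() now follow the same shape;
schematically (the helper below is hypothetical, not part of the diff):

    /* A nonzero stack pointer in the supplied thread state selects a
     * custom stack; zero falls back to the platform default
     * (VM_USRSTACK32 or VM_USRSTACK64). */
    static mach_vm_offset_t
    choose_stack(mach_vm_offset_t sp, mach_vm_offset_t def, int *customstack)
    {
        if (sp) {
            if (customstack)
                *customstack = 1;
            return sp;
        }
        if (customstack)
            *customstack = 0;
        return def;
    }

The behavioral fix over the old code: *customstack is written only when the
caller passes a non-NULL pointer, where the previous version would
dereference a NULL customstack in its else arm.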
 
@@ -202,62 +184,6 @@ thread_entrypoint(
 	return (KERN_SUCCESS);
 }
 
-/*
- * Duplicate parent state in child
- * for U**X fork.
- */
-kern_return_t
-machine_thread_dup(
-    thread_t		parent,
-    thread_t		child
-)
-{
-	
-	pcb_t		parent_pcb;
-	pcb_t		child_pcb;
-
-	if ((child_pcb = child->machine.pcb) == NULL ||
-	    (parent_pcb = parent->machine.pcb) == NULL)
-		return (KERN_FAILURE);
-	/*
-	 * Copy over the x86_saved_state registers
-	 */
-	if (cpu_mode_is64bit()) {
-		if (thread_is_64bit(parent))
-			bcopy(USER_REGS64(parent), USER_REGS64(child), sizeof(x86_saved_state64_t));
-		else
-			bcopy(USER_REGS32(parent), USER_REGS32(child), sizeof(x86_saved_state_compat32_t));
-	} else
-		bcopy(USER_REGS32(parent), USER_REGS32(child), sizeof(x86_saved_state32_t));
-
-	/*
-	 * Check to see if parent is using floating point
-	 * and if so, copy the registers to the child
-	 */
-	fpu_dup_fxstate(parent, child);
-
-#ifdef	MACH_BSD
-	/*
-	 * Copy the parent's cthread id and USER_CTHREAD descriptor, if 32-bit.
-	 */
-	child_pcb->cthread_self = parent_pcb->cthread_self;
-	if (!thread_is_64bit(parent))
-		child_pcb->cthread_desc = parent_pcb->cthread_desc;
-
-	/*
-	 * FIXME - should a user specified LDT, TSS and V86 info
-	 * be duplicated as well?? - probably not.
-	 */
-	// duplicate any use LDT entry that was set I think this is appropriate.
-        if (parent_pcb->uldt_selector!= 0) {
-	        child_pcb->uldt_selector = parent_pcb->uldt_selector;
-		child_pcb->uldt_desc = parent_pcb->uldt_desc;
-	}
-#endif
-
-	return (KERN_SUCCESS);
-}
-
 /* 
  * FIXME - thread_set_child
  */
@@ -266,6 +192,7 @@ void thread_set_child(thread_t child, int pid);
 void
 thread_set_child(thread_t child, int pid)
 {
+	pal_register_cache_state(child, DIRTY);
 
 	if (thread_is_64bit(child)) {
 		x86_saved_state64_t	*iss64;
@@ -287,31 +214,6 @@ thread_set_child(thread_t child, int pid)
 }
 
 
-void thread_set_parent(thread_t parent, int pid);
-
-void
-thread_set_parent(thread_t parent, int pid)
-{
-
-	if (thread_is_64bit(parent)) {
-		x86_saved_state64_t	*iss64;
-
-		iss64 = USER_REGS64(parent);
-
-		iss64->rax = pid;
-		iss64->rdx = 0;
-		iss64->isf.rflags &= ~EFL_CF;
-	} else {
-		x86_saved_state32_t	*iss32;
-
-		iss32 = USER_REGS32(parent);
-
-		iss32->eax = pid;
-		iss32->edx = 0;
-		iss32->efl &= ~EFL_CF;
-	}
-}
-
 
 /*
  * System Call handling code
@@ -449,142 +351,6 @@ machdep_syscall64(x86_saved_state_t *state)
 	/* NOTREACHED */
 }
 
-/*
- * thread_fast_set_cthread_self: Sets the machine kernel thread ID of the
- * current thread to the given thread ID; fast version for 32-bit processes
- *
- * Parameters:    self                    Thread ID to set
- *                
- * Returns:        0                      Success
- *                !0                      Not success
- */
-kern_return_t
-thread_fast_set_cthread_self(uint32_t self)
-{
-	thread_t thread = current_thread();
-	pcb_t pcb = thread->machine.pcb;
-	struct real_descriptor desc = {
-		.limit_low = 1,
-		.limit_high = 0,
-		.base_low = self & 0xffff,
-		.base_med = (self >> 16) & 0xff,
-		.base_high = (self >> 24) & 0xff,
-		.access = ACC_P|ACC_PL_U|ACC_DATA_W,
-		.granularity = SZ_32|SZ_G,
-	};
-
-	current_thread()->machine.pcb->cthread_self = (uint64_t) self;	/* preserve old func too */
-
-	/* assign descriptor */
-	mp_disable_preemption();
-	pcb->cthread_desc = desc;
-	*ldt_desc_p(USER_CTHREAD) = desc;
-	saved_state32(pcb->iss)->gs = USER_CTHREAD;
-	mp_enable_preemption();
-
-	return (USER_CTHREAD);
-}
-
-/*
- * thread_fast_set_cthread_self64: Sets the machine kernel thread ID of the
- * current thread to the given thread ID; fast version for 64-bit processes 
- *
- * Parameters:    self                    Thread ID
- *                
- * Returns:        0                      Success
- *                !0                      Not success
- */
-kern_return_t
-thread_fast_set_cthread_self64(uint64_t self)
-{
-	pcb_t pcb = current_thread()->machine.pcb;
-	cpu_data_t              *cdp;
-
-	/* check for canonical address, set 0 otherwise  */
-	if (!IS_USERADDR64_CANONICAL(self))
-		self = 0ULL;
-
-	pcb->cthread_self = self;
-	mp_disable_preemption();
-	cdp = current_cpu_datap();
-#if defined(__x86_64__)
-	if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) ||
-	    (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE)))
-		wrmsr64(MSR_IA32_KERNEL_GS_BASE, self);
-#endif
-	cdp->cpu_uber.cu_user_gs_base = self;
-	mp_enable_preemption();
-	return (USER_CTHREAD);
-}
-
-/*
- * thread_set_user_ldt routine is the interface for the user level
- * settable ldt entry feature.  allowing a user to create arbitrary
- * ldt entries seems to be too large of a security hole, so instead
- * this mechanism is in place to allow user level processes to have
- * an ldt entry that can be used in conjunction with the FS register.
- *
- * Swapping occurs inside the pcb.c file along with initialization
- * when a thread is created. The basic functioning theory is that the
- * pcb->uldt_selector variable will contain either 0 meaning the
- * process has not set up any entry, or the selector to be used in
- * the FS register. pcb->uldt_desc contains the actual descriptor the
- * user has set up stored in machine usable ldt format.
- *
- * Currently one entry is shared by all threads (USER_SETTABLE), but
- * this could be changed in the future by changing how this routine
- * allocates the selector. There seems to be no real reason at this
- * time to have this added feature, but in the future it might be
- * needed.
- *
- * address is the linear address of the start of the data area size
- * is the size in bytes of the area flags should always be set to 0
- * for now. in the future it could be used to set R/W permisions or
- * other functions. Currently the segment is created as a data segment
- * up to 1 megabyte in size with full read/write permisions only.
- *
- * this call returns the segment selector or -1 if any error occurs
- */
-kern_return_t
-thread_set_user_ldt(uint32_t address, uint32_t size, uint32_t flags)
-{
-	pcb_t pcb;
-	struct fake_descriptor temp;
-	int mycpu;
-
-	if (flags != 0)
-		return -1;		// flags not supported
-	if (size > 0xFFFFF)
-		return -1;		// size too big, 1 meg is the limit
-
-	mp_disable_preemption();
-	mycpu = cpu_number();
-
-	// create a "fake" descriptor so we can use fix_desc()
-	// to build a real one...
-	//   32 bit default operation size
-	//   standard read/write perms for a data segment
-	pcb = (pcb_t)current_thread()->machine.pcb;
-	temp.offset = address;
-	temp.lim_or_seg = size;
-	temp.size_or_wdct = SZ_32;
-	temp.access = ACC_P|ACC_PL_U|ACC_DATA_W;
-
-	// turn this into a real descriptor
-	fix_desc(&temp,1);
-
-	// set up our data in the pcb
-	pcb->uldt_desc = *(struct real_descriptor*)&temp;
-	pcb->uldt_selector = USER_SETTABLE;		// set the selector value
-
-	// now set it up in the current table...
-	*ldt_desc_p(USER_SETTABLE) = *(struct real_descriptor*)&temp;
-
-	mp_enable_preemption();
-
-	return USER_SETTABLE;
-}
-
 #endif	/* MACH_BSD */
 
 
@@ -791,6 +557,7 @@ thread_setuserstack(
 	thread_t	thread,
 	mach_vm_address_t	user_stack)
 {
+	pal_register_cache_state(thread, DIRTY);
 	if (thread_is_64bit(thread)) {
 		x86_saved_state64_t	*iss64;
 
@@ -817,6 +584,7 @@ thread_adjuserstack(
 	thread_t	thread,
 	int		adjust)
 {
+	pal_register_cache_state(thread, DIRTY);
 	if (thread_is_64bit(thread)) {
 		x86_saved_state64_t	*iss64;
 
@@ -845,6 +613,7 @@ thread_adjuserstack(
 void
 thread_setentrypoint(thread_t thread, mach_vm_address_t entry)
 {
+	pal_register_cache_state(thread, DIRTY);
 	if (thread_is_64bit(thread)) {
 		x86_saved_state64_t	*iss64;
 
@@ -864,6 +633,7 @@ thread_setentrypoint(thread_t thread, mach_vm_address_t entry)
 kern_return_t
 thread_setsinglestep(thread_t thread, int on)
 {
+	pal_register_cache_state(thread, DIRTY);
 	if (thread_is_64bit(thread)) {
 		x86_saved_state64_t	*iss64;
 
@@ -897,18 +667,15 @@ thread_setsinglestep(thread_t thread, int on)
 void *
 find_user_regs(thread_t thread)
 {
+	pal_register_cache_state(thread, DIRTY);
 	return USER_STATE(thread);
 }
 
 void *
 get_user_regs(thread_t th)
 {
-	if (th->machine.pcb)
-		return(USER_STATE(th));
-	else {
-		printf("[get_user_regs: thread does not have pcb]");
-		return NULL;
-	}
+	pal_register_cache_state(th, DIRTY);
+	return(USER_STATE(th));
 }
 
 #if CONFIG_DTRACE
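
Every routine in this file that touches a thread's saved user state now
marks the PAL register cache DIRTY first, so a PAL that caches register
state knows the saved copy has changed before it is edited. A sketch of the
pattern (the setter below is hypothetical):

    static void
    set_user_ip(thread_t thread, uint64_t ip)
    {
        /* Tell the PAL its cached copy of this thread's registers is
         * stale before editing the saved state underneath it. */
        pal_register_cache_state(thread, DIRTY);
        if (thread_is_64bit(thread))
            USER_REGS64(thread)->isf.rip = ip;
        else
            USER_REGS32(thread)->eip = (uint32_t)ip;
    }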
diff --git a/osfmk/i386/bsd_i386_native.c b/osfmk/i386/bsd_i386_native.c
new file mode 100644
index 000000000..13a7cb0aa
--- /dev/null
+++ b/osfmk/i386/bsd_i386_native.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <mach_rt.h>
+#include <mach_debug.h>
+#include <mach_ldebug.h>
+
+#include <mach/kern_return.h>
+#include <mach/mach_traps.h>
+#include <mach/thread_status.h>
+#include <mach/vm_param.h>
+
+#include <kern/counters.h>
+#include <kern/cpu_data.h>
+#include <kern/mach_param.h>
+#include <kern/task.h>
+#include <kern/thread.h>
+#include <kern/sched_prim.h>
+#include <kern/misc_protos.h>
+#include <kern/assert.h>
+#include <kern/debug.h>
+#include <kern/spl.h>
+#include <kern/syscall_sw.h>
+#include <ipc/ipc_port.h>
+#include <vm/vm_kern.h>
+#include <vm/pmap.h>
+
+#include <i386/cpu_number.h>
+#include <i386/eflags.h>
+#include <i386/proc_reg.h>
+#include <i386/tss.h>
+#include <i386/user_ldt.h>
+#include <i386/fpu.h>
+#include <i386/machdep_call.h>
+#include <i386/vmparam.h>
+#include <i386/mp_desc.h>
+#include <i386/misc_protos.h>
+#include <i386/thread.h>
+#include <i386/trap.h>
+#include <i386/seg.h>
+#include <mach/i386/syscall_sw.h>
+#include <sys/syscall.h>
+#include <sys/kdebug.h>
+#include <sys/errno.h>
+#include <../bsd/sys/sysent.h>
+
+
+/*
+ * Duplicate parent state in child
+ * for U**X fork.
+ */
+kern_return_t
+machine_thread_dup(
+    thread_t		parent,
+    thread_t		child
+)
+{
+	
+	pcb_t		parent_pcb = THREAD_TO_PCB(parent);
+	pcb_t		child_pcb = THREAD_TO_PCB(child);
+
+	/*
+	 * Copy over the x86_saved_state registers
+	 */
+	if (cpu_mode_is64bit()) {
+		if (thread_is_64bit(parent))
+			bcopy(USER_REGS64(parent), USER_REGS64(child), sizeof(x86_saved_state64_t));
+		else
+			bcopy(USER_REGS32(parent), USER_REGS32(child), sizeof(x86_saved_state_compat32_t));
+	} else
+		bcopy(USER_REGS32(parent), USER_REGS32(child), sizeof(x86_saved_state32_t));
+
+	/*
+	 * Check to see if parent is using floating point
+	 * and if so, copy the registers to the child
+	 */
+	fpu_dup_fxstate(parent, child);
+
+#ifdef	MACH_BSD
+	/*
+	 * Copy the parent's cthread id and USER_CTHREAD descriptor, if 32-bit.
+	 */
+	child_pcb->cthread_self = parent_pcb->cthread_self;
+	if (!thread_is_64bit(parent))
+		child_pcb->cthread_desc = parent_pcb->cthread_desc;
+
+	/*
+	 * FIXME - should a user specified LDT, TSS and V86 info
+	 * be duplicated as well?? - probably not.
+	 */
+	// duplicate any user LDT entry that was set; this seems appropriate
+	if (parent_pcb->uldt_selector != 0) {
+		child_pcb->uldt_selector = parent_pcb->uldt_selector;
+		child_pcb->uldt_desc = parent_pcb->uldt_desc;
+	}
+#endif
+
+	return (KERN_SUCCESS);
+}
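+
+/*
+ * Note that only per-thread machine state is duplicated above: the saved
+ * user registers, the FPU context, and the cthread/user-LDT settings.
+ * The child's address space and kernel stack come from the generic fork
+ * path.
+ */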
+
+void thread_set_parent(thread_t parent, int pid);
+
+void
+thread_set_parent(thread_t parent, int pid)
+{
+	pal_register_cache_state(parent, DIRTY);
+
+	if (thread_is_64bit(parent)) {
+		x86_saved_state64_t	*iss64;
+
+		iss64 = USER_REGS64(parent);
+
+		iss64->rax = pid;
+		iss64->rdx = 0;
+		iss64->isf.rflags &= ~EFL_CF;
+	} else {
+		x86_saved_state32_t	*iss32;
+
+		iss32 = USER_REGS32(parent);
+
+		iss32->eax = pid;
+		iss32->edx = 0;
+		iss32->efl &= ~EFL_CF;
+	}
+}
+
+/*
+ * thread_fast_set_cthread_self: Sets the machine kernel thread ID of the
+ * current thread to the given thread ID; fast version for 32-bit processes
+ *
+ * Parameters:    self                    Thread ID to set
+ *                
+ * Returns:        0                      Success
+ *                !0                      Not success
+ */
+kern_return_t
+thread_fast_set_cthread_self(uint32_t self)
+{
+	thread_t thread = current_thread();
+	pcb_t pcb = THREAD_TO_PCB(thread);
+	struct real_descriptor desc = {
+		.limit_low = 1,
+		.limit_high = 0,
+		.base_low = self & 0xffff,
+		.base_med = (self >> 16) & 0xff,
+		.base_high = (self >> 24) & 0xff,
+		.access = ACC_P|ACC_PL_U|ACC_DATA_W,
+		.granularity = SZ_32|SZ_G,
+	};
+
+	current_thread()->machine.cthread_self = (uint64_t) self;	/* preserve old func too */
+
+	/* assign descriptor */
+	mp_disable_preemption();
+	pcb->cthread_desc = desc;
+	*ldt_desc_p(USER_CTHREAD) = desc;
+	saved_state32(pcb->iss)->gs = USER_CTHREAD;
+	mp_enable_preemption();
+
+	return (USER_CTHREAD);
+}
+
+/*
+ * thread_fast_set_cthread_self64: Sets the machine kernel thread ID of the
+ * current thread to the given thread ID; fast version for 64-bit processes 
+ *
+ * Parameters:    self                    Thread ID
+ *                
+ * Returns:        0                      Success
+ *                !0                      Not success
+ */
+kern_return_t
+thread_fast_set_cthread_self64(uint64_t self)
+{
+	pcb_t pcb = THREAD_TO_PCB(current_thread());
+	cpu_data_t              *cdp;
+
+	/* check for canonical address, set 0 otherwise  */
+	if (!IS_USERADDR64_CANONICAL(self))
+		self = 0ULL;
+
+	pcb->cthread_self = self;
+	mp_disable_preemption();
+	cdp = current_cpu_datap();
+#if defined(__x86_64__)
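+	/*
+	 * Skip the relatively costly wrmsr only when both the per-cpu
+	 * shadow and the MSR itself already hold the new gs base.
+	 */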
+	if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) ||
+	    (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE)))
+		wrmsr64(MSR_IA32_KERNEL_GS_BASE, self);
+#endif
+	cdp->cpu_uber.cu_user_gs_base = self;
+	mp_enable_preemption();
+	return (USER_CTHREAD); /* N.B.: not a kern_return_t! */
+}
+
+/*
+ * thread_set_user_ldt is the interface for the user-level settable
+ * LDT entry feature.  Allowing a user to create arbitrary LDT entries
+ * seems to be too large of a security hole, so instead this mechanism
+ * exists to allow user-level processes to have an LDT entry that can
+ * be used in conjunction with the FS register.
+ *
+ * Swapping occurs inside the pcb.c file along with initialization
+ * when a thread is created.  The basic theory of operation is that
+ * pcb->uldt_selector contains either 0, meaning the process has not
+ * set up any entry, or the selector to be used in the FS register.
+ * pcb->uldt_desc contains the actual descriptor the user has set up,
+ * stored in machine-usable LDT format.
+ *
+ * Currently one entry is shared by all threads (USER_SETTABLE), but
+ * this could be changed in the future by changing how this routine
+ * allocates the selector.  There seems to be no real reason at this
+ * time to have this added feature, but in the future it might be
+ * needed.
+ *
+ * "address" is the linear address of the start of the data area;
+ * "size" is the size in bytes of the area; "flags" should always be
+ * set to 0 for now (in the future it could be used to set R/W
+ * permissions or other functions).  Currently the segment is created
+ * as a data segment, up to 1 megabyte in size, with full read/write
+ * permissions only.
+ *
+ * This call returns the segment selector or -1 if any error occurs.
+ */
+kern_return_t
+thread_set_user_ldt(uint32_t address, uint32_t size, uint32_t flags)
+{
+	pcb_t pcb;
+	struct fake_descriptor temp;
+
+	if (flags != 0)
+		return -1;		// flags not supported
+	if (size > 0xFFFFF)
+		return -1;		// size too big, 1 meg is the limit
+
+	mp_disable_preemption();
+
+	// create a "fake" descriptor so we can use fix_desc()
+	// to build a real one...
+	//   32 bit default operation size
+	//   standard read/write perms for a data segment
+	pcb = THREAD_TO_PCB(current_thread());
+	temp.offset = address;
+	temp.lim_or_seg = size;
+	temp.size_or_wdct = SZ_32;
+	temp.access = ACC_P|ACC_PL_U|ACC_DATA_W;
+
+	// turn this into a real descriptor
+	fix_desc(&temp,1);
+
+	// set up our data in the pcb
+	pcb->uldt_desc = *(struct real_descriptor*)&temp;
+	pcb->uldt_selector = USER_SETTABLE;		// set the selector value
+
+	// now set it up in the current table...
+	*ldt_desc_p(USER_SETTABLE) = *(struct real_descriptor*)&temp;
+
+	mp_enable_preemption();
+
+	return USER_SETTABLE;
+}
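
For reference, the descriptor that fix_desc() ultimately builds from the
"fake" one packs base, limit and access bits into the classic x86 layout.
A self-contained illustration of that packing (not xnu's fix_desc, and the
helper name is invented):

    #include <stdint.h>

    /* Pack a 32-bit flat data segment: P=1, DPL=3, writable data (0xF2,
     * i.e. ACC_P|ACC_PL_U|ACC_DATA_W), byte granularity, D/B=1. */
    uint64_t
    make_data_desc(uint32_t base, uint32_t limit)
    {
        uint64_t d = 0;
        d |= (uint64_t)(limit & 0xFFFFu);              /* limit 15:0   */
        d |= ((uint64_t)base & 0xFFFFFFu) << 16;       /* base 23:0    */
        d |= (uint64_t)0xF2 << 40;                     /* access byte  */
        d |= ((uint64_t)(limit >> 16) & 0xFu) << 48;   /* limit 19:16  */
        d |= (uint64_t)0x4 << 52;                      /* D/B=1, G=0   */
        d |= ((uint64_t)(base >> 24) & 0xFFu) << 56;   /* base 31:24   */
        return d;
    }

With G=0 the 20-bit limit is counted in bytes, which is exactly why
thread_set_user_ldt() rejects sizes above 0xFFFFF (1 MB).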
diff --git a/osfmk/i386/bzero.s b/osfmk/i386/bzero.s
index 034a6469c..cb6a0536b 100644
--- a/osfmk/i386/bzero.s
+++ b/osfmk/i386/bzero.s
@@ -90,7 +90,7 @@ ENTRY(memset)
  * void bzero(char * addr, size_t length)
  */
 Entry(blkclr)
-ENTRY(bzero)
+ENTRY2(bzero,__bzero)
 	pushl	%edi
 	movl	4+ 4(%esp),%edi		/* addr */
 	movl	4+ 8(%esp),%edx		/* length */
diff --git a/osfmk/i386/commpage/atomic.s b/osfmk/i386/commpage/atomic.s
deleted file mode 100644
index 769698b0f..000000000
--- a/osfmk/i386/commpage/atomic.s
+++ /dev/null
@@ -1,396 +0,0 @@
-/*
- * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-/* OSAtomic.h library native implementations. */
-
-// This is a regparm(3) subroutine used by:
-
-// bool OSAtomicCompareAndSwap32( int32_t old, int32_t new, int32_t *value);
-// int32_t OSAtomicAnd32( int32_t mask, int32_t *value);
-// int32_t OSAtomicOr32( int32_t mask, int32_t *value);
-// int32_t OSAtomicXor32( int32_t mask, int32_t *value);
-
-// It assumes old -> %eax, new -> %edx, value -> %ecx
-// on success: returns with ZF set
-// on failure: returns with *value in %eax, ZF clear
-
-// The first word of the routine contains the address of the first instruction,
-// so callers can pass parameters in registers by using the absolute:
-
-// 	call *_COMPARE_AND_SWAP32
-
-//	TODO: move the .long onto a separate page to reduce icache pollution (?)
-
-COMMPAGE_FUNCTION_START(compare_and_swap32_mp, 32, 4)
-.long	_COMM_PAGE_COMPARE_AND_SWAP32+4
-	lock
-	cmpxchgl  %edx, (%ecx)
-	ret
-COMMPAGE_DESCRIPTOR(compare_and_swap32_mp,_COMM_PAGE_COMPARE_AND_SWAP32,0,kUP)
-
-COMMPAGE_FUNCTION_START(compare_and_swap32_up, 32, 4)
-.long	_COMM_PAGE_COMPARE_AND_SWAP32+4
-	cmpxchgl %edx, (%ecx)
-	ret
-COMMPAGE_DESCRIPTOR(compare_and_swap32_up,_COMM_PAGE_COMPARE_AND_SWAP32,kUP,0)
-
-// This is a subroutine used by:
-// bool OSAtomicCompareAndSwap64( int64_t old, int64_t new, int64_t *value);
-
-// It assumes old -> %eax/%edx, new -> %ebx/%ecx, value -> %esi
-// on success: returns with ZF set
-// on failure: returns with *value in %eax/%edx, ZF clear
-
-COMMPAGE_FUNCTION_START(compare_and_swap64_mp, 32, 4)
-.long	_COMM_PAGE_COMPARE_AND_SWAP64+4
-	lock
-	cmpxchg8b (%esi)
-	ret
-COMMPAGE_DESCRIPTOR(compare_and_swap64_mp,_COMM_PAGE_COMPARE_AND_SWAP64,0,kUP)
-
-COMMPAGE_FUNCTION_START(compare_and_swap64_up, 32, 4)
-.long	_COMM_PAGE_COMPARE_AND_SWAP64+4
-	cmpxchg8b (%esi)
-	ret
-COMMPAGE_DESCRIPTOR(compare_and_swap64_up,_COMM_PAGE_COMPARE_AND_SWAP64,kUP,0)
-
-// This is a subroutine used by:
-// bool OSAtomicTestAndSet( uint32_t n, void *value );
-// It assumes n -> %eax, value -> %edx
-
-// Returns: old value of bit in CF
-
-COMMPAGE_FUNCTION_START(bit_test_and_set_mp, 32, 4)
-.long	_COMM_PAGE_BTS+4
-	lock
-	btsl %eax, (%edx)
-	ret
-COMMPAGE_DESCRIPTOR(bit_test_and_set_mp,_COMM_PAGE_BTS,0,kUP)
-
-COMMPAGE_FUNCTION_START(bit_test_and_set_up, 32, 4)
-.long	_COMM_PAGE_BTS+4
-	btsl %eax, (%edx)
-	ret
-COMMPAGE_DESCRIPTOR(bit_test_and_set_up,_COMM_PAGE_BTS,kUP,0)
-
-// This is a subroutine used by:
-// bool OSAtomicTestAndClear( uint32_t n, void *value );
-// It assumes n -> %eax, value -> %edx
-
-// Returns: old value of bit in CF
-
-COMMPAGE_FUNCTION_START(bit_test_and_clear_mp, 32, 4)
-.long	_COMM_PAGE_BTC+4
-	lock
-	btrl %eax, (%edx)
-	ret
-COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp,_COMM_PAGE_BTC,0,kUP)
-
-COMMPAGE_FUNCTION_START(bit_test_and_clear_up, 32, 4)
-.long	_COMM_PAGE_BTC+4
-	btrl %eax, (%edx)
-	ret
-COMMPAGE_DESCRIPTOR(bit_test_and_clear_up,_COMM_PAGE_BTC,kUP,0)
-
-// This is a subroutine used by:
-// int32_t OSAtomicAdd32( int32_t amt, int32_t *value );
-// It assumes amt -> %eax, value -> %edx
-
-// Returns: old value in %eax
-// NB: OSAtomicAdd32 returns the new value,  so clients will add amt to %eax 
-
-COMMPAGE_FUNCTION_START(atomic_add32_mp, 32, 4)
-.long	_COMM_PAGE_ATOMIC_ADD32+4
-	lock
-	xaddl	%eax, (%edx)
-	ret
-COMMPAGE_DESCRIPTOR(atomic_add32_mp,_COMM_PAGE_ATOMIC_ADD32,0,kUP)
-
-COMMPAGE_FUNCTION_START(atomic_add32_up, 32, 4)
-.long	_COMM_PAGE_ATOMIC_ADD32+4
-	xaddl	%eax, (%edx)
-	ret
-COMMPAGE_DESCRIPTOR(atomic_add32_up,_COMM_PAGE_ATOMIC_ADD32,kUP,0)
-    
-    
-// OSMemoryBarrier()
-// These are used both in 32 and 64-bit mode.  We use a fence even on UP
-// machines, so this function can be used with nontemporal stores.
-
-COMMPAGE_FUNCTION_START(memory_barrier, 32, 4)
-	lock
-	addl	$0,(%esp)
-	ret
-COMMPAGE_DESCRIPTOR(memory_barrier,_COMM_PAGE_MEMORY_BARRIER,0,kHasSSE2);
-
-COMMPAGE_FUNCTION_START(memory_barrier_sse2, 32, 4)
-	mfence
-	ret
-COMMPAGE_DESCRIPTOR(memory_barrier_sse2,_COMM_PAGE_MEMORY_BARRIER,kHasSSE2,0);
-    
-
-/*
- *	typedef	volatile struct {
- *		void	*opaque1;  <-- ptr to 1st queue element or null
- *		long	 opaque2;  <-- generation count
- *	} OSQueueHead;
- *
- * void  OSAtomicEnqueue( OSQueueHead *list, void *new, size_t offset);
- */
-
-COMMPAGE_FUNCTION_START(AtomicEnqueue, 32, 4)
-	pushl	%edi
-	pushl	%esi
-	pushl	%ebx
-	movl	16(%esp),%edi	// %edi == ptr to list head
-	movl	20(%esp),%ebx	// %ebx == new
-	movl	24(%esp),%esi	// %esi == offset
-	movl	(%edi),%eax	// %eax == ptr to 1st element in Q
-	movl	4(%edi),%edx	// %edx == current generation count
-1:
-	movl	%eax,(%ebx,%esi)// link to old list head from new element
-	movl	%edx,%ecx
-	incl	%ecx		// increment generation count
-	lock			// always lock for now...
-	cmpxchg8b (%edi)	// ...push on new element
-	jnz	1b
-	popl	%ebx
-	popl	%esi
-	popl	%edi
-	ret
-COMMPAGE_DESCRIPTOR(AtomicEnqueue,_COMM_PAGE_ENQUEUE,0,0)
-	
-	
-/* void* OSAtomicDequeue( OSQueueHead *list, size_t offset); */
-
-COMMPAGE_FUNCTION_START(AtomicDequeue, 32, 4)
-	pushl	%edi
-	pushl	%esi
-	pushl	%ebx
-	movl	16(%esp),%edi	// %edi == ptr to list head
-	movl	20(%esp),%esi	// %esi == offset
-	movl	(%edi),%eax	// %eax == ptr to 1st element in Q
-	movl	4(%edi),%edx	// %edx == current generation count
-1:
-	testl	%eax,%eax	// list empty?
-	jz	2f		// yes
-	movl	(%eax,%esi),%ebx // point to 2nd in Q
-	movl	%edx,%ecx
-	incl	%ecx		// increment generation count
-	lock			// always lock for now...
-	cmpxchg8b (%edi)	// ...pop off 1st element
-	jnz	1b
-2:
-	popl	%ebx
-	popl	%esi
-	popl	%edi
-	ret			// ptr to 1st element in Q still in %eax
-COMMPAGE_DESCRIPTOR(AtomicDequeue,_COMM_PAGE_DEQUEUE,0,0)
-
-
-
-/************************* x86_64 versions follow **************************/
-
-
-// This is a subroutine used by:
-
-// bool OSAtomicCompareAndSwap32( int32_t old, int32_t new, int32_t *value);
-// int32_t OSAtomicAnd32( int32_t mask, int32_t *value);
-// int32_t OSAtomicOr32( int32_t mask, int32_t *value);
-// int32_t OSAtomicXor32( int32_t mask, int32_t *value);
-
-// It assumes: old -> %rdi  (ie, it follows the ABI parameter conventions)
-//             new -> %rsi
-//             value -> %rdx
-// on success: returns with ZF set
-// on failure: returns with *value in %eax, ZF clear
-
-COMMPAGE_FUNCTION_START(compare_and_swap32_mp_64, 64, 4)
-	movl	%edi,%eax			// put old value where "cmpxchg" wants it
-	lock
-	cmpxchgl  %esi, (%rdx)
-	ret
-COMMPAGE_DESCRIPTOR(compare_and_swap32_mp_64,_COMM_PAGE_COMPARE_AND_SWAP32,0,kUP)
-
-COMMPAGE_FUNCTION_START(compare_and_swap32_up_64, 64, 4)
-	movl	%edi,%eax			// put old value where "cmpxchg" wants it
-	cmpxchgl  %esi, (%rdx)
-	ret
-COMMPAGE_DESCRIPTOR(compare_and_swap32_up_64,_COMM_PAGE_COMPARE_AND_SWAP32,kUP,0)
-
-// This is a subroutine used by:
-// bool OSAtomicCompareAndSwap64( int64_t old, int64_t new, int64_t *value);
-
-// It assumes: old -> %rdi  (ie, it follows the ABI parameter conventions)
-//             new -> %rsi
-//             value -> %rdx
-// on success: returns with ZF set
-// on failure: returns with *value in %rax, ZF clear
-
-COMMPAGE_FUNCTION_START(compare_and_swap64_mp_64, 64, 4)
-	movq	%rdi,%rax			// put old value where "cmpxchg" wants it
-	lock
-	cmpxchgq  %rsi, (%rdx)
-	ret
-COMMPAGE_DESCRIPTOR(compare_and_swap64_mp_64,_COMM_PAGE_COMPARE_AND_SWAP64,0,kUP)
-
-COMMPAGE_FUNCTION_START(compare_and_swap64_up_64, 64, 4)
-	movq	%rdi,%rax			// put old value where "cmpxchg" wants it
-	cmpxchgq  %rsi, (%rdx)
-	ret
-COMMPAGE_DESCRIPTOR(compare_and_swap64_up_64,_COMM_PAGE_COMPARE_AND_SWAP64,kUP,0)
-
-// This is a subroutine used by:
-// bool OSAtomicTestAndSet( uint32_t n, void *value );
-// It is called with standard register conventions:
-//			n = %rdi
-//			value = %rsi
-// Returns: old value of bit in CF
-
-COMMPAGE_FUNCTION_START(bit_test_and_set_mp_64, 64, 4)
-	lock
-	btsl %edi, (%rsi)
-	ret
-COMMPAGE_DESCRIPTOR(bit_test_and_set_mp_64,_COMM_PAGE_BTS,0,kUP)
-
-COMMPAGE_FUNCTION_START(bit_test_and_set_up_64, 64, 4)
-	btsl %edi, (%rsi)
-	ret
-COMMPAGE_DESCRIPTOR(bit_test_and_set_up_64,_COMM_PAGE_BTS,kUP,0)
-
-// This is a subroutine used by:
-// bool OSAtomicTestAndClear( uint32_t n, void *value );
-// It is called with standard register conventions:
-//			n = %rdi
-//			value = %rsi
-// Returns: old value of bit in CF
-
-COMMPAGE_FUNCTION_START(bit_test_and_clear_mp_64, 64, 4)
-	lock
-	btrl %edi, (%rsi)
-	ret
-COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp_64,_COMM_PAGE_BTC,0,kUP)
-
-COMMPAGE_FUNCTION_START(bit_test_and_clear_up_64, 64, 4)
-	btrl %edi, (%rsi)
-	ret
-COMMPAGE_DESCRIPTOR(bit_test_and_clear_up_64,_COMM_PAGE_BTC,kUP,0)
-
-// This is a subroutine used by:
-// int32_t OSAtomicAdd32( int32_t amt, int32_t *value );
-// It is called with standard register conventions:
-//			amt = %rdi
-//			value = %rsi
-// Returns: old value in %edi
-// NB: OSAtomicAdd32 returns the new value,  so clients will add amt to %edi 
-
-COMMPAGE_FUNCTION_START(atomic_add32_mp_64, 64, 4)
-	lock
-	xaddl	%edi, (%rsi)
-	ret
-COMMPAGE_DESCRIPTOR(atomic_add32_mp_64,_COMM_PAGE_ATOMIC_ADD32,0,kUP)
-
-COMMPAGE_FUNCTION_START(atomic_add32_up_64, 64, 4)
-	xaddl	%edi, (%rsi)
-	ret
-COMMPAGE_DESCRIPTOR(atomic_add32_up_64,_COMM_PAGE_ATOMIC_ADD32,kUP,0)
-
-// This is a subroutine used by:
-// int64_t OSAtomicAdd64( int64_t amt, int64_t *value );
-// It is called with standard register conventions:
-//			amt = %rdi
-//			value = %rsi
-// Returns: old value in %rdi
-// NB: OSAtomicAdd64 returns the new value,  so clients will add amt to %rdi 
-
-COMMPAGE_FUNCTION_START(atomic_add64_mp_64, 64, 4)
-	lock
-	xaddq	%rdi, (%rsi)
-	ret
-COMMPAGE_DESCRIPTOR(atomic_add64_mp_64,_COMM_PAGE_ATOMIC_ADD64,0,kUP)
-
-COMMPAGE_FUNCTION_START(atomic_add64_up_64, 64, 4)
-	xaddq	%rdi, (%rsi)
-	ret
-COMMPAGE_DESCRIPTOR(atomic_add64_up_64,_COMM_PAGE_ATOMIC_ADD64,kUP,0)
-
-
-/*
- *	typedef	volatile struct {
- *		void	*opaque1;  <-- ptr to 1st queue element or null
- *		long	 opaque2;  <-- generation count
- *	} OSQueueHead;
- *
- * void  OSAtomicEnqueue( OSQueueHead *list, void *new, size_t offset);
- */
-
-// %rdi == list head, %rsi == new, %rdx == offset
-
-COMMPAGE_FUNCTION_START(AtomicEnqueue_64, 64, 4)
-	pushq	%rbx
-	movq	%rsi,%rbx	// %rbx == new
-	movq	%rdx,%rsi	// %rsi == offset
-	movq	(%rdi),%rax	// %rax == ptr to 1st element in Q
-	movq	8(%rdi),%rdx	// %rdx == current generation count
-1:
-	movq	%rax,(%rbx,%rsi)// link to old list head from new element
-	movq	%rdx,%rcx
-	incq	%rcx		// increment generation count
-	lock			// always lock for now...
-	cmpxchg16b (%rdi)	// ...push on new element
-	jnz	1b
-	popq	%rbx
-	ret
-COMMPAGE_DESCRIPTOR(AtomicEnqueue_64,_COMM_PAGE_ENQUEUE,0,0)
-	
-	
-/* void* OSAtomicDequeue( OSQueueHead *list, size_t offset); */
-
-// %rdi == list head, %rsi == offset
-
-COMMPAGE_FUNCTION_START(AtomicDequeue_64, 64, 4)
-	pushq	%rbx
-	movq	(%rdi),%rax	// %rax == ptr to 1st element in Q
-	movq	8(%rdi),%rdx	// %rdx == current generation count
-1:
-	testq	%rax,%rax	// list empty?
-	jz	2f		// yes
-	movq	(%rax,%rsi),%rbx // point to 2nd in Q
-	movq	%rdx,%rcx
-	incq	%rcx		// increment generation count
-	lock			// always lock for now...
-	cmpxchg16b (%rdi)	// ...pop off 1st element
-	jnz	1b
-2:
-	popq	%rbx
-	ret			// ptr to 1st element in Q still in %rax
-COMMPAGE_DESCRIPTOR(AtomicDequeue_64,_COMM_PAGE_DEQUEUE,0,0)
diff --git a/osfmk/i386/commpage/bcopy_scalar.s b/osfmk/i386/commpage/bcopy_scalar.s
deleted file mode 100644
index f87242ac6..000000000
--- a/osfmk/i386/commpage/bcopy_scalar.s
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from locore.s.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:   
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *      This product includes software developed by the University of
- *      California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-        
-#include <sys/appleapiopts.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-#include <machine/asm.h>
-
-        /*
-         * (ov)bcopy (src,dst,cnt)
-         *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
-         */
- 
-COMMPAGE_FUNCTION_START(bcopy_scalar, 32, 5)
-	pushl	%ebp		/* set up a frame for backtraces */
-	movl	%esp,%ebp
-        pushl   %esi
-        pushl   %edi
-        movl    8(%ebp),%esi
-        movl    12(%ebp),%edi
-	jmp	1f
-/*
- * These need to be 32 bytes from Lbcopy_scalar
- */
-	.align 5, 0x90
-Lmemcpy_scalar:
-Lmemmove_scalar:
-	pushl	%ebp		/* set up a frame for backtraces */
-	movl	%esp,%ebp
-        pushl   %esi
-        pushl   %edi
-        movl    8(%ebp),%edi
-        movl    12(%ebp),%esi
-        movl    %edi,%eax
-1:
-        movl    16(%ebp),%ecx
-        movl    %edi,%edx
-        subl    %esi,%edx
-        cmpl    %ecx,%edx       /* overlapping? */
-        jb      2f
-        cld                     /* nope, copy forwards. */
-        movl    %ecx,%edx
-        shrl    $2,%ecx         /* copy by words */
-        rep
-        movsl
-        movl    %edx,%ecx
-        andl    $3,%ecx         /* any bytes left? */
-        rep
-        movsb
-        popl    %edi
-        popl    %esi
-	popl	%ebp
-        ret
-2:
-        addl    %ecx,%edi       /* copy backwards. */
-        addl    %ecx,%esi
-        std
-        movl    %ecx,%edx
-        andl    $3,%ecx         /* any fractional bytes? */
-        decl    %edi
-        decl    %esi
-        rep
-        movsb
-        movl    %edx,%ecx       /* copy remainder by words */
-        shrl    $2,%ecx
-        subl    $3,%esi
-        subl    $3,%edi
-        rep
-        movsl
-        popl    %edi
-        popl    %esi
-	popl	%ebp
-        cld
-        ret
-
-COMMPAGE_DESCRIPTOR(bcopy_scalar,_COMM_PAGE_BCOPY,0,kHasSSE2+kHasSupplementalSSE3)
diff --git a/osfmk/i386/commpage/bcopy_sse2.s b/osfmk/i386/commpage/bcopy_sse2.s
deleted file mode 100644
index 9e19b3892..000000000
--- a/osfmk/i386/commpage/bcopy_sse2.s
+++ /dev/null
@@ -1,473 +0,0 @@
-/*
- * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-        
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-/*
- * The bcopy/memcpy loops, tuned for Pentium-M class processors with SSE2
- * and 64-byte cache lines, such as Core and Core 2.
- *
- * The following #defines are tightly coupled to the u-architecture:
- */
-
-#define kShort  80			// too short to bother with SSE (must be >=80)
-#define kVeryLong   (500*1024)          // large enough for non-temporal stores (must be >= 8192)
-#define kBigChunk   (256*1024)          // outer loop chunk size for kVeryLong sized operands
-#define kFastUCode  (16*1024)		// cutoff for microcode fastpath for "rep/movsl"
-
-
-// void bcopy(const void *src, void *dst, size_t len);
- 
-COMMPAGE_FUNCTION_START(bcopy_sse2, 32, 5)
-	pushl	%ebp			// set up a frame for backtraces
-	movl	%esp,%ebp
-        pushl   %esi
-        pushl   %edi
-        movl    8(%ebp),%esi		// get source ptr
-        movl    12(%ebp),%edi           // get dest ptr
-        jmp     Ljoin 
-
-//
-// void *memcpy(void *dst, const void *src, size_t len);
-// void *memmove(void *dst, const void *src, size_t len);
-//
-// NB: These need to be 32 bytes from bcopy():
-//
-
-        .align	5, 0x90
-Lmemcpy:				// void *memcpy(void *dst, const void *src, size_t len)
-Lmemmove:				// void *memmove(void *dst, const void *src, size_t len)
-	pushl	%ebp			// set up a frame for backtraces
-	movl	%esp,%ebp
-        pushl   %esi
-        pushl   %edi
-        movl    8(%ebp),%edi		// get dest ptr
-        movl    12(%ebp),%esi           // get source ptr
-        
-Ljoin:                                  // here from bcopy() with esi and edi loaded
-        movl    16(%ebp),%ecx           // get length
-        movl    %edi,%edx
-        subl    %esi,%edx               // (dest - source)
-        cmpl    %ecx,%edx               // must move in reverse if (dest - source) < length
-        jb      LReverseIsland
-Lrejoin:                                // here from very-long-operand copies
-        cmpl    $(kShort),%ecx          // long enough to bother with SSE?
-        ja      LNotShort               // yes
-        
-// Handle short forward copies.  As the most common case, this is the fall-through path.
-//      ecx = length (<= kShort)
-//      esi = source ptr
-//      edi = dest ptr
-
-Lshort:
-	movl    %ecx,%edx		// copy length
-	shrl	$2,%ecx			// get #doublewords
-	jz	LLeftovers
-2:					// loop copying doublewords
-	movl	(%esi),%eax
-	addl	$4,%esi
-	movl	%eax,(%edi)
-	addl	$4,%edi
-	dec	%ecx
-	jnz	2b
-LLeftovers:				// handle leftover bytes (0..3) in last word
-	andl	$3,%edx			// any leftover bytes?
-	jz	5f
-4:					// loop copying bytes
-	movb	(%esi),%al
-	inc	%esi
-	movb	%al,(%edi)
-	inc	%edi
-	dec	%edx
-	jnz	4b
-5:
-        movl    8(%ebp),%eax		// get return value (dst ptr) for memcpy/memmove
-        popl    %edi
-        popl    %esi
-	popl	%ebp
-        ret
-
-
-LReverseIsland:				// keep the "jb" above a short branch...
-	jmp	LReverse		// ...because reverse moves are uncommon
-
-
-// Handle forward moves that are long enough to justify use of SSE3.
-// First, 16-byte align the destination.
-//      ecx = length (> kShort)
-//      esi = source ptr
-//      edi = dest ptr
-
-LNotShort:
-        cmpl    $(kVeryLong),%ecx       // long enough to justify heavyweight loops?
-        movl    %edi,%edx               // copy destination
-        jae     LVeryLong		// use very-long-operand path
-        negl    %edx
-        andl    $15,%edx                // get #bytes to align destination
-	jz	LDestAligned		// already aligned
-        subl    %edx,%ecx               // decrement length
-1:					// loop copying 1..15 bytes
-	movb	(%esi),%al
-	inc	%esi
-	movb	%al,(%edi)
-	inc	%edi
-	dec	%edx
-	jnz	1b
-        
-// Destination is now aligned.  Prepare for forward loops over 64-byte chunks.
-// Since kShort>=80 and we've moved at most 15 bytes already, there is at least one chunk.
-
-LDestAligned:
-        movl    %ecx,%edx               // copy length
-	movl	%ecx,%eax		// twice
-        andl    $63,%ecx                // get remaining bytes for Lshort
-        andl    $-64,%edx               // get number of bytes we will copy in inner loop
-        addl    %edx,%esi               // point to 1st byte not copied
-        addl    %edx,%edi
-        negl    %edx                    // now generate offset to 1st byte to be copied
-	testl	$15,%esi		// is source aligned too?
-	jnz	LUnalignedLoop		// no
-
-
-	cmpl	$(kFastUCode),%eax	// long enough for the fastpath in microcode?
-	jb	LAlignedLoop		// no, use SSE
-	cld				// we'll move forward
-	movl	%eax,%ecx		// copy length again
-	shrl	$2,%ecx			// compute #words to move
-        addl    %edx,%esi               // restore ptrs to 1st byte of source and dest
-        addl    %edx,%edi
-	rep				// the u-code will optimize this
-	movsl
-	movl	%eax,%edx		// original length
-	jmp	LLeftovers		// handle 0..3 leftover bytes
-
-
-// Forward aligned loop for medium length operands (kShort < n < kVeryLong).
-
-	.align	4,0x90			// 16-byte align inner loops
-LAlignedLoop:                           // loop over 64-byte chunks
-        movdqa  (%esi,%edx),%xmm0
-        movdqa  16(%esi,%edx),%xmm1
-        movdqa  32(%esi,%edx),%xmm2
-        movdqa  48(%esi,%edx),%xmm3
-
-        movdqa  %xmm0,(%edi,%edx)
-        movdqa  %xmm1,16(%edi,%edx)
-        movdqa  %xmm2,32(%edi,%edx)
-        movdqa  %xmm3,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     LAlignedLoop
-        
-        jmp     Lshort                  // copy remaining 0..15 bytes and done
-            
-
-// Forward unaligned loop for medium length operands (kShort < n < kVeryLong).
-// Note that LDDQU==MOVDQU on these machines, ie we don't care when we cross
-// source cache lines.
-
-	.align	4,0x90			// 16-byte align inner loops
-LUnalignedLoop:                         // loop over 64-byte chunks
-        movdqu  (%esi,%edx),%xmm0	// the loads are unaligned
-        movdqu  16(%esi,%edx),%xmm1
-        movdqu  32(%esi,%edx),%xmm2
-        movdqu  48(%esi,%edx),%xmm3
-        
-        movdqa  %xmm0,(%edi,%edx)	// we can use aligned stores
-        movdqa  %xmm1,16(%edi,%edx)
-        movdqa  %xmm2,32(%edi,%edx)
-        movdqa  %xmm3,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     LUnalignedLoop
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-        
-
-// Very long forward moves.  These are at least several pages, so we loop over big
-// chunks of memory (kBigChunk in size.)  We first prefetch the chunk, and then copy
-// it using non-temporal stores.  Hopefully all the reads occur in the prefetch loop,
-// so the copy loop reads from L2 and writes directly to memory (with write combining.)
-// This minimizes bus turnaround and maintains good DRAM page locality.
-// Note that for this scheme to work, kVeryLong must be a large fraction of L2 cache
-// size.  Otherwise, it is counter-productive to bypass L2 on the stores.
-//      ecx = length (>= kVeryLong bytes)
-//      edi = dest (aligned)
-//      esi = source
-
-LVeryLong:
-	pushl	%ebx			// we'll need to use this
-	movl	%edi,%ebx		// copy dest ptr
-	negl	%ebx
-	andl	$63,%ebx		// get #bytes to cache line align destination
-	jz	LBigChunkLoop		// already aligned
-	
-// Cache line align destination, so temporal stores in copy loops work right.
-
-	pushl	%ecx			// save total length remaining
-	pushl	%ebx			// arg3 - #bytes to align destination (1..63)
-	pushl	%esi			// arg2 - source
-	pushl	%edi			// arg1 - dest
-	call	Lmemcpy			// align the destination
-	movl	12(%esp),%ecx		// recover total length
-	addl	$16,%esp
-	addl	%ebx,%esi		// adjust ptrs and lengths past copy
-	addl	%ebx,%edi
-	subl	%ebx,%ecx
-	
-// Loop over big chunks.
-//      ecx = length remaining (>= 4096)
-//      edi = dest (64-byte aligned)
-//      esi = source (may be unaligned)
-	
-LBigChunkLoop:
-        movl    $(kBigChunk),%edx       // assume we can do a full chunk
-	cmpl	%edx,%ecx		// do we have a full chunk left to do?
-	cmovbl	%ecx,%edx		// if not, only move what we have left
-	andl	$-4096,%edx		// we work in page multiples
-	xor	%eax,%eax		// initialize chunk offset
-	jmp	LTouchLoop
-
-// Because the source may be unaligned, we use byte loads to touch.
-//      ecx = length remaining (including this chunk)
-//      edi = ptr to start of dest chunk
-//      esi = ptr to start of source chunk
-//	edx = chunk length (multiples of pages)
-//	ebx = scratch reg used to read a byte of each cache line
-//	eax = chunk offset
-
-	.align	4,0x90			// 16-byte align inner loops
-LTouchLoop:
-	movzb	(%esi,%eax),%ebx	// touch line 0, 2, 4, or 6 of page
-	movzb	1*64(%esi,%eax),%ebx	// touch line 1, 3, 5, or 7
-	movzb	8*64(%esi,%eax),%ebx	// touch line 8, 10, 12, or 14
-	movzb	9*64(%esi,%eax),%ebx	// etc
-	
-	movzb	16*64(%esi,%eax),%ebx
-	movzb	17*64(%esi,%eax),%ebx
-	movzb	24*64(%esi,%eax),%ebx
-	movzb	25*64(%esi,%eax),%ebx
-
-	movzb	32*64(%esi,%eax),%ebx
-	movzb	33*64(%esi,%eax),%ebx
-	movzb	40*64(%esi,%eax),%ebx
-	movzb	41*64(%esi,%eax),%ebx
-
-	movzb	48*64(%esi,%eax),%ebx
-	movzb	49*64(%esi,%eax),%ebx
-	movzb	56*64(%esi,%eax),%ebx
-	movzb	57*64(%esi,%eax),%ebx
-	
-	subl	$-128,%eax		// next slice of page (subtract of -128 fits an 8-bit immediate; add of 128 would not)
-	testl	$512,%eax		// done with this page?
-	jz	LTouchLoop		// no, next of four slices
-	addl	$(4096-512),%eax	// move on to next page
-	cmpl	%eax,%edx		// done with this chunk?
-	jnz	LTouchLoop		// no, do next page
-	
-// The chunk has been pre-fetched, now copy it using non-temporal stores.
-// There are two copy loops, depending on whether the source is 16-byte aligned
-// or not.
-
-	addl	%edx,%esi		// increment ptrs by chunk length
-	addl	%edx,%edi
-	subl	%edx,%ecx		// adjust remaining length
-	negl	%edx			// prepare loop index (counts up to 0)
-	testl	$15,%esi		// is source 16-byte aligned?
-	jnz	LVeryLongUnaligned	// source is not aligned
-	jmp	LVeryLongAligned
-
-	.align	4,0x90			// 16-byte align inner loops
-LVeryLongAligned:			// aligned loop over 128-bytes
-        movdqa  (%esi,%edx),%xmm0
-        movdqa  16(%esi,%edx),%xmm1
-        movdqa  32(%esi,%edx),%xmm2
-        movdqa  48(%esi,%edx),%xmm3
-        movdqa  64(%esi,%edx),%xmm4
-        movdqa  80(%esi,%edx),%xmm5
-        movdqa  96(%esi,%edx),%xmm6
-        movdqa  112(%esi,%edx),%xmm7
-
-        movntdq %xmm0,(%edi,%edx)
-        movntdq %xmm1,16(%edi,%edx)
-        movntdq %xmm2,32(%edi,%edx)
-        movntdq %xmm3,48(%edi,%edx)
-        movntdq %xmm4,64(%edi,%edx)
-        movntdq %xmm5,80(%edi,%edx)
-        movntdq %xmm6,96(%edi,%edx)
-        movntdq %xmm7,112(%edi,%edx)
-        
-        subl    $-128,%edx		// add 128 with an 8-bit immediate
-        jnz	LVeryLongAligned
-	jmp	LVeryLongChunkEnd
-
-	.align	4,0x90			// 16-byte align inner loops
-LVeryLongUnaligned:			// unaligned loop over 128-bytes
-        movdqu  (%esi,%edx),%xmm0
-        movdqu  16(%esi,%edx),%xmm1
-        movdqu  32(%esi,%edx),%xmm2
-        movdqu  48(%esi,%edx),%xmm3
-        movdqu  64(%esi,%edx),%xmm4
-        movdqu  80(%esi,%edx),%xmm5
-        movdqu  96(%esi,%edx),%xmm6
-        movdqu  112(%esi,%edx),%xmm7
-
-        movntdq %xmm0,(%edi,%edx)
-        movntdq %xmm1,16(%edi,%edx)
-        movntdq %xmm2,32(%edi,%edx)
-        movntdq %xmm3,48(%edi,%edx)
-        movntdq %xmm4,64(%edi,%edx)
-        movntdq %xmm5,80(%edi,%edx)
-        movntdq %xmm6,96(%edi,%edx)
-        movntdq %xmm7,112(%edi,%edx)
-        
-        subl    $-128,%edx		// add 128 with an 8-bit immediate
-        jnz     LVeryLongUnaligned
-
-LVeryLongChunkEnd:
-	cmpl	$4096,%ecx		// at least another page to go?
-	jae	LBigChunkLoop		// yes
-	
-	sfence				// required by non-temporal stores
-	popl	%ebx
-        jmp     Lrejoin                 // handle remaining (0..4095) bytes
-
-
-// Reverse moves.
-//      ecx = length
-//      esi = source ptr
-//      edi = dest ptr
-
-LReverse:
-        addl    %ecx,%esi               // point to end of strings
-        addl    %ecx,%edi
-        cmpl    $(kShort),%ecx          // long enough to bother with SSE?
-        ja      LReverseNotShort        // yes
-
-// Handle reverse short copies.
-//      ecx = length
-//      esi = one byte past end of source
-//      edi = one byte past end of dest
-
-LReverseShort:
-	movl    %ecx,%edx		// copy length
-	shrl	$2,%ecx			// #words
-	jz	3f
-1:
-	subl	$4,%esi
-	movl	(%esi),%eax
-	subl	$4,%edi
-	movl	%eax,(%edi)
-	dec	%ecx
-	jnz	1b
-3:
-	andl	$3,%edx			// bytes?
-	jz	5f
-4:
-	dec	%esi
-	movb	(%esi),%al
-	dec	%edi
-	movb	%al,(%edi)
-	dec	%edx
-	jnz	4b
-5:
-        movl    8(%ebp),%eax		// get return value (dst ptr) for memcpy/memmove
-        popl    %edi
-        popl    %esi
-	popl	%ebp
-        ret
-
-// Handle a reverse move long enough to justify using SSE.
-//      ecx = length
-//      esi = one byte past end of source
-//      edi = one byte past end of dest
-
-LReverseNotShort:
-        movl    %edi,%edx               // copy destination
-        andl    $15,%edx                // get #bytes to align destination
-        je      LReverseDestAligned     // already aligned
-        subl	%edx,%ecx		// adjust length
-1:					// loop copying 1..15 bytes
-	dec	%esi
-	movb	(%esi),%al
-	dec	%edi
-	movb	%al,(%edi)
-	dec	%edx
-	jnz	1b
-        
-// Destination is now aligned.  Prepare for reverse loops.
-
-LReverseDestAligned:
-        movl    %ecx,%edx               // copy length
-        andl    $63,%ecx                // get remaining bytes for Lshort
-        andl    $-64,%edx               // get number of bytes we will copy in inner loop
-        subl    %edx,%esi               // point to endpoint of copy
-        subl    %edx,%edi
-	testl	$15,%esi		// is source aligned too?
-        jnz     LReverseUnalignedLoop   // no
-	jmp	LReverseAlignedLoop	// use aligned loop
-
-	.align	4,0x90			// 16-byte align inner loops
-LReverseAlignedLoop:                    // loop over 64-byte chunks
-        movdqa  -16(%esi,%edx),%xmm0
-        movdqa  -32(%esi,%edx),%xmm1
-        movdqa  -48(%esi,%edx),%xmm2
-        movdqa  -64(%esi,%edx),%xmm3
-
-        movdqa  %xmm0,-16(%edi,%edx)
-        movdqa  %xmm1,-32(%edi,%edx)
-        movdqa  %xmm2,-48(%edi,%edx)
-        movdqa  %xmm3,-64(%edi,%edx)
-        
-        subl    $64,%edx
-        jne     LReverseAlignedLoop
-        
-        jmp     LReverseShort           // copy remaining 0..63 bytes and done
-
-    
-// Reverse, unaligned loop.  LDDQU==MOVDQU on these machines.
-        
-	.align	4,0x90			// 16-byte align inner loops
-LReverseUnalignedLoop:                  // loop over 64-byte chunks
-        movdqu  -16(%esi,%edx),%xmm0
-        movdqu  -32(%esi,%edx),%xmm1
-        movdqu  -48(%esi,%edx),%xmm2
-        movdqu  -64(%esi,%edx),%xmm3
-        
-        movdqa  %xmm0,-16(%edi,%edx)
-        movdqa  %xmm1,-32(%edi,%edx)
-        movdqa  %xmm2,-48(%edi,%edx)
-        movdqa  %xmm3,-64(%edi,%edx)
-        
-        subl    $64,%edx
-        jne     LReverseUnalignedLoop
-        
-        jmp     LReverseShort           // copy remaining 0..63 bytes and done
-
-COMMPAGE_DESCRIPTOR(bcopy_sse2,_COMM_PAGE_BCOPY,kHasSSE2+kCache64,kHasSupplementalSSE3)
diff --git a/osfmk/i386/commpage/bcopy_sse3x.s b/osfmk/i386/commpage/bcopy_sse3x.s
deleted file mode 100644
index 017895aab..000000000
--- a/osfmk/i386/commpage/bcopy_sse3x.s
+++ /dev/null
@@ -1,823 +0,0 @@
-/*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-        
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-/*
- * The bcopy/memcpy loops, tuned for Pentium-M class processors with
- * Supplemental SSE3 and 64-byte cache lines.
- *
- * The following #defines are tightly coupled to the u-architecture:
- */
-
-#define kShort  80			// too short to bother with SSE (must be >=80)
-#define kVeryLong   (500*1024)          // large enough for non-temporal stores (must be >= 8192)
-#define kFastUCode  ((16*1024)-15)	// cutoff for microcode fastpath for "rep/movsl"
-
-// void bcopy(const void *src, void *dst, size_t len);
- 
-COMMPAGE_FUNCTION_START(bcopy_sse3x, 32, 5)
-LZero:
-	pushl	%ebp			// set up a frame for backtraces
-	movl	%esp,%ebp
-        pushl   %esi
-        pushl   %edi
-        movl    8(%ebp),%esi		// get source ptr
-        movl    12(%ebp),%edi           // get dest ptr
-        movl    16(%ebp),%ecx           // get length
-        movl    %edi,%edx
-        subl    %esi,%edx               // (dest - source)
-        cmpl    %ecx,%edx               // must move in reverse if (dest - source) < length
-                                        // (unsigned: if dest < source this wraps high, so the
-                                        //  forward path is correctly taken)
-        jb      LReverseIsland
-        cmpl    $(kShort),%ecx          // long enough to bother with SSE?
-        jbe     Lshort			// no
-	jmp	LNotShort
-
-//
-// void *memcpy(void *dst, const void *src, size_t len);
-// void *memmove(void *dst, const void *src, size_t len);
-//
-// NB: These need to be 32 bytes from bcopy():
-//
-
-        .align	5, 0x90
-Lmemcpy:				// void *memcpy(void *dst, const void *src, size_t len)
-Lmemmove:				// void *memmove(void *dst, const void *src, size_t len)
-	pushl	%ebp			// set up a frame for backtraces
-	movl	%esp,%ebp
-        pushl   %esi
-        pushl   %edi
-        movl    8(%ebp),%edi		// get dest ptr
-        movl    12(%ebp),%esi           // get source ptr
-        movl    16(%ebp),%ecx           // get length
-        movl    %edi,%edx
-        subl    %esi,%edx               // (dest - source)
-        cmpl    %ecx,%edx               // must move in reverse if (dest - source) < length
-        jb      LReverseIsland
-        cmpl    $(kShort),%ecx          // long enough to bother with SSE?
-        ja      LNotShort               // yes
-        
-// Handle short forward copies.  As the most common case, this is the fall-through path.
-//      ecx = length (<= kShort)
-//      esi = source ptr
-//      edi = dest ptr
-
-Lshort:
-	movl    %ecx,%edx		// copy length
-	shrl	$2,%ecx			// get #doublewords
-	jz	LLeftovers
-2:					// loop copying doublewords
-	movl	(%esi),%eax
-	addl	$4,%esi
-	movl	%eax,(%edi)
-	addl	$4,%edi
-	dec	%ecx
-	jnz	2b
-LLeftovers:				// handle leftover bytes (0..3) in last word
-	andl	$3,%edx			// any leftover bytes?
-	jz	Lexit
-4:					// loop copying bytes
-	movb	(%esi),%al
-	inc	%esi
-	movb	%al,(%edi)
-	inc	%edi
-	dec	%edx
-	jnz	4b
-Lexit:
-        movl    8(%ebp),%eax		// get return value (dst ptr) for memcpy/memmove
-        popl    %edi
-        popl    %esi
-	popl	%ebp
-        ret
-
-
-LReverseIsland:				// keep the "jb" above a short branch...
-	jmp	LReverse		// ...because reverse moves are uncommon
-
-
-// Handle forward moves that are long enough to justify use of SSE3.
-// First, 16-byte align the destination.
-//      ecx = length (> kShort)
-//      esi = source ptr
-//      edi = dest ptr
-
-LNotShort:
-        cmpl    $(kVeryLong),%ecx       // long enough to justify heavyweight loops?
-        movl    %edi,%edx               // copy destination
-        jae     LVeryLong		// use very-long-operand path
-        negl    %edx
-        andl    $15,%edx                // get #bytes to align destination
-	jz	LDestAligned		// already aligned
-        subl    %edx,%ecx               // decrement length
-1:					// loop copying 1..15 bytes
-	movb	(%esi),%al
-	inc	%esi
-	movb	%al,(%edi)
-	inc	%edi
-	dec	%edx
-	jnz	1b
-        
-// Destination is now aligned.  Dispatch to one of sixteen loops over 64-byte chunks,
-// based on the alignment of the source.  All vector loads and stores are aligned.
-// Even though this means we have to shift and repack vectors, doing so is much faster
-// than unaligned loads.  Since kShort>=80 and we've moved at most 15 bytes already,
-// there is at least one chunk.  When we enter the copy loops, the following registers
-// are set up:
-//      ecx = residual length (0..63)
-//	edx = -(length to move), a multiple of 64
-//      esi = ptr to 1st source byte not to move (unaligned)
-//      edi = ptr to 1st dest byte not to move (aligned)
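-//
-// [Editor's note]  Common shape of the sixteen LModN loops: %xmm0 carries one
-// vector across iterations; the four aligned loads fetch source bytes
-// (16-N)..(79-N) of each 64-byte chunk, the palignr/shuffle sequence shifts
-// that five-vector window right by N bytes to reconstruct bytes 0..63
-// exactly, and the last load is saved back into %xmm0 for the next pass.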
-
-LDestAligned:
-        movl    %ecx,%edx               // copy length
-	movl	%esi,%eax		// copy source address
-        andl    $63,%ecx                // get remaining bytes for Lshort
-        andl    $-64,%edx               // get number of bytes we will copy in inner loop
-	andl	$15,%eax		// mask to low 4 bits of source address
-        addl    %edx,%esi               // point to 1st byte not copied
-        addl    %edx,%edi
-        negl    %edx                    // now generate offset to 1st byte to be copied
-.set LTableOffset, LTable - LZero
-	leal	(LTableOffset)(,%eax,4), %eax	// load jump table entry address, relative to LZero
-	movl	_COMM_PAGE_BCOPY(%eax), %eax	// load jump table entry
-	addl	$(_COMM_PAGE_BCOPY), %eax	// add runtime address of LZero to get final function
-	jmp	*%eax
-	
-	.align	2
-LTable:					// table of copy loop addresses
-// force generation of assembly-time constants. Otherwise assembler
-// creates subtractor relocations relative to first external symbol,
-// and this file has none
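-// [Editor's note]  The entries are LModN-LZero offsets rather than absolute
-// addresses because this code executes from the commpage, not from where it
-// was assembled; the addl of _COMM_PAGE_BCOPY above rebases each offset to
-// the address at which LZero actually runs.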
-.set LMod0Offset, LMod0 - LZero
-.set LMod1Offset, LMod1 - LZero
-.set LMod2Offset, LMod2 - LZero
-.set LMod3Offset, LMod3 - LZero
-.set LMod4Offset, LMod4 - LZero
-.set LMod5Offset, LMod5 - LZero
-.set LMod6Offset, LMod6 - LZero
-.set LMod7Offset, LMod7 - LZero
-.set LMod8Offset, LMod8 - LZero
-.set LMod9Offset, LMod9 - LZero
-.set LMod10Offset, LMod10 - LZero
-.set LMod11Offset, LMod11 - LZero
-.set LMod12Offset, LMod12 - LZero
-.set LMod13Offset, LMod13 - LZero
-.set LMod14Offset, LMod14 - LZero
-.set LMod15Offset, LMod15 - LZero
-	.long LMod0Offset
-	.long LMod1Offset
-	.long LMod2Offset
-	.long LMod3Offset
-	.long LMod4Offset
-	.long LMod5Offset
-	.long LMod6Offset
-	.long LMod7Offset
-	.long LMod8Offset
-	.long LMod9Offset
-	.long LMod10Offset
-	.long LMod11Offset
-	.long LMod12Offset
-	.long LMod13Offset
-	.long LMod14Offset
-	.long LMod15Offset
-
-
-// Very long forward moves.  These are at least several pages.  They are special cased
-// and aggressively optimized, not so much because they are common or useful, but
-// because they are subject to benchmark.  There isn't enough room for them in the
-// area reserved on the commpage for bcopy, so we put them elsewhere.  We call
-// the longcopy routine using the normal ABI.
-
-LVeryLong:
-	pushl	%ecx			// length (>= kVeryLong)
-	pushl	%esi			// source ptr
-	pushl	%edi			// dest ptr
-	movl	$(_COMM_PAGE_LONGCOPY),%eax
-	call	*%eax			// do the long copy
-	addl	$12,%esp		// pop off our parameters
-	jmp	Lexit
-
-
-// On Pentium-M, the microcode for "rep/movsl" is faster than SSE for 8-byte
-// aligned operands from about 32KB up to kVeryLong for the hot cache case, and from
-// about 256 bytes up to kVeryLong for cold caches.  This is because the microcode
-// avoids having to read destination cache lines that will be completely overwritten.
-// The cutoff we use (ie, kFastUCode) must somehow balance the two cases, since
-// we do not know if the destination is in cache or not.
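-//
-// [Editor's sketch -- not part of the original source.]  In isolation the
-// fastpath amounts to the following; the real code reuses LLeftovers for the
-// tail, and the rep/movsb here is only to keep the sketch self-contained:
-//
-//	// %esi = src, %edi = dst, %ecx = byte count
-//		cld				// copy upward
-//		movl	%ecx,%edx		// remember the byte count
-//		shrl	$2,%ecx			// doublewords to move
-//		rep				// microcoded bulk copy
-//		movsl
-//		movl	%edx,%ecx
-//		andl	$3,%ecx			// 0..3 tail bytes
-//		rep
-//		movsb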
-
-Lfastpath:
-        addl    %edx,%esi               // restore ptrs to 1st byte of source and dest
-        addl    %edx,%edi
-	negl	%edx			// make length positive
-	orl	%edx,%ecx		// restore total #bytes remaining (%edx mult of 64, %ecx 0..63: OR == ADD)
-	cld				// we'll move forward
-	movl	%ecx,%edx		// copy total length to move
-	shrl	$2,%ecx			// compute #words to move
-	rep				// the u-code will optimize this
-	movsl
-	jmp	LLeftovers		// handle 0..3 leftover bytes
-
-
-// Forward loop for medium length operands in which low four bits of %esi == 0000
-
-LMod0:
-	cmpl	$(-kFastUCode),%edx	// %edx == -length, where (length < kVeryLong)
-	jle	Lfastpath		// long enough for fastpath in microcode
-	jmp	1f
-	.align	4,0x90			// 16-byte align inner loops
-1:					// loop over 64-byte chunks
-        movdqa  (%esi,%edx),%xmm0
-        movdqa  16(%esi,%edx),%xmm1
-        movdqa  32(%esi,%edx),%xmm2
-        movdqa  48(%esi,%edx),%xmm3
-
-        movdqa  %xmm0,(%edi,%edx)
-        movdqa  %xmm1,16(%edi,%edx)
-        movdqa  %xmm2,32(%edi,%edx)
-        movdqa  %xmm3,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     1b
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %esi == 0001
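-// [Editor's note]  Worked example for the first pass: %xmm0 is primed with
-// source bytes s[-1..14] (offsets relative to the chunk start), the first
-// load brings s[15..30] into %xmm1, and palignr $1,%xmm5,%xmm1 shifts the
-// concatenated pair right one byte, leaving s[0..15] in %xmm1 -- exactly the
-// 16 aligned bytes the first store needs.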
-
-LMod1:
-	movdqa	-1(%esi,%edx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  15(%esi,%edx),%xmm1
-        movdqa  31(%esi,%edx),%xmm2
-        movdqa  47(%esi,%edx),%xmm3
-        movdqa  63(%esi,%edx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$1,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$1,%xmm2,%xmm3
-	palignr	$1,%xmm1,%xmm2
-	palignr	$1,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%edi,%edx)
-        movdqa  %xmm2,16(%edi,%edx)
-        movdqa  %xmm3,32(%edi,%edx)
-        movdqa  %xmm4,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     1b
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %esi == 0010
-
-LMod2:
-	movdqa	-2(%esi,%edx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  14(%esi,%edx),%xmm1
-        movdqa  30(%esi,%edx),%xmm2
-        movdqa  46(%esi,%edx),%xmm3
-        movdqa  62(%esi,%edx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$2,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$2,%xmm2,%xmm3
-	palignr	$2,%xmm1,%xmm2
-	palignr	$2,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%edi,%edx)
-        movdqa  %xmm2,16(%edi,%edx)
-        movdqa  %xmm3,32(%edi,%edx)
-        movdqa  %xmm4,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     1b
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %esi == 0011
-
-LMod3:
-	movdqa	-3(%esi,%edx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  13(%esi,%edx),%xmm1
-        movdqa  29(%esi,%edx),%xmm2
-        movdqa  45(%esi,%edx),%xmm3
-        movdqa  61(%esi,%edx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$3,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$3,%xmm2,%xmm3
-	palignr	$3,%xmm1,%xmm2
-	palignr	$3,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%edi,%edx)
-        movdqa  %xmm2,16(%edi,%edx)
-        movdqa  %xmm3,32(%edi,%edx)
-        movdqa  %xmm4,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     1b
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-	
-	
-// Forward loop for medium length operands in which low four bits of %esi == 0100
-// We use the float single data type in order to use "movss" to merge vectors.
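-// [Editor's note]  Worked example: %xmm0 holds dwords {s[-4..-1], s[0..3],
-// s[4..7], s[8..11]}; movss drops s[12..15] into the low dword, then
-// pshufd $0x39 rotates the dwords right one slot, yielding
-// {s[0..3], s[4..7], s[8..11], s[12..15]} -- the aligned 16 bytes to store.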
-
-LMod4:
-	movaps	-4(%esi,%edx),%xmm0	// 4-byte aligned: prime the loop
-	jmp	1f
-	.align	4,0x90
-1:					// loop over 64-byte chunks
-        movaps  12(%esi,%edx),%xmm1
-        movaps  28(%esi,%edx),%xmm2
-	movss	%xmm1,%xmm0		// copy low 4 bytes of source into destination
-	pshufd	$(0x39),%xmm0,%xmm0	// rotate right 4 bytes (mask -- 00 11 10 01)
-        movaps  44(%esi,%edx),%xmm3
-	movss	%xmm2,%xmm1
-	pshufd	$(0x39),%xmm1,%xmm1
-	movaps	60(%esi,%edx),%xmm4
-	movss	%xmm3,%xmm2
-	pshufd	$(0x39),%xmm2,%xmm2
-
-        movaps  %xmm0,(%edi,%edx)
-	movss	%xmm4,%xmm3
-	pshufd	$(0x39),%xmm3,%xmm3
-        movaps  %xmm1,16(%edi,%edx)
-        movaps  %xmm2,32(%edi,%edx)
-	movaps	%xmm4,%xmm0
-        movaps  %xmm3,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     1b
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %esi == 0101
-
-LMod5:
-	movdqa	-5(%esi,%edx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  11(%esi,%edx),%xmm1
-        movdqa  27(%esi,%edx),%xmm2
-        movdqa  43(%esi,%edx),%xmm3
-        movdqa  59(%esi,%edx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$5,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$5,%xmm2,%xmm3
-	palignr	$5,%xmm1,%xmm2
-	palignr	$5,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%edi,%edx)
-        movdqa  %xmm2,16(%edi,%edx)
-        movdqa  %xmm3,32(%edi,%edx)
-        movdqa  %xmm4,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     1b
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %esi == 0110
-
-LMod6:
-	movdqa	-6(%esi,%edx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  10(%esi,%edx),%xmm1
-        movdqa  26(%esi,%edx),%xmm2
-        movdqa  42(%esi,%edx),%xmm3
-        movdqa  58(%esi,%edx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$6,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$6,%xmm2,%xmm3
-	palignr	$6,%xmm1,%xmm2
-	palignr	$6,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%edi,%edx)
-        movdqa  %xmm2,16(%edi,%edx)
-        movdqa  %xmm3,32(%edi,%edx)
-        movdqa  %xmm4,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     1b
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %esi == 0111
-
-LMod7:
-	movdqa	-7(%esi,%edx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  9(%esi,%edx),%xmm1
-        movdqa  25(%esi,%edx),%xmm2
-        movdqa  41(%esi,%edx),%xmm3
-        movdqa  57(%esi,%edx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$7,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$7,%xmm2,%xmm3
-	palignr	$7,%xmm1,%xmm2
-	palignr	$7,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%edi,%edx)
-        movdqa  %xmm2,16(%edi,%edx)
-        movdqa  %xmm3,32(%edi,%edx)
-        movdqa  %xmm4,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     1b
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-	
-	
-// Forward loop for medium length operands in which low four bits of %esi == 1000
-// We use the float double data type in order to use "shufpd" to shift by 8 bytes.
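-// [Editor's note]  Worked example: shufpd $01,%xmm1,%xmm0 keeps qword 1 of
-// %xmm0 and takes qword 0 of %xmm1, so with %xmm0 = s[-8..7] and
-// %xmm1 = s[8..23] the result is {s[0..7], s[8..15]} -- the next 16 aligned
-// bytes to store.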
-
-LMod8:
-	cmpl	$(-kFastUCode),%edx	// %edx == -length, where (length < kVeryLong)
-	jle	Lfastpath		// long enough for fastpath in microcode
-	movapd	-8(%esi,%edx),%xmm0	// 8-byte aligned: prime the loop
-	jmp	1f
-	.align	4,0x90
-1:					// loop over 64-byte chunks
-        movapd  8(%esi,%edx),%xmm1
-        movapd  24(%esi,%edx),%xmm2
-	shufpd	$01,%xmm1,%xmm0		// %xmm0 <- shr( %xmm0 || %xmm1, 8 bytes)
-        movapd  40(%esi,%edx),%xmm3
-	shufpd	$01,%xmm2,%xmm1
-	movapd	56(%esi,%edx),%xmm4
-	shufpd	$01,%xmm3,%xmm2
-
-        movapd  %xmm0,(%edi,%edx)
-	shufpd	$01,%xmm4,%xmm3
-        movapd  %xmm1,16(%edi,%edx)
-        movapd  %xmm2,32(%edi,%edx)
-	movapd	%xmm4,%xmm0
-        movapd  %xmm3,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     1b
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %esi == 1001
-
-LMod9:
-	movdqa	-9(%esi,%edx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  7(%esi,%edx),%xmm1
-        movdqa  23(%esi,%edx),%xmm2
-        movdqa  39(%esi,%edx),%xmm3
-        movdqa  55(%esi,%edx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$9,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$9,%xmm2,%xmm3
-	palignr	$9,%xmm1,%xmm2
-	palignr	$9,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%edi,%edx)
-        movdqa  %xmm2,16(%edi,%edx)
-        movdqa  %xmm3,32(%edi,%edx)
-        movdqa  %xmm4,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     1b
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %esi == 1010
-
-LMod10:
-	movdqa	-10(%esi,%edx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  6(%esi,%edx),%xmm1
-        movdqa  22(%esi,%edx),%xmm2
-        movdqa  38(%esi,%edx),%xmm3
-        movdqa  54(%esi,%edx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$10,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$10,%xmm2,%xmm3
-	palignr	$10,%xmm1,%xmm2
-	palignr	$10,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%edi,%edx)
-        movdqa  %xmm2,16(%edi,%edx)
-        movdqa  %xmm3,32(%edi,%edx)
-        movdqa  %xmm4,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     1b
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %esi == 1011
-
-LMod11:
-	movdqa	-11(%esi,%edx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  5(%esi,%edx),%xmm1
-        movdqa  21(%esi,%edx),%xmm2
-        movdqa  37(%esi,%edx),%xmm3
-        movdqa  53(%esi,%edx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$11,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$11,%xmm2,%xmm3
-	palignr	$11,%xmm1,%xmm2
-	palignr	$11,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%edi,%edx)
-        movdqa  %xmm2,16(%edi,%edx)
-        movdqa  %xmm3,32(%edi,%edx)
-        movdqa  %xmm4,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     1b
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-	
-
-// Forward loop for medium length operands in which low four bits of %esi == 1100
-// We use the float single data type in order to use "movss" to merge vectors.
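-// [Editor's note]  Here the rotate happens at load time: pshufd $0x93 on
-// s[4..19] yields dwords {s[16..19], s[4..7], s[8..11], s[12..15]}, and
-// movss then replaces the out-of-place low dword with the s[0..3] carried
-// over from the previous pass (or from the priming load).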
-
-LMod12:
-	movss	(%esi,%edx),%xmm0	// prefetch 1st four bytes of source, right justified
-	jmp	1f
-	.align	4,0x90
-1:					// loop over 64-byte chunks
-	pshufd	$(0x93),4(%esi,%edx),%xmm1 // load and rotate right 12 bytes (mask -- 10 01 00 11)
-	pshufd	$(0x93),20(%esi,%edx),%xmm2
-	pshufd	$(0x93),36(%esi,%edx),%xmm3
-	pshufd	$(0x93),52(%esi,%edx),%xmm4
-	
-	movaps	%xmm4,%xmm5
-	movss	%xmm3,%xmm4		// copy low 4 bytes of source into destination
-	movss	%xmm2,%xmm3
-	movss	%xmm1,%xmm2
-	movss	%xmm0,%xmm1
-	
-        movaps  %xmm1,(%edi,%edx)
-        movaps  %xmm2,16(%edi,%edx)
-	movaps	%xmm5,%xmm0
-        movaps  %xmm3,32(%edi,%edx)
-        movaps  %xmm4,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     1b
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %esi == 1101
-
-LMod13:
-	movdqa	-13(%esi,%edx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  3(%esi,%edx),%xmm1
-        movdqa  19(%esi,%edx),%xmm2
-        movdqa  35(%esi,%edx),%xmm3
-        movdqa  51(%esi,%edx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$13,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$13,%xmm2,%xmm3
-	palignr	$13,%xmm1,%xmm2
-	palignr	$13,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%edi,%edx)
-        movdqa  %xmm2,16(%edi,%edx)
-        movdqa  %xmm3,32(%edi,%edx)
-        movdqa  %xmm4,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     1b
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %esi == 1110
-
-LMod14:
-	movdqa	-14(%esi,%edx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  2(%esi,%edx),%xmm1
-        movdqa  18(%esi,%edx),%xmm2
-        movdqa  34(%esi,%edx),%xmm3
-        movdqa  50(%esi,%edx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$14,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$14,%xmm2,%xmm3
-	palignr	$14,%xmm1,%xmm2
-	palignr	$14,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%edi,%edx)
-        movdqa  %xmm2,16(%edi,%edx)
-        movdqa  %xmm3,32(%edi,%edx)
-        movdqa  %xmm4,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     1b
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %esi == 1111
-
-LMod15:
-	movdqa	-15(%esi,%edx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  1(%esi,%edx),%xmm1
-        movdqa  17(%esi,%edx),%xmm2
-        movdqa  33(%esi,%edx),%xmm3
-        movdqa  49(%esi,%edx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$15,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$15,%xmm2,%xmm3
-	palignr	$15,%xmm1,%xmm2
-	palignr	$15,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%edi,%edx)
-        movdqa  %xmm2,16(%edi,%edx)
-        movdqa  %xmm3,32(%edi,%edx)
-        movdqa  %xmm4,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     1b
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-        
-
-// Reverse moves.  These are not optimized as aggressively as their forward
-// counterparts, as they are only used with destructive overlap.
-//      ecx = length
-//      esi = source ptr
-//      edi = dest ptr
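-// A concrete "destructive overlap" case (editor's example): memmove(p+1, p, n)
-// -- a forward copy would overwrite source bytes before reading them, so the
-// copy must walk from the end of the buffers back toward the start.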
-
-LReverse:
-        addl    %ecx,%esi               // point to end of strings
-        addl    %ecx,%edi
-        cmpl    $(kShort),%ecx          // long enough to bother with SSE?
-        ja      LReverseNotShort        // yes
-
-// Handle reverse short copies.
-//      ecx = length
-//      esi = one byte past end of source
-//      edi = one byte past end of dest
-
-LReverseShort:
-	movl    %ecx,%edx		// copy length
-	shrl	$2,%ecx			// #words
-	jz	3f
-1:
-	subl	$4,%esi
-	movl	(%esi),%eax
-	subl	$4,%edi
-	movl	%eax,(%edi)
-	dec	%ecx
-	jnz	1b
-3:
-	andl	$3,%edx			// bytes?
-	jz	5f
-4:
-	dec	%esi
-	movb	(%esi),%al
-	dec	%edi
-	movb	%al,(%edi)
-	dec	%edx
-	jnz	4b
-5:
-        movl    8(%ebp),%eax		// get return value (dst ptr) for memcpy/memmove
-        popl    %edi
-        popl    %esi
-	popl	%ebp
-        ret
-
-// Handle a reverse move long enough to justify using SSE.
-//      ecx = length
-//      esi = one byte past end of source
-//      edi = one byte past end of dest
-
-LReverseNotShort:
-        movl    %edi,%edx               // copy destination
-        andl    $15,%edx                // get #bytes to align destination
-        je      LReverseDestAligned     // already aligned
-        subl	%edx,%ecx		// adjust length
-1:					// loop copying 1..15 bytes
-	dec	%esi
-	movb	(%esi),%al
-	dec	%edi
-	movb	%al,(%edi)
-	dec	%edx
-	jnz	1b
-        
-// Destination is now aligned.  Prepare for reverse loops.
-
-LReverseDestAligned:
-        movl    %ecx,%edx               // copy length
-        andl    $63,%ecx                // get remaining bytes for Lshort
-        andl    $-64,%edx               // get number of bytes we will copy in inner loop
-        subl    %edx,%esi               // point to endpoint of copy
-        subl    %edx,%edi
-	testl	$15,%esi		// is source aligned too?
-        jnz     LReverseUnalignedLoop   // no
-
-LReverseAlignedLoop:                    // loop over 64-byte chunks
-        movdqa  -16(%esi,%edx),%xmm0
-        movdqa  -32(%esi,%edx),%xmm1
-        movdqa  -48(%esi,%edx),%xmm2
-        movdqa  -64(%esi,%edx),%xmm3
-
-        movdqa  %xmm0,-16(%edi,%edx)
-        movdqa  %xmm1,-32(%edi,%edx)
-        movdqa  %xmm2,-48(%edi,%edx)
-        movdqa  %xmm3,-64(%edi,%edx)
-        
-        subl    $64,%edx
-        jne     LReverseAlignedLoop
-        
-        jmp     LReverseShort           // copy remaining 0..63 bytes and done
-
-    
-// Reverse, unaligned loop.  LDDQU==MOVDQU on these machines.
-        
-LReverseUnalignedLoop:                  // loop over 64-byte chunks
-        movdqu  -16(%esi,%edx),%xmm0
-        movdqu  -32(%esi,%edx),%xmm1
-        movdqu  -48(%esi,%edx),%xmm2
-        movdqu  -64(%esi,%edx),%xmm3
-        
-        movdqa  %xmm0,-16(%edi,%edx)
-        movdqa  %xmm1,-32(%edi,%edx)
-        movdqa  %xmm2,-48(%edi,%edx)
-        movdqa  %xmm3,-64(%edi,%edx)
-        
-        subl    $64,%edx
-        jne     LReverseUnalignedLoop
-        
-        jmp     LReverseShort           // copy remaining 0..63 bytes and done
-
-COMMPAGE_DESCRIPTOR(bcopy_sse3x,_COMM_PAGE_BCOPY,kHasSSE2+kHasSupplementalSSE3+kCache64,kHasSSE4_2)
diff --git a/osfmk/i386/commpage/bcopy_sse3x_64.s b/osfmk/i386/commpage/bcopy_sse3x_64.s
deleted file mode 100644
index 2a0e46be9..000000000
--- a/osfmk/i386/commpage/bcopy_sse3x_64.s
+++ /dev/null
@@ -1,820 +0,0 @@
-/*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-        
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-/*
- * The bcopy/memcpy loops, tuned for 64-bit Pentium-M class processors with 
- * Supplemental SSE3 and 64-byte cache lines.  This is the 64-bit version.
- *
- * The following #defines are tightly coupled to the u-architecture:
- */
-
-#define kShort  80			// too short to bother with SSE (must be >=80)
-#define kVeryLong   (500*1024)          // large enough for non-temporal stores (>=8192 and <2GB)
-#define kFastUCode  ((16*1024)-15)	// cutoff for microcode fastpath for "rep/movsl"
-
-// void bcopy(const void *src, void *dst, size_t len);
- 
-COMMPAGE_FUNCTION_START(bcopy_sse3x_64, 64, 5)
-LZero:
-	pushq	%rbp			// set up a frame for backtraces
-	movq	%rsp,%rbp
-	movq	%rsi,%rax		// copy dest ptr
-	movq	%rdi,%rsi		// exchange source and dest ptrs
-	movq	%rax,%rdi
-        subq    %rsi,%rax               // (dest - source)
-        cmpq    %rdx,%rax               // must move in reverse if (dest - source) < length
-        jb      LReverseIsland
-        cmpq    $(kShort),%rdx          // long enough to bother with SSE?
-        jbe     LShort			// no
-	jmp	LNotShort
-
-//
-// void *memcpy(void *dst, const void *src, size_t len);
-// void *memmove(void *dst, const void *src, size_t len);
-//
-// NB: These need to be 32 bytes from bcopy():
-//
-
-        .align	5, 0x90
-Lmemcpy:				// void *memcpy(void *dst, const void *src, size_t len)
-Lmemmove:				// void *memmove(void *dst, const void *src, size_t len)
-	pushq	%rbp			// set up a frame for backtraces
-	movq	%rsp,%rbp
-	movq	%rdi,%r11		// save return value here        
-        movq    %rdi,%rax
-        subq    %rsi,%rax               // (dest - source)
-        cmpq    %rdx,%rax               // must move in reverse if (dest - source) < length
-        jb      LReverseIsland
-        cmpq    $(kShort),%rdx          // long enough to bother with SSE?
-        ja      LNotShort               // yes
-        
-// Handle short forward copies.  As the most common case, this is the fall-through path.
-//      rdx = length (<= kShort)
-//      rsi = source ptr
-//      rdi = dest ptr
-
-LShort:
-	movl    %edx,%ecx		// copy length using 32-bit operation
-	shrl	$2,%ecx			// get #doublewords
-	jz	LLeftovers
-2:					// loop copying doublewords
-	movl	(%rsi),%eax
-	addq	$4,%rsi
-	movl	%eax,(%rdi)
-	addq	$4,%rdi
-	decl	%ecx
-	jnz	2b
-LLeftovers:				// handle leftover bytes (0..3) in last word
-	andl	$3,%edx			// any leftover bytes?
-	jz	5f
-4:					// loop copying bytes
-	movb	(%rsi),%al
-	incq	%rsi
-	movb	%al,(%rdi)
-	incq	%rdi
-	decl	%edx
-	jnz	4b
-5:
-        movq	%r11,%rax		// get return value (dst ptr) for memcpy/memmove
-	popq	%rbp
-        ret
-
-
-LReverseIsland:				// keep the "jb" above a short branch...
-	jmp	LReverse		// ...because reverse moves are uncommon
-
-
-// Handle forward moves that are long enough to justify use of SSE.
-// First, 16-byte align the destination.
-//      rdx = length (> kShort)
-//      rsi = source ptr
-//      rdi = dest ptr
-
-LNotShort:
-        cmpq    $(kVeryLong),%rdx       // long enough to justify heavyweight loops?
-        jae     LVeryLong		// use very-long-operand path
-        movl    %edi,%ecx               // copy low half of destination ptr
-        negl    %ecx
-        andl    $15,%ecx                // get #bytes to align destination
-	jz	LDestAligned		// already aligned
-        subl    %ecx,%edx               // decrement length
-	rep				// align destination
-	movsb
-
-        
-// Destination is now aligned.  Dispatch to one of sixteen loops over 64-byte chunks,
-// based on the alignment of the source.  All vector loads and stores are aligned.
-// Even though this means we have to shift and repack vectors, doing so is much faster
-// than unaligned loads.  Since kShort>=80 and we've moved at most 15 bytes already,
-// there is at least one chunk.  When we enter the copy loops, the following registers
-// are set up:
-//      rdx = residual length (0..63)
-//	rcx = -(length to move), a multiple of 64 less than 2GB
-//      rsi = ptr to 1st source byte not to move (unaligned)
-//      rdi = ptr to 1st dest byte not to move (aligned)
-
-LDestAligned:
-        movq    %rdx,%rcx               // copy length
-	movl	%esi,%eax		// copy low half of source address
-        andl    $63,%edx                // get remaining bytes for LShort
-	andl	$15,%eax		// mask to low 4 bits of source address
-        andq    $-64,%rcx               // get number of bytes we will copy in inner loop
-// We'd like to use lea with rip-relative addressing, but cannot in a .code64 block in
-// a 32-bit object file (4586528). Generate the leaq opcode manually.
-#if defined(__i386__)
-        .byte 0x4c
-        .byte 0x8d
-        .byte 0x05
-        .long LTable-LRIP
-LRIP:	
-#elif defined(__x86_64__)
-        leaq	LTable(%rip), %r8
-#else
-#error Unsupported architecture
-#endif
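-// [Editor's note]  Decoding the hand-assembled bytes: 0x4c is REX.WR (64-bit
-// operand size, ModRM reg field extended to select %r8), 0x8d is the LEA
-// opcode, and ModRM byte 0x05 means RIP-relative with a 32-bit displacement.
-// The displacement is measured from the end of the instruction, which is why
-// LRIP is placed immediately after the .long.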
-        addq    %rcx,%rsi               // point to 1st byte not copied
-        addq    %rcx,%rdi
-	movl	(%r8,%rax,4),%eax	// get offset of routine
-        negq    %rcx                    // now generate offset to 1st byte to be copied
-	addq	%r8,%rax		// generate address of copy loop
-	jmp	*%rax			// enter copy loop, selected by source alignment
-
-	.align	2
-LTable:					// table of copy loop addresses
-// force generation of assembly-time constants. Otherwise assembler
-// creates subtractor relocations relative to first external symbol,
-// and this file has none
-.set LMod0Offset, LMod0 - LTable
-.set LMod1Offset, LMod1 - LTable
-.set LMod2Offset, LMod2 - LTable
-.set LMod3Offset, LMod3 - LTable
-.set LMod4Offset, LMod4 - LTable
-.set LMod5Offset, LMod5 - LTable
-.set LMod6Offset, LMod6 - LTable
-.set LMod7Offset, LMod7 - LTable
-.set LMod8Offset, LMod8 - LTable
-.set LMod9Offset, LMod9 - LTable
-.set LMod10Offset, LMod10 - LTable
-.set LMod11Offset, LMod11 - LTable
-.set LMod12Offset, LMod12 - LTable
-.set LMod13Offset, LMod13 - LTable
-.set LMod14Offset, LMod14 - LTable
-.set LMod15Offset, LMod15 - LTable
-	.long LMod0Offset
-	.long LMod1Offset
-	.long LMod2Offset
-	.long LMod3Offset
-	.long LMod4Offset
-	.long LMod5Offset
-	.long LMod6Offset
-	.long LMod7Offset
-	.long LMod8Offset
-	.long LMod9Offset
-	.long LMod10Offset
-	.long LMod11Offset
-	.long LMod12Offset
-	.long LMod13Offset
-	.long LMod14Offset
-	.long LMod15Offset
-
-
-// Very long forward moves.  These are at least several pages.  They are special cased
-// and aggressively optimized, not so much because they are common or useful, but
-// because they are subject to benchmark.  There isn't enough room for them in the
-// area reserved on the commpage for bcopy, so we put them elsewhere.  We call
-// the longcopy routine using the normal ABI:
-//      rdi = dest
-//      rsi = source
-//      rdx = length (>= kVeryLong bytes)
-
-LVeryLong:
-	pushq	%r11			// save return value
-	movq	$_COMM_PAGE_32_TO_64(_COMM_PAGE_LONGCOPY),%rax
-	call	*%rax			// call very long operand routine
-	popq	%rax			// pop return value
-	popq	%rbp
-	ret	
-
-
-// On Pentium-M, the microcode for "rep/movsl" is faster than SSE for 16-byte
-// aligned operands from about 32KB up to kVeryLong for the hot cache case, and from
-// about 256 bytes up to kVeryLong for cold caches.  This is because the microcode
-// avoids having to read destination cache lines that will be completely overwritten.
-// The cutoff we use (ie, kFastUCode) must somehow balance the two cases, since
-// we do not know if the destination is in cache or not.
-
-Lfastpath:
-        addq    %rcx,%rsi               // restore ptrs to 1st byte of source and dest
-        addq    %rcx,%rdi
-	negl	%ecx			// make length positive (known to be < 2GB)
-	orl	%edx,%ecx		// restore total #bytes remaining (%ecx mult of 64, %edx 0..63: OR == ADD)
-	cld				// we'll move forward
-	shrl	$2,%ecx			// compute #words to move
-	rep				// the u-code will optimize this
-	movsl
-	jmp	LLeftovers		// handle 0..3 leftover bytes
-
-
-// Forward loop for medium length operands in which low four bits of %rsi == 0000
-
-LMod0:
-	cmpl	$(-kFastUCode),%ecx	// %rcx == -length, where (length < kVeryLong)
-	jle	Lfastpath		// long enough for fastpath in microcode
-	jmp	1f
-	.align	4,0x90			// 16-byte align inner loops
-1:					// loop over 64-byte chunks
-        movdqa  (%rsi,%rcx),%xmm0
-        movdqa  16(%rsi,%rcx),%xmm1
-        movdqa  32(%rsi,%rcx),%xmm2
-        movdqa  48(%rsi,%rcx),%xmm3
-
-        movdqa  %xmm0,(%rdi,%rcx)
-        movdqa  %xmm1,16(%rdi,%rcx)
-        movdqa  %xmm2,32(%rdi,%rcx)
-        movdqa  %xmm3,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     1b
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %rsi == 0001
-
-LMod1:
-	movdqa	-1(%rsi,%rcx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  15(%rsi,%rcx),%xmm1
-        movdqa  31(%rsi,%rcx),%xmm2
-        movdqa  47(%rsi,%rcx),%xmm3
-        movdqa  63(%rsi,%rcx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$1,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$1,%xmm2,%xmm3
-	palignr	$1,%xmm1,%xmm2
-	palignr	$1,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%rdi,%rcx)
-        movdqa  %xmm2,16(%rdi,%rcx)
-        movdqa  %xmm3,32(%rdi,%rcx)
-        movdqa  %xmm4,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     1b
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %rsi == 0010
-
-LMod2:
-	movdqa	-2(%rsi,%rcx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  14(%rsi,%rcx),%xmm1
-        movdqa  30(%rsi,%rcx),%xmm2
-        movdqa  46(%rsi,%rcx),%xmm3
-        movdqa  62(%rsi,%rcx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$2,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$2,%xmm2,%xmm3
-	palignr	$2,%xmm1,%xmm2
-	palignr	$2,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%rdi,%rcx)
-        movdqa  %xmm2,16(%rdi,%rcx)
-        movdqa  %xmm3,32(%rdi,%rcx)
-        movdqa  %xmm4,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     1b
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %rsi == 0011
-
-LMod3:
-	movdqa	-3(%rsi,%rcx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  13(%rsi,%rcx),%xmm1
-        movdqa  29(%rsi,%rcx),%xmm2
-        movdqa  45(%rsi,%rcx),%xmm3
-        movdqa  61(%rsi,%rcx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$3,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$3,%xmm2,%xmm3
-	palignr	$3,%xmm1,%xmm2
-	palignr	$3,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%rdi,%rcx)
-        movdqa  %xmm2,16(%rdi,%rcx)
-        movdqa  %xmm3,32(%rdi,%rcx)
-        movdqa  %xmm4,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     1b
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-	
-	
-// Forward loop for medium length operands in which low four bits of %rsi == 0100
-// We use the float single data type in order to use "movss" to merge vectors.
-
-LMod4:
-	movaps	-4(%rsi,%rcx),%xmm0	// 4-byte aligned: prime the loop
-	jmp	1f
-	.align	4,0x90
-1:					// loop over 64-byte chunks
-        movaps  12(%rsi,%rcx),%xmm1
-        movaps  28(%rsi,%rcx),%xmm2
-	movss	%xmm1,%xmm0		// copy low 4 bytes of source into destination
-	pshufd	$(0x39),%xmm0,%xmm0	// rotate right 4 bytes (mask -- 00 11 10 01)
-        movaps  44(%rsi,%rcx),%xmm3
-	movss	%xmm2,%xmm1
-	pshufd	$(0x39),%xmm1,%xmm1
-	movaps	60(%rsi,%rcx),%xmm4
-	movss	%xmm3,%xmm2
-	pshufd	$(0x39),%xmm2,%xmm2
-
-        movaps  %xmm0,(%rdi,%rcx)
-	movss	%xmm4,%xmm3
-	pshufd	$(0x39),%xmm3,%xmm3
-        movaps  %xmm1,16(%rdi,%rcx)
-        movaps  %xmm2,32(%rdi,%rcx)
-	movaps	%xmm4,%xmm0
-        movaps  %xmm3,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     1b
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %rsi == 0101
-
-LMod5:
-	movdqa	-5(%rsi,%rcx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  11(%rsi,%rcx),%xmm1
-        movdqa  27(%rsi,%rcx),%xmm2
-        movdqa  43(%rsi,%rcx),%xmm3
-        movdqa  59(%rsi,%rcx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$5,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$5,%xmm2,%xmm3
-	palignr	$5,%xmm1,%xmm2
-	palignr	$5,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%rdi,%rcx)
-        movdqa  %xmm2,16(%rdi,%rcx)
-        movdqa  %xmm3,32(%rdi,%rcx)
-        movdqa  %xmm4,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     1b
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %rsi == 0110
-
-LMod6:
-	movdqa	-6(%rsi,%rcx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  10(%rsi,%rcx),%xmm1
-        movdqa  26(%rsi,%rcx),%xmm2
-        movdqa  42(%rsi,%rcx),%xmm3
-        movdqa  58(%rsi,%rcx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$6,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$6,%xmm2,%xmm3
-	palignr	$6,%xmm1,%xmm2
-	palignr	$6,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%rdi,%rcx)
-        movdqa  %xmm2,16(%rdi,%rcx)
-        movdqa  %xmm3,32(%rdi,%rcx)
-        movdqa  %xmm4,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     1b
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %rsi == 0111
-
-LMod7:
-	movdqa	-7(%rsi,%rcx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  9(%rsi,%rcx),%xmm1
-        movdqa  25(%rsi,%rcx),%xmm2
-        movdqa  41(%rsi,%rcx),%xmm3
-        movdqa  57(%rsi,%rcx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$7,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$7,%xmm2,%xmm3
-	palignr	$7,%xmm1,%xmm2
-	palignr	$7,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%rdi,%rcx)
-        movdqa  %xmm2,16(%rdi,%rcx)
-        movdqa  %xmm3,32(%rdi,%rcx)
-        movdqa  %xmm4,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     1b
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-	
-	
-// Forward loop for medium length operands in which low four bits of %rsi == 1000
-// We use the float double data type in order to use "shufpd" to shift by 8 bytes.
-
-LMod8:
-	cmpl	$(-kFastUCode),%ecx	// %rcx == -length, where (length < kVeryLong)
-	jle	Lfastpath		// long enough for fastpath in microcode
-	movapd	-8(%rsi,%rcx),%xmm0	// 8-byte aligned: prime the loop
-	jmp	1f
-	.align	4,0x90
-1:					// loop over 64-byte chunks
-        movapd  8(%rsi,%rcx),%xmm1
-        movapd  24(%rsi,%rcx),%xmm2
-	shufpd	$01,%xmm1,%xmm0		// %xmm0 <- shr( %xmm0 || %xmm1, 8 bytes)
-        movapd  40(%rsi,%rcx),%xmm3
-	shufpd	$01,%xmm2,%xmm1
-	movapd	56(%rsi,%rcx),%xmm4
-	shufpd	$01,%xmm3,%xmm2
-
-        movapd  %xmm0,(%rdi,%rcx)
-	shufpd	$01,%xmm4,%xmm3
-        movapd  %xmm1,16(%rdi,%rcx)
-        movapd  %xmm2,32(%rdi,%rcx)
-	movapd	%xmm4,%xmm0
-        movapd  %xmm3,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     1b
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %rsi == 1001
-
-LMod9:
-	movdqa	-9(%rsi,%rcx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  7(%rsi,%rcx),%xmm1
-        movdqa  23(%rsi,%rcx),%xmm2
-        movdqa  39(%rsi,%rcx),%xmm3
-        movdqa  55(%rsi,%rcx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$9,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$9,%xmm2,%xmm3
-	palignr	$9,%xmm1,%xmm2
-	palignr	$9,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%rdi,%rcx)
-        movdqa  %xmm2,16(%rdi,%rcx)
-        movdqa  %xmm3,32(%rdi,%rcx)
-        movdqa  %xmm4,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     1b
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %rsi == 1010
-
-LMod10:
-	movdqa	-10(%rsi,%rcx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  6(%rsi,%rcx),%xmm1
-        movdqa  22(%rsi,%rcx),%xmm2
-        movdqa  38(%rsi,%rcx),%xmm3
-        movdqa  54(%rsi,%rcx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$10,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$10,%xmm2,%xmm3
-	palignr	$10,%xmm1,%xmm2
-	palignr	$10,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%rdi,%rcx)
-        movdqa  %xmm2,16(%rdi,%rcx)
-        movdqa  %xmm3,32(%rdi,%rcx)
-        movdqa  %xmm4,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     1b
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %rsi == 1011
-
-LMod11:
-	movdqa	-11(%rsi,%rcx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  5(%rsi,%rcx),%xmm1
-        movdqa  21(%rsi,%rcx),%xmm2
-        movdqa  37(%rsi,%rcx),%xmm3
-        movdqa  53(%rsi,%rcx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$11,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$11,%xmm2,%xmm3
-	palignr	$11,%xmm1,%xmm2
-	palignr	$11,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%rdi,%rcx)
-        movdqa  %xmm2,16(%rdi,%rcx)
-        movdqa  %xmm3,32(%rdi,%rcx)
-        movdqa  %xmm4,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     1b
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-	
-
-// Forward loop for medium length operands in which low four bits of %rsi == 1100
-// We use the float single data type in order to use "movss" to merge vectors.
-
-LMod12:
-	movss	(%rsi,%rcx),%xmm0	// prefetch 1st four bytes of source, right justified
-	jmp	1f
-	.align	4,0x90
-1:					// loop over 64-byte chunks
-	pshufd	$(0x93),4(%rsi,%rcx),%xmm1 // load and rotate right 12 bytes (mask -- 10 01 00 11)
-	pshufd	$(0x93),20(%rsi,%rcx),%xmm2
-	pshufd	$(0x93),36(%rsi,%rcx),%xmm3
-	pshufd	$(0x93),52(%rsi,%rcx),%xmm4
-	
-	movaps	%xmm4,%xmm5
-	movss	%xmm3,%xmm4		// copy low 4 bytes of source into destination
-	movss	%xmm2,%xmm3
-	movss	%xmm1,%xmm2
-	movss	%xmm0,%xmm1
-	
-        movaps  %xmm1,(%rdi,%rcx)
-        movaps  %xmm2,16(%rdi,%rcx)
-	movaps	%xmm5,%xmm0
-        movaps  %xmm3,32(%rdi,%rcx)
-        movaps  %xmm4,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     1b
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %rsi == 1101
-
-LMod13:
-	movdqa	-13(%rsi,%rcx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  3(%rsi,%rcx),%xmm1
-        movdqa  19(%rsi,%rcx),%xmm2
-        movdqa  35(%rsi,%rcx),%xmm3
-        movdqa  51(%rsi,%rcx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$13,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$13,%xmm2,%xmm3
-	palignr	$13,%xmm1,%xmm2
-	palignr	$13,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%rdi,%rcx)
-        movdqa  %xmm2,16(%rdi,%rcx)
-        movdqa  %xmm3,32(%rdi,%rcx)
-        movdqa  %xmm4,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     1b
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %rsi == 1110
-
-LMod14:
-	movdqa	-14(%rsi,%rcx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  2(%rsi,%rcx),%xmm1
-        movdqa  18(%rsi,%rcx),%xmm2
-        movdqa  34(%rsi,%rcx),%xmm3
-        movdqa  50(%rsi,%rcx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$14,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$14,%xmm2,%xmm3
-	palignr	$14,%xmm1,%xmm2
-	palignr	$14,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%rdi,%rcx)
-        movdqa  %xmm2,16(%rdi,%rcx)
-        movdqa  %xmm3,32(%rdi,%rcx)
-        movdqa  %xmm4,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     1b
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for medium length operands in which low four bits of %rsi == 1111
-
-LMod15:
-	movdqa	-15(%rsi,%rcx),%xmm0	// prime the loop by loading 1st source dq
-1:					// loop over 64-byte chunks
-        movdqa  1(%rsi,%rcx),%xmm1
-        movdqa  17(%rsi,%rcx),%xmm2
-        movdqa  33(%rsi,%rcx),%xmm3
-        movdqa  49(%rsi,%rcx),%xmm4
-	
-	movdqa	%xmm0,%xmm5
-	movdqa	%xmm4,%xmm0
-
-	palignr	$15,%xmm3,%xmm4		// dest <- shr( dest || source, imm*8 )
-	palignr	$15,%xmm2,%xmm3
-	palignr	$15,%xmm1,%xmm2
-	palignr	$15,%xmm5,%xmm1
-	
-        movdqa  %xmm1,(%rdi,%rcx)
-        movdqa  %xmm2,16(%rdi,%rcx)
-        movdqa  %xmm3,32(%rdi,%rcx)
-        movdqa  %xmm4,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     1b
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-	
-
-// Reverse moves.  These are not optimized as aggressively as their forward
-// counterparts, as they are only used with destructive overlap.
-//      rdx = length
-//      rsi = source ptr
-//      rdi = dest ptr
-
-LReverse:
-        addq    %rdx,%rsi               // point to end of strings
-        addq    %rdx,%rdi
-        cmpq    $(kShort),%rdx          // long enough to bother with SSE?
-        ja      LReverseNotShort        // yes
-
-// Handle reverse short copies.
-//      edx = length (<= kShort)
-//      rsi = one byte past end of source
-//      rdi = one byte past end of dest
-
-LReverseShort:
-	movl    %edx,%ecx		// copy length
-	shrl	$3,%ecx			// #quadwords
-	jz	3f
-1:
-	subq	$8,%rsi
-	movq	(%rsi),%rax
-	subq	$8,%rdi
-	movq	%rax,(%rdi)
-	decl	%ecx
-	jnz	1b
-3:
-	andl	$7,%edx			// bytes?
-	jz	5f
-4:
-	decq	%rsi
-	movb	(%rsi),%al
-	decq	%rdi
-	movb	%al,(%rdi)
-	decl	%edx
-	jnz	4b
-5:
-        movq	%r11,%rax		// get return value (dst ptr) for memcpy/memmove
-	popq	%rbp
-        ret
-
-// Handle a reverse move long enough to justify using SSE.
-//      rdx = length (> kShort)
-//      rsi = one byte past end of source
-//      rdi = one byte past end of dest
-
-LReverseNotShort:
-        movl    %edi,%ecx               // copy low half of destination ptr
-        andl    $15,%ecx                // get #bytes to align destination
-        je      LReverseDestAligned     // already aligned
-        subq	%rcx,%rdx		// adjust length
-1:					// loop copying 1..15 bytes
-	decq	%rsi
-	movb	(%rsi),%al
-	decq	%rdi
-	movb	%al,(%rdi)
-	decl	%ecx
-	jnz	1b
-        
-// Destination is now aligned.  Prepare for reverse loops.
-
-LReverseDestAligned:
-        movq    %rdx,%rcx               // copy length
-        andl    $63,%edx                // get remaining bytes for LReverseShort
-        andq    $-64,%rcx               // get number of bytes we will copy in inner loop
-        subq    %rcx,%rsi               // point to endpoint of copy
-        subq    %rcx,%rdi
-	testl	$15,%esi		// is source aligned too?
-        jnz     LReverseUnalignedLoop   // no
-
-LReverseAlignedLoop:                    // loop over 64-byte chunks
-        movdqa  -16(%rsi,%rcx),%xmm0
-        movdqa  -32(%rsi,%rcx),%xmm1
-        movdqa  -48(%rsi,%rcx),%xmm2
-        movdqa  -64(%rsi,%rcx),%xmm3
-
-        movdqa  %xmm0,-16(%rdi,%rcx)
-        movdqa  %xmm1,-32(%rdi,%rcx)
-        movdqa  %xmm2,-48(%rdi,%rcx)
-        movdqa  %xmm3,-64(%rdi,%rcx)
-        
-        subq    $64,%rcx
-        jne     LReverseAlignedLoop
-        
-        jmp     LReverseShort           // copy remaining 0..63 bytes and done
-
-    
-// Reverse, unaligned loop.  LDDQU==MOVDQU on these machines.
-        
-LReverseUnalignedLoop:                  // loop over 64-byte chunks
-        movdqu  -16(%rsi,%rcx),%xmm0
-        movdqu  -32(%rsi,%rcx),%xmm1
-        movdqu  -48(%rsi,%rcx),%xmm2
-        movdqu  -64(%rsi,%rcx),%xmm3
-        
-        movdqa  %xmm0,-16(%rdi,%rcx)
-        movdqa  %xmm1,-32(%rdi,%rcx)
-        movdqa  %xmm2,-48(%rdi,%rcx)
-        movdqa  %xmm3,-64(%rdi,%rcx)
-        
-        subq    $64,%rcx
-        jne     LReverseUnalignedLoop
-        
-        jmp     LReverseShort           // copy remaining 0..63 bytes and done
-
-COMMPAGE_DESCRIPTOR(bcopy_sse3x_64,_COMM_PAGE_BCOPY,kHasSSE2+kHasSupplementalSSE3+kCache64,kHasSSE4_2)
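
The LMod13/LMod14/LMod15 loops above are all instances of one technique: do only 16-byte-aligned loads, and re-join the two aligned chunks that straddle each misaligned 16-byte source block with PALIGNR (LMod12 does the same join with PSHUFD/MOVSS because a 12-byte shift lands on dword boundaries). A minimal C sketch of one step, using the SSSE3 intrinsic for PALIGNR — copy16_mod13 is a hypothetical helper name, dst is assumed 16-byte aligned, and src is 13 bytes past a 16-byte boundary:

    #include <tmmintrin.h>   /* SSSE3: _mm_alignr_epi8 */

    /* One 16-byte step of the LMod13 pattern: two aligned loads
     * straddling the misaligned source block, joined by a 13-byte
     * right shift of their 32-byte concatenation.  The real loop
     * keeps the previous high load in a register rather than
     * reloading it each iteration. */
    static inline void
    copy16_mod13(unsigned char *dst, const unsigned char *src)
    {
        __m128i lo = _mm_load_si128((const __m128i *)(src - 13));
        __m128i hi = _mm_load_si128((const __m128i *)(src + 3));
        /* (hi:lo) >> 13 bytes == the 16 bytes starting at src */
        _mm_store_si128((__m128i *)dst, _mm_alignr_epi8(hi, lo, 13));
    }
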
diff --git a/osfmk/i386/commpage/bcopy_sse42.s b/osfmk/i386/commpage/bcopy_sse42.s
deleted file mode 100644
index 6a0bcd528..000000000
--- a/osfmk/i386/commpage/bcopy_sse42.s
+++ /dev/null
@@ -1,311 +0,0 @@
-/*
- * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-        
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-/*
- * The bcopy/memcpy loops, tuned for Nehalem.
- *
- * The following #defines are tightly coupled to the u-architecture:
- */
-
-#define kShort  80			// too short to bother with SSE (must be >=80)
-
-
-// void bcopy(const void *src, void *dst, size_t len);
- 
-COMMPAGE_FUNCTION_START(bcopy_sse42, 32, 5)
-	pushl	%ebp			// set up a frame for backtraces
-	movl	%esp,%ebp
-        pushl   %esi
-        pushl   %edi
-        movl    8(%ebp),%esi		// get source ptr
-        movl    12(%ebp),%edi           // get dest ptr
-        movl    16(%ebp),%ecx           // get length
-        movl    %edi,%edx
-        subl    %esi,%edx               // (dest - source)
-        cmpl    %ecx,%edx               // must move in reverse if (dest - source) < length
-        jb      LReverseIsland
-        cmpl    $(kShort),%ecx          // long enough to bother with SSE?
-        jbe     Lshort			// no
-	jmp	LNotShort
-
-//
-// void *memcpy(void *dst, const void *src, size_t len);
-// void *memmove(void *dst, const void *src, size_t len);
-//
-// NB: These need to be 32 bytes from bcopy():
-//
-
-        .align	5, 0x90
-Lmemcpy:				// void *memcpy(void *dst, const void *src, size_t len)
-Lmemmove:				// void *memmove(void *dst, const void *src, size_t len)
-	pushl	%ebp			// set up a frame for backtraces
-	movl	%esp,%ebp
-        pushl   %esi
-        pushl   %edi
-        movl    8(%ebp),%edi		// get dest ptr
-        movl    12(%ebp),%esi           // get source ptr
-        movl    16(%ebp),%ecx           // get length
-        movl    %edi,%edx
-        subl    %esi,%edx               // (dest - source)
-        cmpl    %ecx,%edx               // must move in reverse if (dest - source) < length
-        jb      LReverseIsland
-        cmpl    $(kShort),%ecx          // long enough to bother with SSE?
-        ja      LNotShort               // yes
-        
-// Handle short forward copies.  As the most common case, this is the fall-through path.
-//      ecx = length (<= kShort)
-//      esi = source ptr
-//      edi = dest ptr
-
-Lshort:
-	movl    %ecx,%edx		// copy length
-	shrl	$2,%ecx			// get #doublewords
-	jz	3f
-2:					// loop copying doublewords
-	movl	(%esi),%eax
-	addl	$4,%esi
-	movl	%eax,(%edi)
-	addl	$4,%edi
-	dec	%ecx
-	jnz	2b
-3:					// handle leftover bytes (0..3) in last word
-	andl	$3,%edx			// any leftover bytes?
-	jz	Lexit
-4:					// loop copying bytes
-	movb	(%esi),%al
-	inc	%esi
-	movb	%al,(%edi)
-	inc	%edi
-	dec	%edx
-	jnz	4b
-Lexit:
-        movl    8(%ebp),%eax		// get return value (dst ptr) for memcpy/memmove
-        popl    %edi
-        popl    %esi
-	popl	%ebp
-        ret
-
-
-LReverseIsland:				// keep the "jb" above a short branch...
-	jmp	LReverse		// ...because reverse moves are uncommon
-
-
-// Handle forward moves that are long enough to justify use of SSE.
-// First, 16-byte align the destination.
-//      ecx = length (> kShort)
-//      esi = source ptr
-//      edi = dest ptr
-
-LNotShort:
-        movl    %edi,%edx               // copy destination
-        negl    %edx
-        andl    $15,%edx                // get #bytes to align destination
-	jz	LDestAligned		// already aligned
-        subl    %edx,%ecx               // decrement length
-1:					// loop copying 1..15 bytes
-	movb	(%esi),%al
-	inc	%esi
-	movb	%al,(%edi)
-	inc	%edi
-	dec	%edx
-	jnz	1b
-        
-// Destination is now aligned.  Nehalem does a great job with unaligned SSE loads,
-// so we use MOVDQU rather than aligned loads and shifts.  Since kShort>=80, we
-// know there is at least one 64-byte chunk to move.
-// When we enter the copy loops, the following registers are set up:
-//      ecx = residual length (0..63)
-//	edx = -(length to move), a multiple of 64
-//      esi = ptr to 1st source byte not to move (unaligned)
-//      edi = ptr to 1st dest byte not to move (aligned)
-
-LDestAligned:
-        movl    %ecx,%edx               // copy length
-        andl    $63,%ecx                // get remaining bytes for Lshort
-        andl    $-64,%edx               // get number of bytes we will copy in inner loop
-        addl    %edx,%esi               // point to 1st byte not copied
-        addl    %edx,%edi
-        negl    %edx                    // now generate offset to 1st byte to be copied
-	testl	$15,%esi		// source also aligned?
-	jnz	LUnalignedLoop
-	jmp	LAlignedLoop
-
-
-// Forward loop for aligned operands.
-
-	.align	4,0x90			// 16-byte align inner loops
-LAlignedLoop:				// loop over 64-byte chunks
-        movdqa  (%esi,%edx),%xmm0
-        movdqa  16(%esi,%edx),%xmm1
-        movdqa  32(%esi,%edx),%xmm2
-        movdqa  48(%esi,%edx),%xmm3
-
-        movdqa  %xmm0,(%edi,%edx)
-        movdqa  %xmm1,16(%edi,%edx)
-        movdqa  %xmm2,32(%edi,%edx)
-        movdqa  %xmm3,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     LAlignedLoop
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for unaligned operands.
-
-	.align	4,0x90			// 16-byte align inner loops
-LUnalignedLoop:				// loop over 64-byte chunks
-        movdqu  (%esi,%edx),%xmm0
-        movdqu  16(%esi,%edx),%xmm1
-        movdqu  32(%esi,%edx),%xmm2
-        movdqu  48(%esi,%edx),%xmm3
-
-        movdqa  %xmm0,(%edi,%edx)
-        movdqa  %xmm1,16(%edi,%edx)
-        movdqa  %xmm2,32(%edi,%edx)
-        movdqa  %xmm3,48(%edi,%edx)
-        
-        addl    $64,%edx
-        jnz     LUnalignedLoop
-        
-        jmp     Lshort                  // copy remaining 0..63 bytes and done
-
-
-// Reverse moves.  They are only used with destructive overlap.
-//      ecx = length
-//      esi = source ptr
-//      edi = dest ptr
-
-LReverse:
-        addl    %ecx,%esi               // point to end of strings
-        addl    %ecx,%edi
-        cmpl    $(kShort),%ecx          // long enough to bother with SSE?
-        ja      LReverseNotShort        // yes
-
-// Handle reverse short copies.
-//      ecx = length
-//      esi = one byte past end of source
-//      edi = one byte past end of dest
-
-LReverseShort:
-	movl    %ecx,%edx		// copy length
-	shrl	$2,%ecx			// #words
-	jz	3f
-1:
-	subl	$4,%esi
-	movl	(%esi),%eax
-	subl	$4,%edi
-	movl	%eax,(%edi)
-	dec	%ecx
-	jnz	1b
-3:
-	andl	$3,%edx			// bytes?
-	jz	5f
-4:
-	dec	%esi
-	movb	(%esi),%al
-	dec	%edi
-	movb	%al,(%edi)
-	dec	%edx
-	jnz	4b
-5:
-        movl    8(%ebp),%eax		// get return value (dst ptr) for memcpy/memmove
-        popl    %edi
-        popl    %esi
-	popl	%ebp
-        ret
-
-// Handle a reverse move long enough to justify using SSE.
-//      ecx = length
-//      esi = one byte past end of source
-//      edi = one byte past end of dest
-
-LReverseNotShort:
-        movl    %edi,%edx               // copy destination
-        andl    $15,%edx                // get #bytes to align destination
-        je      LReverseDestAligned     // already aligned
-        subl	%edx,%ecx		// adjust length
-1:					// loop copying 1..15 bytes
-	dec	%esi
-	movb	(%esi),%al
-	dec	%edi
-	movb	%al,(%edi)
-	dec	%edx
-	jnz	1b
-        
-// Destination is now aligned.  Prepare for reverse loops.
-
-LReverseDestAligned:
-        movl    %ecx,%edx               // copy length
-        andl    $63,%ecx                // get remaining bytes for Lshort
-        andl    $-64,%edx               // get number of bytes we will copy in inner loop
-        subl    %edx,%esi               // point to endpoint of copy
-        subl    %edx,%edi
-	testl	$15,%esi		// is source aligned too?
-        jnz     LReverseUnalignedLoop   // no
-
-LReverseAlignedLoop:                    // loop over 64-byte chunks
-        movdqa  -16(%esi,%edx),%xmm0
-        movdqa  -32(%esi,%edx),%xmm1
-        movdqa  -48(%esi,%edx),%xmm2
-        movdqa  -64(%esi,%edx),%xmm3
-
-        movdqa  %xmm0,-16(%edi,%edx)
-        movdqa  %xmm1,-32(%edi,%edx)
-        movdqa  %xmm2,-48(%edi,%edx)
-        movdqa  %xmm3,-64(%edi,%edx)
-        
-        subl    $64,%edx
-        jne     LReverseAlignedLoop
-        
-        jmp     LReverseShort           // copy remaining 0..63 bytes and done
-
-    
-// Reverse, unaligned loop.  LDDQU==MOVDQU on these machines.
-        
-LReverseUnalignedLoop:                  // loop over 64-byte chunks
-        movdqu  -16(%esi,%edx),%xmm0
-        movdqu  -32(%esi,%edx),%xmm1
-        movdqu  -48(%esi,%edx),%xmm2
-        movdqu  -64(%esi,%edx),%xmm3
-        
-        movdqa  %xmm0,-16(%edi,%edx)
-        movdqa  %xmm1,-32(%edi,%edx)
-        movdqa  %xmm2,-48(%edi,%edx)
-        movdqa  %xmm3,-64(%edi,%edx)
-        
-        subl    $64,%edx
-        jne     LReverseUnalignedLoop
-        
-        jmp     LReverseShort           // copy remaining 0..63 bytes and done
-
-
-	COMMPAGE_DESCRIPTOR(bcopy_sse42,_COMM_PAGE_BCOPY,kHasSSE4_2,0)
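
The direction test at the top of both entry points — one unsigned compare of (dest - source) against the length — is worth calling out: because the subtraction wraps, a single JB catches exactly the case where the destination starts inside the source range and a forward copy would overwrite bytes before reading them. A C sketch of the same dispatch (my_memmove is a hypothetical name, not the commpage routine):

    #include <stddef.h>
    #include <stdint.h>

    void *my_memmove(void *dst, const void *src, size_t len)
    {
        unsigned char *d = dst;
        const unsigned char *s = src;

        /* wraps when d < s, so this is false for every
         * non-destructive layout and true exactly when
         * d lies inside [s, s+len) */
        if ((uintptr_t)d - (uintptr_t)s < (uintptr_t)len) {
            while (len--)                      /* reverse copy */
                d[len] = s[len];
        } else {
            for (size_t i = 0; i < len; i++)   /* forward copy */
                d[i] = s[i];
        }
        return dst;
    }
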
diff --git a/osfmk/i386/commpage/bcopy_sse42_64.s b/osfmk/i386/commpage/bcopy_sse42_64.s
deleted file mode 100644
index c8817d955..000000000
--- a/osfmk/i386/commpage/bcopy_sse42_64.s
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-        
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-/*
- * The bcopy/memcpy loops, tuned for Nehalem.  This is the 64-bit version.
- *
- * The following #defines are tightly coupled to the u-architecture:
- */
-
-#define kShort  80			// too short to bother with SSE (must be >=80)
-
-
-// void bcopy(const void *src, void *dst, size_t len);
- 
-COMMPAGE_FUNCTION_START(bcopy_sse42_64, 64, 5)
-	pushq	%rbp			// set up a frame for backtraces
-	movq	%rsp,%rbp
-	movq	%rsi,%rax		// copy dest ptr
-	movq	%rdi,%rsi		// exchange source and dest ptrs
-	movq	%rax,%rdi
-        subq    %rsi,%rax               // (dest - source)
-        cmpq    %rdx,%rax               // must move in reverse if (dest - source) < length
-        jb      LReverseIsland
-        cmpq    $(kShort),%rdx          // long enough to bother with SSE?
-        jbe     LShort			// no
-	jmp	LNotShort
-
-//
-// void *memcpy(void *dst, const void *src, size_t len);
-// void *memmove(void *dst, const void *src, size_t len);
-//
-// NB: These need to be 32 bytes from bcopy():
-//
-
-        .align	5, 0x90
-Lmemcpy:				// void *memcpy(void *dst, const void *src, size_t len)
-Lmemmove:				// void *memmove(void *dst, const void *src, size_t len)
-	pushq	%rbp			// set up a frame for backtraces
-	movq	%rsp,%rbp
-	movq	%rdi,%r11		// save return value here        
-        movq    %rdi,%rax
-        subq    %rsi,%rax               // (dest - source)
-        cmpq    %rdx,%rax               // must move in reverse if (dest - source) < length
-        jb      LReverseIsland
-        cmpq    $(kShort),%rdx          // long enough to bother with SSE?
-        ja      LNotShort               // yes
-        
-// Handle short forward copies.  As the most common case, this is the fall-through path.
-//      rdx = length (<= kShort)
-//      rsi = source ptr
-//      rdi = dest ptr
-
-LShort:
-	movl    %edx,%ecx		// copy length using 32-bit operation
-	shrl	$2,%ecx			// get #doublewords
-	jz	3f
-2:					// loop copying doublewords
-	movl	(%rsi),%eax
-	addq	$4,%rsi
-	movl	%eax,(%rdi)
-	addq	$4,%rdi
-	decl	%ecx
-	jnz	2b
-3:					// handle leftover bytes (0..3) in last word
-	andl	$3,%edx			// any leftover bytes?
-	jz	5f
-4:					// loop copying bytes
-	movb	(%rsi),%al
-	incq	%rsi
-	movb	%al,(%rdi)
-	incq	%rdi
-	decl	%edx
-	jnz	4b
-5:
-        movq	%r11,%rax		// get return value (dst ptr) for memcpy/memmove
-	popq	%rbp
-        ret
-
-
-LReverseIsland:				// keep the "jb" above a short branch...
-	jmp	LReverse		// ...because reverse moves are uncommon
-
-
-// Handle forward moves that are long enough to justify use of SSE.
-// First, 16-byte align the destination.
-//      rdx = length (> kShort)
-//      rsi = source ptr
-//      rdi = dest ptr
-
-LNotShort:
-        movl    %edi,%ecx               // copy low half of destination ptr
-        negl    %ecx
-        andl    $15,%ecx                // get #bytes to align destination
-	jz	LDestAligned		// already aligned
-        subl    %ecx,%edx               // decrement length
-1:					// loop copying 1..15 bytes
-	movb	(%rsi),%al
-	inc	%rsi
-	movb	%al,(%rdi)
-	inc	%rdi
-	dec	%ecx
-	jnz	1b
-
-
-// Destination is now aligned.  Nehalem does a great job with unaligned SSE loads,
-// so we use MOVDQU rather than aligned loads and shifts.  Since kShort>=80, we
-// know there is at least one 64-byte chunk to move.
-// When we enter the copy loops, the following registers are set up:
-//      rdx = residual length (0..63)
-//	rcx = -(length to move), a multiple of 64 less than 2GB
-//      rsi = ptr to 1st source byte not to move (unaligned)
-//      rdi = ptr to 1st dest byte not to move (aligned)
-
-LDestAligned:
-        movq    %rdx,%rcx               // copy length
-        andl    $63,%edx                // get remaining bytes for LShort
-        andq    $-64,%rcx               // get number of bytes we will copy in inner loop
-        addq    %rcx,%rsi               // point to 1st byte not copied
-        addq    %rcx,%rdi
-        negq    %rcx                    // now generate offset to 1st byte to be copied
-	testl	$15,%esi		// source also aligned?
-	jnz	LUnalignedLoop
-	jmp	LAlignedLoop
-
-
-// Forward loop for aligned operands.
-
-	.align	4,0x90			// 16-byte align inner loops
-LAlignedLoop:				// loop over 64-byte chunks
-        movdqa  (%rsi,%rcx),%xmm0
-        movdqa  16(%rsi,%rcx),%xmm1
-        movdqa  32(%rsi,%rcx),%xmm2
-        movdqa  48(%rsi,%rcx),%xmm3
-
-        movdqa  %xmm0,(%rdi,%rcx)
-        movdqa  %xmm1,16(%rdi,%rcx)
-        movdqa  %xmm2,32(%rdi,%rcx)
-        movdqa  %xmm3,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     LAlignedLoop
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-
-
-// Forward loop for unaligned operands.
-
-	.align	4,0x90			// 16-byte align inner loops
-LUnalignedLoop:				// loop over 64-byte chunks
-        movdqu  (%rsi,%rcx),%xmm0
-        movdqu  16(%rsi,%rcx),%xmm1
-        movdqu  32(%rsi,%rcx),%xmm2
-        movdqu  48(%rsi,%rcx),%xmm3
-
-        movdqa  %xmm0,(%rdi,%rcx)
-        movdqa  %xmm1,16(%rdi,%rcx)
-        movdqa  %xmm2,32(%rdi,%rcx)
-        movdqa  %xmm3,48(%rdi,%rcx)
-        
-        addq    $64,%rcx
-        jnz     LUnalignedLoop
-        
-        jmp     LShort                  // copy remaining 0..63 bytes and done
-	
-
-// Reverse moves.  These are only used with destructive overlap.
-//      rdx = length
-//      rsi = source ptr
-//      rdi = dest ptr
-
-LReverse:
-        addq    %rdx,%rsi               // point to end of strings
-        addq    %rdx,%rdi
-        cmpq    $(kShort),%rdx          // long enough to bother with SSE?
-        ja      LReverseNotShort        // yes
-
-// Handle reverse short copies.
-//      edx = length (<= kShort)
-//      rsi = one byte past end of source
-//      rdi = one byte past end of dest
-
-LReverseShort:
-	movl    %edx,%ecx		// copy length
-	shrl	$3,%ecx			// #quadwords
-	jz	3f
-1:
-	subq	$8,%rsi
-	movq	(%rsi),%rax
-	subq	$8,%rdi
-	movq	%rax,(%rdi)
-	decl	%ecx
-	jnz	1b
-3:
-	andl	$7,%edx			// bytes?
-	jz	5f
-4:
-	decq	%rsi
-	movb	(%rsi),%al
-	decq	%rdi
-	movb	%al,(%rdi)
-	decl	%edx
-	jnz	4b
-5:
-        movq	%r11,%rax		// get return value (dst ptr) for memcpy/memmove
-	popq	%rbp
-        ret
-
-// Handle a reverse move long enough to justify using SSE.
-//      rdx = length (> kShort)
-//      rsi = one byte past end of source
-//      rdi = one byte past end of dest
-
-LReverseNotShort:
-        movl    %edi,%ecx               // copy destination
-        andl    $15,%ecx                // get #bytes to align destination
-        jz      LReverseDestAligned     // already aligned
-        subq	%rcx,%rdx		// adjust length
-1:					// loop copying 1..15 bytes
-	decq	%rsi
-	movb	(%rsi),%al
-	decq	%rdi
-	movb	%al,(%rdi)
-	decl	%ecx
-	jnz	1b
-        
-// Destination is now aligned.  Prepare for reverse loops.
-
-LReverseDestAligned:
-        movq    %rdx,%rcx               // copy length
-        andl    $63,%edx                // get remaining bytes for LReverseShort
-        andq    $-64,%rcx               // get number of bytes we will copy in inner loop
-        subq    %rcx,%rsi               // point to endpoint of copy
-        subq    %rcx,%rdi
-	testl	$15,%esi		// is source aligned too?
-        jnz     LReverseUnalignedLoop   // no
-
-LReverseAlignedLoop:                    // loop over 64-byte chunks
-        movdqa  -16(%rsi,%rcx),%xmm0
-        movdqa  -32(%rsi,%rcx),%xmm1
-        movdqa  -48(%rsi,%rcx),%xmm2
-        movdqa  -64(%rsi,%rcx),%xmm3
-
-        movdqa  %xmm0,-16(%rdi,%rcx)
-        movdqa  %xmm1,-32(%rdi,%rcx)
-        movdqa  %xmm2,-48(%rdi,%rcx)
-        movdqa  %xmm3,-64(%rdi,%rcx)
-        
-        subq    $64,%rcx
-        jne     LReverseAlignedLoop
-        
-        jmp     LReverseShort           // copy remaining 0..63 bytes and done
-
-    
-// Reverse, unaligned loop.  LDDQU==MOVDQU on these machines.
-        
-LReverseUnalignedLoop:                  // loop over 64-byte chunks
-        movdqu  -16(%rsi,%rcx),%xmm0
-        movdqu  -32(%rsi,%rcx),%xmm1
-        movdqu  -48(%rsi,%rcx),%xmm2
-        movdqu  -64(%rsi,%rcx),%xmm3
-        
-        movdqa  %xmm0,-16(%rdi,%rcx)
-        movdqa  %xmm1,-32(%rdi,%rcx)
-        movdqa  %xmm2,-48(%rdi,%rcx)
-        movdqa  %xmm3,-64(%rdi,%rcx)
-        
-        subq    $64,%rcx
-        jne     LReverseUnalignedLoop
-        
-        jmp     LReverseShort           // copy remaining 0..63 bytes and done
-
-
-	COMMPAGE_DESCRIPTOR(bcopy_sse42_64,_COMM_PAGE_BCOPY,kHasSSE4_2,0)
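
What distinguishes these Nehalem variants from the sse3x ones is visible in LUnalignedLoop: no PALIGNR at all, just MOVDQU loads paired with MOVDQA stores, because Nehalem handles unaligned loads well. A rough intrinsics sketch of that 64-byte inner loop (copy64_chunks is a hypothetical helper; assumes dst is 16-byte aligned and n is a positive multiple of 64):

    #include <emmintrin.h>   /* SSE2: _mm_loadu_si128, _mm_store_si128 */

    static void
    copy64_chunks(unsigned char *dst, const unsigned char *src, size_t n)
    {
        /* four unaligned loads + four aligned stores per iteration,
         * mirroring LUnalignedLoop above */
        for (size_t i = 0; i < n; i += 64) {
            __m128i a = _mm_loadu_si128((const __m128i *)(src + i));
            __m128i b = _mm_loadu_si128((const __m128i *)(src + i + 16));
            __m128i c = _mm_loadu_si128((const __m128i *)(src + i + 32));
            __m128i d = _mm_loadu_si128((const __m128i *)(src + i + 48));
            _mm_store_si128((__m128i *)(dst + i),      a);
            _mm_store_si128((__m128i *)(dst + i + 16), b);
            _mm_store_si128((__m128i *)(dst + i + 32), c);
            _mm_store_si128((__m128i *)(dst + i + 48), d);
        }
    }
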
diff --git a/osfmk/i386/commpage/bzero_scalar.s b/osfmk/i386/commpage/bzero_scalar.s
deleted file mode 100644
index 6c496b9e9..000000000
--- a/osfmk/i386/commpage/bzero_scalar.s
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (c) 1993 Winning Strategies, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *      This product includes software developed by Winning Strategies, Inc.
- * 4. The name of the author may not be used to endorse or promote products
- *    derived from this software without specific prior written permission
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <sys/appleapiopts.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-/*
- * bzero (void *b, size_t len)
- *      write len zero bytes to the string b.
- *
- * Written by:
- *      J.T. Conklin (jtc@wimsey.com), Winning Strategies, Inc.
- */
-
-COMMPAGE_FUNCTION_START(bzero_scalar, 32, 4)
-	pushl	%ebp			/* set up a frame for backtraces */
-	movl	%esp,%ebp
-        pushl   %edi
-        pushl   %ebx
-        movl    8(%ebp),%edi
-        movl    12(%ebp),%ecx
-
-        cld                             /* set fill direction forward */
-        xorl    %eax,%eax               /* set fill data to 0 */
-
-        /*
-         * if the string is too short, it's really not worth the overhead
- * of aligning to word boundaries, etc.  So we jump to a plain
-         * unaligned set.
-         */
-        cmpl    $0x0f,%ecx
-        jbe     L1
-
-        movl    %edi,%edx               /* compute misalignment */
-        negl    %edx
-        andl    $3,%edx
-        movl    %ecx,%ebx
-        subl    %edx,%ebx
-
-        movl    %edx,%ecx               /* zero until word aligned */
-        rep
-        stosb
-
-        movl    %ebx,%ecx               /* zero by words */
-        shrl    $2,%ecx
-        rep
-        stosl
-
-        movl    %ebx,%ecx
-        andl    $3,%ecx                 /* zero remainder by bytes */
-L1:     rep
-        stosb
-
-        popl    %ebx
-        popl    %edi
-	popl	%ebp
-	ret
-
-COMMPAGE_DESCRIPTOR(bzero_scalar,_COMM_PAGE_BZERO,0,kHasSSE2)
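
Structurally the scalar routine is: skip alignment for tiny buffers, zero bytes until word-aligned, REP/STOSL the bulk, then finish the 0..3 byte tail. The same shape in plain C (my_bzero is a hypothetical name; a sketch, not the libc implementation):

    #include <stddef.h>
    #include <stdint.h>

    void my_bzero(void *b, size_t len)
    {
        unsigned char *p = b;

        if (len > 15) {                    /* worth aligning */
            while ((uintptr_t)p & 3) {     /* bytes until word aligned */
                *p++ = 0;
                len--;
            }
            for (; len >= 4; len -= 4) {   /* zero by words (rep stosl) */
                *(uint32_t *)p = 0;
                p += 4;
            }
        }
        while (len--)                      /* remainder by bytes */
            *p++ = 0;
    }
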
diff --git a/osfmk/i386/commpage/bzero_sse2.s b/osfmk/i386/commpage/bzero_sse2.s
deleted file mode 100644
index be5facd29..000000000
--- a/osfmk/i386/commpage/bzero_sse2.s
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-/*
- * Bzero, tuned for Pentium-M class processors with SSE2
- * and 64-byte cache lines.
- *
- * This routine is also used for memset(p,0,n), which is a common case
- * since gcc sometimes silently maps bzero() into memset().  As a result,
- * we always load the original ptr into %eax before returning.
- */
-
-#define kShort		80		// too short to bother with SSE (must be >=80)
-#define	kVeryLong	(1024*1024)
-
-// void	bzero(void *b, size_t len);
-
-COMMPAGE_FUNCTION_START(bzero_sse2, 32, 5)
-	pushl	%ebp			// set up a frame for backtraces
-	movl	%esp,%ebp
-        pushl   %edi
-        movl    8(%ebp),%edi            // get ptr
-        movl    12(%ebp),%edx           // get length
-
-        xorl    %eax,%eax               // set fill data to 0
-        cmpl    $(kShort),%edx          // long enough for SSE?
-        jg	LNotShort               // yes
-        
-// Here for short operands or the end of long ones.
-//      %edx = length
-//      %edi = ptr
-//      %eax = zero
-
-Lshort:
-	cmpl	$16,%edx		// long enough to word align?
-	jge	3f			// yes
-	test	%edx,%edx		// length==0?
-	jz	6f
-1:
-	movb	%al,(%edi)		// zero a byte
-	inc	%edi
-	dec	%edx
-	jnz	1b
-	jmp	6f
-2:
-	movb	%al,(%edi)		// zero a byte
-	inc	%edi
-	dec	%edx
-3:
-	test	$3,%edi			// is ptr doubleword aligned?
-	jnz	2b			// no
-	movl	%edx,%ecx		// copy length
-	shrl	$2,%edx			// #doublewords to store
-4:      
-	movl	%eax,(%edi)		// zero an aligned doubleword
-	addl	$4,%edi
-	dec	%edx
-	jnz	4b
-	andl	$3,%ecx			// mask down to #bytes at end (0..3)
-	jz	6f			// none
-5:
-	movb	%al,(%edi)		// zero a byte
-	inc	%edi
-	dec	%ecx
-	jnz	5b
-6:
-	movl	8(%ebp),%eax		// get return value in case this was a call of memset()
-        popl    %edi
-	popl	%ebp
-        ret
-
-        
-// We will be using SSE, so align ptr.
-
-LNotShort:
-        movl    %edi,%ecx
-        negl    %ecx
-        andl    $15,%ecx                // mask down to #bytes to 16-byte align
-	jz	LDestAligned		// already aligned
-        subl    %ecx,%edx               // decrement length
-0:					// loop storing bytes to align the ptr
-	movb	%al,(%edi)		// pack in a byte
-	inc	%edi
-	dec	%ecx
-	jnz	0b
-	
-// Destination is now 16-byte aligned.  Prepare to loop over 64-byte chunks.
-//      %edx = length
-//      %edi = ptr
-//      %eax = zero
-
-LDestAligned:
-        movl    %edx,%ecx
-        andl    $63,%edx                // mask down to residual length (0..63)
-        andl    $-64,%ecx               // get #bytes we will zero in this loop
-        pxor    %xmm0,%xmm0             // zero an SSE register
-        addl    %ecx,%edi               // increment ptr by length to move
-	cmpl	$(kVeryLong),%ecx	// long enough to justify non-temporal stores?
-	jae	LVeryLong		// yes
-        negl    %ecx			// negate length to move
-	jmp	1f
-	
-// Loop over 64-byte chunks, storing into cache.
-
-	.align	4,0x90			// keep inner loops 16-byte aligned
-1:
-        movdqa  %xmm0,(%edi,%ecx)
-        movdqa  %xmm0,16(%edi,%ecx)
-        movdqa  %xmm0,32(%edi,%ecx)
-        movdqa  %xmm0,48(%edi,%ecx)
-        addl    $64,%ecx
-        jne     1b
-	
-	jmp	Lshort
-	
-// Very long operands: use non-temporal stores to bypass cache.
-
-LVeryLong:
-        negl    %ecx			// negate length to move
-	jmp	1f
-	
-	.align	4,0x90			// keep inner loops 16-byte aligned
-1:
-        movntdq %xmm0,(%edi,%ecx)
-        movntdq %xmm0,16(%edi,%ecx)
-        movntdq %xmm0,32(%edi,%ecx)
-        movntdq %xmm0,48(%edi,%ecx)
-        addl    $64,%ecx
-        jne     1b
-	
-        sfence                          // required by non-temporal stores
-	jmp	Lshort
-
-COMMPAGE_DESCRIPTOR(bzero_sse2,_COMM_PAGE_BZERO,kHasSSE2,kHasSSE4_2)
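
The kVeryLong split is the interesting policy decision here: below 1 MB the zeros go through the cache with MOVDQA, above it MOVNTDQ streams them past the cache (the destination is unlikely to be read soon, and evicting a megabyte of live data would hurt more), with the mandatory SFENCE at the end. An intrinsics sketch of the non-temporal path (bzero_nontemporal is a hypothetical helper; dst 16-byte aligned, n a multiple of 64):

    #include <emmintrin.h>   /* SSE2: _mm_stream_si128, _mm_sfence */
    #include <stddef.h>

    static void
    bzero_nontemporal(unsigned char *dst, size_t n)
    {
        const __m128i zero = _mm_setzero_si128();

        for (size_t i = 0; i < n; i += 64) {   /* as in LVeryLong above */
            _mm_stream_si128((__m128i *)(dst + i),      zero);
            _mm_stream_si128((__m128i *)(dst + i + 16), zero);
            _mm_stream_si128((__m128i *)(dst + i + 32), zero);
            _mm_stream_si128((__m128i *)(dst + i + 48), zero);
        }
        _mm_sfence();   /* required by non-temporal stores */
    }
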
diff --git a/osfmk/i386/commpage/bzero_sse2_64.s b/osfmk/i386/commpage/bzero_sse2_64.s
deleted file mode 100644
index c0ec8a458..000000000
--- a/osfmk/i386/commpage/bzero_sse2_64.s
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-/*
- * Bzero, tuned for Pentium-M class processors with SSE2
- * and 64-byte cache lines.  This is the 64-bit version.
- *
- * This routine is also used for memset(p,0,n), which is a common case
- * since gcc sometimes silently maps bzero() into memset().  As a result,
- * we always load the original ptr into %eax before returning.
- */
-
-#define kShort		80		// too short to bother with SSE (must be >=80)
-#define	kVeryLong	(1024*1024)
-
-// void	bzero(void *b, size_t len);
-
-COMMPAGE_FUNCTION_START(bzero_sse2_64, 64, 5)
-	pushq	%rbp			// set up a frame for backtraces
-	movq	%rsp,%rbp
-        xorl    %eax,%eax               // set fill data to 0
-	movq	%rdi,%r11		// save original ptr as return value
-        cmpq    $(kShort),%rsi          // long enough for SSE?
-        jg	LNotShort               // yes
-        
-// Here for short operands or the end of long ones.
-//      %esi = length (<= kShort)
-//      %rdi = ptr
-//      %eax = zero
-
-Lshort:
-	cmpl	$16,%esi		// long enough to word align?
-	jge	3f			// yes
-	test	%esi,%esi		// length==0?
-	jz	6f
-1:
-	movb	%al,(%rdi)		// zero a byte
-	incq	%rdi
-	decl	%esi
-	jnz	1b
-	jmp	6f
-2:
-	movb	%al,(%rdi)		// zero a byte
-	incq	%rdi
-	decl	%esi
-3:
-	testl	$3,%edi			// is ptr doubleword aligned?
-	jnz	2b			// no
-	movl	%esi,%ecx		// copy length
-	shrl	$2,%esi			// #doublewords to store
-4:      
-	movl	%eax,(%rdi)		// zero an aligned doubleword
-	addq	$4,%rdi
-	decl	%esi
-	jnz	4b
-	andl	$3,%ecx			// mask down to #bytes at end (0..3)
-	jz	6f			// none
-5:
-	movb	%al,(%rdi)		// zero a byte
-	incq	%rdi
-	decl	%ecx
-	jnz	5b
-6:
-	movq	%r11,%rax		// set return value in case this was a call of memset()
-	popq	%rbp
-        ret
-		
-        
-// We will be using SSE, so align ptr.
-//      %rsi = length (> kShort)
-//      %rdi = ptr
-//      %eax = zero
-
-LNotShort:
-        movl    %edi,%ecx		// get #bytes to 16-byte align ptr
-        negl    %ecx
-        andl    $15,%ecx
-	jz	LDestAligned		// already aligned
-        subq    %rcx,%rsi               // decrement length
-0:					// loop storing bytes to align the ptr
-	movb	%al,(%rdi)		// pack in a byte
-	incq	%rdi
-	decl	%ecx
-	jnz	0b
-	
-// Destination is now 16-byte aligned.  Prepare to loop over 64-byte chunks.
-//      %rsi = length (> (kShort-15))
-//      %rdi = ptr (aligned)
-//      %eax = zero
-
-LDestAligned:
-        movq    %rsi,%rcx
-        andl    $63,%esi                // mask down to residual length (0..63)
-        andq    $-64,%rcx               // get #bytes we will zero in this loop
-        pxor    %xmm0,%xmm0             // zero an SSE register
-        addq    %rcx,%rdi               // increment ptr by length to move
-	cmpq	$(kVeryLong),%rcx	// long enough to justify non-temporal stores?
-	jae	LVeryLong		// yes
-        negq    %rcx			// negate length to move
-	jmp	1f
-	
-// Loop over 64-byte chunks, storing into cache.
-
-	.align	4,0x90			// keep inner loops 16-byte aligned
-1:
-        movdqa  %xmm0,(%rdi,%rcx)
-        movdqa  %xmm0,16(%rdi,%rcx)
-        movdqa  %xmm0,32(%rdi,%rcx)
-        movdqa  %xmm0,48(%rdi,%rcx)
-        addq    $64,%rcx
-        jne     1b
-	
-	jmp	Lshort
-	
-// Very long operands: use non-temporal stores to bypass cache.
-
-LVeryLong:
-        negq    %rcx			// negate length to move
-	jmp	1f
-	
-	.align	4,0x90			// keep inner loops 16-byte aligned
-1:
-        movntdq %xmm0,(%rdi,%rcx)
-        movntdq %xmm0,16(%rdi,%rcx)
-        movntdq %xmm0,32(%rdi,%rcx)
-        movntdq %xmm0,48(%rdi,%rcx)
-        addq    $64,%rcx
-        jne     1b
-	
-        sfence                          // required by non-temporal stores
-	jmp	Lshort
-
-COMMPAGE_DESCRIPTOR(bzero_sse2_64,_COMM_PAGE_BZERO,kHasSSE2,kHasSSE4_2)
diff --git a/osfmk/i386/commpage/bzero_sse42.s b/osfmk/i386/commpage/bzero_sse42.s
deleted file mode 100644
index 32e8ea65f..000000000
--- a/osfmk/i386/commpage/bzero_sse42.s
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-/*
- * Bzero, tuned for processors with SSE4.2 and 64-byte cache lines, i.e. Nehalem.
- * We don't actually use SSE4.2, but rather use it to identify Nehalem.
- *
- * We do not use nontemporal operations, but use MOVDQA in preference to REP/STOS. 
- *
- * This routine is also used for memset(p,0,n), which is a common case
- * since gcc sometimes silently maps bzero() into memset().  As a result,
- * we always load the original ptr into %eax before returning.
- */
-
-#define kShort		80		// too short to bother with SSE (must be >=80)
-
-
-COMMPAGE_FUNCTION_START(bzero_sse42, 32, 5)
-	pushl	%ebp			// set up a frame for backtraces
-	movl	%esp,%ebp
-        pushl   %edi
-        movl    8(%ebp),%edi            // get ptr
-        movl    12(%ebp),%edx           // get length
-
-        xorl    %eax,%eax               // set fill data to 0
-        cmpl    $(kShort),%edx          // long enough for SSE?
-        jg	LNotShort               // yes
-        
-// Here for short operands or the end of long ones.
-//      %edx = length
-//      %edi = ptr
-//      %eax = zero
-
-Lshort:
-	cmpl	$12,%edx		// long enough to word align?
-	jge	3f			// yes
-	test	%edx,%edx		// length==0?
-	jz	6f
-1:
-	movb	%al,(%edi)		// zero a byte
-	inc	%edi
-	dec	%edx
-	jnz	1b
-	jmp	6f
-2:
-	movb	%al,(%edi)		// zero a byte
-	inc	%edi
-	dec	%edx
-3:
-	test	$3,%edi			// is ptr doubleword aligned?
-	jnz	2b			// no
-	movl	%edx,%ecx		// copy length
-	shrl	$2,%edx			// #doublewords to store
-4:      
-	movl	%eax,(%edi)		// zero an aligned doubleword
-	addl	$4,%edi
-	dec	%edx
-	jnz	4b
-	andl	$3,%ecx			// mask down to #bytes at end (0..3)
-	jz	6f			// none
-5:
-	movb	%al,(%edi)		// zero a byte
-	inc	%edi
-	dec	%ecx
-	jnz	5b
-6:
-	movl	8(%ebp),%eax		// get return value in case this was a call of memset()
-        popl    %edi
-	popl	%ebp
-        ret
-
-        
-// We will be using SSE, so align ptr.
-//      %edx = length
-//      %edi = ptr
-//      %eax = zero
-
-LNotShort:
-	testl	$3,%edi			// 4-byte aligned?
-	jz	2f			// yes
-	movb	%al,(%edi)		// zero another byte
-	incl	%edi
-	decl	%edx
-	jmp	LNotShort
-1:					// zero doublewords until 16-byte aligned
-	movl	%eax,(%edi)
-	addl	$4,%edi
-	subl	$4,%edx
-2:
-	testl	$15,%edi		// 16-byte aligned?
-	jnz	1b			// no
-
-	
-// Destination is now 16-byte aligned.  Prepare to loop over 64-byte chunks.
-//      %edx = length
-//      %edi = ptr
-//      %eax = zero
-
-LDestAligned:
-        movl    %edx,%ecx
-        andl    $63,%edx                // mask down to residual length (0..63)
-        andl    $-64,%ecx               // get #bytes we will zero in this loop
-        pxor    %xmm0,%xmm0             // zero an SSE register
-        addl    %ecx,%edi               // increment ptr by length to move
-        negl    %ecx			// negate length to move
-	jmp	1f
-	
-// Loop over 64-byte chunks, storing into cache.
-
-	.align	4,0x90			// keep inner loops 16-byte aligned
-1:
-        movdqa  %xmm0,(%edi,%ecx)
-        movdqa  %xmm0,16(%edi,%ecx)
-        movdqa  %xmm0,32(%edi,%ecx)
-        movdqa  %xmm0,48(%edi,%ecx)
-        addl    $64,%ecx
-        jne     1b
-	
-	jmp	Lshort
-	
-
-
-	COMMPAGE_DESCRIPTOR(bzero_sse42,_COMM_PAGE_BZERO,kHasSSE4_2,0)
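
The last two COMMPAGE_DESCRIPTOR arguments are a must-have and a can't-have capability mask, which is how exactly one bzero variant lands in the page: bzero_sse2 above is tagged (kHasSSE2, kHasSSE4_2) — requires SSE2, excluded on SSE4.2 parts — while this one requires kHasSSE4_2 and excludes nothing. The selection test in commpage_stuff_routine() reduces to something like this sketch (the struct is pared down to just the two masks; field names are illustrative):

    #include <stdint.h>

    typedef struct {
        uint32_t musthave;   /* all of these bits must be set */
        uint32_t canthave;   /* all of these bits must be clear */
    } descriptor_caps;

    static int
    descriptor_matches(const descriptor_caps *d, uint32_t cpu_capabilities)
    {
        return (cpu_capabilities & d->musthave) == d->musthave
            && (cpu_capabilities & d->canthave) == 0;
    }
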
diff --git a/osfmk/i386/commpage/bzero_sse42_64.s b/osfmk/i386/commpage/bzero_sse42_64.s
deleted file mode 100644
index 999b9311a..000000000
--- a/osfmk/i386/commpage/bzero_sse42_64.s
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-/*
- * Bzero, tuned for processors with SSE4.2 and 64-byte cache lines, i.e. Nehalem.
- * We don't actually use SSE4.2, but rather use it to identify Nehalem.
- * This is the 64-bit version.
- *
- * We do not use nontemporal operations, but use MOVDQA in preference to REP/STOS. 
- *
- * This routine is also used for memset(p,0,n), which is a common case
- * since gcc sometimes silently maps bzero() into memset().  As a result,
- * we always load the original ptr into %rax before returning.
- */
-
-#define kShort		80		// too short to bother with SSE (must be >=80)
-
-
-// void	bzero(void *b, size_t len);
-	
-COMMPAGE_FUNCTION_START(bzero_sse42_64, 64, 5)
-	pushq	%rbp			// set up a frame for backtraces
-	movq	%rsp,%rbp
-        xorl    %eax,%eax               // set fill data to 0
-	movq	%rdi,%r11		// save original ptr as return value
-        cmpq    $(kShort),%rsi          // long enough for SSE?
-        jg	LNotShort               // yes
-        
-// Here for short operands or the end of long ones.
-//      %esi = length (<= kShort)
-//      %rdi = ptr
-//      %eax = zero
-
-Lshort:
-	cmpl	$12,%esi		// long enough to word align?
-	jge	3f			// yes
-	test	%esi,%esi		// length==0?
-	jz	6f
-1:
-	movb	%al,(%rdi)		// zero a byte
-	incq	%rdi
-	decl	%esi
-	jnz	1b
-	jmp	6f
-2:
-	movb	%al,(%rdi)		// zero a byte
-	incq	%rdi
-	decl	%esi
-3:
-	testl	$3,%edi			// is ptr doubleword aligned?
-	jnz	2b			// no
-	movl	%esi,%ecx		// copy length
-	shrl	$2,%esi			// #doublewords to store
-4:      
-	movl	%eax,(%rdi)		// zero an aligned doubleword
-	addq	$4,%rdi
-	decl	%esi
-	jnz	4b
-	andl	$3,%ecx			// mask down to #bytes at end (0..3)
-	jz	6f			// none
-5:
-	movb	%al,(%rdi)		// zero a byte
-	incq	%rdi
-	decl	%ecx
-	jnz	5b
-6:
-	movq	%r11,%rax		// set return value in case this was a call of memset()
-	popq	%rbp
-        ret
-		
-        
-// We will be using SSE, so align ptr.
-//      %rsi = length (> kShort)
-//      %rdi = ptr
-//      %eax = zero
-
-LNotShort:
-	testl	$3,%edi			// 4-byte aligned?
-	jz	2f			// yes
-	movb	%al,(%rdi)		// zero another byte
-	incq	%rdi
-	decq	%rsi
-	jmp	LNotShort
-1:					// zero doublewords until 16-byte aligned
-	movl	%eax,(%rdi)
-	addq	$4,%rdi
-	subq	$4,%rsi
-2:
-	testl	$15,%edi		// 16-byte aligned?
-	jnz	1b			// no
-	
-// Destination is now 16-byte aligned.  Prepare to loop over 64-byte chunks.
-//      %rsi = length (> (kShort-15))
-//      %rdi = ptr (aligned)
-//      %eax = zero
-
-LDestAligned:
-        movq    %rsi,%rcx
-        andl    $63,%esi                // mask down to residual length (0..63)
-        andq    $-64,%rcx               // get #bytes we will zero in this loop
-        pxor    %xmm0,%xmm0             // zero an SSE register
-        addq    %rcx,%rdi               // increment ptr by length to move
-        negq    %rcx			// negate length to move
-	jmp	1f
-	
-// Loop over 64-byte chunks, storing into cache.
-
-	.align	4,0x90			// keep inner loops 16-byte aligned
-1:
-        movdqa  %xmm0,(%rdi,%rcx)
-        movdqa  %xmm0,16(%rdi,%rcx)
-        movdqa  %xmm0,32(%rdi,%rcx)
-        movdqa  %xmm0,48(%rdi,%rcx)
-        addq    $64,%rcx
-        jne     1b
-	
-	jmp	Lshort
-
-
-	COMMPAGE_DESCRIPTOR(bzero_sse42_64,_COMM_PAGE_BZERO,kHasSSE4_2,0)
diff --git a/osfmk/i386/commpage/cacheflush.s b/osfmk/i386/commpage/cacheflush.s
deleted file mode 100644
index 4d9e98b0b..000000000
--- a/osfmk/i386/commpage/cacheflush.s
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-// void sysFlushDcache( void *p, size_t len );
-// 32-bit version
-
-COMMPAGE_FUNCTION_START(sys_flush_dcache, 32, 4)
-	movl	8(%esp),%ecx		// get length
-	movl	4(%esp),%edx		// get ptr
-	testl	%ecx,%ecx		// length 0?
-	jz	2f			// yes
-	mfence				// ensure previous stores make it to memory
-	clflush	-1(%edx,%ecx)		// make sure last line is flushed
-1:
-	clflush	(%edx)			// flush a line
-	addl	$64,%edx
-	subl	$64,%ecx
-	ja	1b
-	mfence				// make sure memory is updated before we return
-2:
-	ret
-COMMPAGE_DESCRIPTOR(sys_flush_dcache,_COMM_PAGE_FLUSH_DCACHE,kCache64,0)
-
-
-// void sysFlushDcache( void *p, size_t len );
-// 64-bit version
-// %rdi = ptr,  %rsi = length
-COMMPAGE_FUNCTION_START(sys_flush_dcache_64, 64, 4)
-	testq	%rsi,%rsi		// length 0?
-	jz	2f			// yes
-	mfence				// ensure previous stores make it to memory
-	clflush	-1(%rdi,%rsi)		// make sure last line is flushed
-1:
-	clflush	(%rdi)			// flush a line
-	addq	$64,%rdi
-	subq	$64,%rsi
-	ja	1b
-	mfence				// make sure memory is updated before we return
-2:
-	ret
-COMMPAGE_DESCRIPTOR(sys_flush_dcache_64,_COMM_PAGE_FLUSH_DCACHE,kCache64,0)
-
-// void sysIcacheInvalidate( void *p, size_t len );
-
-COMMPAGE_FUNCTION_START(sys_icache_invalidate, 32, 4)
-	// This is a NOP on Intel processors, since the intent of the API
-	// is to make data executable, and Intel L1I caches are coherent with L1D.
-	// We can use the same routine in both 32- and 64-bit mode, since it is
-	// just a RET instruction.
-	ret
-COMMPAGE_DESCRIPTOR(sys_icache_invalidate,_COMM_PAGE_FLUSH_ICACHE,0,0)
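
sysFlushDcache walks the range one cache line at a time; the extra CLFLUSH on the last byte up front covers a final line that the 64-byte stride would otherwise miss when the length is not line-aligned. The same loop in intrinsics form (flush_dcache is a hypothetical name; assumes 64-byte lines, as the kCache64 descriptor bit asserts):

    #include <emmintrin.h>   /* SSE2: _mm_clflush, _mm_mfence */
    #include <stddef.h>

    static void
    flush_dcache(const void *p, size_t len)
    {
        const char *c = p;

        if (len == 0)
            return;
        _mm_mfence();                 /* prior stores reach memory first */
        _mm_clflush(c + len - 1);     /* make sure last line is flushed */
        for (size_t i = 0; i < len; i += 64)
            _mm_clflush(c + i);
        _mm_mfence();                 /* flushes complete before return */
    }
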
diff --git a/osfmk/i386/commpage/commpage.c b/osfmk/i386/commpage/commpage.c
index 53030645b..cc52576c5 100644
--- a/osfmk/i386/commpage/commpage.c
+++ b/osfmk/i386/commpage/commpage.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -52,7 +52,7 @@
 #include <mach/machine.h>
 #include <i386/cpuid.h>
 #include <i386/tsc.h>
-#include <i386/rtclock.h>
+#include <i386/rtclock_protos.h>
 #include <i386/cpu_data.h>
 #include <i386/machine_routines.h>
 #include <i386/misc_protos.h>
@@ -66,21 +66,18 @@
 #include <ipc/ipc_port.h>
 
 #include <kern/page_decrypt.h>
+#include <kern/processor.h>
 
 /* the lists of commpage routines are in commpage_asm.s  */
 extern	commpage_descriptor*	commpage_32_routines[];
 extern	commpage_descriptor*	commpage_64_routines[];
 
-/* translated commpage descriptors from commpage_sigs.c  */
-extern	commpage_descriptor sigdata_descriptor;
-extern	commpage_descriptor *ba_descriptors[];
-
 extern vm_map_t	commpage32_map;	// the shared submap, set up in vm init
 extern vm_map_t	commpage64_map;	// the shared submap, set up in vm init
 
 char	*commPagePtr32 = NULL;		// virtual addr in kernel map of 32-bit commpage
 char	*commPagePtr64 = NULL;		// ...and of 64-bit commpage
-int     _cpu_capabilities = 0;          // define the capability vector
+uint32_t     _cpu_capabilities = 0;          // define the capability vector
 
 int	noVMX = 0;		/* if true, do not set kHasAltivec in ppc _cpu_capabilities */
 
@@ -96,6 +93,8 @@ static commpage_address_t	commPageBaseOffset; // subtract from 32-bit runtime ad
 static	commpage_time_data	*time_data32 = NULL;
 static	commpage_time_data	*time_data64 = NULL;
 
+decl_simple_lock_data(static,commpage_active_cpus_lock);
+
 /* Allocate the commpage and add to the shared submap created by vm:
  * 	1. allocate a page in the kernel map (RW)
  *	2. wire it down
@@ -157,6 +156,13 @@ commpage_allocate(
 		panic("cannot map commpage");
 
 	ipc_port_release(handle);
+	
+	// Initialize the text section of the commpage with INT3
+	char *commpage_ptr = (char*)(intptr_t)kernel_addr;
+	vm_size_t i;
+	for( i = _COMM_PAGE_TEXT_START - _COMM_PAGE_START_ADDRESS; i < size; i++ )
+		// This is the hex for the X86 opcode INT3
+		commpage_ptr[i] = 0xCC;
 
 	return (void*)(intptr_t)kernel_addr;                     // return address in kernel map
 }
@@ -193,7 +199,7 @@ commpage_cpus( void )
 static void
 commpage_init_cpu_capabilities( void )
 {
-	int bits;
+	uint32_t bits;
 	int cpus;
 	ml_cpu_info_t cpu_info;
 
@@ -201,6 +207,9 @@ commpage_init_cpu_capabilities( void )
 	ml_cpu_get_info(&cpu_info);
 	
 	switch (cpu_info.vector_unit) {
+		case 9:
+			bits |= kHasAVX1_0;
+			/* fall thru */
 		case 8:
 			bits |= kHasSSE4_2;
 			/* fall thru */
@@ -275,48 +284,13 @@ commpage_stuff(
     void	*dest = commpage_addr_of(address);
     
     if (address < next)
-    	panic("commpage overlap at address 0x%p, 0x%x < 0x%x", dest, address, next);
+       panic("commpage overlap at address 0x%p, 0x%x < 0x%x", dest, address, next);
     
     bcopy(source,dest,length);
     
     next = address + length;
 }
 
-static void
-commpage_stuff_swap(
-	commpage_address_t	address,
-	void	*source,
-	int	length,
-	int	legacy )
-{
-	if ( legacy ) {
-		void *dest = commpage_addr_of(address);
-		dest = (void *)((uintptr_t) dest + _COMM_PAGE_SIGS_OFFSET);
-		switch (length) {
-			case 2:
-				OSWriteSwapInt16(dest, 0, *(uint16_t *)source);
-				break;
-			case 4:
-				OSWriteSwapInt32(dest, 0, *(uint32_t *)source);
-				break;
-			case 8:
-				OSWriteSwapInt64(dest, 0, *(uint64_t *)source);
-				break;
-		}
-	}
-}
-
-static void
-commpage_stuff2(
-	commpage_address_t	address,
-	void	*source,
-	int	length,
-	int	legacy )
-{
-	commpage_stuff_swap(address, source, length, legacy);
-	commpage_stuff(address, source, length);
-}
-
 /* Copy a routine into comm page if it matches running machine.
  */
 static void
@@ -345,8 +319,6 @@ commpage_stuff_routine(
 }
 
 /* Fill in the 32- or 64-bit commpage.  Called once for each.
- * The 32-bit ("legacy") commpage has a bunch of stuff added to it
- * for translated processes, some of which is byte-swapped.
  */
 
 static void
@@ -356,17 +328,16 @@ commpage_populate_one(
 	size_t		area_used,	// _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED
 	commpage_address_t base_offset,	// will become commPageBaseOffset
 	commpage_descriptor** commpage_routines, // list of routine ptrs for this commpage
-	boolean_t	legacy,		// true if 32-bit commpage
 	commpage_time_data** time_data,	// &time_data32 or &time_data64
 	const char*	signature )	// "commpage 32-bit" or "commpage 64-bit"
 {
+	uint8_t	c1;
    	short   c2;
-	int	c4;
-	static double   two52 = 1048576.0 * 1048576.0 * 4096.0; // 2**52
-	static double   ten6 = 1000000.0;                       // 10**6
+	int	    c4;
+	uint64_t c8;
+	uint32_t	cfamily;
 	commpage_descriptor **rd;
 	short   version = _COMM_PAGE_THIS_VERSION;
-	int		swapcaps;
 
 	next = 0;
 	cur_routine = 0;
@@ -380,25 +351,11 @@ commpage_populate_one(
 	* ascending order, so we can check for overlap and panic if so.
 	*/
 	commpage_stuff(_COMM_PAGE_SIGNATURE,signature,(int)strlen(signature));
-	commpage_stuff2(_COMM_PAGE_VERSION,&version,sizeof(short),legacy);
+	commpage_stuff(_COMM_PAGE_VERSION,&version,sizeof(short));
 	commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES,&_cpu_capabilities,sizeof(int));
 
-	/* excuse our magic constants, we cannot include ppc/cpu_capabilities.h */
-	/* always set kCache32 and kDcbaAvailable */
-	swapcaps =  0x44;
-	if ( _cpu_capabilities & kUP )
-		swapcaps |= (kUP + (1 << kNumCPUsShift));
-	else
-		swapcaps |= 2 << kNumCPUsShift;	/* limit #cpus to 2 */
-	if ( ! noVMX )		/* if rosetta will be emulating altivec... */
-		swapcaps |= 0x101;	/* ...then set kHasAltivec and kDataStreamsAvailable too */
-	commpage_stuff_swap(_COMM_PAGE_CPU_CAPABILITIES, &swapcaps, sizeof(int), legacy);
-	c2 = 32;
-	commpage_stuff_swap(_COMM_PAGE_CACHE_LINESIZE,&c2,2,legacy);
-
-	if (_cpu_capabilities & kCache32)
-		c2 = 32;
-	else if (_cpu_capabilities & kCache64)
+	c2 = 32;  // default
+	if (_cpu_capabilities & kCache64)
 		c2 = 64;
 	else if (_cpu_capabilities & kCache128)
 		c2 = 128;
@@ -407,10 +364,17 @@ commpage_populate_one(
 	c4 = MP_SPIN_TRIES;
 	commpage_stuff(_COMM_PAGE_SPIN_COUNT,&c4,4);
 
-	if ( legacy ) {
-		commpage_stuff2(_COMM_PAGE_2_TO_52,&two52,8,legacy);
-		commpage_stuff2(_COMM_PAGE_10_TO_6,&ten6,8,legacy);
-	}
+	/* machine_info valid after ml_get_max_cpus() */
+	c1 = machine_info.physical_cpu_max;
+	commpage_stuff(_COMM_PAGE_PHYSICAL_CPUS,&c1,1);
+	c1 = machine_info.logical_cpu_max;
+	commpage_stuff(_COMM_PAGE_LOGICAL_CPUS,&c1,1);
+
+	c8 = ml_cpu_cache_size(0);
+	commpage_stuff(_COMM_PAGE_MEMORY_SIZE, &c8, 8);
+
+	cfamily = cpuid_info()->cpuid_cpufamily;
+	commpage_stuff(_COMM_PAGE_CPUFAMILY, &cfamily, 4);
 
 	for( rd = commpage_routines; *rd != NULL ; rd++ )
 		commpage_stuff_routine(*rd);
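
Because these fields land at fixed, published offsets, user space reads them with plain loads. A hedged sketch, assuming the _COMM_PAGE_LOGICAL_CPUS and _COMM_PAGE_CPUFAMILY constants exported by <machine/cpu_capabilities.h> (the same header the kernel side uses):

#include <machine/cpu_capabilities.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t  ncpu   = *(volatile uint8_t  *)_COMM_PAGE_LOGICAL_CPUS;
	uint32_t family = *(volatile uint32_t *)_COMM_PAGE_CPUFAMILY;

	printf("logical cpus: %u  cpufamily: 0x%x\n", (unsigned)ncpu, family);
	return 0;
}
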
@@ -421,14 +385,6 @@ commpage_populate_one(
 	if (next > _COMM_PAGE_END)
 		panic("commpage overflow: next = 0x%08x, commPagePtr = 0x%p", next, commPagePtr);
 
-	if ( legacy ) {
-		next = 0;
-		for( rd = ba_descriptors; *rd != NULL ; rd++ )
-			commpage_stuff_routine(*rd);
-
-		next = 0;
-		commpage_stuff_routine(&sigdata_descriptor);
-	}	
 }
 
 
@@ -449,7 +405,6 @@ commpage_populate( void )
 				_COMM_PAGE32_AREA_USED,
 				_COMM_PAGE32_BASE_ADDRESS,
 				commpage_32_routines, 
-				TRUE,			/* legacy (32-bit) commpage */
 				&time_data32,
 				"commpage 32-bit");
 #ifndef __LP64__
@@ -464,7 +419,6 @@ commpage_populate( void )
 					_COMM_PAGE64_AREA_USED,
 					_COMM_PAGE32_START_ADDRESS, /* commpage addresses are relative to 32-bit commpage placement */
 					commpage_64_routines, 
-					FALSE,		/* not a legacy commpage */
 					&time_data64,
 					"commpage 64-bit");
 #ifndef __LP64__
@@ -473,6 +427,9 @@ commpage_populate( void )
 #endif
 	}
 
+	simple_lock_init(&commpage_active_cpus_lock, 0);
+
+	commpage_update_active_cpus();
 	rtc_nanotime_init_commpage();
 }
 
@@ -629,6 +586,34 @@ commpage_set_spin_count(
 
 }
 
+/* Called whenever a logical CPU goes online or offline; refreshes the commpage count */
+void
+commpage_update_active_cpus(void)
+{
+	char	    *cp;
+	volatile uint8_t    *ip;
+	
+	/* Nothing to update until at least the 32-bit commpage is initialized */
+	if (!commPagePtr32)
+		return;
+
+	simple_lock(&commpage_active_cpus_lock);
+
+	cp = commPagePtr32;
+	cp += (_COMM_PAGE_ACTIVE_CPUS - _COMM_PAGE32_BASE_ADDRESS);
+	ip = (volatile uint8_t*) cp;
+	*ip = (uint8_t) processor_avail_count;
+	
+	cp = commPagePtr64;
+	if ( cp ) {
+		cp += (_COMM_PAGE_ACTIVE_CPUS - _COMM_PAGE32_START_ADDRESS);
+		ip = (volatile uint8_t*) cp;
+		*ip = (uint8_t) processor_avail_count;
+	}
+
+	simple_unlock(&commpage_active_cpus_lock);
+}
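
A user-space consumer would treat the byte as volatile, since the kernel rewrites it whenever a CPU comes or goes; a sketch under the same <machine/cpu_capabilities.h> assumption as above:

#include <machine/cpu_capabilities.h>
#include <stdint.h>

static inline uint8_t commpage_active_cpus_now(void)
{
	/* single-byte read; the kernel rewrites it on CPU online/offline */
	return *(volatile uint8_t *)_COMM_PAGE_ACTIVE_CPUS;
}
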
+
 
 /* Check to see if a given address is in the Preemption Free Zone (PFZ) */
 
diff --git a/osfmk/i386/commpage/commpage.h b/osfmk/i386/commpage/commpage.h
index 013ca246e..c8369d78d 100644
--- a/osfmk/i386/commpage/commpage.h
+++ b/osfmk/i386/commpage/commpage.h
@@ -160,6 +160,7 @@ extern  void	commpage_set_nanotime(uint64_t tsc_base, uint64_t ns_base, uint32_t
 extern	void	commpage_set_memory_pressure(unsigned int  pressure);
 extern	void	commpage_set_spin_count(unsigned int  count);
 extern	void	commpage_sched_gen_inc(void);
+extern	void	commpage_update_active_cpus(void);
 
 extern	uint32_t	commpage_is_in_pfz32(uint32_t);
 extern	uint32_t	commpage_is_in_pfz64(addr64_t);
diff --git a/osfmk/i386/commpage/commpage_asm.s b/osfmk/i386/commpage/commpage_asm.s
index 4e3ad82e2..af6227f72 100644
--- a/osfmk/i386/commpage/commpage_asm.s
+++ b/osfmk/i386/commpage/commpage_asm.s
@@ -90,49 +90,8 @@ _commpage_sched_gen_inc:
 	.align	3
 	.globl	_commpage_32_routines
 _commpage_32_routines:
-	COMMPAGE_DESCRIPTOR_REFERENCE(compare_and_swap32_mp)
-	COMMPAGE_DESCRIPTOR_REFERENCE(compare_and_swap32_up)
-	COMMPAGE_DESCRIPTOR_REFERENCE(compare_and_swap64_mp)
-	COMMPAGE_DESCRIPTOR_REFERENCE(compare_and_swap64_up)
-	COMMPAGE_DESCRIPTOR_REFERENCE(AtomicEnqueue)
-	COMMPAGE_DESCRIPTOR_REFERENCE(AtomicDequeue)
-	COMMPAGE_DESCRIPTOR_REFERENCE(memory_barrier)
-	COMMPAGE_DESCRIPTOR_REFERENCE(memory_barrier_sse2)
-	COMMPAGE_DESCRIPTOR_REFERENCE(atomic_add32_mp)
-	COMMPAGE_DESCRIPTOR_REFERENCE(atomic_add32_up)
-	COMMPAGE_DESCRIPTOR_REFERENCE(cpu_number)
-	COMMPAGE_DESCRIPTOR_REFERENCE(mach_absolute_time)
-	COMMPAGE_DESCRIPTOR_REFERENCE(spin_lock_try_mp)
-	COMMPAGE_DESCRIPTOR_REFERENCE(spin_lock_try_up)
-	COMMPAGE_DESCRIPTOR_REFERENCE(spin_lock_mp)
-	COMMPAGE_DESCRIPTOR_REFERENCE(spin_lock_up)
-	COMMPAGE_DESCRIPTOR_REFERENCE(spin_unlock)
-	COMMPAGE_DESCRIPTOR_REFERENCE(pthread_getspecific)
-	COMMPAGE_DESCRIPTOR_REFERENCE(gettimeofday)
-	COMMPAGE_DESCRIPTOR_REFERENCE(sys_flush_dcache)
-	COMMPAGE_DESCRIPTOR_REFERENCE(sys_icache_invalidate)
-	COMMPAGE_DESCRIPTOR_REFERENCE(pthread_self)
 	COMMPAGE_DESCRIPTOR_REFERENCE(preempt)
-//	COMMPAGE_DESCRIPTOR_REFERENCE(relinquish)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bit_test_and_set_mp)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bit_test_and_set_up)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bit_test_and_clear_mp)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bit_test_and_clear_up)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bzero_scalar)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bzero_sse2)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bzero_sse42)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bcopy_scalar)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bcopy_sse2)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bcopy_sse3x)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bcopy_sse42)
-	COMMPAGE_DESCRIPTOR_REFERENCE(memset_pattern_sse2)
-	COMMPAGE_DESCRIPTOR_REFERENCE(longcopy_sse3x)
 	COMMPAGE_DESCRIPTOR_REFERENCE(backoff)
-	COMMPAGE_DESCRIPTOR_REFERENCE(AtomicFifoEnqueue)
-	COMMPAGE_DESCRIPTOR_REFERENCE(AtomicFifoDequeue)
-	COMMPAGE_DESCRIPTOR_REFERENCE(nanotime)
-	COMMPAGE_DESCRIPTOR_REFERENCE(nanotime_slow)
-	COMMPAGE_DESCRIPTOR_REFERENCE(pthread_mutex_lock)
 	COMMPAGE_DESCRIPTOR_REFERENCE(pfz_enqueue)
 	COMMPAGE_DESCRIPTOR_REFERENCE(pfz_dequeue)
 	COMMPAGE_DESCRIPTOR_REFERENCE(pfz_mutex_lock)
@@ -151,45 +110,8 @@ _commpage_32_routines:
 	.align	3
 	.globl	_commpage_64_routines
 _commpage_64_routines:
-	COMMPAGE_DESCRIPTOR_REFERENCE(compare_and_swap32_mp_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(compare_and_swap32_up_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(compare_and_swap64_mp_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(compare_and_swap64_up_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(AtomicEnqueue_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(AtomicDequeue_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(memory_barrier_sse2)	/* same routine as 32-bit version */
-	COMMPAGE_DESCRIPTOR_REFERENCE(atomic_add32_mp_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(atomic_add32_up_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(atomic_add64_mp_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(atomic_add64_up_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(cpu_number_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(mach_absolute_time)
-	COMMPAGE_DESCRIPTOR_REFERENCE(spin_lock_try_mp_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(spin_lock_try_up_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(spin_lock_mp_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(spin_lock_up_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(spin_unlock_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(pthread_getspecific_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(gettimeofday_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(sys_flush_dcache_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(sys_icache_invalidate)	/* same routine as 32-bit version, just a "ret" */
-	COMMPAGE_DESCRIPTOR_REFERENCE(pthread_self_64)
 	COMMPAGE_DESCRIPTOR_REFERENCE(preempt_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bit_test_and_set_mp_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bit_test_and_set_up_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bit_test_and_clear_mp_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bit_test_and_clear_up_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bzero_sse2_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bzero_sse42_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bcopy_sse3x_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(bcopy_sse42_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(memset_pattern_sse2_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(longcopy_sse3x_64)
 	COMMPAGE_DESCRIPTOR_REFERENCE(backoff_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(AtomicFifoEnqueue_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(AtomicFifoDequeue_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(nanotime_64)
-	COMMPAGE_DESCRIPTOR_REFERENCE(pthread_mutex_lock_64)
 	COMMPAGE_DESCRIPTOR_REFERENCE(pfz_enqueue_64)
 	COMMPAGE_DESCRIPTOR_REFERENCE(pfz_dequeue_64)
 	COMMPAGE_DESCRIPTOR_REFERENCE(pfz_mutex_lock_64)
diff --git a/osfmk/i386/commpage/commpage_gettimeofday.s b/osfmk/i386/commpage/commpage_gettimeofday.s
deleted file mode 100644
index afa87ca02..000000000
--- a/osfmk/i386/commpage/commpage_gettimeofday.s
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-#define NSEC_PER_SEC	1000*1000*1000
-#define NSEC_PER_USEC	1000
-
-COMMPAGE_FUNCTION_START(gettimeofday, 32, 4)
-	push	%ebp
-	mov	%esp,%ebp
-	push	%esi
-	push	%ebx
-
-0:
-	movl	_COMM_PAGE_GTOD_GENERATION,%esi	/* get generation (0 if disabled) */
-	testl	%esi,%esi			/* disabled? */
-	jz	4f
-
-	mov	$ _COMM_PAGE_NANOTIME,%eax
-	call	*%eax				/* get ns in %edx:%eax */
-
-	
-	sub	_COMM_PAGE_GTOD_NS_BASE,%eax
-	sbb	_COMM_PAGE_GTOD_NS_BASE+4,%edx
-	mov	_COMM_PAGE_GTOD_SEC_BASE,%ebx	/* load all the data before checking generation */
-	mov	$ NSEC_PER_SEC,%ecx
-	
-	cmpl	_COMM_PAGE_GTOD_GENERATION,%esi	/* has time data changed out from under us? */
-	jne	0b
-	
-	div	%ecx
-	add	%eax,%ebx
-
-	mov	$ NSEC_PER_USEC,%ecx
-	mov	%edx,%eax
-	xor	%edx,%edx
-	div	%ecx
-
-	mov	8(%ebp),%ecx
-	mov	%ebx,(%ecx)
-	mov	%eax,4(%ecx)
-	xor	%eax,%eax
-
-3:
-	pop	%ebx
-	pop	%esi
-	pop	%ebp
-	ret
-4:				/* fail */
-	movl	$1,%eax
-	jmp	3b
-COMMPAGE_DESCRIPTOR(gettimeofday,_COMM_PAGE_GETTIMEOFDAY,0,0)
-
-
-COMMPAGE_FUNCTION_START(gettimeofday_64, 64, 4)
-	// %rdi = ptr to timeval
-	pushq	%rbp			// set up a frame for backtraces
-	movq	%rsp,%rbp
-	movq	%rdi,%r9		// save ptr to timeval
-	movq	$_COMM_PAGE_32_TO_64(_COMM_PAGE_TIME_DATA_START),%r10
-0:
-	movl	_GTOD_GENERATION(%r10),%r11d	// get generation (0 if disabled)
-	testl	%r11d,%r11d		// disabled?
-	jz	4f
-	
-	movq	$_COMM_PAGE_32_TO_64(_COMM_PAGE_NANOTIME),%rax
-	call	*%rax			// get %rax <- nanotime(), preserving %r9, %r10 and %r11
-	
-	movl	_GTOD_SEC_BASE(%r10),%r8d	// get _COMM_PAGE_TIMESTAMP
-	subq	_GTOD_NS_BASE(%r10),%rax	// generate nanoseconds since timestamp
-	cmpl	_GTOD_GENERATION(%r10),%r11d	// has data changed out from under us?
-	jne	0b
-	
-	movl	$ NSEC_PER_SEC,%ecx
-	movq	%rax,%rdx
-	shrq	$32,%rdx		// get high half of delta in %edx
-	divl	%ecx			// %eax <- seconds since timestamp, %edx <- nanoseconds
-	addl	%eax,%r8d		// add seconds elapsed to timestamp seconds
-
-	movl	$ NSEC_PER_USEC,%ecx
-	movl	%edx,%eax
-	xorl	%edx,%edx
-	divl	%ecx			// divide residual ns by 1000 to get residual us in %eax
-	
-	movq	%r8,(%r9)		// store 64-bit seconds into timeval
-	movl	%eax,8(%r9)		// store 32-bit useconds into timeval
-	xorl	%eax,%eax		// return 0 for success
-3:
-	popq	%rbp
-	ret
-4:					// fail
-	movl	$1,%eax
-	jmp	3b
-COMMPAGE_DESCRIPTOR(gettimeofday_64,_COMM_PAGE_GETTIMEOFDAY,0,0)
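
Both deleted routines follow the same seqlock-style protocol: snapshot the generation (0 means disabled), read the time data, then retry if the generation moved mid-read. A C model of that loop; the struct and the nanotime() stand-in are illustrative, not the commpage's real layout:

#include <stdint.h>

#define NSEC_PER_SEC	1000000000ULL
#define NSEC_PER_USEC	1000ULL

struct gtod_data {			/* models the _COMM_PAGE_GTOD_* fields */
	volatile uint32_t generation;	/* 0 => disabled or being updated */
	uint64_t ns_base;
	uint64_t sec_base;
};

extern uint64_t nanotime(void);		/* stands in for _COMM_PAGE_NANOTIME */

static int gettimeofday_model(const struct gtod_data *g,
			      uint64_t *sec, uint32_t *usec)
{
	uint32_t gen;
	uint64_t ns;

	do {
		gen = g->generation;
		if (gen == 0)
			return 1;	/* disabled: caller falls back to the syscall */
		ns   = nanotime() - g->ns_base;
		*sec = g->sec_base;
	} while (gen != g->generation);	/* data changed out from under us? */

	*sec += ns / NSEC_PER_SEC;
	*usec = (uint32_t)((ns % NSEC_PER_SEC) / NSEC_PER_USEC);
	return 0;
}
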
diff --git a/osfmk/i386/commpage/commpage_mach_absolute_time.s b/osfmk/i386/commpage/commpage_mach_absolute_time.s
deleted file mode 100644
index 590e4d7b6..000000000
--- a/osfmk/i386/commpage/commpage_mach_absolute_time.s
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-#include <i386/asm.h>
-
-#include <assym.s>
-
-COMMPAGE_FUNCTION_START(mach_absolute_time, 32, 4)
-	int	$0x3
-	ret
-COMMPAGE_DESCRIPTOR(mach_absolute_time,_COMM_PAGE_ABSOLUTE_TIME,0,0)
-
-
-/* return nanotime in %edx:%eax */
-
-COMMPAGE_FUNCTION_START(nanotime, 32, 4)
-	pushl	%ebp
-	movl	%esp,%ebp
-	pushl	%esi
-	pushl	%ebx
-
-0:
-	movl	_COMM_PAGE_NT_GENERATION,%esi	/* get generation (0 if being changed) */
-	testl	%esi,%esi			/* if being updated, loop until stable */
-	jz	0b
-
-	lfence
-	rdtsc					/* get TSC in %edx:%eax */
-	lfence
-
-	subl	_COMM_PAGE_NT_TSC_BASE,%eax
-	sbbl	_COMM_PAGE_NT_TSC_BASE+4,%edx
-
-	movl	_COMM_PAGE_NT_SCALE,%ecx
-
-	movl	%edx,%ebx
-	mull	%ecx
-	movl	%ebx,%eax
-	movl	%edx,%ebx
-	mull	%ecx
-	addl	%ebx,%eax
-	adcl	$0,%edx
-
-	addl	_COMM_PAGE_NT_NS_BASE,%eax
-	adcl	_COMM_PAGE_NT_NS_BASE+4,%edx
-
-	cmpl	_COMM_PAGE_NT_GENERATION,%esi	/* have the parameters changed? */
-	jne	0b				/* yes, loop until stable */
-
-	popl	%ebx
-	popl	%esi
-	popl	%ebp
-	ret
-COMMPAGE_DESCRIPTOR(nanotime,_COMM_PAGE_NANOTIME,0,kSlow)
-
-
-/* nanotime routine for machines slower than ~1GHz (SLOW_TSC_THRESHOLD) */
-COMMPAGE_FUNCTION_START(nanotime_slow, 32, 4)
-	push	%ebp
-	mov	%esp,%ebp
-	push	%esi
-	push	%edi
-	push	%ebx
-
-0:
-	movl	_COMM_PAGE_NT_GENERATION,%esi
-	testl	%esi,%esi			/* if generation is 0, data being changed */
-	jz	0b				/* so loop until stable */
-
-	lfence
-	rdtsc					/* get TSC in %edx:%eax */
-	lfence
-	subl	_COMM_PAGE_NT_TSC_BASE,%eax
-	sbbl	_COMM_PAGE_NT_TSC_BASE+4,%edx
-
-	pushl	%esi				/* save generation */
-	/*
-	 * Do the math to convert tsc ticks to nanoseconds.  We first
-	 * do long multiply of 1 billion times the tsc.  Then we do
-	 * long division by the tsc frequency
-	 */
-	mov	$1000000000, %ecx		/* number of nanoseconds in a second */
-	mov	%edx, %ebx
-	mul	%ecx
-	mov	%edx, %edi
-	mov	%eax, %esi
-	mov	%ebx, %eax
-	mul	%ecx
-	add	%edi, %eax
-	adc	$0, %edx			/* result in edx:eax:esi */
-	mov	%eax, %edi
-	mov	_COMM_PAGE_NT_SHIFT,%ecx	/* overloaded as the low 32 bits of tscFreq */
-	xor	%eax, %eax
-	xchg	%edx, %eax
-	div	%ecx
-	xor	%eax, %eax
-	mov	%edi, %eax
-	div	%ecx
-	mov	%eax, %ebx
-	mov	%esi, %eax
-	div	%ecx
-	mov	%ebx, %edx			/* result in edx:eax */
-	popl	%esi				/* recover generation */
-
-	add	_COMM_PAGE_NT_NS_BASE,%eax
-	adc	_COMM_PAGE_NT_NS_BASE+4,%edx
-
-	cmpl	_COMM_PAGE_NT_GENERATION,%esi	/* have the parameters changed? */
-	jne	0b				/* yes, loop until stable */
-
-	pop	%ebx
-	pop	%edi
-	pop	%esi
-	pop	%ebp
-	ret					/* result in edx:eax */
-COMMPAGE_DESCRIPTOR(nanotime_slow,_COMM_PAGE_NANOTIME,kSlow,0)
-
-
-/* The 64-bit version.  We return the 64-bit nanotime in %rax,
- * and by convention we must preserve %r9, %r10, and %r11.
- */
-COMMPAGE_FUNCTION_START(nanotime_64, 64, 4)
-	pushq	%rbp				// set up a frame for backtraces
-	movq	%rsp,%rbp
-	movq	$_COMM_PAGE_32_TO_64(_COMM_PAGE_TIME_DATA_START),%rsi
-1:
-	movl	_NT_GENERATION(%rsi),%r8d	// get generation
-	testl	%r8d,%r8d			// if 0, data is being changed...
-	jz	1b				// ...so loop until stable
-	lfence
-	rdtsc					// edx:eax := tsc
-	lfence
-	shlq	$32,%rdx			// rax := ((edx << 32) | eax), ie 64-bit tsc
-	orq	%rdx,%rax
-	subq	_NT_TSC_BASE(%rsi), %rax	// rax := (tsc - base_tsc)
-	movl	_NT_SCALE(%rsi),%ecx
-	mulq	%rcx				// rdx:rax := (tsc - base_tsc) * scale
-	shrdq	$32,%rdx,%rax			// _COMM_PAGE_NT_SHIFT is always 32
-	addq	_NT_NS_BASE(%rsi),%rax		// (((tsc - base_tsc) * scale) >> 32) + ns_base
-	
-	cmpl	_NT_GENERATION(%rsi),%r8d	// did the data change during computation?
-	jne	1b
-	popq	%rbp
-	ret
-COMMPAGE_DESCRIPTOR(nanotime_64,_COMM_PAGE_NANOTIME,0,kSlow)
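
Stripped of the register choreography, every nanotime variant above computes ns = (((tsc - tsc_base) * scale) >> 32) + ns_base, the shift being 32 as the 64-bit version notes. A sketch using a 128-bit intermediate where the 32-bit asm chains two 32x32->64 multiplies:

#include <stdint.h>

/* requires a compiler with __int128 (gcc/clang); the 32-bit routines
 * above get the same result with two 32x32->64 multiplies instead */
static uint64_t tsc_to_ns(uint64_t tsc, uint64_t tsc_base,
			  uint32_t scale, uint64_t ns_base)
{
	unsigned __int128 delta = tsc - tsc_base;

	return (uint64_t)((delta * scale) >> 32) + ns_base;
}
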
diff --git a/osfmk/i386/commpage/commpage_sigs.c b/osfmk/i386/commpage/commpage_sigs.c
deleted file mode 100644
index 0c100a276..000000000
--- a/osfmk/i386/commpage/commpage_sigs.c
+++ /dev/null
@@ -1,189 +0,0 @@
-#include "commpage.h"
-
-static unsigned int sigdata[] =
-{
-	0x06004018, 0x01000000, 0x6d6f635f, 0x65726170, 0x646e615f, 0x6177735f, 
-	0x00323370, 0x06004018, 0x2828e07c, 0x4018077c, 0x14008240, 0x2d29807c, 
-	0xf0ffa240, 0x01006038, 0x2000804e, 0x00006038, 0x2000804e, 0x06004018, 
-	0x00000000, 0x6d6f635f, 0x65726170, 0x646e615f, 0x6177735f, 0x00323370, 
-	0x06004018, 0x06004018, 0x01000000, 0x6d6f635f, 0x65726170, 0x646e615f, 
-	0x6177735f, 0x62323370, 0x00000000, 0x06004018, 0xac06007c, 0x2828e07c, 
-	0x4018077c, 0x18008240, 0x2d29807c, 0xf0ffa240, 0x2c01004c, 0x01006038, 
-	0x2000804e, 0x00006038, 0x2000804e, 0x2000804e, 0x06004018, 0x00000000, 
-	0x6d6f635f, 0x65726170, 0x646e615f, 0x6177735f, 0x62323370, 0x00000000, 
-	0x06004018, 0x06004018, 0x01000000, 0x6d6f635f, 0x65726170, 0x646e615f, 
-	0x6177735f, 0x62343670, 0x00000000, 0x06004018, 0xac04207c, 0xa828e07c, 
-	0x4018277c, 0x1800c240, 0xad29807c, 0xf0ffc240, 0x2c01004c, 0x01006038, 
-	0x2000804e, 0xf8ff0039, 0x00006038, 0xad09887c, 0x2000804e, 0x2000804e, 
-	0x06004018, 0x00000000, 0x6d6f635f, 0x65726170, 0x646e615f, 0x6177735f, 
-	0x62343670, 0x00000000, 0x06004018, 0x06004018, 0x01000000, 0x6d656d5f, 
-	0x5f746573, 0x74746170, 0x006e7265, 0x06004018, 0xc0008428, 0xa642407c, 
-	0x00804064, 0x10000571, 0xa643007c, 0x1000a038, 0xce48007c, 0x2000c038, 
-	0x7c008441, 0x30002039, 0x10008241, 0xce41007c, 0x10000839, 0xf0ff8438, 
-	0xbed18054, 0xec45007c, 0xbe068454, 0xffff0030, 0x40004039, 0x60006039, 
-	0xa603097c, 0xec45067c, 0x10000048, 0x00000060, 0x00000060, 0x00000060, 
-	0xec450a7c, 0xec450b7c, 0xce41007c, 0xce41057c, 0xce41067c, 0xce41097c, 
-	0x40000839, 0xe4ff0042, 0xce41007c, 0xce41057c, 0xce41067c, 0xce41097c, 
-	0x40000839, 0x7fd98054, 0xfe068454, 0x18008241, 0xa603097c, 0xce41007c, 
-	0xce41057c, 0x20000839, 0xf4ff0042, 0xa643407c, 0x2000804e, 0x2000804e, 
-	0x06004018, 0x00000000, 0x6d656d5f, 0x5f746573, 0x74746170, 0x006e7265, 
-	0x06004018, 0x06004018, 0x01000000, 0x6f74615f, 0x5f63696d, 0x75716e65, 
-	0x00657565, 0x06004018, 0x2818c07c, 0x2e29c47c, 0x00000060, 0x2d19807c, 
-	0x2000e24d, 0xecffff4b, 0x2000804e, 0x06004018, 0x00000000, 0x6f74615f, 
-	0x5f63696d, 0x75716e65, 0x00657565, 0x06004018, 0x06004018, 0x01000000, 
-	0x6f74615f, 0x5f63696d, 0x75716564, 0x00657565, 0x06004018, 0x781b657c, 
-	0x2828607c, 0x0000032c, 0x2000824d, 0x2e20c37c, 0x2d29c07c, 0xecffc240, 
-	0x00000060, 0x2000804e, 0x06004018, 0x00000000, 0x6f74615f, 0x5f63696d, 
-	0x75716564, 0x00657565, 0x06004018, 0x06004018, 0x01000000, 0x6f74615f, 
-	0x5f63696d, 0x72726162, 0x00726569, 0x06004018, 0x2000804e, 0x06004018, 
-	0x00000000, 0x6f74615f, 0x5f63696d, 0x72726162, 0x00726569, 0x06004018, 
-	0x06004018, 0x01000000, 0x6f74615f, 0x5f63696d, 0x5f646461, 0x00003233, 
-	0x06004018, 0x2820a07c, 0x142ac37c, 0x2d21c07c, 0xf4ffc240, 0x7833c37c, 
-	0x2000804e, 0x06004018, 0x00000000, 0x6f74615f, 0x5f63696d, 0x5f646461, 
-	0x00003233, 0x06004018, 0x06004018, 0x01000000, 0x63616d5f, 0x62615f68, 
-	0x756c6f73, 0x745f6574, 0x00656d69, 0x06004018, 0x00004018, 0x04006000, 
-	0x00000000, 0x00000000, 0x63616d5f, 0x62615f68, 0x756c6f73, 0x745f6574, 
-	0x5f656d69, 0x68676968, 0x00000000, 0x00004018, 0x00004018, 0x04008000, 
-	0x00000000, 0x00000000, 0x63616d5f, 0x62615f68, 0x756c6f73, 0x745f6574, 
-	0x5f656d69, 0x00776f6c, 0x00004018, 0x2000804e, 0x06004018, 0x00000000, 
-	0x63616d5f, 0x62615f68, 0x756c6f73, 0x745f6574, 0x00656d69, 0x06004018, 
-	0x06004018, 0x01000000, 0x6970735f, 0x6f6c5f6e, 0x745f6b63, 0x00007972, 
-	0x06004018, 0x05004018, 0x04006000, 0x05004018, 0x00004018, 0x04006000, 
-	0x00000000, 0x00000000, 0x6970735f, 0x6f6c5f6e, 0x745f6b63, 0x775f7972, 
-	0x70706172, 0x00007265, 0x00004018, 0x2000804e, 0x06004018, 0x00000000, 
-	0x6970735f, 0x6f6c5f6e, 0x745f6b63, 0x00007972, 0x06004018, 0x06004018, 
-	0x01000000, 0x6970735f, 0x6f6c5f6e, 0x00006b63, 0x06004018, 0x05004018, 
-	0x04006000, 0x05004018, 0x00004018, 0x00000000, 0x00000000, 0x00000000, 
-	0x6970735f, 0x6f6c5f6e, 0x775f6b63, 0x70706172, 0x00007265, 0x00004018, 
-	0x2000804e, 0x06004018, 0x00000000, 0x6970735f, 0x6f6c5f6e, 0x00006b63, 
-	0x06004018, 0x06004018, 0x01000000, 0x6970735f, 0x6e755f6e, 0x6b636f6c, 
-	0x00000000, 0x06004018, 0x05004018, 0x04006000, 0x05004018, 0x00004018, 
-	0x00000000, 0x00000000, 0x00000000, 0x6970735f, 0x6e755f6e, 0x6b636f6c, 
-	0x00000000, 0x00004018, 0x2000804e, 0x06004018, 0x00000000, 0x6970735f, 
-	0x6e755f6e, 0x6b636f6c, 0x00000000, 0x06004018, 0x06004018, 0x01000000, 
-	0x6874705f, 0x64616572, 0x7465675f, 0x63657073, 0x63696669, 0x00000000, 
-	0x06004018, 0x02004018, 0xc082ffff, 0x02004018, 0x2000804e, 0x06004018, 
-	0x00000000, 0x6874705f, 0x64616572, 0x7465675f, 0x63657073, 0x63696669, 
-	0x00000000, 0x06004018, 0x06004018, 0x01000000, 0x7465675f, 0x656d6974, 
-	0x6164666f, 0x00000079, 0x06004018, 0x05004018, 0x04006000, 0x05004018, 
-	0x00004018, 0x04006000, 0x00000000, 0x00000000, 0x7465675f, 0x656d6974, 
-	0x6164666f, 0x72775f79, 0x65707061, 0x00000072, 0x00004018, 0x2000804e, 
-	0x06004018, 0x00000000, 0x7465675f, 0x656d6974, 0x6164666f, 0x00000079, 
-	0x06004018, 0x06004018, 0x01000000, 0x7379735f, 0x6163645f, 0x5f656863, 
-	0x73756c66, 0x00000068, 0x06004018, 0x05004018, 0x04006000, 0x05004018, 
-	0x05004018, 0x04008000, 0x05004018, 0x00004018, 0x00000000, 0x00000000, 
-	0x00000000, 0x7379735f, 0x6163645f, 0x5f656863, 0x73756c66, 0x00000068, 
-	0x00004018, 0x2000804e, 0x06004018, 0x00000000, 0x7379735f, 0x6163645f, 
-	0x5f656863, 0x73756c66, 0x00000068, 0x06004018, 0x06004018, 0x01000000, 
-	0x7379735f, 0x6163695f, 0x5f656863, 0x61766e69, 0x6164696c, 0x00006574, 
-	0x06004018, 0x05004018, 0x04006000, 0x05004018, 0x05004018, 0x04008000, 
-	0x05004018, 0x00004018, 0x00000000, 0x00000000, 0x00000000, 0x7379735f, 
-	0x6163695f, 0x5f656863, 0x61766e69, 0x6164696c, 0x775f6574, 0x70706172, 
-	0x00007265, 0x00004018, 0x2000804e, 0x06004018, 0x00000000, 0x7379735f, 
-	0x6163695f, 0x5f656863, 0x61766e69, 0x6164696c, 0x00006574, 0x06004018, 
-	0x06004018, 0x01000000, 0x6874705f, 0x64616572, 0x6c65735f, 0x00000066, 
-	0x06004018, 0x02004018, 0x8085ffff, 0x02004018, 0x2000804e, 0x06004018, 
-	0x00000000, 0x6874705f, 0x64616572, 0x6c65735f, 0x00000066, 0x06004018, 
-	0x06004018, 0x01000000, 0x657a625f, 0x00006f72, 0x06004018, 0x05004018, 
-	0x04006000, 0x05004018, 0x05004018, 0x04008000, 0x05004018, 0x00004018, 
-	0x00000000, 0x00000000, 0x00000000, 0x657a625f, 0x00006f72, 0x00004018, 
-	0x2000804e, 0x06004018, 0x00000000, 0x657a625f, 0x00006f72, 0x06004018, 
-	0x06004018, 0x01000000, 0x6f63625f, 0x00007970, 0x06004018, 0x05004018, 
-	0x04006000, 0x05004018, 0x05004018, 0x04008000, 0x05004018, 0x05004018, 
-	0x0400a000, 0x05004018, 0x00004018, 0x00000000, 0x00000000, 0x00000000, 
-	0x6f63625f, 0x00007970, 0x00004018, 0x2000804e, 0x06004018, 0x00000000, 
-	0x6f63625f, 0x00007970, 0x06004018, 0x06004018, 0x01000000, 0x6d656d5f, 
-	0x65766f6d, 0x00000000, 0x06004018, 0x05004018, 0x04006000, 0x05004018, 
-	0x05004018, 0x04008000, 0x05004018, 0x05004018, 0x0400a000, 0x05004018, 
-	0x00004018, 0x00000000, 0x00000000, 0x00000000, 0x6d656d5f, 0x65766f6d, 
-	0x00000000, 0x00004018, 0x2000804e, 0x06004018, 0x00000000, 0x6d656d5f, 
-	0x65766f6d, 0x00000000, 0x06004018, 0x06004018, 0x01000000, 0x6e616e5f, 
-	0x6d69746f, 0x00000065, 0x06004018, 0x00004018, 0x04006000, 0x00000000, 
-	0x00000000, 0x6e616e5f, 0x6d69746f, 0x69685f65, 0x00006867, 0x00004018, 
-	0x00004018, 0x04008000, 0x00000000, 0x00000000, 0x6e616e5f, 0x6d69746f, 
-	0x6f6c5f65, 0x00000077, 0x00004018, 0x2000804e, 0x06004018, 0x00000000, 
-	0x6e616e5f, 0x6d69746f, 0x00000065, 0x06004018, 0x06004018, 0x01000000, 
-	0x6a626f5f, 0x736d5f63, 0x6e655367, 0x00000064, 0x06004018, 0x00004018, 
-	0x00000000, 0x00000400, 0x00000000, 0x6a626f5f, 0x736d5f63, 0x6e655367, 
-	0x00000064, 0x00004018, 0x06004018, 0x00000000, 0x6a626f5f, 0x736d5f63, 
-	0x6e655367, 0x00000064, 0x06004018, 0x06004018, 0x01000000, 0x6a626f5f, 
-	0x73615f63, 0x6e676973, 0x6176695f, 0x00000072, 0x06004018, 0x00004018, 
-	0x00000000, 0x00000400, 0x00000000, 0x6a626f5f, 0x73615f63, 0x6e676973, 
-	0x6176695f, 0x65675f72, 0x6972656e, 0x00000063, 0x00004018, 0x06004018, 
-	0x00000000, 0x6a626f5f, 0x73615f63, 0x6e676973, 0x6176695f, 0x00000072, 
-	0x06004018, 0x06004018, 0x01000000, 0x6a626f5f, 0x73615f63, 0x6e676973, 
-	0x6f6c675f, 0x006c6162, 0x06004018, 0x00004018, 0x00000000, 0x00000400, 
-	0x00000000, 0x6a626f5f, 0x73615f63, 0x6e676973, 0x6f6c675f, 0x5f6c6162, 
-	0x656e6567, 0x00636972, 0x00004018, 0x06004018, 0x00000000, 0x6a626f5f, 
-	0x73615f63, 0x6e676973, 0x6f6c675f, 0x006c6162, 0x06004018, 0x06004018, 
-	0x01000000, 0x6a626f5f, 0x73615f63, 0x6e676973, 0x7274735f, 0x43676e6f, 
-	0x00747361, 0x06004018, 0x00004018, 0x00000000, 0x00000400, 0x00000000, 
-	0x6a626f5f, 0x73615f63, 0x6e676973, 0x7274735f, 0x43676e6f, 0x5f747361, 
-	0x656e6567, 0x00636972, 0x00004018, 0x06004018, 0x00000000, 0x6a626f5f, 
-	0x73615f63, 0x6e676973, 0x7274735f, 0x43676e6f, 0x00747361, 0x06004018, 
-};
-commpage_descriptor sigdata_descriptor =
-{ sigdata, sizeof(sigdata), 0xffff3000, 0, 0 };
-
-static unsigned int badata[] =
-{
-	0xae3aff4b, 0x3e3aff4b, 0xce39ff4b, 0x7239ff4b, 0x0230ff4b, 0x5e32ff4b, 
-	0xb232ff4b, 0x0e33ff4b, 0x4a33ff4b, 0x9a33ff4b, 0x3a34ff4b, 0xae34ff4b, 
-	0x1635ff4b, 0x8235ff4b, 0xda35ff4b, 0x4e36ff4b, 0xd236ff4b, 0x6a37ff4b, 
-	0xb237ff4b, 0x1238ff4b, 0x7e38ff4b, 0x6630ff4b, 0xde30ff4b, 0x5e31ff4b, 
-};
-static commpage_descriptor badata_descriptor_ary[] =
-{
-	{ &badata[ 0], 4, 0xfffefea0, 0, 0 },
-	{ &badata[ 1], 4, 0xfffefeb0, 0, 0 },
-	{ &badata[ 2], 4, 0xfffefec0, 0, 0 },
-	{ &badata[ 3], 4, 0xfffeff00, 0, 0 },
-	{ &badata[ 4], 4, 0xffff8080, 0, 0 },
-	{ &badata[ 5], 4, 0xffff8100, 0, 0 },
-	{ &badata[ 6], 4, 0xffff8140, 0, 0 },
-	{ &badata[ 7], 4, 0xffff8180, 0, 0 },
-	{ &badata[ 8], 4, 0xffff81a0, 0, 0 },
-	{ &badata[ 9], 4, 0xffff8200, 0, 0 },
-	{ &badata[10], 4, 0xffff8220, 0, 0 },
-	{ &badata[11], 4, 0xffff8260, 0, 0 },
-	{ &badata[12], 4, 0xffff82a0, 0, 0 },
-	{ &badata[13], 4, 0xffff82c0, 0, 0 },
-	{ &badata[14], 4, 0xffff82e0, 0, 0 },
-	{ &badata[15], 4, 0xffff84e0, 0, 0 },
-	{ &badata[16], 4, 0xffff8520, 0, 0 },
-	{ &badata[17], 4, 0xffff8580, 0, 0 },
-	{ &badata[18], 4, 0xffff8600, 0, 0 },
-	{ &badata[19], 4, 0xffff8780, 0, 0 },
-	{ &badata[20], 4, 0xffff87a0, 0, 0 },
-	{ &badata[21], 4, 0xffff8f80, 0, 0 },
-	{ &badata[22], 4, 0xffff8fc0, 0, 0 },
-	{ &badata[23], 4, 0xffff9000, 0, 0 },
-};
-commpage_descriptor *ba_descriptors[] =
-{
-	&badata_descriptor_ary[ 0],
-	&badata_descriptor_ary[ 1],
-	&badata_descriptor_ary[ 2],
-	&badata_descriptor_ary[ 3],
-	&badata_descriptor_ary[ 4],
-	&badata_descriptor_ary[ 5],
-	&badata_descriptor_ary[ 6],
-	&badata_descriptor_ary[ 7],
-	&badata_descriptor_ary[ 8],
-	&badata_descriptor_ary[ 9],
-	&badata_descriptor_ary[10],
-	&badata_descriptor_ary[11],
-	&badata_descriptor_ary[12],
-	&badata_descriptor_ary[13],
-	&badata_descriptor_ary[14],
-	&badata_descriptor_ary[15],
-	&badata_descriptor_ary[16],
-	&badata_descriptor_ary[17],
-	&badata_descriptor_ary[18],
-	&badata_descriptor_ary[19],
-	&badata_descriptor_ary[20],
-	&badata_descriptor_ary[21],
-	&badata_descriptor_ary[22],
-	&badata_descriptor_ary[23],
-	0
-};
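
The five-field initializers above ({ data, size, commpage address, musthave, canthave }) imply a descriptor layout along these lines; a sketch with illustrative field names, since the real definition lives in osfmk/i386/commpage/commpage.h:

#include <stdint.h>

/* Sketch of the descriptor shape implied by the initializers above;
 * the field names are illustrative, not the header's actual ones. */
typedef struct commpage_descriptor_sketch {
	void     *code_address;		/* bytes to copy into the commpage */
	uint32_t  code_length;		/* length of those bytes */
	uint32_t  commpage_address;	/* fixed target address, e.g. 0xffff3000 */
	uint32_t  musthave;		/* _cpu_capabilities bits required */
	uint32_t  canthave;		/* _cpu_capabilities bits that must be clear */
} commpage_descriptor_sketch;
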
diff --git a/osfmk/i386/commpage/cpu_number.s b/osfmk/i386/commpage/cpu_number.s
deleted file mode 100644
index d86b13ba1..000000000
--- a/osfmk/i386/commpage/cpu_number.s
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-#include <i386/asm.h>
-
-#include <assym.s>
-
-/*
- * These commpage routines provide fast access to the logical cpu number
- * of the calling processor assuming no preemption occurs. This number
- * is encoded in the bottom 12 bits of the limit field of the IDTR (the
- * Interrupt Descriptor Table Register). The SIDT instruction is used in
- * userspace to read this register and thus to gain access to the cpu number.
- * The IDTR is loaded by the kernel for each processor at startup - see
- * osfmk/i386/mp_desc.c.
- */
-
-/* return logical cpu number in %eax */
-
-COMMPAGE_FUNCTION_START(cpu_number, 32, 4)
-	push	%ebp
-	mov	%esp,%ebp
-	sub	$8, %esp		// space to read IDTR
-
-	sidt	(%esp)			// store limit:base on stack
-	movw	(%esp), %ax		// get limit 
-	and	$0xfff, %eax		// mask off lower 12 bits to return
-
-	mov	%ebp,%esp
-	pop	%ebp
-	ret
-COMMPAGE_DESCRIPTOR(cpu_number,_COMM_PAGE_CPU_NUMBER,0,0)
-
-
-/* The 64-bit version. 
- */
-COMMPAGE_FUNCTION_START(cpu_number_64, 64, 4)
-	push	%rbp
-	mov	%rsp,%rbp
-	sub	$16,%rsp		// space to read IDTR
-
-	sidt	(%rsp)			// store limit:base on stack
-	movw	(%rsp), %ax		// get limit
-	and	$0xfff, %rax		// mask off lower 12 bits to return
-
-	mov	%rbp,%rsp
-	pop	%rbp
-	ret
-COMMPAGE_DESCRIPTOR(cpu_number_64,_COMM_PAGE_CPU_NUMBER,0,0)
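
The deleted routines' trick is simple enough to model in C: read the IDTR with SIDT (legal from user mode on these processors) and mask off the low 12 bits of the limit. A hedged x86-64 sketch; as the header comment warns, the result is stale if the thread is preempted and migrated between the read and its use:

#include <stdint.h>

static inline unsigned current_cpu_number(void)
{
	struct {
		uint16_t limit;		/* low 12 bits hold the cpu number */
		uint64_t base;
	} __attribute__((packed)) idtr;

	__asm__ volatile ("sidt %0" : "=m" (idtr));
	return idtr.limit & 0xfff;
}
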
diff --git a/osfmk/i386/commpage/fifo_queues.s b/osfmk/i386/commpage/fifo_queues.s
index e390a3b17..fa2bbf82c 100644
--- a/osfmk/i386/commpage/fifo_queues.s
+++ b/osfmk/i386/commpage/fifo_queues.s
@@ -66,48 +66,6 @@
  * void  OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset);
  */
 
-COMMPAGE_FUNCTION_START(AtomicFifoEnqueue, 32, 4)
-	pushl	%edi
-	pushl	%esi
-	pushl	%ebx
-	xorl	%ebx,%ebx	// clear "preemption pending" flag
-	movl	16(%esp),%edi	// %edi == ptr to list head
-	movl	20(%esp),%esi	// %esi == new
-	movl	24(%esp),%edx	// %edx == offset
-	COMMPAGE_CALL(_COMM_PAGE_PFZ_ENQUEUE,_COMM_PAGE_FIFO_ENQUEUE,AtomicFifoEnqueue)
-	testl	%ebx,%ebx	// pending preemption?
-	jz	1f
-	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_FIFO_ENQUEUE,AtomicFifoEnqueue)
-1:	
-	popl	%ebx
-	popl	%esi
-	popl	%edi
-	ret
-COMMPAGE_DESCRIPTOR(AtomicFifoEnqueue,_COMM_PAGE_FIFO_ENQUEUE,0,0)
-	
-	
-/* void* OSAtomicFifoDequeue( OSFifoQueueHead *list, size_t offset); */
-
-COMMPAGE_FUNCTION_START(AtomicFifoDequeue, 32, 4)
-	pushl	%edi
-	pushl	%esi
-	pushl	%ebx
-	xorl	%ebx,%ebx	// clear "preemption pending" flag
-	movl	16(%esp),%edi	// %edi == ptr to list head
-	movl	20(%esp),%edx	// %edx == offset
-	COMMPAGE_CALL(_COMM_PAGE_PFZ_DEQUEUE,_COMM_PAGE_FIFO_DEQUEUE,AtomicFifoDequeue)
-	testl	%ebx,%ebx	// pending preemption?
-	jz	1f
-	pushl	%eax		// save return value across sysenter
-	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_FIFO_DEQUEUE,AtomicFifoDequeue)
-	popl	%eax
-1:	
-	popl	%ebx
-	popl	%esi
-	popl	%edi
-	ret			// ptr to 1st element in Q still in %eax
-COMMPAGE_DESCRIPTOR(AtomicFifoDequeue,_COMM_PAGE_FIFO_DEQUEUE,0,0)
-
 
 /* Subroutine to make a preempt syscall.  Called when we notice %ebx is
  * nonzero after returning from a PFZ subroutine.
@@ -255,38 +213,6 @@ COMMPAGE_DESCRIPTOR(pfz_dequeue,_COMM_PAGE_PFZ_DEQUEUE,0,0)
  * void  OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset);
  */
 
-// %rdi == list head, %rsi == new, %rdx == offset
-
-COMMPAGE_FUNCTION_START(AtomicFifoEnqueue_64, 64, 4)
-	pushq	%rbx
-	xorl	%ebx,%ebx	// clear "preemption pending" flag
-	COMMPAGE_CALL(_COMM_PAGE_PFZ_ENQUEUE,_COMM_PAGE_FIFO_ENQUEUE,AtomicFifoEnqueue_64)
-	testl	%ebx,%ebx	// pending preemption?
-	jz	1f
-	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_FIFO_ENQUEUE,AtomicFifoEnqueue_64)
-1:	
-	popq	%rbx
-	ret
-COMMPAGE_DESCRIPTOR(AtomicFifoEnqueue_64,_COMM_PAGE_FIFO_ENQUEUE,0,0)
-	
-	
-/* void* OSAtomicFifoDequeue( OSFifoQueueHead *list, size_t offset); */
-
-// %rdi == list head, %rsi == offset
-
-COMMPAGE_FUNCTION_START(AtomicFifoDequeue_64, 64, 4)
-	pushq	%rbx
-	xorl	%ebx,%ebx	// clear "preemption pending" flag
-	movq	%rsi,%rdx	// move offset to %rdx to be like the Enqueue case
-	COMMPAGE_CALL(_COMM_PAGE_PFZ_DEQUEUE,_COMM_PAGE_FIFO_DEQUEUE,AtomicFifoDequeue_64)
-	testl	%ebx,%ebx	// pending preemption?
-	jz	1f
-	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_FIFO_DEQUEUE,AtomicFifoDequeue_64)
-1:	
-	popq	%rbx
-	ret			// ptr to 1st element in Q in %rax
-COMMPAGE_DESCRIPTOR(AtomicFifoDequeue_64,_COMM_PAGE_FIFO_DEQUEUE,0,0)
-
 
 /* Subroutine to make a preempt syscall.  Called when we notice %ebx is
  * nonzero after returning from a PFZ subroutine.  Not in PFZ.
diff --git a/osfmk/i386/commpage/longcopy_sse3x.s b/osfmk/i386/commpage/longcopy_sse3x.s
deleted file mode 100644
index 3a1de25ed..000000000
--- a/osfmk/i386/commpage/longcopy_sse3x.s
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-         
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-       
-
-/*
- * The bcopy/memcpy loops for very long operands, tuned for Pentium-M
- * class processors with Supplemental SSE3 and 64-byte cache lines.
- *
- * The following #defines are tightly coupled to the u-architecture:
- */
-
-#define kBigChunk   (256*1024)          // outer loop chunk size for kVeryLong sized operands
-
-
-// Very long forward moves.  These are at least several pages, so we loop over big
-// chunks of memory (kBigChunk in size.)  We first prefetch the chunk, and then copy
-// it using non-temporal stores.  Hopefully all the reads occur in the prefetch loop,
-// so the copy loop reads from L2 and writes directly to memory (with write combining.)
-// This minimizes bus turnaround and maintains good DRAM page locality.
-// Note that for this scheme to work, kVeryLong must be a large fraction of L2 cache
-// size.  Otherwise, it is counter-productive to bypass L2 on the stores.
-//
-// We are called from the commpage bcopy loops when they encounter very long
-// operands, with the standard ABI.
-//
-//	void longcopy(void *dest, const void *sou, size_t len)
-
-COMMPAGE_FUNCTION_START(longcopy_sse3x, 32, 5)
-	pushl	%ebp			// set up a frame for backtraces
-	movl	%esp,%ebp
-        pushl   %esi
-        pushl   %edi
-	pushl	%ebx			// we'll need to use this too
-        movl    8(%ebp),%edi		// get dest ptr
-        movl    12(%ebp),%esi           // get source ptr
-        movl    16(%ebp),%ecx           // get length
-	movl	%edi,%ebx		// copy dest ptr
-	negl	%ebx
-	andl	$63,%ebx		// get #bytes to cache line align destination
-	jz	LBigChunkLoop		// already aligned
-	
-// Cache line align destination, so non-temporal stores in copy loops work right.
-
-	pushl	%ebx			// arg3 - #bytes to align destination (1..63)
-	pushl	%esi			// arg2 - source
-	pushl	%edi			// arg1 - dest
-	movl	$(_COMM_PAGE_MEMCPY),%eax
-	call	*%eax			// align the destination
-	addl	$12,%esp
-        movl    8(%ebp),%edi		// recover dest ptr
-        movl    12(%ebp),%esi           // recover source ptr
-        movl    16(%ebp),%ecx           // recover length
-	addl	%ebx,%esi		// adjust ptrs and lengths past copy
-	addl	%ebx,%edi
-	subl	%ebx,%ecx
-	
-// Loop over big chunks.
-//      ecx = length remaining (>= 4096)
-//      edi = dest (64-byte aligned)
-//      esi = source (may be unaligned)
-	
-LBigChunkLoop:
-        movl    $(kBigChunk),%edx       // assume we can do a full chunk
-	cmpl	%edx,%ecx		// do we have a full chunk left to do?
-	cmovbl	%ecx,%edx		// if not, only move what we have left
-	andl	$-4096,%edx		// we work in page multiples
-	xor	%eax,%eax		// initialize chunk offset
-	jmp	LTouchLoop
-
-// Touch in the next chunk.  We try to keep the prefetch unit in "kick-start" mode,
-// by touching two adjacent cache lines every 8 lines of each page, in four slices.
-// Because the source may be unaligned, we use byte loads to touch.
-//      ecx = length remaining (including this chunk)
-//      edi = ptr to start of dest chunk
-//      esi = ptr to start of source chunk
-//	edx = chunk length (multiples of pages)
-//	ebx = scratch reg used to read a byte of each cache line
-//	eax = chunk offset
-
-	.align	4,0x90			// 16-byte align inner loops
-LTouchLoop:
-	movzb	(%esi,%eax),%ebx	// touch line 0, 2, 4, or 6 of page
-	movzb	1*64(%esi,%eax),%ebx	// touch line 1, 3, 5, or 7
-	movzb	8*64(%esi,%eax),%ebx	// touch line 8, 10, 12, or 14
-	movzb	9*64(%esi,%eax),%ebx	// etc
-	
-	movzb	16*64(%esi,%eax),%ebx
-	movzb	17*64(%esi,%eax),%ebx
-	movzb	24*64(%esi,%eax),%ebx
-	movzb	25*64(%esi,%eax),%ebx
-
-	movzb	32*64(%esi,%eax),%ebx
-	movzb	33*64(%esi,%eax),%ebx
-	movzb	40*64(%esi,%eax),%ebx
-	movzb	41*64(%esi,%eax),%ebx
-
-	movzb	48*64(%esi,%eax),%ebx
-	movzb	49*64(%esi,%eax),%ebx
-	movzb	56*64(%esi,%eax),%ebx
-	movzb	57*64(%esi,%eax),%ebx
-	
-	subl	$-128,%eax		// next slice of page (adding 128 w 8-bit immediate)
-	testl	$512,%eax		// done with this page?
-	jz	LTouchLoop		// no, next of four slices
-	addl	$(4096-512),%eax	// move on to next page
-	cmpl	%eax,%edx		// done with this chunk?
-	jnz	LTouchLoop		// no, do next page
-	
-// The chunk has been pre-fetched, now copy it using non-temporal stores.
-// There are two copy loops, depending on whether the source is 16-byte aligned
-// or not.
-
-	addl	%edx,%esi		// increment ptrs by chunk length
-	addl	%edx,%edi
-	subl	%edx,%ecx		// adjust remaining length
-	negl	%edx			// prepare loop index (counts up to 0)
-	testl	$15,%esi		// is source 16-byte aligned?
-	jnz	LVeryLongUnaligned	// source is not aligned
-	jmp	LVeryLongAligned
-
-	.align	4,0x90			// 16-byte align inner loops
-LVeryLongAligned:			// aligned loop over 128-bytes
-        movdqa  (%esi,%edx),%xmm0
-        movdqa  16(%esi,%edx),%xmm1
-        movdqa  32(%esi,%edx),%xmm2
-        movdqa  48(%esi,%edx),%xmm3
-        movdqa  64(%esi,%edx),%xmm4
-        movdqa  80(%esi,%edx),%xmm5
-        movdqa  96(%esi,%edx),%xmm6
-        movdqa  112(%esi,%edx),%xmm7
-
-        movntdq %xmm0,(%edi,%edx)
-        movntdq %xmm1,16(%edi,%edx)
-        movntdq %xmm2,32(%edi,%edx)
-        movntdq %xmm3,48(%edi,%edx)
-        movntdq %xmm4,64(%edi,%edx)
-        movntdq %xmm5,80(%edi,%edx)
-        movntdq %xmm6,96(%edi,%edx)
-        movntdq %xmm7,112(%edi,%edx)
-        
-        subl    $-128,%edx		// add 128 with an 8-bit immediate
-        jnz	LVeryLongAligned
-	jmp	LVeryLongChunkEnd
-
-	.align	4,0x90			// 16-byte align inner loops
-LVeryLongUnaligned:			// unaligned loop over 128-bytes
-        movdqu  (%esi,%edx),%xmm0
-        movdqu  16(%esi,%edx),%xmm1
-        movdqu  32(%esi,%edx),%xmm2
-        movdqu  48(%esi,%edx),%xmm3
-        movdqu  64(%esi,%edx),%xmm4
-        movdqu  80(%esi,%edx),%xmm5
-        movdqu  96(%esi,%edx),%xmm6
-        movdqu  112(%esi,%edx),%xmm7
-
-        movntdq %xmm0,(%edi,%edx)
-        movntdq %xmm1,16(%edi,%edx)
-        movntdq %xmm2,32(%edi,%edx)
-        movntdq %xmm3,48(%edi,%edx)
-        movntdq %xmm4,64(%edi,%edx)
-        movntdq %xmm5,80(%edi,%edx)
-        movntdq %xmm6,96(%edi,%edx)
-        movntdq %xmm7,112(%edi,%edx)
-        
-        subl    $-128,%edx		// add 128 with an 8-bit immediate
-        jnz     LVeryLongUnaligned
-
-LVeryLongChunkEnd:
-	cmpl	$4096,%ecx		// at least another page to go?
-	jae	LBigChunkLoop		// yes
-	
-// Done.  Call memcpy() again to handle the 0-4095 bytes at the end.
-	
-	sfence				// required by non-temporal stores
-	testl	%ecx,%ecx		// anything left to copy?
-	jz	1f
-	pushl	%ecx			// arg3 - remaining length (1..4095)
-	pushl	%esi			// arg2 - source
-	pushl	%edi			// arg1 - dest
-	movl	$(_COMM_PAGE_MEMCPY),%eax
-	call	*%eax			// copy the final 1..4095 bytes
-	addl	$12,%esp		// pop off arguments
-1:
-	popl	%ebx
-	popl	%edi
-	popl	%esi
-	popl	%ebp
-	ret
-
-/* always match for now, as commpage_stuff_routine() will panic if no match */
-COMMPAGE_DESCRIPTOR(longcopy_sse3x, _COMM_PAGE_LONGCOPY, 0 ,0)
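
The heart of the deleted loop, non-temporal stores followed by a fence, maps directly onto SSE2 intrinsics. A simplified sketch without the chunking and touch loops, assuming len is a multiple of 16 and dst is 16-byte aligned:

#include <emmintrin.h>
#include <stddef.h>
#include <stdint.h>

static void stream_copy(uint8_t *dst, const uint8_t *src, size_t len)
{
	size_t i;

	for (i = 0; i < len; i += 16) {
		__m128i v = _mm_loadu_si128((const __m128i *)(src + i));
		_mm_stream_si128((__m128i *)(dst + i), v);	/* non-temporal store */
	}
	_mm_sfence();	/* required by non-temporal stores, as above */
}
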
diff --git a/osfmk/i386/commpage/longcopy_sse3x_64.s b/osfmk/i386/commpage/longcopy_sse3x_64.s
deleted file mode 100644
index 439c4447f..000000000
--- a/osfmk/i386/commpage/longcopy_sse3x_64.s
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-        
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-      
-
-/*
- * The bcopy/memcpy loops for very long operands, tuned for 64-bit
- * Pentium-M class processors with Supplemental SSE3 and 64-byte cache lines.
- * This is the 64-bit version.
- *
- * The following #defines are tightly coupled to the u-architecture:
- */
-
-#define kBigChunk   (256*1024)          // outer loop chunk size for kVeryLong sized operands
-
-
-// Very long forward moves.  These are at least several pages, so we loop over big
-// chunks of memory (kBigChunk in size.)  We first prefetch the chunk, and then copy
-// it using non-temporal stores.  Hopefully all the reads occur in the prefetch loop,
-// so the copy loop reads from L2 and writes directly to memory (with write combining.)
-// This minimizes bus turnaround and maintains good DRAM page locality.
-// Note that for this scheme to work, kVeryLong must be a large fraction of L2 cache
-// size.  Otherwise, it is counter-productive to bypass L2 on the stores.
-//
-// We are called from the commpage bcopy loops when they encounter very long
-// operands, with the standard ABI:
-//      rdi = dest ptr
-//      rsi = source ptr
-//      rdx = length (>= 8kb, probably much bigger)
-
-// void longcopy(void *dest, const void *sou, size_t len)
-			
-COMMPAGE_FUNCTION_START(longcopy_sse3x_64, 64, 5)
-	pushq	%rbp			// set up a frame for backtraces
-	movq	%rsp,%rbp
-	movl	%edi,%eax		// copy dest ptr
-	negl	%eax
-	andl	$63,%eax		// get #bytes to cache line align destination
-	jz	LBigChunkLoop		// already aligned
-	
-// Cache line align destination, so non-temporal stores in copy loops work right.
-// The recursive call returns with the source and dest ptrs properly updated.
-
-	subq	%rax,%rdx		// get length remaining after dest is aligned
-	pushq	%rdx			// save length remaining
-	movl	%eax,%edx		// #bytes to copy to align destination
-	movq	$_COMM_PAGE_32_TO_64(_COMM_PAGE_MEMCPY),%rax
-	call	*%rax
-	popq	%rdx			// recover adjusted length
-	
-// Loop over big chunks.
-//      rdx = length remaining (>= 4096)
-//      rdi = dest (64-byte aligned)
-//      rsi = source (may be unaligned)
-	
-LBigChunkLoop:
-        movl    $(kBigChunk),%r8d       // assume we can do a full chunk
-	cmpq	%r8,%rdx		// do we have a full chunk left to do?
-	cmovbl	%edx,%r8d		// if not, only move what we have left
-	andl	$-4096,%r8d		// we work in page multiples
-	xorl	%eax,%eax		// initialize chunk offset
-	jmp	LTouchLoop
-
-// Touch in the next chunk.  We try to keep the prefetch unit in "kick-start" mode,
-// by touching two adjacent cache lines every 8 lines of each page, in four slices.
-// Because the source may be unaligned, we use byte loads to touch.
-//      rdx = length remaining (including this chunk)
-//      rdi = ptr to start of dest chunk
-//      rsi = ptr to start of source chunk
-//	r8d = chunk length (multiples of pages, less than  2**32)
-//	ecx = scratch reg used to read a byte of each cache line
-//	eax = chunk offset
-
-	.align	4,0x90			// 16-byte align inner loops
-LTouchLoop:
-	movzb	(%rsi,%rax),%ecx	// touch line 0, 2, 4, or 6 of page
-	movzb	1*64(%rsi,%rax),%ecx	// touch line 1, 3, 5, or 7
-	movzb	8*64(%rsi,%rax),%ecx	// touch line 8, 10, 12, or 14
-	movzb	9*64(%rsi,%rax),%ecx	// etc
-	
-	movzb	16*64(%rsi,%rax),%ecx
-	movzb	17*64(%rsi,%rax),%ecx
-	movzb	24*64(%rsi,%rax),%ecx
-	movzb	25*64(%rsi,%rax),%ecx
-
-	movzb	32*64(%rsi,%rax),%ecx
-	movzb	33*64(%rsi,%rax),%ecx
-	movzb	40*64(%rsi,%rax),%ecx
-	movzb	41*64(%rsi,%rax),%ecx
-
-	movzb	48*64(%rsi,%rax),%ecx
-	movzb	49*64(%rsi,%rax),%ecx
-	movzb	56*64(%rsi,%rax),%ecx
-	movzb	57*64(%rsi,%rax),%ecx
-	
-	subl	$-128,%eax		// next slice of page (adding 128 w 8-bit immediate)
-	testl	$512,%eax		// done with this page?
-	jz	LTouchLoop		// no, next of four slices
-	addl	$(4096-512),%eax	// move on to next page
-	cmpl	%eax,%r8d		// done with this chunk?
-	jnz	LTouchLoop		// no, do next page
-	
-// The chunk has been pre-fetched, now copy it using non-temporal stores.
-// There are two copy loops, depending on whether the source is 16-byte aligned
-// or not.
-
-	movl	%r8d,%ecx		// copy chunk size to a reg that doesn't use REX prefix
-	addq	%rcx,%rsi		// increment ptrs by chunk length
-	addq	%rcx,%rdi
-	subq	%rcx,%rdx		// adjust remaining length
-	negq	%rcx			// prepare loop index (counts up to 0)
-	testl	$15,%esi		// is source 16-byte aligned?
-	jnz	LVeryLongUnaligned	// no
-	jmp	LVeryLongAligned
-
-	.align	4,0x90			// 16-byte align inner loops
-LVeryLongAligned:			// aligned loop over 128-bytes
-        movdqa  (%rsi,%rcx),%xmm0
-        movdqa  16(%rsi,%rcx),%xmm1
-        movdqa  32(%rsi,%rcx),%xmm2
-        movdqa  48(%rsi,%rcx),%xmm3
-        movdqa  64(%rsi,%rcx),%xmm4
-        movdqa  80(%rsi,%rcx),%xmm5
-        movdqa  96(%rsi,%rcx),%xmm6
-        movdqa  112(%rsi,%rcx),%xmm7
-
-        movntdq %xmm0,(%rdi,%rcx)
-        movntdq %xmm1,16(%rdi,%rcx)
-        movntdq %xmm2,32(%rdi,%rcx)
-        movntdq %xmm3,48(%rdi,%rcx)
-        movntdq %xmm4,64(%rdi,%rcx)
-        movntdq %xmm5,80(%rdi,%rcx)
-        movntdq %xmm6,96(%rdi,%rcx)
-        movntdq %xmm7,112(%rdi,%rcx)
-        
-        subq    $-128,%rcx		// add 128 with an 8-bit immediate
-        jnz	LVeryLongAligned
-	jmp	LVeryLongChunkEnd
-
-	.align	4,0x90			// 16-byte align inner loops
-LVeryLongUnaligned:			// unaligned loop over 128-bytes
-        movdqu  (%rsi,%rcx),%xmm0
-        movdqu  16(%rsi,%rcx),%xmm1
-        movdqu  32(%rsi,%rcx),%xmm2
-        movdqu  48(%rsi,%rcx),%xmm3
-        movdqu  64(%rsi,%rcx),%xmm4
-        movdqu  80(%rsi,%rcx),%xmm5
-        movdqu  96(%rsi,%rcx),%xmm6
-        movdqu  112(%rsi,%rcx),%xmm7
-
-        movntdq %xmm0,(%rdi,%rcx)
-        movntdq %xmm1,16(%rdi,%rcx)
-        movntdq %xmm2,32(%rdi,%rcx)
-        movntdq %xmm3,48(%rdi,%rcx)
-        movntdq %xmm4,64(%rdi,%rcx)
-        movntdq %xmm5,80(%rdi,%rcx)
-        movntdq %xmm6,96(%rdi,%rcx)
-        movntdq %xmm7,112(%rdi,%rcx)
-        
-        subq    $-128,%rcx		// add 128 with an 8-bit immediate
-        jnz     LVeryLongUnaligned
-
-LVeryLongChunkEnd:
-	cmpq	$4096,%rdx		// at least another page to go?
-	jae	LBigChunkLoop		// yes
-	
-// Done.  Call memcpy() again to handle the 0-4095 bytes at the end.
-// We still have the args in the right registers:
-//	rdi = destination ptr
-//	rsi = source ptr
-//	rdx = length remaining (0..4095)
-
-	sfence				// required by non-temporal stores
-	testl	%edx,%edx		// anything left to copy?
-	jz	1f
-	movq	$_COMM_PAGE_32_TO_64(_COMM_PAGE_MEMCPY),%rax
-	call	*%rax
-1:
-	popq	%rbp			// restore frame ptr
-	ret
-
-/* always match for now, as commpage_stuff_routine() will panic if no match */
-COMMPAGE_DESCRIPTOR(longcopy_sse3x_64, _COMM_PAGE_LONGCOPY, 0 ,0)
diff --git a/osfmk/i386/commpage/memset_pattern_sse2.s b/osfmk/i386/commpage/memset_pattern_sse2.s
deleted file mode 100644
index 3025ef62b..000000000
--- a/osfmk/i386/commpage/memset_pattern_sse2.s
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-/* The common path for nonzero memset and the memset_pattern routines,
- * tuned for Pentium-M class processors with SSE2 and 64-byte cache lines.
- * This is used by the following functions:
- *
- *	void *memset(void *b, int c, size_t len);                   // when c!=0
- *	void memset_pattern4(void *b, const void *c4, size_t len);
- *	void memset_pattern8(void *b, const void *c8, size_t len);
- *	void memset_pattern16(void *b, const void *c16, size_t len);
- *
- * Note bzero() and memset() of 0 are handled separately.
- */
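
As a reference point for the routines listed above, memset_pattern4() behaves like this portable model (the pattern is tiled across the buffer and truncated at the tail):

#include <stddef.h>
#include <string.h>

static void memset_pattern4_ref(void *b, const void *c4, size_t len)
{
	unsigned char *p = b;

	while (len >= 4) {
		memcpy(p, c4, 4);	/* tile the 4-byte pattern */
		p += 4;
		len -= 4;
	}
	memcpy(p, c4, len);		/* truncated tail, 0..3 bytes */
}
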
-
-#define	kShort		63
-#define	kVeryLong	(1024*1024)
-
-// Initial entry from Libc with parameters passed in registers.  Although we
-// correctly handle misaligned ptrs and short operands, they are inefficient.
-// Therefore our caller should filter out short operands and exploit local
-// knowledge (ie, original pattern length) to align the ptr if possible.
-// When called, we expect:
-//	%edi = ptr to memory to set (not necessarily aligned)
-//	%edx = length (may be short or even 0)
-//	%xmm0 = the pattern to store
-// Return conditions:
-//	%eax, %edi, %esi, %ecx, and %edx all trashed
-
-COMMPAGE_FUNCTION_START(memset_pattern_sse2, 32, 5)
-        cmpl    $(kShort),%edx		// long enough to bother aligning?
-        ja	LNotShort		// yes
-	jmp	LShort			// no
-        
-// Here for short operands or the end of long ones.
-//      %edx = length
-//      %edi = ptr (may not be aligned)
-//      %xmm0 = pattern
-
-LUnalignedStore16:
-	movdqu	%xmm0,(%edi)		// stuff in another 16 bytes
-	subl	$16,%edx
-	addl	$16,%edi
-LShort:	
-	cmpl	$16,%edx		// room for another vector?
-	jge	LUnalignedStore16	// yes
-LLessThan16:				// here at end of copy with < 16 bytes remaining
-	test	$8,%dl			// 8-byte store required?
-	jz	2f			// no
-	movq	%xmm0,(%edi)		// pack in 8 low bytes
-	psrldq	$8,%xmm0		// then shift vector down 8 bytes
-	addl	$8,%edi
-2:
-	test	$4,%dl			// 4-byte store required?
-	jz	3f			// no
-	movd	%xmm0,(%edi)		// pack in 4 low bytes
-	psrldq	$4,%xmm0		// then shift vector down 4 bytes
-	addl	$4,%edi
-3:
-	andl	$3,%edx			// more to go?
-	jz	5f			// no
-	movd	%xmm0,%eax		// move remainders out into %eax
-4:					// loop on up to three bytes
-	movb	%al,(%edi)		// pack in next byte
-	shrl	$8,%eax			// shift next byte into position
-	inc	%edi
-	dec	%edx
-	jnz	4b
-5:	ret
-        
-// Long enough to justify aligning ptr.  Note that we have to rotate the
-// pattern to account for any alignment.  We do this by doing two unaligned
-// stores, and then an aligned load from the middle of the two stores.
-// This will stall on store forwarding alignment mismatch, and the unaligned
-// stores can be pretty slow too, but the alternatives aren't any better.
-// Fortunately, in most cases our caller has already aligned the ptr.
-//      %edx = length (> kShort)
-//      %edi = ptr (may not be aligned)
-//      %xmm0 = pattern
-
-LNotShort:
-        movl    %edi,%ecx		// copy dest ptr
-        negl    %ecx
-        andl    $15,%ecx                // mask down to #bytes to 16-byte align
-	jz	LAligned		// skip if already aligned
-	movdqu	%xmm0,(%edi)		// store 16 unaligned bytes
-	movdqu	%xmm0,16(%edi)		// and 16 more, to be sure we have an aligned chunk
-	addl	%ecx,%edi		// now point to the aligned chunk
-	subl	%ecx,%edx		// adjust remaining count
-	movdqa	(%edi),%xmm0		// get the rotated pattern (probably stalling)
-	addl	$16,%edi		// skip past the aligned chunk
-	subl	$16,%edx
-
-// Set up for 64-byte loops.
-//      %edx = length remaining
-//      %edi = ptr (aligned)
-//      %xmm0 = rotated pattern
-
-LAligned:
-	movl	%edx,%ecx		// copy length remaining
-        andl    $63,%edx                // mask down to residual length (0..63)
-        andl    $-64,%ecx               // %ecx <- #bytes we will store in by-64 loop
-	jz	LNoMoreChunks		// no 64-byte chunks
-        addl    %ecx,%edi               // increment ptr by length to move
-	cmpl	$(kVeryLong),%ecx	// long enough to justify non-temporal stores?
-	jge	LVeryLong		// yes
-        negl    %ecx			// negate length to move
-	jmp	1f
-	
-// Loop over 64-byte chunks, storing into cache.
-
-	.align	4,0x90			// keep inner loops 16-byte aligned
-1:
-        movdqa  %xmm0,(%edi,%ecx)
-        movdqa  %xmm0,16(%edi,%ecx)
-        movdqa  %xmm0,32(%edi,%ecx)
-        movdqa  %xmm0,48(%edi,%ecx)
-        addl    $64,%ecx
-        jne     1b
-	
-	jmp	LNoMoreChunks
-	
-// Very long operands: use non-temporal stores to bypass cache.
-
-LVeryLong:
-        negl    %ecx			// negate length to move
-	jmp	1f
-	
-	.align	4,0x90			// keep inner loops 16-byte aligned
-1:
-        movntdq %xmm0,(%edi,%ecx)
-        movntdq %xmm0,16(%edi,%ecx)
-        movntdq %xmm0,32(%edi,%ecx)
-        movntdq %xmm0,48(%edi,%ecx)
-        addl    $64,%ecx
-        jne     1b
-
-        sfence                          // required by non-temporal stores
-	jmp	LNoMoreChunks
-	
-// Handle leftovers: loop by 16.
-//      %edx = length remaining (<64)
-//      %edi = ptr (aligned)
-//      %xmm0 = rotated pattern
-
-LLoopBy16:
-	movdqa	%xmm0,(%edi)		// pack in 16 more bytes
-	subl	$16,%edx		// decrement count
-	addl	$16,%edi		// increment ptr
-LNoMoreChunks:
-	cmpl	$16,%edx		// more to go?
-	jge	LLoopBy16		// yes
-	jmp	LLessThan16		// handle up to 15 remaining bytes
-
-COMMPAGE_DESCRIPTOR(memset_pattern_sse2,_COMM_PAGE_MEMSET_PATTERN,kHasSSE2,0)
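The alignment trick described in the deleted routine's comments (two unaligned stores
followed by one aligned load) amounts to rotating the 16-byte pattern by the pointer's
misalignment. A minimal C sketch of that rotation, for illustration only — the function
name and scratch-buffer framing are hypothetical, not kernel code:

#include <stdint.h>
#include <string.h>

static void rotate_pattern_for_alignment(uint8_t rotated[16],
                                         const uint8_t pattern[16],
                                         uintptr_t dest_ptr)
{
    uint8_t scratch[32];
    size_t skew = (0 - dest_ptr) & 15;   /* bytes stored before the 16-byte boundary */

    memcpy(scratch, pattern, 16);        /* the two "unaligned stores"... */
    memcpy(scratch + 16, pattern, 16);
    memcpy(rotated, scratch + skew, 16); /* ...and the "aligned load" from the middle */
}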
diff --git a/osfmk/i386/commpage/memset_pattern_sse2_64.s b/osfmk/i386/commpage/memset_pattern_sse2_64.s
deleted file mode 100644
index e2d1bb007..000000000
--- a/osfmk/i386/commpage/memset_pattern_sse2_64.s
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-/* The common path for nonzero memset and the memset_pattern routines,
- * tuned for Pentium-M class processors with SSE2 and 64-byte cache lines.
- * This is the 64-bit version.  It is used by the following functions:
- *
- *	void *memset(void *b, int c, size_t len);                   // when c!=0
- *	void memset_pattern4(void *b, const void *c4, size_t len);
- *	void memset_pattern8(void *b, const void *c8, size_t len);
- *	void memset_pattern16(void *b, const void *c16, size_t len);
- *
- * Note bzero() and memset() of 0 are handled separately.
- */
-
-#define	kShort		63
-#define	kVeryLong	(1024*1024)
-
-// Initial entry from Libc with parameters passed in registers.  Although we
-// correctly handle misaligned ptrs and short operands, they are inefficient.
-// Therefore our caller should filter out short operands and exploit local
-// knowledge (ie, original pattern length) to align the ptr if possible.
-// When called, we expect:
-//	%rdi = ptr to memory to set (not necessarily aligned)
-//	%rdx = length (may be short or even 0)
-//	%xmm0 = the pattern to store
-// Return conditions:
-//	%rax, %rdi, %rsi, %rcx, and %rdx all trashed
-//	we preserve %r8, %r9, %r10, and %r11
-
-COMMPAGE_FUNCTION_START(memset_pattern_sse2_64, 64, 5)
-        cmpq    $(kShort),%rdx		// long enough to bother aligning?
-        ja	LNotShort		// yes
-	jmp	LShort			// no
-        
-// Here for short operands or the end of long ones.
-//      %rdx = length (<= kShort)
-//      %rdi = ptr (may not be aligned)
-//      %xmm0 = pattern
-
-LUnalignedStore16:
-	movdqu	%xmm0,(%rdi)		// stuff in another 16 bytes
-	subl	$16,%edx
-	addq	$16,%rdi
-LShort:	
-	cmpl	$16,%edx		// room for another vector?
-	jge	LUnalignedStore16	// yes
-LLessThan16:				// here at end of copy with < 16 bytes remaining
-	test	$8,%dl			// 8-byte store required?
-	jz	2f			// no
-	movq	%xmm0,(%rdi)		// pack in 8 low bytes
-	psrldq	$8,%xmm0		// then shift vector down 8 bytes
-	addq	$8,%rdi
-2:
-	test	$4,%dl			// 4-byte store required?
-	jz	3f			// no
-	movd	%xmm0,(%rdi)		// pack in 4 low bytes
-	psrldq	$4,%xmm0		// then shift vector down 4 bytes
-	addq	$4,%rdi
-3:
-	andl	$3,%edx			// more to go?
-	jz	5f			// no
-	movd	%xmm0,%eax		// move remainders out into %eax
-4:					// loop on up to three bytes
-	movb	%al,(%rdi)		// pack in next byte
-	shrl	$8,%eax			// shift next byte into position
-	incq	%rdi
-	dec	%edx
-	jnz	4b
-5:	ret
-        
-// Long enough to justify aligning ptr.  Note that we have to rotate the
-// pattern to account for any alignment.  We do this by doing two unaligned
-// stores, and then an aligned load from the middle of the two stores.
-// This will stall on store forwarding alignment mismatch, and the unaligned
-// stores can be pretty slow too, but the alternatives aren't any better.
-// Fortunately, in most cases our caller has already aligned the ptr.
-//      %rdx = length (> kShort)
-//      %rdi = ptr (may not be aligned)
-//      %xmm0 = pattern
-
-LNotShort:
-        movl    %edi,%ecx		// copy low bits of dest ptr
-        negl    %ecx
-        andl    $15,%ecx                // mask down to #bytes to 16-byte align
-	jz	LAligned		// skip if already aligned
-	movdqu	%xmm0,(%rdi)		// store 16 unaligned bytes
-	movdqu	%xmm0,16(%rdi)		// and 16 more, to be sure we have an aligned chunk
-	addq	%rcx,%rdi		// now point to the aligned chunk
-	subq	%rcx,%rdx		// adjust remaining count
-	movdqa	(%rdi),%xmm0		// get the rotated pattern (probably stalling)
-	addq	$16,%rdi		// skip past the aligned chunk
-	subq	$16,%rdx
-
-// Set up for 64-byte loops.
-//      %rdx = length remaining
-//      %rdi = ptr (aligned)
-//      %xmm0 = rotated pattern
-
-LAligned:
-	movq	%rdx,%rcx		// copy length remaining
-        andl    $63,%edx                // mask down to residual length (0..63)
-        andq    $-64,%rcx               // %rcx <- #bytes we will store in by-64 loop
-	jz	LNoMoreChunks		// no 64-byte chunks
-        addq    %rcx,%rdi               // increment ptr by length to move
-	cmpq	$(kVeryLong),%rcx	// long enough to justify non-temporal stores?
-	jge	LVeryLong		// yes
-        negq    %rcx			// negate length to move
-	jmp	1f
-	
-// Loop over 64-byte chunks, storing into cache.
-
-	.align	4,0x90			// keep inner loops 16-byte aligned
-1:
-        movdqa  %xmm0,(%rdi,%rcx)
-        movdqa  %xmm0,16(%rdi,%rcx)
-        movdqa  %xmm0,32(%rdi,%rcx)
-        movdqa  %xmm0,48(%rdi,%rcx)
-        addq    $64,%rcx
-        jne     1b
-	
-	jmp	LNoMoreChunks
-	
-// Very long operands: use non-temporal stores to bypass cache.
-
-LVeryLong:
-        negq    %rcx			// negate length to move
-	jmp	1f
-	
-	.align	4,0x90			// keep inner loops 16-byte aligned
-1:
-        movntdq %xmm0,(%rdi,%rcx)
-        movntdq %xmm0,16(%rdi,%rcx)
-        movntdq %xmm0,32(%rdi,%rcx)
-        movntdq %xmm0,48(%rdi,%rcx)
-        addq    $64,%rcx
-        jne     1b
-
-        sfence                          // required by non-temporal stores
-	jmp	LNoMoreChunks
-	
-// Handle leftovers: loop by 16.
-//      %edx = length remaining (<64)
-//      %rdi = ptr (aligned)
-//      %xmm0 = rotated pattern
-
-LLoopBy16:
-	movdqa	%xmm0,(%rdi)		// pack in 16 more bytes
-	subl	$16,%edx		// decrement count
-	addq	$16,%rdi		// increment ptr
-LNoMoreChunks:
-	cmpl	$16,%edx		// more to go?
-	jge	LLoopBy16		// yes
-	jmp	LLessThan16		// handle up to 15 remaining bytes
-
-COMMPAGE_DESCRIPTOR(memset_pattern_sse2_64,_COMM_PAGE_MEMSET_PATTERN,kHasSSE2,0)
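The entry points named in the header comment are the Mac OS X Libc API in <string.h>;
a typical caller looks like this (a sketch — fill_argb is a hypothetical example, and
it is Libc, not the caller, that decides whether a fill reaches the commpage routine):

#include <string.h>   /* memset_pattern4() and friends on Mac OS X */
#include <stdint.h>
#include <stddef.h>

/* Fill a pixel buffer with a repeating 32-bit value; long nonzero
 * fills like this are what the deleted commpage routine serviced. */
static void fill_argb(uint32_t *pixels, size_t count, uint32_t argb)
{
    memset_pattern4(pixels, &argb, count * sizeof(uint32_t));
}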
diff --git a/osfmk/i386/commpage/pthreads.s b/osfmk/i386/commpage/pthreads.s
index 217662445..1794228ff 100644
--- a/osfmk/i386/commpage/pthreads.s
+++ b/osfmk/i386/commpage/pthreads.s
@@ -31,37 +31,6 @@
 #include <machine/commpage.h>
 #include <mach/i386/syscall_sw.h>
 
-#define _PTHREAD_TSD_OFFSET32 0x48
-#define _PTHREAD_TSD_OFFSET64 0x60
-
-
-/* These routines do not need to be on the commpage on Intel.  They are here for now
- * to avoid revlock, but the code should move to Libc, and we should eventually remove
- * these.
- */
-COMMPAGE_FUNCTION_START(pthread_getspecific, 32, 4)
-	movl	4(%esp), %eax
-	movl	%gs:_PTHREAD_TSD_OFFSET32(,%eax,4), %eax
-	ret
-COMMPAGE_DESCRIPTOR(pthread_getspecific,_COMM_PAGE_PTHREAD_GETSPECIFIC,0,0)
-
-COMMPAGE_FUNCTION_START(pthread_self, 32, 4)
-	movl	%gs:_PTHREAD_TSD_OFFSET32, %eax
-	ret
-COMMPAGE_DESCRIPTOR(pthread_self,_COMM_PAGE_PTHREAD_SELF,0,0)
-
-/* the 64-bit versions: */
-COMMPAGE_FUNCTION_START(pthread_getspecific_64, 64, 4)
-	movq	%gs:_PTHREAD_TSD_OFFSET64(,%rdi,8), %rax
-	ret
-COMMPAGE_DESCRIPTOR(pthread_getspecific_64,_COMM_PAGE_PTHREAD_GETSPECIFIC,0,0)
-
-COMMPAGE_FUNCTION_START(pthread_self_64, 64, 4)
-	movq	%gs:_PTHREAD_TSD_OFFSET64, %rax
-	ret
-COMMPAGE_DESCRIPTOR(pthread_self_64,_COMM_PAGE_PTHREAD_SELF,0,0)
-
-
 /* Temporary definitions.  Replace by #including the correct file when available.  */
 
 #define PTHRW_EBIT      0x01
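The deleted 32-bit pthread_getspecific stub was a single gs-relative load. An
equivalent sketch in C with inline assembly (i386 only; 0x48 is the
_PTHREAD_TSD_OFFSET32 value removed above — this is an illustration, not the
code that replaced the stub in Libc):

#include <stdint.h>

/* i386-only sketch of the removed commpage stub: index the thread's
 * TSD array, anchored at %gs:0x48, by key * sizeof(void *). */
static void *pthread_getspecific_sketch(uint32_t key)
{
    void *val;
    __asm__ volatile ("movl %%gs:0x48(,%1,4), %0" : "=r" (val) : "r" (key));
    return val;
}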
@@ -114,47 +83,6 @@ COMMPAGE_DESCRIPTOR(pthread_self_64,_COMM_PAGE_PTHREAD_SELF,0,0)
  */
 
 
-/* int							    // we return 0 on acquire, 1 on syscall
- * pthread_mutex_lock(	uint32_t    *lvalp,		    // ptr to mutex LVAL/UVAL pair
- *			int	    flags,		    // flags to pass kernel if we do syscall
- *			uint64_t    mtid,		    // my Thread ID
- *			uint32_t    mask,		    // bits to test in LVAL (ie, EBIT etc)
- *			uint64_t    *tidp,		    // ptr to TID field of mutex
- *			int	    *syscall_return );	    // if syscall, return value stored here
- */
-COMMPAGE_FUNCTION_START(pthread_mutex_lock, 32, 4)
-	pushl	%ebp			    // set up frame for backtrace
-	movl	%esp,%ebp
-	pushl	%esi
-	pushl	%edi
-	pushl	%ebx
-	xorl	%ebx,%ebx		    // clear "preemption pending" flag
-	movl	20(%esp),%edi		    // %edi == ptr to LVAL/UVAL structure
-	lea	20(%esp),%esi		    // %esi == ptr to argument list
-	movl	_COMM_PAGE_SPIN_COUNT, %edx
-	movl	16(%esi),%ecx		    // get mask (ie, PTHRW_EBIT etc)
-1:
-	testl	PTHRW_LVAL(%edi),%ecx	    // is mutex available?
-	jz	2f			    // yes, it is available
-	pause
-	decl	%edx			    // decrement max spin count
-	jnz	1b			    // keep spinning
-2:
-	COMMPAGE_CALL(_COMM_PAGE_PFZ_MUTEX_LOCK,_COMM_PAGE_MUTEX_LOCK,pthread_mutex_lock)
-	testl	%ebx,%ebx		    // pending preemption?
-	jz	3f
-	pushl	%eax			    // save return value across sysenter
-	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_MUTEX_LOCK,pthread_mutex_lock)
-	popl	%eax
-3:
-	popl	%ebx
-	popl	%edi
-	popl	%esi
-	popl	%ebp
-	ret
-COMMPAGE_DESCRIPTOR(pthread_mutex_lock,_COMM_PAGE_MUTEX_LOCK,0,0)
-
-
 /* Internal routine to handle pthread mutex lock operation.  This is in the PFZ.
  *	%edi == ptr to LVAL/UVAL pair
  *	%esi == ptr to argument list on stack
@@ -233,45 +161,6 @@ COMMPAGE_DESCRIPTOR(pfz_mutex_lock,_COMM_PAGE_PFZ_MUTEX_LOCK,0,0)
 
 
 
-/* int							    // we return 0 on acquire, 1 on syscall
- * pthread_mutex_lock(	uint32_t    *lvalp,		    // ptr to mutex LVAL/UVAL pair
- *			int	    flags,		    // flags to pass kernel if we do syscall
- *			uint64_t    mtid,		    // my Thread ID
- *			uint32_t    mask,		    // bits to test in LVAL (ie, EBIT etc)
- *			uint64_t    *tidp,		    // ptr to TID field of mutex
- *			int	    *syscall_return );	    // if syscall, return value stored here
- *
- *	%rdi = lvalp
- *	%esi = flags
- *	%rdx = mtid
- *	%ecx = mask
- *	%r8  = tidp
- *	%r9  = &syscall_return
- */
-COMMPAGE_FUNCTION_START(pthread_mutex_lock_64, 64, 4)
-	pushq	%rbp		    // set up frame for backtrace
-	movq	%rsp,%rbp
-	pushq	%rbx
-	xorl	%ebx,%ebx	    // clear "preemption pending" flag
-	movl	_COMM_PAGE_32_TO_64(_COMM_PAGE_SPIN_COUNT), %eax
-1:
-	testl	PTHRW_LVAL(%rdi),%ecx // is mutex available?
-	jz	2f		    // yes, it is available
-	pause
-	decl	%eax		    // decrement max spin count
-	jnz	1b		    // keep spinning
-2:
-	COMMPAGE_CALL(_COMM_PAGE_PFZ_MUTEX_LOCK,_COMM_PAGE_MUTEX_LOCK,pthread_mutex_lock_64)
-	testl	%ebx,%ebx	    // pending preemption?
-	jz	1f		    // no
-	COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_MUTEX_LOCK,pthread_mutex_lock_64)
-1:
-	popq	%rbx
-	popq	%rbp
-	ret
-COMMPAGE_DESCRIPTOR(pthread_mutex_lock_64,_COMM_PAGE_MUTEX_LOCK,0,0)
-
-
 /* Internal routine to handle pthread mutex lock operation.  This is in the PFZ.
  *	%rdi = lvalp
  *	%esi = flags
diff --git a/osfmk/i386/commpage/spinlocks.s b/osfmk/i386/commpage/spinlocks.s
deleted file mode 100644
index a0e98bcb3..000000000
--- a/osfmk/i386/commpage/spinlocks.s
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * Copyright (c) 2003-2009 Apple, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-#include <mach/i386/syscall_sw.h>
-	
-
-COMMPAGE_FUNCTION_START(spin_lock_try_up, 32, 4)
-	movl		4(%esp), %ecx 
-	xorl		%eax, %eax
-	orl		$-1, %edx
-	cmpxchgl	%edx, (%ecx)
-	setz		%dl
-	movzbl		%dl, %eax
-	ret
-COMMPAGE_DESCRIPTOR(spin_lock_try_up,_COMM_PAGE_SPINLOCK_TRY,kUP,0)
- 
-
-COMMPAGE_FUNCTION_START(spin_lock_try_mp, 32, 4)
-	movl		4(%esp), %ecx 
-	xorl		%eax, %eax
-	orl		$-1, %edx
-	lock
-	cmpxchgl	%edx, (%ecx)
-	setz		%dl
-	movzbl		%dl, %eax
-	ret
-COMMPAGE_DESCRIPTOR(spin_lock_try_mp,_COMM_PAGE_SPINLOCK_TRY,0,kUP)
-
-
-COMMPAGE_FUNCTION_START(spin_lock_up, 32, 4)
-	movl		4(%esp), %ecx
-	xorl		%eax, %eax
-	orl		$-1, %edx
-	cmpxchgl	%edx, (%ecx)
-	jnz		1f
-	ret
-1:
-	/* failed to get lock so relinquish the processor immediately on UP */
-	pushl		$1		/* 1 ms				*/
-	pushl		$1		/* SWITCH_OPTION_DEPRESS	*/
-	pushl		$0		/* THREAD_NULL			*/
-	pushl		$0		/* push dummy stack ret addr    */
-	movl		$-61,%eax	/* SYSCALL_THREAD_SWITCH */
-	int		$(MACH_INT)
-	addl		$16, %esp	/* adjust stack */
-	jmp		Lspin_lock_up
-COMMPAGE_DESCRIPTOR(spin_lock_up,_COMM_PAGE_SPINLOCK_LOCK,kUP,0)
-
-
-COMMPAGE_FUNCTION_START(spin_lock_mp, 32, 4)
-	movl		4(%esp), %ecx
-	xorl		%eax, %eax
-0:
-	orl		$-1, %edx
-	lock
-	cmpxchgl	%edx, (%ecx)
-	jnz		1f
-	ret
-1:
-	xorl		%eax, %eax
-	movl		$(MP_SPIN_TRIES), %edx
-2:
-	pause	
-	cmpl		%eax, (%ecx)
-	jz		0b		/* favor success and slow down spin loop */
-	decl		%edx
-	jnz		2b
-	/* failed to get lock after spinning so relinquish  */
-	pushl		$1		/* 1 ms				*/
-	pushl		$1		/* SWITCH_OPTION_DEPRESS	*/
-	pushl		$0		/* THREAD_NULL			*/
-	pushl		$0		/* push dummy stack ret addr    */
-	movl		$-61,%eax	/* SYSCALL_THREAD_SWITCH */
-	int		$(MACH_INT)
-	addl		$16, %esp	/* adjust stack */
-	jmp		Lspin_lock_mp
-COMMPAGE_DESCRIPTOR(spin_lock_mp,_COMM_PAGE_SPINLOCK_LOCK,0,kUP)
-
-
-COMMPAGE_FUNCTION_START(spin_unlock, 32, 4)
-	movl		4(%esp), %ecx
-	movl		$0, (%ecx)
-	ret
-COMMPAGE_DESCRIPTOR(spin_unlock,_COMM_PAGE_SPINLOCK_UNLOCK,0,0)
-
-
-/* ============================ 64-bit versions follow ===================== */
-
-
-COMMPAGE_FUNCTION_START(spin_lock_try_up_64, 64, 4)
-	xorl		%eax, %eax
-	orl		$-1, %edx
-	cmpxchgl	%edx, (%rdi)
-	setz		%dl
-	movzbl		%dl, %eax
-	ret
-COMMPAGE_DESCRIPTOR(spin_lock_try_up_64,_COMM_PAGE_SPINLOCK_TRY,kUP,0)
-
-
-COMMPAGE_FUNCTION_START(spin_lock_try_mp_64, 64, 4)
-	xorl		%eax, %eax
-	orl		$-1, %edx
-	lock
-	cmpxchgl	%edx, (%rdi)
-	setz		%dl
-	movzbl		%dl, %eax
-	ret
-COMMPAGE_DESCRIPTOR(spin_lock_try_mp_64,_COMM_PAGE_SPINLOCK_TRY,0,kUP)
-
-
-COMMPAGE_FUNCTION_START(spin_lock_up_64, 64, 4)
-	movq		%rdi,%r8
-0:
-	xorl		%eax, %eax
-	orl		$-1, %edx
-	cmpxchgl	%edx, (%r8)
-	jnz		1f
-	ret
-1:
-	/* failed to get lock so relinquish the processor immediately on UP */
-	xorl		%edi,%edi	/* THREAD_NULL			*/
-	movl		$1,%esi		/* SWITCH_OPTION_DEPRESS	*/
-	movl		$1,%edx		/* 1 ms				*/
-	movl		$(SYSCALL_CONSTRUCT_MACH(61)),%eax	/* 61 = thread_switch */
-	syscall
-	jmp		0b
-COMMPAGE_DESCRIPTOR(spin_lock_up_64,_COMM_PAGE_SPINLOCK_LOCK,kUP,0)
-	
-	
-COMMPAGE_FUNCTION_START(spin_lock_mp_64, 64, 4)
-	movq		%rdi,%r8
-0:
-	xorl		%eax, %eax
-	orl		$-1, %edx
-	lock
-	cmpxchgl	%edx, (%r8)
-	jnz		1f
-	ret
-1:
-	xorl		%eax, %eax
-	movl		$(MP_SPIN_TRIES), %edx
-2:					/* spin for a while before relinquishing */
-	pause	
-	cmpl		%eax, (%r8)
-	jz		0b
-	decl		%edx
-	jnz		2b
-	/* failed to get lock after spinning so relinquish  */
-	xorl		%edi,%edi	/* THREAD_NULL			*/
-	movl		$1,%esi		/* SWITCH_OPTION_DEPRESS	*/
-	movl		$1,%edx		/* 1 ms				*/
-	movl		$(SYSCALL_CONSTRUCT_MACH(61)),%eax	/* 61 = thread_switch */
-	syscall
-	jmp		0b
-COMMPAGE_DESCRIPTOR(spin_lock_mp_64,_COMM_PAGE_SPINLOCK_LOCK,0,kUP)
-
-
-COMMPAGE_FUNCTION_START(spin_unlock_64, 64, 4)
-	movl		$0, (%rdi)
-	ret
-COMMPAGE_DESCRIPTOR(spin_unlock_64,_COMM_PAGE_SPINLOCK_UNLOCK,0,0)
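The try-lock stubs above all reduce to one compare-and-swap: claim the lock by
moving it from 0 to -1 and report whether the exchange happened. A C11
restatement (a sketch, not the kernel's code; the UP variants simply drop the
lock prefix, a distinction C11 atomics cannot express):

#include <stdatomic.h>
#include <stdint.h>

static int spin_lock_try_sketch(_Atomic uint32_t *lock)
{
    uint32_t expected = 0;                      /* lock word free */
    return atomic_compare_exchange_strong_explicit(
        lock, &expected, (uint32_t)-1,          /* claim it with -1 */
        memory_order_acquire, memory_order_relaxed);
}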
diff --git a/osfmk/i386/copyio.c b/osfmk/i386/copyio.c
new file mode 100644
index 000000000..82516b196
--- /dev/null
+++ b/osfmk/i386/copyio.c
@@ -0,0 +1,621 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <mach_assert.h>
+#include <sys/errno.h>
+#include <i386/param.h>
+#include <i386/misc_protos.h>
+#include <i386/cpu_data.h>
+#include <i386/machine_routines.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_fault.h>
+
+#include <sys/kdebug.h>
+
+/*
+ * the copy engine has the following characteristics
+ *   - copyio handles copies to/from user or kernel space
+ *   - copypv deals with physical or virtual addresses
+ *
+ * implementation details as follows
+ *   - a cache of up to NCOPY_WINDOWS is maintained per thread for
+ *     access to user virtual space
+ *   - the window size is determined by the amount of virtual space
+ *     that can be mapped by a single page table
+ *   - the mapping is done by copying the page table pointer from
+ *     the user's directory entry corresponding to the window's
+ *     address in user space to the directory entry corresponding
+ *     to the window slot in the kernel's address space
+ *   - the set of mappings is preserved across context switches,
+ *     so the copy can run with pre-emption enabled
+ *   - there is a gdt entry set up to anchor the kernel window on
+ *     each processor
+ *   - the copies are done using the selector corresponding to the
+ *     gdt entry
+ *   - the addresses corresponding to the user virtual address are
+ *     relative to the beginning of the window being used to map
+ *     that region... thus the thread can be pre-empted and switched
+ *     to a different processor while in the midst of a copy
+ *   - the window caches must be invalidated if the pmap changes out
+ *     from under the thread... this can happen during vfork/exec...
+ *     inval_copy_windows is the invalidation routine to be used
+ *   - the copyio engine has 4 different states associated with it
+ *     that allow for lazy tlb flushes and the ability to avoid
+ *     a flush altogether if we've just come from user space...
+ *     the 4 states are as follows...
+ *
+ *	WINDOWS_OPENED - set by copyio to indicate to the context
+ *	  switch code that it is necessary to do a tlbflush after
+ * 	  switching the windows since we're in the middle of a copy
+ *
+ *	WINDOWS_CLOSED - set by copyio to indicate that it's done
+ *	  using the windows, so that the context switch code need
+ *	  not do the tlbflush... instead it will set the state to...
+ *
+ *	WINDOWS_DIRTY - set by the context switch code to indicate
+ *	  to the copy engine that it is responsible for doing a 
+ *	  tlbflush before using the windows again... it's also
+ *	  set by the inval_copy_windows routine to indicate the
+ *	  same responsibility.
+ *
+ *	WINDOWS_CLEAN - set by the return to user path to indicate
+ * 	  that a tlbflush has happened and that there is no need
+ *	  for copyio to do another when it is entered next...
+ *
+ *   - a window for mapping single physical pages is provided for copypv
+ *   - this window is maintained across context switches and has the
+ *     same characteristics as the user space windows w/r to pre-emption
+ */
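/* Editorial sketch: the four-state handshake described above, compressed
 * into pseudocode (a restatement of the comment, not kernel code):
 *
 *   copyio() entry:      state = WINDOWS_OPENED
 *   copyio() exit:       state = WINDOWS_CLOSED
 *   context switch:      if (state == WINDOWS_OPENED) flush tlb;
 *                        else                         state = WINDOWS_DIRTY
 *   return to user:      state = WINDOWS_CLEAN   (a tlb flush has happened)
 *   next copyio() use:   if (state == WINDOWS_DIRTY) flush tlb first
 */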
+
+extern int copyout_user(const char *, vm_offset_t, vm_size_t);
+extern int copyout_kern(const char *, vm_offset_t, vm_size_t);
+extern int copyin_user(const vm_offset_t, char *, vm_size_t);
+extern int copyin_kern(const vm_offset_t, char *, vm_size_t);
+extern int copyoutphys_user(const char *, vm_offset_t, vm_size_t);
+extern int copyoutphys_kern(const char *, vm_offset_t, vm_size_t);
+extern int copyinphys_user(const vm_offset_t, char *, vm_size_t);
+extern int copyinphys_kern(const vm_offset_t, char *, vm_size_t);
+extern int copyinstr_user(const vm_offset_t, char *, vm_size_t, vm_size_t *);
+extern int copyinstr_kern(const vm_offset_t, char *, vm_size_t, vm_size_t *);
+
+static int copyio(int, user_addr_t, char *, vm_size_t, vm_size_t *, int);
+static int copyio_phys(addr64_t, addr64_t, vm_size_t, int);
+
+
+#define COPYIN		0
+#define COPYOUT		1
+#define COPYINSTR	2
+#define COPYINPHYS	3
+#define COPYOUTPHYS	4
+
+void inval_copy_windows(thread_t thread)
+{
+        int	i;
+
+	for (i = 0; i < NCOPY_WINDOWS; i++) {
+                thread->machine.copy_window[i].user_base = -1;
+	}
+	thread->machine.nxt_window = 0;
+	thread->machine.copyio_state = WINDOWS_DIRTY;
+
+	KERNEL_DEBUG(0xeff70058 | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), (int)thread->map, 0, 0, 0);
+}
+
+
+static int
+copyio(int copy_type, user_addr_t user_addr, char *kernel_addr,
+       vm_size_t nbytes, vm_size_t *lencopied, int use_kernel_map)
+{
+        thread_t	thread;
+	pmap_t		pmap;
+	pt_entry_t	*updp;
+	pt_entry_t	*kpdp;
+	user_addr_t 	user_base;
+	vm_offset_t 	user_offset;
+	vm_offset_t 	kern_vaddr;
+	vm_size_t	cnt;
+	vm_size_t	bytes_copied;
+	int		error = 0;
+	int		window_index;
+	int		copyio_state;
+        boolean_t	istate;
+#if KDEBUG
+	int		debug_type = 0xeff70010;
+	debug_type += (copy_type << 2);
+#endif
+
+	thread = current_thread();
+
+	KERNEL_DEBUG(debug_type | DBG_FUNC_START, (int)(user_addr >> 32), (int)user_addr,
+		     (int)nbytes, thread->machine.copyio_state, 0);
+
+	if (nbytes == 0) {
+	        KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)user_addr,
+			     (unsigned)kernel_addr, (unsigned)nbytes, 0, 0);
+	        return (0);
+	}
+        pmap = thread->map->pmap;
+
+        if (pmap == kernel_pmap || use_kernel_map) {
+
+	        kern_vaddr = (vm_offset_t)user_addr;
+	  
+	        switch (copy_type) {
+
+		case COPYIN:
+		        error = copyin_kern(kern_vaddr, kernel_addr, nbytes);
+			break;
+
+		case COPYOUT:
+		        error = copyout_kern(kernel_addr, kern_vaddr, nbytes);
+			break;
+
+		case COPYINSTR:
+		        error = copyinstr_kern(kern_vaddr, kernel_addr, nbytes, lencopied);
+			break;
+
+		case COPYINPHYS:
+		        error = copyinphys_kern(kern_vaddr, kernel_addr, nbytes);
+			break;
+
+		case COPYOUTPHYS:
+		        error = copyoutphys_kern(kernel_addr, kern_vaddr, nbytes);
+			break;
+		}
+		KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)kern_vaddr,
+			     (unsigned)kernel_addr, (unsigned)nbytes,
+			     error | 0x80000000, 0);
+		return (error);
+	}
+	
+#if CONFIG_DTRACE
+	thread->machine.specFlags |= CopyIOActive;
+#endif /* CONFIG_DTRACE */
+	
+	if ((nbytes && (user_addr + nbytes <= user_addr)) ||
+	    (user_addr          < vm_map_min(thread->map)) ||
+	    (user_addr + nbytes > vm_map_max(thread->map))) {
+		error = EFAULT;
+		goto done;
+	}
+
+	user_base = user_addr & ~((user_addr_t)(NBPDE - 1));
+	user_offset = (vm_offset_t)(user_addr & (NBPDE - 1));
+
+	KERNEL_DEBUG(debug_type | DBG_FUNC_NONE, (int)(user_base >> 32), (int)user_base,
+		     (int)user_offset, 0, 0);
+
+	cnt = NBPDE - user_offset;
+
+	if (cnt > nbytes)
+	        cnt = nbytes;
+
+	istate = ml_set_interrupts_enabled(FALSE);
+
+	copyio_state = thread->machine.copyio_state;
+	thread->machine.copyio_state = WINDOWS_OPENED;
+
+	(void) ml_set_interrupts_enabled(istate);
+
+
+	for (;;) {
+
+	        for (window_index = 0; window_index < NCOPY_WINDOWS; window_index++) {
+		        if (thread->machine.copy_window[window_index].user_base == user_base)
+					break;
+		}
+	        if (window_index >= NCOPY_WINDOWS) {
+
+		        window_index = thread->machine.nxt_window;
+			thread->machine.nxt_window++;
+
+			if (thread->machine.nxt_window >= NCOPY_WINDOWS)
+			        thread->machine.nxt_window = 0;
+
+			/*
+			 * it's necessary to disable pre-emption
+			 * since I have to compute the kernel descriptor pointer
+			 * for the new window
+			 */
+			istate = ml_set_interrupts_enabled(FALSE);
+
+			thread->machine.copy_window[window_index].user_base = user_base;
+
+		        updp = pmap_pde(pmap, user_base);
+
+			kpdp = current_cpu_datap()->cpu_copywindow_pdp;
+			kpdp += window_index;
+
+			pmap_store_pte(kpdp, updp ? *updp : 0);
+
+			(void) ml_set_interrupts_enabled(istate);
+
+		        copyio_state = WINDOWS_DIRTY;
+
+			KERNEL_DEBUG(0xeff70040 | DBG_FUNC_NONE, window_index,
+				     (unsigned)user_base, (unsigned)updp,
+				     (unsigned)kpdp, 0);
+
+		}
+#if JOE_DEBUG
+		else {
+			istate = ml_set_interrupts_enabled(FALSE);
+
+		        updp = pmap_pde(pmap, user_base);
+
+			kpdp = current_cpu_datap()->cpu_copywindow_pdp;
+
+			kpdp += window_index;
+
+			if ((*kpdp & PG_FRAME) != (*updp & PG_FRAME)) {
+				panic("copyio: user pdp mismatch - kpdp = 0x%qx,  updp = 0x%qx\n", *kpdp, *updp);
+			}
+			(void) ml_set_interrupts_enabled(istate);
+		}
+#endif
+		if (copyio_state == WINDOWS_DIRTY) {
+		        flush_tlb();
+
+		        copyio_state = WINDOWS_CLEAN;
+
+			KERNEL_DEBUG(0xeff70054 | DBG_FUNC_NONE, window_index, 0, 0, 0, 0);
+		}
+		user_offset += (window_index * NBPDE);
+
+		KERNEL_DEBUG(0xeff70044 | DBG_FUNC_NONE, (unsigned)user_offset,
+			     (unsigned)kernel_addr, cnt, 0, 0);
+
+	        switch (copy_type) {
+
+		case COPYIN:
+		        error = copyin_user(user_offset, kernel_addr, cnt);
+			break;
+			
+		case COPYOUT:
+		        error = copyout_user(kernel_addr, user_offset, cnt);
+			break;
+
+		case COPYINPHYS:
+		        error = copyinphys_user(user_offset, kernel_addr, cnt);
+			break;
+			
+		case COPYOUTPHYS:
+		        error = copyoutphys_user(kernel_addr, user_offset, cnt);
+			break;
+
+		case COPYINSTR:
+		        error = copyinstr_user(user_offset, kernel_addr, cnt, &bytes_copied);
+
+			/*
+			 * lencopied should be updated on success
+			 * or ENAMETOOLONG...  but not EFAULT
+			 */
+			if (error != EFAULT)
+			        *lencopied += bytes_copied;
+
+			/*
+			 * if we still have room, then the ENAMETOOLONG
+			 * is just an artifact of the buffer straddling
+			 * a window boundary and we should continue
+			 */
+			if (error == ENAMETOOLONG && nbytes > cnt)
+			        error = 0;
+
+			if (error) {
+#if KDEBUG
+			        nbytes = *lencopied;
+#endif
+			        break;
+			}
+			if (*(kernel_addr + bytes_copied - 1) == 0) {
+			        /*
+				 * we found a NULL terminator... we're done
+				 */
+#if KDEBUG
+			        nbytes = *lencopied;
+#endif
+				goto done;
+			}
+			if (cnt == nbytes) {
+			        /*
+				 * no more room in the buffer and we haven't
+				 * yet come across a NULL terminator
+				 */
+#if KDEBUG
+			        nbytes = *lencopied;
+#endif
+			        error = ENAMETOOLONG;
+				break;
+			}
+			assert(cnt == bytes_copied);
+
+			break;
+		}
+		if (error)
+		        break;
+		if ((nbytes -= cnt) == 0)
+		        break;
+
+		kernel_addr += cnt;
+		user_base += NBPDE;
+		user_offset = 0;
+
+		if (nbytes > NBPDE)
+		        cnt = NBPDE;
+		else
+		        cnt = nbytes;
+	}
+done:
+	thread->machine.copyio_state = WINDOWS_CLOSED;
+
+	KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)user_addr,
+		     (unsigned)kernel_addr, (unsigned)nbytes, error, 0);
+
+#if CONFIG_DTRACE
+	thread->machine.specFlags &= ~CopyIOActive;
+#endif /* CONFIG_DTRACE */
+
+	return (error);
+}
+
+static int
+copyio_phys(addr64_t source, addr64_t sink, vm_size_t csize, int which)
+{
+        pmap_paddr_t paddr;
+	user_addr_t vaddr;
+	char        *window_offset;
+	pt_entry_t  pentry;
+	int         ctype;
+	int	    retval;
+	boolean_t   istate;
+
+
+	if (which & cppvPsnk) {
+		paddr  = (pmap_paddr_t)sink;
+	        vaddr  = (user_addr_t)source;
+		ctype  = COPYINPHYS;
+		pentry = (pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME) | INTEL_PTE_RW);
+	} else {
+	        paddr  = (pmap_paddr_t)source;
+		vaddr  = (user_addr_t)sink;
+		ctype  = COPYOUTPHYS;
+		pentry = (pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME));
+	}
+	/* Fold in cache attributes for this physical page */
+	pentry |= pmap_get_cache_attributes(i386_btop(paddr));
+	window_offset = (char *)(uintptr_t)((uint32_t)paddr & (PAGE_SIZE - 1));
+
+	assert(!((current_thread()->machine.specFlags & CopyIOActive) && ((which & cppvKmap) == 0)));
+
+	if (current_thread()->machine.physwindow_busy) {
+	        pt_entry_t	old_pentry;
+
+	        KERNEL_DEBUG(0xeff70048 | DBG_FUNC_NONE, paddr, csize, 0, -1, 0);
+		/*
+		 * we had better be targeting wired memory at this point
+		 * we will not be able to handle a fault with interrupts
+		 * disabled... we disable them because we can't tolerate
+		 * being preempted during this nested use of the window
+		 */
+		istate = ml_set_interrupts_enabled(FALSE);
+
+		old_pentry = *(current_cpu_datap()->cpu_physwindow_ptep);
+		pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), pentry);
+
+		invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);
+
+		retval = copyio(ctype, vaddr, window_offset, csize, NULL, which & cppvKmap);
+
+		pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), old_pentry);
+
+		invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);
+
+		(void) ml_set_interrupts_enabled(istate);
+	} else {
+	        /*
+		 * mark the window as in use... if an interrupt hits while we're
+		 * busy, or we trigger another copypv from the fault path into
+		 * the driver on a user address space page fault due to a copyin/out
+		 * then we need to save and restore the current window state instead
+		 * of caching the window preserving it across context switches
+		 */
+	        current_thread()->machine.physwindow_busy = 1;
+
+	        if (current_thread()->machine.physwindow_pte != pentry) {
+		        KERNEL_DEBUG(0xeff70048 | DBG_FUNC_NONE, paddr, csize, 0, 0, 0);
+
+			current_thread()->machine.physwindow_pte = pentry;
+			
+			/*
+			 * preemption at this point would be bad since we
+			 * could end up on the other processor after we grabbed the
+			 * pointer to the current cpu data area, but before we finished
+			 * using it to stuff the page table entry since we would
+			 * be modifying a window that no longer belonged to us
+			 * the invlpg can be done unprotected since it only flushes
+			 * this page address from the tlb... if it flushes the wrong
+			 * one, no harm is done, and the context switch that moved us
+			 * to the other processor will have already taken care of
+			 * flushing the tlb after it reloaded the page table from machine.physwindow_pte
+			 */
+			istate = ml_set_interrupts_enabled(FALSE);
+
+			pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), pentry);
+			(void) ml_set_interrupts_enabled(istate);
+
+			invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);
+		}
+#if JOE_DEBUG
+		else {
+		        if (pentry !=
+			    (*(current_cpu_datap()->cpu_physwindow_ptep) & (INTEL_PTE_VALID | PG_FRAME | INTEL_PTE_RW)))
+			        panic("copyio_phys: pentry != *physwindow_ptep");
+		}
+#endif
+		retval = copyio(ctype, vaddr, window_offset, csize, NULL, which & cppvKmap);
+
+	        current_thread()->machine.physwindow_busy = 0;
+	}
+	return (retval);
+}
+
+int
+copyinmsg(const user_addr_t user_addr, char *kernel_addr, mach_msg_size_t nbytes)
+{
+        return (copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0));
+}    
+
+int
+copyin(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes)
+{
+        return (copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0));
+}
+
+int
+copyinstr(const user_addr_t user_addr,  char *kernel_addr, vm_size_t nbytes, vm_size_t *lencopied)
+{
+	*lencopied = 0;
+
+        return (copyio(COPYINSTR, user_addr, kernel_addr, nbytes, lencopied, 0));
+}
+
+int
+copyoutmsg(const char *kernel_addr, user_addr_t user_addr, mach_msg_size_t nbytes)
+{
+	return (copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0));
+}
+
+int
+copyout(const void *kernel_addr, user_addr_t user_addr, vm_size_t nbytes)
+{
+	return (copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0));
+}
+
+
+kern_return_t
+copypv(addr64_t src64, addr64_t snk64, unsigned int size, int which)
+{
+	unsigned int lop, csize;
+	int bothphys = 0;
+	
+	KERNEL_DEBUG(0xeff7004c | DBG_FUNC_START, (unsigned)src64,
+		     (unsigned)snk64, size, which, 0);
+
+	if ((which & (cppvPsrc | cppvPsnk)) == 0 )				/* Make sure that only one is virtual */
+		panic("copypv: no more than 1 parameter may be virtual\n");	/* Not allowed */
+
+	if ((which & (cppvPsrc | cppvPsnk)) == (cppvPsrc | cppvPsnk))
+	        bothphys = 1;							/* both are physical */
+
+	while (size) {
+	  
+	        if (bothphys) {
+		        lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1)));		/* Assume sink smallest */
+
+			if (lop > (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1))))
+			        lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)));	/* No, source is smaller */
+		} else {
+		        /*
+			 * only need to compute the resid for the physical page
+			 * address... we don't care about where we start/finish in
+			 * the virtual since we just call the normal copyin/copyout
+			 */
+		        if (which & cppvPsrc)
+			        lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)));
+			else
+			        lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1)));
+		}
+		csize = size;						/* Assume we can copy it all */
+		if (lop < size)
+		        csize = lop;					/* Nope, we can't do it all */
+#if 0		
+		/*
+		 * flush_dcache64 is currently a nop on the i386... 
+		 * it's used when copying to non-system memory such
+		 * as video capture cards... on PPC there was a need
+		 * to flush due to how we mapped this memory... not
+		 * sure if it's needed on i386.
+		 */
+		if (which & cppvFsrc)
+		        flush_dcache64(src64, csize, 1);		/* If requested, flush source before move */
+		if (which & cppvFsnk)
+		        flush_dcache64(snk64, csize, 1);		/* If requested, flush sink before move */
+#endif
+		if (bothphys) {
+			bcopy_phys(src64, snk64, csize);		/* Do a physical copy, virtually */
+		}
+		else {
+			if (copyio_phys(src64, snk64, csize, which)) {
+				return (KERN_FAILURE);
+			}
+		}
+#if 0
+		if (which & cppvFsrc)
+		        flush_dcache64(src64, csize, 1);	/* If requested, flush source after move */
+		if (which & cppvFsnk)
+		        flush_dcache64(snk64, csize, 1);	/* If requested, flush sink after move */
+#endif
+		size   -= csize;					/* Calculate what is left */
+		snk64 += csize;					/* Bump sink to next physical address */
+		src64 += csize;					/* Bump source to next physical address */
+	}
+	KERNEL_DEBUG(0xeff7004c | DBG_FUNC_END, (unsigned)src64,
+		     (unsigned)snk64, size, which, 0);
+
+	return KERN_SUCCESS;
+}
+void
+copy_window_fault(thread_t thread, vm_map_t map, int window)
+{
+	pt_entry_t	*updp;
+	pt_entry_t	*kpdp;
+
+	/*
+	 * in case there was no page table assigned
+	 * for the user base address and the pmap
+	 * got 'expanded' due to this fault, we'll
+	 * copy in the descriptor 
+	 *
+	 * we're either setting the page table descriptor
+	 * to the same value or it was 0... no need
+	 * for a TLB flush in either case
+	 */
+
+        updp = pmap_pde(map->pmap, thread->machine.copy_window[window].user_base);
+	assert(updp);
+	if (0 == updp) panic("trap: updp 0"); /* XXX DEBUG */
+	kpdp = current_cpu_datap()->cpu_copywindow_pdp;
+	kpdp += window;
+
+#if JOE_DEBUG
+	if (*kpdp && (*kpdp & PG_FRAME) != (*updp & PG_FRAME))
+	        panic("kernel_fault: user pdp doesn't match - updp = 0x%qx, kpdp = 0x%qx\n", *updp, *kpdp);
+#endif
+	pmap_store_pte(kpdp, *updp);
+}
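The base/offset carving that copyio() performs can be restated compactly. A
sketch, under the assumption that NBPDE is the span of one page-directory
entry (2MB with PAE); the helper name is hypothetical:

#include <stdint.h>
#include <stddef.h>

#define NBPDE_SKETCH (2u * 1024 * 1024)   /* assumed PDE span (PAE) */

/* Split a user copy into the window-aligned base, the offset within
 * that window, and the size of the first chunk, as copyio() does;
 * a copy that straddles a window boundary proceeds NBPDE at a time. */
static void first_chunk(uint64_t user_addr, size_t nbytes,
                        uint64_t *base, size_t *offset, size_t *cnt)
{
    *base   = user_addr & ~((uint64_t)NBPDE_SKETCH - 1);
    *offset = (size_t)(user_addr & (NBPDE_SKETCH - 1));
    *cnt    = NBPDE_SKETCH - *offset;     /* room left in this window */
    if (*cnt > nbytes)
        *cnt = nbytes;
}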
diff --git a/osfmk/i386/cpu.c b/osfmk/i386/cpu.c
index 0bcfbb77f..4cdeed647 100644
--- a/osfmk/i386/cpu.c
+++ b/osfmk/i386/cpu.c
@@ -40,12 +40,14 @@
 #include <i386/machine_routines.h>
 #include <i386/misc_protos.h>
 #include <i386/cpu_threads.h>
-#include <i386/rtclock.h>
+#include <i386/rtclock_protos.h>
 #include <i386/cpuid.h>
 #if CONFIG_VMX
 #include <i386/vmx/vmx_cpu.h>
 #endif
 #include <vm/vm_kern.h>
+#include <kern/etimer.h>
+#include <kern/timer_call.h>
 
 struct processor	processor_master;
 
@@ -101,6 +103,9 @@ cpu_init(void)
 {
 	cpu_data_t	*cdp = current_cpu_datap();
 
+	timer_call_initialize_queue(&cdp->rtclock_timer.queue);
+	cdp->rtclock_timer.deadline = EndOfAllTime;
+
 	cdp->cpu_type = cpuid_cputype();
 	cdp->cpu_subtype = cpuid_cpusubtype();
 
@@ -167,13 +172,6 @@ cpu_machine_init(
 	PE_cpu_machine_init(cdp->cpu_id, !cdp->cpu_boot_complete);
 	cdp->cpu_boot_complete = TRUE;
 	cdp->cpu_running = TRUE;
-#if 0
-	if (cpu_datap(cpu)->hibernate)
-	{
-	    cpu_datap(cpu)->hibernate = 0;
-	    hibernate_machine_init();
-	}
-#endif
 	ml_init_interrupt();
 
 #if CONFIG_VMX
@@ -246,8 +244,6 @@ slot_threadtype(
 	return (cpu_datap(slot_num)->cpu_threadtype);
 }
 
-
-
 cpu_type_t
 cpu_type(void)
 {
diff --git a/osfmk/i386/cpu_capabilities.h b/osfmk/i386/cpu_capabilities.h
index 470e8a3e7..a820ea7aa 100644
--- a/osfmk/i386/cpu_capabilities.h
+++ b/osfmk/i386/cpu_capabilities.h
@@ -53,11 +53,11 @@
 #define	kHasSSE4_1			0x00000400
 #define	kHasSSE4_2			0x00000800
 #define	kHasAES				0x00001000
-#define	kInOrderPipeline		0x00002000	/* in-order execution */
+#define	kInOrderPipeline		0x00002000
 #define	kSlow				0x00004000	/* tsc < nanosecond */
 #define	kUP				0x00008000	/* set if (kNumCPUs == 1) */
 #define	kNumCPUs			0x00FF0000	/* number of CPUs (see _NumCPUs() below) */
-
+#define	kHasAVX1_0			0x01000000
 #define	kNumCPUsShift			16		/* see _NumCPUs() below */
 
 #ifndef	__ASSEMBLER__
@@ -75,6 +75,13 @@ int _NumCPUs( void )
 
 #endif /* __ASSEMBLER__ */
 
+/* The following macro is used to generate the 64-bit commpage address for a given
+ * routine, based on its 32-bit address.  This is used in the kernel to compile
+ * the 64-bit commpage.  Since the kernel can be a 32-bit object, cpu_capabilities.h
+ * only defines the 32-bit address.
+ */
+#define	_COMM_PAGE_32_TO_64( ADDRESS )	( ADDRESS + _COMM_PAGE64_START_ADDRESS - _COMM_PAGE32_START_ADDRESS )
+
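A worked example of the macro, using the constants defined below in this header
(32-bit commpage start 0xffff0000; _COMM_PAGE_PREEMPT sits at offset 0x5a0):

	_COMM_PAGE_32_TO_64( 0xffff05a0 )
	    == 0xffff05a0 + 0x00007fffffe00000 - 0xffff0000
	    == 0x00007fffffe005a0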
 
 /*
  * The shared kernel/user "comm page(s)":
@@ -86,15 +93,19 @@ int _NumCPUs( void )
  * Because Mach VM cannot map the last page of an address space, we don't use it.
  */
  
-#define	_COMM_PAGE32_AREA_LENGTH	( 19 * 4096 )				/* reserved length of entire comm area */
-#define _COMM_PAGE32_BASE_ADDRESS	( 0xfffec000 )				/* base address of allocated memory, -20 pages */
-#define _COMM_PAGE32_START_ADDRESS	( 0xffff0000 )				/* address traditional commpage code starts on, -16 pages */
-#define _COMM_PAGE32_AREA_USED		( 19 * 4096 )				/* this is the amt actually allocated */
-#define _COMM_PAGE32_SIGS_OFFSET	0x8000					/* offset to routine signatures */
+#define	_COMM_PAGE32_AREA_LENGTH	( 2 * 4096 )				/* reserved length of entire comm area */
+#define _COMM_PAGE32_BASE_ADDRESS	( 0xffff0000 )				/* base address of allocated memory */
+#define _COMM_PAGE32_START_ADDRESS	( _COMM_PAGE32_BASE_ADDRESS )	/* address traditional commpage code starts on */
+#define _COMM_PAGE32_AREA_USED		( 2 * 4096 )				/* this is the amt actually allocated */
+#define _COMM_PAGE32_SIGS_OFFSET	0x8000					    /* offset to routine signatures */
 
 #define	_COMM_PAGE64_AREA_LENGTH	( 2 * 1024 * 1024 )			/* reserved length of entire comm area (2MB) */
-#define _COMM_PAGE64_BASE_ADDRESS	( 0x00007fffffe00000ULL )		/* base address of allocated memory */
-#define _COMM_PAGE64_START_ADDRESS	( _COMM_PAGE64_BASE_ADDRESS )		/* address traditional commpage code starts on */
+#ifdef __ASSEMBLER__
+#define _COMM_PAGE64_BASE_ADDRESS	( 0x00007fffffe00000 )   /* base address of allocated memory */
+#else /* __ASSEMBLER__ */
+#define _COMM_PAGE64_BASE_ADDRESS	( 0x00007fffffe00000ULL )   /* base address of allocated memory */
+#endif /* __ASSEMBLER__ */
+#define _COMM_PAGE64_START_ADDRESS	( _COMM_PAGE64_BASE_ADDRESS )	/* address traditional commpage code starts on */
 #define _COMM_PAGE64_AREA_USED		( 2 * 4096 )				/* this is the amt actually populated */
 
 /* no need for an Objective-C area on Intel */
@@ -139,34 +150,25 @@ int _NumCPUs( void )
  
 #define _COMM_PAGE_SIGNATURE		(_COMM_PAGE_START_ADDRESS+0x000)	/* first few bytes are a signature */
 #define _COMM_PAGE_VERSION		(_COMM_PAGE_START_ADDRESS+0x01E)	/* 16-bit version# */
-#define _COMM_PAGE_THIS_VERSION		11					/* version of the commarea format */
+#define _COMM_PAGE_THIS_VERSION		12					/* version of the commarea format */
   
 #define _COMM_PAGE_CPU_CAPABILITIES	(_COMM_PAGE_START_ADDRESS+0x020)	/* uint32_t _cpu_capabilities */
-#define _COMM_PAGE_NCPUS		(_COMM_PAGE_START_ADDRESS+0x022)	/* uint8_t number of configured CPUs */
+#define _COMM_PAGE_NCPUS		(_COMM_PAGE_START_ADDRESS+0x022)	/* uint8_t number of configured CPUs (hw.logicalcpu at boot time) */
+#define _COMM_PAGE_UNUSED0			(_COMM_PAGE_START_ADDRESS+0x024)	/* 2 unused bytes, reserved for future expansion of cpu_capabilities */
 #define _COMM_PAGE_CACHE_LINESIZE	(_COMM_PAGE_START_ADDRESS+0x026)	/* uint16_t cache line size */
 
 #define _COMM_PAGE_SCHED_GEN		(_COMM_PAGE_START_ADDRESS+0x028)	/* uint32_t scheduler generation number (count of pre-emptions) */
 #define _COMM_PAGE_MEMORY_PRESSURE	(_COMM_PAGE_START_ADDRESS+0x02c)	/* uint32_t copy of vm_memory_pressure */
 #define	_COMM_PAGE_SPIN_COUNT		(_COMM_PAGE_START_ADDRESS+0x030)	/* uint32_t max spin count for mutexes */
 
-#define _COMM_PAGE_UNUSED1		(_COMM_PAGE_START_ADDRESS+0x034)	/* 12 unused bytes */
+#define _COMM_PAGE_ACTIVE_CPUS		(_COMM_PAGE_START_ADDRESS+0x034)	/* uint8_t number of active CPUs (hw.activecpu) */
+#define _COMM_PAGE_PHYSICAL_CPUS	(_COMM_PAGE_START_ADDRESS+0x035)	/* uint8_t number of physical CPUs (hw.physicalcpu_max) */
+#define _COMM_PAGE_LOGICAL_CPUS	(_COMM_PAGE_START_ADDRESS+0x036)	/* uint8_t number of logical CPUs (hw.logicalcpu_max) */
+#define _COMM_PAGE_UNUSED1		(_COMM_PAGE_START_ADDRESS+0x037)	/* 1 unused byte */
+#define _COMM_PAGE_MEMORY_SIZE		(_COMM_PAGE_START_ADDRESS+0x038)	/* uint64_t max memory size */
 
-#ifdef KERNEL_PRIVATE
-
-/* slots defined in all cases, but commpage setup code must not populate for 64-bit commpage */
-#define _COMM_PAGE_2_TO_52		(_COMM_PAGE_START_ADDRESS+0x040)	/* double float constant 2**52 */
-#define _COMM_PAGE_10_TO_6		(_COMM_PAGE_START_ADDRESS+0x048)	/* double float constant 10**6 */
-
-#else /* !KERNEL_PRIVATE */
-
-#if defined(__i386__)								/* following are not defined in 64-bit */
-#define _COMM_PAGE_2_TO_52		(_COMM_PAGE_START_ADDRESS+0x040)	/* double float constant 2**52 */
-#define _COMM_PAGE_10_TO_6		(_COMM_PAGE_START_ADDRESS+0x048)	/* double float constant 10**6 */
-#else
-#define _COMM_PAGE_UNUSED2		(_COMM_PAGE_START_ADDRESS+0x040)	/* 16 unused bytes */
-#endif
-
-#endif /* !KERNEL_PRIVATE */
+#define _COMM_PAGE_CPUFAMILY		(_COMM_PAGE_START_ADDRESS+0x040)	/* uint32_t hw.cpufamily, x86*/
+#define _COMM_PAGE_UNUSED2		(_COMM_PAGE_START_ADDRESS+0x044)	/* [0x44,0x50) unused */
 
 #define	_COMM_PAGE_TIME_DATA_START	(_COMM_PAGE_START_ADDRESS+0x050)	/* base of offsets below (_NT_SCALE etc) */
 #define _COMM_PAGE_NT_TSC_BASE		(_COMM_PAGE_START_ADDRESS+0x050)	/* used by nanotime() */
@@ -194,49 +196,10 @@ int _NumCPUs( void )
  /* When new jump table entries are added, corresponding symbols should be added below        */
  /* New slots should be allocated with at least 16-byte alignment. Some like bcopy require    */
  /* 32-byte alignment, and should be aligned as such in the assembly source before they are relocated */
-#define _COMM_PAGE_COMPARE_AND_SWAP32   (_COMM_PAGE_START_ADDRESS+0x080)	/* compare-and-swap word */
-#define _COMM_PAGE_COMPARE_AND_SWAP64   (_COMM_PAGE_START_ADDRESS+0x0c0)	/* compare-and-swap doubleword */
-#define _COMM_PAGE_ENQUEUE              (_COMM_PAGE_START_ADDRESS+0x100)	/* enqueue */
-#define _COMM_PAGE_DEQUEUE              (_COMM_PAGE_START_ADDRESS+0x140)	/* dequeue */
-#define _COMM_PAGE_MEMORY_BARRIER       (_COMM_PAGE_START_ADDRESS+0x180)	/* memory barrier */
-#define _COMM_PAGE_ATOMIC_ADD32         (_COMM_PAGE_START_ADDRESS+0x1a0)	/* add atomic word */
-#define _COMM_PAGE_ATOMIC_ADD64         (_COMM_PAGE_START_ADDRESS+0x1c0)	/* add atomic doubleword */
-
-#define	_COMM_PAGE_CPU_NUMBER		(_COMM_PAGE_START_ADDRESS+0x1e0)	/* user-level cpu_number() */
-
-#define _COMM_PAGE_ABSOLUTE_TIME	(_COMM_PAGE_START_ADDRESS+0x200)	/* mach_absolute_time() */
-#define _COMM_PAGE_SPINLOCK_TRY		(_COMM_PAGE_START_ADDRESS+0x220)	/* spinlock_try() */
-#define _COMM_PAGE_SPINLOCK_LOCK	(_COMM_PAGE_START_ADDRESS+0x260)	/* spinlock_lock() */
-#define _COMM_PAGE_SPINLOCK_UNLOCK	(_COMM_PAGE_START_ADDRESS+0x2a0)	/* spinlock_unlock() */
-#define _COMM_PAGE_PTHREAD_GETSPECIFIC  (_COMM_PAGE_START_ADDRESS+0x2c0)	/* pthread_getspecific() */
-#define _COMM_PAGE_GETTIMEOFDAY		(_COMM_PAGE_START_ADDRESS+0x2e0)	/* used by gettimeofday() */
-#define _COMM_PAGE_FLUSH_DCACHE		(_COMM_PAGE_START_ADDRESS+0x4e0)	/* sys_dcache_flush() */
-#define _COMM_PAGE_FLUSH_ICACHE		(_COMM_PAGE_START_ADDRESS+0x520)	/* sys_icache_invalidate() */
-#define _COMM_PAGE_PTHREAD_SELF		(_COMM_PAGE_START_ADDRESS+0x580)	/* pthread_self() */
+#define _COMM_PAGE_TEXT_START           (_COMM_PAGE_START_ADDRESS+0x080)    /* start of text section */
 
 #define _COMM_PAGE_PREEMPT		(_COMM_PAGE_START_ADDRESS+0x5a0)	/* used by PFZ code */
-
-#define _COMM_PAGE_RELINQUISH		(_COMM_PAGE_START_ADDRESS+0x5c0)	/* used by spinlocks */ 
-#define _COMM_PAGE_BTS		        (_COMM_PAGE_START_ADDRESS+0x5e0)	/* bit test-and-set */
-#define _COMM_PAGE_BTC			(_COMM_PAGE_START_ADDRESS+0x5f0)	/* bit test-and-clear */
- 
-#define _COMM_PAGE_BZERO		(_COMM_PAGE_START_ADDRESS+0x600)	/* bzero() */
-#define _COMM_PAGE_BCOPY		(_COMM_PAGE_START_ADDRESS+0x780)	/* bcopy() */
-#define	_COMM_PAGE_MEMCPY		(_COMM_PAGE_START_ADDRESS+0x7a0)	/* memcpy() */
-#define	_COMM_PAGE_MEMMOVE		(_COMM_PAGE_START_ADDRESS+0x7a0)	/* memmove() */
-#define	_COMM_PAGE_BCOPY_END		(_COMM_PAGE_START_ADDRESS+0xfff)	/* used by rosetta */
-
-#define	_COMM_PAGE_MEMSET_PATTERN       (_COMM_PAGE_START_ADDRESS+0x1000)	/* used by nonzero memset() */
-#define	_COMM_PAGE_LONGCOPY		(_COMM_PAGE_START_ADDRESS+0x1200)	/* used by bcopy() for very long operands */
-#define	_COMM_PAGE_LONGCOPY_END		(_COMM_PAGE_START_ADDRESS+0x15ff)	/* used by rosetta */
-
 #define _COMM_PAGE_BACKOFF		(_COMM_PAGE_START_ADDRESS+0x1600)	/* called from PFZ */
-#define _COMM_PAGE_FIFO_ENQUEUE		(_COMM_PAGE_START_ADDRESS+0x1680)	/* FIFO enqueue */
-#define _COMM_PAGE_FIFO_DEQUEUE		(_COMM_PAGE_START_ADDRESS+0x16c0)	/* FIFO dequeue */
-#define	_COMM_PAGE_NANOTIME		(_COMM_PAGE_START_ADDRESS+0x1700)	/* nanotime() */
-#define	_COMM_PAGE_MUTEX_LOCK		(_COMM_PAGE_START_ADDRESS+0x1780)	/* pthread_mutex_lock() */
-
-#define	_COMM_PAGE_UNUSED5		(_COMM_PAGE_START_ADDRESS+0x17e0)	/* unused space for regular code up to 0x1c00 */
 
 #define _COMM_PAGE_PFZ_START		(_COMM_PAGE_START_ADDRESS+0x1c00)	/* start of Preemption Free Zone */
 
@@ -265,38 +228,8 @@ symbol_name: nop
 
 	.text		/* Required to make a well behaved symbol file */
 
-	CREATE_COMM_PAGE_SYMBOL(___compare_and_swap32, _COMM_PAGE_COMPARE_AND_SWAP32)
-	CREATE_COMM_PAGE_SYMBOL(___compare_and_swap64, _COMM_PAGE_COMPARE_AND_SWAP64)
-	CREATE_COMM_PAGE_SYMBOL(___atomic_enqueue, _COMM_PAGE_ENQUEUE)
-	CREATE_COMM_PAGE_SYMBOL(___atomic_dequeue, _COMM_PAGE_DEQUEUE)
-	CREATE_COMM_PAGE_SYMBOL(___memory_barrier, _COMM_PAGE_MEMORY_BARRIER)
-	CREATE_COMM_PAGE_SYMBOL(___atomic_add32, _COMM_PAGE_ATOMIC_ADD32)
-	CREATE_COMM_PAGE_SYMBOL(___atomic_add64, _COMM_PAGE_ATOMIC_ADD64)
-	CREATE_COMM_PAGE_SYMBOL(___cpu_number, _COMM_PAGE_CPU_NUMBER)
-	CREATE_COMM_PAGE_SYMBOL(___mach_absolute_time, _COMM_PAGE_ABSOLUTE_TIME)
-	CREATE_COMM_PAGE_SYMBOL(___spin_lock_try, _COMM_PAGE_SPINLOCK_TRY)
-	CREATE_COMM_PAGE_SYMBOL(___spin_lock, _COMM_PAGE_SPINLOCK_LOCK)
-	CREATE_COMM_PAGE_SYMBOL(___spin_unlock, _COMM_PAGE_SPINLOCK_UNLOCK)
-	CREATE_COMM_PAGE_SYMBOL(___pthread_getspecific, _COMM_PAGE_PTHREAD_GETSPECIFIC)
-	CREATE_COMM_PAGE_SYMBOL(___gettimeofday, _COMM_PAGE_GETTIMEOFDAY)
-	CREATE_COMM_PAGE_SYMBOL(___sys_dcache_flush, _COMM_PAGE_FLUSH_DCACHE)
-	CREATE_COMM_PAGE_SYMBOL(___sys_icache_invalidate, _COMM_PAGE_FLUSH_ICACHE)
-	CREATE_COMM_PAGE_SYMBOL(___pthread_self, _COMM_PAGE_PTHREAD_SELF)
-	CREATE_COMM_PAGE_SYMBOL(___pfz_preempt, _COMM_PAGE_PREEMPT)
-	CREATE_COMM_PAGE_SYMBOL(___spin_lock_relinquish, _COMM_PAGE_RELINQUISH)
-	CREATE_COMM_PAGE_SYMBOL(___bit_test_and_set, _COMM_PAGE_BTS)
-	CREATE_COMM_PAGE_SYMBOL(___bit_test_and_clear, _COMM_PAGE_BTC)
-	CREATE_COMM_PAGE_SYMBOL(___bzero, _COMM_PAGE_BZERO)
-	CREATE_COMM_PAGE_SYMBOL(___bcopy, _COMM_PAGE_BCOPY)
-	CREATE_COMM_PAGE_SYMBOL(___memcpy, _COMM_PAGE_MEMCPY)
-/*	CREATE_COMM_PAGE_SYMBOL(___memmove, _COMM_PAGE_MEMMOVE) */
-	CREATE_COMM_PAGE_SYMBOL(___memset_pattern, _COMM_PAGE_MEMSET_PATTERN)
-	CREATE_COMM_PAGE_SYMBOL(___longcopy, _COMM_PAGE_LONGCOPY)
+	CREATE_COMM_PAGE_SYMBOL(___preempt, _COMM_PAGE_PREEMPT)
 	CREATE_COMM_PAGE_SYMBOL(___backoff, _COMM_PAGE_BACKOFF)
-	CREATE_COMM_PAGE_SYMBOL(___fifo_enqueue, _COMM_PAGE_FIFO_ENQUEUE)
-	CREATE_COMM_PAGE_SYMBOL(___fifo_dequeue, _COMM_PAGE_FIFO_DEQUEUE)
-	CREATE_COMM_PAGE_SYMBOL(___nanotime, _COMM_PAGE_NANOTIME)
-	CREATE_COMM_PAGE_SYMBOL(___mutex_lock, _COMM_PAGE_MUTEX_LOCK)
 	CREATE_COMM_PAGE_SYMBOL(___pfz_enqueue, _COMM_PAGE_PFZ_ENQUEUE)
 	CREATE_COMM_PAGE_SYMBOL(___pfz_dequeue, _COMM_PAGE_PFZ_DEQUEUE)
 	CREATE_COMM_PAGE_SYMBOL(___pfz_mutex_lock, _COMM_PAGE_PFZ_MUTEX_LOCK)
diff --git a/osfmk/i386/cpu_data.h b/osfmk/i386/cpu_data.h
index 63eb4446b..22de8b2b0 100644
--- a/osfmk/i386/cpu_data.h
+++ b/osfmk/i386/cpu_data.h
@@ -43,7 +43,7 @@
 #include <pexpert/pexpert.h>
 #include <mach/i386/thread_status.h>
 #include <mach/i386/vm_param.h>
-#include <i386/rtclock.h>
+#include <i386/rtclock_protos.h>
 #include <i386/pmCPU.h>
 #include <i386/cpu_topology.h>
 
@@ -51,6 +51,8 @@
 #include <i386/vmx/vmx_cpu.h>
 #endif
 
+#include <machine/pal_routines.h>
+
 /*
  * Data structures referenced (anonymously) from per-cpu data:
  */
@@ -58,14 +60,13 @@ struct cpu_cons_buffer;
 struct cpu_desc_table;
 struct mca_state;
 
-
 /*
  * Data structures embedded in per-cpu data:
  */
 typedef struct rtclock_timer {
-	queue_head_t	queue;
+	mpqueue_head_t		queue;
 	uint64_t		deadline;
-	boolean_t		is_set;
+	uint64_t		when_set;
 	boolean_t		has_expired;
 } rtclock_timer_t;
 
@@ -125,9 +126,11 @@ typedef enum {
 typedef struct {
 	addr64_t	cu_isf;		/* thread->pcb->iss.isf */
 	uint64_t	cu_tmp;		/* temporary scratch */	
-        addr64_t	cu_user_gs_base;
+	addr64_t	cu_user_gs_base;
 } cpu_uber_t;
 
+typedef	uint16_t	pcid_t;
+typedef	uint8_t		pcid_ref_t;
 /*
  * Per-cpu data.
  *
@@ -143,16 +146,17 @@ typedef struct {
  */
 typedef struct cpu_data
 {
+	struct pal_cpu_data	cpu_pal_data;		/* PAL-specific data */
+#define				cpu_pd cpu_pal_data	/* convenience alias */
 	struct cpu_data		*cpu_this;		/* pointer to myself */
 	thread_t		cpu_active_thread;
+	int			cpu_preemption_level;
+	int			cpu_number;		/* Logical CPU */
 	void			*cpu_int_state;		/* interrupt state */
 	vm_offset_t		cpu_active_stack;	/* kernel stack base */
 	vm_offset_t		cpu_kernel_stack;	/* kernel stack top */
 	vm_offset_t		cpu_int_stack_top;
-	int			cpu_preemption_level;
-	int			cpu_simple_lock_count;
 	int			cpu_interrupt_level;
-	int			cpu_number;		/* Logical CPU */
 	int			cpu_phys_number;	/* Physical CPU */
 	cpu_id_t		cpu_id;			/* Platform Expert */
 	int			cpu_signals;		/* IPI events */
@@ -167,9 +171,16 @@ typedef struct cpu_data
 	int			cpu_running;
 	rtclock_timer_t		rtclock_timer;
 	boolean_t		cpu_is64bit;
-	task_map_t		cpu_task_map;
+	volatile addr64_t	cpu_active_cr3 __attribute((aligned(64)));
+	union {
+		volatile uint32_t cpu_tlb_invalid;
+		struct {
+			volatile uint16_t cpu_tlb_invalid_local;
+			volatile uint16_t cpu_tlb_invalid_global;
+		};
+	};
+	volatile task_map_t	cpu_task_map;
 	volatile addr64_t	cpu_task_cr3;
-	volatile addr64_t	cpu_active_cr3;
 	addr64_t		cpu_kernel_cr3;
 	cpu_uber_t		cpu_uber;
 	void			*cpu_chud;
@@ -195,20 +206,17 @@ typedef struct cpu_data
 	boolean_t		cpu_iflag;
 	boolean_t		cpu_boot_complete;
 	int			cpu_hibernate;
-
 #if NCOPY_WINDOWS > 0
 	vm_offset_t		cpu_copywindow_base;
 	uint64_t		*cpu_copywindow_pdp;
 
 	vm_offset_t		cpu_physwindow_base;
 	uint64_t		*cpu_physwindow_ptep;
-	void 			*cpu_hi_iss;
 #endif
+	void 			*cpu_hi_iss;
 
-
-
-	volatile boolean_t	cpu_tlb_invalid;
-	uint32_t		cpu_hwIntCnt[256];	/* Interrupt counts */
+#define HWINTCNT_SIZE 256
+	uint32_t		cpu_hwIntCnt[HWINTCNT_SIZE];	/* Interrupt counts */
 	uint64_t		cpu_dr7; /* debug control register */
 	uint64_t		cpu_int_event_time;	/* intr entry/exit time */
 #if CONFIG_VMX
@@ -226,11 +234,26 @@ typedef struct cpu_data
 							   * arg store
 							   * validity flag.
 							   */
-	rtc_nanotime_t		*cpu_nanotime;		/* Nanotime info */
+	pal_rtc_nanotime_t	*cpu_nanotime;		/* Nanotime info */
 	thread_t		csw_old_thread;
 	thread_t		csw_new_thread;
-	uint64_t		cpu_max_observed_int_latency;
-	int			cpu_max_observed_int_latency_vector;
+#if	defined(__x86_64__)
+	uint32_t		cpu_pmap_pcid_enabled;
+	pcid_t			cpu_active_pcid;
+	pcid_t			cpu_last_pcid;
+	volatile pcid_ref_t	*cpu_pmap_pcid_coherentp;
+	volatile pcid_ref_t	*cpu_pmap_pcid_coherentp_kernel;
+#define	PMAP_PCID_MAX_PCID      (0x1000)
+	pcid_t			cpu_pcid_free_hint;
+	pcid_ref_t		cpu_pcid_refcounts[PMAP_PCID_MAX_PCID];
+	pmap_t			cpu_pcid_last_pmap_dispatched[PMAP_PCID_MAX_PCID];
+#ifdef	PCID_STATS
+	uint64_t		cpu_pmap_pcid_flushes;
+	uint64_t		cpu_pmap_pcid_preserves;
+#endif
+#endif /* x86_64 */
+	uint64_t                cpu_max_observed_int_latency;
+	int                     cpu_max_observed_int_latency_vector;
 	uint64_t		debugger_entry_time;
 	volatile boolean_t	cpu_NMI_acknowledged;
 	/* A separate nested interrupt stack flag, to account
@@ -240,6 +263,8 @@ typedef struct cpu_data
 	 */
 	uint32_t		cpu_nested_istack;
 	uint32_t		cpu_nested_istack_events;
+	x86_saved_state64_t	*cpu_fatal_trap_state;
+	x86_saved_state64_t	*cpu_post_fatal_trap_state;
 } cpu_data_t;
 
 extern cpu_data_t	*cpu_data_ptr[];  
@@ -256,6 +281,24 @@ extern cpu_data_t	cpu_data_master;
 		: "i" (offsetof(cpu_data_t,member)));			\
 	return ret;
 
+#define CPU_DATA_GET_INDEX(member,index,type)	\
+	type ret;							\
+	__asm__ volatile ("mov %%gs:(%1),%0"				\
+		: "=r" (ret)						\
+		: "r" (offsetof(cpu_data_t,member[index])));			\
+	return ret;
+
+#define CPU_DATA_SET(member,value)					\
+	__asm__ volatile ("mov %0,%%gs:%P1"				\
+		:							\
+		: "r" (value), "i" (offsetof(cpu_data_t,member)));
+#define CPU_DATA_XCHG(member,value,type)				\
+	type ret;							\
+	__asm__ volatile ("xchg %0,%%gs:%P1"				\
+		: "=r" (ret)						\
+		: "i" (offsetof(cpu_data_t,member)), "0" (value));	\
+	return ret;
+
 /*
  * Everyone within the osfmk part of the kernel can use the fast
  * inline versions of these routines.  Everyone outside, must call
@@ -269,14 +312,14 @@ get_active_thread(void)
 #define current_thread_fast()		get_active_thread()
 #define current_thread()		current_thread_fast()
 
-#if defined(__i386__)
 static inline boolean_t
 get_is64bit(void)
 {
 	CPU_DATA_GET(cpu_is64bit, boolean_t)
 }
+#if CONFIG_YONAH
 #define cpu_mode_is64bit()		get_is64bit()
-#elif defined(__x86_64__)
+#else
 #define cpu_mode_is64bit()		TRUE
 #endif
 
@@ -286,11 +329,6 @@ get_preemption_level(void)
 	CPU_DATA_GET(cpu_preemption_level,int)
 }
 static inline int
-get_simple_lock_count(void)
-{
-	CPU_DATA_GET(cpu_simple_lock_count,int)
-}
-static inline int
 get_interrupt_level(void)
 {
 	CPU_DATA_GET(cpu_interrupt_level,int)
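
Aside (illustrative sketch, not part of the patch): the new CPU_DATA_SET and
CPU_DATA_XCHG accessors follow the same pattern as the CPU_DATA_GET wrappers
above -- a %gs-relative access at a compile-time offset into cpu_data_t,
wrapped in a static inline. A hypothetical setter in that style:

    /* Hypothetical example only -- mirrors the wrapper style of this header. */
    static inline void
    set_interrupt_level(int level)
    {
    	CPU_DATA_SET(cpu_interrupt_level, level)
    }

CPU_DATA_XCHG uses xchg, which is implicitly locked when one operand is in
memory, so the swap of the per-cpu field and the returned old value is atomic.
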
diff --git a/osfmk/i386/cpuid.c b/osfmk/i386/cpuid.c
index b836ba88a..c6891aefb 100644
--- a/osfmk/i386/cpuid.c
+++ b/osfmk/i386/cpuid.c
@@ -235,7 +235,7 @@ static void cpuid_fn(uint32_t selector, uint32_t *result)
 #else
 static void cpuid_fn(uint32_t selector, uint32_t *result)
 {
-	if (cpu_mode_is64bit()) {
+	if (get_is64bit()) {
 	       asm("call _cpuid64"
 			: "=a" (result[0]),
 			  "=b" (result[1]),
@@ -353,7 +353,15 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p )
 			info_p->cache_sharing[type] = cache_sharing;
 			info_p->cache_partitions[type] = cache_partitions;
 			linesizes[type] = cache_linesize;
-			
+
+			/*
+			 * Overwrite associativity determined via
+			 * CPUID.0x80000006 -- this leaf is more
+			 * accurate
+			 */
+			if (type == L2U)
+				info_p->cpuid_cache_L2_associativity = cache_associativity;
+
 			/* Compute the number of page colors for this cache,
 			 * which is:
 			 *	( linesize * sets ) / page_size
@@ -501,10 +509,24 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
     
 	/* Get cache and addressing info. */
 	if (info_p->cpuid_max_ext >= 0x80000006) {
+		uint32_t assoc;
 		cpuid_fn(0x80000006, reg);
 		info_p->cpuid_cache_linesize   = bitfield32(reg[ecx], 7, 0);
-		info_p->cpuid_cache_L2_associativity =
-						 bitfield32(reg[ecx],15,12);
+		assoc = bitfield32(reg[ecx],15,12);
+		/*
+		 * L2 associativity is encoded, though in an insufficiently
+		 * descriptive fashion, e.g. 24-way is mapped to 16-way.
+		 * Represent a fully associative cache as 0xFFFF.
+		 * Overwritten by associativity as determined via CPUID.4
+		 * if available.
+		 */
+		if (assoc == 6)
+			assoc = 8;
+		else if (assoc == 8)
+			assoc = 16;
+		else if (assoc == 0xF)
+			assoc = 0xFFFF;
+		info_p->cpuid_cache_L2_associativity = assoc;
 		info_p->cpuid_cache_size       = bitfield32(reg[ecx],31,16);
 		cpuid_fn(0x80000008, reg);
 		info_p->cpuid_address_bits_physical =
@@ -513,8 +535,15 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
 						 bitfield32(reg[eax],15, 8);
 	}
 
-	/* get processor signature and decode */
+	/*
+	 * Get the processor signature and decode it, bracketing this with
+	 * the approved procedure for reading the microcode version number
+	 * a.k.a. signature a.k.a. BIOS ID
+	 */
+	wrmsr64(MSR_IA32_BIOS_SIGN_ID, 0);
 	cpuid_fn(1, reg);
+	info_p->cpuid_microcode_version =
+		(uint32_t) (rdmsr64(MSR_IA32_BIOS_SIGN_ID) >> 32);
 	info_p->cpuid_signature = reg[eax];
 	info_p->cpuid_stepping  = bitfield32(reg[eax],  3,  0);
 	info_p->cpuid_model     = bitfield32(reg[eax],  7,  4);
@@ -525,6 +554,9 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
 	info_p->cpuid_brand     = bitfield32(reg[ebx],  7,  0);
 	info_p->cpuid_features  = quad(reg[ecx], reg[edx]);
 
+	/* Get "processor flag"; necessary for microcode update matching */
+	info_p->cpuid_processor_flag = (rdmsr64(MSR_IA32_PLATFORM_ID)>> 50) & 3;
+
 	/* Fold extensions into family/model */
 	if (info_p->cpuid_family == 0x0f)
 		info_p->cpuid_family += info_p->cpuid_extfamily;
@@ -550,10 +582,6 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
 				reg[edx] & (uint32_t)CPUID_EXTFEATURE_TSCI;
 	}
 
-	/* Find the microcode version number a.k.a. signature a.k.a. BIOS ID */
-        info_p->cpuid_microcode_version =
-                (uint32_t) (rdmsr64(MSR_IA32_BIOS_SIGN_ID) >> 32);
-
 	if (info_p->cpuid_max_basic >= 0x5) {
 		cpuid_mwait_leaf_t	*cmp = &info_p->cpuid_mwait_leaf;
 
@@ -625,12 +653,11 @@ cpuid_set_cpufamily(i386_cpu_info_t *info_p)
 	switch (info_p->cpuid_family) {
 	case 6:
 		switch (info_p->cpuid_model) {
-		case 13:
-			cpufamily = CPUFAMILY_INTEL_6_13;
-			break;
+#if CONFIG_YONAH
 		case 14:
 			cpufamily = CPUFAMILY_INTEL_YONAH;
 			break;
+#endif
 		case 15:
 			cpufamily = CPUFAMILY_INTEL_MEROM;
 			break;
@@ -681,7 +708,7 @@ cpuid_set_info(void)
 
 	info_p->cpuid_cpu_type = CPU_TYPE_X86;
 	info_p->cpuid_cpu_subtype = CPU_SUBTYPE_X86_ARCH1;
-
+	/* Must be invoked after set_generic_info */
 	cpuid_set_cache_info(&cpuid_cpu_info);
 
 	/*
@@ -764,11 +791,11 @@ static struct {
 	{CPUID_FEATURE_MOVBE,     "MOVBE"},
 	{CPUID_FEATURE_POPCNT,    "POPCNT"},
 	{CPUID_FEATURE_AES,       "AES"},
+	{CPUID_FEATURE_VMM,       "VMM"},
+	{CPUID_FEATURE_PCID,      "PCID"},
 	{CPUID_FEATURE_XSAVE,     "XSAVE"},
 	{CPUID_FEATURE_OSXSAVE,   "OSXSAVE"},
-	{CPUID_FEATURE_VMM,       "VMM"},
 	{CPUID_FEATURE_SEGLIM64,  "SEGLIM64"},
-	{CPUID_FEATURE_PCID,      "PCID"},
 	{CPUID_FEATURE_TSCTMR,    "TSCTMR"},
 	{CPUID_FEATURE_AVX1_0,    "AVX1.0"},
 	{0, 0}
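
For reference (sketch only, not part of the patch): the CPUID.0x80000006
ECX[15:12] decode added above is equivalent to:

    /* L2 associativity field decode, per the hunk above: 6 -> 8-way,
     * 8 -> 16-way, 0xF -> fully associative (0xFFFF); other encodings
     * pass through and may later be overwritten by the more accurate
     * CPUID.4 value. */
    static uint32_t
    decode_l2_assoc(uint32_t field)
    {
    	switch (field) {
    	case 6:		return 8;
    	case 8:		return 16;
    	case 0xF:	return 0xFFFF;
    	default:	return field;
    	}
    }
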
diff --git a/osfmk/i386/cpuid.h b/osfmk/i386/cpuid.h
index 4c3c329c0..51bd428f6 100644
--- a/osfmk/i386/cpuid.h
+++ b/osfmk/i386/cpuid.h
@@ -147,7 +147,6 @@
 #define CPUID_MODEL_DALES_32NM	0x25	/* Clarkdale, Arrandale */
 #define CPUID_MODEL_WESTMERE	0x2C	/* Gulftown, Westmere-EP, Westmere-WS */
 #define CPUID_MODEL_WESTMERE_EX	0x2F
-/* Additional internal models go here */
 #define CPUID_MODEL_SANDYBRIDGE	0x2A
 #define CPUID_MODEL_JAKETOWN	0x2D
 
@@ -266,6 +265,7 @@ typedef struct {
 	uint64_t	cpuid_extfeatures;
 	uint32_t	cpuid_signature;
 	uint8_t   	cpuid_brand; 
+	uint8_t		cpuid_processor_flag;
 	
 	uint32_t	cache_size[LCACHE_MAX];
 	uint32_t	cache_linesize;
diff --git a/osfmk/i386/cswitch.s b/osfmk/i386/cswitch.s
index 3110cc2c6..6651e5404 100644
--- a/osfmk/i386/cswitch.s
+++ b/osfmk/i386/cswitch.s
@@ -60,15 +60,9 @@
 
 #include <i386/asm.h>
 #include <i386/proc_reg.h>
+#include <i386/mp.h>
 #include <assym.s>
 
-#ifdef	SYMMETRY
-#include <sqt/asm_macros.h>
-#endif
-
-#if	AT386
-#include <i386/mp.h>
-#endif	/* AT386 */
 
 #define	CX(addr, reg)	addr(,reg,4)
 
@@ -80,7 +74,7 @@
 Entry(Load_context)
 	movl	S_ARG0,%ecx			/* get thread */
 	movl	TH_KERNEL_STACK(%ecx),%ecx	/* get kernel stack */
-	lea	-IKS_SIZE-IEL_SIZE(%ecx),%edx
+	lea	-IKS_SIZE(%ecx),%edx
 	add	EXT(kernel_stack_size),%edx		/* point to stack top */
 	movl	%ecx,%gs:CPU_ACTIVE_STACK	/* store stack address */
 	movl	%edx,%gs:CPU_KERNEL_STACK	/* store stack top */
@@ -116,7 +110,7 @@ Entry(Switch_context)
 	movl	8(%esp),%ecx			/* get new thread */
 	movl    %ecx,%gs:CPU_ACTIVE_THREAD      /* new thread is active */
 	movl	TH_KERNEL_STACK(%ecx),%ebx	/* get its kernel stack */
-	lea	-IKS_SIZE-IEL_SIZE(%ebx),%ecx
+	lea	-IKS_SIZE(%ebx),%ecx
 	add	EXT(kernel_stack_size),%ecx
 						/* point to stack top */
 
diff --git a/osfmk/i386/db_interface.c b/osfmk/i386/db_interface.c
index e4c025bdf..9e76b5406 100644
--- a/osfmk/i386/db_interface.c
+++ b/osfmk/i386/db_interface.c
@@ -806,6 +806,7 @@ db_machdep_init(void)
 	int c;
 
 	db_simple_lock_init(&kdb_lock, 0);
+#if MACH_KDB /* this only works for legacy 32-bit machines */
 	for (c = 0; c < real_ncpus; ++c) {
 		if (c == master_cpu) {
 			master_dbtss.esp0 = (int)(db_task_stack_store +
@@ -818,6 +819,7 @@ db_machdep_init(void)
 			 */
 		}
 	}
+#endif
 }
 
 /*
diff --git a/osfmk/i386/db_machdep.h b/osfmk/i386/db_machdep.h
index ca046869d..e57dfca36 100644
--- a/osfmk/i386/db_machdep.h
+++ b/osfmk/i386/db_machdep.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -187,7 +187,7 @@ extern void		db_task_name(
 
 /* macro for checking if a thread has used floating-point */
 
-#define db_act_fp_used(act)	(act && act->machine.pcb->ifps)
+#define db_act_fp_used(act)	(act && act->machine.ifps)
 
 extern void		db_tss_to_frame(
 				int			tss_sel,
diff --git a/osfmk/i386/db_trace.c b/osfmk/i386/db_trace.c
index a14bb16b5..136418ea2 100644
--- a/osfmk/i386/db_trace.c
+++ b/osfmk/i386/db_trace.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -255,9 +255,9 @@ db_i386_reg_value(
 		}
 	    }
 	    if (dp == 0) {
-		if (!thr_act || thr_act->machine.pcb == 0)
+		if (!thr_act)
 		    db_error("no pcb\n");
-		dp = (unsigned int *)((unsigned int)(thr_act->machine.pcb->iss) + 
+		dp = (unsigned int *)((unsigned int)(thr_act->machine.iss) + 
 			     ((unsigned int)vp->valuep - (unsigned int)&ddb_regs));
 	    }
 	}
@@ -409,8 +409,8 @@ db_nextframe(
 		break;
 
 	case SYSCALL:
-		if (thr_act != THREAD_NULL && thr_act->machine.pcb) {
-			iss32 = (x86_saved_state32_t *)thr_act->machine.pcb->iss;
+		if (thr_act != THREAD_NULL) {
+			iss32 = (x86_saved_state32_t *)thr_act->machine.iss;
 
 			*ip = (db_addr_t)(iss32->eip);
 			*fp = (struct i386_frame *)(iss32->ebp);
@@ -548,10 +548,6 @@ next_thread:
 	        frame = (struct i386_frame *)ddb_regs.ebp;
 	        callpc = (db_addr_t)ddb_regs.eip;
 	    } else {
-		if (th->machine.pcb == 0) {
-		    db_printf("thread has no pcb\n");
-		    return;
-		}
 		if (!th) {
 		    db_printf("thread has no shuttle\n");
 
@@ -565,7 +561,7 @@ next_thread:
 							DB_STGY_PROC, task);
 		    db_printf("\n");
 
-		    iss32 = (x86_saved_state32_t *)th->machine.pcb->iss;
+		    iss32 = (x86_saved_state32_t *)th->machine.iss;
 
 			frame = (struct i386_frame *) (iss32->ebp);
 			callpc = (db_addr_t) (iss32->eip);
@@ -586,7 +582,7 @@ next_thread:
 			     * which is not the top_most one in the RPC chain:
 			     * use the activation's pcb.
 			     */
-		            iss32 = (x86_saved_state32_t *)th->machine.pcb->iss;
+		            iss32 = (x86_saved_state32_t *)th->machine.iss;
 
 				    frame = (struct i386_frame *) (iss32->ebp);
 				    callpc = (db_addr_t) (iss32->eip);
diff --git a/osfmk/i386/endian.h b/osfmk/i386/endian.h
index 88fbc2cd5..62dedd9e9 100644
--- a/osfmk/i386/endian.h
+++ b/osfmk/i386/endian.h
@@ -50,8 +50,8 @@
  */
 
 #if !defined(ntohs)
-unsigned short	ntohs(unsigned short);
-extern __inline__
+static __inline__ unsigned short	ntohs(unsigned short);
+static __inline__
 unsigned short
 ntohs(unsigned short w_int)
 {
@@ -67,8 +67,8 @@ unsigned short	htons(unsigned short);
 #endif
 
 #if !defined(ntohl)
-unsigned long	ntohl(unsigned long);
-extern __inline__
+static __inline__ unsigned long	ntohl(unsigned long);
+static __inline__
 unsigned long
 ntohl(register unsigned long value)
 {
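
Background (not from the patch): under the GNU89 inline model,
"extern __inline__" emits no out-of-line definition, so any translation unit
where the compiler declines to inline the call is left with an unresolved
symbol (and C99 inverts the meaning of the keyword pair entirely).
"static __inline__" sidesteps both by giving each including file its own
private copy. A trivial caller, assuming the header above:

    /* Hypothetical usage: ntohs() now resolves within this TU. */
    static unsigned short
    tcp_port_from_wire(unsigned short wire_port)
    {
    	return ntohs(wire_port);
    }
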
diff --git a/osfmk/i386/etimer.c b/osfmk/i386/etimer.c
index 72d3c94b7..c196f8b9f 100644
--- a/osfmk/i386/etimer.c
+++ b/osfmk/i386/etimer.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -76,40 +76,46 @@ etimer_intr(int		user_mode,
 
 	pp = current_cpu_datap();
 
-	abstime = mach_absolute_time();			/* Get the time now */
+	SCHED_STATS_TIMER_POP(current_processor());
+
+	abstime = mach_absolute_time();		/* Get the time now */
 
 	/* has a pending clock timer expired? */
-	mytimer = &pp->rtclock_timer;
+	mytimer = &pp->rtclock_timer;		/* Point to the event timer */
 	if (mytimer->deadline <= abstime) {
-	    	/*
+		/*
 		 * Log interrupt service latency (-ve value expected by tool)
 		 * a non-PM event is expected next.
+		 * The requested deadline may be earlier than when it was set:
+		 * use MAX to avoid reporting bogus latencies.
 		 */
-	    	latency = (int32_t) (abstime - mytimer->deadline);
+		latency = (int32_t) (abstime - MAX(mytimer->deadline,
+						   mytimer->when_set));
 		KERNEL_DEBUG_CONSTANT(
-		    MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0) | DBG_FUNC_NONE,
-		    -latency,
-		    (uint32_t)rip, user_mode, 0, 0);
+			DECR_TRAP_LATENCY | DBG_FUNC_NONE,
+			-latency, rip, user_mode, 0, 0);
 
-		mytimer->has_expired = TRUE;		/* Remember that we popped */
+		mytimer->has_expired = TRUE;	/* Remember that we popped */
 		mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
 		mytimer->has_expired = FALSE;
 
-		/* Get the time again since we ran for a bit */
+		/* Get the time again since we ran a bit */
 		abstime = mach_absolute_time();
+		mytimer->when_set = abstime;
 	}
 
 	/* is it time for power management state change? */
 	if ((pmdeadline = pmCPUGetDeadline(pp)) && (pmdeadline <= abstime)) {
 	        KERNEL_DEBUG_CONSTANT(
-		    MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_START,
-		    0, 0, 0, 0, 0);
+			DECR_PM_DEADLINE | DBG_FUNC_START,
+			0, 0, 0, 0, 0);
 		pmCPUDeadline(pp);
 	        KERNEL_DEBUG_CONSTANT(
-		    MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_END,
-		    0, 0, 0, 0, 0);
+			DECR_PM_DEADLINE | DBG_FUNC_END,
+			0, 0, 0, 0, 0);
 	}
 
+	/* schedule our next deadline */
 	etimer_resync_deadlines();
 }
 
@@ -126,7 +132,8 @@ void etimer_set_deadline(uint64_t deadline)
 	pp = current_cpu_datap();
 
 	mytimer = &pp->rtclock_timer;		/* Point to the timer itself */
-	mytimer->deadline = deadline;		/* Set the new expiration time */
+	mytimer->deadline = deadline;		/* Set new expiration time */
+	mytimer->when_set = mach_absolute_time();
 
 	etimer_resync_deadlines();
 
@@ -164,7 +171,7 @@ etimer_resync_deadlines(void)
 	 */
 	pmdeadline = pmCPUGetDeadline(pp);
 	if (0 < pmdeadline && pmdeadline < deadline)
-	    deadline = pmdeadline;
+		deadline = pmdeadline;
 
 	/*
 	 * Go and set the "pop" event.
@@ -173,10 +180,10 @@ etimer_resync_deadlines(void)
 
 	/* Record non-PM deadline for latency tool */
 	if (deadline != pmdeadline) {
-	    	KERNEL_DEBUG_CONSTANT(
-		    MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | DBG_FUNC_NONE,
-		    decr, 2,
-		    deadline, (uint32_t)(deadline >> 32), 0);
+		KERNEL_DEBUG_CONSTANT(
+			DECR_SET_DEADLINE | DBG_FUNC_NONE,
+			decr, 2,
+			deadline, (uint32_t)(deadline >> 32), 0);
 	}
 	splx(s);
 }
@@ -199,32 +206,45 @@ __unused void			*arg)
 	mytimer->has_expired = TRUE;
 	mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
 	mytimer->has_expired = FALSE;
+	mytimer->when_set = mach_absolute_time();
 
 	etimer_resync_deadlines();
 }
 
-queue_t
+uint64_t	
+timer_call_slop(
+	uint64_t	deadline)
+{
+	uint64_t now = mach_absolute_time();
+	if (deadline > now) {
+		return MIN((deadline - now) >> 3, NSEC_PER_MSEC); /* Min of 12.5% and 1ms */
+	}
+
+	return 0;
+}
+
+mpqueue_head_t *
 timer_queue_assign(
     uint64_t        deadline)
 {
-	cpu_data_t			*cdp = current_cpu_datap();
-	rtclock_timer_t		*timer;
+	cpu_data_t		*cdp = current_cpu_datap();
+	mpqueue_head_t		*queue;
 
 	if (cdp->cpu_running) {
-		timer = &cdp->rtclock_timer;
+		queue = &cdp->rtclock_timer.queue;
 
-		if (deadline < timer->deadline)
+		if (deadline < cdp->rtclock_timer.deadline)
 			etimer_set_deadline(deadline);
 	}
 	else
-		timer = &cpu_datap(master_cpu)->rtclock_timer;
+		queue = &cpu_datap(master_cpu)->rtclock_timer.queue;
 
-    return (&timer->queue);
+    return queue;
 }
 
 void
 timer_queue_cancel(
-    queue_t         queue,
+    mpqueue_head_t  *queue,
     uint64_t        deadline,
     uint64_t        new_deadline)
 {
@@ -233,3 +253,53 @@ timer_queue_cancel(
             etimer_set_deadline(new_deadline);
     }
 }
+
+/*
+ * etimer_queue_migrate() is called from the Power-Management kext
+ * when a logical processor goes idle (in a deep C-state) with a distant
+ * deadline so that its timer queue can be moved to another processor.
+ * This target processor should be the least idle (most busy) --
+ * currently this is the primary processor for the calling thread's package.
+ * Locking restrictions demand that the target cpu must be the boot cpu. 
+ */
+uint32_t
+etimer_queue_migrate(int target_cpu)
+{
+	cpu_data_t	*target_cdp = cpu_datap(target_cpu);
+	cpu_data_t	*cdp = current_cpu_datap();
+	int		ntimers_moved;
+
+	assert(!ml_get_interrupts_enabled());
+	assert(target_cpu != cdp->cpu_number);
+	assert(target_cpu == master_cpu);
+
+	KERNEL_DEBUG_CONSTANT(
+		DECR_TIMER_MIGRATE | DBG_FUNC_START,
+		target_cpu,
+		cdp->rtclock_timer.deadline, (cdp->rtclock_timer.deadline >>32),
+		0, 0);
+
+	/*
+	 * Move timer requests from the local queue to the target processor's.
+	 * The return value is the number of requests moved. If this is 0,
+	 * it indicates that the first (i.e. earliest) timer is earlier than
+	 * the earliest for the target processor. Since this would force a
+	 * resync, the move of this and all later requests is aborted.
+	 */
+	ntimers_moved = timer_queue_migrate(&cdp->rtclock_timer.queue,
+					    &target_cdp->rtclock_timer.queue);
+
+	/*
+	 * If any timers were moved, clear the local deadline.
+	 */
+	if (ntimers_moved > 0) {
+		cdp->rtclock_timer.deadline = EndOfAllTime;
+		setPop(EndOfAllTime);
+	}
+ 
+	KERNEL_DEBUG_CONSTANT(
+		DECR_TIMER_MIGRATE | DBG_FUNC_END,
+		target_cpu, ntimers_moved, 0, 0, 0);
+
+	return ntimers_moved;
+}
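
Worked example for timer_call_slop() above (illustrative; assumes nanosecond
abstime units, as on x86): the permitted coalescing slop is 12.5% of the
remaining lead time, capped at one millisecond.

    #include <stdint.h>
    #include <stdio.h>

    #define NSEC_PER_MSEC	1000000ULL

    /* Same policy as timer_call_slop(): min((deadline - now) / 8, 1ms). */
    static uint64_t
    slop(uint64_t deadline, uint64_t now)
    {
    	if (deadline <= now)
    		return 0;
    	uint64_t s = (deadline - now) >> 3;
    	return (s < NSEC_PER_MSEC) ? s : NSEC_PER_MSEC;
    }

    int
    main(void)
    {
    	/* 4ms out: 12.5% is 500us, under the cap */
    	printf("%llu\n", (unsigned long long)slop(4 * NSEC_PER_MSEC, 0));
    	/* 100ms out: 12.5% is 12.5ms, clamped to 1ms */
    	printf("%llu\n", (unsigned long long)slop(100 * NSEC_PER_MSEC, 0));
    	return 0;
    }
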
diff --git a/osfmk/i386/fpu.c b/osfmk/i386/fpu.c
index 7b4be4ebe..7227b93a2 100644
--- a/osfmk/i386/fpu.c
+++ b/osfmk/i386/fpu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -61,6 +61,7 @@
 #include <mach/exception_types.h>
 #include <mach/i386/thread_status.h>
 #include <mach/i386/fp_reg.h>
+#include <mach/branch_predicates.h>
 
 #include <kern/mach_param.h>
 #include <kern/processor.h>
@@ -434,7 +435,7 @@ fpu_save_context(thread_t thread)
 	struct x86_fx_thread_state *ifps;
 
 	assert(ml_get_interrupts_enabled() == FALSE);
-	ifps = (thread)->machine.pcb->ifps;
+	ifps = (thread)->machine.ifps;
 #if	DEBUG
 	if (ifps && ((ifps->fp_valid != FALSE) && (ifps->fp_valid != TRUE))) {
 		panic("ifps->fp_valid: %u\n", ifps->fp_valid);
@@ -448,7 +449,7 @@ fpu_save_context(thread_t thread)
 		 */
 		clear_ts();
 		/* registers are in FPU - save to memory */
-		fpu_store_registers(ifps, (thread_is_64bit(thread) && is_saved_state64(thread->machine.pcb->iss)));
+		fpu_store_registers(ifps, (thread_is_64bit(thread) && is_saved_state64(thread->machine.iss)));
 		ifps->fp_valid = TRUE;
 	}
 	set_ts();
@@ -492,7 +493,7 @@ fpu_set_fxstate(
 	state = (x86_float_state64_t *)tstate;
 
 	assert(thr_act != THREAD_NULL);
-	pcb = thr_act->machine.pcb;
+	pcb = THREAD_TO_PCB(thr_act);
 
 	if (state == NULL) {
 	    /*
@@ -598,7 +599,7 @@ fpu_get_fxstate(
 	state = (x86_float_state64_t *)tstate;
 
 	assert(thr_act != THREAD_NULL);
-	pcb = thr_act->machine.pcb;
+	pcb = THREAD_TO_PCB(thr_act);
 
 	simple_lock(&pcb->lock);
 
@@ -657,12 +658,12 @@ fpu_dup_fxstate(
 	boolean_t	intr;
 	pcb_t		ppcb;
 
-	ppcb = parent->machine.pcb;
+	ppcb = THREAD_TO_PCB(parent);
 
 	if (ppcb->ifps == NULL)
 	        return;
 
-        if (child->machine.pcb->ifps)
+        if (child->machine.ifps)
 	        panic("fpu_dup_fxstate: child's ifps non-null");
 
 	new_ifps = fp_state_alloc();
@@ -683,11 +684,11 @@ fpu_dup_fxstate(
 		(void)ml_set_interrupts_enabled(intr);
 
 		if (ifps->fp_valid) {
-			child->machine.pcb->ifps = new_ifps;
+			child->machine.ifps = new_ifps;
 			assert((fp_register_state_size == sizeof(struct x86_fx_thread_state)) ||
 			    (fp_register_state_size == sizeof(struct x86_avx_thread_state)));
 			bcopy((char *)(ppcb->ifps),
-			    (char *)(child->machine.pcb->ifps), fp_register_state_size);
+			    (char *)(child->machine.ifps), fp_register_state_size);
 
 			/* Mark the new fp saved state as non-live. */
 			/* Temporarily disabled: radar 4647827
@@ -750,7 +751,7 @@ fpnoextflt(void)
 	struct x86_fx_thread_state *ifps = 0;
 
 	thr_act = current_thread();
-	pcb = thr_act->machine.pcb;
+	pcb = THREAD_TO_PCB(thr_act);
 
 	assert(fp_register_state_size != 0);
 
@@ -769,7 +770,7 @@ fpnoextflt(void)
 
 	clear_ts();			/*  Enable FPU use */
 
-	if (get_interrupt_level()) {
+	if (__improbable(get_interrupt_level())) {
 		/*
 		 * Save current coprocessor context if valid
 		 * Initialize coprocessor live context
@@ -816,7 +817,7 @@ fpextovrflt(void)
 	 * This is a non-recoverable error.
 	 * Invalidate the thread`s FPU state.
 	 */
-	pcb = thr_act->machine.pcb;
+	pcb = THREAD_TO_PCB(thr_act);
 	simple_lock(&pcb->lock);
 	ifps = pcb->ifps;
 	pcb->ifps = 0;
@@ -853,7 +854,7 @@ void
 fpexterrflt(void)
 {
 	thread_t	thr_act = current_thread();
-	struct x86_fx_thread_state *ifps = thr_act->machine.pcb->ifps;
+	struct x86_fx_thread_state *ifps = thr_act->machine.ifps;
 	boolean_t	intr;
 
 	intr = ml_set_interrupts_enabled(FALSE);
@@ -896,7 +897,7 @@ void
 fp_save(
 	thread_t	thr_act)
 {
-	pcb_t pcb = thr_act->machine.pcb;
+	pcb_t pcb = THREAD_TO_PCB(thr_act);
 	struct x86_fx_thread_state *ifps = pcb->ifps;
 
 	assert(ifps != 0);
@@ -918,7 +919,7 @@ void
 fp_load(
 	thread_t	thr_act)
 {
-	pcb_t pcb = thr_act->machine.pcb;
+	pcb_t pcb = THREAD_TO_PCB(thr_act);
 	struct x86_fx_thread_state *ifps = pcb->ifps;
 
 	assert(ifps);
@@ -941,7 +942,7 @@ void
 fpSSEexterrflt(void)
 {
 	thread_t	thr_act = current_thread();
-	struct x86_fx_thread_state *ifps = thr_act->machine.pcb->ifps;
+	struct x86_fx_thread_state *ifps = thr_act->machine.ifps;
 	boolean_t	intr;
 
 	intr = ml_set_interrupts_enabled(FALSE);
@@ -972,7 +973,7 @@ fpSSEexterrflt(void)
 void
 fp_setvalid(boolean_t value) {
         thread_t	thr_act = current_thread();
-	struct x86_fx_thread_state *ifps = thr_act->machine.pcb->ifps;
+	struct x86_fx_thread_state *ifps = thr_act->machine.ifps;
 
 	if (ifps) {
 	        ifps->fp_valid = value;
@@ -985,7 +986,7 @@ fp_setvalid(boolean_t value) {
 	}
 }
 
-boolean_t
+__private_extern__ boolean_t
 ml_fpu_avx_enabled(void) {
 	return (fpu_YMM_present == TRUE);
 }
diff --git a/osfmk/i386/fpu.h b/osfmk/i386/fpu.h
index a606aab41..5b0658f60 100644
--- a/osfmk/i386/fpu.h
+++ b/osfmk/i386/fpu.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -68,6 +68,14 @@
 #include <i386/proc_reg.h>
 #include <i386/thread.h>
 
+typedef	enum {
+		FXSAVE32 = 1,
+		FXSAVE64 = 2,
+		XSAVE32  = 3,
+		XSAVE64  = 4,
+		FP_UNUSED = 5
+	} fp_save_layout_t;
+
 extern int		fp_kind;
 
 extern void		init_fpu(void);
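
Aside (hypothetical, not part of the patch): fp_save_layout_t presumably tags
which save instruction and operand size produced a thread's floating-point
context, so the restore path can pair the fxrstor/xrstor forms correctly. A
selection helper in that spirit might read:

    /* Hypothetical: choose a layout from XSAVE capability and thread width. */
    static fp_save_layout_t
    choose_fp_save_layout(boolean_t xsave_capable, boolean_t is64bit)
    {
    	if (xsave_capable)
    		return is64bit ? XSAVE64 : XSAVE32;
    	return is64bit ? FXSAVE64 : FXSAVE32;
    }
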
diff --git a/osfmk/i386/gdt.c b/osfmk/i386/gdt.c
index c3502e06b..7677f2488 100644
--- a/osfmk/i386/gdt.c
+++ b/osfmk/i386/gdt.c
@@ -63,44 +63,44 @@
 #include <i386/seg.h>
 
 struct real_descriptor master_gdt[GDTSZ] __attribute__ ((section("__INITGDT,__data")))= {
-	[SEL_TO_INDEX(KERNEL32_CS)] MAKE_REAL_DESCRIPTOR(	/* kernel 32-bit code */ 
+	[SEL_TO_INDEX(KERNEL32_CS)] = MAKE_REAL_DESCRIPTOR(	/* kernel 32-bit code */ 
 		0,
 		0xfffff,
 		SZ_32|SZ_G,
 		ACC_P|ACC_PL_K|ACC_CODE_R
 	),
-	[SEL_TO_INDEX(KERNEL_DS)] MAKE_REAL_DESCRIPTOR(	/* kernel data */
+	[SEL_TO_INDEX(KERNEL_DS)] = MAKE_REAL_DESCRIPTOR(	/* kernel data */
 		0,
 		0xfffff,
 		SZ_32|SZ_G,
 		ACC_P|ACC_PL_K|ACC_DATA_W
 	),
-	[SEL_TO_INDEX(KERNEL64_CS)] MAKE_REAL_DESCRIPTOR(	/* kernel 64-bit code */ 
+	[SEL_TO_INDEX(KERNEL64_CS)] = MAKE_REAL_DESCRIPTOR(	/* kernel 64-bit code */ 
 		0,
 		0xfffff,
 		SZ_64|SZ_G,
 		ACC_P|ACC_PL_K|ACC_CODE_R
 	),
-	[SEL_TO_INDEX(KERNEL64_SS)] MAKE_REAL_DESCRIPTOR(	/* kernel 64-bit syscall stack */ 
+	[SEL_TO_INDEX(KERNEL64_SS)] = MAKE_REAL_DESCRIPTOR(	/* kernel 64-bit syscall stack */ 
 		0,
 		0xfffff,
 		SZ_32|SZ_G,
 		ACC_P|ACC_PL_K|ACC_DATA_W
 	),
 #ifdef __x86_64__
-	[SEL_TO_INDEX(USER_CS)] MAKE_REAL_DESCRIPTOR(	/* 32-bit user code segment */
+	[SEL_TO_INDEX(USER_CS)] = MAKE_REAL_DESCRIPTOR(	/* 32-bit user code segment */
 		0,
 		0xfffff,
  		SZ_32|SZ_G,
 		ACC_P|ACC_PL_U|ACC_CODE_R
 	),
-	[SEL_TO_INDEX(USER_DS)] MAKE_REAL_DESCRIPTOR(	/* 32-bit user data segment */
+	[SEL_TO_INDEX(USER_DS)] = MAKE_REAL_DESCRIPTOR(	/* 32-bit user data segment */
 		0,
 		0xfffff,
 		SZ_32|SZ_G,
 		ACC_P|ACC_PL_U|ACC_DATA_W
 	),
-	[SEL_TO_INDEX(USER64_CS)] MAKE_REAL_DESCRIPTOR(	/* user 64-bit code segment */
+	[SEL_TO_INDEX(USER64_CS)] = MAKE_REAL_DESCRIPTOR(	/* user 64-bit code segment */
 		0,
 		0xfffff,
 		SZ_64|SZ_G,
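
Background (not part of the patch): the gdt.c hunks add the '=' that C99
designated initializers require; the older "[index] value" spelling is a GNU
extension that newer compilers warn on or reject. Minimal illustration:

    /* GNU extension (old form):  int t[4] = { [2] 7 };
     * C99 standard (new form): */
    int t[4] = { [2] = 7 };		/* t is {0, 0, 7, 0} */
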
diff --git a/osfmk/i386/genassym.c b/osfmk/i386/genassym.c
index 0f5edf0e5..bb77d38a2 100644
--- a/osfmk/i386/genassym.c
+++ b/osfmk/i386/genassym.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -93,7 +93,6 @@
 #include <../bsd/sys/lockstat.h>
 #endif
 
-
 /*
  * genassym.c is used to produce an
  * assembly file which, intermingled with unuseful assembly code,
@@ -131,6 +130,8 @@ main(
 	DECLARE("AST_URGENT",		AST_URGENT);
 	DECLARE("AST_BSD",			AST_BSD);
 
+	DECLARE("MAX_CPUS",			MAX_CPUS);
+
 	/* Simple Lock structure */
 	DECLARE("SLOCK_ILK",	offsetof(usimple_lock_t, interlock));
 #if	MACH_LDEBUG
@@ -149,7 +150,6 @@ main(
 #ifdef __i386__
 	DECLARE("MUTEX_TYPE",	offsetof(lck_mtx_ext_t *, lck_mtx_deb.type));
 	DECLARE("MUTEX_PC",		offsetof(lck_mtx_ext_t *, lck_mtx_deb.pc));
-	DECLARE("MUTEX_THREAD",	offsetof(lck_mtx_ext_t *, lck_mtx_deb.thread));
 	DECLARE("MUTEX_ATTR",	offsetof(lck_mtx_ext_t *, lck_mtx_attr));
 	DECLARE("MUTEX_ATTR_DEBUG", LCK_MTX_ATTR_DEBUG);
 	DECLARE("MUTEX_ATTR_DEBUGb", LCK_MTX_ATTR_DEBUGb);
@@ -158,8 +158,6 @@ main(
 	DECLARE("MUTEX_TAG",	MUTEX_TAG);
 #endif
 	DECLARE("MUTEX_IND",	LCK_MTX_TAG_INDIRECT);
-	DECLARE("MUTEX_EXT",	LCK_MTX_PTR_EXTENDED);
-	DECLARE("MUTEX_ITAG",	offsetof(lck_mtx_t *, lck_mtx_tag));
 	DECLARE("MUTEX_PTR",	offsetof(lck_mtx_t *, lck_mtx_ptr));
 	DECLARE("MUTEX_ASSERT_OWNED",	LCK_MTX_ASSERT_OWNED);
 	DECLARE("MUTEX_ASSERT_NOTOWNED",LCK_MTX_ASSERT_NOTOWNED);
@@ -189,62 +187,55 @@ main(
 	DECLARE("TH_RECOVER",		offsetof(thread_t, recover));
 	DECLARE("TH_CONTINUATION",	offsetof(thread_t, continuation));
 	DECLARE("TH_KERNEL_STACK",	offsetof(thread_t, kernel_stack));
+	DECLARE("TH_MUTEX_COUNT",	offsetof(thread_t, mutex_count));
+	DECLARE("TH_WAS_PROMOTED_ON_WAKEUP", offsetof(thread_t, was_promoted_on_wakeup));
 
-	DECLARE("TASK_MACH_EXC_PORT",
-		offsetof(task_t, exc_actions[EXC_MACH_SYSCALL].port));
-	DECLARE("TASK_SYSCALLS_MACH",	offsetof(struct task *, syscalls_mach));
-	DECLARE("TASK_SYSCALLS_UNIX",	offsetof(struct task *, syscalls_unix));
+	DECLARE("TH_SYSCALLS_MACH",	offsetof(thread_t, syscalls_mach));
+	DECLARE("TH_SYSCALLS_UNIX",	offsetof(thread_t, syscalls_unix));
 
 	DECLARE("TASK_VTIMERS",			offsetof(struct task *, vtimers));
 
 	/* These fields are being added on demand */
-	DECLARE("ACT_MACH_EXC_PORT",
-		offsetof(thread_t, exc_actions[EXC_MACH_SYSCALL].port));
-
-	DECLARE("ACT_TASK",	offsetof(thread_t, task));
-	DECLARE("ACT_AST",	offsetof(thread_t, ast));
-	DECLARE("ACT_PCB",	offsetof(thread_t, machine.pcb));
-	DECLARE("ACT_SPF",	offsetof(thread_t, machine.specFlags));
-	DECLARE("ACT_MAP",	offsetof(thread_t, map));
-	DECLARE("ACT_PCB_ISS", 	offsetof(thread_t, machine.xxx_pcb.iss));
-	DECLARE("ACT_PCB_IDS", 	offsetof(thread_t, machine.xxx_pcb.ids));
+	DECLARE("TH_TASK",	offsetof(thread_t, task));
+	DECLARE("TH_AST",	offsetof(thread_t, ast));
+	DECLARE("TH_MAP",	offsetof(thread_t, map));
+	DECLARE("TH_SPF",	offsetof(thread_t, machine.specFlags));
+	DECLARE("TH_PCB_ISS", 	offsetof(thread_t, machine.iss));
+	DECLARE("TH_PCB_IDS", 	offsetof(thread_t, machine.ids));
+	DECLARE("TH_PCB_FPS",	offsetof(thread_t, machine.ifps));
 #if NCOPY_WINDOWS > 0
-	DECLARE("ACT_COPYIO_STATE", offsetof(thread_t, machine.copyio_state));
+	DECLARE("TH_COPYIO_STATE", offsetof(thread_t, machine.copyio_state));
 	DECLARE("WINDOWS_CLEAN", WINDOWS_CLEAN);
 #endif
 
 	DECLARE("MAP_PMAP",	offsetof(vm_map_t, pmap));
 
 #define IEL_SIZE		(sizeof(struct i386_exception_link *))
-	DECLARE("IEL_SIZE",	IEL_SIZE);
 	DECLARE("IKS_SIZE",	sizeof(struct x86_kernel_state));
 
 	/*
 	 * KSS_* are offsets from the top of the kernel stack (cpu_kernel_stack)
 	 */
 #if defined(__i386__)
-	DECLARE("KSS_EBX", IEL_SIZE + offsetof(struct x86_kernel_state *, k_ebx));
-	DECLARE("KSS_ESP", IEL_SIZE + offsetof(struct x86_kernel_state *, k_esp));
-	DECLARE("KSS_EBP", IEL_SIZE + offsetof(struct x86_kernel_state *, k_ebp));
-	DECLARE("KSS_EDI", IEL_SIZE + offsetof(struct x86_kernel_state *, k_edi));
-	DECLARE("KSS_ESI", IEL_SIZE + offsetof(struct x86_kernel_state *, k_esi));
-	DECLARE("KSS_EIP", IEL_SIZE + offsetof(struct x86_kernel_state *, k_eip));
+	DECLARE("KSS_EBX",	offsetof(struct x86_kernel_state *, k_ebx));
+	DECLARE("KSS_ESP",	offsetof(struct x86_kernel_state *, k_esp));
+	DECLARE("KSS_EBP",	offsetof(struct x86_kernel_state *, k_ebp));
+	DECLARE("KSS_EDI",	offsetof(struct x86_kernel_state *, k_edi));
+	DECLARE("KSS_ESI",	offsetof(struct x86_kernel_state *, k_esi));
+	DECLARE("KSS_EIP",	offsetof(struct x86_kernel_state *, k_eip));
 #elif defined(__x86_64__)
-	DECLARE("KSS_RBX", IEL_SIZE + offsetof(struct x86_kernel_state *, k_rbx));
-	DECLARE("KSS_RSP", IEL_SIZE + offsetof(struct x86_kernel_state *, k_rsp));
-	DECLARE("KSS_RBP", IEL_SIZE + offsetof(struct x86_kernel_state *, k_rbp));
-	DECLARE("KSS_R12", IEL_SIZE + offsetof(struct x86_kernel_state *, k_r12));
-	DECLARE("KSS_R13", IEL_SIZE + offsetof(struct x86_kernel_state *, k_r13));
-	DECLARE("KSS_R14", IEL_SIZE + offsetof(struct x86_kernel_state *, k_r14));
-	DECLARE("KSS_R15", IEL_SIZE + offsetof(struct x86_kernel_state *, k_r15));
-	DECLARE("KSS_RIP", IEL_SIZE + offsetof(struct x86_kernel_state *, k_rip));	
+	DECLARE("KSS_RBX",	offsetof(struct x86_kernel_state *, k_rbx));
+	DECLARE("KSS_RSP",	offsetof(struct x86_kernel_state *, k_rsp));
+	DECLARE("KSS_RBP",	offsetof(struct x86_kernel_state *, k_rbp));
+	DECLARE("KSS_R12",	offsetof(struct x86_kernel_state *, k_r12));
+	DECLARE("KSS_R13",	offsetof(struct x86_kernel_state *, k_r13));
+	DECLARE("KSS_R14",	offsetof(struct x86_kernel_state *, k_r14));
+	DECLARE("KSS_R15",	offsetof(struct x86_kernel_state *, k_r15));
+	DECLARE("KSS_RIP",	offsetof(struct x86_kernel_state *, k_rip));	
 #else
 #error Unsupported architecture
 #endif
 	
-	DECLARE("PCB_FPS",	offsetof(pcb_t, ifps));
-	DECLARE("PCB_ISS",	offsetof(pcb_t, iss));
-
 	DECLARE("DS_DR0",	offsetof(struct x86_debug_state32 *, dr0));
 	DECLARE("DS_DR1",	offsetof(struct x86_debug_state32 *, dr1));
 	DECLARE("DS_DR2",	offsetof(struct x86_debug_state32 *, dr2));
@@ -432,9 +423,7 @@ main(
         DECLARE("CPU_INTERRUPT_LEVEL",
 		offsetof(cpu_data_t *, cpu_interrupt_level));
 	DECLARE("CPU_NESTED_ISTACK",
- 	    offsetof(cpu_data_t *, cpu_nested_istack));
-        DECLARE("CPU_SIMPLE_LOCK_COUNT",
-		offsetof(cpu_data_t *,cpu_simple_lock_count));
+	    offsetof(cpu_data_t *, cpu_nested_istack));
         DECLARE("CPU_NUMBER_GS",
 		offsetof(cpu_data_t *,cpu_number));
         DECLARE("CPU_RUNNING",
@@ -500,7 +489,31 @@ main(
 		offsetof(cpu_data_t *, cpu_dr7));
 
 	DECLARE("hwIntCnt", 	offsetof(cpu_data_t *,cpu_hwIntCnt));
-
+#if	defined(__x86_64__)
+	DECLARE("CPU_ACTIVE_PCID",
+		offsetof(cpu_data_t *, cpu_active_pcid));
+	DECLARE("CPU_PCID_COHERENTP",
+		offsetof(cpu_data_t *, cpu_pmap_pcid_coherentp));
+	DECLARE("CPU_PCID_COHERENTP_KERNEL",
+		offsetof(cpu_data_t *, cpu_pmap_pcid_coherentp_kernel));
+	DECLARE("CPU_PMAP_PCID_ENABLED",
+	    offsetof(cpu_data_t *, cpu_pmap_pcid_enabled));
+
+#ifdef	PCID_STATS	
+	DECLARE("CPU_PMAP_USER_RETS",
+	    offsetof(cpu_data_t *, cpu_pmap_user_rets));
+	DECLARE("CPU_PMAP_PCID_PRESERVES",
+	    offsetof(cpu_data_t *, cpu_pmap_pcid_preserves));
+	DECLARE("CPU_PMAP_PCID_FLUSHES",
+	    offsetof(cpu_data_t *, cpu_pmap_pcid_flushes));
+#endif
+	DECLARE("CPU_TLB_INVALID",
+		offsetof(cpu_data_t *, cpu_tlb_invalid));
+	DECLARE("CPU_TLB_INVALID_LOCAL",
+	    offsetof(cpu_data_t *, cpu_tlb_invalid_local));
+	DECLARE("CPU_TLB_INVALID_GLOBAL",
+		offsetof(cpu_data_t *, cpu_tlb_invalid_global));
+#endif /* x86_64 */
 	DECLARE("enaExpTrace",	enaExpTrace);
 	DECLARE("enaExpTraceb",	enaExpTraceb);
 	DECLARE("enaUsrFCall",	enaUsrFCall);
@@ -561,15 +574,15 @@ main(
 	DECLARE("DEVICETREEP", offsetof(struct boot_args *, deviceTreeP));
 
 	DECLARE("RNT_TSC_BASE",
-		offsetof(rtc_nanotime_t *, tsc_base));
+		offsetof(pal_rtc_nanotime_t *, tsc_base));
 	DECLARE("RNT_NS_BASE",
-		offsetof(rtc_nanotime_t *, ns_base));
+		offsetof(pal_rtc_nanotime_t *, ns_base));
 	DECLARE("RNT_SCALE",
-		offsetof(rtc_nanotime_t *, scale));
+		offsetof(pal_rtc_nanotime_t *, scale));
 	DECLARE("RNT_SHIFT",
-		offsetof(rtc_nanotime_t *, shift));
+		offsetof(pal_rtc_nanotime_t *, shift));
 	DECLARE("RNT_GENERATION",
-		offsetof(rtc_nanotime_t *, generation));
+		offsetof(pal_rtc_nanotime_t *, generation));
 
 	/* values from kern/timer.h */
 #ifdef __LP64__
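
Aside (generic sketch, not XNU's exact macro): genassym-style DECLARE entries
work by baking each constant into the compiled output as an immediate inside
a recognizable assembly string, which a build step then scrapes into assym.s
for consumption by .s files such as cswitch.s above.

    #include <stddef.h>

    struct example {
    	int	a;
    	long	b;
    };

    /* Compile this file, then grep the generated .s for "DEFINITION" lines. */
    #define DECLARE(name, value) \
    	__asm__ volatile("DEFINITION " name " = %0" :: "i" (value))

    int
    main(void)
    {
    	DECLARE("EX_B_OFFSET", offsetof(struct example, b));
    	return 0;
    }
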
diff --git a/osfmk/i386/hibernate_i386.c b/osfmk/i386/hibernate_i386.c
index b333db549..6cdc1cec9 100644
--- a/osfmk/i386/hibernate_i386.c
+++ b/osfmk/i386/hibernate_i386.c
@@ -48,8 +48,6 @@ extern ppnum_t max_ppnum;
 
 #define MAX_BANKS	32
 
-int hibernate_page_list_allocate_avoided;
-
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 hibernate_page_list_t *
@@ -73,8 +71,6 @@ hibernate_page_list_allocate(void)
     msize = args->MemoryMapDescriptorSize;
     mcount = args->MemoryMapSize / msize;
 
-    hibernate_page_list_allocate_avoided = 0;
-
     num_banks = 0;
     for (i = 0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize))
     {
@@ -86,7 +82,7 @@ hibernate_page_list_allocate(void)
 	if ((base + num - 1) > max_ppnum)
 		num = max_ppnum - base + 1;
 	if (!num)
-	    continue;
+		continue;
 
 	switch (mptr->Type)
 	{
@@ -131,9 +127,6 @@ hibernate_page_list_allocate(void)
 	    case kEfiRuntimeServicesData:
 	    // contents are volatile once the platform expert starts
 	    case kEfiACPIReclaimMemory:
-		hibernate_page_list_allocate_avoided += num;
-		break;
-
 	    // non dram
 	    case kEfiReservedMemoryType:
 	    case kEfiUnusableMemory:
@@ -227,13 +220,8 @@ hibernate_processor_setup(IOHibernateImageHeader * header)
     header->runtimePages     = args->efiRuntimeServicesPageStart;
     header->runtimePageCount = args->efiRuntimeServicesPageCount;
     header->runtimeVirtualPages = args->efiRuntimeServicesVirtualPageStart;
-    if (args->Version == kBootArgsVersion1 && args->Revision >= kBootArgsRevision1_6) {
-        header->performanceDataStart = args->performanceDataStart;
-        header->performanceDataSize = args->performanceDataSize;
-    } else {
-        header->performanceDataStart = 0;
-        header->performanceDataSize = 0;
-    }
+    header->performanceDataStart = args->performanceDataStart;
+    header->performanceDataSize = args->performanceDataSize;
 
     return (KERN_SUCCESS);
 }
diff --git a/osfmk/i386/hibernate_restore.c b/osfmk/i386/hibernate_restore.c
index c1dfd4e16..bf0508a69 100644
--- a/osfmk/i386/hibernate_restore.c
+++ b/osfmk/i386/hibernate_restore.c
@@ -29,19 +29,18 @@
 #include <i386/proc_reg.h>
 #include <IOKit/IOHibernatePrivate.h>
 
+#include <i386/pal_hibernate.h>
+
 extern pd_entry_t BootstrapPTD[2048];
 
 #define TWO_MEG_MASK 0xFFFFFFFFFFE00000ULL
-
-#define DST_INDEX 2047UL
-
-static char *dstPtr = (char *)(DST_INDEX << PDSHIFT);
+#define FOUR_K_MASK 0xFFFFFFFFFFFFF000ULL
 
 // src is virtually mapped, not page aligned, 
 // dst is a physical 4k page aligned ptr, len is one 4K page
 // src & dst will not overlap
 
-void 
+uintptr_t 
 hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t procFlags)
 {
 	(void)procFlags;
@@ -50,25 +49,78 @@ hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t p
 	uint32_t idx;
 
 	if (src == 0)
-		return;
+		return (uintptr_t)dst;
 
-	if (dst < (uint64_t) (uintptr_t)dstPtr)
-	{
-		d = (uint64_t *) (uintptr_t)dst;
-	}
-	else
-	{
-		/* Outside 1-1 4G map so set up the mappings for the dest page using 2MB pages */
-		BootstrapPTD[DST_INDEX] = (dst & TWO_MEG_MASK) | INTEL_PTE_PS | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_WRITE;
-		
-		/* Invalidate the page tables for this */
-		invlpg((uintptr_t) dstPtr);
-
-		/* Mask off the offset from the 2MB window */
-		dst &= ~TWO_MEG_MASK;
-		d = (uint64_t *) (dstPtr + dst);
-	}
+	d = (uint64_t *)pal_hib_map(DEST_COPY_AREA, dst);
 	s = (uint64_t *) (uintptr_t)src;
+
 	for (idx = 0; idx < (len / (uint32_t)sizeof(uint64_t)); idx++) 
 		d[idx] = s[idx];
+
+	return (uintptr_t)d;
+}
+#undef hibprintf
+
+void hibprintf(const char *fmt, ...);
+
+void
+pal_hib_window_setup(ppnum_t page)
+{
+	uint64_t *pp;
+	uint64_t phys = ptoa_64(page);
+	int i;
+
+	BootstrapPTD[2047] = (phys & ~((uint64_t)I386_LPGMASK)) | INTEL_PTE_PS  | INTEL_PTE_VALID | INTEL_PTE_WRITE;
+
+	invlpg(HIB_PTES);
+
+	pp = (uint64_t *)(uintptr_t)(HIB_PTES + (phys & I386_LPGMASK));
+
+	for (i = 0; i < 512; i++)
+		pp[i] = 0;		/* zero the full page of window PTEs */
+
+	pp[0] = phys | INTEL_PTE_VALID | INTEL_PTE_WRITE;
+	BootstrapPTD[2047] = phys | INTEL_PTE_VALID | INTEL_PTE_WRITE;
+
+	invlpg(HIB_PTES);
+}
+
+uintptr_t
+pal_hib_map(uintptr_t v, uint64_t p)
+{
+	int index;
+
+	switch(v) {
+		case DEST_COPY_AREA:
+			index = 1;
+			break;
+		case SRC_COPY_AREA:
+			index = 2;
+			break;
+		case COPY_PAGE_AREA:
+			index = 3;
+			break;
+		default:
+			index = -1;
+			asm("cli;hlt;");
+	}
+
+	uint64_t *ptes = (uint64_t *)HIB_PTES;
+
+	/* Point the selected window's 4K PTE at the requested physical page */
+	ptes[index] = (p & FOUR_K_MASK) | INTEL_PTE_VALID | INTEL_PTE_WRITE;
+
+	/* Invalidate any stale TLB entry for this window */
+	invlpg((uintptr_t)v);
+
+	return v;
+}
+
+void hibernateRestorePALState(uint32_t *arg)
+{
+	(void)arg;
+}
+void
+pal_hib_patchup(void)
+{
 }
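
Usage sketch for the new mapping windows (hypothetical caller, not in the
patch; SRC_COPY_AREA/DEST_COPY_AREA and PAGE_SIZE come from the PAL headers):
pal_hib_map() points a fixed virtual slot's 4K PTE at the requested physical
page, invalidates the stale TLB entry, and returns the slot's address.

    /* Hypothetical: copy one 4K page between physical addresses through
     * the hibernation copy windows. */
    static void
    hib_copy_page(uint64_t src_paddr, uint64_t dst_paddr)
    {
    	uint64_t	*s = (uint64_t *)pal_hib_map(SRC_COPY_AREA, src_paddr);
    	uint64_t	*d = (uint64_t *)pal_hib_map(DEST_COPY_AREA, dst_paddr);
    	unsigned	i;

    	for (i = 0; i < PAGE_SIZE / sizeof(uint64_t); i++)
    		d[i] = s[i];
    }
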
diff --git a/osfmk/i386/hw_lock_types.h b/osfmk/i386/hw_lock_types.h
index bfeee9407..52f4355f6 100644
--- a/osfmk/i386/hw_lock_types.h
+++ b/osfmk/i386/hw_lock_types.h
@@ -90,7 +90,7 @@
  *	later in kern/lock.h..
  */
 struct hslock {
-	long	lock_data;
+	uintptr_t	lock_data;
 };
 typedef struct hslock hw_lock_data_t, *hw_lock_t;
 #define hw_lock_addr(hwl)	(&((hwl).lock_data))
diff --git a/osfmk/i386/i386_init.c b/osfmk/i386/i386_init.c
index 445c6afed..596888b5f 100644
--- a/osfmk/i386/i386_init.c
+++ b/osfmk/i386/i386_init.c
@@ -78,20 +78,23 @@
 #include <vm/vm_page.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
+#include <machine/pal_routines.h>
 #include <i386/fpu.h>
 #include <i386/pmap.h>
-#include <i386/ipl.h>
 #include <i386/misc_protos.h>
 #include <i386/cpu_threads.h>
 #include <i386/cpuid.h>
 #include <i386/lapic.h>
 #include <i386/mp.h>
 #include <i386/mp_desc.h>
+#if CONFIG_MTRR
 #include <i386/mtrr.h>
+#endif
 #include <i386/machine_routines.h>
 #if CONFIG_MCA
 #include <i386/machine_check.h>
 #endif
+#include <i386/ucode.h>
 #include <i386/postcode.h>
 #include <i386/Diagnostics.h>
 #include <i386/pmCPU.h>
@@ -103,6 +106,9 @@
 #include <machine/db_machdep.h>
 #endif
 #endif
+#if DEBUG
+#include <machine/pal_routines.h>
+#endif
 
 #if DEBUG
 #define DBG(x...)       kprintf(x)
@@ -122,21 +128,16 @@ extern const char	version[];
 extern const char	version_variant[];
 extern int		nx_enabled;
 
-extern int		noVMX;	/* if set, rosetta should not emulate altivec */
-
 #ifdef __x86_64__
 extern void		*low_eintstack;
 #endif
 
-extern void serial_init(void);
-
 void			*KPTphys;
 pd_entry_t		*IdlePTD;
 #ifdef __i386__
 pd_entry_t		*IdlePDPT64;
 #endif
 
-
 char *physfree;
 
 /*
@@ -166,7 +167,7 @@ fillkpt(pt_entry_t *base, int prot, uintptr_t src, int index, int count)
 	}
 }
 
-extern vm_offset_t first_avail;
+extern pmap_paddr_t first_avail;
 
 #ifdef __x86_64__
 int break_kprintf = 0;
@@ -175,8 +176,8 @@ uint64_t
 x86_64_pre_sleep(void)
 {
 	IdlePML4[0] = IdlePML4[KERNEL_PML4_INDEX];
-	uint64_t oldcr3 = get_cr3();
-	set_cr3((uint32_t) (uintptr_t)ID_MAP_VTOP(IdlePML4));
+	uint64_t oldcr3 = get_cr3_raw();
+	set_cr3_raw((uint32_t) (uintptr_t)ID_MAP_VTOP(IdlePML4));
 	return oldcr3;
 }
 
@@ -184,7 +185,7 @@ void
 x86_64_post_sleep(uint64_t new_cr3)
 {
 	IdlePML4[0] = 0;
-	set_cr3((uint32_t) new_cr3);
+	set_cr3_raw((uint32_t) new_cr3);
 }
 
 #endif
@@ -194,7 +195,6 @@ x86_64_post_sleep(uint64_t new_cr3)
 #endif
 
 
-
 #ifdef __x86_64__
 // Set up the physical mapping - NPHYSMAP GB of memory mapped at a high address
 // NPHYSMAP is determined by the maximum supported RAM size plus 4GB to account
@@ -227,6 +227,10 @@ physmap_init(void)
 	IdlePML4[KERNEL_PHYSMAP_INDEX] = ((uintptr_t)ID_MAP_VTOP(physmapL3))
 						| INTEL_PTE_VALID
 						| INTEL_PTE_WRITE;
+	if (cpuid_extfeatures() & CPUID_EXTFEATURE_XD) {
+		IdlePML4[KERNEL_PHYSMAP_INDEX] |= INTEL_PTE_NX;
+	}
+
 	DBG("physical map idlepml4[%d]: 0x%llx\n",
 		KERNEL_PHYSMAP_INDEX, IdlePML4[KERNEL_PHYSMAP_INDEX]);
 }
@@ -267,7 +271,7 @@ Idle_PTs_init(void)
 #endif
 
 	// Flush the TLB now we're done rewriting the page tables..
-	set_cr3(get_cr3());
+	set_cr3_raw(get_cr3_raw());
 }
 
 /*
@@ -302,7 +306,7 @@ vstart(vm_offset_t boot_args_start)
 		lphysfree = kernelBootArgs->kaddr + kernelBootArgs->ksize;
 		physfree = (void *)(uintptr_t)((lphysfree + PAGE_SIZE - 1) &~ (PAGE_SIZE - 1));
 #if DEBUG
-		serial_init();
+		pal_serial_init();
 #endif
 		DBG("revision      0x%x\n", kernelBootArgs->Revision);
 		DBG("version       0x%x\n", kernelBootArgs->Version);
@@ -316,7 +320,13 @@ vstart(vm_offset_t boot_args_start)
 			kernelBootArgs, 
 			&kernelBootArgs->ksize,
 			&kernelBootArgs->kaddr);
-
+#ifdef	__x86_64__
+		/* enable NX/XD, boot processor */
+		if (cpuid_extfeatures() & CPUID_EXTFEATURE_XD) {
+			wrmsr64(MSR_IA32_EFER, rdmsr64(MSR_IA32_EFER) | MSR_IA32_EFER_NXE);
+			DBG("vstart() NX/XD enabled\n");
+		}
+#endif
 		postcode(PSTART_PAGE_TABLES);
 
 		Idle_PTs_init();
@@ -324,12 +334,18 @@ vstart(vm_offset_t boot_args_start)
 		first_avail = (vm_offset_t)ID_MAP_VTOP(physfree);
 
 		cpu = 0;
+		cpu_data_alloc(TRUE);
 	} else {
 		/* Find our logical cpu number */
 		cpu = lapic_to_cpu[(LAPIC_READ(ID)>>LAPIC_ID_SHIFT) & LAPIC_ID_MASK];
+#ifdef	__x86_64__
+		if (cpuid_extfeatures() & CPUID_EXTFEATURE_XD) {
+			wrmsr64(MSR_IA32_EFER, rdmsr64(MSR_IA32_EFER) | MSR_IA32_EFER_NXE);
+			DBG("vstart() NX/XD enabled, non-boot\n");
+		}
+#endif
 	}
 
-	if(is_boot_cpu) cpu_data_alloc(TRUE);
 #ifdef __x86_64__
 	if(is_boot_cpu)
 		cpu_desc_init64(cpu_datap(cpu));
@@ -339,14 +355,11 @@ vstart(vm_offset_t boot_args_start)
 		cpu_desc_init(cpu_datap(cpu));
 	cpu_desc_load(cpu_datap(cpu));
 #endif
-	cpu_mode_init(current_cpu_datap());
-
-	/* enable NX/XD */
-	if (cpuid_extfeatures() & CPUID_EXTFEATURE_XD)
-        wrmsr64(MSR_IA32_EFER, rdmsr64(MSR_IA32_EFER) | MSR_IA32_EFER_NXE);
-	DBG("vstart() NX/XD enabled\n");
-
-
+	if (is_boot_cpu)
+		cpu_mode_init(current_cpu_datap()); /* cpu_mode_init() will be
+						     * invoked on the APs
+						     * via i386_init_slave()
+						     */
 #ifdef __x86_64__
 	/* Done with identity mapping */
 	IdlePML4[0] = 0;
@@ -354,6 +367,11 @@ vstart(vm_offset_t boot_args_start)
 
 	postcode(VSTART_EXIT);
 #ifdef __i386__
+	if (cpuid_extfeatures() & CPUID_EXTFEATURE_XD) {
+		wrmsr64(MSR_IA32_EFER, rdmsr64(MSR_IA32_EFER) | MSR_IA32_EFER_NXE);
+		DBG("vstart() NX/XD enabled, i386\n");
+	}
+
 	if (is_boot_cpu)
 		i386_init(boot_args_start);
 	else
@@ -394,13 +412,12 @@ i386_init(vm_offset_t boot_args_start)
 	uint64_t	maxmemtouse;
 	unsigned int	cpus = 0;
 	boolean_t	fidn;
-#ifdef __i386__
-	boolean_t	legacy_mode;
-#endif
 	boolean_t	IA32e = TRUE;
 
 	postcode(I386_INIT_ENTRY);
 
+	pal_i386_init();
+
 #if CONFIG_MCA
 	/* Initialize machine-check handling */
 	mca_cpu_init();
@@ -414,20 +431,19 @@ i386_init(vm_offset_t boot_args_start)
 	DBG("i386_init(0x%lx) kernelBootArgs=%p\n",
 		(unsigned long)boot_args_start, kernelBootArgs);
 
+	PE_init_platform(FALSE, kernelBootArgs);
+	postcode(PE_INIT_PLATFORM_D);
+
+	kernel_early_bootstrap();
+
 	master_cpu = 0;
 	cpu_init();
 
 	postcode(CPU_INIT_D);
 
-
-	PE_init_platform(FALSE, kernelBootArgs);
-	postcode(PE_INIT_PLATFORM_D);
-
-
 	printf_init();			/* Init this in case we need debugger */
 	panic_init();			/* Init this in case we need debugger */
 
-
 	/* setup debugging output if one has been chosen */
 	PE_init_kprintf(FALSE);
 
@@ -460,7 +476,6 @@ i386_init(vm_offset_t boot_args_start)
                         max_ncpus = cpus;
 	}
 
-
 	/*
 	 * debug support for > 4G systems
 	 */
@@ -471,12 +486,21 @@ i386_init(vm_offset_t boot_args_start)
 		force_immediate_debugger_NMI = FALSE;
 	else
 		force_immediate_debugger_NMI = fidn;
-#ifdef __i386__
+
+#if DEBUG
+	nanoseconds_to_absolutetime(URGENCY_NOTIFICATION_ASSERT_NS, &urgency_notification_assert_abstime_threshold);
+#endif
+	PE_parse_boot_argn("urgency_notification_abstime",
+	    &urgency_notification_assert_abstime_threshold,
+	    sizeof(urgency_notification_assert_abstime_threshold));
+
+#if CONFIG_YONAH
 	/*
 	 * At this point we check whether we are a 64-bit processor
 	 * and that we're not restricted to legacy mode, 32-bit operation.
 	 */
 	if (cpuid_extfeatures() & CPUID_EXTFEATURE_EM64T) {
+		boolean_t	legacy_mode;
 		kprintf("EM64T supported");
 		if (PE_parse_boot_argn("-legacy", &legacy_mode, sizeof (legacy_mode))) {
 			kprintf(" but legacy mode forced\n");
@@ -491,27 +515,20 @@ i386_init(vm_offset_t boot_args_start)
 	if (!(cpuid_extfeatures() & CPUID_EXTFEATURE_XD))
 		nx_enabled = 0;
 
-	/* Obtain "lcks" options:this currently controls lock statistics */
-	if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts)))
-		LcksOpts = 0;
-
 	/*   
 	 * VM initialization, after this we're using page tables...
 	 * The maximum number of cpus must be set beforehand.
 	 */
 	i386_vm_init(maxmemtouse, IA32e, kernelBootArgs);
 
-	if ( ! PE_parse_boot_argn("novmx", &noVMX, sizeof (noVMX)))
-		noVMX = 0;	/* OK to support Altivec in rosetta? */
+	/* create the console for verbose or pretty mode */
+	/* Note: doing this prior to tsc_init() allows for graceful panic! */
+	PE_init_platform(TRUE, kernelBootArgs);
+	PE_create_console();
 
 	tsc_init();
 	power_management_init();
 
-	PE_init_platform(TRUE, kernelBootArgs);
-
-	/* create the console for verbose or pretty mode */
-	PE_create_console();
-
 	processor_bootstrap();
 	thread_bootstrap();
 
@@ -546,17 +563,24 @@ do_init_slave(boolean_t fast_restart)
   
 		init_fpu();
   
+#if CONFIG_MTRR
 		mtrr_update_cpu();
+#endif
 	} else
 	    init_param = FAST_SLAVE_INIT;
 
+	/* update CPU microcode */
+	ucode_update_wake();
+
 #if CONFIG_VMX
 	/* resume VT operation */
 	vmx_resume();
 #endif
 
+#if CONFIG_MTRR
 	if (!fast_restart)
 	    pat_init();
+#endif
 
 	cpu_thread_init();	/* not strictly necessary */
 
diff --git a/osfmk/i386/i386_lock.s b/osfmk/i386/i386_lock.s
index 267b4b0db..9ea9f982b 100644
--- a/osfmk/i386/i386_lock.s
+++ b/osfmk/i386/i386_lock.s
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -48,11 +48,7 @@
 
 #define	PAUSE		rep; nop
 
-
-#define PUSHF pushf
-#define POPF  popf
-#define CLI   cli
-
+#include <i386/pal_lock_asm.h>
 
 /*
  *	When performance isn't the only concern, it's
@@ -124,18 +120,23 @@
 /* For x86_64, the varargs ABI requires that %al indicate
  * how many SSE registers contain arguments. In our case, 0 */
 #if __i386__
-#define LOAD_STRING_ARG0(label)	pushl $##label ;
-#define LOAD_ARG1(x)		pushl x	;
+#define ALIGN_STACK()		subl $8, %esp; andl	$0xFFFFFFF0, %esp ;
+#define LOAD_STRING_ARG0(label)	movl $##label, (%esp) ;
+#define LOAD_ARG1(x)		mov  x, 4(%esp)	;
+#define LOAD_PTR_ARG1(x)	mov  x, 4(%esp)	;
 #define CALL_PANIC()		call EXT(panic) ;
 #else
+#define ALIGN_STACK() 		and  $0xFFFFFFFFFFFFFFF0, %rsp ;
 #define LOAD_STRING_ARG0(label)	leaq label(%rip), %rdi ;
-#define LOAD_ARG1(x)		movq x, %rsi ;
+#define LOAD_ARG1(x)		mov x, %esi ;
+#define LOAD_PTR_ARG1(x)	mov x, %rsi ;
 #define CALL_PANIC()		xorb %al,%al ; call EXT(panic) ;
 #endif
 
 #define	CHECK_UNLOCK(current, owner)				\
 	cmp	current, owner				;	\
 	je	1f					;	\
+	ALIGN_STACK()					;	\
 	LOAD_STRING_ARG0(2f)				;	\
 	CALL_PANIC()					;	\
 	hlt						;	\
@@ -157,6 +158,7 @@
 #define	CHECK_MUTEX_TYPE()					\
 	cmpl	$ MUTEX_TAG,M_TYPE			;	\
 	je	1f					;	\
+	ALIGN_STACK()					;	\
 	LOAD_STRING_ARG0(2f)				;	\
 	CALL_PANIC()					;	\
 	hlt						;	\
@@ -177,7 +179,9 @@
 	jne	1f					;	\
 	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL		;	\
 	je	1f					;	\
-	LOAD_ARG1(%gs:CPU_PREEMPTION_LEVEL)             ;       \
+	ALIGN_STACK()					;	\
+	movl	%gs:CPU_PREEMPTION_LEVEL, %eax		;	\
+	LOAD_ARG1(%eax)					;	\
 	LOAD_STRING_ARG0(2f)				;	\
 	CALL_PANIC()					;	\
 	hlt						;	\
@@ -192,6 +196,7 @@
 #define	CHECK_MYLOCK(current, owner)				\
 	cmp	current, owner				;	\
 	jne	1f					;	\
+	ALIGN_STACK()					;	\
 	LOAD_STRING_ARG0(2f)				;	\
 	CALL_PANIC()					;	\
 	hlt						;	\
@@ -206,32 +211,47 @@
 #define	CHECK_MYLOCK(thd)
 #endif	/* MACH_LDEBUG */
 
-
 #define PREEMPTION_DISABLE				\
-	incl	%gs:CPU_PREEMPTION_LEVEL		
-	
-	
+	incl	%gs:CPU_PREEMPTION_LEVEL
+
+#if MACH_LDEBUG || 1
+#define	PREEMPTION_LEVEL_DEBUG 1	
+#endif
+#if	PREEMPTION_LEVEL_DEBUG
 #define	PREEMPTION_ENABLE				\
 	decl	%gs:CPU_PREEMPTION_LEVEL	;	\
-	jne	9f				;	\
+	js	17f				;	\
+	jnz	19f				;	\
+	testl	$AST_URGENT,%gs:CPU_PENDING_AST	;	\
+	jz	19f				;	\
 	PUSHF					;	\
-	testl	$ EFL_IF,S_PC			;	\
-	je	8f				;	\
-	CLI					;	\
-	movl	%gs:CPU_PENDING_AST,%eax	;	\
-	testl	$ AST_URGENT,%eax		;	\
-	je	8f				;	\
-	movl	%gs:CPU_INTERRUPT_LEVEL,%eax	;	\
-	testl	%eax,%eax			;	\
-	jne	8f				;	\
+	testl	$EFL_IF, S_PC			;	\
+	jz	18f				;	\
 	POPF					;	\
 	int	$(T_PREEMPT)			;	\
-	jmp	9f				;	\
-8:							\
+	jmp	19f				;	\
+17:							\
+	call	_preemption_underflow_panic	;	\
+18:							\
 	POPF					;	\
-9:	
+19:
+#else
+#define	PREEMPTION_ENABLE				\
+	decl	%gs:CPU_PREEMPTION_LEVEL	;	\
+	jnz	19f				;	\
+	testl	$AST_URGENT,%gs:CPU_PENDING_AST	;	\
+	jz	19f				;	\
+	PUSHF					;	\
+	testl	$EFL_IF, S_PC			;	\
+	jz	18f				;	\
+	POPF					;	\
+	int	$(T_PREEMPT)			;	\
+	jmp	19f				;	\
+18:							\
+	POPF					;	\
+19:
+#endif
 
-	
 
 #if	CONFIG_DTRACE
 
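
For readability (approximate C rendering, not part of the patch; CPU,
interrupts_enabled() and trap_preempt() stand in for the %gs-relative
accesses and the int $(T_PREEMPT) trap): PREEMPTION_ENABLE now decrements the
per-cpu preemption level and traps into the preemption handler only when the
level reaches zero with an urgent AST pending and interrupts enabled; the
debug variant additionally panics on underflow.

    /* Sketch of the assembly logic above. */
    static inline void
    preemption_enable_sketch(void)
    {
    	int level = --CPU->cpu_preemption_level;

    	if (level < 0)
    		preemption_underflow_panic();	/* PREEMPTION_LEVEL_DEBUG only */
    	else if (level == 0 &&
    	    (CPU->cpu_pending_ast & AST_URGENT) &&
    	    interrupts_enabled())
    		trap_preempt();			/* int $(T_PREEMPT) */
    }
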
@@ -646,8 +666,10 @@ Entry(lck_rw_lock_shared)
 	 */
 	LOCKSTAT_LABEL(_lck_rw_lock_shared_lockstat_patch_point)
 	ret
-    /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER  */
-    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
+	/*
+	 * Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER
+	 */
+	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER)
 #endif
 	ret
 2:
@@ -972,6 +994,7 @@ Entry(lck_rw_done)
 	PAUSE
 	jmp	1b
 8:
+	ALIGN_STACK()
 	LOAD_STRING_ARG0(rwl_release_error_str)
 	CALL_PANIC()
 	
@@ -1121,13 +1144,11 @@ Entry(lck_rw_held_read_or_upgrade)
 #define LMTX_A_REG32	%eax
 #define LMTX_C_REG	%ecx
 #define LMTX_C_REG32	%ecx
-#define LMTX_D_REG	%edx
 #define LMTX_RET_REG	%eax
+#define LMTX_RET_REG32	%eax
 #define LMTX_LGROUP_REG	%esi
 #define LMTX_SSTATE_REG	%edi	
 #define	LOAD_LMTX_REG(arg)	mov arg, LMTX_REG
-#define LOAD_REG_ARG0(reg)	push reg
-#define LOAD_REG_ARG1(reg)	push reg
 #define LMTX_CHK_EXTENDED	cmp LMTX_REG, LMTX_ARG0
 #define LMTX_ASSERT_OWNED	cmpl $(MUTEX_ASSERT_OWNED), LMTX_ARG1
 
@@ -1222,13 +1243,11 @@ Entry(lck_rw_held_read_or_upgrade)
 #define LMTX_A_REG32	%eax
 #define LMTX_C_REG	%rcx
 #define LMTX_C_REG32	%ecx
-#define LMTX_D_REG	%rdx
 #define LMTX_RET_REG	%rax
+#define LMTX_RET_REG32	%eax
 #define LMTX_LGROUP_REG	%r10
 #define LMTX_SSTATE_REG	%r11	
 #define	LOAD_LMTX_REG(arg)	mov %rdi, %rdx
-#define LOAD_REG_ARG0(reg)	mov reg, %rdi
-#define LOAD_REG_ARG1(reg)	mov reg, %rsi
 #define LMTX_CHK_EXTENDED	cmp LMTX_REG, LMTX_REG_ORIG
 #define LMTX_ASSERT_OWNED	cmp $(MUTEX_ASSERT_OWNED), LMTX_ARG1
 
@@ -1319,7 +1338,7 @@ Entry(lck_rw_held_read_or_upgrade)
 	pop	LMTX_SSTATE_REG		;	\
 	pop	LMTX_LGROUP_REG		;	\
 12:
-	
+
 #else
 #error Unsupported architecture
 #endif
@@ -1332,8 +1351,6 @@ Entry(lck_rw_held_read_or_upgrade)
 #define M_PROMOTED_MSK		0x04000000
 #define M_SPIN_MSK		0x08000000
 
-	
-
 /*
  *	void lck_mtx_assert(lck_mtx_t* l, unsigned int)
  *	Takes the address of a lock, and an assertion type as parameters.
@@ -1348,10 +1365,11 @@ NONLEAF_ENTRY(lck_mtx_assert)
         LOAD_LMTX_REG(B_ARG0)	                   	/* Load lock address */
 	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG	/* Load current thread */
 
-	mov	M_OWNER(LMTX_REG), LMTX_C_REG
-	cmp	$(MUTEX_IND), LMTX_C_REG	/* Is this an indirect mutex? */
-	cmove	M_PTR(LMTX_REG), LMTX_REG	/* If so, take indirection */
-
+	mov	M_STATE(LMTX_REG), LMTX_C_REG32
+	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
+	jne	0f
+	mov	M_PTR(LMTX_REG), LMTX_REG	/* If so, take indirection */
+0:	
 	mov	M_OWNER(LMTX_REG), LMTX_C_REG	/* Load owner */
 	LMTX_ASSERT_OWNED
 	jne	2f				/* Assert ownership? */
@@ -1364,18 +1382,21 @@ NONLEAF_ENTRY(lck_mtx_assert)
 2:
 	cmp	LMTX_A_REG, LMTX_C_REG		/* Current thread match? */
 	jne	1b				/* No, return */
-	LOAD_REG_ARG1(LMTX_REG)
+	ALIGN_STACK()
+	LOAD_PTR_ARG1(LMTX_REG)
 	LOAD_STRING_ARG0(mutex_assert_owned_str)
 	jmp	4f
 3:
-	LOAD_REG_ARG1(LMTX_REG)
+	ALIGN_STACK()
+	LOAD_PTR_ARG1(LMTX_REG)
 	LOAD_STRING_ARG0(mutex_assert_not_owned_str)
 4:
 	CALL_PANIC()
 
 
 lck_mtx_destroyed:
-	LOAD_REG_ARG1(LMTX_REG)
+	ALIGN_STACK()
+	LOAD_PTR_ARG1(LMTX_REG)
 	LOAD_STRING_ARG0(mutex_interlock_destroyed_str)
 	CALL_PANIC()
 	
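A notable change running through these hunks: the MUTEX_IND and MUTEX_DESTROYED sentinels are now read out of M_STATE rather than M_OWNER, so a single 32-bit load classifies a lock as plain, indirect, or destroyed before any bit tests. A hedged C sketch of that classification; the sentinel values and the two-field layout are illustrative assumptions, not the real lck_mtx_t.

```c
#include <stdint.h>

#define MUTEX_IND       0xFFFFFF7Cu  /* hypothetical sentinel value */
#define MUTEX_DESTROYED 0xFFFFFF78u  /* hypothetical sentinel value */

struct lck_mtx {
    uint32_t        state;   /* M_STATE: lock bits or a sentinel */
    struct lck_mtx *ptr;     /* M_PTR: valid when state == MUTEX_IND */
};

enum mtx_kind { MTX_PLAIN, MTX_INDIRECT, MTX_DESTROYED };

enum mtx_kind classify(struct lck_mtx **m)
{
    uint32_t s = (*m)->state;
    if (s == MUTEX_DESTROYED)
        return MTX_DESTROYED;        /* je lck_mtx_destroyed */
    if (s == MUTEX_IND) {            /* take the indirection */
        *m = (*m)->ptr;
        return MTX_INDIRECT;
    }
    return MTX_PLAIN;
}
```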
@@ -1396,54 +1417,38 @@ mutex_interlock_destroyed_str:
  * lck_mtx_try_lock()
  * lck_mtx_unlock()
  * lck_mtx_lock_spin()
+ * lck_mtx_lock_spin_always()
  * lck_mtx_convert_spin()
  */
-	
+NONLEAF_ENTRY(lck_mtx_lock_spin_always)
+	LOAD_LMTX_REG(B_ARG0)		/* fetch lock pointer */
+	jmp	Llmls_avoid_check
+
 NONLEAF_ENTRY(lck_mtx_lock_spin)
 	LOAD_LMTX_REG(B_ARG0)		/* fetch lock pointer */
 
 	CHECK_PREEMPTION_LEVEL()
-
-	mov	M_STATE(LMTX_REG), LMTX_C_REG32
-	test	$(M_ILOCKED_MSK), LMTX_C_REG	/* is the interlock held */
-	je	Llmls_enter			/* no - can't be INDIRECT or DESTROYED */
-
-	mov	M_OWNER(LMTX_REG), LMTX_A_REG
-	cmp	$(MUTEX_DESTROYED), LMTX_A_REG	/* check to see if its marked destroyed */
-	je	lck_mtx_destroyed
-	cmp	$(MUTEX_IND), LMTX_A_REG	/* Is this an indirect mutex */
-	jne	Llmls_loop
-
-	LMTX_ENTER_EXTENDED
-
-	mov	M_STATE(LMTX_REG), LMTX_C_REG32
-	test	$(M_SPIN_MSK), LMTX_C_REG
-	je	Llmls_loop
-
-	LMTX_UPDATE_MISS
-Llmls_loop:
-	PAUSE
+Llmls_avoid_check:	
 	mov	M_STATE(LMTX_REG), LMTX_C_REG32
-
-	test	$(M_ILOCKED_MSK), LMTX_C_REG	/* is the interlock held */
-	jne	Llmls_loop
-Llmls_enter:
-	test	$(M_MLOCKED_MSK), LMTX_C_REG	/* is the mutex locked */
-	jne	Llml_contended			/* fall back to normal mutex handling */
-
-	PUSHF					/* save interrupt state */
+	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32	/* is the interlock or mutex held */
+	jnz	Llmls_slow
+Llmls_try:					/* no - can't be INDIRECT, DESTROYED or locked */
 	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
-	or	$(M_ILOCKED_MSK | M_SPIN_MSK), LMTX_C_REG
-	CLI					/* disable interrupts */
+	or	$(M_ILOCKED_MSK | M_SPIN_MSK), LMTX_C_REG32
+
+	PREEMPTION_DISABLE
 	lock
 	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
-	jne	1f
+	jne	Llmls_busy_disabled
 
  	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
 	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of interlock */
-
-	PREEMPTION_DISABLE
-	POPF				/* restore interrupt state */
+#if	MACH_LDEBUG
+	test	LMTX_A_REG, LMTX_A_REG
+	jz	1f
+	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
+1:	
+#endif	/* MACH_LDEBUG */
 
 	LMTX_CHK_EXTENDED_EXIT
 	/* return with the interlock held and preemption disabled */
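This fast path sets the pattern reused by lck_mtx_lock and lck_mtx_try_lock below: snapshot the 32-bit M_STATE word, bail to the slow path if the interlock or mutex bit is set, then claim M_ILOCKED and M_SPIN with a single lock cmpxchg, undoing the preemption disable on failure. A C sketch under assumed mask values (only M_PROMOTED_MSK and M_SPIN_MSK are visible in this diff); preemption_disable()/preemption_enable() stand in for the PREEMPTION_DISABLE/PREEMPTION_ENABLE macros.

```c
#include <stdint.h>
#include <stdbool.h>

#define M_ILOCKED_MSK 0x01000000u   /* assumed value */
#define M_MLOCKED_MSK 0x02000000u   /* assumed value */
#define M_SPIN_MSK    0x08000000u   /* from the defines above */

static void preemption_disable(void) {}
static void preemption_enable(void)  {}

bool lck_mtx_lock_spin_fastpath(volatile uint32_t *state)
{
    for (;;) {
        uint32_t snap = *state;                  /* M_STATE snapshot */
        if (snap & (M_ILOCKED_MSK | M_MLOCKED_MSK))
            return false;                        /* Llmls_slow path */
        preemption_disable();                    /* before the cmpxchg */
        if (__sync_val_compare_and_swap(state, snap,
                snap | M_ILOCKED_MSK | M_SPIN_MSK) == snap)
            return true;       /* interlock held, preemption stays off */
        preemption_enable();   /* Llmls_busy_disabled: retry */
    }
}
```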
@@ -1456,59 +1461,73 @@ Llmls_enter:
 #endif
 	ret
 
-1:	
-	POPF				/* restore interrupt state */
-	jmp	Llmls_loop
-
-
-	
-NONLEAF_ENTRY(lck_mtx_lock)
-	LOAD_LMTX_REG(B_ARG0)		/* fetch lock pointer */
-
-	CHECK_PREEMPTION_LEVEL()
+Llmls_slow:	
+	test	$M_ILOCKED_MSK, LMTX_C_REG32		/* is the interlock held */
+	jz	Llml_contended				/* no, must have been the mutex */
 
-	mov	M_STATE(LMTX_REG), LMTX_C_REG32
-	test	$(M_ILOCKED_MSK), LMTX_C_REG	/* is the interlock held */
-	je	Llml_enter			/* no - can't be INDIRECT or DESTROYED */
-
-	mov	M_OWNER(LMTX_REG), LMTX_A_REG
-	cmp	$(MUTEX_DESTROYED), LMTX_A_REG	/* check to see if its marked destroyed */
+	cmp	$(MUTEX_DESTROYED), LMTX_C_REG32	/* check to see if it's marked destroyed */
 	je	lck_mtx_destroyed
-	cmp	$(MUTEX_IND), LMTX_A_REG	/* Is this an indirect mutex? */
-	jne	Llml_loop
+	cmp	$(MUTEX_IND), LMTX_C_REG32		/* Is this an indirect mutex? */
+	jne	Llmls_loop				/* no... must be interlocked */
 
 	LMTX_ENTER_EXTENDED
 
 	mov	M_STATE(LMTX_REG), LMTX_C_REG32
-	test	$(M_SPIN_MSK), LMTX_C_REG
-	je	Llml_loop
+	test	$(M_SPIN_MSK), LMTX_C_REG32
+	jz	Llmls_loop1
 
-	LMTX_UPDATE_MISS
-Llml_loop:
+	LMTX_UPDATE_MISS		/* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
+Llmls_loop:
 	PAUSE
 	mov	M_STATE(LMTX_REG), LMTX_C_REG32
+Llmls_loop1:
+	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
+	jz	Llmls_try
+	test	$(M_MLOCKED_MSK), LMTX_C_REG32
+	jnz	Llml_contended				/* mutex owned by someone else, go contend for it */
+	jmp	Llmls_loop
+
+Llmls_busy_disabled:
+	PREEMPTION_ENABLE
+	jmp	Llmls_loop
 
-	test	$(M_ILOCKED_MSK), LMTX_C_REG
-	jne	Llml_loop
-Llml_enter:
-	test	$(M_MLOCKED_MSK), LMTX_C_REG
-	jne	Llml_contended			/* mutex owned by someone else, go contend for it */
 
+	
+NONLEAF_ENTRY(lck_mtx_lock)
+	LOAD_LMTX_REG(B_ARG0)		/* fetch lock pointer */
+
+	CHECK_PREEMPTION_LEVEL()
+
+	mov	M_STATE(LMTX_REG), LMTX_C_REG32
+	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32	/* is the interlock or mutex held */
+	jnz	Llml_slow
+Llml_try:					/* no - can't be INDIRECT, DESTROYED or locked */
 	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
-	or	$(M_MLOCKED_MSK), LMTX_C_REG
+	or	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
+
+	PREEMPTION_DISABLE
 	lock
 	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
-	jne	Llml_loop
+	jne	Llml_busy_disabled
 
  	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
 	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of mutex */
+#if	MACH_LDEBUG
+	test	LMTX_A_REG, LMTX_A_REG
+	jz	1f
+	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
+1:
+#endif	/* MACH_LDEBUG */
 
-Llml_acquired:
 	testl	$(M_WAITERS_MSK), M_STATE(LMTX_REG)
-	je	1f
+	jz	Llml_finish
 
 	LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
-1:	
+
+Llml_finish:
+	andl	$(~M_ILOCKED_MSK), M_STATE(LMTX_REG)
+	PREEMPTION_ENABLE
+	
 	LMTX_CHK_EXTENDED		/* is this an extended mutex */
 	jne	2f
 
@@ -1530,8 +1549,39 @@ Llml_acquired:
 	LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, LMTX_REG)
 #endif
 	ret
+
+	
+Llml_slow:
+	test	$M_ILOCKED_MSK, LMTX_C_REG32		/* is the interlock held */
+	jz	Llml_contended				/* no, must have been the mutex */
 	
+	cmp	$(MUTEX_DESTROYED), LMTX_C_REG32	/* check to see if it's marked destroyed */
+	je	lck_mtx_destroyed
+	cmp	$(MUTEX_IND), LMTX_C_REG32		/* Is this an indirect mutex? */
+	jne	Llml_loop				/* no... must be interlocked */
+
+	LMTX_ENTER_EXTENDED
+
+	mov	M_STATE(LMTX_REG), LMTX_C_REG32
+	test	$(M_SPIN_MSK), LMTX_C_REG32
+	jz	Llml_loop1
+
+	LMTX_UPDATE_MISS		/* M_SPIN_MSK was set, so M_ILOCKED_MSK must also be present */
+Llml_loop:
+	PAUSE
+	mov	M_STATE(LMTX_REG), LMTX_C_REG32
+Llml_loop1:
+	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
+	jz	Llml_try
+	test	$(M_MLOCKED_MSK), LMTX_C_REG32
+	jnz	Llml_contended				/* mutex owned by someone else, go contend for it */
+	jmp	Llml_loop
+
+Llml_busy_disabled:
+	PREEMPTION_ENABLE
+	jmp	Llml_loop
 
+	
 Llml_contended:
 	LMTX_CHK_EXTENDED		/* is this an extended mutex */
 	je	0f
@@ -1540,7 +1590,8 @@ Llml_contended:
 	LMTX_CALLEXT1(lck_mtx_lock_spinwait_x86)
 
 	test	LMTX_RET_REG, LMTX_RET_REG
-	je	Llml_acquired		/* acquired mutex */
+	jz	Llml_acquired		/* acquired mutex, interlock held and preemption disabled */
+
 	cmp	$1, LMTX_RET_REG	/* check for direct wait status */
 	je	2f
 	LMTX_CHK_EXTENDED		/* is this an extended mutex */
@@ -1548,32 +1599,43 @@ Llml_contended:
 	LMTX_UPDATE_DIRECT_WAIT
 2:	
 	mov	M_STATE(LMTX_REG), LMTX_C_REG32
-	test	$(M_ILOCKED_MSK), LMTX_C_REG
-	jne	6f
+	test	$(M_ILOCKED_MSK), LMTX_C_REG32
+	jnz	6f
 
-	PUSHF					/* save state of interrupt mask */
 	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
-	or	$(M_ILOCKED_MSK), LMTX_C_REG	/* try to take the interlock */
-	CLI					/* disable interrupts */
+	or	$(M_ILOCKED_MSK), LMTX_C_REG32	/* try to take the interlock */
+
+	PREEMPTION_DISABLE
 	lock
 	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
 	jne	5f
 
-	test	$(M_MLOCKED_MSK), LMTX_C_REG	/* we've got the interlock and */
-	jne	3f
-	or	$(M_MLOCKED_MSK), LMTX_C_REG	/* the mutex is free... grab it directly */
-	and	$(~M_ILOCKED_MSK), LMTX_C_REG
+	test	$(M_MLOCKED_MSK), LMTX_C_REG32	/* we've got the interlock and */
+	jnz	3f
+	or	$(M_MLOCKED_MSK), LMTX_C_REG32	/* the mutex is free... grab it directly */
+	mov	LMTX_C_REG32, M_STATE(LMTX_REG)
 	
  	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
 	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of mutex */
-	mov	LMTX_C_REG32, M_STATE(LMTX_REG)	/* now drop the interlock */
+#if	MACH_LDEBUG
+	test	LMTX_A_REG, LMTX_A_REG
+	jz	1f
+	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
+1:
+#endif	/* MACH_LDEBUG */
 
-	POPF				/* restore interrupt state */
-	jmp	Llml_acquired
-3:					/* interlock held, mutex busy */
-	PREEMPTION_DISABLE
-	POPF				/* restore interrupt state */
+Llml_acquired:
+	testl	$(M_WAITERS_MSK), M_STATE(LMTX_REG)
+	jnz	1f
+	mov	M_OWNER(LMTX_REG), LMTX_A_REG
+	mov	TH_WAS_PROMOTED_ON_WAKEUP(LMTX_A_REG), LMTX_A_REG32
+	test	LMTX_A_REG32, LMTX_A_REG32
+	jz	Llml_finish
+1:	
+	LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
+	jmp	Llml_finish
 
+3:					/* interlock held, mutex busy */
 	LMTX_CHK_EXTENDED		/* is this an extended mutex */
 	je	4f
 	LMTX_UPDATE_WAIT
@@ -1581,7 +1643,7 @@ Llml_contended:
 	LMTX_CALLEXT1(lck_mtx_lock_wait_x86)
 	jmp	Llml_contended
 5:	
-	POPF				/* restore interrupt state */
+	PREEMPTION_ENABLE
 6:
 	PAUSE
 	jmp	2b
@@ -1592,38 +1654,25 @@ NONLEAF_ENTRY(lck_mtx_try_lock_spin)
 	LOAD_LMTX_REG(B_ARG0)			/* fetch lock pointer */
 
 	mov	M_STATE(LMTX_REG), LMTX_C_REG32
-	test	$(M_ILOCKED_MSK), LMTX_C_REG	/* is the interlock held */
-	je	Llmts_enter			/* no - can't be INDIRECT or DESTROYED */
-
-	mov	M_OWNER(LMTX_REG), LMTX_A_REG
-	cmp	$(MUTEX_DESTROYED), LMTX_A_REG	/* check to see if its marked destroyed */
-	je	lck_mtx_destroyed
-	cmp	$(MUTEX_IND), LMTX_A_REG	/* Is this an indirect mutex? */
-	jne	Llmts_enter
-
-	LMTX_ENTER_EXTENDED
-Llmts_loop:
-	PAUSE
-	mov	M_STATE(LMTX_REG), LMTX_C_REG32
-Llmts_enter:
-	test	$(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG
-	jne	Llmts_fail
-	test	$(M_ILOCKED_MSK), LMTX_C_REG
-	jne	Llmts_loop
-
-	PUSHF					/* save interrupt state */
+	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32	/* is the interlock or mutex held */
+	jnz	Llmts_slow
+Llmts_try:					/* no - can't be INDIRECT, DESTROYED or locked */
 	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
 	or	$(M_ILOCKED_MSK | M_SPIN_MSK), LMTX_C_REG
-	CLI					/* disable interrupts */
+
+	PREEMPTION_DISABLE
 	lock
 	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
-	jne	3f
+	jne	Llmts_busy_disabled
 
  	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
 	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of mutex */
-
-	PREEMPTION_DISABLE
-	POPF				/* restore interrupt state */
+#if	MACH_LDEBUG
+	test	LMTX_A_REG, LMTX_A_REG
+	jz	1f
+	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
+1:
+#endif	/* MACH_LDEBUG */
 
 	LMTX_CHK_EXTENDED_EXIT
 	leave
@@ -1637,52 +1686,68 @@ Llmts_enter:
 #endif
 	mov	$1, LMTX_RET_REG	/* return success */
 	ret
-3:	
-	POPF				/* restore interrupt state */
-	jmp	Llmts_loop
 
-	
-	
-NONLEAF_ENTRY(lck_mtx_try_lock)
-	LOAD_LMTX_REG(B_ARG0)			/* fetch lock pointer */
+Llmts_slow:
+	test	$(M_ILOCKED_MSK), LMTX_C_REG32	/* is the interlock held */
+	jz	Llmts_fail			/* no, must be held as a mutex */
 
-	mov	M_STATE(LMTX_REG), LMTX_C_REG32
-	test	$(M_ILOCKED_MSK), LMTX_C_REG	/* is the interlock held */
-	je	Llmt_enter			/* no - can't be INDIRECT or DESTROYED */
-
-	mov	M_OWNER(LMTX_REG), LMTX_A_REG
-	cmp	$(MUTEX_DESTROYED), LMTX_A_REG	/* check to see if its marked destroyed */
+	cmp	$(MUTEX_DESTROYED), LMTX_C_REG32	/* check to see if it's marked destroyed */
 	je	lck_mtx_destroyed
-	cmp	$(MUTEX_IND), LMTX_A_REG	/* Is this an indirect mutex? */
-	jne	Llmt_enter
+	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
+	jne	Llmts_loop1
 
 	LMTX_ENTER_EXTENDED
-Llmt_loop:
+Llmts_loop:
 	PAUSE
 	mov	M_STATE(LMTX_REG), LMTX_C_REG32
-Llmt_enter:
-	test	$(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG
-	jne	Llmt_fail
-	test	$(M_ILOCKED_MSK), LMTX_C_REG
-	jne	Llmt_loop
+Llmts_loop1:
+	test	$(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG32
+	jnz	Llmts_fail
+	test	$(M_ILOCKED_MSK), LMTX_C_REG32
+	jz	Llmts_try
+	jmp	Llmts_loop
+	
+Llmts_busy_disabled:
+	PREEMPTION_ENABLE
+	jmp	Llmts_loop
+
+
+	
+NONLEAF_ENTRY(lck_mtx_try_lock)
+	LOAD_LMTX_REG(B_ARG0)			/* fetch lock pointer */
 
+	mov	M_STATE(LMTX_REG), LMTX_C_REG32
+	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32	/* is the interlock or mutex held */
+	jnz	Llmt_slow	
+Llmt_try:					/* no - can't be INDIRECT, DESTROYED or locked */
 	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
-	or	$(M_MLOCKED_MSK), LMTX_C_REG
+	or	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
+	
+	PREEMPTION_DISABLE
 	lock
 	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
-	jne	Llmt_loop
+	jne	Llmt_busy_disabled
 
  	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
 	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of mutex */
+#if	MACH_LDEBUG
+	test	LMTX_A_REG, LMTX_A_REG
+	jz	1f
+	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
+1:
+#endif	/* MACH_LDEBUG */
 
 	LMTX_CHK_EXTENDED_EXIT
 
-	test	$(M_WAITERS_MSK), LMTX_C_REG
-	je	2f
+	test	$(M_WAITERS_MSK), LMTX_C_REG32
+	jz	0f
+
 	LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
-2:
-	leave
+0:
+	andl	$(~M_ILOCKED_MSK), M_STATE(LMTX_REG)
+	PREEMPTION_ENABLE
 
+	leave
 #if	CONFIG_DTRACE
 	mov	$1, LMTX_RET_REG		/* return success */
 	/* Dtrace probe: LS_LCK_MTX_TRY_LOCK_ACQUIRE */
@@ -1694,6 +1759,30 @@ Llmt_enter:
 	mov	$1, LMTX_RET_REG		/* return success */
 	ret
 
+Llmt_slow:
+	test	$(M_ILOCKED_MSK), LMTX_C_REG32	/* is the interlock held */
+	jz	Llmt_fail			/* no, must be held as a mutex */
+
+	cmp	$(MUTEX_DESTROYED), LMTX_C_REG32	/* check to see if it's marked destroyed */
+	je	lck_mtx_destroyed
+	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
+	jne	Llmt_loop
+
+	LMTX_ENTER_EXTENDED
+Llmt_loop:
+	PAUSE
+	mov	M_STATE(LMTX_REG), LMTX_C_REG32
+Llmt_loop1:
+	test	$(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG32
+	jnz	Llmt_fail
+	test	$(M_ILOCKED_MSK), LMTX_C_REG32
+	jz	Llmt_try
+	jmp	Llmt_loop
+
+Llmt_busy_disabled:
+	PREEMPTION_ENABLE
+	jmp	Llmt_loop
+
 
 Llmt_fail:
 Llmts_fail:
@@ -1710,34 +1799,36 @@ Llmts_fail:
 NONLEAF_ENTRY(lck_mtx_convert_spin)
 	LOAD_LMTX_REG(B_ARG0)			/* fetch lock pointer */
 
-	mov	M_OWNER(LMTX_REG), LMTX_A_REG
-	cmp	$(MUTEX_IND), LMTX_A_REG	/* Is this an indirect mutex? */
-	cmove	M_PTR(LMTX_REG), LMTX_REG	/* If so, take indirection */
+	mov	M_STATE(LMTX_REG), LMTX_C_REG32
+	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
+	jne	0f
+	mov	M_PTR(LMTX_REG), LMTX_REG	/* If so, take indirection */
+	mov	M_STATE(LMTX_REG), LMTX_C_REG32
+0:
+	test	$(M_MLOCKED_MSK), LMTX_C_REG32	/* already owned as a mutex, just return */
+	jnz	2f
+	test	$(M_WAITERS_MSK), LMTX_C_REG32	/* are there any waiters? */
+	jz	1f
 
+	LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
 	mov	M_STATE(LMTX_REG), LMTX_C_REG32
-	test	$(M_MLOCKED_MSK), LMTX_C_REG	/* already owned as a mutex, just return */
-	jne	2f
 1:	
-	and	$(~(M_ILOCKED_MSK | M_SPIN_MSK)), LMTX_C_REG	/* convert from spin version to mutex */
-	or	$(M_MLOCKED_MSK), LMTX_C_REG
+	and	$(~(M_ILOCKED_MSK | M_SPIN_MSK)), LMTX_C_REG32	/* convert from spin version to mutex */
+	or	$(M_MLOCKED_MSK), LMTX_C_REG32
 	mov	LMTX_C_REG32, M_STATE(LMTX_REG)		/* since I own the interlock, I don't need an atomic update */
 
-	PREEMPTION_ENABLE			/* only %eax is consumed */
-
-	test	$(M_WAITERS_MSK), LMTX_C_REG	/* are there any waiters? */
-	je	2f
-
-	LMTX_CALLEXT1(lck_mtx_lock_acquire_x86)
+	PREEMPTION_ENABLE
 2:	
 	NONLEAF_RET
 
+	
 
 #if	defined(__i386__)
 NONLEAF_ENTRY(lck_mtx_unlock)
 	LOAD_LMTX_REG(B_ARG0)			/* fetch lock pointer */
 	mov	M_OWNER(LMTX_REG), LMTX_A_REG
 	test	LMTX_A_REG, LMTX_A_REG
-	jnz	Llmu_prim
+	jnz	Llmu_entry
 	leave
 	ret
 NONLEAF_ENTRY(lck_mtx_unlock_darwin10)
@@ -1745,49 +1836,56 @@ NONLEAF_ENTRY(lck_mtx_unlock_darwin10)
 NONLEAF_ENTRY(lck_mtx_unlock)
 #endif
 	LOAD_LMTX_REG(B_ARG0)			/* fetch lock pointer */
-	mov	M_OWNER(LMTX_REG), LMTX_A_REG
+Llmu_entry:
+	mov	M_STATE(LMTX_REG), LMTX_C_REG32
 Llmu_prim:
-	cmp	$(MUTEX_IND), LMTX_A_REG	/* Is this an indirect mutex? */
+	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
 	je	Llmu_ext
-0:	
-	mov	M_STATE(LMTX_REG), LMTX_C_REG32
-	test	$(M_MLOCKED_MSK), LMTX_C_REG	/* check for full mutex */
-	jne	1f
 
-	xor	LMTX_A_REG, LMTX_A_REG
-	mov	LMTX_A_REG, M_OWNER(LMTX_REG)
-	mov	LMTX_C_REG, LMTX_A_REG			/* keep original state in %ecx for later evaluation */
-	and	$(~(M_ILOCKED_MSK | M_SPIN_MSK | M_PROMOTED_MSK)), LMTX_A_REG
-	mov	LMTX_A_REG32, M_STATE(LMTX_REG)		/* since I own the interlock, I don't need an atomic update */
-
-	PREEMPTION_ENABLE			/* need to re-enable preemption - clobbers eax */
-	jmp	2f
-1:	
+Llmu_chktype:
+	test	$(M_MLOCKED_MSK), LMTX_C_REG32	/* check for full mutex */
+	jz	Llmu_unlock
+Llmu_mutex:
 	test	$(M_ILOCKED_MSK), LMTX_C_REG	/* have to wait for interlock to clear */
-	jne	7f
+	jnz	Llmu_busy
 
-	PUSHF					/* save interrupt state */
 	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
-	and	$(~M_MLOCKED_MSK), LMTX_C_REG	/* drop mutex */
-	or	$(M_ILOCKED_MSK), LMTX_C_REG	/* pick up interlock */
-	CLI
+	and	$(~M_MLOCKED_MSK), LMTX_C_REG32	/* drop mutex */
+	or	$(M_ILOCKED_MSK), LMTX_C_REG32	/* pick up interlock */
+
+	PREEMPTION_DISABLE
 	lock
 	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
-	jne	6f				/* branch on failure to spin loop */
+	jne	Llmu_busy_disabled		/* branch on failure to spin loop */
 
+Llmu_unlock:
 	xor	LMTX_A_REG, LMTX_A_REG
 	mov	LMTX_A_REG, M_OWNER(LMTX_REG)
 	mov	LMTX_C_REG, LMTX_A_REG			/* keep original state in %ecx for later evaluation */
-	and	$(~(M_ILOCKED_MSK | M_PROMOTED_MSK)), LMTX_A_REG
-	mov	LMTX_A_REG32, M_STATE(LMTX_REG)		/* since I own the interlock, I don't need an atomic update */
-	POPF						/* restore interrupt state */
+	and	$(~(M_ILOCKED_MSK | M_SPIN_MSK | M_PROMOTED_MSK)), LMTX_A_REG
+
+	test	$(M_WAITERS_MSK), LMTX_A_REG32
+	jz	2f
+	dec	LMTX_A_REG32				/* decrement waiter count */
 2:	
-	test	$(M_PROMOTED_MSK | M_WAITERS_MSK), LMTX_C_REG
-	je	3f
-	and	$(M_PROMOTED_MSK), LMTX_C_REG
+	mov	LMTX_A_REG32, M_STATE(LMTX_REG)		/* since I own the interlock, I don't need an atomic update */
+
+#if	MACH_LDEBUG
+	/* perform lock statistics after drop to prevent delay */
+	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
+	test	LMTX_A_REG, LMTX_A_REG
+	jz	1f
+	decl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
+1:
+#endif	/* MACH_LDEBUG */
+
+	test	$(M_PROMOTED_MSK | M_WAITERS_MSK), LMTX_C_REG32
+	jz	3f
 
 	LMTX_CALLEXT2(lck_mtx_unlock_wakeup_x86, LMTX_C_REG)
 3:	
+	PREEMPTION_ENABLE
+
 	LMTX_CHK_EXTENDED
 	jne	4f
 
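The unlock path now folds the waiter bookkeeping into the state word: after the cmpxchg takes the interlock and drops M_MLOCKED, the owner is cleared, the waiter count in the low bits is decremented, and the remaining bits are written back with a plain store since the interlock is held. A C sketch of the tail; the waiter-count field width under M_WAITERS_MSK is an assumption, and the wakeup helper is stubbed.

```c
#include <stdint.h>
#include <stddef.h>

#define M_ILOCKED_MSK  0x01000000u  /* assumed value */
#define M_MLOCKED_MSK  0x02000000u  /* assumed value */
#define M_PROMOTED_MSK 0x04000000u  /* from the defines above */
#define M_SPIN_MSK     0x08000000u  /* from the defines above */
#define M_WAITERS_MSK  0x0000FFFFu  /* assumed waiter-count field */

struct lck_mtx { volatile uint32_t state; void *owner; };

static void lck_mtx_unlock_wakeup_x86(struct lck_mtx *m, uint32_t s)
{
    (void)m; (void)s;               /* stub for the C helper */
}
static void preemption_enable(void) {}

void lck_mtx_unlock_tail(struct lck_mtx *m, uint32_t snap)
{
    /* the cmpxchg already dropped M_MLOCKED and set M_ILOCKED */
    m->owner = NULL;
    uint32_t next = snap & ~(M_ILOCKED_MSK | M_SPIN_MSK | M_PROMOTED_MSK);
    if (next & M_WAITERS_MSK)
        next--;                     /* decrement waiter count */
    m->state = next;                /* plain store: we own the interlock */

    if (snap & (M_PROMOTED_MSK | M_WAITERS_MSK))
        lck_mtx_unlock_wakeup_x86(m, snap);
    preemption_enable();
}
```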
@@ -1810,77 +1908,25 @@ Llmu_prim:
 	LOCKSTAT_RECORD(LS_LCK_MTX_EXT_UNLOCK_RELEASE, LMTX_REG)
 #endif
 	ret
-6:
-	POPF				/* restore interrupt state */
-7:
+
+
+Llmu_busy_disabled:
+	PREEMPTION_ENABLE
+Llmu_busy:
 	PAUSE
 	mov	M_STATE(LMTX_REG), LMTX_C_REG32
-	jmp	1b
+	jmp	Llmu_mutex
+
 Llmu_ext:
 	mov	M_PTR(LMTX_REG), LMTX_REG
 	mov	M_OWNER(LMTX_REG), LMTX_A_REG
 	mov	%gs:CPU_ACTIVE_THREAD, LMTX_C_REG
 	CHECK_UNLOCK(LMTX_C_REG, LMTX_A_REG)
-	jmp 0b
-
-
-LEAF_ENTRY(lck_mtx_lock_decr_waiter)
-	LOAD_LMTX_REG(L_ARG0)			/* fetch lock pointer - no indirection here */
-1:	
 	mov	M_STATE(LMTX_REG), LMTX_C_REG32
+	jmp 	Llmu_chktype
 
-	test	$(M_WAITERS_MSK), LMTX_C_REG
-	je	2f
-	test	$(M_ILOCKED_MSK), LMTX_C_REG	/* have to wait for interlock to clear */
-	jne	3f
-
-	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
-	dec	LMTX_C_REG			/* decrement waiter count */
-	lock
-	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
-	jne	3f				/* branch on failure to spin loop */
 
-	mov	$1, LMTX_RET_REG
-	LEAF_RET
-2:	
-	xor	LMTX_RET_REG, LMTX_RET_REG
-	LEAF_RET
-3:	
-	PAUSE
-	jmp	1b
 	
-
-	
-LEAF_ENTRY(lck_mtx_lock_get_pri)
-	LOAD_LMTX_REG(L_ARG0)			/* fetch lock pointer - no indirection here */
-1:	
-	mov	M_STATE(LMTX_REG), LMTX_C_REG32
-
-	test	$(M_WAITERS_MSK), LMTX_C_REG
-	jne	2f
-	test	$(M_ILOCKED_MSK), LMTX_C_REG	/* have to wait for interlock to clear */
-	jne	3f
-
-	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
-	and	$(~M_PRIORITY_MSK), LMTX_C_REG	/* no waiters, reset mutex priority to 0 */
-	lock
-	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
-	jne	3f				/* branch on failure to spin loop */
-
-	xor	LMTX_RET_REG, LMTX_RET_REG	/* return mutex priority == 0 */
-	LEAF_RET
-2:	
-	mov	LMTX_C_REG, LMTX_RET_REG
-	and	$(M_PRIORITY_MSK), LMTX_RET_REG
-	shr	$16, LMTX_RET_REG		/* return current mutex priority */
-	LEAF_RET
-3:	
-	PAUSE
-	jmp	1b
-	
-	
-
-
 LEAF_ENTRY(lck_mtx_ilk_unlock)
 	LOAD_LMTX_REG(L_ARG0)			/* fetch lock pointer - no indirection here */
 
@@ -1897,93 +1943,80 @@ LEAF_ENTRY(lck_mtx_lock_grab_mutex)
 
 	mov	M_STATE(LMTX_REG), LMTX_C_REG32
 
-	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG	/* can't have the mutex yet */
-	jne	2f
+	test	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32	/* can't have the mutex yet */
+	jnz	3f
 
 	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
-	or	$(M_MLOCKED_MSK), LMTX_C_REG
+	or	$(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32
+
+	PREEMPTION_DISABLE
 	lock
 	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
 	jne	2f				/* branch on failure to spin loop */
 
  	mov	%gs:CPU_ACTIVE_THREAD, LMTX_A_REG
 	mov	LMTX_A_REG, M_OWNER(LMTX_REG)	/* record owner of mutex */
+#if	MACH_LDEBUG
+	test	LMTX_A_REG, LMTX_A_REG
+	jz	1f
+	incl	TH_MUTEX_COUNT(LMTX_A_REG)	/* lock statistic */
+1:
+#endif	/* MACH_LDEBUG */
 
 	mov	$1, LMTX_RET_REG		/* return success */
 	LEAF_RET
 2:						
+	PREEMPTION_ENABLE
+3:
 	xor	LMTX_RET_REG, LMTX_RET_REG	/* return failure */
 	LEAF_RET
 	
 
 
-LEAF_ENTRY(lck_mtx_lock_mark_promoted)
-	LOAD_LMTX_REG(L_ARG0)			/* fetch lock pointer - no indirection here */
-1:	
-	mov	M_STATE(LMTX_REG), LMTX_C_REG32
-
-	test	$(M_PROMOTED_MSK), LMTX_C_REG
-	jne	3f
-	test	$(M_ILOCKED_MSK), LMTX_C_REG	/* have to wait for interlock to clear */
-	jne	2f
-
-	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
-	or	$(M_PROMOTED_MSK), LMTX_C_REG
-	lock
-	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
-	jne	2f				/* branch on failure to spin loop */
-
-	mov	$1, LMTX_RET_REG
-	LEAF_RET
-2:	
-	PAUSE
-	jmp	1b
-3:
-	xor	LMTX_RET_REG, LMTX_RET_REG
-	LEAF_RET
-
-
-	
 LEAF_ENTRY(lck_mtx_lock_mark_destroyed)
 	LOAD_LMTX_REG(L_ARG0)
 1:
-	mov	M_OWNER(LMTX_REG), LMTX_A_REG
-
-	cmp	$(MUTEX_DESTROYED), LMTX_A_REG	/* check to see if its marked destroyed */
-	je	3f
-	cmp	$(MUTEX_IND), LMTX_A_REG	/* Is this an indirect mutex? */
+	mov	M_STATE(LMTX_REG), LMTX_C_REG32
+	cmp	$(MUTEX_IND), LMTX_C_REG32	/* Is this an indirect mutex? */
 	jne	2f
 
-	movl	$(MUTEX_DESTROYED), M_OWNER(LMTX_REG)	/* convert to destroyed state */
+	movl	$(MUTEX_DESTROYED), M_STATE(LMTX_REG)	/* convert to destroyed state */
 	jmp	3f
 2:	
-	mov	M_STATE(LMTX_REG), LMTX_C_REG32
-
 	test	$(M_ILOCKED_MSK), LMTX_C_REG	/* have to wait for interlock to clear */
-	jne	5f
+	jnz	5f
 
-	PUSHF					/* save interrupt state */
+	PREEMPTION_DISABLE
 	mov	LMTX_C_REG, LMTX_A_REG		/* eax contains snapshot for cmpxchgl */
-	or	$(M_ILOCKED_MSK), LMTX_C_REG
-	CLI
+	or	$(M_ILOCKED_MSK), LMTX_C_REG32
 	lock
 	cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG)	/* atomic compare and exchange */
 	jne	4f				/* branch on failure to spin loop */
-	movl	$(MUTEX_DESTROYED), M_OWNER(LMTX_REG)	/* convert to destroyed state */
-	POPF					/* restore interrupt state */
+	movl	$(MUTEX_DESTROYED), M_STATE(LMTX_REG)	/* convert to destroyed state */
+	PREEMPTION_ENABLE
 3:
 	LEAF_RET				/* return with M_ILOCKED set */
 4:
-	POPF					/* restore interrupt state */
+	PREEMPTION_ENABLE
 5:
 	PAUSE
 	jmp	1b
 
-	
-	
+LEAF_ENTRY(preemption_underflow_panic)
+	FRAME
+	incl	%gs:CPU_PREEMPTION_LEVEL
+	ALIGN_STACK()
+	LOAD_STRING_ARG0(16f)
+	CALL_PANIC()
+	hlt
+	.data
+16:	String	"Preemption level underflow, possible cause unlocking an unlocked mutex or spinlock"
+	.text
+
+
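preemption_underflow_panic re-increments CPU_PREEMPTION_LEVEL before panicking, so the panic path, which may itself take locks that adjust the level, runs with a sane count. A self-contained C sketch of the same intent; the per-CPU variable stands in for the %gs-relative field, and panic() is modeled locally.

```c
#include <stdio.h>
#include <stdlib.h>

static unsigned cpu_preemption_level;   /* has just underflowed */

static void panic(const char *msg)
{
    fprintf(stderr, "panic: %s\n", msg);
    abort();
}

static void preemption_underflow_panic(void)
{
    cpu_preemption_level++;   /* undo the underflowing decrement */
    panic("Preemption level underflow, possible cause "
          "unlocking an unlocked mutex or spinlock");
}
```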
 LEAF_ENTRY(_disable_preemption)
 #if	MACH_RT
-	_DISABLE_PREEMPTION
+	PREEMPTION_DISABLE
 #endif	/* MACH_RT */
 	LEAF_RET
 
@@ -1997,6 +2030,7 @@ LEAF_ENTRY(_enable_preemption)
 #else
 	movl	%gs:CPU_PREEMPTION_LEVEL,%esi
 #endif
+	ALIGN_STACK()
 	LOAD_STRING_ARG0(_enable_preemption_less_than_zero)
 	CALL_PANIC()
 	hlt
@@ -2006,7 +2040,7 @@ _enable_preemption_less_than_zero:
 	.text
 1:
 #endif	/* MACH_ASSERT */
-	_ENABLE_PREEMPTION
+	PREEMPTION_ENABLE
 #endif	/* MACH_RT */
 	LEAF_RET
 
@@ -2015,6 +2049,7 @@ LEAF_ENTRY(_enable_preemption_no_check)
 #if	MACH_ASSERT
 	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL
 	jg	1f
+	ALIGN_STACK()
 	LOAD_STRING_ARG0(_enable_preemption_no_check_less_than_zero)
 	CALL_PANIC()
 	hlt
@@ -2031,7 +2066,7 @@ _enable_preemption_no_check_less_than_zero:
 	
 LEAF_ENTRY(_mp_disable_preemption)
 #if	MACH_RT
-	_DISABLE_PREEMPTION
+	PREEMPTION_DISABLE
 #endif	/* MACH_RT */
 	LEAF_RET
 
@@ -2045,6 +2080,7 @@ LEAF_ENTRY(_mp_enable_preemption)
 #else
 	movl	%gs:CPU_PREEMPTION_LEVEL,%esi
 #endif
+	ALIGN_STACK()
 	LOAD_STRING_ARG0(_mp_enable_preemption_less_than_zero)
 	CALL_PANIC()
 	hlt
@@ -2054,7 +2090,7 @@ _mp_enable_preemption_less_than_zero:
 	.text
 1:
 #endif	/* MACH_ASSERT */
-	_ENABLE_PREEMPTION
+	PREEMPTION_ENABLE
 #endif	/* MACH_RT */
 	LEAF_RET
 
@@ -2063,6 +2099,7 @@ LEAF_ENTRY(_mp_enable_preemption_no_check)
 #if	MACH_ASSERT
 	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL
 	jg	1f
+	ALIGN_STACK()
 	LOAD_STRING_ARG0(_mp_enable_preemption_no_check_less_than_zero)
 	CALL_PANIC()
 	hlt
diff --git a/osfmk/i386/i386_vm_init.c b/osfmk/i386/i386_vm_init.c
index 301d02274..989895eb0 100644
--- a/osfmk/i386/i386_vm_init.c
+++ b/osfmk/i386/i386_vm_init.c
@@ -75,31 +75,24 @@
 #include <vm/vm_kern.h>
 #include <i386/pmap.h>
 #include <i386/misc_protos.h>
-#include <i386/ipl.h>
 #include <i386/cpuid.h>
 #include <mach/thread_status.h>
 #include <pexpert/i386/efi.h>
 #include <i386/i386_lowmem.h>
 #include <i386/lowglobals.h>
+#include <i386/pal_routines.h>
 
 #include <mach-o/loader.h>
 #include <libkern/kernel_mach_header.h>
 
-#if DEBUG 
-#define DBG(x...)	kprintf("DBG: " x)
-#define PRINT_PMAP_MEMORY_TABLE
-#else
-#define DBG(x...)
-#endif
-
 vm_size_t	mem_size = 0; 
-vm_offset_t	first_avail = 0;/* first after page tables */
+pmap_paddr_t	first_avail = 0;/* first after page tables */
 
 uint64_t	max_mem;        /* Size of physical memory (bytes), adjusted by maxmem */
 uint64_t        mem_actual;
 uint64_t	sane_size = 0;  /* Memory size to use for defaults calculations */
 
-#define MAXLORESERVE	( 32 * 1024 * 1024)
+#define MAXLORESERVE	(32 * 1024 * 1024)
 
 ppnum_t		max_ppnum = 0;
 ppnum_t		lowest_lo = 0;
@@ -119,11 +112,6 @@ vm_offset_t     static_memory_end = 0;
 
 vm_offset_t	sHIB, eHIB, stext, etext, sdata, edata, end;
 
-boolean_t	kernel_text_ps_4K = TRUE;
-boolean_t	wpkernel = TRUE;
-
-extern void	*KPTphys;
-
 /*
  * _mh_execute_header is the mach_header for the currently executing kernel
  */
@@ -135,6 +123,9 @@ void *sectPRELINKB; unsigned long sectSizePRELINK;
 void *sectHIBB; unsigned long sectSizeHIB;
 void *sectINITPTB; unsigned long sectSizeINITPT;
 
+kernel_segment_command_t *segTEXT;
+kernel_section_t *cursectTEXT, *lastsectTEXT;
+
 extern uint64_t firmware_Conventional_bytes;
 extern uint64_t firmware_RuntimeServices_bytes;
 extern uint64_t firmware_ACPIReclaim_bytes;
@@ -145,6 +136,9 @@ extern uint64_t firmware_Unusable_bytes;
 extern uint64_t firmware_other_bytes;
 uint64_t firmware_MMIO_bytes;
 
+#if	DEBUG
+#define	PRINT_PMAP_MEMORY_TABLE
+#endif /* DEBUG */
 /*
  * Basic VM initialization.
  */
@@ -163,7 +157,19 @@ i386_vm_init(uint64_t	maxmem,
 	unsigned int safeboot;
 	ppnum_t maxpg = 0;
         uint32_t pmap_type;
+	uint32_t maxloreserve;
 	uint32_t maxdmaaddr;
+	uint32_t  mbuf_reserve = 0;
+	boolean_t mbuf_override = FALSE;
+
+#if DEBUG
+	kprintf("Boot args revision: %d version: %d",
+		args->Revision, args->Version);
+	kprintf("  commandline: \"");
+	for(i=0; i<BOOT_LINE_LENGTH; i++)
+		kprintf("%c", args->CommandLine[i]);
+	kprintf("\"\n");
+#endif
 
 	/*
 	 * Now retrieve addresses for end, edata, and etext 
@@ -185,11 +191,18 @@ i386_vm_init(uint64_t	maxmem,
 	sectPRELINKB = (void *) getsegdatafromheader(
 		&_mh_execute_header, "__PRELINK_TEXT", &sectSizePRELINK);
 
+	segTEXT = getsegbynamefromheader(&_mh_execute_header, "__TEXT");
+	cursectTEXT = lastsectTEXT = firstsect(segTEXT);
+	/* Discover the last TEXT section within the TEXT segment */
+	while ((cursectTEXT = nextsect(segTEXT, cursectTEXT)) != NULL) {
+		lastsectTEXT = cursectTEXT;
+	}
+
 	sHIB  = (vm_offset_t) sectHIBB;
 	eHIB  = (vm_offset_t) sectHIBB + sectSizeHIB;
 	/* Zero-padded from ehib to stext if text is 2M-aligned */
 	stext = (vm_offset_t) sectTEXTB;
-	etext = (vm_offset_t) sectTEXTB + sectSizeTEXT;
+	etext = (vm_offset_t) round_page_64(lastsectTEXT->addr + lastsectTEXT->size);
 	/* Zero-padded from etext to sdata if text is 2M-aligned */
 	sdata = (vm_offset_t) sectDATAB;
 	edata = (vm_offset_t) sectDATAB + sectSizeDATA;
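etext is now derived by walking the sections of the __TEXT segment and rounding up from the end of the last one, rather than trusting the segment-level size. A self-contained C sketch of the walk; firstsect/nextsect mirror the kernel_mach_header helpers used above but are modeled over a plain array here.

```c
#include <stdint.h>
#include <stddef.h>

typedef struct { uint64_t addr, size; } kernel_section_t;

typedef struct {
    kernel_section_t *sections;
    unsigned          nsects;
} kernel_segment_command_t;

static kernel_section_t *firstsect(kernel_segment_command_t *seg)
{
    return seg->nsects ? &seg->sections[0] : NULL;
}

static kernel_section_t *nextsect(kernel_segment_command_t *seg,
                                  kernel_section_t *cur)
{
    kernel_section_t *end = seg->sections + seg->nsects;
    return (cur + 1 < end) ? cur + 1 : NULL;
}

uint64_t text_end(kernel_segment_command_t *segTEXT, uint64_t page_mask)
{
    kernel_section_t *cur, *last;
    cur = last = firstsect(segTEXT);
    while ((cur = nextsect(segTEXT, cur)) != NULL)
        last = cur;             /* remember the last section seen */
    /* round_page_64() equivalent, page_mask == PAGE_SIZE - 1 */
    return (last->addr + last->size + page_mask) & ~page_mask;
}
```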
@@ -307,8 +320,10 @@ i386_vm_init(uint64_t	maxmem,
 			break;
 		}
 
+#if DEBUG
 		kprintf("EFI region %d: type %u/%d, base 0x%x, top 0x%x\n",
 			i, mptr->Type, pmap_type, base, top);
+#endif
 
 		if (maxpg) {
 		        if (base >= maxpg)
@@ -327,7 +342,8 @@ i386_vm_init(uint64_t	maxmem,
 		        /*
 			 * Usable memory region
 			 */
-		        if (top < I386_LOWMEM_RESERVED) {
+		        if (top < I386_LOWMEM_RESERVED ||
+			    !pal_is_usable_memory(base, top)) {
 			        prev_pmptr = 0;
 				continue;
 			}
@@ -408,8 +424,11 @@ i386_vm_init(uint64_t	maxmem,
 			if (prev_pmptr &&
 			    pmptr->type == prev_pmptr->type &&
 			    pmptr->base == pmptr->alloc &&
-			    pmptr->base == (prev_pmptr->end + 1)) {
-			        prev_pmptr->end = pmptr->end;
+				pmptr->base == (prev_pmptr->end + 1))
+			{
+				if(prev_pmptr->end == prev_pmptr->alloc)
+					prev_pmptr->alloc = pmptr->base;
+				prev_pmptr->end = pmptr->end;
 			} else {
 			        pmap_memory_region_count++;
 				prev_pmptr = pmptr;
@@ -462,7 +481,7 @@ i386_vm_init(uint64_t	maxmem,
 	sane_size = (sane_size + 128 * MB - 1) & ~((uint64_t)(128 * MB - 1));
 
 	/*
-	 * We cap at KERNEL_MAXMEM bytes (currently 32GB for K32, 64GB for K64).
+	 * We cap at KERNEL_MAXMEM bytes (currently 32GB for K32, 96GB for K64).
 	 * Unless overriden by the maxmem= boot-arg
 	 * -- which is a non-zero maxmem argument to this function.
 	 */
@@ -532,9 +551,6 @@ i386_vm_init(uint64_t	maxmem,
 			max_valid_low_ppnum = (ppnum_t)(max_valid_dma_address / PAGE_SIZE);
 	}
 	if (avail_end >= max_valid_dma_address) {
-		uint32_t  maxloreserve;
-		uint32_t  mbuf_reserve = 0;
-		boolean_t mbuf_override = FALSE;
 
 		if (!PE_parse_boot_argn("maxloreserve", &maxloreserve, sizeof (maxloreserve))) {
 
@@ -562,10 +578,12 @@ i386_vm_init(uint64_t	maxmem,
 			vm_lopage_needed = TRUE;
 		}
 	}
+	
 	/*
 	 *	Initialize kernel physical map.
 	 *	Kernel virtual address starts at VM_KERNEL_MIN_ADDRESS.
 	 */
+	kprintf("avail_remaining = 0x%lx\n", (unsigned long)avail_remaining);
 	pmap_bootstrap(0, IA32e);
 }
 
@@ -576,6 +594,7 @@ pmap_free_pages(void)
 	return (unsigned int)avail_remaining;
 }
 
+
 boolean_t pmap_next_page_reserved(ppnum_t *);
 
 /*
@@ -685,209 +704,3 @@ pmap_valid_page(
 	return FALSE;
 }
 
-/*
- * Called once VM is fully initialized so that we can release unused
- * sections of low memory to the general pool.
- * Also complete the set-up of identity-mapped sections of the kernel:
- *  1) write-protect kernel text
- *  2) map kernel text using large pages if possible
- *  3) read and write-protect page zero (for K32)
- *  4) map the global page at the appropriate virtual address.
- *
- * Use of large pages
- * ------------------
- * To effectively map and write-protect all kernel text pages, the text
- * must be 2M-aligned at the base, and the data section above must also be
- * 2M-aligned. That is, there's padding below and above. This is achieved
- * through linker directives. Large pages are used only if this alignment
- * exists (and not overriden by the -kernel_text_page_4K boot-arg). The
- * memory layout is:
- * 
- *                       :                :
- *                       |     __DATA     |
- *               sdata:  ==================  2Meg
- *                       |                |
- *                       |  zero-padding  |
- *                       |                |
- *               etext:  ------------------ 
- *                       |                |
- *                       :                :
- *                       |                |
- *                       |     __TEXT     |
- *                       |                |
- *                       :                :
- *                       |                |
- *               stext:  ==================  2Meg
- *                       |                |
- *                       |  zero-padding  |
- *                       |                |
- *               eHIB:   ------------------ 
- *                       |     __HIB      |
- *                       :                :
- *
- * Prior to changing the mapping from 4K to 2M, the zero-padding pages
- * [eHIB,stext] and [etext,sdata] are ml_static_mfree()'d. Then all the
- * 4K pages covering [stext,etext] are coalesced as 2M large pages.
- * The now unused level-1 PTE pages are also freed.
- */
-void
-pmap_lowmem_finalize(void)
-{
-	spl_t           spl;
-	int		i;
-
-	/* Check the kernel is linked at the expected base address */
-	if (i386_btop(kvtophys((vm_offset_t) &IdlePML4)) !=
-	    I386_KERNEL_IMAGE_BASE_PAGE)
-		panic("pmap_lowmem_finalize() unexpected kernel base address");
-
-	/*
-	 * Free all pages in pmap regions below the base:
-	 * rdar://6332712
-	 *	We can't free all the pages to VM that EFI reports available.
-	 *	Pages in the range 0xc0000-0xff000 aren't safe over sleep/wake.
-	 *	There's also a size miscalculation here: pend is one page less
-	 *	than it should be but this is not fixed to be backwards
-	 *	compatible.
-	 *	Due to this current EFI limitation, we take only the first
-	 *	entry in the memory region table. However, the loop is retained
-	 * 	(with the intended termination criteria commented out) in the
-	 *	hope that some day we can free all low-memory ranges.
-	 *	This loop assumes the first range does not span the kernel
-	 *	image base & avail_start. We skip this process on systems
-	 *	with "kernel reserved" ranges, as the low memory reclamation
-	 *	is handled in the initial memory map processing loop on
-	 *	such systems.
-	 */
-	for (i = 0;
-//	     pmap_memory_regions[i].end <= I386_KERNEL_IMAGE_BASE_PAGE;
-	     i < 1 && (pmap_reserved_ranges == 0);
-	     i++) {
-		vm_offset_t	pbase = (vm_offset_t)i386_ptob(pmap_memory_regions[i].base);
-		vm_offset_t	pend  = (vm_offset_t)i386_ptob(pmap_memory_regions[i].end);
-//		vm_offset_t	pend  = i386_ptob(pmap_memory_regions[i].end+1);
-
-		DBG("ml_static_mfree(%p,%p) for pmap region %d\n",
-		    (void *) ml_static_ptovirt(pbase),
-		    (void *) (pend - pbase), i);
-		ml_static_mfree(ml_static_ptovirt(pbase), pend - pbase);
-	}
-
-	/*
-	 * If text and data are both 2MB-aligned,
-	 * we can map text with large-pages,
-	 * unless the -kernel_text_ps_4K boot-arg overrides.
-	 */
-	if ((stext & I386_LPGMASK) == 0 && (sdata & I386_LPGMASK) == 0) {
-		kprintf("Kernel text is 2MB aligned");
-		kernel_text_ps_4K = FALSE;
-		if (PE_parse_boot_argn("-kernel_text_ps_4K",
-				       &kernel_text_ps_4K,
-				       sizeof (kernel_text_ps_4K)))
-			kprintf(" but will be mapped with 4K pages\n");
-		else
-			kprintf(" and will be mapped with 2M pages\n");
-	}
-
-	(void) PE_parse_boot_argn("wpkernel", &wpkernel, sizeof (wpkernel));
-	if (wpkernel)
-		kprintf("Kernel text %p-%p to be write-protected\n",
-			(void *) stext, (void *) etext);
-
-	spl = splhigh();
-
-	/*
-	 * Scan over text if mappings are to be changed:
-	 * - Remap kernel text readonly unless the "wpkernel" boot-arg is 0 
- 	 * - Change to large-pages if possible and not overriden.
-	 */
-	if (kernel_text_ps_4K && wpkernel) {
-		vm_offset_t     myva;
-		for (myva = stext; myva < etext; myva += PAGE_SIZE) {
-			pt_entry_t     *ptep;
-
-			ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
-			if (ptep)
-				pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW);
-		}
-	}
-
-	if (!kernel_text_ps_4K) {
-		vm_offset_t     myva;
-
-		/*
-		 * Release zero-filled page padding used for 2M-alignment.
-		 */
-		DBG("ml_static_mfree(%p,%p) for padding below text\n",
-			(void *) eHIB, (void *) (stext - eHIB));
-		ml_static_mfree(eHIB, stext - eHIB);
-		DBG("ml_static_mfree(%p,%p) for padding above text\n",
-			(void *) etext, (void *) (sdata - etext));
-		ml_static_mfree(etext, sdata - etext);
-
-		/*
-		 * Coalesce text pages into large pages.
-		 */
-		for (myva = stext; myva < sdata; myva += I386_LPGBYTES) {
-			pt_entry_t	*ptep;
-			vm_offset_t	pte_phys;
-			pt_entry_t	*pdep;
-			pt_entry_t	pde;
-
-			pdep = pmap_pde(kernel_pmap, (vm_map_offset_t)myva);
-			ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
-			DBG("myva: %p pdep: %p ptep: %p\n",
-				(void *) myva, (void *) pdep, (void *) ptep);
-			if ((*ptep & INTEL_PTE_VALID) == 0)
-				continue;
-			pte_phys = (vm_offset_t)(*ptep & PG_FRAME);
-			pde = *pdep & PTMASK;	/* page attributes from pde */
-			pde |= INTEL_PTE_PS;	/* make it a 2M entry */
-			pde |= pte_phys;	/* take page frame from pte */
-
-			if (wpkernel)
-				pde &= ~INTEL_PTE_RW;
-			DBG("pmap_store_pte(%p,0x%llx)\n",
-				(void *)pdep, pde);
-			pmap_store_pte(pdep, pde);
-
-			/*
-			 * Free the now-unused level-1 pte.
-			 * Note: ptep is a virtual address to the pte in the
-			 *   recursive map. We can't use this address to free
-			 *   the page. Instead we need to compute its address
-			 *   in the Idle PTEs in "low memory".
-			 */
-			vm_offset_t vm_ptep = (vm_offset_t) KPTphys
-						+ (pte_phys >> PTPGSHIFT);
-			DBG("ml_static_mfree(%p,0x%x) for pte\n",
-				(void *) vm_ptep, PAGE_SIZE);
-			ml_static_mfree(vm_ptep, PAGE_SIZE);
-		}
-
-		/* Change variable read by sysctl machdep.pmap */
-		pmap_kernel_text_ps = I386_LPGBYTES;
-	}
-
-#if defined(__i386__)
-	/* no matter what,  kernel page zero is not accessible */
-	pmap_store_pte(pmap_pte(kernel_pmap, 0), INTEL_PTE_INVALID);
-#endif
-
-	/* map lowmem global page into fixed addr */
-	pt_entry_t *pte = NULL;
-	if (0 == (pte = pmap_pte(kernel_pmap,
-				 VM_MIN_KERNEL_LOADED_ADDRESS + 0x2000)))
-		panic("lowmem pte");
-	/* make sure it is defined on page boundary */
-	assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK));
-	pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)
-				| INTEL_PTE_REF
-				| INTEL_PTE_MOD
-				| INTEL_PTE_WIRED
-				| INTEL_PTE_VALID
-				| INTEL_PTE_RW);
-	splx(spl);
-	flush_tlb();
-}
-
diff --git a/osfmk/i386/idle_pt.c b/osfmk/i386/idle_pt.c
index ebbfc556d..663375acf 100644
--- a/osfmk/i386/idle_pt.c
+++ b/osfmk/i386/idle_pt.c
@@ -27,10 +27,15 @@
  */
 #include <i386/pmap.h>
 
-pml4_entry_t		IdlePML4[PTE_PER_PAGE] __attribute__((section("__INITPT, __data"))) = {
+#define PML4_PROT (INTEL_PTE_VALID | INTEL_PTE_WRITE)
+pml4_entry_t	IdlePML4[PTE_PER_PAGE] __attribute__((section("__INITPT, __data"))) = {
 #ifdef __x86_64__
-		[  0] = ((uint64_t)(INITPT_SEG_BASE + PAGE_SIZE) | INTEL_PTE_VALID | INTEL_PTE_WRITE),
-		[KERNEL_PML4_INDEX] = ((uint64_t)(INITPT_SEG_BASE + PAGE_SIZE) | INTEL_PTE_VALID | INTEL_PTE_WRITE),
+		[  0]
+			= ((uint64_t)(INITPT_SEG_BASE + PAGE_SIZE) | PML4_PROT),
+#if KERNEL_PML4_INDEX != 0
+		[KERNEL_PML4_INDEX]
+			= ((uint64_t)(INITPT_SEG_BASE + PAGE_SIZE) | PML4_PROT),
+#endif
 #endif
 	};
 
@@ -39,7 +44,7 @@ pml4_entry_t		IdlePML4[PTE_PER_PAGE] __attribute__((section("__INITPT, __data"))
 #elif defined(__i386__)
 #define PDPT_PROT (INTEL_PTE_VALID)
 #endif
-pdpt_entry_t		IdlePDPT[PTE_PER_PAGE] __attribute__((section("__INITPT, __data"))) = {
+pdpt_entry_t	IdlePDPT[PTE_PER_PAGE] __attribute__((section("__INITPT, __data"))) = {
 		[0] = ((uint64_t)(INITPT_SEG_BASE + 2*PAGE_SIZE) | PDPT_PROT), 
 		[1] = ((uint64_t)(INITPT_SEG_BASE + 3*PAGE_SIZE) | PDPT_PROT), 
 		[2] = ((uint64_t)(INITPT_SEG_BASE + 4*PAGE_SIZE) | PDPT_PROT), 
@@ -50,6 +55,8 @@ pdpt_entry_t		IdlePDPT[PTE_PER_PAGE] __attribute__((section("__INITPT, __data"))
 #error Please update idle_pt.c to reflect the new value of NPGPTD
 #endif
 
+#if MACHINE_BOOTSTRAPPTD
+
 #define ID_MAP_2MEG(x) [(x)] = ((((uint64_t)(x)) << 21) | (INTEL_PTE_PS | INTEL_PTE_VALID | INTEL_PTE_WRITE)),
 
 #define L0(x,n) x(n)
@@ -70,3 +77,4 @@ pdpt_entry_t		IdlePDPT[PTE_PER_PAGE] __attribute__((section("__INITPT, __data"))
 pd_entry_t		BootstrapPTD[2048] __attribute__((section("__INITPT, __data"))) = {
 	FOR_0_TO_2047(ID_MAP_2MEG)
 };
+#endif /* MACHINE_BOOTSTRAPPTD */
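BootstrapPTD identity-maps the low 4GB with 2048 two-megabyte PDEs, generated at compile time by the nested ID_MAP_2MEG expansions. A C sketch of what each entry works out to; the PTE bit values are the conventional x86 ones but should be read as assumptions here.

```c
#include <stdint.h>

#define INTEL_PTE_VALID 0x001ull   /* assumed bit positions */
#define INTEL_PTE_WRITE 0x002ull
#define INTEL_PTE_PS    0x080ull   /* 2MB page-size bit */

typedef uint64_t pd_entry_t;

static pd_entry_t id_map_2meg(uint64_t index)
{
    /* physical address is just the index shifted by 21 bits */
    return (index << 21) |
           (INTEL_PTE_PS | INTEL_PTE_VALID | INTEL_PTE_WRITE);
}

/* 2048 entries x 2MB = the low 4GB, matching BootstrapPTD[2048]. */
void fill_bootstrap_ptd(pd_entry_t ptd[2048])
{
    for (uint64_t i = 0; i < 2048; i++)
        ptd[i] = id_map_2meg(i);
}
```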
diff --git a/osfmk/i386/idt.s b/osfmk/i386/idt.s
index 17e6331ea..362b783a4 100644
--- a/osfmk/i386/idt.s
+++ b/osfmk/i386/idt.s
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -60,6 +60,24 @@
 #include <mach_kdb.h>
 #include <i386/eflags.h>
 #include <i386/trap.h>
+#include <i386/rtclock_asm.h>
+#define _ARCH_I386_ASM_HELP_H_  /* Prevent inclusion of user header */
+#include <mach/i386/syscall_sw.h>
+#include <i386/postcode.h>
+#include <i386/proc_reg.h>
+#include <mach/exception_types.h>
+
+/*
+ * Low-memory handlers.
+ */
+#define	LO_ALLINTRS		EXT(lo_allintrs32)
+#define	LO_ALLTRAPS		EXT(lo_alltraps32)
+#define	LO_SYSENTER		EXT(lo_sysenter32)
+#define	LO_UNIX_SCALL		EXT(lo_unix_scall32)
+#define	LO_MACH_SCALL		EXT(lo_mach_scall32)
+#define	LO_MDEP_SCALL		EXT(lo_mdep_scall32)
+#define	LO_DIAG_SCALL		EXT(lo_diag_scall32)
+
 
 #define HI_DATA(lo_addr)	( (EXT(lo_addr) - EXT(hi_remap_data)) + HIGH_IDT_BASE )
 #define HI_TEXT(lo_text)	( (EXT(lo_text) - EXT(hi_remap_text)) + HIGH_MEM_BASE )
@@ -103,7 +121,7 @@ Entry(name)				;\
 	pushl	$0			;\
 	pushl	$(n)			;\
 	pusha				;\
-	movl	$ EXT(lo_alltraps),%ebx	;\
+	movl	$(LO_ALLTRAPS),%ebx	;\
 	jmp	enter_lohandler
 
 	
@@ -116,7 +134,7 @@ Entry(name)				;\
 	pushl	$0			;\
 	pushl	$(n)			;\
 	pusha				;\
-	movl	$ EXT(lo_alltraps),%ebx	;\
+	movl	$(LO_ALLTRAPS),%ebx	;\
 	jmp	enter_lohandler
 	
 
@@ -152,11 +170,11 @@ Entry(name)				;\
  * Error code has been pushed.  Push trap number.
  */
 #define	EXCEP_ERR(n,name) \
-	IDT_ENTRY(name,K_INTR_GATE);\
-Entry(name)				;\
-	pushl	$(n)			;\
-	pusha				;\
-	movl	$ EXT(lo_alltraps),%ebx	;\
+	IDT_ENTRY(name,K_INTR_GATE)		;\
+Entry(name)					;\
+	pushl	$(n)				;\
+	pusha					;\
+	movl	$(LO_ALLTRAPS),%ebx		;\
 	jmp	enter_lohandler
 
 	
@@ -170,7 +188,7 @@ L_ ## n:					;\
 	pushl	$0				;\
 	pushl	$(n)				;\
 	pusha					;\
-	movl	$ EXT(lo_allintrs),%ebx		;\
+	movl	$(LO_ALLINTRS),%ebx		;\
 	jmp	enter_lohandler
 
 
@@ -471,8 +489,7 @@ Entry(lo_kernel_cr3)
         .text
 
 	
-/*******************************************************************************************************
- *
+/*
  * Trap/interrupt entry points.
  *
  * All traps must create the following save area on the PCB "stack":
@@ -498,14 +515,21 @@ Entry(lo_kernel_cr3)
  *	user ss  - if from user
  */
 
-	
+ret_to_kernel:
+	jmp *1f
+1:	.long HI_TEXT(hi_ret_to_kernel)
+
+ret_to_user:
+	jmp *1f
+1:	.long HI_TEXT(hi_ret_to_user) 
+
 Entry(hi_ret_to_user)
 	movl	%esp,%ebx
 	movl	%gs:CPU_ACTIVE_THREAD,%ecx
-	subl	ACT_PCB_ISS(%ecx),%ebx
-	movl	$(WINDOWS_CLEAN),ACT_COPYIO_STATE(%ecx)
+	subl	TH_PCB_ISS(%ecx),%ebx
+	movl	$(WINDOWS_CLEAN),TH_COPYIO_STATE(%ecx)
 
-	movl	ACT_PCB_IDS(%ecx),%eax	/* get debug state struct */
+	movl	TH_PCB_IDS(%ecx),%eax	/* get debug state struct */
 	cmpl	$0,%eax			/* is there a debug state */
 	je	1f 			/* branch if not */
 	movl	DS_DR0(%eax), %ecx	/* Load the 32 bit debug registers */
@@ -562,7 +586,7 @@ Entry(hi_unix_scall)
 	pushl   %eax                    /* save system call number */
         pushl   $0                      /* clear trap number slot */
         pusha                           /* save the general registers */
-	movl	$ EXT(lo_unix_scall),%ebx
+	movl	$(LO_UNIX_SCALL),%ebx
 	jmp	enter_lohandler
 
 	
@@ -570,7 +594,7 @@ Entry(hi_mach_scall)
 	pushl   %eax                    /* save system call number */
         pushl   $0                      /* clear trap number slot */
         pusha                           /* save the general registers */
-	movl	$ EXT(lo_mach_scall),%ebx
+	movl	$(LO_MACH_SCALL),%ebx
 	jmp	enter_lohandler
 
 	
@@ -578,7 +602,7 @@ Entry(hi_mdep_scall)
 	pushl   %eax                    /* save system call number */
         pushl   $0                      /* clear trap number slot */
         pusha                           /* save the general registers */
-	movl	$ EXT(lo_mdep_scall),%ebx
+	movl	$(LO_MDEP_SCALL),%ebx
 	jmp	enter_lohandler
 
 	
@@ -586,7 +610,7 @@ Entry(hi_diag_scall)
 	pushl   %eax                    // Save sselector
         pushl   $0                      // Clear trap number slot
         pusha                           // save the general registers
-	movl	$EXT(lo_diag_scall),%ebx	// Get the function down low to transfer to
+	movl	$(LO_DIAG_SCALL),%ebx	// Get the function down low to transfer to
 	jmp	enter_lohandler			// Leap to it...
 
 	
@@ -622,7 +646,7 @@ hi_sysenter_2:
 	pushl	$0			/* clear trap number slot */
 	pusha				/* save the general registers */
 	orl	$(EFL_IF),R32_EFLAGS-R32_EDI(%esp)	/* (edi was last reg pushed) */
-	movl	$ EXT(lo_sysenter),%ebx
+	movl	$(LO_SYSENTER),%ebx
 enter_lohandler:
 	pushl   %ds
 	pushl   %es
@@ -650,11 +674,12 @@ enter_lohandler1:
 	testb	$3,R32_CS(%esp)
 	jz	2f
 	movl	%esp,%edx			/* came from user mode */
+	xor	%ebp, %ebp
 	subl	%gs:CPU_HI_ISS,%edx
 	movl	%gs:CPU_ACTIVE_THREAD,%ecx
-	addl	ACT_PCB_ISS(%ecx),%edx		/* rebase the high stack to a low address */
+	addl	TH_PCB_ISS(%ecx),%edx		/* rebase the high stack to a low address */
 	movl	%edx,%esp
-	cmpl	$0, ACT_PCB_IDS(%ecx)	/* Is there a debug register state? */
+	cmpl	$0, TH_PCB_IDS(%ecx)	/* Is there a debug register state? */
 	je	2f
 	movl	$0, %ecx		/* If so, reset DR7 (the control) */
 	movl	%ecx, %dr7
@@ -673,7 +698,7 @@ Entry(hi_page_fault)
 	movl	%cr2,%eax		/* get the faulting address */
 	movl	%eax,R32_CR2-R32_EDI(%esp)/* save in esp save slot */
 
-	movl	$ EXT(lo_alltraps),%ebx
+	movl	$(LO_ALLTRAPS),%ebx
 	jmp	enter_lohandler
 
 
@@ -728,7 +753,7 @@ hi_debug_trap:
 	pushl	$0
 	pushl	$(T_DEBUG)		/* handle as user trap */
 	pusha				/* save the general registers */
-	movl	$ EXT(lo_alltraps),%ebx
+	movl	$(LO_ALLTRAPS),%ebx
 	jmp	enter_lohandler	
 
 
@@ -769,7 +794,7 @@ trap_check_kernel_exit:
 	je	fault_popl_gs
 hi_take_trap:
 	pusha				/* save the general registers */
-	movl	$ EXT(lo_alltraps),%ebx
+	movl	$(LO_ALLTRAPS),%ebx
 	jmp	enter_lohandler
 
 		
@@ -798,7 +823,7 @@ fault_iret:
 	popl	%eax			/* restore eax */
 					/* now treat as fault from user */
 	pusha				/* save the general registers */
-	movl	$ EXT(lo_alltraps),%ebx
+	movl	$(LO_ALLTRAPS),%ebx
 	jmp	enter_lohandler
 
 /*
@@ -840,19 +865,471 @@ push_none:
 					/* now treat as fault from user */
 					/* except that segment registers are */
 					/* already pushed */
-	movl	$ EXT(lo_alltraps),%ebx
+	movl	$(LO_ALLTRAPS),%ebx
 	jmp	enter_lohandler1
 
 	
         .text
 
 
-Entry(lo_ret_to_user)
-	jmp *1f
-1:	.long HI_TEXT(hi_ret_to_user) 
+Entry(hi_remap_etext)
 
-Entry(lo_ret_to_kernel)
-	jmp *1f
-1:	.long HI_TEXT(hi_ret_to_kernel)
 
-Entry(hi_remap_etext)
+/*
+ * All 32 bit task 'exceptions' enter lo_alltraps:
+ *	esp	-> x86_saved_state_t
+ * 
+ * The rest of the state is set up as:	
+ *	cr3	 -> kernel directory
+ *	esp	 -> low based stack
+ *	gs	 -> CPU_DATA_GS
+ *	cs	 -> KERNEL32_CS
+ *	ss/ds/es -> KERNEL_DS
+ *
+ *	interrupts disabled
+ *	direction flag cleared
+ */
+Entry(lo_alltraps32)
+	movl	R32_CS(%esp),%eax	/* assume 32-bit state */
+	cmpl	$(SS_64),SS_FLAVOR(%esp)/* 64-bit? */	
+	jne	1f
+	movl	R64_CS(%esp),%eax	/* 64-bit user mode */
+1:
+	testb	$3,%al
+	jz	trap_from_kernel
+						/* user mode trap */
+	TIME_TRAP_UENTRY
+
+	movl	%gs:CPU_ACTIVE_THREAD,%ecx
+	movl	TH_TASK(%ecx),%ebx
+
+	/* Check for active vtimers in the current task */
+	TASK_VTIMER_CHECK(%ebx, %ecx)
+
+	movl	%gs:CPU_KERNEL_STACK,%ebx
+	xchgl	%ebx,%esp		/* switch to kernel stack */
+
+	CCALL1(user_trap, %ebx)		/* call user trap routine */
+	/* user_trap() unmasks interrupts */
+	cli				/* hold off intrs - critical section */
+	xorl	%ecx,%ecx		/* don't check if we're in the PFZ */
+	
+/*
+ * Return from trap or system call, checking for ASTs.
+ * On lowbase PCB stack with intrs disabled
+ */	
+Entry(return_from_trap32)
+	movl	%gs:CPU_ACTIVE_THREAD, %esp
+	movl	TH_PCB_ISS(%esp), %esp	/* switch back to PCB stack */
+	movl	%gs:CPU_PENDING_AST, %eax
+	testl	%eax, %eax
+	je	EXT(return_to_user)	/* branch if no AST */
+LEXT(return_from_trap_with_ast)
+	movl	%gs:CPU_KERNEL_STACK, %ebx
+	xchgl	%ebx, %esp		/* switch to kernel stack */
+
+	testl	%ecx, %ecx		/* see if we need to check for an EIP in the PFZ */
+	je	2f			/* no, go handle the AST */
+	cmpl	$(SS_64), SS_FLAVOR(%ebx)	/* are we a 64-bit task? */
+	je	1f
+					/* no... 32-bit user mode */
+	movl	R32_EIP(%ebx), %eax
+	pushl	%ebx			/* save PCB stack */
+	xorl	%ebp, %ebp		/* clear frame pointer */
+	CCALL1(commpage_is_in_pfz32, %eax)
+	popl	%ebx			/* retrieve pointer to PCB stack */
+	testl	%eax, %eax
+	je	2f			/* not in the PFZ... go service AST */
+	movl	%eax, R32_EBX(%ebx)	/* let the PFZ know we've pended an AST */
+	xchgl	%ebx, %esp		/* switch back to PCB stack */
+	jmp	EXT(return_to_user)
+1:					/* 64-bit user mode */
+	movl	R64_RIP(%ebx), %ecx
+	movl	R64_RIP+4(%ebx), %eax
+	pushl	%ebx			/* save PCB stack */
+	xorl	%ebp, %ebp		/* clear frame pointer */
+	CCALL2(commpage_is_in_pfz64, %ecx, %eax)
+	popl	%ebx			/* retrieve pointer to PCB stack */
+	testl	%eax, %eax		
+	je	2f			/* not in the PFZ... go service AST */
+	movl	%eax, R64_RBX(%ebx)	/* let the PFZ know we've pended an AST */
+	xchgl	%ebx, %esp		/* switch back to PCB stack */
+	jmp	EXT(return_to_user)
+2:	
+	sti				/* interrupts always enabled on return to user mode */
+	xorl	%ebp, %ebp		/* Clear framepointer */
+	CCALL1(i386_astintr, $0)	/* take the AST */
+	cli
+	xorl	%ecx, %ecx		/* don't check if we're in the PFZ */
+	jmp	EXT(return_from_trap32)	/* and check again (rare) */
+
+
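Before servicing an AST on the way back to user mode, the code asks the commpage whether the interrupted EIP (or RIP, for 64-bit tasks) lies inside the preemption-free zone; if so, the AST is merely pended by writing into the saved EBX/RBX and delivery is deferred until the PFZ is exited. A reduced C sketch of the 32-bit case; the saved-state layout is cut down to the two fields the logic needs, and both helpers are stubs.

```c
#include <stdint.h>
#include <stdbool.h>

struct x86_saved_state32 {
    uint32_t eip;   /* R32_EIP */
    uint32_t ebx;   /* R32_EBX: PFZ "AST pended" mailbox */
};

/* Stub for the real commpage test: nonzero iff eip is inside the
 * preemption-free zone (the returned value is stored into EBX). */
static uint32_t commpage_is_in_pfz32(uint32_t eip)
{
    (void)eip;
    return 0;
}

static void i386_astintr(int urgent) { (void)urgent; }  /* stub */

bool maybe_defer_ast(struct x86_saved_state32 *ss)
{
    uint32_t pended = commpage_is_in_pfz32(ss->eip);
    if (pended != 0) {
        ss->ebx = pended;   /* let the PFZ know an AST is pending */
        return true;        /* return to user without servicing it */
    }
    i386_astintr(0);        /* safe point: take the AST now */
    return false;
}
```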
+/*
+ * Trap from kernel mode.  No need to switch stacks.
+ * Interrupts must be off here - we will set them to state at time of trap
+ * as soon as it's safe for us to do so and not recurse doing preemption
+ */
+trap_from_kernel:
+	movl	%esp, %eax		/* saved state addr */
+	pushl	R32_EIP(%esp)		/* Simulate a CALL from fault point */
+	pushl   %ebp			/* Extend framepointer chain */
+	movl	%esp, %ebp
+	CCALL1WITHSP(kernel_trap, %eax)	/* Call kernel trap handler */
+	popl	%ebp
+	addl	$4, %esp
+	cli
+
+	movl	%gs:CPU_PENDING_AST,%eax		/* get pending asts */
+	testl	$ AST_URGENT,%eax	/* any urgent preemption? */
+	je	ret_to_kernel			/* no, nothing to do */
+	cmpl	$ T_PREEMPT,R32_TRAPNO(%esp)
+	je	ret_to_kernel			  /* T_PREEMPT handled in kernel_trap() */
+	testl	$ EFL_IF,R32_EFLAGS(%esp)		/* interrupts disabled? */
+	je	ret_to_kernel
+	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL		/* preemption disabled? */
+	jne	ret_to_kernel
+	movl	%gs:CPU_KERNEL_STACK,%eax
+	movl	%esp,%ecx
+	xorl	%eax,%ecx
+	and	EXT(kernel_stack_mask),%ecx
+	testl	%ecx,%ecx		/* are we on the kernel stack? */
+	jne	ret_to_kernel		/* no, skip it */
+
+	CCALL1(i386_astintr, $1)	/* take the AST */
+
+	jmp	ret_to_kernel
+
+
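trap_from_kernel only takes a preemption AST when every gate passes: an urgent AST is pending, the trap was not already T_PREEMPT, interrupts were enabled at the trap point, the preemption level is zero, and the trapped ESP lies on the kernel stack (checked by XOR-and-mask against kernel_stack_mask). A C sketch of that predicate; the constants and stack-mask value are assumptions.

```c
#include <stdint.h>
#include <stdbool.h>

#define AST_URGENT 0x01u          /* assumed encoding */
#define EFL_IF     0x200u         /* x86 IF flag */
#define T_PREEMPT  255u           /* assumed trap number */

struct cpu {
    unsigned  pending_ast, preemption_level;
    uintptr_t kernel_stack;
};

/* ~(stack size - 1); 16KB stacks assumed for illustration */
static uintptr_t kernel_stack_mask = ~(uintptr_t)0x3FFF;

bool should_preempt(const struct cpu *cpu, uint32_t trapno,
                    uint32_t eflags, uintptr_t sp)
{
    if (!(cpu->pending_ast & AST_URGENT)) return false;
    if (trapno == T_PREEMPT)              return false; /* handled */
    if (!(eflags & EFL_IF))               return false; /* intrs off */
    if (cpu->preemption_level != 0)       return false;
    /* same stack iff the bits above the mask agree */
    return ((sp ^ cpu->kernel_stack) & kernel_stack_mask) == 0;
}
```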
+/*
+ * All interrupts on all tasks enter here with:
+ *	esp->	 -> x86_saved_state_t
+ *
+ *	cr3	 -> kernel directory
+ *	esp	 -> low based stack
+ *	gs	 -> CPU_DATA_GS
+ *	cs	 -> KERNEL32_CS
+ *	ss/ds/es -> KERNEL_DS
+ *
+ *	interrupts disabled
+ *	direction flag cleared
+ */
+Entry(lo_allintrs32)
+	/*
+	 * test whether already on interrupt stack
+	 */
+	movl	%gs:CPU_INT_STACK_TOP,%ecx
+	cmpl	%esp,%ecx
+	jb	1f
+	leal	-INTSTACK_SIZE(%ecx),%edx
+	cmpl	%esp,%edx
+	jb	int_from_intstack
+1:	
+	xchgl	%ecx,%esp		/* switch to interrupt stack */
+
+	movl	%cr0,%eax		/* get cr0 */
+	orl	$(CR0_TS),%eax		/* or in TS bit */
+	movl	%eax,%cr0		/* set cr0 */
+
+	subl	$8, %esp		/* for 16-byte stack alignment */
+	pushl	%ecx			/* save pointer to old stack */
+	movl	%ecx,%gs:CPU_INT_STATE	/* save intr state */
+	
+	TIME_INT_ENTRY			/* do timing */
+
+	movl	%gs:CPU_ACTIVE_THREAD,%ecx
+	movl	TH_TASK(%ecx),%ebx
+
+	/* Check for active vtimers in the current task */
+	TASK_VTIMER_CHECK(%ebx, %ecx)
+
+	incl	%gs:CPU_PREEMPTION_LEVEL
+	incl	%gs:CPU_INTERRUPT_LEVEL
+
+	movl	%gs:CPU_INT_STATE, %eax
+	CCALL1(interrupt, %eax)		/* call generic interrupt routine */
+
+	cli				/* just in case we returned with intrs enabled */
+	xorl	%eax,%eax
+	movl	%eax,%gs:CPU_INT_STATE	/* clear intr state pointer */
+
+	decl	%gs:CPU_INTERRUPT_LEVEL
+	decl	%gs:CPU_PREEMPTION_LEVEL
+
+	TIME_INT_EXIT			/* do timing */
+
+	movl	%gs:CPU_ACTIVE_THREAD,%eax
+	movl	TH_PCB_FPS(%eax),%eax	/* get pcb's ifps */
+	testl	%eax, %eax		/* Is there a context */
+	je	1f			/* Branch if not */
+	cmpl	$0, FP_VALID(%eax)	/* Check fp_valid */
+	jne	1f			/* Branch if valid */
+	clts				/* Clear TS */
+	jmp	2f
+1:
+	movl	%cr0,%eax		/* get cr0 */
+	orl	$(CR0_TS),%eax		/* or in TS bit */
+	movl	%eax,%cr0		/* set cr0 */
+2:
+	popl	%esp			/* switch back to old stack */
+
+	/* Load interrupted code segment into %eax */
+	movl	R32_CS(%esp),%eax	/* assume 32-bit state */
+	cmpl	$(SS_64),SS_FLAVOR(%esp)/* 64-bit? */	
+	jne	3f
+	movl	R64_CS(%esp),%eax	/* 64-bit user mode */
+3:
+	testb	$3,%al			/* user mode, */
+	jnz	ast_from_interrupt_user	/* go handle potential ASTs */
+	/*
+	 * we only want to handle preemption requests if
+	 * the interrupt fell in the kernel context
+	 * and preemption isn't disabled
+	 */
+	movl	%gs:CPU_PENDING_AST,%eax	
+	testl	$ AST_URGENT,%eax		/* any urgent requests? */
+	je	ret_to_kernel			/* no, nothing to do */
+
+	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL	/* preemption disabled? */
+	jne	ret_to_kernel			/* yes, skip it */
+
+	movl	%gs:CPU_KERNEL_STACK,%eax
+	movl	%esp,%ecx
+	xorl	%eax,%ecx
+	and	EXT(kernel_stack_mask),%ecx
+	testl	%ecx,%ecx			/* are we on the kernel stack? */
+	jne	ret_to_kernel			/* no, skip it */
+
+	/*
+	 * Take an AST from kernel space.  We don't need (and don't want)
+	 * to do as much as the case where the interrupt came from user
+	 * space.
+	 */
+	CCALL1(i386_astintr, $1)
+
+	jmp	ret_to_kernel
+
+
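lo_allintrs32 first decides whether ESP already sits inside the interrupt stack, i.e. within (top - INTSTACK_SIZE, top]; a nested interrupt stays put and takes the simple int_from_intstack path, everything else switches stacks. A one-function C sketch, with INTSTACK_SIZE assumed.

```c
#include <stdint.h>
#include <stdbool.h>

#define INTSTACK_SIZE (4 * 4096u)   /* assumed size */

/* Mirrors the cmpl/leal/cmpl sequence at the top of lo_allintrs32:
 * nested iff top - INTSTACK_SIZE < sp <= top. */
bool on_int_stack(uintptr_t sp, uintptr_t int_stack_top)
{
    return sp <= int_stack_top &&
           sp >  int_stack_top - INTSTACK_SIZE;
}
```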
+/*
+ * nested int - simple path, can't preempt etc on way out
+ */
+int_from_intstack:
+	incl	%gs:CPU_PREEMPTION_LEVEL
+	incl	%gs:CPU_INTERRUPT_LEVEL
+
+	movl	%esp, %edx		/* x86_saved_state */
+	CCALL1(interrupt, %edx)
+
+	decl	%gs:CPU_INTERRUPT_LEVEL
+	decl	%gs:CPU_PREEMPTION_LEVEL
+
+	jmp	ret_to_kernel
+
+/*
+ *	Take an AST from an interrupted user
+ */
+ast_from_interrupt_user:
+	movl	%gs:CPU_PENDING_AST,%eax
+	testl	%eax,%eax		/* pending ASTs? */
+	je	ret_to_user		/* no, nothing to do */
+
+	TIME_TRAP_UENTRY
+
+	movl	$1, %ecx		/* check if we're in the PFZ */
+	jmp	EXT(return_from_trap_with_ast)	/* return */
+
+
+/*
+ * 32bit Tasks
+ * System call entries via INTR_GATE or sysenter:
+ *
+ *	esp	 -> x86_saved_state32_t
+ *	cr3	 -> kernel directory
+ *	esp	 -> low based stack
+ *	gs	 -> CPU_DATA_GS
+ *	cs	 -> KERNEL32_CS
+ *	ss/ds/es -> KERNEL_DS
+ *
+ *	interrupts disabled
+ *	direction flag cleared
+ */
+
+Entry(lo_sysenter32)
+	/*
+	 * We can be here either for a mach syscall or a unix syscall,
+	 * as indicated by the sign of the code:
+	 */
+	movl	R32_EAX(%esp),%eax
+	testl	%eax,%eax
+	js	EXT(lo_mach_scall32)		/* < 0 => mach */
+						/* > 0 => unix */
+	
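The sysenter gate dispatches on the sign of the syscall number in EAX: negative selects a Mach trap, positive a Unix syscall, so a single test/js pair routes both families through one entry point. A trivial C sketch with stub handlers.

```c
#include <stdint.h>
#include <stdio.h>

static void mach_call(int32_t num) { printf("mach trap %d\n", -num); }
static void unix_call(int32_t num) { printf("unix syscall %d\n", num); }

void sysenter_dispatch(int32_t eax)
{
    if (eax < 0)          /* js EXT(lo_mach_scall32): < 0 => mach */
        mach_call(eax);
    else                  /* > 0 => unix */
        unix_call(eax);
}
```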
+Entry(lo_unix_scall32)
+	TIME_TRAP_UENTRY
+
+	movl	%gs:CPU_KERNEL_STACK,%edi
+	xchgl	%edi,%esp			/* switch to kernel stack */
+	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread     */
+	movl	TH_TASK(%ecx),%ebx		/* point to current task  */
+	incl	TH_SYSCALLS_UNIX(%ecx)		/* increment call count   */
+
+	/* Check for active vtimers in the current task */
+	TASK_VTIMER_CHECK(%ebx, %ecx)
+
+	sti
+
+	CCALL1(unix_syscall, %edi)
+	/*
+	 * always returns through thread_exception_return
+	 */
+
+
+Entry(lo_mach_scall32)
+	TIME_TRAP_UENTRY
+
+	movl	%gs:CPU_KERNEL_STACK,%edi
+	xchgl	%edi,%esp			/* switch to kernel stack */
+	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread     */
+	movl	TH_TASK(%ecx),%ebx		/* point to current task  */
+	incl	TH_SYSCALLS_MACH(%ecx)		/* increment call count   */
+
+	/* Check for active vtimers in the current task */
+	TASK_VTIMER_CHECK(%ebx, %ecx)
+
+	sti
+
+	CCALL1(mach_call_munger, %edi)
+	/*
+	 * always returns through thread_exception_return
+	 */
+
+
+Entry(lo_mdep_scall32)
+	TIME_TRAP_UENTRY
+
+	movl	%gs:CPU_KERNEL_STACK,%edi
+	xchgl	%edi,%esp			/* switch to kernel stack */
+	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread     */
+	movl	TH_TASK(%ecx),%ebx		/* point to current task  */
+
+	/* Check for active vtimers in the current task */
+	TASK_VTIMER_CHECK(%ebx, %ecx)
+	
+	sti
+
+	CCALL1(machdep_syscall, %edi)
+	/*
+	 * always returns through thread_exception_return
+	 */
+
+
+Entry(lo_diag_scall32)
+	TIME_TRAP_UENTRY
+
+	movl	%gs:CPU_KERNEL_STACK,%edi
+	xchgl	%edi,%esp			/* switch to kernel stack */
+	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread     */
+	movl	TH_TASK(%ecx),%ebx		/* point to current task  */
+
+	/* Check for active vtimers in the current task */
+	TASK_VTIMER_CHECK(%ebx, %ecx)
+
+	pushl	%edi			/* push pcb stack for later */
+
+	CCALL1(diagCall, %edi)		// Call diagnostics
+	
+	cli				// Disable interruptions just in case
+	popl	%esp			// Get back the original stack
+	cmpl	$0,%eax			// What kind of return is this?
+	jne	EXT(return_to_user)	// Normal return, do not check asts...
+
+	CCALL5(i386_exception, $EXC_SYSCALL, $0x6000, $0, $1, $0)
+		// pass what would be the diag syscall
+		// error return - cause an exception
+	/* no return */
+	
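+/*
+ * The diagCall() return convention above, sketched in C.  The
+ * i386_exception signature here is simplified: the real call passes
+ * 64-bit code/subcode values as the argument pairs seen in the
+ * CCALL5 above.
+ *
+ *	extern int  diagCall(void *state);
+ *	extern void i386_exception(int exc, int code, int subcode);
+ *
+ *	static void
+ *	diag_scall_sketch(void *state)
+ *	{
+ *		if (diagCall(state) != 0)
+ *			return;		// normal return, ASTs not checked
+ *		i386_exception(EXC_SYSCALL, 0x6000, 1);	// error return
+ *	}
+ */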
+
+LEXT(return_to_user)
+	TIME_TRAP_UEXIT
+	jmp	ret_to_user
+
+
+/*
+ * Double-fault exception handler task. The last gasp...
+ */
+Entry(df_task_start)
+	CCALL1(panic_double_fault32, $(T_DOUBLE_FAULT))
+	hlt
+
+
+/*
+ * machine-check handler task. The last gasp...
+ */
+Entry(mc_task_start)
+	CCALL1(panic_machine_check32, $(T_MACHINE_CHECK))
+	hlt
+
+#if MACH_KDB
+#include <i386/lapic.h>
+#define CX(addr,reg)	addr(,reg,4)
+#if	0
+/*
+ * Note that the per-fault entry points are not currently
+ * functional.  The only way to make them work would be to
+ * set up separate TSS's for each fault type, which doesn't
+ * currently seem worthwhile.  (The offset part of a task
+ * gate is always ignored.)  So all faults that task switch
+ * currently resume at db_task_start.
+ */
+/*
+ * Double fault (Murphy's point) - error code (0) on stack
+ */
+Entry(db_task_dbl_fault)
+	popl	%eax
+	movl	$(T_DOUBLE_FAULT),%ebx
+	jmp	db_task_start
+/*
+ * Segment not present - error code on stack
+ */
+Entry(db_task_seg_np)
+	popl	%eax
+	movl	$(T_SEGMENT_NOT_PRESENT),%ebx
+	jmp	db_task_start
+/*
+ * Stack fault - error code on (current) stack
+ */
+Entry(db_task_stk_fault)
+	popl	%eax
+	movl	$(T_STACK_FAULT),%ebx
+	jmp	db_task_start
+/*
+ * General protection fault - error code on stack
+ */
+Entry(db_task_gen_prot)
+	popl	%eax
+	movl	$(T_GENERAL_PROTECTION),%ebx
+	jmp	db_task_start
+#endif	/* 0 */
+/*
+ * The entry point where execution resumes after last-ditch debugger task
+ * switch.
+ */
+Entry(db_task_start)
+	movl	%esp,%edx
+	subl	$(ISS32_SIZE),%edx
+	movl	%edx,%esp		/* allocate x86_saved_state on stack */
+	movl	%eax,R32_ERR(%esp)
+	movl	%ebx,R32_TRAPNO(%esp)
+	pushl	%edx
+	CPU_NUMBER(%edx)
+	movl	CX(EXT(master_dbtss),%edx),%edx
+	movl	TSS_LINK(%edx),%eax
+	pushl	%eax			/* pass along selector of previous TSS */
+	call	EXT(db_tss_to_frame)
+	popl	%eax			/* get rid of TSS selector */
+	call	EXT(db_trap_from_asm)
+	addl	$0x4,%esp
+	/*
+	 * And now...?
+	 */
+	iret				/* ha, ha, ha... */
+#endif	/* MACH_KDB */
diff --git a/osfmk/i386/idt64.s b/osfmk/i386/idt64.s
index 8efe5ed44..4d91cb82f 100644
--- a/osfmk/i386/idt64.s
+++ b/osfmk/i386/idt64.s
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -31,13 +31,16 @@
 #include <mach_kdb.h>
 #include <i386/eflags.h>
 #include <i386/trap.h>
+#include <i386/rtclock_asm.h>
 #define _ARCH_I386_ASM_HELP_H_		/* Prevent inclusion of user header */
 #include <mach/i386/syscall_sw.h>
 #include <i386/postcode.h>
 #include <i386/proc_reg.h>
+#include <mach/exception_types.h>
+
 
 /*
- * Locore handlers.
+ * Low-memory compatibility-mode handlers.
  */
 #define	LO_ALLINTRS		EXT(lo_allintrs)
 #define	LO_ALLTRAPS		EXT(lo_alltraps)
@@ -79,8 +82,8 @@
 #define	EXCEP64_ERR(n,name)				 \
 	IDT64_ENTRY(name,0,K_INTR_GATE)			;\
 Entry(name)						;\
+	push	$(LO_ALLTRAPS)				;\
 	push	$(n)					;\
-	movl	$(LO_ALLTRAPS), 4(%rsp)			;\
 	jmp	L_enter_lohandler
 
 
@@ -92,8 +95,8 @@ Entry(name)						;\
 	IDT64_ENTRY(name,0,K_INTR_GATE)			;\
 Entry(name)						;\
 	push	$0					;\
+	push	$(LO_ALLTRAPS)				;\
 	push	$(n)					;\
-	movl	$(LO_ALLTRAPS), 4(%rsp)			;\
 	jmp	L_enter_lohandler
 
 	
@@ -106,8 +109,8 @@ Entry(name)						;\
 	IDT64_ENTRY(name,0,U_INTR_GATE)			;\
 Entry(name)						;\
 	push	$0					;\
+	push	$(LO_ALLTRAPS)				;\
 	push	$(n)					;\
-	movl	$(LO_ALLTRAPS), 4(%rsp)			;\
 	jmp	L_enter_lohandler
 
 
@@ -138,8 +141,8 @@ Entry(name)						;\
 	.align FALIGN					;\
 L_ ## n:						;\
 	push	$0					;\
+	push	$(LO_ALLINTRS)				;\
 	push	$(n)					;\
-	movl	$(LO_ALLINTRS), 4(%rsp)			;\
 	jmp	L_enter_lohandler
 
 
@@ -474,16 +477,17 @@ EXCEPTION64(0xff,t64_preempt)
  */
 
 	.code32
+
 /*
  * Control is passed here to return to the compatibility mode user.
  * At this stage we're in kernel space in compatibility mode
  * but we need to switch into 64-bit mode in the 4G-based trampoline
  * space before performing the iret.
  */ 
-Entry(lo64_ret_to_user)
+ret_to_user:
 	movl	%gs:CPU_ACTIVE_THREAD,%ecx
 
-	movl	ACT_PCB_IDS(%ecx),%eax	/* Obtain this thread's debug state */
+	movl	TH_PCB_IDS(%ecx),%eax	/* Obtain this thread's debug state */
 	cmpl	$0,%eax			/* Is there a debug register context? */
 	je	2f 			/* branch if not */
 	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP /* Are we a 32-bit task? */
@@ -529,7 +533,7 @@ Entry(lo64_ret_to_user)
 	je	1f
 	/* flag the copyio engine state as WINDOWS_CLEAN */
 	mov	%gs:CPU_ACTIVE_THREAD,%eax
-	movl	$(WINDOWS_CLEAN),ACT_COPYIO_STATE(%eax)
+	movl	$(WINDOWS_CLEAN),TH_COPYIO_STATE(%eax)
 	mov	%rcx,%cr3               /* switch to user's address space */
 1:
 
@@ -551,7 +555,7 @@ Entry(lo64_ret_to_user)
 	jne	L_64bit_return
 	jmp	L_32bit_return
 
-Entry(lo64_ret_to_kernel)
+ret_to_kernel:
 	ENTER_64BIT_MODE()
 	ENTER_UBERSPACE()	
 
@@ -610,9 +614,9 @@ EXT(ret32_set_fs):
 EXT(ret32_set_gs):
 	movw	R32_GS(%rsp), %gs
 
-	add	$(ISC32_OFFSET)+8+8, %rsp	/* pop compat frame +
-						   trapno/trapfn and error */	
-        cmp	$(SYSENTER_CS),ISF64_CS-8-8(%rsp)
+	add	$(ISC32_OFFSET)+8+8+8, %rsp	/* pop compat frame +
+						   trapno, trapfn and error */	
+        cmp	$(SYSENTER_CS),ISF64_CS-8-8-8(%rsp)
 					/* test for fast entry/exit */
         je      L_fast_exit
 EXT(ret32_iret):
@@ -661,9 +665,9 @@ L_64bit_return:
 	mov	R64_RCX(%rsp), %rcx
 	mov	R64_RAX(%rsp), %rax
 
-	add	$(ISS64_OFFSET)+8+8, %rsp	/* pop saved state frame +
-						   trapno/trapfn and error */	
-        cmpl	$(SYSCALL_CS),ISF64_CS-8-8(%rsp)
+	add	$(ISS64_OFFSET)+8+8+8, %rsp	/* pop saved state frame +
+						   trapno, trapfn and error */	
+        cmpl	$(SYSCALL_CS),ISF64_CS-8-8-8(%rsp)
 					/* test for fast entry/exit */
         je      L_sysret
 EXT(ret64_iret):
@@ -676,9 +680,9 @@ L_sysret:
 	 *	r1	user rflags
 	 *	rsp	user stack pointer
 	 */
-	mov	ISF64_RIP-16(%rsp), %rcx
-	mov	ISF64_RFLAGS-16(%rsp), %r11
-	mov	ISF64_RSP-16(%rsp), %rsp
+	mov	ISF64_RIP-8-8-8(%rsp), %rcx
+	mov	ISF64_RFLAGS-8-8-8(%rsp), %r11
+	mov	ISF64_RSP-8-8-8(%rsp), %rsp
 	sysretq				/* return from system call */
 
 /*
@@ -704,8 +708,8 @@ Entry(hi64_unix_scall)
 	swapgs				/* switch to kernel gs (cpu_data) */
 L_unix_scall_continue:
 	push	%rax			/* save system call number */
+	push	$(LO_UNIX_SCALL)
 	push	$(UNIX_INT)
-	movl	$(LO_UNIX_SCALL), 4(%rsp)
 	jmp	L_32bit_enter_check
 
 	
@@ -713,8 +717,8 @@ Entry(hi64_mach_scall)
 	swapgs				/* switch to kernel gs (cpu_data) */
 L_mach_scall_continue:
 	push	%rax			/* save system call number */
+	push	$(LO_MACH_SCALL)
 	push	$(MACH_INT)
-	movl	$(LO_MACH_SCALL), 4(%rsp)
 	jmp	L_32bit_enter_check
 
 	
@@ -722,8 +726,8 @@ Entry(hi64_mdep_scall)
 	swapgs				/* switch to kernel gs (cpu_data) */
 L_mdep_scall_continue:
 	push	%rax			/* save system call number */
+	push	$(LO_MDEP_SCALL)
 	push	$(MACHDEP_INT)
-	movl	$(LO_MDEP_SCALL), 4(%rsp)
 	jmp	L_32bit_enter_check
 
 	
@@ -731,8 +735,8 @@ Entry(hi64_diag_scall)
 	swapgs				/* switch to kernel gs (cpu_data) */
 L_diag_scall_continue:
 	push	%rax			/* save system call number */
+	push	$(LO_DIAG_SCALL)
 	push	$(DIAG_INT)
-	movl	$(LO_DIAG_SCALL), 4(%rsp)
 	jmp	L_32bit_enter_check
 
 Entry(hi64_syscall)
@@ -794,6 +798,7 @@ Entry(hi64_sysenter)
 L_sysenter_continue:
 	push	%rdx			/* eip */
 	push	%rax			/* err/eax - syscall code */
+	push	$0
 	push	$(T_SYSENTER)
 	orl	$(EFL_IF), ISF64_RFLAGS(%rsp)
 	movl	$(LO_MACH_SCALL), ISF64_TRAPFN(%rsp)
@@ -830,6 +835,7 @@ EXT(hi64_sysenter_user_arg_copy):
 	/* Fall through to 32-bit handler */
 
 L_32bit_enter:
+	cld
 	/*
 	 * Make space for the compatibility save area.
 	 */
@@ -908,6 +914,12 @@ L_enter_lohandler2:
 	mov	%rcx, %cr3
 	mov	%rcx, %gs:CPU_ACTIVE_CR3
 2:
+	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* Get the active thread */
+	cmpl	$0, TH_PCB_IDS(%ecx)	/* Is there a debug register state? */
+	jz	21f
+	xor	%ecx, %ecx		/* If so, reset DR7 (the control) */
+	mov	%rcx, %dr7
+21:	
 	/*
 	 * Switch to compatibility mode.
 	 * Then establish kernel segments.
@@ -931,13 +943,8 @@ L_enter_lohandler2:
 	mov	$(CPU_DATA_GS), %eax
 	mov	%eax, %gs
 
-	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* Get the active thread */
-	cmpl	$0, ACT_PCB_IDS(%ecx)	/* Is there a debug register state? */
-	je	1f
-	movl	$0, %ecx		/* If so, reset DR7 (the control) */
-	movl	%ecx, %dr7
-1:
-	addl	$1,%gs:hwIntCnt(,%ebx,4)	// Bump the trap/intr count
+	incl	%gs:hwIntCnt(,%ebx,4)	/* Bump the trap/intr count */
+
 	/* Dispatch the designated lo handler */
 	jmp	*%edx
 
@@ -958,6 +965,7 @@ L_64bit_enter:
 	sub	$(ISS64_OFFSET), %rsp
 	movl	$(SS_64), SS_FLAVOR(%rsp)
 
+	cld
 	/*
 	 * Save segment regs
 	 */
@@ -1001,12 +1009,12 @@ L_64bit_enter_after_fault:
 	jmp	L_enter_lohandler2
 
 Entry(hi64_page_fault)
+	push	$(LO_ALLTRAPS)
 	push	$(T_PAGE_FAULT)
-	movl	$(LO_ALLTRAPS), 4(%rsp)
 	cmpl	$(KERNEL_UBER_BASE_HI32), ISF64_RIP+4(%rsp)
 	jne	L_enter_lohandler
 	cmpl	$(EXT(hi64_sysenter_user_arg_copy)), ISF64_RIP(%rsp)
-	jne	L_enter_lohandler
+	jne	hi64_kernel_trap
 	mov	ISF64_RSP(%rsp), %rsp
 	jmp	L_32bit_enter
 
@@ -1019,8 +1027,8 @@ Entry(hi64_page_fault)
 Entry(hi64_debug)
 	swapgs				/* set %gs for cpu data */
 	push	$0			/* error code */
+	push	$(LO_ALLTRAPS)
 	push	$(T_DEBUG)
-	movl	$(LO_ALLTRAPS), ISF64_TRAPFN(%rsp)
 
 	testb	$3, ISF64_CS(%rsp)
 	jnz	L_enter_lohandler_continue
@@ -1052,7 +1060,7 @@ Entry(hi64_debug)
 	 * Interrupt stack frame has been pushed on the temporary stack.
 	 * We have to switch to pcb stack and copy eflags.
 	 */ 
-	add	$32,%rsp		/* remove trapno/trapfn/err/rip/cs */
+	add	$40,%rsp		/* remove trapno/trapfn/err/rip/cs */
 	push	%rcx			/* save %rcx - user stack pointer */
 	mov	32(%rsp),%rcx		/* top of intr stack -> pcb stack */
 	xchg	%rcx,%rsp		/* switch to pcb stack */
@@ -1066,8 +1074,8 @@ Entry(hi64_debug)
 
 Entry(hi64_double_fault)
 	swapgs				/* set %gs for cpu data */
+	push	$(LO_DOUBLE_FAULT)
 	push	$(T_DOUBLE_FAULT)
-	movl	$(LO_DOUBLE_FAULT), ISF64_TRAPFN(%rsp)
 
 	cmpl	$(KERNEL_UBER_BASE_HI32), ISF64_RIP+4(%rsp)
 	jne	L_enter_lohandler_continue	/* trap not in uber-space */
@@ -1084,96 +1092,133 @@ Entry(hi64_double_fault)
  * Check for a GP/NP fault in the kernel_return
  * sequence; if there, report it as a GP/NP fault on the user's instruction.
  *
- * rsp->     0:	trap code (NP or GP) and trap function
- *	     8:	segment number in error (error code)
- *	    16	rip
- *	    24	cs
- *	    32	rflags 
- *	    40	rsp
- *	    48	ss
- *	    56	old registers (trap is from kernel)
+ * rsp->     0 ISF64_TRAPNO:	trap code (NP or GP)
+ *	     8 ISF64_TRAPFN:	trap function
+ *	    16 ISF64_ERR:	segment number in error (error code)
+ *	    24 ISF64_RIP:	rip
+ *	    32 ISF64_CS:	cs
+ *	    40 ISF64_RFLAGS:	rflags 
+ *	    48 ISF64_RSP:	rsp
+ *	    56 ISF64_SS:	ss
+ *	    64 			old registers (trap is from kernel)
  */
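+/*
+ * The frame described above, as a C struct for reference -- a sketch;
+ * the authoritative offsets are the generated ISF64_* constants:
+ *
+ *	#include <stdint.h>
+ *
+ *	struct isf64_sketch {
+ *		uint64_t	trapno;		// ISF64_TRAPNO,  0
+ *		uint64_t	trapfn;		// ISF64_TRAPFN,  8
+ *		uint64_t	err;		// ISF64_ERR,    16
+ *		uint64_t	rip;		// ISF64_RIP,    24
+ *		uint64_t	cs;		// ISF64_CS,     32
+ *		uint64_t	rflags;		// ISF64_RFLAGS, 40
+ *		uint64_t	rsp;		// ISF64_RSP,    48
+ *		uint64_t	ss;		// ISF64_SS,     56
+ *	};
+ */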
 Entry(hi64_gen_prot)
+	push	$(LO_ALLTRAPS)
 	push	$(T_GENERAL_PROTECTION)
 	jmp	trap_check_kernel_exit	/* check for kernel exit sequence */
 
 Entry(hi64_stack_fault)
+	push	$(LO_ALLTRAPS)
 	push	$(T_STACK_FAULT)
 	jmp	trap_check_kernel_exit	/* check for kernel exit sequence */
 
 Entry(hi64_segnp)
+	push	$(LO_ALLTRAPS)
 	push	$(T_SEGMENT_NOT_PRESENT)
 					/* indicate fault type */
 trap_check_kernel_exit:
-	movl	$(LO_ALLTRAPS), 4(%rsp)
-	testb	$3,24(%rsp)
-	jnz	hi64_take_trap
+	testb	$3,ISF64_CS(%rsp)
+	jnz	L_enter_lohandler
 					/* trap was from kernel mode, so */
 					/* check for the kernel exit sequence */
-	cmpl	$(KERNEL_UBER_BASE_HI32), 16+4(%rsp)
-	jne	hi64_take_trap		/* trap not in uber-space */
+	cmpl	$(KERNEL_UBER_BASE_HI32), ISF64_RIP+4(%rsp)
+	jne	L_enter_lohandler_continue	/* trap not in uber-space */
 
-	cmpl	$(EXT(ret32_iret)), 16(%rsp)
+	cmpl	$(EXT(ret32_iret)), ISF64_RIP(%rsp)
 	je	L_fault_iret32
-	cmpl	$(EXT(ret32_set_ds)), 16(%rsp)
+	cmpl	$(EXT(ret32_set_ds)), ISF64_RIP(%rsp)
 	je	L_32bit_fault_set_seg
-	cmpl	$(EXT(ret32_set_es)), 16(%rsp)
+	cmpl	$(EXT(ret32_set_es)), ISF64_RIP(%rsp)
 	je	L_32bit_fault_set_seg
-	cmpl	$(EXT(ret32_set_fs)), 16(%rsp)
+	cmpl	$(EXT(ret32_set_fs)), ISF64_RIP(%rsp)
 	je	L_32bit_fault_set_seg
-	cmpl	$(EXT(ret32_set_gs)), 16(%rsp)
+	cmpl	$(EXT(ret32_set_gs)), ISF64_RIP(%rsp)
 	je	L_32bit_fault_set_seg
 
-	cmpl	$(EXT(ret64_iret)), 16(%rsp)
+	cmpl	$(EXT(ret64_iret)), ISF64_RIP(%rsp)
 	je	L_fault_iret64
 
 	cmpl	$(EXT(hi64_sysenter_user_arg_copy)), ISF64_RIP(%rsp)
-	jne	hi64_take_trap
-	mov	ISF64_RSP(%rsp), %rsp
-	jmp	L_32bit_enter
-hi64_take_trap:
-	jmp	L_enter_lohandler
+	cmove	ISF64_RSP(%rsp), %rsp
+	je	L_32bit_enter
+
+hi64_kernel_trap:
+	/*
+	 * Here after taking an unexpected trap from kernel mode - perhaps
+	 * while running in the trampolines hereabouts.
+	 * Make sure we're not on the PCB stack, if so move to the kernel stack.
+	 * This is likely a fatal condition.
+	 * But first, try to be sure we have the kernel gs base active...
+	 */
+	cmpq	$0, %gs:CPU_THIS		/* test gs_base */
+	js	1f				/* -ve kernel addr, no swap */
+	swapgs					/* +ve user addr, swap */
+1:
+	movq	%rax, %gs:CPU_UBER_TMP		/* save %rax */
+	movq	%gs:CPU_UBER_ISF, %rax		/* PCB stack addr */
+	subq	%rsp, %rax
+	cmpq	$(PAGE_SIZE), %rax		/* current stack in PCB? */
+	movq	%gs:CPU_UBER_TMP, %rax		/* restore %rax */
+	ja	L_enter_lohandler_continue	/* stack not in PCB */
+
+	/*
+	 *  Here if %rsp is in the PCB
+	 *  Copy the interrupt stack frame from PCB stack to kernel stack
+	 */
+	movq	%gs:CPU_KERNEL_STACK, %rax	/* note: %rax restored below */
+	xchgq	%rax, %rsp
+	pushq	ISF64_SS(%rax)
+	pushq	ISF64_RSP(%rax)
+	pushq	ISF64_RFLAGS(%rax)
+	pushq	ISF64_CS(%rax)
+	pushq	ISF64_RIP(%rax)
+	pushq	ISF64_ERR(%rax)
+	pushq	ISF64_TRAPFN(%rax)
+	pushq	ISF64_TRAPNO(%rax)
+	movq	%gs:CPU_UBER_TMP, %rax		/* restore %rax */
+	jmp	L_enter_lohandler_continue
+
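+/*
+ * The PCB-stack test above in C -- a sketch; PAGE_SIZE here is an
+ * illustrative 4K value, and the unsigned compare matches the
+ * cmpq/ja pair above:
+ *
+ *	#include <stdbool.h>
+ *	#include <stdint.h>
+ *
+ *	#define PAGE_SIZE	4096	// illustrative
+ *
+ *	static bool
+ *	rsp_in_pcb(uint64_t rsp, uint64_t uber_isf)
+ *	{
+ *		// in the PCB iff %rsp sits within one page below
+ *		// the per-cpu uber ISF address
+ *		return (uber_isf - rsp) <= PAGE_SIZE;
+ *	}
+ */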
 
-		
 /*
  * GP/NP fault on IRET: CS or SS is in error.
  * All registers contain the user's values.
  *
  * on SP is
- *   0	trap number/function
- *   8	errcode
- *  16	rip
- *  24	cs
- *  32	rflags
- *  40	rsp
- *  48	ss			--> new trapno/trapfn
- *  56  (16-byte padding)	--> new errcode
- *  64	user rip
- *  72	user cs
- *  80	user rflags
- *  88	user rsp
- *  96  user ss
+ *   0 ISF64_TRAPNO:	trap code (NP or GP)
+ *   8 ISF64_TRAPFN:	trap function
+ *  16 ISF64_ERR:	segment number in error (error code)
+ *  24 ISF64_RIP:	rip
+ *  32 ISF64_CS:	cs
+ *  40 ISF64_RFLAGS:	rflags 
+ *  48 ISF64_RSP:	rsp
+ *  56 ISF64_SS:	ss  --> new trapno/trapfn
+ *  64			pad --> new errcode
+ *  72			user rip
+ *  80			user cs
+ *  88			user rflags
+ *  96			user rsp
+ * 104 			user ss	(16-byte aligned)
  */
 L_fault_iret32:
-	mov	%rax, 16(%rsp)		/* save rax (we don`t need saved rip) */
-	mov	0(%rsp), %rax		/* get trap number */
-	mov	%rax, 48(%rsp)		/* put in user trap number */
-	mov	8(%rsp), %rax		/* get error code */
-	mov	%rax, 56(%rsp)		/* put in user errcode */
-	mov	16(%rsp), %rax		/* restore rax */
-	add	$48, %rsp		/* reset to original frame */
+	mov	%rax, ISF64_RIP(%rsp)	/* save rax (we don`t need saved rip) */
+	mov	ISF64_TRAPNO(%rsp), %rax
+	mov	%rax, ISF64_SS(%rsp)	/* put in user trap number */
+	mov	ISF64_ERR(%rsp), %rax
+	mov	%rax, 8+ISF64_SS(%rsp)	/* put in user errcode */
+	mov	ISF64_RIP(%rsp), %rax	/* restore rax */
+	add	$(ISF64_SS), %rsp	/* reset to original frame */
 					/* now treat as fault from user */
 	swapgs
 	jmp	L_32bit_enter
 
 L_fault_iret64:
-	mov	%rax, 16(%rsp)		/* save rax (we don`t need saved rip) */
-	mov	0(%rsp), %rax		/* get trap number */
-	mov	%rax, 48(%rsp)		/* put in user trap number */
-	mov	8(%rsp), %rax		/* get error code */
-	mov	%rax, 56(%rsp)		/* put in user errcode */
-	mov	16(%rsp), %rax		/* restore rax */
-	add	$48, %rsp		/* reset to original frame */
+	mov	%rax, ISF64_RIP(%rsp)	/* save rax (we don`t need saved rip) */
+	mov	ISF64_TRAPNO(%rsp), %rax
+	mov	%rax, ISF64_SS(%rsp)	/* put in user trap number */
+	mov	ISF64_ERR(%rsp), %rax
+	mov	%rax, 8+ISF64_SS(%rsp)	/* put in user errcode */
+	mov	ISF64_RIP(%rsp), %rax	/* restore rax */
+	add	$(ISF64_SS), %rsp	/* reset to original frame */
 					/* now treat as fault from user */
 	swapgs
 	jmp	L_64bit_enter
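+/*
+ * The frame rewrite in L_fault_iret32/64 above, sketched in C with the
+ * frame as an array of 8-byte words (offsets per the layout comment
+ * above; illustrative only):
+ *
+ *	#include <stdint.h>
+ *
+ *	static uint64_t *
+ *	fault_iret_fixup(uint64_t *sp)		// sp -> kernel-mode ISF
+ *	{
+ *		sp[7] = sp[0];	// old ss slot (56)  <- trap number
+ *		sp[8] = sp[2];	// padding slot (64) <- error code
+ *		return sp + 7;	// discard the outer frame; what remains
+ *				// now looks like a fault from user mode
+ *	}
+ */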
@@ -1183,9 +1228,9 @@ L_fault_iret64:
  * on the stack untouched since we didn't move the stack pointer.
  */
 L_32bit_fault_set_seg:
-	mov	0(%rsp), %rax		/* get trap number/function */
-	mov	8(%rsp), %rdx		/* get error code */
-	mov	40(%rsp), %rsp		/* reload stack prior to fault */
+	mov	ISF64_TRAPNO(%rsp), %rax
+	mov	ISF64_ERR(%rsp), %rdx
+	mov	ISF64_RSP(%rsp), %rsp	/* reload stack prior to fault */
 	mov	%rax,ISC32_TRAPNO(%rsp)
 	mov	%rdx,ISC32_ERR(%rsp)
 					/* now treat as fault from user */
@@ -1201,17 +1246,503 @@ L_32bit_fault_set_seg:
  * Fatal exception handlers:
  */
 Entry(db_task_dbl_fault64)
+	push	$(LO_DOUBLE_FAULT)
 	push	$(T_DOUBLE_FAULT)
-	movl	$(LO_DOUBLE_FAULT), ISF64_TRAPFN(%rsp)
 	jmp	L_enter_lohandler	
 
 Entry(db_task_stk_fault64)
+	push	$(LO_DOUBLE_FAULT)
 	push	$(T_STACK_FAULT)
-	movl	$(LO_DOUBLE_FAULT), ISF64_TRAPFN(%rsp)
 	jmp	L_enter_lohandler	
 
 Entry(mc64)
 	push	$(0)			/* Error */
+	push	$(LO_MACHINE_CHECK)
 	push	$(T_MACHINE_CHECK)
-	movl	$(LO_MACHINE_CHECK), ISF64_TRAPFN(%rsp)
 	jmp	L_enter_lohandler	
+
+
+	.code32
+
+/*
+ * All task 'exceptions' enter lo_alltraps:
+ *	esp	-> x86_saved_state_t
+ * 
+ * The rest of the state is set up as:	
+ *	cr3	 -> kernel directory
+ *	esp	 -> low based stack
+ *	gs	 -> CPU_DATA_GS
+ *	cs	 -> KERNEL32_CS
+ *	ss/ds/es -> KERNEL_DS
+ *
+ *	interrupts disabled
+ *	direction flag cleared
+ */
+Entry(lo_alltraps)
+	movl	R32_CS(%esp),%eax	/* assume 32-bit state */
+	cmpl	$(SS_64),SS_FLAVOR(%esp)/* 64-bit? */	
+	jne	1f
+	movl	R64_CS(%esp),%eax	/* 64-bit user mode */
+1:
+	testb	$3,%al
+	jz	trap_from_kernel
+						/* user mode trap */
+	TIME_TRAP_UENTRY
+
+	movl	%gs:CPU_ACTIVE_THREAD,%ecx
+	movl	TH_TASK(%ecx),%ebx
+
+	/* Check for active vtimers in the current task */
+	TASK_VTIMER_CHECK(%ebx, %ecx)
+
+	movl	%gs:CPU_KERNEL_STACK,%ebx
+	xchgl	%ebx,%esp		/* switch to kernel stack */
+
+	CCALL1(user_trap, %ebx)		/* call user trap routine */
+	/* user_trap() unmasks interrupts */
+	cli				/* hold off intrs - critical section */
+	xorl	%ecx,%ecx		/* don't check if we're in the PFZ */
+	
+/*
+ * Return from trap or system call, checking for ASTs.
+ * On lowbase PCB stack with intrs disabled
+ */	
+Entry(return_from_trap)
+	movl	%gs:CPU_ACTIVE_THREAD, %esp
+	movl	TH_PCB_ISS(%esp),%esp	/* switch back to PCB stack */
+	movl	%gs:CPU_PENDING_AST, %eax
+	testl	%eax, %eax
+	je	return_to_user		/* branch if no AST */
+LEXT(return_from_trap_with_ast)
+	movl	%gs:CPU_KERNEL_STACK, %ebx
+	xchgl	%ebx, %esp		/* switch to kernel stack */
+
+	testl	%ecx, %ecx		/* see if we need to check for an EIP in the PFZ */
+	je	2f			/* no, go handle the AST */
+	cmpl	$(SS_64), SS_FLAVOR(%ebx)	/* are we a 64-bit task? */
+	je	1f
+					/* no... 32-bit user mode */
+	movl	R32_EIP(%ebx), %eax
+	pushl	%ebx			/* save PCB stack */
+	xorl	%ebp, %ebp		/* clear frame pointer */
+	CCALL1(commpage_is_in_pfz32, %eax)
+	popl	%ebx			/* retrieve pointer to PCB stack */
+	testl	%eax, %eax
+	je	2f			/* not in the PFZ... go service AST */
+	movl	%eax, R32_EBX(%ebx)	/* let the PFZ know we've pended an AST */
+	xchgl	%ebx, %esp		/* switch back to PCB stack */
+	jmp	return_to_user
+1:					/* 64-bit user mode */
+	movl	R64_RIP(%ebx), %ecx
+	movl	R64_RIP+4(%ebx), %eax
+	pushl	%ebx			/* save PCB stack */
+	xorl	%ebp, %ebp		/* clear frame pointer */
+	CCALL2(commpage_is_in_pfz64, %ecx, %eax)
+	popl	%ebx			/* retrieve pointer to PCB stack */
+	testl	%eax, %eax		
+	je	2f			/* not in the PFZ... go service AST */
+	movl	%eax, R64_RBX(%ebx)	/* let the PFZ know we've pended an AST */
+	xchgl	%ebx, %esp		/* switch back to PCB stack */
+	jmp	return_to_user
+2:	
+	sti				/* interrupts always enabled on return to user mode */
+	pushl	%ebx			/* save PCB stack */
+	xorl	%ebp, %ebp		/* Clear framepointer */
+	CCALL1(i386_astintr, $0)	/* take the AST */
+	cli
+	
+	popl	%esp			/* switch back to PCB stack (w/exc link) */
+
+	xorl	%ecx, %ecx		/* don't check if we're in the PFZ */
+	jmp	EXT(return_from_trap)	/* and check again (rare) */
+
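+/*
+ * The AST/PFZ decision above, sketched in C for the 32-bit case (the
+ * saved-state struct is a simplified stand-in; the externs are the
+ * routines called above):
+ *
+ *	#include <stdint.h>
+ *
+ *	struct ss32_sketch { uint32_t eip, ebx; };	// stand-in
+ *
+ *	extern uint32_t	commpage_is_in_pfz32(uint32_t eip);
+ *	extern void	i386_astintr(int preemption);
+ *
+ *	static void
+ *	deliver_or_defer_ast(struct ss32_sketch *ss, int check_pfz)
+ *	{
+ *		uint32_t token;
+ *		if (check_pfz && (token = commpage_is_in_pfz32(ss->eip))) {
+ *			ss->ebx = token;	// defer: let the PFZ know
+ *			return;			// resume user untouched
+ *		}
+ *		i386_astintr(0);		// deliver the AST now
+ *	}
+ */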
+
+
+/*
+ * Trap from kernel mode.  No need to switch stacks.
+ * Interrupts must be off here - we will set them to state at time of trap
+ * as soon as it's safe for us to do so and not recurse doing preemption
+ */
+trap_from_kernel:
+	movl	%esp, %eax		/* saved state addr */
+	pushl	R32_EIP(%esp)		/* Simulate a CALL from fault point */
+	pushl   %ebp			/* Extend framepointer chain */
+	movl	%esp, %ebp
+	CCALL1WITHSP(kernel_trap, %eax)	/* Call kernel trap handler */
+	popl	%ebp
+	addl	$4, %esp
+	cli
+
+	movl	%gs:CPU_PENDING_AST,%eax		/* get pending asts */
+	testl	$ AST_URGENT,%eax	/* any urgent preemption? */
+	je	ret_to_kernel			/* no, nothing to do */
+	cmpl	$ T_PREEMPT,R32_TRAPNO(%esp)
+	je	ret_to_kernel			  /* T_PREEMPT handled in kernel_trap() */
+	testl	$ EFL_IF,R32_EFLAGS(%esp)		/* interrupts disabled? */
+	je	ret_to_kernel
+	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL		/* preemption disabled? */
+	jne	ret_to_kernel
+	movl	%gs:CPU_KERNEL_STACK,%eax
+	movl	%esp,%ecx
+	xorl	%eax,%ecx
+	and	EXT(kernel_stack_mask),%ecx
+	testl	%ecx,%ecx		/* are we on the kernel stack? */
+	jne	ret_to_kernel		/* no, skip it */
+
+	CCALL1(i386_astintr, $1)	/* take the AST */
+
+
+/*
+ * All interrupts on all tasks enter here with:
+ *	esp	 -> x86_saved_state_t
+ *
+ *	cr3	 -> kernel directory
+ *	esp	 -> low based stack
+ *	gs	 -> CPU_DATA_GS
+ *	cs	 -> KERNEL32_CS
+ *	ss/ds/es -> KERNEL_DS
+ *
+ *	interrupts disabled
+ *	direction flag cleared
+ */
+Entry(lo_allintrs)
+	/*
+	 * test whether already on interrupt stack
+	 */
+	movl	%gs:CPU_INT_STACK_TOP,%ecx
+	cmpl	%esp,%ecx
+	jb	1f
+	leal	-INTSTACK_SIZE(%ecx),%edx
+	cmpl	%esp,%edx
+	jb	int_from_intstack
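+	/*
+	 * The two compares above implement, in C terms (sketch):
+	 *	nested = (top - INTSTACK_SIZE) < esp && esp <= top;
+	 * i.e. a nested interrupt is detected by the stack pointer
+	 * already lying within the per-cpu interrupt stack.
+	 */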
+1:	
+	xchgl	%ecx,%esp		/* switch to interrupt stack */
+
+	movl	%cr0,%eax		/* get cr0 */
+	orl	$(CR0_TS),%eax		/* or in TS bit */
+	movl	%eax,%cr0		/* set cr0 */
+
+	subl	$8, %esp		/* for 16-byte stack alignment */
+	pushl	%ecx			/* save pointer to old stack */
+	movl	%ecx,%gs:CPU_INT_STATE	/* save intr state */
+	
+	TIME_INT_ENTRY			/* do timing */
+
+	movl	%gs:CPU_ACTIVE_THREAD,%ecx
+	movl	TH_TASK(%ecx),%ebx
+
+	/* Check for active vtimers in the current task */
+	TASK_VTIMER_CHECK(%ebx, %ecx)
+
+	incl	%gs:CPU_PREEMPTION_LEVEL
+	incl	%gs:CPU_INTERRUPT_LEVEL
+
+	movl	%gs:CPU_INT_STATE, %eax
+	CCALL1(interrupt, %eax)		/* call generic interrupt routine */
+
+	cli				/* just in case we returned with intrs enabled */
+	xorl	%eax,%eax
+	movl	%eax,%gs:CPU_INT_STATE	/* clear intr state pointer */
+
+	decl	%gs:CPU_INTERRUPT_LEVEL
+	decl	%gs:CPU_PREEMPTION_LEVEL
+
+	TIME_INT_EXIT			/* do timing */
+
+	movl	%gs:CPU_ACTIVE_THREAD,%eax
+	movl	TH_PCB_FPS(%eax),%eax	/* get pcb's ifps */
+	testl	%eax, %eax		/* Is there a context */
+	je	1f			/* Branch if not */
+	cmpl	$0, FP_VALID(%eax)	/* Check fp_valid */
+	jne	1f			/* Branch if valid */
+	clts				/* Clear TS */
+	jmp	2f
+1:
+	movl	%cr0,%eax		/* get cr0 */
+	orl	$(CR0_TS),%eax		/* or in TS bit */
+	movl	%eax,%cr0		/* set cr0 */
+2:
+	popl	%esp			/* switch back to old stack */
+
+	/* Load interrupted code segment into %eax */
+	movl	R32_CS(%esp),%eax	/* assume 32-bit state */
+	cmpl	$(SS_64),SS_FLAVOR(%esp)/* 64-bit? */	
+	jne	3f
+	movl	R64_CS(%esp),%eax	/* 64-bit user mode */
+3:
+	testb	$3,%al			/* user mode, */
+	jnz	ast_from_interrupt_user	/* go handle potential ASTs */
+	/*
+	 * we only want to handle preemption requests if
+	 * the interrupt fell in the kernel context
+	 * and preemption isn't disabled
+	 */
+	movl	%gs:CPU_PENDING_AST,%eax	
+	testl	$ AST_URGENT,%eax		/* any urgent requests? */
+	je	ret_to_kernel			/* no, nothing to do */
+
+	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL	/* preemption disabled? */
+	jne	ret_to_kernel			/* yes, skip it */
+
+	movl	%gs:CPU_KERNEL_STACK,%eax
+	movl	%esp,%ecx
+	xorl	%eax,%ecx
+	and	EXT(kernel_stack_mask),%ecx
+	testl	%ecx,%ecx			/* are we on the kernel stack? */
+	jne	ret_to_kernel			/* no, skip it */
+
+	/*
+	 * Take an AST from kernel space.  We don't need (and don't want)
+	 * to do as much as the case where the interrupt came from user
+	 * space.
+	 */
+	CCALL1(i386_astintr, $1)
+
+	jmp	ret_to_kernel
+
+
+/*
+ * nested int - simple path, can't preempt etc on way out
+ */
+int_from_intstack:
+	incl	%gs:CPU_PREEMPTION_LEVEL
+	incl	%gs:CPU_INTERRUPT_LEVEL
+	incl	%gs:CPU_NESTED_ISTACK
+
+	movl	%esp, %edx		/* x86_saved_state */
+	CCALL1(interrupt, %edx)
+
+	decl	%gs:CPU_INTERRUPT_LEVEL
+	decl	%gs:CPU_PREEMPTION_LEVEL
+	decl	%gs:CPU_NESTED_ISTACK
+
+	jmp	ret_to_kernel
+
+/*
+ *	Take an AST from an interrupted user
+ */
+ast_from_interrupt_user:
+	movl	%gs:CPU_PENDING_AST,%eax
+	testl	%eax,%eax		/* pending ASTs? */
+	je	ret_to_user		/* no, nothing to do */
+
+	TIME_TRAP_UENTRY
+
+	movl	$1, %ecx		/* check if we're in the PFZ */
+	jmp	EXT(return_from_trap_with_ast)	/* return */
+
+
+/*
+ * 32bit Tasks
+ * System call entries via INTR_GATE or sysenter:
+ *
+ *	esp	 -> x86_saved_state32_t
+ *	cr3	 -> kernel directory
+ *	esp	 -> low based stack
+ *	gs	 -> CPU_DATA_GS
+ *	cs	 -> KERNEL32_CS
+ *	ss/ds/es -> KERNEL_DS
+ *
+ *	interrupts disabled
+ *	direction flag cleared
+ */
+
+Entry(lo_unix_scall)
+	TIME_TRAP_UENTRY
+
+	movl	%gs:CPU_KERNEL_STACK,%edi
+	xchgl	%edi,%esp			/* switch to kernel stack */
+	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread     */
+	movl	TH_TASK(%ecx),%ebx		/* point to current task  */
+	incl	TH_SYSCALLS_UNIX(%ecx)		/* increment call count   */
+
+	/* Check for active vtimers in the current task */
+	TASK_VTIMER_CHECK(%ebx, %ecx)
+
+	sti
+
+	CCALL1(unix_syscall, %edi)
+	/*
+	 * always returns through thread_exception_return
+	 */
+
+
+Entry(lo_mach_scall)
+	TIME_TRAP_UENTRY
+
+	movl	%gs:CPU_KERNEL_STACK,%edi
+	xchgl	%edi,%esp			/* switch to kernel stack */
+	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread     */
+	movl	TH_TASK(%ecx),%ebx		/* point to current task  */
+	incl	TH_SYSCALLS_MACH(%ecx)		/* increment call count   */
+
+	/* Check for active vtimers in the current task */
+	TASK_VTIMER_CHECK(%ebx, %ecx)
+
+	sti
+
+	CCALL1(mach_call_munger, %edi)
+	/*
+	 * always returns through thread_exception_return
+	 */
+
+
+Entry(lo_mdep_scall)
+	TIME_TRAP_UENTRY
+
+	movl	%gs:CPU_KERNEL_STACK,%edi
+	xchgl	%edi,%esp			/* switch to kernel stack */
+	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread     */
+	movl	TH_TASK(%ecx),%ebx		/* point to current task  */
+
+	/* Check for active vtimers in the current task */
+	TASK_VTIMER_CHECK(%ebx, %ecx)
+	
+	sti
+
+	CCALL1(machdep_syscall, %edi)
+	/*
+	 * always returns through thread_exception_return
+	 */
+
+
+Entry(lo_diag_scall)
+	TIME_TRAP_UENTRY
+
+	movl	%gs:CPU_KERNEL_STACK,%edi
+	xchgl	%edi,%esp			/* switch to kernel stack */
+	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread     */
+	movl	TH_TASK(%ecx),%ebx		/* point to current task  */
+
+	/* Check for active vtimers in the current task */
+	TASK_VTIMER_CHECK(%ebx, %ecx)
+
+	pushl	%edi			/* push pcb stack for later */
+
+	CCALL1(diagCall, %edi)		// Call diagnostics
+	
+	cli				// Disable interruptions just in case
+	cmpl	$0,%eax			// What kind of return is this?
+	je	1f			// branch if bad (zero)
+	popl	%esp			// Get back the original stack
+	jmp	return_to_user		// Normal return, do not check asts...
+1:
+	CCALL5(i386_exception, $EXC_SYSCALL, $0x6000, $0, $1, $0)
+		// pass what would be the diag syscall
+		// error return - cause an exception
+	/* no return */
+
+
+return_to_user:
+	TIME_TRAP_UEXIT
+	jmp	ret_to_user
+	
+
+/*
+ * 64bit Tasks
+ * System call entries via syscall only:
+ *
+ *	esp	 -> x86_saved_state64_t
+ *	cr3	 -> kernel directory
+ *	esp	 -> low based stack
+ *	gs	 -> CPU_DATA_GS
+ *	cs	 -> KERNEL32_CS
+ *	ss/ds/es -> KERNEL_DS
+ *
+ *	interrupts disabled
+ *	direction flag cleared
+ */
+
+Entry(lo_syscall)
+	TIME_TRAP_UENTRY
+
+	movl	%gs:CPU_KERNEL_STACK,%edi
+	xchgl	%edi,%esp			/* switch to kernel stack */
+
+	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread     */
+	movl	TH_TASK(%ecx),%ebx		/* point to current task  */
+
+	/* Check for active vtimers in the current task */
+	TASK_VTIMER_CHECK(%ebx, %ecx)
+
+	/*
+	 * We can be here either for a mach, unix, machdep or diag syscall,
+	 * as indicated by the syscall class:
+	 */
+	movl	R64_RAX(%edi), %eax		/* syscall number/class */
+	movl	%eax, %edx
+	andl	$(SYSCALL_CLASS_MASK), %edx	/* syscall class */
+	cmpl	$(SYSCALL_CLASS_MACH<<SYSCALL_CLASS_SHIFT), %edx
+	je	EXT(lo64_mach_scall)
+	cmpl	$(SYSCALL_CLASS_UNIX<<SYSCALL_CLASS_SHIFT), %edx
+	je	EXT(lo64_unix_scall)
+	cmpl	$(SYSCALL_CLASS_MDEP<<SYSCALL_CLASS_SHIFT), %edx
+	je	EXT(lo64_mdep_scall)
+	cmpl	$(SYSCALL_CLASS_DIAG<<SYSCALL_CLASS_SHIFT), %edx
+	je	EXT(lo64_diag_scall)
+
+	sti
+
+	/* Syscall class unknown */
+	CCALL5(i386_exception, $(EXC_SYSCALL), %eax, $0, $1, $0)
+	/* no return */
+
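+/*
+ * The class dispatch above in C -- a sketch; the shift/mask values
+ * follow mach/i386/syscall_sw.h, where the class occupies the top
+ * byte of the 32-bit syscall number:
+ *
+ *	#include <stdint.h>
+ *
+ *	#define SYSCALL_CLASS_SHIFT	24
+ *	#define SYSCALL_CLASS_MASK	(0xFFu << SYSCALL_CLASS_SHIFT)
+ *
+ *	static unsigned int
+ *	syscall_class_of(uint32_t rax_lo)
+ *	{
+ *		return (rax_lo & SYSCALL_CLASS_MASK) >> SYSCALL_CLASS_SHIFT;
+ *	}
+ */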
+
+Entry(lo64_unix_scall)
+	incl	TH_SYSCALLS_UNIX(%ecx)		/* increment call count   */
+	sti
+
+	CCALL1(unix_syscall64, %edi)
+	/*
+	 * always returns through thread_exception_return
+	 */
+
+
+Entry(lo64_mach_scall)
+	incl	TH_SYSCALLS_MACH(%ecx)		/* increment call count   */
+	sti
+
+	CCALL1(mach_call_munger64, %edi)
+	/*
+	 * always returns through thread_exception_return
+	 */
+
+
+
+Entry(lo64_mdep_scall)
+	sti
+
+	CCALL1(machdep_syscall64, %edi)
+	/*
+	 * always returns through thread_exception_return
+	 */
+
+
+Entry(lo64_diag_scall)
+	CCALL1(diagCall64, %edi)	// Call diagnostics
+		
+	cli				// Disable interruptions just in case
+	cmpl	$0,%eax			// What kind of return is this?
+	je	1f
+	movl	%edi, %esp		// Get back the original stack
+	jmp	return_to_user		// Normal return, do not check asts...
+1:	
+	CCALL5(i386_exception, $EXC_SYSCALL, $0x6000, $0, $1, $0)
+		// pass what would be the diag syscall
+		// error return - cause an exception
+	/* no return */
+
+
+	
+/*
+ * Compatibility mode's last gasp...
+ */
+Entry(lo_df64)
+	movl	%esp, %eax
+	CCALL1(panic_double_fault64, %eax)
+	hlt
+
+Entry(lo_mc64)
+	movl	%esp, %eax
+	CCALL1(panic_machine_check64, %eax)
+	hlt
diff --git a/osfmk/i386/ipl.h b/osfmk/i386/ipl.h
deleted file mode 100644
index 76c8c2d1d..000000000
--- a/osfmk/i386/ipl.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/* 
- */
-
-/*
-Copyright (c) 1988,1989 Prime Computer, Inc.  Natick, MA 01760
-All Rights Reserved.
-
-Permission to use, copy, modify, and distribute this
-software and its documentation for any purpose and
-without fee is hereby granted, provided that the above
-copyright notice appears in all copies and that both the
-copyright notice and this permission notice appear in
-supporting documentation, and that the name of Prime
-Computer, Inc. not be used in advertising or publicity
-pertaining to distribution of the software without
-specific, written prior permission.
-
-THIS SOFTWARE IS PROVIDED "AS IS", AND PRIME COMPUTER,
-INC. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
-SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  IN
-NO EVENT SHALL PRIME COMPUTER, INC.  BE LIABLE FOR ANY
-SPECIAL, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY
-DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
-PROFITS, WHETHER IN ACTION OF CONTRACT, NEGLIGENCE, OR
-OTHER TORTIOUS ACTION, ARISING OUR OF OR IN CONNECTION
-WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*/
-
-#include <mach_kprof.h>
-
-#define SPL0            0
-#define SPL1            1
-#define SPL2            2
-#define SPL3            3
-#define SPL4            4
-#define SPL5            5
-#define SPL6            6
-
-#define SPLPP           5
-#define SPLTTY          6
-#define SPLNI           6
-
-#define IPLHI           8
-#define SPLHI           IPLHI
-
-#if	MACH_KPROF
-#define	SPL7		7
-#else
-#define SPL7		IPLHI
-#endif
-
-#define	SPL_CMP_GT(a, b)	((unsigned)(a) >  (unsigned)(b))
-#define	SPL_CMP_LT(a, b)	((unsigned)(a) <  (unsigned)(b))
-#define	SPL_CMP_GE(a, b)	((unsigned)(a) >= (unsigned)(b))
-#define	SPL_CMP_LE(a, b)	((unsigned)(a) <= (unsigned)(b))
-
diff --git a/osfmk/i386/lapic.c b/osfmk/i386/lapic.c
index e98665f04..b365d6070 100644
--- a/osfmk/i386/lapic.c
+++ b/osfmk/i386/lapic.c
@@ -32,16 +32,6 @@
 #include <mach/mach_types.h>
 #include <mach/kern_return.h>
 
-#include <kern/kern_types.h>
-#include <kern/cpu_number.h>
-#include <kern/cpu_data.h>
-#include <kern/assert.h>
-#include <kern/machine.h>
-#include <kern/debug.h>
-
-#include <vm/vm_map.h>
-#include <vm/vm_kern.h>
-
 #include <i386/lapic.h>
 #include <i386/cpuid.h>
 #include <i386/proc_reg.h>
@@ -52,53 +42,17 @@
 #include <i386/cpu_threads.h>
 #include <i386/machine_routines.h>
 #include <i386/tsc.h>
-#if CONFIG_MCA
-#include <i386/machine_check.h>
-#endif
-
-#if CONFIG_COUNTERS
-#include <pmc/pmc.h>
-#endif
-
-#if MACH_KDB
-#include <machine/db_machdep.h>
-#endif
 
 #include <sys/kdebug.h>
 
-#if	MP_DEBUG
-#define PAUSE		delay(1000000)
-#define DBG(x...)	kprintf(x)
-#else
-#define DBG(x...)
-#define PAUSE
-#endif	/* MP_DEBUG */
-
 /* Base vector for local APIC interrupt sources */
 int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE;
 
-lapic_ops_table_t	*lapic_ops;	/* Lapic operations switch */
-
 #define		MAX_LAPICIDS	(LAPIC_ID_MAX+1)
 int		lapic_to_cpu[MAX_LAPICIDS];
 int		cpu_to_lapic[MAX_CPUS];
 
-static vm_offset_t	lapic_pbase;	/* Physical base memory-mapped regs */
-static vm_offset_t	lapic_vbase;	/* Virtual base memory-mapped regs */
-
-static i386_intr_func_t	lapic_intr_func[LAPIC_FUNC_TABLE_SIZE];
-
-/* TRUE if local APIC was enabled by the OS not by the BIOS */
-static boolean_t lapic_os_enabled = FALSE;
-
-static boolean_t lapic_errors_masked = FALSE;
-static uint64_t lapic_last_master_error = 0;
-static uint64_t lapic_error_time_threshold = 0;
-static unsigned lapic_master_error_count = 0;
-static unsigned lapic_error_count_threshold = 5;
-static boolean_t lapic_dont_panic = FALSE;
-
-static void
+void
 lapic_cpu_map_init(void)
 {
 	int	i;
@@ -146,821 +100,3 @@ ml_get_cpuid(uint32_t lapic_index)
 	return (uint32_t)lapic_to_cpu[lapic_index];
 
 }
-
-#ifdef MP_DEBUG
-void
-lapic_cpu_map_dump(void)
-{
-	int	i;
-
-	for (i = 0; i < MAX_CPUS; i++) {
-		if (cpu_to_lapic[i] == -1)
-			continue;
-		kprintf("cpu_to_lapic[%d]: %d\n",
-			i, cpu_to_lapic[i]);
-	}
-	for (i = 0; i < MAX_LAPICIDS; i++) {
-		if (lapic_to_cpu[i] == -1)
-			continue;
-		kprintf("lapic_to_cpu[%d]: %d\n",
-			i, lapic_to_cpu[i]);
-	}
-}
-#endif /* MP_DEBUG */
-
-static void
-legacy_init(void)
-{
-	int		result;
-	vm_map_entry_t	entry;
-
-	/* Establish a map to the local apic */
-	lapic_vbase = (vm_offset_t)vm_map_min(kernel_map);
-	result = vm_map_find_space(kernel_map,
-				   (vm_map_address_t *) &lapic_vbase,
-				   round_page(LAPIC_SIZE), 0,
-				   VM_MAKE_TAG(VM_MEMORY_IOKIT), &entry);
-	if (result != KERN_SUCCESS) {
-		panic("legacy_init: vm_map_find_entry FAILED (err=%d)", result);
-	}
-	vm_map_unlock(kernel_map);
-/* Map in the local APIC non-cacheable, as recommended by Intel
- * in section 8.4.1 of the "System Programming Guide".
- */
-	pmap_enter(pmap_kernel(),
-			lapic_vbase,
-			(ppnum_t) i386_btop(lapic_pbase),
-			VM_PROT_READ|VM_PROT_WRITE,
-			VM_WIMG_IO,
-			TRUE);
-}
-
-
-static uint32_t
-legacy_read(lapic_register_t reg)
-{
-	return  *LAPIC_MMIO(reg);
-}
-
-static void
-legacy_write(lapic_register_t reg, uint32_t value)
-{
-	*LAPIC_MMIO(reg) = value;
-}
-
-static lapic_ops_table_t legacy_ops = {
-	legacy_init,
-	legacy_read,
-	legacy_write
-};
-
-static void
-x2apic_init(void)
-{
-}
-
-static uint32_t
-x2apic_read(lapic_register_t reg)
-{
-	uint32_t	lo;
-	uint32_t	hi;
-
-	rdmsr(LAPIC_MSR(reg), lo, hi);
-	return lo;
-}
-
-static void
-x2apic_write(lapic_register_t reg, uint32_t value)
-{
-	wrmsr(LAPIC_MSR(reg), value, 0);
-}
-
-static lapic_ops_table_t x2apic_ops = {
-	x2apic_init,
-	x2apic_read,
-	x2apic_write
-};
-
-
-void
-lapic_init(void)
-{
-	uint32_t	lo;
-	uint32_t	hi;
-	boolean_t	is_boot_processor;
-	boolean_t	is_lapic_enabled;
-	boolean_t	is_x2apic;
-
-	/* Examine the local APIC state */
-	rdmsr(MSR_IA32_APIC_BASE, lo, hi);
-	is_boot_processor = (lo & MSR_IA32_APIC_BASE_BSP) != 0;
-	is_lapic_enabled  = (lo & MSR_IA32_APIC_BASE_ENABLE) != 0;
-	is_x2apic         = (lo & MSR_IA32_APIC_BASE_EXTENDED) != 0;
-	lapic_pbase = (lo &  MSR_IA32_APIC_BASE_BASE);
-	kprintf("MSR_IA32_APIC_BASE %p %s %s mode %s\n", (void *) lapic_pbase,
-		is_lapic_enabled ? "enabled" : "disabled",
-		is_x2apic ? "extended" : "legacy",
-		is_boot_processor ? "BSP" : "AP");
-	if (!is_boot_processor || !is_lapic_enabled)
-		panic("Unexpected local APIC state\n");
-
-	lapic_ops = is_x2apic ? &x2apic_ops : &legacy_ops;
-
-	lapic_ops->init();
-
-	if ((LAPIC_READ(VERSION)&LAPIC_VERSION_MASK) < 0x14) {
-		panic("Local APIC version 0x%x, 0x14 or more expected\n",
-			(LAPIC_READ(VERSION)&LAPIC_VERSION_MASK));
-	}
-
-	/* Set up the lapic_id <-> cpu_number map and add this boot processor */
-	lapic_cpu_map_init();
-	lapic_cpu_map((LAPIC_READ(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK, 0);
-	kprintf("Boot cpu local APIC id 0x%x\n", cpu_to_lapic[0]);
-}
-
-
-static int
-lapic_esr_read(void)
-{
-	/* write-read register */
-	LAPIC_WRITE(ERROR_STATUS, 0);
-	return LAPIC_READ(ERROR_STATUS);
-}
-
-static void 
-lapic_esr_clear(void)
-{
-	LAPIC_WRITE(ERROR_STATUS, 0);
-	LAPIC_WRITE(ERROR_STATUS, 0);
-}
-
-static const char *DM_str[8] = {
-	"Fixed",
-	"Lowest Priority",
-	"Invalid",
-	"Invalid",
-	"NMI",
-	"Reset",
-	"Invalid",
-	"ExtINT"};
-
-static const char *TMR_str[] = {
-	"OneShot",
-	"Periodic",
-	"TSC-Deadline",
-	"Illegal"
-};
-
-void
-lapic_dump(void)
-{
-	int	i;
-
-#define BOOL(a) ((a)?' ':'!')
-#define VEC(lvt) \
-	LAPIC_READ(lvt)&LAPIC_LVT_VECTOR_MASK
-#define	DS(lvt)	\
-	(LAPIC_READ(lvt)&LAPIC_LVT_DS_PENDING)?" SendPending" : "Idle"
-#define DM(lvt) \
-	DM_str[(LAPIC_READ(lvt)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK]
-#define MASK(lvt) \
-	BOOL(LAPIC_READ(lvt)&LAPIC_LVT_MASKED)
-#define TM(lvt) \
-	(LAPIC_READ(lvt)&LAPIC_LVT_TM_LEVEL)? "Level" : "Edge"
-#define IP(lvt) \
-	(LAPIC_READ(lvt)&LAPIC_LVT_IP_PLRITY_LOW)? "Low " : "High"
-
-	kprintf("LAPIC %d at %p version 0x%x\n", 
-		(LAPIC_READ(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK,
-		(void *) lapic_vbase,
-		LAPIC_READ(VERSION)&LAPIC_VERSION_MASK);
-	kprintf("Priorities: Task 0x%x  Arbitration 0x%x  Processor 0x%x\n",
-		LAPIC_READ(TPR)&LAPIC_TPR_MASK,
-		LAPIC_READ(APR)&LAPIC_APR_MASK,
-		LAPIC_READ(PPR)&LAPIC_PPR_MASK);
-	kprintf("Destination Format 0x%x Logical Destination 0x%x\n",
-		LAPIC_READ(DFR)>>LAPIC_DFR_SHIFT,
-		LAPIC_READ(LDR)>>LAPIC_LDR_SHIFT);
-	kprintf("%cEnabled %cFocusChecking SV 0x%x\n",
-		BOOL(LAPIC_READ(SVR)&LAPIC_SVR_ENABLE),
-		BOOL(!(LAPIC_READ(SVR)&LAPIC_SVR_FOCUS_OFF)),
-		LAPIC_READ(SVR) & LAPIC_SVR_MASK);
-#if CONFIG_MCA
-	if (mca_is_cmci_present())
-		kprintf("LVT_CMCI:    Vector 0x%02x [%s] %s %cmasked\n",
-			VEC(LVT_CMCI),
-			DM(LVT_CMCI),
-			DS(LVT_CMCI),
-			MASK(LVT_CMCI));
-#endif
-	kprintf("LVT_TIMER:   Vector 0x%02x %s %cmasked %s\n",
-		VEC(LVT_TIMER),
-		DS(LVT_TIMER),
-		MASK(LVT_TIMER),
-		TMR_str[(LAPIC_READ(LVT_TIMER) >> LAPIC_LVT_TMR_SHIFT)
-                                               &  LAPIC_LVT_TMR_MASK]);
-	kprintf("  Initial Count: 0x%08x \n", LAPIC_READ(TIMER_INITIAL_COUNT));
-	kprintf("  Current Count: 0x%08x \n", LAPIC_READ(TIMER_CURRENT_COUNT));
-	kprintf("  Divide Config: 0x%08x \n", LAPIC_READ(TIMER_DIVIDE_CONFIG));
-	kprintf("LVT_PERFCNT: Vector 0x%02x [%s] %s %cmasked\n",
-		VEC(LVT_PERFCNT),
-		DM(LVT_PERFCNT),
-		DS(LVT_PERFCNT),
-		MASK(LVT_PERFCNT));
-	kprintf("LVT_THERMAL: Vector 0x%02x [%s] %s %cmasked\n",
-		VEC(LVT_THERMAL),
-		DM(LVT_THERMAL),
-		DS(LVT_THERMAL),
-		MASK(LVT_THERMAL));
-	kprintf("LVT_LINT0:   Vector 0x%02x [%s][%s][%s] %s %cmasked\n",
-		VEC(LVT_LINT0),
-		DM(LVT_LINT0),
-		TM(LVT_LINT0),
-		IP(LVT_LINT0),
-		DS(LVT_LINT0),
-		MASK(LVT_LINT0));
-	kprintf("LVT_LINT1:   Vector 0x%02x [%s][%s][%s] %s %cmasked\n",
-		VEC(LVT_LINT1),
-		DM(LVT_LINT1),
-		TM(LVT_LINT1),
-		IP(LVT_LINT1),
-		DS(LVT_LINT1),
-		MASK(LVT_LINT1));
-	kprintf("LVT_ERROR:   Vector 0x%02x %s %cmasked\n",
-		VEC(LVT_ERROR),
-		DS(LVT_ERROR),
-		MASK(LVT_ERROR));
-	kprintf("ESR: %08x \n", lapic_esr_read());
-	kprintf("       ");
-	for(i=0xf; i>=0; i--)
-		kprintf("%x%x%x%x",i,i,i,i);
-	kprintf("\n");
-	kprintf("TMR: 0x");
-	for(i=7; i>=0; i--)
-		kprintf("%08x",LAPIC_READ_OFFSET(TMR_BASE, i));
-	kprintf("\n");
-	kprintf("IRR: 0x");
-	for(i=7; i>=0; i--)
-		kprintf("%08x",LAPIC_READ_OFFSET(IRR_BASE, i));
-	kprintf("\n");
-	kprintf("ISR: 0x");
-	for(i=7; i >= 0; i--)
-		kprintf("%08x",LAPIC_READ_OFFSET(ISR_BASE, i));
-	kprintf("\n");
-}
-
-#if MACH_KDB
-/*
- *	Displays apic junk
- *
- *	da
- */
-void 
-db_apic(__unused db_expr_t addr,
-	__unused int have_addr,
-	__unused db_expr_t count,
-	__unused char *modif)
-{
-
-	lapic_dump();
-
-	return;
-}
-
-#endif
-
-boolean_t
-lapic_probe(void)
-{
-	uint32_t	lo;
-	uint32_t	hi;
-
-	if (cpuid_features() & CPUID_FEATURE_APIC)
-		return TRUE;
-
-	if (cpuid_family() == 6 || cpuid_family() == 15) {
-		/*
-		 * Mobile Pentiums:
-		 * There may be a local APIC which wasn't enabled by BIOS.
-		 * So we try to enable it explicitly.
-		 */
-		rdmsr(MSR_IA32_APIC_BASE, lo, hi);
-		lo &= ~MSR_IA32_APIC_BASE_BASE;
-		lo |= MSR_IA32_APIC_BASE_ENABLE | LAPIC_START;
-		lo |= MSR_IA32_APIC_BASE_ENABLE;
-		wrmsr(MSR_IA32_APIC_BASE, lo, hi);
-
-		/*
-		 * Re-initialize cpu features info and re-check.
-		 */
-		cpuid_set_info();
-		if (cpuid_features() & CPUID_FEATURE_APIC) {
-			printf("Local APIC discovered and enabled\n");
-			lapic_os_enabled = TRUE;
-			lapic_interrupt_base = LAPIC_REDUCED_INTERRUPT_BASE;
-			return TRUE;
-		}
-	}
-
-	return FALSE;
-}
-
-void
-lapic_shutdown(void)
-{
-	uint32_t lo;
-	uint32_t hi;
-	uint32_t value;
-
-	/* Shutdown if local APIC was enabled by OS */
-	if (lapic_os_enabled == FALSE)
-		return;
-
-	mp_disable_preemption();
-
-	/* ExtINT: masked */
-	if (get_cpu_number() == master_cpu) {
-		value = LAPIC_READ(LVT_LINT0);
-		value |= LAPIC_LVT_MASKED;
-		LAPIC_WRITE(LVT_LINT0, value);
-	}
-
-	/* Error: masked */
-	LAPIC_WRITE(LVT_ERROR, LAPIC_READ(LVT_ERROR) | LAPIC_LVT_MASKED);
-
-	/* Timer: masked */
-	LAPIC_WRITE(LVT_TIMER, LAPIC_READ(LVT_TIMER) | LAPIC_LVT_MASKED);
-
-	/* Perfmon: masked */
-	LAPIC_WRITE(LVT_PERFCNT, LAPIC_READ(LVT_PERFCNT) | LAPIC_LVT_MASKED);
-
-	/* APIC software disabled */
-	LAPIC_WRITE(SVR, LAPIC_READ(SVR) & ~LAPIC_SVR_ENABLE);
-
-	/* Bypass the APIC completely and update cpu features */
-	rdmsr(MSR_IA32_APIC_BASE, lo, hi);
-	lo &= ~MSR_IA32_APIC_BASE_ENABLE;
-	wrmsr(MSR_IA32_APIC_BASE, lo, hi);
-	cpuid_set_info();
-
-	mp_enable_preemption();
-}
-
-void
-lapic_configure(void)
-{
-	int	value;
-
-	if (lapic_error_time_threshold == 0 && cpu_number() == 0) {
-		nanoseconds_to_absolutetime(NSEC_PER_SEC >> 2, &lapic_error_time_threshold);
-		if (!PE_parse_boot_argn("lapic_dont_panic", &lapic_dont_panic, sizeof(lapic_dont_panic))) {
-			lapic_dont_panic = FALSE;
-		}
-	}
-
-	/* Set flat delivery model, logical processor id */
-	LAPIC_WRITE(DFR, LAPIC_DFR_FLAT);
-	LAPIC_WRITE(LDR, (get_cpu_number()) << LAPIC_LDR_SHIFT);
-
-	/* Accept all */
-	LAPIC_WRITE(TPR, 0);
-
-	LAPIC_WRITE(SVR, LAPIC_VECTOR(SPURIOUS) | LAPIC_SVR_ENABLE);
-
-	/* ExtINT */
-	if (get_cpu_number() == master_cpu) {
-		value = LAPIC_READ(LVT_LINT0);
-		value &= ~LAPIC_LVT_MASKED;
-		value |= LAPIC_LVT_DM_EXTINT;
-		LAPIC_WRITE(LVT_LINT0, value);
-	}
-
-	/* Timer: unmasked, one-shot */
-	LAPIC_WRITE(LVT_TIMER, LAPIC_VECTOR(TIMER));
-
-	/* Perfmon: unmasked */
-	LAPIC_WRITE(LVT_PERFCNT, LAPIC_VECTOR(PERFCNT));
-
-	/* Thermal: unmasked */
-	LAPIC_WRITE(LVT_THERMAL, LAPIC_VECTOR(THERMAL));
-
-#if CONFIG_MCA
-	/* CMCI, if available */
-	if (mca_is_cmci_present())
-		LAPIC_WRITE(LVT_CMCI, LAPIC_VECTOR(CMCI));
-#endif
-
-	if (((cpu_number() == master_cpu) && lapic_errors_masked == FALSE) ||
-		(cpu_number() != master_cpu)) {
-		lapic_esr_clear();
-		LAPIC_WRITE(LVT_ERROR, LAPIC_VECTOR(ERROR));
-	}
-}
-
-void
-lapic_set_timer(
-	boolean_t		interrupt_unmasked,
-	lapic_timer_mode_t	mode,
-	lapic_timer_divide_t	divisor,
-	lapic_timer_count_t	initial_count)
-{
-	uint32_t	timer_vector;
-
-	mp_disable_preemption();
-	timer_vector = LAPIC_READ(LVT_TIMER);
-	timer_vector &= ~(LAPIC_LVT_MASKED|LAPIC_LVT_PERIODIC);;
-	timer_vector |= interrupt_unmasked ? 0 : LAPIC_LVT_MASKED;
-	timer_vector |= (mode == periodic) ? LAPIC_LVT_PERIODIC : 0;
-	LAPIC_WRITE(LVT_TIMER, timer_vector);
-	LAPIC_WRITE(TIMER_DIVIDE_CONFIG, divisor);
-	LAPIC_WRITE(TIMER_INITIAL_COUNT, initial_count);
-	mp_enable_preemption();
-}
-
-void
-lapic_config_timer(
-	boolean_t		interrupt_unmasked,
-	lapic_timer_mode_t	mode,
-	lapic_timer_divide_t	divisor)
-{
-	uint32_t	timer_vector;
-
-	mp_disable_preemption();
-	timer_vector = LAPIC_READ(LVT_TIMER);
-	timer_vector &= ~(LAPIC_LVT_MASKED |
-			  LAPIC_LVT_PERIODIC |
-			  LAPIC_LVT_TSC_DEADLINE);
-	timer_vector |= interrupt_unmasked ? 0 : LAPIC_LVT_MASKED;
-	timer_vector |= (mode == periodic) ? LAPIC_LVT_PERIODIC : 0;
-	LAPIC_WRITE(LVT_TIMER, timer_vector);
-	LAPIC_WRITE(TIMER_DIVIDE_CONFIG, divisor);
-	mp_enable_preemption();
-}
-
-/*
- * Configure TSC-deadline timer mode. The lapic interrupt is always unmasked.
- */
-void
-lapic_config_tsc_deadline_timer(void)
-{
-	uint32_t	timer_vector;
-
-	DBG("lapic_config_tsc_deadline_timer()\n");
-	mp_disable_preemption();
-	timer_vector = LAPIC_READ(LVT_TIMER);
-	timer_vector &= ~(LAPIC_LVT_MASKED |
-			  LAPIC_LVT_PERIODIC);
-	timer_vector |= LAPIC_LVT_TSC_DEADLINE;
-	LAPIC_WRITE(LVT_TIMER, timer_vector);
-
-	/* Serialize writes per Intel OSWG */
-	do {
-		lapic_set_tsc_deadline_timer(rdtsc64() + (1ULL<<32));
-	} while (lapic_get_tsc_deadline_timer() == 0);
-	lapic_set_tsc_deadline_timer(0);
-
-	mp_enable_preemption();
-	DBG("lapic_config_tsc_deadline_timer() done\n");
-}
-
-void
-lapic_set_timer_fast(
-	lapic_timer_count_t	initial_count)
-{
-	LAPIC_WRITE(LVT_TIMER, LAPIC_READ(LVT_TIMER) & ~LAPIC_LVT_MASKED);
-	LAPIC_WRITE(TIMER_INITIAL_COUNT, initial_count);
-}
-
-void
-lapic_set_tsc_deadline_timer(uint64_t deadline)
-{
-	wrmsr64(MSR_IA32_TSC_DEADLINE, deadline);
-}
-
-uint64_t
-lapic_get_tsc_deadline_timer(void)
-{
-	return rdmsr64(MSR_IA32_TSC_DEADLINE);
-}
-
-void
-lapic_get_timer(
-	lapic_timer_mode_t	*mode,
-	lapic_timer_divide_t	*divisor,
-	lapic_timer_count_t	*initial_count,
-	lapic_timer_count_t	*current_count)
-{
-	mp_disable_preemption();
-	if (mode)
-		*mode = (LAPIC_READ(LVT_TIMER) & LAPIC_LVT_PERIODIC) ?
-				periodic : one_shot;
-	if (divisor)
-		*divisor = LAPIC_READ(TIMER_DIVIDE_CONFIG) & LAPIC_TIMER_DIVIDE_MASK;
-	if (initial_count)
-		*initial_count = LAPIC_READ(TIMER_INITIAL_COUNT);
-	if (current_count)
-		*current_count = LAPIC_READ(TIMER_CURRENT_COUNT);
-	mp_enable_preemption();
-} 
-
-static inline void
-_lapic_end_of_interrupt(void)
-{
-	LAPIC_WRITE(EOI, 0);
-}
-
-void
-lapic_end_of_interrupt(void)
-{
-	_lapic_end_of_interrupt();
-}
-
-void lapic_unmask_perfcnt_interrupt(void) {
-	LAPIC_WRITE(LVT_PERFCNT, LAPIC_VECTOR(PERFCNT));
-}
-
-void lapic_set_perfcnt_interrupt_mask(boolean_t mask) {
-	uint32_t m = (mask ? LAPIC_LVT_MASKED : 0);
-	LAPIC_WRITE(LVT_PERFCNT, LAPIC_VECTOR(PERFCNT) | m);
-}
-
-void
-lapic_set_intr_func(int vector, i386_intr_func_t func)
-{
-	if (vector > lapic_interrupt_base)
-		vector -= lapic_interrupt_base;
-
-	switch (vector) {
-	case LAPIC_NMI_INTERRUPT:
-	case LAPIC_INTERPROCESSOR_INTERRUPT:
-	case LAPIC_TIMER_INTERRUPT:
-	case LAPIC_THERMAL_INTERRUPT:
-	case LAPIC_PERFCNT_INTERRUPT:
-	case LAPIC_CMCI_INTERRUPT:
-	case LAPIC_PM_INTERRUPT:
-		lapic_intr_func[vector] = func;
-		break;
-	default:
-		panic("lapic_set_intr_func(%d,%p) invalid vector\n",
-			vector, func);
-	}
-}
-
-void	lapic_set_pmi_func(i386_intr_func_t func) {
-	lapic_set_intr_func(LAPIC_VECTOR(PERFCNT), func);
-}
-
-int
-lapic_interrupt(int interrupt_num, x86_saved_state_t *state)
-{
-	int	retval = 0;
-	int 	esr = -1;
-
-	interrupt_num -= lapic_interrupt_base;
-	if (interrupt_num < 0) {
-		if (interrupt_num == (LAPIC_NMI_INTERRUPT - lapic_interrupt_base) &&
-		    lapic_intr_func[LAPIC_NMI_INTERRUPT] != NULL) {
-			retval = (*lapic_intr_func[LAPIC_NMI_INTERRUPT])(state);
-			return retval;
-		}
-		else
-			return 0;
-	}
-
-	switch(interrupt_num) {
-	case LAPIC_TIMER_INTERRUPT:
-	case LAPIC_THERMAL_INTERRUPT:
-	case LAPIC_INTERPROCESSOR_INTERRUPT:
-	case LAPIC_PM_INTERRUPT:
-		if (lapic_intr_func[interrupt_num] != NULL)
-			(void) (*lapic_intr_func[interrupt_num])(state);
-		_lapic_end_of_interrupt();
-		retval = 1;
-		break;
-	case LAPIC_PERFCNT_INTERRUPT:
-		/* If a function has been registered, invoke it.  Otherwise,
-		 * pass up to IOKit.
-		 */
-		if (lapic_intr_func[interrupt_num] != NULL) {
-			(void) (*lapic_intr_func[interrupt_num])(state);
-			/* Unmask the interrupt since we don't expect legacy users
-			 * to be responsible for it.
-			 */
-			lapic_unmask_perfcnt_interrupt();
-			_lapic_end_of_interrupt();
-			retval = 1;
-		}
-		break;
-	case LAPIC_CMCI_INTERRUPT:
-		if (lapic_intr_func[interrupt_num] != NULL)
-			(void) (*lapic_intr_func[interrupt_num])(state);
-		/* return 0 for plaform expert to handle */
-		break;
-	case LAPIC_ERROR_INTERRUPT:
-		/* We treat error interrupts on APs as fatal.
-		 * The current interrupt steering scheme directs most
-		 * external interrupts to the BSP (HPET interrupts being
-		 * a notable exception); hence, such an error
-		 * on an AP may signify LVT corruption (with "may" being
-		 * the operative word). On the BSP, we adopt a more
-		 * lenient approach, in the interests of enhancing
-		 * debuggability and reducing fragility.
-		 * If "lapic_error_count_threshold" error interrupts
-		 * occur within "lapic_error_time_threshold" absolute
-		 * time units, we mask the error vector and log. The
-		 * error interrupts themselves are likely
-		 * side effects of issues which are beyond the purview of
-		 * the local APIC interrupt handler, however. The Error
-		 * Status Register value (the illegal destination
-		 * vector code is one observed in practice) indicates
-		 * the immediate cause of the error.
-		 */
-		esr = lapic_esr_read();
-		lapic_dump();
-
-		if ((debug_boot_arg && (lapic_dont_panic == FALSE)) ||
-			cpu_number() != master_cpu) {
-			panic("Local APIC error, ESR: %d\n", esr);
-		}
-
-		if (cpu_number() == master_cpu) {
-			uint64_t abstime = mach_absolute_time();
-			if ((abstime - lapic_last_master_error) < lapic_error_time_threshold) {
-				if (lapic_master_error_count++ > lapic_error_count_threshold) {
-					lapic_errors_masked = TRUE;
-					LAPIC_WRITE(LVT_ERROR, LAPIC_READ(LVT_ERROR) | LAPIC_LVT_MASKED);
-					printf("Local APIC: errors masked\n");
-				}
-			}
-			else {
-				lapic_last_master_error = abstime;
-				lapic_master_error_count = 0;
-			}
-			printf("Local APIC error on master CPU, ESR: %d, error count this run: %d\n", esr, lapic_master_error_count);
-		}
-
-		_lapic_end_of_interrupt();
-		retval = 1;
-		break;
-	case LAPIC_SPURIOUS_INTERRUPT:
-		kprintf("SPIV\n");
-		/* No EOI required here */
-		retval = 1;
-		break;
-	case LAPIC_PMC_SW_INTERRUPT: 
-		{
-#if CONFIG_COUNTERS
-			thread_t old, new;
-			ml_get_csw_threads(&old, &new);
-
-			if (pmc_context_switch(old, new) == TRUE) {
-				retval = 1;
-				/* No EOI required for SWI */
-			}
-#endif /* CONFIG_COUNTERS */
-		}
-		break;
-	}
-
-	return retval;
-}
-
-void
-lapic_smm_restore(void)
-{
-	boolean_t state;
-
-	if (lapic_os_enabled == FALSE)
-		return;
-
-	state = ml_set_interrupts_enabled(FALSE);
-
- 	if (LAPIC_ISR_IS_SET(LAPIC_REDUCED_INTERRUPT_BASE, TIMER)) {
-		/*
-		 * Bogus SMI handler enables interrupts but does not know about
-		 * local APIC interrupt sources. When APIC timer counts down to
-		 * zero while in SMM, local APIC will end up waiting for an EOI
-		 * but no interrupt was delivered to the OS.
- 		 */
-		_lapic_end_of_interrupt();
-
-		/*
-		 * timer is one-shot, trigger another quick countdown to trigger
-		 * another timer interrupt.
-		 */
-		if (LAPIC_READ(TIMER_CURRENT_COUNT) == 0) {
-			LAPIC_WRITE(TIMER_INITIAL_COUNT, 1);
-		}
-
-		kprintf("lapic_smm_restore\n");
-	}
-
-	ml_set_interrupts_enabled(state);
-}
-
-void
-lapic_send_ipi(int cpu, int vector)
-{
-	boolean_t	state;
-
-	if (vector < lapic_interrupt_base)
-		vector += lapic_interrupt_base;
-
-	state = ml_set_interrupts_enabled(FALSE);
-
-	/* Wait for pending outgoing send to complete */
-	while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) {
-		cpu_pause();
-	}
-
-	LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
-	LAPIC_WRITE(ICR, vector | LAPIC_ICR_DM_FIXED);
-
-	(void) ml_set_interrupts_enabled(state);
-}
-
-/*
- * The following interfaces are privately exported to AICPM.
- */
-
-boolean_t
-lapic_is_interrupt_pending(void)
-{
-	int		i;
-
-	for (i = 0; i < 8; i += 1) {
-		if ((LAPIC_READ_OFFSET(IRR_BASE, i) != 0) ||
-		    (LAPIC_READ_OFFSET(ISR_BASE, i) != 0))
-			return (TRUE);
-	}
-
-	return (FALSE);
-}
-
-boolean_t
-lapic_is_interrupting(uint8_t vector)
-{
-	int		i;
-	int		bit;
-	uint32_t	irr;
-	uint32_t	isr;
-
-	i = vector / 32;
-	bit = 1 << (vector % 32);
-
-	irr = LAPIC_READ_OFFSET(IRR_BASE, i);
-	isr = LAPIC_READ_OFFSET(ISR_BASE, i);
-
-	if ((irr | isr) & bit)
-		return (TRUE);
-
-	return (FALSE);
-}
-
-void
-lapic_interrupt_counts(uint64_t intrs[256])
-{
-	int		i;
-	int		j;
-	int		bit;
-	uint32_t	irr;
-	uint32_t	isr;
-
-	if (intrs == NULL)
-		return;
-
-	for (i = 0; i < 8; i += 1) {
-		irr = LAPIC_READ_OFFSET(IRR_BASE, i);
-		isr = LAPIC_READ_OFFSET(ISR_BASE, i);
-
-		if ((isr | irr) == 0)
-			continue;
-
-		for (j = (i == 0) ? 16 : 0; j < 32; j += 1) {
-			bit = (32 * i) + j;
-			if ((isr | irr) & (1 << j))
-				intrs[bit] += 1;
-		}
-	}
-}
-
-void
-lapic_disable_timer(void)
-{
-	uint32_t	lvt_timer;
-
-	/*
-         * If we're in deadline timer mode,
-	 * simply clear the deadline timer, otherwise
-	 * mask the timer interrupt and clear the countdown.
-         */
-	lvt_timer = LAPIC_READ(LVT_TIMER);
-	if (lvt_timer & LAPIC_LVT_TSC_DEADLINE) {
-		wrmsr64(MSR_IA32_TSC_DEADLINE, 0);
-	} else {
-		LAPIC_WRITE(LVT_TIMER, lvt_timer | LAPIC_LVT_MASKED);
-		LAPIC_WRITE(TIMER_INITIAL_COUNT, 0);
-		lvt_timer = LAPIC_READ(LVT_TIMER);
-	}
-}
diff --git a/osfmk/i386/lapic.h b/osfmk/i386/lapic.h
index 655864230..9d6f53abb 100644
--- a/osfmk/i386/lapic.h
+++ b/osfmk/i386/lapic.h
@@ -246,6 +246,7 @@ extern void		lapic_shutdown(void);
 extern void		lapic_smm_restore(void);
 extern boolean_t	lapic_probe(void);
 extern void		lapic_dump(void);
+extern void		lapic_cpu_map_dump(void);
 extern int		lapic_interrupt(
 				int interrupt, x86_saved_state_t *state);
 extern void		lapic_end_of_interrupt(void);
@@ -256,6 +257,7 @@ extern void		lapic_send_ipi(int cpu, int interrupt);
 extern int		lapic_to_cpu[];
 extern int		cpu_to_lapic[];
 extern int		lapic_interrupt_base;
+extern void		lapic_cpu_map_init(void);
 extern void		lapic_cpu_map(int lapic, int cpu_num);
 extern uint32_t		ml_get_apicid(uint32_t cpu);
 extern uint32_t		ml_get_cpuid(uint32_t lapic_index);
diff --git a/osfmk/i386/lapic_native.c b/osfmk/i386/lapic_native.c
new file mode 100644
index 000000000..7142be269
--- /dev/null
+++ b/osfmk/i386/lapic_native.c
@@ -0,0 +1,919 @@
+/*
+ * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * @OSF_COPYRIGHT@
+ */
+
+#include <mach/mach_types.h>
+#include <mach/kern_return.h>
+
+#include <kern/kern_types.h>
+#include <kern/cpu_number.h>
+#include <kern/cpu_data.h>
+#include <kern/assert.h>
+#include <kern/machine.h>
+#include <kern/debug.h>
+
+#include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+
+#include <i386/lapic.h>
+#include <i386/cpuid.h>
+#include <i386/proc_reg.h>
+#include <i386/machine_cpu.h>
+#include <i386/misc_protos.h>
+#include <i386/mp.h>
+#include <i386/postcode.h>
+#include <i386/cpu_threads.h>
+#include <i386/machine_routines.h>
+#include <i386/tsc.h>
+#if CONFIG_MCA
+#include <i386/machine_check.h>
+#endif
+
+#if CONFIG_COUNTERS
+#include <pmc/pmc.h>
+#endif
+
+#if MACH_KDB
+#include <machine/db_machdep.h>
+#endif
+
+#include <sys/kdebug.h>
+
+#if	MP_DEBUG
+#define PAUSE		delay(1000000)
+#define DBG(x...)	kprintf(x)
+#else
+#define DBG(x...)
+#define PAUSE
+#endif	/* MP_DEBUG */
+
+lapic_ops_table_t	*lapic_ops;	/* Lapic operations switch */
+
+static vm_map_offset_t	lapic_pbase;	/* Physical base memory-mapped regs */
+static vm_offset_t	lapic_vbase;	/* Virtual base memory-mapped regs */
+
+static i386_intr_func_t	lapic_intr_func[LAPIC_FUNC_TABLE_SIZE];
+
+/* TRUE if the local APIC was enabled by the OS, not by the BIOS */
+static boolean_t lapic_os_enabled = FALSE;
+
+static boolean_t lapic_errors_masked = FALSE;
+static uint64_t lapic_last_master_error = 0;
+static uint64_t lapic_error_time_threshold = 0;
+static unsigned lapic_master_error_count = 0;
+static unsigned lapic_error_count_threshold = 5;
+static boolean_t lapic_dont_panic = FALSE;
+
+#ifdef MP_DEBUG
+void
+lapic_cpu_map_dump(void)
+{
+	int	i;
+
+	for (i = 0; i < MAX_CPUS; i++) {
+		if (cpu_to_lapic[i] == -1)
+			continue;
+		kprintf("cpu_to_lapic[%d]: %d\n",
+			i, cpu_to_lapic[i]);
+	}
+	for (i = 0; i < MAX_LAPICIDS; i++) {
+		if (lapic_to_cpu[i] == -1)
+			continue;
+		kprintf("lapic_to_cpu[%d]: %d\n",
+			i, lapic_to_cpu[i]);
+	}
+}
+#endif /* MP_DEBUG */
+
+static void
+legacy_init(void)
+{
+	int		result;
+	vm_map_entry_t	entry;
+	vm_map_offset_t lapic_vbase64;
+	/* Establish a map to the local apic */
+
+	lapic_vbase64 = (vm_offset_t)vm_map_min(kernel_map);
+	result = vm_map_find_space(kernel_map,
+				   &lapic_vbase64,
+				   round_page(LAPIC_SIZE), 0,
+				   VM_MAKE_TAG(VM_MEMORY_IOKIT), &entry);
+	/* Convert the 64-bit vm_map_offset_t to a pointer-sized vm_offset_t */
+	lapic_vbase = (vm_offset_t) lapic_vbase64;
+	if (result != KERN_SUCCESS) {
+		panic("legacy_init: vm_map_find_entry FAILED (err=%d)", result);
+	}
+	vm_map_unlock(kernel_map);
+	/*
+	 * Map in the local APIC non-cacheable, as recommended by Intel
+	 * in section 8.4.1 of the "System Programming Guide".
+	 */
+	pmap_enter(pmap_kernel(),
+			lapic_vbase,
+			(ppnum_t) i386_btop(lapic_pbase),
+			VM_PROT_READ|VM_PROT_WRITE,
+			VM_WIMG_IO,
+			TRUE);
+}
+
+
+static uint32_t
+legacy_read(lapic_register_t reg)
+{
+	return  *LAPIC_MMIO(reg);
+}
+
+static void
+legacy_write(lapic_register_t reg, uint32_t value)
+{
+	*LAPIC_MMIO(reg) = value;
+}
+
+static lapic_ops_table_t legacy_ops = {
+	legacy_init,
+	legacy_read,
+	legacy_write
+};
+
+static void
+x2apic_init(void)
+{
+}
+
+static uint32_t
+x2apic_read(lapic_register_t reg)
+{
+	uint32_t	lo;
+	uint32_t	hi;
+
+	rdmsr(LAPIC_MSR(reg), lo, hi);
+	return lo;
+}
+
+static void
+x2apic_write(lapic_register_t reg, uint32_t value)
+{
+	wrmsr(LAPIC_MSR(reg), value, 0);
+}
+
+static lapic_ops_table_t x2apic_ops = {
+	x2apic_init,
+	x2apic_read,
+	x2apic_write
+};
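+
+/*
+ * A sketch of the dispatch (this assumes the LAPIC_READ/LAPIC_WRITE
+ * macro shapes in i386/lapic.h):
+ *
+ *	#define LAPIC_WRITE(reg, val)	lapic_ops->write(reg, val)
+ *	#define LAPIC_READ(reg)		lapic_ops->read(reg)
+ *
+ * so legacy MMIO and x2APIC MSR accesses share a single code path.
+ */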
+
+
+void
+lapic_init(void)
+{
+	uint32_t	lo;
+	uint32_t	hi;
+	boolean_t	is_boot_processor;
+	boolean_t	is_lapic_enabled;
+	boolean_t	is_x2apic;
+
+	/* Examine the local APIC state */
+	rdmsr(MSR_IA32_APIC_BASE, lo, hi);
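+	/*
+	 * IA32_APIC_BASE layout (Intel SDM): bit 8 = BSP, bit 10 = x2APIC
+	 * mode enable, bit 11 = APIC global enable, bits 12 and up = the
+	 * physical base address.
+	 */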
+	is_boot_processor = (lo & MSR_IA32_APIC_BASE_BSP) != 0;
+	is_lapic_enabled  = (lo & MSR_IA32_APIC_BASE_ENABLE) != 0;
+	is_x2apic         = (lo & MSR_IA32_APIC_BASE_EXTENDED) != 0;
+	lapic_pbase = (lo &  MSR_IA32_APIC_BASE_BASE);
+	kprintf("MSR_IA32_APIC_BASE 0x%llx %s %s mode %s\n", lapic_pbase,
+		is_lapic_enabled ? "enabled" : "disabled",
+		is_x2apic ? "extended" : "legacy",
+		is_boot_processor ? "BSP" : "AP");
+	if (!is_boot_processor || !is_lapic_enabled)
+		panic("Unexpected local APIC state\n");
+
+	lapic_ops = is_x2apic ? &x2apic_ops : &legacy_ops;
+
+	lapic_ops->init();
+
+	if ((LAPIC_READ(VERSION)&LAPIC_VERSION_MASK) < 0x14) {
+		panic("Local APIC version 0x%x, 0x14 or more expected\n",
+			(LAPIC_READ(VERSION)&LAPIC_VERSION_MASK));
+	}
+
+	/* Set up the lapic_id <-> cpu_number map and add this boot processor */
+	lapic_cpu_map_init();
+	lapic_cpu_map((LAPIC_READ(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK, 0);
+	kprintf("Boot cpu local APIC id 0x%x\n", cpu_to_lapic[0]);
+}
+
+
+static int
+lapic_esr_read(void)
+{
+	/* write-read register: a write latches the accumulated errors for the subsequent read */
+	LAPIC_WRITE(ERROR_STATUS, 0);
+	return LAPIC_READ(ERROR_STATUS);
+}
+
+static void 
+lapic_esr_clear(void)
+{
+	LAPIC_WRITE(ERROR_STATUS, 0);
+	LAPIC_WRITE(ERROR_STATUS, 0);
+}
+
+static const char *DM_str[8] = {
+	"Fixed",
+	"Lowest Priority",
+	"Invalid",
+	"Invalid",
+	"NMI",
+	"Reset",
+	"Invalid",
+	"ExtINT"};
+
+static const char *TMR_str[] = {
+	"OneShot",
+	"Periodic",
+	"TSC-Deadline",
+	"Illegal"
+};
+
+void
+lapic_dump(void)
+{
+	int	i;
+
+#define BOOL(a) ((a)?' ':'!')
+#define VEC(lvt) \
+	LAPIC_READ(lvt)&LAPIC_LVT_VECTOR_MASK
+#define	DS(lvt)	\
+	(LAPIC_READ(lvt)&LAPIC_LVT_DS_PENDING)?" SendPending" : "Idle"
+#define DM(lvt) \
+	DM_str[(LAPIC_READ(lvt)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK]
+#define MASK(lvt) \
+	BOOL(LAPIC_READ(lvt)&LAPIC_LVT_MASKED)
+#define TM(lvt) \
+	(LAPIC_READ(lvt)&LAPIC_LVT_TM_LEVEL)? "Level" : "Edge"
+#define IP(lvt) \
+	(LAPIC_READ(lvt)&LAPIC_LVT_IP_PLRITY_LOW)? "Low " : "High"
+
+	kprintf("LAPIC %d at %p version 0x%x\n", 
+		(LAPIC_READ(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK,
+		(void *) lapic_vbase,
+		LAPIC_READ(VERSION)&LAPIC_VERSION_MASK);
+	kprintf("Priorities: Task 0x%x  Arbitration 0x%x  Processor 0x%x\n",
+		LAPIC_READ(TPR)&LAPIC_TPR_MASK,
+		LAPIC_READ(APR)&LAPIC_APR_MASK,
+		LAPIC_READ(PPR)&LAPIC_PPR_MASK);
+	kprintf("Destination Format 0x%x Logical Destination 0x%x\n",
+		LAPIC_READ(DFR)>>LAPIC_DFR_SHIFT,
+		LAPIC_READ(LDR)>>LAPIC_LDR_SHIFT);
+	kprintf("%cEnabled %cFocusChecking SV 0x%x\n",
+		BOOL(LAPIC_READ(SVR)&LAPIC_SVR_ENABLE),
+		BOOL(!(LAPIC_READ(SVR)&LAPIC_SVR_FOCUS_OFF)),
+		LAPIC_READ(SVR) & LAPIC_SVR_MASK);
+#if CONFIG_MCA
+	if (mca_is_cmci_present())
+		kprintf("LVT_CMCI:    Vector 0x%02x [%s] %s %cmasked\n",
+			VEC(LVT_CMCI),
+			DM(LVT_CMCI),
+			DS(LVT_CMCI),
+			MASK(LVT_CMCI));
+#endif
+	kprintf("LVT_TIMER:   Vector 0x%02x %s %cmasked %s\n",
+		VEC(LVT_TIMER),
+		DS(LVT_TIMER),
+		MASK(LVT_TIMER),
+		TMR_str[(LAPIC_READ(LVT_TIMER) >> LAPIC_LVT_TMR_SHIFT)
+                                               &  LAPIC_LVT_TMR_MASK]);
+	kprintf("  Initial Count: 0x%08x \n", LAPIC_READ(TIMER_INITIAL_COUNT));
+	kprintf("  Current Count: 0x%08x \n", LAPIC_READ(TIMER_CURRENT_COUNT));
+	kprintf("  Divide Config: 0x%08x \n", LAPIC_READ(TIMER_DIVIDE_CONFIG));
+	kprintf("LVT_PERFCNT: Vector 0x%02x [%s] %s %cmasked\n",
+		VEC(LVT_PERFCNT),
+		DM(LVT_PERFCNT),
+		DS(LVT_PERFCNT),
+		MASK(LVT_PERFCNT));
+	kprintf("LVT_THERMAL: Vector 0x%02x [%s] %s %cmasked\n",
+		VEC(LVT_THERMAL),
+		DM(LVT_THERMAL),
+		DS(LVT_THERMAL),
+		MASK(LVT_THERMAL));
+	kprintf("LVT_LINT0:   Vector 0x%02x [%s][%s][%s] %s %cmasked\n",
+		VEC(LVT_LINT0),
+		DM(LVT_LINT0),
+		TM(LVT_LINT0),
+		IP(LVT_LINT0),
+		DS(LVT_LINT0),
+		MASK(LVT_LINT0));
+	kprintf("LVT_LINT1:   Vector 0x%02x [%s][%s][%s] %s %cmasked\n",
+		VEC(LVT_LINT1),
+		DM(LVT_LINT1),
+		TM(LVT_LINT1),
+		IP(LVT_LINT1),
+		DS(LVT_LINT1),
+		MASK(LVT_LINT1));
+	kprintf("LVT_ERROR:   Vector 0x%02x %s %cmasked\n",
+		VEC(LVT_ERROR),
+		DS(LVT_ERROR),
+		MASK(LVT_ERROR));
+	kprintf("ESR: %08x \n", lapic_esr_read());
+	kprintf("       ");
+	for(i=0xf; i>=0; i--)
+		kprintf("%x%x%x%x",i,i,i,i);
+	kprintf("\n");
+	kprintf("TMR: 0x");
+	for(i=7; i>=0; i--)
+		kprintf("%08x",LAPIC_READ_OFFSET(TMR_BASE, i));
+	kprintf("\n");
+	kprintf("IRR: 0x");
+	for(i=7; i>=0; i--)
+		kprintf("%08x",LAPIC_READ_OFFSET(IRR_BASE, i));
+	kprintf("\n");
+	kprintf("ISR: 0x");
+	for(i=7; i >= 0; i--)
+		kprintf("%08x",LAPIC_READ_OFFSET(ISR_BASE, i));
+	kprintf("\n");
+}
+
+#if MACH_KDB
+/*
+ *	Display local APIC state.
+ *
+ *	kdb command: da
+ */
+void 
+db_apic(__unused db_expr_t addr,
+	__unused int have_addr,
+	__unused db_expr_t count,
+	__unused char *modif)
+{
+
+	lapic_dump();
+
+	return;
+}
+
+#endif
+
+boolean_t
+lapic_probe(void)
+{
+	uint32_t	lo;
+	uint32_t	hi;
+
+	if (cpuid_features() & CPUID_FEATURE_APIC)
+		return TRUE;
+
+	if (cpuid_family() == 6 || cpuid_family() == 15) {
+		/*
+		 * Mobile Pentiums:
+		 * There may be a local APIC which wasn't enabled by BIOS.
+		 * So we try to enable it explicitly.
+		 */
+		rdmsr(MSR_IA32_APIC_BASE, lo, hi);
+		lo &= ~MSR_IA32_APIC_BASE_BASE;
+		lo |= MSR_IA32_APIC_BASE_ENABLE | LAPIC_START;
+		wrmsr(MSR_IA32_APIC_BASE, lo, hi);
+
+		/*
+		 * Re-initialize cpu features info and re-check.
+		 */
+		cpuid_set_info();
+		if (cpuid_features() & CPUID_FEATURE_APIC) {
+			printf("Local APIC discovered and enabled\n");
+			lapic_os_enabled = TRUE;
+			lapic_interrupt_base = LAPIC_REDUCED_INTERRUPT_BASE;
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+void
+lapic_shutdown(void)
+{
+	uint32_t lo;
+	uint32_t hi;
+	uint32_t value;
+
+	/* Shutdown if local APIC was enabled by OS */
+	if (lapic_os_enabled == FALSE)
+		return;
+
+	mp_disable_preemption();
+
+	/* ExtINT: masked */
+	if (get_cpu_number() == master_cpu) {
+		value = LAPIC_READ(LVT_LINT0);
+		value |= LAPIC_LVT_MASKED;
+		LAPIC_WRITE(LVT_LINT0, value);
+	}
+
+	/* Error: masked */
+	LAPIC_WRITE(LVT_ERROR, LAPIC_READ(LVT_ERROR) | LAPIC_LVT_MASKED);
+
+	/* Timer: masked */
+	LAPIC_WRITE(LVT_TIMER, LAPIC_READ(LVT_TIMER) | LAPIC_LVT_MASKED);
+
+	/* Perfmon: masked */
+	LAPIC_WRITE(LVT_PERFCNT, LAPIC_READ(LVT_PERFCNT) | LAPIC_LVT_MASKED);
+
+	/* APIC software disabled */
+	LAPIC_WRITE(SVR, LAPIC_READ(SVR) & ~LAPIC_SVR_ENABLE);
+
+	/* Bypass the APIC completely and update cpu features */
+	rdmsr(MSR_IA32_APIC_BASE, lo, hi);
+	lo &= ~MSR_IA32_APIC_BASE_ENABLE;
+	wrmsr(MSR_IA32_APIC_BASE, lo, hi);
+	cpuid_set_info();
+
+	mp_enable_preemption();
+}
+
+void
+lapic_configure(void)
+{
+	int	value;
+
+	if (lapic_error_time_threshold == 0 && cpu_number() == 0) {
+		nanoseconds_to_absolutetime(NSEC_PER_SEC >> 2, &lapic_error_time_threshold);
+		if (!PE_parse_boot_argn("lapic_dont_panic", &lapic_dont_panic, sizeof(lapic_dont_panic))) {
+			lapic_dont_panic = FALSE;
+		}
+	}
+
+	/* Set flat delivery model, logical processor id */
+	LAPIC_WRITE(DFR, LAPIC_DFR_FLAT);
+	LAPIC_WRITE(LDR, (get_cpu_number()) << LAPIC_LDR_SHIFT);
+
+	/* Accept all */
+	LAPIC_WRITE(TPR, 0);
+
+	LAPIC_WRITE(SVR, LAPIC_VECTOR(SPURIOUS) | LAPIC_SVR_ENABLE);
+
+	/* ExtINT */
+	if (get_cpu_number() == master_cpu) {
+		value = LAPIC_READ(LVT_LINT0);
+		value &= ~LAPIC_LVT_MASKED;
+		value |= LAPIC_LVT_DM_EXTINT;
+		LAPIC_WRITE(LVT_LINT0, value);
+	}
+
+	/* Timer: unmasked, one-shot */
+	LAPIC_WRITE(LVT_TIMER, LAPIC_VECTOR(TIMER));
+
+	/* Perfmon: unmasked */
+	LAPIC_WRITE(LVT_PERFCNT, LAPIC_VECTOR(PERFCNT));
+
+	/* Thermal: unmasked */
+	LAPIC_WRITE(LVT_THERMAL, LAPIC_VECTOR(THERMAL));
+
+#if CONFIG_MCA
+	/* CMCI, if available */
+	if (mca_is_cmci_present())
+		LAPIC_WRITE(LVT_CMCI, LAPIC_VECTOR(CMCI));
+#endif
+
+	if (((cpu_number() == master_cpu) && lapic_errors_masked == FALSE) ||
+		(cpu_number() != master_cpu)) {
+		lapic_esr_clear();
+		LAPIC_WRITE(LVT_ERROR, LAPIC_VECTOR(ERROR));
+	}
+}
+
+void
+lapic_set_timer(
+	boolean_t		interrupt_unmasked,
+	lapic_timer_mode_t	mode,
+	lapic_timer_divide_t	divisor,
+	lapic_timer_count_t	initial_count)
+{
+	uint32_t	timer_vector;
+
+	mp_disable_preemption();
+	timer_vector = LAPIC_READ(LVT_TIMER);
+	timer_vector &= ~(LAPIC_LVT_MASKED|LAPIC_LVT_PERIODIC);;
+	timer_vector |= interrupt_unmasked ? 0 : LAPIC_LVT_MASKED;
+	timer_vector |= (mode == periodic) ? LAPIC_LVT_PERIODIC : 0;
+	LAPIC_WRITE(LVT_TIMER, timer_vector);
+	LAPIC_WRITE(TIMER_DIVIDE_CONFIG, divisor);
+	LAPIC_WRITE(TIMER_INITIAL_COUNT, initial_count);
+	mp_enable_preemption();
+}
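+
+/*
+ * Illustrative use only (divide_by_1 is assumed to be among the
+ * lapic_timer_divide_t encodings in i386/lapic.h): arm an unmasked
+ * one-shot countdown of 100000 bus-clock ticks:
+ *
+ *	lapic_set_timer(TRUE, one_shot, divide_by_1, 100000);
+ */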
+
+void
+lapic_config_timer(
+	boolean_t		interrupt_unmasked,
+	lapic_timer_mode_t	mode,
+	lapic_timer_divide_t	divisor)
+{
+	uint32_t	timer_vector;
+
+	mp_disable_preemption();
+	timer_vector = LAPIC_READ(LVT_TIMER);
+	timer_vector &= ~(LAPIC_LVT_MASKED |
+			  LAPIC_LVT_PERIODIC |
+			  LAPIC_LVT_TSC_DEADLINE);
+	timer_vector |= interrupt_unmasked ? 0 : LAPIC_LVT_MASKED;
+	timer_vector |= (mode == periodic) ? LAPIC_LVT_PERIODIC : 0;
+	LAPIC_WRITE(LVT_TIMER, timer_vector);
+	LAPIC_WRITE(TIMER_DIVIDE_CONFIG, divisor);
+	mp_enable_preemption();
+}
+
+/*
+ * Configure TSC-deadline timer mode. The lapic interrupt is always unmasked.
+ */
+__private_extern__
+void
+lapic_config_tsc_deadline_timer(void)
+{
+	uint32_t	timer_vector;
+
+	DBG("lapic_config_tsc_deadline_timer()\n");
+	mp_disable_preemption();
+	timer_vector = LAPIC_READ(LVT_TIMER);
+	timer_vector &= ~(LAPIC_LVT_MASKED |
+			  LAPIC_LVT_PERIODIC);
+	timer_vector |= LAPIC_LVT_TSC_DEADLINE;
+	LAPIC_WRITE(LVT_TIMER, timer_vector);
+
+	/*
+	 * Serialize writes per Intel OSWG: write a far-future deadline and
+	 * re-write until the MSR reads back non-zero, confirming the mode
+	 * switch has taken effect, then disarm.
+	 */
+	do {
+		lapic_set_tsc_deadline_timer(rdtsc64() + (1ULL<<32));
+	} while (lapic_get_tsc_deadline_timer() == 0);
+	lapic_set_tsc_deadline_timer(0);
+
+	mp_enable_preemption();
+	DBG("lapic_config_tsc_deadline_timer() done\n");
+}
+
+void
+lapic_set_timer_fast(
+	lapic_timer_count_t	initial_count)
+{
+	LAPIC_WRITE(LVT_TIMER, LAPIC_READ(LVT_TIMER) & ~LAPIC_LVT_MASKED);
+	LAPIC_WRITE(TIMER_INITIAL_COUNT, initial_count);
+}
+
+__private_extern__
+void
+lapic_set_tsc_deadline_timer(uint64_t deadline)
+{
+	/* Don't bother disarming: wrmsr64(MSR_IA32_TSC_DEADLINE, 0); */
+	wrmsr64(MSR_IA32_TSC_DEADLINE, deadline);
+}
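+
+/*
+ * Illustrative use only (tsc_per_ms is a hypothetical stand-in for the
+ * TSC ticks-per-millisecond rate; it is not defined in this file):
+ * arm an interrupt roughly one millisecond out:
+ *
+ *	lapic_set_tsc_deadline_timer(rdtsc64() + tsc_per_ms);
+ */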
+
+__private_extern__
+uint64_t
+lapic_get_tsc_deadline_timer(void)
+{
+	return rdmsr64(MSR_IA32_TSC_DEADLINE);
+}
+
+void
+lapic_get_timer(
+	lapic_timer_mode_t	*mode,
+	lapic_timer_divide_t	*divisor,
+	lapic_timer_count_t	*initial_count,
+	lapic_timer_count_t	*current_count)
+{
+	mp_disable_preemption();
+	if (mode)
+		*mode = (LAPIC_READ(LVT_TIMER) & LAPIC_LVT_PERIODIC) ?
+				periodic : one_shot;
+	if (divisor)
+		*divisor = LAPIC_READ(TIMER_DIVIDE_CONFIG) & LAPIC_TIMER_DIVIDE_MASK;
+	if (initial_count)
+		*initial_count = LAPIC_READ(TIMER_INITIAL_COUNT);
+	if (current_count)
+		*current_count = LAPIC_READ(TIMER_CURRENT_COUNT);
+	mp_enable_preemption();
+} 
+
+static inline void
+_lapic_end_of_interrupt(void)
+{
+	LAPIC_WRITE(EOI, 0);
+}
+
+void
+lapic_end_of_interrupt(void)
+{
+	_lapic_end_of_interrupt();
+}
+
+void
+lapic_unmask_perfcnt_interrupt(void)
+{
+	LAPIC_WRITE(LVT_PERFCNT, LAPIC_VECTOR(PERFCNT));
+}
+
+void
+lapic_set_perfcnt_interrupt_mask(boolean_t mask)
+{
+	uint32_t m = (mask ? LAPIC_LVT_MASKED : 0);
+	LAPIC_WRITE(LVT_PERFCNT, LAPIC_VECTOR(PERFCNT) | m);
+}
+
+void
+lapic_set_intr_func(int vector, i386_intr_func_t func)
+{
+	if (vector > lapic_interrupt_base)
+		vector -= lapic_interrupt_base;
+
+	switch (vector) {
+	case LAPIC_NMI_INTERRUPT:
+	case LAPIC_INTERPROCESSOR_INTERRUPT:
+	case LAPIC_TIMER_INTERRUPT:
+	case LAPIC_THERMAL_INTERRUPT:
+	case LAPIC_PERFCNT_INTERRUPT:
+	case LAPIC_CMCI_INTERRUPT:
+	case LAPIC_PM_INTERRUPT:
+		lapic_intr_func[vector] = func;
+		break;
+	default:
+		panic("lapic_set_intr_func(%d,%p) invalid vector\n",
+			vector, func);
+	}
+}
+
+void
+lapic_set_pmi_func(i386_intr_func_t func)
+{
+	lapic_set_intr_func(LAPIC_VECTOR(PERFCNT), func);
+}
+
+int
+lapic_interrupt(int interrupt_num, x86_saved_state_t *state)
+{
+	int	retval = 0;
+	int 	esr = -1;
+
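+	/* Rebase the incoming vector to the LAPIC_*_INTERRUPT offsets used below */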
+	interrupt_num -= lapic_interrupt_base;
+	if (interrupt_num < 0) {
+		if (interrupt_num == (LAPIC_NMI_INTERRUPT - lapic_interrupt_base) &&
+		    lapic_intr_func[LAPIC_NMI_INTERRUPT] != NULL) {
+			retval = (*lapic_intr_func[LAPIC_NMI_INTERRUPT])(state);
+			return retval;
+		}
+		else
+			return 0;
+	}
+
+	switch(interrupt_num) {
+	case LAPIC_TIMER_INTERRUPT:
+	case LAPIC_THERMAL_INTERRUPT:
+	case LAPIC_INTERPROCESSOR_INTERRUPT:
+	case LAPIC_PM_INTERRUPT:
+		if (lapic_intr_func[interrupt_num] != NULL)
+			(void) (*lapic_intr_func[interrupt_num])(state);
+		_lapic_end_of_interrupt();
+		retval = 1;
+		break;
+	case LAPIC_PERFCNT_INTERRUPT:
+		/* If a function has been registered, invoke it.  Otherwise,
+		 * pass up to IOKit.
+		 */
+		if (lapic_intr_func[interrupt_num] != NULL) {
+			(void) (*lapic_intr_func[interrupt_num])(state);
+			/* Unmask the interrupt since we don't expect legacy users
+			 * to be responsible for it.
+			 */
+			lapic_unmask_perfcnt_interrupt();
+			_lapic_end_of_interrupt();
+			retval = 1;
+		}
+		break;
+	case LAPIC_CMCI_INTERRUPT:
+		if (lapic_intr_func[interrupt_num] != NULL)
+			(void) (*lapic_intr_func[interrupt_num])(state);
+		/* return 0 for the platform expert to handle */
+		break;
+	case LAPIC_ERROR_INTERRUPT:
+		/* We treat error interrupts on APs as fatal.
+		 * The current interrupt steering scheme directs most
+		 * external interrupts to the BSP (HPET interrupts being
+		 * a notable exception); hence, such an error
+		 * on an AP may signify LVT corruption (with "may" being
+		 * the operative word). On the BSP, we adopt a more
+		 * lenient approach, in the interests of enhancing
+		 * debuggability and reducing fragility.
+		 * If "lapic_error_count_threshold" error interrupts
+		 * occur within "lapic_error_time_threshold" absolute
+		 * time units, we mask the error vector and log. The
+		 * error interrupts themselves are likely
+		 * side effects of issues which are beyond the purview of
+		 * the local APIC interrupt handler, however. The Error
+		 * Status Register value (the illegal destination
+		 * vector code is one observed in practice) indicates
+		 * the immediate cause of the error.
+		 */
+		esr = lapic_esr_read();
+		lapic_dump();
+
+		if ((debug_boot_arg && (lapic_dont_panic == FALSE)) ||
+			cpu_number() != master_cpu) {
+			panic("Local APIC error, ESR: %d\n", esr);
+		}
+
+		if (cpu_number() == master_cpu) {
+			uint64_t abstime = mach_absolute_time();
+			if ((abstime - lapic_last_master_error) < lapic_error_time_threshold) {
+				if (lapic_master_error_count++ > lapic_error_count_threshold) {
+					lapic_errors_masked = TRUE;
+					LAPIC_WRITE(LVT_ERROR, LAPIC_READ(LVT_ERROR) | LAPIC_LVT_MASKED);
+					printf("Local APIC: errors masked\n");
+				}
+			}
+			else {
+				lapic_last_master_error = abstime;
+				lapic_master_error_count = 0;
+			}
+			printf("Local APIC error on master CPU, ESR: %d, error count this run: %d\n", esr, lapic_master_error_count);
+		}
+
+		_lapic_end_of_interrupt();
+		retval = 1;
+		break;
+	case LAPIC_SPURIOUS_INTERRUPT:
+		kprintf("SPIV\n");
+		/* No EOI required here */
+		retval = 1;
+		break;
+	case LAPIC_PMC_SW_INTERRUPT: 
+		{
+#if CONFIG_COUNTERS
+			thread_t old, new;
+			ml_get_csw_threads(&old, &new);
+
+			if (pmc_context_switch(old, new) == TRUE) {
+				retval = 1;
+				/* No EOI required for SWI */
+			}
+#endif /* CONFIG_COUNTERS */
+		}
+		break;
+	}
+
+	return retval;
+}
+
+void
+lapic_smm_restore(void)
+{
+	boolean_t state;
+
+	if (lapic_os_enabled == FALSE)
+		return;
+
+	state = ml_set_interrupts_enabled(FALSE);
+
+ 	if (LAPIC_ISR_IS_SET(LAPIC_REDUCED_INTERRUPT_BASE, TIMER)) {
+		/*
+		 * Bogus SMI handler enables interrupts but does not know about
+		 * local APIC interrupt sources. When APIC timer counts down to
+		 * zero while in SMM, local APIC will end up waiting for an EOI
+		 * but no interrupt was delivered to the OS.
+ 		 */
+		_lapic_end_of_interrupt();
+
+		/*
+		 * The timer is one-shot; if it has already expired, kick off
+		 * a short countdown to re-deliver the lost timer interrupt.
+		 */
+		if (LAPIC_READ(TIMER_CURRENT_COUNT) == 0) {
+			LAPIC_WRITE(TIMER_INITIAL_COUNT, 1);
+		}
+
+		kprintf("lapic_smm_restore\n");
+	}
+
+	ml_set_interrupts_enabled(state);
+}
+
+void
+lapic_send_ipi(int cpu, int vector)
+{
+	boolean_t	state;
+
+	if (vector < lapic_interrupt_base)
+		vector += lapic_interrupt_base;
+
+	state = ml_set_interrupts_enabled(FALSE);
+
+	/* Wait for pending outgoing send to complete */
+	while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) {
+		cpu_pause();
+	}
+
+	LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
+	LAPIC_WRITE(ICR, vector | LAPIC_ICR_DM_FIXED);
+
+	(void) ml_set_interrupts_enabled(state);
+}
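+
+/*
+ * Illustrative use only (assumes LAPIC_VECTOR composes for the
+ * INTERPROCESSOR source as it does for TIMER et al. above): because
+ * sub-base vectors are biased upward, either the LAPIC_* offset or the
+ * absolute vector may be passed:
+ *
+ *	lapic_send_ipi(1, LAPIC_VECTOR(INTERPROCESSOR));
+ */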
+
+/*
+ * The following interfaces are privately exported to AICPM.
+ */
+
+boolean_t
+lapic_is_interrupt_pending(void)
+{
+	int		i;
+
+	for (i = 0; i < 8; i += 1) {
+		if ((LAPIC_READ_OFFSET(IRR_BASE, i) != 0) ||
+		    (LAPIC_READ_OFFSET(ISR_BASE, i) != 0))
+			return (TRUE);
+	}
+
+	return (FALSE);
+}
+
+boolean_t
+lapic_is_interrupting(uint8_t vector)
+{
+	int		i;
+	int		bit;
+	uint32_t	irr;
+	uint32_t	isr;
+
+	i = vector / 32;
+	bit = 1 << (vector % 32);
+
+	irr = LAPIC_READ_OFFSET(IRR_BASE, i);
+	isr = LAPIC_READ_OFFSET(ISR_BASE, i);
+
+	if ((irr | isr) & bit)
+		return (TRUE);
+
+	return (FALSE);
+}
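+
+/*
+ * Worked example: vector 0x41 maps to word 0x41 / 32 = 2 and bit
+ * 0x41 % 32 = 1, i.e. bit 1 of the third 32-bit IRR/ISR register.
+ */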
+
+void
+lapic_interrupt_counts(uint64_t intrs[256])
+{
+	int		i;
+	int		j;
+	int		bit;
+	uint32_t	irr;
+	uint32_t	isr;
+
+	if (intrs == NULL)
+		return;
+
+	for (i = 0; i < 8; i += 1) {
+		irr = LAPIC_READ_OFFSET(IRR_BASE, i);
+		isr = LAPIC_READ_OFFSET(ISR_BASE, i);
+
+		if ((isr | irr) == 0)
+			continue;
+
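+		/* vectors 0-15 are not valid APIC vectors; skip them in word 0 */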
+		for (j = (i == 0) ? 16 : 0; j < 32; j += 1) {
+			bit = (32 * i) + j;
+			if ((isr | irr) & (1 << j))
+				intrs[bit] += 1;
+		}
+	}
+}
+
+void
+lapic_disable_timer(void)
+{
+	uint32_t	lvt_timer;
+
+	/*
+	 * If we're in deadline timer mode, simply clear the deadline timer;
+	 * otherwise mask the timer interrupt and clear the countdown.
+	 */
+	lvt_timer = LAPIC_READ(LVT_TIMER);
+	if (lvt_timer & LAPIC_LVT_TSC_DEADLINE) {
+		wrmsr64(MSR_IA32_TSC_DEADLINE, 0);
+	} else {
+		LAPIC_WRITE(LVT_TIMER, lvt_timer | LAPIC_LVT_MASKED);
+		LAPIC_WRITE(TIMER_INITIAL_COUNT, 0);
+		lvt_timer = LAPIC_READ(LVT_TIMER);
+	}
+}
diff --git a/osfmk/i386/ldt.c b/osfmk/i386/ldt.c
index 91416fd20..ff1facd34 100644
--- a/osfmk/i386/ldt.c
+++ b/osfmk/i386/ldt.c
@@ -65,38 +65,38 @@
 
 struct real_descriptor	master_ldt[LDTSZ] __attribute__ ((aligned (4096))) = {
 #ifdef __i386__
-	[SEL_TO_INDEX(SYSENTER_CS)] MAKE_REAL_DESCRIPTOR(	/* kernel code (sysenter) */
+	[SEL_TO_INDEX(SYSENTER_CS)] = MAKE_REAL_DESCRIPTOR(	/* kernel code (sysenter) */
 		0,
 		0xfffff,
 		SZ_32|SZ_G,
 		ACC_P|ACC_PL_K|ACC_CODE_R
 	),
-	[SEL_TO_INDEX(SYSENTER_DS)] MAKE_REAL_DESCRIPTOR(	/* kernel data (sysenter) */
+	[SEL_TO_INDEX(SYSENTER_DS)] = MAKE_REAL_DESCRIPTOR(	/* kernel data (sysenter) */
 		0,
 		0xfffff,
 		SZ_32|SZ_G,
 		ACC_P|ACC_PL_K|ACC_DATA_W
 	),
-	[SEL_TO_INDEX(USER_CS)] MAKE_REAL_DESCRIPTOR(	/* user code segment */
+	[SEL_TO_INDEX(USER_CS)] = MAKE_REAL_DESCRIPTOR(	/* user code segment */
 		0,
 		0xfffff,
  		SZ_32|SZ_G,
 		ACC_P|ACC_PL_U|ACC_CODE_R
 	),
-	[SEL_TO_INDEX(USER_DS)] MAKE_REAL_DESCRIPTOR(	/* user data segment */
+	[SEL_TO_INDEX(USER_DS)] = MAKE_REAL_DESCRIPTOR(	/* user data segment */
 		0,
 		0xfffff,
 		SZ_32|SZ_G,
 		ACC_P|ACC_PL_U|ACC_DATA_W
 	),
-	[SEL_TO_INDEX(USER64_CS)] MAKE_REAL_DESCRIPTOR(	/* user 64-bit code segment */
+	[SEL_TO_INDEX(USER64_CS)] = MAKE_REAL_DESCRIPTOR(	/* user 64-bit code segment */
 		0,
 		0xfffff,
 		SZ_64|SZ_G,
 		ACC_P|ACC_PL_U|ACC_CODE_R
 	),
 #endif
-	[SEL_TO_INDEX(USER_CTHREAD)] MAKE_REAL_DESCRIPTOR(	/* user cthread segment */
+	[SEL_TO_INDEX(USER_CTHREAD)] = MAKE_REAL_DESCRIPTOR(	/* user cthread segment */
 		0,
 		0xfffff,
 		SZ_32|SZ_G,
diff --git a/osfmk/i386/locks.h b/osfmk/i386/locks.h
index d74e94156..a0409d257 100644
--- a/osfmk/i386/locks.h
+++ b/osfmk/i386/locks.h
@@ -41,47 +41,61 @@ extern	unsigned int	LcksOpts;
 #define enaLkDeb		0x00000001	/* Request debug in default attribute */
 #define enaLkStat		0x00000002	/* Request statistic in default attribute */
 
-#endif
+#endif /* MACH_KERNEL_PRIVATE */
 
-#ifdef	MACH_KERNEL_PRIVATE
+#if	defined(MACH_KERNEL_PRIVATE)
 typedef struct {
-	unsigned long	interlock;
-	unsigned long	lck_spin_pad[9];	/* XXX - usimple_lock_data_t */
+	volatile uintptr_t	interlock;
+#if	MACH_LDEBUG
+	unsigned long   lck_spin_pad[9];	/* XXX - usimple_lock_data_t */
+#endif
 } lck_spin_t;
 
 #define	LCK_SPIN_TAG_DESTROYED		0x00002007	/* lock marked as Destroyed */
 
-#else
+#else /* MACH_KERNEL_PRIVATE */
 #ifdef	KERNEL_PRIVATE
 typedef struct {
 	unsigned long    opaque[10];
 } lck_spin_t;
-#else
+#else /* KERNEL_PRIVATE */
 typedef	struct __lck_spin_t__	lck_spin_t;
 #endif
 #endif
 
 #ifdef	MACH_KERNEL_PRIVATE
+/* The definition of this structure, including the layout of the
+ * state bitfield, is tailored to the asm implementation in i386_lock.s
+ */
 typedef struct _lck_mtx_ {
 	union {
 		struct {
 			volatile uintptr_t		lck_mtxd_owner;
-			unsigned long			lck_mtxd_ptr;
-			volatile uint32_t		lck_mtxd_waiters:16,
-				                        lck_mtxd_pri:8,
-			                                lck_mtxd_ilocked:1,
-			                                lck_mtxd_mlocked:1,
-			                                lck_mtxd_promoted:1,
-				                        lck_mtxd_spin:1,
-				                        lck_mtxd_pad4:4; 	/* padding */
-#ifdef __x86_64__
-			unsigned int			lck_mtxd_pad;
-#endif
+			union {
+				struct {
+					volatile uint32_t
+						lck_mtxd_waiters:16,
+						lck_mtxd_pri:8,
+						lck_mtxd_ilocked:1,
+						lck_mtxd_mlocked:1,
+						lck_mtxd_promoted:1,
+						lck_mtxd_spin:1,
+						lck_mtxd_is_ext:1,
+						lck_mtxd_pad3:3;
+				};
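+				/* aliases the bitfield above as one 32-bit word */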
+				uint32_t	lck_mtxd_state;
+			};
+#if	defined(__x86_64__)
+			/* Pad field used as a canary, initialized to ~0 */
+			uint32_t			lck_mtxd_pad32;
+#endif			
 		} lck_mtxd;
 		struct {
-			unsigned long			lck_mtxi_tag;
 			struct _lck_mtx_ext_		*lck_mtxi_ptr;
-			unsigned long			lck_mtxi_pad;
+			uint32_t			lck_mtxi_tag;
+#if	defined(__x86_64__)				
+			uint32_t			lck_mtxi_pad32;
+#endif			
 		} lck_mtxi;
 	} lck_mtx_sw;
 } lck_mtx_t;
@@ -89,31 +103,25 @@ typedef struct _lck_mtx_ {
 #define	lck_mtx_owner	lck_mtx_sw.lck_mtxd.lck_mtxd_owner
 #define	lck_mtx_waiters	lck_mtx_sw.lck_mtxd.lck_mtxd_waiters
 #define	lck_mtx_pri	lck_mtx_sw.lck_mtxd.lck_mtxd_pri
-#define	lck_mtx_ilocked	lck_mtx_sw.lck_mtxd.lck_mtxd_ilocked
-#define	lck_mtx_mlocked	lck_mtx_sw.lck_mtxd.lck_mtxd_mlocked
 #define	lck_mtx_promoted lck_mtx_sw.lck_mtxd.lck_mtxd_promoted
-#define	lck_mtx_spin	lck_mtx_sw.lck_mtxd.lck_mtxd_spin
+#define lck_mtx_is_ext  lck_mtx_sw.lck_mtxd.lck_mtxd_is_ext
 
 #define lck_mtx_tag	lck_mtx_sw.lck_mtxi.lck_mtxi_tag
 #define lck_mtx_ptr	lck_mtx_sw.lck_mtxi.lck_mtxi_ptr
-#define lck_mtx_state	lck_mtx_sw.lck_mtxi.lck_mtxi_pad
-
-#define	LCK_MTX_TAG_INDIRECT			0x00001007	/* lock marked as Indirect  */
-#define	LCK_MTX_TAG_DESTROYED			0x00002007	/* lock marked as Destroyed */
-#define LCK_MTX_PTR_EXTENDED			0x00003007	/* lock is extended version */
+#define lck_mtx_state	lck_mtx_sw.lck_mtxd.lck_mtxd_state
+/* This pattern must subsume the interlocked, mlocked and spin bits */
+#define	LCK_MTX_TAG_INDIRECT			0x07ff1007	/* lock marked as Indirect  */
+#define	LCK_MTX_TAG_DESTROYED			0x07fe2007	/* lock marked as Destroyed */
 
 /* Adaptive spin before blocking */
 extern unsigned int	MutexSpin;
 extern int		lck_mtx_lock_spinwait_x86(lck_mtx_t *mutex);
 extern void		lck_mtx_lock_wait_x86(lck_mtx_t *mutex);
 extern void		lck_mtx_lock_acquire_x86(lck_mtx_t *mutex);
-extern void		lck_mtx_unlock_wakeup_x86(lck_mtx_t *mutex, int owner_was_promoted);
+extern void		lck_mtx_unlock_wakeup_x86(lck_mtx_t *mutex, int prior_lock_state);
 
 extern void		lck_mtx_lock_mark_destroyed(lck_mtx_t *mutex);
-extern int		lck_mtx_lock_mark_promoted(lck_mtx_t *mutex);
-extern int		lck_mtx_lock_decr_waiter(lck_mtx_t *mutex);
 extern int		lck_mtx_lock_grab_mutex(lck_mtx_t *mutex);
-extern integer_t	lck_mtx_lock_get_pri(lck_mtx_t *mutex);
 
 extern void		hw_lock_byte_init(uint8_t *lock_byte);
 extern void		hw_lock_byte_lock(uint8_t *lock_byte);
@@ -153,10 +161,19 @@ typedef struct _lck_mtx_ext_ {
 #define	LCK_MTX_ATTR_STAT	0x2
 #define	LCK_MTX_ATTR_STATb	1
 
+#else /* MACH_KERNEL_PRIVATE */
+#ifdef	XNU_KERNEL_PRIVATE
+typedef struct {
+	unsigned long		opaque[2];
+} lck_mtx_t;
+
+typedef struct {
+	unsigned long		opaque[10];
+} lck_mtx_ext_t;
 #else
 #ifdef	KERNEL_PRIVATE
 typedef struct {
-	unsigned long		opaque[3];
+	unsigned long		opaque[2];
 } lck_mtx_t;
 
 typedef struct {
@@ -168,6 +185,7 @@ typedef struct __lck_mtx_t__		lck_mtx_t;
 typedef struct __lck_mtx_ext_t__	lck_mtx_ext_t;
 #endif
 #endif
+#endif
 
 #ifdef	MACH_KERNEL_PRIVATE
 #pragma pack(1)		/* Make sure the structure stays as we defined it */
diff --git a/osfmk/i386/locks_i386.c b/osfmk/i386/locks_i386.c
index 8c715d086..3f94ba02b 100644
--- a/osfmk/i386/locks_i386.c
+++ b/osfmk/i386/locks_i386.c
@@ -88,6 +88,7 @@
 #include <i386/mp.h>
 
 #include <sys/kdebug.h>
+#include <mach/branch_predicates.h>
 
 /*
  * We need only enough declarations from the BSD-side to be able to
@@ -211,7 +212,6 @@ lck_rw_type_t lck_rw_done_gen(
 	lck_rw_t	*lck,
 	int		prior_lock_state);
 
-
 /*
  *      Routine:        lck_spin_alloc_init
  */
@@ -329,7 +329,7 @@ static uint32_t spinlock_timeout_NMI(uintptr_t thread_addr) {
 	for (i = 0; i < real_ncpus; i++) {
 		if ((uintptr_t)cpu_data_ptr[i]->cpu_active_thread == thread_addr) {
 			spinlock_owner_cpu = i;
-			if ((uint32_t)cpu_number() == i)
+			if ((uint32_t) cpu_number() == i)
 				break;
 			cpu_datap(i)->cpu_NMI_acknowledged = FALSE;
 			cpu_NMI_interrupt(i);
@@ -359,14 +359,15 @@ usimple_lock(
 
 	OBTAIN_PC(pc);
 	USLDBG(usld_lock_pre(l, pc));
-/* Try to get the lock with a timeout */
-	if(!hw_lock_to(&l->interlock, LockTimeOutTSC))	{
+
+	if(__improbable(hw_lock_to(&l->interlock, LockTimeOutTSC) == 0))	{
 		boolean_t uslock_acquired = FALSE;
 		while (machine_timeout_suspended()) {
 			enable_preemption();
 			if ((uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC)))
 				break;
-	}
+		}
+
 		if (uslock_acquired == FALSE) {
 			uint32_t lock_cpu;
 			spinlock_timed_out = l;
@@ -903,8 +904,8 @@ lck_rw_destroy(
 		((event_t) (((unsigned char*) (x)) + (offsetof(lck_rw_t, lck_rw_pad8))))
 
 /*
- * We need to disable interrupts while holding the mutex interlock
- * to prevent an IPI intervening.
+ * We disable interrupts while holding the RW interlock to prevent an
+ * interrupt from exacerbating hold time.
  * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
  */
 static boolean_t
@@ -1635,6 +1636,9 @@ lck_rw_assert(
 	panic("rw lock (%p) not held (mode=%u), first word %08x\n", lck, type, *(uint32_t *)lck);
 }
 
+#ifdef	MUTEX_ZONE
+extern zone_t lck_mtx_zone;
+#endif
 /*
  *      Routine:        lck_mtx_alloc_init
  */
@@ -1644,10 +1648,13 @@ lck_mtx_alloc_init(
 	lck_attr_t	*attr)
 {
 	lck_mtx_t	*lck;
-
+#ifdef	MUTEX_ZONE
+	if ((lck = (lck_mtx_t *)zalloc(lck_mtx_zone)) != 0)
+		lck_mtx_init(lck, grp, attr);
+#else
 	if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
 		lck_mtx_init(lck, grp, attr);
-		
+#endif		
 	return(lck);
 }
 
@@ -1660,7 +1667,11 @@ lck_mtx_free(
 	lck_grp_t	*grp)
 {
 	lck_mtx_destroy(lck, grp);
+#ifdef	MUTEX_ZONE
+	zfree(lck_mtx_zone, lck);
+#else
 	kfree(lck, sizeof(lck_mtx_t));
+#endif
 }
 
 /*
@@ -1682,9 +1693,12 @@ lck_mtx_ext_init(
 	lck->lck_mtx_grp = grp;
 
 	if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
-		 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
+		lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
 
-	lck->lck_mtx.lck_mtx_ptr = (void *)LCK_MTX_PTR_EXTENDED;
+	lck->lck_mtx.lck_mtx_is_ext = 1;
+#if	defined(__x86_64__)
+	lck->lck_mtx.lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
+#endif
 }
 
 /*
@@ -1709,18 +1723,14 @@ lck_mtx_init(
 			lck_mtx_ext_init(lck_ext, grp, lck_attr);	
 			lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
 			lck->lck_mtx_ptr = lck_ext;
-			lck->lck_mtx_ilocked = 1;
 		}
 	} else {
 		lck->lck_mtx_owner = 0;
-		lck->lck_mtx_ptr = 0;
-		lck->lck_mtx_waiters = 0;
-		lck->lck_mtx_pri = 0;
-		lck->lck_mtx_ilocked = 0;
-		lck->lck_mtx_mlocked = 0;
-		lck->lck_mtx_promoted = 0;
-		lck->lck_mtx_spin = 0;
+		lck->lck_mtx_state = 0;
 	}
+#if	defined(__x86_64__)
+	lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
+#endif
 	lck_grp_reference(grp);
 	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
 }
@@ -1746,17 +1756,14 @@ lck_mtx_init_ext(
 		lck_mtx_ext_init(lck_ext, grp, lck_attr);
 		lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
 		lck->lck_mtx_ptr = lck_ext;
-		lck->lck_mtx_ilocked = 1;
 	} else {
 		lck->lck_mtx_owner = 0;
-		lck->lck_mtx_ptr = 0;
-		lck->lck_mtx_waiters = 0;
-		lck->lck_mtx_pri = 0;
-		lck->lck_mtx_ilocked = 0;
-		lck->lck_mtx_mlocked = 0;
-		lck->lck_mtx_promoted = 0;
-		lck->lck_mtx_spin = 0;
+		lck->lck_mtx_state = 0;
 	}
+#if	defined(__x86_64__)
+	lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF;
+#endif
+
 	lck_grp_reference(grp);
 	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
 }
@@ -1795,50 +1802,67 @@ lck_mtx_destroy(
 /*
  * Routine: 	lck_mtx_unlock_wakeup_x86
  *
- * Invoked on unlock when there is contention.
+ * Invoked on unlock when there is
+ * contention (i.e. the assembly routine sees
+ * that mutex->lck_mtx_waiters != 0 or
+ * that mutex->lck_mtx_promoted != 0).
  *
+ * Neither the mutex nor the interlock is held.
  */
 void
 lck_mtx_unlock_wakeup_x86 (
 	lck_mtx_t	*mutex,
-	int		owner_was_promoted)
+	int		prior_lock_state)
 {
+	lck_mtx_t	fake_lck;
+
+	/*
+	 * prior_lock_state is a snapshot of the 2nd word of the
+	 * lock in question... we'll fake up a lock with the bits
+	 * copied into place, carefully not accessing anything
+	 * beyond what's defined in the second word of a lck_mtx_t
+	 */
+	fake_lck.lck_mtx_state = prior_lock_state;
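+	/*
+	 * e.g. a prior_lock_state of 0x00410001 decodes, via the anonymous
+	 * union in i386/locks.h, as lck_mtx_waiters == 1 with
+	 * lck_mtx_pri == 0x41 and no promotion outstanding.
+	 */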
+
+	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START,
+		     mutex, fake_lck.lck_mtx_promoted, fake_lck.lck_mtx_waiters, fake_lck.lck_mtx_pri, 0);
 
-	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START, (int)mutex, owner_was_promoted, mutex->lck_mtx_waiters, 0, 0);
+	if (__probable(fake_lck.lck_mtx_waiters)) {
 
-	if (lck_mtx_lock_decr_waiter(mutex))
-		thread_wakeup_one((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));
+		if (fake_lck.lck_mtx_waiters > 1)
+			thread_wakeup_one_with_pri((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)), fake_lck.lck_mtx_pri);
+		else
+			thread_wakeup_one((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));
+	}
 
-	if (owner_was_promoted) {
+	if (__improbable(fake_lck.lck_mtx_promoted)) {
 		thread_t	thread = current_thread();
 
 
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_DEMOTE_CODE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), thread->promotions,
-			     thread->sched_mode & TH_MODE_PROMOTED, 0, 0);
+		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_DEMOTE_CODE) | DBG_FUNC_NONE,
+			     thread_tid(thread), thread->promotions, thread->sched_flags & TH_SFLAG_PROMOTED, 0, 0);
 
 		if (thread->promotions > 0) {
 			spl_t	s = splsched();
 
 			thread_lock(thread);
 
-			if (--thread->promotions == 0 && (thread->sched_mode & TH_MODE_PROMOTED)) {
+			if (--thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED)) {
 
-				thread->sched_mode &= ~TH_MODE_PROMOTED;
+				thread->sched_flags &= ~TH_SFLAG_PROMOTED;
 
-				if (thread->sched_mode & TH_MODE_ISDEPRESSED) {
-					KERNEL_DEBUG_CONSTANT(
-						MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
-						thread->sched_pri, DEPRESSPRI, 0, mutex, 0);
+				if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
+					KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
+							      thread->sched_pri, DEPRESSPRI, 0, mutex, 0);
 
 					set_sched_pri(thread, DEPRESSPRI);
 				}
 				else {
 					if (thread->priority < thread->sched_pri) {
-						KERNEL_DEBUG_CONSTANT(
-							MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
-							thread->sched_pri, thread->priority, 0, mutex, 0);
+						KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_DEMOTE) | DBG_FUNC_NONE,
+								      thread->sched_pri, thread->priority, 0, mutex, 0);
 
-						compute_priority(thread, FALSE);
+						SCHED(compute_priority)(thread, FALSE);
 					}
 				}
 			}
@@ -1846,7 +1870,8 @@ lck_mtx_unlock_wakeup_x86 (
 			splx(s);
 		}
 	}
-	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END, (int)mutex, 0, mutex->lck_mtx_waiters, 0, 0);
+	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END,
+		     mutex, 0, mutex->lck_mtx_waiters, 0, 0);
 }
 
 
@@ -1854,43 +1879,54 @@ lck_mtx_unlock_wakeup_x86 (
  * Routine: 	lck_mtx_lock_acquire_x86
  *
  * Invoked on acquiring the mutex when there is
- * contention.
- * mutex is owned...  interlock is not held
+ * contention (i.e. the assembly routine sees
+ * that mutex->lck_mtx_waiters != 0 or
+ * thread->was_promoted_on_wakeup != 0)...
+ *
+ * mutex is owned...  interlock is held... preemption is disabled
  */
 void
 lck_mtx_lock_acquire_x86(
 	lck_mtx_t	*mutex)
 {
-	thread_t	thread = current_thread();
+	thread_t	thread;
 	integer_t	priority;
+	spl_t		s;
 
-	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START, (int)mutex, 0, mutex->lck_mtx_waiters, 0, 0);
+	thread = (thread_t)mutex->lck_mtx_owner;	/* mutex is owned; faster than current_thread() */
+
+	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START,
+		     mutex, thread->was_promoted_on_wakeup, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
 
-	priority = lck_mtx_lock_get_pri(mutex);
+	if (mutex->lck_mtx_waiters)
+		priority = mutex->lck_mtx_pri;
+	else
+		priority = 0;
 
-	if (thread->sched_pri < priority) {
 
-		if (lck_mtx_lock_mark_promoted(mutex)) {
-			spl_t	s = splsched();
+	if (thread->sched_pri < priority || thread->was_promoted_on_wakeup) {
 
-			thread_lock(thread);
+		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
+				      thread->sched_pri, priority, thread->was_promoted_on_wakeup, mutex, 0);
 
-			if (thread->sched_pri < priority) {
+		s = splsched();
+		thread_lock(thread);
 
-				KERNEL_DEBUG_CONSTANT(
-					MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
-					thread->sched_pri, priority, 0, mutex, 0);
+		if (thread->sched_pri < priority)
+			set_sched_pri(thread, priority);
 
-				set_sched_pri(thread, priority);
-			}
+		if (mutex->lck_mtx_promoted == 0) {
+			mutex->lck_mtx_promoted = 1;
+			
 			thread->promotions++;
-			thread->sched_mode |= TH_MODE_PROMOTED;
-
-			thread_unlock(thread);
-			splx(s);
+			thread->sched_flags |= TH_SFLAG_PROMOTED;
 		}
+		thread->was_promoted_on_wakeup = 0;
+		
+		thread_unlock(thread);
+		splx(s);
 	}
-	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END, (int)mutex, 0, mutex->lck_mtx_waiters, 0, 0);
+	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END,
+		     mutex, 0, mutex->lck_mtx_waiters, 0, 0);
 }
 
 
@@ -1903,6 +1939,9 @@ lck_mtx_lock_acquire_x86(
  * time waiting for the lock to be released.
  *
  * Called with the interlock unlocked.
+ * returns 0 if mutex acquired
+ * returns 1 if we spun
+ * returns 2 if we didn't spin due to the holder not running
  */
 int
 lck_mtx_lock_spinwait_x86(
@@ -1913,9 +1952,9 @@ lck_mtx_lock_spinwait_x86(
 	int		retval = 1;
 	int		loopcount = 0;
 
-	KERNEL_DEBUG(
-		MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
-		(int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);
+
+	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
+		     mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);
 
 	deadline = mach_absolute_time() + MutexSpin;
 
@@ -1928,7 +1967,7 @@ lck_mtx_lock_spinwait_x86(
 	 *   - we haven't spun for long enough.
 	 */
 	do {
-		if (lck_mtx_lock_grab_mutex(mutex)) {
+		if (__probable(lck_mtx_lock_grab_mutex(mutex))) {
 			retval = 0;
 			break;
 		}
@@ -1959,7 +1998,7 @@ lck_mtx_lock_spinwait_x86(
 	 * penalize only lock groups that have debug/stats enabled
 	 * with dtrace processing if desired.
 	 */
-	if (mutex->lck_mtx_ptr != (void *)LCK_MTX_PTR_EXTENDED) {
+	if (__probable(mutex->lck_mtx_is_ext == 0)) {
 		LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex,
 		    mach_absolute_time() - (deadline - MutexSpin));
 	} else {
@@ -1969,9 +2008,8 @@ lck_mtx_lock_spinwait_x86(
 	/* The lockstat acquire event is recorded by the assembly code beneath us. */
 #endif
 
-	KERNEL_DEBUG(
-		MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
-		(int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, retval, 0);
+	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
+		     mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, retval, 0);
 
 	return retval;
 }
@@ -1984,7 +2022,8 @@ lck_mtx_lock_spinwait_x86(
  * Invoked in order to wait on contention.
  *
  * Called with the interlock locked and
- * returns it unlocked.
+ * preemption disabled...  
+ * returns it unlocked and with preemption enabled
  */
 void
 lck_mtx_lock_wait_x86 (
@@ -1993,7 +2032,6 @@ lck_mtx_lock_wait_x86 (
 	thread_t	self = current_thread();
 	thread_t	holder;
 	integer_t	priority;
-	integer_t	old_lck_mtx_pri;
 	spl_t		s;
 #if	CONFIG_DTRACE
 	uint64_t	sleep_start = 0;
@@ -2002,7 +2040,8 @@ lck_mtx_lock_wait_x86 (
 		sleep_start = mach_absolute_time();
 	}
 #endif
-	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, (int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);
+	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
+		     mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
 
 	priority = self->sched_pri;
 
@@ -2011,45 +2050,41 @@ lck_mtx_lock_wait_x86 (
 	if (priority < BASEPRI_DEFAULT)
 		priority = BASEPRI_DEFAULT;
 
-	if (mutex->lck_mtx_waiters == 0)
-		old_lck_mtx_pri = 0;
-	else
-		old_lck_mtx_pri = mutex->lck_mtx_pri;
-
-	if (old_lck_mtx_pri < priority)
+	if (mutex->lck_mtx_waiters == 0 || priority > mutex->lck_mtx_pri)
 		mutex->lck_mtx_pri = priority;
+	mutex->lck_mtx_waiters++;
 
-	if ( (holder = (thread_t)mutex->lck_mtx_owner) ) {
+	if ( (holder = (thread_t)mutex->lck_mtx_owner) &&
+	     holder->sched_pri < mutex->lck_mtx_pri ) {
 
 		s = splsched();
 		thread_lock(holder);
 
-		if (holder->sched_pri < priority) {
+		if (holder->sched_pri < mutex->lck_mtx_pri) {
 			KERNEL_DEBUG_CONSTANT(
 				MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
-				holder->sched_pri, priority, holder, mutex, 0);
+				holder->sched_pri, priority, thread_tid(holder), mutex, 0);
 
 			set_sched_pri(holder, priority);
 			
 			if (mutex->lck_mtx_promoted == 0) {
 				holder->promotions++;
-				holder->sched_mode |= TH_MODE_PROMOTED;
-
+				holder->sched_flags |= TH_SFLAG_PROMOTED;
+				
 				mutex->lck_mtx_promoted = 1;
 			}
 		}
 		thread_unlock(holder);
 		splx(s);
 	}
-	mutex->lck_mtx_waiters++;
-
 	assert_wait((event_t)(((unsigned int*)mutex)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
 
 	lck_mtx_ilk_unlock(mutex);
 
 	thread_block(THREAD_CONTINUE_NULL);
 
-	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, (int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0);
+	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END,
+		     mutex, mutex->lck_mtx_owner, mutex->lck_mtx_waiters, mutex->lck_mtx_pri, 0);
 
 #if	CONFIG_DTRACE
 	/*
@@ -2057,7 +2092,7 @@ lck_mtx_lock_wait_x86 (
 	 * measured from when we were entered.
 	 */
 	if (sleep_start) {
-		if (mutex->lck_mtx_ptr != (void *)LCK_MTX_PTR_EXTENDED) {
+		if (mutex->lck_mtx_is_ext == 0) {
 			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex,
 			    mach_absolute_time() - sleep_start);
 		} else {
diff --git a/osfmk/i386/locore.s b/osfmk/i386/locore.s
index 65f7006c6..6e8e3d3a2 100644
--- a/osfmk/i386/locore.s
+++ b/osfmk/i386/locore.s
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -66,23 +66,12 @@
 #include <i386/asm.h>
 #include <i386/cpuid.h>
 #include <i386/eflags.h>
-#include <i386/lapic.h>
-#include <i386/rtclock.h>
 #include <i386/proc_reg.h>
 #include <i386/trap.h>
 #include <assym.s>
-#include <mach/exception_types.h>
-#include <config_dtrace.h>
-
-#define _ARCH_I386_ASM_HELP_H_          /* Prevent inclusion of user header */
-#include <mach/i386/syscall_sw.h>
-
-#include <i386/mp.h>
 
+#include <config_dtrace.h>
 
-#define CLI cli
-#define STI sti
-	
 /*
  * PTmap is recursive pagemap at top of virtual address space.
  * Within PTmap, the page directory can be found (third indirection).
@@ -92,15 +81,6 @@
 	.set	_PTD,_PTmap + (PTDPTDI * NBPG)
 	.set	_PTDpde,_PTD + (PTDPTDI * PDESIZE)
 
-/*
- * APTmap, APTD is the alternate recursive pagemap.
- * It's used when modifying another process's page tables.
- */
-	.globl	_APTmap,_APTD,_APTDpde
-	.set	_APTmap,(APTDPTDI << PDESHIFT)
-	.set	_APTD,_APTmap + (APTDPTDI * NBPG)
-	.set	_APTDpde,_PTD + (APTDPTDI * PDESIZE)
-
 #if __MACHO__
 /* Under Mach-O, etext is a variable which contains
  * the last text address
@@ -113,56 +93,6 @@
 #define ETEXT_ADDR	$ EXT(etext)
 #endif
 
-#define	CX(addr,reg)	addr(,reg,4)
-
-/*
- * The following macros make calls into C code.
- * They dynamically align the stack to 16 bytes.
- * Arguments are moved (not pushed) onto the correctly aligned stack.
- * NOTE: EDI is destroyed in the process, and hence cannot
- * be directly used as a parameter. Users of this macro must
- * independently preserve EDI (a non-volatile) if the routine is
- * intended to be called from C, for instance.
- */
-
-#define CCALL(fn)			\
-	movl	%esp, %edi		;\
-	andl	$0xFFFFFFF0, %esp	;\
-	call	EXT(fn)			;\
-	movl	%edi, %esp
-
-#define CCALL1(fn, arg1)		\
-	movl	%esp, %edi		;\
-	subl	$4, %esp		;\
-	andl	$0xFFFFFFF0, %esp	;\
-	movl	arg1, 0(%esp)		;\
-	call	EXT(fn)			;\
-	movl	%edi, %esp
-
-#define CCALL2(fn, arg1, arg2)		\
-	movl	%esp, %edi		;\
-	subl	$8, %esp		;\
-	andl	$0xFFFFFFF0, %esp	;\
-	movl	arg2, 4(%esp)		;\
-	movl	arg1, 0(%esp)		;\
-	call	EXT(fn)			;\
-	movl	%edi, %esp
-
-/*
- * CCALL5 is used for callee functions with 3 arguments but
- * where arg2 (a3:a2) and arg3 (a5:a4) are 64-bit values.
- */
-#define CCALL5(fn, a1, a2, a3, a4, a5)	\
-	movl	%esp, %edi		;\
-	subl	$20, %esp		;\
-	andl	$0xFFFFFFF0, %esp	;\
-	movl	a5, 16(%esp)		;\
-	movl	a4, 12(%esp)		;\
-	movl	a3,  8(%esp)		;\
-	movl	a2,  4(%esp)		;\
-	movl	a1,  0(%esp)		;\
-	call	EXT(fn)			;\
-	movl	%edi, %esp
 
 	.text
 locore_start:
@@ -212,275 +142,6 @@ LEXT(recover_table_end)			;\
 	RECOVERY_SECTION
 	RECOVER_TABLE_START
 
-/*
- * Timing routines.
- */
-Entry(timer_update)
-	movl	4(%esp),%ecx
-	movl	8(%esp),%eax
-	movl	12(%esp),%edx
-	movl	%eax,TIMER_HIGHCHK(%ecx)
-	movl	%edx,TIMER_LOW(%ecx)
-	movl	%eax,TIMER_HIGH(%ecx)
-	ret
-
-Entry(timer_grab)
-	movl	4(%esp),%ecx
-0:	movl	TIMER_HIGH(%ecx),%edx
-	movl	TIMER_LOW(%ecx),%eax
-	cmpl	TIMER_HIGHCHK(%ecx),%edx
-	jne	0b
-	ret
-
-#if	STAT_TIME
-
-#define	TIME_TRAP_UENTRY
-#define	TIME_TRAP_UEXIT
-#define	TIME_INT_ENTRY
-#define	TIME_INT_EXIT
-
-#else
-/*
- * Nanosecond timing.
- */
-
-/*
- * Nanotime returned in %edx:%eax.
- * Computed from tsc based on the scale factor
- * and an implicit 32 bit shift.
- *
- * Uses %eax, %ebx, %ecx, %edx, %esi, %edi.
- */
-#define NANOTIME							\
-	mov	%gs:CPU_NANOTIME,%edi					; \
-	RTC_NANOTIME_READ_FAST()
-
-
-/*
- * Add 64-bit delta in register dreg : areg to timer pointed to by register treg.
- */
-#define TIMER_UPDATE(treg,dreg,areg,offset)									\
-	addl	(TIMER_LOW+(offset))(treg),areg		/* add low bits */			;\
-	adcl	dreg,(TIMER_HIGH+(offset))(treg)	/* add carry high bits */	;\
-	movl	areg,(TIMER_LOW+(offset))(treg)		/* store updated low bit */	;\
-	movl	(TIMER_HIGH+(offset))(treg),dreg	/* copy high bits */		;\
-	movl	dreg,(TIMER_HIGHCHK+(offset))(treg)	/* to high check */
-
-/*
- * Add time delta to old timer and start new.
- */
-#define TIMER_EVENT(old,new)											  \
-	NANOTIME							/* edx:eax nanosecs */			; \
-	movl	%eax,%esi					/* save timestamp */			; \
-	movl	%edx,%edi					/* save timestamp */			; \
-	movl	%gs:CPU_ACTIVE_THREAD,%ecx  /* get current thread */			; \
-	subl	(old##_TIMER)+TIMER_TSTAMP(%ecx),%eax	   /* compute elapsed time */	  ; \
-	sbbl	(old##_TIMER)+TIMER_TSTAMP+4(%ecx),%edx	 /* compute elapsed time */	  ; \
-	TIMER_UPDATE(%ecx,%edx,%eax,old##_TIMER)			/* update timer */			  ; \
-	movl	%esi,(new##_TIMER)+TIMER_TSTAMP(%ecx)	   /* set timestamp */			 ; \
-	movl	%edi,(new##_TIMER)+TIMER_TSTAMP+4(%ecx)	 /* set timestamp */			 ; \
-	leal	(new##_TIMER)(%ecx), %ecx   /* compute new timer pointer */ ; \
-	movl	%gs:CPU_PROCESSOR,%ebx		/* get current processor */		; \
-	movl	%ecx,THREAD_TIMER(%ebx)		/* set current timer */			; \
-	movl	%esi,%eax					/* restore timestamp */			; \
-	movl	%edi,%edx					/* restore timestamp */			; \
-	subl	(old##_STATE)+TIMER_TSTAMP(%ebx),%eax	   /* compute elapsed time */	  ; \
-	sbbl	(old##_STATE)+TIMER_TSTAMP+4(%ebx),%edx	 /* compute elapsed time */	  ; \
-	TIMER_UPDATE(%ebx,%edx,%eax,old##_STATE)			/* update timer */			  ; \
-	leal	(new##_STATE)(%ebx),%ecx	/* compute new state pointer */ ; \
-	movl	%ecx,CURRENT_STATE(%ebx)	/* set current state */			; \
-	movl	%esi,TIMER_TSTAMP(%ecx)		/* set timestamp */				; \
-	movl	%edi,TIMER_TSTAMP+4(%ecx)	/* set timestamp */
-
-/*
- * Update time on user trap entry.
- * Uses %eax,%ebx,%ecx,%edx,%esi,%edi.
- */
-#define	TIME_TRAP_UENTRY			TIMER_EVENT(USER,SYSTEM)
-
-/*
- * update time on user trap exit.
- * Uses %eax,%ebx,%ecx,%edx,%esi,%edi.
- */
-#define	TIME_TRAP_UEXIT				TIMER_EVENT(SYSTEM,USER)
-
-/*
- * update time on interrupt entry.
- * Uses %eax,%ebx,%ecx,%edx,%esi,%edi.
- * Saves processor state info on stack.
- */
-#define	TIME_INT_ENTRY													  \
-	NANOTIME							/* edx:eax nanosecs */			; \
-	movl	%eax,%gs:CPU_INT_EVENT_TIME		/* save in cpu data */		; \
-	movl	%edx,%gs:CPU_INT_EVENT_TIME+4	/* save in cpu data */		; \
-	movl	%eax,%esi					/* save timestamp */			; \
-	movl	%edx,%edi					/* save timestamp */			; \
-	movl	%gs:CPU_PROCESSOR,%ebx		/* get current processor */		; \
-	movl 	THREAD_TIMER(%ebx),%ecx		/* get current timer */			; \
-	subl	TIMER_TSTAMP(%ecx),%eax		/* compute elapsed time */		; \
-	sbbl	TIMER_TSTAMP+4(%ecx),%edx	/* compute elapsed time */		; \
-	TIMER_UPDATE(%ecx,%edx,%eax,0)		/* update timer */				; \
-	movl	KERNEL_TIMER(%ebx),%ecx		/* point to kernel timer */		; \
-	movl	%esi,TIMER_TSTAMP(%ecx)		/* set timestamp */				; \
-	movl	%edi,TIMER_TSTAMP+4(%ecx)	/* set timestamp */				; \
-	movl	%esi,%eax					/* restore timestamp */			; \
-	movl	%edi,%edx					/* restore timestamp */			; \
-	movl	CURRENT_STATE(%ebx),%ecx	/* get current state */			; \
-	pushl	%ecx						/* save state */				; \
-	subl	TIMER_TSTAMP(%ecx),%eax		/* compute elapsed time */		; \
-	sbbl	TIMER_TSTAMP+4(%ecx),%edx	/* compute elapsed time */		; \
-	TIMER_UPDATE(%ecx,%edx,%eax,0)		/* update timer */				; \
-	leal	IDLE_STATE(%ebx),%eax		/* get idle state */			; \
-	cmpl	%eax,%ecx					/* compare current state */		; \
-	je		0f							/* skip if equal */				; \
-	leal	SYSTEM_STATE(%ebx),%ecx		/* get system state */			; \
-	movl	%ecx,CURRENT_STATE(%ebx)	/* set current state */			; \
-0:	movl	%esi,TIMER_TSTAMP(%ecx)		/* set timestamp */				; \
-	movl	%edi,TIMER_TSTAMP+4(%ecx)	/* set timestamp */
-
-/*
- * update time on interrupt exit.
- * Uses %eax,%ebx,%ecx,%edx,%esi,%edi.
- * Restores processor state info from stack.
- */
-#define	TIME_INT_EXIT													  \
-	NANOTIME							/* edx:eax nanosecs */			; \
-	movl	%eax,%gs:CPU_INT_EVENT_TIME		/* save in cpu data */		; \
-	movl	%edx,%gs:CPU_INT_EVENT_TIME+4	/* save in cpu data */		; \
-	movl	%eax,%esi					/* save timestamp */			; \
-	movl	%edx,%edi					/* save timestamp */			; \
-	movl	%gs:CPU_PROCESSOR,%ebx		/* get current processor */		; \
-	movl	KERNEL_TIMER(%ebx),%ecx		/* point to kernel timer */		; \
-	subl	TIMER_TSTAMP(%ecx),%eax		/* compute elapsed time */		; \
-	sbbl	TIMER_TSTAMP+4(%ecx),%edx	/* compute elapsed time */		; \
-	TIMER_UPDATE(%ecx,%edx,%eax,0)		/* update timer */				; \
-	movl	THREAD_TIMER(%ebx),%ecx		/* interrupted timer */			; \
-	movl	%esi,TIMER_TSTAMP(%ecx)		/* set timestamp */				; \
-	movl	%edi,TIMER_TSTAMP+4(%ecx)	/* set timestamp */				; \
-	movl	%esi,%eax					/* restore timestamp */			; \
-	movl	%edi,%edx					/* restore timestamp */			; \
-	movl	CURRENT_STATE(%ebx),%ecx	/* get current state */			; \
-	subl	TIMER_TSTAMP(%ecx),%eax		/* compute elapsed time */		; \
-	sbbl	TIMER_TSTAMP+4(%ecx),%edx	/* compute elapsed time */		; \
-	TIMER_UPDATE(%ecx,%edx,%eax,0)		/* update timer */				; \
-	popl	%ecx						/* restore state */				; \
-	movl	%ecx,CURRENT_STATE(%ebx)	/* set current state */			; \
-	movl	%esi,TIMER_TSTAMP(%ecx)		/* set timestamp */				; \
-	movl	%edi,TIMER_TSTAMP+4(%ecx)	/* set timestamp */
-
-#endif /* STAT_TIME */
-
-#undef PDEBUG
-
-#ifdef PDEBUG
-
-/*
- * Traditional, not ANSI.
- */
-#define CAH(label) \
-	.data ;\
-	.globl label/**/count ;\
-label/**/count: ;\
-	.long	0 ;\
-	.globl label/**/limit ;\
-label/**/limit: ;\
-	.long	0 ;\
-	.text ;\
-	addl	$1,%ss:label/**/count ;\
-	cmpl	$0,label/**/limit ;\
-	jz	label/**/exit ;\
-	pushl	%eax ;\
-label/**/loop: ;\
-	movl	%ss:label/**/count,%eax ;\
-	cmpl	%eax,%ss:label/**/limit ;\
-	je	label/**/loop ;\
-	popl	%eax ;\
-label/**/exit:
-
-#else	/* PDEBUG */
-
-#define CAH(label)
-
-#endif	/* PDEBUG */
-	
-#if	MACH_KDB
-/*
- * Last-ditch debug code to handle faults that might result
- * from entering kernel (from collocated server) on an invalid
- * stack.  On collocated entry, there's no hardware-initiated
- * stack switch, so a valid stack must be in place when an
- * exception occurs, or we may double-fault.
- *
- * In case of a double-fault, our only recourse is to switch
- * hardware "tasks", so that we avoid using the current stack.
- *
- * The idea here is just to get the processor into the debugger,
- * post-haste.  No attempt is made to fix up whatever error got
- * us here, so presumably continuing from the debugger will
- * simply land us here again -- at best.
- */
-#if	0
-/*
- * Note that the per-fault entry points are not currently
- * functional.  The only way to make them work would be to
- * set up separate TSS's for each fault type, which doesn't
- * currently seem worthwhile.  (The offset part of a task
- * gate is always ignored.)  So all faults that task switch
- * currently resume at db_task_start.
- */
-/*
- * Double fault (Murphy's point) - error code (0) on stack
- */
-Entry(db_task_dbl_fault)
-	popl	%eax
-	movl	$(T_DOUBLE_FAULT),%ebx
-	jmp	db_task_start
-/*
- * Segment not present - error code on stack
- */
-Entry(db_task_seg_np)
-	popl	%eax
-	movl	$(T_SEGMENT_NOT_PRESENT),%ebx
-	jmp	db_task_start
-/*
- * Stack fault - error code on (current) stack
- */
-Entry(db_task_stk_fault)
-	popl	%eax
-	movl	$(T_STACK_FAULT),%ebx
-	jmp	db_task_start
-/*
- * General protection fault - error code on stack
- */
-Entry(db_task_gen_prot)
-	popl	%eax
-	movl	$(T_GENERAL_PROTECTION),%ebx
-	jmp	db_task_start
-#endif	/* 0 */
-/*
- * The entry point where execution resumes after last-ditch debugger task
- * switch.
- */
-Entry(db_task_start)
-	movl	%esp,%edx
-	subl	$(ISS32_SIZE),%edx
-	movl	%edx,%esp		/* allocate x86_saved_state on stack */
-	movl	%eax,R32_ERR(%esp)
-	movl	%ebx,R32_TRAPNO(%esp)
-	pushl	%edx
-	CPU_NUMBER(%edx)
-	movl	CX(EXT(master_dbtss),%edx),%edx
-	movl	TSS_LINK(%edx),%eax
-	pushl	%eax			/* pass along selector of previous TSS */
-	call	EXT(db_tss_to_frame)
-	popl	%eax			/* get rid of TSS selector */
-	call	EXT(db_trap_from_asm)
-	addl	$0x4,%esp
-	/*
-	 * And now...?
-	 */
-	iret				/* ha, ha, ha... */
-#endif	/* MACH_KDB */
 
 /*
  *	Called as a function, makes the current thread
@@ -497,650 +158,17 @@ LEXT(thread_bootstrap_return)
 #endif
 
 LEXT(thread_exception_return)
-	CLI
-	movl	%gs:CPU_KERNEL_STACK,%ecx
-
-	movl	(%ecx),%esp			/* switch back to PCB stack */
-	xorl	%ecx,%ecx		/* don't check if we're in the PFZ */
-	jmp	EXT(return_from_trap)
-
-Entry(call_continuation)
-	movl	S_ARG0,%eax			/* get continuation */
-	movl	S_ARG1,%edx			/* continuation param */
-	movl	S_ARG2,%ecx			/* wait result */
-	movl	%gs:CPU_KERNEL_STACK,%esp	/* pop the stack */
-	xorl	%ebp,%ebp			/* zero frame pointer */
-	subl	$8,%esp				/* align the stack */
-	pushl	%ecx
-	pushl	%edx
-	call	*%eax				/* call continuation */
-	addl	$16,%esp
-	movl	%gs:CPU_ACTIVE_THREAD,%eax
-	pushl	%eax
-	call	EXT(thread_terminate)
-	
-	
-	
-/*******************************************************************************************************
- *
- * All task 'exceptions' enter lo_alltraps:
- *	esp	-> x86_saved_state_t
- * 
- * The rest of the state is set up as:	
- *	cr3	 -> kernel directory
- *	esp	 -> low based stack
- *	gs	 -> CPU_DATA_GS
- *	cs	 -> KERNEL32_CS
- *	ss/ds/es -> KERNEL_DS
- *
- *	interrupts disabled
- *	direction flag cleared
- */
-Entry(lo_alltraps)
-	movl	R32_CS(%esp),%eax	/* assume 32-bit state */
-	cmpl	$(SS_64),SS_FLAVOR(%esp)/* 64-bit? */	
-	jne	1f
-	movl	R64_CS(%esp),%eax	/* 64-bit user mode */
-1:
-	testb	$3,%al
-	jz	trap_from_kernel
-						/* user mode trap */
-	TIME_TRAP_UENTRY
-
-	movl	%gs:CPU_ACTIVE_THREAD,%ecx
-	movl	ACT_TASK(%ecx),%ebx
-
-	/* Check for active vtimers in the current task */
-	cmpl	$0,TASK_VTIMERS(%ebx)
-	jz		1f
-
-	/* Set a pending AST */
-	orl		$(AST_BSD),%gs:CPU_PENDING_AST
-
-	/* Set a thread AST (atomic) */
-	lock
-	orl		$(AST_BSD),ACT_AST(%ecx)
-	
-1:
-	movl	%gs:CPU_KERNEL_STACK,%ebx
-	xchgl	%ebx,%esp			/* switch to kernel stack */
-	sti
-
-	CCALL1(user_trap, %ebx)		/* call user trap routine */
-	cli				/* hold off intrs - critical section */
-	popl	%esp			/* switch back to PCB stack */
-	xorl	%ecx,%ecx		/* don't check if we're in the PFZ */
-	
-/*
- * Return from trap or system call, checking for ASTs.
- * On lowbase PCB stack with intrs disabled
- */	
-LEXT(return_from_trap)
-	movl	%gs:CPU_PENDING_AST, %eax
-	testl	%eax, %eax
-	je	EXT(return_to_user)	/* branch if no AST */
-
-LEXT(return_from_trap_with_ast)
-	movl	%gs:CPU_KERNEL_STACK, %ebx
-	xchgl	%ebx, %esp		/* switch to kernel stack */
-
-	testl	%ecx, %ecx		/* see if we need to check for an EIP in the PFZ */
-	je	2f			/* no, go handle the AST */
-	cmpl	$(SS_64), SS_FLAVOR(%ebx)	/* are we a 64-bit task? */
-	je	1f
-					/* no... 32-bit user mode */
-	movl	R32_EIP(%ebx), %eax
-	pushl	%ebx			/* save PCB stack */
-	xorl	%ebp, %ebp		/* clear frame pointer */
-	CCALL1(commpage_is_in_pfz32, %eax)
-	popl	%ebx			/* retrieve pointer to PCB stack */
-	testl	%eax, %eax
-	je	2f			/* not in the PFZ... go service AST */
-	movl	%eax, R32_EBX(%ebx)	/* let the PFZ know we've pended an AST */
-	xchgl	%ebx, %esp		/* switch back to PCB stack */
-	jmp	EXT(return_to_user)
-1:					/* 64-bit user mode */
-	movl	R64_RIP(%ebx), %ecx
-	movl	R64_RIP+4(%ebx), %eax
-	pushl	%ebx			/* save PCB stack */
-	xorl	%ebp, %ebp		/* clear frame pointer */
-	CCALL2(commpage_is_in_pfz64, %ecx, %eax)
-	popl	%ebx			/* retrieve pointer to PCB stack */
-	testl	%eax, %eax		
-	je	2f			/* not in the PFZ... go service AST */
-	movl	%eax, R64_RBX(%ebx)	/* let the PFZ know we've pended an AST */
-	xchgl	%ebx, %esp		/* switch back to PCB stack */
-	jmp	EXT(return_to_user)
-2:	
-	STI				/* interrupts always enabled on return to user mode */
-	pushl	%ebx			/* save PCB stack */
-	xorl	%ebp, %ebp		/* Clear framepointer */
-	CCALL1(i386_astintr, $0)	/* take the AST */
-	CLI
-	
-	popl	%esp			/* switch back to PCB stack (w/exc link) */
-
-	xorl	%ecx, %ecx		/* don't check if we're in the PFZ */
-	jmp	EXT(return_from_trap)	/* and check again (rare) */
-
-LEXT(return_to_user)
-	TIME_TRAP_UEXIT
-	
-LEXT(ret_to_user)
-	cmpl	$0, %gs:CPU_IS64BIT
-	je	EXT(lo_ret_to_user)
-	jmp	EXT(lo64_ret_to_user)
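A gloss on the PFZ handling above: commpage routines inside the preemption-free zone must run to completion, so when return_from_trap_with_ast finds the interrupted EIP/RIP inside the PFZ, it stores the nonzero return of commpage_is_in_pfz32/64 into the saved EBX/RBX (which the PFZ code polls on exit) and returns to user mode without servicing the AST. A minimal C sketch of the 32-bit case, assuming only the saved-state fields the assembly touches:

/* Sketch only: the authoritative sequence is the assembly above. */
static void
ast_check_user32(x86_saved_state32_t *regs)
{
	uint32_t in_pfz = commpage_is_in_pfz32(regs->eip);

	if (in_pfz)
		regs->ebx = in_pfz;	/* let the PFZ know an AST is pended */
	else
		i386_astintr(0);	/* not in the PFZ: take the AST now */
}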
-
-
-
-/*
- * Trap from kernel mode.  No need to switch stacks.
- * Interrupts must be off here; we restore them to their state at the time
- * of the trap as soon as it is safe to do so without recursing into preemption.
- */
-trap_from_kernel:
-	movl	%esp, %eax		/* saved state addr */
-	pushl	R32_EIP(%esp)		/* Simulate a CALL from fault point */
-	pushl   %ebp			/* Extend framepointer chain */
-	movl	%esp, %ebp
-	CCALL1(kernel_trap, %eax)	/* Call kernel trap handler */
-	popl	%ebp
-	addl	$4, %esp
 	cli
+	xorl	%ecx,%ecx			/* don't check if in the PFZ */
+	cmpl    $0, %gs:CPU_IS64BIT
+	je	EXT(return_from_trap32)
+	jmp	EXT(return_from_trap)
 
-	movl	%gs:CPU_PENDING_AST,%eax		/* get pending asts */
-	testl	$ AST_URGENT,%eax	/* any urgent preemption? */
-	je	ret_to_kernel			/* no, nothing to do */
-	cmpl	$ T_PREEMPT,R32_TRAPNO(%esp)
-	je	ret_to_kernel			  /* T_PREEMPT handled in kernel_trap() */
-	testl	$ EFL_IF,R32_EFLAGS(%esp)		/* interrupts disabled? */
-	je	ret_to_kernel
-	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL		/* preemption disabled? */
-	jne	ret_to_kernel
-	movl	%gs:CPU_KERNEL_STACK,%eax
-	movl	%esp,%ecx
-	xorl	%eax,%ecx
-	and	EXT(kernel_stack_mask),%ecx
-	testl	%ecx,%ecx		/* are we on the kernel stack? */
-	jne	ret_to_kernel		/* no, skip it */
-
-	CCALL1(i386_astintr, $1)	/* take the AST */
-
-ret_to_kernel:
-	cmpl	$0, %gs:CPU_IS64BIT
-	je	EXT(lo_ret_to_kernel)
-	jmp	EXT(lo64_ret_to_kernel)
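The xorl/and pair used above (and again in the interrupt path below) is a branch-free containment test: %esp lies on the current kernel stack exactly when it agrees with CPU_KERNEL_STACK in every bit kept by kernel_stack_mask. A sketch, under the assumption that the mask clears only the stack-offset bits:

/* Sketch: nonzero iff esp falls within the current kernel stack. */
static int
on_kernel_stack(uintptr_t esp, uintptr_t kstack, uintptr_t stack_mask)
{
	/* Equal high-order bits => same stack-sized, aligned window. */
	return ((esp ^ kstack) & stack_mask) == 0;
}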
-
-
-
-/*******************************************************************************************************
- *
- * All interrupts on all tasks enter here with:
- *	esp	 -> x86_saved_state_t
- *
- *	cr3	 -> kernel directory
- *	esp	 -> low based stack
- *	gs	 -> CPU_DATA_GS
- *	cs	 -> KERNEL32_CS
- *	ss/ds/es -> KERNEL_DS
- *
- *	interrupts disabled
- *	direction flag cleared
- */
-Entry(lo_allintrs)
-	/*
-	 * test whether already on interrupt stack
-	 */
-	movl	%gs:CPU_INT_STACK_TOP,%ecx
-	cmpl	%esp,%ecx
-	jb	1f
-	leal	-INTSTACK_SIZE(%ecx),%edx
-	cmpl	%esp,%edx
-	jb	int_from_intstack
-1:	
-	xchgl	%ecx,%esp		/* switch to interrupt stack */
-
-	movl	%cr0,%eax		/* get cr0 */
-	orl	$(CR0_TS),%eax		/* or in TS bit */
-	movl	%eax,%cr0		/* set cr0 */
-
-	subl	$8, %esp		/* for 16-byte stack alignment */
-	pushl	%ecx			/* save pointer to old stack */
-	movl	%ecx,%gs:CPU_INT_STATE	/* save intr state */
-	
-	TIME_INT_ENTRY			/* do timing */
-
-	movl	%gs:CPU_ACTIVE_THREAD,%ecx
-	movl	ACT_TASK(%ecx),%ebx
-
-	/* Check for active vtimers in the current task */
-	cmpl	$0,TASK_VTIMERS(%ebx)
-	jz		1f
-
-	/* Set a pending AST */
-	orl		$(AST_BSD),%gs:CPU_PENDING_AST
-
-	/* Set a thread AST (atomic) */
-	lock
-	orl		$(AST_BSD),ACT_AST(%ecx)
-	
-1:
-	incl	%gs:CPU_PREEMPTION_LEVEL
-	incl	%gs:CPU_INTERRUPT_LEVEL
-
-	movl	%gs:CPU_INT_STATE, %eax
-	CCALL1(interrupt, %eax)		/* call generic interrupt routine */
-
-	cli				/* just in case we returned with intrs enabled */
-	xorl	%eax,%eax
-	movl	%eax,%gs:CPU_INT_STATE	/* clear intr state pointer */
-
-	decl	%gs:CPU_INTERRUPT_LEVEL
-	decl	%gs:CPU_PREEMPTION_LEVEL
-
-	TIME_INT_EXIT			/* do timing */
-
-	movl	%gs:CPU_ACTIVE_THREAD,%eax
-	movl	ACT_PCB(%eax),%eax	/* get act`s PCB */
-	movl	PCB_FPS(%eax),%eax	/* get pcb's ims.ifps */
-	cmpl	$0,%eax			/* Is there a context */
-	je	1f			/* Branch if not */
-	movl	FP_VALID(%eax),%eax	/* Load fp_valid */
-	cmpl	$0,%eax			/* Check if valid */
-	jne	1f			/* Branch if valid */
-	clts				/* Clear TS */
-	jmp	2f
-1:
-	movl	%cr0,%eax		/* get cr0 */
-	orl	$(CR0_TS),%eax		/* or in TS bit */
-	movl	%eax,%cr0		/* set cr0 */
-2:
-	popl	%esp			/* switch back to old stack */
-
-	/* Load interrupted code segment into %eax */
-	movl	R32_CS(%esp),%eax	/* assume 32-bit state */
-	cmpl	$(SS_64),SS_FLAVOR(%esp)/* 64-bit? */	
-	jne	3f
-	movl	R64_CS(%esp),%eax	/* 64-bit user mode */
-3:
-	testb	$3,%al			/* user mode, */
-	jnz	ast_from_interrupt_user	/* go handle potential ASTs */
-	/*
-	 * we only want to handle preemption requests if
-	 * the interrupt fell in the kernel context
-	 * and preemption isn't disabled
-	 */
-	movl	%gs:CPU_PENDING_AST,%eax	
-	testl	$ AST_URGENT,%eax		/* any urgent requests? */
-	je	ret_to_kernel			/* no, nothing to do */
-
-	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL	/* preemption disabled? */
-	jne	ret_to_kernel			/* yes, skip it */
-
-	movl	%gs:CPU_KERNEL_STACK,%eax
-	movl	%esp,%ecx
-	xorl	%eax,%ecx
-	and	EXT(kernel_stack_mask),%ecx
-	testl	%ecx,%ecx			/* are we on the kernel stack? */
-	jne	ret_to_kernel			/* no, skip it */
-
-	/*
-	 * Take an AST from kernel space.  We don't need (and don't want)
-	 * to do as much as the case where the interrupt came from user
-	 * space.
-	 */
-	CCALL1(i386_astintr, $1)
-
-	jmp	ret_to_kernel
-
-
-/*
- * nested int - simple path, can't preempt etc on way out
- */
-int_from_intstack:
-	incl	%gs:CPU_PREEMPTION_LEVEL
-	incl	%gs:CPU_INTERRUPT_LEVEL
-	incl	%gs:CPU_NESTED_ISTACK
-
-	movl	%esp, %edx		/* x86_saved_state */
-	CCALL1(interrupt, %edx)
-
-	decl	%gs:CPU_INTERRUPT_LEVEL
-	decl	%gs:CPU_PREEMPTION_LEVEL
- 	decl	%gs:CPU_NESTED_ISTACK
-	jmp	ret_to_kernel
-
-/*
- *	Take an AST from an interrupted user
- */
-ast_from_interrupt_user:
-	movl	%gs:CPU_PENDING_AST,%eax
-	testl	%eax,%eax		/* pending ASTs? */
-	je	EXT(ret_to_user)	/* no, nothing to do */
-
-	TIME_TRAP_UENTRY
-
-	movl	$1, %ecx		/* check if we're in the PFZ */
-	jmp	EXT(return_from_trap_with_ast)	/* return */
-
-
-/*******************************************************************************************************
- *
- * 32-bit Tasks
- * System call entries via INTR_GATE or sysenter:
- *
- *	esp	 -> x86_saved_state32_t
- *	cr3	 -> kernel directory
- *	esp	 -> low based stack
- *	gs	 -> CPU_DATA_GS
- *	cs	 -> KERNEL32_CS
- *	ss/ds/es -> KERNEL_DS
- *
- *	interrupts disabled
- *	direction flag cleared
- */
-
-Entry(lo_sysenter)
-	/*
-	 * We can be here either for a mach syscall or a unix syscall,
-	 * as indicated by the sign of the code:
-	 */
-	movl	R32_EAX(%esp),%eax
-	testl	%eax,%eax
-	js	EXT(lo_mach_scall)		/* < 0 => mach */
-						/* > 0 => unix */
-	
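The sign test above is the entire 32-bit dispatch: Mach trap numbers are negative and BSD syscall numbers positive in %eax. Restated as C for clarity (the handler names are hypothetical stand-ins for the assembly labels):

/* Illustration only: mirrors the js/fall-through pair above. */
static void
sysenter_dispatch(int32_t call_number)
{
	if (call_number < 0)
		handle_mach_scall();	/* < 0 => mach */
	else
		handle_unix_scall();	/* > 0 => unix */
}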
-Entry(lo_unix_scall)
-	TIME_TRAP_UENTRY
-
-	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread     */
-	movl	ACT_TASK(%ecx),%ebx			/* point to current task  */
-	addl	$1,TASK_SYSCALLS_UNIX(%ebx)	/* increment call count   */
-
-	/* Check for active vtimers in the current task */
-	cmpl	$0,TASK_VTIMERS(%ebx)
-	jz		1f
-
-	/* Set a pending AST */
-	orl		$(AST_BSD),%gs:CPU_PENDING_AST
-
-	/* Set a thread AST (atomic) */
-	lock
-	orl		$(AST_BSD),ACT_AST(%ecx)
-	
-1:
-	movl	%gs:CPU_KERNEL_STACK,%ebx
-	xchgl	%ebx,%esp		/* switch to kernel stack */
-
-	sti
-
-	CCALL1(unix_syscall, %ebx)
-	/*
-	 * always returns through thread_exception_return
-	 */
-
-
-Entry(lo_mach_scall)
-	TIME_TRAP_UENTRY
-
-	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread     */
-	movl	ACT_TASK(%ecx),%ebx			/* point to current task  */
-	addl	$1,TASK_SYSCALLS_MACH(%ebx)	/* increment call count   */
-
-	/* Check for active vtimers in the current task */
-	cmpl	$0,TASK_VTIMERS(%ebx)
-	jz		1f
-
-	/* Set a pending AST */
-	orl		$(AST_BSD),%gs:CPU_PENDING_AST
-
-	/* Set a thread AST (atomic) */
-	lock
-	orl		$(AST_BSD),ACT_AST(%ecx)
-	
-1:
-	movl	%gs:CPU_KERNEL_STACK,%ebx
-	xchgl	%ebx,%esp		/* switch to kernel stack */
-
-	sti
-
-	CCALL1(mach_call_munger, %ebx)
-	/*
-	 * always returns through thread_exception_return
-	 */
-
-
-Entry(lo_mdep_scall)
-	TIME_TRAP_UENTRY
-
-	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread     */
-	movl	ACT_TASK(%ecx),%ebx			/* point to current task  */
-
-	/* Check for active vtimers in the current task */
-	cmpl	$0,TASK_VTIMERS(%ebx)
-	jz		1f
-
-	/* Set a pending AST */
-	orl		$(AST_BSD),%gs:CPU_PENDING_AST
-
-	/* Set a thread AST (atomic) */
-	lock
-	orl		$(AST_BSD),ACT_AST(%ecx)
-	
-1:
-	movl	%gs:CPU_KERNEL_STACK,%ebx
-	xchgl	%ebx,%esp		/* switch to kernel stack */
-
-	sti
-
-	CCALL1(machdep_syscall, %ebx)
-	/*
-	 * always returns through thread_exception_return
-	 */
-
-
-Entry(lo_diag_scall)
-	TIME_TRAP_UENTRY
-
-	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread     */
-	movl	ACT_TASK(%ecx),%ebx			/* point to current task  */
-
-	/* Check for active vtimers in the current task */
-	cmpl	$0,TASK_VTIMERS(%ebx)
-	jz		1f
-
-	/* Set a pending AST */
-	orl		$(AST_BSD),%gs:CPU_PENDING_AST
-
-	/* Set a thread AST (atomic) */
-	lock
-	orl		$(AST_BSD),ACT_AST(%ecx)
-	
-1:
-	movl	%gs:CPU_KERNEL_STACK,%ebx	// Get the address of the kernel stack
-	xchgl	%ebx,%esp		// Switch to it, saving the previous
-
-	CCALL1(diagCall, %ebx)		// Call diagnostics
-	
-	cmpl	$0,%eax			// What kind of return is this?
-	je	2f
-	cli				// Disable interruptions just in case they were enabled
-	popl	%esp			// Get back the original stack
-	jmp	EXT(return_to_user)	// Normal return, do not check asts...
-2:	
-	CCALL5(i386_exception, $EXC_SYSCALL, $0x6000, $0, $1, $0)
-		// pass what would be the diag syscall
-		// error return - cause an exception
-	/* no return */
-	
-
-
-/*******************************************************************************************************
- *
- * 64-bit Tasks
- * System call entries via syscall only:
- *
- *	esp	 -> x86_saved_state64_t
- *	cr3	 -> kernel directory
- *	esp	 -> low based stack
- *	gs	 -> CPU_DATA_GS
- *	cs	 -> KERNEL32_CS
- *	ss/ds/es -> KERNEL_DS
- *
- *	interrupts disabled
- *	direction flag cleared
- */
-
-Entry(lo_syscall)
-	TIME_TRAP_UENTRY
-
-	/*
- * We can be here for a mach, unix, machdep, or diag syscall,
-	 * as indicated by the syscall class:
-	 */
-	movl	R64_RAX(%esp), %eax		/* syscall number/class */
-	movl	%eax, %ebx
-	andl	$(SYSCALL_CLASS_MASK), %ebx	/* syscall class */
-	cmpl	$(SYSCALL_CLASS_MACH<<SYSCALL_CLASS_SHIFT), %ebx
-	je	EXT(lo64_mach_scall)
-	cmpl	$(SYSCALL_CLASS_UNIX<<SYSCALL_CLASS_SHIFT), %ebx
-	je	EXT(lo64_unix_scall)
-	cmpl	$(SYSCALL_CLASS_MDEP<<SYSCALL_CLASS_SHIFT), %ebx
-	je	EXT(lo64_mdep_scall)
-	cmpl	$(SYSCALL_CLASS_DIAG<<SYSCALL_CLASS_SHIFT), %ebx
-	je	EXT(lo64_diag_scall)
-
-	movl	%gs:CPU_KERNEL_STACK,%ebx
-	xchgl	%ebx,%esp		/* switch to kernel stack */
-
-	sti
-
-	/* Syscall class unknown */
-	CCALL5(i386_exception, $(EXC_SYSCALL), %eax, $0, $1, $0)
-	/* no return */
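For 64-bit tasks the class rides in the high bits of the call number rather than in its sign. A sketch of the classification performed by the compare chain above, using the SYSCALL_CLASS_* constants it references:

/* Sketch: classify a 64-bit syscall number as the chain above does. */
static int
syscall_class_of(uint64_t rax)
{
	uint32_t class = (uint32_t)rax & SYSCALL_CLASS_MASK;

	if (class == (SYSCALL_CLASS_MACH << SYSCALL_CLASS_SHIFT))
		return SYSCALL_CLASS_MACH;
	if (class == (SYSCALL_CLASS_UNIX << SYSCALL_CLASS_SHIFT))
		return SYSCALL_CLASS_UNIX;
	if (class == (SYSCALL_CLASS_MDEP << SYSCALL_CLASS_SHIFT))
		return SYSCALL_CLASS_MDEP;
	if (class == (SYSCALL_CLASS_DIAG << SYSCALL_CLASS_SHIFT))
		return SYSCALL_CLASS_DIAG;
	return -1;	/* unknown class: i386_exception(EXC_SYSCALL, ...) */
}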
-
-
-Entry(lo64_unix_scall)
-	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread     */
-	movl	ACT_TASK(%ecx),%ebx			/* point to current task  */
-	addl	$1,TASK_SYSCALLS_UNIX(%ebx)	/* increment call count   */
-
-	/* Check for active vtimers in the current task */
-	cmpl	$0,TASK_VTIMERS(%ebx)
-	jz		1f
-
-	/* Set a pending AST */
-	orl		$(AST_BSD),%gs:CPU_PENDING_AST
-
-	/* Set a thread AST (atomic) */
-	lock
-	orl		$(AST_BSD),ACT_AST(%ecx)
-	
-1:
-	movl	%gs:CPU_KERNEL_STACK,%ebx
-	xchgl	%ebx,%esp		/* switch to kernel stack */
-
-	sti
-
-	CCALL1(unix_syscall64, %ebx)
-	/*
-	 * always returns through thread_exception_return
-	 */
-
-
-Entry(lo64_mach_scall)
-	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread     */
-	movl	ACT_TASK(%ecx),%ebx			/* point to current task  */
-	addl	$1,TASK_SYSCALLS_MACH(%ebx)	/* increment call count   */
-
-	/* Check for active vtimers in the current task */
-	cmpl	$0,TASK_VTIMERS(%ebx)
-	jz		1f
-
-	/* Set a pending AST */
-	orl		$(AST_BSD),%gs:CPU_PENDING_AST
-
-	lock
-	orl		$(AST_BSD),ACT_AST(%ecx)
-	
-1:
-	movl	%gs:CPU_KERNEL_STACK,%ebx
-	xchgl	%ebx,%esp		/* switch to kernel stack */
-
-	sti
-
-	CCALL1(mach_call_munger64, %ebx)
-	/*
-	 * always returns through thread_exception_return
-	 */
-
-
-
-Entry(lo64_mdep_scall)
-	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread     */
-	movl	ACT_TASK(%ecx),%ebx			/* point to current task  */
-
-	/* Check for active vtimers in the current task */
-	cmpl	$0,TASK_VTIMERS(%ebx)
-	jz		1f
-
-	/* Set a pending AST */
-	orl		$(AST_BSD),%gs:CPU_PENDING_AST
-
-	/* Set a thread AST (atomic) */
-	lock
-	orl		$(AST_BSD),ACT_AST(%ecx)
-	
-1:
-	movl	%gs:CPU_KERNEL_STACK,%ebx
-	xchgl	%ebx,%esp		/* switch to kernel stack */
-
-	sti
-
-	CCALL1(machdep_syscall64, %ebx)
-	/*
-	 * always returns through thread_exception_return
-	 */
-
-
-Entry(lo64_diag_scall)
-	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread     */
-	movl	ACT_TASK(%ecx),%ebx			/* point to current task  */
-
-	/* Check for active vtimers in the current task */
-	cmpl	$0,TASK_VTIMERS(%ebx)
-	jz		1f
-
-	/* Set a pending AST */
-	orl		$(AST_BSD),%gs:CPU_PENDING_AST
 
-	/* Set a thread AST (atomic) */
-	lock
-	orl		$(AST_BSD),ACT_AST(%ecx)
-	
-1:
-	movl	%gs:CPU_KERNEL_STACK,%ebx // Get the address of the kernel stack
-	xchgl	%ebx,%esp		// Switch to it, saving the previous
-
-	CCALL1(diagCall64, %ebx)	// Call diagnostics
-		
-	cmpl	$0,%eax			// What kind of return is this?
-	je	2f
-	cli				// Disable interruptions just in case they were enabled
-	popl	%esp			// Get back the original stack
-	jmp	EXT(return_to_user)	// Normal return, do not check asts...
-2:	
-	CCALL5(i386_exception, $EXC_SYSCALL, $0x6000, $0, $1, $0)
-		// pass what would be the diag syscall
-		// error return - cause an exception
-	/* no return */
-
-/**/
 /*
  * Utility routines.
  */
 
-
 /*
  * Copy from user/kernel address space.
  * arg0:	window offset or kernel address
@@ -1312,6 +340,7 @@ copyout_fail:
 	movl	$(EFAULT),%eax		/* return error for failure */
 	jmp	copyout_ret		/* pop frame and return */
 
+
 /*
  * io register must not be used on slaves (no AT bus)
  */
@@ -1336,141 +365,6 @@ copyout_fail:
 
 #endif	/* MACH_ASSERT */
 
-/*
- * void loutb(unsigned short io_port,
- *	      unsigned char *data,
- *	      unsigned int count)
- *
- * Output an array of bytes to an IO port.
- */
-ENTRY(loutb)
-ENTRY(outsb)
-	PUSH_FRAME
-	ILL_ON_SLAVE
-	movl	%esi,%eax		/* save register */
-	movl	ARG0,%edx		/* get io port number */
-	movl	ARG1,%esi		/* get data address */
-	movl	ARG2,%ecx		/* get count */
-	cld				/* count up */
-	rep
-	outsb				/* output */
-	movl	%eax,%esi		/* restore register */
-	POP_FRAME
-	ret	
-
-
-/*
- * void loutw(unsigned short io_port,
- *	      unsigned short *data,
- *	      unsigned int count)
- *
- * Output an array of shorts to an IO port.
- */
-ENTRY(loutw)
-ENTRY(outsw)
-	PUSH_FRAME
-	ILL_ON_SLAVE
-	movl	%esi,%eax		/* save register */
-	movl	ARG0,%edx		/* get io port number */
-	movl	ARG1,%esi		/* get data address */
-	movl	ARG2,%ecx		/* get count */
-	cld				/* count up */
-	rep
-	outsw				/* output */
-	movl	%eax,%esi		/* restore register */
-	POP_FRAME
-	ret
-
-/*
- * void loutl(unsigned short io_port,
- *	      unsigned int *data,
- *	      unsigned int count)
- *
- * Output an array of longs to an IO port.
- */
-ENTRY(loutl)
-ENTRY(outsl)
-	PUSH_FRAME
-	ILL_ON_SLAVE
-	movl	%esi,%eax		/* save register */
-	movl	ARG0,%edx		/* get io port number */
-	movl	ARG1,%esi		/* get data address */
-	movl	ARG2,%ecx		/* get count */
-	cld				/* count up */
-	rep
-	outsl				/* output */
-	movl	%eax,%esi		/* restore register */
-	POP_FRAME
-	ret
-
-
-/*
- * void linb(unsigned short io_port,
- *	     unsigned char *data,
- *	     unsigned int count)
- *
- * Input an array of bytes from an IO port.
- */
-ENTRY(linb)
-ENTRY(insb)
-	PUSH_FRAME
-	ILL_ON_SLAVE
-	movl	%edi,%eax		/* save register */
-	movl	ARG0,%edx		/* get io port number */
-	movl	ARG1,%edi		/* get data address */
-	movl	ARG2,%ecx		/* get count */
-	cld				/* count up */
-	rep
-	insb				/* input */
-	movl	%eax,%edi		/* restore register */
-	POP_FRAME
-	ret
-
-
-/*
- * void linw(unsigned short io_port,
- *	     unsigned short *data,
- *	     unsigned int count)
- *
- * Input an array of shorts from an IO port.
- */
-ENTRY(linw)
-ENTRY(insw)
-	PUSH_FRAME
-	ILL_ON_SLAVE
-	movl	%edi,%eax		/* save register */
-	movl	ARG0,%edx		/* get io port number */
-	movl	ARG1,%edi		/* get data address */
-	movl	ARG2,%ecx		/* get count */
-	cld				/* count up */
-	rep
-	insw				/* input */
-	movl	%eax,%edi		/* restore register */
-	POP_FRAME
-	ret
-
-
-/*
- * void linl(unsigned short io_port,
- *	     unsigned int *data,
- *	     unsigned int count)
- *
- * Input an array of longs from an IO port.
- */
-ENTRY(linl)
-ENTRY(insl)
-	PUSH_FRAME
-	ILL_ON_SLAVE
-	movl	%edi,%eax		/* save register */
-	movl	ARG0,%edx		/* get io port number */
-	movl	ARG1,%edi		/* get data address */
-	movl	ARG2,%ecx		/* get count */
-	cld				/* count up */
-	rep
-	insl				/* input */
-	movl	%eax,%edi		/* restore register */
-	POP_FRAME
-	ret
 
 /*
  * int rdmsr_carefully(uint32_t msr, uint32_t *lo, uint32_t *hi)
@@ -1509,8 +403,6 @@ ENTRY(dr_addr)
 
 	.text
 
-#ifndef	SYMMETRY
-
 /*
  * ffs(mask)
  */
@@ -1537,8 +429,6 @@ Entry(cpu_shutdown)
         div     %ecx,%eax       /* reboot now */
         ret                     /* this will "never" be executed */
 
-#endif	/* SYMMETRY */
-
 
 /*
  * setbit(int bitno, int *s) - set bit in bit string
@@ -1587,120 +477,3 @@ ENTRY(testbit)
 	sbbl	%eax,%eax
 	ret
 
-ENTRY(get_pc)
-	movl	4(%ebp),%eax
-	ret
-
-ENTRY(minsecurity)
-	pushl	%ebp
-	movl	%esp,%ebp
-/*
- * jail: set the EIP to "jail" to block a kernel thread.
- * Useful to debug synchronization problems on MPs.
- */
-ENTRY(jail)
-	jmp	EXT(jail)
-
-/*
- * unsigned int
- * div_scale(unsigned int dividend,
- *	     unsigned int divisor,
- *	     unsigned int *scale)
- *
- * This function returns (dividend << *scale) / divisor, where *scale
- * is the largest possible value before overflow. This is used in
- * computation where precision must be achieved in order to avoid
- * floating point usage.
- *
- * Algorithm:
- *	*scale = 0;
- *	while ((dividend >> *scale) >= divisor)
- *		(*scale)++;
- *	*scale = 32 - *scale;
- *	return ((dividend << *scale) / divisor);  
- */
-ENTRY(div_scale)
-	PUSH_FRAME
-	xorl	%ecx, %ecx		/* *scale = 0 */
-	xorl	%eax, %eax
-	movl	ARG0, %edx		/* get dividend */
-0:
-	cmpl	ARG1, %edx 		/* if (divisor > dividend) */
-	jle	1f			/* goto 1f */
-	addl	$1, %ecx		/* (*scale)++ */
-	shrdl	$1, %edx, %eax		/* dividend >> 1 */
-	shrl	$1, %edx 		/* dividend >> 1 */
-	jmp	0b			/* goto 0b */
-1:	
-	divl	ARG1			/* (dividend << (32 - *scale)) / divisor */
-	movl	ARG2, %edx		/* get scale */
-	movl	$32, (%edx)		/* *scale = 32 */
-	subl	%ecx, (%edx)		/* *scale -= %ecx */
-	POP_FRAME
-	ret
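The algorithm comment above translates almost line for line into C; here is a hedged sketch that uses a uint64_t where the assembly keeps the wide value in edx:eax (as in the original, divisor must be nonzero):

/* Sketch: (dividend << *scale) / divisor with the largest safe *scale. */
unsigned int
div_scale_c(unsigned int dividend, unsigned int divisor, unsigned int *scale)
{
	unsigned int shift = 0;

	while ((dividend >> shift) >= divisor)
		shift++;		/* quotient would overflow 32 bits */
	*scale = 32 - shift;
	return (unsigned int)(((uint64_t)dividend << *scale) / divisor);
}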
-
-/*
- * unsigned int
- * mul_scale(unsigned int multiplicand,
- *	     unsigned int multiplier,
- *	     unsigned int *scale)
- *
- * This function returns ((multiplicand * multiplier) >> *scale) where
- * *scale is the largest possible value before overflow. This is used in
- * computation where precision must be achieved in order to avoid
- * floating point usage.
- *
- * Algorithm:
- *	*scale = 0;
- *	while (overflow((multiplicand * multiplier) >> *scale))
- *		(*scale)++;
- *	return ((multiplicand * multiplier) >> *scale);
- */
-ENTRY(mul_scale)
-	PUSH_FRAME
-	xorl	%ecx, %ecx		/* *scale = 0 */
-	movl	ARG0, %eax		/* get multiplicand */
-	mull	ARG1			/* multiplicand * multiplier */
-0:
-	cmpl	$0, %edx		/* if (!overflow()) */
-	je	1f			/* goto 1 */
-	addl	$1, %ecx		/* (*scale)++ */
-	shrdl	$1, %edx, %eax		/* (multiplicand * multiplier) >> 1 */
-	shrl	$1, %edx		/* (multiplicand * multiplier) >> 1 */
-	jmp	0b
-1:
-	movl	ARG2, %edx		/* get scale */
-	movl	%ecx, (%edx)		/* set *scale */
-	POP_FRAME
-	ret
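mul_scale admits the same restatement; a sketch with the edx:eax product held in a single 64-bit variable:

/* Sketch: (multiplicand * multiplier) >> *scale, smallest safe *scale. */
unsigned int
mul_scale_c(unsigned int multiplicand, unsigned int multiplier,
	    unsigned int *scale)
{
	uint64_t product = (uint64_t)multiplicand * multiplier;
	unsigned int shift = 0;

	while ((product >> shift) > 0xFFFFFFFFULL)
		shift++;		/* still overflows 32 bits */
	*scale = shift;
	return (unsigned int)(product >> shift);
}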
-
-
-	
-/*
- * Double-fault exception handler task. The last gasp...
- */
-Entry(df_task_start)
-	CCALL1(panic_double_fault32, $(T_DOUBLE_FAULT))
-	hlt
-
-
-/*
- * machine-check handler task. The last gasp...
- */
-Entry(mc_task_start)
-	CCALL1(panic_machine_check32, $(T_MACHINE_CHECK))
-	hlt
-
-/*
- * Compatibility mode's last gasp...
- */
-Entry(lo_df64)
-	movl	%esp, %eax
-	CCALL1(panic_double_fault64, %eax)
-	hlt
-
-Entry(lo_mc64)
-	movl	%esp, %eax
-	CCALL1(panic_machine_check64, %eax)
-	hlt
-
diff --git a/osfmk/i386/loose_ends.c b/osfmk/i386/loose_ends.c
index bc205fe1e..ee59d599a 100644
--- a/osfmk/i386/loose_ends.c
+++ b/osfmk/i386/loose_ends.c
@@ -78,7 +78,6 @@
 #include <libkern/OSAtomic.h>
 #include <sys/kdebug.h>
 
-
 #if 0
 
 #undef KERNEL_DEBUG
@@ -105,8 +104,7 @@ void machine_callstack(natural_t *buf, vm_size_t callstack_max);
 #define value_64bit(value)  ((value) & 0xFFFFFFFF00000000LL)
 #define low32(x)  ((unsigned int)((x) & 0x00000000FFFFFFFFLL))
 
-
-
+#define JOE_DEBUG 0
 
 void
 bzero_phys_nc(
@@ -166,6 +164,38 @@ bcopy_phys(
 	mp_enable_preemption();
 }
 
+/*
+ * allow a function to get a quick virtual mapping of a physical page 
+ */
+
+int
+apply_func_phys(
+		addr64_t dst64,
+		vm_size_t bytes,
+		int (*func)(void * buffer, vm_size_t bytes, void * arg),
+		void * arg)
+{
+        mapwindow_t *dst_map;
+	int rc = -1;
+
+	/* ensure we stay within a page */
+	if ((((uint32_t)dst64 & (NBPG-1)) + bytes) > NBPG) {
+	        panic("apply_func_phys alignment");
+	}
+	mp_disable_preemption();
+
+	dst_map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | ((pmap_paddr_t)dst64 & PG_FRAME) |
+						  INTEL_PTE_REF | INTEL_PTE_MOD));
+
+	rc = func((void *)((uintptr_t)dst_map->prv_CADDR | ((uint32_t)dst64 & INTEL_OFFMASK)), bytes, arg);
+
+	pmap_put_mapwindow(dst_map);
+
+	mp_enable_preemption();
+
+	return rc;
+}
+
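A possible caller of the new apply_func_phys, sketched to show the contract: the span must not cross a page boundary, and the callback runs with preemption disabled on a transient mapping, so it must not block. The names below are hypothetical:

/* Hypothetical callback: byte-sum a span within one physical page. */
static int
sum_bytes(void *buffer, vm_size_t bytes, void *arg)
{
	uint32_t *sum = (uint32_t *)arg;
	uint8_t *p = (uint8_t *)buffer;

	while (bytes--)
		*sum += *p++;
	return 0;
}

/* Usage: rc = apply_func_phys(pa, len, sum_bytes, &sum);
 * pa .. pa+len must stay within one page, or the routine panics. */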
 /* 
  * ovbcopy - like bcopy, but recognizes overlapping ranges and handles 
  *           them correctly.
@@ -324,7 +354,7 @@ ml_phys_write_data(pmap_paddr_t paddr, unsigned long data, int size)
             break;
         case 4:
         default:
-	    *(unsigned int *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)) = data;
+	    *(unsigned int *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)) = (uint32_t)data;
             break;
         }
         pmap_put_mapwindow(map);
@@ -455,7 +485,7 @@ int bcmp(
 			break;
 	while (--len);
 
-	return len;
+	return (int)len;
 }
 
 int
@@ -569,7 +599,7 @@ void dcache_incoherent_io_store64(addr64_t pa, unsigned int count)
 
         count += offset;
         offset = (uint32_t)(addr & ((addr64_t) (page_size - 1)));
-        chunk  = page_size - offset;
+        chunk  = (uint32_t)page_size - offset;
 
         do
         {
@@ -581,7 +611,7 @@ void dcache_incoherent_io_store64(addr64_t pa, unsigned int count)
 
             count -= chunk;
             addr  += chunk;
-            chunk  = page_size;
+            chunk  = (uint32_t) page_size;
             offset = 0;
 
 	    if (count) {
@@ -603,11 +633,23 @@ void dcache_incoherent_io_flush64(addr64_t pa, unsigned int count)
     return(dcache_incoherent_io_store64(pa,count));
 }
 
+
 void
-flush_dcache64(__unused addr64_t addr,
-	       __unused unsigned count,
-	       __unused int phys)
+flush_dcache64(addr64_t addr, unsigned count, int phys)
 {
+	if (phys) {
+		dcache_incoherent_io_flush64(addr, count);
+	}
+	else {
+		uint32_t  linesize = cpuid_info()->cache_linesize;
+		addr64_t  bound = (addr + count + linesize - 1) & ~(linesize - 1);
+		__mfence();
+		while (addr < bound) {
+			__clflush((void *) (uintptr_t) addr);
+			addr += linesize;
+		}
+		__mfence();
+	}
 }
 
 void
@@ -661,570 +703,78 @@ cache_flush_page_phys(ppnum_t pa)
 }
 
 
-/*
- * the copy engine has the following characteristics
- *   - copyio handles copies to/from user or kernel space
- *   - copypv deals with physical or virtual addresses
- *
- * implementation details as follows
- *   - a cache of up to NCOPY_WINDOWS is maintained per thread for
- *     access to user virtual space
- *   - the window size is determined by the amount of virtual space
- *     that can be mapped by a single page table
- *   - the mapping is done by copying the page table pointer from
- *     the user's directory entry corresponding to the window's
- *     address in user space to the directory entry corresponding
- *     to the window slot in the kernel's address space
- *   - the set of mappings is preserved across context switches,
- *     so the copy can run with pre-emption enabled
- *   - there is a gdt entry set up to anchor the kernel window on
- *     each processor
- *   - the copies are done using the selector corresponding to the
- *     gdt entry
- *   - the addresses corresponding to the user virtual address are
- *     relative to the beginning of the window being used to map
- *     that region... thus the thread can be pre-empted and switched
- *     to a different processor while in the midst of a copy
- *   - the window caches must be invalidated if the pmap changes out
- *     from under the thread... this can happen during vfork/exec...
- *     inval_copy_windows is the invalidation routine to be used
- *   - the copyio engine has 4 different states associated with it
- *     that allows for lazy tlb flushes and the ability to avoid
- *     a flush all together if we've just come from user space
- *     the 4 states are as follows...
- *
- *	WINDOWS_OPENED - set by copyio to indicate to the context
- *	  switch code that it is necessary to do a tlbflush after
- * 	  switching the windows since we're in the middle of a copy
- *
- *	WINDOWS_CLOSED - set by copyio to indicate that it's done
- *	  using the windows, so that the context switch code need
- *	  not do the tlbflush... instead it will set the state to...
- *
- *	WINDOWS_DIRTY - set by the context switch code to indicate
- *	  to the copy engine that it is responsible for doing a 
- *	  tlbflush before using the windows again... it's also
- *	  set by the inval_copy_windows routine to indicate the
- *	  same responsibility.
- *
- *	WINDOWS_CLEAN - set by the return to user path to indicate
- * 	  that a tlbflush has happened and that there is no need
- *	  for copyio to do another when it is entered next...
- *
- *   - a window for mapping single physical pages is provided for copypv
- *   - this window is maintained across context switches and has the
- *     same characteristics as the user space windows with respect to pre-emption
- */
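The four states described above form a small handshake between copyio and the context-switch path; as a reading aid, here they are sketched as an enum with the transitions summarized (the real definitions are preprocessor constants in the machine-dependent thread state):

/* Sketch of the copyio window-state handshake described above. */
enum copyio_window_state {
	WINDOWS_OPENED,	/* copyio mid-copy: switcher must TLB-flush      */
	WINDOWS_CLOSED,	/* copyio finished: switcher just marks them...  */
	WINDOWS_DIRTY,	/* ...dirty: copyio flushes before the next use  */
	WINDOWS_CLEAN	/* return-to-user flushed: copyio may skip it    */
};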
-
-extern int copyout_user(const char *, vm_offset_t, vm_size_t);
-extern int copyout_kern(const char *, vm_offset_t, vm_size_t);
-extern int copyin_user(const vm_offset_t, char *, vm_size_t);
-extern int copyin_kern(const vm_offset_t, char *, vm_size_t);
-extern int copyoutphys_user(const char *, vm_offset_t, vm_size_t);
-extern int copyoutphys_kern(const char *, vm_offset_t, vm_size_t);
-extern int copyinphys_user(const vm_offset_t, char *, vm_size_t);
-extern int copyinphys_kern(const vm_offset_t, char *, vm_size_t);
-extern int copyinstr_user(const vm_offset_t, char *, vm_size_t, vm_size_t *);
-extern int copyinstr_kern(const vm_offset_t, char *, vm_size_t, vm_size_t *);
-
-static int copyio(int, user_addr_t, char *, vm_size_t, vm_size_t *, int);
-static int copyio_phys(addr64_t, addr64_t, vm_size_t, int);
-
-
-#define COPYIN		0
-#define COPYOUT		1
-#define COPYINSTR	2
-#define COPYINPHYS	3
-#define COPYOUTPHYS	4
-
-
-void inval_copy_windows(thread_t thread)
+#if !MACH_KDP
+void
+kdp_register_callout(void)
 {
-        int	i;
-	
-	for (i = 0; i < NCOPY_WINDOWS; i++) {
-                thread->machine.copy_window[i].user_base = -1;
-	}
-	thread->machine.nxt_window = 0;
-	thread->machine.copyio_state = WINDOWS_DIRTY;
-
-	KERNEL_DEBUG(0xeff70058 | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), (int)thread->map, 0, 0, 0);
 }
-
-
-static int
-copyio(int copy_type, user_addr_t user_addr, char *kernel_addr,
-       vm_size_t nbytes, vm_size_t *lencopied, int use_kernel_map)
-{
-        thread_t	thread;
-	pmap_t		pmap;
-	pt_entry_t	*updp;
-	pt_entry_t	*kpdp;
-	user_addr_t 	user_base;
-	vm_offset_t 	user_offset;
-	vm_offset_t 	kern_vaddr;
-	vm_size_t	cnt;
-	vm_size_t	bytes_copied;
-	int		error = 0;
-	int		window_index;
-	int		copyio_state;
-        boolean_t	istate;
-#if KDEBUG
-	int		debug_type = 0xeff70010;
-	debug_type += (copy_type << 2);
-#endif
-
-	thread = current_thread();
-
-	KERNEL_DEBUG(debug_type | DBG_FUNC_START, (int)(user_addr >> 32), (int)user_addr,
-		     (int)nbytes, thread->machine.copyio_state, 0);
-
-	if (nbytes == 0) {
-	        KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)user_addr,
-			     (unsigned)kernel_addr, (unsigned)nbytes, 0, 0);
-	        return (0);
-	}
-        pmap = thread->map->pmap;
-
-        if (pmap == kernel_pmap || use_kernel_map) {
-
-	        kern_vaddr = (vm_offset_t)user_addr;
-	  
-	        switch (copy_type) {
-
-		case COPYIN:
-		        error = copyin_kern(kern_vaddr, kernel_addr, nbytes);
-			break;
-
-		case COPYOUT:
-		        error = copyout_kern(kernel_addr, kern_vaddr, nbytes);
-			break;
-
-		case COPYINSTR:
-		        error = copyinstr_kern(kern_vaddr, kernel_addr, nbytes, lencopied);
-			break;
-
-		case COPYINPHYS:
-		        error = copyinphys_kern(kern_vaddr, kernel_addr, nbytes);
-			break;
-
-		case COPYOUTPHYS:
-		        error = copyoutphys_kern(kernel_addr, kern_vaddr, nbytes);
-			break;
-		}
-		KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)kern_vaddr,
-			     (unsigned)kernel_addr, (unsigned)nbytes,
-			     error | 0x80000000, 0);
-		return (error);
-	}
-
-#if CONFIG_DTRACE
-	thread->machine.specFlags |= CopyIOActive;
-#endif /* CONFIG_DTRACE */
-
-	if ((nbytes && (user_addr + nbytes <= user_addr)) ||
-	    (user_addr          < vm_map_min(thread->map)) ||
-	    (user_addr + nbytes > vm_map_max(thread->map))) {
-		error = EFAULT;
-		goto done;
-	}
-
-	user_base = user_addr & ~((user_addr_t)(NBPDE - 1));
-	user_offset = (vm_offset_t)(user_addr & (NBPDE - 1));
-
-	KERNEL_DEBUG(debug_type | DBG_FUNC_NONE, (int)(user_base >> 32), (int)user_base,
-		     (int)user_offset, 0, 0);
-
-	cnt = NBPDE - user_offset;
-
-	if (cnt > nbytes)
-	        cnt = nbytes;
-
-	istate = ml_set_interrupts_enabled(FALSE);
-
-	copyio_state = thread->machine.copyio_state;
-	thread->machine.copyio_state = WINDOWS_OPENED;
-
-	(void) ml_set_interrupts_enabled(istate);
-
-
-	for (;;) {
-
-	        for (window_index = 0; window_index < NCOPY_WINDOWS; window_index++) {
-		        if (thread->machine.copy_window[window_index].user_base == user_base)
-			        break;
-		}
-	        if (window_index >= NCOPY_WINDOWS) {
-
-		        window_index = thread->machine.nxt_window;
-			thread->machine.nxt_window++;
-
-			if (thread->machine.nxt_window >= NCOPY_WINDOWS)
-			        thread->machine.nxt_window = 0;
-			thread->machine.copy_window[window_index].user_base = user_base;
-
-			/*
-			 * it's necessary to disable pre-emption
-			 * since I have to compute the kernel descriptor pointer
-			 * for the new window
-			 */
-			istate = ml_set_interrupts_enabled(FALSE);
-
-		        updp = pmap_pde(pmap, user_base);
-
-			kpdp = current_cpu_datap()->cpu_copywindow_pdp;
-			kpdp += window_index;
-
-			pmap_store_pte(kpdp, updp ? *updp : 0);
-
-			(void) ml_set_interrupts_enabled(istate);
-
-		        copyio_state = WINDOWS_DIRTY;
-
-			KERNEL_DEBUG(0xeff70040 | DBG_FUNC_NONE, window_index,
-				     (unsigned)user_base, (unsigned)updp,
-				     (unsigned)kpdp, 0);
-
-		}
-#if JOE_DEBUG
-		else {
-			istate = ml_set_interrupts_enabled(FALSE);
-
-		        updp = pmap_pde(pmap, user_base);
-
-			kpdp = current_cpu_datap()->cpu_copywindow_pdp;
-
-			kpdp += window_index;
-
-			if ((*kpdp & PG_FRAME) != (*updp & PG_FRAME)) {
-				panic("copyio: user pdp mismatch - kpdp = 0x%qx,  updp = 0x%qx\n", *kpdp, *updp);
-			}
-			(void) ml_set_interrupts_enabled(istate);
-		}
-#endif
-		if (copyio_state == WINDOWS_DIRTY) {
-		        flush_tlb();
-
-		        copyio_state = WINDOWS_CLEAN;
-
-			KERNEL_DEBUG(0xeff70054 | DBG_FUNC_NONE, window_index, 0, 0, 0, 0);
-		}
-		user_offset += (window_index * NBPDE);
-
-		KERNEL_DEBUG(0xeff70044 | DBG_FUNC_NONE, (unsigned)user_offset,
-			     (unsigned)kernel_addr, cnt, 0, 0);
-
-	        switch (copy_type) {
-
-		case COPYIN:
-		        error = copyin_user(user_offset, kernel_addr, cnt);
-			break;
-			
-		case COPYOUT:
-		        error = copyout_user(kernel_addr, user_offset, cnt);
-			break;
-
-		case COPYINPHYS:
-		        error = copyinphys_user(user_offset, kernel_addr, cnt);
-			break;
-			
-		case COPYOUTPHYS:
-		        error = copyoutphys_user(kernel_addr, user_offset, cnt);
-			break;
-
-		case COPYINSTR:
-		        error = copyinstr_user(user_offset, kernel_addr, cnt, &bytes_copied);
-
-			/*
-			 * lencopied should be updated on success
-			 * or ENAMETOOLONG...  but not EFAULT
-			 */
-			if (error != EFAULT)
-			        *lencopied += bytes_copied;
-
-			/*
-			 * if we still have room, then the ENAMETOOLONG
-			 * is just an artifact of the buffer straddling
-			 * a window boundary and we should continue
-			 */
-			if (error == ENAMETOOLONG && nbytes > cnt)
-			        error = 0;
-
-			if (error) {
-#if KDEBUG
-			        nbytes = *lencopied;
-#endif
-			        break;
-			}
-			if (*(kernel_addr + bytes_copied - 1) == 0) {
-			        /*
-				 * we found a NULL terminator... we're done
-				 */
-#if KDEBUG
-			        nbytes = *lencopied;
-#endif
-				goto done;
-			}
-			if (cnt == nbytes) {
-			        /*
-				 * no more room in the buffer and we haven't
-				 * yet come across a NULL terminator
-				 */
-#if KDEBUG
-			        nbytes = *lencopied;
 #endif
-			        error = ENAMETOOLONG;
-				break;
-			}
-			assert(cnt == bytes_copied);
-
-			break;
-		}
-		if (error)
-		        break;
-		if ((nbytes -= cnt) == 0)
-		        break;
-
-		kernel_addr += cnt;
-		user_base += NBPDE;
-		user_offset = 0;
-
-		if (nbytes > NBPDE)
-		        cnt = NBPDE;
-		else
-		        cnt = nbytes;
-	}
-done:
-	thread->machine.copyio_state = WINDOWS_CLOSED;
-
-	KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)user_addr,
-		     (unsigned)kernel_addr, (unsigned)nbytes, error, 0);
 
-#if CONFIG_DTRACE
-	thread->machine.specFlags &= ~CopyIOActive;
-#endif /* CONFIG_DTRACE */
-
-	return (error);
+#if !CONFIG_VMX
+int host_vmxon(boolean_t exclusive __unused)
+{
+	return VMX_UNSUPPORTED;
 }
 
-
-static int
-copyio_phys(addr64_t source, addr64_t sink, vm_size_t csize, int which)
+void host_vmxoff(void)
 {
-        pmap_paddr_t paddr;
-	user_addr_t vaddr;
-	char        *window_offset;
-	pt_entry_t  pentry;
-	int         ctype;
-	int	    retval;
-	boolean_t   istate;
-
-	if (which & cppvPsnk) {
-		paddr  = (pmap_paddr_t)sink;
-	        vaddr  = (user_addr_t)source;
-		ctype  = COPYINPHYS;
-		pentry = (pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME) | INTEL_PTE_RW);
-	} else {
-	        paddr  = (pmap_paddr_t)source;
-		vaddr  = (user_addr_t)sink;
-		ctype  = COPYOUTPHYS;
-		pentry = (pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME));
-	}
-	window_offset = (char *)((uint32_t)paddr & (PAGE_SIZE - 1));
-
-	assert(!((current_thread()->machine.specFlags & CopyIOActive) && ((which & cppvKmap) == 0)));
-
-	if (current_thread()->machine.physwindow_busy) {
-	        pt_entry_t	old_pentry;
-
-	        KERNEL_DEBUG(0xeff70048 | DBG_FUNC_NONE, paddr, csize, 0, -1, 0);
-		/*
-		 * we had better be targeting wired memory at this point
-		 * we will not be able to handle a fault with interrupts
-		 * disabled... we disable them because we can't tolerate
-		 * being preempted during this nested use of the window
-		 */
-		istate = ml_set_interrupts_enabled(FALSE);
-
-		old_pentry = *(current_cpu_datap()->cpu_physwindow_ptep);
-		pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), pentry);
-
-		invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);
-
-		retval = copyio(ctype, vaddr, window_offset, csize, NULL, which & cppvKmap);
-
-		pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), old_pentry);
-
-		invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);
-
-		(void) ml_set_interrupts_enabled(istate);
-	} else {
-	        /*
-		 * mark the window as in use... if an interrupt hits while we're
-		 * busy, or we trigger another copypv from the fault path into
-		 * the driver on a user address space page fault due to a copyin/out,
-		 * then we need to save and restore the current window state instead
-		 * of caching the window and preserving it across context switches
-		 */
-	        current_thread()->machine.physwindow_busy = 1;
-
-	        if (current_thread()->machine.physwindow_pte != pentry) {
-		        KERNEL_DEBUG(0xeff70048 | DBG_FUNC_NONE, paddr, csize, 0, 0, 0);
-
-			current_thread()->machine.physwindow_pte = pentry;
-			
-			/*
-			 * preemption at this point would be bad since we
-			 * could end up on the other processor after we grabbed the
-			 * pointer to the current cpu data area, but before we finished
-			 * using it to stuff the page table entry since we would
-			 * be modifying a window that no longer belonged to us
-			 * the invlpg can be done unprotected since it only flushes
-			 * this page address from the tlb... if it flushes the wrong
-			 * one, no harm is done, and the context switch that moved us
-			 * to the other processor will have already taken care of
-			 * flushing the tlb after it reloaded the page table from machine.physwindow_pte
-			 */
-			istate = ml_set_interrupts_enabled(FALSE);
-
-			pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), pentry);
-			(void) ml_set_interrupts_enabled(istate);
-
-			invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);
-		}
-#if JOE_DEBUG
-		else {
-		        if (pentry !=
-			    (*(current_cpu_datap()->cpu_physwindow_ptep) & (INTEL_PTE_VALID | PG_FRAME | INTEL_PTE_RW)))
-			        panic("copyio_phys: pentry != *physwindow_ptep");
-		}
+	return;
+}
 #endif
-		retval = copyio(ctype, vaddr, window_offset, csize, NULL, which & cppvKmap);
 
-	        current_thread()->machine.physwindow_busy = 0;
-	}
-	return (retval);
-}
+#ifdef __LP64__
 
-int
-copyinmsg(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes)
-{
-        return (copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0));
-}    
+#define INT_SIZE	(BYTE_SIZE * sizeof (int))
 
-int
-copyin(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes)
+/*
+ * Set indicated bit in bit string.
+ */
+void
+setbit(int bitno, int *s)
 {
-        return (copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0));
+	s[bitno / INT_SIZE] |= 1 << (bitno % INT_SIZE);
 }
 
-int
-copyinstr(const user_addr_t user_addr,  char *kernel_addr, vm_size_t nbytes, vm_size_t *lencopied)
+/*
+ * Clear indicated bit in bit string.
+ */
+void
+clrbit(int bitno, int *s)
 {
-	*lencopied = 0;
-
-        return (copyio(COPYINSTR, user_addr, kernel_addr, nbytes, lencopied, 0));
+	s[bitno / INT_SIZE] &= ~(1 << (bitno % INT_SIZE));
 }
 
+/*
+ * Test if indicated bit is set in bit string.
+ */
 int
-copyoutmsg(const char *kernel_addr, user_addr_t user_addr, vm_size_t nbytes)
+testbit(int bitno, int *s)
 {
-	return (copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0));
+	return s[bitno / INT_SIZE] & (1 << (bitno % INT_SIZE));
 }
 
+/*
+ * Find first bit set in bit string.
+ */
 int
-copyout(const void *kernel_addr, user_addr_t user_addr, vm_size_t nbytes)
-{
-	return (copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0));
-}
-
-
-kern_return_t
-copypv(addr64_t src64, addr64_t snk64, unsigned int size, int which)
+ffsbit(int *s)
 {
-	unsigned int lop, csize;
-	int bothphys = 0;
-	
-	KERNEL_DEBUG(0xeff7004c | DBG_FUNC_START, (unsigned)src64,
-		     (unsigned)snk64, size, which, 0);
-
-	if ((which & (cppvPsrc | cppvPsnk)) == 0 )				/* Make sure that only one is virtual */
-		panic("copypv: no more than 1 parameter may be virtual\n");	/* Not allowed */
-
-	if ((which & (cppvPsrc | cppvPsnk)) == (cppvPsrc | cppvPsnk))
-	        bothphys = 1;							/* both are physical */
-
-	while (size) {
-	  
-	        if (bothphys) {
-		        lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1)));		/* Assume sink smallest */
-
-			if (lop > (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1))))
-			        lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)));	/* No, source is smaller */
-		} else {
-		        /*
-			 * only need to compute the resid for the physical page
-			 * address... we don't care about where we start/finish in
-			 * the virtual since we just call the normal copyin/copyout
-			 */
-		        if (which & cppvPsrc)
-			        lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)));
-			else
-			        lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1)));
-		}
-		csize = size;						/* Assume we can copy it all */
-		if (lop < size)
-		        csize = lop;					/* Nope, we can't do it all */
-#if 0		
-		/*
-		 * flush_dcache64 is currently a nop on the i386... 
-		 * it's used when copying to non-system memory such
-		 * as video capture cards... on PPC there was a need
-		 * to flush due to how we mapped this memory... not
-		 * sure if it's needed on i386.
-		 */
-		if (which & cppvFsrc)
-		        flush_dcache64(src64, csize, 1);		/* If requested, flush source before move */
-		if (which & cppvFsnk)
-		        flush_dcache64(snk64, csize, 1);		/* If requested, flush sink before move */
-#endif
-		if (bothphys)
-		        bcopy_phys(src64, snk64, csize);		/* Do a physical copy, virtually */
-		else {
-		        if (copyio_phys(src64, snk64, csize, which))
-			        return (KERN_FAILURE);
-		}
-#if 0
-		if (which & cppvFsrc)
-		        flush_dcache64(src64, csize, 1);	/* If requested, flush source after move */
-		if (which & cppvFsnk)
-		        flush_dcache64(snk64, csize, 1);	/* If requested, flush sink after move */
-#endif
-		size   -= csize;					/* Calculate what is left */
-		snk64 += csize;					/* Bump sink to next physical address */
-		src64 += csize;					/* Bump source to next physical address */
-	}
-	KERNEL_DEBUG(0xeff7004c | DBG_FUNC_END, (unsigned)src64,
-		     (unsigned)snk64, size, which, 0);
+	int             offset;
 
-	return KERN_SUCCESS;
-}
-
-#if !MACH_KDP
-void
-kdp_register_callout(void)
-{
+	for (offset = 0; !*s; offset += (int)INT_SIZE, ++s);
+	return offset + __builtin_ctz(*s);
 }
-#endif
 
-#if !CONFIG_VMX
-int host_vmxon(boolean_t exclusive __unused)
+int
+ffs(unsigned int mask)
 {
-	return VMX_UNSUPPORTED;
-}
+	if (mask == 0)
+		return 0;
 
-void host_vmxoff(void)
-{
-	return;
+	/*
+	 * NOTE: cannot use __builtin_ffs because it generates a call to
+	 * 'ffs'
+	 */
+	return 1 + __builtin_ctz(mask);
 }
 #endif
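A brief usage sketch for the LP64 bit-string helpers above (INT_SIZE is bits per int, so a 64-bit map needs 64/INT_SIZE ints; ffsbit assumes at least one bit is set, while ffs is 1-based with ffs(0) == 0):

/* Sketch: a 64-bit map built from ints, exercising the helpers. */
static void
bitmap_demo(void)
{
	int map[2] = { 0, 0 };	/* 64 bits, assuming 32-bit int */

	setbit(37, map);	/* map[1] |= 1 << 5    */
	if (testbit(37, map))
		clrbit(37, map);/* map[1] &= ~(1 << 5) */

	setbit(3, map);		/* now ffsbit(map) == 3 (0-based)  */
				/* ffs(0x8) == 4 (1-based)         */
}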
diff --git a/osfmk/i386/machine_check.c b/osfmk/i386/machine_check.c
index 6eaff9d8f..77681d340 100644
--- a/osfmk/i386/machine_check.c
+++ b/osfmk/i386/machine_check.c
@@ -45,6 +45,7 @@ static uint32_t		mca_family = 0;
 static unsigned int	mca_error_bank_count = 0;
 static boolean_t	mca_control_MSR_present = FALSE;
 static boolean_t	mca_threshold_status_present = FALSE;
+static boolean_t	mca_sw_error_recovery_present = FALSE;
 static boolean_t	mca_extended_MSRs_present = FALSE;
 static unsigned int	mca_extended_MSRs_count = 0;
 static boolean_t	mca_cmci_present = FALSE;
@@ -89,6 +90,7 @@ mca_get_availability(void)
 		mca_error_bank_count = ia32_mcg_cap.bits.count;
 		mca_control_MSR_present = ia32_mcg_cap.bits.mcg_ctl_p;
 		mca_threshold_status_present = ia32_mcg_cap.bits.mcg_tes_p;
+		mca_sw_error_recovery_present = ia32_mcg_cap.bits.mcg_ser_p;
 		mca_cmci_present = ia32_mcg_cap.bits.mcg_ext_corr_err_p;
 		if (family == 0x0F) {
 			mca_extended_MSRs_present = ia32_mcg_cap.bits.mcg_ext_p;
@@ -269,25 +271,22 @@ static void mca_dump_32bit_state(void)
 static void
 mca_report_cpu_info(void)
 {
-	uint64_t	microcode;
 	i386_cpu_info_t *infop = cpuid_info();
 
-	// microcode revision is top 32 bits of MSR_IA32_UCODE_REV
-	microcode = rdmsr64(MSR_IA32_UCODE_REV) >> 32;
 	kdb_printf(" family: %d model: %d stepping: %d microcode: %d\n",
 		infop->cpuid_family,
 		infop->cpuid_model,
 		infop->cpuid_stepping,
-		(uint32_t) microcode);
+		infop->cpuid_microcode_version);
 	kdb_printf(" %s\n", infop->cpuid_brand_string);
 }
 
 static const char *mc8_memory_operation[] = {
-	[MC8_MMM_GENERIC]		"generic",
-	[MC8_MMM_READ]			"read",
-	[MC8_MMM_WRITE]			"write",
-	[MC8_MMM_ADDRESS_COMMAND]	"address/command",
-	[MC8_MMM_RESERVED]		"reserved"
+	[MC8_MMM_GENERIC] =		"generic",
+	[MC8_MMM_READ] =		"read",
+	[MC8_MMM_WRITE] =		"write",
+	[MC8_MMM_ADDRESS_COMMAND] =	"address/command",
+	[MC8_MMM_RESERVED] =		"reserved"
 };
 
 static void
@@ -312,19 +311,20 @@ mca_dump_bank_mc8(mca_state_t *state, int i)
 	kdb_printf(
 		"  Channel number:         %d%s\n"
 		"  Memory Operation:       %s\n"
-		"  Machine-specific error: %s%s%s%s%s%s%s%s\n"
+		"  Machine-specific error: %s%s%s%s%s%s%s%s%s\n"
 		"  COR_ERR_CNT:            %d\n",
 		mc8.channel_number,
 		IF(mc8.channel_number == 15, " (unknown)"),
 		mc8_memory_operation[mmm],
-		IF(mc8.read_ecc,            "Read ECC"),
-		IF(mc8.ecc_on_a_scrub,      "ECC on scrub"),
-		IF(mc8.write_parity,        "Write parity"),
-		IF(mc8.redundant_memory,    "Redundant memory"),
-		IF(mc8.sparing,	            "Sparing/Resilvering"),
-		IF(mc8.access_out_of_range, "Access out of Range"),
-		IF(mc8.address_parity,      "Address Parity"),
-		IF(mc8.byte_enable_parity,  "Byte Enable Parity"),
+		IF(mc8.read_ecc,            "Read ECC "),
+		IF(mc8.ecc_on_a_scrub,      "ECC on scrub "),
+		IF(mc8.write_parity,        "Write parity "),
+		IF(mc8.redundant_memory,    "Redundant memory "),
+		IF(mc8.sparing,	            "Sparing/Resilvering "),
+		IF(mc8.access_out_of_range, "Access out of Range "),
+		IF(mc8.rtid_out_of_range,   "RTID out of Range "),
+		IF(mc8.address_parity,      "Address Parity "),
+		IF(mc8.byte_enable_parity,  "Byte Enable Parity "),
 		mc8.cor_err_cnt);
 	kdb_printf(
 		"  Status bits:\n%s%s%s%s%s%s",
@@ -344,10 +344,12 @@ mca_dump_bank_mc8(mca_state_t *state, int i)
 		mc8_misc.u64 = bank->mca_mci_misc;
 		kdb_printf(
 			" IA32_MC%d_MISC(0x%x): 0x%016qx\n"
+			"  RTID:     %d\n"
 			"  DIMM:     %d\n"
 			"  Channel:  %d\n"
 			"  Syndrome: 0x%x\n",
 			i, IA32_MCi_MISC(i), mc8_misc.u64,
+			mc8_misc.bits.rtid,
 			mc8_misc.bits.dimm,
 			mc8_misc.bits.channel,
 			(int) mc8_misc.bits.syndrome);
@@ -355,10 +357,10 @@ mca_dump_bank_mc8(mca_state_t *state, int i)
 }
 
 static const char *mca_threshold_status[] = {
-	[THRESHOLD_STATUS_NO_TRACKING]	"No tracking",
-	[THRESHOLD_STATUS_GREEN]	"Green",
-	[THRESHOLD_STATUS_YELLOW]	"Yellow",
-	[THRESHOLD_STATUS_RESERVED]	"Reserved"
+	[THRESHOLD_STATUS_NO_TRACKING] =	"No tracking",
+	[THRESHOLD_STATUS_GREEN] =	"Green",
+	[THRESHOLD_STATUS_YELLOW] =	"Yellow",
+	[THRESHOLD_STATUS_RESERVED] =	"Reserved"
 };
 
 static void
@@ -395,6 +397,13 @@ mca_dump_bank(mca_state_t *state, int i)
 			    mca_threshold_status[threshold] :
 			    "Undefined");
 	}
+	if (mca_threshold_status_present &&
+	    mca_sw_error_recovery_present) {
+		kdb_printf(
+			"  Software Error Recovery:\n%s%s",
+			IF(status.bits_tes_p.ar, "   Recovery action reqd\n"),
+			IF(status.bits_tes_p.s,  "   Signaling UCR error\n"));
+	}
 	kdb_printf(
 		"  Status bits:\n%s%s%s%s%s%s",
 		IF(status.bits.pcc,   "   Processor context corrupt\n"),
diff --git a/osfmk/i386/machine_check.h b/osfmk/i386/machine_check.h
index e940fa8c0..70c75c826 100644
--- a/osfmk/i386/machine_check.h
+++ b/osfmk/i386/machine_check.h
@@ -58,6 +58,7 @@ typedef union {
 	uint64_t	mcg_ecms		:BIT1(12);
 	uint64_t	mcg_reserved2		:BITS(15,13);
 	uint64_t	mcg_ext_cnt		:BITS(23,16);
+	uint64_t	mcg_ser_p		:BIT1(24);
      }		bits;
      uint64_t	u64;
 } ia32_mcg_cap_t;
@@ -128,11 +129,13 @@ typedef union {
 	uint64_t	over			:BIT1(62);
 	uint64_t	val			:BIT1(63);
     }		bits;
-     struct {		/* Variant if threshold-based error status present: */
+    struct {		/* Variant if threshold-based error status present: */
 	uint64_t	mca_error		:BITS(15,0);
 	uint64_t	model_specific_error	:BITS(31,16);
 	uint64_t	other_information	:BITS(52,32);
 	uint64_t	threshold		:BITS(54,53);
+	uint64_t	ar			:BIT1(55);
+	uint64_t	s			:BIT1(56);
 	uint64_t	pcc			:BIT1(57);
 	uint64_t	addrv			:BIT1(58);
 	uint64_t	miscv			:BIT1(59);
@@ -151,6 +154,7 @@ typedef union {
 	uint64_t	redundant_memory	:BIT1(19);
 	uint64_t	sparing			:BIT1(20);
 	uint64_t	access_out_of_range	:BIT1(21);
+	uint64_t	rtid_out_of_range	:BIT1(22);
 	uint64_t	address_parity		:BIT1(23);
 	uint64_t	byte_enable_parity	:BIT1(24);
 	uint64_t	reserved		:BITS(37,25);
@@ -173,7 +177,8 @@ typedef union {
 #define	MC8_MMM_RESERVED		4
 typedef union {
     struct {
-	uint64_t	reserved1		:BITS(15,0);
+	uint64_t	rtid			:BITS(7,0);
+	uint64_t	reserved1		:BITS(15,8);
 	uint64_t	dimm			:BITS(17,16);
 	uint64_t	channel			:BITS(19,18);
 	uint64_t	reserved2		:BITS(31,20);
diff --git a/osfmk/i386/machine_cpu.h b/osfmk/i386/machine_cpu.h
index 2460bf606..82532f088 100644
--- a/osfmk/i386/machine_cpu.h
+++ b/osfmk/i386/machine_cpu.h
@@ -41,6 +41,7 @@ void	cpu_machine_init(
 void	handle_pending_TLB_flushes(
 	void);
 
+int cpu_signal_handler(x86_saved_state_t *regs);
 
 kern_return_t cpu_register(
         int *slot_nump);
diff --git a/osfmk/i386/machine_routines.c b/osfmk/i386/machine_routines.c
index 4525c8a31..b7d3f559a 100644
--- a/osfmk/i386/machine_routines.c
+++ b/osfmk/i386/machine_routines.c
@@ -38,16 +38,16 @@
 #include <kern/thread.h>
 #include <i386/machine_cpu.h>
 #include <i386/lapic.h>
+#include <i386/lock.h>
 #include <i386/mp_events.h>
 #include <i386/pmCPU.h>
+#include <i386/trap.h>
 #include <i386/tsc.h>
 #include <i386/cpu_threads.h>
 #include <i386/proc_reg.h>
 #include <mach/vm_param.h>
 #include <i386/pmap.h>
 #include <i386/misc_protos.h>
-#include <i386/mp.h>
-
 #if MACH_KDB
 #include <machine/db_machdep.h>
 #include <ddb/db_aout.h>
@@ -65,7 +65,6 @@
 #define DBG(x...)
 #endif
 
-
 extern void 	wakeup(void *);
 
 static int max_cpus_initialized = 0;
@@ -75,6 +74,10 @@ unsigned int	LockTimeOutTSC;
 unsigned int	MutexSpin;
 uint64_t	LastDebuggerEntryAllowance;
 
+extern uint64_t panic_restart_timeout;
+
+boolean_t virtualized = FALSE;
+
 #define MAX_CPUS_SET    0x1
 #define MAX_CPUS_WAIT   0x2
 
@@ -131,7 +134,6 @@ ml_static_mfree(
 
 	assert((vaddr & (PAGE_SIZE-1)) == 0); /* must be page aligned */
 
-
 	for (vaddr_cur = vaddr;
 	     vaddr_cur < round_page_64(vaddr+size);
 	     vaddr_cur += PAGE_SIZE) {
@@ -207,7 +209,6 @@ void ml_init_interrupt(void)
 }
 
 
-
 /* Get Interrupts Enabled */
 boolean_t ml_get_interrupts_enabled(void)
 {
@@ -220,27 +221,25 @@ boolean_t ml_get_interrupts_enabled(void)
 /* Set Interrupts Enabled */
 boolean_t ml_set_interrupts_enabled(boolean_t enable)
 {
-  unsigned long flags;
-
-  __asm__ volatile("pushf; pop	%0" :  "=r" (flags));
+	unsigned long flags;
+	boolean_t istate;
+	
+	__asm__ volatile("pushf; pop	%0" :  "=r" (flags));
 
-  if (enable) {
-	ast_t		*myast;
+	istate = ((flags & EFL_IF) != 0);
 
-	myast = ast_pending();
+	if (enable) {
+		__asm__ volatile("sti;nop");
 
-	if ( (get_preemption_level() == 0) &&  (*myast & AST_URGENT) ) {
-	__asm__ volatile("sti");
-          __asm__ volatile ("int $0xff");
-        } else {
-	  __asm__ volatile ("sti");
+		if ((get_preemption_level() == 0) && (*ast_pending() & AST_URGENT))
+			__asm__ volatile ("int $0xff");
+	}
+	else {
+		if (istate)
+			__asm__ volatile("cli");
 	}
-  }
-  else {
-	__asm__ volatile("cli");
-  }
 
-  return (flags & EFL_IF) != 0;
+	return istate;
 }
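The rewritten routine returns the interrupt state that was in force on entry, which is what makes the save/restore idiom used throughout this patch (see copyio above) work; a sketch:

/* Sketch: the canonical critical-section idiom around this routine. */
boolean_t istate;

istate = ml_set_interrupts_enabled(FALSE);	/* cli; remember old IF */
/* ... work that must not be interrupted ... */
(void) ml_set_interrupts_enabled(istate);	/* restore entry state */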
 
 /* Check if running at interrupt context */
@@ -435,7 +434,10 @@ ml_cpu_get_info(ml_cpu_info_t *cpu_infop)
 	 * As distinct from whether the cpu has these capabilities.
 	 */
 	os_supports_sse = !!(get_cr4() & CR4_OSXMM);
-	if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse)
+
+	if (ml_fpu_avx_enabled())
+		cpu_infop->vector_unit = 9;
+	else if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse)
 		cpu_infop->vector_unit = 8;
 	else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse)
 		cpu_infop->vector_unit = 7;
@@ -525,7 +527,8 @@ ml_init_lock_timeout(void)
 	uint32_t	mtxspin;
 	uint64_t	default_timeout_ns = NSEC_PER_SEC>>2;
 	uint32_t	slto;
-	
+	uint32_t	prt;
+
 	if (PE_parse_boot_argn("slto_us", &slto, sizeof (slto)))
 		default_timeout_ns = slto * NSEC_PER_USEC;
 
@@ -544,6 +547,9 @@ ml_init_lock_timeout(void)
 	MutexSpin = (unsigned int)abstime;
 
 	nanoseconds_to_absolutetime(4ULL * NSEC_PER_SEC, &LastDebuggerEntryAllowance);
+	if (PE_parse_boot_argn("panic_restart_timeout", &prt, sizeof (prt)))
+		nanoseconds_to_absolutetime(prt * NSEC_PER_SEC, &panic_restart_timeout);
+	virtualized = ((cpuid_features() & CPUID_FEATURE_VMM) != 0);
 	interrupt_latency_tracker_setup();
 }
 
@@ -649,8 +655,33 @@ vm_offset_t ml_stack_remaining(void)
 	}
 }
 
+void
+kernel_preempt_check(void)
+{
+	boolean_t	intr;
+	unsigned long flags;
+
+	assert(get_preemption_level() == 0);
+
+	__asm__ volatile("pushf; pop	%0" :  "=r" (flags));
+
+	intr = ((flags & EFL_IF) != 0);
+
+	if ((*ast_pending() & AST_URGENT) && intr == TRUE) {
+		/*
+		 * We can handle interrupts and preemption
+		 * at this point.
+		 */
+
+		/*
+		 * Now raise the preemption trap (T_PREEMPT).
+		 */
+		__asm__ volatile ("int %0" :: "N" (T_PREEMPT));
+	}
+}
+
 boolean_t machine_timeout_suspended(void) {
-	return (mp_recent_debugger_activity() || panic_active() || pmap_tlb_flush_timeout || spinlock_timed_out);
+	return (virtualized || pmap_tlb_flush_timeout || spinlock_timed_out || panic_active() || mp_recent_debugger_activity());
 }
 
 #if MACH_KDB
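
The rewritten ml_set_interrupts_enabled() and the new kernel_preempt_check() above share one idiom: sample EFLAGS with pushf/pop and test the IF bit before deciding whether to sti/cli or to take the urgent-AST trap. A minimal standalone sketch of that sampling idiom follows; the EFL_IF value matches the kernel's eflags.h, everything else is illustrative:

#include <stdbool.h>
#include <stdio.h>

#define EFL_IF	0x00000200UL		/* interrupt-enable flag, bit 9 of EFLAGS */

static bool interrupts_enabled(void)
{
	unsigned long flags;

	/* same pushf/pop sequence the kernel uses to read EFLAGS */
	__asm__ volatile("pushf; pop	%0" : "=r" (flags));
	return (flags & EFL_IF) != 0;
}

int main(void)
{
	/* in user space IF is always set, so this prints 1 */
	printf("%d\n", interrupts_enabled());
	return 0;
}
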
diff --git a/osfmk/i386/machine_routines.h b/osfmk/i386/machine_routines.h
index e222fb18d..42f77f6c4 100644
--- a/osfmk/i386/machine_routines.h
+++ b/osfmk/i386/machine_routines.h
@@ -67,7 +67,6 @@ void	ml_cpu_set_ldt(int);
 /* Initialize Interrupts */
 void    ml_init_interrupt(void);
 
-
 /* Generate a fake interrupt */
 void ml_cause_interrupt(void);
 
@@ -132,11 +131,9 @@ void	ml_get_bouncepool_info(
 boolean_t machine_timeout_suspended(void);
 #endif /* PEXPERT_KERNEL_PRIVATE || MACH_KERNEL_PRIVATE  */
 
+/* Warm up a CPU to receive an interrupt */
+kern_return_t ml_interrupt_prewarm(uint64_t deadline);
 
-void interrupt_latency_tracker_setup(void);
-void interrupt_reset_latency_stats(void);
-void interrupt_populate_latency_stats(char *, unsigned);
-boolean_t ml_fpu_avx_enabled(void);
 #endif /* XNU_KERNEL_PRIVATE */
 
 #ifdef KERNEL_PRIVATE
@@ -235,14 +232,14 @@ void ml_phys_write_double_64(
 
 /* Struct for ml_cpu_get_info */
 struct ml_cpu_info {
-	unsigned long		vector_unit;
-	unsigned long		cache_line_size;
-	unsigned long		l1_icache_size;
-	unsigned long		l1_dcache_size;
-	unsigned long		l2_settings;
-	unsigned long		l2_cache_size;
-	unsigned long		l3_settings;
-	unsigned long		l3_cache_size;
+	uint32_t	vector_unit;
+	uint32_t	cache_line_size;
+	uint32_t	l1_icache_size;
+	uint32_t	l1_dcache_size;
+	uint32_t	l2_settings;
+	uint32_t	l2_cache_size;
+	uint32_t	l3_settings;
+	uint32_t	l3_cache_size;
 };
 
 typedef struct ml_cpu_info ml_cpu_info_t;
@@ -273,6 +270,7 @@ extern void ml_set_maxbusdelay(uint32_t mdelay);
 extern uint32_t ml_get_maxbusdelay(void);
 extern void ml_set_maxintdelay(uint64_t mdelay);
 extern uint64_t ml_get_maxintdelay(void);
+extern boolean_t ml_get_interrupt_prewake_applicable(void);
 
 
 extern uint64_t tmrCvt(uint64_t time, uint64_t conversion);
@@ -304,6 +302,11 @@ void ml_get_csw_threads(thread_t * /*old*/, thread_t * /*new*/);
 
 __END_DECLS
 
+#ifdef	XNU_KERNEL_PRIVATE
+boolean_t ml_fpu_avx_enabled(void);
+void interrupt_latency_tracker_setup(void);
+void interrupt_reset_latency_stats(void);
+void interrupt_populate_latency_stats(char *, unsigned);
 
-
+#endif /* XNU_KERNEL_PRIVATE */
 #endif /* _I386_MACHINE_ROUTINES_H_ */
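
The ml_cpu_info fields above move from unsigned long to uint32_t so the structure has one layout on both the 32-bit and 64-bit kernels. A standalone sketch mirroring the new definition, showing the size is now fixed at 32 bytes regardless of the width of long (the struct is copied from the diff; the main() is illustrative):

#include <stdint.h>
#include <stdio.h>

struct ml_cpu_info {
	uint32_t	vector_unit;
	uint32_t	cache_line_size;
	uint32_t	l1_icache_size;
	uint32_t	l1_dcache_size;
	uint32_t	l2_settings;
	uint32_t	l2_cache_size;
	uint32_t	l3_settings;
	uint32_t	l3_cache_size;
};

int main(void)
{
	/* 32 bytes whether long is 4 or 8 bytes wide */
	printf("sizeof(struct ml_cpu_info) = %zu\n",
	       sizeof(struct ml_cpu_info));
	return 0;
}
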
diff --git a/osfmk/i386/machine_routines_asm.s b/osfmk/i386/machine_routines_asm.s
index ae2e8aaf1..0e3d9fb68 100644
--- a/osfmk/i386/machine_routines_asm.s
+++ b/osfmk/i386/machine_routines_asm.s
@@ -27,12 +27,11 @@
  */
  
 #include <i386/asm.h>
-#include <i386/rtclock.h>
-#include <i386/proc_reg.h>
+#include <i386/apic.h>
 #include <i386/eflags.h>
-       
+#include <i386/rtclock_asm.h>
 #include <i386/postcode.h>
-#include <i386/apic.h>
+#include <i386/proc_reg.h>
 #include <assym.s>
 
 /*
@@ -136,51 +135,6 @@ LEXT(tmrCvt)
 			ret						// Leave...
 
 
-/* void             _rtc_nanotime_store(uint64_t                tsc,
-	                                uint64_t                nsec,
-	                                uint32_t                scale,
-	                                uint32_t                shift,
-	                                rtc_nanotime_t  *dst) ;
-*/
-			.globl	EXT(_rtc_nanotime_store)
-			.align	FALIGN
-
-LEXT(_rtc_nanotime_store)
-		push		%ebp
-		movl		%esp,%ebp
-		push		%esi
-
-		mov		32(%ebp),%edx				/* get ptr to rtc_nanotime_info */
-		
-		movl		RNT_GENERATION(%edx),%esi		/* get current generation */
-		movl		$0,RNT_GENERATION(%edx)			/* flag data as being updated */
-
-		mov		8(%ebp),%eax
-		mov		%eax,RNT_TSC_BASE(%edx)
-		mov		12(%ebp),%eax
-		mov		%eax,RNT_TSC_BASE+4(%edx)
-
-		mov		24(%ebp),%eax
-		mov		%eax,RNT_SCALE(%edx)
-
-		mov		28(%ebp),%eax
-		mov		%eax,RNT_SHIFT(%edx)
-
-		mov		16(%ebp),%eax
-		mov		%eax,RNT_NS_BASE(%edx)
-		mov		20(%ebp),%eax
-		mov		%eax,RNT_NS_BASE+4(%edx)
-		
-		incl		%esi					/* next generation */
-		jnz		1f
-		incl		%esi					/* skip 0, which is a flag */
-1:		movl		%esi,RNT_GENERATION(%edx)		/* update generation and make usable */
-
-		pop		%esi
-		pop		%ebp
-		ret
-
-
 /* void  _rtc_nanotime_adjust(	
 		uint64_t         tsc_base_delta,
 	        rtc_nanotime_t  *dst);
@@ -252,7 +206,7 @@ LEXT(_rtc_nanotime_read)
 		jnz		Lslow
 		
 		/* Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD */
-		RTC_NANOTIME_READ_FAST()
+		PAL_RTC_NANOTIME_READ_FAST()
 
 		popl		%ebx
 		popl		%edi
@@ -316,3 +270,42 @@ Lslow:
 		pop		%ebp
 		ret							/* result in edx:eax */
 
+
+
+/*
+ * Timing routines.
+ */
+Entry(timer_update)
+	movl	4(%esp),%ecx
+	movl	8(%esp),%eax
+	movl	12(%esp),%edx
+	movl	%eax,TIMER_HIGHCHK(%ecx)
+	movl	%edx,TIMER_LOW(%ecx)
+	movl	%eax,TIMER_HIGH(%ecx)
+	ret
+
+Entry(timer_grab)
+	movl	4(%esp),%ecx
+0:	movl	TIMER_HIGH(%ecx),%edx
+	movl	TIMER_LOW(%ecx),%eax
+	cmpl	TIMER_HIGHCHK(%ecx),%edx
+	jne	0b
+	ret
+
+
+Entry(call_continuation)
+	movl	S_ARG0,%eax			/* get continuation */
+	movl	S_ARG1,%edx			/* continuation param */
+	movl	S_ARG2,%ecx			/* wait result */
+	movl	%gs:CPU_KERNEL_STACK,%esp	/* pop the stack */
+	xorl	%ebp,%ebp			/* zero frame pointer */
+	subl	$8,%esp				/* align the stack */
+	pushl	%ecx
+	pushl	%edx
+	call	*%eax				/* call continuation */
+	addl	$16,%esp
+	movl	%gs:CPU_ACTIVE_THREAD,%eax
+	pushl	%eax
+	call	EXT(thread_terminate)
+
+
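
The timer_update/timer_grab pair added above implements a lock-free consistent 64-bit read on 32-bit hardware: the writer stores the high word into TIMER_HIGHCHK first and TIMER_HIGH last, and the reader retries until the two high words it sees agree. A C rendering of the same protocol (field names mirror the assembly offsets; this is a sketch, not the kernel structures, and a production version would also need compiler/CPU barriers):

#include <stdint.h>

typedef struct {
	volatile uint32_t	high_chk;	/* written first by the updater */
	volatile uint32_t	low;
	volatile uint32_t	high;		/* written last by the updater */
} mock_timer_t;

static void mock_timer_update(mock_timer_t *t, uint32_t high, uint32_t low)
{
	t->high_chk = high;	/* publish new high word as the check value */
	t->low = low;
	t->high = high;		/* reader accepts once high == high_chk */
}

static uint64_t mock_timer_grab(const mock_timer_t *t)
{
	uint32_t high, low;

	do {
		high = t->high;
		low = t->low;
	} while (high != t->high_chk);	/* retry if an update raced us */

	return ((uint64_t)high << 32) | low;
}
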
diff --git a/osfmk/i386/misc_protos.h b/osfmk/i386/misc_protos.h
index 71e707c07..724490e22 100644
--- a/osfmk/i386/misc_protos.h
+++ b/osfmk/i386/misc_protos.h
@@ -105,6 +105,9 @@ extern unsigned int	mul_scale(
 /* Move arbitrarily-aligned data from one physical address to another */
 extern void bcopy_phys(addr64_t from, addr64_t to, vm_size_t nbytes);
 
+/* allow a function to get a quick virtual mapping of a physical page */
+extern int apply_func_phys(addr64_t src64, vm_size_t bytes, int (*func)(void * buffer, vm_size_t bytes, void * arg), void * arg);
+
 extern void ml_copy_phys(addr64_t, addr64_t, vm_size_t);
 
 /* Flush all cachelines for a page. */
@@ -161,6 +164,8 @@ copy_debug_state32(x86_debug_state32_t *src, x86_debug_state32_t *target, boolea
 void 
 copy_debug_state64(x86_debug_state64_t *src, x86_debug_state64_t *target, boolean_t all);
 
+extern void act_machine_switch_pcb(thread_t old, thread_t new);
+
 /* Fast-restart parameters */
 #define FULL_SLAVE_INIT	(NULL)
 #define FAST_SLAVE_INIT	((void *)(uintptr_t)1)
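
apply_func_phys(), declared above, gives the callback a temporary virtual mapping of a physical range. A hedged sketch of an in-kernel caller: the checksum helper, the caller, and the assumption that a zero return means "continue" are all illustrative, while addr64_t, vm_size_t and PAGE_SIZE come from the kernel headers:

/* hypothetical callback: checksum the mapped window */
static int
sum_bytes(void *buffer, vm_size_t bytes, void *arg)
{
	uint32_t	*sum = (uint32_t *) arg;
	const uint8_t	*p = (const uint8_t *) buffer;
	vm_size_t	i;

	for (i = 0; i < bytes; i++)
		*sum += p[i];
	return 0;
}

/* hypothetical caller, given a physical address */
static uint32_t
checksum_phys_page(addr64_t phys)
{
	uint32_t	sum = 0;

	(void) apply_func_phys(phys, PAGE_SIZE, sum_bytes, &sum);
	return sum;
}
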
diff --git a/osfmk/i386/mp.c b/osfmk/i386/mp.c
index 021f0638f..e90a298f9 100644
--- a/osfmk/i386/mp.c
+++ b/osfmk/i386/mp.c
@@ -1,5 +1,4 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -49,6 +48,8 @@
 #include <kern/pms.h>
 #include <kern/misc_protos.h>
 #include <kern/etimer.h>
+#include <kern/kalloc.h>
+#include <kern/queue.h>
 
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
@@ -65,11 +66,9 @@
 #include <i386/mp.h>
 #include <i386/mp_events.h>
 #include <i386/lapic.h>
-#include <i386/ipl.h>
 #include <i386/cpuid.h>
 #include <i386/fpu.h>
 #include <i386/machine_cpu.h>
-#include <i386/mtrr.h>
 #include <i386/pmCPU.h>
 #if CONFIG_MCA
 #include <i386/machine_check.h>
@@ -99,10 +98,17 @@
 #define PAUSE
 #endif	/* MP_DEBUG */
 
+/* Debugging/test trace events: */
+#define	TRACE_MP_TLB_FLUSH		MACHDBG_CODE(DBG_MACH_MP, 0)
+#define	TRACE_MP_CPUS_CALL		MACHDBG_CODE(DBG_MACH_MP, 1)
+#define	TRACE_MP_CPUS_CALL_LOCAL	MACHDBG_CODE(DBG_MACH_MP, 2)
+#define	TRACE_MP_CPUS_CALL_ACTION	MACHDBG_CODE(DBG_MACH_MP, 3)
+#define	TRACE_MP_CPUS_CALL_NOBUF	MACHDBG_CODE(DBG_MACH_MP, 4)
 
 #define ABS(v)		(((v) > 0)?(v):-(v))
 
 void 		slave_boot_init(void);
+void		i386_cpu_IPI(int cpu);
 
 #if MACH_KDB
 static void	mp_kdb_wait(void);
@@ -115,7 +121,6 @@ static void	mp_rendezvous_action(void);
 static void 	mp_broadcast_action(void);
 
 static boolean_t	cpu_signal_pending(int cpu, mp_event_t event);
-static int		cpu_signal_handler(x86_saved_state_t *regs);
 static int		NMIInterruptHandler(x86_saved_state_t *regs);
 
 boolean_t 		smp_initialized = FALSE;
@@ -165,11 +170,18 @@ lck_mtx_ext_t	mp_bc_lock_ext;
 static	volatile int 	debugger_cpu = -1;
 volatile long NMIPI_acks = 0;
 
+static void	mp_cpus_call_init(void); 
+static void	mp_cpus_call_cpu_init(void); 
 static void	mp_cpus_call_action(void); 
 static void	mp_call_PM(void);
 
 char		mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init
 
+/* PAL-related routines */
+boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler, 
+		int ipi_vector, i386_intr_func_t ipi_handler);
+void i386_start_cpu(int lapic_id, int cpu_num);
+void i386_send_NMI(int cpu);
 
 #if GPROF
 /*
@@ -193,7 +205,22 @@ struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
 static lck_grp_t 	smp_lck_grp;
 static lck_grp_attr_t	smp_lck_grp_attr;
 
-extern void	slave_pstart(void);
+#define NUM_CPU_WARM_CALLS	20
+struct timer_call	cpu_warm_call_arr[NUM_CPU_WARM_CALLS];
+queue_head_t 		cpu_warm_call_list;
+decl_simple_lock_data(static, cpu_warm_lock);
+
+typedef struct cpu_warm_data {
+	timer_call_t 	cwd_call;
+	uint64_t	cwd_deadline;
+	int		cwd_result;
+} *cpu_warm_data_t;
+
+static void		cpu_prewarm_init(void);
+static void 		cpu_warm_timer_call_func(call_entry_param_t p0, call_entry_param_t p1);
+static void 		_cpu_warm_setup(void *arg);
+static timer_call_t 	grab_warm_timer_call(void);
+static void		free_warm_timer_call(timer_call_t call);
 
 void
 smp_init(void)
@@ -206,27 +233,25 @@ smp_init(void)
 	lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
 	console_init();
 
-	/* Local APIC? */
-	if (!lapic_probe())
+	if(!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler, 
+				LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler))
 		return;
 
-	lapic_init();
-	lapic_configure();
-	lapic_set_intr_func(LAPIC_NMI_INTERRUPT,  NMIInterruptHandler);
-	lapic_set_intr_func(LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler);
-
 	cpu_thread_init();
 
 	GPROF_INIT();
 	DBGLOG_CPU_INIT(master_cpu);
 
-	install_real_mode_bootstrap(slave_pstart);
+	mp_cpus_call_init();
+	mp_cpus_call_cpu_init();
 
 	if (PE_parse_boot_argn("TSC_sync_margin",
 				&TSC_sync_margin, sizeof(TSC_sync_margin)))
 		kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
 	smp_initialized = TRUE;
 
+	cpu_prewarm_init();
+
 	return;
 }
 
@@ -285,6 +310,7 @@ intel_startCPU_fast(int slot_num)
 	 * longer than a full restart would require so it should be more
 	 * than long enough.
 	 */
+
 	mp_wait_for_cpu_up(slot_num, 30000, 1);
 	mp_enable_preemption();
 
@@ -328,12 +354,7 @@ start_cpu(void *arg)
 	if (cpu_number() != psip->starter_cpu)
 		return;
 
-	LAPIC_WRITE(ICRD, psip->target_lapic << LAPIC_ICRD_DEST_SHIFT);
-	LAPIC_WRITE(ICR, LAPIC_ICR_DM_INIT);
-	delay(100);
-
-	LAPIC_WRITE(ICRD, psip->target_lapic << LAPIC_ICRD_DEST_SHIFT);
-	LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(REAL_MODE_BOOTSTRAP_OFFSET>>12));
+	i386_start_cpu(psip->target_lapic, psip->target_cpu);
 
 #ifdef	POSTCODE_DELAY
 	/* Wait much longer if postcodes are displayed for a delay period. */
@@ -391,7 +412,7 @@ intel_startCPU(
 	DBGLOG_CPU_INIT(slot_num);
 
 	DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
-	DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) IdlePTD);
+	DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD);
 
 	/*
 	 * Initialize (or re-initialize) the descriptor tables for this cpu.
@@ -459,7 +480,7 @@ cpu_signal_handler(x86_saved_state_t *regs)
 	int		i=100;
 #endif	/* MACH_KDB && MACH_ASSERT */
 
-	mp_disable_preemption();
+	SCHED_STATS_IPI(current_processor());
 
 	my_cpu = cpu_number();
 	my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
@@ -467,6 +488,7 @@ cpu_signal_handler(x86_saved_state_t *regs)
 	 * signals could arrive while these are being processed
 	 * so it's no more than a hint.
 	 */
+
 	cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;
 
 	do {
@@ -530,8 +552,6 @@ cpu_signal_handler(x86_saved_state_t *regs)
 		}
 	} while (*my_word);
 
-	mp_enable_preemption();
-
 	return 0;
 }
 
@@ -540,6 +560,13 @@ NMIInterruptHandler(x86_saved_state_t *regs)
 {
 	void 	*stackptr;
 
+	if (panic_active() && !panicDebugging) {
+		if (pmsafe_debug)
+			pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
+		for(;;)
+			cpu_pause();
+	}
+
 	atomic_incl(&NMIPI_acks, 1);
 	sync_iss_to_iks_unconditionally(regs);
 #if defined (__i386__)
@@ -555,11 +582,10 @@ NMIInterruptHandler(x86_saved_state_t *regs)
 		char pstr[160];
 		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
 		panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
-		
 	} else if (pmap_tlb_flush_timeout == TRUE) {
 		char pstr[128];
-		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor, TLB state:%d\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid);
-		panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
+		snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:%d\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid);
+		panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
 	}
 
 #if MACH_KDP
@@ -574,51 +600,6 @@ NMExit:
 	return 1;
 }
 
-#ifdef	MP_DEBUG
-int	max_lock_loops = 100000000;
-int		trappedalready = 0;	/* (BRINGUP) */
-#endif	/* MP_DEBUG */
-
-static void
-i386_cpu_IPI(int cpu)
-{
-	boolean_t	state;
-	
-#ifdef	MP_DEBUG
-	if(cpu_datap(cpu)->cpu_signals & 6) {	/* (BRINGUP) */
-		kprintf("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d\n", cpu_datap(cpu)->cpu_signals, cpu);
-	}
-#endif	/* MP_DEBUG */
-
-#if MACH_KDB
-#ifdef	MP_DEBUG
-	if(!trappedalready && (cpu_datap(cpu)->cpu_signals & 6)) {	/* (BRINGUP) */
-		if(kdb_cpu != cpu_number()) {
-			trappedalready = 1;
-			panic("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d and I do not own debugger, owner = %08X\n", 
-				cpu_datap(cpu)->cpu_signals, cpu, kdb_cpu);
-		}
-	}
-#endif	/* MP_DEBUG */
-#endif
-
-	/* Wait for previous interrupt to be delivered... */
-#ifdef	MP_DEBUG
-	int     pending_busy_count = 0;
-	while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) {
-		if (++pending_busy_count > max_lock_loops)
-			panic("i386_cpu_IPI() deadlock\n");
-#else
-	while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) {
-#endif	/* MP_DEBUG */
-		cpu_pause();
-	}
-
-	state = ml_set_interrupts_enabled(FALSE);
-	LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
-	LAPIC_WRITE(ICR, LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_FIXED);
-	(void) ml_set_interrupts_enabled(state);
-}
 
 /*
  * cpu_interrupt is really just to be used by the scheduler to
@@ -628,10 +609,15 @@ i386_cpu_IPI(int cpu)
 void
 cpu_interrupt(int cpu)
 {
+	boolean_t did_IPI = FALSE;
+
 	if (smp_initialized
 	    && pmCPUExitIdle(cpu_datap(cpu))) {
 		i386_cpu_IPI(cpu);
+		did_IPI = TRUE;
 	}
+
+	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0);
 }
 
 /*
@@ -640,17 +626,8 @@ cpu_interrupt(int cpu)
 void
 cpu_NMI_interrupt(int cpu)
 {
-	boolean_t	state;
-
 	if (smp_initialized) {
-		state = ml_set_interrupts_enabled(FALSE);
-/* Program the interrupt command register */
-		LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
-/* The vector is ignored in this case--the target CPU will enter on the
- * NMI vector.
- */
-		LAPIC_WRITE(ICR, LAPIC_VECTOR(INTERPROCESSOR)|LAPIC_ICR_DM_NMI);
-		(void) ml_set_interrupts_enabled(state);
+		i386_send_NMI(cpu);
 	}
 }
 
@@ -695,7 +672,7 @@ i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
 		return;
 
 	if (event == MP_TLB_FLUSH)
-	        KERNEL_DEBUG(0xef800020 | DBG_FUNC_START, cpu, 0, 0, 0, 0);
+	        KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0);
 
 	DBGLOG(cpu_signal, cpu, event);
 	
@@ -714,7 +691,7 @@ i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
 		}
 	}
 	if (event == MP_TLB_FLUSH)
-	        KERNEL_DEBUG(0xef800020 | DBG_FUNC_END, cpu, 0, 0, 0, 0);
+	        KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0);
 }
 
 /*
@@ -780,7 +757,6 @@ mp_rendezvous_action(void)
 
 	intrs_enabled = ml_get_interrupts_enabled();
 
-
 	/* spin on entry rendezvous */
 	atomic_incl(&mp_rv_entry, 1);
 	while (mp_rv_entry < mp_rv_ncpus) {
@@ -789,9 +765,11 @@ mp_rendezvous_action(void)
 			handle_pending_TLB_flushes();
 		cpu_pause();
 	}
+
 	/* action function */
 	if (mp_rv_action_func != NULL)
 		mp_rv_action_func(mp_rv_func_arg);
+
 	/* spin on exit rendezvous */
 	atomic_incl(&mp_rv_exit, 1);
 	while (mp_rv_exit < mp_rv_ncpus) {
@@ -799,6 +777,7 @@ mp_rendezvous_action(void)
 			handle_pending_TLB_flushes();
 		cpu_pause();
 	}
+
 	/* teardown function */
 	if (mp_rv_teardown_func != NULL)
 		mp_rv_teardown_func(mp_rv_func_arg);
@@ -907,38 +886,186 @@ mp_rendezvous_no_intrs(
 		      arg);	
 }
 
-void
-handle_pending_TLB_flushes(void)
+
+typedef struct {
+	queue_chain_t	link;			/* queue linkage */
+	void		(*func)(void *,void *);	/* routine to call */
+	void		*arg0;			/* routine's 1st arg */
+	void		*arg1;			/* routine's 2nd arg */
+	volatile long	*countp;		/* completion counter */
+} mp_call_t;
+	
+#define MP_CPUS_CALL_BUFS_PER_CPU	MAX_CPUS
+static queue_head_t	mp_cpus_call_freelist;
+static queue_head_t	mp_cpus_call_queue[MAX_CPUS];
+/*
+ * The free list and the per-cpu call queues are protected by the following
+ * lock which is taken with interrupts disabled.
+ */
+decl_simple_lock_data(,mp_cpus_call_lock);
+
+static inline boolean_t
+mp_call_lock(void)
+{
+	boolean_t	intrs_enabled;
+
+	intrs_enabled = ml_set_interrupts_enabled(FALSE);
+	simple_lock(&mp_cpus_call_lock);
+
+	return intrs_enabled;
+}
+
+static inline boolean_t
+mp_call_is_locked(void)
+{
+	return !ml_get_interrupts_enabled() &&
+		hw_lock_held((hw_lock_t)&mp_cpus_call_lock);
+}
+
+static inline void
+mp_call_unlock(boolean_t intrs_enabled)
+{
+	simple_unlock(&mp_cpus_call_lock);
+	ml_set_interrupts_enabled(intrs_enabled);
+}
+
+static inline mp_call_t *
+mp_call_alloc(void)
+{
+	mp_call_t	*callp;
+
+	assert(mp_call_is_locked());
+	if (queue_empty(&mp_cpus_call_freelist))
+		return NULL;
+	queue_remove_first(&mp_cpus_call_freelist, callp, typeof(callp), link);
+	return callp;
+}
+
+static inline void
+mp_call_free(mp_call_t *callp)
 {
-	volatile int	*my_word = &current_cpu_datap()->cpu_signals;
+	assert(mp_call_is_locked());
+	queue_enter_first(&mp_cpus_call_freelist, callp, typeof(callp), link);
+}
+
+static inline mp_call_t *
+mp_call_dequeue(queue_t call_queue)
+{
+	mp_call_t	*callp;
 
-	if (i_bit(MP_TLB_FLUSH, my_word) && (pmap_tlb_flush_timeout == FALSE)) {
-		DBGLOG(cpu_handle, cpu_number(), MP_TLB_FLUSH);
-		i_bit_clear(MP_TLB_FLUSH, my_word);
-		pmap_update_interrupt();
+	assert(mp_call_is_locked());
+	if (queue_empty(call_queue))
+		return NULL;
+	queue_remove_first(call_queue, callp, typeof(callp), link);
+	return callp;
+}
+
+/* Called on the boot processor to initialize global structures */
+static void
+mp_cpus_call_init(void)
+{
+	DBG("mp_cpus_call_init()\n");
+	simple_lock_init(&mp_cpus_call_lock, 0);
+	queue_init(&mp_cpus_call_freelist);
+}
+
+/*
+ * Called by each processor to add call buffers to the free list
+ * and to initialize the per-cpu call queue.
+ * Also called, but ignored, on slave processors upon restart/wake.
+ */
+static void
+mp_cpus_call_cpu_init(void)
+{
+	boolean_t	intrs_enabled;
+	int		i;
+	mp_call_t	*callp;
+
+	if (mp_cpus_call_queue[cpu_number()].next != NULL)
+		return; /* restart/wake case: called already */
+
+	queue_init(&mp_cpus_call_queue[cpu_number()]);
+	for (i = 0; i < MP_CPUS_CALL_BUFS_PER_CPU; i++) {
+		callp = (mp_call_t *) kalloc(sizeof(mp_call_t));
+		intrs_enabled = mp_call_lock();
+		mp_call_free(callp);
+		mp_call_unlock(intrs_enabled);
 	}
+
+	DBG("mp_cpus_call_init() done on cpu %d\n", cpu_number());
 }
 
 /*
  * This is called from cpu_signal_handler() to process an MP_CALL signal.
+ * And also from i386_deactivate_cpu() when a cpu is being taken offline.
  */
 static void
 mp_cpus_call_action(void)
 {
-	if (mp_rv_action_func != NULL)
-		mp_rv_action_func(mp_rv_func_arg);
-	atomic_incl(&mp_rv_complete, 1);
+	queue_t		cpu_head;
+	boolean_t	intrs_enabled;
+	mp_call_t	*callp;
+	mp_call_t	call;
+
+	assert(!ml_get_interrupts_enabled());
+	cpu_head = &mp_cpus_call_queue[cpu_number()];
+	intrs_enabled = mp_call_lock();
+	while ((callp = mp_call_dequeue(cpu_head)) != NULL) {
+		/* Copy the call request to the stack so its buffer can be freed */
+		call = *callp;
+		mp_call_free(callp);
+		if (call.func != NULL) {
+			mp_call_unlock(intrs_enabled);
+			KERNEL_DEBUG_CONSTANT(
+				TRACE_MP_CPUS_CALL_ACTION,
+				call.func, call.arg0, call.arg1, call.countp, 0);
+			call.func(call.arg0, call.arg1);
+			(void) mp_call_lock();
+		}
+		if (call.countp != NULL)
+			atomic_incl(call.countp, 1);
+	}
+	mp_call_unlock(intrs_enabled);
+}
+
+static boolean_t
+mp_call_queue(
+	int		cpu, 
+        void		(*action_func)(void *, void *),
+        void		*arg0,
+        void		*arg1,
+	volatile long	*countp)
+{
+	queue_t		cpu_head = &mp_cpus_call_queue[cpu];
+	mp_call_t	*callp;
+
+	assert(mp_call_is_locked());
+	callp = mp_call_alloc();
+	if (callp == NULL)
+		return FALSE;
+
+	callp->func = action_func;
+	callp->arg0 = arg0;
+	callp->arg1 = arg1;
+	callp->countp = countp;
+
+	queue_enter(cpu_head, callp, typeof(callp), link);
+
+	return TRUE;
 }
 
 /*
  * mp_cpus_call() runs a given function on cpus specified in a given cpu mask.
- * If the mode is SYNC, the function is called serially on the target cpus
- * in logical cpu order. If the mode is ASYNC, the function is called in
- * parallel over the specified cpus.
+ * Possible modes are:
+ *  SYNC:   function is called serially on target cpus in logical cpu order
+ *	    waiting for each call to be acknowledged before proceeding
+ *  ASYNC:  function call is queued to the specified cpus
+ *	    waiting for all calls to complete in parallel before returning
+ *  NOSYNC: function calls are queued
+ *	    but we return before the calls are confirmed complete.
  * The action function may be NULL.
  * The cpu mask may include the local cpu. Offline cpus are ignored.
- * Return does not occur until the function has completed on all cpus.
- * The return value is the number of cpus on which the function was called.
+ * The return value is the number of cpus on which the call was made or queued.
  */
 cpu_t
 mp_cpus_call(
@@ -946,32 +1073,77 @@ mp_cpus_call(
 	mp_sync_t	mode,
         void		(*action_func)(void *),
         void		*arg)
+{
+	return mp_cpus_call1(
+			cpus,
+			mode,
+			(void (*)(void *,void *))action_func,
+			arg,
+			NULL,
+			NULL,
+			NULL);
+}
+
+static void
+mp_cpus_call_wait(boolean_t intrs_enabled,
+		  long mp_cpus_signals,
+		  volatile long *mp_cpus_calls)
+{
+	queue_t		cpu_head;
+
+	cpu_head = &mp_cpus_call_queue[cpu_number()];
+
+	while (*mp_cpus_calls < mp_cpus_signals) {
+		if (!intrs_enabled) {
+			if (!queue_empty(cpu_head))
+				mp_cpus_call_action();
+
+			handle_pending_TLB_flushes();
+		}
+		cpu_pause();
+	}
+}
+
+cpu_t
+mp_cpus_call1(
+	cpumask_t	cpus,
+	mp_sync_t	mode,
+        void		(*action_func)(void *, void *),
+        void		*arg0,
+        void		*arg1,
+	cpumask_t	*cpus_calledp,
+	cpumask_t	*cpus_notcalledp)
 {
 	cpu_t		cpu;
-	boolean_t	intrs_enabled = ml_get_interrupts_enabled();
+	boolean_t	intrs_enabled = FALSE;
 	boolean_t	call_self = FALSE;
+	cpumask_t	cpus_called = 0;
+	cpumask_t	cpus_notcalled = 0;
+	long 		mp_cpus_signals = 0;
+	volatile long	mp_cpus_calls = 0;
+
+	KERNEL_DEBUG_CONSTANT(
+		TRACE_MP_CPUS_CALL | DBG_FUNC_START,
+		cpus, mode, action_func, arg0, arg1);
 
 	if (!smp_initialized) {
 		if ((cpus & CPUMASK_SELF) == 0)
-			return 0;
+			goto out;
 		if (action_func != NULL) {
-			(void) ml_set_interrupts_enabled(FALSE);
-			action_func(arg);
+			intrs_enabled = ml_set_interrupts_enabled(FALSE);
+			action_func(arg0, arg1);
 			ml_set_interrupts_enabled(intrs_enabled);
 		}
-		return 1;
+		call_self = TRUE;
+		goto out;
 	}
-		
-	/* obtain rendezvous lock */
-	simple_lock(&mp_rv_lock);
-
-	/* Use the rendezvous data structures for this call */
-	mp_rv_action_func = action_func;
-	mp_rv_func_arg = arg;
-	mp_rv_ncpus = 0;
-	mp_rv_complete = 0;
 
-	simple_lock(&x86_topo_lock);
+	/*
+	 * Queue the call for each non-local requested cpu.
+	 * The topo lock is not taken. Instead we sniff the cpu_running state
+	 * and then re-check it after taking the call lock. A cpu being taken
+	 * offline runs the action function after clearing cpu_running.
+	 */
 	for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) {
 		if (((cpu_to_cpumask(cpu) & cpus) == 0) ||
 		    !cpu_datap(cpu)->cpu_running)
@@ -982,61 +1154,92 @@ mp_cpus_call(
 			 * we defer our call until we have signalled all others.
 			 */
 			call_self = TRUE;
+			cpus_called |= cpu_to_cpumask(cpu);
 			if (mode == SYNC && action_func != NULL) {
-				(void) ml_set_interrupts_enabled(FALSE);
-				action_func(arg);
-				ml_set_interrupts_enabled(intrs_enabled);
+				KERNEL_DEBUG_CONSTANT(
+					TRACE_MP_CPUS_CALL_LOCAL,
+					action_func, arg0, arg1, 0, 0);
+				action_func(arg0, arg1);
 			}
 		} else {
 			/*
-			 * Bump count of other cpus called and signal this cpu.
-			 * Note: we signal asynchronously regardless of mode
-			 * because we wait on mp_rv_complete either here
-			 * (if mode == SYNC) or later (if mode == ASYNC).
-			 * While spinning, poll for TLB flushes if interrupts
-			 * are disabled.
+			 * Here to queue a call to cpu and IPI.
+			 * Spinning for request buffer unless NOSYNC.
 			 */
-			mp_rv_ncpus++;
-			i386_signal_cpu(cpu, MP_CALL, ASYNC);
-			if (mode == SYNC) {
-				simple_unlock(&x86_topo_lock);
-				while (mp_rv_complete < mp_rv_ncpus) {
-					if (!intrs_enabled)
+		queue_call:
+			intrs_enabled = mp_call_lock();
+			if (!cpu_datap(cpu)->cpu_running) {
+				mp_call_unlock(intrs_enabled);
+				continue;
+			}
+			if (mode == NOSYNC) {
+				if (!mp_call_queue(cpu, action_func, arg0, arg1,
+						   NULL)) {
+					cpus_notcalled |= cpu_to_cpumask(cpu);
+					mp_call_unlock(intrs_enabled);
+					KERNEL_DEBUG_CONSTANT(
+						TRACE_MP_CPUS_CALL_NOBUF,
+						cpu, 0, 0, 0, 0);
+					continue;
+				}
+			} else {
+				if (!mp_call_queue(cpu, action_func, arg0, arg1,
+						      &mp_cpus_calls)) {
+					mp_call_unlock(intrs_enabled);
+					KERNEL_DEBUG_CONSTANT(
+						TRACE_MP_CPUS_CALL_NOBUF,
+						cpu, 0, 0, 0, 0);
+					if (!intrs_enabled) {
+						mp_cpus_call_action();
 						handle_pending_TLB_flushes();
+					}
 					cpu_pause();
+					goto queue_call;
 				}
-				simple_lock(&x86_topo_lock);
+			}
+			mp_cpus_signals++;
+			cpus_called |= cpu_to_cpumask(cpu);
+			i386_signal_cpu(cpu, MP_CALL, ASYNC);
+			mp_call_unlock(intrs_enabled);
+			if (mode == SYNC) {
+				mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls);
 			}
 		}
 	}
-	simple_unlock(&x86_topo_lock);
 
-	/*
-	 * If calls are being made asynchronously,
-	 * make the local call now if needed, and then
-	 * wait for all other cpus to finish their calls.
-	 */
-	if (mode == ASYNC) {
-		if (call_self && action_func != NULL) {
-			(void) ml_set_interrupts_enabled(FALSE);
-			action_func(arg);
+	/* Call locally if mode not SYNC */
+	if (mode != SYNC && call_self ) {
+		KERNEL_DEBUG_CONSTANT(
+			TRACE_MP_CPUS_CALL_LOCAL,
+			action_func, arg0, arg1, 0, 0);
+		if (action_func != NULL) {
+			ml_set_interrupts_enabled(FALSE);
+			action_func(arg0, arg1);
 			ml_set_interrupts_enabled(intrs_enabled);
 		}
-		while (mp_rv_complete < mp_rv_ncpus) {
-			if (!intrs_enabled)
-				handle_pending_TLB_flushes();
-			cpu_pause();
-		}
 	}
-	
-	/* Determine the number of cpus called */
-	cpu = mp_rv_ncpus + (call_self ? 1 : 0);
 
-	simple_unlock(&mp_rv_lock);
+	/* For ASYNC, now wait for all signaled cpus to complete their calls */
+	if (mode == ASYNC) {
+		mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls);
+	}
+
+out:
+	cpu = (cpu_t) mp_cpus_signals + (call_self ? 1 : 0);
+
+	if (cpus_calledp)
+		*cpus_calledp = cpus_called;
+	if (cpus_notcalledp)
+		*cpus_notcalledp = cpus_notcalled;
+
+	KERNEL_DEBUG_CONSTANT(
+		TRACE_MP_CPUS_CALL | DBG_FUNC_END,
+		cpu, cpus_called, cpus_notcalled, 0, 0);
 
 	return cpu;
 }
 
+
 static void
 mp_broadcast_action(void)
 {
@@ -1156,7 +1359,7 @@ void
 mp_kdp_enter(void)
 {
 	unsigned int	cpu;
-	unsigned int	ncpus;
+	unsigned int	ncpus = 0;
 	unsigned int	my_cpu;
 	uint64_t	tsc_timeout;
 
@@ -1170,7 +1373,6 @@ mp_kdp_enter(void)
 	mp_kdp_state = ml_set_interrupts_enabled(FALSE);
 	my_cpu = cpu_number();
 	cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time();
-
 	simple_lock(&mp_kdp_lock);
 
 	if (pmsafe_debug && !kdp_snapshot)
@@ -1184,7 +1386,6 @@ mp_kdp_enter(void)
 #endif
 		simple_lock(&mp_kdp_lock);
 	}
-	my_cpu = cpu_number();
 	debugger_cpu = my_cpu;
 	ncpus = 1;
 	mp_kdp_ncpus = 1;	/* self */
@@ -1246,7 +1447,7 @@ mp_kdp_enter(void)
 		}
 
 	DBG("mp_kdp_enter() %lu processors done %s\n",
-	    mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
+	    (int)mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out");
 	
 	postcode(MP_KDP_ENTER);
 }
@@ -1353,6 +1554,8 @@ mp_kdp_exit(void)
 	if (pmsafe_debug && !kdp_snapshot)
 	    pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
 
+	debugger_exit_time = mach_absolute_time();
+
 	DBG("mp_kdp_exit() done\n");
 	(void) ml_set_interrupts_enabled(mp_kdp_state);
 	postcode(0);
@@ -1381,6 +1584,7 @@ cause_ast_check(
 
 	if (cpu != cpu_number()) {
 		i386_signal_cpu(cpu, MP_AST, ASYNC);
+		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, 1, 0, 0, 0);
 	}
 }
 
@@ -1419,7 +1623,7 @@ remote_kdb(void)
 
 		cpu_pause();
 	}
-	DBG("mp_kdp_enter() %d processors done %s\n",
+	DBG("mp_kdp_enter() %lu processors done %s\n",
 		mp_kdb_ncpus, (mp_kdb_ncpus == kdb_ncpus) ? "OK" : "timed out");
 }
 
@@ -1495,8 +1699,8 @@ slave_machine_init(void *param)
 		 * Cold start
 		 */
 		clock_init();
-
 		cpu_machine_init();	/* Interrupts enabled hereafter */
+		mp_cpus_call_cpu_init();
 	}
 }
 
@@ -1554,3 +1758,117 @@ db_trap_hist(void)
 #endif	/* TRAP_DEBUG */
 #endif	/* MACH_KDB */
 
+static void
+cpu_prewarm_init()
+{
+	int i;
+
+	simple_lock_init(&cpu_warm_lock, 0);
+	queue_init(&cpu_warm_call_list);
+	for (i = 0; i < NUM_CPU_WARM_CALLS; i++) {
+		enqueue_head(&cpu_warm_call_list, (queue_entry_t)&cpu_warm_call_arr[i]);
+	}
+}
+
+static timer_call_t
+grab_warm_timer_call()
+{
+	spl_t x;
+	timer_call_t call = NULL;
+
+	x = splsched();
+	simple_lock(&cpu_warm_lock);
+	if (!queue_empty(&cpu_warm_call_list)) {
+		call = (timer_call_t) dequeue_head(&cpu_warm_call_list);
+	}
+	simple_unlock(&cpu_warm_lock);
+	splx(x);
+
+	return call;
+}
+
+static void
+free_warm_timer_call(timer_call_t call)
+{
+	spl_t x;
+
+	x = splsched();
+	simple_lock(&cpu_warm_lock);
+	enqueue_head(&cpu_warm_call_list, (queue_entry_t)call);
+	simple_unlock(&cpu_warm_lock);
+	splx(x);
+}
+
+/*
+ * Runs in timer call context (interrupts disabled).
+ */
+static void
+cpu_warm_timer_call_func(
+		call_entry_param_t p0,
+		__unused call_entry_param_t p1)
+{
+	free_warm_timer_call((timer_call_t)p0);
+	return;
+}
+
+/*
+ * Runs with interrupts disabled on the CPU we wish to warm (i.e. CPU 0).
+ */
+static void
+_cpu_warm_setup(
+		void *arg)
+{
+	cpu_warm_data_t cwdp = (cpu_warm_data_t)arg;
+
+	timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_CRITICAL | TIMER_CALL_LOCAL);
+	cwdp->cwd_result = 0;
+
+	return;
+}
+
+/*
+ * Not safe to call with interrupts disabled.
+ */
+kern_return_t
+ml_interrupt_prewarm(
+	uint64_t 	deadline)
+{
+	struct cpu_warm_data cwd;
+	timer_call_t call;
+	cpu_t ct;
+
+	if (ml_get_interrupts_enabled() == FALSE) {
+		panic("%s: Interrupts disabled?\n", __FUNCTION__);
+	}
+
+	/* 
+	 * If the platform doesn't need our help, say that we succeeded. 
+	 */
+	if (!ml_get_interrupt_prewake_applicable()) {
+		return KERN_SUCCESS;
+	}
+
+	/*
+	 * Grab a timer call to use.
+	 */
+	call = grab_warm_timer_call();
+	if (call == NULL) {
+		return KERN_RESOURCE_SHORTAGE;
+	}
+
+	timer_call_setup(call, cpu_warm_timer_call_func, call);
+	cwd.cwd_call = call;
+	cwd.cwd_deadline = deadline;
+	cwd.cwd_result = 0;
+
+	/*
+	 * For now, non-local interrupts happen on the master processor.
+	 */
+	ct = mp_cpus_call(cpu_to_cpumask(master_cpu), SYNC, _cpu_warm_setup, &cwd);
+	if (ct == 0) {
+		free_warm_timer_call(call);
+		return KERN_FAILURE;
+	} else {
+		return cwd.cwd_result;
+	}
+}
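
A sketch of how a driver might use the new ml_interrupt_prewarm() SPI added above: arm a warm timer on the master CPU shortly before an expected device interrupt. The 500us lead time and the wrapper function are illustrative; nanoseconds_to_absolutetime() and mach_absolute_time() are the existing kernel interfaces:

static kern_return_t
prewarm_for_expected_interrupt(void)
{
	uint64_t	lead, deadline;

	/* expect the device interrupt ~500us from now (illustrative) */
	nanoseconds_to_absolutetime(500 * NSEC_PER_USEC, &lead);
	deadline = mach_absolute_time() + lead;

	/*
	 * Must be called with interrupts enabled; ml_interrupt_prewarm()
	 * panics otherwise. KERN_RESOURCE_SHORTAGE means all of the
	 * NUM_CPU_WARM_CALLS pooled timer calls are in flight.
	 */
	return ml_interrupt_prewarm(deadline);
}
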
diff --git a/osfmk/i386/mp.h b/osfmk/i386/mp.h
index 8a2abbd0a..6974ef256 100644
--- a/osfmk/i386/mp.h
+++ b/osfmk/i386/mp.h
@@ -106,7 +106,7 @@ extern	volatile boolean_t mp_kdp_trap;
 extern 	volatile boolean_t force_immediate_debugger_NMI;
 extern  volatile boolean_t pmap_tlb_flush_timeout;
 extern  volatile usimple_lock_t spinlock_timed_out;
-extern	volatile uint32_t spinlock_owner_cpu;
+extern  volatile uint32_t spinlock_owner_cpu;
 
 extern	uint64_t	LastDebuggerEntryAllowance;
 
@@ -163,10 +163,12 @@ cpu_to_cpumask(cpu_t cpu)
  * Invoke a function (possibly NULL) on a set of cpus specified by a mask.
  * The mask may include the local cpu.
  * If the mode is:
- *	- ASYNC: other cpus make their calls in parallel.
- * 	- SYNC: the calls are performed serially in logical cpu order.
- * This call returns when the function has been run on all specified cpus.
- * The return value is the number of cpus on which the call was made.
+ *	- ASYNC:  other cpus make their calls in parallel
+ * 	- SYNC:   the calls are performed serially in logical cpu order
+ * 	- NOSYNC: the calls are queued
+ * Unless the mode is NOSYNC, mp_cpus_call() returns when the function has been
+ * called on all specified cpus.
+ * The return value is the number of cpus where the call was made or queued.
  * The action function is called with interrupts disabled.
  */
 extern cpu_t mp_cpus_call(
@@ -174,6 +176,14 @@ extern cpu_t mp_cpus_call(
 		mp_sync_t	mode,
 		void		(*action_func)(void *),
 		void		*arg);
+extern cpu_t mp_cpus_call1(
+		cpumask_t	cpus,
+		mp_sync_t	mode,
+		void		(*action_func)(void *, void*),
+		void		*arg0,
+		void		*arg1,
+		cpumask_t	*cpus_calledp,
+		cpumask_t	*cpus_notcalledp);
 
 /*
  * Power-management-specific SPI to:
@@ -183,7 +193,6 @@ extern cpu_t mp_cpus_call(
 extern void PM_interrupt_register(void (*fn)(void));
 extern void cpu_PM_interrupt(int cpu);
 
-
 __END_DECLS
 
 #if MP_DEBUG
@@ -249,38 +258,14 @@ extern cpu_signal_event_log_t	*cpu_handle[];
 #ifdef ASSEMBLER
 #define i_bit(bit, word)	((long)(*(word)) & (1L << (bit)))
 #else
-// Workaround for 6640051
-static inline long 
+__attribute__((always_inline)) static inline long 
 i_bit_impl(long word, long bit) {
-	return word & 1L << bit;
+	long bitmask = 1L << bit;
+	return word & bitmask;
 }
 #define i_bit(bit, word)	i_bit_impl((long)(*(word)), bit)
 #endif
 
-
-/* 
- *	Device driver synchronization. 
- *
- *	at386_io_lock(op) and at386_io_unlock() are called
- *	by device drivers when accessing H/W. The underlying 
- *	Processing is machine dependant. But the op argument
- *	to the at386_io_lock is generic
- */
-
-#define MP_DEV_OP_MAX	  4
-#define MP_DEV_WAIT	  MP_DEV_OP_MAX	/* Wait for the lock */
-
-/*
- * If the caller specifies an op value different than MP_DEV_WAIT, the
- * at386_io_lock function must return true if lock was successful else
- * false
- */
-
-#define MP_DEV_OP_START 0	/* If lock busy, register a pending start op */
-#define MP_DEV_OP_INTR	1	/* If lock busy, register a pending intr */
-#define MP_DEV_OP_TIMEO	2	/* If lock busy, register a pending timeout */
-#define MP_DEV_OP_CALLB	3	/* If lock busy, register a pending callback */
-
 #if	MACH_RT
 
 #if defined(__i386__)
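
A sketch of the extended mp_cpus_call1() interface declared above: queue a two-argument action on all other running cpus without waiting (NOSYNC) and record which cpus could not be reached because no call buffer was free. The poke_cpu() action is illustrative, and this assumes the CPUMASK_OTHERS convenience mask defined alongside cpu_to_cpumask() in this header:

/* illustrative action; runs on each target cpu with interrupts disabled */
static void
poke_cpu(void *arg0, void *arg1)
{
	(void) arg0;
	(void) arg1;
}

static void
poke_all_others(void)
{
	cpumask_t	called, notcalled;
	cpu_t		n;

	n = mp_cpus_call1(CPUMASK_OTHERS, NOSYNC, poke_cpu,
			  NULL, NULL, &called, &notcalled);
	/* n calls were queued; cpus set in notcalled had no free buffer */
	(void) n;
}
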
diff --git a/osfmk/i386/mp_desc.c b/osfmk/i386/mp_desc.c
index 084038e4b..2421dc734 100644
--- a/osfmk/i386/mp_desc.c
+++ b/osfmk/i386/mp_desc.c
@@ -81,7 +81,6 @@
 
 #include <mach_kdb.h>
 
-
 #ifdef __x86_64__
 #define K_INTR_GATE (ACC_P|ACC_PL_K|ACC_INTR_GATE)
 #define U_INTR_GATE (ACC_P|ACC_PL_U|ACC_INTR_GATE)
@@ -108,7 +107,7 @@
 #undef USER_TRAP_SPC
 
 #define TRAP(n, name)			\
-	[n] {				\
+	[n] = {				\
 		(uintptr_t)&name,	\
 		KERNEL64_CS,		\
 		0,			\
@@ -120,7 +119,7 @@
 #define TRAP_SPC TRAP
 
 #define TRAP_IST(n, name) \
-	[n] {				\
+	[n] = {				\
 		(uintptr_t)&name,	\
 		KERNEL64_CS,		\
 		1,			\
@@ -129,7 +128,7 @@
 	},
 
 #define INTERRUPT(n) \
-	[n] {				\
+	[n] = {				\
 		(uintptr_t)&_intr_ ## n,\
 		KERNEL64_CS,		\
 		0,			\
@@ -138,7 +137,7 @@
 	},
 
 #define USER_TRAP(n, name) \
-	[n] {				\
+	[n] = {				\
 		(uintptr_t)&name,	\
 		KERNEL64_CS,		\
 		0,			\
@@ -174,7 +173,7 @@ extern uint32_t		low_eintstack[];	/* top */
  */
 cpu_data_t	cpu_data_master = {
 	.cpu_this = &cpu_data_master,
-	.cpu_nanotime = &rtc_nanotime_info,
+	.cpu_nanotime = &pal_rtc_nanotime_info,
 	.cpu_int_stack_top = (vm_offset_t) low_eintstack,
 #ifdef __i386__
 	.cpu_is64bit = FALSE,
@@ -182,7 +181,7 @@ cpu_data_t	cpu_data_master = {
 	.cpu_is64bit = TRUE
 #endif
 };
-cpu_data_t	*cpu_data_ptr[MAX_CPUS] = { [0] &cpu_data_master };
+cpu_data_t	*cpu_data_ptr[MAX_CPUS] = { [0] = &cpu_data_master };
 
 decl_simple_lock_data(,ncpus_lock);	/* protects real_ncpus */
 unsigned int	real_ncpus = 1;
@@ -383,21 +382,21 @@ fix_desc64(void *descp, int count)
 		case ACC_CALL_GATE:
 		case ACC_INTR_GATE:
 		case ACC_TRAP_GATE:
-			real.gate.offset_low16 = fakep->offset64 & 0xFFFF;
+			real.gate.offset_low16 = (uint16_t)(fakep->offset64 & 0xFFFF);
 			real.gate.selector16 = fakep->lim_or_seg & 0xFFFF;
 			real.gate.IST = fakep->size_or_IST & 0x7;
 			real.gate.access8 = fakep->access;
-			real.gate.offset_high16 = (fakep->offset64>>16)&0xFFFF;
+			real.gate.offset_high16 = (uint16_t)((fakep->offset64>>16) & 0xFFFF);
 			real.gate.offset_top32 = (uint32_t)(fakep->offset64>>32);
 			break;
 		default:	/* Otherwise */
 			real.desc.limit_low16 = fakep->lim_or_seg & 0xFFFF;
-			real.desc.base_low16 = fakep->offset64 & 0xFFFF;
-			real.desc.base_med8 = (fakep->offset64 >> 16) & 0xFF;
+			real.desc.base_low16 = (uint16_t)(fakep->offset64 & 0xFFFF);
+			real.desc.base_med8 = (uint8_t)((fakep->offset64 >> 16) & 0xFF);
 			real.desc.access8 = fakep->access;
 			real.desc.limit_high4 = (fakep->lim_or_seg >> 16) & 0xFF;
 			real.desc.granularity4 = fakep->size_or_IST;
-			real.desc.base_high8 = (fakep->offset64 >> 24) & 0xFF;
+			real.desc.base_high8 = (uint8_t)((fakep->offset64 >> 24) & 0xFF);
 			real.desc.base_top32 = (uint32_t)(fakep->offset64>>32);
 		}
 
@@ -536,13 +535,13 @@ cpu_desc_init(cpu_data_t *cdp)
 		cdt->gdt[sel_idx(CPU_DATA_GS)].offset = (vm_offset_t) cdp;
 		fix_desc(&cdt->gdt[sel_idx(CPU_DATA_GS)], 1);
 
-#if	MACH_KDB
+#if	MACH_KDB /* this only works for legacy 32-bit machines */
 		cdt->gdt[sel_idx(DEBUG_TSS)] = tss_desc_pattern;
 		cdt->gdt[sel_idx(DEBUG_TSS)].offset = (vm_offset_t) cdi->cdi_dbtss;
 		fix_desc(&cdt->gdt[sel_idx(DEBUG_TSS)], 1);
 
 		cdt->dbtss.esp0 = (int)(db_task_stack_store +
-				(INTSTACK_SIZE * (cdp->cpu_number)) - sizeof (natural_t));
+				(INTSTACK_SIZE * (cdp->cpu_number + 1)) - sizeof (natural_t));
 		cdt->dbtss.esp = cdt->dbtss.esp0;
 		cdt->dbtss.eip = (int)&db_task_start;
 #endif	/* MACH_KDB */
@@ -635,7 +634,8 @@ cpu_desc_init64(cpu_data_t *cdp)
 			kernel_tss_desc64;
 		fix_desc64(&cdt->gdt[sel_idx(KERNEL_TSS)], 1);
 
-		/* Set double-fault stack as IST1 */
+		/* Set (zeroed) double-fault stack as IST1 */
+		bzero((void *) cdt->dfstk, sizeof(cdt->dfstk));
 		cdt->ktss.ist1 = UBER64((unsigned long)cdt->dfstk + sizeof(cdt->dfstk));
 #ifdef __i386__
 		cdt->gdt[sel_idx(CPU_DATA_GS)] = cpudata_desc_pattern;
@@ -775,6 +775,7 @@ fast_syscall_init64(__unused cpu_data_t *cdp)
 #endif
 }
 
+
 cpu_data_t *
 cpu_data_alloc(boolean_t is_boot_cpu)
 {
@@ -790,8 +791,6 @@ cpu_data_alloc(boolean_t is_boot_cpu)
 #if NCOPY_WINDOWS > 0
 			cdp->cpu_pmap = pmap_cpu_alloc(TRUE);
 #endif
-			queue_init(&cdp->rtclock_timer.queue);
-			cdp->rtclock_timer.deadline = EndOfAllTime;
 		}
 		return cdp;
 	}
@@ -823,7 +822,6 @@ cpu_data_alloc(boolean_t is_boot_cpu)
 	bzero((void*) cdp->cpu_int_stack_top, INTSTACK_SIZE);
 	cdp->cpu_int_stack_top += INTSTACK_SIZE;
 
-
 	/*
 	 * Allocate descriptor table:
 	 * Size depends on cpu mode.
@@ -860,9 +858,7 @@ cpu_data_alloc(boolean_t is_boot_cpu)
 	real_ncpus++;
 	simple_unlock(&ncpus_lock);
 
-	cdp->cpu_nanotime = &rtc_nanotime_info;
-	queue_init(&cdp->rtclock_timer.queue);
-	cdp->rtclock_timer.deadline = EndOfAllTime;
+	cdp->cpu_nanotime = &pal_rtc_nanotime_info;
 
 	kprintf("cpu_data_alloc(%d) %p desc_table: %p "
 		"ldt: %p "
@@ -885,6 +881,64 @@ abort:
 	return NULL;
 }
 
+boolean_t
+valid_user_data_selector(uint16_t selector)
+{
+    sel_t	sel = selector_to_sel(selector);
+    
+    if (selector == 0)
+    	return (TRUE);
+
+    if (sel.ti == SEL_LDT)
+	return (TRUE);
+    else if (sel.index < GDTSZ) {
+	if ((gdt_desc_p(selector)->access & ACC_PL_U) == ACC_PL_U)
+	    return (TRUE);
+    }
+		
+    return (FALSE);
+}
+
+boolean_t
+valid_user_code_selector(uint16_t selector)
+{
+    sel_t	sel = selector_to_sel(selector);
+    
+    if (selector == 0)
+    	return (FALSE);
+
+    if (sel.ti == SEL_LDT) {
+	if (sel.rpl == USER_PRIV)
+	    return (TRUE);
+    }
+    else if (sel.index < GDTSZ && sel.rpl == USER_PRIV) {
+	if ((gdt_desc_p(selector)->access & ACC_PL_U) == ACC_PL_U)
+	    return (TRUE);
+    }
+
+    return (FALSE);
+}
+
+boolean_t
+valid_user_stack_selector(uint16_t selector)
+{
+    sel_t	sel = selector_to_sel(selector);
+    
+    if (selector == 0)
+    	return (FALSE);
+
+    if (sel.ti == SEL_LDT) {
+	if (sel.rpl == USER_PRIV)
+	    return (TRUE);
+    }
+    else if (sel.index < GDTSZ && sel.rpl == USER_PRIV) {
+	if ((gdt_desc_p(selector)->access & ACC_PL_U) == ACC_PL_U)
+	    return (TRUE);
+    }
+		
+    return (FALSE);
+}
+
 boolean_t
 valid_user_segment_selectors(uint16_t cs,
 		uint16_t ss,
@@ -901,7 +955,6 @@ valid_user_segment_selectors(uint16_t cs,
 		valid_user_data_selector(gs);
 }
 
-
 #if NCOPY_WINDOWS > 0
 
 static vm_offset_t user_window_base = 0;
@@ -954,6 +1007,10 @@ cpu_userwindow_init(int cpu)
  	user_window = user_window_base + (cpu * NCOPY_WINDOWS * NBPDE);
 
 	cdp->cpu_copywindow_base = user_window;
+	/*
+	 * Abuse this pdp entry: the pdp now actually points to
+	 * an array of copy-window addresses.
+	 */
 	cdp->cpu_copywindow_pdp  = pmap_pde(kernel_pmap, user_window);
 
 #ifdef __i386__
@@ -1005,7 +1062,7 @@ void
 cpu_mode_init(cpu_data_t *cdp)
 {
 #ifdef __i386__
-	if (cpu_mode_is64bit()) {
+	if (cdp->cpu_is64bit) {
 		cpu_IA32e_enable(cdp);
 		cpu_desc_load64(cdp);
 		fast_syscall_init64(cdp);
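
The repeated "[n] {" to "[n] = {" edits in this file (and "[0] = &cpu_data_master" above) replace an obsolete GNU initializer extension with standard C99 designated initializers, which newer compilers require. A minimal standalone illustration of the accepted form:

#include <stdio.h>

int squares[8] = {
	[2] = 4,	/* C99 designated initializer: '=' is required */
	[3] = 9,
};

int main(void)
{
	printf("%d %d\n", squares[2], squares[3]);
	return 0;
}
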
diff --git a/osfmk/i386/mp_desc.h b/osfmk/i386/mp_desc.h
index 14d186eb1..97b04c9cb 100644
--- a/osfmk/i386/mp_desc.h
+++ b/osfmk/i386/mp_desc.h
@@ -121,63 +121,14 @@ extern void	cpu_desc_init64(cpu_data_t *cdp);
 extern void	cpu_desc_load(cpu_data_t *cdp);
 extern void	cpu_desc_load64(cpu_data_t *cdp);
 
-static inline boolean_t
-valid_user_data_selector(uint16_t selector)
-{
-    sel_t	sel = selector_to_sel(selector);
-    
-    if (selector == 0)
-    	return (TRUE);
-
-    if (sel.ti == SEL_LDT)
-	return (TRUE);
-    else if (sel.index < GDTSZ) {
-	if ((gdt_desc_p(selector)->access & ACC_PL_U) == ACC_PL_U)
-	    return (TRUE);
-    }
-		
-    return (FALSE);
-}
-
-static inline boolean_t
-valid_user_code_selector(uint16_t selector)
-{
-    sel_t	sel = selector_to_sel(selector);
-    
-    if (selector == 0)
-    	return (FALSE);
-
-    if (sel.ti == SEL_LDT) {
-	if (sel.rpl == USER_PRIV)
-	    return (TRUE);
-    }
-    else if (sel.index < GDTSZ && sel.rpl == USER_PRIV) {
-	if ((gdt_desc_p(selector)->access & ACC_PL_U) == ACC_PL_U)
-	    return (TRUE);
-    }
-
-    return (FALSE);
-}
-
-static inline boolean_t
-valid_user_stack_selector(uint16_t selector)
-{
-    sel_t	sel = selector_to_sel(selector);
-    
-    if (selector == 0)
-    	return (FALSE);
-
-    if (sel.ti == SEL_LDT) {
-	if (sel.rpl == USER_PRIV)
-	    return (TRUE);
-    }
-    else if (sel.index < GDTSZ && sel.rpl == USER_PRIV) {
-	if ((gdt_desc_p(selector)->access & ACC_PL_U) == ACC_PL_U)
-	    return (TRUE);
-    }
-		
-    return (FALSE);
-}
+extern boolean_t
+valid_user_data_selector(uint16_t selector);
+
+extern boolean_t
+valid_user_code_selector(uint16_t selector);
+
+extern boolean_t
+valid_user_stack_selector(uint16_t selector);
 
 extern boolean_t
 valid_user_segment_selectors(uint16_t cs,
diff --git a/osfmk/i386/mp_events.h b/osfmk/i386/mp_events.h
index e870b0d03..32fde7cc5 100644
--- a/osfmk/i386/mp_events.h
+++ b/osfmk/i386/mp_events.h
@@ -65,7 +65,7 @@ const char *mp_event_name[] = {	\
 	"MP_LAST"		\
 }
 
-typedef enum { SYNC, ASYNC } mp_sync_t;
+typedef enum { SYNC, ASYNC, NOSYNC } mp_sync_t;
 
 __BEGIN_DECLS
 
diff --git a/osfmk/i386/mp_native.c b/osfmk/i386/mp_native.c
new file mode 100644
index 000000000..73d3b1ca0
--- /dev/null
+++ b/osfmk/i386/mp_native.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <mach/vm_types.h>
+#include <i386/acpi.h> /* install_real_mode_bootstrap */
+#include <i386/mp.h>
+#include <i386/lapic.h> /* lapic_* functions */
+#include <i386/machine_routines.h>
+#include <i386/cpu_data.h>
+#include <i386/pmap.h>
+
+/* PAL-related routines */
+void i386_cpu_IPI(int cpu);
+boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler, 
+		int ipi_vector, i386_intr_func_t ipi_handler);
+void i386_start_cpu(int lapic_id, int cpu_num);
+void i386_send_NMI(int cpu);
+void handle_pending_TLB_flushes(void);
+
+extern void	slave_pstart(void);
+
+#ifdef	MP_DEBUG
+int	trappedalready = 0;	/* (BRINGUP) */
+#endif	/* MP_DEBUG */
+
+boolean_t
+i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler, int ipi_vector, i386_intr_func_t ipi_handler)
+{
+	/* Local APIC? */
+	if (!lapic_probe())
+		return FALSE;
+
+	lapic_init();
+	lapic_configure();
+	lapic_set_intr_func(nmi_vector,  nmi_handler);
+	lapic_set_intr_func(ipi_vector, ipi_handler);
+
+	install_real_mode_bootstrap(slave_pstart);
+
+	return TRUE;
+}
+
+void
+i386_start_cpu(int lapic_id, __unused int cpu_num )
+{
+	LAPIC_WRITE(ICRD, lapic_id << LAPIC_ICRD_DEST_SHIFT);
+	LAPIC_WRITE(ICR, LAPIC_ICR_DM_INIT);
+	delay(100);
+
+	LAPIC_WRITE(ICRD, lapic_id << LAPIC_ICRD_DEST_SHIFT);
+	LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(REAL_MODE_BOOTSTRAP_OFFSET>>12));
+}
+
+void
+i386_send_NMI(int cpu)
+{
+	boolean_t state = ml_set_interrupts_enabled(FALSE);
+	/* Program the interrupt command register */
+	LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
+	/* The vector is ignored in this case--the target CPU will enter on the
+	 * NMI vector.
+	 */
+	LAPIC_WRITE(ICR, LAPIC_VECTOR(INTERPROCESSOR)|LAPIC_ICR_DM_NMI);
+	(void) ml_set_interrupts_enabled(state);
+}
+
+void
+handle_pending_TLB_flushes(void)
+{
+	volatile int	*my_word = &current_cpu_datap()->cpu_signals;
+
+	if (i_bit(MP_TLB_FLUSH, my_word)  && (pmap_tlb_flush_timeout == FALSE)) {
+		DBGLOG(cpu_handle, cpu_number(), MP_TLB_FLUSH);
+		i_bit_clear(MP_TLB_FLUSH, my_word);
+		pmap_update_interrupt();
+	}
+}
+
+void
+i386_cpu_IPI(int cpu)
+{
+#ifdef	MP_DEBUG
+	if(cpu_datap(cpu)->cpu_signals & 6) {	/* (BRINGUP) */
+		kprintf("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d\n", cpu_datap(cpu)->cpu_signals, cpu);
+	}
+#endif	/* MP_DEBUG */
+
+#if MACH_KDB
+#ifdef	MP_DEBUG
+	if(!trappedalready && (cpu_datap(cpu)->cpu_signals & 6)) {	/* (BRINGUP) */
+		if(kdb_cpu != cpu_number()) {
+			trappedalready = 1;
+			panic("i386_cpu_IPI: sending enter debugger signal (%08X) to cpu %d and I do not own debugger, owner = %08X\n", 
+				cpu_datap(cpu)->cpu_signals, cpu, kdb_cpu);
+		}
+	}
+#endif	/* MP_DEBUG */
+#endif
+
+	lapic_send_ipi(cpu, LAPIC_VECTOR(INTERPROCESSOR));
+}
diff --git a/osfmk/i386/mtrr.c b/osfmk/i386/mtrr.c
index 9129f40c6..63a19c6a2 100644
--- a/osfmk/i386/mtrr.c
+++ b/osfmk/i386/mtrr.c
@@ -333,7 +333,7 @@ mtrr_update_action(void * cache_control_type)
 		set_cr4(cr4 & ~CR4_PGE);
 
 	/* flush TLBs */
-	flush_tlb();   
+	flush_tlb_raw();
 
 	if (CACHE_CONTROL_PAT == cache_control_type) {
 		/* Change PA6 attribute field to WC */
@@ -365,7 +365,7 @@ mtrr_update_action(void * cache_control_type)
 
 	/* flush all caches and TLBs a second time */
 	wbinvd();
-	flush_tlb();
+	flush_tlb_raw();
 
 	/* restore normal cache mode */
 	set_cr0(cr0);
@@ -486,7 +486,6 @@ mtrr_range_add(addr64_t address, uint64_t length, uint32_t type)
 		return KERN_NOT_SUPPORTED;
 	}
 
-
 	/* check memory type (GPF exception for undefined types) */
 	if ((type != MTRR_TYPE_UNCACHEABLE)  &&
 	    (type != MTRR_TYPE_WRITECOMBINE) &&
diff --git a/osfmk/i386/pal_hibernate.h b/osfmk/i386/pal_hibernate.h
new file mode 100644
index 000000000..025e56ea4
--- /dev/null
+++ b/osfmk/i386/pal_hibernate.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef _I386_PAL_HIBERNATE_H
+#define _I386_PAL_HIBERNATE_H
+
+#define HIB_PTES		(4*GB - 1*I386_LPGBYTES) /* 4GB - 2MB */
+#define DEST_COPY_AREA	(HIB_PTES + 1*I386_PGBYTES)
+#define SRC_COPY_AREA	(HIB_PTES + 2*I386_PGBYTES)
+#define COPY_PAGE_AREA	(HIB_PTES + 3*I386_PGBYTES)
+
+#define HIB_BASE sectINITPTB
+#define HIB_ENTRYPOINT acpi_wake_prot_entry
+
+void pal_hib_window_setup(ppnum_t page);
+uintptr_t pal_hib_map(uintptr_t v, uint64_t p);
+void hibernateRestorePALState(uint32_t *src);
+void pal_hib_patchup(void);
+#define PAL_HIBERNATE_MAGIC_1 0xfeedfacedeadbeef 
+#define PAL_HIBERNATE_MAGIC_2 0x41b312133714 
+#endif /* _I386_PAL_HIBERNATE_H */
diff --git a/osfmk/ppc/cpu_number.h b/osfmk/i386/pal_lock_asm.h
similarity index 82%
rename from osfmk/ppc/cpu_number.h
rename to osfmk/i386/pal_lock_asm.h
index cd38aa6d6..a7baad7f9 100644
--- a/osfmk/ppc/cpu_number.h
+++ b/osfmk/i386/pal_lock_asm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,16 +25,15 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * @OSF_COPYRIGHT@
- */
-#ifdef	KERNEL_PRIVATE
+#ifndef _I386_PAL_LOCK_ASM_H
+#define _I386_PAL_LOCK_ASM_H
 
-#ifndef	_PPC_CPU_NUMBER_H_
-#define	_PPC_CPU_NUMBER_H_
+#ifdef XNU_KERNEL_PRIVATE
 
-extern int cpu_number(void);
+#define PUSHF pushf
+#define POPF  popf
+#define CLI   cli
 
-#endif	/* _PPC_CPU_NUMBER_H_ */
+#endif /* XNU_KERNEL_PRIVATE */
 
-#endif	/* KERNEL_PRIVATE */
+#endif /* _I386_PAL_LOCK_ASM_H */
diff --git a/osfmk/i386/pal_native.h b/osfmk/i386/pal_native.h
new file mode 100644
index 000000000..13cbf69fb
--- /dev/null
+++ b/osfmk/i386/pal_native.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef _I386_PAL_I386_H
+#define _I386_PAL_I386_H
+
+#ifdef XNU_KERNEL_PRIVATE
+
+/* No-op on bare-metal */
+#define pal_dbg_page_fault(x, y, z)
+#define pal_dbg_set_task_name( x )
+#define pal_set_signal_delivery( x )
+
+#define pal_is_usable_memory(b, t)	(TRUE)
+
+#define pal_hlt()			__asm__ volatile ("sti; hlt")
+#define pal_sti()			__asm__ volatile ("sti")
+#define pal_cli()			__asm__ volatile ("cli")
+
+static inline
+void pal_stop_cpu(boolean_t cli)
+{
+	if( cli )
+		__asm__ volatile ( "cli" );
+	__asm__ volatile ( "wbinvd; hlt" );
+}
+
+#define pal_register_cache_state(t, v)
+
+#define pal_execve_return(t)
+#define pal_thread_terminate_self(t)
+#define pal_ast_check(t)
+#define pal_switch_pmap(t,u,v)
+
+#define	panic_display_pal_info() do { } while(0)
+#define	pal_kernel_announce() do { } while(0)
+
+#define PAL_AICPM_PROPERTY_VALUE 0
+
+#define pal_pmc_swi() __asm__ __volatile__("int %0"::"i"(LAPIC_PMC_SWI_VECTOR):"memory")
+
+/* Macro used by non-native xnu variants to access low globals, which may
+ * have moved.
+ */
+#define PAL_KDP_ADDR(x) (x)
+
+struct pal_rtc_nanotime {
+	volatile uint64_t	tsc_base;	/* timestamp */
+	volatile uint64_t	ns_base;	/* nanoseconds */
+	uint32_t		scale;		/* tsc -> nanosec multiplier */
+	uint32_t		shift;		/* tsc -> nanosec shift/div */
+						/* shift is overloaded with
+						 * lower 32bits of tsc_freq
+						 * on slower machines (SLOW_TSC_THRESHOLD) */
+	volatile uint32_t	generation;	/* 0 == being updated */
+	uint32_t		spare1;
+};
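+
+/*
+ * Reader sketch: "generation" acts as a sequence lock.  A lock-free
+ * consumer must retry while it is 0 (update in progress) or changes
+ * across the read.  A minimal example, assuming the fast-TSC path
+ * where "shift" is unused and "scale" is a 32.32 fixed-point multiplier:
+ *
+ *	volatile struct pal_rtc_nanotime *nt = &pal_rtc_nanotime_info;
+ *	uint64_t gen, ns;
+ *	do {
+ *		gen = nt->generation;
+ *		ns  = nt->ns_base +
+ *		      (((rdtsc64() - nt->tsc_base) * nt->scale) >> 32);
+ *	} while (gen == 0 || gen != nt->generation);
+ *
+ * The writer side is _pal_rtc_nanotime_store() in pal_routines_asm.s.
+ */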
+
+
+#ifdef MACH_KERNEL_PRIVATE
+
+struct pal_cpu_data {
+
+};
+
+struct pal_pcb {
+    
+};
+
+struct pal_apic_table {
+    
+};
+
+#endif /* MACH_KERNEL_PRIVATE */
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+#endif /* _I386_PAL_I386_H */
diff --git a/osfmk/i386/pal_routines.c b/osfmk/i386/pal_routines.c
new file mode 100644
index 000000000..34e5bd0a5
--- /dev/null
+++ b/osfmk/i386/pal_routines.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+/*
+ * file: pal_routines.c
+ *       Platform Abstraction Layer routines for bare-metal i386 and x86_64
+ */
+
+
+#include <kern/kern_types.h>
+#include <mach/mach_types.h>
+#include <kern/thread.h>
+#include <kern/simple_lock.h>
+
+#include <sys/kdebug.h>
+#include <machine/pal_routines.h>
+#include <i386/serial_io.h>
+#include <i386/lapic.h>
+#include <i386/proc_reg.h>
+#include <i386/misc_protos.h>
+#include <i386/machine_routines.h>
+#include <i386/pmap.h>
+
+//#define PAL_DEBUG 1
+#ifdef PAL_DEBUG
+#define DBG(x...)       kprintf("PAL_DBG: " x)
+#else
+#define DBG(x...)
+#endif /* PAL_DEBUG */
+
+extern void *gPEEFIRuntimeServices;
+extern void *gPEEFISystemTable;
+
+/* nanotime conversion information */
+pal_rtc_nanotime_t pal_rtc_nanotime_info = {0,0,0,0,1,0};
+
+/* APIC kext may use this to access xnu internal state */
+struct pal_apic_table *apic_table = NULL;
+
+decl_simple_lock_data(static , pal_efi_lock);
+#ifdef __x86_64__
+#define PML4_PROT	(INTEL_PTE_VALID | INTEL_PTE_WRITE)
+#define INIT_PDPT_BASE	(INITPT_SEG_BASE + PAGE_SIZE)
+static pml4_entry_t IDPML4[PTE_PER_PAGE] __attribute__ ((aligned (4096))) = {
+	[0]		    = (uint64_t)(INIT_PDPT_BASE | PML4_PROT),
+	[KERNEL_PML4_INDEX] = (uint64_t)(INIT_PDPT_BASE | PML4_PROT),
+};
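+/*
+ * Note: IDPML4 points both slot 0 and KERNEL_PML4_INDEX at the PDPT at
+ * INIT_PDPT_BASE, so the high kernel mapping is also visible in the low
+ * 4GB while 32-bit EFI runtime code runs; see the CR3 switch in
+ * pal_efi_call_in_32bit_mode() below.
+ */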
+uint64_t	pal_efi_saved_cr0;
+uint64_t	pal_efi_saved_cr3;
+#endif
+
+
+/* Serial routines */
+int
+pal_serial_init(void)
+{
+	return serial_init();
+}
+
+void
+pal_serial_putc(char c)
+{
+	serial_putc(c);
+}
+
+int
+pal_serial_getc(void)
+{
+	return serial_getc();
+}
+
+
+/* Generic routines */
+void
+pal_i386_init(void)
+{
+    simple_lock_init(&pal_efi_lock, 0);
+}
+
+void
+pal_get_control_registers( pal_cr_t *cr0, pal_cr_t *cr2, 
+			   pal_cr_t *cr3, pal_cr_t *cr4 )
+{
+	*cr0 = get_cr0();
+	*cr2 = get_cr2();
+	*cr3 = get_cr3_raw();
+	*cr4 = get_cr4();
+}
+
+
+/*
+ * Define functions for these below, ensuring the symbols exist even
+ * though they are unused on this platform.
+ */
+#undef pal_dbg_page_fault
+void
+pal_dbg_page_fault( thread_t thread __unused,
+		    user_addr_t vaddr __unused,
+		    kern_return_t kr __unused )
+{
+}
+
+#undef pal_dbg_set_task_name
+void
+pal_dbg_set_task_name( task_t task __unused )
+{
+}
+
+#undef pal_set_signal_delivery
+void
+pal_set_signal_delivery(thread_t thread __unused)
+{
+}
+
+/* EFI thunks */
+extern void
+_pal_efi_call_in_64bit_mode_asm(uint64_t func,
+                                struct pal_efi_registers *efi_reg,
+                                void *stack_contents,
+                                size_t stack_contents_size);
+
+kern_return_t
+pal_efi_call_in_64bit_mode(uint64_t func,
+                           struct pal_efi_registers *efi_reg,
+                           void *stack_contents,
+                           size_t stack_contents_size, /* 16-byte multiple */
+                           uint64_t *efi_status)
+{
+    DBG("pal_efi_call_in_64bit_mode(0x%016llx, %p, %p, %lu, %p)\n",
+        func, efi_reg, stack_contents, stack_contents_size, efi_status);
+
+    if (func == 0) {
+        return KERN_INVALID_ADDRESS;
+    }
+
+    if ((efi_reg == NULL)
+        || (stack_contents == NULL)
+        || (stack_contents_size % 16 != 0)) {
+        return KERN_INVALID_ARGUMENT;
+    }
+
+    if (!gPEEFISystemTable || !gPEEFIRuntimeServices) {
+        return KERN_NOT_SUPPORTED;
+    }
+
+    _pal_efi_call_in_64bit_mode_asm(func,
+                                    efi_reg,
+                                    stack_contents,
+                                    stack_contents_size);
+
+    *efi_status = efi_reg->rax;
+
+    return KERN_SUCCESS;
+}
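+
+/*
+ * Hypothetical caller sketch ("service_entry", "arg0" and "arg1" are
+ * illustrative, not real EFI bindings): preload the argument registers
+ * via pal_efi_registers and pad the stack image to a 16-byte multiple:
+ *
+ *	struct pal_efi_registers regs = { .rcx = arg0, .rdx = arg1 };
+ *	uint8_t stack_image[16] = { 0 };
+ *	uint64_t status;
+ *	kern_return_t kr = pal_efi_call_in_64bit_mode(service_entry,
+ *	    &regs, stack_image, sizeof(stack_image), &status);
+ */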
+
+extern void
+_pal_efi_call_in_32bit_mode_asm(uint32_t func,
+                                struct pal_efi_registers *efi_reg,
+                                void *stack_contents,
+                                size_t stack_contents_size);
+
+kern_return_t
+pal_efi_call_in_32bit_mode(uint32_t func,
+                           struct pal_efi_registers *efi_reg,
+                           void *stack_contents,
+                           size_t stack_contents_size, /* 16-byte multiple */
+                           uint32_t *efi_status)
+{
+    DBG("pal_efi_call_in_32bit_mode(0x%08x, %p, %p, %lu, %p)\n",
+	func, efi_reg, stack_contents, stack_contents_size, efi_status);
+
+    if (func == 0) {
+        return KERN_INVALID_ADDRESS;
+    }
+
+    if ((efi_reg == NULL)
+        || (stack_contents == NULL)
+        || (stack_contents_size % 16 != 0)) {
+        return KERN_INVALID_ARGUMENT;
+    }
+
+    if (!gPEEFISystemTable || !gPEEFIRuntimeServices) {
+        return KERN_NOT_SUPPORTED;
+    }
+
+    DBG("pal_efi_call_in_32bit_mode() efi_reg:\n");
+    DBG("  rcx: 0x%016llx\n", efi_reg->rcx);
+    DBG("  rdx: 0x%016llx\n", efi_reg->rdx);
+    DBG("   r8: 0x%016llx\n", efi_reg->r8);
+    DBG("   r9: 0x%016llx\n", efi_reg->r9);
+    DBG("  rax: 0x%016llx\n", efi_reg->rax);
+
+    DBG("pal_efi_call_in_32bit_mode() stack:\n");
+#if PAL_DEBUG
+    size_t i;
+    for (i = 0; i < stack_contents_size; i += sizeof(uint32_t)) {
+	uint32_t *p = (uint32_t *) ((uintptr_t)stack_contents + i);
+	DBG("  %p: 0x%08x\n", p, *p);
+    } 
+#endif
+
+#ifdef __x86_64__
+    /*
+     * Ensure no interruptions.
+     * Taking a spinlock for serialization is technically unnecessary
+     * because the EFIRuntime kext should serialize.
+     */
+    boolean_t istate = ml_set_interrupts_enabled(FALSE);
+    simple_lock(&pal_efi_lock);
+
+    /*
+     * Switch to special page tables with the entire high kernel space
+     * double-mapped into the bottom 4GB.
+     *
+     * NB: We assume that all data exchanged with RuntimeServices is
+     * located in the 4GB of KVA based at VM_MIN_ADDRESS. In particular, kexts
+     * loaded in the basement (below VM_MIN_ADDRESS) cannot pass static data.
+     * Kernel stack and heap space is OK.
+     */
+    MARK_CPU_IDLE(cpu_number());
+    pal_efi_saved_cr3 = get_cr3_raw();
+    pal_efi_saved_cr0 = get_cr0();
+    clear_ts();
+    set_cr3_raw((uint64_t) ID_MAP_VTOP(IDPML4));
+    
+    swapgs();			/* Save kernel's GS base */
+
+    /* Set segment state ready for compatibility mode */
+    set_gs(NULL_SEG);
+    set_fs(NULL_SEG);
+    set_es(KERNEL_DS);
+    set_ds(KERNEL_DS);
+    set_ss(KERNEL_DS);
+
+    _pal_efi_call_in_32bit_mode_asm(func,
+                                    efi_reg,
+                                    stack_contents,
+                                    stack_contents_size);
+    
+    /* Restore NULL segment state */
+    set_ss(NULL_SEG);
+    set_es(NULL_SEG);
+    set_ds(NULL_SEG);
+
+    swapgs();			/* Restore kernel's GS base */
+
+    /* Restore the 64-bit user GS base we just destroyed */
+    wrmsr64(MSR_IA32_KERNEL_GS_BASE,
+	    current_cpu_datap()->cpu_uber.cu_user_gs_base);
+
+    /* End of mapping games */
+    set_cr3_raw(pal_efi_saved_cr3);
+    set_cr0(pal_efi_saved_cr0);
+    MARK_CPU_ACTIVE(cpu_number());
+    
+    simple_unlock(&pal_efi_lock);
+    ml_set_interrupts_enabled(istate);
+#else
+    _pal_efi_call_in_32bit_mode_asm(func,
+                                    efi_reg,
+                                    stack_contents,
+                                    stack_contents_size);
+#endif
+
+    *efi_status = (uint32_t)efi_reg->rax;
+    DBG("pal_efi_call_in_32bit_mode() efi_status: 0x%x\n", *efi_status);
+
+    return KERN_SUCCESS;
+}
+
+/* wind-back a syscall instruction */
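+/*
+ * SYSENTER is wound back 5 bytes to re-execute the user-mode entry
+ * sequence that precedes it (likely pop %edx; mov %esp, %ecx; sysenter);
+ * INT and SYSCALL are 2-byte instructions.
+ */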
+void
+pal_syscall_restart(thread_t thread __unused, x86_saved_state_t *state)
+{
+	/* work out which flavour thread it is */
+	if( is_saved_state32(state) )
+	{
+		x86_saved_state32_t	*regs32;
+		regs32 = saved_state32(state);
+
+		if (regs32->cs == SYSENTER_CS || regs32->cs == SYSENTER_TF_CS)
+			regs32->eip -= 5;
+		else
+			regs32->eip -= 2;
+	}
+	else
+	{
+		x86_saved_state64_t	*regs64;
+
+		assert( is_saved_state64(state) );
+		regs64 = saved_state64(state);
+
+		/* Only one instruction for 64-bit threads */
+		regs64->isf.rip -= 2;
+	}
+
+}
+
+/* Helper function to put the machine to sleep (or shutdown) */
+
+boolean_t
+pal_machine_sleep(uint8_t type_a __unused, uint8_t type_b __unused, uint32_t bit_position __unused, 
+		uint32_t disable_mask __unused, uint32_t enable_mask __unused)
+{
+	return 0;
+}
+
+
+/* shouldn't be used on native */
+void
+pal_get_kern_regs( x86_saved_state_t *state )
+{
+	panic( "pal_get_kern_regs called. state %p\n", state );
+}
+
+void
+pal_preemption_assert(void)
+{
+}
+
+void
+hibernate_pal_prepare(void)
+{
+}
diff --git a/osfmk/i386/pal_routines.h b/osfmk/i386/pal_routines.h
new file mode 100644
index 000000000..dc59735b9
--- /dev/null
+++ b/osfmk/i386/pal_routines.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef _I386_PAL_ROUTINES_H
+#define _I386_PAL_ROUTINES_H
+
+#include <stdint.h>
+#include <mach/kern_return.h>
+#include <mach/mach_types.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* PAL routines exported to kexts */
+
+/*
+ * Load registers with these values. In 32-bit mode,
+ * only the low-order half is loaded (if applicable)
+ */
+struct pal_efi_registers {
+    uint64_t rcx;
+    uint64_t rdx;
+    uint64_t r8;
+    uint64_t r9;
+    uint64_t rax;
+};
+
+/*
+ * Load registers and stack with these values before
+ * executing "call" instruction
+ */
+kern_return_t
+pal_efi_call_in_64bit_mode(uint64_t func,
+                           struct pal_efi_registers *efi_reg,
+                           void *stack_contents,
+                           size_t stack_contents_size, /* 16-byte multiple */
+                           uint64_t *efi_status);
+
+kern_return_t
+pal_efi_call_in_32bit_mode(uint32_t func,
+                           struct pal_efi_registers *efi_reg,
+                           void *stack_contents,
+                           size_t stack_contents_size, /* 16-byte multiple */
+                           uint32_t *efi_status);
+
+/* Go into ACPI sleep */
+
+boolean_t pal_machine_sleep(uint8_t type_a, 
+                            uint8_t type_b, 
+                            uint32_t bit_position, 
+                            uint32_t disable_mask, 
+                            uint32_t enable_mask);
+
+/* xnu internal PAL routines */
+#ifdef XNU_KERNEL_PRIVATE
+
+/* Define any PAL-specific types for x86 */
+#ifdef __i386__
+typedef uint32_t pal_cr_t;
+#else
+typedef uint64_t pal_cr_t;
+#endif
+
+struct pal_cpu_data; /* Defined per-platform */
+struct pal_pcb; /* Defined per-platform */
+struct pal_apic_table; /* Defined per-platform */
+
+/* For use by APIC kext */
+extern struct pal_apic_table *apic_table;
+    
+/* serial / debug output routines */
+extern int  pal_serial_init(void);
+extern void pal_serial_putc(char);
+extern int  pal_serial_getc(void);
+
+/* Generic I386 PAL functions go here */
+extern void pal_i386_init(void);
+extern void pal_set_signal_delivery(thread_t);
+
+/* Get values for cr0..4 */
+extern void pal_get_control_registers( pal_cr_t *cr0, pal_cr_t *cr2, 
+				       pal_cr_t *cr3, pal_cr_t *cr4 );
+
+/* Debug hook invoked in the page-fault path */
+extern void pal_dbg_page_fault( thread_t thread, user_addr_t vaddr,
+				kern_return_t kr );
+
+/* Set a task's name in the platform kernel debugger */
+extern void pal_dbg_set_task_name( task_t task );
+
+/* wind-back to the start of a system call */
+void pal_syscall_restart(thread_t thread, x86_saved_state_t *state);
+
+/* Hook for non-vfork exec */
+void pal_execve_return(thread_t thread);
+
+/* Called by thread_terminate_self() */
+void pal_thread_terminate_self(thread_t thread);
+
+/* Called by ast_check() */
+void pal_ast_check(thread_t thread);
+
+/* Called by sync_iss_to_iks */
+extern void pal_get_kern_regs( x86_saved_state_t *state );
+
+/* Called by load_machfile */
+void pal_switch_pmap(thread_t, pmap_t, boolean_t);
+
+/*
+ * Platform-specific hlt/sti.
+ */ 
+extern void pal_hlt(void);
+extern void pal_sti(void);
+extern void pal_cli(void);
+
+/*
+ * Mark in-memory thread register cache state validity.
+ */
+typedef enum { DIRTY, VALID } pal_cache_state_t;
+void pal_register_cache_state(thread_t thread, pal_cache_state_t state);
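+
+/*
+ * Usage pattern (see pcb.c): mark DIRTY before mutating a thread's
+ * saved state and VALID before reading it back, e.g.
+ *
+ *	pal_register_cache_state(thread, DIRTY);
+ *	saved_state = USER_REGS64(thread);
+ *
+ * On bare metal this is a no-op (see pal_native.h).
+ */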
+
+
+/* Catch code running on the exception thread that shouldn't be */
+void pal_preemption_assert(void);
+
+void hibernate_pal_prepare(void);
+void pal_efi_hibernate_prepare(void);
+
+/* Include a PAL-specific header, too, for xnu-internal overrides */
+#include <i386/pal_native.h>
+
+
+/* Allow for tricky IOKit property matching */
+#define PAL_AICPM_PROPERTY_NAME "intel_cpupm_matching"
+static inline void 
+pal_get_resource_property(const char **property_name, int *property_value)
+{
+	*property_name = PAL_AICPM_PROPERTY_NAME;
+	*property_value = PAL_AICPM_PROPERTY_VALUE;
+}
+
+/* assembly function to update TSC / timebase info */
+extern void _pal_rtc_nanotime_store(
+	uint64_t		tsc,
+	uint64_t		nsec,
+	uint32_t		scale,
+	uint32_t		shift,
+	struct pal_rtc_nanotime	*dst);
+
+/* global nanotime info */
+extern struct pal_rtc_nanotime pal_rtc_nanotime_info;
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _I386_PAL_ROUTINES_H */
diff --git a/osfmk/i386/pal_routines_asm.s b/osfmk/i386/pal_routines_asm.s
new file mode 100644
index 000000000..0c4089af9
--- /dev/null
+++ b/osfmk/i386/pal_routines_asm.s
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+ 
+#include <i386/asm.h>
+#include <i386/asm64.h>
+       
+#include <assym.s>
+
+/*
+ * Copy "count" bytes from "src" to %esp, using
+ * "tmpindex" for a scratch counter and %eax
+ */
+#define COPY_STACK(src, count, tmpindex) \
+	mov	$0, tmpindex	/* initial scratch counter */ ; \
+1: \
+	mov	0(src,tmpindex,1), %eax	 /* copy one 32-bit word from source... */ ; \
+	mov	%eax, 0(%esp,tmpindex,1) /* ... to stack */ ; \
+	add	$4, tmpindex		 /* increment counter */ ; \
+	cmp	count, tmpindex		 /* exit when stack has been copied */ ; \
+	jne 1b
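+
+/*
+ * C-equivalent sketch of COPY_STACK (assumes count is a nonzero
+ * multiple of 4):
+ *
+ *	for (tmpindex = 0; tmpindex != count; tmpindex += 4)
+ *		*(uint32_t *)(stack + tmpindex) = *(uint32_t *)(src + tmpindex);
+ */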
+	
+/*
+	void
+	_pal_efi_call_in_64bit_mode_asm(uint64_t func,
+	                           struct pal_efi_registers *efi_reg,
+	                           void *stack_contents,
+	                           size_t stack_contents_size)
+
+	* Switch from compatibility mode to long mode, and
+	* then execute the function pointer with the specified
+	* register and stack contents (based at %rsp). Afterwards,
+	* collect the return value, restore the original state,
+	* and return.
+*/
+ENTRY(_pal_efi_call_in_64bit_mode_asm)
+	FRAME
+
+	/* save non-volatile registers */
+	push	%ebx
+	push	%esi
+	push	%edi
+
+	sub	$12, %esp	/* align to 16-byte boundary */
+	mov	16(%ebp), %esi	/* load efi_reg into %esi */
+	mov	20(%ebp), %edx	/* load stack_contents into %edx */
+	mov	24(%ebp), %ecx	/* load s_c_s into %ecx */
+	sub	%ecx, %esp	/* make room for stack contents */
+
+	COPY_STACK(%edx, %ecx, %edi)
+	
+	ENTER_64BIT_MODE()
+
+	/* load efi_reg into real registers */
+	mov	0(%rsi),  %rcx
+	mov	8(%rsi),  %rdx
+	mov	16(%rsi), %r8
+	mov	24(%rsi), %r9
+	mov	32(%rsi), %rax
+
+	mov	8(%rbp), %rdi		/* load func pointer */
+	call	*%rdi			/* call EFI runtime */
+
+	mov	16(%rbp), %esi		/* load efi_reg into %esi */
+	mov	%rax, 32(%rsi)		/* save RAX back */
+
+	ENTER_COMPAT_MODE()
+
+	add	24(%ebp), %esp	/* discard stack contents */
+	add	$12, %esp	/* restore stack pointer */
+
+	pop	%edi
+	pop	%esi
+	pop	%ebx
+
+	EMARF
+	ret
+
+/*
+	void
+	_pal_efi_call_in_32bit_mode_asm(uint32_t func,
+	                           struct pal_efi_registers *efi_reg,
+	                           void *stack_contents,
+	                           size_t stack_contents_size)
+*/
+ENTRY(_pal_efi_call_in_32bit_mode_asm)
+	FRAME
+
+	/* save non-volatile registers */
+	push	%ebx
+	push	%esi
+	push	%edi
+
+	sub	$12, %esp	/* align to 16-byte boundary */
+	mov	12(%ebp), %esi	/* load efi_reg into %esi */
+	mov	16(%ebp), %edx	/* load stack_contents into %edx */
+	mov	20(%ebp), %ecx	/* load s_c_s into %ecx */
+	sub	%ecx, %esp	/* make room for stack contents */
+
+	COPY_STACK(%edx, %ecx, %edi)
+	
+	/* load efi_reg into real registers */
+	mov	0(%esi),  %ecx
+	mov	8(%esi),  %edx
+	mov	32(%esi), %eax
+
+	mov	8(%ebp), %edi		/* load func pointer */
+	call	*%edi			/* call EFI runtime */
+
+	mov	12(%ebp), %esi		/* load efi_reg into %esi */
+	mov	%eax, 32(%esi)		/* save RAX back */
+	movl	$0, 36(%esi)		/* zero out high bits of RAX */
+
+	add	20(%ebp), %esp	/* discard stack contents */
+	add	$12, %esp	/* restore stack pointer */
+
+	pop	%edi
+	pop	%esi
+	pop	%ebx
+
+	EMARF
+	ret
+
+
+/* void             _pal_rtc_nanotime_store(uint64_t            tsc,
+	                                uint64_t                nsec,
+	                                uint32_t                scale,
+	                                uint32_t                shift,
+	                                struct pal_rtc_nanotime *dst);
+*/
+
+ENTRY(_pal_rtc_nanotime_store)
+	push		%ebp
+	movl		%esp,%ebp
+	push		%esi
+
+	mov		32(%ebp),%edx				/* get ptr to rtc_nanotime_info */
+		
+	movl		RNT_GENERATION(%edx),%esi		/* get current generation */
+	movl		$0,RNT_GENERATION(%edx)			/* flag data as being updated */
+
+	mov		8(%ebp),%eax
+	mov		%eax,RNT_TSC_BASE(%edx)
+	mov		12(%ebp),%eax
+	mov		%eax,RNT_TSC_BASE+4(%edx)
+
+	mov		24(%ebp),%eax
+	mov		%eax,RNT_SCALE(%edx)
+
+	mov		28(%ebp),%eax
+	mov		%eax,RNT_SHIFT(%edx)
+
+	mov		16(%ebp),%eax
+	mov		%eax,RNT_NS_BASE(%edx)
+	mov		20(%ebp),%eax
+	mov		%eax,RNT_NS_BASE+4(%edx)
+		
+	incl		%esi					/* next generation */
+	jnz		1f
+	incl		%esi					/* skip 0, which is a flag */
+1:	movl		%esi,RNT_GENERATION(%edx)		/* update generation and make usable */
+
+	pop		%esi
+	pop		%ebp
+
+	ret
+
+
diff --git a/osfmk/ppc/mp.h b/osfmk/i386/pal_rtclock_asm.h
similarity index 86%
rename from osfmk/ppc/mp.h
rename to osfmk/i386/pal_rtclock_asm.h
index 9b2dde5a2..69070bebb 100644
--- a/osfmk/ppc/mp.h
+++ b/osfmk/i386/pal_rtclock_asm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,14 +25,9 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifndef	_PPC_MP_H_
-#define	_PPC_MP_H_
+#ifndef _I386_PAL_RTCLOCK_ASM_H
+#define _I386_PAL_RTCLOCK_ASM_H
 
-#include <cpus.h>
-#include <mach_kdb.h>
+#include <i386/rtclock_asm_native.h>
 
-#endif	/* _PPC_MP_H_ */
+#endif /* _I386_PAL_RTCLOCK_ASM_H */
diff --git a/osfmk/i386/pcb.c b/osfmk/i386/pcb.c
index 421cc3f53..caf0c68db 100644
--- a/osfmk/i386/pcb.c
+++ b/osfmk/i386/pcb.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -87,21 +87,16 @@
 #include <i386/cpu_number.h>
 #include <i386/eflags.h>
 #include <i386/proc_reg.h>
-#include <i386/tss.h>
-#include <i386/user_ldt.h>
 #include <i386/fpu.h>
-#include <i386/mp_desc.h>
 #include <i386/misc_protos.h>
+#include <i386/mp_desc.h>
 #include <i386/thread.h>
 #if defined(__i386__)
 #include <i386/fpu.h>
 #endif
-#include <i386/seg.h>
 #include <i386/machine_routines.h>
 #include <i386/lapic.h> /* LAPIC_PMC_SWI_VECTOR */
 
-#include <machine/commpage.h>
-
 #if CONFIG_COUNTERS
 #include <pmc/pmc.h>
 #endif /* CONFIG_COUNTERS */
@@ -134,9 +129,6 @@ zone_t		ids_zone;		/* zone for debug_state area */
 
 /* Forward */
 
-void		act_machine_throughcall(thread_t thr_act);
-void		act_machine_return(int);
-
 extern void		Thread_continue(void);
 extern void		Load_context(
 				thread_t			thread);
@@ -185,7 +177,7 @@ static inline void
 pmc_swi(thread_t old, thread_t new) {
 	current_cpu_datap()->csw_old_thread = old;
 	current_cpu_datap()->csw_new_thread = new;
-	__asm__ __volatile__("int %0"::"i"(LAPIC_PMC_SWI_VECTOR):"memory");
+	pal_pmc_swi();
 }
 
 static inline void
@@ -366,7 +358,7 @@ set_debug_state32(thread_t thread, x86_debug_state32_t *ds)
 	x86_debug_state32_t *ids;
 	pcb_t pcb;
 
-	pcb = thread->machine.pcb;
+	pcb = THREAD_TO_PCB(thread);
 	ids = pcb->ids;
 
 	if (debug_state_is_valid32(ds) != TRUE) {
@@ -400,7 +392,7 @@ set_debug_state64(thread_t thread, x86_debug_state64_t *ds)
 	x86_debug_state64_t *ids;
 	pcb_t pcb;
 
-	pcb = thread->machine.pcb;
+	pcb = THREAD_TO_PCB(thread);
 	ids = pcb->ids;
 
 	if (debug_state_is_valid64(ds) != TRUE) {
@@ -432,7 +424,7 @@ get_debug_state32(thread_t thread, x86_debug_state32_t *ds)
 {
 	x86_debug_state32_t *saved_state;
 
-	saved_state = thread->machine.pcb->ids;
+	saved_state = thread->machine.ids;
 
 	if (saved_state) {
 		copy_debug_state32(saved_state, ds, TRUE);
@@ -445,7 +437,7 @@ get_debug_state64(thread_t thread, x86_debug_state64_t *ds)
 {
 	x86_debug_state64_t *saved_state;
 
-	saved_state = (x86_debug_state64_t *)thread->machine.pcb->ids;
+	saved_state = (x86_debug_state64_t *)thread->machine.ids;
 
 	if (saved_state) {
 		copy_debug_state64(saved_state, ds, TRUE);
@@ -467,318 +459,6 @@ void
 consider_machine_adjust(void)
 {
 }
-extern void *get_bsduthreadarg(thread_t th);
-
-#if defined(__x86_64__)
-static void
-act_machine_switch_pcb( thread_t new )
-{
-        pcb_t			pcb = new->machine.pcb;
-	struct real_descriptor	*ldtp;
-	mach_vm_offset_t	pcb_stack_top;
-	cpu_data_t              *cdp = current_cpu_datap();
-
-	assert(new->kernel_stack != 0);
-
-	if (!cpu_mode_is64bit()) {
-		panic("K64 is 64bit!");
-	} else if (is_saved_state64(pcb->iss)) {
-		/*
-		 * The test above is performed against the thread save state
-		 * flavor and not task's 64-bit feature flag because of the
-		 * thread/task 64-bit state divergence that can arise in
-		 * task_set_64bit() x86: the task state is changed before
-		 * the individual thread(s).
-		 */
-	        x86_saved_state64_tagged_t	*iss64;
-		vm_offset_t			isf;
-
-		assert(is_saved_state64(pcb->iss));
-						   
-		iss64 = (x86_saved_state64_tagged_t *) pcb->iss;
-	
-		/*
-		 * Set pointer to PCB's interrupt stack frame in cpu data.
-		 * Used by syscall and double-fault trap handlers.
-		 */
-		isf = (vm_offset_t) &iss64->state.isf;
-		cdp->cpu_uber.cu_isf = isf;
-		pcb_stack_top = (vm_offset_t) (iss64 + 1);
-		/* require 16-byte alignment */
-		assert((pcb_stack_top & 0xF) == 0);
-
-		/* Interrupt stack is pcb */
-		current_ktss64()->rsp0 = pcb_stack_top;
-
-		/*
-		 * Top of temporary sysenter stack points to pcb stack.
-		 * Although this is not normally used by 64-bit users,
-		 * it needs to be set in case a sysenter is attempted.
-		 */
-		*current_sstk64() = pcb_stack_top;
-
-		cdp->cpu_task_map = new->map->pmap->pm_task_map; 
-
-		/*
-		 * Enable the 64-bit user code segment, USER64_CS.
-		 * Disable the 32-bit user code segment, USER_CS.
-		 */
-		ldt_desc_p(USER64_CS)->access |= ACC_PL_U;
-		ldt_desc_p(USER_CS)->access &= ~ACC_PL_U;
-
-		/*
-		 * Switch user's GS base if necessary
-		 * by setting the Kernel GS base MSR
-		 * - this will become the user's on the swapgs when
-		 * returning to user-space. Avoid this for
-		 * kernel threads (no user TLS support required)
-		 * and verify the memory shadow of the segment base
-		 * in the event it was altered in user space.
-		 */
-		if ((pcb->cthread_self != 0) || (new->task != kernel_task)) {
-			if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) {
-				cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
-				wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self);
-			}
-		}
-	} else {
-		x86_saved_state_compat32_t	*iss32compat;
-		vm_offset_t			isf;
-
-		assert(is_saved_state32(pcb->iss));
-		iss32compat = (x86_saved_state_compat32_t *) pcb->iss;
-
-		pcb_stack_top = (uintptr_t) (iss32compat + 1);
-		/* require 16-byte alignment */
-		assert((pcb_stack_top & 0xF) == 0);
-
-		/*
-		 * Set pointer to PCB's interrupt stack frame in cpu data.
-		 * Used by debug trap handler.
-		 */
-		isf = (vm_offset_t) &iss32compat->isf64;
-		cdp->cpu_uber.cu_isf = isf;
-
-		/* Top of temporary sysenter stack points to pcb stack */
-		*current_sstk64() = pcb_stack_top;
-
-		/* Interrupt stack is pcb */
-		current_ktss64()->rsp0 = pcb_stack_top;
-
-		cdp->cpu_task_map = TASK_MAP_32BIT;
-		/* Precalculate pointers to syscall argument store, for use
-		 * in the trampolines.
-		 */
-		cdp->cpu_uber_arg_store = (vm_offset_t)get_bsduthreadarg(new);
-		cdp->cpu_uber_arg_store_valid = (vm_offset_t)&pcb->arg_store_valid;
-		pcb->arg_store_valid = 0;
-
-		/*
-		 * Disable USER64_CS
-		 * Enable USER_CS
-		 */
-		ldt_desc_p(USER64_CS)->access &= ~ACC_PL_U;
-		ldt_desc_p(USER_CS)->access |= ACC_PL_U;
-
-		/*
-		 * Set the thread`s cthread (a.k.a pthread)
-		 * For 32-bit user this involves setting the USER_CTHREAD
-		 * descriptor in the LDT to point to the cthread data.
-		 * The involves copying in the pre-initialized descriptor.
-		 */ 
-		ldtp = (struct real_descriptor *)current_ldt();
-		ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc;
-		if (pcb->uldt_selector != 0)
-			ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc;
-		cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
-
-		/*
-		 * Set the thread`s LDT or LDT entry.
-		 */
-		if (new->task == TASK_NULL || new->task->i386_ldt == 0) {
-			/*
-			 * Use system LDT.
-			 */
-		       	ml_cpu_set_ldt(KERNEL_LDT);
-		} else {
-			/*
-			 * Task has its own LDT.
-			 */
-			user_ldt_set(new);
-		}
-	}
-
-	/*
-	 * Bump the scheduler generation count in the commpage.
-	 * This can be read by user code to detect its preemption.
-	 */
-	commpage_sched_gen_inc();
-}
-#else
-static void
-act_machine_switch_pcb( thread_t new )
-{
-        pcb_t			pcb = new->machine.pcb;
-	struct real_descriptor	*ldtp;
-	vm_offset_t		pcb_stack_top;
-	vm_offset_t		hi_pcb_stack_top;
-	vm_offset_t		hi_iss;
-	cpu_data_t              *cdp = current_cpu_datap();
-
-	assert(new->kernel_stack != 0);
-	STACK_IEL(new->kernel_stack)->saved_state = pcb->iss;
-
-	if (!cpu_mode_is64bit()) {
-		x86_saved_state32_tagged_t	*hi_iss32;
-		/*
-		 *	Save a pointer to the top of the "kernel" stack -
-		 *	actually the place in the PCB where a trap into
-		 *	kernel mode will push the registers.
-		 */
-		hi_iss = (vm_offset_t)((unsigned long)
-			pmap_cpu_high_map_vaddr(cpu_number(), HIGH_CPU_ISS0) |
-			((unsigned long)pcb->iss & PAGE_MASK));
-
-		cdp->cpu_hi_iss = (void *)hi_iss;
-
-		pmap_high_map(pcb->iss_pte0, HIGH_CPU_ISS0);
-		pmap_high_map(pcb->iss_pte1, HIGH_CPU_ISS1);
-
-		hi_iss32 = (x86_saved_state32_tagged_t *) hi_iss;
-		assert(hi_iss32->tag == x86_SAVED_STATE32);
-
-		hi_pcb_stack_top = (int) (hi_iss32 + 1);
-
-		/*
-		 * For fast syscall, top of interrupt stack points to pcb stack
-		 */
-		*(vm_offset_t *) current_sstk() = hi_pcb_stack_top;
-
-		current_ktss()->esp0 = hi_pcb_stack_top;
-
-	} else if (is_saved_state64(pcb->iss)) {
-		/*
-		 * The test above is performed against the thread save state
-		 * flavor and not task's 64-bit feature flag because of the
-		 * thread/task 64-bit state divergence that can arise in
-		 * task_set_64bit() x86: the task state is changed before
-		 * the individual thread(s).
-		 */
-	        x86_saved_state64_tagged_t	*iss64;
-		vm_offset_t			isf;
-
-		assert(is_saved_state64(pcb->iss));
-						   
-		iss64 = (x86_saved_state64_tagged_t *) pcb->iss;
-	
-		/*
-		 * Set pointer to PCB's interrupt stack frame in cpu data.
-		 * Used by syscall and double-fault trap handlers.
-		 */
-		isf = (vm_offset_t) &iss64->state.isf;
-		cdp->cpu_uber.cu_isf = UBER64(isf);
-		pcb_stack_top = (vm_offset_t) (iss64 + 1);
-		/* require 16-byte alignment */
-		assert((pcb_stack_top & 0xF) == 0);
-		/* Interrupt stack is pcb */
-		current_ktss64()->rsp0 = UBER64(pcb_stack_top);
-
-		/*
-		 * Top of temporary sysenter stack points to pcb stack.
-		 * Although this is not normally used by 64-bit users,
-		 * it needs to be set in case a sysenter is attempted.
-		 */
-		*current_sstk64() = UBER64(pcb_stack_top);
-
-		cdp->cpu_task_map = new->map->pmap->pm_task_map; 
-
-		/*
-		 * Enable the 64-bit user code segment, USER64_CS.
-		 * Disable the 32-bit user code segment, USER_CS.
-		 */
-		ldt_desc_p(USER64_CS)->access |= ACC_PL_U;
-		ldt_desc_p(USER_CS)->access &= ~ACC_PL_U;
-
-	} else {
-		x86_saved_state_compat32_t	*iss32compat;
-		vm_offset_t			isf;
-
-		assert(is_saved_state32(pcb->iss));
-		iss32compat = (x86_saved_state_compat32_t *) pcb->iss;
-
-		pcb_stack_top = (int) (iss32compat + 1);
-		/* require 16-byte alignment */
-		assert((pcb_stack_top & 0xF) == 0);
-
-		/*
-		 * Set pointer to PCB's interrupt stack frame in cpu data.
-		 * Used by debug trap handler.
-		 */
-		isf = (vm_offset_t) &iss32compat->isf64;
-		cdp->cpu_uber.cu_isf = UBER64(isf);
-
-		/* Top of temporary sysenter stack points to pcb stack */
-		*current_sstk64() = UBER64(pcb_stack_top);
-
-		/* Interrupt stack is pcb */
-		current_ktss64()->rsp0 = UBER64(pcb_stack_top);
-
-		cdp->cpu_task_map = TASK_MAP_32BIT;
-		/* Precalculate pointers to syscall argument store, for use
-		 * in the trampolines.
-		 */
-		cdp->cpu_uber_arg_store = UBER64((vm_offset_t)get_bsduthreadarg(new));
-		cdp->cpu_uber_arg_store_valid = UBER64((vm_offset_t)&pcb->arg_store_valid);
-		pcb->arg_store_valid = 0;
-
-		/*
-		 * Disable USER64_CS
-		 * Enable USER_CS
-		 */
-		ldt_desc_p(USER64_CS)->access &= ~ACC_PL_U;
-		ldt_desc_p(USER_CS)->access |= ACC_PL_U;
-	}
-
-	/*
-	 * Set the thread`s cthread (a.k.a pthread)
-	 * For 32-bit user this involves setting the USER_CTHREAD
-	 * descriptor in the LDT to point to the cthread data.
-	 * The involves copying in the pre-initialized descriptor.
-	 */ 
-	ldtp = (struct real_descriptor *)current_ldt();
-	ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc;
-	if (pcb->uldt_selector != 0)
-		ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc;
-
-
-	/*
-	 * For 64-bit, we additionally set the 64-bit User GS base
-	 * address. On return to 64-bit user, the GS.Base MSR will be written.
-	 */
-	cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
-
-	/*
-	 * Set the thread`s LDT or LDT entry.
-	 */
-	if (new->task == TASK_NULL || new->task->i386_ldt == 0) {
-		/*
-		 * Use system LDT.
-		 */
-	       	ml_cpu_set_ldt(KERNEL_LDT);
-	} else {
-		/*
-		 * Task has its own LDT.
-		 */
-		user_ldt_set(new);
-	}
-
-	/*
-	 * Bump the scheduler generation count in the commpage.
-	 * This can be read by user code to detect its preemption.
-	 */
-	commpage_sched_gen_inc();
-}
-#endif
 
 /*
  * Switch to the first thread on a CPU.
@@ -791,7 +471,7 @@ machine_load_context(
 	machine_pmc_cswitch(NULL, new);
 #endif
 	new->machine.specFlags |= OnProc;
-	act_machine_switch_pcb(new);
+	act_machine_switch_pcb(NULL, new);
 	Load_context(new);
 }
 
@@ -817,7 +497,6 @@ machine_switch_context(
 	 */
 	fpu_save_context(old);
 
-
 	old->machine.specFlags &= ~OnProc;
 	new->machine.specFlags |= OnProc;
 
@@ -837,12 +516,12 @@ machine_switch_context(
 	 *	Switch address maps if need be, even if not switching tasks.
 	 *	(A server activation may be "borrowing" a client map.)
 	 */
-	PMAP_SWITCH_CONTEXT(old, new, cpu_number())
+	PMAP_SWITCH_CONTEXT(old, new, cpu_number());
 
 	/*
 	 *	Load the rest of the user state for the new thread
 	 */
-	act_machine_switch_pcb(new);
+	act_machine_switch_pcb(old, new);
 
 	return(Switch_context(old, continuation, new));
 }
@@ -861,16 +540,6 @@ machine_processor_shutdown(
 	return(Shutdown_context(thread, doshutdown, processor));
 }
 
-/*
- * act_machine_sv_free
- * release saveareas associated with an act.  if flag is true, release
- * user level savearea(s) too, else don't
- */
-void
-act_machine_sv_free(__unused thread_t act, __unused int flag)
-{
-}
-
 
 /*
  * This is where registers that are not normally specified by the mach-o
@@ -885,16 +554,16 @@ machine_thread_state_initialize(
      * The initialized state will then be lazily faulted-in, if required.
      * And if we're target, re-arm the no-fpu trap.
      */
-	if (thread->machine.pcb->ifps) {
+	if (thread->machine.ifps) {
 		(void) fpu_set_fxstate(thread, NULL, x86_FLOAT_STATE64);
 
 		if (thread == current_thread())
 			clear_fpu();
 	}
 
-	if (thread->machine.pcb->ids) {
-		zfree(ids_zone, thread->machine.pcb->ids);
-		thread->machine.pcb->ids = NULL;
+	if (thread->machine.ids) {
+		zfree(ids_zone, thread->machine.ids);
+		thread->machine.ids = NULL;
 	}
 
 	return  KERN_SUCCESS;
@@ -940,6 +609,7 @@ get_exception_state64(thread_t thread, x86_exception_state64_t *es)
         saved_state = USER_REGS64(thread);
 
 	es->trapno = saved_state->isf.trapno;
+	es->cpu = saved_state->isf.cpu;
 	es->err = (typeof(es->err))saved_state->isf.err;
 	es->faultvaddr = saved_state->cr2;
 }		
@@ -952,6 +622,7 @@ get_exception_state32(thread_t thread, x86_exception_state32_t *es)
         saved_state = USER_REGS32(thread);
 
 	es->trapno = saved_state->trapno;
+	es->cpu = saved_state->cpu;
 	es->err = saved_state->err;
 	es->faultvaddr = saved_state->cr2;
 }		
@@ -962,6 +633,7 @@ set_thread_state32(thread_t thread, x86_thread_state32_t *ts)
 {
         x86_saved_state32_t	*saved_state;
 
+	pal_register_cache_state(thread, DIRTY);
 
 	saved_state = USER_REGS32(thread);
 
@@ -1027,6 +699,7 @@ set_thread_state64(thread_t thread, x86_thread_state64_t *ts)
 {
         x86_saved_state64_t	*saved_state;
 
+	pal_register_cache_state(thread, DIRTY);
 
 	saved_state = USER_REGS64(thread);
 
@@ -1066,6 +739,7 @@ get_thread_state32(thread_t thread, x86_thread_state32_t *ts)
 {
         x86_saved_state32_t	*saved_state;
 
+	pal_register_cache_state(thread, VALID);
 
 	saved_state = USER_REGS32(thread);
 
@@ -1093,6 +767,7 @@ get_thread_state64(thread_t thread, x86_thread_state64_t *ts)
 {
         x86_saved_state64_t	*saved_state;
 
+	pal_register_cache_state(thread, VALID);
 
 	saved_state = USER_REGS64(thread);
 
@@ -1120,87 +795,6 @@ get_thread_state64(thread_t thread, x86_thread_state64_t *ts)
 }
 
 
-void
-thread_set_wq_state32(thread_t thread, thread_state_t tstate)
-{
-        x86_thread_state32_t	*state;
-        x86_saved_state32_t	*saved_state;
-	thread_t curth = current_thread();
-	spl_t			s=0;
-
-
-	saved_state = USER_REGS32(thread);
-
-	state = (x86_thread_state32_t *)tstate;
-	
-	if (curth != thread) {
-		s = splsched();
-	        thread_lock(thread);
-	}
-
-	saved_state->ebp = 0;
-	saved_state->eip = state->eip;
-	saved_state->eax = state->eax;
-	saved_state->ebx = state->ebx;
-	saved_state->ecx = state->ecx;
-	saved_state->edx = state->edx;
-	saved_state->edi = state->edi;
-	saved_state->esi = state->esi;
-	saved_state->uesp = state->esp;
-	saved_state->efl = EFL_USER_SET;
-
-	saved_state->cs = USER_CS;
-	saved_state->ss = USER_DS;
-	saved_state->ds = USER_DS;
-	saved_state->es = USER_DS;
-
-
-	if (curth != thread) {
-	        thread_unlock(thread);
-		splx(s);
-	}
-}
-
-
-void
-thread_set_wq_state64(thread_t thread, thread_state_t tstate)
-{
-        x86_thread_state64_t	*state;
-        x86_saved_state64_t	*saved_state;
-	thread_t curth = current_thread();
-	spl_t			s=0;
-
-
-	saved_state = USER_REGS64(thread);
-	state = (x86_thread_state64_t *)tstate;
-	
-	if (curth != thread) {
-		s = splsched();
-	        thread_lock(thread);
-	}
-
-	saved_state->rbp = 0;
-	saved_state->rdi = state->rdi;
-	saved_state->rsi = state->rsi;
-	saved_state->rdx = state->rdx;
-	saved_state->rcx = state->rcx;
-	saved_state->r8  = state->r8;
-	saved_state->r9  = state->r9;
-
-	saved_state->isf.rip = state->rip;
-	saved_state->isf.rsp = state->rsp;
-	saved_state->isf.cs = USER64_CS;
-	saved_state->isf.rflags = EFL_USER_SET;
-
-
-	if (curth != thread) {
-	        thread_unlock(thread);
-		splx(s);
-	}
-}
-
-
-
 /*
  *	act_machine_set_state:
  *
@@ -1237,6 +831,7 @@ machine_thread_set_state(
 					state->gs))
 			return KERN_INVALID_ARGUMENT;
 
+		pal_register_cache_state(thr_act, DIRTY);
 
 		saved_state = USER_REGS32(thr_act);
 
@@ -1307,6 +902,7 @@ machine_thread_set_state(
 		    !IS_USERADDR64_CANONICAL(state->isf.rip))
 			return KERN_INVALID_ARGUMENT;
 
+		pal_register_cache_state(thr_act, DIRTY);
 
 		saved_state = USER_REGS64(thr_act);
 
@@ -1757,6 +1353,11 @@ machine_thread_get_state(
 		*count = x86_EXCEPTION_STATE32_COUNT;
 
 		get_exception_state32(thr_act, (x86_exception_state32_t *)tstate);
+		/*
+		 * Suppress the cpu number for binary compatibility
+		 * of this deprecated state.
+		 */
+		((x86_exception_state32_t *)tstate)->cpu = 0;
 		break;
 	    }
 
@@ -1771,6 +1372,11 @@ machine_thread_get_state(
 		*count = x86_EXCEPTION_STATE64_COUNT;
 
 		get_exception_state64(thr_act, (x86_exception_state64_t *)tstate);
+		/*
+		 * Suppress the cpu number for binary compatibility
+		 * of this deprecated state.
+		 */
+		((x86_exception_state64_t *)tstate)->cpu = 0;
 		break;
 	    }
 
@@ -2029,156 +1635,6 @@ machine_thread_get_kern_state(
 }
 
 
-/*
- * Initialize the machine-dependent state for a new thread.
- */
-kern_return_t
-machine_thread_create(
-	thread_t		thread,
-	task_t			task)
-{
-	pcb_t			pcb = &thread->machine.xxx_pcb;
-	x86_saved_state_t	*iss;
-
-#if NCOPY_WINDOWS > 0
-	inval_copy_windows(thread);
-
-	thread->machine.physwindow_pte = 0;
-	thread->machine.physwindow_busy = 0;
-#endif
-
-	/*
-	 * Allocate pcb only if required.
-	 */
-	if (pcb->sf == NULL) {
-		pcb->sf = zalloc(iss_zone);
-		if (pcb->sf == NULL)
-			panic("iss_zone");
-	}
-
-        if (task_has_64BitAddr(task)) {
-		x86_sframe64_t		*sf64;
-
-		sf64 = (x86_sframe64_t *) pcb->sf;
-
-		bzero((char *)sf64, sizeof(x86_sframe64_t));
-
-		iss = (x86_saved_state_t *) &sf64->ssf;
-		iss->flavor = x86_SAVED_STATE64;
-		/*
-		 *      Guarantee that the bootstrapped thread will be in user
-		 *      mode.
-		 */
-		iss->ss_64.isf.rflags = EFL_USER_SET;
-		iss->ss_64.isf.cs = USER64_CS;
-		iss->ss_64.isf.ss = USER_DS;
-		iss->ss_64.fs = USER_DS;
-		iss->ss_64.gs = USER_DS;
-	} else {
-		if (cpu_mode_is64bit()) {
-			x86_sframe_compat32_t      *sfc32;
-
-			sfc32 = (x86_sframe_compat32_t *)pcb->sf;
-
-			bzero((char *)sfc32, sizeof(x86_sframe_compat32_t));
-
-			iss = (x86_saved_state_t *) &sfc32->ssf.iss32;
-			iss->flavor = x86_SAVED_STATE32;
-#if defined(__i386__)
-#if DEBUG
-			{
-				x86_saved_state_compat32_t *xssc;
-
-				xssc  = (x86_saved_state_compat32_t *) iss;
-
-				xssc->pad_for_16byte_alignment[0] = 0x64326432;
-				xssc->pad_for_16byte_alignment[1] = 0x64326432;
-			}
-#endif /* DEBUG */
-		} else {
-			x86_sframe32_t		*sf32;
-			struct real_descriptor	*ldtp;
-			pmap_paddr_t		paddr;
-
-			sf32 = (x86_sframe32_t *) pcb->sf;
-
-			bzero((char *)sf32, sizeof(x86_sframe32_t));
-
-			iss = (x86_saved_state_t *) &sf32->ssf;
-			iss->flavor = x86_SAVED_STATE32;
-			pcb->iss_pte0 = pte_kernel_rw(kvtophys((vm_offset_t)iss));
-			if (0 == (paddr = pa_to_pte(kvtophys((vm_offset_t)iss + PAGE_SIZE))))
-			        pcb->iss_pte1 = INTEL_PTE_INVALID;
-			else
-	      			pcb->iss_pte1 = pte_kernel_rw(paddr);
-
-
-			ldtp = (struct real_descriptor *)
-				    pmap_index_to_virt(HIGH_FIXED_LDT_BEGIN);
-			pcb->cthread_desc = ldtp[sel_idx(USER_DS)];
-			pcb->uldt_desc = ldtp[sel_idx(USER_DS)];
-#endif /* __i386__ */
-		}
-		/*
-		 *      Guarantee that the bootstrapped thread will be in user
-		 *      mode.
-		 */
-		iss->ss_32.cs = USER_CS;
-		iss->ss_32.ss = USER_DS;
-		iss->ss_32.ds = USER_DS;
-		iss->ss_32.es = USER_DS;
-		iss->ss_32.fs = USER_DS;
-		iss->ss_32.gs = USER_DS;
-		iss->ss_32.efl = EFL_USER_SET;
-
-	}
-	pcb->iss = iss;
-
-	thread->machine.pcb = pcb;
-	simple_lock_init(&pcb->lock, 0);
-
-	pcb->arg_store_valid = 0;
-	pcb->cthread_self = 0;
-	pcb->uldt_selector = 0;
-
-	/* Ensure that the "cthread" descriptor describes a valid
-	 * segment.
-	 */
-	if ((pcb->cthread_desc.access & ACC_P) == 0) {
-		struct real_descriptor	*ldtp;
-		ldtp = (struct real_descriptor *)current_ldt();
-		pcb->cthread_desc = ldtp[sel_idx(USER_DS)];
-	}
-
-
-	return(KERN_SUCCESS);
-}
-
-/*
- * Machine-dependent cleanup prior to destroying a thread
- */
-void
-machine_thread_destroy(
-	thread_t		thread)
-{
-	register pcb_t	pcb = thread->machine.pcb;
-
-	assert(pcb);
-        
-	if (pcb->ifps != 0)
-		fpu_free(pcb->ifps);
-	if (pcb->sf != 0) {
-		zfree(iss_zone, pcb->sf);
-		pcb->sf = 0;
-	}
-	if (pcb->ids) {
-		zfree(ids_zone, pcb->ids);
-		pcb->ids = NULL;
-	}
-	thread->machine.pcb = (pcb_t)0;
-
-}
-
 void
 machine_thread_switch_addrmode(thread_t thread)
 {
@@ -2189,17 +1645,20 @@ machine_thread_switch_addrmode(thread_t thread)
 	disable_preemption();
 
 	/*
-	 * Reset the state saveareas.
+	 * Reset the state saveareas. As we're resetting, we anticipate no
+	 * memory allocations in this path.
 	 */
 	machine_thread_create(thread, thread->task);
 
 	/* If we're switching ourselves, reset the pcb addresses etc. */
 	if (thread == current_thread()) {
+		boolean_t istate = ml_set_interrupts_enabled(FALSE);
 #if defined(__i386__)
-	  if (current_cpu_datap()->cpu_active_cr3 != kernel_pmap->pm_cr3)
-		pmap_load_kernel_cr3();
+		if (current_cpu_datap()->cpu_active_cr3 != kernel_pmap->pm_cr3)
+			pmap_load_kernel_cr3();
 #endif /* defined(__i386) */
-	  act_machine_switch_pcb(thread);
+		act_machine_switch_pcb(NULL, thread);
+		ml_set_interrupts_enabled(istate);
 	}
 	enable_preemption();
 }
@@ -2238,30 +1697,6 @@ machine_thread_terminate_self(void)
 	}
 }
 
-void
-act_machine_return(
-		int code
-		)
-{
-	/*
-	 * This code is called with nothing locked.
-	 * It also returns with nothing locked, if it returns.
-	 *
-	 * This routine terminates the current thread activation.
-	 * If this is the only activation associated with its
-	 * thread shuttle, then the entire thread (shuttle plus
-	 * activation) is terminated.
-	 */
-	assert( code == KERN_TERMINATED );
-
-	thread_terminate_self();
-
-	/*NOTREACHED*/
-
-	panic("act_machine_return(%d): TALKING ZOMBIE! (1)", code);
-}
-
-
 /*
  * Perform machine-dependent per-thread initializations
  */
@@ -2329,9 +1764,6 @@ dump_handlers(thread_t thr_act)
 void
 dump_regs(thread_t thr_act)
 {
-	if (thr_act->machine.pcb == NULL)
-		return;
-
 	if (thread_is_64bit(thr_act)) {
 		x86_saved_state64_t	*ssp;
 
@@ -2371,14 +1803,14 @@ dump_act(thread_t thr_act)
 	printf("\tsusp=%d user_stop=%d active=%x ast=%x\n",
 			thr_act->suspend_count, thr_act->user_stop_count,
 			thr_act->active, thr_act->ast);
-	printf("\tpcb=%p\n", thr_act->machine.pcb);
+	printf("\tpcb=%p\n", &thr_act->machine);
 
 	if (thr_act->kernel_stack) {
 		vm_offset_t stack = thr_act->kernel_stack;
 
 		printf("\tk_stk %lx  eip %x ebx %x esp %x iss %p\n",
 			(long)stack, STACK_IKS(stack)->k_eip, STACK_IKS(stack)->k_ebx,
-			STACK_IKS(stack)->k_esp, STACK_IEL(stack)->saved_state);
+			STACK_IKS(stack)->k_esp, thr_act->machine.iss);
 	}
 
 	dump_handlers(thr_act);
@@ -2392,9 +1824,6 @@ get_useraddr(void)
 {
         thread_t thr_act = current_thread();
  
-	if (thr_act->machine.pcb == NULL) 
-		return(0);
-
         if (thread_is_64bit(thr_act)) {
 	        x86_saved_state64_t	*iss64;
 		
@@ -2452,11 +1881,11 @@ machine_stack_attach(
 #if defined(__x86_64__)
 	statep->k_rip = (unsigned long) Thread_continue;
 	statep->k_rbx = (unsigned long) thread_continue;
-	statep->k_rsp = (unsigned long) STACK_IEL(stack);
+	statep->k_rsp = (unsigned long) (STACK_IKS(stack) - 1);
 #else
 	statep->k_eip = (unsigned long) Thread_continue;
 	statep->k_ebx = (unsigned long) thread_continue;
-	statep->k_esp = (unsigned long) STACK_IEL(stack);
+	statep->k_esp = (unsigned long) (STACK_IKS(stack) - 1);
 #endif
 
 	return;
@@ -2494,12 +1923,11 @@ machine_stack_handoff(thread_t old,
 
 	fpu_save_context(old);
 	
-
 	old->machine.specFlags &= ~OnProc;
 	new->machine.specFlags |= OnProc;
 
 	PMAP_SWITCH_CONTEXT(old, new, cpu_number());
-	act_machine_switch_pcb(new);
+	act_machine_switch_pcb(old, new);
 
 	machine_set_current_thread(new);
 
@@ -2644,13 +2072,13 @@ void act_thread_cfree(__unused void *ctx)
 }
 void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid);
 void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid) {
-	thread->machine.pcb->arg_store_valid = valid;
+	thread->machine.arg_store_valid = valid;
 }
 
 boolean_t x86_sysenter_arg_store_isvalid(thread_t thread);
 
 boolean_t x86_sysenter_arg_store_isvalid(thread_t thread) {
-	return (thread->machine.pcb->arg_store_valid);
+	return (thread->machine.arg_store_valid);
 }
 
 /*
@@ -2702,4 +2130,3 @@ copy_debug_state64(
 	target->dr6 = src->dr6;
 	target->dr7 = src->dr7;
 }
-
diff --git a/osfmk/i386/pcb_native.c b/osfmk/i386/pcb_native.c
new file mode 100644
index 000000000..8ce815029
--- /dev/null
+++ b/osfmk/i386/pcb_native.c
@@ -0,0 +1,652 @@
+/*
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * @OSF_COPYRIGHT@
+ */
+/* 
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ * 
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ * 
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * 
+ * Carnegie Mellon requests users of this software to return to
+ * 
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ * 
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+#include <mach_rt.h>
+#include <mach_debug.h>
+#include <mach_ldebug.h>
+
+#include <sys/kdebug.h>
+
+#include <mach/kern_return.h>
+#include <mach/thread_status.h>
+#include <mach/vm_param.h>
+
+#include <kern/counters.h>
+#include <kern/kalloc.h>
+#include <kern/mach_param.h>
+#include <kern/processor.h>
+#include <kern/cpu_data.h>
+#include <kern/cpu_number.h>
+#include <kern/task.h>
+#include <kern/thread.h>
+#include <kern/sched_prim.h>
+#include <kern/misc_protos.h>
+#include <kern/assert.h>
+#include <kern/spl.h>
+#include <kern/machine.h>
+#include <ipc/ipc_port.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+#include <vm/pmap.h>
+#include <vm/vm_protos.h>
+
+#include <i386/commpage/commpage.h>
+#include <i386/cpu_data.h>
+#include <i386/cpu_number.h>
+#include <i386/eflags.h>
+#include <i386/proc_reg.h>
+#include <i386/tss.h>
+#include <i386/user_ldt.h>
+#include <i386/fpu.h>
+#include <i386/mp_desc.h>
+#include <i386/misc_protos.h>
+#include <i386/thread.h>
+#if defined(__i386__)
+#include <i386/fpu.h>
+#endif
+#include <i386/seg.h>
+#include <i386/machine_routines.h>
+
+#define ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(_type_)	\
+extern char assert_is_16byte_multiple_sizeof_ ## _type_	\
+		[(sizeof(_type_) % 16) == 0 ? 1 : -1]
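+/*
+ * The array size evaluates to -1 (a compile-time error) whenever
+ * sizeof(_type_) is not a 16-byte multiple, so no code is generated.
+ */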
+
+/* Compile-time checks for vital save area sizing: */
+ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_64_intr_stack_frame_t);
+ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_sframe64_t);
+ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_compat32_t);
+ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_t);
+
+#define DIRECTION_FLAG_DEBUG (DEBUG | DEVELOPMENT)
+
+extern zone_t		iss_zone;		/* zone for saved_state area */
+extern zone_t		ids_zone;		/* zone for debug_state area */
+
+extern void *get_bsduthreadarg(thread_t);
+void
+act_machine_switch_pcb(__unused thread_t old, thread_t new)
+{
+        pcb_t			pcb = THREAD_TO_PCB(new);
+	cpu_data_t      	*cdp = current_cpu_datap();
+	struct real_descriptor	*ldtp;
+	mach_vm_offset_t	pcb_stack_top;
+
+	assert(new->kernel_stack != 0);
+	assert(ml_get_interrupts_enabled() == FALSE);
+#if	DIRECTION_FLAG_DEBUG
+	if (x86_get_flags() & EFL_DF) {
+		panic("Direction flag detected: 0x%lx", x86_get_flags());
+	}
+#endif
+
+#if defined(__x86_64__)
+	/*
+	 * Clear segment state
+	 * unconditionally for DS/ES/FS but more carefully for GS whose
+	 * cached state we track.
+	 */
+	set_ds(NULL_SEG);
+	set_es(NULL_SEG);
+	set_fs(NULL_SEG);
+	if (get_gs() != NULL_SEG) {
+		swapgs();		/* switch to user's GS context */
+		set_gs(NULL_SEG);
+		swapgs();		/* and back to kernel */
+
+		/* record the active machine state lost */
+		cdp->cpu_uber.cu_user_gs_base = 0;
+	} 
+
+	if (is_saved_state64(pcb->iss)) {
+		/*
+		 * The test above is performed against the thread save state
+		 * flavor and not task's 64-bit feature flag because of the
+		 * thread/task 64-bit state divergence that can arise in
+		 * task_set_64bit() x86: the task state is changed before
+		 * the individual thread(s).
+		 */
+	        x86_saved_state64_tagged_t	*iss64;
+		vm_offset_t			isf;
+
+		assert(is_saved_state64(pcb->iss));
+						   
+		iss64 = (x86_saved_state64_tagged_t *) pcb->iss;
+	
+		/*
+		 * Set pointer to PCB's interrupt stack frame in cpu data.
+		 * Used by syscall and double-fault trap handlers.
+		 */
+		isf = (vm_offset_t) &iss64->state.isf;
+		cdp->cpu_uber.cu_isf = isf;
+		pcb_stack_top = (vm_offset_t) (iss64 + 1);
+		/* require 16-byte alignment */
+		assert((pcb_stack_top & 0xF) == 0);
+
+		/* Interrupt stack is pcb */
+		current_ktss64()->rsp0 = pcb_stack_top;
+
+		/*
+		 * Top of temporary sysenter stack points to pcb stack.
+		 * Although this is not normally used by 64-bit users,
+		 * it needs to be set in case a sysenter is attempted.
+		 */
+		*current_sstk64() = pcb_stack_top;
+
+		cdp->cpu_task_map = new->map->pmap->pm_task_map; 
+
+		/*
+		 * Enable the 64-bit user code segment, USER64_CS.
+		 * Disable the 32-bit user code segment, USER_CS.
+		 */
+		ldt_desc_p(USER64_CS)->access |= ACC_PL_U;
+		ldt_desc_p(USER_CS)->access &= ~ACC_PL_U;
+
+		/*
+		 * Switch user's GS base if necessary
+		 * by setting the Kernel's GS base MSR
+		 * - this will become the user's on the swapgs when
+		 * returning to user-space.  Avoid this for
+		 * kernel threads (no user TLS support required)
+		 * and verify the memory shadow of the segment base
+		 * in the event it was altered in user space.
+		 */
+		if ((pcb->cthread_self != 0) || (new->task != kernel_task)) {
+			if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) {
+				cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
+				wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self);
+			}
+		}
+	} else {
+		x86_saved_state_compat32_t	*iss32compat;
+		vm_offset_t			isf;
+
+		assert(is_saved_state32(pcb->iss));
+		iss32compat = (x86_saved_state_compat32_t *) pcb->iss;
+
+		pcb_stack_top = (uintptr_t) (iss32compat + 1);
+		/* require 16-byte alignment */
+		assert((pcb_stack_top & 0xF) == 0);
+
+		/*
+		 * Set pointer to PCB's interrupt stack frame in cpu data.
+		 * Used by debug trap handler.
+		 */
+		isf = (vm_offset_t) &iss32compat->isf64;
+		cdp->cpu_uber.cu_isf = isf;
+
+		/* Top of temporary sysenter stack points to pcb stack */
+		*current_sstk64() = pcb_stack_top;
+
+		/* Interrupt stack is pcb */
+		current_ktss64()->rsp0 = pcb_stack_top;
+
+		cdp->cpu_task_map = TASK_MAP_32BIT;
+		/* Precalculate pointers to syscall argument store, for use
+		 * in the trampolines.
+		 */
+		cdp->cpu_uber_arg_store = (vm_offset_t)get_bsduthreadarg(new);
+		cdp->cpu_uber_arg_store_valid = (vm_offset_t)&pcb->arg_store_valid;
+		pcb->arg_store_valid = 0;
+
+		/*
+		 * Disable the 64-bit user code segment, USER64_CS.
+		 * Enable the 32-bit user code segment, USER_CS.
+		 */
+		ldt_desc_p(USER64_CS)->access &= ~ACC_PL_U;
+		ldt_desc_p(USER_CS)->access |= ACC_PL_U;
+
+		/*
+		 * Set the thread's cthread (a.k.a. pthread).
+		 * For a 32-bit user this involves setting the USER_CTHREAD
+		 * descriptor in the LDT to point to the cthread data,
+		 * which entails copying in the pre-initialized descriptor.
+		 */
+		ldtp = (struct real_descriptor *)current_ldt();
+		ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc;
+		if (pcb->uldt_selector != 0)
+			ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc;
+		cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
+
+		/*
+		 * Set the thread's LDT or LDT entry.
+		 */
+		if (new->task == TASK_NULL || new->task->i386_ldt == 0) {
+			/*
+			 * Use system LDT.
+			 */
+		       	ml_cpu_set_ldt(KERNEL_LDT);
+		} else {
+			/*
+			 * Task has its own LDT.
+			 */
+			user_ldt_set(new);
+		}
+	}
+
+#else /* !__x86_64__ */
+
+	vm_offset_t		hi_pcb_stack_top;
+	vm_offset_t		hi_iss;
+
+	if (!cpu_mode_is64bit()) {
+		x86_saved_state32_tagged_t	*hi_iss32;
+		/*
+		 *	Save a pointer to the top of the "kernel" stack -
+		 *	actually the place in the PCB where a trap into
+		 *	kernel mode will push the registers.
+		 */
+		hi_iss = (vm_offset_t)((unsigned long)
+			pmap_cpu_high_map_vaddr(cpu_number(), HIGH_CPU_ISS0) |
+			((unsigned long)pcb->iss & PAGE_MASK));
+
+		cdp->cpu_hi_iss = (void *)hi_iss;
+
+		pmap_high_map(pcb->iss_pte0, HIGH_CPU_ISS0);
+		pmap_high_map(pcb->iss_pte1, HIGH_CPU_ISS1);
+
+		hi_iss32 = (x86_saved_state32_tagged_t *) hi_iss;
+		assert(hi_iss32->tag == x86_SAVED_STATE32);
+
+		hi_pcb_stack_top = (int) (hi_iss32 + 1);
+
+		/*
+		 * For fast syscall, top of interrupt stack points to pcb stack
+		 */
+		*(vm_offset_t *) current_sstk() = hi_pcb_stack_top;
+
+		current_ktss()->esp0 = hi_pcb_stack_top;
+
+	} else if (is_saved_state64(pcb->iss)) {
+		/*
+		 * The test above is performed against the thread save state
+		 * flavor and not task's 64-bit feature flag because of the
+		 * thread/task 64-bit state divergence that can arise in
+		 * task_set_64bit() x86: the task state is changed before
+		 * the individual thread(s).
+		 */
+	        x86_saved_state64_tagged_t	*iss64;
+		vm_offset_t			isf;
+
+		assert(is_saved_state64(pcb->iss));
+
+		iss64 = (x86_saved_state64_tagged_t *) pcb->iss;
+
+		/*
+		 * Set pointer to PCB's interrupt stack frame in cpu data.
+		 * Used by syscall and double-fault trap handlers.
+		 */
+		isf = (vm_offset_t) &iss64->state.isf;
+		cdp->cpu_uber.cu_isf = UBER64(isf);
+		pcb_stack_top = (vm_offset_t) (iss64 + 1);
+		/* require 16-byte alignment */
+		assert((pcb_stack_top & 0xF) == 0);
+		/* Interrupt stack is pcb */
+		current_ktss64()->rsp0 = UBER64(pcb_stack_top);
+
+		/*
+		 * Top of temporary sysenter stack points to pcb stack.
+		 * Although this is not normally used by 64-bit users,
+		 * it needs to be set in case a sysenter is attempted.
+		 */
+		*current_sstk64() = UBER64(pcb_stack_top);
+
+		cdp->cpu_task_map = new->map->pmap->pm_task_map; 
+
+		/*
+		 * Enable the 64-bit user code segment, USER64_CS.
+		 * Disable the 32-bit user code segment, USER_CS.
+		 */
+		ldt_desc_p(USER64_CS)->access |= ACC_PL_U;
+		ldt_desc_p(USER_CS)->access &= ~ACC_PL_U;
+
+	} else {
+		x86_saved_state_compat32_t	*iss32compat;
+		vm_offset_t			isf;
+
+		assert(is_saved_state32(pcb->iss));
+		iss32compat = (x86_saved_state_compat32_t *) pcb->iss;
+
+		pcb_stack_top = (int) (iss32compat + 1);
+		/* require 16-byte alignment */
+		assert((pcb_stack_top & 0xF) == 0);
+
+		/*
+		 * Set pointer to PCB's interrupt stack frame in cpu data.
+		 * Used by debug trap handler.
+		 */
+		isf = (vm_offset_t) &iss32compat->isf64;
+		cdp->cpu_uber.cu_isf = UBER64(isf);
+
+		/* Top of temporary sysenter stack points to pcb stack */
+		*current_sstk64() = UBER64(pcb_stack_top);
+
+		/* Interrupt stack is pcb */
+		current_ktss64()->rsp0 = UBER64(pcb_stack_top);
+
+		cdp->cpu_task_map = TASK_MAP_32BIT;
+		/* Precalculate pointers to syscall argument store, for use
+		 * in the trampolines.
+		 */
+		cdp->cpu_uber_arg_store = UBER64((vm_offset_t)get_bsduthreadarg(new));
+		cdp->cpu_uber_arg_store_valid = UBER64((vm_offset_t)&pcb->arg_store_valid);
+		pcb->arg_store_valid = 0;
+
+		/*
+		 * Disable the 64-bit user code segment, USER64_CS.
+		 * Enable the 32-bit user code segment, USER_CS.
+		 */
+		ldt_desc_p(USER64_CS)->access &= ~ACC_PL_U;
+		ldt_desc_p(USER_CS)->access |= ACC_PL_U;
+	}
+
+	/*
+	 * Set the thread's cthread (a.k.a. pthread).
+	 * For a 32-bit user this involves setting the USER_CTHREAD
+	 * descriptor in the LDT to point to the cthread data,
+	 * which entails copying in the pre-initialized descriptor.
+	 */
+	ldtp = (struct real_descriptor *)current_ldt();
+	ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc;
+	if (pcb->uldt_selector != 0)
+		ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc;
+
+	/*
+	 * For 64-bit, we additionally set the 64-bit User GS base
+	 * address. On return to 64-bit user, the GS.Base MSR will be written.
+	 */
+	cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
+
+	/*
+	 * Set the thread's LDT or LDT entry.
+	 */
+	if (new->task == TASK_NULL || new->task->i386_ldt == 0) {
+		/*
+		 * Use system LDT.
+		 */
+	       	ml_cpu_set_ldt(KERNEL_LDT);
+	} else {
+		/*
+		 * Task has its own LDT.
+		 */
+		user_ldt_set(new);
+	}
+#endif
+
+	/*
+	 * Bump the scheduler generation count in the commpage.
+	 * This can be read by user code to detect its preemption.
+	 */
+	commpage_sched_gen_inc();
+}
+
+void
+thread_set_wq_state32(thread_t thread, thread_state_t tstate)
+{
+        x86_thread_state32_t	*state;
+        x86_saved_state32_t	*saved_state;
+	thread_t curth = current_thread();
+	spl_t			s=0;
+
+	pal_register_cache_state(thread, DIRTY);
+
+	saved_state = USER_REGS32(thread);
+
+	state = (x86_thread_state32_t *)tstate;
+	
+	if (curth != thread) {
+		s = splsched();
+	        thread_lock(thread);
+	}
+
+	saved_state->ebp = 0;
+	saved_state->eip = state->eip;
+	saved_state->eax = state->eax;
+	saved_state->ebx = state->ebx;
+	saved_state->ecx = state->ecx;
+	saved_state->edx = state->edx;
+	saved_state->edi = state->edi;
+	saved_state->esi = state->esi;
+	saved_state->uesp = state->esp;
+	saved_state->efl = EFL_USER_SET;
+
+	saved_state->cs = USER_CS;
+	saved_state->ss = USER_DS;
+	saved_state->ds = USER_DS;
+	saved_state->es = USER_DS;
+
+	if (curth != thread) {
+	        thread_unlock(thread);
+		splx(s);
+	}
+}
+
+
+void
+thread_set_wq_state64(thread_t thread, thread_state_t tstate)
+{
+        x86_thread_state64_t	*state;
+        x86_saved_state64_t	*saved_state;
+	thread_t curth = current_thread();
+	spl_t			s=0;
+
+	pal_register_cache_state(thread, DIRTY);
+
+	saved_state = USER_REGS64(thread);
+	state = (x86_thread_state64_t *)tstate;
+	
+	if (curth != thread) {
+		s = splsched();
+	        thread_lock(thread);
+	}
+
+	saved_state->rbp = 0;
+	saved_state->rdi = state->rdi;
+	saved_state->rsi = state->rsi;
+	saved_state->rdx = state->rdx;
+	saved_state->rcx = state->rcx;
+	saved_state->r8  = state->r8;
+	saved_state->r9  = state->r9;
+
+	saved_state->isf.rip = state->rip;
+	saved_state->isf.rsp = state->rsp;
+	saved_state->isf.cs = USER64_CS;
+	saved_state->isf.rflags = EFL_USER_SET;
+
+	if (curth != thread) {
+	        thread_unlock(thread);
+		splx(s);
+	}
+}
+
+/*
+ * Initialize the machine-dependent state for a new thread.
+ */
+kern_return_t
+machine_thread_create(
+	thread_t		thread,
+	task_t			task)
+{
+        pcb_t			pcb = THREAD_TO_PCB(thread);
+	x86_saved_state_t	*iss;
+
+#if NCOPY_WINDOWS > 0
+	inval_copy_windows(thread);
+
+	thread->machine.physwindow_pte = 0;
+	thread->machine.physwindow_busy = 0;
+#endif
+
+	/*
+	 * Allocate save frame only if required.
+	 */
+	if (pcb->sf == NULL) {
+		assert((get_preemption_level() == 0));
+		pcb->sf = zalloc(iss_zone);
+		if (pcb->sf == NULL)
+			panic("iss_zone");
+	}
+
+        if (task_has_64BitAddr(task)) {
+		x86_sframe64_t		*sf64;
+
+		sf64 = (x86_sframe64_t *) pcb->sf;
+
+		bzero((char *)sf64, sizeof(x86_sframe64_t));
+
+		iss = (x86_saved_state_t *) &sf64->ssf;
+		iss->flavor = x86_SAVED_STATE64;
+		/*
+		 *      Guarantee that the bootstrapped thread will be in user
+		 *      mode.
+		 */
+		iss->ss_64.isf.rflags = EFL_USER_SET;
+		iss->ss_64.isf.cs = USER64_CS;
+		iss->ss_64.isf.ss = USER_DS;
+		iss->ss_64.fs = USER_DS;
+		iss->ss_64.gs = USER_DS;
+	} else {
+		if (cpu_mode_is64bit()) {
+			x86_sframe_compat32_t      *sfc32;
+
+			sfc32 = (x86_sframe_compat32_t *)pcb->sf;
+
+			bzero((char *)sfc32, sizeof(x86_sframe_compat32_t));
+
+			iss = (x86_saved_state_t *) &sfc32->ssf.iss32;
+			iss->flavor = x86_SAVED_STATE32;
+#if defined(__i386__)
+#if DEBUG
+			{
+				sfc32->pad_for_16byte_alignment[0] = 0x64326432;
+				sfc32->pad_for_16byte_alignment[1] = 0x64326432;
+			}
+#endif /* DEBUG */
+		} else {
+			x86_sframe32_t		*sf32;
+			struct real_descriptor	*ldtp;
+			pmap_paddr_t		paddr;
+
+			sf32 = (x86_sframe32_t *) pcb->sf;
+
+			bzero((char *)sf32, sizeof(x86_sframe32_t));
+
+			iss = (x86_saved_state_t *) &sf32->ssf;
+			iss->flavor = x86_SAVED_STATE32;
+
+			pcb->iss_pte0 = pte_kernel_rw(kvtophys((vm_offset_t)iss));
+			if (0 == (paddr = pa_to_pte(kvtophys((vm_offset_t)iss + PAGE_SIZE))))
+			        pcb->iss_pte1 = INTEL_PTE_INVALID;
+			else
+	      			pcb->iss_pte1 = pte_kernel_rw(paddr);
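+			/*
+			 * The save area may straddle a page boundary, so
+			 * PTEs for both pages are precomputed here; they
+			 * are installed into the per-CPU high window by
+			 * pmap_high_map() in act_machine_switch_pcb().
+			 */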
+
+			ldtp = (struct real_descriptor *)
+				    pmap_index_to_virt(HIGH_FIXED_LDT_BEGIN);
+			pcb->cthread_desc = ldtp[sel_idx(USER_DS)];
+			pcb->uldt_desc = ldtp[sel_idx(USER_DS)];
+#endif /* __i386__ */
+		}
+		/*
+		 *      Guarantee that the bootstrapped thread will be in user
+		 *      mode.
+		 */
+		iss->ss_32.cs = USER_CS;
+		iss->ss_32.ss = USER_DS;
+		iss->ss_32.ds = USER_DS;
+		iss->ss_32.es = USER_DS;
+		iss->ss_32.fs = USER_DS;
+		iss->ss_32.gs = USER_DS;
+		iss->ss_32.efl = EFL_USER_SET;
+
+	}
+	pcb->iss = iss;
+
+	simple_lock_init(&pcb->lock, 0);
+
+	pcb->arg_store_valid = 0;
+	pcb->cthread_self = 0;
+	pcb->uldt_selector = 0;
+
+	/* Ensure that the "cthread" descriptor describes a valid
+	 * segment.
+	 */
+	if ((pcb->cthread_desc.access & ACC_P) == 0) {
+		struct real_descriptor  *ldtp;
+		ldtp = (struct real_descriptor *)current_ldt();
+		pcb->cthread_desc = ldtp[sel_idx(USER_DS)];
+	}
+
+	return(KERN_SUCCESS);
+}
+
+/*
+ * Machine-dependent cleanup prior to destroying a thread
+ */
+void
+machine_thread_destroy(
+	thread_t		thread)
+{
+	register pcb_t	pcb = THREAD_TO_PCB(thread);
+
+	if (pcb->ifps != 0)
+		fpu_free(pcb->ifps);
+	if (pcb->sf != 0) {
+		zfree(iss_zone, pcb->sf);
+		pcb->sf = 0;
+	}
+	if (pcb->ids) {
+		zfree(ids_zone, pcb->ids);
+		pcb->ids = NULL;
+	}
+}
diff --git a/osfmk/i386/pmCPU.c b/osfmk/i386/pmCPU.c
index c469d7a1c..22eafd1b8 100644
--- a/osfmk/i386/pmCPU.c
+++ b/osfmk/i386/pmCPU.c
@@ -42,13 +42,15 @@
 #include <kern/pms.h>
 #include <kern/processor.h>
 #include <kern/etimer.h>
-#include <sys/kdebug.h>
 #include <i386/cpu_threads.h>
 #include <i386/pmCPU.h>
 #include <i386/cpuid.h>
-#include <i386/rtclock.h>
+#include <i386/rtclock_protos.h>
 #include <kern/sched_prim.h>
 #include <i386/lapic.h>
+#include <i386/pal_routines.h>
+
+#include <sys/kdebug.h>
 
 extern int disableConsoleOutput;
 
@@ -57,7 +59,7 @@ decl_simple_lock_data(,pm_init_lock);
 /*
  * The following is set when the KEXT loads and initializes.
  */
-pmDispatch_t		*pmDispatch	= NULL;
+pmDispatch_t	*pmDispatch	= NULL;
 
 static uint32_t		pmInitDone	= 0;
 static boolean_t	earlyTopology	= FALSE;
@@ -111,7 +113,10 @@ machine_idle(void)
 	 * cause problems in some MP configurations w.r.t. the APIC
 	 * stopping during a GV3 transition).
 	 */
-	__asm__ volatile ("sti; hlt");
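+	/* pal_hlt() subsumes the former inline "sti; hlt": enable
+	 * interrupts and halt in a single PAL call. */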
+	pal_hlt();
+
+	/* Once woken, re-disable interrupts. */
+	pal_cli();
     }
 
     /*
@@ -125,7 +130,7 @@ machine_idle(void)
      * Re-enable interrupts.
      */
   out:
-    __asm__ volatile("sti");
+    pal_sti();
 }
 
 /*
@@ -140,19 +145,19 @@ pmCPUHalt(uint32_t reason)
     switch (reason) {
     case PM_HALT_DEBUG:
 	cpup->lcpu.state = LCPU_PAUSE;
-	__asm__ volatile ("wbinvd; hlt");
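+	/* pal_stop_cpu() replaces the inline "wbinvd; hlt" sequence;
+	 * a TRUE argument additionally executes "cli" first (see the
+	 * PM_HALT_PANIC case below). */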
+	pal_stop_cpu(FALSE);
 	break;
 
     case PM_HALT_PANIC:
 	cpup->lcpu.state = LCPU_PAUSE;
-	__asm__ volatile ("cli; wbinvd; hlt");
+	pal_stop_cpu(TRUE);
 	break;
 
     case PM_HALT_NORMAL:
     default:
-	__asm__ volatile ("cli");
+        pal_cli();
 
-    if (pmInitDone
+	if (pmInitDone
 	    && pmDispatch != NULL
 	    && pmDispatch->pmCPUHalt != NULL) {
 	    /*
@@ -166,7 +171,8 @@ pmCPUHalt(uint32_t reason)
 	    i386_init_slave_fast();
 
 	    panic("init_slave_fast returned");
-	} else {
+	} else
+	{
 	    /*
 	     * If no power management and a processor is taken off-line,
 	     * then invalidate the cache and halt it (it will not be able
@@ -174,10 +180,11 @@ pmCPUHalt(uint32_t reason)
 	     */
 	    __asm__ volatile ("wbinvd");
 	    cpup->lcpu.state = LCPU_HALT;
-	    __asm__ volatile ( "wbinvd; hlt" );
+	    pal_stop_cpu(FALSE);
 
 	    panic("back from Halt");
 	}
+
 	break;
     }
 }
@@ -269,13 +276,15 @@ pmLockCPUTopology(int lock)
 /*
  * Called to get the next deadline that has been set by the
  * power management code.
+ * Note: a return of 0, from either AICPM or this routine,
+ * signifies that no deadline is set.
  */
 uint64_t
 pmCPUGetDeadline(cpu_data_t *cpu)
 {
     uint64_t	deadline	= 0;
 
-    if (pmInitDone
+	if (pmInitDone
 	&& pmDispatch != NULL
 	&& pmDispatch->GetDeadline != NULL)
 	deadline = (*pmDispatch->GetDeadline)(&cpu->lcpu);
@@ -500,6 +509,19 @@ ml_set_maxintdelay(uint64_t mdelay)
 	pmDispatch->setMaxIntDelay(mdelay);
 }
 
+boolean_t
+ml_get_interrupt_prewake_applicable(void)
+{
+    boolean_t applicable = FALSE;
+
+    if (pmInitDone 
+	&& pmDispatch != NULL
+	&& pmDispatch->pmInterruptPrewakeApplicable != NULL)
+	applicable = pmDispatch->pmInterruptPrewakeApplicable();
+
+    return applicable;
+}
+
 /*
  * Put a CPU into "safe" mode with respect to power.
  *
@@ -604,26 +626,58 @@ machine_choose_processor(processor_set_t pset,
 }
 
 static int
-pmThreadGetUrgency(__unused uint64_t *rt_period, __unused uint64_t *rt_deadline)
+pmThreadGetUrgency(uint64_t *rt_period, uint64_t *rt_deadline)
 {
 
-    return(0);
+    return(thread_get_urgency(rt_period, rt_deadline));
 }
 
+#if	DEBUG
+uint32_t	urgency_stats[64][THREAD_URGENCY_MAX];
+#endif
+
+#define		URGENCY_NOTIFICATION_ASSERT_NS (5 * 1000 * 1000)
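+/* 5 ms, in nanoseconds; the equivalent in abstime units is kept in
+ * urgency_notification_assert_abstime_threshold below. */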
+uint64_t	urgency_notification_assert_abstime_threshold, urgency_notification_max_recorded;
+
 void
 thread_tell_urgency(int urgency,
-		    uint64_t rt_period,
-		    uint64_t rt_deadline)
-{
-    KERNEL_DEBUG_CONSTANT(0x1400054,
-			  urgency, rt_period, (rt_deadline >> 32), rt_deadline, 0);
-
-    if (!pmInitDone
-	|| pmDispatch == NULL
-	|| pmDispatch->pmThreadTellUrgency == NULL)
-	return;
+    uint64_t rt_period,
+    uint64_t rt_deadline)
+{
+	uint64_t	urgency_notification_time_start, delta;
+	boolean_t	urgency_assert = (urgency_notification_assert_abstime_threshold != 0);
+	assert(get_preemption_level() > 0 || ml_get_interrupts_enabled() == FALSE);
+#if	DEBUG
+	urgency_stats[cpu_number() % 64][urgency]++;
+#endif
+	if (!pmInitDone
+	    || pmDispatch == NULL
+	    || pmDispatch->pmThreadTellUrgency == NULL)
+		return;
+
+	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, (rt_deadline >> 32), rt_deadline, 0);
+
+	if (__improbable((urgency_assert == TRUE)))
+		urgency_notification_time_start = mach_absolute_time();
+
+	pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline);
+
+	if (__improbable((urgency_assert == TRUE))) {
+		delta = mach_absolute_time() - urgency_notification_time_start;
+
+		if (__improbable(delta > urgency_notification_max_recorded)) {
+			/* This is not synchronized, but it doesn't matter
+			 * if we (rarely) miss an event, as it is statistically
+			 * unlikely that it will never recur.
+			 */
+			urgency_notification_max_recorded = delta;
+
+			if (__improbable((delta > urgency_notification_assert_abstime_threshold) && !machine_timeout_suspended()))
+				panic("Urgency notification callout %p exceeded threshold, 0x%llx abstime units", pmDispatch->pmThreadTellUrgency, delta);
+		}
+	}
 
-    pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline);
+	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, (rt_deadline >> 32), rt_deadline, 0);
 }
 
 void
@@ -693,21 +747,25 @@ pmGetNanotimeInfo(pm_rtc_nanotime_t *rtc_nanotime)
 	 * Make sure that nanotime didn't change while we were reading it.
 	 */
 	do {
-		rtc_nanotime->generation = rtc_nanotime_info.generation; /* must be first */
-		rtc_nanotime->tsc_base = rtc_nanotime_info.tsc_base;
-		rtc_nanotime->ns_base = rtc_nanotime_info.ns_base;
-		rtc_nanotime->scale = rtc_nanotime_info.scale;
-		rtc_nanotime->shift = rtc_nanotime_info.shift;
-	} while(rtc_nanotime_info.generation != 0
-		&& rtc_nanotime->generation != rtc_nanotime_info.generation);
+		rtc_nanotime->generation = pal_rtc_nanotime_info.generation; /* must be first */
+		rtc_nanotime->tsc_base = pal_rtc_nanotime_info.tsc_base;
+		rtc_nanotime->ns_base = pal_rtc_nanotime_info.ns_base;
+		rtc_nanotime->scale = pal_rtc_nanotime_info.scale;
+		rtc_nanotime->shift = pal_rtc_nanotime_info.shift;
+	} while(pal_rtc_nanotime_info.generation != 0
+		&& rtc_nanotime->generation != pal_rtc_nanotime_info.generation);
 }
 
 static uint32_t
-pmTimerQueueMigrate(__unused int target_cpu)
+pmTimerQueueMigrate(int target_cpu)
 {
-    return (0);
+    /* Call the etimer code to do this. */
+    return (target_cpu != cpu_number())
+		? etimer_queue_migrate(target_cpu)
+		: 0;
 }
 
+
 /*
  * Called by the power management kext to register itself and to get the
  * callbacks it might need into other kernel functions.  This interface
@@ -736,19 +794,18 @@ pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
 	callbacks->LCPUtoProcessor      = pmLCPUtoProcessor;
 	callbacks->ThreadBind           = thread_bind;
 	callbacks->GetSavedRunCount     = pmGetSavedRunCount;
-	callbacks->pmSendIPI		= pmSendIPI;
 	callbacks->GetNanotimeInfo	= pmGetNanotimeInfo;
 	callbacks->ThreadGetUrgency	= pmThreadGetUrgency;
 	callbacks->RTCClockAdjust	= rtc_clock_adjust;
 	callbacks->timerQueueMigrate    = pmTimerQueueMigrate;
 	callbacks->topoParms            = &topoParms;
+	callbacks->pmSendIPI		= pmSendIPI;
 	callbacks->InterruptPending	= lapic_is_interrupt_pending;
 	callbacks->IsInterrupting	= lapic_is_interrupting;
 	callbacks->InterruptStats	= lapic_interrupt_counts;
 	callbacks->DisableApicTimer	= lapic_disable_timer;
     } else {
-	panic("Version mis-match between Kernel (%d) and CPU PM (%d)",
-	      PM_DISPATCH_VERSION, version);
+	panic("Version mis-match between Kernel and CPU PM");
     }
 
     if (cpuFuncs != NULL) {
diff --git a/osfmk/i386/pmCPU.h b/osfmk/i386/pmCPU.h
index 55041fc10..c443c1efa 100644
--- a/osfmk/i386/pmCPU.h
+++ b/osfmk/i386/pmCPU.h
@@ -30,15 +30,14 @@
 #define _I386_PMCPU_H_
 
 #include <i386/cpu_topology.h>
-#include <i386/rtclock.h>
 
 #ifndef ASSEMBLER
 
 /*
- * This value should be changed each time that pmDsipatch_t or pmCallBacks_t
+ * This value should be changed each time that pmDispatch_t or pmCallBacks_t
  * changes.
  */
-#define PM_DISPATCH_VERSION	23
+#define PM_DISPATCH_VERSION	102
 
 /*
  * Dispatch table for functions that get installed when the power
@@ -79,11 +78,10 @@ typedef struct
     int			(*pmIPIHandler)(void *state);
     void		(*pmThreadTellUrgency)(int urgency, uint64_t rt_period, uint64_t rt_deadline);
     void		(*pmActiveRTThreads)(boolean_t active);
+    boolean_t           (*pmInterruptPrewakeApplicable)(void);
 } pmDispatch_t;
 
-
-/*
- * common time fields exported to PM code. This structure may be
+/* common time fields exported to PM code. This structure may be
  * allocated on the stack, so avoid making it unnecessarily large.
  */
 typedef struct pm_rtc_nanotime {
@@ -115,9 +113,8 @@ typedef struct {
     void		(*pmSendIPI)(int cpu);
     void		(*GetNanotimeInfo)(pm_rtc_nanotime_t *);
     int			(*ThreadGetUrgency)(uint64_t *rt_period, uint64_t *rt_deadline);
-    uint32_t		(*timeQueueMigrate)(int cpu);
-    void		(*RTCClockAdjust)(uint64_t adjustment);
     uint32_t		(*timerQueueMigrate)(int cpu);
+    void		(*RTCClockAdjust)(uint64_t adjustment);
     x86_topology_parameters_t	*topoParms;
     boolean_t		(*InterruptPending)(void);
     boolean_t		(*IsInterrupting)(uint8_t vector);
@@ -144,8 +141,6 @@ void pmTimerSave(void);
 void pmTimerRestore(void);
 kern_return_t pmCPUExitHalt(int cpu);
 kern_return_t pmCPUExitHaltToOff(int cpu);
-void thread_tell_urgency(int urgency, uint64_t rt_period, uint64_t rt_deadline);
-void active_rt_threads(boolean_t active);
 
 #define PM_HALT_NORMAL		0		/* normal halt path */
 #define PM_HALT_DEBUG		1		/* debug code wants to halt */
@@ -160,7 +155,9 @@ void pmSafeMode(x86_lcpu_t *lcpu, uint32_t flags);
 #define PM_SAFE_FL_RESUME	0x00000020	/* resume execution on the CPU */
 
 extern int pmsafe_debug;
-extern int idlehalt;
+/* Default urgency timing threshold for the DEBUG build */
+#define		URGENCY_NOTIFICATION_ASSERT_NS (5 * 1000 * 1000)
+extern uint64_t	urgency_notification_assert_abstime_threshold;
 
 /******************************************************************************
  *
diff --git a/osfmk/i386/pmap.c b/osfmk/i386/pmap.c
index 3d12ba9f2..b672bdc6b 100644
--- a/osfmk/i386/pmap.c
+++ b/osfmk/i386/pmap.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -144,6 +144,7 @@
 #include <i386/mp.h>
 #include <i386/mp_desc.h>
 #include <i386/i386_lowmem.h>
+#include <i386/lowglobals.h>
 
 
 /* #define DEBUGINTERRUPTS 1  uncomment to ensure pmap callers have interrupts enabled */
@@ -160,35 +161,6 @@
 #include <i386/postcode.h>
 #endif /* IWANTTODEBUG */
 
-/*
- * Forward declarations for internal functions.
- */
-
-void		pmap_remove_range(
-			pmap_t		pmap,
-			vm_map_offset_t	va,
-			pt_entry_t	*spte,
-			pt_entry_t	*epte);
-
-void		phys_attribute_clear(
-			ppnum_t		phys,
-			int		bits);
-
-int		phys_attribute_test(
-			ppnum_t		phys,
-			int		bits);
-
-void		phys_attribute_set(
-			ppnum_t		phys,
-			int		bits);
-
-void		pmap_set_reference(
-			ppnum_t pn);
-
-boolean_t	phys_page_exists(
-			ppnum_t pn);
-
-
 #ifdef PMAP_DEBUG
 void dump_pmap(pmap_t);
 void dump_4GB_pdpt(pmap_t p);
@@ -203,26 +175,19 @@ int allow_data_exec  = VM_ABI_32;	/* 32-bit apps may execute data by default, 64
 #endif
 int allow_stack_exec = 0;		/* No apps may execute from the stack by default */
 
+#if CONFIG_YONAH
 boolean_t cpu_64bit  = FALSE;
+#else
+const boolean_t cpu_64bit  = TRUE;
+#endif
 boolean_t pmap_trace = FALSE;
 
-/*
- * when spinning through pmap_remove
- * ensure that we don't spend too much
- * time with preemption disabled.
- * I'm setting the current threshold
- * to 20us
- */
-#define MAX_PREEMPTION_LATENCY_NS 20000
-
 uint64_t max_preemption_latency_tsc = 0;
 
-
 pv_hashed_entry_t     *pv_hash_table;  /* hash lists */
 
 uint32_t npvhash = 0;
 
-
 /*
  *	pv_list entries are kept on a list that can only be accessed
  *	with the pmap system locked (at SPLVM, not in the cpus_active set).
@@ -235,27 +200,10 @@ decl_simple_lock_data(,pv_hashed_free_list_lock)
 decl_simple_lock_data(,pv_hashed_kern_free_list_lock)
 decl_simple_lock_data(,pv_hash_table_lock)
 
-int pv_free_count = 0;
-int pv_hashed_free_count = 0;
-int pv_kern_free_count = 0;
-int pv_hashed_kern_free_count = 0;
-
 zone_t		pv_hashed_list_zone;	/* zone of pv_hashed_entry structures */
 
 static zone_t pdpt_zone;
 
-/*
- *	Each entry in the pv_head_table is locked by a bit in the
- *	pv_lock_table.  The lock bits are accessed by the physical
- *	address of the page they lock.
- */
-
-char	*pv_lock_table;		/* pointer to array of bits */
-#define pv_lock_table_size(n)	(((n)+BYTE_SIZE-1)/BYTE_SIZE)
-
-char    *pv_hash_lock_table;
-#define pv_hash_lock_table_size(n)  (((n)+BYTE_SIZE-1)/BYTE_SIZE)
-
 /*
  *	First and last physical addresses that we maintain any information
  *	for.  Initialized to zero so that pmap operations done before
@@ -266,6 +214,10 @@ boolean_t	pmap_initialized = FALSE;/* Has pmap_init completed? */
 static struct vm_object kptobj_object_store;
 static vm_object_t kptobj;
 
+/*
+ *	Index into pv_head table, its lock bits, and the modify/reference and managed bits
+ */
+
 /*
 *	Array of physical page attributes for managed pages.
  *	One byte per physical page.
@@ -273,11 +225,6 @@ static vm_object_t kptobj;
 char	*pmap_phys_attributes;
 unsigned int	last_managed_page = 0;
 
-/*
- *	Amount of virtual memory mapped by one
- *	page-directory entry.
- */
-#define	PDE_MAPPED_SIZE		(pdetova(1))
 uint64_t pde_mapped_size;
 
 /*
@@ -369,30 +316,37 @@ struct zone	*pmap_zone;		/* zone of pmap structures */
 int		pmap_debug = 0;		/* flag for debugging prints */
 
 unsigned int	inuse_ptepages_count = 0;
+long long	alloc_ptepages_count __attribute__((aligned(8))) = 0LL; /* aligned for atomic access */
+unsigned int	bootstrap_wired_pages = 0;
+int 		pt_fake_zone_index = -1;
+
+extern 	long	NMIPI_acks;
+
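+/*
+ * Charge/credit pmap metadata allocations against the current thread's
+ * shared kernel-memory counters (tkm_shared), which feed the per-task
+ * kernel-memory accounting.
+ */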
+static inline void
+PMAP_ZINFO_SALLOC(vm_size_t bytes)
+{
+	current_thread()->tkm_shared.alloc += bytes;
+}
+
+static inline void
+PMAP_ZINFO_SFREE(vm_size_t bytes)
+{
+	current_thread()->tkm_shared.free += (bytes);
+}
 
 addr64_t	kernel64_cr3;
 boolean_t	no_shared_cr3 = FALSE;	/* -no_shared_cr3 boot arg */
 
-
-/*
- *	Pmap cache.  Cache is threaded through ref_count field of pmap.
- *	Max will eventually be constant -- variable for experimentation.
- */
-int		pmap_cache_max = 32;
-int		pmap_alloc_chunk = 8;
-pmap_t		pmap_cache_list;
-int		pmap_cache_count;
-decl_simple_lock_data(,pmap_cache_lock)
+boolean_t	kernel_text_ps_4K = TRUE;
+boolean_t	wpkernel = TRUE;
 
 extern char end;
-
 static int nkpt;
 
-extern 	long	NMIPI_acks;
-
 pt_entry_t     *DMAP1, *DMAP2;
 caddr_t         DADDR1;
 caddr_t         DADDR2;
+
 /*
  * for legacy, returns the address of the pde entry.
  * for 64 bit, causes the pdpt page containing the pde entry to be mapped,
@@ -412,7 +366,6 @@ pmap_pde(pmap_t m, vm_map_offset_t v)
 	return pde;
 }
 
-
 /*
  * the single pml4 page per pmap is allocated at pmap create time and exists
  * for the duration of the pmap. we allocate this page in kernel vm (to save us one
@@ -644,7 +597,6 @@ pmap_map_bd(
 	if (prot & VM_PROT_WRITE)
 	    template |= INTEL_PTE_WRITE;
 
-
 	while (start_addr < end_addr) {
 	        spl = splhigh();
 		pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
@@ -658,14 +610,20 @@ pmap_map_bd(
 		start_addr += PAGE_SIZE;
 	} 
 
-
 	flush_tlb();
 	return(virt);
 }
 
-extern	char			*first_avail;
+extern	pmap_paddr_t		first_avail;
 extern	vm_offset_t		virtual_avail, virtual_end;
 extern	pmap_paddr_t		avail_start, avail_end;
+extern  vm_offset_t		sHIB;
+extern  vm_offset_t		eHIB;
+extern  vm_offset_t		stext;
+extern  vm_offset_t		etext;
+extern  vm_offset_t		sdata;
+
+extern void	*KPTphys;
 
 void
 pmap_cpu_init(void)
@@ -834,7 +792,6 @@ pmap_bootstrap(
 	boolean_t		IA32e)
 {
 	vm_offset_t	va;
-	pt_entry_t	*pte;
 	int i;
 	pdpt_entry_t *pdpt;
 	spl_t s;
@@ -872,7 +829,10 @@ pmap_bootstrap(
 	  pmap_store_pte(pdpt, pa | INTEL_PTE_VALID);
 	}
 
+#if CONFIG_YONAH
+	/* 32-bit and legacy support depends on IA32e mode being disabled */
 	cpu_64bit = IA32e;
+#endif
 	
 	lo_kernel_cr3 = kernel_pmap->pm_cr3;
 	current_cpu_datap()->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3;
@@ -887,6 +847,8 @@ pmap_bootstrap(
 
 	nkpt = NKPT;
 	OSAddAtomic(NKPT, &inuse_ptepages_count);
+	OSAddAtomic64(NKPT, &alloc_ptepages_count);
+	bootstrap_wired_pages = NKPT;
 
 	virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail;
 	virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);
@@ -895,11 +857,11 @@ pmap_bootstrap(
 	 * Reserve some special page table entries/VA space for temporary
 	 * mapping of pages.
 	 */
-#define	SYSMAP(c, p, v, n)	\
-	v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n)
-
 	va = virtual_avail;
+	pt_entry_t	*pte;
 	pte = vtopte(va);
+#define	SYSMAP(c, p, v, n)	\
+	v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n)
 
         for (i=0; i<PMAP_NWINDOWS; i++) {
             SYSMAP(caddr_t,
@@ -1115,7 +1077,6 @@ pmap_init(void)
 		vaddr += PAGE_SIZE;
 		vsize -= PAGE_SIZE;
 	}
-
 	/*
 	 *	Create the zone of physical maps,
 	 *	and of the physical-to-virtual entries.
@@ -1125,7 +1086,8 @@ pmap_init(void)
 	zone_change(pmap_zone, Z_NOENCRYPT, TRUE);
 
 	s = (vm_size_t) sizeof(struct pv_hashed_entry);
-	pv_hashed_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
+	pv_hashed_list_zone = zinit(s, 10000*s /* Expandable zone */,
+	    4096 * 4 /* LCM i386 */, "pv_list");
 	zone_change(pv_hashed_list_zone, Z_NOENCRYPT, TRUE);
 
 	s = 63;
@@ -1153,17 +1115,220 @@ pmap_init(void)
 
 	pmap_initialized = TRUE;
 
+	max_preemption_latency_tsc = tmrCvt((uint64_t)MAX_PREEMPTION_LATENCY_NS, tscFCvtn2t);
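+	/*
+	 * The preemption-latency cap (nominally 20us) is converted once
+	 * from nanoseconds to TSC units so hot paths can compare raw
+	 * rdtsc deltas against it directly.
+	 */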
+
+}
+
+#ifdef	PMAP_DEBUG
+#define DBG(x...)	kprintf("DBG: " x)
+#else
+#define DBG(x...)
+#endif
+
+/*
+ * Called once VM is fully initialized so that we can release unused
+ * sections of low memory to the general pool.
+ * Also complete the set-up of identity-mapped sections of the kernel:
+ *  1) write-protect kernel text
+ *  2) map kernel text using large pages if possible
+ *  3) read and write-protect page zero (for K32)
+ *  4) map the global page at the appropriate virtual address.
+ *
+ * Use of large pages
+ * ------------------
+ * To effectively map and write-protect all kernel text pages, the text
+ * must be 2M-aligned at the base, and the data section above must also be
+ * 2M-aligned. That is, there's padding below and above. This is achieved
+ * through linker directives. Large pages are used only if this alignment
+ * exists (and is not overridden by the -kernel_text_ps_4K boot-arg). The
+ * memory layout is:
+ * 
+ *                       :                :
+ *                       |     __DATA     |
+ *               sdata:  ==================  2Meg
+ *                       |                |
+ *                       |  zero-padding  |
+ *                       |                |
+ *               etext:  ------------------ 
+ *                       |                |
+ *                       :                :
+ *                       |                |
+ *                       |     __TEXT     |
+ *                       |                |
+ *                       :                :
+ *                       |                |
+ *               stext:  ==================  2Meg
+ *                       |                |
+ *                       |  zero-padding  |
+ *                       |                |
+ *               eHIB:   ------------------ 
+ *                       |     __HIB      |
+ *                       :                :
+ *
+ * Prior to changing the mapping from 4K to 2M, the zero-padding pages
+ * [eHIB,stext] and [etext,sdata] are ml_static_mfree()'d. Then all the
+ * 4K pages covering [stext,etext] are coalesced as 2M large pages.
+ * The now unused level-1 PTE pages are also freed.
+ */
+extern uint32_t pmap_reserved_ranges;
+void
+pmap_lowmem_finalize(void)
+{
+	spl_t           spl;
+	int		i;
+
+	/* Check the kernel is linked at the expected base address */
+	if (i386_btop(kvtophys((vm_offset_t) &IdlePML4)) !=
+	    I386_KERNEL_IMAGE_BASE_PAGE)
+		panic("pmap_lowmem_finalize() unexpected kernel base address");
+
 	/*
-	 *	Initialize pmap cache.
+	 * Update wired memory statistics for early boot pages
 	 */
-	pmap_cache_list = PMAP_NULL;
-	pmap_cache_count = 0;
-	simple_lock_init(&pmap_cache_lock, 0);
+	PMAP_ZINFO_PALLOC(bootstrap_wired_pages * PAGE_SIZE);
 
-	max_preemption_latency_tsc = tmrCvt((uint64_t)MAX_PREEMPTION_LATENCY_NS, tscFCvtn2t);
+	/*
+	 * Free all pages in pmap regions below the base:
+	 * rdar://6332712
+	 *	We can't free all the pages to VM that EFI reports available.
+	 *	Pages in the range 0xc0000-0xff000 aren't safe over sleep/wake.
+	 *	There's also a size miscalculation here: pend is one page less
+	 *	than it should be, but this is left unfixed to remain
+	 *	backwards compatible.
+	 *	Due to this current EFI limitation, we take only the first
+	 *	entry in the memory region table. However, the loop is retained
+	 * 	(with the intended termination criteria commented out) in the
+	 *	hope that some day we can free all low-memory ranges.
+	 */
+	for (i = 0;
+//	     pmap_memory_regions[i].end <= I386_KERNEL_IMAGE_BASE_PAGE;
+	     i < 1 && (pmap_reserved_ranges == 0);
+	     i++) {
+		vm_offset_t	pbase = (vm_offset_t)i386_ptob(pmap_memory_regions[i].base);
+		vm_offset_t	pend  = (vm_offset_t)i386_ptob(pmap_memory_regions[i].end);
+//		vm_offset_t	pend  = i386_ptob(pmap_memory_regions[i].end+1);
+
+		DBG("ml_static_mfree(%p,%p) for pmap region %d\n",
+		    (void *) ml_static_ptovirt(pbase),
+		    (void *) (pend - pbase), i);
+		ml_static_mfree(ml_static_ptovirt(pbase), pend - pbase);
+	}
 
-}
+	/*
+	 * If text and data are both 2MB-aligned,
+	 * we can map text with large-pages,
+	 * unless the -kernel_text_ps_4K boot-arg overrides.
+	 */
+	if ((stext & I386_LPGMASK) == 0 && (sdata & I386_LPGMASK) == 0) {
+		kprintf("Kernel text is 2MB aligned");
+		kernel_text_ps_4K = FALSE;
+		if (PE_parse_boot_argn("-kernel_text_ps_4K",
+				       &kernel_text_ps_4K,
+				       sizeof (kernel_text_ps_4K)))
+			kprintf(" but will be mapped with 4K pages\n");
+		else
+			kprintf(" and will be mapped with 2M pages\n");
+	}
+
+	(void) PE_parse_boot_argn("wpkernel", &wpkernel, sizeof (wpkernel));
+	if (wpkernel)
+		kprintf("Kernel text %p-%p to be write-protected\n",
+			(void *) stext, (void *) etext);
+
+	spl = splhigh();
+
+	/*
+	 * Scan over text if mappings are to be changed:
+	 * - Remap kernel text read-only unless the "wpkernel" boot-arg is 0
+	 * - Change to large pages if possible and not overridden.
+	 */
+	if (kernel_text_ps_4K && wpkernel) {
+		vm_offset_t     myva;
+		for (myva = stext; myva < etext; myva += PAGE_SIZE) {
+			pt_entry_t     *ptep;
+
+			ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
+			if (ptep)
+				pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW);
+		}
+	}
+
+	if (!kernel_text_ps_4K) {
+		vm_offset_t     myva;
+
+		/*
+		 * Release zero-filled page padding used for 2M-alignment.
+		 */
+		DBG("ml_static_mfree(%p,%p) for padding below text\n",
+			(void *) eHIB, (void *) (stext - eHIB));
+		ml_static_mfree(eHIB, stext - eHIB);
+		DBG("ml_static_mfree(%p,%p) for padding above text\n",
+			(void *) etext, (void *) (sdata - etext));
+		ml_static_mfree(etext, sdata - etext);
+
+		/*
+		 * Coalesce text pages into large pages.
+		 */
+		for (myva = stext; myva < sdata; myva += I386_LPGBYTES) {
+			pt_entry_t	*ptep;
+			vm_offset_t	pte_phys;
+			pt_entry_t	*pdep;
+			pt_entry_t	pde;
+
+			pdep = pmap_pde(kernel_pmap, (vm_map_offset_t)myva);
+			ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
+			DBG("myva: %p pdep: %p ptep: %p\n",
+				(void *) myva, (void *) pdep, (void *) ptep);
+			if ((*ptep & INTEL_PTE_VALID) == 0)
+				continue;
+			pte_phys = (vm_offset_t)(*ptep & PG_FRAME);
+			pde = *pdep & PTMASK;	/* page attributes from pde */
+			pde |= INTEL_PTE_PS;	/* make it a 2M entry */
+			pde |= pte_phys;	/* take page frame from pte */
+
+			if (wpkernel)
+				pde &= ~INTEL_PTE_RW;
+			DBG("pmap_store_pte(%p,0x%llx)\n",
+				(void *)pdep, pde);
+			pmap_store_pte(pdep, pde);
+
+			/*
+			 * Free the now-unused level-1 pte.
+			 * Note: ptep is a virtual address to the pte in the
+			 *   recursive map. We can't use this address to free
+			 *   the page. Instead we need to compute its address
+			 *   in the Idle PTEs in "low memory".
+			 */
+			vm_offset_t vm_ptep = (vm_offset_t) KPTphys
+						+ (pte_phys >> PTPGSHIFT);
+			DBG("ml_static_mfree(%p,0x%x) for pte\n",
+				(void *) vm_ptep, PAGE_SIZE);
+			ml_static_mfree(vm_ptep, PAGE_SIZE);
+		}
+
+		/* Change variable read by sysctl machdep.pmap */
+		pmap_kernel_text_ps = I386_LPGBYTES;
+	}
 
+	/* no matter what, kernel page zero is not accessible */
+	pmap_store_pte(pmap_pte(kernel_pmap, 0), INTEL_PTE_INVALID);
+
+	/* map lowmem global page into fixed addr */
+	pt_entry_t *pte = NULL;
+	if (0 == (pte = pmap_pte(kernel_pmap,
+				 VM_MIN_KERNEL_LOADED_ADDRESS + 0x2000)))
+		panic("lowmem pte");
+	/* make sure lowGlo sits on a page boundary */
+	assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK));
+	pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)
+				| INTEL_PTE_REF
+				| INTEL_PTE_MOD
+				| INTEL_PTE_WIRED
+				| INTEL_PTE_VALID
+				| INTEL_PTE_RW);
+	splx(spl);
+	flush_tlb();
+}
 
 #define managed_page(x) ( (unsigned int)x <= last_managed_page && (pmap_phys_attributes[x] & PHYS_MANAGED) )
 
@@ -1324,6 +1489,8 @@ pmap_create(
 		va = (vm_offset_t)p->dirbase;
 		p->pdirbase = kvtophys(va);
 
+		PMAP_ZINFO_SALLOC(NBPTD);
+
 		template = INTEL_PTE_VALID;
 		for (i = 0; i< NPGPTD; i++, pdpt++ ) {
 			pmap_paddr_t pa;
@@ -1347,6 +1514,8 @@ pmap_create(
 		p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_hold);
 
 		OSAddAtomic(1,  &inuse_ptepages_count);
+		OSAddAtomic64(1,  &alloc_ptepages_count);
+		PMAP_ZINFO_SALLOC(PAGE_SIZE);
 
 		/* allocate the vm_objs to hold the pdpt, pde and pte pages */
 
@@ -1362,7 +1531,7 @@ pmap_create(
 		/* uber space points to uber mapped kernel */
 		s = splhigh();
 		pml4p = pmap64_pml4(p, 0ULL);
-		pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX), *kernel_pmap->pm_pml4);
+		pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX),*kernel_pmap->pm_pml4);
 
 
 		if (!is_64bit) {
@@ -1458,6 +1627,7 @@ void
 pmap_clear_4GB_pagezero(pmap_t p)
 {
 	pdpt_entry_t	*user_pdptp;
+	boolean_t istate;
 
 	if (p->pm_task_map != TASK_MAP_64BIT_SHARED)
 		return;
@@ -1466,6 +1636,9 @@ pmap_clear_4GB_pagezero(pmap_t p)
 
 	p->pm_task_map = TASK_MAP_64BIT;
 
+	istate = ml_set_interrupts_enabled(FALSE);
+	if (current_cpu_datap()->cpu_task_map == TASK_MAP_64BIT_SHARED)
+	  current_cpu_datap()->cpu_task_map = TASK_MAP_64BIT;
 	pmap_load_kernel_cr3();
 
 	user_pdptp = pmap64_pdpt(p, 0x0);
@@ -1474,6 +1647,8 @@ pmap_clear_4GB_pagezero(pmap_t p)
 	pmap_store_pte(user_pdptp+2, 0);
 	pmap_store_pte(user_pdptp+3, 0);
 
+	ml_set_interrupts_enabled(istate);
+
 	PMAP_UNLOCK(p);
 }
 
@@ -1542,8 +1717,11 @@ pmap_destroy(
 	 */
 	if (!cpu_64bit) {
 		OSAddAtomic(-p->pm_obj->resident_page_count,  &inuse_ptepages_count);
+		PMAP_ZINFO_PFREE(p->pm_obj->resident_page_count * PAGE_SIZE);
 
 		kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD);
+		PMAP_ZINFO_SFREE(NBPTD);
+
 		zfree(pdpt_zone, (void *)p->pm_hold);
 
 		vm_object_deallocate(p->pm_obj);
@@ -1552,8 +1730,8 @@ pmap_destroy(
 	        int inuse_ptepages = 0;
 
 		/* free 64 bit mode structs */
-		inuse_ptepages++;
 		kmem_free(kernel_map, (vm_offset_t)p->pm_hold, PAGE_SIZE);
+		PMAP_ZINFO_SFREE(PAGE_SIZE);
 
 		inuse_ptepages += p->pm_obj_pml4->resident_page_count;
 		vm_object_deallocate(p->pm_obj_pml4);
@@ -1564,8 +1742,10 @@ pmap_destroy(
 		inuse_ptepages += p->pm_obj->resident_page_count;
 		vm_object_deallocate(p->pm_obj);
 
-		OSAddAtomic(-inuse_ptepages,  &inuse_ptepages_count);
+		OSAddAtomic(-(inuse_ptepages+1),  &inuse_ptepages_count);
+		PMAP_ZINFO_PFREE(inuse_ptepages * PAGE_SIZE);
 	}
+
 	zfree(pmap_zone, p);
 
 	PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
@@ -1589,7 +1769,6 @@ pmap_reference(
 	}
 }
 
-
 /*
  *	Remove phys addr if mapped in specified map
  *
@@ -1604,22 +1783,6 @@ pmap_remove_some_phys(
 
 }
 
-/*
- *	Routine:
- *		pmap_disconnect
- *
- *	Function:
- *		Disconnect all mappings for this page and return reference and change status
- *		in generic format.
- *
- */
-unsigned int pmap_disconnect(
-	ppnum_t pa)
-{
-	pmap_page_protect(pa, 0);			/* disconnect the page */
-	return (pmap_get_refmod(pa));			/* return ref/chg status */
-}
-
 /*
  *	Set the physical protection on the
  *	specified range of this map as requested.
@@ -1694,7 +1857,9 @@ pmap_protect(
 	    sva = lva;
 	}
 	if (num_found)
+	{
 	    PMAP_UPDATE_TLBS(map, orig_sva, eva);
+	}
 
 	PMAP_UNLOCK(map);
 
@@ -1723,51 +1888,6 @@ pmap_map_block(
     }
 }
 
-
-/*
- *	Routine:	pmap_change_wiring
- *	Function:	Change the wiring attribute for a map/virtual-address
- *			pair.
- *	In/out conditions:
- *			The mapping must already exist in the pmap.
- */
-void
-pmap_change_wiring(
-	register pmap_t	map,
-	vm_map_offset_t	vaddr,
-	boolean_t	wired)
-{
-	register pt_entry_t	*pte;
-
-	/*
-	 *	We must grab the pmap system lock because we may
-	 *	change a pte_page queue.
-	 */
-	PMAP_LOCK(map);
-
-	if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
-		panic("pmap_change_wiring: pte missing");
-
-	if (wired && !iswired(*pte)) {
-	    /*
-	     *	wiring down mapping
-	     */
-	    OSAddAtomic(+1,  &map->stats.wired_count);
-	    pmap_update_pte(pte, *pte, (*pte | INTEL_PTE_WIRED));
-	}
-	else if (!wired && iswired(*pte)) {
-	    /*
-	     *	unwiring mapping
-	     */
-	    assert(map->stats.wired_count >= 1);
-	    OSAddAtomic(-1,  &map->stats.wired_count);
-	    pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WIRED));
-	}
-
-	PMAP_UNLOCK(map);
-}
-
-
 /*
  *	Routine:	pmap_extract
  *	Function:
@@ -1838,6 +1958,8 @@ pmap_expand_pml4(
 	vm_page_unlock_queues();
 
 	OSAddAtomic(1,  &inuse_ptepages_count);
+	OSAddAtomic64(1,  &alloc_ptepages_count);
+	PMAP_ZINFO_PALLOC(PAGE_SIZE);
 
 	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
 	vm_object_lock(map->pm_obj_pml4);
@@ -1853,6 +1975,7 @@ pmap_expand_pml4(
 		VM_PAGE_FREE(m);
 
 		OSAddAtomic(-1,  &inuse_ptepages_count);
+		PMAP_ZINFO_PFREE(PAGE_SIZE);
 		return;
 	}
 	pmap_set_noencrypt(pn);
@@ -1928,6 +2051,8 @@ pmap_expand_pdpt(
 	vm_page_unlock_queues();
 
 	OSAddAtomic(1,  &inuse_ptepages_count);
+	OSAddAtomic64(1,  &alloc_ptepages_count);
+	PMAP_ZINFO_PALLOC(PAGE_SIZE);
 
 	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
 	vm_object_lock(map->pm_obj_pdpt);
@@ -1943,6 +2068,7 @@ pmap_expand_pdpt(
 		VM_PAGE_FREE(m);
 
 		OSAddAtomic(-1,  &inuse_ptepages_count);
+		PMAP_ZINFO_PFREE(PAGE_SIZE);
 		return;
 	}
 	pmap_set_noencrypt(pn);
@@ -2040,6 +2166,8 @@ pmap_expand(
 	vm_page_unlock_queues();
 
 	OSAddAtomic(1,  &inuse_ptepages_count);
+	OSAddAtomic64(1,  &alloc_ptepages_count);
+	PMAP_ZINFO_PALLOC(PAGE_SIZE);
 
 	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
 	vm_object_lock(map->pm_obj);
@@ -2056,6 +2184,7 @@ pmap_expand(
 		VM_PAGE_FREE(m);
 
 		OSAddAtomic(-1,  &inuse_ptepages_count);
+		PMAP_ZINFO_PFREE(PAGE_SIZE);
 		return;
 	}
 	pmap_set_noencrypt(pn);
@@ -2205,11 +2334,12 @@ pmap_collect(
 			if (m == VM_PAGE_NULL)
 			    panic("pmap_collect: pte page not in object");
 
+			vm_object_unlock(p->pm_obj);
+
 			VM_PAGE_FREE(m);
 
 			OSAddAtomic(-1,  &inuse_ptepages_count);
-
-			vm_object_unlock(p->pm_obj);
+			PMAP_ZINFO_PFREE(PAGE_SIZE);
 		    }
 
 		    PMAP_LOCK(p);
@@ -2261,319 +2391,6 @@ pmap_pageable(
 #endif	/* lint */
 }
 
-/*
- *	Clear specified attribute bits.
- */
-void
-phys_attribute_clear(
-	ppnum_t		pn,
-	int		bits)
-{
-	pv_rooted_entry_t		pv_h;
-	register pv_hashed_entry_t	pv_e;
-	register pt_entry_t	*pte;
-	int			pai;
-	register pmap_t		pmap;
-
-	pmap_intr_assert();
-	assert(pn != vm_page_fictitious_addr);
-	if (pn == vm_page_guard_addr)
-		return;
-
-	pai = ppn_to_pai(pn);
-
-	if (!managed_page(pai)) {
-	    /*
-	     *	Not a managed page.
-	     */
-	    return;
-	}
-
-
-	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
-		   (int) pn, bits, 0, 0, 0);
-
-	pv_h = pai_to_pvh(pai);
-
-	LOCK_PVH(pai);
-
-	/*
-	 * Walk down PV list, clearing all modify or reference bits.
-	 * We do not have to lock the pv_list because we have
-	 * the entire pmap system locked.
-	 */
-	if (pv_h->pmap != PMAP_NULL) {
-	    /*
-	     * There are some mappings.
-	     */
-
-	  pv_e = (pv_hashed_entry_t)pv_h;
-
-	  do {
-		pmap = pv_e->pmap;
-
-		{
-		    vm_map_offset_t va;
-
-		    va = pv_e->va;
-
-		    /*
-		     * Clear modify and/or reference bits.
-		     */
-
-		    pte = pmap_pte(pmap, va);
-		    pmap_update_pte(pte, *pte, (*pte & ~bits));
-		    /* Ensure all processors using this translation
-		     * invalidate this TLB entry. The invalidation *must* follow
-		     * the PTE update, to ensure that the TLB shadow of the
-		     * 'D' bit (in particular) is synchronized with the
-		     * updated PTE.
-		     */
-		    PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
-		}
-
-		pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
-
-	  } while (pv_e != (pv_hashed_entry_t)pv_h);
-	}
-	pmap_phys_attributes[pai] &= ~bits;
-
-	UNLOCK_PVH(pai);
-
-	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
-		   0, 0, 0, 0, 0);
-
-}
-
-/*
- *	Check specified attribute bits.
- */
-int
-phys_attribute_test(
-	ppnum_t		pn,
-	int		bits)
-{
-	pv_rooted_entry_t		pv_h;
-	register pv_hashed_entry_t	pv_e;
-	register pt_entry_t	*pte;
-	int			pai;
-	register pmap_t		pmap;
-	int			attributes = 0;
-
-	pmap_intr_assert();
-	assert(pn != vm_page_fictitious_addr);
-	if (pn == vm_page_guard_addr)
-		return 0;
-
-	pai = ppn_to_pai(pn);
-
-	if (!managed_page(pai)) {
-	    /*
-	     *	Not a managed page.
-	     */
-	    return (0);
-	}
-
-	/*
-	 * super fast check...  if bits already collected
-	 * no need to take any locks...
-	 * if not set, we need to recheck after taking
-	 * the lock in case they got pulled in while
-	 * we were waiting for the lock
-	 */
-	if ( (pmap_phys_attributes[pai] & bits) == bits)
-	    return (bits);
-
-	pv_h = pai_to_pvh(pai);
-
-	LOCK_PVH(pai);
-
-	attributes = pmap_phys_attributes[pai] & bits;
-
-
-	/*
-	 * Walk down PV list, checking the mappings until we
-	 * reach the end or we've found the attributes we've asked for
-	 * We do not have to lock the pv_list because we have
-	 * the entire pmap system locked.
-	 */
-	if (pv_h->pmap != PMAP_NULL) {
-	    /*
-	     * There are some mappings.
-	     */
-	  pv_e = (pv_hashed_entry_t)pv_h;
-	  if (attributes != bits) do {
-
-	        pmap = pv_e->pmap;
-
-		{
-		    vm_map_offset_t va;
-
-		    va = pv_e->va;
-		    /*
-		     * first make sure any processor actively
-		     * using this pmap, flushes its TLB state
-		     */
-		    PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
-
-		    /*
-		     * pick up modify and/or reference bits from this mapping
-		     */
-		    pte = pmap_pte(pmap, va);
-		    attributes |= (int)(*pte & bits);
-
-		}
-
-		pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
-
-	    } while ((attributes != bits) && (pv_e != (pv_hashed_entry_t)pv_h));
-	}
-
-	UNLOCK_PVH(pai);
-	return (attributes);
-}
-
-/*
- *	Set specified attribute bits.
- */
-void
-phys_attribute_set(
-	ppnum_t		pn,
-	int		bits)
-{
-	int		pai;
-
-	pmap_intr_assert();
-	assert(pn != vm_page_fictitious_addr);
-	if (pn == vm_page_guard_addr)
-		return;
-
-	pai = ppn_to_pai(pn);
-
-	if (!managed_page(pai)) {
-	    /*
-	     *	Not a managed page.
-	     */
-	    return;
-	}
-
-	LOCK_PVH(pai);
-
-	pmap_phys_attributes[pai] |= bits;
-
-	UNLOCK_PVH(pai);
-}
-
-/*
- *	Set the modify bit on the specified physical page.
- */
-
-void pmap_set_modify(
-		     ppnum_t pn)
-{
-	phys_attribute_set(pn, PHYS_MODIFIED);
-}
-
-/*
- *	Clear the modify bits on the specified physical page.
- */
-
-void
-pmap_clear_modify(
-		  ppnum_t pn)
-{
-	phys_attribute_clear(pn, PHYS_MODIFIED);
-}
-
-/*
- *	pmap_is_modified:
- *
- *	Return whether or not the specified physical page is modified
- *	by any physical maps.
- */
-
-boolean_t
-pmap_is_modified(
-		 ppnum_t pn)
-{
-        if (phys_attribute_test(pn, PHYS_MODIFIED))
-	        return TRUE;
-
-	return FALSE;
-}
-
-/*
- *	pmap_clear_reference:
- *
- *	Clear the reference bit on the specified physical page.
- */
-
-void
-pmap_clear_reference(
-		     ppnum_t pn)
-{
-	phys_attribute_clear(pn, PHYS_REFERENCED);
-}
-
-void
-pmap_set_reference(ppnum_t pn)
-{
-	phys_attribute_set(pn, PHYS_REFERENCED);
-}
-
-/*
- *	pmap_is_referenced:
- *
- *	Return whether or not the specified physical page is referenced
- *	by any physical maps.
- */
-
-boolean_t
-pmap_is_referenced(
-		   ppnum_t pn)
-{
-        if (phys_attribute_test(pn, PHYS_REFERENCED))
-	        return TRUE;
-
-	return FALSE;
-}
-
-/*
- * pmap_get_refmod(phys)
- *  returns the referenced and modified bits of the specified
- *  physical page.
- */
-unsigned int
-pmap_get_refmod(ppnum_t pa)
-{
-        int	refmod;
-	unsigned int retval = 0;
-
-	refmod = phys_attribute_test(pa, PHYS_MODIFIED | PHYS_REFERENCED);
-
-	if (refmod & PHYS_MODIFIED)
-	        retval |= VM_MEM_MODIFIED;
-	if (refmod & PHYS_REFERENCED)
-	        retval |= VM_MEM_REFERENCED;
-
-	return (retval);
-}
-
-/*
- * pmap_clear_refmod(phys, mask)
- *  clears the referenced and modified bits as specified by the mask
- *  of the specified physical page.
- */
-void
-pmap_clear_refmod(ppnum_t pa, unsigned int mask)
-{
-	unsigned int  x86Mask;
-
-	x86Mask = (   ((mask &   VM_MEM_MODIFIED)?   PHYS_MODIFIED : 0)
-	            | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
-	phys_attribute_clear(pa, x86Mask);
-}
-
 void 
 invalidate_icache(__unused vm_offset_t	addr,
 		  __unused unsigned	cnt,
@@ -2879,7 +2696,6 @@ pmap_cpu_free(struct cpu_pmap *cp)
 	}
 }
 
-
 mapwindow_t *
 pmap_get_mapwindow(pt_entry_t pentry)
 {
@@ -2887,7 +2703,8 @@ pmap_get_mapwindow(pt_entry_t pentry)
     int i;
 
     assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
-
+    /* fold in cache attributes for this physical page */
+    pentry |= pmap_get_cache_attributes(i386_btop(pte_to_pa(pentry)));
     /*
      * Note: 0th map reserved for pmap_pte()
      */
@@ -2895,11 +2712,11 @@ pmap_get_mapwindow(pt_entry_t pentry)
             mp = &current_cpu_datap()->cpu_pmap->mapwindow[i];
 
 	    if (*mp->prv_CMAP == 0) {
-	            pmap_store_pte(mp->prv_CMAP, pentry);
+			pmap_store_pte(mp->prv_CMAP, pentry);
 
-		    invlpg((uintptr_t)mp->prv_CADDR);
+			invlpg((uintptr_t)mp->prv_CADDR);
 
-		    return (mp);
+			return (mp);
 	    }
     }
     panic("pmap_get_mapwindow: no windows available");
@@ -2937,17 +2754,26 @@ void pmap_disable_NX(pmap_t pmap) {
 }
 
 void
-pt_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size,
-		  vm_size_t *alloc_size, int *collectable, int *exhaustable)
+pt_fake_zone_init(int zone_index)
+{
+	pt_fake_zone_index = zone_index;
+}
+
+void
+pt_fake_zone_info(int *count, 
+		  vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
+		  uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
 {
         *count      = inuse_ptepages_count;
 	*cur_size   = PAGE_SIZE * inuse_ptepages_count;
 	*max_size   = PAGE_SIZE * (inuse_ptepages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
 	*elem_size  = PAGE_SIZE;
 	*alloc_size = PAGE_SIZE;
+	*sum_size   = alloc_ptepages_count * PAGE_SIZE;
 
 	*collectable = 1;
 	*exhaustable = 0;
+	*caller_acct = 1;
 }
 
 vm_offset_t pmap_cpu_high_map_vaddr(int cpu, enum high_cpu_types e)
@@ -2986,7 +2812,7 @@ pmap_cpuset_NMIPI(cpu_set cpu_mask) {
 		if (cpu_mask & cpu_bit)
 			cpu_NMI_interrupt(cpu);
 	}
-	deadline = mach_absolute_time() + (LockTimeOut * 2);
+	deadline = mach_absolute_time() + (((uint64_t)LockTimeOut) * 3);
 	while (mach_absolute_time() < deadline)
 		cpu_pause();
 }
@@ -3001,7 +2827,7 @@ pmap_cpuset_NMIPI(cpu_set cpu_mask) {
  *  - return ... the caller will unlock the pmap
  */
 void
-pmap_flush_tlbs(pmap_t	pmap)
+pmap_flush_tlbs(pmap_t	pmap, vm_map_offset_t startv, vm_map_offset_t endv)
 {
 	unsigned int	cpu;
 	unsigned int	cpu_bit;
@@ -3044,8 +2870,8 @@ pmap_flush_tlbs(pmap_t	pmap)
 		}
 	}
 
-	PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_START,
-		   (int) pmap, cpus_to_signal, flush_self, 0, 0);
+	PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_START,
+		   (uintptr_t) pmap, cpus_to_signal, flush_self, startv, 0);
 
 	if (cpus_to_signal) {
 		cpu_set	cpus_to_respond = cpus_to_signal;
@@ -3056,6 +2882,7 @@ pmap_flush_tlbs(pmap_t	pmap)
 		 */
 		while (cpus_to_respond != 0) {
 			long orig_acks = 0;
+
 			for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
 				if ((cpus_to_respond & cpu_bit) != 0) {
 					if (!cpu_datap(cpu)->cpu_running ||
@@ -3068,7 +2895,8 @@ pmap_flush_tlbs(pmap_t	pmap)
 				if (cpus_to_respond == 0)
 					break;
 			}
-			if (mach_absolute_time() > deadline) {
+
+			if (cpus_to_respond && (mach_absolute_time() > deadline)) {
 				if (machine_timeout_suspended())
 					continue;
 				pmap_tlb_flush_timeout = TRUE;
@@ -3094,8 +2922,8 @@ pmap_flush_tlbs(pmap_t	pmap)
 		panic("pmap_flush_tlbs: pmap == kernel_pmap && flush_self != TRUE; kernel CR3: 0x%llX, CPU active CR3: 0x%llX, CPU Task Map: %d", kernel_pmap->pm_cr3, current_cpu_datap()->cpu_active_cr3, current_cpu_datap()->cpu_task_map);
 	}
 
-	PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END,
-		   (int) pmap, cpus_to_signal, flush_self, 0, 0);
+	PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END,
+		   (uintptr_t) pmap, cpus_to_signal, startv, endv, 0);
 }
 
 void
@@ -3120,16 +2948,6 @@ pmap_update_interrupt(void)
         PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_END,
 		   0, 0, 0, 0, 0);
 }
-
-
-unsigned int pmap_cache_attributes(ppnum_t pn) {
-
-	if (!managed_page(ppn_to_pai(pn)))
-	        return (VM_WIMG_IO);
-
-	return (VM_WIMG_COPYBACK);
-}
-
 #ifdef PMAP_DEBUG
 void
 pmap_dump(pmap_t p)
@@ -3222,4 +3040,3 @@ void dump_4GB_pdpt_thread(thread_t tp)
 
 
 #endif
-
diff --git a/osfmk/i386/pmap.h b/osfmk/i386/pmap.h
index 5d3ac764e..44b6bf742 100644
--- a/osfmk/i386/pmap.h
+++ b/osfmk/i386/pmap.h
@@ -80,10 +80,13 @@
 #include <kern/kern_types.h>
 #include <kern/thread.h>
 #include <kern/lock.h>
+#include <mach/branch_predicates.h>
 
 #include <i386/mp.h>
 #include <i386/proc_reg.h>
 
+#include <i386/pal_routines.h>
+
 /*
  *	Define the generic in terms of the specific
  */
@@ -172,6 +175,24 @@ typedef uint64_t        pt_entry_t;
 
 typedef uint64_t  pmap_paddr_t;
 
+#if	DEBUG
+#define PMAP_ASSERT 1
+#endif
+#if PMAP_ASSERT
+#define	pmap_assert(ex) ((ex) ? (void)0 : Assert(__FILE__, __LINE__, # ex))
+
+#define pmap_assert2(ex, fmt, args...)					\
+	do {								\
+		if (!(ex)) {						\
+			kprintf("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0),  ##args); \
+			panic("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0),  ##args); 		\
+		}							\
+	} while(0)
+#else
+#define pmap_assert(ex)
+#define pmap_assert2(ex, fmt, args...)
+#endif
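+/*
+ * Hypothetical call site, for illustration only:
+ *	pmap_assert2(pte != PT_ENTRY_NULL, "pmap: %p va: 0x%llx", pmap, va);
+ * On DEBUG kernels this kprintf()s the formatted context and panics;
+ * otherwise both macros compile away entirely.
+ */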
+
 /* superpages */
 #ifdef __x86_64__
 #define SUPERPAGE_NBASEPAGES 512
@@ -385,19 +406,10 @@ enum  high_fixed_addresses {
 #define pdenum(pmap, a)	(((vm_offset_t)(a) >> PDESHIFT) & PDEMASK)
 #define PMAP_INVALID_PDPTNUM (~0ULL)
 
-#ifdef __i386__
 #define pdeidx(pmap, a)    (((a) >> PDSHIFT)   & ((1ULL<<(48 - PDSHIFT)) -1))
 #define pdptidx(pmap, a)   (((a) >> PDPTSHIFT) & ((1ULL<<(48 - PDPTSHIFT)) -1))
 #define pml4idx(pmap, a)   (((a) >> PML4SHIFT) & ((1ULL<<(48 - PML4SHIFT)) -1))
-#else
-#define VAMASK		   ((1ULL<<48)-1)
-#define pml4idx(pmap, a)   ((((a) & VAMASK) >> PML4SHIFT) &	\
-				((1ULL<<(48 - PML4SHIFT))-1))
-#define pdptidx(pmap, a)   ((((a) & PML4MASK) >> PDPTSHIFT) &	\
-				((1ULL<<(48 - PDPTSHIFT))-1))
-#define pdeidx(pmap, a)    ((((a) & PML4MASK) >> PDSHIFT) &	\
-				((1ULL<<(48 - PDSHIFT)) - 1))
-#endif
+
 
 /*
  *	Convert page descriptor index to user virtual address
@@ -433,7 +445,8 @@ enum  high_fixed_addresses {
 
 #define INTEL_PTE_INVALID       0
 /* This is conservative, but suffices */
-#define INTEL_PTE_RSVD		((1ULL << 8) | (1ULL << 9) | (1ULL << 10) | (1ULL << 11) | (0x1FFULL << 54))
+#define INTEL_PTE_RSVD		((1ULL << 10) | (1ULL << 11) | (0x1FFULL << 54))
+
 #define	pa_to_pte(a)		((a) & INTEL_PTE_PFN) /* XXX */
 #define	pte_to_pa(p)		((p) & INTEL_PTE_PFN) /* XXX */
 #define	pte_increment_pa(p)	((p) += INTEL_OFFMASK+1)
@@ -513,26 +526,26 @@ struct md_page {
  */
 
 struct pmap {
+	decl_simple_lock_data(,lock)	/* lock on map */
+	pmap_paddr_t    pm_cr3;         /* physical addr */
+	boolean_t       pm_shared;
         pd_entry_t      *dirbase;        /* page directory pointer */
 #ifdef __i386__
 	pmap_paddr_t    pdirbase;        /* phys. address of dirbase */
+	vm_offset_t     pm_hold;        /* true pdpt zalloc addr */
 #endif
         vm_object_t     pm_obj;         /* object to hold pde's */
-	int		ref_count;	/* reference count */
-        int		nx_enabled;
         task_map_t      pm_task_map;
-	decl_simple_lock_data(,lock)	/* lock on map */
-	struct pmap_statistics	stats;	/* map statistics */
-#ifdef __i386__
-	vm_offset_t     pm_hold;        /* true pdpt zalloc addr */
-#endif
-	pmap_paddr_t    pm_cr3;         /* physical addr */
         pdpt_entry_t    *pm_pdpt;       /* KVA of 3rd level page */
 	pml4_entry_t    *pm_pml4;       /* VKA of top level */
 	vm_object_t     pm_obj_pdpt;    /* holds pdpt pages */
 	vm_object_t     pm_obj_pml4;    /* holds pml4 pages */
-	vm_object_t     pm_obj_top;     /* holds single top level page */
-        boolean_t       pm_shared;
+#define	PMAP_PCID_MAX_CPUS	(48)	/* Must be a multiple of 8 */
+	pcid_t		pmap_pcid_cpus[PMAP_PCID_MAX_CPUS];
+	volatile uint8_t pmap_pcid_coherency_vector[PMAP_PCID_MAX_CPUS];
+	struct pmap_statistics	stats;	/* map statistics */
+	int		ref_count;	/* reference count */
+        int		nx_enabled;
 };
 
 
@@ -578,23 +591,30 @@ extern unsigned pmap_memory_region_current;
 #define PMAP_MEMORY_REGIONS_SIZE 128
 
 extern pmap_memory_region_t pmap_memory_regions[];
+#include <i386/pmap_pcid.h>
 
 static inline void
 set_dirbase(pmap_t tpmap, __unused thread_t thread) {
-	current_cpu_datap()->cpu_task_cr3 = tpmap->pm_cr3;
-	current_cpu_datap()->cpu_task_map = tpmap->pm_task_map;
+	int ccpu = cpu_number();
+	cpu_datap(ccpu)->cpu_task_cr3 = tpmap->pm_cr3;
+	cpu_datap(ccpu)->cpu_task_map = tpmap->pm_task_map;
 #ifndef __i386__
 	/*
 	 * Switch cr3 if necessary
 	 * - unless running with no_shared_cr3 debugging mode
 	 *   and we're not on the kernel's cr3 (after pre-empted copyio)
 	 */
-	if (!no_shared_cr3) {
-		if (get_cr3() != tpmap->pm_cr3)
-			set_cr3(tpmap->pm_cr3);
+	if (__probable(!no_shared_cr3)) {
+		if (get_cr3_base() != tpmap->pm_cr3) {
+			if (pmap_pcid_ncpus) {
+				pmap_pcid_activate(tpmap, ccpu);
+			}
+			else
+				set_cr3_raw(tpmap->pm_cr3);
+		}
 	} else {
-		if (get_cr3() != current_cpu_datap()->cpu_kernel_cr3)
-			set_cr3(current_cpu_datap()->cpu_kernel_cr3);
+		if (get_cr3_base() != cpu_datap(ccpu)->cpu_kernel_cr3)
+			set_cr3_raw(cpu_datap(ccpu)->cpu_kernel_cr3);
 	}
 #endif
 }
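A note on the design change above: with PCID enabled, the low 12 bits of CR3 carry the current PCID rather than being zero (see the "modulo PCID" remark in the CR3-marking comments below), so the comparison presumably goes through the page-aligned get_cr3_base() value, and installing a new space delegates to pmap_pcid_activate() so the per-(pmap, cpu) PCID is loaded together with the base.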
@@ -616,7 +636,7 @@ extern addr64_t		(kvtophys)(
 extern void		pmap_expand(
 				pmap_t		pmap,
 				vm_map_offset_t	addr);
-
+#if	!defined(__x86_64__)
 extern pt_entry_t	*pmap_pte(
 				struct pmap	*pmap,
 				vm_map_offset_t	addr);
@@ -632,7 +652,7 @@ extern pd_entry_t	*pmap64_pde(
 extern pdpt_entry_t	*pmap64_pdpt(
 				struct pmap	*pmap,
 				vm_map_offset_t	addr);
-
+#endif
 extern vm_offset_t	pmap_map(
 				vm_offset_t	virt,
 				vm_map_offset_t	start,
@@ -670,7 +690,10 @@ extern void             pmap_commpage64_init(
 					   int count);
 
 #endif
-
+/*
+ * Get cache attributes (as pagetable bits) for the specified phys page
+ */
+extern	unsigned	pmap_get_cache_attributes(ppnum_t);
 #if NCOPY_WINDOWS > 0
 extern struct cpu_pmap	*pmap_cpu_alloc(
 				boolean_t	is_boot_cpu);
@@ -704,10 +727,11 @@ extern vm_offset_t pmap_cpu_high_shared_remap(int, enum high_cpu_types, vm_offse
 extern vm_offset_t pmap_high_shared_remap(enum high_fixed_addresses, vm_offset_t, int);
 #endif
 
-extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *, int *, int *);
+extern void pt_fake_zone_init(int);
+extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *, 
+			      uint64_t *, int *, int *, int *);
 extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__printflike(1,2));
 
-
 /*
  *	Macros for speed.
  */
@@ -727,8 +751,11 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr
 #define PMAP_DEACTIVATE_MAP(map, thread)				\
 	if (vm_map_pmap(map)->pm_task_map == TASK_MAP_64BIT_SHARED)	\
 		pmap_load_kernel_cr3();
+#elif defined(__x86_64__)
+#define PMAP_DEACTIVATE_MAP(map, thread)				\
+	pmap_assert(pmap_pcid_ncpus ? (pcid_for_pmap_cpu_tuple(map->pmap, cpu_number()) == (get_cr3_raw() & 0xFFF)) : TRUE);
 #else
-#define PMAP_DEACTIVATE_MAP(map, my_cpu)
+#define PMAP_DEACTIVATE_MAP(map, thread)
 #endif
 
 #if   defined(__i386__)
@@ -772,18 +799,16 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr
 
 #else /* __x86_64__ */
 #define	PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) {			\
-	spl_t		spl;						\
                                                                         \
-        spl = splhigh();						\
+	pmap_assert(ml_get_interrupts_enabled() == FALSE);		\
 	if (old_th->map != new_th->map) {				\
 		PMAP_DEACTIVATE_MAP(old_th->map, old_th);		\
 		PMAP_ACTIVATE_MAP(new_th->map, new_th);			\
 	}								\
-	splx(spl);							\
 }
 #endif /* __i386__ */
 
-#ifdef __i386__
+#if NCOPY_WINDOWS > 0
 #define	PMAP_SWITCH_USER(th, new_map, my_cpu) {				\
 	spl_t		spl;						\
 									\
@@ -792,7 +817,7 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr
 	th->map = new_map;						\
 	PMAP_ACTIVATE_MAP(th->map, th);					\
 	splx(spl);							\
-        inval_copy_windows(th);						\
+	inval_copy_windows(th);						\
 }
 #else
 #define	PMAP_SWITCH_USER(th, new_map, my_cpu) {				\
@@ -810,7 +835,7 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr
  * Marking the current cpu's cr3 inactive is achieved by setting its lsb.
 * Marking the current cpu's cr3 active once more involves clearing this bit.
  * Note that valid page tables are page-aligned and so the bottom 12 bits
- * are noramlly zero.
+ * are normally zero, modulo PCID.
  * We can only mark the current cpu active/inactive but we can test any cpu.
  */
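A standalone illustration of the lsb-marking scheme described above (the helper names are hypothetical, not part of this patch):

	/* Page tables are 4KiB-aligned, so bit 0 of a saved CR3 image is
	 * free to record "this cpu's cr3 is inactive". */
	static inline uint64_t cr3_mark_inactive(uint64_t cr3) { return cr3 | 1ULL; }
	static inline uint64_t cr3_mark_active(uint64_t cr3)   { return cr3 & ~1ULL; }
	static inline int      cr3_is_inactive(uint64_t cr3)   { return (int)(cr3 & 1ULL); }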
 #define CPU_CR3_MARK_INACTIVE()						\
@@ -837,13 +862,13 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr
  */
 #if   defined(__x86_64__)
 #define MARK_CPU_IDLE(my_cpu)	{					\
-	int	s = splhigh();						\
+	assert(ml_get_interrupts_enabled() == FALSE);			\
 	CPU_CR3_MARK_INACTIVE();					\
 	__asm__ volatile("mfence");					\
-	splx(s);							\
 }
 #else /* __i386__ native */
 #define MARK_CPU_IDLE(my_cpu)	{					\
+	assert(ml_get_interrupts_enabled() == FALSE);			\
 	/*								\
 	 *	Mark this cpu idle, and remove it from the active set,	\
 	 *	since it is not actively using any pmap.  Signal_cpus	\
@@ -851,20 +876,17 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr
 	 *	but will queue the update request for when the cpu	\
 	 *	becomes active.						\
 	 */								\
-	int	s = splhigh();						\
 	if (!cpu_mode_is64bit() || no_shared_cr3)			\
 		process_pmap_updates();					\
 	else								\
 		pmap_load_kernel_cr3();					\
 	CPU_CR3_MARK_INACTIVE();					\
 	__asm__ volatile("mfence");					\
-	splx(s);							\
 }
 #endif /* __i386__ */
 
 #define MARK_CPU_ACTIVE(my_cpu) {					\
-									\
-	int	s = splhigh();						\
+	assert(ml_get_interrupts_enabled() == FALSE);			\
 	/*								\
 	 *	If a kernel_pmap update was requested while this cpu	\
 	 *	was idle, process it as if we got the interrupt.	\
@@ -880,7 +902,6 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr
 									\
 	if (current_cpu_datap()->cpu_tlb_invalid)			\
 	    process_pmap_updates();					\
-	splx(s);							\
 }
 
 #define PMAP_CONTEXT(pmap, thread)
@@ -898,11 +919,13 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr
 #define	pmap_attribute_cache_sync(addr,size,attr,value) \
 					(KERN_INVALID_ADDRESS)
 
-#define MACHINE_PMAP_IS_EMPTY 1
+#define MACHINE_PMAP_IS_EMPTY	1
 extern boolean_t pmap_is_empty(pmap_t		pmap,
 			       vm_map_offset_t	start,
 			       vm_map_offset_t	end);
 
+#define MACHINE_BOOTSTRAPPTD	1	/* Static bootstrap page-tables */
+
 
 #endif	/* ASSEMBLER */
 
diff --git a/osfmk/i386/pmap_common.c b/osfmk/i386/pmap_common.c
new file mode 100644
index 000000000..d81248dae
--- /dev/null
+++ b/osfmk/i386/pmap_common.c
@@ -0,0 +1,505 @@
+/*
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <vm/pmap.h>
+#include <i386/pmap_internal.h>
+
+/*
+ *	Each entry in the pv_head_table is locked by a bit in the
+ *	pv_lock_table.  The lock bits are accessed by the physical
+ *	address of the page they lock.
+ */
+
+char	*pv_lock_table;		/* pointer to array of bits */
+char    *pv_hash_lock_table;
+
+pv_rooted_entry_t	pv_head_table;		/* array of entries, one per
+						 * page */
+uint32_t			pv_hashed_free_count = 0;
+uint32_t			pv_hashed_kern_free_count = 0;
+
+pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[PMAP_PAGETABLE_CORRUPTION_MAX_LOG];
+uint32_t pmap_pagetable_corruption_incidents;
+uint64_t pmap_pagetable_corruption_last_abstime = (~(0ULL) >> 1);
+uint64_t pmap_pagetable_corruption_interval_abstime;
+thread_call_t 	pmap_pagetable_corruption_log_call;
+static thread_call_data_t 	pmap_pagetable_corruption_log_call_data;
+boolean_t pmap_pagetable_corruption_timeout = FALSE;
+
+volatile uint32_t	mappingrecurse = 0;
+
+uint32_t  pv_hashed_low_water_mark, pv_hashed_kern_low_water_mark, pv_hashed_alloc_chunk, pv_hashed_kern_alloc_chunk;
+
+thread_t mapping_replenish_thread;
+event_t	mapping_replenish_event, pmap_user_pv_throttle_event;
+
+uint64_t pmap_pv_throttle_stat, pmap_pv_throttled_waiters;
+
+unsigned int pmap_cache_attributes(ppnum_t pn) {
+	if (pmap_get_cache_attributes(pn) & INTEL_PTE_NCACHE)
+	        return (VM_WIMG_IO);
+	else
+		return (VM_WIMG_COPYBACK);
+}
+
+void	pmap_set_cache_attributes(ppnum_t pn, unsigned int cacheattr) {
+	unsigned int current, template = 0;
+	int pai;
+
+	if (cacheattr & VM_MEM_NOT_CACHEABLE) {
+		if(!(cacheattr & VM_MEM_GUARDED))
+			template |= PHYS_PTA;
+		template |= PHYS_NCACHE;
+	}
+
+	pmap_intr_assert();
+
+	assert((pn != vm_page_fictitious_addr) && (pn != vm_page_guard_addr));
+
+	pai = ppn_to_pai(pn);
+
+	if (!IS_MANAGED_PAGE(pai)) {
+		return;
+	}
+
+	/* Override the cache attributes for this phys page.
+	 * This does not walk existing mappings to adjust them;
+	 * it assumes the page is disconnected.
+	 */
+
+	LOCK_PVH(pai);
+
+	pmap_update_cache_attributes_locked(pn, template);
+
+	current = pmap_phys_attributes[pai] & PHYS_CACHEABILITY_MASK;
+	pmap_phys_attributes[pai] &= ~PHYS_CACHEABILITY_MASK;
+	pmap_phys_attributes[pai] |= template;
+
+	UNLOCK_PVH(pai);
+
+	if ((template & PHYS_NCACHE) && !(current & PHYS_NCACHE)) {
+		pmap_sync_page_attributes_phys(pn);
+	}
+}
+
+unsigned	pmap_get_cache_attributes(ppnum_t pn) {
+	if (last_managed_page == 0)
+		return 0;
+
+	if (!IS_MANAGED_PAGE(ppn_to_pai(pn))) {
+	    return INTEL_PTE_NCACHE;
+	}
+
+	/*
+	 * The cache attributes are read locklessly for efficiency.
+	 */
+	unsigned int attr = pmap_phys_attributes[ppn_to_pai(pn)];
+	unsigned int template = 0;
+	
+	if (attr & PHYS_PTA)
+		template |= INTEL_PTE_PTA;
+	if (attr & PHYS_NCACHE)
+		template |= INTEL_PTE_NCACHE;
+	return template;
+}
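A hedged sketch of the two functions above in combination (io_ppn and dram_ppn are assumed page numbers, not from this patch):

	/* An unmanaged page reports INTEL_PTE_NCACHE and is classed as IO;
	 * a managed page with no cache-control bits defaults to copyback. */
	assert(pmap_cache_attributes(io_ppn)   == VM_WIMG_IO);
	assert(pmap_cache_attributes(dram_ppn) == VM_WIMG_COPYBACK);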
+
+
+
+boolean_t
+pmap_is_noencrypt(ppnum_t pn)
+{
+	int		pai;
+
+	pai = ppn_to_pai(pn);
+
+	if (!IS_MANAGED_PAGE(pai))
+		return (TRUE);
+
+	if (pmap_phys_attributes[pai] & PHYS_NOENCRYPT)
+		return (TRUE);
+
+	return (FALSE);
+}
+
+
+void
+pmap_set_noencrypt(ppnum_t pn)
+{
+	int		pai;
+
+	pai = ppn_to_pai(pn);
+
+	if (IS_MANAGED_PAGE(pai)) {
+		LOCK_PVH(pai);
+
+		pmap_phys_attributes[pai] |= PHYS_NOENCRYPT;
+
+		UNLOCK_PVH(pai);
+	}
+}
+
+
+void
+pmap_clear_noencrypt(ppnum_t pn)
+{
+	int		pai;
+
+	pai = ppn_to_pai(pn);
+
+	if (IS_MANAGED_PAGE(pai)) {
+		LOCK_PVH(pai);
+
+		pmap_phys_attributes[pai] &= ~PHYS_NOENCRYPT;
+
+		UNLOCK_PVH(pai);
+	}
+}
+
+void
+compute_pmap_gc_throttle(void *arg __unused)
+{
+	
+}
+
+
+__private_extern__ void
+pmap_pagetable_corruption_msg_log(int (*log_func)(const char * fmt, ...)__printflike(1,2)) {
+	if (pmap_pagetable_corruption_incidents > 0) {
+		int i, e = MIN(pmap_pagetable_corruption_incidents, PMAP_PAGETABLE_CORRUPTION_MAX_LOG);
+		(*log_func)("%u pagetable corruption incident(s) detected, timeout: %u\n", pmap_pagetable_corruption_incidents, pmap_pagetable_corruption_timeout);
+		for (i = 0; i < e; i++) {
+			(*log_func)("Incident 0x%x, reason: 0x%x, action: 0x%x, time: 0x%llx\n", pmap_pagetable_corruption_records[i].incident,  pmap_pagetable_corruption_records[i].reason, pmap_pagetable_corruption_records[i].action, pmap_pagetable_corruption_records[i].abstime);
+		}
+	}
+}
+
+static inline void
+pmap_pagetable_corruption_log_setup(void) {
+	if (pmap_pagetable_corruption_log_call == NULL) {
+		nanotime_to_absolutetime(PMAP_PAGETABLE_CORRUPTION_INTERVAL, 0, &pmap_pagetable_corruption_interval_abstime);
+		thread_call_setup(&pmap_pagetable_corruption_log_call_data,
+		    (thread_call_func_t) pmap_pagetable_corruption_msg_log,
+		    (thread_call_param_t) &printf);
+		pmap_pagetable_corruption_log_call = &pmap_pagetable_corruption_log_call_data;
+	}
+}
+
+void
+mapping_free_prime(void)
+{
+	unsigned		i;
+	pv_hashed_entry_t	pvh_e;
+	pv_hashed_entry_t	pvh_eh;
+	pv_hashed_entry_t	pvh_et;
+	int			pv_cnt;
+
+	/* Scale based on DRAM size */
+	pv_hashed_low_water_mark = MAX(PV_HASHED_LOW_WATER_MARK_DEFAULT, ((uint32_t)(sane_size >> 30)) * 2000);
+	pv_hashed_low_water_mark = MIN(pv_hashed_low_water_mark, 16000);
+	/* Alterable via sysctl */
+	pv_hashed_kern_low_water_mark = MAX(PV_HASHED_KERN_LOW_WATER_MARK_DEFAULT, ((uint32_t)(sane_size >> 30)) * 1000);
+	pv_hashed_kern_low_water_mark = MIN(pv_hashed_kern_low_water_mark, 16000);
+	pv_hashed_kern_alloc_chunk = PV_HASHED_KERN_ALLOC_CHUNK_INITIAL;
+	pv_hashed_alloc_chunk = PV_HASHED_ALLOC_CHUNK_INITIAL;
+
+	pv_cnt = 0;
+	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
+
+	for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK_INITIAL); i++) {
+		pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+
+		pvh_e->qlink.next = (queue_entry_t)pvh_eh;
+		pvh_eh = pvh_e;
+
+		if (pvh_et == PV_HASHED_ENTRY_NULL)
+		        pvh_et = pvh_e;
+		pv_cnt++;
+	}
+	PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+
+	pv_cnt = 0;
+	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
+	for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK_INITIAL; i++) {
+		pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+
+		pvh_e->qlink.next = (queue_entry_t)pvh_eh;
+		pvh_eh = pvh_e;
+
+		if (pvh_et == PV_HASHED_ENTRY_NULL)
+		        pvh_et = pvh_e;
+		pv_cnt++;
+	}
+	PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+}
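To make the scaling above concrete, a worked example under an assumed 8 GiB configuration: sane_size >> 30 == 8, so pv_hashed_low_water_mark == MIN(MAX(5000, 8 * 2000), 16000) == 16000, while pv_hashed_kern_low_water_mark == MIN(MAX(2000, 8 * 1000), 16000) == 8000.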
+
+void mapping_replenish(void);
+
+void mapping_adjust(void) {
+	kern_return_t mres;
+
+	pmap_pagetable_corruption_log_setup();
+
+	mres = kernel_thread_start_priority((thread_continue_t)mapping_replenish, NULL, MAXPRI_KERNEL, &mapping_replenish_thread);
+	if (mres != KERN_SUCCESS) {
+		panic("pmap: mapping_replenish_thread creation failed");
+	}
+	thread_deallocate(mapping_replenish_thread);
+}
+
+unsigned pmap_mapping_thread_wakeups;	
+unsigned pmap_kernel_reserve_replenish_stat;
+unsigned pmap_user_reserve_replenish_stat;
+unsigned pmap_kern_reserve_alloc_stat;
+
+void mapping_replenish(void)
+{
+	pv_hashed_entry_t	pvh_e;
+	pv_hashed_entry_t	pvh_eh;
+	pv_hashed_entry_t	pvh_et;
+	int			pv_cnt;
+	unsigned             	i;
+
+	/* We qualify for VM privileges...*/
+	current_thread()->options |= TH_OPT_VMPRIV;
+
+	for (;;) {
+
+		while (pv_hashed_kern_free_count < pv_hashed_kern_low_water_mark) {
+			pv_cnt = 0;
+			pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
+
+			for (i = 0; i < pv_hashed_kern_alloc_chunk; i++) {
+				pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+				pvh_e->qlink.next = (queue_entry_t)pvh_eh;
+				pvh_eh = pvh_e;
+
+				if (pvh_et == PV_HASHED_ENTRY_NULL)
+					pvh_et = pvh_e;
+				pv_cnt++;
+			}
+			pmap_kernel_reserve_replenish_stat += pv_cnt;
+			PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+		}
+
+		pv_cnt = 0;
+		pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
+
+		if (pv_hashed_free_count < pv_hashed_low_water_mark) {
+			for (i = 0; i < pv_hashed_alloc_chunk; i++) {
+				pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+
+				pvh_e->qlink.next = (queue_entry_t)pvh_eh;
+				pvh_eh = pvh_e;
+
+				if (pvh_et == PV_HASHED_ENTRY_NULL)
+					pvh_et = pvh_e;
+				pv_cnt++;
+			}
+			pmap_user_reserve_replenish_stat += pv_cnt;
+			PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+		}
+		/* Wake threads throttled while the kernel reserve
+		 * was being replenished.
+		 */
+		if (pmap_pv_throttled_waiters) {
+			pmap_pv_throttled_waiters = 0;
+			thread_wakeup(&pmap_user_pv_throttle_event);
+		}
+		/* Check if the kernel pool has been depleted since the
+		 * first pass, to reduce refill latency.
+		 */
+		if (pv_hashed_kern_free_count < pv_hashed_kern_low_water_mark)
+			continue;
+		/* Block sans continuation to avoid yielding kernel stack */
+		assert_wait(&mapping_replenish_event, THREAD_UNINT);
+		mappingrecurse = 0;
+		thread_block(THREAD_CONTINUE_NULL);
+		pmap_mapping_thread_wakeups++;
+	}
+}
+
+/*
+ *	Set specified attribute bits.
+ */
+
+void
+phys_attribute_set(
+	ppnum_t		pn,
+	int		bits)
+{
+	int		pai;
+
+	pmap_intr_assert();
+	assert(pn != vm_page_fictitious_addr);
+	if (pn == vm_page_guard_addr)
+		return;
+
+	pai = ppn_to_pai(pn);
+
+	if (!IS_MANAGED_PAGE(pai)) {
+		/* Not a managed page.  */
+		return;
+	}
+
+	LOCK_PVH(pai);
+	pmap_phys_attributes[pai] |= bits;
+	UNLOCK_PVH(pai);
+}
+
+/*
+ *	Set the modify bit on the specified physical page.
+ */
+
+void
+pmap_set_modify(ppnum_t pn)
+{
+	phys_attribute_set(pn, PHYS_MODIFIED);
+}
+
+/*
+ *	Clear the modify bits on the specified physical page.
+ */
+
+void
+pmap_clear_modify(ppnum_t pn)
+{
+	phys_attribute_clear(pn, PHYS_MODIFIED);
+}
+
+/*
+ *	pmap_is_modified:
+ *
+ *	Return whether or not the specified physical page is modified
+ *	by any physical maps.
+ */
+
+boolean_t
+pmap_is_modified(ppnum_t pn)
+{
+	if (phys_attribute_test(pn, PHYS_MODIFIED))
+		return TRUE;
+	return FALSE;
+}
+
+
+/*
+ *	pmap_clear_reference:
+ *
+ *	Clear the reference bit on the specified physical page.
+ */
+
+void
+pmap_clear_reference(ppnum_t pn)
+{
+	phys_attribute_clear(pn, PHYS_REFERENCED);
+}
+
+void
+pmap_set_reference(ppnum_t pn)
+{
+	phys_attribute_set(pn, PHYS_REFERENCED);
+}
+
+/*
+ *	pmap_is_referenced:
+ *
+ *	Return whether or not the specified physical page is referenced
+ *	by any physical maps.
+ */
+
+boolean_t
+pmap_is_referenced(ppnum_t pn)
+{
+        if (phys_attribute_test(pn, PHYS_REFERENCED))
+		return TRUE;
+	return FALSE;
+}
+
+
+/*
+ * pmap_get_refmod(phys)
+ *  returns the referenced and modified bits of the specified
+ *  physical page.
+ */
+unsigned int
+pmap_get_refmod(ppnum_t pn)
+{
+        int		refmod;
+	unsigned int	retval = 0;
+
+	refmod = phys_attribute_test(pn, PHYS_MODIFIED | PHYS_REFERENCED);
+
+	if (refmod & PHYS_MODIFIED)
+	        retval |= VM_MEM_MODIFIED;
+	if (refmod & PHYS_REFERENCED)
+	        retval |= VM_MEM_REFERENCED;
+
+	return (retval);
+}
+
+/*
+ * pmap_clear_refmod(phys, mask)
+ *  clears the referenced and modified bits as specified by the mask
+ *  of the specified physical page.
+ */
+void
+pmap_clear_refmod(ppnum_t pn, unsigned int mask)
+{
+	unsigned int  x86Mask;
+
+	x86Mask = (   ((mask &   VM_MEM_MODIFIED)?   PHYS_MODIFIED : 0)
+	            | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
+	phys_attribute_clear(pn, x86Mask);
+}
+
+/*
+ *	Routine:
+ *		pmap_disconnect
+ *
+ *	Function:
+ *		Disconnect all mappings for this page and return reference and change status
+ *		in generic format.
+ *
+ */
+unsigned int
+pmap_disconnect(ppnum_t pa)
+{
+	unsigned refmod, vmrefmod = 0;
+
+	pmap_page_protect(pa, 0);		/* disconnect the page */
+
+	pmap_assert(pa != vm_page_fictitious_addr);
+	if ((pa == vm_page_guard_addr) || !IS_MANAGED_PAGE(pa))
+		return 0;
+	refmod = pmap_phys_attributes[pa] & (PHYS_MODIFIED | PHYS_REFERENCED);
+	
+	if (refmod & PHYS_MODIFIED)
+	        vmrefmod |= VM_MEM_MODIFIED;
+	if (refmod & PHYS_REFERENCED)
+	        vmrefmod |= VM_MEM_REFERENCED;
+
+	return vmrefmod;
+}
diff --git a/osfmk/i386/pmap_internal.h b/osfmk/i386/pmap_internal.h
index 63bebc3ab..37757f191 100644
--- a/osfmk/i386/pmap_internal.h
+++ b/osfmk/i386/pmap_internal.h
@@ -28,7 +28,6 @@
 
 #include <vm/pmap.h>
 #include <sys/kdebug.h>
-#include <kern/debug.h>
 
 #ifdef MACH_KERNEL_PRIVATE
 
@@ -44,9 +43,8 @@
 	simple_unlock(&(pmap)->lock);		\
 }
 
-
 #define PMAP_UPDATE_TLBS(pmap, s, e)					\
-	pmap_flush_tlbs(pmap)
+	pmap_flush_tlbs(pmap, s, e)
 
 #define	iswired(pte)	((pte) & INTEL_PTE_WIRED)
 
@@ -60,6 +58,9 @@ extern	boolean_t	pmap_trace;
 #define PMAP_TRACE(x,a,b,c,d,e)	KERNEL_DEBUG(x,a,b,c,d,e)
 #endif /* PMAP_TRACES */
 
+#define PMAP_TRACE_CONSTANT(x,a,b,c,d,e)				\
+	KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e)
+
 void		pmap_expand_pml4(
 			pmap_t		map,
 			vm_map_offset_t	v);
@@ -67,12 +68,26 @@ void		pmap_expand_pml4(
 void		pmap_expand_pdpt(
 			pmap_t		map,
 			vm_map_offset_t	v);
-extern void 	pmap_flush_tlbs(pmap_t pmap);
 
-#if	defined(__x86_64__)
-extern const boolean_t cpu_64bit;
-#else
+void		phys_attribute_set(
+			ppnum_t		phys,
+			int		bits);
+
+void		pmap_set_reference(
+			ppnum_t pn);
+
+boolean_t	phys_page_exists(
+			ppnum_t pn);
+
+void pmap_flush_tlbs(pmap_t, vm_map_offset_t, vm_map_offset_t);
+
+void
+pmap_update_cache_attributes_locked(ppnum_t, unsigned);
+
+#if CONFIG_YONAH
 extern boolean_t cpu_64bit;
+#else
+extern const boolean_t cpu_64bit;
 #endif
 
 /*
@@ -99,8 +114,8 @@ extern boolean_t cpu_64bit;
 PV HASHING Changes - JK 1/2007
 
 Pve's establish physical to virtual mappings.  These are used for aliasing of a 
-physical page to (potentially many) virtual addresses within pmaps. In the previous 
-implementation the structure of the pv_entries (each 16 bytes in size) was
+physical page to (potentially many) virtual addresses within pmaps. In the
+previous implementation the structure of the pv_entries (each 16 bytes in size) was
 
 typedef struct pv_entry {
     struct pv_entry_t    next;
@@ -108,20 +123,23 @@ typedef struct pv_entry {
     vm_map_offset_t   va;
 } *pv_entry_t;
 
-An initial array of these is created at boot time, one per physical page of memory, 
-indexed by the physical page number. Additionally, a pool of entries is created from a 
-pv_zone to be used as needed by pmap_enter() when it is creating new mappings.  
-Originally, we kept this pool around because the code in pmap_enter() was unable to 
-block if it needed an entry and none were available - we'd panic.  Some time ago I 
-restructured the pmap_enter() code so that for user pmaps it can block while zalloc'ing 
-a pv structure and restart, removing a panic from the code (in the case of the kernel 
-pmap we cannot block and still panic, so, we keep a separate hot pool for use only on 
-kernel pmaps).  The pool has not been removed since there is a large performance gain 
-keeping freed pv's around for reuse and not suffering the overhead of zalloc for every new pv we need.
-
-As pmap_enter() created new mappings it linked the new pve's for them off the fixed 
-pv array for that ppn (off the next pointer).  These pve's are accessed for several 
-operations, one of them being address space teardown.  In that case, we basically do this
+An initial array of these is created at boot time, one per physical page of
+memory, indexed by the physical page number. Additionally, a pool of entries
+is created from a pv_zone to be used as needed by pmap_enter() when it is
+creating new mappings.  Originally, we kept this pool around because the code
+in pmap_enter() was unable to block if it needed an entry and none were
+available - we'd panic.  Some time ago I restructured the pmap_enter() code
+so that for user pmaps it can block while zalloc'ing a pv structure and restart,
+removing a panic from the code (in the case of the kernel pmap we cannot block
+and still panic, so, we keep a separate hot pool for use only on kernel pmaps).
+The pool has not been removed since there is a large performance gain keeping
+freed pv's around for reuse and not suffering the overhead of zalloc for every
+new pv we need.
+
+As pmap_enter() created new mappings it linked the new pve's for them off the
+fixed pv array for that ppn (off the next pointer).  These pve's are accessed
+for several operations, one of them being address space teardown. In that case,
+we basically do this
 
 	for (every page/pte in the space) {
 		calc pve_ptr from the ppn in the pte
@@ -133,124 +151,197 @@ operations, one of them being address space teardown.  In that case, we basicall
 		}
 	}
 
-The problem arose when we were running, say 8000 (or even 2000) apache or other processes 
-and one or all terminate. The list hanging off each pv array entry could have thousands of 
-entries.  We were continuously linearly searching each of these lists as we stepped through 
-the address space we were tearing down.  Because of the locks we hold, likely taking a cache 
-miss for each node,  and interrupt disabling for MP issues the system became completely 
-unresponsive for many seconds while we did this.
-
-Realizing that pve's are accessed in two distinct ways (linearly running the list by ppn 
-for operations like pmap_page_protect and finding and modifying/removing a single pve as 
-part of pmap_enter processing) has led to modifying the pve structures and databases.
-
-There are now two types of pve structures.  A "rooted" structure which is basically the 
-original structure accessed in an array by ppn, and a ''hashed'' structure accessed on a 
-hash list via a hash of [pmap, vaddr].  These have been designed with the two goals of 
-minimizing wired memory and making the lookup of a ppn faster.  Since a vast majority of 
-pages in the system are not aliased and hence represented by a single pv entry I've kept 
-the rooted entry size as small as possible because there is one of these dedicated for 
-every physical page of memory.  The hashed pve's are larger due to the addition of the hash 
-link and the ppn entry needed for matching while running the hash list to find the entry we 
-are looking for.  This way, only systems that have lots of aliasing (like 2000+ httpd procs) 
-will pay the extra memory price. Both structures have the same first three fields allowing 
-some simplification in the code.
+The problem arose when we were running, say, 8000 (or even 2000) apache or
+other processes and one or all of them terminated. The list hanging off each
+pv array entry could have thousands of entries.  We were continuously linearly
+searching each of these lists as we stepped through the address space we were
+tearing down.  Because of the locks we hold, likely taking a cache miss for
+each node, and interrupt disabling for MP issues, the system became completely
+unresponsive for many seconds while we did this.
+
+Realizing that pve's are accessed in two distinct ways (linearly running the
+list by ppn for operations like pmap_page_protect and finding and
+modifying/removing a single pve as part of pmap_enter processing) has led to
+modifying the pve structures and databases.
+
+There are now two types of pve structures.  A "rooted" structure which is
+basically the original structure accessed in an array by ppn, and a ''hashed''
+structure accessed on a hash list via a hash of [pmap, vaddr]. These have been
+designed with the two goals of minimizing wired memory and making the lookup of
+a ppn faster.  Since a vast majority of pages in the system are not aliased
+and hence represented by a single pv entry I've kept the rooted entry size as
+small as possible because there is one of these dedicated for every physical
+page of memory.  The hashed pve's are larger due to the addition of the hash
+link and the ppn entry needed for matching while running the hash list to find
+the entry we are looking for.  This way, only systems that have lots of
+aliasing (like 2000+ httpd procs) will pay the extra memory price. Both
+structures have the same first three fields allowing some simplification in
+the code.
 
 They have these shapes
 
 typedef struct pv_rooted_entry {
-        queue_head_t qlink;
-        vm_map_offset_t va;
-        pmap_t          pmap;
+	queue_head_t		qlink;
+        vm_map_offset_t		va;
+	pmap_t			pmap;
 } *pv_rooted_entry_t;
 
 
 typedef struct pv_hashed_entry {
-  queue_head_t qlink;
-  vm_map_offset_t va;
-  pmap_t        pmap;
-  ppnum_t ppn;
-  struct pv_hashed_entry *nexth;
+	queue_head_t		qlink;
+	vm_map_offset_t		va;
+	pmap_t			pmap;
+	ppnum_t 		ppn;
+	struct pv_hashed_entry *nexth;
 } *pv_hashed_entry_t;
 
-The main flow difference is that the code is now aware of the rooted entry and the hashed 
-entries.  Code that runs the pv list still starts with the rooted entry and then continues 
-down the qlink onto the hashed entries.  Code that is looking up a specific pv entry first 
-checks the rooted entry and then hashes and runs the hash list for the match. The hash list 
-lengths are much smaller than the original pv lists that contained all aliases for the specific ppn.
+The main flow difference is that the code is now aware of the rooted entry and
+the hashed entries.  Code that runs the pv list still starts with the rooted
+entry and then continues down the qlink onto the hashed entries.  Code that is
+looking up a specific pv entry first checks the rooted entry and then hashes
+and runs the hash list for the match. The hash list lengths are much smaller
+than the original pv lists that contained all aliases for the specific ppn.
 
 */
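To ground the scheme just described, a minimal lookup sketch (illustrative only: LOCK_PVH()/LOCK_PV_HASH() locking is omitted, and this helper is not part of the patch):

	static inline pv_hashed_entry_t
	pv_lookup_sketch(pmap_t pmap, vm_map_offset_t va, ppnum_t ppn)
	{
		pv_rooted_entry_t pv_h = pai_to_pvh(ppn_to_pai(ppn));

		/* Most pages have a single mapping: check the rooted entry first. */
		if (pv_h->pmap == pmap && pv_h->va == va)
			return (pv_hashed_entry_t) pv_h;

		/* Otherwise run the (short) [pmap, vaddr] hash chain. */
		pv_hashed_entry_t pvh_e = *pvhash(pvhashidx(pmap, va));
		while (pvh_e != PV_HASHED_ENTRY_NULL) {
			if (pvh_e->pmap == pmap && pvh_e->va == va && pvh_e->ppn == ppn)
				return pvh_e;
			pvh_e = pvh_e->nexth;
		}
		return PV_HASHED_ENTRY_NULL;
	}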
 
-typedef struct pv_rooted_entry {     /* first three entries must match pv_hashed_entry_t */
-        queue_head_t qlink;
-	vm_map_offset_t	va;		/* virtual address for mapping */
-	pmap_t		pmap;		/* pmap where mapping lies */
+typedef struct pv_rooted_entry {
+	/* first three entries must match pv_hashed_entry_t */
+        queue_head_t		qlink;
+	vm_map_offset_t		va;	/* virtual address for mapping */
+	pmap_t			pmap;	/* pmap where mapping lies */
 } *pv_rooted_entry_t;
 
 #define PV_ROOTED_ENTRY_NULL	((pv_rooted_entry_t) 0)
 
-
-typedef struct pv_hashed_entry {     /* first three entries must match pv_rooted_entry_t */
-	queue_head_t qlink;
-	vm_map_offset_t va;
-	pmap_t        pmap;
-	ppnum_t ppn;
-	struct pv_hashed_entry *nexth;
+typedef struct pv_hashed_entry {
+	/* first three entries must match pv_rooted_entry_t */
+	queue_head_t		qlink;
+	vm_map_offset_t		va;
+	pmap_t			pmap;
+	ppnum_t			ppn;
+	struct pv_hashed_entry	*nexth;
 } *pv_hashed_entry_t;
 
 #define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0)
 
-/* #define PV_DEBUG 1   uncomment to enable some PV debugging code */
+//#define PV_DEBUG 1   /* uncomment to enable some PV debugging code */
 #ifdef PV_DEBUG
 #define CHK_NPVHASH() if(0 == npvhash) panic("npvhash uninitialized");
 #else
-#define CHK_NPVHASH()
+#define CHK_NPVHASH()
 #endif
 
 #define NPVHASH 4095   /* MUST BE 2^N - 1 */
-#define PV_HASHED_LOW_WATER_MARK 5000
-#define PV_HASHED_KERN_LOW_WATER_MARK 400
-#define PV_HASHED_ALLOC_CHUNK 2000
-#define PV_HASHED_KERN_ALLOC_CHUNK 200
-
-#define	PV_HASHED_ALLOC(pvh_e) { \
-	simple_lock(&pv_hashed_free_list_lock); \
-	if ((pvh_e = pv_hashed_free_list) != 0) { \
-	  pv_hashed_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;	\
-	  pv_hashed_free_count--;					\
-	  if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK)		\
-		  if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
-			  thread_call_enter(mapping_adjust_call);	\
-	}								\
-	simple_unlock(&pv_hashed_free_list_lock); \
+#define PV_HASHED_LOW_WATER_MARK_DEFAULT 5000
+#define PV_HASHED_KERN_LOW_WATER_MARK_DEFAULT 2000
+#define PV_HASHED_ALLOC_CHUNK_INITIAL 2000
+#define PV_HASHED_KERN_ALLOC_CHUNK_INITIAL 200
+
+extern volatile uint32_t	mappingrecurse;
+extern uint32_t  pv_hashed_low_water_mark, pv_hashed_kern_low_water_mark;
+
+/*
+ * PV hash locking
+ */
+
+#define LOCK_PV_HASH(hash)	lock_hash_hash(hash)
+#define UNLOCK_PV_HASH(hash)	unlock_hash_hash(hash)
+extern uint32_t npvhash;
+extern pv_hashed_entry_t	*pv_hash_table;  /* hash lists */
+extern pv_hashed_entry_t	pv_hashed_free_list;
+extern pv_hashed_entry_t	pv_hashed_kern_free_list;
+decl_simple_lock_data(extern, pv_hashed_free_list_lock)
+decl_simple_lock_data(extern, pv_hashed_kern_free_list_lock)
+decl_simple_lock_data(extern, pv_hash_table_lock)
+
+extern zone_t		pv_hashed_list_zone;	/* zone of pv_hashed_entry
+						 * structures */
+
+extern uint32_t		pv_hashed_free_count;
+extern uint32_t		pv_hashed_kern_free_count;
+/*
+ *	Each entry in the pv_head_table is locked by a bit in the
+ *	pv_lock_table.  The lock bits are accessed by the address of
+ *	the frame they lock.
+ */
+#define pv_lock_table_size(n)	(((n)+BYTE_SIZE-1)/BYTE_SIZE)
+#define pv_hash_lock_table_size(n)  (((n)+BYTE_SIZE-1)/BYTE_SIZE)
+extern char		*pv_lock_table;		/* pointer to array of bits */
+extern char		*pv_hash_lock_table;
+extern pv_rooted_entry_t pv_head_table;	/* array of entries, one per page */
+
+extern event_t mapping_replenish_event;
+
+static inline void	PV_HASHED_ALLOC(pv_hashed_entry_t *pvh_ep) {
+
+	simple_lock(&pv_hashed_free_list_lock);
+	/* If the kernel reserved pool is low, let non-kernel mappings allocate
+	 * synchronously, possibly subject to a throttle.
+	 */
+	if ((pv_hashed_kern_free_count >= pv_hashed_kern_low_water_mark) &&
+	    (*pvh_ep = pv_hashed_free_list) != 0) {
+		pv_hashed_free_list = (pv_hashed_entry_t)(*pvh_ep)->qlink.next;
+		pv_hashed_free_count--;
+	}
+
+	simple_unlock(&pv_hashed_free_list_lock);
+
+	if (pv_hashed_free_count < pv_hashed_low_water_mark) {
+		if (!mappingrecurse && hw_compare_and_store(0,1, &mappingrecurse))
+			thread_wakeup(&mapping_replenish_event);
+	}
 }
 
-#define	PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {	\
-	simple_lock(&pv_hashed_free_list_lock); \
-	pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;	\
-	pv_hashed_free_list = pvh_eh; \
-	pv_hashed_free_count += pv_cnt;		  \
-	simple_unlock(&pv_hashed_free_list_lock); \
+static inline void	PV_HASHED_FREE_LIST(pv_hashed_entry_t pvh_eh, pv_hashed_entry_t pvh_et, int pv_cnt) {
+	simple_lock(&pv_hashed_free_list_lock);
+	pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;
+	pv_hashed_free_list = pvh_eh;
+	pv_hashed_free_count += pv_cnt;
+	simple_unlock(&pv_hashed_free_list_lock);
 }
 
-#define	PV_HASHED_KERN_ALLOC(pvh_e) { \
-	simple_lock(&pv_hashed_kern_free_list_lock); \
-	if ((pvh_e = pv_hashed_kern_free_list) != 0) { \
-	  pv_hashed_kern_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;	\
-	  pv_hashed_kern_free_count--;					\
-	  if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) \
-		  if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
-			  thread_call_enter(mapping_adjust_call);	\
-	}								\
-	simple_unlock(&pv_hashed_kern_free_list_lock); \
+extern unsigned pmap_kern_reserve_alloc_stat;
+
+static inline void	PV_HASHED_KERN_ALLOC(pv_hashed_entry_t *pvh_e) {
+	simple_lock(&pv_hashed_kern_free_list_lock);
+
+	if ((*pvh_e = pv_hashed_kern_free_list) != 0) {
+		pv_hashed_kern_free_list = (pv_hashed_entry_t)(*pvh_e)->qlink.next;
+		pv_hashed_kern_free_count--;
+		pmap_kern_reserve_alloc_stat++;
+	}
+
+	simple_unlock(&pv_hashed_kern_free_list_lock);
+
+	if (pv_hashed_kern_free_count < pv_hashed_kern_low_water_mark) {
+		if (!mappingrecurse && hw_compare_and_store(0,1, &mappingrecurse))
+			thread_wakeup(&mapping_replenish_event);
+	}
 }
 
-#define	PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {	 \
-	simple_lock(&pv_hashed_kern_free_list_lock); \
-	pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;	\
-	pv_hashed_kern_free_list = pvh_eh; \
-	pv_hashed_kern_free_count += pv_cnt;	       \
-	simple_unlock(&pv_hashed_kern_free_list_lock); \
+static inline void	PV_HASHED_KERN_FREE_LIST(pv_hashed_entry_t pvh_eh, pv_hashed_entry_t pvh_et, int pv_cnt) {
+	simple_lock(&pv_hashed_kern_free_list_lock);
+	pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;
+	pv_hashed_kern_free_list = pvh_eh;
+	pv_hashed_kern_free_count += pv_cnt;
+	simple_unlock(&pv_hashed_kern_free_list_lock);
+}
+
+extern uint64_t pmap_pv_throttle_stat, pmap_pv_throttled_waiters;
+extern event_t pmap_user_pv_throttle_event;
+
+static inline void pmap_pv_throttle(__unused pmap_t p) {
+	pmap_assert(p != kernel_pmap);
+	/* Apply throttle on non-kernel mappings */
+	if (pv_hashed_kern_free_count < (pv_hashed_kern_low_water_mark / 2)) {
+		pmap_pv_throttle_stat++;
+		/* This doesn't need to be strictly accurate, merely a hint
+		 * to eliminate the timeout when the reserve is replenished.
+		 */
+		pmap_pv_throttled_waiters++;
+		assert_wait_timeout(&pmap_user_pv_throttle_event, THREAD_UNINT, 1, 1000 * NSEC_PER_USEC);
+		thread_block(THREAD_CONTINUE_NULL);
+	}
 }
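Usage in this patch (see the pmap_enter() hunk in pmap_x86_common.c below): when a user pmap finds the hashed free list empty, it unlocks the PV head and the pmap, calls pmap_pv_throttle(), and only then falls back to zalloc() and retries, so user mappings cannot drain the kernel reserve.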
 
 /*
@@ -264,7 +355,6 @@ typedef struct pv_hashed_entry {     /* first three entries must match pv_rooted
 #define lock_pvh_pai(pai)	bit_lock(pai, (void *)pv_lock_table)
 #define unlock_pvh_pai(pai)	bit_unlock(pai, (void *)pv_lock_table)
 #define pvhash(idx)		(&pv_hash_table[idx])
-
 #define lock_hash_hash(hash)	bit_lock(hash, (void *)pv_hash_lock_table)
 #define unlock_hash_hash(hash)	bit_unlock(hash, (void *)pv_hash_lock_table)
 
@@ -279,6 +369,9 @@ typedef struct pv_hashed_entry {     /* first three entries must match pv_rooted
 #define	PHYS_REFERENCED	INTEL_PTE_REF	/* page referenced */
 #define PHYS_MANAGED	INTEL_PTE_VALID /* page is managed */
 #define PHYS_NOENCRYPT	INTEL_PTE_USER	/* no need to encrypt this page in the hibernation image */
+#define	PHYS_NCACHE	INTEL_PTE_NCACHE
+#define	PHYS_PTA	INTEL_PTE_PTA
+#define	PHYS_CACHEABILITY_MASK (INTEL_PTE_PTA | INTEL_PTE_NCACHE)
 
 /*
  *	Amount of virtual memory mapped by one
@@ -325,31 +418,7 @@ typedef struct pv_hashed_entry {     /* first three entries must match pv_rooted
 	unlock_pvh_pai(index);		\
 	mp_enable_preemption();		\
 }
-/*
- * PV hash locking
- */
-
-#define LOCK_PV_HASH(hash)         lock_hash_hash(hash)
-#define UNLOCK_PV_HASH(hash)       unlock_hash_hash(hash)
-extern uint32_t npvhash;
-extern pv_hashed_entry_t     	*pv_hash_table;  /* hash lists */
-extern pv_hashed_entry_t	pv_hashed_free_list;
-extern pv_hashed_entry_t	pv_hashed_kern_free_list;
-decl_simple_lock_data(extern, pv_hashed_free_list_lock)
-decl_simple_lock_data(extern, pv_hashed_kern_free_list_lock)
-decl_simple_lock_data(extern, pv_hash_table_lock)
 
-extern zone_t		pv_hashed_list_zone;	/* zone of pv_hashed_entry structures */
-
-extern int			pv_hashed_free_count;
-extern int			pv_hashed_kern_free_count;
-#define pv_lock_table_size(n)	(((n)+BYTE_SIZE-1)/BYTE_SIZE)
-#define pv_hash_lock_table_size(n)  (((n)+BYTE_SIZE-1)/BYTE_SIZE)
-extern char	*pv_lock_table;		/* pointer to array of bits */
-
-extern char    *pv_hash_lock_table;
-extern pv_rooted_entry_t	pv_head_table;		/* array of entries, one
-							 * per page */
 extern uint64_t pde_mapped_size;
 
 extern char		*pmap_phys_attributes;
@@ -379,23 +448,23 @@ extern uint64_t max_preemption_latency_tsc;
 #define pmap_intr_assert()
 #endif
 
-extern int		nx_enabled;
-extern unsigned int	inuse_ptepages_count;
+extern int 		nx_enabled;
+extern unsigned int    inuse_ptepages_count;
 
 static inline uint32_t
 pvhashidx(pmap_t pmap, vm_map_offset_t va)
 {
 	return ((uint32_t)(uintptr_t)pmap ^
-		((uint32_t)((uint64_t)va >> PAGE_SHIFT) & 0xFFFFFFFF)) &
+		((uint32_t)(va >> PAGE_SHIFT) & 0xFFFFFFFF)) &
 	       npvhash;
 }
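Because npvhash is of the form 2^n - 1 (NPVHASH above "MUST BE 2^N - 1"), the final AND is equivalent to reducing the mixed value modulo the table size; e.g., with npvhash == 4095 every (pmap, va) pair lands in [0, 4095].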
 
+
 /*
  * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
  * properly deals with the anchor.
  * must be called with the hash locked, does not unlock it
  */
-
 static inline void 
 pmap_pvh_unlink(pv_hashed_entry_t pvh)
 {
@@ -457,7 +526,7 @@ pv_hash_remove(pv_hashed_entry_t pvh_e)
 	remque(&pvh_e->qlink);
 	pmap_pvh_unlink(pvh_e);
 	UNLOCK_PV_HASH(pvhash_idx);
-}
+}
 
 static inline boolean_t popcnt1(uint64_t distance) {
 	return ((distance & (distance - 1)) == 0);
@@ -639,16 +708,16 @@ pmap_cpc_exit:
 	pmap_pagetable_corruption_log(incident, suppress_reason, action, pmap, vaddr, &cpte, *ppnp, pvpmap, pvva);
 	return action;
 }
+
 /*
  * Remove pv list entry.
  * Called with pv_head_table entry locked.
  * Returns pv entry to be freed (or NULL).
  */
-
 static inline __attribute__((always_inline)) pv_hashed_entry_t
-pmap_pv_remove( pmap_t		pmap,
-		vm_map_offset_t	vaddr,
-		ppnum_t		*ppnp,
+pmap_pv_remove(pmap_t		pmap,
+	       vm_map_offset_t	vaddr,
+	       ppnum_t		*ppnp,
 		pt_entry_t	*pte) 
 {
 	pv_hashed_entry_t       pvh_e;
@@ -721,7 +790,8 @@ pmap_pv_remove_retry:
 		LOCK_PV_HASH(pvhash_idx);
 		pprevh = pvhash(pvhash_idx);
 		if (PV_HASHED_ENTRY_NULL == *pprevh) {
-			panic("pmap_pv_remove(%p,0x%llx,0x%x): empty hash", pmap, vaddr, ppn);
+			panic("pmap_pv_remove(%p,0x%llx,0x%x): empty hash",
+			      pmap, vaddr, ppn);
 		}
 		pvh_e = *pprevh;
 		pmap_pv_hashlist_walks++;
@@ -735,6 +805,7 @@ pmap_pv_remove_retry:
 			pprevh = &pvh_e->nexth;
 			pvh_e = pvh_e->nexth;
 		}
+
 		if (PV_HASHED_ENTRY_NULL == pvh_e) {
 			pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_PRESENT);
 
@@ -755,6 +826,7 @@ pmap_pv_remove_retry:
 				}
 			}
 		}
+
 		pmap_pv_hashlist_cnts += pv_cnt;
 		if (pmap_pv_hashlist_max < pv_cnt)
 			pmap_pv_hashlist_max = pv_cnt;
@@ -766,4 +838,161 @@ pmap_pv_remove_exit:
 	return pvh_e;
 }
 
+
+extern int 	pt_fake_zone_index;
+static inline void
+PMAP_ZINFO_PALLOC(vm_size_t bytes)
+{
+	thread_t thr = current_thread();
+	task_t task;
+	zinfo_usage_t zinfo;
+
+	thr->tkm_private.alloc += bytes;
+	if (pt_fake_zone_index != -1 && 
+	    (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
+		OSAddAtomic64(bytes, (int64_t *)&zinfo[pt_fake_zone_index].alloc);
+}
+
+static inline void
+PMAP_ZINFO_PFREE(vm_size_t bytes)
+{
+	thread_t thr = current_thread();
+	task_t task;
+	zinfo_usage_t zinfo;
+
+	thr->tkm_private.free += bytes;
+	if (pt_fake_zone_index != -1 && 
+	    (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
+		OSAddAtomic64(bytes, (int64_t *)&zinfo[pt_fake_zone_index].free);
+}
+
+extern boolean_t	pmap_initialized;/* Has pmap_init completed? */
+#define valid_page(x) (pmap_initialized && pmap_valid_page(x))
+
+// XXX
+#define HIGH_MEM_BASE  ((uint32_t)( -NBPDE) )  /* shared gdt etc seg addr */ /* XXX64 ?? */
+// XXX
+
+
+int		phys_attribute_test(
+			ppnum_t		phys,
+			int		bits);
+void		phys_attribute_clear(
+			ppnum_t		phys,
+			int		bits);
+
+//#define PCID_DEBUG 1
+#if	PCID_DEBUG
+#define pmap_pcid_log(fmt, args...)					\
+	do {								\
+		kprintf(fmt, ##args);					\
+		printf(fmt, ##args);					\
+	} while(0)
+#else
+#define pmap_pcid_log(fmt, args...)
+#endif
+void	pmap_pcid_configure(void);
+
+#if	defined(__x86_64__)
+/*
+ * The single pml4 page per pmap is allocated at pmap create time and exists
+ * for the duration of the pmap. We allocate this page in kernel VM.
+ * This returns the address of the requested pml4 entry in the top-level page.
+ */
+static inline
+pml4_entry_t *
+pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
+{
+#if	PMAP_ASSERT
+	return PHYSMAP_PTOV(&((pml4_entry_t *)pmap->pm_cr3)[(vaddr >> PML4SHIFT) & (NPML4PG-1)]);
+#else
+	return &pmap->pm_pml4[(vaddr >> PML4SHIFT) & (NPML4PG-1)];
+#endif
+}
+
+/*
+ * Returns address of requested PDPT entry in the physmap.
+ */
+static inline pdpt_entry_t *
+pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
+{
+	pml4_entry_t	newpf;
+	pml4_entry_t	*pml4;
+
+	assert(pmap);
+	if ((vaddr > 0x00007FFFFFFFFFFFULL) &&
+	    (vaddr < 0xFFFF800000000000ULL)) {
+		return (NULL);
+	}
+
+	pml4 = pmap64_pml4(pmap, vaddr);
+	if (pml4 && ((*pml4 & INTEL_PTE_VALID))) {
+		newpf = *pml4 & PG_FRAME;
+		return &((pdpt_entry_t *) PHYSMAP_PTOV(newpf))
+			[(vaddr >> PDPTSHIFT) & (NPDPTPG-1)];
+	}
+	return (NULL);
+}
+/*
+ * Returns the address of the requested PDE entry in the physmap.
+ */
+static inline pd_entry_t *
+pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr)
+{
+	pdpt_entry_t	newpf;
+	pdpt_entry_t	*pdpt;
+
+	assert(pmap);
+	if ((vaddr > 0x00007FFFFFFFFFFFULL) &&
+	    (vaddr < 0xFFFF800000000000ULL)) {
+		return (NULL);
+	}
+
+	pdpt = pmap64_pdpt(pmap, vaddr);
+
+	if (pdpt && ((*pdpt & INTEL_PTE_VALID))) {
+		newpf = *pdpt & PG_FRAME;
+		return &((pd_entry_t *) PHYSMAP_PTOV(newpf))
+			[(vaddr >> PDSHIFT) & (NPDPG-1)];
+	}
+	return (NULL);
+}
+
+static inline pd_entry_t     *
+pmap_pde(pmap_t m, vm_map_offset_t v)
+{
+	pd_entry_t     *pde;
+
+	assert(m);
+	pde = pmap64_pde(m, v);
+
+	return pde;
+}
+
+
+/*
+ * return address of mapped pte for vaddr va in pmap pmap.
+ *
+ * In case the pde maps a superpage, return the pde, which, in this case
+ * is the actual page table entry.
+ */
+static inline pt_entry_t *
+pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
+{
+	pd_entry_t	*pde;
+	pd_entry_t	newpf;
+
+	assert(pmap);
+	pde = pmap_pde(pmap, vaddr);
+
+	if (pde && ((*pde & INTEL_PTE_VALID))) {
+		if (*pde & INTEL_PTE_PS) 
+			return pde;
+		newpf = *pde & PG_FRAME;
+		return &((pt_entry_t *)PHYSMAP_PTOV(newpf))
+			[i386_btop(vaddr) & (ppnum_t)(NPTEPG-1)];
+	}
+	return (NULL);
+}
+#endif
 #endif /* MACH_KERNEL_PRIVATE */
diff --git a/osfmk/i386/pmap_pcid.h b/osfmk/i386/pmap_pcid.h
new file mode 100644
index 000000000..0e16f3e2d
--- /dev/null
+++ b/osfmk/i386/pmap_pcid.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef	_PMAP_PCID_
+#define _PMAP_PCID_	1
+#if defined(__x86_64__)
+void pmap_pcid_initialize(pmap_t);
+void pmap_pcid_initialize_kernel(pmap_t);
+pcid_t	pmap_pcid_allocate_pcid(int);
+void	pmap_pcid_deallocate_pcid(int, pmap_t);
+void	pmap_destroy_pcid_sync_action(void *);
+void	pmap_destroy_pcid_sync(pmap_t);
+void	pmap_pcid_lazy_flush(pmap_t);
+void	pmap_pcid_activate(pmap_t, int);
+pcid_t	pcid_for_pmap_cpu_tuple(pmap_t, int);
+
+#define PMAP_INVALID ((pmap_t)0xDEAD7347)
+#define PMAP_PCID_INVALID_PCID	(0xDEAD)
+#define	PMAP_PCID_MAX_REFCOUNT (0xF0)
+#define	PMAP_PCID_MIN_PCID (1)
+
+extern uint32_t pmap_pcid_ncpus;
+
+static inline void
+tlb_flush_global(void) {
+	uintptr_t cr4 = get_cr4();
+	pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0);
+	pmap_assert2(((cr4 & CR4_PGE) || ml_at_interrupt_context()), "CR4: 0x%lx", cr4);
+	/*
+	 * We are, unfortunately, forced to rely on this expensive
+	 * read-modify-write-write scheme due to the inadequate
+	 * TLB invalidation ISA. The read is necessary as
+	 * the kernel does not "own" the contents of CR4, the VMX
+	 * feature in particular. It may be possible to
+	 * avoid a global flush and instead track a generation
+	 * count of kernel invalidations, but that scheme
+	 * has its disadvantages as well.
+	 */
+	set_cr4(cr4 & ~CR4_PGE);
+	set_cr4(cr4 | CR4_PGE); 
+	return;
+}
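The PGE toggle works because clearing CR4.PGE invalidates all TLB entries, including global ones, which an ordinary CR3 write would leave intact; re-setting it restores global-page support. The surrounding asserts check that this runs with interrupts disabled or preemption off, since migrating mid-sequence would flush the wrong cpu.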
+
+static inline void pmap_pcid_invalidate_all_cpus(pmap_t tpmap) {
+	unsigned i;
+
+	pmap_assert((sizeof(tpmap->pmap_pcid_coherency_vector) >= real_ncpus) && (!(sizeof(tpmap->pmap_pcid_coherency_vector) & 7)));
+
+	for (i = 0; i < real_ncpus; i += 8) {
+		*(uint64_t *)(uintptr_t)&tpmap->pmap_pcid_coherency_vector[i] = (~0ULL);
+	}
+}
+
+static inline void pmap_pcid_validate_current(void) {
+	int	ccpu = cpu_number();
+	volatile uint8_t *cptr = cpu_datap(ccpu)->cpu_pmap_pcid_coherentp;
+#ifdef	PMAP_MODULE
+	pmap_assert(cptr == &(current_thread()->map->pmap->pmap_pcid_coherency_vector[ccpu]));
+#endif
+	if (cptr) {
+		*cptr = 0;
+	}
+
+}
+
+static inline void pmap_pcid_invalidate_cpu(pmap_t tpmap, int ccpu) {
+	tpmap->pmap_pcid_coherency_vector[ccpu] = 0xFF;
+}
+
+static inline void pmap_pcid_validate_cpu(pmap_t tpmap, int ccpu) {
+	tpmap->pmap_pcid_coherency_vector[ccpu] = 0;
+}
+#endif /* x86_64 */
+#endif
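A design note on the coherency vector used above: each pmap keeps one byte per cpu; pmap_pcid_invalidate_cpu() marks a cpu stale (0xFF) and pmap_pcid_validate_cpu() marks it coherent (0). Because pmap_pcid_invalidate_all_cpus() stores through a uint64_t pointer to touch eight entries per iteration, PMAP_PCID_MAX_CPUS must be a multiple of 8, which its pmap_assert() verifies.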
diff --git a/osfmk/i386/pmap_x86_common.c b/osfmk/i386/pmap_x86_common.c
index a8c3423b4..d7e63d6b0 100644
--- a/osfmk/i386/pmap_x86_common.c
+++ b/osfmk/i386/pmap_x86_common.c
@@ -29,27 +29,12 @@
 #include <vm/vm_map.h>
 #include <i386/pmap_internal.h>
 
-
 void		pmap_remove_range(
 			pmap_t		pmap,
 			vm_map_offset_t	va,
 			pt_entry_t	*spte,
 			pt_entry_t	*epte);
 
-pv_rooted_entry_t	pv_head_table;		/* array of entries, one per
-						 * page */
-thread_call_t 		mapping_adjust_call;
-static thread_call_data_t mapping_adjust_call_data;
-uint32_t		mappingrecurse = 0;
-
-pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[PMAP_PAGETABLE_CORRUPTION_MAX_LOG];
-uint32_t pmap_pagetable_corruption_incidents;
-uint64_t pmap_pagetable_corruption_last_abstime = (~(0ULL) >> 1);
-uint64_t pmap_pagetable_corruption_interval_abstime;
-thread_call_t 	pmap_pagetable_corruption_log_call;
-static thread_call_data_t 	pmap_pagetable_corruption_log_call_data;
-boolean_t pmap_pagetable_corruption_timeout = FALSE;
-
 /*
  * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
  * on a NBPDE boundary.
@@ -103,8 +88,8 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t
 		panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);
 
 	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
-	    (int) grand, (int) subord,
-	    (int) (va_start>>32), (int) va_start, 0);
+	(uintptr_t) grand, (uintptr_t) subord,
+	    (uintptr_t) (va_start>>32), (uintptr_t) va_start, 0);
 
 	nvaddr = (vm_map_offset_t)nstart;
 	num_pde = size >> PDESHIFT;
@@ -216,8 +201,8 @@ kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
 	uint64_t npdpt = PMAP_INVALID_PDPTNUM;
 
 	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
-	    (int) grand, 
-	    (int) (vaddr>>32), (int) vaddr, 0, 0);
+	    (uintptr_t) grand, 
+	    (uintptr_t) (vaddr>>32), (uintptr_t) vaddr, 0, 0);
 
 	if ((size & (pmap_nesting_size_min-1)) ||
 	    (vaddr & (pmap_nesting_size_min-1))) {
@@ -337,6 +322,67 @@ pfp_exit:
         return ppn;
 }
 
+/*
+ * Update cache attributes for all extant managed mappings.
+ * Assumes PV for this page is locked, and that the page
+ * is managed.
+ */
+
+void
+pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
+	pv_rooted_entry_t	pv_h, pv_e;
+	pv_hashed_entry_t       pvh_e, nexth;
+	vm_map_offset_t vaddr;
+	pmap_t	pmap;
+	pt_entry_t	*ptep;
+	
+	assert(IS_MANAGED_PAGE(pn));
+
+	pv_h = pai_to_pvh(pn);
+	/* TODO: translate the PHYS_* bits to PTE bits; while they're
+	 * currently identical, they may not remain so.
+	 * Potential optimizations (here and in page_protect):
+	 * parallel shootdowns, and checking for redundant
+	 * attribute modifications.
+	 */
+	
+	/*
+	 * Alter attributes on all mappings
+	 */
+	if (pv_h->pmap != PMAP_NULL) {
+		pv_e = pv_h;
+		pvh_e = (pv_hashed_entry_t)pv_e;
+
+		do {
+			pmap = pv_e->pmap;
+			vaddr = pv_e->va;
+			ptep = pmap_pte(pmap, vaddr);
+		
+			if (0 == ptep)
+				panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
+
+			nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
+			pmap_update_pte(ptep, *ptep, (*ptep & ~PHYS_CACHEABILITY_MASK) | attributes);
+			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
+			pvh_e = nexth;
+		} while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
+	}
+}
+
+void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
+	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
+
+	if (dofilter) {
+		CPU_CR3_MARK_INACTIVE();
+	} else {
+		CPU_CR3_MARK_ACTIVE();
+		__asm__ volatile("mfence");
+		if (current_cpu_datap()->cpu_tlb_invalid)
+			process_pmap_updates();
+	}
+}
+
+
 /*
  *	Insert the given physical page (p) at
  *	the specified virtual address (v) in the
@@ -444,7 +490,6 @@ Retry:
 		*pte = 0;
 	}
 
-
 	old_pa = pte_to_pa(*pte);
 	pai = pa_index(old_pa);
 	old_pa_locked = FALSE;
@@ -469,12 +514,15 @@ Retry:
 	 *	at this address.
 	 */
 	if (old_pa == pa) {
+		pt_entry_t old_attributes =
+		    *pte & ~(INTEL_PTE_REF | INTEL_PTE_MOD);
 
 		/*
 	         *	May be changing its wired attribute or protection
 	         */
 
 		template = pa_to_pte(pa) | INTEL_PTE_VALID;
+		template |= pmap_get_cache_attributes(pa_index(pa));
 
 		if (VM_MEM_NOT_CACHEABLE ==
 		    (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
@@ -492,11 +540,11 @@ Retry:
 
 		if (wired) {
 			template |= INTEL_PTE_WIRED;
-			if (!iswired(*pte))
+			if (!iswired(old_attributes))
 				OSAddAtomic(+1,
 					&pmap->stats.wired_count);
 		} else {
-			if (iswired(*pte)) {
+			if (iswired(old_attributes)) {
 				assert(pmap->stats.wired_count >= 1);
 				OSAddAtomic(-1,
 					&pmap->stats.wired_count);
@@ -504,6 +552,9 @@ Retry:
 		}
 		if (superpage)		/* this path can not be used */
 			template |= INTEL_PTE_PS;	/* to change the page size! */
+		/* Determine delta, PV locked */
+		need_tlbflush =
+		    ((old_attributes ^ template) != INTEL_PTE_WIRED);
 
 		/* store modified PTE and preserve RC bits */
 		pmap_update_pte(pte, *pte,
@@ -512,7 +563,6 @@ Retry:
 			UNLOCK_PVH(pai);
 			old_pa_locked = FALSE;
 		}
-		need_tlbflush = TRUE;
 		goto Done;
 	}
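The need_tlbflush computation above elides the TLB shootdown when the old and new PTEs differ in exactly the wired bit, which is software-only state; any other delta (including none at all) still flushes. A standalone illustration with assumed values, not from the patch:

	/* Illustration with an assumed physical address: */
	pmap_paddr_t pa = 0x100000;
	pt_entry_t oldp = pa_to_pte(pa) | INTEL_PTE_VALID | INTEL_PTE_WIRED;
	pt_entry_t newp = pa_to_pte(pa) | INTEL_PTE_VALID;
	/* XOR isolates the delta; a wired-only delta needs no shootdown. */
	assert((oldp ^ newp) == INTEL_PTE_WIRED);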
 
@@ -548,19 +598,12 @@ Retry:
 		pmap_store_pte(pte, 0);
 
 		if (IS_MANAGED_PAGE(pai)) {
-#if TESTING
-			if (pmap->stats.resident_count < 1)
-				panic("pmap_enter: resident_count");
-#endif
+			pmap_assert(old_pa_locked == TRUE);
 			assert(pmap->stats.resident_count >= 1);
 			OSAddAtomic(-1,
 				&pmap->stats.resident_count);
 
 			if (iswired(*pte)) {
-#if TESTING
-				if (pmap->stats.wired_count < 1)
-					panic("pmap_enter: wired_count");
-#endif
 				assert(pmap->stats.wired_count >= 1);
 				OSAddAtomic(-1,
 					&pmap->stats.wired_count);
@@ -624,7 +667,7 @@ Retry:
 				pvh_e = pvh_new;
 				pvh_new = PV_HASHED_ENTRY_NULL;
 			} else if (PV_HASHED_ENTRY_NULL == pvh_e) {
-				PV_HASHED_ALLOC(pvh_e);
+				PV_HASHED_ALLOC(&pvh_e);
 				if (PV_HASHED_ENTRY_NULL == pvh_e) {
 					/*
 					 * the pv list is empty. if we are on
@@ -636,10 +679,11 @@ Retry:
 					 * us.
 					 */
 					if (kernel_pmap == pmap) {
-						PV_HASHED_KERN_ALLOC(pvh_e);
+						PV_HASHED_KERN_ALLOC(&pvh_e);
 					} else {
 						UNLOCK_PVH(pai);
 						PMAP_UNLOCK(pmap);
+						pmap_pv_throttle(pmap);
 						pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
 						goto Retry;
 					}
@@ -664,7 +708,7 @@ Retry:
 	         * only count the mapping
 	         * for 'managed memory'
 	         */
-		OSAddAtomic(+1,  & pmap->stats.resident_count);
+		OSAddAtomic(+1,  &pmap->stats.resident_count);
 		if (pmap->stats.resident_count > pmap->stats.resident_max) {
 			pmap->stats.resident_max = pmap->stats.resident_count;
 		}
@@ -681,7 +725,13 @@ Retry:
 	 *	only the pfn changes.
 	 */
 	template = pa_to_pte(pa) | INTEL_PTE_VALID;
+	/*
+	 * DRK: It may be worth asserting on cache attribute flags that diverge
+	 * from the existing physical page attributes.
+	 */
 
+	template |= pmap_get_cache_attributes(pa_index(pa));
+	
 	if (flags & VM_MEM_NOT_CACHEABLE) {
 		if (!(flags & VM_MEM_GUARDED))
 			template |= INTEL_PTE_PTA;
@@ -728,9 +778,10 @@ Done:
 		m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
 		if (m == VM_PAGE_NULL)
 		    panic("pmap_enter: pte page not in object");
+		vm_object_unlock(delpage_pm_obj);
 		VM_PAGE_FREE(m);
 		OSAddAtomic(-1,  &inuse_ptepages_count);
-		vm_object_unlock(delpage_pm_obj);
+		PMAP_ZINFO_PFREE(PAGE_SIZE);
 	}
 
 	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
@@ -1076,10 +1127,9 @@ pmap_page_protect(
 		vaddr = pv_e->va;
 		pte = pmap_pte(pmap, vaddr);
 
-#if	DEBUG
-		if (pa_index(pte_to_pa(*pte)) != pn)
-			panic("pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
-#endif
+		pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
+		    "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
+
 		if (0 == pte) {
 			panic("pmap_page_protect() "
 				"pmap=%p pn=0x%x vaddr=0x%llx\n",
@@ -1089,16 +1139,21 @@ pmap_page_protect(
 
 		/*
 		 * Remove the mapping if new protection is NONE
-		 * or if write-protecting a kernel mapping.
 		 */
-		if (remove || pmap == kernel_pmap) {
+		if (remove) {
 			/*
 		         * Remove the mapping, collecting dirty bits.
 		         */
 			pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_VALID);
+
+			/* Remove per-pmap wired count */
+			if (iswired(*pte)) {
+				OSAddAtomic(-1, &pmap->stats.wired_count);
+			}
+
 			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
 			pmap_phys_attributes[pai] |=
-				*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+			    *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
 			pmap_store_pte(pte, 0);
 
 #if TESTING
@@ -1117,8 +1172,6 @@ pmap_page_protect(
 				 * Fix up head later.
 				 */
 				pv_h->pmap = PMAP_NULL;
-
-				pmap_phys_attributes[pai] &= ~PHYS_NOENCRYPT;
 			} else {
 				/*
 				 * Delete this entry.
@@ -1133,8 +1186,11 @@ pmap_page_protect(
 			}
 		} else {
 			/*
-		         * Write-protect.
+		         * Write-protect, after opportunistic refmod collect
 		         */
+			pmap_phys_attributes[pai] |=
+			    *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+
 			pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_WRITE);
 			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
 		}
@@ -1170,180 +1226,206 @@ done:
 		   0, 0, 0, 0, 0);
 }
 
-__private_extern__ void
-pmap_pagetable_corruption_msg_log(int (*log_func)(const char * fmt, ...)__printflike(1,2)) {
-	if (pmap_pagetable_corruption_incidents > 0) {
-		int i, e = MIN(pmap_pagetable_corruption_incidents, PMAP_PAGETABLE_CORRUPTION_MAX_LOG);
-		(*log_func)("%u pagetable corruption incident(s) detected, timeout: %u\n", pmap_pagetable_corruption_incidents, pmap_pagetable_corruption_timeout);
-		for (i = 0; i < e; i++) {
-			(*log_func)("Incident 0x%x, reason: 0x%x, action: 0x%x, time: 0x%llx\n", pmap_pagetable_corruption_records[i].incident,  pmap_pagetable_corruption_records[i].reason, pmap_pagetable_corruption_records[i].action, pmap_pagetable_corruption_records[i].abstime);
-		}
-	}
-}
-
+/*
+ *	Clear specified attribute bits.
+ */
 void
-mapping_free_prime(void)
+phys_attribute_clear(
+	ppnum_t		pn,
+	int		bits)
 {
-	int			i;
-	pv_hashed_entry_t	pvh_e;
-	pv_hashed_entry_t	pvh_eh;
-	pv_hashed_entry_t	pvh_et;
-	int			pv_cnt;
-
-	pv_cnt = 0;
-	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-	for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) {
-		pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+	pv_rooted_entry_t	pv_h;
+	pv_hashed_entry_t	pv_e;
+	pt_entry_t		*pte;
+	int			pai;
+	pmap_t			pmap;
+	char			attributes = 0;
+	
+	pmap_intr_assert();
+	assert(pn != vm_page_fictitious_addr);
+	if (pn == vm_page_guard_addr)
+		return;
 
-		pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-		pvh_eh = pvh_e;
+	pai = ppn_to_pai(pn);
 
-		if (pvh_et == PV_HASHED_ENTRY_NULL)
-		        pvh_et = pvh_e;
-		pv_cnt++;
+	if (!IS_MANAGED_PAGE(pai)) {
+		/*
+		 *	Not a managed page.
+		 */
+		return;
 	}
-	PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
 
-	pv_cnt = 0;
-	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-	for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
-		pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
+		   pn, bits, 0, 0, 0);
 
-		pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-		pvh_eh = pvh_e;
+	pv_h = pai_to_pvh(pai);
 
-		if (pvh_et == PV_HASHED_ENTRY_NULL)
-		        pvh_et = pvh_e;
-		pv_cnt++;
-	}
-	PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+	LOCK_PVH(pai);
 
-}
+	/*
+	 * Walk down PV list, clearing all modify or reference bits.
+	 * We do not have to lock the pv_list because we have
+	 * the entire pmap system locked.
+	 */
+	if (pv_h->pmap != PMAP_NULL) {
+		/*
+		 * There are some mappings.
+		 */
 
-static inline void
-pmap_pagetable_corruption_log_setup(void) {
-	if (pmap_pagetable_corruption_log_call == NULL) {
-		nanotime_to_absolutetime(PMAP_PAGETABLE_CORRUPTION_INTERVAL, 0, &pmap_pagetable_corruption_interval_abstime);
-		thread_call_setup(&pmap_pagetable_corruption_log_call_data,
-		    (thread_call_func_t) pmap_pagetable_corruption_msg_log,
-		    (thread_call_param_t) &printf);
-		pmap_pagetable_corruption_log_call = &pmap_pagetable_corruption_log_call_data;
-	}
-}
+		pv_e = (pv_hashed_entry_t)pv_h;
 
-void
-mapping_adjust(void)
-{
-	pv_hashed_entry_t	pvh_e;
-	pv_hashed_entry_t	pvh_eh;
-	pv_hashed_entry_t	pvh_et;
-	int			pv_cnt;
-	int             	i;
-
-	if (mapping_adjust_call == NULL) {
-		thread_call_setup(&mapping_adjust_call_data,
-				  (thread_call_func_t) mapping_adjust,
-				  (thread_call_param_t) NULL);
-		mapping_adjust_call = &mapping_adjust_call_data;
-	}
+		do {
+			vm_map_offset_t	va;
 
-	pmap_pagetable_corruption_log_setup();
+			pmap = pv_e->pmap;
+			va = pv_e->va;
 
-	pv_cnt = 0;
-	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-	if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) {
-		for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
-			pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+			 /*
+			  * Clear modify and/or reference bits.
+			  */
+			pte = pmap_pte(pmap, va);
+			attributes |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
 
-			pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-			pvh_eh = pvh_e;
+			pmap_update_pte(pte, *pte, (*pte & ~bits));
+			/* Ensure all processors using this translation
+			 * invalidate this TLB entry. The invalidation *must*
+			 * follow the PTE update, to ensure that the TLB
+			 * shadow of the 'D' bit (in particular) is
+			 * synchronized with the updated PTE.
+			 */
+			PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
 
-			if (pvh_et == PV_HASHED_ENTRY_NULL)
-			        pvh_et = pvh_e;
-			pv_cnt++;
-		}
-		PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
+
+		} while (pv_e != (pv_hashed_entry_t)pv_h);
 	}
+	/* Opportunistic refmod collection, annulled
+	 * if both REF and MOD are being cleared.
+	 */
 
-	pv_cnt = 0;
-	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-	if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) {
-		for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
-			pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+	pmap_phys_attributes[pai] |= attributes;
+	pmap_phys_attributes[pai] &= (~bits);
 
-			pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-			pvh_eh = pvh_e;
+	UNLOCK_PVH(pai);
 
-			if (pvh_et == PV_HASHED_ENTRY_NULL)
-			        pvh_et = pvh_e;
-			pv_cnt++;
-		}
-		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-	}
-	mappingrecurse = 0;
+	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
+		   0, 0, 0, 0, 0);
 }
 
-
-boolean_t
-pmap_is_noencrypt(ppnum_t pn)
+/*
+ *	Check specified attribute bits.
+ */
+int
+phys_attribute_test(
+	ppnum_t		pn,
+	int		bits)
 {
-	int		pai;
+	pv_rooted_entry_t	pv_h;
+	pv_hashed_entry_t	pv_e;
+	pt_entry_t		*pte;
+	int			pai;
+	pmap_t			pmap;
+	int			attributes = 0;
+
+	pmap_intr_assert();
+	assert(pn != vm_page_fictitious_addr);
+	if (pn == vm_page_guard_addr)
+		return 0;
 
 	pai = ppn_to_pai(pn);
 
-	if (!IS_MANAGED_PAGE(pai))
-		return (TRUE);
+	if (!IS_MANAGED_PAGE(pai)) {
+		/*
+		 *	Not a managed page.
+		 */
+		return 0;
+	}
 
-	if (pmap_phys_attributes[pai] & PHYS_NOENCRYPT)
-		return (TRUE);
+	/*
+	 * Fast check...  if bits already collected
+	 * no need to take any locks...
+	 * if not set, we need to recheck after taking
+	 * the lock in case they got pulled in while
+	 * we were waiting for the lock
+	 */
+	if ((pmap_phys_attributes[pai] & bits) == bits)
+		return bits;
 
-	return (FALSE);
-}
+	pv_h = pai_to_pvh(pai);
 
+	LOCK_PVH(pai);
 
-void
-pmap_set_noencrypt(ppnum_t pn)
-{
-	int		pai;
+	attributes = pmap_phys_attributes[pai] & bits;
 
-	pai = ppn_to_pai(pn);
 
-	if (IS_MANAGED_PAGE(pai)) {
-		LOCK_PVH(pai);
+	/*
+	 * Walk down PV list, checking the mappings until we
+	 * reach the end or we've found the desired attributes.
+	 */
+	if (attributes != bits &&
+	    pv_h->pmap != PMAP_NULL) {
+		/*
+		 * There are some mappings.
+		 */
+		pv_e = (pv_hashed_entry_t)pv_h;
+		do {
+			vm_map_offset_t va;
 
-		pmap_phys_attributes[pai] |= PHYS_NOENCRYPT;
+			pmap = pv_e->pmap;
+			va = pv_e->va;
+			/*
+	 		 * pick up modify and/or reference bits from mapping
+			 */
 
-		UNLOCK_PVH(pai);
+			pte = pmap_pte(pmap, va);
+			attributes |= (int)(*pte & bits);
+
+			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
+
+		} while ((attributes != bits) &&
+			 (pv_e != (pv_hashed_entry_t)pv_h));
 	}
-}
+	pmap_phys_attributes[pai] |= attributes;
 
+	UNLOCK_PVH(pai);
+	return (attributes);
+}
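
phys_attribute_test() returns the subset of the requested bits it finds, consulting the cached pmap_phys_attributes byte first and walking the PV list only until the remaining bits turn up. A hedged one-liner layered on top; the predicate name is an illustrative assumption rather than this file's API:

/* Sketch: a modified-bit predicate over phys_attribute_test(). */
static boolean_t
page_is_modified(ppnum_t pn)
{
	/* Nonzero iff PHYS_MODIFIED is cached or set in some mapping's PTE. */
	return (phys_attribute_test(pn, PHYS_MODIFIED) != 0);
}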
 
+/*
+ *	Routine:	pmap_change_wiring
+ *	Function:	Change the wiring attribute for a map/virtual-address
+ *			pair.
+ *	In/out conditions:
+ *			The mapping must already exist in the pmap.
+ */
 void
-pmap_clear_noencrypt(ppnum_t pn)
+pmap_change_wiring(
+	pmap_t		map,
+	vm_map_offset_t	vaddr,
+	boolean_t	wired)
 {
-	int		pai;
+	pt_entry_t	*pte;
 
-	pai = ppn_to_pai(pn);
-
-	if (IS_MANAGED_PAGE(pai)) {
-		LOCK_PVH(pai);
+	PMAP_LOCK(map);
 
-		pmap_phys_attributes[pai] &= ~PHYS_NOENCRYPT;
+	if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
+		panic("pmap_change_wiring: pte missing");
 
-		UNLOCK_PVH(pai);
+	if (wired && !iswired(*pte)) {
+		/*
+		 * wiring down mapping
+		 */
+		OSAddAtomic(+1,  &map->stats.wired_count);
+		pmap_update_pte(pte, *pte, (*pte | INTEL_PTE_WIRED));
 	}
-}
-
-void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
-	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
-
-	if (dofilter) {
-		CPU_CR3_MARK_INACTIVE();
-	} else {
-		CPU_CR3_MARK_ACTIVE();
-		__asm__ volatile("mfence");
-		if (current_cpu_datap()->cpu_tlb_invalid)
-			process_pmap_updates();
+	else if (!wired && iswired(*pte)) {
+		/*
+		 * unwiring mapping
+		 */
+		assert(map->stats.wired_count >= 1);
+		OSAddAtomic(-1,  &map->stats.wired_count);
+		pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WIRED));
 	}
-}
 
+	PMAP_UNLOCK(map);
+}
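
pmap_change_wiring() only toggles INTEL_PTE_WIRED and the per-pmap wired_count, and it panics if the mapping does not already exist. A caller sketch, assuming map and va name a mapping previously established with pmap_enter():

/* Sketch: pinning and unpinning an existing mapping. */
static void
pin_then_unpin(pmap_t map, vm_map_offset_t va)
{
	pmap_change_wiring(map, va, TRUE);	/* wire: wired_count++ */
	/* ... window during which the page must stay resident ... */
	pmap_change_wiring(map, va, FALSE);	/* unwire: wired_count-- */
}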
diff --git a/osfmk/i386/proc_reg.h b/osfmk/i386/proc_reg.h
index 54fca68e3..b35d5c0a0 100644
--- a/osfmk/i386/proc_reg.h
+++ b/osfmk/i386/proc_reg.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -146,6 +146,7 @@
  * CR4
  */
 #define CR4_OSXSAVE 0x00040000	/* OS supports XSAVE */
+#define CR4_PCIDE   0x00020000	/* PCID Enable */
 #define CR4_SMXE    0x00004000	/* Enable SMX operation */
 #define CR4_VMXE    0x00002000	/* Enable VMX operation */
 #define CR4_OSXMM   0x00000400  /* SSE/SSE2 exceptions supported in OS */
@@ -170,6 +171,9 @@
 #define XFEM_SSE XCR0_SSE
 #define XFEM_X87 XCR0_X87
 #define XCR0 (0)
+
+#define	PMAP_PCID_PRESERVE (1ULL << 63)
+#define	PMAP_PCID_MASK (0xFFF)
 #ifndef	ASSEMBLER
 
 #include <sys/cdefs.h>
@@ -179,6 +183,66 @@ __BEGIN_DECLS
 
 #define	set_ts() set_cr0(get_cr0() | CR0_TS)
 
+static inline uint16_t get_es(void)
+{
+	uint16_t es;
+	__asm__ volatile("mov %%es, %0" : "=r" (es));
+	return es;
+}
+
+static inline void set_es(uint16_t es)
+{
+	__asm__ volatile("mov %0, %%es" : : "r" (es));
+}
+
+static inline uint16_t get_ds(void)
+{
+	uint16_t ds;
+	__asm__ volatile("mov %%ds, %0" : "=r" (ds));
+	return ds;
+}
+
+static inline void set_ds(uint16_t ds)
+{
+	__asm__ volatile("mov %0, %%ds" : : "r" (ds));
+}
+
+static inline uint16_t get_fs(void)
+{
+	uint16_t fs;
+	__asm__ volatile("mov %%fs, %0" : "=r" (fs));
+	return fs;
+}
+
+static inline void set_fs(uint16_t fs)
+{
+	__asm__ volatile("mov %0, %%fs" : : "r" (fs));
+}
+
+static inline uint16_t get_gs(void)
+{
+	uint16_t gs;
+	__asm__ volatile("mov %%gs, %0" : "=r" (gs));
+	return gs;
+}
+
+static inline void set_gs(uint16_t gs)
+{
+	__asm__ volatile("mov %0, %%gs" : : "r" (gs));
+}
+
+static inline uint16_t get_ss(void)
+{
+	uint16_t ss;
+	__asm__ volatile("mov %%ss, %0" : "=r" (ss));
+	return ss;
+}
+
+static inline void set_ss(uint16_t ss)
+{
+	__asm__ volatile("mov %0, %%ss" : : "r" (ss));
+}
+
 static inline uintptr_t get_cr0(void)
 {
 	uintptr_t cr0; 
@@ -198,6 +262,19 @@ static inline uintptr_t get_cr2(void)
 	return(cr2);
 }
 
+static inline uintptr_t get_cr3_raw(void)
+{
+	register uintptr_t cr3;
+	__asm__ volatile("mov %%cr3, %0" : "=r" (cr3));
+	return(cr3);
+}
+
+static inline void set_cr3_raw(uintptr_t value)
+{
+	__asm__ volatile("mov %0, %%cr3" : : "r" (value));
+}
+
+#if	defined(__i386__)
 static inline uintptr_t get_cr3(void)
 {
 	register uintptr_t cr3;
@@ -209,7 +286,20 @@ static inline void set_cr3(uintptr_t value)
 {
 	__asm__ volatile("mov %0, %%cr3" : : "r" (value));
 }
+#else
+static inline uintptr_t get_cr3_base(void)
+{
+	register uintptr_t cr3;
+	__asm__ volatile("mov %%cr3, %0" : "=r" (cr3));
+	return(cr3 & ~(0xFFFULL));
+}
+
+static inline void set_cr3_composed(uintptr_t base, uint16_t pcid, uint32_t preserve)
+{
+	__asm__ volatile("mov %0, %%cr3" : : "r" (base | pcid | ( ( (uint64_t)preserve) << 63) ) );
+}
 
+#endif
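
With CR4.PCIDE set, CR3 bits 11:0 carry a process-context identifier and bit 63 on a write asks the CPU to keep cached translations for that PCID; set_cr3_composed() packs all three fields, matching the PMAP_PCID_MASK and PMAP_PCID_PRESERVE constants above. A hedged context-switch sketch, where the pm_cr3/pm_pcid arguments stand in for per-pmap state not shown here:

/*
 * Sketch (x86_64 only): switch address space without flushing this
 * PCID's cached TLB entries. pm_cr3 and pm_pcid are illustrative.
 */
static void
switch_space_noflush(uintptr_t pm_cr3, uint16_t pm_pcid)
{
	/* preserve != 0 sets bit 63 (PMAP_PCID_PRESERVE). */
	set_cr3_composed(pm_cr3 & ~PMAP_PCID_MASK,
			 pm_pcid & PMAP_PCID_MASK, 1);
}
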
 static inline uintptr_t get_cr4(void)
 {
 	uintptr_t cr4;
@@ -222,6 +312,13 @@ static inline void set_cr4(uintptr_t value)
 	__asm__ volatile("mov %0, %%cr4" : : "r" (value));
 }
 
+static inline uintptr_t x86_get_flags(void)
+{
+	uintptr_t erflags;
+	__asm__ volatile("pushf; pop	%0" :  "=r" (erflags));
+	return erflags;
+}
+
 static inline void clear_ts(void)
 {
 	__asm__ volatile("clts");
@@ -268,8 +365,6 @@ static inline void swapgs(void)
 
 #ifdef MACH_KERNEL_PRIVATE
 
-
-
 #ifdef __i386__
 
 #include <i386/cpu_data.h>
@@ -286,16 +381,17 @@ static inline void flush_tlb(void)
 		set_cr3(get_cr3());
 	}
 }
+static inline void flush_tlb_raw(void)
+{
+	flush_tlb();
+}
+
 #elif defined(__x86_64__)
-static inline void flush_tlb(void)
+static inline void flush_tlb_raw(void)
 {
-	set_cr3(get_cr3());
+	set_cr3_raw(get_cr3_raw());
 }
-#else
-#error Unsupported architecture
 #endif
-
-
 #endif	/* MACH_KERNEL_PRIVATE */
 
 static inline void wbinvd(void)
@@ -375,19 +471,19 @@ static inline void wrmsr64(uint32_t msr, uint64_t val)
 
 static inline uint64_t rdtsc64(void)
 {
-	uint32_t lo, hi;
+	uint64_t lo, hi;
 	rdtsc(lo, hi);
-	return (((uint64_t)hi) << 32) | ((uint64_t)lo);
+	return ((hi) << 32) | (lo);
 }
 
 static inline uint64_t rdtscp64(uint32_t *aux)
 {
-	uint32_t lo, hi;
+	uint64_t lo, hi;
 	__asm__ volatile("rdtscp; mov %%ecx, %1"
 					 : "=a" (lo), "=d" (hi), "=m" (*aux)
 					 :
 					 : "ecx");
-	return (((uint64_t)hi) << 32) | ((uint64_t)lo);
+	return ((hi) << 32) | (lo);
 }
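
Widening lo and hi to uint64_t lets the shift-and-or compose without per-operand casts; callers see no change. A small measurement sketch over rdtsc64(); note that RDTSC is not a serializing instruction, so this yields approximate counts in raw, frequency-dependent ticks:

/* Sketch: rough cycle count around a region. */
static uint64_t
region_ticks(void (*fn)(void))
{
	uint64_t start = rdtsc64();

	fn();
	return rdtsc64() - start;	/* raw TSC ticks, not nanoseconds */
}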
 
 #else
diff --git a/osfmk/i386/rtclock.c b/osfmk/i386/rtclock.c
index 72b1f556f..d9de63185 100644
--- a/osfmk/i386/rtclock.c
+++ b/osfmk/i386/rtclock.c
@@ -56,22 +56,21 @@
 #include <mach/vm_prot.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>		/* for kernel_map */
-#include <i386/ipl.h>
 #include <architecture/i386/pio.h>
 #include <i386/machine_cpu.h>
 #include <i386/cpuid.h>
 #include <i386/cpu_threads.h>
 #include <i386/mp.h>
 #include <i386/machine_routines.h>
+#include <i386/pal_routines.h>
 #include <i386/proc_reg.h>
 #include <i386/misc_protos.h>
-#include <i386/lapic.h>
 #include <pexpert/pexpert.h>
 #include <machine/limits.h>
 #include <machine/commpage.h>
 #include <sys/kdebug.h>
 #include <i386/tsc.h>
-#include <i386/rtclock.h>
+#include <i386/rtclock_protos.h>
 
 #define UI_CPUFREQ_ROUNDING_FACTOR	10000000
 
@@ -81,165 +80,9 @@ int		rtclock_init(void);
 
 uint64_t	tsc_rebase_abs_time = 0;
 
-void		rtclock_intr(x86_saved_state_t *regs);
-
 static void	rtc_set_timescale(uint64_t cycles);
 static uint64_t	rtc_export_speed(uint64_t cycles);
 
-rtc_nanotime_t	rtc_nanotime_info = {0,0,0,0,1,0};
-
-static uint64_t	rtc_decrementer_min;
-static uint64_t	rtc_decrementer_max;
-
-static uint64_t
-deadline_to_decrementer(
-	uint64_t	deadline,
-	uint64_t	now)
-{
-	uint64_t	delta;
-
-	if (deadline <= now)
-		return rtc_decrementer_min;
-	else {
-		delta = deadline - now;
-		return MIN(MAX(rtc_decrementer_min,delta),rtc_decrementer_max); 
-	}
-}
-
-static inline uint64_t
-_absolutetime_to_tsc(uint64_t ns)
-{
-	uint32_t	generation;
-	uint64_t	tsc;
-
-	do {
-		generation =  rtc_nanotime_info.generation;
-		tsc = tmrCvt(ns - rtc_nanotime_info.ns_base, tscFCvtn2t)
-			+ rtc_nanotime_info.tsc_base;
-	} while (generation == 0 ||
-		 generation != rtc_nanotime_info.generation);
-
-	return tsc;
-}
-
-/*
- * Regular local APIC timer case:
- */
-static void
-rtc_lapic_config_timer(void)
-{
-	lapic_config_timer(TRUE, one_shot, divide_by_1);
-}
-static uint64_t
-rtc_lapic_set_timer(uint64_t deadline, uint64_t now)
-{
-	uint64_t count;
-	uint64_t set = 0;
-
-	if (deadline > 0) {
-		/*
-		 * Convert delta to bus ticks
-		 * - time now is not relevant
-		 */
-		count = deadline_to_decrementer(deadline, now);
-		set = now + count;
-		lapic_set_timer_fast((uint32_t) tmrCvt(count, busFCvtn2t));
-	} else {
-		lapic_set_timer(FALSE, one_shot, divide_by_1, 0);
-	}
-	return set;
-}
-
-/*
- * TSC-deadline timer case:
- */
-static void
-rtc_lapic_config_tsc_deadline_timer(void)
-{
-	lapic_config_tsc_deadline_timer();
-}
-static uint64_t
-rtc_lapic_set_tsc_deadline_timer(uint64_t deadline, uint64_t now)
-{
-	uint64_t	set = 0;
-
-	if (deadline > 0) {
-		/*
-		 * Convert to TSC
-		 */
-		set = now + deadline_to_decrementer(deadline, now);
-		lapic_set_tsc_deadline_timer(_absolutetime_to_tsc(set));
-	} else {
-		lapic_set_tsc_deadline_timer(0);
-	}
-	return set;
-} 
-
-/*
- * Definitions for timer operations table
- */
-typedef struct {
-	void	 (*config)(void);
-	uint64_t (*set)   (uint64_t, uint64_t);
-} rtc_timer_t;
-
-rtc_timer_t	rtc_timer_lapic  = {
-	rtc_lapic_config_timer,
-	rtc_lapic_set_timer
-};
-
-rtc_timer_t	rtc_timer_tsc_deadline  = {
-	rtc_lapic_config_tsc_deadline_timer,
-	rtc_lapic_set_tsc_deadline_timer
-};
-
-rtc_timer_t	*rtc_timer = &rtc_timer_lapic;	/* defaults to LAPIC timer */
-
-/*
- * rtc_timer_init() is called at startup on the boot processor only.
- */
-static void
-rtc_timer_init(void)
-{
-	int	TSC_deadline_timer = 0;
-	
-	/* See whether we can use the local apic in TSC-deadline mode */
-	if ((cpuid_features() & CPUID_FEATURE_TSCTMR)) {
-		TSC_deadline_timer = 1;
-		PE_parse_boot_argn("TSC_deadline_timer", &TSC_deadline_timer,
-				   sizeof(TSC_deadline_timer));
-		printf("TSC Deadline Timer supported %s enabled\n",
-			TSC_deadline_timer ? "and" : "but not");
-	}
-
-	if (TSC_deadline_timer) {
-		rtc_timer = &rtc_timer_tsc_deadline;
-		rtc_decrementer_max = UINT64_MAX;	/* effectively none */
-		/*
-		 * The min could be as low as 1nsec,
-		 * but we're being conservative for now and making it the same
-		 * as for the local apic timer.
-		 */
-		rtc_decrementer_min = 1*NSEC_PER_USEC;	/* 1 usec */
-	} else {
-		/*
-		 * Compute the longest interval using LAPIC timer.
-		 */
-		rtc_decrementer_max = tmrCvt(0x7fffffffULL, busFCvtt2n);
-		kprintf("maxDec: %lld\n", rtc_decrementer_max);
-		rtc_decrementer_min = 1*NSEC_PER_USEC;	/* 1 usec */
-	}
-
-	/* Point LAPIC interrupts to hardclock() */
-	lapic_set_timer_func((i386_intr_func_t) rtclock_intr);
-}
-
-static inline uint64_t
-rtc_timer_set(uint64_t deadline, uint64_t now)
-{
-	return rtc_timer->set(deadline, now);
-}
-
 void
 rtc_timer_start(void)
 {
@@ -268,7 +111,7 @@ _tsc_to_nanoseconds(uint64_t value)
 		 "addl	%%edi,%%eax	;"	
 		 "adcl	$0,%%edx	 "
 		 : "+A" (value)
-		 : "c" (rtc_nanotime_info.scale)
+		 : "c" (pal_rtc_nanotime_info.scale)
 		 : "esi", "edi");
 #elif defined(__x86_64__)
     asm volatile("mul %%rcx;"
@@ -276,7 +119,7 @@ _tsc_to_nanoseconds(uint64_t value)
 		 "shlq $32, %%rdx;"
 		 "orq %%rdx, %%rax;"
 		 : "=a"(value)
-		 : "a"(value), "c"(rtc_nanotime_info.scale)
+		 : "a"(value), "c"(pal_rtc_nanotime_info.scale)
 		 : "rdx", "cc" );
 #else
 #error Unsupported architecture
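
Both asm bodies compute the same fixed-point conversion, ns = (tsc * scale) >> 32, with scale a 32-bit multiplier. A portable reference model for auditing the assembly, assuming the compiler provides the __uint128_t extension (an assumption, though the x86_64 toolchains here do):

/* Reference model of _tsc_to_nanoseconds(). */
static inline uint64_t
tsc_to_ns_model(uint64_t tsc, uint32_t scale)
{
	/* 64x32 multiply; keep bits 95:32 of the product. */
	return (uint64_t)(((__uint128_t)tsc * scale) >> 32);
}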
@@ -359,7 +202,7 @@ rtclock_config(void)
  * be guaranteed by the caller.
  */
 static inline void
-rtc_nanotime_set_commpage(rtc_nanotime_t *rntp)
+rtc_nanotime_set_commpage(pal_rtc_nanotime_t *rntp)
 {
 	commpage_set_nanotime(rntp->tsc_base, rntp->ns_base, rntp->scale, rntp->shift);
 }
@@ -370,18 +213,18 @@ rtc_nanotime_set_commpage(rtc_nanotime_t *rntp)
  * Initialize the nanotime info from the base time.
  */
 static inline void
-_rtc_nanotime_init(rtc_nanotime_t *rntp, uint64_t base)
+_rtc_nanotime_init(pal_rtc_nanotime_t *rntp, uint64_t base)
 {
 	uint64_t	tsc = rdtsc64();
 
-	_rtc_nanotime_store(tsc, base, rntp->scale, rntp->shift, rntp);
+	_pal_rtc_nanotime_store(tsc, base, rntp->scale, rntp->shift, rntp);
 }
 
 static void
 rtc_nanotime_init(uint64_t base)
 {
-	_rtc_nanotime_init(&rtc_nanotime_info, base);
-	rtc_nanotime_set_commpage(&rtc_nanotime_info);
+	_rtc_nanotime_init(&pal_rtc_nanotime_info, base);
+	rtc_nanotime_set_commpage(&pal_rtc_nanotime_info);
 }
 
 /*
@@ -396,8 +239,7 @@ rtc_nanotime_init_commpage(void)
 {
 	spl_t			s = splclock();
 
-	rtc_nanotime_set_commpage(&rtc_nanotime_info);
-
+	rtc_nanotime_set_commpage(&pal_rtc_nanotime_info);
 	splx(s);
 }
 
@@ -416,7 +258,7 @@ rtc_nanotime_read(void)
 		return	_rtc_nanotime_read(&rtc_nanotime_info, 1);	/* slow processor */
 	else
 #endif
-	return	_rtc_nanotime_read(&rtc_nanotime_info, 0);	/* assume fast processor */
+	return	_rtc_nanotime_read(&pal_rtc_nanotime_info, 0);	/* assume fast processor */
 }
 
 /*
@@ -429,7 +271,7 @@ rtc_nanotime_read(void)
 void
 rtc_clock_napped(uint64_t base, uint64_t tsc_base)
 {
-	rtc_nanotime_t	*rntp = &rtc_nanotime_info;
+	pal_rtc_nanotime_t	*rntp = &pal_rtc_nanotime_info;
 	uint64_t	oldnsecs;
 	uint64_t	newnsecs;
 	uint64_t	tsc;
@@ -444,30 +286,29 @@ rtc_clock_napped(uint64_t base, uint64_t tsc_base)
 	 * is later than the time using the old base values.
 	 */
 	if (oldnsecs < newnsecs) {
-	    _rtc_nanotime_store(tsc_base, base, rntp->scale, rntp->shift, rntp);
+	    _pal_rtc_nanotime_store(tsc_base, base, rntp->scale, rntp->shift, rntp);
 	    rtc_nanotime_set_commpage(rntp);
+		trace_set_timebases(tsc_base, base);
 	}
 }
 
-
 /*
  * Invoked from power management to correct the SFLM TSC entry drift problem:
- * a small delta is added to the tsc_base. This is equivalent to nudging time
- * backwards. We require this of the order of a TSC quantum which won't cause
- * callers of mach_absolute_time() to see time going backwards!
+ * a small delta is added to the tsc_base.  This is equivalent to nudging time
+ * backwards.  We require this to be on the order of a TSC quantum which won't
+ * cause callers of mach_absolute_time() to see time going backwards!
  */
 void
 rtc_clock_adjust(uint64_t tsc_base_delta)
 {
-	rtc_nanotime_t	*rntp = &rtc_nanotime_info;
+    pal_rtc_nanotime_t	*rntp = &pal_rtc_nanotime_info;
 
-	assert(!ml_get_interrupts_enabled());
-	assert(tsc_base_delta < 100ULL);	/* i.e. it's small */
-	_rtc_nanotime_adjust(tsc_base_delta, rntp);
-	rtc_nanotime_set_commpage(rntp);
+    assert(!ml_get_interrupts_enabled());
+    assert(tsc_base_delta < 100ULL);	/* i.e. it's small */
+    _rtc_nanotime_adjust(tsc_base_delta, rntp);
+    rtc_nanotime_set_commpage(rntp);
 }
 
-
 void
 rtc_clock_stepping(__unused uint32_t new_frequency,
 		   __unused uint32_t old_frequency)
@@ -485,7 +326,7 @@ rtc_clock_stepped(__unused uint32_t new_frequency,
 /*
  * rtc_sleep_wakeup:
  *
- * Invoked from power manageent when we have awoken from a sleep (S3)
+ * Invoked from power management when we have awoken from a sleep (S3)
  * and the TSC has been reset.  The nanotime data is updated based on
  * the passed in value.
  *
@@ -539,9 +380,8 @@ rtclock_init(void)
 		ml_init_lock_timeout();
 	}
 
-	/* Set fixed configuration for lapic timers */
+    	/* Set fixed configuration for lapic timers */
 	rtc_timer->config();
-
 	rtc_timer_start();
 
 	return (1);
@@ -553,12 +393,14 @@ rtclock_init(void)
 static void
 rtc_set_timescale(uint64_t cycles)
 {
-	rtc_nanotime_t	*rntp = &rtc_nanotime_info;
+	pal_rtc_nanotime_t	*rntp = &pal_rtc_nanotime_info;
 	rntp->scale = (uint32_t)(((uint64_t)NSEC_PER_SEC << 32) / cycles);
 
+#if CONFIG_EMBEDDED
 	if (cycles <= SLOW_TSC_THRESHOLD)
 		rntp->shift = (uint32_t)cycles;
 	else
+#endif
 		rntp->shift = 32;
 
 	if (tsc_rebase_abs_time == 0)
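
Worked example of the scale computation: for a 2 GHz TSC, scale = (10^9 << 32) / (2 * 10^9) = 2^31, so the later (ticks * scale) >> 32 step yields ticks / 2, exactly the expected 0.5 ns per tick. On CONFIG_EMBEDDED builds with a TSC at or below SLOW_TSC_THRESHOLD, shift is instead overloaded with the raw cycle count for the slow-path read.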
@@ -680,26 +522,23 @@ uint64_t
 setPop(
 	uint64_t time)
 {
-	uint64_t now;
-	uint64_t pop;
+	uint64_t	now;
+	uint64_t	pop;
 
 	/* 0 and EndOfAllTime are special-cases for "clear the timer" */
-	if (time == 0 || time == EndOfAllTime) {
+	if (time == 0 || time == EndOfAllTime ) {
 		time = EndOfAllTime;
 		now = 0;
-		pop = rtc_timer_set(0, 0);
+		pop = rtc_timer->set(0, 0);
 	} else {
-		now = rtc_nanotime_read();
-		pop = rtc_timer_set(time, now);
+		now = rtc_nanotime_read();	/* The time in nanoseconds */
+		pop = rtc_timer->set(time, now);
 	}
 
-	/* Record actual deadline set */
+	/* Record requested and actual deadlines set */
 	x86_lcpu()->rtcDeadline = time;
-	x86_lcpu()->rtcPop = pop;
+	x86_lcpu()->rtcPop	= pop;
 
-	/*
-	 * Pass back the delta we set
-	 */
 	return pop - now;
 }
 
diff --git a/osfmk/i386/rtclock_asm.h b/osfmk/i386/rtclock_asm.h
new file mode 100644
index 000000000..fedf7a4f4
--- /dev/null
+++ b/osfmk/i386/rtclock_asm.h
@@ -0,0 +1,290 @@
+/*
+ * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * @OSF_COPYRIGHT@
+ */
+/*
+ * @APPLE_FREE_COPYRIGHT@
+ */
+/*
+ *	File:		rtclock_asm.h
+ *	Purpose:	Assembly routines for handling the machine dependent
+ *				real-time clock.
+ */
+
+#ifndef _I386_RTCLOCK_H_
+#define _I386_RTCLOCK_H_
+
+#include <i386/pal_rtclock_asm.h>
+
+#if defined(__i386__)
+
+/*
+ * Nanotime returned in %edx:%eax.
+ * Computed from tsc based on the scale factor
+ * and an implicit 32 bit shift.
+ *
+ * Uses %eax, %ebx, %ecx, %edx, %esi, %edi.
+ */
+#define NANOTIME							  \
+	mov	%gs:CPU_NANOTIME,%edi					; \
+	PAL_RTC_NANOTIME_READ_FAST()
+
+
+/*
+ * Add 64-bit delta in register dreg : areg to timer pointed to by register treg.
+ */
+#define TIMER_UPDATE(treg,dreg,areg,offset)				       \
+	addl	(TIMER_LOW+(offset))(treg),areg		/* add low bits */   ; \
+	adcl	dreg,(TIMER_HIGH+(offset))(treg)	/* carry high bits */; \
+	movl	areg,(TIMER_LOW+(offset))(treg)		/* updated low bit */; \
+	movl	(TIMER_HIGH+(offset))(treg),dreg	/* copy high bits */ ; \
+	movl	dreg,(TIMER_HIGHCHK+(offset))(treg)	/* to high check */
+
+/*
+ * Add time delta to old timer and start new.
+ */
+#define TIMER_EVENT(old,new)						       \
+	NANOTIME				/* edx:eax nanosecs */       ; \
+	movl	%eax,%esi			/* save timestamp */	     ; \
+	movl	%edx,%edi			/* save timestamp */	     ; \
+	movl	%gs:CPU_ACTIVE_THREAD,%ecx	/* get current thread */     ; \
+	subl	(old##_TIMER)+TIMER_TSTAMP(%ecx),%eax   /* elapsed */	     ; \
+	sbbl	(old##_TIMER)+TIMER_TSTAMP+4(%ecx),%edx	/* time */	     ; \
+	TIMER_UPDATE(%ecx,%edx,%eax,old##_TIMER)  /* update timer */	     ; \
+	movl	%esi,(new##_TIMER)+TIMER_TSTAMP(%ecx)   /* set timestamp */  ; \
+	movl	%edi,(new##_TIMER)+TIMER_TSTAMP+4(%ecx)	 /* set timestamp */ ; \
+	leal	(new##_TIMER)(%ecx), %ecx   /* compute new timer pointer */  ; \
+	movl	%gs:CPU_PROCESSOR,%ebx		/* get current processor */  ; \
+	movl	%ecx,THREAD_TIMER(%ebx)		/* set current timer */	     ; \
+	movl	%esi,%eax			/* restore timestamp */	     ; \
+	movl	%edi,%edx			/* restore timestamp */	     ; \
+	subl	(old##_STATE)+TIMER_TSTAMP(%ebx),%eax	 /* elapsed */	     ; \
+	sbbl	(old##_STATE)+TIMER_TSTAMP+4(%ebx),%edx	 /* time */	     ; \
+	TIMER_UPDATE(%ebx,%edx,%eax,old##_STATE)/* update timer */	     ; \
+	leal	(new##_STATE)(%ebx),%ecx	/* new state pointer */      ; \
+	movl	%ecx,CURRENT_STATE(%ebx)	/* set current state */	     ; \
+	movl	%esi,TIMER_TSTAMP(%ecx)		/* set timestamp */          ; \
+	movl	%edi,TIMER_TSTAMP+4(%ecx)	/* set timestamp */
+
+/*
+ * Update time on user trap entry.
+ * Uses %eax,%ebx,%ecx,%edx,%esi,%edi.
+ */
+#define	TIME_TRAP_UENTRY			TIMER_EVENT(USER,SYSTEM)
+
+/*
+ * update time on user trap exit.
+ * Uses %eax,%ebx,%ecx,%edx,%esi,%edi.
+ */
+#define	TIME_TRAP_UEXIT				TIMER_EVENT(SYSTEM,USER)
+
+/*
+ * update time on interrupt entry.
+ * Uses %eax,%ebx,%ecx,%edx,%esi,%edi.
+ * Saves processor state info on stack.
+ */
+#define	TIME_INT_ENTRY							       \
+	NANOTIME				/* edx:eax nanosecs */	     ; \
+	movl	%eax,%gs:CPU_INT_EVENT_TIME	/* save in cpu data */	     ; \
+	movl	%edx,%gs:CPU_INT_EVENT_TIME+4	/* save in cpu data */	     ; \
+	movl	%eax,%esi			/* save timestamp */	     ; \
+	movl	%edx,%edi			/* save timestamp */	     ; \
+	movl	%gs:CPU_PROCESSOR,%ebx		/* get current processor */  ; \
+	movl 	THREAD_TIMER(%ebx),%ecx		/* get current timer */	     ; \
+	subl	TIMER_TSTAMP(%ecx),%eax		/* compute elapsed time */   ; \
+	sbbl	TIMER_TSTAMP+4(%ecx),%edx	/* compute elapsed time */   ; \
+	TIMER_UPDATE(%ecx,%edx,%eax,0)		/* update timer */	     ; \
+	movl	KERNEL_TIMER(%ebx),%ecx		/* point to kernel timer */  ; \
+	movl	%esi,TIMER_TSTAMP(%ecx)		/* set timestamp */	     ; \
+	movl	%edi,TIMER_TSTAMP+4(%ecx)	/* set timestamp */	     ; \
+	movl	%esi,%eax			/* restore timestamp */	     ; \
+	movl	%edi,%edx			/* restore timestamp */	     ; \
+	movl	CURRENT_STATE(%ebx),%ecx	/* get current state */	     ; \
+	pushl	%ecx				/* save state */	     ; \
+	subl	TIMER_TSTAMP(%ecx),%eax		/* compute elapsed time */   ; \
+	sbbl	TIMER_TSTAMP+4(%ecx),%edx	/* compute elapsed time */   ; \
+	TIMER_UPDATE(%ecx,%edx,%eax,0)		/* update timer */	     ; \
+	leal	IDLE_STATE(%ebx),%eax		/* get idle state */	     ; \
+	cmpl	%eax,%ecx			/* compare current state */  ; \
+	je	0f				/* skip if equal */	     ; \
+	leal	SYSTEM_STATE(%ebx),%ecx		/* get system state */	     ; \
+	movl	%ecx,CURRENT_STATE(%ebx)	/* set current state */	     ; \
+0:	movl	%esi,TIMER_TSTAMP(%ecx)		/* set timestamp */	     ; \
+	movl	%edi,TIMER_TSTAMP+4(%ecx)	/* set timestamp */
+
+/*
+ * update time on interrupt exit.
+ * Uses %eax,%ebx,%ecx,%edx,%esi,%edi.
+ * Restores processor state info from stack.
+ */
+#define	TIME_INT_EXIT							       \
+	NANOTIME				/* edx:eax nanosecs */       ; \
+	movl	%eax,%gs:CPU_INT_EVENT_TIME	/* save in cpu data */	     ; \
+	movl	%edx,%gs:CPU_INT_EVENT_TIME+4	/* save in cpu data */	     ; \
+	movl	%eax,%esi			/* save timestamp */	     ; \
+	movl	%edx,%edi			/* save timestamp */	     ; \
+	movl	%gs:CPU_PROCESSOR,%ebx		/* get current processor */  ; \
+	movl	KERNEL_TIMER(%ebx),%ecx		/* point to kernel timer */  ; \
+	subl	TIMER_TSTAMP(%ecx),%eax		/* compute elapsed time */   ; \
+	sbbl	TIMER_TSTAMP+4(%ecx),%edx	/* compute elapsed time */   ; \
+	TIMER_UPDATE(%ecx,%edx,%eax,0)		/* update timer */	     ; \
+	movl	THREAD_TIMER(%ebx),%ecx		/* interrupted timer */	     ; \
+	movl	%esi,TIMER_TSTAMP(%ecx)		/* set timestamp */	     ; \
+	movl	%edi,TIMER_TSTAMP+4(%ecx)	/* set timestamp */	     ; \
+	movl	%esi,%eax			/* restore timestamp */	     ; \
+	movl	%edi,%edx			/* restore timestamp */	     ; \
+	movl	CURRENT_STATE(%ebx),%ecx	/* get current state */	     ; \
+	subl	TIMER_TSTAMP(%ecx),%eax		/* compute elapsed time */   ; \
+	sbbl	TIMER_TSTAMP+4(%ecx),%edx	/* compute elapsed time */   ; \
+	TIMER_UPDATE(%ecx,%edx,%eax,0)		/* update timer */	     ; \
+	popl	%ecx				/* restore state */	     ; \
+	movl	%ecx,CURRENT_STATE(%ebx)	/* set current state */	     ; \
+	movl	%esi,TIMER_TSTAMP(%ecx)		/* set timestamp */	     ; \
+	movl	%edi,TIMER_TSTAMP+4(%ecx)	/* set timestamp */
+
+#elif defined(__x86_64__)
+
+/*
+ * Nanotime returned in %rax.
+ * Computed from tsc based on the scale factor and an implicit 32 bit shift.
+ * This code must match what _rtc_nanotime_read does in
+ * machine_routines_asm.s.  Failure to do so can
+ * result in "weird" timing results.
+ *
+ * Uses: %rsi, %rdi, %rdx, %rcx
+ */
+#define NANOTIME							       \
+	movq	%gs:CPU_NANOTIME,%rdi					     ; \
+	PAL_RTC_NANOTIME_READ_FAST()
+
+/*
+ * Add 64-bit delta in register reg to timer pointed to by register treg.
+ */
+#define TIMER_UPDATE(treg,reg,offset)					       \
+	addq	reg,(offset)+TIMER_ALL(treg)		/* add timer */
+
+/*
+ * Add time delta to old timer and start new.
+ * Uses: %rsi, %rdi, %rdx, %rcx, %rax
+ */
+#define TIMER_EVENT(old,new)						       \
+	NANOTIME				/* %rax := nanosecs */       ; \
+	movq	%rax,%rsi			/* save timestamp */	     ; \
+	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get thread */	     ; \
+	subq	(old##_TIMER)+TIMER_TSTAMP(%rcx),%rax	/* compute elapsed */; \
+	TIMER_UPDATE(%rcx,%rax,old##_TIMER)	/* update timer */	     ; \
+	leaq	(new##_TIMER)(%rcx),%rcx	/* point to new timer */     ; \
+	movq	%rsi,TIMER_TSTAMP(%rcx)		/* set timestamp */	     ; \
+	movq	%gs:CPU_PROCESSOR,%rdx		/* get processor */	     ; \
+	movq	%rcx,THREAD_TIMER(%rdx)		/* set current timer */	     ; \
+	movq	%rsi,%rax			/* restore timestamp */	     ; \
+	subq	(old##_STATE)+TIMER_TSTAMP(%rdx),%rax	/* compute elapsed */; \
+	TIMER_UPDATE(%rdx,%rax,old##_STATE)	/* update timer */	     ; \
+	leaq	(new##_STATE)(%rdx),%rcx 	/* point to new state */     ; \
+	movq	%rcx,CURRENT_STATE(%rdx)	/* set current state */	     ; \
+	movq	%rsi,TIMER_TSTAMP(%rcx)		/* set timestamp */
+
+/*
+ * Update time on user trap entry.
+ * Uses: %rsi, %rdi, %rdx, %rcx, %rax
+ */
+#define	TIME_TRAP_UENTRY	TIMER_EVENT(USER,SYSTEM)
+
+/*
+ * update time on user trap exit.
+ * Uses: %rsi, %rdi, %rdx, %rcx, %rax
+ */
+#define	TIME_TRAP_UEXIT		TIMER_EVENT(SYSTEM,USER)
+
+/*
+ * update time on interrupt entry.
+ * Uses: %rsi, %rdi, %rdx, %rcx, %rax
+ * Saves processor state info on stack.
+ */
+#define	TIME_INT_ENTRY							       \
+	NANOTIME				/* %rax := nanosecs */	     ; \
+	movq	%rax,%gs:CPU_INT_EVENT_TIME	/* save in cpu data */	     ; \
+	movq	%rax,%rsi			/* save timestamp */	     ; \
+	movq	%gs:CPU_PROCESSOR,%rdx		/* get processor */	     ; \
+	movq 	THREAD_TIMER(%rdx),%rcx		/* get current timer */	     ; \
+	subq	TIMER_TSTAMP(%rcx),%rax		/* compute elapsed */	     ; \
+	TIMER_UPDATE(%rcx,%rax,0)		/* update timer */	     ; \
+	movq	KERNEL_TIMER(%rdx),%rcx		/* get kernel timer */	     ; \
+	movq	%rsi,TIMER_TSTAMP(%rcx)		/* set timestamp */	     ; \
+	movq	%rsi,%rax			/* restore timestamp */	     ; \
+	movq	CURRENT_STATE(%rdx),%rcx	/* get current state */	     ; \
+	pushq	%rcx				/* save state */	     ; \
+	subq	TIMER_TSTAMP(%rcx),%rax		/* compute elapsed */	     ; \
+	TIMER_UPDATE(%rcx,%rax,0)		/* update timer */	     ; \
+	leaq	IDLE_STATE(%rdx),%rax		/* get idle state */	     ; \
+	cmpq	%rax,%rcx			/* compare current */	     ; \
+	je	0f				/* skip if equal */	     ; \
+	leaq	SYSTEM_STATE(%rdx),%rcx		/* get system state */	     ; \
+	movq	%rcx,CURRENT_STATE(%rdx)	/* set current state */	     ; \
+0:	movq	%rsi,TIMER_TSTAMP(%rcx)		/* set timestamp */
+
+/*
+ * update time on interrupt exit.
+ * Uses: %rsi, %rdi, %rdx, %rcx, %rax
+ * Restores processor state info from stack.
+ */
+#define	TIME_INT_EXIT							       \
+	NANOTIME				/* %rax := nanosecs */	     ; \
+	movq	%rax,%gs:CPU_INT_EVENT_TIME	/* save in cpu data */	     ; \
+	movq	%rax,%rsi			/* save timestamp */	     ; \
+	movq	%gs:CPU_PROCESSOR,%rdx		/* get processor */	     ; \
+	movq	KERNEL_TIMER(%rdx),%rcx		/* get kernel timer */	     ; \
+	subq	TIMER_TSTAMP(%rcx),%rax		/* compute elapsed */        ; \
+	TIMER_UPDATE(%rcx,%rax,0)		/* update timer */	     ; \
+	movq	THREAD_TIMER(%rdx),%rcx		/* interrupted timer */	     ; \
+	movq	%rsi,TIMER_TSTAMP(%rcx)		/* set timestamp */	     ; \
+	movq	%rsi,%rax			/* restore timestamp */	     ; \
+	movq	CURRENT_STATE(%rdx),%rcx	/* get current state */	     ; \
+	subq	TIMER_TSTAMP(%rcx),%rax		/* compute elapsed */	     ; \
+	TIMER_UPDATE(%rcx,%rax,0)		/* update timer */	     ; \
+	popq	%rcx				/* restore state */	     ; \
+	movq	%rcx,CURRENT_STATE(%rdx)	/* set current state */	     ; \
+	movq	%rsi,TIMER_TSTAMP(%rcx)		/* set timestamp */
+
+#endif
+
+/*
+ * Check for vtimers for task.
+ *   task_reg   is register pointing to current task
+ *   thread_reg is register pointing to current thread
+ */
+#define TASK_VTIMER_CHECK(task_reg,thread_reg)				       \
+	cmpl	$0,TASK_VTIMERS(task_reg)				     ; \
+	jz	1f							     ; \
+	orl	$(AST_BSD),%gs:CPU_PENDING_AST	/* Set pending AST */	     ; \
+	lock								     ; \
+	orl	$(AST_BSD),TH_AST(thread_reg)	/* Set thread AST  */	     ; \
+1:									     ; \
+
+#endif /* _I386_RTCLOCK_H_ */
diff --git a/osfmk/i386/rtclock.h b/osfmk/i386/rtclock_asm_native.h
similarity index 67%
rename from osfmk/i386/rtclock.h
rename to osfmk/i386/rtclock_asm_native.h
index d98b8808f..c17320b7a 100644
--- a/osfmk/i386/rtclock.h
+++ b/osfmk/i386/rtclock_asm_native.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -32,54 +32,13 @@
  * @APPLE_FREE_COPYRIGHT@
  */
 /*
- *	File:		rtclock.h
- *	Purpose:	Routines for handling the machine dependent
- *				real-time clock.
+ *	File:		rtclock_asm_native.h
+ *	Purpose:	Native routines for reading nanotime
  */
 
-#ifndef _I386_RTCLOCK_H_
-#define _I386_RTCLOCK_H_
+#ifndef _PAL_RTCLOCK_ASM_NATIVE_H_
+#define _PAL_RTCLOCK_ASM_NATIVE_H_
 
-#ifndef ASSEMBLER
-typedef struct rtc_nanotime {
-	volatile uint64_t	tsc_base;	/* timestamp */
-	volatile uint64_t	ns_base;	/* nanoseconds */
-	uint32_t		scale;		/* tsc -> nanosec multiplier */
-	uint32_t		shift;		/* tsc -> nanosec shift/div */
-						/* shift is overloaded with
-						 * lower 32bits of tsc_freq
-						 * on slower machines (SLOW_TSC_THRESHOLD) */
-	volatile uint32_t	generation;	/* 0 == being updated */
-	uint32_t		spare1;
-} rtc_nanotime_t;
-
-#if 0
-#include <kern/etimer.h>
-#endif
-
-struct cpu_data;
-
-extern uint64_t tsc_rebase_abs_time;
-
-extern void	_rtc_nanotime_store(
-			uint64_t	tsc,
-			uint64_t	nsec,
-			uint32_t	scale,
-			uint32_t	shift,
-			rtc_nanotime_t	*dst);
-
-extern void	_rtc_nanotime_adjust(
-			uint64_t	tsc_base_delta,
-			rtc_nanotime_t	*dst);
-
-extern uint64_t	_rtc_nanotime_read(
-			rtc_nanotime_t	*rntp,
-			int		slow);
-
-extern rtc_nanotime_t rtc_nanotime_info;
-#endif
-
-#define	SLOW_TSC_THRESHOLD	1000067800	/* TSC is too slow for regular nanotime() algorithm */
 
 #if defined(__i386__)
 /*
@@ -87,7 +46,7 @@ extern rtc_nanotime_t rtc_nanotime_info;
  * %edi points to nanotime info struct
  * %edx:%eax returns nanotime
  */
-#define RTC_NANOTIME_READ_FAST()					  \
+#define PAL_RTC_NANOTIME_READ_FAST()					  \
 0:	movl	RNT_GENERATION(%edi),%esi	/* being updated? */	; \
 	testl	%esi,%esi						; \
 	jz	0b				/* wait until done */	; \
@@ -116,10 +75,10 @@ extern rtc_nanotime_t rtc_nanotime_info;
  * %rdi points to nanotime info struct.
  * %rax returns nanotime
  */
-#define RTC_NANOTIME_READ_FAST()					  \
+#define PAL_RTC_NANOTIME_READ_FAST()					  \
 0:	movl	RNT_GENERATION(%rdi),%esi				; \
-	test	%esi,%esi			/* info updating? */	; \
-	jz	0b				/* - wait if so */	; \
+	test        %esi,%esi		/* info updating? */		; \
+        jz        0b			/* - wait if so */		; \
 	lfence								; \
 	rdtsc								; \
 	lfence								; \
@@ -134,6 +93,7 @@ extern rtc_nanotime_t rtc_nanotime_info;
 	cmpl	RNT_GENERATION(%rdi),%esi	/* repeat if changed */ ; \
 	jne	0b
 
-#endif
+#endif /* defined(__x86_64__) */
+
 
-#endif /* _I386_RTCLOCK_H_ */
+#endif /* _PAL_RTCLOCK_ASM_NATIVE_H_ */
diff --git a/osfmk/i386/rtclock_native.c b/osfmk/i386/rtclock_native.c
new file mode 100644
index 000000000..5ffaf91d8
--- /dev/null
+++ b/osfmk/i386/rtclock_native.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * @OSF_COPYRIGHT@
+ */
+
+#include <platforms.h>
+#include <mach_kdb.h>
+
+#include <mach/mach_types.h>
+
+#include <architecture/i386/pio.h>
+#include <i386/machine_cpu.h>
+#include <i386/cpuid.h>
+#include <i386/cpu_threads.h>
+#include <i386/mp.h>
+#include <i386/machine_routines.h>
+#include <i386/pal_routines.h>
+#include <i386/proc_reg.h>
+#include <i386/misc_protos.h>
+#include <i386/lapic.h>
+#include <pexpert/pexpert.h>
+#include <machine/limits.h>
+#include <sys/kdebug.h>
+#include <i386/tsc.h>
+#include <i386/rtclock_protos.h>
+#include <i386/pal_routines.h>
+#include <kern/etimer.h>
+
+static uint64_t	rtc_decrementer_min;
+static uint64_t	rtc_decrementer_max;
+
+static uint64_t
+deadline_to_decrementer(
+	uint64_t	deadline,
+	uint64_t	now)
+{
+	uint64_t	delta;
+
+	if (deadline <= now)
+		return rtc_decrementer_min;
+	else {
+		delta = deadline - now;
+		return MIN(MAX(rtc_decrementer_min,delta),rtc_decrementer_max); 
+	}
+}
+
+static inline uint64_t
+_absolutetime_to_tsc(uint64_t ns)
+{
+	uint32_t	generation;
+	uint64_t	tsc;
+
+	do {
+		generation =  pal_rtc_nanotime_info.generation;
+		tsc = tmrCvt(ns - pal_rtc_nanotime_info.ns_base, tscFCvtn2t)
+			+ pal_rtc_nanotime_info.tsc_base;
+	} while (generation == 0 ||
+		 generation != pal_rtc_nanotime_info.generation);
+
+	return tsc;
+}
+
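_absolutetime_to_tsc() reads the nanotime data locklessly: the writer zeroes generation while updating, so a reader retries until it observes the same nonzero generation before and after its reads. The pattern, distilled into a sketch (field names follow the pal_rtc_nanotime structure this patch introduces; the helper itself is illustrative):

/* Sketch: seqlock-style consistent snapshot of the nanotime bases. */
static void
nanotime_snapshot(const pal_rtc_nanotime_t *info,
		  uint64_t *tsc_base, uint64_t *ns_base)
{
	uint32_t gen;

	do {
		gen = info->generation;	/* 0 means an update is in flight */
		*tsc_base = info->tsc_base;
		*ns_base  = info->ns_base;
	} while (gen == 0 || gen != info->generation);
}
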
+/*
+ * Regular local APIC timer case:
+ */
+static void
+rtc_lapic_config_timer(void)
+{
+	lapic_config_timer(TRUE, one_shot, divide_by_1);
+}
+static uint64_t
+rtc_lapic_set_timer(uint64_t deadline, uint64_t now)
+{
+	uint64_t count;
+	uint64_t set = 0;
+
+	if (deadline > 0) {
+		/*
+		 * Convert delta to bus ticks
+		 * - time now is not relevant
+		 */
+		count = deadline_to_decrementer(deadline, now);
+		set = now + count;
+		lapic_set_timer_fast((uint32_t) tmrCvt(count, busFCvtn2t));
+	} else {
+		lapic_set_timer(FALSE, one_shot, divide_by_1, 0);
+	}
+	return set;
+}
+
+/*
+ * TSC-deadline timer case:
+ */
+static void
+rtc_lapic_config_tsc_deadline_timer(void)
+{
+	lapic_config_tsc_deadline_timer();
+}
+static uint64_t
+rtc_lapic_set_tsc_deadline_timer(uint64_t deadline, uint64_t now)
+{
+	uint64_t	set = 0;
+
+	if (deadline > 0) {
+		/*
+		 * Convert to TSC
+		 */
+		set = now + deadline_to_decrementer(deadline, now);
+		lapic_set_tsc_deadline_timer(_absolutetime_to_tsc(set));
+	} else {
+		lapic_set_tsc_deadline_timer(0);
+	}
+	
+	KERNEL_DEBUG_CONSTANT(
+		DECR_SET_TSC_DEADLINE | DBG_FUNC_NONE,
+		now, deadline,
+		rdtsc64(), lapic_get_tsc_deadline_timer(),
+		0);
+
+	return set;
+} 
+
+/*
+ * Definitions for timer operations table
+ */
+
+rtc_timer_t	rtc_timer_lapic  = {
+	rtc_lapic_config_timer,
+	rtc_lapic_set_timer
+};
+
+rtc_timer_t	rtc_timer_tsc_deadline  = {
+	rtc_lapic_config_tsc_deadline_timer,
+	rtc_lapic_set_tsc_deadline_timer
+};
+
+rtc_timer_t	*rtc_timer = &rtc_timer_lapic; /* defaults to LAPIC timer */
+
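All timer arming funnels through this two-pointer operations table, making the LAPIC one-shot and TSC-deadline back ends interchangeable at the call site. A dispatch sketch mirroring what setPop() in rtclock.c does:

/* Sketch: arm whichever back end rtc_timer currently points at. */
static uint64_t
arm_pop(uint64_t deadline)
{
	uint64_t now = rtc_nanotime_read();

	/* rtc_lapic_set_timer or rtc_lapic_set_tsc_deadline_timer */
	return rtc_timer->set(deadline, now);
}
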
+/*
+ * rtc_timer_init() is called at startup on the boot processor only.
+ */
+void
+rtc_timer_init(void)
+{
+	int	TSC_deadline_timer = 0;
+
+	/* See whether we can use the local apic in TSC-deadline mode */
+	if ((cpuid_features() & CPUID_FEATURE_TSCTMR)) {
+		TSC_deadline_timer = 1;
+		PE_parse_boot_argn("TSC_deadline_timer", &TSC_deadline_timer,
+				   sizeof(TSC_deadline_timer));
+		printf("TSC Deadline Timer supported %s enabled\n",
+			TSC_deadline_timer ? "and" : "but not");
+	}
+
+	if (TSC_deadline_timer) {
+		rtc_timer = &rtc_timer_tsc_deadline;
+		rtc_decrementer_max = UINT64_MAX;	/* effectively none */
+		/*
+		 * The min could be as low as 1nsec,
+		 * but we're being conservative for now and making it the same
+		 * as for the local apic timer.
+		 */
+		rtc_decrementer_min = 1*NSEC_PER_USEC;	/* 1 usec */
+	} else {
+		/*
+		 * Compute the longest interval using LAPIC timer.
+		 */
+		rtc_decrementer_max = tmrCvt(0x7fffffffULL, busFCvtt2n);
+		kprintf("maxDec: %lld\n", rtc_decrementer_max);
+		rtc_decrementer_min = 1*NSEC_PER_USEC;	/* 1 usec */
+	}
+
+	/* Point LAPIC interrupts to hardclock() */
+	lapic_set_timer_func((i386_intr_func_t) rtclock_intr);
+}
diff --git a/osfmk/ppc/rtclock.h b/osfmk/i386/rtclock_protos.h
similarity index 64%
rename from osfmk/ppc/rtclock.h
rename to osfmk/i386/rtclock_protos.h
index 77f287ead..2d944765d 100644
--- a/osfmk/ppc/rtclock.h
+++ b/osfmk/i386/rtclock_protos.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -32,30 +32,37 @@
  * @APPLE_FREE_COPYRIGHT@
  */
 /*
- *	File:		rtclock.h
- *	Purpose:	Routines for handling the machine dependent
+ *	File:		rtclock_protos.h
+ *	Purpose:	C Routines for handling the machine dependent
  *				real-time clock.
  */
 
-#ifndef _PPC_RTCLOCK_H_
-#define _PPC_RTCLOCK_H_
+#ifndef _I386_RTCLOCK_PROTOS_H_
+#define _I386_RTCLOCK_PROTOS_H_
 
-#include <kern/etimer.h>
+typedef struct pal_rtc_nanotime pal_rtc_nanotime_t;
+extern uint64_t tsc_rebase_abs_time;
 
-#define EndOfAllTime	0xFFFFFFFFFFFFFFFFULL
+extern void	_rtc_nanotime_adjust(
+		        uint64_t		tsc_base_delta,
+		        pal_rtc_nanotime_t	*dst);
 
-extern void rtclock_intr(struct savearea *ssp);
+extern uint64_t	_rtc_nanotime_read(
+			pal_rtc_nanotime_t	*rntp,
+			int			slow);
 
-#pragma pack(push,4)
-struct rtclock_timer_t  {
-	queue_head_t	queue;
-	uint64_t		deadline;
-	uint32_t
-	/*boolean_t*/	is_set:1,
-					has_expired:1,
-					:0;
-};
-#pragma pack(pop)
-typedef struct rtclock_timer_t rtclock_timer_t;
+extern void	rtclock_intr(x86_saved_state_t *regs);
 
-#endif /* _PPC_RTCLOCK_H_ */
+
+/*
+ * Timer control.
+ */
+typedef struct {
+	void	 (*config)(void);
+	uint64_t (*set)   (uint64_t, uint64_t);
+} rtc_timer_t;
+extern rtc_timer_t	*rtc_timer;
+
+extern void		rtc_timer_init(void);
+
+#endif /* _I386_RTCLOCK_PROTOS_H_ */
diff --git a/osfmk/i386/seg.h b/osfmk/i386/seg.h
index 89643edf2..df191c5d1 100644
--- a/osfmk/i386/seg.h
+++ b/osfmk/i386/seg.h
@@ -220,7 +220,6 @@ extern struct i386_tss		master_mctss;
 extern void			mc_task_start(void);
 
 #if	MACH_KDB
-extern char			db_stack_store[];
 extern char			db_task_stack_store[];
 extern struct i386_tss		master_dbtss;
 extern void			db_task_start(void);
@@ -345,9 +344,10 @@ __END_DECLS
 #endif
 
 #ifdef __i386__
+#if !defined(USER_WINDOW_SEL)
 #define USER_WINDOW_SEL	0x70		/* 14:  window for copyin/copyout */
 #define PHYS_WINDOW_SEL	0x78		/* 15:  window for copyin/copyout */
-
+#endif
 #define	KERNEL64_CS	0x80		/* 16:  kernel 64-bit code */
 #define	KERNEL64_SS	0x88		/* 17:  kernel 64-bit (syscall) stack */
 #else // __x86_64__
diff --git a/osfmk/i386/serial_io.h b/osfmk/i386/serial_io.h
index 58e75e4a3..1640256a2 100644
--- a/osfmk/i386/serial_io.h
+++ b/osfmk/i386/serial_io.h
@@ -37,6 +37,8 @@
 
 #include <console/serial_protos.h>
 
-int serial_init(void);
+int  serial_init(void);
+void serial_putc(char);
+int  serial_getc(void);
 
 #endif /* _I386_SERIAL_IO_H_ */
diff --git a/osfmk/i386/simple_lock.h b/osfmk/i386/simple_lock.h
index fb30ba83f..563c17739 100644
--- a/osfmk/i386/simple_lock.h
+++ b/osfmk/i386/simple_lock.h
@@ -72,9 +72,6 @@
 #if defined(MACH_KERNEL_PRIVATE) && defined(__APPLE_API_PRIVATE)
 #include <i386/hw_lock_types.h>
 #include <mach_ldebug.h>
-#endif
-
-#if defined(MACH_KERNEL_PRIVATE) && defined(__APPLE_API_PRIVATE)
 
 #if	MACH_LDEBUG
 #define	USLOCK_DEBUG 1
@@ -85,19 +82,21 @@
 typedef struct uslock_debug {
 	void			*lock_pc;	/* pc where lock operation began    */
 	void			*lock_thread;	/* thread that acquired lock */
+	void			*unlock_thread;	/* last thread to release lock */
+	void			*unlock_pc;	/* pc where lock operation ended    */
 	unsigned long	duration[2];
 	unsigned short	state;
 	unsigned char	lock_cpu;
-	void			*unlock_thread;	/* last thread to release lock */
 	unsigned char	unlock_cpu;
-	void			*unlock_pc;	/* pc where lock operation ended    */
 } uslock_debug;
 
 typedef struct slock {
 	hw_lock_data_t	interlock;	/* must be first... see lock.c */
+#if	USLOCK_DEBUG
 	unsigned short	lock_type;	/* must be second... see lock.c */
 #define USLOCK_TAG	0x5353
 	uslock_debug	debug;
+#endif
 } usimple_lock_data_t, *usimple_lock_t;
 
 extern void			i386_lock_unlock_with_flush(
diff --git a/osfmk/i386/start.s b/osfmk/i386/start.s
index b0ba8110a..5472ffde3 100644
--- a/osfmk/i386/start.s
+++ b/osfmk/i386/start.s
@@ -65,7 +65,6 @@
 
 #define	CX(addr,reg)	addr(,reg,4)
 
-#include <i386/lapic.h>
 #include <i386/acpi.h>
 #include <i386/cpuid.h>
 
@@ -82,7 +81,7 @@ EXT(low_intstack):
 	.globl  EXT(gIOHibernateRestoreStack)
 EXT(gIOHibernateRestoreStack):
 
-	.set	., .+INTSTACK_SIZE
+	.space	INTSTACK_SIZE
 
 	.globl	EXT(low_eintstack)
 EXT(low_eintstack:)
@@ -110,7 +109,7 @@ LEXT(gdtptr)
 	.align	12
 	.globl	EXT(df_task_stack)
 EXT(df_task_stack):
-	.set	., .+INTSTACK_SIZE
+	.space	INTSTACK_SIZE
 	.globl	EXT(df_task_stack_end)
 EXT(df_task_stack_end):
 
@@ -121,38 +120,22 @@ EXT(df_task_stack_end):
 	.align	12
 	.globl	EXT(mc_task_stack)
 EXT(mc_task_stack):
-	.set	., .+INTSTACK_SIZE
+	.space	INTSTACK_SIZE
 	.globl	EXT(mc_task_stack_end)
 EXT(mc_task_stack_end):
 
 #if	MACH_KDB
-/*
- * Kernel debugger stack for each processor.
- */
-	.align	12
-	.globl	EXT(db_stack_store)
-EXT(db_stack_store):
-	.set	., .+(INTSTACK_SIZE*MAX_CPUS)
-
 /*
  * Stack for last-ditch debugger task for each processor.
  */
 	.align	12
 	.globl	EXT(db_task_stack_store)
 EXT(db_task_stack_store):
-	.set	., .+(INTSTACK_SIZE*MAX_CPUS)
+	.space	(INTSTACK_SIZE*MAX_CPUS)
 
-/*
- * per-processor kernel debugger stacks
- */
-        .align  ALIGN
-        .globl  EXT(kgdb_stack_store)
-EXT(kgdb_stack_store):
-        .set    ., .+(INTSTACK_SIZE*MAX_CPUS)
 #endif	/* MACH_KDB */
 
 
-	
 /*
  * BSP CPU start here.
  *	eax points to kernbootstruct
@@ -229,7 +212,7 @@ LEXT(slave_pstart)
 	mov $EXT(mp_slave_stack)+PAGE_SIZE, %esp;
 	jmp paging
 
-	
+
 /* Code to get from real mode to protected mode */
 
 #define	operand_size_prefix	.byte 0x66
diff --git a/osfmk/i386/startup64.c b/osfmk/i386/startup64.c
index c85bf1955..b445882cd 100644
--- a/osfmk/i386/startup64.c
+++ b/osfmk/i386/startup64.c
@@ -264,11 +264,7 @@ dump_frame64(x86_saved_state64_t *sp)
 		kprintf("%p: 0x%016llx\n", ip, *ip);
 
 	kprintf("sp->isf.trapno: 0x%08x\n", sp->isf.trapno);
-#ifdef __i386__
-	kprintf("sp->isf.trapfn: 0x%08x\n", sp->isf.trapfn);
-#else
 	kprintf("sp->isf.trapfn: 0x%016llx\n", sp->isf.trapfn);
-#endif
 	kprintf("sp->isf.err:    0x%016llx\n", sp->isf.err);
 	kprintf("sp->isf.rip:    0x%016llx\n", sp->isf.rip);
 	kprintf("sp->isf.cs:     0x%016llx\n", sp->isf.cs);
diff --git a/osfmk/i386/thread.h b/osfmk/i386/thread.h
index faab785af..bbccc7832 100644
--- a/osfmk/i386/thread.h
+++ b/osfmk/i386/thread.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -80,34 +80,7 @@
 
 #include <i386/cpu_data.h>
 
-
-/*
- *	i386_saved_state:
- *
- *	Has been exported to servers.  See: mach/i386/thread_status.h
- *
- *	This structure corresponds to the state of user registers
- *	as saved upon kernel entry.  It lives in the pcb.
- *	It is also pushed onto the stack for exceptions in the kernel.
- *	For performance, it is also used directly in syscall exceptions
- *	if the server has requested i386_THREAD_STATE flavor for the exception
- *	port.
- */
-
-/*
- *	Save area for user floating-point state.
- *	Allocated only when necessary.
- */
-
-typedef	enum {
-		FXSAVE32 = 1,
-		FXSAVE64 = 2,
-		XSAVE32  = 3,
-		XSAVE64  = 4,
-		FP_UNUSED = 5
-	} fp_save_layout_t;
-
-
+#include <machine/pal_routines.h>
 
 /*
  *	x86_kernel_state:
@@ -118,92 +91,89 @@ typedef	enum {
 
 #ifdef __i386__
 struct x86_kernel_state {
-	int			k_ebx;	/* kernel context */
-	int			k_esp;
-	int			k_ebp;
-	int			k_edi;
-	int			k_esi;
-	int			k_eip;
+	uint32_t	k_ebx;	/* kernel context */
+	uint32_t	k_esp;
+	uint32_t	k_ebp;
+	uint32_t	k_edi;
+	uint32_t	k_esi;
+	uint32_t	k_eip;
 	/*
-	 * Kernel stacks are 16-byte aligned with a 4-byte i386_exception_link at
-	 * the top, followed by an x86_kernel_state.  After both structs have
-	 * been pushed, we want to be 16-byte aligned.  A dummy int gets us there.
+	 * Kernel stacks are 16-byte aligned with x86_kernel_state at the top,
+	 * so we need a couple of dummy 32-bit words here.
 	 */
-	int			dummy;
+	uint32_t	dummy[2];
 };
 #else
 struct x86_kernel_state {
-	unsigned long k_rbx;	/* kernel context */
-	unsigned long k_rsp;
-	unsigned long k_rbp;
-	unsigned long k_r12;
-	unsigned long k_r13;
-	unsigned long k_r14;
-	unsigned long k_r15;
-	unsigned long k_rip;
-	unsigned long dummy;
+	uint64_t	k_rbx;	/* kernel context */
+	uint64_t	k_rsp;
+	uint64_t	k_rbp;
+	uint64_t	k_r12;
+	uint64_t	k_r13;
+	uint64_t	k_r14;
+	uint64_t	k_r15;
+	uint64_t	k_rip;
 };
 #endif
 
-typedef struct pcb {
-	void			*sf;
-	x86_saved_state_t	*iss;
-	void			*ifps;
-#ifdef	MACH_BSD
-	uint64_t	cthread_self;		/* for use of cthread package */
-        struct real_descriptor cthread_desc;
-	unsigned long  uldt_selector;          /* user ldt selector to set */
-	struct real_descriptor uldt_desc;      /* the actual user setable ldt data */
-#endif
-	decl_simple_lock_data(,lock);
-	uint64_t	iss_pte0;
-	uint64_t	iss_pte1;
-	void		*ids;
-	uint32_t	arg_store_valid;
-} *pcb_t;
-
 /*
  * Maps state flavor to number of words in the state:
  */
 __private_extern__ unsigned int _MachineStateCount[];
 
-#define USER_STATE(ThrAct)	((ThrAct)->machine.pcb->iss)
-#define USER_REGS32(ThrAct)	(saved_state32(USER_STATE(ThrAct)))
-#define USER_REGS64(ThrAct)	(saved_state64(USER_STATE(ThrAct)))
-
-#define	user_pc(ThrAct)		(is_saved_state32(USER_STATE(ThrAct)) ?	\
-					USER_REGS32(ThrAct)->eip :	\
-					USER_REGS64(ThrAct)->isf.rip )
-
-
+/*
+ * The machine-dependent thread state - registers and all platform-dependent
+ * state - is saved in the machine thread structure which is embedded in
+ * the thread data structure. For historical reasons this is also referred to
+ * as the PCB.
+ */
 struct machine_thread {
-	/*
-	 * pointer to process control block
-	 *	(actual storage may as well be here, too)
-	 */
-	struct pcb xxx_pcb;
-	pcb_t pcb;
+	void			*sf;
+	x86_saved_state_t	*iss;
+	void			*ifps;
+	void			*ids;
+	decl_simple_lock_data(,lock);		/* protects ifps and ids */
+	uint64_t		iss_pte0;
+	uint64_t		iss_pte1;
+	uint32_t		arg_store_valid;
+#ifdef	MACH_BSD
+	uint64_t		cthread_self;	/* for use of cthread package */
+	struct real_descriptor	cthread_desc;
+	unsigned long		uldt_selector;	/* user ldt selector to set */
+	struct real_descriptor	uldt_desc;	/* actual user-settable ldt */
+#endif
 
-	uint32_t	specFlags;
+	struct pal_pcb		pal_pcb;
+
+	uint32_t		specFlags;
 #define		OnProc		0x1
 #define		CopyIOActive 	0x2 /* Checked to ensure DTrace actions do not re-enter copyio(). */
   
 #if NCOPY_WINDOWS > 0
-
         struct {
 	        user_addr_t	user_base;
 	} copy_window[NCOPY_WINDOWS];
-        int		nxt_window;
-        int		copyio_state;
+        int			nxt_window;
+        int			copyio_state;
 #define		WINDOWS_DIRTY	0
 #define		WINDOWS_CLEAN	1
 #define		WINDOWS_CLOSED	2
 #define		WINDOWS_OPENED	3
-        uint64_t	physwindow_pte;
-        int		physwindow_busy;
+        uint64_t		physwindow_pte;
+        int			physwindow_busy;
 #endif
 };
+typedef struct machine_thread *pcb_t;
 
+#define	THREAD_TO_PCB(Thr)	(&(Thr)->machine)
+
+#define USER_STATE(Thr)		((Thr)->machine.iss)
+#define USER_REGS32(Thr)	(saved_state32(USER_STATE(Thr)))
+#define USER_REGS64(Thr)	(saved_state64(USER_STATE(Thr)))
+
+#define	user_pc(Thr)		(is_saved_state32(USER_STATE(Thr)) ?	\
+					USER_REGS32(Thr)->eip :		\
+					USER_REGS64(Thr)->isf.rip )
 
 extern void *get_user_regs(thread_t);
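
A minimal sketch (stand-in types, not the kernel's) of what embedding the PCB buys: the machine state was previously reached through a separate `pcb` pointer, while `THREAD_TO_PCB` is now pure address arithmetic on the thread itself.

```c
#include <stdint.h>

/* Stand-ins for the kernel structures; illustrative only. */
struct machine_thread_sketch {
	uint32_t specFlags;
	/* ... saved state, ifps, ids, ... */
};

struct thread_sketch {
	/* ... scheduling state ... */
	struct machine_thread_sketch machine;	/* embedded, not pointed-to */
};

#define THREAD_TO_PCB(thr)	(&(thr)->machine)

uint32_t get_spec_flags(struct thread_sketch *thr)
{
	/* Old code: thr->machine.pcb->specFlags -- one extra load.
	 * New code: the PCB is the embedded machine struct itself. */
	return THREAD_TO_PCB(thr)->specFlags;
}
```
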
 
@@ -211,33 +181,19 @@ extern void *act_thread_csave(void);
 extern void act_thread_catt(void *ctx);
 extern void act_thread_cfree(void *ctx);
 
-/*
- *	i386_exception_link:
- *
- *	This structure lives at the high end of the kernel stack.
- *	It points to the current thread`s user registers.
- */
-struct i386_exception_link {
-	x86_saved_state_t	*saved_state;
-};
-
 
 /*
  *	On the kernel stack is:
  *	stack:	...
- *		struct i386_exception_link (pointer to user state)
  *		struct x86_kernel_state
  *	stack+kernel_stack_size
  */
 
 #define STACK_IKS(stack)	\
 	((struct x86_kernel_state *)((stack) + kernel_stack_size) - 1)
-#define STACK_IEL(stack)	\
-	((struct i386_exception_link *)STACK_IKS(stack) - 1)
 
 /*
- * Return the current stack depth
- * including x86_kernel_state and i386_exception_link
+ * Return the current stack depth including x86_kernel_state
  */
 static inline vm_offset_t
 current_stack_depth(void)
@@ -253,7 +209,6 @@ current_stack_depth(void)
 #endif
 	return (current_cpu_datap()->cpu_kernel_stack
 		+ sizeof(struct x86_kernel_state)
-		+ sizeof(struct i386_exception_link *)
 		- stack_ptr); 
 }
 
@@ -263,11 +218,4 @@ current_stack_depth(void)
  */
 #define	GET_RETURN_PC(addr)	(__builtin_return_address(0))
 
-/*
- * Defining this indicates that MD code will supply an exception()
- * routine, conformant with kern/exception.c (dependency alert!)
- * but which does wonderfully fast, machine-dependent magic.
- */
-#define MACHINE_FAST_EXCEPTION 1
-
 #endif	/* _I386_THREAD_H_ */
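
To make the revised stack layout concrete, a small self-contained sketch of the `STACK_IKS()` arithmetic under the x86_64 layout (the stack size constant here is made up): with `i386_exception_link` gone, the `x86_kernel_state` sits alone at the very top of the kernel stack.

```c
#include <stdint.h>
#include <stdio.h>

struct x86_kernel_state {		/* x86_64 flavor, as above */
	uint64_t k_rbx, k_rsp, k_rbp, k_r12, k_r13, k_r14, k_r15, k_rip;
};

#define KERNEL_STACK_SIZE	(4 * 4096)	/* illustrative value */
#define STACK_IKS(stack)	\
	((struct x86_kernel_state *)((stack) + KERNEL_STACK_SIZE) - 1)

int main(void)
{
	static uint8_t stack[KERNEL_STACK_SIZE];
	struct x86_kernel_state *iks = STACK_IKS((uintptr_t)stack);

	/* The context record occupies the last 64 bytes of the stack. */
	printf("stack top %p, iks %p, sizeof %zu\n",
	       (void *)(stack + KERNEL_STACK_SIZE), (void *)iks,
	       sizeof(*iks));
	return 0;
}
```
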
diff --git a/osfmk/i386/trap.c b/osfmk/i386/trap.c
index 07b3cf479..55be4fc75 100644
--- a/osfmk/i386/trap.c
+++ b/osfmk/i386/trap.c
@@ -118,8 +118,10 @@
 
 #include <libkern/OSDebug.h>
 
-extern void throttle_lowpri_io(boolean_t);
+#include <machine/pal_routines.h>
 
+extern void throttle_lowpri_io(int);
+extern void kprint_state(x86_saved_state64_t *saved_state);
 
 /*
  * Forward declarations
@@ -128,14 +130,13 @@ static void user_page_fault_continue(kern_return_t kret);
 #ifdef __i386__
 static void panic_trap(x86_saved_state32_t *saved_state);
 static void set_recovery_ip(x86_saved_state32_t *saved_state, vm_offset_t ip);
-static void panic_64(x86_saved_state_t *, int, const char *, boolean_t);
+extern void panic_64(x86_saved_state_t *, int, const char *, boolean_t);
 #else
 static void panic_trap(x86_saved_state64_t *saved_state);
 static void set_recovery_ip(x86_saved_state64_t *saved_state, vm_offset_t ip);
 #endif
 
 volatile perfCallback perfTrapHook = NULL; /* Pointer to CHUD trap hook routine */
-volatile perfCallback perfASTHook  = NULL; /* Pointer to CHUD AST hook routine */
 
 #if CONFIG_DTRACE
 /* See <rdar://problem/4613924> */
@@ -152,6 +153,7 @@ thread_syscall_return(
 	boolean_t	is_mach;
 	int		code;
 
+	pal_register_cache_state(thr_act, DIRTY);
 
         if (thread_is_64bit(thr_act)) {
 	        x86_saved_state64_t	*regs;
@@ -222,6 +224,7 @@ thread_kdb_return(void)
 	thread_t		thr_act = current_thread();
 	x86_saved_state_t	*iss = USER_STATE(thr_act);
 
+	pal_register_cache_state(thr_act, DIRTY);
 
         if (is_saved_state64(iss)) {
 	        x86_saved_state64_t	*regs;
@@ -247,16 +250,13 @@ thread_kdb_return(void)
 
 #endif	/* MACH_KDB */
 
-void
+static inline void
 user_page_fault_continue(
 			 kern_return_t	kr)
 {
 	thread_t	thread = current_thread();
-	ast_t		*myast;
-	boolean_t	intr;
 	user_addr_t	vaddr;
 
-
 #if	MACH_KDB
 	x86_saved_state_t *regs = USER_STATE(thread);
 	int		err;
@@ -288,7 +288,7 @@ user_page_fault_continue(
 		vaddr = uregs->cr2;
 	}
 
-	if ((kr == KERN_SUCCESS) || (kr == KERN_ABORTED)) {
+	if (__probable((kr == KERN_SUCCESS) || (kr == KERN_ABORTED))) {
 #if	MACH_KDB
 		if (!db_breakpoints_inserted) {
 			db_set_breakpoints();
@@ -301,15 +301,6 @@ user_page_fault_continue(
 				       saved_state32(regs)))
 			kdb_trap(T_WATCHPOINT, 0, saved_state32(regs));
 #endif	/* MACH_KDB */
-		intr = ml_set_interrupts_enabled(FALSE);
-		myast = ast_pending();
-		while (*myast & AST_ALL) {
-			ast_taken(AST_ALL, intr);
-			ml_set_interrupts_enabled(FALSE);
-			myast = ast_pending();
-		}
-		ml_set_interrupts_enabled(intr);
-
 		thread_exception_return();
 		/*NOTREACHED*/
 	}
@@ -322,6 +313,8 @@ user_page_fault_continue(
 	}
 #endif	/* MACH_KDB */
 
+	/* PAL debug hook */
+	pal_dbg_page_fault( thread, vaddr, kr );
 
 	i386_exception(EXC_BAD_ACCESS, kr, vaddr);
 	/*NOTREACHED*/
@@ -341,9 +334,11 @@ extern struct recovery	recover_table_end[];
 const char *	trap_type[] = {TRAP_NAMES};
 unsigned 	TRAP_TYPES = sizeof(trap_type)/sizeof(trap_type[0]);
 
+extern void	PE_incoming_interrupt(int interrupt);
+
 #if defined(__x86_64__) && DEBUG
-static void
-print_state(x86_saved_state64_t	*saved_state)
+void
+kprint_state(x86_saved_state64_t	*saved_state)
 {
 	kprintf("current_cpu_datap() 0x%lx\n", (uintptr_t)current_cpu_datap());
 	kprintf("Current GS base MSR 0x%llx\n", rdmsr64(MSR_IA32_GS_BASE));
@@ -385,21 +380,9 @@ print_state(x86_saved_state64_t	*saved_state)
 	kprintf("  isf.rsp    0x%llx\n", saved_state->isf.rsp);
 	kprintf("  isf.ss     0x%llx\n", saved_state->isf.ss);
 }
-/*
- * K64 debug - fatal handler for debug code in the trap vectors.
- */
-extern void
-panic_idt64(x86_saved_state_t *rsp);
-void
-panic_idt64(x86_saved_state_t *rsp)
-{
-	print_state(saved_state64(rsp));
-	panic("panic_idt64");
-}
 #endif
 
 
-
 /*
  * Non-zero indicates that the latency assert is enabled, capped at the
  * given value in absolute time units.
@@ -442,9 +425,6 @@ void interrupt_populate_latency_stats(char *buf, unsigned bufsize) {
 	if (tcpu < real_ncpus)
 		snprintf(buf, bufsize, "0x%x 0x%x 0x%llx", tcpu, cpu_data_ptr[tcpu]->cpu_max_observed_int_latency_vector, cpu_data_ptr[tcpu]->cpu_max_observed_int_latency);
 }
-   
-
-extern void	PE_incoming_interrupt(int interrupt);
 
 /*
  * Handle interrupts:
@@ -458,6 +438,7 @@ interrupt(x86_saved_state_t *state)
 	uint64_t	rsp;
 	int		interrupt_num;
 	boolean_t	user_mode = FALSE;
+	int		ipl;
 	int		cnum = cpu_number();
 
 	if (is_saved_state64(state) == TRUE) {
@@ -484,27 +465,34 @@ interrupt(x86_saved_state_t *state)
 
 	KERNEL_DEBUG_CONSTANT(
 		MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_START,
-		interrupt_num, (long) rip, user_mode, 0, 0);
+		interrupt_num, rip, user_mode, 0, 0);
+
+	SCHED_STATS_INTERRUPT(current_processor());
+
+	ipl = get_preemption_level();
 
 	/*
 	 * Handle local APIC interrupts
 	 * else call platform expert for devices.
-	 */ 
-	if (!lapic_interrupt(interrupt_num, state)) {
+	 */
+	if (!lapic_interrupt(interrupt_num, state))
 		PE_incoming_interrupt(interrupt_num);
+
+	if (__improbable(get_preemption_level() != ipl)) {
+		panic("Preemption level altered by interrupt vector 0x%x: initial 0x%x, final: 0x%x\n", interrupt_num, ipl, get_preemption_level());
 	}
 
 	KERNEL_DEBUG_CONSTANT(
 		MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END,
-		0, 0, 0, 0, 0);
+		interrupt_num, 0, 0, 0, 0);
 
  	if (cpu_data_ptr[cnum]->cpu_nested_istack) {
  		cpu_data_ptr[cnum]->cpu_nested_istack_events++;
  	}
- 	else {
+ 	else  {
 		uint64_t int_latency = mach_absolute_time() - cpu_data_ptr[cnum]->cpu_int_event_time;
 		if (ilat_assert && (int_latency > interrupt_latency_cap) && !machine_timeout_suspended()) {
-			panic("Interrupt vector 0x%x exceeded interrupt latency threshold, 0x%llx absolute time delta, prior signals: 0x%x", interrupt_num, int_latency, cpu_data_ptr[cnum]->cpu_prior_signals);
+			panic("Interrupt vector 0x%x exceeded interrupt latency threshold, 0x%llx absolute time delta, prior signals: 0x%x, current signals: 0x%x", interrupt_num, int_latency, cpu_data_ptr[cnum]->cpu_prior_signals, cpu_data_ptr[cnum]->cpu_signals);
 		}
 		if (int_latency > cpu_data_ptr[cnum]->cpu_max_observed_int_latency) {
 			cpu_data_ptr[cnum]->cpu_max_observed_int_latency = int_latency;
@@ -512,7 +500,6 @@ interrupt(x86_saved_state_t *state)
 		}
 	}
 
-
 	/*
 	 * Having serviced the interrupt first, look at the interrupted stack depth.
 	 */
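
The `ipl` capture above and the recheck after dispatch form a boundary invariant: an interrupt handler must restore the preemption level it was entered with. A generic sketch of the pattern, with stand-ins for the kernel primitives:

```c
#include <stdio.h>
#include <stdlib.h>

static int preemption_level;		/* stand-in for per-CPU state */

static void leaky_handler(void)
{
	preemption_level++;		/* bug: never dropped */
}

static void dispatch(int vector, void (*handler)(void))
{
	int ipl = preemption_level;	/* capture before dispatch */

	handler();

	if (preemption_level != ipl) {	/* catch the leak at the boundary */
		fprintf(stderr,
			"vector 0x%x altered preemption level: %d -> %d\n",
			vector, ipl, preemption_level);
		abort();		/* the kernel panics here */
	}
}

int main(void)
{
	dispatch(0x41, leaky_handler);
	return 0;
}
```
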
@@ -550,7 +537,8 @@ unsigned kdp_has_active_watchpoints = 0;
 
 void
 kernel_trap(
-	x86_saved_state_t	*state)
+	x86_saved_state_t	*state,
+	uintptr_t *lo_spp)
 {
 #ifdef __i386__
 	x86_saved_state32_t	*saved_state;
@@ -579,19 +567,28 @@ kernel_trap(
 	thread = current_thread();
 
 #ifdef __i386__
-	if (is_saved_state64(state)) {
+	if (__improbable(is_saved_state64(state))) {
 		panic_64(state, 0, "Kernel trap with 64-bit state", FALSE);
 	}
+
 	saved_state = saved_state32(state);
+
+	/* Record cpu where state was captured (trampolines don't set this) */
+	saved_state->cpu = cpu_number();
+
 	vaddr = (user_addr_t)saved_state->cr2;
 	type  = saved_state->trapno;
 	code  = saved_state->err & 0xffff;
 	intr  = (saved_state->efl & EFL_IF) != 0;	/* state of ints at trap */
 	kern_ip = (vm_offset_t)saved_state->eip;
 #else
-	if (is_saved_state32(state))
+	if (__improbable(is_saved_state32(state)))
 		panic("kernel_trap(%p) with 32-bit state", state);
 	saved_state = saved_state64(state);
+
+	/* Record cpu where state was captured */
+	saved_state->isf.cpu = cpu_number();
+
 	vaddr = (user_addr_t)saved_state->cr2;
 	type  = saved_state->isf.trapno;
 	code  = (int)(saved_state->isf.err & 0xffff);
@@ -601,18 +598,18 @@ kernel_trap(
 
 	myast = ast_pending();
 
-	perfCallback fn = perfASTHook;
-	if (fn) {
+	perfASTCallback astfn = perfASTHook;
+	if (__improbable(astfn != NULL)) {
 		if (*myast & AST_CHUD_ALL)
-			fn(type, NULL, 0, 0);
+			astfn(AST_CHUD_ALL, myast);
 	} else
 		*myast &= ~AST_CHUD_ALL;
 
 	/*
 	 * Is there a hook?
 	 */
-	fn = perfTrapHook;
-	if (fn) {
+	perfCallback fn = perfTrapHook;
+	if (__improbable(fn != NULL)) {
 	        if (fn(type, NULL, 0, 0) == KERN_SUCCESS) {
 		        /*
 			 * If it succeeds, we are done...
@@ -622,8 +619,8 @@ kernel_trap(
 	}
 
 #if CONFIG_DTRACE
-	if (tempDTraceTrapHook) {
-		if (tempDTraceTrapHook(type, state, 0, 0) == KERN_SUCCESS) {
+	if (__improbable(tempDTraceTrapHook != NULL)) {
+		if (tempDTraceTrapHook(type, state, lo_spp, 0) == KERN_SUCCESS) {
 			/*
 			 * If it succeeds, we are done...
 			 */
@@ -637,7 +634,7 @@ kernel_trap(
 	 * on preemption below.  but we do want to re-enable interrupts
 	 * as soon we possibly can to hold latency down
 	 */
-	if (T_PREEMPT == type) {
+	if (__improbable(T_PREEMPT == type)) {
 	        ast_taken(AST_PREEMPTION, FALSE);
 
 		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE,
@@ -651,7 +648,7 @@ kernel_trap(
 		 */
 		map = kernel_map;
 
-		if (thread != THREAD_NULL && thread->map != kernel_map) {
+		if (__probable(thread != THREAD_NULL && thread->map != kernel_map)) {
 #if NCOPY_WINDOWS > 0
 			vm_offset_t	copy_window_base;
 			vm_offset_t	kvaddr;
@@ -665,11 +662,11 @@ kernel_trap(
 			 * we only need to look at the window
 			 * associated with this processor
 			 */
-		        copy_window_base = current_cpu_datap()->cpu_copywindow_base;
+			copy_window_base = current_cpu_datap()->cpu_copywindow_base;
 
 			if (kvaddr >= copy_window_base && kvaddr < (copy_window_base + (NBPDE * NCOPY_WINDOWS)) ) {
 
-				window_index = (kvaddr - copy_window_base) / NBPDE;
+				window_index = (int)((kvaddr - copy_window_base) / NBPDE);
 
 				if (thread->machine.copy_window[window_index].user_base != (user_addr_t)-1) {
 
@@ -693,8 +690,9 @@ kernel_trap(
 				 */
 				if (no_shared_cr3 &&
 				    (thread->machine.specFlags&CopyIOActive) &&
-				    map->pmap->pm_cr3 != get_cr3()) {
-					set_cr3(map->pmap->pm_cr3);
+				    map->pmap->pm_cr3 != get_cr3_base()) {
+					pmap_assert(current_cpu_datap()->cpu_pmap_pcid_enabled == FALSE);
+					set_cr3_raw(map->pmap->pm_cr3);
 					return;
 				}
 			}
@@ -782,7 +780,7 @@ kernel_trap(
 #endif	/* MACH_KDB */
 
 #if CONFIG_DTRACE
-		if (thread->options & TH_OPT_DTRACE) {	/* Executing under dtrace_probe? */
+		if (thread != THREAD_NULL && thread->options & TH_OPT_DTRACE) {	/* Executing under dtrace_probe? */
 			if (dtrace_tally_fault(vaddr)) { /* Should a fault under dtrace be ignored? */
 				/*
 				 * DTrace has "anticipated" the possibility of this fault, and has
@@ -815,33 +813,9 @@ look_for_watchpoints:
 		if (result == KERN_SUCCESS) {
 #if NCOPY_WINDOWS > 0
 			if (fault_in_copy_window != -1) {
-			        pt_entry_t	*updp;
-				pt_entry_t	*kpdp;
-
-				/*
-				 * in case there was no page table assigned
-				 * for the user base address and the pmap
-				 * got 'expanded' due to this fault, we'll
-				 * copy in the descriptor 
-				 *
-				 * we're either setting the page table descriptor
-				 * to the same value or it was 0... no need
-				 * for a TLB flush in either case
-				 */
-
-		        ml_set_interrupts_enabled(FALSE);
-		        updp = pmap_pde(map->pmap, thread->machine.copy_window[fault_in_copy_window].user_base);
-				assert(updp);
-				if (0 == updp) panic("trap: updp 0"); /* XXX DEBUG */
-				kpdp = current_cpu_datap()->cpu_copywindow_pdp;
-				kpdp += fault_in_copy_window;
-
-#if JOE_DEBUG
-				if (*kpdp && (*kpdp & PG_FRAME) != (*updp & PG_FRAME))
-				        panic("kernel_fault: user pdp doesn't match - updp = 0x%qx, kpdp = 0x%qx\n", *updp, *kpdp);
-#endif
-				pmap_store_pte(kpdp, *updp);
-
+				ml_set_interrupts_enabled(FALSE);
+				copy_window_fault(thread, map,
+						  fault_in_copy_window);
 				(void) ml_set_interrupts_enabled(intr);
 			}
 #endif /* NCOPY_WINDOWS > 0 */
@@ -855,9 +829,6 @@ FALL_THROUGH:
 #endif /* CONFIG_DTRACE */
 
 	    case T_GENERAL_PROTECTION:
-#if defined(__x86_64__) && DEBUG
-		print_state(saved_state);
-#endif
 		/*
 		 * If there is a failure recovery address
 		 * for this fault, go there.
@@ -872,7 +843,7 @@ FALL_THROUGH:
 		/*
 		 * Check thread recovery address also.
 		 */
-		if (thread->recover) {
+		if (thread != THREAD_NULL && thread->recover) {
 			set_recovery_ip(saved_state, thread->recover);
 			thread->recover = 0;
 			return;
@@ -883,7 +854,6 @@ FALL_THROUGH:
 		 *
 		 * fall through...
 		 */
-
 	    default:
 		/*
 		 * Exception 15 is reserved but some chips may generate it
@@ -893,6 +863,9 @@ FALL_THROUGH:
 			kprintf("kernel_trap() ignoring spurious trap 15\n"); 
 			return;
 		}
+#if defined(__x86_64__) && DEBUG
+		kprint_state(saved_state);
+#endif
 debugger_entry:
 		/* Ensure that the i386_kernel_state at the base of the
 		 * current thread's stack (if any) is synchronized with the
@@ -923,7 +896,7 @@ restart_debugger:
 		}
 #endif
 	}
-
+	__asm__ volatile("cli":::"cc");
 	panic_trap(saved_state);
 	/*
 	 * NO RETURN
@@ -951,10 +924,10 @@ static void
 panic_trap(x86_saved_state32_t *regs)
 {
 	const char *trapname = "Unknown";
-	uint32_t	cr0 = get_cr0();
-	uint32_t	cr2 = get_cr2();
-	uint32_t	cr3 = get_cr3();
-	uint32_t	cr4 = get_cr4();
+	pal_cr_t	cr0, cr2, cr3, cr4;
+
+	pal_get_control_registers( &cr0, &cr2, &cr3, &cr4 );
+
 	/*
 	 * Issue an I/O port read if one has been requested - this is an
 	 * event logic analyzers can use as a trigger point.
@@ -977,7 +950,7 @@ panic_trap(x86_saved_state32_t *regs)
 	      regs->eip, regs->trapno, trapname, cr0, cr2, cr3, cr4,
 	      regs->eax,regs->ebx,regs->ecx,regs->edx,
 	      regs->cr2,regs->ebp,regs->esi,regs->edi,
-	      regs->efl,regs->eip,regs->cs, regs->ds, regs->err);
+	      regs->efl,regs->eip,regs->cs & 0xFFFF, regs->ds & 0xFFFF, regs->err);
 	/*
 	 * This next statement is not executed,
 	 * but it's needed to stop the compiler using tail call optimization
@@ -990,11 +963,11 @@ static void
 panic_trap(x86_saved_state64_t *regs)
 {
 	const char	*trapname = "Unknown";
-	uint64_t	cr0 = get_cr0();
-	uint64_t	cr2 = get_cr2();
-	uint64_t	cr3 = get_cr3();
-	uint64_t	cr4 = get_cr4();
+	pal_cr_t	cr0, cr2, cr3, cr4;
 
+	pal_get_control_registers( &cr0, &cr2, &cr3, &cr4 );
+	assert(ml_get_interrupts_enabled() == FALSE);
+	current_cpu_datap()->cpu_fatal_trap_state = regs;
 	/*
 	 * Issue an I/O port read if one has been requested - this is an
 	 * event logic analyzers can use as a trigger point.
@@ -1016,15 +989,15 @@ panic_trap(x86_saved_state64_t *regs)
 	      "R8:  0x%016llx, R9:  0x%016llx, R10: 0x%016llx, R11: 0x%016llx\n"
 	      "R12: 0x%016llx, R13: 0x%016llx, R14: 0x%016llx, R15: 0x%016llx\n"
 	      "RFL: 0x%016llx, RIP: 0x%016llx, CS:  0x%016llx, SS:  0x%016llx\n"
-	      "Error code: 0x%016llx\n",
+	      "CR2: 0x%016llx, Error code: 0x%016llx, Faulting CPU: 0x%x\n",
 	      regs->isf.rip, regs->isf.trapno, trapname,
 	      cr0, cr2, cr3, cr4,
 	      regs->rax, regs->rbx, regs->rcx, regs->rdx,
 	      regs->isf.rsp, regs->rbp, regs->rsi, regs->rdi,
 	      regs->r8,  regs->r9,  regs->r10, regs->r11,
 	      regs->r12, regs->r13, regs->r14, regs->r15,
-	      regs->isf.rflags, regs->isf.rip, regs->isf.cs,  regs->isf.ss,
-	      regs->isf.err);
+	      regs->isf.rflags, regs->isf.rip, regs->isf.cs & 0xFFFF,
+	      regs->isf.ss & 0xFFFF,regs->cr2, regs->isf.err, regs->isf.cpu);
 	/*
 	 * This next statement is not executed,
 	 * but it's needed to stop the compiler using tail call optimization
@@ -1034,181 +1007,6 @@ panic_trap(x86_saved_state64_t *regs)
 }
 #endif
 
-extern void     kprintf_break_lock(void);
-
-#ifdef __i386__
-static void
-panic_32(__unused int code, __unused int pc, __unused const char *msg, boolean_t do_mca_dump, boolean_t do_bt)
-{
-	struct i386_tss *my_ktss = current_ktss();
-
-	/* Set postcode (DEBUG only) */
-	postcode(pc);
-
-	/*
-	 * Issue an I/O port read if one has been requested - this is an
-	 * event logic analyzers can use as a trigger point.
-	 */
-	panic_io_port_read();
-
-	/*
-	 * Break kprintf lock in case of recursion,
-	 * and record originally faulted instruction address.
-	 */
-	kprintf_break_lock();
-
-	if (do_mca_dump) {
-#if CONFIG_MCA
-		/*
-		 * Dump the contents of the machine check MSRs (if any).
-		 */
-		mca_dump();
-#endif
-	}
-
-#if MACH_KDP
-	/*
-	 * Print backtrace leading to first fault:
-	 */
-	if (do_bt)
-		panic_i386_backtrace((void *) my_ktss->ebp, 10, NULL, FALSE, NULL);
-#endif
-
-	panic("%s at 0x%08x, thread:%p, code:0x%x, "
-	      "registers:\n"
-	      "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
-	      "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n"
-	      "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n"
-	      "EFL: 0x%08x, EIP: 0x%08x\n",
-		  msg,
-	      my_ktss->eip, current_thread(), code,
-	      (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(),
-	      my_ktss->eax, my_ktss->ebx, my_ktss->ecx, my_ktss->edx,
-	      my_ktss->esp, my_ktss->ebp, my_ktss->esi, my_ktss->edi,
-	      my_ktss->eflags, my_ktss->eip);
-}
-
-/*
- * Called from locore on a special reserved stack after a double-fault
- * is taken in kernel space.
- * Kernel stack overflow is one route here.
- */
-void
-panic_double_fault32(int code)
-{
-	panic_32(code, PANIC_DOUBLE_FAULT, "Double fault", FALSE, TRUE);
-}
-
-/*
- * Called from locore on a special reserved stack after a machine-check
- */
-void 
-panic_machine_check32(int code)
-{
-	panic_32(code, PANIC_MACHINE_CHECK, "Machine-check", TRUE, FALSE);
-}
-#endif /* __i386__ */
-
-static void
-panic_64(x86_saved_state_t *sp, __unused int pc, __unused const char *msg, boolean_t do_mca_dump)
-{
-	/* Set postcode (DEBUG only) */
-	postcode(pc);
-
-	/*
-	 * Issue an I/O port read if one has been requested - this is an
-	 * event logic analyzers can use as a trigger point.
-	 */
-	panic_io_port_read();
-
-	/*
-	 * Break kprintf lock in case of recursion,
-	 * and record originally faulted instruction address.
-	 */
-	kprintf_break_lock();
-
-	if (do_mca_dump) {
-#if CONFIG_MCA
-		/*
-		 * Dump the contents of the machine check MSRs (if any).
-		 */
-		mca_dump();
-#endif
-	}
-
-#ifdef __i386__
-	/*
-	 * Dump the interrupt stack frame at last kernel entry.
-	 */
-	if (is_saved_state64(sp)) {
-		x86_saved_state64_t	*ss64p = saved_state64(sp);
-		panic("%s thread:%p, trapno:0x%x, err:0x%qx, "
-		      "registers:\n"
-		      "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
-		      "RAX: 0x%016qx, RBX: 0x%016qx, RCX: 0x%016qx, RDX: 0x%016qx\n"
-		      "RSP: 0x%016qx, RBP: 0x%016qx, RSI: 0x%016qx, RDI: 0x%016qx\n"
-		      "R8:  0x%016qx, R9:  0x%016qx, R10: 0x%016qx, R11: 0x%016qx\n"
-		      "R12: 0x%016qx, R13: 0x%016qx, R14: 0x%016qx, R15: 0x%016qx\n"
-		      "RFL: 0x%016qx, RIP: 0x%016qx, CR2: 0x%016qx\n",
-			  msg,
-		      current_thread(), ss64p->isf.trapno, ss64p->isf.err,
-		      (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(),
-		      ss64p->rax, ss64p->rbx, ss64p->rcx, ss64p->rdx,
-		      ss64p->isf.rsp, ss64p->rbp, ss64p->rsi, ss64p->rdi,
-		      ss64p->r8, ss64p->r9, ss64p->r10, ss64p->r11,
-		      ss64p->r12, ss64p->r13, ss64p->r14, ss64p->r15,
-		      ss64p->isf.rflags, ss64p->isf.rip, ss64p->cr2);
-	} else {
-		x86_saved_state32_t	*ss32p = saved_state32(sp);
-		panic("%s at 0x%08x, thread:%p, trapno:0x%x, err:0x%x,"
-		      "registers:\n"
-		      "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
-		      "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n"
-		      "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n"
-		      "EFL: 0x%08x, EIP: 0x%08x\n",
-		      msg,
-			  ss32p->eip, current_thread(), ss32p->trapno, ss32p->err,
-		      (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(),
-		      ss32p->eax, ss32p->ebx, ss32p->ecx, ss32p->edx,
-		      ss32p->uesp, ss32p->ebp, ss32p->esi, ss32p->edi,
-		      ss32p->efl, ss32p->eip);
-	}
-#else
-	x86_saved_state64_t *regs = saved_state64(sp);
-	panic("%s thread:%p at 0x%016llx, registers:\n"
-	      "CR0: 0x%016lx, CR2: 0x%016lx, CR3: 0x%016lx, CR4: 0x%016lx\n"
-	      "RAX: 0x%016llx, RBX: 0x%016llx, RCX: 0x%016llx, RDX: 0x%016llx\n"
-	      "RSP: 0x%016llx, RBP: 0x%016llx, RSI: 0x%016llx, RDI: 0x%016llx\n"
-	      "R8:  0x%016llx, R9:  0x%016llx, R10: 0x%016llx, R11: 0x%016llx\n"
-	      "R12: 0x%016llx, R13: 0x%016llx, R14: 0x%016llx, R15: 0x%016llx\n"
-	      "RFL: 0x%016llx, RIP: 0x%016llx, CS:  0x%016llx, SS:  0x%016llx\n"
-	      "Error code: 0x%016llx\n",
-	      msg,
-		  current_thread(), regs->isf.rip,
-	      get_cr0(), get_cr2(), get_cr3(), get_cr4(),
-	      regs->rax, regs->rbx, regs->rcx, regs->rdx,
-	      regs->isf.rsp, regs->rbp, regs->rsi, regs->rdi,
-	      regs->r8,  regs->r9,  regs->r10, regs->r11,
-	      regs->r12, regs->r13, regs->r14, regs->r15,
-	      regs->isf.rflags, regs->isf.rip, regs->isf.cs,  regs->isf.ss,
-	      regs->isf.err);
-#endif
-}
-
-void
-panic_double_fault64(x86_saved_state_t *sp)
-{
-	panic_64(sp, PANIC_DOUBLE_FAULT, "Double fault", FALSE);
-
-}
-void
-
-panic_machine_check64(x86_saved_state_t *sp)
-{
-	panic_64(sp, PANIC_MACHINE_CHECK, "Machine Check", TRUE);
-
-}
-
 #if CONFIG_DTRACE
 extern kern_return_t dtrace_user_probe(x86_saved_state_t *);
 #endif
@@ -1231,6 +1029,7 @@ user_trap(
 	ast_t			*myast;
 	kern_return_t		kret;
 	user_addr_t		rip;
+	unsigned long 		dr6 = 0; /* 32 bit for i386, 64 bit for x86_64 */
 
 	assert((is_saved_state32(saved_state) && !thread_is_64bit(thread)) ||
 	       (is_saved_state64(saved_state) &&  thread_is_64bit(thread)));
@@ -1240,6 +1039,9 @@ user_trap(
 
 		regs = saved_state64(saved_state);
 
+		/* Record cpu where state was captured */
+		regs->isf.cpu = cpu_number();
+
 		type = regs->isf.trapno;
 		err  = (int)regs->isf.err & 0xffff;
 		vaddr = (user_addr_t)regs->cr2;
@@ -1249,12 +1051,26 @@ user_trap(
 
 		regs = saved_state32(saved_state);
 
+		/* Record cpu where state was captured */
+		regs->cpu = cpu_number();
+
 		type  = regs->trapno;
 		err   = regs->err & 0xffff;
 		vaddr = (user_addr_t)regs->cr2;
 		rip   = (user_addr_t)regs->eip;
 	}
 
+	if ((type == T_DEBUG) && thread->machine.ids) {
+		unsigned long clear = 0;
+		/* Stash and clear this processor's DR6 value, in the event
+		 * this was a debug register match
+		 */
+		__asm__ volatile ("mov %%db6, %0" : "=r" (dr6)); 
+		__asm__ volatile ("mov %0, %%db6" : : "r" (clear));
+	}
+
+	pal_sti();
+
 	KERNEL_DEBUG_CONSTANT(
 		(MACHDBG_CODE(DBG_MACH_EXCP_UTRAP_x86, type)) | DBG_FUNC_NONE,
 		(unsigned)(vaddr>>32), (unsigned)vaddr,
@@ -1268,17 +1084,18 @@ user_trap(
 	kprintf("user_trap(0x%08x) type=%d vaddr=0x%016llx\n",
 		saved_state, type, vaddr);
 #endif
-	perfCallback fn = perfASTHook;
-	if (fn) {
+
+	perfASTCallback astfn = perfASTHook;
+	if (__improbable(astfn != NULL)) {
 		myast = ast_pending();
 		if (*myast & AST_CHUD_ALL) {
-			fn(type, saved_state, 0, 0);
+			astfn(AST_CHUD_ALL, myast);
 		}
 	}
 
 	/* Is there a hook? */
-	fn = perfTrapHook;
-	if (fn) {
+	perfCallback fn = perfTrapHook;
+	if (__improbable(fn != NULL)) {
 		if (fn(type, saved_state, 0, 0) == KERN_SUCCESS)
 			return;	/* If it succeeds, we are done... */
 	}
@@ -1291,7 +1108,7 @@ user_trap(
 	DEBUG_KPRINT_SYSCALL_MASK(1,
 		"user_trap: type=0x%x(%s) err=0x%x cr2=%p rip=%p\n",
 		type, trap_type[type], err, (void *)(long) vaddr, (void *)(long) rip);
-
+	
 	switch (type) {
 
 	    case T_DIVIDE_ERROR:
@@ -1302,12 +1119,11 @@ user_trap(
 	    case T_DEBUG:
 		{
 			pcb_t	pcb;
-			long clear = 0; /* 32 bit for i386, 64 bit for x86_64 */
 			/*
-			 * get dr6 and set it in the thread's pcb before
-			 * returning to userland
+			 * Update the PCB with this processor's DR6 value
+			 * in the event this was a debug register match.
 			 */
-			pcb = thread->machine.pcb;
+			pcb = THREAD_TO_PCB(thread);
 			if (pcb->ids) {
 				/*
 				 * We can get and set the status register
@@ -1315,16 +1131,13 @@ user_trap(
 				 * because the high order bits are not
 				 * used on x86_64
 				 */
-				unsigned long dr6_temp; /* 32 bit for i386, 64 bit for x86_64 */
-				__asm__ volatile ("mov %%db6, %0" : "=r" (dr6_temp)); /* Register constraint by necessity */
 				if (thread_is_64bit(thread)) {
 					x86_debug_state64_t *ids = pcb->ids;
-					ids->dr6 = dr6_temp;
+					ids->dr6 = dr6;
 				} else { /* 32 bit thread */
 					x86_debug_state32_t *ids = pcb->ids;
-					ids->dr6 = (uint32_t) dr6_temp;
+					ids->dr6 = (uint32_t) dr6;
 				}
-				__asm__ volatile ("mov %0, %%db6" : : "r" (clear));
 			}
 			exc = EXC_BREAKPOINT;
 			code = EXC_I386_SGL;
@@ -1406,7 +1219,7 @@ user_trap(
 		if (err & T_PF_WRITE)
 		        prot |= VM_PROT_WRITE;
 #if     PAE
-		if (err & T_PF_EXECUTE)
+		if (__improbable(err & T_PF_EXECUTE))
 		        prot |= VM_PROT_EXECUTE;
 #endif
 		kret = vm_fault(thread->map, vm_map_trunc_page(vaddr),
@@ -1462,8 +1275,6 @@ user_trap(
 
 /*
  * Handle AST traps for i386.
- * Check for delayed floating-point exception from
- * AT-bus machines.
  */
 
 extern void     log_thread_action (thread_t, char *);
@@ -1511,44 +1322,6 @@ i386_exception(
 }
 
 
-
-void
-kernel_preempt_check(void)
-{
-	ast_t		*myast;
-	boolean_t	intr;
-
-	/*
-	 * disable interrupts to both prevent pre-emption
-	 * and to keep the ast state from changing via
-	 * an interrupt handler making something runnable
-	 */
-	intr = ml_set_interrupts_enabled(FALSE);
-
-	myast = ast_pending();
-
-	if ((*myast & AST_URGENT) && intr == TRUE && get_interrupt_level() == 0) {
-		/*
-		 * can handle interrupts and preemptions 
-		 * at this point
-		 */
-		ml_set_interrupts_enabled(intr);
-
-		/*
-		 * now cause the PRE-EMPTION trap
-		 */
-		__asm__ volatile ("     int     $0xff");
-	} else {
-		/*
-		 * if interrupts were already disabled or
-		 * we're in an interrupt context, we can't
-		 * preempt...  of course if AST_URGENT
-		 * isn't set we also don't want to
-		 */
-		ml_set_interrupts_enabled(intr);
-	}
-}
-
 #if	MACH_KDB
 
 extern void 	db_i386_state(x86_saved_state32_t *regs);
@@ -1595,6 +1368,10 @@ sync_iss_to_iks(x86_saved_state_t *saved_state)
 	vm_offset_t kstack;
 	boolean_t record_active_regs = FALSE;
 
+	/* The PAL may have a special way to sync registers */
+	if( saved_state->flavor == THREAD_STATE_NONE )
+		pal_get_kern_regs( saved_state );
+
 	if ((kstack = current_thread()->kernel_stack) != 0) {
 #ifdef __i386__
 		x86_saved_state32_t	*regs = saved_state32(saved_state);
@@ -1604,8 +1381,7 @@ sync_iss_to_iks(x86_saved_state_t *saved_state)
 
 		iks = STACK_IKS(kstack);
 
-
-		 /* Did we take the trap/interrupt in kernel mode? */
+		/* Did we take the trap/interrupt in kernel mode? */
 #ifdef __i386__
 		if (regs == USER_REGS32(current_thread()))
 		        record_active_regs = TRUE;
diff --git a/osfmk/i386/trap.h b/osfmk/i386/trap.h
index ff00c7476..f24141112 100644
--- a/osfmk/i386/trap.h
+++ b/osfmk/i386/trap.h
@@ -114,7 +114,6 @@
 #define T_PF_EXECUTE		0x10		/* instruction fetch when NX */
 #endif
 
-
 #if !defined(ASSEMBLER) && defined(MACH_KERNEL)
 
 #include <i386/thread.h>
@@ -129,7 +128,7 @@ extern void		sync_iss_to_iks(x86_saved_state_t *regs);
 extern void		sync_iss_to_iks_unconditionally(
 				x86_saved_state_t	*regs);
 
-extern void		kernel_trap(x86_saved_state_t *regs);
+extern void		kernel_trap(x86_saved_state_t *regs, uintptr_t *lo_spp);
 
 extern void		user_trap(x86_saved_state_t *regs);
 
@@ -148,11 +147,13 @@ extern void		i386_astintr(int preemption);
 typedef kern_return_t (*perfCallback)(
 				int			trapno,
 				void			*regs,
-				int			unused1,
-				int			unused2);
+				uintptr_t		*lo_spp,
+				int);
+
+typedef kern_return_t (*perfASTCallback)(ast_t reasons, ast_t *myast);
 
 extern volatile perfCallback perfTrapHook;
-extern volatile perfCallback perfASTHook;
+extern volatile perfASTCallback perfASTHook;
 extern volatile perfCallback perfIntHook;
 
 extern void		panic_i386_backtrace(void *, int, const char *, boolean_t, x86_saved_state_t *);
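
A sketch of a client of the split hook types. The two typedefs match the hunk above; everything else (`ast_t`, the hook globals) is declared locally as a stand-in so the fragment is self-contained:

```c
#include <stdint.h>

typedef uint32_t ast_t;			/* stand-in; the kernel defines this */
typedef int kern_return_t;
#define KERN_SUCCESS 0

typedef kern_return_t (*perfCallback)(int trapno, void *regs,
				      uintptr_t *lo_spp, int);
typedef kern_return_t (*perfASTCallback)(ast_t reasons, ast_t *myast);

volatile perfCallback perfTrapHook;	/* stand-ins for the kernel globals */
volatile perfASTCallback perfASTHook;

static kern_return_t my_trap_hook(int trapno, void *regs,
				  uintptr_t *lo_spp, int unused)
{
	(void)trapno; (void)regs; (void)lo_spp; (void)unused;
	return KERN_SUCCESS;		/* claim the trap; kernel_trap returns */
}

static kern_return_t my_ast_hook(ast_t reasons, ast_t *myast)
{
	*myast &= ~reasons;		/* consume the CHUD AST bits */
	return KERN_SUCCESS;
}

void install_hooks(void)
{
	perfTrapHook = my_trap_hook;
	perfASTHook  = my_ast_hook;
}
```
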
diff --git a/osfmk/i386/trap_native.c b/osfmk/i386/trap_native.c
new file mode 100644
index 000000000..26a9cbf07
--- /dev/null
+++ b/osfmk/i386/trap_native.c
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * @OSF_COPYRIGHT@
+ */
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+/*
+ * Hardware trap/fault handler.
+ */
+
+#include <types.h>
+#include <i386/eflags.h>
+#include <i386/trap.h>
+#include <i386/pmap.h>
+#include <i386/fpu.h>
+#include <i386/misc_protos.h> /* panic_io_port_read() */
+
+#include <mach/exception.h>
+#include <mach/kern_return.h>
+#include <mach/vm_param.h>
+#include <mach/i386/thread_status.h>
+
+#include <kern/kern_types.h>
+#include <kern/processor.h>
+#include <kern/thread.h>
+#include <kern/task.h>
+#include <kern/sched.h>
+#include <kern/sched_prim.h>
+#include <kern/exception.h>
+#include <kern/spl.h>
+#include <kern/misc_protos.h>
+#include <kern/debug.h>
+
+#include <sys/kdebug.h>
+
+#include <string.h>
+
+#include <i386/postcode.h>
+#include <i386/mp_desc.h>
+#include <i386/proc_reg.h>
+#if CONFIG_MCA
+#include <i386/machine_check.h>
+#endif
+#include <mach/i386/syscall_sw.h>
+
+#include <machine/pal_routines.h>
+#include <libkern/OSAtomic.h>
+
+extern void kprintf_break_lock(void);
+extern void kprint_state(x86_saved_state64_t *saved_state);
+void panic_64(x86_saved_state_t *, int, const char *, boolean_t);
+
+extern volatile int panic_double_fault_cpu;
+
+#if defined(__x86_64__) && DEBUG
+/*
+ * K64 debug - fatal handler for debug code in the trap vectors.
+ */
+extern void
+panic_idt64(x86_saved_state_t *rsp);
+void
+panic_idt64(x86_saved_state_t *rsp)
+{
+	kprint_state(saved_state64(rsp));
+	panic("panic_idt64");
+}
+#endif
+
+#ifdef __i386__
+static void
+panic_32(__unused int code, __unused int pc, __unused const char *msg, boolean_t do_mca_dump, boolean_t do_bt)
+{
+	struct i386_tss *my_ktss = current_ktss();
+
+	/* Set postcode (DEBUG only) */
+	postcode(pc);
+
+	/*
+	 * Issue an I/O port read if one has been requested - this is an
+	 * event logic analyzers can use as a trigger point.
+	 */
+	panic_io_port_read();
+
+	/*
+	 * Break kprintf lock in case of recursion,
+	 * and record originally faulted instruction address.
+	 */
+	kprintf_break_lock();
+
+	if (do_mca_dump) {
+#if CONFIG_MCA
+		/*
+		 * Dump the contents of the machine check MSRs (if any).
+		 */
+		mca_dump();
+#endif
+	}
+
+#if MACH_KDP
+	/*
+	 * Print backtrace leading to first fault:
+	 */
+	if (do_bt)
+		panic_i386_backtrace((void *) my_ktss->ebp, 10, NULL, FALSE, NULL);
+#endif
+
+	panic("%s at 0x%08x, code:0x%x, "
+	      "registers:\n"
+	      "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
+	      "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n"
+	      "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n"
+	      "EFL: 0x%08x, EIP: 0x%08x\n",
+		  msg,
+	      my_ktss->eip, code,
+	      (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(),
+	      my_ktss->eax, my_ktss->ebx, my_ktss->ecx, my_ktss->edx,
+	      my_ktss->esp, my_ktss->ebp, my_ktss->esi, my_ktss->edi,
+	      my_ktss->eflags, my_ktss->eip);
+}
+
+/*
+ * Called from locore on a special reserved stack after a double-fault
+ * is taken in kernel space.
+ * Kernel stack overflow is one route here.
+ */
+void
+panic_double_fault32(int code)
+{
+	(void)OSCompareAndSwap((UInt32) -1, (UInt32) cpu_number(), (volatile UInt32 *)&panic_double_fault_cpu);
+	panic_32(code, PANIC_DOUBLE_FAULT, "Double fault", FALSE, TRUE);
+}
+
+/*
+ * Called from locore on a special reserved stack after a machine-check
+ */
+void 
+panic_machine_check32(int code)
+{
+	panic_32(code, PANIC_MACHINE_CHECK, "Machine-check", TRUE, FALSE);
+}
+#endif /* __i386__ */
+
+void
+panic_64(x86_saved_state_t *sp, __unused int pc, __unused const char *msg, boolean_t do_mca_dump)
+{
+	/* Set postcode (DEBUG only) */
+	postcode(pc);
+
+	/*
+	 * Issue an I/O port read if one has been requested - this is an
+	 * event logic analyzers can use as a trigger point.
+	 */
+	panic_io_port_read();
+
+	/*
+	 * Break kprintf lock in case of recursion,
+	 * and record originally faulted instruction address.
+	 */
+	kprintf_break_lock();
+
+	if (do_mca_dump) {
+#if CONFIG_MCA
+		/*
+		 * Dump the contents of the machine check MSRs (if any).
+		 */
+		mca_dump();
+#endif
+	}
+
+#ifdef __i386__
+	/*
+	 * Dump the interrupt stack frame at last kernel entry.
+	 */
+	if (is_saved_state64(sp)) {
+		x86_saved_state64_t	*ss64p = saved_state64(sp);
+		panic("%s trapno:0x%x, err:0x%qx, "
+		      "registers:\n"
+		      "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
+		      "RAX: 0x%016qx, RBX: 0x%016qx, RCX: 0x%016qx, RDX: 0x%016qx\n"
+		      "RSP: 0x%016qx, RBP: 0x%016qx, RSI: 0x%016qx, RDI: 0x%016qx\n"
+		      "R8:  0x%016qx, R9:  0x%016qx, R10: 0x%016qx, R11: 0x%016qx\n"
+		      "R12: 0x%016qx, R13: 0x%016qx, R14: 0x%016qx, R15: 0x%016qx\n"
+		      "RFL: 0x%016qx, RIP: 0x%016qx, CR2: 0x%016qx\n",
+			  msg,
+		      ss64p->isf.trapno, ss64p->isf.err,
+		      (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(),
+		      ss64p->rax, ss64p->rbx, ss64p->rcx, ss64p->rdx,
+		      ss64p->isf.rsp, ss64p->rbp, ss64p->rsi, ss64p->rdi,
+		      ss64p->r8, ss64p->r9, ss64p->r10, ss64p->r11,
+		      ss64p->r12, ss64p->r13, ss64p->r14, ss64p->r15,
+		      ss64p->isf.rflags, ss64p->isf.rip, ss64p->cr2);
+	} else {
+		x86_saved_state32_t	*ss32p = saved_state32(sp);
+		panic("%s at 0x%08x, trapno:0x%x, err:0x%x,"
+		      "registers:\n"
+		      "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
+		      "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n"
+		      "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n"
+		      "EFL: 0x%08x, EIP: 0x%08x\n",
+		      msg,
+			  ss32p->eip, ss32p->trapno, ss32p->err,
+		      (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(),
+		      ss32p->eax, ss32p->ebx, ss32p->ecx, ss32p->edx,
+		      ss32p->uesp, ss32p->ebp, ss32p->esi, ss32p->edi,
+		      ss32p->efl, ss32p->eip);
+	}
+#else
+	x86_saved_state64_t *regs = saved_state64(sp);
+	panic("%s at 0x%016llx, registers:\n"
+	      "CR0: 0x%016lx, CR2: 0x%016lx, CR3: 0x%016lx, CR4: 0x%016lx\n"
+	      "RAX: 0x%016llx, RBX: 0x%016llx, RCX: 0x%016llx, RDX: 0x%016llx\n"
+	      "RSP: 0x%016llx, RBP: 0x%016llx, RSI: 0x%016llx, RDI: 0x%016llx\n"
+	      "R8:  0x%016llx, R9:  0x%016llx, R10: 0x%016llx, R11: 0x%016llx\n"
+	      "R12: 0x%016llx, R13: 0x%016llx, R14: 0x%016llx, R15: 0x%016llx\n"
+	      "RFL: 0x%016llx, RIP: 0x%016llx, CS:  0x%016llx, SS:  0x%016llx\n"
+	      "Error code: 0x%016llx\n",
+	      msg,
+		  regs->isf.rip,
+	      get_cr0(), get_cr2(), get_cr3_raw(), get_cr4(),
+	      regs->rax, regs->rbx, regs->rcx, regs->rdx,
+	      regs->isf.rsp, regs->rbp, regs->rsi, regs->rdi,
+	      regs->r8,  regs->r9,  regs->r10, regs->r11,
+	      regs->r12, regs->r13, regs->r14, regs->r15,
+	      regs->isf.rflags, regs->isf.rip, regs->isf.cs & 0xFFFF,  regs->isf.ss & 0xFFFF,
+	      regs->isf.err);
+#endif
+}
+
+void
+panic_double_fault64(x86_saved_state_t *sp)
+{
+	(void)OSCompareAndSwap((UInt32) -1, (UInt32) cpu_number(), (volatile UInt32 *)&panic_double_fault_cpu);
+	panic_64(sp, PANIC_DOUBLE_FAULT, "Double fault", FALSE);
+
+}
+void
+panic_machine_check64(x86_saved_state_t *sp)
+{
+	panic_64(sp, PANIC_MACHINE_CHECK, "Machine Check", TRUE);
+
+}
diff --git a/osfmk/i386/tsc.c b/osfmk/i386/tsc.c
index 9e794797b..5205da23e 100644
--- a/osfmk/i386/tsc.c
+++ b/osfmk/i386/tsc.c
@@ -51,7 +51,6 @@
 #include <mach/vm_prot.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>		/* for kernel_map */
-#include <i386/ipl.h>
 #include <architecture/i386/pio.h>
 #include <i386/machine_cpu.h>
 #include <i386/cpuid.h>
diff --git a/osfmk/i386/tsc.h b/osfmk/i386/tsc.h
index f6c5eba78..2f6011b93 100644
--- a/osfmk/i386/tsc.h
+++ b/osfmk/i386/tsc.h
@@ -42,7 +42,9 @@
 
 #define BASE_NHM_CLOCK_SOURCE 	133333333ULL
 #define IA32_PERF_STS		0x198
+#define	SLOW_TSC_THRESHOLD	1000067800	/* TSC is too slow for regular nanotime() algorithm */
 
+#ifndef ASSEMBLER
 extern uint64_t	busFCvtt2n;
 extern uint64_t	busFCvtn2t;
 extern uint64_t tscFreq;
@@ -74,6 +76,6 @@ typedef struct tscInfo tscInfo_t;
 extern void tsc_get_info(tscInfo_t *info);
 
 extern void tsc_init(void);
-
+#endif /* ASSEMBLER */
 #endif /* _I386_TSC_H_ */
 #endif /* KERNEL_PRIVATE */
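
The `#ifndef ASSEMBLER` bracket added above is the usual idiom for a header shared between C and assembly units: bare `#define` constants stay visible to the assembler, while C-only declarations are hidden. A minimal sketch (hypothetical header):

```c
/* shared_header.h -- usable from both .c and .s files (sketch) */
#define SLOW_TSC_THRESHOLD	1000067800	/* plain constant: both worlds */

#ifndef ASSEMBLER			/* assembly builds define ASSEMBLER */
#include <stdint.h>
extern uint64_t tscFreq;		/* C-only declarations */
extern void tsc_init(void);
#endif /* ASSEMBLER */
```
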
diff --git a/osfmk/i386/ucode.c b/osfmk/i386/ucode.c
new file mode 100644
index 000000000..e416cc5ff
--- /dev/null
+++ b/osfmk/i386/ucode.c
@@ -0,0 +1,201 @@
+/*
+ *  ucode.c
+ *
+ *  Microcode updater sysctl interface
+ */
+
+#include <kern/locks.h>
+#include <i386/ucode.h>
+#include <sys/errno.h>
+#include <i386/proc_reg.h>
+#include <i386/cpuid.h>
+#include <vm/vm_kern.h>
+#include <i386/mp.h>			// mp_broadcast
+#include <machine/cpu_number.h> // cpu_number
+
+#define IA32_BIOS_UPDT_TRIG (0x79) /* microcode update trigger MSR */
+
+struct intel_ucupdate *global_update = NULL;
+
+/* Execute the actual update */
+static void
+update_microcode(void)
+{
+	/* SDM Example 9-8 code shows that we load the
+	 * address of the UpdateData within the microcode blob,
+	 * not the address of the header.
+	 */
+	wrmsr64(IA32_BIOS_UPDT_TRIG, (uint64_t)(uintptr_t)&global_update->data);
+}
+
+/* locks */
+static lck_grp_attr_t *ucode_slock_grp_attr = NULL;
+static lck_grp_t *ucode_slock_grp = NULL;
+static lck_attr_t *ucode_slock_attr = NULL;
+static lck_spin_t *ucode_slock = NULL;
+
+static kern_return_t
+register_locks(void)
+{
+	/* already allocated? */
+	if (ucode_slock_grp_attr && ucode_slock_grp && ucode_slock_attr && ucode_slock)
+		return KERN_SUCCESS;
+
+	/* allocate lock group attribute and group */
+	if (!(ucode_slock_grp_attr = lck_grp_attr_alloc_init()))
+		goto nomem_out;
+
+	lck_grp_attr_setstat(ucode_slock_grp_attr);
+
+	if (!(ucode_slock_grp = lck_grp_alloc_init("ucode_lock", ucode_slock_grp_attr)))
+		goto nomem_out;
+
+	/* Allocate lock attribute */
+	if (!(ucode_slock_attr = lck_attr_alloc_init()))
+		goto nomem_out;
+
+	/* Allocate the spin lock */
+	/* We keep one global spin lock. We could have one per update
+	 * request, but updates are rare enough that a single lock suffices.
+	 */
+	if (!(ucode_slock = lck_spin_alloc_init(ucode_slock_grp, ucode_slock_attr)))
+		goto nomem_out;
+
+	return KERN_SUCCESS;
+
+nomem_out:
+	/* clean up */
+	if (ucode_slock)
+		lck_spin_free(ucode_slock, ucode_slock_grp);
+	if (ucode_slock_attr)
+		lck_attr_free(ucode_slock_attr);
+	if (ucode_slock_grp)
+		lck_grp_free(ucode_slock_grp);
+	if (ucode_slock_grp_attr)
+		lck_grp_attr_free(ucode_slock_grp_attr);
+
+	return KERN_NO_SPACE;
+}
+
+/* Copy in an update */
+static int
+copyin_update(uint64_t inaddr)
+{
+	struct intel_ucupdate update_header;
+	struct intel_ucupdate *update;
+	vm_size_t size;
+	kern_return_t ret;
+	int error;
+
+	/* Copy in enough header to peek at the size */
+	error = copyin((user_addr_t)inaddr, (void *)&update_header, sizeof(update_header));
+	if (error)
+		return error;
+
+	/* Get the actual, alleged size */
+	size = update_header.total_size;
+
+	/* Reject implausibly large (bogus) update sizes */
+	if (size >= 1024 * 1024)
+		return ENOMEM;
+
+	/* Old microcodes? */
+	if (size == 0)
+		size = 2048; /* default update size; see SDM */
+
+	/*
+	 * Create the buffer for the update.
+	 * It need only be 16-byte aligned, according to the SDM.
+	 * This also wires it down.
+	 */
+	ret = kmem_alloc_kobject(kernel_map, (vm_offset_t *)&update, size);
+	if (ret != KERN_SUCCESS)
+		return ENOMEM;
+
+	/* Copy it in */
+	error = copyin((user_addr_t)inaddr, (void*)update, size);
+	if (error) {
+		kmem_free(kernel_map, (vm_offset_t)update, size);
+		return error;
+	}
+
+	global_update = update;
+	return 0;
+}
+
+/*
+ * This is called once by every CPU on a wake from sleep/hibernate
+ * and is meant to re-apply a microcode update that got lost
+ * by sleeping.
+ */
+void
+ucode_update_wake()
+{
+	if (global_update) {
+		kprintf("ucode: Re-applying update after wake (CPU #%d)\n", cpu_number());
+		update_microcode();
+#ifdef DEBUG
+	} else {
+		kprintf("ucode: No update to apply (CPU #%d)\n", cpu_number());
+#endif
+	}
+}
+
+static void
+cpu_update(__unused void *arg)
+{
+	/* grab the lock */
+	lck_spin_lock(ucode_slock);
+
+	/* execute the update */
+	update_microcode();
+
+	/* if CPU #0, update global CPU information */
+	if (!cpu_number())
+		cpuid_set_info();
+
+	/* release the lock */
+	lck_spin_unlock(ucode_slock);
+}
+
+/* Farm an update out to all CPUs */
+static void
+xcpu_update(void)
+{
+	if (register_locks() != KERN_SUCCESS)
+		return;
+
+	/* Get all CPUs to perform the update */
+	mp_broadcast(cpu_update, NULL);
+}
+
+/*
+ * sysctl entry point
+ */
+int
+ucode_interface(uint64_t addr)
+{
+	int error;
+
+#if !DEBUG
+	/*
+	 * Userland may only call this once per boot. Anything else
+	 * would not make sense (all updates are cumulative), and would
+	 * also leak memory, because we don't free previous updates.
+	 */
+	if (global_update)
+		return EPERM;
+#endif
+
+	/* Get the whole microcode */
+	error = copyin_update(addr);
+
+	if (error)
+		return error;
+
+	/* Farm out the updates */
+	xcpu_update();
+
+	return 0;
+}
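
For orientation only, a hypothetical userland caller. `ucode_interface()` shows that the kernel side receives the update's user address as a `uint64_t`; the sysctl node name used below is an assumption, not confirmed by this patch:

```c
/* Hypothetical userland sketch -- the sysctl name is an assumption. */
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>

int push_microcode(void *blob)
{
	/* The kernel expects the user address of the update as a uint64_t. */
	uint64_t addr = (uint64_t)(uintptr_t)blob;

	if (sysctlbyname("machdep.cpu.ucupdate",	/* assumed node name */
			 NULL, NULL, &addr, sizeof(addr)) != 0) {
		perror("ucupdate");
		return -1;
	}
	return 0;	/* kernel copied in and broadcast the update */
}
```
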
diff --git a/osfmk/i386/ucode.h b/osfmk/i386/ucode.h
new file mode 100644
index 000000000..55dc70645
--- /dev/null
+++ b/osfmk/i386/ucode.h
@@ -0,0 +1,30 @@
+/*
+ *  ucode.h
+ *
+ *  Interface definitions for the microcode updater sysctl
+ */
+
+/* Intel defined microcode format */
+struct intel_ucupdate {
+	/* Header information */
+	uint32_t header_version;
+	uint32_t update_revision;
+	uint32_t date;
+	uint32_t processor_signature;
+	uint32_t checksum;
+	uint32_t loader_revision;
+	uint32_t processor_flags;
+	uint32_t data_size;
+	uint32_t total_size;
+
+	/* Reserved for future expansion */
+	uint32_t reserved0;
+	uint32_t reserved1;
+	uint32_t reserved2;
+
+	/* First word of the update data */
+	uint32_t data;
+};
+
+extern int ucode_interface(uint64_t addr);
+extern void ucode_update_wake(void);
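
A compact restatement of the size rules `copyin_update()` applies to this header, using a trimmed stand-in struct (the real layout is `struct intel_ucupdate` above):

```c
#include <stddef.h>
#include <stdint.h>

/* Trimmed stand-in; the real layout is struct intel_ucupdate above. */
struct ucupdate_hdr {
	uint32_t total_size;
};

/* Mirrors the size rules in copyin_update(); illustrative only. */
static size_t ucode_effective_size(const struct ucupdate_hdr *hdr, int *err)
{
	size_t size = hdr->total_size;

	if (size >= 1024 * 1024) {	/* reject bogus sizes */
		*err = 1;
		return 0;
	}
	if (size == 0)			/* old-format updates omit it */
		size = 2048;		/* default update size; see SDM */
	*err = 0;
	return size;
}
```
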
diff --git a/osfmk/i386/user_ldt.c b/osfmk/i386/user_ldt.c
index 45f51361f..8b5791455 100644
--- a/osfmk/i386/user_ldt.c
+++ b/osfmk/i386/user_ldt.c
@@ -237,7 +237,7 @@ i386_set_ldt(
 		    case 0:
 		    case ACC_P:
 			/* valid empty descriptor, clear Present preemptively */
-			dp->access &= ~ACC_P;
+			dp->access &= (~ACC_P & 0xff);
 			break;
 		    case ACC_P | ACC_PL_U | ACC_DATA:
 		    case ACC_P | ACC_PL_U | ACC_DATA_W:
@@ -389,7 +389,7 @@ user_ldt_set(
 	    bcopy(user_ldt->ldt, &ldtp[user_ldt->start],
 		  sizeof(struct real_descriptor) * (user_ldt->count));
 
-	    gdt_desc_p(USER_LDT)->limit_low = (sizeof(struct real_descriptor) * (user_ldt->start + user_ldt->count)) - 1;
+	    gdt_desc_p(USER_LDT)->limit_low = (uint16_t)((sizeof(struct real_descriptor) * (user_ldt->start + user_ldt->count)) - 1);
 
 	    ml_cpu_set_ldt(USER_LDT);
 	} else {
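
Both user_ldt.c tweaks above are narrowing fixes: `~ACC_P` is a full-width `int` with high bits set, so storing it into an 8-bit access field (or a `size_t` product into the 16-bit `limit_low`) wants an explicit mask or cast. A compact sketch with stand-in types:

```c
#include <stddef.h>
#include <stdint.h>

#define ACC_P 0x80			/* Present bit, as in the descriptor */

struct fake_descriptor {
	uint8_t  access;
	uint16_t limit_low;
};

void clear_present(struct fake_descriptor *dp, size_t nbytes)
{
	/* ~ACC_P is 0xffffff7f as an int; mask to the field's width first. */
	dp->access &= (~ACC_P & 0xff);

	/* Likewise, truncate the computed limit explicitly. */
	dp->limit_low = (uint16_t)(nbytes - 1);
}
```
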
diff --git a/osfmk/i386/vmx/vmx_asm.h b/osfmk/i386/vmx/vmx_asm.h
index bd0de4688..c295f6b03 100644
--- a/osfmk/i386/vmx/vmx_asm.h
+++ b/osfmk/i386/vmx/vmx_asm.h
@@ -39,7 +39,7 @@
 #define VMX_FAIL_VALID		-2
 #define VMX_SUCCEED			0
 
-static inline void enter_64bit_mode(void) {
+__attribute__((always_inline)) static inline void enter_64bit_mode(void) {
 	__asm__ __volatile__ (
 		".byte   0xea    /* far jump longmode */	\n\t"
 		".long   1f					\n\t"
@@ -49,7 +49,7 @@ static inline void enter_64bit_mode(void) {
 		:: "i" (KERNEL64_CS)
 	);
 }
-static inline void enter_compat_mode(void) {
+__attribute__((always_inline)) static inline void enter_compat_mode(void) {
 	asm(
 		"ljmp    *4f					\n\t"
 	"4:							\n\t"
diff --git a/osfmk/i386/vmx/vmx_cpu.c b/osfmk/i386/vmx/vmx_cpu.c
index 34bd07acc..22cebe2d8 100644
--- a/osfmk/i386/vmx/vmx_cpu.c
+++ b/osfmk/i386/vmx/vmx_cpu.c
@@ -35,7 +35,6 @@
 #include <i386/vmx/vmx_asm.h>
 #include <i386/vmx/vmx_shims.h>
 #include <i386/vmx/vmx_cpu.h>
-#include <i386/mtrr.h>
 #include <mach/mach_host.h>             /* for host_info() */
 
 #define VMX_KPRINTF(x...) /* kprintf("vmx: " x) */
@@ -190,7 +189,7 @@ vmx_get_specs()
 	Enter VMX root operation on this CPU.
    -------------------------------------------------------------------------- */
 static void
-vmx_on(void)
+vmx_on(void *arg __unused)
 {
 	vmx_cpu_t *cpu = &current_cpu_datap()->cpu_vmx;
 	addr64_t vmxon_region_paddr;
@@ -222,7 +221,7 @@ vmx_on(void)
 	Leave VMX root operation on this CPU.
    -------------------------------------------------------------------------- */
 static void
-vmx_off(void)
+vmx_off(void *arg __unused)
 {
 	int result;
 	
@@ -322,7 +321,7 @@ host_vmxon(boolean_t exclusive)
 
 	if (do_it) {
 		vmx_allocate_vmxon_regions();
-		mp_rendezvous(NULL, (void (*)(void *))vmx_on, NULL, NULL);
+		mp_rendezvous(NULL, vmx_on, NULL, NULL);
 	}
 	return error;
 }
@@ -348,7 +347,7 @@ host_vmxoff()
 	simple_unlock(&vmx_use_count_lock);
 
 	if (do_it) {
-		mp_rendezvous(NULL, (void (*)(void *))vmx_off, NULL, NULL);
+		mp_rendezvous(NULL, vmx_off, NULL, NULL);
 		vmx_free_vmxon_regions();
 	}
 
@@ -365,7 +364,7 @@ vmx_suspend()
 {
 	VMX_KPRINTF("vmx_suspend\n");
 	if (vmx_use_count)
-		vmx_off();
+		vmx_off(NULL);
 }
 
 /* -----------------------------------------------------------------------------
@@ -378,5 +377,5 @@ vmx_resume()
 	VMX_KPRINTF("vmx_resume\n");
 	vmx_init(); /* init VMX on CPU #0 */
 	if (vmx_use_count)
-		vmx_on();
+		vmx_on(NULL);
 }
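
The signature change to vmx_on/vmx_off removes a function-pointer cast: calling a `void (*)(void)` through a `void (*)(void *)` is undefined behavior in C, so the callees now accept (and ignore) the argument mp_rendezvous passes. A stand-alone sketch of the before/after (`run_on_all_cpus` is a stand-in for mp_rendezvous):

```c
#include <stdio.h>

/* Stand-in for mp_rendezvous()'s action-function contract. */
static void run_on_all_cpus(void (*action)(void *), void *arg)
{
	action(arg);	/* real code would run this on every CPU */
}

/* After the fix: match the expected signature, ignore the argument. */
static void vmx_on_sketch(void *arg __attribute__((unused)))
{
	puts("VMXON");
}

int main(void)
{
	/* Before: run_on_all_cpus((void (*)(void *))vmx_on_void, NULL);
	 * calls through an incompatible type -- undefined behavior. */
	run_on_all_cpus(vmx_on_sketch, NULL);
	return 0;
}
```
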
diff --git a/osfmk/ipc/ipc_entry.c b/osfmk/ipc/ipc_entry.c
index b468943ac..595660239 100644
--- a/osfmk/ipc/ipc_entry.c
+++ b/osfmk/ipc/ipc_entry.c
@@ -236,7 +236,7 @@ ipc_entry_get(
 
 		gen = IE_BITS_NEW_GEN(free_entry->ie_bits);
 		free_entry->ie_bits = gen;
-		free_entry->ie_request = 0;
+		free_entry->ie_request = IE_REQ_NONE;
 
 		/*
 		 *	The new name can't be MACH_PORT_NULL because index
@@ -377,7 +377,7 @@ ipc_entry_alloc_name(
 					table[next_index].ie_next;
 
 				entry->ie_bits = gen;
-				entry->ie_request = 0;
+				entry->ie_request = IE_REQ_NONE;
 				*entryp = entry;
 
 				assert(entry->ie_object == IO_NULL);
@@ -516,7 +516,12 @@ ipc_entry_dealloc(
 
 	assert(space->is_active);
 	assert(entry->ie_object == IO_NULL);
-	assert(entry->ie_request == 0);
+	assert(entry->ie_request == IE_REQ_NONE);
+
+#if 1
+	if (entry->ie_request != IE_REQ_NONE)
+		panic("ipc_entry_dealloc()\n");
+#endif
 
 	index = MACH_PORT_INDEX(name);
 	table = space->is_table;
diff --git a/osfmk/ipc/ipc_entry.h b/osfmk/ipc/ipc_entry.h
index e1c01d154..14d7d1846 100644
--- a/osfmk/ipc/ipc_entry.h
+++ b/osfmk/ipc/ipc_entry.h
@@ -114,6 +114,8 @@ struct ipc_entry {
 #define	ie_next		index.next
 #define	ie_index	hash.table
 
+#define IE_REQ_NONE		0		/* no request */
+
 #define	IE_BITS_UREFS_MASK	0x0000ffff	/* 16 bits of user-reference */
 #define	IE_BITS_UREFS(bits)	((bits) & IE_BITS_UREFS_MASK)
 
diff --git a/osfmk/ipc/ipc_init.c b/osfmk/ipc/ipc_init.c
index 8e4773748..72f01383c 100644
--- a/osfmk/ipc/ipc_init.c
+++ b/osfmk/ipc/ipc_init.c
@@ -151,10 +151,6 @@ ipc_bootstrap(void)
 			       ipc_space_max * sizeof(struct ipc_space),
 			       sizeof(struct ipc_space),
 			       "ipc spaces");
-#if 0
-	/* make it exhaustible */
-	zone_change(ipc_space_zone, Z_EXHAUST, TRUE);
-#endif
 	zone_change(ipc_space_zone, Z_NOENCRYPT, TRUE);
 
 	ipc_tree_entry_zone =
@@ -162,10 +158,6 @@ ipc_bootstrap(void)
 			ipc_tree_entry_max * sizeof(struct ipc_tree_entry),
 			sizeof(struct ipc_tree_entry),
 			"ipc tree entries");
-#if 0
-	/* make it exhaustible */
-	zone_change(ipc_tree_entry_zone, Z_EXHAUST, TRUE);
-#endif
 	zone_change(ipc_tree_entry_zone, Z_NOENCRYPT, TRUE);
 
 	/*
@@ -176,11 +168,8 @@ ipc_bootstrap(void)
 		      ipc_port_max * sizeof(struct ipc_port),
 		      sizeof(struct ipc_port),
 		      "ipc ports");
-	/*
-	 * XXX  Can't make the port zone exhaustible because the kernel
-	 * XXX	panics when port allocation for an internal object fails.
-	 *zone_change(ipc_object_zones[IOT_PORT], Z_EXHAUST, TRUE);
-	 */
+	/* can't charge callers for port allocations (references passed) */
+	zone_change(ipc_object_zones[IOT_PORT], Z_CALLERACCT, FALSE);
 	zone_change(ipc_object_zones[IOT_PORT], Z_NOENCRYPT, TRUE);
 
 	ipc_object_zones[IOT_PORT_SET] =
@@ -188,8 +177,6 @@ ipc_bootstrap(void)
 		      ipc_pset_max * sizeof(struct ipc_pset),
 		      sizeof(struct ipc_pset),
 		      "ipc port sets");
-	/* make it exhaustible */
-	zone_change(ipc_object_zones[IOT_PORT_SET], Z_EXHAUST, TRUE);
 	zone_change(ipc_object_zones[IOT_PORT_SET], Z_NOENCRYPT, TRUE);
 
 	/*
@@ -201,6 +188,7 @@ ipc_bootstrap(void)
 			      IKM_SAVED_KMSG_SIZE,
 			      IKM_SAVED_KMSG_SIZE,
 			      "ipc kmsgs");
+	zone_change(ipc_kmsg_zone, Z_CALLERACCT, FALSE);
 	zone_change(ipc_kmsg_zone, Z_NOENCRYPT, TRUE);
 
 #if CONFIG_MACF_MACH
@@ -209,6 +197,9 @@ ipc_bootstrap(void)
 		      ipc_port_max * sizeof(struct ipc_labelh),
 		      sizeof(struct ipc_labelh),
 		      "label handles");
+	/* can't charge callers for label allocations (port refs passed) */
+	zone_change(ipc_labelh_zone, Z_CALLERACCT, FALSE);
+
 #endif
 
 	/* create special spaces */
diff --git a/osfmk/ipc/ipc_kmsg.c b/osfmk/ipc/ipc_kmsg.c
index d61a26a77..167d42145 100644
--- a/osfmk/ipc/ipc_kmsg.c
+++ b/osfmk/ipc/ipc_kmsg.c
@@ -808,51 +808,59 @@ void
 ipc_kmsg_destroy(
 	ipc_kmsg_t	kmsg)
 {
-	ipc_kmsg_queue_t queue;
-	boolean_t empty;
-
 	/*
-	 *	ipc_kmsg_clean can cause more messages to be destroyed.
-	 *	Curtail recursion by queueing messages.  If a message
-	 *	is already queued, then this is a recursive call.
+	 *	Destroying a message can cause more messages to be destroyed.
+	 *	Curtail recursion by putting messages on the deferred
+	 *	destruction queue.  If this was the first message on the
+	 *	queue, this instance must process the full queue.
 	 */
+	if (ipc_kmsg_delayed_destroy(kmsg))
+		ipc_kmsg_reap_delayed();
+}
 
-	queue = &(current_thread()->ith_messages);
-	empty = ipc_kmsg_queue_empty(queue);
-	ipc_kmsg_enqueue(queue, kmsg);
+/*
+ *	Routine:	ipc_kmsg_delayed_destroy
+ *	Purpose:
+ *		Enqueues a kernel message for deferred destruction.
+ *	Returns:
+ *		Boolean indicating whether the caller is responsible for
+ *		reaping the deferred messages.
+ */
 
-	if (empty) {
-		/* must leave kmsg in queue while cleaning it */
+boolean_t ipc_kmsg_delayed_destroy(
+	ipc_kmsg_t kmsg)
+{
+	ipc_kmsg_queue_t queue = &(current_thread()->ith_messages);
+	boolean_t first = ipc_kmsg_queue_empty(queue);
 
-		while ((kmsg = ipc_kmsg_queue_first(queue)) != IKM_NULL) {
-			ipc_kmsg_clean(kmsg);
-			ipc_kmsg_rmqueue(queue, kmsg);
-			ipc_kmsg_free(kmsg);
-		}
-	}
+	ipc_kmsg_enqueue(queue, kmsg);
+	return first;
 }
 
 /*
- *	Routine:	ipc_kmsg_destroy_dest
+ *	Routine:	ipc_kmsg_destroy_queue
  *	Purpose:
- *		Destroys a kernel message.  Releases all rights,
- *		references, and memory held by the message (including
- *		the destination port reference.
- *		Frees the message.
+ *		Destroys messages from the per-thread
+ *		deferred reaping queue.
  *	Conditions:
  *		No locks held.
  */
+
 void
-ipc_kmsg_destroy_dest(
-	ipc_kmsg_t kmsg)
+ipc_kmsg_reap_delayed(void)
 {
-    ipc_port_t port;
-	
-    port = kmsg->ikm_header->msgh_remote_port;
+	ipc_kmsg_queue_t queue = &(current_thread()->ith_messages);
+	ipc_kmsg_t kmsg;
 
-    ipc_port_release(port);
-    kmsg->ikm_header->msgh_remote_port = MACH_PORT_NULL;
-    ipc_kmsg_destroy(kmsg);
+	/*
+	 * Must leave the kmsg in the queue while cleaning it, so that
+	 * nested calls cannot recurse back into this loop.
+	 */
+	while ((kmsg = ipc_kmsg_queue_first(queue)) != IKM_NULL) {
+		ipc_kmsg_clean(kmsg);
+		ipc_kmsg_rmqueue(queue, kmsg);
+		ipc_kmsg_free(kmsg);
+	}
 }
 
 /*
@@ -864,7 +872,7 @@ ipc_kmsg_destroy_dest(
  *	Conditions:
  *		No locks held.
  */
-
+static unsigned int _ipc_kmsg_clean_invalid_desc = 0;
 void
 ipc_kmsg_clean_body(
 	__unused ipc_kmsg_t	kmsg,
@@ -943,7 +951,7 @@ ipc_kmsg_clean_body(
 		break;
 	    }
 	    default : {
-		printf("cleanup: don't understand this type of descriptor\n");
+		    _ipc_kmsg_clean_invalid_desc++; /* don't understand this type of descriptor */
 	    }
 	}
     }
@@ -975,7 +983,7 @@ ipc_kmsg_clean_partial(
 
 	object = (ipc_object_t) kmsg->ikm_header->msgh_remote_port;
 	assert(IO_VALID(object));
-	ipc_object_destroy(object, MACH_MSGH_BITS_REMOTE(mbits));
+	ipc_object_destroy_dest(object, MACH_MSGH_BITS_REMOTE(mbits));
 
 	object = (ipc_object_t) kmsg->ikm_header->msgh_local_port;
 	if (IO_VALID(object))
@@ -1007,7 +1015,7 @@ ipc_kmsg_clean(
 	mbits = kmsg->ikm_header->msgh_bits;
 	object = (ipc_object_t) kmsg->ikm_header->msgh_remote_port;
 	if (IO_VALID(object))
-		ipc_object_destroy(object, MACH_MSGH_BITS_REMOTE(mbits));
+		ipc_object_destroy_dest(object, MACH_MSGH_BITS_REMOTE(mbits));
 
 	object = (ipc_object_t) kmsg->ikm_header->msgh_local_port;
 	if (IO_VALID(object))
@@ -1244,7 +1252,6 @@ ipc_kmsg_get_from_kernel(
 	assert(size >= sizeof(mach_msg_header_t));
 //	assert((size & 3) == 0);
 
-	assert(IP_VALID((ipc_port_t) msg->msgh_remote_port));
 	dest_port = (ipc_port_t)msg->msgh_remote_port;
 
 	msg_and_trailer_size = size + MAX_TRAILER_SIZE;
@@ -1254,7 +1261,7 @@ ipc_kmsg_get_from_kernel(
 	 * clients.  These are set up for those kernel clients
 	 * which cannot afford to wait.
 	 */
-	if (IP_PREALLOC(dest_port)) {
+	if (IP_VALID(dest_port) && IP_PREALLOC(dest_port)) {
 		mach_msg_size_t max_desc = 0;
 
 		ip_lock(dest_port);
@@ -1332,6 +1339,7 @@ ipc_kmsg_get_from_kernel(
  *		MACH_MSG_SUCCESS	The message was accepted.
  *		MACH_SEND_TIMED_OUT	Caller still has message.
  *		MACH_SEND_INTERRUPTED	Caller still has message.
+ *		MACH_SEND_INVALID_DEST	Caller still has message.
  */
 mach_msg_return_t
 ipc_kmsg_send(
@@ -1346,9 +1354,6 @@ ipc_kmsg_send(
 	port = (ipc_port_t) kmsg->ikm_header->msgh_remote_port;
 	assert(IP_VALID(port));
 
-	if ((option & ~(MACH_SEND_TIMEOUT|MACH_SEND_ALWAYS)) != 0)
-		printf("ipc_kmsg_send: bad option 0x%x\n", option);
-
 	ip_lock(port);
 
 	if (port->ip_receiver == ipc_space_kernel) {
@@ -1543,16 +1548,6 @@ ipc_kmsg_put_to_kernel(
  *		and the bits field is updated.  The destination port
  *		will be a valid port pointer.
  *
- *		The notify argument implements the MACH_SEND_CANCEL option.
- *		If it is not MACH_PORT_NULL, it should name a receive right.
- *		If the processing of the destination port would generate
- *		a port-deleted notification (because the right for the
- *		destination port is destroyed and it had a request for
- *		a dead-name notification registered), and the port-deleted
- *		notification would be sent to the named receive right,
- *		then it isn't sent and the send-once right for the notify
- *		port is quietly destroyed.
- *
  *	Conditions:
  *		Nothing locked.
  *	Returns:
@@ -1560,9 +1555,6 @@ ipc_kmsg_put_to_kernel(
  *		MACH_SEND_INVALID_HEADER
  *			Illegal value in the message header bits.
  *		MACH_SEND_INVALID_DEST	The space is dead.
- *		MACH_SEND_INVALID_NOTIFY
- *			Notify is non-null and doesn't name a receive right.
- *			(Either KERN_INVALID_NAME or KERN_INVALID_RIGHT.)
  *		MACH_SEND_INVALID_DEST	Can't copyin destination port.
  *			(Either KERN_INVALID_NAME or KERN_INVALID_RIGHT.)
  *		MACH_SEND_INVALID_REPLY	Can't copyin reply port.
@@ -1573,7 +1565,7 @@ mach_msg_return_t
 ipc_kmsg_copyin_header(
 	mach_msg_header_t	*msg,
 	ipc_space_t		space,
-	mach_port_name_t	notify)
+	boolean_t		notify)
 {
 	mach_msg_bits_t mbits = msg->msgh_bits & MACH_MSGH_BITS_USER;
 	mach_port_name_t dest_name = CAST_MACH_PORT_TO_NAME(msg->msgh_remote_port);
@@ -1584,8 +1576,7 @@ ipc_kmsg_copyin_header(
 	mach_msg_type_name_t reply_type = MACH_MSGH_BITS_LOCAL(mbits);
 	ipc_object_t dest_port, reply_port;
 	ipc_port_t dest_soright, reply_soright;
-	ipc_port_t notify_port;
-	ipc_entry_t entry;
+	ipc_entry_t dest_entry, reply_entry;
 
 	if ((mbits != msg->msgh_bits) ||
 	    (!MACH_MSG_TYPE_PORT_ANY_SEND(dest_type)) ||
@@ -1609,10 +1600,10 @@ ipc_kmsg_copyin_header(
 	 * because copying the header involves copying the port rights too
 	 * and we need to do the send check before anything is actually copied.
 	 */
-	entry = ipc_entry_lookup(space, dest_name);
-	if (entry != IE_NULL) {
+	dest_entry = ipc_entry_lookup(space, dest_name);
+	if (dest_entry != IE_NULL) {
 		int error = 0;
-		ipc_port_t port = (ipc_port_t) entry->ie_object;
+		ipc_port_t port = (ipc_port_t) dest_entry->ie_object;
 		if (port == IP_NULL)
 			goto invalid_dest;
 		ip_lock(port);
@@ -1629,20 +1620,6 @@ ipc_kmsg_copyin_header(
 	}
 #endif
 
-	if (notify != MACH_PORT_NULL) {
-		if ((entry = ipc_entry_lookup(space, notify)) == IE_NULL) {
-			is_write_unlock(space);
-			return MACH_SEND_INVALID_NOTIFY;
-		}
-		if((entry->ie_bits & MACH_PORT_TYPE_RECEIVE) == 0) {
-			is_write_unlock(space);
-			return MACH_SEND_INVALID_NOTIFY;
-		}
-
-		notify_port = (ipc_port_t) entry->ie_object;
-	} else
-		notify_port = IP_NULL;
-
 	if (dest_name == reply_name) {
 		mach_port_name_t name = dest_name;
 
@@ -1658,13 +1635,14 @@ ipc_kmsg_copyin_header(
 		 *	copy-send and make-send.
 		 */
 
-		entry = ipc_entry_lookup(space, name);
-		if (entry == IE_NULL)
+		dest_entry = ipc_entry_lookup(space, name);
+		if (dest_entry == IE_NULL)
 			goto invalid_dest;
 
+		reply_entry = dest_entry;
 		assert(reply_type != 0); /* because name not null */
 
-		if (!ipc_right_copyin_check(space, name, entry, reply_type))
+		if (!ipc_right_copyin_check(space, name, reply_entry, reply_type))
 			goto invalid_reply;
 
 		if ((dest_type == MACH_MSG_TYPE_MOVE_SEND_ONCE) ||
@@ -1685,7 +1663,7 @@ ipc_kmsg_copyin_header(
 			   (dest_type == MACH_MSG_TYPE_MAKE_SEND_ONCE) ||
 			   (reply_type == MACH_MSG_TYPE_MAKE_SEND) ||
 			   (reply_type == MACH_MSG_TYPE_MAKE_SEND_ONCE)) {
-			kr = ipc_right_copyin(space, name, entry,
+			kr = ipc_right_copyin(space, name, dest_entry,
 					      dest_type, FALSE,
 					      &dest_port, &dest_soright);
 			if (kr != KERN_SUCCESS)
@@ -1702,16 +1680,15 @@ ipc_kmsg_copyin_header(
 			 */
 
 			assert(IO_VALID(dest_port));
-			assert(entry->ie_bits & MACH_PORT_TYPE_RECEIVE);
 			assert(dest_soright == IP_NULL);
 
-			kr = ipc_right_copyin(space, name, entry,
+			kr = ipc_right_copyin(space, name, reply_entry,
 					      reply_type, TRUE,
 					      &reply_port, &reply_soright);
 
 			assert(kr == KERN_SUCCESS);
 			assert(reply_port == dest_port);
-			assert(entry->ie_bits & MACH_PORT_TYPE_RECEIVE);
+			assert(reply_entry->ie_bits & MACH_PORT_TYPE_RECEIVE);
 			assert(reply_soright == IP_NULL);
 		} else if ((dest_type == MACH_MSG_TYPE_COPY_SEND) &&
 			   (reply_type == MACH_MSG_TYPE_COPY_SEND)) {
@@ -1720,13 +1697,13 @@ ipc_kmsg_copyin_header(
 			 *	and dup the send right we get out.
 			 */
 
-			kr = ipc_right_copyin(space, name, entry,
+			kr = ipc_right_copyin(space, name, dest_entry,
 					      dest_type, FALSE,
 					      &dest_port, &dest_soright);
 			if (kr != KERN_SUCCESS)
 				goto invalid_dest;
 
-			assert(entry->ie_bits & MACH_PORT_TYPE_SEND);
+			assert(dest_entry->ie_bits & MACH_PORT_TYPE_SEND);
 			assert(dest_soright == IP_NULL);
 
 			/*
@@ -1746,14 +1723,16 @@ ipc_kmsg_copyin_header(
 			 *	to get two send rights for the price of one.
 			 */
 
-			kr = ipc_right_copyin_two(space, name, entry,
+			kr = ipc_right_copyin_two(space, name, dest_entry,
 						  &dest_port, &dest_soright);
 			if (kr != KERN_SUCCESS)
 				goto invalid_dest;
 
 			/* the entry might need to be deallocated */
-			if (IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE)
-				ipc_entry_dealloc(space, name, entry);
+			if (IE_BITS_TYPE(dest_entry->ie_bits) == MACH_PORT_TYPE_NONE) {
+				ipc_entry_dealloc(space, name, dest_entry);
+				dest_entry = IE_NULL;
+			}
 
 			reply_port = dest_port;
 			reply_soright = IP_NULL;
@@ -1770,7 +1749,7 @@ ipc_kmsg_copyin_header(
 			 *	and dup the send right we get out.
 			 */
 
-			kr = ipc_right_copyin(space, name, entry,
+			kr = ipc_right_copyin(space, name, dest_entry,
 					      MACH_MSG_TYPE_MOVE_SEND, FALSE,
 					      &dest_port, &soright);
 			if (kr != KERN_SUCCESS)
@@ -1778,8 +1757,10 @@ ipc_kmsg_copyin_header(
 
 			/* the entry might need to be deallocated */
 
-			if (IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE)
-				ipc_entry_dealloc(space, name, entry);
+			if (IE_BITS_TYPE(dest_entry->ie_bits) == MACH_PORT_TYPE_NONE) {
+				ipc_entry_dealloc(space, name, dest_entry);
+				dest_entry = IE_NULL;
+			}
 
 			/*
 			 *	It's OK if the port we got is dead now,
@@ -1804,26 +1785,25 @@ ipc_kmsg_copyin_header(
 		 *	to make atomic.  Just copyin the destination.
 		 */
 
-		entry = ipc_entry_lookup(space, dest_name);
-		if (entry == IE_NULL)
+		dest_entry = ipc_entry_lookup(space, dest_name);
+		if (dest_entry == IE_NULL)
 			goto invalid_dest;
 
-		kr = ipc_right_copyin(space, dest_name, entry,
+		kr = ipc_right_copyin(space, dest_name, dest_entry,
 				      dest_type, FALSE,
 				      &dest_port, &dest_soright);
 		if (kr != KERN_SUCCESS)
 			goto invalid_dest;
 
 		/* the entry might need to be deallocated */
-
-		if (IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE)
-			ipc_entry_dealloc(space, dest_name, entry);
+		if (IE_BITS_TYPE(dest_entry->ie_bits) == MACH_PORT_TYPE_NONE) {
+			ipc_entry_dealloc(space, dest_name, dest_entry);
+			dest_entry = IE_NULL;
+		}
 
 		reply_port = (ipc_object_t)CAST_MACH_NAME_TO_PORT(reply_name);
 		reply_soright = IP_NULL;
 	} else {
-		ipc_entry_t dest_entry, reply_entry;
-
 		/*
 		 *	This is the tough case to make atomic.
 		 *	The difficult problem is serializing with port death.
@@ -1856,7 +1836,7 @@ ipc_kmsg_copyin_header(
 		 * JMM - The code to handle this was too expensive and, anyway,
 		 * we intend to separate the dest lookup from the reply copyin
 		 * by a wide margin, so the user will have to learn to deal!
-		 * I will be making the change soon!
+		 * I will be making the change soon in rdar://problem/6275821.
 		 */
 
 		dest_entry = ipc_entry_lookup(space, dest_name);
@@ -1890,29 +1870,36 @@ ipc_kmsg_copyin_header(
 
 		/* the entries might need to be deallocated */
 
-		if (IE_BITS_TYPE(reply_entry->ie_bits) == MACH_PORT_TYPE_NONE)
+		if (IE_BITS_TYPE(reply_entry->ie_bits) == MACH_PORT_TYPE_NONE) {
 			ipc_entry_dealloc(space, reply_name, reply_entry);
+			reply_entry = IE_NULL;
+		}
 
-		if (IE_BITS_TYPE(dest_entry->ie_bits) == MACH_PORT_TYPE_NONE)
+		if (IE_BITS_TYPE(dest_entry->ie_bits) == MACH_PORT_TYPE_NONE) {
 			ipc_entry_dealloc(space, dest_name, dest_entry);
+			dest_entry = IE_NULL;
+		}
 	}
 
+	dest_type = ipc_object_copyin_type(dest_type);
+	reply_type = ipc_object_copyin_type(reply_type);
+
 	/*
-	 *	At this point, dest_port, reply_port,
-	 *	dest_soright, reply_soright are all initialized.
-	 *	Any defunct entries have been deallocated.
-	 *	The space is still write-locked, and we need to
-	 *	make the MACH_SEND_CANCEL check.  The notify_port pointer
-	 *	is still usable, because the copyin code above won't ever
-	 *	deallocate a receive right, so its entry still exists
-	 *	and holds a ref.  Note notify_port might even equal
-	 *	dest_port or reply_port.
+	 * JMM - Without rdar://problem/6275821, this is the last place we can
+	 * re-arm the send-possible notifications.  It may trigger unexpectedly
+	 * early (the send may NOT have failed), but that is better than
+	 * missing the notification entirely.
 	 */
-
-	if ((notify != MACH_PORT_NULL) &&
-	    (dest_soright == notify_port)) {
-		ipc_port_release_sonce(dest_soright);
-		dest_soright = IP_NULL;
+	if (notify && dest_type != MACH_MSG_TYPE_PORT_SEND_ONCE &&
+	    dest_entry != IE_NULL && dest_entry->ie_request != IE_REQ_NONE) {
+		ipc_port_t dport = (ipc_port_t)dest_port;
+
+		assert(dport != IP_NULL);
+		ip_lock(dport);
+		if (ip_active(dport) &&
+		    dport->ip_receiver != ipc_space_kernel && ip_full(dport)) {
+			ipc_port_request_sparm(dport, dest_name, dest_entry->ie_request);
+		}
+		ip_unlock(dport);
 	}
 
 	is_write_unlock(space);
@@ -1923,9 +1910,6 @@ ipc_kmsg_copyin_header(
 	if (reply_soright != IP_NULL)
 		ipc_notify_port_deleted(reply_soright, reply_name);
 
-	dest_type = ipc_object_copyin_type(dest_type);
-	reply_type = ipc_object_copyin_type(reply_type);
-
 	msg->msgh_bits = (MACH_MSGH_BITS_OTHER(mbits) |
 			  MACH_MSGH_BITS(dest_type, reply_type));
 	msg->msgh_remote_port = (ipc_port_t)dest_port;
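
/*
 * A hedged userspace sketch (not from this patch) of what the re-arming
 * above enables.  A client registers for MACH_NOTIFY_SEND_POSSIBLE on the
 * destination, attempts a non-blocking send, and retries after the
 * notification arrives; "dest" and "notify_port" are hypothetical rights
 * held by the caller.
 */
mach_port_t previous = MACH_PORT_NULL;
kern_return_t kr;

kr = mach_port_request_notification(mach_task_self(), dest,
				    MACH_NOTIFY_SEND_POSSIBLE, 0 /* sync */,
				    notify_port, MACH_MSG_TYPE_MAKE_SEND_ONCE,
				    &previous);
/*
 * On MACH_SEND_TIMED_OUT from a zero-timeout send, block receiving on
 * notify_port; the send-possible notification signals that a retry may
 * now succeed.
 */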
@@ -2472,7 +2456,6 @@ ipc_kmsg_copyin_body(
  *		MACH_MSG_SUCCESS	Successful copyin.
  *		MACH_SEND_INVALID_HEADER
  *			Illegal value in the message header bits.
- *		MACH_SEND_INVALID_NOTIFY	Bad notify port.
  *		MACH_SEND_INVALID_DEST	Can't copyin destination port.
  *		MACH_SEND_INVALID_REPLY	Can't copyin reply port.
  *		MACH_SEND_INVALID_MEMORY	Can't grab out-of-line memory.
@@ -2486,7 +2469,7 @@ ipc_kmsg_copyin(
 	ipc_kmsg_t		kmsg,
 	ipc_space_t		space,
 	vm_map_t		map,
-	mach_port_name_t	notify)
+	boolean_t		notify)
 {
     mach_msg_return_t 		mr;
     
@@ -2535,7 +2518,7 @@ ipc_kmsg_copyin(
  *		Nothing locked.
  */
 
-void
+mach_msg_return_t
 ipc_kmsg_copyin_from_kernel(
 	ipc_kmsg_t	kmsg)
 {
@@ -2546,6 +2529,8 @@ ipc_kmsg_copyin_from_kernel(
 	ipc_object_t local = (ipc_object_t) kmsg->ikm_header->msgh_local_port;
 
 	/* translate the destination and reply ports */
+	if (!IO_VALID(remote))
+		return MACH_SEND_INVALID_DEST;
 
 	ipc_object_copyin_from_kernel(remote, rname);
 	if (IO_VALID(local))
@@ -2569,7 +2554,7 @@ ipc_kmsg_copyin_from_kernel(
 
 		kmsg->ikm_header->msgh_bits = bits;
 		if ((bits & MACH_MSGH_BITS_COMPLEX) == 0)
-			return;
+			return MACH_MSG_SUCCESS;
 	}
     {
     	mach_msg_descriptor_t	*saddr;
@@ -2663,10 +2648,11 @@ ipc_kmsg_copyin_from_kernel(
 	    }
 	}
     }
+    return MACH_MSG_SUCCESS;
 }
 
 #if IKM_SUPPORT_LEGACY
-void
+mach_msg_return_t
 ipc_kmsg_copyin_from_kernel_legacy(
 	ipc_kmsg_t	kmsg)
 {
@@ -2677,6 +2663,8 @@ ipc_kmsg_copyin_from_kernel_legacy(
 	ipc_object_t local = (ipc_object_t) kmsg->ikm_header->msgh_local_port;
 
 	/* translate the destination and reply ports */
+	if (!IO_VALID(remote))
+		return MACH_SEND_INVALID_DEST;
 
 	ipc_object_copyin_from_kernel(remote, rname);
 	if (IO_VALID(local))
@@ -2700,7 +2688,7 @@ ipc_kmsg_copyin_from_kernel_legacy(
 
 		kmsg->ikm_header->msgh_bits = bits;
 		if ((bits & MACH_MSGH_BITS_COMPLEX) == 0)
-			return;
+			return MACH_MSG_SUCCESS;
 	}
     {
     	mach_msg_legacy_descriptor_t	*saddr;
@@ -2833,6 +2821,7 @@ ipc_kmsg_copyin_from_kernel_legacy(
 	    }
 	}
     }
+    return MACH_MSG_SUCCESS;
 }
 #endif /* IKM_SUPPORT_LEGACY */
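
/*
 * A minimal caller-side sketch (hypothetical helper, assuming the new
 * signatures above): now that the kernel copyin routines report
 * MACH_SEND_INVALID_DEST instead of asserting on an invalid destination,
 * in-kernel senders must check the result and dispose of the message
 * themselves.
 */
static mach_msg_return_t
example_kernel_send(ipc_kmsg_t kmsg)
{
	mach_msg_return_t mr;

	mr = ipc_kmsg_copyin_from_kernel(kmsg);
	if (mr != MACH_MSG_SUCCESS) {
		ipc_kmsg_free(kmsg);	/* caller still owns the message */
		return mr;
	}
	return ipc_kmsg_send(kmsg, MACH_SEND_ALWAYS, MACH_MSG_TIMEOUT_NONE);
}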
 
@@ -2845,13 +2834,6 @@ ipc_kmsg_copyin_from_kernel_legacy(
  *		If it does succeed the remote/local port fields
  *		contain port names instead of object pointers,
  *		and the bits field is updated.
- *
- *		The notify argument implements the MACH_RCV_NOTIFY option.
- *		If it is not MACH_PORT_NULL, it should name a receive right.
- *		If the process of receiving the reply port creates a
- *		new right in the receiving task, then the new right is
- *		automatically registered for a dead-name notification,
- *		with the notify port supplying the send-once right.
  *	Conditions:
  *		Nothing locked.
  *	Returns:
@@ -2872,14 +2854,20 @@ ipc_kmsg_copyin_from_kernel_legacy(
 mach_msg_return_t
 ipc_kmsg_copyout_header(
 	mach_msg_header_t	*msg,
-	ipc_space_t		space,
-	mach_port_name_t	notify)
+	ipc_space_t		space)
 {
 	mach_msg_bits_t mbits = msg->msgh_bits;
 	ipc_port_t dest = (ipc_port_t) msg->msgh_remote_port;
 
 	assert(IP_VALID(dest));
 
+	/*
+	 * While we still hold a reference on the received-from port,
+	 * process all send-possible notifications we received along with
+	 * the message.
+	 */
+	ipc_port_spnotify(dest);
+
     {
 	mach_msg_type_name_t dest_type = MACH_MSGH_BITS_REMOTE(mbits);
 	mach_msg_type_name_t reply_type = MACH_MSGH_BITS_LOCAL(mbits);
@@ -2887,64 +2875,27 @@ ipc_kmsg_copyout_header(
 	mach_port_name_t dest_name, reply_name;
 
 	if (IP_VALID(reply)) {
-		ipc_port_t notify_port;
 		ipc_entry_t entry;
 		kern_return_t kr;
 
 		/*
-		 *	Handling notify (for MACH_RCV_NOTIFY) is tricky.
-		 *	The problem is atomically making a send-once right
-		 *	from the notify port and installing it for a
-		 *	dead-name request in the new entry, because this
-		 *	requires two port locks (on the notify port and
-		 *	the reply port).  However, we can safely make
-		 *	and consume send-once rights for the notify port
-		 *	as long as we hold the space locked.  This isn't
-		 *	an atomicity problem, because the only way
-		 *	to detect that a send-once right has been created
-		 *	and then consumed if it wasn't needed is by getting
-		 *	at the receive right to look at ip_sorights, and
-		 *	because the space is write-locked status calls can't
-		 *	lookup the notify port receive right.  When we make
-		 *	the send-once right, we lock the notify port,
-		 *	so any status calls in progress will be done.
+		 *	Get reply port entry (if none, skip to dest port
+		 *	copyout).  This may require growing the space.
 		 */
 
 		is_write_lock(space);
 
 		for (;;) {
-			ipc_port_request_index_t request;
-
 			if (!space->is_active) {
 				is_write_unlock(space);
 				return (MACH_RCV_HEADER_ERROR|
 					MACH_MSG_IPC_SPACE);
 			}
 
-			if (notify != MACH_PORT_NULL) {
-				notify_port = ipc_port_lookup_notify(space,
-								     notify);
-				if (notify_port == IP_NULL) {
-					printf("ipc_kmsg_copyout_header: no notify port\n");
-					is_write_unlock(space);
-					return MACH_RCV_INVALID_NOTIFY;
-				}
-			} else
-				notify_port = IP_NULL;
-
 			if ((reply_type != MACH_MSG_TYPE_PORT_SEND_ONCE) &&
 			    ipc_right_reverse(space, (ipc_object_t) reply,
 					      &reply_name, &entry)) {
 				/* reply port is locked and active */
-
-				/*
-				 *	We don't need the notify_port
-				 *	send-once right, but we can't release
-				 *	it here because reply port is locked.
-				 *	Wait until after the copyout to
-				 *	release the notify port right.
-				 */
-
 				assert(entry->ie_bits &
 				       MACH_PORT_TYPE_SEND_RECEIVE);
 				break;
@@ -2955,9 +2906,6 @@ ipc_kmsg_copyout_header(
 				ip_release(reply);
 				ip_check_unlock(reply);
 
-				if (notify_port != IP_NULL)
-					ipc_port_release_sonce(notify_port);
-
 				ip_lock(dest);
 				is_write_unlock(space);
 
@@ -2971,24 +2919,12 @@ ipc_kmsg_copyout_header(
 			if (kr != KERN_SUCCESS) {
 				ip_unlock(reply);
 
-				if (notify_port != IP_NULL)
-					ipc_port_release_sonce(notify_port);
-
 				/* space is locked */
 				kr = ipc_entry_grow_table(space,
 							  ITS_SIZE_NONE);
 				if (kr != KERN_SUCCESS) {
-					/* space is unlocked */
-
-					if (kr == KERN_RESOURCE_SHORTAGE) {
-						printf("ipc_kmsg_copyout_header: can't grow kernel ipc space\n");
-						return (MACH_RCV_HEADER_ERROR|
-							MACH_MSG_IPC_KERNEL);
-					} else {
-						printf("ipc_kmsg_copyout_header: can't grow user ipc space\n");
-						return (MACH_RCV_HEADER_ERROR|
-							MACH_MSG_IPC_SPACE);
-					}
+					return (MACH_RCV_HEADER_ERROR|
+						MACH_MSG_IPC_SPACE);
 				}
 				/* space is locked again; start over */
 
@@ -2998,48 +2934,7 @@ ipc_kmsg_copyout_header(
 			       MACH_PORT_TYPE_NONE);
 			assert(entry->ie_object == IO_NULL); 
 
-			if (notify_port == IP_NULL) {
-				/* not making a dead-name request */
-
-				entry->ie_object = (ipc_object_t) reply;
-				break;
-			}
-
-			kr = ipc_port_dnrequest(reply, reply_name,
-						notify_port, &request);
-			if (kr != KERN_SUCCESS) {
-				ip_unlock(reply);
-
-				ipc_port_release_sonce(notify_port);
-
-				ipc_entry_dealloc(space, reply_name, entry);
-				is_write_unlock(space);
-
-				ip_lock(reply);
-				if (!ip_active(reply)) {
-					/* will fail next time around loop */
-
-					ip_unlock(reply);
-					is_write_lock(space);
-					continue;
-				}
-
-				kr = ipc_port_dngrow(reply, ITS_SIZE_NONE);
-				/* port is unlocked */
-				if (kr != KERN_SUCCESS) {
-					printf("ipc_kmsg_copyout_header: can't grow kernel ipc space2\n");
-					return (MACH_RCV_HEADER_ERROR|
-						MACH_MSG_IPC_KERNEL);
-				}
-
-				is_write_lock(space);
-				continue;
-			}
-
-			notify_port = IP_NULL; /* don't release right below */
-
 			entry->ie_object = (ipc_object_t) reply;
-			entry->ie_request = request;
 			break;
 		}
 
@@ -3052,17 +2947,13 @@ ipc_kmsg_copyout_header(
 		/* reply port is unlocked */
 		assert(kr == KERN_SUCCESS);
 
-		if (notify_port != IP_NULL)
-			ipc_port_release_sonce(notify_port);
-
 		ip_lock(dest);
 		is_write_unlock(space);
 	} else {
 		/*
 		 *	No reply port!  This is an easy case.
 		 *	We only need to have the space locked
-		 *	when checking notify and when locking
-		 *	the destination (to ensure atomicity).
+		 *	when locking the destination.
 		 */
 
 		is_read_lock(space);
@@ -3071,24 +2962,6 @@ ipc_kmsg_copyout_header(
 			return MACH_RCV_HEADER_ERROR|MACH_MSG_IPC_SPACE;
 		}
 
-		if (notify != MACH_PORT_NULL) {
-			ipc_entry_t entry;
-
-			/* must check notify even though it won't be used */
-
-			if ((entry = ipc_entry_lookup(space, notify)) == IE_NULL) {
-				printf("ipc_kmsg_copyout_header: ipc_entry_lookup failed\n");
-				is_read_unlock(space);
-				return MACH_RCV_INVALID_NOTIFY;
-			}
-	
-			if ((entry->ie_bits & MACH_PORT_TYPE_RECEIVE) == 0) {
-				printf("ipc_kmsg_copyout_header: MACH_PORT_TYPE_RECEIVE not set!\n");
-				is_read_unlock(space);
-				return MACH_RCV_INVALID_NOTIFY;
-			}
-		}
-
 		ip_lock(dest);
 		is_read_unlock(space);
 
@@ -3704,8 +3577,6 @@ ipc_kmsg_copyout_size(
  *		Nothing locked.
  *	Returns:
  *		MACH_MSG_SUCCESS	Copied out all rights and memory.
- *		MACH_RCV_INVALID_NOTIFY	Bad notify port.
- *			Rights and memory in the message are intact.
  *		MACH_RCV_HEADER_ERROR + special bits
  *			Rights and memory in the message are intact.
  *		MACH_RCV_BODY_ERROR + special bits
@@ -3718,12 +3589,11 @@ ipc_kmsg_copyout(
 	ipc_kmsg_t		kmsg,
 	ipc_space_t		space,
 	vm_map_t		map,
-	mach_port_name_t	notify,
 	mach_msg_body_t		*slist)
 {
 	mach_msg_return_t mr;
 
-	mr = ipc_kmsg_copyout_header(kmsg->ikm_header, space, notify);
+	mr = ipc_kmsg_copyout_header(kmsg->ikm_header, space);
 	if (mr != MACH_MSG_SUCCESS) {
 		return mr;
 	}
diff --git a/osfmk/ipc/ipc_kmsg.h b/osfmk/ipc/ipc_kmsg.h
index 8687cafbf..6fb07b6dd 100644
--- a/osfmk/ipc/ipc_kmsg.h
+++ b/osfmk/ipc/ipc_kmsg.h
@@ -269,9 +269,12 @@ extern void ipc_kmsg_free(
 extern void ipc_kmsg_destroy(
 	ipc_kmsg_t	kmsg);
 
-/* destroy kernel message and a reference on the dest */
-extern void ipc_kmsg_destroy_dest(
-	ipc_kmsg_t	kmsg);
+/* Enqueue kernel message for deferred destruction */
+extern boolean_t ipc_kmsg_delayed_destroy(
+	ipc_kmsg_t kmsg);
+
+/* Process all the delayed message destroys */
+extern void ipc_kmsg_reap_delayed(void);
 
 /* Preallocate a kernel message buffer */
 extern ipc_kmsg_t ipc_kmsg_prealloc(
@@ -321,29 +324,28 @@ extern void ipc_kmsg_put_to_kernel(
 extern mach_msg_return_t ipc_kmsg_copyin_header(
 	mach_msg_header_t	*msg,
 	ipc_space_t		space,
-	mach_port_name_t	notify);
+	boolean_t		notify);
 
 /* Copyin port rights and out-of-line memory from a user message */
 extern mach_msg_return_t ipc_kmsg_copyin(
 	ipc_kmsg_t		kmsg,
 	ipc_space_t		space,
 	vm_map_t		map,
-	mach_port_name_t	notify);
+	boolean_t		notify);
 
 /* Copyin port rights and out-of-line memory from a kernel message */
-extern void ipc_kmsg_copyin_from_kernel(
+extern mach_msg_return_t ipc_kmsg_copyin_from_kernel(
 	ipc_kmsg_t		kmsg);
 
 #if IKM_SUPPORT_LEGACY
-extern void ipc_kmsg_copyin_from_kernel_legacy(
+extern mach_msg_return_t ipc_kmsg_copyin_from_kernel_legacy(
 	ipc_kmsg_t	kmsg);
 #endif
 
 /* Copyout port rights in the header of a message */
 extern mach_msg_return_t ipc_kmsg_copyout_header(
 	mach_msg_header_t	*msg,
-	ipc_space_t		space,
-	mach_port_name_t	notify);
+	ipc_space_t		space);
 
 /* Copyout a port right returning a name */
 extern mach_msg_return_t ipc_kmsg_copyout_object(
@@ -357,7 +359,6 @@ extern mach_msg_return_t ipc_kmsg_copyout(
 	ipc_kmsg_t		kmsg,
 	ipc_space_t		space,
 	vm_map_t		map,
-	mach_port_name_t	notify,
 	mach_msg_body_t		*slist);
 
 /* Copyout port rights and out-of-line memory from the body of a message */
diff --git a/osfmk/ipc/ipc_labelh.c b/osfmk/ipc/ipc_labelh.c
index e6763aa36..934eaf7e4 100644
--- a/osfmk/ipc/ipc_labelh.c
+++ b/osfmk/ipc/ipc_labelh.c
@@ -79,6 +79,9 @@ labelh_new_user(ipc_space_t space, struct label *inl, mach_port_name_t *namep)
 
 	/* XXX - perform entrypoint check here? */
 
+	/* JMM - redo as port allocation, kobject set, and then copyout */
+	assert(!CONFIG_MACF_MACH);
+
 	/*
 	 * Note: the calling task will have a receive right for the port.
 	 * This is different from label handles that reference tasks
@@ -94,6 +97,7 @@ labelh_new_user(ipc_space_t space, struct label *inl, mach_port_name_t *namep)
 	port->ip_mscount++;
 	port->ip_srights++;
 	is_write_lock(space);
+	/* XXX - must validate space is still active and unwind if not */
 	entry = ipc_entry_lookup(space, *namep);
 	if (entry != IE_NULL)
 		entry->ie_bits |= MACH_PORT_TYPE_SEND;
diff --git a/osfmk/ipc/ipc_mqueue.c b/osfmk/ipc/ipc_mqueue.c
index 9d17b81b9..406b5ae93 100644
--- a/osfmk/ipc/ipc_mqueue.c
+++ b/osfmk/ipc/ipc_mqueue.c
@@ -949,7 +949,7 @@ ipc_mqueue_select_on_thread(
  *		Locks may be held by callers, so this routine cannot block.
  *		Caller holds reference on the message queue.
  */
-int
+unsigned
 ipc_mqueue_peek(ipc_mqueue_t mq)
 {
 	wait_queue_link_t	wql;
@@ -963,10 +963,7 @@ ipc_mqueue_peek(ipc_mqueue_t mq)
 	 * Don't block trying to get the lock.
 	 */
 	s = splsched();
-	if (!imq_lock_try(mq)) {
-		splx(s);
-		return -1;
-	}
+	imq_lock(mq);
 
 	/* 
 	 * peek at the contained port message queues, return as soon as
@@ -992,7 +989,8 @@ ipc_mqueue_peek(ipc_mqueue_t mq)
 /*
  *	Routine:	ipc_mqueue_destroy
  *	Purpose:
- *		Destroy a message queue.  Set any blocked senders running.
+ *		Destroy a (non-set) message queue.
+ *		Set any blocked senders running.
  *	   	Destroy the kmsgs in the queue.
  *	Conditions:
  *		Nothing locked.
@@ -1000,10 +998,11 @@ ipc_mqueue_peek(ipc_mqueue_t mq)
  */
 void
 ipc_mqueue_destroy(
-	ipc_mqueue_t	mqueue) 
+	ipc_mqueue_t	mqueue)
 {
 	ipc_kmsg_queue_t kmqueue;
 	ipc_kmsg_t kmsg;
+	boolean_t reap = FALSE;
 	spl_t s;
 
 
@@ -1019,19 +1018,27 @@ ipc_mqueue_destroy(
 				THREAD_RESTART,
 				FALSE);
 
+	/*
+	 * Move messages from the specified queue to the per-thread
+	 * clean/drain queue while we have the mqueue lock.
+	 */
 	kmqueue = &mqueue->imq_messages;
-
 	while ((kmsg = ipc_kmsg_dequeue(kmqueue)) != IKM_NULL) {
-		imq_unlock(mqueue);
-		splx(s);
-
-		ipc_kmsg_destroy_dest(kmsg);
-
-		s = splsched();
-		imq_lock(mqueue);
+		if (ipc_kmsg_delayed_destroy(kmsg))
+			reap = TRUE;
 	}
+
 	imq_unlock(mqueue);
 	splx(s);
+
+	/*
+	 * Destroy the messages we enqueued if we aren't nested
+	 * inside some other attempt to drain the same queue.
+	 */
+	if (reap)
+		ipc_kmsg_reap_delayed();
 }
 
 /*
diff --git a/osfmk/ipc/ipc_mqueue.h b/osfmk/ipc/ipc_mqueue.h
index 90d3322cf..c8a3f7a2e 100644
--- a/osfmk/ipc/ipc_mqueue.h
+++ b/osfmk/ipc/ipc_mqueue.h
@@ -213,7 +213,7 @@ extern void ipc_mqueue_select_on_thread(
 	thread_t                thread);
 
 /* Peek into a message queue to see if there are messages */
-extern int ipc_mqueue_peek(
+extern unsigned ipc_mqueue_peek(
 	ipc_mqueue_t		mqueue);
 
 /* Clear a message count reservation */
diff --git a/osfmk/ipc/ipc_notify.c b/osfmk/ipc/ipc_notify.c
index 25a26aa63..498401cc0 100644
--- a/osfmk/ipc/ipc_notify.c
+++ b/osfmk/ipc/ipc_notify.c
@@ -88,6 +88,24 @@ ipc_notify_port_deleted(
 	/* send-once right consumed */
 }
 
+/*
+ *	Routine:	ipc_notify_send_possible
+ *	Purpose:
+ *		Send a send-possible notification.
+ *	Conditions:
+ *		Nothing locked.
+ *		Consumes a ref/soright for port.
+ */
+
+void
+ipc_notify_send_possible(
+	ipc_port_t		port,
+	mach_port_name_t	name)
+{
+	(void)mach_notify_send_possible(port, name);
+	/* send-once right consumed */
+}
+
 /*
  *	Routine:	ipc_notify_port_destroyed
  *	Purpose:
diff --git a/osfmk/ipc/ipc_notify.h b/osfmk/ipc/ipc_notify.h
index a2da35065..0d87a6ec0 100644
--- a/osfmk/ipc/ipc_notify.h
+++ b/osfmk/ipc/ipc_notify.h
@@ -75,6 +75,11 @@ extern void ipc_notify_port_deleted(
 	ipc_port_t		port,
 	mach_port_name_t	name);
 
+/* Send a send-possible notification */
+extern void ipc_notify_send_possible(
+	ipc_port_t		port,
+	mach_port_name_t	name);
+
 /* Send a port-destroyed notification */
 extern void ipc_notify_port_destroyed(
 	ipc_port_t		port,
diff --git a/osfmk/ipc/ipc_object.c b/osfmk/ipc/ipc_object.c
index 95a1cfbf3..176e80ec8 100644
--- a/osfmk/ipc/ipc_object.c
+++ b/osfmk/ipc/ipc_object.c
@@ -90,6 +90,7 @@
 #include <ipc/ipc_hash.h>
 #include <ipc/ipc_right.h>
 #include <ipc/ipc_notify.h>
+#include <ipc/ipc_port.h>
 #include <ipc/ipc_pset.h>
 #include <ipc/ipc_labelh.h>
 
@@ -680,6 +681,42 @@ ipc_object_destroy(
 	}
 }
 
+/*
+ *	Routine:	ipc_object_destroy_dest
+ *	Purpose:
+ *		Destroys a naked capability for the destination of
+ *		of a message. Consumes a ref for the object.
+ *
+ *	Conditions:
+ *		Nothing locked.
+ */
+
+void
+ipc_object_destroy_dest(
+	ipc_object_t		object,
+	mach_msg_type_name_t	msgt_name)
+{
+	assert(IO_VALID(object));
+	assert(io_otype(object) == IOT_PORT);
+
+	switch (msgt_name) {
+	    case MACH_MSG_TYPE_PORT_SEND:
+		ipc_port_release_send((ipc_port_t) object);
+		break;
+
+	    case MACH_MSG_TYPE_PORT_SEND_ONCE:
+		if (io_active(object) && 
+		    !ip_full_kernel((ipc_port_t) object))
+			ipc_notify_send_once((ipc_port_t) object);
+		else
+			ipc_port_release_sonce((ipc_port_t) object);
+		break;
+
+	    default:
+		panic("ipc_object_destroy_dest: strange rights");
+	}
+}
+
 /*
  *	Routine:	ipc_object_copyout
  *	Purpose:
@@ -1033,14 +1070,7 @@ io_free(
 
 	if (otype == IOT_PORT) {
 		port = (ipc_port_t) object;
-#if	MACH_ASSERT
-		ipc_port_track_dealloc(port);
-#endif	/* MACH_ASSERT */
-
-#if CONFIG_MACF_MACH
-		/* Port label should have been initialized after creation. */
-		mac_port_label_destroy(&port->ip_label);
-#endif	  
+		ipc_port_finalize(port);
 	}
 	io_lock_destroy(object);
 	zfree(ipc_object_zones[otype], object);
diff --git a/osfmk/ipc/ipc_object.h b/osfmk/ipc/ipc_object.h
index 003707f59..a813b29bf 100644
--- a/osfmk/ipc/ipc_object.h
+++ b/osfmk/ipc/ipc_object.h
@@ -300,6 +300,11 @@ extern void ipc_object_destroy(
 	ipc_object_t		object,
 	mach_msg_type_name_t	msgt_name);
 
+/* Destroy a naked destination capability */
+extern void ipc_object_destroy_dest(
+	ipc_object_t		object,
+	mach_msg_type_name_t	msgt_name);
+
 /* Copyout a capability, placing it into a space */
 extern kern_return_t ipc_object_copyout(
 	ipc_space_t		space,
diff --git a/osfmk/ipc/ipc_port.c b/osfmk/ipc/ipc_port.c
index 76185c9ba..0ece0705c 100644
--- a/osfmk/ipc/ipc_port.c
+++ b/osfmk/ipc/ipc_port.c
@@ -139,9 +139,9 @@ ipc_port_timestamp(void)
 }
 
 /*
- *	Routine:	ipc_port_dnrequest
+ *	Routine:	ipc_port_request_alloc
  *	Purpose:
- *		Try to allocate a dead-name request slot.
+ *		Try to allocate a request slot.
  *		If successful, returns the request index.
  *		Otherwise returns zero.
  *	Conditions:
@@ -152,20 +152,24 @@ ipc_port_timestamp(void)
  */
 
 kern_return_t
-ipc_port_dnrequest(
+ipc_port_request_alloc(
 	ipc_port_t			port,
 	mach_port_name_t		name,
 	ipc_port_t			soright,
+	boolean_t			send_possible,
+	boolean_t			immediate,
 	ipc_port_request_index_t	*indexp)
 {
 	ipc_port_request_t ipr, table;
 	ipc_port_request_index_t index;
+	uintptr_t mask = 0;
 
 	assert(ip_active(port));
 	assert(name != MACH_PORT_NULL);
 	assert(soright != IP_NULL);
 
-	table = port->ip_dnrequests;
+	table = port->ip_requests;
+
 	if (table == IPR_NULL)
 		return KERN_NO_SPACE;
 
@@ -178,16 +182,25 @@ ipc_port_dnrequest(
 
 	table->ipr_next = ipr->ipr_next;
 	ipr->ipr_name = name;
-	ipr->ipr_soright = soright;
+	
+	if (send_possible) {
+		mask |= IPR_SOR_SPREQ_MASK;
+		if (immediate) {
+			mask |= IPR_SOR_SPARM_MASK;
+			port->ip_sprequests = TRUE;
+		}
+	}
+	ipr->ipr_soright = IPR_SOR_MAKE(soright, mask);
 
 	*indexp = index;
+
 	return KERN_SUCCESS;
 }
 
 /*
- *	Routine:	ipc_port_dngrow
+ *	Routine:	ipc_port_request_grow
  *	Purpose:
- *		Grow a port's table of dead-name requests.
+ *		Grow a port's table of requests.
  *	Conditions:
  *		The port must be locked and active.
  *		Nothing else locked; will allocate memory.
@@ -201,7 +214,7 @@ ipc_port_dnrequest(
  */
 
 kern_return_t
-ipc_port_dngrow(
+ipc_port_request_grow(
 	ipc_port_t		port,
 	ipc_table_elems_t 	target_size)
 {
@@ -210,9 +223,9 @@ ipc_port_dngrow(
 
 	assert(ip_active(port));
 
-	otable = port->ip_dnrequests;
+	otable = port->ip_requests;
 	if (otable == IPR_NULL)
-		its = &ipc_table_dnrequests[0];
+		its = &ipc_table_requests[0];
 	else
 		its = otable->ipr_size + 1;
 
@@ -235,7 +248,7 @@ ipc_port_dngrow(
 	ip_unlock(port);
 
 	if ((its->its_size == 0) ||
-	    ((ntable = it_dnrequests_alloc(its)) == IPR_NULL)) {
+	    ((ntable = it_requests_alloc(its)) == IPR_NULL)) {
 		ipc_port_release(port);
 		return KERN_RESOURCE_SHORTAGE;
 	}
@@ -246,12 +259,11 @@ ipc_port_dngrow(
 	/*
 	 *	Check that port is still active and that nobody else
 	 *	has slipped in and grown the table on us.  Note that
-	 *	just checking port->ip_dnrequests == otable isn't
-	 *	sufficient; must check ipr_size.
+	 *	just checking if the current table pointer == otable
+	 *	isn't sufficient; must check ipr_size.
 	 */
 
-	if (ip_active(port) &&
-	    (port->ip_dnrequests == otable) &&
+	if (ip_active(port) && (port->ip_requests == otable) &&
 	    ((otable == IPR_NULL) || (otable->ipr_size+1 == its))) {
 		ipc_table_size_t oits;
 		ipc_table_elems_t osize, nsize;
@@ -288,55 +300,125 @@ ipc_port_dngrow(
 
 		ntable->ipr_next = free;
 		ntable->ipr_size = its;
-		port->ip_dnrequests = ntable;
+		port->ip_requests = ntable;
 		ip_unlock(port);
 
 		if (otable != IPR_NULL) {
-			it_dnrequests_free(oits, otable);
+			it_requests_free(oits, otable);
 	        }
 	} else {
 		ip_check_unlock(port);
-		it_dnrequests_free(its, ntable);
+		it_requests_free(its, ntable);
 	}
 
 	return KERN_SUCCESS;
 }
  
 /*
- *	Routine:	ipc_port_dncancel
+ *	Routine:	ipc_port_request_sparm
+ *	Purpose:
+ *		Arm delayed send-possible request.
+ *	Conditions:
+ *		The port must be locked and active.
+ */
+
+void
+ipc_port_request_sparm(
+	ipc_port_t			port,
+	__assert_only mach_port_name_t	name,
+	ipc_port_request_index_t	index)
+{
+	if (index != IE_REQ_NONE) {
+		ipc_port_request_t ipr, table;
+
+		assert(ip_active(port));
+	
+		table = port->ip_requests;
+		assert(table != IPR_NULL);
+
+		ipr = &table[index];
+		assert(ipr->ipr_name == name);
+
+		if (IPR_SOR_SPREQ(ipr->ipr_soright)) {
+			ipr->ipr_soright = IPR_SOR_MAKE(ipr->ipr_soright, IPR_SOR_SPARM_MASK);
+			port->ip_sprequests = TRUE;
+		}
+	}
+}
+
+/*
+ *	Routine:	ipc_port_request_type
  *	Purpose:
- *		Cancel a dead-name request and return the send-once right.
+ *		Determine the type(s) of port requests enabled for a name.
  *	Conditions:
- *		The port must locked and active.
+ *		The port must be locked or inactive (to avoid table growth).
+ *		The index must not be IE_REQ_NONE and must correspond to the name in question.
+ */
+mach_port_type_t
+ipc_port_request_type(
+	ipc_port_t			port,
+	__assert_only mach_port_name_t	name,
+	ipc_port_request_index_t	index)
+{
+	ipc_port_request_t ipr, table;
+	mach_port_type_t type = 0;
+
+	table = port->ip_requests;
+	assert (table != IPR_NULL);
+
+	assert(index != IE_REQ_NONE);
+	ipr = &table[index];
+	assert(ipr->ipr_name == name);
+
+	if (IP_VALID(IPR_SOR_PORT(ipr->ipr_soright))) {
+		type |= MACH_PORT_TYPE_DNREQUEST;
+
+		if (IPR_SOR_SPREQ(ipr->ipr_soright)) {
+			type |= MACH_PORT_TYPE_SPREQUEST;
+
+			if (!IPR_SOR_SPARMED(ipr->ipr_soright)) {
+				type |= MACH_PORT_TYPE_SPREQUEST_DELAYED;
+			} else {
+				assert(port->ip_sprequests == TRUE);
+			}
+		}
+	}
+	return type;
+}
+
+/*
+ *	Routine:	ipc_port_request_cancel
+ *	Purpose:
+ *		Cancel a dead-name/send-possible request and return the send-once right.
+ *	Conditions:
+ *		The port must be locked and active.
+ *		The index must not be IE_REQ_NONE and must correspond to the given name.
  */
 
 ipc_port_t
-ipc_port_dncancel(
-	ipc_port_t				port,
+ipc_port_request_cancel(
+	ipc_port_t			port,
 	__assert_only mach_port_name_t	name,
-	ipc_port_request_index_t		index)
+	ipc_port_request_index_t	index)
 {
 	ipc_port_request_t ipr, table;
-	ipc_port_t dnrequest;
+	ipc_port_t request = IP_NULL;
 
 	assert(ip_active(port));
-	assert(name != MACH_PORT_NULL);
-	assert(index != 0);
-
-	table = port->ip_dnrequests;
+	table = port->ip_requests;
 	assert(table != IPR_NULL);
 
+	assert (index != IE_REQ_NONE);
 	ipr = &table[index];
-	dnrequest = ipr->ipr_soright;
 	assert(ipr->ipr_name == name);
+	request = IPR_SOR_PORT(ipr->ipr_soright);
 
 	/* return ipr to the free list inside the table */
-
 	ipr->ipr_name = MACH_PORT_NULL;
 	ipr->ipr_next = table->ipr_next;
 	table->ipr_next = index;
 
-	return dnrequest;
+	return request;
 }
 
 /*
@@ -470,7 +552,7 @@ ipc_port_init(
 
 	port->ip_nsrequest = IP_NULL;
 	port->ip_pdrequest = IP_NULL;
-	port->ip_dnrequests = IPR_NULL;
+	port->ip_requests = IPR_NULL;
 
 	port->ip_pset_count = 0;
 	port->ip_premsg = IKM_NULL;
@@ -578,37 +660,105 @@ ipc_port_alloc_name(
 }
 
 /*
- * Generate dead name notifications.  Called from ipc_port_destroy.
- * Port is unlocked but still has reference(s);
- * dnrequests was taken from port while the port
- * was locked but the port now has port->ip_dnrequests set to IPR_NULL.
+ * 	Routine:	ipc_port_spnotify
+ *	Purpose:
+ *		Generate send-possible port notifications.
+ *	Conditions:
+ *		Nothing locked, reference held on port.
  */
 void
-ipc_port_dnnotify(
-	__unused ipc_port_t	port,
-	ipc_port_request_t	dnrequests)
+ipc_port_spnotify(
+	ipc_port_t	port)
 {
-	ipc_table_size_t	its = dnrequests->ipr_size;
-	ipc_table_elems_t	size = its->its_size;
-	ipc_port_request_index_t index;
-
-	for (index = 1; index < size; index++) {
-		ipc_port_request_t	ipr = &dnrequests[index];
-		mach_port_name_t	name = ipr->ipr_name;
-		ipc_port_t		soright;
+	ipc_port_request_index_t index = 0;
+	ipc_table_elems_t size = 0;
 
-		if (name == MACH_PORT_NULL)
-			continue;
+	/*
+	 * If the port has no send-possible request
+	 * armed, don't bother to lock the port.
+	 */
+	if (!port->ip_sprequests)
+		return;
 
-		soright = ipr->ipr_soright;
-		assert(soright != IP_NULL);
+	ip_lock(port);
+	if (!port->ip_sprequests) {
+		ip_unlock(port);
+		return;
+	}
+	port->ip_sprequests = FALSE;
 
-		ipc_notify_dead_name(soright, name);
+ revalidate:
+	if (ip_active(port)) {
+		ipc_port_request_t requests;
+
+		/* table may change each time port unlocked (reload) */
+		requests = port->ip_requests;
+		assert(requests != IPR_NULL);
+
+		/*
+		 * No need to go beyond the table size as of when we first
+		 * entered - larger slots hold future notifications.
+		 */
+		if (size == 0)
+			size = requests->ipr_size->its_size;
+
+		/* no need to backtrack either */
+		while (++index < size) {
+			ipc_port_request_t ipr = &requests[index];
+			mach_port_name_t name = ipr->ipr_name;
+			ipc_port_t soright = IPR_SOR_PORT(ipr->ipr_soright);
+			boolean_t armed = IPR_SOR_SPARMED(ipr->ipr_soright);
+
+			if (MACH_PORT_VALID(name) && armed && IP_VALID(soright)) {
+				/* claim send-once right - slot still inuse */
+				ipr->ipr_soright = IP_NULL;
+				ip_unlock(port);
+
+				ipc_notify_send_possible(soright, name);
+
+				ip_lock(port);
+				goto revalidate;
+			}
+		}
 	}
+	ip_unlock(port);
+}
 
-	it_dnrequests_free(its, dnrequests);
+/*
+ * 	Routine:	ipc_port_dnnotify
+ *	Purpose:
+ *		Generate dead name notifications for
+ *		all outstanding dead-name and send-
+ *		possible requests.
+ *	Conditions:
+ *		Nothing locked.
+ *		Port must be inactive.
+ *		Reference held on port.
+ */
+void
+ipc_port_dnnotify(
+	ipc_port_t	port)
+{
+	ipc_port_request_t requests = port->ip_requests;
+
+	assert(!ip_active(port));
+	if (requests != IPR_NULL) {
+		ipc_table_size_t its = requests->ipr_size;
+		ipc_table_elems_t size = its->its_size;
+		ipc_port_request_index_t index;
+		for (index = 1; index < size; index++) {
+			ipc_port_request_t ipr = &requests[index];
+			mach_port_name_t name = ipr->ipr_name;
+			ipc_port_t soright = IPR_SOR_PORT(ipr->ipr_soright);
+
+			if (MACH_PORT_VALID(name) && IP_VALID(soright)) {
+				ipc_notify_dead_name(soright, name);
+			}
+		}
+	}
 }
 
+
 /*
  *	Routine:	ipc_port_destroy
  *	Purpose:
@@ -629,7 +779,6 @@ ipc_port_destroy(
 	ipc_port_t pdrequest, nsrequest;
 	ipc_mqueue_t mqueue;
 	ipc_kmsg_t kmsg;
-	ipc_port_request_t dnrequests;
 
 	assert(ip_active(port));
 	/* port->ip_receiver_name is garbage */
@@ -659,10 +808,6 @@ ipc_port_destroy(
 	port->ip_object.io_bits &= ~IO_BITS_ACTIVE;
 	port->ip_timestamp = ipc_port_timestamp();
 
-	/* save for later */
-	dnrequests = port->ip_dnrequests;
-	port->ip_dnrequests = IPR_NULL;
-
 	/*
 	 * If the port has a preallocated message buffer and that buffer
 	 * is not inuse, free it.  If it has an inuse one, then the kmsg
@@ -679,7 +824,6 @@ ipc_port_destroy(
 	ip_unlock(port);
 
 	/* throw away no-senders request */
-
 	nsrequest = port->ip_nsrequest;
 	if (nsrequest != IP_NULL)
 		ipc_notify_send_once(nsrequest); /* consumes ref */
@@ -689,9 +833,7 @@ ipc_port_destroy(
 	ipc_mqueue_destroy(mqueue);
 
 	/* generate dead-name notifications */
-	if (dnrequests != IPR_NULL) {
-		ipc_port_dnnotify(port, dnrequests);
-	}
+	ipc_port_dnnotify(port);
 
 	ipc_kobject_destroy(port);
 
@@ -1001,7 +1143,7 @@ ipc_port_copyout_send(
 /*
  *	Routine:	ipc_port_release_send
  *	Purpose:
- *		Release a (valid) naked send right.
+ *		Release a naked send right.
  *		Consumes a ref for the port.
  *	Conditions:
  *		Nothing locked.
@@ -1014,7 +1156,8 @@ ipc_port_release_send(
 	ipc_port_t nsrequest = IP_NULL;
 	mach_port_mscount_t mscount;
 
-	assert(IP_VALID(port));
+	if (!IP_VALID(port))
+		return;
 
 	ip_lock(port);
 	ip_release(port);
@@ -1049,7 +1192,8 @@ ipc_port_t
 ipc_port_make_sonce(
 	ipc_port_t	port)
 {
-	assert(IP_VALID(port));
+	if (!IP_VALID(port))
+		return port;
 
 	ip_lock(port);
 	assert(ip_active(port));
@@ -1078,7 +1222,8 @@ void
 ipc_port_release_sonce(
 	ipc_port_t	port)
 {
-	assert(IP_VALID(port));
+	if (!IP_VALID(port))
+		return;
 
 	ip_lock(port);
 
@@ -1111,7 +1256,8 @@ ipc_port_release_receive(
 {
 	ipc_port_t dest;
 
-	assert(IP_VALID(port));
+	if (!IP_VALID(port))
+		return;
 
 	ip_lock(port);
 	assert(ip_active(port));
@@ -1200,6 +1346,37 @@ ipc_port_dealloc_special(
 	ipc_port_destroy(port);
 }
 
+/*
+ *	Routine:	ipc_port_finalize
+ *	Purpose:
+ *		Called when the last reference to the port
+ *		is deallocated, to free any remaining data
+ *		associated with it.
+ *	Conditions:
+ *		Nothing locked.
+ */
+void
+ipc_port_finalize(
+	ipc_port_t		port)
+{
+	ipc_port_request_t requests = port->ip_requests;
+
+	assert(!ip_active(port));
+	if (requests != IPR_NULL) {
+		ipc_table_size_t its = requests->ipr_size;
+		it_requests_free(its, requests);
+		port->ip_requests = IPR_NULL;
+	}
+	
+#if	MACH_ASSERT
+	ipc_port_track_dealloc(port);
+#endif	/* MACH_ASSERT */
+
+#if CONFIG_MACF_MACH
+	/* Port label should have been initialized after creation. */
+	mac_port_label_destroy(&port->ip_label);
+#endif	  
+}
 
 #if	MACH_ASSERT
 #include <kern/machine.h>
@@ -1314,6 +1491,7 @@ ipc_port_track_dealloc(
 }
 #endif
 
+
 #endif	/* MACH_ASSERT */
 
 
@@ -1396,7 +1574,7 @@ ipc_port_print(
 
 	iprintf("nsrequest=0x%x", port->ip_nsrequest);
 	printf(", pdrequest=0x%x", port->ip_pdrequest);
-	printf(", dnrequests=0x%x\n", port->ip_dnrequests);
+	printf(", requests=0x%x\n", port->ip_requests);
 
 	iprintf("pset_count=0x%x", port->ip_pset_count);
 	printf(", seqno=%d", port->ip_messages.imq_seqno);
diff --git a/osfmk/ipc/ipc_port.h b/osfmk/ipc/ipc_port.h
index 4998a84bc..34aab79d8 100644
--- a/osfmk/ipc/ipc_port.h
+++ b/osfmk/ipc/ipc_port.h
@@ -127,13 +127,15 @@ struct ipc_port {
 	} data;
 
 	ipc_kobject_t ip_kobject;
+
 	mach_port_mscount_t ip_mscount;
 	mach_port_rights_t ip_srights;
 	mach_port_rights_t ip_sorights;
 
 	struct ipc_port *ip_nsrequest;
 	struct ipc_port *ip_pdrequest;
-	struct ipc_port_request *ip_dnrequests;
+	struct ipc_port_request *ip_requests;
+	boolean_t ip_sprequests;
 
 	unsigned int ip_pset_count;
 	struct ipc_kmsg *ip_premsg;
@@ -191,6 +193,9 @@ struct ipc_port {
 
 #define	ip_kotype(port)		io_kotype(&(port)->ip_object)
 
+#define ip_full_kernel(port)	imq_full_kernel(&(port)->ip_messages) 
+#define ip_full(port)		imq_full(&(port)->ip_messages) 
+
 /*
  * JMM - Preallocation flag
  * This flag indicates that there is a message buffer preallocated for this
@@ -215,7 +220,7 @@ MACRO_BEGIN								\
 	(port)->ip_premsg = IKM_NULL;					\
 MACRO_END
 
-
+/* JMM - address alignment/packing for LP64 */
 struct ipc_port_request {
 	union {
 		struct ipc_port *port;
@@ -234,6 +239,17 @@ struct ipc_port_request {
 #define	ipr_soright		notify.port
 #define	ipr_name		name.name
 
+/*
+ * Use the low bits in the ipr_soright to specify the request type
+ */
+#define IPR_SOR_SPARM_MASK	1		/* send-possible armed */
+#define IPR_SOR_SPREQ_MASK	2		/* send-possible requested */
+#define IPR_SOR_SPBIT_MASK	3		/* combo */
+#define IPR_SOR_SPARMED(sor)	(((uintptr_t)(sor) & IPR_SOR_SPARM_MASK) != 0)
+#define IPR_SOR_SPREQ(sor)	(((uintptr_t)(sor) & IPR_SOR_SPREQ_MASK) != 0)
+#define IPR_SOR_PORT(sor)	((ipc_port_t)((uintptr_t)(sor) & ~IPR_SOR_SPBIT_MASK))
+#define IPR_SOR_MAKE(p,m)	((ipc_port_t)((uintptr_t)(p) | (m)))
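
/*
 * A worked sketch (not from this patch) of the tagged-pointer scheme above.
 * Request slots hold pointer-aligned send-once rights, so the two low bits
 * of ipr_soright are free to carry per-slot flags; the helper and its
 * "so_right" argument are hypothetical.
 */
static void
example_tagging(ipc_port_t so_right)
{
	ipc_port_t tagged;

	tagged = IPR_SOR_MAKE(so_right, IPR_SOR_SPREQ_MASK | IPR_SOR_SPARM_MASK);
	assert(IPR_SOR_PORT(tagged) == so_right);	/* flags stripped */
	assert(IPR_SOR_SPREQ(tagged));			/* send-possible requested */
	assert(IPR_SOR_SPARMED(tagged));		/* and currently armed */
}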
+
 extern lck_grp_t 	ipc_lck_grp;
 extern lck_attr_t 	ipc_lck_attr;
 
@@ -297,32 +313,47 @@ extern ipc_port_timestamp_t ipc_port_timestamp(void);
 				     MACH_PORT_RIGHT_SEND,		\
 				     (ipc_object_t *) (portp))
 
-/* Allocate a dead-name request slot */
+/* Allocate a notification request slot */
 extern kern_return_t
-ipc_port_dnrequest(
+ipc_port_request_alloc(
 	ipc_port_t			port,
 	mach_port_name_t		name,
 	ipc_port_t			soright,
+	boolean_t			send_possible,
+	boolean_t			immediate,
 	ipc_port_request_index_t	*indexp);
 
-/* Grow a port's table of dead-name requests */
-extern kern_return_t ipc_port_dngrow(
+/* Grow one of a port's tables of notification requests */
+extern kern_return_t ipc_port_request_grow(
 	ipc_port_t			port,
 	ipc_table_elems_t		target_size);
 
-/* Cancel a dead-name request and return the send-once right */
-extern ipc_port_t ipc_port_dncancel(
+/* Return the type(s) of notification requests outstanding */
+extern mach_port_type_t ipc_port_request_type(
 	ipc_port_t			port,
 	mach_port_name_t		name,
 	ipc_port_request_index_t	index);
 
-#define	ipc_port_dnrename(port, index, oname, nname)			\
+/* Cancel a notification request and return the send-once right */
+extern ipc_port_t ipc_port_request_cancel(
+	ipc_port_t			port,
+	mach_port_name_t		name,
+	ipc_port_request_index_t	index);
+
+/* Arm any delayed send-possible notification */
+extern void ipc_port_request_sparm(
+	ipc_port_t			port,
+	mach_port_name_t		name,
+	ipc_port_request_index_t	index);
+
+/* Macros for manipulating a port's dead-name notification requests */
+#define	ipc_port_request_rename(port, index, oname, nname)		\
 MACRO_BEGIN								\
 	ipc_port_request_t ipr, table;					\
 									\
 	assert(ip_active(port));					\
 									\
-	table = port->ip_dnrequests;					\
+	table = port->ip_requests;					\
 	assert(table != IPR_NULL);					\
 									\
 	ipr = &table[index];						\
@@ -331,6 +362,7 @@ MACRO_BEGIN								\
 	ipr->ipr_name = nname;						\
 MACRO_END
 
+
 /* Make a port-deleted request */
 extern void ipc_port_pdrequest(
 	ipc_port_t	port,
@@ -375,8 +407,11 @@ extern kern_return_t ipc_port_alloc_name(
 
 /* Generate dead name notifications */
 extern void ipc_port_dnnotify(
-	ipc_port_t		port,
-	ipc_port_request_t	dnrequests);
+	ipc_port_t		port);
+
+/* Generate send-possible notifications */
+extern void ipc_port_spnotify(
+	ipc_port_t		port);
 
 /* Destroy a port */
 extern void ipc_port_destroy(
@@ -435,6 +470,10 @@ extern void ipc_port_release_sonce(
 extern void ipc_port_release_receive(
 	ipc_port_t	port);
 
+/* finalize the destruction of a port before it gets freed */
+extern void ipc_port_finalize(
+	ipc_port_t	port);
+
 /* Allocate a port in a special space */
 extern ipc_port_t ipc_port_alloc_special(
 	ipc_space_t	space);
diff --git a/osfmk/ipc/ipc_pset.c b/osfmk/ipc/ipc_pset.c
index 4e0dd2b68..462527119 100644
--- a/osfmk/ipc/ipc_pset.c
+++ b/osfmk/ipc/ipc_pset.c
@@ -307,7 +307,7 @@ static int      filt_machportattach(struct knote *kn);
 static void	filt_machportdetach(struct knote *kn);
 static int	filt_machport(struct knote *kn, long hint);
 static void     filt_machporttouch(struct knote *kn, struct kevent64_s *kev, long type);
-static int	filt_machportpeek(struct knote *kn);
+static unsigned filt_machportpeek(struct knote *kn);
 struct filterops machport_filtops = {
         .f_attach = filt_machportattach,
         .f_detach = filt_machportdetach,
@@ -515,7 +515,7 @@ filt_machporttouch(struct knote *kn, struct kevent64_s *kev, long type)
  * will catch changes in this status when the event gets posted
  * up to the knote's kqueue).
  */
-static int
+static unsigned
 filt_machportpeek(struct knote *kn)
 {
         ipc_pset_t              pset = kn->kn_ptr.p_pset;
diff --git a/osfmk/ipc/ipc_right.c b/osfmk/ipc/ipc_right.c
index e7ffd94ec..d3db278b8 100644
--- a/osfmk/ipc/ipc_right.c
+++ b/osfmk/ipc/ipc_right.c
@@ -243,12 +243,6 @@ ipc_right_reverse(
  *		registered send-once right.  If notify is IP_NULL,
  *		just cancels the previously registered request.
  *
- *		This interacts with the IE_BITS_COMPAT, because they
- *		both use ie_request.  If this is a compat entry, then
- *		previous always gets IP_NULL.  If notify is IP_NULL,
- *		then the entry remains a compat entry.  Otherwise
- *		the real dead-name request is registered and the entry
- *		is no longer a compat entry.
  *	Conditions:
  *		Nothing locked.  May allocate memory.
  *		Only consumes/returns refs if successful.
@@ -265,28 +259,39 @@ ipc_right_reverse(
  */
 
 kern_return_t
-ipc_right_dnrequest(
+ipc_right_request_alloc(
 	ipc_space_t		space,
 	mach_port_name_t	name,
 	boolean_t		immediate,
+	boolean_t		send_possible,
 	ipc_port_t		notify,
 	ipc_port_t		*previousp)
 {
-	ipc_port_t previous;
+	ipc_port_request_index_t prev_request;
+	ipc_port_t previous = IP_NULL;
+	ipc_entry_t entry;
+	kern_return_t kr;
 
 	for (;;) {
-		ipc_entry_t entry;
-		ipc_entry_bits_t bits;
-		kern_return_t kr;
-
 		kr = ipc_right_lookup_write(space, name, &entry);
 		if (kr != KERN_SUCCESS)
 			return kr;
+
 		/* space is write-locked and active */
-		bits = entry->ie_bits;
-		if (bits & MACH_PORT_TYPE_PORT_RIGHTS) {
+		
+		prev_request = entry->ie_request;
+
+		/* if nothing to do or undo, we're done */
+		if (notify == IP_NULL && prev_request == IE_REQ_NONE) {
+			is_write_unlock(space);
+			*previousp = IP_NULL;
+			return KERN_SUCCESS;
+		}
+
+		/* see if the entry is of proper type for requests */
+		if (entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS) {
+			ipc_port_request_index_t new_request;
 			ipc_port_t port;
-			ipc_port_request_index_t request;
 
 			port = (ipc_port_t) entry->ie_object;
 			assert(port != IP_NULL);
@@ -294,70 +299,71 @@ ipc_right_dnrequest(
 			if (!ipc_right_check(space, port, name, entry)) {
 				/* port is locked and active */
 
+				/* if no new request, just cancel previous */
 				if (notify == IP_NULL) {
-					previous = ipc_right_dncancel_macro(
-						   space, port, name, entry);
-
+					if (prev_request != IE_REQ_NONE)
+						previous = ipc_port_request_cancel(port, name, prev_request);
 					ip_unlock(port);
+					entry->ie_request = IE_REQ_NONE;
 					is_write_unlock(space);
 					break;
 				}
 
 				/*
-				 *	If a registered soright exists,
-				 *	want to atomically switch with it.
-				 *	If ipc_port_dncancel finds us a
-				 *	soright, then the following
-				 *	ipc_port_dnrequest will reuse
-				 *	that slot, so we are guaranteed
-				 *	not to unlock and retry.
+				 * Send-once rights, kernel objects, and ports whose
+				 * queues are not full fire immediately (if immediate
+				 * is specified).
 				 */
+				if (send_possible && immediate &&
+				    ((entry->ie_bits & MACH_PORT_TYPE_SEND_ONCE) ||
+				     port->ip_receiver == ipc_space_kernel || !ip_full(port))) {
+					if (prev_request != IE_REQ_NONE)
+						previous = ipc_port_request_cancel(port, name, prev_request);
+					ip_unlock(port);
+					entry->ie_request = IE_REQ_NONE;
+					is_write_unlock(space);
 
-				previous = ipc_right_dncancel_macro(space,
-						port, name, entry);
+					ipc_notify_send_possible(notify, name);
+					break;
+				}
 
-				kr = ipc_port_dnrequest(port, name, notify,
-							&request);
+				/*
+				 * If there is a previous request, free it.  Any subsequent
+				 * allocation cannot fail, thus ensuring an atomic swap.
+				 */
+				if (prev_request != IE_REQ_NONE)
+					previous = ipc_port_request_cancel(port, name, prev_request);
+
+				kr = ipc_port_request_alloc(port, name, notify,
+							    send_possible, immediate,
+							    &new_request);
 				if (kr != KERN_SUCCESS) {
 					assert(previous == IP_NULL);
 					is_write_unlock(space);
 
-					kr = ipc_port_dngrow(port,
-							     ITS_SIZE_NONE);
+					kr = ipc_port_request_grow(port, ITS_SIZE_NONE);
 					/* port is unlocked */
+
 					if (kr != KERN_SUCCESS)
 						return kr;
 
 					continue;
 				}
 
-				assert(request != 0);
+				assert(new_request != IE_REQ_NONE);
 				ip_unlock(port);
-
-				entry->ie_request = request;
+				entry->ie_request = new_request;
 				is_write_unlock(space);
 				break;
-			} else {
-
-			  /*
-			   * Our capability bits were changed by ipc_right_check
-			   * because it found an inactive port and removed our
-			   * references to it (converting our entry into a dead
-			   * one).  Reload the bits (and obviously we can't use
-			   * the port name anymore).
-			   */
-			  bits = entry->ie_bits;
-
 			}
+			/* entry may have changed to dead-name by ipc_right_check() */
 
-			assert(bits & MACH_PORT_TYPE_DEAD_NAME);
 		}
 
-		if ((bits & MACH_PORT_TYPE_DEAD_NAME) &&
-		    immediate && (notify != IP_NULL)) {
-			mach_port_urefs_t urefs = IE_BITS_UREFS(bits);
+		/* treat send_possible requests as immediate w.r.t. dead-name */
+		if ((send_possible || immediate) && notify != IP_NULL &&
+		    (entry->ie_bits & MACH_PORT_TYPE_DEAD_NAME)) {
+			mach_port_urefs_t urefs = IE_BITS_UREFS(entry->ie_bits);
 
-			assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_DEAD_NAME);
 			assert(urefs > 0);
 
 			if (MACH_PORT_UREFS_OVERFLOW(urefs, 1)) {
@@ -374,7 +380,7 @@ ipc_right_dnrequest(
 		}
 
 		is_write_unlock(space);
-		if (bits & MACH_PORT_TYPE_PORT_OR_DEAD)
+		if (entry->ie_bits & MACH_PORT_TYPE_PORT_OR_DEAD)
 			return KERN_INVALID_ARGUMENT;
 		else
 			return KERN_INVALID_RIGHT;
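
/*
 * The allocation path above is an instance of the unlock-grow-retry idiom.
 * A condensed sketch (hypothetical, using the interfaces introduced in this
 * patch; lookup and error paths abbreviated):
 */
for (;;) {
	/* space write-locked; port locked and active */
	kr = ipc_port_request_alloc(port, name, notify,
				    send_possible, immediate, &new_request);
	if (kr == KERN_SUCCESS)
		break;			/* slot claimed atomically */

	is_write_unlock(space);
	kr = ipc_port_request_grow(port, ITS_SIZE_NONE);	/* unlocks port */
	if (kr != KERN_SUCCESS)
		return kr;		/* resource shortage */
	/* re-lookup, re-lock, and retry against the larger table */
}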
@@ -385,9 +391,9 @@ ipc_right_dnrequest(
 }
 
 /*
- *	Routine:	ipc_right_dncancel
+ *	Routine:	ipc_right_request_cancel
  *	Purpose:
- *		Cancel a dead-name request and return the send-once right.
+ *		Cancel a notification request and return the send-once right.
  *		Afterwards, entry->ie_request == 0.
  *	Conditions:
  *		The space must be write-locked; the port must be locked.
@@ -395,21 +401,23 @@ ipc_right_dnrequest(
  */
 
 ipc_port_t
-ipc_right_dncancel(
+ipc_right_request_cancel(
 	__unused ipc_space_t		space,
 	ipc_port_t			port,
 	mach_port_name_t		name,
 	ipc_entry_t			entry)
 {
-	ipc_port_t dnrequest;
+	ipc_port_t previous;
 
 	assert(ip_active(port));
 	assert(port == (ipc_port_t) entry->ie_object);
 
-	dnrequest = ipc_port_dncancel(port, name, entry->ie_request);
-	entry->ie_request = 0;
+	if (entry->ie_request == IE_REQ_NONE)
+		return IP_NULL;
 
-	return dnrequest;
+	previous = ipc_port_request_cancel(port, name, entry->ie_request);
+	entry->ie_request = IE_REQ_NONE;
+	return previous;
 }
 
 /*
@@ -478,8 +486,6 @@ ipc_right_check(
         }
 
 
-	ipc_port_release(port);
-
 	/* convert entry to dead name */
 
 	if ((bits & MACH_PORT_TYPE_SEND) && !(bits & MACH_PORT_TYPE_RECEIVE))
@@ -489,27 +495,32 @@ ipc_right_check(
 	
 	/*
 	 * If there was a notification request outstanding on this
-	 * name, and since the port went dead, that notification
-	 * must already be on its way up from the port layer. We
-	 * don't need the index of the notification port anymore.
+	 * name, and the port went dead, that notification
+	 * must already be on its way up from the port layer. 
+	 *
+	 * Add the reference that the notification carries. It
+	 * is done here, and not in the notification delivery,
+	 * because the latter doesn't have a space reference and
+	 * trying to actually move a send-right reference would
+	 * get short-circuited into a MACH_PORT_DEAD by IPC. Since
+	 * all calls that deal with the right eventually come
+	 * through here, it has the same result.
 	 *
-	 * JMM - We also add a reference to the entry since the
-	 * notification only carries the name and NOT a reference
-	 * (or right). This makes for pretty loose reference
-	 * counting, since it is only happenstance that we
-	 * detected the notification in progress like this.
-	 * But most (all?) calls that try to deal with this entry
-	 * will also come through here, so the reference gets added
-	 * before the entry gets used eventually (I would rather it
-	 * be explicit in the notification generation, though)
+	 * Once done, clear the request index so we only account
+	 * for it once.
 	 */
-	if (entry->ie_request != 0) {
-		assert(IE_BITS_UREFS(bits) < MACH_PORT_UREFS_MAX);
-		entry->ie_request = 0; 
-		bits++;	
+	if (entry->ie_request != IE_REQ_NONE) {
+		if (ipc_port_request_type(port, name, entry->ie_request) != 0) {
+			assert(IE_BITS_UREFS(bits) < MACH_PORT_UREFS_MAX);
+			bits++;	
+		}
+		entry->ie_request = IE_REQ_NONE; 
 	}
 	entry->ie_bits = bits;
 	entry->ie_object = IO_NULL;
+
+	ipc_port_release(port);
+
 	return TRUE;
 }
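
The bare bits++ above is less magic than it looks: the user-reference count lives in the low-order bits of the entry's ie_bits word, so incrementing the word increments urefs, and the preceding assert rules out a carry into the type bits. A standalone illustration (mask value as in this era's ipc_entry.h; the other values are made up):

    #include <assert.h>
    #include <stdint.h>

    #define IE_BITS_UREFS_MASK	0x0000ffffu	/* low 16 bits hold urefs */
    #define IE_BITS_UREFS(bits)	((bits) & IE_BITS_UREFS_MASK)

    int
    main(void)
    {
    	uint32_t bits = 0x00100005u;	/* illustrative type bits + 5 urefs */

    	assert(IE_BITS_UREFS(bits) < IE_BITS_UREFS_MASK);	/* no carry */
    	bits++;							/* one more uref */
    	assert(IE_BITS_UREFS(bits) == 6);
    	return 0;
    }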
 
@@ -548,14 +559,14 @@ ipc_right_clean(
 
 	switch (type) {
 	    case MACH_PORT_TYPE_DEAD_NAME:
-		assert(entry->ie_request == 0);
+		assert(entry->ie_request == IE_REQ_NONE);
 		assert(entry->ie_object == IO_NULL);
 		break;
 
 	    case MACH_PORT_TYPE_PORT_SET: {
 		ipc_pset_t pset = (ipc_pset_t) entry->ie_object;
 
-		assert(entry->ie_request == 0);
+		assert(entry->ie_request == IE_REQ_NONE);
 		assert(pset != IPS_NULL);
 
 		ips_lock(pset);
@@ -570,7 +581,7 @@ ipc_right_clean(
 	    case MACH_PORT_TYPE_SEND_RECEIVE:
 	    case MACH_PORT_TYPE_SEND_ONCE: {
 		ipc_port_t port = (ipc_port_t) entry->ie_object;
-		ipc_port_t dnrequest;
+		ipc_port_t request;
 		ipc_port_t nsrequest = IP_NULL;
 		mach_port_mscount_t mscount = 0;
 
@@ -583,7 +594,7 @@ ipc_right_clean(
 			break;
 		}
 
-		dnrequest = ipc_right_dncancel_macro(space, port, 
+		request = ipc_right_request_cancel_macro(space, port, 
 					name, entry);
 
 		if (type & MACH_PORT_TYPE_SEND) {
@@ -619,8 +630,8 @@ ipc_right_clean(
 		if (nsrequest != IP_NULL)
 			ipc_notify_no_senders(nsrequest, mscount);
 
-		if (dnrequest != IP_NULL)
-			ipc_notify_port_deleted(dnrequest, name);
+		if (request != IP_NULL)
+			ipc_notify_port_deleted(request, name);
 		break;
 	    }
 
@@ -657,7 +668,7 @@ ipc_right_destroy(
 
 	switch (type) {
 	    case MACH_PORT_TYPE_DEAD_NAME:
-		assert(entry->ie_request == 0);
+		assert(entry->ie_request == IE_REQ_NONE);
 		assert(entry->ie_object == IO_NULL);
 
 		ipc_entry_dealloc(space, name, entry);
@@ -666,7 +677,7 @@ ipc_right_destroy(
 	    case MACH_PORT_TYPE_PORT_SET: {
 		ipc_pset_t pset = (ipc_pset_t) entry->ie_object;
 
-		assert(entry->ie_request == 0);
+		assert(entry->ie_request == IE_REQ_NONE);
 		assert(pset != IPS_NULL);
 
 		entry->ie_object = IO_NULL;
@@ -686,7 +697,7 @@ ipc_right_destroy(
 		ipc_port_t port = (ipc_port_t) entry->ie_object;
 		ipc_port_t nsrequest = IP_NULL;
 		mach_port_mscount_t mscount = 0;
-		ipc_port_t dnrequest;
+		ipc_port_t request;
 
 		assert(port != IP_NULL);
 
@@ -701,14 +712,14 @@ ipc_right_destroy(
 			ip_release(port);
 			ip_check_unlock(port);
 
-			entry->ie_request = 0;
+			entry->ie_request = IE_REQ_NONE;
 			entry->ie_object = IO_NULL;
 			ipc_entry_dealloc(space, name, entry);
 
 			break;
 		}
 
-		dnrequest = ipc_right_dncancel_macro(space, port, name, entry);
+		request = ipc_right_request_cancel_macro(space, port, name, entry);
 
 		entry->ie_object = IO_NULL;
 		ipc_entry_dealloc(space, name, entry);
@@ -745,8 +756,8 @@ ipc_right_destroy(
 		if (nsrequest != IP_NULL)
 			ipc_notify_no_senders(nsrequest, mscount);
 
-		if (dnrequest != IP_NULL)
-			ipc_notify_port_deleted(dnrequest, name);
+		if (request != IP_NULL)
+			ipc_notify_port_deleted(request, name);
 		break;
 	    }
 
@@ -792,7 +803,7 @@ ipc_right_dealloc(
 	    dead_name:
 
 		assert(IE_BITS_UREFS(bits) > 0);
-		assert(entry->ie_request == 0);
+		assert(entry->ie_request == IE_REQ_NONE);
 		assert(entry->ie_object == IO_NULL);
 
 		if (IE_BITS_UREFS(bits) == 1) {
@@ -806,7 +817,7 @@ ipc_right_dealloc(
 	    }
 
 	    case MACH_PORT_TYPE_SEND_ONCE: {
-		ipc_port_t port, dnrequest;
+		ipc_port_t port, request;
 
 		assert(IE_BITS_UREFS(bits) == 1);
 
@@ -823,7 +834,7 @@ ipc_right_dealloc(
 
 		assert(port->ip_sorights > 0);
 
-		dnrequest = ipc_right_dncancel_macro(space, port, name, entry);
+		request = ipc_right_request_cancel_macro(space, port, name, entry);
 		ip_unlock(port);
 
 		entry->ie_object = IO_NULL;
@@ -833,14 +844,14 @@ ipc_right_dealloc(
 
 		ipc_notify_send_once(port);
 
-		if (dnrequest != IP_NULL)
-			ipc_notify_port_deleted(dnrequest, name);
+		if (request != IP_NULL)
+			ipc_notify_port_deleted(request, name);
 		break;
 	    }
 
 	    case MACH_PORT_TYPE_SEND: {
 		ipc_port_t port;
-		ipc_port_t dnrequest = IP_NULL;
+		ipc_port_t request = IP_NULL;
 		ipc_port_t nsrequest = IP_NULL;
 		mach_port_mscount_t mscount =  0;
 
@@ -868,7 +879,7 @@ ipc_right_dealloc(
 				}
 			}
 
-			dnrequest = ipc_right_dncancel_macro(space, port,
+			request = ipc_right_request_cancel_macro(space, port,
 							     name, entry);
 			ipc_hash_delete(space, (ipc_object_t) port,
 					name, entry);
@@ -887,8 +898,8 @@ ipc_right_dealloc(
 		if (nsrequest != IP_NULL)
 			ipc_notify_no_senders(nsrequest, mscount);
 
-		if (dnrequest != IP_NULL)
-			ipc_notify_port_deleted(dnrequest, name);
+		if (request != IP_NULL)
+			ipc_notify_port_deleted(request, name);
 		break;
 	    }
 
@@ -988,7 +999,7 @@ ipc_right_delta(
 
 		assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_PORT_SET);
 		assert(IE_BITS_UREFS(bits) == 0);
-		assert(entry->ie_request == 0);
+		assert(entry->ie_request == IE_REQ_NONE);
 
 		if (delta == 0)
 			goto success;
@@ -1015,7 +1026,7 @@ ipc_right_delta(
 
 	    case MACH_PORT_RIGHT_RECEIVE: {
 		ipc_port_t port;
-		ipc_port_t dnrequest = IP_NULL;
+		ipc_port_t request = IP_NULL;
 
 		if ((bits & MACH_PORT_TYPE_RECEIVE) == 0)
 			goto invalid_right;
@@ -1047,26 +1058,43 @@ ipc_right_delta(
 			assert(IE_BITS_UREFS(bits) < MACH_PORT_UREFS_MAX);
 			assert(port->ip_srights > 0);
 
-			/*
-			 *	The remaining send right turns into a
-			 *	dead name.  Notice we don't decrement
-			 *	ip_srights, generate a no-senders notif,
-			 *	or use ipc_right_dncancel, because the
-			 *	port is destroyed "first".
-			 */
-			bits &= ~IE_BITS_TYPE_MASK;
-			bits |= MACH_PORT_TYPE_DEAD_NAME;
-			if (entry->ie_request) {
-				entry->ie_request = 0;
-				bits++;
+			if (port->ip_pdrequest != NULL) {
+				/*
+				 * Since another task has requested a
+				 * destroy notification for this port, it
+				 * isn't actually being destroyed - the receive
+				 * right is just being moved to another task.
+				 * Since we still have one or more send rights,
+				 * we need to record the loss of the receive
+				 * right and enter the remaining send right
+				 * into the hash table.
+				 */
+				entry->ie_bits &= ~MACH_PORT_TYPE_RECEIVE;
+				ipc_hash_insert(space, (ipc_object_t) port,
+				    name, entry);
+				ip_reference(port);
+			} else {
+				/*
+				 *	The remaining send right turns into a
+				 *	dead name.  Notice we don't decrement
+				 *	ip_srights, generate a no-senders notif,
+				 *	or use ipc_right_dncancel, because the
+				 *	port is destroyed "first".
+				 */
+				bits &= ~IE_BITS_TYPE_MASK;
+				bits |= MACH_PORT_TYPE_DEAD_NAME;
+				if (entry->ie_request) {
+					entry->ie_request = IE_REQ_NONE;
+					bits++;
+				}
+				entry->ie_bits = bits;
+				entry->ie_object = IO_NULL;
 			}
-			entry->ie_bits = bits;
-			entry->ie_object = IO_NULL;
 		} else {
 			assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_RECEIVE);
 			assert(IE_BITS_UREFS(bits) == 0);
 
-			dnrequest = ipc_right_dncancel_macro(space, port,
+			request = ipc_right_request_cancel_macro(space, port,
 							     name, entry);
 			entry->ie_object = IO_NULL;
 			ipc_entry_dealloc(space, name, entry);
@@ -1076,13 +1104,13 @@ ipc_right_delta(
 		ipc_port_clear_receiver(port);
 		ipc_port_destroy(port);	/* consumes ref, unlocks */
 
-		if (dnrequest != IP_NULL)
-			ipc_notify_port_deleted(dnrequest, name);
+		if (request != IP_NULL)
+			ipc_notify_port_deleted(request, name);
 		break;
 	    }
 
 	    case MACH_PORT_RIGHT_SEND_ONCE: {
-		ipc_port_t port, dnrequest;
+		ipc_port_t port, request;
 
 		if ((bits & MACH_PORT_TYPE_SEND_ONCE) == 0)
 			goto invalid_right;
@@ -1111,7 +1139,7 @@ ipc_right_delta(
 			goto success;
 		}
 
-		dnrequest = ipc_right_dncancel_macro(space, port, name, entry);
+		request = ipc_right_request_cancel_macro(space, port, name, entry);
 		ip_unlock(port);
 
 		entry->ie_object = IO_NULL;
@@ -1121,8 +1149,8 @@ ipc_right_delta(
 
 		ipc_notify_send_once(port);
 
-		if (dnrequest != IP_NULL)
-			ipc_notify_port_deleted(dnrequest, name);
+		if (request != IP_NULL)
+			ipc_notify_port_deleted(request, name);
 		break;
 	    }
 
@@ -1147,7 +1175,7 @@ ipc_right_delta(
 		assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_DEAD_NAME);
 		assert(IE_BITS_UREFS(bits) > 0);
 		assert(entry->ie_object == IO_NULL);
-		assert(entry->ie_request == 0);
+		assert(entry->ie_request == IE_REQ_NONE);
 
 		urefs = IE_BITS_UREFS(bits);
 		if (MACH_PORT_UREFS_UNDERFLOW(urefs, delta))
@@ -1169,7 +1197,7 @@ ipc_right_delta(
 	    case MACH_PORT_RIGHT_SEND: {
 		mach_port_urefs_t urefs;
 		ipc_port_t port;
-		ipc_port_t dnrequest = IP_NULL;
+		ipc_port_t request = IP_NULL;
 		ipc_port_t nsrequest = IP_NULL;
 		mach_port_mscount_t mscount = 0;
 
@@ -1220,7 +1248,7 @@ ipc_right_delta(
 				assert(IE_BITS_TYPE(bits) ==
 						MACH_PORT_TYPE_SEND);
 
-				dnrequest = ipc_right_dncancel_macro(space, port,
+				request = ipc_right_request_cancel_macro(space, port,
 								     name, entry);
 				ipc_hash_delete(space, (ipc_object_t) port,
 						name, entry);
@@ -1240,8 +1268,8 @@ ipc_right_delta(
 		if (nsrequest != IP_NULL)
 			ipc_notify_no_senders(nsrequest, mscount);
 
-		if (dnrequest != IP_NULL)
-			ipc_notify_port_deleted(dnrequest, name);
+		if (request != IP_NULL)
+			ipc_notify_port_deleted(request, name);
 		break;
 	    }
 
@@ -1287,27 +1315,42 @@ ipc_right_info(
 	mach_port_type_t	*typep,
 	mach_port_urefs_t	*urefsp)
 {
+	ipc_port_t port;
 	ipc_entry_bits_t bits;
-	mach_port_type_t type;
+	mach_port_type_t type = 0;
 	ipc_port_request_index_t request;
 
 	bits = entry->ie_bits;
+	request = entry->ie_request;
+	port = (ipc_port_t) entry->ie_object;
 
-	if (bits & MACH_PORT_TYPE_SEND_RIGHTS) {
-		ipc_port_t port = (ipc_port_t) entry->ie_object;
+	if (bits & MACH_PORT_TYPE_RECEIVE) {
+		assert(IP_VALID(port));
 
-		if (ipc_right_check(space, port, name, entry)) {
+		if (request != IE_REQ_NONE) {
+			ip_lock(port);
+			assert(ip_active(port));
+			type |= ipc_port_request_type(port, name, request);
+			ip_unlock(port);
+		}
+
+	} else if (bits & MACH_PORT_TYPE_SEND_RIGHTS) {
+		/*
+		 * Validate that the port is still alive - if so, get the
+		 * request types while we still have it locked.  Otherwise,
+		 * recapture the (now dead) bits.
+		 */
+		if (!ipc_right_check(space, port, name, entry)) {
+			if (request != IE_REQ_NONE)
+				type |= ipc_port_request_type(port, name, request);
+			ip_unlock(port);
+		} else {
 			bits = entry->ie_bits;
 			assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_DEAD_NAME);
-		} else
-			ip_unlock(port);
+		}
 	}
 
-	type = IE_BITS_TYPE(bits);
-	request = entry->ie_request;
-
-	if (request != 0)
-		type |= MACH_PORT_TYPE_DNREQUEST;
+	type |= IE_BITS_TYPE(bits);
 
 	*typep = type;
 	*urefsp = IE_BITS_UREFS(bits);
@@ -1564,7 +1607,7 @@ ipc_right_copyin(
 
 	    case MACH_MSG_TYPE_MOVE_RECEIVE: {
 		ipc_port_t port;
-		ipc_port_t dnrequest = IP_NULL;
+		ipc_port_t request = IP_NULL;
 
 		if ((bits & MACH_PORT_TYPE_RECEIVE) == 0)
 			goto invalid_right;
@@ -1601,7 +1644,7 @@ ipc_right_copyin(
 			assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_RECEIVE);
 			assert(IE_BITS_UREFS(bits) == 0);
 
-			dnrequest = ipc_right_dncancel_macro(space, port,
+			request = ipc_right_request_cancel_macro(space, port,
 							     name, entry);
 			entry->ie_object = IO_NULL;
 		}
@@ -1614,7 +1657,7 @@ ipc_right_copyin(
 		ip_unlock(port);
 
 		*objectp = (ipc_object_t) port;
-		*sorightp = dnrequest;
+		*sorightp = request;
 		break;
 	    }
 
@@ -1671,7 +1714,7 @@ ipc_right_copyin(
 
 	    case MACH_MSG_TYPE_MOVE_SEND: {
 		ipc_port_t port;
-		ipc_port_t dnrequest = IP_NULL;
+		ipc_port_t request = IP_NULL;
 
 		if (bits & MACH_PORT_TYPE_DEAD_NAME)
 			goto move_dead;
@@ -1725,7 +1768,7 @@ ipc_right_copyin(
 				assert(IE_BITS_TYPE(bits) ==
 						MACH_PORT_TYPE_SEND);
 
-				dnrequest = ipc_right_dncancel_macro(space, port,
+				request = ipc_right_request_cancel_macro(space, port,
 								     name, entry);
 				ipc_hash_delete(space, (ipc_object_t) port,
 						name, entry);
@@ -1742,13 +1785,13 @@ ipc_right_copyin(
 		ip_unlock(port);
 
 		*objectp = (ipc_object_t) port;
-		*sorightp = dnrequest;
+		*sorightp = request;
 		break;
 	    }
 
 	    case MACH_MSG_TYPE_MOVE_SEND_ONCE: {
 		ipc_port_t port;
-		ipc_port_t dnrequest;
+		ipc_port_t request;
 
 		if (bits & MACH_PORT_TYPE_DEAD_NAME)
 			goto move_dead;
@@ -1792,7 +1835,7 @@ ipc_right_copyin(
 		assert(IE_BITS_UREFS(bits) == 1);
 		assert(port->ip_sorights > 0);
 
-		dnrequest = ipc_right_dncancel_macro(space, port, name, entry);
+		request = ipc_right_request_cancel_macro(space, port, name, entry);
 		ip_unlock(port);
 
 		entry->ie_object = IO_NULL;
@@ -1800,7 +1843,7 @@ ipc_right_copyin(
 			(IE_BITS_UREFS_MASK | MACH_PORT_TYPE_SEND_ONCE);
 
 		*objectp = (ipc_object_t) port;
-		*sorightp = dnrequest;
+		*sorightp = request;
 		break;
 	    }
 
@@ -1814,7 +1857,7 @@ ipc_right_copyin(
     copy_dead:
 	assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_DEAD_NAME);
 	assert(IE_BITS_UREFS(bits) > 0);
-	assert(entry->ie_request == 0);
+	assert(entry->ie_request == IE_REQ_NONE);
 	assert(entry->ie_object == 0);
 
 	if (!deadok)
@@ -1827,7 +1870,7 @@ ipc_right_copyin(
     move_dead:
 	assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_DEAD_NAME);
 	assert(IE_BITS_UREFS(bits) > 0);
-	assert(entry->ie_request == 0);
+	assert(entry->ie_request == IE_REQ_NONE);
 	assert(entry->ie_object == 0);
 
 	if (!deadok)
@@ -1950,7 +1993,7 @@ ipc_right_copyin_two(
 	ipc_entry_bits_t bits;
 	mach_port_urefs_t urefs;
 	ipc_port_t port;
-	ipc_port_t dnrequest = IP_NULL;
+	ipc_port_t request = IP_NULL;
 #if CONFIG_MACF_MACH
 	task_t self = current_task();
 	int    rc;
@@ -2000,7 +2043,7 @@ ipc_right_copyin_two(
 		} else {
 			assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_SEND);
 
-			dnrequest = ipc_right_dncancel_macro(space, port,
+			request = ipc_right_request_cancel_macro(space, port,
 							     name, entry);
 
 			port->ip_srights++;
@@ -2019,7 +2062,7 @@ ipc_right_copyin_two(
 	ip_unlock(port);
 
 	*objectp = (ipc_object_t) port;
-	*sorightp = dnrequest;
+	*sorightp = request;
 	return KERN_SUCCESS;
 
     invalid_right:
@@ -2257,7 +2300,7 @@ ipc_right_rename(
 	 *	Note IE_BITS_COMPAT implies ie_request != 0.
 	 */
 
-	if (request != 0) {
+	if (request != IE_REQ_NONE) {
 		ipc_port_t port;
 
 		assert(bits & MACH_PORT_TYPE_PORT_RIGHTS);
@@ -2265,17 +2308,17 @@ ipc_right_rename(
 		assert(port != IP_NULL);
 
 		if (ipc_right_check(space, port, oname, oentry)) {
-			request = 0;
+			request = IE_REQ_NONE;
 			object = IO_NULL;
 			bits = oentry->ie_bits;
 			assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_DEAD_NAME);
-			assert(oentry->ie_request == 0);
+			assert(oentry->ie_request == IE_REQ_NONE);
 		} else {
 			/* port is locked and active */
 
-			ipc_port_dnrename(port, request, oname, nname);
+			ipc_port_request_rename(port, request, oname, nname);
 			ip_unlock(port);
-			oentry->ie_request = 0;
+			oentry->ie_request = IE_REQ_NONE;
 		}
 	}
 
@@ -2341,7 +2384,7 @@ ipc_right_rename(
 		panic("ipc_right_rename: strange rights");
 	}
 
-	assert(oentry->ie_request == 0);
+	assert(oentry->ie_request == IE_REQ_NONE);
 	oentry->ie_object = IO_NULL;
 	ipc_entry_dealloc(space, oname, oentry);
 	is_write_unlock(space);
diff --git a/osfmk/ipc/ipc_right.h b/osfmk/ipc/ipc_right.h
index b7affe7b3..8b12cd895 100644
--- a/osfmk/ipc/ipc_right.h
+++ b/osfmk/ipc/ipc_right.h
@@ -95,24 +95,25 @@ extern boolean_t ipc_right_reverse(
 	mach_port_name_t	*namep,
 	ipc_entry_t		*entryp);
 
-/* Make a dead-name request, returning the registered send-once right */
-extern kern_return_t ipc_right_dnrequest(
+/* Make a notification request, returning the previous send-once right */
+extern kern_return_t ipc_right_request_alloc(
 	ipc_space_t		space,
 	mach_port_name_t	name,
 	boolean_t		immediate,
+	boolean_t		send_possible,
 	ipc_port_t		notify,
 	ipc_port_t		*previousp);
 
-/* Cancel a dead-name request and return the send-once right */
-extern ipc_port_t ipc_right_dncancel(
+/* Cancel a notification request and return the send-once right */
+extern ipc_port_t ipc_right_request_cancel(
 	ipc_space_t		space,
 	ipc_port_t		port,
 	mach_port_name_t	name,
 	ipc_entry_t		entry);
 
-#define	ipc_right_dncancel_macro(space, port, name, entry)		\
-		 ((entry->ie_request == 0) ? IP_NULL :			\
-		 ipc_right_dncancel((space), (port), (name), (entry)))
+#define	ipc_right_request_cancel_macro(space, port, name, entry)		\
+		 ((entry->ie_request == IE_REQ_NONE) ? IP_NULL :		\
+		 ipc_right_request_cancel((space), (port), (name), (entry)))
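
The macro is a fast-path guard: in the common case of no registered request it avoids the function call (and its work under the port lock) entirely. Illustrative use, mirroring the call sites in ipc_right.c above (sketch; locking and entry teardown elided):

    	ipc_port_t request;

    	/* cancel whatever request is registered under 'name'; IP_NULL
    	 * comes back if there was none */
    	request = ipc_right_request_cancel_macro(space, port, name, entry);

    	/* ... unlock, dealloc the entry, etc. ... */

    	if (request != IP_NULL)
    		ipc_notify_port_deleted(request, name);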
 
 /* Check if an entry is being used */
 extern boolean_t ipc_right_inuse(
diff --git a/osfmk/ipc/ipc_space.c b/osfmk/ipc/ipc_space.c
index 434580250..1aaecc594 100644
--- a/osfmk/ipc/ipc_space.c
+++ b/osfmk/ipc/ipc_space.c
@@ -249,6 +249,11 @@ ipc_space_clean(
 	while (space->is_growing)
 		is_write_sleep(space);
 
+	if (!space->is_active) {
+		is_write_unlock(space);
+		return;
+	}
+
 	/*
 	 *	Now we can futz with it	since we have the write lock.
 	 */
diff --git a/osfmk/ipc/ipc_table.c b/osfmk/ipc/ipc_table.c
index bfbac619e..4e19f8844 100644
--- a/osfmk/ipc/ipc_table.c
+++ b/osfmk/ipc/ipc_table.c
@@ -90,8 +90,8 @@ extern vm_map_t kalloc_map;
 ipc_table_size_t ipc_table_entries;
 unsigned int ipc_table_entries_size = 512;
 
-ipc_table_size_t ipc_table_dnrequests;
-unsigned int ipc_table_dnrequests_size = 64;
+ipc_table_size_t ipc_table_requests;
+unsigned int ipc_table_requests_size = 64;
 
 void
 ipc_table_fill(
@@ -151,17 +151,17 @@ ipc_table_init(void)
 		ipc_table_entries[ipc_table_entries_size - 2].its_size;
 
 
-	ipc_table_dnrequests = (ipc_table_size_t)
+	ipc_table_requests = (ipc_table_size_t)
 		kalloc(sizeof(struct ipc_table_size) *
-		       ipc_table_dnrequests_size);
-	assert(ipc_table_dnrequests != ITS_NULL);
+		       ipc_table_requests_size);
+	assert(ipc_table_requests != ITS_NULL);
 
-	ipc_table_fill(ipc_table_dnrequests, ipc_table_dnrequests_size - 1,
+	ipc_table_fill(ipc_table_requests, ipc_table_requests_size - 1,
 		       2, sizeof(struct ipc_port_request));
 
 	/* the last element should have zero size */
 
-	ipc_table_dnrequests[ipc_table_dnrequests_size - 1].its_size = 0;
+	ipc_table_requests[ipc_table_requests_size - 1].its_size = 0;
 }
 
 /*
diff --git a/osfmk/ipc/ipc_table.h b/osfmk/ipc/ipc_table.h
index 1c5eb1bcf..fee56f778 100644
--- a/osfmk/ipc/ipc_table.h
+++ b/osfmk/ipc/ipc_table.h
@@ -106,7 +106,7 @@ struct ipc_table_size {
 };
 
 extern ipc_table_size_t ipc_table_entries;
-extern ipc_table_size_t ipc_table_dnrequests;
+extern ipc_table_size_t ipc_table_requests;
 
 /* Initialize IPC capabilities table storage */
 extern void ipc_table_init(void) __attribute__((section("__TEXT, initcode")));
@@ -161,12 +161,12 @@ extern void ipc_table_free(
 	    (void *)(table)					\
 	)
 
-#define	it_dnrequests_alloc(its)					\
+#define	it_requests_alloc(its)					\
 	((ipc_port_request_t)						\
 	 ipc_table_alloc((its)->its_size *				\
 			 sizeof(struct ipc_port_request)))
 
-#define	it_dnrequests_free(its, table)					\
+#define	it_requests_free(its, table)					\
 	ipc_table_free((its)->its_size *				\
 		       sizeof(struct ipc_port_request),			\
 		       (void *)(table))
diff --git a/osfmk/ipc/ipc_types.h b/osfmk/ipc/ipc_types.h
index a7ac3475e..5857e5ecf 100644
--- a/osfmk/ipc/ipc_types.h
+++ b/osfmk/ipc/ipc_types.h
@@ -70,6 +70,7 @@ typedef struct ipc_kmsg *ipc_kmsg_t;
 #define	IKM_NULL	((ipc_kmsg_t) 0)
 
 typedef	void (*mach_msg_continue_t)(mach_msg_return_t);	/* after wakeup */
+#define	MACH_MSG_CONTINUE_NULL	((mach_msg_continue_t) 0)
 
 #else	/* MACH_KERNEL_PRIVATE */
 
diff --git a/osfmk/ipc/mach_debug.c b/osfmk/ipc/mach_debug.c
index 04442f1fd..f255df6f5 100644
--- a/osfmk/ipc/mach_debug.c
+++ b/osfmk/ipc/mach_debug.c
@@ -326,8 +326,16 @@ mach_port_space_info(
 		iin->iin_name = MACH_PORT_MAKE(index, IE_BITS_GEN(bits));
 		iin->iin_collision = (bits & IE_BITS_COLLISION) ? TRUE : FALSE;
 		iin->iin_type = IE_BITS_TYPE(bits);
-		if (entry->ie_request)
-			iin->iin_type |= MACH_PORT_TYPE_DNREQUEST;
+		if ((entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS) != MACH_PORT_TYPE_NONE &&
+		    entry->ie_request != IE_REQ_NONE) {
+			ipc_port_t port = (ipc_port_t) entry->ie_object;
+
+			assert(IP_VALID(port));
+			ip_lock(port);
+			iin->iin_type |= ipc_port_request_type(port, iin->iin_name, entry->ie_request);
+			ip_unlock(port);
+		}
+
 		iin->iin_urefs = IE_BITS_UREFS(bits);
 		iin->iin_object = (natural_t)(uintptr_t)entry->ie_object;
 		iin->iin_next = entry->ie_next;
@@ -349,8 +357,16 @@ mach_port_space_info(
 		iin->iin_name = tentry->ite_name;
 		iin->iin_collision = (bits & IE_BITS_COLLISION) ? TRUE : FALSE;
 		iin->iin_type = IE_BITS_TYPE(bits);
-		if (entry->ie_request)
-			iin->iin_type |= MACH_PORT_TYPE_DNREQUEST;
+		if ((entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS) != MACH_PORT_TYPE_NONE &&
+		    entry->ie_request != IE_REQ_NONE) {
+			ipc_port_t port = (ipc_port_t) entry->ie_object;
+
+			assert(IP_VALID(port));
+			ip_lock(port);
+			iin->iin_type |= ipc_port_request_type(port, iin->iin_name, entry->ie_request);
+			ip_unlock(port);
+		}
+
 		iin->iin_urefs = IE_BITS_UREFS(bits);
 		iin->iin_object = (natural_t)(uintptr_t)entry->ie_object;
 		iin->iin_next = entry->ie_next;
@@ -456,18 +472,18 @@ mach_port_dnrequest_info(
 		return kr;
 	/* port is locked and active */
 
-	if (port->ip_dnrequests == IPR_NULL) {
+	if (port->ip_requests == IPR_NULL) {
 		total = 0;
 		used = 0;
 	} else {
-		ipc_port_request_t dnrequests = port->ip_dnrequests;
+		ipc_port_request_t requests = port->ip_requests;
 		ipc_port_request_index_t index;
 
-		total = dnrequests->ipr_size->its_size;
+		total = requests->ipr_size->its_size;
 
 		for (index = 1, used = 0;
 		     index < total; index++) {
-			ipc_port_request_t ipr = &dnrequests[index];
+			ipc_port_request_t ipr = &requests[index];
 
 			if (ipr->ipr_name != MACH_PORT_NULL)
 				used++;
diff --git a/osfmk/ipc/mach_msg.c b/osfmk/ipc/mach_msg.c
index 8137915f0..b83ef8191 100644
--- a/osfmk/ipc/mach_msg.c
+++ b/osfmk/ipc/mach_msg.c
@@ -172,10 +172,6 @@ mach_msg_format_0_trailer_t trailer_template = {
  *		MACH_SEND_INVALID_REPLY	Can't copyin reply port.
  *		MACH_SEND_TIMED_OUT	Timeout expired without delivery.
  *		MACH_SEND_INTERRUPTED	Delivery interrupted.
- *		MACH_SEND_NO_NOTIFY	Can't allocate a msg-accepted request.
- *		MACH_SEND_WILL_NOTIFY	Msg-accepted notif. requested.
- *		MACH_SEND_NOTIFY_IN_PROGRESS
- *			This space has already forced a message to this port.
  */
 
 mach_msg_return_t
@@ -184,7 +180,7 @@ mach_msg_send(
 	mach_msg_option_t	option,
 	mach_msg_size_t		send_size,
 	mach_msg_timeout_t	send_timeout,
-	mach_port_name_t	notify)
+	__unused mach_port_name_t	notify)
 {
 	ipc_space_t space = current_space();
 	vm_map_t map = current_map();
@@ -222,20 +218,13 @@ mach_msg_send(
 	trailer->msgh_trailer_type = MACH_MSG_TRAILER_FORMAT_0;
 	trailer->msgh_trailer_size = MACH_MSG_TRAILER_MINIMUM_SIZE;
 	
-	if (option & MACH_SEND_CANCEL) {
-		if (notify == MACH_PORT_NULL)
-			mr = MACH_SEND_INVALID_NOTIFY;
-		else
-			mr = ipc_kmsg_copyin(kmsg, space, map, notify);
-	} else
-		mr = ipc_kmsg_copyin(kmsg, space, map, MACH_PORT_NULL);
+	mr = ipc_kmsg_copyin(kmsg, space, map, option & MACH_SEND_NOTIFY);
 	if (mr != MACH_MSG_SUCCESS) {
 		ipc_kmsg_free(kmsg);
 		return mr;
 	}
 
 	mr = ipc_kmsg_send(kmsg, option & MACH_SEND_TIMEOUT, send_timeout);
-
 	if (mr != MACH_MSG_SUCCESS) {
 	    mr |= ipc_kmsg_copyout_pseudo(kmsg, space, map, MACH_MSG_BODY_NULL);
 	    (void) memcpy((void *) msg, (const void *) kmsg->ikm_header, 
@@ -247,7 +236,7 @@ mach_msg_send(
 }
 
 /*
- *	Routine:	mach_msg_receive
+ *	Routine:	mach_msg_receive_results
  *	Purpose:
  *		Receive a message.
  *	Conditions:
@@ -381,11 +370,10 @@ mach_msg_receive_results(void)
 		mach_msg_body_t *slist;
 
 		slist = ipc_kmsg_get_scatter(msg_addr, slist_size, kmsg);
-		mr = ipc_kmsg_copyout(kmsg, space, map, MACH_PORT_NULL, slist);
+		mr = ipc_kmsg_copyout(kmsg, space, map, slist);
 		ipc_kmsg_free_scatter(slist, slist_size);
 	} else {
-		mr = ipc_kmsg_copyout(kmsg, space, map,
-				      MACH_PORT_NULL, MACH_MSG_BODY_NULL);
+		mr = ipc_kmsg_copyout(kmsg, space, map, MACH_MSG_BODY_NULL);
 	}
 
 	if (mr != MACH_MSG_SUCCESS) {
@@ -473,7 +461,7 @@ mach_msg_overwrite_trap(
 	mach_msg_size_t		rcv_size = args->rcv_size;
 	mach_port_name_t	rcv_name = args->rcv_name;
 	mach_msg_timeout_t	msg_timeout = args->timeout;
-	mach_port_name_t	notify = args->notify;
+	__unused mach_port_name_t notify = args->notify;
 	mach_vm_address_t	rcv_msg_addr = args->rcv_msg;
         mach_msg_size_t		scatter_list_size = 0; /* NOT INITIALIZED - but not used in practice */
 	__unused mach_port_seqno_t temp_seqno = 0;
@@ -490,13 +478,7 @@ mach_msg_overwrite_trap(
 		if (mr != MACH_MSG_SUCCESS)
 			return mr;
 
-		if (option & MACH_SEND_CANCEL) {
-			if (notify == MACH_PORT_NULL)
-				mr = MACH_SEND_INVALID_NOTIFY;
-			else
-				mr = ipc_kmsg_copyin(kmsg, space, map, notify);
-		} else
-			mr = ipc_kmsg_copyin(kmsg, space, map, MACH_PORT_NULL);
+		mr = ipc_kmsg_copyin(kmsg, space, map, option & MACH_SEND_NOTIFY);
 		if (mr != MACH_MSG_SUCCESS) {
 			ipc_kmsg_free(kmsg);
 			return mr;
diff --git a/osfmk/ipc/mach_port.c b/osfmk/ipc/mach_port.c
index 389e80bb1..adfc70bcb 100644
--- a/osfmk/ipc/mach_port.c
+++ b/osfmk/ipc/mach_port.c
@@ -128,6 +128,9 @@ static mach_port_qos_t	qos_template;
  *	Routine:	mach_port_names_helper
  *	Purpose:
  *		A helper function for mach_port_names.
+ *
+ *	Conditions:
+ *		Space containing entry is [at least] read-locked.
  */
 
 void
@@ -141,44 +144,51 @@ mach_port_names_helper(
 {
 	ipc_entry_bits_t bits;
 	ipc_port_request_index_t request;
-	mach_port_type_t type;
+	mach_port_type_t type = 0;
 	ipc_entry_num_t actual;
+	ipc_port_t port;
 
 	bits = entry->ie_bits;
 	request = entry->ie_request;
-	if (bits & MACH_PORT_TYPE_SEND_RIGHTS) {
-		ipc_port_t port;
-		boolean_t died;
+	port = (ipc_port_t) entry->ie_object;
 
-		port = (ipc_port_t) entry->ie_object;
-		assert(port != IP_NULL);
+	if (bits & MACH_PORT_TYPE_RECEIVE) {
+		assert(IP_VALID(port));
 
-		/*
-		 *	The timestamp serializes mach_port_names
-		 *	with ipc_port_destroy.  If the port died,
-		 *	but after mach_port_names started, pretend
-		 *	that it isn't dead.
-		 */
+		if (request != IE_REQ_NONE) {
+			ip_lock(port);
+			assert(ip_active(port));
+			type |= ipc_port_request_type(port, name, request);
+			ip_unlock(port);
+		}
 
-		ip_lock(port);
-		died = (!ip_active(port) &&
-			IP_TIMESTAMP_ORDER(port->ip_timestamp, timestamp));
-		ip_unlock(port);
+	} else if (bits & MACH_PORT_TYPE_SEND_RIGHTS) {
+		mach_port_type_t reqtype;
 
-		if (died) {
-			/* pretend this is a dead-name entry */
+		assert(IP_VALID(port));
+		ip_lock(port);
 
+		reqtype = (request != IE_REQ_NONE) ?
+			  ipc_port_request_type(port, name, request) : 0;
+		
+		/*
+		 * If the port is alive, or was alive when the mach_port_names
+		 * call started, then return that fact.  Otherwise, pretend we
+		 * found a dead-name entry.
+		 */
+		if (ip_active(port) || IP_TIMESTAMP_ORDER(timestamp, port->ip_timestamp)) {
+			type |= reqtype;
+		} else {
 			bits &= ~(IE_BITS_TYPE_MASK);
 			bits |= MACH_PORT_TYPE_DEAD_NAME;
-			if (request != 0)
+			/* account for additional reference for dead-name notification */
+			if (reqtype != 0)
 				bits++;
-			request = 0;
 		}
+		ip_unlock(port);
 	}
 
-	type = IE_BITS_TYPE(bits);
-	if (request != 0)
-		type |= MACH_PORT_TYPE_DNREQUEST;
+	type |= IE_BITS_TYPE(bits);
 
 	actual = *actualp;
 	names[actual] = name;
@@ -436,6 +446,11 @@ mach_port_type(
 	kr = ipc_right_info(space, name, entry, typep, &urefs);
 	if (kr == KERN_SUCCESS)
 		is_write_unlock(space);
+#if 1
+        /* JMM - workaround rdar://problem/9121297 (CF being too picky on these bits). */
+        *typep &= ~(MACH_PORT_TYPE_SPREQUEST | MACH_PORT_TYPE_SPREQUEST_DELAYED);
+#endif
+
 	/* space is unlocked */
 	return kr;
 }
@@ -1472,6 +1487,18 @@ mach_port_request_notification(
 		break;
 	    }
 
+	    case MACH_NOTIFY_SEND_POSSIBLE:
+
+	    	if (!MACH_PORT_VALID(name)) {
+	      		return KERN_INVALID_ARGUMENT;
+		}
+
+		kr = ipc_right_request_alloc(space, name, sync != 0,
+					     TRUE, notify, previousp);
+		if (kr != KERN_SUCCESS)
+			return kr;
+		break;
+
 	    case MACH_NOTIFY_DEAD_NAME:
 
 	    	if (!MACH_PORT_VALID(name)) {
@@ -1483,8 +1510,8 @@ mach_port_request_notification(
 	      		return KERN_INVALID_ARGUMENT;
 		}
 
-		kr = ipc_right_dnrequest(space, name, sync != 0,
-					 notify, previousp);
+		kr = ipc_right_request_alloc(space, name, sync != 0,
+					     FALSE, notify, previousp);
 		if (kr != KERN_SUCCESS)
 			return kr;
 		break;
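
From user space the MACH_NOTIFY_SEND_POSSIBLE case above is reached through the existing mach_port_request_notification() interface; a hedged sketch of a typical request (error handling elided; port and notify_port are placeholder rights):

    	mach_port_t previous = MACH_PORT_NULL;
    	kern_return_t kr;

    	/* ask for a send-possible notification on 'port'; a non-zero
    	 * sync maps to 'immediate' above, i.e. notify right away if a
    	 * send is already possible */
    	kr = mach_port_request_notification(mach_task_self(), port,
    		MACH_NOTIFY_SEND_POSSIBLE, 0 /* sync */,
    		notify_port, MACH_MSG_TYPE_MAKE_SEND_ONCE,
    		&previous);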
@@ -1677,7 +1704,7 @@ mach_port_get_attributes(
                         return kr;
                 /* port is locked and active */
 		
-		table = port->ip_dnrequests;
+		table = port->ip_requests;
 		if (table == IPR_NULL)
 			*(int *)info = 0;
 		else
@@ -1744,7 +1771,7 @@ mach_port_set_attributes(
                         return kr;
                 /* port is locked and active */
 		
-		kr = ipc_port_dngrow(port, *(int *)info);
+		kr = ipc_port_request_grow(port, *(int *)info);
 		if (kr != KERN_SUCCESS)
 			return kr;
 		break;
@@ -1870,6 +1897,12 @@ task_set_port_space(
 	kern_return_t kr;
 	
 	is_write_lock(space);
+
+	if (!space->is_active) {
+		is_write_unlock(space);
+		return KERN_INVALID_TASK;
+	}
+
 	kr = ipc_entry_grow_table(space, table_entries);
 	if (kr == KERN_SUCCESS)
 		is_write_unlock(space);
diff --git a/osfmk/kdp/kdp.c b/osfmk/kdp/kdp.c
index 5536038e7..7eb3459ac 100644
--- a/osfmk/kdp/kdp.c
+++ b/osfmk/kdp/kdp.c
@@ -37,11 +37,13 @@
 #include <kdp/kdp_dyld.h>
 
 #include <libsa/types.h>
+#include <libkern/version.h>
 
 #include <string.h> /* bcopy */
 
 #include <kern/processor.h>
 #include <kern/thread.h>
+#include <kern/clock.h>
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
 
@@ -151,8 +153,6 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_fl
 boolean_t kdp_copyin(pmap_t, uint64_t, void *, size_t);
 extern void bcopy_phys(addr64_t, addr64_t, vm_size_t);
 
-extern char version[];
-
 boolean_t
 kdp_packet(
     unsigned char	*pkt,
@@ -247,10 +247,10 @@ kdp_connect(
 	    rp->error = KDPERR_ALREADY_CONNECTED;
     }
     else { 
-	kdp.reply_port     = rport;
-	kdp.exception_port = eport;
-	kdp.is_conn        = TRUE;
-	kdp.conn_seq       = seq;
+	    kdp.reply_port     = rport;
+	    kdp.exception_port = eport;
+	    kdp.is_conn        = TRUE;
+	    kdp.conn_seq       = seq;
         kdp.session_key    = key;
 
 	rp->error = KDPERR_NO_ERROR;
@@ -375,7 +375,7 @@ kdp_kernelversion(
     rp->hdr.len = sizeof (*rp);
 	
     dprintf(("kdp_kernelversion\n"));
-	slen = strlcpy(rp->version, version, MAX_KDP_DATA_SIZE);
+	slen = strlcpy(rp->version, kdp_kernelversion_string, MAX_KDP_DATA_SIZE);
 	
 	rp->hdr.len += slen + 1; /* strlcpy returns the amount copied with NUL */
 	
@@ -547,8 +547,8 @@ kdp_readmem(
     size_t		plen = *len;
     kdp_readmem_reply_t *rp = &pkt->readmem_reply;
     mach_vm_size_t			cnt;
-#if __i386__ || __arm__
-    void		*pversion = &version;
+#if __i386__
+    void		*pversion = &kdp_kernelversion_string;
 #endif
 
     if (plen < sizeof (*rq))
@@ -563,9 +563,9 @@ kdp_readmem(
 	unsigned int	n = rq->nbytes;
 
 	dprintf(("kdp_readmem addr %x size %d\n", rq->address, n));
-#if __i386__ || __arm__
+#if __i386__
 	/* XXX This is a hack to facilitate the "showversion" macro
-	 * on i386/ARM, which is used to obtain the kernel version without
+	 * on i386, which is used to obtain the kernel version without
 	 * symbols - a pointer to the version string should eventually
 	 * be pinned at a fixed address when an equivalent of the
 	 * VECTORS segment (loaded at a fixed load address, and contains
@@ -1066,6 +1066,42 @@ kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size) {
 	return (rem == 0);
 }
 
+
+static void
+kdp_mem_snapshot(struct mem_snapshot *mem_snap)
+{
+  mem_snap->snapshot_magic = STACKSHOT_MEM_SNAPSHOT_MAGIC;
+  mem_snap->free_pages = vm_page_free_count;
+  mem_snap->active_pages = vm_page_active_count;
+  mem_snap->inactive_pages = vm_page_inactive_count;
+  mem_snap->purgeable_pages = vm_page_purgeable_count;
+  mem_snap->wired_pages = vm_page_wire_count;
+  mem_snap->speculative_pages = vm_page_speculative_count;
+  mem_snap->throttled_pages = vm_page_throttled_count;
+}
+
+
+/*
+ * Method for grabbing timer values safely, in the sense that no infinite
+ * loop will occur.  Certain flavors of the timer_grab function, which would
+ * seem to be the thing to use, can loop infinitely if called while the timer
+ * is in the process of being updated.  Unfortunately, it is (rarely) possible
+ * to get inconsistent top and bottom halves of the timer using this method.
+ * This seems insoluble, since stackshot runs in a context where the timer
+ * might be half-updated, and has no way of yielding control just long enough
+ * to finish the update.
+ */
+
+static uint64_t safe_grab_timer_value(struct timer *t)
+{
+#if   defined(__LP64__)
+  return t->all_bits;
+#else
+  uint64_t time = t->high_bits;    /* endian independent grab */
+  time = (time << 32) | t->low_bits;
+  return time;
+#endif
+}
+
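
A concrete torn read under the 32-bit path: if the timer advances from 0x1FFFFFFFF to 0x200000000 between the two loads, the reader can see 0x100000000 or 0x2FFFFFFFF, depending on which half the writer stores first. The conventional lock-free fix is a re-read loop, sketched below - and it is exactly what stackshot cannot use, because the interrupted writer may be frozen mid-update and the loop would never terminate:

    /* Seqlock-style retry: illustrative only, NOT usable here for the
     * reason given above. */
    static uint64_t
    retrying_grab_timer_value(struct timer *t)
    {
    	uint32_t hi, lo;

    	do {
    		hi = t->high_bits;
    		lo = t->low_bits;
    	} while (hi != t->high_bits);	/* writer slipped in; reload */
    	return ((uint64_t)hi << 32) | lo;
    }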
 int
 kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_flags, uint32_t dispatch_offset, uint32_t *pbytesTraced)
 {
@@ -1080,41 +1116,74 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_fl
 	unsigned framesize = 2 * sizeof(vm_offset_t);
 	struct task ctask;
 	struct thread cthread;
+	struct _vm_map cmap;
+	struct pmap cpmap;
+
+	queue_head_t *task_list = &tasks;
+	boolean_t is_active_list = TRUE;
 	
 	boolean_t dispatch_p = ((trace_flags & STACKSHOT_GET_DQ) != 0);
 	boolean_t save_loadinfo_p = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
 
-	queue_iterate(&tasks, task, task_t, tasks) {
+	if(trace_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) {
+	  if(tracepos + sizeof(struct mem_snapshot) > tracebound) {
+	    error = -1;
+	    goto error_exit;
+	  }
+	  kdp_mem_snapshot((struct mem_snapshot *)tracepos);
+	  tracepos += sizeof(struct mem_snapshot);
+	}
+
+walk_list:
+	queue_iterate(task_list, task, task_t, tasks) {
 		if ((task == NULL) || (ml_nofault_copy((vm_offset_t) task, (vm_offset_t) &ctask, sizeof(struct task)) != sizeof(struct task)))
 			goto error_exit;
 
 		int task_pid = pid_from_task(task);
 		boolean_t task64 = task_has_64BitAddr(task);
 
+		if (!task->active) {
+			/*
+			 * Not interested in terminated tasks without threads, and
+			 * at the moment, stackshot can't handle a task without a name.
+			 */
+			if (queue_empty(&task->threads) || task_pid == -1) {
+				continue;
+			}
+		}
+
 		/* Trace everything, unless a process was specified */
 		if ((pid == -1) || (pid == task_pid)) {
 			task_snapshot_t task_snap;
-			uint32_t uuid_info_count;
-			mach_vm_address_t uuid_info_addr;
-
-			if (save_loadinfo_p && task_pid > 0) {
+			uint32_t uuid_info_count = 0;
+			mach_vm_address_t uuid_info_addr = 0;
+			boolean_t have_map = (task->map != NULL) && 
+			  (ml_nofault_copy((vm_offset_t)(task->map), (vm_offset_t)&cmap, sizeof(struct _vm_map)) == sizeof(struct _vm_map));
+			boolean_t have_pmap = have_map && (cmap.pmap != NULL) &&
+			  (ml_nofault_copy((vm_offset_t)(cmap.pmap), (vm_offset_t)&cpmap, sizeof(struct pmap)) == sizeof(struct pmap));
+
+			if (have_pmap && task->active && save_loadinfo_p && task_pid > 0) {
 				// Read the dyld_all_image_infos struct from the task memory to get UUID array count and location
 				if (task64) {
 					struct dyld_all_image_infos64 task_image_infos;
-					if (!kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct dyld_all_image_infos64)))
-						goto error_exit;
-					uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
-					uuid_info_addr = task_image_infos.uuidArray;
+					if (kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct dyld_all_image_infos64))) {
+						uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
+						uuid_info_addr = task_image_infos.uuidArray;
+					}
 				} else {
 					struct dyld_all_image_infos task_image_infos;
-					if (!kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct dyld_all_image_infos)))
-						goto error_exit;
-					uuid_info_count = task_image_infos.uuidArrayCount;
-					uuid_info_addr = task_image_infos.uuidArray;
+					if (kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct dyld_all_image_infos))) {
+						uuid_info_count = task_image_infos.uuidArrayCount;
+						uuid_info_addr = task_image_infos.uuidArray;
+					}
+				}
+
+				// If we get a NULL uuid_info_addr (which can happen when we catch dyld in the middle of updating
+				// this data structure), we zero the uuid_info_count so that we won't even try to save load info
+				// for this task.
+				if (!uuid_info_addr) {
+					uuid_info_count = 0;
 				}
-			} else {
-				uuid_info_count = 0;
-				uuid_info_addr = 0;
 			}
 
 			if (tracepos + sizeof(struct task_snapshot) > tracebound) {
@@ -1134,7 +1203,17 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_fl
 			task_snap->ss_flags = 0;
 			if (task64)
 				task_snap->ss_flags |= kUser64_p;
+			if (!task->active) 
+				task_snap->ss_flags |= kTerminatedSnapshot;
+
+			task_snap->suspend_count = task->suspend_count;
+			task_snap->task_size = have_pmap ? pmap_resident_count(task->map->pmap) : 0;
+			task_snap->faults = task->faults;
+			task_snap->pageins = task->pageins;
+			task_snap->cow_faults = task->cow_faults;
 			
+			task_snap->user_time_in_terminated_threads = task->total_user_time;
+			task_snap->system_time_in_terminated_threads = task->total_system_time;
 			tracepos += sizeof(struct task_snapshot);
 
 			if (task_pid > 0 && uuid_info_count > 0) {
@@ -1147,10 +1226,11 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_fl
 				}
 
 				// Copy in the UUID info array
-				if (!kdp_copyin(task->map->pmap, uuid_info_addr, tracepos, uuid_info_array_size))
-					goto error_exit;
-
-				tracepos += uuid_info_array_size;
+				// It may be nonresident, in which case just fix up nloadinfos to 0 in the task_snap
+				if (have_pmap && !kdp_copyin(task->map->pmap, uuid_info_addr, tracepos, uuid_info_array_size))
+					task_snap->nloadinfos = 0;
+				else
+					tracepos += uuid_info_array_size;
 			}
 
 			queue_iterate(&task->threads, thread, thread_t, task_threads){
@@ -1167,12 +1247,13 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_fl
 				tsnap->state = thread->state;
 				tsnap->wait_event = thread->wait_event;
 				tsnap->continuation = (uint64_t) (uintptr_t) thread->continuation;
-
+				tsnap->user_time = safe_grab_timer_value(&thread->user_timer);
+				tsnap->system_time = safe_grab_timer_value(&thread->system_timer);
 				tsnap->snapshot_magic = STACKSHOT_THREAD_SNAPSHOT_MAGIC;
 				tracepos += sizeof(struct thread_snapshot);
 				tsnap->ss_flags = 0;
 
-				if (dispatch_p && (task != kernel_task) && (task->active) && (task->map)) {
+				if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) {
 					uint64_t dqkeyaddr = thread_dispatchqaddr(thread);
 					if (dqkeyaddr != 0) {
 						uint64_t dqaddr = 0;
@@ -1190,6 +1271,7 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_fl
 /* Call through to the machine specific trace routines
  * Frames are added past the snapshot header.
  */
+				tracebytes = 0;
 				if (thread->kernel_stack != 0) {
 #if defined(__LP64__)					
 					tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, FALSE);
@@ -1204,7 +1286,7 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_fl
 				tracepos += tracebytes;
 				tracebytes = 0;
 				/* Trace user stack, if any */
-				if (thread->task->map != kernel_map) {
+				if (task->active && thread->task->map != kernel_map) {
 					/* 64-bit task? */
 					if (task_has_64BitAddr(thread->task)) {
 						tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, TRUE);
@@ -1223,6 +1305,12 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_fl
 		}
 	}
 
+	if (is_active_list) { 
+		is_active_list = FALSE;
+		task_list = &terminated_tasks;
+		goto walk_list;
+	}
+
 error_exit:
 	/* Release stack snapshot wait indicator */
 	kdp_snapshot_postflight();
diff --git a/osfmk/kdp/kdp_core.h b/osfmk/kdp/kdp_core.h
index 99a278720..d99f2bbcd 100644
--- a/osfmk/kdp/kdp_core.h
+++ b/osfmk/kdp/kdp_core.h
@@ -44,12 +44,11 @@
 #define	KDP_ERROR 5			/* error code */
 #define KDP_SEEK  6                     /* Seek to specified offset */
 #define KDP_EOF   7                     /* signal end of file */
-
-#if	defined(__LP64__)
 #define KDP_FEATURE_MASK_STRING		"features"
-enum	{KDP_FEATURE_LARGE_CRASHDUMPS = 1};
-extern	uint32_t	kdp_crashdump_feature_mask;
-#endif
+
+enum	{KDP_FEATURE_LARGE_CRASHDUMPS = 1, KDP_FEATURE_LARGE_PKT_SIZE = 2};
+extern	uint32_t	kdp_feature_large_crashdumps, kdp_feature_large_pkt_size;
+
 struct	corehdr {
 	short	th_opcode;		/* packet type */
 	union {
@@ -57,7 +56,7 @@ struct	corehdr {
 		unsigned int	tu_code;	/* error code */
 		char	tu_rpl[1];	/* request packet payload */
 	} th_u;
-	char	th_data[1];		/* data or error string */
+	char	th_data[0];		/* data or error string */
 }__attribute__((packed));
 
 #define	th_block	th_u.tu_block
@@ -93,4 +92,6 @@ int 	kdp_send_crashdump_pkt(unsigned int request, char *corename,
 				uint64_t length, void *panic_data);
 
 int	kdp_send_crashdump_data(unsigned int request, char *corename,
-				uint64_t length, caddr_t txstart);
+				int64_t length, caddr_t txstart);
+
+#define KDP_CRASHDUMP_POLL_COUNT (2500)
diff --git a/osfmk/kdp/kdp_dyld.h b/osfmk/kdp/kdp_dyld.h
index ef228574e..910565f2e 100644
--- a/osfmk/kdp/kdp_dyld.h
+++ b/osfmk/kdp/kdp_dyld.h
@@ -81,4 +81,4 @@ struct dyld_all_image_infos64 {
 	user64_addr_t				systemOrderFlag;
 	user64_size_t				uuidArrayCount; // dyld defines this as a uintptr_t despite it being a count
 	user64_addr_t				uuidArray;
-};
+};
\ No newline at end of file
diff --git a/osfmk/kdp/kdp_en_debugger.h b/osfmk/kdp/kdp_en_debugger.h
index dd63d30e2..c8a99822f 100644
--- a/osfmk/kdp/kdp_en_debugger.h
+++ b/osfmk/kdp/kdp_en_debugger.h
@@ -33,6 +33,7 @@
 typedef void (*kdp_send_t)(void * pkt, unsigned int pkt_len);
 typedef void (*kdp_receive_t)(void * pkt, unsigned int * pkt_len, 
 	      unsigned int timeout);
+
 void 
 kdp_register_send_receive(kdp_send_t send, kdp_receive_t receive);
 
diff --git a/osfmk/kdp/kdp_private.h b/osfmk/kdp/kdp_private.h
index 07e5123ff..bcd2f3399 100644
--- a/osfmk/kdp/kdp_private.h
+++ b/osfmk/kdp/kdp_private.h
@@ -29,6 +29,7 @@
 /*
  * Private functions for kdp.c
  */
+extern char kdp_kernelversion_string[];
 
 static boolean_t
 kdp_unknown(
diff --git a/osfmk/kdp/kdp_udp.c b/osfmk/kdp/kdp_udp.c
index 3b298fe6e..22bf8978a 100644
--- a/osfmk/kdp/kdp_udp.c
+++ b/osfmk/kdp/kdp_udp.c
@@ -57,19 +57,33 @@
 #include <vm/vm_kern.h> /* kernel_map */
 
 #include <mach/memory_object_types.h>
+#include <machine/pal_routines.h>
 
 #include <sys/msgbuf.h>
 
+/* we just want the link status flags, so undef KERNEL_PRIVATE for this
+ * header file. */
+#undef KERNEL_PRIVATE
+#include <net/if_media.h> 
+#define KERNEL_PRIVATE
+
 #include <string.h>
 
-#define DO_ALIGN	1	/* align all packet data accesses */
+#include <IOKit/IOPlatformExpert.h>
+#include <libkern/version.h>
+
+#define DO_ALIGN	1	      /* align all packet data accesses */
+#define KDP_SERIAL_IPADDR  0xABADBABE /* IP address used for serial KDP */
+#define LINK_UP_STATUS     (IFM_AVALID | IFM_ACTIVE)
 
 extern int kdp_getc(void);
 extern int reattach_wait;
 
-extern int serial_getc(void);
-extern void serial_putc(char);
-extern int serial_init(void);
+/* only used by IONetworkingFamily */
+typedef uint32_t (*kdp_link_t)(void);
+typedef boolean_t (*kdp_mode_t)(boolean_t);
+void 	kdp_register_link(kdp_link_t link, kdp_mode_t mode);
+void 	kdp_unregister_link(kdp_link_t link, kdp_mode_t mode);
 
 static u_short ip_id;                          /* ip packet ctr, for ids */
 
@@ -115,9 +129,17 @@ static const char
 
 volatile int kdp_flag = 0;
 
-static kdp_send_t kdp_en_send_pkt;
+static kdp_send_t    kdp_en_send_pkt;
 static kdp_receive_t kdp_en_recv_pkt;
+static kdp_link_t    kdp_en_linkstatus;
+static kdp_mode_t    kdp_en_setmode;
 
+#if CONFIG_SERIAL_KDP
+static void kdp_serial_send(void *rpkt, unsigned int rpkt_len);
+#define KDP_SERIAL_ENABLED()  (kdp_en_send_pkt == kdp_serial_send)
+#else
+#define KDP_SERIAL_ENABLED()  (0)
+#endif
 
 static uint32_t kdp_current_ip_address = 0;
 static struct ether_addr kdp_current_mac_address = {{0, 0, 0, 0, 0, 0}};
@@ -130,6 +152,8 @@ static uint32_t parsed_router_ip = 0;
 static uint32_t router_ip = 0;
 static uint32_t target_ip = 0;
 
+static boolean_t save_ip_in_nvram = FALSE;
+
 static volatile boolean_t panicd_specified = FALSE;
 static boolean_t router_specified = FALSE;
 static boolean_t corename_specified = FALSE;
@@ -151,8 +175,10 @@ static boolean_t flag_arp_resolved = FALSE;
 static unsigned int panic_timeout = 100000;
 static unsigned int last_panic_port = CORE_REMOTE_PORT;
 
-unsigned int SEGSIZE = 512;
+#define KDP_THROTTLE_VALUE       (10ULL * NSEC_PER_SEC)
 
+uint32_t kdp_crashdump_pkt_size = 512;
+#define KDP_LARGE_CRASHDUMP_PKT_SIZE (1440 - 6 - sizeof(struct udpiphdr))
 static char panicd_ip_str[20];
 static char router_ip_str[20];
 static char corename_str[50];
@@ -169,10 +195,13 @@ extern void 		kdp_call(void);
 extern boolean_t 	kdp_call_kdb(void);
 extern int 		kern_dump(void);
 
+extern int inet_aton(const char *cp, struct in_addr *pin);
+extern int inet_ntoa2(struct in_addr * pin, char * cp, const int len);
+
 void *	kdp_get_interface(void);
-void 	kdp_set_gateway_mac(void *);
-void 	kdp_set_ip_and_mac_addresses(struct in_addr *, struct ether_addr *);
-void 	kdp_set_interface(void *);
+void    kdp_set_gateway_mac(void *gatewaymac);
+void 	kdp_set_ip_and_mac_addresses(struct in_addr *ipaddr, struct ether_addr *);
+void 	kdp_set_interface(void *interface, const struct ether_addr *macaddr);
 
 void 			kdp_disable_arp(void);
 static void 		kdp_arp_reply(struct ether_arp *);
@@ -180,10 +209,11 @@ static void 		kdp_process_arp_reply(struct ether_arp *);
 static boolean_t 	kdp_arp_resolve(uint32_t, struct ether_addr *);
 
 static volatile unsigned	kdp_reentry_deadline;
-#if	defined(__LP64__)
-uint32_t kdp_crashdump_feature_mask = KDP_FEATURE_LARGE_CRASHDUMPS;
-static uint32_t	kdp_feature_large_crashdumps;
-#endif
+
+static uint32_t kdp_crashdump_feature_mask = KDP_FEATURE_LARGE_CRASHDUMPS | KDP_FEATURE_LARGE_PKT_SIZE;
+uint32_t kdp_feature_large_crashdumps, kdp_feature_large_pkt_size;
+
+char kdp_kernelversion_string[256];
 
 static boolean_t	gKDPDebug = FALSE;
 #define KDP_DEBUG(...) if (gKDPDebug) printf(__VA_ARGS__);
@@ -200,6 +230,13 @@ static uint32_t stack_snapshot_dispatch_offset;
 
 static unsigned int old_debugger;
 
+#define SBLOCKSZ (2048)
+uint64_t kdp_dump_start_time = 0;
+uint64_t kdp_min_superblock_dump_time = ~1ULL;
+uint64_t kdp_max_superblock_dump_time = 0;
+uint64_t kdp_superblock_dump_time = 0;
+uint64_t kdp_superblock_dump_start_time = 0;
+
 void
 kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size,
     uint32_t flags, uint32_t dispatch_offset);
@@ -231,6 +268,53 @@ kdp_timer_callout_init(void) {
 }
 
 
+/* only send/receive data if the link is up */
+inline static void wait_for_link(void)
+{
+    static int first = 0;
+
+    if (!kdp_en_linkstatus)
+        return;
+
+    while (((*kdp_en_linkstatus)() & LINK_UP_STATUS) != LINK_UP_STATUS) {
+        if (first)
+            continue;
+
+        first = 1;
+        printf("Waiting for link to become available.\n");
+        kprintf("Waiting for link to become available.\n");
+    }
+}
+
+
+inline static void kdp_send_data(void *packet, unsigned int len)
+{
+    wait_for_link();
+    (*kdp_en_send_pkt)(packet, len);
+}
+
+
+inline static void kdp_receive_data(void *packet, unsigned int *len,
+                                    unsigned int timeout)
+{
+    wait_for_link();
+    (*kdp_en_recv_pkt)(packet, len, timeout);
+}
+
+
+
+void kdp_register_link(kdp_link_t link, kdp_mode_t mode)
+{
+        kdp_en_linkstatus = link;
+        kdp_en_setmode    = mode;
+}
+
+void kdp_unregister_link(__unused kdp_link_t link, __unused kdp_mode_t mode)
+{
+        kdp_en_linkstatus = NULL;
+        kdp_en_setmode    = NULL;
+}
+
 void
 kdp_register_send_receive(
 	kdp_send_t	send, 
@@ -243,15 +327,14 @@ kdp_register_send_receive(
 	kdp_timer_callout_init();
 
 	PE_parse_boot_argn("debug", &debug, sizeof (debug));
-#if	defined(__LP64__)
 	kdp_crashdump_feature_mask = htonl(kdp_crashdump_feature_mask);
-#endif
+
 
 	if (!debug)
 		return;
 
-	kdp_en_send_pkt = send;
-	kdp_en_recv_pkt = receive;
+	kdp_en_send_pkt   = send;
+	kdp_en_recv_pkt   = receive;
 
 	if (debug & DB_KDP_BP_DIS)
 		kdp_flag |= KDP_BP_DIS;   
@@ -303,8 +386,8 @@ kdp_unregister_send_receive(
 	if (current_debugger == KDP_CUR_DB)
 		current_debugger = NO_CUR_DB;
 	kdp_flag &= ~KDP_READY;
-	kdp_en_send_pkt = NULL;
-	kdp_en_recv_pkt = NULL;
+	kdp_en_send_pkt   = NULL;
+	kdp_en_recv_pkt   = NULL;
 }
 
 /* Cache stack snapshot parameters in preparation for a trace */
@@ -449,10 +532,11 @@ kdp_reply(
 	pkt.len += (unsigned int)sizeof (struct ether_header);
     
 	// save reply for possible retransmission
+	assert(pkt.len <= KDP_MAXPACKET);
 	if (!sideband)
-		bcopy((char *)&pkt, (char *)&saved_reply, sizeof(pkt));
+		bcopy((char *)&pkt, (char *)&saved_reply, sizeof(saved_reply));
 
-	(*kdp_en_send_pkt)(&pkt.data[pkt.off], pkt.len);
+	kdp_send_data(&pkt.data[pkt.off], pkt.len);
 
 	// increment expected sequence number
 	if (!sideband) 
@@ -515,15 +599,66 @@ kdp_send(
     eh->ether_type = htons(ETHERTYPE_IP);
     
     pkt.len += (unsigned int)sizeof (struct ether_header);
-    (*kdp_en_send_pkt)(&pkt.data[pkt.off], pkt.len);
+    kdp_send_data(&pkt.data[pkt.off], pkt.len);
 }
 
-/* We don't interpret this pointer, we just give it to the
-bsd stack so it can decide when to set the MAC and IP info. */
+
+inline static void debugger_if_necessary(void)
+{
+    if ((current_debugger == KDP_CUR_DB) && halt_in_debugger) {
+        kdp_call();
+        halt_in_debugger=0;
+    }
+}
+
+
+/* We don't interpret this pointer; we just give it to the bsd stack
+   so it can decide when to set the MAC and IP info.  We initialize
+   the MAC/IP info early if we can, so that KDP can be used early in
+   boot.  These values may subsequently get overwritten when the
+   interface gets initialized for real.
+*/
 void
-kdp_set_interface(void *ifp)
+kdp_set_interface(void *ifp, const struct ether_addr *macaddr)
 {
+	char kdpstr[80];
+        struct in_addr addr = { 0 };
+        unsigned int len;
+        
 	kdp_current_ifp = ifp;
+
+        if (PE_parse_boot_argn("kdp_ip_addr", kdpstr, sizeof(kdpstr))) {
+            /* look for a static ip address */
+            if (inet_aton(kdpstr, &addr) == FALSE)
+                goto done;
+
+            goto config_network;
+        }
+
+        /* use saved ip address */
+        save_ip_in_nvram = TRUE;
+
+        len = sizeof(kdpstr);
+        if (PEReadNVRAMProperty("_kdp_ipstr", kdpstr, &len) == FALSE)
+            goto done;
+
+        kdpstr[len < sizeof(kdpstr) ? len : sizeof(kdpstr) - 1] = '\0';
+        if (inet_aton(kdpstr, &addr) == FALSE)
+            goto done;
+
+config_network:
+        kdp_current_ip_address = addr.s_addr;
+        if (macaddr)
+            kdp_current_mac_address = *macaddr;
+
+        /* We can't drop into the debugger at this point because the
+           link will likely not be up.  When getDebuggerLinkStatus() support
+           gets added to the appropriate network drivers, adding the
+           following will enable this capability:
+           debugger_if_necessary();
+        */
+done:
+        return;
 }
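
Two details above are worth noting: the "kdp_ip_addr" boot-arg takes precedence and disables NVRAM saving, and the string read back from NVRAM is not guaranteed NUL-terminated, hence the explicit termination before inet_aton(). A standalone illustration of that termination guard (buffer contents and length are placeholders):

    #include <arpa/inet.h>
    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
    	char kdpstr[80] = "10.0.1.2";	/* as if read from NVRAM */
    	unsigned int len = 8;		/* length reported by the read */
    	struct in_addr addr;

    	/* same guard as kdp_set_interface: terminate within bounds */
    	kdpstr[len < sizeof(kdpstr) ? len : sizeof(kdpstr) - 1] = '\0';

    	if (inet_aton(kdpstr, &addr))
    		printf("static KDP address: %s\n", inet_ntoa(addr));
    	return 0;
    }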
 
 void *
@@ -537,19 +672,48 @@ kdp_set_ip_and_mac_addresses(
 	struct in_addr		*ipaddr, 
 	struct ether_addr	*macaddr)
 {
-	kdp_current_ip_address = ipaddr->s_addr;
-	kdp_current_mac_address = *macaddr;
-	if ((current_debugger == KDP_CUR_DB) && halt_in_debugger) {
-		kdp_call();
-		halt_in_debugger=0;
-	}
+        static uint64_t last_time    = (uint64_t) -1;
+        static uint64_t throttle_val = 0;
+        uint64_t cur_time;
+        char addr[16];
+
+        if (kdp_current_ip_address == ipaddr->s_addr) 
+            goto done;
+
+        /* don't replace if serial debugging is configured */
+        if (!KDP_SERIAL_ENABLED() ||
+            (kdp_current_ip_address != KDP_SERIAL_IPADDR)) {
+            kdp_current_mac_address = *macaddr;
+            kdp_current_ip_address  = ipaddr->s_addr;
+        }
+
+        if (save_ip_in_nvram == FALSE)
+            goto done;
+
+        if (inet_ntoa2(ipaddr, addr, sizeof(addr)) == FALSE)
+            goto done;
+
+        /* throttle writes if needed */
+        if (!throttle_val)
+            nanoseconds_to_absolutetime(KDP_THROTTLE_VALUE, &throttle_val);
+
+        cur_time = mach_absolute_time();
+        if (last_time == (uint64_t) -1 ||
+            ((cur_time - last_time) > throttle_val)) {
+            PEWriteNVRAMProperty("_kdp_ipstr", addr, 
+                                 (const unsigned int) strlen(addr));
+        }
+        last_time = cur_time;
+
+done:
+        debugger_if_necessary();
 }
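
The NVRAM write above is rate-limited with a standard mach_absolute_time() interval check. A minimal standalone version of the idea (hypothetical helper; unlike the code above it refreshes last_time only when it returns TRUE, and like it, it assumes a single caller and no locking):

    static boolean_t
    throttle_elapsed(uint64_t interval_ns)
    {
    	static uint64_t last_time;
    	static uint64_t throttle_val;
    	uint64_t cur_time;

    	if (throttle_val == 0)
    		nanoseconds_to_absolutetime(interval_ns, &throttle_val);

    	cur_time = mach_absolute_time();
    	if (last_time == 0 || (cur_time - last_time) > throttle_val) {
    		last_time = cur_time;	/* remember the permitted event */
    		return TRUE;
    	}
    	return FALSE;
    }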
 
 void
 kdp_set_gateway_mac(void *gatewaymac)
 {
-  router_mac = *(struct ether_addr *)gatewaymac;
-  flag_router_mac_initialized = TRUE;
+    router_mac = *(struct ether_addr *)gatewaymac;
+    flag_router_mac_initialized = TRUE;
 } 
 
 struct ether_addr 
@@ -657,7 +821,7 @@ kdp_arp_reply(struct ether_arp *ea)
 		(void)memcpy(&pkt.data[pkt.off], ea, sizeof(*ea));
 		pkt.off -= (unsigned int)sizeof (struct ether_header);
 		/* pkt.len is still the length we want, ether_header+ether_arp */
-		(*kdp_en_send_pkt)(&pkt.data[pkt.off], pkt.len);
+		kdp_send_data(&pkt.data[pkt.off], pkt.len);
 	}
 }
 
@@ -681,7 +845,7 @@ kdp_poll(void)
 	}
 
 	pkt.off = pkt.len = 0;
-	(*kdp_en_recv_pkt)(pkt.data, &pkt.len, 3/* ms */);
+	kdp_receive_data(pkt.data, &pkt.len, 3/* ms */);
 
 	if (pkt.len == 0)
 		return;
@@ -795,7 +959,7 @@ transmit_ARP_request(uint32_t ip_addr)
 	pkt.off = 0;
 	pkt.len = sizeof(struct ether_header) + sizeof(struct ether_arp);
 	/* Transmit */
-	(*kdp_en_send_pkt)(&pkt.data[pkt.off], pkt.len);
+	kdp_send_data(&pkt.data[pkt.off], pkt.len);
 }
 
 static boolean_t
@@ -878,8 +1042,8 @@ kdp_handler(
 	// check for retransmitted request
 	if (hdr->seq == (exception_seq - 1)) {
 	    /* retransmit last reply */
-	    (*kdp_en_send_pkt)(&saved_reply.data[saved_reply.off],
-			    saved_reply.len);
+	    kdp_send_data(&saved_reply.data[saved_reply.off],
+                          saved_reply.len);
 	    goto again;
 	} else if ((hdr->seq != exception_seq) &&
                    (hdr->request != KDP_CONNECT)) {
@@ -946,33 +1110,38 @@ kdp_connection_wait(void)
 	 * the panic.log
 	 */
 
-	printf( "ethernet MAC address: %02x:%02x:%02x:%02x:%02x:%02x\n",
-            kdp_mac_addr.ether_addr_octet[0] & 0xff,
-            kdp_mac_addr.ether_addr_octet[1] & 0xff,
-            kdp_mac_addr.ether_addr_octet[2] & 0xff,
-            kdp_mac_addr.ether_addr_octet[3] & 0xff,
-            kdp_mac_addr.ether_addr_octet[4] & 0xff,
-            kdp_mac_addr.ether_addr_octet[5] & 0xff);
+        if (KDP_SERIAL_ENABLED()) {
+            printf("Using serial KDP.\n");
+            kprintf("Using serial KDP.\n");
+        } else {
+            printf( "ethernet MAC address: %02x:%02x:%02x:%02x:%02x:%02x\n",
+                    kdp_mac_addr.ether_addr_octet[0] & 0xff,
+                    kdp_mac_addr.ether_addr_octet[1] & 0xff,
+                    kdp_mac_addr.ether_addr_octet[2] & 0xff,
+                    kdp_mac_addr.ether_addr_octet[3] & 0xff,
+                    kdp_mac_addr.ether_addr_octet[4] & 0xff,
+                    kdp_mac_addr.ether_addr_octet[5] & 0xff);
 		
-	kprintf( "ethernet MAC address: %02x:%02x:%02x:%02x:%02x:%02x\n",
-			kdp_mac_addr.ether_addr_octet[0] & 0xff,
-			kdp_mac_addr.ether_addr_octet[1] & 0xff,
-			kdp_mac_addr.ether_addr_octet[2] & 0xff,
-			kdp_mac_addr.ether_addr_octet[3] & 0xff,
-			kdp_mac_addr.ether_addr_octet[4] & 0xff,
-			kdp_mac_addr.ether_addr_octet[5] & 0xff);
+            kprintf( "ethernet MAC address: %02x:%02x:%02x:%02x:%02x:%02x\n",
+                     kdp_mac_addr.ether_addr_octet[0] & 0xff,
+                     kdp_mac_addr.ether_addr_octet[1] & 0xff,
+                     kdp_mac_addr.ether_addr_octet[2] & 0xff,
+                     kdp_mac_addr.ether_addr_octet[3] & 0xff,
+                     kdp_mac_addr.ether_addr_octet[4] & 0xff,
+                     kdp_mac_addr.ether_addr_octet[5] & 0xff);
 		
-	printf( "ip address: %d.%d.%d.%d\n",
-            (ip_addr & 0xff000000) >> 24,
-            (ip_addr & 0xff0000) >> 16,
-            (ip_addr & 0xff00) >> 8,
-            (ip_addr & 0xff));
+            printf( "ip address: %d.%d.%d.%d\n",
+                    (ip_addr & 0xff000000) >> 24,
+                    (ip_addr & 0xff0000) >> 16,
+                    (ip_addr & 0xff00) >> 8,
+                    (ip_addr & 0xff));
             
-	kprintf( "ip address: %d.%d.%d.%d\n",
-			(ip_addr & 0xff000000) >> 24,
-			(ip_addr & 0xff0000) >> 16,
-			(ip_addr & 0xff00) >> 8,
-			(ip_addr & 0xff));
+            kprintf( "ip address: %d.%d.%d.%d\n",
+                     (ip_addr & 0xff000000) >> 24,
+                     (ip_addr & 0xff0000) >> 16,
+                     (ip_addr & 0xff00) >> 8,
+                     (ip_addr & 0xff));
+        }
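KDP_SERIAL_ENABLED() and KDP_SERIAL_IPADDR are not defined in this excerpt. From their use here and in kdp_init() below, the constant evidently names the 0xABADBABE placeholder this patch replaces at the bottom of the file, and the macro reports whether the serial shim is the registered transport; presumably something like:

    /* Presumed definitions; not shown in this excerpt. */
    #define KDP_SERIAL_IPADDR    0xABADBABE   /* fake address for serial KDP */
    #define KDP_SERIAL_ENABLED() (kdp_en_send_pkt == kdp_serial_send)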
             
 	printf("\nWaiting for remote debugger connection.\n");
 
@@ -1145,10 +1314,12 @@ kdp_raise_exception(
     kdp.kdp_cpu = cpu_number();
     kdp.kdp_thread = current_thread();
 
+    if (kdp_en_setmode)  
+        (*kdp_en_setmode)(TRUE); /* enabling link mode */
+
     if (pkt.input)
 	kdp_panic("kdp_raise_exception");
 
-	    
     if (((kdp_flag & KDP_PANIC_DUMP_ENABLED) || (kdp_flag & PANIC_LOG_DUMP))
 	&& (panicstr != (char *) 0)) {
 	    kdp_panic_dump();
@@ -1223,6 +1394,8 @@ kdp_raise_exception(
       goto again;
 
 exit_raise_exception:
+    if (kdp_en_setmode)  
+        (*kdp_en_setmode)(FALSE); /* link cleanup */
     enable_preemption();
 }
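The new kdp_en_setmode hook brackets every debugger entry and exit, so a transport can defer expensive link bring-up until the debugger is actually in use; the serial transport below relies on this to run pal_serial_init() lazily. Shape of the hook, as registered through kdp_register_link() later in this patch (the function name here is illustrative):

    static boolean_t
    example_setmode(boolean_t active)
    {
            if (active) {
                    /* entering KDP: claim hardware, bring the link up */
            } else {
                    /* leaving KDP: hand the hardware back to the system */
            }
            return TRUE;    /* return convention inferred from kdp_serial_setmode below */
    }

    /* The first argument is the link-status callback, unused by serial KDP. */
    kdp_register_link(NULL, example_setmode);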
 
@@ -1245,11 +1418,9 @@ create_panic_header(unsigned int request, const char *corename,
 	struct corehdr		*coreh;
 	const char		*mode = "octet";
 	char			modelen  = strlen(mode);
-#if defined(__LP64__)	
+
 	size_t			fmask_size = sizeof(KDP_FEATURE_MASK_STRING) + sizeof(kdp_crashdump_feature_mask);
-#else
-	size_t			fmask_size = 0;
-#endif
+
 	pkt.off = sizeof (struct ether_header);
 	pkt.len = (unsigned int)(length + ((request == KDP_WRQ) ? modelen + fmask_size : 0) + 
 	    (corename ? strlen(corename): 0) + sizeof(struct corehdr));
@@ -1303,11 +1474,13 @@ create_panic_header(unsigned int request, const char *corename,
 		*cp++ = '\0';
 		cp += strlcpy (cp, mode, KDP_MAXPACKET - strlen(corename));
 		*cp++ = '\0';
-#if defined(__LP64__)
 		cp += strlcpy(cp, KDP_FEATURE_MASK_STRING, sizeof(KDP_FEATURE_MASK_STRING));
 		*cp++ = '\0'; /* Redundant */
 		bcopy(&kdp_crashdump_feature_mask, cp, sizeof(kdp_crashdump_feature_mask));
-#endif
+		kdp_crashdump_pkt_size = KDP_LARGE_CRASHDUMP_PKT_SIZE;
+		PE_parse_boot_argn("kdp_crashdump_pkt_size", &kdp_crashdump_pkt_size, sizeof(kdp_crashdump_pkt_size));
+		cp += sizeof(kdp_crashdump_feature_mask);
+		*(uint32_t *)cp = htonl(kdp_crashdump_pkt_size);
 	}
 	else
 	{
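With the __LP64__ guards removed, every WRQ advertises the feature mask, and the negotiated packet size now follows it in network byte order; a "kdp_crashdump_pkt_size" boot-arg can override the KDP_LARGE_CRASHDUMP_PKT_SIZE default before the value is sent. The option area therefore reads:

    corename '\0' "octet" '\0' KDP_FEATURE_MASK_STRING '\0' <mask bytes> <uint32 pkt size, big-endian>

so a server that stops parsing after the mode string is unaffected. A hypothetical server-side read of the trailing word (names are illustrative, not from this patch):

    const char *opt = options_after_mode;       /* first byte after "octet\0" */
    if (strcmp(opt, KDP_FEATURE_MASK_STRING) == 0) {
            uint32_t mask, pkt_size;
            opt += strlen(opt) + 1;             /* skip the tag and its NUL */
            memcpy(&mask, opt, sizeof(mask));
            memcpy(&pkt_size, opt + sizeof(mask), sizeof(pkt_size));
            pkt_size = ntohl(pkt_size);         /* sent with htonl() above */
    }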
@@ -1330,14 +1503,11 @@ static int kdp_send_crashdump_seek(char *corename, uint64_t seek_off)
 {
 	int panic_error;
 
-#if defined(__LP64__)
 	if (kdp_feature_large_crashdumps) {
 		panic_error = kdp_send_crashdump_pkt(KDP_SEEK, corename, 
 						     sizeof(seek_off),
 						     &seek_off);
-	} else
-#endif
-	{
+	} else {
 		uint32_t off = (uint32_t) seek_off;
 		panic_error = kdp_send_crashdump_pkt(KDP_SEEK, corename, 
 						     sizeof(off), &off);
@@ -1353,40 +1523,44 @@ static int kdp_send_crashdump_seek(char *corename, uint64_t seek_off)
 }
 
 int kdp_send_crashdump_data(unsigned int request, char *corename,
-    uint64_t length, caddr_t txstart)
+    int64_t length, caddr_t txstart)
 {
 	int panic_error = 0;
 
 	while (length > 0) {
-		uint64_t chunk = MIN(SEGSIZE, length);
-		
+		uint64_t chunk = MIN(kdp_crashdump_pkt_size, length);
+
 		panic_error = kdp_send_crashdump_pkt(request, corename, chunk,
-						     (caddr_t) txstart);
+							txstart);
 		if (panic_error < 0) {
 			printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
 			return panic_error;
 		}
 
-		if (!(panic_block % 2000))
-			kdb_printf_unbuffered(".");
-
 		txstart += chunk;
 		length  -= chunk;
 	}
 	return 0;
 }
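Two quiet changes in this loop: chunks follow the negotiated kdp_crashdump_pkt_size rather than the fixed SEGSIZE, and length is now signed, so a miscomputed negative length falls straight through the while (length > 0) test instead of wrapping into a huge unsigned count. The progress dot moves out of this loop into the per-superblock accounting in kdp_send_crashdump_pkt() below. Typical call, as in the panic-log path later in this patch:

    /* A pointer difference is naturally signed, hence the int64_t parameter. */
    kdp_send_crashdump_data(KDP_DATA, corename_str,
                            debug_buf_ptr - debug_buf, debug_buf);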
 
+uint32_t kdp_crashdump_short_pkt;
+
 int
 kdp_send_crashdump_pkt(unsigned int request, char *corename, 
     uint64_t length, void *panic_data)
 {
+	int poll_count;
 	struct corehdr *th = NULL;
-	int poll_count = 2500;
-  
-	char rretries = 0, tretries = 0;
+	char rretries, tretries;
+
+	if (kdp_dump_start_time == 0) {
+		kdp_dump_start_time = mach_absolute_time();
+		kdp_superblock_dump_start_time = kdp_dump_start_time;
+	}
 
+	tretries = rretries = 0;
+	poll_count = KDP_CRASHDUMP_POLL_COUNT;
 	pkt.off = pkt.len = 0;
-  
 	if (request == KDP_WRQ) /* longer timeout for initial request */
 		poll_count += 1000;
 
@@ -1409,27 +1583,34 @@ TRANSMIT_RETRY:
 	th = create_panic_header(request, corename, (unsigned)length, panic_block);
 
 	if (request == KDP_DATA) {
-		/* as all packets are SEGSIZE in length, the last packet
+		/* as all packets are kdp_crashdump_pkt_size in length, the last packet
 		 * may end up with trailing bits. make sure that those
 		 * bits aren't confusing. */
-		if (length < SEGSIZE)
-			memset(th->th_data + length, 'X', 
-                               SEGSIZE - (uint32_t) length);
+		if (length < kdp_crashdump_pkt_size) {
+			kdp_crashdump_short_pkt++;
+			memset(th->th_data + length, 'Y', 
+                               kdp_crashdump_pkt_size - (uint32_t) length);
+		}
 
-		if (!kdp_machine_vm_read((mach_vm_address_t)(intptr_t)panic_data, (caddr_t) th->th_data, length)) {
-			memset ((caddr_t) th->th_data, 'X', (size_t)length);
+		if (!kdp_machine_vm_read((mach_vm_address_t)(uintptr_t)panic_data, (caddr_t) th->th_data, length)) {
+			uintptr_t next_page = round_page((uintptr_t)panic_data);
+			memset((caddr_t) th->th_data, 'X', (size_t)length);
+			if ((next_page - ((uintptr_t) panic_data)) < length) {
+				uint64_t resid = length - (next_page - (uintptr_t) panic_data);
+				if (!kdp_machine_vm_read((mach_vm_address_t)(uintptr_t)next_page, (caddr_t) th->th_data + (length - resid), resid)) {
+					memset((caddr_t) th->th_data + (length - resid), 'X', (size_t)resid);
+				}
+			}
 		}
 	}
 	else if (request == KDP_SEEK) {
-#if defined(__LP64__)
 		if (kdp_feature_large_crashdumps)
 			*(uint64_t *) th->th_data = OSSwapHostToBigInt64((*(uint64_t *) panic_data));
 		else
-#endif
-		*(unsigned int *) th->th_data = htonl(*(unsigned int *) panic_data);
+			*(unsigned int *) th->th_data = htonl(*(unsigned int *) panic_data);
 	}
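The fallback added above salvages the tail of a KDP_DATA read that merely straddles an unmapped first page, instead of reporting the whole packet as filler:

    /* Worked example (4 KB pages, values illustrative):
     *   panic_data = 0x...0ff0, length = 0x40
     *   next_page  = 0x...1000, so next_page - panic_data = 0x10 < 0x40
     *   resid      = 0x40 - 0x10 = 0x30
     * => bytes 0x10..0x3f are re-read from the second page; only the
     *    unmapped first 0x10 bytes remain 'X'.
     */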
 
-	(*kdp_en_send_pkt)(&pkt.data[pkt.off], pkt.len);
+	kdp_send_data(&pkt.data[pkt.off], pkt.len);
 
 	/* Listen for the ACK */
 RECEIVE_RETRY:
@@ -1443,17 +1624,22 @@ RECEIVE_RETRY:
 		pkt.input = FALSE;
     
 		th = (struct corehdr *) &pkt.data[pkt.off];
-#if	defined(__LP64__)
 		if (request == KDP_WRQ) {
 			uint16_t opcode64 = ntohs(th->th_opcode);
 			uint16_t features64 = (opcode64 & 0xFF00)>>8;
 			if ((opcode64 & 0xFF) == KDP_ACK) {
 				kdp_feature_large_crashdumps = features64 & KDP_FEATURE_LARGE_CRASHDUMPS;
+				if (features64 & KDP_FEATURE_LARGE_PKT_SIZE) {
+					kdp_feature_large_pkt_size = 1;
+				}
+				else {
+					kdp_feature_large_pkt_size = 0;
+					kdp_crashdump_pkt_size = 512;
+				}
 				printf("Protocol features: 0x%x\n", (uint32_t) features64);
 				th->th_opcode = htons(KDP_ACK);
 			}
 		}
-#endif
 		if (ntohs(th->th_opcode) == KDP_ACK && ntohl(th->th_block) == panic_block) {
 		}
 		else
@@ -1485,12 +1671,25 @@ RECEIVE_RETRY:
 				kdp_us_spin ((tretries%4) * panic_timeout); /* capped linear backoff */
 				goto TRANSMIT_RETRY;
 			}
-  
-	panic_block++;
-  
-	if (request == KDP_EOF)
+
+	if (!(++panic_block % SBLOCKSZ)) {
+		uint64_t ctime;
+		kdb_printf_unbuffered(".");
+		ctime = mach_absolute_time();
+		kdp_superblock_dump_time = ctime - kdp_superblock_dump_start_time;
+		kdp_superblock_dump_start_time = ctime;
+		if (kdp_superblock_dump_time > kdp_max_superblock_dump_time)
+			kdp_max_superblock_dump_time = kdp_superblock_dump_time;
+		if (kdp_superblock_dump_time < kdp_min_superblock_dump_time)
+			kdp_min_superblock_dump_time = kdp_superblock_dump_time;
+	}
+
+	if (request == KDP_EOF) {
 		printf("\nTotal number of packets transmitted: %d\n", panic_block);
-  
+		printf("Avg. superblock transfer abstime 0x%llx\n", ((mach_absolute_time() - kdp_dump_start_time) / panic_block) * SBLOCKSZ);
+		printf("Minimum superblock transfer abstime: 0x%llx\n", kdp_min_superblock_dump_time);
+		printf("Maximum superblock transfer abstime: 0x%llx\n", kdp_max_superblock_dump_time);
+	}
 	return 1;
 }
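The per-superblock dot printed above now doubles as a profiler: each SBLOCKSZ-packet superblock updates a running min/max, and the EOF path reports min, max, and a derived average, all in raw mach_absolute_time() units. To read a printed figure in nanoseconds (sketch using the standard conversion helper):

    uint64_t ns;
    absolutetime_to_nanoseconds(kdp_max_superblock_dump_time, &ns);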
 
@@ -1521,8 +1720,6 @@ strnstr(char *s, const char *find, size_t slen)
   return (s);
 }
 
-extern char version[];
-
 /* Horrid hack to extract xnu version if possible - a much cleaner approach
  * would be to have the integrator run a script which would copy the
  * xnu version into a string or an int somewhere at project submission
@@ -1541,10 +1738,9 @@ kdp_get_xnu_version(char *versionbuf)
 	char *vptr;
 
 	strlcpy(vstr, "custom", 10);
-
 	if (kdp_machine_vm_read((mach_vm_address_t)(uintptr_t)version, versionbuf, 128)) {
-               versionbuf[127] = '\0';
-               versionpos = strnstr(versionbuf, "xnu-", 115);
+		versionbuf[127] = '\0';
+		versionpos = strnstr(versionbuf, "xnu-", 115);
 		if (versionpos) {
 			strncpy(vstr, versionpos, sizeof(vstr));
 			vstr[sizeof(vstr)-1] = '\0';
@@ -1562,8 +1758,6 @@ kdp_get_xnu_version(char *versionbuf)
 	return retval;
 }
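The bounds in the version scan above are deliberate: versionbuf holds 128 bytes, byte 127 is forced to NUL, and strnstr() searches only the first 115 bytes, leaving headroom for the copy that follows. With an illustrative banner:

    /* version    = "Darwin Kernel Version 11.0.0: ...; root:xnu-1234.5.6~7/RELEASE_X86_64"
     * versionpos -> "xnu-1234.5.6~7/RELEASE_X86_64", which strncpy() truncates
     * into vstr[]; vstr keeps "custom" only when no "xnu-" tag is found. */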
 
-extern char *inet_aton(const char *cp, struct in_addr *pin);
-
 void
 kdp_set_dump_info(const uint32_t flags, const char *filename, 
                   const char *destipstr, const char *routeripstr,
@@ -1685,23 +1879,23 @@ kdp_panic_dump(void)
 	char coreprefix[10];
 	int panic_error;
 
-	uint64_t 	abstime;
+	uint64_t        abstime;
 	uint32_t	current_ip = ntohl((uint32_t)kdp_current_ip_address);
 
 	if (flag_panic_dump_in_progress) {
-		printf("System dump aborted.\n");
+		kdb_printf("System dump aborted.\n");
 		goto panic_dump_exit;
 	}
 		
 	printf("Entering system dump routine\n");
 
 	if (!kdp_en_recv_pkt || !kdp_en_send_pkt) {
-		printf("Error: No transport device registered for kernel crashdump\n");
-		return;
+		kdb_printf("Error: No transport device registered for kernel crashdump\n");
+		return;
 	}
 
 	if (!panicd_specified) {
-		printf("A dump server was not specified in the boot-args, terminating kernel core dump.\n");
+		kdb_printf("A dump server was not specified in the boot-args, terminating kernel core dump.\n");
 		goto panic_dump_exit;
 	}
 
@@ -1734,27 +1928,27 @@ kdp_panic_dump(void)
         }
 
 	if (0 == inet_aton(panicd_ip_str, (struct in_addr *) &panic_server_ip)) {
-		printf("inet_aton() failed interpreting %s as a panic server IP\n", panicd_ip_str);
+		kdb_printf("inet_aton() failed interpreting %s as a panic server IP\n", panicd_ip_str);
 	}
 	else
-		printf("Attempting connection to panic server configured at IP %s, port %d\n", panicd_ip_str, panicd_port);
+		kdb_printf("Attempting connection to panic server configured at IP %s, port %d\n", panicd_ip_str, panicd_port);
 
 	destination_mac = router_mac;
 
 	if (kdp_arp_resolve(panic_server_ip, &temp_mac)) {
-		printf("Resolved %s's (or proxy's) link level address\n", panicd_ip_str);
+		kdb_printf("Resolved %s's (or proxy's) link level address\n", panicd_ip_str);
 		destination_mac = temp_mac;
 	}
 	else {
 		if (!flag_panic_dump_in_progress) goto panic_dump_exit;
 		if (router_specified) {
 			if (0 == inet_aton(router_ip_str, (struct in_addr *) &parsed_router_ip))
-				printf("inet_aton() failed interpreting %s as an IP\n", router_ip_str);
+				kdb_printf("inet_aton() failed interpreting %s as an IP\n", router_ip_str);
 			else {
 				router_ip = parsed_router_ip;
 				if (kdp_arp_resolve(router_ip, &temp_mac)) {
 					destination_mac = temp_mac;
-					printf("Routing through specified router IP %s (%d)\n", router_ip_str, router_ip);
+					kdb_printf("Routing through specified router IP %s (%d)\n", router_ip_str, router_ip);
 				}
 			}
 		}
@@ -1762,7 +1956,7 @@ kdp_panic_dump(void)
 
 	if (!flag_panic_dump_in_progress) goto panic_dump_exit;
 
-	printf("Transmitting packets to link level address: %02x:%02x:%02x:%02x:%02x:%02x\n",
+	kdb_printf("Transmitting packets to link level address: %02x:%02x:%02x:%02x:%02x:%02x\n",
 	    destination_mac.ether_addr_octet[0] & 0xff,
 	    destination_mac.ether_addr_octet[1] & 0xff,
 	    destination_mac.ether_addr_octet[2] & 0xff,
@@ -1770,17 +1964,17 @@ kdp_panic_dump(void)
 	    destination_mac.ether_addr_octet[4] & 0xff,
 	    destination_mac.ether_addr_octet[5] & 0xff);
 
-	printf("Kernel map size is %llu\n", (unsigned long long) get_vmmap_size(kernel_map));
-	printf("Sending write request for %s\n", corename_str);  
+	kdb_printf("Kernel map size is %llu\n", (unsigned long long) get_vmmap_size(kernel_map));
+	kdb_printf("Sending write request for %s\n", corename_str);  
 
 	if ((panic_error = kdp_send_crashdump_pkt(KDP_WRQ, corename_str, 0 , NULL)) < 0) {
-		printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
+		kdb_printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
 		goto panic_dump_exit;
 	}
 
 	/* Just the panic log requested */
 	if ((panicstr != (char *) 0) && (kdp_flag & PANIC_LOG_DUMP)) {
-		printf("Transmitting panic log, please wait: ");
+		kdb_printf_unbuffered("Transmitting panic log, please wait: ");
 		kdp_send_crashdump_data(KDP_DATA, corename_str, 
 					debug_buf_ptr - debug_buf,
 					debug_buf);
@@ -1794,15 +1988,13 @@ kdp_panic_dump(void)
 		long start_off = msgbufp->msg_bufx;
                 long len;
 
-		printf("Transmitting system log, please wait: ");
+		kdb_printf_unbuffered("Transmitting system log, please wait: ");
 		if (start_off >= msgbufp->msg_bufr) {
 			len = msgbufp->msg_size - start_off;
 			kdp_send_crashdump_data(KDP_DATA, corename_str, len, 
 						msgbufp->msg_bufc + start_off);
-
 			/* seek to remove trailing bytes */
-			if (len & (SEGSIZE - 1))
-				kdp_send_crashdump_seek(corename_str, len);
+			kdp_send_crashdump_seek(corename_str, len);
 			start_off  = 0;
 		}
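The alignment guard on the trailing seek is gone because it only worked for a power-of-two segment size:

    /* Old test: "len is not a multiple of SEGSIZE", valid only because
     * SEGSIZE was a power of two:
     *     if (len & (SEGSIZE - 1))
     *             kdp_send_crashdump_seek(corename_str, len);
     * kdp_crashdump_pkt_size is negotiated at runtime, so the cheap mask
     * test no longer applies and the seek is issued unconditionally. */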
 
@@ -1843,14 +2035,8 @@ static boolean_t needs_serial_init = TRUE;
 static void
 kdp_serial_send(void *rpkt, unsigned int rpkt_len)
 {
-	if (needs_serial_init)
-	{
-	    serial_init();
-	    needs_serial_init = FALSE;
-	}
-	
 	//	printf("tx\n");
-	kdp_serialize_packet((unsigned char *)rpkt, rpkt_len, serial_putc);
+	kdp_serialize_packet((unsigned char *)rpkt, rpkt_len, pal_serial_putc);
 }
 
 static void 
@@ -1859,18 +2045,12 @@ kdp_serial_receive(void *rpkt, unsigned int *rpkt_len, unsigned int timeout)
 	int readkar;
 	uint64_t now, deadline;
 	
-	if (needs_serial_init)
-	{
-	    serial_init();
-	    needs_serial_init = FALSE;
-	}
-	
 	clock_interval_to_deadline(timeout, 1000 * 1000 /* milliseconds */, &deadline);
 
 //	printf("rx\n");
 	for(clock_get_uptime(&now); now < deadline; clock_get_uptime(&now))
 	{
-		readkar = serial_getc();
+		readkar = pal_serial_getc();
 		if(readkar >= 0)
 		{
 			unsigned char *packet;
@@ -1885,6 +2065,21 @@ kdp_serial_receive(void *rpkt, unsigned int *rpkt_len, unsigned int timeout)
 	*rpkt_len = 0;
 }
 
+static boolean_t
+kdp_serial_setmode(boolean_t active)
+{
+        if (active == FALSE) /* leaving KDP */
+            return TRUE;
+
+        if (!needs_serial_init)
+            return TRUE;
+
+        pal_serial_init();
+        needs_serial_init = FALSE;
+        return TRUE;
+}
+
 static void kdp_serial_callout(__unused void *arg, kdp_event_t event)
 {
     /* When we stop KDP, set the bit to re-initialize the console serial port
@@ -1912,6 +2107,21 @@ static void kdp_serial_callout(__unused void *arg, kdp_event_t event)
 void
 kdp_init(void)
 {
+	strlcpy(kdp_kernelversion_string, version, sizeof(kdp_kernelversion_string));
+
+	/* Relies on platform layer calling panic_init() before kdp_init() */
+	if (kernel_uuid[0] != '\0') {
+		/*
+		 * Update kdp_kernelversion_string with our UUID
+		 * generated at link time.
+		 */
+
+		strlcat(kdp_kernelversion_string, "; UUID=", sizeof(kdp_kernelversion_string));
+		strlcat(kdp_kernelversion_string, kernel_uuid, sizeof(kdp_kernelversion_string));
+	}
+
+	if (debug_boot_arg & DB_REBOOT_POST_CORE)
+		kdp_flag |= REBOOT_POST_CORE;
 #if CONFIG_SERIAL_KDP
 	char kdpname[80];
 	struct in_addr ipaddr;
@@ -1928,9 +2138,10 @@ kdp_init(void)
 		return;
 #endif
 	
-	kprintf("Intializing serial KDP\n");
+	kprintf("Initializing serial KDP\n");
 
 	kdp_register_callout(kdp_serial_callout, NULL);
+	kdp_register_link(NULL, kdp_serial_setmode);
 	kdp_register_send_receive(kdp_serial_send, kdp_serial_receive);
 	
 	/* fake up an ip and mac for early serial debugging */
@@ -1940,7 +2151,8 @@ kdp_init(void)
 	macaddr.ether_addr_octet[3] = 'i';
 	macaddr.ether_addr_octet[4] = 'a';
 	macaddr.ether_addr_octet[5] = 'l';
-	ipaddr.s_addr = 0xABADBABE;
+	ipaddr.s_addr = KDP_SERIAL_IPADDR;
 	kdp_set_ip_and_mac_addresses(&ipaddr, &macaddr);
 #endif /* CONFIG_SERIAL_KDP */
 }
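kdp_kernelversion_string ends up carrying the kernel banner plus the link-time UUID, e.g. (illustrative values):

    /* "Darwin Kernel Version 11.0.0: ...; root:xnu-.../RELEASE_X86_64; UUID=0123..."
     * A remote debugger retrieves this string via the KDP kernelversion
     * request and can locate matching symbols before reading target memory;
     * as the comment in the hunk notes, it relies on panic_init() having
     * populated kernel_uuid first. */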
diff --git a/osfmk/kdp/ml/i386/kdp_vm.c b/osfmk/kdp/ml/i386/kdp_vm.c
index 752db7b2b..5633c73b9 100644
--- a/osfmk/kdp/ml/i386/kdp_vm.c
+++ b/osfmk/kdp/ml/i386/kdp_vm.c
@@ -44,9 +44,6 @@
 #include <vm/vm_protos.h>
 #include <vm/vm_kern.h>
 
-extern vm_offset_t sectTEXTB, sectDATAB, sectLINKB, sectPRELINKB;
-extern unsigned long sectSizeTEXT, sectSizeDATA, sectSizeLINK, sectSizePRELINK;
-
 int	kern_dump(void);
 int	kdp_dump_trap(int type, x86_saved_state32_t *regs);
 
@@ -156,15 +153,16 @@ kern_dump(void)
 	vm_map_t	map;
 	unsigned int	thread_count, segment_count;
 	unsigned int	command_size = 0, header_size = 0, tstate_size = 0;
-	unsigned int	hoffset = 0, foffset = 0, nfoffset = 0,  vmoffset = 0;
-	unsigned int	max_header_size = 0;
-	vm_offset_t	header;
+
+	uint64_t        hoffset = 0, foffset = 0, nfoffset = 0, max_header_size;
+	vm_offset_t     header, txstart;
+	vm_address_t    vmoffset;
+
 	struct mach_header	*mh;
 	struct segment_command	*sc;
 	vm_size_t	size;
 	vm_prot_t	prot = 0;
 	vm_prot_t	maxprot = 0;
-	vm_inherit_t	inherit = 0;
 	mythread_state_flavor_t flavors[MAX_TSTATE_FLAVORS];
 	vm_size_t	nflavors;
 	vm_size_t	i;
@@ -176,9 +174,6 @@ kern_dump(void)
 
 	int error = 0;
 	int panic_error = 0;
-	unsigned int txstart = 0;
-	unsigned int mach_section_count = 4;
-	unsigned int num_sects_txed = 0;
 
 	map = kernel_map;
 
@@ -194,7 +189,7 @@ kern_dump(void)
 		tstate_size += sizeof(mythread_state_flavor_t) +
 		    (flavors[i].count * sizeof(int));
 
-	command_size = (segment_count + mach_section_count) *
+	command_size = (segment_count) *
 	    sizeof(struct segment_command) +
 	    thread_count * sizeof(struct thread_command) +
 	    tstate_size * thread_count;
@@ -212,7 +207,7 @@ kern_dump(void)
 	mh->cputype = cpu_type();
 	mh->cpusubtype = cpu_subtype();
 	mh->filetype = MH_CORE;
-	mh->ncmds = segment_count + thread_count + mach_section_count;
+	mh->ncmds = segment_count + thread_count;
 	mh->sizeofcmds = command_size;
 	mh->flags = 0;
 
@@ -225,7 +220,7 @@ kern_dump(void)
 
 	max_header_size = foffset;
 
-	vmoffset = VM_MIN_ADDRESS;		/* offset into VM */
+	vmoffset = VM_MIN_KERNEL_ADDRESS;		/* offset into VM */
 
 	/* Transmit the Mach-O MH_CORE header, and seek forward past the 
 	 * area reserved for the segment and thread commands 
@@ -249,64 +244,36 @@ kern_dump(void)
 		error = panic_error;
 		goto out;
 	}
-	printf ("Transmitting kernel state, please wait: ");
-
-	while ((segment_count > 0) || (kret == KERN_SUCCESS)){
-		/* Check if we've transmitted all the kernel sections */
-		if (num_sects_txed == mach_section_count) {
-
-			while (1) {
-
-				/*
-				 *	Get region information for next region.
-				 */
-
-				vbrcount = VM_REGION_SUBMAP_INFO_COUNT_64;
-				if((kret = vm_region_recurse_64(map, 
-					    &vmoffset, &size, &nesting_depth, 
-					    (vm_region_recurse_info_t)&vbr,
-					    &vbrcount)) != KERN_SUCCESS) {
-					break;
-				}
-
-				if(vbr.is_submap) {
-					nesting_depth++;
-					continue;
-				} else {
-					break;
-				}
-			}
+	printf ("Transmitting kernel state:\n");
 
-			if(kret != KERN_SUCCESS)
-				break;
+	while ((segment_count > 0) || (kret == KERN_SUCCESS)) {
+		while (1) {
 
-			prot = vbr.protection;
-			maxprot = vbr.max_protection;
-			inherit = vbr.inheritance;
-		}
-		else
-		{
-			switch (num_sects_txed) {
-			case 0:
-				/* Transmit the kernel text section */
-				vmoffset = sectTEXTB;
-				size = sectSizeTEXT;
-				break;
-			case 1:
-				vmoffset = sectDATAB;
-				size = sectSizeDATA;
-				break;
-			case 2:
-				vmoffset = sectPRELINKB;
-				size = sectSizePRELINK;
+			/*
+			 *	Get region information for next region.
+			 */
+
+			vbrcount = VM_REGION_SUBMAP_INFO_COUNT_64;
+			if((kret = vm_region_recurse_64(map, 
+				    &vmoffset, &size, &nesting_depth, 
+				    (vm_region_recurse_info_t)&vbr,
+				    &vbrcount)) != KERN_SUCCESS) {
 				break;
-			case 3:
-				vmoffset = sectLINKB;
-				size = sectSizeLINK;
+			}
+
+			if(vbr.is_submap) {
+				nesting_depth++;
+				continue;
+			} else {
 				break;
 			}
-			num_sects_txed++;
 		}
+
+		if(kret != KERN_SUCCESS)
+			break;
+
+		prot = vbr.protection;
+		maxprot = vbr.max_protection;
 		/*
 		 *	Fill in segment command structure.
 		 */
@@ -319,7 +286,7 @@ kern_dump(void)
 		sc->segname[0] = 0;
 		sc->vmaddr = vmoffset;
 		sc->vmsize = size;
-		sc->fileoff = foffset;
+		sc->fileoff = (uint32_t) foffset;
 		sc->filesize = size;
 		sc->maxprot = maxprot;
 		sc->initprot = prot;
@@ -392,8 +359,7 @@ kern_dump(void)
 	}
     
 	/* last packet */
-	if ((panic_error = kdp_send_crashdump_pkt (KDP_EOF, NULL, 0, ((void *) 0))) < 0)
-	{
+	if ((panic_error = kdp_send_crashdump_pkt (KDP_EOF, NULL, 0, ((void *) 0))) < 0) {
 		printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
 		error = panic_error;
 		goto out;
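Dropping the four hard-wired kernel sections (TEXT, DATA, LINK, PRELINK) leaves a single uniform walk of the kernel map from VM_MIN_KERNEL_ADDRESS; ncmds and command_size shrink accordingly now that mach_section_count is gone. The surviving loop reduces to roughly this shape (a condensed sketch of the code above, not a drop-in):

    vmoffset = VM_MIN_KERNEL_ADDRESS;
    for (;;) {
            vbrcount = VM_REGION_SUBMAP_INFO_COUNT_64;
            kret = vm_region_recurse_64(map, &vmoffset, &size, &nesting_depth,
                                        (vm_region_recurse_info_t)&vbr, &vbrcount);
            if (kret != KERN_SUCCESS)
                    break;                  /* walked past the last region */
            if (vbr.is_submap) {            /* descend rather than emit */
                    nesting_depth++;
                    continue;
            }
            /* emit one segment_command for [vmoffset, vmoffset + size),
             * transmit the payload, then advance vmoffset by size */
    }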
diff --git a/osfmk/kdp/ml/i386/kdp_x86_common.c b/osfmk/kdp/ml/i386/kdp_x86_common.c
index 8f08df116..221d683ac 100644
--- a/osfmk/kdp/ml/i386/kdp_x86_common.c
+++ b/osfmk/kdp/ml/i386/kdp_x86_common.c
@@ -44,6 +44,8 @@
 #include <vm/vm_protos.h>
 #include <vm/vm_kern.h>
 
+#include <machine/pal_routines.h>
+
 // #define KDP_VM_READ_DEBUG 1
 // #define KDP_VM_WRITE_DEBUG 1
 
@@ -73,8 +75,8 @@ kdp_vtophys(
 mach_vm_size_t
 kdp_machine_vm_read( mach_vm_address_t src, caddr_t dst, mach_vm_size_t len)
 {
-	addr64_t cur_virt_src = (addr64_t)src;
-	addr64_t cur_virt_dst = (addr64_t)(intptr_t)dst;
+	addr64_t cur_virt_src = PAL_KDP_ADDR((addr64_t)src);
+	addr64_t cur_virt_dst = PAL_KDP_ADDR((addr64_t)(intptr_t)dst);
 	addr64_t cur_phys_dst, cur_phys_src;
 	mach_vm_size_t resid = len;
 	mach_vm_size_t cnt = 0, cnt_src, cnt_dst;
@@ -201,8 +203,8 @@ kdp_machine_vm_write( caddr_t src, mach_vm_address_t dst, mach_vm_size_t len)
 	printf("kdp_vm_write: src %p dst %llx len %llx - %08X %08X\n", (void *)src, dst, len, ((unsigned int *)src)[0], ((unsigned int *)src)[1]);
 #endif
 
-	cur_virt_src = (addr64_t)(intptr_t)src;
-	cur_virt_dst = (addr64_t)dst;
+	cur_virt_src = PAL_KDP_ADDR((addr64_t)(intptr_t)src);
+	cur_virt_dst = PAL_KDP_ADDR((addr64_t)dst);
 
 	resid = (unsigned)len;
 
diff --git a/osfmk/kdp/ml/ppc/kdp_asm.s b/osfmk/kdp/ml/ppc/kdp_asm.s
deleted file mode 100644
index cdc0cfc5f..000000000
--- a/osfmk/kdp/ml/ppc/kdp_asm.s
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#include <mach_kdb.h>
-#include <mach_kgdb.h>
-#include <mach_debug.h>
-#include <assym.s>
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <mach/ppc/vm_param.h>
-
-/* void kdp_call_with_ctx(int type, struct ppc_thread_state *ssp)
- *
- * Switch on kdp stack and  enter the debugger. On return,
- * switch back to the previous stack
- *
- * If the kdp stack is not free, we allocate ourselves a frame below
- * the current kdp frame. This should never occur in a perfect world.
- */
-
-ENTRY(kdp_call_with_ctx, TAG_NO_FRAME_USED)
-	
-	lis		r2,hi16(MASK(MSR_VEC))	; Get the vector enable
-	mfmsr	r7					; Get the MSR
-	ori		r2,r2,lo16(MASK(MSR_EE)|MASK(MSR_FP))	; Get FP and EE
-	mflr	r0
-	andc	r7,r7,r2			; Clear FP, VEC, and EE
-	mtmsr	r7
-	isync										; Need this because we may have ditched fp/vec
-	mfsprg	r8,0				/* Get the per_proc block address */
-	stw	r0,	FM_LR_SAVE(r1)		/* save lr in the current frame */
-	
-	lwz	r9,	PP_DEBSTACKPTR(r8)	/* get kdp stack pointer */
-	cmpwi	r9,	0
-	bne	0f
-
-#ifdef	LET_KDP_REENTER
-	mr	r9,	r1 			/* get current stack pointer */
-	subi	r9,	r9,	FM_REDZONE + FM_SIZE
-#else
-	bl	EXT(kdp_print_backtrace)
-#endif
-
-0:
-	stw	r1,	FM_ARG0(r9)			/* Store old stack pointer */
-	li	r0,	0
-	stw	r0,	PP_DEBSTACKPTR(r8)	/* Mark kdp stack as busy */
-	
-	subi	r1,	r9,	FM_SIZE
-	stw	r0,	FM_BACKPTR(r1)
-	
-	bl	EXT(kdp_trap)
-
-	lis		r2,hi16(MASK(MSR_VEC))		; Get the vector enable
-	mfmsr	r0					/* Get the MSR */
-	ori		r2,r2,lo16(MASK(MSR_EE)|MASK(MSR_FP))	; Get FP and EE
-	addi	r1,	r1,	FM_SIZE
-	andc	r0,r0,r2			; Clear FP, VEC, and EE
-	mtmsr	r0
-	isync						; Need this because we may have ditched fp/vec
-
-	mfsprg	r8,0				/* Get the per_proc block address */
-	
-	stw	r1,	PP_DEBSTACKPTR(r8)	/* Mark gdb stack as free */
-	lwz	r1,	FM_ARG0(r1)
-	lwz	r0,	FM_LR_SAVE(r1)
-	mtlr	r0
-
-	blr
-
-
diff --git a/osfmk/kdp/ml/ppc/kdp_machdep.c b/osfmk/kdp/ml/ppc/kdp_machdep.c
deleted file mode 100644
index e1e89331d..000000000
--- a/osfmk/kdp/ml/ppc/kdp_machdep.c
+++ /dev/null
@@ -1,827 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
- 
-#include <mach/mach_types.h>
-#include <mach/exception_types.h>
-#include <ppc/exception.h>
-#include <ppc/proc_reg.h>
-#include <kdp/kdp_internal.h>
-#include <ppc/savearea.h>
-#include <ppc/misc_protos.h>
-#include <kern/debug.h>
-#include <IOKit/IOPlatformExpert.h>
-
-#include <kern/thread.h>
-#include <ppc/thread.h>
-#include <vm/vm_map.h>
-#include <ppc/pmap.h>
-
-#define KDP_TEST_HARNESS 0
-#if KDP_TEST_HARNESS
-#define dprintf(x) kprintf x
-#else
-#define dprintf(x)
-#endif
-
-void print_saved_state(void *);
-void kdp_call(void);
-int kdp_getc(void);
-boolean_t kdp_call_kdb(void);
-
-extern pmap_t kdp_pmap;
-
-int
-machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p);
-
-int
-machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p);
-
-unsigned
-machine_read64(addr64_t srcaddr, caddr_t dstaddr, uint32_t len);
-
-void
-kdp_exception(
-    unsigned char	*pkt,
-    int	*len,
-    unsigned short	*remote_port,
-    unsigned int	exception,
-    unsigned int	code,
-    unsigned int	subcode
-)
-{
-    struct {
-    	kdp_exception_t	pkt;
-	kdp_exc_info_t	exc;
-    }			aligned_pkt;
-    kdp_exception_t	*rq = (kdp_exception_t *)&aligned_pkt;
-
-    bcopy((char *)pkt, (char *)rq, sizeof(*rq));
-    rq->hdr.request = KDP_EXCEPTION;
-    rq->hdr.is_reply = 0;
-    rq->hdr.seq = kdp.exception_seq;
-    rq->hdr.key = 0;
-    rq->hdr.len = sizeof (*rq) + sizeof(kdp_exc_info_t);
-    
-    rq->n_exc_info = 1;
-    rq->exc_info[0].cpu = 0;
-    rq->exc_info[0].exception = exception;
-    rq->exc_info[0].code = code;
-    rq->exc_info[0].subcode = subcode;
-    
-    rq->hdr.len += rq->n_exc_info * sizeof (kdp_exc_info_t);
-    
-    bcopy((char *)rq, (char *)pkt, rq->hdr.len);
-
-    kdp.exception_ack_needed = TRUE;
-    
-    *remote_port = kdp.exception_port;
-    *len = rq->hdr.len;
-}
-
-boolean_t
-kdp_exception_ack(
-    unsigned char	*pkt,
-    int			len
-)
-{
-    kdp_exception_ack_t	aligned_pkt;
-    kdp_exception_ack_t	*rq = (kdp_exception_ack_t *)&aligned_pkt;
-
-    if ((size_t)len < sizeof (*rq))
-	return(FALSE);
-	
-    bcopy((char *)pkt, (char *)rq, sizeof(*rq));
-
-    if (!rq->hdr.is_reply || rq->hdr.request != KDP_EXCEPTION)
-    	return(FALSE);
-	
-    dprintf(("kdp_exception_ack seq %x %x\n", rq->hdr.seq, kdp.exception_seq));
-	
-    if (rq->hdr.seq == kdp.exception_seq) {
-	kdp.exception_ack_needed = FALSE;
-	kdp.exception_seq++;
-    }
-    return(TRUE);
-}
-
-static void
-kdp_getintegerstate(
-    struct ppc_thread_state		*state
-)
-{
-    struct savearea	*saved_state;
-   
-    saved_state = kdp.saved_state;
-   
-    bzero((char *)state,sizeof (struct ppc_thread_state)) ;
-
-    state->srr0	= (unsigned int)saved_state->save_srr0;
-    state->srr1	= (unsigned int)saved_state->save_srr1;
-    state->r0	= (unsigned int)saved_state->save_r0;
-    state->r1	= (unsigned int)saved_state->save_r1;
-    state->r2	= (unsigned int)saved_state->save_r2;
-    state->r3	= (unsigned int)saved_state->save_r3;
-    state->r4	= (unsigned int)saved_state->save_r4;
-    state->r5	= (unsigned int)saved_state->save_r5;
-    state->r6	= (unsigned int)saved_state->save_r6;
-    state->r7	= (unsigned int)saved_state->save_r7;
-    state->r8	= (unsigned int)saved_state->save_r8;
-    state->r9	= (unsigned int)saved_state->save_r9;
-    state->r10	= (unsigned int)saved_state->save_r10;
-    state->r11	= (unsigned int)saved_state->save_r11;
-    state->r12	= (unsigned int)saved_state->save_r12;
-    state->r13	= (unsigned int)saved_state->save_r13;
-    state->r14	= (unsigned int)saved_state->save_r14;
-    state->r15	= (unsigned int)saved_state->save_r15;
-    state->r16	= (unsigned int)saved_state->save_r16;
-    state->r17	= (unsigned int)saved_state->save_r17;
-    state->r18	= (unsigned int)saved_state->save_r18;
-    state->r19	= (unsigned int)saved_state->save_r19;
-    state->r20	= (unsigned int)saved_state->save_r20;
-    state->r21	= (unsigned int)saved_state->save_r21;
-    state->r22	= (unsigned int)saved_state->save_r22;
-    state->r23	= (unsigned int)saved_state->save_r23;
-    state->r24	= (unsigned int)saved_state->save_r24;
-    state->r25	= (unsigned int)saved_state->save_r25;
-    state->r26	= (unsigned int)saved_state->save_r26;
-    state->r27	= (unsigned int)saved_state->save_r27;
-    state->r28	= (unsigned int)saved_state->save_r28;
-    state->r29	= (unsigned int)saved_state->save_r29;
-    state->r30	= (unsigned int)saved_state->save_r30;
-    state->r31	= (unsigned int)saved_state->save_r31;
-    state->cr	= (unsigned int)saved_state->save_cr;
-    state->xer	= (unsigned int)saved_state->save_xer;
-    state->lr	= (unsigned int)saved_state->save_lr;
-    state->ctr	= (unsigned int)saved_state->save_ctr;
-}
-
-static void
-kdp_getintegerstate64(
-    struct ppc_thread_state64	*state
-)
-{
-    struct savearea	*saved_state;
-   
-    saved_state = kdp.saved_state;
-   
-    bzero((char *)state,sizeof (struct ppc_thread_state64)) ;
-
-    state->srr0	= saved_state->save_srr0;
-    state->srr1	= saved_state->save_srr1;
-    state->r0	= saved_state->save_r0;
-    state->r1	= saved_state->save_r1;
-    state->r2	= saved_state->save_r2;
-    state->r3	= saved_state->save_r3;
-    state->r4	= saved_state->save_r4;
-    state->r5	= saved_state->save_r5;
-    state->r6	= saved_state->save_r6;
-    state->r7	= saved_state->save_r7;
-    state->r8	= saved_state->save_r8;
-    state->r9	= saved_state->save_r9;
-    state->r10	= saved_state->save_r10;
-    state->r11	= saved_state->save_r11;
-    state->r12	= saved_state->save_r12;
-    state->r13	= saved_state->save_r13;
-    state->r14	= saved_state->save_r14;
-    state->r15	= saved_state->save_r15;
-    state->r16	= saved_state->save_r16;
-    state->r17	= saved_state->save_r17;
-    state->r18	= saved_state->save_r18;
-    state->r19	= saved_state->save_r19;
-    state->r20	= saved_state->save_r20;
-    state->r21	= saved_state->save_r21;
-    state->r22	= saved_state->save_r22;
-    state->r23	= saved_state->save_r23;
-    state->r24	= saved_state->save_r24;
-    state->r25	= saved_state->save_r25;
-    state->r26	= saved_state->save_r26;
-    state->r27	= saved_state->save_r27;
-    state->r28	= saved_state->save_r28;
-    state->r29	= saved_state->save_r29;
-    state->r30	= saved_state->save_r30;
-    state->r31	= saved_state->save_r31;
-    state->cr	= saved_state->save_cr;
-    state->xer	= saved_state->save_xer;
-    state->lr	= saved_state->save_lr;
-    state->ctr	= saved_state->save_ctr;
-}
-
-kdp_error_t
-kdp_machine_read_regs(
-    __unused unsigned int cpu,
-    unsigned int flavor,
-    char *data,
-    int *size
-)
-{
-    switch (flavor) {
-
-    case PPC_THREAD_STATE:
-		dprintf(("kdp_readregs THREAD_STATE\n"));
-		kdp_getintegerstate((struct ppc_thread_state *)data);
-		*size = PPC_THREAD_STATE_COUNT * sizeof(int);
-		return KDPERR_NO_ERROR;
-
-    case PPC_THREAD_STATE64:
-		dprintf(("kdp_readregs THREAD_STATE\n"));
-		kdp_getintegerstate64((struct ppc_thread_state64 *)data);
-		*size = PPC_THREAD_STATE64_COUNT * sizeof(int);
-		return KDPERR_NO_ERROR;
-
-    case PPC_FLOAT_STATE:
-		dprintf(("kdp_readregs THREAD_FPSTATE\n"));
-		bzero((char *)data ,sizeof(struct ppc_float_state));	
-		*size = PPC_FLOAT_STATE_COUNT * sizeof(int);
-		return KDPERR_NO_ERROR;
-
-    default:
-		dprintf(("kdp_readregs bad flavor %d\n"));
-		return KDPERR_BADFLAVOR;
-    }
-}
-
-static void
-kdp_setintegerstate(
-    struct ppc_thread_state		*state
-)
-{
-    struct savearea	*saved_state;
-   
-    saved_state = kdp.saved_state;
-
-    saved_state->save_srr0	= state->srr0;
-    saved_state->save_srr1	= state->srr1;
-    saved_state->save_r0	= state->r0;
-    saved_state->save_r1	= state->r1;
-    saved_state->save_r2	= state->r2;
-    saved_state->save_r3	= state->r3;
-    saved_state->save_r4	= state->r4;
-    saved_state->save_r5	= state->r5;
-    saved_state->save_r6	= state->r6;
-    saved_state->save_r7	= state->r7;
-    saved_state->save_r8	= state->r8;
-    saved_state->save_r9	= state->r9;
-    saved_state->save_r10	= state->r10;
-    saved_state->save_r11	= state->r11;
-    saved_state->save_r12	= state->r12;
-    saved_state->save_r13	= state->r13;
-    saved_state->save_r14	= state->r14;
-    saved_state->save_r15	= state->r15;
-    saved_state->save_r16	= state->r16;
-    saved_state->save_r17	= state->r17;
-    saved_state->save_r18	= state->r18;
-    saved_state->save_r19	= state->r19;
-    saved_state->save_r20	= state->r20;
-    saved_state->save_r21	= state->r21;
-    saved_state->save_r22	= state->r22;
-    saved_state->save_r23	= state->r23;
-    saved_state->save_r24	= state->r24;
-    saved_state->save_r25	= state->r25;
-    saved_state->save_r26	= state->r26;
-    saved_state->save_r27	= state->r27;
-    saved_state->save_r28	= state->r28;
-    saved_state->save_r29	= state->r29;
-    saved_state->save_r30	= state->r30;
-    saved_state->save_r31	= state->r31;
-    saved_state->save_cr	= state->cr;
-    saved_state->save_xer	= state->xer;
-    saved_state->save_lr	= state->lr;
-    saved_state->save_ctr	= state->ctr;
-}
-
-static void
-kdp_setintegerstate64(
-    struct ppc_thread_state64		*state
-)
-{
-    struct savearea	*saved_state;
-   
-    saved_state = kdp.saved_state;
-
-    saved_state->save_srr0	= state->srr0;
-    saved_state->save_srr1	= state->srr1;
-    saved_state->save_r0	= state->r0;
-    saved_state->save_r1	= state->r1;
-    saved_state->save_r2	= state->r2;
-    saved_state->save_r3	= state->r3;
-    saved_state->save_r4	= state->r4;
-    saved_state->save_r5	= state->r5;
-    saved_state->save_r6	= state->r6;
-    saved_state->save_r7	= state->r7;
-    saved_state->save_r8	= state->r8;
-    saved_state->save_r9	= state->r9;
-    saved_state->save_r10	= state->r10;
-    saved_state->save_r11	= state->r11;
-    saved_state->save_r12	= state->r12;
-    saved_state->save_r13	= state->r13;
-    saved_state->save_r14	= state->r14;
-    saved_state->save_r15	= state->r15;
-    saved_state->save_r16	= state->r16;
-    saved_state->save_r17	= state->r17;
-    saved_state->save_r18	= state->r18;
-    saved_state->save_r19	= state->r19;
-    saved_state->save_r20	= state->r20;
-    saved_state->save_r21	= state->r21;
-    saved_state->save_r22	= state->r22;
-    saved_state->save_r23	= state->r23;
-    saved_state->save_r24	= state->r24;
-    saved_state->save_r25	= state->r25;
-    saved_state->save_r26	= state->r26;
-    saved_state->save_r27	= state->r27;
-    saved_state->save_r28	= state->r28;
-    saved_state->save_r29	= state->r29;
-    saved_state->save_r30	= state->r30;
-    saved_state->save_r31	= state->r31;
-    saved_state->save_cr	= state->cr;
-    saved_state->save_xer	= state->xer;
-    saved_state->save_lr	= state->lr;
-    saved_state->save_ctr	= state->ctr;
-}
-
-kdp_error_t
-kdp_machine_write_regs(
-    __unused unsigned int cpu,
-    unsigned int flavor,
-    char *data,
-    __unused int *size
-)
-{
-    switch (flavor) {
-
-    case PPC_THREAD_STATE:
-		dprintf(("kdp_writeregs THREAD_STATE\n"));
-		kdp_setintegerstate((struct ppc_thread_state *)data);
-
-#if KDP_TEST_HARNESS
-		DumpTheSave((struct savearea *)data);		/* (TEST/DEBUG) */
-#endif
-		return KDPERR_NO_ERROR;
-
-    case PPC_THREAD_STATE64:
-		dprintf(("kdp_writeregs THREAD_STATE64\n"));
-		kdp_setintegerstate64((struct ppc_thread_state64 *)data);
-
-#if KDP_TEST_HARNESS
-		DumpTheSave((struct savearea *)data);		/* (TEST/DEBUG) */
-#endif
-		return KDPERR_NO_ERROR;
-    case PPC_FLOAT_STATE:
-		dprintf(("kdp_writeregs THREAD_FPSTATE\n"));
-		return KDPERR_NO_ERROR;
-
-    default:
-		dprintf(("kdp_writeregs bad flavor %d\n"));
-		return KDPERR_BADFLAVOR;
-    }
-}
-
-void
-kdp_machine_hostinfo(
-    kdp_hostinfo_t *hostinfo
-)
-{
-    int			i;
-
-    hostinfo->cpus_mask = 0;
-    hostinfo->cpu_type = 0;
-
-    for (i = 0; i < machine_info.max_cpus; i++) {
-        if ((PerProcTable[i].ppe_vaddr == (struct per_proc_info *)NULL) || 
-	    !(PerProcTable[i].ppe_vaddr->running))
-            continue;
-	
-        hostinfo->cpus_mask |= (1 << i);
-        if (hostinfo->cpu_type == 0) {
-            hostinfo->cpu_type = slot_type(i);
-            hostinfo->cpu_subtype = slot_subtype(i);
-        }
-    }
-}
-
-void
-kdp_panic(
-    const char		*msg
-)
-{
-    printf("kdp panic: %s\n", msg);    
-    while(1) {}
-}
-
-extern void halt_all_cpus(boolean_t);
-
-void
-kdp_machine_reboot(void)
-{
-	printf("Attempting system restart...");
-	/* Call the platform specific restart*/
-	if (PE_halt_restart) 
-		(*PE_halt_restart)(kPERestartCPU);
-	/* If we do reach this, give up */
-	halt_all_cpus(TRUE);
-}
-
-int
-kdp_intr_disbl(void)
-{
-    return (splhigh());
-}
-
-void
-kdp_intr_enbl(int s)
-{
-    splx(s);
-}
-
-void
-kdp_us_spin(int usec)
-{
-    delay(usec/100);
-}
-
-void print_saved_state(void *state)
-{
-    struct ppc_thread_state	*saved_state;
-
-    saved_state = state;
-
-	printf("pc = 0x%x\n", saved_state->srr0);
-	printf("msr = 0x%x\n", saved_state->srr1);
-	printf("rp = 0x%x\n", saved_state->lr);
-	printf("sp = 0x%x\n", saved_state->r1);
-
-}
-
-void
-kdp_call(void)
-{
-	Debugger("inline call to debugger(machine_startup)");
-}
-
-/*
- * table to convert system specific code to generic codes for kdb
- */
-int kdp_trap_codes[] = {
-	EXC_BAD_ACCESS,	/* 0x0000  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x0100  System reset */
-	EXC_BAD_ACCESS,	/* 0x0200  Machine check */
-	EXC_BAD_ACCESS,	/* 0x0300  Data access */
-	EXC_BAD_ACCESS,	/* 0x0400  Instruction access */
-	EXC_BAD_ACCESS,	/* 0x0500  External interrupt */
-	EXC_BAD_ACCESS,	/* 0x0600  Alignment */
-	EXC_BREAKPOINT,	/* 0x0700  Program - fp exc, ill/priv instr, trap */
-	EXC_ARITHMETIC,	/* 0x0800  Floating point disabled */
-	EXC_SOFTWARE,	/* 0x0900  Decrementer */
-	EXC_BAD_ACCESS,	/* 0x0A00  I/O controller interface */
-	EXC_BAD_ACCESS,	/* 0x0B00  INVALID EXCEPTION */
-	EXC_SOFTWARE,	/* 0x0C00  System call exception */
-	EXC_BREAKPOINT,	/* 0x0D00  Trace */
-	EXC_SOFTWARE,	/* 0x0E00  FP assist */
-	EXC_SOFTWARE,	/* 0x0F00  Performance monitoring */
-	EXC_ARITHMETIC,	/* 0x0F20  Altivec disabled */
-	EXC_BAD_ACCESS,	/* 0x1000  Instruction PTE miss */
-	EXC_BAD_ACCESS,	/* 0x1100  Data load PTE miss */
-	EXC_BAD_ACCESS,	/* 0x1200  Data store PTE miss */
-	EXC_BREAKPOINT,	/* 0x1300  Instruction bkpt */
-	EXC_SOFTWARE,	/* 0x1400  System management */
-	EXC_BAD_ACCESS,	/* 0x1500  INVALID EXCEPTION */
-	EXC_ARITHMETIC,	/* 0x1600  Altivec Assist */
-	EXC_BAD_ACCESS,	/* 0x1700  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x1800  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x1900  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x1A00  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x1B00  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x1C00  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x1D00  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x1E00  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x1F00  INVALID EXCEPTION */
-	EXC_BREAKPOINT,	/* 0x2000  Run Mode/Trace */
-	EXC_BAD_ACCESS,	/* 0x2100  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x2200  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x2300  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x2400  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x2500  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x2600  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x2700  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x2800  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x2900  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x2A00  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x2B00  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x2C00  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x2D00  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x2E00  INVALID EXCEPTION */
-	EXC_BAD_ACCESS,	/* 0x2F00  INVALID EXCEPTION */
-	EXC_SOFTWARE	/* 0x3000  AST trap (software) */
-};
-
-int
-kdp_getc(void)
-{
-	return(cnmaygetc());
-}
-
-int kdp_backtrace;
-int kdp_sr_dump;
-int kdp_dabr;
-int kdp_noisy;
-
-#define kdp_code(x) kdp_trap_codes[((x)==T_AST?0x31:(x)/T_VECTOR_SIZE)]
-
-void
-kdp_trap(
-    unsigned int		exception,
-    struct savearea	*saved_state
-)
-{
-	unsigned int *fp;
-	unsigned int sp;
-
-	if (kdp_noisy) {
-		if (kdp_backtrace) {
-			printf("\nvector=%x, \n", exception/4);
-			sp = saved_state->save_r1;
-			printf("stack backtrace - sp(%x)  ", sp);
-			fp = (unsigned int *) *((unsigned int *)sp);
-			while (fp) {
-				printf("0x%08x ", fp[2]);
-				fp = (unsigned int *)*fp;
-			}
-			printf("\n");
-		}
-#ifdef XXX
-		if (kdp_sr_dump) {
-			dump_segment_registers();
-		}
-#endif
-	
-		printf("vector=%d  ", exception/4);
-	}
-	kdp_raise_exception(kdp_code(exception), 0, 0, saved_state);
-
-	if (kdp_noisy)
-		printf("kdp_trap: kdp_raise_exception() ret\n");
-
-	if ((unsigned int)(saved_state->save_srr0) == 0x7c800008)
-		saved_state->save_srr0 += 4;			/* BKPT_SIZE */
-
-	if(saved_state->save_srr1 & (MASK(MSR_SE) | MASK(MSR_BE))) {	/* Are we just stepping or continuing */
-		db_run_mode = STEP_ONCE;				/* We are stepping */
-	}
-	else db_run_mode = STEP_CONTINUE;			/* Otherwise we are continuing */
-	
-#ifdef XXX
-	mtspr(dabr, kdp_dabr);
-#endif
-}
-
-boolean_t 
-kdp_call_kdb(
-	void)
-{
-	switch_debugger=1;
-	return(TRUE);
-}
-
-static void kdp_print_registers(struct savearea *state)
-{
-	int i;
-	for (i=0; i<32; i++) {
-		if ((i % 8) == 0)
-			printf("\n%4d :",i);
-			printf(" %08llx",*(&state->save_r0+i));
-	}
-	printf("\n");
-	printf("cr        = 0x%08x\t\t",state->save_cr);
-	printf("xer       = 0x%08llx\n",state->save_xer);
-	printf("lr        = 0x%08llx\t\t",state->save_lr);
-	printf("ctr       = 0x%08llx\n",state->save_ctr);
-	printf("srr0(iar) = 0x%08llx\t\t",state->save_srr0);
-	printf("srr1(msr) = 0x%08llx\n",state->save_srr1);
-	printf("\n");
-}
-
-void kdp_print_backtrace(unsigned, struct savearea *);
-
-void
-kdp_print_backtrace(
-    unsigned int                exception,
-    struct savearea     *saved_state)
-{
-	disable_debug_output = FALSE;
-	debug_mode = TRUE;
-	printf("re-entering kdp:\n");
-	printf("vector=%x, \n", exception/4);
-	kdp_print_registers(saved_state);
-	print_backtrace(saved_state);
-	printf("panic: We are hanging here...\n");
-	while(1);
-}
-
-void
-kdp_machine_get_breakinsn(
-						  uint8_t *bytes,
-						  uint32_t *size
-)
-{
-	*(uint32_t *)bytes = 0x7fe00008;
-	*size = sizeof(uint32_t);
-}
-
-#define LR_OFFSET 8
-#define LR_OFFSET64 16
-
-int
-machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p)
-{
-	uint32_t *tracebuf = (uint32_t *)tracepos;
-	uint32_t fence = 0;
-	uint32_t stackptr = 0;
-	uint32_t stacklimit = 0xb0000000;
-	int framecount = 0;
-	uint32_t init_srr0 = 0;
-	uint32_t prevsp = 0;
-	uint32_t framesize = 2 * sizeof(vm_offset_t);
-	
-	if (user_p) {
-		/* Examine the user savearea */
-		init_srr0 = thread->machine.upcb->save_srr0;
-		stackptr = thread->machine.upcb->save_r1;
-		/* This bound isn't useful, but it doesn't hinder us */
-		stacklimit = 0xffffffff;
-		kdp_pmap = thread->task->map->pmap;
-	}
-	else {
-		stackptr = thread->machine.pcb->save_r1;
-		init_srr0 = thread->machine.pcb->save_srr0;
-	}
-	/* Fill in the "current" program counter */
-	*tracebuf++ = init_srr0;
-
-	for (framecount = 0; framecount < nframes; framecount++) {
-/* Bounds check */
-		if ((uint32_t) (tracebound - ((char *)tracebuf)) < (4 * framesize)) {
-			tracebuf--;
-			break;
-		}
-
-		*tracebuf++ = stackptr;
-/* Invalid frame, or hit fence */
-		if (!stackptr || (stackptr == fence)) {
-			break;
-		}
-/* Stack grows downward */		
-		if (stackptr < prevsp) {
-			break;
-		}
-/* Unaligned frame */		
-		if (stackptr & 0x000000F) {
-			break;
-		}
-		if (stackptr > stacklimit) {
-			break;
-		}
-/* Assume there's a saved link register, and read it */
-		if (kdp_machine_vm_read((caddr_t) (stackptr + LR_OFFSET), (caddr_t) tracebuf, sizeof(caddr_t)) != sizeof(caddr_t)) {
-			break;
-		}
-
-		tracebuf++;
-		prevsp = stackptr;
-/* Next frame */
-		if (kdp_machine_vm_read((caddr_t) stackptr, (caddr_t) &stackptr, sizeof(caddr_t)) != sizeof(caddr_t)) {
-			*tracebuf++ = 0;
-			break;
-		}
-	}
-/* Reset the target pmap */
-	kdp_pmap = NULL;
-	return (uint32_t) (((char *) tracebuf) - tracepos);
-}
-
-/* Routine to encapsulate the 64-bit address read hack*/
-unsigned
-machine_read64(addr64_t srcaddr, caddr_t dstaddr, uint32_t len)
-{
-	unsigned retval;
-	
-	retval = kdp_machine_vm_read(srcaddr, dstaddr, len);
-	return retval;
-}
-
-int
-machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p)
-{
-	uint64_t *tracebuf = (uint64_t *)tracepos;
-	uint32_t fence = 0;
-	addr64_t stackptr = 0;
-	uint64_t stacklimit = 0xb0000000;
-	int framecount = 0;
-	addr64_t init_srr0 = 0;
-	addr64_t prevsp = 0;
-	unsigned framesize = 2 * sizeof(addr64_t);
-	
-	if (user_p) {
-		init_srr0 = thread->machine.upcb->save_srr0;
-		stackptr = thread->machine.upcb->save_r1;
-		stacklimit = 0xffffffffffffffffULL;
-		kdp_pmap = thread->task->map->pmap;
-	}
-	else {
-		stackptr = thread->machine.pcb->save_r1;
-		init_srr0 = thread->machine.pcb->save_srr0;
-	}
-	
-	*tracebuf++ = init_srr0;
-
-	for (framecount = 0; framecount < nframes; framecount++) {
-
-		if ((uint32_t)(tracebound - ((char *)tracebuf)) < (4 * framesize)) {
-			tracebuf--;
-			break;
-		}
-
-		*tracebuf++ = stackptr;
-
-		if (!stackptr || (stackptr == fence)){
-			break;
-		}
-		if (stackptr < prevsp) {
-			break;
-		}
-		if (stackptr & 0x000000F) {
-			break;
-		}
-		if (stackptr > stacklimit) {
-			break;
-		}
-
-		if (machine_read64(stackptr+LR_OFFSET64, (caddr_t) tracebuf, sizeof(addr64_t)) != sizeof(addr64_t)) {
-			break;
-		}
-		tracebuf++;
-		
-		prevsp = stackptr;
-		if (machine_read64(stackptr, (caddr_t) &stackptr, sizeof(addr64_t)) != sizeof(addr64_t)) {
-			*tracebuf++ = 0;
-			break;
-		}
-	}
-
-	kdp_pmap = NULL;
-	return (uint32_t) (((char *) tracebuf) - tracepos);
-}
-
-
-void
-kdp_ml_enter_debugger(void)
-{
-	__asm__ __volatile__("tw 4,r3,r3");
-}
-
-int
-kdp_machine_ioport_read(kdp_readioport_req_t *rq, caddr_t data, uint16_t lcpu)
-{
-    return 0;
-}
-
-int
-kdp_machine_ioport_write(kdp_writeioport_req_t *rq, caddr_t data, uint16_t lcpu)
-{
-    return 0;
-}
-
-int
-kdp_machine_msr64_read(kdp_readmsr64_req_t *rq, caddr_t data, uint16_t lcpu)
-{
-    return 0;
-}
-
-int
-kdp_machine_msr64_write(kdp_writemsr64_req_t *rq, __unused caddr_t data, uint16_t lcpu)
-{
-    return 0;
-}
diff --git a/osfmk/kdp/ml/ppc/kdp_misc.s b/osfmk/kdp/ml/ppc/kdp_misc.s
deleted file mode 100644
index a007a296b..000000000
--- a/osfmk/kdp/ml/ppc/kdp_misc.s
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#include <debug.h>
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <mach/ppc/vm_param.h>
-#include <assym.s>
-
-ENTRY(kdp_sync_cache, TAG_NO_FRAME_USED)
-	sync					/* data sync */
-	isync					/* inst sync */
-	blr						/* return nothing */
-
-
-;
-;		This is a really stupid physical copy. 1 whole byte at a time...
-;		Source and dest are long longs.  We do this with 64-bit on if
-;		supported.
-;
-
-			.align	5
-			.globl	EXT(kdp_copy_phys)
-			
-LEXT(kdp_copy_phys)
-
-			mflr	r12						; Save return
-			
-			bl		EXT(ml_set_physical_disabled)	; No DR and get 64-bit
-			
-			rlwinm	r3,r3,0,1,0				; Dup low to high source
-			rlwinm	r5,r5,0,1,0				; Dup low to high dest
-			rlwimi	r3,r4,0,0,31			; Copy bottom on in source
-			rlwimi	r5,r6,0,0,31			; Copy bottom on in dest
-			
-kcpagain:	addic.	r7,r7,-1				; Drop count
-			blt--	kcpdone					; All done...
-			lbz		r0,0(r3)				; Grab a whole one
-			stb		r0,0(r5)				; Lay it gently down
-			addi	r3,r3,1					; Next source
-			addi	r5,r5,1					; Next destination
-			b		kcpagain				; Once more with feeling...
-		
-kcpdone:	bl		EXT(ml_restore)			; Put trans, etc back
-			mtlr	r12						; Restore return
-			blr								; Come again please...
-		
diff --git a/osfmk/kdp/ml/ppc/kdp_vm.c b/osfmk/kdp/ml/ppc/kdp_vm.c
deleted file mode 100644
index 737fd862d..000000000
--- a/osfmk/kdp/ml/ppc/kdp_vm.c
+++ /dev/null
@@ -1,570 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#include <mach/mach_types.h>
-#include <mach/vm_attributes.h>
-#include <mach/vm_param.h>
-
-#include <vm/pmap.h>
- 
-#include <ppc/proc_reg.h>
-#include <ppc/machparam.h>
-#include <ppc/mem.h>
-#include <ppc/pmap.h>
-#include <ppc/mappings.h> 
-#include <ppc/cpu_data.h>
-#include <ppc/misc_protos.h>
-
-#include <mach/thread_status.h>
-#include <mach-o/loader.h>
-#include <mach/vm_region.h>
-#include <mach/vm_statistics.h>
-
-#include <vm/vm_kern.h>
-#include <vm/vm_object.h>
-#include <vm/vm_protos.h>
-#include <kdp/kdp_core.h>
-#include <kdp/kdp_udp.h>
-#include <kdp/kdp_internal.h>
-
-#include <ppc/misc_protos.h>
-#include <mach/vm_map.h>
-
-
-pmap_t kdp_pmap;
-boolean_t kdp_trans_off;
-boolean_t kdp_read_io;
-
-extern vm_offset_t sectTEXTB, sectDATAB, sectLINKB, sectPRELINKB;
-extern unsigned long sectSizeTEXT, sectSizeDATA, sectSizeLINK, sectSizePRELINK;
-
-static addr64_t	kdp_vtophys(pmap_t pmap, addr64_t va);
-int             kern_dump(void);
-
-typedef struct {
-  int	flavor;			/* the number for this flavor */
-  mach_msg_type_number_t	count;	/* count of ints in this flavor */
-} mythread_state_flavor_t;
-
-static mythread_state_flavor_t thread_flavor_array[] = {
-  {PPC_THREAD_STATE , PPC_THREAD_STATE_COUNT},
-};
-
-static int kdp_mynum_flavors = 1;
-static int MAX_TSTATE_FLAVORS = 1;
-
-typedef struct {
-  vm_offset_t header; 
-  int  hoffset;
-  mythread_state_flavor_t *flavors;
-  int tstate_size;
-} tir_t;
-
-char command_buffer[512];
-
-/*
- *
- */
-static addr64_t
-kdp_vtophys(
-	pmap_t pmap,
-	addr64_t va)
-{
-	addr64_t    pa;
-	ppnum_t pp;
-
-	pp = pmap_find_phys(pmap, va);				/* Get the page number */
-	if(!pp) return 0;							/* Just return if no translation */
-	
-	pa = ((addr64_t)pp << 12) | (va & 0x0000000000000FFFULL);	/* Shove in the page offset */
-	return(pa);
-}
-/* Verify that src is valid, and physically copy len bytes from src to
- * dst, translating if necessary. If translation is enabled
- * (kdp_trans_off is 0), a non-zero kdp_pmap specifies the pmap to use
- * when translating src.
- */
-
-mach_vm_size_t
-kdp_machine_vm_read( mach_vm_address_t src, caddr_t dst, mach_vm_size_t len)
-{
-	addr64_t cur_virt_src, cur_virt_dst;
-	addr64_t cur_phys_src, cur_phys_dst;
-	unsigned resid, cnt;
-	unsigned int dummy;
-	pmap_t pmap;
-
-#ifdef KDP_VM_READ_DEBUG
-    kprintf("kdp_machine_vm_read1: src %llx dst %llx len %x - %08X %08X\n", src, dst, len, ((unsigned long *)src)[0], ((unsigned long *)src)[1]);
-#endif
-
-	cur_virt_src = (addr64_t)src;
-	cur_virt_dst = (addr64_t)(intptr_t)dst;
-	
-	if (kdp_trans_off) {
-		resid = len;	/* Get the length to copy */
-
-		while (resid != 0) {
-
-			if((cur_phys_dst = kdp_vtophys(kernel_pmap, cur_virt_dst)) == 0)
-				goto exit;
-
-			if(kdp_read_io == 0)
-				if(!mapping_phys_lookup((ppnum_t)(cur_virt_src >> 12), &dummy)) return 0;	/* Can't read where there's not any memory */
-		
-			cnt = 4096 - (cur_virt_src & 0xFFF);	/* Get length left on page */
-			if (cnt > (4096 - (cur_virt_dst & 0xFFF)))
-				cnt = 4096 - (cur_virt_dst & 0xFFF);
-		
-			if (cnt > resid)  cnt = resid;
-
-			bcopy_phys(cur_virt_src, cur_phys_dst, cnt);		/* Copy stuff over */
-
-			cur_virt_src += cnt;
-			cur_virt_dst += cnt;
-			resid -= cnt;
-		}
-		
-	} else {
-
-		resid = len;
-
-		if(kdp_pmap) pmap = kdp_pmap;				/* If special pmap, use it */
-		else pmap = kernel_pmap;					/* otherwise, use kernel's */
-
-		while (resid != 0) {   
-/* Always translate the destination using the kernel_pmap. */
-			if((cur_phys_dst = kdp_vtophys(kernel_pmap, cur_virt_dst)) == 0)
-				goto exit;
-
-			if((cur_phys_src = kdp_vtophys(pmap, cur_virt_src)) == 0)
-				goto exit;
-
-			if(kdp_read_io == 0)
-				if(!mapping_phys_lookup((ppnum_t)(cur_phys_src >> 12), &dummy)) goto exit;	/* Can't read where there's not any memory */
-
-			cnt = 4096 - (cur_virt_src & 0xFFF);	/* Get length left on page */
-			if (cnt > (4096 - (cur_virt_dst & 0xFFF)))
-				cnt = 4096 - (cur_virt_dst & 0xFFF);
-
-			if (cnt > resid) cnt = resid;
-
-#ifdef KDP_VM_READ_DEBUG
-				kprintf("kdp_machine_vm_read2: pmap %08X, virt %016LLX, phys %016LLX\n", 
-					pmap, cur_virt_src, cur_phys_src);
-#endif
-
-			bcopy_phys(cur_phys_src, cur_phys_dst, cnt);		/* Copy stuff over */
-			
-			cur_virt_src +=cnt;
-			cur_virt_dst +=cnt;
-			resid -= cnt;
-		}
-	}
-exit:
-#ifdef KDP_VM_READ_DEBUG
-	kprintf("kdp_machine_vm_read: ret %08X\n", len-resid);
-#endif
-        return (len - resid);
-}
-
-mach_vm_size_t
-kdp_machine_phys_read(kdp_readphysmem64_req_t *rq __unused, caddr_t dst __unused, uint16_t lcpu __unused)
-{
-    return 0; /* unimplemented */
-}
-
-/*
- * 
- */
-mach_vm_size_t
-kdp_machine_vm_write( caddr_t src, mach_vm_address_t dst, mach_vm_size_t len)
-{
-	addr64_t cur_virt_src, cur_virt_dst;
-	addr64_t cur_phys_src, cur_phys_dst;
-	unsigned resid, cnt, cnt_src, cnt_dst;
-
-#ifdef KDP_VM_WRITE_DEBUG
-	printf("kdp_vm_write: src %x dst %x len %x - %08X %08X\n", src, dst, len, ((unsigned long *)src)[0], ((unsigned long *)src)[1]);
-#endif
-
-	cur_virt_src = (addr64_t)(intptr_t)src;
-	cur_virt_dst = (addr64_t)dst;
-
-	resid = len;
-
-	while (resid != 0) {
-		if ((cur_phys_dst = kdp_vtophys(kernel_pmap, cur_virt_dst)) == 0) 
-			goto exit;
-
-		if ((cur_phys_src = kdp_vtophys(kernel_pmap, cur_virt_src)) == 0) 
-			goto exit;
-
-		cnt_src = ((cur_phys_src + NBPG) & (-NBPG)) - cur_phys_src;
-		cnt_dst = ((cur_phys_dst + NBPG) & (-NBPG)) - cur_phys_dst;
-
-		if (cnt_src > cnt_dst)
-			cnt = cnt_dst;
-		else
-			cnt = cnt_src;
-		if (cnt > resid) 
-			cnt = resid;
-
-		bcopy_phys(cur_phys_src, cur_phys_dst, cnt);		/* Copy stuff over */
-		sync_cache64(cur_phys_dst, cnt);					/* Sync caches */
-
-		cur_virt_src +=cnt;
-		cur_virt_dst +=cnt;
-		resid -= cnt;
-	}
-exit:
-	return (len - resid);
-}
-
-mach_vm_size_t
-kdp_machine_phys_write(kdp_writephysmem64_req_t *rq __unused, caddr_t src __unused,
-		       uint16_t lcpu __unused)
-{
-    return 0; /* unimplemented */
-}
-
-static void
-kern_collectth_state(thread_t thread, tir_t *t)
-{
-  vm_offset_t	header;
-  int  hoffset, i ;
-  mythread_state_flavor_t *flavors;
-  struct thread_command	*tc;
-  /*
-   *	Fill in thread command structure.
-   */
-  header = t->header;
-  hoffset = t->hoffset;
-  flavors = t->flavors;
-	
-  tc = (struct thread_command *) (header + hoffset);
-  tc->cmd = LC_THREAD;
-  tc->cmdsize = sizeof(struct thread_command)
-    + t->tstate_size;
-  hoffset += sizeof(struct thread_command);
-  /*
-   * Follow with a struct thread_state_flavor and
-   * the appropriate thread state struct for each
-   * thread state flavor.
-   */
-  for (i = 0; i < kdp_mynum_flavors; i++) {
-    *(mythread_state_flavor_t *)(header+hoffset) =
-      flavors[i];
-    hoffset += sizeof(mythread_state_flavor_t);
-
-    if (machine_thread_get_kern_state(thread, flavors[i].flavor,
-			     (thread_state_t) (header+hoffset),
-				      &flavors[i].count) != KERN_SUCCESS)
-      printf ("Failure in machine_thread_get_kern_state()\n");
-    hoffset += flavors[i].count*sizeof(int);
-  }
-
-  t->hoffset = hoffset;
-}
-
-int
-kdp_dump_trap(
-	      int type,
-	      __unused struct savearea *regs)
-{
-  printf ("An unexpected trap (type %d) occurred during the kernel dump, terminating.\n", type);
-  kdp_send_crashdump_pkt(KDP_EOF, NULL, 0, ((void *) 0));
-  abort_panic_transfer();
-  kdp_flag &= ~KDP_PANIC_DUMP_ENABLED;
-  kdp_flag &= ~PANIC_CORE_ON_NMI;
-  kdp_flag &= ~PANIC_LOG_DUMP;
-
-  kdp_reset();
-
-  kdp_raise_exception(EXC_BAD_ACCESS, 0, 0, kdp.saved_state);
-  return( 0 );
-}
-
-/*
- * Kernel dump (limited to currently executing 32 bit mach_kernel only)
- */
-int
-kern_dump(void)
-{
-  int error = 0;
-  vm_map_t	map;
-  unsigned int	thread_count, segment_count;
-  unsigned int	command_size = 0, header_size = 0, tstate_size = 0;
-  unsigned int	hoffset = 0, foffset = 0, nfoffset = 0,  vmoffset = 0;
-  unsigned int  max_header_size = 0;
-  vm_offset_t	header;
-  struct mach_header	*mh;
-  struct segment_command	*sc;
-  vm_size_t	size;
-  vm_prot_t	prot = 0;
-  vm_prot_t	maxprot = 0;
-  vm_inherit_t	inherit = 0;
-  int		error1 = 0;
-  mythread_state_flavor_t flavors[MAX_TSTATE_FLAVORS];
-  vm_size_t	nflavors;
-  vm_size_t	i;
-  uint32_t nesting_depth = 0;
-  kern_return_t	kret = 0;
-  struct vm_region_submap_info_64 vbr;
-  mach_msg_type_number_t vbrcount  = 0;
-  tir_t tir1;
-
-  int panic_error = 0;
-  unsigned int txstart = 0;
-  unsigned int mach_section_count = 4;
-  unsigned int num_sects_txed = 0;
-
-  map = kernel_map;
-
-  thread_count = 1;
-  segment_count = get_vmmap_entries(map); 
-  
-  printf("Kernel map has %d entries\n", segment_count);
-
-  nflavors = kdp_mynum_flavors;
-  bcopy((char *)thread_flavor_array,(char *) flavors,sizeof(thread_flavor_array));
-
-  for (i = 0; i < nflavors; i++)
-    tstate_size += sizeof(mythread_state_flavor_t) +
-      (flavors[i].count * sizeof(int));
-
-  command_size = (segment_count + mach_section_count) *
-    sizeof(struct segment_command) +
-    thread_count*sizeof(struct thread_command) +
-    tstate_size*thread_count;
-
-  header_size = command_size + sizeof(struct mach_header);
-  header = (vm_offset_t) command_buffer;
-	
-  /*
-   *	Set up Mach-O header for currently executing 32 bit kernel.
-   */
-  printf ("Generated Mach-O header size was %d\n", header_size);
-
-  mh = (struct mach_header *) header;
-  mh->magic = MH_MAGIC;
-  mh->cputype = cpu_type();
-  mh->cpusubtype = cpu_subtype();	/* XXX incorrect; should match kernel */
-  mh->filetype = MH_CORE;
-  mh->ncmds = segment_count + thread_count + mach_section_count;
-  mh->sizeofcmds = command_size;
-  mh->flags = 0;
-
-  hoffset = sizeof(struct mach_header);	/* offset into header */
-  foffset = round_page_32(header_size); /* offset into file */
-  /* Padding.. */
-  if ((foffset - header_size) < (4*sizeof(struct segment_command))) {
-      /* Hack */
-      foffset += ((4*sizeof(struct segment_command)) - (foffset-header_size)); 
-    }
-
-  max_header_size = foffset;
-
-  vmoffset = VM_MIN_ADDRESS;		/* offset into VM */
-
-  /* Transmit the Mach-O MH_CORE header, and seek forward past the 
-   * area reserved for the segment and thread commands 
-   * to begin data transmission 
-   */
-
-   if ((panic_error = kdp_send_crashdump_pkt(KDP_SEEK, NULL, sizeof(nfoffset) , &nfoffset)) < 0) { 
-     printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); 
-     return -1; 
-   } 
-
-   if ((panic_error = kdp_send_crashdump_data(KDP_DATA, NULL, sizeof(struct mach_header), (caddr_t) mh) < 0)) {
-     printf ("kdp_send_crashdump_data failed with error %d\n", panic_error);
-     return -1 ;
-   }
-
-   if ((panic_error = kdp_send_crashdump_pkt(KDP_SEEK, NULL, sizeof(foffset) , &foffset) < 0)) {
-     printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
-     return (-1);
-   }
-  printf ("Transmitting kernel state, please wait: ");
-
-  while ((segment_count > 0) || (kret == KERN_SUCCESS)){
-    /* Check if we've transmitted all the kernel sections */
-    if (num_sects_txed == mach_section_count) {
-      
-    while (1) {
-
-    /*
-     *	Get region information for next region.
-     */
-
-      vbrcount = VM_REGION_SUBMAP_INFO_COUNT_64;
-      if((kret = vm_region_recurse_64(map, 
-				      &vmoffset, &size, &nesting_depth, 
-				      (vm_region_recurse_info_t)&vbr,
-				      &vbrcount)) != KERN_SUCCESS) {
-	break;
-      }
-
-      if(vbr.is_submap) {
-	nesting_depth++;
-	continue;
-      } else {
-	break;
-      }
-    }
-
-    if(kret != KERN_SUCCESS)
-      break;
-
-    prot = vbr.protection;
-    maxprot = vbr.max_protection;
-    inherit = vbr.inheritance;
-    }
-    else
-      {
-	switch (num_sects_txed) {
-	case 0:
-	  {
-	    /* Transmit the kernel text section */
-	    vmoffset = sectTEXTB;
-	    size = sectSizeTEXT;
-	  }
-	  break;
-        case 1:
-	  {
-	    vmoffset = sectDATAB;
-	    size = sectSizeDATA;
-	  }
-	  break;
-	case 2:
-	  {
-	    vmoffset = sectPRELINKB;
-	    size = sectSizePRELINK;
-	  }
-	  break;
-	case 3:
-	  {
-	    vmoffset = sectLINKB;
-	    size = sectSizeLINK;
-	  }
-	  break;
-	  /* TODO the lowmem vector area may be useful, but its transmission is
-	   * disabled for now. The traceback table area should be transmitted 
-	   * as well - that's indirected from 0x5080.
-	   */
-	}
-	num_sects_txed++;
-      }
-    /*
-     *	Fill in segment command structure.
-     */
-    
-    if (hoffset > max_header_size)
-      break;
-    sc = (struct segment_command *) (header);
-    sc->cmd = LC_SEGMENT;
-    sc->cmdsize = sizeof(struct segment_command);
-    sc->segname[0] = 0;
-    sc->vmaddr = vmoffset;
-    sc->vmsize = size;
-    sc->fileoff = foffset;
-    sc->filesize = size;
-    sc->maxprot = maxprot;
-    sc->initprot = prot;
-    sc->nsects = 0;
-
-    if ((panic_error = kdp_send_crashdump_pkt(KDP_SEEK, NULL, sizeof(hoffset) , &hoffset)) < 0) { 
-	printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); 
-	return -1; 
-      } 
-    
-    if ((panic_error = kdp_send_crashdump_data(KDP_DATA, NULL, sizeof(struct segment_command) , (caddr_t) sc)) < 0) {
-	printf ("kdp_send_crashdump_data failed with error %d\n", panic_error);
-	return -1 ;
-      }
-
-    /* Do not transmit memory tagged VM_MEMORY_IOKIT - instead, seek past that
-     * region on the server - this creates a hole in the file  
-     */
-
-    if ((vbr.user_tag != VM_MEMORY_IOKIT)) {
-      
-      if ((panic_error = kdp_send_crashdump_pkt(KDP_SEEK, NULL, sizeof(foffset) , &foffset)) < 0) {
-	  printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
-	  return (-1);
-	}
-
-      txstart = vmoffset;
-
-      if ((panic_error = kdp_send_crashdump_data(KDP_DATA, NULL, size, (caddr_t) txstart)) < 0)	{
-	  printf ("kdp_send_crashdump_data failed with error %d\n", panic_error);
-	  return -1 ;
-	}
-    }
-
-    hoffset += sizeof(struct segment_command);
-    foffset += size;
-    vmoffset += size;
-    segment_count--;
-  }
-  tir1.header = header;
-  tir1.hoffset = 0;
-  tir1.flavors = flavors;
-  tir1.tstate_size = tstate_size;
-
-  /* Now send out the LC_THREAD load command, with the thread information
-   * for the current activation.
-   * Note that the corefile can contain LC_SEGMENT commands with file offsets
-   * that point past the edge of the corefile, in the event that the last N
-   * VM regions were all I/O mapped or otherwise non-transferable memory, 
-   * not followed by a normal VM region; i.e. there will be no hole that 
-   * reaches to the end of the core file.
-   */
-  kern_collectth_state (current_thread(), &tir1);
-
-  if ((panic_error = kdp_send_crashdump_pkt(KDP_SEEK, NULL, sizeof(hoffset) , &hoffset)) < 0) { 
-      printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error); 
-      return -1; 
-    } 
-  
-    if ((panic_error = kdp_send_crashdump_data(KDP_DATA, NULL, tir1.hoffset , (caddr_t) header)) < 0) {
-	printf ("kdp_send_crashdump_data failed with error %d\n", panic_error);
-	return -1 ;
-      }
-    
-    /* last packet */
-    if ((panic_error = kdp_send_crashdump_pkt(KDP_EOF, NULL, 0, ((void *) 0))) < 0)
-      {
-	printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
-	return (-1) ;
-      }
-    
-    if (error == 0)
-      error = error1;
-    return (error);
-}
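
(A minimal userspace sketch of the per-page chunking used by the deleted kdp_machine_vm_read() above, assuming a 4 KB page size and plain pointers in place of kdp_vtophys()/bcopy_phys(); names and the PAGE_SIZE constant here are illustrative, not the kernel's:)

#include <stdint.h>
#include <string.h>

#define PAGE_SIZE 4096ULL

static size_t
copy_chunked(uint8_t *dst, const uint8_t *src, size_t len)
{
	size_t resid = len;

	while (resid != 0) {
		/* Bytes left on the current source and destination pages. */
		size_t scnt = PAGE_SIZE - ((uintptr_t)src & (PAGE_SIZE - 1));
		size_t dcnt = PAGE_SIZE - ((uintptr_t)dst & (PAGE_SIZE - 1));
		size_t cnt = (scnt < dcnt) ? scnt : dcnt;

		if (cnt > resid)
			cnt = resid;
		memcpy(dst, src, cnt);	/* stands in for bcopy_phys() */
		src += cnt;
		dst += cnt;
		resid -= cnt;
	}
	return len - resid;	/* bytes actually copied */
}
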
diff --git a/osfmk/kdp/ml/x86_64/kdp_machdep.c b/osfmk/kdp/ml/x86_64/kdp_machdep.c
index 1da2a0133..d7e071569 100644
--- a/osfmk/kdp/ml/x86_64/kdp_machdep.c
+++ b/osfmk/kdp/ml/x86_64/kdp_machdep.c
@@ -467,6 +467,11 @@ kdp_i386_trap(
 	break;
     }
 
+    if (current_cpu_datap()->cpu_fatal_trap_state) {
+	    current_cpu_datap()->cpu_post_fatal_trap_state = saved_state;
+	    saved_state = current_cpu_datap()->cpu_fatal_trap_state;
+    }
+
     kdp_raise_exception(exception, code, subcode, saved_state);
     /* If the instruction single step bit is set, disable kernel preemption
      */
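
(The hunk above substitutes the register state captured at the original fatal trap before raising the debugger exception, stashing the later state for inspection. A hedged sketch of that substitution pattern, with illustrative types standing in for x86_saved_state64_t and the per-CPU data:)

typedef struct saved_state saved_state_t;	/* opaque stand-in */

struct cpu_data_sketch {
	saved_state_t *fatal_trap_state;	/* set at the original fault */
	saved_state_t *post_fatal_trap_state;	/* stashed later state */
};

static saved_state_t *
pick_report_state(struct cpu_data_sketch *cd, saved_state_t *cur)
{
	if (cd->fatal_trap_state != NULL) {
		cd->post_fatal_trap_state = cur; /* keep the newer state around */
		return cd->fatal_trap_state;	 /* report the original fault */
	}
	return cur;
}
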
diff --git a/osfmk/kdp/ml/x86_64/kdp_vm.c b/osfmk/kdp/ml/x86_64/kdp_vm.c
index 8a80e7a3a..a76167621 100644
--- a/osfmk/kdp/ml/x86_64/kdp_vm.c
+++ b/osfmk/kdp/ml/x86_64/kdp_vm.c
@@ -45,9 +45,6 @@
 #include <vm/vm_protos.h>
 #include <vm/vm_kern.h>
 
-extern vm_offset_t sectTEXTB, sectDATAB, sectLINKB, sectPRELINKB;
-extern unsigned long sectSizeTEXT, sectSizeDATA, sectSizeLINK, sectSizePRELINK;
-
 int	kern_dump(void);
 int	kdp_dump_trap(int type, x86_saved_state64_t *regs);
 
@@ -108,8 +105,31 @@ kern_collectth_state(thread_t thread, tir_t *t)
 		if (flavors[i].flavor == x86_THREAD_STATE64) {
 			x86_thread_state64_t *tstate = (x86_thread_state64_t *) (header + hoffset);
 			vm_offset_t kstack;
+			x86_saved_state64_t *cpstate = current_cpu_datap()->cpu_fatal_trap_state;
 			bzero(tstate, x86_THREAD_STATE64_COUNT * sizeof(int));
-			if ((kstack = thread->kernel_stack) != 0){
+			if ((current_thread() == thread) && (cpstate != NULL)) {
+				tstate->rax = cpstate->rax;
+				tstate->rbx = cpstate->rbx;
+				tstate->rcx = cpstate->rcx;
+				tstate->rdx = cpstate->rdx;
+				tstate->rdi = cpstate->rdi;
+				tstate->rsi = cpstate->rsi;
+				tstate->rbp = cpstate->rbp;
+				tstate->r8 = cpstate->r8;
+				tstate->r9 = cpstate->r9;
+				tstate->r10 = cpstate->r10;
+				tstate->r11 = cpstate->r11;
+				tstate->r12 = cpstate->r12;
+				tstate->r13 = cpstate->r13;
+				tstate->r14 = cpstate->r14;
+				tstate->r15 = cpstate->r15;
+				tstate->rip = cpstate->isf.rip;
+				tstate->rsp = cpstate->isf.rsp;
+				tstate->rflags = cpstate->isf.rflags;
+				tstate->cs = cpstate->isf.cs;
+				tstate->fs = cpstate->fs;
+				tstate->gs = cpstate->gs;
+			} else if ((kstack = thread->kernel_stack) != 0){
 				struct x86_kernel_state *iks = STACK_IKS(kstack);
 				tstate->rbx = iks->k_rbx;
 				tstate->rsp = iks->k_rsp;
@@ -119,7 +139,7 @@ kern_collectth_state(thread_t thread, tir_t *t)
 				tstate->r14 = iks->k_r14;
 				tstate->r15 = iks->k_r15;
 				tstate->rip = iks->k_rip;
-		}
+			}
 		}
 		else if (machine_thread_get_kern_state(thread,
 			flavors[i].flavor, (thread_state_t) (header+hoffset),
@@ -168,7 +188,6 @@ kern_dump(void)
 	mach_vm_size_t	size = 0;
 	vm_prot_t	prot = 0;
 	vm_prot_t	maxprot = 0;
-	vm_inherit_t	inherit = 0;
 	mythread_state_flavor_t flavors[MAX_TSTATE_FLAVORS];
 	vm_size_t	nflavors;
 	vm_size_t	i;
@@ -180,7 +199,6 @@ kern_dump(void)
 
 	int error = 0;
 	int panic_error = 0;
-	unsigned int mach_section_count = 0;
 
 	map = kernel_map;
 
@@ -196,7 +214,7 @@ kern_dump(void)
 		tstate_size += (uint32_t)(sizeof(mythread_state_flavor_t) +
 		    (flavors[i].count * sizeof(int)));
 
-	command_size = (uint32_t)((segment_count + mach_section_count) *
+	command_size = (uint32_t)((segment_count) *
 	    sizeof(struct segment_command_64) +
 	    thread_count * sizeof(struct thread_command) +
 	    tstate_size * thread_count);
@@ -214,7 +232,7 @@ kern_dump(void)
 	mh64->cputype = cpu_type();
 	mh64->cpusubtype = cpu_subtype();
 	mh64->filetype = MH_CORE;
-	mh64->ncmds = segment_count + thread_count + mach_section_count;
+	mh64->ncmds = segment_count + thread_count;
 	mh64->sizeofcmds = command_size;
 	mh64->flags = 0;
 	mh64->reserved = 0;
@@ -281,7 +299,6 @@ kern_dump(void)
 
 		prot = vbr.protection;
 		maxprot = vbr.max_protection;
-		inherit = vbr.inheritance;
 
 		/*
 		 *	Fill in segment command structure.
diff --git a/osfmk/kern/Makefile b/osfmk/kern/Makefile
index 3b742c63d..cf8f5539e 100644
--- a/osfmk/kern/Makefile
+++ b/osfmk/kern/Makefile
@@ -18,6 +18,7 @@ EXPORT_ONLY_FILES = \
 	cpu_data.h \
 	debug.h \
 	etimer.h \
+	extmod_statistics.h \
 	ipc_mig.h \
 	ipc_misc.h \
 	kalloc.h \
diff --git a/osfmk/kern/ast.c b/osfmk/kern/ast.c
index b6540f92d..e7b895598 100644
--- a/osfmk/kern/ast.c
+++ b/osfmk/kern/ast.c
@@ -80,6 +80,10 @@
 #include <kern/wait_queue.h>
 #include <mach/policy.h>
 #include <machine/trap.h> // for CHUD AST hook
+#include <machine/pal_routines.h>
+
+
+volatile perfASTCallback perfASTHook;
 
 
 void
@@ -99,14 +103,14 @@ ast_taken(
 	boolean_t		preempt_trap = (reasons == AST_PREEMPTION);
 	ast_t			*myast = ast_pending();
 	thread_t		thread = current_thread();
-	perfCallback	perf_hook = perfASTHook;
+	perfASTCallback	perf_hook = perfASTHook;
 
 	/*
 	 * CHUD hook - all threads including idle processor threads
 	 */
 	if (perf_hook) {
 		if (*myast & AST_CHUD_ALL) {
-			(*perf_hook)(0, NULL, 0, 0);
+			(*perf_hook)(reasons, myast);
 			
 			if (*myast == AST_NONE)
 				return;
@@ -189,6 +193,7 @@ ast_check(
 	thread_t			thread = processor->active_thread;
 
 	processor->current_pri = thread->sched_pri;
+	processor->current_thmode = thread->sched_mode;
 	if (	processor->state == PROCESSOR_RUNNING		||
 			processor->state == PROCESSOR_SHUTDOWN		) {
 		ast_t			preempt;
@@ -196,6 +201,8 @@ ast_check(
 		/*
 		 *	Propagate thread ast to processor.
 		 */
+		pal_ast_check(thread);
+
 		ast_propagate(thread->ast);
 
 		/*
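
(ast_taken() above reads the volatile perfASTHook pointer into a local before testing and calling it, so the hook cannot be cleared between the check and the call. A standalone sketch of that pattern; the callback type and names are illustrative:)

#include <stddef.h>

typedef unsigned int ast_sketch_t;
typedef void (*perf_ast_cb_t)(ast_sketch_t reasons, ast_sketch_t *myast);

volatile perf_ast_cb_t perf_ast_hook;	/* installed by a profiler, if any */

static void
ast_taken_sketch(ast_sketch_t reasons, ast_sketch_t *myast)
{
	perf_ast_cb_t hook = perf_ast_hook;	/* one racy read into a local */

	if (hook != NULL)
		(*hook)(reasons, myast);	/* safe even if the global clears */
}
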
diff --git a/osfmk/kern/audit_sessionport.c b/osfmk/kern/audit_sessionport.c
index f42000464..7e8ee9c30 100644
--- a/osfmk/kern/audit_sessionport.c
+++ b/osfmk/kern/audit_sessionport.c
@@ -30,12 +30,13 @@
 #include <ipc/ipc_port.h>
 #include <kern/ipc_kobject.h>
 #include <kern/audit_sessionport.h>
+#include <libkern/OSAtomic.h>
 
 #if CONFIG_AUDIT
 /*
  * audit_session_mksend
  *
- * Description: Obtain a send right for given audit session information. 
+ * Description: Obtain a send right for the given audit session.
  *
 * Parameters:	*aia_p		Audit session information to associate with
  * 				the new port.
@@ -45,48 +46,60 @@
  * Returns:	!NULL		Resulting send right.	
  * 		NULL		Failed to allocate port (due to lack of memory
  * 				resources).
- *
- * 		*sessionport	The session port that may have been allocated.
- *
- * Notes: On return, sendport will be set to the new send right on success,
- *	  or null/dead on error.
+ *
+ * Assumptions: Caller holds a reference on the session during the call.
+ *		If there were no outstanding send rights against the port,
+ *		hold a reference on the session and arm a new no-senders
+ *		notification to determine when to release that reference.
+ *		Otherwise, by creating an additional send right, we share
+ *		the port's reference until all send rights go away.
  */
 ipc_port_t
 audit_session_mksend(struct auditinfo_addr *aia_p, ipc_port_t *sessionport)
 {
-	ipc_port_t notifyport;
 	ipc_port_t sendport = IPC_PORT_NULL;
+	ipc_port_t port;
 
 	/*
-	 * If we have an existing, active session port then use it. 
+	 * If we don't have an existing session port, then create one.
 	 */
-	sendport = ipc_port_make_send(*sessionport);
-	if (IP_VALID(sendport)) {
-		ip_lock(sendport);
-		if (ip_active(sendport) && 
-		    IKOT_AU_SESSIONPORT == ip_kotype(sendport)) {
-			ip_unlock(sendport);
-			return (sendport);
-		}
-		ip_unlock(sendport);
-		ipc_port_release_send(sendport);
+	port = *sessionport;
+	if (!IP_VALID(port)) {
+		ipc_port_t new_port = ipc_port_alloc_kernel();
+		if (!IP_VALID(new_port))
+			return new_port;
+		ipc_kobject_set(new_port, (ipc_kobject_t)aia_p, IKOT_AU_SESSIONPORT);
+		if (!OSCompareAndSwapPtr(port, new_port, sessionport))
+			ipc_port_dealloc_kernel(new_port);
+		port = *sessionport;
 	}
 
+	assert(ip_active(port) && IKOT_AU_SESSIONPORT == ip_kotype(port));
+	sendport = ipc_port_make_send(port);
+
 	/*
-	 * Otherwise, create a new one for this session.
+	 * If we don't have a no-senders notification outstanding against
+	 * the port, take a reference on the session and request one.
 	 */
-	*sessionport = ipc_port_alloc_kernel();
-	if (IP_VALID(*sessionport)) {
-		ipc_kobject_set(*sessionport, (ipc_kobject_t)aia_p,
-		    IKOT_AU_SESSIONPORT);
-
-		/* Request a no-senders notification. */
-		notifyport = ipc_port_make_sonce(*sessionport);
-		ip_lock(*sessionport);
-		/* unlocked by ipc_port_nsrequest */
-		ipc_port_nsrequest(*sessionport, 1, notifyport, &notifyport);
+	if (IP_NULL == port->ip_nsrequest) {
+		ipc_port_t notifyport;
+
+		audit_session_aiaref(aia_p);
+
+		/* Need a send-once right for the target of the notification */
+		notifyport = ipc_port_make_sonce(port);
+
+		/* Request a no-senders notification (at the new make-send threshold) */
+		ip_lock(port);
+		ipc_port_nsrequest(port, port->ip_mscount, notifyport, &notifyport);
+		/* port unlocked */
+
+		if (IP_NULL != notifyport) {
+			/* race requesting notification */
+			audit_session_aiaunref(aia_p);
+			ipc_port_release_sonce(notifyport);
+		}
 	}
-	sendport = ipc_port_make_send(*sessionport);
 
 	return (sendport);
 }
@@ -113,10 +126,12 @@ audit_session_porttoaia(ipc_port_t port)
 
 	if (IP_VALID(port)) {
 		ip_lock(port);
-		if (ip_active(port) && IKOT_AU_SESSIONPORT == ip_kotype(port))
+		if (IKOT_AU_SESSIONPORT == ip_kotype(port)) {
+			assert(ip_active(port));
 			aia_p = (struct auditinfo_addr *)port->ip_kobject;
+		}
 		ip_unlock(port);
-	} 
+	}
 
 	return (aia_p);
 }
@@ -149,28 +164,50 @@ audit_session_nosenders(mach_msg_header_t *msg)
 	ipc_port_t notifyport;
 	struct auditinfo_addr *port_aia_p = NULL;
 
-	if (!IP_VALID(port))
-		return;
+	assert(IKOT_AU_SESSIONPORT == ip_kotype(port));
 	ip_lock(port);
-	if (ip_active(port) && IKOT_AU_SESSIONPORT == ip_kotype(port)) {
-		port_aia_p = (struct auditinfo_addr *)port->ip_kobject;
-		assert(NULL != port_aia_p);
-		if (port->ip_mscount <= notification->not_count)
-			ipc_kobject_set_atomically(port, IKO_NULL, IKOT_NONE);
-		else {
-			/* re-arm the notification */
-			ip_unlock(port);
-			notifyport = ipc_port_make_sonce(port);
-			ip_lock(port);
-			/* unlocked by ipc_port_nsrequest */
-			ipc_port_nsrequest(port, port->ip_mscount, notifyport,
-			    &notifyport);
-			return;
+	assert(ip_active(port));
+	port_aia_p = (struct auditinfo_addr *)port->ip_kobject;
+	assert(NULL != port_aia_p);
+
+	/*
+	 * if new send rights have been made since the last notify
+	 * request, re-arm the notification with the new threshold.
+	 */
+	if (port->ip_mscount > notification->not_count) {
+		ip_unlock(port);
+		notifyport = ipc_port_make_sonce(port);
+		ip_lock(port);
+		ipc_port_nsrequest(port, port->ip_mscount, notifyport, &notifyport);
+		/* port unlocked */
+
+		if (IP_NULL != notifyport) {
+			/* race re-arming the notification */
+			ipc_port_release_sonce(notifyport);
+			audit_session_aiaunref(port_aia_p);
 		}
+		return;
 	}
+
+	/*
+	 * Otherwise, no more extant send rights, so release the
+	 * reference held on the session by those send rights.
+	 */
 	ip_unlock(port);
-	if (NULL != port_aia_p)
-		audit_session_portaiadestroy(port_aia_p);
-	ipc_port_dealloc_kernel(port);
+	audit_session_aiaunref(port_aia_p);
+}
+
+void
+audit_session_portdestroy(ipc_port_t *sessionport)
+{
+	ipc_port_t port = *sessionport;
+
+	if (IP_VALID(port)) {
+		assert (ip_active(port));
+		assert(IKOT_AU_SESSIONPORT == ip_kotype(port));
+		ipc_kobject_set_atomically(port, IKO_NULL, IKOT_NONE);
+		ipc_port_dealloc_kernel(port);
+		*sessionport = IP_NULL;
+	}
 }
 #endif /* CONFIG_AUDIT */
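
(The rewritten audit_session_mksend() above lazily allocates the session port with a compare-and-swap so concurrent callers converge on a single port. A userspace sketch of that lazy-init pattern, using C11 atomics in place of OSCompareAndSwapPtr(); alloc_port()/dealloc_port() are illustrative stand-ins for the kernel port calls:)

#include <stdatomic.h>
#include <stddef.h>

typedef struct port port_t;	/* opaque stand-in for ipc_port_t */
extern port_t *alloc_port(void);
extern void    dealloc_port(port_t *);

static port_t *
get_session_port(port_t *_Atomic *slot)
{
	port_t *p = atomic_load(slot);

	if (p == NULL) {
		port_t *fresh = alloc_port();
		port_t *expected = NULL;

		if (!atomic_compare_exchange_strong(slot, &expected, fresh))
			dealloc_port(fresh);	/* lost the race; use the winner's */
		p = atomic_load(slot);
	}
	return p;
}
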
diff --git a/osfmk/kern/audit_sessionport.h b/osfmk/kern/audit_sessionport.h
index 5a26f3451..8b6a85477 100644
--- a/osfmk/kern/audit_sessionport.h
+++ b/osfmk/kern/audit_sessionport.h
@@ -31,11 +31,11 @@
 
 struct auditinfo_addr;
 
-ipc_port_t audit_session_mksend(struct auditinfo_addr *aia_p,
-    ipc_port_t *sessionport);
+ipc_port_t audit_session_mksend(struct auditinfo_addr *, ipc_port_t *);
 struct auditinfo_addr *audit_session_porttoaia(ipc_port_t);
-void audit_session_portaiadestroy(struct auditinfo_addr *);
+void audit_session_portdestroy(ipc_port_t *);
 void audit_session_nosenders(mach_msg_header_t *);
-
+void audit_session_aiaref(struct auditinfo_addr *);
+void audit_session_aiaunref(struct auditinfo_addr *);
 #endif /* _KERN_AUDIT_SESSIONPORT_H_ */
 #endif /* KERNEL_PRIVATE */
diff --git a/osfmk/kern/bsd_kern.c b/osfmk/kern/bsd_kern.c
index 822c07ce9..07de86ef0 100644
--- a/osfmk/kern/bsd_kern.c
+++ b/osfmk/kern/bsd_kern.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -95,6 +95,15 @@ void *get_bsdthread_info(thread_t th)
 	return(th->uthread);
 }
 
+/*
+ * XXX
+ */
+int get_thread_lock_count(thread_t th);		/* forced forward */
+int get_thread_lock_count(thread_t th)
+{
+ 	return(th->mutex_count);
+}
+
 /*
  * XXX: wait for BSD to  fix signal code
  * Until then, we cannot block here.  We know the task
@@ -135,7 +144,7 @@ get_signalact(
 			!queue_end(&task->threads, (queue_entry_t)inc); ) {
 		thread_mtx_lock(inc);
 		if (inc->active &&
-				(inc->sched_mode & TH_MODE_ISABORTED) != TH_MODE_ABORT) {
+				(inc->sched_flags & TH_SFLAG_ABORTED_MASK) != TH_SFLAG_ABORT) {
 			thread = inc;
 			break;
 		}
@@ -185,7 +194,7 @@ check_actforsig(
 			thread_mtx_lock(inc);
 
 			if (inc->active  && 
-					(inc->sched_mode & TH_MODE_ISABORTED) != TH_MODE_ABORT) {
+					(inc->sched_flags & TH_SFLAG_ABORTED_MASK) != TH_SFLAG_ABORT) {
 				result = KERN_SUCCESS;
 				break;
 			}
@@ -282,7 +291,7 @@ int is_64signalregset(void)
  * returned.
  */
 vm_map_t
-swap_task_map(task_t task, thread_t thread, vm_map_t map)
+swap_task_map(task_t task, thread_t thread, vm_map_t map, boolean_t doswitch)
 {
 	vm_map_t old_map;
 
@@ -290,8 +299,12 @@ swap_task_map(task_t task, thread_t thread, vm_map_t map)
 		panic("swap_task_map");
 
 	task_lock(task);
+	mp_disable_preemption();
 	old_map = task->map;
 	thread->map = task->map = map;
+	if (doswitch)
+		pmap_switch(map->pmap);
+	mp_enable_preemption();
 	task_unlock(task);
 
 #if (defined(__i386__) || defined(__x86_64__)) && NCOPY_WINDOWS > 0
@@ -452,7 +465,7 @@ boolean_t
 thread_should_abort(
 	thread_t th)
 {
-	return ((th->sched_mode & TH_MODE_ISABORTED) == TH_MODE_ABORT);
+	return ((th->sched_flags & TH_SFLAG_ABORTED_MASK) == TH_SFLAG_ABORT);
 }
 
 /*
@@ -470,14 +483,14 @@ current_thread_aborted (
 	thread_t th = current_thread();
 	spl_t s;
 
-	if ((th->sched_mode & TH_MODE_ISABORTED) == TH_MODE_ABORT &&
+	if ((th->sched_flags & TH_SFLAG_ABORTED_MASK) == TH_SFLAG_ABORT &&
 			(th->options & TH_OPT_INTMASK) != THREAD_UNINT)
 		return (TRUE);
-	if (th->sched_mode & TH_MODE_ABORTSAFELY) {
+	if (th->sched_flags & TH_SFLAG_ABORTSAFELY) {
 		s = splsched();
 		thread_lock(th);
-		if (th->sched_mode & TH_MODE_ABORTSAFELY)
-			th->sched_mode &= ~TH_MODE_ISABORTED;
+		if (th->sched_flags & TH_SFLAG_ABORTSAFELY)
+			th->sched_flags &= ~TH_SFLAG_ABORTED_MASK;
 		thread_unlock(th);
 		splx(s);
 	}
@@ -532,7 +545,9 @@ fill_taskprocinfo(task_t task, struct proc_taskinfo_internal * ptinfo)
 	vm_map_t map;
 	task_absolutetime_info_data_t   tinfo;
 	thread_t thread;
-	int cswitch = 0, numrunning = 0;
+	uint32_t cswitch = 0, numrunning = 0;
+	uint32_t syscalls_unix = 0;
+	uint32_t syscalls_mach = 0;
 	
 	map = (task == kernel_task)? kernel_map: task->map;
 
@@ -563,6 +578,9 @@ fill_taskprocinfo(task_t task, struct proc_taskinfo_internal * ptinfo)
 		tval = timer_grab(&thread->system_timer);
 		tinfo.threads_system += tval;
 		tinfo.total_system += tval;
+
+		syscalls_unix += thread->syscalls_unix;
+		syscalls_mach += thread->syscalls_mach;
 	}
 
 	ptinfo->pti_total_system = tinfo.total_system;
@@ -575,8 +593,8 @@ fill_taskprocinfo(task_t task, struct proc_taskinfo_internal * ptinfo)
 	ptinfo->pti_cow_faults = task->cow_faults;
 	ptinfo->pti_messages_sent = task->messages_sent;
 	ptinfo->pti_messages_received = task->messages_received;
-	ptinfo->pti_syscalls_mach = task->syscalls_mach;
-	ptinfo->pti_syscalls_unix = task->syscalls_unix;
+	ptinfo->pti_syscalls_mach = task->syscalls_mach + syscalls_mach;
+	ptinfo->pti_syscalls_unix = task->syscalls_unix + syscalls_unix;
 	ptinfo->pti_csw = task->c_switch + cswitch;
 	ptinfo->pti_threadnum = task->thread_count;
 	ptinfo->pti_numrunning = numrunning;
@@ -598,13 +616,7 @@ fill_taskthreadinfo(task_t task, uint64_t thaddr, struct proc_threadinfo_interna
 
 	for (thact  = (thread_t)queue_first(&task->threads);
 			!queue_end(&task->threads, (queue_entry_t)thact); ) {
-#if defined(__ppc__) || defined(__arm__)
 		if (thact->machine.cthread_self == thaddr)
-#elif defined (__i386__) || defined (__x86_64__)
-		if (thact->machine.pcb->cthread_self == thaddr)
-#else
-#error architecture not supported
-#endif
 		{
 		
 			count = THREAD_BASIC_INFO_COUNT;
@@ -658,13 +670,7 @@ fill_taskthreadlist(task_t task, void * buffer, int thcount)
 
 	for (thact  = (thread_t)queue_first(&task->threads);
 			!queue_end(&task->threads, (queue_entry_t)thact); ) {
-#if defined(__ppc__) || defined(__arm__)
 		thaddr = thact->machine.cthread_self;
-#elif defined (__i386__) || defined (__x86_64__)
-		thaddr = thact->machine.pcb->cthread_self;
-#else
-#error architecture not supported
-#endif
 		*uptr++ = thaddr;
 		numthr++;
 		if (numthr >= thcount)
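
(fill_taskprocinfo() above now reports the task's accumulated counters plus the counters still held by live threads. A minimal sketch of that aggregation; field names are illustrative:)

#include <stdint.h>
#include <stddef.h>

struct thr_sketch {
	uint32_t syscalls_unix;
	uint32_t syscalls_mach;
	struct thr_sketch *next;
};

static void
sum_syscalls(const struct thr_sketch *t,
             uint64_t task_unix, uint64_t task_mach,
             uint64_t *out_unix, uint64_t *out_mach)
{
	uint64_t u = 0, m = 0;

	for (; t != NULL; t = t->next) {
		u += t->syscalls_unix;	/* live-thread counters not yet */
		m += t->syscalls_mach;	/* rolled into the task totals  */
	}
	*out_unix = task_unix + u;	/* task fields cover exited threads */
	*out_mach = task_mach + m;
}
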
diff --git a/osfmk/kern/call_entry.h b/osfmk/kern/call_entry.h
index 57ab51d5e..36f47a31b 100644
--- a/osfmk/kern/call_entry.h
+++ b/osfmk/kern/call_entry.h
@@ -35,41 +35,116 @@
 #ifdef MACH_KERNEL_PRIVATE
 #include <kern/queue.h>
 
-typedef void			*call_entry_param_t;
-typedef void			(*call_entry_func_t)(
-								call_entry_param_t		param0,
-								call_entry_param_t		param1);
+typedef void		*call_entry_param_t;
+typedef void		(*call_entry_func_t)(
+				call_entry_param_t	param0,
+				call_entry_param_t	param1);
 
 typedef struct call_entry {
-    queue_chain_t		q_link;
-	queue_t				queue;
+    queue_chain_t	q_link;
+    queue_head_t	*queue;
     call_entry_func_t	func;
     call_entry_param_t	param0;
     call_entry_param_t	param1;
-    uint64_t			deadline;
+    uint64_t		deadline;
 } call_entry_data_t;
 
-typedef struct call_entry		*call_entry_t;
+typedef struct call_entry	*call_entry_t;
 
-extern queue_t		call_entry_enqueue_deadline(
-							call_entry_t		entry,
-							queue_t				queue,
-							uint64_t			deadline);
 
-extern queue_t		call_entry_enqueue_tail(
-							call_entry_t	entry,
-							queue_t			queue);
-
-extern queue_t		call_entry_dequeue(
-							call_entry_t	entry);
-
-#define	call_entry_setup(entry, pfun, p0)				\
-MACRO_BEGIN												\
+#define	call_entry_setup(entry, pfun, p0)			\
+MACRO_BEGIN							\
 	(entry)->func		= (call_entry_func_t)(pfun);	\
-	(entry)->param0		= (call_entry_param_t)(p0);		\
-	(entry)->queue		= NULL;							\
+	(entry)->param0		= (call_entry_param_t)(p0);	\
+	(entry)->queue		= NULL;				\
 MACRO_END
 
+#define qe(x)		((queue_entry_t)(x))
+#define CE(x)		((call_entry_t)(x))
+
+static __inline__ queue_head_t *
+call_entry_enqueue_tail(
+        call_entry_t            entry,
+        queue_t                 queue)
+{
+        queue_t                 old_queue = entry->queue;
+
+        if (old_queue != NULL)
+                (void)remque(qe(entry));
+
+        enqueue_tail(queue, qe(entry));
+
+        entry->queue = queue;
+
+        return (old_queue);
+}
+
+static __inline__ queue_head_t *
+call_entry_dequeue(
+	call_entry_t		entry)
+{
+        queue_t                 old_queue = entry->queue;
+
+	if (old_queue != NULL) {
+		(void)remque(qe(entry));
+
+		entry->queue = NULL;
+	}
+	return (old_queue);
+}
+
+static __inline__ queue_head_t *
+call_entry_enqueue_deadline(
+	call_entry_t			entry,
+	queue_head_t			*queue,
+	uint64_t			deadline)
+{
+	queue_t		old_queue = entry->queue;
+	call_entry_t	current;
+
+	if (old_queue != queue || entry->deadline < deadline) {
+		if (old_queue == NULL) {
+			current = CE(queue_first(queue));
+		} else if (old_queue != queue) {
+			(void)remque(qe(entry));
+			current = CE(queue_first(queue));
+		} else {
+			current = CE(queue_next(qe(entry)));
+			(void)remque(qe(entry));
+		}
+
+		while (TRUE) {
+			if (queue_end(queue, qe(current)) ||
+			    deadline < current->deadline) {
+				current = CE(queue_prev(qe(current)));
+				break;
+			}
+
+			current = CE(queue_next(qe(current)));
+		}
+		insque(qe(entry), qe(current));
+	}
+	else
+	if (deadline < entry->deadline) {
+		current = CE(queue_prev(qe(entry)));
+
+		(void)remque(qe(entry));
+
+		while (TRUE) {
+			if (queue_end(queue, qe(current)) ||
+			    current->deadline <= deadline) {
+				break;
+			}
+
+			current = CE(queue_prev(qe(current)));
+		}
+		insque(qe(entry), qe(current));
+	}
+	entry->queue = queue;
+	entry->deadline = deadline;
+
+	return (old_queue);
+}
 #endif /* MACH_KERNEL_PRIVATE */
 
 #endif /* _KERN_CALL_ENTRY_H_ */
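
(call_entry_enqueue_deadline() above keeps each queue sorted by ascending deadline, choosing its search start from where the entry previously sat. A simplified sketch of the underlying ordered insert, on a singly linked list rather than the kernel's doubly linked queues:)

#include <stdint.h>
#include <stddef.h>

struct centry {
	struct centry *next;
	uint64_t deadline;
};

static void
insert_by_deadline(struct centry **head, struct centry *e)
{
	struct centry **pp = head;

	/* Walk past entries due no later than ours, so equal deadlines
	 * keep FIFO order, then splice in before the first later one. */
	while (*pp != NULL && (*pp)->deadline <= e->deadline)
		pp = &(*pp)->next;
	e->next = *pp;
	*pp = e;
}
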
diff --git a/osfmk/kern/clock.c b/osfmk/kern/clock.c
index fd2e29797..e9c487ad6 100644
--- a/osfmk/kern/clock.c
+++ b/osfmk/kern/clock.c
@@ -50,7 +50,7 @@
 uint32_t	hz_tick_interval = 1;
 
 
-decl_simple_lock_data(static,clock_lock)
+decl_simple_lock_data(,clock_lock)
 
 #define clock_lock()	\
 	simple_lock(&clock_lock)
@@ -72,7 +72,6 @@ decl_simple_lock_data(static,clock_lock)
  *	where CONV converts absolute time units into seconds and a fraction.
  */
 static struct clock_calend {
-
 	uint64_t	epoch;
 	uint64_t	offset;
 
@@ -161,11 +160,6 @@ clock_config(void)
 	thread_call_setup(&calend_wakecall, (thread_call_func_t)IOKitResetTime, NULL);
 
 	clock_oldconfig();
-
-	/*
-	 * Initialize the timer callouts.
-	 */
-	timer_call_initialize();
 }
 
 /*
@@ -246,6 +240,15 @@ clock_get_calendar_microtime(
 	if (clock_calend.adjdelta < 0) {
 		uint32_t	t32;
 
+		/* 
+		 * Since offset is decremented during a negative adjustment,
+		 * ensure that time increases monotonically without going
+		 * temporarily backwards.
+		 * If the delta has not yet passed, now is set to the start
+		 * of the current adjustment period; otherwise, we're between
+		 * the expiry of the delta and the next call to calend_adjust(),
+		 * and we offset accordingly.
+		 */
 		if (now > clock_calend.adjstart) {
 			t32 = (uint32_t)(now - clock_calend.adjstart);
 
@@ -305,6 +308,7 @@ clock_get_calendar_nanotime(
 	now += clock_calend.offset;
 
 	absolutetime_to_microtime(now, secs, nanosecs);
+
 	*nanosecs *= NSEC_PER_USEC;
 
 	*secs += (clock_sec_t)clock_calend.epoch;
@@ -408,6 +412,7 @@ clock_set_calendar_microtime(
 	 *	Set the new calendar epoch.
 	 */
 	clock_calend.epoch = secs;
+
 	nanoseconds_to_absolutetime((uint64_t)microsecs * NSEC_PER_USEC, &clock_calend.offset);
 
 	/*
@@ -473,6 +478,7 @@ clock_initialize_calendar(void)
 		 *	Set the new calendar epoch.
 		 */
 		clock_calend.epoch = secs;
+
 		nanoseconds_to_absolutetime((uint64_t)microsecs * NSEC_PER_USEC, &clock_calend.offset);
 
 		/*
@@ -538,7 +544,7 @@ clock_adjtime(
 	interval = calend_set_adjustment(secs, microsecs);
 	if (interval != 0) {
 		calend_adjdeadline = mach_absolute_time() + interval;
-		if (!timer_call_enter(&calend_adjcall, calend_adjdeadline))
+		if (!timer_call_enter(&calend_adjcall, calend_adjdeadline, TIMER_CALL_CRITICAL))
 			calend_adjactive++;
 	}
 	else
@@ -558,47 +564,103 @@ calend_set_adjustment(
 	int64_t			total, ototal;
 	uint32_t		interval = 0;
 
+	/* 
+	 * Compute the total adjustment time in nanoseconds.
+	 */
 	total = (int64_t)*secs * NSEC_PER_SEC + *microsecs * NSEC_PER_USEC;
 
+	/* 
+	 * Disable commpage gettimeofday().
+	 */
 	commpage_disable_timestamp();
 
+	/* 
+	 * Get current absolute time.
+	 */
 	now = mach_absolute_time();
 
+	/* 
+	 * Save the old adjustment total for later return.
+	 */
 	ototal = calend_adjtotal;
 
+	/*
+	 * Is a new correction specified?
+	 */
 	if (total != 0) {
+		/*
+		 * Set delta to the standard, small, adjustment skew.
+		 */
 		int32_t		delta = calend_adjskew;
 
 		if (total > 0) {
+			/*
+			 * Positive adjustment. If greater than the preset 'big' 
+			 * threshold, slew at a faster rate, capping if necessary.
+			 */
 			if (total > calend_adjbig)
 				delta *= 10;
 			if (delta > total)
 				delta = (int32_t)total;
 
+			/* 
+			 * Convert the delta back from ns to absolute time and store in adjoffset.
+			 */
 			nanoseconds_to_absolutetime((uint64_t)delta, &t64);
 			clock_calend.adjoffset = (uint32_t)t64;
 		}
 		else {
+			/*
+			 * Negative adjustment; therefore, negate the delta. If 
+			 * greater than the preset 'big' threshold, slew at a faster 
+			 * rate, capping if necessary.
+			 */
 			if (total < -calend_adjbig)
 				delta *= 10;
 			delta = -delta;
 			if (delta < total)
 				delta = (int32_t)total;
 
+			/* 
+	 * Save the current absolute time. Subsequent time operations occurring
+			 * during this negative correction can make use of this value to ensure 
+			 * that time increases monotonically.
+			 */
 			clock_calend.adjstart = now;
 
+			/* 
+			 * Convert the delta back from ns to absolute time and store in adjoffset.
+			 */
 			nanoseconds_to_absolutetime((uint64_t)-delta, &t64);
 			clock_calend.adjoffset = (uint32_t)t64;
 		}
 
+		/* 
+		 * Store the total adjustment time in ns. 
+		 */
 		calend_adjtotal = total;
+		
+		/* 
+		 * Store the delta for this adjustment period in ns. 
+		 */
 		clock_calend.adjdelta = delta;
 
+		/* 
+		 * Set the interval in absolute time for later return. 
+		 */
 		interval = calend_adjinterval;
 	}
-	else
+	else {
+		/* 
+		 * No change; clear any prior adjustment.
+		 */
 		calend_adjtotal = clock_calend.adjdelta = 0;
+	}
 
+	/* 
+	 * If a prior correction was in progress, return the
+	 * remaining uncorrected time from it. 
+	 */
 	if (ototal != 0) {
 		*secs = (long)(ototal / NSEC_PER_SEC);
 		*microsecs = (int)((ototal % NSEC_PER_SEC) / NSEC_PER_USEC);
@@ -627,7 +689,7 @@ calend_adjust_call(void)
 		if (interval != 0) {
 			clock_deadline_for_periodic_event(interval, mach_absolute_time(), &calend_adjdeadline);
 
-			if (!timer_call_enter(&calend_adjcall, calend_adjdeadline))
+			if (!timer_call_enter(&calend_adjcall, calend_adjdeadline, TIMER_CALL_CRITICAL))
 				calend_adjactive++;
 		}
 	}
@@ -661,21 +723,21 @@ calend_adjust(void)
 		}
 	}
 	else
-	if (delta < 0) {
-		clock_calend.offset -= clock_calend.adjoffset;
+		if (delta < 0) {
+			clock_calend.offset -= clock_calend.adjoffset;
 
-		calend_adjtotal -= delta;
-		if (delta < calend_adjtotal) {
-			clock_calend.adjdelta = delta = (int32_t)calend_adjtotal;
+			calend_adjtotal -= delta;
+			if (delta < calend_adjtotal) {
+				clock_calend.adjdelta = delta = (int32_t)calend_adjtotal;
 
-			nanoseconds_to_absolutetime((uint64_t)-delta, &t64);
-			clock_calend.adjoffset = (uint32_t)t64;
+				nanoseconds_to_absolutetime((uint64_t)-delta, &t64);
+				clock_calend.adjoffset = (uint32_t)t64;
+			}
+
+			if (clock_calend.adjdelta != 0)
+				clock_calend.adjstart = now;
 		}
 
-		if (clock_calend.adjdelta != 0)
-			clock_calend.adjstart = now;
-	}
-	
 	if (clock_calend.adjdelta != 0)
 		interval = calend_adjinterval;
 
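
(The slew-rate selection documented in calend_set_adjustment() above reduces to: start from the small skew, scale by 10 for corrections beyond the 'big' threshold, and clamp so a single step never overshoots the remaining total. A hedged standalone sketch; the parameters stand in for the kernel's calend_adjskew/calend_adjbig tunables:)

#include <stdint.h>
#include <stdlib.h>

static int32_t
slew_delta(int64_t total_ns, int32_t skew_ns, int64_t big_ns)
{
	int32_t delta = skew_ns;

	if (llabs(total_ns) > big_ns)
		delta *= 10;		/* faster slew for large error */
	if (total_ns >= 0)
		return (delta > total_ns) ? (int32_t)total_ns : delta;
	delta = -delta;			/* negative correction */
	return (delta < total_ns) ? (int32_t)total_ns : delta;
}
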
diff --git a/osfmk/kern/clock_oldops.c b/osfmk/kern/clock_oldops.c
index c268382d9..9a3e6c93f 100644
--- a/osfmk/kern/clock_oldops.c
+++ b/osfmk/kern/clock_oldops.c
@@ -766,7 +766,7 @@ set_alarm(
 	uint64_t	abstime;
 
 	nanotime_to_absolutetime(alarm_time->tv_sec, alarm_time->tv_nsec, &abstime);
-	timer_call_enter(&alarm_expire_timer, abstime);
+	timer_call_enter(&alarm_expire_timer, abstime, 0);
 }
 
 /*
diff --git a/osfmk/kern/debug.c b/osfmk/kern/debug.c
index 39aa1f425..b6d146746 100644
--- a/osfmk/kern/debug.c
+++ b/osfmk/kern/debug.c
@@ -75,20 +75,19 @@
 #include <kdp/kdp_udp.h>
 #endif
 
-#ifdef	__ppc__
-#include <ppc/Firmware.h>
-#include <ppc/low_trace.h>
-#endif
-
 #if defined(__i386__) || defined(__x86_64__)
 #include <i386/cpu_threads.h>
 #include <i386/pmCPU.h>
 #endif
 
 #include <IOKit/IOPlatformExpert.h>
+#include <machine/pal_routines.h>
 
 #include <sys/kdebug.h>
 #include <libkern/OSKextLibPrivate.h>
+#include <libkern/OSAtomic.h>
+#include <libkern/kernel_mach_header.h>
+#include <uuid/uuid.h>
 
 unsigned int	halt_in_debugger = 0;
 unsigned int	switch_debugger = 0;
@@ -122,6 +121,7 @@ char *debug_buf_ptr = debug_buf;
 unsigned int debug_buf_size = sizeof(debug_buf);
 
 static char model_name[64];
+/* uuid_string_t */ char kernel_uuid[37]; 
 
 struct pasc {
   unsigned a: 7;
@@ -184,6 +184,14 @@ MACRO_END
 void
 panic_init(void)
 {
+	unsigned long uuidlen = 0;
+	void *uuid;
+
+	uuid = getuuidfromheader(&_mh_execute_header, &uuidlen);
+	if ((uuid != NULL) && (uuidlen == sizeof(uuid_t))) {
+		uuid_unparse_upper(*(uuid_t *)uuid, kernel_uuid);
+	}
+
 	simple_lock_init(&panic_lock, 0);
 	panic_is_inited = 1;
 	panic_caller = 0;
@@ -216,7 +224,7 @@ debug_log_init(void)
 
 void _consume_panic_args(int a __unused, ...)
 {
-    panic(NULL);
+    panic("panic");
 }
 
 void
@@ -227,7 +235,15 @@ panic(const char *str, ...)
 	thread_t thread;
 	wait_queue_t wq;
 
+#if	defined(__i386__) || defined(__x86_64__)
+	/* Attempt to display the unparsed panic string */
+	const char *tstr = str;
 
+	kprintf("Panic initiated, string: ");
+	while (tstr && *tstr)
+		kprintf("%c", *tstr++);
+	kprintf("\n");
+#endif
 	if (kdebug_enable)
 		kdbg_dump_trace_to_file("/var/tmp/panic.trace");
 
@@ -236,10 +252,6 @@ panic(const char *str, ...)
 
 	panic_safe();
 
-#ifdef	__ppc__
-	lastTrace = LLTraceSet(0);		/* Disable low-level tracing */
-#endif
-
 	thread = current_thread();		/* Get failing thread */
 	wq = thread->wait_queue;		/* Save the old value */
 	thread->wait_queue = NULL;		/* Clear the wait so we do not get double panics when we try locks */
@@ -340,6 +352,7 @@ debug_putc(char c)
 }
 
 /* In-place packing routines -- inefficient, but they're called at most once.
+ * Assumes "buflen" is a multiple of 8.
  */
 
 int packA(char *inbuf, uint32_t length, uint32_t buflen)
@@ -347,7 +360,7 @@ int packA(char *inbuf, uint32_t length, uint32_t buflen)
   unsigned int i, j = 0;
   pasc_t pack;
   
-  length = MIN(((length & ~7) +8), buflen);
+  length = MIN(((length + 7) & ~7), buflen);
 
   for (i = 0; i < length; i+=8)
     {
@@ -362,7 +375,7 @@ int packA(char *inbuf, uint32_t length, uint32_t buflen)
       bcopy ((char *) &pack, inbuf + j, 7);
       j += 7;
     }
-  return ((length * 7)/8);
+  return j;
 }
 
 void unpackA(char *inbuf, uint32_t length)
@@ -414,10 +427,20 @@ static void panic_display_model_name(void) {
 	if (ml_nofault_copy((vm_offset_t) &model_name, (vm_offset_t) &tmp_model_name, sizeof(model_name)) != sizeof(model_name))
 		return;
 
-	model_name[sizeof(model_name) - 1] = '\0';
+	tmp_model_name[sizeof(tmp_model_name) - 1] = '\0';
+
+	if (tmp_model_name[0] != 0)
+		kdb_printf("System model name: %s\n", tmp_model_name);
+}
+
+static void panic_display_kernel_uuid(void) {
+	char tmp_kernel_uuid[sizeof(kernel_uuid)];
+
+	if (ml_nofault_copy((vm_offset_t) &kernel_uuid, (vm_offset_t) &tmp_kernel_uuid, sizeof(kernel_uuid)) != sizeof(kernel_uuid))
+		return;
 
-	if (model_name[0] != 0)
-		kdb_printf("System model name: %s\n", model_name);
+	if (tmp_kernel_uuid[0] != '\0')
+		kdb_printf("Kernel UUID: %s\n", tmp_kernel_uuid);
 }
 
 static void panic_display_uptime(void) {
@@ -430,30 +453,37 @@ static void panic_display_uptime(void) {
 extern const char version[];
 extern char osversion[];
 
+static volatile uint32_t config_displayed = 0;
+
 __private_extern__ void panic_display_system_configuration(void) {
-	static volatile boolean_t config_displayed = FALSE;
 
 	panic_display_process_name();
-	if (config_displayed == FALSE) {
-		config_displayed = TRUE;
+	if (OSCompareAndSwap(0, 1, &config_displayed)) {
+		char buf[256];
+		if (strlcpy(buf, PE_boot_args(), sizeof(buf)))
+			kdb_printf("Boot args: %s\n", buf);
 		kdb_printf("\nMac OS version:\n%s\n",
 		    (osversion[0] != 0) ? osversion : "Not yet set");
 		kdb_printf("\nKernel version:\n%s\n",version);
+		panic_display_kernel_uuid();
+		panic_display_pal_info();
 		panic_display_model_name();
 		panic_display_uptime();
-#if	defined(__i386__) || defined(__x86_64__)
-		pmap_pagetable_corruption_msg_log(&kdb_printf);
-#endif /* i386 || x86_64 */
 		panic_display_zprint();
+#if CONFIG_ZLEAKS
+		panic_display_ztrace();
+#endif /* CONFIG_ZLEAKS */
 		kext_dump_panic_lists(&kdb_log);
 	}
 }
 
 extern zone_t		first_zone;
 extern unsigned int	num_zones, stack_total;
+extern unsigned long long stack_allocs;
 
 #if defined(__i386__) || defined (__x86_64__)
 extern unsigned int	inuse_ptepages_count;
+extern long long alloc_ptepages_count;
 #endif
 
 extern boolean_t	panic_include_zprint;
@@ -493,6 +523,37 @@ __private_extern__ void panic_display_zprint()
 	}
 }
 
+#if CONFIG_ZLEAKS
+extern boolean_t	panic_include_ztrace;
+extern struct ztrace* top_ztrace;
+/*
+ * Prints the backtrace most suspected of being a leaker, if we panicked in the zone allocator.
+ * top_ztrace and panic_include_ztrace come from osfmk/kern/zalloc.c
+ */
+__private_extern__ void panic_display_ztrace(void)
+{
+	if(panic_include_ztrace == TRUE) {
+		unsigned int i = 0;
+		struct ztrace top_ztrace_copy;
+		
+		/* Make sure not to trip another panic if there's something wrong with memory */
+		if(ml_nofault_copy((vm_offset_t)top_ztrace, (vm_offset_t)&top_ztrace_copy, sizeof(struct ztrace)) == sizeof(struct ztrace)) {
+			kdb_printf("\nBacktrace suspected of leaking: (outstanding bytes: %lu)\n", (uintptr_t)top_ztrace_copy.zt_size);
+			/* Print the backtrace addresses */
+			for (i = 0; (i < top_ztrace_copy.zt_depth && i < MAX_ZTRACE_DEPTH) ; i++) {
+				kdb_printf("%p\n", top_ztrace_copy.zt_stack[i]);
+			}
+			/* Print any kexts in that backtrace, along with their link addresses so we can properly blame them */
+			kmod_panic_dump((vm_offset_t *)&top_ztrace_copy.zt_stack[0], top_ztrace_copy.zt_depth);
+		}
+		else {
+			kdb_printf("\nCan't access top_ztrace...\n");
+		}
+		kdb_printf("\n");
+	}
+}
+#endif /* CONFIG_ZLEAKS */
+
 #if !MACH_KDP
 static struct ether_addr kdp_current_mac_address = {{0, 0, 0, 0, 0, 0}};
 
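
(The packA() change above replaces ((length & ~7) + 8), which overshoots by a full block when length is already a multiple of 8, with the standard round-up idiom (length + 7) & ~7. A quick check of the two expressions:)

#include <assert.h>
#include <stdint.h>

static uint32_t round_up8(uint32_t n) { return (n + 7) & ~7u; }	/* new */
static uint32_t old_expr(uint32_t n)  { return (n & ~7u) + 8;  }	/* old */

int main(void)
{
	assert(round_up8(16) == 16);	/* exact multiple stays put     */
	assert(old_expr(16)  == 24);	/* old code overshot by 8 bytes */
	assert(round_up8(17) == 24);	/* both round partial blocks up */
	assert(old_expr(17)  == 24);
	return 0;
}
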
diff --git a/osfmk/kern/debug.h b/osfmk/kern/debug.h
index 308435ece..66702bc16 100644
--- a/osfmk/kern/debug.h
+++ b/osfmk/kern/debug.h
@@ -40,8 +40,10 @@ struct thread_snapshot {
 	uint32_t 		nkern_frames;
 	uint32_t 		nuser_frames;
 	uint64_t 		wait_event;
-	uint64_t	 	continuation;
+	uint64_t 	 	continuation;
 	uint64_t 		thread_id;
+	uint64_t 		user_time;
+	uint64_t 		system_time;
 	int32_t  		state;
 	char			ss_flags;
 } __attribute__ ((packed));
@@ -50,6 +52,13 @@ struct task_snapshot {
 	uint32_t		snapshot_magic;
 	int32_t			pid;
 	uint32_t		nloadinfos;
+	uint64_t		user_time_in_terminated_threads;
+	uint64_t		system_time_in_terminated_threads;
+	int				suspend_count; 
+	int				task_size;    // pages
+	int				faults;	 	// number of page faults
+	int				pageins; 	// number of actual pageins
+	int				cow_faults;	// number of copy-on-write faults
 	char			ss_flags;
 	/* We restrict ourselves to a statically defined
 	 * (current as of 2009) length for the
@@ -59,19 +68,34 @@ struct task_snapshot {
 	char			p_comm[17];
 } __attribute__ ((packed));
 
+
+struct mem_snapshot {
+	uint32_t	snapshot_magic;
+	uint32_t	free_pages;
+	uint32_t	active_pages;
+	uint32_t	inactive_pages;
+	uint32_t	purgeable_pages;
+	uint32_t	wired_pages;
+	uint32_t	speculative_pages;
+	uint32_t	throttled_pages;
+} __attribute__((packed));
+
 enum {
 	kUser64_p = 0x1,
 	kKernel64_p = 0x2,
-	kHasDispatchSerial = 0x4
+	kHasDispatchSerial = 0x4,
+	kTerminatedSnapshot = 0x8
 };
 
 enum {
-    STACKSHOT_GET_DQ = 0x1,
-    STACKSHOT_SAVE_LOADINFO = 0x2
+	STACKSHOT_GET_DQ = 0x1,
+	STACKSHOT_SAVE_LOADINFO = 0x2,
+	STACKSHOT_GET_GLOBAL_MEM_STATS = 0x4
 };
 
 #define STACKSHOT_THREAD_SNAPSHOT_MAGIC 0xfeedface
 #define STACKSHOT_TASK_SNAPSHOT_MAGIC 0xdecafbad
+#define STACKSHOT_MEM_SNAPSHOT_MAGIC  0xabcddcba
 
 #endif /* __APPLE_API_UNSTABLE */
 #endif /* __APPLE_API_PRIVATE */
@@ -81,6 +105,7 @@ enum {
 extern unsigned int	systemLogDiags;
 extern char debug_buf[];
 extern unsigned int	debug_boot_arg;
+extern char kernel_uuid[];
 
 #ifdef MACH_KERNEL_PRIVATE
 
@@ -93,12 +118,12 @@ extern unsigned int     current_debugger;
 #define KDP_CUR_DB      0x1
 #define KDB_CUR_DB      0x2
 
-extern unsigned int     active_debugger;
+extern unsigned int 	active_debugger;
 extern unsigned int 	debug_mode; 
 extern unsigned int 	disable_debug_output; 
 
-extern unsigned int     panicDebugging;
-extern unsigned int	logPanicDataToScreen;
+extern unsigned int 	panicDebugging;
+extern unsigned int 	logPanicDataToScreen;
 
 extern int db_run_mode;
 
@@ -130,7 +155,9 @@ void	unpackA(char *inbuf, uint32_t length);
 
 void	panic_display_system_configuration(void);
 void	panic_display_zprint(void);
-
+#if CONFIG_ZLEAKS
+void	panic_display_ztrace(void);
+#endif /* CONFIG_ZLEAKS */
 #endif /* MACH_KERNEL_PRIVATE */
 
 #define DB_HALT		0x1
diff --git a/osfmk/kern/etimer.h b/osfmk/kern/etimer.h
index de66f9749..8c3674288 100644
--- a/osfmk/kern/etimer.h
+++ b/osfmk/kern/etimer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -58,6 +58,17 @@ extern int setPop(uint64_t time);
 
 extern void etimer_resync_deadlines(void);
 
+extern uint32_t etimer_queue_migrate(int target_cpu);
+
+/* Kernel trace events associated with timers */
+#define DECR_TRAP_LATENCY	MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0)
+#define DECR_SET_DEADLINE	MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1)
+#define DECR_TIMER_CALLOUT	MACHDBG_CODE(DBG_MACH_EXCP_DECI, 2)
+#define DECR_PM_DEADLINE	MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3)
+#define DECR_TIMER_MIGRATE	MACHDBG_CODE(DBG_MACH_EXCP_DECI, 4)
+#define DECR_RDHPET		MACHDBG_CODE(DBG_MACH_EXCP_DECI, 5)
+#define DECR_SET_TSC_DEADLINE	MACHDBG_CODE(DBG_MACH_EXCP_DECI, 6)
+
 #endif /* _KERN_ETIMER_H_ */
 
 #endif	/* KERNEL_PRIVATE */
diff --git a/osfmk/kern/exception.c b/osfmk/kern/exception.c
index a3578e1a0..27082522f 100644
--- a/osfmk/kern/exception.c
+++ b/osfmk/kern/exception.c
@@ -438,7 +438,6 @@ kern_return_t abnormal_exit_notify(mach_exception_data_type_t exccode,
  */
 kern_return_t sys_perf_notify(thread_t thread, int pid) 
 {
-
 	host_priv_t		hostp;
 	struct exception_action *excp;
 	ipc_port_t		xport;
diff --git a/osfmk/kern/extmod_statistics.c b/osfmk/kern/extmod_statistics.c
new file mode 100644
index 000000000..4eb26d1ce
--- /dev/null
+++ b/osfmk/kern/extmod_statistics.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <mach/mach_types.h>
+
+#include <kern/host.h>
+#include <kern/thread.h>
+#include <kern/task.h>
+#include <kern/extmod_statistics.h>
+#include <libkern/OSAtomic.h>
+
+#include <uuid/uuid.h>
+
+/*
+ * This code module adds statistics to track when
+ * a userspace task is modified by another userspace
+ * task. This can facilitate triage of crashes
+ * and abberant behavior, which are not expected
+ * to occur when the program is running in its
+ * qualified environment.
+ *
+ * We assume the target task has a lifecycle lock
+ * that will prevent it from exiting
+ * (task_reference/task_reference_internal), which
+ * should be called either explicitly, or implicitly
+ * via MIG glue code (convert_port_to_task).
+ *
+ * Host-wide statistics don't assume any locks are
+ * held, and use atomic operations.
+ *
+ * If we can detect that the kernel proper is
+ * performing these operations, don't count
+ * it as an external modification. Some of the
+ * external modification routines are called
+ * by the kernel during thread setup, in which
+ * case we rename the userspace entrypoint called
+ * by the MIG demuxer to have a "_from_user" suffix.
+ */
+
+/* externs for BSD kernel */
+extern void fslog_extmod_msgtracer(void *, void *);
+
+/* local routines */
+static void
+extmod_statistics_log(task_t current_task, task_t target);
+
+void
+extmod_statistics_incr_task_for_pid(task_t target)
+{
+	task_t ctask = current_task();
+
+	if ((ctask == kernel_task) || (target == TASK_NULL))
+		return;
+
+	if (target != ctask) {
+		ctask->extmod_statistics.task_for_pid_caller_count++;
+		target->extmod_statistics.task_for_pid_count++;
+		OSIncrementAtomic64(&host_extmod_statistics.task_for_pid_count);
+	}
+}
+
+void
+extmod_statistics_incr_thread_set_state(thread_t target)
+{
+	task_t ctask = current_task();
+	task_t ttask;
+
+	if ((ctask == kernel_task) || (target == THREAD_NULL))
+		return;
+
+	ttask = get_threadtask(target);
+
+	if (ttask == TASK_NULL)
+		return;
+
+	if (ttask != ctask) {
+		ctask->extmod_statistics.thread_set_state_caller_count++;
+		ttask->extmod_statistics.thread_set_state_count++;
+		OSIncrementAtomic64(&host_extmod_statistics.thread_set_state_count);
+	}
+}
+
+void
+extmod_statistics_incr_thread_create(task_t target)
+{
+	task_t ctask = current_task();
+
+	if ((ctask == kernel_task) || (target == TASK_NULL))
+		return;
+
+	if (target != ctask) {
+		ctask->extmod_statistics.thread_creation_caller_count++;
+		target->extmod_statistics.thread_creation_count++;
+		OSIncrementAtomic64(&host_extmod_statistics.thread_creation_count);
+
+		extmod_statistics_log(ctask, target);
+	}
+}
+
+static void
+extmod_statistics_log(task_t current_task, task_t target)
+{
+	void *c_proc;
+	void *t_proc;
+
+	c_proc = get_bsdtask_info(current_task);
+	t_proc = get_bsdtask_info(target);
+	if (c_proc && t_proc) {
+		fslog_extmod_msgtracer(c_proc, t_proc);
+	}
+}
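
For reference, the host-wide counters maintained above are exported through the HOST_EXTMOD_INFO64 flavor added to host_statistics64() later in this patch (osfmk/kern/host.c). A minimal user-space sketch of reading them; the field names follow the increments above, and error handling is reduced to the essentials:

#include <mach/mach.h>
#include <stdio.h>

int
main(void)
{
	vm_extmod_statistics_data_t stats;
	mach_msg_type_number_t count = HOST_EXTMOD_INFO64_COUNT;

	if (host_statistics64(mach_host_self(), HOST_EXTMOD_INFO64,
	    (host_info64_t)&stats, &count) != KERN_SUCCESS)
		return 1;

	printf("task_for_pid:     %lld\n", (long long)stats.task_for_pid_count);
	printf("thread_create:    %lld\n", (long long)stats.thread_creation_count);
	printf("thread_set_state: %lld\n", (long long)stats.thread_set_state_count);
	return 0;
}
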
diff --git a/osfmk/ppc/PPCcalls.c b/osfmk/kern/extmod_statistics.h
similarity index 71%
rename from osfmk/ppc/PPCcalls.c
rename to osfmk/kern/extmod_statistics.h
index 39203ec5d..5bf20066a 100644
--- a/osfmk/ppc/PPCcalls.c
+++ b/osfmk/kern/extmod_statistics.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,25 +25,22 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-
-#include <kern/task.h>
-#include <kern/thread.h>
-#include <mach/vm_prot.h>
-#include <ppc/pmap.h>
-#include <ppc/exception.h>
-#include <ppc/Diagnostics.h>
-#include <ppc/vmachmon.h>
-#include <ppc/PseudoKernel.h>
-#include <ppc/misc_protos.h>
-#include <ppc/hw_perfmon.h>
-
 /*
- *	To add a new entry:
- *		Add an "PPCcall(routine)" to the table in ppc/PPCcalls.h
+ *	kern/extmod_statistics.h
  *
- *		Add trap definition to mach/ppc/syscall_sw.h and
- *		recompile user library.
+ *	Definitions for statistics related to external
+ *	modification of a task by another agent on the system.
  *
  */
 
-#include <ppc/PPCcalls.h>
+#ifndef	_KERN_EXTMOD_STATISTICS_H_
+#define _KERN_EXTMOD_STATISTICS_H_
+
+#include <kern/task.h>
+#include <mach/vm_types.h>
+
+extern void extmod_statistics_incr_task_for_pid(task_t target);
+extern void extmod_statistics_incr_thread_set_state(thread_t target);
+extern void extmod_statistics_incr_thread_create(task_t target);
+
+#endif	/* _KERN_EXTMOD_STATISTICS_H_ */
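
As the comment in extmod_statistics.c explains, these hooks are meant to fire only on the user-reachable MIG entry points. A hypothetical sketch of the wiring for the thread_set_state path — the wrapper and forwarding-target names are assumptions, following the "_from_user" renaming convention described above:

/* Hypothetical: the user-reachable MIG entry counts the modification,
 * then forwards to the kernel-internal implementation. */
kern_return_t
thread_set_state_from_user(thread_t thread, int flavor,
    thread_state_t state, mach_msg_type_number_t state_count)
{
	extmod_statistics_incr_thread_set_state(thread);
	return thread_set_state(thread, flavor, state, state_count);
}
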
diff --git a/osfmk/kern/hibernate.c b/osfmk/kern/hibernate.c
index b56317bf9..018ea6b78 100644
--- a/osfmk/kern/hibernate.c
+++ b/osfmk/kern/hibernate.c
@@ -51,14 +51,17 @@ hibernate_setup(IOHibernateImageHeader * header,
                         boolean_t vmflush,
 			hibernate_page_list_t ** page_list_ret,
 			hibernate_page_list_t ** page_list_wired_ret,
+			hibernate_page_list_t ** page_list_pal_ret,
                         boolean_t * encryptedswap)
 {
     hibernate_page_list_t * page_list = NULL;
     hibernate_page_list_t * page_list_wired = NULL;
+    hibernate_page_list_t * page_list_pal = NULL;
     uint32_t    	    gobble_count;
 
     *page_list_ret       = NULL;
     *page_list_wired_ret = NULL;
+    *page_list_pal_ret   = NULL;
     
     if (vmflush)
         hibernate_flush_memory();
@@ -72,6 +75,13 @@ hibernate_setup(IOHibernateImageHeader * header,
         kfree(page_list, page_list->list_size);
         return (KERN_RESOURCE_SHORTAGE);
     }
+    page_list_pal = hibernate_page_list_allocate();
+    if (!page_list_pal)
+    {
+        kfree(page_list, page_list->list_size);
+        kfree(page_list_wired, page_list_wired->list_size);
+        return (KERN_RESOURCE_SHORTAGE);
+    }
 
     *encryptedswap = dp_encryption;
 
@@ -90,6 +100,7 @@ hibernate_setup(IOHibernateImageHeader * header,
 
     *page_list_ret       = page_list;
     *page_list_wired_ret = page_list_wired;
+    *page_list_pal_ret   = page_list_pal;
 
     return (KERN_SUCCESS);
 }
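
The new page_list_pal allocation follows the same unwind-on-failure discipline as the two existing lists: whichever step fails frees everything acquired before it, so the caller sees either all three lists or none. The pattern in isolation, as a generic self-contained sketch (the helper is illustrative, not part of this patch):

#include <stdlib.h>

/* Generic sketch of the staged-allocation pattern used by
 * hibernate_setup(): a failure at step N releases steps 1..N-1. */
static int
alloc_three(void **a, void **b, void **c, size_t sz)
{
	if ((*a = malloc(sz)) == NULL)
		return -1;
	if ((*b = malloc(sz)) == NULL) {
		free(*a);
		return -1;
	}
	if ((*c = malloc(sz)) == NULL) {
		free(*a);
		free(*b);
		return -1;
	}
	return 0;
}
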
diff --git a/osfmk/kern/host.c b/osfmk/kern/host.c
index f77cccb71..15b742050 100644
--- a/osfmk/kern/host.c
+++ b/osfmk/kern/host.c
@@ -90,6 +90,8 @@
 
 host_data_t	realhost;
 
+vm_extmod_statistics_data_t host_extmod_statistics;
+
 kern_return_t
 host_processors(
 	host_priv_t				host_priv,
@@ -189,6 +191,8 @@ host_info(
 	case HOST_SCHED_INFO:
 	{
 		register host_sched_info_t	sched_info;
+		uint32_t quantum_time;
+		uint64_t quantum_ns;
 
 		/*
 		 *	Return scheduler information.
@@ -198,8 +202,11 @@ host_info(
 
 		sched_info = (host_sched_info_t) info;
 
+		quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
+		absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
+
 		sched_info->min_timeout = 
-			sched_info->min_quantum = std_quantum_us / 1000;
+			sched_info->min_quantum = (uint32_t)(quantum_ns / 1000 / 1000);
 
 		*count = HOST_SCHED_INFO_COUNT;
 
@@ -397,23 +404,29 @@ MACRO_END
 		cpu_load_info->cpu_ticks[CPU_STATE_IDLE] = 0;
 		cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0;
 
-		processor = processor_list;
-		GET_TICKS_VALUE(processor, CPU_STATE_USER, user_state);
-		GET_TICKS_VALUE(processor, CPU_STATE_SYSTEM, system_state);
-		GET_TICKS_VALUE(processor, CPU_STATE_IDLE, idle_state);
+		simple_lock(&processor_list_lock);
 
-		if (processor_count > 1) {
-			simple_lock(&processor_list_lock);
+		for (processor = processor_list; processor != NULL; processor = processor->processor_list) {
+			timer_data_t	idle_temp;
+			timer_t		idle_state;
 
-			while ((processor = processor->processor_list) != NULL) {
-				GET_TICKS_VALUE(processor, CPU_STATE_USER, user_state);
-				GET_TICKS_VALUE(processor, CPU_STATE_SYSTEM, system_state);
+			GET_TICKS_VALUE(processor, CPU_STATE_USER, user_state);
+			GET_TICKS_VALUE(processor, CPU_STATE_SYSTEM, system_state);
+
+			idle_state = &PROCESSOR_DATA(processor, idle_state);
+			idle_temp = *idle_state;
+
+			if (PROCESSOR_DATA(processor, current_state) != idle_state ||
+			    timer_grab(&idle_temp) != timer_grab(idle_state))
 				GET_TICKS_VALUE(processor, CPU_STATE_IDLE, idle_state);
-			}
+			else {
+				timer_advance(&idle_temp, mach_absolute_time() - idle_temp.tstamp);
 
-			simple_unlock(&processor_list_lock);
+				cpu_load_info->cpu_ticks[CPU_STATE_IDLE] +=
+					(uint32_t)(timer_grab(&idle_temp) / hz_tick_interval);
+			}
 		}
-
+		simple_unlock(&processor_list_lock);
 		*count = HOST_CPU_LOAD_INFO_COUNT;
 
 		return (KERN_SUCCESS);
@@ -512,6 +525,21 @@ host_statistics64(
 			return(KERN_SUCCESS);
 		}
 
+		case HOST_EXTMOD_INFO64: /* We were asked to get vm_extmod_statistics */
+		{
+			vm_extmod_statistics_t		out_extmod_statistics;
+
+			if (*count < HOST_EXTMOD_INFO64_COUNT)
+				return (KERN_FAILURE);
+
+			out_extmod_statistics = (vm_extmod_statistics_t) info;
+			*out_extmod_statistics = host_extmod_statistics;
+
+			*count = HOST_EXTMOD_INFO64_COUNT;	
+
+			return(KERN_SUCCESS);
+		}
+
 		default: /* If we didn't recognize the flavor, send to host_statistics */
 			return(host_statistics(host, flavor, (host_info_t) info, count)); 
 	}
@@ -532,6 +560,73 @@ host_priv_statistics(
 	return(host_statistics((host_t)host_priv, flavor, info, count));
 }
 
+kern_return_t
+set_sched_stats_active(
+		boolean_t active) 
+{
+	sched_stats_active = active;
+	return KERN_SUCCESS;
+}
+
+
+kern_return_t
+get_sched_statistics( 
+		struct _processor_statistics_np *out, 
+		uint32_t *count)
+{
+	processor_t processor;
+
+	if (!sched_stats_active) {
+		return KERN_FAILURE;
+	}
+
+	simple_lock(&processor_list_lock);
+	
+	if (*count < (processor_count + 2) * sizeof(struct _processor_statistics_np)) { /* One for RT, one for FS */
+		simple_unlock(&processor_list_lock);
+		return KERN_FAILURE;
+	}
+
+	processor = processor_list;
+	while (processor) {
+		struct processor_sched_statistics *stats = &processor->processor_data.sched_stats;
+
+		out->ps_cpuid 			= processor->cpu_id;
+		out->ps_csw_count 		= stats->csw_count;
+		out->ps_preempt_count 		= stats->preempt_count;
+		out->ps_preempted_rt_count 	= stats->preempted_rt_count;
+		out->ps_preempted_by_rt_count 	= stats->preempted_by_rt_count;
+		out->ps_rt_sched_count		= stats->rt_sched_count;
+		out->ps_interrupt_count 	= stats->interrupt_count;
+		out->ps_ipi_count 		= stats->ipi_count;
+		out->ps_timer_pop_count 	= stats->timer_pop_count;
+		out->ps_runq_count_sum 		= SCHED(processor_runq_stats_count_sum)(processor);
+		out->ps_idle_transitions	= stats->idle_transitions;
+
+		out++;
+		processor = processor->processor_list;
+	}
+
+	*count = (uint32_t) (processor_count * sizeof(struct _processor_statistics_np));
+
+	simple_unlock(&processor_list_lock);
+
+	/* And include RT Queue information */
+	bzero(out, sizeof(*out));
+	out->ps_cpuid = (-1);
+	out->ps_runq_count_sum = rt_runq.runq_stats.count_sum;
+	out++;
+	*count += (uint32_t)sizeof(struct _processor_statistics_np);
+
+	/* And include Fair Share Queue information at the end */
+	bzero(out, sizeof(*out));
+	out->ps_cpuid = (-2);
+	out->ps_runq_count_sum = SCHED(fairshare_runq_stats_count_sum)();
+	*count += (uint32_t)sizeof(struct _processor_statistics_np);
+	
+	return KERN_SUCCESS;
+}
+
 kern_return_t
 host_page_size(
 	host_t		host,
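
get_sched_statistics() emits one record per processor plus two pseudo-records (ps_cpuid -1 for the realtime queue, -2 for the fair-share queue), so callers must budget processor_count + 2 entries. A sketch of a kernel-side consumer; the MAX_CPUS_ASSUMED bound and the dump function are illustrative, not part of this patch:

/* Illustrative consumer: MAX_CPUS_ASSUMED is an assumed upper bound
 * on processor_count; the two extra slots hold the RT and fair-share
 * pseudo-records. */
#define MAX_CPUS_ASSUMED	64

static void
dump_sched_stats(void)
{
	static struct _processor_statistics_np buf[MAX_CPUS_ASSUMED + 2];
	uint32_t bytes = sizeof(buf);
	uint32_t i;

	set_sched_stats_active(TRUE);
	if (get_sched_statistics(buf, &bytes) != KERN_SUCCESS)
		return;
	for (i = 0; i < bytes / sizeof(buf[0]); i++)
		printf("cpu %d: csw %u preempt %u\n", buf[i].ps_cpuid,
		    buf[i].ps_csw_count, buf[i].ps_preempt_count);
}
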
diff --git a/osfmk/kern/host.h b/osfmk/kern/host.h
index 3c64c3b08..24b052648 100644
--- a/osfmk/kern/host.h
+++ b/osfmk/kern/host.h
@@ -74,7 +74,7 @@
 #include <mach/exception_types.h>
 #include <mach/host_special_ports.h>
 #include <kern/kern_types.h>
-
+#include <mach/vm_statistics.h>
 
 struct	host {
 	decl_lck_mtx_data(,lock)		/* lock to protect exceptions */
@@ -89,6 +89,8 @@ extern host_data_t	realhost;
 #define host_lock(host)		lck_mtx_lock(&(host)->lock)
 #define host_unlock(host)	lck_mtx_unlock(&(host)->lock)
 
+extern vm_extmod_statistics_data_t host_extmod_statistics;
+
 #endif	/* MACH_KERNEL_PRIVATE */
 
 /*
diff --git a/osfmk/kern/host_notify.c b/osfmk/kern/host_notify.c
index 769d1cc2e..1ca87dcaf 100644
--- a/osfmk/kern/host_notify.c
+++ b/osfmk/kern/host_notify.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2003-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,14 +25,6 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * Copyright (c) 2003 Apple Computer, Inc.  All rights reserved.
- *
- * HISTORY
- *
- * 16 January 2003 (debo)
- *  Created.
- */
 
 #include <mach/mach_types.h>
 #include <mach/mach_host.h>
@@ -143,7 +135,7 @@ host_notify_port_destroy(
 		ip_unlock(port);
 
 		assert(entry->port == port);
-		remqueue(NULL, (queue_entry_t)entry);
+		remqueue((queue_entry_t)entry);
 		lck_mtx_unlock(&host_notify_lock);
 		zfree(host_notify_zone, entry);
 
diff --git a/osfmk/kern/host_statistics.h b/osfmk/kern/host_statistics.h
index a1471c6f4..efe59d74e 100644
--- a/osfmk/kern/host_statistics.h
+++ b/osfmk/kern/host_statistics.h
@@ -43,16 +43,9 @@
 #include <kern/processor.h>
 
 
-#if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */
-#define	VM_STAT_INCR(event)									\
-MACRO_BEGIN 											\
-	OSAddAtomic(1, (SInt32 *) (&(PROCESSOR_DATA(current_processor(), vm_stat).event)));	\
-MACRO_END
-#else /* !(defined(__ppc__)) */
 #define VM_STAT_INCR(event)									\
 MACRO_BEGIN											\
 	OSAddAtomic64(1, (SInt64 *) (&(PROCESSOR_DATA(current_processor(), vm_stat).event)));	\
 MACRO_END
-#endif /* !(defined(__ppc__)) */
 
 #endif	/* _KERN_HOST_STATISTICS_H_ */
diff --git a/osfmk/kern/ipc_kobject.c b/osfmk/kern/ipc_kobject.c
index a0cc915a8..8963abea6 100644
--- a/osfmk/kern/ipc_kobject.c
+++ b/osfmk/kern/ipc_kobject.c
@@ -560,12 +560,10 @@ ipc_kobject_notify(
 			   return TRUE;
 		   }
 #endif
-#if CONFIG_EMBEDDED
 		   if (ip_kotype(port) == IKOT_FILEPORT) {
 			fileport_notify(request_header);
 			return TRUE;
 		   }
-#endif
 
 	  	   break;
 
diff --git a/osfmk/kern/ipc_mig.c b/osfmk/kern/ipc_mig.c
index b437edd9b..1b2a9163d 100644
--- a/osfmk/kern/ipc_mig.c
+++ b/osfmk/kern/ipc_mig.c
@@ -79,6 +79,7 @@
 #include <ipc/ipc_space.h>
 #include <ipc/ipc_port.h>
 #include <ipc/ipc_pset.h>
+#include <ipc/ipc_notify.h>
 #include <vm/vm_map.h>
 
 #include <libkern/OSAtomic.h>
@@ -115,14 +116,15 @@ mach_msg_send_from_kernel(
 	ipc_kmsg_t kmsg;
 	mach_msg_return_t mr;
 
-	if (!MACH_PORT_VALID(CAST_MACH_PORT_TO_NAME(msg->msgh_remote_port)))
-		return MACH_SEND_INVALID_DEST;
-
 	mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg);
 	if (mr != MACH_MSG_SUCCESS)
 		return mr;
 
-	ipc_kmsg_copyin_from_kernel_legacy(kmsg);
+	mr = ipc_kmsg_copyin_from_kernel_legacy(kmsg);
+	if (mr != MACH_MSG_SUCCESS) {
+		ipc_kmsg_free(kmsg);
+		return mr;
+	}		
 
 	mr = ipc_kmsg_send_always(kmsg);
 	if (mr != MACH_MSG_SUCCESS) {
@@ -142,14 +144,15 @@ mach_msg_send_from_kernel_proper(
 	ipc_kmsg_t kmsg;
 	mach_msg_return_t mr;
 
-	if (!MACH_PORT_VALID(CAST_MACH_PORT_TO_NAME(msg->msgh_remote_port)))
-		return MACH_SEND_INVALID_DEST;
-
 	mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg);
 	if (mr != MACH_MSG_SUCCESS)
 		return mr;
 
-	ipc_kmsg_copyin_from_kernel(kmsg);
+	mr = ipc_kmsg_copyin_from_kernel(kmsg);
+	if (mr != MACH_MSG_SUCCESS) {
+		ipc_kmsg_free(kmsg);
+		return mr;
+	}
 
 	mr = ipc_kmsg_send_always(kmsg);
 	if (mr != MACH_MSG_SUCCESS) {
@@ -171,14 +174,16 @@ mach_msg_send_from_kernel_with_options(
 	ipc_kmsg_t kmsg;
 	mach_msg_return_t mr;
 
-	if (!MACH_PORT_VALID(CAST_MACH_PORT_TO_NAME(msg->msgh_remote_port)))
-		return MACH_SEND_INVALID_DEST;
-
 	mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg);
 	if (mr != MACH_MSG_SUCCESS)
 		return mr;
 
-	ipc_kmsg_copyin_from_kernel_legacy(kmsg);
+	mr = ipc_kmsg_copyin_from_kernel_legacy(kmsg);
+	if (mr != MACH_MSG_SUCCESS) {
+		ipc_kmsg_free(kmsg);
+		return mr;
+	}
+		
 	mr = ipc_kmsg_send(kmsg, option, timeout_val);
 	if (mr != MACH_MSG_SUCCESS) {
 		ipc_kmsg_destroy(kmsg);
@@ -252,7 +257,6 @@ mach_msg_rpc_from_kernel_body(
 	mach_port_seqno_t seqno;
 	mach_msg_return_t mr;
 
-	assert(MACH_PORT_VALID(CAST_MACH_PORT_TO_NAME(msg->msgh_remote_port)));
 	assert(msg->msgh_local_port == MACH_PORT_NULL);
 
 	mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg);
@@ -277,13 +281,16 @@ mach_msg_rpc_from_kernel_body(
 
 #if IKM_SUPPORT_LEGACY
     if(legacy)
-        ipc_kmsg_copyin_from_kernel_legacy(kmsg);
+        mr = ipc_kmsg_copyin_from_kernel_legacy(kmsg);
     else
-        ipc_kmsg_copyin_from_kernel(kmsg);
+        mr = ipc_kmsg_copyin_from_kernel(kmsg);
 #else
-    ipc_kmsg_copyin_from_kernel(kmsg);
+    mr = ipc_kmsg_copyin_from_kernel(kmsg);
 #endif
-
+    if (mr != MACH_MSG_SUCCESS) {
+	    ipc_kmsg_free(kmsg);
+	    return mr;
+    }
 	mr = ipc_kmsg_send_always(kmsg);
 	if (mr != MACH_MSG_SUCCESS) {
 		ipc_kmsg_destroy(kmsg);
@@ -446,7 +453,7 @@ mach_msg_overwrite(
 		max_trailer->msgh_trailer_type = MACH_MSG_TRAILER_FORMAT_0;
 		max_trailer->msgh_trailer_size = MACH_MSG_TRAILER_MINIMUM_SIZE;
 	
-		mr = ipc_kmsg_copyin(kmsg, space, map, MACH_PORT_NULL);
+		mr = ipc_kmsg_copyin(kmsg, space, map, FALSE);
 		if (mr != MACH_MSG_SUCCESS) {
 			ipc_kmsg_free(kmsg);
 			return mr;
@@ -504,8 +511,7 @@ mach_msg_overwrite(
 			return MACH_RCV_TOO_LARGE;
 		}
 
-		mr = ipc_kmsg_copyout(kmsg, space, map, MACH_PORT_NULL,
-				      MACH_MSG_BODY_NULL);
+		mr = ipc_kmsg_copyout(kmsg, space, map, MACH_MSG_BODY_NULL);
 		if (mr != MACH_MSG_SUCCESS) {
 			if ((mr &~ MACH_MSG_MASK) == MACH_RCV_BODY_ERROR) {
 				ipc_kmsg_put_to_kernel(msg, kmsg,
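
With the MACH_PORT_VALID pre-flight checks removed, an invalid or dead destination now surfaces as an error from the copyin stage, so in-kernel senders must check the return code rather than assume early screening. A minimal sketch of a caller under that model (the helper name is illustrative):

/* Illustrative one-way kernel send: MACH_SEND_INVALID_DEST now comes
 * back from the copyin stage instead of a pre-flight port check. */
static mach_msg_return_t
notify_port(mach_port_t dest, mach_msg_header_t *hdr)
{
	hdr->msgh_remote_port = dest;
	return mach_msg_send_from_kernel_proper(hdr, hdr->msgh_size);
}
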
diff --git a/osfmk/kern/ipc_misc.c b/osfmk/kern/ipc_misc.c
index 9eeefc347..547abeaad 100644
--- a/osfmk/kern/ipc_misc.c
+++ b/osfmk/kern/ipc_misc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2008, 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -31,6 +31,11 @@
 #include <kern/ipc_kobject.h>
 #include <kern/ipc_misc.h>
 
+#include <mach/mach_port.h>
+#include <mach/vm_map.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+
 extern void fileport_releasefg(struct fileglob *);
 
 /*
@@ -140,6 +145,96 @@ fileport_notify(mach_msg_header_t *msg)
 	} else {
 		ip_unlock(port);
 	}
+}
+
+/*
+ * fileport_invoke
+ *
+ * Description: Invoke a function with the fileglob underlying the fileport.
+ *		Returns the error code related to the fileglob lookup.
+ *
+ * Parameters:	task		The target task
+ *		name		The fileport's port name in 'task'
+ *		action		The function to invoke with the fileglob
+ *		arg		Anonymous pointer to caller state
+ *		rval		The value returned from calling 'action'
+ */
+kern_return_t
+fileport_invoke(task_t task, mach_port_name_t name,
+	int (*action)(mach_port_name_t, struct fileglob *, void *),
+	void *arg, int *rval)
+{
+	kern_return_t kr;
+	ipc_port_t fileport;
+	struct fileglob *fg;
+
+	kr = ipc_object_copyin(task->itk_space, name,
+	    MACH_MSG_TYPE_COPY_SEND, (ipc_object_t *)&fileport);
+	if (kr != KERN_SUCCESS)
+		return (kr);
+
+	if ((fg = fileport_port_to_fileglob(fileport)) != NULL)
+		*rval = (*action)(name, fg, arg);
+	else
+		kr = KERN_FAILURE;
+	ipc_port_release_send(fileport);
+	return (kr);
+}
+
+/*
+ * fileport_walk
+ *
+ * Description: Invoke the action function on every fileport in the task.
+ *
+ *		This could be more efficient if we refactored mach_port_names()
+ *		so that (a) it didn't compute the type information unless asked
+ *		and (b) it could be asked to -not- unwire/copyout the memory
+ *		and (c) it could be asked for port names by kobject type. Not
+ *		clear that it's worth all that complexity, though.
+ *
+ * Parameters: 	task		The target task
+ *		action		The function to invoke on each fileport
+ *		arg		Anonymous pointer to caller state.
+ */
+kern_return_t
+fileport_walk(task_t task,
+	int (*action)(mach_port_name_t, struct fileglob *, void *arg),
+	void *arg)
+{
+	mach_port_name_t *names;
+	mach_msg_type_number_t ncnt, tcnt;
+	vm_map_copy_t map_copy_names, map_copy_types;
+	vm_map_address_t map_names;
+	kern_return_t kr;
+	uint_t i;
+	int rval;
+
+	/*
+	 * mach_port_names returns the 'name' and 'types' in copied-in
+	 * form.  Discard 'types' immediately, then copyout 'names'
+	 * back into the kernel before walking the array.
+	 */
+
+	kr = mach_port_names(task->itk_space,
+	    (mach_port_name_t **)&map_copy_names, &ncnt,
+	    (mach_port_type_t **)&map_copy_types, &tcnt);
+	if (kr != KERN_SUCCESS)
+		return (kr);
+
+	vm_map_copy_discard(map_copy_types);
+
+	kr = vm_map_copyout(ipc_kernel_map, &map_names, map_copy_names);
+	if (kr != KERN_SUCCESS) {
+		vm_map_copy_discard(map_copy_names);
+		return (kr);
+	}
+	names = (mach_port_name_t *)(uintptr_t)map_names;
+
+	for (rval = 0, i = 0; i < ncnt; i++)
+		if (fileport_invoke(task, names[i], action, arg,
+		    &rval) == KERN_SUCCESS && -1 == rval)
+			break;		/* early termination clause */
 
-	return;
+	vm_deallocate(ipc_kernel_map,
+	    (vm_address_t)names, ncnt * sizeof (*names));
+	return (KERN_SUCCESS);
 }
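
A fileport_walk() callback receives each port name, its fileglob, and the caller's cookie; returning -1 through rval terminates the walk early. A sketch of a counting walker built on the interface above (the helper names are illustrative):

/* Illustrative walker: count a task's fileports. Returning -1 from
 * the callback would stop the walk early; this one never does. */
static int
fileport_count_one(mach_port_name_t name __unused,
    struct fileglob *fg __unused, void *arg)
{
	int *countp = (int *)arg;

	(*countp)++;
	return (0);
}

static int
count_fileports(task_t task)
{
	int count = 0;

	(void) fileport_walk(task, fileport_count_one, &count);
	return (count);
}
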
diff --git a/osfmk/kern/ipc_misc.h b/osfmk/kern/ipc_misc.h
index 0e572e030..04fbd6505 100644
--- a/osfmk/kern/ipc_misc.h
+++ b/osfmk/kern/ipc_misc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2009, 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -33,6 +33,10 @@ struct fileglob;
 ipc_port_t fileport_alloc(struct fileglob *);
 struct fileglob *fileport_port_to_fileglob(ipc_port_t);
 void fileport_notify(mach_msg_header_t *);
+kern_return_t fileport_invoke(task_t, mach_port_name_t,
+    int (*)(mach_port_name_t, struct fileglob *, void *), void *, int *);
+kern_return_t fileport_walk(task_t,
+    int (*)(mach_port_name_t, struct fileglob *, void *), void *);
 
 #endif /* _KERN_IPC_MISC_H_ */
 #endif /* KERNEL_PRIVATE */
diff --git a/osfmk/kern/kalloc.c b/osfmk/kern/kalloc.c
index 03c55052d..f84a19956 100644
--- a/osfmk/kern/kalloc.c
+++ b/osfmk/kern/kalloc.c
@@ -92,11 +92,47 @@ vm_size_t kalloc_kernmap_size;	/* size of kallocs that can come from kernel map
 unsigned int kalloc_large_inuse;
 vm_size_t    kalloc_large_total;
 vm_size_t    kalloc_large_max;
-volatile vm_size_t    kalloc_largest_allocated = 0;
+vm_size_t    kalloc_largest_allocated = 0;
+uint64_t    kalloc_large_sum;
+
+int	kalloc_fake_zone_index = -1; /* index of our fake zone in statistics arrays */
 
 vm_offset_t	kalloc_map_min;
 vm_offset_t	kalloc_map_max;
 
+#ifdef	MUTEX_ZONE
+/*
+ * Diagnostic code to track mutexes separately rather than via the power-of-2 kalloc zones
+ */
+	zone_t		lck_mtx_zone;
+#endif
+
+static void
+KALLOC_ZINFO_SALLOC(vm_size_t bytes)
+{
+	thread_t thr = current_thread();
+	task_t task;
+	zinfo_usage_t zinfo;
+
+	thr->tkm_shared.alloc += bytes;
+	if (kalloc_fake_zone_index != -1 && 
+	    (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
+		zinfo[kalloc_fake_zone_index].alloc += bytes;
+}
+
+static void
+KALLOC_ZINFO_SFREE(vm_size_t bytes)
+{
+	thread_t thr = current_thread();
+	task_t task;
+	zinfo_usage_t zinfo;
+
+	thr->tkm_shared.free += bytes;
+	if (kalloc_fake_zone_index != -1 && 
+	    (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
+		zinfo[kalloc_fake_zone_index].free += bytes;
+}
+
 /*
  *	All allocations of size less than kalloc_max are rounded to the
  *	next highest power of 2.  This allocator is built on top of
@@ -158,11 +194,23 @@ void * kalloc_canblock(
 		boolean_t	canblock);
 
 
+lck_grp_t *kalloc_lck_grp;
+lck_mtx_t kalloc_lock;
+
+#define kalloc_spin_lock()	lck_mtx_lock_spin(&kalloc_lock)
+#define kalloc_unlock()		lck_mtx_unlock(&kalloc_lock)
+
+
 /* OSMalloc local data declarations */
 static
 queue_head_t    OSMalloc_tag_list;
 
-decl_simple_lock_data(static,OSMalloc_tag_lock)
+lck_grp_t *OSMalloc_tag_lck_grp;
+lck_mtx_t OSMalloc_tag_lock;
+
+#define OSMalloc_tag_spin_lock()	lck_mtx_lock_spin(&OSMalloc_tag_lock)
+#define OSMalloc_tag_unlock()		lck_mtx_unlock(&OSMalloc_tag_lock)
+
 
 /* OSMalloc forward declarations */
 void OSMalloc_init(void);
@@ -225,7 +273,9 @@ kalloc_init(
 
 	/*
 	 *	Allocate a zone for each size we are going to handle.
-	 *	We specify non-paged memory.
+	 *	We specify non-paged memory.  Don't charge the caller
+	 *	for the allocation, as we aren't sure how the memory
+	 *	will be handled.
 	 */
 	for (i = 0, size = 1; size < kalloc_max; i++, size <<= 1) {
 		if (size < KALLOC_MINSIZE) {
@@ -237,8 +287,15 @@ kalloc_init(
 		}
 		k_zone[i] = zinit(size, k_zone_max[i] * size, size,
 				  k_zone_name[i]);
+		zone_change(k_zone[i], Z_CALLERACCT, FALSE);
 	}
+	kalloc_lck_grp = lck_grp_alloc_init("kalloc.large", LCK_GRP_ATTR_NULL);
+	lck_mtx_init(&kalloc_lock, kalloc_lck_grp, LCK_ATTR_NULL);
 	OSMalloc_init();
+#ifdef	MUTEX_ZONE	
+	lck_mtx_zone = zinit(sizeof(struct _lck_mtx_), 1024*256, 4096, "lck_mtx");
+#endif	
+
 }
 
 void *
@@ -261,36 +318,42 @@ kalloc_canblock(
 
 		/* kmem_alloc could block so we return if noblock */
 		if (!canblock) {
-		  return(NULL);
+			return(NULL);
 		}
 
-		if (size >= kalloc_kernmap_size) {
-			volatile vm_offset_t prev_largest;
+		if (size >= kalloc_kernmap_size)
 		        alloc_map = kernel_map;
-			/* Thread-safe version of the workaround for 4740071
-			 * (a double FREE())
-			 */
-			do {
-				prev_largest = kalloc_largest_allocated;
-			} while ((size > prev_largest) && !OSCompareAndSwap((UInt32)prev_largest, (UInt32)size, (volatile UInt32 *) &kalloc_largest_allocated));
-		} else
+		else
 			alloc_map = kalloc_map;
 
 		if (kmem_alloc(alloc_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) {
 			if (alloc_map != kernel_map) {
 				if (kmem_alloc(kernel_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS)
 					addr = NULL;
-		}
+			}
 			else
 				addr = NULL;
 		}
 
 		if (addr != NULL) {
+			kalloc_spin_lock();
+			/*
+			 * Thread-safe version of the workaround for 4740071
+			 * (a double FREE())
+			 */
+			if (size > kalloc_largest_allocated)
+				kalloc_largest_allocated = size;
+
 		        kalloc_large_inuse++;
 		        kalloc_large_total += size;
+			kalloc_large_sum += size;
 
 			if (kalloc_large_total > kalloc_large_max)
 			        kalloc_large_max = kalloc_large_total;
+
+			kalloc_unlock();
+
+			KALLOC_ZINFO_SALLOC(size);
 		}
 		return(addr);
 	}
@@ -374,6 +437,7 @@ krealloc(
 			kmem_free(alloc_map, (vm_offset_t)*addrp, old_size);
 
 			kalloc_large_total += (new_size - old_size);
+			kalloc_large_sum += (new_size - old_size);
 
 			if (kalloc_large_total > kalloc_large_max)
 				kalloc_large_max = kalloc_large_total;
@@ -412,11 +476,18 @@ krealloc(
 			*addrp = NULL;
 			return;
 		}
+		kalloc_spin_lock();
+
 		kalloc_large_inuse++;
+		kalloc_large_sum += new_size;
 		kalloc_large_total += new_size;
 
 		if (kalloc_large_total > kalloc_large_max)
 		        kalloc_large_max = kalloc_large_total;
+
+		kalloc_unlock();
+
+		KALLOC_ZINFO_SALLOC(new_size);
 	} else {
 		register int new_zindex;
 
@@ -515,9 +586,14 @@ kfree(
 		}
 		kmem_free(alloc_map, (vm_offset_t)data, size);
 
+		kalloc_spin_lock();
+
 		kalloc_large_total -= size;
 		kalloc_large_inuse--;
 
+		kalloc_unlock();
+
+		KALLOC_ZINFO_SFREE(size);
 		return;
 	}
 
@@ -560,18 +636,32 @@ kalloc_zone(
 }
 #endif
 
+void
+kalloc_fake_zone_init(int zone_index)
+{
+	kalloc_fake_zone_index = zone_index;
+}
 
 void
-kalloc_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size,
-		     vm_size_t *alloc_size, int *collectable, int *exhaustable)
+kalloc_fake_zone_info(int *count, 
+		      vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
+		      uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
 {
 	*count      = kalloc_large_inuse;
 	*cur_size   = kalloc_large_total;
 	*max_size   = kalloc_large_max;
-	*elem_size  = kalloc_large_total / kalloc_large_inuse;
-	*alloc_size = kalloc_large_total / kalloc_large_inuse;
+
+	if (kalloc_large_inuse) {
+		*elem_size  = kalloc_large_total / kalloc_large_inuse;
+		*alloc_size = kalloc_large_total / kalloc_large_inuse;
+	} else {
+		*elem_size  = 0;
+		*alloc_size = 0;
+	}
+	*sum_size   = kalloc_large_sum;
 	*collectable = 0;
 	*exhaustable = 0;
+	*caller_acct = 0;
 }
 
 
@@ -580,7 +670,9 @@ OSMalloc_init(
 	void)
 {
 	queue_init(&OSMalloc_tag_list);
-	simple_lock_init(&OSMalloc_tag_lock, 0);
+
+	OSMalloc_tag_lck_grp = lck_grp_alloc_init("OSMalloc_tag", LCK_GRP_ATTR_NULL);
+	lck_mtx_init(&OSMalloc_tag_lock, OSMalloc_tag_lck_grp, LCK_ATTR_NULL);
 }
 
 OSMallocTag
@@ -601,9 +693,9 @@ OSMalloc_Tagalloc(
 
 	strncpy(OSMTag->OSMT_name, str, OSMT_MAX_NAME);
 
-	simple_lock(&OSMalloc_tag_lock);
+	OSMalloc_tag_spin_lock();
 	enqueue_tail(&OSMalloc_tag_list, (queue_entry_t)OSMTag);
-	simple_unlock(&OSMalloc_tag_lock);
+	OSMalloc_tag_unlock();
 	OSMTag->OSMT_state = OSMT_VALID;
 	return(OSMTag);
 }
@@ -627,9 +719,9 @@ OSMalloc_Tagrele(
 
 	if (hw_atomic_sub(&tag->OSMT_refcnt, 1) == 0) {
 		if (hw_compare_and_store(OSMT_VALID|OSMT_RELEASED, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state)) {
-			simple_lock(&OSMalloc_tag_lock);
+			OSMalloc_tag_spin_lock();
 			(void)remque((queue_entry_t)tag);
-			simple_unlock(&OSMalloc_tag_lock);
+			OSMalloc_tag_unlock();
 			kfree((void*)tag, sizeof(*tag));
 		} else
 			panic("OSMalloc_Tagrele(): refcnt 0\n");
@@ -644,9 +736,9 @@ OSMalloc_Tagfree(
 		panic("OSMalloc_Tagfree(): bad state 0x%08X\n", tag->OSMT_state);
 
 	if (hw_atomic_sub(&tag->OSMT_refcnt, 1) == 0) {
-		simple_lock(&OSMalloc_tag_lock);
+		OSMalloc_tag_spin_lock();
 		(void)remque((queue_entry_t)tag);
-		simple_unlock(&OSMalloc_tag_lock);
+		OSMalloc_tag_unlock();
 		kfree((void*)tag, sizeof(*tag));
 	}
 }
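
kalloc_fake_zone_init() records which row of the per-task zinfo array the "kalloc.large" fake zone occupies, and kalloc_fake_zone_info() then reports the large-allocation counters in the same shape as a real zone's. A hedged sketch of the consumer side — the reporting function is hypothetical; only the two kalloc entry points come from this patch:

/* Hypothetical consumer of the fake-zone interface added above. */
static void
report_kalloc_fake_zone(int zone_index)
{
	int count, collectable, exhaustable, caller_acct;
	vm_size_t cur, max, elem, alloc;
	uint64_t sum;

	kalloc_fake_zone_init(zone_index);
	kalloc_fake_zone_info(&count, &cur, &max, &elem, &alloc,
	    &sum, &collectable, &exhaustable, &caller_acct);
	printf("kalloc.large: %d blocks, %lu bytes in use, %llu lifetime\n",
	    count, (unsigned long)cur, (unsigned long long)sum);
}
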
diff --git a/osfmk/kern/kalloc.h b/osfmk/kern/kalloc.h
index 7966959f2..9fcb07edc 100644
--- a/osfmk/kern/kalloc.h
+++ b/osfmk/kern/kalloc.h
@@ -88,14 +88,18 @@ extern void		krealloc(void		**addrp,
 				 vm_size_t	new_size,
 				 simple_lock_t	lock);
 
+extern void		kalloc_fake_zone_init( int );
+
 extern void		kalloc_fake_zone_info(
 				int		*count,
 				vm_size_t	*cur_size,
 				vm_size_t	*max_size,
 				vm_size_t	*elem_size,
 				vm_size_t	*alloc_size,
+				uint64_t	*sum_size,
 				int		*collectable,
-				int		*exhaustable);
+				int		*exhaustable,
+				int		*caller_acct);
 
 extern vm_size_t kalloc_max_prerounded;
 extern vm_size_t kalloc_large_total;
diff --git a/osfmk/kern/kern_types.h b/osfmk/kern/kern_types.h
index 7c3e93616..d8e98aa6f 100644
--- a/osfmk/kern/kern_types.h
+++ b/osfmk/kern/kern_types.h
@@ -110,6 +110,15 @@ typedef struct pset_node		*pset_node_t;
 typedef struct affinity_set		*affinity_set_t;
 #define AFFINITY_SET_NULL		((affinity_set_t) 0)
 
+typedef struct run_queue               *run_queue_t;
+#define RUN_QUEUE_NULL                 ((run_queue_t) 0)
+
+typedef struct grrr_run_queue               *grrr_run_queue_t;
+#define GRRR_RUN_QUEUE_NULL                 ((grrr_run_queue_t) 0)
+
+typedef struct grrr_group					*grrr_group_t;
+#define GRRR_GROUP_NULL						((grrr_group_t) 0)
+
 #else	/* MACH_KERNEL_PRIVATE */
 
 struct wait_queue_set ;
diff --git a/osfmk/kern/kext_alloc.c b/osfmk/kern/kext_alloc.c
index 407efcf16..1d3aea127 100644
--- a/osfmk/kern/kext_alloc.c
+++ b/osfmk/kern/kext_alloc.c
@@ -39,7 +39,6 @@
 #include <mach-o/loader.h>
 #include <libkern/kernel_mach_header.h>
 
-#define KEXT_ALLOC_MAX_OFFSET (2 * 1024 * 1024 * 1024UL)
 
 vm_map_t g_kext_map = 0;
 static mach_vm_offset_t kext_alloc_base = 0;
@@ -70,15 +69,15 @@ kext_alloc_init(void)
     text_end = vm_map_round_page(text->vmaddr + text->vmsize);
     text_size = text_end - text_start;
 
-    kext_alloc_base = text_end - KEXT_ALLOC_MAX_OFFSET;
-    kext_alloc_size = KEXT_ALLOC_MAX_OFFSET - text_size;
+    kext_alloc_base = KEXT_ALLOC_BASE(text_end);
+    kext_alloc_size = KEXT_ALLOC_SIZE(text_size);
     kext_alloc_max = kext_alloc_base + kext_alloc_size;
 
     /* Allocate the subblock of the kernel map */
 
     rval = kmem_suballoc(kernel_map, (vm_offset_t *) &kext_alloc_base, 
 			 kext_alloc_size, /* pageable */ TRUE,
-			 VM_FLAGS_FIXED|VM_FLAGS_OVERWRITE|VM_FLAGS_BELOW_MIN, 
+			 VM_FLAGS_FIXED|VM_FLAGS_OVERWRITE,
 			 &g_kext_map);
     if (rval != KERN_SUCCESS) {
 	    panic("kext_alloc_init: kmem_suballoc failed 0x%x\n", rval);
diff --git a/osfmk/kern/kmod.c b/osfmk/kern/kmod.c
index 121967342..d0563ce09 100644
--- a/osfmk/kern/kmod.c
+++ b/osfmk/kern/kmod.c
@@ -85,7 +85,7 @@ extern void proc_selfname(char * buf, int size);
             __FUNCTION__, procname);  \
     } while (0)
 
-#if __ppc__ || __i386__
+#if __i386__
 // in libkern/OSKextLib.cpp
 extern kern_return_t kext_get_kmod_info(
     kmod_info_array_t      * kmod_list,
@@ -93,7 +93,7 @@ extern kern_return_t kext_get_kmod_info(
 #define KMOD_MIG_UNUSED
 #else
 #define KMOD_MIG_UNUSED __unused
-#endif /* __ppc__ || __i386__ */
+#endif /* __i386__ */
 
 
 /*********************************************************************
@@ -148,7 +148,7 @@ kmod_get_info(
     kmod_info_array_t * kmod_list KMOD_MIG_UNUSED,
     mach_msg_type_number_t * kmodCount KMOD_MIG_UNUSED)
 {
-#if __ppc__ || __i386__
+#if __i386__
     if (current_task() != kernel_task && task_has_64BitAddr(current_task())) {
         NOT_SUPPORTED_USER64();
         return KERN_NOT_SUPPORTED;
@@ -157,5 +157,5 @@ kmod_get_info(
 #else
     NOT_SUPPORTED_KERNEL();
     return KERN_NOT_SUPPORTED;
-#endif /* __ppc__ || __i386__ */
+#endif /* __i386__ */
 }
diff --git a/osfmk/kern/locks.c b/osfmk/kern/locks.c
index e31e970c6..07b9924a1 100644
--- a/osfmk/kern/locks.c
+++ b/osfmk/kern/locks.c
@@ -107,6 +107,12 @@ void
 lck_mod_init(
 	void)
 {
+	/*
+	 * Obtain "lcks" options:this currently controls lock statistics
+	 */
+	if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts)))
+		LcksOpts = 0;
+
 	queue_init(&lck_grp_queue);
 	
 	/* 
@@ -537,8 +543,12 @@ lck_mtx_sleep_deadline(
 	if (res == THREAD_WAITING) {
 		lck_mtx_unlock(lck);
 		res = thread_block(THREAD_CONTINUE_NULL);
-		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
-			lck_mtx_lock(lck);
+		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
+			if ((lck_sleep_action & LCK_SLEEP_SPIN))
+				lck_mtx_lock_spin(lck);
+			else
+				lck_mtx_lock(lck);
+		}
 	}
 	else
 	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
@@ -590,7 +600,7 @@ lck_mtx_lock_wait (
 	thread_lock(holder);
 	if (mutex->lck_mtx_pri == 0)
 		holder->promotions++;
-	holder->sched_mode |= TH_MODE_PROMOTED;
+	holder->sched_flags |= TH_SFLAG_PROMOTED;
 	if (		mutex->lck_mtx_pri < priority	&&
 				holder->sched_pri < priority		) {
 		KERNEL_DEBUG_CONSTANT(
@@ -672,7 +682,7 @@ lck_mtx_lock_acquire(
 
 		thread_lock(thread);
 		thread->promotions++;
-		thread->sched_mode |= TH_MODE_PROMOTED;
+		thread->sched_flags |= TH_SFLAG_PROMOTED;
 		if (thread->sched_pri < priority) {
 			KERNEL_DEBUG_CONSTANT(
 				MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
@@ -709,20 +719,22 @@ lck_mtx_unlock_wakeup (
 	else
 		mutex = &lck->lck_mtx_ptr->lck_mtx;
 
+	if (thread != holder)
+		panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);
 
 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START, (int)lck, (int)holder, 0, 0, 0);
 
-	if (thread != holder)
-		panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);
+	assert(mutex->lck_mtx_waiters > 0);
+	thread_wakeup_one((event_t)(((unsigned int*)lck)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));
 
 	if (thread->promotions > 0) {
 		spl_t		s = splsched();
 
 		thread_lock(thread);
 		if (	--thread->promotions == 0				&&
-				(thread->sched_mode & TH_MODE_PROMOTED)		) {
-			thread->sched_mode &= ~TH_MODE_PROMOTED;
-			if (thread->sched_mode & TH_MODE_ISDEPRESSED) {
+				(thread->sched_flags & TH_SFLAG_PROMOTED)		) {
+			thread->sched_flags &= ~TH_SFLAG_PROMOTED;
+			if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
 				KERNEL_DEBUG_CONSTANT(
 					MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
 						  thread->sched_pri, DEPRESSPRI, 0, lck, 0);
@@ -738,14 +750,12 @@ lck_mtx_unlock_wakeup (
 									0, lck, 0);
 				}
 
-				compute_priority(thread, FALSE);
+				SCHED(compute_priority)(thread, FALSE);
 			}
 		}
 		thread_unlock(thread);
 		splx(s);
 	}
-	assert(mutex->lck_mtx_waiters > 0);
-	thread_wakeup_one((event_t)(((unsigned int*)lck)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));
 
 	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
 }
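
lck_mtx_sleep_deadline() now honors LCK_SLEEP_SPIN on wakeup, reacquiring the mutex in spin mode when the sleeper held it that way. A sketch of a waiter using the new behavior; the event, flag, and function name are illustrative:

/* Illustrative waiter: hold lck in spin mode across the wait; on
 * wakeup the lock is reacquired via lck_mtx_lock_spin() per the
 * LCK_SLEEP_SPIN handling above. */
static void
wait_for_flag(lck_mtx_t *lck, int *flagp, uint64_t deadline)
{
	lck_mtx_lock_spin(lck);
	while (*flagp == 0) {
		if (lck_mtx_sleep_deadline(lck, LCK_SLEEP_SPIN,
		    (event_t)flagp, THREAD_UNINT, deadline) == THREAD_TIMED_OUT)
			break;
	}
	lck_mtx_unlock(lck);
}
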
diff --git a/osfmk/kern/locks.h b/osfmk/kern/locks.h
index d23fbc36e..659336194 100644
--- a/osfmk/kern/locks.h
+++ b/osfmk/kern/locks.h
@@ -292,6 +292,7 @@ extern void	lck_mtx_unlock(lck_mtx_t		*lck) __DARWIN10_ALIAS(lck_mtx_unlock);
 extern void				lck_mtx_unlock(
 									lck_mtx_t		*lck);
 #endif	/* __i386__ */
+
 extern void				lck_mtx_destroy(
 									lck_mtx_t		*lck,
 									lck_grp_t		*grp);
@@ -323,18 +324,26 @@ extern void				mutex_pause(uint32_t);
 extern void 			lck_mtx_yield (
 									lck_mtx_t		*lck);
 
-#if defined(i386) || defined(x86_64)
+#if defined(__i386__) || defined(__x86_64__)
 extern boolean_t		lck_mtx_try_lock_spin(
 									lck_mtx_t		*lck);
 
+extern void			lck_mtx_lock_spin_always(
+									lck_mtx_t		*lck);
+
 extern void			lck_mtx_lock_spin(
 									lck_mtx_t		*lck);
 
 extern void			lck_mtx_convert_spin(
 									lck_mtx_t		*lck);
+
+#define lck_mtx_unlock_always(l)	lck_mtx_unlock(l)
+
 #else
 #define lck_mtx_try_lock_spin(l)	lck_mtx_try_lock(l)
 #define	lck_mtx_lock_spin(l)		lck_mtx_lock(l)
+#define lck_mtx_lock_spin_always(l)	lck_spin_lock(l)
+#define lck_mtx_unlock_always(l)	lck_spin_unlock(l)
 #define	lck_mtx_convert_spin(l)		do {} while (0)
 #endif
 
diff --git a/osfmk/kern/mach_param.h b/osfmk/kern/mach_param.h
index 1afd09bf5..44b21a9da 100644
--- a/osfmk/kern/mach_param.h
+++ b/osfmk/kern/mach_param.h
@@ -80,7 +80,7 @@ extern int thread_max, task_threadmax, task_max;
 				+ 40000)		/* slop for objects */
 					/* Number of ports, system-wide */
 
-#define SET_MAX		(task_max + thread_max + 200)
+#define SET_MAX		(task_max + (thread_max * 2) + 200)
 					/* Max number of port sets */
 
 #define	ITE_MAX		(1 << 16)	/* Max number of splay tree entries */
diff --git a/osfmk/kern/machine.c b/osfmk/kern/machine.c
index 72d91647e..9b310f031 100644
--- a/osfmk/kern/machine.c
+++ b/osfmk/kern/machine.c
@@ -88,6 +88,8 @@
 #include <kern/task.h>
 #include <kern/thread.h>
 
+#include <machine/commpage.h>
+
 #if HIBERNATION
 #include <IOKit/IOHibernatePrivate.h>
 #endif
@@ -120,11 +122,14 @@ processor_up(
 	init_ast_check(processor);
 	pset = processor->processor_set;
 	pset_lock(pset);
-	if (++pset->processor_count == 1)
-		pset->low_pri = pset->low_count = processor;
+	if (++pset->online_processor_count == 1) {
+		pset_pri_init_hint(pset, processor);
+		pset_count_init_hint(pset, processor);
+	}
 	enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
 	processor->state = PROCESSOR_RUNNING;
 	(void)hw_atomic_add(&processor_avail_count, 1);
+	commpage_update_active_cpus();
 	pset_unlock(pset);
 	ml_cpu_up();
 	splx(s);
@@ -214,10 +219,10 @@ processor_shutdown(
 	}
 
 	if (processor->state == PROCESSOR_IDLE)
-		remqueue(&pset->idle_queue, (queue_entry_t)processor);
+		remqueue((queue_entry_t)processor);
 	else
 	if (processor->state == PROCESSOR_RUNNING)
-		remqueue(&pset->active_queue, (queue_entry_t)processor);
+		remqueue((queue_entry_t)processor);
 
 	processor->state = PROCESSOR_SHUTDOWN;
 
@@ -283,6 +288,7 @@ processor_offline(
 	new_thread = processor->idle_thread;
 	processor->active_thread = new_thread;
 	processor->current_pri = IDLEPRI;
+	processor->current_thmode = TH_MODE_NONE;
 	processor->deadline = UINT64_MAX;
 	new_thread->last_processor = processor;
 
@@ -298,10 +304,13 @@ processor_offline(
 	pset = processor->processor_set;
 	pset_lock(pset);
 	processor->state = PROCESSOR_OFF_LINE;
-	if (--pset->processor_count == 0)
-		pset->low_pri = pset->low_count = PROCESSOR_NULL;
+	if (--pset->online_processor_count == 0) {
+		pset_pri_init_hint(pset, PROCESSOR_NULL);
+		pset_count_init_hint(pset, PROCESSOR_NULL);
+	}
 	(void)hw_atomic_sub(&processor_avail_count, 1);
-	processor_queue_shutdown(processor);
+	commpage_update_active_cpus();
+	SCHED(processor_queue_shutdown)(processor);
 	/* pset lock dropped */
 
 	ml_cpu_down();
diff --git a/osfmk/kern/misc_protos.h b/osfmk/kern/misc_protos.h
index 0b7d5a0cc..f7fb46b3c 100644
--- a/osfmk/kern/misc_protos.h
+++ b/osfmk/kern/misc_protos.h
@@ -109,7 +109,8 @@ extern int copyoutmsg(
 	mach_msg_size_t nbytes);
 
 /* Invalidate copy window(s) cache */
-extern void    inval_copy_windows(thread_t);
+extern void inval_copy_windows(thread_t);
+extern void copy_window_fault(thread_t, vm_map_t, int);
 
 extern int sscanf(const char *input, const char *fmt, ...) __scanflike(2,3);
 
@@ -166,6 +167,8 @@ extern void cnputcusr(char);
 
 extern void conslog_putc(char);
 
+extern void cons_putc_locked(char);
+
 extern void consdebug_putc(char);
 
 extern void consdebug_log(char);
diff --git a/osfmk/kern/mk_sp.c b/osfmk/kern/mk_sp.c
index 78a6371fd..d8e86124b 100644
--- a/osfmk/kern/mk_sp.c
+++ b/osfmk/kern/mk_sp.c
@@ -79,23 +79,27 @@ thread_policy_common(
 	if (thread->static_param)
 		return (KERN_SUCCESS);
 
+	if ((policy == POLICY_TIMESHARE)
+		&& !SCHED(supports_timeshare_mode)())
+		policy = TH_MODE_FIXED;
+
 	s = splsched();
 	thread_lock(thread);
 
-	if (	!(thread->sched_mode & TH_MODE_REALTIME)	&&
-			!(thread->safe_mode & TH_MODE_REALTIME)			) {
-		if (!(thread->sched_mode & TH_MODE_FAILSAFE)) {
-			integer_t	oldmode = (thread->sched_mode & TH_MODE_TIMESHARE);
+	if (	(thread->sched_mode != TH_MODE_REALTIME)	&&
+			(thread->saved_mode != TH_MODE_REALTIME)		) {
+		if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
+			boolean_t	oldmode = thread->sched_mode == TH_MODE_TIMESHARE;
 
 			if (policy == POLICY_TIMESHARE && !oldmode) {
-				thread->sched_mode |= TH_MODE_TIMESHARE;
+				thread->sched_mode = TH_MODE_TIMESHARE;
 
 				if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
 					sched_share_incr();
 			}
 			else
 			if (policy != POLICY_TIMESHARE && oldmode) {
-				thread->sched_mode &= ~TH_MODE_TIMESHARE;
+				thread->sched_mode = TH_MODE_FIXED;
 
 				if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
 					sched_share_decr();
@@ -103,9 +107,9 @@ thread_policy_common(
 		}
 		else {
 			if (policy == POLICY_TIMESHARE)
-				thread->safe_mode |= TH_MODE_TIMESHARE;
+				thread->saved_mode = TH_MODE_TIMESHARE;
 			else
-				thread->safe_mode &= ~TH_MODE_TIMESHARE;
+				thread->saved_mode = TH_MODE_FIXED;
 		}
 
 		if (priority >= thread->max_priority)
@@ -129,6 +133,12 @@ thread_policy_common(
 
 		thread->importance = priority - thread->task_priority;
 
+#if CONFIG_EMBEDDED
+		/* No one can have a base priority less than MAXPRI_THROTTLE */
+		if (priority < MAXPRI_THROTTLE) 
+			priority = MAXPRI_THROTTLE;
+#endif /* CONFIG_EMBEDDED */
+
 		set_priority(thread, priority);
 	}
 
diff --git a/osfmk/kern/pms.h b/osfmk/kern/pms.h
index dceb5bbe5..990c71b2a 100644
--- a/osfmk/kern/pms.h
+++ b/osfmk/kern/pms.h
@@ -145,9 +145,6 @@ typedef struct {
 } pmsctl_t;
 
 extern pmsCtl pmsCtls;				/* Power Management Stepper control */
-#ifdef __ppc__
-extern uint32_t pmsCtlp;
-#endif
 extern uint32_t pmsBroadcastWait;	/* Number of outstanding broadcasts */
 extern int pmsInstalled;
 extern int pmsExperimental;
@@ -157,12 +154,6 @@ extern pmsSetFunc_t pmsFuncTab[pmsSetFuncMax];
 extern pmsQueryFunc_t pmsQueryFunc;
 extern uint32_t pmsPlatformData;
 
-#ifdef __ppc__
-# ifdef XNU_KERNEL_PRIVATE
-# include <ppc/savearea.h>
-# endif /* XNU_KERNEL_PRIVATE */
-extern int pmsCntrl(struct savearea *save);
-#endif /* __ppc__ */
 extern kern_return_t pmsControl(uint32_t request, user_addr_t reqaddr, uint32_t reqsize);
 extern void pmsInit(void);
 extern void pmsStep(int timer);
@@ -188,10 +179,8 @@ extern kern_return_t pmsBuild(pmsDef *pd, uint32_t pdsize, pmsSetFunc_t *functab
 extern void pmsRun(uint32_t nstep);
 extern void pmsPark(void);
 extern void pmsStart(void);
-# ifndef __ppc__
 extern kern_return_t pmsCPULoadVIDTable(uint16_t *tablep, int nstates);	/* i386 only */
 extern kern_return_t pmsCPUSetPStateLimit(uint32_t limit);
-# endif
 #ifdef __cplusplus
 }
 #endif
diff --git a/osfmk/kern/printf.c b/osfmk/kern/printf.c
index fd04f883a..730be5c81 100644
--- a/osfmk/kern/printf.c
+++ b/osfmk/kern/printf.c
@@ -172,17 +172,12 @@
 #endif
 #include <console/serial_protos.h>
 
-#ifdef __ppc__
-#include <ppc/Firmware.h>
-#endif
-
 #define isdigit(d) ((d) >= '0' && (d) <= '9')
 #define Ctod(c) ((c) - '0')
 
 #define MAXBUF (sizeof(long long int) * 8)	/* enough for binary */
 static char digs[] = "0123456789abcdef";
 
-
 #if CONFIG_NO_PRINTF_STRINGS
 /* Prevent CPP from breaking the definition below */
 #undef printf
@@ -762,6 +757,14 @@ conslog_putc(
 #endif
 }
 
+void
+cons_putc_locked(
+	char c)
+{
+	if ((debug_mode && !disable_debug_output) || !disableConsoleOutput)
+		cnputc(c);
+}
+
 #if	MACH_KDB
 extern void db_putchar(char c);
 #endif
@@ -860,6 +863,8 @@ kdb_printf_unbuffered(const char *fmt, ...)
 	return 0;
 }
 
+#if !CONFIG_EMBEDDED
+
 static void
 copybyte(int c, void *arg)
 {
@@ -891,3 +896,4 @@ sprintf(char *buf, const char *fmt, ...)
 	*copybyte_str = '\0';
         return (int)strlen(buf);
 }
+#endif /* !CONFIG_EMBEDDED */
diff --git a/osfmk/kern/priority.c b/osfmk/kern/priority.c
index b46c5bee3..74b90dfa8 100644
--- a/osfmk/kern/priority.c
+++ b/osfmk/kern/priority.c
@@ -69,6 +69,7 @@
 #include <kern/host.h>
 #include <kern/mach_param.h>
 #include <kern/sched.h>
+#include <sys/kdebug.h>
 #include <kern/spl.h>
 #include <kern/thread.h>
 #include <kern/processor.h>
@@ -93,63 +94,58 @@ thread_quantum_expire(
 
 	thread_lock(thread);
 
+	/*
+	 * We've run up until our quantum expiration, and will (potentially)
+	 * continue without re-entering the scheduler, so update this now.
+	 */
+	thread->last_run_time = processor->quantum_end;
+	
 	/*
 	 *	Check for fail-safe trip.
 	 */
-	if (!(thread->sched_mode & (TH_MODE_TIMESHARE|TH_MODE_PROMOTED))) {
-		uint64_t			new_computation;
+	if ((thread->sched_mode == TH_MODE_REALTIME || thread->sched_mode == TH_MODE_FIXED) && 
+	    !(thread->sched_flags & TH_SFLAG_PROMOTED) &&
+	    !(thread->options & TH_OPT_SYSTEM_CRITICAL)) {
+		uint64_t new_computation;
 
-		new_computation = processor->quantum_end;
-		new_computation -= thread->computation_epoch;
-		if (new_computation + thread->computation_metered >
-											max_unsafe_computation) {
+		new_computation = processor->quantum_end - thread->computation_epoch;
+		new_computation += thread->computation_metered;
+		if (new_computation > max_unsafe_computation) {
 
-			if (thread->sched_mode & TH_MODE_REALTIME) {
-				thread->priority = DEPRESSPRI;
+			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_FAILSAFE)|DBG_FUNC_NONE,
+					(uintptr_t)thread->sched_pri, (uintptr_t)thread->sched_mode, 0, 0, 0);
 
-				thread->safe_mode |= TH_MODE_REALTIME;
-				thread->sched_mode &= ~TH_MODE_REALTIME;
+			if (thread->sched_mode == TH_MODE_REALTIME) {
+				thread->priority = DEPRESSPRI;
+			}
+			
+			thread->saved_mode = thread->sched_mode;
+
+			if (SCHED(supports_timeshare_mode)()) {
+				sched_share_incr();
+				thread->sched_mode = TH_MODE_TIMESHARE;
+			} else {
+				/* XXX handle fixed->fixed case */
+				thread->sched_mode = TH_MODE_FIXED;
 			}
 
-			sched_share_incr();
-
-			thread->safe_release = sched_tick + sched_safe_duration;
-			thread->sched_mode |= (TH_MODE_FAILSAFE|TH_MODE_TIMESHARE);
+			thread->safe_release = processor->quantum_end + sched_safe_duration;
+			thread->sched_flags |= TH_SFLAG_FAILSAFE;
 		}
 	}
 		
 	/*
 	 *	Recompute scheduled priority if appropriate.
 	 */
-	if (thread->sched_stamp != sched_tick)
-		update_priority(thread);
+	if (SCHED(can_update_priority)(thread))
+		SCHED(update_priority)(thread);
 	else
-	if (thread->sched_mode & TH_MODE_TIMESHARE) {
-		register uint32_t	delta;
-
-		thread_timer_delta(thread, delta);
-
-		/*
-		 *	Accumulate timesharing usage only
-		 *	during contention for processor
-		 *	resources.
-		 */
-		if (thread->pri_shift < INT8_MAX)
-			thread->sched_usage += delta;
-
-		thread->cpu_delta += delta;
-
-		/*
-		 * Adjust the scheduled priority if
-		 * the thread has not been promoted
-		 * and is not depressed.
-		 */
-		if (	!(thread->sched_mode & TH_MODE_PROMOTED)	&&
-				!(thread->sched_mode & TH_MODE_ISDEPRESSED)		)
-			compute_my_priority(thread);
-	}
+		SCHED(lightweight_update_priority)(thread);
 
+	SCHED(quantum_expire)(thread);
+	
 	processor->current_pri = thread->sched_pri;
+	processor->current_thmode = thread->sched_mode;
 
 	/*
 	 *	This quantum is up, give this thread another.
@@ -158,9 +154,11 @@ thread_quantum_expire(
 		processor->timeslice--;
 
 	thread_quantum_init(thread);
+	thread->last_quantum_refill_time = processor->quantum_end;
+
 	processor->quantum_end += thread->current_quantum;
 	timer_call_enter1(&processor->quantum_timer,
-							thread, processor->quantum_end);
+							thread, processor->quantum_end, 0);
 
 	/*
 	 *	Context switch check.
@@ -173,7 +171,7 @@ thread_quantum_expire(
 		pset_lock(pset);
 
 		pset_pri_hint(pset, processor, processor->current_pri);
-		pset_count_hint(pset, processor, processor->runq.count);
+		pset_count_hint(pset, processor, SCHED(processor_runq_count)(processor));
 
 		pset_unlock(pset);
 	}
@@ -181,6 +179,46 @@ thread_quantum_expire(
 	thread_unlock(thread);
 }
 
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
+void
+sched_traditional_quantum_expire(thread_t	thread __unused)
+{
+	/*
+	 * No special behavior when a timeshare, fixed, or realtime thread
+	 * uses up its entire quantum
+	 */
+}
+
+void
+lightweight_update_priority(thread_t thread)
+{
+	if (thread->sched_mode == TH_MODE_TIMESHARE) {
+		register uint32_t	delta;
+		
+		thread_timer_delta(thread, delta);
+		
+		/*
+		 *	Accumulate timesharing usage only
+		 *	during contention for processor
+		 *	resources.
+		 */
+		if (thread->pri_shift < INT8_MAX)
+			thread->sched_usage += delta;
+		
+		thread->cpu_delta += delta;
+		
+		/*
+		 * Adjust the scheduled priority if
+		 * the thread has not been promoted
+		 * and is not depressed.
+		 */
+		if (	!(thread->sched_flags & TH_SFLAG_PROMOTED)	&&
+			!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)		)
+			compute_my_priority(thread);
+	}	
+}
+
 /*
  *	Define shifts for simulating (5/8) ** n
  *
@@ -236,6 +274,8 @@ static struct shift_data	sched_decay_shifts[SCHED_DECAY_TICKS] = {
 		(pri) = MAXPRI_KERNEL;											\
 	MACRO_END
 
+#endif /* defined(CONFIG_SCHED_TRADITIONAL) */
+
 #endif
 
 /*
@@ -252,9 +292,11 @@ set_priority(
 	register int		priority)
 {
 	thread->priority = priority;
-	compute_priority(thread, FALSE);
+	SCHED(compute_priority)(thread, FALSE);
 }
 
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
 /*
  *	compute_priority:
  *
@@ -271,10 +313,10 @@ compute_priority(
 {
 	register int		priority;
 
-	if (	!(thread->sched_mode & TH_MODE_PROMOTED)			&&
-			(!(thread->sched_mode & TH_MODE_ISDEPRESSED)	||
+	if (	!(thread->sched_flags & TH_SFLAG_PROMOTED)			&&
+			(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)	||
 				 override_depress							)		) {
-		if (thread->sched_mode & TH_MODE_TIMESHARE)
+		if (thread->sched_mode == TH_MODE_TIMESHARE)
 			do_priority_computation(thread, priority);
 		else
 			priority = thread->priority;
@@ -305,6 +347,23 @@ compute_my_priority(
 	thread->sched_pri = priority;
 }
 
+/*
+ *	can_update_priority
+ *
+ *	Make sure we don't do re-dispatches more frequently than a scheduler tick.
+ *
+ *	Called with the thread locked.
+ */
+boolean_t
+can_update_priority(
+					thread_t	thread)
+{
+	if (sched_tick == thread->sched_stamp)
+		return (FALSE);
+	else
+		return (TRUE);
+}
+
 /*
  *	update_priority
  *
@@ -368,43 +427,45 @@ update_priority(
 	/*
 	 *	Check for fail-safe release.
 	 */
-	if (	(thread->sched_mode & TH_MODE_FAILSAFE)		&&
-			thread->sched_stamp >= thread->safe_release		) {
-		if (!(thread->safe_mode & TH_MODE_TIMESHARE)) {
-			if (thread->safe_mode & TH_MODE_REALTIME) {
+	if (	(thread->sched_flags & TH_SFLAG_FAILSAFE)		&&
+			mach_absolute_time() >= thread->safe_release		) {
+		if (thread->saved_mode != TH_MODE_TIMESHARE) {
+			if (thread->saved_mode == TH_MODE_REALTIME) {
 				thread->priority = BASEPRI_RTQUEUES;
-
-				thread->sched_mode |= TH_MODE_REALTIME;
 			}
 
-			thread->sched_mode &= ~TH_MODE_TIMESHARE;
+			thread->sched_mode = thread->saved_mode;
+			thread->saved_mode = TH_MODE_NONE;
 
 			if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
 				sched_share_decr();
 
-			if (!(thread->sched_mode & TH_MODE_ISDEPRESSED))
+			if (!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK))
 				set_sched_pri(thread, thread->priority);
 		}
 
-		thread->safe_mode = 0;
-		thread->sched_mode &= ~TH_MODE_FAILSAFE;
+		thread->sched_flags &= ~TH_SFLAG_FAILSAFE;
 	}
 
 	/*
 	 *	Recompute scheduled priority if appropriate.
 	 */
-	if (	(thread->sched_mode & TH_MODE_TIMESHARE)	&&
-			!(thread->sched_mode & TH_MODE_PROMOTED)	&&
-			!(thread->sched_mode & TH_MODE_ISDEPRESSED)		) {
+	if (	(thread->sched_mode == TH_MODE_TIMESHARE)	&&
+			!(thread->sched_flags & TH_SFLAG_PROMOTED)	&&
+			!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)		) {
 		register int		new_pri;
 
 		do_priority_computation(thread, new_pri);
 		if (new_pri != thread->sched_pri) {
-			boolean_t		removed = run_queue_remove(thread);
+			boolean_t		removed = thread_run_queue_remove(thread);
 
 			thread->sched_pri = new_pri;
 			if (removed)
 				thread_setrun(thread, SCHED_TAILQ);
 		}
 	}
+	
+	return;
 }
+
+#endif /* CONFIG_SCHED_TRADITIONAL */
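
The sched_decay_shifts table referenced above approximates multiplication by (5/8)^n with two shifts: each entry's shift1/shift2 pair satisfies x * (5/8)^n ≈ (x >> shift1) + (x >> shift2), with a negative shift2 meaning the second term is subtracted. For n = 1 this is exact, since 5/8 = 1/2 + 1/8. A sketch of how one entry is applied — the sign convention mirrors the decay code elsewhere in the scheduler and is stated here as an assumption:

/* Apply one decay entry: usage * (5/8)^n via shifts and an add/sub. */
static uint32_t
decay_once(uint32_t usage, int shift1, int shift2)
{
	if (shift2 > 0)
		return (usage >> shift1) + (usage >> shift2);
	else
		return (usage >> shift1) - (usage >> -shift2);
}
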
diff --git a/osfmk/kern/processor.c b/osfmk/kern/processor.c
index ca65ceca6..b0771351f 100644
--- a/osfmk/kern/processor.c
+++ b/osfmk/kern/processor.c
@@ -89,6 +89,7 @@ struct pset_node		pset_node0;
 decl_simple_lock_data(static,pset_node_lock)
 
 queue_head_t			tasks;
+queue_head_t			terminated_tasks;	/* To be used ONLY for stackshot. */
 int						tasks_count;
 queue_head_t			threads;
 int						threads_count;
@@ -103,6 +104,7 @@ uint32_t				processor_avail_count;
 
 processor_t		master_processor;
 int 			master_cpu = 0;
+boolean_t		sched_stats_active = FALSE;
 
 /* Forwards */
 kern_return_t	processor_set_things(
@@ -120,6 +122,7 @@ processor_bootstrap(void)
 	simple_lock_init(&pset_node_lock, 0);
 
 	queue_init(&tasks);
+	queue_init(&terminated_tasks);
 	queue_init(&threads);
 
 	simple_lock_init(&processor_list_lock, 0);
@@ -140,12 +143,16 @@ processor_init(
 	int					cpu_id,
 	processor_set_t		pset)
 {
-	run_queue_init(&processor->runq);
+	if (processor != master_processor) {
+		/* Scheduler state deferred until sched_init() */
+		SCHED(processor_init)(processor);
+	}
 
 	processor->state = PROCESSOR_OFF_LINE;
 	processor->active_thread = processor->next_thread = processor->idle_thread = THREAD_NULL;
 	processor->processor_set = pset;
 	processor->current_pri = MINPRI;
+	processor->current_thmode = TH_MODE_NONE;
 	processor->cpu_id = cpu_id;
 	timer_call_setup(&processor->quantum_timer, thread_quantum_expire, processor);
 	processor->deadline = UINT64_MAX;
@@ -236,10 +243,16 @@ pset_init(
 	processor_set_t		pset,
 	pset_node_t			node)
 {
+	if (pset != &pset0) {
+		/* Scheduler state deferred until sched_init() */
+		SCHED(pset_init)(pset);
+	}
+
 	queue_init(&pset->active_queue);
 	queue_init(&pset->idle_queue);
-	pset->processor_count = 0;
-	pset->low_pri = pset->low_count = PROCESSOR_NULL;
+	pset->online_processor_count = 0;
+	pset_pri_init_hint(pset, PROCESSOR_NULL);
+	pset_count_init_hint(pset, PROCESSOR_NULL);
 	pset->cpu_set_low = pset->cpu_set_hi = 0;
 	pset->cpu_set_count = 0;
 	pset_lock_init(pset);
@@ -321,16 +334,32 @@ processor_info(
 	{
 		register processor_cpu_load_info_t	cpu_load_info;
 
-	    if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT)
+		if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT)
 			return (KERN_FAILURE);
 
-	    cpu_load_info = (processor_cpu_load_info_t) info;
+		cpu_load_info = (processor_cpu_load_info_t) info;
 		cpu_load_info->cpu_ticks[CPU_STATE_USER] =
 							(uint32_t)(timer_grab(&PROCESSOR_DATA(processor, user_state)) / hz_tick_interval);
 		cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] =
 							(uint32_t)(timer_grab(&PROCESSOR_DATA(processor, system_state)) / hz_tick_interval);
-		cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
+		{
+		timer_data_t	idle_temp;
+		timer_t		idle_state;
+
+		idle_state = &PROCESSOR_DATA(processor, idle_state);
+		idle_temp = *idle_state;
+
+		if (PROCESSOR_DATA(processor, current_state) != idle_state ||
+		    timer_grab(&idle_temp) != timer_grab(idle_state))
+			cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
 							(uint32_t)(timer_grab(&PROCESSOR_DATA(processor, idle_state)) / hz_tick_interval);
+		else {
+			timer_advance(&idle_temp, mach_absolute_time() - idle_temp.tstamp);
+				
+			cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
+				(uint32_t)(timer_grab(&idle_temp) / hz_tick_interval);
+		}
+		}
 		cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0;
 
 	    *count = PROCESSOR_CPU_LOAD_INFO_COUNT;
diff --git a/osfmk/kern/processor.h b/osfmk/kern/processor.h
index 342a90081..0407b8541 100644
--- a/osfmk/kern/processor.h
+++ b/osfmk/kern/processor.h
@@ -87,13 +87,19 @@ struct processor_set {
 
 	processor_t			low_pri, low_count;
 
-	int					processor_count;
+	int					online_processor_count;
 
 	int					cpu_set_low, cpu_set_hi;
 	int					cpu_set_count;
 
 	decl_simple_lock_data(,sched_lock)	/* lock for above */
 
+#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_FIXEDPRIORITY)
+	struct run_queue	pset_runq;      /* runq for this processor set */
+	int					pset_runq_bound_count;
+		/* # of threads in runq bound to any processor in pset */
+#endif
+    
 	struct ipc_port	*	pset_self;		/* port for operations */
 	struct ipc_port *	pset_name_self;	/* port for information */
 
@@ -114,7 +120,7 @@ struct pset_node {
 
 extern struct pset_node	pset_node0;
 
-extern queue_head_t		tasks, threads;
+extern queue_head_t		tasks, terminated_tasks, threads; /* Terminated tasks are ONLY for stackshot */
 extern int				tasks_count, threads_count;
 decl_lck_mtx_data(extern,tasks_threads_lock)
 
@@ -138,6 +144,7 @@ struct processor {
 	processor_set_t		processor_set;	/* assigned set */
 
 	int					current_pri;	/* priority of current thread */
+	sched_mode_t		current_thmode;	/* sched mode of current thread */
 	int					cpu_id;			/* platform numeric id */
 
 	timer_call_data_t	quantum_timer;	/* timer for quantum expiration */
@@ -147,7 +154,13 @@ struct processor {
 	uint64_t			deadline;		/* current deadline */
 	int					timeslice;		/* quanta before timeslice ends */
 
+#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_FIXEDPRIORITY)
 	struct run_queue	runq;			/* runq for this processor */
+	int					runq_bound_count; /* # of threads bound to this processor */
+#endif
+#if defined(CONFIG_SCHED_GRRR)
+	struct grrr_run_queue	grrr_runq;      /* Group Ratio Round-Robin runq */
+#endif
 	processor_meta_t	processor_meta;
 
 	struct ipc_port *	processor_self;	/* port for operations */
@@ -164,6 +177,8 @@ extern uint32_t			processor_avail_count;
 
 extern processor_t		master_processor;
 
+extern boolean_t		sched_stats_active;
+
 /*
  *	Processor state is accessed by locking the scheduling lock
  *	for the assigned processor set.
@@ -203,7 +218,7 @@ MACRO_END
 #define pset_count_hint(ps, p, cnt)		\
 MACRO_BEGIN												\
 	if ((p) != (ps)->low_count) {						\
-		if ((cnt) < (ps)->low_count->runq.count)		\
+		if ((cnt) < SCHED(processor_runq_count)((ps)->low_count))		\
 			(ps)->low_count = (p);						\
 		else											\
 		if ((ps)->low_count->state < PROCESSOR_IDLE)	\
@@ -211,6 +226,17 @@ MACRO_BEGIN												\
 	}													\
 MACRO_END
 
+#define pset_pri_init_hint(ps, p)		\
+MACRO_BEGIN												\
+	(ps)->low_pri = (p);								\
+MACRO_END
+
+#define pset_count_init_hint(ps, p)		\
+MACRO_BEGIN												\
+	(ps)->low_count = (p);								\
+MACRO_END
+
+
 extern void		processor_bootstrap(void) __attribute__((section("__TEXT, initcode")));
 
 extern void		processor_init(
diff --git a/osfmk/kern/processor_data.h b/osfmk/kern/processor_data.h
index 200ec35f3..eda5bcce5 100644
--- a/osfmk/kern/processor_data.h
+++ b/osfmk/kern/processor_data.h
@@ -41,6 +41,18 @@
 #include <ipc/ipc_kmsg.h>
 #include <kern/timer.h>
 
+struct processor_sched_statistics {
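+	/* Event counters, maintained only while sched_stats_active is set
+	   (see the SCHED_STATS_* macros below) */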
+	uint32_t		csw_count;
+	uint32_t		preempt_count;
+	uint32_t		preempted_rt_count;
+	uint32_t		preempted_by_rt_count;
+	uint32_t		rt_sched_count;
+	uint32_t		interrupt_count;
+	uint32_t		ipi_count;
+	uint32_t		timer_pop_count;
+	uint32_t		idle_transitions;
+};
+
 struct processor_data {
 	/* Processor state statistics */
 	timer_data_t			idle_state;
@@ -72,6 +84,8 @@ struct processor_data {
 	unsigned long			page_grab_count;
 	int						start_color;
 	void					*free_pages;
+
+	struct processor_sched_statistics sched_stats;
 };
 
 typedef struct processor_data	processor_data_t;
@@ -82,6 +96,34 @@ typedef struct processor_data	processor_data_t;
 extern	void	processor_data_init(
 					processor_t		processor);
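+
+/*
+ * Each statistics macro tests sched_stats_active with
+ * __builtin_expect(..., 0), so the disabled case costs only a
+ * predicted-not-taken branch.
+ */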
 
+#define SCHED_STATS_INTERRUPT(p) 					\
+MACRO_BEGIN								\
+	if (__builtin_expect(sched_stats_active, 0)) { 		\
+		(p)->processor_data.sched_stats.interrupt_count++;	\
+	}								\
+MACRO_END	
+
+#define SCHED_STATS_TIMER_POP(p)					\
+MACRO_BEGIN								\
+	if (__builtin_expect(sched_stats_active, 0)) { 		\
+		(p)->processor_data.sched_stats.timer_pop_count++; 	\
+	}								\
+MACRO_END
+
+#define SCHED_STATS_IPI(p) 						\
+MACRO_BEGIN								\
+	if (__builtin_expect(sched_stats_active, 0)) { 		\
+		(p)->processor_data.sched_stats.ipi_count++; 		\
+	}								\
+MACRO_END
+
+#define SCHED_STATS_CPU_IDLE_START(p)								\
+MACRO_BEGIN											\
+	if (__builtin_expect(sched_stats_active, 0)) { 					\
+		(p)->processor_data.sched_stats.idle_transitions++;				\
+	}											\
+MACRO_END
+
 #endif /* MACH_KERNEL_PRIVATE */
 
 #endif /* _KERN_PROCESSOR_DATA_H_ */
diff --git a/osfmk/kern/queue.c b/osfmk/kern/queue.c
index 06eba9ebc..052770f7a 100644
--- a/osfmk/kern/queue.c
+++ b/osfmk/kern/queue.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -28,66 +28,6 @@
 /*
  * @OSF_COPYRIGHT@
  */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:33  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:25:55  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.10.3  1995/03/15  17:21:19  bruel
- * 	compile only if !__GNUC__.
- * 	[95/03/09            bruel]
- *
- * Revision 1.1.10.2  1995/01/06  19:48:05  devrcs
- * 	mk6 CR668 - 1.3b26 merge
- * 	* Revision 1.1.3.5  1994/05/06  18:51:43  tmt
- * 	Merge in DEC Alpha changes to osc1.3b19.
- * 	Merge Alpha changes into osc1.312b source code.
- * 	Remove ifdef sun around insque and remque.
- * 	* End1.3merge
- * 	[1994/11/04  09:29:15  dwm]
- * 
- * Revision 1.1.10.1  1994/09/23  02:25:00  ezf
- * 	change marker to not FREE
- * 	[1994/09/22  21:35:34  ezf]
- * 
- * Revision 1.1.3.3  1993/07/28  17:16:26  bernard
- * 	CR9523 -- Prototypes.
- * 	[1993/07/21  17:00:38  bernard]
- * 
- * Revision 1.1.3.2  1993/06/02  23:39:41  jeffc
- * 	Added to OSF/1 R1.3 from NMK15.0.
- * 	[1993/06/02  21:13:58  jeffc]
- * 
- * Revision 1.1  1992/09/30  02:09:52  robert
- * 	Initial revision
- * 
- * $EndLog$
- */
-/* CMU_HIST */
-/*
- * Revision 2.4  91/05/14  16:45:45  mrt
- * 	Correcting copyright
- * 
- * Revision 2.3  91/05/08  12:48:22  dbg
- * 	Compile queue routines on vax.
- * 	[91/03/26            dbg]
- * 
- * Revision 2.2  91/02/05  17:28:38  mrt
- * 	Changed to new Mach copyright
- * 	[91/02/01  16:16:22  mrt]
- * 
- * Revision 2.1  89/08/03  15:51:47  rwd
- * Created.
- * 
- * 17-Mar-87  David Golub (dbg) at Carnegie-Mellon University
- *	Created from routines written by David L. Black.
- *
- */ 
-/* CMU_ENDHIST */
 /* 
  * Mach Operating System
  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
@@ -190,14 +130,13 @@ dequeue_tail(
 
 /*
  *	Remove arbitrary element from queue.
- *	Does not check whether element is on queue - the world
+ *	Does not check whether element is on a queue - the world
  *	will go haywire if it isn't.
  */
 
 /*ARGSUSED*/
 void
 remqueue(
-	queue_t			que,
 	register queue_entry_t	elt)
 {
 	elt->next->prev = elt->prev;
diff --git a/osfmk/kern/queue.h b/osfmk/kern/queue.h
index d0bab0c61..836b55293 100644
--- a/osfmk/kern/queue.h
+++ b/osfmk/kern/queue.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -106,7 +106,7 @@ typedef	struct queue_entry	*queue_entry_t;
 /*
  *	enqueue puts "elt" on the "queue".
  *	dequeue returns the first element in the "queue".
- *	remqueue removes the specified "elt" from the specified "queue".
+ *	remqueue removes the specified "elt" from its queue.
  */
 
 #define enqueue(queue,elt)	enqueue_tail(queue, elt)
@@ -137,7 +137,6 @@ extern queue_entry_t	dequeue_tail(
 
 /* Dequeue element */
 extern void		remqueue(
-				queue_t		que,
 				queue_entry_t	elt);
 
 /* Enqueue element after a particular elem */
@@ -153,6 +152,15 @@ __END_DECLS
 
 #else	/* !__GNUC__ */
 
+#ifdef XNU_KERNEL_PRIVATE
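+/*
+ * Poison a dequeued element's links so that any stale next/prev
+ * pointer is NULL and traps on use instead of silently corrupting
+ * a live queue.
+ */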
+#define __DEQUEUE_ELT_CLEANUP(elt) do { \
+		(elt)->next = (queue_entry_t) 0; \
+		(elt)->prev = (queue_entry_t) 0; \
+	} while (0)
+#else
+#define __DEQUEUE_ELT_CLEANUP(elt) do { } while(0)
+#endif /* !XNU_KERNEL_PRIVATE */
+
 static __inline__ void
 enqueue_head(
 	queue_t		que,
@@ -185,6 +193,7 @@ dequeue_head(
 		elt = que->next;
 		elt->next->prev = que;
 		que->next = elt->next;
+		__DEQUEUE_ELT_CLEANUP(elt);
 	}
 
 	return (elt);
@@ -200,6 +209,7 @@ dequeue_tail(
 		elt = que->prev;
 		elt->prev->next = que;
 		que->prev = elt->prev;
+		__DEQUEUE_ELT_CLEANUP(elt);
 	}
 
 	return (elt);
@@ -207,11 +217,11 @@ dequeue_tail(
 
 static __inline__ void
 remqueue(
-	__unused queue_t		que,
 	queue_entry_t	elt)
 {
 	elt->next->prev = elt->prev;
 	elt->prev->next = elt->next;
+	__DEQUEUE_ELT_CLEANUP(elt);
 }
 
 static __inline__ void
@@ -231,6 +241,7 @@ remque(
 {
 	(elt->next)->prev = elt->prev;
 	(elt->prev)->next = elt->next;
+	__DEQUEUE_ELT_CLEANUP(elt);
 }
 
 #endif	/* !__GNUC__ */
@@ -603,34 +614,53 @@ MACRO_END
  */
 struct mpqueue_head {
 	struct queue_entry	head;		/* header for queue */
-	decl_simple_lock_data(,	lock)		/* lock for queue */
+	lck_mtx_t		lock_data;
+	lck_mtx_ext_t		lock_data_ext;
 };
 
 typedef struct mpqueue_head	mpqueue_head_t;
 
 #define	round_mpq(size)		(size)
 
-#define mpqueue_init(q)					\
+
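+/*
+ * On i386/x86_64 the queue is protected by an extended mutex taken
+ * in always-spin mode; other architectures use a simple spinlock.
+ */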
+#if defined(__i386__) || defined(__x86_64__)
+
+#define mpqueue_init(q, lck_grp, lck_attr)		\
+MACRO_BEGIN						\
+	queue_init(&(q)->head);				\
+        lck_mtx_init_ext(&(q)->lock_data,		\
+			 &(q)->lock_data_ext,		\
+			 lck_grp,			\
+			 lck_attr);			\
+MACRO_END
+
+#else
+
+#define mpqueue_init(q, lck_grp, lck_attr)		\
 MACRO_BEGIN						\
 	queue_init(&(q)->head);				\
-	simple_lock_init(&(q)->lock, 0);	\
+        lck_spin_init(&(q)->lock_data,			\
+		      lck_grp,				\
+		      lck_attr);			\
 MACRO_END
+#endif
+
 
 #define mpenqueue_tail(q, elt)				\
 MACRO_BEGIN						\
-	simple_lock(&(q)->lock);			\
+        lck_mtx_lock_spin_always(&(q)->lock_data);	\
 	enqueue_tail(&(q)->head, elt);			\
-	simple_unlock(&(q)->lock);			\
+	lck_mtx_unlock_always(&(q)->lock_data);		\
 MACRO_END
 
 #define mpdequeue_head(q, elt)				\
 MACRO_BEGIN						\
-	simple_lock(&(q)->lock);			\
+        lck_mtx_lock_spin_always(&(q)->lock_data);	\
 	if (queue_empty(&(q)->head))			\
 		*(elt) = 0;				\
 	else						\
 		*(elt) = dequeue_head(&(q)->head);	\
-	simple_unlock(&(q)->lock);			\
+	lck_mtx_unlock_always(&(q)->lock_data);		\
 MACRO_END
 
 #endif	/* MACH_KERNEL_PRIVATE */
diff --git a/osfmk/kern/sched.h b/osfmk/kern/sched.h
index 10f7b4680..9532f4095 100644
--- a/osfmk/kern/sched.h
+++ b/osfmk/kern/sched.h
@@ -166,40 +166,108 @@
 #define DEPRESSPRI	MINPRI			/* depress priority */
 #endif
 
+/* Type used for thread->sched_mode and saved_mode */
+typedef enum {
+	TH_MODE_NONE = 0,					/* unassigned, usually for saved_mode only */
+	TH_MODE_REALTIME,					/* time constraints supplied */
+	TH_MODE_FIXED,						/* use fixed priorities, no decay */
+	TH_MODE_TIMESHARE,					/* use timesharing algorithm */
+	TH_MODE_FAIRSHARE					/* use fair-share scheduling */		
+} sched_mode_t;
+
 /*
  *	Macro to check for invalid priorities.
  */
 #define invalid_pri(pri) ((pri) < MINPRI || (pri) > MAXPRI)
 
+struct runq_stats {
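+	/* count_sum is a time-weighted accumulation of the queue's count;
+	   last_change_timestamp records when it was last updated */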
+	uint64_t				count_sum;
+	uint64_t				last_change_timestamp;
+};
+
+#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_FIXEDPRIORITY)
+
 struct run_queue {
 	int					highq;				/* highest runnable queue */
 	int					bitmap[NRQBM];		/* run queue bitmap array */
 	int					count;				/* # of threads total */
 	int					urgency;			/* level of preemption urgency */
 	queue_head_t		queues[NRQS];		/* one for each priority */
+
+	struct runq_stats	runq_stats;
 };
 
-typedef struct run_queue	*run_queue_t;
-#define RUN_QUEUE_NULL		((run_queue_t) 0)
+#endif /* defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_FIXEDPRIORITY) */
 
-#define first_timeslice(processor)		((processor)->timeslice > 0)
+struct rt_queue {
+	int					count;				/* # of threads total */
+	queue_head_t		queue;				/* all runnable RT threads */
 
-#define thread_quantum_init(thread)							\
-MACRO_BEGIN													\
-	(thread)->current_quantum = 							\
-		((thread)->sched_mode & TH_MODE_REALTIME)?			\
-			(thread)->realtime.computation: std_quantum;	\
-MACRO_END
+	struct runq_stats	runq_stats;
+};
+
+#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_FIXEDPRIORITY)
+struct fairshare_queue {
+	int					count;				/* # of threads total */
+	queue_head_t		queue;				/* all runnable threads demoted to fairshare scheduling */
+	
+	struct runq_stats	runq_stats;
+};
+#endif
 
-extern struct run_queue		rt_runq;
+#if defined(CONFIG_SCHED_GRRR_CORE)
 
 /*
- *	Scheduler routines.
+ * We map standard Mach priorities to an abstract scale that more properly
+ * indicates how we want processor time allocated under contention.
  */
+typedef uint8_t	grrr_proportional_priority_t;
+typedef uint8_t grrr_group_index_t;
+
+#define NUM_GRRR_PROPORTIONAL_PRIORITIES	256
+#define MAX_GRRR_PROPORTIONAL_PRIORITY ((grrr_proportional_priority_t)255)
+
+#if 0
+#define NUM_GRRR_GROUPS 8					/* log(256) */
+#endif
+
+#define NUM_GRRR_GROUPS 64					/* 256/4 */
+
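+/*
+ * A group collects the runnable threads whose proportional priorities
+ * fall in the same bucket; its weight is the sum of those priorities
+ * and sets the group's share of processor time under contention.
+ */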
+struct grrr_group {
+	queue_chain_t			priority_order;				/* next greatest weight group */
+	grrr_proportional_priority_t		minpriority;
+	grrr_group_index_t		index;
+
+	queue_head_t			clients;
+	int						count;
+	uint32_t				weight;
+#if 0
+	uint32_t				deferred_removal_weight;
+#endif
+	uint32_t				work;
+	thread_t				current_client;
+};
+
+struct grrr_run_queue {
+	int					count;
+	uint32_t			last_rescale_tick;
+	struct grrr_group	groups[NUM_GRRR_GROUPS];
+	queue_head_t		sorted_group_list;
+	uint32_t			weight;
+	grrr_group_t		current_group;
+	
+	struct runq_stats   runq_stats;
+};
+
+#endif /* defined(CONFIG_SCHED_GRRR_CORE) */
+
+#define first_timeslice(processor)		((processor)->timeslice > 0)
 
-/* Remove thread from its run queue */
-extern boolean_t	run_queue_remove(
-						thread_t	thread);
+extern struct rt_queue		rt_runq;
+
+/*
+ *	Scheduler routines.
+ */
 
 /* Handle quantum expiration for an executing thread */
 extern void		thread_quantum_expire(
@@ -209,13 +277,21 @@ extern void		thread_quantum_expire(
 /* Context switch check for current processor */
 extern ast_t	csw_check(processor_t		processor);
 
+#if defined(CONFIG_SCHED_TRADITIONAL)
 extern uint32_t	std_quantum, min_std_quantum;
 extern uint32_t	std_quantum_us;
+#endif
+
+extern uint32_t thread_depress_time;
+extern uint32_t default_timeshare_computation;
+extern uint32_t default_timeshare_constraint;
 
 extern uint32_t	max_rt_quantum, min_rt_quantum;
 
 extern uint32_t	sched_cswtime;
 
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
 /*
  *	Age usage (1 << SCHED_TICK_SHIFT) times per second.
  */
@@ -224,6 +300,10 @@ extern uint32_t	sched_cswtime;
 extern unsigned		sched_tick;
 extern uint32_t		sched_tick_interval;
 
+#endif /* CONFIG_SCHED_TRADITIONAL */
+
+extern uint64_t		sched_one_second_interval;
+
 /* Periodic computation of various averages */
 extern void		compute_averages(void);
 
@@ -236,16 +316,24 @@ extern void		compute_stack_target(
 extern void		compute_memory_pressure(
 					void			*arg);
 
+extern void		compute_zone_gc_throttle(
+					void			*arg);
+
+extern void		compute_pmap_gc_throttle(
+					void			*arg);
+
 /*
  *	Conversion factor from usage
  *	to priority.
  */
+#if defined(CONFIG_SCHED_TRADITIONAL)
 extern uint32_t		sched_pri_shift;
 extern uint32_t		sched_fixed_shift;
 extern int8_t		sched_load_shifts[NRQS];
+#endif
 
 extern int32_t		sched_poll_yield_shift;
-extern uint32_t		sched_safe_duration;
+extern uint64_t		sched_safe_duration;
 
 extern uint32_t		sched_run_count, sched_share_count;
 extern uint32_t		sched_load_average, sched_mach_factor;
@@ -256,13 +344,13 @@ extern uint64_t		max_unsafe_computation;
 extern uint64_t		max_poll_computation;
 
 #define sched_run_incr()			\
-MACRO_BEGIN													\
-	machine_run_count(hw_atomic_add(&sched_run_count, 1));	\
+MACRO_BEGIN					\
+         hw_atomic_add(&sched_run_count, 1);	\
 MACRO_END
 
 #define sched_run_decr()			\
-MACRO_BEGIN													\
-	machine_run_count(hw_atomic_sub(&sched_run_count, 1));	\
+MACRO_BEGIN					\
+	hw_atomic_sub(&sched_run_count, 1);	\
 MACRO_END
 
 #define sched_share_incr()			\
diff --git a/osfmk/kern/sched_average.c b/osfmk/kern/sched_average.c
index e20ddff73..5db621937 100644
--- a/osfmk/kern/sched_average.c
+++ b/osfmk/kern/sched_average.c
@@ -72,6 +72,7 @@
 uint32_t	avenrun[3] = {0, 0, 0};
 uint32_t	mach_factor[3] = {0, 0, 0};
 
+#if defined(CONFIG_SCHED_TRADITIONAL)
 /*
  * Values are scaled by LOAD_SCALE, defined in processor_info.h
  */
@@ -87,22 +88,24 @@ static uint32_t		fract[3] = {
 #undef base
 #undef frac
 
+#endif /* CONFIG_SCHED_TRADITIONAL */
+
 static unsigned int		sched_nrun;
 
 typedef void	(*sched_avg_comp_t)(
 					void			*param);
 
-#define SCHED_AVG_SECS(n)	((n) << SCHED_TICK_SHIFT)
-
 static struct sched_average {
 	sched_avg_comp_t	comp;
 	void				*param;
-	int					period;
-	int					tick;			
+	int					period; /* in seconds */
+	uint64_t			deadline;			
 } sched_average[] = {
-	{ compute_averunnable, &sched_nrun, SCHED_AVG_SECS(5), 0 },
-	{ compute_stack_target, NULL, SCHED_AVG_SECS(5), 1 },
-	{ compute_memory_pressure, NULL, SCHED_AVG_SECS(1), 0 },
+	{ compute_averunnable, &sched_nrun, 5, 0 },
+	{ compute_stack_target, NULL, 5, 1 },
+	{ compute_memory_pressure, NULL, 1, 0 },
+	{ compute_zone_gc_throttle, NULL, 1, 0 },
+	{ compute_pmap_gc_throttle, NULL, 60, 0 },
 	{ NULL, NULL, 0, 0 }
 };
 
@@ -114,7 +117,8 @@ compute_averages(void)
 	int					ncpus, nthreads, nshared;
 	uint32_t			factor_now, average_now, load_now = 0;
 	sched_average_t		avg;
-
+	uint64_t			abstime;
+	
 	/*
 	 *	Retrieve counts, ignoring
 	 *	the current thread.
@@ -154,6 +158,13 @@ compute_averages(void)
 			load_now = NRQS - 1;
 	}
 
+	/*
+	 *	Sample total running threads.
+	 */
+	sched_nrun = nthreads;
+	
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
 	/*
 	 *	The conversion factor consists of
 	 *	two components: a fixed value based
@@ -167,11 +178,6 @@ compute_averages(void)
 	 */
 	sched_pri_shift = sched_fixed_shift - sched_load_shifts[load_now];
 
-	/*
-	 *	Sample total running threads.
-	 */
-	sched_nrun = nthreads;
-
 	/*
 	 * Compute old-style Mach load averages.
 	 */
@@ -186,14 +192,16 @@ compute_averages(void)
 						(average_now * (LOAD_SCALE - fract[i]))) / LOAD_SCALE;
 		}
 	}
+#endif /* CONFIG_SCHED_TRADITIONAL */
 
 	/*
 	 *	Compute averages in other components.
 	 */
+	abstime = mach_absolute_time();
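+	/* Run each component whose absolute-time deadline has passed and
+	 * rearm it one period ahead. */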
 	for (avg = sched_average; avg->comp != NULL; ++avg) {
-		if (++avg->tick >= avg->period) {
+		if (abstime >= avg->deadline) {
 			(*avg->comp)(avg->param);
-			avg->tick = 0;
+			avg->deadline = abstime + avg->period * sched_one_second_interval;
 		}
 	}
 }
diff --git a/osfmk/kern/sched_fixedpriority.c b/osfmk/kern/sched_fixedpriority.c
new file mode 100644
index 000000000..1eca4aaac
--- /dev/null
+++ b/osfmk/kern/sched_fixedpriority.c
@@ -0,0 +1,727 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <mach/mach_types.h>
+#include <mach/machine.h>
+#include <mach/policy.h>
+#include <mach/sync_policy.h>
+#include <mach/thread_act.h>
+
+#include <machine/machine_routines.h>
+#include <machine/sched_param.h>
+#include <machine/machine_cpu.h>
+
+#include <kern/kern_types.h>
+#include <kern/clock.h>
+#include <kern/counters.h>
+#include <kern/cpu_number.h>
+#include <kern/cpu_data.h>
+#include <kern/debug.h>
+#include <kern/lock.h>
+#include <kern/macro_help.h>
+#include <kern/machine.h>
+#include <kern/misc_protos.h>
+#include <kern/processor.h>
+#include <kern/queue.h>
+#include <kern/sched.h>
+#include <kern/sched_prim.h>
+#include <kern/syscall_subr.h>
+#include <kern/task.h>
+#include <kern/thread.h>
+#include <kern/wait_queue.h>
+
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+
+#include <mach/sdt.h>
+
+#include <sys/kdebug.h>
+
+static void
+sched_fixedpriority_init(void);
+
+static void
+sched_fixedpriority_with_pset_runqueue_init(void);
+
+static void
+sched_fixedpriority_timebase_init(void);
+
+static void
+sched_fixedpriority_processor_init(processor_t processor);
+
+static void
+sched_fixedpriority_pset_init(processor_set_t pset);
+
+static void
+sched_fixedpriority_maintenance_continuation(void);
+
+static thread_t
+sched_fixedpriority_choose_thread(processor_t		processor,
+							 int				priority);
+
+static thread_t
+sched_fixedpriority_steal_thread(processor_set_t		pset);
+
+static void
+sched_fixedpriority_compute_priority(thread_t	thread,
+							 boolean_t			override_depress);
+
+static processor_t
+sched_fixedpriority_choose_processor(	processor_set_t		pset,
+								processor_t			processor,
+								thread_t			thread);
+
+
+static boolean_t
+sched_fixedpriority_processor_enqueue(
+							 processor_t			processor,
+							 thread_t			thread,
+							 integer_t			options);
+
+static void
+sched_fixedpriority_processor_queue_shutdown(
+									 processor_t			processor);
+
+static boolean_t
+sched_fixedpriority_processor_queue_remove(
+						    processor_t			processor,
+							thread_t		thread);
+
+static boolean_t
+sched_fixedpriority_processor_queue_empty(processor_t		processor);
+
+static boolean_t
+sched_fixedpriority_processor_queue_has_priority(processor_t		processor,
+										 int				priority,
+										 boolean_t		gte);
+
+static boolean_t
+sched_fixedpriority_priority_is_urgent(int priority);
+
+static ast_t
+sched_fixedpriority_processor_csw_check(processor_t processor);
+
+static uint32_t
+sched_fixedpriority_initial_quantum_size(thread_t thread);
+
+static sched_mode_t
+sched_fixedpriority_initial_thread_sched_mode(task_t parent_task);
+
+static boolean_t
+sched_fixedpriority_supports_timeshare_mode(void);
+
+static boolean_t
+sched_fixedpriority_can_update_priority(thread_t	thread);
+
+static void
+sched_fixedpriority_update_priority(thread_t	thread);
+
+static void
+sched_fixedpriority_lightweight_update_priority(thread_t	thread);
+
+static void
+sched_fixedpriority_quantum_expire(thread_t	thread);
+
+static boolean_t
+sched_fixedpriority_should_current_thread_rechoose_processor(processor_t			processor);
+
+static int
+sched_fixedpriority_processor_runq_count(processor_t	processor);
+
+static uint64_t
+sched_fixedpriority_processor_runq_stats_count_sum(processor_t   processor);
+
+const struct sched_dispatch_table sched_fixedpriority_dispatch = {
+	sched_fixedpriority_init,
+	sched_fixedpriority_timebase_init,
+	sched_fixedpriority_processor_init,
+	sched_fixedpriority_pset_init,
+	sched_fixedpriority_maintenance_continuation,
+	sched_fixedpriority_choose_thread,
+	sched_fixedpriority_steal_thread,
+	sched_fixedpriority_compute_priority,
+	sched_fixedpriority_choose_processor,
+	sched_fixedpriority_processor_enqueue,
+	sched_fixedpriority_processor_queue_shutdown,
+	sched_fixedpriority_processor_queue_remove,
+	sched_fixedpriority_processor_queue_empty,
+	sched_fixedpriority_priority_is_urgent,
+	sched_fixedpriority_processor_csw_check,
+	sched_fixedpriority_processor_queue_has_priority,
+	sched_fixedpriority_initial_quantum_size,
+	sched_fixedpriority_initial_thread_sched_mode,
+	sched_fixedpriority_supports_timeshare_mode,
+	sched_fixedpriority_can_update_priority,
+	sched_fixedpriority_update_priority,
+	sched_fixedpriority_lightweight_update_priority,
+	sched_fixedpriority_quantum_expire,
+	sched_fixedpriority_should_current_thread_rechoose_processor,
+	sched_fixedpriority_processor_runq_count,
+	sched_fixedpriority_processor_runq_stats_count_sum,
+	sched_traditional_fairshare_init,
+	sched_traditional_fairshare_runq_count,
+	sched_traditional_fairshare_runq_stats_count_sum,
+	sched_traditional_fairshare_enqueue,
+	sched_traditional_fairshare_dequeue,
+	sched_traditional_fairshare_queue_remove,
+	TRUE /* direct_dispatch_to_idle_processors */
+};
+
+const struct sched_dispatch_table sched_fixedpriority_with_pset_runqueue_dispatch = {
+	sched_fixedpriority_with_pset_runqueue_init,
+	sched_fixedpriority_timebase_init,
+	sched_fixedpriority_processor_init,
+	sched_fixedpriority_pset_init,
+	sched_fixedpriority_maintenance_continuation,
+	sched_fixedpriority_choose_thread,
+	sched_fixedpriority_steal_thread,
+	sched_fixedpriority_compute_priority,
+	sched_fixedpriority_choose_processor,
+	sched_fixedpriority_processor_enqueue,
+	sched_fixedpriority_processor_queue_shutdown,
+	sched_fixedpriority_processor_queue_remove,
+	sched_fixedpriority_processor_queue_empty,
+	sched_fixedpriority_priority_is_urgent,
+	sched_fixedpriority_processor_csw_check,
+	sched_fixedpriority_processor_queue_has_priority,
+	sched_fixedpriority_initial_quantum_size,
+	sched_fixedpriority_initial_thread_sched_mode,
+	sched_fixedpriority_supports_timeshare_mode,
+	sched_fixedpriority_can_update_priority,
+	sched_fixedpriority_update_priority,
+	sched_fixedpriority_lightweight_update_priority,
+	sched_fixedpriority_quantum_expire,
+	sched_fixedpriority_should_current_thread_rechoose_processor,
+	sched_fixedpriority_processor_runq_count,
+	sched_fixedpriority_processor_runq_stats_count_sum,
+	sched_traditional_fairshare_init,
+	sched_traditional_fairshare_runq_count,
+	sched_traditional_fairshare_runq_stats_count_sum,
+	sched_traditional_fairshare_enqueue,
+	sched_traditional_fairshare_dequeue,
+	sched_traditional_fairshare_queue_remove,
+	FALSE /* direct_dispatch_to_idle_processors */
+};
+
+extern int	max_unsafe_quanta;
+
+#define		SCHED_FIXEDPRIORITY_DEFAULT_QUANTUM		5		/* in ms */
+static uint32_t sched_fixedpriority_quantum_ms = SCHED_FIXEDPRIORITY_DEFAULT_QUANTUM;
+static uint32_t sched_fixedpriority_quantum;
+
+#define SCHED_FIXEDPRIORITY_DEFAULT_FAIRSHARE_MINIMUM_BLOCK_TIME 100 /* ms */
+static uint32_t fairshare_minimum_blocked_time_ms = SCHED_FIXEDPRIORITY_DEFAULT_FAIRSHARE_MINIMUM_BLOCK_TIME;
+static uint32_t fairshare_minimum_blocked_time;
+
+static uint32_t			sched_fixedpriority_tick;
+static uint64_t			sched_fixedpriority_tick_deadline;
+extern uint32_t			grrr_rescale_tick;
+
+static boolean_t sched_fixedpriority_use_pset_runqueue = FALSE;
+
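+/*
+ * Route run-queue operations to the shared pset runqueue when that
+ * variant of this policy is active, otherwise to the processor-local
+ * queue.
+ */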
+__attribute__((always_inline))
+static inline run_queue_t runq_for_processor(processor_t processor)
+{
+	if (sched_fixedpriority_use_pset_runqueue)
+		return &processor->processor_set->pset_runq;
+	else
+		return &processor->runq;
+}
+
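+/*
+ * Track how many enqueued threads are bound to a specific processor.
+ * With the shared pset runqueue a pset-wide total is kept as well, so
+ * emptiness checks can discount threads bound to other processors.
+ */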
+__attribute__((always_inline))
+static inline void runq_consider_incr_bound_count(processor_t processor, thread_t thread)
+{
+	if (thread->bound_processor == PROCESSOR_NULL)
+		return;
+    
+	assert(thread->bound_processor == processor);
+    
+	if (sched_fixedpriority_use_pset_runqueue)
+		processor->processor_set->pset_runq_bound_count++;
+    
+	processor->runq_bound_count++;
+}
+
+__attribute__((always_inline))
+static inline void runq_consider_decr_bound_count(processor_t processor, thread_t thread)
+{
+	if (thread->bound_processor == PROCESSOR_NULL)
+		return;
+    
+	assert(thread->bound_processor == processor);
+    
+	if (sched_fixedpriority_use_pset_runqueue)
+		processor->processor_set->pset_runq_bound_count--;
+    
+	processor->runq_bound_count--;
+}
+
+static void
+sched_fixedpriority_init(void)
+{
+	if (!PE_parse_boot_argn("fixedpriority_quantum", &sched_fixedpriority_quantum_ms, sizeof (sched_fixedpriority_quantum_ms))) {
+		sched_fixedpriority_quantum_ms = SCHED_FIXEDPRIORITY_DEFAULT_QUANTUM;
+	}
+	
+	if (sched_fixedpriority_quantum_ms < 1)
+		sched_fixedpriority_quantum_ms = SCHED_FIXEDPRIORITY_DEFAULT_QUANTUM;
+	
+	printf("standard fixed priority timeslicing quantum is %u ms\n", sched_fixedpriority_quantum_ms);
+}
+
+static void
+sched_fixedpriority_with_pset_runqueue_init(void)
+{
+	sched_fixedpriority_init();
+	sched_fixedpriority_use_pset_runqueue = TRUE;
+}
+
+static void
+sched_fixedpriority_timebase_init(void)
+{
+	uint64_t	abstime;
+
+	/* standard timeslicing quantum */
+	clock_interval_to_absolutetime_interval(
+											sched_fixedpriority_quantum_ms, NSEC_PER_MSEC, &abstime);
+	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
+	sched_fixedpriority_quantum = (uint32_t)abstime;
+	
+	thread_depress_time = 1 * sched_fixedpriority_quantum;
+	default_timeshare_computation = sched_fixedpriority_quantum / 2;
+	default_timeshare_constraint = sched_fixedpriority_quantum;
+	
+	max_unsafe_computation = max_unsafe_quanta * sched_fixedpriority_quantum;
+	sched_safe_duration = 2 * max_unsafe_quanta * sched_fixedpriority_quantum;
+
+	if (!PE_parse_boot_argn("fairshare_minblockedtime", &fairshare_minimum_blocked_time_ms, sizeof (fairshare_minimum_blocked_time_ms))) {
+		fairshare_minimum_blocked_time_ms = SCHED_FIXEDPRIORITY_DEFAULT_FAIRSHARE_MINIMUM_BLOCK_TIME;
+	}
+	
+	clock_interval_to_absolutetime_interval(
+											fairshare_minimum_blocked_time_ms, NSEC_PER_MSEC, &abstime);
+	
+	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
+	fairshare_minimum_blocked_time = (uint32_t)abstime;
+}
+
+static void
+sched_fixedpriority_processor_init(processor_t processor)
+{
+	if (!sched_fixedpriority_use_pset_runqueue) {
+		run_queue_init(&processor->runq);
+	}
+	processor->runq_bound_count = 0;
+}
+
+static void
+sched_fixedpriority_pset_init(processor_set_t pset)
+{
+	if (sched_fixedpriority_use_pset_runqueue) {
+		run_queue_init(&pset->pset_runq);
+	}
+	pset->pset_runq_bound_count = 0;
+}
+
+
+static void
+sched_fixedpriority_maintenance_continuation(void)
+{
+	uint64_t			abstime = mach_absolute_time();
+	
+	sched_fixedpriority_tick++;
+	grrr_rescale_tick++;
+    
+	/*
+	 *  Compute various averages.
+	 */
+	compute_averages();
+	
+	if (sched_fixedpriority_tick_deadline == 0)
+		sched_fixedpriority_tick_deadline = abstime;
+	
+	clock_deadline_for_periodic_event(10*sched_one_second_interval, abstime,
+						&sched_fixedpriority_tick_deadline);
+	
+	assert_wait_deadline((event_t)sched_fixedpriority_maintenance_continuation, THREAD_UNINT, sched_fixedpriority_tick_deadline);
+	thread_block((thread_continue_t)sched_fixedpriority_maintenance_continuation);
+	/*NOTREACHED*/
+}
+
+
+static thread_t
+sched_fixedpriority_choose_thread(processor_t		processor,
+						  int				priority)
+{
+	thread_t thread;
+	
+	thread = choose_thread(processor, runq_for_processor(processor), priority);
+	if (thread != THREAD_NULL) {
+		runq_consider_decr_bound_count(processor, thread);
+	}
+	
+	return thread;
+}
+
+static thread_t
+sched_fixedpriority_steal_thread(processor_set_t		pset)
+{
+	pset_unlock(pset);
+	
+	return (THREAD_NULL);
+	
+}
+
+static void
+sched_fixedpriority_compute_priority(thread_t	thread,
+							 boolean_t			override_depress)
+{
+	/* Reset current priority to base priority */
+	if (	!(thread->sched_flags & TH_SFLAG_PROMOTED)			&&
+		(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)	||
+		 override_depress							)		) {
+			set_sched_pri(thread, thread->priority);
+		}
+}
+
+static processor_t
+sched_fixedpriority_choose_processor(	processor_set_t		pset,
+							 processor_t			processor,
+							 thread_t			thread)
+{
+	return choose_processor(pset, processor, thread);
+}
+static boolean_t
+sched_fixedpriority_processor_enqueue(
+							 processor_t			processor,
+							 thread_t			thread,
+							 integer_t			options)
+{
+	run_queue_t		rq = runq_for_processor(processor);
+	boolean_t		result;
+	
+	result = run_queue_enqueue(rq, thread, options);
+	thread->runq = processor;
+	runq_consider_incr_bound_count(processor, thread);
+
+	return (result);
+}
+
+static void
+sched_fixedpriority_processor_queue_shutdown(
+									 processor_t			processor)
+{
+	processor_set_t		pset = processor->processor_set;
+	thread_t			thread;
+	queue_head_t		tqueue, bqueue;
+	
+	queue_init(&tqueue);
+	queue_init(&bqueue);
+	
+	while ((thread = sched_fixedpriority_choose_thread(processor, IDLEPRI)) != THREAD_NULL) {
+		if (thread->bound_processor == PROCESSOR_NULL) {
+			enqueue_tail(&tqueue, (queue_entry_t)thread);
+		} else {
+			enqueue_tail(&bqueue, (queue_entry_t)thread);				
+		}
+	}
+	
+	while ((thread = (thread_t)dequeue_head(&bqueue)) != THREAD_NULL) {
+		sched_fixedpriority_processor_enqueue(processor, thread, SCHED_TAILQ);
+	}	
+	
+	pset_unlock(pset);
+	
+	while ((thread = (thread_t)dequeue_head(&tqueue)) != THREAD_NULL) {
+		thread_lock(thread);
+		
+		thread_setrun(thread, SCHED_TAILQ);
+		
+		thread_unlock(thread);
+	}
+}
+
+static boolean_t
+sched_fixedpriority_processor_queue_remove(
+								processor_t			processor,
+								thread_t		thread)
+{
+	void *			rqlock;
+	run_queue_t		rq;
+
+	rqlock = &processor->processor_set->sched_lock;
+	rq = runq_for_processor(processor);
+
+	simple_lock(rqlock);
+	if (processor == thread->runq) {
+		/*
+		 *	Thread is on a run queue and we have a lock on
+		 *	that run queue.
+		 */
+		runq_consider_decr_bound_count(processor, thread);
+		run_queue_remove(rq, thread);
+	}
+	else {
+		/*
+		 *	The thread left the run queue before we could
+		 * 	lock the run queue.
+		 */
+		assert(thread->runq == PROCESSOR_NULL);
+		processor = PROCESSOR_NULL;
+	}
+	
+	simple_unlock(rqlock);
+	
+	return (processor != PROCESSOR_NULL);
+}
+
+static boolean_t
+sched_fixedpriority_processor_queue_empty(processor_t		processor)
+{
+	/*
+	 * See sched_traditional_with_pset_runqueue_processor_queue_empty
+	 * for the algorithm.
+	 */
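+	/*
+	 * With a shared pset runqueue the raw count includes threads bound
+	 * to other processors: subtract every bound thread in the pset,
+	 * then add back those bound to this processor.
+	 */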
+	int count = runq_for_processor(processor)->count;
+
+	if (sched_fixedpriority_use_pset_runqueue) {
+		processor_set_t pset = processor->processor_set;
+
+		count -= pset->pset_runq_bound_count;
+		count += processor->runq_bound_count;
+	}
+	
+	return count == 0;
+}
+
+static boolean_t
+sched_fixedpriority_processor_queue_has_priority(processor_t		processor,
+										 int				priority,
+										 boolean_t		gte)
+{
+	if (gte)
+		return runq_for_processor(processor)->highq >= priority;
+	else
+		return runq_for_processor(processor)->highq > priority;
+}
+
+/* Implement sched_preempt_pri in code */
+static boolean_t
+sched_fixedpriority_priority_is_urgent(int priority)
+{
+	if (priority <= BASEPRI_FOREGROUND)
+		return FALSE;
+	
+	if (priority < MINPRI_KERNEL)
+		return TRUE;
+
+	if (priority >= BASEPRI_PREEMPT)
+		return TRUE;
+	
+	return FALSE;
+}
+
+static ast_t
+sched_fixedpriority_processor_csw_check(processor_t processor)
+{
+	run_queue_t		runq;
+	
+	runq = runq_for_processor(processor);
+	if (runq->highq > processor->current_pri) {
+		if (runq->urgency > 0)
+			return (AST_PREEMPT | AST_URGENT);
+
+		if (processor->active_thread && thread_eager_preemption(processor->active_thread))
+			return (AST_PREEMPT | AST_URGENT);
+		
+		return AST_PREEMPT;
+	} else if (processor->current_thmode == TH_MODE_FAIRSHARE) {
+		if (!sched_fixedpriority_processor_queue_empty(processor)) {
+			/* Allow queued threads to run if the current thread got demoted to fairshare */
+			return (AST_PREEMPT | AST_URGENT);
+		} else if ((!first_timeslice(processor)) && SCHED(fairshare_runq_count)() > 0) {
+			/* Allow other fairshare threads to run */
+			return AST_PREEMPT | AST_URGENT;
+		}
+	}
+	
+	return AST_NONE;
+}
+
+static uint32_t
+sched_fixedpriority_initial_quantum_size(thread_t thread __unused)
+{
+	return sched_fixedpriority_quantum;
+}
+
+static sched_mode_t
+sched_fixedpriority_initial_thread_sched_mode(task_t parent_task)
+{
+	if (parent_task == kernel_task)
+		return TH_MODE_FIXED;
+	else
+		return TH_MODE_TIMESHARE;
+}
+
+static boolean_t
+sched_fixedpriority_supports_timeshare_mode(void)
+{
+	return TRUE;
+}
+
+static boolean_t
+sched_fixedpriority_can_update_priority(thread_t	thread)
+{
+	return ((thread->sched_flags & TH_SFLAG_PRI_UPDATE) == 0);
+}
+
+static void
+sched_fixedpriority_update_priority(thread_t	thread)
+{
+	uint64_t current_time = mach_absolute_time();
+
+	thread->sched_flags |= TH_SFLAG_PRI_UPDATE;
+
+	if (thread->sched_flags & TH_SFLAG_FAIRSHARE_TRIPPED) {
+		
+		/*
+		 * Make sure we've waited fairshare_minimum_blocked_time both since
+		 * we were throttled into the fairshare band and since the last
+		 * time we ran.
+		 */
+		if (current_time >= thread->last_run_time + fairshare_minimum_blocked_time) {
+			
+			boolean_t		removed = thread_run_queue_remove(thread);
+						
+			thread->sched_flags &= ~TH_SFLAG_FAIRSHARE_TRIPPED;
+			thread->sched_mode = thread->saved_mode;
+			thread->saved_mode = TH_MODE_NONE;
+			
+			if (removed)
+				thread_setrun(thread, SCHED_TAILQ);
+
+			KERNEL_DEBUG_CONSTANT1(
+								   MACHDBG_CODE(DBG_MACH_SCHED,MACH_FAIRSHARE_EXIT) | DBG_FUNC_NONE, (uint32_t)(thread->last_run_time & 0xFFFFFFFF), (uint32_t)(thread->last_run_time >> 32), (uint32_t)(current_time & 0xFFFFFFFF), (uint32_t)(current_time >> 32), thread_tid(thread));
+
+		}
+	} else if ((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) && (thread->bound_processor == PROCESSOR_NULL)) {
+		boolean_t		removed = thread_run_queue_remove(thread);
+		
+		thread->sched_flags |= TH_SFLAG_FAIRSHARE_TRIPPED;
+		thread->saved_mode = thread->sched_mode;
+		thread->sched_mode = TH_MODE_FAIRSHARE;
+		
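+		/*
+		 * Backdate the refill stamp so that the CPU-bound heuristic in
+		 * sched_fixedpriority_quantum_expire computes an elapsed time
+		 * just over 2 * quantum and does not immediately re-demote the
+		 * thread.
+		 */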
+		thread->last_quantum_refill_time = thread->last_run_time - 2 * sched_fixedpriority_quantum - 1;
+		
+		if (removed)
+			thread_setrun(thread, SCHED_TAILQ);
+
+		KERNEL_DEBUG_CONSTANT(
+							   MACHDBG_CODE(DBG_MACH_SCHED,MACH_FAIRSHARE_ENTER) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), 0xFFFFFFFF, 0, 0, 0);
+
+	}
+	
+	/*
+	 *	Check for fail-safe release.
+	 */
+	if (	(thread->sched_flags & TH_SFLAG_FAILSAFE)		&&
+		current_time >= thread->safe_release		) {
+		
+		
+		thread->sched_flags &= ~TH_SFLAG_FAILSAFE;
+		
+		if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
+			/* Restore to previous */
+			
+			thread->sched_mode = thread->saved_mode;
+			thread->saved_mode = TH_MODE_NONE;
+			
+			if (thread->sched_mode == TH_MODE_REALTIME) {
+				thread->priority = BASEPRI_RTQUEUES;
+				
+			}
+			
+			if (!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK))
+				set_sched_pri(thread, thread->priority);
+		}
+	}
+	
+	thread->sched_flags &= ~TH_SFLAG_PRI_UPDATE;
+	return;
+}
+
+static void
+sched_fixedpriority_lightweight_update_priority(thread_t	thread __unused)
+{
+	return;
+}
+
+static void
+sched_fixedpriority_quantum_expire(
+						  thread_t	thread)
+{
+	/* Put thread into fairshare class; the core scheduler will manage the runqueue */
+	if ((thread->sched_mode == TH_MODE_TIMESHARE) && (thread->task != kernel_task) && !(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
+		uint64_t elapsed = thread->last_run_time - thread->last_quantum_refill_time;
+		
+		/* If we managed to use our quantum in less than 2*quantum wall clock time,
+		 * we are considered CPU bound and eligible for demotion. Since the quantum
+		 * is reset when thread_unblock() is called, we are only really considering
+		 * threads that elongate their execution time due to preemption.
+		 */
+		if ((elapsed < 2 * sched_fixedpriority_quantum) && (thread->bound_processor == PROCESSOR_NULL)) {
+		
+			thread->saved_mode = thread->sched_mode;
+			thread->sched_mode = TH_MODE_FAIRSHARE;
+			thread->sched_flags |= TH_SFLAG_FAIRSHARE_TRIPPED;
+			KERNEL_DEBUG_CONSTANT(
+							  MACHDBG_CODE(DBG_MACH_SCHED,MACH_FAIRSHARE_ENTER) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), (uint32_t)(elapsed & 0xFFFFFFFF), (uint32_t)(elapsed >> 32), 0, 0);
+		}
+	}
+}
+
+
+static boolean_t
+sched_fixedpriority_should_current_thread_rechoose_processor(processor_t			processor __unused)
+{
+	return (TRUE);
+}
+
+
+static int
+sched_fixedpriority_processor_runq_count(processor_t	processor)
+{
+	return runq_for_processor(processor)->count;
+}
+
+static uint64_t
+sched_fixedpriority_processor_runq_stats_count_sum(processor_t	processor)
+{
+	return runq_for_processor(processor)->runq_stats.count_sum;
+}
diff --git a/osfmk/kern/sched_grrr.c b/osfmk/kern/sched_grrr.c
new file mode 100644
index 000000000..d27b29e87
--- /dev/null
+++ b/osfmk/kern/sched_grrr.c
@@ -0,0 +1,956 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <mach/mach_types.h>
+#include <mach/machine.h>
+#include <mach/policy.h>
+#include <mach/sync_policy.h>
+#include <mach/thread_act.h>
+
+#include <machine/machine_routines.h>
+#include <machine/sched_param.h>
+#include <machine/machine_cpu.h>
+
+#include <kern/kern_types.h>
+#include <kern/clock.h>
+#include <kern/counters.h>
+#include <kern/cpu_number.h>
+#include <kern/cpu_data.h>
+#include <kern/debug.h>
+#include <kern/lock.h>
+#include <kern/macro_help.h>
+#include <kern/machine.h>
+#include <kern/misc_protos.h>
+#include <kern/processor.h>
+#include <kern/queue.h>
+#include <kern/sched.h>
+#include <kern/sched_prim.h>
+#include <kern/syscall_subr.h>
+#include <kern/task.h>
+#include <kern/thread.h>
+#include <kern/wait_queue.h>
+
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+
+#include <mach/sdt.h>
+
+#include <sys/kdebug.h>
+
+#if defined(CONFIG_SCHED_GRRR_CORE)
+
+static void
+grrr_priority_mapping_init(void);
+
+static boolean_t
+grrr_enqueue(
+				   grrr_run_queue_t			rq,
+				   thread_t			thread);
+				   
+static thread_t
+grrr_select(
+					grrr_run_queue_t	rq);
+
+static void
+grrr_remove(
+				  grrr_run_queue_t			rq,
+				  thread_t		thread);
+
+
+static void
+grrr_sorted_list_insert_group(grrr_run_queue_t rq,
+									grrr_group_t group);
+
+static void
+grrr_rescale_work(grrr_run_queue_t rq);
+
+static void
+grrr_runqueue_init(grrr_run_queue_t		runq);
+
+/* Map Mach priorities to ones suitable for proportional sharing */
+static grrr_proportional_priority_t grrr_priority_mapping[NRQS];
+
+/* Map each proportional priority to its group */
+static grrr_group_index_t grrr_group_mapping[NUM_GRRR_PROPORTIONAL_PRIORITIES];
+
+uint32_t			grrr_rescale_tick;
+
+#endif /* defined(CONFIG_SCHED_GRRR_CORE) */
+
+#if defined(CONFIG_SCHED_GRRR)
+
+static void
+sched_grrr_init(void);
+
+static void
+sched_grrr_timebase_init(void);
+
+static void
+sched_grrr_processor_init(processor_t processor);
+
+static void
+sched_grrr_pset_init(processor_set_t pset);
+
+static void
+sched_grrr_maintenance_continuation(void);
+
+static thread_t
+sched_grrr_choose_thread(processor_t		processor,
+							 int				priority);
+
+static thread_t
+sched_grrr_steal_thread(processor_set_t		pset);
+
+static void
+sched_grrr_compute_priority(thread_t	thread,
+							 boolean_t			override_depress);
+
+static processor_t
+sched_grrr_choose_processor(	processor_set_t		pset,
+								processor_t			processor,
+								thread_t			thread);
+
+static boolean_t
+sched_grrr_processor_enqueue(
+							 processor_t			processor,
+							 thread_t			thread,
+							 integer_t			options);
+
+static void
+sched_grrr_processor_queue_shutdown(
+									 processor_t			processor);
+
+static boolean_t
+sched_grrr_processor_queue_remove(
+						    processor_t			processor,
+							thread_t		thread);
+
+static boolean_t
+sched_grrr_processor_queue_empty(processor_t		processor);
+
+static boolean_t
+sched_grrr_processor_queue_has_priority(processor_t		processor,
+										 int				priority,
+										 boolean_t		gte);
+
+static boolean_t
+sched_grrr_priority_is_urgent(int priority);
+
+static ast_t
+sched_grrr_processor_csw_check(processor_t processor);
+
+static uint32_t
+sched_grrr_initial_quantum_size(thread_t thread);
+
+static sched_mode_t
+sched_grrr_initial_thread_sched_mode(task_t parent_task);
+
+static boolean_t
+sched_grrr_supports_timeshare_mode(void);
+
+static boolean_t
+sched_grrr_can_update_priority(thread_t	thread);
+
+static void
+sched_grrr_update_priority(thread_t	thread);
+
+static void
+sched_grrr_lightweight_update_priority(thread_t	thread);
+
+static void
+sched_grrr_quantum_expire(thread_t	thread);
+
+static boolean_t
+sched_grrr_should_current_thread_rechoose_processor(processor_t			processor);
+
+static int
+sched_grrr_processor_runq_count(processor_t	processor);
+
+static uint64_t
+sched_grrr_processor_runq_stats_count_sum(processor_t   processor);
+
+const struct sched_dispatch_table sched_grrr_dispatch = {
+	sched_grrr_init,
+	sched_grrr_timebase_init,
+	sched_grrr_processor_init,
+	sched_grrr_pset_init,
+	sched_grrr_maintenance_continuation,
+	sched_grrr_choose_thread,
+	sched_grrr_steal_thread,
+	sched_grrr_compute_priority,
+	sched_grrr_choose_processor,
+	sched_grrr_processor_enqueue,
+	sched_grrr_processor_queue_shutdown,
+	sched_grrr_processor_queue_remove,
+	sched_grrr_processor_queue_empty,
+	sched_grrr_priority_is_urgent,
+	sched_grrr_processor_csw_check,
+	sched_grrr_processor_queue_has_priority,
+	sched_grrr_initial_quantum_size,
+	sched_grrr_initial_thread_sched_mode,
+	sched_grrr_supports_timeshare_mode,
+	sched_grrr_can_update_priority,
+	sched_grrr_update_priority,
+	sched_grrr_lightweight_update_priority,
+	sched_grrr_quantum_expire,
+	sched_grrr_should_current_thread_rechoose_processor,
+	sched_grrr_processor_runq_count,
+	sched_grrr_processor_runq_stats_count_sum,
+	sched_grrr_fairshare_init,
+	sched_grrr_fairshare_runq_count,
+	sched_grrr_fairshare_runq_stats_count_sum,
+	sched_grrr_fairshare_enqueue,
+	sched_grrr_fairshare_dequeue,
+	sched_grrr_fairshare_queue_remove,
+	TRUE /* direct_dispatch_to_idle_processors */
+};
+
+extern int	default_preemption_rate;
+extern int	max_unsafe_quanta;
+
+static uint32_t grrr_quantum_us;
+static uint32_t grrr_quantum;
+
+static uint64_t			sched_grrr_tick_deadline;
+
+static void
+sched_grrr_init(void)
+{
+	if (default_preemption_rate < 1)
+		default_preemption_rate = 100;
+	grrr_quantum_us = (1000 * 1000) / default_preemption_rate;
+	
+	printf("standard grrr timeslicing quantum is %u us\n", grrr_quantum_us);
+
+	grrr_priority_mapping_init();
+}
+
+static void
+sched_grrr_timebase_init(void)
+{
+	uint64_t	abstime;
+
+	/* standard timeslicing quantum */
+	clock_interval_to_absolutetime_interval(
+											grrr_quantum_us, NSEC_PER_USEC, &abstime);
+	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
+	grrr_quantum = (uint32_t)abstime;
+	
+	thread_depress_time = 1 * grrr_quantum;
+	default_timeshare_computation = grrr_quantum / 2;
+	default_timeshare_constraint = grrr_quantum;
+	
+	max_unsafe_computation = max_unsafe_quanta * grrr_quantum;
+	sched_safe_duration = 2 * max_unsafe_quanta * grrr_quantum;
+
+}
+
+static void
+sched_grrr_processor_init(processor_t processor)
+{
+	grrr_runqueue_init(&processor->grrr_runq);
+}
+
+static void
+sched_grrr_pset_init(processor_set_t pset __unused)
+{
+}
+
+static void
+sched_grrr_maintenance_continuation(void)
+{
+	uint64_t			abstime = mach_absolute_time();
+	
+	grrr_rescale_tick++;
+    
+	/*
+	 *  Compute various averages.
+	 */
+	compute_averages();
+	
+	if (sched_grrr_tick_deadline == 0)
+		sched_grrr_tick_deadline = abstime;
+	
+	clock_deadline_for_periodic_event(10*sched_one_second_interval, abstime,
+						&sched_grrr_tick_deadline);
+	
+	assert_wait_deadline((event_t)sched_grrr_maintenance_continuation, THREAD_UNINT, sched_grrr_tick_deadline);
+	thread_block((thread_continue_t)sched_grrr_maintenance_continuation);
+	/*NOTREACHED*/
+}
+
+
+static thread_t
+sched_grrr_choose_thread(processor_t		processor,
+						  int				priority __unused)
+{
+	grrr_run_queue_t		rq = &processor->grrr_runq;
+	
+	return 	grrr_select(rq);
+}
+
+static thread_t
+sched_grrr_steal_thread(processor_set_t		pset)
+{
+	pset_unlock(pset);
+	
+	return (THREAD_NULL);
+	
+}
+
+static void
+sched_grrr_compute_priority(thread_t	thread,
+							 boolean_t			override_depress __unused)
+{
+	set_sched_pri(thread, thread->priority);
+}
+
+static processor_t
+sched_grrr_choose_processor(	processor_set_t		pset,
+							 processor_t			processor,
+							 thread_t			thread)
+{
+	return choose_processor(pset, processor, thread);
+}
+
+static boolean_t
+sched_grrr_processor_enqueue(
+							 processor_t			processor,
+							 thread_t			thread,
+							 integer_t			options __unused)
+{
+	grrr_run_queue_t		rq = &processor->grrr_runq;
+	boolean_t				result;
+	
+	result = grrr_enqueue(rq, thread);
+	
+	thread->runq = processor;
+	
+	return result;
+}
+
+static void
+sched_grrr_processor_queue_shutdown(
+									 processor_t			processor)
+{
+	processor_set_t		pset = processor->processor_set;
+	thread_t			thread;
+	queue_head_t		tqueue, bqueue;
+	
+	queue_init(&tqueue);
+	queue_init(&bqueue);
+	
+	while ((thread = sched_grrr_choose_thread(processor, IDLEPRI)) != THREAD_NULL) {
+		if (thread->bound_processor == PROCESSOR_NULL) {
+			enqueue_tail(&tqueue, (queue_entry_t)thread);
+		} else {
+			enqueue_tail(&bqueue, (queue_entry_t)thread);				
+		}
+	}
+	
+	while ((thread = (thread_t)dequeue_head(&bqueue)) != THREAD_NULL) {
+		sched_grrr_processor_enqueue(processor, thread, SCHED_TAILQ);
+	}	
+	
+	pset_unlock(pset);
+	
+	while ((thread = (thread_t)dequeue_head(&tqueue)) != THREAD_NULL) {
+		thread_lock(thread);
+		
+		thread_setrun(thread, SCHED_TAILQ);
+		
+		thread_unlock(thread);
+	}
+}
+
+static boolean_t
+sched_grrr_processor_queue_remove(
+								processor_t			processor,
+								thread_t		thread)
+{
+	void *			rqlock;
+	
+	rqlock = &processor->processor_set->sched_lock;
+	simple_lock(rqlock);
+	
+	if (processor == thread->runq) {
+		/*
+		 *	Thread is on a run queue and we have a lock on
+		 *	that run queue.
+		 */
+		grrr_run_queue_t		rq = &processor->grrr_runq;
+
+		grrr_remove(rq, thread);
+	} else {
+		/*
+		 *	The thread left the run queue before we could
+		 * 	lock the run queue.
+		 */
+		assert(thread->runq == PROCESSOR_NULL);
+		processor = PROCESSOR_NULL;		
+	}
+	
+	simple_unlock(rqlock);
+	
+	return (processor != PROCESSOR_NULL);	
+}
+				   
+static boolean_t
+sched_grrr_processor_queue_empty(processor_t		processor)
+{
+	boolean_t result;
+	
+	result = (processor->grrr_runq.count == 0);
+	
+	return result;
+}
+
+static boolean_t
+sched_grrr_processor_queue_has_priority(processor_t		processor,
+										 int				priority,
+										 boolean_t		gte __unused)
+{
+	grrr_run_queue_t		rq = &processor->grrr_runq;
+	unsigned int	i;
+
+	i = grrr_group_mapping[grrr_priority_mapping[priority]];
+	for ( ; i < NUM_GRRR_GROUPS; i++) {
+		if (rq->groups[i].count > 0)
+			return (TRUE);
+	}
+	
+	return (FALSE);
+}
+
+/* Implement sched_preempt_pri in code */
+static boolean_t
+sched_grrr_priority_is_urgent(int priority)
+{
+	if (priority <= BASEPRI_FOREGROUND)
+		return FALSE;
+	
+	if (priority < MINPRI_KERNEL)
+		return TRUE;
+
+	if (priority >= BASEPRI_PREEMPT)
+		return TRUE;
+	
+	return FALSE;
+}
+
+static ast_t
+sched_grrr_processor_csw_check(processor_t processor)
+{
+	int				count;
+	
+	count = sched_grrr_processor_runq_count(processor);
+	
+	if (count > 0) {
+		
+		return AST_PREEMPT;
+	}
+	
+	return AST_NONE;
+}
+
+static uint32_t
+sched_grrr_initial_quantum_size(thread_t thread __unused)
+{
+	return grrr_quantum;
+}
+
+static sched_mode_t
+sched_grrr_initial_thread_sched_mode(task_t parent_task)
+{
+	if (parent_task == kernel_task)
+		return TH_MODE_FIXED;
+	else
+		return TH_MODE_TIMESHARE;	
+}
+
+static boolean_t
+sched_grrr_supports_timeshare_mode(void)
+{
+	return TRUE;
+}
+
+static boolean_t
+sched_grrr_can_update_priority(thread_t	thread __unused)
+{
+	return FALSE;
+}
+
+static void
+sched_grrr_update_priority(thread_t	thread __unused)
+{
+	
+}
+
+static void
+sched_grrr_lightweight_update_priority(thread_t	thread __unused)
+{
+	return;
+}
+
+static void
+sched_grrr_quantum_expire(
+						  thread_t	thread __unused)
+{
+}
+
+
+static boolean_t
+sched_grrr_should_current_thread_rechoose_processor(processor_t			processor __unused)
+{
+	return (TRUE);
+}
+
+static int
+sched_grrr_processor_runq_count(processor_t	processor)
+{
+	return processor->grrr_runq.count;
+}
+
+static uint64_t
+sched_grrr_processor_runq_stats_count_sum(processor_t	processor)
+{
+	return processor->grrr_runq.runq_stats.count_sum;
+}
+
+#endif /* defined(CONFIG_SCHED_GRRR) */
+
+#if defined(CONFIG_SCHED_GRRR_CORE)
+
+static void
+grrr_priority_mapping_init(void)
+{
+	unsigned int i;
+	
+	/* Map 0->0 up to 10->20 */
+	for (i=0; i <= 10; i++) {
+		grrr_priority_mapping[i] = 2*i;
+	}
+	
+	/* Map user priorities 11->33 up to 51 -> 153 */
+	for (i=11; i <= 51; i++) {
+		grrr_priority_mapping[i] = 3*i;
+	}
+	
+	/* Map high priorities 52->180 up to 127->255 */
+	for (i=52; i <= 127; i++) {
+		grrr_priority_mapping[i] = 128 + i;
+	}
+	
+	for (i = 0; i < NUM_GRRR_PROPORTIONAL_PRIORITIES; i++) {
+		
+#if 0		
+		unsigned j, k;
+		/* Calculate log(i); */
+		for (j=0, k=1; k <= i; j++, k *= 2);
+#endif
+		
+		/* Groups of 4 */
+		grrr_group_mapping[i] = i >> 2;
+	}
+	
+}
+
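+/*
+ * Round-robin within a group: advance the current-client pointer on
+ * each visit, wrapping at the end of the client list.
+ */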
+static thread_t
+grrr_intragroup_schedule(grrr_group_t group)
+{
+	thread_t thread;
+
+	if (group->count == 0) {
+		return THREAD_NULL;
+	}
+	
+	thread = group->current_client;
+	if (thread == THREAD_NULL) {
+		thread = (thread_t)queue_first(&group->clients);
+	}
+	
+	if (1 /* deficit */) {
+		group->current_client = (thread_t)queue_next((queue_entry_t)thread);
+		if (queue_end(&group->clients, (queue_entry_t)group->current_client)) {
+			group->current_client = (thread_t)queue_first(&group->clients);
+		}
+		
+		thread = group->current_client;
+	}
+	
+	return thread;
+}
+
+static thread_t
+grrr_intergroup_schedule(grrr_run_queue_t rq)
+{
+	thread_t thread;
+	grrr_group_t group;
+	
+	if (rq->count == 0) {
+		return THREAD_NULL;
+	}
+	
+	group = rq->current_group;
+	
+	if (group == GRRR_GROUP_NULL) {
+		group = (grrr_group_t)queue_first(&rq->sorted_group_list);
+	}
+	
+	thread = grrr_intragroup_schedule(group);
+	
+	if ((group->work >= (UINT32_MAX-256)) || (rq->last_rescale_tick != grrr_rescale_tick)) {
+		grrr_rescale_work(rq);
+	}
+	group->work++;
+	
+	if (queue_end(&rq->sorted_group_list, queue_next((queue_entry_t)group))) {
+		/* last group, go back to beginning */
+		group = (grrr_group_t)queue_first(&rq->sorted_group_list);
+	} else {
+		grrr_group_t nextgroup = (grrr_group_t)queue_next((queue_entry_t)group);
+		uint64_t orderleft, orderright;
+		
+		/*
+		 * The well-ordering condition for intergroup selection is:
+		 *
+		 * (group->work+1) / (nextgroup->work+1) > (group->weight) / (nextgroup->weight)
+		 *
+		 * Multiply both sides by their denominators to avoid division
+		 *
+		 */
+		orderleft = (group->work + 1) * ((uint64_t)nextgroup->weight);
+		orderright = (nextgroup->work + 1) * ((uint64_t)group->weight);
+		if (orderleft > orderright) {
+			group = nextgroup;
+		} else {
+			group = (grrr_group_t)queue_first(&rq->sorted_group_list);
+		}
+	}
+	
+	rq->current_group = group;
+	
+	return thread;
+}
+
+static void
+grrr_runqueue_init(grrr_run_queue_t		runq)
+{
+	grrr_group_index_t index;
+	
+	runq->count = 0;
+	
+	for (index = 0; index < NUM_GRRR_GROUPS; index++) {
+		unsigned int prisearch;
+
+		for (prisearch = 0;
+			 prisearch < NUM_GRRR_PROPORTIONAL_PRIORITIES;
+			 prisearch++) {
+			if (grrr_group_mapping[prisearch] == index) {
+				runq->groups[index].minpriority = (grrr_proportional_priority_t)prisearch;
+				break;
+			}
+		}
+		
+		runq->groups[index].index = index;
+
+		queue_init(&runq->groups[index].clients);
+		runq->groups[index].count = 0;
+		runq->groups[index].weight = 0;
+		runq->groups[index].work = 0;
+		runq->groups[index].current_client = THREAD_NULL;
+	}
+	
+	queue_init(&runq->sorted_group_list);
+	runq->weight = 0;
+	runq->current_group = GRRR_GROUP_NULL;
+}
+
+static void
+grrr_rescale_work(grrr_run_queue_t rq)
+{
+	grrr_group_index_t index;
+
+	/* avoid overflow by scaling by 1/8th */
+	for (index = 0; index < NUM_GRRR_GROUPS; index++) {
+		rq->groups[index].work >>= 3;
+	}
+
+	rq->last_rescale_tick = grrr_rescale_tick;
+}
+
+static boolean_t
+grrr_enqueue(
+							 grrr_run_queue_t			rq,
+							 thread_t			thread)
+{							 
+	grrr_proportional_priority_t	gpriority;
+	grrr_group_index_t		gindex;
+	grrr_group_t			group;
+
+	gpriority = grrr_priority_mapping[thread->sched_pri];
+	gindex = grrr_group_mapping[gpriority];
+	group = &rq->groups[gindex];
+
+#if 0
+	thread->grrr_deficit = 0;
+#endif
+	
+	if (group->count == 0) {
+		/* Empty group, this is the first client */
+		enqueue_tail(&group->clients, (queue_entry_t)thread);
+		group->count = 1;
+		group->weight = gpriority;
+		group->current_client = thread;
+	} else {
+		/* Insert before the current client */
+		if (group->current_client == THREAD_NULL ||
+			queue_first(&group->clients) == (queue_entry_t)group->current_client) {
+			enqueue_head(&group->clients, (queue_entry_t)thread);
+		} else {
+			insque((queue_entry_t)thread, queue_prev((queue_entry_t)group->current_client));
+		}
+		SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
+		group->count++;
+		group->weight += gpriority;
+
+		/* Since there was already a client, this is on the per-processor sorted list already */
+		remqueue((queue_entry_t)group);
+	}
+	
+	grrr_sorted_list_insert_group(rq, group);
+
+	rq->count++;
+	rq->weight += gpriority;
+	
+	return (FALSE);
+}
+
+static thread_t
+grrr_select(grrr_run_queue_t	rq)
+{
+	thread_t		thread;
+
+	thread = grrr_intergroup_schedule(rq);
+	if (thread != THREAD_NULL) {
+		grrr_proportional_priority_t	gpriority;
+		grrr_group_index_t		gindex;
+		grrr_group_t			group;
+		
+		gpriority = grrr_priority_mapping[thread->sched_pri];
+		gindex = grrr_group_mapping[gpriority];
+		group = &rq->groups[gindex];
+		
+		remqueue((queue_entry_t)thread);
+		SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
+		group->count--;
+		group->weight -= gpriority;
+		if (group->current_client == thread) {
+			group->current_client = THREAD_NULL;
+		}
+		
+		remqueue((queue_entry_t)group);
+		if (group->count == 0) {
+			if (rq->current_group == group) {
+				rq->current_group = GRRR_GROUP_NULL;
+			}
+		} else {
+			/* Need to re-insert in sorted location */
+			grrr_sorted_list_insert_group(rq, group);
+		}
+		
+		rq->count--;
+		rq->weight -= gpriority;
+		
+		thread->runq = PROCESSOR_NULL;
+	}
+
+	return (thread);
+}
+
+static void
+grrr_remove(
+	grrr_run_queue_t		rq,
+	thread_t			thread)
+{
+	grrr_proportional_priority_t	gpriority;
+	grrr_group_index_t		gindex;
+	grrr_group_t			group;
+	
+	gpriority = grrr_priority_mapping[thread->sched_pri];
+	gindex = grrr_group_mapping[gpriority];
+	group = &rq->groups[gindex];
+	
+	remqueue((queue_entry_t)thread);
+	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
+	group->count--;
+	group->weight -= gpriority;
+	if (group->current_client == thread) {
+		group->current_client = THREAD_NULL;
+	}
+	
+	remqueue((queue_entry_t)group);
+	if (group->count == 0) {
+		if (rq->current_group == group) {
+			rq->current_group = GRRR_GROUP_NULL;
+		}
+	} else {
+		/* Need to re-insert in sorted location */
+		grrr_sorted_list_insert_group(rq, group);
+	}
+	
+	rq->count--;
+	rq->weight -= gpriority;
+	
+	thread->runq = PROCESSOR_NULL;
+}
+
+static void
+grrr_sorted_list_insert_group(grrr_run_queue_t rq, grrr_group_t group)
+{
+	/* Simple insertion sort */
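+	/*
+	 * For example, inserting a group of weight 8 into the descending
+	 * list [9, 7, 4] walks past 9, stops at 7, steps back, and is
+	 * placed after 9, yielding [9, 8, 7, 4].
+	 */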
+	if (queue_empty(&rq->sorted_group_list)) {
+		enqueue_tail(&rq->sorted_group_list, (queue_entry_t)group);
+	} else {
+		grrr_group_t search_group;
+		
+		/* Start searching from the head (heaviest weight) for the first
+		 * element less than us, so we can insert before it
+		 */
+		search_group = (grrr_group_t)queue_first(&rq->sorted_group_list);
+		while (!queue_end(&rq->sorted_group_list, (queue_entry_t)search_group) ) {
+			
+			if (search_group->weight < group->weight) {
+				/* we should be before this */
+				search_group = (grrr_group_t)queue_prev((queue_entry_t)search_group);
+				break;
+			} else if (search_group->weight == group->weight) {
+				/* Use group index as a tie breaker */
+				if (search_group->index < group->index) {
+					search_group = (grrr_group_t)queue_prev((queue_entry_t)search_group);
+					break;
+				}
+			}
+			
+			/* otherwise, our weight is too small, keep going */
+			search_group = (grrr_group_t)queue_next((queue_entry_t)search_group);
+		}
+		
+		if (queue_end(&rq->sorted_group_list, (queue_entry_t)search_group)) {
+			enqueue_tail(&rq->sorted_group_list, (queue_entry_t)group);
+		} else {
+			insque((queue_entry_t)group, (queue_entry_t)search_group);
+		}
+	}
+}
+
+#endif /* defined(CONFIG_SCHED_GRRR_CORE) */
+
+#if defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY)
+
+static struct grrr_run_queue	fs_grrr_runq;
+#define FS_GRRR_RUNQ		((processor_t)-2)
+decl_simple_lock_data(static,fs_grrr_lock);
+
+void
+sched_grrr_fairshare_init(void)
+{
+	grrr_priority_mapping_init();
+	
+	simple_lock_init(&fs_grrr_lock, 0);
+	grrr_runqueue_init(&fs_grrr_runq);
+}
+
+
+int
+sched_grrr_fairshare_runq_count(void)
+{
+	return fs_grrr_runq.count;
+}
+
+uint64_t
+sched_grrr_fairshare_runq_stats_count_sum(void)
+{
+	return fs_grrr_runq.runq_stats.count_sum;
+}
+
+void
+sched_grrr_fairshare_enqueue(thread_t thread)
+{
+	simple_lock(&fs_grrr_lock);
+	
+	(void)grrr_enqueue(&fs_grrr_runq, thread);
+
+	thread->runq = FS_GRRR_RUNQ;
+
+	simple_unlock(&fs_grrr_lock);	
+}
+
+thread_t
+sched_grrr_fairshare_dequeue(void)
+{
+	thread_t thread;
+	
+	simple_lock(&fs_grrr_lock);
+	if (fs_grrr_runq.count > 0) {
+		thread = grrr_select(&fs_grrr_runq);
+		
+		simple_unlock(&fs_grrr_lock);
+		
+		return (thread);
+	}
+	simple_unlock(&fs_grrr_lock);		
+	
+	return THREAD_NULL;
+}
+
+boolean_t
+sched_grrr_fairshare_queue_remove(thread_t thread)
+{
+	simple_lock(&fs_grrr_lock);
+	
+	if (FS_GRRR_RUNQ == thread->runq) {
+		grrr_remove(&fs_grrr_runq, thread);
+		
+		simple_unlock(&fs_grrr_lock);
+		return (TRUE);
+	}
+	else {
+		/*
+		 *	The thread left the run queue before we could
+		 * 	lock the run queue.
+		 */
+		assert(thread->runq == PROCESSOR_NULL);
+		simple_unlock(&fs_grrr_lock);
+		return (FALSE);
+	}	
+}
+
+#endif /* defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY) */
diff --git a/osfmk/kern/sched_prim.c b/osfmk/kern/sched_prim.c
index dddaf47d7..c73ef0f3d 100644
--- a/osfmk/kern/sched_prim.c
+++ b/osfmk/kern/sched_prim.c
@@ -73,10 +73,12 @@
 #include <mach/machine.h>
 #include <mach/policy.h>
 #include <mach/sync_policy.h>
+#include <mach/thread_act.h>
 
 #include <machine/machine_routines.h>
 #include <machine/sched_param.h>
 #include <machine/machine_cpu.h>
+#include <machine/machlimits.h>
 
 #include <kern/kern_types.h>
 #include <kern/clock.h>
@@ -107,10 +109,16 @@
 
 #include <kern/pms.h>
 
-struct run_queue	rt_runq;
+struct rt_queue	rt_runq;
 #define RT_RUNQ		((processor_t)-1)
 decl_simple_lock_data(static,rt_lock);
 
+#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY)
+static struct fairshare_queue	fs_runq;
+#define FS_RUNQ		((processor_t)-2)
+decl_simple_lock_data(static,fs_lock);
+#endif
+
 #define		DEFAULT_PREEMPTION_RATE		100		/* (1/s) */
 int			default_preemption_rate = DEFAULT_PREEMPTION_RATE;
 
@@ -123,57 +131,203 @@ int			max_poll_quanta = MAX_POLL_QUANTA;
 #define		SCHED_POLL_YIELD_SHIFT		4		/* 1/16 */
 int			sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT;
 
-uint64_t	max_unsafe_computation;
-uint32_t	sched_safe_duration;
 uint64_t	max_poll_computation;
 
+uint64_t	max_unsafe_computation;
+uint64_t	sched_safe_duration;
+
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
 uint32_t	std_quantum;
 uint32_t	min_std_quantum;
 
 uint32_t	std_quantum_us;
 
+#endif /* CONFIG_SCHED_TRADITIONAL */
+
+uint32_t	thread_depress_time;
+uint32_t	default_timeshare_computation;
+uint32_t	default_timeshare_constraint;
+
 uint32_t	max_rt_quantum;
 uint32_t	min_rt_quantum;
 
 uint32_t	sched_cswtime;
 
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
 unsigned	sched_tick;
 uint32_t	sched_tick_interval;
 
 uint32_t	sched_pri_shift = INT8_MAX;
 uint32_t	sched_fixed_shift;
 
+static boolean_t sched_traditional_use_pset_runqueue = FALSE;
+
+__attribute__((always_inline))
+static inline run_queue_t runq_for_processor(processor_t processor)
+{
+	if (sched_traditional_use_pset_runqueue)
+		return &processor->processor_set->pset_runq;
+	else
+		return &processor->runq;
+}
+
+__attribute__((always_inline))
+static inline void runq_consider_incr_bound_count(processor_t processor, thread_t thread)
+{
+	if (thread->bound_processor == PROCESSOR_NULL)
+		return;
+    
+	assert(thread->bound_processor == processor);
+    
+	if (sched_traditional_use_pset_runqueue)
+		processor->processor_set->pset_runq_bound_count++;
+    
+	processor->runq_bound_count++;
+}
+
+__attribute__((always_inline))
+static inline void runq_consider_decr_bound_count(processor_t processor, thread_t thread)
+{
+	if (thread->bound_processor == PROCESSOR_NULL)
+		return;
+    
+	assert(thread->bound_processor == processor);
+    
+	if (sched_traditional_use_pset_runqueue)
+		processor->processor_set->pset_runq_bound_count--;
+    
+	processor->runq_bound_count--;
+}
+
+#endif /* CONFIG_SCHED_TRADITIONAL */
+
+uint64_t	sched_one_second_interval;
+
 uint32_t	sched_run_count, sched_share_count;
 uint32_t	sched_load_average, sched_mach_factor;
 
 /* Forwards */
+
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
 static void load_shift_init(void) __attribute__((section("__TEXT, initcode")));
 static void preempt_pri_init(void) __attribute__((section("__TEXT, initcode")));
 
-static thread_t	run_queue_dequeue(
-					run_queue_t		runq,
-					integer_t		options);
+#endif /* CONFIG_SCHED_TRADITIONAL */
 
-static thread_t	choose_thread(
-					processor_t		processor,
-					int				priority);
+static thread_t	thread_select(
+					thread_t			thread,
+					processor_t			processor);
 
+#if CONFIG_SCHED_IDLE_IN_PLACE
 static thread_t	thread_select_idle(
 					thread_t			thread,
 					processor_t			processor);
+#endif
 
-static thread_t	processor_idle(
+thread_t	processor_idle(
 					thread_t			thread,
 					processor_t			processor);
 
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
 static thread_t	steal_thread(
 					processor_set_t		pset);
 
+static thread_t	steal_thread_disabled(
+					processor_set_t		pset) __attribute__((unused));
+
+
 static thread_t	steal_processor_thread(
 					processor_t			processor);
 
 static void		thread_update_scan(void);
 
+static void processor_setrun(
+				 processor_t			processor,
+				 thread_t			thread,
+				 integer_t			options);
+
+static boolean_t
+processor_enqueue(
+				  processor_t		processor,
+				  thread_t		thread,
+				  integer_t		options);
+
+static boolean_t
+processor_queue_remove(
+					   processor_t			processor,
+					   thread_t		thread);
+
+static boolean_t	processor_queue_empty(processor_t		processor);
+
+static boolean_t	priority_is_urgent(int priority);
+
+static ast_t		processor_csw_check(processor_t processor);
+
+static boolean_t	processor_queue_has_priority(processor_t		processor,
+											int				priority,
+											boolean_t		gte);
+
+static boolean_t	should_current_thread_rechoose_processor(processor_t			processor);
+
+static int     sched_traditional_processor_runq_count(processor_t   processor);
+
+static boolean_t	sched_traditional_with_pset_runqueue_processor_queue_empty(processor_t		processor);
+
+static uint64_t     sched_traditional_processor_runq_stats_count_sum(processor_t   processor);
+
+static uint64_t		sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t   processor);
+#endif
+	
+
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
+static void
+sched_traditional_init(void);
+
+static void
+sched_traditional_timebase_init(void);
+
+static void
+sched_traditional_processor_init(processor_t processor);
+
+static void
+sched_traditional_pset_init(processor_set_t pset);
+
+static void
+sched_traditional_with_pset_runqueue_init(void);
+
+#endif
+
+static void
+sched_realtime_init(void)  __attribute__((section("__TEXT, initcode")));
+
+static void
+sched_realtime_timebase_init(void);
+
+#if defined(CONFIG_SCHED_TRADITIONAL)
+static void
+sched_traditional_tick_continue(void);
+
+static uint32_t
+sched_traditional_initial_quantum_size(thread_t thread);
+
+static sched_mode_t
+sched_traditional_initial_thread_sched_mode(task_t parent_task);
+
+static boolean_t
+sched_traditional_supports_timeshare_mode(void);
+
+static thread_t
+sched_traditional_choose_thread(
+								processor_t		processor,
+								int				priority);
+
+#endif
+
 #if	DEBUG
 extern int debug_task;
 #define TLOG(a, fmt, args...) if(debug_task & a) kprintf(fmt, ## args)
@@ -214,11 +368,221 @@ boolean_t	thread_runnable(
  *
  */
 
+#if defined(CONFIG_SCHED_TRADITIONAL)
 int8_t		sched_load_shifts[NRQS];
 int		sched_preempt_pri[NRQBM];
+#endif
+
+
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
+const struct sched_dispatch_table sched_traditional_dispatch = {
+	sched_traditional_init,
+	sched_traditional_timebase_init,
+	sched_traditional_processor_init,
+	sched_traditional_pset_init,
+	sched_traditional_tick_continue,
+	sched_traditional_choose_thread,
+	steal_thread,
+	compute_priority,
+	choose_processor,
+	processor_enqueue,
+	processor_queue_shutdown,
+	processor_queue_remove,
+	processor_queue_empty,
+	priority_is_urgent,
+	processor_csw_check,
+	processor_queue_has_priority,
+	sched_traditional_initial_quantum_size,
+	sched_traditional_initial_thread_sched_mode,
+	sched_traditional_supports_timeshare_mode,
+	can_update_priority,
+	update_priority,
+	lightweight_update_priority,
+	sched_traditional_quantum_expire,
+	should_current_thread_rechoose_processor,
+	sched_traditional_processor_runq_count,
+	sched_traditional_processor_runq_stats_count_sum,
+	sched_traditional_fairshare_init,
+	sched_traditional_fairshare_runq_count,
+	sched_traditional_fairshare_runq_stats_count_sum,
+	sched_traditional_fairshare_enqueue,
+	sched_traditional_fairshare_dequeue,
+	sched_traditional_fairshare_queue_remove,
+	TRUE /* direct_dispatch_to_idle_processors */
+};
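+
+/*
+ * Note: these are positional initializers, so the entries must stay in
+ * the same order as the function-pointer fields of the
+ * struct sched_dispatch_table declaration.  The pset-runqueue variant
+ * below differs only in its init, queue-empty, and runq-stats entries
+ * and in disabling direct dispatch to idle processors.
+ */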
+
+const struct sched_dispatch_table sched_traditional_with_pset_runqueue_dispatch = {
+	sched_traditional_with_pset_runqueue_init,
+	sched_traditional_timebase_init,
+	sched_traditional_processor_init,
+	sched_traditional_pset_init,
+	sched_traditional_tick_continue,
+	sched_traditional_choose_thread,
+	steal_thread,
+	compute_priority,
+	choose_processor,
+	processor_enqueue,
+	processor_queue_shutdown,
+	processor_queue_remove,
+	sched_traditional_with_pset_runqueue_processor_queue_empty,
+	priority_is_urgent,
+	processor_csw_check,
+	processor_queue_has_priority,
+	sched_traditional_initial_quantum_size,
+	sched_traditional_initial_thread_sched_mode,
+	sched_traditional_supports_timeshare_mode,
+	can_update_priority,
+	update_priority,
+	lightweight_update_priority,
+	sched_traditional_quantum_expire,
+	should_current_thread_rechoose_processor,
+	sched_traditional_processor_runq_count,
+	sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum,
+	sched_traditional_fairshare_init,
+	sched_traditional_fairshare_runq_count,
+	sched_traditional_fairshare_runq_stats_count_sum,
+	sched_traditional_fairshare_enqueue,
+	sched_traditional_fairshare_dequeue,
+	sched_traditional_fairshare_queue_remove,
+	FALSE /* direct_dispatch_to_idle_processors */
+};
+
+#endif
+
+const struct sched_dispatch_table *sched_current_dispatch = NULL;
+
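+/*
+ * Calls written as SCHED(op)(...) are indirected through this pointer
+ * via the SCHED() macro, so once a dispatch table has been selected,
+ * SCHED(init)() resolves to that policy's init function, e.g.
+ * sched_traditional_init().
+ */
+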
+/*
+ * Statically allocate a buffer to hold the longest possible
+ * scheduler description string, as currently implemented.
+ * bsd/kern/kern_sysctl.c has a corresponding definition used
+ * to export the string to userspace via sysctl(3). If either version
+ * changes, update the other.
+ *
+ * Note that in addition to being an upper bound on the strings
+ * in the kernel, it's also an exact parameter to PE_get_default(),
+ * which interrogates the device tree on some platforms. That
+ * API requires the caller know the exact size of the device tree
+ * property, so we need both a legacy size (32) and the current size
+ * (48) to deal with old and new device trees. The device tree property
+ * is similarly padded to a fixed size so that the same kernel image
+ * can run on multiple devices with different schedulers configured
+ * in the device tree.
+ */
+#define SCHED_STRING_MAX_LENGTH (48)
+
+char sched_string[SCHED_STRING_MAX_LENGTH];
+static enum sched_enum _sched_enum = sched_enum_unknown;
 
 void
 sched_init(void)
+{
+	char sched_arg[SCHED_STRING_MAX_LENGTH] = { '\0' };
+
+	/* Check for runtime selection of the scheduler algorithm */
+	if (!PE_parse_boot_argn("sched", sched_arg, sizeof (sched_arg))) {
+		/* If no boot-args override, look in device tree */
+		if (!PE_get_default("kern.sched", sched_arg,
+							SCHED_STRING_MAX_LENGTH)) {
+			sched_arg[0] = '\0';
+		}
+	}
+
+	if (strlen(sched_arg) > 0) {
+		if (0) {
+			/* Allow pattern below */
+#if defined(CONFIG_SCHED_TRADITIONAL)
+		} else if (0 == strcmp(sched_arg, kSchedTraditionalString)) {
+			sched_current_dispatch = &sched_traditional_dispatch;
+			_sched_enum = sched_enum_traditional;
+			strlcpy(sched_string, kSchedTraditionalString, sizeof(sched_string));
+			kprintf("Scheduler: Runtime selection of %s\n", kSchedTraditionalString);
+		} else if (0 == strcmp(sched_arg, kSchedTraditionalWithPsetRunqueueString)) {
+			sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch;
+			_sched_enum = sched_enum_traditional_with_pset_runqueue;
+			strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string));
+			kprintf("Scheduler: Runtime selection of %s\n", kSchedTraditionalWithPsetRunqueueString);
+#endif
+#if defined(CONFIG_SCHED_PROTO)
+		} else if (0 == strcmp(sched_arg, kSchedProtoString)) {
+			sched_current_dispatch = &sched_proto_dispatch;
+			_sched_enum = sched_enum_proto;
+			strlcpy(sched_string, kSchedProtoString, sizeof(sched_string));
+			kprintf("Scheduler: Runtime selection of %s\n", kSchedProtoString);
+#endif
+#if defined(CONFIG_SCHED_GRRR)
+		} else if (0 == strcmp(sched_arg, kSchedGRRRString)) {
+			sched_current_dispatch = &sched_grrr_dispatch;
+			_sched_enum = sched_enum_grrr;
+			strlcpy(sched_string, kSchedGRRRString, sizeof(sched_string));
+			kprintf("Scheduler: Runtime selection of %s\n", kSchedGRRRString);
+#endif
+#if defined(CONFIG_SCHED_FIXEDPRIORITY)
+		} else if (0 == strcmp(sched_arg, kSchedFixedPriorityString)) {
+			sched_current_dispatch = &sched_fixedpriority_dispatch;
+			_sched_enum = sched_enum_fixedpriority;
+			strlcpy(sched_string, kSchedFixedPriorityString, sizeof(sched_string));
+			kprintf("Scheduler: Runtime selection of %s\n", kSchedFixedPriorityString);
+		} else if (0 == strcmp(sched_arg, kSchedFixedPriorityWithPsetRunqueueString)) {
+			sched_current_dispatch = &sched_fixedpriority_with_pset_runqueue_dispatch;
+			_sched_enum = sched_enum_fixedpriority_with_pset_runqueue;
+			strlcpy(sched_string, kSchedFixedPriorityWithPsetRunqueueString, sizeof(sched_string));
+			kprintf("Scheduler: Runtime selection of %s\n", kSchedFixedPriorityWithPsetRunqueueString);
+#endif
+		} else {
+			panic("Unrecognized scheduler algorithm: %s", sched_arg);
+		}
+	} else {
+#if   defined(CONFIG_SCHED_TRADITIONAL)
+		sched_current_dispatch = &sched_traditional_dispatch;
+		_sched_enum = sched_enum_traditional;
+		strlcpy(sched_string, kSchedTraditionalString, sizeof(sched_string));
+		kprintf("Scheduler: Default of %s\n", kSchedTraditionalString);
+#elif defined(CONFIG_SCHED_PROTO)
+		sched_current_dispatch = &sched_proto_dispatch;
+		_sched_enum = sched_enum_proto;
+		strlcpy(sched_string, kSchedProtoString, sizeof(sched_string));
+		kprintf("Scheduler: Default of %s\n", kSchedProtoString);
+#elif defined(CONFIG_SCHED_GRRR)
+		sched_current_dispatch = &sched_grrr_dispatch;
+		_sched_enum = sched_enum_grrr;
+		strlcpy(sched_string, kSchedGRRRString, sizeof(sched_string));
+		kprintf("Scheduler: Default of %s\n", kSchedGRRRString);
+#elif defined(CONFIG_SCHED_FIXEDPRIORITY)
+		sched_current_dispatch = &sched_fixedpriority_dispatch;
+		_sched_enum = sched_enum_fixedpriority;
+		strlcpy(sched_string, kSchedFixedPriorityString, sizeof(sched_string));
+		kprintf("Scheduler: Default of %s\n", kSchedFixedPriorityString);
+#else
+#error No default scheduler implementation
+#endif
+	}
+	
+	SCHED(init)();
+	SCHED(fairshare_init)();
+	sched_realtime_init();
+	ast_init();
+	
+	SCHED(pset_init)(&pset0);
+	SCHED(processor_init)(master_processor);
+}
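+
+/*
+ * Example (illustrative): booting with a boot-arg of the form
+ * sched=<name>, where <name> matches one of the kSched*String constants
+ * referenced above (e.g. kSchedGRRRString on a kernel built with
+ * CONFIG_SCHED_GRRR), selects that dispatch table at runtime; with no
+ * boot-arg or device tree override, the first policy configured in the
+ * #if chain is the default.
+ */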
+
+void
+sched_timebase_init(void)
+{
+	uint64_t	abstime;
+	
+	clock_interval_to_absolutetime_interval(1, NSEC_PER_SEC, &abstime);
+	sched_one_second_interval = abstime;
+	
+	SCHED(timebase_init)();
+	sched_realtime_timebase_init();
+}
+
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
+static void
+sched_traditional_init(void)
 {
 	/*
 	 * Calculate the timeslicing quantum
@@ -230,19 +594,13 @@ sched_init(void)
 
 	printf("standard timeslicing quantum is %d us\n", std_quantum_us);
 
-	sched_safe_duration = (2 * max_unsafe_quanta / default_preemption_rate) *
-											(1 << SCHED_TICK_SHIFT);
-
 	load_shift_init();
 	preempt_pri_init();
-	simple_lock_init(&rt_lock, 0);
-	run_queue_init(&rt_runq);
 	sched_tick = 0;
-	ast_init();
 }
 
-void
-sched_timebase_init(void)
+static void
+sched_traditional_timebase_init(void)
 {
 	uint64_t	abstime;
 	uint32_t	shift;
@@ -258,17 +616,6 @@ sched_timebase_init(void)
 	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
 	min_std_quantum = (uint32_t)abstime;
 
-	/* smallest rt computaton (50 us) */
-	clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime);
-	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
-	min_rt_quantum = (uint32_t)abstime;
-
-	/* maximum rt computation (50 ms) */
-	clock_interval_to_absolutetime_interval(
-							50, 1000*NSEC_PER_USEC, &abstime);
-	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
-	max_rt_quantum = (uint32_t)abstime;
-
 	/* scheduler tick interval */
 	clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT,
 													NSEC_PER_USEC, &abstime);
@@ -285,9 +632,82 @@ sched_timebase_init(void)
 	sched_fixed_shift = shift;
 
 	max_unsafe_computation = max_unsafe_quanta * std_quantum;
+	sched_safe_duration = 2 * max_unsafe_quanta * std_quantum;
+	
 	max_poll_computation = max_poll_quanta * std_quantum;
+	thread_depress_time = 1 * std_quantum;
+	default_timeshare_computation = std_quantum / 2;
+	default_timeshare_constraint = std_quantum;
+}
+
+static void
+sched_traditional_processor_init(processor_t processor)
+{
+	if (!sched_traditional_use_pset_runqueue) {
+		run_queue_init(&processor->runq);
+	}
+	processor->runq_bound_count = 0;
+}
+
+static void
+sched_traditional_pset_init(processor_set_t pset)
+{
+	if (sched_traditional_use_pset_runqueue) {
+		run_queue_init(&pset->pset_runq);
+	}
+	pset->pset_runq_bound_count = 0;
+}
+
+static void
+sched_traditional_with_pset_runqueue_init(void)
+{
+	sched_traditional_init();
+	sched_traditional_use_pset_runqueue = TRUE;
+}
+
+#endif /* CONFIG_SCHED_TRADITIONAL */
+
+#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY)
+void
+sched_traditional_fairshare_init(void)
+{
+	simple_lock_init(&fs_lock, 0);
+	
+	fs_runq.count = 0;
+	queue_init(&fs_runq.queue);
+}
+#endif
+
+static void
+sched_realtime_init(void)
+{
+	simple_lock_init(&rt_lock, 0);
+
+	rt_runq.count = 0;
+	queue_init(&rt_runq.queue);
 }
 
+static void
+sched_realtime_timebase_init(void)
+{
+	uint64_t abstime;
+
+	/* smallest rt computation (50 us) */
+	clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime);
+	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
+	min_rt_quantum = (uint32_t)abstime;
+
+	/* maximum rt computation (50 ms) */
+	clock_interval_to_absolutetime_interval(
+		50, 1000*NSEC_PER_USEC, &abstime);
+	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
+	max_rt_quantum = (uint32_t)abstime;
+}
+
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
 /*
  * Set up values for timeshare
  * loading factors.
@@ -318,6 +738,8 @@ preempt_pri_init(void)
 		setbit(i, p);
 }
 
+#endif /* CONFIG_SCHED_TRADITIONAL */
+
 /*
  *	Thread wait timer expiration.
  */
@@ -363,7 +785,7 @@ thread_set_timer(
 	thread_lock(thread);
 	if ((thread->state & TH_WAIT) != 0) {
 		clock_interval_to_deadline(interval, scale_factor, &deadline);
-		if (!timer_call_enter(&thread->wait_timer, deadline))
+		if (!timer_call_enter(&thread->wait_timer, deadline, thread->sched_pri >= BASEPRI_RTQUEUES ? TIMER_CALL_CRITICAL : 0))
 			thread->wait_timer_active++;
 		thread->wait_timer_is_set = TRUE;
 	}
@@ -381,7 +803,7 @@ thread_set_timer_deadline(
 	s = splsched();
 	thread_lock(thread);
 	if ((thread->state & TH_WAIT) != 0) {
-		if (!timer_call_enter(&thread->wait_timer, deadline))
+		if (!timer_call_enter(&thread->wait_timer, deadline, thread->sched_pri >= BASEPRI_RTQUEUES ? TIMER_CALL_CRITICAL : 0))
 			thread->wait_timer_active++;
 		thread->wait_timer_is_set = TRUE;
 	}
@@ -453,26 +875,31 @@ thread_unblock(
 		 *	Update run counts.
 		 */
 		sched_run_incr();
-		if (thread->sched_mode & TH_MODE_TIMESHARE)
+		if (thread->sched_mode == TH_MODE_TIMESHARE)
 			sched_share_incr();
 	}
 	else {
 		/*
 		 *	Signal if idling on another processor.
 		 */
+#if CONFIG_SCHED_IDLE_IN_PLACE
 		if (thread->state & TH_IDLE) {
 			processor_t		processor = thread->last_processor;
 
 			if (processor != current_processor())
 				machine_signal_idle(processor);
 		}
+#else
+		assert((thread->state & TH_IDLE) == 0);
+#endif
+
 		result = TRUE;
 	}
 
 	/*
 	 * Calculate deadline for real-time threads.
 	 */
-	if (thread->sched_mode & TH_MODE_REALTIME) {
+	if (thread->sched_mode == TH_MODE_REALTIME) {
 		thread->realtime.deadline = mach_absolute_time();
 		thread->realtime.deadline += thread->realtime.constraint;
 	}
@@ -554,9 +981,9 @@ thread_mark_wait_locked(
 	at_safe_point = (interruptible == THREAD_ABORTSAFE);
 
 	if (	interruptible == THREAD_UNINT			||
-			!(thread->sched_mode & TH_MODE_ABORT)	||
+			!(thread->sched_flags & TH_SFLAG_ABORT)	||
 			(!at_safe_point &&
-				(thread->sched_mode & TH_MODE_ABORTSAFELY))) {
+				(thread->sched_flags & TH_SFLAG_ABORTSAFELY))) {
 
 		DTRACE_SCHED(sleep);
 
@@ -565,8 +992,8 @@ thread_mark_wait_locked(
 		return (thread->wait_result = THREAD_WAITING);
 	}
 	else
-	if (thread->sched_mode & TH_MODE_ABORTSAFELY)
-		thread->sched_mode &= ~TH_MODE_ISABORTED;
+	if (thread->sched_flags & TH_SFLAG_ABORTSAFELY)
+		thread->sched_flags &= ~TH_SFLAG_ABORTED_MASK;
 
 	return (thread->wait_result = THREAD_INTERRUPTED);
 }
@@ -1033,7 +1460,18 @@ kern_return_t
 thread_wakeup_prim(
 	event_t			event,
 	boolean_t		one_thread,
-	wait_result_t	result)
+	wait_result_t		result)
+{
+	return (thread_wakeup_prim_internal(event, one_thread, result, -1));
+}
+
+
+kern_return_t
+thread_wakeup_prim_internal(
+	event_t			event,
+	boolean_t		one_thread,
+	wait_result_t		result,
+	int			priority)
 {
 	register wait_queue_t	wq;
 	register int			index;
@@ -1041,9 +1479,9 @@ thread_wakeup_prim(
 	index = wait_hash(event);
 	wq = &wait_queues[index];
 	if (one_thread)
-	    return (wait_queue_wakeup_one(wq, event, result));
+		return (wait_queue_wakeup_one(wq, event, result, priority));
 	else
-	    return (wait_queue_wakeup_all(wq, event, result));
+		return (wait_queue_wakeup_all(wq, event, result));
 }
 
 /*
@@ -1092,17 +1530,23 @@ thread_select(
 	thread_t			new_thread = THREAD_NULL;
 	boolean_t			inactive_state;
 
+	assert(processor == current_processor());
+
 	do {
 		/*
 		 *	Update the priority.
 		 */
-		if (thread->sched_stamp != sched_tick)
-			update_priority(thread);
-
+		if (SCHED(can_update_priority)(thread))
+			SCHED(update_priority)(thread);
+		
 		processor->current_pri = thread->sched_pri;
+		processor->current_thmode = thread->sched_mode;
 
 		pset_lock(pset);
 
+		assert(pset->low_count);
+		assert(pset->low_pri);
+
 		inactive_state = processor->state != PROCESSOR_SHUTDOWN && machine_processor_is_inactive(processor);
 
 		simple_lock(&rt_lock);
@@ -1113,12 +1557,7 @@ thread_select(
 		 *	bound to a different processor, nor be in the wrong
 		 *	processor set.
 		 */
-		if (
-#if CONFIG_EMBEDDED
-				((thread->state & ~TH_SUSP) == TH_RUN)					&&
-#else
-				thread->state == TH_RUN									&&
-#endif
+		if (	((thread->state & ~TH_SUSP) == TH_RUN)					&&
 				(thread->sched_pri >= BASEPRI_RTQUEUES		||
 				 processor->processor_meta == PROCESSOR_META_NULL ||
 				 processor->processor_meta->primary == processor)		&&
@@ -1128,24 +1567,16 @@ thread_select(
 				 thread->affinity_set->aset_pset == pset)			) {
 			if (	thread->sched_pri >= BASEPRI_RTQUEUES	&&
 						first_timeslice(processor)				) {
-				if (rt_runq.highq >= BASEPRI_RTQUEUES) {
-					register run_queue_t	runq = &rt_runq;
+				if (rt_runq.count > 0) {
 					register queue_t		q;
 
-					q = runq->queues + runq->highq;
+					q = &rt_runq.queue;
 					if (((thread_t)q->next)->realtime.deadline <
 													processor->deadline) {
-						thread = (thread_t)q->next;
-						((queue_entry_t)thread)->next->prev = q;
-						q->next = ((queue_entry_t)thread)->next;
+						thread = (thread_t)dequeue_head(q);
 						thread->runq = PROCESSOR_NULL;
-						runq->count--; runq->urgency--;
-						assert(runq->urgency >= 0);
-						if (queue_empty(q)) {
-							if (runq->highq != IDLEPRI)
-								clrbit(MAXPRI - runq->highq, runq->bitmap);
-							runq->highq = MAXPRI - ffsbit(runq->bitmap);
-						}
+						SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
+						rt_runq.count--;
 					}
 				}
 
@@ -1158,8 +1589,8 @@ thread_select(
 				return (thread);
 			}
 
-			if (!inactive_state && rt_runq.highq < thread->sched_pri &&
-					(new_thread = choose_thread(processor, thread->sched_pri)) == THREAD_NULL) {
+			if (!inactive_state && (thread->sched_mode != TH_MODE_FAIRSHARE || SCHED(fairshare_runq_count)() == 0) && (rt_runq.count == 0 || BASEPRI_RTQUEUES < thread->sched_pri) &&
+					(new_thread = SCHED(choose_thread)(processor, thread->sched_mode == TH_MODE_FAIRSHARE ? MINPRI : thread->sched_pri)) == THREAD_NULL) {
 
 				simple_unlock(&rt_lock);
 
@@ -1167,7 +1598,7 @@ thread_select(
 
 				pset_pri_hint(pset, processor, processor->current_pri);
 
-				pset_count_hint(pset, processor, processor->runq.count);
+				pset_count_hint(pset, processor, SCHED(processor_runq_count)(processor));
 
 				processor->deadline = UINT64_MAX;
 
@@ -1178,14 +1609,14 @@ thread_select(
 		}
 
 		if (new_thread != THREAD_NULL ||
-				(processor->runq.highq >= rt_runq.highq &&
-					 (new_thread = choose_thread(processor, MINPRI)) != THREAD_NULL)) {
+				(SCHED(processor_queue_has_priority)(processor, rt_runq.count == 0 ? IDLEPRI : BASEPRI_RTQUEUES, TRUE) &&
+					 (new_thread = SCHED(choose_thread)(processor, MINPRI)) != THREAD_NULL)) {
 				simple_unlock(&rt_lock);
 
 				if (!inactive_state) {
 					pset_pri_hint(pset, processor, new_thread->sched_pri);
 
-					pset_count_hint(pset, processor, processor->runq.count);
+					pset_count_hint(pset, processor, SCHED(processor_runq_count)(processor));
 				}
 
 				processor->deadline = UINT64_MAX;
@@ -1195,7 +1626,12 @@ thread_select(
 		}
 
 		if (rt_runq.count > 0) {
-			thread = run_queue_dequeue(&rt_runq, SCHED_HEADQ);
+			thread = (thread_t)dequeue_head(&rt_runq.queue);
+
+			thread->runq = PROCESSOR_NULL;
+			SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
+			rt_runq.count--;
+
 			simple_unlock(&rt_lock);
 
 			processor->deadline = thread->realtime.deadline;
@@ -1206,6 +1642,17 @@ thread_select(
 
 		simple_unlock(&rt_lock);
 
+		/* No realtime threads and no normal threads on the per-processor
+		 * runqueue. Finally check for global fairshare threads.
+		 */
+		if ((new_thread = SCHED(fairshare_dequeue)()) != THREAD_NULL) {
+
+			processor->deadline = UINT64_MAX;
+			pset_unlock(pset);
+			
+			return (new_thread);
+		}
+			
 		processor->deadline = UINT64_MAX;
 
 		/*
@@ -1214,10 +1661,10 @@ thread_select(
 		 */
 		if (inactive_state) {
 			if (processor->state == PROCESSOR_RUNNING)
-				remqueue(&pset->active_queue, (queue_entry_t)processor);
+				remqueue((queue_entry_t)processor);
 			else
 			if (processor->state == PROCESSOR_IDLE)
-				remqueue(&pset->idle_queue, (queue_entry_t)processor);
+				remqueue((queue_entry_t)processor);
 
 			processor->state = PROCESSOR_INACTIVE;
 
@@ -1230,15 +1677,16 @@ thread_select(
 		 *	No runnable threads, attempt to steal
 		 *	from other processors.
 		 */
-		new_thread = steal_thread(pset);
-		if (new_thread != THREAD_NULL)
+		new_thread = SCHED(steal_thread)(pset);
+		if (new_thread != THREAD_NULL) {
 			return (new_thread);
+		}
 
 		/*
 		 *	If other threads have appeared, shortcut
 		 *	around again.
 		 */
-		if (processor->runq.count > 0 || rt_runq.count > 0)
+		if (!SCHED(processor_queue_empty)(processor) || rt_runq.count > 0 || SCHED(fairshare_runq_count)() > 0)
 			continue;
 
 		pset_lock(pset);
@@ -1248,26 +1696,28 @@ thread_select(
 		 *	was running.
 		 */
 		if (processor->state == PROCESSOR_RUNNING) {
-			remqueue(&pset->active_queue, (queue_entry_t)processor);
+			remqueue((queue_entry_t)processor);
 			processor->state = PROCESSOR_IDLE;
 
 			if (processor->processor_meta == PROCESSOR_META_NULL || processor->processor_meta->primary == processor) {
 				enqueue_head(&pset->idle_queue, (queue_entry_t)processor);
-				pset->low_pri = pset->low_count = processor;
+				pset_pri_init_hint(pset, processor);
+				pset_count_init_hint(pset, processor);
 			}
 			else {
 				enqueue_head(&processor->processor_meta->idle_queue, (queue_entry_t)processor);
-				pset_unlock(pset);
-				return (processor->idle_thread);
+					pset_unlock(pset);
+					return (processor->idle_thread);
 			}
 		}
 
 		pset_unlock(pset);
 
+#if CONFIG_SCHED_IDLE_IN_PLACE
 		/*
 		 *	Choose idle thread if fast idle is not possible.
 		 */
-		if ((thread->state & (TH_IDLE|TH_TERMINATE|TH_SUSP)) || !(thread->state & TH_WAIT) || thread->wake_active)
+		if ((thread->state & (TH_IDLE|TH_TERMINATE|TH_SUSP)) || !(thread->state & TH_WAIT) || thread->wake_active || thread->sched_pri >= BASEPRI_RTQUEUES)
 			return (processor->idle_thread);
 
 		/*
@@ -1277,11 +1727,23 @@ thread_select(
 		 */
 		new_thread = thread_select_idle(thread, processor);
 
+#else /* !CONFIG_SCHED_IDLE_IN_PLACE */
+		
+		/*
+		 * Do a full context switch to idle so that the current
+		 * thread can start running on another processor without
+		 * waiting for the fast-idled processor to wake up.
+		 */
+		return (processor->idle_thread);
+
+#endif /* !CONFIG_SCHED_IDLE_IN_PLACE */
+
 	} while (new_thread == THREAD_NULL);
 
 	return (new_thread);
 }
 
+#if CONFIG_SCHED_IDLE_IN_PLACE
 /*
  *	thread_select_idle:
  *
@@ -1296,12 +1758,13 @@ thread_select_idle(
 {
 	thread_t		new_thread;
 
-	if (thread->sched_mode & TH_MODE_TIMESHARE)
+	if (thread->sched_mode == TH_MODE_TIMESHARE)
 		sched_share_decr();
 	sched_run_decr();
 
 	thread->state |= TH_IDLE;
 	processor->current_pri = IDLEPRI;
+	processor->current_thmode = TH_MODE_NONE;
 
 	thread_unlock(thread);
 
@@ -1309,6 +1772,7 @@ thread_select_idle(
 	 *	Switch execution timing to processor idle thread.
 	 */
 	processor->last_dispatch = mach_absolute_time();
+	thread->last_run_time = processor->last_dispatch;
 	thread_timer_event(processor->last_dispatch, &processor->idle_thread->system_timer);
 	PROCESSOR_DATA(processor, kernel_timer) = &processor->idle_thread->system_timer;
 
@@ -1320,6 +1784,8 @@ thread_select_idle(
 
 	(*thread->sched_call)(SCHED_CALL_BLOCK, thread);
 
+	thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0);
+
 	/*
 	 *	Enable interrupts and perform idling activities.  No
 	 *	preemption due to TH_IDLE being set.
@@ -1333,6 +1799,23 @@ thread_select_idle(
 
 	thread_lock(thread);
 
+	/*
+	 * If we idled in place, simulate a context switch back
+	 * to the original priority of the thread so that the
+	 * platform layer cannot distinguish this from a true
+	 * switch to the idle thread.
+	 */
+	if (thread->sched_mode == TH_MODE_REALTIME)
+		thread_tell_urgency(THREAD_URGENCY_REAL_TIME, thread->realtime.period, thread->realtime.deadline);
+	/* Identify non-promoted threads which have requested a
+	 * "background" priority.
+	 */
+	else if ((thread->sched_pri <= MAXPRI_THROTTLE) &&
+			 (thread->priority <= MAXPRI_THROTTLE))
+		thread_tell_urgency(THREAD_URGENCY_BACKGROUND, thread->sched_pri, thread->priority);
+	else
+		thread_tell_urgency(THREAD_URGENCY_NORMAL, thread->sched_pri, thread->priority);
+
 	/*
 	 *	If awakened, switch to thread timer and start a new quantum.
 	 *	Otherwise skip; we will context switch to another thread or return here.
@@ -1343,9 +1826,10 @@ thread_select_idle(
 		PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
 
 		thread_quantum_init(thread);
+		thread->last_quantum_refill_time = processor->last_dispatch;
 
 		processor->quantum_end = processor->last_dispatch + thread->current_quantum;
-		timer_call_enter1(&processor->quantum_timer, thread, processor->quantum_end);
+		timer_call_enter1(&processor->quantum_timer, thread, processor->quantum_end, 0);
 		processor->timeslice = 1;
 
 		thread->computation_epoch = processor->last_dispatch;
@@ -1354,11 +1838,32 @@ thread_select_idle(
 	thread->state &= ~TH_IDLE;
 
 	sched_run_incr();
-	if (thread->sched_mode & TH_MODE_TIMESHARE)
+	if (thread->sched_mode == TH_MODE_TIMESHARE)
 		sched_share_incr();
 
 	return (new_thread);
 }
+#endif /* CONFIG_SCHED_IDLE_IN_PLACE */
+
+#if defined(CONFIG_SCHED_TRADITIONAL) 
+static thread_t
+sched_traditional_choose_thread(
+								processor_t		processor,
+								int				priority)
+{
+	thread_t thread;
+	
+	thread = choose_thread(processor, runq_for_processor(processor), priority);
+	if (thread != THREAD_NULL) {
+		runq_consider_decr_bound_count(processor, thread);
+	}
+	
+	return thread;
+}
+
+#endif /* defined(CONFIG_SCHED_TRADITIONAL)  */
+
+#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_FIXEDPRIORITY)
 
 /*
  *	choose_thread:
@@ -1370,12 +1875,12 @@ thread_select_idle(
  *	Associated pset must be locked.  Returns THREAD_NULL
  *	on failure.
  */
-static thread_t
+thread_t
 choose_thread(
 	processor_t		processor,
+	run_queue_t		rq,
 	int				priority)
 {
-	run_queue_t		rq = &processor->runq;
 	queue_t			queue = rq->queues + rq->highq;
 	int				pri = rq->highq, count = rq->count;
 	thread_t		thread;
@@ -1385,11 +1890,12 @@ choose_thread(
 		while (!queue_end(queue, (queue_entry_t)thread)) {
 			if (thread->bound_processor == PROCESSOR_NULL ||
 							thread->bound_processor == processor) {
-				remqueue(queue, (queue_entry_t)thread);
+				remqueue((queue_entry_t)thread);
 
 				thread->runq = PROCESSOR_NULL;
+				SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
 				rq->count--;
-				if (testbit(pri, sched_preempt_pri)) {
+				if (SCHED(priority_is_urgent)(pri)) {
 					rq->urgency--; assert(rq->urgency >= 0);
 				}
 				if (queue_empty(queue)) {
@@ -1411,6 +1917,8 @@ choose_thread(
 	return (THREAD_NULL);
 }
 
+#endif /* defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_FIXEDPRIORITY) */
+
 /*
  *	Perform a context switch and start executing the new thread.
  *
@@ -1478,7 +1986,7 @@ thread_invoke(
 	 * Allow time constraint threads to hang onto
 	 * a stack.
 	 */
-	if ((self->sched_mode & TH_MODE_REALTIME) && !self->reserved_stack)
+	if ((self->sched_mode == TH_MODE_REALTIME) && !self->reserved_stack)
 		self->reserved_stack = self->kernel_stack;
 
 	if (continuation != NULL) {
@@ -1500,6 +2008,7 @@ thread_invoke(
 			processor = current_processor();
 			processor->active_thread = thread;
 			processor->current_pri = thread->sched_pri;
+			processor->current_thmode = thread->sched_mode;
 			if (thread->last_processor != processor && thread->last_processor != NULL) {
 				if (thread->last_processor->processor_set != processor->processor_set)
 					thread->ps_switch++;
@@ -1513,16 +2022,24 @@ thread_invoke(
 			self->reason = reason;
 
 			processor->last_dispatch = mach_absolute_time();
+			self->last_run_time = processor->last_dispatch;
 			thread_timer_event(processor->last_dispatch, &thread->system_timer);
 			PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
 	
 			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_HANDOFF)|DBG_FUNC_NONE,
 										self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
 
+			if ((thread->chosen_processor != processor) && (thread->chosen_processor != NULL)) {
+				KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE,
+						(uintptr_t)thread_tid(thread), (uintptr_t)thread->chosen_processor->cpu_id, 0, 0, 0);
+			}
+
 			DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info);
 
-			TLOG(1, "thread_invoke: calling machine_stack_handoff\n");
-			machine_stack_handoff(self, thread);
+			SCHED_STATS_CSW(processor, self->reason, self->sched_pri, thread->sched_pri);
+
+			TLOG(1, "thread_invoke: calling stack_handoff\n");
+			stack_handoff(self, thread);
 
 			DTRACE_SCHED(on__cpu);
 
@@ -1545,6 +2062,9 @@ thread_invoke(
 			counter(++c_thread_invoke_same);
 			thread_unlock(self);
 
+			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
+								self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
+
 			self->continuation = self->parameter = NULL;
 
 			funnel_refunnel_check(self, 3);
@@ -1571,6 +2091,10 @@ need_stack:
 			ast_context(self);
 			counter(++c_thread_invoke_same);
 			thread_unlock(self);
+
+			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
+								self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
+
 			return (TRUE);
 		}
 	}
@@ -1581,6 +2105,7 @@ need_stack:
 	processor = current_processor();
 	processor->active_thread = thread;
 	processor->current_pri = thread->sched_pri;
+	processor->current_thmode = thread->sched_mode;
 	if (thread->last_processor != processor && thread->last_processor != NULL) {
 		if (thread->last_processor->processor_set != processor->processor_set)
 			thread->ps_switch++;
@@ -1597,14 +2122,22 @@ need_stack:
 	self->reason = reason;
 
 	processor->last_dispatch = mach_absolute_time();
+	self->last_run_time = processor->last_dispatch;
 	thread_timer_event(processor->last_dispatch, &thread->system_timer);
 	PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer;
 
 	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE,
 							self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0);
 
+	if ((thread->chosen_processor != processor) && (thread->chosen_processor != NULL)) {
+		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE,
+				(uintptr_t)thread_tid(thread), (uintptr_t)thread->chosen_processor->cpu_id, 0, 0, 0);
+	}
+
 	DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info);
 
+	SCHED_STATS_CSW(processor, self->reason, self->sched_pri, thread->sched_pri);
+
 	/*
 	 * This is where we actually switch register context,
 	 * and address space if required.  We will next run
@@ -1671,7 +2204,7 @@ thread_dispatch(
 			else
 				thread->current_quantum = 0;
 
-			if (thread->sched_mode & TH_MODE_REALTIME) {
+			if (thread->sched_mode == TH_MODE_REALTIME) {
 				/*
 				 *	Cancel the deadline if the thread has
 				 *	consumed the entire quantum.
@@ -1681,6 +2214,7 @@ thread_dispatch(
 					thread->reason |= AST_QUANTUM;
 				}
 			} else {
+#if defined(CONFIG_SCHED_TRADITIONAL)
 				/*
 				 *	For non-realtime threads treat a tiny
 				 *	remaining quantum as an expired quantum
@@ -1690,6 +2224,7 @@ thread_dispatch(
 					thread->reason |= AST_QUANTUM;
 					thread->current_quantum += std_quantum;
 				}
+#endif
 			}
 
 			/*
@@ -1738,7 +2273,7 @@ thread_dispatch(
 
 				thread->state &= ~TH_RUN;
 
-				if (thread->sched_mode & TH_MODE_TIMESHARE)
+				if (thread->sched_mode == TH_MODE_TIMESHARE)
 					sched_share_decr();
 				sched_run_decr();
 
@@ -1762,17 +2297,30 @@ thread_dispatch(
 	}
 
 	if (!(self->state & TH_IDLE)) {
+
+		if (self->sched_mode == TH_MODE_REALTIME)
+			thread_tell_urgency(THREAD_URGENCY_REAL_TIME, self->realtime.period, self->realtime.deadline);
+		/* Identify non-promoted threads which have requested a
+		 * "background" priority.
+		 */
+		else if ((self->sched_pri <= MAXPRI_THROTTLE) &&
+			(self->priority <= MAXPRI_THROTTLE))
+			thread_tell_urgency(THREAD_URGENCY_BACKGROUND, self->sched_pri, self->priority);
+		else
+			thread_tell_urgency(THREAD_URGENCY_NORMAL, self->sched_pri, self->priority);
 		/*
 		 *	Get a new quantum if none remaining.
 		 */
-		if (self->current_quantum == 0)
+		if (self->current_quantum == 0) {
 			thread_quantum_init(self);
+			self->last_quantum_refill_time = processor->last_dispatch;
+		}
 
 		/*
 		 *	Set up quantum timer and timeslice.
 		 */
 		processor->quantum_end = (processor->last_dispatch + self->current_quantum);
-		timer_call_enter1(&processor->quantum_timer, self, processor->quantum_end);
+		timer_call_enter1(&processor->quantum_timer, self, processor->quantum_end, 0);
 
 		processor->timeslice = 1;
 
@@ -1781,6 +2329,8 @@ thread_dispatch(
 	else {
 		timer_call_cancel(&processor->quantum_timer);
 		processor->timeslice = 0;
+
+		thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0);
 	}
 }
 
@@ -1832,7 +2382,7 @@ thread_block_reason(
 	self->continuation = continuation;
 	self->parameter = parameter;
 
-	if (kdebug_thread_block && kdebug_enable && self->state != TH_RUN) {
+	if (__improbable(kdebug_thread_block && kdebug_enable && self->state != TH_RUN)) {
 		uint32_t        bt[8];
 
 		OSBacktrace((void **)&bt[0], 8);
@@ -1944,18 +2494,52 @@ thread_continue(
 	/*NOTREACHED*/
 }
 
-/*
- *	run_queue_init:
- *
- *	Initialize a run queue before first use.
- */
 void
-run_queue_init(
-	run_queue_t		rq)
+thread_quantum_init(thread_t thread)
 {
-	int				i;
+	if (thread->sched_mode == TH_MODE_REALTIME) {
+		thread->current_quantum = thread->realtime.computation;
+	} else {
+		thread->current_quantum = SCHED(initial_quantum_size)(thread);
+	}
+}
 
-	rq->highq = IDLEPRI;
+#if defined(CONFIG_SCHED_TRADITIONAL)
+static uint32_t
+sched_traditional_initial_quantum_size(thread_t thread __unused)
+{
+	return std_quantum;
+}
+
+static sched_mode_t
+sched_traditional_initial_thread_sched_mode(task_t parent_task)
+{
+	if (parent_task == kernel_task)
+		return TH_MODE_FIXED;
+	else
+		return TH_MODE_TIMESHARE;
+}
+
+static boolean_t
+sched_traditional_supports_timeshare_mode(void)
+{
+	return TRUE;
+}
+
+#endif /* CONFIG_SCHED_TRADITIONAL */
+
+/*
+ *	run_queue_init:
+ *
+ *	Initialize a run queue before first use.
+ */
+void
+run_queue_init(
+	run_queue_t		rq)
+{
+	int				i;
+
+	rq->highq = IDLEPRI;
 	for (i = 0; i < NRQBM; i++)
 		rq->bitmap[i] = 0;
 	setbit(MAXPRI - IDLEPRI, rq->bitmap);
@@ -1964,16 +2548,97 @@ run_queue_init(
 		queue_init(&rq->queues[i]);
 }
 
+#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY)
+int
+sched_traditional_fairshare_runq_count(void)
+{
+	return fs_runq.count;
+}
+
+uint64_t
+sched_traditional_fairshare_runq_stats_count_sum(void)
+{
+	return fs_runq.runq_stats.count_sum;
+}
+
+void
+sched_traditional_fairshare_enqueue(thread_t thread)
+{
+	queue_t				queue = &fs_runq.queue;
+	
+	simple_lock(&fs_lock);
+	
+	enqueue_tail(queue, (queue_entry_t)thread);
+	
+	thread->runq = FS_RUNQ;
+	SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count);
+	fs_runq.count++;
+	
+	simple_unlock(&fs_lock);	
+}
+
+thread_t
+sched_traditional_fairshare_dequeue(void)
+{
+	thread_t thread;
+	
+	simple_lock(&fs_lock);
+	if (fs_runq.count > 0) {
+		thread = (thread_t)dequeue_head(&fs_runq.queue);
+		
+		thread->runq = PROCESSOR_NULL;
+		SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count);
+		fs_runq.count--;
+		
+		simple_unlock(&fs_lock);
+		
+		return (thread);
+	}
+	simple_unlock(&fs_lock);		
+
+	return THREAD_NULL;
+}
+
+boolean_t
+sched_traditional_fairshare_queue_remove(thread_t thread)
+{
+	queue_t			q;
+
+	simple_lock(&fs_lock);
+	q = &fs_runq.queue;
+	
+	if (FS_RUNQ == thread->runq) {
+		remqueue((queue_entry_t)thread);
+		SCHED_STATS_RUNQ_CHANGE(&fs_runq.runq_stats, fs_runq.count);
+		fs_runq.count--;
+		
+		thread->runq = PROCESSOR_NULL;
+		simple_unlock(&fs_lock);
+		return (TRUE);
+	}
+	else {
+		/*
+		 *	The thread left the run queue before we could
+		 * 	lock the run queue.
+		 */
+		assert(thread->runq == PROCESSOR_NULL);
+		simple_unlock(&fs_lock);
+		return (FALSE);
+	}	
+}
+
+#endif /* defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY) */
+
 /*
  *	run_queue_dequeue:
  *
  *	Perform a dequeue operation on a run queue,
  *	and return the resulting thread.
  *
- *	The run queue must be locked (see run_queue_remove()
+ *	The run queue must be locked (see thread_run_queue_remove()
  *	for more info), and not empty.
  */
-static thread_t
+thread_t
 run_queue_dequeue(
 	run_queue_t		rq,
 	integer_t		options)
@@ -1982,19 +2647,16 @@ run_queue_dequeue(
 	queue_t			queue = rq->queues + rq->highq;
 
 	if (options & SCHED_HEADQ) {
-		thread = (thread_t)queue->next;
-		((queue_entry_t)thread)->next->prev = queue;
-		queue->next = ((queue_entry_t)thread)->next;
+		thread = (thread_t)dequeue_head(queue);
 	}
 	else {
-		thread = (thread_t)queue->prev;
-		((queue_entry_t)thread)->prev->next = queue;
-		queue->prev = ((queue_entry_t)thread)->prev;
+		thread = (thread_t)dequeue_tail(queue);
 	}
 
 	thread->runq = PROCESSOR_NULL;
+	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
 	rq->count--;
-	if (testbit(rq->highq, sched_preempt_pri)) {
+	if (SCHED(priority_is_urgent)(rq->highq)) {
 		rq->urgency--; assert(rq->urgency >= 0);
 	}
 	if (queue_empty(queue)) {
@@ -2006,6 +2668,103 @@ run_queue_dequeue(
 	return (thread);
 }
 
+/*
+ *	run_queue_enqueue:
+ *
+ *	Perform a enqueue operation on a run queue.
+ *
+ *	The run queue must be locked (see thread_run_queue_remove()
+ *	for more info).
+ */
+boolean_t
+run_queue_enqueue(
+							  run_queue_t		rq,
+							  thread_t			thread,
+							  integer_t		options)
+{
+	queue_t			queue = rq->queues + thread->sched_pri;
+	boolean_t		result = FALSE;
+	
+	if (queue_empty(queue)) {
+		enqueue_tail(queue, (queue_entry_t)thread);
+		
+		setbit(MAXPRI - thread->sched_pri, rq->bitmap);
+		if (thread->sched_pri > rq->highq) {
+			rq->highq = thread->sched_pri;
+			result = TRUE;
+		}
+	}
+	else
+		if (options & SCHED_TAILQ)
+			enqueue_tail(queue, (queue_entry_t)thread);
+		else
+			enqueue_head(queue, (queue_entry_t)thread);
+	
+	if (SCHED(priority_is_urgent)(thread->sched_pri))
+		rq->urgency++;
+	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
+	rq->count++;
+	
+	return (result);
+	
+}
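+
+/*
+ * For example, enqueueing a priority-80 thread into a run queue whose
+ * highq is 60 lands on an empty per-priority queue, so its bitmap bit
+ * is set, highq is raised to 80, and TRUE is returned so the caller can
+ * consider preemption.
+ */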
+
+/*
+ *	run_queue_remove:
+ *
+ *	Remove a specific thread from a runqueue.
+ *
+ *	The run queue must be locked.
+ */
+void
+run_queue_remove(
+				  run_queue_t		rq,
+				  thread_t			thread)
+{
+
+	remqueue((queue_entry_t)thread);
+	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
+	rq->count--;
+	if (SCHED(priority_is_urgent)(thread->sched_pri)) {
+		rq->urgency--; assert(rq->urgency >= 0);
+	}
+	
+	if (queue_empty(rq->queues + thread->sched_pri)) {
+		/* update run queue status */
+		if (thread->sched_pri != IDLEPRI)
+			clrbit(MAXPRI - thread->sched_pri, rq->bitmap);
+		rq->highq = MAXPRI - ffsbit(rq->bitmap);
+	}
+	
+	thread->runq = PROCESSOR_NULL;
+}
+
+/*
+ *	fairshare_setrun:
+ *
+ *	Dispatch a thread for round-robin execution.
+ *
+ *	Thread must be locked.  Associated pset must
+ *	be locked, and is returned unlocked.
+ */
+static void
+fairshare_setrun(
+				  processor_t			processor,
+				  thread_t			thread)
+{
+	processor_set_t		pset = processor->processor_set;
+		
+	thread->chosen_processor = processor;
+
+	SCHED(fairshare_enqueue)(thread);
+	
+	if (processor != current_processor())
+		machine_signal_idle(processor);
+
+	pset_unlock(pset);
+
+}
+
 /*
  *	realtime_queue_insert:
  *
@@ -2015,8 +2774,7 @@ static boolean_t
 realtime_queue_insert(
 	thread_t			thread)
 {
-	run_queue_t			rq = &rt_runq;
-	queue_t				queue = rq->queues + thread->sched_pri;
+	queue_t				queue = &rt_runq.queue;
 	uint64_t			deadline = thread->realtime.deadline;
 	boolean_t			preempt = FALSE;
 
@@ -2024,10 +2782,6 @@ realtime_queue_insert(
 
 	if (queue_empty(queue)) {
 		enqueue_tail(queue, (queue_entry_t)thread);
-
-		setbit(MAXPRI - thread->sched_pri, rq->bitmap);
-		if (thread->sched_pri > rq->highq)
-			rq->highq = thread->sched_pri;
 		preempt = TRUE;
 	}
 	else {
@@ -2050,7 +2804,8 @@ realtime_queue_insert(
 	}
 
 	thread->runq = RT_RUNQ;
-	rq->count++; rq->urgency++;
+	SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
+	rt_runq.count++;
 
 	simple_unlock(&rt_lock);
 
@@ -2072,11 +2827,14 @@ realtime_setrun(
 {
 	processor_set_t		pset = processor->processor_set;
 
+	thread->chosen_processor = processor;
+
 	/*
 	 *	Dispatch directly onto idle processor.
 	 */
-	if (processor->state == PROCESSOR_IDLE) {
-		remqueue(&pset->idle_queue, (queue_entry_t)processor);
+	if ( (thread->bound_processor == processor)
+		&& processor->state == PROCESSOR_IDLE) {
+		remqueue((queue_entry_t)processor);
 		enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
 
 		processor->next_thread = thread;
@@ -2090,8 +2848,11 @@ realtime_setrun(
 	}
 
 	if (realtime_queue_insert(thread)) {
+		int prstate = processor->state;
 		if (processor == current_processor())
 			ast_on(AST_PREEMPT | AST_URGENT);
+		else if ((prstate == PROCESSOR_DISPATCHING)  || (prstate == PROCESSOR_IDLE))
+			machine_signal_idle(processor);
 		else
 			cause_ast_check(processor);
 	}
@@ -2099,6 +2860,14 @@ realtime_setrun(
 	pset_unlock(pset);
 }
 
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
+static boolean_t
+priority_is_urgent(int priority)
+{
+	return testbit(priority, sched_preempt_pri) ? TRUE : FALSE;
+}
+
 /*
  *	processor_enqueue:
  *
@@ -2108,7 +2877,7 @@ realtime_setrun(
  *	Returns TRUE if a preemption is indicated based on the state
  *	of the run queue.
  *
- *	The run queue must be locked (see run_queue_remove()
+ *	The run queue must be locked (see thread_run_queue_remove()
  *	for more info).
  */
 static boolean_t
@@ -2117,33 +2886,18 @@ processor_enqueue(
 	thread_t		thread,
 	integer_t		options)
 {
-	run_queue_t		rq = &processor->runq;
-	queue_t			queue = rq->queues + thread->sched_pri;
-	boolean_t		result = FALSE;
+	run_queue_t		rq = runq_for_processor(processor);
+	boolean_t		result;
 	
-	if (queue_empty(queue)) {
-		enqueue_tail(queue, (queue_entry_t)thread);
-
-		setbit(MAXPRI - thread->sched_pri, rq->bitmap);
-		if (thread->sched_pri > rq->highq) {
-			rq->highq = thread->sched_pri;
-			result = TRUE;
-		}
-	}
-	else
-	if (options & SCHED_TAILQ)
-		enqueue_tail(queue, (queue_entry_t)thread);
-	else
-		enqueue_head(queue, (queue_entry_t)thread);
-
+	result = run_queue_enqueue(rq, thread, options);
 	thread->runq = processor;
-	if (testbit(thread->sched_pri, sched_preempt_pri))
-		rq->urgency++;
-	rq->count++;
+	runq_consider_incr_bound_count(processor, thread);
 
 	return (result);
 }
 
+#endif /* CONFIG_SCHED_TRADITIONAL */
+
 /*
  *	processor_setrun:
  *
@@ -2162,11 +2916,15 @@ processor_setrun(
 	processor_set_t		pset = processor->processor_set;
 	ast_t				preempt;
 
+	thread->chosen_processor = processor;
+
 	/*
 	 *	Dispatch directly onto idle processor.
 	 */
-	if (processor->state == PROCESSOR_IDLE) {
-		remqueue(&pset->idle_queue, (queue_entry_t)processor);
+	if ( (SCHED(direct_dispatch_to_idle_processors) ||
+		  thread->bound_processor == processor)
+		&& processor->state == PROCESSOR_IDLE) {
+		remqueue((queue_entry_t)processor);
 		enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
 
 		processor->next_thread = thread;
@@ -2182,15 +2940,17 @@ processor_setrun(
 	/*
 	 *	Set preemption mode.
 	 */
-	if (testbit(thread->sched_pri, sched_preempt_pri))
+	if (SCHED(priority_is_urgent)(thread->sched_pri) && thread->sched_pri > processor->current_pri)
+		preempt = (AST_PREEMPT | AST_URGENT);
+	else if (processor->active_thread && thread_eager_preemption(processor->active_thread))
 		preempt = (AST_PREEMPT | AST_URGENT);
 	else
-	if (thread->sched_mode & TH_MODE_TIMESHARE && thread->sched_pri < thread->priority)
+	if ((thread->sched_mode == TH_MODE_TIMESHARE) && thread->sched_pri < thread->priority)
 		preempt = AST_NONE;
 	else
 		preempt = (options & SCHED_PREEMPT)? AST_PREEMPT: AST_NONE;
 
-	if (!processor_enqueue(processor, thread, options))
+	if (!SCHED(processor_enqueue)(processor, thread, options))
 		preempt = AST_NONE;
 
 	if (preempt != AST_NONE) {
@@ -2199,9 +2959,14 @@ processor_setrun(
 				ast_on(preempt);
 		}
 		else
+		if (	processor->state == PROCESSOR_IDLE || processor->state == PROCESSOR_DISPATCHING) {
+			machine_signal_idle(processor);
+		}
+		else
 		if (	(processor->state == PROCESSOR_RUNNING		||
 				 processor->state == PROCESSOR_SHUTDOWN)		&&
-				thread->sched_pri >= processor->current_pri		) {
+				(thread->sched_pri >= processor->current_pri	||
+				processor->current_thmode == TH_MODE_FAIRSHARE)) {
 			cause_ast_check(processor);
 		}
 	}
@@ -2210,10 +2975,112 @@ processor_setrun(
 			thread->sched_pri >= processor->current_pri	) {
 		cause_ast_check(processor);
 	}
+	else
+	if (	processor->state == PROCESSOR_IDLE	&&
+			processor != current_processor()	) {
+				machine_signal_idle(processor);
+	}
 
 	pset_unlock(pset);
 }
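The rewritten preemption ladder above checks four cases in order: an urgent-band priority beating the running thread, an eagerly-preemptible running thread, a decayed timeshare thread (running below its base priority, which never preempts), and finally the caller's SCHED_PREEMPT hint. A condensed model of that decision, not part of the patch (the flag values are illustrative):

    #include <stdbool.h>

    typedef unsigned ast_t;
    #define AST_NONE    0x0u
    #define AST_PREEMPT 0x1u
    #define AST_URGENT  0x2u

    static ast_t choose_preempt(bool urgent_and_higher, bool running_is_eager,
                                bool decayed_timeshare, bool caller_asked)
    {
        if (urgent_and_higher)
            return AST_PREEMPT | AST_URGENT;  /* urgent band beats current_pri */
        if (running_is_eager)
            return AST_PREEMPT | AST_URGENT;  /* running thread wants off-core */
        if (decayed_timeshare)
            return AST_NONE;                  /* below base priority: no preempt */
        return caller_asked ? AST_PREEMPT : AST_NONE;
    }
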
 
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
+static boolean_t
+processor_queue_empty(processor_t		processor)
+{
+	return runq_for_processor(processor)->count == 0;
+}
+
+static boolean_t
+sched_traditional_with_pset_runqueue_processor_queue_empty(processor_t		processor)
+{
+	processor_set_t pset = processor->processor_set;
+	int count = runq_for_processor(processor)->count;
+
+	/*
+	 * The pset runq contains the count of all runnable threads
+	 * for all processors in the pset. However, for threads that
+	 * are bound to another processor, the current "processor"
+	 * is not eligible to execute the thread. So we only
+	 * include bound threads that are bound to the current
+	 * "processor". This allows the processor to idle when the
+	 * count of eligible threads drops to 0, even if there's
+	 * a runnable thread bound to a different processor in the
+	 * shared runq.
+	 */
+
+	count -= pset->pset_runq_bound_count;
+	count += processor->runq_bound_count;
+
+	return count == 0;
+}
+
+static ast_t
+processor_csw_check(processor_t processor)
+{
+	run_queue_t		runq;
+
+	assert(processor->active_thread != NULL);
+	
+	runq = runq_for_processor(processor);
+	if (runq->highq > processor->current_pri) {
+		if (runq->urgency > 0)
+			return (AST_PREEMPT | AST_URGENT);
+		
+		if (processor->active_thread && thread_eager_preemption(processor->active_thread))
+			return (AST_PREEMPT | AST_URGENT);
+
+		return AST_PREEMPT;
+	}
+
+	return AST_NONE;
+}
+
+static boolean_t
+processor_queue_has_priority(processor_t		processor,
+							 int				priority,
+							 boolean_t			gte)
+{
+	if (gte)
+		return runq_for_processor(processor)->highq >= priority;
+	else
+		return runq_for_processor(processor)->highq > priority;
+}
+
+static boolean_t
+should_current_thread_rechoose_processor(processor_t			processor)
+{
+	return (processor->current_pri < BASEPRI_RTQUEUES
+			&& processor->processor_meta != PROCESSOR_META_NULL
+			&& processor->processor_meta->primary != processor);
+}
+
+static int
+sched_traditional_processor_runq_count(processor_t   processor)
+{
+	return runq_for_processor(processor)->count;
+}
+
+static uint64_t
+sched_traditional_processor_runq_stats_count_sum(processor_t   processor)
+{
+	return runq_for_processor(processor)->runq_stats.count_sum;
+}
+
+static uint64_t
+sched_traditional_with_pset_runqueue_processor_runq_stats_count_sum(processor_t   processor)
+{
+	if (processor->cpu_id == processor->processor_set->cpu_set_low)
+		return runq_for_processor(processor)->runq_stats.count_sum;
+	else
+		return 0ULL;
+}
+
+#endif /* CONFIG_SCHED_TRADITIONAL */
+
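The bound-count arithmetic in sched_traditional_with_pset_runqueue_processor_queue_empty() above is the subtle part of the shared-runqueue variant: subtract every bound thread in the pset, then add back the ones bound to this processor. A standalone sketch of the same computation (the struct names are illustrative, not kernel types):

    #include <stdbool.h>

    struct pset_model { int runq_count; int bound_count; };
    struct cpu_model  { int my_bound_count; };

    static bool shared_runq_empty_for(const struct pset_model *pset,
                                      const struct cpu_model *cpu)
    {
        /* All runnable threads in the shared queue, minus those bound
         * to any processor, plus those bound to this one (which only
         * this processor may run). Zero means this CPU may idle. */
        int eligible = pset->runq_count - pset->bound_count
                     + cpu->my_bound_count;
        return eligible == 0;
    }
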
 #define next_pset(p)	(((p)->pset_list != PROCESSOR_SET_NULL)? (p)->pset_list: (p)->node->psets)
 
 /*
@@ -2233,7 +3100,7 @@ choose_next_pset(
 
 	do {
 		nset = next_pset(nset);
-	} while (nset->processor_count < 1 && nset != pset);
+	} while (nset->online_processor_count < 1 && nset != pset);
 
 	return (nset);
 }
@@ -2250,7 +3117,7 @@ choose_next_pset(
  *	The thread must be locked.  The pset must be locked,
  *	and the resulting pset is locked on return.
  */
-static processor_t
+processor_t
 choose_processor(
 	processor_set_t		pset,
 	processor_t			processor,
@@ -2258,7 +3125,7 @@ choose_processor(
 {
 	processor_set_t		nset, cset = pset;
 	processor_meta_t	pmeta = PROCESSOR_META_NULL;
-	processor_t		mprocessor;
+	processor_t             mprocessor;
 	
 	/*
 	 *	Prefer the hinted processor, when appropriate.
@@ -2308,8 +3175,8 @@ choose_processor(
 			lp_processor = cset->low_pri;
 			/* Consider hinted processor */
 			if (lp_processor != PROCESSOR_NULL &&
-			((lp_processor->processor_meta == PROCESSOR_META_NULL) ||
-			((lp_processor == lp_processor->processor_meta->primary) &&
+			    ((lp_processor->processor_meta == PROCESSOR_META_NULL) ||
+			    ((lp_processor == lp_processor->processor_meta->primary) &&
 			    !queue_empty(&lp_processor->processor_meta->idle_queue))) &&
 			    lp_processor->state != PROCESSOR_INACTIVE &&
 			    lp_processor->state != PROCESSOR_SHUTDOWN &&
@@ -2359,6 +3226,7 @@ choose_processor(
 				return lp_processor;
 			if (thread->realtime.deadline < furthest_deadline)
 				return fd_processor;
+
 			processor = PROCESSOR_NULL;
 		}
 		else {
@@ -2375,7 +3243,7 @@ choose_processor(
 			if (cset->low_count != PROCESSOR_NULL && cset->low_count->state != PROCESSOR_INACTIVE &&
 					cset->low_count->state != PROCESSOR_SHUTDOWN && cset->low_count->state != PROCESSOR_OFF_LINE &&
 						(processor == PROCESSOR_NULL || (thread->sched_pri <= BASEPRI_DEFAULT &&
-															cset->low_count->runq.count < processor->runq.count))) {
+															SCHED(processor_runq_count)(cset->low_count) < SCHED(processor_runq_count)(processor)))) {
 				processor = cset->low_count;
 			}
 
@@ -2387,9 +3255,10 @@ choose_processor(
 				if (processor != PROCESSOR_NULL)
 					enqueue_tail(&cset->active_queue, (queue_entry_t)processor);
 			}
+
 			if (processor != PROCESSOR_NULL && pmeta == PROCESSOR_META_NULL) {
 				if (processor->processor_meta != PROCESSOR_META_NULL &&
-				    !queue_empty(&processor->processor_meta->idle_queue))
+											!queue_empty(&processor->processor_meta->idle_queue))
 					pmeta = processor->processor_meta;
 			}
 		}
@@ -2493,8 +3362,8 @@ thread_setrun(
 	/*
 	 *	Update priority if needed.
 	 */
-	if (thread->sched_stamp != sched_tick)
-		update_priority(thread);
+	if (SCHED(can_update_priority)(thread))
+		SCHED(update_priority)(thread);
 
 	assert(thread->runq == PROCESSOR_NULL);
 
@@ -2509,7 +3378,7 @@ thread_setrun(
 			pset = thread->affinity_set->aset_pset;
 			pset_lock(pset);
 
-			processor = choose_processor(pset, PROCESSOR_NULL, thread);
+			processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread);
 		}
 		else
 		if (thread->last_processor != PROCESSOR_NULL) {
@@ -2519,7 +3388,13 @@ thread_setrun(
 			processor = thread->last_processor;
 			pset = processor->processor_set;
 			pset_lock(pset);
-			processor = choose_processor(pset, processor, thread);
+			processor = SCHED(choose_processor)(pset, processor, thread);
+
+			if ((thread->last_processor != processor) && (thread->last_processor != PROCESSOR_NULL)) {
+				KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_LPA_BROKEN)|DBG_FUNC_NONE,
+									  (uintptr_t)thread_tid(thread), (uintptr_t)thread->last_processor->cpu_id, (uintptr_t)processor->cpu_id, thread->last_processor->state, 0);
+			}
+			
 		}
 		else {
 			/*
@@ -2537,7 +3412,7 @@ thread_setrun(
 			pset = choose_next_pset(pset);
 			pset_lock(pset);
 
-			processor = choose_processor(pset, PROCESSOR_NULL, thread);
+			processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread);
 			task->pset_hint = processor->processor_set;
 		}
 	}
@@ -2557,6 +3432,8 @@ thread_setrun(
 	 */
 	if (thread->sched_pri >= BASEPRI_RTQUEUES)
 		realtime_setrun(processor, thread);
+	else if (thread->sched_mode == TH_MODE_FAIRSHARE)
+		fairshare_setrun(processor, thread);
 	else
 		processor_setrun(processor, thread, options);
 }
@@ -2573,6 +3450,8 @@ task_choose_pset(
 	return (pset);
 }
 
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
 /*
  *	processor_queue_shutdown:
  *
@@ -2587,7 +3466,7 @@ processor_queue_shutdown(
 	processor_t			processor)
 {
 	processor_set_t		pset = processor->processor_set;
-	run_queue_t			rq = &processor->runq;
+	run_queue_t			rq = runq_for_processor(processor);
 	queue_t				queue = rq->queues + rq->highq;
 	int					pri = rq->highq, count = rq->count;
 	thread_t			next, thread;
@@ -2601,11 +3480,13 @@ processor_queue_shutdown(
 			next = (thread_t)queue_next((queue_entry_t)thread);
 
 			if (thread->bound_processor == PROCESSOR_NULL) {
-				remqueue(queue, (queue_entry_t)thread);
+				remqueue((queue_entry_t)thread);
 
 				thread->runq = PROCESSOR_NULL;
+				SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
+				runq_consider_decr_bound_count(processor, thread);
 				rq->count--;
-				if (testbit(pri, sched_preempt_pri)) {
+				if (SCHED(priority_is_urgent)(pri)) {
 					rq->urgency--; assert(rq->urgency >= 0);
 				}
 				if (queue_empty(queue)) {
@@ -2635,6 +3516,8 @@ processor_queue_shutdown(
 	}
 }
 
+#endif /* CONFIG_SCHED_TRADITIONAL */
+
 /*
  *	Check for a preemption point in
  *	the current context.
@@ -2646,53 +3529,30 @@ csw_check(
 	processor_t		processor)
 {
 	ast_t			result = AST_NONE;
-	run_queue_t		runq;
 
 	if (first_timeslice(processor)) {
-		runq = &rt_runq;
-		if (runq->highq >= BASEPRI_RTQUEUES)
+		if (rt_runq.count > 0)
 			return (AST_PREEMPT | AST_URGENT);
 
-		if (runq->highq > processor->current_pri) {
-			if (runq->urgency > 0)
-				return (AST_PREEMPT | AST_URGENT);
-
-			result |= AST_PREEMPT;
-		}
-
-		runq = &processor->runq;
-		if (runq->highq > processor->current_pri) {
-			if (runq->urgency > 0)
-				return (AST_PREEMPT | AST_URGENT);
-
-			result |= AST_PREEMPT;
-		}
+		result |= SCHED(processor_csw_check)(processor);
+		if (result & AST_URGENT)
+			return result;
 	}
 	else {
-		runq = &rt_runq;
-		if (runq->highq >= processor->current_pri) {
-			if (runq->urgency > 0)
-				return (AST_PREEMPT | AST_URGENT);
-
-			result |= AST_PREEMPT;
-		}
-
-		runq = &processor->runq;
-		if (runq->highq >= processor->current_pri) {
-			if (runq->urgency > 0)
-				return (AST_PREEMPT | AST_URGENT);
+		if (rt_runq.count > 0 && BASEPRI_RTQUEUES >= processor->current_pri)
+			return (AST_PREEMPT | AST_URGENT);
 
-			result |= AST_PREEMPT;
-		}
+		result |= SCHED(processor_csw_check)(processor);
+		if (result & AST_URGENT)
+			return result;
 	}
 
 	if (result != AST_NONE)
 		return (result);
 
-	if (processor->current_pri < BASEPRI_RTQUEUES && processor->processor_meta != PROCESSOR_META_NULL &&
-				processor->processor_meta->primary != processor)
+	if (SCHED(should_current_thread_rechoose_processor)(processor))
 		return (AST_PREEMPT);
-
+	
 	if (machine_processor_is_inactive(processor))
 		return (AST_PREEMPT);
 
@@ -2716,7 +3576,7 @@ set_sched_pri(
 	thread_t		thread,
 	int				priority)
 {
-	boolean_t		removed = run_queue_remove(thread);
+	boolean_t		removed = thread_run_queue_remove(thread);
 
 	thread->sched_pri = priority;
 	if (removed)
@@ -2729,6 +3589,7 @@ set_sched_pri(
 			ast_t			preempt;
 
 			processor->current_pri = priority;
+			processor->current_thmode = thread->sched_mode;
 			if ((preempt = csw_check(processor)) != AST_NONE)
 				ast_on(preempt);
 		}
@@ -2769,8 +3630,48 @@ run_queue_check(
 
 #endif	/* DEBUG */
 
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
+/* locks the runqueue itself */
+
+static boolean_t
+processor_queue_remove(
+					   processor_t			processor,
+					   thread_t		thread)
+{
+	void *			rqlock;
+	run_queue_t		rq;
+	
+	rqlock = &processor->processor_set->sched_lock;
+	rq = runq_for_processor(processor);
+
+	simple_lock(rqlock);
+	if (processor == thread->runq) {
+		/*
+		 *	Thread is on a run queue and we have a lock on
+		 *	that run queue.
+		 */
+		runq_consider_decr_bound_count(processor, thread);
+		run_queue_remove(rq, thread);
+	}
+	else {
+		/*
+		 *	The thread left the run queue before we could
+		 * 	lock the run queue.
+		 */
+		assert(thread->runq == PROCESSOR_NULL);
+		processor = PROCESSOR_NULL;
+	}
+	
+	simple_unlock(rqlock);
+
+	return (processor != PROCESSOR_NULL);
+}
+
+#endif /* CONFIG_SCHED_TRADITIONAL */
+
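processor_queue_remove() above, like the real-time path in thread_run_queue_remove() below, relies on the re-check-under-lock pattern: thread->runq is read without the queue lock, so the thread can race off the queue before the lock is taken, and ownership must be verified again once it is held. A generic user-space rendering of the pattern (pthread-based, purely illustrative):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stddef.h>

    struct item { struct item *prev, *next; void *owner; };

    static bool remove_if_still_owned(pthread_mutex_t *qlock,
                                      struct item *it, void *me)
    {
        bool removed = false;

        pthread_mutex_lock(qlock);
        if (it->owner == me) {          /* still enqueued here: unlink */
            it->prev->next = it->next;
            it->next->prev = it->prev;
            it->owner = NULL;
            removed = true;
        }                               /* else: it raced off the queue */
        pthread_mutex_unlock(qlock);
        return removed;
    }
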
 /*
- *	run_queue_remove:
+ *	thread_run_queue_remove:
  *
  *	Remove a thread from a current run queue and
  *	return TRUE if successful.
@@ -2778,7 +3679,7 @@ run_queue_check(
  *	Thread must be locked.
  */
 boolean_t
-run_queue_remove(
+thread_run_queue_remove(
 	thread_t		thread)
 {
 	processor_t		processor = thread->runq;
@@ -2790,41 +3691,32 @@ run_queue_remove(
 	 *	and removed.
 	 */
 	if (processor != PROCESSOR_NULL) {
-		void *			rqlock;
-		run_queue_t		rq;
+		queue_t			q;
 
 		/*
 		 *	The processor run queues are locked by the
 		 *	processor set.  Real-time priorities use a
 		 *	global queue with a dedicated lock.
 		 */
-		if (thread->sched_pri < BASEPRI_RTQUEUES) {
-			rqlock = &processor->processor_set->sched_lock;
-			rq = &processor->runq;
+		if (thread->sched_mode == TH_MODE_FAIRSHARE) {
+			return SCHED(fairshare_queue_remove)(thread);
 		}
-		else {
-			rqlock = &rt_lock; rq = &rt_runq;
+		
+		if (thread->sched_pri < BASEPRI_RTQUEUES) {
+			return SCHED(processor_queue_remove)(processor, thread);
 		}
 
-		simple_lock(rqlock);
+		simple_lock(&rt_lock);
+		q = &rt_runq.queue;
 
 		if (processor == thread->runq) {
 			/*
 			 *	Thread is on a run queue and we have a lock on
 			 *	that run queue.
 			 */
-			remqueue(&rq->queues[0], (queue_entry_t)thread);
-			rq->count--;
-			if (testbit(thread->sched_pri, sched_preempt_pri)) {
-				rq->urgency--; assert(rq->urgency >= 0);
-			}
-
-			if (queue_empty(rq->queues + thread->sched_pri)) {
-				/* update run queue status */
-				if (thread->sched_pri != IDLEPRI)
-					clrbit(MAXPRI - thread->sched_pri, rq->bitmap);
-				rq->highq = MAXPRI - ffsbit(rq->bitmap);
-			}
+			remqueue((queue_entry_t)thread);
+			SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count);
+			rt_runq.count--;
 
 			thread->runq = PROCESSOR_NULL;
 		}
@@ -2837,12 +3729,14 @@ run_queue_remove(
 			processor = PROCESSOR_NULL;
 		}
 
-		simple_unlock(rqlock);
+		simple_unlock(&rt_lock);
 	}
 
 	return (processor != PROCESSOR_NULL);
 }
 
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
 /*
  *	steal_processor_thread:
  *
@@ -2856,7 +3750,7 @@ static thread_t
 steal_processor_thread(
 	processor_t		processor)
 {
-	run_queue_t		rq = &processor->runq;
+	run_queue_t		rq = runq_for_processor(processor);
 	queue_t			queue = rq->queues + rq->highq;
 	int				pri = rq->highq, count = rq->count;
 	thread_t		thread;
@@ -2865,11 +3759,13 @@ steal_processor_thread(
 		thread = (thread_t)queue_first(queue);
 		while (!queue_end(queue, (queue_entry_t)thread)) {
 			if (thread->bound_processor == PROCESSOR_NULL) {
-				remqueue(queue, (queue_entry_t)thread);
+				remqueue((queue_entry_t)thread);
 
 				thread->runq = PROCESSOR_NULL;
+				SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
+				runq_consider_decr_bound_count(processor, thread);
 				rq->count--;
-				if (testbit(pri, sched_preempt_pri)) {
+				if (SCHED(priority_is_urgent)(pri)) {
 					rq->urgency--; assert(rq->urgency >= 0);
 				}
 				if (queue_empty(queue)) {
@@ -2912,10 +3808,10 @@ steal_thread(
 	do {
 		processor = (processor_t)queue_first(&cset->active_queue);
 		while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) {
-			if (processor->runq.count > 0) {
+			if (runq_for_processor(processor)->count > 0) {
 				thread = steal_processor_thread(processor);
 				if (thread != THREAD_NULL) {
-					remqueue(&cset->active_queue, (queue_entry_t)processor);
+					remqueue((queue_entry_t)processor);
 					enqueue_tail(&cset->active_queue, (queue_entry_t)processor);
 
 					pset_unlock(cset);
@@ -2942,6 +3838,55 @@ steal_thread(
 	return (THREAD_NULL);
 }
 
+static thread_t	steal_thread_disabled(
+					processor_set_t		pset)
+{
+	pset_unlock(pset);
+
+	return (THREAD_NULL);
+}
+
+#endif /* CONFIG_SCHED_TRADITIONAL */
+
+int
+thread_get_urgency(uint64_t *rt_period, uint64_t *rt_deadline)
+{
+	processor_t	processor;
+	thread_t	thread;
+	
+	processor = current_processor();
+
+	thread = processor->next_thread;
+
+	if (thread != NULL) {
+		if (thread->sched_mode == TH_MODE_REALTIME) {
+
+			if (rt_period != NULL)
+				*rt_period = thread->realtime.period;
+			if (rt_deadline != NULL)
+				*rt_deadline = thread->realtime.deadline;
+
+			KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_GET_URGENCY), THREAD_URGENCY_REAL_TIME, thread->realtime.period,
+			    (thread->realtime.deadline >> 32), thread->realtime.deadline, 0);
+
+			return (THREAD_URGENCY_REAL_TIME);
+		} else if ((thread->sched_pri <= MAXPRI_THROTTLE) &&
+		    (thread->priority <= MAXPRI_THROTTLE)) {
+			KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_GET_URGENCY), THREAD_URGENCY_BACKGROUND, thread->sched_pri, thread->priority, 0, 0);
+			return (THREAD_URGENCY_BACKGROUND);
+		}
+		else 
+			KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_GET_URGENCY), THREAD_URGENCY_NORMAL, 0, 0, 0, 0);
+
+		return (THREAD_URGENCY_NORMAL);
+	}
+	else
+		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_GET_URGENCY), THREAD_URGENCY_NONE, 0, 0, 0, 0);
+	return (THREAD_URGENCY_NONE);
+}
+
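thread_get_urgency() reports what class of thread the processor is about to run, primarily so CPU power management can pick an idle state that will not hurt a real-time deadline. A hypothetical caller (pm_before_dispatch() is invented for illustration; only thread_get_urgency() and the THREAD_URGENCY_* constants come from this patch):

    void
    pm_before_dispatch(void)
    {
        uint64_t period = 0, deadline = 0;
        int urgency = thread_get_urgency(&period, &deadline);

        if (urgency == THREAD_URGENCY_REAL_TIME) {
            /* Real-time work is next: stay in a shallow idle state so
             * wakeup latency fits within (period, deadline). */
        } else if (urgency == THREAD_URGENCY_NONE) {
            /* Nothing runnable: a deep idle state is acceptable. */
        }
    }
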
 /*
  *	This is the processor idle loop, which just looks for other threads
  *	to execute.  Processor idle threads invoke this without supplying a
@@ -2949,7 +3894,14 @@ steal_thread(
  *
 *	Returns the next thread to execute if dispatched directly.
  */
-static thread_t
+
+#if 0
+#define IDLE_KERNEL_DEBUG_CONSTANT(...) KERNEL_DEBUG_CONSTANT(__VA_ARGS__)
+#else
+#define IDLE_KERNEL_DEBUG_CONSTANT(...) do { } while(0)
+#endif
+
+thread_t
 processor_idle(
 	thread_t			thread,
 	processor_t			processor)
@@ -2957,22 +3909,29 @@ processor_idle(
 	processor_set_t		pset = processor->processor_set;
 	thread_t			new_thread;
 	int					state;
-
 	(void)splsched();
 
 	KERNEL_DEBUG_CONSTANT(
 		MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_START, (uintptr_t)thread_tid(thread), 0, 0, 0, 0);
 
+	SCHED_STATS_CPU_IDLE_START(processor);
+
 	timer_switch(&PROCESSOR_DATA(processor, system_state),
 									mach_absolute_time(), &PROCESSOR_DATA(processor, idle_state));
 	PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, idle_state);
 
-	while (processor->next_thread == THREAD_NULL && processor->runq.count == 0 && rt_runq.count == 0 &&
+	while (processor->next_thread == THREAD_NULL && SCHED(processor_queue_empty)(processor) && rt_runq.count == 0 && SCHED(fairshare_runq_count)() == 0 &&
 				(thread == THREAD_NULL || ((thread->state & (TH_WAIT|TH_SUSP)) == TH_WAIT && !thread->wake_active))) {
+		IDLE_KERNEL_DEBUG_CONSTANT(
+			MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), rt_runq.count, SCHED(processor_runq_count)(processor), -1, 0);
+
 		machine_idle();
 
 		(void)splsched();
 
+		IDLE_KERNEL_DEBUG_CONSTANT(
+			MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), rt_runq.count, SCHED(processor_runq_count)(processor), -2, 0);
+
 		if (processor->state == PROCESSOR_INACTIVE && !machine_processor_is_inactive(processor))
 			break;
 	}
@@ -2992,19 +3951,20 @@ processor_idle(
 		processor->next_thread = THREAD_NULL;
 		processor->state = PROCESSOR_RUNNING;
 
-		if (	processor->runq.highq > new_thread->sched_pri					||
-				(rt_runq.highq > 0 && rt_runq.highq >= new_thread->sched_pri)	) {
+		if (SCHED(processor_queue_has_priority)(processor, new_thread->sched_pri, FALSE)					||
+				(rt_runq.count > 0 && BASEPRI_RTQUEUES >= new_thread->sched_pri)	) {
 			processor->deadline = UINT64_MAX;
 
 			pset_unlock(pset);
 
 			thread_lock(new_thread);
+			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REDISPATCH), (uintptr_t)thread_tid(new_thread), new_thread->sched_pri, rt_runq.count, 0, 0);
 			thread_setrun(new_thread, SCHED_HEADQ);
 			thread_unlock(new_thread);
 
 			KERNEL_DEBUG_CONSTANT(
 				MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, 0, 0, 0);
-	
+
 			return (THREAD_NULL);
 		}
 
@@ -3012,12 +3972,12 @@ processor_idle(
 
 		KERNEL_DEBUG_CONSTANT(
 				      MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, (uintptr_t)thread_tid(new_thread), 0, 0);
-
+			
 		return (new_thread);
 	}
 	else
 	if (state == PROCESSOR_IDLE) {
-		remqueue(&pset->idle_queue, (queue_entry_t)processor);
+		remqueue((queue_entry_t)processor);
 
 		processor->state = PROCESSOR_RUNNING;
 		enqueue_tail(&pset->active_queue, (queue_entry_t)processor);
@@ -3045,7 +4005,7 @@ processor_idle(
 
 			KERNEL_DEBUG_CONSTANT(
 				MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, 0, 0, 0);
-
+		
 			return (THREAD_NULL);
 		}
 	}
@@ -3054,7 +4014,7 @@ processor_idle(
 
 	KERNEL_DEBUG_CONSTANT(
 		MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, 0, 0, 0);
-
+		
 	return (THREAD_NULL);
 }
 
@@ -3105,8 +4065,6 @@ idle_thread_create(
 	return (KERN_SUCCESS);
 }
 
-static uint64_t		sched_tick_deadline;
-
 /*
  * sched_startup:
  *
@@ -3120,14 +4078,16 @@ sched_startup(void)
 	kern_return_t	result;
 	thread_t		thread;
 
-	result = kernel_thread_start_priority((thread_continue_t)sched_tick_thread, NULL, MAXPRI_KERNEL, &thread);
+	result = kernel_thread_start_priority((thread_continue_t)sched_init_thread,
+										  (void *)SCHED(maintenance_continuation),
+										  MAXPRI_KERNEL, &thread);
 	if (result != KERN_SUCCESS)
 		panic("sched_startup");
 
 	thread_deallocate(thread);
 
 	/*
-	 * Yield to the sched_tick_thread while it times
+	 * Yield to the sched_init_thread while it times
 	 * a series of context switches back.  It stores
 	 * the baseline value in sched_cswtime.
 	 *
@@ -3136,20 +4096,20 @@ sched_startup(void)
 	 */
 	while (sched_cswtime == 0)
 		thread_block(THREAD_CONTINUE_NULL);
+}
 
-	thread_daemon_init();
+#if defined(CONFIG_SCHED_TRADITIONAL)
 
-	thread_call_initialize();
-}
+static uint64_t			sched_tick_deadline = 0;
 
 /*
- *	sched_tick_thread:
+ *	sched_init_thread:
  *
  *	Perform periodic bookkeeping functions about ten
  *	times per second.
  */
 static void
-sched_tick_continue(void)
+sched_traditional_tick_continue(void)
 {
 	uint64_t			abstime = mach_absolute_time();
 
@@ -3166,14 +4126,44 @@ sched_tick_continue(void)
 	 */
 	thread_update_scan();
 
+	if (sched_tick_deadline == 0)
+		sched_tick_deadline = abstime;
+	
 	clock_deadline_for_periodic_event(sched_tick_interval, abstime,
 														&sched_tick_deadline);
 
-	assert_wait_deadline((event_t)sched_tick_thread, THREAD_UNINT, sched_tick_deadline);
-	thread_block((thread_continue_t)sched_tick_continue);
+	assert_wait_deadline((event_t)sched_traditional_tick_continue, THREAD_UNINT, sched_tick_deadline);
+	thread_block((thread_continue_t)sched_traditional_tick_continue);
 	/*NOTREACHED*/
 }
 
+#endif /* CONFIG_SCHED_TRADITIONAL */
+
+static uint32_t
+time_individual_cswitch(void)
+{
+	uint32_t switches = 0;
+	uint64_t newtime, starttime;
+
+	/* Wait for absolute time to increase. */
+	starttime = mach_absolute_time();
+	do {
+		newtime = mach_absolute_time();
+	} while (newtime == starttime);
+
+	/* Measure one or more context switches until time increases again.
+	 * This ensures we get non-zero timings even if absolute time
+	 * increases very infrequently compared to CPU clock. */
+	starttime = newtime;
+	do {
+		thread_block(THREAD_CONTINUE_NULL);
+		newtime = mach_absolute_time();
+		++switches;
+	} while (newtime == starttime);
+	/* Round up. */
+	return (uint32_t) ((newtime - starttime + switches - 1) / switches);
+}
+
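time_individual_cswitch() works around a granularity problem: if mach_absolute_time() advances more slowly than a context switch completes, a single switch can time out to zero. The fix is to align to a tick edge, batch switches until the clock moves, and divide with rounding up. The same trick in generic form (a sketch, not kernel code):

    #include <stdint.h>

    /* Time one invocation of event() with a clock that may be coarser
     * than the event itself: batch until the clock moves, then average. */
    static uint32_t
    time_one_event(uint64_t (*now)(void), void (*event)(void))
    {
        uint64_t start, t;
        uint32_t n = 0;

        start = now();
        do { t = now(); } while (t == start);    /* align to a tick edge */

        start = t;
        do { event(); t = now(); n++; } while (t == start);

        return (uint32_t)((t - start + n - 1) / n);  /* ceiling average */
    }
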
 /*
  * Time a series of context switches to determine
  * a baseline.  Toss the high and low and return
@@ -3183,15 +4173,11 @@ static uint32_t
 time_cswitch(void)
 {
 	uint32_t	new, hi, low, accum;
-	uint64_t	abstime;
-	int			i, tries = 7;
+	int			i, tries = 7, denom;
 
 	accum = hi = low = 0;
 	for (i = 0; i < tries; ++i) {
-		abstime = mach_absolute_time();
-		thread_block(THREAD_CONTINUE_NULL);
-
-		new = (uint32_t)(mach_absolute_time() - abstime);
+		new = time_individual_cswitch();
 
 		if (i == 0)
 			accum = hi = low = new;
@@ -3204,21 +4190,24 @@ time_cswitch(void)
 			accum += new;
 		}
 	}
-
-	return ((accum - hi - low) / (2 * (tries - 2)));
+	/* Round up. */
+	denom = 2 * (tries - 2);
+	return (accum - hi - low + denom - 1) / denom;
 }
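Spelling out the arithmetic: with tries = 7, the trimmed sum accum - hi - low covers five samples and denom = 2 * (7 - 2) = 10, so time_cswitch() returns ceil((accum - hi - low) / 10), half the trimmed mean; the factor of two presumably reflects that each timed thread_block() round trip involves two context switches, one away and one back.
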
 
 void
-sched_tick_thread(void)
+sched_init_thread(void (*continuation)(void))
 {
 	sched_cswtime = time_cswitch();
+	assert(sched_cswtime > 0);
 
-	sched_tick_deadline = mach_absolute_time();
+	continuation();
 
-	sched_tick_continue();
 	/*NOTREACHED*/
 }
 
+#if defined(CONFIG_SCHED_TRADITIONAL)
+
 /*
  *	thread_update_scan / runq_scan:
  *
@@ -3258,7 +4247,7 @@ runq_scan(
 		while (count > 0) {
 			queue_iterate(q, thread, thread_t, links) {
 				if (		thread->sched_stamp != sched_tick		&&
-						(thread->sched_mode & TH_MODE_TIMESHARE)	) {
+						(thread->sched_mode == TH_MODE_TIMESHARE)	) {
 					if (thread_update_count == THREAD_UPDATE_SIZE)
 						return (TRUE);
 
@@ -3292,7 +4281,7 @@ thread_update_scan(void)
 			s = splsched();
 			pset_lock(pset);
 
-			restart_needed = runq_scan(&processor->runq);
+			restart_needed = runq_scan(runq_for_processor(processor));
 
 			pset_unlock(pset);
 			splx(s);
@@ -3321,9 +4310,10 @@ thread_update_scan(void)
 
 			s = splsched();
 			thread_lock(thread);
-			if (	!(thread->state & (TH_WAIT|TH_SUSP))	&&
-						thread->sched_stamp != sched_tick	)
-				update_priority(thread);
+			if (	!(thread->state & (TH_WAIT))	) {
+				if (SCHED(can_update_priority)(thread))
+					SCHED(update_priority)(thread);
+			}
 			thread_unlock(thread);
 			splx(s);
 
@@ -3331,20 +4321,115 @@ thread_update_scan(void)
 	    }
 	} while (restart_needed);
 }
+
+#endif /* CONFIG_SCHED_TRADITIONAL */
+
+boolean_t
+thread_eager_preemption(thread_t thread) 
+{
+	return ((thread->sched_flags & TH_SFLAG_EAGERPREEMPT) != 0);
+}
+
+void
+thread_set_eager_preempt(thread_t thread) 
+{
+	spl_t x;
+	processor_t p;
+	ast_t ast = AST_NONE;
+
+	x = splsched();
+	p = current_processor();
+
+	thread_lock(thread);
+	thread->sched_flags |= TH_SFLAG_EAGERPREEMPT;
+
+	if (thread == current_thread()) {
+		thread_unlock(thread);
+
+		ast = csw_check(p);
+		if (ast != AST_NONE) {
+			(void) thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast);
+		}
+	} else {
+		p = thread->last_processor;
+
+		if (p != PROCESSOR_NULL	&& p->state == PROCESSOR_RUNNING &&
+			p->active_thread == thread) {
+			cause_ast_check(p);
+		}
 		
+		thread_unlock(thread);
+	}
+
+	splx(x);
+}
+
+void
+thread_clear_eager_preempt(thread_t thread) 
+{
+	spl_t x;
+
+	x = splsched();
+	thread_lock(thread);
+
+	thread->sched_flags &= ~TH_SFLAG_EAGERPREEMPT;
+	
+	thread_unlock(thread);
+	splx(x);
+}
+
+/*
+ * Scheduling statistics
+ */
+void
+sched_stats_handle_csw(processor_t processor, int reasons, int selfpri, int otherpri)
+{
+	struct processor_sched_statistics *stats;
+	boolean_t to_realtime = FALSE;
+	
+	stats = &processor->processor_data.sched_stats;
+	stats->csw_count++;
+
+	if (otherpri >= BASEPRI_REALTIME) {
+		stats->rt_sched_count++;
+		to_realtime = TRUE;
+	}
+
+	if ((reasons & AST_PREEMPT) != 0) {
+		stats->preempt_count++;
+
+		if (selfpri >= BASEPRI_REALTIME) {
+			stats->preempted_rt_count++;
+		} 
+
+		if (to_realtime) {
+			stats->preempted_by_rt_count++;
+		}
+
+	}
+}
+
+void
+sched_stats_handle_runq_change(struct runq_stats *stats, int old_count) 
+{
+	uint64_t timestamp = mach_absolute_time();
+
+	stats->count_sum += (timestamp - stats->last_change_timestamp) * old_count;
+	stats->last_change_timestamp = timestamp;
+}
+
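The count_sum maintained here is a time integral of queue depth: every change accumulates (now - last_change) * old_count, so dividing by elapsed time yields the average run-queue depth over an interval. A compact model of the statistic (the names are illustrative):

    #include <stdint.h>

    struct runq_stats_model {
        uint64_t count_sum;              /* integral of depth over time */
        uint64_t last_change;
    };

    static void
    runq_change(struct runq_stats_model *s, uint64_t now, int old_depth)
    {
        s->count_sum += (now - s->last_change) * (uint64_t)old_depth;
        s->last_change = now;
    }

    /* Average depth over [start, now), ignoring the open segment since
     * the most recent change. */
    static double
    average_depth(const struct runq_stats_model *s,
                  uint64_t start, uint64_t now)
    {
        return (double)s->count_sum / (double)(now - start);
    }
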
 /*
- *	Just in case someone doesn't use the macro
+ *     For calls from assembly code
  */
-#undef	thread_wakeup
+#undef thread_wakeup
 void
 thread_wakeup(
-	event_t		x);
+       event_t         x);
 
 void
 thread_wakeup(
-	event_t		x)
+       event_t         x)
 {
-	thread_wakeup_with_result(x, THREAD_AWAKENED);
+       thread_wakeup_with_result(x, THREAD_AWAKENED);
 }
 
 boolean_t
diff --git a/osfmk/kern/sched_prim.h b/osfmk/kern/sched_prim.h
index 9f1c95347..0f89239ae 100644
--- a/osfmk/kern/sched_prim.h
+++ b/osfmk/kern/sched_prim.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -147,12 +147,20 @@ extern void		compute_my_priority(
 					thread_t		thread);
 
 /* Periodic scheduler activity */
-extern void		sched_tick_thread(void);
+extern void		sched_init_thread(void (*)(void));
 
 /* Perform sched_tick housekeeping activities */
-extern void		update_priority(
+extern boolean_t		can_update_priority(
 					thread_t		thread);
 
+extern void		update_priority(
+											thread_t		thread);
+
+extern void		lightweight_update_priority(
+								thread_t		thread);
+
+extern void		sched_traditional_quantum_expire(thread_t	thread);
+
 /* Idle processor thread */
 extern void		idle_thread(void);
 
@@ -185,13 +193,80 @@ extern processor_set_t	task_choose_pset(
 extern processor_t		thread_bind(
 							processor_t		processor);
 
+/* Choose the best processor to run a thread */
+extern processor_t	choose_processor(
+									 processor_set_t		pset,
+									 processor_t			processor,
+									 thread_t			thread);
+
+/* Choose a thread from a processor's priority-based runq */
+extern thread_t choose_thread(
+							  processor_t		processor,
+							  run_queue_t		runq,
+							  int				priority);
+
+
+extern void thread_quantum_init(
+								thread_t thread);
+
 extern void		run_queue_init(
 					run_queue_t		runq);
 
+extern thread_t	run_queue_dequeue(
+							  run_queue_t		runq,
+							  integer_t		options);
+
+extern boolean_t	run_queue_enqueue(
+							  run_queue_t		runq,
+							  thread_t			thread,
+							  integer_t		options);
+
+extern void	run_queue_remove(
+									 run_queue_t		runq,
+									 thread_t			thread);
+									  
+/* Remove thread from its run queue */
+extern boolean_t	thread_run_queue_remove(
+						thread_t	thread);
+
 extern void		thread_timer_expire(
 					void			*thread,
 					void			*p1);
 
+extern boolean_t	thread_eager_preemption(
+						thread_t thread);
+
+/* Fair Share routines */
+#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_PROTO) || defined(CONFIG_SCHED_FIXEDPRIORITY)
+void		sched_traditional_fairshare_init(void);
+
+int			sched_traditional_fairshare_runq_count(void);
+
+uint64_t	sched_traditional_fairshare_runq_stats_count_sum(void);
+
+void		sched_traditional_fairshare_enqueue(thread_t thread);
+
+thread_t	sched_traditional_fairshare_dequeue(void);
+
+boolean_t	sched_traditional_fairshare_queue_remove(thread_t thread);
+#endif
+
+#if defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY)
+void		sched_grrr_fairshare_init(void);
+
+int			sched_grrr_fairshare_runq_count(void);
+
+uint64_t	sched_grrr_fairshare_runq_stats_count_sum(void);
+
+void		sched_grrr_fairshare_enqueue(thread_t thread);
+
+thread_t	sched_grrr_fairshare_dequeue(void);
+
+boolean_t	sched_grrr_fairshare_queue_remove(thread_t thread);
+#endif
+
+extern boolean_t sched_generic_direct_dispatch_to_idle_processors;
+
 /* Set the maximum interrupt level for the thread */
 __private_extern__ wait_interrupt_t thread_interrupt_level(
 						wait_interrupt_t interruptible);
@@ -205,6 +280,55 @@ __private_extern__ kern_return_t clear_wait_internal(
 						thread_t		thread,
 						wait_result_t	result);
 
+extern void sched_stats_handle_csw(
+							processor_t processor, 
+							int reasons, 
+							int selfpri, 
+							int otherpri);
+
+extern void sched_stats_handle_runq_change(
+									struct runq_stats *stats, 
+									int old_count);
+
+#define	SCHED_STATS_CSW(processor, reasons, selfpri, otherpri) 		\
+do { 								\
+	if (__builtin_expect(sched_stats_active, 0)) { 	\
+		sched_stats_handle_csw((processor), 		\
+				(reasons), (selfpri), (otherpri)); 	\
+	}							\
+} while (0) 
+
+
+#define SCHED_STATS_RUNQ_CHANGE(stats, old_count)		\
+do { 								\
+	if (__builtin_expect(sched_stats_active, 0)) { 	\
+		sched_stats_handle_runq_change((stats), 	\
+								(old_count));		\
+	}							\
+} while (0) 
+
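Both stats macros above follow the cheap-when-off instrumentation pattern: a single global flag tested with __builtin_expect (a GCC/Clang builtin) so that the disabled case costs one predicted-not-taken branch. The generic shape of the pattern, extracted for clarity (feature_active and slow_path() are placeholders):

    extern int feature_active;           /* toggled rarely, e.g. by sysctl */
    extern void slow_path(int arg);

    #define FEATURE_HOOK(arg)                           \
    do {                                                \
        if (__builtin_expect(feature_active, 0)) {      \
            slow_path(arg);     /* off the hot path */  \
        }                                               \
    } while (0)
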
+#define THREAD_URGENCY_NONE		0	/* indicates that there is no currently runnable thread */
+#define THREAD_URGENCY_BACKGROUND	1	/* indicates that the thread is marked as a "background" thread */
+#define THREAD_URGENCY_NORMAL		2	/* indicates that the thread is marked as a "normal" thread */
+#define THREAD_URGENCY_REAL_TIME	3	/* indicates that the thread is marked as a "real-time" or urgent thread */
+#define	THREAD_URGENCY_MAX		4	/* Marker */
+/* Returns the "urgency" of the currently running thread (provided by scheduler) */
+extern int	thread_get_urgency(
+    				   	uint64_t	*rt_period,
+					uint64_t	*rt_deadline);
+
+/* Tells the "urgency" of the just scheduled thread (provided by CPU PM) */
+extern void	thread_tell_urgency(
+    					int		urgency,
+					uint64_t	rt_period,
+					uint64_t	rt_deadline);
+
+/* Tells if there are "active" RT threads in the system (provided by CPU PM) */
+extern void	active_rt_threads(
+    					boolean_t	active);
+
 #endif /* MACH_KERNEL_PRIVATE */
 
 __BEGIN_DECLS
@@ -259,14 +383,27 @@ extern wait_result_t	assert_wait_deadline(
 extern kern_return_t	thread_wakeup_prim(
 							event_t				event,
 							boolean_t			one_thread,
-							wait_result_t		result);
+							wait_result_t			result);
+
+#ifdef MACH_KERNEL_PRIVATE
+extern kern_return_t	thread_wakeup_prim_internal(
+							event_t				event,
+							boolean_t			one_thread,
+							wait_result_t			result,
+							int				priority);
+#endif
 
 #define thread_wakeup(x)					\
-			thread_wakeup_prim((x), FALSE, THREAD_AWAKENED)
+	                thread_wakeup_prim((x), FALSE, THREAD_AWAKENED)
 #define thread_wakeup_with_result(x, z)		\
-			thread_wakeup_prim((x), FALSE, (z))
+	                thread_wakeup_prim((x), FALSE, (z))
 #define thread_wakeup_one(x)				\
-			thread_wakeup_prim((x), TRUE, THREAD_AWAKENED)
+	                thread_wakeup_prim((x), TRUE, THREAD_AWAKENED)
+
+#ifdef MACH_KERNEL_PRIVATE
+#define thread_wakeup_one_with_pri(x, pri)				\
+	                thread_wakeup_prim_internal((x), TRUE, THREAD_AWAKENED, pri)
+#endif
 
 extern boolean_t		preemption_enabled(void);
 
@@ -302,6 +439,223 @@ extern void		thread_cancel_timer(void);
 
 #endif	/* KERNEL_PRIVATE */
 
+#ifdef MACH_KERNEL_PRIVATE
+
+/*
+ * Scheduler algorithm indirection. If only one algorithm is
+ * enabled at compile-time, a direction function call is used.
+ * If more than one is enabled, calls are dispatched through
+ * a function pointer table.
+ */
+
+#if   !defined(CONFIG_SCHED_TRADITIONAL) && !defined(CONFIG_SCHED_PROTO) && !defined(CONFIG_SCHED_GRRR) && !defined(CONFIG_SCHED_FIXEDPRIORITY)
+#error Enable at least one scheduler algorithm in osfmk/conf/MASTER.XXX
+#endif
+
+#define SCHED(f) (sched_current_dispatch->f)
+
+struct sched_dispatch_table {
+	void	(*init)(void);				/* Init global state */
+	void	(*timebase_init)(void);		/* Timebase-dependent initialization */
+	void	(*processor_init)(processor_t processor);	/* Per-processor scheduler init */
+	void	(*pset_init)(processor_set_t pset);	/* Per-processor set scheduler init */
+	
+	void	(*maintenance_continuation)(void);	/* Function called regularly */
+	
+	/*
+	 * Choose a thread of greater or equal priority from the per-processor
+	 * runqueue for timeshare/fixed threads
+	 */
+	thread_t	(*choose_thread)(
+								  processor_t		processor,
+								  int				priority);
+	
+	/*
+	 * Steal a thread from another processor in the pset so that it can run
+	 * immediately
+	 */
+	thread_t	(*steal_thread)(
+								processor_set_t		pset);
+	
+	/*
+	 * Recalculate sched_pri based on base priority, past running time,
+	 * and scheduling class.
+	 */
+	void		(*compute_priority)(
+					 thread_t	thread,
+					 boolean_t			override_depress);
+	
+	/*
+	 * Pick the best processor for a thread (any kind of thread) to run on.
+	 */
+	processor_t	(*choose_processor)(
+										 processor_set_t		pset,
+										 processor_t			processor,
+										 thread_t			thread);
+	/*
+	 * Enqueue a timeshare or fixed priority thread onto the per-processor
+	 * runqueue
+	 */
+	boolean_t (*processor_enqueue)(
+								 processor_t			processor,
+								 thread_t			thread,
+								 integer_t			options);
+	
+	/* Migrate threads away in preparation for processor shutdown */
+	void (*processor_queue_shutdown)(
+									 processor_t			processor);
+	
+	/* Remove the specific thread from the per-processor runqueue */
+	boolean_t	(*processor_queue_remove)(
+									processor_t			processor,
+									thread_t		thread);
+	
+	/*
+	 * Does the per-processor runqueue have any timeshare or fixed priority
+	 * threads on it? Called without pset lock held, so should
+	 * not assume immutability while executing.
+	 */
+	boolean_t	(*processor_queue_empty)(processor_t		processor);
+	
+	/*
+	 * Would this priority trigger an urgent preemption if it's sitting
+	 * on the per-processor runqueue?
+	 */
+	boolean_t	(*priority_is_urgent)(int priority);
+	
+	/*
+	 * Does the per-processor runqueue contain runnable threads that
+	 * should cause the currently-running thread to be preempted?
+	 */
+	ast_t		(*processor_csw_check)(processor_t processor);
+	
+	/*
+	 * Does the per-processor runqueue contain a runnable thread
+	 * of > or >= priority, as a preflight for choose_thread() or other
+	 * thread selection
+	 */
+	boolean_t	(*processor_queue_has_priority)(processor_t		processor,
+												int				priority,
+												boolean_t		gte);
+	
+	/* Quantum size for the specified non-realtime thread. */
+	uint32_t	(*initial_quantum_size)(thread_t thread);
+	
+	/* Scheduler mode for a new thread */
+	sched_mode_t	(*initial_thread_sched_mode)(task_t parent_task);
+	
+	/* Scheduler algorithm supports timeshare (decay) mode */
+	boolean_t	(*supports_timeshare_mode)(void);
+	
+	/*
+	 * Is it safe to call update_priority, which may change a thread's
+	 * runqueue or other state. This can be used to throttle changes
+	 * to dynamic priority.
+	 */
+	boolean_t	(*can_update_priority)(thread_t thread);
+
+	/*
+	 * Update both scheduled priority and other persistent state.
+	 * Side effects may include migration to another processor's runqueue.
+	 */
+	void		(*update_priority)(thread_t thread);
+	
+	/* Lower overhead update to scheduled priority and state. */
+	void		(*lightweight_update_priority)(thread_t thread);
+	
+	/* Callback for non-realtime threads when the quantum timer fires */
+	void		(*quantum_expire)(thread_t thread);
+	
+	/*
+	 * Even though we could continue executing on this processor, does the
+	 * topology (SMT, for instance) indicate that a better processor could be
+	 * chosen
+	 */
+	boolean_t	(*should_current_thread_rechoose_processor)(processor_t			processor);
+    
+	/*
+	 * Runnable threads on per-processor runqueue. Should only
+	 * be used for relative comparisons of load between processors.
+	 */
+	int			(*processor_runq_count)(processor_t	processor);
+	
+	/* Aggregate runcount statistics for per-processor runqueue */
+    uint64_t    (*processor_runq_stats_count_sum)(processor_t   processor);
+	
+	/* Initialize structures to track demoted fairshare threads */
+	void		(*fairshare_init)(void);
+	
+	/* Number of runnable fairshare threads */
+	int			(*fairshare_runq_count)(void);
+	
+	/* Aggregate runcount statistics for fairshare runqueue */
+	uint64_t	(*fairshare_runq_stats_count_sum)(void);
+	
+	void		(*fairshare_enqueue)(thread_t thread);
+	
+	thread_t	(*fairshare_dequeue)(void);
+
+	boolean_t	(*fairshare_queue_remove)(thread_t thread);
+    
+	/*
+	 * Use processor->next_thread to pin a thread to an idle
+	 * processor. If FALSE, threads are enqueued and can
+	 * be stolen by other processors.
+	 */
+	boolean_t   direct_dispatch_to_idle_processors;
+};
+
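A minimal model of how the SCHED() indirection is wired up, not part of the patch (all names below are invented): one const table per algorithm, a single pointer selected at boot, and positional initializers, which is why entry order in tables such as sched_proto_dispatch further down must match the struct declaration exactly.

    struct sched_table_model {
        void (*init)(void);
        int  (*runq_count)(int cpu);
    };

    static void demo_init(void)     { /* set up global scheduler state */ }
    static int  demo_count(int cpu) { (void)cpu; return 0; }

    static const struct sched_table_model demo_dispatch = {
        demo_init,                  /* positional: must match field order */
        demo_count,
    };

    static const struct sched_table_model *current_dispatch = &demo_dispatch;

    #define SCHED_M(f) (current_dispatch->f)

    /* Call sites then read: SCHED_M(init)();  SCHED_M(runq_count)(0); */
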
+#if defined(CONFIG_SCHED_TRADITIONAL)
+#define kSchedTraditionalString "traditional"
+#define kSchedTraditionalWithPsetRunqueueString "traditional_with_pset_runqueue"
+extern const struct sched_dispatch_table sched_traditional_dispatch;
+extern const struct sched_dispatch_table sched_traditional_with_pset_runqueue_dispatch;
+#endif
+
+#if defined(CONFIG_SCHED_PROTO)
+#define kSchedProtoString "proto"
+extern const struct sched_dispatch_table sched_proto_dispatch;
+#endif
+
+#if defined(CONFIG_SCHED_GRRR)
+#define kSchedGRRRString "grrr"
+extern const struct sched_dispatch_table sched_grrr_dispatch;
+#endif
+
+#if defined(CONFIG_SCHED_FIXEDPRIORITY)
+#define kSchedFixedPriorityString "fixedpriority"
+#define kSchedFixedPriorityWithPsetRunqueueString "fixedpriority_with_pset_runqueue"
+extern const struct sched_dispatch_table sched_fixedpriority_dispatch;
+extern const struct sched_dispatch_table sched_fixedpriority_with_pset_runqueue_dispatch;
+#endif
+
+/*
+ * It is an error to invoke any scheduler-related code
+ * before this is set up
+ */
+enum sched_enum {
+	sched_enum_unknown = 0,
+#if defined(CONFIG_SCHED_TRADITIONAL)
+	sched_enum_traditional = 1,
+	sched_enum_traditional_with_pset_runqueue = 2,
+#endif
+#if defined(CONFIG_SCHED_PROTO)
+	sched_enum_proto = 3,
+#endif
+#if defined(CONFIG_SCHED_GRRR)
+	sched_enum_grrr = 4,
+#endif
+#if defined(CONFIG_SCHED_FIXEDPRIORITY)
+	sched_enum_fixedpriority = 5,
+	sched_enum_fixedpriority_with_pset_runqueue = 6,
+#endif
+	sched_enum_max = 7
+};
+
+extern const struct sched_dispatch_table *sched_current_dispatch;
+
+#endif	/* MACH_KERNEL_PRIVATE */
+
 __END_DECLS
 
 #endif	/* _KERN_SCHED_PRIM_H_ */
diff --git a/osfmk/kern/sched_proto.c b/osfmk/kern/sched_proto.c
new file mode 100644
index 000000000..e31cb0590
--- /dev/null
+++ b/osfmk/kern/sched_proto.c
@@ -0,0 +1,597 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <mach/mach_types.h>
+#include <mach/machine.h>
+#include <mach/policy.h>
+#include <mach/sync_policy.h>
+#include <mach/thread_act.h>
+
+#include <machine/machine_routines.h>
+#include <machine/sched_param.h>
+#include <machine/machine_cpu.h>
+
+#include <kern/kern_types.h>
+#include <kern/clock.h>
+#include <kern/counters.h>
+#include <kern/cpu_number.h>
+#include <kern/cpu_data.h>
+#include <kern/debug.h>
+#include <kern/lock.h>
+#include <kern/macro_help.h>
+#include <kern/machine.h>
+#include <kern/misc_protos.h>
+#include <kern/processor.h>
+#include <kern/queue.h>
+#include <kern/sched.h>
+#include <kern/sched_prim.h>
+#include <kern/syscall_subr.h>
+#include <kern/task.h>
+#include <kern/thread.h>
+#include <kern/wait_queue.h>
+
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+
+#include <mach/sdt.h>
+
+#include <sys/kdebug.h>
+
+static void
+sched_proto_init(void);
+
+static void
+sched_proto_timebase_init(void);
+
+static void
+sched_proto_processor_init(processor_t processor);
+
+static void
+sched_proto_pset_init(processor_set_t pset);
+
+static void
+sched_proto_maintenance_continuation(void);
+
+static thread_t
+sched_proto_choose_thread(processor_t		processor,
+							 int				priority);
+
+static thread_t
+sched_proto_steal_thread(processor_set_t		pset);
+
+static void
+sched_proto_compute_priority(thread_t	thread,
+							 boolean_t			override_depress);
+
+static processor_t
+sched_proto_choose_processor(	processor_set_t		pset,
+								processor_t			processor,
+								thread_t			thread);
+
+
+static boolean_t
+sched_proto_processor_enqueue(
+							 processor_t			processor,
+							 thread_t			thread,
+							 integer_t			options);
+
+static void
+sched_proto_processor_queue_shutdown(
+									 processor_t			processor);
+
+static boolean_t
+sched_proto_processor_queue_remove(
+						    processor_t			processor,
+							thread_t		thread);
+
+static boolean_t
+sched_proto_processor_queue_empty(processor_t		processor);
+
+static boolean_t
+sched_proto_processor_queue_has_priority(processor_t		processor,
+										 int				priority,
+										 boolean_t		gte);
+
+static boolean_t
+sched_proto_priority_is_urgent(int priority);
+
+static ast_t
+sched_proto_processor_csw_check(processor_t processor);
+
+static uint32_t
+sched_proto_initial_quantum_size(thread_t thread);
+
+static sched_mode_t
+sched_proto_initial_thread_sched_mode(task_t parent_task);
+
+static boolean_t
+sched_proto_supports_timeshare_mode(void);
+
+static boolean_t
+sched_proto_can_update_priority(thread_t	thread);
+
+static void
+sched_proto_update_priority(thread_t	thread);
+
+static void
+sched_proto_lightweight_update_priority(thread_t	thread);
+
+static void
+sched_proto_quantum_expire(thread_t	thread);
+
+static boolean_t
+sched_proto_should_current_thread_rechoose_processor(processor_t			processor);
+
+static int
+sched_proto_processor_runq_count(processor_t   processor);
+
+static uint64_t
+sched_proto_processor_runq_stats_count_sum(processor_t   processor);
+
+const struct sched_dispatch_table sched_proto_dispatch = {
+	sched_proto_init,
+	sched_proto_timebase_init,
+	sched_proto_processor_init,
+	sched_proto_pset_init,
+	sched_proto_maintenance_continuation,
+	sched_proto_choose_thread,
+	sched_proto_steal_thread,
+	sched_proto_compute_priority,
+	sched_proto_choose_processor,
+	sched_proto_processor_enqueue,
+	sched_proto_processor_queue_shutdown,
+	sched_proto_processor_queue_remove,
+	sched_proto_processor_queue_empty,
+	sched_proto_priority_is_urgent,
+	sched_proto_processor_csw_check,
+	sched_proto_processor_queue_has_priority,
+	sched_proto_initial_quantum_size,
+	sched_proto_initial_thread_sched_mode,
+	sched_proto_supports_timeshare_mode,
+	sched_proto_can_update_priority,
+	sched_proto_update_priority,
+	sched_proto_lightweight_update_priority,
+	sched_proto_quantum_expire,
+	sched_proto_should_current_thread_rechoose_processor,
+	sched_proto_processor_runq_count,
+	sched_proto_processor_runq_stats_count_sum,
+	sched_traditional_fairshare_init,
+	sched_traditional_fairshare_runq_count,
+	sched_traditional_fairshare_runq_stats_count_sum,
+	sched_traditional_fairshare_enqueue,
+	sched_traditional_fairshare_dequeue,
+	sched_traditional_fairshare_queue_remove,
+	TRUE /* direct_dispatch_to_idle_processors */
+};
+
+static struct run_queue	*global_runq;
+static struct run_queue	global_runq_storage;
+
+#define GLOBAL_RUNQ		((processor_t)-2)
+decl_simple_lock_data(static,global_runq_lock);
+
+extern int	max_unsafe_quanta;
+
+static uint32_t proto_quantum_us;
+static uint32_t proto_quantum;
+
+static uint32_t	runqueue_generation;
+
+static processor_t proto_processor;
+
+static uint64_t			sched_proto_tick_deadline;
+static uint32_t			sched_proto_tick;
+
+static void
+sched_proto_init(void)
+{
+	proto_quantum_us = 10*1000;
+	
+	printf("standard proto timeslicing quantum is %d us\n", proto_quantum_us);
+
+	simple_lock_init(&global_runq_lock, 0);
+	global_runq = &global_runq_storage;
+	run_queue_init(global_runq);
+	runqueue_generation = 0;
+	
+	proto_processor = master_processor;
+}
+
+static void
+sched_proto_timebase_init(void)
+{
+	uint64_t	abstime;
+
+	/* standard timeslicing quantum */
+	clock_interval_to_absolutetime_interval(
+											proto_quantum_us, NSEC_PER_USEC, &abstime);
+	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
+	proto_quantum = (uint32_t)abstime;
+	
+	thread_depress_time = 1 * proto_quantum;
+	default_timeshare_computation = proto_quantum / 2;
+	default_timeshare_constraint = proto_quantum;
+	
+	max_unsafe_computation = max_unsafe_quanta * proto_quantum;
+	sched_safe_duration = 2 * max_unsafe_quanta * proto_quantum;
+}
+
+static void
+sched_proto_processor_init(processor_t processor __unused)
+{
+	/* No per-processor state */
+}
+
+static void
+sched_proto_pset_init(processor_set_t pset __unused)
+{
+}
+
+static void
+sched_proto_maintenance_continuation(void)
+{
+	uint64_t			abstime = mach_absolute_time();
+	
+	sched_proto_tick++;
+	
+	/* Every 8 seconds, switch to another processor */
+	if ((sched_proto_tick & 0x7) == 0) {
+		processor_t new_processor;
+		
+		new_processor = proto_processor->processor_list;
+		if (new_processor == PROCESSOR_NULL)
+			proto_processor = master_processor;
+		else
+			proto_processor = new_processor;
+	}
+
+	/*
+	 *  Compute various averages.
+	 */
+	compute_averages();
+	
+	if (sched_proto_tick_deadline == 0)
+		sched_proto_tick_deadline = abstime;
+	
+	clock_deadline_for_periodic_event(sched_one_second_interval, abstime,
+						&sched_proto_tick_deadline);
+	
+	assert_wait_deadline((event_t)sched_proto_maintenance_continuation, THREAD_UNINT, sched_proto_tick_deadline);
+	thread_block((thread_continue_t)sched_proto_maintenance_continuation);
+	/*NOTREACHED*/
+}
+
+static thread_t
+sched_proto_choose_thread(processor_t		processor,
+						  int				priority)
+{
+	run_queue_t		rq = global_runq;
+	queue_t			queue;
+	int				pri, count;
+	thread_t		thread;
+
+	simple_lock(&global_runq_lock);
+	
+	queue = rq->queues + rq->highq;
+	pri = rq->highq;
+	count = rq->count;
+	
+	/*
+	 * Since we don't depress priorities, a high priority thread
+	 * may get selected over and over again. Put a runqueue
+	 * generation number in the thread structure so that we
+	 * can ensure that we've cycled through all runnable tasks
+	 * before coming back to a high priority thread. This isn't
+	 * perfect, especially if the number of runnable threads always
+	 * stays high, but is a workable approximation.
+	 */
+	 
+	while (count > 0 && pri >= priority) {
+		thread = (thread_t)queue_first(queue);
+		while (!queue_end(queue, (queue_entry_t)thread)) {
+			if ((thread->bound_processor == PROCESSOR_NULL ||
+				thread->bound_processor == processor) &&
+				runqueue_generation != thread->runqueue_generation) {
+				remqueue((queue_entry_t)thread);
+				
+				thread->runq = PROCESSOR_NULL;
+				thread->runqueue_generation = runqueue_generation;
+				SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
+				rq->count--;
+				if (queue_empty(queue)) {
+					if (pri != IDLEPRI)
+						clrbit(MAXPRI - pri, rq->bitmap);
+					rq->highq = MAXPRI - ffsbit(rq->bitmap);
+				}
+				
+				simple_unlock(&global_runq_lock);
+				return (thread);
+			}
+			count--;
+			
+			thread = (thread_t)queue_next((queue_entry_t)thread);
+		}
+		
+		queue--; pri--;
+	}
+	
+	runqueue_generation++;
+	
+	simple_unlock(&global_runq_lock);
+	return (THREAD_NULL);
+}
+
+static thread_t
+sched_proto_steal_thread(processor_set_t		pset)
+{
+	pset_unlock(pset);
+	
+	return (THREAD_NULL);
+}
+
+static void
+sched_proto_compute_priority(thread_t	thread,
+							 boolean_t			override_depress __unused)
+{
+	set_sched_pri(thread, thread->priority);
+}
+
+static processor_t
+sched_proto_choose_processor(	processor_set_t		pset,
+							 processor_t			processor,
+							 thread_t			thread __unused)
+{
+	processor = proto_processor;
+	
+	/*
+	 *	Check that the correct processor set is
+	 *	returned locked.
+	 */
+	if (pset != processor->processor_set) {
+		pset_unlock(pset);
+		
+		pset = processor->processor_set;
+		pset_lock(pset);
+	}
+	
+	return (processor);
+}
+
+static boolean_t
+sched_proto_processor_enqueue(
+							 processor_t			processor __unused,
+							 thread_t			thread,
+							 integer_t			options)
+{
+	run_queue_t		rq = global_runq;
+	boolean_t		result;
+	
+	simple_lock(&global_runq_lock);
+	result = run_queue_enqueue(rq, thread, options);
+	thread->runq = GLOBAL_RUNQ;
+	simple_unlock(&global_runq_lock);
+	
+	return (result);
+}
+
+static void
+sched_proto_processor_queue_shutdown(
+									 processor_t			processor)
+{
+	/* With a global runqueue, just stop choosing this processor */
+	(void)processor;
+}
+
+static boolean_t
+sched_proto_processor_queue_remove(
+								processor_t			processor,
+								thread_t		thread)
+{
+	void *			rqlock;
+	run_queue_t		rq;
+	
+	rqlock = &global_runq_lock;
+	rq = global_runq;
+	
+	simple_lock(rqlock);
+	if (processor == thread->runq) {
+		/*
+		 *	Thread is on a run queue and we have a lock on
+		 *	that run queue.
+		 */
+		remqueue((queue_entry_t)thread);
+		SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
+		rq->count--;
+		if (SCHED(priority_is_urgent)(thread->sched_pri)) {
+			rq->urgency--; assert(rq->urgency >= 0);
+		}
+		
+		if (queue_empty(rq->queues + thread->sched_pri)) {
+			/* update run queue status */
+			if (thread->sched_pri != IDLEPRI)
+				clrbit(MAXPRI - thread->sched_pri, rq->bitmap);
+			rq->highq = MAXPRI - ffsbit(rq->bitmap);
+		}
+		
+		thread->runq = PROCESSOR_NULL;
+	}
+	else {
+		/*
+		 *	The thread left the run queue before we could
+		 * 	lock the run queue.
+		 */
+		assert(thread->runq == PROCESSOR_NULL);
+		processor = PROCESSOR_NULL;
+	}
+	
+	simple_unlock(rqlock);
+	
+	return (processor != PROCESSOR_NULL);
+}
+
+static boolean_t
+sched_proto_processor_queue_empty(processor_t		processor __unused)
+{
+	boolean_t result;
+	
+	result = (global_runq->count == 0);
+	
+	return result;
+}
+
+static boolean_t
+sched_proto_processor_queue_has_priority(processor_t		processor __unused,
+										 int				priority,
+										 boolean_t		gte)
+{
+	boolean_t result;
+	
+	simple_lock(&global_runq_lock);
+
+	if (gte)
+		result = global_runq->highq >= priority;
+	else
+		result = global_runq->highq > priority;
+
+	simple_unlock(&global_runq_lock);
+	
+	return result;
+}
+
+/* Implement sched_preempt_pri in code */
+static boolean_t
+sched_proto_priority_is_urgent(int priority)
+{
+	if (priority <= BASEPRI_FOREGROUND)
+		return FALSE;
+	
+	if (priority < MINPRI_KERNEL)
+		return TRUE;
+
+	if (priority >= BASEPRI_PREEMPT)
+		return TRUE;
+	
+	return FALSE;
+}
+
+static ast_t
+sched_proto_processor_csw_check(processor_t processor __unused)
+{
+	run_queue_t		runq;
+	int				count, urgency;
+	
+	runq = global_runq;
+	count = runq->count;
+	urgency = runq->urgency;
+	
+	if (count > 0) {
+		if (urgency > 0)
+			return (AST_PREEMPT | AST_URGENT);
+		
+		return AST_PREEMPT;
+	}
+	
+	return AST_NONE;
+}
+
+static uint32_t
+sched_proto_initial_quantum_size(thread_t thread __unused)
+{
+	return proto_quantum;
+}
+
+static sched_mode_t
+sched_proto_initial_thread_sched_mode(task_t parent_task)
+{
+	if (parent_task == kernel_task)
+		return TH_MODE_FIXED;
+	else
+		return TH_MODE_TIMESHARE;	
+}
+
+static boolean_t
+sched_proto_supports_timeshare_mode(void)
+{
+	return TRUE;
+}
+
+static boolean_t
+sched_proto_can_update_priority(thread_t	thread __unused)
+{
+	return FALSE;
+}
+
+static void
+sched_proto_update_priority(thread_t	thread __unused)
+{
+}
+
+static void
+sched_proto_lightweight_update_priority(thread_t	thread __unused)
+{
+}
+
+static void
+sched_proto_quantum_expire(thread_t	thread __unused)
+{
+}
+
+static boolean_t
+sched_proto_should_current_thread_rechoose_processor(processor_t			processor)
+{
+	return (proto_processor != processor);
+}
+
+static int
+sched_proto_processor_runq_count(processor_t   processor)
+{
+	if (master_processor == processor) {
+		return global_runq->count;
+	} else {
+		return 0;
+	}	
+}
+
+static uint64_t
+sched_proto_processor_runq_stats_count_sum(processor_t   processor)
+{
+	if (master_processor == processor) {
+		return global_runq->runq_stats.count_sum;
+	} else {
+		return 0ULL;
+	}
+}
+
diff --git a/osfmk/kern/stack.c b/osfmk/kern/stack.c
index a59122569..6b5ea8302 100644
--- a/osfmk/kern/stack.c
+++ b/osfmk/kern/stack.c
@@ -63,6 +63,9 @@ static vm_offset_t		stack_free_list;
 static unsigned int		stack_free_count, stack_free_hiwat;		/* free list count */
 static unsigned int		stack_hiwat;
 unsigned int			stack_total;				/* current total count */
+unsigned long long		stack_allocs;				/* total count of allocations */
+
+static int			stack_fake_zone_index = -1;	/* index in zone_info array */
 
 static unsigned int		stack_free_target;
 static int				stack_free_delta;
@@ -76,6 +79,51 @@ vm_offset_t			kernel_stack_size = KERNEL_STACK_SIZE;
 vm_offset_t			kernel_stack_mask = -KERNEL_STACK_SIZE;
 vm_offset_t			kernel_stack_depth_max = 0;
 
+static inline void
+STACK_ZINFO_PALLOC(thread_t thread)
+{
+	task_t task;
+	zinfo_usage_t zinfo;
+
+	thread->tkm_private.alloc += kernel_stack_size;
+	if (stack_fake_zone_index != -1 &&
+	    (task = thread->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
+		OSAddAtomic64(kernel_stack_size,
+			      (int64_t *)&zinfo[stack_fake_zone_index].alloc);
+}
+
+static inline void
+STACK_ZINFO_PFREE(thread_t thread)
+{
+	task_t task;
+	zinfo_usage_t zinfo;
+
+	thread->tkm_private.free += kernel_stack_size;
+	if (stack_fake_zone_index != -1 &&
+	    (task = thread->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
+		OSAddAtomic64(kernel_stack_size, 
+			      (int64_t *)&zinfo[stack_fake_zone_index].free);
+}
+
+static inline void
+STACK_ZINFO_HANDOFF(thread_t from, thread_t to)
+{
+	from->tkm_private.free += kernel_stack_size;
+	to->tkm_private.alloc += kernel_stack_size;
+	if (stack_fake_zone_index != -1) {
+		task_t task;
+		zinfo_usage_t zinfo;
+	
+		if ((task = from->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
+			OSAddAtomic64(kernel_stack_size, 
+				      (int64_t *)&zinfo[stack_fake_zone_index].free);
+
+		if ((task = to->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
+			OSAddAtomic64(kernel_stack_size, 
+				      (int64_t *)&zinfo[stack_fake_zone_index].alloc);
+	}
+}
+
 /*
  *	The next field is at the base of the stack,
  *	so the low end is left unsullied.
@@ -98,6 +146,9 @@ roundup_pow2(vm_offset_t size)
 	return 1UL << (log2(size - 1) + 1); 
 }
 
+static vm_offset_t stack_alloc_internal(void);
+static void stack_free_stack(vm_offset_t);
+
 void
 stack_init(void)
 {
@@ -125,18 +176,17 @@ stack_init(void)
  *	Allocate a stack for a thread, may
  *	block.
  */
-void
-stack_alloc(
-	thread_t	thread)
+
+static vm_offset_t 
+stack_alloc_internal(void)
 {
 	vm_offset_t		stack;
 	spl_t			s;
 	int			guard_flags;
 
-	assert(thread->kernel_stack == 0);
-
 	s = splsched();
 	stack_lock();
+	stack_allocs++;
 	stack = stack_free_list;
 	if (stack != 0) {
 		stack_free_list = stack_next(stack);
@@ -174,8 +224,25 @@ stack_alloc(
 
 		stack += PAGE_SIZE;
 	}
+	return stack;
+}
 
-	machine_stack_attach(thread, stack);
+void
+stack_alloc(
+	thread_t	thread)
+{
+
+	assert(thread->kernel_stack == 0);
+	machine_stack_attach(thread, stack_alloc_internal());
+	STACK_ZINFO_PALLOC(thread);
+}
+
+void
+stack_handoff(thread_t from, thread_t to)
+{
+	assert(from == current_thread());
+	machine_stack_handoff(from, to);
+	STACK_ZINFO_HANDOFF(from, to);
 }
 
 /*
@@ -190,11 +257,23 @@ stack_free(
     vm_offset_t		stack = machine_stack_detach(thread);
 
 	assert(stack);
-	if (stack != thread->reserved_stack)
+	if (stack != thread->reserved_stack) {
+		STACK_ZINFO_PFREE(thread);
 		stack_free_stack(stack);
+	}
 }
 
 void
+stack_free_reserved(
+	thread_t	thread)
+{
+	if (thread->reserved_stack != thread->kernel_stack) {
+		stack_free_stack(thread->reserved_stack);
+		STACK_ZINFO_PFREE(thread);
+	}
+}
+
+static void
 stack_free_stack(
 	vm_offset_t		stack)
 {
@@ -240,6 +319,7 @@ stack_alloc_try(
 	cache = &PROCESSOR_DATA(current_processor(), stack_cache);
 	stack = cache->free;
 	if (stack != 0) {
+		STACK_ZINFO_PALLOC(thread);
 		cache->free = stack_next(stack);
 		cache->count--;
 	}
@@ -248,6 +328,7 @@ stack_alloc_try(
 			stack_lock();
 			stack = stack_free_list;
 			if (stack != 0) {
+				STACK_ZINFO_PALLOC(thread);
 				stack_free_list = stack_next(stack);
 				stack_free_count--;
 				stack_free_delta--;
@@ -360,14 +441,23 @@ __unused void		*arg)
 }
 
 void
-stack_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size,
-		     vm_size_t *alloc_size, int *collectable, int *exhaustable)
+stack_fake_zone_init(int zone_index)
+{
+	stack_fake_zone_index = zone_index;
+}
+
+void
+stack_fake_zone_info(int *count, 
+		     vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
+		     uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
 {
 	unsigned int	total, hiwat, free;
+	unsigned long long all;
 	spl_t			s;
 
 	s = splsched();
 	stack_lock();
+	all = stack_allocs;
 	total = stack_total;
 	hiwat = stack_hiwat;
 	free = stack_free_count;
@@ -379,8 +469,11 @@ stack_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_si
 	*max_size   = kernel_stack_size * hiwat;
 	*elem_size  = kernel_stack_size;
 	*alloc_size = kernel_stack_size;
+	*sum_size = all * kernel_stack_size;
+
 	*collectable = 1;
 	*exhaustable = 0;
+	*caller_acct = 1;
 }
 
 /* OBSOLETE */
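
The STACK_ZINFO_* helpers above charge kernel-stack bytes to the owning task's fake-zone slot, and a handoff is accounted as a free on the old thread plus an alloc on the new one, so the per-task sums stay balanced. A sketch of that pairing with C11 atomics (field and constant names are stand-ins mirroring the patch):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct { _Atomic int64_t alloc, freed; } zinfo_usage_model_t;

    #define STACK_SIZE_MODEL (16 * 1024)   /* assumed kernel_stack_size */

    static void stack_handoff_model(zinfo_usage_model_t *from,
                                    zinfo_usage_model_t *to)
    {
        /* one free charged to the old owner, one alloc to the new one */
        atomic_fetch_add(&from->freed, STACK_SIZE_MODEL);
        atomic_fetch_add(&to->alloc, STACK_SIZE_MODEL);
    }

    int main(void)
    {
        zinfo_usage_model_t a = {0}, b = {0};
        stack_handoff_model(&a, &b);
        printf("a: alloc=%lld freed=%lld  b: alloc=%lld freed=%lld\n",
               (long long)a.alloc, (long long)a.freed,
               (long long)b.alloc, (long long)b.freed);
        return 0;
    }

The same invariant is what lets stack_fake_zone_info() report sum_size as stack_allocs * kernel_stack_size.
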
diff --git a/osfmk/kern/startup.c b/osfmk/kern/startup.c
index fb673da76..47290e3d8 100644
--- a/osfmk/kern/startup.c
+++ b/osfmk/kern/startup.c
@@ -105,6 +105,7 @@
 #include <machine/pmap.h>
 #include <machine/commpage.h>
 #include <libkern/version.h>
+#include <sys/kdebug.h>
 
 #if MACH_KDP
 #include <kdp/kdp.h>
@@ -118,11 +119,6 @@
 #include <pmc/pmc.h>
 #endif
 
-#ifdef __ppc__
-#include <ppc/Firmware.h>
-#include <ppc/mappings.h>
-#endif
-
 static void		kernel_bootstrap_thread(void);
 
 static void		load_context(
@@ -135,36 +131,58 @@ extern void cpu_physwindow_init(int);
 // libkern/OSKextLib.cpp
 extern void	OSKextRemoveKextBootstrap(void);
 
-void srv_setup(void);
-extern void bsd_srv_setup(int);
+void scale_setup(void);
+extern void bsd_scale_setup(int);
 extern unsigned int semaphore_max;
 
-
 /*
  *	Running in virtual memory, on the interrupt stack.
  */
 
-extern int srv;
+extern int serverperfmode;
+
+/* size of kernel trace buffer, disabled by default */
+unsigned int new_nkdbufs = 0;
+
+/* mach leak logging */
+int log_leaks = 0;
+int turn_on_log_leaks = 0;
+
+
+void
+kernel_early_bootstrap(void)
+{
+
+	lck_mod_init();
+
+	/*
+	 * Initialize the timer callout world
+	 */
+	timer_call_initialize();
+}
+
 
 void
 kernel_bootstrap(void)
 {
 	kern_return_t	result;
-	thread_t		thread;
+	thread_t	thread;
+	char		namep[16];
 
 	printf("%s\n", version); /* log kernel version */
 
 #define kernel_bootstrap_kprintf(x...) /* kprintf("kernel_bootstrap: " x) */
 
-	/* i386_vm_init already checks for this ; do it aagin anyway */
-        if (PE_parse_boot_argn("srv", &srv, sizeof (srv))) {
-                srv = 1;
-        }
+	if (PE_parse_boot_argn("-l", namep, sizeof (namep))) /* leaks logging */
+		turn_on_log_leaks = 1;
 
-	srv_setup();
+	PE_parse_boot_argn("trace", &new_nkdbufs, sizeof (new_nkdbufs));
 
-	kernel_bootstrap_kprintf("calling lck_mod_init\n");
-	lck_mod_init();
+	/* i386_vm_init already checks for this; do it again anyway */
+        if (PE_parse_boot_argn("serverperfmode", &serverperfmode, sizeof (serverperfmode))) {
+                serverperfmode = 1;
+        }
+	scale_setup();
 
 	kernel_bootstrap_kprintf("calling vm_mem_bootstrap\n");
 	vm_mem_bootstrap();
@@ -232,6 +250,13 @@ kernel_bootstrap(void)
 	thread->state = TH_RUN;
 	thread_deallocate(thread);
 
+	/* transfer statistics from init thread to kernel */
+	thread_t init_thread = current_thread();
+	kernel_task->tkm_private.alloc = init_thread->tkm_private.alloc;
+	kernel_task->tkm_private.free = init_thread->tkm_private.free;
+	kernel_task->tkm_shared.alloc = init_thread->tkm_shared.alloc;
+	kernel_task->tkm_shared.free = init_thread->tkm_shared.free;
+
 	kernel_bootstrap_kprintf("calling load_context - done\n");
 	load_context(thread);
 	/*NOTREACHED*/
@@ -264,6 +289,18 @@ kernel_bootstrap_thread(void)
 	kernel_bootstrap_thread_kprintf("calling sched_startup\n");
 	sched_startup();
 
+	/*
+	 * Thread lifecycle maintenance (teardown, stack allocation)
+	 */
+	kernel_bootstrap_thread_kprintf("calling thread_daemon_init\n");
+	thread_daemon_init();
+	
+	/*
+	 * Thread callout service.
+	 */
+	kernel_bootstrap_thread_kprintf("calling thread_call_initialize\n");
+	thread_call_initialize();
+	
 	/*
 	 * Remain on current processor as
 	 * additional processors come online.
@@ -307,6 +344,14 @@ kernel_bootstrap_thread(void)
 	pmc_bootstrap();
 #endif
 
+#if (defined(__i386__) || defined(__x86_64__))
+	if (turn_on_log_leaks && !new_nkdbufs)
+		new_nkdbufs = 200000;
+	start_kern_tracing(new_nkdbufs);
+	if (turn_on_log_leaks)
+		log_leaks = 1;
+#endif
+
 #ifdef	IOKIT
 	PE_init_iokit();
 #endif
@@ -324,6 +369,14 @@ kernel_bootstrap_thread(void)
 	cpu_userwindow_init(0);
 #endif
 
+#if (!defined(__i386__) && !defined(__x86_64__))
+	if (turn_on_log_leaks && !new_nkdbufs)
+		new_nkdbufs = 200000;
+	start_kern_tracing(new_nkdbufs);
+	if (turn_on_log_leaks)
+		log_leaks = 1;
+#endif
+
 	/*
 	 *	Initialize the shared region module.
 	 */
@@ -459,6 +512,7 @@ load_context(
 
 	processor->active_thread = thread;
 	processor->current_pri = thread->sched_pri;
+	processor->current_thmode = thread->sched_mode;
 	processor->deadline = UINT64_MAX;
 	thread->last_processor = processor;
 
@@ -477,23 +531,32 @@ load_context(
 }
 
 void
-srv_setup()
+scale_setup()
 {
 	int scale = 0;
 #if defined(__LP64__)
-	/* if memory is more than 16G, then apply rules for processes */
-	if ((srv != 0) && ((uint64_t)sane_size >= (uint64_t)(16 * 1024 * 1024 *1024ULL))) {
+	typeof(task_max) task_max_base = task_max;
+
+	/* Raise limits for servers with >= 16G */
+	if ((serverperfmode != 0) && ((uint64_t)sane_size >= (uint64_t)(16 * 1024 * 1024 *1024ULL))) {
 		scale = (int)((uint64_t)sane_size / (uint64_t)(8 * 1024 * 1024 *1024ULL));
 		/* limit to 128 G */
 		if (scale > 16)
 			scale = 16;
-		task_max = 2500 * scale;
+		task_max_base = 2500;
+	} else if ((uint64_t)sane_size >= (uint64_t)(3 * 1024 * 1024 *1024ULL))
+		scale = 2;
+
+	task_max = MAX(task_max, task_max_base * scale);
+
+	if (scale != 0) {
 		task_threadmax = task_max;
-		thread_max = task_max * 5;
-	} else
-		scale = 0;
+		thread_max = task_max * 5; 
+	}
+
 #endif
-	bsd_srv_setup(scale);
+
+	bsd_scale_setup(scale);
 	
 	ipc_space_max = SPACE_MAX;
 	ipc_tree_entry_max = ITE_MAX;
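
scale_setup() now derives a scale factor from physical memory instead of hard-coding server limits: one unit per 8 GB once a serverperfmode machine has at least 16 GB, capped at 16 (the 128 GB limit), and a fixed scale of 2 for ordinary machines over 3 GB. A worked model of the arithmetic (the 24 GB memory size and the starting task_max of 1024 are assumptions for illustration):

    #include <stdint.h>
    #include <stdio.h>

    #define GB (1024ULL * 1024 * 1024)
    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    int main(void)
    {
        uint64_t sane_size = 24 * GB;   /* hypothetical server */
        int serverperfmode = 1;
        int task_max = 1024, task_max_base = task_max, scale = 0;

        if (serverperfmode != 0 && sane_size >= 16 * GB) {
            scale = (int)(sane_size / (8 * GB));   /* 24 GB -> 3 */
            if (scale > 16)                        /* limit to 128 GB */
                scale = 16;
            task_max_base = 2500;
        } else if (sane_size >= 3 * GB)
            scale = 2;

        task_max = MAX(task_max, task_max_base * scale);
        printf("scale=%d task_max=%d\n", scale, task_max); /* 3, 7500 */
        return 0;
    }
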
diff --git a/osfmk/kern/startup.h b/osfmk/kern/startup.h
index bb60c7d40..7c239784f 100644
--- a/osfmk/kern/startup.h
+++ b/osfmk/kern/startup.h
@@ -42,6 +42,7 @@ __BEGIN_DECLS
  */
 
 /* Initialize kernel */
+extern void kernel_early_bootstrap(void) __attribute__((section("__TEXT, initcode")));
 extern void kernel_bootstrap(void) __attribute__((section("__TEXT, initcode")));
 
 /* Initialize machine dependent stuff */
diff --git a/osfmk/kern/sync_lock.c b/osfmk/kern/sync_lock.c
index 174381f5f..b69958ad7 100644
--- a/osfmk/kern/sync_lock.c
+++ b/osfmk/kern/sync_lock.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -72,12 +72,10 @@
 	th = (ul)->holder;					\
         if ((th)->active) {					\
 		thread_mtx_lock(th);				\
-		remqueue(&th->held_ulocks,			\
-			 (queue_entry_t) (ul));			\
+		remqueue((queue_entry_t) (ul));		\
 		thread_mtx_unlock(th);				\
 	} else {						\
-		remqueue(&th->held_ulocks,			\
-			 (queue_entry_t) (ul));			\
+		remqueue((queue_entry_t) (ul));		\
 	}							\
 	(ul)->holder = THREAD_NULL;				\
 	MACRO_END
@@ -98,7 +96,7 @@
 #define lock_set_ownership_clear(ls, t)				\
 	MACRO_BEGIN						\
 	task_lock((t));						\
-	remqueue(&(t)->lock_set_list, (queue_entry_t) (ls));	\
+	remqueue((queue_entry_t) (ls));	\
 	(t)->lock_sets_owned--;					\
 	task_unlock((t));					\
 	MACRO_END
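
remqueue() losing its queue-head argument works because Mach queue entries carry both links: unlinking an element is purely local, so the caller no longer has to name the queue it sits on. A minimal doubly linked sketch of why (types here are illustrative, not the kernel's queue_entry):

    #include <stddef.h>
    #include <stdio.h>

    typedef struct qentry {
        struct qentry *next, *prev;
    } qentry_t;

    /* assumes the entry is linked into a circular queue, so the
     * neighbour pointers are never NULL */
    static void remqueue_model(qentry_t *e)
    {
        e->prev->next = e->next;   /* splice out; head never consulted */
        e->next->prev = e->prev;
        e->next = e->prev = NULL;
    }

    int main(void)
    {
        qentry_t head, a, b;
        head.next = &a; a.prev = &head;   /* head <-> a <-> b <-> head */
        a.next = &b;    b.prev = &a;
        b.next = &head; head.prev = &b;

        remqueue_model(&a);
        printf("head.next == &b: %d\n", head.next == &b);   /* 1 */
        return 0;
    }
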
diff --git a/osfmk/kern/sync_sema.c b/osfmk/kern/sync_sema.c
index a072684ad..80ffb8199 100644
--- a/osfmk/kern/sync_sema.c
+++ b/osfmk/kern/sync_sema.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -241,7 +241,7 @@ semaphore_destroy(
 		task_unlock(task);
 		return KERN_INVALID_ARGUMENT;
 	}
-	remqueue(&task->semaphore_list, (queue_entry_t) semaphore);
+	remqueue((queue_entry_t) semaphore);
 	semaphore->owner = TASK_NULL;
 	task->semaphores_owned--;
 	task_unlock(task);
diff --git a/osfmk/kern/syscall_subr.c b/osfmk/kern/syscall_subr.c
index 3daf1ec38..e45f99f17 100644
--- a/osfmk/kern/syscall_subr.c
+++ b/osfmk/kern/syscall_subr.c
@@ -115,7 +115,7 @@ swtch_continue(void)
 
     disable_preemption();
 	myprocessor = current_processor();
-	result = myprocessor->runq.count > 0 || rt_runq.count > 0;
+	result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
 	enable_preemption();
 
 	thread_syscall_return(result);
@@ -131,7 +131,7 @@ swtch(
 
 	disable_preemption();
 	myprocessor = current_processor();
-	if (myprocessor->runq.count == 0 &&	rt_runq.count == 0) {
+	if (SCHED(processor_queue_empty)(myprocessor) &&	rt_runq.count == 0) {
 		mp_enable_preemption();
 
 		return (FALSE);
@@ -144,7 +144,7 @@ swtch(
 
 	disable_preemption();
 	myprocessor = current_processor();
-	result = myprocessor->runq.count > 0 || rt_runq.count > 0;
+	result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
 	enable_preemption();
 
 	return (result);
@@ -160,7 +160,7 @@ swtch_pri_continue(void)
 
     disable_preemption();
 	myprocessor = current_processor();
-	result = myprocessor->runq.count > 0 || rt_runq.count > 0;
+	result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
 	mp_enable_preemption();
 
 	thread_syscall_return(result);
@@ -176,7 +176,7 @@ __unused	struct swtch_pri_args *args)
 
 	disable_preemption();
 	myprocessor = current_processor();
-	if (myprocessor->runq.count == 0 && rt_runq.count == 0) {
+	if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) {
 		mp_enable_preemption();
 
 		return (FALSE);
@@ -185,7 +185,7 @@ __unused	struct swtch_pri_args *args)
 
 	counter(c_swtch_pri_block++);
 
-	thread_depress_abstime(std_quantum);
+	thread_depress_abstime(thread_depress_time);
 
 	thread_block_reason((thread_continue_t)swtch_pri_continue, NULL, AST_YIELD);
 
@@ -193,7 +193,7 @@ __unused	struct swtch_pri_args *args)
 
 	disable_preemption();
 	myprocessor = current_processor();
-	result = myprocessor->runq.count > 0 || rt_runq.count > 0;
+	result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
 	enable_preemption();
 
 	return (result);
@@ -290,7 +290,7 @@ thread_switch(
 			thread->sched_pri < BASEPRI_RTQUEUES				&&
 			(thread->bound_processor == PROCESSOR_NULL	||
 			 thread->bound_processor == processor)				&&
-				run_queue_remove(thread)							) {
+				thread_run_queue_remove(thread)							) {
 			/*
 			 *	Hah, got it!!
 			 */
@@ -347,16 +347,16 @@ thread_depress_abstime(
 
     s = splsched();
     thread_lock(self);
-	if (!(self->sched_mode & TH_MODE_ISDEPRESSED)) {
+	if (!(self->sched_flags & TH_SFLAG_DEPRESSED_MASK)) {
 		processor_t		myprocessor = self->last_processor;
 
 		self->sched_pri = DEPRESSPRI;
 		myprocessor->current_pri = self->sched_pri;
-		self->sched_mode |= TH_MODE_DEPRESS;
+		self->sched_flags |= TH_SFLAG_DEPRESS;
 
 		if (interval != 0) {
 			clock_absolutetime_interval_to_deadline(interval, &deadline);
-			if (!timer_call_enter(&self->depress_timer, deadline))
+			if (!timer_call_enter(&self->depress_timer, deadline, 0))
 				self->depress_timer_active++;
 		}
 	}
@@ -389,8 +389,8 @@ thread_depress_expire(
     s = splsched();
     thread_lock(thread);
 	if (--thread->depress_timer_active == 0) {
-		thread->sched_mode &= ~TH_MODE_ISDEPRESSED;
-		compute_priority(thread, FALSE);
+		thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
+		SCHED(compute_priority)(thread, FALSE);
 	}
     thread_unlock(thread);
     splx(s);
@@ -408,10 +408,10 @@ thread_depress_abort_internal(
 
     s = splsched();
     thread_lock(thread);
-	if (!(thread->sched_mode & TH_MODE_POLLDEPRESS)) {
-		if (thread->sched_mode & TH_MODE_ISDEPRESSED) {
-			thread->sched_mode &= ~TH_MODE_ISDEPRESSED;
-			compute_priority(thread, FALSE);
+	if (!(thread->sched_flags & TH_SFLAG_POLLDEPRESS)) {
+		if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
+			thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
+			SCHED(compute_priority)(thread, FALSE);
 			result = KERN_SUCCESS;
 		}
 
@@ -433,7 +433,7 @@ thread_poll_yield(
 	assert(self == current_thread());
 
 	s = splsched();
-	if (!(self->sched_mode & (TH_MODE_REALTIME|TH_MODE_TIMESHARE))) {
+	if (self->sched_mode == TH_MODE_FIXED) {
 		uint64_t			total_computation, abstime;
 
 		abstime = mach_absolute_time();
@@ -444,16 +444,16 @@ thread_poll_yield(
 			ast_t			preempt;
 
 			thread_lock(self);
-			if (!(self->sched_mode & TH_MODE_ISDEPRESSED)) {
+			if (!(self->sched_flags & TH_SFLAG_DEPRESSED_MASK)) {
 				self->sched_pri = DEPRESSPRI;
 				myprocessor->current_pri = self->sched_pri;
 			}
 			self->computation_epoch = abstime;
 			self->computation_metered = 0;
-			self->sched_mode |= TH_MODE_POLLDEPRESS;
+			self->sched_flags |= TH_SFLAG_POLLDEPRESS;
 
 			abstime += (total_computation >> sched_poll_yield_shift);
-			if (!timer_call_enter(&self->depress_timer, abstime))
+			if (!timer_call_enter(&self->depress_timer, abstime, 0))
 				self->depress_timer_active++;
 			thread_unlock(self);
 
@@ -473,7 +473,7 @@ thread_yield_internal(
 
 	disable_preemption();
 	myprocessor = current_processor();
-	if (myprocessor->runq.count == 0 && rt_runq.count == 0) {
+	if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) {
 		mp_enable_preemption();
 
 		return;
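
All the swtch()/yield paths above stop peeking at myprocessor->runq.count and instead ask the active scheduler through SCHED(processor_queue_empty), which is what lets a policy like sched_proto answer from its single global queue. A sketch of the dispatch shape (the ops-table layout and names are illustrative stand-ins for the patch's SCHED() indirection):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    typedef struct processor processor_t;   /* opaque in this sketch */

    struct sched_ops {
        bool (*processor_queue_empty)(processor_t *p);
    };

    static int global_runq_count = 0;

    static bool proto_queue_empty(processor_t *p)
    {
        (void)p;                 /* one global queue: arg unused */
        return global_runq_count == 0;
    }

    static const struct sched_ops sched_current = { proto_queue_empty };
    #define SCHED(op) (sched_current.op)

    int main(void)
    {
        printf("empty=%d\n", SCHED(processor_queue_empty)(NULL));
        return 0;
    }
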
diff --git a/osfmk/kern/syscall_sw.c b/osfmk/kern/syscall_sw.c
index 59a402aa3..7dc2d61fd 100644
--- a/osfmk/kern/syscall_sw.c
+++ b/osfmk/kern/syscall_sw.c
@@ -135,7 +135,11 @@ mach_trap_t	mach_trap_table[MACH_TRAP_TABLE_COUNT] = {
 /* 40 */	MACH_TRAP(kern_invalid, 0, NULL, NULL),
 /* 41 */	MACH_TRAP(kern_invalid, 0, NULL, NULL),
 /* 42 */	MACH_TRAP(kern_invalid, 0, NULL, NULL),
+#if	!defined(CONFIG_EMBEDDED)
 /* 43 */	MACH_TRAP(map_fd, 5, munge_wwwww, munge_ddddd),
+#else
+/* 43 */	MACH_TRAP(kern_invalid, 0, NULL, NULL),
+#endif	/* !defined(CONFIG_EMBEDDED) */
 /* 44 */	MACH_TRAP(task_name_for_pid, 3, munge_www, munge_ddd),
 /* 45 */ 	MACH_TRAP(task_for_pid, 3, munge_www, munge_ddd),
 /* 46 */	MACH_TRAP(pid_for_task, 2, munge_ww,munge_dd),
diff --git a/osfmk/kern/syscall_sw.h b/osfmk/kern/syscall_sw.h
index c6259eeb0..d186546d5 100644
--- a/osfmk/kern/syscall_sw.h
+++ b/osfmk/kern/syscall_sw.h
@@ -71,17 +71,13 @@ typedef	void	mach_munge_t(const void *, void *);
 typedef struct {
 	int			mach_trap_arg_count;
 	int			(*mach_trap_function)(void);
-#if defined(__i386__)
-	boolean_t		mach_trap_stack;
-#else
+#if 0 /* no active architectures use mungers for mach traps */
 	mach_munge_t		*mach_trap_arg_munge32; /* system call arguments for 32-bit */
 	mach_munge_t		*mach_trap_arg_munge64; /* system call arguments for 64-bit */
 #endif
-#if	!MACH_ASSERT
-	int			mach_trap_unused;
-#else
+#if	MACH_ASSERT
 	const char*		mach_trap_name;
-#endif /* !MACH_ASSERT */
+#endif /* MACH_ASSERT */
 } mach_trap_t;
 
 #define MACH_TRAP_TABLE_COUNT   128
@@ -90,23 +86,16 @@ typedef struct {
 extern mach_trap_t		mach_trap_table[];
 extern int			mach_trap_count;
 
-#if defined(__i386__)
-#if	!MACH_ASSERT
-#define	MACH_TRAP(name, arg_count, munge32, munge64)	\
-		{ (arg_count), (int (*)(void)) (name), FALSE, 0 }
-#else
-#define MACH_TRAP(name, arg_count, munge32, munge64)		\
-		{ (arg_count), (int (*)(void)) (name), FALSE, #name }
-#endif /* !MACH_ASSERT */
-#else  /* !defined(__i386__) */
+#if defined(__i386__) || defined(__x86_64__)
 #if	!MACH_ASSERT
 #define	MACH_TRAP(name, arg_count, munge32, munge64)	\
-		{ (arg_count), (int (*)(void)) (name), (munge32), (munge64), 0 }
+		{ (arg_count), (int (*)(void)) (name)  }
 #else
 #define MACH_TRAP(name, arg_count, munge32, munge64)		\
-  		{ (arg_count), (int (*)(void)) (name), (munge32), (munge64), #name }
+		{ (arg_count), (int (*)(void)) (name), #name }
 #endif /* !MACH_ASSERT */
-
-#endif /* !defined(__i386__) */
+#else  /* !defined(__i386__) && !defined(__x86_64__) */
+#error Unsupported architecture
+#endif /* !defined(__i386__) && !defined(__x86_64__) */
 
 #endif	/* _KERN_SYSCALL_SW_H_ */
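
With the munger fields compiled out, a trap table entry is just the argument count, the handler, and (under MACH_ASSERT) a name captured by the macro's #name stringification. A stand-alone model of the slimmed entry (struct and macro names are stand-ins):

    #include <stdio.h>

    typedef struct {
        int   arg_count;
        int (*fn)(void);
        const char *name;   /* the MACH_ASSERT-only field in the patch */
    } trap_model_t;

    #define TRAP(name, argc) { (argc), (int (*)(void))(name), #name }

    static int kern_invalid(void) { return -1; }

    static trap_model_t table[] = {
        TRAP(kern_invalid, 0),
    };

    int main(void)
    {
        printf("%s takes %d args\n", table[0].name, table[0].arg_count);
        return 0;
    }
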
diff --git a/osfmk/kern/task.c b/osfmk/kern/task.c
index c5efca2a7..985f3c144 100644
--- a/osfmk/kern/task.c
+++ b/osfmk/kern/task.c
@@ -131,12 +131,6 @@
 #include <ddb/db_sym.h>
 #endif	/* MACH_KDB */
 
-#ifdef __ppc__
-#include <ppc/exception.h>
-#include <ppc/hw_perfmon.h>
-#endif
-
-
 /*
  * Exported interfaces
  */
@@ -163,8 +157,14 @@ lck_attr_t      task_lck_attr;
 lck_grp_t       task_lck_grp;
 lck_grp_attr_t  task_lck_grp_attr;
 
+zinfo_usage_store_t tasks_tkm_private;
+zinfo_usage_store_t tasks_tkm_shared;
+
 int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
 
+/* externs for BSD kernel */
+extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
+
 /* Forwards */
 
 void		task_hold_locked(
@@ -226,17 +226,6 @@ task_set_64bit(
 				     (vm_map_offset_t) VM_MAX_ADDRESS,
 				     MACH_VM_MAX_ADDRESS,
 				     0);
-#ifdef __ppc__
-		/*
-		 * PPC51: ppc64 is limited to 51-bit addresses.
-		 * Memory mapped above that limit is handled specially
-		 * at the pmap level, so let pmap clean the commpage mapping
-		 * explicitly...
-		 */
-		pmap_unmap_sharedpage(task->map->pmap);	/* Unmap commpage */
-		/* ... and avoid regular pmap cleanup */
-		vm_flags |= VM_MAP_REMOVE_NO_PMAP_CLEANUP;
-#endif /* __ppc__ */
 		/* remove the higher VM mappings */
 		(void) vm_map_remove(task->map,
 				     MACH_VM_MAX_ADDRESS,
@@ -285,6 +274,7 @@ task_init(void)
 			task_max * sizeof(struct task),
 			TASK_CHUNK * sizeof(struct task),
 			"tasks");
+
 	zone_change(task_zone, Z_NOENCRYPT, TRUE);
 
 	/*
@@ -409,6 +399,13 @@ task_create_internal(
 	new_task->taskFeatures[0] = 0;				/* Init task features */
 	new_task->taskFeatures[1] = 0;				/* Init task features */
 
+	new_task->tkm_private.alloc = 0;
+	new_task->tkm_private.free = 0;
+	new_task->tkm_shared.alloc = 0;
+	new_task->tkm_shared.free = 0;
+
+	zinfo_task_init(new_task);
+
 #ifdef MACH_BSD
 	new_task->bsd_info = NULL;
 #endif /* MACH_BSD */
@@ -416,12 +413,8 @@ task_create_internal(
 #if defined(__i386__) || defined(__x86_64__)
 	new_task->i386_ldt = 0;
 	new_task->task_debug = NULL;
-
 #endif
 
-#ifdef __ppc__
-	if(BootProcInfo.pf.Available & pf64Bit) new_task->taskFeatures[0] |= tf64BitData;	/* If 64-bit machine, show we have 64-bit registers at least */
-#endif
 
 	queue_init(&new_task->semaphore_list);
 	queue_init(&new_task->lock_set_list);
@@ -473,6 +466,16 @@ task_create_internal(
 			task_affinity_create(parent_task, new_task);
 
 		new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
+		new_task->policystate = parent_task->policystate;
+		/* inherit the self action state */
+		new_task->actionstate = parent_task->actionstate;
+		new_task->ext_policystate = parent_task->ext_policystate;
+#if NOTYET
+		/* until the child lifecycle is settled, do not inherit external action state */
+		new_task->ext_actionstate = parent_task->ext_actionstate;
+#else
+		new_task->ext_actionstate = default_task_null_policy;
+#endif
 	}
 	else {
 		new_task->sec_token = KERNEL_SECURITY_TOKEN;
@@ -483,8 +486,14 @@ task_create_internal(
 		if(is_64bit)
 			task_set_64BitAddr(new_task);
 #endif
+		new_task->all_image_info_addr = (mach_vm_address_t)0;
+		new_task->all_image_info_size = (mach_vm_size_t)0;
 
 		new_task->pset_hint = PROCESSOR_SET_NULL;
+		new_task->policystate = default_task_proc_policy;
+		new_task->ext_policystate = default_task_proc_policy;
+		new_task->actionstate = default_task_null_policy;
+		new_task->ext_actionstate = default_task_null_policy;
 	}
 
 	if (kernel_task == TASK_NULL) {
@@ -495,6 +504,8 @@ task_create_internal(
 		new_task->priority = BASEPRI_DEFAULT;
 		new_task->max_priority = MAXPRI_USER;
 	}
+
+	bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
 	
 	lck_mtx_lock(&tasks_threads_lock);
 	queue_enter(&tasks, new_task, task_t, tasks);
@@ -525,6 +536,10 @@ task_deallocate(
 	if (task_deallocate_internal(task) > 0)
 		return;
 
+	lck_mtx_lock(&tasks_threads_lock);
+	queue_remove(&terminated_tasks, task, task_t, tasks);
+	lck_mtx_unlock(&tasks_threads_lock);
+
 	ipc_task_terminate(task);
 
 	if (task->affinity_space)
@@ -538,6 +553,11 @@ task_deallocate(
 #if CONFIG_MACF_MACH
 	labelh_release(task->label);
 #endif
+	OSAddAtomic64(task->tkm_private.alloc, (int64_t *)&tasks_tkm_private.alloc);
+	OSAddAtomic64(task->tkm_private.free, (int64_t *)&tasks_tkm_private.free);
+	OSAddAtomic64(task->tkm_shared.alloc, (int64_t *)&tasks_tkm_shared.alloc);
+	OSAddAtomic64(task->tkm_shared.free, (int64_t *)&tasks_tkm_shared.free);
+	zinfo_task_free(task);
 	zfree(task_zone, task);
 }
 
@@ -603,9 +623,9 @@ task_terminate_internal(
 		task_lock(task);
 	}
 
-	if (!task->active || !self->active) {
+	if (!task->active) {
 		/*
-		 *	Task or current act is already being terminated.
+		 *	Task is already being terminated.
 		 *	Just return an error. If we are dying, this will
 		 *	just get us to our AST special handler and that
 		 *	will get us to finalize the termination of ourselves.
@@ -665,13 +685,6 @@ task_terminate_internal(
 	 */
 	ipc_space_destroy(task->itk_space);
 
-#ifdef __ppc__
-	/*
-	 * PPC51: ppc64 is limited to 51-bit addresses.
-	 */
-	pmap_unmap_sharedpage(task->map->pmap);		/* Unmap commpage */
-#endif /* __ppc__ */
-
 	if (vm_map_has_4GB_pagezero(task->map))
 		vm_map_clear_4GB_pagezero(task->map);
 
@@ -693,6 +706,7 @@ task_terminate_internal(
 
 	lck_mtx_lock(&tasks_threads_lock);
 	queue_remove(&tasks, task, task_t, tasks);
+	queue_enter(&terminated_tasks, task, task_t, tasks);
 	tasks_count--;
 	lck_mtx_unlock(&tasks_threads_lock);
 
@@ -702,10 +716,6 @@ task_terminate_internal(
 	 */
 	thread_interrupt_level(interrupt_save);
 
-#if __ppc__
-    perfmon_release_facility(task); // notify the perfmon facility
-#endif
-
 	/*
 	 * Get rid of the task active reference on itself.
 	 */
@@ -1162,8 +1172,9 @@ task_resume(
 	}
 
 	if (task->user_stop_count > 0) {
-		if (--task->user_stop_count == 0)
+		if (--task->user_stop_count == 0) {
 			release = TRUE;
+		}
 	}
 	else {
 		task_unlock(task);
@@ -1182,6 +1193,60 @@ task_resume(
 	return (KERN_SUCCESS);
 }
 
+#if CONFIG_FREEZE
+
+/*
+ *	task_freeze:
+ *
+ *	Freeze a currently suspended task.
+ *
+ * Conditions:
+ * 	The caller holds a reference to the task
+ */
+kern_return_t
+task_freeze(
+	register task_t    task,
+	uint32_t           *purgeable_count,
+	uint32_t           *wired_count,
+	uint32_t           *clean_count,
+	uint32_t           *dirty_count,
+	boolean_t          *shared,
+	boolean_t          walk_only)
+{
+	if (task == TASK_NULL || task == kernel_task)
+		return (KERN_INVALID_ARGUMENT);
+
+	if (walk_only) {
+		vm_map_freeze_walk(task->map, purgeable_count, wired_count, clean_count, dirty_count, shared);		
+	} else {
+		vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, shared);
+	}
+
+	return (KERN_SUCCESS);
+}
+
+/*
+ *	task_thaw:
+ *
+ *	Thaw a currently frozen task.
+ *
+ * Conditions:
+ * 	The caller holds a reference to the task
+ */
+kern_return_t
+task_thaw(
+	register task_t		task)
+{
+	if (task == TASK_NULL || task == kernel_task)
+		return (KERN_INVALID_ARGUMENT);
+
+	vm_map_thaw(task->map);
+
+	return (KERN_SUCCESS);
+}
+
+#endif /* CONFIG_FREEZE */
+
 kern_return_t
 host_security_set_task_token(
         host_security_t  host_security,
@@ -1439,17 +1504,126 @@ task_info(
 	{
 		task_dyld_info_t info;
 
-		if (*task_info_count < TASK_DYLD_INFO_COUNT) {
+		/*
+		 * We added the format field to TASK_DYLD_INFO output.  For
+		 * temporary backward compatibility, accept the fact that
+		 * clients may ask for the old version - distinguished by the
+		 * size of the expected result structure.
+		 */
+#define TASK_LEGACY_DYLD_INFO_COUNT \
+		offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
+
+		if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
 			error = KERN_INVALID_ARGUMENT;
 			break;
 		}
+
 		info = (task_dyld_info_t)task_info_out;
 		info->all_image_info_addr = task->all_image_info_addr;
 		info->all_image_info_size = task->all_image_info_size;
-		*task_info_count = TASK_DYLD_INFO_COUNT;
+
+		/* only set format on output for those expecting it */
+		if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
+			info->all_image_info_format = task_has_64BitAddr(task) ?
+				                 TASK_DYLD_ALL_IMAGE_INFO_64 : 
+				                 TASK_DYLD_ALL_IMAGE_INFO_32 ;
+			*task_info_count = TASK_DYLD_INFO_COUNT;
+		} else {
+			*task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
+		}
 		break;
 	}
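
The TASK_LEGACY_DYLD_INFO_COUNT trick above exploits that the old struct ended exactly where all_image_info_format now begins, so offsetof() of the new field divided by sizeof(natural_t) reproduces the old count. A sketch with a stand-in struct of the same field order:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t natural_model_t;

    struct dyld_info_model {
        uint64_t all_image_info_addr;
        uint64_t all_image_info_size;
        int32_t  all_image_info_format;   /* the appended field */
    };

    #define LEGACY_COUNT \
        (offsetof(struct dyld_info_model, all_image_info_format) / \
         sizeof(natural_model_t))

    int main(void)
    {
        /* 16 bytes of legacy payload / 4-byte units = 4 */
        printf("legacy count = %zu\n", (size_t)LEGACY_COUNT);
        return 0;
    }
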
 
+	case TASK_EXTMOD_INFO:
+	{
+		task_extmod_info_t info;
+		void *p;
+
+		if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
+			error = KERN_INVALID_ARGUMENT;
+			break;
+		}
+
+		info = (task_extmod_info_t)task_info_out;
+
+		p = get_bsdtask_info(task);
+		if (p) {
+			proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
+		} else {
+			bzero(info->task_uuid, sizeof(info->task_uuid));
+		}
+		info->extmod_statistics = task->extmod_statistics;
+		*task_info_count = TASK_EXTMOD_INFO_COUNT;
+
+		break;
+	}
+
+	case TASK_KERNELMEMORY_INFO:
+	{
+		task_kernelmemory_info_t	tkm_info;
+		thread_t			thread;
+
+		if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
+		   error = KERN_INVALID_ARGUMENT;
+		   break;
+		}
+
+		tkm_info = (task_kernelmemory_info_t) task_info_out;
+
+		if (task == kernel_task) {
+			/*
+			 * All shared allocs/frees from other tasks count against
+			 * the kernel private memory usage.  If we are looking up
+			 * info for the kernel task, gather from everywhere.
+			 */
+			task_unlock(task);
+
+			/* start by accounting for all the terminated tasks against the kernel */
+			tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
+			tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
+			tkm_info->total_salloc = 0;
+			tkm_info->total_sfree = 0;
+
+			/* count all other task/thread shared alloc/free against the kernel */
+			lck_mtx_lock(&tasks_threads_lock);
+			queue_iterate(&tasks, task, task_t, tasks) {
+				if (task == kernel_task) {
+					tkm_info->total_palloc += task->tkm_private.alloc;
+					tkm_info->total_pfree += task->tkm_private.free;
+				}
+				tkm_info->total_palloc += task->tkm_shared.alloc;
+				tkm_info->total_pfree += task->tkm_shared.free;
+			}
+			queue_iterate(&threads, thread, thread_t, threads) {
+				if (thread->task == kernel_task) {
+					tkm_info->total_palloc += thread->tkm_private.alloc;
+					tkm_info->total_pfree += thread->tkm_private.free;
+				}
+				tkm_info->total_palloc += thread->tkm_shared.alloc;
+				tkm_info->total_pfree += thread->tkm_shared.free;
+			}
+			lck_mtx_unlock(&tasks_threads_lock);
+		} else {
+			/* account for all the terminated threads in the process */
+			tkm_info->total_palloc = task->tkm_private.alloc;
+			tkm_info->total_pfree = task->tkm_private.free;
+			tkm_info->total_salloc = task->tkm_shared.alloc;
+			tkm_info->total_sfree = task->tkm_shared.free;
+
+			/* then add in all the running threads */
+			queue_iterate(&task->threads, thread, thread_t, task_threads) {
+				tkm_info->total_palloc += thread->tkm_private.alloc;
+				tkm_info->total_pfree += thread->tkm_private.free;
+				tkm_info->total_salloc += thread->tkm_shared.alloc;
+				tkm_info->total_sfree += thread->tkm_shared.free;
+			}
+			task_unlock(task);
+		}
+
+		*task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
+		return KERN_SUCCESS;
+	}
+
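
The kernel_task branch of TASK_KERNELMEMORY_INFO folds everyone's shared counters into the kernel's private totals, following the rule stated in the comment that shared allocations ultimately count against kernel memory. A toy aggregation with made-up numbers to show the roll-up:

    #include <stdint.h>
    #include <stdio.h>

    struct tkm_model { uint64_t palloc, salloc; };

    int main(void)
    {
        struct tkm_model kernel  = { 100, 0 };
        struct tkm_model user[2] = { { 50, 10 }, { 30, 20 } };
        uint64_t total_palloc = 0;

        total_palloc += kernel.palloc;        /* kernel's own private  */
        for (int i = 0; i < 2; i++)
            total_palloc += user[i].salloc;   /* others' shared counts */

        printf("kernel total_palloc = %llu\n",   /* 100 + 10 + 20 = 130 */
               (unsigned long long)total_palloc);
        return 0;
    }
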
 	/* OBSOLETE */
 	case TASK_SCHED_FIFO_INFO:
 	{
@@ -1460,12 +1634,15 @@ task_info(
 		}
 
 		error = KERN_INVALID_POLICY;
+		break;
 	}
 
 	/* OBSOLETE */
 	case TASK_SCHED_RR_INFO:
 	{
 		register policy_rr_base_t	rr_base;
+		uint32_t quantum_time;
+		uint64_t quantum_ns;
 
 		if (*task_info_count < POLICY_RR_BASE_COUNT) {
 			error = KERN_INVALID_ARGUMENT;
@@ -1481,7 +1658,10 @@ task_info(
 
 		rr_base->base_priority = task->priority;
 
-		rr_base->quantum = std_quantum_us / 1000;
+		quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
+		absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
+		
+		rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
 
 		*task_info_count = POLICY_RR_BASE_COUNT;
 		break;
@@ -1546,6 +1726,7 @@ task_info(
             
 	case TASK_SCHED_INFO:
 		error = KERN_INVALID_ARGUMENT;
+		break;
 
 	case TASK_EVENTS_INFO:
 	{
@@ -1571,7 +1752,9 @@ task_info(
 		events_info->csw = task->c_switch;
 
 		queue_iterate(&task->threads, thread, thread_t, task_threads) {
-			events_info->csw += thread->c_switch;
+			events_info->csw	   += thread->c_switch;
+			events_info->syscalls_mach += thread->syscalls_mach;
+			events_info->syscalls_unix += thread->syscalls_unix;
 		}
 
 
@@ -1586,8 +1769,8 @@ task_info(
 		}
 
 		error = task_affinity_info(task, task_info_out, task_info_count);
+		break;
 	}
-
 	default:
 		error = KERN_INVALID_ARGUMENT;
 	}
@@ -1942,6 +2125,24 @@ task_reference(
 		task_reference_internal(task);
 }
 
+/*
+ * This routine is always called with the task lock held.
+ * It returns a thread handle without taking a reference; the caller
+ * must only operate on the result while the task lock is still held.
+ */
+thread_t
+task_findtid(task_t task, uint64_t tid)
+{
+	thread_t thread = THREAD_NULL;
+
+	queue_iterate(&task->threads, thread, thread_t, task_threads) {
+		if (thread->thread_id == tid)
+			break;
+	}
+	return (thread);
+}
+
+
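
task_findtid() is a plain linear scan that hands back a borrowed pointer; the lock condition in the comment is what makes that safe. A minimal stand-alone model of the lookup (the list type is a stand-in for the kernel's thread queue):

    #include <stdint.h>
    #include <stdio.h>

    struct thread_model { uint64_t thread_id; struct thread_model *next; };

    /* caller must hold the equivalent of the task lock across the call
     * and every later use of the returned pointer */
    static struct thread_model *
    findtid_model(struct thread_model *head, uint64_t tid)
    {
        for (struct thread_model *t = head; t != NULL; t = t->next)
            if (t->thread_id == tid)
                return t;      /* borrowed: no reference is taken */
        return NULL;
    }

    int main(void)
    {
        struct thread_model c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        struct thread_model *hit = findtid_model(&a, 2);
        printf("found tid=%llu\n",
               hit ? (unsigned long long)hit->thread_id : 0ULL);
        return 0;
    }
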
 #if CONFIG_MACF_MACH
 /*
  * Protect 2 task labels against modification by adding a reference on
diff --git a/osfmk/kern/task.h b/osfmk/kern/task.h
index 0e7ea86e2..af0482aca 100644
--- a/osfmk/kern/task.h
+++ b/osfmk/kern/task.h
@@ -104,15 +104,117 @@
 #include <mach/mach_param.h>
 #include <mach/task_info.h>
 #include <mach/exception_types.h>
+#include <mach/vm_statistics.h>
 #include <machine/task.h>
 
 #include <kern/cpu_data.h>
 #include <kern/queue.h>
 #include <kern/exception.h>
 #include <kern/lock.h>
-#include <kern/thread.h>
 #include <security/_label.h>
 #include <ipc/ipc_labelh.h>
+#endif /* MACH_KERNEL_PRIVATE */
+
+#ifdef XNU_KERNEL_PRIVATE
+
+/* defns for task->rsu_controldata */
+#define TASK_POLICY_CPU_RESOURCE_USAGE		0
+#define TASK_POLICY_WIREDMEM_RESOURCE_USAGE	1
+#define TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE	2
+#define TASK_POLICY_DISK_RESOURCE_USAGE		3
+#define TASK_POLICY_NETWORK_RESOURCE_USAGE	4
+#define TASK_POLICY_POWER_RESOURCE_USAGE	5
+
+#define TASK_POLICY_RESOURCE_USAGE_COUNT 6
+
+/*
+ * Process Action and Policy bit definitions
+ *
+ * The bit definitions of the policy states:
+ *
+ * 64   60    56   52   48   44   40   36   32   28   24   20   16   12   8        0
+ * |----|-----|----|----|----|----|----|----|----|----|----|----|----|----|--------|
+ * |RFU | RFU | PWR| NET| DSK| CPU| VM | WM | LVM| RFU| CPU| NET| GPU| DSK| BGRND  |
+ * |----|-----|----|----|----|----|----|----|----|----|----|----|----|----|--------|
+ * |<-----------   RESOURCE USAGE  -------->|< LOWSRC>|<-HARDWARE ACCESS->|BackGrnd|
+ * |----|-----|----|----|----|----|----|----|----|----|----|----|----|----|--------|
+ */
+
+#define TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE		0x00
+#define TASK_POLICY_BACKGROUND_ATTRIBUTE_LOWPRI		0x01
+#define TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE	0x02
+#define TASK_POLICY_BACKGROUND_ATTRIBUTE_NETTHROTTLE	0x04
+#define TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU		0x08
+#if CONFIG_EMBEDDED
+#define TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL		0x0F
+#else /* CONFIG_EMBEDDED */
+#define TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL		0x07
+#endif /* CONFIG_EMBEDDED */
+#define TASK_POLICY_BACKGROUND_ATTRIBUTE_DEFAULT	TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL
+
+/* Hardware disk access attributes; slightly different, as these must mirror the IOPOL_XXX values */
+#define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NONE	0x00
+#define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL	0x01
+#define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_PASSIVE	0x02
+#define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE	0x03
+#define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_DEFAULT	TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL
+
+/* Hardware GPU access attributes */
+#define TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NONE		0x00
+#define TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NORMAL	0x00
+#define TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_FULLACCESS	0x00
+#define TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS	0x01
+#define TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_DEFAULT	0x00
+
+/* Hardware Network access attributes */
+#define TASK_POLICY_HWACCESS_NET_ATTRIBUTE_NONE		0x00
+#define TASK_POLICY_HWACCESS_NET_ATTRIBUTE_NORMAL	0x00
+#define TASK_POLICY_HWACCESS_NET_ATTRIBUTE_THROTTLE	0x01
+#define TASK_POLICY_HWACCESS_NET_ATTRIBUTE_DEFAULT	0x00
+
+/* Hardware CPU access attributes */
+#define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_NONE		0x00
+#define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_NORMAL	0x00
+#define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_ALL		0x00
+#define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_ONE		0x01
+#define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_LLCACHE	0x02
+#define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_DEFAULT	0x00
+
+/* Resource usage/low resource attributes */
+#define TASK_POLICY_RESOURCE_ATTRIBUTE_NONE		0x00
+#define TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE		0x01
+#define TASK_POLICY_RESOURCE_ATTRIBUTE_SUSPEND 		0x02
+#define TASK_POLICY_RESOURCE_ATTRIBUTE_TERMINATE	0x03
+#define TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY		0x04
+#define TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT		0x00
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+#ifdef MACH_KERNEL_PRIVATE
+
+typedef struct process_policy {
+	uint64_t  apptype:4,
+		  rfu1:4,
+		  ru_power:4,	/* Resource Usage Power */
+		  ru_net:4,	/* Resource Usage Network */
+		  ru_disk:4,	/* Resource Usage Disk */
+		  ru_cpu:4,	/* Resource Usage CPU */
+		  ru_virtmem:4,	/* Resource Usage VM */
+		  ru_wiredmem:4,/* Resource Usage Wired Memory */
+		  low_vm:4,	/* Low Virtual Memory */
+		  rfu2:4,
+		  hw_cpu:4,	/* HW Access to CPU */
+		  hw_net:4,	/* HW Access to Network */
+		  hw_gpu:4,	/* HW Access to GPU */
+		  hw_disk:4,	/* HW Access to Disk */
+		  hw_bg:8;	/* Darwin Background Policy */
+} process_policy_t;
+
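
The declared bit widths are meant to pack process_policy_t into a single 64-bit word: fourteen 4-bit fields plus the 8-bit hw_bg make exactly 64. A compile-time sanity sketch (uint64_t bitfields and their layout are a compiler extension, so this mirrors GCC/Clang behavior rather than a language guarantee):

    #include <stdint.h>
    #include <stdio.h>

    typedef struct process_policy_model {
        uint64_t apptype:4, rfu1:4,
                 ru_power:4, ru_net:4, ru_disk:4, ru_cpu:4,
                 ru_virtmem:4, ru_wiredmem:4, low_vm:4, rfu2:4,
                 hw_cpu:4, hw_net:4, hw_gpu:4, hw_disk:4,
                 hw_bg:8;
    } process_policy_model_t;

    _Static_assert(sizeof(process_policy_model_t) == sizeof(uint64_t),
                   "14 * 4 + 8 = 64 bits packs into one word");

    int main(void)
    {
        printf("sizeof = %zu\n", sizeof(process_policy_model_t)); /* 8 */
        return 0;
    }
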
+#include <kern/thread.h>
+
+extern process_policy_t default_task_proc_policy;	/* init value for the process policy attributes */
+extern process_policy_t default_task_null_policy;	/* null (no-op) value for the process policy attributes */
 
 struct task {
 	/* Synchronization/destruction information */
@@ -193,9 +295,14 @@ struct task {
         integer_t messages_received;   /* messages received counter */
         integer_t syscalls_mach;       /* mach system call counter */
         integer_t syscalls_unix;       /* unix system call counter */
-		uint32_t  c_switch;			   /* total context switches */
-		uint32_t  p_switch;			   /* total processor switches */
-		uint32_t  ps_switch;		   /* total pset switches */
+	uint32_t  c_switch;			   /* total context switches */
+	uint32_t  p_switch;			   /* total processor switches */
+	uint32_t  ps_switch;		   /* total pset switches */
+
+	zinfo_usage_store_t tkm_private;/* private kmem alloc/free stats (reaped threads) */
+	zinfo_usage_store_t tkm_shared; /* shared kmem alloc/free stats (reaped threads) */
+	zinfo_usage_t tkm_zinfo;	/* per-task, per-zone usage statistics */
+
 #ifdef  MACH_BSD 
 	void *bsd_info;
 #endif  
@@ -221,6 +328,14 @@ struct task {
 	uint32_t t_chud;		/* CHUD flags, used for Shark */
 #endif
 
+	process_policy_t ext_actionstate;	/* externally applied actions */
+	process_policy_t ext_policystate;	/* externally defined process policy states */
+	process_policy_t actionstate;		/* self applied actions */
+	process_policy_t policystate;		/* process wide policy states */
+
+	uint64_t rsu_controldata[TASK_POLICY_RESOURCE_USAGE_COUNT];
+
+	vm_extmod_statistics_data_t	extmod_statistics;
 };
 
 #define task_lock(task)		lck_mtx_lock(&(task)->lock)
@@ -293,6 +408,24 @@ extern kern_return_t	task_hold(
 extern kern_return_t	task_release(
 							task_t		task);
 
+#if CONFIG_FREEZE
+
+/* Freeze a task's resident pages */
+extern kern_return_t	task_freeze(
+							task_t		task,
+							uint32_t	*purgeable_count,
+							uint32_t	*wired_count,
+							uint32_t	*clean_count,
+							uint32_t	*dirty_count,
+							boolean_t	*shared,
+							boolean_t	walk_only);
+
+/* Thaw a currently frozen task */
+extern kern_return_t	task_thaw(
+							task_t		task);
+
+#endif /* CONFIG_FREEZE */
+
 /* Halt all other threads in the current task */
 extern kern_return_t	task_start_halt(
 							task_t		task);
@@ -352,7 +485,7 @@ extern int get_task_numactivethreads(task_t task);
 /* JMM - should just be temporary (implementation in bsd_kern still) */
 extern void	set_bsdtask_info(task_t,void *);
 extern vm_map_t get_task_map_reference(task_t);
-extern vm_map_t	swap_task_map(task_t, thread_t, vm_map_t);
+extern vm_map_t	swap_task_map(task_t, thread_t, vm_map_t, boolean_t);
 extern pmap_t	get_task_pmap(task_t);
 extern uint64_t	get_task_resident_size(task_t);
 
@@ -373,6 +506,74 @@ extern kern_return_t machine_task_set_state(
 					mach_msg_type_number_t state_count);
 
 
+int proc_get_task_bg_policy(task_t task);
+int proc_get_thread_bg_policy(task_t task, uint64_t tid);
+int proc_get_self_isbackground(void);
+int proc_get_selfthread_isbackground(void);
+
+int proc_get_darwinbgstate(task_t, uint32_t *);
+int proc_set_bgtaskpolicy(task_t task, int intval);
+int proc_set1_bgtaskpolicy(task_t task, int intval);
+int proc_set_bgthreadpolicy(task_t task, uint64_t tid, int val);
+int proc_set1_bgthreadpolicy(task_t task, uint64_t tid, int val);
+
+int proc_add_bgtaskpolicy(task_t task, int val);
+int proc_add_bgthreadpolicy(task_t task, uint64_t tid, int val);
+int proc_remove_bgtaskpolicy(task_t task, int policy);
+int proc_remove_bgthreadpolicy(task_t task, uint64_t tid, int val);
+
+int proc_apply_bgtaskpolicy(task_t task);
+int proc_apply_bgtaskpolicy_external(task_t task);
+int proc_apply_bgtaskpolicy_internal(task_t task);
+int proc_apply_bgthreadpolicy(task_t task, uint64_t tid);
+int proc_apply_bgtask_selfpolicy(void);
+int proc_apply_bgthread_selfpolicy(void);
+int proc_apply_workq_bgthreadpolicy(thread_t);
+
+int proc_restore_bgtaskpolicy(task_t task);
+int proc_restore_bgthreadpolicy(task_t task, uint64_t tid);
+int proc_restore_bgthread_selfpolicy(void);
+int proc_restore_workq_bgthreadpolicy(thread_t);
+
+/* hw access routines */
+int proc_apply_task_diskacc(task_t task, int policy);
+int proc_apply_thread_diskacc(task_t task, uint64_t tid, int policy);
+int proc_apply_thread_selfdiskacc(int policy);
+int proc_get_task_disacc(task_t task);
+int proc_get_task_selfdiskacc(void);
+int proc_get_thread_selfdiskacc(void);
+int proc_denyinherit_policy(task_t task);
+int proc_denyselfset_policy(task_t task);
+
+int proc_get_task_selfgpuacc_deny(void);
+int proc_apply_task_gpuacc(task_t task, int prio);
+
+int proc_get_task_ruse_cpu(task_t task, uint32_t * policyp, uint32_t * percentagep, uint64_t * intervalp, uint64_t * deadlinep);
+int proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint32_t percentage, uint64_t interval, uint64_t deadline);
+thread_t task_findtid(task_t, uint64_t);
+
+#define PROC_POLICY_OSX_APPTYPE_NONE		0
+#define PROC_POLICY_OSX_APPTYPE_TAL		1
+#define PROC_POLICY_OSX_APPTYPE_WIDGET		2
+#define PROC_POLICY_OSX_APPTYPE_DBCLIENT	2	/* Not a bug, just rename of widget */
+#define PROC_POLICY_IOS_APPTYPE			3
+#define PROC_POLICY_IOS_NONUITYPE		4
+
+void proc_set_task_apptype(task_t, int);
+int proc_disable_task_apptype(task_t task, int policy_subtype);
+int proc_enable_task_apptype(task_t task, int policy_subtype);
+
+/* resource handle callback */
+int task_action_cpuusage(task_t);
+
+/* BSD call back functions */
+extern int proc_apply_resource_actions(void * p, int type, int action);
+extern int proc_restore_resource_actions(void * p, int type, int action);
+extern int task_restore_resource_actions(task_t task, int type);
+
+extern void proc_apply_task_networkbg(void * bsd_info);
+extern void proc_restore_task_networkbg(void * bsd_info);
+extern void proc_set_task_networkbg(void * bsd_info, int setbg);
 #endif	/* XNU_KERNEL_PRIVATE */
 
 #ifdef	KERNEL_PRIVATE
diff --git a/osfmk/kern/task_policy.c b/osfmk/kern/task_policy.c
index d3395ddb4..e8f9bc628 100644
--- a/osfmk/kern/task_policy.c
+++ b/osfmk/kern/task_policy.c
@@ -31,6 +31,53 @@
 
 #include <kern/sched.h>
 #include <kern/task.h>
+#include <mach/thread_policy.h>
+#include <sys/errno.h>
+#include <sys/resource.h>
+#include <machine/limits.h>
+
+static int proc_apply_bgtaskpolicy_locked(task_t task, int, int);
+static int proc_restore_bgtaskpolicy_locked(task_t, int, int, int);
+static int task_get_cpuusage(task_t task, uint32_t * percentagep, uint64_t * intervalp, uint64_t * deadlinep);
+static int task_set_cpuusage(task_t task, uint32_t percentage, uint64_t interval, uint64_t deadline);
+static int task_apply_resource_actions(task_t task, int type);
+static int proc_apply_bgthreadpolicy_locked(thread_t thread, int selfset);
+static void restore_bgthreadpolicy_locked(thread_t thread, int selfset);
+
+process_policy_t default_task_proc_policy = {0,
+					     0,
+					    TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, 
+					    TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, 
+					    TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, 
+					    TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, 
+					    TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, 
+					    TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, 
+					    TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, 
+					    0,
+					    TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_ALL,
+					    TASK_POLICY_HWACCESS_NET_ATTRIBUTE_NORMAL,
+					    TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_FULLACCESS,
+					    TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL,
+					    TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL
+					    };
+
+process_policy_t default_task_null_policy = {0,
+					     0,
+					    TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, 
+					    TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, 
+					    TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, 
+					    TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, 
+					    TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, 
+					    TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, 
+					    TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, 
+					    0,
+					    TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_NONE,
+					    TASK_POLICY_HWACCESS_NET_ATTRIBUTE_NONE,
+					    TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NONE,
+					    TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL,
+					    TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE
+					    };
+			
 
 static void
 task_priority(
@@ -46,6 +93,8 @@ task_policy_set(
 	mach_msg_type_number_t	count)
 {
 	kern_return_t		result = KERN_SUCCESS;
+	void * bsdinfo = NULL;
+	int setbg = 0;
 
 	if (task == TASK_NULL || task == kernel_task)
 		return (KERN_INVALID_ARGUMENT);
@@ -54,72 +103,151 @@ task_policy_set(
 
 	case TASK_CATEGORY_POLICY:
 	{
-		task_category_policy_t		info = (task_category_policy_t)policy_info;
+		task_category_policy_t info = (task_category_policy_t)policy_info;
 
 		if (count < TASK_CATEGORY_POLICY_COUNT)
 			return (KERN_INVALID_ARGUMENT);
 
+#if CONFIG_EMBEDDED
+		if ((current_task() == task) && (info != NULL) &&
+		    (info->role != TASK_THROTTLE_APPLICATION))
+			return (KERN_INVALID_ARGUMENT);
+#endif
+
 		task_lock(task);
+		if (	info->role == TASK_FOREGROUND_APPLICATION ||
+				info->role == TASK_BACKGROUND_APPLICATION) {
+#if !CONFIG_EMBEDDED
+			if (task->ext_actionstate.apptype != PROC_POLICY_OSX_APPTYPE_NONE) {
+				switch (info->role) {
+					case TASK_FOREGROUND_APPLICATION:
+						switch (task->ext_actionstate.apptype) {
+							case PROC_POLICY_OSX_APPTYPE_TAL:
+								/* Move the app to foreground with no DarwinBG */
+								proc_restore_bgtaskpolicy_locked(task, 1, 1, BASEPRI_FOREGROUND);
+								bsdinfo = task->bsd_info;
+								setbg = 0;
+								break;
+
+							case PROC_POLICY_OSX_APPTYPE_DBCLIENT: 
+								/* reset the apptype so enforcement on background/foregound */
+								/* reset the apptype so enforcement applies on background/foreground */
+								/* Internal application and make it foreground pri */
+								proc_restore_bgtaskpolicy_locked(task, 1, 0, BASEPRI_FOREGROUND);
+								bsdinfo = task->bsd_info;
+								setbg = 0;
+								break;
+
+							default:
+								/* the app types cannot be in CONTROL or GRAPHICS state, so it will be the default state here */
+								task_priority(task,
+									((info->role == TASK_FOREGROUND_APPLICATION)?
+									BASEPRI_FOREGROUND: BASEPRI_BACKGROUND),
+									task->max_priority);
+								break;
+					}
+					task->role = TASK_FOREGROUND_APPLICATION;
+					break;
+
+					case TASK_BACKGROUND_APPLICATION:
+						switch (task->ext_actionstate.apptype) {
+							case PROC_POLICY_OSX_APPTYPE_TAL:
+								/* TAL apps will get Darwin backgrounded if not already set */
+								if (task->ext_actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) {
+									/* external application of Darwin BG */
+									proc_apply_bgtaskpolicy_locked(task, 1, 1);
+									bsdinfo = task->bsd_info;
+									setbg = 1;
+								}
+								break;
+
+							default:
+								task_priority(task,
+									((info->role == TASK_FOREGROUND_APPLICATION)?
+									BASEPRI_FOREGROUND: BASEPRI_BACKGROUND),
+									task->max_priority);
+								break;
+						}
+						task->role = TASK_BACKGROUND_APPLICATION;
+						break;
 
-		if (	info->role == TASK_FOREGROUND_APPLICATION	||
-				info->role == TASK_BACKGROUND_APPLICATION		) {
+					default:
+						/* do nothing */
+						break;
+
+				} /* switch info->role */
+			} else { /* apptype == PROC_POLICY_OSX_APPTYPE_NONE */
+#endif /* !CONFIG_EMBEDDED */
 			switch (task->role) {
 
 			case TASK_FOREGROUND_APPLICATION:
 			case TASK_BACKGROUND_APPLICATION:
 			case TASK_UNSPECIFIED:
-				task_priority(task,
-								((info->role == TASK_FOREGROUND_APPLICATION)?
-									BASEPRI_FOREGROUND: BASEPRI_BACKGROUND),
-							  task->max_priority);
+				/* if there is no process-wide backgrounding ... */
+				if ((task->ext_actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) &&
+					(task->actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE)) {
+						task_priority(task,
+							((info->role == TASK_FOREGROUND_APPLICATION)?
+							BASEPRI_FOREGROUND: BASEPRI_BACKGROUND),
+							task->max_priority);
+				}
 				task->role = info->role;
 				break;
 
 			case TASK_CONTROL_APPLICATION:
 			case TASK_RENICED:
-				/* fail silently */
+				/* else fail silently */
 				break;
 
 			default:
 				result = KERN_INVALID_ARGUMENT;
 				break;
 			}
-		}
-		else
-		if (info->role == TASK_CONTROL_APPLICATION) {
-			if (	task != current_task()			||
-					task->sec_token.val[0] != 0			)
+#if !CONFIG_EMBEDDED
+		} /* apptype != PROC_POLICY_OSX_APPTYPE_NONE */
+#endif /* !CONFIG_EMBEDDED */
+
+		} else if (info->role == TASK_CONTROL_APPLICATION) {
+			if (task != current_task()||
+					task->sec_token.val[0] != 0)
 				result = KERN_INVALID_ARGUMENT;
 			else {
 				task_priority(task, BASEPRI_CONTROL, task->max_priority);
 				task->role = info->role;
 			}
-		}
-		else
-		if (info->role == TASK_GRAPHICS_SERVER) {
-			if (	task != current_task()			||
-					task->sec_token.val[0] != 0			)
+		} else if (info->role == TASK_GRAPHICS_SERVER) {
+			if (task != current_task() ||
+					task->sec_token.val[0] != 0)
 				result = KERN_INVALID_ARGUMENT;
 			else {
 				task_priority(task, MAXPRI_RESERVED - 3, MAXPRI_RESERVED);
 				task->role = info->role;
 			}
-		}
-		else
+		} else
+#if CONFIG_EMBEDDED
 		if (info->role == TASK_THROTTLE_APPLICATION) {
 			task_priority(task, MAXPRI_THROTTLE, MAXPRI_THROTTLE);
 			task->role = info->role;
-		}
-		else
-		if (info->role == TASK_DEFAULT_APPLICATION) {
+		} else if (info->role == TASK_DEFAULT_APPLICATION || info->role == TASK_NONUI_APPLICATION) {
 			task_priority(task, BASEPRI_DEFAULT, MAXPRI_USER);
 			task->role = info->role;
-		}
-		else
+		} else
+#else /* CONFIG_EMBEDDED */
+		if (info->role == TASK_DEFAULT_APPLICATION) {
+			task_priority(task, BASEPRI_DEFAULT, MAXPRI_USER);
+			task->role = info->role;
+		} else
+#endif /* CONFIG_EMBEDDED */
 			result = KERN_INVALID_ARGUMENT;
 
 		task_unlock(task);
 
+		/* if a backgrounding action was taken, apply network backgrounding as well */
+		if (bsdinfo != NULL)
+			proc_set_task_networkbg(bsdinfo, setbg);
+
 		break;
 	}
 
@@ -225,3 +353,979 @@ task_policy_get(
 
 	return (KERN_SUCCESS);
 }
+
+/* task Darwin BG enforcement/settings related routines */
+int 
+proc_get_task_bg_policy(task_t task)
+{
+
+	int selfset = 0;
+	int val = 0;
+
+	if (current_task() == task) 
+		selfset = 1;
+
+	if (selfset == 0) {
+		val = task->ext_policystate.hw_bg;
+	} else {
+		val = task->policystate.hw_bg;
+	}
+
+	return(val);
+}
+
+
+int 
+proc_get_thread_bg_policy(task_t task, uint64_t tid)
+{
+	thread_t self = current_thread();
+	thread_t thread = THREAD_NULL;
+	int val = 0;
+
+	if (tid == self->thread_id)  {
+		val = self->policystate.hw_bg;
+	} else {
+		task_lock(task);
+		thread = task_findtid(task, tid);
+		if (thread != NULL)
+			val = thread->ext_policystate.hw_bg;
+		task_unlock(task);
+	}
+
+	return(val);
+}
+
+int
+proc_get_self_isbackground(void)
+{
+	task_t task = current_task();
+	thread_t thread = current_thread();
+
+	if ((task->ext_actionstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) ||
+		(task->actionstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) ||
+		(thread->ext_actionstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) ||
+		(thread->actionstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE))
+			return(1);
+	else
+		return(0);	
+	
+}
+
+int
+proc_get_selfthread_isbackground(void)
+{
+	thread_t thread = current_thread();
+
+	if ((thread->ext_actionstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) ||
+		(thread->actionstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE))
+			return(1);
+	else
+		return(0);	
+}
+
+
+int 
+proc_set_bgtaskpolicy(task_t task, int intval)
+{
+
+	int selfset = 0;
+
+	if (current_task() == task) 
+		selfset = 1;
+
+	task_lock(task);
+
+	if (selfset == 0) {
+		/* already set? */
+		if (task->ext_policystate.hw_bg != intval)
+			task->ext_policystate.hw_bg = intval;
+	} else {
+		if (task->policystate.hw_bg != intval)
+			task->policystate.hw_bg = intval;
+	}
+
+	task_unlock(task);
+	return(0);
+}
+
+/* set and apply as well */
+int proc_set1_bgtaskpolicy(task_t task, int prio)
+{
+	int error = 0;
+
+	if (prio == PRIO_DARWIN_BG) {
+		error = proc_set_bgtaskpolicy(task, TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL);
+		if (error == 0)
+			error = proc_apply_bgtaskpolicy(task);
+	} else {
+		error = proc_restore_bgtaskpolicy(task);
+	}
+
+	return(error);
+}
+
+
+int 
+proc_set_bgthreadpolicy(task_t task, uint64_t tid, int prio)
+{
+	thread_t self = current_thread();
+	thread_t thread = THREAD_NULL;
+
+	task_lock(task);
+	if (tid == self->thread_id) {
+		self->policystate.hw_bg = prio;
+	} else {
+		thread = task_findtid(task, tid);
+		if (thread != NULL)
+			thread->ext_policystate.hw_bg = prio;
+	}
+		
+	task_unlock(task);
+
+	return(0);
+}
+
+int 
+proc_set1_bgthreadpolicy(task_t task, uint64_t tid, int prio)
+{
+	int error = 0;
+
+	if (prio == PRIO_DARWIN_BG) {
+		error = proc_set_bgthreadpolicy(task, tid, TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL);
+		if (error == 0)
+			error = proc_apply_bgthreadpolicy(task, tid);
+	} else {
+		error = proc_restore_bgthreadpolicy(task, tid);
+	}
+
+	return(error);
+}
+
+int 
+proc_add_bgtaskpolicy(task_t task, int val)
+{
+	int selfset = 0;
+
+	if (current_task() == task) 
+		selfset = 1;
+
+	task_lock(task);
+
+	if (selfset == 0) {
+		task->ext_policystate.hw_bg |= val;
+	} else {
+		task->policystate.hw_bg |= val;
+	}
+
+	task_unlock(task);
+	return(0);
+}
+
+int 
+proc_add_bgthreadpolicy(task_t task, uint64_t tid, int val)
+{
+	thread_t self = current_thread();
+	thread_t thread = THREAD_NULL;
+
+	task_lock(task);
+	if (tid == self->thread_id) {
+		self->policystate.hw_bg |= val;
+	} else {
+		thread = task_findtid(task, tid);
+		if (thread != NULL)
+			thread->ext_policystate.hw_bg |= val;
+	}
+		
+	task_unlock(task);
+
+	return(val);
+}
+
+int 
+proc_remove_bgtaskpolicy(task_t task, int intval)
+{
+	int selfset = 0;
+
+	if (current_task() == task) 
+		selfset = 1;
+
+	task_lock(task);
+
+	if (selfset == 0) {
+		task->ext_policystate.hw_bg &= ~intval;
+	} else {
+		task->policystate.hw_bg &= ~intval;
+	}
+
+	task_unlock(task);
+	return(0);
+}
+
+int 
+proc_remove_bgthreadpolicy(task_t task, uint64_t tid, int val)
+{
+	thread_t self = current_thread();
+	thread_t thread = THREAD_NULL;
+
+	task_lock(task);
+	if (tid == self->thread_id) {
+		self->policystate.hw_bg &= ~val;
+	} else {
+		thread = task_findtid(task, tid);
+		if (thread != NULL)
+			thread->ext_policystate.hw_bg &= ~val;
+	}
+		
+	task_unlock(task);
+
+	return(val);
+}
+
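
The add/remove pairs above treat hw_bg as a bitmask over the TASK_POLICY_BACKGROUND_ATTRIBUTE_* flags, so individual throttles can be layered and peeled off independently. A small demonstration with the values from the task.h hunk (the desktop composite is 0x07):

    #include <stdio.h>

    #define ATTR_LOWPRI        0x01
    #define ATTR_DISKTHROTTLE  0x02
    #define ATTR_NETTHROTTLE   0x04
    #define ATTR_ALL           0x07   /* non-embedded composite */

    int main(void)
    {
        int hw_bg = 0;
        hw_bg |= ATTR_ALL;            /* proc_add_bgtaskpolicy(task, ALL)  */
        hw_bg &= ~ATTR_NETTHROTTLE;   /* proc_remove_bgtaskpolicy(task, n) */
        printf("hw_bg = 0x%02x\n", hw_bg);  /* 0x03: lowpri|diskthrottle */
        return 0;
    }
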
+int
+proc_apply_bgtask_selfpolicy(void)
+{
+	return(proc_apply_bgtaskpolicy(current_task()));
+}
+
+int 
+proc_apply_bgtaskpolicy(task_t task)
+{
+	int external = 1;
+
+	if (task == current_task())
+		external = 0;
+
+	return(proc_apply_bgtaskpolicy_locked(task, 0, external));
+}
+
+int
+proc_apply_bgtaskpolicy_external(task_t task)
+{
+	return(proc_apply_bgtaskpolicy_locked(task, 0, 1));
+
+}
+
+int
+proc_apply_bgtaskpolicy_internal(task_t task)
+{
+	return(proc_apply_bgtaskpolicy_locked(task, 0, 0));
+}
+
+
+static int
+proc_apply_bgtaskpolicy_locked(task_t task, int locked, int external)
+{
+	if (locked == 0)
+		task_lock(task);
+
+	if (external != 0) {
+		/* already set? */
+		if (task->ext_actionstate.hw_bg != task->ext_policystate.hw_bg) {
+			task->ext_actionstate.hw_bg = task->ext_policystate.hw_bg;
+			task_priority(task, MAXPRI_THROTTLE, MAXPRI_THROTTLE);
+			/* background state applied */
+		}
+	} else {
+		if (task->actionstate.hw_bg != task->policystate.hw_bg) {
+			task->actionstate.hw_bg = task->policystate.hw_bg;
+			task_priority(task, MAXPRI_THROTTLE, MAXPRI_THROTTLE);
+		}
+	}
+	if (locked == 0)
+		task_unlock(task);
+	return(0);
+}
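+
+/*
+ * Note on the conventions above (a summary, nothing new): "external"
+ * selects the ext_* policy/action slots, i.e. background state imposed on
+ * the task from outside, while the internal slots track state the task
+ * set on itself.  A caller that already holds the task lock passes
+ * locked == 1 to avoid taking it recursively, as in:
+ *
+ *	task_lock(task);
+ *	proc_apply_bgtaskpolicy_locked(task, 1, 1);	(external, lock held)
+ *	task_unlock(task);
+ */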
+
+/* apply the self backgrounding even if the thread is not the current thread/task (e.g., timer threads) */
+int
+proc_apply_workq_bgthreadpolicy(thread_t thread)
+{
+	int error;
+	task_t wqtask = TASK_NULL;
+
+	if (thread != THREAD_NULL) {
+		wqtask = thread->task;
+		task_lock(wqtask);
+		/* apply the background as selfset internal one */
+		error = proc_apply_bgthreadpolicy_locked(thread, 1);
+		task_unlock(wqtask);
+	} else	
+		error = ESRCH;
+
+	return(error);
+}
+
+int 
+proc_apply_bgthreadpolicy(task_t task, uint64_t tid)
+{
+	thread_t self = current_thread();
+	thread_t thread = THREAD_NULL;
+	int selfset = 0, error = 0;
+	task_t localtask = TASK_NULL;
+
+	if (tid == self->thread_id) {
+		selfset = 1;
+		localtask = current_task();
+	} else {
+		localtask = task;
+	}
+
+	task_lock(localtask);
+	if (selfset != 0) {
+		thread = self;
+	} else {
+		thread = task_findtid(task, tid);
+	}
+
+	error = proc_apply_bgthreadpolicy_locked(thread, selfset);
+	task_unlock(localtask);
+
+	return(error);
+}
+
+static int
+proc_apply_bgthreadpolicy_locked(thread_t thread, int selfset)
+{
+	int set = 0;
+	thread_precedence_policy_data_t policy;
+
+	if (thread != NULL) {
+		if (selfset != 0) {
+			/* internal application */
+			if (thread->actionstate.hw_bg != thread->policystate.hw_bg) {
+				thread->actionstate.hw_bg = thread->policystate.hw_bg;
+				if (thread->ext_actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE)
+					set = 1;
+			}
+		} else {
+			/* external application */
+			if (thread->ext_actionstate.hw_bg != thread->ext_policystate.hw_bg) {
+				thread->ext_actionstate.hw_bg = thread->ext_policystate.hw_bg;
+				if (thread->actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE)
+					set = 1;
+			}
+		}
+			
+		if (set != 0) {
+			/* set thread priority (we did not save previous value) */
+			policy.importance = INT_MIN;
+				
+			thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
+                                                   (thread_policy_t)&policy,
+                                                   THREAD_PRECEDENCE_POLICY_COUNT );
+
+		}
+	} else	
+		return(ESRCH);
+		
+	return(0);
+}
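+
+/*
+ * Minimal sketch of the depress/restore mechanism shared by the thread
+ * helpers here (it mirrors the calls in this file; nothing new is
+ * assumed): the priority is depressed with an importance of INT_MIN and
+ * later restored with 0, and either step fires only when the other
+ * (internal vs. external) slot holds no background state:
+ *
+ *	thread_precedence_policy_data_t policy;
+ *	policy.importance = INT_MIN;	(0 to restore)
+ *	thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
+ *	    (thread_policy_t)&policy, THREAD_PRECEDENCE_POLICY_COUNT);
+ */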
+
+int
+proc_apply_bgthread_selfpolicy(void)
+{
+	return(proc_apply_bgthreadpolicy(current_task(), current_thread()->thread_id));
+}
+
+
+int 
+proc_restore_bgtaskpolicy(task_t task)
+{
+	int external = 1;
+
+	if (current_task() == task) 
+		external = 0;
+	return(proc_restore_bgtaskpolicy_locked(task, 0, external,  BASEPRI_DEFAULT));
+}
+
+static int
+proc_restore_bgtaskpolicy_locked(task_t task, int locked, int external, int pri)
+{
+	if (locked == 0)
+		task_lock(task);
+
+	if (external != 0) {
+		task->ext_actionstate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE;
+		/* self BG in flight? */
+		if (task->actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) {
+			task_priority(task, pri, MAXPRI_USER);
+#if !CONFIG_EMBEDDED
+			/* non-embedded users need the role for policy reapplication */
+			task->role = TASK_DEFAULT_APPLICATION;
+#endif /* !CONFIG_EMBEDDED */
+		}
+	} else {
+		task->actionstate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE;
+		/* external BG in flight? */
+		if (task->ext_actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) {
+			task_priority(task, pri, MAXPRI_USER);
+#if !CONFIG_EMBEDDED
+			/* non-embedded users need the role for policy reapplication */
+			task->role = TASK_DEFAULT_APPLICATION;
+#endif /* !CONFIG_EMBEDDED */
+		}
+	}
+
+	if (locked == 0)
+		task_unlock(task);
+
+	return(0);
+}
+
+/* restore the self backgrounding even if the thread is not the current thread */
+int
+proc_restore_workq_bgthreadpolicy(thread_t thread)
+{
+	int error = 0;
+	task_t wqtask = TASK_NULL;
+
+	if (thread != THREAD_NULL) {
+		wqtask = thread->task;
+		task_lock(wqtask);
+		/* remove the background and restore the default importance, as a self (internal) removal */
+		restore_bgthreadpolicy_locked(thread, 1);
+		task_unlock(wqtask);
+	} else
+		error = ESRCH;
+
+	return(error);
+}
+
+int proc_restore_bgthread_selfpolicy(void)
+{
+	return(proc_restore_bgthreadpolicy(current_task(), thread_tid(current_thread())));
+}
+
+
+int 
+proc_restore_bgthreadpolicy(task_t task, uint64_t tid)
+{
+	int selfset = 0;
+	thread_t self = current_thread();
+	thread_t thread = THREAD_NULL;
+
+	task_lock(task);
+	if (tid == self->thread_id) {
+		thread = self;
+		selfset = 1;
+	} else {
+		thread = task_findtid(task, tid);
+	}
+
+	if (thread != NULL)
+		restore_bgthreadpolicy_locked(thread, selfset);
+
+	task_unlock(task);
+
+	if (thread != NULL)
+		return(0);
+	else
+		return(ESRCH);
+}
+
+static void
+restore_bgthreadpolicy_locked(thread_t thread, int selfset)
+{
+	thread_precedence_policy_data_t policy;
+	int reset = 0;
+
+	if (thread != NULL) {
+		if (selfset != 0) {
+			thread->actionstate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE;
+			/* external BG in flight? */
+			if (thread->ext_actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE)
+				reset = 1;
+		} else {
+			thread->ext_actionstate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE;
+			/* self BG in flight? */
+			if (thread->actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE)
+				reset = 1;
+		}
+			
+		if (reset != 0) {
+			/* reset thread priority (we did not save previous value) */
+			policy.importance = 0;
+			thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
+                                                   (thread_policy_t)&policy,
+                                                   THREAD_PRECEDENCE_POLICY_COUNT );
+		}
+	}
+}
+
+void 
+proc_set_task_apptype(task_t task, int type)
+{
+	switch (type) {
+		case PROC_POLICY_OSX_APPTYPE_TAL:
+			task->ext_policystate.apptype = type;
+			task->policystate.apptype = type;
+			proc_apply_bgtaskpolicy_external(task);
+			/* indicate that BG is set and next foreground needs to reset */
+			task->ext_actionstate.apptype = type;
+			break;
+
+		case PROC_POLICY_OSX_APPTYPE_DBCLIENT:
+			task->ext_policystate.apptype = type;
+			task->policystate.apptype = type;
+			proc_apply_bgtaskpolicy_internal(task);
+			/* indicate that BG is set and next foreground needs to reset */
+			task->ext_actionstate.apptype = type;
+			break;
+	
+		case PROC_POLICY_IOS_APPTYPE:
+			task->ext_policystate.apptype = type;
+			task->policystate.apptype = type;
+			break;
+		case PROC_POLICY_IOS_NONUITYPE:
+			task->ext_policystate.apptype = type;
+			task->policystate.apptype = type;
+			/* set to deny access to gpu */
+			task->ext_actionstate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS;
+			task->ext_policystate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS;
+			break;
+
+		default:
+			break;
+	}
+}
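+
+/*
+ * Illustrative call (hypothetical call site, not from this change):
+ * exec-time setup on OS X could tag a TAL application so that it starts
+ * out backgrounded:
+ *
+ *	proc_set_task_apptype(task, PROC_POLICY_OSX_APPTYPE_TAL);
+ *
+ * The TAL and DBCLIENT cases apply background policy immediately and
+ * record the apptype in ext_actionstate so a later foreground transition
+ * knows there is state to undo; the iOS cases only record the type (and,
+ * for non-UI processes, deny GPU access).
+ */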
+
+/* update the darwin background action state in the flags field for libproc */
+#define PROC_FLAG_DARWINBG      0x8000  /* process in darwin background */
+#define PROC_FLAG_EXT_DARWINBG  0x10000 /* process in darwin background - external enforcement */
+
+int
+proc_get_darwinbgstate(task_t task, uint32_t * flagsp)
+{
+	if (task->ext_actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL){
+		*flagsp |= PROC_FLAG_EXT_DARWINBG;
+	}
+	if (task->actionstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL){
+		*flagsp |= PROC_FLAG_DARWINBG;
+	}
+		
+	return(0);
+}
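+
+/*
+ * Sketch of a hypothetical consumer of these bits (only the PROC_FLAG_*
+ * macros come from above; the surrounding code is assumed):
+ *
+ *	uint32_t flags = 0;
+ *	proc_get_darwinbgstate(task, &flags);
+ *	if (flags & (PROC_FLAG_DARWINBG | PROC_FLAG_EXT_DARWINBG))
+ *		treat the process as darwin-background;
+ *
+ * Note that the routine ORs into *flagsp, so callers should initialize it.
+ */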
+
+/*
+ * HW disk access related routines; they need to return
+ * IOPOL_XXX equivalents for spec_xxx/throttle updates.
+ */
+
+int 
+proc_get_task_disacc(task_t task)
+{
+	if ((task->ext_actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
+		return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
+	if (task->ext_actionstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL)
+		return(task->ext_actionstate.hw_disk);
+	if ((task->actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
+		return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
+	if (task->actionstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL)
+		return(task->actionstate.hw_disk);
+	return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL);
+}
+
+int
+proc_get_task_selfdiskacc(void)
+{
+	task_t task = current_task();
+	thread_t thread = current_thread();
+
+	/*
+	 * As per defined iopolicysys behavior, thread trumps task.
+	 * Do we need to follow that for external enforcements of BG or hw access?
+	 * Status quo for now.
+	 */
+	if ((thread->ext_actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
+		return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
+	if (thread->ext_actionstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL)
+		return(thread->ext_actionstate.hw_disk);
+	if ((thread->actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
+		return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
+	if (thread->actionstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL)
+		return(thread->actionstate.hw_disk);
+
+	if ((task->ext_actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
+		return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
+	if (task->ext_actionstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL)
+		return(task->ext_actionstate.hw_disk);
+	if ((task->actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
+		return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
+	if (task->actionstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL)
+		return(task->actionstate.hw_disk);
+	return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL);
+}
+
+int
+proc_get_thread_selfdiskacc(void)
+{
+	thread_t thread = current_thread();
+
+	if ((thread->ext_actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
+		return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
+	if (thread->ext_actionstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL)
+		return(thread->ext_actionstate.hw_disk);
+	if ((thread->actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0)
+		return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
+	if (thread->actionstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL)
+		return(thread->actionstate.hw_disk);
+	return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL);
+}
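+
+/*
+ * Precedence summary for the lookups above: within each scope the
+ * background DISKTHROTTLE bit is honored before an explicit hw_disk
+ * setting, and where both are consulted, thread state (external, then
+ * self) trumps task state (external, then self), per the iopolicysys
+ * note above.
+ */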
+
+int proc_apply_task_diskacc(task_t task, int policy)
+{
+	task_t self = current_task();
+
+	task_lock(task);
+	if (task == self) {
+		task->actionstate.hw_disk = policy;
+		task->policystate.hw_disk = policy;
+	} else {
+		task->ext_actionstate.hw_disk = policy;
+		task->ext_policystate.hw_disk = policy;
+	}
+	task_unlock(task);
+	return(0);
+}
+
+int proc_apply_thread_diskacc(task_t task, uint64_t tid, int policy)
+{
+	thread_t thread;
+
+	if (tid == TID_NULL)
+		return(proc_apply_thread_selfdiskacc(policy));
+
+	task_lock(task);
+	thread = task_findtid(task, tid);
+	if (thread != NULL) {
+		thread->ext_actionstate.hw_disk = policy;
+		thread->ext_policystate.hw_disk = policy;
+	}
+	task_unlock(task);
+
+	if (thread != NULL)
+		return(0);
+	else
+		return(ESRCH);
+}
+
+int
+proc_apply_thread_selfdiskacc(int policy)
+{
+	task_t task = current_task();
+	thread_t thread = current_thread();
+
+	task_lock(task);
+	thread->actionstate.hw_disk = policy;
+	thread->policystate.hw_disk = policy;
+	task_unlock(task);
+	return(0);
+}
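+
+/*
+ * Illustrative mapping (an assumption, suggested by the IOPOL_XXX note
+ * above): an iopolicysys(2) handler could translate an IOPOL_THROTTLE
+ * request scoped to the current thread into:
+ *
+ *	error = proc_apply_thread_selfdiskacc(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE);
+ */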
+
+int 
+proc_denyinherit_policy(__unused task_t task)
+{
+	return(0);
+}
+
+int 
+proc_denyselfset_policy(__unused task_t task)
+{
+	return(0);
+}
+
+/* HW GPU access related routines */
+int
+proc_get_task_selfgpuacc_deny(void)
+{
+	task_t task = current_task();
+	thread_t thread = current_thread();
+
+	if (((task->ext_actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU) != 0) || (task->ext_actionstate.hw_gpu == TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS))
+		return(TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS);
+	if (((task->actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU) != 0) || (task->actionstate.hw_gpu == TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS))
+		return(TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS);
+	if (((thread->ext_actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU) != 0) || (thread->ext_actionstate.hw_gpu == TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS))
+		return(TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS);
+	if (((thread->actionstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU) != 0) || (thread->actionstate.hw_gpu == TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS))
+		return(TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS);
+
+	return(TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NORMAL);
+}
+
+int
+proc_apply_task_gpuacc(task_t task, int policy)
+{
+
+	task_t self = current_task();
+
+	task_lock(task);
+	if (task == self) {
+		task->actionstate.hw_gpu = policy;
+		task->policystate.hw_gpu = policy;
+	} else {
+		task->ext_actionstate.hw_gpu = policy;
+		task->ext_policystate.hw_gpu = policy;
+	}
+	task_unlock(task);
+
+	return(0);
+}
+
+/* Resource usage, CPU related routines */
+int 
+proc_get_task_ruse_cpu(task_t task, uint32_t * policyp, uint32_t * percentagep, uint64_t * intervalp, uint64_t * deadlinep)
+{
+	int error = 0;
+
+	task_lock(task);
+	if (task != current_task()) {
+		*policyp = task->ext_policystate.ru_cpu;
+	} else {
+		*policyp = task->policystate.ru_cpu;
+	}
+	
+	error = task_get_cpuusage(task, percentagep, intervalp, deadlinep);
+	task_unlock(task);
+
+	return(error);
+}
+
+int 
+proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint32_t percentage, uint64_t interval, uint64_t deadline)
+{
+	int error = 0;
+
+	task_lock(task);
+	if (task != current_task()) {
+		task->ext_policystate.ru_cpu = policy;	
+	} else {
+		task->policystate.ru_cpu = policy;	
+	}
+	error = task_set_cpuusage(task, percentage, interval, deadline);
+	task_unlock(task);
+	return(error);
+}
+
+
+/* used to apply resource limit related actions */
+static int
+task_apply_resource_actions(task_t task, int type)
+{
+	int action = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
+	void * bsdinfo = NULL;
+	
+	switch (type) {
+		case TASK_POLICY_CPU_RESOURCE_USAGE:
+			break;
+		case TASK_POLICY_WIREDMEM_RESOURCE_USAGE:
+		case TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE:
+		case TASK_POLICY_DISK_RESOURCE_USAGE:
+		case TASK_POLICY_NETWORK_RESOURCE_USAGE:
+		case TASK_POLICY_POWER_RESOURCE_USAGE:
+			return(0);
+
+		default:
+			return(1);
+	}
+
+	/* only cpu actions for now */
+	task_lock(task);
+	
+	if (task->ext_actionstate.ru_cpu == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
+		/* apply action */
+		task->ext_actionstate.ru_cpu = task->ext_policystate.ru_cpu;
+		action = task->ext_actionstate.ru_cpu;
+	}
+	if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
+		bsdinfo = task->bsd_info;
+		task_unlock(task);
+		proc_apply_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
+	} else
+		task_unlock(task);
+
+	return(0);
+}
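+
+/*
+ * Note the lock-drop pattern above: task->bsd_info is sampled under the
+ * task lock, which is then released before calling into the BSD layer,
+ * presumably so proc_apply_resource_actions() can block or take its own
+ * locks without the task lock held.
+ */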
+
+int
+task_restore_resource_actions(task_t task, int type)
+{
+	int action;
+	void * bsdinfo = NULL;
+	
+	switch (type) {
+		case TASK_POLICY_CPU_RESOURCE_USAGE:
+			break;
+		case TASK_POLICY_WIREDMEM_RESOURCE_USAGE:
+		case TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE:
+		case TASK_POLICY_DISK_RESOURCE_USAGE:
+		case TASK_POLICY_NETWORK_RESOURCE_USAGE:
+		case TASK_POLICY_POWER_RESOURCE_USAGE:
+			return(0);
+
+		default:
+			return(1);
+	}
+
+	/* only cpu actions for now */
+	task_lock(task);
+	
+	action = task->ext_actionstate.ru_cpu;
+	if (task->ext_actionstate.ru_cpu != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
+		/* reset action */
+		task->ext_actionstate.ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
+	}
+	if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
+		bsdinfo = task->bsd_info;
+		task_unlock(task);
+		proc_restore_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
+	} else
+		task_unlock(task);
+
+	return(0);
+
+}
+
+/* For ledger hookups */
+static int
+task_get_cpuusage(__unused task_t task, uint32_t * percentagep, uint64_t * intervalp, uint64_t * deadlinep)
+{
+	*percentagep = 0;
+	*intervalp = 0;
+	*deadlinep = 0;
+
+	return(0);
+}
+
+static int
+task_set_cpuusage(__unused task_t task, __unused uint32_t percentage, __unused uint64_t interval, __unused uint64_t deadline)
+{
+	return(0);
+}
+
+/* called by ledger unit to enforce action due to resource usage criteria being met */
+int
+task_action_cpuusage(task_t task)
+{
+	return(task_apply_resource_actions(task, TASK_POLICY_CPU_RESOURCE_USAGE));
+}
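+
+/*
+ * Illustrative flow (restating the comment above; the exact ledger call
+ * site is not shown in this file): when a CPU-usage criterion trips, the
+ * ledger code calls
+ *
+ *	task_action_cpuusage(task);
+ *
+ * which latches and applies the configured ext_policystate.ru_cpu action
+ * once, until task_restore_resource_actions() clears it.
+ */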
+
+int 
+proc_disable_task_apptype(task_t task, int policy_subtype)
+{
+	void * bsdinfo = NULL;
+	int setbg = 0;
+	int ret = 0;
+	int maxpri = BASEPRI_DEFAULT;
+
+	task_lock(task);
+
+	if (task->ext_policystate.apptype != policy_subtype) {
+		ret = EINVAL;
+		goto out;
+	}
+
+#if !CONFIG_EMBEDDED
+	switch (task->role) {
+		case TASK_FOREGROUND_APPLICATION:
+			maxpri = BASEPRI_FOREGROUND;
+			break;
+		case TASK_BACKGROUND_APPLICATION:
+			maxpri = BASEPRI_BACKGROUND;
+			break;
+		default:
+			maxpri = BASEPRI_DEFAULT;
+	}
+#endif
+			
+	if (task->ext_actionstate.apptype != PROC_POLICY_OSX_APPTYPE_NONE) {
+			switch (task->ext_actionstate.apptype) {
+				case PROC_POLICY_OSX_APPTYPE_TAL:
+					/* disable foreground/background handling */
+					task->ext_actionstate.apptype = PROC_POLICY_OSX_APPTYPE_NONE;
+					/* external BG application removal */
+					proc_restore_bgtaskpolicy_locked(task, 1, 1, maxpri);
+					bsdinfo = task->bsd_info;
+					setbg = 0;
+					break;
+
+				case PROC_POLICY_OSX_APPTYPE_DBCLIENT:
+					/* disable foreground/background handling */
+					task->ext_actionstate.apptype = PROC_POLICY_OSX_APPTYPE_NONE;
+					/* internal BG application removal */
+					proc_restore_bgtaskpolicy_locked(task, 1, 0, maxpri);
+					bsdinfo = task->bsd_info;
+					setbg = 0;
+					break;
+
+				default:
+					ret = EINVAL;
+					break;
+			}
+	} else
+		ret = EINVAL;
+
+out:
+	task_unlock(task);
+	/* if backgrounding action ... */
+	if (bsdinfo != NULL)
+		proc_set_task_networkbg(bsdinfo, setbg);
+
+	return(ret);
+}
+
+int 
+proc_enable_task_apptype(task_t task, int policy_subtype)
+{
+	void * bsdinfo = NULL;
+	int setbg = 0;
+	int ret = 0;
+
+	task_lock(task);
+
+	if (task->ext_policystate.apptype != policy_subtype) {
+		ret = EINVAL;
+		goto out;
+	}
+
+	if (task->ext_actionstate.apptype == PROC_POLICY_OSX_APPTYPE_NONE) {
+		switch (task->ext_policystate.apptype) {
+			case PROC_POLICY_OSX_APPTYPE_TAL:
+				/* TAL policy is activated again */
+				task->ext_actionstate.apptype = task->ext_policystate.apptype;
+				if (task->role == TASK_BACKGROUND_APPLICATION) {
+					proc_apply_bgtaskpolicy_locked(task, 1, 1);
+					bsdinfo = task->bsd_info;
+					setbg = 1;
+				}
+				ret = 0;
+				ret = 0;
+				break;
+			default:
+				ret = EINVAL;
+		}
+	} else
+		ret = EINVAL;
+
+out:
+	task_unlock(task);
+	/* if backgrounding action ... */
+	if (bsdinfo != NULL)
+		proc_set_task_networkbg(bsdinfo, setbg);
+
+	return(ret);
+}
+
diff --git a/osfmk/kern/thread.c b/osfmk/kern/thread.c
index adff14820..84f7cf817 100644
--- a/osfmk/kern/thread.c
+++ b/osfmk/kern/thread.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -91,11 +91,13 @@
 #include <mach/vm_param.h>
 
 #include <machine/thread.h>
+#include <machine/pal_routines.h>
 
 #include <kern/kern_types.h>
 #include <kern/kalloc.h>
 #include <kern/cpu_data.h>
 #include <kern/counters.h>
+#include <kern/extmod_statistics.h>
 #include <kern/ipc_mig.h>
 #include <kern/ipc_tt.h>
 #include <kern/mach_param.h>
@@ -182,8 +184,9 @@ thread_bootstrap(void)
 	thread_template.parameter = NULL;
 
 	thread_template.importance = 0;
-	thread_template.sched_mode = 0;
-	thread_template.safe_mode = 0;
+	thread_template.sched_mode = TH_MODE_NONE;
+	thread_template.sched_flags = 0;
+	thread_template.saved_mode = TH_MODE_NONE;
 	thread_template.safe_release = 0;
 
 	thread_template.priority = 0;
@@ -198,14 +201,18 @@ thread_bootstrap(void)
 	thread_template.realtime.deadline = UINT64_MAX;
 
 	thread_template.current_quantum = 0;
+	thread_template.last_run_time = 0;
+	thread_template.last_quantum_refill_time = 0;
 
 	thread_template.computation_metered = 0;
 	thread_template.computation_epoch = 0;
 
+#if defined(CONFIG_SCHED_TRADITIONAL)
 	thread_template.sched_stamp = 0;
-	thread_template.sched_usage = 0;
 	thread_template.pri_shift = INT8_MAX;
+	thread_template.sched_usage = 0;
 	thread_template.cpu_usage = thread_template.cpu_delta = 0;
+#endif
 	thread_template.c_switch = thread_template.p_switch = thread_template.ps_switch = 0;
 
 	thread_template.bound_processor = PROCESSOR_NULL;
@@ -247,6 +254,18 @@ thread_bootstrap(void)
 
 	thread_template.affinity_set = NULL;
 	
+	thread_template.syscalls_unix = 0;
+	thread_template.syscalls_mach = 0;
+
+	thread_template.tkm_private.alloc = 0;
+	thread_template.tkm_private.free = 0;
+	thread_template.tkm_shared.alloc = 0;
+	thread_template.tkm_shared.free = 0;
+	thread_template.actionstate = default_task_null_policy;
+	thread_template.ext_actionstate = default_task_null_policy;
+	thread_template.policystate = default_task_proc_policy;
+	thread_template.ext_policystate = default_task_proc_policy;
+
 	init_thread = thread_template;
 	machine_set_current_thread(&init_thread);
 }
@@ -259,8 +278,9 @@ thread_init(void)
 			thread_max * sizeof(struct thread),
 			THREAD_CHUNK * sizeof(struct thread),
 			"threads");
+
 	zone_change(thread_zone, Z_NOENCRYPT, TRUE);
-	
+
 	lck_grp_attr_setdefault(&thread_lck_grp_attr);
 	lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr);
 	lck_attr_setdefault(&thread_lck_attr);
@@ -288,10 +308,13 @@ void
 thread_terminate_self(void)
 {
 	thread_t		thread = current_thread();
+
 	task_t			task;
 	spl_t			s;
 	int threadcnt;
 
+	pal_thread_terminate_self(thread);
+
 	DTRACE_PROC(lwp__exit);
 
 	thread_mtx_lock(thread);
@@ -309,8 +332,8 @@ thread_terminate_self(void)
 	 *	Cancel priority depression, wait for concurrent expirations
 	 *	on other processors.
 	 */
-	if (thread->sched_mode & TH_MODE_ISDEPRESSED) {
-		thread->sched_mode &= ~TH_MODE_ISDEPRESSED;
+	if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
+		thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
 
 		if (timer_call_cancel(&thread->depress_timer))
 			thread->depress_timer_active--;
@@ -374,8 +397,7 @@ thread_terminate_self(void)
 	 *	If there is a reserved stack, release it.
 	 */
 	if (thread->reserved_stack != 0) {
-		if (thread->reserved_stack != thread->kernel_stack)
-			stack_free_stack(thread->reserved_stack);
+		stack_free_reserved(thread);
 		thread->reserved_stack = 0;
 	}
 
@@ -404,6 +426,7 @@ thread_deallocate(
 	if (thread_deallocate_internal(thread) > 0)
 		return;
 
+
 	ipc_thread_terminate(thread);
 
 	task = thread->task;
@@ -417,14 +440,14 @@ thread_deallocate(
 	}
 #endif  /* MACH_BSD */   
 
-	task_deallocate(task);
-
 	if (thread->kernel_stack != 0)
 		stack_free(thread);
 
 	lck_mtx_destroy(&thread->mutex, &thread_lck_grp);
 	machine_thread_destroy(thread);
 
+	task_deallocate(task);
+
 	zfree(thread_zone, thread);
 }
 
@@ -436,8 +459,11 @@ thread_deallocate(
 static void
 thread_terminate_daemon(void)
 {
-	thread_t			thread;
-	task_t				task;
+	thread_t	self, thread;
+	task_t		task;
+
+	self = current_thread();
+	self->options |= TH_OPT_SYSTEM_CRITICAL;
 
 	(void)splsched();
 	simple_lock(&thread_terminate_lock);
@@ -456,6 +482,14 @@ thread_terminate_daemon(void)
 		task->p_switch += thread->p_switch;
 		task->ps_switch += thread->ps_switch;
 
+		task->syscalls_unix += thread->syscalls_unix;
+		task->syscalls_mach += thread->syscalls_mach;
+
+		task->tkm_private.alloc += thread->tkm_private.alloc;
+		task->tkm_private.free += thread->tkm_private.free;
+		task->tkm_shared.alloc += thread->tkm_shared.alloc;
+		task->tkm_shared.free += thread->tkm_shared.free;
+
 		queue_remove(&task->threads, thread, thread_t, task_threads);
 		task->thread_count--;
 
@@ -483,6 +517,7 @@ thread_terminate_daemon(void)
 	simple_unlock(&thread_terminate_lock);
 	/* splsched */
 
+	self->options &= ~TH_OPT_SYSTEM_CRITICAL;
 	thread_block((thread_continue_t)thread_terminate_daemon);
 	/*NOTREACHED*/
 }
@@ -561,7 +596,7 @@ void
 thread_daemon_init(void)
 {
 	kern_return_t	result;
-	thread_t		thread;
+	thread_t	thread = NULL;
 
 	simple_lock_init(&thread_terminate_lock, 0);
 	queue_init(&thread_terminate_queue);
@@ -712,18 +747,25 @@ thread_create_internal(
 #endif
 
 	/* Set the thread's scheduling parameters */
-	if (parent_task != kernel_task)
-		new_thread->sched_mode |= TH_MODE_TIMESHARE;
+	new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
+	new_thread->sched_flags = 0;
 	new_thread->max_priority = parent_task->max_priority;
 	new_thread->task_priority = parent_task->priority;
 	new_thread->priority = (priority < 0)? parent_task->priority: priority;
 	if (new_thread->priority > new_thread->max_priority)
 		new_thread->priority = new_thread->max_priority;
+#if CONFIG_EMBEDDED 
+	if (new_thread->priority < MAXPRI_THROTTLE) {
+		new_thread->priority = MAXPRI_THROTTLE;
+	}
+#endif /* CONFIG_EMBEDDED */
 	new_thread->importance =
 					new_thread->priority - new_thread->task_priority;
+#if defined(CONFIG_SCHED_TRADITIONAL)
 	new_thread->sched_stamp = sched_tick;
 	new_thread->pri_shift = sched_pri_shift;
-	compute_priority(new_thread, FALSE);
+#endif
+	SCHED(compute_priority)(new_thread, FALSE);
 
 	new_thread->active = TRUE;
 
@@ -751,10 +793,11 @@ thread_create_internal(
 	return (KERN_SUCCESS);
 }
 
-kern_return_t
-thread_create(
+static kern_return_t
+thread_create_internal2(
 	task_t				task,
-	thread_t			*new_thread)
+	thread_t			*new_thread,
+	boolean_t			from_user)
 {
 	kern_return_t		result;
 	thread_t			thread;
@@ -771,6 +814,9 @@ thread_create(
 	if (task->suspend_count > 0)
 		thread_hold(thread);
 
+	if (from_user)
+		extmod_statistics_incr_thread_create(task);
+
 	task_unlock(task);
 	lck_mtx_unlock(&tasks_threads_lock);
 	
@@ -779,13 +825,36 @@ thread_create(
 	return (KERN_SUCCESS);
 }
 
+/* No prototype, since task_server.h has the _from_user version if KERNEL_SERVER */
 kern_return_t
-thread_create_running(
+thread_create(
+	task_t				task,
+	thread_t			*new_thread);
+
+kern_return_t
+thread_create(
+	task_t				task,
+	thread_t			*new_thread)
+{
+	return thread_create_internal2(task, new_thread, FALSE);
+}
+
+kern_return_t
+thread_create_from_user(
+	task_t				task,
+	thread_t			*new_thread)
+{
+	return thread_create_internal2(task, new_thread, TRUE);
+}
+
+static kern_return_t
+thread_create_running_internal2(
 	register task_t         task,
 	int                     flavor,
 	thread_state_t          new_state,
 	mach_msg_type_number_t  new_state_count,
-	thread_t				*new_thread)
+	thread_t				*new_thread,
+	boolean_t				from_user)
 {
 	register kern_return_t  result;
 	thread_t				thread;
@@ -812,6 +881,9 @@ thread_create_running(
 	thread_start_internal(thread);
 	thread_mtx_unlock(thread);
 
+	if (from_user)
+		extmod_statistics_incr_thread_create(task);
+
 	task_unlock(task);
 	lck_mtx_unlock(&tasks_threads_lock);
 
@@ -820,6 +892,41 @@ thread_create_running(
 	return (result);
 }
 
+/* Prototype, see justification above */
+kern_return_t
+thread_create_running(
+	register task_t         task,
+	int                     flavor,
+	thread_state_t          new_state,
+	mach_msg_type_number_t  new_state_count,
+	thread_t				*new_thread);
+
+kern_return_t
+thread_create_running(
+	register task_t         task,
+	int                     flavor,
+	thread_state_t          new_state,
+	mach_msg_type_number_t  new_state_count,
+	thread_t				*new_thread)
+{
+	return thread_create_running_internal2(
+		task, flavor, new_state, new_state_count,
+		new_thread, FALSE);
+}
+
+kern_return_t
+thread_create_running_from_user(
+	register task_t         task,
+	int                     flavor,
+	thread_state_t          new_state,
+	mach_msg_type_number_t  new_state_count,
+	thread_t				*new_thread)
+{
+	return thread_create_running_internal2(
+		task, flavor, new_state, new_state_count,
+		new_thread, TRUE);
+}
+
 kern_return_t
 thread_create_workq(
 	task_t				task,
@@ -977,8 +1084,8 @@ thread_info_internal(
 		/*
 		 *	Update lazy-evaluated scheduler info because someone wants it.
 		 */
-		if (thread->sched_stamp != sched_tick)
-			update_priority(thread);
+		if (SCHED(can_update_priority)(thread))
+			SCHED(update_priority)(thread);
 
 		basic_info->sleep_time = 0;
 
@@ -987,14 +1094,19 @@ thread_info_internal(
 		 *	then for 5/8 ageing.  The correction factor [3/5] is
 		 *	(1/(5/8) - 1).
 		 */
-		basic_info->cpu_usage =	(integer_t)(((uint64_t)thread->cpu_usage
-									* TH_USAGE_SCALE) /	sched_tick_interval);
-		basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
-
+		basic_info->cpu_usage = 0;
+#if defined(CONFIG_SCHED_TRADITIONAL)
+		if (sched_tick_interval) {
+			basic_info->cpu_usage =	(integer_t)(((uint64_t)thread->cpu_usage
+										* TH_USAGE_SCALE) /	sched_tick_interval);
+			basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
+		}
+#endif
+		
 		if (basic_info->cpu_usage > TH_USAGE_SCALE)
 			basic_info->cpu_usage = TH_USAGE_SCALE;
 
-		basic_info->policy = ((thread->sched_mode & TH_MODE_TIMESHARE)?
+		basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
 												POLICY_TIMESHARE: POLICY_RR);
 
 	    flags = 0;
@@ -1045,11 +1157,7 @@ thread_info_internal(
 	    thread_lock(thread);
 
 	    identifier_info->thread_id = thread->thread_id;
-#if defined(__ppc__) || defined(__arm__)
 	    identifier_info->thread_handle = thread->machine.cthread_self;
-#else
-	    identifier_info->thread_handle = thread->machine.pcb->cthread_self;
-#endif
 	    if(thread->task->bsd_info) {
 	    	identifier_info->dispatch_qaddr =  identifier_info->thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
 	    } else {
@@ -1074,14 +1182,14 @@ thread_info_internal(
 	    s = splsched();
 		thread_lock(thread);
 
-	    if (!(thread->sched_mode & TH_MODE_TIMESHARE)) {
+	    if (thread->sched_mode != TH_MODE_TIMESHARE) {
 	    	thread_unlock(thread);
 			splx(s);
 
 			return (KERN_INVALID_POLICY);
 	    }
 
-		ts_info->depressed = (thread->sched_mode & TH_MODE_ISDEPRESSED) != 0;
+		ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
 		if (ts_info->depressed) {
 			ts_info->base_priority = DEPRESSPRI;
 			ts_info->depress_priority = thread->priority;
@@ -1111,7 +1219,9 @@ thread_info_internal(
 	else
 	if (flavor == THREAD_SCHED_RR_INFO) {
 		policy_rr_info_t			rr_info;
-
+		uint32_t quantum_time;
+		uint64_t quantum_ns;
+		
 		if (*thread_info_count < POLICY_RR_INFO_COUNT)
 			return (KERN_INVALID_ARGUMENT);
 
@@ -1120,14 +1230,14 @@ thread_info_internal(
 	    s = splsched();
 		thread_lock(thread);
 
-	    if (thread->sched_mode & TH_MODE_TIMESHARE) {
+	    if (thread->sched_mode == TH_MODE_TIMESHARE) {
 	    	thread_unlock(thread);
 			splx(s);
 
 			return (KERN_INVALID_POLICY);
 	    }
 
-		rr_info->depressed = (thread->sched_mode & TH_MODE_ISDEPRESSED) != 0;
+		rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
 		if (rr_info->depressed) {
 			rr_info->base_priority = DEPRESSPRI;
 			rr_info->depress_priority = thread->priority;
@@ -1137,8 +1247,11 @@ thread_info_internal(
 			rr_info->depress_priority = -1;
 		}
 
+		quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
+		absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
+		
 		rr_info->max_priority = thread->max_priority;
-	    rr_info->quantum = std_quantum_us / 1000;
+	    rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
 
 		thread_unlock(thread);
 	    splx(s);
@@ -1416,11 +1529,7 @@ thread_dispatchqaddr(
 	uint64_t	thread_handle = 0;
 
 	if (thread != THREAD_NULL) {
-#if defined(__ppc__) || defined(__arm__)
 		thread_handle = thread->machine.cthread_self;
-#else
-		thread_handle = thread->machine.pcb->cthread_self;
-#endif
 
 		if (thread->task->bsd_info)
 			dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
diff --git a/osfmk/kern/thread.h b/osfmk/kern/thread.h
index db2c6e352..916391593 100644
--- a/osfmk/kern/thread.h
+++ b/osfmk/kern/thread.h
@@ -146,6 +146,7 @@ struct thread {
 #define TH_OPT_INTMASK		0x03		/* interrupt / abort level */
 #define TH_OPT_VMPRIV		0x04		/* may allocate reserved memory */
 #define TH_OPT_DTRACE		0x08		/* executing under dtrace_probe */
+#define TH_OPT_SYSTEM_CRITICAL	0x10		/* Thread must always be allowed to run - even under heavy load */
 
 	/* Data updated during assert_wait/thread_wakeup */
 	decl_simple_lock_data(,sched_lock)	/* scheduling lock (thread_lock()) */
@@ -183,23 +184,37 @@ struct thread {
 #define TH_IDLE			0x80			/* idling processor */
 
 	/* Scheduling information */
-	integer_t			sched_mode;			/* scheduling mode bits */
-#define TH_MODE_REALTIME		0x0001		/* time constraints supplied */
-#define TH_MODE_TIMESHARE		0x0002		/* use timesharing algorithm */
-#define TH_MODE_FAILSAFE		0x0004		/* fail-safe has tripped */
-#define	TH_MODE_PROMOTED		0x0008		/* sched pri has been promoted */
-#define TH_MODE_ABORT			0x0010		/* abort interruptible waits */
-#define TH_MODE_ABORTSAFELY		0x0020		/* ... but only those at safe point */
-#define TH_MODE_ISABORTED		(TH_MODE_ABORT | TH_MODE_ABORTSAFELY)
-#define	TH_MODE_DEPRESS			0x0040		/* normal depress yield */
-#define TH_MODE_POLLDEPRESS		0x0080		/* polled depress yield */
-#define TH_MODE_ISDEPRESSED		(TH_MODE_DEPRESS | TH_MODE_POLLDEPRESS)
+	sched_mode_t			sched_mode;		/* scheduling mode */
+	sched_mode_t			saved_mode;		/* saved mode during forced mode demotion */
+	
+	unsigned int			sched_flags;		/* current flag bits */
+#define TH_SFLAG_FAIRSHARE_TRIPPED	0x0001		/* fairshare scheduling activated */
+#define TH_SFLAG_FAILSAFE		0x0002		/* fail-safe has tripped */
+#define TH_SFLAG_THROTTLED		0x0004      /* owner task in throttled state */
+#define TH_SFLAG_DEMOTED_MASK      (TH_SFLAG_THROTTLED | TH_SFLAG_FAILSAFE | TH_SFLAG_FAIRSHARE_TRIPPED)
+
+#define	TH_SFLAG_PROMOTED		0x0008		/* sched pri has been promoted */
+#define TH_SFLAG_ABORT			0x0010		/* abort interruptible waits */
+#define TH_SFLAG_ABORTSAFELY		0x0020		/* ... but only those at safe point */
+#define TH_SFLAG_ABORTED_MASK		(TH_SFLAG_ABORT | TH_SFLAG_ABORTSAFELY)
+#define	TH_SFLAG_DEPRESS		0x0040		/* normal depress yield */
+#define TH_SFLAG_POLLDEPRESS		0x0080		/* polled depress yield */
+#define TH_SFLAG_DEPRESSED_MASK		(TH_SFLAG_DEPRESS | TH_SFLAG_POLLDEPRESS)
+#define TH_SFLAG_PRI_UPDATE		0x0100		/* Updating priority */
+#define TH_SFLAG_EAGERPREEMPT		0x0200		/* Any preemption of this thread should be treated as if AST_URGENT applied */
+
 
 	integer_t			sched_pri;			/* scheduled (current) priority */
 	integer_t			priority;			/* base priority */
 	integer_t			max_priority;		/* max base priority */
 	integer_t			task_priority;		/* copy of task base priority */
 
+#if defined(CONFIG_SCHED_GRRR)
+#if 0
+	uint16_t			grrr_deficit;		/* fixed point (1/1000th quantum) fractional deficit */
+#endif
+#endif
+	
 	integer_t			promotions;			/* level of promotion */
 	integer_t			pending_promoter_index;
 	void				*pending_promoter[2];
@@ -216,30 +231,38 @@ struct thread {
 		uint64_t			deadline;
 	}					realtime;
 
+	uint32_t			was_promoted_on_wakeup;
 	uint32_t			current_quantum;	/* duration of current quantum */
+	uint64_t			last_run_time;		/* time when thread was switched away from */
+	uint64_t			last_quantum_refill_time;	/* time when current_quantum was refilled after expiration */
 
   /* Data used during setrun/dispatch */
 	timer_data_t		system_timer;		/* system mode timer */
 	processor_t			bound_processor;	/* bound to a processor? */
 	processor_t			last_processor;		/* processor last dispatched on */
+	processor_t			chosen_processor;	/* Where we want to run this thread */
 
 	/* Fail-safe computation since last unblock or qualifying yield */
 	uint64_t			computation_metered;
 	uint64_t			computation_epoch;
-	integer_t			safe_mode;		/* saved mode during fail-safe */
-	natural_t			safe_release;	/* when to release fail-safe */
+	uint64_t			safe_release;	/* when to release fail-safe */
 
 	/* Call out from scheduler */
 	void				(*sched_call)(
 							int			type,
 							thread_t	thread);
-
+#if defined(CONFIG_SCHED_PROTO)
+	uint32_t			runqueue_generation;	/* last time runqueue was drained */
+#endif
+	
 	/* Statistics and timesharing calculations */
+#if defined(CONFIG_SCHED_TRADITIONAL)
 	natural_t			sched_stamp;	/* last scheduler tick */
 	natural_t			sched_usage;	/* timesharing cpu usage [sched] */
 	natural_t			pri_shift;		/* usage -> priority from pset */
 	natural_t			cpu_usage;		/* instrumented cpu usage [%cpu] */
 	natural_t			cpu_delta;		/* accumulated cpu_usage delta */
+#endif
 	uint32_t			c_switch;		/* total context switches */
 	uint32_t			p_switch;		/* total processor switches */
 	uint32_t			ps_switch;		/* total pset switches */
@@ -366,7 +389,20 @@ struct thread {
 	        clock_sec_t t_page_creation_time;
 
 		uint32_t t_chud;	/* CHUD flags, used for Shark */
+
+		integer_t mutex_count;	/* total count of locks held */
+
 		uint64_t thread_id;	/*system wide unique thread-id*/
+
+	/* Statistics accumulated per-thread and aggregated per-task */
+	uint32_t		syscalls_unix;
+	uint32_t		syscalls_mach;
+	zinfo_usage_store_t	tkm_private;	/* private kernel memory allocs/frees */
+	zinfo_usage_store_t	tkm_shared;	/* shared kernel memory allocs/frees */
+	struct process_policy ext_actionstate;	/* externally applied actions */
+	struct process_policy ext_policystate;	/* externally defined process policy states*/
+	struct process_policy actionstate;		/* self applied acions */
+	struct process_policy policystate;		/* process wide policy states */
 };
 
 #define ith_state		saved.receive.state
@@ -441,11 +477,15 @@ extern void			thread_release(
 extern void				stack_alloc(
 							thread_t		thread);
 
+extern void			stack_handoff(
+					      		thread_t		from,
+							thread_t		to);
+
 extern void				stack_free(
 							thread_t		thread);
 
-extern void				stack_free_stack(
-							vm_offset_t		stack);
+extern void				stack_free_reserved(
+							thread_t		thread);
 
 extern boolean_t		stack_alloc_try(
 							thread_t	    thread);
@@ -454,6 +494,7 @@ extern void				stack_collect(void);
 
 extern void				stack_init(void) __attribute__((section("__TEXT, initcode")));
 
+
 extern kern_return_t    thread_state_initialize(
 							thread_t				thread);
 
@@ -684,6 +725,22 @@ extern kern_return_t	thread_setsinglestep(
 						thread_t		thread,
 						int			on);
 
+extern kern_return_t	thread_userstack(
+						thread_t,
+						int,
+						thread_state_t,
+						unsigned int,
+						mach_vm_offset_t *,
+						int *);
+
+kern_return_t	thread_entrypoint(
+				thread_t,
+				int,
+				thread_state_t,
+				unsigned int,
+				mach_vm_offset_t *); 
+
+
 extern kern_return_t	thread_wire_internal(
 							host_priv_t		host_priv,
 							thread_t		thread,
@@ -775,6 +832,10 @@ extern kern_return_t	kernel_thread_start(
 							thread_continue_t	continuation,
 							void				*parameter,
 							thread_t			*new_thread);
+#ifdef KERNEL_PRIVATE
+void thread_set_eager_preempt(thread_t thread);
+void thread_clear_eager_preempt(thread_t thread);
+#endif /* KERNEL_PRIVATE */
 
 __END_DECLS
 
diff --git a/osfmk/kern/thread_act.c b/osfmk/kern/thread_act.c
index 8c18ffc30..455a0fb01 100644
--- a/osfmk/kern/thread_act.c
+++ b/osfmk/kern/thread_act.c
@@ -59,6 +59,7 @@
 #include <kern/ast.h>
 #include <kern/mach_param.h>
 #include <kern/zalloc.h>
+#include <kern/extmod_statistics.h>
 #include <kern/thread.h>
 #include <kern/task.h>
 #include <kern/sched_prim.h>
@@ -314,12 +315,12 @@ act_abort(
 
 	thread_lock(thread);
 
-	if (!(thread->sched_mode & TH_MODE_ABORT)) {
-		thread->sched_mode |= TH_MODE_ABORT;
+	if (!(thread->sched_flags & TH_SFLAG_ABORT)) {
+		thread->sched_flags |= TH_SFLAG_ABORT;
 		install_special_handler_locked(thread);
 	}
 	else
-		thread->sched_mode &= ~TH_MODE_ABORTSAFELY;
+		thread->sched_flags &= ~TH_SFLAG_ABORTSAFELY;
 
 	thread_unlock(thread);
 	splx(s);
@@ -365,8 +366,8 @@ thread_abort_safely(
 		thread_lock(thread);
 		if (!thread->at_safe_point ||
 				clear_wait_internal(thread, THREAD_INTERRUPTED) != KERN_SUCCESS) {
-			if (!(thread->sched_mode & TH_MODE_ABORT)) {
-				thread->sched_mode |= TH_MODE_ISABORTED;
+			if (!(thread->sched_flags & TH_SFLAG_ABORT)) {
+				thread->sched_flags |= TH_SFLAG_ABORTED_MASK;
 				install_special_handler_locked(thread);
 			}
 		}
@@ -460,12 +461,13 @@ thread_get_state(
  *	Change thread's machine-dependent state.  Called with nothing
  *	locked.  Returns same way.
  */
-kern_return_t
-thread_set_state(
+static kern_return_t
+thread_set_state_internal(
 	register thread_t		thread,
 	int						flavor,
 	thread_state_t			state,
-	mach_msg_type_number_t	state_count)
+	mach_msg_type_number_t	state_count,
+	boolean_t				from_user)
 {
 	kern_return_t		result = KERN_SUCCESS;
 
@@ -500,11 +502,41 @@ thread_set_state(
 	else
 		result = KERN_TERMINATED;
 
+	if ((result == KERN_SUCCESS) && from_user)
+		extmod_statistics_incr_thread_set_state(thread);
+
 	thread_mtx_unlock(thread);
 
 	return (result);
 }
+
+/* No prototype, since thread_act_server.h has the _from_user version if KERNEL_SERVER */ 
+kern_return_t
+thread_set_state(
+	register thread_t		thread,
+	int						flavor,
+	thread_state_t			state,
+	mach_msg_type_number_t	state_count);
+
+kern_return_t
+thread_set_state(
+	register thread_t		thread,
+	int						flavor,
+	thread_state_t			state,
+	mach_msg_type_number_t	state_count)
+{
+	return thread_set_state_internal(thread, flavor, state, state_count, FALSE);
+}
  
+kern_return_t
+thread_set_state_from_user(
+	register thread_t		thread,
+	int						flavor,
+	thread_state_t			state,
+	mach_msg_type_number_t	state_count)
+{
+	return thread_set_state_internal(thread, flavor, state, state_count, TRUE);
+}
  
 /*
  * Kernel-internal "thread" interfaces used outside this file:
@@ -672,8 +704,8 @@ install_special_handler_locked(
 	 * a chance to do locking required to
 	 * block itself in special_handler().
 	 */
-	if (thread->sched_mode & TH_MODE_ISDEPRESSED)
-		compute_priority(thread, TRUE);
+	if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)
+		SCHED(compute_priority)(thread, TRUE);
 
 	thread_ast_set(thread, AST_APC);
 
@@ -753,7 +785,7 @@ special_handler_continue(void)
 		spl_t			s = splsched();
 
 		thread_lock(thread);
-		if (thread->sched_mode & TH_MODE_ISDEPRESSED) {
+		if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
 			processor_t		myprocessor = thread->last_processor;
 
 			thread->sched_pri = DEPRESSPRI;
@@ -784,7 +816,7 @@ special_handler(
 
 	s = splsched();
 	thread_lock(thread);
-	thread->sched_mode &= ~TH_MODE_ISABORTED;
+	thread->sched_flags &= ~TH_SFLAG_ABORTED_MASK;
 	thread_unlock(thread);
 	splx(s);
 
@@ -816,6 +848,14 @@ special_handler(
 	thread_mtx_unlock(thread);
 }
 
+/* Prototype, see justification above */
+kern_return_t
+act_set_state(
+	thread_t				thread,
+	int						flavor,
+	thread_state_t			state,
+	mach_msg_type_number_t	count);
+
 kern_return_t
 act_set_state(
 	thread_t				thread,
@@ -830,6 +870,20 @@ act_set_state(
     
 }
 
+kern_return_t
+act_set_state_from_user(
+	thread_t				thread,
+	int						flavor,
+	thread_state_t			state,
+	mach_msg_type_number_t	count)
+{
+    if (thread == current_thread())
+	    return (KERN_INVALID_ARGUMENT);
+
+    return (thread_set_state_from_user(thread, flavor, state, count));
+    
+}
+
 kern_return_t
 act_get_state(
 	thread_t				thread,
diff --git a/osfmk/kern/thread_call.c b/osfmk/kern/thread_call.c
index a50c6d7d3..93edbc489 100644
--- a/osfmk/kern/thread_call.c
+++ b/osfmk/kern/thread_call.c
@@ -46,13 +46,12 @@
 
 #include <sys/kdebug.h>
 
-decl_simple_lock_data(static,thread_call_lock)
 
 static zone_t		thread_call_zone;
 
 struct thread_call_group {
 	queue_head_t		pending_queue;
-	uint32_t			pending_count;
+	uint32_t		pending_count;
 
 	queue_head_t		delayed_queue;
 
@@ -60,7 +59,7 @@ struct thread_call_group {
 
 	struct wait_queue	idle_wqueue;
 	struct wait_queue	daemon_wqueue;
-	uint32_t			idle_count, active_count;
+	uint32_t		idle_count, active_count;
 };
 
 typedef struct thread_call_group	*thread_call_group_t;
@@ -113,13 +112,32 @@ static void		thread_call_daemon(
 				thread_call_thread(
 					thread_call_group_t		group);
 
-static void		thread_call_delayed_timer(
+extern void		thread_call_delayed_timer(
 					timer_call_param_t		p0,
 					timer_call_param_t		p1);
 
 #define qe(x)		((queue_entry_t)(x))
 #define TC(x)		((thread_call_t)(x))
 
+
+lck_grp_t               thread_call_queues_lck_grp;
+lck_grp_t               thread_call_lck_grp;
+lck_attr_t              thread_call_lck_attr;
+lck_grp_attr_t          thread_call_lck_grp_attr;
+
+#if defined(__i386__) || defined(__x86_64__)
+lck_mtx_t		thread_call_lock_data;
+#else
+lck_spin_t		thread_call_lock_data;
+#endif
+
+#define thread_call_lock_spin()			\
+	lck_mtx_lock_spin_always(&thread_call_lock_data)
+
+#define thread_call_unlock()			\
+	lck_mtx_unlock_always(&thread_call_lock_data)
+
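+/*
+ * Design note (a reading of the change above, not new code): on
+ * i386/x86_64 the global thread_call lock becomes a mutex that is always
+ * acquired in spin mode, keeping the old simple_lock's spin semantics
+ * while presumably gaining lck_grp accounting; other architectures keep
+ * a true spinlock for the storage.
+ */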
+
 /*
  *	thread_call_initialize:
  *
@@ -129,7 +147,7 @@ static void		thread_call_delayed_timer(
 void
 thread_call_initialize(void)
 {
-    thread_call_t			call;
+	thread_call_t			call;
 	thread_call_group_t		group = &thread_call_group0;
 	kern_return_t			result;
 	thread_t				thread;
@@ -138,33 +156,42 @@ thread_call_initialize(void)
 
 	i = sizeof (thread_call_data_t);
 	thread_call_zone = zinit(i, 4096 * i, 16 * i, "thread_call");
+	zone_change(thread_call_zone, Z_CALLERACCT, FALSE);
 	zone_change(thread_call_zone, Z_NOENCRYPT, TRUE);
 
-    simple_lock_init(&thread_call_lock, 0);
+	lck_attr_setdefault(&thread_call_lck_attr);
+	lck_grp_attr_setdefault(&thread_call_lck_grp_attr);
+	lck_grp_init(&thread_call_queues_lck_grp, "thread_call_queues", &thread_call_lck_grp_attr);
+	lck_grp_init(&thread_call_lck_grp, "thread_call", &thread_call_lck_grp_attr);
 
-	s = splsched();
-	simple_lock(&thread_call_lock);
+#if defined(__i386__) || defined(__x86_64__)
+        lck_mtx_init(&thread_call_lock_data, &thread_call_lck_grp, &thread_call_lck_attr);
+#else
+        lck_spin_init(&thread_call_lock_data, &thread_call_lck_grp, &thread_call_lck_attr);
+#endif
+	queue_init(&group->pending_queue);
+	queue_init(&group->delayed_queue);
 
-    queue_init(&group->pending_queue);
-    queue_init(&group->delayed_queue);
+	s = splsched();
+	thread_call_lock_spin();
 
 	timer_call_setup(&group->delayed_timer, thread_call_delayed_timer, group);
 
 	wait_queue_init(&group->idle_wqueue, SYNC_POLICY_FIFO);
 	wait_queue_init(&group->daemon_wqueue, SYNC_POLICY_FIFO);
 
-    queue_init(&thread_call_internal_queue);
-    for (
+	queue_init(&thread_call_internal_queue);
+	for (
 	    	call = internal_call_storage;
 			call < &internal_call_storage[internal_call_count];
 			call++) {
 
 		enqueue_tail(&thread_call_internal_queue, qe(call));
-    }
+	}
 
 	thread_call_daemon_awake = TRUE;
 
-	simple_unlock(&thread_call_lock);
+	thread_call_unlock();
 	splx(s);
 
 	result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon, group, BASEPRI_PREEMPT + 1, &thread);
@@ -236,7 +263,7 @@ _pending_call_enqueue(
     thread_call_t		call,
 	thread_call_group_t	group)
 {
-	queue_t		old_queue;
+	queue_head_t		*old_queue;
 
 	old_queue = call_entry_enqueue_tail(call, &group->pending_queue);
 
@@ -261,9 +288,9 @@ static __inline__ boolean_t
 _delayed_call_enqueue(
     thread_call_t		call,
 	thread_call_group_t	group,
-	uint64_t			deadline)
+	uint64_t		deadline)
 {
-	queue_t			old_queue;
+	queue_head_t		*old_queue;
 
 	old_queue = call_entry_enqueue_deadline(call, &group->delayed_queue, deadline);
 
@@ -287,7 +314,7 @@ _call_dequeue(
 	thread_call_t		call,
 	thread_call_group_t	group)
 {
-	queue_t			old_queue;
+	queue_head_t		*old_queue;
 
 	old_queue = call_entry_dequeue(call);
 
@@ -310,7 +337,7 @@ _set_delayed_call_timer(
     thread_call_t		call,
 	thread_call_group_t	group)
 {
-    timer_call_enter(&group->delayed_timer, call->deadline);
+    timer_call_enter(&group->delayed_timer, call->deadline, 0);
 }
 
 /*
@@ -330,7 +357,7 @@ _remove_from_pending_queue(
     thread_call_param_t		param0,
     boolean_t				remove_all)
 {
-	boolean_t				call_removed = FALSE;
+	boolean_t			call_removed = FALSE;
 	thread_call_t			call;
 	thread_call_group_t		group = &thread_call_group0;
     
@@ -424,7 +451,7 @@ thread_call_func(
     spl_t					s;
     
     s = splsched();
-    simple_lock(&thread_call_lock);
+    thread_call_lock_spin();
     
     call = TC(queue_first(&group->pending_queue));
     
@@ -449,7 +476,7 @@ thread_call_func(
 			thread_call_wake(group);
     }
 
-	simple_unlock(&thread_call_lock);
+    thread_call_unlock();
     splx(s);
 }
 
@@ -472,7 +499,7 @@ thread_call_func_delayed(
     spl_t					s;
     
     s = splsched();
-    simple_lock(&thread_call_lock);
+    thread_call_lock_spin();
     
     call = _internal_call_allocate();
     call->func			= func;
@@ -484,7 +511,7 @@ thread_call_func_delayed(
     if (queue_first(&group->delayed_queue) == qe(call))
     	_set_delayed_call_timer(call, group);
     
-    simple_unlock(&thread_call_lock);
+    thread_call_unlock();
     splx(s);
 }
 
@@ -510,7 +537,7 @@ thread_call_func_cancel(
     spl_t				s;
     
     s = splsched();
-    simple_lock(&thread_call_lock);
+    thread_call_lock_spin();
 
     if (cancel_all)
 		result = _remove_from_pending_queue(func, param, cancel_all) |
@@ -519,7 +546,7 @@ thread_call_func_cancel(
 		result = _remove_from_pending_queue(func, param, cancel_all) ||
 						_remove_from_delayed_queue(func, param, cancel_all);
     
-    simple_unlock(&thread_call_lock);
+    thread_call_unlock();
     splx(s);
 
 	return (result);
@@ -554,16 +581,16 @@ thread_call_free(
     spl_t		s;
     
     s = splsched();
-    simple_lock(&thread_call_lock);
+    thread_call_lock_spin();
     
     if (call->queue != NULL) {
-    	simple_unlock(&thread_call_lock);
-		splx(s);
+	    thread_call_unlock();
+	    splx(s);
 
-		return (FALSE);
+	    return (FALSE);
     }
     
-    simple_unlock(&thread_call_lock);
+    thread_call_unlock();
     splx(s);
     
 	zfree(thread_call_zone, call);
@@ -585,10 +612,10 @@ thread_call_enter(
 {
 	boolean_t				result = TRUE;
 	thread_call_group_t		group = &thread_call_group0;
-    spl_t					s;
+	spl_t					s;
     
-    s = splsched();
-    simple_lock(&thread_call_lock);
+	s = splsched();
+	thread_call_lock_spin();
     
     if (call->queue != &group->pending_queue) {
     	result = _pending_call_enqueue(call, group);
@@ -599,8 +626,8 @@ thread_call_enter(
 
 	call->param1 = 0;
 
-	simple_unlock(&thread_call_lock);
-    splx(s);
+	thread_call_unlock();
+	splx(s);
 
 	return (result);
 }
@@ -612,10 +639,10 @@ thread_call_enter1(
 {
 	boolean_t				result = TRUE;
 	thread_call_group_t		group = &thread_call_group0;
-    spl_t					s;
+	spl_t					s;
     
-    s = splsched();
-    simple_lock(&thread_call_lock);
+	s = splsched();
+	thread_call_lock_spin();
     
     if (call->queue != &group->pending_queue) {
     	result = _pending_call_enqueue(call, group);
@@ -626,8 +653,8 @@ thread_call_enter1(
 
 	call->param1 = param1;
 
-	simple_unlock(&thread_call_lock);
-    splx(s);
+	thread_call_unlock();
+	splx(s);
 
 	return (result);
 }
@@ -648,10 +675,10 @@ thread_call_enter_delayed(
 {
 	boolean_t				result = TRUE;
 	thread_call_group_t		group = &thread_call_group0;
-    spl_t					s;
+	spl_t					s;
 
-    s = splsched();
-    simple_lock(&thread_call_lock);
+	s = splsched();
+	thread_call_lock_spin();
 
 	result = _delayed_call_enqueue(call, group, deadline);
 
@@ -660,8 +687,8 @@ thread_call_enter_delayed(
 
 	call->param1 = 0;
 
-    simple_unlock(&thread_call_lock);
-    splx(s);
+	thread_call_unlock();
+	splx(s);
 
 	return (result);
 }
@@ -674,10 +701,10 @@ thread_call_enter1_delayed(
 {
 	boolean_t				result = TRUE;
 	thread_call_group_t		group = &thread_call_group0;
-    spl_t					s;
+	spl_t					s;
 
-    s = splsched();
-    simple_lock(&thread_call_lock);
+	s = splsched();
+	thread_call_lock_spin();
 
 	result = _delayed_call_enqueue(call, group, deadline);
 
@@ -686,8 +713,8 @@ thread_call_enter1_delayed(
 
 	call->param1 = param1;
 
-    simple_unlock(&thread_call_lock);
-    splx(s);
+	thread_call_unlock();
+	splx(s);
 
 	return (result);
 }
@@ -706,15 +733,15 @@ thread_call_cancel(
 {
 	boolean_t				result;
 	thread_call_group_t		group = &thread_call_group0;
-    spl_t					s;
+	spl_t					s;
     
-    s = splsched();
-    simple_lock(&thread_call_lock);
+	s = splsched();
+	thread_call_lock_spin();
 
 	result = _call_dequeue(call, group);
 	
-    simple_unlock(&thread_call_lock);
-    splx(s);
+	thread_call_unlock();
+	splx(s);
 
 	return (result);
 }
@@ -739,7 +766,7 @@ thread_call_is_delayed(
 	spl_t					s;
 
 	s = splsched();
-	simple_lock(&thread_call_lock);
+	thread_call_lock_spin();
 
 	if (call->queue == &group->delayed_queue) {
 		if (deadline != NULL)
@@ -747,7 +774,7 @@ thread_call_is_delayed(
 		result = TRUE;
 	}
 
-	simple_unlock(&thread_call_lock);
+	thread_call_unlock();
 	splx(s);
 
 	return (result);
@@ -769,13 +796,13 @@ static __inline__ void
 thread_call_wake(
 	thread_call_group_t		group)
 {
-	if (group->idle_count > 0 && wait_queue_wakeup_one(&group->idle_wqueue, NULL, THREAD_AWAKENED) == KERN_SUCCESS) {
+	if (group->idle_count > 0 && wait_queue_wakeup_one(&group->idle_wqueue, NO_EVENT, THREAD_AWAKENED, -1) == KERN_SUCCESS) {
 		group->idle_count--; group->active_count++;
 	}
 	else
 	if (!thread_call_daemon_awake) {
 		thread_call_daemon_awake = TRUE;
-		wait_queue_wakeup_one(&group->daemon_wqueue, NULL, THREAD_AWAKENED);
+		wait_queue_wakeup_one(&group->daemon_wqueue, NO_EVENT, THREAD_AWAKENED, -1);
 	}
 }
 
@@ -791,7 +818,7 @@ __unused	thread_t		thread)
 {
 	thread_call_group_t		group = &thread_call_group0;
 
-	simple_lock(&thread_call_lock);
+	thread_call_lock_spin();
 
 	switch (type) {
 
@@ -805,7 +832,7 @@ __unused	thread_t		thread)
 		break;
 	}
 
-	simple_unlock(&thread_call_lock);
+	thread_call_unlock();
 }
 
 /*
@@ -817,8 +844,8 @@ thread_call_thread(
 {
 	thread_t		self = current_thread();
 
-    (void) splsched();
-    simple_lock(&thread_call_lock);
+	(void) splsched();
+	thread_call_lock_spin();
 
 	thread_sched_call(self, sched_call_thread);
 
@@ -838,7 +865,7 @@ thread_call_thread(
 
 		_internal_call_release(call);
 
-		simple_unlock(&thread_call_lock);
+		thread_call_unlock();
 		(void) spllo();
 
 		KERNEL_DEBUG_CONSTANT(
@@ -847,10 +874,16 @@ thread_call_thread(
 
 		(*func)(param0, param1);
 
+		if (get_preemption_level() != 0) {
+			int pl = get_preemption_level();
+			panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
+				  pl, func, param0, param1);
+		}
+		
 		(void)thread_funnel_set(self->funnel_lock, FALSE);		/* XXX */
 
 		(void) splsched();
-		simple_lock(&thread_call_lock);
+		thread_call_lock_spin();
     }
 
 	thread_sched_call(self, NULL);
@@ -859,16 +892,16 @@ thread_call_thread(
     if (group->idle_count < thread_call_thread_min) {
 		group->idle_count++;
 
-		wait_queue_assert_wait(&group->idle_wqueue, NULL, THREAD_UNINT, 0);
+		wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_UNINT, 0);
 	
-		simple_unlock(&thread_call_lock);
+		thread_call_unlock();
 		(void) spllo();
 
 		thread_block_parameter((thread_continue_t)thread_call_thread, group);
 		/* NOTREACHED */
     }
 
-    simple_unlock(&thread_call_lock);
+    thread_call_unlock();
     (void) spllo();
     
     thread_terminate(self);
@@ -886,12 +919,12 @@ thread_call_daemon_continue(
 	thread_t		thread;
 
     (void) splsched();
-    simple_lock(&thread_call_lock);
+    thread_call_lock_spin();
         
 	while (group->active_count == 0	&& group->pending_count > 0) {
 		group->active_count++;
 
-		simple_unlock(&thread_call_lock);
+		thread_call_unlock();
 		(void) spllo();
 	
 		result = kernel_thread_start_priority((thread_continue_t)thread_call_thread, group, BASEPRI_PREEMPT, &thread);
@@ -901,13 +934,13 @@ thread_call_daemon_continue(
 		thread_deallocate(thread);
 
 		(void) splsched();
-		simple_lock(&thread_call_lock);
+		thread_call_lock_spin();
     }
 
     thread_call_daemon_awake = FALSE;
-    wait_queue_assert_wait(&group->daemon_wqueue, NULL, THREAD_UNINT, 0);
+    wait_queue_assert_wait(&group->daemon_wqueue, NO_EVENT, THREAD_UNINT, 0);
     
-    simple_unlock(&thread_call_lock);
+    thread_call_unlock();
 	(void) spllo();
     
 	thread_block_parameter((thread_continue_t)thread_call_daemon_continue, group);
@@ -927,7 +960,7 @@ thread_call_daemon(
     /* NOTREACHED */
 }
 
-static void
+void
 thread_call_delayed_timer(
 	timer_call_param_t				p0,
 	__unused timer_call_param_t		p1
@@ -938,7 +971,7 @@ thread_call_delayed_timer(
 	boolean_t				new_pending = FALSE;
 	uint64_t				timestamp;
 
-    simple_lock(&thread_call_lock);
+	thread_call_lock_spin();
 
 	timestamp = mach_absolute_time();
     
@@ -961,5 +994,5 @@ thread_call_delayed_timer(
     if (new_pending && group->active_count == 0)
 		thread_call_wake(group);
 
-    simple_unlock(&thread_call_lock);
+    thread_call_unlock();
 }
diff --git a/osfmk/kern/thread_policy.c b/osfmk/kern/thread_policy.c
index 58028df2d..7ed70a151 100644
--- a/osfmk/kern/thread_policy.c
+++ b/osfmk/kern/thread_policy.c
@@ -38,6 +38,14 @@ static void
 thread_recompute_priority(
 	thread_t		thread);
 
+#if CONFIG_EMBEDDED
+static void
+thread_throttle(
+	thread_t		thread,
+	integer_t		task_priority);
+
+extern int mach_do_background_thread(thread_t thread, int prio);
+#endif
 
 
 kern_return_t
@@ -86,37 +94,40 @@ thread_policy_set_internal(
 			timeshare = info->timeshare;
 		}
 
+		if (!SCHED(supports_timeshare_mode)())
+			timeshare = FALSE;
+		
 		s = splsched();
 		thread_lock(thread);
 
-		if (!(thread->sched_mode & TH_MODE_FAILSAFE)) {
-			integer_t	oldmode = (thread->sched_mode & TH_MODE_TIMESHARE);
+		if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
+			integer_t	oldmode = (thread->sched_mode == TH_MODE_TIMESHARE);
 
-			thread->sched_mode &= ~TH_MODE_REALTIME;
+			if (timeshare) {
+				thread->sched_mode = TH_MODE_TIMESHARE;
 
-			if (timeshare && !oldmode) {
-				thread->sched_mode |= TH_MODE_TIMESHARE;
-
-				if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
-					sched_share_incr();
+				if (!oldmode) {
+					if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
+						sched_share_incr();
+				}
 			}
-			else
-			if (!timeshare && oldmode) {
-				thread->sched_mode &= ~TH_MODE_TIMESHARE;
+			else {
+				thread->sched_mode = TH_MODE_FIXED;
 
-				if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
-					sched_share_decr();
+				if (oldmode) {
+					if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
+						sched_share_decr();
+				}
 			}
 
 			thread_recompute_priority(thread);
 		}
 		else {
-			thread->safe_mode &= ~TH_MODE_REALTIME;
 
 			if (timeshare)
-				thread->safe_mode |= TH_MODE_TIMESHARE;
+				thread->saved_mode = TH_MODE_TIMESHARE;
 			else
-				thread->safe_mode &= ~TH_MODE_TIMESHARE;
+				thread->saved_mode = TH_MODE_FIXED;
 		}
 
 		thread_unlock(thread);
@@ -150,20 +161,23 @@ thread_policy_set_internal(
 		thread->realtime.constraint = info->constraint;
 		thread->realtime.preemptible = info->preemptible;
 
-		if (!(thread->sched_mode & TH_MODE_FAILSAFE)) {
-			if (thread->sched_mode & TH_MODE_TIMESHARE) {
-				thread->sched_mode &= ~TH_MODE_TIMESHARE;
-
+		if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
+			thread->saved_mode = TH_MODE_REALTIME;
+		}
+#if CONFIG_EMBEDDED
+		else if (thread->task_priority <= MAXPRI_THROTTLE) {
+			thread->saved_mode = TH_MODE_REALTIME;
+			thread->sched_flags |= TH_SFLAG_THROTTLED;		
+		}
+#endif
+		else {
+			if (thread->sched_mode == TH_MODE_TIMESHARE) {
 				if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
 					sched_share_decr();
 			}
-			thread->sched_mode |= TH_MODE_REALTIME;
+			thread->sched_mode = TH_MODE_REALTIME;
 			thread_recompute_priority(thread);
 		}
-		else {
-			thread->safe_mode &= ~TH_MODE_TIMESHARE;
-			thread->safe_mode |= TH_MODE_REALTIME;
-		}
 
 		thread_unlock(thread);
 		splx(s);
@@ -217,6 +231,19 @@ thread_policy_set_internal(
 		thread_mtx_unlock(thread);
 		return thread_affinity_set(thread, info->affinity_tag);
 	}
+
+#if CONFIG_EMBEDDED
+	case THREAD_BACKGROUND_POLICY:
+	{
+		thread_background_policy_t	info;
+
+		info = (thread_background_policy_t) policy_info;
+
+		thread_mtx_unlock(thread);
+		return mach_do_background_thread(thread, info->priority);
+	}
+#endif /* CONFIG_EMBEDDED */
+
 	default:
 		result = KERN_INVALID_ARGUMENT;
 		break;
@@ -232,7 +259,7 @@ thread_recompute_priority(
 {
 	integer_t		priority;
 
-	if (thread->sched_mode & TH_MODE_REALTIME)
+	if (thread->sched_mode == TH_MODE_REALTIME)
 		priority = BASEPRI_RTQUEUES;
 	else {
 		if (thread->importance > MAXPRI)
@@ -250,11 +277,75 @@ thread_recompute_priority(
 		else
 		if (priority < MINPRI)
 			priority = MINPRI;
+#if CONFIG_EMBEDDED
+		/* No one can have a base priority less than MAXPRI_THROTTLE */
+		if (priority < MAXPRI_THROTTLE) 
+			priority = MAXPRI_THROTTLE;
+#endif /* CONFIG_EMBEDDED */
 	}
 
 	set_priority(thread, priority);
 }
 
+#if CONFIG_EMBEDDED
+static void
+thread_throttle(
+	thread_t		thread,
+	integer_t		task_priority)
+{
+	if (!(thread->sched_flags & TH_SFLAG_THROTTLED) && 
+		(task_priority <= MAXPRI_THROTTLE)) {
+
+		if (!((thread->sched_mode == TH_MODE_REALTIME) ||
+			  (thread->saved_mode == TH_MODE_REALTIME))) {
+			return;
+		}
+
+		/* Demote to timeshare if throttling */
+		if (thread->sched_mode == TH_MODE_REALTIME)		
+		{
+			thread->saved_mode = TH_MODE_REALTIME;
+
+			if (thread->sched_mode == TH_MODE_TIMESHARE) {
+				if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
+					sched_share_incr();
+			}
+		}
+
+		/* TH_SFLAG_FAILSAFE and TH_SFLAG_THROTTLED are mutually exclusive,
+		 * since a throttled thread is not realtime during the throttle
+		 * and doesn't need the failsafe repromotion. We therefore clear
+		 * the former and set the latter flag here.
+		 */
+		thread->sched_flags &= ~TH_SFLAG_FAILSAFE;
+		thread->sched_flags |= TH_SFLAG_THROTTLED;
+		
+		if (SCHED(supports_timeshare_mode)())
+			thread->sched_mode = TH_MODE_TIMESHARE;
+		else
+			thread->sched_mode = TH_MODE_FIXED;
+	}
+	else if ((thread->sched_flags & TH_SFLAG_THROTTLED) &&
+			 (task_priority > MAXPRI_THROTTLE)) {
+
+		/* Promote back to real time if unthrottling */
+		if (!(thread->saved_mode == TH_MODE_TIMESHARE)) {
+
+			thread->sched_mode = thread->saved_mode;
+
+			if (thread->sched_mode == TH_MODE_TIMESHARE) {
+				if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
+					sched_share_decr();
+			}
+			
+			thread->saved_mode = TH_MODE_NONE;
+		}
+
+		thread->sched_flags &= ~TH_SFLAG_THROTTLED;
+	}	
+}
+#endif
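
Both transitions are driven by the task's priority crossing MAXPRI_THROTTLE, as seen from thread_task_priority() below. A sketch of the two directions (the priority values are illustrative):

/* task drops into the background band: a realtime thread is demoted,
 * its mode parked in saved_mode and TH_SFLAG_THROTTLED set */
thread_task_priority(thread, MAXPRI_THROTTLE, thread->max_priority);

/* task returns to the foreground band: saved_mode is restored
 * and TH_SFLAG_THROTTLED cleared */
thread_task_priority(thread, BASEPRI_DEFAULT, thread->max_priority);
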
+
 void
 thread_task_priority(
 	thread_t		thread,
@@ -268,6 +359,10 @@ thread_task_priority(
 	s = splsched();
 	thread_lock(thread);
 
+#if CONFIG_EMBEDDED
+	thread_throttle(thread, priority);
+#endif
+
 	thread->task_priority = priority;
 	thread->max_priority = max_priority;
 
@@ -286,19 +381,20 @@ thread_policy_reset(
 	s = splsched();
 	thread_lock(thread);
 
-	if (!(thread->sched_mode & TH_MODE_FAILSAFE)) {
-		thread->sched_mode &= ~TH_MODE_REALTIME;
+	if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
+		sched_mode_t oldmode = thread->sched_mode;
+		
+		thread->sched_mode = SCHED(initial_thread_sched_mode)(thread->task);
 
-		if (!(thread->sched_mode & TH_MODE_TIMESHARE)) {
-			thread->sched_mode |= TH_MODE_TIMESHARE;
+		if ((oldmode != TH_MODE_TIMESHARE) && (thread->sched_mode == TH_MODE_TIMESHARE)) {
 
 			if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
 				sched_share_incr();
 		}
 	}
 	else {
-		thread->safe_mode = 0;
-		thread->sched_mode &= ~TH_MODE_FAILSAFE;
+		thread->saved_mode = TH_MODE_NONE;
+		thread->sched_flags &= ~TH_SFLAG_DEMOTED_MASK;
 	}
 
 	thread->importance = 0;
@@ -340,12 +436,12 @@ thread_policy_get(
 			s = splsched();
 			thread_lock(thread);
 
-			if (	!(thread->sched_mode & TH_MODE_REALTIME)	&&
-					!(thread->safe_mode & TH_MODE_REALTIME)			) {
-				if (!(thread->sched_mode & TH_MODE_FAILSAFE))
-					timeshare = (thread->sched_mode & TH_MODE_TIMESHARE) != 0;
+			if (	 (thread->sched_mode != TH_MODE_REALTIME)	&&
+					 (thread->saved_mode != TH_MODE_REALTIME)			) {
+				if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK))
+					timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
 				else
-					timeshare = (thread->safe_mode & TH_MODE_TIMESHARE) != 0;
+					timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
 			}
 			else
 				*get_default = TRUE;
@@ -379,8 +475,8 @@ thread_policy_get(
 			s = splsched();
 			thread_lock(thread);
 
-			if (	(thread->sched_mode & TH_MODE_REALTIME)	||
-					(thread->safe_mode & TH_MODE_REALTIME)		) {
+			if (	(thread->sched_mode == TH_MODE_REALTIME)	||
+					(thread->saved_mode == TH_MODE_REALTIME)		) {
 				info->period = thread->realtime.period;
 				info->computation = thread->realtime.computation;
 				info->constraint = thread->realtime.constraint;
@@ -395,8 +491,8 @@ thread_policy_get(
 
 		if (*get_default) {
 			info->period = 0;
-			info->computation = std_quantum / 2;
-			info->constraint = std_quantum;
+			info->computation = default_timeshare_computation;
+			info->constraint = default_timeshare_constraint;
 			info->preemptible = TRUE;
 		}
 
diff --git a/osfmk/kern/timer_call.c b/osfmk/kern/timer_call.c
index 74c4534a2..83eb0e43a 100644
--- a/osfmk/kern/timer_call.c
+++ b/osfmk/kern/timer_call.c
@@ -44,235 +44,432 @@
 #include <mach/sdt.h>
 #endif
 
-decl_simple_lock_data(static,timer_call_lock)
 
-#define qe(x)		((queue_entry_t)(x))
-#define TC(x)		((timer_call_t)(x))
+#if DEBUG
+#define TIMER_ASSERT	1
+#endif
+
+//#define TIMER_ASSERT	1
+//#define TIMER_DBG	1
+
+#if TIMER_DBG
+#define DBG(x...) kprintf("DBG: " x);
+#else
+#define DBG(x...)
+#endif
+
+lck_grp_t               timer_call_lck_grp;
+lck_attr_t              timer_call_lck_attr;
+lck_grp_attr_t          timer_call_lck_grp_attr;
+
+
+#define timer_call_lock_spin(queue)		\
+	lck_mtx_lock_spin_always(&queue->lock_data)
+
+#define timer_call_unlock(queue)		\
+	lck_mtx_unlock_always(&queue->lock_data)
+
+
+#define QUEUE(x)	((queue_t)(x))
+#define MPQUEUE(x)	((mpqueue_head_t *)(x))
+#define TIMER_CALL(x)	((timer_call_t)(x))
+
+static boolean_t timer_call_enter_internal(timer_call_t call, timer_call_param_t param1, uint64_t deadline, uint32_t flags);
+boolean_t 	mach_timer_coalescing_enabled = TRUE;
+
+mpqueue_head_t	*timer_call_enqueue_deadline_unlocked(
+			timer_call_t		call,
+			mpqueue_head_t		*queue,
+			uint64_t		deadline);
+
+mpqueue_head_t	*timer_call_dequeue_unlocked(
+			timer_call_t 		call);
+
 
 void
 timer_call_initialize(void)
 {
-	simple_lock_init(&timer_call_lock, 0);
+	lck_attr_setdefault(&timer_call_lck_attr);
+	lck_grp_attr_setdefault(&timer_call_lck_grp_attr);
+	lck_grp_init(&timer_call_lck_grp, "timer_call", &timer_call_lck_grp_attr);
 }
 
+
+void
+timer_call_initialize_queue(mpqueue_head_t *queue)
+{
+	DBG("timer_call_initialize_queue(%p)\n", queue);
+	mpqueue_init(queue, &timer_call_lck_grp, &timer_call_lck_attr);
+}
+
+
 void
 timer_call_setup(
 	timer_call_t			call,
 	timer_call_func_t		func,
 	timer_call_param_t		param0)
 {
-	call_entry_setup(call, func, param0);
+	DBG("timer_call_setup(%p,%p,%p)\n", call, func, param0);
+	call_entry_setup(CE(call), func, param0);
+	simple_lock_init(&(call)->lock, 0);
+	call->async_dequeue = FALSE;
 }
 
-__inline__ queue_t
-call_entry_enqueue_deadline(
-	call_entry_t		entry,
-	queue_t				queue,
-	uint64_t			deadline)
-{
-	queue_t			old_queue = entry->queue;
-	timer_call_t	current;
-
-	if (old_queue != queue || entry->deadline < deadline) {
-		if (old_queue != queue)
-			current = TC(queue_first(queue));
-		else
-			current = TC(queue_next(qe(entry)));
-
-		if (old_queue != NULL)
-			(void)remque(qe(entry));
+/*
+ * Timer call entry locking model
+ * ==============================
+ *
+ * Timer call entries are linked on per-cpu timer queues which are protected
+ * by the queue lock and the call entry lock. The locking protocol is:
+ *
+ *  0) The canonical locking order is timer call entry followed by queue.
+ *
+ *  1) With only the entry lock held, entry.queue is valid:
+ *    1a) NULL: the entry is not queued, or
+ *    1b) non-NULL: this queue must be locked before the entry is modified.
+ *        After locking the queue, the call.async_dequeue flag must be checked:
+ *    1c) TRUE: the entry was removed from the queue by another thread
+ *	        and we must NULL the entry.queue and reset this flag, or
+ *    1d) FALSE: (i.e. queued), the entry can be manipulated.
+ *
+ *  2) If a queue lock is obtained first, the queue is stable:
+ *    2a) If a try-lock of a queued entry succeeds, the call can be operated on
+ *	  and dequeued.
+ *    2b) If a try-lock fails, it indicates that another thread is attempting
+ *        to change the entry and move it to a different position in this queue
+ *        or to a different queue. The entry can be dequeued but it should not be
+ *        operated upon since it is being changed. Furthermore, we don't null
+ *	  the entry.queue pointer (protected by the entry lock we don't own).
+ *	  Instead, we set the async_dequeue flag -- see (1c).
+ */
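
Distilled from the enqueue/dequeue functions that follow and from the shutdown/expire paths later in this file, the protocol reduces to two shapes (a condensed illustration, not the literal code):

/* Entry-first (case 1): the entry lock pins entry.queue. */
simple_lock(&call->lock);
old_queue = MPQUEUE(CE(call)->queue);
if (old_queue != NULL) {
	timer_call_lock_spin(old_queue);
	if (call->async_dequeue) {
		/* (1c): another thread already unlinked us under the queue lock */
		call->async_dequeue = FALSE;
		CE(call)->queue = NULL;
	}
	/* (1d): otherwise still queued and safe to move or remove */
}

/* Queue-first (case 2b): only a try-lock of the entry is legal. */
if (!simple_lock_try(&call->lock)) {
	(void) remque(qe(call));	/* unlinking is safe under the queue lock */
	call->async_dequeue = TRUE;	/* the entry's owner will hit (1c) later */
	/* don't touch the payload or CE(call)->queue here */
}
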
 
-		while (TRUE) {
-			if (	queue_end(queue, qe(current))		||
-					deadline < current->deadline		) {
-				current = TC(queue_prev(qe(current)));
-				break;
-			}
+/*
+ * Inlines timer_call_entry_dequeue() and timer_call_entry_enqueue_deadline()
+ * cast between pointer types (mpqueue_head_t *) and (queue_t) so that
+ * we can use the call_entry_dequeue() and call_entry_enqueue_deadline()
+ * methods to operate on timer_call structs as if they are call_entry structs.
+ * These structures are identical except for their queue head pointer fields.
+ *
+ * In the debug case, we assert that the timer call locking protocol 
+ * is being obeyed.
+ */
+#if TIMER_ASSERT
+static __inline__ mpqueue_head_t *
+timer_call_entry_dequeue(
+	timer_call_t		entry)
+{
+        mpqueue_head_t	*old_queue = MPQUEUE(CE(entry)->queue);
+
+	if (!hw_lock_held((hw_lock_t)&entry->lock))
+		panic("_call_entry_dequeue() "
+			"entry %p is not locked\n", entry);
+	/*
+	 * XXX The queue lock is actually a mutex in spin mode
+	 *     but there's no way to test for it being held
+	 *     so we pretend it's a spinlock!
+	 */
+	if (!hw_lock_held((hw_lock_t)&old_queue->lock_data))
+		panic("_call_entry_dequeue() "
+			"queue %p is not locked\n", old_queue);
+
+	call_entry_dequeue(CE(entry));
 
-			current = TC(queue_next(qe(current)));
-		}
+	return (old_queue);
+}
 
-		insque(qe(entry), qe(current));
-	}
-	else
-	if (deadline < entry->deadline) {
-		current = TC(queue_prev(qe(entry)));
+static __inline__ mpqueue_head_t *
+timer_call_entry_enqueue_deadline(
+	timer_call_t		entry,
+	mpqueue_head_t		*queue,
+	uint64_t		deadline)
+{
+	mpqueue_head_t	*old_queue = MPQUEUE(CE(entry)->queue);
 
-		(void)remque(qe(entry));
+	if (!hw_lock_held((hw_lock_t)&entry->lock))
+		panic("_call_entry_enqueue_deadline() "
+			"entry %p is not locked\n", entry);
+	/* XXX More lock pretense:  */
+	if (!hw_lock_held((hw_lock_t)&queue->lock_data))
+		panic("_call_entry_enqueue_deadline() "
+			"queue %p is not locked\n", queue);
+	if (old_queue != NULL && old_queue != queue)
+		panic("_call_entry_enqueue_deadline() "
+			"old_queue %p != queue", old_queue);
 
-		while (TRUE) {
-			if (	queue_end(queue, qe(current))		||
-					current->deadline <= deadline		) {
-				break;
-			}
+	call_entry_enqueue_deadline(CE(entry), QUEUE(queue), deadline);
 
-			current = TC(queue_prev(qe(current)));
-		}
+	return (old_queue);
+}
 
-		insque(qe(entry), qe(current));
-	}
+#else
 
-	entry->queue = queue;
-	entry->deadline = deadline;
+static __inline__ mpqueue_head_t *
+timer_call_entry_dequeue(
+	timer_call_t		entry)
+{
+	return MPQUEUE(call_entry_dequeue(CE(entry)));
+}
 
-	return (old_queue);
+static __inline__ mpqueue_head_t *
+timer_call_entry_enqueue_deadline(
+	timer_call_t			entry,
+	mpqueue_head_t			*queue,
+	uint64_t			deadline)
+{
+	return MPQUEUE(call_entry_enqueue_deadline(CE(entry),
+						   QUEUE(queue), deadline));
 }
 
-__inline__ queue_t
-call_entry_enqueue_tail(
-	call_entry_t		entry,
-	queue_t				queue)
+#endif
+
+#if TIMER_ASSERT
+unsigned timer_call_enqueue_deadline_unlocked_async1;
+unsigned timer_call_enqueue_deadline_unlocked_async2;
+#endif
+/*
+ * Assumes call_entry and queues unlocked, interrupts disabled.
+ */
+__inline__ mpqueue_head_t *
+timer_call_enqueue_deadline_unlocked(
+	timer_call_t 			call,
+	mpqueue_head_t			*queue,
+	uint64_t			deadline)
 {
-	queue_t			old_queue = entry->queue;
+	call_entry_t	entry = CE(call);
+	mpqueue_head_t	*old_queue;
 
-	if (old_queue != NULL)
-		(void)remque(qe(entry));
+	DBG("timer_call_enqueue_deadline_unlocked(%p,%p,)\n", call, queue);
 
-	enqueue_tail(queue, qe(entry));
+	simple_lock(&call->lock);
+	old_queue = MPQUEUE(entry->queue);
+	if (old_queue != NULL) {
+		timer_call_lock_spin(old_queue);
+		if (call->async_dequeue) {
+			/* collision (1c): null queue pointer and reset flag */
+			call->async_dequeue = FALSE;
+			entry->queue = NULL;
+#if TIMER_ASSERT
+			timer_call_enqueue_deadline_unlocked_async1++;
+#endif
+		} else if (old_queue != queue) {
+			(void)remque(qe(entry));
+			entry->queue = NULL;
+#if TIMER_ASSERT
+			timer_call_enqueue_deadline_unlocked_async2++;
+#endif
+		}
+		if (old_queue != queue) {
+			timer_call_unlock(old_queue);
+			timer_call_lock_spin(queue);
+		}
+	} else {
+		timer_call_lock_spin(queue);
+	}
 
-	entry->queue = queue;
+	timer_call_entry_enqueue_deadline(call, queue, deadline);
+	timer_call_unlock(queue);
+	simple_unlock(&call->lock);
 
 	return (old_queue);
 }
 
-__inline__ queue_t
-call_entry_dequeue(
-	call_entry_t		entry)
+#if TIMER_ASSERT
+unsigned timer_call_dequeue_unlocked_async1;
+unsigned timer_call_dequeue_unlocked_async2;
+#endif
+mpqueue_head_t *
+timer_call_dequeue_unlocked(
+	timer_call_t 		call)
 {
-	queue_t			old_queue = entry->queue;
+	call_entry_t	entry = CE(call);
+	mpqueue_head_t	*old_queue;
 
-	if (old_queue != NULL)
-		(void)remque(qe(entry));
-
-	entry->queue = NULL;
+	DBG("timer_call_dequeue_unlocked(%p)\n", call);
 
+	simple_lock(&call->lock);
+	old_queue = MPQUEUE(entry->queue);
+	if (old_queue != NULL) {
+		timer_call_lock_spin(old_queue);
+		if (call->async_dequeue) {
+			/* collision (1c): null queue pointer and reset flag */
+			call->async_dequeue = FALSE;
+#if TIMER_ASSERT
+			timer_call_dequeue_unlocked_async1++;
+#endif
+		} else {
+			(void)remque(qe(entry));
+#if TIMER_ASSERT
+			timer_call_dequeue_unlocked_async2++;
+#endif
+		}
+		entry->queue = NULL;
+		timer_call_unlock(old_queue);
+	}
+	simple_unlock(&call->lock);
 	return (old_queue);
 }
 
-boolean_t
-timer_call_enter(
-	timer_call_t		call,
-	uint64_t			deadline)
+static boolean_t 
+timer_call_enter_internal(
+	timer_call_t 		call,
+	timer_call_param_t	param1,
+	uint64_t 		deadline,
+	uint32_t 		flags)
 {
-	queue_t			queue, old_queue;
+	mpqueue_head_t		*queue;
+	mpqueue_head_t		*old_queue;
 	spl_t			s;
+	uint64_t 		slop = 0;
 
 	s = splclock();
-	simple_lock(&timer_call_lock);
+
+	call->soft_deadline = deadline;
+	call->flags = flags;
+
+	if ((flags & TIMER_CALL_CRITICAL) == 0 &&
+	     mach_timer_coalescing_enabled) {
+		slop = timer_call_slop(deadline);
+		deadline += slop;
+	}
 
 	queue = timer_queue_assign(deadline);
 
-	old_queue = call_entry_enqueue_deadline(call, queue, deadline);
+	old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline);
 
-	call->param1 = NULL;
+	CE(call)->param1 = param1;
 
-	simple_unlock(&timer_call_lock);
 	splx(s);
 
 	return (old_queue != NULL);
 }
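
timer_call_slop() itself lives in the platform timer code and is only declared in timer_queue.h below; the policy sketched here is an assumption for illustration, not the shipped implementation. The point is that non-critical deadlines are pushed out by a bounded fraction of their remaining interval so nearby timers can fire together:

uint64_t
timer_call_slop(uint64_t deadline)
{
	uint64_t now = mach_absolute_time();

	/* hypothetical policy: ~3% of the time remaining, so short
	 * timers stay accurate while long timers coalesce readily */
	if (deadline > now)
		return (deadline - now) >> 5;

	return 0;
}
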
 
+boolean_t
+timer_call_enter(
+	timer_call_t		call,
+	uint64_t		deadline,
+	uint32_t		flags)
+{
+	return timer_call_enter_internal(call, NULL, deadline, flags);
+}
+
 boolean_t
 timer_call_enter1(
 	timer_call_t		call,
 	timer_call_param_t	param1,
-	uint64_t			deadline)
+	uint64_t		deadline,
+	uint32_t		flags)
 {
-	queue_t			queue, old_queue;
-	spl_t			s;
-
-	s = splclock();
-	simple_lock(&timer_call_lock);
-
-	queue = timer_queue_assign(deadline);
-
-	old_queue = call_entry_enqueue_deadline(call, queue, deadline);
-
-	call->param1 = param1;
-
-	simple_unlock(&timer_call_lock);
-	splx(s);
-
-	return (old_queue != NULL);
+	return timer_call_enter_internal(call, param1, deadline, flags);
 }
 
 boolean_t
 timer_call_cancel(
 	timer_call_t		call)
 {
-	queue_t			old_queue;
+	mpqueue_head_t		*old_queue;
 	spl_t			s;
 
 	s = splclock();
-	simple_lock(&timer_call_lock);
 
-	old_queue = call_entry_dequeue(call);
+	old_queue = timer_call_dequeue_unlocked(call);
 
 	if (old_queue != NULL) {
-		if (!queue_empty(old_queue))
-			timer_queue_cancel(old_queue, call->deadline, TC(queue_first(old_queue))->deadline);
+		timer_call_lock_spin(old_queue);
+		if (!queue_empty(&old_queue->head))
+			timer_queue_cancel(old_queue, CE(call)->deadline, CE(queue_first(&old_queue->head))->deadline);
 		else
-			timer_queue_cancel(old_queue, call->deadline, UINT64_MAX);
+			timer_queue_cancel(old_queue, CE(call)->deadline, UINT64_MAX);
+		timer_call_unlock(old_queue);
 	}
-
-	simple_unlock(&timer_call_lock);
 	splx(s);
 
 	return (old_queue != NULL);
 }
 
+uint32_t	timer_queue_shutdown_lock_skips;
 void
 timer_queue_shutdown(
-	queue_t			queue)
+	mpqueue_head_t		*queue)
 {
-	timer_call_t	call;
-	queue_t			new_queue;
+	timer_call_t		call;
+	mpqueue_head_t		*new_queue;
 	spl_t			s;
 
+	DBG("timer_queue_shutdown(%p)\n", queue);
+
 	s = splclock();
-	simple_lock(&timer_call_lock);
 
-	call = TC(queue_first(queue));
+	/* Note comma operator in while expression re-locking each iteration */
+	while (timer_call_lock_spin(queue), !queue_empty(&queue->head)) {
+		call = TIMER_CALL(queue_first(&queue->head));
+		if (!simple_lock_try(&call->lock)) {
+			/*
+			 * case (2b) lock order inversion, dequeue and skip
+			 * Don't change the call_entry queue back-pointer
+			 * but set the async_dequeue field.
+			 */
+			timer_queue_shutdown_lock_skips++;
+			(void) remque(qe(call));
+			call->async_dequeue = TRUE;
+			timer_call_unlock(queue);
+			continue;
+		}
 
-	while (!queue_end(queue, qe(call))) {
-		new_queue = timer_queue_assign(call->deadline);
+		/* remove entry from old queue */
+		timer_call_entry_dequeue(call);
+		timer_call_unlock(queue);
 
-		call_entry_enqueue_deadline(call, new_queue, call->deadline);
+		/* and queue it on new */
+		new_queue = timer_queue_assign(CE(call)->deadline);
+		timer_call_lock_spin(new_queue);
+		timer_call_entry_enqueue_deadline(
+			call, new_queue, CE(call)->deadline);
+		timer_call_unlock(new_queue);
 
-		call = TC(queue_first(queue));
+		simple_unlock(&call->lock);
 	}
 
-	simple_unlock(&timer_call_lock);
+	timer_call_unlock(queue);
 	splx(s);
 }
 
+uint32_t	timer_queue_expire_lock_skips;
 uint64_t
 timer_queue_expire(
-	queue_t			queue,
+	mpqueue_head_t		*queue,
 	uint64_t		deadline)
 {
 	timer_call_t	call;
 
-	simple_lock(&timer_call_lock);
+	DBG("timer_queue_expire(%p,)\n", queue);
+
+	timer_call_lock_spin(queue);
 
-	call = TC(queue_first(queue));
+	while (!queue_empty(&queue->head)) {
+		call = TIMER_CALL(queue_first(&queue->head));
 
-	while (!queue_end(queue, qe(call))) {
-		if (call->deadline <= deadline) {
+		if (call->soft_deadline <= deadline) {
 			timer_call_func_t		func;
 			timer_call_param_t		param0, param1;
 
-			call_entry_dequeue(call);
+			if (!simple_lock_try(&call->lock)) {
+				/* case (2b) lock inversion, dequeue and skip */
+				timer_queue_expire_lock_skips++;
+				(void) remque(qe(call));
+				call->async_dequeue = TRUE;
+				continue;
+			}
+
+			timer_call_entry_dequeue(call);
 
-			func = call->func;
-			param0 = call->param0;
-			param1 = call->param1;
+			func = CE(call)->func;
+			param0 = CE(call)->param0;
+			param1 = CE(call)->param1;
 
-			simple_unlock(&timer_call_lock);
+			simple_unlock(&call->lock);
+			timer_call_unlock(queue);
 
-			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI,
-							   2)
-							| DBG_FUNC_START,
+			KERNEL_DEBUG_CONSTANT(DECR_TIMER_CALLOUT | DBG_FUNC_START,
 					      func,
 					      param0,
 					      param1, 0, 0);
@@ -291,27 +488,120 @@ timer_queue_expire(
 										timer_call_param_t, param1);
 #endif
 
-			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI,
-							   2)
-							| DBG_FUNC_END,
+			KERNEL_DEBUG_CONSTANT(DECR_TIMER_CALLOUT | DBG_FUNC_END,
 					      func,
 					      param0,
 					      param1, 0, 0);
 
-			simple_lock(&timer_call_lock);
+			timer_call_lock_spin(queue);
 		}
 		else
 			break;
-
-		call = TC(queue_first(queue));
 	}
 
-	if (!queue_end(queue, qe(call)))
-		deadline = call->deadline;
+	if (!queue_empty(&queue->head))
+		deadline = CE(call)->deadline;
 	else
 		deadline = UINT64_MAX;
 
-	simple_unlock(&timer_call_lock);
+	timer_call_unlock(queue);
 
 	return (deadline);
 }
+
+
+extern int serverperfmode;
+uint32_t	timer_queue_migrate_lock_skips;
+/*
+ * timer_queue_migrate() is called by etimer_queue_migrate()
+ * to move timer requests from the local processor (queue_from)
+ * to a target processor's queue (queue_to).
+ */
+int
+timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to)
+{
+	timer_call_t	call;
+	timer_call_t	head_to;
+	int		timers_migrated = 0;
+
+	DBG("timer_queue_migrate(%p,%p)\n", queue_from, queue_to);
+
+	assert(!ml_get_interrupts_enabled());
+	assert(queue_from != queue_to);
+
+	if (serverperfmode) {
+		/*
+		 * if we're running a high end server
+		 * avoid migrations... they add latency
+		 * and don't save us power under typical
+		 * server workloads
+		 */
+		return -4;
+	}
+
+	/*
+	 * Take both local (from) and target (to) timer queue locks while
+	 * moving the timers from the local queue to the target processor.
+	 * We assume that the target is always the boot processor.
+	 * But only move if all of the following are true:
+	 *  - the target queue is non-empty
+	 *  - the local queue is non-empty
+	 *  - the local queue's first deadline is later than the target's
+	 *  - the local queue contains no non-migratable "local" call
+	 * so that we need not have the target resync.
+	 */
+
+	timer_call_lock_spin(queue_to);
+
+	head_to = TIMER_CALL(queue_first(&queue_to->head));
+	if (queue_empty(&queue_to->head)) {
+		timers_migrated = -1;
+		goto abort1;
+	}
+
+	timer_call_lock_spin(queue_from);
+
+	if (queue_empty(&queue_from->head)) {
+		timers_migrated = -2;
+		goto abort2;
+	}
+
+	call = TIMER_CALL(queue_first(&queue_from->head));
+	if (CE(call)->deadline < CE(head_to)->deadline) {
+		timers_migrated = 0;
+		goto abort2;
+	}
+
+	/* perform scan for non-migratable timers */
+	do {
+		if (call->flags & TIMER_CALL_LOCAL) {
+			timers_migrated = -3;
+			goto abort2;
+		}
+		call = TIMER_CALL(queue_next(qe(call)));
+	} while (!queue_end(&queue_from->head, qe(call)));
+
+	/* migration loop itself -- both queues are locked */
+	while (!queue_empty(&queue_from->head)) {
+		call = TIMER_CALL(queue_first(&queue_from->head));
+		if (!simple_lock_try(&call->lock)) {
+			/* case (2b) lock order inversion, dequeue only */
+			timer_queue_migrate_lock_skips++;
+			(void) remque(qe(call));
+			call->async_dequeue = TRUE;
+			continue;
+		}
+		timer_call_entry_dequeue(call);
+		timer_call_entry_enqueue_deadline(
+			call, queue_to, CE(call)->deadline);
+		timers_migrated++;
+		simple_unlock(&call->lock);
+	}
+
+abort2:
+	timer_call_unlock(queue_from);
+abort1:
+	timer_call_unlock(queue_to);
+
+	return timers_migrated;
+}
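
The return value encodes why a migration was declined: -1 target queue empty, -2 local queue empty, -3 a TIMER_CALL_LOCAL timer is pinned locally, -4 serverperfmode, 0 no deadline improvement; positive values count the timers moved. A hedged sketch of a caller (etimer_queue_migrate() is the real consumer; the queue names and this exact handling are illustrative):

/* interrupts already disabled, as the assert above requires */
int moved = timer_queue_migrate(&my_cpu_queue, &boot_cpu_queue);
if (moved > 0) {
	/* the boot processor now owns our earliest deadline, so this
	 * core may let its local hardware timer idle */
} else {
	/* declined (<= 0): keep the local hardware timer armed */
}
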
diff --git a/osfmk/kern/timer_call.h b/osfmk/kern/timer_call.h
index 061e3d96c..f2a074d39 100644
--- a/osfmk/kern/timer_call.h
+++ b/osfmk/kern/timer_call.h
@@ -36,30 +36,47 @@
 
 #ifdef MACH_KERNEL_PRIVATE
 
-typedef struct call_entry	*timer_call_t;
+#include <kern/call_entry.h>
+
+/*
+ * NOTE: for now, bsd/dev/dtrace/dtrace_glue.c has its own definition
+ * of this data structure, and the two had better match.
+ */
+typedef struct timer_call {
+	struct call_entry 	call_entry;
+	decl_simple_lock_data( ,lock);		/* protects call_entry queue */
+	uint64_t		soft_deadline;
+	uint32_t		flags;
+	boolean_t		async_dequeue;	/* this field is protected by
+						   call_entry queue's lock */
+} *timer_call_t;
+
 typedef void				*timer_call_param_t;
 typedef void				(*timer_call_func_t)(
 									timer_call_param_t		param0,
 									timer_call_param_t		param1);
-
+#define TIMER_CALL_CRITICAL	0x01
+#define TIMER_CALL_LOCAL	0x02
 extern boolean_t	timer_call_enter(
 						timer_call_t	call,
-						uint64_t		deadline);
+						uint64_t	deadline,
+						uint32_t	flags);
 
 extern boolean_t	timer_call_enter1(
 						timer_call_t		call,
 						timer_call_param_t	param1,
-						uint64_t			deadline);
+						uint64_t		deadline,
+						uint32_t 		flags);
 
 extern boolean_t	timer_call_cancel(
 						timer_call_t	call);
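
With the widened signatures a client now arms a timer as sketched below; the callout, cookie, and deadline are hypothetical, and TIMER_CALL_CRITICAL is what opts a timer out of the coalescing slop applied in timer_call_enter_internal():

static timer_call_data_t my_timer;

static void
my_timeout(__unused timer_call_param_t p0, timer_call_param_t p1)
{
	void *cookie = p1;	/* param1 supplied at timer_call_enter1() */
	(void) cookie;
}

void
my_timer_arm(void *cookie, uint64_t deadline)
{
	timer_call_setup(&my_timer, my_timeout, NULL);

	/* flags == 0 permits coalescing; pass TIMER_CALL_CRITICAL
	 * to demand the exact deadline instead */
	timer_call_enter1(&my_timer, cookie, deadline, 0);
}
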
 
-#include <kern/call_entry.h>
-
-typedef struct call_entry	timer_call_data_t;
+typedef struct timer_call 	timer_call_data_t;
 
 extern void		timer_call_initialize(void);
 
+extern void		timer_call_initialize_queue(mpqueue_head_t *);
+
 extern void		timer_call_setup(
 					timer_call_t		call,
 					timer_call_func_t	func,
diff --git a/osfmk/kern/timer_queue.h b/osfmk/kern/timer_queue.h
index 050b09afa..3975b3101 100644
--- a/osfmk/kern/timer_queue.h
+++ b/osfmk/kern/timer_queue.h
@@ -43,14 +43,17 @@
  */
 
 /* Request an expiration deadline, returns queue association */
-extern queue_t		timer_queue_assign(
+extern mpqueue_head_t *timer_queue_assign(
+				uint64_t		deadline);
+
+extern uint64_t		timer_call_slop(
 						uint64_t		deadline);
 
 /* Cancel an associated expiration deadline and specify new deadline */
-extern void			timer_queue_cancel(
-						queue_t			queue,
-						uint64_t		deadline,
-						uint64_t		new_deadline);
+extern void		timer_queue_cancel(
+				mpqueue_head_t		*queue,
+				uint64_t		deadline,
+				uint64_t		new_deadline);
 
 /*
  *	Invoked by platform, implemented by kernel.
@@ -58,12 +61,17 @@ extern void			timer_queue_cancel(
 
 /* Process deadline expiration for queue, returns new deadline */
 extern uint64_t		timer_queue_expire(
-						queue_t			queue,
-						uint64_t		deadline);
+				mpqueue_head_t		*queue,
+				uint64_t		deadline);
 
 /* Shutdown a timer queue and reassign existing activities */
-extern void			timer_queue_shutdown(
-						queue_t			queue);
+extern void		timer_queue_shutdown(
+				mpqueue_head_t		*queue);
+
+/* Move timer requests from one queue to another */
+extern int		timer_queue_migrate(
+				mpqueue_head_t		*from,
+				mpqueue_head_t		*to);
 
 #endif	/* MACH_KERNEL_PRIVATE */
 
diff --git a/osfmk/kern/wait_queue.c b/osfmk/kern/wait_queue.c
index 6763ac65c..14cd08724 100644
--- a/osfmk/kern/wait_queue.c
+++ b/osfmk/kern/wait_queue.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -628,6 +628,25 @@ wait_queue_link(
 	return ret;
 }	
 
+wait_queue_link_t
+wait_queue_link_allocate(void)
+{
+	wait_queue_link_t wql;
+
+	wql = zalloc(_wait_queue_link_zone); /* Can't fail */
+	bzero(wql, sizeof(*wql));
+	wql->wql_type = WAIT_QUEUE_UNLINKED;
+
+	return wql;
+}
+
+kern_return_t
+wait_queue_link_free(wait_queue_link_t wql) 
+{
+	zfree(_wait_queue_link_zone, wql);
+	return KERN_SUCCESS;
+}
+
 
 /*
  *	Routine:	wait_queue_unlink_locked
@@ -848,6 +867,48 @@ retry:
 	return(KERN_SUCCESS);
 }	
 
+kern_return_t
+wait_queue_set_unlink_one(
+	wait_queue_set_t wq_set,
+	wait_queue_link_t wql)
+{
+	wait_queue_t wq;
+	spl_t s;
+
+	assert(wait_queue_is_set(wq_set));
+
+retry:
+	s = splsched();
+	wqs_lock(wq_set);
+
+	WAIT_QUEUE_SET_CHECK(wq_set);
+
+	/* Already unlinked, e.g. by selclearthread() */
+	if (wql->wql_type == WAIT_QUEUE_UNLINKED) {
+		goto out;
+	}
+
+	WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql);
+
+	/* On a wait queue, and we hold set queue lock ... */
+	wq = wql->wql_queue;
+	if (wait_queue_lock_try(wq)) {
+		wait_queue_unlink_locked(wq, wq_set, wql);
+		wait_queue_unlock(wq);
+	} else {
+		wqs_unlock(wq_set);
+		splx(s);
+		delay(1);
+		goto retry;
+	}
+
+out:
+	wqs_unlock(wq_set);
+	splx(s);
+
+	return KERN_SUCCESS;
+}
+
 /*
  *	Routine:	wait_queue_assert_wait64_locked
  *	Purpose:
@@ -868,6 +929,7 @@ wait_queue_assert_wait64_locked(
 	thread_t thread)
 {
 	wait_result_t wait_result;
+	boolean_t realtime;
 
 	if (!wait_queue_assert_possible(thread))
 		panic("wait_queue_assert_wait64_locked");
@@ -878,7 +940,17 @@ wait_queue_assert_wait64_locked(
 		if (event == NO_EVENT64 && wqs_is_preposted(wqs))
 			return(THREAD_AWAKENED);
 	}
-	  
+
+	/*
+	 * Realtime threads get priority for wait queue placements.
+	 * This allows wait_queue_wakeup_one to prefer a waiting
+	 * realtime thread, similar in principle to performing
+	 * a wait_queue_wakeup_all and allowing scheduler prioritization
+	 * to run the realtime thread, but without causing the
+	 * lock contention of that scenario.
+	 */
+	realtime = (thread->sched_pri >= BASEPRI_REALTIME);
+
 	/*
 	 * This is the extent to which we currently take scheduling attributes
 	 * into account.  If the thread is vm privileged, we stick it at
@@ -887,7 +959,9 @@ wait_queue_assert_wait64_locked(
 	 */
 	wait_result = thread_mark_wait_locked(thread, interruptible);
 	if (wait_result == THREAD_WAITING) {
-		if (!wq->wq_fifo || thread->options & TH_OPT_VMPRIV)
+		if (!wq->wq_fifo
+			|| (thread->options & TH_OPT_VMPRIV)
+			|| realtime)
 			enqueue_head(&wq->wq_queue, (queue_entry_t) thread);
 		else
 			enqueue_tail(&wq->wq_queue, (queue_entry_t) thread);
@@ -896,7 +970,11 @@ wait_queue_assert_wait64_locked(
 		thread->wait_queue = wq;
 
 		if (deadline != 0) {
-			if (!timer_call_enter(&thread->wait_timer, deadline))
+			uint32_t flags;
+
+			flags = realtime ? TIMER_CALL_CRITICAL : 0;
+
+			if (!timer_call_enter(&thread->wait_timer, deadline, flags))
 				thread->wait_timer_active++;
 			thread->wait_timer_is_set = TRUE;
 		}
@@ -1035,7 +1113,7 @@ _wait_queue_select64_all(
 
 			if (t->wait_event == event) {
 				thread_lock(t);
-				remqueue(q, (queue_entry_t) t);
+				remqueue((queue_entry_t) t);
 				enqueue (wake_queue, (queue_entry_t) t);
 				t->wait_queue = WAIT_QUEUE_NULL;
 				t->wait_event = NO_EVENT64;
@@ -1242,7 +1320,7 @@ _wait_queue_select64_one(
 			t = (thread_t)wq_element;
 			if (t->wait_event == event) {
 				thread_lock(t);
-				remqueue(q, (queue_entry_t) t);
+				remqueue((queue_entry_t) t);
 				t->wait_queue = WAIT_QUEUE_NULL;
 				t->wait_event = NO_EVENT64;
 				t->at_safe_point = FALSE;
@@ -1278,7 +1356,7 @@ wait_queue_pull_thread_locked(
 
 	assert(thread->wait_queue == waitq);
 
-	remqueue(&waitq->wq_queue, (queue_entry_t)thread );
+	remqueue((queue_entry_t)thread );
 	thread->wait_queue = WAIT_QUEUE_NULL;
 	thread->wait_event = NO_EVENT64;
 	thread->at_safe_point = FALSE;
@@ -1314,7 +1392,7 @@ _wait_queue_select64_thread(
 
 	thread_lock(thread);
 	if ((thread->wait_queue == wq) && (thread->wait_event == event)) {
-		remqueue(q, (queue_entry_t) thread);
+		remqueue((queue_entry_t) thread);
 		thread->at_safe_point = FALSE;
 		thread->wait_event = NO_EVENT64;
 		thread->wait_queue = WAIT_QUEUE_NULL;
@@ -1448,7 +1526,8 @@ kern_return_t
 wait_queue_wakeup_one(
 	wait_queue_t wq,
 	event_t event,
-	wait_result_t result)
+	wait_result_t result,
+	int priority)
 {
 	thread_t thread;
 	spl_t s;
@@ -1465,6 +1544,14 @@ wait_queue_wakeup_one(
 	if (thread) {
 		kern_return_t res;
 
+		if (thread->sched_pri < priority) {
+			if (priority <= MAXPRI) {
+				set_sched_pri(thread, priority);
+
+				thread->was_promoted_on_wakeup = 1;
+				thread->sched_flags |= TH_SFLAG_PROMOTED;
+			}
+		}
 		res = thread_go(thread, result);
 		assert(res == KERN_SUCCESS);
 		thread_unlock(thread);
diff --git a/osfmk/kern/wait_queue.h b/osfmk/kern/wait_queue.h
index 386bd093c..42675a30b 100644
--- a/osfmk/kern/wait_queue.h
+++ b/osfmk/kern/wait_queue.h
@@ -43,6 +43,7 @@
 
 #include <kern/lock.h>
 #include <kern/queue.h>
+#include <mach/branch_predicates.h>
 
 #include <machine/cpu_number.h>
 #include <machine/machine_routines.h> /* machine_timeout_suspended() */
@@ -153,7 +154,7 @@ typedef struct _wait_queue_link {
 #define wait_queue_lock_try(wq) (hw_lock_try(&(wq)->wq_interlock))
 
 /* For x86, the hardware timeout is in TSC units. */
-#if defined(i386)
+#if defined(i386) || defined(x86_64)
 #define	hwLockTimeOut LockTimeOutTSC
 #else
 #define	hwLockTimeOut LockTimeOut
@@ -166,8 +167,9 @@ typedef struct _wait_queue_link {
  */
 
 static inline void wait_queue_lock(wait_queue_t wq) {
-	if (hw_lock_to(&(wq)->wq_interlock, hwLockTimeOut * 2) == 0) {
+	if (__improbable(hw_lock_to(&(wq)->wq_interlock, hwLockTimeOut * 2) == 0)) {
 		boolean_t wql_acquired = FALSE;
+
 		while (machine_timeout_suspended()) {
 #if	defined(__i386__) || defined(__x86_64__)
 /*
@@ -179,7 +181,6 @@ static inline void wait_queue_lock(wait_queue_t wq) {
 			if ((wql_acquired = hw_lock_to(&(wq)->wq_interlock, hwLockTimeOut * 2)))
 				break;
 		}
-
 		if (wql_acquired == FALSE)
 			panic("wait queue deadlock - wq=%p, cpu=%d\n", wq, cpu_number());
 	}
@@ -329,6 +330,15 @@ extern kern_return_t wait_queue_unlink_all(
 extern kern_return_t wait_queue_set_unlink_all(
 			wait_queue_set_t set_queue);
 
+#ifdef XNU_KERNEL_PRIVATE
+extern kern_return_t wait_queue_set_unlink_one(
+			wait_queue_set_t set_queue,
+			wait_queue_link_t link);
+
+extern wait_queue_link_t wait_queue_link_allocate(void);
+
+#endif /* XNU_KERNEL_PRIVATE */
+
 /* legacy API */
 kern_return_t wait_queue_sub_init(
 			wait_queue_set_t set_queue,
@@ -388,7 +398,8 @@ extern wait_result_t wait_queue_assert_wait(
 extern kern_return_t wait_queue_wakeup_one(
 			wait_queue_t wait_queue,
 			event_t wake_event,
-			wait_result_t result);
+			wait_result_t result,
+	                int priority);
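
Callers that want the historical behavior pass -1 (below any real priority) so the promotion branch in wait_queue_wakeup_one() never fires, which is exactly what the thread_call.c conversions earlier in this patch do; a waker may instead name a floor priority. An illustrative pair:

/* legacy semantics: wake without promotion */
wait_queue_wakeup_one(wq, event, THREAD_AWAKENED, -1);

/* hand-off with a floor: the chosen waiter runs at >= BASEPRI_KERNEL,
 * with TH_SFLAG_PROMOTED noted so the boost can be undone later */
wait_queue_wakeup_one(wq, event, THREAD_AWAKENED, BASEPRI_KERNEL);
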
 
 /* wakeup all the threads waiting on <wait_queue,event> pair */
 extern kern_return_t wait_queue_wakeup_all(
diff --git a/osfmk/kern/zalloc.c b/osfmk/kern/zalloc.c
index c0e567eaf..c6bf2f01e 100644
--- a/osfmk/kern/zalloc.c
+++ b/osfmk/kern/zalloc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -71,6 +71,7 @@
 #include <mach/vm_param.h>
 #include <mach/kern_return.h>
 #include <mach/mach_host_server.h>
+#include <mach/task_server.h>
 #include <mach/machine/vm_types.h>
 #include <mach_debug/zone_info.h>
 
@@ -96,13 +97,6 @@
 #include <libkern/OSDebug.h>
 #include <sys/kdebug.h>
 
-#if defined(__ppc__)
-/* for fake zone stat routines */
-#include <ppc/savearea.h>
-#include <ppc/mappings.h>
-#endif
-
-
 /* 
  * Zone Corruption Debugging
  *
@@ -114,7 +108,7 @@
  *     each other when re-using the zone element, to detect modifications.
  * (3) poison the freed memory by overwriting it with 0xdeadbeef.
  *
- * The first two checks are farily light weight and are enabled by specifying "-zc" 
+ * The first two checks are fairly light weight and are enabled by specifying "-zc" 
  * in the boot-args.  If you want more aggressive checking for use-after-free bugs
  * and you don't mind the additional overhead, then turn on poisoning by adding
  * "-zp" to the boot-args in addition to "-zc".  If you specify -zp without -zc,
@@ -125,6 +119,48 @@
 boolean_t check_freed_element = FALSE;		/* enabled by -zc in boot-args */
 boolean_t zfree_clear = FALSE;			/* enabled by -zp in boot-args */
 
+/*
+ * Fake zones for things that want to report via zprint but are not actually zones.
+ */
+struct fake_zone_info {
+	const char* name;
+	void (*init)(int);
+	void (*query)(int *,
+		     vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
+		      uint64_t *, int *, int *, int *);
+};
+
+static struct fake_zone_info fake_zones[] = {
+	{
+		.name = "kernel_stacks",
+		.init = stack_fake_zone_init,
+		.query = stack_fake_zone_info,
+	},
+#if defined(__i386__) || defined (__x86_64__)
+	{
+		.name = "page_tables",
+		.init = pt_fake_zone_init,
+		.query = pt_fake_zone_info,
+	},
+#endif /* i386 */
+	{
+		.name = "kalloc.large",
+		.init = kalloc_fake_zone_init,
+		.query = kalloc_fake_zone_info,
+	},
+};
+unsigned int num_fake_zones = sizeof(fake_zones)/sizeof(fake_zones[0]);
+
+/*
+ * Zone info options
+ */
+boolean_t zinfo_per_task = FALSE;		/* enabled by -zinfop in boot-args */
+#define ZINFO_SLOTS 200				/* for now */
+#define ZONES_MAX (ZINFO_SLOTS - num_fake_zones - 1)
+
+/* 
+ * Allocation helper macros
+ */
 #define is_kernel_data_addr(a)	(!(a) || ((a) >= vm_min_kernel_address && !((a) & 0x3)))
 
 #define ADD_TO_ZONE(zone, element)					\
@@ -159,13 +195,14 @@ MACRO_BEGIN									\
 			if (zfree_clear) {					\
 				unsigned int ii;				\
 				for (ii = sizeof(vm_offset_t) / sizeof(uint32_t); \
-					 ii < zone->elem_size/sizeof(uint32_t) - sizeof(vm_offset_t) / sizeof(uint32_t); \
+				     ii < (zone)->elem_size/sizeof(uint32_t) - sizeof(vm_offset_t) / sizeof(uint32_t); \
 					 ii++)					\
 					if (((uint32_t *)(ret))[ii] != (uint32_t)0xdeadbeef) \
 						panic("a freed zone element has been modified");\
 			}							\
 		}								\
 		(zone)->count++;						\
+		(zone)->sum_count++;						\
 		(zone)->free_elements = *((vm_offset_t *)(ret));		\
 	}									\
 MACRO_END
@@ -229,6 +266,8 @@ vm_map_t	zone_map = VM_MAP_NULL;
 
 zone_t		zone_zone = ZONE_NULL;	/* the zone containing other zones */
 
+zone_t		zinfo_zone = ZONE_NULL; /* zone of per-task zone info */
+
 /*
  *	The VM system gives us an initial chunk of memory.
  *	It has to be big enough to allocate the zone_zone
@@ -320,8 +359,7 @@ unsigned int		num_zones;
 boolean_t zone_gc_allowed = TRUE;
 boolean_t zone_gc_forced = FALSE;
 boolean_t panic_include_zprint = FALSE;
-unsigned zone_gc_last_tick = 0;
-unsigned zone_gc_max_rate = 0;		/* in ticks */
+boolean_t zone_gc_allowed_by_time_throttle = TRUE;
 
 /*
  * Zone leak debugging code
@@ -366,15 +404,13 @@ static char zone_name_to_log[MAX_ZONE_NAME] = "";	/* the zone name we're logging
  * but one doesn't generally care about performance when tracking down a leak.  The log is capped at 8000
  * records since going much larger than this tends to make the system unresponsive and unbootable on small
  * memory configurations.  The default value is 4000 records.
- *
- * MAX_DEPTH configures how deep of a stack trace is taken on each zalloc in the zone of interrest.  15
- * levels is usually enough to get past all the layers of code in kalloc and IOKit and see who the actual
- * caller is up above these lower levels.
  */
-
+#if	defined(__LP64__)
+#define ZRECORDS_MAX 		16000		/* Max records allowed in the log */
+#else
 #define ZRECORDS_MAX 		8000		/* Max records allowed in the log */
+#endif
 #define ZRECORDS_DEFAULT	4000		/* default records in log if zrecs is not specified in boot-args */
-#define MAX_DEPTH 		15		/* number of levels of the stack trace to record */
 
 /*
  * Each record in the log contains a pointer to the zone element it refers to, a "time" number that allows
@@ -388,7 +424,7 @@ struct zrecord {
         void		*z_element;		/* the element that was zalloc'ed or zfree'ed */
         uint32_t	z_opcode:1,		/* whether it was a zalloc or zfree */
 			z_time:31;		/* time index when operation was done */
-        void		*z_pc[MAX_DEPTH];	/* stack trace of caller */
+        void		*z_pc[MAX_ZTRACE_DEPTH];	/* stack trace of caller */
 };
 
 /*
@@ -458,7 +494,526 @@ log_this_zone(const char *zonename, const char *logname)
 
 extern boolean_t zlog_ready;
 
+#if CONFIG_ZLEAKS
+#pragma mark -
+#pragma mark Zone Leak Detection
+
+/* 
+ * The zone leak detector, abbreviated 'zleak', keeps track of a subset of the currently outstanding
+ * allocations made by the zone allocator.  Every z_sample_factor allocations in each zone, we capture a
+ * backtrace.  Every free, we examine the table and, if the allocation was being tracked,
+ * stop tracking it.
+ *
+ * We track the allocations in the zallocations hash table, which stores the address that was returned from 
+ * the zone allocator.  Each stored entry in the zallocations table points to an entry in the ztraces table, which
+ * stores the backtrace associated with that allocation.  This provides uniquing for the relatively large
+ * backtraces - we don't store them more than once.
+ *
+ * Data collection begins when the zone map is 50% full, and only occurs for zones that are taking up
+ * a large amount of virtual space.
+ */
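
On the allocation side the capture is sampled rather than exhaustive. A sketch of the hook's shape in zalloc() -- the per-zone gate and counter names (zleak_on, zleak_capture) are assumptions for illustration, while zleak_log(), fastbacktrace(), and z_sample_factor are the real interfaces defined in this file:

/* inside zalloc(), once an element address addr is in hand */
if ((zleak_state & ZLEAK_STATE_ACTIVE) &&
    zone->zleak_on &&				/* assumed per-zone gate */
    (++zone->zleak_capture >= z_sample_factor)) {
	uintptr_t zbt[MAX_ZTRACE_DEPTH];
	uint32_t depth = fastbacktrace(zbt, MAX_ZTRACE_DEPTH);

	/* FALSE means the zleak lock was busy; leave the counter
	 * high so the very next allocation retries the capture */
	if (zleak_log(zbt, (uintptr_t)addr, depth, zone->elem_size))
		zone->zleak_capture = 0;
}
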
+#define ZLEAK_STATE_ENABLED		0x01	/* Zone leak monitoring should be turned on if zone_map fills up. */
+#define ZLEAK_STATE_ACTIVE 		0x02	/* We are actively collecting traces. */
+#define ZLEAK_STATE_ACTIVATING 		0x04	/* Some thread is doing setup; others should move along. */
+#define ZLEAK_STATE_FAILED		0x08	/* Attempt to allocate tables failed.  We will not try again. */
+uint32_t	zleak_state = 0;		/* State of collection, as above */
+
+boolean_t	panic_include_ztrace	= FALSE;  	/* Enable zleak logging on panic */
+vm_size_t 	zleak_global_tracking_threshold;	/* Size of zone map at which to start collecting data */
+vm_size_t 	zleak_per_zone_tracking_threshold;	/* Size a zone will have before we will collect data on it */
+unsigned int 	z_sample_factor	= 1000;			/* Allocations per sample attempt */
+
+/*
+ * Counters for allocation statistics.
+ */ 
+
+/* Times two active records want to occupy the same spot */
+unsigned int z_alloc_collisions = 0;
+unsigned int z_trace_collisions = 0;
+
+/* Times a new record lands on a spot previously occupied by a freed allocation */
+unsigned int z_alloc_overwrites = 0;
+unsigned int z_trace_overwrites = 0;
+
+/* Times a new alloc or trace is put into the hash table */
+unsigned int z_alloc_recorded	= 0;
+unsigned int z_trace_recorded	= 0;
+
+/* Times zleak_log returned false due to not being able to acquire the lock */
+unsigned int z_total_conflicts	= 0;
+
+
+#pragma mark struct zallocation
+/*
+ * Structure for keeping track of an allocation
+ * An allocation bucket is in use if its element is not NULL
+ */
+struct zallocation {
+	uintptr_t		za_element;		/* the element that was zalloc'ed or zfree'ed, NULL if bucket unused */
+	vm_size_t		za_size;			/* how much memory did this allocation take up? */
+	uint32_t		za_trace_index;	/* index into ztraces for backtrace associated with allocation */
+	/* TODO: #if this out */
+	uint32_t		za_hit_count;		/* for determining effectiveness of hash function */
+};
+
+/* Size must be a power of two for the zhash to be able to just mask off bits instead of mod */
+#define ZLEAK_ALLOCATION_MAP_NUM	16384
+#define ZLEAK_TRACE_MAP_NUM		8192
+
+uint32_t zleak_alloc_buckets = ZLEAK_ALLOCATION_MAP_NUM;
+uint32_t zleak_trace_buckets = ZLEAK_TRACE_MAP_NUM;
+
+vm_size_t zleak_max_zonemap_size;
+
+/* Hashmaps of allocations and their corresponding traces */
+static struct zallocation*	zallocations;
+static struct ztrace*		ztraces;
+
+/* not static so that panic can see this, see kern/debug.c */
+struct ztrace*				top_ztrace;
+
+/* Lock to protect zallocations, ztraces, and top_ztrace from concurrent modification. */
+static lck_mtx_t			zleak_lock;
+static lck_attr_t			zleak_lock_attr;
+static lck_grp_t			zleak_lock_grp;
+static lck_grp_attr_t			zleak_lock_grp_attr;
+
+/*
+ * Initializes the zone leak monitor.  Called from zone_init()
+ */
+static void 
+zleak_init(vm_size_t max_zonemap_size) 
+{
+	char			scratch_buf[16];
+	boolean_t		zleak_enable_flag = FALSE;
+
+	zleak_max_zonemap_size = max_zonemap_size;
+	zleak_global_tracking_threshold = max_zonemap_size / 2;	
+	zleak_per_zone_tracking_threshold = zleak_global_tracking_threshold / 8;
+
+	/* -zleakoff (flag to disable zone leak monitor) */
+	if (PE_parse_boot_argn("-zleakoff", scratch_buf, sizeof(scratch_buf))) {
+		zleak_enable_flag = FALSE;
+		printf("zone leak detection disabled\n");
+	} else {
+		zleak_enable_flag = TRUE;
+		printf("zone leak detection enabled\n");
+	}
+	
+	/* zfactor=XXXX (override how often to sample the zone allocator) */
+	if (PE_parse_boot_argn("zfactor", &z_sample_factor, sizeof(z_sample_factor))) {
+		printf("Zone leak factor override:%u\n", z_sample_factor);
+	}
+	
+	/* zleak-allocs=XXXX (override number of buckets in zallocations) */
+	if (PE_parse_boot_argn("zleak-allocs", &zleak_alloc_buckets, sizeof(zleak_alloc_buckets))) {
+		printf("Zone leak alloc buckets override:%u\n", zleak_alloc_buckets);
+		/* uses 'is power of 2' trick: (0x01000 & 0x00FFF == 0) */
+		if (zleak_alloc_buckets == 0 || (zleak_alloc_buckets & (zleak_alloc_buckets-1))) {
+			printf("Override isn't a power of two, bad things might happen!");
+		}
+	}
+	
+	/* zleak-traces=XXXX (override number of buckets in ztraces) */
+	if (PE_parse_boot_argn("zleak-traces", &zleak_trace_buckets, sizeof(zleak_trace_buckets))) {
+		printf("Zone leak trace buckets override:%u\n", zleak_trace_buckets);
+		/* uses 'is power of 2' trick: (0x01000 & 0x00FFF == 0) */
+		if (zleak_trace_buckets == 0 || (zleak_trace_buckets & (zleak_trace_buckets-1))) {
+			printf("Override isn't a power of two, bad things might happen!");
+		}
+	}
+	
+	/* allocate the zleak_lock */
+	lck_grp_attr_setdefault(&zleak_lock_grp_attr);
+	lck_grp_init(&zleak_lock_grp, "zleak_lock", &zleak_lock_grp_attr);
+	lck_attr_setdefault(&zleak_lock_attr);
+	lck_mtx_init(&zleak_lock, &zleak_lock_grp, &zleak_lock_attr);
+	
+	if (zleak_enable_flag) {
+		zleak_state = ZLEAK_STATE_ENABLED;
+	}
+}
+
+#if CONFIG_ZLEAKS
+
+/*
+ * Support for kern.zleak.active sysctl - a simplified
+ * version of the zleak_state variable.
+ */
+int
+get_zleak_state(void)
+{
+	if (zleak_state & ZLEAK_STATE_FAILED)
+		return (-1);
+	if (zleak_state & ZLEAK_STATE_ACTIVE)
+		return (1);
+	return (0);
+}
+
+#endif
+
+
+kern_return_t
+zleak_activate(void)
+{
+	kern_return_t retval;
+	vm_size_t z_alloc_size = zleak_alloc_buckets * sizeof(struct zallocation);
+	vm_size_t z_trace_size = zleak_trace_buckets * sizeof(struct ztrace);
+	void *allocations_ptr = NULL;
+	void *traces_ptr = NULL;
+
+	/* Only one thread attempts to activate at a time */
+	if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) {
+		return KERN_SUCCESS;
+	}
+
+	/* Indicate that we're doing the setup */
+	lck_mtx_lock_spin(&zleak_lock);
+	if (zleak_state & (ZLEAK_STATE_ACTIVE | ZLEAK_STATE_ACTIVATING | ZLEAK_STATE_FAILED)) {
+		lck_mtx_unlock(&zleak_lock);
+		return KERN_SUCCESS;
+	}
+
+	zleak_state |= ZLEAK_STATE_ACTIVATING;
+	lck_mtx_unlock(&zleak_lock);
+
+	/* Allocate and zero tables */
+	retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&allocations_ptr, z_alloc_size);
+	if (retval != KERN_SUCCESS) {
+		goto fail;
+	}
+
+	retval = kmem_alloc_kobject(kernel_map, (vm_offset_t*)&traces_ptr, z_trace_size);
+	if (retval != KERN_SUCCESS) {
+		goto fail;
+	}
+
+	bzero(allocations_ptr, z_alloc_size);
+	bzero(traces_ptr, z_trace_size);
+
+	/* Everything's set.  Install tables, mark active. */
+	zallocations = allocations_ptr;
+	ztraces = traces_ptr;
+
+	/*
+	 * Initialize the top_ztrace to the first entry in ztraces, 
+	 * so we don't have to check for null in zleak_log
+	 */
+	top_ztrace = &ztraces[0];
+
+	/*
+	 * Note that we do need a barrier between installing
+	 * the tables and setting the active flag, because the zfree()
+	 * path accesses the table without a lock if we're active.
+	 */
+	lck_mtx_lock_spin(&zleak_lock);
+	zleak_state |= ZLEAK_STATE_ACTIVE;
+	zleak_state &= ~ZLEAK_STATE_ACTIVATING;
+	lck_mtx_unlock(&zleak_lock);
+	
+	return 0;
+
+fail:	
+	/*
+	 * If we fail to allocate memory, don't further tax
+	 * the system by trying again.
+	 */
+	lck_mtx_lock_spin(&zleak_lock);
+	zleak_state |= ZLEAK_STATE_FAILED;
+	zleak_state &= ~ZLEAK_STATE_ACTIVATING;
+	lck_mtx_unlock(&zleak_lock);
+
+	if (allocations_ptr != NULL) {
+		kmem_free(kernel_map, (vm_offset_t)allocations_ptr, z_alloc_size);
+	}
+
+	if (traces_ptr != NULL) {
+		kmem_free(kernel_map, (vm_offset_t)traces_ptr, z_trace_size);
+	}
+
+	return retval;
+}
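
Activation is lazy: as described above, collection only starts once the zone map passes zleak_global_tracking_threshold (half of its maximum size). A sketch of the trigger on the allocation slow path, with the map-usage accessor name assumed:

if ((zleak_state & ZLEAK_STATE_ENABLED) &&
    !(zleak_state & ZLEAK_STATE_ACTIVE) &&
    zone_map_used() >= zleak_global_tracking_threshold) {	/* accessor assumed */
	/* racing threads are fine: zleak_activate() serializes with
	 * ZLEAK_STATE_ACTIVATING and is a no-op once ACTIVE or FAILED */
	(void) zleak_activate();
}
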
+
+/*
+ * TODO: What about allocations that never get deallocated, 
+ * especially ones with unique backtraces? Should we wait to record
+ * until after boot has completed?  
+ * (How many persistent zallocs are there?)
+ */
+
+/*
+ * This function records the allocation in the allocations table
+ * and stores the associated backtrace in the traces table
+ * (or just increments the refcount if the trace is already recorded).
+ * If the allocation slot is in use, the old allocation is replaced with the new allocation, and
+ * the associated trace's refcount is decremented.
+ * If the trace slot is in use by a different trace, the function returns without recording.
+ * The refcount is incremented by the amount of memory the allocation consumes.
+ * The return value indicates whether to try again next time.
+ */
+static boolean_t
+zleak_log(uintptr_t* bt,
+		  uintptr_t addr,
+		  uint32_t depth,
+		  vm_size_t allocation_size) 
+{
+	/* Quit if there's someone else modifying the hash tables */
+	if (!lck_mtx_try_lock_spin(&zleak_lock)) {
+		z_total_conflicts++;
+		return FALSE;
+	}
+	
+	struct zallocation* allocation	= &zallocations[hashaddr(addr, zleak_alloc_buckets)];
+	
+	uint32_t trace_index = hashbacktrace(bt, depth, zleak_trace_buckets);
+	struct ztrace* trace = &ztraces[trace_index];
+	
+	allocation->za_hit_count++;
+	trace->zt_hit_count++;
+	
+	/* 
+	 * If the allocation bucket we want to be in is occupied, and if the occupier
+	 * has the same trace as us, just bail.  
+	 */
+	if (allocation->za_element != (uintptr_t) 0 && trace_index == allocation->za_trace_index) {
+		z_alloc_collisions++;
+		
+		lck_mtx_unlock(&zleak_lock);
+		return TRUE;
+	}
+	
+	/* STEP 1: Store the backtrace in the traces array. */
+	/* A size of zero indicates that the trace bucket is free. */
+	
+	if (trace->zt_size > 0 && bcmp(trace->zt_stack, bt, (depth * sizeof(uintptr_t))) != 0 ) {
+		/* 
+		 * Different unique trace with same hash!
+		 * Just bail - if we're trying to record the leaker, hopefully the other trace will be deallocated
+		 * and get out of the way for later chances
+		 */
+		trace->zt_collisions++;
+		z_trace_collisions++;
+		
+		lck_mtx_unlock(&zleak_lock);
+		return TRUE;
+	} else if (trace->zt_size > 0) {
+		/* Same trace, already added, so increment refcount */
+		trace->zt_size += allocation_size;
+	} else {
+		/* Found an unused trace bucket, record the trace here! */
+		if (trace->zt_depth != 0) /* if this slot was previously used but not currently in use */
+			z_trace_overwrites++;
+		
+		z_trace_recorded++;
+		trace->zt_size			= allocation_size;
+		memcpy(trace->zt_stack, bt, (depth * sizeof(uintptr_t)) );
+		
+		trace->zt_depth		= depth;
+		trace->zt_collisions	= 0;
+	}
+	
+	/* STEP 2: Store the allocation record in the allocations array. */
+	
+	if (allocation->za_element != (uintptr_t) 0) {
+		/* 
+		 * Straight up replace any allocation record that was there.  We don't want to do the work
+		 * to preserve the existing entries, because we only record a subset of the
+		 * allocations anyway.
+		 */
+		
+		z_alloc_collisions++;
+		
+		struct ztrace* associated_trace = &ztraces[allocation->za_trace_index];
+		/* Knock off old allocation's size, not the new allocation */
+		associated_trace->zt_size -= allocation->za_size;
+	} else if (allocation->za_trace_index != 0) {
+		/* Slot previously used but not currently in use */
+		z_alloc_overwrites++;
+	}
+
+	allocation->za_element		= addr;
+	allocation->za_trace_index	= trace_index;
+	allocation->za_size		= allocation_size;
+	
+	z_alloc_recorded++;
+	
+	if (top_ztrace->zt_size < trace->zt_size)
+		top_ztrace = trace;
+	
+	lck_mtx_unlock(&zleak_lock);
+	return TRUE;
+}
+
+/*
+ * Free the allocation record and release the stacktrace.
+ * This should be as fast as possible because it will be called for every free.
+ */
+static void
+zleak_free(uintptr_t addr,
+		   vm_size_t allocation_size) 
+{
+	if (addr == (uintptr_t) 0)
+		return;
+	
+	struct zallocation* allocation = &zallocations[hashaddr(addr, zleak_alloc_buckets)];
+	
+	/* Double-checked locking: check to find out if we're interested, lock, check to make
+	 * sure it hasn't changed, then modify it, and release the lock.
+	 */
 	
+	if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) {
+		/* if the allocation was the one, grab the lock, check again, then delete it */
+		lck_mtx_lock_spin(&zleak_lock);
+		
+		if (allocation->za_element == addr && allocation->za_trace_index < zleak_trace_buckets) {
+			struct ztrace *trace;
+
+			/* allocation_size had better match what was passed into zleak_log - otherwise someone is freeing into the wrong zone! */
+			if (allocation->za_size != allocation_size) {
+				panic("Freeing as size %lu memory that was allocated with size %lu\n", 
+						(uintptr_t)allocation_size, (uintptr_t)allocation->za_size);
+			}
+			
+			trace = &ztraces[allocation->za_trace_index];
+			
+			/* size of 0 indicates trace bucket is unused */
+			if (trace->zt_size > 0) {
+				trace->zt_size -= allocation_size;
+			}
+			
+			/* A NULL element means the allocation bucket is unused */
+			allocation->za_element = 0;
+		}
+		lck_mtx_unlock(&zleak_lock);
+	}
+}
+
+#endif /* CONFIG_ZLEAKS */
+
+/*  These functions live outside of CONFIG_ZLEAKS because they are also used in
+ *  mbuf.c for mbuf leak-detection.  This is why they lack the z_ prefix.
+ */
+
+/*
+ * This function captures a backtrace from the current stack and
+ * returns the number of frames captured, limited by max_frames.
+ * It's fast because it does no validation of the data it reads;
+ * since it's only called from threads we're going to keep executing,
+ * any bad data would have killed us eventually anyway.
+ * This is known to work on i386 and x86_64.
+ * ARMTODO: Test it on ARM; it should work, but it hasn't been verified there.  If it works, remove the ifdef.
+ * If this function is inlined, it doesn't record the frame of the function it's inside
+ * (because there's no stack frame!).
+ */
+uint32_t
+fastbacktrace(uintptr_t* bt, uint32_t max_frames)
+{
+#if defined(__x86_64__) || defined(__i386__)
+	uintptr_t* frameptr = NULL, *frameptr_next = NULL;
+	uintptr_t retaddr = 0;
+	uint32_t frame_index = 0, frames = 0;
+	uintptr_t kstackb, kstackt;
+
+	kstackb = current_thread()->kernel_stack;
+	kstackt = kstackb + kernel_stack_size;
+	/* Load stack frame pointer (EBP on x86) into frameptr */
+	frameptr = __builtin_frame_address(0);
+
+	while (frameptr != NULL && frame_index < max_frames) {
+		/* Next frame pointer is pointed to by the previous one */
+		frameptr_next = (uintptr_t*) *frameptr;
+
+		/* Bail if we see a zero in the stack frame; that means we've reached the top of the stack.
+		 * That also means the return address is worthless, so don't record it. */
+		if (frameptr_next == NULL)
+			break;
+		/* Verify thread stack bounds */
+		if (((uintptr_t)frameptr_next > kstackt) || ((uintptr_t)frameptr_next < kstackb))
+			break;
+		/* Pull return address from one spot above the frame pointer */
+		retaddr = *(frameptr + 1);
+
+		/* Store it in the backtrace array */
+		bt[frame_index++] = retaddr;
+
+		frameptr = frameptr_next;
+	}
+
+	/* Save the number of frames captured for return value */
+	frames = frame_index;
+
+	/* Fill in the rest of the backtrace with zeros */
+	while (frame_index < max_frames)
+		bt[frame_index++] = 0;
+
+	return frames;
+#else
+	return OSBacktrace((void*)bt, max_frames);
+#endif
+}
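
For experimentation outside the kernel, here is a user-space rendition of the same frame-pointer walk. It assumes frame pointers are present (e.g. built with -fno-omit-frame-pointer) and substitutes a crude monotonicity check for the kernel-stack bounds test above:

#include <stdint.h>
#include <stdio.h>

__attribute__((noinline)) static uint32_t
backtrace_sketch(uintptr_t *bt, uint32_t max_frames)
{
	uintptr_t *fp = __builtin_frame_address(0);
	uint32_t n = 0;

	while (fp != NULL && n < max_frames) {
		uintptr_t *next = (uintptr_t *)*fp;

		/* Zero (or non-increasing) next frame: top of stack, stop. */
		if (next == NULL || next <= fp)
			break;
		bt[n++] = *(fp + 1);	/* saved return address sits one slot above the frame pointer */
		fp = next;
	}
	return n;
}

int main(void)
{
	uintptr_t bt[8];
	uint32_t n = backtrace_sketch(bt, 8);

	for (uint32_t i = 0; i < n; i++)
		printf("frame %u: %p\n", i, (void *)bt[i]);
	return 0;
}
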
+
+/* "Thomas Wang's 32/64 bit mix functions."  http://www.concentric.net/~Ttwang/tech/inthash.htm */
+uintptr_t
+hash_mix(uintptr_t x)
+{
+#ifndef __LP64__
+	x += ~(x << 15);
+	x ^=  (x >> 10);
+	x +=  (x << 3 );
+	x ^=  (x >> 6 );
+	x += ~(x << 11);
+	x ^=  (x >> 16);
+#else
+	x += ~(x << 32);
+	x ^=  (x >> 22);
+	x += ~(x << 13);
+	x ^=  (x >> 8 );
+	x +=  (x << 3 );
+	x ^=  (x >> 15);
+	x += ~(x << 27);
+	x ^=  (x >> 31);
+#endif
+	return x;
+}
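
A quick way to see why the mixer is applied before masking: flipping a single input bit should flip roughly half the output bits, so adjacent addresses land in unrelated buckets. A user-space check of the 32-bit variant:

#include <stdint.h>
#include <stdio.h>

static uint32_t mix32(uint32_t x)	/* the 32-bit path above */
{
	x += ~(x << 15); x ^= (x >> 10); x += (x << 3);
	x ^= (x >> 6);   x += ~(x << 11); x ^= (x >> 16);
	return x;
}

int main(void)
{
	uint32_t base = mix32(0x12345678);

	for (int bit = 0; bit < 8; bit++) {
		uint32_t flipped = mix32(0x12345678 ^ (1u << bit));
		printf("input bit %d: %d output bits changed\n", bit,
		       __builtin_popcount(base ^ flipped));
	}
	return 0;
}
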
+
+uint32_t
+hashbacktrace(uintptr_t* bt, uint32_t depth, uint32_t max_size)
+{
+
+	uintptr_t hash = 0;
+	uintptr_t mask = max_size - 1;
+
+	while (--depth) {
+		hash += bt[depth];
+	}
+
+	hash = hash_mix(hash) & mask;
+
+	assert(hash < max_size);
+
+	return (uint32_t) hash;
+}
+
+/*
+ *  TODO: Determine how well distributed this is.
+ *      max_size must be a power of 2, e.g. 0x10000, because 0x10000-1 is 0x0FFFF, which makes a good bitmask.
+ */
+uint32_t
+hashaddr(uintptr_t pt, uint32_t max_size)
+{
+	uintptr_t hash = 0;
+	uintptr_t mask = max_size - 1;
+
+	hash = hash_mix(pt) & mask;
+
+	assert(hash < max_size);
+
+	return (uint32_t) hash;
+}
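
Both hash functions depend on max_size being a power of two, so that masking with (max_size - 1) is equivalent to taking the value mod max_size with no divide. A quick user-space check of that equivalence:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t max_size = 0x10000;	/* must be a power of two */
	uintptr_t x;

	for (x = 0; x < 1000000; x += 7919)	/* arbitrary prime stride */
		assert((x & (max_size - 1)) == (x % max_size));
	return 0;
}
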
+
+/* End of all leak-detection code */
+#pragma mark -
+
 /*
  *	zinit initializes a new zone.  The zone data structures themselves
  *	are stored in a zone, which is initially a static structure that
@@ -537,6 +1092,7 @@ use_this_allocation:
 	z->alloc_size = alloc;
 	z->zone_name = name;
 	z->count = 0;
+	z->sum_count = 0LL;
 	z->doing_alloc = FALSE;
 	z->doing_gc = FALSE;
 	z->exhaustible = FALSE;
@@ -545,8 +1101,16 @@ use_this_allocation:
 	z->expandable  = TRUE;
 	z->waiting = FALSE;
 	z->async_pending = FALSE;
+	z->caller_acct = TRUE;
 	z->noencrypt = FALSE;
 
+#if CONFIG_ZLEAKS
+	z->num_allocs = 0;
+	z->num_frees = 0;
+	z->zleak_capture = 0;
+	z->zleak_on = FALSE;
+#endif /* CONFIG_ZLEAKS */
+
 #if	ZONE_DEBUG
 	z->active_zones.next = z->active_zones.prev = NULL;	
 	zone_debug_enable(z);
@@ -555,13 +1119,20 @@ use_this_allocation:
 
 	/*
 	 *	Add the zone to the all-zones list.
+	 *	If we are tracking zone info per task, and we have
+	 *	already used all the available stat slots, then keep
+	 *	using the overflow zone slot.
 	 */
-
 	z->next_zone = ZONE_NULL;
 	thread_call_setup(&z->call_async_alloc, zalloc_async, z);
 	simple_lock(&all_zones_lock);
 	*last_zone = z;
 	last_zone = &z->next_zone;
+	z->index = num_zones;
+	if (zinfo_per_task) {
+		if (num_zones > ZONES_MAX)
+			z->index = ZONES_MAX;
+	}
 	num_zones++;
 	simple_unlock(&all_zones_lock);
 
@@ -782,6 +1353,24 @@ zone_bootstrap(void)
 	vm_offset_t zone_zone_space;
 	char temp_buf[16];
 
+#if 6094439
+	/* enable zone checks by default, to try and catch offenders... */
+#if 0
+	/* 7968354: turn "-zc" back off */
+	check_freed_element = TRUE;
+	/* 7995202: turn "-zp" back off */
+	zfree_clear = TRUE;
+#endif
+	
+	/* ... but allow them to be turned off explicitly */
+	if (PE_parse_boot_argn("-no_zc", temp_buf, sizeof (temp_buf))) {
+		check_freed_element = FALSE;
+	}
+	if (PE_parse_boot_argn("-no_zp", temp_buf, sizeof (temp_buf))) {
+		zfree_clear = FALSE;
+	}
+#endif
+
 	/* see if we want freed zone element checking and/or poisoning */
 	if (PE_parse_boot_argn("-zc", temp_buf, sizeof (temp_buf))) {
 		check_freed_element = TRUE;
@@ -791,6 +1380,10 @@ zone_bootstrap(void)
 		zfree_clear = TRUE;
 	}
 
+	if (PE_parse_boot_argn("-zinfop", temp_buf, sizeof (temp_buf))) {
+		zinfo_per_task = TRUE;
+	}
+
 	/*
 	 * Check for and set up zone leak detection if requested via boot-args.  We recognize two
 	 * boot-args:
@@ -834,13 +1427,47 @@ zone_bootstrap(void)
 	zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone),
 			  sizeof(struct zone), "zones");
 	zone_change(zone_zone, Z_COLLECT, FALSE);
+	zone_change(zone_zone, Z_CALLERACCT, FALSE);
 	zone_change(zone_zone, Z_NOENCRYPT, TRUE);
 
 	zone_zone_size = zalloc_end_of_space - zalloc_next_space;
 	zget_space(NULL, zone_zone_size, &zone_zone_space);
 	zcram(zone_zone, (void *)zone_zone_space, zone_zone_size);
+
+	/* initialize fake zones and zone info if tracking by task */
+	if (zinfo_per_task) {
+		vm_size_t zisize = sizeof(zinfo_usage_store_t) * ZINFO_SLOTS;
+		unsigned int i;
+
+		for (i = 0; i < num_fake_zones; i++)
+			fake_zones[i].init(ZINFO_SLOTS - num_fake_zones + i);
+		zinfo_zone = zinit(zisize, zisize * CONFIG_TASK_MAX,
+				   zisize, "per task zinfo");
+		zone_change(zinfo_zone, Z_CALLERACCT, FALSE);
+	}
+}
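
To make the slot layout concrete: real zones get indices from 0 upward (overflow past ZONES_MAX shares the ZONES_MAX slot), while the fake zones occupy the top of the per-task array. A sketch of the index arithmetic, using an assumed ZINFO_SLOTS value for illustration:

#include <stdio.h>

#define ZINFO_SLOTS_SKETCH 200		/* assumed value, for illustration only */

int main(void)
{
	int num_fake_zones = 2;		/* e.g. kernel_stacks and kalloc.large */

	for (int i = 0; i < num_fake_zones; i++)
		printf("fake zone %d -> slot %d\n", i,
		       ZINFO_SLOTS_SKETCH - num_fake_zones + i);
	return 0;
}
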
+
+void
+zinfo_task_init(task_t task)
+{
+	if (zinfo_per_task) {
+		task->tkm_zinfo = zalloc(zinfo_zone);
+		memset(task->tkm_zinfo, 0, sizeof(zinfo_usage_store_t) * ZINFO_SLOTS);
+	} else {
+		task->tkm_zinfo = NULL;
+	}
 }
 
+void
+zinfo_task_free(task_t task)
+{
+	assert(task != kernel_task);
+	if (task->tkm_zinfo != NULL) {
+		zfree(zinfo_zone, task->tkm_zinfo);
+		task->tkm_zinfo = NULL;
+	}
+}
+
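
The init/free pair above in miniature: the per-task usage array is allocated and zeroed at task creation when tracking is enabled, and released at task teardown. A user-space sketch (malloc/free stand in for zalloc/zfree, SLOTS for ZINFO_SLOTS):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define SLOTS 4				/* stands in for ZINFO_SLOTS */

typedef struct { uint64_t alloc; uint64_t free_bytes; } usage_t;
typedef struct { usage_t *zinfo; } task_sketch_t;

static void task_zinfo_init(task_sketch_t *t, int per_task_tracking)
{
	if (per_task_tracking) {
		t->zinfo = malloc(sizeof(usage_t) * SLOTS);
		memset(t->zinfo, 0, sizeof(usage_t) * SLOTS);
	} else {
		t->zinfo = NULL;	/* accounting disabled for this task */
	}
}

static void task_zinfo_free(task_sketch_t *t)
{
	free(t->zinfo);			/* free(NULL) is a harmless no-op */
	t->zinfo = NULL;
}

int main(void)
{
	task_sketch_t t;

	task_zinfo_init(&t, 1);
	t.zinfo[2].alloc += 64;		/* charge 64 bytes against zone index 2 */
	task_zinfo_free(&t);
	return 0;
}
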
 void
 zone_init(
 	vm_size_t max_zonemap_size)
@@ -876,10 +1503,20 @@ zone_init(
 	lck_mtx_init_ext(&zone_gc_lock, &zone_lck_ext, &zone_lck_grp, &zone_lck_attr);
 	
 	zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED);
+	
+#if CONFIG_ZLEAKS
+	/*
+	 * Initialize the zone leak monitor
+	 */
+	zleak_init(max_zonemap_size);
+#endif /* CONFIG_ZLEAKS */
 }
 
 extern volatile SInt32 kfree_nop_count;
 
+#pragma mark -
+#pragma mark zalloc_canblock
+
 /*
  *	zalloc returns an element from the specified zone.
  */
@@ -890,20 +1527,40 @@ zalloc_canblock(
 {
 	vm_offset_t	addr;
 	kern_return_t retval;
-	void	  	*bt[MAX_DEPTH];		/* only used if zone logging is enabled */
+	uintptr_t	zbt[MAX_ZTRACE_DEPTH];	/* used in zone leak logging and zone leak detection */
 	int 		numsaved = 0;
-	int		i;
+	int			i;
+
+#if CONFIG_ZLEAKS
+	uint32_t	zleak_tracedepth = 0;  /* log this allocation if nonzero */
+#endif /* CONFIG_ZLEAKS */
 
 	assert(zone != ZONE_NULL);
+	
+	lock_zone(zone);
 
 	/*
 	 * If zone logging is turned on and this is the zone we're tracking, grab a backtrace.
 	 */
-
+	
 	if (DO_LOGGING(zone))
-	        numsaved = OSBacktrace(&bt[0], MAX_DEPTH);
-
-	lock_zone(zone);
+	        numsaved = OSBacktrace((void*) zbt, MAX_ZTRACE_DEPTH);
+	
+#if CONFIG_ZLEAKS
+	/* 
+	 * Zone leak detection: capture a backtrace every z_sample_factor
+	 * allocations in this zone. 
+	 */
+	if (zone->zleak_on && (zone->zleak_capture++ % z_sample_factor == 0)) {
+		zone->zleak_capture = 1;
+		
+		/* Avoid backtracing twice if zone logging is on */
+		if (numsaved == 0)
+			zleak_tracedepth = fastbacktrace(zbt, MAX_ZTRACE_DEPTH);
+		else
+			zleak_tracedepth = numsaved;
+	}
+#endif /* CONFIG_ZLEAKS */
 
 	REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
 
@@ -974,6 +1631,26 @@ zalloc_canblock(
 						if (alloc_size == PAGE_SIZE)
 							space = zone_alias_addr(space);
 #endif
+
+#if CONFIG_ZLEAKS
+						if ((zleak_state & (ZLEAK_STATE_ENABLED | ZLEAK_STATE_ACTIVE)) == ZLEAK_STATE_ENABLED) {
+							if (zone_map->size >= zleak_global_tracking_threshold) {
+								kern_return_t kr;
+								
+								kr = zleak_activate();
+								if (kr != KERN_SUCCESS) {
+									printf("Failed to activate live zone leak debugging (%d).\n", kr);
+								}
+							}
+						}
+
+						if ((zleak_state & ZLEAK_STATE_ACTIVE) && !(zone->zleak_on)) {
+							if (zone->cur_size > zleak_per_zone_tracking_threshold) {
+								zone->zleak_on = TRUE;
+							}	
+						}
+#endif /* CONFIG_ZLEAKS */
+
 					        zone_page_init(space, alloc_size,
 							       ZONE_PAGE_USED);
 						zcram(zone, (void *)space, alloc_size);
@@ -987,12 +1664,20 @@ zalloc_canblock(
 							printf("zalloc did gc\n");
 							zone_display_zprint();
 						}
-					        if (retry == 3) {
+						if (retry == 3) {
 						  panic_include_zprint = TRUE;
+#if CONFIG_ZLEAKS
+						  if ((zleak_state & ZLEAK_STATE_ACTIVE)) {
+							  panic_include_ztrace = TRUE;
+						  }
+#endif /* CONFIG_ZLEAKS */		
+							/* TODO: Change this to something more descriptive, perhaps 
+							 * 'zone_map exhausted' only if we get retval 3 (KERN_NO_SPACE).
+							 */
 						  panic("zalloc: \"%s\" (%d elements) retry fail %d, kfree_nop_count: %d", zone->zone_name, zone->count, retval, (int)kfree_nop_count);
 						}
 					} else {
-					        break;
+						break;
 					}
 				}
 				lock_zone(zone);
@@ -1021,6 +1706,7 @@ zalloc_canblock(
 				}
 				if (retval == KERN_SUCCESS) {
 					zone->count++;
+					zone->sum_count++;
 					zone->cur_size += zone->elem_size;
 #if	ZONE_DEBUG
 					if (zone_debug_enabled(zone)) {
@@ -1042,6 +1728,18 @@ zalloc_canblock(
 					VM_PAGE_WAIT();
 					lock_zone(zone);
 				} else {
+					/*
+					 * Equivalent to a 'retry fail 3', we're out of address space in the zone_map
+					 * (if it returned KERN_NO_SPACE)
+					 */
+					if (retval == KERN_NO_SPACE) {
+						panic_include_zprint = TRUE;
+#if CONFIG_ZLEAKS
+						  if ((zleak_state & ZLEAK_STATE_ACTIVE)) {
+							panic_include_ztrace = TRUE;
+						}
+#endif /* CONFIG_ZLEAKS */
+					}
 					panic("zalloc: \"%s\" (%d elements) zget_space returned %d", zone->zone_name, zone->count, retval);
 				}
 			}
@@ -1050,6 +1748,20 @@ zalloc_canblock(
 			REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
 	}
 
+#if CONFIG_ZLEAKS
+	/* Zone leak detection:
+	 * If we're sampling this allocation, add it to the zleaks hash table. 
+	 */
+	if (addr && zleak_tracedepth > 0)  {
+		/* Sampling can fail if another sample is happening at the same time in a different zone. */
+		if (!zleak_log(zbt, addr, zleak_tracedepth, zone->elem_size)) {
+			/* If it failed, roll back the counter so we sample the next allocation instead. */
+			zone->zleak_capture = z_sample_factor;
+		}
+	}
+#endif /* CONFIG_ZLEAKS */
+
 	/*
 	 * See if we should be logging allocations in this zone.  Logging is rarely done except when a leak is
 	 * suspected, so this code rarely executes.  We need to do this code while still holding the zone lock
@@ -1109,9 +1821,9 @@ empty_slot:
 		  zrecords[zcurrent].z_opcode = ZOP_ALLOC;
 			
 		  for (i = 0; i < numsaved; i++)
-		        zrecords[zcurrent].z_pc[i] = bt[i];
+		        zrecords[zcurrent].z_pc[i] = (void*) zbt[i];
 
-		  for (; i < MAX_DEPTH; i++)
+		  for (; i < MAX_ZTRACE_DEPTH; i++)
 			zrecords[zcurrent].z_pc[i] = 0;
 	
 		  zcurrent++;
@@ -1134,12 +1846,31 @@ empty_slot:
 		addr += ZONE_DEBUG_OFFSET;
 	}
 #endif
+	
+#if CONFIG_ZLEAKS
+	if (addr != 0) {
+		zone->num_allocs++;
+	}
+#endif /* CONFIG_ZLEAKS */
 
 	unlock_zone(zone);
 
 success:
 	TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr);
 
+	if (addr) {
+		thread_t thr = current_thread();
+		task_t task;
+		zinfo_usage_t zinfo;
+
+		if (zone->caller_acct)
+			thr->tkm_private.alloc += zone->elem_size;
+		else
+			thr->tkm_shared.alloc += zone->elem_size;
+
+		if ((task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
+			OSAddAtomic64(zone->elem_size, (int64_t *)&zinfo[zone->index].alloc);
+	}
 	return((void *)addr);
 }
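
The accounting block at the tail of the allocation path splits the charge by the zone's caller_acct flag. In miniature (a user-space sketch with plain counters; the field names mirror the thread ledgers above):

#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t alloc; uint64_t freed; } ledger_t;
typedef struct { ledger_t tkm_private; ledger_t tkm_shared; } thread_sketch_t;

static void charge_alloc(thread_sketch_t *thr, int caller_acct, uint64_t elem_size)
{
	if (caller_acct)
		thr->tkm_private.alloc += elem_size;	/* charged to the calling task */
	else
		thr->tkm_shared.alloc += elem_size;	/* charged to "the system" */
}

int main(void)
{
	thread_sketch_t thr = { {0, 0}, {0, 0} };

	charge_alloc(&thr, 1, 48);	/* a zone with the Z_CALLERACCT default */
	charge_alloc(&thr, 0, 48);	/* e.g. the "zones" zone, Z_CALLERACCT off */
	printf("private=%llu shared=%llu\n",
	       (unsigned long long)thr.tkm_private.alloc,
	       (unsigned long long)thr.tkm_shared.alloc);
	return 0;
}
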
 
@@ -1179,17 +1910,36 @@ zalloc_async(
  *
  *	This form should be used when you can not block (like when
  *	processing an interrupt).
+ *
+ *	XXX: It seems like only vm_page_grab_fictitious_common uses this, and its
+ *	friend vm_page_more_fictitious can block, so it doesn't seem like
+ *	this is used for interrupts anymore.
  */
 void *
 zget(
 	register zone_t	zone)
 {
 	register vm_offset_t	addr;
+	
+#if CONFIG_ZLEAKS
+	uintptr_t	zbt[MAX_ZTRACE_DEPTH];		/* used for zone leak detection */
+	uint32_t	zleak_tracedepth = 0;  /* log this allocation if nonzero */
+#endif /* CONFIG_ZLEAKS */
 
 	assert( zone != ZONE_NULL );
 
 	if (!lock_try_zone(zone))
 		return NULL;
+	
+#if CONFIG_ZLEAKS
+	/*
+	 * Zone leak detection: capture a backtrace
+	 */
+	if (zone->zleak_on && (zone->zleak_capture++ % z_sample_factor == 0)) {
+		zone->zleak_capture = 1;
+		zleak_tracedepth = fastbacktrace(zbt, MAX_ZTRACE_DEPTH);
+	}
+#endif /* CONFIG_ZLEAKS */
 
 	REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
 #if	ZONE_DEBUG
@@ -1198,6 +1948,24 @@ zget(
 		addr += ZONE_DEBUG_OFFSET;
 	}
 #endif	/* ZONE_DEBUG */
+	
+#if CONFIG_ZLEAKS
+	/*
+	 * Zone leak detection: record the allocation 
+	 */
+	if (zone->zleak_on && zleak_tracedepth > 0 && addr) {
+		/* Sampling can fail if another sample is happening at the same time in a different zone. */
+		if (!zleak_log(zbt, addr, zleak_tracedepth, zone->elem_size)) {
+			/* If it failed, roll back the counter so we sample the next allocation instead. */
+			zone->zleak_capture = z_sample_factor;
+		}
+	}
+	
+	if (addr != 0) {
+		zone->num_allocs++;
+	}
+#endif /* CONFIG_ZLEAKS */
+	
 	unlock_zone(zone);
 
 	return((void *) addr);
@@ -1216,7 +1984,7 @@ zfree(
 	void 		*addr)
 {
 	vm_offset_t	elem = (vm_offset_t) addr;
-	void		*bt[MAX_DEPTH];			/* only used if zone logging is enable via boot-args */
+	void		*zbt[MAX_ZTRACE_DEPTH];			/* only used if zone logging is enabled via boot-args */
 	int		numsaved = 0;
 
 	assert(zone != ZONE_NULL);
@@ -1226,7 +1994,7 @@ zfree(
 	 */
 
 	if (DO_LOGGING(zone))
-		numsaved = OSBacktrace(&bt[0], MAX_DEPTH);
+		numsaved = OSBacktrace(&zbt[0], MAX_ZTRACE_DEPTH);
 
 #if MACH_ASSERT
 	/* Basic sanity checks */
@@ -1274,9 +2042,9 @@ zfree(
 			zrecords[zcurrent].z_opcode = ZOP_FREE;
 
 			for (i = 0; i < numsaved; i++)
-				zrecords[zcurrent].z_pc[i] = bt[i];
+				zrecords[zcurrent].z_pc[i] = zbt[i];
 
-			for (; i < MAX_DEPTH; i++)
+			for (; i < MAX_ZTRACE_DEPTH; i++)
 				zrecords[zcurrent].z_pc[i] = 0;
 
 			zcurrent++;
@@ -1321,7 +2089,7 @@ zfree(
 			if (elem != (vm_offset_t)tmp_elem)
 				panic("zfree()ing element from wrong zone");
 		}
-		remqueue(&zone->active_zones, (queue_t) elem);
+		remqueue((queue_t) elem);
 	}
 #endif	/* ZONE_DEBUG */
 	if (zone_check) {
@@ -1340,7 +2108,19 @@ zfree(
 	if (zone->count < 0)
 		panic("zfree: count < 0!");
 #endif
+	
 
+#if CONFIG_ZLEAKS
+	zone->num_frees++;
+
+	/*
+	 * Zone leak detection: un-track the allocation 
+	 */
+	if (zone->zleak_on) {
+		zleak_free(elem, zone->elem_size);
+	}
+#endif /* CONFIG_ZLEAKS */
+	
 	/*
 	 * If elements have one or more pages, and memory is low,
 	 * request to run the garbage collection in the zone  the next 
@@ -1351,6 +2131,20 @@ zfree(
 		zone_gc_forced = TRUE;
 	}
 	unlock_zone(zone);
+
+	{
+		thread_t thr = current_thread();
+		task_t task;
+		zinfo_usage_t zinfo;
+
+		if (zone->caller_acct)
+			thr->tkm_private.free += zone->elem_size;
+		else
+			thr->tkm_shared.free += zone->elem_size;
+		if ((task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
+			OSAddAtomic64(zone->elem_size,
+				      (int64_t *)&zinfo[zone->index].free);
+	}
 }
 
 
@@ -1382,6 +2176,9 @@ zone_change(
 		case Z_FOREIGN:
 			zone->allows_foreign = value;
 			break;
+		case Z_CALLERACCT:
+			zone->caller_acct = value;
+			break;
 #if MACH_ASSERT
 		default:
 			panic("Zone_change: Wrong Item Type!");
@@ -1886,7 +2683,7 @@ zone_gc(void)
 	while ((zp = zone_free_pages) != NULL) {
 		zone_free_pages = zp->link;
 #if	ZONE_ALIAS_ADDR
-		z = zone_virtual_addr((vm_map_address_t)z);
+		z = (zone_t)zone_virtual_addr((vm_map_address_t)z);
 #endif
 		kmem_free(zone_map, zone_map_min_address + PAGE_SIZE *
 										(zp - zone_page_table), PAGE_SIZE);
@@ -1905,57 +2702,334 @@ zone_gc(void)
 void
 consider_zone_gc(boolean_t force)
 {
-	/*
-	 *	By default, don't attempt zone GC more frequently
-	 *	than once / 1 minutes.
-	 */
-
-	if (zone_gc_max_rate == 0)
-		zone_gc_max_rate = (60 << SCHED_TICK_SHIFT) + 1;
 
 	if (zone_gc_allowed &&
-	    ((sched_tick > (zone_gc_last_tick + zone_gc_max_rate)) ||
+	    (zone_gc_allowed_by_time_throttle ||
 	     zone_gc_forced ||
 	     force)) {
 		zone_gc_forced = FALSE;
-		zone_gc_last_tick = sched_tick;
+		zone_gc_allowed_by_time_throttle = FALSE; /* reset periodically */
 		zone_gc();
 	}
 }
 
-struct fake_zone_info {
-	const char* name;
-	void (*func)(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
-		    int *, int *);
-};
+/*
+ *	By default, don't attempt zone GC more frequently
+ *	than once per minute.
+ */
+void
+compute_zone_gc_throttle(void *arg __unused)
+{
+	zone_gc_allowed_by_time_throttle = TRUE;
+}
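
The throttle in stand-alone form: a periodic callout raises a flag, and the consumer runs the expensive collection only when the flag (or a forced request) is set, clearing it afterwards. A user-space sketch:

#include <stdbool.h>
#include <stdio.h>

static bool allowed_by_time = false, forced = false;

static void timer_tick(void) { allowed_by_time = true; }	/* fires once a minute */

static void consider_gc(bool force)
{
	if (allowed_by_time || forced || force) {
		forced = false;
		allowed_by_time = false;	/* re-armed by the next timer tick */
		puts("running gc");
	}
}

int main(void)
{
	consider_gc(false);	/* nothing happens: throttled */
	timer_tick();
	consider_gc(false);	/* runs */
	consider_gc(false);	/* throttled again until the next tick */
	return 0;
}
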
 
-static struct fake_zone_info fake_zones[] = {
-	{
-		.name = "kernel_stacks",
-		.func = stack_fake_zone_info,
-	},
-#ifdef ppc
-	{
-		.name = "save_areas",
-		.func = save_fake_zone_info,
-	},
-	{
-		.name = "pmap_mappings",
-		.func = mapping_fake_zone_info,
-	},
-#endif /* ppc */
-#if defined(__i386__) || defined (__x86_64__)
-	{
-		.name = "page_tables",
-		.func = pt_fake_zone_info,
-	},
-#endif /* i386 */
-	{
-		.name = "kalloc.large",
-		.func = kalloc_fake_zone_info,
-	},
-};
 
+kern_return_t
+task_zone_info(
+	task_t			task,
+	mach_zone_name_array_t	*namesp,
+	mach_msg_type_number_t  *namesCntp,
+	task_zone_info_array_t	*infop,
+	mach_msg_type_number_t  *infoCntp)
+{
+	mach_zone_name_t	*names;
+	vm_offset_t		names_addr;
+	vm_size_t		names_size;
+	task_zone_info_t	*info;
+	vm_offset_t		info_addr;
+	vm_size_t		info_size;
+	unsigned int		max_zones, i;
+	zone_t			z;
+	mach_zone_name_t	*zn;
+	task_zone_info_t    	*zi;
+	kern_return_t		kr;
+
+	vm_size_t		used;
+	vm_map_copy_t		copy;
+
+
+	if (task == TASK_NULL)
+		return KERN_INVALID_TASK;
+
+	/*
+	 *	We assume that zones aren't freed once allocated.
+	 *	We won't pick up any zones that are allocated later.
+	 */
+
+	simple_lock(&all_zones_lock);
+	max_zones = (unsigned int)(num_zones + num_fake_zones);
+	z = first_zone;
+	simple_unlock(&all_zones_lock);
+
+	names_size = round_page(max_zones * sizeof *names);
+	kr = kmem_alloc_pageable(ipc_kernel_map,
+				 &names_addr, names_size);
+	if (kr != KERN_SUCCESS)
+		return kr;
+	names = (mach_zone_name_t *) names_addr;
+
+	info_size = round_page(max_zones * sizeof *info);
+	kr = kmem_alloc_pageable(ipc_kernel_map,
+				 &info_addr, info_size);
+	if (kr != KERN_SUCCESS) {
+		kmem_free(ipc_kernel_map,
+			  names_addr, names_size);
+		return kr;
+	}
+
+	info = (task_zone_info_t *) info_addr;
+
+	zn = &names[0];
+	zi = &info[0];
+
+	for (i = 0; i < max_zones - num_fake_zones; i++) {
+		struct zone zcopy;
+
+		assert(z != ZONE_NULL);
+
+		lock_zone(z);
+		zcopy = *z;
+		unlock_zone(z);
+
+		simple_lock(&all_zones_lock);
+		z = z->next_zone;
+		simple_unlock(&all_zones_lock);
+
+		/* assuming here the name data is static */
+		(void) strncpy(zn->mzn_name, zcopy.zone_name,
+			       sizeof zn->mzn_name);
+		zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';
+
+		zi->tzi_count = (uint64_t)zcopy.count;
+		zi->tzi_cur_size = (uint64_t)zcopy.cur_size;
+		zi->tzi_max_size = (uint64_t)zcopy.max_size;
+		zi->tzi_elem_size = (uint64_t)zcopy.elem_size;
+		zi->tzi_alloc_size = (uint64_t)zcopy.alloc_size;
+		zi->tzi_sum_size = zcopy.sum_count * zcopy.elem_size;
+		zi->tzi_exhaustible = (uint64_t)zcopy.exhaustible;
+		zi->tzi_collectable = (uint64_t)zcopy.collectable;
+		zi->tzi_caller_acct = (uint64_t)zcopy.caller_acct;
+		if (task->tkm_zinfo != NULL) {
+			zi->tzi_task_alloc = task->tkm_zinfo[zcopy.index].alloc;
+			zi->tzi_task_free = task->tkm_zinfo[zcopy.index].free;
+		} else {
+			zi->tzi_task_alloc = 0;
+			zi->tzi_task_free = 0;
+		}
+		zn++;
+		zi++;
+	}
+
+	/*
+	 * loop through the fake zones and fill them using the specialized
+	 * functions
+	 */
+	for (i = 0; i < num_fake_zones; i++) {
+		int count, collectable, exhaustible, caller_acct, index;
+		vm_size_t cur_size, max_size, elem_size, alloc_size;
+		uint64_t sum_size;
+
+		strncpy(zn->mzn_name, fake_zones[i].name, sizeof zn->mzn_name);
+		zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';
+		fake_zones[i].query(&count, &cur_size,
+				    &max_size, &elem_size,
+				    &alloc_size, &sum_size,
+				    &collectable, &exhaustible, &caller_acct);
+		zi->tzi_count = (uint64_t)count;
+		zi->tzi_cur_size = (uint64_t)cur_size;
+		zi->tzi_max_size = (uint64_t)max_size;
+		zi->tzi_elem_size = (uint64_t)elem_size;
+		zi->tzi_alloc_size = (uint64_t)alloc_size;
+		zi->tzi_sum_size = sum_size;
+		zi->tzi_collectable = (uint64_t)collectable;
+		zi->tzi_exhaustible = (uint64_t)exhaustible;
+		zi->tzi_caller_acct = (uint64_t)caller_acct;
+		if (task->tkm_zinfo != NULL) {
+			index = ZINFO_SLOTS - num_fake_zones + i;
+			zi->tzi_task_alloc = task->tkm_zinfo[index].alloc;
+			zi->tzi_task_free = task->tkm_zinfo[index].free;
+		} else {
+			zi->tzi_task_alloc = 0;
+			zi->tzi_task_free = 0;
+		}
+		zn++;
+		zi++;
+	}
+
+	used = max_zones * sizeof *names;
+	if (used != names_size)
+		bzero((char *) (names_addr + used), names_size - used);
+
+	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
+			   (vm_map_size_t)names_size, TRUE, &copy);
+	assert(kr == KERN_SUCCESS);
+
+	*namesp = (mach_zone_name_t *) copy;
+	*namesCntp = max_zones;
+
+	used = max_zones * sizeof *info;
+
+	if (used != info_size)
+		bzero((char *) (info_addr + used), info_size - used);
+
+	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
+			   (vm_map_size_t)info_size, TRUE, &copy);
+	assert(kr == KERN_SUCCESS);
+
+	*infop = (task_zone_info_t *) copy;
+	*infoCntp = max_zones;
+
+	return KERN_SUCCESS;
+}
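
The buffer handling above follows a common Mach pattern: allocate whole pages, fill in `used` bytes, zero the slack, and hand the pages to the caller out-of-line. The page-rounding and tail-zeroing step in user-space terms (page size and entry size are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE_SKETCH 4096
#define round_page_sketch(x) (((x) + PAGE_SIZE_SKETCH - 1) & ~(size_t)(PAGE_SIZE_SKETCH - 1))

int main(void)
{
	size_t used = 100 * 48;			/* e.g. 100 entries of 48 bytes each */
	size_t size = round_page_sketch(used);
	char *buf = malloc(size);

	if (buf == NULL)
		return 1;
	memset(buf, 0xAA, used);		/* stands in for filling the info array */
	if (used != size)
		memset(buf + used, 0, size - used);	/* bzero the tail, as above */
	printf("used=%zu rounded=%zu slack=%zu\n", used, size, size - used);
	free(buf);
	return 0;
}
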
+
+kern_return_t
+mach_zone_info(
+	host_t			host,
+	mach_zone_name_array_t	*namesp,
+	mach_msg_type_number_t  *namesCntp,
+	mach_zone_info_array_t	*infop,
+	mach_msg_type_number_t  *infoCntp)
+{
+	mach_zone_name_t	*names;
+	vm_offset_t		names_addr;
+	vm_size_t		names_size;
+	mach_zone_info_t	*info;
+	vm_offset_t		info_addr;
+	vm_size_t		info_size;
+	unsigned int		max_zones, i;
+	zone_t			z;
+	mach_zone_name_t	*zn;
+	mach_zone_info_t    	*zi;
+	kern_return_t		kr;
+	
+	vm_size_t		used;
+	vm_map_copy_t		copy;
+
+
+	if (host == HOST_NULL)
+		return KERN_INVALID_HOST;
+
+	num_fake_zones = sizeof fake_zones / sizeof fake_zones[0];
+
+	/*
+	 *	We assume that zones aren't freed once allocated.
+	 *	We won't pick up any zones that are allocated later.
+	 */
+
+	simple_lock(&all_zones_lock);
+	max_zones = (unsigned int)(num_zones + num_fake_zones);
+	z = first_zone;
+	simple_unlock(&all_zones_lock);
+
+	names_size = round_page(max_zones * sizeof *names);
+	kr = kmem_alloc_pageable(ipc_kernel_map,
+				 &names_addr, names_size);
+	if (kr != KERN_SUCCESS)
+		return kr;
+	names = (mach_zone_name_t *) names_addr;
+
+	info_size = round_page(max_zones * sizeof *info);
+	kr = kmem_alloc_pageable(ipc_kernel_map,
+				 &info_addr, info_size);
+	if (kr != KERN_SUCCESS) {
+		kmem_free(ipc_kernel_map,
+			  names_addr, names_size);
+		return kr;
+	}
+
+	info = (mach_zone_info_t *) info_addr;
+
+	zn = &names[0];
+	zi = &info[0];
+
+	for (i = 0; i < max_zones - num_fake_zones; i++) {
+		struct zone zcopy;
+
+		assert(z != ZONE_NULL);
+
+		lock_zone(z);
+		zcopy = *z;
+		unlock_zone(z);
+
+		simple_lock(&all_zones_lock);
+		z = z->next_zone;
+		simple_unlock(&all_zones_lock);
+
+		/* assuming here the name data is static */
+		(void) strncpy(zn->mzn_name, zcopy.zone_name,
+			       sizeof zn->mzn_name);
+		zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';
+
+		zi->mzi_count = (uint64_t)zcopy.count;
+		zi->mzi_cur_size = (uint64_t)zcopy.cur_size;
+		zi->mzi_max_size = (uint64_t)zcopy.max_size;
+		zi->mzi_elem_size = (uint64_t)zcopy.elem_size;
+		zi->mzi_alloc_size = (uint64_t)zcopy.alloc_size;
+		zi->mzi_sum_size = zcopy.sum_count * zcopy.elem_size;
+		zi->mzi_exhaustible = (uint64_t)zcopy.exhaustible;
+		zi->mzi_collectable = (uint64_t)zcopy.collectable;
+		zn++;
+		zi++;
+	}
+
+	/*
+	 * loop through the fake zones and fill them using the specialized
+	 * functions
+	 */
+	for (i = 0; i < num_fake_zones; i++) {
+		int count, collectable, exhaustible, caller_acct;
+		vm_size_t cur_size, max_size, elem_size, alloc_size;
+		uint64_t sum_size;
+
+		strncpy(zn->mzn_name, fake_zones[i].name, sizeof zn->mzn_name);
+		zn->mzn_name[sizeof zn->mzn_name - 1] = '\0';
+		fake_zones[i].query(&count, &cur_size,
+				    &max_size, &elem_size,
+				    &alloc_size, &sum_size,
+				    &collectable, &exhaustible, &caller_acct);
+		zi->mzi_count = (uint64_t)count;
+		zi->mzi_cur_size = (uint64_t)cur_size;
+		zi->mzi_max_size = (uint64_t)max_size;
+		zi->mzi_elem_size = (uint64_t)elem_size;
+		zi->mzi_alloc_size = (uint64_t)alloc_size;
+		zi->mzi_sum_size = sum_size;
+		zi->mzi_collectable = (uint64_t)collectable;
+		zi->mzi_exhaustible = (uint64_t)exhaustible;
+
+		zn++;
+		zi++;
+	}
+
+	used = max_zones * sizeof *names;
+	if (used != names_size)
+		bzero((char *) (names_addr + used), names_size - used);
+
+	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
+			   (vm_map_size_t)names_size, TRUE, &copy);
+	assert(kr == KERN_SUCCESS);
+
+	*namesp = (mach_zone_name_t *) copy;
+	*namesCntp = max_zones;
+
+	used = max_zones * sizeof *info;
+
+	if (used != info_size)
+		bzero((char *) (info_addr + used), info_size - used);
+
+	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
+			   (vm_map_size_t)info_size, TRUE, &copy);
+	assert(kr == KERN_SUCCESS);
+
+	*infop = (mach_zone_info_t *) copy;
+	*infoCntp = max_zones;
+
+	return KERN_SUCCESS;
+}
+
+/*
+ * host_zone_info - LEGACY user interface for Mach zone information
+ * 		    Callers should use mach_zone_info() instead!
+ */
 kern_return_t
 host_zone_info(
 	host_t			host,
@@ -1975,7 +3049,9 @@ host_zone_info(
 	zone_name_t    *zn;
 	zone_info_t    *zi;
 	kern_return_t	kr;
-	size_t		num_fake_zones;
+
+	vm_size_t	used;
+	vm_map_copy_t	copy;
 
 
 	if (host == HOST_NULL)
@@ -2001,40 +3077,28 @@ host_zone_info(
 	z = first_zone;
 	simple_unlock(&all_zones_lock);
 
-	if (max_zones <= *namesCntp) {
-		/* use in-line memory */
-		names_size = *namesCntp * sizeof *names;
-		names = *namesp;
-	} else {
-		names_size = round_page(max_zones * sizeof *names);
-		kr = kmem_alloc_pageable(ipc_kernel_map,
-					 &names_addr, names_size);
-		if (kr != KERN_SUCCESS)
-			return kr;
-		names = (zone_name_t *) names_addr;
-	}
-
-	if (max_zones <= *infoCntp) {
-		/* use in-line memory */
-	  	info_size = *infoCntp * sizeof *info;
-		info = *infop;
-	} else {
-		info_size = round_page(max_zones * sizeof *info);
-		kr = kmem_alloc_pageable(ipc_kernel_map,
-					 &info_addr, info_size);
-		if (kr != KERN_SUCCESS) {
-			if (names != *namesp)
-				kmem_free(ipc_kernel_map,
-					  names_addr, names_size);
-			return kr;
-		}
-
-		info = (zone_info_t *) info_addr;
+	names_size = round_page(max_zones * sizeof *names);
+	kr = kmem_alloc_pageable(ipc_kernel_map,
+				 &names_addr, names_size);
+	if (kr != KERN_SUCCESS)
+		return kr;
+	names = (zone_name_t *) names_addr;
+
+	info_size = round_page(max_zones * sizeof *info);
+	kr = kmem_alloc_pageable(ipc_kernel_map,
+				 &info_addr, info_size);
+	if (kr != KERN_SUCCESS) {
+		kmem_free(ipc_kernel_map,
+			  names_addr, names_size);
+		return kr;
 	}
+
+	info = (zone_info_t *) info_addr;
+
 	zn = &names[0];
 	zi = &info[0];
 
-	for (i = 0; i < num_zones; i++) {
+	for (i = 0; i < max_zones - num_fake_zones; i++) {
 		struct zone zcopy;
 
 		assert(z != ZONE_NULL);
@@ -2069,57 +3133,49 @@ host_zone_info(
 	 * functions
 	 */
 	for (i = 0; i < num_fake_zones; i++) {
+		int caller_acct;
+		uint64_t sum_space;
 		strncpy(zn->zn_name, fake_zones[i].name, sizeof zn->zn_name);
 		zn->zn_name[sizeof zn->zn_name - 1] = '\0';
-		fake_zones[i].func(&zi->zi_count, &zi->zi_cur_size,
-				   &zi->zi_max_size, &zi->zi_elem_size,
-				   &zi->zi_alloc_size, &zi->zi_collectable,
-				   &zi->zi_exhaustible);
+		fake_zones[i].query(&zi->zi_count, &zi->zi_cur_size,
+				    &zi->zi_max_size, &zi->zi_elem_size,
+				    &zi->zi_alloc_size, &sum_space,
+				    &zi->zi_collectable, &zi->zi_exhaustible, &caller_acct);
 		zn++;
 		zi++;
 	}
 
-	if (names != *namesp) {
-		vm_size_t used;
-		vm_map_copy_t copy;
-
-		used = max_zones * sizeof *names;
+	used = max_zones * sizeof *names;
+	if (used != names_size)
+		bzero((char *) (names_addr + used), names_size - used);
 
-		if (used != names_size)
-			bzero((char *) (names_addr + used), names_size - used);
+	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
+			   (vm_map_size_t)names_size, TRUE, &copy);
+	assert(kr == KERN_SUCCESS);
 
-		kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr,
-				   (vm_map_size_t)names_size, TRUE, &copy);
-		assert(kr == KERN_SUCCESS);
-
-		*namesp = (zone_name_t *) copy;
-	}
+	*namesp = (zone_name_t *) copy;
 	*namesCntp = max_zones;
 
-	if (info != *infop) {
-		vm_size_t used;
-		vm_map_copy_t copy;
+	used = max_zones * sizeof *info;
+	if (used != info_size)
+		bzero((char *) (info_addr + used), info_size - used);
 
-		used = max_zones * sizeof *info;
+	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
+			   (vm_map_size_t)info_size, TRUE, &copy);
+	assert(kr == KERN_SUCCESS);
 
-		if (used != info_size)
-			bzero((char *) (info_addr + used), info_size - used);
-
-		kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr,
-				   (vm_map_size_t)info_size, TRUE, &copy);
-		assert(kr == KERN_SUCCESS);
-
-		*infop = (zone_info_t *) copy;
-	}
+	*infop = (zone_info_t *) copy;
 	*infoCntp = max_zones;
 
 	return KERN_SUCCESS;
 }
 
 extern unsigned int stack_total;
+extern unsigned long long stack_allocs;
 
 #if defined(__i386__) || defined (__x86_64__)
 extern unsigned int inuse_ptepages_count;
+extern long long alloc_ptepages_count;
 #endif
 
 void zone_display_zprint()
@@ -2191,6 +3247,8 @@ db_print_zone(
 	  	db_printf("C");
 	if (zcopy.expandable)
 	  	db_printf("X");
+	if (zcopy.caller_acct)
+		db_printf("A");
 	db_printf("\n");
 }
 
diff --git a/osfmk/kern/zalloc.h b/osfmk/kern/zalloc.h
index 22f9d78b4..d7d722239 100644
--- a/osfmk/kern/zalloc.h
+++ b/osfmk/kern/zalloc.h
@@ -100,6 +100,7 @@ struct zone {
 	vm_size_t	max_size;	/* how large can this zone grow */
 	vm_size_t	elem_size;	/* size of an element */
 	vm_size_t	alloc_size;	/* size used for more memory */
+	uint64_t	sum_count;	/* count of allocs (life of zone) */
 	unsigned int
 	/* boolean_t */ exhaustible :1,	/* (F) merely return if empty? */
 	/* boolean_t */	collectable :1,	/* (F) garbage collect empty pages */
@@ -108,16 +109,38 @@ struct zone {
 	/* boolean_t */	doing_alloc :1,	/* is zone expanding now? */
 	/* boolean_t */	waiting :1,	/* is thread waiting for expansion? */
 	/* boolean_t */	async_pending :1,	/* asynchronous allocation pending? */
+#if CONFIG_ZLEAKS
+	/* boolean_t */ zleak_on :1,	/* Are we collecting allocation information? */
+#endif	/* CONFIG_ZLEAKS */
+	/* boolean_t */	caller_acct :1,	/* do we account allocation/free to the caller? */
 	/* boolean_t */	doing_gc :1,	/* garbage collect in progress? */
 	/* boolean_t */ noencrypt :1;
+	int		index;		/* index into zone_info arrays for this zone */
 	struct zone *	next_zone;	/* Link for all-zones list */
 	call_entry_data_t	call_async_alloc;	/* callout for asynchronous alloc */
 	const char	*zone_name;	/* a name for the zone */
 #if	ZONE_DEBUG
 	queue_head_t	active_zones;	/* active elements */
 #endif	/* ZONE_DEBUG */
+
+#if CONFIG_ZLEAKS
+	uint32_t num_allocs;	/* alloc stats for zleak benchmarks */
+	uint32_t num_frees;		/* free stats for zleak benchmarks */
+	uint32_t zleak_capture; /* per-zone counter for capturing every N allocations */
+#endif /* CONFIG_ZLEAKS */
 };
 
+/*
+ *	structure for tracking zone usage
+ *	Used either once per task/thread for all zones, or once per <task, zone> pair.
+ */
+typedef struct zinfo_usage_store_t {
+	/* These fields may be updated atomically, and so must be 8 byte aligned */
+	uint64_t	alloc __attribute__((aligned(8)));		/* allocation counter */
+	uint64_t	free __attribute__((aligned(8)));		/* free counter */
+} zinfo_usage_store_t;
+typedef zinfo_usage_store_t *zinfo_usage_t;
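
The alignment requirement in user-space terms: 64-bit atomic adds want naturally aligned counters, which aligned(8) guarantees even inside 32-bit tasks. A sketch with C11 atomics (all names here are illustrative):

#include <stdalign.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
	alignas(8) _Atomic uint64_t alloc;	/* 8-byte aligned, like the fields above */
	alignas(8) _Atomic uint64_t freed;
} usage_sketch_t;

int main(void)
{
	usage_sketch_t u = { 0, 0 };

	atomic_fetch_add(&u.alloc, 64);		/* analogous to OSAddAtomic64 */
	printf("alloc=%llu\n", (unsigned long long)atomic_load(&u.alloc));
	return 0;
}
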
+
 extern void		zone_gc(void);
 extern void		consider_zone_gc(boolean_t);
 
@@ -131,15 +154,23 @@ extern void		zone_bootstrap(void) __attribute__((section("__TEXT, initcode")));
 extern void		zone_init(
 					vm_size_t	map_size) __attribute__((section("__TEXT, initcode")));
 
+/* Handle per-task zone info */
+extern void		zinfo_task_init(task_t task);
+extern void		zinfo_task_free(task_t task);
+
+
 /* Stack use statistics */
+extern void		stack_fake_zone_init(int zone_index);
 extern void		stack_fake_zone_info(
 					int			*count, 
 					vm_size_t	*cur_size, 
 					vm_size_t	*max_size,
 					vm_size_t	*elem_size,
 					vm_size_t	*alloc_size, 
+					uint64_t	*sum_size,
 					int			*collectable, 
-					int			*exhaustable);
+					int			*exhaustable,
+					int		*caller_acct);
 
 #if		ZONE_DEBUG
 
@@ -220,6 +251,7 @@ extern void		zone_change(
 #define Z_COLLECT	2	/* Make zone collectable	*/
 #define Z_EXPAND	3	/* Make zone expandable		*/
 #define	Z_FOREIGN	4	/* Allow collectable zone to contain foreign elements */
+#define Z_CALLERACCT	5	/* Account alloc/free against the caller */
 #define Z_NOENCRYPT	6	/* Don't encrypt zone during hibernation */
 
 /* Preallocate space for zone from zone map */
@@ -230,6 +262,48 @@ extern void		zprealloc(
 extern integer_t	zone_free_count(
 						zone_t		zone);
 
+/*
+ * MAX_ZTRACE_DEPTH configures how deep a stack trace is taken on each zalloc in the zone of interest.  15
+ * levels is usually enough to get past all the layers of code in kalloc and IOKit and see who the actual
+ * caller is up above these lower levels.
+ *
+ * This is used both for the zone leak detector and the zone corruption log.
+ */
+
+#define MAX_ZTRACE_DEPTH	15
+
+/* 
+ *  Structure for keeping track of a backtrace, used for leak detection.
+ *  This is in the .h file because it is used during panic; see kern/debug.c.
+ *  A non-zero size indicates that the trace is in use.
+ */
+struct ztrace {
+	vm_size_t		zt_size;			/* How much memory are all the allocations referring to this trace taking up? */
+	uint32_t		zt_depth;			/* depth of stack (0 to MAX_ZTRACE_DEPTH) */
+	void*			zt_stack[MAX_ZTRACE_DEPTH];	/* series of return addresses from OSBacktrace */
+	uint32_t		zt_collisions;			/* How many times did a different stack land here while it was occupied? */
+	uint32_t		zt_hit_count;			/* for determining effectiveness of hash function */
+};
+
+#if CONFIG_ZLEAKS
+
+/* support for the kern.zleak.* sysctls */
+
+extern kern_return_t zleak_activate(void);
+extern vm_size_t zleak_max_zonemap_size;
+extern vm_size_t zleak_global_tracking_threshold;
+extern vm_size_t zleak_per_zone_tracking_threshold;
+
+extern int get_zleak_state(void);
+
+#endif	/* CONFIG_ZLEAKS */
+
+/* These functions used for leak detection both in zalloc.c and mbuf.c */
+extern uint32_t fastbacktrace(uintptr_t* bt, uint32_t max_frames);
+extern uintptr_t hash_mix(uintptr_t x);
+extern uint32_t hashbacktrace(uintptr_t* bt, uint32_t depth, uint32_t max_size);
+extern uint32_t hashaddr(uintptr_t pt, uint32_t max_size);
+
 #endif	/* XNU_KERNEL_PRIVATE */
 
 __END_DECLS
diff --git a/osfmk/kextd/Makefile b/osfmk/kextd/Makefile
index d3a065420..771b0cd26 100644
--- a/osfmk/kextd/Makefile
+++ b/osfmk/kextd/Makefile
@@ -8,14 +8,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS =
 
-INSTINC_SUBDIRS_PPC = 
-
 INSTINC_SUBDIRS_I386 =
 
 EXPINC_SUBDIRS =
 
-EXPINC_SUBDIRS_PPC =
-
 EXPINC_SUBDIRS_I386 =
 
 MIG_DEFS = kextd_mach.defs
diff --git a/osfmk/libsa/machine/types.h b/osfmk/libsa/machine/types.h
index f79adbe87..0a6f4bb69 100644
--- a/osfmk/libsa/machine/types.h
+++ b/osfmk/libsa/machine/types.h
@@ -28,9 +28,7 @@
 #ifndef _MACH_MACHINE_TYPES_H
 #define _MACH_MACHINE_TYPES_H
 
-#if defined (__ppc__)
-#include "libsa/ppc/types.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "libsa/i386/types.h"
 #else
 #error architecture not supported
diff --git a/osfmk/libsa/ppc/types.h b/osfmk/libsa/ppc/types.h
deleted file mode 100644
index 859f94b92..000000000
--- a/osfmk/libsa/ppc/types.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- * 
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:51  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:25:36  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.2.1  1996/12/09  16:59:05  stephen
- * 	nmklinux_1.0b3_shared into pmk1.1
- * 	[1996/12/09  11:18:55  stephen]
- *
- * Revision 1.1.2.1  1996/09/17  16:56:35  bruel
- * 	created from standalone mach servers
- * 	[1996/09/17  16:16:17  bruel]
- * 
- * $EndLog$
- */
-
-#ifndef	_MACH_MACHINE_TYPES_H_
-#define _MACH_MACHINE_TYPES_H_ 1
-
-typedef long		dev_t;		/* device number (major+minor) */
-
-typedef signed char	bit8_t;		/* signed 8-bit quantity */
-typedef unsigned char	u_bit8_t;	/* unsigned 8-bit quantity */
-
-typedef short		bit16_t;	/* signed 16-bit quantity */
-typedef unsigned short	u_bit16_t;	/* unsigned 16-bit quantity */
-
-typedef int		bit32_t;	/* signed 32-bit quantity */
-typedef unsigned int	u_bit32_t;	/* unsigned 32-bit quantity */
-
-/* Only 32 bits of the "bit64_t" are significant on this 32-bit machine */
-typedef struct { int __val[2]; } bit64_t;	/* signed 64-bit quantity */
-typedef struct { unsigned int __val[2]; } u_bit64_t;/* unsigned 64-bit quantity */
-#define	_SIG64_BITS	__val[1]	/* bits of interest (32) */
-
-#endif /*  _MACH_MACHINE_TYPES_H_ */
diff --git a/osfmk/libsa/types.h b/osfmk/libsa/types.h
index ca12b7efb..341e42b0e 100644
--- a/osfmk/libsa/types.h
+++ b/osfmk/libsa/types.h
@@ -69,7 +69,6 @@ typedef struct _quad_ {
 
 typedef	char *		caddr_t;	/* address of a (signed) char */
 
-typedef int		time_t;		/* a signed 32    */
 typedef unsigned int	daddr_t;	/* an unsigned 32 */
 #if 0 /* off_t should be 64-bit ! */
 typedef	unsigned int	off_t;		/* another unsigned 32 */
diff --git a/osfmk/lockd/Makefile b/osfmk/lockd/Makefile
index 45820f497..2975dc2dd 100644
--- a/osfmk/lockd/Makefile
+++ b/osfmk/lockd/Makefile
@@ -8,14 +8,10 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS =
 
-INSTINC_SUBDIRS_PPC = 
-
 INSTINC_SUBDIRS_I386 =
 
 EXPINC_SUBDIRS =
 
-EXPINC_SUBDIRS_PPC =
-
 EXPINC_SUBDIRS_I386 =
 
 MIG_DEFS = lockd_mach.defs
diff --git a/osfmk/mach/Makefile b/osfmk/mach/Makefile
index eaadb00bd..770208eaa 100644
--- a/osfmk/mach/Makefile
+++ b/osfmk/mach/Makefile
@@ -3,39 +3,18 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
 export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
 export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 
-
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
-INSTINC_SUBDIRS = \
-	machine
-
-INSTINC_SUBDIRS_PPC = \
-	ppc
-
-INSTINC_SUBDIRS_ARM = \
-	arm
-
-INSTINC_SUBDIRS_I386 = \
-	i386
-
-INSTINC_SUBDIRS_X86_64 = \
-	i386
-
-EXPINC_SUBDIRS = \
-	machine
-
-EXPINC_SUBDIRS_PPC = \
-	ppc
-
-EXPINC_SUBDIRS_I386 = \
-	i386
-
-EXPINC_SUBDIRS_X86_64 = \
-	i386
+INSTINC_SUBDIRS = machine
+INSTINC_SUBDIRS_ARM = arm
+INSTINC_SUBDIRS_I386 = i386
+INSTINC_SUBDIRS_X86_64 = i386
 
-EXPINC_SUBDIRS_ARM = \
-	arm
+EXPINC_SUBDIRS = machine
+EXPINC_SUBDIRS_I386 = i386
+EXPINC_SUBDIRS_X86_64 = i386
+EXPINC_SUBDIRS_ARM = arm
 
 MIG_TYPES = \
 	clock_types.defs \
@@ -188,6 +167,7 @@ INSTALL_MI_GEN_LIST =
 INSTALL_MI_DIR = mach
 
 EXPORT_MI_LIST	= \
+	branch_predicates.h \
 	mach_interface.h \
 	${DATAFILES}
 
diff --git a/osfmk/mach/branch_predicates.h b/osfmk/mach/branch_predicates.h
new file mode 100644
index 000000000..8d16db0fa
--- /dev/null
+++ b/osfmk/mach/branch_predicates.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+
+#ifndef	_MACH_BRANCH_PREDICATES_H
+#define	_MACH_BRANCH_PREDICATES_H
+
+#define	__probable(x)	__builtin_expect((x), 1)
+#define	__improbable(x)	__builtin_expect((x), 0)
+#endif /* _MACH_BRANCH_PREDICATES_H */
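
Typical use of these predicates, as a user-space sketch: annotate the branch you expect so the compiler lays the hot path out as the fall-through (the macro definitions are repeated here so the example stands alone):

#include <stdio.h>

#define __probable(x)	__builtin_expect((x), 1)
#define __improbable(x)	__builtin_expect((x), 0)

static int process(int fd)
{
	if (__improbable(fd < 0)) {	/* error path pushed out of line */
		fprintf(stderr, "bad fd\n");
		return -1;
	}
	return 0;			/* hot path falls straight through */
}

int main(void) { return process(3); }
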
diff --git a/osfmk/mach/clock_types.h b/osfmk/mach/clock_types.h
index 12035da3c..e020066d1 100644
--- a/osfmk/mach/clock_types.h
+++ b/osfmk/mach/clock_types.h
@@ -84,6 +84,7 @@ typedef struct mach_timespec	mach_timespec_t;
 #define NSEC_PER_USEC	1000		/* nanoseconds per microsecond */
 #define USEC_PER_SEC	1000000		/* microseconds per second */
 #define NSEC_PER_SEC	1000000000	/* nanoseconds per second */
+#define NSEC_PER_MSEC	1000000ull	/* nanoseconds per millisecond */
 
 #define BAD_MACH_TIMESPEC(t)						\
 	((t)->tv_nsec < 0 || (t)->tv_nsec >= NSEC_PER_SEC)
diff --git a/osfmk/mach/host_info.h b/osfmk/mach/host_info.h
index 3bd96bf53..9ad10eaa7 100644
--- a/osfmk/mach/host_info.h
+++ b/osfmk/mach/host_info.h
@@ -184,6 +184,7 @@ typedef struct host_priority_info	*host_priority_info_t;
 
 /* host_statistics64() */
 #define HOST_VM_INFO64		4	/* 64-bit virtual memory stats */
+#define HOST_EXTMOD_INFO64	5	/* External modification stats */
 
 
 struct host_load_info {
@@ -204,6 +205,13 @@ typedef struct host_load_info	*host_load_info_t;
 /* size of the latest version of the structure */
 #define HOST_VM_INFO64_LATEST_COUNT HOST_VM_INFO64_COUNT
 
+/* in <mach/vm_statistics.h> */
+/* vm_extmod_statistics */
+#define HOST_EXTMOD_INFO64_COUNT ((mach_msg_type_number_t) \
+	    (sizeof(vm_extmod_statistics_data_t)/sizeof(integer_t)))
+
+/* size of the latest version of the structure */
+#define HOST_EXTMOD_INFO64_LATEST_COUNT HOST_EXTMOD_INFO64_COUNT
 
 /* vm_statistics */
 #define	HOST_VM_INFO_COUNT ((mach_msg_type_number_t) \
@@ -229,4 +237,41 @@ typedef struct host_cpu_load_info	*host_cpu_load_info_t;
 #define HOST_CPU_LOAD_INFO_COUNT ((mach_msg_type_number_t) \
 		(sizeof (host_cpu_load_info_data_t) / sizeof (integer_t)))
 
+#ifdef PRIVATE
+/*
+ * Per-CPU scheduler statistics
+ */
+struct _processor_statistics_np  {
+	int32_t			ps_cpuid;
+
+	uint32_t		ps_csw_count;
+	uint32_t		ps_preempt_count;
+	uint32_t		ps_preempted_rt_count;
+	uint32_t		ps_preempted_by_rt_count;
+
+	uint32_t		ps_rt_sched_count;
+
+	uint32_t		ps_interrupt_count;
+	uint32_t		ps_ipi_count;
+	uint32_t		ps_timer_pop_count;
+	
+	uint64_t		ps_runq_count_sum __attribute__((aligned(8)));
+
+	uint32_t		ps_idle_transitions;
+
+};
+
+#endif /* PRIVATE */
+
+#ifdef KERNEL_PRIVATE
+
+extern kern_return_t	set_sched_stats_active(
+					boolean_t active);
+
+extern kern_return_t	get_sched_statistics( 
+					struct _processor_statistics_np *out, 
+					uint32_t *count);
+#endif  /* KERNEL_PRIVATE */
+
+
 #endif	/* _MACH_HOST_INFO_H_ */
diff --git a/osfmk/mach/i386/_structs.h b/osfmk/mach/i386/_structs.h
index bcac16be3..9dd3f4416 100644
--- a/osfmk/mach/i386/_structs.h
+++ b/osfmk/mach/i386/_structs.h
@@ -401,17 +401,19 @@ _STRUCT_X86_AVX_STATE32
 #define _STRUCT_X86_EXCEPTION_STATE32	struct __darwin_i386_exception_state
 _STRUCT_X86_EXCEPTION_STATE32
 {
-    unsigned int	__trapno;
-    unsigned int	__err;
-    unsigned int	__faultvaddr;
+	__uint16_t	__trapno;
+	__uint16_t	__cpu;
+	__uint32_t	__err;
+	__uint32_t	__faultvaddr;
 };
 #else /* !__DARWIN_UNIX03 */
 #define _STRUCT_X86_EXCEPTION_STATE32	struct i386_exception_state
 _STRUCT_X86_EXCEPTION_STATE32
 {
-    unsigned int	trapno;
-    unsigned int	err;
-    unsigned int	faultvaddr;
+	__uint16_t	trapno;
+	__uint16_t	cpu;
+	__uint32_t	err;
+	__uint32_t	faultvaddr;
 };
 #endif /* !__DARWIN_UNIX03 */
 
@@ -748,17 +750,19 @@ _STRUCT_X86_AVX_STATE64
 #define _STRUCT_X86_EXCEPTION_STATE64	struct __darwin_x86_exception_state64
 _STRUCT_X86_EXCEPTION_STATE64
 {
-    unsigned int	__trapno;
-    unsigned int	__err;
-    __uint64_t		__faultvaddr;
+    __uint16_t	__trapno;
+    __uint16_t	__cpu;
+    __uint32_t	__err;
+    __uint64_t	__faultvaddr;
 };
 #else /* !__DARWIN_UNIX03 */
 #define _STRUCT_X86_EXCEPTION_STATE64	struct x86_exception_state64
 _STRUCT_X86_EXCEPTION_STATE64
 {
-    unsigned int	trapno;
-    unsigned int	err;
-    __uint64_t		faultvaddr;
+    __uint16_t	trapno;
+    __uint16_t	cpu;
+    __uint32_t	err;
+    __uint64_t	faultvaddr;
 };
 #endif /* !__DARWIN_UNIX03 */
 
diff --git a/osfmk/mach/i386/_types.h b/osfmk/mach/i386/_types.h
deleted file mode 100644
index 5679b84e2..000000000
--- a/osfmk/mach/i386/_types.h
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifndef	_MACH_I386__TYPES_H_
-#define	_MACH_I386__TYPES_H_
-
-/*
- * i386_thread_state is the structure that is exported to user threads for 
- * use in status/mutate calls.  This structure should never change.
- *
- */
-
-#if !__DARWIN_UNIX03
-struct i386_thread_state
-#else /* __DARWIN_UNIX03 */
-struct __darwin_i386_thread_state
-#endif /* __DARWIN_UNIX03 */
-{
-    unsigned int	eax;
-    unsigned int	ebx;
-    unsigned int	ecx;
-    unsigned int	edx;
-    unsigned int	edi;
-    unsigned int	esi;
-    unsigned int	ebp;
-    unsigned int	esp;
-    unsigned int	ss;
-    unsigned int	eflags;
-    unsigned int	eip;
-    unsigned int	cs;
-    unsigned int	ds;
-    unsigned int	es;
-    unsigned int	fs;
-    unsigned int	gs;
-};
-
-#if !__DARWIN_UNIX03
-struct x86_thread_state64
-#else /* __DARWIN_UNIX03 */
-struct __darwin_x86_thread_state64
-#endif /* __DARWIN_UNIX03 */
-{
-	uint64_t	rax;
-	uint64_t	rbx;
-	uint64_t	rcx;
-	uint64_t	rdx;
-	uint64_t	rdi;
-	uint64_t	rsi;
-	uint64_t	rbp;
-	uint64_t	rsp;
-	uint64_t	r8;
-	uint64_t	r9;
-	uint64_t	r10;
-	uint64_t	r11;
-	uint64_t	r12;
-	uint64_t	r13;
-	uint64_t	r14;
-	uint64_t	r15;
-	uint64_t	rip;
-	uint64_t	rflags;
-	uint64_t	cs;
-	uint64_t	fs;
-	uint64_t	gs;
-};
-
-
-typedef struct fp_control {
-    unsigned short		invalid	:1,
-    				denorm	:1,
-				zdiv	:1,
-				ovrfl	:1,
-				undfl	:1,
-				precis	:1,
-					:2,
-				pc	:2,
-#define FP_PREC_24B		0
-#define	FP_PREC_53B		2
-#define FP_PREC_64B		3
-				rc	:2,
-#define FP_RND_NEAR		0
-#define FP_RND_DOWN		1
-#define FP_RND_UP		2
-#define FP_CHOP			3
-				/*inf*/	:1,
-					:3;
-} fp_control_t;
-/*
- * Status word.
- */
-
-typedef struct fp_status {
-    unsigned short		invalid	:1,
-    				denorm	:1,
-				zdiv	:1,
-				ovrfl	:1,
-				undfl	:1,
-				precis	:1,
-				stkflt	:1,
-				errsumm	:1,
-				c0	:1,
-				c1	:1,
-				c2	:1,
-				tos	:3,
-				c3	:1,
-				busy	:1;
-} fp_status_t;
-				
-/* defn of 80bit x87 FPU or MMX register  */
-struct mmst_reg {
-	char	mmst_reg[10];
-	char	mmst_rsrv[6];
-};
-
-
-/* defn of 128 bit XMM regs */
-struct xmm_reg {
-	char		xmm_reg[16];
-};
-
-/* 
- * Floating point state.
- */
-
-#define FP_STATE_BYTES		512	/* number of chars worth of data from fpu_fcw */
-#if !__DARWIN_UNIX03
-struct i386_float_state
-#else /* __DARWIN_UNIX03 */
-struct __darwin_i386_float_state
-#endif /* __DARWIN_UNIX03 */
-{
-	int 			fpu_reserved[2];
-	fp_control_t	fpu_fcw;			/* x87 FPU control word */
-	fp_status_t		fpu_fsw;			/* x87 FPU status word */
-	uint8_t			fpu_ftw;			/* x87 FPU tag word */
-	uint8_t			fpu_rsrv1;			/* reserved */ 
-	uint16_t		fpu_fop;			/* x87 FPU Opcode */
-	uint32_t		fpu_ip;				/* x87 FPU Instruction Pointer offset */
-	uint16_t		fpu_cs;				/* x87 FPU Instruction Pointer Selector */
-	uint16_t		fpu_rsrv2;			/* reserved */
-	uint32_t		fpu_dp;				/* x87 FPU Instruction Operand(Data) Pointer offset */
-	uint16_t		fpu_ds;				/* x87 FPU Instruction Operand(Data) Pointer Selector */
-	uint16_t		fpu_rsrv3;			/* reserved */
-	uint32_t		fpu_mxcsr;			/* MXCSR Register state */
-	uint32_t		fpu_mxcsrmask;		/* MXCSR mask */
-	struct mmst_reg	fpu_stmm0;		/* ST0/MM0   */
-	struct mmst_reg	fpu_stmm1;		/* ST1/MM1  */
-	struct mmst_reg	fpu_stmm2;		/* ST2/MM2  */
-	struct mmst_reg	fpu_stmm3;		/* ST3/MM3  */
-	struct mmst_reg	fpu_stmm4;		/* ST4/MM4  */
-	struct mmst_reg	fpu_stmm5;		/* ST5/MM5  */
-	struct mmst_reg	fpu_stmm6;		/* ST6/MM6  */
-	struct mmst_reg	fpu_stmm7;		/* ST7/MM7  */
-	struct xmm_reg	fpu_xmm0;		/* XMM 0  */
-	struct xmm_reg	fpu_xmm1;		/* XMM 1  */
-	struct xmm_reg	fpu_xmm2;		/* XMM 2  */
-	struct xmm_reg	fpu_xmm3;		/* XMM 3  */
-	struct xmm_reg	fpu_xmm4;		/* XMM 4  */
-	struct xmm_reg	fpu_xmm5;		/* XMM 5  */
-	struct xmm_reg	fpu_xmm6;		/* XMM 6  */
-	struct xmm_reg	fpu_xmm7;		/* XMM 7  */
-	char			fpu_rsrv4[14*16];	/* reserved */
-	int 			fpu_reserved1;
-};
-
-
-#if !__DARWIN_UNIX03
-struct i386_exception_state
-#else /* __DARWIN_UNIX03 */
-struct __darwin_i386_exception_state
-#endif /* __DARWIN_UNIX03 */
-{
-    unsigned int	trapno;
-    unsigned int	err;
-    unsigned int	faultvaddr;
-};
-
-#if !__DARWIN_UNIX03
-struct x86_debug_state
-#else /* __DARWIN_UNIX03 */
-struct __darwin_x86_debug_state
-#endif /* __DARWIN_UNIX03 */
-{
-	unsigned int	dr0;
-	unsigned int	dr1;
-	unsigned int	dr2;
-	unsigned int	dr3;
-	unsigned int	dr4;
-	unsigned int	dr5;
-	unsigned int	dr6;
-	unsigned int	dr7;
-};
-
-#endif /* _MACH_I386__TYPES_H_ */
diff --git a/osfmk/mach/i386/sdt_isa.h b/osfmk/mach/i386/sdt_isa.h
index c32239162..503f5ce63 100644
--- a/osfmk/mach/i386/sdt_isa.h
+++ b/osfmk/mach/i386/sdt_isa.h
@@ -41,7 +41,7 @@
  */
 #ifdef __x86_64__
 #define DTRACE_LAB(p, n)		\
-   "__dtrace_probeDOLLAR" DTRACE_TOSTRING(__LINE__) DTRACE_STRINGIFY(_##p##___##n)
+   "__dtrace_probeDOLLAR" DTRACE_TOSTRING(%=__LINE__) DTRACE_STRINGIFY(_##p##___##n)
 
 #define DTRACE_LABEL(p, n)		\
       ".section __DATA, __data\n\t"	\
@@ -51,7 +51,7 @@
 	"1:"
 #else
 #define DTRACE_LAB(p, n)		\
-   "__dtrace_probe$" DTRACE_TOSTRING(__LINE__) DTRACE_STRINGIFY(_##p##___##n)
+   "__dtrace_probe$" DTRACE_TOSTRING(%=__LINE__) DTRACE_STRINGIFY(_##p##___##n)
 
 #define DTRACE_LABEL(p, n)		\
       ".section __DATA, __data\n\t"	\
@@ -62,7 +62,7 @@
 #endif
 #else	/* !KERNEL */
 #define DTRACE_LABEL(p, n)									\
-	"__dtrace_probe$" DTRACE_TOSTRING(__LINE__) DTRACE_STRINGIFY(_##p##___##n) ":"	"\n\t"
+	"__dtrace_probe$" DTRACE_TOSTRING(%=__LINE__) DTRACE_STRINGIFY(_##p##___##n) ":"	"\n\t"
 #endif	/* !KERNEL */
 
 #ifdef DTRACE_CALL_TEST
@@ -103,6 +103,8 @@
 #define DTRACE_CALL0ARGS(provider, name)							\
 	asm volatile (										\
 		      DTRACE_CALL(provider, name)						\
+	              :										\
+	              :										\
 	);
 
 #define DTRACE_CALL1ARG(provider, name)								\
diff --git a/osfmk/mach/i386/thread_status.h b/osfmk/mach/i386/thread_status.h
index 501fc8df0..715422ac8 100644
--- a/osfmk/mach/i386/thread_status.h
+++ b/osfmk/mach/i386/thread_status.h
@@ -300,11 +300,12 @@ typedef struct x86_debug_state x86_debug_state_t;
  * enough stack
  */
 struct x86_seg_load_fault32 {
-	unsigned int    trapno;
-	unsigned int    err;
-	unsigned int    eip;
-	unsigned int    cs;
-	unsigned int    efl;
+	uint16_t	trapno;
+	uint16_t	cpu;
+	uint32_t	err;
+	uint32_t	eip;
+	uint32_t	cs;
+	uint32_t	efl;
 };
 
 #ifdef XNU_KERNEL_PRIVATE
@@ -318,23 +319,24 @@ struct x86_seg_load_fault32 {
  * on all traps into debugger.)
  */
 struct x86_saved_state32_from_kernel {
-	unsigned int	gs;
-	unsigned int	fs;
-	unsigned int	es;
-	unsigned int	ds;
-	unsigned int	edi;
-	unsigned int	esi;
-	unsigned int	ebp;
-	unsigned int	cr2;	/* kernel esp stored by pusha - we save cr2 here later */
-	unsigned int	ebx;
-	unsigned int	edx;
-	unsigned int	ecx;
-	unsigned int	eax;
-	unsigned int	trapno;
-	unsigned int	err;
-	unsigned int	eip;
-	unsigned int	cs;
-	unsigned int	efl;
+	uint32_t	gs;
+	uint32_t	fs;
+	uint32_t	es;
+	uint32_t	ds;
+	uint32_t	edi;
+	uint32_t	esi;
+	uint32_t	ebp;
+	uint32_t	cr2;	/* kernel esp stored by pusha - we save cr2 here later */
+	uint32_t	ebx;
+	uint32_t	edx;
+	uint32_t	ecx;
+	uint32_t	eax;
+	uint16_t	trapno;
+	uint16_t	cpu;
+	uint32_t	err;
+	uint32_t	eip;
+	uint32_t	cs;
+	uint32_t	efl;
 };
 
 /*
@@ -343,25 +345,26 @@ struct x86_saved_state32_from_kernel {
  * servers, because copying can be avoided:
  */
 struct x86_saved_state32 {
-	unsigned int	gs;
-	unsigned int	fs;
-	unsigned int	es;
-	unsigned int	ds;
-	unsigned int	edi;
-	unsigned int	esi;
-	unsigned int	ebp;
-	unsigned int	cr2;	/* kernel esp stored by pusha - we save cr2 here later */
-	unsigned int	ebx;
-	unsigned int	edx;
-	unsigned int	ecx;
-	unsigned int	eax;
-	unsigned int	trapno;
-	unsigned int	err;
-	unsigned int	eip;
-	unsigned int	cs;
-	unsigned int	efl;
-	unsigned int	uesp;
-	unsigned int	ss;
+	uint32_t	gs;
+	uint32_t	fs;
+	uint32_t	es;
+	uint32_t	ds;
+	uint32_t	edi;
+	uint32_t	esi;
+	uint32_t	ebp;
+	uint32_t	cr2;	/* kernel esp stored by pusha - we save cr2 here later */
+	uint32_t	ebx;
+	uint32_t	edx;
+	uint32_t	ecx;
+	uint32_t	eax;
+	uint16_t	trapno;
+	uint16_t	cpu;
+	uint32_t	err;
+	uint32_t	eip;
+	uint32_t	cs;
+	uint32_t	efl;
+	uint32_t	uesp;
+	uint32_t	ss;
 };
 typedef struct x86_saved_state32 x86_saved_state32_t;
 
@@ -374,6 +377,7 @@ struct x86_saved_state32_tagged {
 	struct x86_saved_state32	state;
 };
 typedef struct x86_saved_state32_tagged x86_saved_state32_tagged_t;
+/* Note: sizeof(x86_saved_state32_tagged_t) is a multiple of 16 bytes */
 
 struct x86_sframe32 {
 	/*
@@ -395,13 +399,10 @@ typedef struct x86_sframe32 x86_sframe32_t;
  * on any exception/trap/interrupt.
  */
 struct x86_64_intr_stack_frame {
-	uint32_t	trapno;
-#if defined(__LP64__) && defined(KERNEL)
+	uint16_t	trapno;
+	uint16_t	cpu;
 	uint32_t 	_pad;
 	uint64_t	trapfn;
-#else
-	uint32_t	trapfn;
-#endif
 	uint64_t	err;
 	uint64_t	rip;
 	uint64_t	cs;
@@ -410,6 +411,7 @@ struct x86_64_intr_stack_frame {
 	uint64_t	ss;
 };
 typedef struct x86_64_intr_stack_frame x86_64_intr_stack_frame_t;
+/* Note: sizeof(x86_64_intr_stack_frame_t) must be a multiple of 16 bytes */
 
 /*
  * This defines the state saved before entry into compatibility mode.
@@ -418,24 +420,18 @@ typedef struct x86_64_intr_stack_frame x86_64_intr_stack_frame_t;
  */
 struct x86_saved_state_compat32 {
 	struct x86_saved_state32_tagged	iss32;
-#if defined(__LP64__) && defined(KERNEL)
-#else
-	uint32_t			pad_for_16byte_alignment[2];
-#endif
-	struct	x86_64_intr_stack_frame	isf64;
+	struct x86_64_intr_stack_frame	isf64;
 };
 typedef struct x86_saved_state_compat32 x86_saved_state_compat32_t;
 
 struct x86_sframe_compat32 {
+	uint32_t			pad_for_16byte_alignment[2];
+	uint64_t			_register_save_slot;
         struct x86_64_intr_stack_frame  slf;
-#if defined(__LP64__) && defined(KERNEL)
-#else
-	uint32_t	pad_for_16byte_alignment[2];
-#endif
         struct x86_saved_state_compat32 ssf;
-	uint32_t			empty[4];
 };
 typedef struct x86_sframe_compat32 x86_sframe_compat32_t;
+/* Note: sizeof(x86_sframe_compat32_t) must be a multiple of 16 bytes */
 
 /*
  * thread state format for task running in 64bit long mode
@@ -480,9 +476,9 @@ struct x86_saved_state64 {
 
 	uint32_t	gs;
 	uint32_t	fs;
-#ifdef __x86_64__
-	uint32_t		_pad_for_alignment[3];
-#endif
+
+	uint32_t	_pad_for_tagged_alignment[3];
+
 	struct	x86_64_intr_stack_frame	isf;
 };
 typedef struct x86_saved_state64 x86_saved_state64_t;
@@ -496,13 +492,12 @@ struct x86_saved_state64_tagged {
 typedef struct x86_saved_state64_tagged x86_saved_state64_tagged_t;
 
 struct x86_sframe64 {
-        struct x86_64_intr_stack_frame	slf;
-#ifdef __i386__
-		uint32_t		_pad_for_alignment[3];
-#endif
-        struct x86_saved_state64_tagged	ssf;
+	uint64_t			_register_save_slot[2];
+	struct x86_64_intr_stack_frame	slf;
+	x86_saved_state64_tagged_t	ssf;
 };
 typedef struct x86_sframe64 x86_sframe64_t;
+/* Note: sizeof(x86_sframe64_t) is a multiple of 16 bytes */
 
 extern uint32_t get_eflags_exportmask(void);
 
diff --git a/osfmk/mach/i386/vm_param.h b/osfmk/mach/i386/vm_param.h
index 9487ff7ef..fb2ca164f 100644
--- a/osfmk/mach/i386/vm_param.h
+++ b/osfmk/mach/i386/vm_param.h
@@ -182,24 +182,29 @@
 
 #if defined(__i386__)
 
-#define VM_MIN_KERNEL_ADDRESS	((vm_offset_t) 0x00001000U)
-#define VM_MIN_KERNEL_AND_KEXT_ADDRESS VM_MIN_KERNEL_ADDRESS
-
-#define VM_MAX_KERNEL_ADDRESS	((vm_offset_t) 0xFE7FFFFFU)
-#define KERNEL_STACK_SIZE		(I386_PGBYTES*4)
+#define KERNEL_IMAGE_TO_PHYS(x) (x)
+#define VM_MIN_KERNEL_ADDRESS		((vm_offset_t) 0x00001000U)
+#define VM_MIN_KERNEL_AND_KEXT_ADDRESS	VM_MIN_KERNEL_ADDRESS
+#define VM_MAX_KERNEL_ADDRESS		((vm_offset_t) 0xFE7FFFFFU)
 
 #elif defined(__x86_64__)
 
-#define VM_MIN_KERNEL_ADDRESS	((vm_offset_t) 0xFFFFFF8000000000UL)
-#define VM_MIN_KERNEL_AND_KEXT_ADDRESS (VM_MIN_KERNEL_ADDRESS - 0x80000000ULL)
-
-#define VM_MAX_KERNEL_ADDRESS	((vm_offset_t) 0xFFFFFFFFFFFFEFFFUL)
-#define KERNEL_STACK_SIZE		(I386_PGBYTES*4)
+#define KERNEL_IMAGE_TO_PHYS(x) (x)
+#define VM_MIN_KERNEL_ADDRESS		((vm_offset_t) 0xFFFFFF8000000000UL)
+#define VM_MIN_KERNEL_PAGE		((ppnum_t)0)
+#define VM_MIN_KERNEL_AND_KEXT_ADDRESS	(VM_MIN_KERNEL_ADDRESS - 0x80000000ULL)
+#define VM_MAX_KERNEL_ADDRESS		((vm_offset_t) 0xFFFFFFFFFFFFEFFFUL)
+#define VM_MAX_KERNEL_ADDRESS_EFI32	((vm_offset_t) 0xFFFFFF80FFFFEFFFUL)
+#define KEXT_ALLOC_MAX_OFFSET (2 * 1024 * 1024 * 1024UL)
+#define KEXT_ALLOC_BASE(x)  ((x) - KEXT_ALLOC_MAX_OFFSET)
+#define KEXT_ALLOC_SIZE(x)  (KEXT_ALLOC_MAX_OFFSET - (x))
 
 #else
 #error unsupported architecture
 #endif
 
+#define KERNEL_STACK_SIZE	(I386_PGBYTES*4)
+
 #define VM_MAP_MIN_ADDRESS	MACH_VM_MIN_ADDRESS
 #define VM_MAP_MAX_ADDRESS	MACH_VM_MAX_ADDRESS
 
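The new KEXT_ALLOC_* macros describe a 2GB kext window immediately below the kernel base, which is exactly why VM_MIN_KERNEL_AND_KEXT_ADDRESS above is VM_MIN_KERNEL_ADDRESS minus 0x80000000ULL. A small user-space sketch of the arithmetic (values are illustrative):

#include <stdint.h>
#include <stdio.h>

#define KEXT_ALLOC_MAX_OFFSET	(2 * 1024 * 1024 * 1024UL)
#define KEXT_ALLOC_BASE(x)	((x) - KEXT_ALLOC_MAX_OFFSET)
#define KEXT_ALLOC_SIZE(x)	(KEXT_ALLOC_MAX_OFFSET - (x))

int main(void)
{
	uint64_t vm_min_kernel = 0xFFFFFF8000000000UL;

	/* Base of the whole window: 0xFFFFFF7F80000000 */
	printf("kext base:  0x%llx\n",
	    (unsigned long long)KEXT_ALLOC_BASE(vm_min_kernel));
	/* Space left if 16MB of the window is already consumed: */
	printf("bytes left: 0x%llx\n",
	    (unsigned long long)KEXT_ALLOC_SIZE(16 * 1024 * 1024UL));
	return 0;
}
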
diff --git a/osfmk/mach/mach_host.defs b/osfmk/mach/mach_host.defs
index df309d936..536cdce83 100644
--- a/osfmk/mach/mach_host.defs
+++ b/osfmk/mach/mach_host.defs
@@ -168,6 +168,8 @@ routine	kmod_get_info(
 /*
  *	Returns information about the memory allocation zones.
 *      Supported in all kernels.
+ *
+ *	DEPRECATED!  Use mach_zone_info() instead.
  */
 routine host_zone_info(
 		host		: host_t;
@@ -257,5 +259,17 @@ routine host_statistics64(
 		flavor		: host_flavor_t;
 	out	host_info64_out	: host_info64_t, CountInOut);
 
+/*
+ *	Returns information about the memory allocation zones.
+ *      Data returned is compatible with various caller and kernel
+ *	address space sizes (unlike host_zone_info()).
+ */
+routine mach_zone_info(
+		host		: host_t;
+	out	names		: mach_zone_name_array_t,
+					Dealloc;
+	out	info		: mach_zone_info_array_t,
+					Dealloc);
+
 
 /* vim: set ft=c : */
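A user-space sketch of calling the new routine through its MIG-generated stub; it assumes the mach_zone_name_t/mach_zone_info_t element types live in <mach_debug/zone_info.h> and that the kernel accepts the caller's host port:

#include <mach/mach.h>
#include <mach/mach_error.h>
#include <mach_debug/zone_info.h>
#include <stdio.h>

int main(void)
{
	mach_zone_name_array_t	names;
	mach_zone_info_array_t	info;
	mach_msg_type_number_t	name_cnt, info_cnt;

	kern_return_t kr = mach_zone_info(mach_host_self(),
	    &names, &name_cnt, &info, &info_cnt);
	if (kr != KERN_SUCCESS) {
		fprintf(stderr, "mach_zone_info: %s\n", mach_error_string(kr));
		return 1;
	}
	for (unsigned i = 0; i < name_cnt && i < info_cnt; i++)
		printf("%-32s cur_size=%llu\n", names[i].mzn_name,
		    (unsigned long long)info[i].mzi_cur_size);

	/* The Dealloc annotations mean MIG vm_allocate()d the arrays
	 * in our address space; hand them back when done. */
	vm_deallocate(mach_task_self(), (vm_address_t)names,
	    name_cnt * sizeof(*names));
	vm_deallocate(mach_task_self(), (vm_address_t)info,
	    info_cnt * sizeof(*info));
	return 0;
}
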
diff --git a/osfmk/mach/mach_port.defs b/osfmk/mach/mach_port.defs
index 5801ee42c..6c612758d 100644
--- a/osfmk/mach/mach_port.defs
+++ b/osfmk/mach/mach_port.defs
@@ -239,9 +239,10 @@ routine mach_port_move_member(
  *	Requests a notification from the kernel.  The request
  *	must supply the send-once right which is used for
  *	the notification.  If a send-once right was previously
- *	registered, it is returned.  The msg_id must be one of
+ *	registered, it is returned.  The msgid must be one of:
  *		MACH_NOTIFY_PORT_DESTROYED (receive rights)
  *		MACH_NOTIFY_DEAD_NAME (send/receive/send-once rights)
+ *		MACH_NOTIFY_SEND_POSSIBLE (send/receive/send-once rights)
  *		MACH_NOTIFY_NO_SENDERS (receive rights)
  *
  *	The sync value specifies whether a notification should
@@ -251,10 +252,20 @@ routine mach_port_move_member(
  *		MACH_NOTIFY_DEAD_NAME: if non-zero, then name can be dead,
  *			and the notification gets sent immediately.
  *			If zero, then name can't be dead.
+ *		MACH_NOTIFY_SEND_POSSIBLE: if non-zero, will generate a send-
+ *			possible notification as soon as it is possible to send
+ *			to the port. If zero, will generate a send-possible
+ *			notification only after a subsequent failed send
+ *			(with MACH_SEND_NOTIFY option to mach_msg call).  Can
+ *			generate a dead-name notification if name is already dead
+ *			or becomes dead before a send-possible notification fires.
  *		MACH_NOTIFY_NO_SENDERS: the notification gets sent
  *			immediately if the current mscount is greater
  *			than or equal to the sync value and there are no
  *			extant send rights.
+ *
+ *	If the name is deleted before a successfully registered notification
+ *	is delivered, it is replaced with a port-deleted notification.
  */
 
 routine mach_port_request_notification(
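A sketch of arming the send-possible notification documented above (port and notify_port are assumed to be already set up; sync == 0 picks the fail-first behavior):

#include <mach/mach.h>

kern_return_t
arm_send_possible(mach_port_t port, mach_port_t notify_port)
{
	mach_port_t previous = MACH_PORT_NULL;

	kern_return_t kr = mach_port_request_notification(
	    mach_task_self(), port, MACH_NOTIFY_SEND_POSSIBLE,
	    0 /* sync: fire only after a MACH_SEND_NOTIFY send fails */,
	    notify_port, MACH_MSG_TYPE_MAKE_SEND_ONCE, &previous);
	if (kr == KERN_SUCCESS && previous != MACH_PORT_NULL)
		mach_port_deallocate(mach_task_self(), previous);
	return kr;
}
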
diff --git a/osfmk/mach/mach_traps.h b/osfmk/mach/mach_traps.h
index 38298e27e..37ab3277f 100644
--- a/osfmk/mach/mach_traps.h
+++ b/osfmk/mach/mach_traps.h
@@ -199,7 +199,7 @@ extern kern_return_t pid_for_task(
 				mach_port_name_t t,
 				int *x);
 
-#if		!defined(__LP64__)
+#if		!defined(__LP64__) && !defined(__arm__)
 /* these should go away altogether - so no 64 legacy please */
 
 extern kern_return_t map_fd(
@@ -209,13 +209,17 @@ extern kern_return_t map_fd(
 				boolean_t findspace,
 				vm_size_t size);
 
-#endif	/* !defined(__LP64__) */
+#endif	/* !defined(__LP64__) && !defined(__arm__) */
 
 #else	/* KERNEL */
 
 #ifdef	XNU_KERNEL_PRIVATE
 
-/* Syscall data translations routines */
+/* Syscall data translation routines
+ *
+ * The kernel may support multiple userspace ABIs, and must use
+ * argument structures with elements large enough for any of them.
+ */
 #define	PAD_(t)	(sizeof(uint64_t) <= sizeof(t) \
  		? 0 : sizeof(uint64_t) - sizeof(t))
 #define PAD_ARG_8
@@ -231,9 +235,14 @@ extern kern_return_t map_fd(
 #define PAD_ARG_(arg_type, arg_name) \
   char arg_name##_l_[PADL_(arg_type)]; arg_type arg_name; char arg_name##_r_[PADR_(arg_type)];
 
-#ifndef __MUNGE_ONCE
-#define __MUNGE_ONCE
-#ifdef __ppc__
+/*
+ * To support 32-bit clients as well as 64-bit clients, argument
+ * structures may need to be munged to repack the arguments. All
+ * active architectures do this inline in the code to dispatch Mach
+ * traps, without calling out to the BSD system call mungers.
+ */
+
+#if 0 /* no active architectures use this */
 void munge_w(const void *, void *);  
 void munge_ww(const void *, void *);  
 void munge_www(const void *, void *);  
@@ -258,33 +267,7 @@ void munge_wlw(const void *, void *);
 void munge_wwwl(const void *, void *);  
 void munge_wwwwl(const void *, void *);  
 void munge_wwwwwl(const void *, void *);  
-#else 
-#define munge_w  NULL 
-#define munge_ww  NULL 
-#define munge_www  NULL 
-#define munge_wwww  NULL 
-#define munge_wwwww  NULL 
-#define munge_wwwwww  NULL 
-#define munge_wwwwwww  NULL 
-#define munge_wwwwwwww  NULL 
-#define munge_d  NULL 
-#define munge_dd  NULL 
-#define munge_ddd  NULL 
-#define munge_dddd  NULL 
-#define munge_ddddd  NULL 
-#define munge_dddddd  NULL 
-#define munge_ddddddd  NULL 
-#define munge_dddddddd  NULL 
-#define munge_l NULL
-#define munge_lw NULL
-#define munge_lwww NULL
-#define munge_wl  NULL 
-#define munge_wlw  NULL 
-#define munge_wwwl  NULL 
-#define munge_wwwwl  NULL 
-#define munge_wwwwwl  NULL 
-#endif /* __ppc__ */
-#endif /* !__MUNGE_ONCE */
+#endif /* 0 */
 
 struct kern_invalid_args {
 	int32_t dummy;
@@ -381,6 +364,7 @@ struct semaphore_timedwait_signal_trap_args {
 extern kern_return_t semaphore_timedwait_signal_trap(
 				struct semaphore_timedwait_signal_trap_args *args);
 
+#if		!defined(CONFIG_EMBEDDED)
 struct map_fd_args {
 	PAD_ARG_(int, fd);
 	PAD_ARG_(vm_offset_t, offset);
@@ -390,6 +374,7 @@ struct map_fd_args {
 };
 extern kern_return_t map_fd(
 				struct map_fd_args *args);
+#endif	/* !defined(CONFIG_EMBEDDED) */
 
 struct task_for_pid_args {
 	PAD_ARG_(mach_port_name_t, target_tport);
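How the PAD_ARG_ machinery plays out for a mixed 32/64-bit argument list, as a little-endian sketch (PADL_/PADR_ are defined elsewhere in this header; the definitions below assume the little-endian split, zero-length arrays are the GNU C extension the kernel relies on, and the trap is hypothetical):

#include <stdint.h>

#define PAD_(t)		(sizeof(uint64_t) <= sizeof(t) ? \
			0 : sizeof(uint64_t) - sizeof(t))
#define PADL_(t)	0	/* little-endian: value in the low bytes */
#define PADR_(t)	PAD_(t)
#define PAD_ARG_(arg_type, arg_name) \
  char arg_name##_l_[PADL_(arg_type)]; arg_type arg_name; char arg_name##_r_[PADR_(arg_type)];

struct example_trap_args {		/* hypothetical trap: (int fd, uint64_t len) */
	PAD_ARG_(int, fd);		/* 4-byte value plus 4 bytes of pad */
	PAD_ARG_(uint64_t, len);	/* already 64-bit: no pad */
};

_Static_assert(sizeof(struct example_trap_args) == 16,
	       "each argument occupies one 64-bit slot");
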
diff --git a/osfmk/mach/mach_types.defs b/osfmk/mach/mach_types.defs
index c4479bd51..0f36eeec5 100644
--- a/osfmk/mach/mach_types.defs
+++ b/osfmk/mach/mach_types.defs
@@ -213,11 +213,13 @@ type thread_policy_t		= array[*:16] of integer_t;
 		 * policy_rr_info_t (5 ints)
 		 * task security token (2 ints)
 		 * task audit token (8 ints)
+		 * dyld info (2 64-bit ints and 1 int)
+		 * task_extmod_info_t (8 64-bit ints)
 		 * If other task_info flavors are added, this
 		 * definition may need to be changed. (See
 		 * mach/task_info.h and mach/policy.h) */
 type task_flavor_t		= int;
-type task_info_t		= array[*:10] of integer_t;
+type task_info_t		= array[*:32] of integer_t;
 
 type task_policy_flavor_t	= natural_t;
 type task_policy_t		= array[*:16] of integer_t;
@@ -311,6 +313,7 @@ type host_info_t 		= array[*:15] of integer_t;
 		 * host_info64_t: variable-sized inline array that can contain:
 		 * 
 		 *	vm_statistics_t (6 ints and 9 longs)
+		 *	vm_extmod_statistics_t (6 64-bit ints)
 		 */
 type host_info64_t		= array[*:256] of integer_t;
 
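The dyld-info flavor called out in the comment above is one of the flavors that outgrew the old 10-int bound; a user-space sketch of fetching it:

#include <mach/mach.h>
#include <stdio.h>

int main(void)
{
	task_dyld_info_data_t	dyld;
	mach_msg_type_number_t	count = TASK_DYLD_INFO_COUNT;

	kern_return_t kr = task_info(mach_task_self(), TASK_DYLD_INFO,
	    (task_info_t)&dyld, &count);
	if (kr == KERN_SUCCESS)
		printf("all_image_info: addr 0x%llx size %llu\n",
		    (unsigned long long)dyld.all_image_info_addr,
		    (unsigned long long)dyld.all_image_info_size);
	return 0;
}
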
diff --git a/osfmk/mach/mach_types.h b/osfmk/mach/mach_types.h
index 5f9ddf14f..e4e47f63a 100644
--- a/osfmk/mach/mach_types.h
+++ b/osfmk/mach/mach_types.h
@@ -245,6 +245,7 @@ typedef exception_handler_array_t exception_port_arrary_t;
 #define TASK_NULL		((task_t) 0)
 #define TASK_NAME_NULL		((task_name_t) 0)
 #define THREAD_NULL		((thread_t) 0)
+#define TID_NULL		((uint64_t) 0)
 #define THR_ACT_NULL 		((thread_act_t) 0)
 #define IPC_SPACE_NULL		((ipc_space_t) 0)
 #define HOST_NULL		((host_t) 0)
diff --git a/osfmk/mach/mach_vm.defs b/osfmk/mach/mach_vm.defs
index 0cd136c69..ade3eaa61 100644
--- a/osfmk/mach/mach_vm.defs
+++ b/osfmk/mach/mach_vm.defs
@@ -291,7 +291,11 @@ routine vm_behavior_set(
 #if !defined(_MACH_VM_PUBLISH_AS_LOCAL_)
 routine mach_vm_map(
 #else
+#if defined(__arm__) && !LIBSYSCALL_INTERFACE
+routine _vm_map_arm(
+#else
 routine vm_map(
+#endif
 #endif
 		target_task	: vm_task_entry_t;
 	inout	address		: mach_vm_address_t;
diff --git a/osfmk/mach/machine.h b/osfmk/mach/machine.h
index d209fa278..9ebf5532c 100644
--- a/osfmk/mach/machine.h
+++ b/osfmk/mach/machine.h
@@ -370,10 +370,12 @@ __END_DECLS
 #define CPUFAMILY_INTEL_PENRYN		0x78ea4fbc
 #define CPUFAMILY_INTEL_NEHALEM		0x6b5a4cd2
 #define CPUFAMILY_INTEL_WESTMERE	0x573b5eec
+#define CPUFAMILY_INTEL_SANDYBRIDGE	0x5490b78c
 #define CPUFAMILY_ARM_9			0xe73283ae
 #define CPUFAMILY_ARM_11		0x8ff620d8
 #define CPUFAMILY_ARM_XSCALE		0x53b005f5
 #define CPUFAMILY_ARM_13		0x0cc90e64
+#define CPUFAMILY_ARM_14		0x96077ef1
 
 /* The following synonyms are deprecated: */
 #define CPUFAMILY_INTEL_6_14	CPUFAMILY_INTEL_YONAH
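User space can match the running processor against these constants through the hw.cpufamily sysctl; a short sketch:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <mach/machine.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t family = 0;
	size_t len = sizeof(family);

	if (sysctlbyname("hw.cpufamily", &family, &len, NULL, 0) == 0 &&
	    family == CPUFAMILY_INTEL_SANDYBRIDGE)
		printf("running on a Sandy Bridge part\n");
	return 0;
}
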
diff --git a/osfmk/mach/machine/asm.h b/osfmk/mach/machine/asm.h
index ba98269a5..1cdbb8109 100644
--- a/osfmk/mach/machine/asm.h
+++ b/osfmk/mach/machine/asm.h
@@ -29,9 +29,7 @@
 #ifndef _MACH_MACHINE_ASM_H
 #define _MACH_MACHINE_ASM_H
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "mach/ppc/asm.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "mach/i386/asm.h"
 #else
 #error architecture not supported
diff --git a/osfmk/mach/machine/boolean.h b/osfmk/mach/machine/boolean.h
index 97ffd0766..521033b72 100644
--- a/osfmk/mach/machine/boolean.h
+++ b/osfmk/mach/machine/boolean.h
@@ -29,9 +29,7 @@
 #ifndef _MACH_MACHINE_BOOLEAN_H_
 #define _MACH_MACHINE_BOOLEAN_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "mach/ppc/boolean.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "mach/i386/boolean.h"
 #else
 #error architecture not supported
diff --git a/osfmk/mach/machine/exception.h b/osfmk/mach/machine/exception.h
index 5fc148663..5fce0e919 100644
--- a/osfmk/mach/machine/exception.h
+++ b/osfmk/mach/machine/exception.h
@@ -29,9 +29,7 @@
 #ifndef _MACH_MACHINE_EXCEPTION_H_
 #define _MACH_MACHINE_EXCEPTION_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "mach/ppc/exception.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "mach/i386/exception.h"
 #else
 #error architecture not supported
diff --git a/osfmk/mach/machine/kern_return.h b/osfmk/mach/machine/kern_return.h
index 82c0adf0c..e2b5bc677 100644
--- a/osfmk/mach/machine/kern_return.h
+++ b/osfmk/mach/machine/kern_return.h
@@ -29,9 +29,7 @@
 #ifndef _MACH_MACHINE_KERN_RETURN_H_
 #define _MACH_MACHINE_KERN_RETURN_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "mach/ppc/kern_return.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "mach/i386/kern_return.h"
 #else
 #error architecture not supported
diff --git a/osfmk/mach/machine/machine_types.defs b/osfmk/mach/machine/machine_types.defs
index 418d16bcc..2ed0d52fd 100644
--- a/osfmk/mach/machine/machine_types.defs
+++ b/osfmk/mach/machine/machine_types.defs
@@ -29,9 +29,7 @@
 #ifndef _MACH_MACHINE_MACHINE_TYPES_DEFS
 #define _MACH_MACHINE_MACHINE_TYPES_DEFS
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "mach/ppc/machine_types.defs"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "mach/i386/machine_types.defs"
 #elif defined (__arm__)
 #include "mach/arm/machine_types.defs"
diff --git a/osfmk/mach/machine/ndr_def.h b/osfmk/mach/machine/ndr_def.h
index 10e8e3e2f..2d3451472 100644
--- a/osfmk/mach/machine/ndr_def.h
+++ b/osfmk/mach/machine/ndr_def.h
@@ -29,9 +29,7 @@
 #ifndef _MACH_MACHINE_NDR_DEF_H
 #define _MACH_MACHINE_NDR_DEF_H
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "mach/ppc/ndr_def.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "mach/i386/ndr_def.h"
 #else
 #error architecture not supported
diff --git a/osfmk/mach/machine/processor_info.h b/osfmk/mach/machine/processor_info.h
index a4c6d639e..c7ddb5b01 100644
--- a/osfmk/mach/machine/processor_info.h
+++ b/osfmk/mach/machine/processor_info.h
@@ -29,9 +29,7 @@
 #ifndef _MACH_MACHINE_PROCESSOR_INFO_H_
 #define _MACH_MACHINE_PROCESSOR_INFO_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "mach/ppc/processor_info.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "mach/i386/processor_info.h"
 #else
 #error architecture not supported
diff --git a/osfmk/mach/machine/rpc.h b/osfmk/mach/machine/rpc.h
index 849260ae4..3e543a88f 100644
--- a/osfmk/mach/machine/rpc.h
+++ b/osfmk/mach/machine/rpc.h
@@ -29,9 +29,7 @@
 #ifndef _MACH_MACHINE_RPC_H_
 #define _MACH_MACHINE_RPC_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "mach/ppc/rpc.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "mach/i386/rpc.h"
 #else
 #error architecture not supported
diff --git a/osfmk/mach/machine/sdt.h b/osfmk/mach/machine/sdt.h
index af2b59b1e..551f2b0fc 100644
--- a/osfmk/mach/machine/sdt.h
+++ b/osfmk/mach/machine/sdt.h
@@ -224,6 +224,54 @@
     type3, arg3, type4, arg4)						\
 	DTRACE_PROBE4(__vminfo_, name, arg1, arg2, arg3, arg4)
 
+#define DTRACE_IP(name)							\
+	DTRACE_PROBE(__ip_, name)
+
+#define DTRACE_IP1(name, type1, arg1)					\
+	DTRACE_PROBE1(__ip_, name, arg1)
+
+#define DTRACE_IP2(name, type1, arg1, type2, arg2)			\
+	DTRACE_PROBE2(__ip_, name, arg1, arg2)
+
+#define DTRACE_IP3(name, type1, arg1, type2, arg2, type3, arg3)		\
+	DTRACE_PROBE3(__ip_, name, arg1, arg2, arg3)
+
+#define DTRACE_IP4(name, type1, arg1, type2, arg2,			\
+    type3, arg3, type4, arg4)						\
+	DTRACE_PROBE4(__ip_, name, arg1, arg2, arg3, arg4)
+
+#define DTRACE_IP5(name, type1, arg1, type2, arg2, type3, arg3,		\
+    type4, arg4, type5, arg5)						\
+	DTRACE_PROBE5(__ip_, name, arg1, arg2, arg3, arg4, arg5)
+
+#define DTRACE_IP6(name, type1, arg1, type2, arg2, type3, arg3,		\
+    type4, arg4, type5, arg5, type6, arg6)				\
+	DTRACE_PROBE6(__ip_, name, arg1, arg2, arg3, arg4, arg5, arg6)
+
+#define DTRACE_IP7(name, type1, arg1, type2, arg2, type3, arg3,		\
+    type4, arg4, type5, arg5, type6, arg6, type7, arg7)			\
+	DTRACE_PROBE7(__ip_, name, arg1, arg2, arg3, arg4, arg5, arg6, arg7)
+
+#define DTRACE_TCP(name)                                                 \
+        DTRACE_PROBE(__tcp_, name)
+
+#define DTRACE_TCP1(name, type1, arg1)                                   \
+        DTRACE_PROBE1(__tcp_, name, arg1)
+
+#define DTRACE_TCP2(name, type1, arg1, type2, arg2)                      \
+        DTRACE_PROBE2(__tcp_, name, arg1, arg2)
+
+#define DTRACE_TCP3(name, type1, arg1, type2, arg2, type3, arg3)         \
+        DTRACE_PROBE3(__tcp_, name, arg1, arg2, arg3)
+
+#define DTRACE_TCP4(name, type1, arg1, type2, arg2,                      \
+    type3, arg3, type4, arg4)                                           \
+        DTRACE_PROBE4(__tcp_, name, arg1, arg2, arg3, arg4)
+
+#define DTRACE_TCP5(name, type1, arg1, type2, arg2, type3, arg3,          \
+    type4, arg4, type5, arg5)                                           \
+        DTRACE_PROBE5(__tcp_, name, arg1, arg2, arg3, arg4, arg5)
+
 #else /* CONFIG_DTRACE */
 
 #define	DTRACE_SCHED(name) do {} while (0)
@@ -250,6 +298,22 @@
 #define DTRACE_VM2(name, type1, arg1, type2, arg2) do {} while(0)
 #define DTRACE_VM3(name, type1, arg1, type2, arg2, type3, arg3) do {} while(0)
 #define DTRACE_VM4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) do {} while(0)
+#define DTRACE_IP(name) do {} while(0)
+#define DTRACE_IP1(name, type1, arg1) do {} while(0)
+#define DTRACE_IP2(name, type1, arg1, type2, arg2) do {} while(0)
+#define DTRACE_IP3(name, type1, arg1, type2, arg2, type3, arg3) do {} while(0)
+#define DTRACE_IP4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) do {} while(0)
+#define DTRACE_IP5(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5) do {} while(0)
+#define DTRACE_IP6(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, type6, arg6) do {} while(0)
+#define DTRACE_IP7(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, \
+	type6, arg6, type7, arg7) do {} while(0)
+
+#define DTRACE_TCP(name) do {} while(0)
+#define DTRACE_TCP1(name, type1, arg1) do {} while(0)
+#define DTRACE_TCP2(name, type1, arg1, type2, arg2) do {} while(0)
+#define DTRACE_TCP3(name, type1, arg1, type2, arg2, type3, arg3) do {} while(0)
+#define DTRACE_TCP4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) do {} while(0)
+#define DTRACE_TCP5(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5) do {} while(0)
 
 #endif /* CONFIG_DTRACE */
 
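What a probe site using the new TCP family looks like, as a kernel-context sketch (the probe and argument names are illustrative, not probes xnu actually wires up; double underscores in the name become dashes in the DTrace probe name):

struct conn { int state; };	/* stand-in for the real protocol block */

static void
conn_set_state(struct conn *cp, int new_state)
{
	/* With CONFIG_DTRACE this drops a static probe site; without
	 * it, the do {} while(0) stubs above make it vanish. */
	DTRACE_TCP2(state__change, struct conn *, cp, int, new_state);
	cp->state = new_state;
}
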
diff --git a/osfmk/mach/machine/sdt_isa.h b/osfmk/mach/machine/sdt_isa.h
index 000690744..edd26dcc2 100644
--- a/osfmk/mach/machine/sdt_isa.h
+++ b/osfmk/mach/machine/sdt_isa.h
@@ -28,9 +28,7 @@
 #ifndef _MACH_MACHINE_SDT_ISA_H_
 #define _MACH_MACHINE_SDT_ISA_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include <mach/ppc/sdt_isa.h>
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include <mach/i386/sdt_isa.h>
 #else
 #error architecture not supported
diff --git a/osfmk/mach/machine/syscall_sw.h b/osfmk/mach/machine/syscall_sw.h
index 972331769..902b6815e 100644
--- a/osfmk/mach/machine/syscall_sw.h
+++ b/osfmk/mach/machine/syscall_sw.h
@@ -31,9 +31,7 @@
 #ifndef _MACH_MACHINE_SYSCALL_SW_H_
 #define _MACH_MACHINE_SYSCALL_SW_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "mach/ppc/syscall_sw.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "mach/i386/syscall_sw.h"
 #else
 #error architecture not supported
diff --git a/osfmk/mach/machine/thread_state.h b/osfmk/mach/machine/thread_state.h
index bf9a155d9..061477698 100644
--- a/osfmk/mach/machine/thread_state.h
+++ b/osfmk/mach/machine/thread_state.h
@@ -29,9 +29,7 @@
 #ifndef _MACH_MACHINE_THREAD_STATE_H_
 #define _MACH_MACHINE_THREAD_STATE_H_
 
-#if defined (__ppc__) || defined(__ppc64__)
-#include "mach/ppc/thread_state.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "mach/i386/thread_state.h"
 #else
 #error architecture not supported
diff --git a/osfmk/mach/machine/thread_status.h b/osfmk/mach/machine/thread_status.h
index 10ed68996..74cda9596 100644
--- a/osfmk/mach/machine/thread_status.h
+++ b/osfmk/mach/machine/thread_status.h
@@ -29,9 +29,7 @@
 #ifndef _MACH_MACHINE_THREAD_STATUS_H_
 #define _MACH_MACHINE_THREAD_STATUS_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "mach/ppc/thread_status.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "mach/i386/thread_status.h"
 #else
 #error architecture not supported
diff --git a/osfmk/mach/machine/vm_param.h b/osfmk/mach/machine/vm_param.h
index 685342999..5898fdba1 100644
--- a/osfmk/mach/machine/vm_param.h
+++ b/osfmk/mach/machine/vm_param.h
@@ -29,9 +29,7 @@
 #ifndef _MACH_MACHINE_VM_PARAM_H_
 #define _MACH_MACHINE_VM_PARAM_H_
 
-#if defined (__ppc__) || defined (__ppc64__)
-#include "mach/ppc/vm_param.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "mach/i386/vm_param.h"
 #else
 #error architecture not supported
diff --git a/osfmk/mach/machine/vm_types.h b/osfmk/mach/machine/vm_types.h
index a5c4c8ba1..2b7526570 100644
--- a/osfmk/mach/machine/vm_types.h
+++ b/osfmk/mach/machine/vm_types.h
@@ -29,9 +29,7 @@
 #ifndef _MACH_MACHINE_VM_TYPES_H_
 #define _MACH_MACHINE_VM_TYPES_H_
 
-#if defined (__ppc__) || defined(__ppc64__)
-#include "mach/ppc/vm_types.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "mach/i386/vm_types.h"
 #else
 #error architecture not supported
diff --git a/osfmk/mach/memory_object.defs b/osfmk/mach/memory_object.defs
index 436e9b290..01afb30a0 100644
--- a/osfmk/mach/memory_object.defs
+++ b/osfmk/mach/memory_object.defs
@@ -203,4 +203,8 @@ routine memory_object_map(
 routine memory_object_last_unmap(
 		memory_object           : memory_object_t);
 
+routine	memory_object_data_reclaim(
+		memory_object		: memory_object_t;
+		reclaim_backing_store	: boolean_t);
+
 /* vim: set ft=c : */
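Kernel-side, a pager opts into the new call through its ops table; a sketch with hypothetical pager functions (not compilable outside the kernel, and a real table fills in every slot rather than leaving any NULL):

static kern_return_t
my_pager_data_reclaim(memory_object_t mem_obj, boolean_t reclaim_backing_store)
{
	/* Drop clean cached copies; if reclaim_backing_store is set,
	 * release the backing store as well. */
	(void)mem_obj;
	(void)reclaim_backing_store;
	return KERN_SUCCESS;
}

static const struct memory_object_pager_ops my_pager_ops = {
	/* reference, deallocate, init, terminate, data_request,
	 * data_return, data_initialize, data_unlock, synchronize,
	 * map and last_unmap entries elided */
	.memory_object_data_reclaim = my_pager_data_reclaim,
	.memory_object_pager_name   = "my_pager",
};
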
diff --git a/osfmk/mach/memory_object_types.h b/osfmk/mach/memory_object_types.h
index c4a5df888..846987cfd 100644
--- a/osfmk/mach/memory_object_types.h
+++ b/osfmk/mach/memory_object_types.h
@@ -158,6 +158,9 @@ typedef const struct memory_object_pager_ops {
 		vm_prot_t prot);
 	kern_return_t (*memory_object_last_unmap)(
 		memory_object_t mem_obj);
+	kern_return_t (*memory_object_data_reclaim)(
+		memory_object_t mem_obj,
+		boolean_t reclaim_backing_store);
 	const char *memory_object_pager_name;
 } * memory_object_pager_ops_t;
 
@@ -376,10 +379,10 @@ typedef struct memory_object_attr_info	memory_object_attr_info_data_t;
 			& 0xFF000000) | ((flags) & 0xFFFFFF));
 
 /* leave room for vm_prot bits */
-#define MAP_MEM_ONLY		0x10000	/* change processor caching  */
-#define MAP_MEM_NAMED_CREATE	0x20000 /* create extant object      */
-#define MAP_MEM_PURGABLE	0x40000	/* create a purgable VM object */
-#define MAP_MEM_NAMED_REUSE	0x80000	/* reuse provided entry if identical */
+#define MAP_MEM_ONLY		0x010000 /* change processor caching  */
+#define MAP_MEM_NAMED_CREATE	0x020000 /* create extant object      */
+#define MAP_MEM_PURGABLE	0x040000 /* create a purgable VM object */
+#define MAP_MEM_NAMED_REUSE	0x080000 /* reuse provided entry if identical */
 
 #ifdef KERNEL
 
@@ -463,9 +466,10 @@ typedef uint32_t	upl_size_t;	/* page-aligned byte size */
 #define UPL_UBC_MSYNC		0x02000000
 #define UPL_UBC_PAGEOUT		0x04000000
 #define UPL_UBC_PAGEIN		0x08000000
+#define UPL_REQUEST_SET_DIRTY	0x10000000
 
 /* UPL flags known by this kernel */
-#define UPL_VALID_FLAGS		0x0FFFFFFF
+#define UPL_VALID_FLAGS		0x1FFFFFFF
 
 
 /* upl abort error flags */
@@ -518,9 +522,7 @@ typedef uint32_t	upl_size_t;	/* page-aligned byte size */
 /*
  *
  */
-#ifdef MACH_KERNEL_PRIVATE
 #define UPL_PAGING_ENCRYPTED	0x20
-#endif /* MACH_KERNEL_PRIVATE */
 
 /*
  * this pageout is being originated as part of an explicit
@@ -682,6 +684,7 @@ extern ppnum_t	upl_phys_page(upl_page_info_t *upl, int index);
 extern boolean_t	upl_device_page(upl_page_info_t *upl);
 extern boolean_t	upl_speculative_page(upl_page_info_t *upl, int index);
 extern void	upl_clear_dirty(upl_t upl, boolean_t value);
+extern void	upl_set_referenced(upl_t upl, boolean_t value);
 
 __END_DECLS
 
diff --git a/osfmk/mach/message.h b/osfmk/mach/message.h
index 9be5f5e90..195607585 100644
--- a/osfmk/mach/message.h
+++ b/osfmk/mach/message.h
@@ -581,12 +581,12 @@ typedef integer_t mach_msg_option_t;
 
 #define MACH_SEND_TIMEOUT	0x00000010
 #define MACH_SEND_INTERRUPT	0x00000040	/* libmach implements */
-#define MACH_SEND_CANCEL	0x00000080
+#define MACH_SEND_NOTIFY	0x00000080	/* arm send-possible notify */
 #define MACH_SEND_ALWAYS	0x00010000	/* internal use only */
 #define MACH_SEND_TRAILER	0x00020000	
 
 #define MACH_RCV_TIMEOUT	0x00000100
-#define MACH_RCV_NOTIFY		0x00000200
+#define MACH_RCV_NOTIFY		0x00000200	/* reserved - legacy */
 #define MACH_RCV_INTERRUPT	0x00000400	/* libmach implements */
 #define MACH_RCV_OVERWRITE	0x00001000
 
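The renamed MACH_SEND_NOTIFY bit is what arms a sync==0 send-possible registration; a sketch of the full-queue send path (hdr is assumed fully initialized with a valid destination):

#include <mach/mach.h>

kern_return_t
try_send(mach_msg_header_t *hdr)
{
	kern_return_t kr = mach_msg(hdr,
	    MACH_SEND_MSG | MACH_SEND_TIMEOUT | MACH_SEND_NOTIFY,
	    hdr->msgh_size, 0, MACH_PORT_NULL,
	    0 /* zero timeout: fail immediately if the queue is full */,
	    MACH_PORT_NULL);
	if (kr == MACH_SEND_TIMED_OUT) {
		/* Queue full.  A send-possible registration made with
		 * sync == 0 will now fire when the queue drains; park
		 * this message until the notification arrives. */
	}
	return kr;
}
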
diff --git a/osfmk/mach/notify.defs b/osfmk/mach/notify.defs
index 6f7f81d2d..4aece97bf 100644
--- a/osfmk/mach/notify.defs
+++ b/osfmk/mach/notify.defs
@@ -83,7 +83,17 @@ simpleroutine mach_notify_port_deleted(
 #endif	/* SEQNOS */
 		name	: mach_port_name_t);
 
-skip;	/* was MACH_NOTIFY_MSG_ACCEPTED: 0102 */
+#if (KERNEL_USER | MACH_NOTIFY_SEND_POSSIBLE_EXPECTED)
+/* MACH_NOTIFY_SEND_POSSIBLE: 0102 */
+simpleroutine mach_notify_send_possible(
+		notify	: mach_port_move_send_once_t;
+#if	SEQNOS
+	msgseqno seqno	: mach_port_seqno_t;
+#endif	/* SEQNOS */
+		name	: mach_port_name_t);
+#else
+skip;
+#endif
 
 skip;	/* was NOTIFY_OWNERSHIP_RIGHTS: 0103 */
 
diff --git a/osfmk/mach/notify.h b/osfmk/mach/notify.h
index 768a865cd..845646c5c 100644
--- a/osfmk/mach/notify.h
+++ b/osfmk/mach/notify.h
@@ -74,8 +74,10 @@
  */
 
 #define MACH_NOTIFY_FIRST		0100
-#define MACH_NOTIFY_PORT_DELETED	(MACH_NOTIFY_FIRST + 001 )
+#define MACH_NOTIFY_PORT_DELETED	(MACH_NOTIFY_FIRST + 001)
 			/* A send or send-once right was deleted. */
+#define MACH_NOTIFY_SEND_POSSIBLE	(MACH_NOTIFY_FIRST + 002)
+			/* Now possible to send using specified right */
 #define MACH_NOTIFY_PORT_DESTROYED	(MACH_NOTIFY_FIRST + 005)
 			/* A receive right was (would have been) deallocated */
 #define MACH_NOTIFY_NO_SENDERS		(MACH_NOTIFY_FIRST + 006)
@@ -103,6 +105,13 @@ typedef struct {
     mach_msg_format_0_trailer_t trailer;
 } mach_port_deleted_notification_t;
 
+typedef struct {
+    mach_msg_header_t	not_header;
+    NDR_record_t	NDR;
+    mach_port_name_t not_port;/* MACH_MSG_TYPE_PORT_NAME */
+    mach_msg_format_0_trailer_t trailer;
+} mach_send_possible_notification_t;
+
 typedef struct {
     mach_msg_header_t	not_header;
     mach_msg_body_t	not_body;
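Decoding the new notification on the receive side, as a sketch (resume_sender is a hypothetical callback; the registration is consumed on delivery, so it should be re-armed while handling):

#include <mach/mach.h>
#include <mach/notify.h>

extern void resume_sender(mach_port_name_t name);	/* hypothetical */

void
handle_notify_msg(mach_msg_header_t *hdr)
{
	if (hdr->msgh_id == MACH_NOTIFY_SEND_POSSIBLE) {
		mach_send_possible_notification_t *n =
		    (mach_send_possible_notification_t *)hdr;
		/* n->not_port names the right that is sendable again */
		resume_sender(n->not_port);
	} else if (hdr->msgh_id == MACH_NOTIFY_DEAD_NAME) {
		/* per the comment above, the registration may instead
		 * deliver a dead-name notification */
	}
}
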
diff --git a/osfmk/mach/port.h b/osfmk/mach/port.h
index 9db876f17..b09673aba 100644
--- a/osfmk/mach/port.h
+++ b/osfmk/mach/port.h
@@ -277,7 +277,9 @@ typedef mach_port_type_t *mach_port_type_array_t;
 
 /* Dummy type bits that mach_port_type/mach_port_names can return. */
 
-#define MACH_PORT_TYPE_DNREQUEST	0x80000000
+#define MACH_PORT_TYPE_DNREQUEST		0x80000000
+#define MACH_PORT_TYPE_SPREQUEST		0x40000000
+#define MACH_PORT_TYPE_SPREQUEST_DELAYED	0x20000000
 
 /* User-references for capabilities. */
 
diff --git a/osfmk/mach/ppc/Makefile b/osfmk/mach/ppc/Makefile
deleted file mode 100644
index 83f21cec6..000000000
--- a/osfmk/mach/ppc/Makefile
+++ /dev/null
@@ -1,35 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-VPATH+=$(SOURCE)/../../ppc:
-
-DATAFILES = \
-	boolean.h exception.h kern_return.h ndr_def.h \
-	processor_info.h rpc.h thread_state.h thread_status.h \
-	vm_param.h vm_types.h machine_types.defs \
-	syscall_sw.h _structs.h sdt_isa.h
-
-INSTALL_MD_LIST = ${DATAFILES}
-
-INSTALL_MD_GEN_LIST = \
-	asm.h
-
-INSTALL_MD_DIR = mach/ppc
-
-EXPORT_MD_LIST = ${DATAFILES}
-
-EXPORT_MD_GEN_LIST = \
-	asm.h
-
-EXPORT_MD_DIR = mach/ppc
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/osfmk/mach/ppc/_structs.h b/osfmk/mach/ppc/_structs.h
deleted file mode 100644
index f2c78cda1..000000000
--- a/osfmk/mach/ppc/_structs.h
+++ /dev/null
@@ -1,392 +0,0 @@
-/*
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifndef	_MACH_PPC__STRUCTS_H_
-#define	_MACH_PPC__STRUCTS_H_
-
-#include <sys/cdefs.h>
-
-/*
- * ppc_thread_state is the structure that is exported to user threads for 
- * use in status/mutate calls.  This structure should never change.
- *
- */
-
-#if __DARWIN_UNIX03
-#define	_STRUCT_PPC_THREAD_STATE	struct __darwin_ppc_thread_state
-_STRUCT_PPC_THREAD_STATE
-{
-	unsigned int __srr0;	/* Instruction address register (PC) */
-	unsigned int __srr1;	/* Machine state register (supervisor) */
-	unsigned int __r0;
-	unsigned int __r1;
-	unsigned int __r2;
-	unsigned int __r3;
-	unsigned int __r4;
-	unsigned int __r5;
-	unsigned int __r6;
-	unsigned int __r7;
-	unsigned int __r8;
-	unsigned int __r9;
-	unsigned int __r10;
-	unsigned int __r11;
-	unsigned int __r12;
-	unsigned int __r13;
-	unsigned int __r14;
-	unsigned int __r15;
-	unsigned int __r16;
-	unsigned int __r17;
-	unsigned int __r18;
-	unsigned int __r19;
-	unsigned int __r20;
-	unsigned int __r21;
-	unsigned int __r22;
-	unsigned int __r23;
-	unsigned int __r24;
-	unsigned int __r25;
-	unsigned int __r26;
-	unsigned int __r27;
-	unsigned int __r28;
-	unsigned int __r29;
-	unsigned int __r30;
-	unsigned int __r31;
-
-	unsigned int __cr;	/* Condition register */
-	unsigned int __xer;	/* User's integer exception register */
-	unsigned int __lr;	/* Link register */
-	unsigned int __ctr;	/* Count register */
-	unsigned int __mq;	/* MQ register (601 only) */
-
-	unsigned int __vrsave;	/* Vector Save Register */
-};
-#else /* !__DARWIN_UNIX03 */
-#define	_STRUCT_PPC_THREAD_STATE	struct ppc_thread_state
-_STRUCT_PPC_THREAD_STATE
-{
-	unsigned int srr0;	/* Instruction address register (PC) */
-	unsigned int srr1;	/* Machine state register (supervisor) */
-	unsigned int r0;
-	unsigned int r1;
-	unsigned int r2;
-	unsigned int r3;
-	unsigned int r4;
-	unsigned int r5;
-	unsigned int r6;
-	unsigned int r7;
-	unsigned int r8;
-	unsigned int r9;
-	unsigned int r10;
-	unsigned int r11;
-	unsigned int r12;
-	unsigned int r13;
-	unsigned int r14;
-	unsigned int r15;
-	unsigned int r16;
-	unsigned int r17;
-	unsigned int r18;
-	unsigned int r19;
-	unsigned int r20;
-	unsigned int r21;
-	unsigned int r22;
-	unsigned int r23;
-	unsigned int r24;
-	unsigned int r25;
-	unsigned int r26;
-	unsigned int r27;
-	unsigned int r28;
-	unsigned int r29;
-	unsigned int r30;
-	unsigned int r31;
-
-	unsigned int cr;	/* Condition register */
-	unsigned int xer;	/* User's integer exception register */
-	unsigned int lr;	/* Link register */
-	unsigned int ctr;	/* Count register */
-	unsigned int mq;	/* MQ register (601 only) */
-
-	unsigned int vrsave;	/* Vector Save Register */
-};
-#endif /* __DARWIN_UNIX03 */
-
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-
-#pragma pack(4)			/* Make sure the structure stays as we defined it */
-
-#if __DARWIN_UNIX03
-#define _STRUCT_PPC_THREAD_STATE64	struct __darwin_ppc_thread_state64
-_STRUCT_PPC_THREAD_STATE64
-{
-	unsigned long long __srr0;	/* Instruction address register (PC) */
-	unsigned long long __srr1;	/* Machine state register (supervisor) */
-	unsigned long long __r0;
-	unsigned long long __r1;
-	unsigned long long __r2;
-	unsigned long long __r3;
-	unsigned long long __r4;
-	unsigned long long __r5;
-	unsigned long long __r6;
-	unsigned long long __r7;
-	unsigned long long __r8;
-	unsigned long long __r9;
-	unsigned long long __r10;
-	unsigned long long __r11;
-	unsigned long long __r12;
-	unsigned long long __r13;
-	unsigned long long __r14;
-	unsigned long long __r15;
-	unsigned long long __r16;
-	unsigned long long __r17;
-	unsigned long long __r18;
-	unsigned long long __r19;
-	unsigned long long __r20;
-	unsigned long long __r21;
-	unsigned long long __r22;
-	unsigned long long __r23;
-	unsigned long long __r24;
-	unsigned long long __r25;
-	unsigned long long __r26;
-	unsigned long long __r27;
-	unsigned long long __r28;
-	unsigned long long __r29;
-	unsigned long long __r30;
-	unsigned long long __r31;
-
-	unsigned int __cr;		/* Condition register */
-	unsigned long long __xer;	/* User's integer exception register */
-	unsigned long long __lr;	/* Link register */
-	unsigned long long __ctr;	/* Count register */
-
-	unsigned int __vrsave;		/* Vector Save Register */
-};
-#else /* !__DARWIN_UNIX03 */
-#define _STRUCT_PPC_THREAD_STATE64	struct ppc_thread_state64
-_STRUCT_PPC_THREAD_STATE64
-{
-	unsigned long long srr0;	/* Instruction address register (PC) */
-	unsigned long long srr1;	/* Machine state register (supervisor) */
-	unsigned long long r0;
-	unsigned long long r1;
-	unsigned long long r2;
-	unsigned long long r3;
-	unsigned long long r4;
-	unsigned long long r5;
-	unsigned long long r6;
-	unsigned long long r7;
-	unsigned long long r8;
-	unsigned long long r9;
-	unsigned long long r10;
-	unsigned long long r11;
-	unsigned long long r12;
-	unsigned long long r13;
-	unsigned long long r14;
-	unsigned long long r15;
-	unsigned long long r16;
-	unsigned long long r17;
-	unsigned long long r18;
-	unsigned long long r19;
-	unsigned long long r20;
-	unsigned long long r21;
-	unsigned long long r22;
-	unsigned long long r23;
-	unsigned long long r24;
-	unsigned long long r25;
-	unsigned long long r26;
-	unsigned long long r27;
-	unsigned long long r28;
-	unsigned long long r29;
-	unsigned long long r30;
-	unsigned long long r31;
-
-	unsigned int cr;		/* Condition register */
-	unsigned long long xer;		/* User's integer exception register */
-	unsigned long long lr;		/* Link register */
-	unsigned long long ctr;		/* Count register */
-
-	unsigned int vrsave;		/* Vector Save Register */
-};
-#endif /* __DARWIN_UNIX03 */
-
-#pragma pack()
-
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-
-/* This structure should be double-word aligned for performance */
-
-#if __DARWIN_UNIX03
-#define	_STRUCT_PPC_FLOAT_STATE	struct __darwin_ppc_float_state
-_STRUCT_PPC_FLOAT_STATE
-{
-	double  __fpregs[32];
-
-	unsigned int __fpscr_pad; /* fpscr is 64 bits, 32 bits of rubbish */
-	unsigned int __fpscr;	/* floating point status register */
-};
-#else /* !__DARWIN_UNIX03 */
-#define	_STRUCT_PPC_FLOAT_STATE	struct ppc_float_state
-_STRUCT_PPC_FLOAT_STATE
-{
-	double  fpregs[32];
-
-	unsigned int fpscr_pad; /* fpscr is 64 bits, 32 bits of rubbish */
-	unsigned int fpscr;	/* floating point status register */
-};
-#endif /* __DARWIN_UNIX03 */
-
-#pragma pack(4)		/* Make sure the structure stays as we defined it */
-
-#if __DARWIN_UNIX03
-#define _STRUCT_PPC_VECTOR_STATE	struct __darwin_ppc_vector_state
-_STRUCT_PPC_VECTOR_STATE
-{
-#if defined(__LP64__)
-	unsigned int	__save_vr[32][4];
-	unsigned int	__save_vscr[4];
-#else
-	unsigned long	__save_vr[32][4];
-	unsigned long	__save_vscr[4];
-#endif
-	unsigned int	__save_pad5[4];
-	unsigned int	__save_vrvalid;			/* VRs that have been saved */
-	unsigned int	__save_pad6[7];
-};
-#else /* !__DARWIN_UNIX03 */
-#define _STRUCT_PPC_VECTOR_STATE	struct ppc_vector_state
-_STRUCT_PPC_VECTOR_STATE
-{
-#if defined(__LP64__)
-	unsigned int	save_vr[32][4];
-	unsigned int	save_vscr[4];
-#else
-	unsigned long	save_vr[32][4];
-	unsigned long	save_vscr[4];
-#endif
-	unsigned int	save_pad5[4];
-	unsigned int	save_vrvalid;			/* VRs that have been saved */
-	unsigned int	save_pad6[7];
-};
-#endif /* __DARWIN_UNIX03 */
-
-#pragma pack()
-
-/*
- * ppc_exception_state
- *
- * This structure corresponds to some additional state of the user
- * registers as saved in the PCB upon kernel entry. They are only
- * available if an exception is passed out of the kernel, and even
- * then not all are guaranteed to be updated.
- *
- * Some padding is included in this structure which allows space for
- * servers to store temporary values if need be, to maintain binary
- * compatiblity.
- */
-
-/* Exception state for 32-bit thread (on 32-bit processor) */
-/* Still available on 64-bit processors, but may fall short */
-/* of covering the full potential state (hi half available). */
-
-#pragma pack(4)	/* Make sure the structure stays as we defined it */
-
-#if __DARWIN_UNIX03
-#define _STRUCT_PPC_EXCEPTION_STATE	struct __darwin_ppc_exception_state
-_STRUCT_PPC_EXCEPTION_STATE
-{
-#if defined(__LP64__)
-	unsigned int __dar;		/* Fault registers for coredump */
-	unsigned int __dsisr;
-	unsigned int __exception;	/* number of powerpc exception taken */
-	unsigned int __pad0;		/* align to 16 bytes */
-	unsigned int __pad1[4];		/* space in PCB "just in case" */
-#else
-	unsigned long __dar;		/* Fault registers for coredump */
-	unsigned long __dsisr;
-	unsigned long __exception;	/* number of powerpc exception taken */
-	unsigned long __pad0;		/* align to 16 bytes */
-	unsigned long __pad1[4];	/* space in PCB "just in case" */
-#endif
-};
-#else /* !__DARWIN_UNIX03 */
-#define _STRUCT_PPC_EXCEPTION_STATE	struct ppc_exception_state
-_STRUCT_PPC_EXCEPTION_STATE
-{
-#if defined(__LP64__)
-	unsigned int dar;		/* Fault registers for coredump */
-	unsigned int dsisr;
-	unsigned int exception;		/* number of powerpc exception taken */
-	unsigned int pad0;		/* align to 16 bytes */
-	unsigned int pad1[4];		/* space in PCB "just in case" */
-#else
-	unsigned long dar;		/* Fault registers for coredump */
-	unsigned long dsisr;
-	unsigned long exception;	/* number of powerpc exception taken */
-	unsigned long pad0;		/* align to 16 bytes */
-	unsigned long pad1[4];		/* space in PCB "just in case" */
-#endif
-};
-#endif /* __DARWIN_UNIX03 */
-
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#if __DARWIN_UNIX03
-#define _STRUCT_PPC_EXCEPTION_STATE64	struct __darwin_ppc_exception_state64
-_STRUCT_PPC_EXCEPTION_STATE64
-{
-	unsigned long long __dar;	/* Fault registers for coredump */
-#if defined(__LP64__)
-	unsigned int  __dsisr;
-	unsigned int  __exception;	/* number of powerpc exception taken */
-	unsigned int  __pad1[4];	/* space in PCB "just in case" */
-#else
-	unsigned long __dsisr;
-	unsigned long __exception;	/* number of powerpc exception taken */
-	unsigned long __pad1[4];	/* space in PCB "just in case" */
-#endif
-};
-#else /* !__DARWIN_UNIX03 */
-#define _STRUCT_PPC_EXCEPTION_STATE64	struct ppc_exception_state64
-_STRUCT_PPC_EXCEPTION_STATE64
-{
-	unsigned long long dar;		/* Fault registers for coredump */
-#if defined(__LP64__)
-	unsigned int  dsisr;
-	unsigned int  exception;	/* number of powerpc exception taken */
-	unsigned int  pad1[4];		/* space in PCB "just in case" */
-#else
-	unsigned long dsisr;
-	unsigned long exception;	/* number of powerpc exception taken */
-	unsigned long pad1[4];		/* space in PCB "just in case" */
-#endif
-};
-#endif /* __DARWIN_UNIX03 */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-
-#pragma pack()
-
-#endif /* _MACH_PPC__STRUCTS_H_ */
diff --git a/osfmk/mach/ppc/_types.h b/osfmk/mach/ppc/_types.h
deleted file mode 100644
index fd3cb8f19..000000000
--- a/osfmk/mach/ppc/_types.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifndef	_MACH_PPC__TYPES_H_
-#define	_MACH_PPC__TYPES_H_
-
-#include <sys/cdefs.h>
-
-/*
- * ppc_thread_state is the structure that is exported to user threads for 
- * use in status/mutate calls.  This structure should never change.
- *
- */
-
-#if !__DARWIN_UNIX03
-struct ppc_thread_state
-#else /* __DARWIN_UNIX03 */
-struct __darwin_ppc_thread_state
-#endif /* __DARWIN_UNIX03 */
-{
-	unsigned int srr0;      /* Instruction address register (PC) */
-	unsigned int srr1;	/* Machine state register (supervisor) */
-	unsigned int r0;
-	unsigned int r1;
-	unsigned int r2;
-	unsigned int r3;
-	unsigned int r4;
-	unsigned int r5;
-	unsigned int r6;
-	unsigned int r7;
-	unsigned int r8;
-	unsigned int r9;
-	unsigned int r10;
-	unsigned int r11;
-	unsigned int r12;
-	unsigned int r13;
-	unsigned int r14;
-	unsigned int r15;
-	unsigned int r16;
-	unsigned int r17;
-	unsigned int r18;
-	unsigned int r19;
-	unsigned int r20;
-	unsigned int r21;
-	unsigned int r22;
-	unsigned int r23;
-	unsigned int r24;
-	unsigned int r25;
-	unsigned int r26;
-	unsigned int r27;
-	unsigned int r28;
-	unsigned int r29;
-	unsigned int r30;
-	unsigned int r31;
-
-	unsigned int cr;        /* Condition register */
-	unsigned int xer;	/* User's integer exception register */
-	unsigned int lr;	/* Link register */
-	unsigned int ctr;	/* Count register */
-	unsigned int mq;	/* MQ register (601 only) */
-
-	unsigned int vrsave;	/* Vector Save Register */
-};
-
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-struct ppc_thread_state64 {
-	unsigned long long srr0;	/* Instruction address register (PC) */
-	unsigned long long srr1;	/* Machine state register (supervisor) */
-	unsigned long long r0;
-	unsigned long long r1;
-	unsigned long long r2;
-	unsigned long long r3;
-	unsigned long long r4;
-	unsigned long long r5;
-	unsigned long long r6;
-	unsigned long long r7;
-	unsigned long long r8;
-	unsigned long long r9;
-	unsigned long long r10;
-	unsigned long long r11;
-	unsigned long long r12;
-	unsigned long long r13;
-	unsigned long long r14;
-	unsigned long long r15;
-	unsigned long long r16;
-	unsigned long long r17;
-	unsigned long long r18;
-	unsigned long long r19;
-	unsigned long long r20;
-	unsigned long long r21;
-	unsigned long long r22;
-	unsigned long long r23;
-	unsigned long long r24;
-	unsigned long long r25;
-	unsigned long long r26;
-	unsigned long long r27;
-	unsigned long long r28;
-	unsigned long long r29;
-	unsigned long long r30;
-	unsigned long long r31;
-
-	unsigned int cr;			/* Condition register */
-	unsigned long long xer;		/* User's integer exception register */
-	unsigned long long lr;		/* Link register */
-	unsigned long long ctr;		/* Count register */
-
-	unsigned int vrsave;		/* Vector Save Register */
-};
-
-#pragma pack()
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-
-/* This structure should be double-word aligned for performance */
-
-#if !__DARWIN_UNIX03
-struct ppc_float_state
-#else /* __DARWIN_UNIX03 */
-struct __darwin_ppc_float_state
-#endif /* __DARWIN_UNIX03 */
-{
-	double  fpregs[32];
-
-	unsigned int fpscr_pad; /* fpscr is 64 bits, 32 bits of rubbish */
-	unsigned int fpscr;	/* floating point status register */
-};
-
-#pragma pack(4)		/* Make sure the structure stays as we defined it */
-
-#if !__DARWIN_UNIX03
-struct ppc_vector_state
-#else /* __DARWIN_UNIX03 */
-struct __darwin_ppc_vector_state
-#endif /* __DARWIN_UNIX03 */
-{
-#if defined(__LP64__)
-	unsigned int	save_vr[32][4];
-	unsigned int	save_vscr[4];
-#else
-	unsigned long	save_vr[32][4];
-	unsigned long	save_vscr[4];
-#endif
-	unsigned int	save_pad5[4];
-	unsigned int	save_vrvalid;			/* VRs that have been saved */
-	unsigned int	save_pad6[7];
-};
-#pragma pack()
-
-/*
- * ppc_exception_state
- *
- * This structure corresponds to some additional state of the user
- * registers as saved in the PCB upon kernel entry. They are only
- * available if an exception is passed out of the kernel, and even
- * then not all are guaranteed to be updated.
- *
- * Some padding is included in this structure which allows space for
- * servers to store temporary values if need be, to maintain binary
- * compatiblity.
- */
-
-/* Exception state for 32-bit thread (on 32-bit processor) */
-/* Still available on 64-bit processors, but may fall short */
-/* of covering the full potential state (hi half available). */
-
-#pragma pack(4)	/* Make sure the structure stays as we defined it */
-
-#if !__DARWIN_UNIX03
-struct ppc_exception_state
-#else /* __DARWIN_UNIX03 */
-struct __darwin_ppc_exception_state
-#endif /* __DARWIN_UNIX03 */
-{
-#if defined(__LP64__)
-	unsigned int dar;			/* Fault registers for coredump */
-	unsigned int dsisr;
-	unsigned int exception;	/* number of powerpc exception taken */
-	unsigned int pad0;			/* align to 16 bytes */
-	unsigned int pad1[4];		/* space in PCB "just in case" */
-#else
-	unsigned long dar;			/* Fault registers for coredump */
-	unsigned long dsisr;
-	unsigned long exception;	/* number of powerpc exception taken */
-	unsigned long pad0;			/* align to 16 bytes */
-	unsigned long pad1[4];		/* space in PCB "just in case" */
-#endif
-};
-
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-struct ppc_exception_state64 {
-	unsigned long long dar;		/* Fault registers for coredump */
-#if defined(__LP64__)
-	unsigned int  dsisr;
-	unsigned int  exception;	/* number of powerpc exception taken */
-	unsigned int  pad1[4];		/* space in PCB "just in case" */
-#else
-	unsigned long dsisr;
-	unsigned long exception;	/* number of powerpc exception taken */
-	unsigned long pad1[4];		/* space in PCB "just in case" */
-#endif
-};
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-
-#pragma pack()
-
-#endif /* _MACH_PPC__TYPES_H_ */
diff --git a/osfmk/mach/ppc/boolean.h b/osfmk/mach/ppc/boolean.h
deleted file mode 100644
index aa3769c9f..000000000
--- a/osfmk/mach/ppc/boolean.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-
-/*
- *	File:	boolean.h
- *
- *	Boolean type, for ppc.
- */
-
-#ifndef	_MACH_PPC_BOOLEAN_H_
-#define _MACH_PPC_BOOLEAN_H_
-
-#if defined(__ppc64__)
-typedef unsigned int	boolean_t;
-#else
-typedef int		boolean_t;
-#endif
-
-#endif	/* _MACH_PPC_BOOLEAN_H_ */
diff --git a/osfmk/mach/ppc/exception.h b/osfmk/mach/ppc/exception.h
deleted file mode 100644
index da4e7cb6b..000000000
--- a/osfmk/mach/ppc/exception.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Copyright (c) 1990, 1991, 1992, The University of Utah and
- * the Center for Software Science at the University of Utah (CSS).
- * All rights reserved.
- *
- * Permission to use, copy, modify and distribute this software is hereby
- * granted provided that (1) source code retains these copyright, permission,
- * and disclaimer notices, and (2) redistributions including binaries
- * reproduce the notices in supporting documentation, and (3) all advertising
- * materials mentioning features or use of this software display the following
- * acknowledgement: ``This product includes software developed by the Center
- * for Software Science at the University of Utah.''
- *
- * THE UNIVERSITY OF UTAH AND CSS ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
- * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSS DISCLAIM ANY LIABILITY OF
- * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * CSS requests users of this software to return to css-dist@cs.utah.edu any
- * improvements that they make and grant CSS redistribution rights.
- *
- * 	Utah $Hdr: $
- */
-
-#ifndef	_MACH_PPC_EXCEPTION_H_
-#define _MACH_PPC_EXCEPTION_H_
-
-#define	EXC_TYPES_COUNT	11	/* incl. illegal exception 0 */
-
-#define EXCEPTION_CODE_MAX 2    /* elements in vector (code+subcode) */
-/*
- *	EXC_BAD_INSTRUCTION
- */
-
-#define EXC_PPC_INVALID_SYSCALL	1    /* invalid syscall number */
-#define EXC_PPC_UNIPL_INST	2    /* unimplemented instruction */
-#define EXC_PPC_PRIVINST	3    /* privileged instruction */
-#define EXC_PPC_PRIVREG		4    /* privileged register */
-#define EXC_PPC_TRACE		5    /* trace/single-step */
-#define EXC_PPC_PERFMON     6    /* performance monitor */ 
-
-/*
- *	EXC_BAD_ACCESS
- *	Note: do not conflict with kern_return_t values returned by vm_fault
- */
-
-#define EXC_PPC_VM_PROT_READ		0x101 /* error reading syscall args */
-#define EXC_PPC_BADSPACE		0x102 /* bad space referenced */
-#define EXC_PPC_UNALIGNED		0x103 /* unaligned data reference */
-
-/*
- *	EXC_ARITHMETIC
- */
-
-#define EXC_PPC_OVERFLOW		1    /* integer overflow */
-#define EXC_PPC_ZERO_DIVIDE		2    /* integer divide by zero */
-#define EXC_PPC_FLT_INEXACT		3    /* IEEE inexact exception */
-#define EXC_PPC_FLT_ZERO_DIVIDE		4    /* IEEE zero divide */
-#define EXC_PPC_FLT_UNDERFLOW		5    /* IEEE floating underflow */
-#define EXC_PPC_FLT_OVERFLOW		6    /* IEEE floating overflow */
-#define EXC_PPC_FLT_NOT_A_NUMBER	7    /* IEEE not a number */
-
-/*
- * EXC_PPC_NOEMULATION should go away when we add software emulation
- * for floating point. Right now we don't support this.
- */
-
-#define EXC_PPC_NOEMULATION		8	/* no floating point emulation */
-#define EXC_PPC_ALTIVECASSIST	9	/* Altivec Denorm Assist */
-
-/*
- *	EXC_SOFTWARE
- * 	Note: 0x10000-0x10003 in use for unix signal 
- */
-#define EXC_PPC_TRAP		1		/* Program trap */
-#define EXC_PPC_MIGRATE		0x10100		/* Time to bolt */
-
-
-/*
- *	EXC_BREAKPOINT
- */
-
-#define EXC_PPC_BREAKPOINT		EXC_PPC_TRAP    /* breakpoint trap */
-
-/*
- *	machine dependent exception masks
- */
-#define	EXC_MASK_MACHINE	0
-
-#endif	/* _MACH_PPC_EXCEPTION_H_ */
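
A minimal sketch (not part of this patch) of how a Mach exception handler
might decode the EXC_BAD_INSTRUCTION subcodes deleted above. The handler
receives a code vector of EXCEPTION_CODE_MAX entries (code, subcode);
exception_data_type_t comes from <mach/exception_types.h>.

#include <mach/exception_types.h>

static const char *
describe_ppc_bad_instruction(exception_data_type_t code)
{
	switch (code) {
	case EXC_PPC_INVALID_SYSCALL:	return "invalid syscall number";
	case EXC_PPC_UNIPL_INST:	return "unimplemented instruction";
	case EXC_PPC_PRIVINST:		return "privileged instruction";
	case EXC_PPC_PRIVREG:		return "privileged register";
	case EXC_PPC_TRACE:		return "trace/single-step";
	case EXC_PPC_PERFMON:		return "performance monitor";
	default:			return "unknown";
	}
}
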
diff --git a/osfmk/mach/ppc/kern_return.h b/osfmk/mach/ppc/kern_return.h
deleted file mode 100644
index 2c79023f2..000000000
--- a/osfmk/mach/ppc/kern_return.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-
-/*
- *	File:	kern_return.h
- *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
- *	Date:	1985
- *
- *	Machine-dependent kernel return definitions.
- */
-
-#ifndef	_MACH_PPC_KERN_RETURN_H_
-#define _MACH_PPC_KERN_RETURN_H_
-
-#ifndef	ASSEMBLER
-typedef	int		kern_return_t;
-#endif	/* ASSEMBLER */
-
-#endif	/* _MACH_PPC_KERN_RETURN_H_ */
diff --git a/osfmk/mach/ppc/machine_types.defs b/osfmk/mach/ppc/machine_types.defs
deleted file mode 100644
index f0d5c41f3..000000000
--- a/osfmk/mach/ppc/machine_types.defs
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-/*
- *	Header file for basic, machine-dependent data types.
- */
-
-#ifndef	_PPC_VM_TYPES_DEFS_
-#define	_PPC_VM_TYPES_DEFS_
-
-type short = int16_t;
-type int = int32_t;
-type unsigned = uint32_t;
-
-type float = MACH_MSG_TYPE_REAL_32;
-type double = MACH_MSG_TYPE_REAL_64;
-
-/* from ISO/IEC 9899:1999 (C99) spec */
-/* 7.18.1.4 Integer types capable of holding object pointers */
-/*
- * The [u]intptr_t types for the native
- * integer type, e.g. 32 or 64 or.. whatever
- * register size the machine has.  They are
- * used for entities that might be either
- * [unsigned] integers or pointers, and for
- * type-casting between the two.
- *
- * For instance, the IPC system represents
- * a port in user space as an integer and
- * in kernel space as a pointer.
- */
-#if defined(__ppc64__)
-type uintptr_t = uint64_t;
-type intptr_t = int64_t;
-#else
-type uintptr_t = uint32_t;
-type intptr_t = int32_t;
-#endif
-
-/*
- * These are the legacy Mach types that are
- * the [rough] equivalents of the standards above.
- * They were defined in terms of int, not
- * long int, so they remain separate.
- */
-#if defined(__ppc64__)
-type register_t = int64_t;
-#else
-type register_t = int32_t;
-#endif
-type integer_t = int32_t;
-type natural_t = uint32_t;
-
-/*
- * These are the VM types that scale with the address
- * space size of a given process.
- */
-
-#if defined(__ppc64__)
-type vm_address_t = uint64_t;
-type vm_offset_t = uint64_t;
-type vm_size_t = uint64_t;
-#else
-type vm_address_t = natural_t;
-type vm_offset_t = natural_t;
-type vm_size_t = natural_t;
-#endif
-
-/*
- * The mach_vm_xxx_t types are sized to hold the
- * maximum pointer, offset, etc... supported on the
- * platform.
- */
-type mach_vm_address_t = uint64_t;
-type mach_vm_offset_t = uint64_t;
-type mach_vm_size_t = uint64_t;
-
-#if	MACH_IPC_COMPAT
-/*
- * For the old IPC interface
- */
-#define	MSG_TYPE_PORT_NAME	uint32_t
-
-#endif	/* MACH_IPC_COMPAT */
-
-/*
- * These are types used internal to Mach to implement the
- * legacy 32-bit VM APIs published by the kernel.
- */
-#define	VM32_SUPPORT	1
-
-type vm32_address_t = uint32_t;
-type vm32_offset_t = uint32_t;
-type vm32_size_t = uint32_t;
-
-#endif	/* _PPC_VM_TYPES_DEFS_ */
-
-/* vim: set ft=c : */
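
These MIG widths must stay in sync with the kernel's C typedefs; a
compile-time check along these lines (illustrative only, using the classic
negative-array-size idiom) documents the invariants the #if branches above
encode:

#include <mach/machine/vm_types.h>

/* Each typedef fails to compile if its width assumption is violated. */
typedef char assert_integer_t_is_32bit[(sizeof(integer_t) == 4) ? 1 : -1];
typedef char assert_natural_t_is_32bit[(sizeof(natural_t) == 4) ? 1 : -1];
typedef char assert_mach_vm_size_t_is_64bit[(sizeof(mach_vm_size_t) == 8) ? 1 : -1];
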
diff --git a/osfmk/mach/ppc/ndr_def.h b/osfmk/mach/ppc/ndr_def.h
deleted file mode 100644
index cb012b2f4..000000000
--- a/osfmk/mach/ppc/ndr_def.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#include <mach/ndr.h>
-
-NDR_record_t NDR_record = {
-	0,			/* mig_reserved */
-	0,			/* mig_reserved */
-	0,			/* mig_reserved */
-	NDR_PROTOCOL_2_0,	/* mig_encoding */
-	NDR_INT_BIG_ENDIAN,	/* int_rep */
-	NDR_CHAR_ASCII,		/* char_rep */
-	NDR_FLOAT_IEEE,		/* float_rep */
-	0,			/* reserved */
-};
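
The record above advertises the sender's data representation. A hedged sketch
of the receiver-side check this enables (not from this patch; the field names
are those of NDR_record_t in <mach/ndr.h>):

#include <mach/ndr.h>

/* Returns nonzero when a peer's NDR record matches the local layout. */
static int
ndr_compatible(const NDR_record_t *remote)
{
	return remote->int_rep   == NDR_record.int_rep &&
	       remote->char_rep  == NDR_record.char_rep &&
	       remote->float_rep == NDR_record.float_rep;
}
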
diff --git a/osfmk/mach/ppc/processor_info.h b/osfmk/mach/ppc/processor_info.h
deleted file mode 100644
index 168cb195d..000000000
--- a/osfmk/mach/ppc/processor_info.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/*
- *	File:	mach/ppc/processor_info.h
- *
- *	Data structure definitions for ppc specific processor control
- */
-
-#ifndef	_MACH_PPC_PROCESSOR_INFO_H_
-#define _MACH_PPC_PROCESSOR_INFO_H_
-
-#include <mach/machine.h>
-#include <mach/message.h>
-
-#ifdef	PRIVATE
-
-/* processor_control command operations */
-#define PROCESSOR_PM_SET_REGS     1     /* Set Performance Monitor Registers  */
-#define PROCESSOR_PM_SET_MMCR     2     /* Set Monitor Mode Controls Registers  */
-#define PROCESSOR_PM_CLR_PMC      3     /* Clear Performance Monitor Counter Registers */
-
-/* 
- * Performance Monitor Register structures
- *
- * XXX - These have not been updated for ppc64.
- */
-
-typedef union {
-	unsigned int word;
-	struct {
-	        unsigned int dis	: 1;
-		unsigned int dp 	: 1;
-		unsigned int du 	: 1;
-	        unsigned int dms	: 1;
-	        unsigned int dmr	: 1;
-	        unsigned int reserved3	: 1;        /* enint         */
-	        unsigned int reserved4	: 1;        /* discount      */
-	        unsigned int reserved5	: 2;        /* rtcselect     */
-	        unsigned int reserved6	: 1;        /* intonbittrans */
-	        unsigned int threshold	: 6;
-	        unsigned int reserved7	: 1;        /* pmc1intcontrol */
-	        unsigned int reserved8	: 1;        /* pmcintcontrol  */
-	        unsigned int reserved9	: 1;        /* pmctrigger     */
-	        unsigned int pmc1select	: 7;
-	        unsigned int pmc2select	: 6;
-	}bits;
-}mmcr0_t;
-
-typedef union {
-	unsigned int word;
-	struct {
-	        unsigned int pmc3select	: 5;
-	        unsigned int pmc4select	: 5;
-	        unsigned int reserved	: 22;
-	}bits;
-}mmcr1_t;
-
-typedef union {
-	unsigned int word;
-	struct {
-	        unsigned int threshmult	 : 1;
-	        unsigned int reserved	 : 31;
-	}bits;
-}mmcr2_t;
-
-typedef union {
-	unsigned int word;
-	struct {
-	        unsigned int ov : 1;        /* overflow value */
-	        unsigned int cv : 31;       /* counter value */
-	}bits;
-}pmcn_t;
-
-
-
-/* Processor Performance Monitor Registers definitions */
-
-struct processor_pm_regs {
-      union {
-        mmcr0_t mmcr0;
-        mmcr1_t mmcr1;
-        mmcr2_t mmcr2;
-      }u;
-      pmcn_t pmc[2];
-};
-
-typedef struct processor_pm_regs processor_pm_regs_data_t;
-typedef struct processor_pm_regs *processor_pm_regs_t;
-#define PROCESSOR_PM_REGS_COUNT ((mach_msg_type_number_t) \
-        (sizeof(processor_pm_regs_data_t) / sizeof (unsigned int)))
-
-#define PROCESSOR_PM_REGS_COUNT_POWERPC_750 \
-            (PROCESSOR_PM_REGS_COUNT * 2 )
-
-#define PROCESSOR_PM_REGS_COUNT_POWERPC_7400 \
-            (PROCESSOR_PM_REGS_COUNT * 3 )
-
-union processor_control_data {
-        processor_pm_regs_data_t cmd_pm_regs[3];
-};
-
-struct processor_control_cmd {
-    integer_t      cmd_op;
-    cpu_type_t     cmd_cpu_type;
-    cpu_subtype_t  cmd_cpu_subtype;
-    union processor_control_data u;
-};
-
-typedef struct processor_control_cmd   processor_control_cmd_data_t;
-typedef struct processor_control_cmd   *processor_control_cmd_t;
-#define cmd_pm_regs u.cmd_pm_regs
-#define cmd_pm_ctls u.cmd_pm_ctls
-
-#define PROCESSOR_CONTROL_CMD_COUNT ((mach_msg_type_number_t) \
-    (((sizeof(processor_control_cmd_data_t)) - \
-      (sizeof(union processor_control_data))) / sizeof (integer_t)))
-
-     /* x should be a processor_pm_regs_t */
-#define PERFMON_MMCR0(x)    ((x)[0].u.mmcr0.word)
-#define PERFMON_PMC1(x)     ((x)[0].pmc[0].word)
-#define PERFMON_PMC2(x)     ((x)[0].pmc[1].word)
-#define PERFMON_MMCR1(x)    ((x)[1].u.mmcr1.word)
-#define PERFMON_PMC3(x)     ((x)[1].pmc[0].word)
-#define PERFMON_PMC4(x)     ((x)[1].pmc[1].word)
-#define PERFMON_MMCR2(x)    ((x)[2].u.mmcr2.word)
-
-#define PERFMON_DIS(x)           ((x)[0].u.mmcr0.bits.dis)
-#define PERFMON_DP(x)            ((x)[0].u.mmcr0.bits.dp)
-#define PERFMON_DU(x)            ((x)[0].u.mmcr0.bits.du)
-#define PERFMON_DMS(x)           ((x)[0].u.mmcr0.bits.dms)
-#define PERFMON_DMR(x)           ((x)[0].u.mmcr0.bits.dmr)
-#define PERFMON_THRESHOLD(x)     ((x)[0].u.mmcr0.bits.threshold)
-#define PERFMON_PMC1SELECT(x)    ((x)[0].u.mmcr0.bits.pmc1select)
-#define PERFMON_PMC2SELECT(x)    ((x)[0].u.mmcr0.bits.pmc2select)
-#define PERFMON_PMC3SELECT(x)    ((x)[1].u.mmcr1.bits.pmc3select)
-#define PERFMON_PMC4SELECT(x)    ((x)[1].u.mmcr1.bits.pmc4select)
-#define PERFMON_THRESHMULT(x)    ((x)[2].u.mmcr2.bits.threshmult)
-#define PERFMON_PMC1_CV(x)       ((x)[0].pmc[0].bits.cv)
-#define PERFMON_PMC2_CV(x)       ((x)[0].pmc[1].bits.cv)
-#define PERFMON_PMC3_CV(x)       ((x)[1].pmc[0].bits.cv)
-#define PERFMON_PMC4_CV(x)       ((x)[1].pmc[1].bits.cv)
-
-typedef unsigned int processor_temperature_data_t;
-typedef unsigned int *processor_temperature_t;
-
-#define PROCESSOR_TEMPERATURE_COUNT 1
-
-#endif	/* PRIVATE */
-
-#endif	/* _MACH_PPC_PROCESSOR_INFO_H_ */
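
A short usage sketch for the accessor macros above (hypothetical, not part of
this patch): regs points at an array of processor_pm_regs structures sized
per PROCESSOR_PM_REGS_COUNT_POWERPC_7400, i.e. three register groups.

/* Copy the four performance-monitor counter values out of a register dump. */
static void
perfmon_snapshot(processor_pm_regs_t regs, unsigned int counters[4])
{
	counters[0] = PERFMON_PMC1(regs);	/* regs[0].pmc[0] */
	counters[1] = PERFMON_PMC2(regs);	/* regs[0].pmc[1] */
	counters[2] = PERFMON_PMC3(regs);	/* regs[1].pmc[0] */
	counters[3] = PERFMON_PMC4(regs);	/* regs[1].pmc[1] */
}
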
diff --git a/osfmk/mach/ppc/sdt_isa.h b/osfmk/mach/ppc/sdt_isa.h
deleted file mode 100644
index 558a12406..000000000
--- a/osfmk/mach/ppc/sdt_isa.h
+++ /dev/null
@@ -1,427 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _MACH_PPC_SDT_ISA_H
-#define	_MACH_PPC_SDT_ISA_H
-
-/* #pragma ident	"@(#)sdt.h	1.7	05/06/08 SMI" */
-
-/*
- * Only define when testing.  This makes the calls into actual calls to
- * test functions.
- */
-/* #define DTRACE_CALL_TEST */
-
-#define DTRACE_STRINGIFY(s) #s
-#define DTRACE_TOSTRING(s) DTRACE_STRINGIFY(s)
-
-#if defined(KERNEL)
-/*
- * For the kernel, set an explicit global label so the symbol can be located
- */
-#define DTRACE_LAB(p, n)                                                                        \
-    "__dtrace_probe$" DTRACE_TOSTRING(__LINE__) DTRACE_STRINGIFY(_##p##___##n)
-#define DTRACE_LABEL(p, n)		\
-      ".section __DATA, __data\n\t"	\
-      ".globl " DTRACE_LAB(p, n) "\n\t"	\
-       DTRACE_LAB(p, n) ":" ".long 1f""\n\t"	\
-       ".text" "\n\t"			\
-	"1:"
-#else	/* !KERNEL */
-#define DTRACE_LABEL(p, n)									\
-	"__dtrace_probe$" DTRACE_TOSTRING(__LINE__) DTRACE_STRINGIFY(_##p##___##n) ":"	"\n\t"
-#endif	/* !KERNEL */
-
-#ifdef DTRACE_CALL_TEST
-
-#define DTRACE_CALL(p,n)	\
-	DTRACE_LABEL(p,n)	\
-	DTRACE_CALL_INSN(p,n)
-
-#else	/* !DTRACE_CALL_TEST */
-
-#define DTRACE_CALL(p,n)	\
-	DTRACE_LABEL(p,n)	\
-	DTRACE_NOPS
-
-#endif	/* !DTRACE_CALL_TEST */
-
-#ifdef __ppc__
-
-#define DTRACE_NOPS			\
-	"nop"			"\n\t"
-
-#define DTRACE_CALL_INSN(p,n)						\
-	"bl _dtracetest" DTRACE_STRINGIFY(_##p##_##n)	"\n\t"
-
-#define ARG1_EXTENT	1
-#define ARGS2_EXTENT	2
-#define ARGS3_EXTENT	3
-#define ARGS4_EXTENT	4
-#define ARGS5_EXTENT	5
-#define ARGS6_EXTENT	6
-#define ARGS7_EXTENT	7
-#define ARGS8_EXTENT	8
-#define ARGS9_EXTENT	9
-#define ARGS10_EXTENT	10	
-
-#define DTRACE_CALL0ARGS(provider, name)							\
-	asm volatile (										\
-	              DTRACE_CALL(provider, name)						\
-	              "# eat trailing nl+tab from DTRACE_CALL"					\
-	              :										\
-	              :										\
-	);
-
-#define DTRACE_CALL1ARG(provider, name)								\
-	asm volatile ("subi r1,r1,0x20"							"\n\t"	\
-	              "lwz r3,0x0(%0)"							"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-                      "addi r1,r1,0x20"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3"									\
-	);
-
-#define DTRACE_CALL2ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x20"							"\n\t"	\
-	              "lwz r3,0x0(%0)"							"\n\t"	\
-	              "lwz r4,0x4(%0)"							"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x20"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4"								\
-	);
-
-#define DTRACE_CALL3ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x30"							"\n\t"	\
-	              "lwz r3,0x0(%0)"							"\n\t"	\
-	              "lwz r4,0x4(%0)"							"\n\t"	\
-	              "lwz r5,0x8(%0)"							"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x30"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4", "r5"							\
-	);
-
-#define DTRACE_CALL4ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x30"							"\n\t"	\
-	              "lwz r3,0x0(%0)"							"\n\t"	\
-	              "lwz r4,0x4(%0)"							"\n\t"	\
-	              "lwz r5,0x8(%0)"							"\n\t"	\
-	              "lwz r6,0xc(%0)"							"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x30"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4", "r5", "r6"							\
-	);
-
-#define DTRACE_CALL5ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x30"							"\n\t"	\
-	              "lwz r3,0x0(%0)"							"\n\t"	\
-	              "lwz r4,0x4(%0)"							"\n\t"	\
-	              "lwz r5,0x8(%0)"							"\n\t"	\
-	              "lwz r6,0xc(%0)"							"\n\t"	\
-	              "lwz r7,0x10(%0)"							"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x30"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4", "r5", "r6", "r7"						\
-	);
-
-#define DTRACE_CALL6ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x30"							"\n\t"	\
-	              "lwz r3,0x0(%0)"							"\n\t"	\
-	              "lwz r4,0x4(%0)"							"\n\t"	\
-	              "lwz r5,0x8(%0)"							"\n\t"	\
-	              "lwz r6,0xc(%0)"							"\n\t"	\
-	              "lwz r7,0x10(%0)"							"\n\t"	\
-	              "lwz r8,0x14(%0)"							"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x30"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4", "r5", "r6", "r7", "r8"					\
-	);
-
-#define DTRACE_CALL7ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x40"							"\n\t"	\
-	              "lwz r3,0x0(%0)"							"\n\t"	\
-	              "lwz r4,0x4(%0)"							"\n\t"	\
-	              "lwz r5,0x8(%0)"							"\n\t"	\
-	              "lwz r6,0xc(%0)"							"\n\t"	\
-	              "lwz r7,0x10(%0)"							"\n\t"	\
-	              "lwz r8,0x14(%0)"							"\n\t"	\
-	              "lwz r9,0x18(%0)"							"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x40"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"				\
-	);
-
-#define DTRACE_CALL8ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x40"							"\n\t"	\
-	              "lwz r3,0x0(%0)"							"\n\t"	\
-	              "lwz r4,0x4(%0)"							"\n\t"	\
-	              "lwz r5,0x8(%0)"							"\n\t"	\
-	              "lwz r6,0xc(%0)"							"\n\t"	\
-	              "lwz r7,0x10(%0)"							"\n\t"	\
-	              "lwz r8,0x14(%0)"							"\n\t"	\
-	              "lwz r9,0x18(%0)"							"\n\t"	\
-	              "lwz r10,0x1c(%0)"						"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x40"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"				\
-	);
-
-#define DTRACE_CALL9ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x40"							"\n\t"	\
-	              "lwz r3,0x0(%0)"							"\n\t"	\
-	              "lwz r4,0x4(%0)"							"\n\t"	\
-	              "lwz r5,0x8(%0)"							"\n\t"	\
-	              "lwz r6,0xc(%0)"							"\n\t"	\
-	              "lwz r7,0x10(%0)"							"\n\t"	\
-	              "lwz r8,0x14(%0)"							"\n\t"	\
-	              "lwz r9,0x18(%0)"							"\n\t"	\
-	              "lwz r10,0x1c(%0)"						"\n\t"	\
-	              "lwz r11,0x20(%0)"						"\n\t"	\
-	              "stw r11,0x38(r1)"						"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x40"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"			\
-	);
-
-#define DTRACE_CALL10ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x40"							"\n\t"	\
-	              "lwz r3,0x0(%0)"							"\n\t"	\
-	              "lwz r4,0x4(%0)"							"\n\t"	\
-	              "lwz r5,0x8(%0)"							"\n\t"	\
-	              "lwz r6,0xc(%0)"							"\n\t"	\
-	              "lwz r7,0x10(%0)"							"\n\t"	\
-	              "lwz r8,0x14(%0)"							"\n\t"	\
-	              "lwz r9,0x18(%0)"							"\n\t"	\
-	              "lwz r10,0x1c(%0)"						"\n\t"	\
-	              "lwz r11,0x20(%0)"						"\n\t"	\
-	              "lwz r12,0x24(%0)"						"\n\t"	\
-	              "stw r11,0x38(r1)"						"\n\t"	\
-	              "stw r12,0x3c(r1)"						"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x40"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"		\
-	);
-
-#endif // __ppc__
-
-#ifdef __ppc64__
-
-#define DTRACE_NOPS			\
-	"nop"			"\n\t"
-
-#define DTRACE_CALL_INSN(p,n)						\
-	"bl _dtracetest" DTRACE_STRINGIFY(_##p##_##n)	"\n\t"
-
-#define ARG1_EXTENT	1
-#define ARGS2_EXTENT	2
-#define ARGS3_EXTENT	3
-#define ARGS4_EXTENT	4
-#define ARGS5_EXTENT	5
-#define ARGS6_EXTENT	6
-#define ARGS7_EXTENT	7
-#define ARGS8_EXTENT	8
-#define ARGS9_EXTENT	9
-#define ARGS10_EXTENT	10	
-
-#define DTRACE_CALL0ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x30"							"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x30"								\
-	              :										\
-	              :										\
-	              :										\
-	);
-
-#define DTRACE_CALL1ARG(provider, name)								\
-	asm volatile ("ld r3,0x0(%0)"							"\n\t"	\
-	              "subi r1,r1,0x38"							"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-                      "addi r1,r1,0x38"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3"									\
-	);
-
-#define DTRACE_CALL2ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x40"							"\n\t"	\
-	              "ld r3,0x0(%0)"							"\n\t"	\
-	              "ld r4,0x8(%0)"							"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x40"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4"								\
-	);
-
-#define DTRACE_CALL3ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x48"							"\n\t"	\
-	              "ld r3,0x0(%0)"							"\n\t"	\
-	              "ld r4,0x8(%0)"							"\n\t"	\
-	              "ld r5,0x10(%0)"							"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x48"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4", "r5"							\
-	);
-
-#define DTRACE_CALL4ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x50"							"\n\t"	\
-	              "ld r3,0x0(%0)"							"\n\t"	\
-	              "ld r4,0x8(%0)"							"\n\t"	\
-	              "ld r5,0x10(%0)"							"\n\t"	\
-	              "ld r6,0x18(%0)"							"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x50"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4", "r5", "r6"							\
-	);
-
-#define DTRACE_CALL5ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x58"							"\n\t"	\
-	              "ld r3,0x0(%0)"							"\n\t"	\
-	              "ld r4,0x8(%0)"							"\n\t"	\
-	              "ld r5,0x10(%0)"							"\n\t"	\
-	              "ld r6,0x18(%0)"							"\n\t"	\
-	              "ld r7,0x20(%0)"							"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x58"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4", "r5", "r6", "r7"						\
-	);
-
-#define DTRACE_CALL6ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x60"							"\n\t"	\
-	              "ld r3,0x0(%0)"							"\n\t"	\
-	              "ld r4,0x8(%0)"							"\n\t"	\
-	              "ld r5,0x10(%0)"							"\n\t"	\
-	              "ld r6,0x18(%0)"							"\n\t"	\
-	              "ld r7,0x20(%0)"							"\n\t"	\
-	              "ld r8,0x28(%0)"							"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x60"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4", "r5", "r6", "r7", "r8"					\
-	);
-
-#define DTRACE_CALL7ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x68"							"\n\t"	\
-	              "ld r3,0x0(%0)"							"\n\t"	\
-	              "ld r4,0x8(%0)"							"\n\t"	\
-	              "ld r5,0x10(%0)"							"\n\t"	\
-	              "ld r6,0x18(%0)"							"\n\t"	\
-	              "ld r7,0x20(%0)"							"\n\t"	\
-	              "ld r8,0x28(%0)"							"\n\t"	\
-	              "ld r9,0x30(%0)"							"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x68"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9"				\
-	);
-
-#define DTRACE_CALL8ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x70"							"\n\t"	\
-	              "ld r3,0x0(%0)"							"\n\t"	\
-	              "ld r4,0x8(%0)"							"\n\t"	\
-	              "ld r5,0x10(%0)"							"\n\t"	\
-	              "ld r6,0x18(%0)"							"\n\t"	\
-	              "ld r7,0x20(%0)"							"\n\t"	\
-	              "ld r8,0x28(%0)"							"\n\t"	\
-	              "ld r9,0x30(%0)"							"\n\t"	\
-	              "ld r10,0x38(%0)"							"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x70"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"				\
-	);
-
-#define DTRACE_CALL9ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x78"							"\n\t"	\
-	              "ld r3,0x0(%0)"							"\n\t"	\
-	              "ld r4,0x8(%0)"							"\n\t"	\
-	              "ld r5,0x10(%0)"							"\n\t"	\
-	              "ld r6,0x18(%0)"							"\n\t"	\
-	              "ld r7,0x20(%0)"							"\n\t"	\
-	              "ld r8,0x28(%0)"							"\n\t"	\
-	              "ld r9,0x30(%0)"							"\n\t"	\
-	              "ld r10,0x38(%0)"							"\n\t"	\
-	              "ld r11,0x40(%0)"							"\n\t"	\
-	              "std r11,0x70(r1)"						"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x78"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"			\
-	);
-
-#define DTRACE_CALL10ARGS(provider, name)							\
-	asm volatile ("subi r1,r1,0x80"							"\n\t"	\
-	              "ld r3,0x0(%0)"							"\n\t"	\
-	              "ld r4,0x8(%0)"							"\n\t"	\
-	              "ld r5,0x10(%0)"							"\n\t"	\
-	              "ld r6,0x18(%0)"							"\n\t"	\
-	              "ld r7,0x20(%0)"							"\n\t"	\
-	              "ld r8,0x28(%0)"							"\n\t"	\
-	              "ld r9,0x30(%0)"							"\n\t"	\
-	              "ld r10,0x38(%0)"							"\n\t"	\
-	              "ld r11,0x40(%0)"							"\n\t"	\
-	              "ld r12,0x48(%0)"							"\n\t"	\
-	              "std r11,0x70(r1)"						"\n\t"	\
-	              "std r12,0x78(r1)"						"\n\t"	\
-	              DTRACE_CALL(provider, name)						\
-	              "addi r1,r1,0x80"								\
-	              :										\
-	              : "b" (__dtrace_args)							\
-	              : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"		\
-	);
-
-#endif // __ppc64__
-
-#endif	/* _MACH_PPC_SDT_ISA_H */
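
The DTRACE_CALL*ARGS macros assume the probe arguments have been staged in a
local array named __dtrace_args, which the "b" asm constraint supplies as the
base register %0. A minimal caller sketch for the 32-bit ppc case (provider
and probe names are placeholders; the header is normally reached through the
<mach/machine/sdt_isa.h> indirection):

#include <stdint.h>
#include <mach/machine/sdt_isa.h>

#ifdef __ppc__
static void
fire_two_arg_probe(uintptr_t arg0, uintptr_t arg1)
{
	/* Offsets 0x0 and 0x4 match the lwz loads in DTRACE_CALL2ARGS. */
	uintptr_t __dtrace_args[ARGS2_EXTENT];

	__dtrace_args[0] = arg0;
	__dtrace_args[1] = arg1;
	DTRACE_CALL2ARGS(my_provider, my_probe)	/* placeholder names */
}
#endif /* __ppc__ */
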
diff --git a/osfmk/mach/ppc/syscall_sw.h b/osfmk/mach/ppc/syscall_sw.h
deleted file mode 100644
index 335ff9e21..000000000
--- a/osfmk/mach/ppc/syscall_sw.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifdef	PRIVATE
-
-#ifndef	_MACH_PPC_SYSCALL_SW_H_
-#define _MACH_PPC_SYSCALL_SW_H_
-
-#include <mach/machine/asm.h>
-
-#define kernel_trap(trap_name,trap_number,number_args) \
-ENTRY(trap_name, TAG_NO_FRAME_USED) @\
-	li	r0,	trap_number @\
-	sc	@\
-	blr
-
-#define ppc_trap(trap_name,trap_number) \
-ENTRY(trap_name, TAG_NO_FRAME_USED) @\
-	li	r0,	trap_number @\
-	sc	@\
-	blr
-	
-/*
- *	Put any definitions for PPC-only system calls in here (only if
- *	this file is being included from the one that instantiates the
- *	mach system calls).
- *
- *	Note: PPC-only system calls are in the 0x6000 to 0x6FFF range
- */
-
-#ifdef _MACH_SYSCALL_SW_H_	
-
-ppc_trap(diagCall,0x6000)	
-ppc_trap(vmm_get_version,0x6001)
-ppc_trap(vmm_get_features,0x6002)
-ppc_trap(vmm_init_context,0x6003)	
-ppc_trap(vmm_dispatch,0x6004)	
-ppc_trap(bb_enable_bluebox,0x6005)	
-ppc_trap(bb_disable_bluebox,0x6006)	
-ppc_trap(bb_settaskenv,0x6007)	
-ppc_trap(vmm_stop_vm,0x6008)	
-ppc_trap(CHUDCall,0x6009)	
-ppc_trap(ppcNull,0x600A)	
-ppc_trap(perfmon_control,0x600B)	
-ppc_trap(ppcNullinst,0x600C)	
-ppc_trap(pmsCPUCntrl,0x600D)	
-#endif /* _MACH_SYSCALL_SW_H_ */
-
-#endif	/* _MACH_PPC_SYSCALL_SW_H_ */
-
-#endif	/* PRIVATE */
diff --git a/osfmk/mach/ppc/thread_status.h b/osfmk/mach/ppc/thread_status.h
deleted file mode 100644
index ba077f74d..000000000
--- a/osfmk/mach/ppc/thread_status.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifndef	_MACH_PPC_THREAD_STATUS_H_
-#define _MACH_PPC_THREAD_STATUS_H_
-
-#include <mach/ppc/_structs.h>
-#include <mach/message.h>
-
-/*
- * ppc_thread_state is the structure that is exported to user threads for 
- * use in status/mutate calls.  This structure should never change.
- *
- */
-
-#define PPC_THREAD_STATE        1
-#define PPC_FLOAT_STATE         2
-#define PPC_EXCEPTION_STATE		3
-#define PPC_VECTOR_STATE		4
-#define PPC_THREAD_STATE64		5
-#define PPC_EXCEPTION_STATE64	6
-#define THREAD_STATE_NONE		7
-	       
-/*
- * VALID_THREAD_STATE_FLAVOR is a platform specific macro that when passed
- * an exception flavor will return whether that is a defined flavor for
- * that platform.
- * The macro must be manually updated to include all of the valid exception
- * flavors as defined above.
- */
-#define VALID_THREAD_STATE_FLAVOR(x)       \
-        ((x == PPC_THREAD_STATE)        || \
-         (x == PPC_FLOAT_STATE)         || \
-         (x == PPC_EXCEPTION_STATE)     || \
-         (x == PPC_VECTOR_STATE)        || \
-         (x == PPC_THREAD_STATE64)      || \
-         (x == PPC_EXCEPTION_STATE64)   || \
-         (x == THREAD_STATE_NONE))
-
-typedef _STRUCT_PPC_THREAD_STATE	ppc_thread_state_t;
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-typedef _STRUCT_PPC_THREAD_STATE64	ppc_thread_state64_t;
-#endif /* !_POSIX_C_SOURCE || _DARWIN_C_SOURCE */
-typedef _STRUCT_PPC_FLOAT_STATE		ppc_float_state_t;
-typedef _STRUCT_PPC_VECTOR_STATE	ppc_vector_state_t;
-
-/*
- * saved state structure
- *
- * This structure corresponds to the saved state. 
- *
- */
-
-#ifdef	MACH__POSIX_C_SOURCE_PRIVATE
-
-#include <ppc/savearea.h>
-
-typedef struct savearea				ppc_saved_state_t;
-
-#else	/* MACH__POSIX_C_SOURCE_PRIVATE */
-
-typedef struct ppc_thread_state			ppc_saved_state_t;
-
-#endif	/* MACH__POSIX_C_SOURCE_PRIVATE */
-
-/*
- * ppc_exception_state
- *
- * This structure corresponds to some additional state of the user
- * registers as saved in the PCB upon kernel entry. They are only
- * available if an exception is passed out of the kernel, and even
- * then not all are guaranteed to be updated.
- *
- * Some padding is included in this structure which allows space for
- * servers to store temporary values if need be, to maintain binary
- * compatibility.
- */
-
-/* Exception state for 32-bit thread (on 32-bit processor) */
-/* Still available on 64-bit processors, but may fall short */
-/* of covering the full potential state (hi half available). */
-
-typedef _STRUCT_PPC_EXCEPTION_STATE	ppc_exception_state_t;
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-typedef _STRUCT_PPC_EXCEPTION_STATE64	ppc_exception_state64_t;
-#endif /* !_POSIX_C_SOURCE || _DARWIN_C_SOURCE */
-
-/*
- * Save State Flags
- */
-
-#define PPC_THREAD_STATE_COUNT ((mach_msg_type_number_t) \
-   (sizeof(ppc_thread_state_t) / sizeof(int)))
-
-#define PPC_THREAD_STATE64_COUNT ((mach_msg_type_number_t) \
-   (sizeof(ppc_thread_state64_t) / sizeof(int)))
-
-#define PPC_EXCEPTION_STATE_COUNT ((mach_msg_type_number_t) \
-   (sizeof(ppc_exception_state_t) / sizeof(int)))
-
-#define PPC_EXCEPTION_STATE64_COUNT ((mach_msg_type_number_t) \
-   (sizeof(ppc_exception_state64_t) / sizeof(int)))
-
-#define PPC_FLOAT_STATE_COUNT ((mach_msg_type_number_t) \
-   (sizeof(ppc_float_state_t) / sizeof(int)))
-
-#define PPC_VECTOR_STATE_COUNT ((mach_msg_type_number_t) \
-   (sizeof(ppc_vector_state_t) / sizeof(int)))
-
-/*
- * Machine-independent way for servers and Mach's exception mechanism to
- * choose the most efficient state flavor for exception RPC's:
- */
-#define MACHINE_THREAD_STATE		PPC_THREAD_STATE
-#define MACHINE_THREAD_STATE_COUNT	PPC_THREAD_STATE_COUNT
-
-/*
- * Largest state on this machine:
- */
-#define THREAD_MACHINE_STATE_MAX	THREAD_STATE_MAX
-
-#endif /* _MACH_PPC_THREAD_STATUS_H_ */
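
Typical consumer of these flavors (a sketch, not from this patch): fetch a
thread's general-purpose register state, passing the matching count constant
so the kernel can validate the buffer size.

#include <mach/mach.h>

static kern_return_t
fetch_ppc_state(thread_act_t thread, ppc_thread_state_t *state)
{
	mach_msg_type_number_t count = PPC_THREAD_STATE_COUNT;

	return thread_get_state(thread, PPC_THREAD_STATE,
	    (thread_state_t)state, &count);
}
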
diff --git a/osfmk/mach/ppc/vm_param.h b/osfmk/mach/ppc/vm_param.h
deleted file mode 100644
index af3a94262..000000000
--- a/osfmk/mach/ppc/vm_param.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifndef	_MACH_PPC_VM_PARAM_H_
-#define _MACH_PPC_VM_PARAM_H_
-
-/*
- * These are the global definitions
- */
-
-#define BYTE_SIZE		8		/* byte size in bits */
-
-#define PPC_PGBYTES		4096	/* bytes per ppc page */
-#define PPC_PGSHIFT		12		/* number of bits to shift for pages */
-
-#define	PAGE_SIZE		PPC_PGBYTES
-#define	PAGE_SHIFT		PPC_PGSHIFT
-#define PAGE_MASK		(PAGE_SIZE - 1)
-
-#if 0
-#define VM_MAX_PAGE_ADDRESS 0xFFFFFFFFFFFFF000ULL
-#else
-/* 
- * LP64todo - For now, we are limited to 51-bits of user addressing
- */
-#define VM_MAX_PAGE_ADDRESS 0x0007FFFFFFFFF000ULL
-#endif
-
-#define MACH_VM_MIN_ADDRESS	((mach_vm_offset_t) 0)
-#define MACH_VM_MAX_ADDRESS	((mach_vm_offset_t) VM_MAX_PAGE_ADDRESS)
-
-/*
- * These are the values relative to the local process.
- */
-#if defined (__ppc64__)
-/*
- * LP64todo - We don't have the 64-bit address space layout yet.
- * Use the 32-bit stack layout for now.
- */
-#define VM_MIN_ADDRESS	((vm_offset_t) MACH_VM_MIN_ADDRESS)
-#define VM_MAX_ADDRESS	((vm_offset_t) MACH_VM_MAX_ADDRESS)
-#define USER_STACK_END  	((vm_offset_t) 0x00000000ffff0000ULL)
-#else
-#define VM_MIN_ADDRESS	((vm_offset_t) 0)
-#define VM_MAX_ADDRESS	((vm_offset_t) (VM_MAX_PAGE_ADDRESS & 0xFFFFFFFF))
-#define USER_STACK_END  	((vm_offset_t) 0xffff0000U)
-#endif /* defined(__ppc64__) */
-
-#ifdef	KERNEL_PRIVATE
-
-/* Kernel-wide values */
-#define VM_MIN_KERNEL_ADDRESS	((vm_offset_t) 0x00001000U)
-#define VM_MIN_KERNEL_AND_KEXT_ADDRESS VM_MIN_KERNEL_ADDRESS
-#define VM_MAX_KERNEL_ADDRESS	((vm_offset_t) 0xDFFFFFFFU)
-#define KERNEL_STACK_SIZE		(4 * PPC_PGBYTES)
-#define INTSTACK_SIZE		(5 * PPC_PGBYTES)
-
-#define VM_MAP_MIN_ADDRESS	MACH_VM_MIN_ADDRESS
-#define VM_MAP_MAX_ADDRESS	MACH_VM_MAX_ADDRESS
-
-#ifdef	MACH_KERNEL_PRIVATE
-
-/* For implementing legacy 32-bit interfaces */
-#define VM32_SUPPORT		1
-#define VM32_MIN_ADDRESS	((vm32_offset_t) 0)
-#define VM32_MAX_ADDRESS	((vm32_offset_t) (VM_MAX_PAGE_ADDRESS & 0xFFFFFFFF))
-
-
-#define PMAP_ENTER_OPTIONS(pmap, virtual_address, page, protection,	\
-				flags, wired, options, result)		\
-	MACRO_BEGIN							\
-		result=KERN_SUCCESS;					\
-		PMAP_ENTER(pmap, virtual_address, page, protection,	\
-				flags, wired);				\
-	MACRO_END
-
-
-#endif	/* MACH_KERNEL_PRIVATE */
-
-#endif	/* KERNEL_PRIVATE */
-
-#endif	/* _MACH_PPC_VM_PARAM_H_ */
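
The PAGE_SIZE/PAGE_MASK definitions above support the usual power-of-two
rounding idioms; an illustrative sketch (PPC_PGBYTES is 4096, so PAGE_MASK is
0xFFF), mirroring what the kernel's own trunc_page()/round_page() style
macros do:

static inline vm_offset_t
ppc_page_trunc(vm_offset_t addr)
{
	return addr & ~(vm_offset_t)PAGE_MASK;	/* round down to page start */
}

static inline vm_size_t
ppc_page_round(vm_size_t size)
{
	return (size + PAGE_MASK) & ~(vm_size_t)PAGE_MASK;	/* round up */
}
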
diff --git a/osfmk/mach/ppc/vm_types.h b/osfmk/mach/ppc/vm_types.h
deleted file mode 100644
index 0b3d39485..000000000
--- a/osfmk/mach/ppc/vm_types.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-
-/*
- *	File:	vm_types.h
- *	Author:	Avadis Tevanian, Jr.
- *	Date: 1985
- *
- *	Header file for VM data types.  PPC version.
- */
-
-#ifndef	_MACH_PPC_VM_TYPES_H_
-#define _MACH_PPC_VM_TYPES_H_
-
-#ifndef	ASSEMBLER
-
-#include <ppc/_types.h>
-#include <mach/ppc/vm_param.h>
-#include <stdint.h>
-
-/*
- * natural_t and integer_t are Mach's legacy types for machine-
- * independent integer types (unsigned, and signed, respectively).
- * Their original purpose was to define other types in a machine/
- * compiler independent way.
- *
- * They also had an implicit "same size as pointer" characteristic
- * to them (i.e. Mach's traditional types are very ILP32 or ILP64
- * centric).  We support PowerPC ABIs that do not follow either of
- * these models (specifically LP64).  Therefore, we had to make a
- * choice between making these types scale with pointers or stay
- * tied to integers.  Because their use is predominantly tied to
- * to the size of an integer, we are keeping that association and
- * breaking free from pointer size guarantees.
- *
- * New use of these types is discouraged.
- */
-typedef __darwin_natural_t	natural_t;
-typedef int			integer_t;
-
-#if defined(__ppc__)
-
-/*
- * For 32-bit PowerPC ABIs, the scalable types were
- * always based upon natural_t (unsigned int). 
- * Because of potential legacy issues with name mangling,
- * we cannot use the stdint uintptr_t type.
- */
-typedef	natural_t		vm_offset_t;
-typedef	natural_t		vm_size_t;
-
-#else /* __ppc64__ */
-
-/*
- * For 64-bit PowerPC ABIs, we have no legacy name mangling
- * issues, so we use the stdint types for scaling these
- * types to the same size as a pointer.
- */
-typedef	uintptr_t		vm_offset_t;
-typedef uintptr_t		vm_size_t;
-
-#endif	/* __ppc__ */
-
-/*
- * This new type is independent of a particular vm map's
- * implementation size - and represents appropriate types
- * for all possible maps.  This is used for interfaces
- * where the size of the map is not known - or we don't
- * want to have to distinguish.
- */
-typedef uint64_t		mach_vm_address_t;
-typedef uint64_t		mach_vm_offset_t;
-typedef uint64_t		mach_vm_size_t;
-
-typedef uint64_t		vm_map_offset_t;
-typedef uint64_t		vm_map_address_t;
-typedef uint64_t		vm_map_size_t;
-
-#ifdef	MACH_KERNEL_PRIVATE
-
-#if VM32_SUPPORT
-
-/*
- * These are types used internal to Mach to implement the
- * legacy 32-bit VM APIs published by the kernel.
- */
-typedef uint32_t		vm32_address_t;
-typedef uint32_t		vm32_offset_t;
-typedef uint32_t		vm32_size_t;
-
-#endif	/* VM32_SUPPORT */
-
-#endif	/* MACH_KERNEL_PRIVATE */
-
-#endif	/* ASSEMBLER */
-
-/*
- * If composing messages by hand (please do not)
- */
-#define	MACH_MSG_TYPE_INTEGER_T	MACH_MSG_TYPE_INTEGER_32
-
-#endif	/* _MACH_PPC_VM_TYPES_H_ */
diff --git a/osfmk/mach/processor.defs b/osfmk/mach/processor.defs
index f590633f1..99ea969a7 100644
--- a/osfmk/mach/processor.defs
+++ b/osfmk/mach/processor.defs
@@ -74,7 +74,7 @@ subsystem
 /*
  * References to processor objects are returned by:
  *	host_processors(host_priv_t,...);
- *
+ */
 /*
  *	Start processor.
  */
diff --git a/osfmk/mach/security.defs b/osfmk/mach/security.defs
index 734aa90eb..8d27ae1a8 100644
--- a/osfmk/mach/security.defs
+++ b/osfmk/mach/security.defs
@@ -4,7 +4,7 @@
 subsystem
 #if	KERNEL_SERVER
 	  KernelServer
-#endif	KERNEL_SERVER
+#endif	/* KERNEL_SERVER */
 	  security 5200;
 
 #include <mach/std_types.defs>
diff --git a/osfmk/mach/shared_region.h b/osfmk/mach/shared_region.h
index 1e2143e1a..29ced2a40 100644
--- a/osfmk/mach/shared_region.h
+++ b/osfmk/mach/shared_region.h
@@ -66,7 +66,7 @@
 #define SHARED_REGION_BASE_ARM			0x30000000ULL
 #define SHARED_REGION_SIZE_ARM			0x10000000ULL
 #define SHARED_REGION_NESTING_BASE_ARM		0x30000000ULL
-#define SHARED_REGION_NESTING_SIZE_ARM		0x08000000ULL
+#define SHARED_REGION_NESTING_SIZE_ARM		0x10000000ULL
 #define SHARED_REGION_NESTING_MIN_ARM		?
 #define SHARED_REGION_NESTING_MAX_ARM		?
 
@@ -84,20 +84,6 @@
 #define SHARED_REGION_NESTING_SIZE		SHARED_REGION_NESTING_SIZE_X86_64
 #define SHARED_REGION_NESTING_MIN		SHARED_REGION_NESTING_MIN_X86_64
 #define SHARED_REGION_NESTING_MAX		SHARED_REGION_NESTING_MAX_X86_64
-#elif defined(__ppc__)
-#define SHARED_REGION_BASE			SHARED_REGION_BASE_PPC
-#define SHARED_REGION_SIZE			SHARED_REGION_SIZE_PPC
-#define SHARED_REGION_NESTING_BASE		SHARED_REGION_NESTING_BASE_PPC
-#define SHARED_REGION_NESTING_SIZE		SHARED_REGION_NESTING_SIZE_PPC
-#define SHARED_REGION_NESTING_MIN		SHARED_REGION_NESTING_MIN_PPC
-#define SHARED_REGION_NESTING_MAX		SHARED_REGION_NESTING_MAX_PPC
-#elif defined(__ppc64__)
-#define SHARED_REGION_BASE			SHARED_REGION_BASE_PPC64
-#define SHARED_REGION_SIZE			SHARED_REGION_SIZE_PPC64
-#define SHARED_REGION_NESTING_BASE		SHARED_REGION_NESTING_BASE_PPC64
-#define SHARED_REGION_NESTING_SIZE		SHARED_REGION_NESTING_SIZE_PPC64
-#define SHARED_REGION_NESTING_MIN		SHARED_REGION_NESTING_MIN_PPC64
-#define SHARED_REGION_NESTING_MAX		SHARED_REGION_NESTING_MAX_PPC64
 #endif
 
 #ifdef KERNEL_PRIVATE
@@ -126,6 +112,7 @@ struct shared_file_mapping_np {
 };
 #define VM_PROT_COW  0x8  /* must not interfere with normal prot assignments */
 #define VM_PROT_ZF  0x10  /* must not interfere with normal prot assignments */
+#define VM_PROT_SLIDE  0x20  /* must not interfere with normal prot assignments */
 
 #ifndef KERNEL
 
@@ -134,6 +121,7 @@ int	shared_region_check_np(uint64_t	*startaddress);
 int	shared_region_map_np(int fd,
 			     uint32_t mappingCount,
 			     const struct shared_file_mapping_np *mappings);
+int	shared_region_slide_np(void);
 __END_DECLS
 
 #endif /* !KERNEL */
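
A hedged user-space sketch of the entry points declared above: ask the kernel
whether (and where) the shared region is mapped in the calling process. The
usual BSD convention of returning -1 and setting errno on failure is assumed
here.

#include <stdint.h>
#include <stdio.h>
#include <mach/shared_region.h>

static void
report_shared_region(void)
{
	uint64_t start = 0;

	if (shared_region_check_np(&start) == 0)
		printf("shared region starts at 0x%llx\n",
		    (unsigned long long)start);
	else
		perror("shared_region_check_np");
}
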
diff --git a/osfmk/mach/syscall_sw.h b/osfmk/mach/syscall_sw.h
index 11e9211f8..bac3552d3 100644
--- a/osfmk/mach/syscall_sw.h
+++ b/osfmk/mach/syscall_sw.h
@@ -91,9 +91,9 @@ kernel_trap(semaphore_wait_signal_trap,-37,2)
 kernel_trap(semaphore_timedwait_trap,-38,3)
 kernel_trap(semaphore_timedwait_signal_trap,-39,4)
 
-#if		!defined(__LP64__)
+#if		!defined(__LP64__) && !defined(__arm__)
 kernel_trap(map_fd,-43,5)
-#endif	/* __LP64__ */
+#endif	/* !defined(__LP64__) && !defined(__arm__) */
 
 kernel_trap(task_name_for_pid,-44,3)
 kernel_trap(task_for_pid,-45,3)
diff --git a/osfmk/mach/task.defs b/osfmk/mach/task.defs
index ceebc9529..0c70e9aef 100644
--- a/osfmk/mach/task.defs
+++ b/osfmk/mach/task.defs
@@ -70,6 +70,8 @@ subsystem
 
 #include <mach/std_types.defs>
 #include <mach/mach_types.defs>
+#include <mach_debug/mach_debug_types.defs>
+
 /*
  *	Create a new task with an empty set of IPC rights,
  *	and having an address space constructed from the
@@ -170,7 +172,12 @@ routine task_set_special_port(
  *	the port representing the first thr_act in that new thread.  The
  *	initial execution state of the thread is undefined.
  */
-routine thread_create(
+routine
+#ifdef KERNEL_SERVER
+thread_create_from_user(
+#else
+thread_create(
+#endif
 		parent_task	: task_t;
 	out	child_act	: thread_act_t);
 
@@ -181,7 +188,12 @@ routine thread_create(
  *	by flavor and new_state. Returns the port representing 
  *	the new thread.
  */
-routine thread_create_running(
+routine
+#ifdef KERNEL_SERVER
+thread_create_running_from_user(
+#else
+thread_create_running(
+#endif
                 parent_task     : task_t;
                 flavor          : thread_state_flavor_t;
                 new_state       : thread_state_t;
@@ -332,7 +344,16 @@ routine task_set_ras_pc(
 		boundspc        : vm_address_t);
 
 
-skip; /* was kernel_task_create() */
+/*
+ * Return zone info as seen/used by this task.
+ */
+routine task_zone_info(
+		target_task	: task_t;
+	out	names		: mach_zone_name_array_t,
+					Dealloc;
+	out	info		: task_zone_info_array_t,
+					Dealloc);
+
 
 /* 
  * JMM - Want to eliminate processor_set so keep them at the end.
@@ -389,5 +410,4 @@ routine	task_set_state(
 		flavor		: thread_state_flavor_t;
 		new_state	: thread_state_t);
 
-
 /* vim: set ft=c : */
diff --git a/osfmk/mach/task_info.h b/osfmk/mach/task_info.h
index cab9c1757..a43dc6cb9 100644
--- a/osfmk/mach/task_info.h
+++ b/osfmk/mach/task_info.h
@@ -195,6 +195,20 @@ typedef struct task_absolutetime_info	*task_absolutetime_info_t;
 #define TASK_ABSOLUTETIME_INFO_COUNT	((mach_msg_type_number_t) \
 		(sizeof (task_absolutetime_info_data_t) / sizeof (natural_t)))
 
+#define TASK_KERNELMEMORY_INFO	7
+
+struct task_kernelmemory_info {
+	uint64_t		total_palloc;	/* private kernel mem alloc'ed */
+	uint64_t		total_pfree;	/* private kernel mem freed */
+	uint64_t		total_salloc;	/* shared kernel mem alloc'ed */
+	uint64_t		total_sfree;	/* shared kernel mem freed */
+};
+
+typedef struct task_kernelmemory_info	task_kernelmemory_info_data_t;
+typedef struct task_kernelmemory_info	*task_kernelmemory_info_t;
+#define TASK_KERNELMEMORY_INFO_COUNT	((mach_msg_type_number_t) \
+		(sizeof (task_kernelmemory_info_data_t) / sizeof (natural_t)))
+
 #define TASK_SECURITY_TOKEN		13
 #define TASK_SECURITY_TOKEN_COUNT	((mach_msg_type_number_t) \
 		(sizeof(security_token_t) / sizeof(natural_t)))
@@ -217,16 +231,30 @@ typedef struct task_affinity_tag_info	*task_affinity_tag_info_t;
 #define TASK_AFFINITY_TAG_INFO_COUNT	\
 		(sizeof(task_affinity_tag_info_data_t) / sizeof(natural_t))
 
-#define TASK_DYLD_INFO			17	/* This is experimental. */
+#define TASK_DYLD_INFO			17
 
 struct task_dyld_info {
 	mach_vm_address_t	all_image_info_addr;
 	mach_vm_size_t		all_image_info_size;
+	integer_t		all_image_info_format;
 };
 typedef struct task_dyld_info	task_dyld_info_data_t;
 typedef struct task_dyld_info	*task_dyld_info_t;
 #define TASK_DYLD_INFO_COUNT	\
     		(sizeof(task_dyld_info_data_t) / sizeof(natural_t))
+#define TASK_DYLD_ALL_IMAGE_INFO_32	0	/* format value */
+#define TASK_DYLD_ALL_IMAGE_INFO_64	1	/* format value */
+
+#define TASK_EXTMOD_INFO			18
+
+struct task_extmod_info {
+	unsigned char	task_uuid[16];
+	vm_extmod_statistics_data_t		extmod_statistics;
+};
+typedef struct task_extmod_info	task_extmod_info_data_t;
+typedef struct task_extmod_info	*task_extmod_info_t;
+#define TASK_EXTMOD_INFO_COUNT	\
+    		(sizeof(task_extmod_info_data_t) / sizeof(natural_t))
 
 #pragma pack()
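
The new flavors plug into the existing task_info() call; a sketch using TASK_KERNELMEMORY_INFO (error handling elided):

    #include <stdio.h>
    #include <mach/mach.h>

    /* Report kernel-memory accounting for a task via the new flavor. */
    static void show_kernel_memory(task_t task)
    {
        task_kernelmemory_info_data_t km;
        mach_msg_type_number_t count = TASK_KERNELMEMORY_INFO_COUNT;

        if (task_info(task, TASK_KERNELMEMORY_INFO,
                      (task_info_t)&km, &count) == KERN_SUCCESS) {
            printf("private: %llu alloc'ed, %llu freed\n",
                   km.total_palloc, km.total_pfree);
            printf("shared:  %llu alloc'ed, %llu freed\n",
                   km.total_salloc, km.total_sfree);
        }
    }
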
 
diff --git a/osfmk/mach/task_policy.h b/osfmk/mach/task_policy.h
index 3a2fb39c4..71d70526f 100644
--- a/osfmk/mach/task_policy.h
+++ b/osfmk/mach/task_policy.h
@@ -111,6 +111,7 @@ enum task_role {
 	TASK_CONTROL_APPLICATION,
 	TASK_GRAPHICS_SERVER,
 	TASK_THROTTLE_APPLICATION,
+	TASK_NONUI_APPLICATION,
 	TASK_DEFAULT_APPLICATION
 };
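
Task roles are applied through task_policy_set() with the TASK_CATEGORY_POLICY flavor defined elsewhere in this header; a sketch using the new role (assuming task_category_policy_data_t from the unchanged portion of the file):

    #include <mach/mach.h>
    #include <mach/task_policy.h>

    /* Mark the current task as an application with no UI. */
    static kern_return_t mark_task_nonui(void)
    {
        task_category_policy_data_t pol = { .role = TASK_NONUI_APPLICATION };
        return task_policy_set(mach_task_self(), TASK_CATEGORY_POLICY,
                               (task_policy_t)&pol, TASK_CATEGORY_POLICY_COUNT);
    }
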
 
diff --git a/osfmk/mach/thread_act.defs b/osfmk/mach/thread_act.defs
index 47a21a9e6..9754acb63 100644
--- a/osfmk/mach/thread_act.defs
+++ b/osfmk/mach/thread_act.defs
@@ -100,7 +100,12 @@ routine act_get_state(
  *	If the thread is currently executing, the state change
  *	may be ill-defined.
  */
-routine	act_set_state(
+routine
+#ifdef KERNEL_SERVER
+act_set_state_from_user(
+#else
+act_set_state(
+#endif
 		target_act	: thread_act_t;
 		flavor		: int;
 		new_state	: thread_state_t);
@@ -124,7 +129,12 @@ routine thread_get_state(
  *	If the thread is currently executing, the state change
  *	may be ill-defined.
  */
-routine	thread_set_state(
+routine
+#ifdef KERNEL_SERVER
+thread_set_state_from_user(
+#else
+thread_set_state(
+#endif
 		target_act	: thread_act_t;
 		flavor		: thread_state_flavor_t;
 		new_state	: thread_state_t);
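
The KERNEL_SERVER conditional renames only the kernel's server-side entry point, so the kernel can interpose validation before the internal implementation while user-visible stubs keep the historical names. A sketch of the shape this implies (signature inferred from the .defs; the real validation lives elsewhere in osfmk and is not shown):

    /* Kernel-side entry point MIG dispatches to when KERNEL_SERVER is set. */
    kern_return_t
    thread_set_state_from_user(thread_act_t           target_act,
                               thread_state_flavor_t  flavor,
                               thread_state_t         new_state,
                               mach_msg_type_number_t count)
    {
        /* sanitize/convert the inbound register state for 'flavor' here,
         * then hand off to the internal thread_set_state machinery */
        return KERN_SUCCESS;    /* placeholder body */
    }
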
diff --git a/osfmk/mach/thread_policy.h b/osfmk/mach/thread_policy.h
index d9530b776..607028837 100644
--- a/osfmk/mach/thread_policy.h
+++ b/osfmk/mach/thread_policy.h
@@ -215,4 +215,20 @@ typedef struct thread_affinity_policy		*thread_affinity_policy_t;
 #define THREAD_AFFINITY_POLICY_COUNT	((mach_msg_type_number_t) \
 	(sizeof (thread_affinity_policy_data_t) / sizeof (integer_t)))
 
+/*
+ * THREAD_BACKGROUND_POLICY:
+ */
+
+#define THREAD_BACKGROUND_POLICY	5
+
+struct thread_background_policy {
+	integer_t	priority;
+};
+
+typedef struct thread_background_policy		thread_background_policy_data_t;
+typedef struct thread_background_policy 	*thread_background_policy_t;
+
+#define THREAD_BACKGROUND_POLICY_COUNT	((mach_msg_type_number_t) \
+	(sizeof (thread_background_policy_data_t) / sizeof (integer_t)))
+
 #endif	/* _MACH_THREAD_POLICY_H_ */
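
As with the other flavors in this header, the new policy is applied with thread_policy_set(); a sketch (the priority value is caller-supplied here, since this hunk defines no named values for it):

    #include <mach/mach.h>
    #include <mach/thread_policy.h>

    /* Move the calling thread into the background band. */
    static kern_return_t make_self_background(integer_t priority)
    {
        /* note: in real code the port from mach_thread_self() should be
         * released with mach_port_deallocate() afterwards */
        thread_background_policy_data_t pol = { .priority = priority };
        return thread_policy_set(mach_thread_self(), THREAD_BACKGROUND_POLICY,
                                 (thread_policy_t)&pol,
                                 THREAD_BACKGROUND_POLICY_COUNT);
    }
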
diff --git a/osfmk/mach/vm_prot.h b/osfmk/mach/vm_prot.h
index 6fe17d43c..ae2d67584 100644
--- a/osfmk/mach/vm_prot.h
+++ b/osfmk/mach/vm_prot.h
@@ -129,13 +129,20 @@ typedef int		vm_prot_t;
 
 #define VM_PROT_WANTS_COPY	((vm_prot_t) 0x10)
 
-
+#ifdef PRIVATE
 /*
  *	The caller wants this memory region treated as if it had a valid
  *	code signature.
  */
 
 #define VM_PROT_TRUSTED		((vm_prot_t) 0x20)
+#endif /* PRIVATE */
 
+/*
+ * 	Another invalid protection value.
+ *	Indicates that the other protection bits are to be applied as a mask
+ *	against the actual protection bits of the map entry.
+ */
+#define VM_PROT_IS_MASK		((vm_prot_t) 0x40)
 
 #endif	/* _MACH_VM_PROT_H_ */
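
A hedged sketch of the mask semantics described in the comment: passing VM_PROT_IS_MASK asks the kernel to intersect, rather than replace, the entry's protections. Whether a given user caller is permitted to use the flag is a kernel policy question this header does not answer:

    #include <mach/mach.h>
    #include <mach/mach_vm.h>

    /* Strip everything except read access from [addr, addr + size). */
    static kern_return_t drop_to_read_only(mach_vm_address_t addr,
                                           mach_vm_size_t size)
    {
        return mach_vm_protect(mach_task_self(), addr, size,
                               FALSE,                       /* set_maximum */
                               VM_PROT_IS_MASK | VM_PROT_READ);
    }
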
diff --git a/osfmk/mach/vm_region.h b/osfmk/mach/vm_region.h
index ceb42b7b5..ebc0e8d54 100644
--- a/osfmk/mach/vm_region.h
+++ b/osfmk/mach/vm_region.h
@@ -126,6 +126,7 @@ typedef struct vm_region_basic_info		 vm_region_basic_info_data_t;
 #define SM_TRUESHARED      5
 #define SM_PRIVATE_ALIASED 6
 #define SM_SHARED_ALIASED  7
+#define SM_LARGE_PAGE      8
 
 /* 
  * For submap info,  the SM flags above are overlayed when a submap
@@ -309,6 +310,7 @@ struct vm_page_info_basic {
 	vm_object_id_t		object_id;
 	memory_object_offset_t	offset;
 	int			depth;
+	int			__pad; /* pad to 64-bit boundary */
 };
 typedef struct vm_page_info_basic		*vm_page_info_basic_t;
 typedef struct vm_page_info_basic		vm_page_info_basic_data_t;
diff --git a/osfmk/mach/vm_statistics.h b/osfmk/mach/vm_statistics.h
index 89ca4351e..4d1b13a56 100644
--- a/osfmk/mach/vm_statistics.h
+++ b/osfmk/mach/vm_statistics.h
@@ -113,15 +113,6 @@ struct vm_statistics {
 typedef struct vm_statistics	*vm_statistics_t;
 typedef struct vm_statistics	vm_statistics_data_t;
 
-#if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */
-
-typedef struct vm_statistics	*vm_statistics64_t;
-typedef struct vm_statistics	vm_statistics64_data_t;
-
-#define VM_STATISTICS_TRUNCATE_TO_32_BIT(value) value
-
-#else /* !(defined(__ppc__))  */
-
 /* 
  * vm_statistics64
  *
@@ -133,6 +124,8 @@ typedef struct vm_statistics	vm_statistics64_data_t;
  *	rev3 - 	changed name to vm_statistics64.
  *		changed some fields in structure to 64-bit on 
  *		arm, i386 and x86_64 architectures.
+ *	rev4 -  require 64-bit alignment for efficient access
+ *		in the kernel. No change to reported data.
  *
  */
 
@@ -163,8 +156,7 @@ struct vm_statistics64 {
 	 */
 	natural_t	speculative_count;	/* # of pages speculative */
 
-}
-;
+} __attribute__((aligned(8)));
 
 typedef struct vm_statistics64	*vm_statistics64_t;
 typedef struct vm_statistics64	vm_statistics64_data_t;
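
The alignment change is kernel-internal; the structure still reaches userspace through the usual host_statistics64() call, as in this sketch:

    #include <mach/mach.h>
    #include <mach/mach_host.h>

    /* Fetch the 64-bit VM statistics for the host. */
    static kern_return_t get_vm_stats64(vm_statistics64_data_t *out)
    {
        mach_msg_type_number_t count = HOST_VM_INFO64_COUNT;
        return host_statistics64(mach_host_self(), HOST_VM_INFO64,
                                 (host_info64_t)out, &count);
    }
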
@@ -177,7 +169,27 @@ typedef struct vm_statistics64	vm_statistics64_data_t;
  */
 #define VM_STATISTICS_TRUNCATE_TO_32_BIT(value) ((uint32_t)(((value) > UINT32_MAX ) ? UINT32_MAX : (value)))
 
-#endif /* !(defined(__ppc__)) */
+/* 
+ * vm_extmod_statistics
+ *
+ * Structure to record modifications to a task by an
+ * external agent.
+ *
+ * History:
+ *	rev0 - 	original structure.
+ */
+
+struct vm_extmod_statistics {
+	int64_t	task_for_pid_count;			/* # of times task port was looked up */
+	int64_t task_for_pid_caller_count;	/* # of times this task called task_for_pid */
+	int64_t	thread_creation_count;		/* # of threads created in task */
+	int64_t	thread_creation_caller_count;	/* # of threads created by task */
+	int64_t	thread_set_state_count;		/* # of register state sets in task */
+	int64_t	thread_set_state_caller_count;	/* # of register state sets by task */
+} __attribute__((aligned(8)));
+
+typedef struct vm_extmod_statistics *vm_extmod_statistics_t;
+typedef struct vm_extmod_statistics vm_extmod_statistics_data_t;
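
These statistics reach userspace embedded in task_extmod_info (the TASK_EXTMOD_INFO flavor added in task_info.h above); a sketch:

    #include <stdio.h>
    #include <mach/mach.h>

    /* Show how often external agents manipulated this task. */
    static void show_extmod(task_t task)
    {
        task_extmod_info_data_t ti;
        mach_msg_type_number_t count = TASK_EXTMOD_INFO_COUNT;

        if (task_info(task, TASK_EXTMOD_INFO,
                      (task_info_t)&ti, &count) == KERN_SUCCESS)
            printf("task_for_pid lookups: %lld, external set-states: %lld\n",
                   ti.extmod_statistics.task_for_pid_count,
                   ti.extmod_statistics.thread_set_state_count);
    }
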
 
 
 /* included for the vm_map_page_query call */
@@ -245,7 +257,6 @@ typedef struct pmap_statistics	*pmap_statistics_t;
 #define VM_FLAGS_PURGABLE	0x0002
 #define VM_FLAGS_NO_CACHE	0x0010
 #ifdef KERNEL_PRIVATE
-#define VM_FLAGS_BELOW_MIN	0x0080	/* map below the map's min offset */
 #define VM_FLAGS_PERMANENT	0x0100	/* mapping can NEVER be unmapped */
 #define VM_FLAGS_GUARD_AFTER	0x0200	/* guard page after the mapping */
 #define VM_FLAGS_GUARD_BEFORE	0x0400	/* guard page before the mapping */
@@ -256,6 +267,7 @@ typedef struct pmap_statistics	*pmap_statistics_t;
 #define VM_FLAGS_OVERWRITE	0x4000	/* delete any existing mappings first */
 #ifdef KERNEL_PRIVATE
 #define VM_FLAGS_NO_PMAP_CHECK	0x8000	/* do not check that pmap is empty */
+#define	VM_FLAGS_MAP_JIT	0x80000	/* Used to mark an entry as describing a JIT region */
 #endif /* KERNEL_PRIVATE */
 
 /*
@@ -267,10 +279,12 @@ typedef struct pmap_statistics	*pmap_statistics_t;
 #define VM_FLAGS_SUPERPAGE_SHIFT 16
 
 #define SUPERPAGE_NONE			0	/* no superpages, if all bits are 0 */
-#define VM_FLAGS_SUPERPAGE_NONE		(SUPERPAGE_NONE<<VM_FLAGS_SUPERPAGE_SHIFT)
+#define SUPERPAGE_SIZE_ANY		1
+#define VM_FLAGS_SUPERPAGE_NONE     (SUPERPAGE_NONE     << VM_FLAGS_SUPERPAGE_SHIFT)
+#define VM_FLAGS_SUPERPAGE_SIZE_ANY (SUPERPAGE_SIZE_ANY << VM_FLAGS_SUPERPAGE_SHIFT)
 #if defined(__x86_64__) || !defined(KERNEL)
-#define SUPERPAGE_SIZE_2MB		1
-#define VM_FLAGS_SUPERPAGE_SIZE_2MB	(SUPERPAGE_SIZE_2MB<<VM_FLAGS_SUPERPAGE_SHIFT)
+#define SUPERPAGE_SIZE_2MB		2
+#define VM_FLAGS_SUPERPAGE_SIZE_2MB (SUPERPAGE_SIZE_2MB<<VM_FLAGS_SUPERPAGE_SHIFT)
 #endif
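
With the renumbering, SUPERPAGE_SIZE_ANY lets the kernel pick a size while SUPERPAGE_SIZE_2MB pins it; a sketch of requesting a 2 MB superpage (size and placement must be superpage-aligned for the request to succeed):

    #include <mach/mach.h>
    #include <mach/mach_vm.h>

    /* Allocate one 2 MB superpage anywhere in the address space. */
    static kern_return_t alloc_superpage_2mb(mach_vm_address_t *addr)
    {
        *addr = 0;
        return mach_vm_allocate(mach_task_self(), addr, 2 * 1024 * 1024,
                                VM_FLAGS_ANYWHERE | VM_FLAGS_SUPERPAGE_SIZE_2MB);
    }
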
 
 #define VM_FLAGS_ALIAS_MASK	0xFF000000
@@ -358,6 +372,24 @@ typedef struct pmap_statistics	*pmap_statistics_t;
 /* memory allocated for GLSL */
 #define VM_MEMORY_GLSL  66
 
+/* memory allocated for OpenCL.framework */
+#define VM_MEMORY_OPENCL    67
+
+/* memory allocated for QuartzCore.framework */
+#define VM_MEMORY_COREIMAGE 68
+
+/* memory allocated for WebCore Purgeable Buffers */
+#define VM_MEMORY_WEBCORE_PURGEABLE_BUFFERS 69
+
+/* ImageIO memory */
+#define VM_MEMORY_IMAGEIO	70
+
+/* CoreProfile memory */
+#define VM_MEMORY_COREPROFILE	71
+
+/* assetsd / MobileSlideShow memory */
+#define VM_MEMORY_ASSETSD    72
+
 /* Reserve 240-255 for application */
 #define VM_MEMORY_APPLICATION_SPECIFIC_1 240
 #define VM_MEMORY_APPLICATION_SPECIFIC_16 255
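
These tags travel in the high byte of the flags argument (VM_FLAGS_ALIAS_MASK); the header's VM_MAKE_TAG macro does the shift, so a tagged allocation looks like this sketch:

    #include <mach/mach.h>
    #include <mach/mach_vm.h>

    /* Allocate memory attributed to the first application-specific tag. */
    static kern_return_t alloc_tagged(mach_vm_address_t *addr, mach_vm_size_t size)
    {
        *addr = 0;
        return mach_vm_allocate(mach_task_self(), addr, size,
                                VM_FLAGS_ANYWHERE |
                                VM_MAKE_TAG(VM_MEMORY_APPLICATION_SPECIFIC_1));
    }
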
diff --git a/osfmk/mach_debug/mach_debug_types.defs b/osfmk/mach_debug/mach_debug_types.defs
index 8e63dbb65..ce3be8cb4 100644
--- a/osfmk/mach_debug/mach_debug_types.defs
+++ b/osfmk/mach_debug/mach_debug_types.defs
@@ -64,11 +64,20 @@
 
 #include <mach/std_types.defs>
 
-type zone_name_t = struct[80] of char;
-type zone_name_array_t = array[] of zone_name_t;
+type zone_name_t = struct[80] of char;			/* deprecated */
+type zone_name_array_t = array[] of zone_name_t;	/* deprecated */
 
-type zone_info_t = struct[9] of integer_t;
-type zone_info_array_t = array[] of zone_info_t;
+type zone_info_t = struct[9] of integer_t;		/* deprecated */
+type zone_info_array_t = array[] of zone_info_t; 	/* deprecated */
+
+type mach_zone_name_t = struct[80] of char;
+type mach_zone_name_array_t = array[] of mach_zone_name_t;
+
+type mach_zone_info_t = struct[8] of uint64_t;
+type mach_zone_info_array_t = array[] of mach_zone_info_t;
+
+type task_zone_info_t = struct[11] of uint64_t;
+type task_zone_info_array_t = array[] of task_zone_info_t;
 
 type hash_info_bucket_t = struct[1] of natural_t;
 type hash_info_bucket_array_t = array[] of hash_info_bucket_t;
diff --git a/osfmk/mach_debug/zone_info.h b/osfmk/mach_debug/zone_info.h
index 9d2182a29..277801d5d 100644
--- a/osfmk/mach_debug/zone_info.h
+++ b/osfmk/mach_debug/zone_info.h
@@ -63,8 +63,9 @@
 #include <mach/machine/vm_types.h>
 
 /*
- *	Remember to update the mig type definitions
- *	in mach_debug_types.defs when adding/removing fields.
+ *	Legacy definitions for host_zone_info().  This interface and
+ *	these definitions have been deprecated in favor of the new
+ *	mach_zone_info() interface and types below.
  */
 
 #define ZONE_NAME_MAX_LEN		80
@@ -90,4 +91,46 @@ typedef struct zone_info {
 
 typedef zone_info_t *zone_info_array_t;
 
+
+/*
+ *	Remember to update the mig type definitions
+ *	in mach_debug_types.defs when adding/removing fields.
+ */
+
+#define MACH_ZONE_NAME_MAX_LEN		80
+
+typedef struct mach_zone_name {
+	char		mzn_name[MACH_ZONE_NAME_MAX_LEN];
+} mach_zone_name_t;
+
+typedef mach_zone_name_t *mach_zone_name_array_t;
+
+typedef struct mach_zone_info_data {
+	uint64_t	mzi_count;	/* count of elements in use */
+	uint64_t	mzi_cur_size;	/* current memory utilization */
+	uint64_t	mzi_max_size;	/* how large can this zone grow */
+	uint64_t	mzi_elem_size;	/* size of an element */
+	uint64_t	mzi_alloc_size;	/* size used for more memory */
+	uint64_t	mzi_sum_size;	/* sum of all allocs (life of zone) */
+	uint64_t	mzi_exhaustible;	/* merely return if empty? */
+	uint64_t	mzi_collectable;	/* garbage collect elements? */
+} mach_zone_info_t;
+
+typedef mach_zone_info_t *mach_zone_info_array_t;
+
+typedef struct task_zone_info_data {
+	uint64_t	tzi_count;	/* count of elements in use */
+	uint64_t	tzi_cur_size;	/* current memory utilization */
+	uint64_t	tzi_max_size;	/* how large can this zone grow */
+	uint64_t	tzi_elem_size;	/* size of an element */
+	uint64_t	tzi_alloc_size;	/* size used for more memory */
+	uint64_t	tzi_sum_size;	/* sum of all allocs (life of zone) */
+	uint64_t	tzi_exhaustible;	/* merely return if empty? */
+	uint64_t	tzi_collectable;	/* garbage collect elements? */
+	uint64_t	tzi_caller_acct;	/* charged to caller (or kernel) */
+	uint64_t	tzi_task_alloc;	/* sum of all allocs by this task */
+	uint64_t	tzi_task_free;	/* sum of all frees by this task */
+} task_zone_info_t;
+
+typedef task_zone_info_t *task_zone_info_array_t;
 #endif	/* _MACH_DEBUG_ZONE_INFO_H_ */
diff --git a/osfmk/machine/Makefile b/osfmk/machine/Makefile
index d68ef2fbf..2170671dd 100644
--- a/osfmk/machine/Makefile
+++ b/osfmk/machine/Makefile
@@ -15,6 +15,8 @@ DATAFILES = \
 	lock.h	\
 	locks.h	\
 	machine_routines.h	\
+	pal_routines.h		\
+	pal_hibernate.h		\
 	simple_lock.h
 
 INSTALL_MI_LCL_LIST = cpu_capabilities.h
diff --git a/osfmk/machine/asm.h b/osfmk/machine/asm.h
index 70c246cb8..c43a64523 100644
--- a/osfmk/machine/asm.h
+++ b/osfmk/machine/asm.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_ASM_H
 #define _MACHINE_ASM_H
 
-#if defined (__ppc__)
-#include "ppc/asm.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/asm.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/ast.h b/osfmk/machine/ast.h
index 0c01fc7bc..b4880d25a 100644
--- a/osfmk/machine/ast.h
+++ b/osfmk/machine/ast.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_AST_H
 #define _MACHINE_AST_H
 
-#if defined (__ppc__)
-#include "ppc/ast.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/ast.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/ast_types.h b/osfmk/machine/ast_types.h
index fc7d1d229..57ae58bff 100644
--- a/osfmk/machine/ast_types.h
+++ b/osfmk/machine/ast_types.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_AST_TYPES_H
 #define _MACHINE_AST_TYPES_H
 
-#if defined (__ppc__)
-#include "ppc/ast_types.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/ast_types.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/commpage.h b/osfmk/machine/commpage.h
index c3d3a9989..d11521702 100644
--- a/osfmk/machine/commpage.h
+++ b/osfmk/machine/commpage.h
@@ -29,9 +29,7 @@
 #ifndef _MACHINE_COMMPAGE_H
 #define _MACHINE_COMMPAGE_H
 
-#if defined (__ppc__)
-#include "ppc/commpage/commpage.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/commpage/commpage.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/cpu_affinity.h b/osfmk/machine/cpu_affinity.h
index 990c856bc..5b3e47ac0 100644
--- a/osfmk/machine/cpu_affinity.h
+++ b/osfmk/machine/cpu_affinity.h
@@ -30,9 +30,7 @@
 #ifndef _MACHINE_CPU_AFFINITY_H
 #define _MACHINE_CPU_AFFINITY_H
 
-#if defined (__ppc__)
-#include "ppc/cpu_affinity.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/cpu_affinity.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/cpu_capabilities.h b/osfmk/machine/cpu_capabilities.h
index 606ec2898..a722dc93f 100644
--- a/osfmk/machine/cpu_capabilities.h
+++ b/osfmk/machine/cpu_capabilities.h
@@ -31,18 +31,14 @@
 #define _MACHINE_CPU_CAPABILITIES_H
 
 #ifdef KERNEL_PRIVATE
-#if defined (__ppc__)
-#include "ppc/cpu_capabilities.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/cpu_capabilities.h"
 #else
 #error architecture not supported
 #endif
 
 #else /* !KERNEL_PRIVATE -- System Framework header */
-#if defined (__ppc__) || defined(__ppc64__)
-#include <System/ppc/cpu_capabilities.h>
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include <System/i386/cpu_capabilities.h>
 #else
 #error architecture not supported
diff --git a/osfmk/machine/cpu_data.h b/osfmk/machine/cpu_data.h
index 0a047481f..347235ec9 100644
--- a/osfmk/machine/cpu_data.h
+++ b/osfmk/machine/cpu_data.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_CPU_DATA_H
 #define _MACHINE_CPU_DATA_H
 
-#if defined (__ppc__)
-#include "ppc/cpu_data.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/cpu_data.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/cpu_number.h b/osfmk/machine/cpu_number.h
index 47e71ba57..45c4b2b4d 100644
--- a/osfmk/machine/cpu_number.h
+++ b/osfmk/machine/cpu_number.h
@@ -30,9 +30,7 @@
 #ifndef _MACHINE_CPU_NUMBER_H
 #define _MACHINE_CPU_NUMBER_H
 
-#if defined (__ppc__)
-#include "ppc/cpu_number.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/cpu_number.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/db_machdep.h b/osfmk/machine/db_machdep.h
index ae38b4451..76ce9b313 100644
--- a/osfmk/machine/db_machdep.h
+++ b/osfmk/machine/db_machdep.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_DB_MACHDEP_H
 #define _MACHINE_DB_MACHDEP_H
 
-#if defined (__ppc__)
-#include "ppc/db_machdep.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/db_machdep.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/endian.h b/osfmk/machine/endian.h
index 5f9c0b9d8..5078c0fd7 100644
--- a/osfmk/machine/endian.h
+++ b/osfmk/machine/endian.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_ENDIAN_H
 #define _MACHINE_ENDIAN_H
 
-#if defined (__ppc__)
-#include "ppc/endian.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/endian.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/io_map_entries.h b/osfmk/machine/io_map_entries.h
index 8e9e9e456..49306bc16 100644
--- a/osfmk/machine/io_map_entries.h
+++ b/osfmk/machine/io_map_entries.h
@@ -30,9 +30,7 @@
 #ifndef _MACHINE_IO_MAP_ENTRIES_H_
 #define _MACHINE_IO_MAP_ENTRIES_H_
 
-#if defined (__ppc__)
-#include "ppc/io_map_entries.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/io_map_entries.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/lock.h b/osfmk/machine/lock.h
index 558e780d4..a870743a5 100644
--- a/osfmk/machine/lock.h
+++ b/osfmk/machine/lock.h
@@ -30,9 +30,7 @@
 #ifndef _MACHINE_LOCK_H_
 #define _MACHINE_LOCK_H_
 
-#if defined (__ppc__)
-#include "ppc/lock.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/lock.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/locks.h b/osfmk/machine/locks.h
index 786419581..ad7dcdcbe 100644
--- a/osfmk/machine/locks.h
+++ b/osfmk/machine/locks.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_LOCKS_H_
 #define _MACHINE_LOCKS_H_
 
-#if defined (__ppc__)
-#include "ppc/locks.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/locks.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/machine_cpu.h b/osfmk/machine/machine_cpu.h
index fdc556a16..734cf8f30 100644
--- a/osfmk/machine/machine_cpu.h
+++ b/osfmk/machine/machine_cpu.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_MACHINE_CPU_H
 #define _MACHINE_MACHINE_CPU_H
 
-#if defined (__ppc__)
-#include "ppc/machine_cpu.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/machine_cpu.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/machine_routines.h b/osfmk/machine/machine_routines.h
index a92705fed..361dee046 100644
--- a/osfmk/machine/machine_routines.h
+++ b/osfmk/machine/machine_routines.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_MACHINE_ROUTINES_H
 #define _MACHINE_MACHINE_ROUTINES_H
 
-#if defined (__ppc__)
-#include "ppc/machine_routines.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "i386/machine_routines.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/machine_rpc.h b/osfmk/machine/machine_rpc.h
index 0fe29c9d7..c158a0c19 100644
--- a/osfmk/machine/machine_rpc.h
+++ b/osfmk/machine/machine_rpc.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_MACHINE_RPC_H
 #define _MACHINE_MACHINE_RPC_H
 
-#if defined (__ppc__)
-#include "ppc/machine_rpc.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/machine_rpc.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/machlimits.h b/osfmk/machine/machlimits.h
index f9d468434..fee4ddf0c 100644
--- a/osfmk/machine/machlimits.h
+++ b/osfmk/machine/machlimits.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_MACHLIMITS_H
 #define _MACHINE_MACHLIMITS_H
 
-#if defined (__ppc__)
-#include "ppc/machlimits.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/machlimits.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/machparam.h b/osfmk/machine/machparam.h
index 3e8325307..9657b8cce 100644
--- a/osfmk/machine/machparam.h
+++ b/osfmk/machine/machparam.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_MACHPARAM_H
 #define _MACHINE_MACHPARAM_H
 
-#if defined (__ppc__)
-#include "ppc/machparam.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/machparam.h"
 #else
 #error architecture not supported
diff --git a/libsyscall/mach/ppc/mach_absolute_time.s b/osfmk/machine/pal_hibernate.h
similarity index 81%
rename from libsyscall/mach/ppc/mach_absolute_time.s
rename to osfmk/machine/pal_hibernate.h
index 2f4da835f..238896dc5 100644
--- a/libsyscall/mach/ppc/mach_absolute_time.s
+++ b/osfmk/machine/pal_hibernate.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,13 +25,13 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
+#ifndef _MACHINE_PAL_HIBERNATE_H
+#define _MACHINE_PAL_HIBERNATE_H
 
-#define	__APPLE_API_PRIVATE
-#include <machine/cpu_capabilities.h>
-#undef	__APPLE_API_PRIVATE
+#if defined (__i386__) || defined(__x86_64__)
+#include "i386/pal_hibernate.h"
+#else
+#error architecture not supported
+#endif
 
-.text
-.align 4
-.globl _mach_absolute_time
-_mach_absolute_time:
-    ba	_COMM_PAGE_ABSOLUTE_TIME
+#endif /* _MACHINE_PAL_HIBERNATE_H */
diff --git a/iokit/IOKit/machine/IOSharedLockImp.h b/osfmk/machine/pal_routines.h
similarity index 85%
rename from iokit/IOKit/machine/IOSharedLockImp.h
rename to osfmk/machine/pal_routines.h
index ec0c90f2c..755b532e9 100644
--- a/iokit/IOKit/machine/IOSharedLockImp.h
+++ b/osfmk/machine/pal_routines.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,12 +25,13 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
+#ifndef _MACHINE_PAL_ROUTINES_H
+#define _MACHINE_PAL_ROUTINES_H
 
-#if defined (__ppc__)
-#include "IOKit/ppc/IOSharedLockImp.h"
-#elif defined (__i386__) || defined (__x86_64__)
-#include "IOKit/i386/IOSharedLockImp.h"
+#if defined (__i386__) || defined(__x86_64__)
+#include "i386/pal_routines.h"
 #else
 #error architecture not supported
 #endif
 
+#endif /* _MACHINE_PAL_ROUTINES_H */
diff --git a/osfmk/machine/pmap.h b/osfmk/machine/pmap.h
index b6290032a..78bef764e 100644
--- a/osfmk/machine/pmap.h
+++ b/osfmk/machine/pmap.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_PMAP_H
 #define _MACHINE_PMAP_H
 
-#if defined (__ppc__)
-#include "ppc/pmap.h"
-#elif defined (__x86_64__) || defined (__i386__)
+#if defined (__x86_64__) || defined (__i386__)
 #include "i386/pmap.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/sched_param.h b/osfmk/machine/sched_param.h
index 04c23c73c..2c5dc0d0d 100644
--- a/osfmk/machine/sched_param.h
+++ b/osfmk/machine/sched_param.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_SCHED_PARAM_H
 #define _MACHINE_SCHED_PARAM_H
 
-#if defined (__ppc__)
-#include "ppc/sched_param.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/sched_param.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/setjmp.h b/osfmk/machine/setjmp.h
index c59703092..142e4f677 100644
--- a/osfmk/machine/setjmp.h
+++ b/osfmk/machine/setjmp.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_SETJMP_H
 #define _MACHINE_SETJMP_H
 
-#if defined (__ppc__)
-#include "ppc/setjmp.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/setjmp.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/simple_lock.h b/osfmk/machine/simple_lock.h
index 799b74c9b..30e2b44c5 100644
--- a/osfmk/machine/simple_lock.h
+++ b/osfmk/machine/simple_lock.h
@@ -30,9 +30,7 @@
 #ifndef _MACHINE_SIMPLE_LOCK_H_
 #define _MACHINE_SIMPLE_LOCK_H_
 
-#if defined (__ppc__)
-#include "ppc/simple_lock.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/simple_lock.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/task.h b/osfmk/machine/task.h
index faf4ba5ac..3e9fc821a 100644
--- a/osfmk/machine/task.h
+++ b/osfmk/machine/task.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_TASK_H
 #define _MACHINE_TASK_H
 
-#if defined (__ppc__)
-#include "ppc/task.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/task.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/thread.h b/osfmk/machine/thread.h
index 5eeccbd33..840d103b7 100644
--- a/osfmk/machine/thread.h
+++ b/osfmk/machine/thread.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_THREAD_H
 #define _MACHINE_THREAD_H
 
-#if defined (__ppc__)
-#include "ppc/thread.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/thread.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/timer.h b/osfmk/machine/timer.h
index a80a74b72..ba8e5b5ff 100644
--- a/osfmk/machine/timer.h
+++ b/osfmk/machine/timer.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_TIMER_H
 #define _MACHINE_TIMER_H
 
-#if defined (__ppc__)
-#include "ppc/timer.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/timer.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/trap.h b/osfmk/machine/trap.h
index 54298707b..5fb2aa18d 100644
--- a/osfmk/machine/trap.h
+++ b/osfmk/machine/trap.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_TRAP_H
 #define _MACHINE_TRAP_H
 
-#if defined (__ppc__)
-#include "ppc/trap.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/trap.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/vm_tuning.h b/osfmk/machine/vm_tuning.h
index a5906bb68..324d9d25a 100644
--- a/osfmk/machine/vm_tuning.h
+++ b/osfmk/machine/vm_tuning.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_VM_TUNING_H
 #define _MACHINE_VM_TUNING_H
 
-#if defined (__ppc__)
-#include "ppc/vm_tuning.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/vm_tuning.h"
 #else
 #error architecture not supported
diff --git a/osfmk/machine/xpr.h b/osfmk/machine/xpr.h
index 089a5cc65..ad747c34f 100644
--- a/osfmk/machine/xpr.h
+++ b/osfmk/machine/xpr.h
@@ -28,9 +28,7 @@
 #ifndef _MACHINE_XPR_H
 #define _MACHINE_XPR_H
 
-#if defined (__ppc__)
-#include "ppc/xpr.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "i386/xpr.h"
 #else
 #error architecture not supported
diff --git a/osfmk/pmc/pmc.c b/osfmk/pmc/pmc.c
index 43da760a1..f5a894823 100644
--- a/osfmk/pmc/pmc.c
+++ b/osfmk/pmc/pmc.c
@@ -38,11 +38,6 @@
 #include <i386/mp.h>
 #endif
 
-#if defined(__ppc__)
-#include <ppc/cpu_internal.h>
-#include <ppc/machine_cpu.h>
-#endif
-
 #if CONFIG_COUNTERS
 
 /* various debug logging enable */
@@ -224,13 +219,13 @@ static volatile uint32_t perf_counters_count = 0U;
  * constitute a conflict.
  */
 static queue_t system_reservations = NULL;
-static volatile uint32_t system_reservation_count __attribute__((aligned(4))) = 0U;
+static volatile uint32_t system_reservation_count = 0U;
 
 static queue_t task_reservations = NULL;
-static volatile uint32_t task_reservation_count __attribute__((aligned(4))) = 0U;
+static volatile uint32_t task_reservation_count = 0U;
 
 static queue_t thread_reservations = NULL;
-static volatile uint32_t thread_reservation_count __attribute__((aligned(4))) = 0U;
+static volatile uint32_t thread_reservation_count = 0U;
 
 
 #if XNU_KERNEL_PRIVATE
@@ -928,6 +923,7 @@ static boolean_t pmc_internal_reservation_add(pmc_reservation_t resv) {
 			case PMC_FLAG_SCOPE_SYSTEM:
 				/* Simply add it to the system queue */
 				pmc_internal_reservation_enqueue(system_reservations, resv);
+				system_reservation_count++;
 				
 				lck_spin_unlock(&reservations_spin);
 
@@ -939,6 +935,7 @@ static boolean_t pmc_internal_reservation_add(pmc_reservation_t resv) {
 
 				/* Not only do we enqueue it in our local queue for tracking */
 				pmc_internal_reservation_enqueue(task_reservations, resv);
+				task_reservation_count++;
 
 				lck_spin_unlock(&reservations_spin);
 
@@ -956,6 +953,7 @@ static boolean_t pmc_internal_reservation_add(pmc_reservation_t resv) {
 				 */
 
 				pmc_internal_reservation_enqueue(thread_reservations, resv);
+				thread_reservation_count++;
 
 				lck_spin_unlock(&reservations_spin);
 				
@@ -998,22 +996,6 @@ static void pmc_internal_reservation_broadcast(pmc_reservation_t reservation, vo
 			
 			/* Have each core run pmc_internal_reservation_stop_cpu asynchronously. */
 			mp_cpus_call(mask, ASYNC, action_func, reservation);
-#elif defined(__ppc__)
-			size_t ii;
-
-			if (core_cnt > 0) {
-				for (ii = 0; ii < core_cnt; ii++) {
-					if (cores[ii] == (uint32_t)cpu_number()) {
-						action_func(reservation);
-					} else {
-						cpu_signal(cores[ii], SIGPcall, (uint32_t)action_func, (uint32_t)reservation);
-					}
-				}
-			} else {
-				uint32_t sync;
-				cpu_broadcast(&sync, (void (*)(uint32_t))action_func, (uint32_t)reservation);
-				action_func(reservation);
-			}
 #else
 #error pmc_reservation_interrupt needs an inter-processor method invocation mechanism for this architecture
 #endif
@@ -1044,6 +1026,7 @@ static void pmc_internal_reservation_remove(pmc_reservation_t resv) {
 		case PMC_FLAG_SCOPE_SYSTEM:
 			lck_spin_lock(&reservations_spin);
 			pmc_internal_reservation_dequeue(system_reservations, resv);
+			system_reservation_count--;
 			lck_spin_unlock(&reservations_spin);
 			break;
 
@@ -1054,6 +1037,7 @@ static void pmc_internal_reservation_remove(pmc_reservation_t resv) {
 
 			/* remove from the global queue */
 			pmc_internal_reservation_dequeue(task_reservations, resv);
+			task_reservation_count--;
 
 			/* unlock the global */
 			lck_spin_unlock(&reservations_spin);
@@ -1066,6 +1050,7 @@ static void pmc_internal_reservation_remove(pmc_reservation_t resv) {
 			lck_spin_lock(&reservations_spin);
 
 			pmc_internal_reservation_dequeue(thread_reservations, resv);
+			thread_reservation_count--;
 
 			lck_spin_unlock(&reservations_spin);
 
diff --git a/osfmk/pmc/pmc.h b/osfmk/pmc/pmc.h
index ab396a9c6..72692fa54 100644
--- a/osfmk/pmc/pmc.h
+++ b/osfmk/pmc/pmc.h
@@ -34,6 +34,8 @@ extern "C" {
 #include <mach/mach_time.h>
 #include <mach/mach_types.h>
 
+#include <libkern/version.h>
+
 /****************************************************************************
  * The four main object types
  *
@@ -336,6 +338,14 @@ typedef struct pmc_methods {
  * KERN_RESOURCE_SHORTAGE if the kernel lacks the resources to register another performance monitor
  * driver, KERN_INVALID_ARGUMENT if one or both of the arguments is null
  */
+
+/* Prevent older AppleProfileFamily kexts from loading on newer kernels.
+ * Alas, C doesn't have a cleaner way to do the version number concatenation.
+ */
+#define PERF_REG_NAME1(a, b) a ## b
+#define PERF_REG_NAME(a, b) PERF_REG_NAME1(a, b)
+#define perf_monitor_register PERF_REG_NAME(perf_monitor_register_, VERSION_MAJOR)
+
 kern_return_t perf_monitor_register(perf_monitor_object_t monitor, perf_monitor_methods_t *methods);
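
The indirection matters: pasting through a second macro forces the preprocessor to expand VERSION_MAJOR before ## is applied. A standalone sketch of the same trick (MAJOR stands in for VERSION_MAJOR from <libkern/version.h>):

    #define MAJOR 11
    #define PASTE1(a, b) a ## b
    #define PASTE(a, b)  PASTE1(a, b)

    /* Expands to register_driver_11; a direct a ## b would have
     * produced register_driver_MAJOR instead. */
    #define register_driver PASTE(register_driver_, MAJOR)

    int register_driver(void);

A kext compiled against an older header thus references a differently versioned symbol and simply fails to link against a newer kernel, which is the intended load-time rejection.
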
 
 /*!fn
diff --git a/osfmk/ppc/AltiAssist.s b/osfmk/ppc/AltiAssist.s
deleted file mode 100644
index 6ff23acba..000000000
--- a/osfmk/ppc/AltiAssist.s
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* 																							
- 	AltiAssist.s 
-
-	Do the VMX assists
-
-	Lovingly crafted by Bill Angell using traditional methods and only natural or recycled materials.
-	No animal products are used other than rendered otter bile and deep fried pork lard.
-
-*/
-
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <ppc/exception.h>
-#include <mach/machine/vm_param.h>
-#include <assym.s>
-
-;
-;
-;			General stuff what happens here:
-;				1)	All general context saved, interrupts off, translation off
-;				2)	Vector and floating point disabled, but there may be live context.
-;					This code is responsible for saving and restoring what is used. This
-;					includes exception states, java mode, etc.
-;				3)	No attempt is made to resolve page faults.  PTE misses are handled
-;					automatically, but actual faults (ala copyin/copyout) are not. If 
-;					a fault does occur, the exception that caused entry to the emulation
-;					routine is remapped to either an instruction or data miss (depending
-;					upon the stage detected) and redriven through the exception handler.
-;					The only time that an instruction fault can happen is when a different
-;					processor removes a mapping between our original fault and when we
-;					fetch the assisted instruction. For an assisted instruction, data
-;					faults should not occur (except in the MP case).  For a purely
-;					emulated instruction, faults can occur.
-;
-;			Emulation algorithms cloned from MacOS 9 code.
-;
-;			Assumes that R2 = per_proc_area
-;
-;
-
-
-			.align	5
-			.globl	EXT(AltivecAssist)
-
-LEXT(AltivecAssist)
-
-			li			r10,emvr0					; Point to the vector savearea
-			
-			li			r11,emvr1					; Another savearea
-			stvxl		v0,r10,r2					; Save V0
-			stvxl		v1,r11,r2					; Save V1
-			vspltisw	v0,1						; Set a 1 in V0			
-			vspltisw	v1,8						; Get half of the shift
-			vslw		v0,v0,v1					; Shift half way
-			vslw		v0,v0,v1					; Shift the rest of the way (we now have 0x00010000)
-			mfvscr		v1							; Get the VSCR
-			vor			v1,v1,v0					; Turn off Java mode
-			lvxl		v0,r10,r2					; Restore V0
-			mtvscr		v1							; Set Java mode off
-			lvxl		v1,r11,r2					; Restore V1
-			
-			li			r11,T_IN_VAIN				; We are all done
-			b			EXT(EmulExit)				; We are done, no tracing on...
-
diff --git a/osfmk/ppc/Diagnostics.c b/osfmk/ppc/Diagnostics.c
deleted file mode 100644
index d6aa269c8..000000000
--- a/osfmk/ppc/Diagnostics.c
+++ /dev/null
@@ -1,571 +0,0 @@
-/*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_FREE_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-
-/*
- *	Author: Bill Angell, Apple
- *	Date:	9/aught-aught
- *
- * Random diagnostics
- */
-
-
-#include <kern/machine.h>
-#include <kern/processor.h>
-#include <mach/machine.h>
-#include <mach/processor_info.h>
-#include <mach/mach_types.h>
-#include <mach/boolean.h>
-#include <kern/thread.h>
-#include <kern/task.h>
-#include <kern/ipc_kobject.h>
-#include <mach/vm_param.h>
-#include <ipc/port.h>
-#include <ipc/ipc_entry.h>
-#include <ipc/ipc_space.h>
-#include <ipc/ipc_object.h>
-#include <ipc/ipc_port.h>
-#include <ipc/ipc_right.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_map.h>
-#include <vm/vm_page.h>
-#include <vm/pmap.h>
-#include <ppc/cpu_internal.h>
-#include <ppc/exception.h>
-#include <ppc/Firmware.h>
-#include <ppc/low_trace.h>
-#include <ppc/db_low_trace.h>
-#include <ppc/mappings.h>
-#include <ppc/pmap.h>
-#include <ppc/mem.h>
-#include <ppc/savearea.h>
-#include <ppc/Diagnostics.h>
-#include <pexpert/pexpert.h>
-#include <console/video_console.h>
-#include <ppc/trap.h>
-
-extern struct vc_info vinfo;
-extern uint32_t warFlags;
-#define warDisMBpoff	0x80000000
-
-kern_return_t testPerfTrap(int trapno, struct savearea *ss, 
-	unsigned int dsisr, addr64_t dar);
-
-
-int diagCall(struct savearea *save) {
-
-	union {
-		unsigned long long tbase;
-		unsigned int tb[2];
-	} ttt, adj;
-	natural_t tbu, tbu2, tbl;
-	struct per_proc_info *per_proc;					/* Area for my per_proc address */
-	int cpu, ret, subc;
-	unsigned int temp, temp2, *baddr, oldwar;
-	addr64_t src, snk;
-	uint64_t srrwrk;
-	scomcomm sarea;
-	ipc_port_t port;
-	ipc_entry_t ientry;
-	processor_t prssr;
-	vm_address_t addrs;
-	
-
-	if(!(dgWork.dgFlags & enaDiagSCs)) return 0;	/* If not enabled, cause an exception */
-
-	switch(save->save_r3) {							/* Select the routine */
-	
-/*
- *		Adjust the timebase for drift recovery testing
- */
-		case dgAdjTB:								/* Adjust the timebase */
-
-			adj.tb[0] = 0;							/* Clear high part */
-			adj.tb[1] = save->save_r4;				/* Set low order */
-			if(adj.tb[1] & 0x80000000) adj.tb[0] = 0xFFFFFFFF;	/* Propagate sign bit */
-						
-			do {									/* Read current time */
-				asm volatile("	mftbu %0" : "=r" (tbu));
-				asm volatile("	mftb %0" : "=r" (tbl));
-				asm volatile("	mftbu %0" : "=r" (tbu2));
-			} while (tbu != tbu2);
-			
-			ttt.tb[0] = tbu;						/* Set high */
-			ttt.tb[1] = tbl;						/* Set low */
-			
-			ttt.tbase = ttt.tbase + adj.tbase;		/* Increment or decrement the TB */
-			
-			tbu = ttt.tb[0];						/* Save in regular variable */
-			tbl = ttt.tb[1];						/* Save in regular variable */
-
-			mttb(0);								/* Set low to keep from ticking */
-			mttbu(tbu);								/* Set adjusted high */
-			mttb(tbl);								/* Set adjusted low */
-			
-			return -1;								/* Return no AST checking... */
-			
-/*
- *		Return physical address of a page
- */
-		case dgLRA:
-		
-			save->save_r3 = pmap_find_phys(current_thread()->map->pmap, save->save_r4);	/* Get read address */
-			
-			return -1;								/* Return no AST checking... */
-			
-/*
- *		Copy physical to virtual
- */
-		case dgpcpy:
-		
-		
-#if 1
-			src = (save->save_r4 << 32) | (0x00000000FFFFFFFFULL & save->save_r5);	/* Merge into 64-bit */
-			snk = (save->save_r6 << 32) | (0x00000000FFFFFFFFULL & save->save_r7);	/* Merge into 64-bit */
-			save->save_r3 = copypv(src, snk, save->save_r8, save->save_r9);	/* Copy the physical page */
-#endif			
-			return 1;								/* Return and check for ASTs... */
-			
-/*
- *		Read/Write physical memory
- */
-		case dgprw:
-		
-			src = (save->save_r5 << 32) | (0x00000000FFFFFFFFULL & save->save_r6);	/* Merge into 64-bit */
-			
-			switch(save->save_r4) {					/* Select the actual function */
-
-				case 0:
-					save->save_r3 = (uint64_t)ml_phys_read_byte((unsigned int)src);
-					break;
-			
-				case 1:
-					save->save_r3 = (uint64_t)ml_phys_read_byte_64(src);
-					break;
-				
-				case 2:
-					save->save_r3 = (uint64_t)ml_phys_read((unsigned int)src);
-					break;
-				
-				case 3:
-					save->save_r3 = (uint64_t)ml_phys_read_64(src);
-					break;
-
-				case 4:
-					ml_phys_write_byte((unsigned int)src, (unsigned int)save->save_r7);
-					break;
-			
-				case 5:
-					ml_phys_write_byte_64(src, (unsigned int)save->save_r7);
-					break;
-				
-				case 6:
-					ml_phys_write((unsigned int)src, (unsigned int)save->save_r7);
-					break;
-				
-				case 7:
-					ml_phys_write_64(src, (unsigned int)save->save_r7);
-					break;
-			}
-
-			return 1;								/* Return and check for ASTs... */
-			
-			
-/*
- *		Soft reset processor
- */
-		case dgreset:
-		
-			cpu = save->save_r4;					/* Get the requested CPU number */
-			
-			if(cpu >= MAX_CPUS) {						/* Check for bogus cpu */
-				save->save_r3 = KERN_FAILURE;		/* Set failure */
-				return 1;
-			}
-		
-			per_proc = PerProcTable[cpu].ppe_vaddr;		/* Point to the processor */
-			if(!per_proc->running) return KERN_FAILURE;	/* It is not running */	
-
-			
-			(void)PE_cpu_start(per_proc->cpu_id, 
-						per_proc->start_paddr, (vm_offset_t)per_proc);
-			
-			save->save_r3 = KERN_SUCCESS;			/* Set success */
-
-			return 1;								/* Return and check for ASTs... */
-
-/*
- *		Force cache flush
- */
-		case dgFlush:
-		
-			cacheInit();							/* Blow cache */
-			return 1;								/* Return and check for ASTs... */
-
-/*
- *		various hack tests
- */
-		case dgtest:
-		
-			kprintf("Trying to hang\n");
-			baddr = (unsigned *)((unsigned)&baddr | 1); /* Make an odd address */
-			__asm__ volatile("lwarx r2,0,%0" : : "r" (baddr));
-			kprintf("Didn't hang\n");
-
-			return 1;								/* Return and check for ASTs... */
-			
-		
-
-/*
- *		Create a physical block map into the current task
- *		Don't bother to check for any errors.
- *		parms - vaddr, paddr, size, prot, attributes
- */
-		case dgBMphys:
-					
-			pmap_map_block(current_thread()->map->pmap, (addr64_t)save->save_r4,	/* Map in the block */ 
-				save->save_r5, save->save_r6, save->save_r7, save->save_r8, 0);
-
-			return 1;								/* Return and check for ASTs... */
-		
-
-/*
- *		Remove any mapping from the current task
- *		Don't bother to check for any errors.
- *		parms - vaddr
- */
-		case dgUnMap:
-		
-			(void)mapping_remove(current_thread()->map->pmap, save->save_r4);	/* Remove mapping */
-			return 1;								/* Return and check for ASTs... */
-	
-			
-/*
- *		Allows direct control of alignment handling.
- *
- *		The bottom bit of the parameter is used to set the control bit, enaNotifyEM.
- */
-		case dgAlign:
-		
-			temp = dgWork.dgFlags;				/* Save the old values */
-			
-			temp2 = (save->save_r4 & 1) << (31 - enaNotifyEMb);	/* Move parms into flag format */
-			dgWork.dgFlags = (temp & ~enaNotifyEM) | temp2;	/* Set the flag */
-		
-			save->save_r3 = (temp >> (31 - enaNotifyEMb)) & 1;	/* Return the original */
-			
-			return 1;								/* Return and check for ASTs... */
-			
-/*
- *		Return info for boot screen
- */
-		case dgBootScreen:
-			
-			ml_set_interrupts_enabled(1);
-			(void)copyout((char *)&vinfo, save->save_r4, sizeof(struct vc_info));	/* Copy out the video info */ 
-			ml_set_interrupts_enabled(0);
-			return 1;								/* Return and check for ASTs... */
-			
-/*
- *		Don't return info for boot screen
- */
-		case dgCPNull:
-			
-			ml_set_interrupts_enabled(1);
-			(void)copyout((char *)&vinfo, save->save_r4, 0);	/* Copy out nothing */
-			ml_set_interrupts_enabled(0);
-			return 1;								/* Return and check for ASTs... */
-			
-/*
- *		Test machine check handler - only on 64-bit machines
- */
-		case dgmck:
-			if(!(PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit)) return 0;	/* Leave if not correct machine */
-
-			fwEmMck(save->save_r4, save->save_r5, save->save_r6, save->save_r7, save->save_r8, save->save_r9);	/* Start injecting */ 
-
-			return -1;								/* Return and don't check for ASTs... */
-
-/*
- *		Set 64-bit on or off - only on 64-bit machines
- */
-		case dg64:
-			if(!(PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit)) return 0;	/* Leave if not correct machine */
-
-			srrwrk = save->save_srr1 >> 63;			/* Save the old 64-bit bit */
-			
-			save->save_srr1 = (save->save_srr1 & 0x7FFFFFFFFFFFFFFFULL) | (save->save_r4 << 63);	/* Set the requested mode */
-			save->save_r3 = srrwrk;					/* Return the old value */
-
-			task_clear_64BitAddr(current_thread()->task);
-			if((save->save_r4 & 1)) task_set_64BitAddr(current_thread()->task);
-
-			return -1;								/* Return and don't check for ASTs... */
-		
-/*
- *		Test the probe read function
- */
-
-		case dgProbeRead:
-		
-			src = (save->save_r4 << 32) | (0x00000000FFFFFFFFULL & save->save_r5);	/* Merge into 64-bit */
-			save->save_r3 = ml_probe_read_64(src, &temp);	/* Try the address */
-			save->save_r4 = temp;					/* Return the data */
-			return -1;								/* Return and don't check for ASTs */
-		
-/*
- *		Do perf monitor stuff
- */
-
-		case dgPerfMon:
-		
-			setPmon(save->save_r4, save->save_r5);	/* Go load up MMCR0 and MMCR1 */
-			return -1;								/* Return and don't check for ASTs */
-
-/*
- *		Map a page
- *		Don't bother to check for any errors.
- *		parms - vaddr, paddr, prot, attributes
- */
-		case dgMapPage:
-					
-			(void)mapping_make(current_thread()->map->pmap, /* Map in the page */ 
-				(addr64_t)(((save->save_r5 & 0xFFFFFFFF) << 32) | (save->save_r5 & 0xFFFFFFFF)), save->save_r6, 0, 1, VM_PROT_READ|VM_PROT_WRITE);
-
-			return -1;								/* Return and check for ASTs... */
-		
-/*
- *		SCOM interface
- *		parms - pointer to scomcomm
- */
-		case dgScom:
-					
-			ret = copyin(save->save_r4, (void *)&sarea, sizeof(scomcomm));	/* Get the data */
-			if(ret) return 0;						/* Copyin failed - return an exception */
-			
-			sarea.scomstat = 0xFFFFFFFFFFFFFFFFULL;	/* Clear status */
-			cpu = cpu_number();						/* Get us */
-			
-			if((sarea.scomcpu < real_ncpus) && PerProcTable[sarea.scomcpu].ppe_vaddr->running) {
-				if(sarea.scomcpu == cpu) {			/* Is it us? */
-					if(sarea.scomfunc) {			/* Are we writing */
-						sarea.scomstat = ml_scom_write(sarea.scomreg, sarea.scomdata);	/* Write scom */
-					}
-					else {
-						sarea.scomstat = ml_scom_read(sarea.scomreg, &sarea.scomdata);	/* Read scom */
-					}
-				}
-				else {									/* Otherwise, tell the other processor */
-					(void)cpu_signal(sarea.scomcpu, SIGPcpureq, CPRQscom ,(unsigned int)&sarea);	/* Ask him to do this */
-					(void)hw_cpu_sync((unsigned int*)&sarea.scomstat, LockTimeOut);	/* Wait for the other processor to finish the SCOM operation */
-				}
-			}
-
-			ret = copyout((void *)&sarea, save->save_r4, sizeof(scomcomm));	/* Return the data */
-			if(ret) return 0;						/* Copyout failed - return an exception */
-	
-			return -1;								/* Return and check for ASTs... */
-		
-/*
- *		Bind current thread to a processor. Parm is processor port.  If port is 0, unbind. 
- */
-	
-		case dgBind:
-
-			if(save->save_r4 == 0) {				/* Are we unbinding? */
-				thread_bind(PROCESSOR_NULL);		/* Unbind us */
-				save->save_r3 = KERN_SUCCESS;		/* Set success */
-				return -1;							/* Return and check asts */
-			}
-
-			ret = ipc_right_lookup_write(current_space(), (mach_port_name_t)save->save_r4, 
-				&ientry);							/* Look up the IPC entry */
-			
-			if(ret != KERN_SUCCESS) {				/* Couldn't find it */
-				save->save_r3 = ret;				/* Pass back return */
-				return -1;							/* Return and check asts */
-			}
-
-			port = (ipc_port_t)ientry->ie_object;	/* Get the actual port */
-
-			if (!ip_active(port) || (ip_kotype(port) != IKOT_PROCESSOR)) {	/* Active and a processor? */
-				is_write_unlock(current_space());	/* Unlock the space */
-				save->save_r3 = KERN_INVALID_ARGUMENT;	/* This port is not a processor */
-				return -1;							/* Return and check asts */
-			}
-
-			prssr = (processor_t)port->ip_kobject;	/* Extract the processor */
-			is_write_unlock(current_space());		/* All done with the space now, unlock it */
-			
-/*
- *			The following probably isn't valid if a processor is in the process of going offline,
- *			but who cares, this is a diagnostic interface...
- */
-			
-			if(prssr->state == PROCESSOR_SHUTDOWN) {	/* Are we trying to bind to an offline processor? */
-				save->save_r3 = KERN_INVALID_ARGUMENT;	/* This processor is offline */
-				return -1;							/* Return and check asts */
-			}
-		
-			thread_bind(prssr);						/* Bind us to the processor */
-			thread_block(THREAD_CONTINUE_NULL);		/* Make it so */
-	
-			save->save_r3 = KERN_SUCCESS;			/* Set success */
-			return -1;								/* Return and check asts */
-			
-/*
- *		Return per_proc for the named processor.  Pass in a port.  Returns per_proc or 0 if failure
- */
-	
-		case dgPproc:
-
-			ret = ipc_right_lookup_write(current_space(), (mach_port_name_t)save->save_r4, 
-				&ientry);							/* Look up the IPC entry */
-			
-			if(ret != KERN_SUCCESS) {				/* Couldn't find it */
-				save->save_r3 = 0;					/* Pass back return */
-				return -1;							/* Return and check asts */
-			}
-
-			port = (ipc_port_t)ientry->ie_object;	/* Get the actual port */
-
-			if (!ip_active(port) || (ip_kotype(port) != IKOT_PROCESSOR)) {	/* Active and a processor? */
-				is_write_unlock(current_space());	/* Unlock the space */
-				save->save_r3 = 0;					/* This port is not a processor */
-				return -1;							/* Return and check asts */
-			}
-
-			prssr = (processor_t)port->ip_kobject;	/* Extract the processor */
-			is_write_unlock(current_space());		/* All done with the space now, unlock it */
-			
-			save->save_r3 = (uint64_t)(uint32_t)PerProcTable[prssr->cpu_id].ppe_vaddr;	/* Pass back the per_proc */
-			return -1;								/* Return and check asts */
-
-/*
- *		Allocate contiguous memory in the kernel. Pass in size, pass back vaddr or 0 for error
- *		Note that this must be explicitly released by the user.  There is an "issue"
- *		if we try to allocate directly into the user: the contiguous area has a kernel wire
- *		on it.   If we terminate, we will hang waiting for wire to be released.  Ain't no
- *		way that will happen,  so we do it in the kernel and make them release it.  That way
- *		we will leak rather than hang. 
- *		
- */
-		case dgAcntg:
-					
-			addrs = 0;								/* Clear just in case */
-			
-			ret = kmem_alloc_contig(kernel_map, &addrs, (vm_size_t)save->save_r4,
-						PAGE_MASK, 0, 0, FALSE);						/* That which does not make us stronger, kills us... */
-			if(ret != KERN_SUCCESS) addrs = 0;		/* Pass 0 if error */
-		
-			save->save_r3 = (uint64_t)addrs;		/* Pass back whatever */
-			return -1;								/* Return and check for ASTs... */
-		
-
-/*
- *		Return physical address of a page in the kernel
- */
-		case dgKlra:
-		
-			save->save_r3 = pmap_find_phys(kernel_pmap, save->save_r4);	/* Get read address */
-			return -1;								/* Return no AST checking... */
-
-/*
- *		Release kernel memory - intent is to release contiguous memory
- */
-		case dgKfree:
-		
-			kmem_free( kernel_map, (vm_address_t) save->save_r4, (vm_size_t)save->save_r5);
-			return -1;								/* Return no AST checking... */
-
-		
-		case dgWar:									/* Set or reset workaround flags */
-		
-			save->save_r3 = (uint32_t)warFlags;		/* Get the old flags */
-			oldwar = warFlags;						/* Remember the old war flags */
-			
-			subc = (int32_t)save->save_r4;			/* Extract the subcommand */
-			switch(subc) {							/* Do what we need */
-				case 1:								/* Replace all */
-					warFlags = (uint32_t)save->save_r5;	/* Do them all */
-					break;
-				
-				case 2:								/* Turn on selected workarounds */
-					warFlags = warFlags | (uint32_t)save->save_r5;
-					break;
-					
-				case 3:								/* Turn off selected workarounds */
-					warFlags = warFlags & ~((uint32_t)save->save_r5);
-					break;
-				
-				case 4:								/* Start up selected workaround */
-					break;
-				
-				case 5:								/* Stop selected workaround */
-					break;
-				
-				case 6:								/* Reset specific workaround parameters to default */
-					break;
-				
-				case 7:								/* Set workaround parameters */
-					break;
-
-				default:
-				
-					break;
-					
-			}
-
-			save->save_r3 = oldwar;					/* Pass back original */
-			return -1;				
-
-
-		default:									/* Handle invalid ones */
-			return 0;								/* Return an exception */
-		
-	}
-
-};
-
-kern_return_t
-testPerfTrap(int trapno, struct savearea *ss, unsigned int dsisr, addr64_t dar)
-{
-
-	if(trapno != T_ALIGNMENT) return KERN_FAILURE;
-
-	kprintf("alignment exception at %08llX, srr1 = %08llX, dsisr = %08X, dar = %08llX\n",
-			ss->save_srr0, ss->save_srr1, dsisr, dar);
-		
-	return KERN_SUCCESS;
-}
-
diff --git a/osfmk/ppc/Diagnostics.h b/osfmk/ppc/Diagnostics.h
deleted file mode 100644
index 17e31a323..000000000
--- a/osfmk/ppc/Diagnostics.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_FREE_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-
-/*
- * Here are the Diagnostic interface interfaces
- * Lovingly crafted by Bill Angell using traditional methods
- *	Keep selectors in sync with the x86 version where possible.	
-*/
-#ifdef	KERNEL_PRIVATE
-
-#ifndef _DIAGNOSTICS_H_
-#define _DIAGNOSTICS_H_
-
-#ifndef __ppc__
-#error This file is only useful on PowerPC.
-#endif
-#include <ppc/savearea.h>
-
-int diagCall(struct savearea *save);
-
-#define diagSCnum 0x00006000
-
-#define dgAdjTB 0
-#define dgLRA 1
-#define dgpcpy 2
-#define dgreset 3
-#define dgtest 4
-#define dgBMphys 5
-#define dgUnMap 6
-#define dgBootScreen 7
-#define dgFlush 8
-#define dgAlign 9
-#define dgprw 10
-#define dgmck 11
-#define dg64 12
-#define dgProbeRead 13
-#define dgCPNull 14
-#define dgPerfMon 15
-#define dgMapPage 16
-#define dgScom 17
-#define dgBind 18
-#define dgPproc 19
-#define dgAcntg 20
-#define dgKlra 21
-#define dgKfree 22
-#define	dgWar 23
-
-
-typedef struct diagWork {			/* Diagnostic work area */
-
-	unsigned int dgLock;			/* Lock if needed */
-	unsigned int dgFlags;			/* Flags */
-#define enaExpTrace 0x00000001
-#define enaExpTraceb 31
-#define enaUsrFCall 0x00000002
-#define enaUsrFCallb 30
-#define enaUsrPhyMp 0x00000004
-#define enaUsrPhyMpb 29
-#define enaDiagSCs  0x00000008
-#define enaDiagSCsb  28
-#define enaDiagDM  0x00000010
-#define enaDiagSDMb  27
-#define enaDiagEM  0x00000020
-#define enaDiagEMb  26
-#define enaDiagTrap  0x00000040
-#define enaDiagTrapb  25
-#define enaNotifyEM  0x00000080
-#define enaNotifyEMb  24
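-/*
- * Note: each enaXXXb value above is the big-endian PPC bit number of its
- * mask bit; as a sanity check, mask == (1u << (31 - enaXXXb)) for every
- * pair (e.g., enaDiagSCs == 1u << (31 - enaDiagSCsb) == 0x00000008).
- */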
-	
-	unsigned int dgMisc0;
-	unsigned int dgMisc1;
-	unsigned int dgMisc2;
-	unsigned int dgMisc3;
-	unsigned int dgMisc4;
-	unsigned int dgMisc5;
-
-} diagWork;
-
-typedef struct scomcomm {
-	uint16_t	scomcpu;	/* CPU number */
-	uint16_t	scomfunc;	/* 0 = read; 1 = write */
-	uint32_t	scomreg;	/* SCOM register */
-	uint64_t	scomstat;	/* returned status */
-	uint64_t	scomdata;	/* input for write,  output for read */
-} scomcomm;
-
-extern diagWork dgWork;
-extern int diagTrap(struct savearea *, unsigned int);
-
-
-#endif /* _DIAGNOSTICS_H_ */
-
-#endif /* KERNEL_PRIVATE */
diff --git a/osfmk/ppc/Emulate.s b/osfmk/ppc/Emulate.s
deleted file mode 100644
index 76ea4eb1c..000000000
--- a/osfmk/ppc/Emulate.s
+++ /dev/null
@@ -1,1445 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* 																							
- 	Emulate.s 
-
-	Emulate instructions and traps.
-
-	Lovingly crafted by Bill Angell using traditional methods and only natural or recycled materials.
-	No animal products are used other than rendered otter bile and deep fried pork lard.
-
-*/
-
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <ppc/exception.h>
-#include <ppc/cpu_capabilities.h>
-#include <mach/machine/vm_param.h>
-#include <assym.s>
-
-#define traceInst 30
-#define dssAllDone 29
-
-;			General stuff that happens here:
-;				1)	All general context saved, interrupts off, translation off
-;				2)	Vector and floating point disabled, but there may be live context.
-;					This code is responsible for saving and restoring what is used. This
-;					includes exception states, java mode, etc.
-;				3)	No attempt is made to resolve page faults.  PTE misses are handled
-;					automatically, but actual faults (a la copyin/copyout) are not. If
-;					a fault does occur, the exception that caused entry to the emulation
-;					routine is remapped to either an instruction or data miss (depending
-;					upon the stage detected) and redrived through the exception handler.
-;					The only time that an instruction fault can happen is when a different
-;					processor removes a mapping between our original fault and when we
-;					fetch the assisted instruction. For an assisted instruction, data
-;					faults should not occur (except in the MP case).  For a purely
-;					emulated instruction, faults can occur.
-;
-;
-
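-;			As a hedged C sketch of the redrive policy described above (constant
-;			names taken from the handlers below; illustrative only, not compiled):
-;
-;				if (fault_during_emulation) {
-;					exception  = ifetch_fault ? T_INSTRUCTION_ACCESS : T_DATA_ACCESS;
-;					save_flags |= SAVredrive;	/* re-enter the exception handler */
-;				}
-;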
-
-			.align	5
-			.globl	EXT(Emulate)
-
-LEXT(Emulate)
-
-			bf--	pf64Bitb,emn64					; Skip if not 64-bit
-			b		EXT(Emulate64)					; Jump to the 64-bit code...
-			
-emn64:		mfsprg	r31,0							; Get the per_proc
-			lwz		r12,savesrr1+4(r13)				; Get the exception info
-			rlwinm.	r0,r12,0,SRR1_PRG_ILL_INS_BIT,SRR1_PRG_ILL_INS_BIT	; Emulation candidate?
-			lwz		r30,dgFlags(0)					; Get the flags
-			beq+	eExit							; Nope, do not try to emulate...
-
-			rlwinm.	r0,r30,0,enaDiagEMb,enaDiagEMb	; Do we want to try to emulate something?
-			mfsprg	r28,2							; Get the processor features
-			beq+	eExit							; No emulation allowed...
-
-			rlwinm.	r28,r28,0,pfAltivecb,pfAltivecb	; Do we have Altivec on this machine?
-			beq		eNoVect							; Nope, no Altivec...
-			
-			dssall									; We need to kill streams because we are going to flip to problem state
-			sync
-
-eNoVect:	bl		eIFetch							; Get the instruction image
-			bne-	eRedriveAsISI					; Go redrive this as an ISI...	
-
-			rlwinm.	r0,r10,0,0,5					; See if we have the "special" op code here
-			rlwinm	r20,r10,16,22,31				; Set rS/rD and rA
-			bne+	eExit							; Not special op, ignore...
-
-			rlwinm	r0,r10,31,22,31					; Extract the sub op code
-			crclr	cr1_eq							; Clear
-			rlwimi	r20,r10,14,15,16				; Move bits 29 and 30 of instruction to 15 and 16 of DSISR
-			cmplwi	r0,790							; lhbrx?
-			rlwimi	r20,r10,8,17,17					; Move bit 25 to bit 17
-			cror	cr1_eq,cr1_eq,cr0_eq			; Remember
-			cmplwi	r0,534							; lwbrx?
-			rlwimi	r20,r10,3,18,21					; Move bit 21-24 to bit 18-21
-			cror	cr1_eq,cr1_eq,cr0_eq			; Remember
-			cmplwi	r0,918							; sthbrx?
-			cror	cr1_eq,cr1_eq,cr0_eq			; Remember
-			cmplwi	r0,662							; stwbrx?
-			cror	cr1_eq,cr1_eq,cr0_eq			; Remember
-			cmplwi	r0,1014							; dcbz?
-			cror	cr1_eq,cr1_eq,cr0_eq			; Remember
-			cmplwi	r0,533							; lswx?
-			cror	cr1_eq,cr1_eq,cr0_eq			; Remember
-			cmplwi	r0,661							; stswx?
-			cror	cr1_eq,cr1_eq,cr0_eq			; Remember
-			bne		cr1_eq,eNotIndex				; Go check non-index forms...
-
-			rlwinm.	r21,r10,19,24,28				; Extract index to rA to build EA
-			rlwinm	r22,r10,24,24,28				; Extract index to rB
-			addi	r24,r13,saver0+4				; Point to the start of registers
-			li		r19,0							; Assume 0 base
-			beq		eZeroBase						; Yes...
-			lwzx	r19,r24,r21						; Get the base register value
-			
-eZeroBase:	lwzx	r22,r24,r22						; Get the index value
-			add		r22,r22,r19						; Get DAR
-			b		eFinishUp						; Done, go finish up...
-						
-eNotIndex:	cmplwi	r0,725							; stswi?
-			cror	cr1_eq,cr1_eq,cr0_eq			; Remember
-			cmplwi	r0,597							; lswi?
-			cror	cr1_eq,cr1_eq,cr0_eq			; Remember
-			bne		cr1,eExit						; Not one we handle...
-	
-			rlwinm.	r21,r10,19,24,28				; Extract index to rA to build EA
-			addi	r24,r13,saver0+4				; Point to the start of registers
-			li		r22,0							; Assume 0 base
-			beq		eFinishUp						; Yes, it is...
-			lwzx	r22,r24,r21						; Get the base register value
-			
-eFinishUp:	stw		r20,savedsisr(r13)				; Set the DSISR
-			li		r11,T_ALIGNMENT					; Get the exception code
-			stw		r22,savedar+4(r13)				; Save the DAR
-			stw		r11,saveexception(r13)			; Set the exception code
-			b		EXT(AlignAssist)				; Go emulate the handler...
-
-
-eExit:		b		EXT(EmulExit)					; Just return for now...
-
-
-;
-;			Fetch the failing instruction.
-;			Image returned in R10 if CR0_EQ is set; otherwise, an ISI should be generated.
-;			R1 has the DSISR if access failed.
-;
-
-			.align	5
-
-eIFetch:	lwz		r23,savesrr1+4(r13)				; Get old MSR
-			mflr	r28								; Save return
-
-			rlwinm	r3,r23,32-MSR_DR_BIT+MSR_IR_BIT,MSR_DR_BIT,MSR_DR_BIT	; Move IR to DR for ifetch
-			mfmsr	r30								; Save the MSR for now
-			rlwimi	r3,r23,32-MSR_RI_BIT+MSR_DR_BIT,MSR_RI_BIT,MSR_RI_BIT	; Move DR to RI for ifetch
-			
-			lwz		r23,savesrr0+4(r13)				; Get instruction address
-			or		r3,r23,r3						; Turn on the DR and RI bit if translation was on
-			
-			crset	cr0_eq							; Set this to see if we failed
-			mtmsr	r3								; Flip RI and, if IR was set, DR
-			isync
-			
-			lwz		r10,0(r23)						; Fetch the instruction
-			
-			mtmsr	r30								; Trans and RI off
-			isync
-			
-			mtlr	r28								; Restore the LR
-			blr										; Return with instruction image in R10
-
-
-;
-;			Redrive as an ISI
-;
-
-eRedriveAsISI:
-			lwz		r6,savesrr1+4(r13)				; Get the srr1 value
-			lwz		r4,SAVflags(r13)				; Pick up the flags
-			li		r11,T_INSTRUCTION_ACCESS		; Set failing instruction fetch code
-			rlwimi	r6,r1,0,1,4						; Move the DSISR bits to the SRR1
-			oris	r4,r4,hi16(SAVredrive)			; Set the redrive bit
-			stw		r11,saveexception(r13)			; Set the replacement code
-			stw		r4,SAVflags(r13)				; Set redrive request
-			stw		r6,savesrr1+4(r13)				; Set the srr1 value
-			b		EXT(EmulExit)					; Bail out to handle ISI...
-
-
-;
-;			This code emulates instructions that have failed because of operand 
-;			alignment.  We decode the DSISR to figure out what we need to do.
-;
-;			DSISR:
-;				0001FC00 - Instruction designation
-#define iFloat 12
-#define iOptype1 15
-#define iOptype2 16
-#define iOptype3 18
-#define iOptype4 19
-#define iUpdate 17
-#define iStore 20
-#define iDouble 21
-#define iNotify 22
-;				000003E0 - Target/Source register
-;				0000001F - Register to update if update form
-;
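-;			As a hedged C sketch of the field extraction above (names invented
-;			for illustration only):
-;
-;				inst_code = (dsisr >> 10) & 0x7F;	/* 0001FC00 */
-;				reg_disp  = (dsisr >>  5) & 0x1F;	/* 000003E0 */
-;				update_rA =  dsisr        & 0x1F;	/* 0000001F */
-;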
-
-			.align	5
-			.globl	EXT(AlignAssist)
-
-LEXT(AlignAssist)
-			bf--	pf64Bitb,aan64					; Skip if not 64-bit
-			b		EXT(AlignAssist64)				; Jump to the 64-bit code...
-			
-aan64:		lwz		r20,savedsisr(r13)				; Get the DSISR
-			li		r0,0							; Assume we emulate
-			mfsprg	r31,0							; Get the per_proc
-			mtcrf	0x10,r20						; Put instruction ID in CR for later
-			lwz		r21,spcFlags(r31)				; Grab the special flags
-			stw		r0,savemisc3(r13)				; Assume that we emulate ok
-			mtcrf	0x08,r20						; Put instruction ID in CR for later
-			rlwinm.	r0,r21,0,runningVMbit,runningVMbit	; Are we running a VM?
-			mtcrf	0x04,r20						; Put instruction ID in CR for later
-			lwz		r22,savesrr1+4(r13)				; Get the SRR1
-			bne-	aaPassAlong						; We are in a VM, no emulation for alignment exceptions...
-			lwz		r19,dgFlags(0)					; Get the diagnostics flags
-			crxor	iFloat,iOptype1,iOptype2		; Set this to 0 if both bits are either 0 or 1
-			mr		r26,r20							; Save the DSISR
-			rlwinm.	r0,r22,0,MSR_SE_BIT,MSR_SE_BIT	; Were we single stepping?
-			lwz		r23,savedar+4(r13)				; Pick up the address that we want to access
-			crnot	traceInst,cr0_eq				; Remember if trace is on
-			
-			rlwinm.	r0,r19,0,enaNotifyEMb,enaNotifyEMb	; Should we notify that an alignment exception happened?
-			mfmsr	r30								; Save the MSR for now
-			crnot	iNotify,cr0_eq					; Remember to tell someone we did this				
-			li		r29,emfp0						; Point to work area
-			crxor	iFloat,iFloat,iOptype3			; Set true if we have a floating point instruction
-			dcbz	r29,r31							; Clear and allocate a cache line for us to work in
-			rlwinm	r24,r20,3,24,28					; Get displacement to register to update if update form
-			rlwimi	r20,r20,24,28,28				; Move load/store indication to the bottom of index
-			rlwinm	r22,r22,0,MSR_DR_BIT,MSR_DR_BIT	; Move rupt DR to DR for ifetch
-			rlwimi	r20,r20,26,27,27				; Move single/double indication to just above the bottom
-			rlwimi	r22,r22,32-MSR_RI_BIT+MSR_DR_BIT,MSR_RI_BIT,MSR_RI_BIT	; Move DR to RI for i-fetch
-			lis		r29,hi16(EXT(aaFPopTable))		; High part of FP branch table
-			or		r22,r30,r22						; Set the DR and RI bits if translation was on
-			bf-		iFloat,aaNotFloat				; This is not a floating point instruction...
-			ori		r29,r29,lo16(EXT(aaFPopTable))	; Low part of FP branch table
-			
-			rlwimi	r29,r20,0,22,28					; Index into table based upon register||iDouble||iStore
-			mtctr	r29								; Get set to call the function	
-			bt		iStore,aaFPstore				; This is an FP store...
-		
-;
-;			Here we handle floating point loads
-;			
-
-aaFPload:	crset	cr0_eq							; Set this to see if we failed
-			mtmsr	r22								; Flip DR, RI
-			isync
-			
-			lwz		r10,0(r23)						; Get the first word
-			bf-		cr0_eq,aaLdNotDbl				; Jump out if we DSIed...
-			bf		iDouble,aaLdNotDbl				; this is not a double...
-			lwz		r11,4(r23)						; Get the second half
-			
-aaLdNotDbl:	mr		r4,r0							; Save the DAR if we failed the access
-			
-			mtmsr	r30								; Turn off translation again
-			isync
-			
-			bf-		cr0_eq,aaRedriveAsDSI			; Go redrive this as a DSI...	
-			
-			stw		r10,emfp0(r31)					; Save the first half
-			stw		r11,emfp0+4(r31)				; Save the second half, just in case we need it
-			
-			bctrl									; Go set the target FP register
-
-			b		aaComExit						; All done, go exit...			
-		
-;
-;			Here we handle floating point stores
-;			
-
-			.align	5
-
-aaFPstore:	bctrl									; Go save the source FP register
-			
-			lwz		r10,emfp0(r31)					; Get first word
-			crandc	iDouble,iDouble,iOptype4		; Change to 4-byte access if stfiwx
-			lwz		r11,emfp0+4(r31)				; and the second
-			bf+		iOptype4,aaNotstfiwx			; This is not a stfiwx...
-			mr		r10,r11							; The stfiwx wants to store the second half
-
-aaNotstfiwx:
-			crset	cr0_eq							; Set this to see if we failed
-			mtmsr	r22								; Flip DR, RI
-			isync
-			
-			stw		r10,0(r23)						; Save the first word
-			bf-		cr0_eq,aaStNotDbl				; Jump out if we DSIed...
-			bf		iDouble,aaStNotDbl				; this is not a double...
-			stw		r11,4(r23)						; Save the second half
-			
-aaStNotDbl:	mr		r4,r0							; Save the DAR if we failed the access
-			mtmsr	r30								; Turn off
-			isync
-			
-			bf-		cr0_eq,aaRedriveAsDSI			; Go redrive this as a DSI...
-
-;
-;			Common exit routines
-;
-
-aaComExit:	lwz		r10,savesrr0+4(r13)				; Get the failing instruction address
-			add		r24,r24,r13						; Offset to update register
-			li		r11,T_IN_VAIN					; Assume we are all done
-			addi	r10,r10,4						; Step to the next instruction
-			bf		iUpdate,aaComExNU				; Skip if not an update form...
-			stw		r23,saver0+4(r24)				; Update the target
-			
-aaComExNU:	lwz		r9,SAVflags(r13)				; Get the flags
-			stw		r10,savesrr0+4(r13)				; Set new PC
-			bt-		traceInst,aaComExitrd			; We are tracing, go emulate trace...
-			bf+		iNotify,aaComExGo				; Nothing special here, go...
-	
-			li		r11,T_ALIGNMENT					; Set the we just did an alignment exception....
-			
-aaComExGo:	b		EXT(EmulExit)					; We are done, no tracing on...
-
-
-;
-;			This is not a floating point operation
-;
-;			The table of these emulation routines is indexed by taking the low order 4 bits of
-;			the instruction code in the DSISR and subtracting 7.  If this comes up negative,
-;			the instruction is not to be emulated.  Then we add bit 0 of the code * 4.  This
-;			gives us a fairly compact and almost unique index.  Both lmw and stmw map to 0 so
-;			that one needs to be further reduced, and we end up with holes at a few indexes.
-;
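-;			As a C sketch of that indexing (variable names illustrative only):
-;
-;				slot = (inst_code & 0x0F) - 7;		/* low 4 bits, minus 7 */
-;				if (slot < 0) pass_along();			/* lwarx, stwcx., eciwx, ecowx */
-;				slot += ((inst_code >> 6) & 1) * 4;	/* add bit 0 of the code * 4 */
-;				target = aaEmTable + slot * 4;		/* each entry is one branch */
-;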
-
-			.align	5
-
-aaNotFloat:
-			lis		r19,hi16(aaEmTable)				; Point to high part of table address
-			rlwinm	r3,r26,24,26,29					; Isolate last 4 bits of op type * 4
-			rlwimi	r19,r26,20,27,27				; Get bit 0 of instruction code * 4 into bottom of table base
-			addic.	r3,r3,-28						; Subtract 7*4 to adjust index
-			ori		r19,r19,lo16(aaEmTable)			; Low part of table address
-			blt-	aaPassAlong						; We do not handle any of these (lwarx, stwcx., eciwx, ecowx)...
-			add		r19,r19,r3						; Point to emulation routine
-			rlwinm	r18,r26,30,24,28				; Get the target/source register displacement
-
-			mtctr	r19								; Set the routine address
-			
-			bctr									; Go emulate the instruction...
-
-;
-;			This is the table of non-floating point emulation routines.
-;			It is indexed by the code immediately above.
-		
-			.align	5							
-
-aaEmTable:
-			b		aaLmwStmw						; This for lmw/stmw
-			b		aaLswx							; This for lswx
-			b		aaLswi							; This for lswi
-			b		aaStswx							; This for stswx
-			b		aaStswi							; This for stswi
-			b		aaLwbrx							; This for lwbrx
-			b		aaPassAlong						; This an invalid index (6)
-			b		aaStwbrx						; This for stwbrx
-			b		aaPassAlong						; This an invalid index (8)
-			b		aaLhbrx							; This for lhbrx
-			b		aaPassAlong						; This an invalid index (A)
-			b		aaSthbrx						; This for sthbrx
-			b		aaDcbz							; This for dcbz
-			b		aaPassAlong						; This an invalid index (D)
-			b		aaPassAlong						; This an invalid index (E)
-			b		aaPassAlong						; This an invalid index (F)
-
-
-;
-;			Here we handle the set up for the lmw and stmw.  After that, we split off to the
-;			individual routines.
-;
-;			Note also that after some set up, all of the string instructions come through here as well.
-;
-			.align	5
-						
-aaLmwStmw:
-			rlwinm	r17,r18,31,1,29					; Convert doubleword-based index to words
-			li		r28,0							; Set no extra bytes to move (used for string instructions)
-			subfic	r17,r17,32*4					; Calculate the length of the transfer
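-;			i.e., bytes_to_move = (32 - first_reg) * 4; r18 arrives as first_reg*8
-;			(a doubleword displacement into the savearea) and is halved above.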
-
-aaLSComm:	addi	r19,r13,saver0+4				; Offset to registers in savearea
-			mr		r16,r23							; Make a hunk pointer
-			
-			bt		iUpdate,aaStmw					; This is the stmw...
-			
-;
-;			Load multiple word
-;
-
-aaLmwNxt:	cmplwi	cr1,r17,8*4						; Is there enough to move 8?
-			blt-	cr1,aaLmwNxtH					; Not enough for a full hunk...
-			subi	r17,r17,8*4						; Back off for another hunk
-			
-			crset	cr0_eq							; Set this to see if we failed
-			mtmsr	r22								; Flip DR, RI
-			isync
-		
-			lwz		r2,0(r16)						; Load word 0
-			bf-		cr0_eq,aaLmwB1					; Error, bail...
-			lwz		r15,4(r16)						; Load word 1
-			bf-		cr0_eq,aaLmwB1					; Error, bail...
-			lwz		r14,8(r16)						; Load word 2
-			bf-		cr0_eq,aaLmwB1					; Error, bail...
-			lwz		r5,12(r16)						; Load word 3
-			bf-		cr0_eq,aaLmwB1					; Error, bail...
-			lwz		r6,16(r16)						; Load word 4
-			bf-		cr0_eq,aaLmwB1					; Error, bail...
-			lwz		r7,20(r16)						; Load word 5
-			bf-		cr0_eq,aaLmwB1					; Error, bail...
-			lwz		r8,24(r16)						; Load word 6
-			bf-		cr0_eq,aaLmwB1					; Error, bail...
-			lwz		r9,28(r16)						; Load word 7
-		
-aaLmwB1:	mr		r4,r0							; Remember DAR, just in case we failed the access
-			mtmsr	r30								; Turn off DR, RI
-			isync
-
-			bf-		cr0_eq,aaRedriveAsDSI			; We failed, go redrive this as a DSI...
-
-			addi	r16,r16,8*4						; Point up to next input area
-		
-			stwx	r2,r19,r18						; Store register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			stwx	r15,r19,r18						; Store register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			stwx	r14,r19,r18						; Store register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			stwx	r5,r19,r18						; Store register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			stwx	r6,r19,r18						; Store register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			stwx	r7,r19,r18						; Store register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			stwx	r8,r19,r18						; Store register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			stwx	r9,r19,r18						; Store register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-
-			b		aaLmwNxt						; Do the next hunk...
-
-			.align	5
-			
-aaLmwNxtH:	cmplwi	cr1,r17,4*4						; Do we have 4 left?
-			blt		cr1,aaLmwL4						; Nope...
-
-			subi	r17,r17,4*4						; Set count properly
-			
-			crset	cr0_eq							; Set this to see if we failed
-			mtmsr	r22								; Flip DR, RI, and maybe PR on
-			isync
-		
-			lwz		r2,0(r16)						; Load word 0
-			bf-		cr0_eq,aaLmwB2					; Error, bail...
-			lwz		r15,4(r16)						; Load word 1
-			bf-		cr0_eq,aaLmwB2					; Error, bail...
-			lwz		r14,8(r16)						; Load word 2
-			bf-		cr0_eq,aaLmwB2					; Error, bail...
-			lwz		r5,12(r16)						; Load word 3
-		
-aaLmwB2:	mr		r4,r0							; Remember DAR, just in case we failed the access
-			mtmsr	r30								; Turn off DR, RI
-			isync
-
-			bf-		cr0_eq,aaRedriveAsDSI			; We failed, go redrive this as a DSI...
-		
-			addi	r16,r16,4*4						; Point up to next input area
-			
-			stwx	r2,r19,r18						; Store register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			stwx	r15,r19,r18						; Store register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			stwx	r14,r19,r18						; Store register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			stwx	r5,r19,r18						; Store register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-
-aaLmwL4:	or.		r5,r17,r28						; Do we have anything left?
-			cmplwi	cr1,r17,(2*4)					; Do we have one, two, or three full words left?
-			cmplwi	cr2,r17,0						; Do we have no full words left?
-			beq		aaComExit						; Nothing left...
-
-			crset	cr0_eq							; Set this to see if we failed
-			mtmsr	r22								; Flip DR, RI, and maybe PR on
-			isync
-
-			beq-	cr2,aaLmwBy						; No full words, get bytes...
-			
-			lwz		r2,0(r16)						; Pick up first word
-			bf-		cr0_eq,aaLmwDn					; Read failed, escape...
-			addi	r16,r16,4						; Next input location
-			blt		cr1,aaLmwBy						; We only had one, we are done...
-
-			lwz		r15,0(r16)						; Pick up second word
-			bf-		cr0_eq,aaLmwDn					; Read failed, escape...
-			addi	r16,r16,4						; Next input location
-			beq		cr1,aaLmwBy						; We had two, we are done...
-
-			lwz		r14,0(r16)						; Load word 3
-			addi	r16,r16,4						; Next input location
-
-aaLmwBy:	cmplwi	cr2,r28,0						; Any trailing bytes to do?
-			li		r8,0							; Clear second trailing byte
-			cmplwi	cr1,r28,2						; Check for 1, 2, or 3
-			li		r9,0							; Clear third trailing byte
-			beq+	cr2,aaLmwDn						; No trailing bytes...
-			
-			lbz		r5,0(r16)						; Pick up first trailing byte
-			bf-		cr0_eq,aaLmwDn					; Read failed, escape...
-			blt		cr1,aaLmwDn						; We only had one, we are done...
-
-			lbz		r8,1(r16)						; Pick up second trailing byte
-			bf-		cr0_eq,aaLmwDn					; Read failed, escape...
-			beq		cr1,aaLmwDn						; We had two, we are done...
-
-			lbz		r9,2(r16)						; Get last trailing byte
-			
-
-aaLmwDn:	rlwinm	r5,r5,24,0,7					; Move first byte to top
-			cmplwi	cr2,r17,0						; Any full words to do?
-			mr		r4,r0							; Remember DAR, just in case we failed the access
-			rlwimi	r9,r8,8,16,23					; Move second byte above third byte
-			cmplwi	cr1,r17,(2*4)					; Do we have one, two, or three full words left?
-			mr		r3,r30							; Set the normal MSR
-			rlwimi	r5,r9,8,8,23					; Move bytes 1 and 2 after 0
-
-			mtmsr	r30								; Turn off DR, RI
-			isync
-
-			bf-		cr0_eq,aaRedriveAsDSI			; We failed, go redrive this as a DSI...
-
-			beq-	cr2,aaLmwCb						; No full words, copy bytes...
-
-			stwx	r2,r19,r18						; Store register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			blt		cr1,aaLmwCb						; We only had one, we are done...
-			
-			stwx	r15,r19,r18						; Store register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			beq		cr1,aaLmwCb						; We had two, we are done...
-
-			stwx	r14,r19,r18						; Store register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-
-aaLmwCb:	mr.		r28,r28							; Any trailing bytes to do?
-			beq+	aaComExit						; Nope, leave...
-
-			stwx	r5,r19,r18						; Store register
-						
-			b		aaComExit						; We are done....
-
-;
-;			Store multiple word
-;
-
-			.align	5
-
-aaStmw:
-			crclr	iUpdate							; Make sure we do not think this is an update form
-
-aaStmwNxt:	cmplwi	cr1,r17,8*4						; Is there enough to move 8?
-			blt-	cr1,aaStmwNxtH					; Not enough for a full hunk...
-			subi	r17,r17,8*4						; Back off for another hunk
-		
-			lwzx	r2,r19,r18						; Fetch register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			lwzx	r15,r19,r18						; Fetch register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			lwzx	r14,r19,r18						; Fetch register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			lwzx	r5,r19,r18						; Fetch register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			lwzx	r6,r19,r18						; Fetch register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			lwzx	r7,r19,r18						; Fetch register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			lwzx	r8,r19,r18						; Fetch register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			lwzx	r9,r19,r18						; Fetch register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			
-			crset	cr0_eq							; Set this to see if we failed
-			mtmsr	r22								; Flip DR, RI, and maybe PR on
-			isync
-
-			stw		r2,0(r16)						; Store word 0
-			bf-		cr0_eq,aaStmwB1					; Error, bail...
-			stw		r15,4(r16)						; Store word 1
-			bf-		cr0_eq,aaStmwB1					; Error, bail...
-			stw		r14,8(r16)						; Store word 2
-			bf-		cr0_eq,aaStmwB1					; Error, bail...
-			stw		r5,12(r16)						; Store word 3
-			bf-		cr0_eq,aaStmwB1					; Error, bail...
-			stw		r6,16(r16)						; Store word 4
-			bf-		cr0_eq,aaStmwB1					; Error, bail...
-			stw		r7,20(r16)						; Store word 5
-			bf-		cr0_eq,aaStmwB1					; Error, bail...
-			stw		r8,24(r16)						; Store word 6
-			bf-		cr0_eq,aaStmwB1					; Error, bail...
-			stw		r9,28(r16)						; Store word 7
-		
-			addi	r16,r16,8*4						; Point up to next output area
-		
-		
-aaStmwB1:	mr		r4,r0							; Remember DAR, just in case we failed the access
-			mtmsr	r30								; Normal MSR
-			isync
-
-			bt-		cr0_eq,aaStmwNxt				; We have more to do and no failed access...
-			b		aaRedriveAsDSI					; We failed, go redrive this as a DSI...
-
-			.align	5
-			
-aaStmwNxtH:	cmplwi	cr1,r17,(4*4)					; Do we have at least 4 left?
-			blt		cr1,aaStmwL4					; Nope...
-			subi	r17,r17,4*4						; Set count properly
-
-			lwzx	r2,r19,r18						; Fetch register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			lwzx	r15,r19,r18						; Fetch register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			lwzx	r14,r19,r18						; Fetch register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			lwzx	r5,r19,r18						; Fetch register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			
-			crset	cr0_eq							; Set this to see if we failed
-			mtmsr	r22								; Flip DR, RI
-			isync
-		
-			stw		r2,0(r16)						; Store word 0
-			bf-		cr0_eq,aaStmwB2					; Error, bail...
-			stw		r15,4(r16)						; Store word 1
-			bf-		cr0_eq,aaStmwB2					; Error, bail...
-			stw		r14,8(r16)						; Store word 2
-			bf-		cr0_eq,aaStmwB2					; Error, bail...
-			stw		r5,12(r16)						; Store word 3
-
-			addi	r16,r16,4*4						; Point up to next output area
-		
-aaStmwB2:	mr		r4,r0							; Remember DAR, just in case we failed the access
-			mtmsr	r30								; Normal MSR
-			isync
-
-			bf-		cr0_eq,aaRedriveAsDSI			; We failed, go redrive this as a DSI...
-
-aaStmwL4:	or.		r5,r17,r28						; Do we have anything left to do?
-			cmplwi	cr1,r17,(2*4)					; Do we have one, two, or three left?
-			cmplwi	cr2,r17,0						; Do we have no full words left?
-			beq		aaComExit						; Nothing left...
-
-			beq-	cr2,aaStmwBy1					; No full words, check out bytes
-
-			lwzx	r2,r19,r18						; Fetch register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			blt		cr1,aaStmwBy1					; We only had one, go save it...
-
-			lwzx	r15,r19,r18						; Fetch register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			beq		cr1,aaStmwBy1					; We had two, go save it...
-
-			lwzx	r14,r19,r18						; Fetch register
-			addi	r18,r18,8						; Next register
-			rlwinm	r18,r18,0,24,28					; Wrap back to 0 if needed
-			
-aaStmwBy1:	mr.		r28,r28							; Do we have any trailing bytes?
-			beq+	aaStmwSt						; Nope...
-			
-			lwzx	r5,r19,r18						; Yes, pick up one extra register
-			
-aaStmwSt:	crset	cr0_eq							; Set this to see if we failed
-			mtmsr	r22								; Flip DR, RI
-			isync
-
-			beq-	cr2,aaStmwBy2					; No words, check trailing bytes...					
-
-			stw		r2,0(r16)						; Save first word
-			bf-		cr0_eq,aaStmwDn					; Store failed, escape...
-			addi	r16,r16,4						; Bump sink
-			blt		cr1,aaStmwBy2					; We only had one, we are done...
-
-			stw		r15,0(r16)						; Save second word
-			bf-		cr0_eq,aaStmwDn					; Store failed, escape...
-			addi	r16,r16,4						; Bump sink
-			beq		cr1,aaStmwBy2					; We had two, we are done...
-
-			stw		r14,0(r16)						; Save third word
-			bf-		cr0_eq,aaStmwDn					; Store failed, escape...
-			addi	r16,r16,4						; Bump sink
-				
-aaStmwBy2:	rlwinm	r2,r5,8,24,31					; Get byte 0
-			cmplwi	cr2,r28,0						; Any trailing bytes to do?
-			rlwinm	r14,r5,24,24,31					; Get byte 3
-			li		r8,0							; Clear second trailing byte
-			cmplwi	cr1,r28,2						; Check for 1, 2, or 3
-			li		r9,0							; Clear third trailing byte
-			beq+	cr2,aaStmwDn					; No trailing bytes...
-			rlwinm	r15,r5,16,24,31					; Get byte 1
-
-			stb		r2,0(r16)						; Save first byte
-			bf-		cr0_eq,aaStmwDn					; Read failed, escape...
-			blt		cr1,aaStmwDn					; We only had one, we are done...
-
-			stb		r15,1(r16)						; Save second byte
-			bf-		cr0_eq,aaStmwDn					; Read failed, escape...
-			beq		cr1,aaStmwDn					; We had two, we are done...
-
-			stb		r14,2(r16)						; Save third byte
-
-aaStmwDn:	mr		r4,r0							; Remember DAR, just in case we failed the access
-			mtmsr	r30								; Normal MSR
-			isync
-
-			bf-		cr0_eq,aaRedriveAsDSI			; We failed, go redrive this as a DSI...
-
-			b		aaComExit						; We are done....
-
-	
-;
-;			Load String Indexed
-;
-
-			.align	5
-			
-aaLswx:		lwz		r17,savexer+4(r13)				; Pick up the XER
-			crclr	iUpdate							; Make sure we think this is the load form
-			rlwinm.	r25,r17,0,25,31					; Get the number of bytes to load
-			rlwinm	r28,r17,0,30,31					; Get the number of bytes past an even word
-			beq-	aaComExit						; Do nothing if 0 length...
-			xor		r17,r25,r28						; Round down to an even word boundary
-			b		aaLSComm						; Join up with common load/store code...
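-;			As a C sketch of the XER decode above (names illustrative only):
-;
-;				nbytes   = xer & 127;			/* string length from XER */
-;				trailing = nbytes & 3;			/* bytes past an even word */
-;				full     = nbytes ^ trailing;	/* round down to a word multiple */
-;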
-
-	
-;
-;			Load String Immediate
-;
-
-			.align	5
-
-aaLswi:		mr		r9,r23							; Save the DAR
-			bl		eIFetch							; Get the instruction image
-			bne-	eRedriveAsISI					; Go redrive this as an ISI...	
-			rlwinm	r25,r10,21,27,31				; Get the number of bytes to load
-			crclr	iUpdate							; Make sure we think this is the load form
-			subi	r25,r25,1						; Back off by 1
-			rlwinm	r25,r25,0,27,31					; Clear back down
-			addi	r25,r25,1						; Add back the 1 to convert 0 to 32
-			rlwinm	r28,r25,0,30,31					; Get the number of bytes past an even word
-			xor		r17,r25,r28						; Round down to an even word boundary
-			mr		r23,r9							; Move back the DAR
-			b		aaLSComm						; Join up with common load/store code...
-	
-;
-;			Store String Indexed
-;
-
-			.align	5
-
-aaStswx:	lwz		r17,savexer+4(r13)				; Pick up the XER
-			crclr	iUpdate							; Make sure this is clear in case we have 0 length
-			rlwinm.	r25,r17,0,25,31					; Get the number of bytes to store
-			rlwinm	r28,r17,0,30,31					; Get the number of bytes past an even word
-			beq-	aaComExit						; Do nothing if 0 length...
-			xor		r17,r25,r28						; Round down to an even word boundary
-			crset	iUpdate							; Make sure we think this is the store form
-			b		aaLSComm						; Join up with common load/store code...
-
-	
-;
-;			Store String Immediate
-;
-
-			.align	5
-
-aaStswi:	mr		r9,r23							; Save the DAR
-			bl		eIFetch							; Get the instruction image
-			bne-	eRedriveAsISI					; Go redrive this as an ISI...	
-			rlwinm	r25,r10,21,27,31				; Get the number of bytes to store
-			crset	iUpdate							; Make sure we think this is the store form
-			subi	r25,r25,1						; Back off by 1
-			rlwinm	r25,r25,0,27,31					; Clear back down
-			addi	r25,r25,1						; Add back the 1 to convert 0 to 32
-			rlwinm	r28,r25,0,30,31					; Get the number of bytes past an even word
-			xor		r17,r25,r28						; Round down to an even word boundary
-			mr		r23,r9							; Move back the DAR
-			b		aaLSComm						; Join up with common load/store code...
-	
-
-;
-;			Load byte-reversed word
-;
-
-			.align	5
-
-aaLwbrx:
-			add		r18,r18,r13						; Index to source register
-
-			crset	cr0_eq							; Set this to see if we failed
-			mtmsr	r22								; Flip DR, RI, and maybe PR on
-			isync
-		
-			lwz		r11,0(r23)						; Load the word
-		
-			mr		r4,r0							; Save the DAR if we failed the access
-			mtmsr	r30								; Restore normal MSR
-			isync
-			
-			bf-		cr0_eq,aaRedriveAsDSI			; We failed, go redrive this as a DSI...
-			
-			rlwinm	r10,r11,8,0,31					; Get byte 0 to 3 and byte 2 to 1
-			rlwimi	r10,r11,24,16,23				; Move byte 1 to byte 2
-			rlwimi	r10,r11,24,0,7					; Move byte 3 to byte 0
-		
-			stw		r10,saver0+4(r18)				; Set the register
-
-			b		aaComExit						; All done, go exit...
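-;			The rlwinm/rlwimi trio above is a 32-bit byte swap; as a C sketch:
-;
-;				swapped = ((x & 0x000000FF) << 24) | ((x & 0x0000FF00) <<  8) |
-;				          ((x & 0x00FF0000) >>  8) | ((x & 0xFF000000) >> 24);
-;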
-
-
-
-;
-;			Store byte-reversed word
-;
-
-			.align	5
-
-aaStwbrx:
-			add		r18,r18,r13						; Index to source register
-			lwz		r11,saver0+4(r18)				; Get the register to store
-
-			rlwinm	r10,r11,8,0,31					; Get byte 0 to 3 and byte 2 to 1
-			rlwimi	r10,r11,24,16,23				; Move byte 1 to byte 2
-			rlwimi	r10,r11,24,0,7					; Move byte 3 to byte 0
-			
-			crset	cr0_eq							; Set this to see if we failed
-			mtmsr	r22								; Flip DR, RI, and maybe PR on
-			isync
-		
-			stw		r10,0(r23)						; Store the reversed word
-		
-			mr		r4,r0							; Save the DAR if we failed the access
-			mtmsr	r30								; Restore normal MSR
-			isync
-			
-			bt+		cr0_eq,aaComExit				; All done, go exit...
-			b		aaRedriveAsDSI					; We failed, go redrive this as a DSI...	
-
-
-
-;
-;			Load byte-reversed halfword
-;
-
-			.align	5
-
-aaLhbrx:
-			add		r18,r18,r13						; Index to source register
-
-			crset	cr0_eq							; Set this to see if we failed
-			mtmsr	r22								; Flip DR, RI, and maybe PR on
-			isync
-		
-			lhz		r11,0(r23)						; Load the halfword
-		
-			mr		r4,r0							; Save the DAR if we failed the access
-			mtmsr	r30								; Restore normal MSR
-			isync
-
-			bf-		cr0_eq,aaRedriveAsDSI			; We failed, go redrive this as a DSI...
-			
-			rlwinm	r10,r11,8,16,23					; Rotate bottom byte up one and clear everything else
-			rlwimi	r10,r11,24,24,31				; Put old second from bottom into bottom
-		
-			stw		r10,saver0+4(r18)				; Set the register
-
-			b		aaComExit						; All done, go exit...
-
-
-;
-;			Store byte-reversed halfword
-;
-
-			.align	5
-
-aaSthbrx:
-			add		r18,r18,r13						; Index to source register
-			lwz		r10,saver0+4(r18)				; Get the register to store
-			rlwinm	r10,r10,8,0,31					; Rotate bottom byte up one
-			rlwimi	r10,r10,16,24,31				; Put old second from bottom into bottom
-			
-			crset	cr0_eq							; Set this to see if we failed
-			mtmsr	r22								; Flip DR, RI, and maybe PR on
-			isync
-		
-			sth		r10,0(r23)						; Store the reversed halfword
-		
-			mr		r4,r0							; Save the DAR if we failed the access
-			mtmsr	r30								; Restore normal MSR
-			isync
-
-			bt+		cr0_eq,aaComExit				; All done, go exit...
-			b		aaRedriveAsDSI					; We failed, go redrive this as a DSI...	
-
-;
-;			Data cache block zero
-;
-
-			.align	5
-
-aaDcbz:			
-            lwz     r0,savesrr0+4(r13)              ; get instruction address
-            li		r4,_COMM_PAGE_BASE_ADDRESS
-			rlwinm	r23,r23,0,0,26					; Round EA back to a 32-byte boundary
-            sub     r4,r0,r4                        ; compute instruction offset from base of commpage
-            cmplwi  r4,_COMM_PAGE_AREA_USED         ; did fault occur in commpage?
-            bge+    aaDcbz1                         ; skip if not in commpage
-            lwz		r4,savecr(r13)                  ; if we take a dcbz in the commpage...
-            rlwinm	r4,r4,0,0,27                    ; ...clear user's cr7 as a flag for commpage code
-            stw		r4,savecr(r13)
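-;           in C terms, the commpage check above is approximately (illustrative):
-;
-;               if ((uint32_t)(srr0 - _COMM_PAGE_BASE_ADDRESS) < _COMM_PAGE_AREA_USED)
-;                   savecr &= ~0xF;             /* clear user's cr7 as the flag */
-;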
-aaDcbz1:
-			crset	cr0_eq							; Set this to see if we failed
-			li		r0,0							; Clear this out
-			mtmsr	r22								; Flip DR, RI, and maybe PR on
-			isync
-			
-			stw		r0,0(r23)						; Clear word	
-			bne-	aaDcbzXit						; Got DSI, we are stopping...	
-			stw		r0,4(r23)						; Clear word		
-			bne-	aaDcbzXit						; Got DSI, we are stopping...	
-			stw		r0,8(r23)						; Clear word		
-			bne-	aaDcbzXit						; Got DSI, we are stopping...	
-			stw		r0,12(r23)						; Clear word		
-			bne-	aaDcbzXit						; Got DSI, we are stopping...	
-			stw		r0,16(r23)						; Clear word		
-			bne-	aaDcbzXit						; Got DSI, we are stopping...	
-			stw		r0,20(r23)						; Clear word		
-			bne-	aaDcbzXit						; Got DSI, we are stopping...	
-			stw		r0,24(r23)						; Clear word		
-			bne-	aaDcbzXit						; Got DSI, we are stopping...	
-			stw		r0,28(r23)						; Clear word		
-			
-aaDcbzXit:	mr		r4,r0							; Save the DAR if we failed the access
-			mtmsr	r30								; Restore normal MSR
-			isync
-
-			crclr	iUpdate							; Make sure we do not think this is an update form
-			
-			bt+		cr0_eq,aaComExit				; All done, go exit...
-			b		aaRedriveAsDSI					; We failed, go redrive this as a DSI...	
-
-
-;
-;			Unhandled alignment exception, pass it along
-;
-
-aaPassAlong:
-			li		r0,1							; Indicate that we failed to emulate
-			stw		r0,savemisc3(r13)				; Note that we failed to emulate
-			b		EXT(EmulExit)					
-
-
-
-
-;
-;			We go here to emulate a trace exception after we have handled alignment error
-;
-
-			.align	5
-			
-aaComExitrd:
-			lis		r11,hi16(srr1clr)				; Get the bits we need to clear
-			oris	r9,r9,hi16(SAVredrive)			; Set the redrive bit
-			andc	r12,r12,r11						; Clear what needs to be cleared
-			li		r11,T_TRACE						; Set trace interrupt
-			stw		r9,SAVflags(r13)				; Set the flags
-			stw		r11,saveexception(r13)			; Set the exception code
-			b		EXT(EmulExit)					; Exit and do trace interrupt...
-			
-
-
-;
-;			Redrive as a DSI
-;
-
-aaRedriveAsDSI:
-			mr		r20,r1							; Save the DSISR
-			mr		r21,r4
-			lwz		r4,SAVflags(r13)				; Pick up the flags
-			li		r11,T_DATA_ACCESS				; Set failing data access code
-			oris	r4,r4,hi16(SAVredrive)			; Set the redrive bit
-			stw		r20,savedsisr(r13)				; Set the DSISR of failed access
-			stw		r21,savedar+4(r13)				; Set the address of the failed access
-			stw		r11,saveexception(r13)			; Set the replacement code
-			stw		r4,SAVflags(r13)				; Set redrive request
-			b		EXT(EmulExit)					; Bail out to handle DSI...
-
-
-
-;
-;			Table of functions to load or store floating point registers
-;			This table is indexed reg||size||dir.  That means that each
-;			like load/store pair (e.g., lfd f31/stfd f31) are within the same
-;			quadword, which is the current ifetch size.  We expect most of the
-;			unaligned accesses to be part of copies, therefore, with this
-;			organization, we will save the ifetch of the store after the load.
-;
-
-			.align	10								; Make sure we are on a 1k boundary
-			.globl	EXT(aaFPopTable)
-			
-LEXT(aaFPopTable)
-			lfs		f0,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f0,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f0,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f0,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f1,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f1,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f1,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f1,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f2,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f2,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f2,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f2,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f3,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f3,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f3,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f3,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f4,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f4,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f4,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f4,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f5,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f5,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f5,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f5,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f6,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f6,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f6,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f6,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f7,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f7,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f7,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f7,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f8,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f8,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f8,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f8,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f9,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f9,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f9,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f9,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f10,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f10,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f10,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f10,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f11,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f11,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f11,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f11,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f12,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f12,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f12,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f12,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f13,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f13,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f13,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f13,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f14,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f14,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f14,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f14,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f15,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f15,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f15,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f15,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f16,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f16,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f16,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f16,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f17,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f17,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f17,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f17,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f18,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f18,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f18,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f18,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f19,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f19,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f19,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f19,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f20,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f20,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f20,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f20,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f21,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f21,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f21,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f21,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f22,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f22,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f22,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f22,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f23,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f23,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f23,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f23,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f24,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f24,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f24,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f24,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f25,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f25,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f25,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f25,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f26,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f26,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f26,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f26,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f27,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f27,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f27,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f27,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f28,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f28,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f28,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f28,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f29,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f29,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f29,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f29,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f30,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f30,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f30,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f30,emfp0(r31)					; Store double variant
-			blr
-
-			lfs		f31,emfp0(r31)					; Load single variant
-			blr
-
-			stfs	f31,emfp0(r31)					; Store single variant
-			blr
-			
-			lfd		f31,emfp0(r31)					; Load double variant
-			blr
-			
-			stfd	f31,emfp0(r31)					; Store double variant
-			blr
-
diff --git a/osfmk/ppc/Emulate64.s b/osfmk/ppc/Emulate64.s
deleted file mode 100644
index 2e7854d3f..000000000
--- a/osfmk/ppc/Emulate64.s
+++ /dev/null
@@ -1,957 +0,0 @@
-/*
- * Copyright (c) 2002-2007 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */ 																							
-
-/* Emulate64.s
- *
- * Software emulation of instructions not handled in hw, on 64-bit machines.
- */
- 
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <ppc/exception.h>
-#include <mach/machine/vm_param.h>
-#include <ppc/cpu_capabilities.h>
-#include <assym.s>
-
-// CR bit set if the instruction is an "update" form (LFDU, STWU, etc):
-#define	kUpdate	25
-
-// CR bit set if interrupt occurred in trace mode (i.e., MSR_SE_BIT):
-#define kTrace	8
-
-// CR bit set if notification on alignment interrupts is requested (notifyUnalignbit in spcFlags):
-#define	kNotify	9
-
-// CR bit distinguishes between alignment and program exceptions:
-#define	kAlignment	10
-
-
-
-// *************************************
-// * P R O G R A M   I N T E R R U P T *
-// *************************************
-//
-// These are floating pt exceptions, illegal instructions, privileged mode violations,
-// and traps.  All we're interested in at this low level is illegal instructions.
-// The ones we "emulate" are:
-//		DCBA,  which is not implemented in the IBM 970.  The emulation is to ignore it,
-//			   as it is just a hint.
-//		MCRXR, which is not implemented on the IBM 970, but is in the PPC ISA.
-//
-// Additionally, to facilitate debugging the alignment handler, we recognize a special
-// diagnostic mode that is used to simulate alignment exceptions.  When in this mode,
-// if the instruction has opcode==0 and the extended opcode is one of the X-form
-// instructions that can take an alignment interrupt, then we change the opcode to
-// 31 and pretend it got an alignment interrupt.  This exercises paths that
-// are hard to drive or perhaps never driven on this particular CPU.
-
-        .text
-        .globl	EXT(Emulate64)
-        .align	5
-LEXT(Emulate64)
-        crclr	kAlignment						// not an alignment exception
-        b		a64AlignAssistJoin				// join alignment handler
-        
-        
-// Return from alignment handler with all the regs loaded for opcode emulation.
-        
-a64HandleProgramInt:
-        rlwinm.	r0,r29,0,SRR1_PRG_ILL_INS_BIT,SRR1_PRG_ILL_INS_BIT	// illegal opcode?
-        beq		a64PassAlong					// No, must have been trap or priv violation etc
-        rlwinm	r3,r20,6,26,31					// right justify opcode field (bits 0-5)
-        rlwinm	r4,r20,31,22,31					// right justify extended opcode field (bits 21-30)
-        cmpwi	cr0,r3,31						// X-form?
-        cmpwi	cr1,r4,758						// DCBA?
-        cmpwi	cr4,r4,512						// MCRXR?
-        crand	cr1_eq,cr0_eq,cr1_eq			// merge the two tests for DCBA
-        crand	cr4_eq,cr0_eq,cr4_eq			// and for MCRXR
-        beq++	cr1_eq,a64ExitEm				// was DCBA, so ignore
-        bne--	cr4_eq,a64NotEmulated			// skip if not MCRXR
-        
-// Was MCRXR, so emulate.
-
-        ld		r3,savexer(r13)					// get the XER
-        lwz		r4,savecr(r13)					// and the CR
-        rlwinm	r5,r20,11,27,29					// get (CR# * 4) from instruction
-        rlwinm	r6,r3,0,4,31					// zero XER[32-35] (also XER[0-31])
-        sld		r4,r4,r5						// move target CR field to bits 32-35
-        rlwimi	r4,r3,0,0,3						// move XER[32-35] into CR field
-        stw		r6,savexer+4(r13)				// update XER
-        srd		r4,r4,r5						// re-position CR
-        stw		r4,savecr(r13)					// update CR
-        b		a64ExitEm						// done
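-
-// As a C sketch of the MCRXR emulation above (32-bit views, names illustrative):
-//
-//		sh  = 28 - 4*crf;						/* bit offset of CR field crf */
-//		cr  = (cr & ~(0xF << sh)) | (((xer >> 28) & 0xF) << sh);
-//		xer &= 0x0FFFFFFF;						/* clear XER[32-35] */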
-
-// Not an opcode we normally emulate.  If in special diagnostic mode and opcode=0,
-// emulate as an alignment exception.  This special case is for test software.
-
-a64NotEmulated:
-        lwz		r30,dgFlags(0)					// Get the flags
-        rlwinm.	r0,r30,0,enaDiagEMb,enaDiagEMb	// Do we want to try to emulate something?
-        beq++	a64PassAlong					// No emulation allowed
-        cmpwi	r3,0							// opcode==0 ?
-        bne		a64PassAlong					// not the special case
-        oris	r20,r20,0x7C00					// change opcode to 31
-        crset	kAlignment						// say we took alignment exception
-        rlwinm	r5,r4,0,26+1,26-1				// mask Update bit (32) out of extended opcode
-        rlwinm	r5,r5,0,0,31					// Clean out leftover junk from rlwinm
-
-        cmpwi	r4,1014							// dcbz/dcbz128 ?
-        crmove	cr1_eq,cr0_eq
-        cmpwi	r5,21							// ldx/ldux ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r5,599							// lfdx/lfdux ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r5,535							// lfsx/lfsux ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r5,343							// lhax/lhaux ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r4,790							// lhbrx ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r5,279							// lhzx/lhzux ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r4,597							// lswi ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r4,533							// lswx ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r5,341							// lwax/lwaux ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r4,534							// lwbrx ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r5,23							// lwz/lwzx ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r5,149							// stdx/stdux ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r5,727							// stfdx/stfdux ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r4,983							// stfiwx ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r5,663							// stfsx/stfsux ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r4,918							// sthbrx ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r5,407							// sthx/sthux ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r4,725							// stswi ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r4,661							// stswx ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r4,662							// stwbrx ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        cmpwi	r5,151							// stwx/stwux ?
-        cror	cr1_eq,cr0_eq,cr1_eq
-        
-        beq++	cr1,a64GotInstruction			// it was one of the X-forms we handle
-        crclr	kAlignment						// revert to program interrupt
-        b		a64PassAlong					// not recognized extended opcode
-        
-
-// *****************************************
-// * A L I G N M E N T   I N T E R R U P T *
-// *****************************************
-//
-// We get here in exception context, ie with interrupts disabled, translation off, and
-// in 64-bit mode, with:
-//		r13 = save-area pointer, with general context already saved in it
-//		cr6 = feature flags
-// We preserve r13 and cr6.  Other GPRs and CRs, the LR and CTR are used.
-//
-// Current 64-bit processors (GPUL) handle almost all misaligned operations in hardware,
-// so this routine usually isn't called very often.  Only floating pt ops that cross a page
-// boundary and are not word aligned, and LMW/STMW can take exceptions to cacheable memory.
-// However, in contrast to G3 and G4, any misaligned load/store will get an alignment
-// interrupt on uncached memory.
-//
-// We always emulate scalar ops with a series of byte load/stores.  Doing so is no slower
-// than LWZ/STW in cases where a scalar op gets an alignment exception.
-//
-// This routine supports all legal permutations of alignment interrupts occurring in user or
-// supervisor mode, 32 or 64-bit addressing, and translation on or off.  We do not emulate
-// instructions that go past the end of an address space, such as "LHZ -1(0)"; we just pass
-// along the alignment exception rather than wrap around to byte 0.
-//
-// First, check for a few special cases such as virtual machines, etc.
-
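-// In outline, the emulation below proceeds as follows (a hypothetical C
-// sketch; the names are descriptive and not from this source):
-//
-//		insn = *(uint32_t *)srr0;			// fetch the image (may redrive as ISI)
-//		idx  = hash(insn);					// 5-bit "AAAAB" index, described below
-//		ea   = (RA|0) + (RB or disp);		// reconstruct the EA by hand
-//		handler[idx]();						// emulate with byte loads/stores
-//		if (update_form) RA = ea;			// update forms write the EA back
-//		srr0 += 4;							// step past the emulated instruction
-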
-        .globl	EXT(AlignAssist64)
-        .align	5
-LEXT(AlignAssist64)
-        crset	kAlignment								// mark as alignment interrupt
-
-a64AlignAssistJoin:										// join here from program interrupt handler
-      	li		r0,0									// Get a 0
-        mfsprg	r31,0									// get the per_proc data ptr
-        mcrf	cr3,cr6									// save feature flags here...
-        lwz		r21,spcFlags(r31)						// grab the special flags
-        ld		r29,savesrr1(r13)						// get the MSR etc at the fault
-        ld		r28,savesrr0(r13)						// get the EA of faulting instruction
-       	stw		r0,savemisc3(r13)						// Assume we will handle this ok
-        mfmsr	r26										// save MSR at entry
-        rlwinm.	r0,r21,0,runningVMbit,runningVMbit		// Are we running a VM?
-        lwz		r19,dgFlags(0)							// Get the diagnostics flags
-        bne--	a64PassAlong							// yes, let the virtual machine monitor handle it
-
-
-// Set up the MSR shadow regs.  We turn on FP in this routine, and usually set DR and RI
-// when accessing user space (the SLB is still set up with all the user space translations.)
-// However, if the interrupt occurred in the kernel with DR off, we keep it off while
-// accessing the "target" address space.  If we set DR to access the target space, we also
-// set RI.  The RI bit tells the exception handlers to clear cr0 beq and return if we get an
-// exception accessing the user address space.  We are careful to test cr0 beq after every such
-// access.  We keep the following "shadows" of the MSR in global regs across this code:
-//		r25 = MSR at entry, plus FP and probably DR and RI (used to access target space)
-//		r26 = MSR at entry
-//		r27 = free
-//		r29 = SRR1 (ie, MSR at interrupt)
-// Note that EE and IR are always off, and SF is always on in this code.
-
-		rlwinm	r3,r29,31,MSR_DR_BIT,MSR_DR_BIT			// Move instruction translate bit to DR
-        rlwimi	r3,r3,32-MSR_RI_BIT+MSR_DR_BIT,MSR_RI_BIT,MSR_RI_BIT	// if DR is now set, set RI too
-        or		r25,r26,r3								// assemble MSR to use accessing target space
-        
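-// A hypothetical C restatement of the assembly above (names assumed): we
-// only turn DR on if translation was on at the fault, and whenever we set
-// DR we also set RI so a DSI during emulation comes back to us.
-//
-//		uint64_t msr = msr_at_entry;
-//		if (translation_on_at_fault(srr1))
-//			msr |= MSR_DR | MSR_RI;
-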
-
-// Because the DSISR and DAR are either not set or are not to be trusted on some 64-bit
-// processors on an alignment interrupt, we must fetch the faulting instruction ourselves,
-// then decode/hash the opcode and reconstruct the EA manually.
-
-        mtmsr	r25					// turn on FP and (if it was on at fault) DR and RI
-        isync						// wait for it to happen
-		cmpw	r0,r0				// turn on beq so we can check for DSIs
-        lwz		r20,0(r28)			// fetch faulting instruction, probably with DR on
-        bne--	a64RedriveAsISI		// got a DSI trying to fetch it, pretend it was an ISI
-        mtmsr	r26					// turn DR back off
-        isync						// wait for it to happen
-
-
-// Set a few flags while we wait for the faulting instruction to arrive from cache.
-
-        rlwinm.	r0,r29,0,MSR_SE_BIT,MSR_SE_BIT				// Were we single stepping?
-		stw		r20,savemisc2(r13)	// Save the instruction image in case we notify
-        crnot	kTrace,cr0_eq
-        rlwinm.	r0,r19,0,enaNotifyEMb,enaNotifyEMb			// Should we notify?
-        crnot	kNotify,cr0_eq        
-
-		rlwinm	r3,r29,0,MSR_DR_BIT,MSR_DR_BIT			// was data translation on at fault?
-        rlwimi	r3,r3,32-MSR_RI_BIT+MSR_DR_BIT,MSR_RI_BIT,MSR_RI_BIT	// if DR is now set, set RI too
-        or		r25,r26,r3								// assemble MSR to use accessing target space
-
-
-// Hash the instruction into a 5-bit value "AAAAB" used to index the branch table, and a
-// 1-bit kUpdate flag, as follows (restated in the C sketch below):
-//  •	for X-form instructions (with primary opcode 31):
-//       the "AAAA" bits are bits 21-24 of the instruction
-//       the "B" bit is the XOR of bits 29 and 30
-//       the update bit is instruction bit 25
-//	•	for D and DS-form instructions (actually, any primary opcode except 31):
-//       the "AAAA" bits are bits 1-4 of the instruction
-//       the "B" bit is 0
-//       the update bit is instruction bit 5
-//
-// Just for fun (and perhaps a little speed on deep-pipe machines), we compute the hash,
-// update flag, and EA without branches and with ipc >= 2.
-//
-// When we "bctr" to the opcode-specific routine, the following are all set up:
-//		MSR = EE and IR off, SF and FP on
-//		r12 = full 64-bit EA (r17 is clamped EA)
-//		r13 = save-area pointer (physical)
-//		r14 = ptr to saver0 in save-area (ie, to base of GPRs)
-//		r15 = 0x00000000FFFFFFFF if 32-bit mode fault, 0xFFFFFFFFFFFFFFFF if 64
-//		r16 = RA * 8 (ie, reg# not reg value)
-//		r17 = EA, clamped to 32 bits if 32-bit mode fault (see also r12)
-//		r18 = (RA|0) (reg value)
-//		r19 = -1 if X-form, 0 if D-form
-//		r20 = faulting instruction
-//		r21 = RT * 8 (ie, reg# not reg value)
-//		r22 = addr(aaFPopTable)+(RT*32), ie ptr to floating pt table for target register
-//		r25 = MSR at entrance, probably with DR and RI set (for access to target space)
-//		r26 = MSR at entrance
-//		r27 = free
-//		r28 = SRR0 (ie, EA of faulting instruction)
-//		r29 = SRR1 (ie, MSR at fault)
-//		r30 = scratch, usually user data
-//		r31 = per-proc pointer
-//		cr2 = kTrace, kNotify, and kAlignment flags
-//      cr3 = saved copy of feature flags used in lowmem vector code
-//		cr6 = bits 24-27 of CR are bits 24-27 of opcode if X-form, or bits 4-5 and 00 if D-form
-//			  bit 25 is the kUpdate flag, set for update form instructions
-//		cr7 = bits 28-31 of CR are bits 28-31 of opcode if X-form, or 0 if D-form
-
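-// The hash above, restated as hypothetical C (IBM bit numbering, bit 0 is
-// the MSB; ibm_bit() and the other names are illustrative assumptions):
-//
-//		static uint32_t a64Hash(uint32_t insn, int *update) {
-//			uint32_t po = insn >> 26;				// primary opcode, bits 0-5
-//			uint32_t aaaa, b;
-//			if (po == 31) {							// X-form
-//				aaaa = (insn >> 7) & 0xF;			// bits 21-24
-//				b = ibm_bit(insn,29) ^ ibm_bit(insn,30);
-//				*update = ibm_bit(insn,25);
-//			} else {								// D or DS form
-//				aaaa = (po >> 1) & 0xF;				// bits 1-4
-//				b = 0;
-//				*update = po & 1;					// bit 5
-//			}
-//			return (aaaa << 1) | b;					// "AAAAB"
-//		}
-//
-// For example, lhbrx (primary opcode 31, extended opcode 790 = 0b1100010110)
-// hashes to AAAA=1100, B=1^0=1, ie branch table entry 11001.
-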
-a64GotInstruction:					// here from program interrupt with instruction in r20
-        rlwinm	r21,r20,6+6,20,25	// move the primary opcode (bits 0-5) to bits 20-25
-        la		r14,saver0(r13)		// r14 <- base address of GPR registers
-        xori	r19,r21,0x07C0		// iff primary opcode is 31, set r19 to 0
-        rlwinm	r16,r20,16+3,24,28	// r16 <- RA*8
-        subi	r19,r19,1			// set bit 0 iff X-form (ie, if primary opcode is 31)
-        rlwinm	r17,r20,21+3,24,28	// r17 <- RB*8 (if X-form)
-        sradi	r19,r19,63			// r19 <- -1 if X-form, 0 if D-form
-        extsh	r22,r20				// r22 <- displacement (if D-form)
-
-        ldx		r23,r14,r17			// get (RB), if any
-        and		r15,r20,r19			// instruction if X, 0 if D
-        andc	r17,r21,r19			// primary opcode in bits 20-25 if D, 0 if X
-        ldx		r18,r14,r16			// get (RA)
-        subi	r24,r16,1			// set bit 0 iff RA==0
-        or		r21,r15,r17			// r21 <- instruction if X, or bits 0-5 in bits 20-25 if D
-        sradi	r24,r24,63			// r24 <- -1 if RA==0, 0 otherwise
-        rlwinm	r17,r21,32-4,25,28	// shift opcode bits 21-24 to 25-28 (hash "AAAA" bits)
-        lis		r10,ha16(a64BranchTable)	// start to build up branch table address
-        rlwimi	r17,r21,0,29,29		// move opcode bit 29 into hash as start of "B" bit
-        rlwinm	r30,r21,1,29,29		// position opcode bit 30 in position 29
-        and		r12,r23,r19			// RB if X-form, 0 if D-form
-        andc	r11,r22,r19			// 0 if X-form, sign extended displacement if D-form
-        xor		r17,r17,r30			// bit 29 ("B") of hash is xor(bit29,bit30)
-        addi	r10,r10,lo16(a64BranchTable)
-        or		r12,r12,r11			// r12 <- (RB) or displacement, as appropriate
-        lwzx	r30,r10,r17			// get address from branch table
-        mtcrf	0x01,r21			// move opcode bits 28-31 to CR7
-        sradi	r15,r29,32			// propagate SF bit from SRR1 (MSR_SF, which is bit 0)
-        andc	r18,r18,r24			// r18 <- (RA|0)
-        mtcrf	0x02,r21			// move opcode bits 24-27 to CR6 (kUpdate is bit 25)
-        add		r12,r18,r12			// r12 <- 64-bit EA
-        mtctr	r30					// set up branch address
-        
-        oris	r15,r15,0xFFFF		// start to fill low word of r15 with 1s
-        rlwinm	r21,r20,11+3,24,28	// r21 <- RT * 8
-        lis		r22,ha16(EXT(aaFPopTable))	// start to compute address of floating pt table
-        ori		r15,r15,0xFFFF		// now bits 32-63 of r15 are 1s
-        addi	r22,r22,lo16(EXT(aaFPopTable))
-        and		r17,r12,r15			// clamp EA to 32 bits if fault occurred in 32-bit mode
-        rlwimi	r22,r21,2,22,26		// move RT into aaFPopTable address (which is 1KB aligned)
-        
-        bf--	kAlignment,a64HandleProgramInt	// return to Program Interrupt handler
-        bctr						// if alignment interrupt, jump to opcode-specific routine
-        
-        
-// Floating-pt load single (lfs[u], lfsx[u])
-
-a64LfsLfsx:
-        bl		a64Load4Bytes		// get data in r30
-        mtctr	r22					// set up address of "lfs fRT,emfp0(r31)"
-        stw		r30,emfp0(r31)		// put word here for aaFPopTable routine
-        bctrl						// do the lfs
-        b		a64UpdateCheck		// update RA if necessary and exit
-        
-        
-// Floating-pt store single (stfs[u], stfsx[u])
-
-a64StfsStfsx:
-        ori		r22,r22,8			// set dir==1 (ie, single store) in aaFPopTable
-        mtctr	r22					// set up address of "stfs fRT,emfp0(r31)"
-        bctrl						// execute the store into emfp0
-        lwz		r30,emfp0(r31)		// get the word
-        bl		a64Store4Bytes		// store r30 into user space
-        b		a64UpdateCheck		// update RA if necessary and exit
-        
-
-// Floating-pt store as integer word (stfiwx)
-
-a64Stfiwx:
-        ori		r22,r22,16+8		// set size=1, dir==1 (ie, double store) in aaFPopTable
-        mtctr	r22					// set up FP register table address
-        bctrl						// double precision store into emfp0
-        lwz		r30,emfp0+4(r31)	// get the low-order word
-        bl		a64Store4Bytes		// store r30 into user space
-        b		a64Exit				// successfully emulated
-        
-
-// Floating-pt load double (lfd[u], lfdx[u])
-
-a64LfdLfdx:
-        ori		r22,r22,16			// set Double bit in aaFPopTable address
-        bl		a64Load8Bytes		// get data in r30
-        mtctr	r22					// set up address of "lfd fRT,emfp0(r31)"
-        std		r30,emfp0(r31)		// put doubleword here for aaFPopTable routine
-        bctrl						// execute the load
-        b		a64UpdateCheck		// update RA if necessary and exit
-
-
-// Floating-pt store double (stfd[u], stfdx[u])
-
-a64StfdStfdx:
-        ori		r22,r22,16+8		// set size=1, dir==1 (ie, double store) in aaFPopTable address
-        mtctr	r22					// address of routine to stfd RT
-        bctrl						// store into emfp0
-        ld		r30,emfp0(r31)		// get the doubleword
-        bl		a64Store8Bytes		// store r30 into user space
-        b		a64UpdateCheck		// update RA if necessary and exit
-
-
-// Load halfword w 0-fill (lhz[u], lhzx[u])
-
-a64LhzLhzx:
-        bl		a64Load2Bytes		// load into r30 from user space (w 0-fill)
-        stdx	r30,r14,r21			// store into RT slot in register file
-        b		a64UpdateCheck		// update RA if necessary and exit
-
-
-// Load halfword w sign fill (lha[u], lhax[u])
-
-a64LhaLhax:
-        bl		a64Load2Bytes		// load into r30 from user space (w 0-fill)
-        extsh	r30,r30				// sign-extend
-        stdx	r30,r14,r21			// store into RT slot in register file
-        b		a64UpdateCheck		// update RA if necessary and exit
-
-
-// Load halfword byte reversed (lhbrx)
-
-a64Lhbrx:
-        bl		a64Load2Bytes		// load into r30 from user space (w 0-fill)
-        rlwinm	r3,r30,8,16,23		// reverse bytes into r3
-        rlwimi	r3,r30,24,24,31
-        stdx	r3,r14,r21			// store into RT slot in register file
-        b		a64Exit				// successfully emulated
-
-
-// Store halfword (sth[u], sthx[u])
-
-a64SthSthx:
-        ldx		r30,r14,r21			// get RT
-        bl		a64Store2Bytes		// store r30 into user space
-        b		a64UpdateCheck		// update RA if necessary and exit
-
-
-// Store halfword byte reversed (sthbrx)
-
-a64Sthbrx:
-        addi	r21,r21,6			// point to low two bytes of RT
-        lhbrx	r30,r14,r21			// load and reverse
-        bl		a64Store2Bytes		// store r30 into user space
-        b		a64Exit				// successfully emulated
-
-
-// Load word w 0-fill (lwz[u], lwzx[u]), also lwarx.
-
-a64LwzLwzxLwarx:
-        andc	r3,r19,r20			// light bit 30 of r3 iff lwarx
-        andi.	r0,r3,2				// is it lwarx?
-        bne--	a64PassAlong		// yes, never try to emulate a lwarx
-        bl		a64Load4Bytes		// load 4 bytes from user space into r30 (0-filled)
-        stdx	r30,r14,r21			// update register file
-        b		a64UpdateCheck		// update RA if necessary and exit
-        
-        
-// Load word w sign fill (lwa, lwax[u])
-
-a64Lwa:
-        crclr	kUpdate				// no update form of lwa (it's a reserved encoding)
-a64Lwax:
-        bl		a64Load4Bytes		// load 4 bytes from user space into r30 (0-filled)
-        extsw	r30,r30				// sign extend
-        stdx	r30,r14,r21			// update register file
-        b		a64UpdateCheck		// update RA if necessary and exit
-
-
-// Load word byte reversed (lwbrx)
-
-a64Lwbrx:
-        bl		a64Load4Bytes		// load 4 bytes from user space into r30 (0-filled)
-        rlwinm	r3,r30,24,0,31		// flip bytes 1234 to 4123
-        rlwimi	r3,r30,8,8,15		// r3 is now 4323
-        rlwimi	r3,r30,8,24,31		// r3 is now 4321
-        stdx	r3,r14,r21			// update register file
-        b		a64Exit				// successfully emulated
-
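-// (In C terms the three rotates above are just a 32-bit byte swap:
-//  r3 = (x<<24) | ((x&0xFF00)<<8) | ((x>>8)&0xFF00) | (x>>24), with x = r30.)
-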
-        
-// Store word (stw[u], stwx[u])
-
-a64StwStwx:
-        ldx		r30,r14,r21			// get RT
-        bl		a64Store4Bytes		// store r30 into user space
-        b		a64UpdateCheck		// update RA if necessary and exit
-
-
-// Store word byte reversed (stwbrx)
-
-a64Stwbrx:
-        addi	r21,r21,4			// point to low word of RT
-        lwbrx	r30,r14,r21			// load and reverse
-        bl		a64Store4Bytes		// store r30 into user space
-        b		a64Exit				// successfully emulated
-
-
-// Load doubleword (ld[u], ldx[u]), also lwa.
-
-a64LdLwa:							// these are DS form: ld=0, ldu=1, and lwa=2
-        mtcrf	0x01,r20			// move DS field to cr7
-        rlwinm	r3,r20,0,30,31		// must adjust EA by subtracting DS field
-        sub		r12,r12,r3			// subtract from full 64-bit EA
-        and		r17,r12,r15			// then re-clamp to 32 bits if necessary
-        bt		30,a64Lwa			// handle lwa
-        crmove	kUpdate,31			// if opcode bit 31 is set, it is ldu so set update flag
-a64Ldx:
-        bl		a64Load8Bytes		// load 8 bytes from user space into r30
-        stdx	r30,r14,r21			// update register file
-        b		a64UpdateCheck		// update RA if necessary and exit
-
-
-// Store doubleword (stdx[u], std[u], stwcx)
-
-a64StdxStwcx:
-        bf--	30,a64PassAlong		// stwcx, so pass along alignment exception
-        b		a64Stdx				// was stdx
-a64StdStfiwx:						// if DS form: 0=std, 1=stdu, 2-3=undefined
-        bt		30,a64Stfiwx		// handle stfiwx
-        rlwinm	r3,r20,0,30,31		// must adjust EA by subtracting DS field
-        mtcrf	0x01,r20			// move DS field to cr7
-        sub		r12,r12,r3			// subtract from full 64-bit EA
-        and		r17,r12,r15			// then re-clamp to 32 bits if necessary
-        crmove	kUpdate,31			// if DS==1, then it is update form
-a64Stdx:
-        ldx		r30,r14,r21			// get RT
-        bl		a64Store8Bytes		// store RT into user space
-        b		a64UpdateCheck		// update RA if necessary and exit
-
-
-// Dcbz and Dcbz128 (bit 10 distinguishes the two forms)
-
-a64DcbzDcbz128:
-        andis.	r0,r20,0x0020		// bit 10 set?
-        li		r3,0				// get a 0 to store
-        li		r0,4				// assume 32-byte version, store 8 bytes 4x
-        rldicr	r17,r17,0,63-5		// 32-byte align EA
-		li		r4,_COMM_PAGE_BASE_ADDRESS
-        beq		a64DcbzSetup		// it was the 32-byte version
-        rldicr	r17,r17,0,63-7		// zero low 7 bits of EA
-        li		r0,16				// store 8 bytes 16x
-a64DcbzSetup:
-		sub		r4,r28,r4			// get instruction offset from start of commpage
-        and		r4,r4,r15			// mask off high-order bits if 32-bit mode
-		cmpldi  r4,_COMM_PAGE_AREA_USED // did fault occur in commpage area?
-        bge		a64NotCommpage		// not in commpage
-        rlwinm.	r4,r29,0,MSR_PR_BIT,MSR_PR_BIT	// did fault occur in user mode?
-        beq--	a64NotCommpage		// do not zero cr7 if kernel got alignment exception
-        lwz		r4,savecr(r13)		// if we take a dcbz{128} in the commpage...
-        rlwinm	r4,r4,0,0,27		// ...clear user's cr7...
-        stw		r4,savecr(r13)		// ...as a flag for commpage code
-a64NotCommpage:
-        mtctr	r0
-        cmpw	r0,r0				// turn cr0 beq on so we can check for DSIs
-        mtmsr	r25					// turn on DR and RI so we can address user space
-        isync						// wait for it to happen
-a64DcbzLoop:
-        std		r3,0(r17)			// store into user space
-        bne--	a64RedriveAsDSI
-        addi	r17,r17,8
-        bdnz	a64DcbzLoop
-        
-        mtmsr	r26					// restore MSR
-        isync						// wait for it to happen
-        b		a64Exit
-
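-// The zeroing itself, as hypothetical C (store_u64() is an assumed name;
-// each store may redrive as a DSI, as above):
-//
-//		size_t line = dcbz128 ? 128 : 32;	// instruction bit 10 picks the form
-//		ea &= ~(uint64_t)(line - 1);		// align EA down to the line
-//		for (size_t i = 0; i < line; i += 8)
-//			store_u64(ea + i, 0);
-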
-
-// Load and store multiple (lmw, stmw), distinguished by bit 25
-
-a64LmwStmw:
-        subfic	r22,r21,32*8		// how many regs to load or store?
-        srwi	r22,r22,1			// get bytes to load/store
-        bf		25,a64LoadMultiple	// handle lmw
-        b		a64StoreMultiple	// it was stmw
-        
-        
-// Load string word immediate (lswi)
-
-a64Lswi:
-        rlwinm	r22,r20,21,27,31	// get #bytes in r22
-        and		r17,r18,r15			// recompute EA as (RA|0), and clamp
-        subi	r3,r22,1			// r22==0?
-        rlwimi	r22,r3,6,26,26		// map count of 0 to 32
-        b		a64LoadMultiple
-        
-        
-// Store string word immediate (stswi)
-
-a64Stswi:
-        rlwinm	r22,r20,21,27,31	// get #bytes in r22
-        and		r17,r18,r15			// recompute EA as (RA|0), and clamp
-        subi	r3,r22,1			// r22==0?
-        rlwimi	r22,r3,6,26,26		// map count of 0 to 32
-        b		a64StoreMultiple
-        
-        
-// Load string word indexed (lswx), also lwbrx
-
-a64LswxLwbrx:
-        bf		30,a64Lwbrx			// was lwbrx
-        ld		r22,savexer(r13)	// get the xer
-        rlwinm	r22,r22,0,25,31		// isolate the byte count
-        b		a64LoadMultiple		// join common code
-        
-        
-// Store string word indexed (stswx), also stwbrx
-
-a64StswxStwbrx:
-        bf		30,a64Stwbrx		// was stwbrx
-        ld		r22,savexer(r13)	// get the xer
-        rlwinm	r22,r22,0,25,31		// isolate the byte count
-        b		a64StoreMultiple	// join common code
-
-
-// Load multiple words.  This handles lmw, lswi, and lswx.
-
-a64LoadMultiple:					// r22 = byte count, may be 0
-        subic.	r3,r22,1			// get (#bytes-1)
-        blt		a64Exit				// done if 0
-        add		r4,r17,r3			// get EA of last operand byte
-        and		r4,r4,r15			// clamp
-        cmpld	r4,r17				// address space wrap?
-        blt--	a64PassAlong		// pass along exception if so
-        srwi.	r4,r22,2			// get # full words to load
-        rlwinm	r22,r22,0,30,31		// r22 <- leftover byte count
-        cmpwi	cr1,r22,0			// leftover bytes?
-        beq		a64Lm3				// no words
-        mtctr	r4					// set up word count
-        cmpw	r0,r0				// set beq for DSI test
-a64Lm2:
-        mtmsr	r25					// turn on DR and RI
-        isync						// wait for it to happen
-        lbz		r3,0(r17)
-        bne--	a64RedriveAsDSI		// got a DSI
-        lbz		r4,1(r17)
-        bne--	a64RedriveAsDSI		// got a DSI
-        lbz		r5,2(r17)
-        bne--	a64RedriveAsDSI		// got a DSI
-        lbz		r6,3(r17)
-        bne--	a64RedriveAsDSI		// got a DSI
-        rlwinm	r30,r3,24,0,7		// pack bytes into r30
-        rldimi	r30,r4,16,40
-        rldimi	r30,r5,8,48
-        rldimi	r30,r6,0,56
-        mtmsr	r26					// turn DR back off so we can store into register file
-        isync
-        addi	r17,r17,4			// bump EA
-        stdx	r30,r14,r21			// pack into register file
-        addi	r21,r21,8			// bump register file offset
-        rlwinm	r21,r21,0,24,28		// wrap around to 0
-        bdnz	a64Lm2
-a64Lm3:								// cr1/r22 = leftover bytes (0-3), cr0 beq set
-        beq		cr1,a64Exit			// no leftover bytes
-        mtctr	r22
-        mtmsr	r25					// turn on DR so we can access user space
-        isync
-        lbz		r3,0(r17)			// get 1st leftover byte
-        bne--	a64RedriveAsDSI		// got a DSI
-        rlwinm	r30,r3,24,0,7		// position in byte 4 of r30 (and clear rest of r30)
-        bdz		a64Lm4				// only 1 byte leftover
-        lbz		r3,1(r17)			// get 2nd byte
-        bne--	a64RedriveAsDSI		// got a DSI
-        rldimi	r30,r3,16,40		// insert into byte 5 of r30
-        bdz		a64Lm4				// only 2 bytes leftover
-        lbz		r3,2(r17)			// get 3rd byte
-        bne--	a64RedriveAsDSI		// got a DSI
-        rldimi	r30,r3,8,48			// insert into byte 6
-a64Lm4:
-        mtmsr	r26					// turn DR back off so we can store into register file
-        isync
-        stdx	r30,r14,r21			// pack partially-filled word into register file
-        b		a64Exit
-
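-// The full-word loop above, as hypothetical C (loadByte() and regs[] are
-// assumed names):
-//
-//		while (nwords--) {
-//			regs[rt] = (loadByte(ea)   << 24) | (loadByte(ea+1) << 16)
-//			         | (loadByte(ea+2) <<  8) |  loadByte(ea+3);
-//			ea += 4;
-//			rt = (rt + 1) & 31;				// register number wraps 31 -> 0
-//		}
-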
-
-// Store multiple words.  This handles stmw, stswi, and stswx.
-
-a64StoreMultiple:					// r22 = byte count, may be 0
-        subic.	r3,r22,1			// get (#bytes-1)
-        blt		a64Exit				// done if 0
-        add		r4,r17,r3			// get EA of last operand byte
-        and		r4,r4,r15			// clamp
-        cmpld	r4,r17				// address space wrap?
-        blt--	a64PassAlong		// pass along exception if so
-        srwi.	r4,r22,2			// get # full words to store
-        rlwinm	r22,r22,0,30,31		// r22 <- leftover byte count
-        cmpwi	cr1,r22,0			// leftover bytes?
-        beq		a64Sm3				// no words
-        mtctr	r4					// set up word count
-        cmpw	r0,r0				// turn on beq so we can check for DSIs
-a64Sm2:
-        ldx		r30,r14,r21			// get next register
-        addi	r21,r21,8			// bump register file offset
-        rlwinm	r21,r21,0,24,28		// wrap around to 0
-        srwi	r3,r30,24			// shift the four bytes into position
-        srwi	r4,r30,16
-        srwi	r5,r30,8
-        mtmsr	r25					// turn on DR so we can access user space
-        isync						// wait for it to happen
-        stb		r3,0(r17)
-        bne--	a64RedriveAsDSI		// got a DSI
-        stb		r4,1(r17)
-        bne--	a64RedriveAsDSI		// got a DSI
-        stb		r5,2(r17)
-        bne--	a64RedriveAsDSI		// got a DSI
-        stb		r30,3(r17)
-        bne--	a64RedriveAsDSI		// got a DSI
-        mtmsr	r26					// turn DR back off
-        isync
-        addi	r17,r17,4			// bump EA
-        bdnz	a64Sm2
-a64Sm3:								// r22 = 0-3, cr1 set on r22, cr0 beq set
-        beq		cr1,a64Exit			// no leftover bytes
-        ldx		r30,r14,r21			// get last register
-        mtctr	r22
-        mtmsr	r25					// turn on DR so we can access user space
-        isync						// wait for it to happen
-a64Sm4:
-        rlwinm	r30,r30,8,0,31		// position next byte
-        stb		r30,0(r17)			// pack into user space
-        addi	r17,r17,1			// bump user space ptr
-        bne--	a64RedriveAsDSI		// got a DSI
-        bdnz	a64Sm4
-        mtmsr	r26					// turn DR back off
-        isync
-        b		a64Exit
-
-
-// Subroutines to load bytes from user space.
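-// Note the fall-through structure: a64Load8Bytes runs into a64Load4, which
-// runs into a64Load2.  Each entry point 0-fills r30, sets cr0 beq for the
-// DSI checks, and points r7 at the last byte so the loads use negative offsets.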
-
-a64Load2Bytes:						// load 2 bytes right-justified into r30
-        addi	r7,r17,1			// get EA of last byte
-        and		r7,r7,r15			// clamp
-        cmpld	r7,r17				// address wrap?
-        blt--	a64PassAlong		// yes
-        mtmsr	r25					// turn on DR so we can access user space
-        isync						// wait for it to happen
-        sub.	r30,r30,r30			// 0-fill dest and set beq
-        b		a64Load2			// jump into routine
-a64Load4Bytes:						// load 4 bytes right-justified into r30 (ie, low order word)
-        addi	r7,r17,3			// get EA of last byte
-        and		r7,r7,r15			// clamp
-        cmpld	r7,r17				// address wrap?
-        blt--	a64PassAlong		// yes
-        mtmsr	r25					// turn on DR so we can access user space
-        isync						// wait for it to happen
-        sub.	r30,r30,r30			// 0-fill dest and set beq
-        b		a64Load4			// jump into routine
-a64Load8Bytes:						// load 8 bytes into r30
-        addi	r7,r17,7			// get EA of last byte
-        and		r7,r7,r15			// clamp
-        cmpld	r7,r17				// address wrap?
-        blt--	a64PassAlong		// yes
-        mtmsr	r25					// turn on DR so we can access user space
-        isync						// wait for it to happen
-        sub.	r30,r30,r30			// 0-fill dest and set beq
-        lbz		r3,-7(r7)			// get byte 0
-        bne--	a64RedriveAsDSI		// got a DSI
-        lbz		r4,-6(r7)			// and byte 1, etc
-        bne--	a64RedriveAsDSI		// got a DSI
-        lbz		r5,-5(r7)
-        bne--	a64RedriveAsDSI		// got a DSI
-        lbz		r6,-4(r7)
-        bne--	a64RedriveAsDSI		// got a DSI
-        rldimi	r30,r3,56,0			// position bytes in upper word
-        rldimi	r30,r4,48,8
-        rldimi	r30,r5,40,16
-        rldimi	r30,r6,32,24
-a64Load4:
-        lbz		r3,-3(r7)
-        bne--	a64RedriveAsDSI		// got a DSI
-        lbz		r4,-2(r7)
-        bne--	a64RedriveAsDSI		// got a DSI
-        rldimi	r30,r3,24,32		// insert bytes 4 and 5 into r30
-        rldimi	r30,r4,16,40
-a64Load2:
-        lbz		r3,-1(r7)
-        bne--	a64RedriveAsDSI		// got a DSI
-        lbz		r4,0(r7)
-        bne--	a64RedriveAsDSI		// got a DSI
-        mtmsr	r26					// turn DR back off
-        isync
-        rldimi	r30,r3,8,48			// insert bytes 6 and 7 into r30
-        rldimi	r30,r4,0,56
-        blr
-        
-        
-// Subroutines to store bytes into user space.
-
-a64Store2Bytes:						// store bytes 6 and 7 of r30
-        addi	r7,r17,1			// get EA of last byte
-        and		r7,r7,r15			// clamp
-        cmpld	r7,r17				// address wrap?
-        blt--	a64PassAlong		// yes
-        mtmsr	r25					// turn on DR so we can access user space
-        isync						// wait for it to happen
-        cmpw	r0,r0				// set beq so we can check for DSI
-        b		a64Store2			// jump into routine
-a64Store4Bytes:						// store bytes 4-7 of r30 (ie, low order word)
-        addi	r7,r17,3			// get EA of last byte
-        and		r7,r7,r15			// clamp
-        cmpld	r7,r17				// address wrap?
-        blt--	a64PassAlong		// yes
-        mtmsr	r25					// turn on DR so we can access user space
-        isync						// wait for it to happen
-        cmpw	r0,r0				// set beq so we can check for DSI
-        b		a64Store4			// jump into routine
-a64Store8Bytes:						// r30 = bytes
-        addi	r7,r17,7			// get EA of last byte
-        and		r7,r7,r15			// clamp
-        cmpld	r7,r17				// address wrap?
-        blt--	a64PassAlong		// yes
-        mtmsr	r25					// turn on DR so we can access user space
-        isync						// wait for it to happen
-        cmpw	r0,r0				// set beq so we can check for DSI
-        rotldi	r3,r30,8			// shift byte 0 into position
-        rotldi	r4,r30,16			// and byte 1
-        rotldi	r5,r30,24			// and byte 2
-        rotldi	r6,r30,32			// and byte 3
-        stb		r3,-7(r7)			// store byte 0
-        bne--	a64RedriveAsDSI		// got a DSI
-        stb		r4,-6(r7)			// and byte 1 etc...
-        bne--	a64RedriveAsDSI		// got a DSI
-        stb		r5,-5(r7)
-        bne--	a64RedriveAsDSI		// got a DSI
-        stb		r6,-4(r7)
-        bne--	a64RedriveAsDSI		// got a DSI
-a64Store4:
-        rotldi	r3,r30,40			// shift byte 4 into position
-        rotldi	r4,r30,48			// and byte 5
-        stb		r3,-3(r7)
-        bne--	a64RedriveAsDSI		// got a DSI
-        stb		r4,-2(r7)
-        bne--	a64RedriveAsDSI		// got a DSI
-a64Store2:
-        rotldi	r3,r30,56			// shift byte 6 into position
-        stb		r3,-1(r7)			// store byte 6
-        bne--	a64RedriveAsDSI		// got a DSI
-        stb		r30,0(r7)			// store byte 7, which is already positioned
-        bne--	a64RedriveAsDSI		// got a DSI
-        mtmsr	r26					// turn off DR
-        isync
-        blr
-        
-                
-// Exit routines.
-
-a64ExitEm:
-		li		r30,T_EMULATE			// Change exception code to emulate
-		stw		r30,saveexception(r13)	// Save it
-		b		a64Exit					// Join standard exit routine...
-
-a64PassAlong:							// unhandled exception, just pass it along
-        li		r0,1					// Set that the alignment/program exception was not emulated
-        crset	kNotify					// return T_ALIGNMENT or T_PROGRAM
-		stw		r0,savemisc3(r13)		// Set that emulation was not done
-        crclr	kTrace					// not a trace interrupt
-        b		a64Exit1
-a64UpdateCheck:							// successfully emulated, may be update form
-        bf		kUpdate,a64Exit			// update?
-        stdx	r12,r14,r16				// yes, store 64-bit EA into RA
-a64Exit:								// instruction successfully emulated
-        addi	r28,r28,4				// bump SRR0 past the emulated instruction
-        li		r30,T_IN_VAIN			// eat the interrupt since we emulated it
-        and		r28,r28,r15				// clamp to address space size (32 vs 64)
-        std		r28,savesrr0(r13)		// save, so we return to next instruction
-a64Exit1:
-        bt--	kTrace,a64Trace			// were we in single-step at fault?
-        bt--	kNotify,a64Notify		// should we say T_ALIGNMENT anyway?
-a64Exit2:
-        mcrf	cr6,cr3					// restore feature flags
-        mr		r11,r30					// pass back exception code (T_IN_VAIN etc) in r11
-        b		EXT(EmulExit)			// return to exception processing
-
-
-// Notification requested: pass exception upstairs even though it might have been emulated.
-
-a64Notify:
-        li		r30,T_ALIGNMENT			// somebody wants to know about it (but don't redrive)
-        bt		kAlignment,a64Exit2		// was an alignment exception
-        li		r30,T_PROGRAM			// was an emulated instruction
-        b		a64Exit2
-
-
-// Emulate a trace interrupt after handling alignment interrupt.
-
-a64Trace:
-        lwz		r9,SAVflags(r13)		// get the save-area flags
-        li		r30,T_TRACE
-        oris	r9,r9,hi16(SAVredrive)	// Set the redrive bit
-        stw		r30,saveexception(r13)	// Set the exception code
-        stw		r9,SAVflags(r13)		// Set the flags
-        b		a64Exit2				// Exit and do trace interrupt...
-
-
-// Got a DSI accessing user space.  Redrive.  One way this can happen is if another
-// processor removes a mapping while we are emulating.
-
-a64RedriveAsISI:						// this DSI happened fetching the opcode (r1==DSISR  r4==DAR)
-        mtmsr	r26						// turn DR back off
-        isync							// wait for it to happen
-        li		r30,T_INSTRUCTION_ACCESS
-        rlwimi	r29,r1,0,1,4			// insert the fault type from DSI's DSISR
-        std		r29,savesrr1(r13)		// update SRR1 to look like an ISI
-        b		a64Redrive
-
-a64RedriveAsDSI:						// r0==DAR  r1==DSISR
-        mtmsr	r26						// turn DR back off
-        isync							// wait for it to happen
-        stw		r1,savedsisr(r13)		// Set the DSISR of failed access
-        std		r0,savedar(r13)			// Set the address of the failed access
-        li		r30,T_DATA_ACCESS		// Set failing data access code
-a64Redrive:
-        lwz		r9,SAVflags(r13)		// Pick up the flags
-        stw		r30,saveexception(r13)	// Set the replacement code
-        oris	r9,r9,hi16(SAVredrive)	// Set the redrive bit
-        stw		r9,SAVflags(r13)		// Set redrive request
-        crclr	kTrace					// don't take a trace interrupt
-        crclr	kNotify					// don't pass alignment exception
-        b		a64Exit2				// done
-        
-
-// This is the branch table, indexed by the "AAAAB" opcode hash.
-
-a64BranchTable:
-        .long	a64LwzLwzxLwarx		// 00000  lwz[u], lwzx[u], lwarx
-        .long	a64Ldx				// 00001  ldx[u]
-        .long	a64PassAlong		// 00010  ldarx 	(never emulate these)
-        .long	a64PassAlong		// 00011
-        .long	a64StwStwx			// 00100  stw[u], stwx[u]
-        .long	a64StdxStwcx		// 00101  stdx[u], stwcx
-        .long	a64PassAlong		// 00110
-        .long	a64PassAlong		// 00111  stdcx		(never emulate these)
-        .long	a64LhzLhzx			// 01000  lhz[u], lhzx[u]
-        .long	a64PassAlong		// 01001
-        .long	a64LhaLhax			// 01010  lha[u], lhax[u]
-        .long	a64Lwax				// 01011  lwax[u]
-        .long	a64SthSthx			// 01100  sth[u], sthx[u]
-        .long	a64PassAlong		// 01101
-        .long	a64LmwStmw			// 01110  lmw, stmw
-        .long	a64PassAlong		// 01111
-        .long	a64LfsLfsx			// 10000  lfs[u], lfsx[u]
-        .long	a64LswxLwbrx		// 10001  lswx, lwbrx
-        .long	a64LfdLfdx			// 10010  lfd[u], lfdx[u]
-        .long	a64Lswi				// 10011  lswi
-        .long	a64StfsStfsx		// 10100  stfs[u], stfsx[u]
-        .long	a64StswxStwbrx		// 10101  stswx, stwbrx
-        .long	a64StfdStfdx		// 10110  stfd[u], stfdx[u]
-        .long	a64Stswi			// 10111  stswi
-        .long	a64PassAlong		// 11000
-        .long	a64Lhbrx			// 11001  lhbrx
-        .long	a64LdLwa			// 11010  ld[u], lwa
-        .long	a64PassAlong		// 11011
-        .long	a64PassAlong		// 11100
-        .long	a64Sthbrx			// 11101  sthbrx
-        .long	a64StdStfiwx		// 11110  std[u], stfiwx
-        .long	a64DcbzDcbz128		// 11111  dcbz, dcbz128
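-// Example: lwzu (primary opcode 33) hashes to AAAA=0000, B=0 with the update
-// bit set, so it dispatches through entry 00000 (a64LwzLwzxLwarx) and
-// a64UpdateCheck then writes the EA back into RA.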
-
-
diff --git a/osfmk/ppc/Firmware.h b/osfmk/ppc/Firmware.h
deleted file mode 100644
index c0a57f6f8..000000000
--- a/osfmk/ppc/Firmware.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_FREE_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-
-/*
- * Here be the firmware's public interfaces
- * Lovingly crafted by Bill Angell using traditional methods
- */
-
-#ifndef _FIRMWARE_H_
-#define _FIRMWARE_H_
-
-#ifndef __ppc__
-#error This file is only useful on PowerPC.
-#endif
-
-#include <mach/vm_types.h>
-#include <ppc/Diagnostics.h>
-
-/*
- *	This routine is used to write debug output to either the modem or printer port.
- *	parm 1 is printer (0) or modem (1); parm 2 is ID (printed directly); parm 3 is converted to hex
- */
-
-void dbgDisp(unsigned int port, unsigned int id, unsigned int data);
-void dbgLog(unsigned int d0, unsigned int d1, unsigned int d2, unsigned int d3);
-void dbgLog2(unsigned int type, unsigned int p1, unsigned int p2);
-void dbgDispLL(unsigned int port, unsigned int id, unsigned int data);
-void fwSCCinit(unsigned int port);
-void fwEmMck(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int);	/* Start injecting */ 
-void fwSCOM(scomcomm *);	/* Read/Write SCOM */ 
-void setPmon(unsigned int, unsigned int);	/* Set perf mon stuff */ 
-
-extern void dbgTrace(unsigned int id, unsigned int item1, unsigned int item2, unsigned int item3, unsigned int item4);
-#if 0		/* (TEST/DEBUG) - eliminate inline */
-extern __inline__ void dbgTrace(unsigned int id, unsigned int item1, unsigned int item2, unsigned int item3, unsigned int item4) {
- 
- 		__asm__ volatile("mr   r2,%0" : : "r" (id) : "r2");
- 		__asm__ volatile("mr   r3,%0" : : "r" (item1) : "r3");
- 		__asm__ volatile("mr   r4,%0" : : "r" (item2) : "r4");
- 		__asm__ volatile("mr   r5,%0" : : "r" (item3) : "r5");
- 		__asm__ volatile("mr   r6,%0" : : "r" (item4) : "r6");
-        __asm__ volatile("lis  r0,hi16(CutTrace)" : : : "r0");
-        __asm__ volatile("ori  r0,r0,lo16(CutTrace)" : : : "r0");
-        __asm__ volatile("sc");
-		return;
-}
-#endif
-
-extern void DoPreempt(void);
-extern __inline__ void DoPreempt(void) {
-        __asm__ volatile("lis  r0,hi16(DoPreemptCall)" : : : "r0");
-        __asm__ volatile("ori  r0,r0,lo16(DoPreemptCall)" : : : "r0");
-        __asm__ volatile("sc");
-		return;
-}
-
-extern void CreateFakeIO(void);
-extern __inline__ void CreateFakeIO(void) {
-		__asm__ volatile("lis  r0,hi16(CreateFakeIOCall)" : : : "r0");
-		__asm__ volatile("ori  r0,r0,lo16(CreateFakeIOCall)" : : : "r0");
-		__asm__ volatile("sc");
-		return;
-}
-
-extern void CreateFakeDEC(void);
-extern __inline__ void CreateFakeDEC(void) {
-        __asm__ volatile("lis  r0,hi16(CreateFakeDECCall)" : : : "r0");
-        __asm__ volatile("ori  r0,r0,lo16(CreateFakeDECCall)" : : : "r0");
-		__asm__ volatile("sc");
-		return;
-}
-
-extern void CreateShutdownCTX(void);
-extern __inline__ void CreateShutdownCTX(void) {
-        __asm__ volatile("lis  r0,hi16(CreateShutdownCTXCall)" : : : "r0");
-        __asm__ volatile("ori  r0,r0,lo16(CreateShutdownCTXCall)" : : : "r0");
-		__asm__ volatile("sc");
-		return;
-}
-
-extern void ChokeSys(unsigned int ercd);
-extern __inline__ void ChokeSys(unsigned int ercd) {
- 		__asm__ volatile("mr   r3,%0" : : "r" (ercd) : "r3");
-        __asm__ volatile("lis  r0,hi16(Choke)" : : : "r0");
-        __asm__ volatile("ori  r0,r0,lo16(Choke)" : : : "r0");
-		__asm__ volatile("sc");
-		return;
-}
-
-typedef struct Boot_Video bootBumbleC;
-
-extern void StoreReal(unsigned int val, unsigned int addr);
-extern void ReadReal(addr64_t raddr, unsigned int *vaddr);
-extern void ClearReal(unsigned int addr, unsigned int lgn);
-extern void LoadDBATs(unsigned int *bat);
-extern void LoadIBATs(unsigned int *bat);
-extern void stFloat(unsigned int *addr);
-extern int stVectors(unsigned int *addr);
-extern int stSpecrs(unsigned int *addr);
-extern unsigned int LLTraceSet(unsigned int tflags);
-extern void GratefulDebInit(bootBumbleC *boot_video_info);
-extern void GratefulDebDisp(unsigned int coord, unsigned int data);
-extern void checkNMI(void);
-
-#pragma pack(4)						/* Make sure the structure stays as we defined it */
-typedef struct GDWorkArea {			/* Grateful Deb work area one per processor */
-
-/*	Note that a lot of info is duplicated for each processor */
-
-	unsigned int GDsave[32];		/* Save area for registers */
-	
-	unsigned int GDfp0[2];
-	unsigned int GDfp1[2];
-	unsigned int GDfp2[2];
-	unsigned int GDfp3[2];
-	
-	unsigned int GDtop;				/* Top pixel of CPU's window */
-	unsigned int GDleft;			/* Left pixel of CPU's window */
-	unsigned int GDtopleft;			/* Physical address of top left in frame buffer */
-	unsigned int GDrowbytes;		/* Bytes per row */
-	unsigned int GDrowchar;			/* Bytes per row of characters plus leading */
-	unsigned int GDdepth;			/* Bits per pixel */
-	unsigned int GDcollgn;			/* Column width in bytes */
-	unsigned int GDready;			/* We are ready to go */
-	unsigned int GDfiller[16];		/* Fill it up to a 256 byte boundary */
-	
-	unsigned int GDrowbuf1[128];	/* Buffer to an 8 character row */
-	unsigned int GDrowbuf2[128];	/* Buffer to an 8 character row */
-
-} GDWorkArea;
-#pragma pack()
-#define GDfontsize 16
-#define GDdispcols 2
-
-#endif /* _FIRMWARE_H_ */
diff --git a/osfmk/ppc/Firmware.s b/osfmk/ppc/Firmware.s
deleted file mode 100644
index d5f687f34..000000000
--- a/osfmk/ppc/Firmware.s
+++ /dev/null
@@ -1,2517 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_FREE_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-	
-/* 																							
- 	Firmware.s 
-
-	Handle things that should be treated as an extension of the hardware
-
-	Lovingly crafted by Bill Angell using traditional methods and only natural or recycled materials.
-	No animal products are used other than rendered otter bile and deep fried pork lard.
-
-*/
-
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <ppc/spec_reg.h>
-#include <ppc/exception.h>
-#include <mach/machine/vm_param.h>
-#include <assym.s>
-
-
-/*
- *			Here we generate the table of supported firmware calls 
- */
- 
-
-	
-			.data
-			.align	5								/* Line up on cache line */
-			
-			.globl	EXT(FWtable)
-
-EXT(FWtable):
-
-			.globl	CutTrace						/* Let everyone know 'bout it */
-			.set	CutTrace,(.-EXT(FWtable))/4|0x80000000	/* Call number for CutTrace */
-			.long	callUnimp						/* This was already handled in lowmem_vectors */
-
-#include	<ppc/FirmwareCalls.h>
- 
-			.set	EXT(FirmwareCnt), (.-EXT(FWtable))/4	/* Get the top number */
-
-			.text
-			
-#define SIMPLESCC 1
-#define NOTQUITEASSIMPLE 1
-/*
- *			This routine handles the firmware call routine. It must be entered with IR and DR off,
- *			interruptions disabled, and in supervisor state. 
- *
- *			When we enter, we expect R0 to have call number, and LR
- *			to point to the return.  Also, all registers saved in savearea in R13.
- *			R3 is as passed in by the user.  All others must be gotten from the save area
- */
-
-
-			.align	5
-			.globl	EXT(FirmwareCall)
-
-LEXT(FirmwareCall)
-		
-			rlwinm	r1,r0,2,1,29					/* Clear out bit 0 and multiply by 4 */
-			lis		r12,HIGH_ADDR(EXT(FWtable))		/* Get the high part of the firmware call table */
-			cmplwi	r1,EXT(FirmwareCnt)*4			/* Is it a valid firmware call number */
-			ori		r12,r12,LOW_ADDR(EXT(FWtable))	/* Now the low part */
-			ble+	goodCall						/* Yeah, it is... */
-			
-			li		r3,T_SYSTEM_CALL				/* Tell the vector handler that we know nothing */
-			b		EXT(FCReturn)					; Bye dudes...
-			
-goodCall:	mfsprg	r10,0							/* Make sure about the per_proc block */
-			lwzx	r1,r1,r12						/* Pick up the address of the routine */
-			lwz		r4,saver4+4(r13)				/* Pass in caller's R4 */
-			lwz		r5,saver5+4(r13)				/* Pass in caller's R5 */
-			rlwinm.	r1,r1,0,0,29					/* Make sure the flag bits are clear */
-
-			mtlr	r1								/* Put it in the LR */
			beq-	callUnimp						/* This one was unimplemented... */
-
-			blrl									/* Call the routine... */
-
-			stw		r3,saver3+4(r13)				/* Pass back the return code to caller */
-			li		r3,T_IN_VAIN					/* Tell the vector handler that we took care of it */
-			b		EXT(FCReturn)					; Bye dudes...
-	
-callUnimp:	li		r3,T_SYSTEM_CALL				/* Tell the vector handler that we know nothing */
-			b		EXT(FCReturn)					; Bye dudes...
-
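-/*
- *			In outline (hypothetical C; names assumed): the call number in r0
- *			indexes FWtable, a flag-only entry is unimplemented, and the
- *			handler's result goes back in the caller's saved r3.
- *
- *				idx = r0 & 0x7FFFFFFF;
- *				if (idx >= FirmwareCnt) return T_SYSTEM_CALL;
- *				fn = FWtable[idx] & ~3;				// low bits are flags
- *				if (fn == 0) return T_SYSTEM_CALL;	// unimplemented entry
- *				save->r3 = fn(r3, save->r4, save->r5);
- *				return T_IN_VAIN;					// handled; eat the interruption
- */
-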
-/*
- *			This routine is used to store using a real address. It stores parameter 1 at parameter 2.
- */
-
-			.align	5
-			.globl	EXT(StoreReal)
-
-LEXT(StoreReal)
-
-			lis		r0,HIGH_ADDR(StoreRealCall)		/* Get the top part of the SC number */
-			ori		r0,r0,LOW_ADDR(StoreRealCall)	/* and the bottom part */
-			sc										/* Do it to it */
-			blr										/* Bye bye, Birdie... */
-			
-			.align	5
-			.globl	EXT(StoreRealLL)
-
-LEXT(StoreRealLL)
-
-			stw		r3,0(r4)						/* Store the word */
-			blr										/* Leave... */
-
-/*
- *			This routine is used to clear a range of physical pages.
- */
-			
-			.align	5
-			.globl	EXT(ClearReal)
-
-LEXT(ClearReal)
-
-			lis		r0,HIGH_ADDR(ClearRealCall)		/* Get the top part of the SC number */
-			ori		r0,r0,LOW_ADDR(ClearRealCall)	/* and the bottom part */
-			sc										/* Do it to it */
-			blr										/* Bye bye, Birdie... */
-			
-			
-			.align	5
-			.globl	EXT(ClearRealLL)
-
-LEXT(ClearRealLL)
-
-/*
- *			We take the first parameter as a physical address.  The second is the length in bytes.
- *			Being crazy, I'll round the address down, and the length up.  We could end up clearing
- *			an extra page at the start and one at the end, but we don't really care.  If someone
- *			is stupid enough to give me unaligned addresses and lengths, I am just arrogant enough
- *			to take them at their word and to hell with them.
- */
-
-			neg		r5,r3							/* Negate the address */ 
-			addi	r4,r4,4095						/* Round length up */
-			rlwinm	r5,r5,0,20,31					/* Save extra length */
-			rlwinm	r3,r3,0,0,19					/* Round the page on down */
-			add		r4,r4,r5						/* Add up all extra lengths */
-			li		r6,32							/* Get a displacement */
-			rlwinm	r4,r4,0,0,19					/* Round the length back down */
-			
-clrloop:	subi	r4,r4,32						/* Back off a cache line */
-			dcbz	0,r3							/* Do the even line */
			sub.	r4,r4,r6						/* Back off a second time (we only do this to generate a CR) */
-			dcbz	r6,r3							/* Clear the even line */
-			addi	r3,r3,64						/* Move up to every other line */
-			bgt+	clrloop							/* Go until we've done it all... */
-
-			blr										/* Leave... */
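-/*
- *			The rounding above, as hypothetical C (dcbz() stands in for the
- *			cache-line-zero primitive):
- *
- *				base = addr & ~0xFFF;								// round page down
- *				len  = (lgn + ((-addr) & 0xFFF) + 4095) & ~0xFFF;	// round length up
- *				for (i = 0; i < len; i += 32)
- *					dcbz(base + i);
- */
-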
-/*
- *			This routine will read in 32 bytes of real storage.
- */
- 			
-			.align	5
-			.globl	EXT(ReadReal)
-
-LEXT(ReadReal)
-
-			mfsprg	r9,2							; Get the features
-			mfmsr	r0								; Get the MSR 
-			li		r8,lo16(MASK(MSR_DR))			; Get the DR bit
-			rlwinm.	r9,r9,0,pf64Bitb,pf64Bitb		; Are we 64-bit?
-			ori		r8,r8,lo16(MASK(MSR_EE))		; Add in the EE bit
-			li		r7,1							; Get set for it
-			andc	r8,r0,r8						; Turn off EE and DR
			bt--	cr0_eq,rr32a					; No, we are 32-bit...
-			
-			rldimi	r8,r7,63,MSR_SF_BIT				; Set SF bit (bit 0)
-			sldi	r3,r3,32						; Slide on over for true 64-bit address
-			mtmsrd	r8
-			isync
-			or		r3,r3,r4						; Join top and bottom of address
-			mr		r4,r5							; Set destination address
-			b		rrJoina							; Join on up...
-			
-rr32a:		mr		r3,r4							; Position bottom of long long
-			mr		r4,r5							; Set destination address
-			mtmsr	r8								/* Disable EE and DR */
-			isync									/* Just make sure about it */
-			
-rrJoina:	lwz		r5,0(r3)						/* Get word 0 */
-			lwz		r6,4(r3)						/* Get word 1 */
-			lwz		r7,8(r3)						/* Get word 2 */
-			lwz		r8,12(r3)						/* Get word 3 */
-			lis		r2,hi16(MASK(MSR_VEC))			; Get the vector enable 
-			lwz		r9,16(r3)						/* Get word 4 */
-			ori		r2,r2,lo16(MASK(MSR_FP))		; Get the FP enable 
-			lwz		r10,20(r3)						/* Get word 5 */
-			andc	r0,r0,r2						; Clear VEC and FP enables
-			lwz		r11,24(r3)						/* Get word 6 */
-			lwz		r12,28(r3)						/* Get word 7 */
-			
-			bt--	cr0_eq,rr32b					; We are not 64-bit...
-
-			mtmsrd	r0
-			isync
-			b		rrJoinb							; Join on up...
-
-rr32b:		mtmsr	r0								/* Restore original machine state */
			isync									/* Ensure goodness */
-			
-rrJoinb:	stw		r5,0(r4)						/* Set word 0 */
-			stw		r6,4(r4)						/* Set word 1 */
-			stw		r7,8(r4)						/* Set word 2 */
-			stw		r8,12(r4)						/* Set word 3 */
-			stw		r9,16(r4)						/* Set word 4 */
-			stw		r10,20(r4)						/* Set word 5 */
-			stw		r11,24(r4)						/* Set word 6 */
-			stw		r12,28(r4)						/* Set word 7 */
-			
-			blr
-			
-
-/*
- *			This routine is used to load all 4 DBATs.
- */
- 			
-			.align	5
-			.globl	EXT(LoadDBATs)
-
-LEXT(LoadDBATs)
-
-
-			lis		r0,HIGH_ADDR(LoadDBATsCall)		/* Top half of LoadDBATsCall firmware call number */
-			ori		r0,r0,LOW_ADDR(LoadDBATsCall)	/* Bottom half */
-			sc										/* Do it to it */
-
-			blr										/* Bye bye, Birdie... */
-			
- 			
-			.align	5
-			.globl	EXT(xLoadDBATsLL)
-
-LEXT(xLoadDBATsLL)
-
-			lwz		r4,0(r3)						/* Get DBAT 0 high */
-			lwz		r5,4(r3)						/* Get DBAT 0 low */
-			lwz		r6,8(r3)						/* Get DBAT 1 high */
-			lwz		r7,12(r3)						/* Get DBAT 1 low */
-			lwz		r8,16(r3)						/* Get DBAT 2 high */
-			lwz		r9,20(r3)						/* Get DBAT 2 low */
-			lwz		r10,24(r3)						/* Get DBAT 3 high */
-			lwz		r11,28(r3)						/* Get DBAT 3 low */
-			
-			sync									/* Common decency and the state law require that you wash your hands */
-			mtdbatu	0,r4							/* Load DBAT 0 high */
-			mtdbatl	0,r5							/* Load DBAT 0 low */
-			mtdbatu	1,r6							/* Load DBAT 1 high */
-			mtdbatl	1,r7							/* Load DBAT 1 low */
-			mtdbatu	2,r8							/* Load DBAT 2 high */
-			mtdbatl	2,r9							/* Load DBAT 2 low */
-			mtdbatu	3,r10							/* Load DBAT 3 high */
-			mtdbatl	3,r11							/* Load DBAT 3 low */
-			sync									/* Make sure it's done */
-			isync									/* Toss out anything new */
-
-			blr										/* Leave... */
-
-/*
- *			This routine is used to load all 4 IBATs.
- */
-
-			.align	5
-			.globl	EXT(LoadIBATs)
-
-LEXT(LoadIBATs)
-
-
-			lis		r0,HIGH_ADDR(LoadIBATsCall)		/* Top half of LoadIBATsCall firmware call number */
-			ori		r0,r0,LOW_ADDR(LoadIBATsCall)	/* Bottom half */
-			sc										/* Do it to it */
-			blr										/* Bye bye, Birdie... */
-			
-			.align	5
-			.globl	EXT(xLoadIBATsLL)
-
-LEXT(xLoadIBATsLL)
-
-			lwz		r4,0(r3)						/* Get IBAT 0 high */
-			lwz		r5,4(r3)						/* Get IBAT 0 low */
-			lwz		r6,8(r3)						/* Get IBAT 1 high */
-			lwz		r7,12(r3)						/* Get IBAT 1 low */
-			lwz		r8,16(r3)						/* Get IBAT 2 high */
-			lwz		r9,20(r3)						/* Get IBAT 2 low */
-			lwz		r10,24(r3)						/* Get IBAT 3 high */
-			lwz		r11,28(r3)						/* Get IBAT 3 low */
-			
-			sync									/* Common decency and the state law require that you wash your hands */
-			mtibatu	0,r4							/* Load IBAT 0 high */
-			mtibatl	0,r5							/* Load IBAT 0 low */
-			mtibatu	1,r6							/* Load IBAT 1 high */
-			mtibatl	1,r7							/* Load IBAT 1 low */
-			mtibatu	2,r8							/* Load IBAT 2 high */
-			mtibatl	2,r9							/* Load IBAT 2 low */
-			mtibatu	3,r10							/* Load IBAT 3 high */
-			mtibatl	3,r11							/* Load IBAT 3 low */
-			sync									/* Make sure it's done */
-			isync									/* Toss out anything new */
-			
-			blr										/* Leave... */
-
-
-/*
- *			This is the glue to call the CutTrace firmware call
- *			dbgTrace(id, p1, p2, p3, p4)
- */
- 			
-			.align	5
-			.globl	EXT(dbgTrace)
-
-LEXT(dbgTrace)
-			
-			mr		r2,r3
-			mr		r3,r4
			lis		r0,HIGH_ADDR(CutTrace)			/* Top half of CutTrace firmware call number */
-			mr		r4,r5
-			mr		r5,r6
-			ori		r0,r0,LOW_ADDR(CutTrace)		/* Bottom half */
-			mr		r6,r7
-			sc										/* Do it to it */
-			blr										/* Bye bye, Birdie... */
-
-/*
- *			This is the glue to create a fake I/O interruption
- */
-  			
-			.align	5
-			.globl	EXT(CreateFakeIO)
-
-LEXT(CreateFakeIO)
-			
-			lis		r0,HIGH_ADDR(CreateFakeIOCall)	/* Top half of CreateFakeIO firmware call number */
-			ori		r0,r0,LOW_ADDR(CreateFakeIOCall)	/* Bottom half */
-			sc										/* Do it to it */
-			blr										/* Bye bye, Birdie... */
-
-/*
- *			This is the glue to create a fake Dec interruption
- */
-   			
-			.align	5
-			.globl	EXT(CreateFakeDEC)
-
-LEXT(CreateFakeDEC)
-			
-#if 0
-			mflr	r4								; (TEST/DEBUG)
-			bl		EXT(ml_sense_nmi)				; (TEST/DEBUG)
-			mtlr	r4								; (TEST/DEBUG)
-#endif			
-			
-			lis		r0,HIGH_ADDR(CreateFakeDECCall)	/* Top half of CreateFakeDEC firmware call number */
-			ori		r0,r0,LOW_ADDR(CreateFakeDECCall)	/* Bottom half */
-			sc										/* Do it to it */
-			blr										/* Bye bye, Birdie... */
-
-
-/*
- *			This is the glue to create a shutdown context
- */
- 
-			.align	5
-			.globl	EXT(CreateShutdownCTX)
-
-LEXT(CreateShutdownCTX)
-			
			lis		r0,HIGH_ADDR(CreateShutdownCTXCall)	/* Top half of CreateShutdownCTX firmware call number */
-			ori		r0,r0,LOW_ADDR(CreateShutdownCTXCall)	/* Bottom half */
-			sc										/* Do it to it */
-			blr										/* Bye bye, Birdie... */
-
-/*
- *			This is the glue to choke system
- */
-  
-			.align	5
-			.globl	EXT(ChokeSys)
-
-LEXT(ChokeSys)
-			
-			lis		r0,HIGH_ADDR(Choke)				/* Top half of Choke firmware call number */
-			ori		r0,r0,LOW_ADDR(Choke)			/* Bottom half */
-			sc										/* Do it to it */
-			blr										/* Bye bye, Birdie... */
-
-/* 
- *			Used to initialize the SCC for debugging output
- */
-
-  
-			.align	5
-			.globl	EXT(fwSCCinit)
-
-LEXT(fwSCCinit)
-		
-			mfmsr	r8										/* Save the MSR */
-			mr.		r3,r3									/* See if printer or modem */
-			rlwinm	r12,r8,0,28,25							/* Turn off translation */
-			lis		r10,0xF301								/* Set the top part */
-			rlwinm	r12,r12,0,17,15							/* Turn off interruptions */
-#if 0
-			mtmsr	r12										/* Smash the MSR */
-			isync											/* Make it clean */
-#endif
-
-			ori		r10,r10,0x2000							/* Assume the printer (this is the normal one) */
-			beq+	fwSCCprnt								/* It sure are... */
-			ori		r10,r10,0x0002							/* Move it over to the modem port */
-
-fwSCCprnt:	dcbf	0,r10									/* Ensure it is out */
-			sync
-			eieio
-			dcbi	0,r10									/* Toss it */
-			sync
-
-
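-/*
- *			Every write below repeats one idiom: store a byte to the SCC
- *			control address, dcbf to push it to the device, sync, then dcbi
- *			to discard the stale line.  A hypothetical C rendering (sccPut
- *			and the cache ops are assumed names):
- *
- *				static void sccPut(volatile unsigned char *scc, unsigned char b) {
- *					*scc = b;			// store the byte
- *					dcbf(scc);			// force the line out
- *					sync();				// wait for completion
- *					dcbi(scc);			// toss the cached copy
- *					eieio();			// order against the next access
- *				}
- *
- *			Registers are programmed in pairs: first the register number
- *			(e.g. 0x09), then the value for that register (e.g. 0x80).
- */
-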
-			li		r7,0x09									/* Set the register */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x80									/* Reset channel A */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x04									/* Set the register */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x44									/* x16 clock, 1 stop bit */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x03									/* Set the register */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0xC0									/* 8 bits per char */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x05									/* Set the register */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0xE2									/* DTR mode, 8bit/char */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x02									/* Set the register */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x00									/* Vector 0 */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x0A									/* Set the register */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x00									/* Clear misc controls */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x0B									/* Set the register */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x50									/* B/R gen T/R */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x0C									/* Set the register */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x0A									/* 9600 baud low */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x0D									/* Set the register */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x00									/* 9600 baud high */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x03									/* Set the register */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0xC1									/* 8 bits/char, Rx enable */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x05									/* Set the register */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0xEA									/* 8 bits/char, Tx enable */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x0E									/* Set the register */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x01									/* BR rate gen enable */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x0F									/* Set the register */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x00									/* ints off */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x10									/* Reset ext/stat ints */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x10									/* Reset ext/stat ints */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x01									/* Set the register */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x10									/* int on Rx, no Tx int enable */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x09									/* Set the register */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x0A									/* Master enable, no vector */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x09									/* Set the register */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			li		r7,0x02									/* No vector */
-			stb		r7,0(r10)								/* Set the register */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-			
-			lbz		r7,0(r10)								/* Clear interrupts */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-wSCCrdy:	eieio											/* Barricade it */
-			lbz		r7,0(r10)								/* Get current status */
-			dcbi	0,r10
-			sync
-			andi.	r7,r7,0x04								/* Is transmitter empty? */
-			beq		wSCCrdy									/* Nope... */
-
-			eieio
-
-#if 0
-			mtmsr	r8										/* Restore 'rupts and TR */
-			isync
-#endif
-			blr												/* Leave... */
-
-/*
- *			This routine is used to write debug output to either the modem or printer port.
- *			parm 1 is printer (0) or modem (1); parm 2 is the ID (printed directly); parm 3 is converted to hex and printed
- */
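-/*
- *			A hedged usage sketch (this C prototype is inferred from the comment
- *			above, not declared anywhere in this file):
- *
- *				extern void dbgDisp(unsigned int port, unsigned int id,
- *						unsigned int value);
- *
- *				dbgDisp(0, 'dbg ', addr);			// printer port: print "dbg "
- *													// then addr as 8 hex digits
- */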
-  
-			.align	5
-			.globl	EXT(dbgDisp)
-
-LEXT(dbgDisp)
-
-			mr		r12,r0									/* Keep R0 pristine */
-			lis		r0,HIGH_ADDR(dbgDispCall)				/* Top half of dbgDispCall firmware call number */
-			ori		r0,r0,LOW_ADDR(dbgDispCall)				/* Bottom half */
-
-			sc												/* Go display the stuff */
-
-			mr		r0,r12									/* Restore R0 */
-			blr												/* Return... */
-			
-/*			Here's the low-level part of dbgDisp			*/
-
-			.align	5
-			.globl	EXT(dbgDispLL)
-
-LEXT(dbgDispLL)
-
-dbgDispInt:	mfmsr	r8										/* Save the MSR */
-
-#if 0
-			lis		r10,0xF301			/* (TEST/DEBUG) */
-			ori		r10,r10,0x2002		/* (TEST/DEBUG) */
-			dcbf	0,r10				/* (TEST/DEBUG) */
-			sync						/* (TEST/DEBUG) */
-			dcbi	0,r10				/* (TEST/DEBUG) */
-			eieio						/* (TEST/DEBUG) */
-			li		r7,0x35				/* (TEST/DEBUG) */
-			stb		r7,4(r10)			/* (TEST/DEBUG) */
-			
-			lis		r7,10				/* (TEST/DEBUG) */
-spw6:		addi	r7,r7,-1			/* (TEST/DEBUG) */
-			mr.		r7,r7				/* (TEST/DEBUG) */
-			bne-	spw6				/* (TEST/DEBUG) */
-			dcbf	0,r10				/* (TEST/DEBUG) */
-			sync						/* (TEST/DEBUG) */
-			dcbi	0,r10				/* (TEST/DEBUG) */
-			eieio						/* (TEST/DEBUG) */
-#endif
-
-			rlwinm	r12,r8,0,28,25							/* Turn off translation */
-			rlwinm	r12,r12,0,17,15							/* Turn off interruptions */
-
-			mflr	r11										/* Save the link register */
-
-#if 0
-			mr		r7,r12				/* (TEST/DEBUG) */
-			bl		dumpr7				/* (TEST/DEBUG) */
-#endif
-
-			mr.		r3,r3									/* See if printer or modem */
-			lis		r10,0xF301								/* Set the top part */
-			mr		r3,r4									/* Copy the ID parameter */
-			
-#if 0
-			mr		r9,r12				/* (TEST/DEBUG) */
-			
-			mtmsr	r12					/* (TEST/DEBUG) */
-			isync						/* (TEST/DEBUG) */
-
-#if 0
-			mtmsr	r8					/* (TEST/DEBUG) */
-			isync						/* (TEST/DEBUG) */
-#endif
-
-			lis		r12,0xF301			/* (TEST/DEBUG) */
-			ori		r12,r12,0x2002		/* (TEST/DEBUG) */
-#if 1
-			dcbf	0,r12				/* (TEST/DEBUG) */
-			sync						/* (TEST/DEBUG) */
-			dcbi	0,r12				/* (TEST/DEBUG) */
-#endif
-
-xqrw1:		eieio						/* (TEST/DEBUG) */
-			lbz		r7,0(r12)			/* (TEST/DEBUG) */
-			dcbi	0,r12				/* (TEST/DEBUG) */
-			sync						/* (TEST/DEBUG) */
-			andi.	r7,r7,0x04			/* (TEST/DEBUG) */
-			beq		xqrw1				/* (TEST/DEBUG) */
-			
-			eieio						/* (TEST/DEBUG) */
-			li		r7,0x36				/* (TEST/DEBUG) */
-			stb		r7,4(r12)			/* (TEST/DEBUG) */
-			eieio
-			dcbf	0,r12				/* (TEST/DEBUG) */
-			sync						/* (TEST/DEBUG) */
-			dcbi	0,r12				/* (TEST/DEBUG) */
-			eieio						/* (TEST/DEBUG) */
-			
-			
-			lis		r7,10				/* (TEST/DEBUG) */
-spw7:		addi	r7,r7,-1			/* (TEST/DEBUG) */
-			mr.		r7,r7				/* (TEST/DEBUG) */
-			bne-	spw7				/* (TEST/DEBUG) */
-			dcbf	0,r12				/* (TEST/DEBUG) */
-			sync						/* (TEST/DEBUG) */
-			dcbi	0,r12				/* (TEST/DEBUG) */
-			eieio						/* (TEST/DEBUG) */
-			mr		r12,r9				/* (TEST/DEBUG) */
-#endif
-
-			mtmsr	r12										/* Smash the MSR */
-			isync											/* Make it clean */
-
-			
-#if SIMPLESCC && !NOTQUITEASSIMPLE
-			ori		r10,r10,0x3010							/* Assume the printer (this is the normal one) */
-#else
-			ori		r10,r10,0x2000							/* Assume the printer (this is the normal one) */
-#endif
-			beq+	dbgDprintr								/* It sure are... */
-#if SIMPLESCC && !NOTQUITEASSIMPLE
-			ori		r10,r10,0x0020							/* Move it over to the modem port */
-#else
-			ori		r10,r10,0x0002							/* Move it over to the modem port */
-
-#if !NOTQUITEASSIMPLE
-			lis		r7,0xF300								/* Address of SCC rounded to 128k */
-			ori		r7,r7,0x0032							/* Make it cache inhibited */
-			mtdbatl	3,r7									/* Load DBAT 3 low */
-			lis		r7,0xF300								/* Address of SCC rounded to 128k */
-			ori		r7,r7,0x0002							/* Make it supervisor only */
-			mtdbatu	3,r7									/* Load DBAT 3 high */
-			ori		r12,r12,0x0010							/* Turn on DR */
-			mtmsr	r12										/* Smash the MSR */
-			isync											/* Make it clean */
-
-#endif
-#endif
-			
-dbgDprintr:	sync
-#if 0
-			mr		r7,r10									/* (TEST/DEBUG) */
-			bl		dumpr7									/* (TEST/DEBUG) */
-#endif
-			
-			dcbi	0,r10									/* Toss it */
-			eieio
-			
-#if 0
-			lis		r12,0xF301			/* (TEST/DEBUG) */
-			ori		r12,r12,0x2002		/* (TEST/DEBUG) */
-			dcbf	0,r12				/* (TEST/DEBUG) */
-			sync						/* (TEST/DEBUG) */
-			dcbi	0,r12				/* (TEST/DEBUG) */
-			eieio						/* (TEST/DEBUG) */
-			li		r7,0x37				/* (TEST/DEBUG) */
-			stb		r7,4(r12)			/* (TEST/DEBUG) */
-			
-			lis		r7,12				/* (TEST/DEBUG) */
-spw8:		addi	r7,r7,-1			/* (TEST/DEBUG) */
-			mr.		r7,r7				/* (TEST/DEBUG) */
-			bne-	spw8				/* (TEST/DEBUG) */
-			dcbf	0,r12				/* (TEST/DEBUG) */
-			sync						/* (TEST/DEBUG) */
-			dcbi	0,r12				/* (TEST/DEBUG) */
-			eieio						/* (TEST/DEBUG) */
-#endif
-
-
-/*			Print the ID parameter							*/
-			
-			lis		r12,HIGH_ADDR(fwdisplock)				/* Get the display locker outer */
-			ori		r12,r12,LOW_ADDR(fwdisplock)			/* Last part */
-			
-			lwarx	r7,0,r12								; ?
-
-ddwait0:	lwarx	r7,0,r12								/* Get the lock */
-			mr.		r7,r7									/* Is it locked? */
-			bne-	ddwait0									/* Yup... */
-			stwcx.	r12,0,r12								/* Try to get it */
-			bne-	ddwait0									/* Nope, start all over... */
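-
-/*
- *			The loop just above is the classic PowerPC reservation spinlock:
- *			lwarx loads the lock word and sets a reservation, and stwcx. succeeds
- *			only if nothing else touched the word in between; otherwise we spin.
- *			Roughly equivalent C11 (a sketch, not code from this file):
- *
- *				while (atomic_exchange(&fwdisplock, 1) != 0)
- *					;								// spin until we own the display
- */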
-
-#if 0
-			dcbf	0,r10				/* (TEST/DEBUG) */
-			sync						/* (TEST/DEBUG) */
-			dcbi	0,r10				/* (TEST/DEBUG) */
-			eieio						/* (TEST/DEBUG) */
-			li		r7,0x38				/* (TEST/DEBUG) */
-			stb		r7,6(r10)			/* (TEST/DEBUG) */
-			
-			lis		r7,10				/* (TEST/DEBUG) */
-spwa:		addi	r7,r7,-1			/* (TEST/DEBUG) */
-			mr.		r7,r7				/* (TEST/DEBUG) */
-			bne-	spwa				/* (TEST/DEBUG) */
-			dcbf	0,r10				/* (TEST/DEBUG) */
-			sync						/* (TEST/DEBUG) */
-			dcbi	0,r10				/* (TEST/DEBUG) */
-			eieio						/* (TEST/DEBUG) */
-#endif
-			
-			rlwinm	r3,r3,8,0,31							/* Get the first character */
-			bl		dbgDchar								/* Print it */
-			rlwinm	r3,r3,8,0,31							/* Get the second character */
-			bl		dbgDchar								/* Print it */
-			rlwinm	r3,r3,8,0,31							/* Get the third character */
-			bl		dbgDchar								/* Print it */
-			rlwinm	r3,r3,8,0,31							/* Get the fourth character */
-			bl		dbgDchar								/* Print it */
-			
-			li		r3,0x20									/* Get a space for a separator */
-			bl		dbgDchar								/* Print it */
-			bl		dbg4byte								/* Print register 5 in hex */			
-
-			li		r3,0x0A									/* Linefeed */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x0D									/* Carriage return */
-			bl		dbgDchar								/* Send it */
-			
-			mtlr	r11										/* Get back the return */
-#if !SIMPLESCC && !NOTQUITEASSIMPLE
-			li		r7,0									/* Get a zero */
-			mtdbatu	3,r7									/* Invalidate DBAT 3 upper */
-			mtdbatl	3,r7									/* Invalidate DBAT 3 lower */
-#endif
-			lis		r12,HIGH_ADDR(fwdisplock)				/* Get the display locker outer */
-			li		r7,0									/* Get a zero */
-			ori		r12,r12,LOW_ADDR(fwdisplock)			/* Last part */
-			dcbi	0,r10									/* ? */
-			stw		r7,0(r12)								/* Release the display lock */
-			mtmsr	r8										/* Restore the MSR */
-			isync											/* Wait for it */
-			blr												/* Leave... */
-			
-
-dbg4byte:	mflr	r12										/* Save the return */
-
-			lis		r4,HIGH_ADDR(hexTab)					/* Point to the top of table */
-			li		r6,8									/* Set number of hex digits to dump */
-			ori		r4,r4,LOW_ADDR(hexTab)					/* Point to the bottom of table */
-						
-dbgDnext:	rlwinm	r5,r5,4,0,31							/* Rotate a nybble */
-			subi	r6,r6,1									/* Back down the count */
-			rlwinm	r3,r5,0,28,31							/* Isolate the last nybble */
-			lbzx	r3,r4,r3								/* Convert to ascii */
-			bl		dbgDchar								/* Print it */
-			mr.		r6,r6									/* Any more? */
-			bne+	dbgDnext								/* Convert 'em all... */
-
-			li		r3,0x20									/* Space */
-			bl		dbgDchar								/* Send it */
-			mtlr	r12										/* Restore LR */
-			blr												/* Return... */
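-
-/*
- *			dbg4byte rotates r5 left four bits at a time, so after each rotate
- *			the next most significant hex digit sits in the low nybble, ready to
- *			index hexTab.  The same loop in C (a sketch; emit stands in for
- *			dbgDchar):
- *
- *				for (i = 0; i < 8; i++) {
- *					v = (v << 4) | (v >> 28);		// rotate left 4
- *					emit("0123456789ABCDEF"[v & 0xF]);
- *				}
- */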
-			
-/*			Write to whichever serial port.  Try to leave it clean, but not too hard (this is a hack) */
-			
-dbgDchar:	
-#if SIMPLESCC && !NOTQUITEASSIMPLE		
-			stb		r3,0(r10)								/* ? */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-
-			lis		r7,3									/* Get enough for about 1ms */
-
-dbgDchar0:	addi	r7,r7,-1								/* Count down */
-			mr.		r7,r7									/* Waited long enough? */
-			bgt+	dbgDchar0								/* Nope... */
-#endif
-#if NOTQUITEASSIMPLE
-#if 0
-			li		r7,0x01									/* ? */
-			stb		r7,0(r10)								/* ? */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			lbz		r7,0(r10)								/* ? */
-			dcbi	0,r10									/* Force it out */
-			sync 											/* kill it off */
-			eieio
-
-			li		r7,0x00									/* ? */
-			stb		r7,0(r10)								/* ? */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			lbz		r7,0(r10)								/* ? */
-			dcbi	0,r10									/* Force it out */
-			sync 											/* kill it off */
-			eieio
-#endif
-		
-qrw1:		eieio											/* Barricade it */
-			lbz		r7,0(r10)								/* ? */
-			dcbi	0,r10
-			sync
-			andi.	r7,r7,0x04								/* ? */
-			beq		qrw1									/* Nope... */
-
-			eieio
-
-			stb		r3,4(r10)								/* ? */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-qrw2:		eieio											/* Barricade it */
-			lbz		r7,0(r10)								/* ? */
-			dcbi	0,r10
-			sync
-			andi.	r7,r7,0x04								/* ? */
-			beq		qrw2									/* Nope... */
-
-#if 0
-			eieio
-			li		r7,0x10									/* ? */
-			stb		r7,0(r10)								/* ? */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-
-			lbz		r7,0(r10)								/* ? */
-			dcbi	0,r10									/* Force it out */
-			sync 											/* kill it off */
-			eieio
-#endif		
-			
-			lis		r7,0x0080								/* ? */
-			lis		r9,0xF300								/* ? */
-			ori		r7,r7,0x010F							/* ? */
-			stw		r7,0x28(r9)								/* ? */
-			dcbf	0,r10									/* Force it out */
-			sync 											/* Make sure it's out there */
-			dcbi	0,r10
-			eieio
-			
-#endif
-#if !SIMPLESCC && !NOTQUITEASSIMPLE
-			rlwinm	r9,r10,0,0,29							/* Get channel a */
-			eieio											/* Barricade it */
-		
-			li		r7,0x03									/* ? */
-			stb		r7,0(r9)								/* ? */
-			eieio											/* Barricade it */
-
-			lbz		r7,0(r9)								/* ? */
-		
-			eieio											/* Barricade it */
-			lbz		r7,0(r9)								/* ? */
-			
-dchrw1:		eieio											/* Barricade it */
-			lbz		r7,0(r10)								/* ? */
-			andi.	r7,r7,0x04								/* ? */
-			beq		dchrw1									/* Nope... */
-			
-			stb		r3,4(r10)								/* ? */
-			sync											/* Make sure it's there */
-			eieio											/* Don't get confused */
-
-dchrw2:		eieio											/* Barricade it */
-			lbz		r7,0(r10)								/* ? */
-			andi.	r7,r7,0x04								/* ? */
-			beq		dchrw2									/* Nope... */
-	
-			eieio											/* Avoid confusion */
-			lbz		r7,0(r10)								/* ? */
-			andi.	r7,r7,0x40								/* ? */
-			beq+	nounder									/* Nope... */
-
-			eieio											/* Avoid confusion */
-			li		r7,0xC0									/* ? */
-			stb		r7,0(r10)								/* ? */
-
-nounder:	eieio											/* Avoid confusion */
-			li		r7,0x10									/* ? */
-			stb		r7,0(r10)								/* ? */
-
-			eieio											/* Avoid confusion */
-			li		r7,0x38									/* ? */
-			stb		r7,0(r9)								/* ? */
-		
-			eieio											/* Avoid confusion */
-			li		r7,0x30									/* ? */
-			stb		r7,0(r10)								/* ? */
-
-			eieio											/* Avoid confusion */
-			li		r7,0x20									/* ? */
-			stb		r7,0(r10)								/* ? */
-			eieio											/* Avoid confusion */
-			sync
-
-#endif
-			blr												/* Return */
-
-			.globl hexTab
-
-hexTab:		STRINGD	"0123456789ABCDEF"						/* Convert hex numbers to printable hex */ 			
-			
-
-/*
- *			Dumps all the registers in the savearea in R13
- */
- 
-
-			.align	5
-			.globl	EXT(dbgRegsLL)
-
-LEXT(dbgRegsLL)
-
-			b		EXT(FCReturn)					; Bye dudes...
-#if 0
-			li		r3,0									/* ? */
-			bl		dbgRegsCm								/* Join on up... */
-			b		EXT(FCReturn)					; Bye dudes...
-			
-			
-			.align	5
-			.globl	EXT(dbgRegs)
-
-LEXT(dbgRegs)
-
-dbgRegsCm:	mfmsr	r8										/* Save the MSR */
-			mr.		r3,r3									/* ? */
-			rlwinm	r12,r8,0,28,25							/* Turn off translation */
-			lis		r10,0xF301								/* Set the top part */
-			rlwinm	r12,r12,0,17,15							/* Turn off interruptions */
-			mtmsr	r12										/* Smash the MSR */
-			isync											/* Make it clean */
-#if SIMPLESCC && !NOTQUITEASSIMPLE
-			ori		r10,r10,0x3010							/* ? */
-#else
-			ori		r10,r10,0x2000							/* ? */
-#endif
-			mflr	r11										/* Save the link register */
-			beq+	dbgDprints								/* It sure are... */
-#if SIMPLESCC && !NOTQUITEASSIMPLE
-			ori		r10,r10,0x0020							/* ? */
-#else
-			ori		r10,r10,0x0002							/* ? */
-
-			dcbf	0,r10									/* Ensure it is out */
-			sync
-			dcbi	0,r10									/* Toss it */
-#if !NOTQUITEASSIMPLE
-			lis		r7,0xF300								/* ? */
-			ori		r7,r7,0x0032							/* ? */
-			mtdbatl	3,r7									/* ? */
-			lis		r7,0xF300								/* ? */
-			ori		r7,r7,0x0002							/* ? */
-			mtdbatu	3,r7									/* ? */
-			ori		r12,r12,0x0010							/* ? */
-			mtmsr	r12										/* ? */
-			isync											/* ? */
-#endif
-#endif
-
-dbgDprints:	
-			lis		r3,HIGH_ADDR(fwdisplock)				/* Get the display locker outer */
-			ori		r3,r3,LOW_ADDR(fwdisplock)				/* Last part */
-			
-			lwarx	r5,0,r3									; ?
-ddwait1:	lwarx	r5,0,r3									/* Get the lock */
-			mr.		r5,r5									/* Is it locked? */
-			bne-	ddwait1									/* Yup... */
-			stwcx.	r3,0,r3									/* Try to get it */
-			bne-	ddwait1									/* Nope, start all over... */
-			
-			li		r3,0x52									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x65									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x67									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x73									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-
-			lwz		r5,saver0(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver1(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver2(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver3(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			li		r3,0x0A									/* Linefeed */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x0D									/* Carriage return */
-			bl		dbgDchar								/* Send it */
-
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			lwz		r5,saver4(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver5(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver6(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver7(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			li		r3,0x0A									/* Linefeed */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x0D									/* Carriage return */
-			bl		dbgDchar								/* Send it */
-
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			lwz		r5,saver8(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver9(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver10(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver11(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			li		r3,0x0A									/* Linefeed */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x0D									/* Carriage return */
-			bl		dbgDchar								/* Send it */
-
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			lwz		r5,saver12(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver13(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver14(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver15(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			li		r3,0x0A									/* Linefeed */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x0D									/* Carriage return */
-			bl		dbgDchar								/* Send it */
-
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			lwz		r5,saver16(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver17(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver18(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver19(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			li		r3,0x0A									/* Linefeed */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x0D									/* Carriage return */
-			bl		dbgDchar								/* Send it */
-
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			lwz		r5,saver20(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver21(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver22(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver23(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			li		r3,0x0A									/* Linefeed */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x0D									/* Carriage return */
-			bl		dbgDchar								/* Send it */
-
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			lwz		r5,saver24(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver25(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver26(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver27(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			li		r3,0x0A									/* Linefeed */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x0D									/* Carriage return */
-			bl		dbgDchar								/* Send it */
-
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			lwz		r5,saver28(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver29(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver30(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,saver31(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			li		r3,0x0A									/* Linefeed */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x0D									/* Carriage return */
-			bl		dbgDchar								/* Send it */
-
-/*			Segment registers */
-
-			li		r3,0x53									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x65									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x67									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x73									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-
-			lwz		r5,savesr0(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,savesr1(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,savesr2(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,savesr3(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			li		r3,0x0A									/* Linefeed */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x0D									/* Carriage return */
-			bl		dbgDchar								/* Send it */
-
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			lwz		r5,savesr4(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,savesr5(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,savesr6(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,savesr7(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			li		r3,0x0A									/* Linefeed */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x0D									/* Carriage return */
-			bl		dbgDchar								/* Send it */
-
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			lwz		r5,savesr8(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,savesr9(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,savesr10(r13)						/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,savesr11(r13)						/* Do register */
-			bl		dbg4byte								/* Print */			
-			li		r3,0x0A									/* Linefeed */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x0D									/* Carriage return */
-			bl		dbgDchar								/* Send it */
-
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			lwz		r5,savesr12(r13)						/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,savesr13(r13)						/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,savesr14(r13)						/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,savesr15(r13)						/* Do register */
-			bl		dbg4byte								/* Print */			
-			li		r3,0x0A									/* Linefeed */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x0D									/* Carriage return */
-			bl		dbgDchar								/* Send it */
-			
-			li		r3,0x30									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x31									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x64									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x64									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			lwz		r5,savesrr0(r13)						/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,savesrr1(r13)						/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,savedar(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,savedsisr(r13)						/* Do register */
-			bl		dbg4byte								/* Print */			
-			li		r3,0x0A									/* Linefeed */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x0D									/* Carriage return */
-			bl		dbgDchar								/* Send it */
-			
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x6C									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x63									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x63									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x20									/* Print eyecatcher */
-			bl		dbgDchar								/* Send it */
-			lwz		r5,savelr(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,savecr(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			lwz		r5,savectr(r13)							/* Do register */
-			bl		dbg4byte								/* Print */			
-			li		r3,0x0A									/* Linefeed */
-			bl		dbgDchar								/* Send it */
-			li		r3,0x0D									/* Carriage return */
-			bl		dbgDchar								/* Send it */
-			mtlr	r11										/* Get back the return */
-			dcbi	0,r10									/* ? */
-#if !SIMPLESCC && !NOTQUITEASSIMPLE
-			li		r7,0									/* Get a zero */
-			mtdbatu	3,r7									/* Invalidate DBAT 3 upper */
-			mtdbatl	3,r7									/* Invalidate DBAT 3 lower */
-#endif
-			lis		r3,HIGH_ADDR(fwdisplock)				/* Get the display locker outer */
-			li		r7,0									/* Get a zero */
-			ori		r3,r3,LOW_ADDR(fwdisplock)				/* Last part */
-			stw		r7,0(r3)								/* Clear display lock */
-			mtmsr	r8										/* Restore the MSR */
-			isync											/* Wait for it */
-			blr												/* Leave... */
-#endif			
-			
-/*
- *			Used for debugging to leave stuff in 0x380-0x3FF (128 bytes).
- *			Mapping is V=R.  Stores and loads are real.
- */
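-
-/*
- *			In effect this is memcpy((void *)0x380, src, 128), unrolled one
- *			cache line (32 bytes) at a time: dcbz claims each destination line
- *			without fetching it, and dcbt prefetches the next source line.  The
- *			shape in C (a sketch; dcbz/dcbt shown as pseudo-intrinsics):
- *
- *				for (line = 0; line < 4; line++) {
- *					dcbz(dst);						// claim line, no fetch
- *					dcbt(src + 32);					// start next line coming
- *					memcpy(dst, src, 32);			// really done in registers
- *					src += 32;  dst += 32;
- *				}
- */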
-			
-			.align	5
-			.globl	EXT(dbgCkpt)
-
-LEXT(dbgCkpt)
-
-			mr		r12,r0									/* Keep R0 pristine */
-			lis		r0,HIGH_ADDR(dbgCkptCall)				/* Top half of dbgCkptCall firmware call number */
-			ori		r0,r0,LOW_ADDR(dbgCkptCall)				/* Bottom half */
-
-			sc												/* Go stash the stuff */
-
-			mr		r0,r12									/* Restore R0 */
-			blr												/* Return... */
-			
-/*			Here's the low-level part of dbgCkpt			*/
-
-			.align	5
-			.globl	EXT(dbgCkptLL)
-
-LEXT(dbgCkptLL)
-
-
-			li		r12,0x380								/* Point to output area */
-			li		r1,32									/* Get line size */
-			dcbz	0,r12									/* Make sure we don't fetch a cache line */
-
-			lwz		r4,0x00(r3)								/* Load up storage to checkpoint */
-			
-			dcbt	r1,r3									/* Start in the next line */
-			
-			lwz		r5,0x04(r3)								/* Load up storage to checkpoint */
-			lwz		r6,0x08(r3)								/* Load up storage to checkpoint */
-			lwz		r7,0x0C(r3)								/* Load up storage to checkpoint */
-			lwz		r8,0x10(r3)								/* Load up storage to checkpoint */
-			lwz		r9,0x14(r3)								/* Load up storage to checkpoint */
-			lwz		r10,0x18(r3)							/* Load up storage to checkpoint */
-			lwz		r11,0x1C(r3)							/* Load up storage to checkpoint */
-			
-			add		r3,r3,r1								/* Bump input */
-			
-			stw		r4,0x00(r12)							/* Store it */
-			stw		r5,0x04(r12)							/* Store it */
-			stw		r6,0x08(r12)							/* Store it */
-			stw		r7,0x0C(r12)							/* Store it */
-			stw		r8,0x10(r12)							/* Store it */
-			stw		r9,0x14(r12)							/* Store it */
-			stw		r10,0x18(r12)							/* Store it */
-			stw		r11,0x1C(r12)							/* Store it */
-			
-			dcbz	r1,r12									/* Clear the next line */
-			add		r12,r12,r1								/* Point to next output line */
-
-			lwz		r4,0x00(r3)								/* Load up storage to checkpoint */
-			lwz		r5,0x04(r3)								/* Load up storage to checkpoint */
-			lwz		r6,0x08(r3)								/* Load up storage to checkpoint */
-			lwz		r7,0x0C(r3)								/* Load up storage to checkpoint */
-			lwz		r8,0x10(r3)								/* Load up storage to checkpoint */
-			lwz		r9,0x14(r3)								/* Load up storage to checkpoint */
-			lwz		r10,0x18(r3)							/* Load up storage to checkpoint */
-			lwz		r11,0x1C(r3)							/* Load up storage to checkpoint */
-			
-			dcbt	r1,r3									/* Touch the next line */
-			add		r3,r3,r1								/* Point to next input line */
-				
-			stw		r4,0x00(r12)							/* Store it */
-			stw		r5,0x04(r12)							/* Store it */
-			stw		r6,0x08(r12)							/* Store it */
-			stw		r7,0x0C(r12)							/* Store it */
-			stw		r8,0x10(r12)							/* Store it */
-			stw		r9,0x14(r12)							/* Store it */
-			stw		r10,0x18(r12)							/* Store it */
-			stw		r11,0x1C(r12)							/* Store it */
-
-			dcbz	r1,r12									/* Clear the next line */
-			add		r12,r12,r1								/* Point to next output line */
-
-			lwz		r4,0x00(r3)								/* Load up storage to checkpoint */
-			lwz		r5,0x04(r3)								/* Load up storage to checkpoint */
-			lwz		r6,0x08(r3)								/* Load up storage to checkpoint */
-			lwz		r7,0x0C(r3)								/* Load up storage to checkpoint */
-			lwz		r8,0x10(r3)								/* Load up storage to checkpoint */
-			lwz		r9,0x14(r3)								/* Load up storage to checkpoint */
-			lwz		r10,0x18(r3)							/* Load up storage to checkpoint */
-			lwz		r11,0x1C(r3)							/* Load up storage to checkpoint */
-			
-			dcbt	r1,r3									/* Touch the next line */
-			add		r3,r3,r1								/* Point to next input line */
-				
-			stw		r4,0x00(r12)							/* Store it */
-			stw		r5,0x04(r12)							/* Store it */
-			stw		r6,0x08(r12)							/* Store it */
-			stw		r7,0x0C(r12)							/* Store it */
-			stw		r8,0x10(r12)							/* Store it */
-			stw		r9,0x14(r12)							/* Store it */
-			stw		r10,0x18(r12)							/* Store it */
-			stw		r11,0x1C(r12)							/* Store it */
-
-			dcbz	r1,r12									/* Clear the next line */
-			add		r12,r12,r1								/* Point to next output line */
-
-			lwz		r4,0x00(r3)								/* Load up storage to checkpoint */
-			lwz		r5,0x04(r3)								/* Load up storage to checkpoint */
-			lwz		r6,0x08(r3)								/* Load up storage to checkpoint */
-			lwz		r7,0x0C(r3)								/* Load up storage to checkpoint */
-			lwz		r8,0x10(r3)								/* Load up storage to checkpoint */
-			lwz		r9,0x14(r3)								/* Load up storage to checkpoint */
-			lwz		r10,0x18(r3)							/* Load up storage to checkpoint */
-			lwz		r11,0x1C(r3)							/* Load up storage to checkpoint */
-			
-			stw		r4,0x00(r12)							/* Store it */
-			stw		r5,0x04(r12)							/* Store it */
-			stw		r6,0x08(r12)							/* Store it */
-			stw		r7,0x0C(r12)							/* Store it */
-			stw		r8,0x10(r12)							/* Store it */
-			stw		r9,0x14(r12)							/* Store it */
-			stw		r10,0x18(r12)							/* Store it */
-			stw		r11,0x1C(r12)							/* Store it */
-			
-			blr
-
-			
-/*
- *			Do Preemption.  Forces a T_PREEMPT trap to allow a preemption to occur.
- */
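-
-/*
- *			The stub below, and the fake-trap stubs after it, all share one
- *			trick: overwrite the exception code in the savearea and let FCReturn
- *			redispatch, so the normal handler path runs exactly as if that trap
- *			had been taken.  Conceptually (a sketch; the savearea is really
- *			addressed through r13 at offset saveexception):
- *
- *				savearea->saveexception = T_PREEMPT;	// or T_CSWITCH, ...
- *				FCReturn();								// redispatch
- */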
-
-			.align	5
-			.globl	EXT(DoPreemptLL)
-
-LEXT(DoPreemptLL)
-
-			li		r3,T_PREEMPT					/* Set preemption interrupt value */
-			stw		r3,saveexception(r13)			/* Modify the exception type to preemption */
-			b		EXT(FCReturn)					; Bye dudes...
-
-			
-/*
- *			Force 'rupt handler to dispatch with new context
- *			R3 at the call contains the new savearea.
- *			R4 at the call contains a return code to pass back in R3.
- *			Forces a T_CSWITCH
- */
-
-			.align	5
-			.globl	EXT(SwitchContextLL)
-
-LEXT(SwitchContextLL)
-
-			li		r3,T_CSWITCH					/* Set context switch value */
-			stw		r3,saveexception(r13)			/* Modify the exception type to switch context */
-			b		EXT(FCReturn)					; Bye dudes...
-
-			
-/*
- *			Create a fake I/O 'rupt.  
- *			Forces a T_INTERRUPT trap to pretend that an actual I/O interrupt occurred.
- */
-
-			.align	5
-			.globl	EXT(CreateFakeIOLL)
-
-LEXT(CreateFakeIOLL)
-
-			li		r3,T_INTERRUPT					/* Set external interrupt value */
-			stw		r3,saveexception(r13)			/* Modify the exception type to external */
-			b		EXT(FCReturn)					; Bye dudes...
-			
-/*
- *			Create a shutdown context
- *			Forces a T_SHUTDOWN trap.
- */
-
-			.align	5
-			.globl	EXT(CreateShutdownCTXLL)
-
-LEXT(CreateShutdownCTXLL)
-
-			li		r3,T_SHUTDOWN					/* Set shutdown exception value */
-			stw		r3,saveexception(r13)			/* Modify the exception type to shutdown */
-			b		EXT(FCReturn)					; Bye dudes...
-			
-/*
- *			Create a fake decrementer 'rupt.  
- *			Forces a T_DECREMENTER trap to pretend that an actual decrementer interrupt occurred.
- */
-
-			.align	5
-			.globl	EXT(CreateFakeDECLL)
-
-LEXT(CreateFakeDECLL)
-
-			li		r3,T_DECREMENTER				/* Set decrementer interrupt value */
-			stw		r3,saveexception(r13)			/* Modify the exception type to decrementer */
-			b		EXT(FCReturn)					; Bye dudes...
-
-/*
- *			Choke the system.  
- */
-
-			.align	5
-			.globl	EXT(DoChokeLL)
-
-LEXT(DoChokeLL)
-
-			li		r3,T_CHOKE						; Set choke exception value
-			stw		r3,saveexception(r13)			; Modify the exception type to choke
-			b		EXT(FCReturn)					; Bye dudes...
-
-/*
- *			Null firmware call 
- */
-
-			.align	5
-			.globl	EXT(NullLL)
-
-LEXT(NullLL)
-
-			li		r3,T_IN_VAIN					; Set to just ignore this one
-			b		EXT(FCReturn)					; Bye dudes...
-
-;
-;			Instrumented null firmware call (stamps the performance counters)
-;
-
-			.align	5
-			.globl	EXT(iNullLL)
-
-LEXT(iNullLL)
-
-			mfspr	r4,pmc1							; Get stamp
-			stw		r4,0x6100+(9*16)+0x0(0)			; Save it
-#if 1
-			mfspr	r4,pmc2							; Get stamp
-			stw		r4,0x6100+(9*16)+0x4(0)			; Save it
-			mfspr	r4,pmc3							; Get stamp
-			stw		r4,0x6100+(9*16)+0x8(0)			; Save it
-			mfspr	r4,pmc4							; Get stamp
-			stw		r4,0x6100+(9*16)+0xC(0)			; Save it
-#endif
-			li		r3,T_IN_VAIN					; Set to just ignore this one
-			b		EXT(FCReturn)					; Bye dudes...
-			
-;
-;			Set the low level trace flags 
-;
- 
-			.align	5
-			.globl	EXT(LLTraceSet)
-
-LEXT(LLTraceSet)
-
-			mr		r4,r3							; Save the new value 
-			
-			lwz		r3,traceMask(0)					; Get the old trace flags to pass back 
-			stw		r4,traceMask(0)					; Replace with the new ones
-			blr										; Leave... 
-
-#if 0
-	
-/*
-; ***************************************************************************
-;
-;			----------------- Grateful Deb ----------------
-;
-;			Debugging: direct draw into main screen menu bar
-;
-;			Takes R4 value, converts it to hex characters and displays it.
-;
-;			Gotta make sure the DCBST is done to force the pixels from the cache.
-;
-;			Position is taken as column, row (0 based) from R3.
-;			Characters are from hexfont, and are 16x16 pixels. 
-;
-;			Only works with two processors so far
-;
-;
-; ***************************************************************************
-*/
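-
-/*
- *			How the pixel expansion below works: each font row is 16 one-bit
- *			pixels; srawi smears a pixel's bit across a register, which is then
- *			shifted down to the framebuffer depth and packed into the output
- *			word.  In C terms (a sketch; depth is bits per pixel, under 32):
- *
- *				bit  = (fontrow >> 15) & 1;			// leftmost font pixel
- *				px   = bit ? ((1U << depth) - 1) : 0;	// solid pixel or blank
- *				word = (word << depth) | px;		// pack into output word
- *				fontrow <<= 1;						// next font pixel
- */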
-
-#define GDfromright 20
-#define GDfontsize 16
-
-			.align	5
-			.globl	EXT(GratefulDeb)
-
-LEXT(GratefulDeb)
-
-			mfspr	r6,pir							/* Get the PIR */
-			lis		r5,HIGH_ADDR(EXT(GratefulDebWork))	/* Point to our work area */
-			rlwinm	r6,r6,8,23,23					/* Get part of the offset to our processor's area */
-			ori		r5,r5,LOW_ADDR(EXT(GratefulDebWork))	/* Start building the address */
-			rlwimi	r6,r6,2,21,21					/* Get the rest of the offset to our processor's area */
-			add		r6,r6,r5						/* Point at our CPU's work area */
-			mfmsr	r5								/* Get that MSR */
-			stmw	r0,GDsave(r6)					/* Save all registers */
-			lwz		r10,GDready(r6)					/* See if we're all ready to go */
-			ori		r0,r5,0x2000					/* Turn on the floating point */
-			mr		r31,r6							/* Get a more sane base register */
-			mr.		r10,r10							/* Are we all set? */
-			mtmsr	r0								/* Enable floating point */
-			isync
-			
-			stfd	f0,GDfp0(r31)					/* Save FP */
-			stfd	f1,GDfp1(r31)					/* Save FP */
-			stfd	f2,GDfp2(r31)					/* Save FP */
-			stfd	f3,GDfp3(r31)					/* Save FP */
-		
-			beq-	GDbailout						/* Go and bail... */
-			
-			rlwinm	r25,r3,0,16,31					/* Isolate just the row number */
-			lwz		r28,GDtopleft(r31)				/* Get the physical address of our line 0 */
-			rlwinm	r3,r3,16,16,31					/* Isolate the column number */
-			lwz		r27,GDrowbytes(r31)				/* Get the number of bytes per row */
-			lwz		r9,GDrowchar(r31)				/* Get the number of bytes per row of fully leaded characters */
-			lwz		r26,GDdepth(r31)				/* Get the bit depth */
-			mullw	r25,r25,r9						/* get offset to the row to write in bytes */
-			lwz		r24,GDcollgn(r31)				/* Get the size of columns in bytes */
-			add		r25,r28,r25						/* Physical address of row */
-			mullw	r3,r3,r24						/* Get byte offset to first output column */
-			
-			li		r9,32							/* Get the initial shift calc */
-			
-			lis		r20,HIGH_ADDR(hexfont)			/* Point to the font */
-			
-			li		r18,GDfontsize					/* Get the number of rows in the font */
-			ori		r20,r20,LOW_ADDR(hexfont)		/* Point to the low part */
-			add		r21,r25,r3						/* Physical address of top left output pixel */
-			sub		r9,r9,r26						/* Get right shift justifier for pixel size */
-			li		r7,32							/* Number of bits per word */
-
-startNybble:
-			la		r6,GDrowbuf1(r31)				/* Point to the row buffer */
-			li		r19,8							/* Get the number of characters in a row */
-			
-getNybble:	rlwinm	r10,r4,9,23,26					/* Get the top nybble * 32 */
-			rlwinm	r4,r4,4,0,31					/* Rotate a nybble */
-			add		r10,r20,r10						/* Point to the character in the font */
-			
-			rlwinm	r16,r26,4,0,27					/* Width of row in actual bits */
-			lhz		r15,0(r10)						/* Get the next row of the font */
-			
-rendrow:	rlwinm	r17,r15,16,0,0					/* Get the next font pixel in the row */
-			rlwinm	r15,r15,1,16,31					/* Move in the next font pixel */
-			srawi	r17,r17,31						/* Fill with 1s if black and 0s if white (reversed) */
-			
-			slw		r14,r14,r26						/* Make room for our pixel in a register */
-			srw		r17,r17,r9						/* Isolate one pixel's worth of black or white */
-			sub.	r7,r7,r26						/* See how many bits are left */
-			sub		r16,r16,r26						/* Count how many bits are left to store for this row */
-			or		r14,r14,r17						/* Put in the pixel */
-			bne+	notfull							/* Finish rendering this word */
-			
-			not		r14,r14							/* Invert to black on white */
-			stw		r14,0(r6)						/* Write out the word */
-			li		r7,32							/* Bit per word count */
-			addi	r6,r6,4							/* Point to the next word */
-			
-notfull:	mr.		r16,r16							/* Have we finished the whole character row? */			
-			bne+	rendrow							/* Finish rendering the row */
-		
-			addic.	r19,r19,-1						/* Are we finished with a whole display row yet? */
-			bne+	getNybble						/* Not yet... */
-			
-			la		r6,GDrowbuf1(r31)				/* Point to the row buffer */
-			rlwinm	r19,r26,31,0,29					/* Number of cache lines (depth/2) */
-			mr		r14,r21							/* Get the frame buffer address */
-			
-//			BREAKPOINT_TRAP
-
-blitrow:	lfd		f0,0(r6)						/* Load a line */
-			lfd		f1,8(r6)					
-			lfd		f2,16(r6)					
-			lfd		f3,24(r6)					
-			
-			stfd	f0,0(r14)						/* Blit a line */
-			stfd	f1,8(r14)					
-			stfd	f2,16(r14)					
-			stfd	f3,24(r14)	
-			
-			addi	r6,r6,32						/* Next buffered line */
-			
-			dcbst	0,r14							/* Force the line to the screen */
-			sync									/* Make sure the line is on its way */
-			eieio									/* Make sure we beat the invalidate */
-			dcbi	0,r14							/* Make sure we leave no paradox */
-			
-			addic.	r19,r19,-1						/* Done all lines yet? */
-			addi	r14,r14,32						/* Point to the next output */
-			bne+	blitrow							/* Nope, do it some more... */
-			
-			addic.	r18,r18,-1						/* Have we done all the rows in the character yet? */
-			addi	r20,r20,2						/* Offset the font to the next row */
-			add		r21,r21,r27						/* Point to start of next row */
-			bne+	startNybble						/* Nope, go through the word one more time... */
-					
-GDbailout:	mr		r1,r31							/* Move the workarea base */
-	
-			lfd		f0,GDfp0(r31)					/* Restore FP */
-			lfd		f1,GDfp1(r31)					/* Restore FP */
-			lfd		f2,GDfp2(r31)					/* Restore FP */
-			lfd		f3,GDfp3(r31)					/* Restore FP */
-			
-			mtmsr	r5								/* Disable floating point */
-			isync
-			
-			lmw		r3,GDsave+12(r1)				/* Restore most registers */
-			lwz		r0,GDsave(r1)					/* Restore R0 */
-			lwz		r1,GDsave+4(r1)					/* Finally, R1 */
-			blr										/* Leave... */
-			
-
-/*
- *			void GratefulDebDisp(unsigned int coord, unsigned int data);
- */
-
-
-			.align	5
-			.globl	EXT(GratefulDebDisp)
-
-LEXT(GratefulDebDisp)
-
-			mfmsr	r9								/* Save the current MSR */
-			mflr	r7								/* Save the return */
-			andi.	r8,r9,0x7FCF					/* Clear interrupt and translation */
-			mtmsr	r8								/* Turn 'em really off */
-			isync									/* Make sure about the translation part */
-			bl		EXT(GratefulDeb)				/* Display it */
-			mtmsr	r9								/* Restore interrupt and translation */
-			mtlr	r7								/* Restore return */
-			isync									/* Make sure */
-			blr
-
-			
-#endif
-
-/*
- *			void checkNMI(void);
- */
-
-
-			.align	5
-			.globl	EXT(checkNMI)
-
-LEXT(checkNMI)
-		
-			mfmsr	r9								/* Save it */
-			andi.	r8,r9,0x7FCF					/* Clear it */
-			mtmsr	r8								/* Disable it */
-			isync									/* Fence it */
-			lis		r7,0xF300						/* Find it */
-			lis		r2,hi16(MASK(MSR_VEC))			; Get the vector enable 
-			ori		r7,r7,0x0020					/* Find it */
-			ori		r2,r2,lo16(MASK(MSR_FP))		; Get the FP enable 
-			dcbi	0,r7							/* Toss it */
-			sync									/* Sync it */
-			andc	r9,r9,r2						; Clear VEC and FP enables
-			eieio									/* Get it */
-			lwz		r6,0x000C(r7)					/* Check it */
-			eieio									/* Fence it */
-			dcbi	0,r7							/* Toss it */
-			rlwinm.	r4,r6,0,19,19					/* Check it */
-			rlwinm	r6,r6,0,20,18					/* Clear it */
-			sync									/* Sync it */
-			eieio									/* Fence it */
-			beq+	xnonmi							/* Branch on it */
-
-			stw		r6,0x0008(r7)					/* Reset it */
-			sync									/* Sync it */
-			dcbi	0,r6							/* Toss it */
-			eieio									/* Fence it */
-
-			mtmsr	r9								/* Restore it */
-			isync									/* Hold it */
-
-			BREAKPOINT_TRAP							/* Kill it */
-			blr										/* Return from it */
-
-xnonmi:												/* Label it */
-			mtmsr	r9								/* Restore it */
-			isync									/* Hold it */
-			blr										/* Return from it */
-
-;
-;			Saves floating point registers
-;
-
-			.align	5
-			.globl	EXT(stFloat)
-
-LEXT(stFloat)
-
-			lis		r2,hi16(MASK(MSR_VEC))			; Get the vector enable 
-			li		r4,0
-			ori		r2,r2,lo16(MASK(MSR_FP))		; Get the FP enable 
-			ori		r4,r4,lo16(MASK(MSR_EE))		; Get the EE bit
-
-			mfmsr	r0								; Save the MSR
-
-			andc	r4,r0,r4						; Clear EE
-			ori		r4,r4,lo16(MASK(MSR_FP))		; Enable floating point
-			mtmsr	r4
-			isync
-
-			andc	r0,r0,r2						; Clear VEC and FP enables
-			
-			stfd	f0,0x00(r3)
-			stfd	f1,0x08(r3)
-			stfd	f2,0x10(r3)
-			stfd	f3,0x18(r3)
-			stfd	f4,0x20(r3)
-			stfd	f5,0x28(r3)
-			stfd	f6,0x30(r3)
-			stfd	f7,0x38(r3)
-			stfd	f8,0x40(r3)
-			stfd	f9,0x48(r3)
-			stfd	f10,0x50(r3)
-			stfd	f11,0x58(r3)
-			stfd	f12,0x60(r3)
-			stfd	f13,0x68(r3)
-			stfd	f14,0x70(r3)
-			stfd	f15,0x78(r3)
-			stfd	f16,0x80(r3)
-			stfd	f17,0x88(r3)
-			stfd	f18,0x90(r3)
-			stfd	f19,0x98(r3)
-			stfd	f20,0xA0(r3)
-			stfd	f21,0xA8(r3)
-			stfd	f22,0xB0(r3)
-			stfd	f23,0xB8(r3)
-			stfd	f24,0xC0(r3)
-			stfd	f25,0xC8(r3)
-			stfd	f26,0xD0(r3)
-			stfd	f27,0xD8(r3)
-			stfd	f28,0xE0(r3)
-			stfd	f29,0xE8(r3)
-			stfd	f30,0xF0(r3)
-			stfd	f31,0xF8(r3)
-			mffs	f0
-			stfd	f0,0x100(r3)
-			lfd		f0,0x00(r3)
-			mtmsr	r0
-			isync
-			blr
-			
-
-;
-;			Saves vector registers.  Returns 0 if non-Altivec machine.
-;
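-;			The pattern here: block external interrupts (EE), temporarily set
-;			MSR[VEC] so the stvxl stores are legal, then restore the original
-;			MSR with VEC and FP cleared so lazy context switching stays
-;			consistent.  The MSR juggling in C (a sketch):
-;
-;				msr = mfmsr();
-;				mtmsr((msr & ~MSR_EE) | MSR_VEC);	/* vectors on, no ints */
-;				/* ... store v0-v31 and vscr ... */
-;				mtmsr(msr & ~(MSR_VEC | MSR_FP));	/* lazy-state safe */
-;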
-
-			.align	5
-			.globl	EXT(stVectors)
-
-LEXT(stVectors)
-
-			lis		r2,hi16(MASK(MSR_VEC))			; Get the vector enable 
-			li		r4,0
-			ori		r2,r2,lo16(MASK(MSR_FP))		; Get the FP enable 
-			ori		r4,r4,lo16(MASK(MSR_EE))		; Get the EE bit
-			
-			mfsprg	r6,2				; Get features
-			mr		r5,r3				; Save area address
-			rlwinm.	r6,r6,0,pfAltivecb,pfAltivecb		;  Do we have Altivec?
-			li		r3,0				; Assume failure
-			beqlr-						; No...
-			
-			mfmsr	r0					; Save the MSR
-	
-			andc	r4,r0,r4			; Clear EE
-
-			oris	r4,r4,hi16(MASK(MSR_VEC))	; Enable vectors
-			mtmsr	r4
-			isync
-			
-			andc	r0,r0,r2			; Clear FP and VEC
-			
-			stvxl	v0,0,r5
-			addi	r5,r5,16
-			stvxl	v1,0,r5
-			addi	r5,r5,16
-			stvxl	v2,0,r5
-			addi	r5,r5,16
-			stvxl	v3,0,r5
-			addi	r5,r5,16
-			stvxl	v4,0,r5
-			addi	r5,r5,16
-			stvxl	v5,0,r5
-			addi	r5,r5,16
-			stvxl	v6,0,r5
-			addi	r5,r5,16
-			stvxl	v7,0,r5
-			addi	r5,r5,16
-			stvxl	v8,0,r5
-			addi	r5,r5,16
-			stvxl	v9,0,r5
-			addi	r5,r5,16
-			stvxl	v10,0,r5
-			addi	r5,r5,16
-			stvxl	v11,0,r5
-			addi	r5,r5,16
-			stvxl	v12,0,r5
-			addi	r5,r5,16
-			stvxl	v13,0,r5
-			addi	r5,r5,16
-			stvxl	v14,0,r5
-			addi	r5,r5,16
-			stvxl	v15,0,r5
-			addi	r5,r5,16
-			stvxl	v16,0,r5
-			addi	r5,r5,16
-			stvxl	v17,0,r5
-			addi	r5,r5,16
-			stvxl	v18,0,r5
-			addi	r5,r5,16
-			stvxl	v19,0,r5
-			addi	r5,r5,16
-			stvxl	v20,0,r5
-			addi	r5,r5,16
-			stvxl	v21,0,r5
-			addi	r5,r5,16
-			stvxl	v22,0,r5
-			addi	r5,r5,16
-			stvxl	v23,0,r5
-			addi	r5,r5,16
-			stvxl	v24,0,r5
-			addi	r5,r5,16
-			stvxl	v25,0,r5
-			addi	r5,r5,16
-			stvxl	v26,0,r5
-			addi	r5,r5,16
-			stvxl	v27,0,r5
-			addi	r5,r5,16
-			stvxl	v28,0,r5
-			addi	r5,r5,16
-			stvxl	v29,0,r5
-			addi	r5,r5,16
-			stvxl	v30,0,r5
-			addi	r5,r5,16
-			stvxl	v31,0,r5
-			mfvscr	v31
-			addi	r6,r5,16
-			stvxl	v31,0,r6
-			li		r3,1
-			lvxl	v31,0,r5
-			mtmsr	r0
-			isync
-
-			blr
-
-
-;
-;			Saves yet more registers
-;
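-;			The output is a word array hung off r3: PIR at word 0, PVR at word 1,
-;			DBATs at words 2-9, IBATs at 10-17, SPRGs at 18-21, SDR1 at 22, the
-;			sixteen segment registers at 23-38, then HID/thermal/monitor
-;			registers from word 39 up.  64-bit machines use a different map;
-;			see stsSF1 below.
-;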
-
-			.align	5
-			.globl	EXT(stSpecrs)
-
-LEXT(stSpecrs)
-
-
-			lis		r2,hi16(MASK(MSR_VEC))			; Get the vector enable 
-			li		r4,0
-			ori		r2,r2,lo16(MASK(MSR_FP))		; Get the FP enable 
-			ori		r4,r4,lo16(MASK(MSR_EE))		; Get the EE bit
-
- 			mfsprg	r9,2							; Get feature flags 
-			mtcrf	0x02,r9							; move pf64Bit to cr6
-
-			mfmsr	r0								; Save the MSR
-			andc	r0,r0,r2						; Turn off VEC and FP
-			andc	r4,r0,r4			; And EE
-			mtmsr	r4
-			isync
-			
-			mfpvr	r12
-			stw		r12,4(r3)
-			rlwinm	r12,r12,16,16,31
-
-			bt++	pf64Bitb,stsSF1					; skip if 64-bit (only they take the hint)
-
-			mfdbatu	r4,0
-			mfdbatl	r5,0
-			mfdbatu	r6,1
-			mfdbatl	r7,1
-			mfdbatu	r8,2
-			mfdbatl	r9,2
-			mfdbatu	r10,3
-			mfdbatl	r11,3
-			stw		r4,8(r3)
-			stw		r5,12(r3)
-			stw		r6,16(r3)
-			stw		r7,20(r3)
-			stw		r8,24(r3)
-			stw		r9,28(r3)
-			stw		r10,32(r3)
-			stw		r11,36(r3)
-
-			mfibatu	r4,0
-			mfibatl	r5,0
-			mfibatu	r6,1
-			mfibatl	r7,1
-			mfibatu	r8,2
-			mfibatl	r9,2
-			mfibatu	r10,3
-			mfibatl	r11,3
-			stw		r4,40(r3)
-			stw		r5,44(r3)
-			stw		r6,48(r3)
-			stw		r7,52(r3)
-			stw		r8,56(r3)
-			stw		r9,60(r3)
-			stw		r10,64(r3)
-			stw		r11,68(r3)
-			
-			mfsprg	r4,0
-			mfsprg	r5,1
-			mfsprg	r6,2
-			mfsprg	r7,3
-			stw		r4,72(r3)
-			stw		r5,76(r3)
-			stw		r6,80(r3)
-			stw		r7,84(r3)
-			
-			mfsdr1	r4
-			stw		r4,88(r3)
-			
-			la		r4,92(r3)
-			li		r5,0
-			
-stSnsr:		mfsrin	r6,r5
-			addis	r5,r5,0x1000
-			stw		r6,0(r4)
-			mr.		r5,r5
-			addi	r4,r4,4
-			bne+	stSnsr
-
-			cmplwi	r12,PROCESSOR_VERSION_750
-			mfspr	r4,hid0
-			stw		r4,(39*4)(r3)
-
-			li		r4,0
-			li		r5,0
-			li		r6,0
-			li		r7,0
-			
-			mfspr	r4,hid1
-			mfspr	r5,l2cr
-			mfspr	r6,msscr0
-			mfspr	r7,msscr1
-
-			stw		r4,(40*4)(r3)
-			stw		r6,(42*4)(r3)
-			stw		r5,(41*4)(r3)
-			stw		r7,(43*4)(r3)
-
-			li		r4,0
-			beq		isis750
-			
-			mfspr	r4,pir
-isis750:	stw		r4,0(r3)
-
-			li		r4,0
-			li		r5,0
-			li		r6,0
-			li		r7,0
-			blt-	b4750
-			
-			mfspr	r4,thrm1
-			mfspr	r5,thrm2
-			mfspr	r6,thrm3
-			mfspr	r7,ictc
-
-b4750:		stw		r4,(44*4)(r3)
-			stw		r5,(45*4)(r3)
-			stw		r6,(46*4)(r3)
-			stw		r7,(47*4)(r3)
-			
-			li		r4,0
-			li		r6,0
-			cmplwi	r12,PROCESSOR_VERSION_7400
-			bne		nnmax
-			
-			mfspr	r6,dabr
-			mfpvr	r5
-			rlwinm	r5,r5,0,16,31
-			cmplwi	r5,0x1101
-			beq		gnmax
-			cmplwi	r5,0x1102
-			bne		nnmax
-
-gnmax:		mfspr	r4,1016
-
-nnmax:		stw		r4,(48*4)(r3)
-			stw		r6,(49*4)(r3)
-			
-			mtmsr	r0
-			isync
-
-			blr
-
-stsSF1:		mfsprg	r4,0
-			mfsprg	r5,1
-			mfsprg	r6,2
-			mfsprg	r7,3
-			std		r4,(18*4)(r3)
-			std		r5,(20*4)(r3)
-			std		r6,(22*4)(r3)
-			std		r7,(24*4)(r3)
-			
-			mfsdr1	r4
-			std		r4,(26*4)(r3)
-
-			mfspr	r4,hid0
-			std		r4,(28*4)(r3)
-			mfspr	r4,hid1
-			std		r4,(30*4)(r3)
-			mfspr	r4,hid4
-			std		r4,(32*4)(r3)
-			mfspr	r4,hid5
-			std		r4,(34*4)(r3)
-
-
-stsSF2:		li		r5,0
-			la		r4,(80*4)(r3)
-			
-stsslbm:	slbmfee	r6,r5
-			slbmfev	r7,r5
-			std		r6,0(r4)
-			std		r7,8(r4)
-			addi	r5,r5,1
-			cmplwi	r5,64
-			addi	r4,r4,16
-			blt		stsslbm
-			
-			mtmsr	r0
-			isync
-
-			blr
-
-;
-;			fwEmMck - this forces the hardware to emulate machine checks
-;			Only valid on 64-bit machines
-;			Note: we want interruptions disabled here
-;
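-;			The three register pairs passed in (r3:r4, r5:r6, r7:r8) are glued
-;			into 64-bit values first: rlwinm rD,rD,0,1,0 uses a wrap-around mask
-;			to replicate the low word into both halves, then rlwimi merges the
-;			low-word argument underneath.  In C (a sketch):
-;
-;				hid4 = ((uint64_t)hi << 32) | lo;
-;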
-
-			.globl	EXT(fwEmMck)
-			
-			.align	5
-
-LEXT(fwEmMck)
-
-
-			rlwinm	r3,r3,0,1,0						; Copy low of high high - scomd
-			rlwinm	r5,r5,0,1,0						; Copy low of high high - hid1
-			rlwinm	r7,r7,0,1,0						; Copy low of high high - hid4
-			rlwimi	r3,r4,0,0,31					; Copy low of low low
-			rlwimi	r5,r6,0,0,31					; Copy low of low low
-			rlwimi	r7,r8,0,0,31					; Copy low of low low
-
-			lis		r9,3							; Start forming hid1 error inject mask
-			lis		r10,hi16(0x01084083)			; Start forming hid4 error inject mask
-			ori		r9,r9,0xC000					; Next bit
-			ori		r10,r10,lo16(0x01084083)		; Next part
-			sldi	r9,r9,32						; Shift up high
-			sldi	r10,r10,8						; Shift into position
-			
-			mfspr	r0,hid1							; Get hid1
-			mfspr	r2,hid4							; and hid4
-			
-			and		r5,r5,r9						; Keep only error inject controls - hid1
-			and		r7,r7,r10						; Keep only error inject controls - hid4
-			
-			andc	r0,r0,r9						; Clear error inject controls hid1
-			andc	r2,r2,r10						; Clear error inject controls hid4
-			
-			or		r0,r0,r5						; Add in the new controls hid1
-			or		r2,r2,r7						; Add in the new controls hid4
-			
-/* ? */
-#if 0
-			lis		r12,CoreErrI					; Get the error inject controls
-			sync
-
-			mtspr	scomd,r3						; Set the error inject controls
-			mtspr	scomc,r12						; Request error inject
-			mfspr	r11,scomc						; Get back the status (we just ignore it)
-#endif
-			sync
-			isync							
-			
-			mtspr	hid1,r0							; Move in hid1 controls
-			mtspr	hid1,r0							; We need to do it twice
-			isync
-			
-			sync
-			mtspr	hid4,r2							; Move in hid4 controls
-			isync
-			
-			blr										; Leave...
-
-;
-;			fwSCOM - read/write SCOM
-;
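-;			The parameter block in r3 is accessed through the scomfunc, scomreg,
-;			scomdata and scomstat offsets used below.  A hedged C view of the
-;			implied layout (field sizes inferred from the lhz/lwz/ld/std
-;			accesses; ordering is an assumption, not copied from a header):
-;
-;				struct scomcomm {
-;					uint16_t	scomfunc;	/* 0 = read, else write */
-;					uint32_t	scomreg;	/* SCOM register number */
-;					uint64_t	scomstat;	/* status handed back */
-;					uint64_t	scomdata;	/* data read or to be written */
-;				};
-;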
-			.align	5
-			.globl	EXT(fwSCOM)
-
-LEXT(fwSCOM)
-
-			lhz		r12,scomfunc(r3)				; Get the function
-			lwz		r4,scomreg(r3)					; Get the register
-			rldicr	r4,r4,8,47						; Position for SCOM
-
-			mr.		r12,r12							; See if read or write
-			bne		fwSCwrite						; Go do a write
-
-			mfsprg	r0,2							; Get the feature flags
-			ori		r4,r4,0x8000					; Set to read data
-			rlwinm.	r0,r0,pfSCOMFixUpb+1,31,31		; Set shift if we need a fix me up
-			sync
-
-			mtspr	scomc,r4						; Request the register
-			mfspr	r11,scomd						; Get the register contents
-			mfspr	r10,scomc						; Get back the status
-			sync
-			isync							
-
-			sld		r11,r11,r0						; Fix up if needed
-			
-			std		r11,scomdata(r3)				; Save result
-			eieio
-			std		r10,scomstat(r3)				; Save status
-
-			blr
-
-fwSCwrite:	ld		r5,scomdata(r3)					; Get the data
-			
-			sync
-
-			mtspr	scomd,r5						; Set the data
-			mtspr	scomc,r4						; Set it
-			mfspr	r10,scomc						; Get back the status
-			sync
-			isync							
-
-			std		r10,scomstat(r3)				; Save status
-			
-			blr
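-
-;
-;			For reference, a hedged sketch of the parameter block fwSCOM works on
-;			(field names come from the offsets used above; the real C declaration
-;			lives in the PPC headers, so the exact widths are an assumption):
-;
-;				struct scomcomm {
-;					uint16_t scomfunc;			; 0 = read, non-zero = write
-;					uint32_t scomreg;			; SCOM register number
-;					uint64_t scomstat;			; status handed back from scomc
-;					uint64_t scomdata;			; data read, or data to write
-;				};
-;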
-
-;
-;			diagTrap - this is used to trigger checks from user space
-;			any "twi 31,r31,0xFFFx" will come here (x = 0 to F).
-;			On entry R3 points to savearea.
-;			R4 is the "x" from instruction;
-;			Pass back 1 to no-op twi and return to user
-;			Pass back 0 to treat as normal twi.
-;
-
-			.globl	EXT(diagTrap)
-			
-			.align	5
-
-LEXT(diagTrap)
-
-			li		r3,1							; Ignore TWI
-			blr										; Leave...
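-
-;
-;			For illustration only: a user-space trigger might look like the
-;			sketch below (hypothetical helper; operand 0xFFF0 selects x = 0,
-;			and because diagTrap returns 1 the kernel no-ops the twi):
-;
-;				static inline void diag_trap0(void) {
-;					__asm__ volatile ("twi 31,31,0xFFF0");
-;				}
-;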
-
-
-
-
-;
-;			setPmon - this is used to manipulate MMCR0 and MMCR1
-;
-
-			.globl	EXT(setPmon)
-			
-			.align	5
-
-LEXT(setPmon)
-
-			li		r0,0
-			isync
-			mtspr	mmcr0,r0						; Clear MMCR0
-			mtspr	mmcr1,r0						; Clear MMCR1
-			mtspr	pmc1,r0
-			mtspr	pmc2,r0
-			mtspr	pmc3,r0
-			mtspr	pmc4,r0
-
-			isync
-
-			mtspr	mmcr0,r3						; Set MMCR0
-			mtspr	mmcr1,r4						; Set MMCR1
-			isync
-			blr										; Leave...
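-
-;
-;			A hedged usage sketch (the C prototype is inferred from the register
-;			usage above, r3 = new MMCR0 and r4 = new MMCR1; zero values simply
-;			clear all the monitor controls again, as the routine itself does first):
-;
-;				extern void setPmon(unsigned int mmcr0, unsigned int mmcr1);
-;				setPmon(0, 0);
-;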
-
-
diff --git a/osfmk/ppc/FirmwareC.c b/osfmk/ppc/FirmwareC.c
deleted file mode 100644
index 5ddc41c71..000000000
--- a/osfmk/ppc/FirmwareC.c
+++ /dev/null
@@ -1,338 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- *	This file contains firmware code.
- *
- */
-
-#include <debug.h>
-#include <mach_vm_debug.h>
-#include <db_machine_commands.h>
-
-#include <kern/thread.h>
-#include <mach/vm_attributes.h>
-#include <mach/vm_param.h>
-#include <kern/spl.h>
-
-#include <kern/misc_protos.h>
-#include <ppc/misc_protos.h>
-#include <ppc/proc_reg.h>
-#include <ppc/mem.h>
-#include <ppc/pmap.h>
-#include <ppc/new_screen.h>
-#include <ppc/Firmware.h>
-#include <ppc/mappings.h>
-#include <pexpert/pexpert.h>
-#include <ddb/db_output.h>
-
-Boot_Video dgVideo;
-extern GDWorkArea GratefulDebWork[];
-
-struct RuptCtr {		/* Counts hardware interrupts */
-	struct GDpos {		/* Screen position for Grateful Deb display */
-		unsigned short col;	/* Column  (-1 means no display) */
-		unsigned short row;	/* Row */
-	} GDpos;
-	unsigned int count;	/* Count of interrupts */
-	unsigned int timed;	/* If set, count updates at timed rate  */
-	unsigned int lasttime;	/* Low word of timebase when last updated */
-};
-
-/* Window layout for Grateful Deb:
- *
- *	0				9
- *
- *	0	Total			Decrementer
- *	1	DSI				ISI
- *	2	System call		External
- *	3	SIGP			Floating point
- *	4	Program			Alignment
- */
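-
-/*
- * For illustration, an interrupt path would bump one of these counters with
- * something like the sketch below (the 48-entries-per-CPU stride matches the
- * table that follows; the T_DECREMENTER index name is hypothetical):
- *
- *	RuptCtrs[(48 * cpu) + T_DECREMENTER].count++;
- */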
-
-struct RuptCtr RuptCtrs[96] = {
-	{	/* Total interruptions */
-		.GDpos = {
-			.col = 0,
-			.row = 0,
-		},
-		.count = 0,
-		.timed = 1,
-	},
-	{	/* Reset */
-		.GDpos = {
-			.col = -1,
-			.row = -1,
-		},
-		.count = 0,
-		.timed = 0,
-	},
-	{	/* Machine check */
-		.GDpos = {
-			.col = -1,
-			.row = -1,
-		},
-		.count = 0,
-		.timed = 0,
-	},
-	{	/* DSIs */
-		.GDpos = {
-			.col = 0,
-			.row = 1,
-		},
-		.count = 0,
-		.timed = 1,
-	},
-	{	/* ISIs */
-		.GDpos = {
-			.col = 1,
-			.row = 1,
-		},
-		.count = 0,
-		.timed = 1,
-	},
-	{	/* Externals */
-		.GDpos = {
-			.col = 1,
-			.row = 2,
-		},
-		.count = 0,
-		.timed = 1,
-	},
-	{	/* Alignment */
-		.GDpos = {
-			.col = 1,
-			.row = 4,
-		},
-		.count = 0,
-		.timed = 0,
-	},
-	{.GDpos = {.col = 0,.row = 4},.count = 0,.timed = 0},	/* Program */
-	{.GDpos = {.col = 1,.row = 3},.count = 0,.timed = 0},	/* Floating point */
-	{.GDpos = {.col = 1,.row = 0},.count = 0,.timed = 1},	/* Decrementer */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* I/O error */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = 0,.row = 2},.count = 0,.timed = 1},	/* System call */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Trace */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Floating point assist */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Performance monitor */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* VMX */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Instruction breakpoint */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* System management */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Trace */
-	{.GDpos = {.col = 0,.row = 3},.count = 0,.timed = 0},	/* SIGP */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Preemption */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Context switch */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Special, update frequency controls */
-
-	/*Start of second processor counts */
-
-	{.GDpos = {.col = 0,.row = 0},.count = 0,.timed = 1},	/* Total interruptions */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reset */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Machine check */
-	{.GDpos = {.col = 0,.row = 1},.count = 0,.timed = 1},	/* DSIs */
-	{.GDpos = {.col = 1,.row = 1},.count = 0,.timed = 1},	/* ISIs */
-	{.GDpos = {.col = 1,.row = 2},.count = 0,.timed = 1},	/* Externals */
-	{.GDpos = {.col = 1,.row = 4},.count = 0,.timed = 0},	/* Alignment */
-	{.GDpos = {.col = 0,.row = 4},.count = 0,.timed = 0},	/* Program */
-	{.GDpos = {.col = 1,.row = 3},.count = 0,.timed = 0},	/* Floating point */
-	{.GDpos = {.col = 1,.row = 0},.count = 0,.timed = 1},	/* Decrementer */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* I/O error */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = 0,.row = 2},.count = 0,.timed = 1},	/* System call */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Trace */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Floating point assist */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Performance monitor */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* VMX */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Instruction breakpoint */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* System management */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Trace */
-	{.GDpos = {.col = 0,.row = 3},.count = 0,.timed = 0},	/* SIGP */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Preemption */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Context switch */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Reserved */
-	{.GDpos = {.col = -1,.row = -1},.count = 0,.timed = 0},	/* Special, update frequency controls */
-};
-
-void
-GratefulDebInit(bootBumbleC *boot_video_info)
-{				/* Initialize the video debugger */
-
-	unsigned int fillframe[256];
-	unsigned int startpos, startbyte, windowleft, newwidth, i, j, startword,
-	    oldwidth, nrmlgn;
-	unsigned int nwords, *byteleft, lstlgn, pixlgn, bytelgn;
-
-	if (!boot_video_info) {	/* Are we disabling it? */
-		GratefulDebWork[0].GDready = 0;	/* Disable output */
-		return;
-	}
-
-	nrmlgn = (9 * GDfontsize) * (boot_video_info->v_depth / 8);	/* Get the normal column size in bytes */
-	lstlgn = (((8 * GDfontsize) + (GDfontsize >> 1)) * boot_video_info->v_depth) / 8;	/* Same as normal, but with 1/2 character space */
-	nrmlgn = (nrmlgn + 31) & -32;	/* Round to a line */
-
-	bytelgn = (nrmlgn * (GDdispcols - 1)) + lstlgn;	/* Length in bytes */
-	pixlgn = bytelgn / (boot_video_info->v_depth / 8);	/* Number of pixels wide */
-
-	startbyte = (boot_video_info->v_width * (boot_video_info->v_depth / 8)) - bytelgn;	/* Get the starting byte unaligned */
-	startpos = boot_video_info->v_width - pixlgn;	/* Starting pixel position */
-
-	startbyte += (unsigned int)boot_video_info->v_baseAddr & 31;	/* Add the extra to cache boundary in frame buffer */
-	startbyte &= -32;	/* Make sure it's on a cache line for speed */
-	startbyte += (unsigned int)boot_video_info->v_baseAddr & 31;	/* Add the offset within the cache line back in */
-
-	windowleft = startbyte - (((GDfontsize / 2) * boot_video_info->v_depth) / 8);	/* Back up a half character */
-	windowleft &= -4;	/* Make sure it is on a word boundary */
-	newwidth = windowleft / (boot_video_info->v_depth / 8);	/* Get the new pixel width of screen */
-
-	oldwidth = boot_video_info->v_width;	/* Save the old width */
-//      boot_video_info->v_width = newwidth;                                    /* Set the new width */
-
-	nwords = oldwidth - newwidth;	/* See how much to fill in pixels */
-	nwords = nwords / (32 / boot_video_info->v_depth);	/* Get that in bytes */
-
-	startword = (newwidth + 3) / 4;	/* Where does it start? */
-
-	byteleft = (unsigned int *)(boot_video_info->v_baseAddr + windowleft);	/* Starting place */
-	for (i = 0; i < nwords; i++)
-		byteleft[i] = 0;	/* Set the row to all black */
-
-	byteleft = (unsigned int *)(boot_video_info->v_baseAddr + windowleft + (boot_video_info->v_rowBytes * 1));	/* Starting place */
-	for (i = 0; i < nwords; i++)
-		byteleft[i] = 0;	/* Set the row to all black */
-
-	byteleft = (unsigned int *)(boot_video_info->v_baseAddr + windowleft + (boot_video_info->v_rowBytes * (boot_video_info->v_height - 2)));	/* Starting place */
-	for (i = 0; i < nwords; i++)
-		byteleft[i] = 0;	/* Set the row to all black */
-
-	byteleft = (unsigned int *)(boot_video_info->v_baseAddr + windowleft + (boot_video_info->v_rowBytes * (boot_video_info->v_height - 1)));	/* Starting place */
-	for (i = 0; i < nwords; i++)
-		byteleft[i] = 0;	/* Set the row to all black */
-
-	for (i = 0; i < nwords; i++)
-		fillframe[i] = 0xFFFFFFFF;	/* Set the row to all white */
-
-	if (boot_video_info->v_depth == 8) {	/* See if 8 bits a pixel */
-		fillframe[0] = 0x0000FFFF;	/* Make left border */
-		fillframe[nwords - 1] = 0xFFFF0000;	/* Make right border */
-	} else if (boot_video_info->v_depth == 16) {	/* See if 16 bits a pixel */
-		fillframe[0] = 0x00000000;	/* Make left border */
-		fillframe[nwords - 1] = 0x00000000;	/* Make right border */
-	} else {
-		fillframe[0] = 0x00000000;	/* Make left border */
-		fillframe[1] = 0x00000000;	/* Make left border */
-		fillframe[nwords - 1] = 0x00000000;	/* Make right border */
-		fillframe[nwords - 2] = 0x00000000;	/* Make right border */
-	}
-
-	byteleft = (unsigned int *)(boot_video_info->v_baseAddr + windowleft + (boot_video_info->v_rowBytes * 2));	/* Place to start filling */
-
-	for (i = 2; i < (boot_video_info->v_height - 2); i++) {	/* Fill the rest */
-		for (j = 0; j < nwords; j++)
-			byteleft[j] = fillframe[j];	/* Fill the row */
-		byteleft = (unsigned int *)((unsigned int)byteleft + boot_video_info->v_rowBytes);	/* Next row */
-	}
-
-	for (i = 0; i < 2; i++) {	/* Initialize both (for now) processor areas */
-
-		GratefulDebWork[i].GDtop =
-		    2 + (GDfontsize / 2) + (i * 18 * GDfontsize);
-		GratefulDebWork[i].GDleft = 2 + startpos + (GDfontsize / 2);
-		GratefulDebWork[i].GDtopleft =
-		    boot_video_info->v_baseAddr + startbyte +
-		    (GratefulDebWork[i].GDtop * boot_video_info->v_rowBytes);
-		GratefulDebWork[i].GDrowbytes = boot_video_info->v_rowBytes;
-		GratefulDebWork[i].GDrowchar =
-		    boot_video_info->v_rowBytes * (GDfontsize +
-						   (GDfontsize / 4));
-		GratefulDebWork[i].GDdepth = boot_video_info->v_depth;
-		GratefulDebWork[i].GDcollgn = nrmlgn;
-
-//              RuptCtrs[(48*i)+47].timed = gPEClockFrequencyInfo.timebase_frequency_hz >> 4;   /* Update every 16th of a second (16 fps) */
-		RuptCtrs[(48 * i) + 47].timed = gPEClockFrequencyInfo.timebase_frequency_hz >> 3;	/* Update every 8th of a second (8 fps) */
-//              RuptCtrs[(48*i)+47].timed = gPEClockFrequencyInfo.timebase_frequency_hz >> 2;   /* Update every 4th of a second (4 fps) */
-//              RuptCtrs[(48*i)+47].timed = gPEClockFrequencyInfo.timebase_frequency_hz >> 1;   /* Update every half second (2 fps) */
-//              RuptCtrs[(48*i)+47].timed = gPEClockFrequencyInfo.timebase_frequency_hz >> 0;   /* Update every second (1 fps) */
-
-		sync();
-
-		GratefulDebWork[i].GDready = 1;	/* This one's all ready */
-	}
-}
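-
-/*
- * A worked example of the alignment idiom GratefulDebInit leans on above:
- * rounding with a two's complement mask, here to a 32-byte cache line.
- *
- *	(100 + 31) & -32  ==  131 & ~31  ==  128	round up
- *	 100       & -32  ==  100 & ~31  ==   96	round down
- */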
-
-void debugNoop(void);
-void
-debugNoop(void)
-{				/* This does absolutely nothing */
-}
diff --git a/osfmk/ppc/FirmwareCalls.h b/osfmk/ppc/FirmwareCalls.h
deleted file mode 100644
index ec25ea30c..000000000
--- a/osfmk/ppc/FirmwareCalls.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_FREE_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
- 
-#ifdef ASSEMBLER
-
-#ifdef _FIRMWARECALLS_H_
-#error Hey! You can only include FirmwareCalls.h in one assembler file, dude. And it should be Firmware.s!
-#else /* _FIRMWARECALLS_H_ */
-
-/*
- *			Entries for all firmware calls are in here (except for call 0x80000000 - CutTrace)
- */
-
-#define _FIRMWARECALLS_H_
-
-#define	fwCallEnt(name, entrypt) 									\
-			.globl	name								__ASMNL__	\
- 			.set	name,(.-EXT(FWtable))/4|0x80000000	__ASMNL__	\
-			.long	EXT(entrypt)						__ASMNL__
-			
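-/*
- *			For reference, an entry such as fwCallEnt(NullCall, NullLL)
- *			expands (per the macro above) to:
- *
- *				.globl	NullCall
- *				.set	NullCall,(.-EXT(FWtable))/4|0x80000000
- *				.long	EXT(NullLL)
- *
- *			so each call symbol's value is its word index into FWtable with
- *			the high bit set.
- */
-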
-/*
- *
- */
- 
-			fwCallEnt(dbgDispCall, dbgDispLL)				/* Write stuff to printer or modem port */
-			fwCallEnt(dbgCkptCall, dbgCkptLL)				/* Save 128 bytes from r3 to 0x380 V=R mapping */
-			fwCallEnt(StoreRealCall, StoreRealLL)			/* Save one word in real storage */
-			fwCallEnt(ClearRealCall, ClearRealLL)			/* Clear physical pages */
-			fwCallEnt(LoadDBATsCall, xLoadDBATsLL)			/* Load all DBATs */
-			fwCallEnt(LoadIBATsCall, xLoadIBATsLL)			/* Load all IBATs */
-			fwCallEnt(DoPreemptCall, DoPreemptLL)			/* Preempt if need be */
-			fwCallEnt(CreateFakeIOCall, CreateFakeIOLL)		/* Make a fake I/O interruption */
-			fwCallEnt(SwitchContextCall, SwitchContextLL)	/* Switch context */
-			fwCallEnt(Choke, DoChokeLL)						/* Choke (system crash) */
-			fwCallEnt(dbgRegsCall, dbgRegsLL)				/* Dumps all registers */
-			fwCallEnt(CreateFakeDECCall, CreateFakeDECLL)	/* Make a fake decrementer interruption */
-			fwCallEnt(CreateShutdownCTXCall, CreateShutdownCTXLL)	/* create a shutdown context */
-			fwCallEnt(NullCall, NullLL)						/* Null Firmware call */
-			fwCallEnt(iNullCall, iNullLL)					/* Instrumented null Firmware call */
-
-#endif	/* _FIRMWARECALLS_H_ */
-
-#else /* ASSEMBLER */
-	
-/*
- *			The firmware function headers
- */
-extern void			CutTrace		(unsigned int item1, ...);
-
-#endif /* ASSEMBLER */
diff --git a/osfmk/ppc/Makefile b/osfmk/ppc/Makefile
deleted file mode 100644
index b978cc676..000000000
--- a/osfmk/ppc/Makefile
+++ /dev/null
@@ -1,36 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-
-EXPORT_ONLY_FILES = 		\
-	asm.h 			\
-	cpu_number.h		\
-	cpu_capabilities.h	\
-	Diagnostics.h		\
-	io_map_entries.h	\
-	lock.h			\
-	locks.h			\
-	proc_reg.h		\
-	machine_routines.h	\
-	mappings.h		\
-	savearea.h		\
-	simple_lock.h
-
-INSTALL_MD_DIR = ppc
-
-INSTALL_MD_LCL_LIST = cpu_capabilities.h
-
-EXPORT_MD_LIST = ${EXPORT_ONLY_FILES}
-
-EXPORT_MD_DIR = ppc
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/osfmk/ppc/PPCcalls.h b/osfmk/ppc/PPCcalls.h
deleted file mode 100644
index 262fe2e91..000000000
--- a/osfmk/ppc/PPCcalls.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/*
- *	To add a new entry:
 *		Add a "PPCcall(routine)" to the table below
- *
- *		Add trap definition to mach/ppc/syscall_sw.h and
- *		recompile user library.
- *
- *	Note:
 *		The maximum number of calls is 0x1000 (4096 for the hexually challenged)
- *
- */
-
-typedef	int (*PPCcallEnt)(struct savearea *save);
-
-#define PPCcall(rout) rout
-#define dis (PPCcallEnt)0
-
-PPCcallEnt	PPCcalls[] = {
-
-	PPCcall(diagCall),				/* 0x6000 Call diagnostics routines */
-	PPCcall(vmm_get_version),		/* 0x6001 Get Virtual Machine Monitor version */
-	PPCcall(vmm_get_features),		/* 0x6002 Get Virtual Machine Monitor supported features */
-	PPCcall(vmm_init_context),		/* 0x6003 Initialize a VMM context */
-	PPCcall(vmm_dispatch),			/* 0x6004 Dispatch a Virtual Machine Monitor call */	
-	PPCcall(bb_enable_bluebox),		/* 0x6005 Enable this thread for use in the blue box virtual machine */
-	PPCcall(bb_disable_bluebox),	/* 0x6006 Disable this thread for use in the blue box virtual machine */
-	PPCcall(bb_settaskenv),			/* 0x6007 Set the BlueBox per thread task environment data */
-	PPCcall(vmm_stop_vm),			/* 0x6008 Stop a running VM */
-
-	PPCcall(dis),					/* 0x6009 disabled */
-	
-	PPCcall(ppcNull),				/* 0x600A Null PPC syscall */
-	PPCcall(perfmon_control),		/* 0x600B performance monitor */
-	PPCcall(ppcNullinst),			/* 0x600C Instrumented Null PPC syscall */
-	PPCcall(pmsCntrl),				/* 0x600D Power Management Stepper */
-	PPCcall(dis),					/* 0x600E disabled */
-	PPCcall(dis),					/* 0x600F disabled */
-	PPCcall(dis),					/* 0x6010 disabled */
-	PPCcall(dis),					/* 0x6011 disabled */
-	PPCcall(dis),					/* 0x6012 disabled */
-	PPCcall(dis),					/* 0x6013 disabled */
-	PPCcall(dis),					/* 0x6014 disabled */
-	PPCcall(dis),					/* 0x6015 disabled */
-	PPCcall(dis),					/* 0x6016 disabled */
-	PPCcall(dis),					/* 0x6017 disabled */
-	PPCcall(dis),					/* 0x6018 disabled */
-	PPCcall(dis),					/* 0x6019 disabled */
-	PPCcall(dis),					/* 0x601A disabled */
-	PPCcall(dis),					/* 0x601B disabled */
-	PPCcall(dis),					/* 0x601C disabled */
-	PPCcall(dis),					/* 0x601D disabled */
-	PPCcall(dis),					/* 0x601E disabled */
-	PPCcall(dis),					/* 0x601F disabled */
-};
-
-#undef dis
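-
-/*
- * A hedged sketch of how the system-call handler presumably indexes this
- * table (the 0x6000 base comes from the comments above; the bounds check
- * and return handling are assumptions):
- *
- *	unsigned idx = callnum - 0x6000;
- *	if (idx < sizeof(PPCcalls) / sizeof(PPCcalls[0]) && PPCcalls[idx])
- *		ret = PPCcalls[idx](save);
- */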
diff --git a/osfmk/ppc/Performance.s b/osfmk/ppc/Performance.s
deleted file mode 100644
index 440c39678..000000000
--- a/osfmk/ppc/Performance.s
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT_INTERNAL_USE_ONLY@
- */
-
-/* 																							
- 	Performance.s 
-
- 	Handle things that are related to the hardware performance monitor
-
-	Lovingly crafted by Bill Angell using traditional methods and only natural or recycled materials.
-	No more than 7500 chinchillas were killed in the production of the code.
-
-*/
-
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <ppc/exception.h>
-#include <ppc/Performance.h>
-#include <mach/machine/vm_param.h>
-#include <assym.s>
-
-#if PERF_HIST
-/*
- *			This routine is used to interface to the performance monitor
- */
-
-ENTRY(PerfCtl, TAG_NO_FRAME_USED)
-
-			lis		r0,PerfCtlCall@h				/* Get the top part of the SC number */
-			ori		r0,r0,PerfCtlCall@l				/* and the bottom part */
-			sc										/* Do it to it */
-			blr										/* Bye bye, Birdie... */
-			
-
-ENTRY(PerfCtlLL, TAG_NO_FRAME_USED)
-
-			cmplwi	r3,maxPerf						/* See if we are within range */
-			mflr	r11								/* Get the return point */
-			li		r3,0							/* Show failure */
-			bgelrl-									/* Load up current address and, also, leave if out of range */
-prfBase:	mflr	r12								/* Get our address */
-			rlwinm	r10,r3,2,0,31					/* Get displacement into branch table */
-			addi	r12,r12,prfBrnch-prfBase		/* Point to the branch address */
-			add		r12,r12,r10						/* Point to the branch */
-			mtlr	r12								/* Get it in the link register */
-			blr										/* Vector to the specific performance command... */
-			
-prfBrnch:	b		prfClear						/* Clear the histogram table */
-			b		prfStart						/* Start the performance monitor */
-			b		prfStop							/* Stop the performance monitor */
-			b		prfMap							/* Map the histogram into an address space */
-			.equ	maxPerf, (.-prfBrnch)/4			/* Set the highest valid address */
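-
-/*
- *			In C terms, the computed branch above acts as a bounds-checked
- *			jump table; a sketch, not the actual dispatch:
- *
- *				if (cmd >= maxPerf) return 0;
- *				return dispatch[cmd]();			(clear/start/stop/map)
- */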
-			
-/*
- *			Clear the monitor histogram
- */
-prfClear:
- 			li		r4,PMIhist@l					/* We know this to be in page 0, so no need for the high part */
-			lis		r8,PMIHIST_SIZE@h				/* Get high half of the table size */
-			lwz		r4,0(r4)						/* Get the real address of the histogram */
-			ori		r8,r8,PMIHIST_SIZE@l			/* Get the low half of the table size */
-			li		r6,32							/* Get a displacement */
-			li		r3,1							/* Set up a good return code */
-			mtlr	r11								/* Restore the return address */
-						
-clrloop:	subi	r8,r8,32						/* Back off a cache line */
-			dcbz	0,r4							/* Do the even line */
-			sub.	r8,r8,r6						/* Back off a second time (we only do this to generate a CR) */
-			dcbz	r6,r4							/* Clear the even line */
-			addi	r4,r4,64						/* Move up to every other line */
-			bgt+	clrloop							/* Go until we've done it all... */
-
-			blr										/* Leave... */
-			
-/*
- *			Start the monitor histogram
- */
- prfStart:
- 			mtlr	r11								/* Restore the return address */
-			blr										/* Return... */
-			
-/*
- *			Stop the monitor histogram
- */
- prfStop:
- 			mtlr	r11								/* Restore the return address */
-			blr										/* Return... */
-			
-/*
- *			Maps the monitor histogram into another address space
- */
- prfMap:
- 			mtlr	r11								/* Restore the return address */
-			blr										/* Return... */
-
-#endif
-
diff --git a/osfmk/ppc/PseudoKernel.c b/osfmk/ppc/PseudoKernel.c
deleted file mode 100644
index fc2a10ecc..000000000
--- a/osfmk/ppc/PseudoKernel.c
+++ /dev/null
@@ -1,450 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- 	File:		PseudoKernel.c
-
- 	Contains:	BlueBox PseudoKernel calls
-	Written by:	Mark Gorlinsky
-				Bill Angell
-
- 	Copyright:	1997 by Apple Computer, Inc., all rights reserved
-
-*/
-
-#include <mach/mach_types.h>
-#include <mach/mach_host.h>
-#include <mach/kern_return.h>
-
-#include <kern/kalloc.h>
-#include <kern/kern_types.h>
-#include <kern/host.h>
-#include <kern/task.h>
-#include <kern/thread.h>
-#include <ppc/PseudoKernel.h>
-#include <ppc/exception.h>
-#include <ppc/misc_protos.h>
-#include <ppc/proc_reg.h>
-
-#include <vm/pmap.h>
-#include <vm/vm_map.h>
-#include <vm/vm_kern.h>
-
-extern int is_suser(void);
-extern void tbeproc(void *proc);
-
-void bbSetRupt(ReturnHandler *rh, thread_t ct);
-
-/*
-** Function:	syscall_notify_interrupt (formerly NotifyInterruption)
-**
-** Inputs:
-**		ppcInterruptHandler	- interrupt handler to execute
-**		interruptStatePtr	- current interrupt state
-**
-** Outputs:
-**
-** Notes:
-**
-*/
-kern_return_t
-syscall_notify_interrupt(void)
-{
-	task_t			task;
-	thread_t 		act, fact;
-	bbRupt			*bbr;
-	BTTD_t			*bttd;
-	int				i;
-
-	task = current_task();							/* Figure out who our task is */
-
-	task_lock(task);						/* Lock our task */
-	
-	fact = (thread_t)task->threads.next;		/* Get the first activation on task */
-	act = NULL;										/* Pretend we didn't find it yet */
-	
-	for(i = 0; i < task->thread_count; i++) {		/* Scan the whole list */
-		if(fact->machine.bbDescAddr) {					/* Is this a Blue thread? */
-			bttd = (BTTD_t *)(fact->machine.bbDescAddr & -PAGE_SIZE);
-			if(bttd->InterruptVector) {				/* Is this the Blue interrupt thread? */
-				act = fact;							/* Yeah... */
-				break;								/* Found it, Bail the loop... */
-			}
-		}
-		fact = (thread_t)fact->task_threads.next;	/* Go to the next one */
-	}
-
-	if(!act) {								/* Couldn't find a bluebox */
-		task_unlock(task);					/* Release task lock */
-		return KERN_FAILURE;				/* No tickie, no shirtee... */
-	}
-
-	thread_reference(act);
-	
-	task_unlock(task);								/* Safe to release now */
-
-	thread_mtx_lock(act);
-
-	/* if the calling thread is the BlueBox thread that handles interrupts
-	 * we know that we are in the PseudoKernel and we can short circuit 
-	 * setting up the asynchronous task by setting a pending interrupt.
-	 */
-	
-	if (act == current_thread()) {		
-		bttd->InterruptControlWord = bttd->InterruptControlWord | 
-			((bttd->postIntMask >> kCR2ToBackupShift) & kBackupCR2Mask);
-				
-		thread_mtx_unlock(act);						/* Unlock the activation */
-		thread_deallocate(act);
-		return KERN_SUCCESS;
-	}
-
-	if(act->machine.emPendRupts >= 16) {				/* Have we hit the arbitrary maximum? */
-		thread_mtx_unlock(act);						/* Unlock the activation */
-		thread_deallocate(act);
-		return KERN_RESOURCE_SHORTAGE;				/* Too many pending right now */
-	}
-	
-	if(!(bbr = (bbRupt *)kalloc(sizeof(bbRupt)))) {	/* Get a return handler control block */
-		thread_mtx_unlock(act);						/* Unlock the activation */
-		thread_deallocate(act);
-		return KERN_RESOURCE_SHORTAGE;				/* No storage... */
-	}
-	
-	(void)hw_atomic_add(&act->machine.emPendRupts, 1);	/* Count this 'rupt */
-	bbr->rh.handler = bbSetRupt;					/* Set interruption routine */
-
-	bbr->rh.next = act->handlers;					/* Put our interrupt at the start of the list */
-	act->handlers = &bbr->rh;
-
-	act_set_apc(act);								/* Set an APC AST */
-
-	thread_mtx_unlock(act);							/* Unlock the activation */
-	thread_deallocate(act);
-	return KERN_SUCCESS;							/* We're done... */
-}
-
-/* 
- *	This guy is fired off asynchronously to actually do the 'rupt.
- *	We will find the user state savearea and modify it.  If we can't,
- *	we just leave after releasing our work area
- */
-
-void bbSetRupt(ReturnHandler *rh, thread_t act) {
-
-	struct savearea	*sv;
-	BTTD_t		*bttd;
-	bbRupt		*bbr;
-	UInt32		interruptState;
-	
-	bbr = (bbRupt *)rh;								/* Make our area convenient */
-
-	if(!(act->machine.bbDescAddr)) {					/* Is BlueBox still enabled? */
-		kfree(bbr, sizeof(bbRupt));	/* No, release the control block */
-		return;
-	}
-
-	(void)hw_atomic_sub(&act->machine.emPendRupts, 1);	/* Uncount this 'rupt */
-
-	if(!(sv = find_user_regs(act))) {				/* Find the user state registers */
-		kfree(bbr, sizeof(bbRupt));	/* Couldn't find 'em, release the control block */
-		return;
-	}
-
-	bttd = (BTTD_t *)(act->machine.bbDescAddr & -PAGE_SIZE);
-		
-    interruptState = (bttd->InterruptControlWord & kInterruptStateMask) >> kInterruptStateShift; 
-
-    switch (interruptState) {
-		
-		case kInSystemContext:
-			sv->save_cr |= bttd->postIntMask;		/* post int in CR2 */
-			break;
-			
-		case kInAlternateContext:
-			bttd->InterruptControlWord = (bttd->InterruptControlWord & ~kInterruptStateMask) | 
-				(kInPseudoKernel << kInterruptStateShift);
-				
-			bttd->exceptionInfo.srr0 = (unsigned int)sv->save_srr0;		/* Save the current PC */
-			sv->save_srr0 = (uint64_t)act->machine.bbInterrupt;	/* Set the new PC */
-			bttd->exceptionInfo.sprg1 = (unsigned int)sv->save_r1;		/* Save the original R1 */
-			sv->save_r1 = (uint64_t)bttd->exceptionInfo.sprg0;	/* Set the new R1 */
-			bttd->exceptionInfo.srr1 = (unsigned int)sv->save_srr1;		/* Save the original MSR */
-			sv->save_srr1 &= ~(MASK(MSR_BE)|MASK(MSR_SE));	/* Clear SE|BE bits in MSR */
-			act->machine.specFlags &= ~bbNoMachSC;				/* reactivate Mach SCs */ 
-			disable_preemption();							/* Don't move us around */
-			getPerProc()->spcFlags = act->machine.specFlags;	/* Copy the flags */
-			enable_preemption();							/* Ok to move us around */
-			/* drop through to post int in backup CR2 in ICW */
-
-		case kInExceptionHandler:
-		case kInPseudoKernel:
-		case kOutsideBlue:
-			bttd->InterruptControlWord = bttd->InterruptControlWord | 
-				((bttd->postIntMask >> kCR2ToBackupShift) & kBackupCR2Mask);
-			break;
-				
-		default:
-			break;
-	}
-
-	kfree(bbr, sizeof(bbRupt));	/* Release the control block */
-	return;
-
-}
-
-kern_return_t
-enable_bluebox(host_t host, unsigned _taskID, unsigned _TWI_TableStart,
-			   unsigned _Desc_TableStart);
-kern_return_t disable_bluebox( host_t host );
-
-/*
- * This function is used to enable the firmware assist code for bluebox traps, system calls
- * and interrupts.
- *
- * The assist code can be called from two types of threads.  The blue thread, which handles 
- * traps, system calls and interrupts and preemptive threads that only issue system calls.
- *
- * Parameters:	host			.
- * 		_taskID			opaque task ID
- * 		_TWI_TableStart		Start of TWI table
- * 		_Desc_TableStart	Start of descriptor table
- */ 
-
-kern_return_t
-enable_bluebox(host_t host, unsigned _taskID, unsigned _TWI_TableStart,
-	       unsigned _Desc_TableStart)
-{
-	/* XXX mig funness */
-	void *taskID = (void *)_taskID;
-	void *TWI_TableStart = (void *)_TWI_TableStart;
-	char *Desc_TableStart = (char *)_Desc_TableStart;
-	
-	thread_t 		th;
-	vm_offset_t		kerndescaddr, origdescoffset;
-	kern_return_t 	ret;
-	ppnum_t			physdescpage;
-	BTTD_t			*bttd;
-	
-	th = current_thread();									/* Get our thread */					
-
-	if ( host == HOST_NULL ) return KERN_INVALID_HOST;
-	if ( ! is_suser() ) return KERN_FAILURE;						/* We will only do this for the superuser */
-	if ( th->machine.bbDescAddr ) return KERN_FAILURE;		/* Bail if already authorized... */
-	if ( ! (unsigned int) Desc_TableStart ) return KERN_FAILURE;	/* There has to be a descriptor page */ 
-	if ( ! TWI_TableStart ) return KERN_FAILURE;					/* There has to be a TWI table */ 
-
-	/* Get the page offset of the descriptor */
-	origdescoffset = (vm_offset_t)Desc_TableStart & (PAGE_SIZE - 1);
-
-	/* Align the descriptor to a page */
-	Desc_TableStart = (char *)((vm_offset_t)Desc_TableStart & -PAGE_SIZE);
-
-	ret = vm_map_wire(th->map, 					/* Kernel wire the descriptor in the user's map */
-		(vm_offset_t)Desc_TableStart,
-		(vm_offset_t)Desc_TableStart + PAGE_SIZE,
-		VM_PROT_READ | VM_PROT_WRITE,
-		FALSE);															
-		
-	if(ret != KERN_SUCCESS) {								/* Couldn't wire it, spit on 'em... */
-		return KERN_FAILURE;	
-	}
-		
-	physdescpage = 											/* Get the physical page number of the page */
-		pmap_find_phys(th->map->pmap, CAST_USER_ADDR_T(Desc_TableStart));
-
-	ret =  kmem_alloc_pageable(kernel_map, &kerndescaddr, PAGE_SIZE);	/* Find a virtual address to use */
-	if(ret != KERN_SUCCESS) {								/* Could we get an address? */
-		(void) vm_map_unwire(th->map,				/* No, unwire the descriptor */
-			(vm_offset_t)Desc_TableStart,
-			(vm_offset_t)Desc_TableStart + PAGE_SIZE,
-			TRUE);
-		return KERN_FAILURE;								/* Split... */
-	}
-	
-	(void) pmap_enter(kernel_pmap, 							/* Map this into the kernel */
-		kerndescaddr, physdescpage, VM_PROT_READ|VM_PROT_WRITE, 
-		VM_WIMG_USE_DEFAULT, TRUE);
-	
-	bttd = (BTTD_t *)kerndescaddr;							/* Get the address in a convenient spot */ 
-	
-	th->machine.bbDescAddr = (unsigned int)kerndescaddr+origdescoffset;	/* Set kernel address of the table */
-	th->machine.bbUserDA = (unsigned int)Desc_TableStart;	/* Set user address of the table */
-	th->machine.bbTableStart = (unsigned int)TWI_TableStart;	/* Set address of the trap table */
-	th->machine.bbTaskID = (unsigned int)taskID;		/* Assign opaque task ID */
-	th->machine.bbTaskEnv = 0;						/* Clean task environment data */
-	th->machine.emPendRupts = 0;						/* Clean pending 'rupt count */
-	th->machine.bbTrap = bttd->TrapVector;			/* Remember trap vector */
-	th->machine.bbSysCall = bttd->SysCallVector;		/* Remember syscall vector */
-	th->machine.bbInterrupt = bttd->InterruptVector;	/* Remember interrupt vector */
-	th->machine.bbPending = bttd->PendingIntVector;	/* Remember pending vector */
-	th->machine.specFlags &= ~(bbNoMachSC | bbPreemptive);	/* Make sure mach SCs are enabled and we are not marked preemptive */
-	th->machine.specFlags |= bbThread;				/* Set that we are a Classic thread */
-		
-	if(!(bttd->InterruptVector)) {							/* See if this is a preemptive (MP) BlueBox thread */
-		th->machine.specFlags |= bbPreemptive;		/* Yes, remember it */
-	}
-		
-	disable_preemption();									/* Don't move us around */
-	getPerProc()->spcFlags = th->machine.specFlags;	/* Copy the flags */
-	enable_preemption();									/* Ok to move us around */
-		
-	{
-		/* mark the proc to indicate that this is a TBE proc */
-
-		tbeproc(th->task->bsd_info);
-	}
-
-	return KERN_SUCCESS;
-}
-
-kern_return_t disable_bluebox( host_t host ) {				/* User call to terminate bluebox */
-	
-	thread_t 	act;
-	
-	act = current_thread();									/* Get our thread */					
-
-	if (host == HOST_NULL) return KERN_INVALID_HOST;
-	
-	if(!is_suser()) return KERN_FAILURE;					/* We will only do this for the superuser */
-	if(!act->machine.bbDescAddr) return KERN_FAILURE;			/* Bail if not authorized... */
-
-	disable_bluebox_internal(act);							/* Clean it all up */
-	return KERN_SUCCESS;									/* Leave */
-}
-
-void disable_bluebox_internal(thread_t act) {			/* Terminate bluebox */
-		
-	(void) vm_map_unwire(act->map,							/* Unwire the descriptor in user's address space */
-		(vm_offset_t)act->machine.bbUserDA,
-		(vm_offset_t)act->machine.bbUserDA + PAGE_SIZE,
-		FALSE);
-		
-	kmem_free(kernel_map, (vm_offset_t)act->machine.bbDescAddr & -PAGE_SIZE, PAGE_SIZE);	/* Release the page */
-	
-	act->machine.bbDescAddr = 0;								/* Clear kernel pointer to it */
-	act->machine.bbUserDA = 0;									/* Clear user pointer to it */
-	act->machine.bbTableStart = 0;								/* Clear user pointer to TWI table */
-	act->machine.bbTaskID = 0;									/* Clear opaque task ID */
-	act->machine.bbTaskEnv = 0;								/* Clean task environment data */
-	act->machine.emPendRupts = 0;								/* Clean pending 'rupt count */
-	act->machine.specFlags &= ~(bbNoMachSC | bbPreemptive | bbThread);	/* Clean up Blue Box enables */
-	disable_preemption();								/* Don't move us around */
-	getPerProc()->spcFlags = act->machine.specFlags;		/* Copy the flags */
-	enable_preemption();								/* Ok to move us around */
-	return;
-}
-
-/*
- * Use the new PPCcall method to enable blue box threads
- *
- *	save->r3 = taskID
- *	save->r4 = TWI_TableStart
- *	save->r5 = Desc_TableStart
- *
- */
-int bb_enable_bluebox( struct savearea *save )
-{
-	kern_return_t rc;
-
-	rc = enable_bluebox((host_t)0xFFFFFFFF,
-			    CAST_DOWN(unsigned, save->save_r3),
-			    CAST_DOWN(unsigned, save->save_r4),
-			    CAST_DOWN(unsigned, save->save_r5));
-	save->save_r3 = rc;
-	return 1;										/* Return with normal AST checking */
-}
-
-/*
- * Use the new PPCcall method to disable blue box threads
- *
- */
-int bb_disable_bluebox( struct savearea *save )
-{
-	kern_return_t rc;
-
-	rc = disable_bluebox( (host_t)0xFFFFFFFF );
-	save->save_r3 = rc;
-	return 1;										/* Return with normal AST checking */
-}
-
-/*
- * Search through the list of threads to find the matching taskIDs, then
- * set the task environment pointer.  A task in this case is a preemptive thread
- * in MacOS 9.
- *
- *	save->r3 = taskID
- *	save->r4 = taskEnv
- */
-
-int bb_settaskenv( struct savearea *save )
-{
-	int				i;
-    task_t			task;
-	thread_t	act, fact;
-
-
-	task = current_task();							/* Figure out who our task is */
-
-	task_lock(task);								/* Lock our task */
-	fact = (thread_t)task->threads.next;		/* Get the first activation on task */
-	act = NULL;										/* Pretend we didn't find it yet */
-	
-	for(i = 0; i < task->thread_count; i++) {		/* Scan the whole list */
-		if(fact->machine.bbDescAddr) {					/* Is this a Blue thread? */
-			if ( fact->machine.bbTaskID == save->save_r3 ) {	/* Is this the task we are looking for? */
-				act = fact;							/* Yeah... */
-				break;								/* Found it, Bail the loop... */
-			}
-		}
-		fact = (thread_t)fact->task_threads.next;	/* Go to the next one */
-	}
-
-	if ( !act || !act->active) {
-		task_unlock(task);							/* Release task lock */
-		save->save_r3 = -1;							/* we failed to find the taskID */
-		return 1;
-	}
-
-	thread_reference(act);
-
-	task_unlock(task);								/* Safe to release now */
-
-	thread_mtx_lock(act);							/* Make sure this stays 'round */
-
-	act->machine.bbTaskEnv = save->save_r4;
-	if(act == current_thread()) {						/* Are we setting our own? */
-		disable_preemption();						/* Don't move us around */
-		getPerProc()->ppbbTaskEnv = act->machine.bbTaskEnv;	/* Remember the environment */
-		enable_preemption();						/* Ok to move us around */
-	}
-
-	thread_mtx_unlock(act);							/* Unlock the activation */
-	thread_deallocate(act);
-	save->save_r3 = 0;
-	return 1;
-}
diff --git a/osfmk/ppc/PseudoKernel.h b/osfmk/ppc/PseudoKernel.h
deleted file mode 100644
index 31b83af7e..000000000
--- a/osfmk/ppc/PseudoKernel.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
-	File:		PseudoKernel.h
-
-	Contains:	Interfaces for Classic environment's PseudoKernel
-
-	Copyright:	(c) 2000 Apple Computer, Inc. All rights reserved.
-*/
-
-#include <libkern/OSTypes.h>
-
-#include <ppc/exception.h>
-
-/* Support firmware PseudoKernel FastTrap architectural extension */
-
-#define bbMaxTrap (16 * sizeof(long))
-#define bbRFITrap bbMaxTrap
-
-extern int bb_enable_bluebox(struct savearea *);
-extern int bb_disable_bluebox(struct savearea *);
-extern int bb_settaskenv(struct savearea *);
-
-kern_return_t syscall_notify_interrupt(void);
-
-struct BlueExceptionDataArea {
-	UInt32				srr0;					// OUT PC at time of exception, IN return address
-	UInt32				srr1;					// OUT/IN msr FE0, BE, SE and FE1 bits to restore on exit
-	UInt32				sprg0;					// OUT R1 set to this value
-	UInt32				sprg1;					// OUT/IN R1 restored to this value
-};
-typedef struct BlueExceptionDataArea * BlueExceptionDataAreaPtr;
-typedef struct BlueExceptionDataArea BEDA_t;
-
-/*
-	The Blue Thread, which is running MacOS, needs to be able to handle Traps, SCs and interrupts.
-*/
-struct BlueThreadTrapDescriptor {
-	UInt32				TrapVector;				// 0=Trap
-	UInt32				SysCallVector;			// 1=SysCall
-	UInt32				InterruptVector;		// 2=Interrupt
-	UInt32				PendingIntVector;		// 3=Pending interrupt
-	BEDA_t				exceptionInfo;			// Save registers at time of exception (trap/syscall)
-	UInt32				InterruptControlWord;	// Holds context state and backup CR2 bits
-	UInt32				NewExitState;			// New run state when exiting PseudoKernel
-	UInt32				testIntMask;			// Mask for a pending alternate context interrupt in backup CR2
-	UInt32				postIntMask;			// Mask to post an interrupt
-};
-typedef struct BlueThreadTrapDescriptor * BlueThreadTrapDescriptorPtr;
-typedef struct BlueThreadTrapDescriptor BTTD_t;
-	
-enum {
-	// The following define the UInt32 gInterruptState
-	kInUninitialized	=	0,			// State not yet initialized
-	kInPseudoKernel		=	1,			// Currently executing within pseudo kernel
-	kInSystemContext	=	2,			// Currently executing within the system (emulator) context
-	kInAlternateContext	=	3,			// Currently executing within an alternate (native) context
-	kInExceptionHandler	=	4,			// Currently executing an exception handler
-	kOutsideBlue		=	5,			// Currently executing outside of the Blue thread
-	kNotifyPending		=	6,			// Pending Notify Interrupt
-
-	kInterruptStateMask	=	0x000F0000,	// Mask to extract interrupt state from gInterruptState
-	kInterruptStateShift	=	16,		// Shift count to align interrupt state
-
-	kBackupCR2Mask		=	0x0000000F,	// Mask to extract backup CR2 from gInterruptState
-	kCR2ToBackupShift	=	31-11,		// Shift count to align CR2 into the backup CR2 of gInterruptState
-										//  (and vice versa)
-	kCR2Mask			=	0x00F00000	// Mask to extract CR2 from the PPC CR register 
-};
-
-struct bbRupt {
-	struct ReturnHandler	rh;			/* Return handler address */
-};
-typedef struct bbRupt bbRupt;
diff --git a/osfmk/ppc/_setjmp.s b/osfmk/ppc/_setjmp.s
deleted file mode 100644
index 534fc3536..000000000
--- a/osfmk/ppc/_setjmp.s
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-/*
- * C library -- _setjmp, _longjmp
- *
- *	_longjmp(a,v)
- * will generate a "return(v)" from
- * the last call to
- *	_setjmp(a)
- * by restoring registers from the stack,
- * The previous signal state is NOT restored.
- *
- * NOTE :    MUST BE KEPT CONSISTENT WITH gdb/config/powerpc/tm-ppc-eabi.h
- *           (which needs to know where to find the destination address)
- */
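-
-/*
- * A minimal usage sketch with standard setjmp/longjmp semantics (the
- * declarations and the buffer size are written out by hand here):
- *
- *	extern int _setjmp(void *env);
- *	extern void _longjmp(void *env, int val);
- *
- *	static unsigned long env[64];		// >= 240 bytes are stored above
- *
- *	if (_setjmp(env) == 0)
- *		do_work();			// direct return: value 0
- *	else
- *		recover();			// re-entered via _longjmp(env, v)
- */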
-
-#include <ppc/asm.h>
-
-/*
- * setjmp : ARG0 (r3) contains the address of
- *	    the structure where we are to
- *	    store the context
- *          Uses r0 as scratch register
- *
- * NOTE :    MUST BE KEPT CONSISTENT WITH gdb/config/powerpc/tm-ppc-eabi.h
- *           (which needs to know where to find the destination address)
- */	
-
-ENTRY(_setjmp,TAG_NO_FRAME_USED)
-				 /* first entry is used for r1 - stack ptr */
-	stw	r13,	4(ARG0)  /* GPR context. We avoid multiple-word */
-	stw	r14,	8(ARG0)  /* instructions as they're slower (?) */
-	stw	r15,   12(ARG0)	
-	stw	r16,   16(ARG0)	
-	stw	r17,   20(ARG0)	
-	stw	r18,   24(ARG0)	
-	stw	r19,   28(ARG0)	
-	stw	r20,   32(ARG0)	
-	stw	r21,   36(ARG0)	
-	stw	r22,   40(ARG0)	
-	stw	r23,   44(ARG0)	
-	stw	r24,   48(ARG0)	
-	stw	r25,   52(ARG0)	
-	stw	r26,   56(ARG0)	
-	stw	r27,   60(ARG0)	
-	stw	r28,   64(ARG0)	
-	stw	r29,   68(ARG0)	
-	stw	r30,   72(ARG0)	
-	stw	r31,   76(ARG0)	
-
-	mfcr	r0
-	stw	r0,    80(ARG0)  /* Condition register */
-
-	mflr	r0
-	stw	r0,    84(ARG0)  /* Link register */
-
-	mfxer	r0
-	stw	r0,    88(ARG0)  /* Fixed point exception register */
-
-#if FLOATING_POINT_SUPPORT	/* TODO NMGS probably not needed for kern */ 
-	mffs	f0				/* get FPSCR in low 32 bits of f0 */
-	li	r0,    92	 /* stfiwx is indexed-form only, so build the offset */
-	stfiwx	f0,    ARG0,r0	 /* Floating point status register */
-
-	stfd	f14,   96(ARG0)  /* Floating point context - 8 byte aligned */
-	stfd	f15,  104(ARG0)
-	stfd	f16,  112(ARG0)
-	stfd	f17,  120(ARG0)
-	stfd	f18,  128(ARG0)
-	stfd	f19,  136(ARG0)
-	stfd	f20,  144(ARG0)
-	stfd	f21,  152(ARG0)
-	stfd	f22,  160(ARG0)
-	stfd	f23,  168(ARG0)
-	stfd	f24,  176(ARG0)
-	stfd	f25,  184(ARG0)
-	stfd	f26,  192(ARG0)
-	stfd	f27,  200(ARG0)
-	stfd	f28,  208(ARG0)
-	stfd	f29,  216(ARG0)
-	stfd	f30,  224(ARG0)
-	stfd	f31,  232(ARG0)
-
-#endif
-
-	stw	r1,	0(ARG0)  /* finally, save the stack pointer */
-	li	ARG0,   0	 /* setjmp must return zero */
-	blr
-
-/*
- * longjmp : ARG0 (r3) contains the address of
- *	     the structure from where we are to
- *	     restore the context.
- *	     ARG1 (r4) contains the non-zero
- *	     value that we must return to
- *	     that context.
- *           Uses r0 as scratch register
- *
- * NOTE :    MUST BE KEPT CONSISTENT WITH gdb/config/powerpc/tm-ppc-eabi.h
- *           (which needs to know where to find the destination address)
- */	
-
-ENTRY(_longjmp, TAG_NO_FRAME_USED)  /* TODO NMGS - need correct tag */ 
-	lwz	r13,	4(ARG0)  /* GPR context. We avoid multiple-word */
-	lwz	r14,	8(ARG0)  /* instructions as they're slower (?) */
-	lwz	r15,   12(ARG0)	
-	lwz	r16,   16(ARG0)	
-	lwz	r17,   20(ARG0)	
-	lwz	r18,   24(ARG0)	
-	lwz	r19,   28(ARG0)	
-	lwz	r20,   32(ARG0)	
-	lwz	r21,   36(ARG0)	
-	lwz	r22,   40(ARG0)	
-	lwz	r23,   44(ARG0)	
-	lwz	r24,   48(ARG0)	
-	lwz	r25,   52(ARG0)	
-	lwz	r26,   56(ARG0)	
-	lwz	r27,   60(ARG0)	
-	lwz	r28,   64(ARG0)	
-	lwz	r29,   68(ARG0)	
-	lwz	r30,   72(ARG0)	
-	lwz	r31,   76(ARG0)	
-
-	lwz	r0,    80(ARG0)  /* Condition register */
-	mtcr	r0		 /* Restore the condition register */
-
-	lwz	r0,    84(ARG0)  /* Link register */
-	mtlr	r0
-
-	lwz	r0,    88(ARG0)  /* Fixed point exception register */
-	mtxer	r0
-
-#ifdef FLOATING_POINT_SUPPORT
-	lfd	f0,  92-4(ARG0)  /* get Floating point status register in low 32 bits of f0 */
-	mtfsf	 0xFF,f0	 /* restore FPSCR */
-
-	lfd	f14,   96(ARG0)  /* Floating point context - 8 byte aligned */
-	lfd	f15,  104(ARG0)
-	lfd	f16,  112(ARG0)
-	lfd	f17,  120(ARG0)
-	lfd	f18,  128(ARG0)
-	lfd	f19,  136(ARG0)
-	lfd	f20,  144(ARG0)
-	lfd	f21,  152(ARG0)
-	lfd	f22,  160(ARG0)
-	lfd	f23,  168(ARG0)
-	lfd	f24,  176(ARG0)
-	lfd	f25,  184(ARG0)
-	lfd	f26,  192(ARG0)
-	lfd	f27,  200(ARG0)
-	lfd	f28,  208(ARG0)
-	lfd	f29,  216(ARG0)
-	lfd	f30,  224(ARG0)
-	lfd	f31,  232(ARG0)
-
-#endif /* FLOATING_POINT_SUPPORT */
-	
-
-	lwz	r1,	0(ARG0)  /* finally, restore the stack pointer */
-
-	mr.	ARG0,   ARG1     /* set the return value */
-	bnelr			 /* return if non-zero */
-
-	li	ARG0,   1
-	blr			/* never return 0, return 1 instead */
-
diff --git a/osfmk/ppc/aligned_data.s b/osfmk/ppc/aligned_data.s
deleted file mode 100644
index 1777b577e..000000000
--- a/osfmk/ppc/aligned_data.s
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- *		This module only exists because I don't know how to get the silly C compiler
- *		and/or linker to generate data areas that are aligned on a particular boundary.
- *		And, this stuff is in the V=R mapped area.
- *
- *		Do the following for each:
- *
- *				.size	name,size-in-bytes
- *				.type	area-name,@object
- *				.globl	area-name
- *				.align 	power-of-two
- *		area-name:
- *				.set	.,.+size-in-bytes
- *
- *		So long as I'm being pedantic, always make sure that the most aligned,
- *		i.e., the largest power-of-twos, are first and then descend to the smallest.
- *		If you don't, and you are not careful and hand calculate, you'll end up
- *		with holes and waste storage.  I hate C.
- *
- *		Define the sizes in genassym.c
- */
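The placement the comment laments can nowadays be expressed directly in C; a hedged sketch using the GCC/Apple-cc aligned attribute (names and sizes below are illustrative, not taken from this file):

```c
#include <stdint.h>

/* Illustrative equivalents of the assembler pattern described above. */
static uint8_t boot_proc_info[4096] __attribute__((aligned(4096)));
static uint8_t deb_stash[256]       __attribute__((aligned(256)));
```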
- 
-		
-#include <debug.h>
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <ppc/spec_reg.h>
-#include <mach/ppc/vm_param.h>
-#include <assym.s>
-
-			.data
-
-/*		4096-byte aligned areas */
-
-		.globl	EXT(PerProcTable)
-		.align	12
-EXT(PerProcTable):									; Per processor table
-		.space	(ppeSize*MAX_CPUS),0				; (filled with 0s)
-
-		.globl	EXT(BootProcInfo)
-		.align	12
-EXT(BootProcInfo):									; Per processor data area
-		.space	ppSize,0							; (filled with 0s)
-
-/*		512-byte aligned areas */
-
-		.globl	EXT(kernel_pmap_store)				; This is the kernel_pmap
-		.align	8
-EXT(kernel_pmap_store):
-		.set	.,.+pmapSize
-
-
-/*		256-byte aligned areas */
-
-		.globl	EXT(GratefulDebWork)
-		.align	8
-EXT(GratefulDebWork):								; Enough for 2 rows of 8 chars of 16-pixel wide 32-bit pixels and a 256 byte work area
-		.set	.,.+2560
-
-		.globl	debstash
-		.align	8
-debstash:
-		.set	.,.+256
-
-/*		128-byte aligned areas */
-
-		.globl	EXT(mapCtl)
-		.align	7
-EXT(mapCtl):
-		.set	.,.+mapcsize
-
-		.globl	fwdisplock
-		.align	7
-fwdisplock:
-		.set	.,.+128
-
-		.globl	EXT(free_mappings)
-		.align	7
-	
-EXT(free_mappings):
-		.long	0
-
-		.globl	EXT(NMIss)
-		.align	7
-EXT(NMIss):
-		.long	0
-		.long	0
-		.long	0
-		.long	0
-		.long	0
-		.long	0
-		.long	0
-		.long	0
-
-/*		32-byte aligned areas */
-
-		.globl	EXT(dbvecs)
-		.align	5
-EXT(dbvecs):
-		.set	.,.+(33*16)
-
-		.globl	hexfont
-		.align	5
-#include <ppc/hexfont.h>
-
-    	.globl  EXT(QNaNbarbarian)
-		.align	5
-
-EXT(QNaNbarbarian):
-		.long	0x7FFFDEAD							/* This is a quiet not-a-number which is a "known" debug value */
-		.long	0x7FFFDEAD							/* This is a quiet not-a-number which is a "known" debug value */
-		.long	0x7FFFDEAD							/* This is a quiet not-a-number which is a "known" debug value */
-		.long	0x7FFFDEAD							/* This is a quiet not-a-number which is a "known" debug value */
-	
-		.long	0x7FFFDEAD							/* This is a quiet not-a-number which is a "known" debug value */
-		.long	0x7FFFDEAD							/* This is a quiet not-a-number which is a "known" debug value */
-		.long	0x7FFFDEAD							/* This is a quiet not-a-number which is a "known" debug value */
-		.long	0x7FFFDEAD							/* This is a quiet not-a-number which is a "known" debug value */
-
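As a sanity check on the 0x7FFFDEAD constant, a standalone C snippet (illustrative only, not part of the kernel build) confirming it is a quiet single-precision NaN:

```c
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <string.h>

int main(void) {
    uint32_t bits = 0x7FFFDEADu;
    float f;
    memcpy(&f, &bits, sizeof f);  /* reinterpret the bit pattern as a float */
    assert(isnan(f));             /* exponent all ones, mantissa non-zero */
    assert(bits & 0x00400000u);   /* top mantissa bit set => quiet NaN */
    return 0;
}
```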
-/*		8-byte aligned areas */
-
-    	.globl  EXT(FloatInit)
-		.align	3
-
-EXT(FloatInit):
-		.long	0xC24BC195							/* Initial value */
-		.long	0x87859393							/* of floating point registers */
-		.long	0xE681A2C8							/* and others */
-		.long	0x8599855A
-
-		.globl  EXT(DebugWork)
-		.align	3
-
-EXT(DebugWork):
-		.long	0
-		.long	0
-		.long	0
-		.long	0
-
-    	.globl  EXT(dbfloats)
-		.align	3
-EXT(dbfloats):
-		.set	.,.+(33*8)
-
-		.globl  EXT(dbspecrs)
-		.align	3
-EXT(dbspecrs):
-		.set	.,.+(336*4)
-
-/*
- *		Boot processor Interrupt and debug stacks go here.
- */
-
-                /* in the __HIB section since the hibernate restore code uses this stack. */
-		.section __HIB, __data
-
-		.align  PPC_PGSHIFT
-     
-	 	.globl  EXT(intstack)
-EXT(intstack):
-	 	.globl  EXT(gIOHibernateRestoreStack)
-EXT(gIOHibernateRestoreStack):
-
-		.set	.,.+INTSTACK_SIZE
-
-	 	.globl  EXT(gIOHibernateRestoreStackEnd)
-EXT(gIOHibernateRestoreStackEnd):
-
-                /* back to the regular __DATA section. */
-
-		.section __DATA, __data
-		.align  PPC_PGSHIFT
-
-/* Debugger stack - used by the debugger if present */
-
-    	.globl  EXT(debstack)
-EXT(debstack):
-		.set	., .+KERNEL_STACK_SIZE
-
-		.section __DATA, __data
-
-
diff --git a/osfmk/ppc/asm.h b/osfmk/ppc/asm.h
deleted file mode 100644
index 2535a8491..000000000
--- a/osfmk/ppc/asm.h
+++ /dev/null
@@ -1,781 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-#ifndef	_PPC_ASM_H_
-#define	_PPC_ASM_H_
-
-#define	__ASMNL__	@
-#define STRINGD .ascii
-
-#ifdef ASSEMBLER
-
-
-#define br0 0
-
-#define ARG0 r3
-#define ARG1 r4
-#define ARG2 r5
-#define ARG3 r6
-#define ARG4 r7
-#define ARG5 r8
-#define ARG6 r9
-#define ARG7 r10
-
-#define tmp0	r0	/* Temporary GPR remapping (603e specific) */
-#define tmp1	r1
-#define tmp2	r2
-#define tmp3	r3
-
-/* SPR registers */
-
-#define mq		0		/* MQ register for 601 emulation */
-#define rtcu	4		/* RTCU - upper word of RTC for 601 emulation */
-#define rtcl	5		/* RTCL - lower word of RTC for 601 emulation */
-#define dsisr	18
-#define ppcDAR	19
-#define ppcdar	19
-#define dar		19
-#define SDR1	25
-#define sdr1	25
-#define srr0	26
-#define srr1	27
-#define vrsave	256		/* Vector Register save */
-#define sprg0	272
-#define sprg1	273
-#define sprg2	274
-#define sprg3	275
-#define scomc	276
-#define scomd	277
-#define pvr		287
-
-#define IBAT0U	528
-#define IBAT0L	529
-#define IBAT1U	530
-#define IBAT1L	531
-#define IBAT2U	532
-#define IBAT2L	533
-#define IBAT3U	534
-#define IBAT3L	535
-#define ibat0u	528
-#define ibat0l	529
-#define ibat1u	530
-#define ibat1l	531
-#define ibat2u	532
-#define ibat2l	533
-#define ibat3u	534
-#define ibat3l	535
-
-#define DBAT0U	536
-#define DBAT0L	537
-#define DBAT1U	538
-#define DBAT1L	539
-#define DBAT2U	540
-#define DBAT2L	541
-#define DBAT3U	542
-#define DBAT3L	543
-#define dbat0u	536
-#define dbat0l	537
-#define dbat1u	538
-#define dbat1l	539
-#define dbat2u	540
-#define dbat2l	541
-#define dbat3u	542
-#define dbat3l	543
-
-#define ummcr2	928		/* Performance monitor control */
-#define upmc5   929     /* Performance monitor counter */
-#define upmc6   930     /* Performance monitor counter */
-#define ubamr	935		/* Performance monitor mask */
-#define ummcr0	936		/* Performance monitor control */
-#define upmc1	937		/* Performance monitor counter */
-#define upmc2	938		/* Performance monitor counter */
-#define usia	939		/* User sampled instruction address */
-#define ummcr1	940		/* Performance monitor control */
-#define upmc3	941		/* Performance monitor counter */
-#define upmc4	942		/* Performance monitor counter */
-#define usda	943		/* User sampled data address */
-#define mmcr2	944		/* Performance monitor control */
-#define pmc5    945     /* Performance monitor counter */
-#define pmc6    946     /* Performance monitor counter */
-#define bamr	951		/* Performance monitor mask */
-#define mmcr0	952
-#define pmc1	953
-#define	pmc2	954
-#define	sia		955
-#define	mmcr1	956
-#define	pmc3	957
-#define	pmc4	958
-#define	sda		959		/* Sampled data address */
-#define dmiss	976		/* ea that missed */
-#define trig0	976		
-#define dcmp	977		/* compare value for the va that missed */
-#define trig1	977		
-#define hash1	978		/* pointer to first hash pteg */
-#define trig2	978		
-#define	hash2	979		/* pointer to second hash pteg */
-#define imiss	980		/* ea that missed */
-#define tlbmiss	980		/* ea that missed */
-#define icmp	981		/* compare value for the va that missed */
-#define ptehi	981		/* compare value for the va that missed */
-#define rpa		982		/* required physical address register */
-#define ptelo	982		/* required physical address register */
-#define l3pdet	984		/* l3pdet */
-
-#define HID0	1008	/* Checkstop and misc enables */
-#define hid0	1008	/* Checkstop and misc enables */
-#define HID1	1009	/* Clock configuration */
-#define hid1	1009	/* Clock configuration */
-#define HID2	1016	/* Other processor controls */
-#define hid2	1016	/* Other processor controls */
-#define iabr	1010	/* Instruction address breakpoint register */
-#define ictrl	1011	/* Instruction Cache Control */
-#define ldstdb	1012	/* Load/Store Debug */
-#define hid4	1012	/* Misc stuff */
-#define dabr	1013	/* Data address breakpoint register */
-#define msscr0	1014	/* Memory subsystem control */
-#define hid5	1014	/* Misc stuff */
-#define msscr1	1015	/* Memory subsystem debug */
-#define msssr0	1015	/* Memory Subsystem Status */
-#define ldstcr	1016	/* Load/Store Status/Control */
-#define l2cr2	1016	/* L2 Cache control 2 */
-#define l2cr	1017	/* L2 Cache control */
-#define l3cr	1018	/* L3 Cache control */
-#define ictc	1019	/* I-cache throttling control */
-#define thrm1	1020	/* Thermal management 1 */
-#define thrm2	1021	/* Thermal management 2 */
-#define thrm3	1022	/* Thermal management 3 */
-#define pir		1023	/* Processor ID Register */
-
-
-/* SPR registers (64-bit, PPC970 specific) */
-
-#define scomc_gp	276
-#define scomd_gp	277
-
-#define hsprg0		304
-#define hsprg1		305
-#define hdec		310
-#define hior		311
-#define rmor		312
-#define hrmor		313
-#define hsrr0		314
-#define hsrr1		315
-#define lpcr		318
-#define lpidr		319
-
-#define ummcra_gp	770
-#define upmc1_gp	771
-#define upmc2_gp	772
-#define upmc3_gp	773
-#define upmc4_gp	774
-#define upmc5_gp	775
-#define upmc6_gp	776
-#define upmc7_gp	777
-#define upmc8_gp	778
-#define ummcr0_gp	779
-#define usiar_gp	780
-#define usdar_gp	781
-#define ummcr1_gp	782
-#define uimc_gp		783
-
-#define mmcra_gp	786
-#define pmc1_gp		787
-#define pmc2_gp		788
-#define pmc3_gp		789
-#define pmc4_gp		790
-#define pmc5_gp		791
-#define pmc6_gp		792
-#define pmc7_gp		793
-#define pmc8_gp		794
-#define mmcr0_gp	795
-#define siar_gp		796
-#define sdar_gp		797
-#define mmcr1_gp	798
-#define imc_gp		799
-
-#define trig0_gp	976		
-#define trig1_gp	977		
-#define trig2_gp	978		
-
-#define dabrx		1015
-
-;	hid0 bits
-#define emcp	0
-#define emcpm	0x80000000
-#define dbp		1
-#define dbpm	0x40000000
-#define eba		2
-#define ebam	0x20000000
-#define ebd		3
-#define ebdm	0x10000000
-#define sbclk	4
-#define sbclkm	0x08000000
-#define eclk	6
-#define eclkm	0x02000000
-#define par		7
-#define parm	0x01000000
-#define sten	7
-#define stenm	0x01000000
-#define dnap	7
-#define dnapm	0x01000000
-#define doze	8
-#define dozem	0x00800000
-#define nap		9
-#define napm	0x00400000
-#define sleep	10
-#define sleepm	0x00200000
-#define dpm		11
-#define dpmm	0x00100000
-#define riseg	12
-#define risegm	0x00080000
-#define eiec	13
-#define eiecm	0x00040000
-#define mum		14
-#define mumm	0x00020000
-#define nhr		15
-#define nhrm	0x00010000
-#define ice		16
-#define icem	0x00008000
-#define dce		17
-#define dcem	0x00004000
-#define ilock	18
-#define ilockm	0x00002000
-#define dlock	19
-#define dlockm	0x00001000
-#define exttben	19
-#define icfi	20
-#define icfim	0x00000800
-#define dcfi	21
-#define dcfim	0x00000400
-#define spd		22
-#define spdm	0x00000200
-#define hdice	23
-#define hdicem	0x00000100
-#define sge		24
-#define sgem	0x00000080
-#define dcfa	25
-#define dcfam	0x00000040
-#define btic	26
-#define bticm	0x00000020
-#define lrstk	27
-#define lrstkm	0x00000010
-#define abe		28
-#define abem	0x00000008
-#define fold	28
-#define foldm	0x00000008
-#define bht		29
-#define bhtm	0x00000004
-#define nopdst	30
-#define nopdstm	0x00000002
-#define nopti	31
-#define noptim	0x00000001
-
-;	hid1 bits
-#define hid1pcem	0xF8000000
-#define hid1prem	0x06000000
-#define hid1dfs0	8
-#define hid1dfs0m	0x00800000
-#define hid1dfs1	9
-#define hid1dfs1m	0x00400000
-#define hid1pi0		14
-#define hid1pi0m	0x00020000
-#define hid1FCPErr	14
-#define hid1ps		15
-#define hid1FCD0PErr	15
-#define hid1psm		0x00010000
-#define hid1pc0		0x0000F800
-#define hid1pr0		0x00000600
-#define hid1pc1		0x000000F8
-#define hid1pc0		0x0000F800
-#define hid1pr1		0x00000006
-#define hid1FCD1PErr	16
-#define hid1FIERATErr	17
-
-;	hid2 bits
-#define hid2vmin	18
-#define hid2vminm	0x00002000
-
-;	msscr0 bits
-#define shden	0
-#define shdenm	0x80000000
-#define shden3	1
-#define shdenm3	0x40000000
-#define l1intvs	2	
-#define l1intve	4	
-#define l1intvb	0x38000000	
-#define l2intvs	5	
-#define l2intve	7	
-#define l2intvb	0x07000000	
-#define dl1hwf	8
-#define dl1hwfm	0x00800000
-#define dbsiz	9
-#define dbsizm	0x00400000
-#define emode	10
-#define emodem	0x00200000
-#define abgd	11
-#define abgdm	0x00100000
-#define tfsts	24
-#define tfste	25
-#define tfstm	0x000000C0
-#define	l2pfes	30
-#define	l2pfee	31
-#define	l2pfem	0x00000003
-
-;	msscr1 bits
-#define cqd		15
-#define cqdm	0x00010000
-#define csqs	1
-#define csqe	2
-#define csqm	0x60000000
-
-;	msssr1 bits - 7450
-#define vgL2PARA	0
-#define vgL3PARA	1
-#define vgL2COQEL	2
-#define vgL3COQEL	3
-#define vgL2CTR		4
-#define vgL3CTR		5
-#define vgL2COQR	6
-#define vgL3COQR	7
-#define vgLMQ		8
-#define vgSMC		9
-#define vgSNP		10
-#define vgBIU		11
-#define vgSMCE		12
-#define vgL2TAG		13
-#define vgL2DAT		14
-#define vgL3TAG		15
-#define vgL3DAT		16
-#define vgAPE		17
-#define vgDPE		18
-#define vgTEA		19
-
-;	srr1 bits
-#define icmck	1
-#define icmckm	0x40000000
-#define dcmck	2
-#define dcmckm	0x20000000
-#define l2mck	3
-#define l2mckm	0x10000000
-#define tlbmck	4
-#define tlbmckm	0x08000000
-#define brmck	5
-#define brmckm	0x04000000
-#define othmck	10
-#define othmckm	0x00200000
-#define l2dpmck	11
-#define l2dpmckm	0x00100000
-#define mcpmck	12
-#define mcpmckm	0x00080000
-#define teamck	13
-#define teamckm	0x00040000
-#define dpmck	14
-#define dpmckm	0x00020000
-#define apmck	15
-#define apmckm	0x00010000
-
-#define mckIFUE	42
-#define mckLDST	43
-#define mckXCs	44
-#define mckXCe	45
-#define mckNoErr	0
-#define mckIFSLBPE	1
-#define mckIFTLBPE	2
-#define mckIFTLBUE	3
-
-;	dsisr bits
-#define mckUEdfr	16
-#define mckUETwDfr	17
-#define mckL1DCPE	18
-#define	mckL1DTPE	19
-#define	mckDEPE		20
-#define mckTLBPE	21
-#define mckSLBPE	23
-
-;	Async MCK source
-#define AsyMCKSrc 0x0226
-#define AsyMCKRSrc 0x0227
-#define AsyMCKext 0
-#define AsyMCKfir 1
-#define AsyMCKhri 2
-#define AsyMCKdbg 3
-#define AsyMCKncstp 4
-
-;	Core FIR
-#define cFIR 0x0300
-#define cFIRrst 0x0310
-#define cFIRICachePE 0
-#define cFIRITagPE0 1
-#define cFIRITagPE1 2
-#define cFIRIEratPE 3
-#define cFIRIFUL2UE 4
-#define cFIRIFUCS 5
-#define cFIRDCachePE 6
-#define cFIRDTagPE 7
-#define cFIRDEratPE 8
-#define cFIRTLBPE 9
-#define cFIRSLBPE 10
-#define cFIRSL2UE 11
-
-;	Core Error Inject
-#define CoreErrI 0x0350
-#define CoreIFU 0
-#define CoreLSU 1
-#define CoreRate0 2
-#define CoreRate1 3
-#define CoreOnce 0
-#define CoreSolid 2
-#define CorePulse 3
-
-;	L2 FIR
-#define l2FIR 0x0400
-#define l2FIRrst 0x0410
-
-;	Bus FIR
-#define busFIR 0x0A00
-#define busFIRrst 0x0A10
-
-;	HID4
-#define hid4RMCI 23
-#define hid4FAlgn 24
-#define hid4DisPF 25
-#define hid4ResPF 26
-#define hid4EnSPTW 27
-#define hid4L1DCFI 28
-#define hid4DisDERpg 31
-#define hid4DisDCTpg 36
-#define hid4DisDCpg 41
-#define hid4DisTLBpg 48
-#define hid4DisSLBpg 54
-#define hid4MckEIEna 55
-
-;	L2 cache control
-#define l2e		0
-#define l2em	0x80000000
-#define l2pe	1
-#define l2pem	0x40000000
-#define l2siz	2
-#define l2sizf	3
-#define l2sizm	0x30000000
-#define l2clk	4
-#define l2clkf	6
-#define l2clkm	0x0E000000
-#define l2ram	7
-#define l2ramf	8
-#define l2ramm	0x01800000
-#define l2do	9
-#define l2dom	0x00400000
-#define l2i		10
-#define l2im	0x00200000
-#define l2ctl	11
-#define l2ctlm	0x00100000
-#define l2ionly	11
-#define l2ionlym	0x00100000
-#define l2wt	12
-#define l2wtm	0x00080000
-#define l2ts	13
-#define l2tsm	0x00040000
-#define l2oh	14
-#define l2ohf	15
-#define l2ohm	0x00030000
-#define l2donly	15
-#define l2donlym	0x00010000
-#define l2sl	16
-#define l2slm	0x00008000
-#define l2df	17
-#define l2dfm	0x00004000
-#define l2byp	18
-#define l2bypm	0x00002000
-#define l2fa	19
-#define l2fam	0x00001000
-#define l2hwf	20
-#define l2hwfm	0x00000800
-#define l2io	21
-#define l2iom	0x00000400
-#define l2clkstp	22
-#define	l2clkstpm	0x00000200
-#define l2dro	23
-#define l2drom	0x00000100 
-#define l2ctr	24
-#define l2ctrf	30
-#define l2ctrm	0x000000FE
-#define	l2ip	31
-#define l2ipm	0x00000001
-
-;	L3 cache control
-#define l3e		0
-#define l3em	0x80000000
-#define l3pe	1
-#define l3pem	0x40000000
-#define l3siz	3
-#define l3sizm	0x10000000
-#define l3clken	4
-#define l3clkenm	0x08000000
-#define l3dx	5
-#define l3dxm	0x04000000
-#define l3clk	6
-#define l3clkf	8
-#define l3clkm	0x03800000
-#define l3io	9
-#define l3iom	0x00400000
-#define l3spo	13
-#define l3spom	0x00040000
-#define l3cksp	14
-#define l3ckspf	15
-#define l3ckspm	0x00030000
-#define l3psp	16
-#define l3pspf	18
-#define l3pspm	0x0000E000
-#define l3rep	19
-#define l3repm	0x00001000
-#define l3hwf	20
-#define l3hwfm	0x00000800
-#define l3i		21
-#define l3im	0x00000400
-#define l3rt	22
-#define l3rtf	23
-#define	l3rtm	0x00000300
-#define l3dro	23
-#define l3drom	0x00000100 
-#define l3cya	24
-#define l3cyam	0x00000080
-#define l3donly	25
-#define l3donlym	0x00000040
-#define l3dmem	29
-#define l3dmemm	0x00000004
-#define l3dmsiz	31
-#define l3dmsizm	0x00000001
-
-#define	thrmtin		0
-#define	thrmtinm	0x80000000
-#define	thrmtiv		1
-#define thrmtivm	0x40000000
-#define thrmthrs	2
-#define thrmthre	8
-#define thrmthrm	0x3F800000
-#define thrmtid		29
-#define thrmtidm	0x00000004
-#define thrmtie		30
-#define thrmtiem	0x00000002
-#define thrmv		31
-#define thrmvm		0x00000001
-
-#define thrmsitvs	15
-#define thrmsitve	30
-#define thrmsitvm	0x0001FFFE
-#define thrme		31
-#define thrmem		0x00000001
-
-#define ictcfib		23
-#define ictcfie		30
-#define ictcfim		0x000001FE
-#define ictce		31
-#define ictcem		0x00000001
-
-#define slbESID	36
-#define slbKey	52
-#define slbIndex 52
-#define slbV	36
-#define slbVm	0x08000000
-#define slbCnt	64
-
-/*
- * Macros to access high and low word values of an address
- */
-
-#define	HIGH_CADDR(x)	ha16(x)
-#define	HIGH_ADDR(x)	hi16(x)
-#define	LOW_ADDR(x)	lo16(x)
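A C sketch of what these helpers compute, assuming the standard PowerPC hi/lo/ha convention (the +0x8000 adjustment in ha16 compensates for the sign-extension of the low half when it is later added by instructions like addi or lwz):

```c
#include <stdint.h>

/* Assumed semantics of the assembler operators referenced above. */
static inline uint16_t lo16(uint32_t x) { return (uint16_t)(x & 0xFFFFu); }
static inline uint16_t hi16(uint32_t x) { return (uint16_t)(x >> 16); }
static inline uint16_t ha16(uint32_t x) { return (uint16_t)((x + 0x8000u) >> 16); }
```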
-
-#endif	/* ASSEMBLER */
-
-#define cr0_lt	0
-#define cr0_gt	1
-#define cr0_eq	2
-#define cr0_so	3
-#define cr0_un	3
-#define cr1_lt	4
-#define cr1_gt	5
-#define cr1_eq	6
-#define cr1_so	7
-#define cr1_un	7
-#define cr2_lt	8
-#define cr2_gt	9
-#define cr2_eq	10
-#define cr2_so	11
-#define cr2_un	11
-#define cr3_lt	12
-#define cr3_gt	13
-#define cr3_eq	14
-#define cr3_so	15
-#define cr3_un	15
-#define cr4_lt	16
-#define cr4_gt	17
-#define cr4_eq	18
-#define cr4_so	19
-#define cr4_un	19
-#define cr5_lt	20
-#define cr5_gt	21
-#define cr5_eq	22
-#define cr5_so	23
-#define cr5_un	23
-#define cr6_lt	24
-#define cr6_gt	25
-#define cr6_eq	26
-#define cr6_so	27
-#define cr6_un	27
-#define cr7_lt	28
-#define cr7_gt	29
-#define cr7_eq	30
-#define cr7_so	31
-#define cr7_un	31
-
-/*	GUS Mode Register */
-#define GUSModeReg 0x0430
-#define GUSMdmapen 0x00008000
-#define GUSMstgtdis 0x00000080
-#define GUSMstgttim 0x00000038
-#define GUSMstgttoff 0x00000004
-
-/* PowerTune */
-#define PowerTuneControlReg	0x0AA001
-#define PowerTuneStatusReg	0x408001
-
-/* Code inject */
-//	The following bits are always on in the MSR when injected code is executing
-#define ijemon  0x00000010
-//	The following bits are always off in the MSR when injected code is executing
-#define ijemoff 0x0000C620
-#define ijemtrap ijemon|1
-//	The following is the inject exit trap
-#define ijtrap 0x0FFFC9C9
-
-/* Misc */
-#define srr1clr 0x783F0000
-
-/* Tags are placed before Immediately Following Code (IFC) for the debugger
- * to be able to deduce where to find various registers when backtracing
- * 
- * We only define the values as we use them, see SVR4 ABI PowerPC Supplement
- * for more details (defined in ELF spec).
- */
-
-#define TAG_NO_FRAME_USED 0x00000000
-
-/* (should use genassym to get these offsets) */
-
-#define FM_BACKPTR 0
-#define	FM_CR_SAVE 4
-#define FM_LR_SAVE 8 /* MacOSX is NOT following the ABI at the moment.. */
-#define FM_SIZE    64   /* minimum frame contents, backptr and LR save. Must be 16-byte (quad-word) aligned */
-#define FM_ARG0	   56
-#define FM_ALIGN(l) ((l+15)&-16)
-#define	PK_SYSCALL_BEGIN	0x7000
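A quick check of the FM_ALIGN arithmetic (plain C restatement; the sample values are illustrative):

```c
#include <assert.h>

/* FM_ALIGN rounds a frame length up to the next 16-byte boundary. */
#define FM_ALIGN(l) (((l) + 15) & -16)

int main(void) {
    assert(FM_ALIGN(1)  == 16);
    assert(FM_ALIGN(56) == 64);   /* FM_ARG0 rounds up to FM_SIZE */
    assert(FM_ALIGN(64) == 64);
    return 0;
}
```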
-
-
-/* redzone is the area under the stack pointer which must be preserved
- * when taking a trap, interrupt etc.
- */
-#define FM_REDZONE 224				/* r13-r31 (19*4 = 76 bytes) + f14-f31 (18*8 = 144 bytes), rounded up to a 16-byte multiple */
-
-#define COPYIN_ARG0_OFFSET FM_ARG0
-
-#ifdef	MACH_KERNEL
-#include <mach_kdb.h>
-#else	/* MACH_KERNEL */
-#define MACH_KDB 0
-#endif	/* MACH_KERNEL */
-
-#define BREAKPOINT_TRAP tw	4,r4,r4
-
-/* There is another definition of ALIGN for .c sources */
-#ifndef __LANGUAGE_ASSEMBLY
-#define ALIGN 4
-#endif /* __LANGUAGE_ASSEMBLY */
-
-#ifndef FALIGN
-#define FALIGN 4 /* Align functions on words for now. Cachelines is better */
-#endif
-
-#define LB(x,n) n
-#if	__STDC__
-#define	LCL(x)	L ## x
-#define EXT(x) _ ## x
-#define LEXT(x) _ ## x ## :
-#define LBc(x,n) n ## :
-#define LBb(x,n) n ## b
-#define LBf(x,n) n ## f
-#else /* __STDC__ */
-#define LCL(x) L/**/x
-#define EXT(x) _/**/x
-#define LEXT(x) _/**/x/**/:
-#define LBc(x,n) n/**/:
-#define LBb(x,n) n/**/b
-#define LBf(x,n) n/**/f
-#endif /* __STDC__ */
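A one-line C illustration of the ANSI branch above (the `/**/` forms do the same pasting on pre-ANSI preprocessors):

```c
#define EXT(x) _ ## x
extern void EXT(bcopy)(void);   /* expands to: extern void _bcopy(void); */
```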
-
-#define String	.asciz
-#define Value	.word
-#define Times(a,b) (a*b)
-#define Divide(a,b) (a/b)
-
-#define data16	.byte 0x66
-#define addr16	.byte 0x67
-
-#define MCOUNT
-
-#define ELF_FUNC(x)
-#define ELF_DATA(x)
-#define ELF_SIZE(x,s)
-
-#define	Entry(x,tag)	.text@.align FALIGN@ .globl EXT(x)@ LEXT(x)
-#define	ENTRY(x,tag)	Entry(x,tag)@MCOUNT
-#define	ENTRY2(x,y,tag)	.text@ .align FALIGN@ .globl EXT(x)@ .globl EXT(y)@ \
-			LEXT(x)@ LEXT(y) @\
-			MCOUNT
-#if __STDC__
-#define	ASENTRY(x) 	.globl x @ .align FALIGN; x ## @ MCOUNT
-#else
-#define	ASENTRY(x) 	.globl x @ .align FALIGN; x @ MCOUNT
-#endif /* __STDC__ */
-#define	DATA(x)		.globl EXT(x) @ .align ALIGN @ LEXT(x)
-
-
-#define End(x)		ELF_SIZE(x,.-x)
-#define END(x)		End(EXT(x))
-#define ENDDATA(x)	END(x)
-#define Enddata(x)	End(x)
-
-/* These defines are here for .c files that wish to reference global symbols
- * within __asm__ statements. 
- */
-#define CC_SYM_PREFIX "_"
-
-#endif /* _PPC_ASM_H_ */
diff --git a/osfmk/ppc/ast.h b/osfmk/ppc/ast.h
deleted file mode 100644
index a24933948..000000000
--- a/osfmk/ppc/ast.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-/*
- *	Machine-dependent AST file for ppc.
- */
-
-#ifndef	_PPC_AST_H_
-#define	_PPC_AST_H_
-
-#define AST_PPC_CHUD_URGENT     AST_CHUD_URGENT
-#define AST_PPC_CHUD            AST_CHUD
-#define AST_PPC_CHUD_ALL        AST_CHUD_ALL
-
-#endif	/* _PPC_AST_H_ */
diff --git a/osfmk/ppc/ast_types.h b/osfmk/ppc/ast_types.h
deleted file mode 100644
index a32dd6f9d..000000000
--- a/osfmk/ppc/ast_types.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifndef	_PPC_AST_TYPES_H_
-#define	_PPC_AST_TYPES_H_
-
-/*
- *	Data type for remote ast_check() invocation support.  Currently
- *	not implemented.  Do this first to avoid include problems.
- */
-typedef	int	ast_check_t;
-
-#endif	/* _PPC_AST_TYPES_H_ */
diff --git a/osfmk/ppc/atomic_switch.h b/osfmk/ppc/atomic_switch.h
deleted file mode 100644
index f31743cc5..000000000
--- a/osfmk/ppc/atomic_switch.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-typedef unsigned char	UInt8;
-typedef unsigned short	UInt16;
-typedef unsigned long	UInt32;
-
-
-/* Support firmware CallPseudoKernel architectural extension */
-
-struct CallPseudoKernelDescriptor {
-	UInt32				pc;
-	UInt32				gpr0;
-	UInt32				intControlAddr;
-	UInt32				newState;
-	UInt32				intStateMask;
-	UInt32				intCR2Mask;
-	UInt32				intCR2Shift;
-	UInt32				sysContextState;
-};
-typedef struct CallPseudoKernelDescriptor CallPseudoKernelDescriptor;
-typedef CallPseudoKernelDescriptor * CallPseudoKernelDescriptorPtr;
-typedef CallPseudoKernelDescriptor CPKD_t;
-
-
-
-/* Support firmware ExitPseudoKernel architectural extension */
-
-struct ExitPseudoKernelDescriptor {
-	UInt32				pc;
-	UInt32				sp;
-	UInt32				gpr0;
-	UInt32				gpr3;
-	UInt32				cr;
-	UInt32				intControlAddr;
-	UInt32				newState;
-	UInt32				intStateMask;
-	UInt32				intCR2Mask;
-	UInt32				intCR2Shift;
-	UInt32				sysContextState;
-	UInt32				intPendingMask;
-	UInt32				intPendingPC;
-	UInt32				msrUpdate;
-};
-typedef struct ExitPseudoKernelDescriptor ExitPseudoKernelDescriptor;
-typedef ExitPseudoKernelDescriptor * ExitPseudoKernelDescriptorPtr;
-typedef ExitPseudoKernelDescriptor EPKD_t;
-
-
-struct EmulatorDescriptor {
-	UInt8		regMap[16];		// table mapping 68K D0..D7, A0..A7 registers to PowerPC registers
-	UInt32		bootstrapVersionOffset;	// offset within emulator data page of the bootstrap version string
-	UInt32		ecbOffset;		// offset within emulator data page of the ECB
-	UInt32		intModeLevelOffset;	// offset within emulator data page of the interrupt mode level
-	UInt32		entryAddress;		// offset within text of the emulator's main entry point
-	UInt32		kcallTrapTableOffset;	// offset within text of the nanokernel(!) call trap table
-	UInt32		postIntMask;		// post interrupt mask
-	UInt32		clearIntMask;		// clear interrupt mask
-	UInt32		testIntMask;		// test interrupt mask
-	UInt32		codeSize;		// total size of emulator object code (interpretive + DR)
-	UInt32		hashTableSize;		// size of DR emulator's hash table
-	UInt32		drCodeStartOffset;	// offset within text of the DR emulator's object code
-	UInt32		drInitOffset;		// offset within DR emulator of its initialization entry point
-	UInt32		drAllocateCache;	// offset within DR emulator of its cache allocation entry point
-	UInt32		dispatchTableOffset;	// offset within text of the encoded instruction dispatch table 
-};
-typedef struct EmulatorDescriptor EmulatorDescriptor;
-typedef EmulatorDescriptor *EmulatorDescriptorPtr;
-
-	
-enum {
-											// The following define the UInt32 gInterruptState
-	kInUninitialized	=	0,			// State not yet initialized
-	kInPseudoKernel		=	1,			// Currently executing within pseudo kernel
-	kInSystemContext	=	2,			// Currently executing within the system (emulator) context
-	kInAlternateContext	=	3,			// Currently executing within an alternate (native) context
-	kInExceptionHandler	=	4,			// Currently executing an exception handler
-	kOutsideMain		=	5,			// Currently executing outside of the main thread
-	kNotifyPending		=	6,			// Pending Notify Interrupt
-
-	kInterruptStateMask	=	0x000F0000,	// Mask to extract interrupt state from gInterruptState
-	kInterruptStateShift	=	16,			// Shift count to align interrupt state
-
-	kBackupCR2Mask		=	0x0000000F,	// Mask to extract backup CR2 from gInterruptState
-	kCR2ToBackupShift	=	31-11,		// Shift count to align CR2 into the backup CR2 of gInterruptState
-											//  (and vice versa)
-	kCR2Mask		=	0x00F00000  // Mask to extract CR2 from the PPC CR register 
-};
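A standalone C sketch of how the gInterruptState word is packed and unpacked with the masks and shifts above (UInt32 and the two constants are restated so the snippet compiles on its own):

```c
#include <stdio.h>

typedef unsigned long UInt32;               /* as typedef'd at the top of this header */

#define kInterruptStateMask  0x000F0000UL   /* restated from the enum above */
#define kInterruptStateShift 16

static UInt32 get_interrupt_state(UInt32 g) {
    return (g & kInterruptStateMask) >> kInterruptStateShift;
}

int main(void) {
    UInt32 g = 0x00020000UL;                /* kInSystemContext in the state field */
    printf("state = %lu\n", get_interrupt_state(g));   /* prints 2 */
    return 0;
}
```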
-
-
-enum {
-	kcReturnFromException		= 0,	
-	kcRunAlternateContext		= 1,
-	kcResetSystem				= 2,
-	kcVMDispatch				= 3,
-	kcPrioritizeInterrupts		= 4,
-	kcPowerDispatch				= 5,
-	kcRTASDispatch				= 6,
-	kcGetAdapterProcPtrsPPC		= 12,
-	kcGetAdapterProcPtrs		= 13,
-	kcCallAdapterProc			= 14,
-	kcSystemCrash				= 15
-};
-
-#define bbMaxCode 16
-
diff --git a/osfmk/ppc/atomic_switch.s b/osfmk/ppc/atomic_switch.s
deleted file mode 100644
index ef1edd940..000000000
--- a/osfmk/ppc/atomic_switch.s
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#include	<ppc/asm.h>
-#include	<ppc/proc_reg.h>
-#include	<ppc/exception.h>
-#include	<mach/ppc/vm_param.h>
-#include	<assym.s>
-
-/*
- *	Classic atomic switch and fast trap code
- *	Written by: Mark Gorlinsky
- */
-
-/*
-**
-** Blue Box Fast Trap entry
-**
-**
-** The registers at entry are as hw_exceptions left them, which means
-** that the Blue Box data area is pointed to by R26.
-**
-** We exit here through the fast path exit point in hw_exceptions.  That means that
-** upon exit, R4 must not change.  It is the savearea with the current user context
-** to restore.
-**
-** Input registers are:
-** r0  = Syscall number
-** r4  = Current context savearea (do not modify)
-** r13 = THREAD_TOP_ACT pointer
-** r26 = base of ACT_MACH_BDA in kernel address space
-** -- for Traps --
-** r24 = Index into TWI table (x4)
-**
-**
-*/
-
-
-ENTRY(atomic_switch_syscall, TAG_NO_FRAME_USED)
-	
-/*
- *			Note: the BlueBox fast path system calls (-1 and -2) are handled as
- *			an ultra-fast trap in lowmem_vectors.
- */
-			lwz		r5,bbSysCall(r13)					; Pick up the syscall vector
-			b		.L_CallPseudoKernel
-
-ENTRY(atomic_switch_trap, TAG_NO_FRAME_USED)
-
-/*
-** functions 0-15 -> Call PseudoKernel
-**             16 -> Exit PseudoKernel
-*/
-
-			cmplwi	cr7,r24,BB_RFI_TRAP					; Is this an RFI?
-			beq		cr7,.L_ExitPseudoKernel				; Yes...
-
-			lwz		r5,bbTrap(r13)						; Pick up the trap vector
-
-/******************************************************************************
- * void CallPseudoKernel ( int vector, thread_act_t * act, BEDA_t * beda, savearea *sv )
- *
- * This op provides a means of invoking the BlueBox PseudoKernel from a
- * system (68k) or native (PPC) context while changing BlueBox interruption
- * state atomically.  As an added bonus, this op leaves all but R1/PC of the user 
- * state registers intact.  R1/PC are saved in a per thread save area, the base of
- * which is located in the bbDescAddr member of the thread_act structure.
- *
- * This op is invoked from the Emulator Trap dispatch table or from a System
- * Call when Mach SCs have been disabled. A vector index is passed in to indicate
- * which vector should be taken.
- *
- * If this op is invoked from the Emulator Trap dispatch table, the kernel is
- * aware of the starting address of this table.  It uses the user's PC (SRR0)
- * and the start address of the Trap dispatch table to verify the trap exception
- * as an atomic_switch trap.  If a trap exception is verified as an atomic_switch
- * trap, we enter here with the following registers loaded.
- *
- * Input registers are:
- * r5	= Vector to take
- * r13 	= Current thread context data
- * r26	= Base address of BlueBox exception data area in kernel address space
- * r4	= Current context savearea (do not modify)
- *
- ******************************************************************************/
-
-.L_CallPseudoKernel:
-
-			mfsprg	r2,1								; Get the current activation
-			lwz		r2,ACT_PER_PROC(r2)					; Get the per_proc block
-			rlwinm	r6,r26,0,0,19						; Start of page is bttd
-			lwz		r7,ACT_MACT_SPF(r13)				; Get special flags 
-			lwz		r1,BTTD_INTERRUPT_VECTOR(r6)		; Get interrupt vector
-			rlwinm	r7,r7,0,bbNoMachSCbit+1,bbNoMachSCbit-1	
-														; Reactivate Mach SCs
-			lwz		r8,BTTD_INTCONTROLWORD(r6)			; Get Interrupt Control Word
-			cmpwi	r1,0								; Is this a preemptive thread ?
-			stw		r7,ACT_MACT_SPF(r13)				; Update special flags
-			stw		r7,spcFlags(r2)						; Update per_proc version
-			beq		.L_CallFromPreemptiveThread			; No int vector means preemptive thread
-
-			rlwinm	r1,r8,0,INTSTATEMASK_B,INTSTATEMASK_E
-														; Extract current Interrupt state
-			rlwinm	r8,r8,0,INTSTATEMASK_E+1,INTSTATEMASK_B-1
-														; Clear current interrupt state
-			xoris	r2,r1,SYSCONTEXTSTATE				; Setup for System Context check 
-			lwz		r1,savecr(r4)						; Load current CR bits
-			cmpwi	r2,0								; Check if state is System Context?
-			oris	r8,r8,PSEUDOKERNELSTATE				; Update state for entering the PK
-			bne		.L_CallFromAlternateContext			; No, then do not save CR2 bits
-
-			rlwimi	r8,r1,32-INTCR2TOBACKUPSHIFT,INTBACKUPCR2MASK_B,INTBACKUPCR2MASK_E
-														; Insert live CR2 in ICW BackupCR2
-.L_CallFromAlternateContext:
-
-			stw		r8,BTTD_INTCONTROLWORD(r6)			; Update ICW
-
-.L_CallFromPreemptiveThread:
-
-			lwz		r1,savesrr0+4(r4)					; Get current PC
-			lwz		r2,saver1+4(r4)						; Get current R1
-			lwz		r3,savesrr1+4(r4)					; Get current MSR
-			stw		r1,BEDA_SRR0(r26)					; Save current PC
-			rlwinm	r3,r3,0,MSR_BE_BIT+1,MSR_SE_BIT-1				
-														; Clear SE|BE bits in MSR
-			stw		r2,BEDA_SPRG1(r26)					; Save current R1 
-			stw		r3,savesrr1+4(r4)					; Load new MSR
-
-			lwz		r1,BEDA_SPRG0(r26)					; Get replacement R1
-			stw		r5,savesrr0+4(r4)					; Save vector as PC
-			stw		r3,BEDA_SRR1(r26)					; Update saved MSR
-			stw		r1,saver1+4(r4)						; Load up new R1
-
-			b		EXT(fastexit)						; Go back and take the fast path exit...
-
-/******************************************************************************
- * void ExitPseudoKernel ( thread_act_t * act, BEDA_t * beda, savearea * sv  )
- *
- * This op provides a means of exiting from the BlueBox PseudoKernel to a
- * user context.  This op attempts to simulate an RFI for the returning
- * Traps (atomic_switch_trap) and SysCalls (atomic_switch_syscall).  Only the
- * Blue Thread handling interrupts is allowed to atomically change
- * interruption state and handle pending interrupts.
- *
- * If an interrupt is pending and we are returning to the alternate context,
- * the exit is aborted and we return to a pending interrupt handler in the
- * Blue Box pseudokernel.  
- *
- * It also allows the MSR's FE0, FE1, BE and SE bits to be updated for the user
- * and completes the PPC register loading.
- *
- * Input registers are:
- * r4  = Current context savearea (do not modify)
- * r13 = Pointer to the current active thread's data
- * r26 = Base address of BlueBox Data in kernel address space 
- *
- ******************************************************************************/
-
-.L_ExitPseudoKernel:
-
-			rlwinm	r6,r26,0,0,19						; Start of page is bttd
-			lwz		r7,ACT_MACT_SPF(r13)				; Get special flags
-			lwz		r2,BTTD_INTERRUPT_VECTOR(r6)		; Get the interrupt vector
-			lwz		r1,BEDA_SPRG1(r26)					; Get saved CTR
-			ori		r7,r7,(0x8000 >> (bbNoMachSCbit - 16))	; Disable Mach SCs for Blue Box
-
-			cmpwi	r2,0								; Is this a preemptive thread
-			stw		r1,savectr+4(r4)					; Update CTR
-			beq		.L_ExitFromPreemptiveThread
-
-			lwz		r8,BTTD_INTCONTROLWORD(r6)			; Get ICW
-			lwz		r1,BTTD_NEWEXITSTATE(r6)			; New interrupt state
-			lwz		r2,BTTD_TESTINTMASK(r6)				; Get pending interrupt mask
-			lis		r3,SYSCONTEXTSTATE					; Setup for check in system context
-			rlwimi	r8,r1,0,INTSTATEMASK_B,INTSTATEMASK_E
-														; Insert new state
-			cmplw	cr1,r1,r3							; System context ?
-			and.	r2,r8,r2							; Any pending interrupt?
-			lwz		r1,savecr(r4)						; Get current CR
-			
-			beq		cr1,.L_ExitToSystemContext			; We are in system context
-			beq		.L_ExitUpdateRuptControlWord		; We do not have a pending interrupt
-
-			lwz		r2,saver1+4(r4)						; Get current R1
-			lwz		r1,BEDA_SPRG0(r26)					; Get replacement R1
-			stw		r2,BEDA_SPRG1(r26)					; Save current R1
-			stw		r1,saver1+4(r4)						; Load up new R1
-			lwz		r3,bbPending(r13)					; Get pending interrupt PC
-			b		.L_ExitAbortExit					; Abort and Exit
-
-.L_ExitToSystemContext:
-			rlwimi	r1,r8,INTCR2TOBACKUPSHIFT,INTCR2MASK_B,INTCR2MASK_E
-														; Insert live CR2 into backup CR2
-.L_ExitUpdateRuptControlWord:
-			stw		r8,BTTD_INTCONTROLWORD(r6)			; Update ICW
-			stw		r1,savecr(r4)						; Update CR
-
-.L_ExitFromPreemptiveThread:
-			mfsprg	r3,1								; Get the current activation
-			lwz		r3,ACT_PER_PROC(r3)					; Get the per_proc block
-			lwz		r2,savesrr1+4(r4)					; Get current MSR	
-			lwz		r1,BEDA_SRR1(r26)					; Get new MSR
-			stw		r7,ACT_MACT_SPF(r13)				; Update special flags
-			stw		r7,spcFlags(r3)						; Update per_proc version
-			rlwimi	r2,r1,0,MSR_FE0_BIT,MSR_FE1_BIT
-														; Insert FE0,FE1,SE,BE bits
-			lwz		r3,BEDA_SRR0(r26)					; Get new PC
-			stw		r2,savesrr1+4(r4)					; Update MSR
-
-.L_ExitAbortExit:
-			stw		r3,savesrr0+4(r4)					; Update PC
-
-			b		EXT(fastexit)						; Go back and take the fast path exit...
-
diff --git a/osfmk/ppc/bat_init.c b/osfmk/ppc/bat_init.c
deleted file mode 100644
index 7434a4f02..000000000
--- a/osfmk/ppc/bat_init.c
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#include <mach/std_types.h>
-#include <ppc/proc_reg.h>
-#include <ppc/boot.h>
-#include <ppc/mem.h>
-
-// The sophisticated BAT manager
-
-unsigned int mappedSegments = 0;
-unsigned int availableBATs = 0xE;		// BAT0 used, 1-3 available
-
-vm_offset_t
-PEResidentAddress( vm_offset_t address, vm_size_t length )
-{
-    if( mappedSegments & (1 << (15 & (address >> 28))))
-	return( address);
-    else
-	return( 0);
-}
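Equivalently, in C terms (a standalone restatement of the bit test above; each 256MB segment of the 4GB space owns one bit of mappedSegments, indexed by the top nibble of the address):

```c
/* Each 256MB segment gets one bit; segment index = top nibble of the address. */
static int segment_is_mapped(unsigned int mapped, unsigned int address) {
    unsigned int seg = (address >> 28) & 0xF;
    return (mapped & (1u << seg)) != 0;
}
```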
-
-vm_offset_t
-PEMapSegment( vm_offset_t address, vm_size_t length )
-{
-    vm_offset_t		retAddress;
-    bat_t		bat;
-    int			batNum;
-
-    retAddress = PEResidentAddress( address, length );
-    if( retAddress)
-	return( retAddress);
-
-    if( length < (256 * 1024))
-	return( 0);
-    if( availableBATs == 0)
-	return( 0);
-
-    for( batNum = 0;
-	 (0 == (availableBATs & (1 << batNum)));
-	 batNum++);
-
-    bat.upper.word	     = address & 0xf0000000;
-    bat.lower.word	     = bat.upper.word;
-
-    bat.upper.bits.bl    = 0x7ff;	/* size = 256M */
-    bat.upper.bits.vs    = 1;
-    bat.upper.bits.vp    = 0;		/* user disabled */
-
-    bat.lower.bits.wimg  = PTE_WIMG_IO;
-    bat.lower.bits.pp    = 2;		/* read/write access */
-    
-    // Update the shadow bats.
-    shadow_BAT.DBATs[batNum].upper = bat.upper.word;
-    shadow_BAT.DBATs[batNum].lower = bat.lower.word;
-    
-    sync();isync();
-    switch( batNum) {			// !%$@!! mtdbat needs literal
-	case 0:
-	    mtdbatu( 0, BAT_INVALID); 	/* invalidate old mapping */
-	    mtdbatl( 0, bat.lower.word);
-	    mtdbatu( 0, bat.upper.word);
-	    break;
-	case 1:
-	    mtdbatu( 1, BAT_INVALID);
-	    mtdbatl( 1, bat.lower.word);
-	    mtdbatu( 1, bat.upper.word);
-	    break;
-	case 2:
-	    mtdbatu( 2, BAT_INVALID);
-	    mtdbatl( 2, bat.lower.word);
-	    mtdbatu( 2, bat.upper.word);
-	    break;
-	case 3:
-	    mtdbatu( 3, BAT_INVALID);
-	    mtdbatl( 3, bat.lower.word);
-	    mtdbatu( 3, bat.upper.word);
-	    break;
-    }
-    sync();isync();
-
-    availableBATs &= ~(1 << batNum);
-    mappedSegments |= (1 << (15 & (address >> 28)));
-
-    return( address);
-}
-
-void initialize_bats(boot_args *args)
-{
-	int i;
-
-	/* Give ourselves the virtual map that we would like */
-	bat_t		      bat;
-
-	/* Make sure that the BATs map what we expect. Note
-	 * that we assume BAT0 maps kernel text & data.
-	 *
-	 * Except, oops, none of the BATs have ever been set.
-	 * Developer worked only by fluke.
-	 */
-
-	bat.upper.word	     = 0;
-	bat.upper.bits.bepi  = 0x0;	/* start at logical addr 0M */
-	/*
-	 * We should be smarter here about picking an
-	 * amount to map
-	 */
-	bat.upper.bits.bl    = 0x7ff;	/* size = 256M */
-	bat.upper.bits.vs    = 1;
-	bat.upper.bits.vp    = 0;
-
-	bat.lower.word       = 0;
-	bat.lower.bits.brpn  = 0x0;	/* start at physical addr 0 */
-	bat.lower.bits.wimg  = PTE_WIMG_DEFAULT;
-	bat.lower.bits.pp    = 2;	/* read/write access */
-
-	/* Mustn't cause any data traffic here,
-	 * we're modifying our data BAT register!
-	 */
-
-	sync();
-	mtdbatu(0, BAT_INVALID);	/* invalidate old mapping */
-	isync();
-	mtdbatl(0, bat.lower.word);
-	isync();
-	mtdbatu(0, bat.upper.word);	/* update with new mapping */
-	isync();
-	mtibatl(0, bat.lower.word);
-	isync();
-	mtibatu(0, bat.upper.word);	/* update with new mapping */
-	isync();
-
-	sync();isync();
-	mtdbatu(1,BAT_INVALID); mtdbatl(1,BAT_INVALID);
-	mtibatu(1,BAT_INVALID); mtibatl(1,BAT_INVALID);
-	mtdbatu(2,BAT_INVALID); mtdbatl(2,BAT_INVALID);
-	mtibatu(2,BAT_INVALID); mtibatl(2,BAT_INVALID);
-	mtdbatu(3,BAT_INVALID); mtdbatl(3,BAT_INVALID);
-	mtibatu(3,BAT_INVALID); mtibatl(3,BAT_INVALID);
-	sync();isync();
-
-	PEMapSegment( 0xf0000000, 0x10000000);
-	if( args->Video.v_baseAddr)
-	  PEMapSegment( args->Video.v_baseAddr, 0x10000000);
-
-	/* Set up segment registers as VM through space 0 */
-	isync();
-	for (i=0; i<=15; i++) {
-	  mtsrin(KERNEL_SEG_REG0_VALUE | i, i * 0x10000000);
-	}
-	isync();
-}
-
-/*
- * Adjust the size of the region mapped by a BAT
- * to be just large enough to include the specified
- * offset, and return the offset of the new end of the region.
- * Note that both 'offsets' are really *lengths*, i.e. the
- * offset of the end of the mapped region from the beginning.
- * Either the instruction or data BATs (or both) can be specified.
- * If the new length is greater than the size mappable by a BAT,
- * then that value is just returned and no changes are made.
- */
-vm_offset_t
-adjust_bat_limit(
-    vm_offset_t		new_minimum,
-    int			batn,
-    boolean_t		ibat,
-    boolean_t		dbat
-)
-{
-    vm_offset_t		new_limit;
-
-    if (new_minimum <= 256*1024*1024) {
-	unsigned int	bl = 0;
-
-	new_limit = 128*1024;
-	while (new_limit < new_minimum) {
-	    new_limit *= 2;
-	    bl = (bl << 1) | 1;
-	}
-
-	{
-	    batu_t	batu;
-
-	    if (dbat) switch (batn) {
-
-	    case 0:
-		mfdbatu(batu, 0 );
-		batu.bits.bl = bl;
-
-		sync(); isync();
-		mtdbatu( 0, batu);
-		sync(); isync();
-
-		break;
-
-	    case 1:
-		mfdbatu(batu, 1 );
-		batu.bits.bl = bl;
-
-		sync(); isync();
-		mtdbatu( 1, batu);
-		sync(); isync();
-
-		break;
-
-	    case 2:
-		mfdbatu(batu, 2 );
-		batu.bits.bl = bl;
-
-		sync(); isync();
-		mtdbatu( 2, batu);
-		sync(); isync();
-
-		break;
-
-	    case 3:
-		mfdbatu(batu, 3 );
-		batu.bits.bl = bl;
-
-		sync(); isync();
-		mtdbatu( 3, batu);
-		sync(); isync();
-
-		break;
-	    }
-
-	    if (ibat) switch (batn) {
-
-	    case 0:
-		mfibatu(batu, 0 );
-		batu.bits.bl = bl;
-
-		sync(); isync();
-		mtibatu( 0, batu);
-		sync(); isync();
-
-		break;
-
-	    case 1:
-		mfibatu(batu, 1 );
-		batu.bits.bl = bl;
-
-		sync(); isync();
-		mtibatu( 1, batu);
-		sync(); isync();
-
-		break;
-
-	    case 2:
-		mfibatu(batu, 2 );
-		batu.bits.bl = bl;
-
-		sync(); isync();
-		mtibatu( 2, batu);
-		sync(); isync();
-
-		break;
-
-	    case 3:
-		mfibatu(batu, 3 );
-		batu.bits.bl = bl;
-
-		sync(); isync();
-		mtibatu( 3, batu);
-		sync(); isync();
-
-		break;
-	    }
-	}
-    }
-    else
-	new_limit = new_minimum;
-
-    return (new_limit);
-}
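The while loop above builds the BAT block-length (BL) encoding; a standalone C sketch of the same mapping, 128KB -> 0, 256KB -> 1, ..., 256MB -> 0x7FF (the function name is illustrative):

```c
/* BAT BL field is a run of ones; doubling the block size shifts in one more. */
unsigned int bat_bl_for_size(unsigned long size) {
    unsigned long limit = 128 * 1024;   /* smallest BAT block */
    unsigned int  bl = 0;
    while (limit < size) {
        limit <<= 1;
        bl = (bl << 1) | 1;
    }
    return bl;                          /* caps out at 0x7FF for 256MB */
}
```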
diff --git a/osfmk/ppc/bcopy.s b/osfmk/ppc/bcopy.s
deleted file mode 100644
index bc05940f2..000000000
--- a/osfmk/ppc/bcopy.s
+++ /dev/null
@@ -1,981 +0,0 @@
-/*
- * Copyright (c) 2002-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-;
-;			Copy bytes of data around. Handles overlapped data.
-;
-;
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <assym.s>
-
-;       These routines use CR5 for certain flags:
-;		Use CR5_lt to indicate non-cached (in bcopy and memcpy)
-#define noncache 20
-
-
-;       The bcopy_phys variants use a stack frame so they can call bcopy as a subroutine.
-#define BCOPY_SF_SIZE   32      // total size
-#define BCOPY_SF_MSR    16      // we save caller's MSR here (possibly minus VEC and FP)
-
-
-#define kShort  32              // short operands are special cased
-
-
-; void bcopy_physvir_32(from, to, nbytes)
-;
-; Attempt to copy physically addressed memory with translation on if conditions are met.
-; Otherwise do a normal bcopy_phys.  This routine is used because some 32-bit processors 
-; are very slow doing real-mode (translation off) copies, so we set up temporary BATs
-; for the passed phys addrs and do the copy with translation on.  
-;
-; Rules are: - neither source nor destination can cross a page. 
-;            - Interrupts must be disabled when this routine is called.
-;            - Translation must be on when called.
-;
-; To do the copy, we build a 128KB DBAT for both the source and sink.  If both are the same, only one
-; is loaded.  We do not touch the IBATs, so there is no issue if either physical page
-; address is the same as the virtual address of the instructions we are executing.
-;
-; At the end, we invalidate the used DBATs.
-;
-; Note that the address parameters are long longs.  We will transform these to 64-bit
-; values.  Note that on 32-bit architectures this will ignore the high half of the
-; passed-in value.  This should be OK, since addresses wider than 32 bits cannot
-; occur there anyhow.
-;
-; Note also that this routine is used only on 32-bit machines. If you're contemplating use
-; on a 64-bit processor, use the physical memory window instead; please refer to copypv()
-; for an example of how this is done.
-
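The page-overflow test described above (and performed with rlwinm below) compares the page numbers of the first and last bytes of each operand; in C, assuming the 4KB pages used here:

```c
#include <stdbool.h>
#include <stdint.h>

#define PAGE_SHIFT 12   /* assumption: 4KB pages, as on 32-bit PPC xnu */

/* True if [addr, addr+len) spans a page boundary (len > 0 assumed). */
static bool crosses_page(uint32_t addr, uint32_t len) {
    return ((addr ^ (addr + len - 1)) >> PAGE_SHIFT) != 0;
}
```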
-			.align	5
-			.globl	EXT(bcopy_physvir_32)
-
-LEXT(bcopy_physvir_32)
-            mflr    r0                          ; get return address
-            rlwinm	r3,r3,0,1,0					; Duplicate high half of long long paddr into top of reg
-            mfsprg	r8,2						; get processor feature flags
-            stw     r0,8(r1)                    ; save return address
-			rlwimi	r3,r4,0,0,31				; Combine bottom of long long to full 64-bits
-            stwu    r1,-BCOPY_SF_SIZE(r1)       ; push on a stack frame so we can call bcopy
-            mtcrf	0x02,r8						; move pf64Bit to cr6 so we can test
-            subi    r0,r7,1                     ; get length - 1
-			rlwinm	r4,r5,0,1,0					; Duplicate high half of long long paddr into top of reg
-			add		r11,r3,r0					; Point to last byte of sink
-			mr		r5,r7						; Get the length into the right register
-            rlwimi	r4,r6,0,0,31				; Combine bottom of long long to full 64-bits
-
-; This test for page overflow may not work if the length is negative.  Negative lengths are invalid input
-; to bcopy_physvir_32() on 32-bit machines, and will result in a panic.
-            
-			add		r12,r4,r0					; Point to last byte of source
-			xor		r7,r11,r3					; See if we went to next page
-			xor		r8,r12,r4					; See if we went to next page
-			or		r0,r7,r8					; Combine wrap
-			
-//			li		r9,((PTE_WIMG_CB_CACHED_COHERENT<<3)|2)	; Set default attributes
-			li		r9,((2<<3)|2)				; Set default attributes
-			rlwinm.	r0,r0,0,0,19				; Did we overflow a page?
-			li		r7,2						; Set validity flags
-			li		r8,2						; Set validity flags
-			bne-	bcopy_phys1					; Overflowed page, do normal physical copy...
-
-			rlwimi	r11,r9,0,15,31				; Set sink lower DBAT value
-			rlwimi	r12,r9,0,15,31				; Set source lower DBAT value
-			rlwimi	r7,r11,0,0,14				; Set sink upper DBAT value
-			rlwimi	r8,r12,0,0,14				; Set source upper DBAT value
-			cmplw	cr1,r11,r12					; See if sink and source are same block
-			
-			sync
-
-			mtdbatl	0,r11						; Set sink lower DBAT 
-			mtdbatu	0,r7						; Set sink upper DBAT
-
-			beq-	cr1,bcpvsame				; Source and sink are in same block
-
-			mtdbatl	1,r12						; Set source lower DBAT 
-			mtdbatu	1,r8						; Set source upper DBAT
-            
-bcpvsame:	
-            sync                                ; wait for the BATs to stabilize
-            isync
-            
-            bl      EXT(bcopy)                  ; BATs set up, args in r3-r5, so do the copy with DR on
-
-            li		r0,0						; Get set to invalidate upper half of BATs
-			sync								; Make sure all is well
-			mtdbatu	0,r0						; Clear sink upper DBAT
-			mtdbatu	1,r0						; Clear source upper DBAT
-			sync
-			isync			
-            
-            lwz     r0,BCOPY_SF_SIZE+8(r1)      ; get return address
-            addi    r1,r1,BCOPY_SF_SIZE         ; pop off stack frame
-            mtlr    r0
-            blr
-
-
-; void bcopy_phys(from, to, nbytes)
-;
-; Turns off data translation before the copy.  This one will not work in user state.
-; This routine is used on 32 and 64-bit machines.
-;
-; Note that the address parameters are long longs.  We will transform these to 64-bit
-; values.  Note that on 32-bit architectures this will ignore the high half of the
-; passed-in value.  This should be OK, since we cannot have addresses bigger than 32 bits
-; there anyhow.
-;
-; Also note that neither the sink nor the source may span the boundary between RAM
-; and I/O space.  There is a good chance of hanging the machine, and this code will
-; not check for it, so be careful.
-;
-; NOTE: when called, translation must be on, and we must be in 32-bit mode.
-;       Interrupts may or may not be disabled.
-
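-; In C terms, the rlwinm/rlwimi pairs below merge each (high,low) register pair
-; into one 64-bit value (an illustrative sketch; on a 32-bit CPU only the low
-; word ends up significant):
-;
-;     uint64_t pack(uint32_t hi, uint32_t lo) {
-;         return ((uint64_t)hi << 32) | lo;
-;     }
-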
-			.align	5
-			.globl	EXT(bcopy_phys)
-
-LEXT(bcopy_phys)
-            mflr    r0                          ; get return address
-            rlwinm	r3,r3,0,1,0					; Duplicate high half of long long paddr into top of reg
-            stw     r0,8(r1)                    ; save
-            mfsprg	r8,2						; get processor feature flags
-            stwu    r1,-BCOPY_SF_SIZE(r1)       ; push on a stack frame so we can call bcopy
-			rlwimi	r3,r4,0,0,31				; Combine bottom of long long to full 64-bits
-			rlwinm	r4,r5,0,1,0					; Duplicate high half of long long paddr into top of reg
-			mtcrf	0x02,r8						; move pf64Bit to cr6 so we can test
-			rlwimi	r4,r6,0,0,31				; Combine bottom of long long to full 64-bits
-			mr		r5,r7						; Get the length into the right register
-
-bcopy_phys1:									; enter from bcopy_physvir with pf64Bit in cr6 and parms in r3-r5
-			mfmsr	r9							; Get the MSR
-			lis		r6,hi16(MASK(MSR_VEC))		; Get vector enable            
-            ori     r6,r6,lo16(MASK(MSR_FP)|MASK(MSR_DR))	; Add in FP and DR
-            andc    r9,r9,r6                    ; unconditionally turn DR, VEC, and FP off
-            bt++	pf64Bitb,bcopy_phys64		; skip if 64-bit (only they take hint)
-
-; 32-bit CPUs
-
-			mtmsr	r9							; turn DR, FP, and VEC off
-			isync								; Wait for it
-			
-            bl      EXT(bcopy)                  ; do the copy with translation off and caching on
-            
-			mfmsr	r9							; Get the MSR
-            ori     r9,r9,lo16(MASK(MSR_DR))    ; turn translation back on (but leave VEC and FP off)
-            mtmsr   r9                          ; restore msr
-            isync                               ; wait for it to happen
-            lwz     r0,BCOPY_SF_SIZE+8(r1)      ; get return address once translation is back on
-            mtlr    r0
-            addi    r1,r1,BCOPY_SF_SIZE         ; pop off stack frame
-            blr
-
-            
-; 64-bit: turn DR off and SF on.
-
-bcopy_phys64:									; r9 = MSR with DR, VEC, and FP off
-            ori     r8,r9,lo16(MASK(MSR_DR))    ; make a copy with DR back on... this is what we return to caller
-			srdi	r2,r3,31					; Get a 1 if source is in I/O memory
-            li		r0,1						; Note - we use this in a couple places below
-			srdi	r10,r4,31					; Get a 1 if sink is in I/O memory
-            std     r8,BCOPY_SF_MSR(r1)         ; save caller's MSR so we remember whether EE was on
-            rldimi	r9,r0,63,MSR_SF_BIT			; set SF on in MSR we will copy with
-			cmpldi	cr0,r2,1					; Is source in I/O memory?
-			cmpldi	cr7,r10,1					; Is sink in I/O memory?
-            mtmsrd	r9							; turn 64-bit addressing on, data translation off
-            isync								; wait for it to happen
-			cror	cr7_eq,cr0_eq,cr7_eq		; See if either source or sink is in I/O area
-            beq--   cr7,io_space_real_mode_copy ; an operand is in I/O space
-            
-            bl      EXT(bcopy)                  ; do copy with DR off and SF on, cache enabled
-                        
-bcopy_phys64x:
-			mfmsr	r9							; Get the MSR we used to copy
-            rldicl	r9,r9,0,MSR_SF_BIT+1		; clear SF
-            ori     r9,r9,lo16(MASK(MSR_DR))    ; turn translation back on
-            mtmsrd  r9                          ; turn 64-bit mode off, translation back on
-            isync								; wait for it to happen
-            lwz     r0,BCOPY_SF_SIZE+8(r1)      ; get return address once translation is back on
-            ld      r8,BCOPY_SF_MSR(r1)         ; get caller's MSR once translation is back on
-            mtlr    r0
-            mtmsrd  r8,1                        ; turn EE back on if necessary
-            addi    r1,r1,BCOPY_SF_SIZE         ; pop off stack frame
-            blr
-
-;   We need to copy with DR off, but one of the operands is in I/O space.  To avoid wedging U3,
-;   which cannot handle a cache burst in I/O space, we must turn caching off for the real memory access.
-;   This can only be done by setting bits in HID4.  We cannot lose control and execute random code in
-;   this state, so we have to disable interrupts as well.  This is an unpleasant hack.
-
-io_space_real_mode_copy:                        ; r0=1, r9=MSR we want to copy with
-			sldi	r11,r0,31-MSR_EE_BIT		; Get a mask for the EE bit
-			sldi	r0,r0,32+8					; Get the right bit to turn off caching
-			andc	r9,r9,r11					; Turn off EE bit
-			mfspr	r2,hid4						; Get HID4
-			mtmsrd	r9,1                        ; Force off EE
-			or		r2,r2,r0					; Set bit to make real accesses cache-inhibited
-			sync								; Sync up
-			mtspr	hid4,r2						; Make real accesses cache-inhibited
-			isync								; Toss prefetches
-
-			lis		r12,0xE000					; Get the unlikeliest ESID possible
-			srdi	r12,r12,1					; Make 0x7FFFFFFFF0000000
-			slbie	r12							; Make sure the ERAT is cleared 
-			
-			sync
-			isync
-			
-            bl      EXT(bcopy_nc)               ; copy with SF on and EE, DR, VEC, and FP off, cache inhibited
-            
-			li		r0,1						; Get a 1
-			sldi	r0,r0,32+8					; Get the right bit to turn off caching
-			mfspr	r2,hid4						; Get HID4
-			andc	r2,r2,r0					; Clear bit to make real accesses cache-inhibited
-			sync								; Sync up
-			mtspr	hid4,r2						; Make real accesses not cache-inhibited
-			isync								; Toss prefetches
-	
-			lis		r12,0xE000					; Get the unlikeliest ESID possible
-			srdi	r12,r12,1					; Make 0x7FFFFFFFF0000000
-			slbie	r12							; Make sure the ERAT is cleared
-            b       bcopy_phys64x
-
-
-;
-; shortcopy
-;
-; Special case short operands (<32 bytes), which are very common.  Note that the check for
-; reverse vs normal moves isn't quite correct in 64-bit mode; in rare cases we will move in
-; reverse when it wasn't necessary to do so.  This is OK, since performance of the two cases
-; is similar.  We do get the direction right when it counts (i.e., when the operands overlap).
-; Also note that we use the G3/G4 "backend" code, even on G5.  This is OK too, since G5 has
-; plenty of load/store dispatch bandwidth in this case, the extra ops are hidden by latency,
-; and using word instead of doubleword moves reduces the possibility of unaligned accesses,
-; which cost about 20 cycles if they cross a 32-byte boundary on G5.  Finally, because we
-; might do unaligned accesses this code cannot be called from bcopy_nc().
-;           r4 = destination
-;           r5 = length (<32)
-;           r6 = source
-;           r12 = (dest - source)
-
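-; The direction check below amounts to this C sketch (copy_forward and
-; copy_reverse are hypothetical helpers standing in for "backend"/"bbackend"):
-;
-;     if ((uint32_t)(dest - source) < length)
-;         copy_reverse(dest, source, length);   /* operands overlap */
-;     else
-;         copy_forward(dest, source, length);   /* common case */
-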
-            .align  5
-shortcopy:
-            cmplw   r12,r5                      ; must move reverse if (dest-source)<length
-            mtcrf   2,r5                        ; move length to cr6 and cr7 one at a time...
-            mtcrf   1,r5                        ; ...which is faster on G4 and G5
-            bge++   backend                     ; handle forward moves (most common case)
-            add     r6,r6,r5                    ; point one past end of operands in reverse moves
-            add     r4,r4,r5
-            b       bbackend                    ; handle reverse moves
-            
-;	
-; void bcopy(from, to, nbytes)
-;
-; NOTE: bcopy is called from copyin and copyout etc with the "thread_recover" ptr set.
-; This means bcopy must not set up a stack frame or touch non-volatile registers, and also means that it
-; cannot rely on turning off interrupts, because we expect to get DSIs and have execution aborted by a "longjmp"
-; to the thread_recover routine.  What this means is that it would be hard to use vector or floating point
-; registers to accelerate the copy.
-;
-; NOTE: this code can be called in any of three "modes":
-;       - on 32-bit processors (32-byte cache line)
-;       - on 64-bit processors running in 32-bit mode (128-byte cache line)
-;       - on 64-bit processors running in 64-bit mode (128-byte cache line)
-
-			.align	5
-			.globl	EXT(bcopy)
-            .globl  EXT(bcopy_nop_if_32bit)
-
-LEXT(bcopy)
-			cmplwi	cr1,r5,kShort               ; less than 32 bytes?
-            sub.    r12,r4,r3					; test for to==from in mode-independent way, start fwd/rev check
-			mr		r6,r3						; Set source (must preserve r3 for memcopy return)
-			blt     cr1,shortcopy               ; special case short operands
-			crclr	noncache					; Set cached
-LEXT(bcopy_nop_if_32bit)
-            bne++   copyit64                    ; handle 64-bit processor (patched to NOP if 32-bit processor)
-			bne+    copyit32					; handle 32-bit processor
-            blr                                 ; to==from so nothing to do
-	
-;
-; bcopy_nc(from, to, nbytes)
-;
-; bcopy_nc() operates on non-cached memory, so we cannot use any kind of cache instruction.
-; Furthermore, we must avoid all unaligned accesses on 64-bit machines, since they take
-; alignment exceptions.  Thus we cannot use "shortcopy", which could do unaligned lwz/stw.
-; Like bcopy(), bcopy_nc() can be called both in 32- and 64-bit mode.
-
-			.align	5
-			.globl	EXT(bcopy_nc)
-            .globl  EXT(bcopy_nc_nop_if_32bit)
-
-LEXT(bcopy_nc)
-			cmpwi	cr1,r5,0					; Check if we have a 0 length
-            sub.	r12,r4,r3					; test for to==from in mode-independent way, start fwd/rev check
-			mr		r6,r3						; Set source (must preserve r3 for memcopy return)
-			crset	noncache					; Set non-cached
-			cror    cr0_eq,cr1_eq,cr0_eq        ; set cr0 beq if either length zero or to==from
-LEXT(bcopy_nc_nop_if_32bit)
-            bne++   copyit64                    ; handle 64-bit processor (patched to NOP if 32-bit processor)
-			bne+    copyit32					; handle 32-bit processor
-            blr                                 ; either zero length or to==from
-
-;
-; void* memcpy(to, from, nbytes)
-; void* memmove(to, from, nbytes)
-;
-; memcpy() and memmove() are only called in 32-bit mode, albeit on both 32- and 64-bit processors.
-; However, they would work correctly if called in 64-bit mode.
-
-			.align	5
-			.globl	EXT(memcpy)
-			.globl	EXT(memmove)
-            .globl  EXT(memcpy_nop_if_32bit)
-
-LEXT(memcpy)
-LEXT(memmove)
-			cmplwi	cr1,r5,kShort               ; less than 32 bytes?
-            sub.    r12,r3,r4					; test for to==from in mode-independent way, start fwd/rev check
-			mr		r6,r4						; Set source
-			mr		r4,r3						; Set the "to" (must preserve r3 for return value)
-			blt     cr1,shortcopy               ; special case short operands
-			crclr	noncache					; Set cached
-LEXT(memcpy_nop_if_32bit)
-            bne++   copyit64                    ; handle 64-bit processor (patched to NOP if 32-bit processor)
-			beqlr-                              ; exit if to==from
-
-
-;       Here to copy on 32-bit processors.
-;
-;			When we move the memory, forward overlays must be handled.  We
-;			also can not use the cache instructions if we are from bcopy_nc.
-;			We need to preserve R3 because it needs to be returned for memcpy.
-;			We can be interrupted and lose control here.
-;
-;           When entered:
-;               r4 = destination
-;               r5 = length (>0)
-;               r6 = source
-;               r12 = (dest - source)
-;               cr5 = noncache flag
-
-copyit32:                                       ; WARNING! can drop down to this label
-            cmplw   cr1,r12,r5                  ; must move reverse if (dest-source)<length
-            cntlzw  r11,r5                      ; get magnitude of length
-            dcbt    0,r6                        ; start to touch in source
-            lis     r10,hi16(0x80000000)        ; get 0x80000000
-            neg     r9,r4                       ; start to get alignment for destination
-            dcbtst  0,r4                        ; start to touch in destination
-            sraw    r8,r10,r11                  ; get mask based on operand length, to limit alignment
-            blt-    cr1,reverse32bit            ; reverse move required
-			
-; Forward moves on 32-bit machines, also word aligned uncached ops on 64-bit machines.
-; NOTE: we never do an unaligned access if the source and destination are "relatively"
-; word aligned.  We depend on this in the uncached case on 64-bit processors.
-;               r4 = destination
-;               r5 = length (>0)
-;               r6 = source
-;               r8 = inverse of largest mask smaller than operand length
-;               r9 = neg(dest), used to compute alignment
-;               cr5 = noncache flag
-
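-; The front-end limit mask (r8, computed in copyit32) can be written in C as
-; follows (a sketch; assumes len > 0, a GCC-style __builtin_clz, and PowerPC's
-; arithmetic right shift):
-;
-;     uint32_t limit_mask(uint32_t len) {
-;         return (uint32_t)((int32_t)0x80000000 >> __builtin_clz(len));
-;     }
-;     /* bytes moved by the alignment front end: */
-;     uint32_t front = (uint32_t)(-(uintptr_t)dest) & 0x1F & ~limit_mask(len);
-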
-forward32bit:                                   ; enter from 64-bit CPUs with word aligned uncached operands
-			rlwinm	r7,r9,0,0x1F				; get bytes to 32-byte-align destination
-			andc.   r0,r7,r8					; limit to the maximum front end move
-            mtcrf   0x01,r0                     ; move length to cr6 and cr7 one cr at a time...
-			beq		alline						; Already on a line...
-			
-			mtcrf	0x02,r0						; ...since moving more than one is slower on G4 and G5
-			sub		r5,r5,r0					; Set the length left to move
-
-			bf		31,alhalf					; No single byte to do...
-			lbz		r7,0(r6)					; Get the byte
-			addi	r6,r6,1						; Point to the next
-			stb		r7,0(r4)					; Save the single
-			addi	r4,r4,1						; Bump sink
-			
-;			Sink is halfword aligned here
-
-alhalf:		bf		30,alword					; No halfword to do...
-			lhz		r7,0(r6)					; Get the halfword
-			addi	r6,r6,2						; Point to the next
-			sth		r7,0(r4)					; Save the halfword
-			addi	r4,r4,2						; Bump sink
-			
-;			Sink is word aligned here
-
-alword:		bf		29,aldouble					; No word to do...
-			lwz		r7,0(r6)					; Get the word
-			addi	r6,r6,4						; Point to the next
-			stw		r7,0(r4)					; Save the word
-			addi	r4,r4,4						; Bump sink
-			
-;			Sink is double aligned here
-
-aldouble:	bf		28,alquad					; No double to do...
-			lwz		r7,0(r6)					; Get the first word
-			lwz		r8,4(r6)					; Get the second word
-			addi	r6,r6,8						; Point to the next
-			stw		r7,0(r4)					; Save the first word
-			stw		r8,4(r4)					; Save the second word
-			addi	r4,r4,8						; Bump sink
-			
-;			Sink is quadword aligned here
-
-alquad:		bf		27,alline					; No quad to do...
-			lwz		r7,0(r6)					; Get the first word
-			lwz		r8,4(r6)					; Get the second word
-			lwz		r9,8(r6)					; Get the third word
-			stw		r7,0(r4)					; Save the first word
-			lwz		r11,12(r6)					; Get the fourth word
-			addi	r6,r6,16					; Point to the next
-			stw		r8,4(r4)					; Save the second word
-			stw		r9,8(r4)					; Save the third word
-			stw		r11,12(r4)					; Save the fourth word
-			addi	r4,r4,16					; Bump sink
-			
-;			Sink is line aligned here
-
-alline:		rlwinm.	r0,r5,27,5,31				; Get the number of full lines to move
-            mtcrf   0x02,r5                     ; move length to cr6 and cr7 one cr at a time...
-			mtcrf	0x01,r5						; ...since moving more than one is slower on G4 and G5			
-			beq-	backend						; No full lines to move
-            
-            mtctr   r0                          ; set up loop count
-			li		r0,96						; Stride for touch ahead
-            b       nxtline
-			
-            .align  4
-nxtline:
-            lwz		r2,0(r6)					; Get the first word
-			lwz		r5,4(r6)					; Get the second word
-			lwz		r7,8(r6)					; Get the third word
-			lwz		r8,12(r6)					; Get the fourth word
-			lwz		r9,16(r6)					; Get the fifth word
-			lwz		r10,20(r6)					; Get the sixth word
-			lwz		r11,24(r6)					; Get the seventh word
-			lwz		r12,28(r6)					; Get the eighth word
-			bt-		noncache,skipz				; Skip if we are not cached...
-			dcbz	0,r4						; Blow away the whole line because we are replacing it
-			dcbt	r6,r0						; Touch ahead a bit
-skipz:
-			addi	r6,r6,32					; Point to the next
-			stw		r2,0(r4)					; Save the first word
-			stw		r5,4(r4)					; Save the second word
-			stw		r7,8(r4)					; Save the third word
-			stw		r8,12(r4)					; Save the fourth word
-			stw		r9,16(r4)					; Save the fifth word
-			stw		r10,20(r4)					; Save the sixth word
-			stw		r11,24(r4)					; Save the seventh word
-			stw		r12,28(r4)					; Save the eighth word
-			addi	r4,r4,32					; Bump sink
-			bdnz+	nxtline						; Do the next line, if any...
-
-	
-;			Move backend quadword
-
-backend:                                        ; Join here from "shortcopy" for forward moves <32 bytes
-            bf		27,noquad					; No quad to do...
-			lwz		r7,0(r6)					; Get the first word
-			lwz		r8,4(r6)					; Get the second word
-			lwz		r9,8(r6)					; Get the third word
-			lwz		r11,12(r6)					; Get the fourth word
-			stw		r7,0(r4)					; Save the first word
-			addi	r6,r6,16					; Point to the next
-			stw		r8,4(r4)					; Save the second word
-			stw		r9,8(r4)					; Save the third word
-			stw		r11,12(r4)					; Save the fourth word
-			addi	r4,r4,16					; Bump sink
-			
-;			Move backend double
-
-noquad:		bf		28,nodouble					; No double to do...
-			lwz		r7,0(r6)					; Get the first word
-			lwz		r8,4(r6)					; Get the second word
-			addi	r6,r6,8						; Point to the next
-			stw		r7,0(r4)					; Save the first word
-			stw		r8,4(r4)					; Save the second word
-			addi	r4,r4,8						; Bump sink
-			
-;			Move backend word
-
-nodouble:	bf		29,noword					; No word to do...
-			lwz		r7,0(r6)					; Get the word
-			addi	r6,r6,4						; Point to the next
-			stw		r7,0(r4)					; Save the word
-			addi	r4,r4,4						; Bump sink
-			
-;			Move backend halfword
-
-noword:		bf		30,nohalf					; No halfword to do...
-			lhz		r7,0(r6)					; Get the halfword
-			addi	r6,r6,2						; Point to the next
-			sth		r7,0(r4)					; Save the halfword
-			addi	r4,r4,2						; Bump sink
-
-;			Move backend byte
-
-nohalf:		bflr    31                          ; Leave because we are all done...
-			lbz		r7,0(r6)					; Get the byte
-			stb		r7,0(r4)					; Save the single
-            blr
-
-
-; Reverse moves on 32-bit machines, also reverse word aligned uncached moves on 64-bit machines.
-; NOTE: we never do an unaligned access if the source and destination are "relatively"
-; word aligned.  We depend on this in the uncached case on 64-bit processors.
-; These are slower because we don't bother with dcbz.  Fortunately, reverse moves are uncommon.
-;               r4 = destination
-;               r5 = length (>0)
-;               r6 = source
-;               r8 = inverse of largest mask smaller than operand length
-;               cr5 = noncache flag (but we don't dcbz anyway)
-
-reverse32bit:									; here from 64-bit code with word aligned uncached operands
-            add		r4,r5,r4					; Point past the last sink byte
-			add		r6,r5,r6					; Point past the last source byte 
-			rlwinm	r7,r4,0,0x1F				; Calculate the length to align dest on cache boundary
-			li		r12,-1						; Make sure we touch in the actual line
-			andc.   r0,r7,r8					; Apply movement limit
-			dcbt	r12,r6						; Touch in the last line of source
-            mtcrf   0x01,r0                     ; move length to cr6 and cr7 one cr at a time...
-			dcbtst	r12,r4						; Touch in the last line of the sink
-			mtcrf	0x02,r0						; ...since moving more than one is slower on G4 and G5			
-			beq-	balline						; Already on cache line boundary (or too short to bother)
-			
-			sub		r5,r5,r0					; Precalculate move length left after alignment
-			
-			bf		31,balhalf					; No single byte to do...
-			lbz		r7,-1(r6)					; Get the byte
-			subi	r6,r6,1						; Point to the next
-			stb		r7,-1(r4)					; Save the single
-			subi	r4,r4,1						; Bump sink
-			
-;			Sink is halfword aligned here
-
-balhalf:	bf		30,balword					; No halfword to do...
-			lhz		r7,-2(r6)					; Get the halfword
-			subi	r6,r6,2						; Point to the next
-			sth		r7,-2(r4)					; Save the halfword
-			subi	r4,r4,2						; Bump sink
-			
-;			Sink is word aligned here
-
-balword:	bf		29,baldouble				; No word to do...
-			lwz		r7,-4(r6)					; Get the word
-			subi	r6,r6,4						; Point to the next
-			stw		r7,-4(r4)					; Save the word
-			subi	r4,r4,4						; Bump sink
-			
-;			Sink is double aligned here
-
-baldouble:	bf		28,balquad					; No double to do...
-			lwz		r7,-8(r6)					; Get the first word
-			lwz		r8,-4(r6)					; Get the second word
-			subi	r6,r6,8						; Point to the next
-			stw		r7,-8(r4)					; Save the first word
-			stw		r8,-4(r4)					; Save the second word
-			subi	r4,r4,8						; Bump sink
-			
-;			Sink is quadword aligned here
-
-balquad:	bf		27,balline					; No quad to do...
-			lwz		r7,-16(r6)					; Get the first word
-			lwz		r8,-12(r6)					; Get the second word
-			lwz		r9,-8(r6)					; Get the third word
-			lwz		r11,-4(r6)					; Get the fourth word
-			stw		r7,-16(r4)					; Save the first word
-			subi	r6,r6,16					; Point to the next
-			stw		r8,-12(r4)					; Save the second word
-			stw		r9,-8(r4)					; Save the third word
-			stw		r11,-4(r4)					; Save the fourth word
-			subi	r4,r4,16					; Bump sink
-			
-;			Sink is line aligned here
-
-balline:	rlwinm.	r0,r5,27,5,31				; Get the number of full lines to move
-            mtcrf   0x02,r5                     ; move length to cr6 and cr7 one cr at a time...
-			mtcrf	0x01,r5						; ...since moving more than one is slower on G4 and G5			
-			beq-	bbackend					; No full lines to move
-            mtctr   r0                          ; set up loop count
-            b       bnxtline
-			
-            .align  4
-bnxtline:
-			lwz		r7,-32(r6)					; Get the first word
-			lwz		r5,-28(r6)					; Get the second word
-			lwz		r2,-24(r6)					; Get the third word
-			lwz		r12,-20(r6)					; Get the fourth word
-			lwz		r11,-16(r6)					; Get the fifth word
-			lwz		r10,-12(r6)					; Get the sixth word
-			lwz		r9,-8(r6)					; Get the seventh word
-			lwz		r8,-4(r6)					; Get the eighth word
-			subi	r6,r6,32					; Point to the next
-			
-			stw		r7,-32(r4)					; Save the first word
-            stw		r5,-28(r4)					; Save the second word
-			stw		r2,-24(r4)					; Save the third word
-			stw		r12,-20(r4)					; Save the fourth word
-			stw		r11,-16(r4)					; Save the fifth word
-			stw		r10,-12(r4)					; Save the sixth word
-			stw		r9,-8(r4)					; Save the seventh word
-			stw		r8,-4(r4)					; Save the eighth word
-			subi	r4,r4,32					; Bump sink
-			
-			bdnz+	bnxtline					; Do the next line, if any...
-
-;
-;			Note: We touched these lines in at the beginning
-;
-	
-;			Move backend quadword
-
-bbackend:                                       ; Join here from "shortcopy" for reverse moves of <32 bytes
-            bf		27,bnoquad					; No quad to do...
-			lwz		r7,-16(r6)					; Get the first word
-			lwz		r8,-12(r6)					; Get the second word
-			lwz		r9,-8(r6)					; Get the third word
-			lwz		r11,-4(r6)					; Get the fourth word
-			stw		r7,-16(r4)					; Save the first word
-			subi	r6,r6,16					; Point to the next
-			stw		r8,-12(r4)					; Save the second word
-			stw		r9,-8(r4)					; Save the third word
-			stw		r11,-4(r4)					; Save the fourth word
-			subi	r4,r4,16					; Bump sink
-			
-;			Move backend double
-
-bnoquad:	bf		28,bnodouble				; No double to do...
-			lwz		r7,-8(r6)					; Get the first word
-			lwz		r8,-4(r6)					; Get the second word
-			subi	r6,r6,8						; Point to the next
-			stw		r7,-8(r4)					; Save the first word
-			stw		r8,-4(r4)					; Save the second word
-			subi	r4,r4,8						; Bump sink
-			
-;			Move backend word
-
-bnodouble:	bf		29,bnoword					; No word to do...
-			lwz		r7,-4(r6)					; Get the word
-			subi	r6,r6,4						; Point to the next
-			stw		r7,-4(r4)					; Save the word
-			subi	r4,r4,4						; Bump sink
-			
-;			Move backend halfword
-
-bnoword:	bf		30,bnohalf					; No halfword to do...
-			lhz		r7,-2(r6)					; Get the halfword
-			subi	r6,r6,2						; Point to the next
-			sth		r7,-2(r4)					; Save the halfword
-			subi	r4,r4,2						; Bump sink
-
-;			Move backend byte
-
-bnohalf:	bflr    31                          ; Leave because we are all done...
-			lbz		r7,-1(r6)					; Get the byte
-			stb		r7,-1(r4)					; Save the single
-			blr
-
-
-// Here on 64-bit processors, which have a 128-byte cache line.  This can be
-// called either in 32 or 64-bit mode, which makes the test for reverse moves
-// a little tricky.  We've already filtered out the (sou==dest) and (len==0)
-// special cases.
-//
-// When entered:
-//		r4 = destination (32 or 64-bit ptr)
-//		r5 = length (always 32 bits)
-//		r6 = source (32 or 64-bit ptr)
-//      r12 = (dest - source), reverse move required if (dest-source)<length
-//		cr5 = noncache flag
-
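-// The subc/addze sequence below is a mode-independent unsigned compare; as a
-// C sketch:
-//
-//     int reverse_needed = ((uintptr_t)(dest - source) < length);
-//
-// subc sets Carry iff (dest-source) >= length, addze. copies Carry into r0 and
-// sets cr0, and the beq-- then selects the reverse path when Carry was 0.
-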
-        .align	5
-copyit64:
-        rlwinm  r7,r5,0,0,31        // truncate length to 32-bit, in case we're running in 64-bit mode
-        cntlzw	r11,r5				// get magnitude of length
-        dcbt	0,r6				// touch in 1st block of source
-        dcbtst	0,r4				// touch in 1st destination cache block
-        subc    r7,r12,r7           // set Carry if (dest-source)>=length, in mode-independent way
-        li      r0,0                // get a 0
-        lis     r10,hi16(0x80000000)// get 0x80000000
-        addze.  r0,r0               // set cr0 on carry bit (beq if reverse move required)
-        neg     r9,r4               // start to get alignment for destination
-        sraw    r8,r10,r11          // get mask based on operand length, to limit alignment
-        bt--	noncache,c64uncached// skip if uncached
-        beq--	c64rdouble          // handle cached reverse moves        
-                
-        
-// Forward, cached or doubleword aligned uncached.  This is the common case.
-// NOTE: we never do an unaligned access if the source and destination are "relatively"
-// doubleword aligned.  We depend on this in the uncached case.
-//      r4 = destination
-//      r5 = length (>0)
-//      r6 = source
-//      r8 = inverse of largest mask smaller than operand length
-//      r9 = neg(dest), used to compute alignment
-//      cr5 = noncache flag
-
-c64double:
-        rlwinm  r7,r9,0,0x7F        // get #bytes to 128-byte align destination
-        andc    r7,r7,r8            // limit by operand length
-        andi.	r8,r7,7				// r8 <- #bytes to doubleword align
-        srwi	r9,r7,3				// r9 <- #doublewords to 128-byte align
-        sub		r5,r5,r7			// adjust length remaining
-        cmpwi	cr1,r9,0			// any doublewords to move to cache align?
-        srwi	r10,r5,7			// r10 <- 128-byte chunks to xfer after aligning dest
-        cmpwi	cr7,r10,0			// set cr7 on chunk count
-        beq		c64double2			// dest already doubleword aligned
-        mtctr	r8
-        b		c64double1
-        
-        .align	5					// align inner loops
-c64double1:							// copy bytes until dest is doubleword aligned
-        lbz		r0,0(r6)
-        addi	r6,r6,1
-        stb		r0,0(r4)
-        addi	r4,r4,1
-        bdnz	c64double1
-
-c64double2:							// r9/cr1=doublewords, r10/cr7=128-byte chunks
-        beq		cr1,c64double4		// no doublewords to xfer in order to cache align
-        mtctr	r9
-        b		c64double3
-
-        .align	5					// align inner loops
-c64double3:							// copy doublewords until dest is 128-byte aligned
-        ld		r7,0(r6)
-        addi	r6,r6,8
-        std		r7,0(r4)
-        addi	r4,r4,8
-        bdnz	c64double3
-        
-// Here to xfer 128-byte chunks, if any.  Since we only have 8 GPRs for
-// data (64 bytes), we load/store each twice per 128-byte chunk.
-
-c64double4:							// r10/cr7=128-byte chunks
-        rlwinm	r0,r5,29,28,31		// r0 <- count of leftover doublewords, after moving chunks
-        cmpwi	cr1,r0,0			// set cr1 on leftover doublewords
-        beq		cr7,c64double7		// no 128-byte chunks
-        
-        // We must check for (source-dest)<128 in a mode-independent way.  If within 128 bytes,
-        // turn on "noncache" because we cannot use dcbz128 even if operands are cacheable.
-        
-        sub		r8,r6,r4			// r8 <- (source - dest)
-        rldicr. r0,r8,0,63-7        // zero low 7 bits and check for 0, mode independent
-        cror	noncache,cr0_eq,noncache	// turn on "noncache" flag if (source-dest)<128
-        mtctr	r10
-        b		c64InnerLoop
-                
-        .align	5					// align inner loop
-c64InnerLoop:						// loop copying 128-byte cache lines to 128-aligned destination
-        ld		r0,0(r6)			// start pipe: load 1st half-line
-        ld		r2,8(r6)
-        ld		r7,16(r6)
-        ld		r8,24(r6)
-        ld		r9,32(r6)
-        ld		r10,40(r6)
-        ld		r11,48(r6)
-        ld		r12,56(r6)
-        bt		noncache,c64InnerLoop1	// skip if uncached or overlap
-        dcbz128	0,r4				// avoid prefetch of next cache line
-c64InnerLoop1:
-
-        std		r0,0(r4)
-        std		r2,8(r4)
-        std		r7,16(r4)
-        std		r8,24(r4)
-        std		r9,32(r4)
-        std		r10,40(r4)
-        std		r11,48(r4)
-        std		r12,56(r4)
-        
-        ld		r0,64(r6)			// load 2nd half of chunk
-        ld		r2,72(r6)
-        ld		r7,80(r6)
-        ld		r8,88(r6)
-        ld		r9,96(r6)
-        ld		r10,104(r6)
-        ld		r11,112(r6)
-        ld		r12,120(r6)
-        addi	r6,r6,128
-
-        std		r0,64(r4)
-        std		r2,72(r4)
-        std		r7,80(r4)
-        std		r8,88(r4)
-        std		r9,96(r4)
-        std		r10,104(r4)
-        std		r11,112(r4)
-        std		r12,120(r4)
-        addi	r4,r4,128			// advance to next dest chunk
-
-        bdnz	c64InnerLoop		// loop if more chunks
-        
-
-c64double7:         	            // r5 <- leftover bytes, cr1 set on doubleword count
-        rlwinm	r0,r5,29,28,31		// r0 <- count of leftover doublewords (0-15)
-        andi.	r5,r5,7				// r5/cr0 <- count of leftover bytes (0-7)
-        beq		cr1,c64byte			// no leftover doublewords
-        mtctr	r0
-        b		c64double8
-        
-        .align	5					// align inner loop
-c64double8:							// loop copying leftover doublewords
-        ld		r0,0(r6)
-        addi	r6,r6,8
-        std		r0,0(r4)
-        addi	r4,r4,8
-        bdnz	c64double8
-
-
-// Forward byte loop.
-
-c64byte:							// r5/cr0 <- byte count (can be big if unaligned uncached)
-		beqlr                       // done if no leftover bytes
-        mtctr	r5
-        b		c64byte1
-        
-        .align	5					// align inner loop
-c64byte1:
-        lbz		r0,0(r6)
-        addi	r6,r6,1
-        stb		r0,0(r4)
-        addi	r4,r4,1
-        bdnz	c64byte1
-
-        blr
-
-
-// Uncached copies.  We must avoid unaligned accesses, since they always take alignment
-// exceptions on uncached memory on 64-bit processors.  This may mean we copy long operands
-// a byte at a time, but that is still much faster than alignment exceptions.
-//      r4 = destination
-//      r5 = length (>0)
-//      r6 = source
-//      r8 = inverse of largest mask smaller than operand length
-//      r9 = neg(dest), used to compute alignment
-//      r12 = (dest-source), used to test relative alignment
-//      cr0 = beq if reverse move required
-//      cr5 = noncache flag
-
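-// Relative alignment picks the widest access that can never fault; as a C
-// sketch (the three helpers are hypothetical stand-ins for the paths below):
-//
-//     uintptr_t d = (uintptr_t)dest - (uintptr_t)source;
-//     if ((d & 7) == 0)       copy_doublewords();  /* ld/std              */
-//     else if ((d & 3) == 0)  copy_words();        /* G3/G4 lwz/stw path  */
-//     else                    copy_bytes();        /* no safe wide access */
-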
-c64uncached:
-        rlwinm	r10,r12,0,29,31		// relatively doubleword aligned?
-        rlwinm	r11,r12,0,30,31		// relatively word aligned?
-        cmpwi	cr7,r10,0			// set cr7 beq if doubleword aligned
-        cmpwi	cr1,r11,0			// set cr1 beq if word aligned
-        beq--   c64reverseUncached
-        
-        beq		cr7,c64double		// doubleword aligned
-        beq		cr1,forward32bit    // word aligned, use G3/G4 code
-        cmpwi	r5,0				// set cr0 on byte count
-        b		c64byte				// unaligned operands
-
-c64reverseUncached:
-        beq		cr7,c64rdouble		// doubleword aligned so can use LD/STD
-        beq		cr1,reverse32bit	// word aligned, use G3/G4 code
-        add		r6,r6,r5			// point to (end+1) of source and dest
-        add		r4,r4,r5
-        cmpwi	r5,0				// set cr0 on length
-        b		c64rbyte			// copy a byte at a time
-        
-        
-
-// Reverse doubleword copies.  This is used for all cached copies, and doubleword
-// aligned uncached copies.
-//      r4 = destination
-//      r5 = length (>0)
-//      r6 = source
-//      r8 = inverse of largest mask of low-order 1s smaller than operand length
-//      cr5 = noncache flag
-
-c64rdouble:
-        add		r6,r6,r5			// point to (end+1) of source and dest
-        add		r4,r4,r5
-        rlwinm	r7,r4,0,29,31		// r7 <- #bytes to doubleword align dest
-        andc.   r7,r7,r8            // limit by operand length
-        sub		r5,r5,r7			// adjust length
-        srwi	r8,r5,6				// r8 <- 64-byte chunks to xfer
-        cmpwi	cr1,r8,0			// any chunks?
-        beq		c64rd2				// dest already doubleword aligned
-        mtctr	r7
-
-c64rd1:								// copy bytes until dest is doubleword aligned
-        lbzu	r0,-1(r6)
-        stbu	r0,-1(r4)
-        bdnz	c64rd1
-        
-c64rd2:								// r8/cr1 <- count of 64-byte chunks
-        rlwinm	r0,r5,29,29,31		// r0 <- count of leftover doublewords
-        andi.	r5,r5,7				// r5/cr0 <- count of leftover bytes
-        cmpwi	cr7,r0,0			// leftover doublewords?
-        beq		cr1,c64rd4			// no chunks to xfer
-        mtctr	r8
-        b		c64rd3
-        
-        .align	5					// align inner loop
-c64rd3:								// loop copying 64-byte chunks
-        ld		r7,-8(r6)
-        ld		r8,-16(r6)
-        ld		r9,-24(r6)
-        ld		r10,-32(r6)
-        ld		r11,-40(r6)
-        ld		r12,-48(r6)
-        std		r7,-8(r4)
-        std		r8,-16(r4)
-        ld		r7,-56(r6)
-        ldu		r8,-64(r6)
-        std		r9,-24(r4)
-        std		r10,-32(r4)
-        std		r11,-40(r4)
-        std		r12,-48(r4)
-        std		r7,-56(r4)
-        stdu	r8,-64(r4)
-        bdnz	c64rd3
-
-c64rd4:								// r0/cr7 = leftover doublewords  r5/cr0 = leftover bytes
-        beq		cr7,c64rbyte		// no leftover doublewords
-        mtctr	r0
-        
-c64rd5:								// loop copying leftover doublewords
-        ldu		r0,-8(r6)
-        stdu	r0,-8(r4)
-        bdnz	c64rd5
-
-
-// Reverse byte loop.
-
-c64rbyte:							// r5/cr0 <- byte count (can be big if unaligned uncached)
-        beqlr                       // done if no leftover bytes
-        mtctr	r5
-        
-c64rbyte1:
-        lbzu	r0,-1(r6)
-        stbu	r0,-1(r4)
-        bdnz	c64rbyte1
-
-        blr
-
diff --git a/osfmk/ppc/bcopytest.c b/osfmk/ppc/bcopytest.c
deleted file mode 100644
index bcc86bfb4..000000000
--- a/osfmk/ppc/bcopytest.c
+++ /dev/null
@@ -1,621 +0,0 @@
-#include <debug.h>
-#include <mach_kgdb.h>
-#include <mach_vm_debug.h>
-#include <db_machine_commands.h>
-
-#include <kern/thread.h>
-#include <mach/vm_attributes.h>
-#include <mach/vm_param.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_map.h>
-#include <vm/vm_page.h>
-#include <kern/spl.h>
-
-#include <kern/misc_protos.h>
-#include <ppc/exception.h>
-#include <ppc/misc_protos.h>
-#include <ppc/proc_reg.h>
-
-#include <vm/pmap.h>
-#include <ppc/pmap.h>
-#include <ppc/mem.h>
-
-#include <ppc/new_screen.h>
-#include <ppc/Firmware.h>
-#include <ppc/mappings.h>
-#include <ddb/db_output.h>
-
-#include <console/video_console.h>		/* (TEST/DEBUG) */
-
-#define patper 253
-
-
-int main(void);
-void clrarea(unsigned int *source, unsigned int *sink);
-int tstcopy(void *src, void *snk, unsigned int lgn);
-void clrarea2(unsigned int *source, unsigned int *sink);
-int tstcopy2(void *src, void *snk, unsigned int lgn);
-int tstcopy3(void *src, void *snk, unsigned int lgn);
-int tstcopy4(void *src, void *snk, unsigned int lgn);
-int tstcopy5(void *src, void *snk, unsigned int lgn);
-int dumbcopy(void *src, void *snk, unsigned int lgn);
-
-
-unsigned int gtick(void);
-
-
-void bcopytest(void);
-void bcopytest(void) {
-
-	void *srcptr, *snkptr, *asrc, *asnk;
-	int bsrc, bsnk, size, i, ret, n; 
-	volatile int dbg = 0;
-	unsigned int *sink, *source;
-	
-	kern_return_t retr;
-	
-	db_printf("bcopy test\n");	
-	
-	retr = kmem_alloc_kobject(kernel_map, (vm_offset_t *)&sink, (1024*1024)+4096);	/* Get sink area */
-	if(retr != KERN_SUCCESS) {	/* Did we find any memory at all? */
-		panic("bcopytest: Whoops...  no memory for sink\n");
-	}
-	
-	retr = kmem_alloc_kobject(kernel_map, (vm_offset_t *)&source, (1024*1024)+4096);	/* Get source area */
-	if(retr != KERN_SUCCESS) {	/* Did we find any memory at all? */
-		panic("bcopytest: Whoops...  no memory for source\n");
-	}
-
-	db_printf("Source at %08X; Sink at %08X\n", source, sink);
-	
-	srcptr = (void *)&source[0];
-	snkptr = (void *)&sink[0];
-	
-#if 1
-	db_printf("Testing non-overlap case; source bndry = 0 to 7F; sink bndry = 0 - 7F; lgn = 1 to 256\n");
-	for(bsrc = 0; bsrc < 128; bsrc++) {			/* Step the source by 1 */
-		for(bsnk = 0; bsnk < 128; bsnk++) {		/* Step the sink by 1 */
-			for(size = 1; size <= 256; size++) {	/* Step the size by 1 */
-			
-				clrarea(source, sink);						/* Reset source and clear sink */
-				if(size == 255) {
-					dbg = 99;
-				}
-				if(tstcopy((void *)((unsigned int)srcptr + bsrc), (void *)((unsigned int)snkptr + bsnk), size)) {	
-					db_printf("Test failed; source = %02X; sink = %02X; length = %d\n", bsrc, bsnk, size);
-					db_printf("failed\n");
-				}
-			}
-		}
-	}
-	db_printf("Non-overlap test complete\n");
-#endif
-
-
-#if 1	
-	db_printf("Testing overlap\n");
-	for(bsrc = 1; bsrc < 128; bsrc++) {			/* Step the source by 1 */
-		for(bsnk = 0; bsnk < 128; bsnk++) {		/* Step the sink by 1 */
-			for(size = 1; size <= 256; size++) {	/* Step the size by 1 */
-			
-				clrarea2(source, sink);						/* Reset source and clear sink */
-				if(bsrc < bsnk) {
-					dbg = 88;
-				}
-				else {
-					dbg = 99;
-				}
-				if(tstcopy2((void *)((unsigned int)srcptr + bsrc), (void *)((unsigned int)srcptr + bsnk), size)) {	
-					db_printf("Test failed; source = %02X; sink = %02X; length = %d\n", bsrc, bsnk, size);
-					db_printf("failed\n");
-				}
-			}
-		}
-	}
-	db_printf("Overlap test complete\n");
-#endif
-
-#if 1
-	db_printf("Starting exhaustive tests\n");
-	for(i = 0; i < 262144 * 4; i++) {		/* Set all 1MB of source and dest to known pattern */
-		((unsigned char *)srcptr)[i] = i % patper;	/* Make a non-power-of-two length pattern */
-		((unsigned char *)snkptr)[i] = i % patper;	/* Make a non-power-of-two length pattern */
-	}
-
-	db_printf("No overlap; source < sink, length = 0 to 1023\nSource =");
-
-#if 1
-	for(bsrc = 0; bsrc < 128; bsrc++) {				/* Step source by 1 */
-		db_printf(" %3d", bsrc);					/* Show where we're at */
-		for(bsnk = 0; bsnk < 128; bsnk++) {			/* Step sink by 1 */
-			for(size = 0; size < 1025; size++) {	/* Step size from 0 to 1023 */				
-				asrc = (void *)((unsigned int)srcptr + bsrc); 			/* Source byte address */
-				asnk = (void *)((unsigned int)srcptr + bsnk + 2048);	/* Sink byte address */
-				ret = tstcopy5(asrc, asnk, size);	/* Copy and validate */
-				if(ret) {	
-					db_printf("\nTest failed - source = %3d, sink = %3d size = %d\n", bsrc, bsnk, size);
-					db_printf("failed\n");
-				}
-			}
-		}
-	}
-#endif
-				
-	db_printf("\n");
-	db_printf("No overlap; source > sink, length = 0 to 1023\nSource =");
-
-#if 1
-	for(bsrc = 0; bsrc < 128; bsrc++) {				/* Step source by 1 */
-		db_printf(" %3d", bsrc);					/* Show where we're at */
-		for(bsnk = 0; bsnk < 128; bsnk++) {			/* Step sink by 1 */
-			for(size = 0; size < 1025; size++) {	/* Step size from 0 to 1023 */				
-				asrc = (void *)((unsigned int)srcptr + bsrc + 2048);	/* Source byte address */
-				asnk = (void *)((unsigned int)srcptr + bsnk);			/* Sink byte address */
-				ret = tstcopy5(asrc, asnk, size);	/* Copy and validate */
-				if(ret) {	
-					db_printf("\nTest failed - source = %3d, sink = %3d size = %d\n", bsrc, bsnk, size);
-					db_printf("failed\n");
-				}
-			}
-		}
-	}
-#endif
-				
-	db_printf("\n");
-	db_printf("Overlap; source = sink + N (N = 0 to 127), length = 0 to 1023\nN =");
-
-#if 1
-	for(n = 0; n < 128; n++) {						/* Step n by 1 */
-		db_printf(" %3d", n);					/* Show where we're at */
-		for(bsnk = 0; bsnk < 128; bsnk++) {			/* Step sink by 1 */
-			for(size = 0; size < 1025; size++) {	/* Step size from 0 to 1023 */				
-				asrc = (void *)((unsigned int)srcptr + bsnk + n);	/* Source byte address */
-				asnk = (void *)((unsigned int)srcptr + bsnk);		/* Sink byte address */
-				ret = tstcopy5(asrc, asnk, size);	/* Copy and validate */
-				if(ret) {	
-					db_printf("\nTest failed - source = %3d, sink = %3d size = %d\n", bsrc, bsnk, size);
-					db_printf("failed\n");
-				}
-			}
-		}
-	}
-#endif
-				
-	db_printf("\n");
-	db_printf("Overlap; source + N = sink (N = 0 to 127), length = 0 to 1023\nSource =");
-
-#if 1
-	for(bsrc = 0; bsrc < 128; bsrc++) {				/* Step source by 1 */
-		db_printf(" %3d", bsrc);					/* Show where we're at */
-		for(n = 0; n < 128; n++) {					/* Step N by 1 */
-			for(size = 0; size < 1025; size++) {	/* Step size from 0 to 1023 */				
-				asrc = (void *)((unsigned int)srcptr + bsrc);		/* Source byte address */
-				asnk = (void *)((unsigned int)srcptr + bsrc + n);	/* Sink byte address */
-				ret = tstcopy5(asrc, asnk, size);	/* Copy and validate */
-				if(ret) {	
-					db_printf("\nTest failed - source = %3d, n = %3d size = %d\n", bsrc, n, size);
-					db_printf("failed\n");
-				}
-			}
-		}
-	}
-#endif
-				
-				
-	db_printf("\n");
-	db_printf("Overlap; source = sink + N + 128 (N = 0 to 127), length = 0 to 1023\nN =");
-
-#if 1
-	for(n = 0; n < 128; n++) {						/* Step n by 1 */
-		db_printf(" %3d", n);					/* Show where we're at */
-		for(bsnk = 0; bsnk < 128; bsnk++) {			/* Step sink by 1 */
-			for(size = 0; size < 1025; size++) {	/* Step size from 0 to 1023 */				
-				asrc = (void *)((unsigned int)srcptr + bsnk + n + 128);	/* Source byte address */
-				asnk = (void *)((unsigned int)srcptr + bsnk);		/* Sink byte address */
-				ret = tstcopy5(asrc, asnk, size);	/* Copy and validate */
-				if(ret) {	
-					db_printf("\nTest failed - source = %3d, sink = %3d size = %d\n", bsrc, bsnk, size);
-					db_printf("failed\n");
-				}
-			}
-		}
-	}
-#endif
-				
-	db_printf("\n");
-	db_printf("Overlap; source + N + 128 = sink (N = 0 to 127), length = 0 to 1023\nSource =");
-
-#if 1
-	for(bsrc = 0; bsrc < 128; bsrc++) {				/* Step source by 1 */
-		db_printf(" %3d", bsrc);					/* Show where we're at */
-		for(n = 0; n < 128; n++) {					/* Step N by 1 */
-			for(size = 0; size < 1025; size++) {	/* Step size from 0 to 1023 */				
-				asrc = (void *)((unsigned int)srcptr + bsrc);	/* Source byte address */
-				asnk = (void *)((unsigned int)srcptr + bsrc + n + 128);	/* Sink byte address */
-				ret = tstcopy5(asrc, asnk, size);	/* Copy and validate */
-				if(ret) {	
-					db_printf("\nTest failed - source = %3d, n = %3d size = %d\n", bsrc, n, size);
-					db_printf("failed\n");
-				}
-			}
-		}
-	}
-#endif
-				
-	db_printf("\n");
-	db_printf("Overlap; source = sink + N + 256 (N = 0 to 127), length = 0 to 1023\nN =");
-
-#if 1
-	for(n = 0; n < 128; n++) {						/* Step n by 1 */
-		db_printf(" %3d", n);					/* Show where we're at */
-		for(bsnk = 0; bsnk < 128; bsnk++) {			/* Step sink by 1 */
-			for(size = 0; size < 1025; size++) {	/* Step size from 0 to 1023 */				
-				asrc = (void *)((unsigned int)srcptr + bsnk + n + 256);	/* Source byte address */
-				asnk = (void *)((unsigned int)srcptr + bsnk);		/* Sink byte address */
-				ret = tstcopy5(asrc, asnk, size);	/* Copy and validate */
-				if(ret) {	
-					db_printf("\nTest failed - source = %3d, sink = %3d size = %d\n", bsrc, bsnk, size);
-					db_printf("failed\n");
-				}
-			}
-		}
-	}
-#endif
-				
-	db_printf("\n");
-	db_printf("Overlap; source + N + 256 = sink (N = 0 to 127), length = 0 to 1023\nSource =");
-#if 1
-	for(bsrc = 0; bsrc < 128; bsrc++) {				/* Step source by 1 */
-		db_printf(" %3d", bsrc);					/* Show where we're at */
-		for(n = 0; n < 128; n++) {					/* Step N by 1 */
-			for(size = 0; size < 1025; size++) {	/* Step size from 0 to 1023 */				
-				asrc = (void *)((unsigned int)srcptr + bsrc);	/* Source byte address */
-				asnk = (void *)((unsigned int)srcptr + bsrc + n + 256);	/* Sink byte address */
-				ret = tstcopy5(asrc, asnk, size);	/* Copy and validate */
-				if(ret) {	
-					db_printf("\nTest failed - source = %3d, n = %3d size = %d\n", bsrc, n, size);
-					db_printf("failed\n");
-				}
-			}
-		}
-	}
-#endif
-				
-
-
-
-
-
-#endif
-	
-#if 0
-	iterations = 1000;
-	tottime = 0;
-	totbytes = 0;
-	
-	db_printf("Random test starting; iterations = %d\n", iterations);
-	for(i = 0; i < 262144 * 4; i++) {		/* Set all 1MB of source (and dest for this test) to a known pattern */
-		((unsigned char *)srcptr)[i] = i & 255;
-	}
-	
-	for(i = 0; i < iterations; i++) {			/* Test until we are done */
-		makerand = rand() << 16 | (rand() & 0x0000FFFF);
-		bsrc = makerand & 0x0007FFFF;			/* Generate source */
-		makerand = rand() << 16 | (rand() & 0x0000FFFF);
-		bsnk = makerand & 0x0007FFFF;			/* Generate sink */
-		makerand = rand() << 16 | (rand() & 0x0000FFFF);
-		size = makerand & 0x0007FFFF;			/* Generate length */
-#if 1
-		db_printf("rt %7d: src = %08X; sink = %08X; length = %7d\n", i, ((unsigned int)srcptr + bsrc),
-			((unsigned int)srcptr + bsnk), size);
-#endif
-
-		asrc = (void *)((unsigned int)srcptr + bsrc); 
-		asnk = (void *)((unsigned int)srcptr + bsnk); 
-		timein = gtick();
-		ret = tstcopy3(asrc, asnk, size);
-		timeout = gtick();
-		if(ret) {	
-			db_printf("Test failed; source = %02X; sink = %02X; length = %d\n", bsrc, bsnk, size);
-			db_printf("failed\n");
-	
-		}
-		ticks = timeout - timein;				/* Get time in ticks for copy */
-		tottime += ticks;
-		totbytes += size;
-		
-		rate = (double) totbytes / (double)tottime;	/* Get bytes per tick */ 
-//		rate = rate * (double)11250000.0;				/* Bytes per second */
-//		rate = rate * (double)16500000.0;				/* Bytes per second */
-		rate = rate * (double)tbfreq;					/* Bytes per second */
-		rate = rate / (double)1000000.0;				/* Get number of MBs */
-		
-		db_printf("Total bytes = %lld; total time = %lld; rate = %f10\n", totbytes, tottime, rate);
-		
-	}
-#endif
-
-
-	
-#if 0
-	iterations = 100;
-	tottime = 0;
-	totbytes = 0;
-	
-	db_printf("Random test starting; iterations = %d\n", iterations);
-	for(i = 0; i < 262144 * 4; i++) {		/* Set all 1MB of source (and dest for this test) to a known pattern */
-		((unsigned char *)srcptr)[i] = i & 255;
-	}
-	
-	for(i = 0; i < iterations; i++) {			/* Test until we are done */
-		makerand = rand() << 16 | (rand() & 0x0000FFFF);
-		bsrc = makerand & 0x0007FFFF;			/* Generate source */
-		makerand = rand() << 16 | (rand() & 0x0000FFFF);
-		bsnk = makerand & 0x0007FFFF;			/* Generate sink */
-		makerand = rand() << 16 | (rand() & 0x0000FFFF);
-		size = makerand & 0x0007FFFF;			/* Generate length */
-#if 1
-		db_printf("rt %7d: src = %08X; sink = %08X; length = %7d\n", i, ((unsigned int)srcptr + bsrc),
-			((unsigned int)srcptr + bsnk), size);
-#endif
-
-		asrc = (void *)((unsigned int)srcptr + bsrc); 
-		asnk = (void *)((unsigned int)srcptr + bsnk); 
-		timein = gtick();
-		ret = tstcopy4(asrc, asnk, size);
-		timeout = gtick();
-		if(ret) {	
-			db_printf("Test failed; source = %02X; sink = %02X; length = %d\n", bsrc, bsnk, size);
-			db_printf("failed\n");
-	
-		}
-		ticks = timeout - timein;				/* Get time in ticks for copy */
-		tottime += ticks;
-		totbytes += size;
-		
-		rate = (double) totbytes / (double)tottime;	/* Get bytes per tick */ 
-//		rate = rate * (double)11250000.0;				/* Bytes per second */
-//		rate = rate * (double)16500000.0;				/* Bytes per second */
-		rate = rate * (double)tbfreq;					/* Bytes per second */
-		rate = rate / (double)1000000.0;				/* Get number of MBs */
-		
-		db_printf("Total bytes = %lld; total time = %lld; rate = %f10\n", totbytes, tottime, rate);
-		
-	}
-#endif
-	
-#if 0
-	iterations = 100;
-	tottime = 0;
-	totbytes = 0;
-	
-	db_printf("Random test starting; iterations = %d\n", iterations);
-	for(i = 0; i < 262144 * 4; i++) {		/* Set all 1MB of source (and dest for this test) to a known pattern */
-		((unsigned char *)srcptr)[i] = i & 255;
-	}
-	
-	for(i = 0; i < iterations; i++) {			/* Test until we are done */
-		makerand = rand() << 16 | (rand() & 0x0000FFFF);
-		bsrc = makerand & 0x0007FFFF;			/* Generate source */
-		makerand = rand() << 16 | (rand() & 0x0000FFFF);
-		bsnk = makerand & 0x0007FFFF;			/* Generate sink */
-		makerand = rand() << 16 | (rand() & 0x0000FFFF);
-		size = makerand & 0x0007FFFF;			/* Generate length */
-#if 1
-		db_printf("rt %7d: src = %08X; sink = %08X; length = %7d\n", i, ((unsigned int)srcptr + bsrc),
-			((unsigned int)srcptr + bsnk), size);
-#endif
-
-		asrc = (void *)((unsigned int)srcptr + bsrc); 
-		asnk = (void *)((unsigned int)srcptr + bsnk); 
-		timein = gtick();
-		ret = dumbcopy(asrc, asnk, size);
-		timeout = gtick();
-		if(ret) {	
-			db_printf("Test failed; source = %02X; sink = %02X; length = %d\n", bsrc, bsnk, size);
-			db_printf("failed\n");
-	
-		}
-		ticks = timeout - timein;				/* Get time in ticks for copy */
-		tottime += ticks;
-		totbytes += size;
-		
-		rate = (double) totbytes / (double)tottime;	/* Get bytes per tick */ 
-		rate = rate * (double)tbfreq;				/* Bytes per second */
-		rate = rate / (double)1000000.0;			/* Get number of MBs */
-		
-		db_printf("Total bytes = %lld; total time = %lld; rate = %f10\n", totbytes, tottime, rate);
-		
-	}
-#endif
-	
-	kmem_free(kernel_map, (vm_offset_t) sink, (1024*1024)+4096);	/* Release this mapping block */
-	kmem_free(kernel_map, (vm_offset_t) source, (1024*1024)+4096);	/* Release this mapping block */
-	
-	if(dbg == 22) db_printf("Gabbagoogoo\n");
-	return;
-}
-
-void clrarea(unsigned int *source, unsigned int *sink) {
-
-	unsigned int i;
-	
-	for(i=0; i < 1024; i++) {		/* Init source & sink */
-		source[i]	= 0x55555555;	/* Known pattern */
-		sink[i] 	= 0xAAAAAAAA;	/* Known pattern */
-	}
-	return;
-}
-
-void
-clrarea2(unsigned int *source, __unused unsigned int *sink)
-{
-	unsigned int i;
-	unsigned char *ss;
-	
-	ss = (unsigned char *)&source[0];
-	
-	for(i=0; i < 1024 * 4; i++) {	/* Init source/sink */
-		ss[i] = i & 0xFF;			/* Known pattern */
-	}
-	return;
-}
-
-int tstcopy(void *src, void *snk, unsigned int lgn) {
-
-	unsigned int i, crap;
-	
-	bcopy(src, snk, lgn);
-	
-	for(i = 0; i < lgn; i++) {
-		if(((unsigned char *)snk)[i] != 0x55) {
-			crap = (unsigned int)&((unsigned char *)snk)[i];
-			db_printf("bad copy at sink[%d] (%08X) it is %02X\n", i,crap, ((unsigned char *)snk)[i]);
-			return 1;
-		}
-	}
-	if(((unsigned char *)snk)[lgn] != 0xAA) {	/* Is it right? */
-		crap = (unsigned int)&((unsigned char *)snk)[i];
-		db_printf("Copied too far at sink[%d] (%08X) it is %02X\n", i, crap, ((unsigned char *)snk)[lgn]);
-		return 1;
-	}
-	return 0;
-
-}
-
-int tstcopy2(void *src, void *snk, unsigned int lgn) {
-
-	unsigned int i, crap;
-	unsigned char ic, ec;
-	
-	ic = ((unsigned char *)src)[0];
-	ec = ((unsigned char *)snk)[lgn];
-	
-	bcopy(src, snk, lgn);
-	
-	for(i = 0; i < lgn; i++) {
-		if(((unsigned char *)snk)[i] != ic) {
-			crap = (unsigned int)&((unsigned char *)snk)[i];
-			db_printf("bad copy at sink[%d] (%08X) it is %02X\n", i,crap, ((unsigned char *)snk)[i]);
-			return 1;
-		}
-		ic = (ic + 1) & 0xFF;
-	}
-	
-	if(((unsigned char *)snk)[lgn] != ec) {	/* Is it right? */
-		crap = (unsigned int)&((unsigned char *)snk)[i];
-		db_printf("Copied too far at sink[%d] (%08X) it is %02X\n", i, crap, ((unsigned char *)snk)[lgn]);
-		return 1;
-	}
-	return 0;
-
-}
-
-int tstcopy3(void *src, void *snk, unsigned int lgn) {
-
-	unsigned int i, crap;
-	unsigned char ic, ec, oic;
-	
-	oic = ((unsigned char *)snk)[0];
-	ic = ((unsigned char *)src)[0];
-	ec = ((unsigned char *)snk)[lgn];
-	
-	bcopy(src, snk, lgn);
-	
-	for(i = 0; i < lgn; i++) {
-		if(((unsigned char *)snk)[i] != ic) {
-			crap = (unsigned int)&((unsigned char *)snk)[i];
-			db_printf("bad copy at sink[%d] (%08X) it is %02X\n", i ,crap, ((unsigned char *)snk)[i]);
-			return 1;
-		}
-		ic = (ic + 1) & 0xFF;
-	}
-	
-	if(((unsigned char *)snk)[lgn] != ec) {	/* Is it right? */
-		crap = (unsigned int)&((unsigned char *)snk)[i];
-		db_printf("Copied too far at sink[%d] (%08X) it is %02X\n", i, crap, ((unsigned char *)snk)[lgn]);
-		return 1;
-	}
-
-	for(i=0; i < lgn; i++) {	/* Restore pattern */
-		((unsigned char *)snk)[i] = oic;		
-		oic = (oic + 1) & 0xFF;
-	}
-
-	return 0;
-
-}
-
-int tstcopy4(void *src, void *snk, unsigned int lgn) {
-	
-	bcopy(src, snk, lgn);
-	return 0;
-
-}
-
-int tstcopy5(void *src, void *snk, unsigned int lgn) {
-
-	unsigned int i = 0, crap;
-	unsigned char ic, ec, oic, pc;
-	
-	oic = ((unsigned char *)snk)[0];				/* Original first sink character */
-	ic = ((unsigned char *)src)[0];					/* Original first source character */
-	ec = ((unsigned char *)snk)[lgn];				/* Original character just after last sink character */
-	pc = ((unsigned char *)snk)[-1];				/* Original character just before sink */
-	
-	bcopy(src, snk, lgn);
-	
-	if(((unsigned char *)snk)[lgn] != ec) {			/* Did we copy too far forward? */
-		crap = (unsigned int)&((unsigned char *)snk)[i];
-		db_printf("Copied too far at sink[%d] (%08X) it is %02X\n", i, crap, ((unsigned char *)snk)[lgn]);
-		return 1;
-	}
-
-	if(((unsigned char *)snk)[-1] != pc) {			/* Did we copy too far backward? */
-		crap = (unsigned int)&((unsigned char *)snk)[i];
-		db_printf("Copied too far at sink[%d] (%08X) it is %02X\n", i, crap, ((unsigned char *)snk)[lgn]);
-		return 1;
-	}
-
-	for(i = 0; i < lgn; i++) {						/* Check sink byte sequence */
-		if(((unsigned char *)snk)[i] != ic) {
-			crap = (unsigned int)&((unsigned char *)snk)[i];
-			db_printf("bad copy at sink[%d] (%08X) it is %02X\n", i ,crap, ((unsigned char *)snk)[i]);
-			return 1;
-		}
-		ic = (ic + 1) % patper;
-	}
-
-	for(i=0; i < lgn; i++) {	/* Restore pattern */
-		((unsigned char *)snk)[i] = oic;		
-		oic = (oic + 1) % patper;
-	}
-
-	return 0;
-
-}
-
-int dumbcopy(void *src, void *snk, unsigned int lgn) {
-	unsigned int i;
-	char *p = (char *)snk;
-	char *q = (char *)src;
-	
-	for(i = 0; i < lgn; i++) {
-		*p++ = *q++;
-	}
-	return 0;
-
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/osfmk/ppc/bits.s b/osfmk/ppc/bits.s
deleted file mode 100644
index d8d5960d5..000000000
--- a/osfmk/ppc/bits.s
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- * 
- */
-
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-
-#	
-# void setbit(int bitno, int *s)
-# 
-# Set indicated bit in bit string.
-#     Note:	being big-endian, bit 0 is 0x80000000.
-	
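-# An equivalent C sketch (big-endian bit numbering; byte-granular, like the
-# assembly below):
-#
-#     void setbit(int bitno, int *s) {
-#         ((unsigned char *)s)[bitno >> 3] |= 0x80 >> (bitno & 7);
-#     }
-#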
-ENTRY(setbit,TAG_NO_FRAME_USED)
-
-	rlwinm		r8,r3,29,3,31		/* Get byte displacement */
-	rlwinm		r9,r3,0,29,31		/* Get bit within byte */
-	li			r6,0x80				/* Start with bit 0 */
-	lbzx		r5,r4,r8			/* Grab target byte */
-	srw			r6,r6,r9			/* Get the right bit (fits right into the load cycle) */
-	or			r5,r5,r6			/* Turn on the right bit */
-	stbx		r5,r4,r8			/* Save the byte back */
-	blr	
-	
-#	
-# void clrbit(int bitno, int *s)
-# 
-# Clear indicated bit in bit string.
-#     Note:	being big-endian, bit 0 is 0x80000000.
-	
-ENTRY(clrbit,TAG_NO_FRAME_USED)
-
-	rlwinm		r8,r3,29,3,31		/* Get byte displacement */
-	rlwinm		r9,r3,0,29,31		/* Get bit within byte */
-	li			r6,0x80				/* Start with bit 0 */
-	lbzx		r5,r4,r8			/* Grab target byte */
-	srw			r6,r6,r9			/* Get the right bit (fits right into the load cycle) */
-	andc		r5,r5,r6			/* Turn off the right bit */
-	stbx		r5,r4,r8			/* Save the byte back */
-	blr	
-
-
-# /*
-#  * Find first bit set in bit string.
-#  */
-# int
-# ffsbit(int *s)
-#
-# Returns the bit index of the first bit set (starting from 0)
-# Assumes pointer is word-aligned
-
-ENTRY(ffsbit, TAG_NO_FRAME_USED)
-	lwz	r0,	0(ARG0)
-		mr	ARG1,	ARG0	/* Free up ARG0 for result */
-
-	cmpwi	r0,	0		/* Check against zero... */
-		cntlzw	ARG0,	r0	/* Free inst... find the set bit... */
-	bnelr+				/* Return if bit in first word */
-
-.L_ffsbit_lp:
-	lwz	r0,	4(ARG1)
-	addi	ARG1,	ARG1,	4
-	cmpwi	r0,	0		/* Check against zero... */
-		cntlzw	r12,	r0
-		add	ARG0,	ARG0,	r12	/* ARG0 keeps bit count */
-	beq+	.L_ffsbit_lp
-	blr
-	
-/*
- * int tstbit(int bitno, int *s)
- *
- * Test indicated bit in bit string.
- *	Note:	 being big-endian, bit 0 is 0x80000000.
- */
-
-ENTRY2(tstbit, testbit, TAG_NO_FRAME_USED)
-
-	rlwinm		r8,r3,29,3,31		/* Get byte displacement */
-	rlwinm		r9,r3,0,29,31		/* Get bit within byte */
-	lbzx		r5,r4,r8			/* Grab target byte */
-	addi		r9,r9,25			/* Get actual shift value */
-	rlwnm		r3,r5,r9,31,31		/* Pass the bit back */
-	blr	
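For reference, the big-endian bit-string semantics these routines implement (bit 0 is the most significant bit of word 0, i.e. 0x80000000) can be sketched in C. The assembly works byte-by-byte, which is equivalent on a big-endian machine; unsigned parameters are used here only to keep the shifts well defined:

    void setbit(int bitno, unsigned int *s) {
        s[bitno >> 5] |= 0x80000000U >> (bitno & 31);
    }
    void clrbit(int bitno, unsigned int *s) {
        s[bitno >> 5] &= ~(0x80000000U >> (bitno & 31));
    }
    int tstbit(int bitno, unsigned int *s) {
        return (s[bitno >> 5] >> (31 - (bitno & 31))) & 1;
    }
    /* Like the assembly, this assumes some bit is set somewhere. */
    int ffsbit(unsigned int *s) {
        int base = 0, n = 0;
        unsigned int w;
        while (*s == 0) { s++; base += 32; }    /* skip all-zero words */
        for (w = *s; !(w & 0x80000000U); w <<= 1)
            n++;                                /* count leading zeros (cntlzw) */
        return base + n;
    }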
diff --git a/osfmk/ppc/boot.h b/osfmk/ppc/boot.h
deleted file mode 100644
index 9d3e885ee..000000000
--- a/osfmk/ppc/boot.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#include	<pexpert/ppc/boot.h>
diff --git a/osfmk/ppc/bzero.s b/osfmk/ppc/bzero.s
deleted file mode 100644
index 0dbd810b4..000000000
--- a/osfmk/ppc/bzero.s
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <ppc/asm.h>
-#include <ppc/exception.h>
-#include <assym.s>
-
-        .text
-        .align	2
-        .globl	_memset
-        .globl	_bzero
-        .globl	_bzero_nc
-        .globl	_bzero_phys
-        .globl	_bzero_phys_nc
-
-
-// *****************************
-// * B Z E R O _ P H Y S _ N C *
-// *****************************
-//
-// void bzero_phys_nc(addr64_t phys_addr, uint32_t length);
-//
-// Takes a phys addr in (r3,r4), and length in r5.  NO CACHING
-
-        .align	5
-LEXT(bzero_phys_nc)
-        mflr	r12				// save return address
-        rlwinm	r3,r3,0,1,0		// coalesce long-long in (r3,r4) into reg64_t in r3
-        rlwimi	r3,r4,0,0,31
-        mr		r4,r5			// put length where bzero() expects it
-        bl		EXT(ml_set_physical_get_ffs)	// turn DR off, SF on, features in cr6, old MSR in r11
-        bl		EXT(bzero_nc)		// use normal bzero() routine
-        mtlr	r12				// restore return
-        b		EXT(ml_restore)		// restore MSR, turning DR on and SF off
-        
-
-// ***********************
-// * B Z E R O _ P H Y S *
-// ***********************
-//
-// void bzero_phys(addr64_t phys_addr, uint32_t length);
-//
-// Takes a phys addr in (r3,r4), and length in r5.  We leave cache on.
-
-        .align	5
-LEXT(bzero_phys)
-        mflr	r12				// save return address
-        rlwinm	r3,r3,0,1,0		// coalesce long-long in (r3,r4) into reg64_t in r3
-        rlwimi	r3,r4,0,0,31
-        mr		r4,r5			// put length where bzero() expects it
-        bl		EXT(ml_set_physical_get_ffs)	// turn DR off, SF on, features in cr6, old MSR in r11
-        bl		EXT(bzero)		// use normal bzero() routine
-        mtlr	r12				// restore return
-        b		EXT(ml_restore)		// restore MSR, turning DR on and SF off
-        
-
-// *******************
-// * B Z E R O _ N C *
-// *******************
-//
-//	void bzero_nc(char	*addr, unsigned int length);
-//
-// For use with uncached memory.  Doesn't seem to be used at all, so probably not
-// performance critical.  NB: we must avoid unaligned stores, because some
-// machines (e.g., the 970) take alignment exceptions on _any_ unaligned op to uncached
-// memory.  Of course, we must also avoid dcbz.
-
-LEXT(bzero_nc)
-        cmplwi	cr1,r4,20		// too short to bother with 16-byte loops?
-        cmplwi	cr7,r4,0		// check for (len==0)
-        li		r6,0			// get a 0
-        bge		cr1,bznc1		// skip if length >=20
-        mtctr	r4				// set up byte loop
-        beqlr--	cr7				// done if len=0
-        
-// Short operands, loop over bytes.
-
-bznc0:
-        stb		r6,0(r3)
-        addi	r3,r3,1
-        bdnz	bznc0
-        blr
-        
-// Handle operands long enough to do doubleword stores; we must doubleword
-// align, to avoid alignment exceptions.
-
-bznc1:
-        neg		r7,r3			// start to compute #bytes to align
-        mfsprg	r10,2			// get feature flags
-        andi.	r0,r7,7			// get #bytes to doubleword align
-        mr		r5,r3			// make copy of operand ptr as bcopy expects
-        mtcrf	0x02,r10		// put pf64Bitb etc in cr6
-        beq		bzero_tail		// already doubleword aligned
-        sub		r4,r4,r0		// adjust count
-        mtctr	r0				// set up loop
-bznc2:							// zero bytes until doubleword aligned
-        stb		r6,0(r5)
-        addi	r5,r5,1
-        bdnz	bznc2
-        b		bzero_tail		// join bzero, now that r5 is aligned
-        
-
-// *************     ***************
-// * B Z E R O * and * M E M S E T *
-// *************     ***************
-//
-// void *   memset(void *b, int c, size_t len);
-// void		bzero(void *b, size_t len);
-//
-// These routines support G3, G4, and the 970, and run in both 32 and
-// 64-bit mode.  Lengths (size_t) are always 32 bits.
-//
-// Register use:
-//    r0 = temp
-//    r2 = temp
-//    r3 = original ptr, not changed since memset returns it
-//    r4 = count of bytes to set
-//    r5 = working operand ptr ("rp")
-//    r6 = value to store (usually 0)
-// r7-r9 = temps
-//   r10 = feature flags
-//   r11 = old MSR (if bzero_phys)
-//   r12 = return address (if bzero_phys)
-//   cr6 = feature flags (pf64Bit, pf128Byte, and pf32Byte)
-
-        .align	5
-LEXT(memset)					// void *   memset(void *b, int c, size_t len);
-        andi.	r6,r4,0xFF		// copy value to working register, test for 0
-        mr		r4,r5			// move length to working register
-        bne--	memset1			// branch to memset1 if value is nonzero
-LEXT(bzero)						// void	bzero(void *b, size_t len);
-        dcbtst	0,r3			// touch in 1st cache block
-        mfsprg	r10,2			// get features
-        li		r6,0			// get a 0
-        neg		r7,r3			// start to compute #bytes to align
-        andi.	r0,r10,pf128Byte+pf32Byte // get cache line size
-        mtcrf	0x02,r10		// put pf128Byte etc in cr6
-        cmplw	r4,r0			// operand length >= cache line size?
-        mr		r5,r3			// make copy of operand ptr (can't change r3)
-        blt		bzero_tail		// too short for dcbz (or dcbz128)
-        rlwinm	r0,r7,0,0x1F	// get #bytes to  32-byte align
-        rlwinm	r9,r7,0,0x7F	// get #bytes to 128-byte align
-        bt++	pf128Byteb,bzero_128 // skip if 128-byte processor
-
-// Operand length >=32 and cache line size is 32.
-//		r0 = #bytes to 32-byte align
-//		r4 = length
-//		r5 = ptr to operand
-//		r6 = 0
-
-        sub		r2,r4,r0		// adjust length
-        cmpwi	cr1,r0,0		// already 32-byte aligned?
-        srwi.	r8,r2,5			// get #32-byte chunks
-        beq		bzero_tail		// not long enough to dcbz
-        mtctr	r8				// set up loop count
-        rlwinm	r4,r2,0,27,31	// mask down to leftover byte count
-        beq		cr1,bz_dcbz32 	// skip if already 32-byte aligned
-        
-// 32-byte align.  We just store 32 0s, rather than test and use conditional
-// branches.  This is usually faster, because there are no mispredicts.
-
-        stw		r6,0(r5)		// zero next 32 bytes
-        stw		r6,4(r5)
-        stw		r6,8(r5)
-        stw		r6,12(r5)
-        stw		r6,16(r5)
-        stw		r6,20(r5)
-        stw		r6,24(r5)
-        stw		r6,28(r5)
-        add		r5,r5,r0		// now r5 is 32-byte aligned
-        b		bz_dcbz32
-
-// Loop doing 32-byte version of DCBZ instruction.
-
-        .align	4				// align the inner loop
-bz_dcbz32:
-        dcbz	0,r5			// zero another 32 bytes
-        addi	r5,r5,32
-        bdnz	bz_dcbz32
-
-// Store trailing bytes.  This routine is used both by bzero and memset.
-//		r4 = #bytes to store (may be large if memset)
-//		r5 = address
-//		r6 = value to store (in all 8 bytes)
-//     cr6 = pf64Bit etc flags
-
-bzero_tail:
-        srwi.	r0,r4,4			// get #(16-byte-chunks)
-        mtcrf	0x01,r4			// remaining byte count to cr7
-        beq		bzt3			// no 16-byte chunks
-        mtctr	r0				// set up loop count
-        bt++	pf64Bitb,bzt2	// skip if 64-bit processor
-        b		bzt1
-        .align	5
-bzt1:							// loop over 16-byte chunks on 32-bit processor
-        stw		r6,0(r5)
-        stw		r6,4(r5)
-        stw		r6,8(r5)
-        stw		r6,12(r5)
-        addi	r5,r5,16
-        bdnz	bzt1
-        b		bzt3
-        .align	5
-bzt2:							// loop over 16-byte chunks on 64-bit processor
-        std		r6,0(r5)
-        std		r6,8(r5)
-        addi	r5,r5,16
-        bdnz	bzt2
-        bf		28,bzt4			// 8-byte chunk?
-        std		r6,0(r5)
-        addi	r5,r5,8
-        b		bzt4
-bzt3:
-        bf		28,bzt4			// 8-byte chunk?
-        stw		r6,0(r5)
-        stw		r6,4(r5)
-        addi	r5,r5,8
-bzt4:
-        bf		29,bzt5			// word?
-        stw		r6,0(r5)
-        addi	r5,r5,4
-bzt5:
-        bf		30,bzt6			// halfword?
-        sth		r6,0(r5)
-        addi	r5,r5,2
-bzt6:
-        bflr	31				// byte?
-        stb		r6,0(r5)
-        blr
-        
-// Operand length is >=128 and cache line size is 128. We assume that
-// because the linesize is 128 bytes, this is a 64-bit processor.
-//		r4 = length
-//		r5 = ptr to operand
-//		r6 = 0
-//		r7 = neg(r5)
-//		r9 = #bytes to 128-byte align
-
-        .align	5
-bzero_128:
-        sub		r2,r4,r9		// r2 <- length remaining after cache-line aligning
-        rlwinm	r0,r7,0,0xF		// r0 <- #bytes to 16-byte align
-        srwi.	r8,r2,7			// r8 <- number of cache lines to 0
-        std		r6,0(r5)		// always store 16 bytes to 16-byte align...
-        std		r6,8(r5)		// ...even if too short for dcbz128
-        add		r5,r5,r0		// 16-byte align ptr
-        sub		r4,r4,r0		// adjust count
-        beq		bzero_tail		// r8==0, not long enough to dcbz128
-        sub.	r7,r9,r0		// get #bytes remaining to 128-byte align
-        rlwinm	r4,r2,0,0x7F	// r4 <- length remaining after dcbz128'ing
-        mtctr	r8				// set up dcbz128 loop
-        beq		bz_dcbz128		// already 128-byte aligned
-        b		bz_align		// enter loop over 16-byte chunks
-
-// 128-byte align by looping over 16-byte chunks.
-        
-        .align	5
-bz_align:						// loop over 16-byte chunks
-        subic.	r7,r7,16		// more to go?
-        std		r6,0(r5)
-        std		r6,8(r5)
-        addi	r5,r5,16
-        bgt		bz_align
-        
-        b		bz_dcbz128		// enter dcbz128 loop
-        
-// Loop over 128-byte cache lines.
-//		r4 = length remaining after cache lines (0..127)
-//		r5 = ptr (128-byte aligned)
-//		r6 = 0
-//		ctr = count of cache lines to 0
-
-        .align	5
-bz_dcbz128:
-        dcbz128	0,r5			// zero a 128-byte cache line
-        addi	r5,r5,128
-        bdnz	bz_dcbz128
-        
-        b		bzero_tail		// handle leftovers
-
-
-// Handle memset() for nonzero values.  This case is relatively infrequent;
-// the large majority of memset() calls are for 0.
-//		r3 = ptr
-//		r4 = count
-//		r6 = value in lower byte (nonzero)
-
-memset1:
-        cmplwi	r4,16			// too short to bother aligning?
-        rlwimi	r6,r6,8,16,23	// replicate value to low 2 bytes
-        mr		r5,r3			// make working copy of operand ptr
-        rlwimi	r6,r6,16,0,15	// value now in all 4 bytes
-        blt		bzero_tail		// length<16, we won't be using "std"
-        mfsprg	r10,2			// get feature flags
-        neg		r7,r5			// start to compute #bytes to align
-        rlwinm	r6,r6,0,1,0		// value now in all 8 bytes (if 64-bit)
-        andi.	r0,r7,7			// r0 <- #bytes to doubleword align
-        stw		r6,0(r5)		// store 8 bytes to avoid a loop
-        stw		r6,4(r5)
-        mtcrf	0x02,r10		// get pf64Bit flag etc in cr6
-        sub		r4,r4,r0		// adjust count
-        add		r5,r5,r0		// doubleword align ptr
-        b		bzero_tail
-        
-        
-
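For reference, the rlwimi/rlwinm sequence in memset1 above replicates the fill byte across the register so each stw/std writes the pattern four or eight bytes at a time. A C sketch of just that replication step (illustrative, not the kernel's code):

    #include <stdint.h>

    static uint64_t replicate_byte(uint8_t c)
    {
        uint64_t v = c;
        v |= v << 8;    /* value in low 2 bytes: rlwimi r6,r6,8,16,23 */
        v |= v << 16;   /* value in all 4 bytes: rlwimi r6,r6,16,0,15 */
        v |= v << 32;   /* value in all 8 bytes: rlwinm r6,r6,0,1,0   */
        return v;
    }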
diff --git a/osfmk/ppc/cache.s b/osfmk/ppc/cache.s
deleted file mode 100644
index 94aa0aeeb..000000000
--- a/osfmk/ppc/cache.s
+++ /dev/null
@@ -1,389 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <ppc/exception.h>
-#include <assym.s>
-
-/* These routines run in 32- or 64-bit addressing, and handle
- * 32- and 128-byte cache lines.  They do not use compare instructions
- * on addresses, since compares are 32/64-bit-mode-specific.
- */
-
-#define	kDcbf			0x1
-#define	kDcbfb			31
-#define	kDcbi			0x2
-#define	kDcbib			30
-#define	kIcbi			0x4
-#define	kIcbib			29
-
-
-/*
- * extern void flush_dcache(vm_offset_t addr, unsigned count, boolean phys);
- * extern void flush_dcache64(addr64_t addr, unsigned count, boolean phys);
- *
- * flush_dcache takes a virtual or physical address and a count to flush
- * (it can be called for multiple virtual pages).
- *
- * It flushes the data cache for the address range in question.
- *
- * If 'phys' is non-zero then physical addresses are used.
- */
-
-
- 
-        .text
-        .align	5
-        .globl	_flush_dcache
-_flush_dcache:
-        li		r0,kDcbf					// use DCBF instruction
-        rlwinm	r3,r3,0,0,31				// truncate address in case this is a 64-bit machine
-        b		cache_op_join				// join common code
-
-        .align	5
-        .globl	_flush_dcache64
-_flush_dcache64:
-		rlwinm	r3,r3,0,1,0					; Duplicate high half of long long paddr into top of reg
-		li		r0,kDcbf					// use DCBF instruction
-		rlwimi	r3,r4,0,0,31				; Combine bottom of long long to full 64-bits
-		mr		r4,r5						; Move count
-		mr		r5,r6						; Move physical flag
-        b		cache_op_join				// join common code
-
-
-/*
- * extern void invalidate_dcache(vm_offset_t va, unsigned count, boolean phys);
- * extern void invalidate_dcache64(addr64_t va, unsigned count, boolean phys);
- *
- * invalidate_dcache takes a virtual or physical address and a count to
- * invalidate (it can be called for multiple virtual pages).
- *
- * It invalidates the data cache for the address range in question.
- */
- 
-        .globl	_invalidate_dcache
-_invalidate_dcache:
-        li		r0,kDcbi					// use DCBI instruction
-        rlwinm	r3,r3,0,0,31				// truncate address in case this is a 64-bit machine
-        b		cache_op_join				// join common code
-
-
-        .align	5
-        .globl	_invalidate_dcache64
-_invalidate_dcache64:
-		rlwinm	r3,r3,0,1,0					; Duplicate high half of long long paddr into top of reg
-        li		r0,kDcbi					// use DCBI instruction
-		rlwimi	r3,r4,0,0,31				; Combine bottom of long long to full 64-bits
-		mr		r4,r5						; Move count
-		mr		r5,r6						; Move physical flag
-        b		cache_op_join				// join common code
-
-/*
- * extern void invalidate_icache(vm_offset_t addr, unsigned cnt, boolean phys);
- * extern void invalidate_icache64(addr64_t addr, unsigned cnt, boolean phys);
- *
- * invalidate_icache takes a virtual or physical address and a
- * count to invalidate (it can be called for multiple virtual pages).
- *
- * It invalidates the instruction cache for the address range in question.
- */
- 
-        .globl	_invalidate_icache
-_invalidate_icache:
-        li		r0,kIcbi					// use ICBI instruction
-        rlwinm	r3,r3,0,0,31				// truncate address in case this is a 64-bit machine
-        b		cache_op_join				// join common code
-        
-
-        .align	5
-        .globl	_invalidate_icache64
-_invalidate_icache64:
-		rlwinm	r3,r3,0,1,0					; Duplicate high half of long long paddr into top of reg
-        li		r0,kIcbi					// use ICBI instruction
-		rlwimi	r3,r4,0,0,31				; Combine bottom of long long to full 64-bits
-		mr		r4,r5						; Move count
-		mr		r5,r6						; Move physical flag
-        b		cache_op_join				// join common code
-                        
-/*
- * extern void sync_ppage(ppnum_t pa);
- *
- * sync_ppage takes a physical page number.
- *
- * It writes out the data cache and invalidates the instruction
- * cache for the page in question.
- */
-
-        .globl	_sync_ppage
-        .align	5
-_sync_ppage:								// Should be the most commonly called routine, by far 
-		mfsprg	r2,2
-        li		r0,kDcbf+kIcbi				// we need to dcbf and then icbi
-		mtcrf	0x02,r2						; Move pf64Bit to cr6
-        li		r5,1						// set flag for physical addresses
-		li		r4,4096						; Set page size
-		bt++	pf64Bitb,spp64				; Skip if 64-bit (only they take the hint)
-        rlwinm	r3,r3,12,0,19				; Convert to physical address - 32-bit
-        b		cache_op_join				; Join up....
-        
-spp64:	sldi	r3,r3,12					; Convert to physical address - 64-bit        
-        b		cache_op_join				; Join up....
-                        
-
-
-/*
- * extern void sync_cache_virtual(vm_offset_t addr, unsigned count);
- *
- * Like "sync_cache", except it takes a virtual address and byte count.
- * It flushes the data cache, invalidates the I-cache, and syncs.
- */
- 
-        .globl	_sync_cache_virtual
-        .align	5
-_sync_cache_virtual:
-        li		r0,kDcbf+kIcbi				// we need to dcbf and then icbi
-        li		r5,0						// set flag for virtual addresses
-        b		cache_op_join				// join common code
-        
-                        
-/*
- * extern void sync_cache(vm_offset_t pa, unsigned count);
- * extern void sync_cache64(addr64_t pa, unsigned count);
- *
- * sync_cache takes a physical address and a count to sync; it
- * must not be called across multiple virtual pages.
- *
- * It writes out the data cache and invalidates the instruction
- * cache for the address range in question.
- */
-
-        .globl	_sync_cache
-        .align	5
-_sync_cache:
-        li		r0,kDcbf+kIcbi				// we need to dcbf and then icbi
-        li		r5,1						// set flag for physical addresses
-        rlwinm	r3,r3,0,0,31				// truncate address in case this is a 64-bit machine
-        b		cache_op_join				// join common code
-
-        .globl	_sync_cache64
-        .align	5
-_sync_cache64: 
-		rlwinm	r3,r3,0,1,0					; Duplicate high half of long long paddr into top of reg
-        li		r0,kDcbf+kIcbi				// we need to dcbf and then icbi
-		rlwimi	r3,r4,0,0,31				; Combine bottom of long long to full 64-bits
-       	mr		r4,r5						; Copy over the length
-        li		r5,1						// set flag for physical addresses
-
-        
-        // Common code to handle the cache operations.
-
-cache_op_join:								// here with r3=addr, r4=count, r5=phys flag, r0=bits
-        mfsprg	r10,2						// r10 <- processor feature flags
-        cmpwi	cr5,r5,0					// using physical addresses?
-        mtcrf	0x01,r0						// move kDcbf, kDcbi, and kIcbi bits to CR7
-        andi.	r9,r10,pf32Byte+pf128Byte	// r9 <- cache line size
-        mtcrf	0x02,r10					// move pf64Bit bit to CR6
-        subi	r8,r9,1						// r8 <- (linesize-1)
-        beq--	cr5,cache_op_2				// skip if using virtual addresses
-        
-        bf--	pf64Bitb,cache_op_not64		// This is not a 64-bit machine
-       
-        srdi	r12,r3,31					// Slide bit 32 to bit 63
-        cmpldi	r12,1						// Are we in the I/O mapped area?
-        beqlr--								// No cache ops allowed here...
-        
-cache_op_not64:
-        mflr	r12							// save return address
-        bl		EXT(ml_set_physical)		// turn on physical addressing
-        mtlr	r12							// restore return address
-
-        // get r3=first cache line, r4=first line not in set, r6=byte count
-        
-cache_op_2:        
-        add		r7,r3,r4					// point to 1st byte not to operate on
-        andc	r3,r3,r8					// r3 <- 1st line to operate on
-        add		r4,r7,r8					// round up
-        andc	r4,r4,r8					// r4 <- 1st line not to operate on
-        sub.	r6,r4,r3					// r6 <- byte count to operate on
-        beq--	cache_op_exit				// nothing to do
-        bf--	kDcbfb,cache_op_6			// no need to dcbf
-        
-        
-        // DCBF loop
-        
-cache_op_5:
-        sub.	r6,r6,r9					// more to go?
-        dcbf	r6,r3						// flush next line to RAM
-        bne		cache_op_5					// loop if more to go
-        sync								// make sure the data reaches RAM
-        sub		r6,r4,r3					// reset count
-
-
-        // ICBI loop
-        
-cache_op_6:
-        bf--	kIcbib,cache_op_8			// no need to icbi
-cache_op_7:
-        sub.	r6,r6,r9					// more to go?
-        icbi	r6,r3						// invalidate next line
-        bne		cache_op_7
-        sub		r6,r4,r3					// reset count
-        isync
-        sync
-        
-        
-        // DCBI loop
-        
-cache_op_8:
-        bf++	kDcbib,cache_op_exit		// no need to dcbi
-cache_op_9:
-        sub.	r6,r6,r9					// more to go?
-        dcbi	r6,r3						// invalidate next line
-        bne		cache_op_9
-        sync
-        
-        
-        // restore MSR iff necessary and done
-        
-cache_op_exit:
-        beqlr--	cr5							// if using virtual addresses, no need to restore MSR
-        b		EXT(ml_restore)				// restore MSR and return
-
-
-////////////////////////////////////////////////////
-
-        .align	5
-        .globl	_dcache_incoherent_io_store64
-_dcache_incoherent_io_store64:
-		rlwinm	r3,r3,0,1,0					; Duplicate high half of long long paddr into top of reg
-		rlwimi	r3,r4,0,0,31				; Combine bottom of long long to full 64-bits
-		mr		r4,r5						; Move count
-
-											// here with r3=addr, r4=count
-        mfsprg	r10,2						// r10 <- processor feature flags
-        andi.	r9,r10,pf32Byte+pf128Byte	// r9 <- cache line size
-        mtcrf	0x02,r10					// move pf64Bit bit to CR6
-        subi	r8,r9,1						// r8 <- (linesize-1)
-        
-        bf--	pf64Bitb,cache_ios_not64	// This is not a 64-bit machine
-       
-        srdi	r12,r3,31					// Slide bit 32 to bit 63
-        cmpldi	r12,1						// Are we in the I/O mapped area?
-        beqlr--								// No cache ops allowed here...
-        
-cache_ios_not64:
-        mflr	r12							// save return address
-        bl		EXT(ml_set_physical)		// turn on physical addressing
-        mtlr	r12							// restore return address
-
-        // get r3=first cache line, r4=first line not in set, r6=byte count
-        add		r7,r3,r4					// point to 1st byte not to operate on
-        andc	r3,r3,r8					// r3 <- 1st line to operate on
-        add		r4,r7,r8					// round up
-        andc	r4,r4,r8					// r4 <- 1st line not to operate on
-        sub.	r6,r4,r3					// r6 <- byte count to operate on
-        beq--	cache_ios_exit				// nothing to do
-        
-        sub.	r6,r6,r9					// >1 line?
-        beq		cache_ios_last_line			// use dcbst on all lines but last
-        
-        // DCBST loop
-cache_ios_5:
-        sub.	r6,r6,r9					// more to go?
-        dcbst	r6,r3						// store next line
-        bne		cache_ios_5					// loop if more to go
-
-cache_ios_last_line:
-        sync								// flush last line
-        isync
-        dcbf	r6,r3
-        sync
-        isync
-        add		r6,r6,r3
-        lwz		r0,0(r6)					// make sure the data reaches RAM (not just the memory controller)
-        isync
-
-        // restore MSR
-cache_ios_exit:
-        b		EXT(ml_restore)				// restore MSR and return
-
-
-////////////////////////////////////////////////////
-
-        .align	5
-        .globl	_dcache_incoherent_io_flush64
-_dcache_incoherent_io_flush64:
-		rlwinm	r3,r3,0,1,0					; Duplicate high half of long long paddr into top of reg
-		rlwimi	r3,r4,0,0,31				; Combine bottom of long long to full 64-bits
-		mr		r4,r5						; Move count
-
-											// here with r3=addr, r4=count
-        mfsprg	r10,2						// r10 <- processor feature flags
-        andi.	r9,r10,pf32Byte+pf128Byte	// r9 <- cache line size
-        mtcrf	0x02,r10					// move pf64Bit bit to CR6
-        subi	r8,r9,1						// r8 <- (linesize-1)
-        
-        bf--	pf64Bitb,cache_iof_not64	// This is not a 64-bit machine
-       
-        srdi	r12,r3,31					// Slide bit 32 to bit 63
-        cmpldi	r12,1						// Are we in the I/O mapped area?
-        beqlr--								// No cache ops allowed here...
-        
-cache_iof_not64:
-        mflr	r12							// save return address
-        bl		EXT(ml_set_physical)		// turn on physical addressing
-        mtlr	r12							// restore return address
-
-        // get r3=first cache line, r4=first line not in set, r6=byte count
-        add		r7,r3,r4					// point to 1st byte not to operate on
-        andc	r3,r3,r8					// r3 <- 1st line to operate on
-        add		r4,r7,r8					// round up
-        andc	r4,r4,r8					// r4 <- 1st line not to operate on
-        sub.	r6,r4,r3					// r6 <- byte count to operate on
-        beq--	cache_iof_exit				// nothing to do
-        
-        // DCBF loop
-cache_iof_5:
-        sub.	r6,r6,r9					// more to go?
-        dcbf	r6,r3						// store next line
-        bne		cache_iof_5					// loop if more to go
-
-cache_iof_last_line:
-        sync								// flush last line
-        isync
-
-        // restore MSR
-cache_iof_exit:
-        b		EXT(ml_restore)				// restore MSR and return
-
-
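For reference, cache_op_join above rounds the operand down and up to whole cache lines before looping. The arithmetic is the standard power-of-two align-down/round-up; a minimal C sketch with the per-line operation left abstract (names are illustrative):

    /* linesize must be a power of two (32 or 128 on these processors). */
    static void for_each_line(unsigned long addr, unsigned long count,
                              unsigned long linesize,
                              void (*op)(unsigned long line))
    {
        unsigned long mask  = linesize - 1;
        unsigned long first = addr & ~mask;                  /* 1st line to operate on     */
        unsigned long limit = (addr + count + mask) & ~mask; /* 1st line not to operate on */
        unsigned long p;

        for (p = first; p < limit; p += linesize)
            op(p);                          /* dcbf / dcbi / icbi in the assembly */
    }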
diff --git a/osfmk/ppc/commpage/atomic.s b/osfmk/ppc/commpage/atomic.s
deleted file mode 100644
index a53e61fe3..000000000
--- a/osfmk/ppc/commpage/atomic.s
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>					// EXT, LEXT
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-
-/* OSAtomic.h library native implementations. */
-
-        .text
-        .align	2
-
-atomic_add32:						// int32_t OSAtomicAdd32( int32_t amt, int32_t *value );
-1:
-		lwarx   r5,0,r4
-		add		r6,r3,r5
-		stwcx.  r6,0,r4
-		bne--   1b
-		mr		r3,r6
-		blr
-		
-    COMMPAGE_DESCRIPTOR(atomic_add32,_COMM_PAGE_ATOMIC_ADD32,0,0,kCommPageBoth)
-
-
-atomic_add64:						// int64_t OSAtomicAdd64( int64_t amt, int64_t *value );
-1:
-		ldarx   r5,0,r4
-		add		r6,r3,r5
-		stdcx.  r6,0,r4
-		bne--   1b
-		mr		r3,r6
-		blr
-		
-    COMMPAGE_DESCRIPTOR(atomic_add64,_COMM_PAGE_ATOMIC_ADD64,k64Bit,0,kCommPage64)
-
-/* WARNING: Libc clients assume compare-and-swap preserves r4, r5, and r9-r12! */
-/* This is the no-barrier version */
-compare_and_swap32_on32:			// bool OSAtomicCompareAndSwap32( int32_t old, int32_t new, int32_t *value);
-1:
-		lwarx   r7,0,r5
-		cmplw   r7,r3
-		bne-	2f
-		stwcx.  r4,0,r5
-		bne-	1b
-		li		r3,1
-		blr
-2:
-		li		r3,0				// return failure
-		blr
-
-    COMMPAGE_DESCRIPTOR(compare_and_swap32_on32,_COMM_PAGE_COMPARE_AND_SWAP32,0,k64Bit,kCommPageBoth)
-
-
-/* WARNING: Libc clients assume compare-and-swap preserves r4, r5, and r9-r12! */
-/* This is the no-barrier version */
-compare_and_swap32_on64:			// bool OSAtomicCompareAndSwap32( int32_t old, int32_t new, int32_t *value);
-1:
-		lwarx   r7,0,r5
-		cmplw   r7,r3
-		bne--	2f
-		stwcx.  r4,0,r5
-		bne--	1b
-		li		r3,1
-		blr
-2:
-		li		r8,-8				// on 970, must release reservation
-		li		r3,0				// return failure
-		stwcx.  r4,r8,r1			// store into red zone to release
-		blr
-
-    COMMPAGE_DESCRIPTOR(compare_and_swap32_on64,_COMM_PAGE_COMPARE_AND_SWAP32,k64Bit,0,kCommPageBoth)
-
-
-/* WARNING: Libc clients assume compare-and-swap preserves r4, r5, and r9-r12! */
-/* This is the no-barrier version */
-compare_and_swap64:					// bool OSAtomicCompareAndSwap64( int64_t old, int64_t new, int64_t *value);
-1:
-		ldarx   r7,0,r5
-		cmpld   r7,r3
-		bne--	2f
-		stdcx.  r4,0,r5
-		bne--	1b
-		li		r3,1
-		blr
-2:
-		li		r8,-8				// on 970, must release reservation
-		li		r3,0				// return failure
-		stdcx.  r4,r8,r1			// store into red zone to release
-		blr
-
-    COMMPAGE_DESCRIPTOR(compare_and_swap64,_COMM_PAGE_COMPARE_AND_SWAP64,k64Bit,0,kCommPage64)
-
-/* WARNING: Libc clients assume compare-and-swap preserves r4, r5, and r9-r12! */
-/* This version of compare-and-swap incorporates a memory barrier. */
-compare_and_swap32_on32b:			// bool OSAtomicCompareAndSwapBarrier32( int32_t old, int32_t new, int32_t *value);
-        eieio                       // write barrier, NOP'd on a UP
-1:
-		lwarx   r7,0,r5
-		cmplw   r7,r3
-		bne-	2f
-		stwcx.  r4,0,r5
-		bne-	1b
-        isync                       // read barrier, NOP'd on a UP
-		li		r3,1
-		blr
-2:
-		li		r3,0				// return failure
-		blr
-
-    COMMPAGE_DESCRIPTOR(compare_and_swap32_on32b,_COMM_PAGE_COMPARE_AND_SWAP32B,0,k64Bit,kCommPageBoth+kCommPageSYNC+kCommPageISYNC)
-
-
-/* WARNING: Libc clients assume compare-and-swap preserves r4, r5, and r9-r12! */
-/* This version of compare-and-swap incorporates a memory barrier. */
-compare_and_swap32_on64b:			// bool OSAtomicCompareAndSwapBarrier32( int32_t old, int32_t new, int32_t *value);
-        lwsync                      // write barrier, NOP'd on a UP
-1:
-		lwarx   r7,0,r5
-		cmplw   r7,r3
-		bne--	2f
-		stwcx.  r4,0,r5
-		bne--	1b
-        isync                       // read barrier, NOP'd on a UP
-		li		r3,1
-		blr
-2:
-		li		r8,-8				// on 970, must release reservation
-		li		r3,0				// return failure
-		stwcx.  r4,r8,r1			// store into red zone to release
-		blr
-
-    COMMPAGE_DESCRIPTOR(compare_and_swap32_on64b,_COMM_PAGE_COMPARE_AND_SWAP32B,k64Bit,0,kCommPageBoth+kCommPageSYNC+kCommPageISYNC)
-
-
-/* WARNING: Libc clients assume compare-and-swap preserves r4, r5, and r9-r12! */
-/* This version of compare-and-swap incorporates a memory barrier. */
-compare_and_swap64b:				// bool OSAtomicCompareAndSwapBarrier64( int64_t old, int64_t new, int64_t *value);
-        lwsync                      // write barrier, NOP'd on a UP
-1:
-		ldarx   r7,0,r5
-		cmpld   r7,r3
-		bne--	2f
-		stdcx.  r4,0,r5
-		bne--	1b
-        isync                       // read barrier, NOP'd on a UP
-		li		r3,1
-		blr
-2:
-		li		r8,-8				// on 970, must release reservation
-		li		r3,0				// return failure
-		stdcx.  r4,r8,r1			// store into red zone to release
-		blr
-
-    COMMPAGE_DESCRIPTOR(compare_and_swap64b,_COMM_PAGE_COMPARE_AND_SWAP64B,k64Bit,0,kCommPage64+kCommPageSYNC+kCommPageISYNC)
-
-
-atomic_enqueue32:					// void OSAtomicEnqueue( void **list, void *new, size_t offset);
-1:
-		lwarx   r6,0,r3				// get link to 1st on list
-		stwx	r6,r4,r5			// hang list off new node
-		eieio						// make sure the "stwx" comes before "stwcx." (nop'd on UP)
-		stwcx.  r4,0,r3				// make new 1st on list
-		beqlr++
-		b		1b
-		
-    COMMPAGE_DESCRIPTOR(atomic_enqueue32,_COMM_PAGE_ENQUEUE,0,0,kCommPageSYNC+kCommPage32)
-
-
-atomic_enqueue64:					// void OSAtomicEnqueue( void **list, void *new, size_t offset);
-1:
-		ldarx   r6,0,r3				// get link to 1st on list
-		stdx	r6,r4,r5			// hang list off new node
-		lwsync						// make sure the "stdx" comes before the "stdcx." (nop'd on UP)
-		stdcx.  r4,0,r3				// make new 1st on list
-		beqlr++
-		b		1b
-		
-    COMMPAGE_DESCRIPTOR(atomic_enqueue64,_COMM_PAGE_ENQUEUE,k64Bit,0,kCommPageSYNC+kCommPage64)
-
-
-atomic_dequeue32_on32:              // void* OSAtomicDequeue( void **list, size_t offset);
-        mr      r5,r3
-1:
-		lwarx   r3,0,r5				// get 1st in list
-        cmpwi   r3,0                // null?
-        beqlr                       // yes, list empty
-		lwzx	r6,r3,r4			// get 2nd
-		stwcx.  r6,0,r5				// make 2nd first
-		bne--	1b
-		isync						// cancel read-aheads (nop'd on UP)
-		blr
-
-    COMMPAGE_DESCRIPTOR(atomic_dequeue32_on32,_COMM_PAGE_DEQUEUE,0,k64Bit,kCommPageISYNC+kCommPage32)
-
-
-atomic_dequeue32_on64:              // void* OSAtomicDequeue( void **list, size_t offset);
-        mr      r5,r3
-        li      r7,-8               // use red zone to release reservation if necessary
-1:
-		lwarx   r3,0,r5				// get 1st in list
-        cmpwi   r3,0                // null?
-        beq     2f
-		lwzx	r6,r3,r4			// get 2nd
-		stwcx.  r6,0,r5				// make 2nd first
-		isync						// cancel read-aheads (nop'd on UP)
-		beqlr++                     // return next element in r3
-        b       1b                  // retry (lost reservation)
-2:
-        stwcx.  r0,r7,r1            // on 970, release reservation using red zone
-		blr                         // return null
-
-    COMMPAGE_DESCRIPTOR(atomic_dequeue32_on64,_COMM_PAGE_DEQUEUE,k64Bit,0,kCommPageISYNC+kCommPage32)
-
-
-atomic_dequeue64:					// void* OSAtomicDequeue( void **list, size_t offset);
-        mr      r5,r3
-        li      r7,-8               // use red zone to release reservation if necessary
-1:
-		ldarx   r3,0,r5				// get 1st in list
-        cmpdi   r3,0                // null?
-        beq     2f
-		ldx     r6,r3,r4			// get 2nd
-		stdcx.  r6,0,r5				// make 2nd first
-		isync						// cancel read-aheads (nop'd on UP)
-		beqlr++                     // return next element in r3
-        b       1b                  // retry (lost reservation)
-2:
-        stdcx.  r0,r7,r1            // on 970, release reservation using red zone
-		blr                         // return null
-
-    COMMPAGE_DESCRIPTOR(atomic_dequeue64,_COMM_PAGE_DEQUEUE,k64Bit,0,kCommPageISYNC+kCommPage64)
-
-
-memory_barrier_up:					// void OSMemoryBarrier( void )
-		blr							// nothing to do on UP
-		
-    COMMPAGE_DESCRIPTOR(memory_barrier_up,_COMM_PAGE_MEMORY_BARRIER,kUP,0,kCommPageBoth)
-
-
-memory_barrier_mp32:				// void OSMemoryBarrier( void )
-		isync						// we use eieio in preference to sync...
-		eieio						// ...because it is faster
-		blr
-		
-    COMMPAGE_DESCRIPTOR(memory_barrier_mp32,_COMM_PAGE_MEMORY_BARRIER,0,kUP+k64Bit,kCommPage32)
-
-
-memory_barrier_mp64:				// void OSMemoryBarrier( void )
-		isync
-		lwsync						// on 970, lwsync is faster than eieio
-		blr
-		
-    COMMPAGE_DESCRIPTOR(memory_barrier_mp64,_COMM_PAGE_MEMORY_BARRIER,k64Bit,kUP,kCommPageBoth)
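For reference, the lwarx/stwcx. pairs above are PowerPC load-reserve/store-conditional loops. The OSAtomicAdd32 contract they implement looks roughly like this in portable C, sketched with the GCC/Clang __atomic builtins rather than Apple's commpage mechanism:

    #include <stdbool.h>
    #include <stdint.h>

    int32_t atomic_add32(int32_t amt, int32_t *value)
    {
        int32_t old, desired;
        do {
            old     = __atomic_load_n(value, __ATOMIC_RELAXED);    /* lwarx  */
            desired = old + amt;
        } while (!__atomic_compare_exchange_n(value, &old, desired,
                                              true,                /* weak, like stwcx. */
                                              __ATOMIC_RELAXED, __ATOMIC_RELAXED));
        return desired;                      /* returns the new value */
    }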
diff --git a/osfmk/ppc/commpage/bcopy_64.s b/osfmk/ppc/commpage/bcopy_64.s
deleted file mode 100644
index 4d0b2c9bd..000000000
--- a/osfmk/ppc/commpage/bcopy_64.s
+++ /dev/null
@@ -1,306 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* =======================================
- * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
- * =======================================
- *
- * Version of 2/20/2003, for a hypothetical 64-bit processor without Altivec.
- * This version might be used when bringing up new processors with known
- * Altivec bugs that need to be worked around.  It is not particularly well
- * optimized.
- *
- * For 64-bit processors with a 128-byte cache line, running in either
- * 32- or 64-bit mode.  This is written for 32-bit execution; the kernel
- * will translate it to 64-bit code when it compiles the 64-bit commpage.
- *
- * Register usage.  Note we use R2, so this code will not run in a PEF/CFM
- * environment.
- *   r0  = "w7" or temp
- *   r2  = "w8"
- *   r3  = not used, as memcpy and memmove return 1st parameter as a value
- *   r4  = source ptr ("rs")
- *   r5  = count of bytes to move ("rc")
- *   r6  = "w1"
- *   r7  = "w2"
- *   r8  = "w3"
- *   r9  = "w4"
- *   r10 = "w5"
- *   r11 = "w6"
- *   r12 = destination ptr ("rd")
- */
-#define rs	r4
-#define rd	r12
-#define rc	r5
-#define	rv	r2
-
-#define w1	r6
-#define w2	r7
-#define w3	r8
-#define	w4	r9
-#define	w5	r10
-#define	w6	r11
-#define	w7	r0
-#define	w8	r2
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-        .text
-
-#define	kLong		64				// too long for inline loopless code
-
-
-// Main entry points.
-
-        .align 	5
-bcopy_64:							// void bcopy(const void *src, void *dst, size_t len)
-        cmplwi	rc,kLong			// short or long?
-        sub		w1,r4,r3			// must move in reverse if (rd-rs)<rc
-        cmplw	cr1,w1,rc			// set cr1 blt iff we must move reverse
-        mr		rd,r4				// start to move registers to canonical spot
-        mr		rs,r3
-        blt		LShort				// handle short operands
-        dcbt	0,r3				// touch in destination
-        b		LLong				// join medium/long operand code
-
-// NB: memmove() must be 8 words past bcopy(), to agree with comm page addresses.
-        
-        .align	5
-Lmemcpy_g4:							// void* memcpy(void *dst, void *src, size_t len)
-Lmemmove_g4:						// void* memmove(void *dst, const void *src, size_t len)
-        cmplwi	rc,kLong			// short or long?
-        sub		w1,r3,r4			// must move in reverse if (rd-rs)<rc
-        dcbt	0,r4				// touch in the first line of source
-        cmplw	cr1,w1,rc			// set cr1 blt iff we must move reverse
-        mr		rd,r3				// must leave r3 alone, it is return value for memcpy etc
-        bge		LLong				// handle medium or long operands
-
-// Handle short operands.
-        
-LShort:
-        mtcrf	0x02,rc				// put length bits 26-27 in cr6 (faster one cr at a time)
-        mtcrf	0x01,rc				// put length bits 28-31 in cr7
-        blt		cr1,LShortReverse
-        
-// Forward short operands.  This is the most frequent case, so it is inline.
-
-LShort64:							// enter to xfer last 64 bytes
-        bf		26,0f				// 64-byte chunk to xfer?
-        ld		w1,0(rs)
-        ld		w2,8(rs)
-        ld		w3,16(rs)
-        ld		w4,24(rs)
-        addi	rs,rs,32
-        std		w1,0(rd)
-        std		w2,8(rd)
-        std		w3,16(rd)
-        std		w4,24(rd)
-        addi	rd,rd,32
-0:
-        bf		27,1f				// quadword to move?
-        ld		w1,0(rs)
-        ld		w2,8(rs)
-        addi	rs,rs,16
-        std		w1,0(rd)
-        std		w2,8(rd)
-        addi	rd,rd,16
-1:
-        bf		28,2f				// doubleword?
-        ld		w1,0(rs)
-        addi	rs,rs,8
-        std		w1,0(rd)
-        addi	rd,rd,8
-2:
-        bf		29,3f				// word?
-        lwz		w1,0(rs)
-        addi	rs,rs,4
-        stw		w1,0(rd)
-        addi	rd,rd,4
-3:
-        bf		30,4f				// halfword to move?
-        lhz		w1,0(rs)
-        addi	rs,rs,2
-        sth		w1,0(rd)
-        addi	rd,rd,2
-4:
-        bflr	31					// skip if no odd byte
-        lbz		w1,0(rs)
-        stb		w1,0(rd)
-        blr
-        
-        
-// Handle short reverse operands.
-//		cr6 = bits 26-27 of length
-//		cr7 = bits 28-31 of length      
-
-LShortReverse:
-        add		rs,rs,rc			// adjust ptrs for reverse move
-        add		rd,rd,rc
-LShortReverse64:					// enter to xfer last 64 bytes
-        bf		26,0f				// 64-byte chunk to xfer?
-        ld		w1,-8(rs)
-        ld		w2,-16(rs)
-        ld		w3,-24(rs)
-        ldu		w4,-32(rs)
-        std		w1,-8(rd)
-        std		w2,-16(rd)
-        std		w3,-24(rd)
-        stdu	w4,-32(rd)
-0:
-        bf		27,1f				// quadword to move?
-        ld		w1,-8(rs)
-        ldu		w2,-16(rs)
-        std		w1,-8(rd)
-        stdu	w2,-16(rd)
-1:
-        bf		28,2f				// doubleword?
-        ldu		w1,-8(rs)
-        stdu	w1,-8(rd)
-2:
-        bf		29,3f				// word?
-        lwzu	w1,-4(rs)
-        stwu	w1,-4(rd)
-3:
-        bf		30,4f				// halfword to move?
-        lhzu	w1,-2(rs)
-        sthu	w1,-2(rd)
-4:
-        bflr	31					// done if no odd byte
-        lbz 	w1,-1(rs)			// no update
-        stb 	w1,-1(rd)
-        blr
-        
-
-// Long operands.
-//     cr1 = blt iff we must move reverse
-
-        .align	4
-LLong:
-        dcbtst	0,rd				// touch in destination
-        neg		w3,rd				// start to compute #bytes to align destination
-        andi.	w6,w3,7				// w6 <- #bytes to 8-byte align destination
-        blt		cr1,LLongReverse	// handle reverse moves
-        mtctr	w6					// set up for loop to align destination
-        sub		rc,rc,w6			// adjust count
-        beq		LAligned			// destination already 8-byte aligned
-1:
-        lbz		w1,0(rs)
-        addi	rs,rs,1
-        stb		w1,0(rd)
-        addi	rd,rd,1
-        bdnz	1b
-        
-// Destination is 8-byte aligned.
-
-LAligned:
-        srwi.	w2,rc,6				// w2 <- count of 64-byte chunks
-        mtcrf	0x02,rc				// leftover byte count to cr (faster one cr at a time)
-        mtcrf	0x01,rc				// put length bits 28-31 in cr7
-        beq		LShort64			// no 64-byte chunks
-        mtctr	w2
-        b		1f
-        
-// Loop moving 64-byte chunks.
-
-        .align	5
-1:
-        ld		w1,0(rs)
-        ld		w2,8(rs)
-        ld		w3,16(rs)
-        ld		w4,24(rs)
-        ld		w5,32(rs)
-        ld		w6,40(rs)
-        ld		w7,48(rs)
-        ld		w8,56(rs)
-        addi	rs,rs,64
-        std		w1,0(rd)
-        std		w2,8(rd)
-        std		w3,16(rd)
-        std		w4,24(rd)
-        std		w5,32(rd)
-        std		w6,40(rd)
-        std		w7,48(rd)
-        std		w8,56(rd)
-        addi	rd,rd,64
-        bdnz	1b
-        
-        b		LShort64
-
-        
-// Handle reverse moves.
-
-LLongReverse:
-        add		rd,rd,rc				// point to end of operands
-        add		rs,rs,rc
-        andi.	r0,rd,7					// is destination 8-byte aligned?
-        sub		rc,rc,r0				// adjust count
-        mtctr	r0						// set up for byte loop
-        beq		LRevAligned				// already aligned
-        
-1:
-        lbzu	w1,-1(rs)
-        stbu	w1,-1(rd)
-        bdnz	1b
-
-// Destination is 8-byte aligned.
-
-LRevAligned:
-        srwi.	w2,rc,6				// w2 <- count of 64-byte chunks
-        mtcrf	0x02,rc				// leftover byte count to cr (faster one cr at a time)
-        mtcrf	0x01,rc				// put length bits 28-31 in cr7
-        beq		LShortReverse64		// no 64-byte chunks
-        mtctr	w2
-        b		1f
-
-// Loop over 64-byte chunks (reverse).
-
-        .align	5
-1:
-        ld		w1,-8(rs)
-        ld		w2,-16(rs)
-        ld		w3,-24(rs)
-        ld		w4,-32(rs)
-        ld		w5,-40(rs)
-        ld		w6,-48(rs)
-        ld		w7,-56(rs)
-        ldu		w8,-64(rs)
-        std		w1,-8(rd)
-        std		w2,-16(rd)
-        std		w3,-24(rd)
-        std		w4,-32(rd)
-        std		w5,-40(rd)
-        std		w6,-48(rd)
-        std		w7,-56(rd)
-        stdu	w8,-64(rd)
-        bdnz	1b
-        
-        b		LShortReverse64
-
-	COMMPAGE_DESCRIPTOR(bcopy_64,_COMM_PAGE_BCOPY,k64Bit,kHasAltivec,kCommPageBoth+kPort32to64)
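For reference, bcopy_64 above picks the copy direction with a single unsigned compare: (rd - rs) < rc is true exactly when the destination starts inside the source range, the only case that requires copying in reverse. A byte-at-a-time C sketch of that rule (illustrative, far slower than the real routine):

    #include <stddef.h>
    #include <stdint.h>

    static void overlap_safe_copy(void *dst, const void *src, size_t len)
    {
        unsigned char *d = dst;
        const unsigned char *s = src;

        if ((uintptr_t)d - (uintptr_t)s < (uintptr_t)len) {
            while (len--)                  /* reverse: dst overlaps ahead of src */
                d[len] = s[len];
        } else {
            size_t i;
            for (i = 0; i < len; i++)      /* forward: the common case */
                d[i] = s[i];
        }
    }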
diff --git a/osfmk/ppc/commpage/bcopy_970.s b/osfmk/ppc/commpage/bcopy_970.s
deleted file mode 100644
index 61916abf2..000000000
--- a/osfmk/ppc/commpage/bcopy_970.s
+++ /dev/null
@@ -1,626 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* =======================================
- * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
- * =======================================
- *
- * Version of 6/11/2003, tuned for the IBM 970.
- *
- * Register usage.  Note the rather delicate way we assign multiple uses
- * to the same register.  Beware.
- *   r0  = temp (NB: cannot use r0 for any constant such as "c16")
- *   r3  = not used, as memcpy and memmove return 1st parameter as a value
- *   r4  = source ptr ("rs")
- *   r5  = count of bytes to move ("rc")
- *   r6  = "w1", "c16", or "cm17"
- *   r7  = "w2", "c32", or "cm33"
- *   r8  = "w3", "c48", or "cm49"
- *   r9  = "w4",        or "cm1"
- *   r10 = vrsave ("rv")
- *   r11 = unused
- *   r12 = destination ptr ("rd")
- *   v0  = permute vector ("vp") 
- * v1-v8 = qw's loaded from source
- *v9-v12 = permuted qw's ("vw", "vx", "vy", and "vz")
- */
-#define rs	r4
-#define rd	r12
-#define rc	r5
-#define	rv	r10
-
-#define w1	r6
-#define w2	r7
-#define w3	r8
-#define	w4	r9
-
-#define c16		r6
-#define cm17	r6
-#define c32		r7
-#define cm33	r7
-#define c48		r8
-#define cm49	r8
-#define cm1		r9
-
-#define	vp	v0
-#define	vw	v9
-#define	vx	v10
-#define	vy	v11
-#define	vz	v12
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-        .text
-/*
- * WARNING: this code is written for 32-bit mode, and ported by the kernel if necessary
- * to 64-bit mode for use in the 64-bit commpage.  This "port" consists of the following
- * simple transformations:
- *      - all word compares are changed to doubleword
- *      - all "srwi[.]" opcodes are changed to "srdi[.]"                      
- * Nothing else is done.  For this to work, the following rules must be
- * carefully followed:
- *      - do not use carry or overflow
- *      - only use record mode if you are sure the results are mode-invariant;
- *        for example, all "andi." and almost all "rlwinm." are fine
- *      - do not use "slwi", "slw", or "srw"
- * An imaginative programmer could break the porting model in other ways, but the above
- * are the most likely problem areas.  It is perhaps surprising how well this
- * simple method works in practice.
- */
-
-#define	kShort		64
-#define	kVeryLong	(128*1024)
-
-
-// Main entry points.
-
-        .align 	5
-bcopy_970:							// void bcopy(const void *src, void *dst, size_t len)
-        cmplwi	rc,kShort			// short or long?
-        sub		w1,r4,r3			// must move in reverse if (rd-rs)<rc
-        mr		rd,r4				// move registers to canonical spot
-        mr		rs,r3
-        blt		LShort				// handle short operands
-        dcbt	0,rs				// touch in the first line of source
-        dcbtst	0,rd				// touch in destination
-        b		LLong1				// join long operand code
-
-// NB: memmove() must be 8 words past bcopy(), to agree with comm page addresses.
-
-        .align	5
-Lmemcpy_970:						// void* memcpy(void *dst, void *src, size_t len)
-Lmemmove_970:						// void* memmove(void *dst, const void *src, size_t len)
-        cmplwi	rc,kShort			// short or long?
-        sub		w1,r3,r4			// must move in reverse if (rd-rs)<rc
-        mr		rd,r3				// must leave r3 alone, it is return value for memcpy etc
-        bge		LLong0				// handle long operands
-
-// Handle short operands.
-//		rs = source
-//		rd = destination
-//		rc = count
-//		w1 = (rd-rs), must move reverse if (rd-rs)<rc
-        
-LShort:
-        cmplw	cr1,w1,rc			// set cr1 blt if we must move reverse
-        mtcrf	0x02,rc				// move length to cr6 and cr7 one at a time
-        mtcrf	0x01,rc
-        blt--	cr1,LShortReverse
-        
-// Forward short operands.  This is the most frequent case, so it is inline.
-
-        bf		26,0f				// 32-byte chunk to move?
-        ld		w1,0(rs)
-        ld		w2,8(rs)
-        ld		w3,16(rs)
-        ld		w4,24(rs)
-        addi	rs,rs,32
-        std		w1,0(rd)
-        std		w2,8(rd)
-        std		w3,16(rd)
-        std		w4,24(rd)
-        addi	rd,rd,32
-0:
-LShort32:
-        bf		27,1f				// quadword to move?
-        ld		w1,0(rs)
-        ld		w3,8(rs)
-        addi	rs,rs,16
-        std		w1,0(rd)
-        std		w3,8(rd)
-        addi	rd,rd,16
-1:
-LShort16:							// join here to xfer 0-15 bytes
-        bf		28,2f				// doubleword?
-        ld		w1,0(rs)
-        addi	rs,rs,8
-        std		w1,0(rd)
-        addi	rd,rd,8
-2:
-        bf		29,3f				// word?
-        lwz		w1,0(rs)
-        addi	rs,rs,4
-        stw		w1,0(rd)
-        addi	rd,rd,4
-3:
-        bf		30,4f				// halfword to move?
-        lhz		w1,0(rs)
-        addi	rs,rs,2
-        sth		w1,0(rd)
-        addi	rd,rd,2
-4:
-        bflr	31					// skip if no odd byte
-        lbz		w1,0(rs)
-        stb		w1,0(rd)
-        blr
-        
-        
-// Handle short reverse operands.
-//		cr = length in bits 26-31       
-
-LShortReverse:
-        add		rs,rs,rc			// adjust ptrs for reverse move
-        add		rd,rd,rc
-        bf		26,0f				// 32 bytes to move?
-        ld		w1,-8(rs)
-        ld		w2,-16(rs)
-        ld		w3,-24(rs)
-        ldu		w4,-32(rs)
-        std		w1,-8(rd)
-        std		w2,-16(rd)
-        std		w3,-24(rd)
-        stdu	w4,-32(rd)
-0:
-        bf		27,1f				// quadword to move?
-        ld		w1,-8(rs)
-        ldu		w2,-16(rs)
-        std		w1,-8(rd)
-        stdu	w2,-16(rd)
-1:
-LShortReverse16:					// join here to xfer 0-15 bytes and return
-        bf		28,2f				// doubleword?
-        ldu		w1,-8(rs)
-        stdu	w1,-8(rd)
-2:
-        bf		29,3f				// word?
-        lwzu	w1,-4(rs)
-        stwu	w1,-4(rd)
-3:
-        bf		30,4f				// halfword to move?
-        lhzu	w1,-2(rs)
-        sthu	w1,-2(rd)
-4:
-        bflr	31					// done if no odd byte
-        lbz 	w1,-1(rs)			// no update
-        stb 	w1,-1(rd)
-        blr
-        
-
-// Long operands, use Altivec in most cases.
-//		rs = source
-//		rd = destination
-//		rc = count
-//		w1 = (rd-rs), must move reverse if (rd-rs)<rc
-
-LLong0:								// entry from memmove()
-        dcbt	0,rs				// touch in source
-        dcbtst	0,rd				// touch in destination
-LLong1:								// entry from bcopy() with operands already touched in
-        cmplw	cr1,w1,rc			// set cr1 blt iff we must move reverse
-        neg		w3,rd				// start to compute #bytes to align destination
-        rlwinm	w2,w1,0,0xF			// 16-byte aligned?  (w2==0 if so)
-        andi.	w4,w3,0xF			// w4 <- #bytes to 16-byte align destination
-        cmpwi	cr5,w2,0			// set cr5 beq if relatively 16-byte aligned
-        blt--	cr1,LLongReverse	// handle reverse moves
-        sub		rc,rc,w4			// adjust length for aligning destination
-        srwi	r0,rc,7				// get #cache lines to copy (may be 0)
-        cmpwi	cr1,r0,0			// set cr1 on #chunks
-        beq		LFwdAligned			// dest is already aligned
-        
-// 16-byte align destination.
-
-        mtcrf	0x01,w4				// cr7 <- #bytes to align dest (nonzero)
-        bf		31,1f				// byte to move?
-        lbz		w1,0(rs)
-        addi	rs,rs,1
-        stb		w1,0(rd)
-        addi	rd,rd,1
-1:
-        bf		30,2f				// halfword?
-        lhz		w1,0(rs)
-        addi	rs,rs,2
-        sth		w1,0(rd)
-        addi	rd,rd,2
-2:
-        bf		29,3f				// word?
-        lwz		w1,0(rs)
-        addi	rs,rs,4
-        stw		w1,0(rd)
-        addi	rd,rd,4
-3:
-        bf		28,LFwdAligned		// doubleword?
-        ld		w1,0(rs)
-        addi	rs,rs,8
-        std		w1,0(rd)
-        addi	rd,rd,8
-
-
-// Forward, destination is 16-byte aligned.  There are five cases:
-//  1. If the length>=kVeryLong (ie, several pages), then use the
-//     "bigcopy" path that pulls out all the stops.  This is the fastest
-//	   case for cold-cache operands, as operands this long are likely to be.
-//	2. If length>=128 and source is 16-byte aligned, then use the
-//	   lvx/stvx loop over 128-byte chunks.  This is the fastest
-//     case for hot-cache operands, 2nd fastest for cold.
-//	3. If length>=128 and source is not 16-byte aligned, then use the
-//	   lvx/vperm/stvx loop over 128-byte chunks.
-//	4. If length<128 and source is 8-byte aligned, then use the
-//	   ld/std loop over 32-byte chunks.
-//	5. If length<128 and source is not 8-byte aligned, then use the
-//	   lvx/vperm/stvx loop over 32-byte chunks.  This is the slowest case.
-// Registers at this point:
-//		r0/cr1 = count of cache lines ("chunks") that we'll cover (may be 0)
-//			rs = alignment unknown
-//		    rd = 16-byte aligned
-//			rc = bytes remaining
-//			w2 = low 4 bits of (rd-rs), used to check alignment
-//		   cr5 = beq if source is also 16-byte aligned
-
-LFwdAligned:
-        andi.	w3,w2,7				// is source at least 8-byte aligned?
-        mtcrf	0x01,rc				// move leftover count to cr7 for LShort16
-        bne		cr1,LFwdLongVectors	// at least one 128-byte chunk, so use vectors
-        srwi	w1,rc,5				// get 32-byte chunk count
-        mtcrf	0x02,rc				// move bit 27 of length to cr6 for LShort32
-        mtctr	w1					// set up 32-byte loop (w1!=0)
-        beq		LFwdMedAligned		// source is 8-byte aligned, so use ld/std loop
-        mfspr	rv,vrsave			// get bitmap of live vector registers
-        oris	w4,rv,0xFFF8		// we use v0-v12
-        li		c16,16				// get constant used in lvx
-        li		c32,32
-        mtspr	vrsave,w4			// update mask
-        lvx		v1,0,rs				// prefetch 1st source quadword
-        lvsl	vp,0,rs				// get permute vector to shift left
-        
-        
-// Fewer than 128 bytes but not doubleword aligned: use lvx/vperm/stvx.
-
-1:									// loop over 32-byte chunks
-        lvx		v2,c16,rs
-        lvx		v3,c32,rs
-        addi	rs,rs,32
-        vperm	vx,v1,v2,vp
-        vperm	vy,v2,v3,vp
-        vor		v1,v3,v3			// v1 <- v3
-        stvx	vx,0,rd
-        stvx	vy,c16,rd
-        addi	rd,rd,32
-        bdnz	1b
-        
-        mtspr	vrsave,rv			// restore bitmap of live vr's
-        b		LShort32
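
lvx can only load aligned quadwords, so the loop above never issues a misaligned access: lvsl builds a permute pattern from the source's low four address bits, and each vperm splices an output quadword out of two adjacent aligned ones. The same splice in C, scaled down to 64-bit words (copy_realign is hypothetical; the shifts assume PowerPC's big-endian byte order, and the trailing aligned word is over-read just as lvx over-reads within its aligned quad):

    #include <stdint.h>

    // Copy 'words' 8-byte words to an aligned dst from a possibly
    // misaligned src, using only aligned loads plus a two-word merge.
    static void copy_realign(uint64_t *dst, const uint8_t *src, unsigned words)
    {
        unsigned off = (uintptr_t)src & 7;                  // misalignment
        const uint64_t *s = (const uint64_t *)(src - off);  // aligned base
        uint64_t prev = *s++;                               // look-ahead word
        while (words--) {
            uint64_t next = *s++;
            // one vperm with the lvsl pattern does this merge per quadword
            *dst++ = off ? (prev << (8 * off)) | (next >> (8 * (8 - off)))
                         : prev;
            prev = next;
        }
    }
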
-
-        
-// Fewer than 128 bytes and doubleword aligned: use ld/std.
-
-        .align	5
-LFwdMedAligned:									// loop over 32-byte chunks
-        ld		w1,0(rs)
-        ld		w2,8(rs)
-        ld		w3,16(rs)
-        ld		w4,24(rs)
-        addi	rs,rs,32
-        std		w1,0(rd)
-        std		w2,8(rd)
-        std		w3,16(rd)
-        std		w4,24(rd)
-        addi	rd,rd,32
-        bdnz	LFwdMedAligned
-        
-        b		LShort32
-
-        
-// Forward, 128 bytes or more: use vectors.  When entered:
-//	    r0 = 128-byte chunks to move (>0)
-//		rd = 16-byte aligned
-//	   cr5 = beq if source is 16-byte aligned
-//	   cr7 = low 4 bits of rc (ie, leftover byte count 0-15)
-// We set up many registers:
-//	   ctr = number of 128-byte chunks to move
-//	r0/cr0 = leftover QWs to move
-//	   cr7 = low 4 bits of rc (ie, leftover byte count 0-15)
-//	   cr6 = beq if leftover byte count is 0
-//		rv = original value of VRSave
-// c16,c32,c48 = loaded
-
-LFwdLongVectors:
-        mfspr	rv,vrsave			// get bitmap of live vector registers
-        lis		w3,kVeryLong>>16	// cutoff for very-long-operand special case path
-        cmplw	cr1,rc,w3			// very long operand?
-        rlwinm	w3,rc,0,28,31		// move last 0-15 byte count to w3
-        bge--	cr1,LBigCopy        // handle big copies separately
-        mtctr	r0					// set up loop count
-        cmpwi	cr6,w3,0			// set cr6 on leftover byte count
-        oris	w4,rv,0xFFF8		// we use v0-v12
-        rlwinm.	r0,rc,28,29,31		// get number of quadword leftovers (0-7) and set cr0
-        li		c16,16				// get constants used in lvx/stvx
-        mtspr	vrsave,w4			// update mask
-        li		c32,32
-        li		c48,48
-        beq		cr5,LFwdLongAligned	// source is also 16-byte aligned, no need for vperm
-        lvsl	vp,0,rs				// get permute vector to shift left
-        lvx		v1,0,rs				// prefetch 1st source quadword
-        b		LFwdLongUnaligned
-
-
-// Forward, long, unaligned vector loop.
-
-        .align	5					// align inner loops
-LFwdLongUnaligned:					// loop over 128-byte chunks
-        addi	w4,rs,64
-        lvx		v2,c16,rs
-        lvx		v3,c32,rs
-        lvx		v4,c48,rs
-        lvx		v5,0,w4
-        lvx		v6,c16,w4
-        vperm	vw,v1,v2,vp
-        lvx		v7,c32,w4
-        lvx		v8,c48,w4
-        addi	rs,rs,128
-        vperm	vx,v2,v3,vp
-        addi	w4,rd,64
-        lvx		v1,0,rs
-        stvx	vw,0,rd
-        vperm	vy,v3,v4,vp
-        stvx	vx,c16,rd
-        vperm	vz,v4,v5,vp
-        stvx	vy,c32,rd
-        vperm	vw,v5,v6,vp
-        stvx	vz,c48,rd
-        vperm	vx,v6,v7,vp
-        addi	rd,rd,128
-        stvx	vw,0,w4
-        vperm	vy,v7,v8,vp
-        stvx	vx,c16,w4
-        vperm	vz,v8,v1,vp
-        stvx	vy,c32,w4
-        stvx	vz,c48,w4
-        bdnz	LFwdLongUnaligned
-
-        beq		4f					// no leftover quadwords
-        mtctr	r0
-3:									// loop over remaining quadwords
-        lvx		v2,c16,rs
-        addi	rs,rs,16
-        vperm	vx,v1,v2,vp
-        vor		v1,v2,v2			// v1 <- v2
-        stvx	vx,0,rd
-        addi	rd,rd,16
-        bdnz	3b
-4:
-        mtspr	vrsave,rv			// restore bitmap of live vr's
-        bne		cr6,LShort16		// handle last 0-15 bytes if any
-        blr
-
-
-// Forward, long, 16-byte aligned vector loop.
-
-        .align	5
-LFwdLongAligned:        			// loop over 128-byte chunks
-        addi	w4,rs,64
-        lvx		v1,0,rs
-        lvx		v2,c16,rs
-        lvx		v3,c32,rs
-        lvx		v4,c48,rs
-        lvx		v5,0,w4
-        lvx		v6,c16,w4
-        lvx		v7,c32,w4
-        lvx		v8,c48,w4
-        addi	rs,rs,128
-        addi	w4,rd,64
-        stvx	v1,0,rd 
-        stvx	v2,c16,rd
-        stvx	v3,c32,rd
-        stvx	v4,c48,rd
-        stvx	v5,0,w4
-        stvx	v6,c16,w4
-        stvx	v7,c32,w4
-        stvx	v8,c48,w4
-        addi	rd,rd,128
-        bdnz	LFwdLongAligned
-                
-        beq		4f					// no leftover quadwords
-        mtctr	r0
-3:									// loop over remaining quadwords (1-7)
-        lvx		v1,0,rs
-        addi	rs,rs,16
-        stvx	v1,0,rd
-        addi	rd,rd,16
-        bdnz	3b
-4:
-        mtspr	vrsave,rv			// restore bitmap of live vr's
-        bne		cr6,LShort16		// handle last 0-15 bytes if any
-        blr
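
All the counts above come from shift-and-mask on the byte count; in C, for a destination already 16-byte aligned as the comments state (carve is a hypothetical helper):

    // Decompose a length the way LFwdLongVectors does.
    static void carve(unsigned len)
    {
        unsigned chunks = len >> 7;        // 128-byte chunks    (mtctr)
        unsigned qws    = (len >> 4) & 7;  // leftover quadwords (rlwinm. r0,rc,28,29,31)
        unsigned tail   = len & 15;        // final 0-15 bytes   (cr7, LShort16)
        (void)chunks; (void)qws; (void)tail;
    }
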
-        
-
-// Long, reverse moves.
-//		rs = source
-//		rd = destination
-//		rc = count
-//	   cr5 = beq if relatively 16-byte aligned
-
-LLongReverse:
-        add		rd,rd,rc			// point to end of operands
-        add		rs,rs,rc
-        andi.	r0,rd,0xF			// #bytes to 16-byte align destination
-        beq		2f					// already aligned
-        
-// 16-byte align destination.
-
-        mtctr	r0					// set up for loop
-        sub		rc,rc,r0
-1:
-        lbzu	w1,-1(rs)
-        stbu	w1,-1(rd)
-        bdnz	1b
-
-// Prepare for reverse vector loop.  When entered:
-//		rd = 16-byte aligned
-//		cr5 = beq if source also 16-byte aligned
-// We set up many registers:
-//		ctr/cr1 = number of 64-byte chunks to move (may be 0)
-//		r0/cr0 = leftover QWs to move
-//		cr7 = low 4 bits of rc (ie, leftover byte count 0-15)
-//		cr6 = beq if leftover byte count is 0
-//		cm1 = -1
-//		rv = original value of vrsave
-
-2:
-        mfspr	rv,vrsave			// get bitmap of live vector registers
-        srwi	r0,rc,6				// get count of 64-byte chunks to move (may be 0)
-        oris	w1,rv,0xFFF8		// we use v0-v12
-        mtcrf	0x01,rc				// prepare for moving last 0-15 bytes in LShortReverse16
-        rlwinm	w3,rc,0,28,31		// move last 0-15 byte count to w3 too
-        cmpwi	cr1,r0,0			// set cr1 on chunk count
-        mtspr	vrsave,w1			// update mask
-        mtctr	r0					// set up loop count
-        cmpwi	cr6,w3,0			// set cr6 on leftover byte count
-        rlwinm.	r0,rc,28,30,31		// get number of quadword leftovers (0-3) and set cr0
-        li		cm1,-1				// get constants used in lvx/stvx
-        
-        bne		cr5,LReverseVecUnal	// handle unaligned operands
-        beq		cr1,2f				// no chunks (if no chunks, must be leftover QWs)
-        li		cm17,-17
-        li		cm33,-33
-        li		cm49,-49
-        b		1f
-
-// Long, reverse 16-byte-aligned vector loop.
-      
-        .align	5					// align inner loops
-1:        							// loop over 64-byte chunks
-        lvx		v1,cm1,rs
-        lvx		v2,cm17,rs
-        lvx		v3,cm33,rs
-        lvx		v4,cm49,rs
-        subi	rs,rs,64
-        stvx	v1,cm1,rd
-        stvx	v2,cm17,rd
-        stvx	v3,cm33,rd
-        stvx	v4,cm49,rd
-        subi	rd,rd,64
-        bdnz	1b
-        
-        beq		4f					// no leftover quadwords
-2:									// r0=#QWs, rv=vrsave, cr7=(rc & F), cr6 set on cr7
-        mtctr	r0
-3:									// loop over remaining quadwords (1-3)
-        lvx		v1,cm1,rs
-        subi	rs,rs,16
-        stvx	v1,cm1,rd
-        subi	rd,rd,16
-        bdnz	3b
-4:
-        mtspr	vrsave,rv			// restore bitmap of live vr's
-        bne		cr6,LShortReverse16	// handle last 0-15 bytes if any
-        blr
-
-
-// Long, reverse, unaligned vector loop.
-//		ctr/cr1 = number of 64-byte chunks to move (may be 0)
-//		r0/cr0 = leftover QWs to move
-//		cr7 = low 4 bits of rc (ie, leftover byte count 0-15)
-//		cr6 = beq if leftover byte count is 0
-//		rv = original value of vrsave
-//		cm1 = -1
-
-LReverseVecUnal:
-        lvsl	vp,0,rs				// get permute vector to shift left
-        lvx		v1,cm1,rs			// v1 always looks ahead
-        li		cm17,-17
-        beq		cr1,2f				// no chunks (if no chunks, must be leftover QWs)
-        li		cm33,-33
-        li		cm49,-49
-        b		1f
-        
-        .align	5					// align the inner loops
-1:									// loop over 64-byte chunks
-        lvx		v2,cm17,rs
-        lvx		v3,cm33,rs
-        lvx		v4,cm49,rs
-        subi	rs,rs,64
-        vperm	vx,v2,v1,vp
-        lvx		v1,cm1,rs
-        vperm	vy,v3,v2,vp
-        stvx	vx,cm1,rd
-        vperm	vz,v4,v3,vp
-        stvx	vy,cm17,rd
-        vperm	vx,v1,v4,vp
-        stvx	vz,cm33,rd
-        stvx	vx,cm49,rd
-        subi	rd,rd,64
-        bdnz	1b
-
-        beq		4f					// no leftover quadwords
-2:									// r0=#QWs, rv=vrsave, v1=next QW, cr7=(rc & F), cr6 set on cr7
-        mtctr	r0
-3:									// loop over 1-3 quadwords
-        lvx		v2,cm17,rs
-        subi	rs,rs,16
-        vperm	vx,v2,v1,vp
-        vor		v1,v2,v2			// v1 <- v2
-        stvx	vx,cm1,rd
-        subi	rd,rd,16
-        bdnz	3b
-4:
-        mtspr	vrsave,rv			// restore bitmap of live vr's
-        bne		cr6,LShortReverse16	// handle last 0-15 bytes if any
-        blr
-
-        
-// Very Big Copy Path.  Save our return address on the stack to help decode backtraces.
-// The conditions bigcopy expects are:
-//  r0 = return address (also stored in caller's SF)
-//	r4 = source ptr
-//	r5 = length (at least several pages)
-// r12 = dest ptr
-
-LBigCopy:
-		lis		r2,0x4000			// r2 <- 0x40000000
-        mflr    r0                  // get our return address
-		add.	r2,r2,r2			// set cr0_lt if running in 32-bit mode
-        stw     r0,8(r1)            // save return, assuming 32-bit mode (the CR-save slot in 64-bit mode)
-		blta	_COMM_PAGE_BIGCOPY  // 32-bit mode, join big operand copy
-		std		r0,16(r1)			// save return in correct spot for 64-bit mode
-        ba      _COMM_PAGE_BIGCOPY  // then join big operand code
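
The mode test above is compact: 0x40000000 doubled carries into bit 31, so the sum is negative in a 32-bit register but stays positive in a 64-bit one, and cr0_lt picks the path. A C model of the arithmetic (mode_test_model is hypothetical):

    #include <stdint.h>

    static void mode_test_model(void)
    {
        uint32_t r32 = 0x40000000;                // lis r2,0x4000
        uint64_t r64 = 0x40000000;
        int lt32 = (int32_t)(r32 + r32) < 0;      // 1: cr0_lt in 32-bit mode
        int lt64 = (int64_t)(r64 + r64) < 0;      // 0: positive in 64-bit mode
        (void)lt32; (void)lt64;
    }
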
-        
-
-	COMMPAGE_DESCRIPTOR(bcopy_970,_COMM_PAGE_BCOPY,k64Bit+kHasAltivec,0, \
-				kCommPageMTCRF+kCommPageBoth+kPort32to64)
diff --git a/osfmk/ppc/commpage/bcopy_g3.s b/osfmk/ppc/commpage/bcopy_g3.s
deleted file mode 100644
index f0900963e..000000000
--- a/osfmk/ppc/commpage/bcopy_g3.s
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* =======================================
- * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
- * =======================================
- *
- * Version of 2/20/2003, tuned for G3.
- *
- * Register usage.  Note we use R2, so this code will not run in a PEF/CFM
- * environment.
- *
- *   r0  = "w7" or temp
- *   r2  = "w8"
- *   r3  = not used, as memcpy and memmove return 1st parameter as a value
- *   r4  = source ptr ("rs")
- *   r5  = count of bytes to move ("rc")
- *   r6  = "w1"
- *   r7  = "w2"
- *   r8  = "w3"
- *   r9  = "w4"
- *   r10 = "w5"
- *   r11 = "w6"
- *   r12 = destination ptr ("rd")
- * f0-f3 = used for moving 8-byte aligned data
- */
-#define rs	r4		// NB: we depend on rs==r4 in "lswx" instructions
-#define rd	r12
-#define rc	r5
-
-#define w1	r6
-#define w2	r7
-#define w3	r8
-#define	w4	r9
-#define	w5	r10
-#define	w6	r11
-#define	w7	r0
-#define	w8	r2
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-        .text
-
-
-#define	kLong	33					// too long for string ops
-
-
-// Main entry points.
-
-        .align 	5
-bcopy_g3:							// void bcopy(const void *src, void *dst, size_t len)
-        cmplwi	rc,kLong			// length > 32 bytes?
-        sub		w1,r4,r3			// must move in reverse if (rd-rs)<rc
-        mr		rd,r4				// start to move source & dest to canonic spot
-        bge		LLong0				// handle long operands
-        mtxer	rc					// set length for string ops
-        lswx	r5,0,r3				// load bytes into r5-r12
-        stswx	r5,0,r4				// store them
-        blr
-
-// NB: memcpy() and memmove() must follow bcopy() by 32 bytes, for comm page.
-
-        .align	5
-Lmemcpy_g3:							// void* memcpy(void *dst, void *src, size_t len)
-Lmemmove_g3:						// void* memmove(void *dst, const void *src, size_t len)
-        cmplwi	rc,kLong			// length > 32 bytes?
-        sub		w1,r3,rs			// must move in reverse if (rd-rs)<rc
-        mr		rd,r3				// must leave r3 alone, it is return value for memcpy etc
-        bge		LLong1				// longer than 32 bytes
-        mtxer	rc					// set length for string ops
-        lswx	r5,0,r4				// load bytes into r5-r12
-        stswx	r5,0,r3				// store them
-        blr
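
With the string ops, the whole short path is three instructions: mtxer supplies the byte count, then one lswx/stswx pair streams up to 32 bytes through r5-r12 with no loop. A C model of the pair, assuming n <= 32 as the kLong test guarantees (string_op_copy is hypothetical):

    #include <stdint.h>
    #include <string.h>

    static void string_op_copy(uint8_t *dst, const uint8_t *src, unsigned n)
    {
        uint8_t gprs[32];          // stands in for r5..r12 (8 regs x 4 bytes)
        if (n > 32) return;        // caller guarantees n < kLong == 33
        memcpy(gprs, src, n);      // lswx r5,0,rs  (XER holds n)
        memcpy(dst, gprs, n);      // stswx r5,0,rd
    }
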
-
-// Long operands (more than 32 bytes.)
-//		w1  = (rd-rs), used to check for alignment
-
-LLong0:								// enter from bcopy()
-        mr		rs,r3				// must leave r3 alone (it is return value for memcpy)
-LLong1:								// enter from memcpy() and memmove()
-        cmplw	cr1,w1,rc			// set cr1 blt iff we must move reverse
-        rlwinm	r0,w1,0,0x3			// are operands relatively word-aligned?
-        neg		w2,rd				// prepare to align destination
-        cmpwi	cr5,r0,0			// set cr5 beq if relatively word aligned
-        blt		cr1,LLongReverse	// handle reverse move
-        andi.	w4,w2,3				// w4 <- #bytes to word align destination
-        beq		cr5,LLongFloat		// relatively aligned so use FPRs
-        sub		rc,rc,w4			// adjust count for alignment
-        srwi	r0,rc,5				// get #chunks to xfer (>=1)
-        rlwinm	rc,rc,0,0x1F		// mask down to leftover bytes
-        mtctr	r0					// set up loop count
-        beq		1f					// dest already word aligned
-    
-// Word align the destination.
-        
-        mtxer	w4					// byte count to xer
-        cmpwi	r0,0				// any chunks to xfer?
-        lswx	w1,0,rs				// move w4 bytes to align dest
-        add		rs,rs,w4
-        stswx	w1,0,rd
-        add		rd,rd,w4
-        beq-	2f					// pathological case, no chunks to xfer
-
-// Forward, unaligned loop.
-
-1:
-        lwz		w1,0(rs)
-        lwz		w2,4(rs)
-        lwz		w3,8(rs)
-        lwz		w4,12(rs)
-        lwz		w5,16(rs)
-        lwz		w6,20(rs)
-        lwz		w7,24(rs)
-        lwz		w8,28(rs)
-        addi	rs,rs,32
-        stw		w1,0(rd)
-        stw		w2,4(rd)
-        stw		w3,8(rd)
-        stw		w4,12(rd)
-        stw		w5,16(rd)
-        stw		w6,20(rd)
-        stw		w7,24(rd)
-        stw		w8,28(rd)
-        addi	rd,rd,32
-        bdnz	1b
-2:									// rc = remaining bytes (0-31)
-        mtxer	rc					// set up count for string ops
-        mr		r0,rd				// move dest ptr out of the way
-        lswx	r5,0,rs				// load xer bytes into r5-r12 (rs==r4)
-        stswx	r5,0,r0				// store them
-        blr
-        
-
-
-// Forward, aligned loop.  We use FPRs.
-
-LLongFloat:
-        andi.	w4,w2,7				// w4 <- #bytes to doubleword-align destination
-        sub		rc,rc,w4			// adjust count for alignment
-        srwi	r0,rc,5				// number of 32-byte chunks to xfer
-        rlwinm	rc,rc,0,0x1F		// mask down to leftover bytes
-        mtctr	r0					// set up loop count
-        beq		1f					// dest already doubleword aligned
-    
-// Doubleword align the destination.
-        
-        mtxer	w4					// byte count to xer
-        cmpwi	r0,0				// any chunks to xfer?
-        lswx	w1,0,rs				// move w4 bytes to align dest
-        add		rs,rs,w4
-        stswx	w1,0,rd
-        add		rd,rd,w4
-        beq-	2f					// pathological case, no chunks to xfer
-1:									// loop over 32-byte chunks
-        lfd		f0,0(rs)
-        lfd		f1,8(rs)
-        lfd		f2,16(rs)
-        lfd		f3,24(rs)
-        addi	rs,rs,32
-        stfd	f0,0(rd)
-        stfd	f1,8(rd)
-        stfd	f2,16(rd)
-        stfd	f3,24(rd)
-        addi	rd,rd,32
-        bdnz	1b
-2:									// rc = remaining bytes (0-31)
-        mtxer	rc					// set up count for string ops
-        mr		r0,rd				// move dest ptr out of the way
-        lswx	r5,0,rs				// load xer bytes into r5-r12 (rs==r4)
-        stswx	r5,0,r0				// store them
-        blr
-
-        
-// Long, reverse moves.
-//		cr5 = beq if relatively word aligned
-
-LLongReverse:
-        add		rd,rd,rc			// point to end of operands + 1
-        add		rs,rs,rc
-        beq		cr5,LReverseFloat	// aligned operands so can use FPRs
-        srwi	r0,rc,5				// get chunk count
-        rlwinm	rc,rc,0,0x1F		// mask down to leftover bytes
-        mtctr	r0					// set up loop count
-        mtxer	rc					// set up for trailing bytes
-1:
-        lwz		w1,-4(rs)
-        lwz		w2,-8(rs)
-        lwz		w3,-12(rs)
-        lwz		w4,-16(rs)
-        stw		w1,-4(rd)
-        lwz		w5,-20(rs)
-        stw		w2,-8(rd)
-        lwz		w6,-24(rs)
-        stw		w3,-12(rd)
-        lwz		w7,-28(rs)
-        stw		w4,-16(rd)
-        lwzu	w8,-32(rs)
-        stw		w5,-20(rd)
-        stw		w6,-24(rd)
-        stw		w7,-28(rd)
-        stwu	w8,-32(rd)
-        bdnz	1b
-
-        sub		r4,rs,rc			// point to 1st (leftmost) leftover byte (0..31)
-        sub		r0,rd,rc			// move dest ptr out of way
-        lswx	r5,0,r4				// load xer bytes into r5-r12
-        stswx	r5,0,r0				// store them
-        blr
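
Every variant in these files picks its direction the same way: one unsigned compare of (rd-rs) against the count catches exactly the overlaps where a forward copy would trample unread source bytes. In C (overlap_safe_copy is hypothetical; the subtraction goes through uintptr_t to mirror the modular sub/cmplw arithmetic):

    #include <stddef.h>
    #include <stdint.h>

    static void overlap_safe_copy(uint8_t *dst, const uint8_t *src, size_t n)
    {
        if ((uintptr_t)dst - (uintptr_t)src < n) {        // must move reverse
            while (n--) dst[n] = src[n];                  // high addresses first
        } else {
            for (size_t i = 0; i < n; i++) dst[i] = src[i];
        }
    }
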
-
-
-// Long, reverse aligned moves.  We use FPRs.
-
-LReverseFloat:
-        andi.	w4,rd,7				// w4 <- #bytes to doubleword-align destination
-        sub		rc,rc,w4			// adjust count for alignment
-        srwi	r0,rc,5				// number of 32-byte chunks to xfer
-        rlwinm	rc,rc,0,0x1F		// mask down to leftover bytes
-        mtctr	r0					// set up loop count
-        beq		1f					// dest already doubleword aligned
-    
-// Doubleword align the destination.
-        
-        mtxer	w4					// byte count to xer
-        cmpwi	r0,0				// any chunks to xfer?
-        sub		rs,rs,w4			// point to 1st bytes to xfer
-        sub		rd,rd,w4
-        lswx	w1,0,rs				// move w4 bytes to align dest
-        stswx	w1,0,rd
-        beq-	2f					// pathological case, no chunks to xfer
-1:
-        lfd		f0,-8(rs)
-        lfd		f1,-16(rs)
-        lfd		f2,-24(rs)
-        lfdu	f3,-32(rs)
-        stfd	f0,-8(rd)
-        stfd	f1,-16(rd)
-        stfd	f2,-24(rd)
-        stfdu	f3,-32(rd)
-        bdnz	1b
-2:									// rc = remaining bytes (0-31)
-        mtxer	rc					// set up count for string ops
-        sub		r4,rs,rc			// point to 1st (leftmost) leftover byte (0..31)
-        sub		r0,rd,rc			// move dest ptr out of way
-        lswx	r5,0,r4				// load xer bytes into r5-r12
-        stswx	r5,0,r0				// store them
-        blr
-
-	COMMPAGE_DESCRIPTOR(bcopy_g3,_COMM_PAGE_BCOPY,0,k64Bit+kHasAltivec,kCommPage32)
diff --git a/osfmk/ppc/commpage/bcopy_g4.s b/osfmk/ppc/commpage/bcopy_g4.s
deleted file mode 100644
index 0d901ab20..000000000
--- a/osfmk/ppc/commpage/bcopy_g4.s
+++ /dev/null
@@ -1,622 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* =======================================
- * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
- * =======================================
- *
- * Version of 2/20/2003, tuned for G4.  The inner loops use DCBA to avoid
- * reading destination cache lines.  Only the 7450 actually benefits from
- * this, and then only in the cold-cache case.  On 7400s and 7455s, we
- * patch the DCBAs into NOPs.
- *
- * Register usage.  Note we use R2, so this code will not run in a PEF/CFM
- * environment.  Note also the rather delicate way we assign multiple uses
- * to the same register.  Beware.
- *
- *   r0  = "w7" or temp (NB: cannot use r0 for any constant such as "c16")
- *   r2  = "w8" or vrsave ("rv")
- *   r3  = not used, as memcpy and memmove return 1st parameter as a value
- *   r4  = source ptr ("rs")
- *   r5  = count of bytes to move ("rc")
- *   r6  = "w1", "c16", or "cm17"
- *   r7  = "w2", "c32", or "cm33"
- *   r8  = "w3", "c48", or "cm49"
- *   r9  = "w4", or "cm1"
- *   r10 = "w5", "c96", or "cm97"
- *   r11 = "w6", "c128", or "cm129"
- *   r12 = destination ptr ("rd")
- *   v0  = permute vector ("vp") 
- * v1-v4 = qw's loaded from source
- * v5-v7 = permuted qw's ("vw", "vx", "vy")
- */
-#define rs	r4
-#define rd	r12
-#define rc	r5
-#define	rv	r2
-
-#define w1	r6
-#define w2	r7
-#define w3	r8
-#define	w4	r9
-#define	w5	r10
-#define	w6	r11
-#define	w7	r0
-#define	w8	r2
-
-#define c16		r6
-#define cm17	r6
-#define c32		r7
-#define cm33	r7
-#define c48		r8
-#define cm49	r8
-#define cm1		r9
-#define c96		r10
-#define cm97	r10
-#define c128	r11
-#define cm129	r11
-
-#define	vp	v0
-#define	vw	v5
-#define	vx	v6
-#define	vy	v7
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-        .text
-
-#define	kMedium		32				// too long for inline loopless code
-#define	kLong		96				// long enough to justify use of Altivec
-
-
-// Main entry points.
-
-        .align 	5
-bcopy_g4:							// void bcopy(const void *src, void *dst, size_t len)
-        cmplwi	rc,kMedium			// short or long?
-        sub		w1,r4,r3			// must move in reverse if (rd-rs)<rc
-        cmplw	cr1,w1,rc			// set cr1 blt iff we must move reverse
-        mr		rd,r4				// start to move registers to canonic spot
-        mr		rs,r3
-        blt+	LShort				// handle short operands
-        dcbt	0,r3				// touch in destination
-        b		LMedium				// join medium/long operand code
-
-// NB: memmove() must be 8 words past bcopy(), to agree with comm page addresses.
-        
-        .align	5
-Lmemcpy_g4:							// void* memcpy(void *dst, void *src, size_t len)
-Lmemmove_g4:						// void* memmove(void *dst, const void *src, size_t len)
-        cmplwi	rc,kMedium			// short or long?
-        sub		w1,r3,r4			// must move in reverse if (rd-rs)<rc
-        dcbt	0,r4				// touch in the first line of source
-        cmplw	cr1,w1,rc			// set cr1 blt iff we must move reverse
-        mr		rd,r3				// must leave r3 alone, it is return value for memcpy etc
-        bge-	LMedium				// handle medium or long operands
-
-// Handle short operands.
-        
-LShort:
-        andi.	r0,rc,0x10			// test bit 27 separately (faster on G4)
-        mtcrf	0x01,rc				// put length bits 28-31 in cr7
-        blt-	cr1,LShortReverse
-        
-// Forward short operands.  This is the most frequent case, so it is inline.
-
-        beq		LShort16			// quadword to move?
-        lwz		w1,0(rs)
-        lwz		w2,4(rs)
-        lwz		w3,8(rs)
-        lwz		w4,12(rs)
-        addi	rs,rs,16
-        stw		w1,0(rd)
-        stw		w2,4(rd)
-        stw		w3,8(rd)
-        stw		w4,12(rd)
-        addi	rd,rd,16
-LShort16:							// join here to xfer 0-15 bytes
-        bf		28,2f				// doubleword?
-        lwz		w1,0(rs)
-        lwz		w2,4(rs)
-        addi	rs,rs,8
-        stw		w1,0(rd)
-        stw		w2,4(rd)
-        addi	rd,rd,8
-2:
-        bf		29,3f				// word?
-        lwz		w1,0(rs)
-        addi	rs,rs,4
-        stw		w1,0(rd)
-        addi	rd,rd,4
-3:
-        bf		30,4f				// halfword to move?
-        lhz		w1,0(rs)
-        addi	rs,rs,2
-        sth		w1,0(rd)
-        addi	rd,rd,2
-4:
-        bflr	31					// skip if no odd byte
-        lbz		w1,0(rs)
-        stb		w1,0(rd)
-        blr
-        
-        
-// Handle short reverse operands.
-//		cr0 = bne if bit 27 of length is set
-//		cr7 = bits 28-31 of length      
-
-LShortReverse:
-        add		rs,rs,rc			// adjust ptrs for reverse move
-        add		rd,rd,rc
-        beq		LShortReverse16		// quadword to move?
-        lwz		w1,-4(rs)
-        lwz		w2,-8(rs)
-        lwz		w3,-12(rs)
-        lwzu	w4,-16(rs)
-        stw		w1,-4(rd)
-        stw		w2,-8(rd)
-        stw		w3,-12(rd)
-        stwu	w4,-16(rd)
-LShortReverse16:					// join here to xfer 0-15 bytes and return
-        bf		28,2f				// doubleword?
-        lwz		w1,-4(rs)
-        lwzu	w2,-8(rs)
-        stw		w1,-4(rd)
-        stwu	w2,-8(rd)
-2:
-        bf		29,3f				// word?
-        lwzu	w1,-4(rs)
-        stwu	w1,-4(rd)
-3:
-        bf		30,4f				// halfword to move?
-        lhzu	w1,-2(rs)
-        sthu	w1,-2(rd)
-4:
-        bflr	31					// done if no odd byte
-        lbz 	w1,-1(rs)			// no update
-        stb 	w1,-1(rd)
-        blr
-        
-
-// Medium and long operands.  Use Altivec if long enough, else scalar loops.
-//		w1 = (rd-rs), used to check for alignment
-//     cr1 = blt iff we must move reverse
-
-        .align	4
-LMedium:
-        dcbtst	0,rd				// touch in destination
-        cmplwi	cr7,rc,kLong		// long enough for vectors?
-        neg		w3,rd				// start to compute #bytes to align destination
-        rlwinm	r0,w1,0,0x7			// check relative 8-byte alignment
-        andi.	w6,w3,7				// w6 <- #bytes to 8-byte align destination
-        blt		cr1,LMediumReverse	// handle reverse moves
-        rlwinm	w4,w3,0,0x1F		// w4 <- #bytes to 32-byte align destination
-        cmpwi	cr6,r0,0			// set cr6 beq if relatively aligned
-        bge		cr7,LFwdLong		// long enough for vectors
-
-// Medium length: use scalar loops.
-//	w6/cr0 = #bytes to 8-byte align destination
-//	   cr6 = beq if relatively doubleword aligned
-
-        sub		rc,rc,w6			// decrement length remaining
-        beq		1f					// skip if dest already doubleword aligned
-        mtxer	w6					// set up count for move
-        lswx	w1,0,rs				// move w6 bytes to align destination
-        stswx	w1,0,rd
-        add		rs,rs,w6			// bump ptrs past
-        add		rd,rd,w6
-1:        
-        srwi	r0,rc,4				// get # 16-byte chunks (>=1)
-        mtcrf	0x01,rc				// save remaining byte count here for LShort16
-        mtctr	r0					// set up 16-byte loop
-        bne		cr6,3f				// source not doubleword aligned
-        b		2f
-        
-        .align	4
-2:									// loop over 16-byte aligned chunks
-        lfd		f0,0(rs)
-        lfd		f1,8(rs)
-        addi	rs,rs,16
-        stfd	f0,0(rd)
-        stfd	f1,8(rd)
-        addi	rd,rd,16
-        bdnz	2b
-        
-        b		LShort16
-        
-        .align	4
-3:									// loop over 16-byte unaligned chunks
-        lwz		w1,0(rs)
-        lwz		w2,4(rs)
-        lwz		w3,8(rs)
-        lwz		w4,12(rs)
-        addi	rs,rs,16
-        stw		w1,0(rd)
-        stw		w2,4(rd)
-        stw		w3,8(rd)
-        stw		w4,12(rd)
-        addi	rd,rd,16
-        bdnz	3b
-        
-        b		LShort16
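
The medium path's one real decision is relative alignment: after the destination is 8-byte aligned, the source shares that alignment iff (rd-rs) was a multiple of 8, and only then can the copy use 8-byte moves. A sketch of the dispatch (medium_chunks is hypothetical; memcpy stands in for the paired lfd/stfd and lwz/stw):

    #include <stdint.h>
    #include <string.h>

    static void medium_chunks(uint8_t *dst, const uint8_t *src, unsigned chunks)
    {
        if ((((uintptr_t)dst - (uintptr_t)src) & 7) == 0) {
            for (; chunks--; src += 16, dst += 16)    // two lfd/stfd pairs
                memcpy(dst, src, 16);
        } else {
            for (; chunks--; src += 16, dst += 16) {  // four lwz/stw pairs
                memcpy(dst, src, 8);
                memcpy(dst + 8, src + 8, 8);
            }
        }
    }
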
-
-
-// Vector loops.  First, we must 32-byte align the destination.
-//		w1 = (rd-rs), used to check for reverse and alignment
-//		w4 = #bytes to 32-byte align destination
-//		rc = long enough for at least one vector loop
-
-LFwdLong:
-        cmpwi	w4,0				// dest already aligned?
-        sub		rc,rc,w4			// adjust length
-        mtcrf	0x01,w4				// cr7 <- #bytes to align dest
-        rlwinm	w2,w1,0,0xF			// relatively 16-byte aligned?
-        mtcrf	0x02,w4				// finish moving #bytes to align to cr6 and cr7
-        srwi	r0,rc,6				// get # 64-byte chunks to xfer (>=1)
-        cmpwi	cr5,w2,0			// set cr5 beq if relatively 16-byte aligned
-        beq		LFwdAligned			// dest is already aligned
-        
-// 32-byte align destination.
-
-        bf		31,1f				// byte to move?
-        lbz		w1,0(rs)
-        addi	rs,rs,1
-        stb		w1,0(rd)
-        addi	rd,rd,1
-1:
-        bf		30,2f				// halfword?
-        lhz		w1,0(rs)
-        addi	rs,rs,2
-        sth		w1,0(rd)
-        addi	rd,rd,2
-2:
-        bf		29,3f				// word?
-        lwz		w1,0(rs)
-        addi	rs,rs,4
-        stw		w1,0(rd)
-        addi	rd,rd,4
-3:
-        bf		28,4f				// doubleword?
-        lwz		w1,0(rs)
-        lwz		w2,4(rs)
-        addi	rs,rs,8
-        stw		w1,0(rd)
-        stw		w2,4(rd)
-        addi	rd,rd,8
-4:	
-        bf		27,LFwdAligned		// quadword?
-        lwz		w1,0(rs)
-        lwz		w2,4(rs)
-        lwz		w3,8(rs)
-        lwz		w4,12(rs)
-        addi	rs,rs,16
-        stw		w1,0(rd)
-        stw		w2,4(rd)
-        stw		w3,8(rd)
-        stw		w4,12(rd)
-        addi	rd,rd,16
-
-
-// Destination is 32-byte aligned.
-//		r0 = count of 64-byte chunks to move (not 0)
-//		rd = 32-byte aligned
-//		rc = bytes remaining
-//	   cr5 = beq if source is 16-byte aligned
-// We set up many registers:
-//	   ctr = number of 64-byte chunks to move
-//	r0/cr0 = leftover QWs to move
-//	   cr7 = low 4 bits of rc (ie, leftover byte count 0-15)
-//	   cr6 = beq if leftover byte count is 0
-//		rv = original value of vrsave
-// c16 etc = loaded
-
-LFwdAligned:
-        mfspr	rv,vrsave			// get bitmap of live vector registers
-        mtcrf	0x01,rc				// move leftover count to cr7 for LShort16
-        rlwinm	w3,rc,0,28,31		// move last 0-15 byte count to w3
-        mtctr	r0					// set up loop count
-        cmpwi	cr6,w3,0			// set cr6 on leftover byte count
-        oris	w1,rv,0xFF00		// we use v0-v7
-        rlwinm.	r0,rc,28,30,31		// get number of quadword leftovers (0-3) and set cr0
-        mtspr	vrsave,w1			// update mask
-        li		c16,16				// get constants used in lvx/stvx
-        li		c32,32
-        li		c48,48
-        li		c96,96
-        li		c128,128
-        bne		cr5,LForwardVecUnal	// handle unaligned operands
-        b		1f
-
-        .align	4
-1:        							// loop over 64-byte chunks
-        dcbt	c96,rs
-        dcbt	c128,rs
-        lvx		v1,0,rs
-        lvx		v2,c16,rs
-        lvx		v3,c32,rs
-        lvx		v4,c48,rs
-        addi	rs,rs,64
-        dcba	0,rd				// patched to NOP on some machines
-        stvx	v1,0,rd
-        stvx	v2,c16,rd
-        dcba	c32,rd				// patched to NOP on some machines
-        stvx	v3,c32,rd
-        stvx	v4,c48,rd
-        addi	rd,rd,64
-        bdnz	1b
-                
-        beq		4f					// no leftover quadwords
-        mtctr	r0
-3:									// loop over remaining quadwords (1-3)
-        lvx		v1,0,rs
-        addi	rs,rs,16
-        stvx	v1,0,rd
-        addi	rd,rd,16
-        bdnz	3b
-4:
-        mtspr	vrsave,rv			// restore bitmap of live vr's
-        bne		cr6,LShort16		// handle last 0-15 bytes if any
-        blr
-        
-
-// Long, forward, unaligned vector loop.
-
-LForwardVecUnal:
-        lvsl	vp,0,rs				// get permute vector to shift left
-        lvx		v1,0,rs				// prefetch 1st source quadword
-        b		1f
-
-        .align	4					// align inner loops
-1:									// loop over 64-byte chunks
-        lvx		v2,c16,rs
-        dcbt	c96,rs
-        lvx		v3,c32,rs
-        dcbt	c128,rs
-        lvx		v4,c48,rs
-        addi	rs,rs,64
-        vperm	vw,v1,v2,vp
-        lvx		v1,0,rs
-        vperm	vx,v2,v3,vp
-        dcba	0,rd				// patched to NOP on some machines
-        stvx	vw,0,rd
-        vperm	vy,v3,v4,vp
-        stvx	vx,c16,rd
-        vperm	vw,v4,v1,vp
-        dcba	c32,rd				// patched to NOP on some machines
-        stvx	vy,c32,rd
-        stvx	vw,c48,rd
-        addi	rd,rd,64
-        bdnz	1b
-
-        beq-	4f					// no leftover quadwords
-        mtctr	r0
-3:									// loop over remaining quadwords
-        lvx		v2,c16,rs
-        addi	rs,rs,16
-        vperm	vx,v1,v2,vp
-        vor		v1,v2,v2			// v1 <- v2
-        stvx	vx,0,rd
-        addi	rd,rd,16
-        bdnz	3b
-4:
-        mtspr	vrsave,rv			// restore bitmap of live vr's
-        bne		cr6,LShort16		// handle last 0-15 bytes if any
-        blr
-        
-
-// Medium and long, reverse moves.  We use altivec if the operands are long enough,
-// else a lwz/stw loop.
-//		w1 = (rd-rs), used to check for reverse and alignment
-//	   cr7 = bge if long
-
-LMediumReverse:
-        add		rd,rd,rc			// point to end of operands
-        add		rs,rs,rc
-        andi.	w4,rd,0x1F			// w4 <- #bytes to 32-byte align destination
-        rlwinm	w6,rd,0,0x3			// w6 <- #bytes to 4-byte align destination
-        bge		cr7,LLongReverse	// long enough for vectors
-
-// Scalar loop.
-//	    w6 = #bytes to 4-byte align destination
-
-        sub		rc,rc,w6			// decrement length remaining
-        mtxer	w6					// set up count for move
-        sub		rs,rs,w6			// back up ptrs
-        sub		rd,rd,w6
-        srwi	r0,rc,4				// get # 16-byte chunks (>=1)
-        mtcrf	0x01,rc				// set remaining byte count here for LShortReverse16
-        lswx	w1,0,rs				// move w6 bytes to align destination
-        stswx	w1,0,rd
-        mtctr	r0					// set up 16-byte loop
-        b		1f
-        
-        .align	4
-1:									// loop over 16-byte chunks
-        lwz		w1,-4(rs)
-        lwz		w2,-8(rs)
-        lwz		w3,-12(rs)
-        lwzu	w4,-16(rs)
-        stw		w1,-4(rd)
-        stw		w2,-8(rd)
-        stw		w3,-12(rd)
-        stwu	w4,-16(rd)
-        bdnz	1b
-        
-        b		LShortReverse16
-        
-
-// Reverse vector loops.  First, we must 32-byte align the destination.
-//		w1 = (rd-rs), used to check for reverse and alignment
-//	w4/cr0 = #bytes to 32-byte align destination
-//		rc = long enough for at least one vector loop
-
-LLongReverse:
-        sub		rc,rc,w4			// adjust length
-        mtcrf	0x01,w4				// cr7 <- #bytes to align dest
-        rlwinm	w2,w1,0,0xF			// relatively 16-byte aligned?
-        mtcrf	0x02,w4				// finish moving #bytes to align to cr6 and cr7
-        srwi	r0,rc,6				// get # 64-byte chunks to xfer (>=1)
-        cmpwi	cr5,w2,0			// set cr5 beq if relatively 16-byte aligned
-        beq		LReverseAligned		// dest is already aligned
-        
-// 32-byte align destination.
-
-        bf		31,1f				// byte to move?
-        lbzu 	w1,-1(rs)
-        stbu 	w1,-1(rd)
-1:
-        bf		30,2f				// halfword?
-        lhzu 	w1,-2(rs)
-        sthu 	w1,-2(rd)
-2:
-        bf		29,3f				// word?
-        lwzu 	w1,-4(rs)
-        stwu 	w1,-4(rd)
-3:
-        bf		28,4f				// doubleword?
-        lwz		w1,-4(rs)
-        lwzu	w2,-8(rs)
-        stw		w1,-4(rd)
-        stwu	w2,-8(rd)
-4:	
-        bf		27,LReverseAligned	// quadword?
-        lwz		w1,-4(rs)
-        lwz		w2,-8(rs)
-        lwz		w3,-12(rs)
-        lwzu	w4,-16(rs)
-        stw		w1,-4(rd)
-        stw		w2,-8(rd)
-        stw		w3,-12(rd)
-        stwu	w4,-16(rd)
-
-// Destination is 32-byte aligned.
-//		r0 = count of 64-byte chunks to move (not 0)
-//		rd = 32-byte aligned
-//		rc = bytes remaining
-//	   cr5 = beq if source is 16-byte aligned
-// We set up many registers:
-//	   ctr = number of 64-byte chunks to move
-//	r0/cr0 = leftover QWs to move
-//	   cr7 = low 4 bits of rc (ie, leftover byte count 0-15)
-//	   cr6 = beq if leftover byte count is 0
-//		rv = original value of vrsave
-// cm1 etc = loaded
-        
-LReverseAligned:
-        mfspr	rv,vrsave			// get bitmap of live vector registers
-        mtcrf	0x01,rc				// move leftover count to cr7 for LShort16
-        rlwinm	w3,rc,0,28,31		// move last 0-15 byte count to w3
-        mtctr	r0					// set up loop count
-        cmpwi	cr6,w3,0			// set cr6 on leftover byte count
-        oris	w1,rv,0xFF00		// we use v0-v7
-        rlwinm.	r0,rc,28,30,31		// get number of quadword leftovers (0-3) and set cr0
-        mtspr	vrsave,w1			// update mask
-        li		cm1,-1				// get constants used in lvx/stvx
-        li		cm17,-17
-        li		cm33,-33
-        li		cm49,-49
-        li		cm97,-97
-        li		cm129,-129        
-        bne		cr5,LReverseVecUnal	// handle unaligned operands
-        b		1f
-      
-        .align	4					// align inner loops
-1:        							// loop over 64-byte chunks
-        dcbt	cm97,rs
-        dcbt	cm129,rs
-        lvx		v1,cm1,rs
-        lvx		v2,cm17,rs
-        lvx		v3,cm33,rs
-        lvx		v4,cm49,rs
-        subi	rs,rs,64
-        stvx	v1,cm1,rd
-        stvx	v2,cm17,rd
-        stvx	v3,cm33,rd
-        stvx	v4,cm49,rd
-        subi	rd,rd,64
-        bdnz	1b
-
-        beq		4f					// no leftover quadwords
-        mtctr	r0
-3:									// loop over remaining quadwords (1-3)
-        lvx		v1,cm1,rs
-        subi	rs,rs,16
-        stvx	v1,cm1,rd
-        subi	rd,rd,16
-        bdnz	3b
-4:
-        mtspr	vrsave,rv			// restore bitmap of live vr's
-        bne		cr6,LShortReverse16	// handle last 0-15 bytes if any
-        blr
-
-
-// Long, reverse, unaligned vector loop.
-
-LReverseVecUnal:
-        lvsl	vp,0,rs				// get permute vector to shift left
-        lvx		v1,cm1,rs			// v1 always looks ahead
-        b		1f
-        
-        .align	4					// align the inner loops
-1:									// loop over 64-byte chunks
-        lvx		v2,cm17,rs
-        dcbt	cm97,rs
-        lvx		v3,cm33,rs
-        dcbt	cm129,rs
-        lvx		v4,cm49,rs
-        subi	rs,rs,64
-        vperm	vw,v2,v1,vp
-        lvx		v1,cm1,rs
-        vperm	vx,v3,v2,vp
-        stvx	vw,cm1,rd
-        vperm	vy,v4,v3,vp
-        stvx	vx,cm17,rd
-        vperm	vw,v1,v4,vp
-        stvx	vy,cm33,rd
-        stvx	vw,cm49,rd
-        subi	rd,rd,64
-        bdnz	1b
-        
-        beq		3f					// no leftover quadwords
-        mtctr	r0
-2:									// loop over 1-3 quadwords
-        lvx		v2,cm17,rs
-        subi	rs,rs,16
-        vperm	vx,v2,v1,vp
-        vor		v1,v2,v2			// v1 <- v2
-        stvx	vx,cm1,rd
-        subi	rd,rd,16
-        bdnz	2b
-3:
-        mtspr	vrsave,rv			// restore bitmap of live vr's
-        bne		cr6,LShortReverse16	// handle last 0-15 bytes if any
-        blr
-
-	COMMPAGE_DESCRIPTOR(bcopy_g4,_COMM_PAGE_BCOPY,kHasAltivec,k64Bit,kCommPageDCBA+kCommPage32)
diff --git a/osfmk/ppc/commpage/bigcopy_970.s b/osfmk/ppc/commpage/bigcopy_970.s
deleted file mode 100644
index add093ea3..000000000
--- a/osfmk/ppc/commpage/bigcopy_970.s
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* ====================================
- * Very Long Operand BCOPY for Mac OS X
- * ====================================
- *
- * Version of 2/21/2004, tuned for the IBM 970.  This is for operands at
- * least several pages long.  It is called from bcopy()/memcpy()/memmove(),
- * and runs both in 32 and 64-bit mode.
- *
- * We use the following additional strategies not used by the shorter
- * operand paths.  Mostly, we try to optimize for memory bandwidth:
- *	1. Use DCBZ128 to avoid reading destination lines.  Because this code
- *     resides on the commpage, it can use a private interface with the
- *     kernel to minimize alignment exceptions if the destination is
- *     uncached.  The kernel will clear cr7 whenever it emulates a DCBZ or
- *     DCBZ128 on the commpage.  Thus we take at most one exception per call,
- *     which is amortized across the very long operand.
- *	2. Copy larger chunks per iteration to minimize R/W bus turnaround
- *     and maximize DRAM page locality (opening a new page is expensive.)
- *     We use 256-byte chunks.
- *  3. Touch in one source chunk ahead with DCBT.  This is probably the
- *     least important change, and probably only helps restart the
- *     hardware stream at the start of each source page.
- */
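
Reduced to portable C, the strategy looks like the sketch below. __builtin_prefetch (a GCC builtin) stands in for the DCBT touch-ahead; DCBZ128, which avoids reading destination lines, has no portable counterpart and is omitted (big_copy_sketch is hypothetical):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static void big_copy_sketch(uint8_t *dst, const uint8_t *src, size_t len)
    {
        while (len >= 256) {                    // 256-byte chunks, 2 cache lines
            __builtin_prefetch(src + 256);      // dcbt c256,rs
            __builtin_prefetch(src + 384);      // dcbt c384,rs
            memcpy(dst, src, 256);              // 16 lvxl + 16 stvxl in the real loop
            src += 256; dst += 256; len -= 256;
        }
        if (len)
            memcpy(dst, src, len);              // 0..255 leftover, as in Ldone
    }
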
- 
-#define rs	r13
-#define rd	r14
-#define rc	r15
-#define rx  r16
-
-#define c16     r3
-#define c32     r4
-#define c48     r5
-#define c64     r6
-#define c80     r7
-#define c96     r8
-#define c112    r9
-#define	c256	r10
-#define	c384	r11
-#define rv      r12     // vrsave
-
-// Offsets within the "red zone" (which is 224 bytes long):
-
-#define rzR3    -8
-#define rzR13	-16
-#define rzR14	-24
-#define rzR15   -32
-#define rzR16   -40
-
-#define rzV20	-64
-#define rzV21	-80
-#define rzV22	-96
-#define rzV23	-112
-
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-        .text
-/*
- * WARNING: this code is written for 32-bit mode, and ported by the kernel if necessary
- * to 64-bit mode for use in the 64-bit commpage.  This "port" consists of the following
- * simple transformations:
- *      - all word compares are changed to doubleword
- *      - all "srwi[.]" opcodes are changed to "srdi[.]"                      
- * Nothing else is done.  For this to work, the following rules must be
- * carefully followed:
- *      - do not use carry or overflow
- *      - only use record mode if you are sure the results are mode-invariant;
- *        for example, all "andi." and almost all "rlwinm." are fine
- *      - do not use "slwi", "slw", or "srw"
- * An imaginative programmer could break the porting model in other ways, but the above
- * are the most likely problem areas.  It is perhaps surprising how well in practice
- * this simple method works.
- */
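
A small C illustration of why those rules work (mode_invariance_demo is hypothetical): masking low bits is width-invariant, so "andi." results look the same whether the register holds 32 or 64 significant bits, while a 32-bit left shift discards bits that a 64-bit shift would keep:

    #include <stdint.h>

    static void mode_invariance_demo(void)
    {
        uint64_t r = 0x00000001FFFFFFF0ULL;     // a value in a 64-bit GPR
        uint64_t m64 = r & 0x7F;                // "andi.": 0x70 either way
        uint32_t m32 = (uint32_t)r & 0x7F;      // m32 == m64
        uint64_t s64 = r << 4;                  // 0x1FFFFFFF00
        uint32_t s32 = (uint32_t)r << 4;        // 0xFFFFFF00 -- differs, so
        (void)m64; (void)m32;                   // "slwi" is banned by the rules
        (void)s64; (void)s32;
    }
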
-
-// Entry point.  This is a subroutine of bcopy().  When called:
-//  r0 = return address (also stored in caller's SF)
-//	r4 = source ptr
-//	r5 = length (at least several pages)
-// r12 = dest ptr
-// 
-// We only do "forward" moves, ie non-overlapping or toward 0.  We return with non-volatiles
-// and r3 preserved.
-
-        .align 	5
-bigcopy_970:
-        neg     r2,r12              // is destination cache-line-aligned?
-        std     r3,rzR3(r1)         // save caller's r3, which must be preserved for memcpy()
-        std		r13,rzR13(r1)		// spill non-volatile regs we use to redzone
-        std		r14,rzR14(r1)
-        std		r15,rzR15(r1)
-        andi.   r2,r2,0x7F          // #bytes to align
-        std     r16,rzR16(r1)
-        mr      rs,r4               // copy parameters into nonvolatile registers
-        mr      rd,r12
-        mr      rc,r5
-        mr      rx,r0               // also save return address
-        beq     1f                  // skip if already aligned
-
-// Cache-line-align destination.
-        
-        mr      r3,rd               // set up dest ptr for memcpy()
-        mr      r5,r2               // number of bytes to copy
-        add     rs,rs,r2            // then bump our parameters past initial copy
-        add     rd,rd,r2
-        sub     rc,rc,r2
-        bla     _COMM_PAGE_MEMCPY   // 128-byte-align destination
-
-
-// Load constant offsets and check whether source is 16-byte aligned.
-// NB: the kernel clears cr7 if it emulates a dcbz128 on the commpage,
-// and we dcbz only if cr7 beq is set.
-
-1:
-        dcbt    0,rs                // touch in 1st line of source
-        andi.	r0,rs,15			// check source alignment
-        mfspr	rv,vrsave			// save caller's bitmask
-        li		c16,16				// load the constant offsets for x-form ops
-        li		c32,32
-        srwi    r2,rc,8             // get number of 256-byte chunks to xfer
-        li		r0,-256				// we use 24 VRs (ie, 0-23)
-        li		c48,48
-        li      c64,64
-        li      c80,80
-        or      r0,r0,rv            // add our bits to caller's
-        li      c96,96
-        mtctr   r2                  // set up loop count
-        li      c112,112
-        cmpd    cr7,r2,r2           // initialize cr7_eq to "on", so we dcbz128
-        mtspr	vrsave,r0           // say we use vr0..vr23
-        li		c256,256
-        li		c384,384
-        beq		LalignedLoop		// handle aligned sources
-
-        
-// Set up for unaligned loop.
-
-        lvsl	v0,0,rs				// get permute vector for left shift
-        lvxl	v1,0,rs				// prime the loop
-        li		r0,rzV20            // save non-volatile VRs in redzone
-        stvx	v20,r1,r0
-        li		r0,rzV21
-        stvx	v21,r1,r0
-        li		r0,rzV22
-        stvx	v22,r1,r0
-        li		r0,rzV23
-        stvx	v23,r1,r0
-        b		LunalignedLoop		// enter unaligned loop
-
-
-// Main loop for unaligned operands.  We loop over 256-byte chunks (2 cache lines).
-// Destination is 128-byte aligned, source is unaligned.
-
-        .align	5
-LunalignedLoop:
-        dcbt	c256,rs             // touch in next chunk
-        dcbt	c384,rs
-        addi    r2,rs,128           // point to 2nd 128 bytes of source
-        lvxl	v2,c16,rs
-        lvxl	v3,c32,rs
-        lvxl	v4,c48,rs
-        lvxl    v5,c64,rs
-        lvxl    v6,c80,rs
-        lvxl    v7,c96,rs
-        lvxl    v8,c112,rs
-        lvxl    v9,0,r2
-        addi    rs,rs,256           // point to next source chunk
-        lvxl    v10,c16,r2
-        lvxl    v11,c32,r2
-        vperm   v17,v1,v2,v0
-        lvxl    v12,c48,r2
-        lvxl    v13,c64,r2
-        vperm   v18,v2,v3,v0
-        lvxl    v14,c80,r2
-        lvxl    v15,c96,r2
-        vperm   v19,v3,v4,v0
-        lvxl    v16,c112,r2
-        lvxl	v1,0,rs             // peek ahead at first source quad in next chunk
-        vperm   v20,v4,v5,v0
-        addi    r2,rd,128           // point to 2nd 128 bytes of dest 
-        bne--	cr7,1f				// skip dcbz's if cr7 beq has been turned off by kernel
-        dcbz128	0,rd
-        dcbz128	0,r2
-1:
-        vperm   v21,v5,v6,v0
-        stvxl	v17,0,rd
-        vperm   v22,v6,v7,v0
-        stvxl	v18,c16,rd
-        vperm   v23,v7,v8,v0
-        stvxl	v19,c32,rd
-        vperm   v17,v8,v9,v0
-        stvxl	v20,c48,rd
-        vperm   v18,v9,v10,v0
-        stvxl	v21,c64,rd
-        vperm   v19,v10,v11,v0
-        stvxl	v22,c80,rd
-        vperm   v20,v11,v12,v0
-        stvxl	v23,c96,rd
-        vperm   v21,v12,v13,v0
-        stvxl	v17,c112,rd
-        vperm   v22,v13,v14,v0
-        addi	rd,rd,256           // point to next dest chunk
-        stvxl	v18,0,r2
-        vperm   v23,v14,v15,v0
-        stvxl	v19,c16,r2
-        vperm   v17,v15,v16,v0
-        stvxl	v20,c32,r2
-        vperm   v18,v16,v1,v0
-        stvxl	v21,c48,r2
-        stvxl	v22,c64,r2
-        stvxl	v23,c80,r2
-        stvxl	v17,c96,r2
-        stvxl	v18,c112,r2
-        bdnz++	LunalignedLoop      // loop if another 256 bytes to go
-
-        li		r6,rzV20            // restore non-volatile VRs
-        li		r7,rzV21
-        li		r8,rzV22
-        li		r9,rzV23
-        lvx		v20,r1,r6
-        lvx		v21,r1,r7
-        lvx		v22,r1,r8
-        lvx		v23,r1,r9
-        b       Ldone
-        
-        
-// Aligned loop.  Destination is 128-byte aligned, and source is 16-byte
-// aligned.  Loop over 256-byte chunks (2 cache lines.)
-
-        .align	5
-LalignedLoop:
-        dcbt	c256,rs             // touch in next chunk
-        dcbt	c384,rs
-        addi    r2,rs,128           // point to 2nd 128 bytes of source
-        lvxl	v1,0,rs
-        lvxl	v2,c16,rs
-        lvxl	v3,c32,rs
-        lvxl	v4,c48,rs
-        lvxl    v5,c64,rs
-        lvxl    v6,c80,rs
-        lvxl    v7,c96,rs
-        lvxl    v8,c112,rs
-        lvxl    v9,0,r2
-        lvxl    v10,c16,r2
-        lvxl    v11,c32,r2
-        lvxl    v12,c48,r2
-        lvxl    v13,c64,r2
-        lvxl    v14,c80,r2
-        lvxl    v15,c96,r2
-        lvxl    v16,c112,r2
-        addi    r2,rd,128           // point to 2nd 128 bytes of dest 
-        bne--	cr7,1f				// skip dcbz's if cr7 beq has been turned off by kernel
-        dcbz128	0,rd
-        dcbz128	0,r2
-1:
-        addi    rs,rs,256           // point to next source chunk
-        stvxl	v1,0,rd
-        stvxl	v2,c16,rd
-        stvxl	v3,c32,rd
-        stvxl	v4,c48,rd
-        stvxl	v5,c64,rd
-        stvxl	v6,c80,rd
-        stvxl	v7,c96,rd
-        stvxl	v8,c112,rd
-        addi	rd,rd,256           // point to next dest chunk
-        stvxl	v9,0,r2
-        stvxl	v10,c16,r2
-        stvxl	v11,c32,r2
-        stvxl	v12,c48,r2
-        stvxl	v13,c64,r2
-        stvxl	v14,c80,r2
-        stvxl	v15,c96,r2
-        stvxl	v16,c112,r2
-        bdnz++	LalignedLoop		// loop if another 256 bytes to go
-
-
-// Done, except for 0..255 leftover bytes at end.
-//	rs = source ptr
-//	rd = dest ptr
-//	rc = remaining count in low 7 bits
-//	rv = caller's vrsave
-//  rx = caller's return address
-
-Ldone:
-        andi.   r5,rc,0xFF          // any leftover bytes? (0..255)
-        mtspr	vrsave,rv			// restore bitmap of live vr's
-        
-        mr      r3,rd
-        mr      r4,rs
-        bnela   _COMM_PAGE_MEMCPY   // copy leftover bytes
-
-        mtlr    rx                  // restore return address
-        ld      r3,rzR3(r1)         // restore non-volatile GPRs from redzone
-        ld		r13,rzR13(r1)
-        ld		r14,rzR14(r1)
-        ld		r15,rzR15(r1)
-        ld      r16,rzR16(r1)
-        blr
-
-
-        COMMPAGE_DESCRIPTOR(bigcopy_970,_COMM_PAGE_BIGCOPY,0,0,kPort32to64+kCommPageBoth)
-
diff --git a/osfmk/ppc/commpage/bzero_128.s b/osfmk/ppc/commpage/bzero_128.s
deleted file mode 100644
index f22198478..000000000
--- a/osfmk/ppc/commpage/bzero_128.s
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-        .text
-        .align	2
-/*
- * WARNING: this code is written for 32-bit mode, and ported by the kernel if necessary
- * to 64-bit mode for use in the 64-bit commpage.  This "port" consists of the following
- * simple transformations:
- *      - all word compares are changed to doubleword
- *      - all "srwi[.]" opcodes are changed to "srdi[.]"                      
- * Nothing else is done.  For this to work, the following rules must be
- * carefully followed:
- *      - do not use carry or overflow
- *      - only use record mode if you are sure the results are mode-invariant;
- *        for example, all "andi." and almost all "rlwinm." are fine
- *      - do not use "slwi", "slw", or "srw"
- * An imaginative programmer could break the porting model in other ways, but the above
- * are the most likely problem areas.  It is perhaps surprising how well in practice
- * this simple method works.
- */        
-
-// **********************
-// * B Z E R O _ 1 2 8  *
-// **********************
-//
-// For 64-bit processors with a 128-byte cache line.
-//
-// Register use:
-//		r0 = zero
-//		r3 = original ptr, not changed since memset returns it
-//		r4 = count of bytes to set
-//		r9 = working operand ptr
-// WARNING: We do not touch r2 and r10-r12, which some callers depend on.
-
-        .align	5
-bzero_128:						// void	bzero(void *b, size_t len);
-        cmplwi	cr7,r4,128		// too short for DCBZ128?
-        li		r0,0			// get a 0
-        neg		r5,r3			// start to compute #bytes to align
-        mr		r9,r3			// make copy of operand ptr (can't change r3)
-        blt		cr7,Ltail		// length < 128, too short for DCBZ
-
-// At least 128 bytes long, so compute alignment and #cache blocks.
-
-        andi.	r5,r5,0x7F		// r5 <-  #bytes to 128-byte align
-        sub		r4,r4,r5		// adjust length
-        srwi	r8,r4,7			// r8 <- 128-byte chunks
-        rlwinm	r4,r4,0,0x7F	// mask length down to remaining bytes
-        mtctr	r8				// set up loop count
-        beq		Ldcbz			// skip if already aligned (then r8!=0, since len>=128)
-        
-// 128-byte align
-
-        mtcrf	0x01,r5			// start to move #bytes to align to cr6 and cr7
-        cmpwi	cr1,r8,0		// any 128-byte cache lines to 0?
-        mtcrf	0x02,r5
-        
-        bf		31,1f			// byte?
-        stb		r0,0(r9)
-        addi	r9,r9,1
-1:
-        bf		30,2f			// halfword?
-        sth		r0,0(r9)
-        addi	r9,r9,2
-2:
-        bf		29,3f			// word?
-        stw		r0,0(r9)
-        addi	r9,r9,4
-3:
-        bf		28,4f			// doubleword?
-        std		r0,0(r9)
-        addi	r9,r9,8
-4:
-        bf		27,5f			// quadword?
-        std		r0,0(r9)
-        std		r0,8(r9)
-        addi	r9,r9,16
-5:
-        bf		26,6f			// 32-byte chunk?
-        std		r0,0(r9)
-        std		r0,8(r9)
-        std		r0,16(r9)
-        std		r0,24(r9)
-        addi	r9,r9,32
-6:
-        bf		25,7f			// 64-byte chunk?
-        std		r0,0(r9)
-        std		r0,8(r9)
-        std		r0,16(r9)
-        std		r0,24(r9)
-        std		r0,32(r9)
-        std		r0,40(r9)
-        std		r0,48(r9)
-        std		r0,56(r9)
-        addi	r9,r9,64
-7:
-        beq		cr1,Ltail		// no chunks to dcbz128
-
-// Loop doing 128-byte version of DCBZ instruction.
-// NB: if the memory is cache-inhibited, the kernel will clear cr7
-// when it emulates the alignment exception.  Eventually, we may want
-// to check for this case.
-
-Ldcbz:
-        dcbz128	0,r9			// zero another 128 bytes
-        addi	r9,r9,128
-        bdnz	Ldcbz
-
-// Store trailing bytes.
-//		r0 = 0
-//		r4 = count
-//		r9 = ptr
-
-Ltail:
-        srwi.	r5,r4,4			// r5 <- 16-byte chunks to 0
-        mtcrf	0x01,r4			// remaining byte count to cr7
-        mtctr	r5
-        beq		2f				// skip if no 16-byte chunks
-1:								// loop over 16-byte chunks
-        std		r0,0(r9)
-        std		r0,8(r9)
-        addi	r9,r9,16
-        bdnz	1b
-2:
-        bf		28,4f			// 8-byte chunk?
-        std		r0,0(r9)
-        addi	r9,r9,8
-4:
-        bf		29,5f			// word?
-        stw		r0,0(r9)
-        addi	r9,r9,4
-5:
-        bf		30,6f			// halfword?
-        sth		r0,0(r9)
-        addi	r9,r9,2
-6:
-        bflr	31				// byte?
-        stb		r0,0(r9)
-        blr
-
-	COMMPAGE_DESCRIPTOR(bzero_128,_COMM_PAGE_BZERO,kCache128+k64Bit,0, \
-				kCommPageMTCRF+kCommPageBoth+kPort32to64)
diff --git a/osfmk/ppc/commpage/bzero_32.s b/osfmk/ppc/commpage/bzero_32.s
deleted file mode 100644
index fe7653d6d..000000000
--- a/osfmk/ppc/commpage/bzero_32.s
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-        .text
-        .align	2
-        
-
-// *******************
-// * B Z E R O _ 3 2 *
-// *******************
-//
-// For 32-bit processors with a 32-byte cache line.
-//
-// Register use:
-//		r0 = zero
-//		r3 = original ptr, not changed since memset returns it
-//		r4 = count of bytes to set
-//		r9 = working operand ptr
-// We do not touch r2 and r10-r12, which some callers depend on.
-
-        .align	5
-bzero_32:						// void	bzero(void *b, size_t len);
-        cmplwi	cr7,r4,32		// too short for DCBZ?
-        li		r0,0			// get a 0
-        neg		r5,r3			// start to compute #bytes to align
-        mr		r9,r3			// make copy of operand ptr (can't change r3)
-        blt		cr7,Ltail		// length < 32, too short for DCBZ
-
-// At least 32 bytes long, so compute alignment and #cache blocks.
-
-        andi.	r5,r5,0x1F		// r5 <-  #bytes to 32-byte align
-        sub		r4,r4,r5		// adjust length
-        srwi	r8,r4,5			// r8 <- #32-byte chunks
-        cmpwi	cr1,r8,0		// any chunks?
-        mtctr	r8				// set up loop count
-        beq		1f				// skip if already 32-byte aligned (then r8!=0, since len>=32)
-        
-// 32-byte align.  We just store 32 0s, rather than test and use conditional
-// branches.  These stores cover the unaligned prefix; adding the alignment
-// count to the pointer then leaves it 32-byte aligned.
-
-        stw		r0,0(r9)
-        stw		r0,4(r9)
-        stw		r0,8(r9)
-        stw		r0,12(r9)
-        stw		r0,16(r9)
-        stw		r0,20(r9)
-        stw		r0,24(r9)
-        stw		r0,28(r9)
-        add		r9,r9,r5		// now rp is 32-byte aligned
-        beq		cr1,Ltail		// skip if no 32-byte chunks
-
-// Loop doing 32-byte version of DCBZ instruction.
-// NB: we take alignment exceptions on cache-inhibited memory.
-// The kernel could be changed to zero cr7 when emulating a
-// dcbz (as it does on 64-bit processors), so we could avoid all
-// but the first.
-
-1:
-        andi.	r5,r4,0x1F		// will there be trailing bytes?
-        b		2f
-        .align	4
-2:
-        dcbz	0,r9			// zero another 32 bytes
-        addi	r9,r9,32
-        bdnz	2b
-        
-        beqlr					// no trailing bytes
-
-// Store trailing bytes.
-
-Ltail:
-        andi.	r5,r4,0x10		// test bit 27 separately
-        mtcrf	0x01,r4			// remaining byte count to cr7
-        
-        beq		2f				// no 16-byte chunks
-        stw		r0,0(r9)
-        stw		r0,4(r9)
-        stw		r0,8(r9)
-        stw		r0,12(r9)
-        addi	r9,r9,16
-2:
-        bf		28,4f			// 8-byte chunk?
-        stw		r0,0(r9)
-        stw		r0,4(r9)
-        addi	r9,r9,8
-4:
-        bf		29,5f			// word?
-        stw		r0,0(r9)
-        addi	r9,r9,4
-5:
-        bf		30,6f			// halfword?
-        sth		r0,0(r9)
-        addi	r9,r9,2
-6:
-        bflr	31				// byte?
-        stb		r0,0(r9)
-        blr
-
-	COMMPAGE_DESCRIPTOR(bzero_32,_COMM_PAGE_BZERO,kCache32,0,kCommPage32)
diff --git a/osfmk/ppc/commpage/cacheflush.s b/osfmk/ppc/commpage/cacheflush.s
deleted file mode 100644
index 43d7452ea..000000000
--- a/osfmk/ppc/commpage/cacheflush.s
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>					// EXT, LEXT
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-        .text
-        .align	2
-
-
-// *********************************************
-// * C O M M P A G E _ F L U S H _ D C A C H E *
-// *********************************************
-//
-// Note that this routine is called in both 32- and 64-bit mode.
-//
-//	r3 = ptr to 1st byte to flush
-//	r4 = length to flush (may be 0)
-
-commpage_flush_dcache:
-        mr.     r4,r4           // test length for 0 in mode-independent way
-        lhz		r5,_COMM_PAGE_CACHE_LINESIZE(0)
-        subi	r9,r5,1			// get (linesize-1)
-        and		r0,r3,r9		// get offset within line of 1st byte
-        add		r4,r4,r0		// adjust length so we flush them all
-        add		r4,r4,r9		// round length up...
-        andc	r4,r4,r9		// ...to multiple of cache lines
-        beqlr--					// length was 0, so exit
-1:
-        sub.	r4,r4,r5		// more to go?
-        dcbf	0,r3			// flush another line
-        add		r3,r3,r5
-        bne		1b
-        sync					// make sure lines are flushed before we return
-        blr
-        
-	COMMPAGE_DESCRIPTOR(commpage_flush_dcache,_COMM_PAGE_FLUSH_DCACHE,0,0,kCommPageBoth)
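-// The line-rounding above, sketched in C (illustrative only; "__dcbf" is a
-// hypothetical intrinsic, not a real symbol):
-//
-//		unsigned offset = (unsigned)addr & (linesize - 1);	// offset of 1st byte in its line
-//		len = (len + offset + (linesize - 1)) & ~(linesize - 1);	// round up to whole lines
-//		while (len) { __dcbf(addr); addr += linesize; len -= linesize; }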
-        
-        
-// *********************************************
-// * C O M M P A G E _ F L U S H _ I C A C H E *
-// *********************************************
-//
-// Note that this routine is called in both 32- and 64-bit mode.
-//
-//	r3 = ptr to 1st byte to flush
-//	r4 = length to flush (may be 0)
-
-commpage_flush_icache:
-        mr.     r4,r4           // test length for 0 in mode-independent way
-        lhz		r5,_COMM_PAGE_CACHE_LINESIZE(0)
-        subi	r9,r5,1			// get (linesize-1)
-        and		r0,r3,r9		// get offset within line of 1st byte
-        add		r4,r4,r0		// adjust length so we flush them all
-        mr		r7,r3			// copy ptr
-        add		r4,r4,r9		// round length up...
-        andc	r4,r4,r9		// ...to multiple of cache lines
-        mr		r6,r4			// copy length
-        beqlr--					// length was 0, so exit
-1:
-        sub.	r4,r4,r5		// more to go?
-        dcbf	0,r3			// flush another line
-        add		r3,r3,r5
-        bne		1b
-        sync					// make sure lines are flushed
-2:
-        sub.	r6,r6,r5		// more to go?
-        icbi	0,r7
-        add		r7,r7,r5
-        bne		2b
-        
-        // The following sync is only needed on MP machines, probably only on
-        // 7400-family MP machines.  But because we're not certain of this, and
-        // this isn't a speed critical routine, we are conservative and always sync.
-        
-        sync					// wait until other processors see the icbi's
-        isync					// make sure we haven't prefetched old instructions
-        
-        blr
-
-	COMMPAGE_DESCRIPTOR(commpage_flush_icache,_COMM_PAGE_FLUSH_ICACHE,0,0,kCommPageBoth)
-
-        
diff --git a/osfmk/ppc/commpage/commpage.c b/osfmk/ppc/commpage/commpage.c
deleted file mode 100644
index 6b0227322..000000000
--- a/osfmk/ppc/commpage/commpage.c
+++ /dev/null
@@ -1,679 +0,0 @@
-/*
- * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/*
- *	Here's what to do if you want to add a new routine to the comm page:
- *
- *		1. Add a definition for its address in osfmk/ppc/cpu_capabilities.h,
- *		   being careful to reserve room for future expansion.
- *
- *		2. Write one or more versions of the routine, each with its own
- *		   commpage_descriptor.  The tricky part is getting the "special",
- *		   "musthave", and "canthave" fields right, so that exactly one
- *		   version of the routine is selected for every machine.
- *		   The source files should be in osfmk/ppc/commpage/.
- *
- *		3. Add a ptr to your new commpage_descriptor(s) in the "routines"
- *		   static array below.  Of course, you'll also have to declare them 
- *		   "extern".
- *
- *		4. Write the code in Libc to use the new routine.
- */
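-
-/* As a sketch only (the names here are hypothetical, not part of xnu):
- * a new routine "memfoo_g4", with slot _COMM_PAGE_MEMFOO reserved in
- * cpu_capabilities.h, would need in this file
- *
- *	extern commpage_descriptor memfoo_g4;		// step 2: one extern per variant
- *
- * plus an "&memfoo_g4," entry in the routines[] array below, kept in
- * ascending commpage-address order (step 3).
- */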
-
-#include <mach/mach_types.h>
-#include <mach/machine.h>
-#include <mach/vm_map.h>
-#include <ppc/exception.h>
-#include <ppc/machine_routines.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-#include <machine/pmap.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_map.h>
-#include <ipc/ipc_port.h>
-
-extern	vm_map_t	commpage32_map;   // the 32-bit shared submap, set up in vm init
-extern  vm_map_t	commpage64_map;   // the 64-bit shared submap
-
-char	*commPagePtr32 = NULL;			// virtual address of 32-bit comm page in kernel map
-char	*commPagePtr64 = NULL;			// and 64-bit commpage
-int		_cpu_capabilities = 0;			// define the capability vector
-
-static	char	*next;					// next available byte in comm page
-static	int		cur_routine;			// comm page address of "current" routine
-static	int		matched;				// true if we've found a match for "current" routine
-static  char	*commPagePtr;			// virtual address in kernel of commpage we are working on
-
-extern	commpage_descriptor	compare_and_swap32_on32;
-extern	commpage_descriptor	compare_and_swap32_on64;
-extern	commpage_descriptor	compare_and_swap64;
-extern	commpage_descriptor	atomic_enqueue32;
-extern	commpage_descriptor	atomic_enqueue64;
-extern	commpage_descriptor	atomic_dequeue32_on32;
-extern	commpage_descriptor	atomic_dequeue32_on64;
-extern	commpage_descriptor	atomic_dequeue64;
-extern	commpage_descriptor	memory_barrier_up;
-extern	commpage_descriptor	memory_barrier_mp32;
-extern	commpage_descriptor	memory_barrier_mp64;
-extern	commpage_descriptor	atomic_add32;
-extern	commpage_descriptor	atomic_add64;
-extern	commpage_descriptor	mach_absolute_time_32;
-extern	commpage_descriptor	mach_absolute_time_64;
-extern	commpage_descriptor	mach_absolute_time_lp64;
-extern	commpage_descriptor	spinlock_32_try_mp;
-extern	commpage_descriptor	spinlock_32_try_up;
-extern	commpage_descriptor	spinlock_64_try_mp;
-extern	commpage_descriptor	spinlock_64_try_up;
-extern	commpage_descriptor	spinlock_32_lock_mp;
-extern	commpage_descriptor	spinlock_32_lock_up;
-extern	commpage_descriptor	spinlock_64_lock_mp;
-extern	commpage_descriptor	spinlock_64_lock_up;
-extern	commpage_descriptor	spinlock_32_unlock_mp;
-extern	commpage_descriptor	spinlock_32_unlock_up;
-extern	commpage_descriptor	spinlock_64_unlock_mp;
-extern	commpage_descriptor	spinlock_64_unlock_up;
-extern	commpage_descriptor	pthread_getspecific_sprg3_32;
-extern	commpage_descriptor	pthread_getspecific_sprg3_64;
-extern	commpage_descriptor	pthread_getspecific_uftrap;
-extern	commpage_descriptor	gettimeofday_32;
-extern	commpage_descriptor	gettimeofday_g5_32;
-extern	commpage_descriptor	gettimeofday_g5_64;
-extern	commpage_descriptor	commpage_flush_dcache;
-extern	commpage_descriptor	commpage_flush_icache;
-extern	commpage_descriptor	pthread_self_sprg3;
-extern	commpage_descriptor	pthread_self_uftrap;
-extern	commpage_descriptor	spinlock_relinquish;
-extern	commpage_descriptor	bzero_32;
-extern	commpage_descriptor	bzero_128;
-extern	commpage_descriptor	bcopy_g3;
-extern	commpage_descriptor	bcopy_g4;
-extern	commpage_descriptor	bcopy_970;
-extern	commpage_descriptor	bcopy_64;
-extern	commpage_descriptor	compare_and_swap32_on32b;
-extern	commpage_descriptor	compare_and_swap32_on64b;
-extern	commpage_descriptor	compare_and_swap64b;
-extern  commpage_descriptor memset_64;
-extern  commpage_descriptor memset_g3;
-extern  commpage_descriptor memset_g4;
-extern  commpage_descriptor memset_g5;
-extern	commpage_descriptor	bigcopy_970;
-
-/* The list of all possible commpage routines.  WARNING: the check for overlap
- * assumes that these routines are in strictly ascending order, sorted by address
- * in the commpage.  We panic if not.
- */
-static	commpage_descriptor	*routines[] = {
-    &compare_and_swap32_on32,
-    &compare_and_swap32_on64,
-    &compare_and_swap64,
-    &atomic_enqueue32,
-    &atomic_enqueue64,
-    &atomic_dequeue32_on32,
-    &atomic_dequeue32_on64,
-    &atomic_dequeue64,
-    &memory_barrier_up,
-    &memory_barrier_mp32,
-    &memory_barrier_mp64,
-    &atomic_add32,
-    &atomic_add64,
-    &mach_absolute_time_32,
-    &mach_absolute_time_64,
-    &mach_absolute_time_lp64,
-    &spinlock_32_try_mp,
-    &spinlock_32_try_up,
-    &spinlock_64_try_mp,
-    &spinlock_64_try_up,
-    &spinlock_32_lock_mp,
-    &spinlock_32_lock_up,
-    &spinlock_64_lock_mp,
-    &spinlock_64_lock_up,
-    &spinlock_32_unlock_mp,
-    &spinlock_32_unlock_up,
-    &spinlock_64_unlock_mp,
-    &spinlock_64_unlock_up,
-    &pthread_getspecific_sprg3_32,
-    &pthread_getspecific_sprg3_64,
-    &pthread_getspecific_uftrap,
-    &gettimeofday_32,
-    &gettimeofday_g5_32,
-    &gettimeofday_g5_64,
-    &commpage_flush_dcache,
-    &commpage_flush_icache,
-    &pthread_self_sprg3,
-    &pthread_self_uftrap,
-    &spinlock_relinquish,
-    &bzero_32,
-    &bzero_128,
-    &bcopy_g3,
-    &bcopy_g4,
-    &bcopy_970,
-    &bcopy_64,
-    &compare_and_swap32_on32b,
-    &compare_and_swap32_on64b,
-    &compare_and_swap64b,
-    &memset_64,
-    &memset_g3,
-    &memset_g4,
-    &memset_g5,
-    &bigcopy_970,
-    NULL };
-
-
-/* Allocate the commpages and add to one of the shared submaps created by vm.
- * Called once each for the 32 and 64-bit submaps.
- * 	1. allocate pages in the kernel map (RW)
- *	2. wire them down
- *	3. make a memory entry out of them
- *	4. map that entry into the shared comm region map (R-only)
- */
-static	void*
-commpage_allocate( 
-	vm_map_t			submap )					// commpage32_map or commpage64_map
-{
-    vm_offset_t			kernel_addr = 0;		// address of commpage in kernel map
-    vm_offset_t			zero = 0;
-    vm_size_t			size = _COMM_PAGE_AREA_USED;	// size actually populated
-    vm_map_entry_t		entry;
-    ipc_port_t			handle;
-    
-    if (submap == NULL)
-        panic("commpage submap is null");
-    
-    if (vm_map(kernel_map,&kernel_addr,_COMM_PAGE_AREA_USED,0,VM_FLAGS_ANYWHERE,NULL,0,FALSE,VM_PROT_ALL,VM_PROT_ALL,VM_INHERIT_NONE))
-        panic("cannot allocate commpage");
-        
-    if (vm_map_wire(kernel_map,kernel_addr,kernel_addr+_COMM_PAGE_AREA_USED,VM_PROT_DEFAULT,FALSE))
-        panic("cannot wire commpage");
-
-    /* 
-     * Now that the object is created and wired into the kernel map, mark it so that no delay
-     * copy-on-write will ever be performed on it as a result of mapping it into user-space.
-     * If such a delayed copy ever occurred, we could remove the kernel's wired mapping - and
-     * that would be a real disaster.
-     *
-     * JMM - What we really need is a way to create it like this in the first place.
-     */
-    if (!vm_map_lookup_entry( kernel_map, vm_map_trunc_page(kernel_addr), &entry) || entry->is_sub_map)
-        panic("cannot find commpage entry");
-    entry->object.vm_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
-	
-    if (mach_make_memory_entry(	kernel_map,			// target map
-                                &size,				// size
-                                kernel_addr,		// offset (address in kernel map)
-                                VM_PROT_ALL,		// map it RWX
-                                &handle,			// this is the object handle we get
-                                NULL ))				// parent_entry
-        panic("cannot make entry for commpage");
-    
-    if (vm_map_64(	submap,							// target map (shared submap)
-                    &zero,							// address (map into 1st page in submap)
-                    _COMM_PAGE_AREA_USED,			// size
-                    0,								// mask
-                    VM_FLAGS_FIXED,					// flags (it must be 1st page in submap)
-                    handle,							// port is the memory entry we just made
-                    0,								// offset (map 1st page in memory entry)
-                    FALSE,							// copy
-                    VM_PROT_READ|VM_PROT_EXECUTE,				// cur_protection (R-only in user map)
-                    VM_PROT_READ|VM_PROT_EXECUTE,				// max_protection
-                    VM_INHERIT_SHARE ))				// inheritance
-        panic("cannot map commpage");
-        
-    ipc_port_release(handle);
-        
-    return (void*) kernel_addr;						// return address in kernel map
-}
-
-
-/* Get address (in kernel map) of a commpage field. */
-
-static	void*
-commpage_addr_of(
-    int 	addr_at_runtime	)
-{
-    return	(void*) (commPagePtr + addr_at_runtime - _COMM_PAGE_BASE_ADDRESS);
-}
-
-
-/* Determine number of CPUs on this system.  We cannot rely on
- * machine_info.max_cpus this early in the boot.
- */
-static int
-commpage_cpus( void )
-{
-    int		cpus;
-    
-    cpus = ml_get_max_cpus();			// NB: this call can block
-    
-    if (cpus == 0)
-        panic("commpage cpus==0");
-    if (cpus > 0xFF)
-        cpus = 0xFF;
-    
-    return	cpus;
-}
-
-
-/* Initialize kernel version of _cpu_capabilities vector (used by KEXTs.) */
-
-static void
-commpage_init_cpu_capabilities( void )
-{
-    procFeatures	*pfp;
-    int	cpus;
-    int	available;
-
-    pfp = &(PerProcTable[0].ppe_vaddr->pf);			// point to features in per-proc
-    available = pfp->Available;
-
-    // If AltiVec is disabled make sure it is not reported as available.
-    if ((available & pfAltivec) == 0) {
-        _cpu_capabilities &= ~kHasAltivec;
-    }
-
-    if (_cpu_capabilities & kDcbaAvailable) { 		// if this processor has DCBA, time it...
-        _cpu_capabilities |= commpage_time_dcba();	// ...and set kDcbaRecomended if it helps.
-    }
-
-    cpus = commpage_cpus();                         // how many CPUs do we have
-    if (cpus == 1) _cpu_capabilities |= kUP;
-    _cpu_capabilities |= (cpus << kNumCPUsShift);
-
-    if (_cpu_capabilities & k64Bit)                 // 64-bit processors use SPRG3 for TLS
-        _cpu_capabilities |= kFastThreadLocalStorage;
-}
-
-
-/* Copy data into commpage. */
-
-static void
-commpage_stuff(
-    int         address,
-    const void 	*source,
-    int         length	)
-{    
-    char	*dest = commpage_addr_of(address);
-    
-    if (dest < next)
-        panic("commpage overlap: %p - %p", dest, next);
-    
-    bcopy((const char*)source,dest,length);
-    
-    next = (dest + length);
-}
-
-
-/* Modify commpage code in-place for this specific platform. */
-
-static void
-commpage_change(
-    uint32_t 	*ptr,
-    int 		bytes,
-    uint32_t 	search_mask, 
-    uint32_t 	search_pattern,
-    uint32_t 	new_mask,
-    uint32_t 	new_pattern,
-    int			(*check)(uint32_t instruction)	)
-{
-    int			words = bytes >> 2;
-    uint32_t	word;
-
-    while( (--words) >= 0 ) {
-        word = *ptr;
-        if ((word & search_mask)==search_pattern) {
-            if ((check==NULL) || (check(word))) {	// check instruction if necessary
-                word &= ~new_mask;
-                word |= new_pattern;
-                *ptr = word;
-            }
-        }
-        ptr++;
-    }
-}
-
-
-/* Check to see if exactly one bit is set in a MTCRF instruction's FXM field.
- */
-static int
-commpage_onebit(
-    uint32_t	mtcrf )
-{
-    int x = (mtcrf >> 12) & 0xFF;		// isolate the FXM field of the MTCRF
-    
-    if (x==0)
-        panic("commpage bad mtcrf");
-        
-    return	(x & (x-1))==0 ? 1 : 0;		// return 1 iff exactly 1 bit set in FXM field
-}
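-
-/* Worked example: for "mtcrf 0x01,r5" the FXM field is 0x01, and
- * 0x01 & 0x00 == 0, so commpage_onebit() returns 1.  For "mtcrf 0x81,r5",
- * 0x81 & 0x80 != 0, so it returns 0 and commpage_handle_mtcrfs() leaves
- * bit 11 alone.
- */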
-
-
-/* Check to see if a RLWINM (whose ME is 31) is a SRWI.  To shift right n bits
- * you code "RLWINM ra,rs,32-n,n,31", so if (SH+MB)==32 the instruction is a SRWI.
- */
-static int
-commpage_srwi(
-	uint32_t	rlwinm )
-{
-	int			sh = (rlwinm >> 11) & 0x1F;		// extract SH field of RLWINM, ie bits 16-20
-	int			mb = (rlwinm >> 6 ) & 0x1F;		// extract MB field of RLWINM, ie bits 21-25
-	
-	return  (sh + mb) == 32;					// it is a SRWI if (SH+MB)==32
-}
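-
-/* Example: "srwi r8,r4,7" is encoded as "rlwinm r8,r4,25,7,31", so SH==25,
- * MB==7, and SH+MB==32; commpage_srwi() therefore reports it as a SRWI.
- */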
-
-
-/* Handle kCommPageDCBA bit: the commpage routine uses DCBA.  If the machine we're
- * running on doesn't benefit from use of that instruction, map them to NOPs
- * in the commpage.
- */
-static void
-commpage_handle_dcbas(
-    int 	address,
-    int 	length	)
-{
-    uint32_t	*ptr, search_mask, search, replace_mask, replace;
-    
-    if ( (_cpu_capabilities & kDcbaRecommended) == 0 ) {
-        ptr = commpage_addr_of(address);
-        
-        search_mask =	0xFC0007FE;		// search x-form opcode bits
-        search =		0x7C0005EC;		// for a DCBA
-        replace_mask = 	0xFFFFFFFF;		// replace all bits...
-        replace =		0x60000000;		// ...with a NOP
-    
-        commpage_change(ptr,length,search_mask,search,replace_mask,replace,NULL);
-    }
-}
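-
-/* (0x60000000 is "ori r0,r0,0", the canonical PowerPC NOP; this handler and
- * the two that follow all substitute that word.)
- */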
-
-
-/* Handle kCommPageSYNC bit: this routine uses SYNC, LWSYNC, or EIEIO.  If we're
- * running on a UP machine, map them to NOPs.
- */
-static void
-commpage_handle_syncs(
-    int 	address, 
-    int 	length	)
-{
-    uint32_t	*ptr, search_mask, search, replace_mask, replace;
-    
-    if (_NumCPUs() == 1) {
-        ptr = commpage_addr_of(address);
-        
-        search_mask =	0xFC0005FE;		// search x-form opcode bits (but ignore bit 0x00000200)
-        search =		0x7C0004AC;		// for a SYNC, LWSYNC, or EIEIO
-        replace_mask = 	0xFFFFFFFF;		// replace all bits...
-        replace =		0x60000000;		// ...with a NOP
-    
-        commpage_change(ptr,length,search_mask,search,replace_mask,replace,NULL);
-    }
-}
-
-
-/* Handle kCommPageISYNC bit: this routine uses ISYNCs.  If we're running on a UP machine,
- * map them to NOPs.
- */
-static void
-commpage_handle_isyncs(
-    int 	address, 
-    int 	length	)
-{
-    uint32_t	*ptr, search_mask, search, replace_mask, replace;
-    
-    if (_NumCPUs() == 1) {
-        ptr = commpage_addr_of(address);
-        
-        search_mask =	0xFC0007FE;		// search xl-form opcode bits
-        search =		0x4C00012C;		// for an ISYNC
-        replace_mask = 	0xFFFFFFFF;		// replace all bits...
-        replace =		0x60000000;		// ...with a NOP
-    
-        commpage_change(ptr,length,search_mask,search,replace_mask,replace,NULL);
-    }
-}
-
-
-/* Handle kCommPageMTCRF bit.  When this was written (3/03), the assembler did not
- * recognize the special form of MTCRF instructions, in which exactly one bit is set
- * in the 8-bit mask field.  Bit 11 of the instruction should be set in this case,
- * since the 970 and probably other 64-bit processors optimize it.  Once the assembler
- * has been updated this code can be removed, though it need not be.
- */
-static void
-commpage_handle_mtcrfs(
-    int 	address, 
-    int 	length	)
-{
-    uint32_t	*ptr, search_mask, search, replace_mask, replace;
-    
-    if (_cpu_capabilities & k64Bit) {
-        ptr = commpage_addr_of(address);
-        
-        search_mask =	0xFC0007FE;		// search x-form opcode bits
-        search =		0x7C000120;		// for a MTCRF
-        replace_mask = 	0x00100000;		// replace bit 11...
-        replace =		0x00100000;		// ...with a 1-bit
-    
-        commpage_change(ptr,length,search_mask,search,replace_mask,replace,commpage_onebit);
-    }
-}
-
-
-/* Port 32-bit code to 64-bit for use in the 64-bit commpage.  This sounds fancier than
- * it is.  We do the following:
- *		- map "cmpw*" into "cmpd*"
- *		- map "srwi" into "srdi"
- * Perhaps surprisingly, this is enough to permit lots of code to run in 64-bit mode, as
- * long as it is written with this in mind.
- */
-static void
-commpage_port_32_to_64(
-    int 	address, 
-    int 	length	)
-{
-    uint32_t	*ptr, search_mask, search, replace_mask, replace;
-
-	ptr = commpage_addr_of(address);
-	
-	search_mask =	0xFC2007FE;		// search x-form opcode bits (and L bit)
-	search =		0x7C000000;		// for a CMPW
-	replace_mask = 	0x00200000;		// replace bit 10 (L)...
-	replace =		0x00200000;		// ...with a 1-bit, converting word to doubleword compares
-	commpage_change(ptr,length,search_mask,search,replace_mask,replace,NULL);
-
-	search_mask =	0xFC2007FE;		// search x-form opcode bits (and L bit)
-	search =		0x7C000040;		// for a CMPLW
-	replace_mask = 	0x00200000;		// replace bit 10 (L)...
-	replace =		0x00200000;		// ...with a 1-bit, converting word to doubleword compares
-	commpage_change(ptr,length,search_mask,search,replace_mask,replace,NULL);
-
-	search_mask =	0xFC200000;		// search d-form opcode bits (and L bit)
-	search =		0x28000000;		// for a CMPLWI
-	replace_mask = 	0x00200000;		// replace bit 10 (L)...
-	replace =		0x00200000;		// ...with a 1-bit, converting word to doubleword compares
-	commpage_change(ptr,length,search_mask,search,replace_mask,replace,NULL);
-
-	search_mask =	0xFC200000;		// search d-form opcode bits (and L bit)
-	search =		0x2C000000;		// for a CMPWI
-	replace_mask = 	0x00200000;		// replace bit 10 (L)...
-	replace =		0x00200000;		// ...with a 1-bit, converting word to doubleword compares
-	commpage_change(ptr,length,search_mask,search,replace_mask,replace,NULL);
-	
-	search_mask =	0xFC00003E;		// search d-form opcode bits and ME (mask end) field
-	search =		0x5400003E;		// for an RLWINM with ME=31 (which might be a "srwi")
-	replace_mask = 	0xFC00003E;		// then replace RLWINM's opcode and ME field to make a RLDICL
-	replace =		0x78000002;		// opcode is 30, ME is 0, except we add 32 to SH amount
-	commpage_change(ptr,length,search_mask,search,replace_mask,replace,commpage_srwi);
-} 
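-
-/* Worked example: "cmplwi cr7,r4,128" assembles to 0x2B840080.  Masked with
- * 0xFC200000 this yields 0x28000000, matching the CMPLWI pattern above;
- * OR-ing in the L bit (0x00200000) gives 0x2BA40080, ie "cmpldi cr7,r4,128",
- * the doubleword compare the 64-bit commpage needs.
- */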
-
-
-/* Copy a routine into comm page if it matches running machine.
- */
-static void
-commpage_stuff_routine(
-    commpage_descriptor	*rd,
-	int					mode )				// kCommPage32 or kCommPage64
-{
-    char	*routine_code;
-    int		must,cant;
-	
-	if ( (rd->special & mode) == 0 )		// is this routine useable in this mode?
-		return;
-    
-    if (rd->commpage_address != cur_routine) {
-        if ((cur_routine!=0) && (matched==0))
-            panic("commpage no match for last, next address %08x", rd->commpage_address);
-        cur_routine = rd->commpage_address;
-        matched = 0;
-    }
-    
-    must = _cpu_capabilities & rd->musthave;
-    cant = _cpu_capabilities & rd->canthave;
-    
-    if ((must == rd->musthave) && (cant == 0)) {
-        if (matched)
-            panic("commpage multiple matches for address %08x", rd->commpage_address);
-        matched = 1;
-        routine_code = ((char*)rd) + rd->code_offset;
-        
-        commpage_stuff(rd->commpage_address,routine_code,rd->code_length);
-        
-        if (rd->special & kCommPageDCBA)
-            commpage_handle_dcbas(rd->commpage_address,rd->code_length);
-            
-        if (rd->special & kCommPageSYNC)
-            commpage_handle_syncs(rd->commpage_address,rd->code_length);
-            
-        if (rd->special & kCommPageISYNC)
-            commpage_handle_isyncs(rd->commpage_address,rd->code_length);
-            
-        if (rd->special & kCommPageMTCRF)
-            commpage_handle_mtcrfs(rd->commpage_address,rd->code_length);
-			
-		if ((mode == kCommPage64) && (rd->special & kPort32to64))
-			commpage_port_32_to_64(rd->commpage_address,rd->code_length);
-    }
-}
-
-
-/* Fill in the 32- or 64-bit commpage.  Called once for each.  */
-
-static void
-commpage_populate_one( 
-	vm_map_t	submap,			// the map to populate
-	char	**  kernAddressPtr,	// address within kernel of this commpage
-	int			mode,           // either kCommPage32 or kCommPage64
-    const char* signature )     // "commpage 32-bit" or "commpage 64-bit"
-{
-    char	c1;
-    short	c2;
-    addr64_t c8;
-    static double	two52 = 1048576.0 * 1048576.0 * 4096.0;	// 2**52
-    static double	ten6 = 1000000.0;						// 10**6
-    static uint64_t magicFE = 0xFEFEFEFEFEFEFEFFLL;         // used to find 0s in strings
-    static uint64_t magic80 = 0x8080808080808080LL;         // also used to find 0s
-    commpage_descriptor	**rd;
-    short	version = _COMM_PAGE_THIS_VERSION;
-    
-    next = NULL;								// initialize next available byte in the commpage
-	cur_routine = 0;							// initialize comm page address of "current" routine
-	
-    commPagePtr = (char*) commpage_allocate( submap );
-    *kernAddressPtr = commPagePtr;				// save address either in commPagePtr32 or 64
-
-    /* Stuff in the constants.  We move things into the comm page in strictly
-     * ascending order, so we can check for overlap and panic if so.
-     */
-    
-    commpage_stuff(_COMM_PAGE_SIGNATURE,signature,strlen(signature));
-    
-    commpage_stuff(_COMM_PAGE_VERSION,&version,2);
-
-    commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES,&_cpu_capabilities,sizeof(int));
-    
-    c1 = (_cpu_capabilities & kHasAltivec) ? -1 : 0;
-    commpage_stuff(_COMM_PAGE_ALTIVEC,&c1,1);
-    
-    c1 = (_cpu_capabilities & k64Bit) ? -1 : 0;
-    commpage_stuff(_COMM_PAGE_64_BIT,&c1,1);
-    
-    if (_cpu_capabilities & kCache32)
-        c2 = 32;
-    else if (_cpu_capabilities & kCache64)
-        c2 = 64;
-    else if (_cpu_capabilities & kCache128)
-        c2 = 128;
-    commpage_stuff(_COMM_PAGE_CACHE_LINESIZE,&c2,2);
-    
-    commpage_stuff(_COMM_PAGE_2_TO_52,&two52,8);    
-    commpage_stuff(_COMM_PAGE_10_TO_6,&ten6,8);
-    commpage_stuff(_COMM_PAGE_MAGIC_FE,&magicFE,8);
-    commpage_stuff(_COMM_PAGE_MAGIC_80,&magic80,8);
-    
-    c8 = 0;													// 0 timestamp means "disabled"
-    commpage_stuff(_COMM_PAGE_TIMEBASE,&c8,8);
-    commpage_stuff(_COMM_PAGE_TIMESTAMP,&c8,8);
-    commpage_stuff(_COMM_PAGE_SEC_PER_TICK,&c8,8);
-
-    /* Now the routines.  We try each potential routine in turn,
-     * and copy in any that "match" the platform we are running on.
-     * We require that exactly one routine match for each slot in the
-     * comm page, and panic if not.
-     */
-        
-    for( rd = routines; *rd != NULL ; rd++ ) 
-        commpage_stuff_routine(*rd,mode);
-        
-    if (!matched)
-        panic("commpage no match on last routine");
-    
-    if (next > (commPagePtr + _COMM_PAGE_AREA_USED))
-        panic("commpage overflow");
-	
-	
-	// make all that new code executable
-	
-    sync_cache_virtual((vm_offset_t) commPagePtr,_COMM_PAGE_AREA_USED);
-}
-
-
-/* Fill in commpage: called once, during kernel initialization, from the
- * startup thread before user-mode code is running.
- *
- * See the top of this file for a list of what you have to do to add
- * a new routine to the commpage.
- */  
-
-void
-commpage_populate( void )
-{
-    commpage_init_cpu_capabilities();
-	commpage_populate_one( commpage32_map, &commPagePtr32, kCommPage32, "commpage 32-bit");
-	if (_cpu_capabilities & k64Bit) {
-		commpage_populate_one( commpage64_map, &commPagePtr64, kCommPage64, "commpage 64-bit");
-		pmap_init_sharedpage((vm_offset_t)commPagePtr64);			// Do the 64-bit version        
-	}
-        
-}
diff --git a/osfmk/ppc/commpage/commpage.h b/osfmk/ppc/commpage/commpage.h
deleted file mode 100644
index 64a139faf..000000000
--- a/osfmk/ppc/commpage/commpage.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2003-2008 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#ifndef _PPC_COMMPAGE_H
-#define _PPC_COMMPAGE_H
-
-#ifndef	__ASSEMBLER__
-#include <stdint.h>
-#endif /* __ASSEMBLER__ */
-
-
-/* Special check bits for the commpage_descriptor "special" field. */
- 
-#define	kCommPageDCBA		0x0001			// this routine uses DCBA, map to NOP if not appropriate
-#define	kCommPageSYNC		0x0002			// this routine uses SYNC, LWSYNC, or EIEIO, map to NOP if UP
-#define kCommPageISYNC		0x0004			// this routine uses ISYNC, map to NOP if UP
-#define	kCommPageMTCRF		0x0008			// set bit 11 in MTCRF if only 1 cr specified
-
-#define kPort32to64			0x1000			// written for 32-bit, must port to 64-bit
-#define kCommPage64			0x2000			// this routine is useable in 64-bit mode
-#define kCommPage32			0x4000			// this routine is useable in 32-bit mode
-#define kCommPageBoth		(kCommPage32+kCommPage64)
-
-
-#ifdef	__ASSEMBLER__
-
-#define	COMMPAGE_DESCRIPTOR(label,address,must,cant,special)	\
-    .globl  EXT(label)  @\
-LEXT(label)	@\
-    .short	label-.	@\
-    .short	.-label-2	@\
-    .short	address	@\
-    .short	special	@\
-    .long	must    @\
-    .long	cant
-    
-
-#else /* __ASSEMBLER__ */
-
-/* Each potential commpage routine is described by one of these.
- * Note that the COMMPAGE_DESCRIPTOR macro (above), used in
- * assembly language, must agree with this.
- */
- 
-typedef	struct	commpage_descriptor	{
-    short	code_offset;					// offset to code from this descriptor
-    short	code_length;					// length in bytes
-    short	commpage_address;				// put at this address (_COMM_PAGE_BCOPY etc)
-    short	special;						// special handling bits for DCBA and SYNC etc
-    long	musthave;						// _cpu_capability bits we must have
-    long	canthave;						// _cpu_capability bits we can't have
-} commpage_descriptor;
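-
-/* Illustrative usage (a sketch, not part of this header): because the
- * assembler macro emits its fields in exactly this order, the kernel can
- * locate a routine's code from its descriptor with
- *
- *	char *code = (char *)rd + rd->code_offset;	// offset is descriptor-relative
- *
- * which is what commpage_stuff_routine() in commpage.c does.
- */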
-
-
-extern	char	*commPagePtr32;				// virt address of 32-bit commpage in kernel map
-extern	char	*commPagePtr64;				// virt address of 64-bit commpage in kernel map
-
-extern	void	commpage_set_timestamp(uint64_t tbr, uint64_t secs, uint32_t ticks_per_sec);
-
-#define	commpage_disable_timestamp() commpage_set_timestamp( 0, 0, 0 )
-#define commpage_set_memory_pressure( pressure )
-
-extern	int	commpage_time_dcba( void );
-
-#endif	/* __ASSEMBLER__ */
-
-#endif /* _PPC_COMMPAGE_H */
diff --git a/osfmk/ppc/commpage/commpage_asm.s b/osfmk/ppc/commpage/commpage_asm.s
deleted file mode 100644
index d3ea83c24..000000000
--- a/osfmk/ppc/commpage/commpage_asm.s
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- * Copyright (c) 2003-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-
-// commpage_time_dcba() uses a stack frame as follows:
-
-#define	kBufSiz		1024				// Size of the buffer we use to do DCBA timing on G4
-#define	kSFSize		(kBufSiz+128+16)	// Stack frame size, which contains the 128-byte-aligned buffer
-#define	kLoopCnt	5					// Iterations of the timing loop
-#define	kDCBA		22					// Bit in cr5 used as a flag in timing loop
-
-
-// commpage_set_timestamp() uses the red zone for temporary storage:
-
-#define	rzSaveF1			-8		// caller's FPR1
-#define	rzSaveF2			-16		// caller's FPR2
-#define	rzSaveF3			-24		// caller's FPR3
-#define	rzSaveF4			-32		// caller's FPR4
-#define	rzSaveF5			-40		// caller's FPR5
-#define	rzNewTimeBase		-48		// used to load 64-bit TBR into a FPR
-
-
-// commpage_set_timestamp() uses the following data.  kkTicksPerSec remembers
-// the number used to compute _COMM_PAGE_SEC_PER_TICK.  Since this constant
-// rarely changes, we use it to avoid needless recomputation.  It is a double
-// value, pre-initialized with an exponent of 2**52: storing the 32-bit tick
-// count into its low word yields the double (2**52 + ticks_per_sec), so a
-// single fsub of 2**52 floats it.
-
-#define	kkBinary0		0					// offset in data to long long 0 (a constant)
-#define	kkDouble1		8					// offset in data to double 1.0 (a constant)
-#define	kkTicksPerSec	16					// offset in data to double(ticks_per_sec)
-
-        .data
-        .align	3							// three doubleword fields
-Ldata:
-        .long	0							// kkBinary0
-        .long	0
-        .double	1.0e0						// kkDouble1        
-        .long	0x43300000					// kkTicksPerSec (plus 2**52)
-        .long	0							// ticks_per_sec is stored here, so it can be floated
-
-        .text
-        .align	2
-        .globl	EXT(commpage_time_dcba)
-        .globl	EXT(commpage_set_timestamp)
-
-
-/*	***********************************************
- *	* C O M M P A G E _ S E T _ T I M E S T A M P *
- *	***********************************************
- *
- *	Update the gettimeofday() shared data on the commpages, as follows:
- *		_COMM_PAGE_TIMESTAMP = the clock offset at timebase (seconds)
- *		_COMM_PAGE_TIMEBASE = the timebase at which the timestamp was valid
- *		_COMM_PAGE_SEC_PER_TICK = multiply timebase ticks by this to get seconds (double)
- *	The convention is that if the timebase is 0, the data is invalid.  Because other
- *	CPUs are reading the three values asynchronously and must get a consistent set, 
- *	it is critical that we update them with the following protocol:
- *		1. set timebase to 0 (atomically), to invalidate all three values
- *		2. eieio (to create a barrier in stores to cacheable memory)
- *		3. change timestamp and "secs per tick"
- *		4. eieio
- *		5. set timebase nonzero (atomically)
- *	This works because readers read the timebase, then the timestamp and divisor, sync
- *	if MP, then read the timebase a second time and check to be sure it is equal to the first.
- *
- *	We could save a few cycles on 64-bit machines by special casing them, but it probably
- *	isn't necessary because this routine shouldn't be called very often.
- *
- *	When called:
- *		r3 = upper half of timebase (timebase is disabled if 0)
- *		r4 = lower half of timebase
- *		r5 = upper half of timestamp
- *		r6 = lower half of timestamp
- *		r7 = divisor (ie, timebase ticks per sec)
- *	We set up:
- *		r8 = ptr to our static data (kkBinary0, kkDouble1, kkTicksPerSec)
- *		r9 = ptr to 32-bit commpage in kernel map
- *     r10 = ptr to 64-bit commpage in kernel map
- *
- *	--> Interrupts must be disabled and rtclock locked when called.  <--
- */
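-
-/*
- *	The reader side of this protocol, sketched in C (illustrative only;
- *	the names below are descriptive, not real symbols):
- *
- *		do {
- *			tb1 = commpage_timebase;	// 0 means invalid: make the syscall
- *			ts  = commpage_timestamp;
- *			spt = commpage_sec_per_tick;
- *			sync_if_mp();			// order the loads on MP machines
- *			tb2 = commpage_timebase;
- *		} while (tb1 != tb2);			// retry if an update raced with us
- */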
- 
-        .align	5
-LEXT(commpage_set_timestamp)				// void commpage_set_timestamp(tbr,secs,divisor)
-        mfmsr	r11							// get MSR
-        ori		r2,r11,MASK(MSR_FP)			// turn FP on
-        mtmsr	r2
-        isync								// wait until MSR changes take effect
-        
-        or.		r0,r3,r4					// is timebase 0? (thus disabled)
-        lis		r8,hi16(Ldata)				// point to our data
-        lis		r9,ha16(EXT(commPagePtr32))	// get ptrs to address of commpages in kernel map
-		lis		r10,ha16(EXT(commPagePtr64))
-        stfd	f1,rzSaveF1(r1)				// save a FPR in the red zone
-        ori		r8,r8,lo16(Ldata)
-        lwz		r9,lo16(EXT(commPagePtr32))(r9)	// r9 <- 32-bit commpage ptr
-		lwz		r10,lo16(EXT(commPagePtr64))(r10) // r10 <- 64-bit commpage ptr
-        lfd		f1,kkBinary0(r8)			// get fixed 0s
-        li		r0,_COMM_PAGE_BASE_ADDRESS	// get va in user space of commpage
-        cmpwi	cr1,r9,0					// is 32-bit commpage allocated yet?
-		cmpwi   cr6,r10,0					// is 64-bit commpage allocated yet?
-        sub		r9,r9,r0					// r9 <- 32-bit commpage address, biased by user va
-		sub		r10,r10,r0					// r10<- 64-bit commpage address
-        beq--	cr1,3f						// skip if 32-bit commpage not allocated (64-bit won't be either)
-		bne++   cr6,1f						// skip if 64-bit commpage is allocated
-		mr		r10,r9						// if no 64-bit commpage, point to 32-bit version with r10 too
-1:
-        stfd	f1,_COMM_PAGE_TIMEBASE(r9)	// turn off the 32-bit-commpage timestamp (atomically)
-		stfd	f1,_COMM_PAGE_TIMEBASE(r10) // and the 64-bit one too
-        eieio								// make sure all CPUs see it is off
-        beq		3f							// all we had to do is turn off timestamp
-        
-        lwz		r0,kkTicksPerSec+4(r8)		// get last ticks_per_sec (or 0 if first)
-        stw		r3,rzNewTimeBase(r1)		// store new timebase so we can lfd
-        stw		r4,rzNewTimeBase+4(r1)
-        cmpw	r0,r7						// do we need to recompute _COMM_PAGE_SEC_PER_TICK?
-        stw		r5,_COMM_PAGE_TIMESTAMP(r9)	// store the new timestamp in the 32-bit page
-        stw		r6,_COMM_PAGE_TIMESTAMP+4(r9)
-        stw		r5,_COMM_PAGE_TIMESTAMP(r10)// and the 64-bit commpage
-        stw		r6,_COMM_PAGE_TIMESTAMP+4(r10)
-        lfd		f1,rzNewTimeBase(r1)		// get timebase in a FPR so we can store atomically
-        beq++	2f							// same ticks_per_sec, no need to recompute
-        
-        stw		r7,kkTicksPerSec+4(r8)		// must recompute SEC_PER_TICK
-        stfd	f2,rzSaveF2(r1)				// we'll need a few more temp FPRs
-        stfd	f3,rzSaveF3(r1)
-        stfd	f4,rzSaveF4(r1)
-        stfd	f5,rzSaveF5(r1)
-        lfd		f2,_COMM_PAGE_2_TO_52(r9)	// f2 <- double(2**52)
-        lfd		f3,kkTicksPerSec(r8)		// float new ticks_per_sec + 2**52
-        lfd		f4,kkDouble1(r8)			// f4 <- double(1.0)
-        mffs	f5							// save caller's FPSCR
-        mtfsfi	7,1							// clear Inexact Exception bit, set round-to-zero
-        fsub	f3,f3,f2					// get ticks_per_sec
-        fdiv	f3,f4,f3					// divide 1 by ticks_per_sec to get SEC_PER_TICK
-        stfd	f3,_COMM_PAGE_SEC_PER_TICK(r9)
-        stfd	f3,_COMM_PAGE_SEC_PER_TICK(r10)
-        mtfsf	0xFF,f5						// restore FPSCR
-        lfd		f2,rzSaveF2(r1)				// restore FPRs
-        lfd		f3,rzSaveF3(r1)
-        lfd		f4,rzSaveF4(r1)
-        lfd		f5,rzSaveF5(r1)
-2:											// f1 == new timestamp
-        eieio								// wait until the stores take
-        stfd	f1,_COMM_PAGE_TIMEBASE(r9)	// then turn the timestamp back on (atomically)
-        stfd	f1,_COMM_PAGE_TIMEBASE(r10)	// both
-3:											// here once all fields updated
-        lfd		f1,rzSaveF1(r1)				// restore last FPR
-        mtmsr	r11							// turn FP back off
-        isync
-        blr
-
-
-/*	***************************************
- *	* C O M M P A G E _ T I M E _ D C B A *
- *	***************************************
- *
- *	Not all processors that support the DCBA opcode actually benefit from it.
- *	Some store-gather and read-cancel well enough that there is no need to use
- *	DCBA to avoid fetching cache lines that will be completely overwritten, while
- *	others have this feature disabled (to work around errata etc), and so benefit
- *	from DCBA.  Since it is hard to tell the one group from the other, we just
- *	time loops with and without DCBA, and pick the fastest.  Thus we avoid
- *	delicate dependence on processor and/or platform revisions.
- *
- *	We return either kDcbaRecommended or zero.
- *
- *		int commpage_time_dcba( void );
- */
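-
-//	In outline (illustrative C; "time_zeroing_loop" is a hypothetical name
-//	for the LTest subroutine below):
-//
-//		uint32_t with    = time_zeroing_loop(true);	// fastest ticks using DCBA
-//		uint32_t without = time_zeroing_loop(false);	// fastest ticks without it
-//		return (with + with/8) < without ? kDcbaRecommended : 0;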
- 
-LEXT(commpage_time_dcba)
-        mflr	r12					// get return
-        stw		r12,8(r1)			// save
-        stwu	r1,-kSFSize(r1)		// carve our temp buffer from the stack
-        addi	r11,r1,127+16		// get base address...
-        rlwinm	r11,r11,0,0,24		// ...of our buffer, 128-byte aligned
-        crset	kDCBA				// first, use DCBA
-        bl		LTest				// time it with DCBA
-        srwi	r0,r3,3				// bias 12.5 pct (1/8) in favor of not using DCBA...
-        add		r10,r3,r0			// ...because DCBA is always slower with warm cache
-        crclr	kDCBA
-        bl		LTest				// time without DCBA
-        cmplw	r10,r3				// which is better?
-        mtlr	r12					// restore return
-        lwz		r1,0(r1)			// pop off our stack frame
-        li		r3,kDcbaRecommended		// assume using DCBA is faster
-        bltlr
-        li		r3,0			// no DCBA is faster
-        blr
-                
-        
-// Subroutine to time a loop with or without DCBA.
-//		kDCBA = set if we should use DCBA
-//		r11 = base of buffer to use for test (kBufSiz bytes)
-//
-//		We return TBR ticks in r3.
-//		We use r0,r3-r9.
-
-LTest:
-        li		r4,kLoopCnt			// number of times to loop
-        li		r3,-1				// initialize fastest time
-1:
-        mr		r6,r11				// initialize buffer ptr
-        li		r0,kBufSiz/32		// r0 <- cache blocks to test
-        mtctr	r0
-2:
-        dcbf	0,r6				// first, force the blocks out of the cache
-        addi	r6,r6,32
-        bdnz	2b
-        sync						// make sure all the flushes take
-        mr		r6,r11				// re-initialize buffer ptr
-        mtctr	r0					// reset cache-block count
-        mftbu	r7					// remember upper half so we can check for carry
-        mftb	r8					// start the timer
-3:									// loop over cache blocks
-        bf		kDCBA,4f			// should we DCBA?
-        dcba	0,r6
-4:
-        stw		r0,0(r6)			// store the entire cache block
-        stw		r0,4(r6)
-        stw		r0,8(r6)
-        stw		r0,12(r6)
-        stw		r0,16(r6)
-        stw		r0,20(r6)
-        stw		r0,24(r6)
-        stw		r0,28(r6)
-        addi	r6,r6,32
-        bdnz	3b
-        mftb	r9
-        mftbu	r0
-        cmpw	r0,r7				// did timebase carry?
-        bne		1b					// yes, retest rather than fuss
-        sub		r9,r9,r8			// r9 <- time for this loop
-        cmplw	r9,r3				// faster than current best?
-        bge		5f					// no
-        mr		r3,r9				// remember fastest time through loop
-5:
-        subi	r4,r4,1				// decrement outer loop count
-        cmpwi	r4,0				// more to go?
-        bne		1b					// loop if so
-        blr							// return fastest time in r3
diff --git a/osfmk/ppc/commpage/gettimeofday.s b/osfmk/ppc/commpage/gettimeofday.s
deleted file mode 100644
index e9645ee37..000000000
--- a/osfmk/ppc/commpage/gettimeofday.s
+++ /dev/null
@@ -1,255 +0,0 @@
-/*
- * Copyright (c) 2003-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>					// EXT, LEXT
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-/* The red zone is used to move data between GPRs and FPRs: */
-
-#define	rzTicks			-8			// elapsed ticks since timestamp (double)
-#define	rzSeconds		-16			// seconds since timestamp (double)
-#define	rzUSeconds		-24			// useconds since timestamp (double)
-
-
-        .text
-        .align	2
-
-
-// *********************************
-// * G E T T I M E O F D A Y _ 3 2 *
-// *********************************
-//
-// This is a subroutine of gettimeofday.c that gets the seconds and microseconds
-// in user mode, usually without having to make a system call.  We do not deal with
-// the timezone.  The kernel maintains the following values in the comm page:
-//
-//	_COMM_PAGE_TIMESTAMP = 64 bit seconds timestamp
-//
-//	_COMM_PAGE_TIMEBASE = the timebase at which the timestamp was valid
-//
-//	_COMM_PAGE_SEC_PER_TICK = multiply timebase ticks by this to get seconds (double)
-//
-//	_COMM_PAGE_2_TO_52 = double precision constant 2**52
-//
-//	_COMM_PAGE_10_TO_6 = double precision constant 10**6
-//
-// We have to be careful to read these values atomically.  The kernel updates them 
-// asynchronously to account for drift or time changes (eg, ntp.)  We adopt the
-// convention that (timebase==0) means the timestamp is invalid, in which case we
-// return a bad status so our caller can make the system call.
-//
-//		r3 = ptr to user's timeval structure (should not be null)
-
-gettimeofday_32:								// int gettimeofday(timeval *tp);
-0:
-        lwz		r5,_COMM_PAGE_TIMEBASE+0(0)		// r5,r6 = TBR at timestamp
-        lwz		r6,_COMM_PAGE_TIMEBASE+4(0)
-        lwz		r8,_COMM_PAGE_TIMESTAMP+4(0)	// r8 = timestamp 32 bit seconds
-        lfd		f1,_COMM_PAGE_SEC_PER_TICK(0)
-1:        
-        mftbu	r10								// r10,r11 = current timebase
-        mftb	r11
-        mftbu	r12
-        cmplw	r10,r12
-        bne-	1b
-        or.		r0,r5,r6						// timebase 0? (ie, is timestamp invalid?)
-        
-        sync									// create a barrier (patched to NOP if UP)
-        
-        lwz		r0,_COMM_PAGE_TIMEBASE+0(0)		// then load data a 2nd time
-        lwz		r12,_COMM_PAGE_TIMEBASE+4(0)
-        lwz		r9,_COMM_PAGE_TIMESTAMP+4(0)
-        cmplw	cr6,r5,r0			// did we read a consistent set?
-        cmplw	cr7,r6,r12
-        beq-	3f					// timestamp is disabled so return bad status
-        cmplw	cr5,r9,r8
-        crand	cr0_eq,cr6_eq,cr7_eq
-        crand	cr0_eq,cr0_eq,cr5_eq
-        bne-	0b					// loop until we have a consistent set of data
-        
-        subfc	r11,r6,r11			// compute ticks since timestamp
-        lwz		r9,_COMM_PAGE_2_TO_52(0)	// get exponent for (2**52)
-        subfe	r10,r5,r10			// complete 64-bit subtract
-        lfd		f2,_COMM_PAGE_2_TO_52(0)	// f2 <- (2**52)
-        srwi.	r0,r10,2			// if more than 2**34 ticks have elapsed...
-        stw		r11,rzTicks+4(r1)	// store elapsed ticks into red zone
-        or		r10,r10,r9			// convert long-long in (r10,r11) into double
-        bne-	3f					// ...call kernel to reprime timestamp
-
-        stw		r10,rzTicks(r1)		// complete double
-
-		mffs	f7
-		mtfsfi	7,1        
-        lfd		f3,rzTicks(r1)		// get elapsed ticks since timestamp + 2**52
-        fsub	f4,f3,f2			// subtract 2**52 and normalize
-        fmul	f5,f4,f1			// f5 <- elapsed seconds since timestamp
-        lfd		f3,_COMM_PAGE_10_TO_6(0)	// get 10**6
-        fctiwz	f6,f5				// convert to integer
-        stfd	f6,rzSeconds(r1)	// store integer seconds into red zone
-        stw		r9,rzSeconds(r1)	// prepare to reload as floating pt
-        lfd		f6,rzSeconds(r1)	// get seconds + 2**52
-        fsub	f6,f6,f2			// f6 <- integral seconds
-        fsub	f6,f5,f6			// f6 <- fractional part of elapsed seconds
-        fmul	f6,f6,f3			// f6 <- fractional elapsed useconds
-        fctiwz	f6,f6				// convert useconds to integer
-        stfd	f6,rzUSeconds(r1)	// store useconds into red zone
-		mtfsf	0xff,f7
-        
-        lwz		r5,rzSeconds+4(r1)	// r5 <- seconds since timestamp
-        lwz		r7,rzUSeconds+4(r1)	// r7 <- useconds since timestamp
-        add		r6,r8,r5			// add elapsed seconds to timestamp seconds
-        
-        stw		r6,0(r3)			// store secs/usecs into user's timeval
-        stw		r7,4(r3)
-        li		r3,0				// return success
-        blr
-3:									// too long since last timestamp or this code is disabled
-        li		r3,1				// return bad status so our caller will make syscall
-        blr
-        
-	COMMPAGE_DESCRIPTOR(gettimeofday_32,_COMM_PAGE_GETTIMEOFDAY,0,k64Bit,kCommPageSYNC+kCommPage32)
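
For orientation, here is a C rendering of the fast path these routines implement.  This is
an editor's sketch, not Apple's Libc source: the comm-page struct, read_timebase(), and the
exact staleness cutoff are assumptions paraphrasing the assembly.  The 32-bit routine above
has no fcfid, so it synthesizes the (double)ticks conversion with the 2**52 bias trick; the
G5 variants below use fcfid directly.

    #include <stdint.h>
    #include <sys/time.h>

    struct commpage_time {              /* hypothetical view of the comm page */
        volatile uint64_t timebase;     /* _COMM_PAGE_TIMEBASE */
        volatile uint64_t timestamp;    /* _COMM_PAGE_TIMESTAMP (seconds) */
        double            sec_per_tick; /* _COMM_PAGE_SEC_PER_TICK */
    };

    static inline uint64_t read_timebase(void)
    {
        uint64_t tb;
        __asm__ volatile ("mftb %0" : "=r"(tb));    /* 64-bit form */
        return tb;
    }

    static int fast_gettimeofday(const struct commpage_time *cp, struct timeval *tp)
    {
        uint64_t tb0, stamp, now;
        do {                                /* loop until we read a consistent set */
            tb0   = cp->timebase;
            stamp = cp->timestamp;
            now   = read_timebase();
            /* barrier here on MP (sync/lwsync, patched to NOP if UP) */
        } while (tb0 != cp->timebase || stamp != cp->timestamp);

        if (tb0 == 0)                       /* timestamp disabled */
            return 1;                       /* caller falls back to the syscall */

        uint64_t ticks = now - tb0;
        if (ticks >> 35)                    /* too stale (cutoff varies by routine) */
            return 1;                       /* have the kernel reprime the timestamp */

        double   elapsed = (double)ticks * cp->sec_per_tick;
        uint64_t secs    = (uint64_t)elapsed;
        tp->tv_sec  = (time_t)(stamp + secs);
        tp->tv_usec = (suseconds_t)((elapsed - (double)secs) * 1e6);
        return 0;
    }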
-        
-        
-// ***************************************
-// * G E T T I M E O F D A Y _ G 5 _ 3 2 *
-// ***************************************
-//
-// This routine is called in 32-bit mode on 64-bit processors.  A timeval is a struct
-// of a long (seconds) and an int (microseconds), so its size depends on mode.
-
-gettimeofday_g5_32:							// int gettimeofday(timeval *tp);
-0:
-        ld		r6,_COMM_PAGE_TIMEBASE(0)	// r6 = TBR at timestamp
-        ld		r8,_COMM_PAGE_TIMESTAMP(0)	// r8 = timestamp (seconds)
-        lfd		f1,_COMM_PAGE_SEC_PER_TICK(0)
-        mftb	r10							// r10 = get current timebase
-        lwsync								// create a barrier if MP (patched to NOP if UP)
-        ld		r11,_COMM_PAGE_TIMEBASE(0)	// then get data a 2nd time
-        ld		r12,_COMM_PAGE_TIMESTAMP(0)
-        cmpdi	cr1,r6,0			// is the timestamp disabled?
-        cmpld	cr6,r6,r11			// did we read a consistent set?
-        cmpld	cr7,r8,r12
-        beq--	cr1,3f				// exit if timestamp disabled
-        crand	cr6_eq,cr7_eq,cr6_eq
-        sub		r11,r10,r6			// compute elapsed ticks from timestamp
-        bne--	cr6,0b				// loop until we have a consistent set of data
-                
-        srdi.	r0,r11,35			// has it been more than 2**35 ticks since last timestamp?
-        std		r11,rzTicks(r1)		// put ticks in redzone where we can "lfd" it
-        bne--	3f					// timestamp too old, so reprime
-
-		mffs	f7
-		mtfsfi	7,1
-        lfd		f3,rzTicks(r1)		// get elapsed ticks since timestamp (fixed pt)
-        fcfid	f4,f3				// float the tick count
-        fmul	f5,f4,f1			// f5 <- elapsed seconds since timestamp
-        lfd		f3,_COMM_PAGE_10_TO_6(0)	// get 10**6
-        fctidz	f6,f5				// truncate seconds to a fixed pt integer
-        stfd	f6,rzSeconds(r1)	// save fixed pt integer seconds in red zone
-        fcfid	f6,f6				// float the integer seconds
-        fsub	f6,f5,f6			// f6 <- fractional part of elapsed seconds
-        fmul	f6,f6,f3			// f6 <- fractional elapsed useconds
-        fctidz	f6,f6				// convert useconds to fixed pt integer
-        stfd	f6,rzUSeconds(r1)	// store useconds into red zone
-		mtfsf	0xff,f7
-        
-        lwz		r5,rzSeconds+4(r1)	// r5 <- seconds since timestamp
-        lwz		r7,rzUSeconds+4(r1)	// r7 <- useconds since timestamp
-        add		r6,r8,r5			// add elapsed seconds to timestamp seconds
-        
-        stw		r6,0(r3)			// store secs/usecs into user's timeval
-        stw		r7,4(r3)
-        li		r3,0				// return success
-        blr
-3:									// too long since last timestamp or this code is disabled
-        li		r3,1				// return bad status so our caller will make syscall
-        blr
-
-	COMMPAGE_DESCRIPTOR(gettimeofday_g5_32,_COMM_PAGE_GETTIMEOFDAY,k64Bit,0,kCommPageSYNC+kCommPage32)
-        
-        
-// ***************************************
-// * G E T T I M E O F D A Y _ G 5 _ 6 4 *
-// ***************************************
-//
-// This routine is called in 64-bit mode on 64-bit processors.  A timeval is a struct
-// of a long (seconds) and an int (microseconds), so its size depends on mode.
-
-gettimeofday_g5_64:							// int gettimeofday(timeval *tp);
-0:
-        ld		r6,_COMM_PAGE_TIMEBASE(0)	// r6 = TBR at timestamp
-        ld		r8,_COMM_PAGE_TIMESTAMP(0)	// r8 = timestamp (seconds)
-        lfd		f1,_COMM_PAGE_SEC_PER_TICK(0)
-        mftb	r10							// r10 = get current timebase
-        lwsync								// create a barrier if MP (patched to NOP if UP)
-        ld		r11,_COMM_PAGE_TIMEBASE(0)	// then get data a 2nd time
-        ld		r12,_COMM_PAGE_TIMESTAMP(0)
-        cmpdi	cr1,r6,0			// is the timestamp disabled?
-        cmpld	cr6,r6,r11			// did we read a consistent set?
-        cmpld	cr7,r8,r12
-        beq--	cr1,3f				// exit if timestamp disabled
-        crand	cr6_eq,cr7_eq,cr6_eq
-        sub		r11,r10,r6			// compute elapsed ticks from timestamp
-        bne--	cr6,0b				// loop until we have a consistent set of data
-                
-        srdi.	r0,r11,35			// has it been more than 2**35 ticks since last timestamp?
-        std		r11,rzTicks(r1)		// put ticks in redzone where we can "lfd" it
-        bne--	3f					// timestamp too old, so reprime
-
-		mffs	f7
-		mtfsfi	7,1
-        lfd		f3,rzTicks(r1)		// get elapsed ticks since timestamp (fixed pt)
-        fcfid	f4,f3				// float the tick count
-        fmul	f5,f4,f1			// f5 <- elapsed seconds since timestamp
-        lfd		f3,_COMM_PAGE_10_TO_6(0)	// get 10**6
-        fctidz	f6,f5				// truncate seconds to a fixed pt integer
-        stfd	f6,rzSeconds(r1)	// save fixed pt integer seconds in red zone
-        fcfid	f6,f6				// float the integer seconds
-        fsub	f6,f5,f6			// f6 <- fractional part of elapsed seconds
-        fmul	f6,f6,f3			// f6 <- fractional elapsed useconds
-        fctidz	f6,f6				// convert useconds to fixed pt integer
-        stfd	f6,rzUSeconds(r1)	// store useconds into red zone
-		mtfsf	0xff,f7
-        
-        lwz		r5,rzSeconds+4(r1)	// r5 <- seconds since timestamp
-        lwz		r7,rzUSeconds+4(r1)	// r7 <- useconds since timestamp
-        add		r6,r8,r5			// add elapsed seconds to timestamp seconds
-        
-        std		r6,0(r3)			// store secs/usecs into user's timeval
-        stw		r7,8(r3)
-        li		r3,0				// return success
-        blr
-3:									// too long since last timestamp or this code is disabled
-        li		r3,1				// return bad status so our caller will make syscall
-        blr
-
-	COMMPAGE_DESCRIPTOR(gettimeofday_g5_64,_COMM_PAGE_GETTIMEOFDAY,k64Bit,0,kCommPageSYNC+kCommPage64)
-
-        
diff --git a/osfmk/ppc/commpage/mach_absolute_time.s b/osfmk/ppc/commpage/mach_absolute_time.s
deleted file mode 100644
index be9345dad..000000000
--- a/osfmk/ppc/commpage/mach_absolute_time.s
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>					// EXT, LEXT
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-        .text
-        .align	2
-
-
-// *********************************************
-// * M A C H _ A B S O L U T E _ T I M E _ 3 2 *
-// *********************************************
-
-mach_absolute_time_32:
-1:        
-        mftbu	r3
-        mftb	r4
-        mftbu	r5
-        cmplw	r3,r5
-        beqlr+
-        b		1b
-        
-	COMMPAGE_DESCRIPTOR(mach_absolute_time_32,_COMM_PAGE_ABSOLUTE_TIME,0,k64Bit,kCommPage32)
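
The loop above is the classic 32-bit timebase read: sample the upper half before and after
the lower half, and retry if a carry rippled between the two samples.  An editor's C sketch
of the same idiom (inline asm assumes a PPC32 target):

    #include <stdint.h>

    static uint64_t read_timebase_32(void)
    {
        uint32_t hi, lo, hi2;
        do {
            __asm__ volatile ("mftbu %0" : "=r"(hi));   /* upper 32 bits */
            __asm__ volatile ("mftb  %0" : "=r"(lo));   /* lower 32 bits */
            __asm__ volatile ("mftbu %0" : "=r"(hi2));  /* upper half again */
        } while (hi != hi2);        /* carried between reads: try again */
        return ((uint64_t)hi << 32) | lo;
    }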
-        
-        
-// *********************************************
-// * M A C H _ A B S O L U T E _ T I M E _ 6 4 *
-// *********************************************
-//
-// This is the version that is called in 32-bit mode, so we return the TBR in r3 and r4.
-
-mach_absolute_time_64:
-        mftb	r4
-        srdi	r3,r4,32
-        blr
-
-	COMMPAGE_DESCRIPTOR(mach_absolute_time_64,_COMM_PAGE_ABSOLUTE_TIME,k64Bit,0,kCommPage32)
-        
-        
-// *************************************************
-// * M A C H _ A B S O L U T E _ T I M E _ L P 6 4 *
-// *************************************************
-//
-// This is the version that is called in 64-bit mode, so we return the TBR in r3.
-
-mach_absolute_time_lp64:
-        mftb	r3
-        blr
-
-	COMMPAGE_DESCRIPTOR(mach_absolute_time_lp64,_COMM_PAGE_ABSOLUTE_TIME,k64Bit,0,kCommPage64)
-
-        
diff --git a/osfmk/ppc/commpage/memset_64.s b/osfmk/ppc/commpage/memset_64.s
deleted file mode 100644
index 187e742b6..000000000
--- a/osfmk/ppc/commpage/memset_64.s
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-/*
- * WARNING: this code is written for 32-bit mode, and ported by the kernel if necessary
- * to 64-bit mode for use in the 64-bit commpage.  This "port" consists of the following
- * simple transformations:
- *      - all word compares are changed to doubleword
- *      - all "srwi[.]" opcodes are changed to "srdi[.]"                      
- * Nothing else is done.  For this to work, the following rules must be
- * carefully followed:
- *      - do not use carry or overflow
- *      - only use record mode if you are sure the results are mode-invariant
- *        for example, all "andi." and almost all "rlwinm." are fine
- *      - do not use "slwi", "slw", or "srw"
- * An imaginative programmer could break the porting model in other ways, but the above
- * are the most likely problem areas.  It is perhaps surprising how well this simple
- * method works in practice.
- */        
-
-        .text
-        .align	2
-
-
-/* *********************
- * * M E M S E T _ 6 4 *
- * *********************
- *
- * This is a subroutine called by Libc memset and _memset_pattern for large nonzero
- * operands (zero operands are funneled into bzero.)  This version is for a
- * hypothetical processor that is 64-bit but lacks Altivec.
- * It is not optimized, since it would only be used during bringup.
- *
- * Registers at entry:
- *		r4 = count of bytes to store (must be >= 32)
- *      r8 = ptr to the 1st byte to store (16-byte aligned)
- *      r9 = ptr to 16-byte pattern to store (16-byte aligned)
- * When we return:
- *		r3 = not changed, since memset returns it
- *      r4 = bytes remaining to store (will be <32)
- *      r7 = not changed
- *      r8 = ptr to next byte to store (still 16-byte aligned)
- *     r12 = not changed (holds return value for memset)
- */
-
-memset_64:
-        srwi    r0,r4,5                 // get number of 32-byte chunks (>0)
-        ld      r10,0(r9)               // load pattern
-        ld      r11,8(r9)
-        rlwinm  r4,r4,0,0x1F            // mask down count
-        mtctr   r0                      // set up loop count
-        
-        // Loop over 32-byte chunks.
-1:
-        std     r10,0(r8)
-        std     r11,8(r8)
-        std     r10,16(r8)
-        std     r11,24(r8)
-        addi    r8,r8,32
-        bdnz++  1b
-
-        blr
-
-
-	COMMPAGE_DESCRIPTOR(memset_64,_COMM_PAGE_MEMSET_PATTERN,k64Bit,kHasAltivec, \
-				kCommPageBoth+kPort32to64)
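
In C terms, the contract shared by the memset_* variants in this directory looks roughly
like the following sketch (hypothetical, for orientation only; the real routines also keep
r3/r7/r12 untouched for the caller):

    #include <stdint.h>
    #include <stddef.h>
    #include <string.h>

    /* Store a 16-byte pattern over every complete 32-byte chunk and report
     * the residual count, mirroring what the routine leaves in r4. */
    static size_t memset_chunks32(uint8_t *dst, const uint8_t pat[16], size_t len)
    {
        uint64_t p0, p1;
        memcpy(&p0, pat, 8);                        /* ld   r10,0(r9) */
        memcpy(&p1, pat + 8, 8);                    /* ld   r11,8(r9) */
        for (size_t n = len >> 5; n != 0; n--) {    /* 32-byte chunks */
            memcpy(dst,      &p0, 8);
            memcpy(dst + 8,  &p1, 8);
            memcpy(dst + 16, &p0, 8);
            memcpy(dst + 24, &p1, 8);
            dst += 32;
        }
        return len & 31;                /* 0..31 bytes left for the caller */
    }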
diff --git a/osfmk/ppc/commpage/memset_g3.s b/osfmk/ppc/commpage/memset_g3.s
deleted file mode 100644
index 469627f85..000000000
--- a/osfmk/ppc/commpage/memset_g3.s
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-        .text
-        .align	2
-
-/* *********************
- * * M E M S E T _ G 3 *
- * *********************
- *
- * This is a subroutine called by Libc memset and _memset_pattern for large nonzero
- * operands (zero operands are funneled into bzero.)  This version is for
- * 32-bit processors with a 32-byte cache line and no Altivec.
- *
- * Registers at entry:
- *		r4 = count of bytes to store (must be >= 32)
- *      r8 = ptr to the 1st byte to store (16-byte aligned)
- *      r9 = ptr to 16-byte pattern to store (16-byte aligned)
- * When we return:
- *		r3 = not changed, since memset returns it
- *      r4 = bytes remaining to store (will be <32)
- *      r7 = not changed
- *      r8 = ptr to next byte to store (still 16-byte aligned)
- *     r12 = not changed (holds return value for memset)
- */
-
-        .align	4
-memset_g3:
-        andi.   r0,r8,16                // cache line aligned?
-        lfd     f0,0(r9)                // pick up the pattern in two FPRs
-        lfd     f1,8(r9)
-        beq     1f                      // skip if already aligned
-        
-        // cache line align
-        
-        stfd    f0,0(r8)                // no, store another 16 bytes to align
-        stfd    f1,8(r8)
-        subi    r4,r4,16                // skip past the 16 bytes we just stored
-        addi    r8,r8,16
-        
-        // Loop over cache lines.  This code uses a private protocol with the kernel:
-        // when the kernel emulates an alignment exception on a DCBZ that occurs in the
-        // commpage, it zeroes CR7.  We use this to detect the case where we are operating on
-        // uncached memory, and do not use DCBZ again in this code. We assume that either
-        // all of the operand is cacheable or none of it is, so we only check the first DCBZ.
-1:
-        srwi.   r0,r4,6                 // get count of 64-byte chunks
-        cmpw    cr7,r0,r0               // set cr7_eq (kernel clears it on alignment exception)
-        rlwinm  r4,r4,0,0x3F            // mask down to residual count (0..63)
-        beq     Lleftover               // no chunks
-        dcbz    0,r8                    // zero first cache line (clearing cr7 if alignment exception)
-        mtctr   r0
-        li      r6,32                   // get an offset for DCBZ
-        beq+    cr7,LDcbzEnter          // enter DCBZ loop (we didn't get an alignment exception)
-        
-        // Loop over 64-byte chunks without DCBZ.
-LNoDcbz:
-        stfd    f0,0(r8)
-        stfd    f1,8(r8)
-        stfd    f0,16(r8)
-        stfd    f1,24(r8)
-        stfd    f0,32(r8)
-        stfd    f1,40(r8)
-        stfd    f0,48(r8)
-        stfd    f1,56(r8)
-        addi    r8,r8,64
-        bdnz    LNoDcbz
-        
-        b       Lleftover
-        
-        // Loop over 64-byte chunks using DCBZ.
-LDcbz:
-        dcbz    0,r8
-LDcbzEnter:
-        dcbz    r6,r8
-        stfd    f0,0(r8)
-        stfd    f1,8(r8)
-        stfd    f0,16(r8)
-        stfd    f1,24(r8)
-        stfd    f0,32(r8)
-        stfd    f1,40(r8)
-        stfd    f0,48(r8)
-        stfd    f1,56(r8)
-        addi    r8,r8,64
-        bdnz    LDcbz
-        
-        // Handle leftovers (0..63 bytes)
-Lleftover:
-        srwi.   r0,r4,4                 // get count of 16-byte chunks
-        rlwinm  r4,r4,0,0xF             // mask down to residuals
-        beqlr                           // no 16-byte chunks so done
-        mtctr   r0
-2:
-        stfd    f0,0(r8)
-        stfd    f1,8(r8)
-        addi    r8,r8,16
-        bdnz    2b
-        
-        blr
-
-	COMMPAGE_DESCRIPTOR(memset_g3,_COMM_PAGE_MEMSET_PATTERN,kCache32,kHasAltivec, \
-				kCommPage32)
diff --git a/osfmk/ppc/commpage/memset_g4.s b/osfmk/ppc/commpage/memset_g4.s
deleted file mode 100644
index 9e33f45f2..000000000
--- a/osfmk/ppc/commpage/memset_g4.s
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-        .text
-        .align	2
-
-
-/* *********************
- * * M E M S E T _ G 4 *
- * *********************
- *
- * This is a subroutine called by Libc memset and memset_pattern for large nonzero
- * operands (zero operands are funneled into bzero.)  This version is for
- * 32-bit processors with a 32-byte cache line and Altivec.
- *
- * Registers at entry:
- *		r4 = count of bytes to store (must be >= 32)
- *      r8 = ptr to the 1st byte to store (16-byte aligned)
- *      r9 = ptr to 16-byte pattern to store (16-byte aligned)
- * When we return:
- *		r3 = not changed, since memset returns it
- *      r4 = bytes remaining to store (will be <32)
- *      r7 = not changed
- *      r8 = ptr to next byte to store (still 16-byte aligned)
- *     r12 = not changed (holds return value for memset)
- */
-
-#define kBig    (3*64)                  // big enough to warrant using dcba (NB: must be >= 3*64)
-
-        .align	4
-memset_g4:
-        cmplwi  cr1,r4,kBig             // big enough to warrant using dcba?
-        mfspr   r2,vrsave               // we'll be using VRs
-        oris    r0,r2,0x8000            // we use vr0
-        andi.   r5,r8,0x10              // is ptr 32-byte aligned?
-        mtspr   vrsave,r0
-        li      r5,16                   // get offsets for "stvx"
-        lvx     v0,0,r9                 // load the pattern into v0
-        li      r6,32
-        blt     cr1,LShort              // not big enough to bother with dcba
-        li      r9,48
-        
-        // cache line align
-        
-        beq     2f                      // already aligned
-        stvx    v0,0,r8                 // store another 16 bytes to align
-        addi    r8,r8,16
-        subi    r4,r4,16
-        
-        // Set up for inner loop.
-2:
-        srwi    r0,r4,6                 // get count of 64-byte chunks (>=2)
-        dcba    0,r8                    // pre-allocate first cache line (possibly nop'd)
-        rlwinm  r4,r4,0,0x3F            // mask down to residual count (0..63)
-        subic   r0,r0,1                 // loop 1-too-few times
-        li      r10,64                  // get offsets to DCBA one chunk ahead
-        li      r11,64+32
-        mtctr   r0
-        dcba    r6,r8                   // zero 2nd cache line (possibly nop'd)
-        b       3f                      // enter DCBA loop
-        
-        // Loop over 64-byte chunks.  We DCBA one chunk ahead, which is a little faster.
-        // Note that some G4s do not benefit from the DCBAs.  We nop them in that case.
-        
-        .align  4
-3:
-        dcba    r10,r8                  // zero one 64-byte chunk ahead (possibly nop'd)
-        dcba    r11,r8
-        stvx    v0,0,r8
-        stvx    v0,r5,r8
-        stvx    v0,r6,r8
-        stvx    v0,r9,r8
-        addi    r8,r8,64
-        bdnz+   3b
-        
-        // Last chunk, which we've already DCBAd.
-
-        stvx    v0,0,r8
-        stvx    v0,r5,r8
-        stvx    v0,r6,r8
-        stvx    v0,r9,r8
-        addi    r8,r8,64
-        
-        // loop over 32-byte chunks at end
-LShort:
-        srwi.   r0,r4,5                 // get count of 32-byte chunks
-        rlwinm  r4,r4,0,0x1F            // mask down to residual count (0..31)
-        beq     7f                      // no chunks so done
-        mtctr   r0
-6:
-        stvx    v0,0,r8
-        stvx    v0,r5,r8
-        addi    r8,r8,32
-        bdnz    6b
-7:
-        mtspr   vrsave,r2               // restore caller's vrsave
-        blr
-
-
-	COMMPAGE_DESCRIPTOR(memset_g4,_COMM_PAGE_MEMSET_PATTERN,kCache32+kHasAltivec,0, \
-				kCommPageDCBA+kCommPage32)
diff --git a/osfmk/ppc/commpage/memset_g5.s b/osfmk/ppc/commpage/memset_g5.s
deleted file mode 100644
index 6acf98579..000000000
--- a/osfmk/ppc/commpage/memset_g5.s
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-        .text
-        .align	2
-/*
- * WARNING: this code is written for 32-bit mode, and ported by the kernel if necessary
- * to 64-bit mode for use in the 64-bit commpage.  This "port" consists of the following
- * simple transformations:
- *      - all word compares are changed to doubleword
- *      - all "srwi[.]" opcodes are changed to "srdi[.]"                      
- * Nothing else is done.  For this to work, the following rules must be
- * carefully followed:
- *      - do not use carry or overflow
- *      - only use record mode if you are sure the results are mode-invariant
- *        for example, all "andi." and almost all "rlwinm." are fine
- *      - do not use "slwi", "slw", or "srw"
- * An imaginative programmer could break the porting model in other ways, but the above
- * are the most likely problem areas.  It is perhaps surprising how well this simple
- * method works in practice.
- */        
-
-/* *********************
- * * M E M S E T _ G 5 *
- * *********************
- *
- * This is a subroutine called by Libc memset and memset_pattern for large nonzero
- * operands (zero operands are funneled into bzero.)  This version is for
- * 64-bit processors with a 128-byte cache line and Altivec.
- *
- * Registers at entry:
- *		r4 = count of bytes to store (must be >= 32)
- *      r8 = ptr to the 1st byte to store (16-byte aligned)
- *      r9 = ptr to 16-byte pattern to store (16-byte aligned)
- * When we return:
- *		r3 = not changed, since memset returns it
- *      r4 = bytes remaining to store (will be <32)
- *      r7 = not changed
- *      r8 = ptr to next byte to store (still 16-byte aligned)
- *     r12 = not changed (holds return value for memset)
- */
-
-#define kBig    (3*128)                 // big enough to warrant using dcbz (NB: must be >= 3*128)
-
-        .align	5
-memset_g5:
-        cmplwi  cr1,r4,kBig             // big enough to warrant using dcbz?
-        neg     r10,r8                  // start to align ptr
-        mfspr   r2,vrsave               // we'll be using VRs
-        andi.   r10,r10,0x70            // get #bytes to cache line align
-        oris    r0,r2,0x8000            // we use vr0
-        mtspr   vrsave,r0
-        li      r5,16                   // get offsets for "stvx"
-        lvx     v0,0,r9                 // load the pattern into v0
-        li      r6,32
-        blt     cr1,LShort              // not big enough to bother with dcbz
-        li      r9,48
-        
-        // cache line align
-        
-        beq     2f                      // already aligned
-1:
-        subic.  r10,r10,16              // more to go?
-        stvx    v0,0,r8
-        addi    r8,r8,16
-        subi    r4,r4,16
-        bne     1b
-        
-        // Loop over cache lines.  This code uses a private protocol with the kernel:
-        // when the kernel emulates an alignment exception on a DCBZ that occurs in the
-        // commpage, it zeroes CR7.  We use this to detect the case where we are operating on
-        // uncached memory, and do not use DCBZ again in this code. We assume that either
-        // all of the operand is cacheable or none of it is, so we only check the first DCBZ.
-2:
-        cmpw    cr7,r3,r3               // set cr7_eq (kernel will clear if DCBZ faults)
-        dcbzl   0,r8                    // zero first cache line (clearing cr7 if alignment exception)
-        srwi    r0,r4,7                 // get #cache lines (>=2)
-        rlwinm  r4,r4,0,0x7F            // mask down to residual count (0..127)
-        bne--   cr7,LNoDcbz             // exit if we took alignment exception on the first DCBZ
-        subic   r0,r0,1                 // loop 1-too-few times
-        li      r11,128                 // set DCBZ look-ahead
-        mtctr   r0
-        b       3f                      // use loop that DCBZs
-        
-        // Loop over cache lines.  We DCBZ one line ahead, which is a little faster.
-        
-        .align  5
-3:
-        dcbzl   r11,r8                  // zero one line ahead
-        addi    r10,r8,64
-        stvx    v0,0,r8
-        stvx    v0,r5,r8
-        stvx    v0,r6,r8
-        stvx    v0,r9,r8
-        addi    r8,r8,128
-        stvx    v0,0,r10
-        stvx    v0,r5,r10
-        stvx    v0,r6,r10
-        stvx    v0,r9,r10
-        bdnz++  3b
-        
-        li      r0,1                    // we've already DCBZ'd the last line
-LNoDcbz:                                // r0: loop count
-        mtctr   r0
-        
-        // Loop which does not DCBZ.  Normally this is only used for last cache line,
-        // because we've already zeroed it.
-4:        
-        addi    r10,r8,64
-        stvx    v0,0,r8
-        stvx    v0,r5,r8
-        stvx    v0,r6,r8
-        stvx    v0,r9,r8
-        addi    r8,r8,128
-        stvx    v0,0,r10
-        stvx    v0,r5,r10
-        stvx    v0,r6,r10
-        stvx    v0,r9,r10
-        bdnz--  4b                      // optimize for the cacheable case
-        
-        // loop over 32-byte chunks
-LShort:
-        srwi.   r0,r4,5                 // get count of 32-byte chunks
-        rlwinm  r4,r4,0,0x1F            // mask down to residual count (0..31)
-        beq     7f                      // no chunks so done
-        mtctr   r0
-6:
-        stvx    v0,0,r8
-        stvx    v0,r5,r8
-        addi    r8,r8,32
-        bdnz++  6b
-7:
-        mtspr   vrsave,r2               // restore caller's vrsave
-        blr
-
-
-	COMMPAGE_DESCRIPTOR(memset_g5,_COMM_PAGE_MEMSET_PATTERN,kCache128+k64Bit+kHasAltivec,0, \
-				kCommPageBoth+kPort32to64)
diff --git a/osfmk/ppc/commpage/pthread.s b/osfmk/ppc/commpage/pthread.s
deleted file mode 100644
index 58dd6c4aa..000000000
--- a/osfmk/ppc/commpage/pthread.s
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>					// EXT, LEXT
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-        .text
-        .align	2
-
-#define	USER_SPRG3	259		// user-mode-readable encoding for SPRG3
-
-
-// ***********************************************************
-// * P T H R E A D _ G E T S P E C I F I C _ S P R G 3 _ 3 2 *
-// ***********************************************************
-//
-// For processors with user-readable SPRG3, in 32-bit mode.   Called with:
-//		r3 = word number
-//		r4 = offset to thread specific data (_PTHREAD_TSD_OFFSET)
-
-pthread_getspecific_sprg3_32:
-        slwi	r5,r3,2				// convert word# to byte offset
-        mfspr	r3,USER_SPRG3		// get per-thread cookie
-        add		r5,r5,r4			// add in offset to first word
-        lwzx	r3,r3,r5			// get the thread-specific word
-        blr
-        
-    COMMPAGE_DESCRIPTOR(pthread_getspecific_sprg3_32,_COMM_PAGE_PTHREAD_GETSPECIFIC,k64Bit,0,kCommPage32)
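
What the routine above computes, as an editor's C sketch (the SPR encoding matches the
#define above; the other names are hypothetical):

    #include <stdint.h>

    static inline void *tsd_get_sprg3(unsigned key, uintptr_t tsd_offset)
    {
        uintptr_t self;                                     /* per-thread cookie */
        __asm__ volatile ("mfspr %0, 259" : "=r"(self));    /* USER_SPRG3 */
        return ((void **)(self + tsd_offset))[key];         /* lwzx r3,r3,r5 */
    }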
-
-
-// ***********************************************************
-// * P T H R E A D _ G E T S P E C I F I C _ S P R G 3 _ 6 4 *
-// ***********************************************************
-//
-// For processors with user-readable SPRG3, in 64-bit mode.  This may not be used
-// because the 64-bit ABI uses r13 for the thread-local-data pointer.  Called with:
-//		r3 = word number
-//		r4 = offset to thread specific data (_PTHREAD_TSD_OFFSET)
-
-pthread_getspecific_sprg3_64:
-        sldi	r5,r3,3				// convert double-word# to byte offset
-        mfspr	r3,USER_SPRG3		// get per-thread cookie
-        add		r5,r5,r4			// add in offset to first word
-        ldx		r3,r3,r5			// get the thread-specific doubleword
-        blr
-        
-    COMMPAGE_DESCRIPTOR(pthread_getspecific_sprg3_64,_COMM_PAGE_PTHREAD_GETSPECIFIC,k64Bit,0,kCommPage64)
-    
-    
-// ***************************************
-// * P T H R E A D _ S E L F _ S P R G 3 *
-// ***************************************
-//
-// For processors with user-readable SPRG3.  Usable in both 32- and 64-bit modes.
-
-pthread_self_sprg3:
-        mfspr	r3,USER_SPRG3		// get per-thread cookie
-        blr
-        
-    COMMPAGE_DESCRIPTOR(pthread_self_sprg3,_COMM_PAGE_PTHREAD_SELF,k64Bit,0,kCommPageBoth)
-    
-        
-// *******************************************************
-// * P T H R E A D _ G E T S P E C I F I C _ U F T R A P *
-// *******************************************************
-//
-// For processors that use the Ultra-Fast-Trap to get the thread-specific ptr.
-// Called with:
-//		r3 = word number
-//		r4 = offset to thread specific data (_PTHREAD_TSD_OFFSET)
-
-pthread_getspecific_uftrap:
-        slwi	r5,r3,2				// convert word# to byte offset
-        li 		r0,0x7FF2			// magic "pthread_self" ultra-fast trap code
-        sc
-        add		r5,r5,r4			// add in offset to first word
-        lwzx	r3,r3,r5			// get the thread-specific word
-        blr
-
-    COMMPAGE_DESCRIPTOR(pthread_getspecific_uftrap,_COMM_PAGE_PTHREAD_GETSPECIFIC,0,k64Bit,kCommPage32)
-    
-        
-// *****************************************
-// * P T H R E A D _ S E L F _ U F T R A P *
-// *****************************************
-//
-// For processors that use the Ultra-Fast-Trap to get the thread-specific ptr.
-
-pthread_self_uftrap:
-        li 		r0,0x7FF2			// magic "pthread_self" ultra-fast trap code
-        sc							// get r3==TLDP
-        blr
-
-    COMMPAGE_DESCRIPTOR(pthread_self_uftrap,_COMM_PAGE_PTHREAD_SELF,0,k64Bit,kCommPage32)
diff --git a/osfmk/ppc/commpage/spinlocks.s b/osfmk/ppc/commpage/spinlocks.s
deleted file mode 100644
index 480f49050..000000000
--- a/osfmk/ppc/commpage/spinlocks.s
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <sys/appleapiopts.h>
-#include <ppc/asm.h>					// EXT, LEXT
-#include <machine/cpu_capabilities.h>
-#include <machine/commpage.h>
-
-        .text
-        .align	2
-
-#define	MP_SPIN_TRIES   1000
-
-
-/* The user mode spinlock library.  There are many versions,
- * in order to take advantage of a few special cases:
- *	- no barrier instructions (SYNC,ISYNC) are needed if UP
- *	- 64-bit processors can use LWSYNC instead of SYNC (if MP)
- *  - 32-bit processors can use ISYNC/EIEIO instead of SYNC (if MP)
- *	- branch hints appropriate to the processor (+ vs ++ etc)
- *	- potentially custom relinquish strategies (not used at present)
- *	- fixes for errata as necessary
- *
- * The convention for lockwords is that 0==free and -1==locked.
- */ 
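
An editor's C11 rendering of those lockword semantics (a sketch, not the Libc source:
acquire/release stand in for the isync and lwsync/eieio barriers discussed above, and the
real MP lock path backs off to _COMM_PAGE_RELINQUISH after MP_SPIN_TRIES attempts):

    #include <stdatomic.h>
    #include <stdbool.h>

    typedef _Atomic int oslock_t;           /* 0 == free, -1 == locked */

    static bool spin_try(oslock_t *l)       /* _COMM_PAGE_SPINLOCK_TRY */
    {
        int expected = 0;
        return atomic_compare_exchange_strong_explicit(
            l, &expected, -1, memory_order_acquire, memory_order_relaxed);
    }

    static void spin_lock(oslock_t *l)      /* _COMM_PAGE_SPINLOCK_LOCK */
    {
        while (!spin_try(l))
            ;       /* real MP version relinquishes after MP_SPIN_TRIES */
    }

    static void spin_unlock(oslock_t *l)    /* _COMM_PAGE_SPINLOCK_UNLOCK */
    {
        atomic_store_explicit(l, 0, memory_order_release);
    }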
-
-
-spinlock_32_try_mp:
-		mr		r5, r3
-		li		r3, 1
-1:
-        lwarx	r4,0,r5
-		li		r6,-1		// locked == -1
-        cmpwi	r4,0
-        bne-	2f
-        stwcx.	r6,0,r5
-        isync				// cancel speculative execution
-        beqlr+
-        b		1b
-2:
-        li		r3,0        // we did not get the lock
-        blr
-
-	COMMPAGE_DESCRIPTOR(spinlock_32_try_mp,_COMM_PAGE_SPINLOCK_TRY,0,k64Bit+kUP,kCommPage32)
-        
-
-spinlock_32_try_up:
-		mr		r5, r3
-		li		r3, 1
-1:
-        lwarx	r4,0,r5
-		li		r6,-1		// locked == -1
-        cmpwi	r4,0
-        bne-	2f
-        stwcx.	r6,0,r5
-        beqlr+
-        b		1b
-2:
-        li		r3,0        // we did not get the lock
-        blr
-
-    COMMPAGE_DESCRIPTOR(spinlock_32_try_up,_COMM_PAGE_SPINLOCK_TRY,kUP,k64Bit,kCommPage32)
-
-
-spinlock_32_lock_mp:
-        li		r5,MP_SPIN_TRIES
-1:
-        lwarx	r4,0,r3
-		li		r6,-1		// locked == -1
-        cmpwi	r4,0
-        bne-	2f
-        stwcx.	r6,0,r3
-        isync				// cancel speculative execution
-        beqlr+				// we return void
-        b		1b
-2:
-        subic.	r5,r5,1		// try again before relinquish?
-        bne		1b
-        ba		_COMM_PAGE_RELINQUISH
-
-    COMMPAGE_DESCRIPTOR(spinlock_32_lock_mp,_COMM_PAGE_SPINLOCK_LOCK,0,k64Bit+kUP,kCommPage32)
-
-
-spinlock_32_lock_up:
-1:
-        lwarx	r4,0,r3
-		li		r6,-1		// locked == -1
-        cmpwi	r4,0
-        bnea-	_COMM_PAGE_RELINQUISH	// always depress on UP (let lock owner run)
-        stwcx.	r6,0,r3
-        beqlr+				// we return void
-        b		1b
-
-    COMMPAGE_DESCRIPTOR(spinlock_32_lock_up,_COMM_PAGE_SPINLOCK_LOCK,kUP,k64Bit,kCommPage32)
-
-
-spinlock_32_unlock_mp:
-        li		r4,0
-        isync				// complete prior stores before unlock
-		eieio				// (using isync/eieio is faster than a sync)
-        stw		r4,0(r3)
-        blr
-
-    COMMPAGE_DESCRIPTOR(spinlock_32_unlock_mp,_COMM_PAGE_SPINLOCK_UNLOCK,0,k64Bit+kUP,kCommPage32)
-
-
-spinlock_32_unlock_up:
-        li		r4,0
-        stw		r4,0(r3)
-        blr
-
-    COMMPAGE_DESCRIPTOR(spinlock_32_unlock_up,_COMM_PAGE_SPINLOCK_UNLOCK,kUP,k64Bit,kCommPage32)
-
-
-spinlock_64_try_mp:
-		mr		r5, r3
-		li		r3, 1
-1:
-        lwarx	r4,0,r5
-		li		r6,-1		// locked == -1
-        cmpwi	r4,0
-        bne--	2f
-        stwcx.	r6,0,r5
-        isync				// cancel speculative execution
-        beqlr++
-        b		1b
-2:
-        li		r6,-4
-        stwcx.	r5,r6,r1	// clear the pending reservation (using red zone)
-        li		r3,0        // we did not get the lock
-        blr
-
-    COMMPAGE_DESCRIPTOR(spinlock_64_try_mp,_COMM_PAGE_SPINLOCK_TRY,k64Bit,kUP,kCommPageBoth)
-
-
-spinlock_64_try_up:
-		mr		r5, r3
-		li		r3, 1
-1:
-        lwarx	r4,0,r5
-		li		r6,-1		// locked == -1
-        cmpwi	r4,0
-        bne--	2f
-        stwcx.	r6,0,r5
-        beqlr++
-        b		1b
-2:
-        li		r6,-4
-        stwcx.	r5,r6,r1	// clear the pending reservation (using red zone)
-        li		r3,0        // we did not get the lock
-        blr
-
-    COMMPAGE_DESCRIPTOR(spinlock_64_try_up,_COMM_PAGE_SPINLOCK_TRY,k64Bit+kUP,0,kCommPageBoth)
-
-
-spinlock_64_lock_mp:
-        li		r5,MP_SPIN_TRIES
-1:
-        lwarx	r4,0,r3
-		li		r6,-1		// locked == -1
-        cmpwi	r4,0
-        bne--	2f
-        stwcx.	r6,0,r3
-        isync				// cancel speculative execution
-        beqlr++				// we return void
-        b		1b
-2:
-        li		r6,-4
-        stwcx.	r3,r6,r1	// clear the pending reservation (using red zone)
-        subic.	r5,r5,1		// try again before relinquish?
-        bne--	1b			// mispredict this one (a cheap back-off)
-        ba		_COMM_PAGE_RELINQUISH
-
-    COMMPAGE_DESCRIPTOR(spinlock_64_lock_mp,_COMM_PAGE_SPINLOCK_LOCK,k64Bit,kUP,kCommPageBoth)
-
-
-spinlock_64_lock_up:
-1:
-        lwarx	r4,0,r3
-		li		r6,-1		// locked == -1
-        cmpwi	r4,0
-        bne--	2f
-        stwcx.	r6,0,r3
-        beqlr++				// we return void
-        b		1b
-2:							// always relinquish on UP (let lock owner run)
-        li		r6,-4
-        stwcx.	r3,r6,r1	// clear the pending reservation (using red zone)
-		ba		_COMM_PAGE_RELINQUISH
-
-    COMMPAGE_DESCRIPTOR(spinlock_64_lock_up,_COMM_PAGE_SPINLOCK_LOCK,k64Bit+kUP,0,kCommPageBoth)
-
-
-spinlock_64_unlock_mp:
-        lwsync				// complete prior stores before unlock
-        li		r4,0
-        stw		r4,0(r3)
-        blr
-
-    COMMPAGE_DESCRIPTOR(spinlock_64_unlock_mp,_COMM_PAGE_SPINLOCK_UNLOCK,k64Bit,kUP,kCommPageBoth)
-
-
-spinlock_64_unlock_up:
-        li		r4,0
-        stw		r4,0(r3)
-        blr
-
-    COMMPAGE_DESCRIPTOR(spinlock_64_unlock_up,_COMM_PAGE_SPINLOCK_UNLOCK,k64Bit+kUP,0,kCommPageBoth)
-    
-
-spinlock_relinquish:
-        mr		r12,r3		// preserve lockword ptr across relinquish
-        li		r3,0		// THREAD_NULL
-        li		r4,1		// SWITCH_OPTION_DEPRESS
-        li		r5,1		// timeout (ms)
-        li		r0,-61		// SYSCALL_THREAD_SWITCH
-        sc					// relinquish
-        mr		r3,r12
-        ba		_COMM_PAGE_SPINLOCK_LOCK
-        
-    COMMPAGE_DESCRIPTOR(spinlock_relinquish,_COMM_PAGE_RELINQUISH,0,0,kCommPageBoth)
-
diff --git a/osfmk/ppc/conf.c b/osfmk/ppc/conf.c
deleted file mode 100644
index adeb60ea7..000000000
--- a/osfmk/ppc/conf.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/* 
- */
-
-#include <platforms.h>
-#include <types.h>
-#include <kern/clock.h>
-
-/*
- * Clock device subsystem configuration. The clock_list[]
- * table contains the clock structures for all clocks in
- * the system.
- */
-
-extern	struct clock_ops	sysclk_ops, calend_ops;
-
-/*
- * List of clock devices.
- */
-struct	clock	clock_list[] = {
-
-	/* SYSTEM_CLOCK */
-	{ &sysclk_ops, NULL, NULL },
-
-	/* CALENDAR_CLOCK */
-	{ &calend_ops, NULL, NULL },
-};
-int	clock_count = sizeof(clock_list) / sizeof(clock_list[0]);
-
-
diff --git a/osfmk/ppc/console_feed.c b/osfmk/ppc/console_feed.c
deleted file mode 100644
index 8f029d49d..000000000
--- a/osfmk/ppc/console_feed.c
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_FREE_COPYRIGHT@
- * 
- */
-
-/* Intercept mach console output and supply it to a user application */
-
-#include <mach_kdb.h>
-
-#include <types.h>
-#include <device/buf.h>
-#include <device/conf.h>
-#include <device/errno.h>
-#include <device/misc_protos.h>
-#include <device/ds_routines.h>
-#include <device/cirbuf.h>
-#include <ppc/console_feed_entries.h>
-#include <ppc/serial_io.h>
-
-#if	MACH_KDB
-#include <ppc/db_machdep.h>
-#endif	/* MACH_KDB */
-
-static struct cirbuf cons_feed_cb;
-static int cons_feed_count = 0;
-io_req_t   cons_feed_queued = 0;
-
-/* console feed lock should be taken at splhigh */
-decl_simple_lock_data(,cons_feed_lock)
-
-boolean_t cons_feed_read_done(io_req_t ior);
-
-io_return_t
-console_feed_open(
-	dev_t		dev,
-	dev_mode_t	flag,
-	io_req_t	ior)
-{
-	spl_t	s;
-
-        simple_lock_init(&cons_feed_lock, 0);
-#if	MACH_KDB
-	if (console_is_serial()) {
-		return D_DEVICE_DOWN;
-	}
-#endif	/* MACH_KDB */
-	cb_alloc(&cons_feed_cb, CONSOLE_FEED_BUFSIZE);
-	s = splhigh();
-	simple_lock(&cons_feed_lock);
-	cons_feed_count++;
-	simple_unlock(&cons_feed_lock);
-	splx(s);
-	return D_SUCCESS;
-}
-
-void
-console_feed_close(
-	dev_t		dev)
-{
-	spl_t	s;
-
-	s = splhigh();
-	simple_lock(&cons_feed_lock);
-	cons_feed_count--;
-	simple_unlock(&cons_feed_lock);
-	splx(s);
-
-	console_feed_cancel_and_flush();
-	cb_free(&cons_feed_cb);
-
-	return;
-}
-
-/* A routine that can be called from a panic or other problem
- * situation. It switches off the console feed and dumps any
- * remaining buffered information to the original console
- * (usually the screen). It doesn't free up the buffer, since
- * it tries to be as minimal as possible.
- */
-
-void console_feed_cancel_and_flush(void)
-{
-	int	c;
-	spl_t	s;
-	
-#if	NCONSFEED > 0
-#if	MACH_KDB
-	if (console_is_serial()) {
-		return;
-	}
-#endif	/* MACH_KDB */
-
-	s = splhigh();
-	simple_lock(&cons_feed_lock);
-	if (cons_feed_count == 0) {
-		simple_unlock(&cons_feed_lock);
-		splx(s);
-		return;
-	}
-	cons_feed_count = 0;
-	simple_unlock(&cons_feed_lock);
-	splx(s);
-
-	do {
-		c = getc(&cons_feed_cb);
-		if (c == -1)
-			break;
-		cnputc(c);
-	} while (1);
-#endif /* NCONSFEED > 0 */
-}
-
-io_return_t
-console_feed_read(
-	dev_t		dev,
-	io_req_t 	ior)
-{
-	spl_t		s;
-	kern_return_t	rc;
-	int		count;
-
-	rc = device_read_alloc(ior, (vm_size_t) ior->io_count);
-	if (rc != KERN_SUCCESS)
-		return rc;
-
-	s = splhigh();
-	simple_lock(&cons_feed_lock);
-
-	ior->io_residual = ior->io_count;
-
-	count = q_to_b(&cons_feed_cb, (char *) ior->io_data, ior->io_count);
-	if (count == 0) {
-		if (ior->io_mode & D_NOWAIT) {
-			rc = D_WOULD_BLOCK;
-		}
-		if (cons_feed_queued == NULL) {
-			ior->io_done = cons_feed_read_done;
-			cons_feed_queued = ior;
-			rc = D_IO_QUEUED;
-		} else {
-			/* Can't queue multiple read requests yet */
-			rc = D_INVALID_OPERATION;
-		}
-		simple_unlock(&cons_feed_lock);
-		splx(s);
-		return rc;
-	}
-
-	simple_unlock(&cons_feed_lock);
-	splx(s);
-
-	ior->io_residual -= count;
-
-	iodone(ior);
-
-	if (ior->io_op & IO_SYNC) {
-		iowait(ior);
-	}
-
-	return D_SUCCESS;
-}
-
-/* Called when data is ready and there's a queued-up read waiting */
-boolean_t cons_feed_read_done(io_req_t ior)
-{
-	spl_t	s;
-	int	count;
-
-	s = splhigh();
-	simple_lock(&cons_feed_lock);
-
-	count = q_to_b(&cons_feed_cb, (char *) ior->io_data, ior->io_count);
-	if (count == 0) {
-		if (cons_feed_queued == NULL) {
-			ior->io_done = cons_feed_read_done;
-			cons_feed_queued = ior;
-		}
-		simple_unlock(&cons_feed_lock);
-		splx(s);
-		return FALSE;
-	}
-
-	simple_unlock(&cons_feed_lock);
-	splx(s);
-
-	ior->io_residual -= count;
-	ds_read_done(ior);
-
-	return TRUE;
-}
-
-/* This routine is called from putc() - it should return TRUE if
- * the character should be passed on to a physical console, FALSE
- * if the feed has intercepted the character. It may be called from
- * under interrupt (even splhigh)
- */
-
-boolean_t console_feed_putc(char c)
-{
-	spl_t 		s;
-	io_req_t	ior;
-	boolean_t	retval;
-
-#if	MACH_KDB
-	if (db_active) {
-		return TRUE;
-	}
-#endif	/* MACH_KDB */
-
-	retval=TRUE;	/* TRUE : character should be displayed now */
-	if (!cons_feed_count) {
-		return TRUE;
-	}
-	s = splhigh();
-	simple_lock(&cons_feed_lock);
-	if (!cons_feed_count) {
-		simple_unlock(&cons_feed_lock);
-		splx(s);
-		return TRUE;
-	}
-	/* queue up the data if we can */
-	if (!putc(c, &cons_feed_cb)) {
-		/* able to store the character */
-		retval = FALSE;
-	}
-	if (cons_feed_queued != NULL) {
-		/* Queued up request - service it */
-		ior = cons_feed_queued;
-		cons_feed_queued = NULL;
-		simple_unlock(&cons_feed_lock);
-		splx(s);
-		iodone(ior);
-		retval=FALSE;
-	} else {
-		simple_unlock(&cons_feed_lock);
-		splx(s);
-	}
-	return retval;
-}
diff --git a/osfmk/ppc/console_feed_entries.h b/osfmk/ppc/console_feed_entries.h
deleted file mode 100644
index 729955043..000000000
--- a/osfmk/ppc/console_feed_entries.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_FREE_COPYRIGHT@
- * 
- */
-
-extern io_return_t	console_feed_open(
-				dev_t		dev,
-				dev_mode_t	flag,
-				io_req_t	ior);
-
-extern void		console_feed_close(
-				dev_t		dev);
-
-extern io_return_t	console_feed_read(
-				dev_t		dev,
-				io_req_t	ior);
-
-extern boolean_t	console_feed_putc(char c);
-extern void		console_feed_cancel_and_flush(void);
-
-#define CONSOLE_FEED_BUFSIZE 4096
diff --git a/osfmk/ppc/cpu.c b/osfmk/ppc/cpu.c
deleted file mode 100644
index 774b94bbd..000000000
--- a/osfmk/ppc/cpu.c
+++ /dev/null
@@ -1,1184 +0,0 @@
-/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <mach/mach_types.h>
-#include <mach/machine.h>
-#include <mach/processor_info.h>
-
-#include <kern/kalloc.h>
-#include <kern/kern_types.h>
-#include <kern/machine.h>
-#include <kern/misc_protos.h>
-#include <kern/thread.h>
-#include <kern/sched_prim.h>
-#include <kern/timer_queue.h>
-#include <kern/processor.h>
-#include <kern/pms.h>
-
-#include <vm/pmap.h>
-#include <IOKit/IOHibernatePrivate.h>
-
-#include <ppc/proc_reg.h>
-#include <ppc/misc_protos.h>
-#include <ppc/fpu_protos.h>
-#include <ppc/machine_routines.h>
-#include <ppc/cpu_internal.h>
-#include <ppc/exception.h>
-#include <ppc/asm.h>
-#include <ppc/hw_perfmon.h>
-#include <pexpert/pexpert.h>
-#include <kern/cpu_data.h>
-#include <ppc/mappings.h>
-#include <ppc/Diagnostics.h>
-#include <ppc/trap.h>
-#include <ppc/machine_cpu.h>
-#include <ppc/rtclock.h>
-
-#include <libkern/OSAtomic.h>
-
-unsigned int		real_ncpus = 1;
-unsigned int		max_ncpus  = MAX_CPUS;
-
-decl_simple_lock_data(static,rht_lock);
-
-static unsigned int	rht_state = 0;
-#define RHT_WAIT	0x01
-#define RHT_BUSY	0x02
-
-decl_simple_lock_data(static,SignalReadyLock);
-
-struct SIGtimebase {
-	volatile boolean_t	avail;
-	volatile boolean_t	ready;
-	volatile boolean_t	done;
-	uint64_t	abstime;
-};
-
-perfCallback	   	perfCpuSigHook;			/* Pointer to CHUD cpu signal hook routine */
-
-extern uint32_t			debugger_sync;
-
-/*
- * Forward definitions
- */
-
-void	cpu_sync_timebase(
-			void);
-
-void	cpu_timebase_signal_handler(
-			struct per_proc_info    *proc_info,
-			struct SIGtimebase		*timebaseAddr);
-
-/*
- *	Routine:	cpu_bootstrap
- *	Function:
- */
-void
-cpu_bootstrap(
-	void)
-{
-	simple_lock_init(&rht_lock,0);
-	simple_lock_init(&SignalReadyLock,0);
-}
-
-
-/*
- *	Routine:	cpu_init
- *	Function:
- */
-void
-cpu_init(
-	void)
-{
-	struct per_proc_info *proc_info;
-
-	proc_info = getPerProc();
-
-	/*
-	 * Restore the TBR.
-	 */
-	if (proc_info->save_tbu != 0 || proc_info->save_tbl != 0) {
-		mttb(0);
-		mttbu(proc_info->save_tbu);
-		mttb(proc_info->save_tbl);
-	}
-
-	proc_info->rtcPop = EndOfAllTime;			/* forget any existing decrementer setting */
-	etimer_resync_deadlines();				/* Now that the time base is sort of correct, request the next timer pop */
-
-	proc_info->cpu_type = CPU_TYPE_POWERPC;
-	proc_info->cpu_subtype = (cpu_subtype_t)proc_info->pf.rptdProc;
-	proc_info->cpu_threadtype = CPU_THREADTYPE_NONE;
-	proc_info->running = TRUE;
-
-}
-
-/*
- *	Routine:	cpu_machine_init
- *	Function:
- */
-void
-cpu_machine_init(
-	void)
-{
-	struct per_proc_info			*proc_info;
-	volatile struct per_proc_info	*mproc_info;
-
-
-	proc_info = getPerProc();
-	mproc_info = PerProcTable[master_cpu].ppe_vaddr;
-
-	if (proc_info != mproc_info) {
-		simple_lock(&rht_lock);
-		if (rht_state & RHT_WAIT)
-			thread_wakeup(&rht_state);
-		rht_state &= ~(RHT_BUSY|RHT_WAIT);
-		simple_unlock(&rht_lock);
-	}
-
-	PE_cpu_machine_init(proc_info->cpu_id, !(proc_info->cpu_flags & BootDone));
-
-	if (proc_info->hibernate) {
-		uint32_t	tbu, tbl;
-
-		do {
-			tbu = mftbu();
-			tbl = mftb();
-		} while (mftbu() != tbu);
-
-		proc_info->hibernate = 0;
-		hibernate_machine_init();
-
-		// hibernate_machine_init() could take minutes and we don't want timeouts
-		// to fire as soon as scheduling starts. Reset timebase so it appears
-		// no time has elapsed, as it would for regular sleep.
-		mttb(0);
-		mttbu(tbu);
-		mttb(tbl);
-	}
-
-	if (proc_info != mproc_info) {
-		while (!((mproc_info->cpu_flags) & SignalReady))
-			continue;
-		cpu_sync_timebase();
-	}
-
-	ml_init_interrupt();
-	if (proc_info != mproc_info)
-		simple_lock(&SignalReadyLock);
-	proc_info->cpu_flags |= BootDone|SignalReady;
-	if (proc_info != mproc_info) {
-		if (proc_info->ppXFlags & SignalReadyWait) {
-			hw_atomic_and_noret(&proc_info->ppXFlags, ~SignalReadyWait);
-			thread_wakeup(&proc_info->cpu_flags);
-		}
-		simple_unlock(&SignalReadyLock);
-		pmsPark();						/* Timers should be cool now, park the power management stepper */
-	}
-}
-
-
-/*
- *	Routine:	cpu_per_proc_alloc
- *	Function:
- */
-struct per_proc_info *
-cpu_per_proc_alloc(
-		void)
-{
-	struct per_proc_info	*proc_info = NULL;
-	void			*interrupt_stack = NULL;
-	void			*debugger_stack = NULL;
-
-	if ((proc_info = (struct per_proc_info*)kalloc(sizeof(struct per_proc_info))) == (struct per_proc_info*)0)
-		return (struct per_proc_info *)NULL;
-	if ((interrupt_stack = kalloc(INTSTACK_SIZE)) == 0) {
-		kfree(proc_info, sizeof(struct per_proc_info));
-		return (struct per_proc_info *)NULL;
-	}
-
-	if ((debugger_stack = kalloc(kernel_stack_size)) == 0) {
-		kfree(proc_info, sizeof(struct per_proc_info));
-		kfree(interrupt_stack, INTSTACK_SIZE);
-		return (struct per_proc_info *)NULL;
-	}
-
-	bzero((void *)proc_info, sizeof(struct per_proc_info));
-
-	/* Set physical address of the second page */
-	proc_info->pp2ndPage = (addr64_t)pmap_find_phys(kernel_pmap,
-				((addr64_t)(unsigned int)proc_info) + 0x1000)
-			       << PAGE_SHIFT;
-	proc_info->next_savearea = (uint64_t)save_get_init();
-	proc_info->pf = BootProcInfo.pf;
-	proc_info->istackptr = (vm_offset_t)interrupt_stack + INTSTACK_SIZE - FM_SIZE;
-	proc_info->intstack_top_ss = proc_info->istackptr;
-	proc_info->debstackptr = (vm_offset_t)debugger_stack + kernel_stack_size - FM_SIZE;
-	proc_info->debstack_top_ss = proc_info->debstackptr;
-
-	queue_init(&proc_info->rtclock_timer.queue);
-	proc_info->rtclock_timer.deadline = EndOfAllTime;
-
-	return proc_info;
-
-}
-
-
-/*
- *	Routine:	cpu_per_proc_free
- *	Function:
- */
-void
-cpu_per_proc_free(
-	struct per_proc_info	*proc_info
-)
-{
-	if (proc_info->cpu_number == master_cpu)
-		return;
-	kfree((void *)(proc_info->intstack_top_ss - INTSTACK_SIZE + FM_SIZE), INTSTACK_SIZE);
-	kfree((void *)(proc_info->debstack_top_ss -  kernel_stack_size + FM_SIZE), kernel_stack_size);
-	kfree((void *)proc_info, sizeof(struct per_proc_info));			/* Release the per_proc */
-}
-
-
-/*
- *	Routine:	cpu_per_proc_register
- *	Function:
- */
-kern_return_t
-cpu_per_proc_register(
-	struct per_proc_info	*proc_info
-)
-{
-	int	cpu;
-	
-	cpu = OSIncrementAtomic(&real_ncpus);
-	
-	if (real_ncpus > max_ncpus) {
-		return KERN_FAILURE;
-	}
-	
-	proc_info->cpu_number = cpu;
-	PerProcTable[cpu].ppe_vaddr = proc_info;
-	PerProcTable[cpu].ppe_paddr = (addr64_t)pmap_find_phys(kernel_pmap, (addr64_t)(unsigned int)proc_info) << PAGE_SHIFT;
-	eieio();
-	return KERN_SUCCESS;
-}
-
-
-/*
- *	Routine:	cpu_start
- *	Function:
- */
-kern_return_t
-cpu_start(
-	int cpu)
-{
-	struct per_proc_info	*proc_info;
-	kern_return_t			ret;
-	mapping_t				*mp;
-
-	proc_info = PerProcTable[cpu].ppe_vaddr;
-
-	if (cpu == cpu_number()) {
- 	  PE_cpu_machine_init(proc_info->cpu_id, !(proc_info->cpu_flags & BootDone));
-	  ml_init_interrupt();
-	  proc_info->cpu_flags |= BootDone|SignalReady;
-
-	  return KERN_SUCCESS;
-	} else {
-		proc_info->cpu_flags &= BootDone;
-		proc_info->interrupts_enabled = 0;
-		proc_info->pending_ast = AST_NONE;
-		proc_info->istackptr = proc_info->intstack_top_ss;
-		proc_info->rtcPop = EndOfAllTime;
-		proc_info->FPU_owner = NULL;
-		proc_info->VMX_owner = NULL;
-		proc_info->pms.pmsStamp = 0;									/* Dummy transition time */
-		proc_info->pms.pmsPop = EndOfAllTime;							/* Set the pop way into the future */
-		proc_info->pms.pmsState = pmsParked;							/* Park the stepper */
-		proc_info->pms.pmsCSetCmd = pmsCInit;							/* Set dummy initial hardware state */
-		mp = (mapping_t *)(&proc_info->ppUMWmp);
-		mp->mpFlags = 0x01000000 | mpLinkage | mpPerm | 1;
-		mp->mpSpace = invalSpace;
-
-		if (proc_info->start_paddr == EXCEPTION_VECTOR(T_RESET)) {
-
-			simple_lock(&rht_lock);
-			while (rht_state & RHT_BUSY) {
-				rht_state |= RHT_WAIT;
-				thread_sleep_usimple_lock((event_t)&rht_state,
-						    &rht_lock, THREAD_UNINT);
-			}
-			rht_state |= RHT_BUSY;
-			simple_unlock(&rht_lock);
-
-			ml_phys_write((vm_offset_t)&ResetHandler + 0,
-					  RESET_HANDLER_START);
-			ml_phys_write((vm_offset_t)&ResetHandler + 4,
-					  (vm_offset_t)_start_cpu);
-			ml_phys_write((vm_offset_t)&ResetHandler + 8,
-					  (vm_offset_t)&PerProcTable[cpu]);
-		}
-/*
- *		Note: we pass the current time to the other processor here. He will load it
- *		as early as possible so that there is a chance that it is close to accurate.
- *		After the machine is up a while, we will officially resync the clocks so
- *		that all processors are the same.  This is just to get close.
- */
-
-		ml_get_timebase((unsigned long long *)&proc_info->ruptStamp);
-		
-		__asm__ volatile("sync");				/* Commit to storage */
-		__asm__ volatile("isync");				/* Wait a second */
-		ret = PE_cpu_start(proc_info->cpu_id,
-						   proc_info->start_paddr, (vm_offset_t)proc_info);
-
-		if (ret != KERN_SUCCESS) {
-			if (proc_info->start_paddr == EXCEPTION_VECTOR(T_RESET)) {
-				simple_lock(&rht_lock);
-				if (rht_state & RHT_WAIT)
-					thread_wakeup(&rht_state);
-				rht_state &= ~(RHT_BUSY|RHT_WAIT);
-				simple_unlock(&rht_lock);
-			}
-		} else {
-			simple_lock(&SignalReadyLock);
-			if (!((*(volatile short *)&proc_info->cpu_flags) & SignalReady)) {
-				hw_atomic_or_noret(&proc_info->ppXFlags, SignalReadyWait);
-				thread_sleep_simple_lock((event_t)&proc_info->cpu_flags,
-				                          &SignalReadyLock, THREAD_UNINT);
-			}
-			simple_unlock(&SignalReadyLock);
-
-		}
-		return(ret);
-	}
-}
-
-/*
- *	Routine:	cpu_exit_wait
- *	Function:
- */
-void
-cpu_exit_wait(
-	int	cpu)
-{
-	struct per_proc_info	*tpproc;
-
-	if ( cpu != master_cpu) {
-		tpproc = PerProcTable[cpu].ppe_vaddr;
-		while (!((*(volatile short *)&tpproc->cpu_flags) & SleepState)) {};
-	}
-}
-
-
-/*
- *	Routine:	cpu_doshutdown
- *	Function:
- */
-void
-cpu_doshutdown(
-	void)
-{
-	enable_preemption();
-	processor_offline(current_processor());
-}
-
-
-/*
- *	Routine:	cpu_sleep
- *	Function:
- */
-void
-cpu_sleep(
-	void)
-{
-	struct per_proc_info	*proc_info;
-	unsigned int			i;
-	unsigned int			wait_ncpus_sleep, ncpus_sleep;
-	facility_context		*fowner;
-
-	proc_info = getPerProc();
-
-	proc_info->running = FALSE;
-
-	timer_queue_shutdown(&proc_info->rtclock_timer.queue);
-	proc_info->rtclock_timer.deadline = EndOfAllTime;
-
-	fowner = proc_info->FPU_owner;					/* Cache this */
-	if(fowner) /* If anyone owns FPU, save it */
-		fpu_save(fowner);
-	proc_info->FPU_owner = NULL;						/* Set no fpu owner now */
-
-	fowner = proc_info->VMX_owner;					/* Cache this */
-	if(fowner) vec_save(fowner);					/* If anyone owns vectors, save it */
-	proc_info->VMX_owner = NULL;						/* Set no vector owner now */
-
-	if (proc_info->cpu_number == master_cpu)  {
-		proc_info->cpu_flags &= BootDone;
-		proc_info->interrupts_enabled = 0;
-		proc_info->pending_ast = AST_NONE;
-
-		if (proc_info->start_paddr == EXCEPTION_VECTOR(T_RESET)) {
-			ml_phys_write((vm_offset_t)&ResetHandler + 0,
-					  RESET_HANDLER_START);
-			ml_phys_write((vm_offset_t)&ResetHandler + 4,
-					  (vm_offset_t)_start_cpu);
-			ml_phys_write((vm_offset_t)&ResetHandler + 8,
-					  (vm_offset_t)&PerProcTable[master_cpu]);
-
-			__asm__ volatile("sync");
-			__asm__ volatile("isync");
-		}
-
-		wait_ncpus_sleep = real_ncpus-1; 
-		ncpus_sleep = 0;
-		while (wait_ncpus_sleep != ncpus_sleep) {
-			ncpus_sleep = 0;
-			for(i=1; i < real_ncpus ; i++) {
-				if ((*(volatile short *)&(PerProcTable[i].ppe_vaddr->cpu_flags)) & SleepState)
-					ncpus_sleep++;
-			}
-		}
-
-	}
-
-	/*
-	 * Save the TBR before stopping.
-	 */
-	do {
-		proc_info->save_tbu = mftbu();
-		proc_info->save_tbl = mftb();
-	} while (mftbu() != proc_info->save_tbu);
-
-	PE_cpu_machine_quiesce(proc_info->cpu_id);
-}
-
-
-/*
- *	Routine:	cpu_signal
- *	Function:
- *	Here is where we send a message to another processor.  So far we only have two:
- *	SIGPast and SIGPdebug.  SIGPast is used to preempt and kick off threads (this is
- *	currently disabled). SIGPdebug is used to enter the debugger.
- *
- *	We set up the SIGP function to indicate that this is a simple message and set the
- *	order code (MPsigpParm0) to SIGPast or SIGPdebug. After finding the per_processor
- *	block for the target, we lock the message block. Then we set the parameter(s).
- *	Next we change the lock (also called "busy") to "passing" and finally signal
- *	the other processor. Note that we only wait about 0.5 ms to get the message lock.
- *	If we time out, we return failure to our caller. It is their responsibility to
- *	recover.
- */
-kern_return_t 
-cpu_signal(
-	int target, 
-	int signal, 
-	unsigned int p1, 
-	unsigned int p2)
-{
-
-	unsigned int				holdStat;
-	struct per_proc_info		*tpproc, *mpproc;
-	int							busybitset=0;
-
-#if DEBUG
-	if(((unsigned int)target) >= MAX_CPUS) panic("cpu_signal: invalid target CPU - %08X\n", target);
-#endif
-
-	mpproc = getPerProc();							/* Point to our block */
-	tpproc = PerProcTable[target].ppe_vaddr;		/* Point to the target's block */
-	if(mpproc == tpproc) return KERN_FAILURE;		/* Cannot signal ourselves */
-
-	if(!tpproc->running) return KERN_FAILURE;
-
-	if (!(tpproc->cpu_flags & SignalReady)) return KERN_FAILURE;
-		
-	if((tpproc->MPsigpStat & MPsigpMsgp) == MPsigpMsgp) {	/* Is there an unreceived message already pending? */
-
-		if(signal == SIGPwake) {					/* SIGPwake can merge into all others... */
-			mpproc->hwCtr.numSIGPmwake++;			/* Account for merged wakes */
-			return KERN_SUCCESS;
-		}
-
-		if((signal == SIGPast) && (tpproc->MPsigpParm0 == SIGPast)) {	/* We can merge ASTs */
-			mpproc->hwCtr.numSIGPmast++;			/* Account for merged ASTs */
-			return KERN_SUCCESS;					/* Don't bother to send this one... */
-		}
-
-		if (tpproc->MPsigpParm0 == SIGPwake) {
-			if (hw_lock_mbits(&tpproc->MPsigpStat, (MPsigpMsgp | MPsigpAck), 
-			                  (MPsigpBusy | MPsigpPass ), MPsigpBusy, 0)) {
-				busybitset = 1;
-				mpproc->hwCtr.numSIGPmwake++;	
-			}
-		}
-	}	
-	
-	if((busybitset == 0) && 
-	   (!hw_lock_mbits(&tpproc->MPsigpStat, MPsigpMsgp, 0, MPsigpBusy, 
-	   (gPEClockFrequencyInfo.timebase_frequency_hz >> 11)))) {	/* Try to lock the message block with a .5ms timeout */
-		mpproc->hwCtr.numSIGPtimo++;				/* Account for timeouts */
-		return KERN_FAILURE;						/* Timed out, take your ball and go home... */
-	}
-
-	holdStat = MPsigpBusy | MPsigpPass | (MPsigpSigp << 8) | mpproc->cpu_number;	/* Set up the signal status word */
-	tpproc->MPsigpParm0 = signal;					/* Set message order */
-	tpproc->MPsigpParm1 = p1;						/* Set additional parm */
-	tpproc->MPsigpParm2 = p2;						/* Set additional parm */
-	
-	__asm__ volatile("sync");						/* Make sure it's all there */
-	
-	tpproc->MPsigpStat = holdStat;					/* Set status and pass the lock */
-	__asm__ volatile("eieio");						/* I'm a paraniod freak */
-	
-	if (busybitset == 0)
-		PE_cpu_signal(mpproc->cpu_id, tpproc->cpu_id);	/* Kick the other processor */
-
-	return KERN_SUCCESS;							/* All is goodness and rainbows... */
-}
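A typical use of this interface is a fire-and-forget kick with no parameters; per the timeout behavior above, recovery is the caller's job. A minimal sketch (target_cpu is whatever slot number the caller has resolved):

	kern_return_t kr = cpu_signal(target_cpu, SIGPast, 0, 0);
	if (kr != KERN_SUCCESS) {
		/* Lock-pass timed out or target not ready; retry or back off. */
	}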
-
-
-/*
- *	Routine:	cpu_signal_handler
- *	Function:
- *	Here is where we implement the receiver of the signaling protocol.
- *	We wait for the signal status area to be passed to us. Then we snarf
- *	up the status, the sender, and the 3 potential parms. Next we release
- *	the lock and signal the other guy.
- */
-void 
-cpu_signal_handler(void)
-{
-	unsigned int holdStat, holdParm0, holdParm1, holdParm2;
-	unsigned int *parmAddr;
-	struct per_proc_info	*proc_info;
-	int cpu;
-	broadcastFunc xfunc;
-	cpu = cpu_number();								/* Get the CPU number */
-
-	proc_info = getPerProc();
-
-/*
- *	Since we've been signaled, wait about 31 ms for the signal lock to pass
- */
-	if(!hw_lock_mbits(&proc_info->MPsigpStat, (MPsigpMsgp | MPsigpAck), (MPsigpBusy | MPsigpPass),
-	  (MPsigpBusy | MPsigpPass | MPsigpAck), (gPEClockFrequencyInfo.timebase_frequency_hz >> 5))) {
-		panic("cpu_signal_handler: Lock pass timed out\n");
-	}
-	
-	holdStat = proc_info->MPsigpStat;				/* Snarf stat word */
-	holdParm0 = proc_info->MPsigpParm0;				/* Snarf parameter */
-	holdParm1 = proc_info->MPsigpParm1;				/* Snarf parameter */
-	holdParm2 = proc_info->MPsigpParm2;				/* Snarf parameter */
-	
-	__asm__ volatile("isync");						/* Make sure we don't unlock until memory is in */
-
-	proc_info->MPsigpStat = holdStat & ~(MPsigpMsgp | MPsigpAck | MPsigpFunc);	/* Release lock */
-
-	switch ((holdStat & MPsigpFunc) >> 8) {			/* Decode function code */
-
-		case MPsigpIdle:							/* Was function cancelled? */
-			return;									/* Yup... */
-			
-		case MPsigpSigp:							/* Signal Processor message? */
-			
-			switch (holdParm0) {					/* Decode SIGP message order */
-
-				case SIGPast:						/* Should we do an AST? */
-					proc_info->hwCtr.numSIGPast++;		/* Count this one */
-#if 0
-					kprintf("cpu_signal_handler: AST check on cpu %x\n", cpu_number());
-#endif
-					ast_check((processor_t)proc_info->processor);
-					return;							/* All done... */
-					
-				case SIGPcpureq:					/* CPU specific function? */
-				
-					proc_info->hwCtr.numSIGPcpureq++;	/* Count this one */
-					switch (holdParm1) {			/* Select specific function */
-					
-						case CPRQtimebase:
-
-							cpu_timebase_signal_handler(proc_info, (struct SIGtimebase *)holdParm2);
-							return;
-
-						case CPRQsegload:
-							return;
-						
- 						case CPRQchud:
- 							parmAddr = (unsigned int *)holdParm2;	/* Get the destination address */
- 							if(perfCpuSigHook) {
- 								struct savearea *ssp = current_thread()->machine.pcb;
- 								if(ssp) {
- 									(perfCpuSigHook)(parmAddr[1] /* request */, ssp, 0, 0);
- 								}
-   							}
- 							parmAddr[1] = 0;
- 							parmAddr[0] = 0;		/* Show we're done */
-  							return;
-						
-						case CPRQscom:
-							if(((scomcomm *)holdParm2)->scomfunc) {	/* Are we writing */
-								((scomcomm *)holdParm2)->scomstat = ml_scom_write(((scomcomm *)holdParm2)->scomreg, ((scomcomm *)holdParm2)->scomdata);	/* Write scom */
-							}
-							else {					/* No, reading... */
-								((scomcomm *)holdParm2)->scomstat = ml_scom_read(((scomcomm *)holdParm2)->scomreg, &((scomcomm *)holdParm2)->scomdata);	/* Read scom */
-							}
-							return;
-
-						case CPRQsps:
-							{
-							ml_set_processor_speed_slave(holdParm2);
-							return;
-						}
-						default:
-							panic("cpu_signal_handler: unknown CPU request - %08X\n", holdParm1);
-							return;
-					}
-					
-	
-				case SIGPdebug:						/* Enter the debugger? */		
-
-					proc_info->hwCtr.numSIGPdebug++;	/* Count this one */
-					proc_info->debugger_is_slave++;		/* Bump up the count to show we're here */
-					(void)hw_atomic_sub(&debugger_sync, 1);	/* Show we've received the 'rupt */
-					__asm__ volatile("tw 4,r3,r3");	/* Enter the debugger */
-					return;							/* All done now... */
-					
-				case SIGPwake:						/* Wake up CPU */
-					proc_info->hwCtr.numSIGPwake++;		/* Count this one */
-					return;							/* No need to do anything, the interrupt does it all... */
-					
-				case SIGPcall:						/* Call function on CPU */
-					proc_info->hwCtr.numSIGPcall++;	/* Count this one */
-					xfunc = (broadcastFunc)holdParm1;				/* Do this since I can't seem to figure C out */
-					xfunc(holdParm2);				/* Call the passed function */
-					return;							/* Done... */
-					
-				default:
-					panic("cpu_signal_handler: unknown SIGP message order - %08X\n", holdParm0);
-					return;
-			
-			}
-	
-		default:
-			panic("cpu_signal_handler: unknown SIGP function - %08X\n", (holdStat & MPsigpFunc) >> 8);
-			return;
-	
-	}
-	panic("cpu_signal_handler: we should never get here\n");
-}
-
-
-/*
- *	Routine:	cpu_sync_timebase
- *	Function:
- */
-void
-cpu_sync_timebase(
-	void)
-{
-	natural_t tbu, tbl;
-	boolean_t	intr;
-	struct SIGtimebase	syncClkSpot;
-
-	intr = ml_set_interrupts_enabled(FALSE);		/* No interruptions in here */
-
-	syncClkSpot.avail = FALSE;
-	syncClkSpot.ready = FALSE;
-	syncClkSpot.done = FALSE;
-
-	while (cpu_signal(master_cpu, SIGPcpureq, CPRQtimebase,
-							(unsigned int)&syncClkSpot) != KERN_SUCCESS)
-		continue;
-
-	while (syncClkSpot.avail == FALSE)
-		continue;
-
-	isync();
-
-	/*
-	 * Split the value first to keep the compiler from generating extra code
-	 * in the timebase-setting sequence below.
-	 */
-	tbu = syncClkSpot.abstime >> 32;
-	tbl = (uint32_t)syncClkSpot.abstime;
-
-	mttb(0);
-	mttbu(tbu);
-	mttb(tbl);
-
-	syncClkSpot.ready = TRUE;
-
-	while (syncClkSpot.done == FALSE)
-		continue;
-
-	etimer_resync_deadlines();									/* Start the timer */
-	(void)ml_set_interrupts_enabled(intr);
-}
-
-
-/*
- *	Routine:	cpu_timebase_signal_handler
- *	Function:
- */
-void
-cpu_timebase_signal_handler(
-	struct per_proc_info    *proc_info,
-	struct SIGtimebase		*timebaseAddr)
-{
-	unsigned int		tbu, tbu2, tbl;
-
-	if(proc_info->time_base_enable !=  (void(*)(cpu_id_t, boolean_t ))NULL)
-		proc_info->time_base_enable(proc_info->cpu_id, FALSE);
-
-	timebaseAddr->abstime = 0;	/* Touch to force into cache */
-	sync();
-							
-	do {
-		asm volatile("	mftbu %0" : "=r" (tbu));
-		asm volatile("	mftb %0" : "=r" (tbl));
-		asm volatile("	mftbu %0" : "=r" (tbu2));
-	} while (tbu != tbu2);
-							
-	timebaseAddr->abstime = ((uint64_t)tbu << 32) | tbl;
-	sync();					/* Force order */
-						
-	timebaseAddr->avail = TRUE;
-
-	while (timebaseAddr->ready == FALSE)
-		continue;
-
-	if(proc_info->time_base_enable !=  (void(*)(cpu_id_t, boolean_t ))NULL)
-		proc_info->time_base_enable(proc_info->cpu_id, TRUE);
-
-	timebaseAddr->done = TRUE;
-}
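Taken together, cpu_sync_timebase() and cpu_timebase_signal_handler() form a three-flag handshake over the shared SIGtimebase block. A stripped-down sketch of the same protocol, with read_timebase() and set_timebase() as assumed stand-ins for the mftbu/mftb and mttbu/mttb sequences:

	/* Shared block, as in struct SIGtimebase above. */
	volatile boolean_t avail = FALSE, ready = FALSE, done = FALSE;
	volatile uint64_t  abstime;

	void master_side(void)			/* cpu_timebase_signal_handler analogue */
	{
		abstime = read_timebase();	/* assumed coherent 64-bit TB read */
		avail = TRUE;			/* publish the value */
		while (!ready)			/* wait for slave to load it */
			continue;
		done = TRUE;			/* both sides may proceed */
	}

	void slave_side(void)			/* cpu_sync_timebase analogue */
	{
		while (!avail)
			continue;
		set_timebase(abstime);		/* assumed mttb(0)/mttbu/mttb sequence */
		ready = TRUE;
		while (!done)
			continue;
	}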
-
-
-/*
- *	Routine:	cpu_control
- *	Function:
- */
-kern_return_t
-cpu_control(
-	int			slot_num,
-	processor_info_t	info,
-	unsigned int    	count)
-{
-	struct per_proc_info	*proc_info;
-	cpu_type_t		tcpu_type;
-	cpu_subtype_t		tcpu_subtype;
-	processor_pm_regs_t	perf_regs;
-	processor_control_cmd_t	cmd;
-	boolean_t		oldlevel;
-#define MMCR0_SUPPORT_MASK	0xf83f1fff
-#define MMCR1_SUPPORT_MASK	0xffc00000
-#define MMCR2_SUPPORT_MASK	0x80000000
-
-	proc_info = PerProcTable[slot_num].ppe_vaddr;
-	tcpu_type = proc_info->cpu_type;
-	tcpu_subtype = proc_info->cpu_subtype;
-	cmd = (processor_control_cmd_t) info;
-
-	if (count < PROCESSOR_CONTROL_CMD_COUNT)
-	  return(KERN_FAILURE);
-
-	if ( tcpu_type != cmd->cmd_cpu_type ||
-	     tcpu_subtype != cmd->cmd_cpu_subtype)
-	  return(KERN_FAILURE);
-
-	if (perfmon_acquire_facility(current_task()) != KERN_SUCCESS) {
-		return(KERN_RESOURCE_SHORTAGE); /* cpu performance facility in use by another task */
-	}
-
-	switch (cmd->cmd_op)
-	  {
-	  case PROCESSOR_PM_CLR_PMC:       /* Clear Performance Monitor Counters */
-	    switch (tcpu_subtype)
-	      {
-	      case CPU_SUBTYPE_POWERPC_750:
-	      case CPU_SUBTYPE_POWERPC_7400:
-	      case CPU_SUBTYPE_POWERPC_7450:
-		{
-		  oldlevel = ml_set_interrupts_enabled(FALSE);    /* disable interrupts */
-		  mtpmc1(0x0);
-		  mtpmc2(0x0);
-		  mtpmc3(0x0);
-		  mtpmc4(0x0);
-		  ml_set_interrupts_enabled(oldlevel);     /* enable interrupts */
-		  return(KERN_SUCCESS);
-		}
-	      default:
-		return(KERN_FAILURE);
-	      } /* tcpu_subtype */
-	  case PROCESSOR_PM_SET_REGS:      /* Set Performance Monitor Registers */
-	    switch (tcpu_subtype)
-	      {
-	      case CPU_SUBTYPE_POWERPC_750:
-		if (count <  (PROCESSOR_CONTROL_CMD_COUNT +
-		       PROCESSOR_PM_REGS_COUNT_POWERPC_750))
-		  return(KERN_FAILURE);
-		else
-		  {
-		    perf_regs = (processor_pm_regs_t)cmd->cmd_pm_regs;
-		    oldlevel = ml_set_interrupts_enabled(FALSE);    /* disable interrupts */
-		    mtmmcr0(PERFMON_MMCR0(perf_regs) & MMCR0_SUPPORT_MASK);
-		    mtpmc1(PERFMON_PMC1(perf_regs));
-		    mtpmc2(PERFMON_PMC2(perf_regs));
-		    mtmmcr1(PERFMON_MMCR1(perf_regs) & MMCR1_SUPPORT_MASK);
-		    mtpmc3(PERFMON_PMC3(perf_regs));
-		    mtpmc4(PERFMON_PMC4(perf_regs));
-		    ml_set_interrupts_enabled(oldlevel);     /* enable interrupts */
-		    return(KERN_SUCCESS);
-		  }
-	      case CPU_SUBTYPE_POWERPC_7400:
-	      case CPU_SUBTYPE_POWERPC_7450:
-		if (count <  (PROCESSOR_CONTROL_CMD_COUNT +
-		       PROCESSOR_PM_REGS_COUNT_POWERPC_7400))
-		  return(KERN_FAILURE);
-		else
-		  {
-		    perf_regs = (processor_pm_regs_t)cmd->cmd_pm_regs;
-		    oldlevel = ml_set_interrupts_enabled(FALSE);    /* disable interrupts */
-		    mtmmcr0(PERFMON_MMCR0(perf_regs) & MMCR0_SUPPORT_MASK);
-		    mtpmc1(PERFMON_PMC1(perf_regs));
-		    mtpmc2(PERFMON_PMC2(perf_regs));
-		    mtmmcr1(PERFMON_MMCR1(perf_regs) & MMCR1_SUPPORT_MASK);
-		    mtpmc3(PERFMON_PMC3(perf_regs));
-		    mtpmc4(PERFMON_PMC4(perf_regs));
-		    mtmmcr2(PERFMON_MMCR2(perf_regs) & MMCR2_SUPPORT_MASK);
-		    ml_set_interrupts_enabled(oldlevel);     /* enable interrupts */
-		    return(KERN_SUCCESS);
-		  }
-	      default:
-		return(KERN_FAILURE);
-	      } /* switch tcpu_subtype */
-	  case PROCESSOR_PM_SET_MMCR:
-	    switch (tcpu_subtype)
-	      {
-	      case CPU_SUBTYPE_POWERPC_750:
-		if (count < (PROCESSOR_CONTROL_CMD_COUNT +
-		      PROCESSOR_PM_REGS_COUNT_POWERPC_750))
-		  return(KERN_FAILURE);
-		else
-		  {
-		    perf_regs = (processor_pm_regs_t)cmd->cmd_pm_regs;
-		    oldlevel = ml_set_interrupts_enabled(FALSE);    /* disable interrupts */
-		    mtmmcr0(PERFMON_MMCR0(perf_regs) & MMCR0_SUPPORT_MASK);
-		    mtmmcr1(PERFMON_MMCR1(perf_regs) & MMCR1_SUPPORT_MASK);
-		    ml_set_interrupts_enabled(oldlevel);     /* enable interrupts */
-		    return(KERN_SUCCESS);
-		  }
-	      case CPU_SUBTYPE_POWERPC_7400:
-	      case CPU_SUBTYPE_POWERPC_7450:
-		if (count < (PROCESSOR_CONTROL_CMD_COUNT +
-		      PROCESSOR_PM_REGS_COUNT_POWERPC_7400))
-		  return(KERN_FAILURE);
-		else
-		  {
-		    perf_regs = (processor_pm_regs_t)cmd->cmd_pm_regs;
-		    oldlevel = ml_set_interrupts_enabled(FALSE);    /* disable interrupts */
-		    mtmmcr0(PERFMON_MMCR0(perf_regs) & MMCR0_SUPPORT_MASK);
-		    mtmmcr1(PERFMON_MMCR1(perf_regs) & MMCR1_SUPPORT_MASK);
-		    mtmmcr2(PERFMON_MMCR2(perf_regs) & MMCR2_SUPPORT_MASK);
-		    ml_set_interrupts_enabled(oldlevel);     /* enable interrupts */
-		    return(KERN_SUCCESS);
-		  }
-	      default:
-		return(KERN_FAILURE);
-	      } /* tcpu_subtype */
-	  default:
-	    return(KERN_FAILURE);
-	  } /* switch cmd_op */
-}
-
-
-/*
- *	Routine:	cpu_info_count
- *	Function:
- */
-kern_return_t
-cpu_info_count(
-	processor_flavor_t	flavor,
-	unsigned int    	*count)
-{
-	cpu_subtype_t     tcpu_subtype;
-
-	/*
-	 * For now, we just assume that all CPUs are of the same type
-	 */
-	tcpu_subtype = PerProcTable[master_cpu].ppe_vaddr->cpu_subtype;
-	switch (flavor) {
-		case PROCESSOR_PM_REGS_INFO:
-			switch (tcpu_subtype) {
-				case CPU_SUBTYPE_POWERPC_750:
-		
-					*count = PROCESSOR_PM_REGS_COUNT_POWERPC_750;
-					return(KERN_SUCCESS);
-
-				case CPU_SUBTYPE_POWERPC_7400:
-				case CPU_SUBTYPE_POWERPC_7450:
-		
-					*count = PROCESSOR_PM_REGS_COUNT_POWERPC_7400;
-					return(KERN_SUCCESS);
-
-				default:
-					*count = 0;
-					return(KERN_INVALID_ARGUMENT);
-			} /* switch tcpu_subtype */
-
-		case PROCESSOR_TEMPERATURE:
-			*count = PROCESSOR_TEMPERATURE_COUNT;
-			return (KERN_SUCCESS);
-
-		default:
-			*count = 0;
-			return(KERN_INVALID_ARGUMENT);
-			
-	}
-}
-
-
-/*
- *	Routine:	cpu_info
- *	Function:
- */
-kern_return_t
-cpu_info(
-	processor_flavor_t	flavor,
-	int			slot_num,
-	processor_info_t	info,
-	unsigned int    	*count)
-{
-	cpu_subtype_t     tcpu_subtype;
-	processor_pm_regs_t  perf_regs;
-	boolean_t oldlevel;
-
-	tcpu_subtype = PerProcTable[slot_num].ppe_vaddr->cpu_subtype;
-
-	switch (flavor) {
-		case PROCESSOR_PM_REGS_INFO:
-
-			perf_regs = (processor_pm_regs_t) info;
-
-			switch (tcpu_subtype) {
-				case CPU_SUBTYPE_POWERPC_750:
-
-					if (*count < PROCESSOR_PM_REGS_COUNT_POWERPC_750)
-					  return(KERN_FAILURE);
-				  
-					oldlevel = ml_set_interrupts_enabled(FALSE);    /* disable interrupts */
-					PERFMON_MMCR0(perf_regs) = mfmmcr0();
-					PERFMON_PMC1(perf_regs)  = mfpmc1();
-					PERFMON_PMC2(perf_regs)  = mfpmc2();
-					PERFMON_MMCR1(perf_regs) = mfmmcr1();
-					PERFMON_PMC3(perf_regs)  = mfpmc3();
-					PERFMON_PMC4(perf_regs)  = mfpmc4();
-					ml_set_interrupts_enabled(oldlevel);     /* enable interrupts */
-		
-					*count = PROCESSOR_PM_REGS_COUNT_POWERPC_750;
-					return(KERN_SUCCESS);
-
-				case CPU_SUBTYPE_POWERPC_7400:
-				case CPU_SUBTYPE_POWERPC_7450:
-
-					if (*count < PROCESSOR_PM_REGS_COUNT_POWERPC_7400)
-					  return(KERN_FAILURE);
-				  
-					oldlevel = ml_set_interrupts_enabled(FALSE);    /* disable interrupts */
-					PERFMON_MMCR0(perf_regs) = mfmmcr0();
-					PERFMON_PMC1(perf_regs)  = mfpmc1();
-					PERFMON_PMC2(perf_regs)  = mfpmc2();
-					PERFMON_MMCR1(perf_regs) = mfmmcr1();
-					PERFMON_PMC3(perf_regs)  = mfpmc3();
-					PERFMON_PMC4(perf_regs)  = mfpmc4();
-					PERFMON_MMCR2(perf_regs) = mfmmcr2();
-					ml_set_interrupts_enabled(oldlevel);     /* enable interrupts */
-		
-					*count = PROCESSOR_PM_REGS_COUNT_POWERPC_7400;
-					return(KERN_SUCCESS);
-
-				default:
-					return(KERN_FAILURE);
-			} /* switch tcpu_subtype */
-
-		case PROCESSOR_TEMPERATURE:					/* Get the temperature of a processor */
-
-			*info = -1;								/* Get the temperature */
-			return(KERN_FAILURE);
-
-		default:
-			return(KERN_INVALID_ARGUMENT);
-			
-	} /* flavor */
-}
-
-
-/*
- *	Routine:	cpu_to_processor
- *	Function:
- */
-processor_t
-cpu_to_processor(
-	int			cpu)
-{
-	return ((processor_t)PerProcTable[cpu].ppe_vaddr->processor);
-}
-
-
-/*
- *	Routine:	slot_type
- *	Function:
- */
-cpu_type_t
-slot_type(
-	int		slot_num)
-{
-	return (PerProcTable[slot_num].ppe_vaddr->cpu_type);
-}
-
-
-/*
- *	Routine:	slot_subtype
- *	Function:
- */
-cpu_subtype_t
-slot_subtype(
-	int		slot_num)
-{
-	return (PerProcTable[slot_num].ppe_vaddr->cpu_subtype);
-}
-
-
-/*
- *	Routine:	slot_threadtype
- *	Function:
- */
-cpu_threadtype_t
-slot_threadtype(
-	int		slot_num)
-{
-	return (PerProcTable[slot_num].ppe_vaddr->cpu_threadtype);
-}
-
-
-/*
- *	Routine:	cpu_type
- *	Function:
- */
-cpu_type_t
-cpu_type(void)
-{
-	return (getPerProc()->cpu_type);
-}
-
-
-/*
- *	Routine:	cpu_subtype
- *	Function:
- */
-cpu_subtype_t
-cpu_subtype(void)
-{
-	return (getPerProc()->cpu_subtype);
-}
-
-
-/*
- *	Routine:	cpu_threadtype
- *	Function:
- */
-cpu_threadtype_t
-cpu_threadtype(void)
-{
-	return (getPerProc()->cpu_threadtype);
-}
-
-/*
- *	Call a function on all running processors
- *
- *	Note that the synch parameter is used to wait until all functions are complete.
- *	It is not passed to the other processor and must be known by the called function.
- *	The called function must do a thread_wakeup on the synch if it decrements the
- *	synch count to 0.
- *
- *	We start by initializing the synchronizer to the number of possible cpus.
- *	Then we signal each possible processor.
- *	If the signal fails, we count it.  We also skip our own.
- *	When we are finished signaling, we adjust the synchronizer count down by the number of failed signals.
- *	Because the signaled processors are also decrementing the synchronizer count, the adjustment may result in 0.
- *	If this happens, all other processors are finished with the function.
- *	If so, we clear the wait and continue.
- *	Otherwise, we block waiting for the other processor(s) to finish.
- *
- *	Meanwhile, the other processors decrement the synchronizer when they are done.
- *	If it goes to zero, thread_wakeup is called to run the broadcaster.
- *
- *	Note that because we account for the broadcaster in the synchronization count, we will not get any
- *	premature wakeup calls.
- *
- *	Also note that when we do the adjustment of the synchronization count, if the result is 0, it means that
- *	all of the other processors are finished.  Otherwise, we know that there is at least one more.
- *	When that thread decrements the synchronizer to zero, it will do a thread_wakeup.
- *
- */
-
-int32_t
-cpu_broadcast(uint32_t *synch, broadcastFunc func, uint32_t parm)
-{
-	int failsig;
-	unsigned int cpu, ocpu;
-	
-	cpu = cpu_number();						/* Who are we? */
-	failsig = 0;							/* Clear called processor count */
-	
-	if(real_ncpus > 1) {						/* Are we just a uni? */
-		
-		*synch = real_ncpus;					/* Set how many we are going to try */
-		assert_wait((event_t)synch, THREAD_UNINT);		/* If more than one processor, we may have to wait */
-		
-		for(ocpu = 0; ocpu < real_ncpus; ocpu++) {		/* Tell everyone to call */
-			
-			if(ocpu == cpu)	continue;			/* If we talk to ourselves, people will wonder... */
-			
-			if(KERN_SUCCESS != cpu_signal(ocpu, SIGPcall, (uint32_t)func, parm)) {	/* Call the function on the other processor */
-				failsig++;				/* Count failed signals */
-			}
-		}
-		
-		if (hw_atomic_sub(synch, failsig + 1) == 0)
-			clear_wait(current_thread(), THREAD_AWAKENED);	/* Clear wait if we never signalled or all of the others finished */
-		else
-			thread_block(THREAD_CONTINUE_NULL);		/* Wait for everyone to get into step... */
-	}
-	
-	return (real_ncpus - failsig - 1);				/* Return the number of guys actually signalled... */
-}
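Under the contract spelled out above, a conforming callee must decrement the synchronizer itself and wake the broadcaster on the transition to zero. A minimal sketch, where xfunc_work() and the xfunc_sync cell are illustrative names known to both sides:

	static uint32_t xfunc_sync;			/* known to both sides */

	static void xfunc(uint32_t parm)
	{
		xfunc_work(parm);			/* per-processor work, assumed */
		if (hw_atomic_sub(&xfunc_sync, 1) == 0)
			thread_wakeup((event_t)&xfunc_sync);
	}

	/* Broadcaster: */
	int32_t signalled = cpu_broadcast(&xfunc_sync, xfunc, 0);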
diff --git a/osfmk/ppc/cpu_capabilities.h b/osfmk/ppc/cpu_capabilities.h
deleted file mode 100644
index 268666cd2..000000000
--- a/osfmk/ppc/cpu_capabilities.h
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#ifdef	PRIVATE
-
-#ifndef _PPC_CPU_CAPABILITIES_H
-#define _PPC_CPU_CAPABILITIES_H
-
-/* _cpu_capabilities
- *
- * This is the authoritative way to determine from user mode what
- * implementation-specific processor features are available.
- * This API is only supported for Apple internal use.
- */
-
-#ifndef	__ASSEMBLER__
- 
-extern int _cpu_capabilities;
- 
-#endif /* __ASSEMBLER__ */
-
-/* Bit definitions for _cpu_capabilities: */
-
-#define	kHasAltivec				0x00000001
-#define	k64Bit					0x00000002	// 64-bit GPRs
-#define	kCache32				0x00000004	// cache line size is 32 bytes
-#define	kCache64				0x00000008
-#define	kCache128				0x00000010
-#define	kDcbaRecommended		0x00000020	// PPC: dcba is available and recommended
-#define	kDcbaAvailable			0x00000040	// PPC: dcba is available (but may or may not be recommended)
-#define	kDataStreamsRecommended	0x00000080	// PPC: dst, dstt, dstst, dss, and dssall instructions available and recommended
-#define	kDataStreamsAvailable	0x00000100	// PPC: dst, dstt, dstst, dss, and dssall instructions available (may or may not be rec'd)
-#define	kDcbtStreamsRecommended	0x00000200	// PPC: enhanced dcbt instruction available and recommended
-#define	kDcbtStreamsAvailable	0x00000400	// PPC: enhanced dcbt instruction available (but may or may not be recommended)
-#define	kFastThreadLocalStorage	0x00000800	// TLS ptr is kept in a user-mode-readable register
-
-#define	kUP						0x00008000	// set if (kNumCPUs == 1)
-#define	kNumCPUs				0x00FF0000	// number of CPUs (see _NumCPUs() below)
-
-#define	kNumCPUsShift			16			// see _NumCPUs() below
-
-#define	kHasGraphicsOps			0x08000000	// PPC: has fres, frsqrte, and fsel instructions
-#define	kHasStfiwx				0x10000000	// PPC: has stfiwx instruction
-#define	kHasFsqrt				0x20000000	// PPC: has fsqrt and fsqrts instructions
-
-#ifndef	__ASSEMBLER__
- 
-static __inline__ int _NumCPUs( void ) { return (_cpu_capabilities & kNumCPUs) >> kNumCPUsShift; }
-
-#endif /* __ASSEMBLER__ */
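User code tests these bits directly on _cpu_capabilities; for example:

	int caps  = _cpu_capabilities;
	int ncpus = _NumCPUs();
	int line  = (caps & kCache128) ? 128 :
	            (caps & kCache64)  ?  64 : 32;	/* cache line size in bytes */
	if (caps & kHasAltivec) {
		/* safe to take the Altivec path */
	}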
-
-
-/*
- * The shared kernel/user "comm page(s)":
- *
- * The last eight pages of every address space are reserved for the kernel/user
- * "comm area".  Because they can be addressed via a sign-extended 16-bit field,
- * it is particularly efficient to access code or data in the comm area with
- * absolute branches (ba, bla, bca) or absolute load/stores ("lwz r0,-4096(0)").
- * Because the comm area can be reached from anywhere, dyld is not needed.
- * Although eight pages are reserved, presently only two are populated and mapped.
- *
- * Routines on the comm page(s) can be thought of as the firmware for extended processor
- * instructions, whose opcodes are special forms of "bla".  That is, they are cpu
- * capabilities.  During system initialization, the kernel populates the comm page with
- * code customized for the particular processor and platform.
- *
- * Because Mach VM cannot map the last page of an address space, the max length of
- * the comm area is seven pages.
- */
- 
-#define _COMM_PAGE_BASE_ADDRESS			(-8*4096)						// start at page -8, ie 0xFFFF8000
-#define	_COMM_PAGE_AREA_LENGTH			( 7*4096)						// reserved length of entire comm area
-#define	_COMM_PAGE_AREA_USED			( 2*4096)						// we use two pages so far
-
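The negative page arithmetic above is what makes the sign-extended 16-bit addressing work; in a 32-bit task, -8*4096 wraps to 0xFFFF8000:

	unsigned int base = (unsigned int)(-8 * 4096);
	/* base == 0xFFFF8000, so "lwz r0,-32768(0)" reaches the comm page */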
-/* The following set of definitions are used in the kernel, which needs to distinguish between
- * the 32 and 64-bit commpage addresses and lengths.  On PPC they are the same, but on Intel
- * they are not.
- */
-#define _COMM_PAGE32_BASE_ADDRESS		( _COMM_PAGE_BASE_ADDRESS )
-#define _COMM_PAGE64_BASE_ADDRESS		( _COMM_PAGE_BASE_ADDRESS )
-#define	_COMM_PAGE32_AREA_LENGTH		( _COMM_PAGE_AREA_LENGTH )
-#define	_COMM_PAGE64_AREA_LENGTH		( _COMM_PAGE_AREA_LENGTH )
-#define	_COMM_PAGE32_AREA_USED			( _COMM_PAGE_AREA_USED )
-#define	_COMM_PAGE64_AREA_USED			( _COMM_PAGE_AREA_USED )
-
-/* The Objective-C runtime fixed address page to optimize message dispatch */
-#define _OBJC_PAGE_BASE_ADDRESS			(-20*4096)						// start at page -20, ie 0xFFFEC000
- 
-/*
- * Objective-C needs an "absolute" area all the way up to the top of the
- * address space.
- * For a ppc32 task, that area gets allocated at runtime from user space.
- * For a ppc64 task, that area is not within the user-accessible address range,
- * so we pre-allocate it at exec time (see vm_map_exec()) along with the
- * comm page.
- * 
- * NOTE: that means we can't "nest" the 64-bit comm page...
- */
-#define _COMM_PAGE32_OBJC_SIZE	0ULL
-#define _COMM_PAGE32_OBJC_BASE	0ULL
-#if 0
-#define _COMM_PAGE64_OBJC_SIZE	(4 * 4096)
-#define _COMM_PAGE64_OBJC_BASE	(_OBJC_PAGE_BASE_ADDRESS)
-#else
-/*
- * PPC51: ppc64 is limited to 51-bit addresses, so we can't just go and
- * map the Obj-C area up there.  We would have to create a nested pmap
- * and make a special mapping that redirects the large virtual addresses to
- * that other address space with lower addresses that fit within the 51-bit
- * limit.
- * VM would then have to handle this redirection when we fault one
- * of these pages in but it doesn't do that at this point, so no
- * Obj-C area for ppc64 for now :-(
- */
-#define _COMM_PAGE64_OBJC_SIZE	0ULL
-#define _COMM_PAGE64_OBJC_BASE	0ULL
-#endif
-
-/* data in the comm page */
- 
-#define _COMM_PAGE_SIGNATURE			(_COMM_PAGE_BASE_ADDRESS+0x000)	// first few bytes are a signature
-#define _COMM_PAGE_VERSION				(_COMM_PAGE_BASE_ADDRESS+0x01E)	// 16-bit version#
-#define	_COMM_PAGE_THIS_VERSION			2								// this is version 2 of the commarea format
-  
-#define _COMM_PAGE_CPU_CAPABILITIES		(_COMM_PAGE_BASE_ADDRESS+0x020)	// mirror of extern int _cpu_capabilities
-#define _COMM_PAGE_NCPUS				(_COMM_PAGE_BASE_ADDRESS+0x021)	// number of configured CPUs
-#define _COMM_PAGE_ALTIVEC				(_COMM_PAGE_BASE_ADDRESS+0x024)	// nonzero if Altivec available
-#define _COMM_PAGE_64_BIT				(_COMM_PAGE_BASE_ADDRESS+0x025)	// nonzero if 64-bit processor
-#define _COMM_PAGE_CACHE_LINESIZE		(_COMM_PAGE_BASE_ADDRESS+0x026)	// cache line size (16-bit field)
- 
-#define _COMM_PAGE_UNUSED1				(_COMM_PAGE_BASE_ADDRESS+0x028)	// 24 unused bytes
- 
-#define _COMM_PAGE_2_TO_52				(_COMM_PAGE_BASE_ADDRESS+0x040)	// double float constant 2**52
-#define _COMM_PAGE_10_TO_6				(_COMM_PAGE_BASE_ADDRESS+0x048)	// double float constant 10**6
-#define _COMM_PAGE_MAGIC_FE             (_COMM_PAGE_BASE_ADDRESS+0x050) // magic constant 0xFEFEFEFEFEFEFEFF (to find 0s)
-#define _COMM_PAGE_MAGIC_80             (_COMM_PAGE_BASE_ADDRESS+0x058) // magic constant 0x8080808080808080 (to find 0s)
- 
-#define _COMM_PAGE_TIMEBASE				(_COMM_PAGE_BASE_ADDRESS+0x060)	// used by gettimeofday()
-#define _COMM_PAGE_TIMESTAMP			(_COMM_PAGE_BASE_ADDRESS+0x068)	// used by gettimeofday()
-#define _COMM_PAGE_SEC_PER_TICK			(_COMM_PAGE_BASE_ADDRESS+0x070)	// used by gettimeofday()
- 
- /* jump table (bla to this address, which may be a branch to the actual code somewhere else) */
- /* When new jump table entries are added, corresponding symbols should be added below         */
- 
-#define _COMM_PAGE_COMPARE_AND_SWAP32   (_COMM_PAGE_BASE_ADDRESS+0x080) // compare-and-swap word, no barrier
-#define _COMM_PAGE_COMPARE_AND_SWAP64   (_COMM_PAGE_BASE_ADDRESS+0x0c0) // compare-and-swap doubleword, no barrier
-#define _COMM_PAGE_ENQUEUE				(_COMM_PAGE_BASE_ADDRESS+0x100) // enqueue
-#define _COMM_PAGE_DEQUEUE				(_COMM_PAGE_BASE_ADDRESS+0x140) // dequeue
-#define _COMM_PAGE_MEMORY_BARRIER		(_COMM_PAGE_BASE_ADDRESS+0x180) // memory barrier
-#define _COMM_PAGE_ATOMIC_ADD32			(_COMM_PAGE_BASE_ADDRESS+0x1a0) // add atomic word
-#define _COMM_PAGE_ATOMIC_ADD64			(_COMM_PAGE_BASE_ADDRESS+0x1c0) // add atomic doubleword
-
-#define _COMM_PAGE_UNUSED3				(_COMM_PAGE_BASE_ADDRESS+0x1e0) // 32 unused bytes
-
-#define _COMM_PAGE_ABSOLUTE_TIME		(_COMM_PAGE_BASE_ADDRESS+0x200)	// mach_absolute_time()
-#define _COMM_PAGE_SPINLOCK_TRY			(_COMM_PAGE_BASE_ADDRESS+0x220)	// spinlock_try()
-#define _COMM_PAGE_SPINLOCK_LOCK		(_COMM_PAGE_BASE_ADDRESS+0x260)	// spinlock_lock()
-#define _COMM_PAGE_SPINLOCK_UNLOCK		(_COMM_PAGE_BASE_ADDRESS+0x2a0)	// spinlock_unlock()
-#define _COMM_PAGE_PTHREAD_GETSPECIFIC	(_COMM_PAGE_BASE_ADDRESS+0x2c0)	// pthread_getspecific()
-#define _COMM_PAGE_GETTIMEOFDAY			(_COMM_PAGE_BASE_ADDRESS+0x2e0)	// used by gettimeofday()
-#define _COMM_PAGE_FLUSH_DCACHE			(_COMM_PAGE_BASE_ADDRESS+0x4e0)	// sys_dcache_flush()
-#define _COMM_PAGE_FLUSH_ICACHE			(_COMM_PAGE_BASE_ADDRESS+0x520)	// sys_icache_invalidate()
-#define _COMM_PAGE_PTHREAD_SELF			(_COMM_PAGE_BASE_ADDRESS+0x580)	// pthread_self()
-
-#define	_COMM_PAGE_UNUSED4				(_COMM_PAGE_BASE_ADDRESS+0x5a0)	// 32 unused bytes
-
-#define	_COMM_PAGE_RELINQUISH			(_COMM_PAGE_BASE_ADDRESS+0x5c0)	// used by spinlocks
- 
-#define _COMM_PAGE_UNUSED5				(_COMM_PAGE_BASE_ADDRESS+0x5e0)	// 32 unused bytes
- 
-#define _COMM_PAGE_BZERO				(_COMM_PAGE_BASE_ADDRESS+0x600)	// bzero()
-#define _COMM_PAGE_BCOPY				(_COMM_PAGE_BASE_ADDRESS+0x780)	// bcopy()
-#define	_COMM_PAGE_MEMCPY				(_COMM_PAGE_BASE_ADDRESS+0x7a0)	// memcpy()
-#define	_COMM_PAGE_MEMMOVE				(_COMM_PAGE_BASE_ADDRESS+0x7a0)	// memmove()
-
-#define _COMM_PAGE_COMPARE_AND_SWAP32B  (_COMM_PAGE_BASE_ADDRESS+0xf80)	// compare-and-swap word w barrier
-#define _COMM_PAGE_COMPARE_AND_SWAP64B  (_COMM_PAGE_BASE_ADDRESS+0xfc0)	// compare-and-swap doubleword w barrier
-
-#define	_COMM_PAGE_MEMSET_PATTERN       (_COMM_PAGE_BASE_ADDRESS+0x1000)// used by nonzero memset()
-#define	_COMM_PAGE_BIGCOPY				(_COMM_PAGE_BASE_ADDRESS+0x1140)// very-long-operand copies
-
-#define _COMM_PAGE_END					(_COMM_PAGE_BASE_ADDRESS+0x1700)// end of commpage area
-
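From C, user code reaches a jump-table entry by treating the fixed address as an entry point; a sketch, ignoring ABI details:

	typedef unsigned long long (*mat_fn)(void);
	mat_fn fast_mach_absolute_time = (mat_fn)_COMM_PAGE_ABSOLUTE_TIME;
	unsigned long long now = fast_mach_absolute_time();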
-#ifdef __ASSEMBLER__
-#ifdef __COMM_PAGE_SYMBOLS
-
-#define CREATE_COMM_PAGE_SYMBOL(symbol_name, symbol_address)		\
-				.org	(symbol_address - _COMM_PAGE_BASE_ADDRESS) @\
-symbol_name: nop
-
-	.text		// Required to make a well behaved symbol file
-
-	CREATE_COMM_PAGE_SYMBOL(___compare_and_swap32, _COMM_PAGE_COMPARE_AND_SWAP32)
-	CREATE_COMM_PAGE_SYMBOL(___compare_and_swap64, _COMM_PAGE_COMPARE_AND_SWAP64)
-	CREATE_COMM_PAGE_SYMBOL(___atomic_enqueue, _COMM_PAGE_ENQUEUE)
-	CREATE_COMM_PAGE_SYMBOL(___atomic_dequeue, _COMM_PAGE_DEQUEUE)
-	CREATE_COMM_PAGE_SYMBOL(___memory_barrier, _COMM_PAGE_MEMORY_BARRIER)
-	CREATE_COMM_PAGE_SYMBOL(___atomic_add32, _COMM_PAGE_ATOMIC_ADD32)
-	CREATE_COMM_PAGE_SYMBOL(___atomic_add64, _COMM_PAGE_ATOMIC_ADD64)
-	CREATE_COMM_PAGE_SYMBOL(___mach_absolute_time, _COMM_PAGE_ABSOLUTE_TIME)
-	CREATE_COMM_PAGE_SYMBOL(___spin_lock_try, _COMM_PAGE_SPINLOCK_TRY)
-	CREATE_COMM_PAGE_SYMBOL(___spin_lock, _COMM_PAGE_SPINLOCK_LOCK)
-	CREATE_COMM_PAGE_SYMBOL(___spin_unlock, _COMM_PAGE_SPINLOCK_UNLOCK)
-	CREATE_COMM_PAGE_SYMBOL(___pthread_getspecific, _COMM_PAGE_PTHREAD_GETSPECIFIC)
-	CREATE_COMM_PAGE_SYMBOL(___gettimeofday, _COMM_PAGE_GETTIMEOFDAY)
-	CREATE_COMM_PAGE_SYMBOL(___sys_dcache_flush, _COMM_PAGE_FLUSH_DCACHE)
-	CREATE_COMM_PAGE_SYMBOL(___sys_icache_invalidate, _COMM_PAGE_FLUSH_ICACHE)
-	CREATE_COMM_PAGE_SYMBOL(___pthread_self, _COMM_PAGE_PTHREAD_SELF)
-	CREATE_COMM_PAGE_SYMBOL(___spin_lock_relinquish, _COMM_PAGE_RELINQUISH)
-	CREATE_COMM_PAGE_SYMBOL(___bzero, _COMM_PAGE_BZERO)
-	CREATE_COMM_PAGE_SYMBOL(___bcopy, _COMM_PAGE_BCOPY)
-	CREATE_COMM_PAGE_SYMBOL(___memcpy, _COMM_PAGE_MEMCPY)
-//	CREATE_COMM_PAGE_SYMBOL(___memmove, _COMM_PAGE_MEMMOVE)
-	CREATE_COMM_PAGE_SYMBOL(___compare_and_swap32b, _COMM_PAGE_COMPARE_AND_SWAP32B)
-	CREATE_COMM_PAGE_SYMBOL(___compare_and_swap64b, _COMM_PAGE_COMPARE_AND_SWAP64B)
-    CREATE_COMM_PAGE_SYMBOL(___memset_pattern, _COMM_PAGE_MEMSET_PATTERN)
-	CREATE_COMM_PAGE_SYMBOL(___bigcopy, _COMM_PAGE_BIGCOPY)
-	
-	CREATE_COMM_PAGE_SYMBOL(___end_comm_page, _COMM_PAGE_END)
-
-	.data		// Required to make a well behaved symbol file
-	.long	0	// Required to make a well behaved symbol file
-
-#endif /* __COMM_PAGE_SYMBOLS */
-#endif /* __ASSEMBLER__ */
-
-#endif /* _PPC_CPU_CAPABILITIES_H */
-#endif /* PRIVATE */
diff --git a/osfmk/ppc/cpu_data.h b/osfmk/ppc/cpu_data.h
deleted file mode 100644
index 3a5fa9190..000000000
--- a/osfmk/ppc/cpu_data.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- * 
- */
-
-#ifndef	PPC_CPU_DATA
-#define PPC_CPU_DATA
-
-#ifdef	MACH_KERNEL_PRIVATE
-
-#include <mach/mach_types.h>
-#include <machine/thread.h>
-
-extern thread_t current_thread(void);
-extern __inline__ thread_t current_thread(void) 
-{
-	thread_t	result;
-
-	__asm__ volatile("mfsprg %0,1" : "=r" (result));
-
-	return (result);
-}
-
-#define	getPerProc()		current_thread()->machine.PerProc
-
-extern int 					get_preemption_level(void);
-extern void 					_enable_preemption_no_check(void);
-
-#define enable_preemption_no_check()		_enable_preemption_no_check()
-#define mp_disable_preemption()			_disable_preemption()
-#define mp_enable_preemption()			_enable_preemption()
-#define mp_enable_preemption_no_check()		_enable_preemption_no_check()
-
-#endif	/* MACH_KERNEL_PRIVATE */
-
-#endif	/* PPC_CPU_DATA */
diff --git a/osfmk/ppc/cpu_internal.h b/osfmk/ppc/cpu_internal.h
deleted file mode 100644
index 0c876a1e9..000000000
--- a/osfmk/ppc/cpu_internal.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-#ifndef	_PPC_CPU_INTERNAL_H_
-#define	_PPC_CPU_INTERNAL_H_
-
-#include <mach/kern_return.h>
-#include <ppc/exception.h>
-
-extern void						_start_cpu(
-										void);
-
-extern void						cpu_bootstrap(
-										void);
-
-extern void						cpu_init(
-										void);
-
-extern kern_return_t			cpu_signal(
-										int				target,
-										int				signal,
-										unsigned int	p1,
-										unsigned int	p2);
-
-#define SIGPast			0		/* Requests an ast on target processor */
-#define SIGPcpureq		1		/* Requests CPU specific function */
-#define SIGPdebug		2		/* Requests a debugger entry */
-#define SIGPwake		3		/* Wake up a sleeping processor */
-#define SIGPcall		4		/* Call a function on a processor */
-
-#define CPRQtimebase	1		/* Get timebase of processor */
-#define CPRQsegload		2		/* Segment registers reload */
-#define CPRQscom		3		/* SCOM */
-#define CPRQchud		4		/* CHUD perfmon */
-#define CPRQsps			5		/* Set Processor Speed */
-
-
-extern struct per_proc_info *	cpu_per_proc_alloc(
-										void);
-
-extern void						cpu_per_proc_free(
-										struct per_proc_info *per_proc);
-
-extern void *					console_per_proc_alloc(
-										boolean_t boot_processor);
-
-extern void 					console_per_proc_free(
-										void *per_proc_cbfr);
-
-extern void * 					chudxnu_per_proc_alloc(
-										boolean_t boot_processor);
-
-extern void						chudxnu_per_proc_free(
-										void *per_proc_chud);
-
-extern kern_return_t			cpu_per_proc_register(
-										struct per_proc_info	*proc_info);
-
-extern	unsigned int			real_ncpus;
-extern	unsigned int			max_ncpus;
-
-#endif	/* _PPC_CPU_INTERNAL_H_ */
diff --git a/osfmk/ppc/cswtch.s b/osfmk/ppc/cswtch.s
deleted file mode 100644
index 54a17af4e..000000000
--- a/osfmk/ppc/cswtch.s
+++ /dev/null
@@ -1,2486 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <assym.s>
-#include <debug.h>
-#include <mach/ppc/vm_param.h>
-#include <ppc/exception.h>
-#include <ppc/savearea.h>
-
-#define FPVECDBG 0
-
-	.text
-	
-/*
- * void     machine_load_context(thread_t        thread)
- *
- * Load the context for the first thread to run on a
- * cpu, and go.
- */
-
-			.align	5
-			.globl	EXT(machine_load_context)
-
-LEXT(machine_load_context)
-			mfsprg	r6,1							; Get the current activation
-			lwz		r6,ACT_PER_PROC(r6)				; Get the per_proc block
-			lwz		r0,PP_INTSTACK_TOP_SS(r6)
-			stw		r0,PP_ISTACKPTR(r6)
-			mr		r9,r3							/* Set up the current thread */
-			mtsprg	1,r9
-			li		r0,0							/* Clear a register */
-			lwz		r3,ACT_MACT_PCB(r9)				/* Get the savearea used */
-			mfmsr	r5								/* Since we are passing control, get our MSR values */
-			lwz		r11,SAVprev+4(r3)				/* Get the previous savearea */
-			lwz		r1,saver1+4(r3)					/* Load new stack pointer */
-			lwz		r10,ACT_MACT_SPF(r9)			/* Get the special flags */
-			stw		r0,saver3+4(r3)					/* Make sure we pass in a 0 for the continuation */
-			stw		r0,FM_BACKPTR(r1)				/* zero backptr */
-			stw		r5,savesrr1+4(r3)				/* Pass our MSR to the new guy */
-			stw		r11,ACT_MACT_PCB(r9)			/* Unstack our savearea */
-			oris	r10,r10,hi16(OnProc)			/* Set OnProc bit */
-			stw		r0,ACT_PREEMPT_CNT(r9)			/* Enable preemption */
-			stw		r10,ACT_MACT_SPF(r9)			/* Update the special flags */
-			stw		r10,spcFlags(r6)				/*  Set per_proc copy of the special flags */
-			b		EXT(exception_exit)				/* Go for it */
-	
-/* thread_t Switch_context(thread_t	old,
- * 				      	 void		(*cont)(void),
- *				         thread_t	new)
- *
- * Switch from one thread to another. If a continuation is supplied, then
- * we do not need to save callee save registers.
- *
- */
-
-/* void Call_continuation( void (*continuation)(void),  void *param, wait_result_t wresult, vm_offset_t stack_ptr)
- */
-
-			.align	5
-			.globl	EXT(Call_continuation)
-
-LEXT(Call_continuation)
-			mtlr	r3								/* continuation */
-			mr		r3,r4							/* parameter */
-			mr		r4,r5							/* wait result */
-			mr		r1,r6							/* Load new stack pointer */
-			blrl									/* Jump to the continuation */
-			mfsprg	r3,1
-			b		EXT(thread_terminate)
-
-/*
- * Get the old kernel stack, and store into the thread structure.
- * See if a continuation is supplied, and skip state save if so.
- *
- * Note that interrupts must be disabled before we get here (i.e., splsched)
- */
-
-/* 			
- *			Switch_context(old, continuation, new)
- *
- * 			Context switches are double jumps.  We pass the following to the
- *			context switch firmware call:
- *
- *			R3  = switchee's savearea, virtual if continuation, low order physical for full switch
- *			R4  = old thread
- *			R5  = new SRR0
- *			R6  = new SRR1
- *			R7  = high order physical address of savearea for full switch
- *
- *			savesrr0 is set to go to switch_in
- *			savesrr1 is set to uninterruptible with translation on
- */
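-
-;
-;			As a rough sketch (illustrative pseudocode, not kernel code), the
-;			double jump looks like:
-;
-;				/* jump 1: trap to the firmware switch call */
-;				savearea->srr0 = switch_in;		/* where the switchee lands */
-;				savearea->srr1 = ME | DR;		/* uninterruptible, translation on */
-;				sc(SwitchContextCall);			/* firmware swaps contexts */
-;				/* jump 2: switch_in runs rfi into the switchee, which resumes
-;				   right after its own sc with the old thread_t handed back */
-;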
-
-
-			.align	5
-			.globl	EXT(Switch_context)
-
-LEXT(Switch_context)
-
-			lwz		r12,ACT_PER_PROC(r3)			; Get the per_proc block
-#if DEBUG
-			lwz		r0,PP_ISTACKPTR(r12)			; (DEBUG/TRACE) make sure we are not
-			mr.		r0,r0							; (DEBUG/TRACE) on the interrupt
-			bne++	notonintstack					; (DEBUG/TRACE) stack
-			BREAKPOINT_TRAP
-notonintstack:
-#endif	
-			lwz		r8,ACT_MACT_PCB(r5)				; Get the PCB for the new guy
-			lwz		r9,umwSpace(r5)					; Get user memory window address space
-			cmpwi	cr1,r4,0						; Remember if there is a continuation - used waaaay down below
-            lwz     r0,CTHREAD_SELF+0(r5)           ; Pick up the user assist "word" (actually a double)
-			lwz		r7,CTHREAD_SELF+4(r5)           ; both halves
-			lwz		r11,ACT_MACT_BTE(r5)			; Get BlueBox Task Environment
-			lwz		r6,umwRelo(r5)					; Get user memory window relocation top
-			stw		r12,ACT_PER_PROC(r5)			; Set per_proc in new activation
-			mtsprg	1,r5
-			lwz		r2,umwRelo+4(r5)				; Get user memory window relocation bottom
-			
-            stw     r0,UAW+0(r12)                   ; Save the assist word for the "ultra fast path"
-			stw		r7,UAW+4(r12)
-
-			lwz		r7,ACT_MACT_SPF(r5)				; Get the special flags
-			
-			sth		r9,ppUMWmp+mpSpace(r12)			; Save the space
-			stw		r6,ppUMWmp+mpNestReloc(r12)		; Save top part of physical address
-			stw		r2,ppUMWmp+mpNestReloc+4(r12)	; Save bottom part of physical address
-			stw		r11,ppbbTaskEnv(r12)			; Save the bb task env
-			lwz		r2,traceMask(0)					; Get the enabled traces
-			stw		r7,spcFlags(r12)				; Set per_proc copy of the special flags
-			lis		r0,hi16(CutTrace)				; Trace FW call
-			mr.		r2,r2							; Any tracing going on?
-			lwz		r11,SAVprev+4(r8)				; Get the previous of the switchee savearea
-			ori		r0,r0,lo16(CutTrace)			; Trace FW call
-			beq++	cswNoTrc						; No trace today, dude...
-
-			li		r2,0x4400						; Trace ID
-			mr		r6,r11							; Trace prev savearea
-			sc										; Cut trace entry of context switch
-			
-cswNoTrc:	lwz		r2,curctx(r5)					; Grab our current context pointer
-			lwz		r10,FPUowner(r12)				; Grab the owner of the FPU			
-			lwz		r9,VMXowner(r12)				; Grab the owner of the vector
-			mfmsr	r6								; Get the MSR because the switched to thread should inherit it 
-			stw		r11,ACT_MACT_PCB(r5)			; Dequeue the savearea we are switching to
-			li		r0,1							; Get set to hold off quickfret
-
-			rlwinm	r6,r6,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Turn off the FP
-			cmplw	r10,r2							; Do we have the live float context?
-			lwz		r10,FPUlevel(r2)				; Get the live level
-			mr		r4,r3							; Save our old thread to pass back 
-			cmplw	cr5,r9,r2						; Do we have the live vector context?		
-			rlwinm	r6,r6,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Turn off the vector
-			stw		r0,holdQFret(r12)				; Make sure we hold off releasing quickfret
-			bne++	cswnofloat						; Float is not ours...
-			
-			cmplw	r10,r11							; Is the level the same?
-			lhz		r0,PP_CPU_NUMBER(r12)			; Get our CPU number
-			lwz		r5,FPUcpu(r2)					; Get the owning cpu
-			bne++	cswnofloat						; Level not the same, this is not live...
-			
-			cmplw	r5,r0							; Still owned by this cpu?
-			lwz		r10,FPUsave(r2)					; Get the pointer to next saved context
-			bne++	cswnofloat						; CPU claimed by someone else...
-			
-			mr.		r10,r10							; Is there a savearea here?
-			ori		r6,r6,lo16(MASK(MSR_FP))		; Enable floating point
-			
-			beq--	cswnofloat						; No savearea to check...
-			
-			lwz		r3,SAVlevel(r10)				; Get the level
-			lwz		r5,SAVprev+4(r10)				; Get the previous of this savearea
-			cmplw	r3,r11							; Is it for the current level?
-			
-			bne++	cswnofloat						; Nope...
-			
-			stw		r5,FPUsave(r2)					; Pop off this savearea
-
-			rlwinm	r3,r10,0,0,19					; Move back to start of page
-
-			lwz		r5,quickfret(r12)				; Get the first in quickfret list (top)					
-			lwz		r9,quickfret+4(r12)				; Get the first in quickfret list (bottom)					
-			lwz		r7,SACvrswap(r3)				; Get the virtual to real conversion (top)
-			lwz		r3,SACvrswap+4(r3)				; Get the virtual to real conversion (bottom)
-			stw		r5,SAVprev(r10)					; Link the old in (top)					
-			stw		r9,SAVprev+4(r10)				; Link the old in (bottom)					
-			xor		r3,r10,r3						; Convert to physical
-			stw		r7,quickfret(r12)				; Set the first in quickfret list (top)					
-			stw		r3,quickfret+4(r12)				; Set the first in quickfret list (bottom)					
-
-#if FPVECDBG
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			mr		r7,r2							; (TEST/DEBUG)
-			li		r2,0x4401						; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-			lhz		r0,PP_CPU_NUMBER(r12)			; (TEST/DEBUG)
-			mr		r2,r7							; (TEST/DEBUG) 
-#endif	
-
-cswnofloat:	bne++	cr5,cswnovect					; Vector is not ours...
-
-			lwz		r10,VMXlevel(r2)				; Get the live level
-			
-			cmplw	r10,r11							; Is the level the same?
-			lhz		r0,PP_CPU_NUMBER(r12)			; Get our CPU number
-			lwz		r5,VMXcpu(r2)					; Get the owning cpu
-			bne++	cswnovect						; Level not the same, this is not live...
-			
-			cmplw	r5,r0							; Still owned by this cpu?
-			lwz		r10,VMXsave(r2)					; Get the pointer to next saved context
-			bne++	cswnovect						; CPU claimed by someone else...
-			
-			mr.		r10,r10							; Is there a savearea here?
-			oris	r6,r6,hi16(MASK(MSR_VEC))		; Enable vector
-			
-			beq--	cswnovect						; No savearea to check...
-			
-			lwz		r3,SAVlevel(r10)				; Get the level
-			lwz		r5,SAVprev+4(r10)				; Get the previous of this savearea
-			cmplw	r3,r11							; Is it for the current level?
-			
-			bne++	cswnovect						; Nope...
-			
-			stw		r5,VMXsave(r2)					; Pop off this savearea
-			rlwinm	r3,r10,0,0,19					; Move back to start of page
-
-			lwz		r5,quickfret(r12)				; Get the first in quickfret list (top)					
-			lwz		r9,quickfret+4(r12)				; Get the first in quickfret list (bottom)					
-			lwz		r2,SACvrswap(r3)				; Get the virtual to real conversion (top)
-			lwz		r3,SACvrswap+4(r3)				; Get the virtual to real conversion (bottom)
-			stw		r5,SAVprev(r10)					; Link the old in (top)					
-			stw		r9,SAVprev+4(r10)				; Link the old in (bottom)					
-			xor		r3,r10,r3						; Convert to physical
-			stw		r2,quickfret(r12)				; Set the first in quickfret list (top)					
-			stw		r3,quickfret+4(r12)				; Set the first in quickfret list (bottom)					
-
-#if FPVECDBG
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			li		r2,0x4501						; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif	
-
-cswnovect:	li		r0,0							; Get set to release quickfret holdoff	
-			rlwinm	r11,r8,0,0,19					; Switch to savearea base
-			lis		r9,hi16(EXT(switch_in))			; Get top of switch in routine 
-			lwz		r5,savesrr0+4(r8)				; Set up the new SRR0
-;
-;			Note that the low-level code requires that R7 contain the high order half of the savearea's
-;			physical address.  This is hack city, but it is the way it is.
-;
-			lwz		r7,SACvrswap(r11)				; Get the high order V to R translation
-			lwz		r11,SACvrswap+4(r11)			; Get the low order V to R translation
-			ori		r9,r9,lo16(EXT(switch_in))		; Bottom half of switch in 
-			stw		r0,holdQFret(r12)				; Make sure we release quickfret holdoff
-			stw		r9,savesrr0+4(r8)				; Make us jump to the switch in routine 
-
-			lwz		r9,SAVflags(r8)					/* Get the flags */
-			lis		r0,hi16(SwitchContextCall)		/* Top part of switch context */
-			li		r10,(MASK(MSR_ME)|MASK(MSR_DR)) /* Get the switcher's MSR */
-			ori		r0,r0,lo16(SwitchContextCall)	/* Bottom part of switch context */
-			stw		r10,savesrr1+4(r8)				/* Set up for switch in */
-			rlwinm	r9,r9,0,15,13					/* Reset the syscall flag */
-			xor		r3,r11,r8						/* Get the physical address of the new context save area */
-			stw		r9,SAVflags(r8)					/* Set the flags */
-
-			bne		cr1,swtchtocont					; Switch to the continuation
-			sc										/* Switch to the new context */
-	
-/*			We come back here in the new thread context	
- * 			R4 was set to hold the old thread pointer, but switch_in will put it into
- *			R3 where it belongs.
- */
-			blr										/* Jump into the new thread */
-
-;
-;			This is where we go when a continuation is set.  We are actually
-;			killing off the old context of the new guy so we need to pop off
-;			any float or vector states for the ditched level.
-;
-;			Note that we do the same kind of thing as chkfac in hw_exceptions.s
-;
-
-		
-swtchtocont:
-
-			stw		r5,savesrr0+4(r8)				; Set the pc
-			stw		r6,savesrr1+4(r8)				; Set the next MSR to use
-			stw		r4,saver3+4(r8)					; Make sure we pass back the old thread
-			mr		r3,r8							; Pass in the virtual address of savearea
-			
-			b		EXT(exception_exit)				; Blocking on continuation, toss old context...
-
-
-
-/*
- *			All switched to threads come here first to clean up the old thread.
- *			We need to do the following contortions because we need to keep
- *			the LR clean. And because we need to manipulate the savearea chain
- *			with translation on.  If we could, this should be done in lowmem_vectors
- *			before translation is turned on.  But we can't, dang it!
- *
- *			switch_in() runs with DR on and IR off
- *
- *			R3  = switcher's savearea (32-bit virtual)
- *			saver4  = old thread in switcher's save
- *			saver5  = new SRR0 in switcher's save
- *			saver6  = new SRR1 in switcher's save
- */
-
-			.align	5
-			.globl	EXT(switch_in)
-
-LEXT(switch_in)
-
-			lwz		r4,saver4+4(r3)					; Get the old thread 
-			lwz		r5,saver5+4(r3)					; Get the srr0 value 
-			
-	 		mfsprg	r0,2							; Get feature flags 
-			mr		r9,r4							; Get the switched from ACT
-			lwz		r6,saver6+4(r3)					; Get the srr1 value 
-			rlwinm.	r0,r0,0,pf64Bitb,pf64Bitb		; Check for 64-bit
-			lwz		r10,ACT_MACT_PCB(r9)			; Get the top PCB on the old thread 
-
-			stw		r3,ACT_MACT_PCB(r9)				; Put the new one on top
-			stw		r10,SAVprev+4(r3)				; Chain on the old one
-
-			mr		r3,r4							; Pass back the old thread 
-
-			mtsrr0	r5								; Set return point
-			mtsrr1	r6								; Set return MSR
-			
-			bne++	siSixtyFour						; Go do 64-bit...
-
-			rfi										; Jam...
-			
-siSixtyFour:
-			rfid									; Jam...
-			
-/*
- * void fpu_save(facility_context ctx)
- *
- *			Note that there are some oddities here when we save a context we are using.
- *			It is really not too cool to do this, but what the hey...  Anyway,
- *			we turn fpus and vecs off before we leave.  The oddity is that if you use
- *			fpus after this, the savearea containing the context just saved will go
- *			away.  So, the bottom line is: don't use fpus until after you are done
- *			with the saved context.
- */
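-
-;
-;			Caller's-eye sketch of that warning (illustrative only; fpu_save is
-;			the real entry point, the other names are made up):
-;
-;				fpu_save(ctx);				/* live FP state now in a savearea */
-;				read_saved_context(ctx);	/* safe: FP is disabled on return */
-;				some_fp_instruction();		/* unsafe before the read: the FP */
-;											/* trap may toss that savearea */
-;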
-			.align	5
-			.globl	EXT(fpu_save)
-
-LEXT(fpu_save)
-			
-			lis		r2,hi16(MASK(MSR_VEC))			; Get the vector enable
-			li		r12,lo16(MASK(MSR_EE))			; Get the EE bit
-			ori		r2,r2,lo16(MASK(MSR_FP))		; Get FP
-
-			mfmsr	r0								; Get the MSR
-			andc	r0,r0,r2						; Clear FP, VEC
-			andc	r2,r0,r12						; Clear EE
-			ori		r2,r2,MASK(MSR_FP)				; Enable the floating point feature for now also
-			mtmsr	r2								; Set the MSR
-			isync
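-
-;			Net effect of the MSR dance above (sketch):
-;				r2 = (msr & ~(MSR_VEC|MSR_FP|MSR_EE)) | MSR_FP
-;			so vectors and interrupts are held off while FP is force-enabled;
-;			r0 keeps the entry MSR (with FP/VEC stripped) for restore at fsret.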
-
-			mfsprg	r6,1							; Get the current activation
-			lwz		r6,ACT_PER_PROC(r6)				; Get the per_proc block
-			lwz		r12,FPUowner(r6)				; Get the context ID for owner
-
-#if FPVECDBG
-			mr		r7,r0							; (TEST/DEBUG)
-			li		r4,0							; (TEST/DEBUG)
-			mr		r10,r3							; (TEST/DEBUG)
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			mr.		r3,r12							; (TEST/DEBUG)
-			li		r2,0x6F00						; (TEST/DEBUG)
-			li		r5,0							; (TEST/DEBUG)
-			beq--	noowneryet						; (TEST/DEBUG)
-			lwz		r4,FPUlevel(r12)				; (TEST/DEBUG)
-			lwz		r5,FPUsave(r12)					; (TEST/DEBUG)
-
-noowneryet:	oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-			mr		r0,r7							; (TEST/DEBUG)
-			mr		r3,r10							; (TEST/DEBUG)
-#endif	
-			mflr	r2								; Save the return address
-
-			cmplw	r3,r12							; Is the specified context live?
-			lhz		r11,PP_CPU_NUMBER(r6)			; Get our CPU number
-			lwz		r9,FPUcpu(r3)					; Get the cpu that context was last on		
-			bne--	fsret							; We do not own the FPU, no save required...
-			
-			cmplw	r9,r11							; Was the context for this processor? 
-			la		r5,FPUsync(r3)					; Point to the sync word
-			bne--	fsret							; Facility not last used on this processor...
-
-;
-;			It looks like we need to save this one.			
-;
-;			First, make sure that the live context block is not mucked with while
-;			we are trying to save it on out.  Then we will give it the final check.
-;
-
-			lis		r9,ha16(EXT(LockTimeOut))		; Get the high part 
-			mftb	r8								; Get the time now
-			lwz		r9,lo16(EXT(LockTimeOut))(r9)	; Get the timeout value
-			b		fssync0a						; Jump to the lock...
-			
-			.align	5
-			
-fssync0:	li		r7,lgKillResv					; Get killing field	
-			stwcx.	r7,0,r7							; Kill reservation
-
-fssync0a:	lwz		r7,0(r5)						; Sniff the lock
-			mftb	r10								; Is it time yet?
-			cmplwi	cr1,r7,0						; Is it locked?
-			sub		r10,r10,r8						; How long have we been spinning?
-			cmplw	r10,r9							; Has it been too long?
-			bgt--	fstimeout						; Way too long, panic...
-			bne--	cr1,fssync0a					; Yea, still locked so sniff harder...
-
-fssync1:	lwarx	r7,0,r5							; Get the sync word
-			li		r12,1							; Get the lock
-			mr.		r7,r7							; Is it unlocked?
-			bne--	fssync0
-			stwcx.	r12,0,r5						; Store lock and test reservation
-			bne--	fssync1							; Try again if lost reservation...
-
-			isync									; Toss speculation
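-
-;
-;			The acquire sequence above is, in rough C terms (sketch only; the
-;			real thing is the mftb/lwarx/stwcx. loop you just read):
-;
-;				start = mftb();
-;				do {
-;					while (*sync != 0)				/* sniff, no reservation */
-;						if (mftb() - start > LockTimeOut)
-;							panic("fpu_save: timeout");
-;				} while (!compare_and_swap(sync, 0, 1));	/* lwarx/stwcx. */
-;				isync();					/* no speculation past the lock */
-;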
-
-			lwz		r12,FPUowner(r6)				; Get the context ID for owner
-			cmplw	r3,r12							; Do we still own the FPU?
-			bne--	fsretlk							; Go unlock and return since we no longer own context
-			
-			lwz		r5,FPUcpu(r12)					; Get the cpu that context was last on		
-			lwz		r7,FPUsave(r12)					; Get the current FPU savearea for the thread
-			cmplw	r5,r11							; Is this for the same processor?
-			lwz		r9,FPUlevel(r12)				; Get our current level indicator
-			bne--	fsretlk							; Not the same processor, skip any save...
-			
-			cmplwi	r7,0							; Have we ever saved this facility context?
-			beq--	fsneedone						; Never saved it, so go do it...
-			
-			lwz		r8,SAVlevel(r7)					; Get the level of this savearea
-			cmplw	r9,r8							; Correct level?
-			beq--	fsretlk							; The current level is already saved, bail out...
-
-fsneedone:	bl		EXT(save_get)					; Get a savearea for the context
-
-			mfsprg	r6,1							; Get the current activation
-			lwz		r6,ACT_PER_PROC(r6)				; Get the per_proc block
-			li		r4,SAVfloat						; Get floating point tag			
-			lwz		r12,FPUowner(r6)				; Get back our thread
-			stb		r4,SAVflags+2(r3)				; Mark this savearea as a float
-			lwz		r4,facAct(r12)					; Get the activation associated with live context
-			lwz		r8,FPUsave(r12)					; Get the current top floating point savearea
-			stw		r4,SAVact(r3)					; Indicate the right activation for this context
-			lwz		r9,FPUlevel(r12)				; Get our current level indicator again		
-			stw		r3,FPUsave(r12)					; Set this as the most current floating point context
-			stw		r8,SAVprev+4(r3)				; And then chain this in front
-
-			stw		r9,SAVlevel(r3)					; Show level in savearea
-
-            bl		fp_store						; save all 32 FPRs in the save area at r3
-			mtlr	r2								; Restore return
- 
-fsretlk:	li		r7,0							; Get the unlock value
-			eieio									; Make sure that these updates make it out
-			stw		r7,FPUsync(r12)					; Unlock it
-
-fsret:		mtmsr	r0								; Put interrupts on if they were and floating point off
-			isync
-
-			blr
-
-fstimeout:	mr		r4,r5							; Set the lock address
-			mr		r5,r7							; Set the lock word data
-			lis		r3,hi16(fstimeout_str)			; Get the failed lck message
-			ori		r3,r3,lo16(fstimeout_str)		; Get the failed lck message
-			bl		EXT(panic)
-			BREAKPOINT_TRAP							; We die here anyway
-
-			.data
-fstimeout_str:
-			STRINGD	"fpu_save: timeout on sync lock (0x%08X), value = 0x%08X\n\000"
-			.text
-
-
-/*
- * fpu_switch()
- *
- * Entered to handle the floating-point unavailable exception and
- * switch fpu context
- *
- * This code is run with virtual address mode on and interrupts off.
- *
- * Upon exit, the code returns to the user's context with the floating
- * point facility turned on.
- *
- * ENTRY:	VM switched ON
- *		Interrupts  OFF
- *              State is saved in savearea pointed to by R4.
- *				All other registers are free.
- * 
- */
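-
-;
-;			Rough decision tree for the switch (sketch, not code):
-;
-;				if (live owner == new context && live level == new level)
-;					just re-enable FP and exit;			/* fsthesame */
-;				save the live context if not already saved;	/* fsmstsave */
-;				if (new context has a savearea at the right level)
-;					reload the FPRs from it;			/* lfd loop */
-;				else
-;					fill the FPRs with FloatInit junk;	/* MakeSure... */
-;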
-
-			.align	5
-			.globl	EXT(fpu_switch)
-
-LEXT(fpu_switch)
-
-#if DEBUG
-			lis		r3,hi16(EXT(fpu_trap_count))	; Get address of FP trap counter
-			ori		r3,r3,lo16(EXT(fpu_trap_count))	; Get address of FP trap counter
-			lwz		r1,0(r3)
-			addi	r1,r1,1
-			stw		r1,0(r3)
-#endif /* DEBUG */
-
-			mfsprg	r17,1							; Get the current activation
-			lwz		r26,ACT_PER_PROC(r17)			; Get the per_proc block
-			mfmsr	r19								; Get the current MSR
-			
-			mr		r25,r4							; Save the entry savearea
-			lwz		r22,FPUowner(r26)				; Get the thread that owns the FPU
-			ori		r19,r19,lo16(MASK(MSR_FP))		; Enable the floating point feature
-			
-			mtmsr	r19								; Enable floating point instructions
-			isync
-
-			lwz		r27,ACT_MACT_PCB(r17)			; Get the current level
-			lwz		r29,curctx(r17)					; Grab the current context anchor of the current thread
-
-;			R22 has the "old" context anchor
-;			R29 has the "new" context anchor
-
-#if FPVECDBG
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			li		r2,0x7F01						; (TEST/DEBUG)
-			mr		r3,r22							; (TEST/DEBUG)
-			mr		r5,r29							; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif	
-						
-			lhz		r16,PP_CPU_NUMBER(r26)			; Get the current CPU number
-
-			mr.		r22,r22							; See if there is any live FP status			
-			la		r15,FPUsync(r22)				; Point to the sync word
-
-			beq--	fsnosave						; No live context, so nothing to save...
-
-			lwz		r18,FPUcpu(r22)					; Get the last CPU we ran on
-			cmplw	cr2,r22,r29						; Are both old and new the same context?
-			lwz		r30,FPUsave(r22)				; Get the top savearea
-			cmplw	r18,r16							; Make sure we are on the right processor
-			lwz		r31,FPUlevel(r22)				; Get the context level
-			cmplwi	cr1,r30,0						; Anything saved yet?
-
-			bne--	fsnosave						; No, not on the same processor...
-						
-;
-;			Check to see if the live context has already been saved.
-;			Also check to see if all we are here for is to re-enable the MSR,
-;			and handle that case specially if so.
-;
-
-			cmplw	r31,r27							; See if the current and active levels are the same
-			crand	cr0_eq,cr2_eq,cr0_eq			; Remember if both the levels and contexts are the same
-			
-			beq--	fsthesame						; New and old are the same, just go enable...
-
-
-;
-;			Note it turns out that on a G5, the following load has about a 50-50 chance of
-;			taking a segment exception in a system that is doing heavy file I/O.  We
-;			make a dummy access right now in order to get that resolved before we take the lock.
-;			We do not use the data returned because it may change across the lock acquisition.
-;
-
-			beq--	cr1,fswsync						; Nothing saved, skip the probe attempt...
-			lwz		r11,SAVlevel(r30)				; Touch the context in order to fault in the segment
-			
-;
-;			Make sure that the live context block is not mucked with while
-;			we are trying to save it on out  
-;
-
-fswsync:	lis		r11,ha16(EXT(LockTimeOut))		; Get the high part 
-			mftb	r3								; Get the time now
-			lwz		r11,lo16(EXT(LockTimeOut))(r11)	; Get the timeout value
-			b		fswsync0a						; Jump to the lock...
-			
-			.align	5
-			
-fswsync0:	li		r19,lgKillResv					; Get killing field	
-			stwcx.	r19,0,r19						; Kill reservation
-
-fswsync0a:	lwz		r19,0(r15)						; Sniff the lock
-			mftb	r18								; Is it time yet?
-			cmplwi	cr1,r19,0						; Is it locked?
-			sub		r18,r18,r3						; How long have we been spinning?
-			cmplw	r18,r11							; Has it been too long?
-			bgt--	fswtimeout						; Way too long, panic...
-			bne--	cr1,fswsync0a					; Yea, still locked so sniff harder...
-
-fswsync1:	lwarx	r19,0,r15						; Get the sync word
-			li		r0,1							; Get the lock
-			mr.		r19,r19							; Is it unlocked?
-			bne--	fswsync0
-			stwcx.	r0,0,r15						; Store lock and test reservation
-			bne--	fswsync1						; Try again if lost reservation...
-
-			isync									; Toss speculation
-
-;
-;			Note that now that we have the lock, we need to check if anything changed.
-;			Also note that the possible changes are limited.  The context owner can 
-;			never change to a different thread or level although it can be invalidated.
-;			A new context can not be pushed on top of us, but it can be popped.  The
-;			cpu indicator will always change if another processor mucked with any 
-;			contexts.
-;
-;			It should be very rare that any of the context stuff changes across the lock.
-;
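-
-;			Sketch of that revalidation (the three loads just below feed it):
-;
-;				changed = (FPUowner != r22) | (FPUsave != r30) | (FPUcpu != r16);
-;				if (changed)  skip the save;		/* not ours any more */
-;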
-
-			lwz		r0,FPUowner(r26)				; Get the thread that owns the FPU again
-			lwz		r11,FPUsave(r22)				; Get the top savearea again
-			lwz		r18,FPUcpu(r22)					; Get the last CPU we ran on again
-			sub		r0,r0,r22						; Non-zero if we lost ownership, 0 if not
-			xor		r11,r11,r30						; Non-zero if saved context changed, 0 if not
-			xor		r18,r18,r16						; Non-zero if cpu changed,  0 if not
-			cmplwi	cr1,r30,0						; Is anything saved?
-			or		r0,r0,r11						; Zero only if both owner and context are unchanged
-			or.		r0,r0,r18						; Zero only if nothing has changed
-			li		r3,0							; Clear this
-			
-			bne--	fsnosavelk						; Something has changed, so this is not ours to save...
-			beq--	cr1,fsmstsave					; There is no context saved yet...
-
-			lwz		r11,SAVlevel(r30)				; Get the level of top saved context
-			
-			cmplw	r31,r11							; Are live and saved the same?
-
-#if FPVECDBG
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			li		r2,0x7F02						; (TEST/DEBUG)
-			mr		r3,r11							; (TEST/DEBUG)
-			mr		r5,r31							; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-			li		r3,0							; (TEST/DEBUG)
-#endif	
-
-			beq++	fsnosavelk						; Same level, so already saved...			
-			
-fsmstsave:	stw		r3,FPUowner(r26)				; Kill the context now
-			eieio									; Make sure everyone sees it
-			bl		EXT(save_get)					; Go get a savearea
-			
-			lwz		r12,facAct(r22)					; Get the activation associated with the context
-			stw		r30,SAVprev+4(r3)				; Point us to the old context
-			stw		r31,SAVlevel(r3)				; Tag our level
-			li		r7,SAVfloat						; Get the floating point ID
-			stw		r12,SAVact(r3)					; Make sure we point to the right guy
-			stb		r7,SAVflags+2(r3)				; Set that we have a floating point save area
-			stw		r3,FPUsave(r22)					; Set this as the latest context savearea for the thread
-			
-#if FPVECDBG
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			li		r2,0x7F03						; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif	
-
-            bl		fp_store						; store all 32 FPRs
-
-fsnosavelk:	li		r7,0							; Get the unlock value
-			eieio									; Make sure that these updates make it out
-			stw		r7,FPUsync(r22)					; Unlock it.
-
-;
-;			The context is all saved now and the facility is free.
-;
-;			Check if we need to fill the registers with junk, because this level has 
-;			never used them before and some thieving bastard could hack the old values
-;			of some thread!  Just imagine what would happen if they could!  Why, nothing
-;			would be safe! My God! It is terrifying!
-;
-;			Make sure that the live context block is not mucked with while
-;			we are trying to load it up  
-;
-
-fsnosave:	la		r15,FPUsync(r29)				; Point to the sync word
-			lis		r11,ha16(EXT(LockTimeOut))		; Get the high part 
-			mftb	r3								; Get the time now
-			lwz		r11,lo16(EXT(LockTimeOut))(r11)	; Get the timeout value
-			b		fsnsync0a						; Jump to the lock...
-			
-			.align	5
-			
-fsnsync0:	li		r19,lgKillResv					; Get killing field	
-			stwcx.	r19,0,r19						; Kill reservation
-
-fsnsync0a:	lwz		r19,0(r15)						; Sniff the lock
-			mftb	r18								; Is it time yet?
-			cmplwi	cr1,r19,0						; Is it locked?
-			sub		r18,r18,r3						; How long have we been spinning?
-			cmplw	r18,r11							; Has it been too long?
-			bgt--	fsntimeout						; Way too long, panic...
-			bne--	cr1,fsnsync0a					; Yea, still locked so sniff harder...
-
-fsnsync1:	lwarx	r19,0,r15						; Get the sync word
-			li		r0,1							; Get the lock
-			mr.		r19,r19							; Is it unlocked?
-			bne--	fsnsync0						; Unfortunately, it is locked...
-			stwcx.	r0,0,r15						; Store lock and test reservation
-			bne--	fsnsync1						; Try again if lost reservation...
-
-			isync									; Toss speculation
-
-			lwz		r15,ACT_MACT_PCB(r17)			; Get the current level of the "new" one
-			lwz		r19,FPUcpu(r29)					; Get the last CPU we ran on
-			lwz		r14,FPUsave(r29)				; Point to the top of the "new" context stack
-
-			stw		r16,FPUcpu(r29)					; Claim context for us
-			eieio
-
-#if FPVECDBG
-			lwz		r13,FPUlevel(r29)				; (TEST/DEBUG)
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			li		r2,0x7F04						; (TEST/DEBUG)
-			mr		r1,r15							; (TEST/DEBUG)
-			mr		r3,r14							; (TEST/DEBUG)
-			mr		r5,r13							; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif	
-			
-			lis		r18,hi16(EXT(PerProcTable))		; Set base PerProcTable
-			mulli	r19,r19,ppeSize					; Find offset to the owner per_proc_entry			
-			ori		r18,r18,lo16(EXT(PerProcTable))	; Set base PerProcTable
-			li		r16,FPUowner					; Displacement to float owner
-			add		r19,r18,r19						; Point to the owner per_proc_entry	
-			lwz		r19,ppe_vaddr(r19)				; Point to the owner per_proc	
-			
-fsinvothr:	lwarx	r18,r16,r19						; Get the owner
-			sub		r0,r18,r29						; Subtract one from the other
-			sub		r11,r29,r18						; Subtract the other from the one
-			or		r11,r11,r0						; Combine them
-			srawi	r11,r11,31						; Get a 0 if equal or -1 if not
-			and		r18,r18,r11						; Make 0 if same, unchanged if not
-			stwcx.	r18,r16,r19						; Try to invalidate it
-			bne--	fsinvothr						; Try again if there was a collision...
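-
-;			The loop above conditionally clears the owner without a branch in
-;			the reservation window.  The arithmetic, as a sketch:
-;
-;				t    = (x - y) | (y - x);		/* t == 0 iff x == y */
-;				mask = t >> 31;					/* arithmetic shift: 0 or -1 */
-;				x   &= mask;					/* x==y -> 0, else unchanged */
-;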
-		
-			cmplwi	cr1,r14,0						; Do we possibly have some context to load?
-			la		r11,savefp0(r14)				; Point to first line to bring in
-			stw		r15,FPUlevel(r29)				; Set the "new" active level
-			eieio
-			stw		r29,FPUowner(r26)				; Mark us as having the live context
-			
-			beq++	cr1,MakeSureThatNoTerroristsCanHurtUsByGod	; No "new" context to load...
-			
-			dcbt	0,r11							; Touch line in
-
-			lwz		r0,SAVlevel(r14)				; Get the level of first facility savearea
-			lwz		r3,SAVprev+4(r14)				; Get the previous context
-			cmplw	r0,r15							; Top level correct to load?
-			li		r7,0							; Get the unlock value
-			bne--	MakeSureThatNoTerroristsCanHurtUsByGod	; No, go initialize...
-
-			stw		r3,FPUsave(r29)					; Pop the context (we will toss the savearea later)
-			
-#if FPVECDBG
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			li		r2,0x7F05						; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif	
-
-			eieio									; Make sure that these updates make it out
-			stw		r7,FPUsync(r29)					; Unlock context now that the context save has been removed
-
-// Note this code is used both by 32- and 128-byte processors.  This means six extra DCBTs
-// are executed on a 128-byte machine, but that is better than a mispredicted branch.
-
-			la		r11,savefp4(r14)				; Point to next line
-			dcbt	0,r11							; Touch line in
-			lfd     f0, savefp0(r14)
-			lfd     f1,savefp1(r14)
-			lfd     f2,savefp2(r14)
-			la		r11,savefp8(r14)				; Point to next line
-			lfd     f3,savefp3(r14)
-			dcbt	0,r11							; Touch line in
-			lfd     f4,savefp4(r14)
-			lfd     f5,savefp5(r14)
-			lfd     f6,savefp6(r14)
-			la		r11,savefp12(r14)				; Point to next line
-			lfd     f7,savefp7(r14)
-			dcbt	0,r11							; Touch line in
-			lfd     f8,savefp8(r14)
-			lfd     f9,savefp9(r14)
-			lfd     f10,savefp10(r14)
-			la		r11,savefp16(r14)				; Point to next line
-			lfd     f11,savefp11(r14)
-			dcbt	0,r11							; Touch line in
-			lfd     f12,savefp12(r14)
-			lfd     f13,savefp13(r14)
-			lfd     f14,savefp14(r14)
-			la		r11,savefp20(r14)				; Point to next line
-			lfd     f15,savefp15(r14)
-			dcbt	0,r11							; Touch line in
-			lfd     f16,savefp16(r14)
-			lfd     f17,savefp17(r14)
-			lfd     f18,savefp18(r14)
-			la		r11,savefp24(r14)				; Point to next line
-			lfd     f19,savefp19(r14)
-			dcbt	0,r11							; Touch line in
-			lfd     f20,savefp20(r14)
-			lfd     f21,savefp21(r14)
-			la		r11,savefp28(r14)				; Point to next line
-			lfd     f22,savefp22(r14)
-			lfd     f23,savefp23(r14)
-			dcbt	0,r11							; Touch line in
-			lfd     f24,savefp24(r14)
-			lfd     f25,savefp25(r14)
-			lfd     f26,savefp26(r14)
-			lfd     f27,savefp27(r14)
-			lfd     f28,savefp28(r14)
-			lfd     f29,savefp29(r14)
-			lfd     f30,savefp30(r14)
-			lfd     f31,savefp31(r14)
-			
-			mr		r3,r14							; Get the old savearea (we popped it before)
-			bl		EXT(save_ret)					; Toss it
-			
-fsenable:	lwz		r8,savesrr1+4(r25)				; Get the msr of the interrupted guy
-			ori		r8,r8,MASK(MSR_FP)				; Enable the floating point feature
-			lwz		r10,ACT_MACT_SPF(r17)			; Get the act special flags
-			lwz		r11,spcFlags(r26)				; Get per_proc spec flags cause not in sync with act
-			oris	r10,r10,hi16(floatUsed|floatCng)	; Set that we used floating point
-			oris	r11,r11,hi16(floatUsed|floatCng)	; Set that we used floating point
-			rlwinm.	r0,r8,0,MSR_PR_BIT,MSR_PR_BIT	; See if we are doing this for user state
-			stw		r8,savesrr1+4(r25)				; Set the msr of the interrupted guy
-			mr		r3,r25							; Pass the virtual address of savearea
-			beq-	fsnuser							; We are not user state...
-			stw		r10,ACT_MACT_SPF(r17)			; Set the activation copy
-			stw		r11,spcFlags(r26)				; Set per_proc copy
-
-fsnuser:
-#if FPVECDBG
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			li		r2,0x7F07						; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif	
-			
-			b		EXT(exception_exit)				; Exit to the fray...
-
-/*
- *			Initialize the registers to some bogus value
- */
-
-MakeSureThatNoTerroristsCanHurtUsByGod:
-
-#if FPVECDBG
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			li		r2,0x7F06						; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif	
-			lis		r5,hi16(EXT(FloatInit))			; Get top secret floating point init value address
-			li		r7,0							; Get the unlock value
-			ori		r5,r5,lo16(EXT(FloatInit))		; Slam bottom
-			eieio									; Make sure that these updates make it out
-			stw		r7,FPUsync(r29)					; Unlock it now that the context has been removed
-
-			lfd		f0,0(r5)						; Initialize FP0 
-			fmr		f1,f0							; Do them all						
-			fmr		f2,f0								
-			fmr		f3,f0								
-			fmr		f4,f0								
-			fmr		f5,f0						
-			fmr		f6,f0						
-			fmr		f7,f0						
-			fmr		f8,f0						
-			fmr		f9,f0						
-			fmr		f10,f0						
-			fmr		f11,f0						
-			fmr		f12,f0						
-			fmr		f13,f0						
-			fmr		f14,f0						
-			fmr		f15,f0						
-			fmr		f16,f0						
-			fmr		f17,f0
-			fmr		f18,f0						
-			fmr		f19,f0						
-			fmr		f20,f0						
-			fmr		f21,f0						
-			fmr		f22,f0						
-			fmr		f23,f0						
-			fmr		f24,f0						
-			fmr		f25,f0						
-			fmr		f26,f0						
-			fmr		f27,f0						
-			fmr		f28,f0						
-			fmr		f29,f0						
-			fmr		f30,f0						
-			fmr		f31,f0						
-			b		fsenable						; Finish setting it all up...				
-
-
-;
-;			We get here when we are switching to the same context at the same level and the context
-;			is still live.  Essentially, all we are doing is turning on the facility.  It may have
-;			gotten turned off due to doing a context save for the current level or a context switch
-;			back to the live guy.
-;
-
-			.align	5
-			
-
-fsthesamel:	li		r7,0							; Get the unlock value
-			eieio									; Make sure that these updates make it out
-			stw		r7,FPUsync(r22)					; Unlock it.
-
-fsthesame:
-
-#if FPVECDBG
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			li		r2,0x7F0A						; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif	
-			beq-	cr1,fsenable					; Not saved yet, nothing to pop, go enable and exit...
-			
-			lwz		r11,SAVlevel(r30)				; Get the level of top saved context
-			lwz		r14,SAVprev+4(r30)				; Get the previous savearea
-			
-			cmplw	r11,r31							; Are live and saved the same?
-
-			bne++	fsenable						; Level not the same, nothing to pop, go enable and exit...
-			
-			mr		r3,r30							; Get the old savearea (we popped it before)
-			stw		r14,FPUsave(r22)				; Pop the savearea from the stack
-			bl		EXT(save_ret)					; Toss it
-			b		fsenable						; Go enable and exit...
-
-;
-;			Note that we need to choke in this code rather than panic because there is no
-;			stack.
-;
-
-fswtimeout:	lis		r0,hi16(Choke)					; Choke code
-			ori		r0,r0,lo16(Choke)				; and the rest
-			li		r3,failTimeout					; Timeout code
-			sc										; System ABEND
-
-fsntimeout:	lis		r0,hi16(Choke)					; Choke code
-			ori		r0,r0,lo16(Choke)				; and the rest
-			li		r3,failTimeout					; Timeout code
-			sc										; System ABEND
-
-vswtimeout0:	
-			lis		r0,hi16(Choke)					; Choke code
-			ori		r0,r0,lo16(Choke)				; and the rest
-			li		r3,failTimeout					; Timeout code
-			sc										; System ABEND
-
-vswtimeout1:	
-			lis		r0,hi16(Choke)					; Choke code
-			ori		r0,r0,lo16(Choke)				; and the rest
-			li		r3,failTimeout					; Timeout code
-			sc										; System ABEND
-
-;
-;			This function invalidates any live floating point context for the passed in facility_context.
-;			This is intended to be called just before act_machine_sv_free tosses saveareas.
-;
-
-			.align	5
-			.globl	EXT(toss_live_fpu)
-
-LEXT(toss_live_fpu)
-			
-			lis		r0,hi16(MASK(MSR_VEC))			; Get VEC
-			mfmsr	r9								; Get the MSR
-			ori		r0,r0,lo16(MASK(MSR_FP))		; Add in FP
-			rlwinm.	r8,r9,0,MSR_FP_BIT,MSR_FP_BIT	; Are floats on right now?
-			andc	r9,r9,r0						; Force off VEC and FP
-			ori		r0,r0,lo16(MASK(MSR_EE))		; Turn off EE
-			andc	r0,r9,r0						; Turn off EE now
-			mtmsr	r0								; No interruptions
-			isync
-			beq+	tlfnotours						; Floats off, can not be live here...
-
-			mfsprg	r8,1							; Get the current activation
-			lwz		r8,ACT_PER_PROC(r8)				; Get the per_proc block
-
-;
-;			Note that at this point, since floats are on, we are the owner
-;			of live state on this processor
-;
-
-			lwz		r6,FPUowner(r8)					; Get the thread that owns the floats
-			li		r0,0							; Clear this just in case we need it
-			cmplw	r6,r3							; Are we tossing our own context?
-			bne--	tlfnotours						; Nope...
-			
-			lfd		f1,Zero(0)						; Make a 0			
-			mtfsf	0xFF,f1							; Clear it
-
-tlfnotours:	lwz		r11,FPUcpu(r3)					; Get the cpu on which we last loaded context
-			lis		r12,hi16(EXT(PerProcTable))		; Set base PerProcTable
-			mulli	r11,r11,ppeSize					; Find offset to the owner per_proc_entry			
-			ori		r12,r12,lo16(EXT(PerProcTable))	; Set base PerProcTable
-			li		r10,FPUowner					; Displacement to float owner
-			add		r11,r12,r11						; Point to the owner per_proc_entry	
-			lwz		r11,ppe_vaddr(r11)				; Point to the owner per_proc	
-			
-tlfinvothr:	lwarx	r12,r10,r11						; Get the owner
-
-			sub		r0,r12,r3						; Subtract one from the other
-			sub		r8,r3,r12						; Subtract the other from the one
-			or		r8,r8,r0						; Combine them
-			srawi	r8,r8,31						; Get a 0 if equal or -1 if not
-			and		r12,r12,r8						; Make 0 if same, unchanged if not
-			stwcx.	r12,r10,r11						; Try to invalidate it
-			bne--	tlfinvothr						; Try again if there was a collision...
-
-			mtmsr	r9								; Restore interruptions
-			isync									; Could be turning off floats here
-			blr										; Leave...
-
-
-/*
- *			Altivec stuff is here. The techniques used are pretty identical to
- *			the floating point. Except that we will honor the VRSAVE register
- *			settings when loading and restoring registers.
- *
- *			There are two indications of saved VRs: the VRSAVE register and the vrvalid
- *			mask. VRSAVE is set by the vector user and represents the VRs that they
- *			say that they are using. The vrvalid mask indicates which vector registers
- *			are saved in the savearea. Whenever context is saved, it is saved according
- *			to the VRSAVE register.  It is loaded based on VRSAVE anded with
- *			vrvalid (all other registers are splatted with 0s). This is done because we
- *			don't want to load any registers we don't have a copy of, we want to set them
- *			to zero instead.
- *
- *			Note that there are some oddities here when we save a context we are using.
- *			It is really not too cool to do this, but what the hey...  Anyway,
- *			we turn vectors and fpu off before we leave.
- *			The oddity is that if you use vectors after this, the
- *			savearea containing the context just saved will go away.  So, the bottom
- *			line is: don't use vectors until after you are done with the saved context.
- *
- */
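-
-;
-;			The save/load rule, as a sketch:
-;
-;				saved  = VRSAVE;				/* save what the user claims to use */
-;				loaded = VRSAVE & vrvalid;		/* load only what was really saved */
-;				zeroed = VRSAVE & ~vrvalid;		/* splat 0s over the rest */
-;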
-
-			.align	5
-			.globl	EXT(vec_save)
-
-LEXT(vec_save)
-
-
-			lis		r2,hi16(MASK(MSR_VEC))			; Get VEC
-			mfmsr	r0								; Get the MSR
-			ori		r2,r2,lo16(MASK(MSR_FP))		; Add in FP
-			andc	r0,r0,r2						; Force off VEC and FP
-			ori		r2,r2,lo16(MASK(MSR_EE))		; Clear EE
-			andc	r2,r0,r2						; Clear EE for now
-			oris	r2,r2,hi16(MASK(MSR_VEC))		; Enable the vector facility for now also
-			mtmsr	r2								; Set the MSR
-			isync
-		
-			mfsprg	r6,1							; Get the current activation
-			lwz		r6,ACT_PER_PROC(r6)				; Get the per_proc block
-			lwz		r12,VMXowner(r6)				; Get the context ID for owner
-
-#if FPVECDBG
-			mr		r11,r6							; (TEST/DEBUG)
-			mr		r7,r0							; (TEST/DEBUG)
-			li		r4,0							; (TEST/DEBUG)
-			mr		r10,r3							; (TEST/DEBUG)
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			mr.		r3,r12							; (TEST/DEBUG)
-			li		r2,0x5F00						; (TEST/DEBUG)
-			li		r5,0							; (TEST/DEBUG)
-			lwz		r6,liveVRS(r6)					; (TEST/DEBUG)
-			beq--	noowneryeu						; (TEST/DEBUG)
-			lwz		r4,VMXlevel(r12)				; (TEST/DEBUG)
-			lwz		r5,VMXsave(r12)					; (TEST/DEBUG)
-
-noowneryeu:	oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-			mr		r0,r7							; (TEST/DEBUG)
-			mr		r3,r10							; (TEST/DEBUG)
-			mr		r6,r11							; (TEST/DEBUG)
-#endif	
-			mflr	r2								; Save the return address
-
-			cmplw	r3,r12							; Is the specified context live?
-			lhz		r11,PP_CPU_NUMBER(r6)			; Get our CPU number
-			bne--	vsret							; We do not own the vector, no save required...
-			lwz		r9,VMXcpu(r12)					; Get the cpu that context was last on		
-			
-			cmplw	r9,r11							; Was the context for this processor? 
-			la		r5,VMXsync(r3)					; Point to the sync word
-			bne--	vsret							; Specified context is not live
-
-;
-;			It looks like we need to save this one.  Or possibly toss a saved one if
-;			the VRSAVE is 0.
-;
-;			First, make sure that the live context block is not mucked with while
-;			we are trying to save it on out.  Then we will give it the final check.
-;
-
-			lis		r9,ha16(EXT(LockTimeOut))		; Get the high part 
-			mftb	r8								; Get the time now
-			lwz		r9,lo16(EXT(LockTimeOut))(r9)	; Get the timeout value
-			b		vssync0a						; Jump to the lock...
-			
-			.align	5
-			
-vssync0:	li		r7,lgKillResv					; Get killing field	
-			stwcx.	r7,0,r7							; Kill reservation
-
-vssync0a:	lwz		r7,0(r5)						; Sniff the lock
-			mftb	r10								; Is it time yet?
-			cmplwi	cr1,r7,0						; Is it locked?
-			sub		r10,r10,r8						; How long have we been spinning?
-			cmplw	r10,r9							; Has it been too long?
-			bgt--	vswtimeout0						; Way too long, panic...
-			bne--	cr1,vssync0a					; Yea, still locked so sniff harder...
-
-vssync1:	lwarx	r7,0,r5							; Get the sync word
-			li		r12,1							; Get the lock
-			mr.		r7,r7							; Is it unlocked?
-			bne--	vssync0							; No, it is locked...
-			stwcx.	r12,0,r5						; Store lock and test reservation
-			bne--	vssync1							; Try again if lost reservation...
-
-			isync									; Toss speculation
-
-			lwz		r12,VMXowner(r6)				; Get the context ID for owner
-			cmplw	r3,r12							; Do we still own the VMX?
-			lwz		r10,liveVRS(r6)					; Get the right VRSave register
-			bne--	vsretlk							; Go unlock and return since we no longer own context
-			
-			lwz		r5,VMXcpu(r12)					; Get the cpu that context was last on		
-			lwz		r7,VMXsave(r12)					; Get the current vector savearea for the thread
-			cmplwi	cr1,r10,0						; Is VRsave set to 0?
-			cmplw	r5,r11							; Is this for the same processor?
-			lwz		r9,VMXlevel(r12)				; Get our current level indicator
-			bne--	vsretlk							; Not the same processor, skip any save...
-			
-			cmplwi	r7,0							; Have we ever saved this facility context?
-			beq--	vsneedone						; Never saved it, so we need an area...
-			
-			lwz		r8,SAVlevel(r7)					; Get the level this savearea is for
-			cmplw	r9,r8							; Correct level?
-			bne--	vsneedone						; Different level, so we need to save...
-			
-			bne++	cr1,vsretlk						; VRsave is non-zero so we need to keep what is saved...
-						
-			lwz		r4,SAVprev+4(r7)				; Pick up the previous area
-			li		r5,0							; Assume we just dumped the last
-			mr.		r4,r4							; Is there one?
-			stw		r4,VMXsave(r12)					; Dequeue this savearea
-			beq--	vsnomore						; We do not have another...
-			
-			lwz		r5,SAVlevel(r4)					; Get the level associated with save
-
-vsnomore:	stw		r5,VMXlevel(r12)				; Save the level
-			li		r7,0							; Clear
-			stw		r7,VMXowner(r6)					; Show no live context here
-
-vsbackout:	mr		r4,r0							; restore the saved MSR			
-			eieio
-			stw		r7,VMXsync(r12)					; Unlock the context
-
-			b		EXT(save_ret_wMSR)				; Toss the savearea and return from there...
-
-			.align	5
-
-vsneedone:	beq--	cr1,vsclrlive					; VRSave is zero, go blow away the context...
-
-			bl		EXT(save_get)					; Get a savearea for the context
-			
-			mfsprg	r6,1							; Get the current activation
-			lwz		r6,ACT_PER_PROC(r6)				; Get the per_proc block
-			li		r4,SAVvector					; Get vector tag			
-			lwz		r12,VMXowner(r6)				; Get back our context ID
-			stb		r4,SAVflags+2(r3)				; Mark this savearea as a vector
-			mr.		r12,r12							; See if we were disowned while away. Very, very small chance of it...
-			li		r7,0							; Clear
-			beq--	vsbackout						; If disowned, just toss savearea...
-			lwz		r4,facAct(r12)					; Get the activation associated with live context
-			lwz		r8,VMXsave(r12)					; Get the current top vector savearea
-			stw		r4,SAVact(r3)					; Indicate the right activation for this context
-			lwz		r9,VMXlevel(r12)				; Get our current level indicator again		
-			stw		r3,VMXsave(r12)					; Set this as the most current floating point context
-			stw		r8,SAVprev+4(r3)				; And then chain this in front
-
-			stw		r9,SAVlevel(r3)					; Set level in savearea
-            mfcr	r12								; save CRs across call to vr_store
-			lwz		r10,liveVRS(r6)					; Get the right VRSave register
-            
-            bl		vr_store						; store live VRs into savearea as required (uses r4-r11)
-
-			mfsprg	r6,1							; Get the current activation
-			mtcrf	255,r12							; Restore the non-volatile CRs
- 			lwz		r6,ACT_PER_PROC(r6)				; Get the per_proc block
-          	mtlr	r2								; Restore return address
-			lwz		r12,VMXowner(r6)				; Get back our context ID
-
-vsretlk:	li		r7,0							; Get the unlock value
-			eieio									; Make sure that these updates make it out
-			stw		r7,VMXsync(r12)					; Unlock it
-		
-vsret:		mtmsr	r0								; Put interrupts on if they were and vector off
-			isync
-
-			blr
-
-vsclrlive:	li		r7,0							; Clear
-			stw		r7,VMXowner(r6)					; Show no live context here
-			b		vsretlk							; Go unlock and leave...
-
-/*
- * vec_switch()
- *
- * Entered to handle the vector unavailable exception and
- * switch vector context
- *
- * This code is run with virtual address mode on and interrupts off.
- *
- * Upon exit, the code returns to the user's context with the vector
- * facility turned on.
- *
- * ENTRY:	VM switched ON
- *		Interrupts  OFF
- *              State is saved in savearea pointed to by R4.
- *				All other registers are free.
- * 
- */
-
-			.align	5
-			.globl	EXT(vec_switch)
-
-LEXT(vec_switch)
-
-#if DEBUG
-			lis		r3,hi16(EXT(vec_trap_count))	; Get address of vector trap counter
-			ori		r3,r3,lo16(EXT(vec_trap_count))	; Get address of vector trap counter
-			lwz		r1,0(r3)
-			addi	r1,r1,1
-			stw		r1,0(r3)
-#endif /* DEBUG */
-
-			mfsprg	r17,1							; Get the current activation
-			lwz		r26,ACT_PER_PROC(r17)			; Get the per_proc block
-			mfmsr	r19								; Get the current MSR
-			
-			mr		r25,r4							; Save the entry savearea
-			oris	r19,r19,hi16(MASK(MSR_VEC))		; Enable the vector feature
-			lwz		r22,VMXowner(r26)				; Get the thread that owns the vector
-				
-			mtmsr	r19								; Enable vector instructions
-			isync
-			
-			lwz		r27,ACT_MACT_PCB(r17)			; Get the current level
-			lwz		r29,curctx(r17)					; Grab the current context anchor of the current thread
-
-;			R22 has the "old" context anchor
-;			R29 has the "new" context anchor
-
-#if FPVECDBG
-			lis		r0,HIGH_ADDR(CutTrace)			; (TEST/DEBUG)
-			li		r2,0x5F01						; (TEST/DEBUG)
-			mr		r3,r22							; (TEST/DEBUG)
-			mr		r5,r29							; (TEST/DEBUG)
-			lwz		r6,liveVRS(r26)					; (TEST/DEBUG)
-			oris	r0,r0,LOW_ADDR(CutTrace)		; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif	
-
-			lhz		r16,PP_CPU_NUMBER(r26)			; Get the current CPU number
-			
-			mr.		r22,r22							; See if there is any live vector status
-			la		r15,VMXsync(r22)				; Point to the sync word
-
-			beq--	vswnosave						; No live context, so nothing to save...
-
-			lwz		r18,VMXcpu(r22)					; Get the last CPU we ran on
-			cmplw	cr2,r22,r29						; Are both old and new the same context?
-			lwz		r30,VMXsave(r22)				; Get the top savearea
-			cmplwi	cr1,r30,0						; Anything saved yet?
-			lwz		r31,VMXlevel(r22)				; Get the context level
-			cmplw	r18,r16							; Make sure we are on the right processor
-			
-			lwz		r10,liveVRS(r26)				; Get the right VRSave register
-
-			bne--	vswnosave						; No, not on the same processor...
-		
-;
-;			Check to see if the live context has already been saved.
-;			Also check to see if all we are here for is to re-enable the MSR,
-;			and handle that case specially if so.
-;
-
-			cmplw	r31,r27							; See if the current and active levels are the same
-			crand	cr0_eq,cr2_eq,cr0_eq			; Remember if both the levels and contexts are the same			
-
-			beq--	vswthesame						; New and old are the same, just go enable...
-
-;
-;			Make sure that the live context block is not mucked with while
-;			we are trying to save it on out  
-;
-
-			lis		r11,ha16(EXT(LockTimeOut))		; Get the high part 
-			mftb	r3								; Get the time now
-			lwz		r11,lo16(EXT(LockTimeOut))(r11)	; Get the timeout value
-			b		vswsync0a						; Jump to the lock...
-			
-			.align	5
-			
-vswsync0:	li		r19,lgKillResv					; Get killing field	
-			stwcx.	r19,0,r19						; Kill reservation
-
-vswsync0a:	lwz		r19,0(r15)						; Sniff the lock
-			mftb	r18								; Is it time yet?
-			cmplwi	cr1,r19,0						; Is it locked?
-			sub		r18,r18,r3						; How long have we been spinning?
-			cmplw	r18,r11							; Has it been too long?
-			bgt--	vswtimeout0						; Way too long, panic...
-			bne--	cr1,vswsync0a					; Yea, still locked so sniff harder...
-
-vswsync1:	lwarx	r19,0,r15						; Get the sync word
-			li		r0,1							; Get the lock
-			mr.		r19,r19							; Is it unlocked?
-			bne--	vswsync0
-			stwcx.	r0,0,r15						; Store lock and test reservation
-			bne--	vswsync1						; Try again if lost reservation...
-
-			isync									; Toss speculation
-
-;
-;			Note that now that we have the lock, we need to check if anything changed.
-;			Also note that the possible changes are limited.  The context owner can 
-;			never change to a different thread or level although it can be invalidated.
-;			A new context can not be pushed on top of us, but it can be popped.  The
-;			cpu indicator will always change if another processor mucked with any 
-;			contexts.
-;
-;			It should be very rare that any of the context stuff changes across the lock.
-;
-
-			lwz		r0,VMXowner(r26)				; Get the thread that owns the vectors again
-			lwz		r11,VMXsave(r22)				; Get the top savearea again
-			lwz		r18,VMXcpu(r22)					; Get the last CPU we ran on again
-			sub		r0,r0,r22						; Non-zero if we lost ownership, 0 if not
-			xor		r11,r11,r30						; Non-zero if saved context changed, 0 if not
-			xor		r18,r18,r16						; Non-zero if cpu changed,  0 if not
-			cmplwi	cr1,r30,0						; Is anything saved?
-			or		r0,r0,r11						; Zero only if both owner and context are unchanged
-			or.		r0,r0,r18						; Zero only if nothing has changed
-			cmplwi	cr2,r10,0						; Check VRSave to see if we really need to save anything...
-			li		r8,0							; Clear
-			
-			bne--	vswnosavelk						; Something has changed, so this is not ours to save...
-			beq--	cr1,vswmstsave					; There is no context saved yet...
-			
-			lwz		r11,SAVlevel(r30)				; Get the level of top saved context
-			
-			cmplw	r31,r11							; Are live and saved the same?
-
-#if FPVECDBG
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			li		r2,0x5F02						; (TEST/DEBUG)
-			mr		r3,r30							; (TEST/DEBUG)
-			mr		r5,r31							; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif	
-
-			beq++	vswnosavelk						; Same level, already saved...
-			bne--	cr2,vswnosavelk					; Live context saved and VRSave not 0, no save and keep context...
-			
-			lwz		r4,SAVprev+4(r30)				; Pick up the previous area
-			li		r5,0							; Assume this is the only one (which should be the usual case)
-			mr.		r4,r4							; Was this the only one?
-			stw		r4,VMXsave(r22)					; Dequeue this savearea
-			beq++	vswonlyone						; This was the only one...
-			lwz		r5,SAVlevel(r4)					; Get the level associated with previous save
-
-vswonlyone:	stw		r5,VMXlevel(r22)				; Save the level
-			stw		r8,VMXowner(r26)				; Clear owner
-
-			mr		r3,r30							; Copy the savearea we are tossing
-			bl		EXT(save_ret)					; Toss the savearea
-			b		vswnosavelk						; Go load up the context...
-
-			.align	5
-
-vswmstsave:	stw		r8,VMXowner(r26)				; Clear owner
-			beq--	cr2,vswnosavelk					; The VRSave was 0, so there is nothing to save...
-
-			bl		EXT(save_get)					; Go get a savearea
-
-			lwz		r12,facAct(r22)					; Get the activation associated with the context
-			stw		r3,VMXsave(r22)					; Set this as the latest context savearea for the thread
-			stw		r30,SAVprev+4(r3)				; Point us to the old context
-			stw		r31,SAVlevel(r3)				; Tag our level
-			li		r7,SAVvector					; Get the vector ID
-			stw		r12,SAVact(r3)					; Make sure we point to the right guy
-			stb		r7,SAVflags+2(r3)				; Set that we have a vector save area
-
-#if FPVECDBG
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			li		r2,0x5F03						; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif	
-
-			lwz		r10,liveVRS(r26)				; Get the right VRSave register
-            bl		vr_store						; store VRs into savearea according to vrsave (uses r4-r11)
-			
-;
-;			The context is all saved now and the facility is free.
-;
-;			Check if we need to fill the registers with junk, because this level has 
-;			never used them before and some thieving bastard could hack the old values
-;			of some thread!  Just imagine what would happen if they could!  Why, nothing
-;			would be safe! My God! It is terrifying!
-;
-;			Also, along the way, thanks to Ian Ollmann, we generate the 0x7FFFDEAD (QNaNbarbarian)
-;			constant that we may need in order to fill unused vector registers.
-;
-;			Make sure that the live context block is not mucked with while
-;			we are trying to load it up  
-;
-
-vswnosavelk:
-			li		r7,0							; Get the unlock value
-			eieio									; Make sure that these updates make it out
-			stw		r7,VMXsync(r22)					; Unlock the old context
-			
-vswnosave:	la		r15,VMXsync(r29)				; Point to the sync word
-			lis		r11,ha16(EXT(LockTimeOut))		; Get the high part 
-			mftb	r3								; Get the time now
-			lwz		r11,lo16(EXT(LockTimeOut))(r11)	; Get the timeout value
-			b		vswnsync0a						; Jump to the lock...
-			
-			.align	5
-			
-vswnsync0:	li		r19,lgKillResv					; Get killing field	
-			stwcx.	r19,0,r19						; Kill reservation
-
-vswnsync0a:	lwz		r19,0(r15)						; Sniff the lock
-			mftb	r18								; Is it time yet?
-			cmplwi	cr1,r19,0						; Is it locked?
-			sub		r18,r18,r3						; How long have we been spinning?
-			cmplw	r18,r11							; Has it been too long?
-			bgt--	vswtimeout1						; Way too long, panic...
-			bne--	cr1,vswnsync0a					; Yea, still locked so sniff harder...
-
-vswnsync1:	lwarx	r19,0,r15						; Get the sync word
-			li		r0,1							; Get the lock
-			mr.		r19,r19							; Is it unlocked?
-			bne--	vswnsync0						; Unfortunately, it is locked...
-			stwcx.	r0,0,r15						; Store lock and test reservation
-			bne--	vswnsync1						; Try again if lost reservation...
-
-			isync									; Toss speculation
-
-			vspltisb v31,-10						; Get 0xF6F6F6F6	
-			lwz		r15,ACT_MACT_PCB(r17)			; Get the current level of the "new" one
-			vspltisb v30,5							; Get 0x05050505	
-			lwz		r19,VMXcpu(r29)					; Get the last CPU we ran on
-			vspltish v29,4							; Get 0x00040004
-			lwz		r14,VMXsave(r29)				; Point to the top of the "new" context stack
-			vrlb	v31,v31,v30						; Get 0xDEDEDEDE
-
-			stw		r16,VMXcpu(r29)					; Claim context for us
-			eieio
-
-#if FPVECDBG
-			lwz		r13,VMXlevel(r29)				; (TEST/DEBUG)
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			li		r2,0x5F04						; (TEST/DEBUG)
-			mr		r1,r15							; (TEST/DEBUG)
-			mr		r3,r14							; (TEST/DEBUG)
-			mr		r5,r13							; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif	
-			
-			lis		r18,hi16(EXT(PerProcTable))		; Set base PerProcTable
-			vspltisb v28,-2							; Get 0xFEFEFEFE		   
-			mulli	r19,r19,ppeSize					; Find offset to the owner per_proc_entry			
-			vsubuhm	v31,v31,v29						; Get 0xDEDADEDA
-			ori		r18,r18,lo16(EXT(PerProcTable))	; Set base PerProcTable
-			vpkpx	v30,v28,v3						; Get 0x7FFF7FFF
-			li		r16,VMXowner					; Displacement to vector owner
-			add		r19,r18,r19						; Point to the owner per_proc_entry	
-			lwz		r19,ppe_vaddr(r19)				; Point to the owner per_proc	
-			vrlb	v31,v31,v29						; Get 0xDEADDEAD	
-			
-vswinvothr:	lwarx	r18,r16,r19						; Get the owner
-
-			sub		r0,r18,r29						; Subtract one from the other
-			sub		r11,r29,r18						; Subtract the other from the one
-			or		r11,r11,r0						; Combine them
-			srawi	r11,r11,31						; Get a 0 if equal or -1 if not
-			and		r18,r18,r11						; Make 0 if same, unchanged if not
-			stwcx.	r18,r16,r19						; Try to invalidate it
-			bne--	vswinvothr						; Try again if there was a collision...		
-	
-			cmplwi	cr1,r14,0						; Do we possibly have some context to load?
-			vmrghh	v31,v30,v31						; Get 0x7FFFDEAD.  V31 keeps this value until the bitter end
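-
-;			How v31 became 0x7FFFDEAD (worked out from the instructions above):
-;				vspltisb v31,-10 -> 0xF6 bytes;  vrlb by 5  -> 0xDEDEDEDE
-;				vspltish v29,4   -> 0x0004;      vsubuhm    -> 0xDEDADEDA
-;				vrlb by v29 (byte rotates of 0 and 4)       -> 0xDEADDEAD
-;				vspltisb v28,-2  -> 0xFE bytes;  vpkpx      -> 0x7FFF halfwords
-;				vmrghh merges 0x7FFF with 0xDEAD            -> 0x7FFFDEAD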
-			stw		r15,VMXlevel(r29)				; Set the "new" active level
-			eieio
-			stw		r29,VMXowner(r26)				; Mark us as having the live context
-
-			beq--	cr1,ProtectTheAmericanWay		; Nothing to restore, first time use...
-		
-			lwz		r3,SAVprev+4(r14)				; Get the previous context
-			lwz		r0,SAVlevel(r14)				; Get the level of first facility savearea
-			cmplw	r0,r15							; Top level correct to load?
-			bne--	ProtectTheAmericanWay			; No, go initialize...
-			
-			stw		r3,VMXsave(r29)					; Pop the context (we will toss the savearea later)
-
-#if FPVECDBG
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			li		r2,0x5F05						; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif	
-
-			lwz		r10,savevrvalid(r14)			; Get the valid VRs in the savearea
-			lwz		r22,savevrsave(r25)				; Get the most current VRSAVE
-			and		r10,r10,r22						; Figure out just what registers need to be loaded
-            mr		r3,r14							; r3 <- ptr to savearea with VRs
-            bl		vr_load							; load VRs from save area based on vrsave in r10
-            			
-			bl		EXT(save_ret)					; Toss the save area after loading VRs
-
-vrenablelk:	li		r7,0							; Get the unlock value
-			eieio									; Make sure that these updates make it out
-			stw		r7,VMXsync(r29)					; Unlock the new context
-			
-vrenable:	lwz		r8,savesrr1+4(r25)				; Get the msr of the interrupted guy
-			oris	r8,r8,hi16(MASK(MSR_VEC))		; Enable the vector facility
-			lwz		r10,ACT_MACT_SPF(r17)			; Get the act special flags
-			lwz		r11,spcFlags(r26)				; Get per_proc spec flags cause not in sync with act
-			oris	r10,r10,hi16(vectorUsed|vectorCng)	; Set that we used vectors
-			oris	r11,r11,hi16(vectorUsed|vectorCng)	; Set that we used vectors
-			rlwinm.	r0,r8,0,MSR_PR_BIT,MSR_PR_BIT	; See if we are doing this for user state
-			stw		r8,savesrr1+4(r25)				; Set the msr of the interrupted guy
-			mr		r3,r25							; Pass virtual address of the savearea
-			beq-	vrnuser							; We are not user state...
-			stw		r10,ACT_MACT_SPF(r17)			; Set the activation copy
-			stw		r11,spcFlags(r26)				; Set per_proc copy
-
-vrnuser:
-#if FPVECDBG
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			li		r2,0x5F07						; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif	
-			b		EXT(exception_exit)				; Exit to the fray...
-
-/*
- *			Initialize the registers to some bogus value
- */
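-
-/*
- *			For reference: the 0x7FFFDEAD pattern in v31 was built above from
- *			nothing but splat/rotate/subtract/pack instructions, so no memory
- *			load is needed.  A scalar C sketch of that derivation:
- */
-#if 0
-#include <stdint.h>
-
-static uint32_t vrlb_word(uint32_t w, uint32_t counts)	/* rotate each byte left */
-{
-	uint32_t r = 0;
-	int i;
-	for (i = 0; i < 4; i++) {
-		uint8_t b = (w >> (8 * i)) & 0xFF;
-		unsigned n = (counts >> (8 * i)) & 0x7;
-		r |= (uint32_t)(uint8_t)((b << n) | (b >> ((8 - n) & 7))) << (8 * i);
-	}
-	return r;
-}
-
-static uint32_t bugbug_constant(void)
-{
-	uint32_t v = vrlb_word(0xF6F6F6F6u, 0x05050505u);	/* vrlb:    0xDEDEDEDE */
-	v -= 0x00040004u;					/* vsubuhm: 0xDEDADEDA */
-	v = vrlb_word(v, 0x00040004u);				/* vrlb:    0xDEADDEAD */
-	return 0x7FFF0000u | (v & 0xFFFFu);			/* vmrghh with 0x7FFF7FFF */
-}
-#endif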
-
-ProtectTheAmericanWay:
-			
-#if FPVECDBG
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			li		r2,0x5F06						; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif	
-			
-			vor		v0,v31,v31						; Copy into the next register
-			vor		v1,v31,v31						; Copy into the next register
-			vor		v2,v31,v31						; Copy into the next register
-			vor		v3,v31,v31						; Copy into the next register
-			vor		v4,v31,v31						; Copy into the next register
-			vor		v5,v31,v31						; Copy into the next register
-			vor		v6,v31,v31						; Copy into the next register
-			vor		v7,v31,v31						; Copy into the next register
-			vor		v8,v31,v31						; Copy into the next register
-			vor		v9,v31,v31						; Copy into the next register
-			vor		v10,v31,v31						; Copy into the next register
-			vor		v11,v31,v31						; Copy into the next register
-			vor		v12,v31,v31						; Copy into the next register
-			vor		v13,v31,v31						; Copy into the next register
-			vor		v14,v31,v31						; Copy into the next register
-			vor		v15,v31,v31						; Copy into the next register
-			vor		v16,v31,v31						; Copy into the next register
-			vor		v17,v31,v31						; Copy into the next register
-			vor		v18,v31,v31						; Copy into the next register
-			vor		v19,v31,v31						; Copy into the next register
-			vor		v20,v31,v31						; Copy into the next register
-			vor		v21,v31,v31						; Copy into the next register
-			vor		v22,v31,v31						; Copy into the next register
-			vor		v23,v31,v31						; Copy into the next register
-			vor		v24,v31,v31						; Copy into the next register
-			vor		v25,v31,v31						; Copy into the next register
-			vor		v26,v31,v31						; Copy into the next register
-			vor		v27,v31,v31						; Copy into the next register
-			vor		v28,v31,v31						; Copy into the next register
-			vor		v29,v31,v31						; Copy into the next register
-			vor		v30,v31,v31						; Copy into the next register
-			b		vrenablelk						; Finish setting it all up...				
-
-
-
-;
-;			We get here when we are switching to the same context at the same level and the context
-;			is still live.  Essentially, all we are doing is turning on the facility.  It may have
-;			gotten turned off due to doing a context save for the current level or a context switch
-;			back to the live guy.
-;
-
-			.align	5
-			
-vswthesame:
-
-#if FPVECDBG
-			lis		r0,hi16(CutTrace)				; (TEST/DEBUG)
-			li		r2,0x5F0A						; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)			; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif	
-			beq-	cr1,vrenable					; Not saved yet, nothing to pop, go enable and exit...
-			
-			lwz		r11,SAVlevel(r30)				; Get the level of top saved context
-			lwz		r14,SAVprev+4(r30)				; Get the previous savearea
-			
-			cmplw	r11,r31							; Are live and saved the same?
-
-			bne+	vrenable						; Level not the same, nothing to pop, go enable and exit...
-			
-			mr		r3,r30							; Get the old savearea (we popped it before)
-			stw		r14,VMXsave(r22)				; Pop the vector stack
-			bl		EXT(save_ret)					; Toss it
-			b		vrenable						; Go enable and exit...
-
-
-;
-;			This function invalidates any live vector context for the passed in facility_context.
-;			This is intended to be called just before act_machine_sv_free tosses saveareas.
-;
-
-			.align	5
-			.globl	EXT(toss_live_vec)
-
-LEXT(toss_live_vec)
-			
-			lis		r0,hi16(MASK(MSR_VEC))			; Get VEC
-			mfmsr	r9								; Get the MSR
-			ori		r0,r0,lo16(MASK(MSR_FP))		; Add in FP
-			rlwinm.	r8,r9,0,MSR_VEC_BIT,MSR_VEC_BIT	; Are vectors on right now?
-			andc	r9,r9,r0						; Force off VEC and FP
-			ori		r0,r0,lo16(MASK(MSR_EE))		; Turn off EE
-			andc	r0,r9,r0						; Turn off EE now
-			mtmsr	r0								; No interruptions
-			isync
-			beq+	tlvnotours						; Vector off, cannot be live here...
-
-			mfsprg	r8,1							; Get the current activation
-			lwz		r8,ACT_PER_PROC(r8)				; Get the per_proc block
-
-;
-;			Note that at this point, since vecs are on, we are the owner
-;			of live state on this processor
-;
-
-			lwz		r6,VMXowner(r8)					; Get the thread that owns the vector
-			li		r0,0							; Clear this just in case we need it
-			cmplw	r6,r3							; Are we tossing our own context?
-			bne-	tlvnotours						; Nope...
-			
-			vspltish v1,1							; Turn on the non-Java bit and saturate
-			vspltisw v0,1							; Turn on the saturate bit
-			vxor	v1,v1,v0						; Turn off saturate	
-			mtspr	vrsave,r0						; Clear VRSAVE 
-			mtvscr	v1								; Set the non-java, no saturate status
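-
-;			(The two splats and the xor compute the VSCR value entirely in
-;			registers: 0x00010001 ^ 0x00000001 = 0x00010000, i.e. the
-;			non-Java mode bit on and the saturation bit clear.)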
-
-tlvnotours:	lwz		r11,VMXcpu(r3)					; Get the cpu on which we last loaded context
-			lis		r12,hi16(EXT(PerProcTable))		; Set base PerProcTable
-			mulli	r11,r11,ppeSize					; Find offset to the owner per_proc_entry			
-			ori		r12,r12,lo16(EXT(PerProcTable))	; Set base PerProcTable
-			li		r10,VMXowner					; Displacement to vector owner
-			add		r11,r12,r11						; Point to the owner per_proc_entry	
-			lwz		r11,ppe_vaddr(r11)				; Point to the owner per_proc	
-			li		r0,0							; Set a 0 to invalidate context
-			
-tlvinvothr:	lwarx	r12,r10,r11						; Get the owner
-
-			sub		r0,r12,r3						; Subtract one from the other
-			sub		r8,r3,r12						; Subtract the other from the one
-			or		r8,r8,r0						; Combine them
-			srawi	r8,r8,31						; Get a 0 if equal or -1 if not
-			and		r12,r12,r8						; Make 0 if same, unchanged if not
-			stwcx.	r12,r10,r11						; Try to invalidate it
-			bne--	tlvinvothr						; Try again if there was a collision...		
-
-			mtmsr	r9								; Restore interruptions
-			isync									; Could be turning off vectors here
-			blr										; Leave....
-
-#if 0
-;
-;			This function invalidates any live vector context for the passed in facility_context
-;			if the level is current.  It also tosses the corresponding savearea if there is one.
-;			This function is primarily used whenever we detect a VRSave that is all zeros.
-;
-
-			.align	5
-			.globl	EXT(vec_trash)
-
-LEXT(vec_trash)
-			
-			lwz		r12,facAct(r3)					; Get the activation
-			lwz		r11,VMXlevel(r3)				; Get the context level
-			lwz		r10,ACT_MACT_PCB(r12)			; Grab the current level for the thread
-			lwz		r9,VMXsave(r3)					; Get the savearea, if any
-			cmplw	r10,r11							; Are we at the right level?
-			cmplwi	cr1,r9,0						; Remember if there is a savearea
-			bnelr+									; No, we do nothing...			
-			
-			lwz		r11,VMXcpu(r3)					; Get the cpu on which we last loaded context
-			lis		r12,hi16(EXT(PerProcTable))		; Set base PerProcTable
-			mulli	r11,r11,ppeSize					; Find offset to the owner per_proc_entry			
-			ori		r12,r12,lo16(EXT(PerProcTable))	; Set base PerProcTable
-			li		r10,VMXowner					; Displacement to vector owner
-			add		r11,r12,r11						; Point to the owner per_proc_entry	
-			lwz		r11,ppe_vaddr(r11)				; Point to the owner per_proc	
-			
-vtinvothr:	lwarx	r12,r10,r11						; Get the owner
-
-			sub		r0,r12,r3						; Subtract one from the other
-			sub		r8,r3,r12						; Subtract the other from the one
-			or		r8,r8,r0						; Combine them
-			srawi	r8,r8,31						; Get a 0 if equal or -1 if not
-			and		r12,r12,r8						; Make 0 if same, unchanged if not
-			stwcx.	r12,r10,r11						; Try to invalidate it
-			bne--	vtinvothr						; Try again if there was a collision...		
-
-
-			beqlr++	cr1								; Leave if there is no savearea
-			lwz		r8,SAVlevel(r9)					; Get the level of the savearea
-			cmplw	r8,r11							; Savearea for the current level?
-			bnelr++									; No, nothing to release...
-			
-			lwz		r8,SAVprev+4(r9)				; Pick up the previous area
-			mr.		r8,r8							; Is there a previous?
-			beq--	vtnoprev						; Nope...
-			lwz		r7,SAVlevel(r8)					; Get the level associated with save
-
-vtnoprev:	stw		r8,VMXsave(r3)					; Dequeue this savearea
-			stw		r7,VMXlevel(r3)					; Pop the level
-			
-			mr		r3,r9							; Get the savearea to release
-			b		EXT(save_ret)					; Go and toss the save area (note, we will return from there)...
-#endif	
-			
-;
-;			Just some test code to force vector and/or floating point in the kernel
-;			
-
-			.align	5
-			.globl	EXT(fctx_test)
-
-LEXT(fctx_test)
-			
-			mfsprg	r3,1							; Get the current thread
-			mr.		r3,r3							; Are we actually up and running?
-			beqlr-									; No...
-			
-			fmr		f0,f0							; Use floating point
-			mftb	r4								; Get time base for a random number
-			li		r5,1							; Get a potential vrsave to use
-			andi.	r4,r4,0x3F						; Get a number from 0 - 63
-			slw		r5,r5,r4						; Choose a register to save (should be 0 half the time)
-			mtspr	vrsave,r5						; Set VRSave
-			vor		v0,v0,v0						; Use vectors
-			blr
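-
-;			For reference, a C sketch of the vrsave chosen above: a 1 shifted
-;			left by a 6-bit slice of the timebase.  slw yields 0 for shift
-;			amounts >= 32, so vrsave is a single set bit half the time and 0
-;			the other half, as noted above:
-;
-#if 0
-static unsigned int pick_vrsave(unsigned long long timebase)
-{
-	unsigned int n = timebase & 0x3F;	/* 0 - 63 */
-	return (n < 32) ? (1u << n) : 0;	/* slw semantics */
-}
-#endif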
-
-
-// *******************
-// * f p _ s t o r e *
-// *******************
-//
-// Store FPRs into a save area.   Called by fpu_save and fpu_switch.
-//
-// When called:
-//		floating pt is enabled
-//		r3 = ptr to save area
-//
-// We destroy:
-//		r11.
-
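-// For reference, a minimal C sketch of the store pattern below, assuming
-// 128-byte lines; memset stands in for dcbz128, which establishes a cache
-// block without reading it from memory (the refill would be wasted since
-// every byte of the line is then overwritten):
-
-#if 0
-#include <string.h>
-
-#define LINE_BYTES		128
-#define FPRS_PER_LINE	(LINE_BYTES / 8)		/* 16 doubles per line */
-
-static void fp_store_sketch(double *save /* &savefp0 */, const double fpr[32])
-{
-	int line, i;
-	for (line = 0; line < 32; line += FPRS_PER_LINE) {
-		memset(&save[line], 0, LINE_BYTES);	/* dcbz128: establish line */
-		for (i = 0; i < FPRS_PER_LINE; i++)
-			save[line + i] = fpr[line + i];	/* stfd fN,savefpN(r3) */
-	}
-}
-#endif
-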
-fp_store:
-            mfsprg	r11,2					; get feature flags
-            mtcrf	0x02,r11				; put cache line size bits in cr6
-            la		r11,savefp0(r3)			; point to 1st line
-            dcbz128	0,r11					; establish 1st line no matter what linesize is
-            bt--	pf32Byteb,fp_st32		; skip if a 32-byte machine
-            
-// Store the FPRs on a 128-byte machine.
-			
-			stfd    f0,savefp0(r3)
-			stfd    f1,savefp1(r3)
-			la		r11,savefp16(r3)		; Point to the 2nd cache line
-			stfd    f2,savefp2(r3)
-			stfd    f3,savefp3(r3)
-			dcbz128	0,r11					; establish 2nd line
-			stfd    f4,savefp4(r3)
-			stfd    f5,savefp5(r3)
-			stfd    f6,savefp6(r3)
-			stfd    f7,savefp7(r3)
-			stfd    f8,savefp8(r3)
-			stfd    f9,savefp9(r3)
-			stfd    f10,savefp10(r3)
-			stfd    f11,savefp11(r3)
-			stfd    f12,savefp12(r3)
-			stfd    f13,savefp13(r3)
-			stfd    f14,savefp14(r3)
-			stfd    f15,savefp15(r3)
-			stfd    f16,savefp16(r3)
-			stfd    f17,savefp17(r3)
-			stfd    f18,savefp18(r3)
-			stfd    f19,savefp19(r3)
-			stfd    f20,savefp20(r3)
-			stfd    f21,savefp21(r3)
-			stfd    f22,savefp22(r3)
-			stfd    f23,savefp23(r3)
-			stfd    f24,savefp24(r3)
-			stfd    f25,savefp25(r3)
-			stfd    f26,savefp26(r3)
-			stfd    f27,savefp27(r3)
-			stfd    f28,savefp28(r3)
-			stfd    f29,savefp29(r3)
-			stfd    f30,savefp30(r3)
-			stfd    f31,savefp31(r3)
-            blr
-            
-// Store FPRs on a 32-byte machine.
-
-fp_st32:
-			la		r11,savefp4(r3)				; Point to the 2nd line
-			stfd    f0,savefp0(r3)
-			dcbz	0,r11						; Allocate cache
-			stfd    f1,savefp1(r3)
-			stfd    f2,savefp2(r3)
-			la		r11,savefp8(r3)				; Point to the 3rd line
-			stfd    f3,savefp3(r3)
-			dcbz	0,r11						; Allocate cache
-			stfd    f4,savefp4(r3)
-			stfd    f5,savefp5(r3)
-			stfd    f6,savefp6(r3)
-			la		r11,savefp12(r3)			; Point to the 4th line
-			stfd    f7,savefp7(r3)
-			dcbz	0,r11						; Allocate cache
-			stfd    f8,savefp8(r3)
-			stfd    f9,savefp9(r3)
-			stfd    f10,savefp10(r3)
-			la		r11,savefp16(r3)			; Point to the 5th line
-			stfd    f11,savefp11(r3)
-			dcbz	0,r11						; Allocate cache
-			stfd    f12,savefp12(r3)
-			stfd    f13,savefp13(r3)
-			stfd    f14,savefp14(r3)
-			la		r11,savefp20(r3)			; Point to the 6th line 
-			stfd    f15,savefp15(r3)
-			dcbz	0,r11						; Allocate cache
-			stfd    f16,savefp16(r3)
-			stfd    f17,savefp17(r3)
-			stfd    f18,savefp18(r3)
-			la		r11,savefp24(r3)			; Point to the 7th line
-			stfd    f19,savefp19(r3)
-			dcbz	0,r11						; Allocate cache
-			stfd    f20,savefp20(r3)
-
-			stfd    f21,savefp21(r3)
-			stfd    f22,savefp22(r3)
-			la		r11,savefp28(r3)			; Point to the 8th line
-			stfd    f23,savefp23(r3)
-			dcbz	0,r11						; allocate it
-			stfd    f24,savefp24(r3)
-			stfd    f25,savefp25(r3)
-			stfd    f26,savefp26(r3)
-			stfd    f27,savefp27(r3)
-
-			stfd    f28,savefp28(r3)
-			stfd    f29,savefp29(r3)
-			stfd    f30,savefp30(r3)
-			stfd    f31,savefp31(r3)
-            blr
-
-
-// *******************
-// * v r _ s t o r e *
-// *******************
-//
-// Store VRs into savearea, according to bits set in passed vrsave bitfield.  This routine is used
-// both by vec_save and vec_switch.  In order to minimize conditional branches and touching in
-// unnecessary cache blocks, we either save all or none of the VRs in a block.  We have separate paths
-// for each cache block size.
-//
-// When called:
-//		interrupts are off, vectors are enabled
-//		r3 = ptr to save area
-//		r10 = vrsave (not 0)
-//
-// We destroy:
-//		r4 - r11, all CRs.
-
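-// For reference, how the "all or none per cache block" decision below is set
-// up, as a C sketch.  vrsave is MSB-first (bit 0 = vr0, which is bit 31 in C
-// terms), so the same left shifts apply:
-//
-#if 0
-static unsigned int fold_pairs(unsigned int vrsave) { return vrsave | (vrsave << 1); }
-static unsigned int fold_quads(unsigned int pairs)  { return pairs  | (pairs  << 2); }
-static unsigned int fold_octs(unsigned int quads)   { return quads  | (quads  << 4); }
-/* After folding, the leading bit of each group of 2/4/8 registers is set iff
-   any VR in that group is live; the asm moves that word into CR and branches
-   once per cache block on the group's leading bit. */
-#endif
-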
-vr_store:
-            mfsprg	r9,2					; get feature flags
-			stw		r10,savevrvalid(r3)		; Save the validity information in savearea
-			slwi	r8,r10,1				; Shift over 1
-            mtcrf	0x02,r9					; put cache line size bits in cr6 where we can test
-			or		r8,r10,r8				; r8 <- even bits show which pairs are in use
-            bt--	pf32Byteb,vr_st32		; skip if 32-byte cacheline processor
-
-            
-; Save vectors on a 128-byte linesize processor.  We save all or none of the 8 registers in each of
-; the four cache lines.  This minimizes mispredicted branches yet handles cache lines optimally.
-
-            slwi	r7,r8,2					; shift groups-of-2 over by 2
-            li		r4,16					; load offsets for X-form stores
-            or		r8,r7,r8				; show if any in group of 4 are in use
-            li		r5,32
-            slwi	r7,r8,4					; shift groups-of-4 over by 4
-            li		r6,48
-            or		r11,r7,r8				; show if any in group of 8 are in use
-            li		r7,64
-            mtcrf	0x80,r11				; set CRs one at a time (faster)
-            li		r8,80
-            mtcrf	0x20,r11
-            li		r9,96
-            mtcrf	0x08,r11
-            li		r10,112
-            mtcrf	0x02,r11
-            
-            bf		0,vr_st64b				; skip if none of vr0-vr7 are in use
-            la		r11,savevr0(r3)			; get address of this group of registers in save area
-            dcbz128	0,r11					; zero the line
-            stvxl	v0,0,r11				; save 8 VRs in the line
-            stvxl	v1,r4,r11
-            stvxl	v2,r5,r11
-            stvxl	v3,r6,r11
-            stvxl	v4,r7,r11
-            stvxl	v5,r8,r11
-            stvxl	v6,r9,r11
-            stvxl	v7,r10,r11
-            
-vr_st64b:
-            bf		8,vr_st64c				; skip if none of vr8-vr15 are in use
-            la		r11,savevr8(r3)			; get address of this group of registers in save area
-            dcbz128	0,r11					; zero the line
-            stvxl	v8,0,r11				; save 8 VRs in the line
-            stvxl	v9,r4,r11
-            stvxl	v10,r5,r11
-            stvxl	v11,r6,r11
-            stvxl	v12,r7,r11
-            stvxl	v13,r8,r11
-            stvxl	v14,r9,r11
-            stvxl	v15,r10,r11
-
-vr_st64c:
-            bf		16,vr_st64d				; skip if none of vr16-vr23 are in use
-            la		r11,savevr16(r3)		; get address of this group of registers in save area
-            dcbz128	0,r11					; zero the line
-            stvxl	v16,0,r11				; save 8 VRs in the line
-            stvxl	v17,r4,r11
-            stvxl	v18,r5,r11
-            stvxl	v19,r6,r11
-            stvxl	v20,r7,r11
-            stvxl	v21,r8,r11
-            stvxl	v22,r9,r11
-            stvxl	v23,r10,r11
-
-vr_st64d:
-            bflr	24						; done if none of vr24-vr31 are in use
-            la		r11,savevr24(r3)		; get address of this group of registers in save area
-            dcbz128	0,r11					; zero the line
-            stvxl	v24,0,r11				; save 8 VRs in the line
-            stvxl	v25,r4,r11
-            stvxl	v26,r5,r11
-            stvxl	v27,r6,r11
-            stvxl	v28,r7,r11
-            stvxl	v29,r8,r11
-            stvxl	v30,r9,r11
-            stvxl	v31,r10,r11
-            blr            
-            
-; Save vectors on a 32-byte linesize processor.  We save in 16 groups of 2: we either save both
-; or neither in each group.  This cuts down on conditional branches.
-;			 r8 = bitmask with bit n set (for even n) if either of that pair of VRs is in use
-;		     r3 = savearea
-
-vr_st32:
-            mtcrf	0xFF,r8					; set CR bits so we can branch on them
-            li		r4,16					; load offset for X-form stores
-
-            bf		0,vr_st32b				; skip if neither VR in this pair is in use
-            la		r11,savevr0(r3)			; get address of this group of registers in save area
-            dcba	0,r11					; establish the line wo reading it
-            stvxl	v0,0,r11				; save the two VRs in the line
-            stvxl	v1,r4,r11
-
-vr_st32b:
-            bf		2,vr_st32c				; skip if neither VR in this pair is in use
-            la		r11,savevr2(r3)			; get address of this group of registers in save area
-            dcba	0,r11					; establish the line wo reading it
-            stvxl	v2,0,r11				; save the two VRs in the line
-            stvxl	v3,r4,r11
-
-vr_st32c:
-            bf		4,vr_st32d				; skip if neither VR in this pair is in use
-            la		r11,savevr4(r3)			; get address of this group of registers in save area
-            dcba	0,r11					; establish the line wo reading it
-            stvxl	v4,0,r11				; save the two VRs in the line
-            stvxl	v5,r4,r11
-
-vr_st32d:
-            bf		6,vr_st32e				; skip if neither VR in this pair is in use
-            la		r11,savevr6(r3)			; get address of this group of registers in save area
-            dcba	0,r11					; establish the line wo reading it
-            stvxl	v6,0,r11				; save the two VRs in the line
-            stvxl	v7,r4,r11
-
-vr_st32e:
-            bf		8,vr_st32f				; skip if neither VR in this pair is in use
-            la		r11,savevr8(r3)			; get address of this group of registers in save area
-            dcba	0,r11					; establish the line wo reading it
-            stvxl	v8,0,r11				; save the two VRs in the line
-            stvxl	v9,r4,r11
-
-vr_st32f:
-            bf		10,vr_st32g				; skip if neither VR in this pair is in use
-            la		r11,savevr10(r3)		; get address of this group of registers in save area
-            dcba	0,r11					; establish the line wo reading it
-            stvxl	v10,0,r11				; save the two VRs in the line
-            stvxl	v11,r4,r11
-
-vr_st32g:
-            bf		12,vr_st32h				; skip if neither VR in this pair is in use
-            la		r11,savevr12(r3)		; get address of this group of registers in save area
-            dcba	0,r11					; establish the line wo reading it
-            stvxl	v12,0,r11				; save the two VRs in the line
-            stvxl	v13,r4,r11
-
-vr_st32h:
-            bf		14,vr_st32i				; skip if neither VR in this pair is in use
-            la		r11,savevr14(r3)		; get address of this group of registers in save area
-            dcba	0,r11					; establish the line wo reading it
-            stvxl	v14,0,r11				; save the two VRs in the line
-            stvxl	v15,r4,r11
-
-vr_st32i:
-            bf		16,vr_st32j				; skip if neither VR in this pair is in use
-            la		r11,savevr16(r3)		; get address of this group of registers in save area
-            dcba	0,r11					; establish the line wo reading it
-            stvxl	v16,0,r11				; save the two VRs in the line
-            stvxl	v17,r4,r11
-
-vr_st32j:
-            bf		18,vr_st32k				; skip if neither VR in this pair is in use
-            la		r11,savevr18(r3)		; get address of this group of registers in save area
-            dcba	0,r11					; establish the line wo reading it
-            stvxl	v18,0,r11				; save the two VRs in the line
-            stvxl	v19,r4,r11
-
-vr_st32k:
-            bf		20,vr_st32l				; skip if neither VR in this pair is in use
-            la		r11,savevr20(r3)		; get address of this group of registers in save area
-            dcba	0,r11					; establish the line wo reading it
-            stvxl	v20,0,r11				; save the two VRs in the line
-            stvxl	v21,r4,r11
-
-vr_st32l:
-            bf		22,vr_st32m				; skip if neither VR in this pair is in use
-            la		r11,savevr22(r3)		; get address of this group of registers in save area
-            dcba	0,r11					; establish the line wo reading it
-            stvxl	v22,0,r11				; save the two VRs in the line
-            stvxl	v23,r4,r11
-
-vr_st32m:
-            bf		24,vr_st32n				; skip if neither VR in this pair is in use
-            la		r11,savevr24(r3)		; get address of this group of registers in save area
-            dcba	0,r11					; establish the line wo reading it
-            stvxl	v24,0,r11				; save the two VRs in the line
-            stvxl	v25,r4,r11
-
-vr_st32n:
-            bf		26,vr_st32o				; skip if neither VR in this pair is in use
-            la		r11,savevr26(r3)		; get address of this group of registers in save area
-            dcba	0,r11					; establish the line wo reading it
-            stvxl	v26,0,r11				; save the two VRs in the line
-            stvxl	v27,r4,r11
-
-vr_st32o:
-            bf		28,vr_st32p				; skip if neither VR in this pair is in use
-            la		r11,savevr28(r3)		; get address of this group of registers in save area
-            dcba	0,r11					; establish the line wo reading it
-            stvxl	v28,0,r11				; save the two VRs in the line
-            stvxl	v29,r4,r11
-
-vr_st32p:
-            bflr	30						; done if neither VR in this pair is in use
-            la		r11,savevr30(r3)		; get address of this group of registers in save area
-            dcba	0,r11					; establish the line wo reading it
-            stvxl	v30,0,r11				; save the two VRs in the line
-            stvxl	v31,r4,r11
-            blr
-
-
-// *****************
-// * v r _ l o a d *
-// *****************
-//
-// Load live VRs from a savearea, according to bits set in a passed vector.  This is the reverse
-// of "vr_store".  Like it, we avoid touching unnecessary cache blocks and minimize conditional
-// branches by loading all VRs from a cache line, if we have to load any.  If we don't load the VRs
-// in a cache line, we bug them.  Note that this behavior is slightly different from earlier kernels,
-// which would bug all VRs that aren't live.
-//
-// When called:
-//		interrupts are off, vectors are enabled
-//		r3 = ptr to save area
-//		r10 = vector of live regs to load (ie, savevrsave & savevrvalid, may be 0)
-//		v31 = bugbug constant (0x7FFFDEAD7FFFDEAD7FFFDEAD7FFFDEAD)
-//
-// We destroy:
-//		r4 - r11, all CRs.
-
-vr_load:
-            mfsprg	r9,2					; get feature flags
-            li		r6,1					; assuming 32-byte, get (#VRs)-1 in a cacheline
-            mtcrf	0x02,r9					; set cache line size bits in cr6
-            lis		r7,0xC000				; assuming 32-byte, set bits 0-1
-            bt--	pf32Byteb,vr_ld0		; skip if 32-byte processor
-            li		r6,7					; 128-byte machines have 8 VRs in a cacheline
-            lis		r7,0xFF00				; so set bits 0-7
-            
-// Loop touching in cache blocks we will load from.
-//		r3 = savearea ptr
-//		r5 = accumulates a bit for each VR we will be loading
-//		r6 = 1 if 32-byte, 7 if 128-byte
-//		r7 = 0xC0000000 if 32-byte, 0xFF000000 if 128-byte
-//		r10 = live VR bits
-//		v31 = bugbug constant
-
-vr_ld0:
-            li		r5,0					; initialize set of VRs to load
-            la		r11,savevr0(r3)			; get address of register file
-            b		vr_ld2					; enter loop in middle
-            
-            .align	5
-vr_ld1:										; loop over each cache line we will load
-            dcbt	r4,r11					; start prefetch of the line
-            andc	r10,r10,r9				; turn off the bits in this line
-            or		r5,r5,r9				; we will load all these
-vr_ld2:										; initial entry pt
-            cntlzw	r4,r10					; get offset to next live VR
-            andc	r4,r4,r6				; cacheline align it
-            srw.	r9,r7,r4				; position bits for VRs in that cache line
-            slwi	r4,r4,4					; get byte offset within register file to that line
-            bne		vr_ld1					; loop if more bits in r10
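-
-; For reference, a C sketch of the loop just above (minus the dcbt prefetch):
-; walk the live-VR mask, align each live index down to its cache line, and
-; accumulate a mask covering every VR in each line we will load:
-;
-#if 0
-static unsigned int lines_to_load(unsigned int live, unsigned int vrs_per_line)
-{
-	/* vrs_per_line is 2 (32-byte lines) or 8 (128-byte lines) */
-	unsigned int line_mask = 0;
-	unsigned int lead = 0xFFFFFFFFu << (32 - vrs_per_line);	/* r7 */
-	while (live != 0) {
-		unsigned int n = __builtin_clz(live);	/* cntlzw: next live VR */
-		n &= ~(vrs_per_line - 1);		/* andc: cache line align */
-		line_mask |= lead >> n;			/* srw/or: the whole line */
-		live &= ~(lead >> n);			/* andc: strip those bits */
-	}
-	return line_mask;
-}
-#endif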
-            
-            bf--	pf128Byteb,vr_ld32		; skip if not 128-byte lines
-
-// Handle a processor with 128-byte cache lines.  Four groups of 8 VRs.
-//		r3 = savearea ptr
-//		r5 = 1st bit in each cacheline is 1 iff any reg in that line must be loaded
-//		r11 = addr(savevr0)
-//		v31 = bugbug constant
-
-            mtcrf	0x80,r5					; set up bits for conditional branches
-            li		r4,16					; load offsets for X-form loads
-            li		r6,48
-            mtcrf	0x20,r5					; load CRs one at a time, which is faster
-            li		r7,64
-            li		r8,80
-            mtcrf	0x08,r5
-            li		r9,96
-            li		r10,112
-            mtcrf	0x02,r5
-            li		r5,32
-            
-            bt		0,vr_ld128a				; skip if this line must be loaded
-            vor		v0,v31,v31				; no VR must be loaded, so bug them all
-            vor		v1,v31,v31
-            vor		v2,v31,v31
-            vor		v3,v31,v31
-            vor		v4,v31,v31
-            vor		v5,v31,v31
-            vor		v6,v31,v31
-            vor		v7,v31,v31
-            b		vr_ld128b
-vr_ld128a:									; must load from this line
-            lvxl	v0,0,r11
-            lvxl	v1,r4,r11
-            lvxl	v2,r5,r11
-            lvxl	v3,r6,r11
-            lvxl	v4,r7,r11
-            lvxl	v5,r8,r11
-            lvxl	v6,r9,r11
-            lvxl	v7,r10,r11
-            
-vr_ld128b:   								; here to handle next cache line         
-            la		r11,savevr8(r3)			; load offset to it
-            bt		8,vr_ld128c				; skip if this line must be loaded
-            vor		v8,v31,v31				; no VR must be loaded, so bug them all
-            vor		v9,v31,v31
-            vor		v10,v31,v31
-            vor		v11,v31,v31
-            vor		v12,v31,v31
-            vor		v13,v31,v31
-            vor		v14,v31,v31
-            vor		v15,v31,v31
-            b		vr_ld128d
-vr_ld128c:									; must load from this line
-            lvxl	v8,0,r11
-            lvxl	v9,r4,r11
-            lvxl	v10,r5,r11
-            lvxl	v11,r6,r11
-            lvxl	v12,r7,r11
-            lvxl	v13,r8,r11
-            lvxl	v14,r9,r11
-            lvxl	v15,r10,r11
-            
-vr_ld128d:   								; here to handle next cache line         
-            la		r11,savevr16(r3)		; load offset to it
-            bt		16,vr_ld128e			; skip if this line must be loaded
-            vor		v16,v31,v31				; no VR must be loaded, so bug them all
-            vor		v17,v31,v31
-            vor		v18,v31,v31
-            vor		v19,v31,v31
-            vor		v20,v31,v31
-            vor		v21,v31,v31
-            vor		v22,v31,v31
-            vor		v23,v31,v31
-            b		vr_ld128f
-vr_ld128e:									; must load from this line
-            lvxl	v16,0,r11
-            lvxl	v17,r4,r11
-            lvxl	v18,r5,r11
-            lvxl	v19,r6,r11
-            lvxl	v20,r7,r11
-            lvxl	v21,r8,r11
-            lvxl	v22,r9,r11
-            lvxl	v23,r10,r11
-            
-vr_ld128f:   								; here to handle next cache line         
-            la		r11,savevr24(r3)		; load offset to it
-            bt		24,vr_ld128g			; skip if this line must be loaded
-            vor		v24,v31,v31				; no VR must be loaded, so bug them all
-            vor		v25,v31,v31
-            vor		v26,v31,v31
-            vor		v27,v31,v31
-            vor		v28,v31,v31
-            vor		v29,v31,v31
-            vor		v30,v31,v31
-            blr
-vr_ld128g:									; must load from this line
-            lvxl	v24,0,r11
-            lvxl	v25,r4,r11
-            lvxl	v26,r5,r11
-            lvxl	v27,r6,r11
-            lvxl	v28,r7,r11
-            lvxl	v29,r8,r11
-            lvxl	v30,r9,r11
-            lvxl	v31,r10,r11
-            blr
-            
-// Handle a processor with 32-byte cache lines.  Sixteen groups of two VRs.
-//		r5 = 1st bit in each cacheline is 1 iff any reg in that line must be loaded
-//		r11 = addr(savevr0)
-
-vr_ld32:
-            mtcrf	0xFF,r5					; set up bits for conditional branches
-            li		r4,16					; load offset for X-form loads
-            
-            bt		0,vr_ld32load0			; skip if we must load this line
-            vor		v0,v31,v31				; neither VR is live, so bug them both
-            vor		v1,v31,v31
-            b		vr_ld32test2
-vr_ld32load0:								; must load VRs in this line
-            lvxl	v0,0,r11
-            lvxl	v1,r4,r11
-            
-vr_ld32test2:								; here to handle next cache line
-            la		r11,savevr2(r3)			; get offset to next cache line
-            bt		2,vr_ld32load2			; skip if we must load this line
-            vor		v2,v31,v31				; neither VR is live, so bug them both
-            vor		v3,v31,v31
-            b		vr_ld32test4
-vr_ld32load2:								; must load VRs in this line
-            lvxl	v2,0,r11
-            lvxl	v3,r4,r11
-            
-vr_ld32test4:								; here to handle next cache line
-            la		r11,savevr4(r3)			; get offset to next cache line
-            bt		4,vr_ld32load4			; skip if we must load this line
-            vor		v4,v31,v31				; neither VR is live, so bug them both
-            vor		v5,v31,v31
-            b		vr_ld32test6
-vr_ld32load4:								; must load VRs in this line
-            lvxl	v4,0,r11
-            lvxl	v5,r4,r11
-            
-vr_ld32test6:								; here to handle next cache line
-            la		r11,savevr6(r3)			; get offset to next cache line
-            bt		6,vr_ld32load6			; skip if we must load this line
-            vor		v6,v31,v31				; neither VR is live, so bug them both
-            vor		v7,v31,v31
-            b		vr_ld32test8
-vr_ld32load6:								; must load VRs in this line
-            lvxl	v6,0,r11
-            lvxl	v7,r4,r11
-            
-vr_ld32test8:								; here to handle next cache line
-            la		r11,savevr8(r3)			; get offset to next cache line
-            bt		8,vr_ld32load8			; skip if we must load this line
-            vor		v8,v31,v31				; neither VR is live, so bug them both
-            vor		v9,v31,v31
-            b		vr_ld32test10
-vr_ld32load8:								; must load VRs in this line
-            lvxl	v8,0,r11
-            lvxl	v9,r4,r11
-            
-vr_ld32test10:								; here to handle next cache line
-            la		r11,savevr10(r3)		; get offset to next cache line
-            bt		10,vr_ld32load10		; skip if we must load this line
-            vor		v10,v31,v31				; neither VR is live, so bug them both
-            vor		v11,v31,v31
-            b		vr_ld32test12
-vr_ld32load10:								; must load VRs in this line
-            lvxl	v10,0,r11
-            lvxl	v11,r4,r11
-            
-vr_ld32test12:								; here to handle next cache line
-            la		r11,savevr12(r3)		; get offset to next cache line
-            bt		12,vr_ld32load12		; skip if we must load this line
-            vor		v12,v31,v31				; neither VR is live, so bug them both
-            vor		v13,v31,v31
-            b		vr_ld32test14
-vr_ld32load12:								; must load VRs in this line
-            lvxl	v12,0,r11
-            lvxl	v13,r4,r11
-            
-vr_ld32test14:								; here to handle next cache line
-            la		r11,savevr14(r3)		; get offset to next cache line
-            bt		14,vr_ld32load14		; skip if we must load this line
-            vor		v14,v31,v31				; neither VR is live, so bug them both
-            vor		v15,v31,v31
-            b		vr_ld32test16
-vr_ld32load14:								; must load VRs in this line
-            lvxl	v14,0,r11
-            lvxl	v15,r4,r11
-            
-vr_ld32test16:								; here to handle next cache line
-            la		r11,savevr16(r3)		; get offset to next cache line
-            bt		16,vr_ld32load16		; skip if we must load this line
-            vor		v16,v31,v31				; neither VR is live, so bug them both
-            vor		v17,v31,v31
-            b		vr_ld32test18
-vr_ld32load16:								; must load VRs in this line
-            lvxl	v16,0,r11
-            lvxl	v17,r4,r11
-            
-vr_ld32test18:								; here to handle next cache line
-            la		r11,savevr18(r3)		; get offset to next cache line
-            bt		18,vr_ld32load18		; skip if we must load this line
-            vor		v18,v31,v31				; neither VR is live, so bug them both
-            vor		v19,v31,v31
-            b		vr_ld32test20
-vr_ld32load18:								; must load VRs in this line
-            lvxl	v18,0,r11
-            lvxl	v19,r4,r11
-            
-vr_ld32test20:								; here to handle next cache line
-            la		r11,savevr20(r3)		; get offset to next cache line
-            bt		20,vr_ld32load20		; skip if we must load this line
-            vor		v20,v31,v31				; neither VR is live, so bug them both
-            vor		v21,v31,v31
-            b		vr_ld32test22
-vr_ld32load20:								; must load VRs in this line
-            lvxl	v20,0,r11
-            lvxl	v21,r4,r11
-            
-vr_ld32test22:								; here to handle next cache line
-            la		r11,savevr22(r3)		; get offset to next cache line
-            bt		22,vr_ld32load22		; skip if we must load this line
-            vor		v22,v31,v31				; neither VR is live, so bug them both
-            vor		v23,v31,v31
-            b		vr_ld32test24
-vr_ld32load22:								; must load VRs in this line
-            lvxl	v22,0,r11
-            lvxl	v23,r4,r11
-            
-vr_ld32test24:								; here to handle next cache line
-            la		r11,savevr24(r3)		; get offset to next cache line
-            bt		24,vr_ld32load24		; skip if we must load this line
-            vor		v24,v31,v31				; neither VR is live, so bug them both
-            vor		v25,v31,v31
-            b		vr_ld32test26
-vr_ld32load24:								; must load VRs in this line
-            lvxl	v24,0,r11
-            lvxl	v25,r4,r11
-            
-vr_ld32test26:								; here to handle next cache line
-            la		r11,savevr26(r3)		; get offset to next cache line
-            bt		26,vr_ld32load26		; skip if we must load this line
-            vor		v26,v31,v31				; neither VR is live, so bug them both
-            vor		v27,v31,v31
-            b		vr_ld32test28
-vr_ld32load26:								; must load VRs in this line
-            lvxl	v26,0,r11
-            lvxl	v27,r4,r11
-            
-vr_ld32test28:								; here to handle next cache line
-            la		r11,savevr28(r3)		; get offset to next cache line
-            bt		28,vr_ld32load28		; skip if we must load this line
-            vor		v28,v31,v31				; neither VR is live, so bug them both
-            vor		v29,v31,v31
-            b		vr_ld32test30
-vr_ld32load28:								; must load VRs in this line
-            lvxl	v28,0,r11
-            lvxl	v29,r4,r11
-            
-vr_ld32test30:								; here to handle next cache line
-            la		r11,savevr30(r3)		; get offset to next cache line
-            bt		30,vr_ld32load30		; skip if we must load this line
-            vor		v30,v31,v31				; neither VR is live, so bug them both
-            blr
-vr_ld32load30:								; must load VRs in this line
-            lvxl	v30,0,r11
-            lvxl	v31,r4,r11
-            blr
diff --git a/osfmk/ppc/db_asm.s b/osfmk/ppc/db_asm.s
deleted file mode 100644
index 626fa1822..000000000
--- a/osfmk/ppc/db_asm.s
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#include <debug.h>
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <mach/ppc/vm_param.h>
-#include <assym.s>
-
-
-/* boolean_t
- * db_phys_cmp(src_a, src_b, bytecount)
- *      vm_offset_t     src_a;
- *      vm_offset_t     src_b;
- *      int             bytecount
- *
- * This routine will compare bytecount bytes from physical address src_a and physical
- * address src_b. 
- */
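-
-/* A C sketch of the comparison performed below once translation is off:
- * word-at-a-time while at least four bytes remain, then a byte tail.
- * (The MSR manipulation and physical addressing have no C equivalent.)
- */
-#if 0
-static int phys_cmp_sketch(const unsigned char *a, const unsigned char *b, int n)
-{
-	for (; n > 3; a += 4, b += 4, n -= 4)
-		if (*(const unsigned int *)a != *(const unsigned int *)b)
-			return 0;
-	for (; n > 0; a++, b++, n--)
-		if (*a != *b)
-			return 0;
-	return 1;
-}
-#endif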
-
-#warning THIS IS BROKEN FOR 64-BIT
-ENTRY(db_phys_cmp, TAG_NO_FRAME_USED)
-
-	/* Switch off data translations */
-	lis		r7,hi16(MASK(MSR_VEC))
-	ori		r7,r7,lo16(MASK(MSR_FP))
-	mfmsr	r6
-	andc	r6,r6,r7			; Force FP and vec off
-	ori		r7,r7,lo16(MASK(MSR_DR))	; Set the DR bit
-	andc	r7,r6,r7			; Force DR off
-	mtmsr	r7
-	isync			/* Ensure data translations are off */
-
-	subi	r3,	r3,	4
-	subi	r4,	r4,	4
-
-	cmpwi	r5,	3
-	ble-	.L_db_phys_cmp_bytes
-.L_db_phys_cmp_loop:
-	lwz	r0,	4(r3)
-	lwz	r7,	4(r4)
-	addi	r3,	r3,	4
-	addi	r4,	r4,	4
-	subi	r5,	r5,	4
-	cmpw	r0,	r7
-	bne	.L_db_phys_cmp_false
-	cmpwi	r5,	3
-	bgt+	.L_db_phys_cmp_loop
-
-	/* If no leftover bytes, we're done now */
-	cmpwi	r5,	0
-	beq+	.L_db_phys_cmp_true
-	
-.L_db_phys_cmp_bytes:
-	addi	r3,	r3,	3
-	addi	r4,	r4,	3
-.L_db_phys_cmp_byte_loop:	
-	lbz	r0,	1(r3)
-	lbz	r7,	1(r4)
-	addi	r3,	r3,	1
-	addi	r4,	r4,	1
-	subi	r5,	r5,	1
-	cmpw	r0,	r7
-	bne	.L_db_phys_cmp_false
-	cmpwi	r5,	0
-	bne+	.L_db_phys_cmp_byte_loop
-
-.L_db_phys_cmp_true:
-	li	r3,	1
-	b	.L_db_phys_cmp_done
-
-.L_db_phys_cmp_false:
-	li	r3,	0
-
-.L_db_phys_cmp_done:
-	mtmsr	r6		/* Restore original translations */
-	isync			/* Ensure the translation change takes effect */
-
-	blr
-
diff --git a/osfmk/ppc/db_disasm.c b/osfmk/ppc/db_disasm.c
deleted file mode 100644
index 6410471ef..000000000
--- a/osfmk/ppc/db_disasm.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-/*
- * Instruction disassembler.
- */
-
-#include <mach/boolean.h>
-#include <machine/db_machdep.h>
-
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_output.h>
-
-#include <kern/task.h>
-#include <kern/misc_protos.h>
-
-#include "ppc_disasm.h"
-
-db_addr_t	db_disasm_pc, db_disasm_symaddr;
-boolean_t	db_disasm_print_symaddr;
-
-/*
- * Disassemble instruction at 'loc'.  'altfmt' specifies an
- * (optional) alternate format.  Return address of start of
- * next instruction.
- */
-db_addr_t
-db_disasm(db_addr_t loc, __unused boolean_t altfmt, task_t task)
-{
-	int inst;
-	char *p;
-
-	inst = db_get_task_value(loc, 4, FALSE, task);
-	db_disasm_pc = loc;
-	db_disasm_print_symaddr = FALSE;
-	p = in(inst);
-	db_printf("%s", p);
-	if (db_disasm_print_symaddr) {
-		db_printf(" <");
-		db_task_printsym(db_disasm_symaddr, DB_STGY_ANY, task);
-		db_printf(">");
-	}
-	db_printf("\n");		/* Make sure we have a new line for multiline displays */
-	dis_done();
-	return (loc+4);
-}
-
-/*
- * Given four bytes of instruction (stored as an int, not an
- * array of characters), compute if the instruction reads
- * memory.
- */
-int
-db_inst_load(__unused unsigned long insw)
-{
-#if 1
-	db_printf("db_inst_load: coming soon in a debugger near you!\n");
-	return 0;
-#else
-	unsigned char insb, bits;
-
-	insb = insw & 0xff;
-	insw >>= 8;
-	bits = db_ldstrtab[insb];
-	if (!(bits & DBLS_LOAD))
-		return (0);
-	while (1) {
-		switch (bits & DBLS_MODS) {
-		case 0:
-			return (1);	
-		case DBLS_MODRM:
-			insb = insw & 0xff;
-			return ((insb & 0xc0) != 0xc0);
-		case DBLS_SECOND|DBLS_MODRM:
-			insb = insw & 0xff;
-			return ((insb & 0xc0) != 0xc0 ? 2 : 0);
-		case DBLS_SECOND:
-			return (2);
-		case DBLS_ESCAPE:
-			insb = insw & 0xff;
-			insw >>= 8;
-			bits = db_ldstrtab0f[insb];
-			break;
-		case DBLS_SWREG:
-			return (db_inst_swreg(TRUE, insw, insb));
-		default:
-			panic ("db_inst_load: unknown mod bits");
-		}
-	}
-#endif
-}
-
-/*
- * Given four bytes of instruction (stored as an int, not an
- * array of characters), compute if the instruction writes
- * memory.
- */
-int
-db_inst_store(__unused unsigned long insw)
-{
-#if 1
-	db_printf("db_inst_store: coming soon in a debugger near you!\n");
-	return 0;
-#else
-	unsigned char insb, bits;
-
-	insb = insw & 0xff;
-	insw >>= 8;
-	bits = db_ldstrtab[insb];
-	if (!(bits & DBLS_STORE))
-		return (0);
-	while (1) {
-		switch (bits & DBLS_MODS) {
-		case 0:
-			return (1);	
-		case DBLS_MODRM:
-			insb = insw & 0xff;
-			return ((insb & 0xc0) != 0xc0);
-		case DBLS_SECOND|DBLS_MODRM:
-			insb = insw & 0xff;
-			return ((insb & 0xc0) != 0xc0 ? 2 : 0);
-		case DBLS_SECOND:
-			return (2);
-		case DBLS_ESCAPE:
-			insb = insw & 0xff;
-			insw >>= 8;
-			bits = db_ldstrtab0f[insb];
-			break;
-		case DBLS_SWREG:
-			return (db_inst_swreg(FALSE, insw, insb));
-		default:
-			panic ("db_inst_store: unknown mod bits");
-		}
-	}
-#endif
-}
-
-/*
- * Extra routines for the automatically generated disassembler
- */
-char *
-hex(
-	bits n)
-{
-	char *p;
-
-	if (n < 10)
-		return dec(n);
-	p = dis_alloc(11);
-	sprintf(p, "0x%lx", n);
-	return p;
-}
-
-char *
-dec(
-	bits n)
-{
-	char *p = dis_alloc(11);
-	sprintf(p, "%lu", n);
-	return p;
-}
-
-char *
-brdispl(
-	bits displ,
-	bits nbits)
-{
-	int sign, extended;
-
-	sign = 1 << (nbits - 1);
-	extended = (displ & sign ? displ - (sign << 1) : displ);
-	db_disasm_symaddr = db_disasm_pc + (extended << 2);
-	db_disasm_print_symaddr = TRUE;
-	return hex(extended << 2);
-}
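-
-/*
- * Worked example: a 14-bit displacement of 0x2001 has its sign bit
- * (1 << 13) set, so it extends to 0x2001 - 0x4000 = -0x1fff and the
- * branch target resolves to pc + (-0x1fff << 2).
- */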
-
-char *
-mbz(bits n)
-{
-	return n ? "[reserved bits not zero]" : "";
-}
-
-size_t db_disasm_string_size = 0;
-#define DB_DISASM_STRING_MAXSIZE	4096
-char db_disasm_string[DB_DISASM_STRING_MAXSIZE];
-
-void *db_disasm_malloc(size_t size);	/* forward */
-void *
-db_disasm_malloc(
-	size_t size)
-{
-	void * new_buf;
-
-	if (db_disasm_string_size + size <= DB_DISASM_STRING_MAXSIZE) {
-		new_buf = (void *) (db_disasm_string + db_disasm_string_size);
-		db_disasm_string_size += size;
-		return new_buf;
-	}
-	db_printf("db_disasm_malloc(size=%d) failed: %d left!\n",
-		  (int)size,
-		  (int)(DB_DISASM_STRING_MAXSIZE - db_disasm_string_size));
-	return (void *) 0;
-}
diff --git a/osfmk/ppc/db_interface.c b/osfmk/ppc/db_interface.c
deleted file mode 100644
index 3109d1b5e..000000000
--- a/osfmk/ppc/db_interface.c
+++ /dev/null
@@ -1,592 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#include <platforms.h>
-#include <time_stamp.h>
-#include <mach_mp_debug.h>
-#include <mach_ldebug.h>
-#include <db_machine_commands.h>
-
-#include <kern/spl.h>
-#include <kern/cpu_number.h>
-#include <kern/kern_types.h>
-#include <kern/misc_protos.h>
-#include <vm/pmap.h>
-
-#include <ppc/mem.h>
-#include <ppc/db_machdep.h>
-#include <ppc/trap.h>
-#include <ppc/setjmp.h>
-#include <ppc/pmap.h>
-#include <ppc/misc_protos.h>
-#include <ppc/cpu_internal.h>
-#include <ppc/exception.h>
-#include <ppc/db_machdep.h>
-#include <ppc/mappings.h>
-#include <ppc/Firmware.h>
-#include <ppc/serial_io.h> /* for switch_to_serial_console */
-
-#include <mach/vm_param.h>
-#include <mach/machine/vm_types.h>
-#include <vm/vm_map.h>
-#include <kern/thread.h>
-#include <kern/task.h>
-#include <kern/debug.h>
-#include <kern/machine.h> /* for halt_all_cpus() */
-#include <pexpert/pexpert.h>
-#include <IOKit/IOPlatformExpert.h>
-
-#include <ddb/db_command.h>
-#include <ddb/db_task_thread.h>
-#include <ddb/db_run.h>
-#include <ddb/db_trap.h>
-#include <ddb/db_output.h>
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_break.h>
-#include <ddb/db_watch.h>
-
-struct	 savearea *ppc_last_saved_statep;
-struct	 savearea ppc_nested_saved_state;
-unsigned ppc_last_kdb_sp;
-db_regs_t	ddb_regs;	/* register state */
-
-extern int debugger_cpu;				/* Current cpu running debugger	*/
-
-int		db_all_set_up = 0;
-
-
-#if !MACH_KDP
-void kdp_register_send_receive(void);
-#endif
-
-/*
- *	Enter KDB through a keyboard trap.
- *	We show the registers as of the keyboard interrupt
- *	instead of those at its call to KDB.
- */
-struct int_regs {
-	/* XXX more registers ? */
-	struct ppc_interrupt_state *is;
-};
-
-extern int	TRAP_TYPES;
-
-/*
- * Code used to synchronize kdb among all cpus, one active at a time, switch
- * from on to another using kdb_on! #cpu or cpu #cpu
- */
-
-decl_simple_lock_data(, kdb_lock)	/* kdb lock			*/
-
-#define	db_simple_lock_init(l, e)	hw_lock_init(&((l)->interlock))
-#define	db_simple_lock_try(l)		hw_lock_try(&((l)->interlock))
-#define	db_simple_unlock(l)		hw_lock_unlock(&((l)->interlock))
-
-extern volatile unsigned int	cpus_holding_bkpts;	/* counter for number of cpus holding
-						   breakpoints (ie: cpus that did not
-						   insert back breakpoints) */
-extern boolean_t	db_breakpoints_inserted;
-
-/* Forward */
-
-extern void	kdbprinttrap(
-			int			type,
-			int			code,
-			int			*pc,
-			int			sp);
-extern void	db_write_bytes_user_space(
-			vm_offset_t		addr,
-			int			size,
-			char			*data,
-			task_t			task);
-extern int	db_search_null(
-			task_t			task,
-			unsigned		*svaddr,
-			unsigned		evaddr,
-			unsigned		*skaddr,
-			int			flag);
-extern int	kdb_enter(int);
-extern void	kdb_leave(void);
-extern void	lock_kdb(void);
-extern void	unlock_kdb(void);
-
-#if DB_MACHINE_COMMANDS
-struct db_command	ppc_db_commands[] = {
-	{ "lt",		db_low_trace,	CS_MORE|CS_SET_DOT,	0 },
-	{ (char *)0, 	0,		0,			0 }
-};
-#endif /* DB_MACHINE_COMMANDS */
-
-#if !MACH_KDP
-void kdp_register_send_receive(void) {}
-#endif
-
-extern jmp_buf_t *db_recover;
-
-/*
- *  kdb_trap - field a TRACE or BPT trap
- */
-void
-kdb_trap(
-	int			type,
-	struct savearea *regs)
-{
-	boolean_t	trap_from_user;
-	int			previous_console_device;
-	int			code=0;
-
-	previous_console_device=switch_to_serial_console();
-
-	switch (type) {
-	    case T_TRACE:	/* single_step */
-	    case T_PROGRAM:	/* breakpoint */
-#if 0
-	    case T_WATCHPOINT:	/* watchpoint */
-#endif
-	    case -1:	/* keyboard interrupt */
-		break;
-
-	    default:
-		if (db_recover) {
-		    ppc_nested_saved_state = *regs;
-		    db_printf("Caught ");
-		    if (type > TRAP_TYPES)
-			db_printf("type %d", type);
-		    else
-			db_printf("%s", trap_type[type]);
-		    db_printf(" trap, pc = %llx\n",
-			      regs->save_srr0);
-		    db_error("");
-		    /*NOTREACHED*/
-		}
-		kdbprinttrap(type, code, (int *)&regs->save_srr0, regs->save_r1);
-	}
-
-	getPerProc()->db_saved_state = regs;
-
-	ppc_last_saved_statep = regs;
-	ppc_last_kdb_sp = (unsigned) &type;
-
-	if (!IS_USER_TRAP(regs)) {
-		bzero((char *)&ddb_regs, sizeof (ddb_regs));
-		ddb_regs = *regs;
-		trap_from_user = FALSE;	
-
-	}
-	else {
-		ddb_regs = *regs;
-		trap_from_user = TRUE;
-	}
-
-	db_task_trap(type, code, trap_from_user);
-
-	*regs = ddb_regs;
-
-	if ((type == T_PROGRAM) &&
-	    (db_get_task_value(regs->save_srr0,
-			       BKPT_SIZE,
-			       FALSE,
-			       db_target_space(current_thread(),
-					       trap_from_user))
-	                      == BKPT_INST))
-	    regs->save_srr0 += BKPT_SIZE;
-
-	getPerProc()->db_saved_state = 0;
-	switch_to_old_console(previous_console_device);
-
-}
-
-
-/*
- * Print trap reason.
- */
-
-void
-kdbprinttrap(
-	int	type,
-	int	code,
-	int	*pc,
-	int	sp)
-{
-	printf("kernel: ");
-	if (type > TRAP_TYPES)
-	    db_printf("type %d", type);
-	else
-	    db_printf("%s", trap_type[type]);
-	db_printf(" trap, code=%x pc@%x = %x sp=%x\n",
-		  code, pc, *(int *)pc, sp);
-	db_run_mode = STEP_CONTINUE;
-}
-
-/*
- *
- */
-static addr64_t
-db_vtophys(pmap_t pmap, vm_offset_t va)
-{
-	ppnum_t pp;
-	addr64_t pa;
-
-	pp = pmap_find_phys(pmap, (addr64_t)va);
-
-	if (pp == 0) return(0);					/* Couldn't find it */
-	
-	pa = ((addr64_t)pp << 12) | (addr64_t)(va & 0xFFF);	/* Get physical address */
-
-	return(pa);
-}
-
-/*
- * Read bytes from task address space for debugger.
- */
-void
-db_read_bytes(
-	vm_offset_t	addr,
-	int		size,
-	char		*data,
-	task_t		task)
-{
-	int		n,max;
-	addr64_t	phys_dst;
-	addr64_t 	phys_src;
-	pmap_t	pmap;
-	
-	while (size > 0) {
-		if (task != NULL)
-			pmap = task->map->pmap;
-		else
-			pmap = kernel_pmap;
-
-		phys_src = db_vtophys(pmap, (vm_offset_t)addr);  
-		if (phys_src == 0) {
-			db_printf("\nno memory is assigned to src address %08x\n",
-				  addr);
-			db_error(0);
-			/* NOTREACHED */
-		}
-
-		phys_dst = db_vtophys(kernel_pmap, (vm_offset_t)data); 
-		if (phys_dst == 0) {
-			db_printf("\nno memory is assigned to dst address %08x\n",
-				  data);
-			db_error(0);
-			/* NOTREACHED */
-		}
-		
-		/* don't over-run any page boundaries - check src range */
-		max = round_page_64(phys_src + 1) - phys_src;
-		if (max > size)
-			max = size;
-		/* Check destination won't run over boundary either */
-		n = round_page_64(phys_dst + 1) - phys_dst;
-		
-		if (n < max) max = n;
-		size -= max;
-		addr += max;
-		phys_copy(phys_src, phys_dst, max);
-
-		/* resync I+D caches */
-		sync_cache64(phys_dst, max);
-
-		phys_src += max;
-		phys_dst += max;
-	}
-}
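-
-/*
- * The chunking above in a nutshell: round_page_64(p + 1) - p is the number
- * of bytes from p to the end of its physical page (a full page when p is
- * page aligned), e.g. with 4K pages:
- *
- *	p = 0x1000 -> 0x1000 bytes;  p = 0x1fff -> 1 byte
- *
- * so each phys_copy stays within one page on both source and destination.
- */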
-
-/*
- * Write bytes to task address space for debugger.
- */
-void
-db_write_bytes(
-	vm_offset_t	addr,
-	int		size,
-	char		*data,
-	task_t		task)
-{
-	int		n,max;
-	addr64_t	phys_dst;
-	addr64_t 	phys_src;
-	pmap_t	pmap;
-	
-	while (size > 0) {
-
-		phys_src = db_vtophys(kernel_pmap, (vm_offset_t)data); 
-		if (phys_src == 0) {
-			db_printf("\nno memory is assigned to src address %08x\n",
-				  data);
-			db_error(0);
-			/* NOTREACHED */
-		}
-		
-		/* space stays as kernel space unless in another task */
-		if (task == NULL) pmap = kernel_pmap;
-		else pmap = task->map->pmap;
-
-		phys_dst = db_vtophys(pmap, (vm_offset_t)addr);  
-		if (phys_dst == 0) {
-			db_printf("\nno memory is assigned to dst address %08x\n",
-				  addr);
-			db_error(0);
-			/* NOTREACHED */
-		}
-
-		/* don't over-run any page boundaries - check src range */
-		max = round_page_64(phys_src + 1) - phys_src;
-		if (max > size)
-			max = size;
-		/* Check destination won't run over boundary either */
-		n = round_page_64(phys_dst + 1) - phys_dst;
-		if (n < max)
-			max = n;
-		size -= max;
-		addr += max;
-		phys_copy(phys_src, phys_dst, max);
-
-		/* resync I+D caches */
-		sync_cache64(phys_dst, max);
-
-		phys_src += max;
-		phys_dst += max;
-	}
-}
-	
-boolean_t
-db_check_access(
-	vm_offset_t	addr,
-	int		size,
-	task_t		task)
-{
-	register int	n;
-
-	if (task == kernel_task || task == TASK_NULL) {
-	    if (kernel_task == TASK_NULL)  return(TRUE);
-	    task = kernel_task;
-	} else if (task == TASK_NULL) {
-	    if (current_thread() == THR_ACT_NULL) return(FALSE);
-	    task = current_thread()->task;
-	}
-
-	while (size > 0) {
-		if(!pmap_find_phys(task->map->pmap, (addr64_t)addr)) return (FALSE);	/* Fail if page not mapped */
-	    n = trunc_page_32(addr+PPC_PGBYTES) - addr;
-	    if (n > size)
-		n = size;
-	    size -= n;
-	    addr += n;
-	}
-	return(TRUE);
-}
-
-boolean_t
-db_phys_eq(
-	task_t		task1,
-	vm_offset_t	addr1,
-	task_t		task2,
-	vm_offset_t	addr2)
-{
-	addr64_t	physa, physb;
-
-	if ((addr1 & (PPC_PGBYTES-1)) != (addr2 & (PPC_PGBYTES-1)))	/* Is byte displacement the same? */
-		return FALSE;
-
-	if (task1 == TASK_NULL) {						/* See if there is a task active */
-		if (current_thread() == THR_ACT_NULL)		/* See if there is a current task */
-			return FALSE;
-		task1 = current_thread()->task;				/* If so, use that one */
-	}
-	
-	if(!(physa = db_vtophys(task1->map->pmap, (vm_offset_t)trunc_page_32(addr1)))) return FALSE;	/* Get real address of the first */
-	if(!(physb = db_vtophys(task2->map->pmap, (vm_offset_t)trunc_page_32(addr2)))) return FALSE;	/* Get real address of the second */
-	
-	return (physa == physb);						/* Check if they are equal, then return... */
-}
-
-#define DB_USER_STACK_ADDR		(0xc0000000)
-#define DB_NAME_SEARCH_LIMIT		(DB_USER_STACK_ADDR-(PPC_PGBYTES*3))
-
-boolean_t
-db_phys_cmp(__unused vm_offset_t a1, __unused vm_offset_t a2,
-	    __unused vm_size_t s1)
-{
-	db_printf("db_phys_cmp: not implemented\n");
-	return 0;
-}
-
-
-int
-db_search_null(__unused task_t task, __unused unsigned *svaddr,
-	       __unused unsigned evaddr, __unused unsigned *skaddr,
-	       __unused int flag)
-{
-	db_printf("db_search_null: not implemented\n");
-	return(-1);
-}
-
-struct proc;
-unsigned char *getProcName(struct proc *proc);
-
-void
-db_task_name(
-	task_t		task)
-{
-	register unsigned char *p;
-	unsigned char tname[33];
-	int i;
-
-	p = 0;
-	tname[0] = 0;
-	
-	if(task->bsd_info) p = getProcName((struct proc *)(task->bsd_info));	/* Point to task name */
-	
-	if(p) {
-		for(i = 0; i < 32; i++) {			/* Move no more than 32 bytes */
-			tname[i] = p[i];
-			if(p[i] == 0) break;
-		}
-		tname[i] = 0;
-		db_printf("%s", tname);
-	}
-	else db_printf("no name");
-}
-
-extern int kdb_flag;  
-void
-db_machdep_init(void)
-{
-#define KDB_READY       0x1
-	kdb_flag |= KDB_READY;
-}
-
-
-#ifdef	__STDC__
-//#define KDB_SAVE(type, name) extern type name; type name##_save = name
-#define KDB_SAVE(type, name) type name##_save = name
-#define KDB_RESTORE(name) name = name##_save
-#else	/* __STDC__ */
-#define KDB_SAVE(type, name) type name/**/_save = name
-//#define KDB_SAVE(type, name) extern type name; type name/**/_save = name
-#define KDB_RESTORE(name) name = name/**/_save
-#endif	/* __STDC__ */
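-
-/*
- * Sketch of the token-pasting trick behind KDB_SAVE/KDB_RESTORE: each
- * invocation declares a shadow copy of a global and pastes the same name
- * to copy it back.  Under ANSI C, KDB_SAVE(int, db_run_mode) expands to
- * "int db_run_mode_save = db_run_mode"; the pre-ANSI branch gets the
- * same effect from the old comment-splice pasting.  A minimal
- * standalone illustration (names hypothetical):
- *
- *	#define SAVE(type, name)	type name##_save = name
- *	#define RESTORE(name)		name = name##_save
- *
- *	int mode = 3;
- *	void demo(void) {
- *		SAVE(int, mode);	declares mode_save = 3
- *		mode = 7;		clobber the global
- *		RESTORE(mode);		mode is 3 again
- *	}
- */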
-
-#define KDB_SAVE_CTXT() \
-	KDB_SAVE(int, db_run_mode); \
-	KDB_SAVE(boolean_t, db_sstep_print); \
-	KDB_SAVE(int, db_loop_count); \
-	KDB_SAVE(int, db_call_depth); \
-	KDB_SAVE(int, db_inst_count); \
-	KDB_SAVE(int, db_last_inst_count); \
-	KDB_SAVE(int, db_load_count); \
-	KDB_SAVE(int, db_store_count); \
-	KDB_SAVE(boolean_t, db_cmd_loop_done); \
-	KDB_SAVE(jmp_buf_t *, db_recover); \
-	KDB_SAVE(db_addr_t, db_dot); \
-	KDB_SAVE(db_addr_t, db_last_addr); \
-	KDB_SAVE(db_addr_t, db_prev); \
-	KDB_SAVE(db_addr_t, db_next); \
-	KDB_SAVE(db_regs_t, ddb_regs); 
-
-#define KDB_RESTORE_CTXT() \
-	KDB_RESTORE(db_run_mode); \
-	KDB_RESTORE(db_sstep_print); \
-	KDB_RESTORE(db_loop_count); \
-	KDB_RESTORE(db_call_depth); \
-	KDB_RESTORE(db_inst_count); \
-	KDB_RESTORE(db_last_inst_count); \
-	KDB_RESTORE(db_load_count); \
-	KDB_RESTORE(db_store_count); \
-	KDB_RESTORE(db_cmd_loop_done); \
-	KDB_RESTORE(db_recover); \
-	KDB_RESTORE(db_dot); \
-	KDB_RESTORE(db_last_addr); \
-	KDB_RESTORE(db_prev); \
-	KDB_RESTORE(db_next); \
-	KDB_RESTORE(ddb_regs); 
-
-extern boolean_t db_sstep_print;
-extern int db_loop_count;
-extern int db_call_depth;
-extern int db_inst_count;
-extern int db_last_inst_count;
-extern int db_load_count;
-extern int db_store_count;
-extern boolean_t db_cmd_loop_done;
-extern void unlock_debugger(void);
-extern void lock_debugger(void);
-/*
- * switch to another cpu
- */
-void
-kdb_on(
-	int		cpu)
-{
-	KDB_SAVE_CTXT();
-	if (cpu < 0 || cpu >= (int)real_ncpus || !PerProcTable[cpu].ppe_vaddr->debugger_active)
-		return;
-	db_set_breakpoints();
-	db_set_watchpoints();
-	debugger_cpu = cpu;
-	unlock_debugger();
-	lock_debugger();
-	db_clear_breakpoints();
-	db_clear_watchpoints();
-	KDB_RESTORE_CTXT();
-	if (debugger_cpu == -1)  {/* someone continued */
-		debugger_cpu = cpu_number();
-		db_continue_cmd(0, 0, 0, NULL);
-	}
-}
-
-/*
- * system reboot
- */
-
-void
-db_reboot(__unused db_expr_t addr, __unused boolean_t have_addr,
-	  __unused db_expr_t count, char *modif)
-{
-	boolean_t	reboot = TRUE;
-	char		*cp, c;
-	
-	cp = modif;
-	while ((c = *cp++) != 0) {
-		if (c == 'r')	/* reboot */
-			reboot = TRUE;
-		if (c == 'h')	/* halt */
-			reboot = FALSE;
-	}
-	if(!reboot) halt_all_cpus(FALSE);	/* If no reboot, try to be clean about it */
-
-	if (PE_halt_restart)
-		(*PE_halt_restart)(kPERestartCPU);
-	db_printf("Sorry, system can't reboot automatically yet...  You need to do it by hand...\n");
-
-}
diff --git a/osfmk/ppc/db_low_trace.c b/osfmk/ppc/db_low_trace.c
deleted file mode 100644
index e081b2643..000000000
--- a/osfmk/ppc/db_low_trace.c
+++ /dev/null
@@ -1,1106 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_FREE_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-
-/*
- *	Author: Bill Angell, Apple
- *	Date:	6/97
- *
- * exceptions and certain C functions write into a trace table which
- * can be examined via the machine 'lt' command under kdb
- */
-
-
-#include <string.h>			/* For strcpy() */
-#include <mach/boolean.h>
-#include <machine/db_machdep.h>
-
-#include <ddb/db_access.h>
-#include <ddb/db_lex.h>
-#include <ddb/db_output.h>
-#include <ddb/db_command.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_task_thread.h>
-#include <ddb/db_command.h>		/* For db_option() */
-#include <ddb/db_examine.h>
-#include <ddb/db_expr.h>
-#include <kern/thread.h>
-#include <kern/task.h>
-#include <mach/vm_param.h>
-#include <mach/kmod.h>
-#include <ppc/Firmware.h>
-#include <ppc/low_trace.h>
-#include <ppc/db_low_trace.h>
-#include <ppc/mappings.h>
-#include <ppc/pmap.h>
-#include <ppc/mem.h>
-#include <ppc/savearea.h>
-#include <ppc/vmachmon.h>
-
-void db_dumppca(unsigned int ptegindex); 	
-void db_dumpmapping(struct mapping *mp); 					/* Dump out a mapping */
-extern kmod_info_t *kmod;									/* Find the kmods */
-
-db_addr_t	db_low_trace_prev = 0;
-
-/*
- *		Print out the low level trace table:
- *
- *		Displays the entry and 15 before it in newest to oldest order
- *		
- *		lt [entaddr]
- *
- *		If entaddr is omitted, it starts with the most current
- *		If entaddr = 0, it starts with the most current and does the whole table
- */
-void
-db_low_trace(db_expr_t addr, boolean_t have_addr, db_expr_t count, char *modif)
-{
-	int		c, i;
-	unsigned int tempx, cnt;
-	unsigned int xTraceCurr, xTraceStart, xTraceEnd, cxltr;
-	db_addr_t	next_addr;
-	LowTraceRecord xltr;
-	unsigned char cmark;
-	addr64_t xxltr;
-	
-	cnt = 16;													/* Default to 16 entries */
-	
-	xTraceCurr = trcWork.traceCurr;								/* Transfer current pointer */
-	xTraceStart = trcWork.traceStart;							/* Transfer start of table */
-	xTraceEnd = trcWork.traceEnd;								/* Transfer end of table */
-	
-	if(addr == -1) cnt = 0x7FFFFFFF;							/* Max the count */
-
-	if(!addr || (addr == -1)) {
-		addr=xTraceCurr-sizeof(LowTraceRecord);					/* Start at the newest */
-		if((unsigned int)addr<xTraceStart) addr=xTraceEnd-sizeof(LowTraceRecord);	/* Wrap low back to high */
-	}
-	
-	if((unsigned int)addr<xTraceStart||(unsigned int)addr>=xTraceEnd) {	/* In the table? */
-		db_printf("address not in low memory trace table\n");	/* Tell the fool */
-		return;													/* Leave... */
-	}
-
-	if((unsigned int)addr&0x0000007F) {							/* Proper alignment? */
-		db_printf("address not aligned on trace entry boundary (0x80)\n");	/* Tell 'em */
-		return;													/* Leave... */
-	}
-	
-	xxltr = addr;												/* Set the start */
-	cxltr = ((xTraceCurr == xTraceStart ? xTraceEnd : xTraceCurr) - sizeof(LowTraceRecord));	/* Get address of newest entry */
-
-	db_low_trace_prev = addr;									/* Starting point */
-
-	for(i=0; i < cnt; i++) {									/* Dump the 16 (or all) entries */
-	
-		ReadReal((addr64_t)xxltr, (unsigned int *)&xltr);					/* Get the first quarter */
-		ReadReal((addr64_t)xxltr + 32, &(((unsigned int *)&xltr)[8]));		/* Get the second quarter */
-		ReadReal((addr64_t)xxltr + 64, &(((unsigned int *)&xltr)[16]));		/* Get the third quarter */
-		ReadReal((addr64_t)xxltr + 96, &(((unsigned int *)&xltr)[24]));		/* Get the fourth quarter */
-		
-		db_printf("\n%s%08llX  %1X  %08X %08X - %04X", (xxltr != cxltr ? " " : "*"), 
-			xxltr,
-			(xltr.LTR_cpu & 0xFF), xltr.LTR_timeHi, xltr.LTR_timeLo, 
-			(xltr.LTR_excpt & 0x8000 ? 0xFFFF : xltr.LTR_excpt * 64));	/* Print the first line */
-
-		if(xltr.LTR_cpu & 0xFF00) db_printf(", sflgs = %02X\n", ((xltr.LTR_cpu >> 8) & 0xFF));
-		else db_printf("\n");
-			
-		db_printf("              DAR/DSR/CR: %016llX %08X %08X\n", xltr.LTR_dar, xltr.LTR_dsisr, xltr.LTR_cr);
-		
-		db_printf("                SRR0/SRR1 %016llX %016llX\n",  xltr.LTR_srr0, xltr.LTR_srr1);
-		db_printf("                LR/CTR    %016llX %016llX\n",  xltr.LTR_lr, xltr.LTR_ctr);
-
-		db_printf("                R0/R1/R2  %016llX %016llX %016llX\n", xltr.LTR_r0, xltr.LTR_r1, xltr.LTR_r2);
-		db_printf("                R3/R4/R5  %016llX %016llX %016llX\n", xltr.LTR_r3, xltr.LTR_r4, xltr.LTR_r5);
-		db_printf("              R6/sv/rsv   %016llX %016llX %08X\n", xltr.LTR_r6, xltr.LTR_save, xltr.LTR_rsvd0);
-	
-		if((cnt != 16) && (xxltr == xTraceCurr)) break;			/* If whole table dump, exit when we hit start again... */
-
-		xxltr-=sizeof(LowTraceRecord);							/* Back it on up */
-		if(xxltr<xTraceStart)
-			xxltr=(xTraceEnd-sizeof(LowTraceRecord));			/* Wrap low back to high */
-	
-	}
-	db_next = (db_expr_t)(xxltr);
-}
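-
-/*
- * Sketch of the wrap-around walk above: step backwards through a
- * circular table of fixed-size records, wrapping from the low end back
- * to the high end (start, end, curr, REC_SIZE are hypothetical
- * stand-ins for the trcWork fields and sizeof(LowTraceRecord)):
- *
- *	unsigned int p = curr - REC_SIZE;		newest entry
- *	if (p < start) p = end - REC_SIZE;		wrap low back to high
- *	for (i = 0; i < nrecs; i++) {
- *		dump_record(p);
- *		p -= REC_SIZE;
- *		if (p < start) p = end - REC_SIZE;	wrap again as needed
- *	}
- */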
-
-
-/*
- *		Print out 256 bytes
- *
- *		
- *		dl [entaddr]
- */
-void
-db_display_long(db_expr_t addr, __unused boolean_t have_addr,
-		db_expr_t count, char * modif)
-{
-	int				i;
-
-	for(i=0; i<8; i++) {									/* Print 256 bytes */
-		db_printf("%016llX   %08X %08X %08X %08X  %08X %08X %08X %08X\n", addr,	/* Print a line */
-			((unsigned long *)addr)[0], ((unsigned long *)addr)[1], ((unsigned long *)addr)[2], ((unsigned long *)addr)[3], 
-			((unsigned long *)addr)[4], ((unsigned long *)addr)[5], ((unsigned long *)addr)[6], ((unsigned long *)addr)[7]);
-		addr=(db_expr_t)(addr+0x00000020);					/* Point to next address */
-	}
-	db_next = addr;
-}
-
-unsigned char xtran[256] = {
-/*  x0   x1   x2   x3   x4   x5   x6   x7   x8   x9   xA   xB   xC   xD   xE   xF   	   */
-	'.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.',  /* 0x */
-	'.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.',  /* 1x */
-	' ', '!', '"', '#', '$', '%', '&',0x27, '(', ')', '*', '+', ',', '-', '.', '/',  /* 2x */
-	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',  /* 3x */
-	'@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',  /* 4x */
-	'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[',0x5C, ']', '^', '_',  /* 5x */
-	'`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',  /* 6x */
-	'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', '.',  /* 7x */
-	'.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.',  /* 8x */
-	'.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.',  /* 9x */
-	'.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.',  /* Ax */
-	'.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.',  /* Bx */
-	'.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.',  /* Cx */
-	'.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.',  /* Dx */
-	'.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.',  /* Ex */
-	'.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.',  /* Fx */
-};
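-
-/*
- * Note: xtran maps every possible byte to a printable glyph, with '.'
- * standing in for non-printables; 0x27 and 0x5C are the single quote
- * and backslash, spelled numerically so the table itself needs no
- * escapes.  A table like this reduces a combined hex-and-text dump
- * line to a pair of trivial loops (sketch, assuming printf/putchar
- * from <stdio.h>):
- *
- *	void dump_line(const unsigned char xlate[256],
- *		       const unsigned char buf[16]) {
- *		int k;
- *		for (k = 0; k < 16; k++) printf("%02X ", buf[k]);
- *		for (k = 0; k < 16; k++) putchar(xlate[buf[k]]);
- *		putchar('\n');
- *	}
- */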
-
-/*
- *		Print out 256 bytes in characters
- *
- *		
- *		dc [entaddr]
- */
-void
-db_display_char(db_expr_t addr, boolean_t have_addr, db_expr_t count,
-		char * modif)
-{
-
-	int				i, j, k;
-	unsigned char xlt[256], *xaddr;
-	
-	xaddr = (unsigned char *)addr;
-	
-
-	for(i = 0; i < 8; i++) {								/* Print 256 bytes */
-		j = 0;
-		for(k = 0; k < 32; k++) {
-			xlt[j] = xtran[*xaddr];
-			xaddr++;
-			j++;
-			if((k & 3) == 3) {
-				xlt[j] = ' ';
-				j++;
-			}
-		}
-		xlt[j] = 0;
-		
-		db_printf("%016llX   %s\n", (addr64_t)(xaddr - 32), xlt);	/* Print a line */
-	}
-
-	db_next = (db_expr_t)xaddr;
-}
-
-/*
- *		Print out 256 bytes of real storage
- *
- *		dr [entaddr]
- */
-void
-db_display_real(db_expr_t addr, boolean_t have_addr, db_expr_t count,
-		char *modif)
-{
-	int				i;
-	unsigned int xbuf[8];
-
-	for(i=0; i<8; i++) {									/* Print 256 bytes */
-		ReadReal(addr, &xbuf[0]);							/* Get the real storage data */
-		db_printf("%016llX   %08X %08X %08X %08X  %08X %08X %08X %08X\n", addr,	/* Print a line */
-			xbuf[0], xbuf[1], xbuf[2], xbuf[3], 
-			xbuf[4], xbuf[5], xbuf[6], xbuf[7]);
-		addr = addr + 0x00000020;							/* Point to next address */
-	}
-	db_next = addr;
-}
-
-unsigned int	dvspace = 0;
-
-/*
- *		Print out virtual to real translation information
- *
- *		
- *		dm vaddr [space] (defaults to last entered) 
- */
-void
-db_display_mappings(db_expr_t addr, boolean_t have_addr, db_expr_t count,
-		    char *modif)
-{
-	db_expr_t	xspace;
-	pmap_t			pmap;
-	addr64_t		lnextva;
-
-	mapping_t	*mp;
-	
-	if (db_expression(&xspace)) {							/* Get the address space requested */
-		if(xspace >= maxAdrSp) {
-			db_printf("requested address space (%llX) larger than max (%X)\n", xspace, maxAdrSp - 1);
-			return;
-		}
-		dvspace = xspace;									/* Get the space or set default */
-	}
-	
-	db_printf("mapping information for %016llX in space %8X:\n", addr, dvspace);
-
-	pmap = pmapTrans[dvspace].pmapVAddr;					/* Find the pmap address */
-	if(!pmap) {												/* The pmap is not in use */
-		db_printf("The space %X is not assigned to a pmap\n", dvspace);	/* Say we are wrong */
-		return;
-	}
-
-	mp = hw_find_map(pmap, (addr64_t)addr, &lnextva);		/* Try to find the mapping for this address */
-	if((unsigned int)mp == mapRtBadLk) {					/* Did we lock up ok? */
-		db_printf("Timeout locking physical entry for virtual address %016ll8X\n", addr);	
-		return;
-	}
-	
-	if(!mp) {												/* Did we find one? */
-		db_printf("Not mapped\n");	
-		return;												/* Didn't find any, return FALSE... */
-	}
-	
-	mapping_drop_busy(mp);									/* The mapping shouldn't be changing */
-
-	db_dumpmapping(mp);										/* Dump it all out */
-
-	/* Tell them we did it */
-}
-
-/*
- *		Print out hash table data
- *
- *		
- *		dh vaddr [space] (defaults to last entered) 
- */
-void
-db_display_hash(db_expr_t addr, boolean_t have_addr, db_expr_t count,
-		char *modif)
-{
-	db_expr_t		xspace;
-	unsigned int	seg, vsid, ptegindex, htsize;
-	pmap_t			pmap;
-	addr64_t		lnextva, llva, vpn, esid;
-	uint64_t		hash;
-	int 			s4bit;
-
-	llva = (addr64_t)((unsigned int)addr);					/* Make sure we are 64-bit now */
-	
-	s4bit = ((PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit) != 0);	/* Are we a big guy? */
-	if (db_expression(&xspace)) {							/* Get the address space requested */
-		if(xspace >= maxAdrSp) {
-			db_printf("requested address space (%llX) larger than max (%X)\n", xspace, maxAdrSp - 1);
-			return;
-		}
-		dvspace = xspace;									/* Get the space or set default */
-	}
-	
-	pmap = pmapTrans[dvspace].pmapVAddr;					/* Find the pmap address */
-	if(!pmap) {												/* The pmap is not in use */
-		db_printf("The space %X is not assigned to a pmap\n", dvspace);	/* Say we are wrong */
-		return;
-	}
-
-	hash = (uint64_t)pmap->space | ((uint64_t)pmap->space << maxAdrSpb) | ((uint64_t)pmap->space << (2 * maxAdrSpb));	/* Get hash value */
-	hash = hash & 0x0000001FFFFFFFFF;						/* Make sure we stay within supported ranges */
-	
-	esid = ((llva >> 14) & -maxAdrSp) ^ hash;				/* Get ESID */
-	llva = ((llva >> 12) & 0xFFFF) ^ esid;					/* Get index into hash table */
-
-	if(s4bit) htsize = hash_table_size >> 7;				/* Get number of entries in hash table for 64-bit */
-	else htsize = hash_table_size >> 6;						/* Get number of entries in hash table for 32-bit */
-	
-	ptegindex = llva & (htsize - 1);						/* Get the index to the pteg and pca */
-	db_dumppca(ptegindex);									/* dump the info */
-	
-	/* Tell them we did it */
-}
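-
-/*
- * Sketch: stripped of the space-replication details above, the classic
- * PowerPC primary hash folds the segment identifier into the 16-bit
- * page index and masks to the table size, which must be a power of two
- * (names hypothetical):
- *
- *	uint64_t pteg_index(uint64_t vsid, uint64_t va, unsigned int npteg) {
- *		uint64_t pgidx = (va >> 12) & 0xFFFF;	page index within segment
- *		return (vsid ^ pgidx) & (npteg - 1);	npteg is a power of two
- *	}
- */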
-
-/*
- *		Displays all of the in-use pmaps in the system.
- *
- *		dp
- */
-void
-db_display_pmap(db_expr_t addr, boolean_t have_addr, db_expr_t count,
-		char *modif)
-{
-	pmap_t			pmap;
-	int i;
-	unsigned int v0, v1, st0, st1;
-	
-	pmap = (pmap_t)addr;
-	if(!have_addr) pmap = kernel_pmap;						/* Start at the beginning */
-	
-	db_printf("PMAP     (real)            Next     Prev     Space    Flags    Ref      spaceNum Resident Wired\n"); 
-//	           xxxxxxxx rrrrrrrrrrrrrrrr  xxxxxxxx pppppppp ssssssss cccccccc vvvvvvvv nnnnnnnn rrrrrrrr wwwwwwwww
-	while(1) {												/* Do them all */
-		db_printf("%08X %016llX  %08X %08X %08X %08X %08X %08X %08X %08X\n",
-			pmap, (addr64_t)pmap ^ pmap->pmapvr,
-			pmap->pmap_link.next,  pmap->pmap_link.prev,
-			pmap->space, pmap->pmapFlags, pmap->ref_count, pmap->spaceNum,
-			pmap->stats.resident_count,
-			pmap->stats.wired_count);
-
-		db_printf("lists = %d, rand = %08X, visits = %016llX, searches = %08X\n",
-			pmap->pmapCurLists, pmap->pmapRandNum,
-			pmap->pmapSearchVisits, pmap->pmapSearchCnt); 
-
-		db_printf("cctl = %08X, SCSubTag = %016llX\n",
-			pmap->pmapCCtl, pmap->pmapSCSubTag); 
-		
-		for(i = 0; i < 16; i +=2) {
-			v0 = (pmap->pmapCCtl >> (31 - i) & 1);			/* Get valid bit for even entry */
-			v1 = (pmap->pmapCCtl >> (30 - i) & 1);			/* Get valid bit for odd entry */
-			st0 = (pmap->pmapSCSubTag >> (60 - (4 * i))) & 0xF;	/* Get the sub-tag */
-			st1 = (pmap->pmapSCSubTag >> (56 - (4 * i))) & 0xF;	/* Get the sub-tag */
-			
-			db_printf("         %01X %01X %016llX/%016llX  %01X %01X %016llX/%016llX\n", 
-				v0, st0, pmap->pmapSegCache[i].sgcESID, pmap->pmapSegCache[i].sgcVSID,
-				v1, st1, pmap->pmapSegCache[i+1].sgcESID, pmap->pmapSegCache[i+1].sgcVSID);
-		}
-
-		db_printf("\n");
-		if(have_addr) break;								/* Do only one if address supplied */
-		pmap = (pmap_t)pmap->pmap_link.next;				/* Skip to the next */
-		if(pmap == kernel_pmap) break;						/* We've wrapped, we're done */
-	}
-}
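-
-/*
- * Note: the pmap list is a ring threaded through pmap_link and anchored
- * at kernel_pmap, so the loop above is the standard ring walk, visit a
- * node, follow next, stop when the anchor comes around again (sketch
- * with a hypothetical node type):
- *
- *	node = anchor;
- *	do {
- *		visit(node);
- *		node = node->next;
- *	} while (node != anchor);
- */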
-
-
-/*
- *		Checks the pmap skip lists
- *
- *		
- *		cp pmap
- */
-void
-db_check_pmaps(db_expr_t addr, boolean_t have_addr, db_expr_t count,
-	       char *modif)
-{
-	int				i;
-	unsigned int ret;
-	uint64_t dumpa[32];
-	pmap_t pmap;
-	
-	pmap = (pmap_t)addr;
-	if(!have_addr) pmap = kernel_pmap;						/* If no map supplied, start with kernel */
-	
-	while(1) {												/* Do them all */
-		ret = mapSkipListVerifyC(pmap, &dumpa[0]);						/* Check out the map */
-		if(!ret) db_printf("Skiplists verified ok, pmap = %08X\n", pmap);
-		else { 
-			db_printf("Verification failure at %08X, pmap = %08X\n", ret, pmap);
-			for(i = 0; i < 32; i += 4) {
-				db_printf("R%02d  %016llX  %016llX  %016llX  %016llX\n", i,
-					dumpa[i], dumpa[i + 1], dumpa[i + 2], dumpa[i + 3]);
-			}
-		}
-		if(have_addr) break;								/* Do only one if address supplied */
-		pmap = (pmap_t)pmap->pmap_link.next;				/* Skip to the next */
-		if(pmap == kernel_pmap) break;						/* We've wrapped, we're done */
-	}
-}
-
-
-/*
- *		Displays iokit junk
- *
- *		di
- */
-
-void db_piokjunk(void);
-
-void
-db_display_iokit(__unused db_expr_t addr, __unused boolean_t have_addr,
-		 __unused db_expr_t count, __unused char *modif)
-{
-	db_piokjunk();
-}
-
-/*
- *		Prints out a mapping control block
- *
- */
- 
-void db_dumpmapping(struct mapping *mp) { 					/* Dump out a mapping */
-
-	pmap_t pmap;
-	int i;
-
-	db_printf("Dump of mapping block: %08X,  pmap: %08X (%016llX)\n", mp, pmapTrans[mp->mpSpace].pmapVAddr, 
-		pmapTrans[mp->mpSpace].pmapPAddr);			/* Header */
-	db_printf("              mpFlags: %08X\n", mp->mpFlags);                 
-	db_printf("              mpSpace: %04X\n", mp->mpSpace);                 
-	db_printf("              mpBSize: %04X\n", mp->u.mpBSize);                 
-	db_printf("                mpPte: %08X\n", mp->mpPte);                 
-	db_printf("              mpPAddr: %08X\n", mp->mpPAddr);                 
-	db_printf("              mpVAddr: %016llX\n", mp->mpVAddr);                 
-	db_printf("              mpAlias: %016llX\n", mp->mpAlias);                 
-	db_printf("             mpList00: %016llX\n", mp->mpList0);                 
-	
-	for(i = 1; i < (mp->mpFlags & mpLists); i++) {			/* Dump out secondary physical skip lists */
-		db_printf("             mpList%02d: %016llX\n", i, mp->mpList[i - 1]);     
-	}
-}
-
-/*
- *		Prints out a PTEG and PCA
- *
- */
- 
-void db_dumppca(unsigned int ptegindex) { 	
-
-	addr64_t pteg, pca, llva;	
-	unsigned int xpteg[32], xpca[8], space, hash, pva, seg, api, va;
-	int i, s4bit;
-	unsigned long long llslot, llseg, llhash;
-
-	s4bit = ((PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit) != 0);	/* Are we a big guy? */
-
-	pteg = hash_table_base + (ptegindex << 6);				/* Point to the PTEG */
-	if(s4bit) pteg = hash_table_base + (ptegindex << 7);	/* Point to the PTEG */
-	pca  = hash_table_base - ((ptegindex + 1) * 4);			/* Point to the PCA */
-	db_printf("PTEG = %016llX, PCA = %016llX (index = %08X)\n", pteg, pca, ptegindex);
-	
-	ReadReal(pteg, &xpteg[0]);								/* Get first half of the pteg */
-	ReadReal(pteg + 0x20, &xpteg[8]);						/* Get second half of the pteg */
-	ReadReal(pca, &xpca[0]);								/* Get pca */
-
-	db_printf("PCA: free = %02X, steal = %02X, auto = %02X, misc = %02X\n", 
-		((xpca[0] >> 24) & 255), ((xpca[0] >> 16) & 255), ((xpca[0] >> 8) & 255), xpca[0] & 255);
-		
-	if(!s4bit) {											/* Little guy? */
-
-		for(i = 0; i < 16; i += 2) {						/* Step through pteg */
-			db_printf("%08X %08X - ", xpteg[i], xpteg[i + 1]);	/* Dump the pteg slot */
-			
-			if(xpteg[i] & 0x80000000) db_printf("  valid - ");	/* Is it valid? */
-			else db_printf("invalid - ");					/* Nope, invalid */
-		
-			space = (xpteg[i] >> 7) & (maxAdrSp - 1);		/* Extract the space */
-			hash = space | (space << maxAdrSpb) | (space << (2 * maxAdrSpb));	/* Get the hash */
-			pva =  ptegindex ^ hash;						/* Get part of the vaddr */
-			seg = (xpteg[i] >> 7) ^ hash;					/* Get the segment number */
-			api = (xpteg[i] & 0x3F);						/* Get the API */
-			va = ((seg << (28 - maxAdrSpb)) & 0xF0000000) | (api << 22) | ((pva << 12) & 0x003FF000);	/* Get the vaddr */
-			db_printf("va = %08X\n", va);
-		}
-	}
-	else {
-		ReadReal(pteg + 0x40, &xpteg[16]);					/* Get third half of the pteg */
-		ReadReal(pteg + 0x60, &xpteg[24]);					/* Get fourth half of the pteg */
-
-		for(i = 0; i < 32; i += 4) {						/* Step through pteg */
-			db_printf("%08X%08X %08X%08X - ", xpteg[i], xpteg[i + 1], xpteg[i + 2], xpteg[i + 3]);	/* Dump the pteg slot */
-			
-			if(xpteg[i + 1] & 1) db_printf("  valid - ");	/* Is it valid? */
-			else db_printf("invalid - ");					/* Nope, invalid */
-
-			llslot = ((long long)xpteg[i] << 32) | (long long)xpteg[i + 1];	/* Make a long long version of this */ 
-			space = (llslot >> 12) & (maxAdrSp - 1);		/* Extract the space */
-			llhash = (unsigned long long)space | ((unsigned long long)space << maxAdrSpb) | ((unsigned long long)space << (2 * maxAdrSpb));	/* Get the hash */
-			llhash = llhash & 0x0000001FFFFFFFFFULL;		/* Make sure we stay within supported ranges */
-			pva =  (unsigned long long)ptegindex ^ llhash;	/* Get part of the vaddr */
-			llseg = (llslot >> 12) ^ llhash;				/* Get the segment number */
-			api = (llslot >> 7) & 0x1F;						/* Get the API */
-			llva = ((llseg << (28 - maxAdrSpb)) & 0xFFFFFFFFF0000000ULL) | (api << 23) | ((pva << 12) & 0x007FF000);	/* Get the vaddr */
-			db_printf("va = %016llX\n", llva);
-		}
-	}
-}
-
-
-/*
- *		Print out 256 bytes of virtual storage
- *
- *		
- *		dv [entaddr] [space]
- *		address must be on 32-byte boundary.  It will be rounded down if not
- */
-void
-db_display_virtual(db_expr_t addr, boolean_t have_addr, db_expr_t count,
-		   char *modif)
-{
-
-	int			i, size, lines, rlines;
-	unsigned int 	xbuf[8];
-	db_expr_t	xspace;
-	pmap_t		pmap;
-
-	mapping_t	*mp, *mpv;
-	addr64_t	pa;
-	ppnum_t		pnum;
-
-	if (db_expression(&xspace)) {							/* Parse the space ID */
-		if(xspace >= (1 << maxAdrSpb)) {					/* Check if they gave us a sane space number */
-			db_printf("Invalid space ID: %llX - max is %X\n", xspace, (1 << maxAdrSpb) - 1);
-			return;
-		}
-		dvspace = xspace;									/* Get the space or set default */
-	}
-	
-	pmap = (pmap_t)pmapTrans[dvspace].pmapVAddr;			/* Find the pmap address */
-	if((unsigned int)pmap == 0) {							/* Is there actually a pmap here? */
-		db_printf("Address space not found: %X\n", dvspace);	/* Complain */
-		return;
-	}
-	
-	addr &= -32;
-	
-	size = 4096 - (addr & 0x00000FFF);						/* Bytes left on page */
-	lines = size / 32;										/* Number of lines in first or only part */
-	if(lines > 8) lines = 8;
-	rlines = 8 - lines;
-	if(rlines < 0) rlines = 0;
-	
-	db_printf("Dumping %016llX (pmap = %08X, space = %X); ", addr, pmap, dvspace);
-
-	pnum = pmap_find_phys(pmap, (addr64_t)addr);			/* Phynd the Physical */
-	if(!pnum) {												/* Did we find one? */
-		db_printf("Not mapped\n");	
-		return;												/* Didn't find any, return FALSE... */
-	}
-
-	pa = ((addr64_t)pnum << 12) | (addr64_t)(addr & 0xFFF);	/* Get the physical address */
-	db_printf("phys = %016llX\n", pa);
-
-	for(i=0; i<lines; i++) {								/* Print n bytes */
-		ReadReal(pa, &xbuf[0]);								/* Get the real storage data */
-		db_printf("%016llX   %08X %08X %08X %08X  %08X %08X %08X %08X\n", addr,	/* Print a line */
-			xbuf[0], xbuf[1], xbuf[2], xbuf[3], 
-			xbuf[4], xbuf[5], xbuf[6], xbuf[7]);
-		addr = (db_expr_t)(addr + 0x00000020);				/* Point to next address */
-		pa = pa + 0x00000020;								/* Point to next address */
-	}
-	db_next = addr;
-	
-	if(!rlines) return;
-	
-	db_printf("Dumping %016llX (pmap = %08X, space = %X); ", addr, pmap, dvspace);
-
-	pnum = pmap_find_phys(pmap, (addr64_t)((unsigned int)addr));	/* Phynd the Physical */
-	if(!pnum) {												/* Did we find one? */
-		db_printf("Not mapped\n");	
-		return;												/* Didn't find any, return FALSE... */
-	}
-
-	pa = ((addr64_t)pnum << 12) | (addr64_t)((unsigned int)addr & 0xFFF);	/* Get the physical address */
-	db_printf("phys = %016llX\n", pa);
-
-	for(i=0; i<rlines; i++) {								/* Print n bytes */
-		ReadReal(pa, &xbuf[0]);								/* Get the real storage data */
-		db_printf("%016llX   %08X %08X %08X %08X  %08X %08X %08X %08X\n", addr,	/* Print a line */
-			xbuf[0], xbuf[1], xbuf[2], xbuf[3], 
-			xbuf[4], xbuf[5], xbuf[6], xbuf[7]);
-		addr = (db_expr_t)(addr + 0x00000020);				/* Point to next address */
-		pa = pa + 0x00000020;								/* Point to next address */
-	}
-	db_next = addr;
-}
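-
-/*
- * Sketch of the split above: a 256-byte virtual dump may cross a page
- * boundary, and the two pieces need not be physically contiguous, so
- * the address is translated once per page and each piece is dumped
- * separately (translate() and dump_phys() are hypothetical):
- *
- *	size  = 4096 - (addr & 0xFFF);		bytes left on first page
- *	lines = size / 32;
- *	if (lines > 8) lines = 8;		at most 8 lines of 32 bytes
- *	dump_phys(translate(addr), lines);
- *	if (lines < 8)				remainder is on the next page
- *		dump_phys(translate(addr + lines * 32), 8 - lines);
- */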
-
-
-/*
- *		Print out savearea stuff
- *
- *		
- *		ds 
- */
-
-#define chainmax 32
-
-void
-db_display_save(db_expr_t addr, boolean_t have_addr, db_expr_t count,
-		char *modif)
-{
-	int				i, j, totsaves, tottasks, taskact, chainsize, vmid, didvmhead;
-	task_t			task;
-	thread_act_t	act;
-	struct savearea		*save;
-	vmmCntrlTable	*CTable;
-	
-	tottasks = 0;
-	totsaves = 0;
-	
-	for(task = (task_t)tasks.next; task != (task_t)&tasks.next; task = (task_t)task->tasks.next) {	/* Go through the tasks */
-		taskact = 0;								/* Reset activation count */
-		db_printf("\nTask %4d @%08X:\n", tottasks, task);	/* Show where we're at */
-		for(act = (thread_act_t)task->threads.next; act != (thread_act_t)&task->threads; act = (thread_act_t)act->task_threads.next) {	/* Go through activations */
-			db_printf("   Act %4d @%08X - p: %08X  current context: %08X\n",
-					  taskact, act, act->machine.pcb, act->machine.curctx);					
-					
-			save = (struct savearea *)act->machine.pcb; 		/* Set the start of the normal chain */
-			chainsize = 0;
-			
-			db_printf("      General context - fp: %08X  fl: %08X  fc: %d  vp: %08X  vl: %08X  vp: %d\n",
-				act->machine.facctx.FPUsave, act->machine.facctx.FPUlevel, act->machine.facctx.FPUcpu, 		
-				act->machine.facctx.VMXsave, act->machine.facctx.VMXlevel, act->machine.facctx.VMXcpu);
-			
-			while(save) {							/* Do them all */
-				totsaves++;							/* Count savearea */
-				db_printf("         Norm %08X: %016llX %016llX - tot = %d\n", save, save->save_srr0, save->save_srr1, totsaves);
-				save = (struct savearea *)save->save_hdr.save_prev;	/* Next one */
-				if(chainsize++ > chainmax) {		/* See if we might be in a loop */
-					db_printf("         Chain terminated by count (%d) before %08X\n", chainmax, save);
-					break;
-				}
-			}
-			
-			save = (struct savearea *)act->machine.facctx.FPUsave; 	/* Set the start of the floating point chain */
-			chainsize = 0;
-			while(save) {							/* Do them all */
-				totsaves++;							/* Count savearea */
-				db_printf("         FPU  %08X: %08X - tot = %d\n", save, save->save_hdr.save_level, totsaves);
-				save = (struct savearea *)save->save_hdr.save_prev;	/* Next one */
-				if(chainsize++ > chainmax) {		/* See if we might be in a loop */
-					db_printf("         Chain terminated by count (%d) before %08X\n", chainmax, save);
-					break;
-				}
-			}
-			
-			save = (struct savearea *)act->machine.facctx.VMXsave; 	/* Set the start of the vector chain */
-			chainsize = 0;
-			while(save) {							/* Do them all */
-				totsaves++;							/* Count savearea */
-				db_printf("         Vec  %08X: %08X - tot = %d\n", save, save->save_hdr.save_level, totsaves);
-				save = (struct savearea *)save->save_hdr.save_prev;	/* Next one */
-				if(chainsize++ > chainmax) {		/* See if we might be in a loop */
-					db_printf("         Chain terminated by count (%d) before %08X\n", chainmax, save);
-					break;
-				}
-			}
-			
-			if((CTable = act->machine.vmmControl)) {	/* Are there virtual machines? */
-				
-				for(vmid = 0; vmid < kVmmMaxContexts; vmid++) {
-					
-					if(!(CTable->vmmc[vmid].vmmFlags & vmmInUse)) continue;	/* Skip if vm is not in use */
-					
-					if(!CTable->vmmc[vmid].vmmFacCtx.FPUsave && !CTable->vmmc[vmid].vmmFacCtx.VMXsave) continue;	/* If neither type is live, skip this vm */
-					
-					db_printf("      VMachine ID %3d - fp: %08X  fl: %08X  fc: %d  vp: %08X  vl: %08X  vp: %d\n", vmid,	/* Title it */
-						CTable->vmmc[vmid].vmmFacCtx.FPUsave, CTable->vmmc[vmid].vmmFacCtx.FPUlevel, CTable->vmmc[vmid].vmmFacCtx.FPUcpu, 		
-						CTable->vmmc[vmid].vmmFacCtx.VMXsave, CTable->vmmc[vmid].vmmFacCtx.VMXlevel, CTable->vmmc[vmid].vmmFacCtx.VMXcpu
-					);
-					
-					save = (struct savearea *)CTable->vmmc[vmid].vmmFacCtx.FPUsave; 	/* Set the start of the floating point chain */
-					chainsize = 0;
-					while(save) {						/* Do them all */
-						totsaves++;						/* Count savearea */
-						db_printf("         FPU  %08X: %08X - tot = %d\n", save, save->save_hdr.save_level, totsaves);
-						save = (struct savearea *)save->save_hdr.save_prev;	/* Next one */
-						if(chainsize++ > chainmax) {	/* See if we might be in a loop */
-							db_printf("         Chain terminated by count (%d) before %08X\n", chainmax, save);
-							break;
-						}
-					}
-					
-					save = (struct savearea *)CTable->vmmc[vmid].vmmFacCtx.VMXsave; 	/* Set the start of the vector chain */
-					chainsize = 0;
-					while(save) {						/* Do them all */
-						totsaves++;						/* Count savearea */
-						db_printf("         Vec  %08X: %08X - tot = %d\n", save, save->save_hdr.save_level, totsaves);
-						save = (struct savearea *)save->save_hdr.save_prev;	/* Next one */
-						if(chainsize++ > chainmax) {	/* See if we might be in a loop */
-							db_printf("         Chain terminated by count (%d) before %08X\n", chainmax, save);
-							break;
-						}
-					}
-				}
-			}
-			taskact++;
-		}
-		tottasks++;
-	}
-	
-	db_printf("Total saveareas accounted for: %d\n", totsaves);
-}
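-
-/*
- * Note: every savearea chain above is walked with the same guard, a
- * hard cap (chainmax) so that a corrupted or circular save_prev chain
- * cannot hang the debugger.  The pattern in isolation (hypothetical
- * node type):
- *
- *	int n = 0;
- *	for (node = head; node; node = node->prev) {
- *		visit(node);
- *		if (++n > CHAIN_MAX) {		probable loop, bail out
- *			report_truncated(node);
- *			break;
- *		}
- *	}
- */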
-
-/*
- *		Print out extra registers
- *
- *		
- *		dx 
- */
-
-extern unsigned int dbfloats[33][2];
-extern unsigned int dbvecs[33][4];
-extern unsigned int dbspecrs[336];
-
-void
-db_display_xregs(db_expr_t addr, boolean_t have_addr, db_expr_t count,
-		 char *modif)
-{
-	int				i, j, pents;
-
-	stSpecrs(dbspecrs);										/* Save special registers */
-	if(PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit) {
-		db_printf("PIR:    %08X\n", dbspecrs[0]);
-		db_printf("PVR:    %08X\n", dbspecrs[1]);
-		db_printf("SDR1:   %08X.%08X\n", dbspecrs[26], dbspecrs[27]);
-		db_printf("HID0:   %08X.%08X\n", dbspecrs[28], dbspecrs[29]);
-		db_printf("HID1:   %08X.%08X\n", dbspecrs[30], dbspecrs[31]);
-		db_printf("HID4:   %08X.%08X\n", dbspecrs[32], dbspecrs[33]);
-		db_printf("HID5:   %08X.%08X\n", dbspecrs[34], dbspecrs[35]);
-		db_printf("SPRG0:  %08X.%08X %08X.%08X\n", dbspecrs[18], dbspecrs[19], dbspecrs[20], dbspecrs[21]);
-		db_printf("SPRG2:  %08X.%08X %08X.%08X\n", dbspecrs[22], dbspecrs[23], dbspecrs[24], dbspecrs[25]);
-		db_printf("\n");
-		for(i = 0; i < (64 * 4); i += 4) {
-			db_printf("SLB %02d: %08X.%08X %08X.%08X\n", i / 4, dbspecrs[80 + i], dbspecrs[81 + i], dbspecrs[82 + i], dbspecrs[83 + i]);
-		}
-	}
-	else {	
-		db_printf("PIR:    %08X\n", dbspecrs[0]);
-		db_printf("PVR:    %08X\n", dbspecrs[1]);
-		db_printf("SDR1:   %08X\n", dbspecrs[22]);
-		db_printf("HID0:   %08X\n", dbspecrs[39]);
-		db_printf("HID1:   %08X\n", dbspecrs[40]);
-		db_printf("L2CR:   %08X\n", dbspecrs[41]);
-		db_printf("MSSCR0: %08X\n", dbspecrs[42]);
-		db_printf("MSSCR1: %08X\n", dbspecrs[43]);
-		db_printf("THRM1:  %08X\n", dbspecrs[44]);
-		db_printf("THRM2:  %08X\n", dbspecrs[45]);
-		db_printf("THRM3:  %08X\n", dbspecrs[46]);
-		db_printf("ICTC:   %08X\n", dbspecrs[47]);
-		db_printf("L2CR2:  %08X\n", dbspecrs[48]);
-		db_printf("DABR:   %08X\n", dbspecrs[49]);
-	
-		db_printf("DBAT: %08X %08X %08X %08X\n", dbspecrs[2], dbspecrs[3], dbspecrs[4], dbspecrs[5]);
-		db_printf("      %08X %08X %08X %08X\n", dbspecrs[6], dbspecrs[7], dbspecrs[8], dbspecrs[9]);
-		db_printf("IBAT: %08X %08X %08X %08X\n", dbspecrs[10], dbspecrs[11], dbspecrs[12], dbspecrs[13]);
-		db_printf("      %08X %08X %08X %08X\n", dbspecrs[14], dbspecrs[15], dbspecrs[16], dbspecrs[17]);
-		db_printf("SPRG: %08X %08X %08X %08X\n", dbspecrs[18], dbspecrs[19], dbspecrs[20], dbspecrs[21]);
-		db_printf("\n");
-		for(i = 0; i < 16; i += 8) {						/* Print 8 at a time */
-			db_printf("SR%02d: %08X %08X %08X %08X %08X %08X %08X %08X\n", i,
-				dbspecrs[23+i], dbspecrs[24+i], dbspecrs[25+i], dbspecrs[26+i], 
-				dbspecrs[27+i], dbspecrs[28+i], dbspecrs[29+i], dbspecrs[30+i]); 
-		}
-	}
-	
-	db_printf("\n");
-
-	stFloat(dbfloats);										/* Save floating point registers */
-	for(i = 0; i < 32; i += 4) {							/* Print 4 at a time */
-		db_printf("F%02d: %08X %08X  %08X %08X  %08X %08X  %08X %08X\n", i,
-			dbfloats[i][0], dbfloats[i][1], dbfloats[i+1][0], dbfloats[i+1][1], 
-			dbfloats[i+2][0], dbfloats[i+2][1], dbfloats[i+3][0], dbfloats[i+3][1]); 
-	}
-	db_printf("FCR: %08X %08X\n", dbfloats[32][0], dbfloats[32][1]);	/* Print FSCR */
-	
-	if(!stVectors(dbvecs)) return;							/* Return if not Altivec capable */
-	
-	db_printf("\n");
-	
-	for(i = 0; i < 32; i += 2) {							/* Print 2 at a time */
-		db_printf("V%02d: %08X %08X %08X %08X  %08X %08X %08X %08X\n", i,
-			dbvecs[i][0], dbvecs[i][1], dbvecs[i][2], dbvecs[i][3], 
-			dbvecs[i+1][0], dbvecs[i+1][1], dbvecs[i+1][2], dbvecs[i+1][3]); 
-	}
-	db_printf("VCR: %08X %08X %08X %08X\n", dbvecs[32][0], dbvecs[32][1], dbvecs[32][2], dbvecs[32][3]);	/* Print VSCR */
-
-	/* Tell them we did it */
-}
-
-/*
- *		Check mappings and hash table for consistency
- *
-  *		cm
- */
-void
-db_check_mappings(db_expr_t addr, boolean_t have_addr, db_expr_t count,
-		  char *modif)
-{
-	addr64_t  pteg, pca, llva, lnextva;	
-	unsigned int xpteg[32], xpca[8], space, hash, pva, seg, api, va, free, free2, xauto, PTEGcnt, wimgkk, wimgxx, slotoff;
-	int i, j, fnderr, slot, slot2, k, s4bit;
-	pmap_t pmap;
-	mapping_t *mp;
-	ppnum_t ppn, pa, aoff;
-	unsigned long long llslot, llseg, llhash;
-	
-	s4bit = 0;												/* Assume dinky? */
-	if(PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit) s4bit = 1;	/* Are we a big guy? */
-	
-	PTEGcnt = hash_table_size / 64;							/* Get the number of PTEGS */
-	if(s4bit) PTEGcnt = PTEGcnt / 2;						/* PTEGs are twice as big */	
-
-	pteg = hash_table_base;									/* Start of hash table */
-	pca = hash_table_base - 4;								/* Start of PCA */
-	
-	for(i = 0; i < PTEGcnt; i++) {							/* Step through them all */
-
-		fnderr = 0;
-	
-		ReadReal(pteg, &xpteg[0]);							/* Get first half of the pteg */
-		ReadReal(pteg + 0x20, &xpteg[8]);					/* Get second half of the pteg */
-		if(s4bit) {											/* See if we need the other half */
-			ReadReal(pteg + 0x40, &xpteg[16]);				/* Get third half of the pteg */
-			ReadReal(pteg + 0x60, &xpteg[24]);				/* Get fourth half of the pteg */
-		}
-		ReadReal(pca, &xpca[0]);							/* Get pca */
-	
-		if(xpca[0] & 0x00000001) {							/* Is PCA locked? */
-			db_printf("Unexpected locked PCA\n");			/* Yeah, this may be bad */
-			fnderr = 1;										/* Remember to print the pca/pteg pair later */
-		}
-
-		free = 0x80000000;
-		
-		for(j = 0; j < 7; j++) {							/* Search for duplicates */
-			slot = j * 2;									/* Point to the slot */
-			if(s4bit) slot = slot * 2;						/* Adjust for bigger slots */
-			if(!(xpca[0] & free)) {							/* Check more if slot is allocated */
-				for(k = j + 1; k < 8; k++) {				/* Search remaining slots */
-					slot2 = k * 2;							/* Point to the slot */
-					if(s4bit) slot2 = slot2 * 2;			/* Adjust for bigger slots */
-					if((xpteg[slot] == xpteg[slot2]) 
-					   && (!s4bit || (xpteg[slot + 1] == xpteg[slot2 + 1]))) {		/* Do we have duplicates? */
-						db_printf("Duplicate tags in pteg, slot %d and slot %d\n", j, k);
-						fnderr = 1;
-					}
-				}
-			}
-			free = free >> 1;								/* Move slot over */
-		}
-		
-		free = 0x80000000;
-		xauto = 0x00008000;
-
-		for(j = 0; j < 8; j++) {							/* Step through the slots */
-		
-			slot = j * 2;									/* Point to the slot */
-			if(s4bit) slot = slot * 2;						/* Hagfish? */
-			if(xpca[0] & free) {							/* Check if marked free */
-				if((!s4bit && (xpteg[slot] & 0x80000000))	/* Is a supposedly free slot valid? */
-				   || (s4bit && (xpteg[slot + 1] & 1))) {	
-					db_printf("Free slot still valid - %d\n", j);	
-					fnderr = 1;
-				}	
-			}
-			else {											/* We have an in use slot here */
-								
-				if(!(!s4bit && (xpteg[slot] & 0x80000000))	/* Is a supposedly in use slot valid? */
-				   && !(s4bit && (xpteg[slot + 1] & 1))) {	
-					db_printf("Inuse slot not valid - %d\n", j);	
-					fnderr = 1;
-				}	
-				else {										/* Slot is valid, check mapping */
-					if(!s4bit) {							/* Not Hagfish? */
-						space = (xpteg[slot] >> 7) & (maxAdrSp - 1);	/* Extract the space */
-						hash = space | (space << maxAdrSpb) | (space << (2 * maxAdrSpb));	/* Get the hash */
-						pva =  i ^ hash;					/* Get part of the vaddr */
-						seg = (xpteg[slot] >> 7) ^ hash;	/* Get the segment number */
-						api = (xpteg[slot] & 0x3F);			/* Get the API */
-						va = ((seg << (28 - maxAdrSpb)) & 0xF0000000) | (api << 22) | ((pva << 12) & 0x003FF000);	/* Get the vaddr */
-						llva = (addr64_t)va;				/* Make this a long long */
-						wimgxx = xpteg[slot + 1] & 0x7F;	/* Get the wimg and pp */
-						ppn = xpteg[slot + 1] >> 12;		/* Get physical page number */
-						slotoff = ((i * 64) + (j * 8)) | 1;	/* Get offset to slot and valid bit */
-					}
-					else {									/* Yes, Hagfish */
-						llslot = ((long long)xpteg[slot] << 32) | (long long)xpteg[slot + 1];	/* Make a long long version of this */ 
-						space = (llslot >> 12) & (maxAdrSp - 1);	/* Extract the space */
-						llhash = (unsigned long long)space | ((unsigned long long)space << maxAdrSpb) | ((unsigned long long)space << (2 * maxAdrSpb));	/* Get the hash */
-						llhash = llhash & 0x0000001FFFFFFFFFULL;	/* Make sure we stay within supported ranges */
-						pva =  i ^ llhash;					/* Get part of the vaddr */
-						llseg = ((llslot >> 12) ^ llhash);	/* Get the segment number */
-						api = (llslot >> 7) & 0x1F;			/* Get the API */
-						llva = ((llseg << (28 - maxAdrSpb)) & 0xFFFFFFFFF0000000ULL) | (api << 23) | ((pva << 12) & 0x007FF000);	/* Get the vaddr */
-						wimgxx = xpteg[slot + 3] & 0x7F;	/* Get the wimg and pp */
-						ppn =  (xpteg[slot + 2] << 20) | (xpteg[slot + 3] >> 12);	/* Get physical page number */
-						slotoff = ((i * 128) + (j * 16)) | 1;	/* Get offset to slot and valid bit */
-					}
-					
-					pmap = pmapTrans[space].pmapVAddr;	/* Find the pmap address */
-					if(!pmap) {								/* The pmap is not in use */
-						db_printf("The space %08X is not assigned to a pmap, slot = %d\n", space, slot);	/* Say we are wrong */
-						fnderr = 1;
-						goto dcmout;
-					}
-
-					if (pmap->pmapFlags & pmapVMgsaa) {
-						unsigned int ret;
-						mapping_t mpcopy;
-						ret = hw_find_map_gv(pmap, llva, &mpcopy);
-					} else {
-						mp = hw_find_map(pmap, llva, &lnextva);		/* Try to find the mapping for this address */
-	//					db_printf("%08X - %017llX\n", mp, llva);
-						if((unsigned int)mp == mapRtBadLk) {	/* Did we lock up ok? */
-							db_printf("Timeout locking mapping for for virtual address %016ll8X, slot = %d\n", llva, j);	
-							return;
-						}
-						
-						if(!mp) {								/* Did we find one? */
-							db_printf("Not mapped, slot = %d, va = %08X\n", j, (unsigned int)llva);	
-							fnderr = 1;
-							goto dcmout;
-						}
-						
-						if((mp->mpFlags & 0xFF000000) > 0x01000000) {	/* Is busy count too high? */
-							db_printf("Busy count too high, slot = %d\n", j);
-							fnderr = 1;
-						}
-						
-						if((mp->mpFlags & mpType) == mpBlock) {		/* Is this a block map? */
-							if(!(xpca[0] & xauto)) {				/* Is it marked as such? */
-								db_printf("mapping marked as block, PCA is not, slot = %d\n", j);
-								fnderr = 1;
-							}
-						}
-						else {									/* Is a block */
-							if(xpca[0] & xauto) {				/* Is it marked as such? */
-								db_printf("mapping not marked as block, PCA is, slot = %d\n", j);
-								fnderr = 1;
-							}
-							if(mp->mpPte != slotoff) {			/* See if mapping PTEG offset is us */
-								db_printf("mapping does not point to PTE, slot = %d\n", j);
-								fnderr = 1;
-							}
-						}
-					
-						wimgkk = (unsigned int)mp->mpVAddr;		/* Get last half of vaddr where keys, etc are */
-						wimgkk = (wimgkk ^ wimgxx) & 0x7F;		/* XOR to find differences from PTE */
-						if(wimgkk) {							/* See if key in PTE is what we want */
-							db_printf("key or WIMG does not match, slot = %d\n", j);
-							fnderr = 1;
-						}
-						
-						aoff = (ppnum_t)((llva >> 12) - (mp->mpVAddr >> 12));	/* Get the offset from vaddr */
-						pa = aoff + mp->mpPAddr;				/* Get the physical page number we expect */
-						if(pa != ppn) {							/* Is physical address expected? */
-							db_printf("Physical address does not match, slot = %d\n", j);
-							fnderr = 1;
-						}
-		
-						mapping_drop_busy(mp);					/* We're done with the mapping */
-					}
-				}
-				
-			}
-dcmout:
-			free = free >> 1;
-			xauto = xauto >> 1;
-		}
-
-
-		if(fnderr) db_dumppca(i);							/* Print if error */
-
-		pteg = pteg + 64;									/* Go to the next one */
-		if(s4bit) pteg = pteg + 64;							/* Hagfish? */
-		pca = pca - 4;										/* Go to the next one */
-
-
-	}
-}
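-
-/*
- * Note on the slot scans above: two one-bit masks walk down the PCA
- * word in step with the slot index, free for the allocation bit and
- * xauto for the block-mapping bit, each shifted right once per slot.
- * The idiom in isolation (names hypothetical):
- *
- *	unsigned int mask = 0x80000000;
- *	for (j = 0; j < 8; j++) {
- *		if (word & mask) slot_is_free(j);
- *		mask >>= 1;
- *	}
- */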
-
-/*
- *		Displays all of the kmods in the system.
- *
- *		dk
- */
-void
-db_display_kmod(db_expr_t addr, boolean_t have_addr, db_expr_t count,
-		char *modif)
-{
-	kmod_info_t	*kmd;
-	unsigned int strt, end;
-	
-	kmd = kmod;							/* Start at the start */
-	
-	db_printf("info      addr      start    - end       name ver\n");
-
-	while(kmd) {						/* Dump 'em all */
-		strt = (unsigned int)kmd->address + kmd->hdr_size;	/* Get start of kmod text */
-		end = (unsigned int)kmd->address + kmd->size;			/* Get end of kmod */
-		db_printf("%08X  %08X  %08X - %08X: %s, %s\n", kmd, kmd->address, strt, end, 
-			kmd->name, kmd->version);
-		kmd = kmd->next;				/* Step to it */
-	}
-}
-
-/*
- *		Displays stuff
- *
- *		gs
- */
-unsigned char xxgpo[36] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-
-void
-db_gsnoop(db_expr_t addr, boolean_t have_addr, db_expr_t count, char *modif)
-{
-	int i, j;
-	unsigned char *gp, gpn[36];
-#define ngpr 34
-	
-	gp = (unsigned char *)0x8000005C;
-	
-	for(i = 0; i < ngpr; i++) gpn[i] = gp[i];	/* Copy 'em */
-	
-	for(i = 0; i < ngpr; i++) {
-		db_printf("%02X ", gpn[i]);
-	}
-	db_printf("\n");
-	
-	for(i = 0; i < ngpr; i++) {
-		if(gpn[i] != xxgpo[i]) db_printf("^^ ");
-		else  db_printf("   ");
-	}
-	db_printf("\n");
-	
-	for(i = 0; i < ngpr; i++) xxgpo[i] = gpn[i];	/* Save 'em */
-}
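-
-/*
- * Sketch of the snapshot-diff idiom db_gsnoop uses: copy the register
- * block, print it, mark the bytes that changed since the last call,
- * then keep the new copy as the reference (N and regs are hypothetical
- * stand-ins for ngpr and the device address):
- *
- *	static unsigned char prev[N];
- *	unsigned char now[N];
- *	for (i = 0; i < N; i++) now[i] = regs[i];	snapshot
- *	for (i = 0; i < N; i++)
- *		db_printf(now[i] != prev[i] ? "^^ " : "   ");
- *	for (i = 0; i < N; i++) prev[i] = now[i];	save for next time
- */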
-
-
-void Dumbo(void);
-void Dumbo(void){
-}
diff --git a/osfmk/ppc/db_low_trace.h b/osfmk/ppc/db_low_trace.h
deleted file mode 100644
index efc3faedb..000000000
--- a/osfmk/ppc/db_low_trace.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_FREE_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-
-#ifndef	_DDB_DB_LTR_H_
-#define	_DDB_DB_LTR_H_
-
-#include <machine/db_machdep.h>
-#include <kern/task.h>
-
-/*
- * Prototypes for functions exported by this module.
- */
-
-void db_list_pmap(db_expr_t, boolean_t, db_expr_t, char *);
-void db_low_trace(db_expr_t, boolean_t, db_expr_t, char *);
-void db_display_long(db_expr_t, boolean_t, db_expr_t, char *);
-void db_display_char(db_expr_t, boolean_t, db_expr_t, char *);
-void db_display_real(db_expr_t, boolean_t, db_expr_t, char *);
-void db_display_virtual(db_expr_t, boolean_t, db_expr_t, char *);
-void db_display_mappings(db_expr_t, boolean_t, db_expr_t, char *);
-void db_display_hash(db_expr_t, boolean_t, db_expr_t, char *);
-void db_display_pmap(db_expr_t, boolean_t, db_expr_t, char *);
-void db_display_iokit(db_expr_t, boolean_t, db_expr_t, char *);
-void db_display_save(db_expr_t, boolean_t, db_expr_t, char *);
-void db_display_xregs(db_expr_t, boolean_t, db_expr_t, char *);
-void db_display_kmod(db_expr_t, boolean_t, db_expr_t, char *);
-void db_gsnoop(db_expr_t, boolean_t, db_expr_t count, char *);
-void db_check_mappings(db_expr_t, boolean_t, db_expr_t, char *);
-void db_check_pmaps(db_expr_t, boolean_t, db_expr_t, char *);
-
-#endif	/* !_DDB_DB_LTR_H_ */
diff --git a/osfmk/ppc/db_machdep.h b/osfmk/ppc/db_machdep.h
deleted file mode 100644
index cb9162c4e..000000000
--- a/osfmk/ppc/db_machdep.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-
-#ifndef	_PPC_DB_MACHDEP_H_
-#define	_PPC_DB_MACHDEP_H_
-
-/*
- * Machine-dependent defines for new kernel debugger.
- */
-
-#include <kern/kern_types.h>
-#include <mach/ppc/vm_types.h>
-#include <mach/ppc/vm_param.h>
-#include <kern/thread.h>
-#include <ppc/trap.h>
-#include <ppc/proc_reg.h>
-#include <ppc/savearea.h>
-
-typedef	addr64_t db_addr_t;	/* address - unsigned */
-typedef	uint64_t db_expr_t;	/* expression - signed???  try unsigned */
-
-typedef struct savearea db_regs_t;
-extern db_regs_t	ddb_regs;	/* register state */
-#define	DDB_REGS	(&ddb_regs)
-extern int	db_active;	/* ddb is active */
-
-#define	PC_REGS(regs)	((db_addr_t)(regs)->save_srr0)
-
-#define	BKPT_INST	0x7c810808	/* breakpoint instruction */
-#define	BKPT_SIZE	(4)		/* size of breakpoint inst */
-#define	BKPT_SET(inst)	(BKPT_INST)
-
-#define db_clear_single_step(regs)	((regs)->save_srr1 &= ~MASK(MSR_SE))
-#define db_set_single_step(regs)	((regs)->save_srr1 |= MASK(MSR_SE))
-
-#define	IS_BREAKPOINT_TRAP(type, code)	(FALSE)
-#define IS_WATCHPOINT_TRAP(type, code)	(FALSE)
-
-#define	inst_trap_return(ins)	(FALSE)
-#define	inst_return(ins)	(FALSE)
-#define	inst_call(ins)		(FALSE)
-
-int db_inst_load(unsigned long);
-int db_inst_store(unsigned long);
-
-/* access capability and access macros */
-
-#define DB_ACCESS_LEVEL	DB_ACCESS_ANY	/* any space */
-#define DB_CHECK_ACCESS(addr,size,task)				\
-	db_check_access(addr,size,task)
-#define DB_PHYS_EQ(task1,addr1,task2,addr2)			\
-	db_phys_eq(task1,addr1,task2,addr2)
-#define DB_VALID_KERN_ADDR(addr)				\
-	((addr) >= VM_MIN_KERNEL_ADDRESS && 			\
-	 (addr) < vm_last_addr)
-#define DB_VALID_ADDRESS(addr,user)				\
-	((!(user) && DB_VALID_KERN_ADDR(addr)) ||		\
-	 ((user) && (addr) < VM_MAX_ADDRESS))
-
-/*
- * Given pointer to savearea, determine if it represents
- * a thread executing a) in user space, b) in the kernel, or c)
- * in a kernel-loaded task.  Return true for cases a) and c).
- */
-#define IS_USER_TRAP(regs)	\
-     (USER_MODE(regs->save_srr1))
-
-extern boolean_t	db_check_access(
-				vm_offset_t	addr,
-				int		size,
-				task_t		task);
-extern boolean_t	db_phys_eq(
-				task_t		task1,
-				vm_offset_t	addr1,
-				task_t		task2,
-				vm_offset_t	addr2);
-extern db_addr_t	db_disasm(
-				db_addr_t	loc,
-				boolean_t	altfmt,
-				task_t		task);
-extern void		db_read_bytes(
-				vm_offset_t	addr,
-				int		size,
-				char		*data,
-				task_t		task);
-extern void		db_write_bytes(
-				vm_offset_t	addr,
-				int		size,
-				char		*data,
-				task_t		task);
-extern void		db_stack_trace_cmd(
-				db_expr_t	addr,
-				boolean_t	have_addr,
-				db_expr_t	count,
-				char		*modif);
-extern void		db_reboot(
-				db_expr_t	addr,
-				boolean_t	have_addr,
-				db_expr_t	count,
-				char		*modif);
-
-/* macros for printing OS server dependent task name */
-
-#define DB_TASK_NAME(task)	db_task_name(task)
-#define DB_TASK_NAME_TITLE	"COMMAND                                "
-#define DB_TASK_NAME_LEN	39
-#define DB_NULL_TASK_NAME	"?                      "
-
-extern void		db_task_name(
-				task_t			task);
-
-/* macro for checking if a thread has used floating-point */
-
-#define db_act_fp_used(act)	(FALSE)
-
-extern void		kdb_trap(
-				int			type,
-				struct savearea	*regs);
-extern boolean_t	db_trap_from_asm(
-				struct savearea *regs);
-extern void		kdb_on(
-				int			cpu);
-extern void		cnpollc(
-				boolean_t		on);
-
-extern boolean_t	db_phys_cmp(
-				vm_offset_t, 
-				vm_offset_t, 
-				vm_size_t);
-
-#endif	/* _PPC_DB_MACHDEP_H_ */
diff --git a/osfmk/ppc/db_trace.c b/osfmk/ppc/db_trace.c
deleted file mode 100644
index 601378162..000000000
--- a/osfmk/ppc/db_trace.c
+++ /dev/null
@@ -1,1122 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#include <string.h>
-
-#include <mach/boolean.h>
-#include <mach/machine.h>
-
-#include <vm/vm_map.h>
-
-#include <kern/thread.h>
-#include <kern/processor.h>
-#include <kern/task.h>
-
-#include <ppc/cpu_internal.h>
-#include <ppc/exception.h>
-
-#include <machine/asm.h>
-#include <machine/db_machdep.h>
-#include <machine/setjmp.h>
-
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_command.h>
-#include <ddb/db_task_thread.h>
-#include <ddb/db_output.h>
-
-extern jmp_buf_t *db_recover;
-
-struct savearea ddb_null_kregs;
-
-extern vm_offset_t vm_min_inks_addr;	/* set by db_clone_symtabXXX */
-
-#define DB_NUMARGS_MAX	5
-
-#define	INFIXEDSTACK(va)	0
-
-#define INKERNELSTACK(va, th) 1
-
-struct db_ppc_frame {
-	struct db_ppc_frame	*f_frame;
-	int			pad1;
-	uint32_t	f_retaddr;
-	int			pad3;
-	int			pad4;
-	int			pad5;
-	uint32_t	f_arg[DB_NUMARGS_MAX];
-};
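-
-/*
- * Note: the PowerPC ABI chains stack frames through the first word of
- * each frame, and db_ppc_frame mirrors that layout, so a backtrace is
- * just a back-chain walk that reads the saved return address out of
- * each frame (sketch, bounds checks omitted; MAX_DEPTH and print_addr
- * are hypothetical):
- *
- *	struct db_ppc_frame *fp;
- *	int depth = 0;
- *	for (fp = top; fp != 0 && depth < MAX_DEPTH; depth++) {
- *		print_addr(fp->f_retaddr);	saved LR for this frame
- *		fp = fp->f_frame;		follow the back chain
- *	}
- */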
-
-#define	TRAP		1
-#define	INTERRUPT	2
-#define SYSCALL		3
-
-db_addr_t	db_user_trap_symbol_value = 0;
-db_addr_t	db_kernel_trap_symbol_value = 0;
-db_addr_t	db_interrupt_symbol_value = 0;
-db_addr_t	db_return_to_iret_symbol_value = 0;
-db_addr_t	db_syscall_symbol_value = 0;
-boolean_t	db_trace_symbols_found = FALSE;
-
-static int db_ppc_reg_value(
-			struct db_variable	* vp,
-			db_expr_t		* val,
-			int			flag,
-			db_var_aux_param_t	ap);
-static void db_find_trace_symbols(void);
-static int db_numargs(
-			struct db_ppc_frame	*fp,
-			task_t			task);
-static boolean_t db_find_arg(
-			struct db_ppc_frame	*frame,
-			db_addr_t		calleepc,
-			task_t			task,
-			int			narg,
-			db_addr_t		*arg);
-static void db_nextframe(
-			struct db_ppc_frame	**lfp,
-			struct db_ppc_frame	**fp,
-			db_addr_t		*ip,
-			int			frame_type,
-			thread_act_t		thr_act,
-			db_addr_t		linkpc);
-
-/*
- * Machine register set.
- */
-struct db_variable db_regs[] = {
-	/* XXX "pc" is an alias to "srr0"... */
-	{
-		.name = "pc",
-		.valuep = &ddb_regs.save_srr0,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "srr0",
-		.valuep = &ddb_regs.save_srr0,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "srr1",
-		.valuep = &ddb_regs.save_srr1,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r0",
-		.valuep = &ddb_regs.save_r0,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r1",
-		.valuep = &ddb_regs.save_r1,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r2",
-		.valuep = &ddb_regs.save_r2,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r3",
-		.valuep = &ddb_regs.save_r3,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r4",
-		.valuep = &ddb_regs.save_r4,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r5",
-		.valuep = &ddb_regs.save_r5,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r6",
-		.valuep = &ddb_regs.save_r6,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r7",
-		.valuep = &ddb_regs.save_r7,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r8",
-		.valuep = &ddb_regs.save_r8,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r9",
-		.valuep = &ddb_regs.save_r9,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r10",
-		.valuep = &ddb_regs.save_r10,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r11",
-		.valuep = &ddb_regs.save_r11,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r12",
-		.valuep = &ddb_regs.save_r12,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r13",
-		.valuep = &ddb_regs.save_r13,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r14",
-		.valuep = &ddb_regs.save_r14,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r15",
-		.valuep = &ddb_regs.save_r15,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r16",
-		.valuep = &ddb_regs.save_r16,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r17",
-		.valuep = &ddb_regs.save_r17,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r18",
-		.valuep = &ddb_regs.save_r18,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r19",
-		.valuep = &ddb_regs.save_r19,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r20",
-		.valuep = &ddb_regs.save_r20,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r21",
-		.valuep = &ddb_regs.save_r21,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r22",
-		.valuep = &ddb_regs.save_r22,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r23",
-		.valuep = &ddb_regs.save_r23,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r24",
-		.valuep = &ddb_regs.save_r24,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r25",
-		.valuep = &ddb_regs.save_r25,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r26",
-		.valuep = &ddb_regs.save_r26,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r27",
-		.valuep = &ddb_regs.save_r27,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r28",
-		.valuep = &ddb_regs.save_r28,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r29",
-		.valuep = &ddb_regs.save_r29,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r30",
-		.valuep = &ddb_regs.save_r30,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "r31",
-		.valuep = &ddb_regs.save_r31,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "cr",
-		.valuep = (db_expr_t *)&ddb_regs.save_cr,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "xer",
-		.valuep = &ddb_regs.save_xer,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "lr",
-		.valuep = &ddb_regs.save_lr,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-	{
-		.name = "ctr",
-		.valuep = &ddb_regs.save_ctr,
-		.fcn = db_ppc_reg_value,
-		.min_level = 0,
-		.max_level = 0,
-		.low = 0,
-		.high = 0,
-		.hidden_level = TRUE,
-	},
-};
-struct db_variable *db_eregs = db_regs + sizeof(db_regs)/sizeof(db_regs[0]);
-
-int
-db_ppc_reg_value(
-	struct	db_variable	*vp,
-	db_expr_t		*valuep,
-	int			flag,
-	db_var_aux_param_t	ap)
-{
-	db_expr_t *dp = 0;
-	db_expr_t null_reg = 0;
-	uint32_t *dp32;
-	thread_act_t thr_act = ap->thr_act;
-	unsigned int cpu;
-
-	if (db_option(ap->modif, 'u')) {
-		if (thr_act == THR_ACT_NULL) {
-			if ((thr_act = current_thread()) == THR_ACT_NULL)
-				db_error("no user registers\n");
-		}
-		if (thr_act == current_thread()) {
-			if (IS_USER_TRAP((&ddb_regs))) dp = vp->valuep;
-			else if (INFIXEDSTACK(ddb_regs.save_r1))
-				db_error("cannot get/set user registers in nested interrupt\n");
-		}
-	} 
-	else {
-		if (thr_act == THR_ACT_NULL || thr_act == current_thread()) {
-			dp = vp->valuep;
-		} 
-		else {
-			if (thr_act->kernel_stack) {
-				for (cpu = 0; cpu < real_ncpus; cpu++) {
-					if (cpu_to_processor(cpu)->state == PROCESSOR_RUNNING &&
-							cpu_to_processor(cpu)->active_thread == thr_act &&
-							PerProcTable[cpu].ppe_vaddr->db_saved_state) {
-
-						dp = (db_expr_t *)(((uint32_t)(PerProcTable[cpu].ppe_vaddr->db_saved_state)) +
-								(((uint32_t) vp->valuep) -
-								 (uint32_t) &ddb_regs));
-						break;
-					}
-				}
-
-				if (dp == 0)
-					dp = &null_reg;
-			} 
-			else {
-				/* only PC is valid */
-				if (vp->valuep == &ddb_regs.save_srr0)
-					dp = (db_expr_t *)&thr_act->continuation;
-				else
-					dp = &null_reg;
-			}
-		}
-	}
-	if (dp == 0) {
-		if (!db_option(ap->modif, 'u')) {
-			for (cpu = 0; cpu < real_ncpus; cpu++) {
-				if (cpu_to_processor(cpu)->state == PROCESSOR_RUNNING &&
-						cpu_to_processor(cpu)->active_thread == thr_act &&
-						PerProcTable[cpu].ppe_vaddr->db_saved_state) {
-					dp = (int *) (((int)(PerProcTable[cpu].ppe_vaddr->db_saved_state)) +
-							(((int) vp->valuep) - (int) &ddb_regs));
-					break;
-				}
-			}
-		}
-		if (dp == 0) {
-			if (!thr_act || thr_act->machine.pcb == 0)
-				db_error("no pcb\n");
-			dp = (int *)((int)thr_act->machine.pcb + ((int)vp->valuep - (int)&ddb_regs));
-		}
-	}
-
-	if(vp->valuep == (db_expr_t *)&ddb_regs.save_cr) {	/* Is this the CR we are doing? */
-		dp32 = (uint32_t *)dp;						/* Make this easier */
-		if (flag == DB_VAR_SET)
-			*dp32 = *valuep;
-		else
-			*valuep = *dp32;
-	}
-	else {											/* Normal 64-bit registers */
-		if (flag == DB_VAR_SET)
-			*dp = *valuep;
-		else
-			*valuep = *(unsigned long long *)dp;
-	}
-
-	return 0;
-}
-
-
-void
-db_find_trace_symbols(void)
-{
-	db_expr_t	value;
-	boolean_t	found_some;
-
-	found_some = FALSE;
-	if (db_value_of_name(CC_SYM_PREFIX "thandler", &value)) {
-		db_user_trap_symbol_value = (db_addr_t) value;
-		found_some = TRUE;
-	}
-	if (db_value_of_name(CC_SYM_PREFIX "thandler", &value)) {
-		db_kernel_trap_symbol_value = (db_addr_t) value;
-		found_some = TRUE;
-	}
-	if (db_value_of_name(CC_SYM_PREFIX "ihandler", &value)) {
-		db_interrupt_symbol_value = (db_addr_t) value;
-		found_some = TRUE;
-	}
-#if 0
-	if (db_value_of_name(CC_SYM_PREFIX "return_to_iret", &value)) {
-		db_return_to_iret_symbol_value = (db_addr_t) value;
-		found_some = TRUE;
-	}
-#endif
-	if (db_value_of_name(CC_SYM_PREFIX "thandler", &value)) {
-		db_syscall_symbol_value = (db_addr_t) value;
-		found_some = TRUE;
-	}
-	if (found_some) 
-		db_trace_symbols_found = TRUE;
-}
-
-int
-db_numargs(
-	struct db_ppc_frame	*fp,
-	task_t			task)
-{
-	return DB_NUMARGS_MAX;
-}
-
-boolean_t
-db_find_arg(
-	struct db_ppc_frame 	*fp,
-	db_addr_t		calleepc,
-	task_t			task,
-	int			narg,
-	db_addr_t		*arg)
-{
-	db_addr_t	argp;
-	db_addr_t	calleep;
-	db_addr_t   	offset;
-	int		i;
-	int		inst;
-	char 		*name;
-
-#if	0
-	db_find_task_sym_and_offset(calleepc, &name, &offset, task);
-	calleep = calleepc-offset;
-
-	for (i = 0; calleep < calleepc; i++, calleep++) {
-		if (!DB_CHECK_ACCESS((int) calleep, 4, task)) {
-			continue;
-		}
-		inst = db_get_task_value(calleep, 4, FALSE, task);
-		if ((inst & 0xffff0000) == (0x907f0000 + (narg << 21)) ||
-				(inst & 0xffff0000) == (0x90610000 + (narg << 21))) {
-			argp = (db_addr_t) &(fp->f_arg[narg]);
-			*arg = argp;
-			return TRUE;
-		}
-	}
-#endif
-	return FALSE;
-}
-
-extern int	TRAP_TYPES;
-/* 
- * Figure out the next frame up in the call stack.  
- * For trap(), we print the address of the faulting instruction and 
- *   proceed with the calling frame.  We return the ip that faulted.
- *   If the trap was caused by jumping through a bogus pointer, then
- *   the next line in the backtrace will list some random function as 
- *   being called.  It should get the argument list correct, though.  
- *   It might be possible to dig out from the next frame up the name
- *   of the function that faulted, but that could get hairy.
- */
-void
-db_nextframe(
-	struct db_ppc_frame	**lfp,		/* in/out */
-	struct db_ppc_frame	**fp,		/* in/out */
-	db_addr_t		*ip,		/* out */
-	int			frame_type,	/* in */
-	thread_act_t		thr_act,
-	db_addr_t		linkpc)		/* in */
-{
-	struct savearea *saved_regs;
-
-	task_t task = (thr_act != THR_ACT_NULL)? thr_act->task: TASK_NULL;
-
-	switch(frame_type) {
-	case TRAP:
-		db_printf(">>>>> trap <<<<<\n");
-		goto miss_frame;
-		break;
-	case INTERRUPT:
-		if (*lfp == 0) {
-			db_printf(">>>>> interrupt <<<<<\n");
-			goto miss_frame;
-		}
-		db_printf(">>>>> interrupt <<<<<\n");
-		goto miss_frame;
-		break;
-	case SYSCALL:
-		if (thr_act != THR_ACT_NULL && thr_act->machine.pcb) {
-			*ip = (db_addr_t) thr_act->machine.pcb->save_srr0;
-			*fp = (struct db_ppc_frame *) (thr_act->machine.pcb->save_r1);
-			break;
-		}
-		/* fall through for unknown case */
-	default:
-miss_frame:
-		if(!pmap_find_phys(kernel_pmap, (addr64_t)*fp)) {	/* Check if this is valid */
-			db_printf("Frame not mapped %08X\n",*fp);		/* Say not found */
-			*fp = 0;										/* Show not found */
-			break;											/* Out of here */
-		}
-
-		if ((*fp)->f_frame)
-			*ip = (db_addr_t)
-				db_get_task_value((int)&(*fp)->f_frame->f_retaddr,
-						4, FALSE, task);
-		else
-			*ip = (db_addr_t) 
-				db_get_task_value((int)&(*fp)->f_retaddr,
-						4, FALSE, task);
-
-		*lfp = *fp;
-		*fp = (struct db_ppc_frame *)
-			db_get_task_value((int)&(*fp)->f_frame, 4, FALSE, task);
-		break;
-	}
-}
-
-void
-db_stack_trace_cmd(
-	db_expr_t	addr,
-	boolean_t	have_addr,
-	db_expr_t	count,
-	char		*modif)
-{
-	struct db_ppc_frame *frame, *lastframe;
-	db_addr_t	callpc, linkpc, lastcallpc;
-	int		frame_type;
-	boolean_t	kernel_only = TRUE;
-	boolean_t	trace_thread = FALSE;
-	boolean_t	trace_all_threads = FALSE;
-	int		thcount = 0;
-	char		*filename;
-	int		linenum;
-	task_t		task;
-	thread_act_t	th, top_act;
-	int		user_frame;
-	int		frame_count;
-	jmp_buf_t	*prev;
-	jmp_buf_t	db_jmp_buf;
-	queue_entry_t	act_list;
-
-	if (!db_trace_symbols_found)
-		db_find_trace_symbols();
-	{
-		char *cp = modif;
-		char c;
-
-		while ((c = *cp++) != 0) {
-			if (c == 't')
-				trace_thread = TRUE;
-			if (c == 'T') {
-				trace_all_threads = TRUE;
-				trace_thread = TRUE;
-			}
-			if (c == 'u')
-				kernel_only = FALSE;
-		}
-	}
-
-	if (trace_all_threads) {
-		if (!have_addr && !trace_thread) {
-			have_addr = TRUE;
-			trace_thread = TRUE;
-			act_list = &(current_task()->threads);
-			addr = (db_expr_t) queue_first(act_list);
-		} 
-		else if (trace_thread) {
-			if (have_addr) {
-				if (!db_check_act_address_valid((thread_act_t)addr)) {
-					if (db_lookup_task((task_t)addr) == -1)
-						return;
-					act_list = &(((task_t)addr)->threads);
-					addr = (db_expr_t) queue_first(act_list);
-				} 
-				else {
-					act_list = &(((thread_act_t)addr)->task->threads);
-					thcount = db_lookup_task_act(((thread_act_t)addr)->task,
-							(thread_act_t)addr);
-				}
-			} 
-			else {
-				th = db_default_act;
-				if (th == THR_ACT_NULL)
-					th = current_thread();
-				if (th == THR_ACT_NULL) {
-					db_printf("no active thr_act\n");
-					return;
-				}
-				have_addr = TRUE;
-				act_list = &th->task->threads;
-				addr = (db_expr_t) queue_first(act_list);
-			}
-		}
-	}
-
-	if (count == -1)
-		count = 65535;
-
-next_thread:
-	top_act = THR_ACT_NULL;
-
-	user_frame = 0;
-	frame_count = count;
-
-	if (!have_addr && !trace_thread) {
-		frame = (struct db_ppc_frame *)(ddb_regs.save_r1);
-		callpc = (db_addr_t)ddb_regs.save_srr0;
-		linkpc = (db_addr_t)ddb_regs.save_lr;
-		th = current_thread();
-		task = (th != THR_ACT_NULL)? th->task: TASK_NULL;
-	} 
-	else if (trace_thread) {
-		if (have_addr) {
-			th = (thread_act_t) addr;
-			if (!db_check_act_address_valid(th))
-				return;
-		} 
-		else {
-			th = db_default_act;
-			if (th == THR_ACT_NULL)
-				th = current_thread();
-			if (th == THR_ACT_NULL) {
-				db_printf("no active thread\n");
-				return;
-			}
-		}
-		if (trace_all_threads)
-			db_printf("---------- Thread 0x%x (#%d of %d) ----------\n",
-					addr, thcount, th->task->thread_count);
-
-next_activation:
-		user_frame = 0;
-
-		task = th->task;
-		if (th == current_thread()) {
-			frame = (struct db_ppc_frame *)(ddb_regs.save_r1);
-			callpc = (db_addr_t)ddb_regs.save_srr0;
-			linkpc = (db_addr_t)ddb_regs.save_lr;
-		} 
-		else {
-			if (th->machine.pcb == 0) {
-				db_printf("thread has no pcb\n");
-				goto thread_done;
-			}
-			if (th->kernel_stack == 0) {
-				struct savearea *pss = th->machine.pcb;
-
-				db_printf("Continuation ");
-				db_task_printsym((db_expr_t)th->continuation,
-						DB_STGY_PROC, task);
-				db_printf("\n");
-				frame = (struct db_ppc_frame *) (pss->save_r1);
-				callpc = (db_addr_t) (pss->save_srr0);
-				linkpc = (db_addr_t) (pss->save_lr);
-			} 
-			else {
-				int cpu;
-
-				for (cpu = 0; cpu < real_ncpus; cpu++) {
-					if (cpu_to_processor(cpu)->state == PROCESSOR_RUNNING &&
-							cpu_to_processor(cpu)->active_thread == th &&
-							PerProcTable[cpu].ppe_vaddr->db_saved_state) {
-						break;
-					}
-				}
-				if (top_act != THR_ACT_NULL) {
-					/*
-					 * Trying to get the backtrace of an activation
-					 * which is not the top_most one in the RPC chain:
-					 * use the activation's pcb.
-					 */
-					struct savearea *pss;
-
-					pss = th->machine.pcb;
-					frame = (struct db_ppc_frame *) (pss->save_r1);
-					callpc = (db_addr_t) (pss->save_srr0);
-					linkpc = (db_addr_t) (pss->save_lr);
-				} else {
-					if (cpu == real_ncpus) {
-						struct savearea *iks;
-						int r;
-
-						iks = th->machine.pcb;
-						prev = db_recover;
-						if ((r = _setjmp(db_recover = &db_jmp_buf)) == 0) {
-							frame = (struct db_ppc_frame *) (iks->save_r1);
-							callpc = (db_addr_t) (iks->save_lr);
-							linkpc = 0;
-						} else {
-							/*
-							 * The kernel stack has probably been
-							 * paged out (swapped out activation).
-							 */
-							db_recover = prev;
-							if (r == 2)	/* 'q' from db_more() */
-								db_error(0);
-							db_printf("<kernel stack (0x%x) error "
-									"(probably swapped out)>\n",
-									iks);
-							goto next_act;
-						}
-						db_recover = prev;
-					} else {
-						db_printf(">>>>> active on cpu %d <<<<<\n",
-								cpu);
-						frame = (struct db_ppc_frame *)
-							(PerProcTable[cpu].ppe_vaddr->db_saved_state->save_r1);
-						callpc = (db_addr_t) PerProcTable[cpu].ppe_vaddr->db_saved_state->save_srr0;
-						linkpc = (db_addr_t) PerProcTable[cpu].ppe_vaddr->db_saved_state->save_lr;
-					}
-				}
-			}
-		}
-	} else {
-		frame = (struct db_ppc_frame *)addr;
-		th = (db_default_act)? db_default_act: current_thread();
-		task = (th != THR_ACT_NULL)? th->task: TASK_NULL;
-		if (frame->f_frame) {
-			callpc = (db_addr_t)db_get_task_value
-				((int)&frame->f_frame->f_retaddr,
-				 4, FALSE, (user_frame) ? task : 0);
-			callpc = callpc-sizeof(callpc);
-		} else
-			callpc = 0;
-		linkpc = 0;
-	}
-
-	if (!INKERNELSTACK((unsigned)frame, th)) {
-		db_printf(">>>>> user space <<<<<\n");
-		if (kernel_only)
-			goto thread_done;
-		user_frame++;
-	}
-
-	lastframe = 0;
-	lastcallpc = (db_addr_t) 0;
-	while (frame_count-- && frame != 0) {
-		int narg = DB_NUMARGS_MAX;
-		int arg;	
-		char *	name;
-		db_expr_t	offset;
-		db_addr_t call_func = 0;
-		int r;
-		db_addr_t	off;
-
-		db_symbol_values(NULL,
-				db_search_task_symbol_and_line(
-					callpc, DB_STGY_XTRN, &offset, &filename,
-					&linenum, (user_frame) ? task : 0, &narg),
-				&name, (db_expr_t *)&call_func);
-		if (name == NULL) {
-			db_find_task_sym_and_offset(callpc, 
-					&name, &off, (user_frame) ? task : 0);
-			offset = (db_expr_t) off;
-		}
-
-		if (user_frame == 0) {
-			if (call_func &&
-					(call_func == db_user_trap_symbol_value ||
-					 call_func == db_kernel_trap_symbol_value)) {
-				frame_type = TRAP;
-				narg = 1;
-			} else if (call_func &&
-					call_func == db_interrupt_symbol_value) {
-				frame_type = INTERRUPT;
-				goto next_frame;
-			} else if (call_func &&
-					call_func == db_syscall_symbol_value) {
-				frame_type = SYSCALL;
-				goto next_frame;
-			} else {
-				frame_type = 0;
-				prev = db_recover;
-				if ((r = _setjmp(db_recover = &db_jmp_buf)) 
-						== 0) {
-					if (narg < 0)
-						narg = db_numargs(frame,
-								(user_frame) ? task : 0);
-					db_recover = prev;
-				} else {
-					db_recover = prev;
-					goto next_act;
-				}
-			}
-		} else {
-			frame_type = 0;
-			prev = db_recover;
-			if ((r = _setjmp(db_recover = &db_jmp_buf)) == 0) {
-				if (narg < 0)
-					narg = db_numargs(frame,
-							(user_frame) ? task : 0);
-				db_recover = prev;
-			} else {
-				db_recover = prev;
-				goto next_act;
-			}
-		}
-
-		if (name == 0 || offset > db_maxoff) {
-			db_printf("[%08X]0x%08X(", frame, callpc);
-		} else {
-			db_printf("[%08X]%s", frame, name);
-			if (offset)
-				db_printf("+%llx", offset);
-			db_printf("(");
-		}
-
-		narg = db_numargs(frame, (user_frame) ? task : 0);
-
-		for (arg = 0; arg < narg; arg++) {
-			db_addr_t	argp;
-			int value;
-			boolean_t found;
-
-			prev = db_recover;
-			if ((r = _setjmp(db_recover = &db_jmp_buf)) == 0) {
-				found = FALSE;
-				if (lastframe) 
-					found = db_find_arg(frame, lastframe->f_retaddr,
-							(user_frame) ? task : 0, arg, &argp);
-				if (found)
-					value = db_get_task_value(argp, 4, FALSE,
-							(user_frame) ? task : 0);
-			} else {
-				db_recover = prev;
-				if (r == 2)	/* 'q' from db_more() */
-					db_error(0);
-				db_printf("... <stack error>)");
-				db_printf("\n");
-				goto next_act;
-			}
-			db_recover = prev;
-			if (found)
-				db_printf("%08X", value);
-			else
-				db_printf("??");	
-			argp = argp + sizeof(argp);
-			if (arg < narg-1)
-				db_printf(",");
-		}
-		if (arg != narg)
-			db_printf("...");
-		db_printf(")");
-		db_printf("\n");
-
-next_frame:
-		lastcallpc = callpc;
-		prev = db_recover;
-		if ((r = _setjmp(db_recover = &db_jmp_buf)) == 0) {
-			db_nextframe(&lastframe, &frame, &callpc, frame_type,
-					(user_frame) ? th : THR_ACT_NULL, linkpc);
-			callpc = callpc-sizeof(callpc);
-			db_recover = prev;
-		} else {
-			db_recover = prev;
-			frame = 0;
-		}
-		linkpc = 0;
-
-		if (frame == 0) {
-next_act:
-			/* end of chain */
-			break;
-		}
-		if (!INKERNELSTACK(lastframe, th) ||
-				!INKERNELSTACK((unsigned)frame, th))
-			user_frame++;
-		if (user_frame == 1) {
-			db_printf(">>>>> user space <<<<<\n");
-			if (kernel_only)
-				break;
-		}
-
-		if (frame <= lastframe) {
-			if ((INKERNELSTACK(lastframe, th) && !INKERNELSTACK(frame, th)))
-				continue;
-			db_printf("Bad frame pointer: 0x%x\n", frame);
-			break;
-		}
-	}
-
-thread_done:
-	if (trace_all_threads) {
-		if (top_act != THR_ACT_NULL)
-			th = top_act;
-		th = (thread_act_t) queue_next(&th->task_threads);
-		if (! queue_end(act_list, (queue_entry_t) th)) {
-			db_printf("\n");
-			addr = (db_expr_t) th;
-			thcount++;
-			goto next_thread;
-		}
-	}
-}
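
The core of the stack walker deleted above is the PPC back-chain convention: the first word of each frame points at the caller's frame, with the saved return address a fixed offset into it. Below is a minimal host-side sketch of the walk db_nextframe() performs, assuming the db_ppc_frame layout shown earlier; walk_stack, its printf output, and the host-pointer struct are illustrative, not kernel code.

#include <stdio.h>

struct frame {
	struct frame *f_frame;     /* back-chain to the caller's frame */
	unsigned int  pad1;
	unsigned int  f_retaddr;   /* saved return address */
};

static void walk_stack(struct frame *fp)
{
	while (fp != NULL) {
		/* mirror db_nextframe(): prefer the return address saved
		 * via the caller's frame when a back-chain exists */
		unsigned int ip = fp->f_frame ? fp->f_frame->f_retaddr
		                              : fp->f_retaddr;
		printf("frame %p  return %08x\n", (void *)fp, ip);
		fp = fp->f_frame;      /* step out one frame */
	}
}

The kernel version additionally validates each frame pointer (pmap_find_phys) and fetches words through db_get_task_value() so a bad pointer cannot fault the debugger; the sketch omits both.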
diff --git a/osfmk/ppc/endian.h b/osfmk/ppc/endian.h
deleted file mode 100644
index 397b09de8..000000000
--- a/osfmk/ppc/endian.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- * 
- */
-
-#ifndef _MACHINE_ENDIAN_H_
-#define _MACHINE_ENDIAN_H_
-
-/*
- * Definitions for byte order,
- * according to byte significance from low address to high.
- */
-#define	LITTLE_ENDIAN	1234	/* least-significant byte first (vax) */
-#define	BIG_ENDIAN	4321	/* most-significant byte first (IBM, net) */
-#define	PDP_ENDIAN	3412	/* LSB first in word, MSW first in long (pdp) */
-
-#ifdef __BIG_ENDIAN__			/* Predefined by compiler */
-#define	BYTE_ORDER	BIG_ENDIAN	/* byte order we use on ppc */
-#define ENDIAN		BIG
-#else
-#error code has not been ported to little endian targets yet
-#endif
-
-/*
- * Macros for network/external number representation conversion.
- */
-#if BYTE_ORDER == BIG_ENDIAN && !defined(lint)
-#define	ntohl(x)	(x)
-#define	ntohs(x)	(x)
-#define	htonl(x)	(x)
-#define	htons(x)	(x)
-
-static __inline__ unsigned int byte_reverse_word(unsigned int word);
-static __inline__ unsigned int byte_reverse_word(unsigned int word) {
-	unsigned int result;
-	__asm__ volatile("lwbrx	%0, 0, %1" : "=r" (result) : "r" (&word));
-	return result;
-}
-
-/* Byte reversal is its own inverse, so the same routine serves for
- * translations in both directions (to and from little-endian).
- * Note that the htolx and ltohx forms are therefore identical; both
- * are included for completeness.
- */
-#define htoll(x)  byte_reverse_word(x)
-#define htols(x)  (byte_reverse_word(x) >> 16)
-#define ltohl(x)  htoll(x)
-#define ltohs(x)  htols(x)
-
-#define htobl(x) (x)
-#define htobs(x) (x)
-#define btohl(x) (x)
-#define btohs(x) (x)
-
-#else
-unsigned short	ntohs(), htons();
-unsigned long	ntohl(), htonl();
-#endif
-
-/* This defines the order of elements in a bitfield,
- * it is principally used by the SCSI subsystem in
- * the definitions of mapped registers
- */
-#define BYTE_MSF 1
-
-#endif /* _MACHINE_ENDIAN_H_ */
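
The deleted header leans on the PPC-only lwbrx instruction for its byte reversal. For readers without a PowerPC toolchain, here is a portable C sketch of the same operation; byte_reverse_word_c is a hypothetical name, not part of the original header.

#include <stdint.h>

static inline uint32_t byte_reverse_word_c(uint32_t w)
{
	/* swap the four bytes of a 32-bit word, as lwbrx does */
	return ((w & 0x000000FFu) << 24) |
	       ((w & 0x0000FF00u) <<  8) |
	       ((w & 0x00FF0000u) >>  8) |
	       ((w & 0xFF000000u) >> 24);
}

/* Reversal is an involution: byte_reverse_word_c(byte_reverse_word_c(x))
 * == x, which is exactly why the header could define ltohl(x) as htoll(x).
 */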
diff --git a/osfmk/ppc/etimer.c b/osfmk/ppc/etimer.c
deleted file mode 100644
index dca034b91..000000000
--- a/osfmk/ppc/etimer.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-/*
- *	File:		etimer.c
- *	Purpose:	Routines for handling the machine independent
- *				event timer.
- */
-
-#include <mach/mach_types.h>
-
-#include <kern/clock.h>
-#include <kern/thread.h>
-#include <kern/timer_queue.h>
-#include <kern/processor.h>
-#include <kern/macro_help.h>
-#include <kern/spl.h>
-#include <kern/etimer.h>
-#include <kern/pms.h>
-
-#include <machine/commpage.h>
-#include <machine/machine_routines.h>
-
-#include <sys/kdebug.h>
-#include <ppc/exception.h>
-
-/*
- * 	Event timer interrupt.
- *
- * XXX a drawback of this implementation is that events serviced earlier must not set deadlines
- *     that occur before the entire chain completes.
- *
- * XXX a better implementation would use a set of generic callouts and iterate over them
- */
-void
-etimer_intr(
-__unused int inuser,
-__unused uint64_t iaddr)
-{
-	uint64_t		abstime;
-	rtclock_timer_t		*mytimer;
-	struct per_proc_info	*pp;
-
-	pp = getPerProc();
-
-	mytimer = &pp->rtclock_timer;				/* Point to the event timer */
-
-	abstime = mach_absolute_time();				/* Get the time now */
-
-	/* is it time for power management state change? */	
-	if (pp->pms.pmsPop <= abstime) {
-	        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_START, 0, 0, 0, 0, 0);
-		pmsStep(1);					/* Yes, advance step */
-	        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_END, 0, 0, 0, 0, 0);
-
-		abstime = mach_absolute_time();			/* Get the time again since we ran a bit */
-	}
-
-	/* has a pending clock timer expired? */
-	if (mytimer->deadline <= abstime) {			/* Have we expired the deadline? */
-		mytimer->has_expired = TRUE;			/* Remember that we popped */
-		mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
-		mytimer->has_expired = FALSE;
-	}
-
-	/* schedule our next deadline */
-	pp->rtcPop = EndOfAllTime;				/* any real deadline will be earlier */
-	etimer_resync_deadlines();
-}
-
-/*
- * Set the clock deadline.
- */
-void etimer_set_deadline(uint64_t deadline)
-{
-	rtclock_timer_t		*mytimer;
-	spl_t			s;
-	struct per_proc_info	*pp;
-
-	s = splclock();					/* no interruptions */
-	pp = getPerProc();
-
-	mytimer = &pp->rtclock_timer;			/* Point to the timer itself */
-	mytimer->deadline = deadline;			/* Set the new expiration time */
-
-	etimer_resync_deadlines();
-
-	splx(s);
-}
-
-
-/*
- * Re-evaluate the outstanding deadlines and select the most proximate.
- *
- * Should be called at splclock.
- */
-void
-etimer_resync_deadlines(void)
-{
-	uint64_t		deadline;
-	rtclock_timer_t		*mytimer;
-	spl_t			s = splclock();		/* No interruptions please */
-	struct per_proc_info	*pp;
-
-	pp = getPerProc();
-
-	deadline = ~0ULL;
-
-	/* if we have a clock timer set sooner, pop on that */
-	mytimer = &pp->rtclock_timer;			/* Point to the timer itself */
-	if (!mytimer->has_expired && mytimer->deadline > 0)
-		deadline = mytimer->deadline;
-
-	/* if we have a power management event coming up, how about that? */
-	if (pp->pms.pmsPop > 0 && pp->pms.pmsPop < deadline)
-		deadline = pp->pms.pmsPop;
-	
-
-	if (deadline > 0 && deadline <= pp->rtcPop) {
-		int     decr;
-		uint64_t now;
-
-		now = mach_absolute_time();
-		decr = setPop(deadline);
-
-		if (deadline < now)
-		        pp->rtcPop = now + decr;
-		else
-		        pp->rtcPop = deadline;
-
-		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | DBG_FUNC_NONE, decr, 2, 0, 0, 0);
-	}
-	splx(s);
-}
-
-queue_t
-timer_queue_assign(
-	uint64_t		deadline)
-{
-	struct per_proc_info	*pp = getPerProc();
-	rtclock_timer_t			*timer;
-
-	if (pp->running) {
-		timer = &pp->rtclock_timer;
-
-		if (deadline < timer->deadline)
-			etimer_set_deadline(deadline);
-	}
-	else
-		timer = &PerProcTable[master_cpu].ppe_vaddr->rtclock_timer;
-
-	return (&timer->queue);
-}
-
-void
-timer_queue_cancel(
-	queue_t			queue,
-	uint64_t		deadline,
-	uint64_t		new_deadline)
-{
-	if (queue == &getPerProc()->rtclock_timer.queue) {
-		if (deadline < new_deadline)
-			etimer_set_deadline(new_deadline);
-	}
-}
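
etimer_resync_deadlines() above boils down to "program the decrementer with the earliest pending event". A minimal sketch of that selection policy follows, with hypothetical parameters standing in for the per-processor fields (rtclock_timer.deadline and pms.pmsPop); it is the policy only, not the hardware programming.

#include <stdint.h>

#define END_OF_ALL_TIME (~0ULL)

static uint64_t earliest_deadline(uint64_t clock_deadline,  /* 0 if none pending */
                                  uint64_t pm_pop)          /* 0 if none pending */
{
	uint64_t deadline = END_OF_ALL_TIME;

	if (clock_deadline > 0 && clock_deadline < deadline)
		deadline = clock_deadline;   /* pending clock timer */
	if (pm_pop > 0 && pm_pop < deadline)
		deadline = pm_pop;           /* power-management step event */

	return deadline;                     /* caller programs the decrementer */
}

In the kernel the result is only reprogrammed when it is at or before the currently armed pop (deadline <= pp->rtcPop), and a deadline already in the past is clamped forward by the decrementer granularity.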
diff --git a/osfmk/ppc/exception.h b/osfmk/ppc/exception.h
deleted file mode 100644
index 394b884e4..000000000
--- a/osfmk/ppc/exception.h
+++ /dev/null
@@ -1,693 +0,0 @@
-/*
- * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-/* Miscellaneous constants and structures used by the exception
- * handlers
- */
-
-#ifndef _PPC_EXCEPTION_H_
-#define _PPC_EXCEPTION_H_
-
-#include <ppc/savearea.h>
-
-#ifndef ASSEMBLER
-
-#include <mach_kdb.h>
-#include <mach_kdp.h>
-
-#include <mach/machine/vm_types.h>
-#include <mach/boolean.h>
-#include <kern/ast.h>
-#include <kern/cpu_data.h>
-#include <kern/pms.h>
-#include <pexpert/pexpert.h>
-#include <IOKit/IOInterrupts.h>
-#include <ppc/machine_routines.h>
-#include <ppc/rtclock.h>
-
-/*	Per processor CPU features */
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-struct procFeatures {
-	unsigned int	Available;			/* 0x000 */
-#define pfFloat		0x80000000
-#define pfFloatb	0
-#define pfAltivec	0x40000000
-#define pfAltivecb	1
-#define pfAvJava	0x20000000
-#define pfAvJavab	2
-#define pfSMPcap	0x10000000
-#define pfSMPcapb	3
-#define pfCanSleep	0x08000000
-#define pfCanSleepb	4
-#define pfCanNap	0x04000000
-#define pfCanNapb	5
-#define pfCanDoze	0x02000000
-#define pfCanDozeb	6
-#define pfSlowNap	0x00400000
-#define pfSlowNapb	9
-#define pfNoMuMMCK	0x00200000
-#define pfNoMuMMCKb	10
-#define pfNoL2PFNap	0x00100000
-#define pfNoL2PFNapb	11
-#define pfSCOMFixUp	0x00080000
-#define pfSCOMFixUpb	12
-#define	pfHasDcba	0x00040000
-#define	pfHasDcbab	13
-#define	pfL1fa		0x00010000
-#define	pfL1fab		15
-#define pfL2		0x00008000
-#define pfL2b		16
-#define pfL2fa		0x00004000
-#define pfL2fab		17
-#define pfL2i		0x00002000
-#define pfL2ib		18
-#define pfLClck		0x00001000
-#define pfLClckb	19
-#define pfWillNap	0x00000800
-#define pfWillNapb	20
-#define pfNoMSRir	0x00000400
-#define pfNoMSRirb	21
-#define pfL3pdet	0x00000200
-#define pfL3pdetb	22
-#define	pf128Byte	0x00000080
-#define	pf128Byteb	24
-#define	pf32Byte	0x00000020
-#define	pf32Byteb	26
-#define	pf64Bit		0x00000010
-#define	pf64Bitb	27
-#define pfL3		0x00000004
-#define pfL3b		29
-#define pfL3fa		0x00000002
-#define pfL3fab		30
-#define pfValid		0x00000001
-#define pfValidb	31
-	unsigned short	rptdProc;			/* 0x004 */
-	unsigned short	lineSize;			/* 0x006 */
-	unsigned int	l1iSize;			/* 0x008 */
-	unsigned int	l1dSize;			/* 0x00C */
-	unsigned int	l2cr;				/* 0x010 */
-	unsigned int	l2Size;				/* 0x014 */
-	unsigned int	l3cr;				/* 0x018 */
-	unsigned int	l3Size;				/* 0x01C */
-	unsigned int	pfMSSCR0;			/* 0x020 */
-	unsigned int	pfMSSCR1;			/* 0x024 */
-	unsigned int	pfICTRL;			/* 0x028 */
-	unsigned int	pfLDSTCR;			/* 0x02C */
-	unsigned int	pfLDSTDB;			/* 0x030 */
-	unsigned int	pfMaxVAddr;			/* 0x034 */
-	unsigned int	pfMaxPAddr;			/* 0x038 */
-	unsigned int	pfPTEG;				/* 0x03C */
-	uint64_t		pfHID0;				/* 0x040 */
-	uint64_t		pfHID1;				/* 0x048 */
-	uint64_t		pfHID2;				/* 0x050 */
-	uint64_t		pfHID3;				/* 0x058 */
-	uint64_t		pfHID4;				/* 0x060 */
-	uint64_t		pfHID5;				/* 0x068 */
-	unsigned int	l2crOriginal;		/* 0x070 */
-	unsigned int	l3crOriginal;		/* 0x074 */
-	unsigned int	pfBootConfig;		/* 0x078 */
-	unsigned int	pfPowerModes;		/* 0x07C */
-#define pmDPLLVmin		0x00010000
-#define pmDPLLVminb		15
-#define pmType			0x000000FF
-#define pmPowerTune		0x00000003
-#define pmDFS			0x00000002
-#define pmDualPLL		0x00000001
-	unsigned int	pfPowerTune0;		/* 0x080 */
-	unsigned int	pfPowerTune1;		/* 0x084 */
-	unsigned int	rsrvd88[6];			/* 0x088 */
-};
-#pragma pack()
-
-typedef struct procFeatures procFeatures;
-
-
-/*
- *
- *		Various performance counters
- */
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-struct hwCtrs {	
-
-	unsigned int	hwInVains; 				/* In vain */
-	unsigned int	hwResets;				/* Reset */
-	unsigned int	hwMachineChecks;		/* Machine check */
-	unsigned int	hwDSIs; 				/* DSIs */
-	unsigned int	hwISIs; 				/* ISIs */
-	unsigned int	hwExternals; 			/* Externals */
-	unsigned int	hwAlignments; 			/* Alignment */
-	unsigned int	hwPrograms; 			/* Program */
-	unsigned int	hwFloatPointUnavailable;	/* Floating point */
-	unsigned int	hwDecrementers; 		/* Decrementer */
-	unsigned int	hwIOErrors; 			/* I/O error */
-	unsigned int	hwrsvd0; 				/* Reserved */
-	unsigned int	hwSystemCalls; 			/* System call */
-	unsigned int	hwTraces; 				/* Trace */
-	unsigned int	hwFloatingPointAssists; /* Floating point assist */
-	unsigned int	hwPerformanceMonitors; 	/* Performance monitor */
-	unsigned int	hwAltivecs; 			/* VMX */
-	unsigned int	hwrsvd1; 				/* Reserved */
-	unsigned int	hwrsvd2; 				/* Reserved */
-	unsigned int	hwrsvd3; 				/* Reserved */
-	unsigned int	hwInstBreakpoints; 		/* Instruction breakpoint */
-	unsigned int	hwSystemManagements; 	/* System management */
-	unsigned int	hwAltivecAssists; 		/* Altivec Assist */
-	unsigned int	hwThermal;				/* Thermals */
-	unsigned int	hwrsvd5; 				/* Reserved */
-	unsigned int	hwrsvd6; 				/* Reserved */
-	unsigned int	hwrsvd7; 				/* Reserved */
-	unsigned int	hwrsvd8;				/* Reserved */
-	unsigned int	hwrsvd9; 				/* Reserved */
-	unsigned int	hwrsvd10; 				/* Reserved */
-	unsigned int	hwrsvd11; 				/* Reserved */
-	unsigned int	hwrsvd12; 				/* Reserved */
-	unsigned int	hwrsvd13; 				/* Reserved */
-	unsigned int	hwTrace601;				/* Trace */
-	unsigned int	hwSIGPs; 				/* SIGP */
-	unsigned int	hwPreemptions; 			/* Preemption */
-	unsigned int	hwContextSwitchs;		/* Context switch */
-	unsigned int	hwShutdowns;			/* Shutdowns */
-	unsigned int	hwChokes;				/* System ABENDs */
-	unsigned int	hwDataSegments;			/* Data Segment Interruptions */
-	unsigned int	hwInstructionSegments;	/* Instruction Segment Interruptions */
-	unsigned int	hwSoftPatches;			/* Soft Patch interruptions */
-	unsigned int	hwMaintenances;			/* Maintenance interruptions */
-	unsigned int	hwInstrumentations;		/* Instrumentation interruptions */
-	unsigned int	hwrsvd14;				/* Reserved */
-	unsigned int 	hwhdec;					/* 0B4 Hypervisor decrementer */
-
-	unsigned int	hwspare0[11];			/* 0B8 Reserved */
-	unsigned int	hwspare0a;				/* 0E4 Reserved */
-	unsigned int	hwspare0b;				/* 0E8 Reserved */
-	unsigned int	hwspare0c;				/* 0EC Reserved */
-	unsigned int	hwspare0d;				/* 0F0 Reserved */
-	unsigned int	hwIgnored;				/* 0F4 Interruptions ignored */
-	unsigned int	hwRedrives;				/* 0F8 Number of redriven interrupts */
-	unsigned int	hwSteals;				/* 0FC Steals */
-/*											   100 */
-
-	unsigned int 	hwMckHang;				/* ? */
-	unsigned int 	hwMckSLBPE;				/* ? */
-	unsigned int 	hwMckTLBPE;				/* ? */
-	unsigned int 	hwMckERCPE;				/* ? */
-	unsigned int	hwMckL1DPE;				/* ? */
-	unsigned int	hwMckL1TPE;				/* ? */
-	unsigned int 	hwMckUE;				/* ? */
-	unsigned int 	hwMckIUE;				/* ? */
-	unsigned int 	hwMckIUEr;				/* ? */
-	unsigned int 	hwMckDUE;				/* ? */
-	unsigned int 	hwMckDTW;				/* ? */
-	unsigned int 	hwMckUnk;				/* ? */
-	unsigned int 	hwMckExt;				/* ? */
-	unsigned int 	hwMckICachePE;			/* ? */
-	unsigned int 	hwMckITagPE;			/* ? */
-	unsigned int 	hwMckIEratPE;			/* ? */
-	unsigned int 	hwMckDEratPE;			/* ? */
-	unsigned int	hwspare2[15];			/* Pad to next 128 bndry */
-/*											0x180 */
-
-	unsigned int	napStamp[2];			/* Time base when we napped */
-	unsigned int	napTotal[2];			/* Total nap time in ticks */
-	unsigned int	numSIGPast;				/* Number of SIGP asts received */
-	unsigned int	numSIGPcpureq;			/* Number of SIGP cpu requests received */
-	unsigned int	numSIGPdebug;			/* Number of SIGP debugs received */
-	unsigned int	numSIGPwake;			/* Number of SIGP wakes received */
-	unsigned int	numSIGPtimo;			/* Number of SIGP send timeouts */
-	unsigned int	numSIGPmast;			/* Number of SIGPast messages merged */
-	unsigned int	numSIGPmwake;			/* Number of SIGPwake messages merged */
-	
-	unsigned int	hwWalkPhys;				/* Number of entries to hw_walk_phys */
-	unsigned int	hwWalkFull;				/* Full purge of connected PTE's */
-	unsigned int	hwWalkMerge;			/* RC merge of connected PTE's */
-	unsigned int	hwWalkQuick;			/* Quick scan of connected PTE's */
-	unsigned int	numSIGPcall;			/* Number of SIGPcall messages received */
-	
-	unsigned int	hwspare3[16];			/* Pad to 512 */
-	
-};
-#pragma pack()
-
-typedef struct hwCtrs hwCtrs;
-
-struct patch_entry {
-	unsigned int	*addr;
-	unsigned int	data;
-	unsigned int	type;
-	unsigned int	value;
-};
-
-typedef struct patch_entry patch_entry_t;
-
-#define	PATCH_INVALID		0
-#define	PATCH_PROCESSOR		1
-#define	PATCH_FEATURE		2
-#define PATCH_END_OF_TABLE  3
-
-#define PatchExt32		0x80000000
-#define PatchExt32b		0
-#define PatchLwsync		0x40000000
-#define PatchLwsyncb	1
-
-/* When an exception is taken, this info is accessed via sprg0 */
-/* We should always have this one on a cache line boundary */
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-struct per_proc_info {
-	unsigned short	cpu_number;
-	unsigned short	cpu_flags;			/* Various low-level flags */
-	vm_offset_t  	istackptr;
-	vm_offset_t  	intstack_top_ss;
-
-	vm_offset_t  	debstackptr;
-	vm_offset_t  	debstack_top_ss;
-
-	unsigned int 	spcFlags;			/* Special thread flags */
-	unsigned int	old_thread;
-	ast_t			pending_ast;		/* mask of pending ast(s) */
-
-	/* PPC cache line boundary here - 020 */
-
-	int				cpu_type;
-	int				cpu_subtype;
-	int				cpu_threadtype;
-/*
- *	Note: the following two pairs of words need to stay in order and each pair must
- *	be in the same reservation (line) granule 
- */
-	struct facility_context	*FPU_owner;	/* Owner of the FPU on this cpu */
-	unsigned int 	liveVRSave;			/* VRSave associated with live vector registers */
-	struct facility_context	*VMX_owner;	/* Owner of the VMX on this cpu */
-	unsigned int	spcTRc;				/* Special trace count */
-	unsigned int	spcTRp;				/* Special trace buffer pointer */
-
-	/* PPC cache line boundary here - 040 */
-	addr64_t		quickfret;			/* List of saveareas to release */
-	addr64_t		lclfree;			/* Pointer to local savearea list */
-	unsigned int	lclfreecnt;			/* Entries in local savearea list */
-	unsigned int 	holdQFret;			/* Hold off releasing quickfret list */
-	uint64_t		rtcPop;				/* Real Time Clock pop */
-
-	/* PPC cache line boundary here - 060 */
-	boolean_t		interrupts_enabled;
-	IOInterruptHandler	interrupt_handler;
-	void *			interrupt_nub;
-	unsigned int	interrupt_source;
-	void *			interrupt_target;
-	void *			interrupt_refCon;
-	uint64_t		next_savearea;			/* pointer to the next savearea */
-
-	/* PPC cache line boundary here - 080 */
-	unsigned int	MPsigpStat;			/* Signal Processor status (interlocked update for this one) */
-#define MPsigpMsgp		0xC0000000		/* Message pending (busy + pass ) */
-#define MPsigpBusy		0x80000000		/* Processor area busy, i.e., locked */
-#define MPsigpPass		0x40000000		/* Busy lock passed to receiving processor */
-#define MPsigpAck		0x20000000		/* Ack Busy lock passed to receiving processor */
-#define MPsigpSrc		0x000000FF		/* Processor that owns busy, i.e., the ID of */
-										/*   whoever set busy. When a busy is passed, */
-										/*   this is the requestor of the function. */
-#define MPsigpFunc		0x0000FF00		/* Current function */
-#define MPsigpIdle		0x00			/* No function pending */
-#define MPsigpSigp		0x04			/* Signal a processor */
-	unsigned int	MPsigpParm0;		/* SIGP parm 0 */
-	unsigned int	MPsigpParm1;		/* SIGP parm 1 */
-	unsigned int	MPsigpParm2;		/* SIGP parm 2 */
-	cpu_id_t		cpu_id;
-	vm_offset_t		start_paddr;
-	unsigned int	ruptStamp[2];		/* Timebase at last interruption */
-
-	/* PPC cache line boundary here - 0A0 */
-	procFeatures 	pf;					/* Processor features */
-	
-	/* PPC cache line boundary here - 140 */
-	void *			pp_cbfr;
-	void *			pp_chud;
-	rtclock_timer_t	rtclock_timer;
-	unsigned int	ppbbTaskEnv;		/* BlueBox Task Environment */
-    
-	/* PPC cache line boundary here - 160 */
-	struct savearea *	db_saved_state;
-	time_base_enable_t	time_base_enable;
-	uint32_t		ppXFlags;
-	int				running;
-	int				debugger_is_slave;
-	int				debugger_active;
-	int				debugger_pending;
-	uint32_t		debugger_holdoff;
-	
-	/* PPC cache line boundary here - 180 */
-    uint64_t        Uassist;            /* User Assist DoubleWord */
-	uint64_t		validSegs;			/* Valid SR/STB slots */
-	addr64_t		ppUserPmap;			/* Current user state pmap (physical address) */
-	unsigned int	ppUserPmapVirt;		/* Current user state pmap (virtual address) */
-	unsigned int	ppMapFlags;			/* Mapping flags */
-	
-	/* PPC cache line boundary here - 1A0 */
-	unsigned short	ppInvSeg;			/* Forces complete invalidate of SRs/SLB (this must stay with ppInvSeg) */
-	unsigned short	ppCurSeg;			/* Set to 1 if user segments, 0 if kernel (this must stay with ppInvSeg) */
-	unsigned int	ppSegSteal;			/* Count of segment slot steals */
-	ppnum_t			VMMareaPhys;		/* vmm state page physical addr */
-	unsigned int	VMMXAFlgs;			/* vmm extended flags */
-	unsigned int	FAMintercept;		/* vmm FAM Exceptions to intercept */
-	unsigned int	hibernate;			/* wake from hibernate */
-	uint32_t		save_tbl;
-	uint32_t		save_tbu;
-	
-	/* PPC cache line boundary here - 1C0 */
-	unsigned int	ppUMWmp[16];		/* Linkage mapping for user memory window - 64 bytes */
-	
-	/* PPC cache line boundary here - 200 */
-	uint64_t		tempr0;				/* temporary savearea */
-	uint64_t		tempr1;			
-	uint64_t		tempr2;
-	uint64_t		tempr3;
-
-	uint64_t		tempr4;				
-	uint64_t		tempr5;
-	uint64_t		tempr6;
-	uint64_t		tempr7;
-
-	uint64_t		tempr8;
-	uint64_t		tempr9;
-	uint64_t		tempr10;
-	uint64_t		tempr11;
-	
-	uint64_t		tempr12;
-	uint64_t		tempr13;
-	uint64_t		tempr14;
-	uint64_t		tempr15;
-	
-	uint64_t		tempr16;
-	uint64_t		tempr17;
-	uint64_t		tempr18;
-	uint64_t		tempr19;
-
-	uint64_t		tempr20;
-	uint64_t		tempr21;
-	uint64_t		tempr22;
-	uint64_t		tempr23;
-	
-	uint64_t		tempr24;
-	uint64_t		tempr25;
-	uint64_t		tempr26;
-	uint64_t		tempr27;
-	
-	uint64_t		tempr28;
-	uint64_t		tempr29;
-	uint64_t		tempr30;
-	uint64_t		tempr31;
-
-
-	/* PPC cache line boundary here - 300 */
-	double			emfp0;				/* Copies of floating point registers */
-	double			emfp1;				/* Used for emulation purposes */
-	double			emfp2;
-	double			emfp3;
-
-	double			emfp4;				
-	double			emfp5;
-	double			emfp6;
-	double			emfp7;
-
-	double			emfp8;
-	double			emfp9;
-	double			emfp10;
-	double			emfp11;
-	
-	double			emfp12;
-	double			emfp13;
-	double			emfp14;
-	double			emfp15;
-	
-	double			emfp16;
-	double			emfp17;
-	double			emfp18;
-	double			emfp19;
-
-	double			emfp20;
-	double			emfp21;
-	double			emfp22;
-	double			emfp23;
-	
-	double			emfp24;
-	double			emfp25;
-	double			emfp26;
-	double			emfp27;
-	
-	double			emfp28;
-	double			emfp29;
-	double			emfp30;
-	double			emfp31;
-
-/*								   - 400 */
-	unsigned int 	emfpscr_pad;
-	unsigned int 	emfpscr;
-	unsigned int	empadfp[6];
-	
-/*								   - 420 */
-	unsigned int	emvr0[4];			/* Copies of vector registers used both */
-	unsigned int	emvr1[4];			/* for full vector emulation or */
-	unsigned int	emvr2[4];			/* as saveareas while assisting denorms */
-	unsigned int	emvr3[4];
-	unsigned int	emvr4[4];
-	unsigned int	emvr5[4];
-	unsigned int	emvr6[4];
-	unsigned int	emvr7[4];
-	unsigned int	emvr8[4];
-	unsigned int	emvr9[4];
-	unsigned int	emvr10[4];
-	unsigned int	emvr11[4];
-	unsigned int	emvr12[4];
-	unsigned int	emvr13[4];
-	unsigned int	emvr14[4];
-	unsigned int	emvr15[4];
-	unsigned int	emvr16[4];
-	unsigned int	emvr17[4];
-	unsigned int	emvr18[4];
-	unsigned int	emvr19[4];
-	unsigned int	emvr20[4];
-	unsigned int	emvr21[4];
-	unsigned int	emvr22[4];
-	unsigned int	emvr23[4];
-	unsigned int	emvr24[4];
-	unsigned int	emvr25[4];
-	unsigned int	emvr26[4];
-	unsigned int	emvr27[4];
-	unsigned int	emvr28[4];
-	unsigned int	emvr29[4];
-	unsigned int	emvr30[4];
-	unsigned int	emvr31[4];
-	unsigned int	emvscr[4];			
-	unsigned int	empadvr[4];			
-/*								   - 640 */
-/* note implicit dependence on kSkipListMaxLists, which must be <= 28 */
-    addr64_t		skipListPrev[28];	/* prev ptrs saved as side effect of calling mapSearchFull() */
-    
-/*								   - 720 */
-
-	unsigned int	patcharea[56];
-/*								   - 800 */
-
-	hwCtrs			hwCtr;					/* Hardware exception counters */
-/*								   - A00 */
-	addr64_t		pp2ndPage;				/* Physical address of the second page of the per_proc */
-	addr64_t		ijsave;					/* Pointer to original savearea for injected code */
-	uint32_t		pprsvd0A10[4];
-/*								   - A20 */
-	pmsd			pms;					/* Power Management Stepper control */
-	unsigned int	pprsvd0A40[16];			/* Reserved */
-/*								   - A80 */
-	uint32_t		pprsvd0A80[16];			/* Reserved */
-	
-	unsigned int	pprsvd0AC0[336];		/* Reserved out to next page boundary */
-/*								   - 1000 */
-
-/*
- *	This is the start of the second page of the per_proc block.  Because we do not
- *	allocate physically contiguous memory, it may be physically discontiguous from the
- *	first page.  Currently there isn't anything here that is accessed translation off,
- *	but if we need it, pp2ndPage contains the physical address.
- *
- *	Note that the boot processor's per_proc is statically allocated, so it will be a
- *	V=R contiguous area.  That allows access during early boot before we turn translation on
- *	for the first time.
- */
-
-	unsigned int	processor[384];			/* processor structure */
-	
-	unsigned int	pprsvd1[640];			/* Reserved out to next page boundary */
-/*								   - 2000 */
-
-};
-
-#pragma pack()
-
-
-/*
- * Macro to convert a processor_t processor to its attached per_proc_info_t per_proc
- */
-#define PROCESSOR_TO_PER_PROC(x)										\
-			((struct per_proc_info*)((unsigned int)(x)					\
-			- (unsigned int)(((struct per_proc_info *)0)->processor)))
-
-extern struct per_proc_info BootProcInfo;
-
-#define	MAX_CPUS	256
-
-struct per_proc_entry {
-	addr64_t				ppe_paddr;		/* Physical address of the first page of per_proc, 2nd is in pp2ndPage. */
-	unsigned int			ppe_pad4[1];
-	struct per_proc_info	*ppe_vaddr;		/* Virtual address of the per_proc */
-};
-
-extern	struct per_proc_entry PerProcTable[MAX_CPUS-1];
-
-
-extern const char *trap_type[];
-
-#endif /* ndef ASSEMBLER */
-
-/* cpu_flags defs */
-#define SIGPactive	0x8000
-#define needSRload	0x4000
-#define turnEEon	0x2000
-#define SleepState	0x0800
-#define SleepStateb	4
-#define mcountOff	0x0400
-#define SignalReady	0x0200
-#define BootDone	0x0100
-#define loadMSR		0x7FF4
-
-/* ppXFlags defs */
-#define SignalReadyWait	0x00000001
-
-#define T_VECTOR_SIZE	4					/* function pointer size */
-
-/* Hardware exceptions */
-
-#define T_IN_VAIN				(0x00 * T_VECTOR_SIZE)
-#define T_RESET					(0x01 * T_VECTOR_SIZE)
-#define T_MACHINE_CHECK			(0x02 * T_VECTOR_SIZE)
-#define T_DATA_ACCESS			(0x03 * T_VECTOR_SIZE)
-#define T_INSTRUCTION_ACCESS	(0x04 * T_VECTOR_SIZE)
-#define T_INTERRUPT				(0x05 * T_VECTOR_SIZE)
-#define T_ALIGNMENT				(0x06 * T_VECTOR_SIZE)
-#define T_PROGRAM				(0x07 * T_VECTOR_SIZE)
-#define T_FP_UNAVAILABLE		(0x08 * T_VECTOR_SIZE)
-#define T_DECREMENTER			(0x09 * T_VECTOR_SIZE)
-#define T_IO_ERROR				(0x0a * T_VECTOR_SIZE)
-#define T_RESERVED				(0x0b * T_VECTOR_SIZE)
-#define T_SYSTEM_CALL			(0x0c * T_VECTOR_SIZE)
-#define T_TRACE					(0x0d * T_VECTOR_SIZE)
-#define T_FP_ASSIST				(0x0e * T_VECTOR_SIZE)
-#define T_PERF_MON				(0x0f * T_VECTOR_SIZE)
-#define T_VMX					(0x10 * T_VECTOR_SIZE)
-#define T_INVALID_EXCP0			(0x11 * T_VECTOR_SIZE)
-#define T_INVALID_EXCP1			(0x12 * T_VECTOR_SIZE)
-#define T_INVALID_EXCP2			(0x13 * T_VECTOR_SIZE)
-#define T_INSTRUCTION_BKPT		(0x14 * T_VECTOR_SIZE)
-#define T_SYSTEM_MANAGEMENT		(0x15 * T_VECTOR_SIZE)
-#define T_ALTIVEC_ASSIST		(0x16 * T_VECTOR_SIZE)
-#define T_THERMAL				(0x17 * T_VECTOR_SIZE)
-#define T_INVALID_EXCP5			(0x18 * T_VECTOR_SIZE)
-#define T_INVALID_EXCP6			(0x19 * T_VECTOR_SIZE)
-#define T_INVALID_EXCP7			(0x1A * T_VECTOR_SIZE)
-#define T_INVALID_EXCP8			(0x1B * T_VECTOR_SIZE)
-#define T_INVALID_EXCP9			(0x1C * T_VECTOR_SIZE)
-#define T_INVALID_EXCP10		(0x1D * T_VECTOR_SIZE)
-#define T_INVALID_EXCP11		(0x1E * T_VECTOR_SIZE)
-#define T_INVALID_EXCP12		(0x1F * T_VECTOR_SIZE)
-#define T_EMULATE				(0x20 * T_VECTOR_SIZE)
-
-#define T_RUNMODE_TRACE			(0x21 * T_VECTOR_SIZE) /* 601 only */
-
-#define T_SIGP					(0x22 * T_VECTOR_SIZE)
-#define T_PREEMPT				(0x23 * T_VECTOR_SIZE)
-#define T_CSWITCH				(0x24 * T_VECTOR_SIZE)
-#define T_SHUTDOWN				(0x25 * T_VECTOR_SIZE)
-#define T_CHOKE					(0x26 * T_VECTOR_SIZE)
-
-#define T_DATA_SEGMENT			(0x27 * T_VECTOR_SIZE)
-#define T_INSTRUCTION_SEGMENT	(0x28 * T_VECTOR_SIZE)
-
-#define T_SOFT_PATCH			(0x29 * T_VECTOR_SIZE)
-#define T_MAINTENANCE			(0x2A * T_VECTOR_SIZE)
-#define T_INSTRUMENTATION		(0x2B * T_VECTOR_SIZE)
-#define T_ARCHDEP0				(0x2C * T_VECTOR_SIZE)
-#define T_HDEC					(0x2D * T_VECTOR_SIZE)
-#define T_INJECT_EXIT			(0x2E * T_VECTOR_SIZE)
-#define T_DTRACE_RET			T_INJECT_EXIT
-
-#define T_AST					(0x100 * T_VECTOR_SIZE) 
-#define T_MAX					T_CHOKE		 /* Maximum exception no */
-
-#define	T_FAM					0x00004000
-
-#define	EXCEPTION_VECTOR(exception)	(exception * 0x100 / T_VECTOR_SIZE )
-
-/*
- *		System choke (failure) codes 
- */
- 
-#define failDebug 0
-#define failStack 1
-#define failMapping 2
-#define failContext 3
-#define failNoSavearea 4
-#define failSaveareaCorr 5
-#define failBadLiveContext 6
-#define	failSkipLists 7
-#define	failUnalignedStk 8
-#define	failPmap 9
-#define	failTimeout 10
-
-/* Always must be last - update failNames table in model_dep.c as well */
-#define failUnknown 11
-
-#ifndef ASSEMBLER
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct resethandler {
-	unsigned int	type;
-	vm_offset_t	call_paddr;
-	vm_offset_t	arg__paddr;
-} resethandler_t;
-#pragma pack()
-
-extern resethandler_t ResetHandler;
-
-#endif
-
-#define	RESET_HANDLER_NULL	0x0
-#define	RESET_HANDLER_START	0x1
-#define	RESET_HANDLER_BUPOR	0x2
-#define	RESET_HANDLER_IGNORE	0x3
-
-#endif /* _PPC_EXCEPTION_H_ */
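
The PROCESSOR_TO_PER_PROC() macro deleted above is the familiar container_of idiom: given a pointer to a member embedded in a larger structure, subtract that member's offset to recover the enclosing structure. A generic sketch of the same idiom under illustrative names (CONTAINER_OF and per_proc_like are not from the source):

#include <stddef.h>

#define CONTAINER_OF(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct per_proc_like {
	int          cpu_number;
	unsigned int processor[4];   /* embedded member, as in per_proc_info */
};

/* given p == &pp->processor[0], recover pp itself */
static struct per_proc_like *to_per_proc(unsigned int *p)
{
	return CONTAINER_OF(p, struct per_proc_like, processor);
}

The original spells the offset out by hand (casting a null pointer and taking the member address) because it predates a shared container_of helper; the arithmetic is identical.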
diff --git a/osfmk/ppc/fpu_protos.h b/osfmk/ppc/fpu_protos.h
deleted file mode 100644
index 7ceed096a..000000000
--- a/osfmk/ppc/fpu_protos.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- * 
- */
-
-#ifndef _PPC_FPU_PROTOS_H_
-#define _PPC_FPU_PROTOS_H_
-
-#include <ppc/thread.h>
-
-extern void fpu_save(struct facility_context *);
-extern void fpu_disable(void);
-
-#endif /* _PPC_FPU_PROTOS_H_ */
diff --git a/osfmk/ppc/genassym.c b/osfmk/ppc/genassym.c
deleted file mode 100644
index 8207aeb55..000000000
--- a/osfmk/ppc/genassym.c
+++ /dev/null
@@ -1,1438 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- * 
- */
-
-/*
- * genassym.c is used to produce an assembly file which, intermingled
- * with otherwise useless assembly code, has all the necessary
- * definitions emitted. This assembly file is then postprocessed with
- * sed to extract only these definitions, and thus the final assyms.s
- * is created.
- *
- * This convoluted approach is necessary since structure alignment and
- * packing may differ between the host machine and the target, so we
- * are forced to use the cross compiler to generate the values; and
- * because we cannot run anything on the target machine, the values
- * have to be recovered from the generated assembly text rather than
- * printed at run time.
- */
-
-#include <types.h>
-
-#include <kern/task.h>
-#include <kern/thread.h>
-#include <kern/host.h>
-#include <kern/lock.h>
-#include <kern/locks.h>
-#include <kern/processor.h>
-#include <ppc/exception.h>
-#include <ppc/thread.h>
-#include <ppc/misc_protos.h>
-#include <kern/syscall_sw.h>
-#include <ppc/low_trace.h>
-#include <ppc/PseudoKernel.h>
-#include <ppc/mappings.h>
-#include <ppc/Firmware.h>
-#include <ppc/low_trace.h>
-#include <vm/vm_map.h>
-#include <vm/pmap.h>
-#include <ppc/pmap.h>
-#include <ppc/Diagnostics.h>
-#include <pexpert/pexpert.h>
-#include <mach/machine.h>
-#include <ppc/vmachmon.h>
-#include <ppc/hw_perfmon.h>
-#include <ppc/PPCcalls.h>
-#include <ppc/mem.h>
-#include <ppc/boot.h>
-#include <ppc/lowglobals.h>
-
-#if	CONFIG_DTRACE
-#define NEED_DTRACE_DEFS
-#include <../bsd/sys/lockstat.h>
-#endif
-
-/* Undefine the standard offsetof because it differs from the variant defined here */
-#undef offsetof
-#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE)0)->MEMBER)
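-/*
- * Note that with this variant TYPE is expected to be a pointer type,
- * so a use such as offsetof(thread_t, state) works even though
- * thread_t is a pointer typedef; the standard form would need the
- * underlying structure type instead.
- */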
-
-#define DECLARE(SYM,VAL) \
-	__asm("#DEFINITION##define\t" SYM "\t%0" : : "n" ((u_int)(VAL)))
-
-int main(int argc, char *argv[])
-{
-	/* Process Control Block */
-	DECLARE("ACT_MACT_KSP",	offsetof(thread_t, machine.ksp));
-	DECLARE("ACT_MACT_BEDA", offsetof(thread_t, machine.bbDescAddr));
-	DECLARE("ACT_MACT_BTS",	offsetof(thread_t, machine.bbTableStart));
-	DECLARE("ACT_MACT_BTE",	offsetof(thread_t, machine.bbTaskEnv));
-	DECLARE("ACT_MACT_SPF",	offsetof(thread_t, machine.specFlags));
-	DECLARE("ACT_PREEMPT_CNT",	offsetof(thread_t, machine.preemption_count));
-	DECLARE("ACT_PER_PROC",	offsetof(thread_t, machine.PerProc));
-	DECLARE("qactTimer",	offsetof(thread_t, machine.qactTimer));
-	DECLARE("umwSpace",	offsetof(thread_t, machine.umwSpace));
-	DECLARE("umwRelo",	offsetof(thread_t, machine.umwRelo));
-	DECLARE("umwSwitchAway",	umwSwitchAway);
-	DECLARE("umwSwitchAwayb",	umwSwitchAwayb);
-	DECLARE("bbTrap",		offsetof(thread_t, machine.bbTrap));
-	DECLARE("bbSysCall",	offsetof(thread_t, machine.bbSysCall));
-	DECLARE("bbInterrupt",	offsetof(thread_t, machine.bbInterrupt));
-	DECLARE("bbPending",	offsetof(thread_t, machine.bbPending));
-	
-	DECLARE("floatUsed",	floatUsed);
-	DECLARE("vectorUsed",	vectorUsed);
-	DECLARE("runningVM",	runningVM);
-	DECLARE("runningVMbit",	runningVMbit);
-	DECLARE("floatCng",		floatCng);
-	DECLARE("floatCngbit",	floatCngbit);
-	DECLARE("vectorCng",	vectorCng);
-	DECLARE("vectorCngbit",	vectorCngbit);
-	DECLARE("userProtKey",	userProtKey);
-	DECLARE("userProtKeybit",	userProtKeybit);
-
-	DECLARE("bbThread",		bbThread);
-	DECLARE("bbThreadbit",	bbThreadbit);
-	DECLARE("bbNoMachSC",	bbNoMachSC);
-	DECLARE("bbNoMachSCbit",bbNoMachSCbit);
-	DECLARE("bbPreemptive",	bbPreemptive);
-	DECLARE("bbPreemptivebit",	bbPreemptivebit);
-
-	DECLARE("fvChkb",		fvChkb);
-	DECLARE("fvChk",		fvChk);
-	DECLARE("FamVMena",		FamVMena);
-	DECLARE("FamVMenabit",		FamVMenabit);
-	DECLARE("FamVMmode",		FamVMmode);
-	DECLARE("FamVMmodebit",		FamVMmodebit);
-	DECLARE("perfMonitor",		perfMonitor);
-	DECLARE("perfMonitorbit",	perfMonitorbit);
-	DECLARE("OnProc",		OnProc);
-	DECLARE("OnProcbit",		OnProcbit);
-
-	/* Per Proc info structure */
-	DECLARE("PP_CPU_NUMBER",		offsetof(struct per_proc_info *, cpu_number));
-	DECLARE("PP_CPU_FLAGS",			offsetof(struct per_proc_info *, cpu_flags));
-	DECLARE("PP_ISTACKPTR",			offsetof(struct per_proc_info *, istackptr));
-	DECLARE("PP_INTSTACK_TOP_SS",	offsetof(struct per_proc_info *, intstack_top_ss));
-	DECLARE("PP_DEBSTACKPTR",		offsetof(struct per_proc_info *, debstackptr));
-	DECLARE("PP_DEBSTACK_TOP_SS",	offsetof(struct per_proc_info *, debstack_top_ss));
-	DECLARE("PP_HIBERNATE",	offsetof(struct per_proc_info *, hibernate));
-	DECLARE("FPUowner",				offsetof(struct per_proc_info *, FPU_owner));
-	DECLARE("VMXowner",				offsetof(struct per_proc_info *, VMX_owner));
-	DECLARE("holdQFret",			offsetof(struct per_proc_info *, holdQFret));
-	DECLARE("rtcPop",				offsetof(struct per_proc_info *, rtcPop));
-
-	DECLARE("PP_PENDING_AST",		offsetof(struct per_proc_info *, pending_ast));
-	DECLARE("quickfret", 			offsetof(struct per_proc_info *, quickfret));
-	DECLARE("lclfree", 				offsetof(struct per_proc_info *, lclfree));
-	DECLARE("lclfreecnt",			offsetof(struct per_proc_info *, lclfreecnt));
-	DECLARE("PP_INTS_ENABLED", 		offsetof(struct per_proc_info *, interrupts_enabled));
-	DECLARE("UAW", 					offsetof(struct per_proc_info *, Uassist));
-	DECLARE("next_savearea", 		offsetof(struct per_proc_info *, next_savearea));
-	DECLARE("ppbbTaskEnv", 			offsetof(struct per_proc_info *, ppbbTaskEnv));
-	DECLARE("liveVRS", 				offsetof(struct per_proc_info *, liveVRSave));
-	DECLARE("spcFlags", 			offsetof(struct per_proc_info *, spcFlags));
-	DECLARE("spcTRc", 				offsetof(struct per_proc_info *, spcTRc));
-	DECLARE("spcTRp", 				offsetof(struct per_proc_info *, spcTRp));
-	DECLARE("ruptStamp", 			offsetof(struct per_proc_info *, ruptStamp));
-	DECLARE("pfAvailable", 			offsetof(struct per_proc_info *, pf.Available));
-	DECLARE("pfFloat",				pfFloat);
-	DECLARE("pfFloatb",				pfFloatb);
-	DECLARE("pfAltivec",			pfAltivec);
-	DECLARE("pfAltivecb",			pfAltivecb);
-	DECLARE("pfAvJava",				pfAvJava);
-	DECLARE("pfAvJavab",			pfAvJavab);
-	DECLARE("pfSMPcap",				pfSMPcap);
-	DECLARE("pfSMPcapb",			pfSMPcapb);
-	DECLARE("pfCanSleep",			pfCanSleep);
-	DECLARE("pfCanSleepb",			pfCanSleepb);
-	DECLARE("pfCanNap",				pfCanNap);
-	DECLARE("pfCanNapb",			pfCanNapb);
-	DECLARE("pfCanDoze",			pfCanDoze);
-	DECLARE("pfCanDozeb",			pfCanDozeb);
-	DECLARE("pfSlowNap",				pfSlowNap);
-	DECLARE("pfSlowNapb",				pfSlowNapb);
-	DECLARE("pfNoMuMMCK",				pfNoMuMMCK);
-	DECLARE("pfNoMuMMCKb",				pfNoMuMMCKb);
-	DECLARE("pfNoL2PFNap",				pfNoL2PFNap);
-	DECLARE("pfNoL2PFNapb",				pfNoL2PFNapb);
-	DECLARE("pfSCOMFixUp",				pfSCOMFixUp);
-	DECLARE("pfSCOMFixUpb",				pfSCOMFixUpb);
-    DECLARE("pfHasDcba",			pfHasDcba);
-	DECLARE("pfHasDcbab",			pfHasDcbab);
-	DECLARE("pfL1fa",				pfL1fa);
-	DECLARE("pfL1fab",				pfL1fab);
-	DECLARE("pfL2",					pfL2);
-	DECLARE("pfL2b",				pfL2b);
-	DECLARE("pfL2fa",				pfL2fa);
-	DECLARE("pfL2fab",				pfL2fab);
-	DECLARE("pfL2i",				pfL2i);
-	DECLARE("pfL2ib",				pfL2ib);
-	DECLARE("pfLClck",				pfLClck);
-	DECLARE("pfLClckb",				pfLClckb);
-	DECLARE("pfWillNap",			pfWillNap);
-	DECLARE("pfWillNapb",			pfWillNapb);
-	DECLARE("pfNoMSRir",			pfNoMSRir);
-	DECLARE("pfNoMSRirb",			pfNoMSRirb);
-	DECLARE("pfL3pdet",				pfL3pdet);
-	DECLARE("pfL3pdetb",			pfL3pdetb);
-    DECLARE("pf128Byte",			pf128Byte);
-    DECLARE("pf128Byteb",			pf128Byteb);
-    DECLARE("pf32Byte",				pf32Byte);
-    DECLARE("pf32Byteb",			pf32Byteb);
-    DECLARE("pf64Bit",				pf64Bit);
-    DECLARE("pf64Bitb",				pf64Bitb);
-	DECLARE("pfL3",					pfL3);
-	DECLARE("pfL3b",				pfL3b);
-	DECLARE("pfL3fa",				pfL3fa);
-	DECLARE("pfL3fab",				pfL3fab);
-	DECLARE("pfValid",				pfValid);
-	DECLARE("pfValidb",				pfValidb);
-	DECLARE("pfrptdProc", 			offsetof(struct per_proc_info *, pf.rptdProc));
-	DECLARE("pflineSize", 			offsetof(struct per_proc_info *, pf.lineSize));
-	DECLARE("pfl1iSize", 			offsetof(struct per_proc_info *, pf.l1iSize));
-	DECLARE("pfl1dSize", 			offsetof(struct per_proc_info *, pf.l1dSize));
-	DECLARE("pfl2cr", 				offsetof(struct per_proc_info *, pf.l2cr));
-	DECLARE("pfl2Size", 			offsetof(struct per_proc_info *, pf.l2Size));
-	DECLARE("pfl3cr", 				offsetof(struct per_proc_info *, pf.l3cr));
-	DECLARE("pfl3Size", 			offsetof(struct per_proc_info *, pf.l3Size));
-	DECLARE("pfHID0", 				offsetof(struct per_proc_info *, pf.pfHID0));
-	DECLARE("pfHID1", 				offsetof(struct per_proc_info *, pf.pfHID1));
-	DECLARE("pfHID2", 				offsetof(struct per_proc_info *, pf.pfHID2));
-	DECLARE("pfHID3", 				offsetof(struct per_proc_info *, pf.pfHID3));
-	DECLARE("pfHID4", 				offsetof(struct per_proc_info *, pf.pfHID4));
-	DECLARE("pfHID5", 				offsetof(struct per_proc_info *, pf.pfHID5));
-	DECLARE("pfMSSCR0", 			offsetof(struct per_proc_info *, pf.pfMSSCR0));
-	DECLARE("pfMSSCR1", 			offsetof(struct per_proc_info *, pf.pfMSSCR1));
-	DECLARE("pfICTRL", 				offsetof(struct per_proc_info *, pf.pfICTRL));
-	DECLARE("pfLDSTCR", 			offsetof(struct per_proc_info *, pf.pfLDSTCR));
-	DECLARE("pfLDSTDB", 			offsetof(struct per_proc_info *, pf.pfLDSTDB));
-	DECLARE("pfl2crOriginal", 		offsetof(struct per_proc_info *, pf.l2crOriginal));
-	DECLARE("pfl3crOriginal", 		offsetof(struct per_proc_info *, pf.l3crOriginal));
-	DECLARE("pfBootConfig",			offsetof(struct per_proc_info *, pf.pfBootConfig));
-	DECLARE("pfPowerModes",			offsetof(struct per_proc_info *, pf.pfPowerModes));
-	DECLARE("pfPowerTune0",			offsetof(struct per_proc_info *, pf.pfPowerTune0));
-	DECLARE("pfPowerTune1",			offsetof(struct per_proc_info *, pf.pfPowerTune1));
-	DECLARE("pmType",				pmType);
-	DECLARE("pmDPLLVmin",			pmDPLLVmin);
-	DECLARE("pmDPLLVminb",			pmDPLLVminb);
-	DECLARE("pmPowerTune",			pmPowerTune);
-	DECLARE("pmDFS",				pmDFS);
-	DECLARE("pmDualPLL",			pmDualPLL);
-	DECLARE("pfPTEG", 				offsetof(struct per_proc_info *, pf.pfPTEG));
-	DECLARE("pfMaxVAddr", 			offsetof(struct per_proc_info *, pf.pfMaxVAddr));
-	DECLARE("pfMaxPAddr", 			offsetof(struct per_proc_info *, pf.pfMaxPAddr));
-	DECLARE("pfSize", 				sizeof(procFeatures));
-	
-	DECLARE("validSegs", 			offsetof(struct per_proc_info *, validSegs));
-	DECLARE("ppUserPmapVirt", 		offsetof(struct per_proc_info *, ppUserPmapVirt));
-	DECLARE("ppUserPmap", 			offsetof(struct per_proc_info *, ppUserPmap));
-	DECLARE("ppMapFlags", 			offsetof(struct per_proc_info *, ppMapFlags));
-	DECLARE("ppInvSeg", 			offsetof(struct per_proc_info *, ppInvSeg));
-	DECLARE("ppCurSeg", 			offsetof(struct per_proc_info *, ppCurSeg));
-	DECLARE("ppSegSteal", 			offsetof(struct per_proc_info *, ppSegSteal));
-
-	DECLARE("VMMareaPhys", 			offsetof(struct per_proc_info *, VMMareaPhys));
-	DECLARE("VMMXAFlgs", 			offsetof(struct per_proc_info *, VMMXAFlgs));
-	DECLARE("FAMintercept", 		offsetof(struct per_proc_info *, FAMintercept));
-
-	DECLARE("ppUMWmp", 				offsetof(struct per_proc_info *, ppUMWmp));
-
-	DECLARE("tempr0", 				offsetof(struct per_proc_info *, tempr0));
-	DECLARE("tempr1", 				offsetof(struct per_proc_info *, tempr1));
-	DECLARE("tempr2", 				offsetof(struct per_proc_info *, tempr2));
-	DECLARE("tempr3", 				offsetof(struct per_proc_info *, tempr3));
-	DECLARE("tempr4", 				offsetof(struct per_proc_info *, tempr4));
-	DECLARE("tempr5", 				offsetof(struct per_proc_info *, tempr5));
-	DECLARE("tempr6", 				offsetof(struct per_proc_info *, tempr6));
-	DECLARE("tempr7", 				offsetof(struct per_proc_info *, tempr7));
-	DECLARE("tempr8", 				offsetof(struct per_proc_info *, tempr8));
-	DECLARE("tempr9", 				offsetof(struct per_proc_info *, tempr9));
-	DECLARE("tempr10", 				offsetof(struct per_proc_info *, tempr10));
-	DECLARE("tempr11", 				offsetof(struct per_proc_info *, tempr11));
-	DECLARE("tempr12", 				offsetof(struct per_proc_info *, tempr12));
-	DECLARE("tempr13", 				offsetof(struct per_proc_info *, tempr13));
-	DECLARE("tempr14", 				offsetof(struct per_proc_info *, tempr14));
-	DECLARE("tempr15", 				offsetof(struct per_proc_info *, tempr15));
-	DECLARE("tempr16", 				offsetof(struct per_proc_info *, tempr16));
-	DECLARE("tempr17", 				offsetof(struct per_proc_info *, tempr17));
-	DECLARE("tempr18", 				offsetof(struct per_proc_info *, tempr18));
-	DECLARE("tempr19", 				offsetof(struct per_proc_info *, tempr19));
-	DECLARE("tempr20", 				offsetof(struct per_proc_info *, tempr20));
-	DECLARE("tempr21", 				offsetof(struct per_proc_info *, tempr21));
-	DECLARE("tempr22", 				offsetof(struct per_proc_info *, tempr22));
-	DECLARE("tempr23", 				offsetof(struct per_proc_info *, tempr23));
-	DECLARE("tempr24", 				offsetof(struct per_proc_info *, tempr24));
-	DECLARE("tempr25", 				offsetof(struct per_proc_info *, tempr25));
-	DECLARE("tempr26", 				offsetof(struct per_proc_info *, tempr26));
-	DECLARE("tempr27", 				offsetof(struct per_proc_info *, tempr27));
-	DECLARE("tempr28", 				offsetof(struct per_proc_info *, tempr28));
-	DECLARE("tempr29", 				offsetof(struct per_proc_info *, tempr29));
-	DECLARE("tempr30", 				offsetof(struct per_proc_info *, tempr30));
-	DECLARE("tempr31", 				offsetof(struct per_proc_info *, tempr31));
-
-	DECLARE("emfp0", 				offsetof(struct per_proc_info *, emfp0));
-	DECLARE("emfp1", 				offsetof(struct per_proc_info *, emfp1));
-	DECLARE("emfp2", 				offsetof(struct per_proc_info *, emfp2));
-	DECLARE("emfp3", 				offsetof(struct per_proc_info *, emfp3));
-	DECLARE("emfp4", 				offsetof(struct per_proc_info *, emfp4));
-	DECLARE("emfp5", 				offsetof(struct per_proc_info *, emfp5));
-	DECLARE("emfp6", 				offsetof(struct per_proc_info *, emfp6));
-	DECLARE("emfp7", 				offsetof(struct per_proc_info *, emfp7));
-	DECLARE("emfp8", 				offsetof(struct per_proc_info *, emfp8));
-	DECLARE("emfp9", 				offsetof(struct per_proc_info *, emfp9));
-	DECLARE("emfp10", 				offsetof(struct per_proc_info *, emfp10));
-	DECLARE("emfp11", 				offsetof(struct per_proc_info *, emfp11));
-	DECLARE("emfp12", 				offsetof(struct per_proc_info *, emfp12));
-	DECLARE("emfp13", 				offsetof(struct per_proc_info *, emfp13));
-	DECLARE("emfp14", 				offsetof(struct per_proc_info *, emfp14));
-	DECLARE("emfp15", 				offsetof(struct per_proc_info *, emfp15));
-	DECLARE("emfp16", 				offsetof(struct per_proc_info *, emfp16));
-	DECLARE("emfp17", 				offsetof(struct per_proc_info *, emfp17));
-	DECLARE("emfp18", 				offsetof(struct per_proc_info *, emfp18));
-	DECLARE("emfp19", 				offsetof(struct per_proc_info *, emfp19));
-	DECLARE("emfp20", 				offsetof(struct per_proc_info *, emfp20));
-	DECLARE("emfp21", 				offsetof(struct per_proc_info *, emfp21));
-	DECLARE("emfp22", 				offsetof(struct per_proc_info *, emfp22));
-	DECLARE("emfp23", 				offsetof(struct per_proc_info *, emfp23));
-	DECLARE("emfp24", 				offsetof(struct per_proc_info *, emfp24));
-	DECLARE("emfp25", 				offsetof(struct per_proc_info *, emfp25));
-	DECLARE("emfp26", 				offsetof(struct per_proc_info *, emfp26));
-	DECLARE("emfp27", 				offsetof(struct per_proc_info *, emfp27));
-	DECLARE("emfp28", 				offsetof(struct per_proc_info *, emfp28));
-	DECLARE("emfp29", 				offsetof(struct per_proc_info *, emfp29));
-	DECLARE("emfp30", 				offsetof(struct per_proc_info *, emfp30));
-	DECLARE("emfp31", 				offsetof(struct per_proc_info *, emfp31));
-	DECLARE("emfpscr_pad", 			offsetof(struct per_proc_info *, emfpscr_pad));
-	DECLARE("emfpscr", 				offsetof(struct per_proc_info *, emfpscr));
-
-	DECLARE("emvr0", 				offsetof(struct per_proc_info *, emvr0));
-	DECLARE("emvr1", 				offsetof(struct per_proc_info *, emvr1));
-	DECLARE("emvr2", 				offsetof(struct per_proc_info *, emvr2));
-	DECLARE("emvr3", 				offsetof(struct per_proc_info *, emvr3));
-	DECLARE("emvr4", 				offsetof(struct per_proc_info *, emvr4));
-	DECLARE("emvr5", 				offsetof(struct per_proc_info *, emvr5));
-	DECLARE("emvr6", 				offsetof(struct per_proc_info *, emvr6));
-	DECLARE("emvr7", 				offsetof(struct per_proc_info *, emvr7));
-	DECLARE("emvr8", 				offsetof(struct per_proc_info *, emvr8));
-	DECLARE("emvr9", 				offsetof(struct per_proc_info *, emvr9));
-	DECLARE("emvr10", 				offsetof(struct per_proc_info *, emvr10));
-	DECLARE("emvr11", 				offsetof(struct per_proc_info *, emvr11));
-	DECLARE("emvr12", 				offsetof(struct per_proc_info *, emvr12));
-	DECLARE("emvr13", 				offsetof(struct per_proc_info *, emvr13));
-	DECLARE("emvr14", 				offsetof(struct per_proc_info *, emvr14));
-	DECLARE("emvr15", 				offsetof(struct per_proc_info *, emvr15));
-	DECLARE("emvr16", 				offsetof(struct per_proc_info *, emvr16));
-	DECLARE("emvr17", 				offsetof(struct per_proc_info *, emvr17));
-	DECLARE("emvr18", 				offsetof(struct per_proc_info *, emvr18));
-	DECLARE("emvr19", 				offsetof(struct per_proc_info *, emvr19));
-	DECLARE("emvr20", 				offsetof(struct per_proc_info *, emvr20));
-	DECLARE("emvr21", 				offsetof(struct per_proc_info *, emvr21));
-	DECLARE("emvr22", 				offsetof(struct per_proc_info *, emvr22));
-	DECLARE("emvr23", 				offsetof(struct per_proc_info *, emvr23));
-	DECLARE("emvr24", 				offsetof(struct per_proc_info *, emvr24));
-	DECLARE("emvr25", 				offsetof(struct per_proc_info *, emvr25));
-	DECLARE("emvr26", 				offsetof(struct per_proc_info *, emvr26));
-	DECLARE("emvr27", 				offsetof(struct per_proc_info *, emvr27));
-	DECLARE("emvr28", 				offsetof(struct per_proc_info *, emvr28));
-	DECLARE("emvr29", 				offsetof(struct per_proc_info *, emvr29));
-	DECLARE("emvr30", 				offsetof(struct per_proc_info *, emvr30));
-	DECLARE("emvr31", 				offsetof(struct per_proc_info *, emvr31));
-	DECLARE("empadvr", 				offsetof(struct per_proc_info *, empadvr));
-	DECLARE("skipListPrev", 		offsetof(struct per_proc_info *, skipListPrev));
-	DECLARE("ppSize",				sizeof(struct per_proc_info));
-	DECLARE("ppe_paddr", 				offsetof(struct per_proc_entry *, ppe_paddr));
-	DECLARE("ppe_vaddr", 				offsetof(struct per_proc_entry *, ppe_vaddr));
-	DECLARE("ppeSize",				sizeof(struct per_proc_entry));
-	DECLARE("MAX_CPUS",				MAX_CPUS);
-	DECLARE("patcharea", 			offsetof(struct per_proc_info *, patcharea));
-
-	DECLARE("hwCounts",				offsetof(struct per_proc_info *, hwCtr));
-	DECLARE("hwInVains",			offsetof(struct per_proc_info *, hwCtr.hwInVains));
-	DECLARE("hwResets",				offsetof(struct per_proc_info *, hwCtr.hwResets));
-	DECLARE("hwMachineChecks",		offsetof(struct per_proc_info *, hwCtr.hwMachineChecks));
-	DECLARE("hwDSIs",				offsetof(struct per_proc_info *, hwCtr.hwDSIs));
-	DECLARE("hwISIs",				offsetof(struct per_proc_info *, hwCtr.hwISIs));
-	DECLARE("hwExternals",			offsetof(struct per_proc_info *, hwCtr.hwExternals));
-	DECLARE("hwAlignments",			offsetof(struct per_proc_info *, hwCtr.hwAlignments));
-	DECLARE("hwPrograms",			offsetof(struct per_proc_info *, hwCtr.hwPrograms));
-	DECLARE("hwFloatPointUnavailable",	offsetof(struct per_proc_info *, hwCtr.hwFloatPointUnavailable));
-	DECLARE("hwDecrementers",		offsetof(struct per_proc_info *, hwCtr.hwDecrementers));
-	DECLARE("hwIOErrors",			offsetof(struct per_proc_info *, hwCtr.hwIOErrors));
-	DECLARE("hwrsvd0",				offsetof(struct per_proc_info *, hwCtr.hwrsvd0));
-	DECLARE("hwSystemCalls",		offsetof(struct per_proc_info *, hwCtr.hwSystemCalls));
-	DECLARE("hwTraces",				offsetof(struct per_proc_info *, hwCtr.hwTraces));
-	DECLARE("hwFloatingPointAssists",	offsetof(struct per_proc_info *, hwCtr.hwFloatingPointAssists));
-	DECLARE("hwPerformanceMonitors",	offsetof(struct per_proc_info *, hwCtr.hwPerformanceMonitors));
-	DECLARE("hwAltivecs",			offsetof(struct per_proc_info *, hwCtr.hwAltivecs));
-	DECLARE("hwrsvd1",				offsetof(struct per_proc_info *, hwCtr.hwrsvd1));
-	DECLARE("hwrsvd2",				offsetof(struct per_proc_info *, hwCtr.hwrsvd2));
-	DECLARE("hwrsvd3",				offsetof(struct per_proc_info *, hwCtr.hwrsvd3));
-	DECLARE("hwInstBreakpoints",	offsetof(struct per_proc_info *, hwCtr.hwInstBreakpoints));
-	DECLARE("hwSystemManagements",	offsetof(struct per_proc_info *, hwCtr.hwSystemManagements));
-	DECLARE("hwAltivecAssists",		offsetof(struct per_proc_info *, hwCtr.hwAltivecAssists));
-	DECLARE("hwThermal",			offsetof(struct per_proc_info *, hwCtr.hwThermal));
-	DECLARE("hwrsvd5",				offsetof(struct per_proc_info *, hwCtr.hwrsvd5));
-	DECLARE("hwrsvd6",				offsetof(struct per_proc_info *, hwCtr.hwrsvd6));
-	DECLARE("hwrsvd7",				offsetof(struct per_proc_info *, hwCtr.hwrsvd7));
-	DECLARE("hwrsvd8",				offsetof(struct per_proc_info *, hwCtr.hwrsvd8));
-	DECLARE("hwrsvd9",				offsetof(struct per_proc_info *, hwCtr.hwrsvd9));
-	DECLARE("hwrsvd10",				offsetof(struct per_proc_info *, hwCtr.hwrsvd10));
-	DECLARE("hwrsvd11",				offsetof(struct per_proc_info *, hwCtr.hwrsvd11));
-	DECLARE("hwrsvd12",				offsetof(struct per_proc_info *, hwCtr.hwrsvd12));
-	DECLARE("hwrsvd13",				offsetof(struct per_proc_info *, hwCtr.hwrsvd13));
-	DECLARE("hwTrace601",			offsetof(struct per_proc_info *, hwCtr.hwTrace601));
-	DECLARE("hwSIGPs",				offsetof(struct per_proc_info *, hwCtr.hwSIGPs));
-	DECLARE("hwPreemptions",		offsetof(struct per_proc_info *, hwCtr.hwPreemptions));
-	DECLARE("hwContextSwitchs",		offsetof(struct per_proc_info *, hwCtr.hwContextSwitchs));
-	DECLARE("hwShutdowns",			offsetof(struct per_proc_info *, hwCtr.hwShutdowns));
-	DECLARE("hwChokes",				offsetof(struct per_proc_info *, hwCtr.hwChokes));
-	DECLARE("hwDataSegments",		offsetof(struct per_proc_info *, hwCtr.hwDataSegments));
-	DECLARE("hwInstructionSegments",	offsetof(struct per_proc_info *, hwCtr.hwInstructionSegments));
-	DECLARE("hwSoftPatches",		offsetof(struct per_proc_info *, hwCtr.hwSoftPatches));
-	DECLARE("hwMaintenances",		offsetof(struct per_proc_info *, hwCtr.hwMaintenances));
-	DECLARE("hwInstrumentations",	offsetof(struct per_proc_info *, hwCtr.hwInstrumentations));
-	DECLARE("hwRedrives",			offsetof(struct per_proc_info *, hwCtr.hwRedrives));
-	DECLARE("hwIgnored",			offsetof(struct per_proc_info *, hwCtr.hwIgnored));
-	DECLARE("hwhdec",				offsetof(struct per_proc_info *, hwCtr.hwhdec));
-	DECLARE("hwSteals",				offsetof(struct per_proc_info *, hwCtr.hwSteals));
-	
-	DECLARE("hwWalkPhys",			offsetof(struct per_proc_info *, hwCtr.hwWalkPhys));
-	DECLARE("hwWalkFull",			offsetof(struct per_proc_info *, hwCtr.hwWalkFull));
-	DECLARE("hwWalkMerge",			offsetof(struct per_proc_info *, hwCtr.hwWalkMerge));
-	DECLARE("hwWalkQuick",			offsetof(struct per_proc_info *, hwCtr.hwWalkQuick));
-
-	DECLARE("hwMckHang",			offsetof(struct per_proc_info *, hwCtr.hwMckHang));
-	DECLARE("hwMckSLBPE",			offsetof(struct per_proc_info *, hwCtr.hwMckSLBPE));
-	DECLARE("hwMckTLBPE",			offsetof(struct per_proc_info *, hwCtr.hwMckTLBPE));
-	DECLARE("hwMckERCPE",			offsetof(struct per_proc_info *, hwCtr.hwMckERCPE));
-	DECLARE("hwMckL1DPE",			offsetof(struct per_proc_info *, hwCtr.hwMckL1DPE));
-	DECLARE("hwMckL1TPE",			offsetof(struct per_proc_info *, hwCtr.hwMckL1TPE));
-	DECLARE("hwMckUE",				offsetof(struct per_proc_info *, hwCtr.hwMckUE));
-	DECLARE("hwMckIUE",				offsetof(struct per_proc_info *, hwCtr.hwMckIUE));
-	DECLARE("hwMckIUEr",			offsetof(struct per_proc_info *, hwCtr.hwMckIUEr));
-	DECLARE("hwMckDUE",				offsetof(struct per_proc_info *, hwCtr.hwMckDUE));
-	DECLARE("hwMckDTW",				offsetof(struct per_proc_info *, hwCtr.hwMckDTW));
-	DECLARE("hwMckUnk",				offsetof(struct per_proc_info *, hwCtr.hwMckUnk));
-	DECLARE("hwMckExt",				offsetof(struct per_proc_info *, hwCtr.hwMckExt));
-	DECLARE("hwMckICachePE",		offsetof(struct per_proc_info *, hwCtr.hwMckICachePE));
-	DECLARE("hwMckITagPE",			offsetof(struct per_proc_info *, hwCtr.hwMckITagPE));
-	DECLARE("hwMckIEratPE",			offsetof(struct per_proc_info *, hwCtr.hwMckIEratPE));
-	DECLARE("hwMckDEratPE",			offsetof(struct per_proc_info *, hwCtr.hwMckDEratPE));
-
-	DECLARE("ijsave", 				offsetof(struct per_proc_info *, ijsave));
-
-	DECLARE("napStamp", 			offsetof(struct per_proc_info *, hwCtr.napStamp));
-	DECLARE("napTotal", 			offsetof(struct per_proc_info *, hwCtr.napTotal));
-	DECLARE("PP_PROCESSOR",			offsetof(struct per_proc_info *, processor[0]));
-	DECLARE("PP_PROCESSOR_SIZE",	sizeof(((struct per_proc_info *)0)->processor));
-	DECLARE("PROCESSOR_SIZE",		sizeof (struct processor));
-
-	DECLARE("patchAddr",			offsetof(struct patch_entry *, addr));
-	DECLARE("patchData",			offsetof(struct patch_entry *, data));
-	DECLARE("patchType",			offsetof(struct patch_entry *, type));
-	DECLARE("patchValue",			offsetof(struct patch_entry *, value));
-	DECLARE("peSize", 				sizeof(patch_entry_t));
-	DECLARE("PATCH_PROCESSOR",		PATCH_PROCESSOR);
-	DECLARE("PATCH_FEATURE",		PATCH_FEATURE);
-    DECLARE("PATCH_END_OF_TABLE",   PATCH_END_OF_TABLE);
-	DECLARE("PatchExt32",			PatchExt32);
-	DECLARE("PatchExt32b",			PatchExt32b);
-	DECLARE("PatchLwsync",			PatchLwsync);
-	DECLARE("PatchLwsyncb",			PatchLwsyncb);
-
-	DECLARE("RESETHANDLER_TYPE", 	offsetof(struct resethandler *, type));
-	DECLARE("RESETHANDLER_CALL", 	offsetof(struct resethandler *, call_paddr));
-	DECLARE("RESETHANDLER_ARG", 	offsetof(struct resethandler *, arg__paddr));
-
-	/* we want offset from
-	 * bottom of kernel stack, not offset into structure
-	 */
-#define IKSBASE (u_int)STACK_IKS(0)
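-	/*
-	 * Presumably a field kept in that save area would then be
-	 * published as IKSBASE + offsetof(...), i.e. as a displacement
-	 * from the stack bottom rather than into the structure.
-	 */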
-
-	/* values from kern/thread.h */
-	DECLARE("THREAD_STATE",		offsetof(thread_t, state));
-	DECLARE("TH_IDLE",				TH_IDLE);
-	DECLARE("THREAD_KERNEL_STACK",	offsetof(thread_t, kernel_stack));
-	DECLARE("THREAD_RECOVER",		offsetof(thread_t, recover));
-	DECLARE("THREAD_FUNNEL_LOCK",
-			offsetof(thread_t, funnel_lock));
-	DECLARE("THREAD_FUNNEL_STATE",
-			offsetof(thread_t, funnel_state));
-	DECLARE("LOCK_FNL_MUTEX",
-			offsetof(struct funnel_lock *, fnl_mutex));
-
-	DECLARE("ACT_TASK",				offsetof(thread_t, task));
-	DECLARE("ACT_MACT_PCB",			offsetof(thread_t, machine.pcb));
-	DECLARE("ACT_MACT_UPCB",		offsetof(thread_t, machine.upcb));
-	DECLARE("ACT_AST",				offsetof(thread_t, ast));
-	DECLARE("ACT_VMMAP",			offsetof(thread_t, map));
-	DECLARE("vmmCEntry",			offsetof(thread_t, machine.vmmCEntry));
-	DECLARE("vmmControl",			offsetof(thread_t, machine.vmmControl));
-	DECLARE("curctx",				offsetof(thread_t, machine.curctx));
-	DECLARE("deferctx",				offsetof(thread_t, machine.deferctx));
-	DECLARE("facctx",				offsetof(thread_t, machine.facctx));
-#ifdef MACH_BSD
-	DECLARE("CTHREAD_SELF",			offsetof(thread_t, machine.cthread_self));
-#endif  
-
-	DECLARE("FPUsave",				offsetof(struct facility_context *,FPUsave));
-	DECLARE("FPUlevel",				offsetof(struct facility_context *,FPUlevel));
-	DECLARE("FPUcpu",				offsetof(struct facility_context *,FPUcpu));
-	DECLARE("FPUsync",				offsetof(struct facility_context *,FPUsync));
-	DECLARE("VMXsave",				offsetof(struct facility_context *,VMXsave));
-	DECLARE("VMXlevel",				offsetof(struct facility_context *,VMXlevel));
-	DECLARE("VMXcpu",				offsetof(struct facility_context *,VMXcpu));
-	DECLARE("VMXsync",				offsetof(struct facility_context *,VMXsync));
-	DECLARE("facAct",				offsetof(struct facility_context *,facAct));
-
-	/* Values from vmachmon.h */
-	
-	DECLARE("kVmmGetVersion", 		kVmmGetVersion);
-	DECLARE("kVmmvGetFeatures",		kVmmvGetFeatures);
-	DECLARE("kVmmInitContext", 		kVmmInitContext);
-	DECLARE("kVmmTearDownContext", 	kVmmTearDownContext);
-	DECLARE("kVmmTearDownAll", 		kVmmTearDownAll);
-	DECLARE("kVmmMapPage", 			kVmmMapPage);
-	DECLARE("kVmmGetPageMapping", 	kVmmGetPageMapping);
-	DECLARE("kVmmUnmapPage", 		kVmmUnmapPage);
-	DECLARE("kVmmUnmapAllPages", 	kVmmUnmapAllPages);
-	DECLARE("kVmmGetPageDirtyFlag", kVmmGetPageDirtyFlag);
-	DECLARE("kVmmGetFloatState",	kVmmGetFloatState);
-	DECLARE("kVmmGetVectorState",	kVmmGetVectorState);
-	DECLARE("kVmmSetTimer", 		kVmmSetTimer);
-	DECLARE("kVmmGetTimer", 		kVmmGetTimer);
-	DECLARE("kVmmExecuteVM", 		kVmmExecuteVM);
-	DECLARE("kVmmProtectPage", 		kVmmProtectPage);
-	DECLARE("kVmmMapList", 			kVmmMapList);
-	DECLARE("kVmmUnmapList", 		kVmmUnmapList);
-	DECLARE("kVmmActivateXA", 		kVmmActivateXA);
-	DECLARE("kVmmDeactivateXA", 	kVmmDeactivateXA);
-	DECLARE("kVmmGetXA",			kVmmGetXA);
-	DECLARE("kVmmMapPage64", 		kVmmMapPage64);
-	DECLARE("kVmmGetPageMapping64",	kVmmGetPageMapping64);
-	DECLARE("kVmmUnmapPage64", 		kVmmUnmapPage64);
-	DECLARE("kVmmGetPageDirtyFlag64", 	kVmmGetPageDirtyFlag64);
-	DECLARE("kVmmMapExecute64", 	kVmmMapExecute64);
-	DECLARE("kVmmProtectExecute64", kVmmProtectExecute64);
-	DECLARE("kVmmMapList64", 		kVmmMapList64);
-	DECLARE("kVmmUnmapList64", 		kVmmUnmapList64);
-	DECLARE("kvmmExitToHost",		kvmmExitToHost);
-	DECLARE("kvmmResumeGuest",		kvmmResumeGuest);
-	DECLARE("kvmmGetGuestRegister",	kvmmGetGuestRegister);
-	DECLARE("kvmmSetGuestRegister",	kvmmSetGuestRegister);
-
-	DECLARE("kVmmReturnNull",		kVmmReturnNull);
-	DECLARE("kVmmStopped",			kVmmStopped);
-	DECLARE("kVmmBogusContext",		kVmmBogusContext);
-	DECLARE("kVmmReturnDataPageFault",	kVmmReturnDataPageFault);
-	DECLARE("kVmmReturnInstrPageFault",	kVmmReturnInstrPageFault);
-	DECLARE("kVmmReturnAlignmentFault",	kVmmReturnAlignmentFault);
-	DECLARE("kVmmReturnProgramException",	kVmmReturnProgramException);
-	DECLARE("kVmmReturnSystemCall",		kVmmReturnSystemCall);
-	DECLARE("kVmmReturnTraceException",	kVmmReturnTraceException);
-	DECLARE("kVmmInvalidAdSpace",	kVmmInvalidAdSpace);
-
-	DECLARE("kVmmProtXtnd",			kVmmProtXtnd);
-	DECLARE("kVmmProtNARW",			kVmmProtNARW);
-	DECLARE("kVmmProtRORW",			kVmmProtRORW);
-	DECLARE("kVmmProtRWRW",			kVmmProtRWRW);
-	DECLARE("kVmmProtRORO",			kVmmProtRORO);
-	
-	DECLARE("vmmFlags",				offsetof(struct vmmCntrlEntry *, vmmFlags));
-	DECLARE("vmmXAFlgs",			offsetof(struct vmmCntrlEntry *, vmmXAFlgs));
-	DECLARE("vmmPmap",				offsetof(struct vmmCntrlEntry *, vmmPmap));
-	DECLARE("vmmInUseb",			vmmInUseb);
-	DECLARE("vmmInUse",				vmmInUse);
-	DECLARE("vmmContextKern",		offsetof(struct vmmCntrlEntry *, vmmContextKern));
-	DECLARE("vmmContextPhys",		offsetof(struct vmmCntrlEntry *, vmmContextPhys));
-	DECLARE("vmmContextUser",		offsetof(struct vmmCntrlEntry *, vmmContextUser));
-	DECLARE("vmmFacCtx",			offsetof(struct vmmCntrlEntry *, vmmFacCtx));
-	DECLARE("vmmLastMap",			offsetof(struct vmmCntrlTable *, vmmLastMap));
-	DECLARE("vmmGFlags",			offsetof(struct vmmCntrlTable *, vmmGFlags));
-	DECLARE("vmmc",					offsetof(struct vmmCntrlTable *, vmmc));
-	DECLARE("vmmAdsp",				offsetof(struct vmmCntrlTable *, vmmAdsp));
-	DECLARE("vmmLastAdSp",			vmmLastAdSp);
-	DECLARE("vmmFAMintercept",		offsetof(struct vmmCntrlEntry *, vmmFAMintercept));
-	DECLARE("vmmCEntrySize",		sizeof(struct vmmCntrlEntry));
-	DECLARE("kVmmMaxContexts",		kVmmMaxContexts);
-	
-	DECLARE("interface_version",	offsetof(struct vmm_state_page_t *, interface_version));
-	DECLARE("thread_index",			offsetof(struct vmm_state_page_t *, thread_index));
-	DECLARE("vmmStat",				offsetof(struct vmm_state_page_t *, vmmStat));
-	DECLARE("vmmCntrl",				offsetof(struct vmm_state_page_t *, vmmCntrl));
-	DECLARE("vmm_proc_state",		offsetof(struct vmm_state_page_t *, vmm_proc_state));
-
-	DECLARE("return_code",			offsetof(struct vmm_state_page_t *, return_code));
-
-	DECLARE("return_params",		offsetof(struct vmm_state_page_t *, vmmRet.vmmrp32.return_params));
-	DECLARE("return_paramsX",		offsetof(struct vmm_state_page_t *, vmmRet.vmmrp64.return_params));
-
-#if 0
-	DECLARE("return_params",		offsetof(struct vmm_state_page_t *, return_params));
-	DECLARE("vmm_proc_state",		offsetof(struct vmm_state_page_t *, vmm_proc_state));
-#endif
-	DECLARE("vmmppcVRs",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcVRs));
-	DECLARE("vmmppcVSCR",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcVSCR));
-	DECLARE("vmmppcFPRs",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcFPRs));
-	DECLARE("vmmppcFPSCR",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcFPSCR));
-
-	DECLARE("vmmppcpc",				offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcPC));
-	DECLARE("vmmppcmsr",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcMSR));
-	DECLARE("vmmppcr0",				offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x00));
-	DECLARE("vmmppcr1",				offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x04));
-	DECLARE("vmmppcr2",				offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x08));
-	DECLARE("vmmppcr3",				offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x0C));
-	DECLARE("vmmppcr4",				offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x10));
-	DECLARE("vmmppcr5",				offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x14));
-
-	DECLARE("vmmppcr6",				offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x18));
-	DECLARE("vmmppcr7",				offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x1C));
-	DECLARE("vmmppcr8",				offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x20));
-	DECLARE("vmmppcr9",				offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x24));
-	DECLARE("vmmppcr10",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x28));
-	DECLARE("vmmppcr11",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x2C));
-	DECLARE("vmmppcr12",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x30));
-	DECLARE("vmmppcr13",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x34));
-
-	DECLARE("vmmppcr14",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x38));
-	DECLARE("vmmppcr15",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x3C));
-	DECLARE("vmmppcr16",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x40));
-	DECLARE("vmmppcr17",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x44));
-	DECLARE("vmmppcr18",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x48));
-	DECLARE("vmmppcr19",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x4C));
-	DECLARE("vmmppcr20",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x50));
-	DECLARE("vmmppcr21",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x54));
-
-	DECLARE("vmmppcr22",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x58));
-	DECLARE("vmmppcr23",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x5C));
-	DECLARE("vmmppcr24",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x60));
-	DECLARE("vmmppcr25",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x64));
-	DECLARE("vmmppcr26",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x68));
-	DECLARE("vmmppcr27",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x6C));
-	DECLARE("vmmppcr28",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x70));
-	DECLARE("vmmppcr29",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x74));
-
-	DECLARE("vmmppcr30",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x78));
-	DECLARE("vmmppcr31",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcGPRs+0x7C));
-	DECLARE("vmmppccr",				offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcCR));
-	DECLARE("vmmppcxer",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcXER));
-	DECLARE("vmmppclr",				offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcLR));
-	DECLARE("vmmppcctr",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcCTR));
-	DECLARE("vmmppcmq",				offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcMQ));
-	DECLARE("vmmppcvrsave",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs32.ppcVRSave));	
-
-	DECLARE("vmmppcXpc",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcPC));
-	DECLARE("vmmppcXmsr",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcMSR));
-	DECLARE("vmmppcXr0",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x00));
-	DECLARE("vmmppcXr1",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x08));
-	DECLARE("vmmppcXr2",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x10));
-	DECLARE("vmmppcXr3",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x18));
-	DECLARE("vmmppcXr4",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x20));
-	DECLARE("vmmppcXr5",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x28));
-
-	DECLARE("vmmppcXr6",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x30));
-	DECLARE("vmmppcXr7",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x38));
-	DECLARE("vmmppcXr8",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x40));
-	DECLARE("vmmppcXr9",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x48));
-	DECLARE("vmmppcXr10",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x50));
-	DECLARE("vmmppcXr11",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x58));
-	DECLARE("vmmppcXr12",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x60));
-	DECLARE("vmmppcXr13",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x68));
-
-	DECLARE("vmmppcXr14",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x70));
-	DECLARE("vmmppcXr15",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x78));
-	DECLARE("vmmppcXr16",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x80));
-	DECLARE("vmmppcXr17",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x88));
-	DECLARE("vmmppcXr18",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x90));
-	DECLARE("vmmppcXr19",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0x98));
-	DECLARE("vmmppcXr20",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0xA0));
-	DECLARE("vmmppcXr21",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0xA8));
-
-	DECLARE("vmmppcXr22",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0xB0));
-	DECLARE("vmmppcXr23",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0xB8));
-	DECLARE("vmmppcXr24",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0xC0));
-	DECLARE("vmmppcXr25",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0xC8));
-	DECLARE("vmmppcXr26",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0xD0));
-	DECLARE("vmmppcXr27",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0xD8));
-	DECLARE("vmmppcXr28",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0xE0));
-	DECLARE("vmmppcXr29",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0xE8));
-
-	DECLARE("vmmppcXr30",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0xF0));
-	DECLARE("vmmppcXr31",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcGPRs+0xF8));
-	DECLARE("vmmppcXcr",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcCR));
-	DECLARE("vmmppcXxer",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcXER));
-	DECLARE("vmmppcXlr",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcLR));
-	DECLARE("vmmppcXctr",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcCTR));
-	DECLARE("vmmppcXvrsave",		offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcRegs.ppcRegs64.ppcVRSave));	
-
-	DECLARE("vmmppcvscr",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcVSCR+0x00));	
-	DECLARE("vmmppcfpscrpad",		offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcFPSCR));
-	DECLARE("vmmppcfpscr",			offsetof(struct vmm_state_page_t *, vmm_proc_state.ppcFPSCR+4));
-
-	DECLARE("famguestr0",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs32.guest_register));
-	DECLARE("famguestr1",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs32.guest_register+0x4));
-	DECLARE("famguestr2",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs32.guest_register+0x8));
-	DECLARE("famguestr3",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs32.guest_register+0xC));
-	DECLARE("famguestr4",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs32.guest_register+0x10));
-	DECLARE("famguestr5",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs32.guest_register+0x14));
-	DECLARE("famguestr6",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs32.guest_register+0x18));
-	DECLARE("famguestr7",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs32.guest_register+0x1C));
-	DECLARE("famguestpc",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs32.guest_pc));
-	DECLARE("famguestmsr",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs32.guest_msr));
-	DECLARE("famdispcode",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs32.fastassist_dispatch_code));
-	DECLARE("famrefcon",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs32.fastassist_refcon));
-	DECLARE("famparam",				offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs32.fastassist_parameter));
-	DECLARE("famhandler",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs32.fastassist_dispatch));
-	DECLARE("famintercepts",		offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs32.fastassist_intercepts));
-
-	DECLARE("famguestXr0",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs64.guest_register));
-	DECLARE("famguestXr1",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs64.guest_register+0x8));
-	DECLARE("famguestXr2",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs64.guest_register+0x10));
-	DECLARE("famguestXr3",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs64.guest_register+0x18));
-	DECLARE("famguestXr4",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs64.guest_register+0x20));
-	DECLARE("famguestXr5",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs64.guest_register+0x28));
-	DECLARE("famguestXr6",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs64.guest_register+0x30));
-	DECLARE("famguestXr7",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs64.guest_register+0x38));
-	DECLARE("famguestXpc",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs64.guest_pc));
-	DECLARE("famguestXmsr",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs64.guest_msr));
-	DECLARE("famdispcodeX",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs64.fastassist_dispatch_code));
-	DECLARE("famrefconX",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs64.fastassist_refcon));
-	DECLARE("famparamX",				offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs64.fastassist_parameter));
-	DECLARE("famhandlerX",			offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs64.fastassist_dispatch));
-	DECLARE("faminterceptsX",		offsetof(struct vmm_state_page_t *, vmm_fastassist_state.vmmfs64.fastassist_intercepts));
-
-	DECLARE("vmmFloatCngd",			vmmFloatCngd);
-	DECLARE("vmmFloatCngdb",		vmmFloatCngdb);
-	DECLARE("vmmVectCngd",			vmmVectCngd);
-	DECLARE("vmmVectCngdb",			vmmVectCngdb);
-	DECLARE("vmmTimerPop",			vmmTimerPop);
-	DECLARE("vmmTimerPopb",			vmmTimerPopb);
-	DECLARE("vmmFAMmode",			vmmFAMmode);
-	DECLARE("vmmFAMmodeb",			vmmFAMmodeb);
-	DECLARE("vmmSpfSave",			vmmSpfSave);
-	DECLARE("vmmSpfSaveb",			vmmSpfSaveb);
-	DECLARE("vmmFloatLoad",			vmmFloatLoad);
-	DECLARE("vmmFloatLoadb",		vmmFloatLoadb);
-	DECLARE("vmmVectLoad",			vmmVectLoad);
-	DECLARE("vmmVectLoadb",			vmmVectLoadb);
-	DECLARE("vmmVectVRall",			vmmVectVRall);
-	DECLARE("vmmVectVRallb",		vmmVectVRallb);
-	DECLARE("vmmVectVAss",			vmmVectVAss);
-	DECLARE("vmmVectVAssb",			vmmVectVAssb);
-	DECLARE("vmmXStart",			vmmXStart);
-	DECLARE("vmmXStartb",			vmmXStartb);
-	DECLARE("vmmXStop",				vmmXStop);
-	DECLARE("vmmXStopb",			vmmXStopb);
-	DECLARE("vmmKey",				vmmKey);
-	DECLARE("vmmKeyb",				vmmKeyb);
-	DECLARE("vmmFamSet",			vmmFamSet);
-	DECLARE("vmmFamSetb",			vmmFamSetb);
-	DECLARE("vmmFamEna",			vmmFamEna);
-	DECLARE("vmmFamEnab",			vmmFamEnab);
-	DECLARE("vmm64Bit",				vmm64Bit);
-
-	/* values from kern/task.h */
-	DECLARE("TASK_SYSCALLS_MACH",	offsetof(struct task *, syscalls_mach));
-	DECLARE("TASK_SYSCALLS_UNIX",	offsetof(struct task *, syscalls_unix));
-
-	DECLARE("TASK_VTIMERS",			offsetof(struct task *, vtimers));
-
-	/* values from vm/vm_map.h */
-	DECLARE("VMMAP_PMAP",	offsetof(struct _vm_map *, pmap));
-
-	/* values from machine/pmap.h */
-	DECLARE("pmapSpace",			offsetof(struct pmap *, space));
-	DECLARE("spaceNum",				offsetof(struct pmap *, spaceNum));
-	DECLARE("pmapSXlk",				offsetof(struct pmap *, pmapSXlk));
-	DECLARE("pmapCCtl",				offsetof(struct pmap *, pmapCCtl));
-    DECLARE("pmapCCtlVal",			pmapCCtlVal);
-    DECLARE("pmapCCtlLck",			pmapCCtlLck);
-    DECLARE("pmapCCtlLckb",			pmapCCtlLckb);
-    DECLARE("pmapCCtlGen",			pmapCCtlGen);
-    DECLARE("pmapSegCacheCnt",		pmapSegCacheCnt);
-    DECLARE("pmapSegCacheUse",		pmapSegCacheUse);
-	DECLARE("pmapvr",				offsetof(struct pmap *, pmapvr));
-	DECLARE("pmapFlags",			offsetof(struct pmap *, pmapFlags));
-    DECLARE("pmapKeys",				pmapKeys);
-    DECLARE("pmapKeyDef",			pmapKeyDef);
-	DECLARE("pmapSCSubTag",			offsetof(struct pmap *, pmapSCSubTag));
-	DECLARE("pmapVmmExt",			offsetof(struct pmap *, pmapVmmExt));
-	DECLARE("pmapVmmExtPhys",		offsetof(struct pmap *, pmapVmmExtPhys));
-	DECLARE("pmapVMhost",			pmapVMhost);
-	DECLARE("pmapVMgsaa",			pmapVMgsaa);
-	DECLARE("pmapSegCache",			offsetof(struct pmap *, pmapSegCache));
-	DECLARE("pmapCurLists",			offsetof(struct pmap *, pmapCurLists));
-	DECLARE("pmapRandNum",			offsetof(struct pmap *, pmapRandNum));
-	DECLARE("pmapSkipLists",		offsetof(struct pmap *, pmapSkipLists));
-	DECLARE("pmapSearchVisits",		offsetof(struct pmap *, pmapSearchVisits));
-	DECLARE("pmapSearchCnt",		offsetof(struct pmap *, pmapSearchCnt));
-	DECLARE("pmapSize",				pmapSize);
-    DECLARE("kSkipListFanoutShift",	kSkipListFanoutShift);
-    DECLARE("kSkipListMaxLists",	kSkipListMaxLists);
-    DECLARE("invalSpace",			invalSpace);
-
-	DECLARE("sgcESID",				offsetof(struct sgc *, sgcESID));
-	DECLARE("sgcESmsk",				sgcESmsk);
-	DECLARE("sgcVSID",				offsetof(struct sgc *, sgcVSID));
-	DECLARE("sgcVSmsk",				sgcVSmsk);
-	DECLARE("sgcVSKeys",			sgcVSKeys);
-	DECLARE("sgcVSKeyUsr",			sgcVSKeyUsr);
-	DECLARE("sgcVSNoEx",			sgcVSNoEx);
-	DECLARE("pmapPAddr",			offsetof(struct pmapTransTab *, pmapPAddr));
-	DECLARE("pmapVAddr",			offsetof(struct pmapTransTab *, pmapVAddr));
-	DECLARE("pmapTransSize",		sizeof(pmapTransTab));
-	DECLARE("pmapResidentCnt",		offsetof(struct pmap *, stats.resident_count));
-	DECLARE("pmapResidentMax",		offsetof(struct pmap *, stats.resident_max));
-
-	DECLARE("maxAdrSp",				maxAdrSp);
-	DECLARE("maxAdrSpb",			maxAdrSpb);
-	
-	DECLARE("cppvPsnkb",			cppvPsnkb);
-	DECLARE("cppvPsrcb",			cppvPsrcb);
-	DECLARE("cppvFsnkb",			cppvFsnkb);
-	DECLARE("cppvFsrcb",			cppvFsrcb);
-	DECLARE("cppvNoModSnkb",		cppvNoModSnkb);
-	DECLARE("cppvNoRefSrcb",		cppvNoRefSrcb);
-	DECLARE("cppvKmapb",			cppvKmapb);
-	
-	DECLARE("vmxSalt",				offsetof(struct pmap_vmm_ext *, vmxSalt));
-	DECLARE("vmxHostPmapPhys",		offsetof(struct pmap_vmm_ext *, vmxHostPmapPhys));
-	DECLARE("vmxHostPmap",			offsetof(struct pmap_vmm_ext *,	vmxHostPmap));
-	DECLARE("vmxHashPgIdx",			offsetof(struct pmap_vmm_ext *, vmxHashPgIdx));
-	DECLARE("vmxHashPgList",		offsetof(struct pmap_vmm_ext *, vmxHashPgList));
-	DECLARE("vmxStats",				offsetof(struct pmap_vmm_ext *, vmxStats));
-	DECLARE("vmxSize",				sizeof(struct pmap_vmm_ext));
-	DECLARE("VMX_HPIDX_OFFSET",		VMX_HPIDX_OFFSET);
-	DECLARE("VMX_HPLIST_OFFSET",	VMX_HPLIST_OFFSET);
-	DECLARE("VMX_ACTMAP_OFFSET",	VMX_ACTMAP_OFFSET);
-	DECLARE("vxsGpf",				offsetof(struct pmap_vmm_ext *, vmxStats.vxsGpf));
-	DECLARE("vxsGpfMiss",			offsetof(struct pmap_vmm_ext *, vmxStats.vxsGpfMiss));
-	DECLARE("vxsGrm",				offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrm));
-	DECLARE("vxsGrmMiss",			offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrmMiss));
-	DECLARE("vxsGrmActive",			offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrmActive));
-	DECLARE("vxsGra",				offsetof(struct pmap_vmm_ext *, vmxStats.vxsGra));
-	DECLARE("vxsGraHits",			offsetof(struct pmap_vmm_ext *, vmxStats.vxsGraHits));
-	DECLARE("vxsGraActive",			offsetof(struct pmap_vmm_ext *, vmxStats.vxsGraActive));
-	DECLARE("vxsGrl",				offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrl));
-	DECLARE("vxsGrlActive",			offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrlActive));
-	DECLARE("vxsGrs",				offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrs));
-	DECLARE("vxsGrsHitAct",			offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrsHitAct));
-	DECLARE("vxsGrsHitSusp",		offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrsHitSusp));
-	DECLARE("vxsGrsMissGV",			offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrsMissGV));
-	DECLARE("vxsGrsHitPE",			offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrsHitPE));
-	DECLARE("vxsGrsMissPE",			offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrsMissPE));
-	DECLARE("vxsGad",				offsetof(struct pmap_vmm_ext *, vmxStats.vxsGad));
-	DECLARE("vxsGadHit",			offsetof(struct pmap_vmm_ext *, vmxStats.vxsGadHit));
-	DECLARE("vxsGadFree",			offsetof(struct pmap_vmm_ext *, vmxStats.vxsGadFree));
-	DECLARE("vxsGadDormant",		offsetof(struct pmap_vmm_ext *, vmxStats.vxsGadDormant));
-	DECLARE("vxsGadSteal",			offsetof(struct pmap_vmm_ext *, vmxStats.vxsGadSteal));
-	DECLARE("vxsGsu",				offsetof(struct pmap_vmm_ext *, vmxStats.vxsGsu));
-	DECLARE("vxsGsuHit",			offsetof(struct pmap_vmm_ext *, vmxStats.vxsGsuHit));
-	DECLARE("vxsGsuMiss",			offsetof(struct pmap_vmm_ext *, vmxStats.vxsGsuMiss));
-	DECLARE("vxsGtd",				offsetof(struct pmap_vmm_ext *, vmxStats.vxsGtd));
-	DECLARE("vxsGtdHit",			offsetof(struct pmap_vmm_ext *, vmxStats.vxsGtdHit));
-	DECLARE("vxsGtdMiss",			offsetof(struct pmap_vmm_ext *, vmxStats.vxsGtdMiss));
-
-	/* values from kern/timer.h */
-	DECLARE("TIMER_LOW",		offsetof(struct timer *, low_bits));
-	DECLARE("TIMER_HIGH",		offsetof(struct timer *, high_bits));
-	DECLARE("TIMER_HIGHCHK",	offsetof(struct timer *, high_bits_check));
-	DECLARE("TIMER_TSTAMP",		offsetof(struct timer *, tstamp));
-
-	DECLARE("THREAD_TIMER",		offsetof(struct processor *, processor_data.thread_timer));
-	DECLARE("KERNEL_TIMER",		offsetof(struct processor *, processor_data.kernel_timer));
-	DECLARE("SYSTEM_TIMER",		offsetof(struct thread *, system_timer));
-	DECLARE("USER_TIMER",		offsetof(struct thread *, user_timer));
-	DECLARE("SYSTEM_STATE",		offsetof(struct processor *, processor_data.system_state));
-	DECLARE("USER_STATE",		offsetof(struct processor *, processor_data.user_state));
-	DECLARE("CURRENT_STATE",	offsetof(struct processor *, processor_data.current_state));
-
-	/* Constants from pmap.h */
-	DECLARE("PPC_SID_KERNEL", PPC_SID_KERNEL);
-
-	/* values for accessing mach_trap table */
-	DECLARE("MACH_TRAP_ARG_MUNGE32",
-		offsetof(mach_trap_t *, mach_trap_arg_munge32));
-	DECLARE("MACH_TRAP_ARG_MUNGE64",
-		offsetof(mach_trap_t *, mach_trap_arg_munge64));
-	DECLARE("MACH_TRAP_ARGC",
-		offsetof(mach_trap_t *, mach_trap_arg_count));
-	DECLARE("MACH_TRAP_FUNCTION",
-		offsetof(mach_trap_t *, mach_trap_function));
-
-	DECLARE("MACH_TRAP_TABLE_COUNT", MACH_TRAP_TABLE_COUNT);
-
-	DECLARE("PPCcallmax", sizeof(PPCcalls));
-
-	/* Misc values used by assembler */
-	DECLARE("AST_ALL", AST_ALL);
-	DECLARE("AST_URGENT", AST_URGENT);
-	DECLARE("AST_BSD", AST_BSD);
-
-	/* Spin Lock structure */
-	DECLARE("SLOCK_ILK",	offsetof(lck_spin_t *, interlock));
-
-	/* Mutex structure */
-	DECLARE("MUTEX_DATA",	offsetof(lck_mtx_t *, lck_mtx_data));
-	DECLARE("MUTEX_WAITERS",offsetof(lck_mtx_t *, lck_mtx_waiters));
-	DECLARE("MUTEX_PROMOTED_PRI",offsetof(lck_mtx_t *, lck_mtx_pri));
-	DECLARE("MUTEX_TYPE",	offsetof(lck_mtx_ext_t *, lck_mtx_deb.type));
-	DECLARE("MUTEX_STACK",	offsetof(lck_mtx_ext_t *, lck_mtx_deb.stack));
-	DECLARE("MUTEX_FRAMES",	LCK_FRAMES_MAX);
-	DECLARE("MUTEX_THREAD",	offsetof(lck_mtx_ext_t *, lck_mtx_deb.thread));
-	DECLARE("MUTEX_ATTR",	offsetof(lck_mtx_ext_t *, lck_mtx_attr));
-	DECLARE("MUTEX_ATTR_DEBUG", LCK_MTX_ATTR_DEBUG);
-	DECLARE("MUTEX_ATTR_DEBUGb", LCK_MTX_ATTR_DEBUGb);
-	DECLARE("MUTEX_ATTR_STAT", LCK_MTX_ATTR_STAT);
-	DECLARE("MUTEX_ATTR_STATb", LCK_MTX_ATTR_STATb);
-	DECLARE("MUTEX_GRP",	offsetof(lck_mtx_ext_t *, lck_mtx_grp));
-	DECLARE("MUTEX_TAG",	MUTEX_TAG);
-	DECLARE("MUTEX_IND",	LCK_MTX_TAG_INDIRECT);
-	DECLARE("MUTEX_ITAG",offsetof(lck_mtx_t *, lck_mtx_tag));
-	DECLARE("MUTEX_PTR",offsetof(lck_mtx_t *, lck_mtx_ptr));
-	DECLARE("MUTEX_ASSERT_OWNED",	LCK_MTX_ASSERT_OWNED);
-	DECLARE("MUTEX_ASSERT_NOTOWNED",LCK_MTX_ASSERT_NOTOWNED);
-	DECLARE("GRP_MTX_STAT_UTIL",	offsetof(lck_grp_t *, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt));
-	DECLARE("GRP_MTX_STAT_MISS",	offsetof(lck_grp_t *, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt));
-	DECLARE("GRP_MTX_STAT_WAIT",	offsetof(lck_grp_t *, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt));
-
-	/* RW lock structure */
-	DECLARE("RW_IND",	LCK_RW_TAG_INDIRECT);
-	DECLARE("RW_PTR",	offsetof(lck_rw_t *, lck_rw_ptr));
-	DECLARE("RW_SHARED",	LCK_RW_TYPE_SHARED);
-	DECLARE("RW_EXCL",	LCK_RW_TYPE_EXCLUSIVE);
-	DECLARE("RW_EVENT",	(((sizeof(lck_rw_t)-1))/sizeof(unsigned int))*sizeof(unsigned int));
-
-	/* values from low_trace.h */
-	DECLARE("LTR_cpu",	offsetof(struct LowTraceRecord *, LTR_cpu));
-	DECLARE("LTR_excpt",	offsetof(struct LowTraceRecord *, LTR_excpt));
-	DECLARE("LTR_timeHi",	offsetof(struct LowTraceRecord *, LTR_timeHi));
-	DECLARE("LTR_timeLo",	offsetof(struct LowTraceRecord *, LTR_timeLo));
-	DECLARE("LTR_cr",	offsetof(struct LowTraceRecord *, LTR_cr));
-	DECLARE("LTR_srr0",	offsetof(struct LowTraceRecord *, LTR_srr0));
-	DECLARE("LTR_srr1",	offsetof(struct LowTraceRecord *, LTR_srr1));
-	DECLARE("LTR_dar",	offsetof(struct LowTraceRecord *, LTR_dar));
-	DECLARE("LTR_dsisr",	offsetof(struct LowTraceRecord *, LTR_dsisr));
-	DECLARE("LTR_rsvd0",	offsetof(struct LowTraceRecord *, LTR_rsvd0));
-	DECLARE("LTR_save",	offsetof(struct LowTraceRecord *, LTR_save));
-	DECLARE("LTR_lr",	offsetof(struct LowTraceRecord *, LTR_lr));
-	DECLARE("LTR_ctr",	offsetof(struct LowTraceRecord *, LTR_ctr));
-	DECLARE("LTR_r0",	offsetof(struct LowTraceRecord *, LTR_r0));
-	DECLARE("LTR_r1",	offsetof(struct LowTraceRecord *, LTR_r1));
-	DECLARE("LTR_r2",	offsetof(struct LowTraceRecord *, LTR_r2));
-	DECLARE("LTR_r3",	offsetof(struct LowTraceRecord *, LTR_r3));
-	DECLARE("LTR_r4",	offsetof(struct LowTraceRecord *, LTR_r4));
-	DECLARE("LTR_r5",	offsetof(struct LowTraceRecord *, LTR_r5));
-	DECLARE("LTR_r6",	offsetof(struct LowTraceRecord *, LTR_r6));
-	DECLARE("LTR_size",	sizeof(struct LowTraceRecord));
-
-/*	Values from pexpert.h */
-	DECLARE("PECFIcpurate",	offsetof(struct clock_frequency_info_t *, cpu_clock_rate_hz));
-	DECLARE("PECFIbusrate",	offsetof(struct clock_frequency_info_t *, bus_clock_rate_hz));
-
-/*	Values from pmap_internals.h and mappings.h */
-
-	DECLARE("mpFlags",		offsetof(struct mapping *, mpFlags));
-	DECLARE("mpBusy",		mpBusy);
-	DECLARE("mpPrevious",	mpPrevious);
-	DECLARE("mpNext",		mpNext);
-	DECLARE("mpPIndex",		mpPIndex);
-	DECLARE("mpType",		mpType);
-	DECLARE("mpNormal",		mpNormal);
-	DECLARE("mpBlock",		mpBlock);
-	DECLARE("mpMinSpecial",	mpMinSpecial);
-	DECLARE("mpNest",		mpNest);
-	DECLARE("mpLinkage",	mpLinkage);
-	DECLARE("mpACID",		mpACID);
-	DECLARE("mpGuest",		mpGuest);
-	DECLARE("mpFIP",		mpFIP);
-	DECLARE("mpFIPb",		mpFIPb);
-	DECLARE("mpPcfg",		mpPcfg);
-	DECLARE("mpPcfgb",		mpPcfgb);
-	DECLARE("mpRIP",		mpRIP);
-	DECLARE("mpRIPb",		mpRIPb);
-	DECLARE("mpPerm",		mpPerm);
-	DECLARE("mpPermb",		mpPermb);
-	DECLARE("mpBSu",		mpBSu);
-	DECLARE("mpBSub",		mpBSub);
-	DECLARE("mpLists",		mpLists);
-	DECLARE("mpListsb",		mpListsb);
-	DECLARE("mpgFlags",		mpgFlags);
-	DECLARE("mpgFree",		mpgFree);
-	DECLARE("mpgGlobal",	mpgGlobal);
-	DECLARE("mpgDormant",	mpgDormant);
-
-	DECLARE("mpSpace",		offsetof(struct mapping *, mpSpace));
-	DECLARE("mpBSize",		offsetof(struct mapping *, u.mpBSize));
-	DECLARE("mpgCursor",	offsetof(struct mapping *, u.mpgCursor));
-	DECLARE("mpPte",		offsetof(struct mapping *, mpPte));
-	DECLARE("mpHValid",		mpHValid);
-	DECLARE("mpHValidb",	mpHValidb);
-
-	DECLARE("mpPAddr",		offsetof(struct mapping *, mpPAddr));
-	DECLARE("mpVAddr",		offsetof(struct mapping *, mpVAddr));
-	DECLARE("mpHWFlags",	mpHWFlags);
-	DECLARE("mpHWFlagsb",	mpHWFlagsb);
-	DECLARE("mpN",			mpN);
-	DECLARE("mpNb",			mpNb);
-	DECLARE("mpPP",			mpPP);
-	DECLARE("mpPPb",		mpPPb);
-	DECLARE("mpPPe",		mpPPe);
-	DECLARE("mpKKN",		mpKKN);
-	DECLARE("mpKKNb",		mpKKNb);
-	DECLARE("mpWIMG",		mpWIMG);
-	DECLARE("mpWIMGb",		mpWIMGb);
-	DECLARE("mpW",			mpW);
-	DECLARE("mpWb",			mpWb);
-	DECLARE("mpI",			mpI);
-	DECLARE("mpIb",			mpIb);
-	DECLARE("mpM",			mpM);
-	DECLARE("mpMb",			mpMb);
-	DECLARE("mpG",			mpG);
-	DECLARE("mpGb",			mpGb);
-	DECLARE("mpWIMGe",		mpWIMGe);
-	DECLARE("mpC",			mpC);
-	DECLARE("mpCb",			mpCb);
-	DECLARE("mpR",			mpR);
-	DECLARE("mpRb",			mpRb);
-	DECLARE("mpAlias",		offsetof(struct mapping *, mpAlias));
-	DECLARE("mpNestReloc",	offsetof(struct mapping *, mpNestReloc));	
-	DECLARE("mpBlkRemCur",	offsetof(struct mapping *, mpBlkRemCur));	
-	DECLARE("mpList0",		offsetof(struct mapping *, mpList0));
-	DECLARE("mpList	",		offsetof(struct mapping *, mpList));
-	DECLARE("mpBasicSize",	mpBasicSize);
-	DECLARE("mpBasicLists",	mpBasicLists);
-
-	DECLARE("mbvrswap",		offsetof(struct mappingblok *, mapblokvrswap));
-	DECLARE("mbfree",		offsetof(struct mappingblok *, mapblokfree));
-	DECLARE("mapcsize",		sizeof(struct mappingctl));
-	
-	DECLARE("hwpPurgePTE",	hwpPurgePTE);
-	DECLARE("hwpMergePTE",	hwpMergePTE);
-	DECLARE("hwpNoopPTE",	hwpNoopPTE);
-
-// DANGER WILL ROBINSON!!! This wonderfully magical tool doesn't seem to handle 64-bit constants,
-// leaving us with only the cold ash of a zero. ppI, ppG, and who knows what else are affected.
-	DECLARE("ppLink",		offsetof(struct phys_entry *, ppLink));
-	DECLARE("ppLock",		ppLock);
-	DECLARE("ppFlags",		ppFlags);
-//	DECLARE("ppI",			ppI);
-	DECLARE("ppIb",			ppIb);
-//	DECLARE("ppG",			ppG);
-	DECLARE("ppGb",			ppGb);
-	DECLARE("ppR",			ppR);
-	DECLARE("ppRb",			ppRb);
-	DECLARE("ppC",			ppC);
-	DECLARE("ppCb",			ppCb);
-	DECLARE("physEntrySize",physEntrySize);
-	DECLARE("ppLFAmask",	ppLFAmask);
-	DECLARE("ppLFArrot",	ppLFArrot);
-
-	DECLARE("pcfFlags",		offsetof(struct pcfg *, pcfFlags));
-	DECLARE("pcfEncode",	offsetof(struct pcfg *, pcfEncode));
-	DECLARE("pcfPSize",		offsetof(struct pcfg *, pcfPSize));
-	DECLARE("pcfShift",		offsetof(struct pcfg *, pcfShift));
-	DECLARE("pcfValid",		pcfValid);
-	DECLARE("pcfLarge",		pcfLarge);
-	DECLARE("pcfDedSeg",	pcfDedSeg);
-	DECLARE("pcfSize",		sizeof(struct pcfg));
-	DECLARE("pcfDefPcfg",	pcfDefPcfg);
-	DECLARE("pcfLargePcfg",	pcfLargePcfg);
-
-	DECLARE("PCAallo",		offsetof(struct PCA *, flgs.PCAallo));
-	DECLARE("PCAfree",		offsetof(struct PCA *, flgs.PCAalflgs.PCAfree));
-	DECLARE("PCAauto",		offsetof(struct PCA *, flgs.PCAalflgs.PCAauto));
-	DECLARE("PCAmisc",		offsetof(struct PCA *, flgs.PCAalflgs.PCAmisc));
-	DECLARE("PCAlock",		PCAlock);
-	DECLARE("PCAlockb",		PCAlockb);
-	DECLARE("PCAsteal",		offsetof(struct PCA *, flgs.PCAalflgs.PCAsteal));
-
-	DECLARE("mrPhysTab",	offsetof(struct mem_region *, mrPhysTab));
-	DECLARE("mrStart",		offsetof(struct mem_region *, mrStart));
-	DECLARE("mrEnd",		offsetof(struct mem_region *, mrEnd));
-	DECLARE("mrAStart",		offsetof(struct mem_region *, mrAStart));
-	DECLARE("mrAEnd",		offsetof(struct mem_region *, mrAEnd));
-	DECLARE("mrSize",		sizeof(struct mem_region));
-
-	DECLARE("mapRemChunk",	mapRemChunk);
-
-	DECLARE("mapRetCode",	mapRetCode);
-	DECLARE("mapRtOK",		mapRtOK);
-	DECLARE("mapRtBadLk",	mapRtBadLk);
-	DECLARE("mapRtPerm",	mapRtPerm);
-	DECLARE("mapRtNotFnd",	mapRtNotFnd);
-	DECLARE("mapRtBlock",	mapRtBlock);
-	DECLARE("mapRtNest",	mapRtNest);
-	DECLARE("mapRtRemove",	mapRtRemove);
-	DECLARE("mapRtMapDup",	mapRtMapDup);
-	DECLARE("mapRtGuest",	mapRtGuest);
-	DECLARE("mapRtEmpty",	mapRtEmpty);
-	DECLARE("mapRtSmash",	mapRtSmash);
-
-#if 0
-	DECLARE("MFpcaptr",		offsetof(struct mappingflush *, pcaptr));
-	DECLARE("MFmappingcnt",		offsetof(struct mappingflush *, mappingcnt));
-	DECLARE("MFmapping",		offsetof(struct mappingflush *, mapping));
-	DECLARE("MFmappingSize", 	sizeof(struct mfmapping));
-#endif
-
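-	/* Guest-shadow-assist hash geometry; by the naming convention, each
-	 * *_LG2 value appears to be log2 of its counterpart, e.g.
-	 * GV_GROUPS == (1 << GV_GROUPS_LG2). */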
-	DECLARE("GV_GROUPS_LG2",	GV_GROUPS_LG2);
-	DECLARE("GV_GROUPS",		GV_GROUPS);
-	DECLARE("GV_SLOT_SZ_LG2",	GV_SLOT_SZ_LG2);
-	DECLARE("GV_SLOT_SZ",		GV_SLOT_SZ);
-	DECLARE("GV_SLOTS_LG2",		GV_SLOTS_LG2);
-	DECLARE("GV_SLOTS",			GV_SLOTS);
-	DECLARE("GV_PGIDX_SZ_LG2",	GV_PGIDX_SZ_LG2);
-	DECLARE("GV_PAGE_SZ_LG2",	GV_PAGE_SZ_LG2);
-	DECLARE("GV_PAGE_SZ",		GV_PAGE_SZ);
-	DECLARE("GV_PAGE_MASK",		GV_PAGE_MASK);
-	DECLARE("GV_HPAGES",		GV_HPAGES);
-	DECLARE("GV_GRPS_PPG_LG2",	GV_GRPS_PPG_LG2);
-	DECLARE("GV_GRPS_PPG",		GV_GRPS_PPG);
-	DECLARE("GV_GRP_MASK",		GV_GRP_MASK);
-	DECLARE("GV_SLOT_MASK",		GV_SLOT_MASK);
-	DECLARE("GV_HPAGE_SHIFT",	GV_HPAGE_SHIFT);
-	DECLARE("GV_HPAGE_MASK",	GV_HPAGE_MASK);
-	DECLARE("GV_HGRP_SHIFT",	GV_HGRP_SHIFT);
-	DECLARE("GV_HGRP_MASK",		GV_HGRP_MASK);
-	DECLARE("GV_MAPWD_BITS_LG2",GV_MAPWD_BITS_LG2);
-	DECLARE("GV_MAPWD_SZ_LG2",	GV_MAPWD_SZ_LG2);
-	DECLARE("GV_MAP_WORDS",		GV_MAP_WORDS);
-	DECLARE("GV_MAP_MASK",		GV_MAP_MASK);
-	DECLARE("GV_MAP_SHIFT",		GV_MAP_SHIFT);
-	DECLARE("GV_BAND_SHIFT",	GV_BAND_SHIFT);
-	DECLARE("GV_BAND_SZ_LG2",	GV_BAND_SZ_LG2);
-	DECLARE("GV_BAND_MASK",		GV_BAND_MASK);
-
-#if 1
-	DECLARE("GDsave",		offsetof(struct GDWorkArea *, GDsave));
-	DECLARE("GDfp0",		offsetof(struct GDWorkArea *, GDfp0));
-	DECLARE("GDfp1",		offsetof(struct GDWorkArea *, GDfp1));
-	DECLARE("GDfp2",		offsetof(struct GDWorkArea *, GDfp2));
-	DECLARE("GDfp3",		offsetof(struct GDWorkArea *, GDfp3));
-	DECLARE("GDtop",		offsetof(struct GDWorkArea *, GDtop));
-	DECLARE("GDleft",		offsetof(struct GDWorkArea *, GDleft));
-	DECLARE("GDtopleft",	offsetof(struct GDWorkArea *, GDtopleft));
-	DECLARE("GDrowbytes",	offsetof(struct GDWorkArea *, GDrowbytes));
-	DECLARE("GDrowchar",	offsetof(struct GDWorkArea *, GDrowchar));
-	DECLARE("GDdepth",		offsetof(struct GDWorkArea *, GDdepth));
-	DECLARE("GDcollgn",		offsetof(struct GDWorkArea *, GDcollgn));
-	DECLARE("GDready",		offsetof(struct GDWorkArea *, GDready));
-	DECLARE("GDrowbuf1",	offsetof(struct GDWorkArea *, GDrowbuf1));
-	DECLARE("GDrowbuf2",	offsetof(struct GDWorkArea *, GDrowbuf2));
-#endif
-
-	DECLARE("enaExpTrace",	enaExpTrace);
-	DECLARE("enaExpTraceb",	enaExpTraceb);
-	DECLARE("enaUsrFCall",	enaUsrFCall);
-	DECLARE("enaUsrFCallb",	enaUsrFCallb);
-	DECLARE("enaUsrPhyMp",	enaUsrPhyMp);
-	DECLARE("enaUsrPhyMpb",	enaUsrPhyMpb);
-	DECLARE("enaDiagSCs",	enaDiagSCs);
-	DECLARE("enaDiagSCsb",	enaDiagSCsb);
-	DECLARE("enaDiagEM",	enaDiagEM);
-	DECLARE("enaDiagEMb",	enaDiagEMb);
-	DECLARE("enaNotifyEM",	enaNotifyEM);
-	DECLARE("enaNotifyEMb",	enaNotifyEMb);
-	DECLARE("disLkType",	disLkType);
-	DECLARE("disLktypeb",	disLktypeb);
-	DECLARE("disLkThread",	disLkThread);
-	DECLARE("disLkThreadb",	disLkThreadb);
-	DECLARE("enaLkExtStck",	enaLkExtStck);
-	DECLARE("enaLkExtStckb",enaLkExtStckb);
-	DECLARE("disLkMyLck",	disLkMyLck);
-	DECLARE("disLkMyLckb",	disLkMyLckb);
-	DECLARE("dgMisc1",		offsetof(struct diagWork *, dgMisc1));
-	DECLARE("dgMisc2",		offsetof(struct diagWork *, dgMisc2));
-	DECLARE("dgMisc3",		offsetof(struct diagWork *, dgMisc3));
-	DECLARE("dgMisc4",		offsetof(struct diagWork *, dgMisc4));
-	DECLARE("dgMisc5",		offsetof(struct diagWork *, dgMisc5));
-
-	DECLARE("SACnext",		offsetof(struct savearea_comm *, sac_next));
-	DECLARE("SACprev",		offsetof(struct savearea_comm *, sac_prev));
-	DECLARE("SACvrswap",	offsetof(struct savearea_comm *, sac_vrswap));
-	DECLARE("SACalloc",		offsetof(struct savearea_comm *, sac_alloc));
-	DECLARE("SACflags",		offsetof(struct savearea_comm *, sac_flags));
-	DECLARE("sac_cnt",		sac_cnt);
-	DECLARE("sac_empty",	sac_empty);
-	DECLARE("sac_perm",		sac_perm);
-	DECLARE("sac_permb",	sac_permb);
-
-	DECLARE("LocalSaveTarget",		LocalSaveTarget);
-	DECLARE("LocalSaveMin",			LocalSaveMin);
-	DECLARE("LocalSaveMax",			LocalSaveMax);
-	DECLARE("FreeListMin",			FreeListMin);
-	DECLARE("SaveLowHysteresis",	SaveLowHysteresis);
-	DECLARE("SaveHighHysteresis",	SaveHighHysteresis);
-	DECLARE("InitialSaveAreas",		InitialSaveAreas);
-	DECLARE("InitialSaveTarget",	InitialSaveTarget);
-	DECLARE("InitialSaveBloks",		InitialSaveBloks);
-
-	DECLARE("SAVprev",		offsetof(struct savearea_comm *, save_prev));
-	DECLARE("SAVact",		offsetof(struct savearea_comm *, save_act));
-	DECLARE("SAVflags",		offsetof(struct savearea_comm *, save_flags));
-	DECLARE("SAVlevel",		offsetof(struct savearea_comm *, save_level));
-	DECLARE("SAVtime",		offsetof(struct savearea_comm *, save_time));
-	DECLARE("savemisc0",	offsetof(struct savearea_comm *, save_misc0));
-	DECLARE("savemisc1",	offsetof(struct savearea_comm *, save_misc1));
-	DECLARE("savemisc2",	offsetof(struct savearea_comm *, save_misc2));
-	DECLARE("savemisc3",	offsetof(struct savearea_comm *, save_misc3));
-
-	DECLARE("SAVsize",		sizeof(struct savearea));
-	DECLARE("SAVsizefpu",	sizeof(struct savearea_vec));
-	DECLARE("SAVsizevec",	sizeof(struct savearea_fpu));
-	DECLARE("SAVcommsize",	sizeof(struct savearea_comm));
-	
-	DECLARE("savesrr0",		offsetof(struct savearea *, save_srr0));
-	DECLARE("savesrr1",		offsetof(struct savearea *, save_srr1));
-	DECLARE("savecr",		offsetof(struct savearea *, save_cr));
-	DECLARE("savexer",		offsetof(struct savearea *, save_xer));
-	DECLARE("savelr",		offsetof(struct savearea *, save_lr));
-	DECLARE("savectr",		offsetof(struct savearea *, save_ctr));
-	DECLARE("savedar",		offsetof(struct savearea *, save_dar));
-	DECLARE("savedsisr",	offsetof(struct savearea *, save_dsisr));
-	DECLARE("saveexception",	offsetof(struct savearea *, save_exception));
-	DECLARE("savefpscrpad",	offsetof(struct savearea *, save_fpscrpad));
-	DECLARE("savefpscr",	offsetof(struct savearea *, save_fpscr));
-	DECLARE("savevrsave",	offsetof(struct savearea *, save_vrsave));	
-	DECLARE("savevscr",		offsetof(struct savearea *, save_vscr));	
-
-	DECLARE("savemmcr0",	offsetof(struct savearea *, save_mmcr0));
-	DECLARE("savemmcr1",	offsetof(struct savearea *, save_mmcr1));
-	DECLARE("savemmcr2",	offsetof(struct savearea *, save_mmcr2));
-	DECLARE("savepmc",		offsetof(struct savearea *, save_pmc));
-	
-	DECLARE("saveinstr",	offsetof(struct savearea *, save_instr));
-
-	DECLARE("savexdat0",	offsetof(struct savearea *, save_xdat0));
-	DECLARE("savexdat1",	offsetof(struct savearea *, save_xdat1));
-	DECLARE("savexdat2",	offsetof(struct savearea *, save_xdat2));
-	DECLARE("savexdat3",	offsetof(struct savearea *, save_xdat3));
-	
-	DECLARE("saver0",		offsetof(struct savearea *, save_r0));
-	DECLARE("saver1",		offsetof(struct savearea *, save_r1));
-	DECLARE("saver2",		offsetof(struct savearea *, save_r2));
-	DECLARE("saver3",		offsetof(struct savearea *, save_r3));
-	DECLARE("saver4",		offsetof(struct savearea *, save_r4));
-	DECLARE("saver5",		offsetof(struct savearea *, save_r5));
-	DECLARE("saver6",		offsetof(struct savearea *, save_r6));
-	DECLARE("saver7",		offsetof(struct savearea *, save_r7));
-	DECLARE("saver8",		offsetof(struct savearea *, save_r8));
-	DECLARE("saver9",		offsetof(struct savearea *, save_r9));
-	DECLARE("saver10",		offsetof(struct savearea *, save_r10));
-	DECLARE("saver11",		offsetof(struct savearea *, save_r11));
-	DECLARE("saver12",		offsetof(struct savearea *, save_r12));
-	DECLARE("saver13",		offsetof(struct savearea *, save_r13));
-	DECLARE("saver14",		offsetof(struct savearea *, save_r14));
-	DECLARE("saver15",		offsetof(struct savearea *, save_r15));
-	DECLARE("saver16",		offsetof(struct savearea *, save_r16));
-	DECLARE("saver17",		offsetof(struct savearea *, save_r17));
-	DECLARE("saver18",		offsetof(struct savearea *, save_r18));
-	DECLARE("saver19",		offsetof(struct savearea *, save_r19));
-	DECLARE("saver20",		offsetof(struct savearea *, save_r20));
-	DECLARE("saver21",		offsetof(struct savearea *, save_r21));
-	DECLARE("saver22",		offsetof(struct savearea *, save_r22));
-	DECLARE("saver23",		offsetof(struct savearea *, save_r23));
-	DECLARE("saver24",		offsetof(struct savearea *, save_r24));
-	DECLARE("saver25",		offsetof(struct savearea *, save_r25));
-	DECLARE("saver26",		offsetof(struct savearea *, save_r26));
-	DECLARE("saver27",		offsetof(struct savearea *, save_r27));
-	DECLARE("saver28",		offsetof(struct savearea *, save_r28));
-	DECLARE("saver29",		offsetof(struct savearea *, save_r29));
-	DECLARE("saver30",		offsetof(struct savearea *, save_r30));
-	DECLARE("saver31",		offsetof(struct savearea *, save_r31));
-
-	DECLARE("savefp0",		offsetof(struct savearea_fpu *, save_fp0));
-	DECLARE("savefp1",		offsetof(struct savearea_fpu *, save_fp1));
-	DECLARE("savefp2",		offsetof(struct savearea_fpu *, save_fp2));
-	DECLARE("savefp3",		offsetof(struct savearea_fpu *, save_fp3));
-	DECLARE("savefp4",		offsetof(struct savearea_fpu *, save_fp4));
-	DECLARE("savefp5",		offsetof(struct savearea_fpu *, save_fp5));
-	DECLARE("savefp6",		offsetof(struct savearea_fpu *, save_fp6));
-	DECLARE("savefp7",		offsetof(struct savearea_fpu *, save_fp7));
-	DECLARE("savefp8",		offsetof(struct savearea_fpu *, save_fp8));
-	DECLARE("savefp9",		offsetof(struct savearea_fpu *, save_fp9));
-	DECLARE("savefp10",		offsetof(struct savearea_fpu *, save_fp10));
-	DECLARE("savefp11",		offsetof(struct savearea_fpu *, save_fp11));
-	DECLARE("savefp12",		offsetof(struct savearea_fpu *, save_fp12));
-	DECLARE("savefp13",		offsetof(struct savearea_fpu *, save_fp13));
-	DECLARE("savefp14",		offsetof(struct savearea_fpu *, save_fp14));
-	DECLARE("savefp15",		offsetof(struct savearea_fpu *, save_fp15));
-	DECLARE("savefp16",		offsetof(struct savearea_fpu *, save_fp16));
-	DECLARE("savefp17",		offsetof(struct savearea_fpu *, save_fp17));
-	DECLARE("savefp18",		offsetof(struct savearea_fpu *, save_fp18));
-	DECLARE("savefp19",		offsetof(struct savearea_fpu *, save_fp19));
-	DECLARE("savefp20",		offsetof(struct savearea_fpu *, save_fp20));
-	DECLARE("savefp21",		offsetof(struct savearea_fpu *, save_fp21));
-	DECLARE("savefp22",		offsetof(struct savearea_fpu *, save_fp22));
-	DECLARE("savefp23",		offsetof(struct savearea_fpu *, save_fp23));
-	DECLARE("savefp24",		offsetof(struct savearea_fpu *, save_fp24));
-	DECLARE("savefp25",		offsetof(struct savearea_fpu *, save_fp25));
-	DECLARE("savefp26",		offsetof(struct savearea_fpu *, save_fp26));
-	DECLARE("savefp27",		offsetof(struct savearea_fpu *, save_fp27));
-	DECLARE("savefp28",		offsetof(struct savearea_fpu *, save_fp28));
-	DECLARE("savefp29",		offsetof(struct savearea_fpu *, save_fp29));
-	DECLARE("savefp30",		offsetof(struct savearea_fpu *, save_fp30));
-	DECLARE("savefp31",		offsetof(struct savearea_fpu *, save_fp31));
-	
-	DECLARE("savevr0",		offsetof(struct savearea_vec *, save_vr0));
-	DECLARE("savevr1",		offsetof(struct savearea_vec *, save_vr1));
-	DECLARE("savevr2",		offsetof(struct savearea_vec *, save_vr2));
-	DECLARE("savevr3",		offsetof(struct savearea_vec *, save_vr3));
-	DECLARE("savevr4",		offsetof(struct savearea_vec *, save_vr4));
-	DECLARE("savevr5",		offsetof(struct savearea_vec *, save_vr5));
-	DECLARE("savevr6",		offsetof(struct savearea_vec *, save_vr6));
-	DECLARE("savevr7",		offsetof(struct savearea_vec *, save_vr7));
-	DECLARE("savevr8",		offsetof(struct savearea_vec *, save_vr8));
-	DECLARE("savevr9",		offsetof(struct savearea_vec *, save_vr9));
-	DECLARE("savevr10",		offsetof(struct savearea_vec *, save_vr10));
-	DECLARE("savevr11",		offsetof(struct savearea_vec *, save_vr11));
-	DECLARE("savevr12",		offsetof(struct savearea_vec *, save_vr12));
-	DECLARE("savevr13",		offsetof(struct savearea_vec *, save_vr13));
-	DECLARE("savevr14",		offsetof(struct savearea_vec *, save_vr14));
-	DECLARE("savevr15",		offsetof(struct savearea_vec *, save_vr15));
-	DECLARE("savevr16",		offsetof(struct savearea_vec *, save_vr16));
-	DECLARE("savevr17",		offsetof(struct savearea_vec *, save_vr17));
-	DECLARE("savevr18",		offsetof(struct savearea_vec *, save_vr18));
-	DECLARE("savevr19",		offsetof(struct savearea_vec *, save_vr19));
-	DECLARE("savevr20",		offsetof(struct savearea_vec *, save_vr20));
-	DECLARE("savevr21",		offsetof(struct savearea_vec *, save_vr21));
-	DECLARE("savevr22",		offsetof(struct savearea_vec *, save_vr22));
-	DECLARE("savevr23",		offsetof(struct savearea_vec *, save_vr23));
-	DECLARE("savevr24",		offsetof(struct savearea_vec *, save_vr24));
-	DECLARE("savevr25",		offsetof(struct savearea_vec *, save_vr25));
-	DECLARE("savevr26",		offsetof(struct savearea_vec *, save_vr26));
-	DECLARE("savevr27",		offsetof(struct savearea_vec *, save_vr27));
-	DECLARE("savevr28",		offsetof(struct savearea_vec *, save_vr28));
-	DECLARE("savevr29",		offsetof(struct savearea_vec *, save_vr29));
-	DECLARE("savevr30",		offsetof(struct savearea_vec *, save_vr30));
-	DECLARE("savevr31",		offsetof(struct savearea_vec *, save_vr31));
-	DECLARE("savevrvalid",	offsetof(struct savearea_vec *, save_vrvalid));	
-
-	/* PseudoKernel Exception Descriptor info */
-	DECLARE("BEDA_SRR0",	offsetof(BEDA_t *, srr0));
-	DECLARE("BEDA_SRR1",	offsetof(BEDA_t *, srr1));
-	DECLARE("BEDA_SPRG0",	offsetof(BEDA_t *, sprg0));
-	DECLARE("BEDA_SPRG1",	offsetof(BEDA_t *, sprg1));
-
-	/* PseudoKernel Interrupt Control Word */
-	DECLARE("BTTD_INTCONTROLWORD",	offsetof(BTTD_t *, InterruptControlWord));
-
-	/* New state when exiting the pseudokernel */
-	DECLARE("BTTD_NEWEXITSTATE",	offsetof(BTTD_t *, NewExitState));
-
-	/* PseudoKernel Test/Post Interrupt */
-	DECLARE("BTTD_TESTINTMASK",	offsetof(BTTD_t *, testIntMask));
-	DECLARE("BTTD_POSTINTMASK",	offsetof(BTTD_t *, postIntMask));
-
-	/* PseudoKernel Vectors */
-	DECLARE("BTTD_TRAP_VECTOR",			offsetof(BTTD_t *, TrapVector));
-	DECLARE("BTTD_SYSCALL_VECTOR",		offsetof(BTTD_t *, SysCallVector));
-	DECLARE("BTTD_INTERRUPT_VECTOR",	offsetof(BTTD_t *, InterruptVector));
-	DECLARE("BTTD_PENDINGINT_VECTOR",	offsetof(BTTD_t *, PendingIntVector));
-	
-	/* PseudoKernel Bits, Masks and misc */
-	DECLARE("SYSCONTEXTSTATE",		kInSystemContext);
-	DECLARE("PSEUDOKERNELSTATE",	kInPseudoKernel);
-	DECLARE("INTSTATEMASK_B",		12);
-	DECLARE("INTSTATEMASK_E",		15);
-	DECLARE("INTCR2MASK_B",			8);
-	DECLARE("INTCR2MASK_E",			11);
-	DECLARE("INTBACKUPCR2MASK_B",	28);
-	DECLARE("INTBACKUPCR2MASK_E",	31);
-	DECLARE("INTCR2TOBACKUPSHIFT",	kCR2ToBackupShift);
-	DECLARE("BB_MAX_TRAP",			bbMaxTrap);
-	DECLARE("BB_RFI_TRAP",			bbRFITrap);
-
-	/* Various hackery */
-	DECLARE("procState",		offsetof(struct processor *, state));
-	
-	DECLARE("CPU_SUBTYPE_POWERPC_ALL",		CPU_SUBTYPE_POWERPC_ALL);
-	DECLARE("CPU_SUBTYPE_POWERPC_750",		CPU_SUBTYPE_POWERPC_750);
-	DECLARE("CPU_SUBTYPE_POWERPC_7400",		CPU_SUBTYPE_POWERPC_7400);
-	DECLARE("CPU_SUBTYPE_POWERPC_7450",		CPU_SUBTYPE_POWERPC_7450);
-	DECLARE("CPU_SUBTYPE_POWERPC_970",		CPU_SUBTYPE_POWERPC_970);
-
-	DECLARE("shdIBAT",	offsetof(struct shadowBAT *, IBATs));	
-	DECLARE("shdDBAT",	offsetof(struct shadowBAT *, DBATs));	
-	
-	/* Low Memory Globals */
-
-	DECLARE("lgVerCode", 			offsetof(struct lowglo *, lgVerCode));
-	DECLARE("lgPPStart", 			offsetof(struct lowglo *, lgPPStart));
-	DECLARE("maxDec", 				offsetof(struct lowglo *, lgMaxDec));
-	DECLARE("mckFlags", 			offsetof(struct lowglo *, lgMckFlags));
-	DECLARE("lgPMWvaddr",			offsetof(struct lowglo *, lgPMWvaddr));
-	DECLARE("lgUMWvaddr",			offsetof(struct lowglo *, lgUMWvaddr));
-	DECLARE("trcWork", 				offsetof(struct lowglo *, lgTrcWork));
-	DECLARE("traceMask",			offsetof(struct lowglo *, lgTrcWork.traceMask));
-	DECLARE("traceCurr",			offsetof(struct lowglo *, lgTrcWork.traceCurr));
-	DECLARE("traceStart",			offsetof(struct lowglo *, lgTrcWork.traceStart));
-	DECLARE("traceEnd",				offsetof(struct lowglo *, lgTrcWork.traceEnd));
-	DECLARE("traceMsnd",			offsetof(struct lowglo *, lgTrcWork.traceMsnd));
-
-	DECLARE("Zero", 				offsetof(struct lowglo *, lgZero));
-	DECLARE("saveanchor", 			offsetof(struct lowglo *, lgSaveanchor));
-
-	DECLARE("SVlock",				offsetof(struct lowglo *, lgSaveanchor.savelock));
-	DECLARE("SVpoolfwd",			offsetof(struct lowglo *, lgSaveanchor.savepoolfwd));
-	DECLARE("SVpoolbwd",			offsetof(struct lowglo *, lgSaveanchor.savepoolbwd));
-	DECLARE("SVfree",				offsetof(struct lowglo *, lgSaveanchor.savefree));
-	DECLARE("SVfreecnt",			offsetof(struct lowglo *, lgSaveanchor.savefreecnt));
-	DECLARE("SVadjust",				offsetof(struct lowglo *, lgSaveanchor.saveadjust));
-	DECLARE("SVinuse",				offsetof(struct lowglo *, lgSaveanchor.saveinuse));
-	DECLARE("SVtarget",				offsetof(struct lowglo *, lgSaveanchor.savetarget));
-	DECLARE("SVsaveinusesnapshot",		offsetof(struct lowglo *, lgSaveanchor.saveinusesnapshot));
-	DECLARE("SVsavefreesnapshot",		offsetof(struct lowglo *, lgSaveanchor.savefreesnapshot));
-	DECLARE("SVsize",				sizeof(struct Saveanchor));
-
-	DECLARE("tlbieLock", 			offsetof(struct lowglo *, lgTlbieLck));
-
-	DECLARE("dgFlags",				offsetof(struct lowglo *, lgdgWork.dgFlags));
-	DECLARE("dgLock",				offsetof(struct lowglo *, lgdgWork.dgLock));
-	DECLARE("dgMisc0",				offsetof(struct lowglo *, lgdgWork.dgMisc0));
-	
-	DECLARE("lglcksWork",			offsetof(struct lowglo *, lglcksWork));
-	DECLARE("lgKillResv",			offsetof(struct lowglo *, lgKillResv));
-	DECLARE("lgpPcfg",				offsetof(struct lowglo *, lgpPcfg));
-
-
-	DECLARE("scomcpu",				offsetof(struct scomcomm *, scomcpu));
-	DECLARE("scomfunc",				offsetof(struct scomcomm *, scomfunc));
-	DECLARE("scomreg",				offsetof(struct scomcomm *, scomreg));
-	DECLARE("scomstat",				offsetof(struct scomcomm *, scomstat));
-	DECLARE("scomdata",				offsetof(struct scomcomm *, scomdata));
-
-#if	CONFIG_DTRACE
-	DECLARE("LS_LCK_MTX_UNLOCK_RELEASE", LS_LCK_MTX_UNLOCK_RELEASE);
-	DECLARE("LS_LCK_MTX_LOCK_ACQUIRE", LS_LCK_MTX_LOCK_ACQUIRE);
-#endif
-
-	return(0);  /* For ANSI C :-) */
-}
diff --git a/osfmk/ppc/hexfont.h b/osfmk/ppc/hexfont.h
deleted file mode 100644
index 38035a0c9..000000000
--- a/osfmk/ppc/hexfont.h
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*					  0123456789ABCDEF */
-
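-/*
- * Each glyph below is 16 rows of one 16-bit word, bit 15 being the
- * leftmost pixel (see the binary comments). An illustrative C sketch of
- * a consumer (a hypothetical helper, not the kernel's actual renderer):
- *
- *	extern const uint16_t hexfont[16 * 16];
- *
- *	static void draw_hex_digit(unsigned d, void (*plot)(int x, int y))
- *	{
- *		for (int y = 0; y < 16; y++) {
- *			uint16_t row = hexfont[d * 16 + y];
- *			for (int x = 0; x < 16; x++)
- *				if (row & (0x8000u >> x))
- *					plot(x, y);
- *		}
- *	}
- */
-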
-hexfont:	.short	0x0000	/* 0b0000000000000000 */
-			.short	0x07C0	/* 0b0000011111000000 */
-			.short	0x1FF0	/* 0b0001111111110000 */
-			.short	0x3C78	/* 0b0011110001111000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x701C	/* 0b0111000000011100 */
-			.short	0x701C	/* 0b0111000000011100 */
-			.short	0x701C	/* 0b0111000000011100 */
-			.short	0x701C	/* 0b0111000000011100 */
-			.short	0x701C	/* 0b0111000000011100 */
-			.short	0x701C	/* 0b0111000000011100 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x3C78	/* 0b0011110001111000 */
-			.short	0x1FF0	/* 0b0001111111110000 */
-			.short	0x07C0	/* 0b0000011111000000 */
-			.short	0x0000	/* 0b0000000000000000 */
-	
-			.short	0x0000	/* 0b0000000000000000 */
-			.short	0x0080	/* 0b0000000010000000 */
-			.short	0x0180	/* 0b0000000110000000 */
-			.short	0x0380	/* 0b0000001110000000 */
-			.short	0x0780	/* 0b0000011110000000 */
-			.short	0x0F80	/* 0b0000111110000000 */
-			.short	0x1F80	/* 0b0001111110000000 */
-			.short	0x0380	/* 0b0000001110000000 */
-			.short	0x0380	/* 0b0000001110000000 */
-			.short	0x0380	/* 0b0000001110000000 */
-			.short	0x0380	/* 0b0000001110000000 */
-			.short	0x0380	/* 0b0000001110000000 */
-			.short	0x0380	/* 0b0000001110000000 */
-			.short	0x1FF0	/* 0b0001111111110000 */
-			.short	0x1FF0	/* 0b0001111111110000 */
-			.short	0x0000	/* 0b0000000000000000 */
-
-			.short	0x0000	/* 0b0000000000000000 */
-			.short	0x0FE0	/* 0b0000111111100000 */
-			.short	0x1FF0	/* 0b0001111111110000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x7038	/* 0b0111000000111000 */
-			.short	0x0038	/* 0b0000000000111000 */
-			.short	0x0038	/* 0b0000000000111000 */
-			.short	0x00F0	/* 0b0000000011110000 */
-			.short	0x01E0	/* 0b0000000111100000 */
-			.short	0x0380	/* 0b0000001110000000 */
-			.short	0x0F00	/* 0b0000111100000000 */
-			.short	0x1C00	/* 0b0001110000000000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x7FFC	/* 0b0111111111111100 */
-			.short	0x7FFC	/* 0b0111111111111100 */
-			.short	0x0000	/* 0b0000000000000000 */
-
-			.short	0x0000	/* 0b0000000000000000 */
-			.short	0x0FE0	/* 0b0000111111100000 */
-			.short	0x1FF0	/* 0b0001111111110000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x0038	/* 0b0000000000111000 */
-			.short	0x0038	/* 0b0000000000111000 */
-			.short	0x00F0	/* 0b0000000011110000 */
-			.short	0x00F0	/* 0b0000000011110000 */
-			.short	0x0038	/* 0b0000000000111000 */
-			.short	0x0038	/* 0b0000000000111000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x1FF0	/* 0b0001111111110000 */
-			.short	0x0FE0	/* 0b0000111111100000 */
-			.short	0x0000	/* 0b0000000000000000 */
-
-			.short	0x0000	/* 0b0000000000000000 */
-			.short	0x0020	/* 0b0000000000100000 */
-			.short	0x0060	/* 0b0000000001100000 */
-			.short	0x00E0	/* 0b0000000011100000 */
-			.short	0x01E0	/* 0b0000000111100000 */
-			.short	0x03E0	/* 0b0000001111100000 */
-			.short	0x07E0	/* 0b0000011111100000 */
-			.short	0x0EE0	/* 0b0000111011100000 */
-			.short	0x1CE0	/* 0b0001110011100000 */
-			.short	0x3FF8	/* 0b0011111111111000 */
-			.short	0x7FF8	/* 0b0111111111111000 */
-			.short	0x00E0	/* 0b0000000011100000 */
-			.short	0x00E0	/* 0b0000000011100000 */
-			.short	0x00E0	/* 0b0000000011100000 */
-			.short	0x00E0	/* 0b0000000011100000 */
-			.short	0x0000	/* 0b0000000000000000 */
- 
-			.short	0x0000	/* 0b0000000000000000 */
-			.short	0x3FF8	/* 0b0011111111111000 */
-			.short	0x3FF8	/* 0b0011111111111000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3FC0	/* 0b0011111111000000 */
-			.short	0x3FF0	/* 0b0011111111110000 */
-			.short	0x3870	/* 0b0011100001110000 */
-			.short	0x3038	/* 0b0011000000111000 */
-			.short	0x0038	/* 0b0000000000111000 */
-			.short	0x0038	/* 0b0000000000111000 */
-			.short	0x0038	/* 0b0000000000111000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x3FF0	/* 0b0011111111110000 */
-			.short	0x0FC0	/* 0b0000111111000000 */
-			.short	0x0000	/* 0b0000000000000000 */
-
-			.short	0x0000	/* 0b0000000000000000 */
-			.short	0x0070	/* 0b0000000001110000 */
-			.short	0x00E0	/* 0b0000000011100000 */
-			.short	0x01C0	/* 0b0000000111000000 */
-			.short	0x0380	/* 0b0000001110000000 */
-			.short	0x0700	/* 0b0000011100000000 */
-			.short	0x0E00	/* 0b0000111000000000 */
-			.short	0x1C00	/* 0b0001110000000000 */
-			.short	0x3FE0	/* 0b0011111111100000 */
-			.short	0x3FF8	/* 0b0011111111111000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x1FF0	/* 0b0001111111110000 */
-			.short	0x0FE0	/* 0b0000111111100000 */
-			.short	0x0000	/* 0b0000000000000000 */
-
-			.short	0x0000	/* 0b0000000000000000 */
-			.short	0x3FFC	/* 0b0011111111111100 */
-			.short	0x3FFC	/* 0b0011111111111100 */
-			.short	0x003C	/* 0b0000000000111100 */
-			.short	0x0038	/* 0b0000000000111000 */
-			.short	0x0070	/* 0b0000000001110000 */
-			.short	0x0070	/* 0b0000000001110000 */
-			.short	0x00E0	/* 0b0000000011100000 */
-			.short	0x00E0	/* 0b0000000011100000 */
-			.short	0x01C0	/* 0b0000000111000000 */
-			.short	0x01C0	/* 0b0000000111000000 */
-			.short	0x0380	/* 0b0000001110000000 */
-			.short	0x0380	/* 0b0000001110000000 */
-			.short	0x0700	/* 0b0000011100000000 */
-			.short	0x0700	/* 0b0000011100000000 */
-			.short	0x0000	/* 0b0000000000000000 */
-
-			.short	0x0000	/* 0b0000000000000000 */
-			.short	0x0FE0	/* 0b0000111111100000 */
-			.short	0x1FF0	/* 0b0001111111110000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x1FF0	/* 0b0001111111110000 */
-			.short	0x0FE0	/* 0b0000111111100000 */
-			.short	0x1FF0	/* 0b0001111111110000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x1FF0	/* 0b0001111111110000 */
-			.short	0x0FE0	/* 0b0000111111100000 */
-			.short	0x0000	/* 0b0000000000000000 */
-
-			.short	0x0000	/* 0b0000000000000000 */
-			.short	0x0FE0	/* 0b0000111111100000 */
-			.short	0x1FF8	/* 0b0001111111111000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x383C	/* 0b0011100000111100 */
-			.short	0x383C	/* 0b0011100000111100 */
-			.short	0x1FFC	/* 0b0001111111111100 */
-			.short	0x0FF8	/* 0b0000111111111000 */
-			.short	0x0078	/* 0b0000000001111000 */
-			.short	0x0070	/* 0b0000000001110000 */
-			.short	0x00E0	/* 0b0000000011100000 */
-			.short	0x01C0	/* 0b0000000111000000 */
-			.short	0x0380	/* 0b0000001110000000 */
-			.short	0x0700	/* 0b0000011100000000 */
-			.short	0x0E00	/* 0b0000111000000000 */
-			.short	0x0000	/* 0b0000000000000000 */
-
-			.short	0x0000	/* 0b0000000000000000 */
-			.short	0x07E0	/* 0b0000011111100000 */
-			.short	0x0FF0	/* 0b0000111111110000 */
-			.short	0x1C38	/* 0b0001110000111000 */
-			.short	0x1C38	/* 0b0001110000111000 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x3FFC	/* 0b0011111111111100 */
-			.short	0x3FFC	/* 0b0011111111111100 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x0000	/* 0b0000000000000000 */
-
-			.short	0x0000	/* 0b0000000000000000 */
-			.short	0x3FE0	/* 0b0011111111100000 */
-			.short	0x3FF0	/* 0b0011111111110000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x3FF8	/* 0b0011111111111000 */
-			.short	0x3FF0	/* 0b0011111111110000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x383C	/* 0b0011100000111100 */
-			.short	0x3FF8	/* 0b0011111111111000 */
-			.short	0x3FE0	/* 0b0011111111100000 */
-			.short	0x0000	/* 0b0000000000000000 */
-
-			.short	0x0000	/* 0b0000000000000000 */
-			.short	0x0FE0	/* 0b0000111111100000 */
-			.short	0x1FF0	/* 0b0001111111110000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x7000	/* 0b0111000000000000 */
-			.short	0x7000	/* 0b0111000000000000 */
-			.short	0x7000	/* 0b0111000000000000 */
-			.short	0x7000	/* 0b0111000000000000 */
-			.short	0x7000	/* 0b0111000000000000 */
-			.short	0x7000	/* 0b0111000000000000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x1FF0	/* 0b0001111111110000 */
-			.short	0x0FE0	/* 0b0000111111100000 */
-			.short	0x0000	/* 0b0000000000000000 */
-
-			.short	0x0000	/* 0b0000000000000000 */
-			.short	0x3FE0	/* 0b0011111111100000 */
-			.short	0x3FF0	/* 0b0011111111110000 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x381C	/* 0b0011100000011100 */
-			.short	0x3838	/* 0b0011100000111000 */
-			.short	0x3FF0	/* 0b0011111111110000 */
-			.short	0x3FE0	/* 0b0011111111100000 */
-			.short	0x0000	/* 0b0000000000000000 */
-
-			.short	0x0000	/* 0b0000000000000000 */
-			.short	0x3FFC	/* 0b0011111111111100 */
-			.short	0x3FFC	/* 0b0011111111111100 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3FE0	/* 0b0011111111100000 */
-			.short	0x3FE0	/* 0b0011111111100000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3FFC	/* 0b0011111111111100 */
-			.short	0x3FFC	/* 0b0011111111111100 */
-			.short	0x0000	/* 0b0000000000000000 */
-
-			.short	0x0000	/* 0b0000000000000000 */
-			.short	0x3FFC	/* 0b0011111111111100 */
-			.short	0x3FFC	/* 0b0011111111111100 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3FE0	/* 0b0011111111100000 */
-			.short	0x3FE0	/* 0b0011111111100000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x3800	/* 0b0011100000000000 */
-			.short	0x0000	/* 0b0000000000000000 */
-			
diff --git a/osfmk/ppc/hibernate_ppc.c b/osfmk/ppc/hibernate_ppc.c
deleted file mode 100644
index 2bd051994..000000000
--- a/osfmk/ppc/hibernate_ppc.c
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <kern/kern_types.h>
-#include <kern/kalloc.h>
-#include <kern/machine.h>
-#include <kern/misc_protos.h>
-#include <kern/thread.h>
-#include <kern/processor.h>
-#include <mach/machine.h>
-#include <mach/processor_info.h>
-#include <mach/mach_types.h>
-#include <ppc/proc_reg.h>
-#include <ppc/misc_protos.h>
-#include <ppc/machine_routines.h>
-#include <ppc/machine_cpu.h>
-#include <ppc/exception.h>
-#include <ppc/asm.h>
-#include <ppc/hw_perfmon.h>
-#include <pexpert/pexpert.h>
-#include <kern/cpu_data.h>
-#include <ppc/mappings.h>
-#include <ppc/Diagnostics.h>
-#include <ppc/trap.h>
-#include <ppc/mem.h>
-#include <IOKit/IOPlatformExpert.h>
-#include <IOKit/IOHibernatePrivate.h>
-#include <vm/vm_page.h>
-
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-
-unsigned int		save_kdebug_enable = 0;
-
-
-hibernate_page_list_t *
-hibernate_page_list_allocate(void)
-{
-    vm_size_t               size;
-    uint32_t                bank;
-    uint32_t	            pages, page_count;
-    hibernate_page_list_t * list;
-    hibernate_bitmap_t *    bitmap;
-
-    page_count = 0;
-    size = sizeof(hibernate_page_list_t);
-
-    for (bank = 0; bank < (uint32_t) pmap_mem_regions_count; bank++)
-    {
-	size += sizeof(hibernate_bitmap_t);
-	pages = pmap_mem_regions[bank].mrEnd + 1 - pmap_mem_regions[bank].mrStart;
-	page_count += pages;
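-	/* one bit per page, rounded up to whole 32-bit bitmap words */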
-	size += ((pages + 31) >> 5) * sizeof(uint32_t);
-    }
-
-    list = kalloc(size);
-    if (!list)
-	return (list);
-	
-    list->list_size  = size;
-    list->page_count = page_count;
-    list->bank_count = pmap_mem_regions_count;
-
-    bitmap = &list->bank_bitmap[0];
-    for (bank = 0; bank < list->bank_count; bank++)
-    {
-	bitmap->first_page  =  pmap_mem_regions[bank].mrStart;
-	bitmap->last_page   =  pmap_mem_regions[bank].mrEnd;
-	bitmap->bitmapwords = (pmap_mem_regions[bank].mrEnd + 1
-			     - pmap_mem_regions[bank].mrStart + 31) >> 5;
-
-	bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
-    }
-    return (list);
-}
-
-void
-hibernate_page_list_setall_machine(hibernate_page_list_t * page_list,
-                                    hibernate_page_list_t * page_list_wired,
-                                    uint32_t * pagesOut)
-{
-    uint32_t page, count, PCAsize;
-
-    /* Get total size of PCA table */
-    PCAsize = round_page((hash_table_size / PerProcTable[0].ppe_vaddr->pf.pfPTEG) 
-                          * sizeof(PCA_t));
-
-    page = atop_64(hash_table_base - PCAsize);
-    count = atop_64(hash_table_size + PCAsize);
-
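-    /* Mark the hash table and PCA pages as not to be saved in the image;
-       presumably they are rebuilt from scratch on wake. */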
-    hibernate_set_page_state(page_list, page_list_wired, page, count, 0);
-    *pagesOut -= count;
-
-    HIBLOG("removed hash, pca: %d pages\n", count);
-
-    save_snapshot();
-}
-
-// mark pages not to be saved and not for scratch usage during restore
-void
-hibernate_page_list_set_volatile(__unused hibernate_page_list_t *page_list,
-				 __unused hibernate_page_list_t *page_list_wired,
-				 __unused uint32_t *pagesOut)
-{
-}
-
-kern_return_t 
-hibernate_processor_setup(IOHibernateImageHeader * header)
-{
-    header->processorFlags = PerProcTable[0].ppe_vaddr->pf.Available;
-
-    PerProcTable[0].ppe_vaddr->hibernate = 1;
-
-    return (KERN_SUCCESS);
-}
-
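-/* Lock ordering note: the page-queues lock is taken before the free-list
- * lock here, and the two are dropped in the reverse order in
- * hibernate_vm_unlock() below. */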
-void
-hibernate_vm_lock(void)
-{
-    if (getPerProc()->hibernate)
-    {
-        vm_page_lock_queues();
-        lck_mtx_lock(&vm_page_queue_free_lock);
-    }
-}
-
-void
-hibernate_vm_unlock(void)
-{
-    if (getPerProc()->hibernate)
-    {
-        lck_mtx_unlock(&vm_page_queue_free_lock);
-        vm_page_unlock_queues();
-    }
-}
-
-void ml_ppc_sleep(void)
-{
-    struct per_proc_info *proc_info;
-    boolean_t dohalt;
-
-    proc_info = getPerProc();
-    if (!proc_info->hibernate)
-    {
-	ml_ppc_do_sleep();
-	return;
-    }
-
-    {
-        uint64_t start, end, nsec;
-
-	HIBLOG("mapping_hibernate_flush start\n");
-	clock_get_uptime(&start);
-
-	mapping_hibernate_flush();
-
-	clock_get_uptime(&end);
-	absolutetime_to_nanoseconds(end - start, &nsec);
-	HIBLOG("mapping_hibernate_flush time: %qd ms\n", nsec / 1000000ULL);
-    }
-
-    dohalt = hibernate_write_image();
-
-    if (dohalt)
-    {
-	// off
-	HIBLOG("power off\n");
-	if (PE_halt_restart) 
-	    (*PE_halt_restart)(kPEHaltCPU);
-    }
-    else
-    {
-	// sleep
-	HIBLOG("sleep\n");
-
-	// in case we come back via regular wake, set the state in memory.
-	PerProcTable[0].ppe_vaddr->hibernate = 0;
-
-	PE_cpu_machine_quiesce(proc_info->cpu_id);
-	return;
-    }
-}
-
-void
-hibernate_newruntime_map(__unused void * map,
-			 __unused vm_size_t map_size,
-			 __unused uint32_t runtime_offset)
-{
-}
diff --git a/osfmk/ppc/hibernate_restore.s b/osfmk/ppc/hibernate_restore.s
deleted file mode 100644
index 9025e6589..000000000
--- a/osfmk/ppc/hibernate_restore.s
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <assym.s>
-
-/*
-This code is linked into the kernel but part of the "__HIB" section, which means
-it's used by code running in the special context of restoring the kernel text and data
-from the hibernation image read by the booter. hibernate_kernel_entrypoint() and everything
-it calls or references (i.e. hibernate_restore_phys_page())
-need to be careful to only touch memory also in the "__HIB" section.
-*/
-
-/*
-void 
-hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t procFlags);
-*/
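-
-/*
-Under the 32-bit PowerPC ABI each uint64_t argument arrives as a register
-pair, so in the code below src is r3/r4 (high/low), dst is r5/r6, len is
-r7 and procFlags is r8.
-*/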
-
-			.align	5
-			.globl	EXT(hibernate_restore_phys_page)
-			.globl	EXT(hibernate_machine_entrypoint)
-
-LEXT(hibernate_restore_phys_page)
-
-	andi.		r0, r8, pf64Bit
-	bne		hibernate_restore_phys_page64
-
-        srwi		r10,r7,5				; r10 <- 32-byte chunks to xfer
-        mtctr		r10
-	cmpwi		r4, 0
-	beq		hibernate_restore_phys_pageFlush
-
-hibernate_restore_phys_pageCopy:
-        lwz		r0,0(r4)
-        lwz		r2,4(r4)
-        lwz		r7,8(r4)
-        lwz		r8,12(r4)
-        lwz		r9,16(r4)
-        lwz		r10,20(r4)
-        lwz		r11,24(r4)
-        lwz		r12,28(r4)
-
-        dcbz		0,r6					; avoid prefetch of next cache line
-        stw		r0,0(r6)
-        stw		r2,4(r6)
-        stw		r7,8(r6)
-        stw		r8,12(r6)
-        stw		r9,16(r6)
-        stw		r10,20(r6)
-        stw		r11,24(r6)
-        stw		r12,28(r6)
-        
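-	; Flush this data cache line and invalidate the matching icache line
-	; so the restored bytes are coherent if they are later executed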
-	dcbf 		0, r6
-	sync
-	icbi 		0, r6
-	isync
-	sync
-
-        addi		r4,r4,32
-        addi		r6,r6,32
-
-        bdnz		hibernate_restore_phys_pageCopy		; loop if more chunks
-        blr
-
-hibernate_restore_phys_pageFlush:
-	dcbf 		0, r6
-	sync
-	icbi 		0, r6
-	isync
-	sync
-
-        addi		r6,r6,32
-        bdnz		hibernate_restore_phys_pageFlush		; loop if more chunks
-        blr
-
-
-hibernate_restore_phys_page64:
-	rlwinm		r3,r3,0,1,0			; Duplicate high half of long long paddr into top of reg
-	rlwimi		r3,r4,0,0,31			; Combine bottom of long long to full 64-bits
-	rlwinm		r4,r5,0,1,0			; Duplicate high half of long long paddr into top of reg
-	rlwimi		r4,r6,0,0,31			; Combine bottom of long long to full 64-bits
-
-	mfmsr		r9				; Get the MSR
-	li		r0,1				; Note - we use this in a couple places below
-	rldimi		r9,r0,63,MSR_SF_BIT		; set SF on in MSR we will copy with
-	mtmsrd		r9				; turn 64-bit addressing on
-	isync						; wait for it to happen
-
-        srwi	r10,r7,7				; r10 <- 128-byte chunks to xfer
-        mtctr	r10
-	cmpdi	r3, 0
-	beq	hibernate_restore_phys_page64Flush
-
-hibernate_restore_phys_page64Copy:
-        ld		r0,0(r3)
-        ld		r2,8(r3)
-        ld		r7,16(r3)
-        ld		r8,24(r3)
-        ld		r9,32(r3)
-        ld		r10,40(r3)
-        ld		r11,48(r3)
-        ld		r12,56(r3)
-
-        dcbz128		0,r4				; avoid prefetch of next cache line
-        std		r0,0(r4)
-        std		r2,8(r4)
-        std		r7,16(r4)
-        std		r8,24(r4)
-        std		r9,32(r4)
-        std		r10,40(r4)
-        std		r11,48(r4)
-        std		r12,56(r4)
-        
-        ld		r0,64(r3)			; load 2nd half of chunk
-        ld		r2,72(r3)
-        ld		r7,80(r3)
-        ld		r8,88(r3)
-        ld		r9,96(r3)
-        ld		r10,104(r3)
-        ld		r11,112(r3)
-        ld		r12,120(r3)
-
-        std		r0,64(r4)
-        std		r2,72(r4)
-        std		r7,80(r4)
-        std		r8,88(r4)
-        std		r9,96(r4)
-        std		r10,104(r4)
-        std		r11,112(r4)
-        std		r12,120(r4)
-
-	dcbf 		0, r4
-	sync
-	icbi 		0, r4
-	isync
-	sync
-
-        addi		r3,r3,128
-        addi		r4,r4,128
-
-        bdnz		hibernate_restore_phys_page64Copy		; loop if more chunks
-
-
-hibernate_restore_phys_page64Done:
-	mfmsr		r9				; Get the MSR we used to copy
-	rldicl		r9,r9,0,MSR_SF_BIT+1		; clear SF
-        mtmsrd  	r9                          	; turn 64-bit mode off
-	isync                               		; wait for it to happen
-        blr
-
-hibernate_restore_phys_page64Flush:
-	dcbf 		0, r4
-	sync
-	icbi 		0, r4
-	isync
-	sync
-
-        addi		r4,r4,128
-
-        bdnz		hibernate_restore_phys_page64Flush		; loop if more chunks
-	b		hibernate_restore_phys_page64Done
-
-LEXT(hibernate_machine_entrypoint)
-        b               EXT(hibernate_kernel_entrypoint)
-
diff --git a/osfmk/ppc/hw_exception.s b/osfmk/ppc/hw_exception.s
deleted file mode 100644
index ab77e1774..000000000
--- a/osfmk/ppc/hw_exception.s
+++ /dev/null
@@ -1,1832 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-/* Low level routines dealing with exception entry and exit.
- * There are various types of exception:
- *
- *    Interrupt, trap, system call and debugger entry. Each has its own
- *    handler since the state save routine is different for each. The
- *    code is very similar (a lot of cut and paste).
- *
- *    The code for the FPU disabled handler (lazy fpu) is in cswtch.s
- */
-
-#include <debug.h>
-#include <mach_assert.h>
-#include <mach/exception_types.h>
-#include <mach/kern_return.h>
-#include <mach/ppc/vm_param.h>
-
-#include <assym.s>
-
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <ppc/trap.h>
-#include <ppc/exception.h>
-#include <ppc/savearea.h>
-
-
-#define VERIFYSAVE 0
-#define FPVECDBG 0
-#define FPFLOOD 0
-#define INSTRUMENT 0
-
-/*
- * thandler(type)
- *
- * ENTRY:	VM switched ON
- *			Interrupts  OFF
- *			R3 contains exception code
- *			R4 points to the saved context (virtual address)
- *			Everything is saved in savearea
- */
-
-/*
- * If pcb.ksp == 0 then the kernel stack is already busy,
- *         we make a stack frame
- *		   leaving enough space for the 'red zone' in case the
- *		   trapped thread was in the middle of saving state below
- *		   its stack pointer.
- *
- * otherwise       we make a stack frame and switch to
- * 		   the kernel stack (setting pcb.ksp to 0)
- *
- * on return, we do the reverse, the last state is popped from the pcb
- * and pcb.ksp is set to the top of stack                  
- */
-
-/* TRAP_SPACE_NEEDED is the space assumed free on the kernel stack when
- * another trap is taken. We need at least enough space for a saved state
- * structure plus two small backpointer frames, and we add a few
- * hundred bytes for the space needed by the C code (which may be less but
- * may be much more). We're trying to catch kernel stack overflows :-)
- */
-
-#define TRAP_SPACE_NEEDED	FM_REDZONE+(2*FM_SIZE)+256
-
-			.text
-
-			.align	5
-			.globl EXT(thandler)
-LEXT(thandler)										; Trap handler
-
-			mfsprg	r13,1							; Get the current activation
-			lwz		r25,ACT_PER_PROC(r13)			; Get the per_proc block 
-		
-			lwz		r1,PP_ISTACKPTR(r25)			; Get interrupt stack pointer
-	
-			cmpwi	cr0,r1,0						; Are we on interrupt stack?					
-			mr		r6,r13
-			beq-	cr0,EXT(ihandler)				; If on interrupt stack, treat this as interrupt...
-			lwz		r26,ACT_MACT_SPF(r13)			; Get special flags
-			lwz		r8,ACT_MACT_PCB(r13)			; Get the last savearea used
-			rlwinm.	r26,r26,0,bbThreadbit,bbThreadbit	; Do we have Blue Box Assist active? 
-			lwz		r1,ACT_MACT_KSP(r13)			; Get the top of kernel stack
-			bnel-	checkassist						; See if we should assist this
-			stw		r4,ACT_MACT_PCB(r13)			; Point to our savearea
-			stw		r8,SAVprev+4(r4)				; Queue the new save area in the front 
-			
-#if VERIFYSAVE
-			bl		versave							; (TEST/DEBUG)
-#endif
-			
-			lwz		r9,THREAD_KERNEL_STACK(r6)		; Get our kernel stack start
-			cmpwi	cr1,r1,0						; Are we already on kernel stack?
-			stw		r13,SAVact(r4)					; Mark the savearea as belonging to this activation
-			lwz		r26,saver1+4(r4)				; Get the stack at interrupt time
-
-			bne+	cr1,.L_kstackfree				; We are not on kernel stack yet...		
-
-			subi	r1,r26,FM_REDZONE				; Make a red zone on interrupt time kernel stack
-
-.L_kstackfree:
-			lwz		r31,savesrr1+4(r4)				; Pick up the entry MSR 
-			sub		r9,r1,r9						; Get displacement into the kernel stack
-			li		r0,0							; Make this 0
-			rlwinm.	r0,r9,0,28,31					; Verify that we have a 16-byte aligned stack (and get a 0)
-			cmplwi	cr2,r9,KERNEL_STACK_SIZE		; Do we still have room on the stack?
-			beq		cr1,.L_state_on_kstack			; using above test for pcb/stack
-
-			stw		r0,ACT_MACT_KSP(r13)			; Show that we have taken the stack
-
-.L_state_on_kstack:	
-			lwz		r9,savevrsave(r4)				; Get the VRSAVE register
-			bne--	kernelStackUnaligned			; Stack is unaligned...
-			rlwinm.	r6,r31,0,MSR_VEC_BIT,MSR_VEC_BIT	; Was vector on?
-			subi	r1,r1,FM_SIZE					; Push a header onto the current stack 
-			bgt--	cr2,kernelStackBad				; Kernel stack is bogus...
-
-kernelStackNotBad:									; Vector was off
-			beq++	tvecoff							; Vector off, do not save vrsave...
-			stw		r9,liveVRS(r25)					; Set the live value
-
-tvecoff:	stw		r26,FM_BACKPTR(r1)				; Link back to the previous frame
-
-#if	DEBUG
-/* If debugging, we need two frames, the first being a dummy
- * which links back to the trapped routine. The second is
- * that which the C routine below will need
- */
-			lwz		r3,savesrr0+4(r4)				; Get the point of interruption
-			stw		r3,FM_LR_SAVE(r1)				; save old instr ptr as LR value 
-			stwu	r1,	-FM_SIZE(r1)				; and make new frame 
-#endif /* DEBUG */
-
-			mr		r30,r4
-			lwz		r3,SAVtime(r4)
-			lwz		r4,SAVtime+4(r4)
-			addi	r5,r13,SYSTEM_TIMER
-			bl		EXT(thread_timer_event)
-			addi	r5,r25,SYSTEM_STATE
-			bl		EXT(state_event)
-
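-;			If the task has BSD vtimers armed, post an AST_BSD so the
-;			timers get processed on the way back out.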
-			lwz		r7,ACT_TASK(r13)
-			lwz		r8,TASK_VTIMERS(r7)
-			cmpwi	r8,0
-			beq++	0f
-
-			lwz		r7,ACT_PER_PROC(r13)
-			li		r4,AST_BSD
-			lwz		r8,PP_PENDING_AST(r7)
-			or		r8,r8,r4
-			stw		r8,PP_PENDING_AST(r7)
-			addi	r3,r13,ACT_AST
-			bl		EXT(hw_atomic_or)
-0:
-
-/* call trap handler proper, with
- *   ARG0 = type
- *   ARG1 = saved_state ptr
- *   ARG2 = dsisr
- *   ARG3 = dar
- */
-
-			mr		r4,r30
-			lwz		r3,saveexception(r30)			; Get the exception code 
-			lwz		r0,ACT_MACT_SPF(r13)			; Get the special flags
-			
-			addi	r5,r3,-T_DATA_ACCESS			; Adjust to start of range
-			rlwinm.	r0,r0,0,runningVMbit,runningVMbit	; Are we in VM state? (cr0_eq == 0 if yes)
-			cmplwi	cr2,r5,T_TRACE-T_DATA_ACCESS	; Are we still in range? (cr_gt if not)
-			
-			lwz		r5,savedsisr(r4)				; Get the saved DSISR
-			
-			crnor	cr7_eq,cr0_eq,cr2_gt			; We should intercept if in VM and is a true trap (cr7_eq == 1 if yes)
-			rlwinm.	r0,r31,0,MSR_PR_BIT,MSR_PR_BIT	; Are we trapping from supervisor state? (cr0_eq == 1 if yes)
-
-			cmpi	cr2,r3,T_PREEMPT				; Is this a preemption?
-
-			beq--	.L_check_VM
-			stw		r4,ACT_MACT_UPCB(r13)			; Store user savearea
-.L_check_VM:
-			
-			crandc	cr0_eq,cr7_eq,cr0_eq			; Do not intercept if we are in the kernel (cr0_eq == 1 if yes)
-			
-			lwz		r6,savedar(r4)					; Get the DAR (top)
-			lwz		r7,savedar+4(r4)				; Get the DAR (bottom)
-	
-			beq-	cr2,.L_call_trap				; Do not turn on interrupts for T_PREEMPT
-			beq-	exitFromVM						; Any true trap but T_MACHINE_CHECK exits us from the VM...
-
-/* syscall exception might warp here if there's nothing left
- * to do except generate a trap
- */
-
-.L_call_trap:	
-
-#if FPFLOOD
-			stfd	f31,emfp31(r25)					; (TEST/DEBUG)
-#endif
-			
-			bl	EXT(trap)
-
-			lis		r10,hi16(MASK(MSR_VEC))			; Get the vector enable
-			mfmsr	r7								; Get the MSR
-			ori		r10,r10,lo16(MASK(MSR_FP)|MASK(MSR_EE))	; Add in FP and EE
-			andc	r7,r7,r10						; Turn off VEC, FP, and EE
-			mtmsr	r7								; Disable for interrupts
-			mfsprg	r8,1							; Get the current activation
-			lwz		r10,ACT_PER_PROC(r8)			; Get the per_proc block 
-/*
- * This is also the point where new threads come when they are created.
- * The new thread is set up to look like a thread that took an
- * interrupt and went immediately into trap.
- */
-
-thread_return:
-			lwz		r11,SAVflags(r3)				; Get the flags of the current savearea
-			lwz		r0,savesrr1+4(r3)				; Get the MSR we are going to
-			lwz		r4,SAVprev+4(r3)				; Pick up the previous savearea 
-			mfsprg	r8,1							; Get the current thread
-			rlwinm	r11,r11,0,15,13					; Clear the syscall flag
-			rlwinm.	r0,r0,0,MSR_PR_BIT,MSR_PR_BIT	; Are we going to the user?
-			mr		r1,r8
-			stw		r11,SAVflags(r3)				; Save back the flags (with reset stack cleared) 
-			
-			lwz		r5,THREAD_KERNEL_STACK(r1)		; Get the base pointer to the stack 
-			stw		r4,ACT_MACT_PCB(r8)				; Point to the previous savearea (or 0 if none)
-			addi	r5,r5,KERNEL_STACK_SIZE-FM_SIZE	; Reset to empty 
-
-			beq--	chkfac							; We are not leaving the kernel yet...
-
-			stw		r5,ACT_MACT_KSP(r8)				; Save the empty stack pointer 
-			b		chkfac							; Go end it all...
-
-
-;
-;			Here is where we go when we detect that the kernel stack is all messed up.
-;			We just try to dump some info and get into the debugger.
-;
-
-kernelStackBad:
-
-			lwz		r3,PP_DEBSTACK_TOP_SS(r25)		; Pick up debug stack top
-			subi	r3,r3,KERNEL_STACK_SIZE-FM_SIZE	; Adjust to start of stack
-			sub		r3,r1,r3						; Get displacement into debug stack
-			cmplwi	cr2,r3,KERNEL_STACK_SIZE-FM_SIZE	; Check if we are on debug stack
-			blt+	cr2,kernelStackNotBad			; Yeah, that is ok too...
-
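-/* Illustrative sketch (hypothetical C): the check above bounds-checks the
- * stack pointer with one unsigned compare, because (p - base) wraps to a
- * huge value whenever p is below base:
- *
- *	int on_stack(unsigned long p, unsigned long base, unsigned long size)
- *	{
- *		return (p - base) < size;	// true iff base <= p < base + size
- *	}
- */
-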
-			lis		r0,hi16(Choke)					; Choke code
-			ori		r0,r0,lo16(Choke)				; and the rest
-			li		r3,failStack					; Bad stack code
-			sc										; System ABEND
-
-kernelStackUnaligned:
-			lis		r0,hi16(Choke)					; Choke code
-			ori		r0,r0,lo16(Choke)				; and the rest
-			li		r3,failUnalignedStk				; Unaligned stack code
-			sc										; System ABEND
-
-
-/*
- * shandler(type)
- *
- * ENTRY:	VM switched ON
- *			Interrupts  OFF
- *			R3 contains exception code
- *			R4 points to the saved context (virtual address)
- *			Everything is saved in savearea
- */
-
-/*
- * If pcb.ksp == 0 then the kernel stack is already busy,
- *                 this is an error - jump to the debugger entry
- * 
- * otherwise       depending upon the type of
- *                 syscall, look it up in the kernel table
- *		   		   or pass it to the server.
- *
- * on return, we do the reverse, the state is popped from the pcb
- * and pcb.ksp is set to the top of stack.
- */
- 
-/*
- *	NOTE:
- *		mach system calls are negative
- *		BSD system calls are low positive
- *		PPC-only system calls are in the range 0x6xxx
- *		PPC-only "fast" traps are in the range 0x7xxx
- */
- 
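-/* Illustrative sketch (hypothetical C): roughly the classification the
- * handler below performs, in this order:
- *
- *	if ((num & 0xFFFFF000) == 0x7000)	// fast-path trap
- *	else if ((num & 0xFFFFF000) == 0x6000)	// ppc-only call
- *	else if (num < 0)			// mach call
- *	else					// unix (BSD) call
- */
-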
-			.align	5
-			.globl EXT(shandler)
-LEXT(shandler)										; System call handler
-
-			lwz		r7,savesrr1+4(r4)				; Get the SRR1 value
-			mfsprg	r13,1							; Get the current activation
-			lwz		r25,ACT_PER_PROC(r13)			; Get the per_proc block 
-			lwz		r0,saver0+4(r4)					; Get the original syscall number
-			lwz		r17,PP_ISTACKPTR(r25)			; Get interrupt stack pointer
-			rlwinm	r15,r0,0,0,19					; Clear the bottom of call number for fast check
-			mr.		r17,r17							; Are we on interrupt stack?
-			lwz		r9,savevrsave(r4)				; Get the VRsave register
-			beq--	EXT(ihandler)					; On interrupt stack, not allowed...
-			rlwinm.	r6,r7,0,MSR_VEC_BIT,MSR_VEC_BIT	; Was vector on?
-			mr		r16,r13
-
-			beq++	svecoff							; Vector off, do not save vrsave...
-			stw		r9,liveVRS(r25)					; Set the live value
-;
-; 			Check if SCs are being redirected for the BlueBox or to VMM
-;
-
-svecoff:	lwz		r6,ACT_MACT_SPF(r13)			; Pick up activation special flags
-			mtcrf	0x40,r6							; Check special flags
-			mtcrf	0x01,r6							; Check special flags
-			crmove	cr6_eq,runningVMbit				; Remember if we are in VMM
-			bne++	cr6,sVMchecked					; Not running VM
-			lwz		r18,spcFlags(r25)				; Load per_proc special flags
-			rlwinm. r18,r18,0,FamVMmodebit,FamVMmodebit	; Is FamVMmodebit set?
-			beq		sVMchecked						; Not in FAM
-			cmpwi	r0,0x6004						; Is it vmm_dispatch syscall?
-			bne		sVMchecked
-			lwz		r26,saver3+4(r4)				; Get the original syscall number
-			cmpwi	cr6,r26,kvmmExitToHost			; vmm_exit_to_host request
-sVMchecked:
-			bf++	bbNoMachSCbit,noassist			; Take branch if SCs are not redirected
-			lwz		r26,ACT_MACT_BEDA(r13)			; Pick up the pointer to the blue box exception area
-			b		EXT(atomic_switch_syscall)		; Go to the assist...
-
-noassist:	cmplwi	r15,0x7000						; Do we have a fast path trap? 
-			lwz		r14,ACT_MACT_PCB(r13)			; Now point to the PCB 
-			beql	fastpath						; We think it is a fastpath... 
-
-			lwz		r1,ACT_MACT_KSP(r13)			; Get the kernel stack pointer 
-#if DEBUG
-			mr.		r1,r1							; Are we already on the kernel stack? 
-			li		r3,T_SYSTEM_CALL				; Yup, pretend we had an interrupt... 
-			beq-	EXT(ihandler)					; Bad boy, bad boy... What cha gonna do when they come for you?
-#endif /* DEBUG */
-
-			stw		r4,ACT_MACT_PCB(r13)			; Point to our savearea
-			stw		r4,ACT_MACT_UPCB(r13)			; Store user savearea
-			li		r0,0							; Clear this out 
-			stw		r14,SAVprev+4(r4)				; Queue the new save area in the front 
-			stw		r13,SAVact(r4)					; Point the savearea at its activation
-			
-#if VERIFYSAVE
-			bl		versave							; (TEST/DEBUG)
-#endif			
-			
-			lwz		r15,saver1+4(r4)				; Grab interrupt time stack 
-			mr		r30,r4							; Save pointer to the new context savearea
-			stw		r0,ACT_MACT_KSP(r13)			; Mark stack as busy with 0 val 
-			stw		r15,FM_BACKPTR(r1)				; Link stack frame backwards
-
-			lwz		r3,SAVtime(r30)
-			lwz		r4,SAVtime+4(r30)
-			addi	r5,r13,SYSTEM_TIMER
-			bl		EXT(thread_timer_event)
-			addi	r5,r25,SYSTEM_STATE
-			bl		EXT(state_event)
-		
-			lwz		r7,ACT_TASK(r13)
-			lwz		r8,TASK_VTIMERS(r7)
-			cmpwi	r8,0
-			beq++	0f
-
-			lwz		r7,ACT_PER_PROC(r13)
-			li		r4,AST_BSD
-			lwz		r8,PP_PENDING_AST(r7)
-			or		r8,r8,r4
-			stw		r8,PP_PENDING_AST(r7)
-			addi	r3,r13,ACT_AST
-			bl		EXT(hw_atomic_or)
-0:
-		
-#if	DEBUG
-/* If debugging, we need two frames, the first being a dummy
- * which links back to the trapped routine. The second is
- * that which the C routine below will need
- */
-			lwz		r8,savesrr0+4(r30)				; Get the point of interruption
-			stw		r8,FM_LR_SAVE(r1)				; Save old instr ptr as LR value
-			stwu	r1,	-FM_SIZE(r1)				; and make new frame
-#endif /* DEBUG */
-
-			mr		r4,r30
-
-			lwz		r15,SAVflags(r30)				; Get the savearea flags
-			lwz		r0,saver0+4(r30)				; Get R0 back
-			mfmsr	r11								; Get the MSR
-			stwu	r1,-(FM_SIZE+ARG_SIZE+MUNGE_ARGS_SIZE)(r1)		; Make a stack frame
-			ori		r11,r11,lo16(MASK(MSR_EE))		; Turn on interruption enabled bit
-			rlwinm	r10,r0,0,0,19					; Keep only the top part 
-			oris	r15,r15,SAVsyscall >> 16 		; Mark that this is a syscall
-			cmplwi	r10,0x6000						; Is it the special ppc-only guy?
-			stw		r15,SAVflags(r30)				; Save syscall marker
-			beq--	cr6,exitFromVM					; It is time to exit from alternate context...
-			
-			beq--	ppcscall						; Call the ppc-only system call handler...
-
-			mr.		r0,r0							; What kind is it?
-			mtmsr	r11								; Enable interruptions
-
-			blt--	.L_kernel_syscall				; If the call number is negative, this is a mach call...
-											
-			lwz     r8,ACT_TASK(r13)				; Get our task
-			cmpwi	cr0,r0,0x7FFA					; Special blue box call?
-			beq--	.L_notify_interrupt_syscall		; Yeah, call it...
-			
-			lwz     r7,TASK_SYSCALLS_UNIX(r8)		; Get the current count
-			mr      r3,r30							; Get PCB/savearea
-			mr      r4,r13							; current activation
-			addi    r7,r7,1							; Bump it
-			stw     r7,TASK_SYSCALLS_UNIX(r8)		; Save it
-
-#if FPFLOOD
-			stfd	f31,emfp31(r25)					; (TEST/DEBUG)
-#endif
-
-			bl      EXT(unix_syscall)				; Check out unix...
-
-.L_call_server_syscall_exception:		
-			li		r3,EXC_SYSCALL					; doexception(EXC_SYSCALL, num, 1)
-
-.L_call_server_exception:
-			mr		r4,r0							; Set syscall selector
-			li		r5,1
-			b		EXT(doexception)				; Go away, never to return...
-
-.L_notify_interrupt_syscall:
-			lwz		r3,saver3+4(r30)				; Get the new PC address to pass in
-			bl		EXT(syscall_notify_interrupt)
-/*
- * Ok, return from C function, R3 = return value
- *
- * saved state is still in R30 and the active thread is in R16.
- */
-			mr		r31,r16							; Move the current thread pointer
-			stw		r3,saver3+4(r30)				; Stash the return code
-			b		.L_thread_syscall_ret_check_ast
-	
-;
-;			Handle PPC-only system call interface
-;			These are called with interruptions disabled
-;			and the savearea/pcb as the first parameter.
-;			It is up to the callee to enable interruptions if
-;			they should be.  We are in a state here where
-;			both interrupts and preemption are ok, but because we could
-;			be calling diagnostic code we will not enable.
-;			
-;			Also, the callee is responsible for finding any parameters
-;			in the savearea/pcb. It also must set saver3 with any return
-;			code before returning.
-;
-;			There are 3 possible return codes:
-;				0  the call is disabled or otherwise unavailable; we treat it as bogus
-;				+  the call finished ok, check for AST
-;				-  the call finished ok, do not check for AST
-;
-;			Note: the last option is intended for special diagnostics calls that 
-;			want the thread to return and execute before checking for preemption.
-;
-;			NOTE: Both R16 (thread) and R30 (savearea) need to be preserved over this call!!!!
-;
-
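-;			Illustrative sketch (hypothetical C; helper names assumed) of
-;			the dispatch ppcscret below performs on that return code:
-;
-;				rc = (*PPCcalls[num & 0xFFF])(savearea, act);
-;				if (rc > 0)       check_ast_then_return();
-;				else if (rc < 0)  return_without_ast_check();
-;				else              raise_syscall_exception();	// disabled/bogus
-;
-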
-			.align	5
-
-ppcscall:	rlwinm	r11,r0,2,18,29					; Make an index into the table
-			lis		r10,hi16(EXT(PPCcalls))			; Get PPC-only system call table
-			cmplwi	r11,PPCcallmax					; See if we are too big
-			ori		r10,r10,lo16(EXT(PPCcalls))		; Merge in low half
-			bgt-	.L_call_server_syscall_exception	; Bogus call...
-			lwzx	r11,r10,r11						; Get function address
-			
-;
-;			Note: make sure we do not change the savearea in R30 to
-;			a different register without checking.  Some of the PPCcalls
-;			depend upon it being there.
-;
-	
-			mr		r3,r30							; Pass the savearea
-			mr		r4,r13							; Pass the activation
-			mr.		r11,r11							; See if there is a function here
-			mtctr	r11								; Set the function address
-			beq-	.L_call_server_syscall_exception	; Disabled call...
-#if INSTRUMENT
-			mfspr	r4,pmc1							; Get stamp
-			stw		r4,0x6100+(9*16)+0x0(0)			; Save it
-			mfspr	r4,pmc2							; Get stamp
-			stw		r4,0x6100+(9*16)+0x4(0)			; Save it
-			mfspr	r4,pmc3							; Get stamp
-			stw		r4,0x6100+(9*16)+0x8(0)			; Save it
-			mfspr	r4,pmc4							; Get stamp
-			stw		r4,0x6100+(9*16)+0xC(0)			; Save it
-#endif
-			bctrl									; Call it
-	
-			.globl	EXT(ppcscret)
-
-LEXT(ppcscret)
-			mr.		r3,r3							; See what we should do
-			mr		r31,r16							; Restore the current thread pointer
-			bgt+	.L_thread_syscall_ret_check_ast	; Take normal AST checking return....
-			mfsprg	r10,1							; Get the current activation
-			lwz		r10,ACT_PER_PROC(r10)			; Get the per_proc block 
-			blt+	.L_thread_syscall_return		; Return, but no ASTs....
-			lwz		r0,saver0+4(r30)				; Restore the system call number
-			b		.L_call_server_syscall_exception	; Go to common exit...
-
-
-
-/*
- * we get here for mach system calls
- * when kdebug tracing is enabled
- */
-	
-ksystrace:	
-			mr		r4,r30							; Pass in saved state
-			bl      EXT(syscall_trace)
-			
-			cmplw	r31,r29							; Is this syscall in the table?	
-			add		r31,r27,r28						; Point right to the syscall table entry
-
-			bge-	.L_call_server_syscall_exception	; The syscall number is invalid
-	
-			lwz     r0,savesrr1(r30)				; Get the saved srr1
-			rlwinm.	r0,r0,0,MSR_SF_BIT,MSR_SF_BIT	; Test for 64 bit caller
-			lwz		r0,MACH_TRAP_ARG_MUNGE32(r31)	; Pick up the 32 bit munge function address
-			beq--	.L_ksystrace_munge
-			lwz		r0,MACH_TRAP_ARG_MUNGE64(r31)	; Pick up the 64 bit munge function address
-
-.L_ksystrace_munge:
-			cmplwi  r0,0							; do we have a munger to call?
-			mtctr	r0								; Set the function call address
-			addi	r3,r30,saver3					; Pointer to args from save area
-			addi	r4,r1,FM_ARG0+ARG_SIZE			; Pointer for munged args
-			beq--	.L_ksystrace_trapcall			; just make the trap call
-			bctrl									; Call the munge function
-
-.L_ksystrace_trapcall:		
-			lwz		r0,MACH_TRAP_FUNCTION(r31)		; Pick up the function address
-			mtctr	r0								; Set the function call address
-			addi	r3,r1,FM_ARG0+ARG_SIZE			; Pointer to munged args
-			bctrl
-
-			mr		r4,r30							; Pass in the savearea
-			bl		EXT(syscall_trace_end)			; Trace the exit of the system call	
-			b		.L_mach_return
-
-	
-			
-/* Once here, we know that the syscall was -ve
- * we should still have r1=ksp,
- * r16		= pointer to current thread,
- * r13		= pointer to top activation,
- * r0		= syscall number
- * r30		= pointer to saved state (in pcb)
- */
-
-				.align	5
-
-.L_kernel_syscall:	
-;
-; Call a function that can print out our syscall info 
-; Note that we don't care about any volatiles yet
-;
-			lwz		r10,ACT_TASK(r13)				; Get our task 
-			lwz		r0,saver0+4(r30)
-			lis		r8,hi16(EXT(kdebug_enable))		; Get top of kdebug_enable 
-			lis		r28,hi16(EXT(mach_trap_table))	; Get address of table
-			ori		r8,r8,lo16(EXT(kdebug_enable))	; Get bottom of kdebug_enable 
-			lwz		r8,0(r8)						; Get kdebug_enable 
-
-			lwz		r7,TASK_SYSCALLS_MACH(r10)	; Get the current count
-			neg		r31,r0						; Make this positive
-			mr 		r3,r31						; save it
-			slwi	r27,r3,4					; multiply by 16
-			slwi	r3,r3,2						; and the original by 4
-			ori		r28,r28,lo16(EXT(mach_trap_table))	; Get address of table
-			add		r27,r27,r3						; for a total of 20x (5 words/entry)
-			addi	r7,r7,1							; Bump TASK_SYSCALLS_MACH count
-			cmplwi	r8,0							; Is kdebug_enable non-zero
-			stw		r7,TASK_SYSCALLS_MACH(r10)		; Save count
-			bne--	ksystrace						; yes, tracing enabled
-			
-			cmplwi	r31,MACH_TRAP_TABLE_COUNT		; Is this syscall in the table?	
-			add		r31,r27,r28						; Point right to the syscall table entry
-
-			bge--	.L_call_server_syscall_exception	; The syscall number is invalid
-
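-/* Illustrative sketch (hypothetical C): the two shifts above build the
- * table offset without a multiply, each entry being 5 words (20 bytes):
- *
- *	offset = (num << 4) + (num << 2);	// num*16 + num*4 == num*20
- *	entry  = (char *)mach_trap_table + offset;
- */
-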
-			lwz     r0,savesrr1(r30)				; Get the saved srr1
-			rlwinm.	r0,r0,0,MSR_SF_BIT,MSR_SF_BIT	; Test for 64 bit caller
-			lwz		r0,MACH_TRAP_ARG_MUNGE32(r31)	; Pick up the 32 bit munge function address
-			beq--	.L_kernel_syscall_munge
-			lwz		r0,MACH_TRAP_ARG_MUNGE64(r31)	; Pick up the 64 bit munge function address
-
-.L_kernel_syscall_munge:
-			cmplwi	r0,0							; test for null munger
-			mtctr	r0								; Set the function call address
-			addi	r3,r30,saver3						; Pointer to args from save area
-			addi	r4,r1,FM_ARG0+ARG_SIZE				; Pointer for munged args
-			beq--	.L_kernel_syscall_trapcall		;   null munger - skip to trap call
-			bctrl								; Call the munge function
-
-.L_kernel_syscall_trapcall:		
-			lwz		r0,MACH_TRAP_FUNCTION(r31)		; Pick up the function address
-			mtctr	r0								; Set the function call address
-			addi	r3,r1,FM_ARG0+ARG_SIZE			; Pointer to munged args
-
-#if FPFLOOD
-			stfd	f31,emfp31(r25)					; (TEST/DEBUG)
-#endif
-
-			bctrl
-
-
-/*
- * Ok, return from C function, R3 = return value
- *
- * get the active thread's PCB pointer and thus pointer to user state
- * saved state is still in R30 and the active thread is in R16
- */
-
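-/* Illustrative sketch (hypothetical C; field names assumed): the srawi
- * below replicates the sign bit so the 32-bit kern_return_t is stored as
- * a properly sign-extended 64-bit register image:
- *
- *	sv->r3_hi = (uint32_t)(rc >> 31);	// arithmetic shift: all 0s or all 1s
- *	sv->r3_lo = (uint32_t)rc;
- */
-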
-.L_mach_return:
-			srawi  r0,r3,31							; properly extend the return code
-			cmpi	cr0,r3,KERN_INVALID_ARGUMENT	; deal with invalid system calls
-			mr		r31,r16							; Move the current thread pointer
-			stw		r0, saver3(r30)					; stash the high part of the return code
-			stw		r3,saver3+4(r30)				; Stash the low part of the return code
-			beq--	cr0,.L_mach_invalid_ret			; otherwise fall through into the normal return path
-.L_mach_invalid_arg:		
-
-
-/* 'standard' syscall returns here - INTERRUPTS ARE STILL ON
- * the syscall may perform a thread_set_syscall_return
- * followed by a thread_exception_return, ending up
- * at thread_syscall_return below, with SS_R3 having
- * been set up already
- *
- * When we are here, r31 should point to the current thread,
- *                   r30 should point to the current pcb
- *    r3 contains value that we're going to return to the user
- *    which has already been stored back into the save area
- */
-		
-.L_thread_syscall_ret_check_ast:	
-			lis		r10,hi16(MASK(MSR_VEC))			; Get the vector enable
-			mfmsr	r12								; Get the current MSR 
-			ori		r10,r10,lo16(MASK(MSR_FP)|MASK(MSR_EE))	; Add in FP and EE
-			andc	r12,r12,r10						; Turn off VEC, FP, and EE
-			mtmsr	r12								; Turn interruptions off
-			
-			mfsprg	r10,1							; Get the current activation
-			lwz		r10,ACT_PER_PROC(r10)			; Get the per_proc block 
-
-/* Check to see if there's an outstanding AST */
-		
-			lwz		r4,PP_PENDING_AST(r10)
-			cmpi	cr0,r4,	0						; Any pending asts?
-			beq++	cr0,.L_syscall_no_ast			; Nope...
-
-/* Yes there is, call ast_taken 
- * pretending that the user thread took an AST exception here,
- * ast_taken will save all state and bring us back here
- */
-
-#if	DEBUG
-/* debug assert - make sure that we're not returning to kernel */
-			lwz		r3,savesrr1+4(r30)
-			andi.	r3,r3,MASK(MSR_PR)
-			bne++	scrnotkern						; returning to user level, check 
-			
-			lis		r0,hi16(Choke)					; Choke code
-			ori		r0,r0,lo16(Choke)				; and the rest
-			li		r3,failContext					; Bad state code
-			sc										; System ABEND
-
-scrnotkern:		
-#endif	/* DEBUG */
-	
-			lis		r3,hi16(AST_ALL)				; Set ast flags
-			li		r4,1							; Set interrupt allowed
-			ori		r3,r3,lo16(AST_ALL)
-			bl		EXT(ast_taken)					; Process the pending ast
-			b		.L_thread_syscall_ret_check_ast	; Go see if there was another...
-
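-/* Illustrative sketch (hypothetical C) of the AST loop above:
- *
- *	for (;;) {
- *		disable_interrupts();
- *		if (per_proc->pending_ast == 0)
- *			break;			// nothing pending, leave
- *		ast_taken(AST_ALL, TRUE);	// may re-enable interrupts
- *	}
- */
-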
-.L_mach_invalid_ret:	
-/*
- * need to figure out why we got a KERN_INVALID_ARGUMENT
- * if it was due to a non-existent system call
- * then we want to throw an exception... otherwise
- * we want to pass the error code back to the caller
- */
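-/* Illustrative sketch (hypothetical C) of the test below:
- *
- *	if (mach_trap_table[-num].function == kern_invalid)
- *		doexception(EXC_SYSCALL, num, 1);	// no such syscall
- *	else
- *		return rc;	// a real KERN_INVALID_ARGUMENT from the call
- */
-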
-			lwz		r0,saver0+4(r30)				; reload the original syscall number
-			neg		r28,r0							; Make this positive
-			mr		r4,r28							; save a copy
-			slwi	r27,r4,4						; multiply by 16
-			slwi	r4,r4,2							; and another 4
-			lis		r28,hi16(EXT(mach_trap_table))	; Get address of table
-			add		r27,r27,r4						; for a total of 20x (5 words/entry)
-			ori		r28,r28,lo16(EXT(mach_trap_table))	; Get address of table
-			add		r28,r27,r28						; Point right to the syscall table entry
-			lwz		r27,MACH_TRAP_FUNCTION(r28)		; Pick up the function address
-			lis		r28,hi16(EXT(kern_invalid))		; Get high half of invalid syscall function
-			ori		r28,r28,lo16(EXT(kern_invalid))	; Get low half of invalid syscall function
-			cmpw	cr0,r27,r28						; Check if this is an invalid system call
-			beq--	.L_call_server_syscall_exception	; We have a bad system call
-			b		.L_mach_invalid_arg             ; a system call returned KERN_INVALID_ARG
-		
-	
-/* thread_exception_return returns to here, almost all
- * registers intact. It expects a full context restore
- * of what it hasn't restored itself (ie. what we use).
- *
- * In particular for us,
- * we still have     r31 points to the current thread,
- *                   r30 points to the current pcb
- */
- 
- 			.align	5
- 
-.L_syscall_no_ast:
-.L_thread_syscall_return:
-
-			mr		r3,r30							; Get savearea to the correct register for common exit
-
-			lwz		r11,SAVflags(r30)				; Get the flags 
-			lwz		r5,THREAD_KERNEL_STACK(r31)		; Get the base pointer to the stack 
-			lwz		r4,SAVprev+4(r30)				; Get the previous save area
-			rlwinm	r11,r11,0,15,13					; Clear the syscall flag
-			mfsprg	r8,1				 			; Now find the current activation 
-			addi	r5,r5,KERNEL_STACK_SIZE-FM_SIZE	; Reset to empty
-			stw		r11,SAVflags(r30)				; Stick back the flags
-			stw		r5,ACT_MACT_KSP(r8)				; Save the empty stack pointer
-			stw		r4,ACT_MACT_PCB(r8)				; Save previous save area
-			b		chkfac							; Go end it all...
-
-/*
- * thread_exception_return()
- *
- * Return to user mode directly from within a system call.
- */
-
-			.align	5
-			.globl EXT(thread_bootstrap_return)
-LEXT(thread_bootstrap_return)						; NOTE: THIS IS GOING AWAY IN A FEW DAYS....
-
-			.globl EXT(thread_exception_return)
-LEXT(thread_exception_return)						; Directly return to user mode
-
-.L_thread_exc_ret_check_ast:	
-			lis		r10,hi16(MASK(MSR_VEC))			; Get the vector enable
-			mfmsr	r3								; Get the MSR 
-			ori		r10,r10,lo16(MASK(MSR_FP)|MASK(MSR_EE))	; Add in FP and EE
-			andc	r3,r3,r10						; Turn off VEC, FP, and EE
-			mtmsr	r3								; Disable interrupts
-
-/* Check to see if there's an outstanding AST */
-/* We don't bother establishing a call frame even though CHECK_AST
-   can invoke ast_taken(), because it can just borrow our caller's
-   frame, given that we're not going to return.  
-*/
-		
-			mfsprg	r10,1							; Get the current activation
-			lwz		r10,ACT_PER_PROC(r10)			; Get the per_proc block 
-			lwz		r4,PP_PENDING_AST(r10)
-			cmpi	cr0,r4,	0
-			beq+		cr0,.L_exc_ret_no_ast
-		
-/* Yes there is, call ast_taken 
- * pretending that the user thread took an AST exception here,
- * ast_taken will save all state and bring us back here
- */
-	
-			lis		r3,hi16(AST_ALL)
-			li		r4,1
-			ori		r3,r3,lo16(AST_ALL)
-			
-			bl		EXT(ast_taken)
-			b		.L_thread_exc_ret_check_ast		; check for a second AST (rare)
-	
-/* arriving here, interrupts should be disabled */
-/* Get the active thread's PCB pointer to restore regs
- */
-.L_exc_ret_no_ast:
-			
-			mfsprg  r30,1							; Get the current activation
-			mr		r31,r30
-
-			lwz		r30,ACT_MACT_PCB(r30)
-			mr.		r30,r30							; Is there any context yet?
-			beq-	makeDummyCtx					; No, hack one up...
-#if	DEBUG
-/* 
- * debug assert - make sure that we're not returning to kernel
- * get the active thread's PCB pointer and thus pointer to user state
- */
-		
-			lwz		r3,savesrr1+4(r30)
-			andi.	r3,r3,MASK(MSR_PR)
-			bne+	ret_user2						; We are ok...
-
-			lis		r0,hi16(Choke)					; Choke code
-			ori		r0,r0,lo16(Choke)				; and the rest
-			li		r3,failContext					; Bad state code
-			sc										; System ABEND
-			
-ret_user2:		
-#endif	/* DEBUG */
-		
-/* If the system call flag isn't set, then we came from a trap,
- * so warp into the return_from_trap (thread_return) routine,
- * which takes PCB pointer in R3, not in r30!
- */
-			lwz		r0,SAVflags(r30)				; Grab the savearea flags
-			andis.	r0,r0,SAVsyscall>>16			; Are we returning from a syscall?
-			mr		r3,r30							; Copy pcb pointer into r3 in case we need it
-			beq--	cr0,thread_return				; Nope, must be a thread return...
-			b		.L_thread_syscall_return		; Join up with the system call return...
-
-;
-;			This is where we handle someone who did a thread_create followed
-;			by a thread_resume with no intervening thread_set_state.  Just make an
-;			empty context, initialize it to trash and let em execute at 0...
-;
-
-			.align	5
-
-makeDummyCtx:
-			bl		EXT(save_get)					; Get a save_area
-			li		r4,SAVgeneral					; Get the general context type
-			li		r0,0							; Get a 0
-			stb		r4,SAVflags+2(r3)				; Set type
-			addi	r2,r3,savefpscr+4				; Point past what we are clearing
-			mr		r4,r3							; Save the start
-			
-cleardummy:	stw		r0,0(r4)						; Clear stuff
-			addi	r4,r4,4							; Next word
-			cmplw	r4,r2							; Still some more?
-			blt+	cleardummy						; Yeah...
-			
-			lis		r2,hi16(MSR_EXPORT_MASK_SET)	; Set the high part of the user MSR
-			ori		r2,r2,lo16(MSR_EXPORT_MASK_SET)	; And the low part
-			stw		r2,savesrr1+4(r3)				; Set the default user MSR
-	
-			b		thread_return					; Go let em try to execute, hah!
-	
-/*
- * ihandler(type)
- *
- * ENTRY:	VM switched ON
- *			Interrupts  OFF
- *			R3 contains exception code
- *			R4 points to the saved context (virtual address)
- *			Everything is saved in savearea
- *
- */
-
-			.align	5
-			.globl EXT(ihandler)
-LEXT(ihandler)										; Interrupt handler
-
-/*
- * get the value of istackptr, if it's zero then we're already on the
- * interrupt stack.
- */
-
-			lwz		r10,savesrr1+4(r4)				; Get SRR1 
-			lwz		r7,savevrsave(r4)				; Get the VRSAVE register
-			mfsprg	r13,1							; Get the current activation
-			lwz		r25,ACT_PER_PROC(r13)			; Get the per_proc block 
-			li		r14,0							; Zero this for now
-			rlwinm.	r16,r10,0,MSR_VEC_BIT,MSR_VEC_BIT	; Was vector on?
-			lwz		r1,PP_ISTACKPTR(r25)			; Get the interrupt stack
-			li		r16,0							; Zero this for now
-
-			beq+	ivecoff							; Vector off, do not save vrsave...
-			stw		r7,liveVRS(r25)					; Set the live value
-
-ivecoff:	li		r0,0							; Get a constant 0
-			rlwinm	r5,r10,0,MSR_PR_BIT,MSR_PR_BIT	; Are we trapping from supervisor state?
-			mr.		r1,r1							; Is it active?
-			cmplwi	cr2,r5,0						; cr2_eq == 1 if yes
-			mr		r16,r13
-			lwz		r14,ACT_MACT_PCB(r13)			; Now point to the PCB 
-			lwz		r9,saver1+4(r4)					; Pick up the rupt time stack
-			stw		r14,SAVprev+4(r4)				; Queue the new save area in the front
-			stw		r13,SAVact(r4)					; Point the savearea at its activation
-			stw		r4,ACT_MACT_PCB(r13)			; Point to our savearea 
-			beq		cr2,ifromk
-			stw		r4,ACT_MACT_UPCB(r13)			; Store user savearea
-
-ifromk:		bne		.L_istackfree					; Nope... 
-
-/* We're already on the interrupt stack, get back the old
- * stack pointer and make room for a frame
- */
-
-			lwz		r10,PP_INTSTACK_TOP_SS(r25)		; Get the top of the interrupt stack
-			addi	r5,r9,INTSTACK_SIZE-FM_SIZE		; Shift stack for bounds check
-			subi	r1,r9,FM_REDZONE				; Back up beyond the red zone
-			sub		r5,r5,r10						; Get displacement into stack
-			cmplwi	r5,INTSTACK_SIZE-FM_SIZE		; Is the stack actually invalid?
-			blt+	ihsetback						; The stack is ok...
-
-			lwz		r5,PP_DEBSTACK_TOP_SS(r25)		; Pick up debug stack top
-			subi	r5,r5,KERNEL_STACK_SIZE-FM_SIZE	; Adjust to start of stack
-			sub		r5,r1,r5						; Get displacement into debug stack
-			cmplwi	cr2,r5,KERNEL_STACK_SIZE-FM_SIZE	; Check if we are on debug stack
-			blt+	cr2,ihsetback					; Yeah, that is ok too...
-
-			lis		r0,hi16(Choke)					; Choke code
-			ori		r0,r0,lo16(Choke)				; and the rest
-			li		r3,failStack					; Bad stack code
-			sc										; System ABEND
-
-intUnalignedStk:
-			lis		r0,hi16(Choke)					; Choke code
-			ori		r0,r0,lo16(Choke)				; and the rest
-			li		r3,failUnalignedStk				; Unaligned stack code
-			sc										; System ABEND
-
-			.align	5
-			
-.L_istackfree:
-			rlwinm.	r0,r1,0,28,31					; Check if stack is aligned (and get 0)
-			lwz		r10,SAVflags(r4)				; Get savearea flags
-			bne--	intUnalignedStk					; Stack is unaligned...
-			stw		r0,PP_ISTACKPTR(r25)			; Mark the stack in use 
-			oris	r10,r10,hi16(SAVrststk)			; Indicate we reset stack when we return from this one 
-			stw		r10,SAVflags(r4)				; Stick it back		
-	
-/*
- * To summarize, when we reach here, the state has been saved and
- * the stack is marked as busy. We now generate a small
- * stack frame with backpointers to follow the calling
- * conventions. We set up the backpointers to the trapped
- * routine allowing us to backtrace.
- */
-	
-ihsetback:	subi	r1,r1,FM_SIZE					; Make a new frame 
-			stw		r9,FM_BACKPTR(r1)				; Point back to previous stackptr
-		
-#if VERIFYSAVE
-			beq-	cr1,ihbootnover					; (TEST/DEBUG)
-			bl		versave							; (TEST/DEBUG)
-ihbootnover:										; (TEST/DEBUG)
-#endif
-
-#if	DEBUG
-/* If debugging, we need two frames, the first being a dummy
- * which links back to the trapped routine. The second is
- * that which the C routine below will need
- */
-			lwz		r5,savesrr0+4(r4)				; Get interrupt address 
-			stw		r5,FM_LR_SAVE(r1)				; save old instr ptr as LR value 
-			stwu	r1,-FM_SIZE(r1)					; Make another new frame for C routine
-#endif /* DEBUG */
-
-			mr		r31,r3
-			mr		r30,r4
-
-			lwz		r3,SAVtime(r4)
-			lwz		r4,SAVtime+4(r4)
-			addi	r5,r25,PP_PROCESSOR
-			lwz		r5,KERNEL_TIMER(r5)
-			bl		EXT(thread_timer_event)
-			addi	r6,r25,PP_PROCESSOR
-			lwz		r5,CURRENT_STATE(r6)
-			addi	r7,r6,USER_STATE
-			cmplw	r5,r7
-			bne		0f
-			addi	r5,r6,SYSTEM_STATE
-			bl		EXT(state_event)
-0:
-
-			lwz		r7,ACT_TASK(r13)
-			lwz		r8,TASK_VTIMERS(r7)
-			cmpwi	r8,0
-			beq++	0f
-
-			lwz		r7,ACT_PER_PROC(r13)
-			li		r4,AST_BSD
-			lwz		r8,PP_PENDING_AST(r7)
-			or		r8,r8,r4
-			stw		r8,PP_PENDING_AST(r7)
-			addi	r3,r13,ACT_AST
-			bl		EXT(hw_atomic_or)
-0:
-
-			mr		r3,r31
-			mr		r4,r30
-			lwz		r5,savedsisr(r30)				; Get the DSISR
-			lwz		r6,savedar+4(r30)				; Get the DAR 
-
-#if FPFLOOD
-			stfd	f31,emfp31(r25)					; (TEST/DEBUG)
-#endif
-
-			bl	EXT(interrupt)
-
-/* interrupt() returns a pointer to the saved state in r3 */
-
-			lis		r10,hi16(MASK(MSR_VEC))			; Get the vector enable
-			mfmsr	r0								; Get our MSR
-			ori		r10,r10,lo16(MASK(MSR_FP)|MASK(MSR_EE))	; Add in FP and EE
-			andc	r0,r0,r10						; Turn off VEC, FP, and EE
-			mtmsr	r0								; Make sure interrupts are disabled
-			mfsprg	r8,1							; Get the current activation
-			lwz		r10,ACT_PER_PROC(r8)			; Get the per_proc block 
-		
-			lwz		r7,SAVflags(r3)					; Pick up the flags
-			lwz		r9,SAVprev+4(r3)				; Get previous save area
-			cmplwi	cr1,r8,0						; Are we still initializing?
-			lwz		r12,savesrr1+4(r3)				; Get the MSR we will load on return 
-			andis.	r11,r7,hi16(SAVrststk)			; Is this the first on the stack?
-			stw		r9,ACT_MACT_PCB(r8)				; Point to previous context savearea 
-			mr		r4,r3							; Move the savearea pointer
-			beq		.L_no_int_ast2					; Get going if not the top-o-stack...
-
-
-/* We're the last frame on the stack. Restore istackptr to empty state.
- *
- * Check for ASTs if one of the below is true:	
- *    returning to user mode
- *    returning to a kloaded server
- */
-			lwz		r9,PP_INTSTACK_TOP_SS(r10)		; Get the empty stack value 
-			andc	r7,r7,r11						; Remove the stack reset bit in case we pass this one
-			stw		r9,PP_ISTACKPTR(r10)			; Save that saved state ptr 
-			lwz		r3,ACT_PREEMPT_CNT(r8)			; Get preemption level 
-			stw		r7,SAVflags(r4)					; Save the flags
-			cmplwi	r3, 0							; Check for preemption
-			bne		.L_no_int_ast					; Do not preempt if level is not zero
-			andi.	r6,r12,MASK(MSR_PR)				; privilege mode
-			lwz		r11,PP_PENDING_AST(r10)			; Get the pending AST mask
-			beq-	.L_kernel_int_ast				; In kernel space, AST_URGENT check
-			li		r3,T_AST						; Assume the worst
-			mr.		r11,r11							; Are there any pending? 
-			beq		.L_no_int_ast					; Nope... 
-			b		.L_call_thandler
-
-.L_kernel_int_ast:
-			andi.	r11,r11,AST_URGENT				; Do we have AST_URGENT?
-			li		r3,T_PREEMPT					; Assume the worst
-			beq		.L_no_int_ast					; Nope... 
-
-/*
- * There is a pending AST. Massage things to make it look like
- * we took a trap and jump into the trap handler.  To do this
- * we essentially pretend to return from the interrupt but
- * at the last minute jump into the trap handler with an AST
- * trap instead of performing an rfi.
- */
-
-.L_call_thandler:
-			stw		r3,saveexception(r4)			; Set the exception code to T_AST/T_PREEMPT
-			b		EXT(thandler)					; We need to preempt so treat like a trap...
-
-.L_no_int_ast:	
-			mr		r3,r4							; Get into the right register for common code
-			
-.L_no_int_ast2:	
-			rlwinm	r7,r7,0,15,13					; Clear the syscall flag
-			li		r4,0							; Assume for a moment that we are in init
-			stw		r7,SAVflags(r3)					; Set the flags with cleared syscall flag
-			beq--	cr1,chkfac						; Jump away if we are in init...
-
-			lwz		r4,ACT_MACT_PCB(r8)				; Get the new level marker
-
-
-;
-;			This section is common to all exception exits.  It throws away vector
-;			and floating point saveareas as the exception level of a thread is
-;			exited.  
-;
-;			It also enables the facility if its context is live
-;			Requires:
-;				R3  = Savearea to be released (virtual)
-;				R4  = New top of savearea stack (could be 0)
-;				R8  = pointer to activation
-;				R10 = per_proc block
-;
-;			Note that barring unforeseen crashes, there is no escape from this point
-;			on. We WILL call exception_exit and launch this context. No worries
-;			about preemption or interruptions here.
-;
-;			Note that we will set up R26 with whatever context we will be launching,
-;			so it will indicate the current, or the deferred if it is set and we
-;			are going to user state.  CR2_eq will be set to indicate deferred.
-;
-
-chkfac:		lwz		r29,savesrr1+4(r3)				; Get the current MSR
-			mr.		r28,r8							; Are we still in boot?
-			mr		r31,r10							; Move per_proc address
-			mr		r30,r4							; Preserve new level
-			mr		r27,r3							; Save the old level
-			beq--	chkenax							; Yeah, skip it all...
-			
-			rlwinm.	r0,r29,0,MSR_PR_BIT,MSR_PR_BIT	; Are we going into user state?
-
-			lwz		r20,curctx(r28)					; Get our current context
-			lwz		r26,deferctx(r28)				; Get any deferred context switch
-			li		r0,1							; Get set to hold off quickfret
-			rlwinm	r29,r29,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Turn off floating point for now
-			lwz		r21,FPUlevel(r20)				; Get the facility level
-			cmplwi	cr2,r26,0						; Are we going into a deferred context later?
-			rlwinm	r29,r29,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Turn off vector for now
-			crnor	cr2_eq,cr0_eq,cr2_eq			; Set cr2_eq if going to user state and there is deferred
-			lhz		r19,PP_CPU_NUMBER(r31)			; Get our CPU number
-			cmplw	r27,r21							; Are we returning from the active level?
-			stw		r0,holdQFret(r31)				; Make sure we hold off releasing quickfret
-			bne++	fpuchkena						; Nope...
-
-;
-;			First clean up any live context we are returning from
-;
-
-			lwz		r22,FPUcpu(r20)					; Get CPU this context was last dispatched on
-			
-			stw		r19,FPUcpu(r20)					; Claim context for us
-			
-			eieio									; Make sure this gets out before owner clear
-			
-#if ppeSize != 16
-#error per_proc_entry is not 16 bytes in size
-#endif
-			
-			lis		r23,hi16(EXT(PerProcTable))		; Set base PerProcTable
-			slwi	r22,r22,4						; Find offset to the owner per_proc_entry
-			ori		r23,r23,lo16(EXT(PerProcTable))	; Set base PerProcTable
-			li		r24,FPUowner					; Displacement to float owner
-			add		r22,r23,r22						; Point to the owner per_proc_entry
-			lwz		r22,ppe_vaddr(r22)				; Point to the owner per_proc
-			
-fpuinvothr:	lwarx	r23,r24,r22						; Get the owner
-
-			sub		r0,r23,r20						; Subtract one from the other
-			sub		r21,r20,r23						; Subtract the other from the one
-			or		r21,r21,r0						; Combine them
-			srawi	r21,r21,31						; Get a 0 if equal or -1 if not
-			and		r23,r23,r21						; Make 0 if same, unchanged if not
-			stwcx.	r23,r24,r22						; Try to invalidate it
-			bne--	fpuinvothr						; Try again if there was a collision...
-
-			isync
-
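-;			Illustrative sketch (hypothetical C) of the branchless compare
-;			inside the lwarx/stwcx. loop above, which clears the owner
-;			field only if it still names this context:
-;
-;				d    = (owner - ctx) | (ctx - owner);	// top bit set iff owner != ctx
-;				keep = (uint32_t)((int32_t)d >> 31);	// 0 if equal, ~0 if not
-;				owner &= keep;				// 0 if it was ours
-;
-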
-;
-;			Now if there is a savearea associated with the popped context, release it.
-;			Either way, pop the level to the top stacked context.
-;
-
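-;			Illustrative sketch (hypothetical C; names assumed) of the pop
-;			performed below:
-;
-;				savearea *sv = ctx->FPUsave;
-;				unsigned lvl = sv ? sv->level : 0;
-;				if (sv && lvl == old_level) {		// saved copy of the live state
-;					ctx->FPUsave = sv->prev;	// dequeue it
-;					lvl = sv->prev ? sv->prev->level : 0;
-;					quickfret_push(sv);		// release the savearea
-;				}
-;				ctx->FPUlevel = lvl;
-;
-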
-			lwz		r22,FPUsave(r20)				; Get pointer to the first savearea
-			li		r21,0							; Assume we popped all the way out
-			mr.		r22,r22							; Is there anything there?
-			beq++	fpusetlvl						; No, see if we need to enable...
-			
-			lwz		r21,SAVlevel(r22)				; Get the level of that savearea
-			cmplw	r21,r27							; Is this the saved copy of the live stuff?
-			bne		fpusetlvl						; No, leave as is...
-			
-			lwz		r24,SAVprev+4(r22)				; Pick up the previous area
-			li		r21,0							; Assume we popped all the way out
-			mr.		r24,r24							; Any more context stacked?
-			beq--	fpuonlyone						; Nope...
-			lwz		r21,SAVlevel(r24)				; Get the level associated with save
-
-fpuonlyone:	stw		r24,FPUsave(r20)				; Dequeue this savearea
-
-			rlwinm	r3,r22,0,0,19					; Find main savearea header
-
-			lwz		r8,quickfret(r31)				; Get the first in quickfret list (top)					
-			lwz		r9,quickfret+4(r31)				; Get the first in quickfret list (bottom)					
-			lwz		r2,SACvrswap(r3)				; Get the virtual to real conversion (top)
-			lwz		r3,SACvrswap+4(r3)				; Get the virtual to real conversion (bottom)
-			stw		r8,SAVprev(r22)					; Link the old in (top)					
-			stw		r9,SAVprev+4(r22)				; Link the old in (bottom)					
-			xor		r3,r22,r3						; Convert to physical
-			stw		r2,quickfret(r31)				; Set the first in quickfret list (top)					
-			stw		r3,quickfret+4(r31)				; Set the first in quickfret list (bottom)					
-			
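-/* Illustrative sketch (hypothetical C): the push above links the savearea
- * onto the quickfret chain by its physical address, formed by xoring the
- * virtual address with the block's SACvrswap conversion constant:
- *
- *	sav->prev     = pp->quickfret;			// link the old head in
- *	pp->quickfret = (addr64_t)sav ^ hdr->SACvrswap;	// new head, physical
- */
-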
-#if FPVECDBG
-			lis		r0,HIGH_ADDR(CutTrace)			; (TEST/DEBUG)
-			li		r2,0x3301						; (TEST/DEBUG)
-			oris	r0,r0,LOW_ADDR(CutTrace)		; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif				
-
-fpusetlvl:	stw		r21,FPUlevel(r20)				; Save the level
-		
-;
-;			Here we check if we are at the right level
-;			We need to check the level we are entering, not the one we are exiting.
-;			Therefore, we will use the defer level if it is non-zero and we are
-;			going into user state.
-;
-			
-fpuchkena:	bt--	cr2_eq,fpuhasdfrd				; Skip if deferred, R26 already set up...
-			mr		r26,r20							; Use the non-deferred value
-			
-fpuhasdfrd:	
-#if 0
-			rlwinm.	r0,r29,0,MSR_PR_BIT,MSR_PR_BIT	; (TEST/DEBUG) Going into user state?
-			beq		fpunusrstt						; (TEST/DEBUG) Nope...	
-			lwz		r23,FPUlevel(r26)				; (TEST/DEBUG) Get the level ID
-			lwz		r24,FPUsave(r26)				; (TEST/DEBUG) Get the first savearea
-			mr.		r23,r23							; (TEST/DEBUG) Should be level 0
-			beq++	fpulvl0							; (TEST/DEBUG) Yes...
-
-			lis		r0,hi16(Choke)					; (TEST/DEBUG) Choke code
-			ori		r0,r0,lo16(Choke)				; (TEST/DEBUG) and the rest
-			sc										; (TEST/DEBUG) System ABEND
-			
-fpulvl0:	mr.		r24,r24							; (TEST/DEBUG) Any context?
-			beq		fpunusrstt						; (TEST/DEBUG) No...
-			lwz		r23,SAVlevel(r24)				; (TEST/DEBUG) Get level of context
-			lwz		r21,SAVprev+4(r24)				; (TEST/DEBUG) Get previous pointer
-			mr.		r23,r23							; (TEST/DEBUG) Is this our user context?
-			beq++	fpulvl0b						; (TEST/DEBUG) Yes...
-
-			lis		r0,hi16(Choke)					; (TEST/DEBUG) Choke code
-			ori		r0,r0,lo16(Choke)				; (TEST/DEBUG) and the rest
-			sc										; (TEST/DEBUG) System ABEND
-			
-fpulvl0b:	mr.		r21,r21							; (TEST/DEBUG) Is there a forward chain?
-			beq++	fpunusrstt						; (TEST/DEBUG) Nope...
-
-			lis		r0,hi16(Choke)					; (TEST/DEBUG) Choke code
-			ori		r0,r0,lo16(Choke)				; (TEST/DEBUG) and the rest
-			sc										; (TEST/DEBUG) System ABEND
-						
-fpunusrstt:											; (TEST/DEBUG)
-#endif				
-			
-			lwz		r21,FPUowner(r31)				; Get the ID of the live context
-			lwz		r23,FPUlevel(r26)				; Get the level ID
-			lwz		r24,FPUcpu(r26)					; Get the CPU that the context was last dispatched on
-			cmplw	cr3,r26,r21						; Do we have the live context?
-			cmplw	r30,r23							; Are we about to launch the live level?
-			bne--	cr3,chkvec						; No, can not possibly enable...
-			cmplw	cr1,r19,r24						; Was facility used on this processor last?
-			bne--	chkvec							; No, not live...
-			bne--	cr1,chkvec						; No, wrong cpu, have to enable later....
-			
-			lwz		r24,FPUsave(r26)				; Get the first savearea
-			mr.		r24,r24							; Any savearea?
-			beq++	fpuena							; Nope...
-			lwz		r25,SAVlevel(r24)				; Get the level of savearea
-			lwz		r0,SAVprev+4(r24)				; Get the previous
-
-			cmplw	r30,r25							; Is savearea for the level we are launching?
-			bne++	fpuena							; No, just go enable...
-			
-			stw		r0,FPUsave(r26)					; Pop the chain
-
-			rlwinm	r3,r24,0,0,19					; Find main savearea header
-
-			lwz		r8,quickfret(r31)				; Get the first in quickfret list (top)					
-			lwz		r9,quickfret+4(r31)				; Get the first in quickfret list (bottom)					
-			lwz		r2,SACvrswap(r3)				; Get the virtual to real conversion (top)
-			lwz		r3,SACvrswap+4(r3)				; Get the virtual to real conversion (bottom)
-			stw		r8,SAVprev(r24)					; Link the old in (top)					
-			stw		r9,SAVprev+4(r24)				; Link the old in (bottom)					
-			xor		r3,r24,r3						; Convert to physical
-			stw		r2,quickfret(r31)				; Set the first in quickfret list (top)					
-			stw		r3,quickfret+4(r31)				; Set the first in quickfret list (bottom)					
-
-#if FPVECDBG
-			lis		r0,HIGH_ADDR(CutTrace)			; (TEST/DEBUG)
-			li		r2,0x3302						; (TEST/DEBUG)
-			oris	r0,r0,LOW_ADDR(CutTrace)		; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif				
-
-fpuena:		ori		r29,r29,lo16(MASK(MSR_FP))		; Enable facility			
-			
-chkvec:		
-
-			lwz		r21,VMXlevel(r20)				; Get the facility level
-		
-			cmplw	r27,r21							; Are we returning from the active level?
-			bne+	vmxchkena						; Nope...
-			
-
-;
-;			First clean up any live context we are returning from
-;
-
-			lwz		r22,VMXcpu(r20)					; Get CPU this context was last dispatched on
-			
-			stw		r19,VMXcpu(r20)					; Claim context for us
-			
-			eieio									; Make sure this gets out before owner clear
-			
-			lis		r23,hi16(EXT(PerProcTable))		; Set base PerProcTable
-			slwi	r22,r22,4						; Find offset to the owner per_proc_entry
-			ori		r23,r23,lo16(EXT(PerProcTable))	; Set base PerProcTable
-			li		r24,VMXowner					; Displacement to vector owner
-			add		r22,r23,r22						; Point to the owner per_proc_entry
-			lwz		r22,ppe_vaddr(r22)				; Point to the owner per_proc
-			
-vmxinvothr:	lwarx	r23,r24,r22						; Get the owner
-
-			sub		r0,r23,r20						; Subtract one from the other
-			sub		r21,r20,r23						; Subtract the other from the one
-			or		r21,r21,r0						; Combine them
-			srawi	r21,r21,31						; Get a 0 if equal or -1 if not
-			and		r23,r23,r21						; Make 0 if same, unchanged if not
-			stwcx.	r23,r24,r22						; Try to invalidate it
-			bne--	vmxinvothr						; Try again if there was a collision...
-
-			isync
-
-;
-;			Now if there is a savearea associated with the popped context, release it.
-;			Either way, pop the level to the top stacked context.
-;
-
-			lwz		r22,VMXsave(r20)				; Get pointer to the first savearea
-			li		r21,0							; Assume we popped all the way out
-			mr.		r22,r22							; Is there anything there?
-			beq++	vmxsetlvl						; No, see if we need to enable...
-			
-			lwz		r21,SAVlevel(r22)				; Get the level of that savearea
-			cmplw	r21,r27							; Is this the saved copy of the live stuff?
-			bne		vmxsetlvl						; No, leave as is...
-			
-			lwz		r24,SAVprev+4(r22)				; Pick up the previous area
-			li		r21,0							; Assume we popped all the way out
-			mr.		r24,r24							; Any more context?
-			beq--	vmxonlyone						; Nope...
-			lwz		r21,SAVlevel(r24)				; Get the level associated with save
-
-vmxonlyone:	stw		r24,VMXsave(r20)				; Dequeue this savearea
-			
-			rlwinm	r3,r22,0,0,19					; Find main savearea header
-
-			lwz		r8,quickfret(r31)				; Get the first in quickfret list (top)					
-			lwz		r9,quickfret+4(r31)				; Get the first in quickfret list (bottom)					
-			lwz		r2,SACvrswap(r3)				; Get the virtual to real conversion (top)
-			lwz		r3,SACvrswap+4(r3)				; Get the virtual to real conversion (bottom)
-			stw		r8,SAVprev(r22)					; Link the old in (top)					
-			stw		r9,SAVprev+4(r22)				; Link the old in (bottom)					
-			xor		r3,r22,r3						; Convert to physical
-			stw		r2,quickfret(r31)				; Set the first in quickfret list (top)					
-			stw		r3,quickfret+4(r31)				; Set the first in quickfret list (bottom)					
-
-#if FPVECDBG
-			lis		r0,HIGH_ADDR(CutTrace)			; (TEST/DEBUG)
-			li		r2,0x3401						; (TEST/DEBUG)
-			oris	r0,r0,LOW_ADDR(CutTrace)		; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif				
-
-vmxsetlvl:	stw		r21,VMXlevel(r20)				; Save the level
-		
-;
-;			Here we check if we are at the right level
-;
-			
-vmxchkena:	lwz		r21,VMXowner(r31)				; Get the ID of the live context
-			lwz		r23,VMXlevel(r26)				; Get the level ID
-			cmplw	r26,r21							; Do we have the live context?
-			lwz		r24,VMXcpu(r26)					; Get the CPU that the context was last dispatched on
-			bne--	setena							; No, can not possibly enable...
-			cmplw	r30,r23							; Are we about to launch the live level?
-			cmplw	cr1,r19,r24						; Was facility used on this processor last?
-			bne--	setena							; No, not live...
-			bne--	cr1,setena						; No, wrong cpu, have to enable later....
-			
-			lwz		r24,VMXsave(r26)				; Get the first savearea
-			mr.		r24,r24							; Any savearea?
-			beq++	vmxena							; Nope...
-			lwz		r25,SAVlevel(r24)				; Get the level of savearea
-			lwz		r0,SAVprev+4(r24)				; Get the previous
-			cmplw	r30,r25							; Is savearea for the level we are launching?
-			bne++	vmxena							; No, just go enable...
-
-			stw		r0,VMXsave(r26)					; Pop the chain
-			
-			rlwinm	r3,r24,0,0,19					; Find main savearea header
-
-			lwz		r8,quickfret(r31)				; Get the first in quickfret list (top)					
-			lwz		r9,quickfret+4(r31)				; Get the first in quickfret list (bottom)					
-			lwz		r2,SACvrswap(r3)				; Get the virtual to real conversion (top)
-			lwz		r3,SACvrswap+4(r3)				; Get the virtual to real conversion (bottom)
-			stw		r8,SAVprev(r24)					; Link the old in (top)					
-			stw		r9,SAVprev+4(r24)				; Link the old in (bottom)					
-			xor		r3,r24,r3						; Convert to physical
-			stw		r2,quickfret(r31)				; Set the first in quickfret list (top)					
-			stw		r3,quickfret+4(r31)				; Set the first in quickfret list (bottom)					
-
-#if FPVECDBG
-			lis		r0,HIGH_ADDR(CutTrace)			; (TEST/DEBUG)
-			li		r2,0x3402						; (TEST/DEBUG)
-			oris	r0,r0,LOW_ADDR(CutTrace)		; (TEST/DEBUG)
-			sc										; (TEST/DEBUG)
-#endif				
-			
-vmxena:		oris	r29,r29,hi16(MASK(MSR_VEC))		; Enable facility
-
-setena:		lwz		r18,umwSpace(r28)				; Get the space ID in case we are launching user
-			rlwinm.	r0,r29,0,MSR_PR_BIT,MSR_PR_BIT	; Are we about to launch user state?
-			li		r0,0							; Get set to release quickfret holdoff
-			crmove	cr7_eq,cr0_eq					; Remember if we are going to user state
-			rlwimi.	r20,r29,(((31-floatCngbit)+(MSR_FP_BIT+1))&31),floatCngbit,floatCngbit	; Set flag if we enabled floats
-			lwz		r19,deferctx(r28)				; Get any deferred facility context switch
-			rlwinm	r20,r29,(((31-vectorCngbit)+(MSR_VEC_BIT+1))&31),vectorCngbit,vectorCngbit	; Set flag if we enabled vector
-			stw		r29,savesrr1+4(r27)				; Turn facility on or off
-			stw		r0,holdQFret(r31)				; Release quickfret
-			oris	r18,r18,hi16(umwSwitchAway)		; Set the switch-away bit in case we go to user
-
-			beq		setenaa							; Neither float nor vector turned on....
-			
-			lwz		r5,ACT_MACT_SPF(r28)			; Get activation copy
-			lwz		r6,spcFlags(r31)				; Get per_proc copy
-			or		r5,r5,r20						; Set vector/float changed bits in activation
-			or		r6,r6,r20						; Set vector/float changed bits in per_proc
-			stw		r5,ACT_MACT_SPF(r28)			; Set activation copy
-			stw		r6,spcFlags(r31)				; Set per_proc copy
-
-setenaa:	mfdec	r24								; Get decrementer
-			bf+		cr2_eq,nodefer					; No deferred to switch to...
-						
-			li		r20,0							; Clear this
-			stw		r26,curctx(r28)					; Make the facility context current
-			stw		r20,deferctx(r28)				; Clear deferred context
-
-nodefer:	lwz		r22,qactTimer(r28)				; Get high order quick activation timer
-			mr.		r24,r24							; See if it has popped already...
-			lwz		r23,qactTimer+4(r28)			; Get low order qact timer
-			ble-	chkifuser						; We have popped or are just about to...
-			
-segtb:		mftbu	r20								; Get the upper time base
-			mftb	r21								; Get the low
-			mftbu	r19								; Get upper again
-			or.		r0,r22,r23						; Any time set?
-			cmplw	cr1,r20,r19						; Did they change?
-			beq++	chkifuser						; No time set....
-			bne--	cr1,segtb						; Timebase ticked, get them again...
-			
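-/* Illustrative sketch (hypothetical C; mftb/mftbu intrinsics assumed) of
- * the retry loop above, which rereads the upper half to catch a carry
- * between the two 32-bit reads:
- *
- *	do {
- *		hi  = mftbu();
- *		lo  = mftb();
- *		hi2 = mftbu();
- *	} while (hi != hi2);	// upper half ticked mid-read, try again
- *	tb = ((uint64_t)hi << 32) | lo;
- */
-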
-			subfc	r6,r21,r23						; Subtract current from qact time
-			li		r0,0							; Make a 0
-			subfe	r5,r20,r22						; Finish subtract
-			subfze	r0,r0							; Get a 0 if qact was bigger than current, -1 otherwise
-			andc.	r12,r5,r0						; Set 0 if qact has passed
-			andc	r13,r6,r0						; Set 0 if qact has passed
-			bne		chkifuser						; If high order is non-zero, this is too big for a decrementer
-			cmplw	r13,r24							; Is this earlier than the decrementer? (logical compare takes care of high bit on)
-			bge++	chkifuser						; No, do not reset decrementer...
-			
-			mtdec	r13								; Set our value
-
-chkifuser:	bl		EXT(mach_absolute_time)
-			lwz		r5,ACT_PER_PROC(r28)
-			addi	r6,r5,PP_PROCESSOR
-			lwz		r5,KERNEL_TIMER(r6)
-			lwz		r29,CURRENT_STATE(r6)
-			beq--	cr7,chkifuser1					; Skip this if we are going to kernel...
-			stw		r18,umwSpace(r28)				; Half-invalidate to force MapUserAddressWindow to reload SRs
-			addi	r5,r28,USER_TIMER
-			addi	r29,r6,USER_STATE
-
-chkifuser1:	bl		EXT(thread_timer_event)
-			mr		r5,r29
-			bl		EXT(state_event)
-
-chkenax:	
-
-#if DEBUG
-			lwz		r20,SAVact(r27)					; (TEST/DEBUG) Make sure our restore savearea
-			mfsprg	r21, 1							; (TEST/DEBUG) agrees with the current act.
-			cmpwi	r21,0							; (TEST/DEBUG)
-			beq--	yeswereok						; (TEST/DEBUG)
-			cmplw	r21,r20							; (TEST/DEBUG)
-			beq++	yeswereok						; (TEST/DEBUG)
-
-			lis		r0,hi16(Choke)					; (TEST/DEBUG) Choke code
-			ori		r0,r0,lo16(Choke)				; (TEST/DEBUG) and the rest
-			mr		r21,r27							; (TEST/DEBUG) Save the savearea address
-			li		r3,failContext					; (TEST/DEBUG) Bad state code
-			sc										; (TEST/DEBUG) System ABEND
-
-yeswereok:
-#endif
-	
-			mr		r3,r27							; Pass savearea back
-			b		EXT(exception_exit)				; We are all done now...
-
-
-
-;
-;			Null PPC call - performance testing, does absolutely nothing
-;
-
-			.align	5
-			
-			.globl	EXT(ppcNull)
-			
-LEXT(ppcNull)
-
-			li		r3,-1							; Make sure we test no asts
-			blr
-
-
-;
-;			Instrumented null PPC call - performance testing, does absolutely nothing
-;			Forces various timestamps to be returned.
-;
-
-			.align	5
-			
-			.globl	EXT(ppcNullinst)
-			
-LEXT(ppcNullinst)
-
-			li		r3,-1							; Make sure we test no asts
-			blr
-
-
-/*
- *			Here's where we handle the fastpath stuff
- *			We'll do what we can here because registers are already
- *			loaded and it will be less confusing than moving them around.
- *			If we need to though, we'll branch off somewhere else.
- *
- *			Registers when we get here:
- *
- *				r0  = syscall number
- *				r4  = savearea/pcb
- *				r13 = activation
- *				r14 = previous savearea (if any)
- *				r16 = thread
- *				r25 = per_proc
- */
-
-			.align	5
-
-fastpath:	cmplwi	cr3,r0,0x7FF5				; Is this a null fastpath?
-			beq--	cr3,fastexutl				; Yes, bail fast...
-			cmplwi	cr3,r0,0x7FF1				; Is it CthreadSetSelfNumber? 	
-			bnelr--	cr3							; Not a fast path...
-
-/*
- * void cthread_set_self(cproc_t p)
- *
- * Sets thread state "user_value".  In practice this is the thread-local-data-pointer (TLDP),
- * though we do not interpret it.  This call is mostly used by 32-bit tasks, but we save all 64 bits
- * in case a 64-bit task wants to use this facility.  They normally do not, because the 64-bit
- * ABI reserves r13 for the TLDP.
- *
- * This op is invoked as follows:
- *	li r0, CthreadSetSelfNumber	// load the fast-trap number
- *	sc				// invoke fast-trap
- *	blr
- */
-
-CthreadSetSelfNumber:
-            lwz     r3,saver3+0(r4)             /* get the TLDP passed in r3 */
-			lwz		r5,saver3+4(r4)				/* (all 64 bits, in case this is a 64-bit task) */
-			stw		r3,CTHREAD_SELF+0(r13)		/* Remember it in the activation... */
-			stw		r5,CTHREAD_SELF+4(r13)
-			stw		r3,UAW+0(r25)               /* ...and in the per-proc */
-			stw		r5,UAW+4(r25)
-
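-/* Illustrative sketch (hypothetical C; field names assumed): the four
- * stores above amount to
- *
- *	act->cthread_self = tldp;	// remembered in the activation
- *	per_proc->uaw     = tldp;	// and mirrored in the per_proc
- *
- * after which we fall through to fastexit below.
- */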
-
-			.globl	EXT(fastexit)
-EXT(fastexit):
-fastexutl:	mr		r3,r4						; Pass back savearea
-			b		EXT(exception_exit)			; Go back to the caller...
-
-
-/*
- *			Here's where we check for a hit on the Blue Box Assist
- *			Most registers are non-volatile, so be careful here. If we don't 
- *			recognize the trap instruction we go back for regular processing.
- *			Otherwise we transfer to the assist code.
- */
- 
-			.align	5
-			
-checkassist:
-			lwz		r0,saveexception(r4)		; Get the exception code
-			lwz		r23,savesrr1+4(r4)			; Get the interrupted MSR 
-			lwz		r26,ACT_MACT_BEDA(r13)		; Get Blue Box Descriptor Area
-			mtcrf	0x18,r23					; Check what SRR1 says
-			lwz		r24,ACT_MACT_BTS(r13)		; Get the table start 
-			cmplwi	r0,T_AST					; Check for T_AST trap 
-			lwz		r27,savesrr0+4(r4)			; Get trapped address 
-			crnand	cr1_eq,SRR1_PRG_TRAP_BIT,MSR_PR_BIT	; We need both trap and user state
-			sub		r24,r27,r24					; See how far into it we are 
-			cror	cr0_eq,cr0_eq,cr1_eq		; Need to bail if AST or not trap or not user state
-			cmplwi	cr1,r24,BB_MAX_TRAP			; Do we fit in the list? 
-			cror	cr0_eq,cr0_eq,cr1_gt		; Also leave if trap not in range
-			btlr-	cr0_eq						; No assist if AST or not trap or not user state or trap not in range
-			b		EXT(atomic_switch_trap)		; Go to the assist...
-			
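-/* Illustrative sketch (hypothetical C) of the condition-register
- * gymnastics above:
- *
- *	if (exc == T_AST || !(program_trap && user_mode) || offset > BB_MAX_TRAP)
- *		return;			// back for regular processing
- *	atomic_switch_trap();		// take the assist
- */
-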
-;
-;			Virtual Machine Monitor 
-;			Here is where we exit from the emulated context
-;			Note that most registers get trashed here
-;			R3 and R30 are preserved across the call and hold the activation
-;			and savearea respectively.
-;			
-
-			.align	5
-
-exitFromVM:	mr		r30,r4						; Get the savearea
-			mr		r3,r13						; Get the activation
-			
-			b		EXT(vmm_exit)				; Do it to it
-			
-			.align	5
-			.globl	EXT(retFromVM)
-
-LEXT(retFromVM)
-			mfsprg	r10,1						; Get the current activation
-			lwz		r10,ACT_PER_PROC(r10)		; Get the per_proc block 
-			mr		r8,r3						; Get the activation
-			lwz		r4,SAVprev+4(r30)			; Pick up the previous savearea
-			mr		r3,r30						; Put savearea in proper register for common code
-			lwz		r11,SAVflags(r30)			; Get the flags of the current savearea
-			rlwinm	r11,r11,0,15,13				; Clear the syscall flag 
-			mr		r1,r8
-			stw		r11,SAVflags(r3)			; Save back the flags (with reset stack cleared)
-
-			stw		r4,ACT_MACT_PCB(r8)			; Point to the previous savearea (or 0 if none)
-
-			lwz		r5,THREAD_KERNEL_STACK(r1)	; Get the base pointer to the stack
-			addi	r5,r5,KERNEL_STACK_SIZE-FM_SIZE	; Reset to empty 
-			stw		r5,ACT_MACT_KSP(r8)			; Save the empty stack pointer
-			b		chkfac						; Go end it all...
-
-
-;
-;			chandler (note: not a candle maker or tallow merchant)
-;
-;			Here is the system choke handler.  This is where the system goes
-;			to die.
-;			
-;			We get here as a result of a T_CHOKE exception which is generated
-;			by the Choke firmware call or by lowmem_vectors when it detects a
-;			fatal error. Examples of where this may be used are when we detect
-;			problems in low-level mapping chains, trashed savearea free chains,
-;			or stack guardpage violations.
-;
-;			Note that we can not set a back chain in the stack when we come
-;			here because we are probably here because the chain was corrupt.
-;
-
-
-			.align	5
-			.globl EXT(chandler)
-LEXT(chandler)									; Choke handler
-
-			li		r31,0						; Get a 0
-			mfsprg	r25,1						; Get the current activation
-			lwz		r25,ACT_PER_PROC(r25)		; Get the per_proc block 
-			stw		r31,traceMask(0)			; Force tracing off right now
-		
-		
-		
-			lwz		r1,PP_DEBSTACKPTR(r25)		; Get debug stack pointer
-			cmpwi	r1,-1						; Are we already choking?
-			bne		chokefirst					; Nope...
-			
-chokespin:	addi	r31,r31,1					; Spin and hope for an analyzer connection...				
-			addi	r31,r31,1					; Spin and hope for an analyzer connection...				
-			addi	r31,r31,1					; Spin and hope for an analyzer connection...				
-			addi	r31,r31,1					; Spin and hope for an analyzer connection...				
-			addi	r31,r31,1					; Spin and hope for an analyzer connection...				
-			addi	r31,r31,1					; Spin and hope for an analyzer connection...				
-			b		chokespin					; Spin and hope for an analyzer connection...
-			
-chokefirst:	li		r0,-1						; Set choke value
-			mr.		r1,r1						; See if we are on debug stack yet
-			lwz		r10,saver1+4(r4)			; Get the saved stack pointer
-			stw		r0,PP_DEBSTACKPTR(r25)		; Show we are choking
-			bne		chokestart					; We are not on the debug stack yet...
-			
-			lwz		r2,PP_DEBSTACK_TOP_SS(r25)	; Get debug stack top
-			sub		r11,r2,r10					; Get stack depth
-
-			cmplwi	r11,KERNEL_STACK_SIZE-FM_SIZE-TRAP_SPACE_NEEDED	; Check if stack pointer is ok			
-			bgt		chokespin					; Bad stack pointer or too little left, just die...
-
-			subi	r1,r10,FM_REDZONE			; Make a red zone
-
-chokestart:	li		r0,0						; Get a zero
-			stw		r0,FM_BACKPTR(r1)			; We now have terminated the back chain
-
-			bl		EXT(SysChoked)				; Call the "C" phase of this
-			b		chokespin					; Should not be here so just go spin...
-			
-
-#if VERIFYSAVE			
-;
-;			Savearea chain verification
-;
-		
-versave:	
-#if 0
-			lis		r22,hi16(EXT(DebugWork))		; (TEST/DEBUG)
-			ori		r22,r22,lo16(EXT(DebugWork))	; (TEST/DEBUG)
-			lwz		r23,0(r22)						; (TEST/DEBUG)
-			mr.		r23,r23							; (TEST/DEBUG)
-			beqlr-									; (TEST/DEBUG)
-			mfsprg	r20,1							; Get the current activation
-			lwz		r20,ACT_PER_PROC(r20)			; Get the per_proc block 
-			lwz		r21,pfAvailable(r20)			; (TEST/DEBUG)
-			mr.		r21,r21							; (TEST/DEBUG)
-			bnelr+									; (TEST/DEBUG)
-			
-			stw		r22,0(r22)						; (TEST/DEBUG) Lock out more checks
-			BREAKPOINT_TRAP							; (TEST/DEBUG) Get into debugger
-#endif
-
-#if 0
-		;; This code is broken and migration will make the matter even worse
-;
-;			Make sure that all savearea chains have the right type on them
-;
-
-			lis		r28,hi16(EXT(default_pset))		; (TEST/DEBUG)
-			lis		r27,hi16(EXT(DebugWork))		; (TEST/DEBUG)
-			ori		r28,r28,lo16(EXT(default_pset))	; (TEST/DEBUG)
-			ori		r27,r27,lo16(EXT(DebugWork))	; (TEST/DEBUG)
-			li		r20,0							; (TEST/DEBUG)
-			lwz		r26,0(r27)						; (TEST/DEBUG)
-			lwz		r27,psthreadcnt(r28)			; (TEST/DEBUG)
-			mr.		r26,r26							; (TEST/DEBUG) Have we locked the test out?
-			lwz		r28,psthreads(r28)				; (TEST/DEBUG)
-			mflr	r31								; (TEST/DEBUG) Save return
-			bnelr-									; (TEST/DEBUG) Test already triggered, skip...
-			b		fckgo							; (TEST/DEBUG) Join up...
-			
-fcknext:	mr.		r27,r27							; (TEST/DEBUG) Any more threads?
-			bne+	fckxxx							; (TEST/DEBUG) Yes...
-
-			mtlr	r31								; (TEST/DEBUG) Restore return
-			blr										; (TEST/DEBUG) Leave...
-			
-fckxxx:		lwz		r28,THREAD_PSTHRN(r28)			; (TEST/DEBUG) Get next thread
-
-fckgo:		subi	r27,r27,1						; (TEST/DEBUG) Decrement thread count
-			lwz		r24,THREAD_TOP_ACT(r28)			; (TEST/DEBUG) Get activation for the thread
-			lwz		r20,ACT_MACT_PCB(r24)			; (TEST/DEBUG) Get the normal context
-			li		r21,SAVgeneral					; (TEST/DEBUG) Make sure this is all general context
-			bl		versavetype						; (TEST/DEBUG) Check the chain
-			
-			lwz		r20,facctx+FPUsave(r24)			; (TEST/DEBUG) Get regular floating point
-			li		r21,SAVfloat					; (TEST/DEBUG) Make sure this is all floating point
-			bl		versavetype						; (TEST/DEBUG) Check the chain			
-			
-			lwz		r20,facctx+VMXsave(r24)			; (TEST/DEBUG) Get regular vector context
-			li		r21,SAVvector					; (TEST/DEBUG) Make sure this is all vector
-			bl		versavetype						; (TEST/DEBUG) Check the chain			
-			
-			lwz		r29,vmmControl(r24)				; (TEST/DEBUG) Get the virtual machine control blocks
-			mr.		r29,r29							; (TEST/DEBUG) Are there any?
-			beq+	fcknext							; (TEST/DEBUG) Nope, next thread...
-			
-			li		r22,kVmmMaxContextsPerThread	; (TEST/DEBUG) Get the number of control blocks	
-			subi	r29,r29,vmmCEntrySize			; (TEST/DEBUG) Get running start	
-			
-fcknvmm:	subi	r22,r22,1						; (TEST/DEBUG) Do all of them
-			mr.		r22,r22							; (TEST/DEBUG) Are we all done?
-			addi	r29,r29,vmmCEntrySize			; (TEST/DEBUG) Get the next entry
-			blt-	fcknext							; (TEST/DEBUG) Yes, check next thread...
-			
-			lwz		r23,vmmFlags(r29)				; (TEST/DEBUG) Get entry flags
-			rlwinm.	r23,r23,0,0,0					; (TEST/DEBUG) Is this in use?
-			beq+	fcknvmm							; (TEST/DEBUG) Not in use...
-			
-			lwz		r20,vmmFacCtx+FPUsave(r29)		; (TEST/DEBUG) Get regular floating point
-			li		r21,SAVfloat					; (TEST/DEBUG) Make sure this is all floating point
-			bl		versavetype						; (TEST/DEBUG) Check the chain			
-			
-			lwz		r20,vmmFacCtx+VMXsave(r29)		; (TEST/DEBUG) Get regular vector context
-			li		r21,SAVvector					; (TEST/DEBUG) Make sure this is all vector
-			bl		versavetype						; (TEST/DEBUG) Check the chain			
-			b		fcknvmm							; (TEST/DEBUG) Get the next vmm block...
-			
-versavetype:
-			mr.		r20,r20							; (TEST/DEBUG) Chain done?
-			beqlr-									; (TEST/DEBUG) Yes...
-			
-			lwz		r23,SAVflags(r20)				; (TEST/DEBUG) Get the flags
-			rlwinm	r23,r23,24,24,31				; (TEST/DEBUG) Position it
-			cmplw	r23,r21							; (TEST/DEBUG) Are we the correct type?
-			beq+	versvok							; (TEST/DEBUG) This one is ok...
-			
-			lis		r22,hi16(EXT(DebugWork))		; (TEST/DEBUG)
-			ori		r22,r22,lo16(EXT(DebugWork))	; (TEST/DEBUG)
-			stw		r22,0(r22)						; (TEST/DEBUG) Lock out more checks
-			BREAKPOINT_TRAP							; (TEST/DEBUG) Get into debugger
-			
-versvok:	lwz		r20,SAVprev+4(r20)				; (TEST/DEBUG) Get the previous one
-			b		versavetype						; (TEST/DEBUG) Go check its type...
-#endif
-
-
-#endif	
diff --git a/osfmk/ppc/hw_lock.s b/osfmk/ppc/hw_lock.s
deleted file mode 100644
index 880bbf6ef..000000000
--- a/osfmk/ppc/hw_lock.s
+++ /dev/null
@@ -1,2187 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <mach_assert.h>
-#include <mach_ldebug.h>
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <assym.s>
-
-
-#include <config_dtrace.h>
-#if	CONFIG_DTRACE
-	#define	LOCKSTAT_LABEL(lab) \
-	.data		__ASMNL__	\
-	.globl	lab	__ASMNL__	\
-	lab:		__ASMNL__	\
-	.long 9f	__ASMNL__	\
-	.text		__ASMNL__	\
-	9:		__ASMNL__	\
-
-	.globl	_dtrace_probe, _lockstat_probemap
-#define		LOCKSTAT_RECORD(id)			\
-			lis	r6,hi16(_lockstat_probemap)		__ASMNL__	\
-			ori	r6,r6,lo16(_lockstat_probemap)		__ASMNL__	\
-			lwz	r5,4*id(r6)				__ASMNL__	\
-			mr.	r5,r5					__ASMNL__	\
-			beqlr--						__ASMNL__	\
-			mr	r4,r3					__ASMNL__	\
-			mr	r3,r5					__ASMNL__	\
-			li	r5,0					__ASMNL__	\
-			li	r6,0					__ASMNL__	\
-			li	r7,0					__ASMNL__	\
-			li	r8,0					__ASMNL__	\
-			PROLOG(0)					__ASMNL__	\
-			bl	_dtrace_probe				__ASMNL__	\
-			EPILOG
-#endif
-			
-
-
-#define	STRING	ascii
-
-#define	ILK_LOCKED		0x01
-#define	WAIT_FLAG		0x02
-#define	WANT_UPGRADE	0x04
-#define	WANT_EXCL		0x08
-#define	PRIV_EXCL		0x8000
-
-#define TH_FN_OWNED		0x01
-
-# volatile CR bits
-#define hwtimeout	20
-#define mlckmiss	21
-
-#define	RW_DATA		0
-
-#define PROLOG(space)														\
-			stwu	r1,-(FM_ALIGN(space)+FM_SIZE)(r1)			__ASMNL__	\
-			mfcr	r2											__ASMNL__	\
-			mflr	r0											__ASMNL__	\
-			stw		r3,FM_ARG0(r1)								__ASMNL__	\
-			stw		r11,FM_ARG0+0x04(r1)						__ASMNL__	\
-			stw		r2,(FM_ALIGN(space)+FM_SIZE+FM_CR_SAVE)(r1)	__ASMNL__	\
-			stw		r0,(FM_ALIGN(space)+FM_SIZE+FM_LR_SAVE)(r1)	__ASMNL__
-	
-#define EPILOG																 	\
-			lwz		r1,0(r1)										__ASMNL__	\
-			lwz		r0,FM_LR_SAVE(r1)								__ASMNL__	\
-			mtlr	r0												__ASMNL__
-
-/*
- *		void hw_lock_init(hw_lock_t)
- *
- *			Initialize a hardware lock.
- */
-			.align	5
-			.globl	EXT(hw_lock_init)
-
-LEXT(hw_lock_init)
-
-			li	r0,	0								; set lock to free == 0 
-			stw	r0,	0(r3)							; Initialize the lock 
-			blr
-	
-/*
- *		unsigned int hw_lock_bit(hw_lock_t, unsigned int bit, unsigned int timeout)
- *
- *			Try to acquire spin-lock. The second parameter is the bit mask to test and set.
- *			Multiple bits may be set. Return success (1) or failure (0).
- *			Attempt will fail after timeout ticks of the timebase.
- */
-			.align	5
-			.globl	EXT(hw_lock_bit)
-
-LEXT(hw_lock_bit)
-
-			crset	hwtimeout						; timeout option
-			mr		r12,r4							; Load bit mask
-			mr		r4,r5							; Load timeout value
-			b		lckcomm							; Join on up...
-
-/*
- *      void hw_lock_lock(hw_lock_t)
- *
- *			Acquire lock, spinning until it becomes available.
- *			Return with preemption disabled.
- *			We will just set a default timeout and jump into the NORMAL timeout lock.
- */
-			.align	5
-			.globl	EXT(hw_lock_lock)
-
-LEXT(hw_lock_lock)
-			crclr	hwtimeout						; no timeout option
-			li		r4,0							; request default timeout value
-			li		r12,ILK_LOCKED					; Load bit mask
-			b		lckcomm							; Join on up...
-
-lockDisa:
-			crset	hwtimeout						; timeout option
-			li		r4,0							; request default timeout value
-			li		r12,ILK_LOCKED					; Load bit mask
-			b		lckcomm							; Join on up...
-
-/*
- *		unsigned int hw_lock_to(hw_lock_t, unsigned int timeout)
- *
- *			Try to acquire spin-lock. Return success (1) or failure (0).
- *			Attempt will fail after timeout ticks of the timebase.
- *			We try fairly hard to get this lock.  We disable for interruptions, but
- *			reenable after a "short" timeout (128 ticks, we may want to change this).
- *			After checking to see if the large timeout value (passed in) has expired and a
- *			sufficient number of cycles have gone by (to ensure pending 'rupts are taken),
- *			we return either in abject failure, or disable and go back to the lock sniff routine.
- *			If the sniffer finds the lock free, it jumps right up and tries to grab it.
- */
-			.align	5
-			.globl	EXT(hw_lock_to)
-
-LEXT(hw_lock_to)
-			crset	hwtimeout						; timeout option
-			li		r12,ILK_LOCKED					; Load bit mask
-lckcomm:
-			mfsprg	r6,1							; Get the current activation 
-			lwz		r5,ACT_PREEMPT_CNT(r6)			; Get the preemption level
-			addi	r5,r5,1							; Bring up the disable count
-			stw		r5,ACT_PREEMPT_CNT(r6)			; Save it back 
-			mr		r5,r3							; Get the address of the lock
-			li		r8,0							; Set r8 to zero
-
-lcktry:		lwarx	r6,0,r5							; Grab the lock value
-			and.	r3,r6,r12						; Is it locked?
-			or		r6,r6,r12						; Set interlock 
-			bne--	lckspin							; Yeah, wait for it to clear...
-			stwcx.	r6,0,r5							; Try to seize that there durn lock
-			bne--	lcktry							; Couldn't get it...
-			li		r3,1							; return true 
-			.globl  EXT(hwllckPatch_isync)
-LEXT(hwllckPatch_isync)   
-			isync									; Make sure we don't use a speculatively loaded value
-			blr										; Go on home...
-
-lckspin:	li		r6,lgKillResv					; Get killing field	
-			stwcx.	r6,0,r6							; Kill reservation
-			
-			mr.		r4,r4							; Test timeout value
-			bne++	lockspin0
-			lis		r4,hi16(EXT(LockTimeOut))		; Get the high part 
-			ori		r4,r4,lo16(EXT(LockTimeOut))	; And the low part
-			lwz		r4,0(r4)						; Get the timeout value
-lockspin0:
-			mr.		r8,r8							; Is this the first spin attempt? (r8 == 0)
-			bne++	lockspin1						; No, MSR setup is already done...
-			lis		r0,hi16(MASK(MSR_VEC))			; Get vector enable
-			mfmsr	r9								; Get the MSR value
-			ori		r0,r0,lo16(MASK(MSR_FP))		; Get FP enable
-			ori		r7,r0,lo16(MASK(MSR_EE))		; Get EE bit on too
-			andc	r9,r9,r0						; Clear FP and VEC
-			andc	r7,r9,r7						; Clear EE as well
-			mtmsr	r7								; Turn off interruptions 
-			isync									; May have turned off vec and fp here 
-			mftb	r8								; Get timestamp on entry
-			b		lcksniff
-
-lockspin1:	mtmsr	r7								; Turn off interruptions 
-			mftb	r8								; Get timestamp on entry
-
-lcksniff:	lwz		r3,0(r5)						; Get that lock in here
-			and.	r3,r3,r12						; Is it free yet?
-			beq++	lckretry						; Yeah, try for it again...
-			
-			mftb	r10								; Time stamp us now
-			sub		r10,r10,r8						; Get the elapsed time
-			cmplwi	r10,128							; Have we been spinning for 128 tb ticks?
-			blt++	lcksniff						; Not yet...
-			
-			mtmsr	r9								; Say, any interrupts pending?
-
-;			The following instructions force the pipeline to be interlocked so that only one
-;			instruction is issued per cycle.  This ensures that we stay enabled for a long enough
-;			time; if it's too short, pending interruptions will not have a chance to be taken
-
-			subi	r4,r4,128						; Back off elapsed time from timeout value
-			or		r4,r4,r4						; Do nothing here but force a single cycle delay
-			mr.		r4,r4							; See if we used the whole timeout
-			li		r3,0							; Assume a timeout return code
-			or		r4,r4,r4						; Do nothing here but force a single cycle delay
-			
-			ble--	lckfail							; We failed
-			b		lockspin1						; Now that we've opened an enable window, keep trying...
-lckretry:
-			mtmsr	r9								; Restore interrupt state
-			li		r8,1							; Ensure that R8 is not 0
-			b		lcktry
-lckfail:											; We couldn't get the lock
-			bf		hwtimeout,lckpanic
-			li		r3,0							; Set failure return code
-			blr										; Return, head hanging low...
-lckpanic:
-			mr		r4,r5
-			mr		r5,r3
-			lis		r3,hi16(lckpanic_str)			; Get the failed lck message
-			ori		r3,r3,lo16(lckpanic_str)		; Get the failed lck message
-			bl		EXT(panic)
-			BREAKPOINT_TRAP							; We die here anyway
-			.data
-lckpanic_str:
-			STRINGD	"timeout on attempt to acquire lock (0x%08X), value = 0x%08X\n\000"
-			.text
-
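
The hw_lock_to flow above — sniff the lock with a plain load, attempt the interlocked store only when it looks free, periodically reopen an interrupt window, and give up once the timeout is spent — translates fairly directly to C11 atomics. The sketch below is a rough user-space analogue under assumed names; it substitutes a monotonic clock for the timebase and has no MSR to manipulate.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <time.h>

    typedef atomic_uint hw_lock_like_t;

    static uint64_t now_ns(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
    }

    /* Spin until the lock is acquired or timeout_ns elapses.
     * Returns true with the lock held, false on timeout. */
    static bool hw_lock_to_like(hw_lock_like_t *lock, uint64_t timeout_ns)
    {
        uint64_t deadline = now_ns() + timeout_ns;
        for (;;) {
            /* "Sniff" with a cheap load before the interlocked attempt,
             * as the assembly does with a plain lwz. */
            while (atomic_load_explicit(lock, memory_order_relaxed) != 0) {
                if (now_ns() >= deadline)
                    return false;                   /* abject failure */
            }
            unsigned expected = 0;
            /* acquire ordering plays the role of the trailing isync */
            if (atomic_compare_exchange_weak_explicit(lock, &expected, 1,
                    memory_order_acquire, memory_order_relaxed))
                return true;
        }
    }
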
-/*
- *      void hw_lock_unlock(hw_lock_t)
- *
- *      Unconditionally release lock.
- *      Release preemption level.
- */
-			.align	5
-			.globl	EXT(hw_lock_unlock)
-
-LEXT(hw_lock_unlock)
-
-			.globl  EXT(hwulckPatch_isync)
-LEXT(hwulckPatch_isync)   
-			isync 
-			.globl  EXT(hwulckPatch_eieio)
-LEXT(hwulckPatch_eieio)
-			eieio
-			li	r0,	0								; set lock to free
-			stw	r0,	0(r3)
-
-			b		epStart							; Go enable preemption...
-
-/*
- *		unsigned int hw_unlock_bit(hw_lock_t, unsigned int bit)
- *
- *			Release bit-based spin-lock. The second parameter is the bit mask to clear.
- *			Multiple bits may be cleared.
- *
- */
-			.align	5
-			.globl	EXT(hw_unlock_bit)
-
-LEXT(hw_unlock_bit)
-
-			.globl  EXT(hwulckbPatch_isync)
-LEXT(hwulckbPatch_isync)   
-			isync 
-			.globl  EXT(hwulckbPatch_eieio)
-LEXT(hwulckbPatch_eieio)
-			eieio
-ubittry:	lwarx	r0,0,r3							; Grab the lock value
-			andc	r0,r0,r4						; Clear the lock bits
-			stwcx.	r0,0,r3							; Try to clear that there durn lock
-			bne-	ubittry							; Try again, couldn't save it...
-
-			b		epStart							; Go enable preemption...
-
-/*
- *		unsigned int hw_lock_mbits(hw_lock_t, unsigned int bits, unsigned int value, 
- *			unsigned int newb, unsigned int timeout)
- *
- *			Try to acquire spin-lock. The second parameter is the bit mask to check.
- *			The third is the value of those bits and the 4th is what to set them to.
- *			Return success (1) or failure (0).
- *			Attempt will fail after timeout ticks of the timebase.
- *			We try fairly hard to get this lock.  We disable for interruptions, but
- *			reenable after a "short" timeout (128 ticks, we may want to shorten this).
- *			After checking to see if the large timeout value (passed in) has expired and a
- *			sufficient number of cycles have gone by (to ensure pending 'rupts are taken),
- *			we return either in abject failure, or disable and go back to the lock sniff routine.
- *			If the sniffer finds the lock free, it jumps right up and tries to grab it.
- */
-			.align	5
-			.globl	EXT(hw_lock_mbits)
-
-LEXT(hw_lock_mbits)
-
-			li		r10,0			
-
-mbittry:	lwarx	r12,0,r3						; Grab the lock value
-			and		r0,r12,r4						; Clear extra bits
-			andc	r12,r12,r4						; Clear all bits in the bit mask
-			or		r12,r12,r6						; Turn on the lock bits
-			cmplw	r0,r5							; Are these the right bits?
-			bne--	mbitspin						; Nope, wait for it to clear...
-			stwcx.	r12,0,r3						; Try to seize that there durn lock
-			beq++	mbitgot							; We got it, yahoo...
-			b		mbittry							; Just start up again if the store failed...
-
-			.align	5
-mbitspin:	li		r11,lgKillResv					; Point to killing field
-			stwcx.	r11,0,r11						; Kill it
-			
-			mr.		r10,r10							; Is this the first spin attempt? (r10 == 0)
-			bne++	mbitspin0						; No, MSR setup is already done...
-			lis		r0,hi16(MASK(MSR_VEC))			; Get vector enable
-			mfmsr	r9								; Get the MSR value
-			ori		r0,r0,lo16(MASK(MSR_FP))		; Get FP enable
-			ori		r8,r0,lo16(MASK(MSR_EE))		; Get EE bit on too
-			andc	r9,r9,r0						; Clear FP and VEC
-			andc	r8,r9,r8						; Clear EE as well
-			mtmsr	r8								; Turn off interruptions
-			isync									; May have turned off vectors or float here
-			mftb	r10								; Get the low part of the time base
-			b		mbitsniff
-mbitspin0:
-			mtmsr	r8								; Turn off interruptions
-			mftb	r10								; Get the low part of the time base
-mbitsniff:
-			lwz		r12,0(r3)						; Get that lock in here
-			and		r0,r12,r4						; Clear extra bits
-			cmplw	r0,r5							; Are these the right bits?
-			beq++	mbitretry						; Yeah, try for it again...
-			
-			mftb	r11								; Time stamp us now
-			sub		r11,r11,r10						; Get the elapsed time
-			cmplwi	r11,128							; Have we been spinning for 128 tb ticks?
-			blt++	mbitsniff						; Not yet...
-			
-			mtmsr	r9								; Say, any interrupts pending?			
-
-;			The following instructions force the pipeline to be interlocked so that only one
-;			instruction is issued per cycle.  This ensures that we stay enabled for a long enough
-;			time. If it is too short, pending interruptions will not have a chance to be taken 
-			
-			subi	r7,r7,128						; Back off elapsed time from timeout value
-			or		r7,r7,r7						; Do nothing here but force a single cycle delay
-			mr.		r7,r7							; See if we used the whole timeout
-			or		r7,r7,r7						; Do nothing here but force a single cycle delay
-			
-			ble--	mbitfail						; We failed
-			b		mbitspin0						; Now that we have opened an enable window, keep trying...
-mbitretry:
-			mtmsr	r9								; Enable for interruptions
-			li		r10,1							; Make sure this is non-zero
-			b		mbittry
-
-			.align	5
-mbitgot:	
-			li		r3,1							; Set good return code
-			.globl  EXT(hwlmlckPatch_isync)
-LEXT(hwlmlckPatch_isync)   
-			isync									; Make sure we do not use a speculatively loaded value
-			blr
-
-mbitfail:	li		r3,0							; Set failure return code
-			blr										; Return, head hanging low...
-
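
hw_lock_mbits is, in effect, a compare-and-swap restricted to a masked field of the word: proceed only if (word & bits) == value, then replace the masked field with newb. A minimal C11 sketch of one attempt (names assumed; a caller would wrap it in the same timeout spin as the earlier example):

    #include <stdatomic.h>
    #include <stdbool.h>

    /* One mbits attempt: succeed only if (word & bits) == value, then
     * atomically replace the masked field with newb. */
    static bool lock_mbits_try(atomic_uint *word, unsigned bits,
                               unsigned value, unsigned newb)
    {
        unsigned old = atomic_load_explicit(word, memory_order_relaxed);
        for (;;) {
            if ((old & bits) != value)
                return false;           /* wrong bits; caller spins or times out */
            unsigned desired = (old & ~bits) | newb;
            if (atomic_compare_exchange_weak_explicit(word, &old, desired,
                    memory_order_acquire, memory_order_relaxed))
                return true;            /* we got it, yahoo... */
            /* the failed CAS refreshed old; re-check and retry */
        }
    }
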
-/*
- *      unsigned int hw_cpu_sync(unsigned int *, unsigned int timeout)
- *
- *			Spin until word hits 0 or timeout. 
- *			Return success (1) or failure (0).
- *			Attempt will fail after timeout ticks of the timebase.
- *
- *			The theory is that a processor will bump a counter as it signals
- *			other processors.  Then it will spin until the counter hits 0 (or
- *			times out).  The other processors, as they receive the signal, will
- *			decrement the counter.
- *
- *			The other processors use interlocked update to decrement, this one
- *			does not need to interlock.
- */
-			.align	5
-			.globl	EXT(hw_cpu_sync)
-
-LEXT(hw_cpu_sync)
-
-			mftb	r10								; Get the low part of the time base
-			mr		r9,r3							; Save the sync word address
-			li		r3,1							; Assume we work
-
-csynctry:	lwz		r11,0(r9)						; Grab the sync value
-			mr.		r11,r11							; Counter hit 0?
-			beqlr-									; Yeah, we are done...
-			mftb	r12								; Time stamp us now
-
-			sub		r12,r12,r10						; Get the elapsed time
-			cmplw	r4,r12							; Have we gone too long?
-			bge+	csynctry						; Not yet...
-			
-			li		r3,0							; Set failure...
-			blr										; Return, head hanging low...
-
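
The counter rendezvous described above decomposes into two tiny operations: each receiver performs an interlocked decrement, and the signaller spins until the counter drains or a deadline passes. A hedged C sketch, reusing the illustrative now_ns() helper from the earlier example:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    uint64_t now_ns(void);              /* monotonic-clock helper from above */

    /* Signaller: wait for every receiver to check in.
     * Returns true if the counter drained, false on timeout. */
    static bool cpu_sync_like(atomic_uint *counter, uint64_t timeout_ns)
    {
        uint64_t deadline = now_ns() + timeout_ns;
        while (atomic_load_explicit(counter, memory_order_acquire) != 0) {
            if (now_ns() >= deadline)
                return false;
        }
        return true;
    }

    /* Receiver: the interlocked decrement the comment block prescribes. */
    static void cpu_sync_ack(atomic_uint *counter)
    {
        atomic_fetch_sub_explicit(counter, 1, memory_order_release);
    }
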
-/*
- *      unsigned int hw_cpu_wcng(unsigned int *, unsigned int, unsigned int timeout)
- *
- *			Spin until word changes or timeout. 
- *			Return success (1) or failure (0).
- *			Attempt will fail after timeout ticks of the timebase.
- *
- *			This is used to ensure that a processor passes a certain point.
- *			An example of use is to monitor the last interrupt time in the 
- *			per_proc block.  This can be used to ensure that the other processor
- *			has seen at least one interrupt since a specific time.
- */
-			.align	5
-			.globl	EXT(hw_cpu_wcng)
-
-LEXT(hw_cpu_wcng)
-
-			mftb	r10								; Get the low part of the time base
-			mr		r9,r3							; Save the sync word address
-			li		r3,1							; Assume we work
-
-wcngtry:	lwz		r11,0(r9)						; Grab the value
-			cmplw	r11,r4							; Do they still match?
-			bnelr-									; Nope, cool...
-			mftb	r12								; Time stamp us now
-
-			sub		r12,r12,r10						; Get the elapsed time
-			cmplw	r5,r12							; Have we gone too long?
-			bge+	wcngtry							; Not yet...
-			
-			li		r3,0							; Set failure...
-			blr										; Return, head hanging low...
-			
-
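
hw_cpu_wcng is the dual of hw_cpu_sync: rather than waiting for zero, it waits for the word to move away from a snapshot (for example, a last-interrupt timestamp). In the same illustrative C11 terms:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    uint64_t now_ns(void);              /* monotonic-clock helper from above */

    /* Spin until *word no longer equals snapshot or the deadline passes.
     * Returns true if the word changed, false on timeout. */
    static bool cpu_wcng_like(atomic_uint *word, unsigned snapshot,
                              uint64_t timeout_ns)
    {
        uint64_t deadline = now_ns() + timeout_ns;
        while (atomic_load_explicit(word, memory_order_acquire) == snapshot) {
            if (now_ns() >= deadline)
                return false;
        }
        return true;
    }
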
-/*
- *		unsigned int hw_lock_try(hw_lock_t)
- *
- *			Try to acquire spin-lock. Return success (1) or failure (0)
- *			Returns with preemption disabled on success.
- *
- */
-			.align	5
-			.globl	EXT(hw_lock_try)
-
-LEXT(hw_lock_try)
-
-			lis		r0,hi16(MASK(MSR_VEC))			; Get vector enable
-			mfmsr	r9								; Get the MSR value 
-			ori		r0,r0,lo16(MASK(MSR_FP))		; Get FP enable
-			ori		r7,r0,lo16(MASK(MSR_EE))		; Get EE bit on too
-			andc	r9,r9,r0						; Clear FP and VEC
-			andc	r7,r9,r7						; Clear EE as well
-
-			mtmsr	r7								; Disable interruptions and thus, preemption
-
-			lwz		r5,0(r3)						; Quick load
-			andi.	r6,r5,ILK_LOCKED				; TEST...
-			bne--	.L_lock_try_failed				; No go...
-
-.L_lock_try_loop:	
-			lwarx	r5,0,r3							; Ld from addr of arg and reserve
-
-			andi.	r6,r5,ILK_LOCKED				; TEST...
-			ori		r5,r5,ILK_LOCKED
-			bne--	.L_lock_try_failedX				; branch if taken. Predict free 
-	
-			stwcx.	r5,0,r3							; And SET (if still reserved)
-			bne--	.L_lock_try_loop				; If set failed, loop back 
-			
-			.globl  EXT(hwltlckPatch_isync)
-LEXT(hwltlckPatch_isync)   
-			isync
-
-			mfsprg	r6,1							; Get current activation 
-			lwz		r5,ACT_PREEMPT_CNT(r6)			; Get the preemption level
-			addi	r5,r5,1							; Bring up the disable count 
-			stw		r5,ACT_PREEMPT_CNT(r6)			; Save it back
-
- 			mtmsr	r9								; Allow interruptions now 
-			li		r3,1							; Set that the lock was free 
-			blr
-
-.L_lock_try_failedX:
- 			li		r6,lgKillResv					; Killing field
- 			stwcx.	r6,0,r6							; Kill reservation
- 			
-.L_lock_try_failed:
- 			mtmsr	r9								; Allow interruptions now 
-			li		r3,0							; FAILURE - lock was taken 
-			blr
-
-/*
- *		unsigned int hw_lock_held(hw_lock_t)
- *
- *			Return 1 if lock is held
- *			Doesn't change preemption state.
- *			N.B.  Racy, of course.
- */
-			.align	5
-			.globl	EXT(hw_lock_held)
-
-LEXT(hw_lock_held)
-
-			isync									; Make sure we don't use a speculatively fetched lock
-			lwz		r3, 0(r3)						; Get lock value 
-			andi.	r6,r3,ILK_LOCKED				; Extract the ILK_LOCKED bit
-			blr
-
-/*
- *		uint32_t hw_compare_and_store(uint32_t oldval, uint32_t newval, uint32_t *dest)
- *
- *			Compare old to area; if equal, store new and return true,
- *			else return false with no store.
- *			This is an atomic operation
- */
-			.align	5
-			.globl	EXT(hw_compare_and_store)
-			.globl	EXT(OSCompareAndSwap)
-			.globl	EXT(OSCompareAndSwapPtr)
-
-LEXT(hw_compare_and_store)
-LEXT(OSCompareAndSwap)
-LEXT(OSCompareAndSwapPtr)
-
-			mr		r6,r3							; Save the old value
-
-cstry:		lwarx	r9,0,r5							; Grab the area value
-			li		r3,1							; Assume it works
-			cmplw	cr0,r9,r6						; Does it match the old value?
-			bne--	csfail							; No, it must have changed...
-			stwcx.	r4,0,r5							; Try to save the new value
-			bne--	cstry							; Didn't get it, try again...
-			.globl  EXT(hwcsatomicPatch_isync)
-LEXT(hwcsatomicPatch_isync)   
-			isync									; Just hold up prefetch
-			blr										; Return...
-			
-csfail:		li		r3,lgKillResv					; Killing field
-			stwcx.	r3,0,r3							; Blow reservation
-			
-			li		r3,0							; Set failure
-			blr										; Better luck next time...
-
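
hw_compare_and_store / OSCompareAndSwap return 1 when the store happened and 0 otherwise, which is exactly the contract of C11's strong compare-exchange; the acquire ordering below stands in for the trailing isync that "holds up prefetch". A minimal sketch:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Store newval and return true iff *dest still held oldval. */
    static bool compare_and_store(uint32_t oldval, uint32_t newval,
                                  _Atomic uint32_t *dest)
    {
        /* strong form: no spurious failure, matching the lwarx/stwcx. retry */
        return atomic_compare_exchange_strong_explicit(dest, &oldval, newval,
                   memory_order_acquire, memory_order_relaxed);
    }
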
-
-/*
- *		uint32_t hw_atomic_add(uint32_t *dest, uint32_t delt)
- *
- *			Atomically add the second parameter to the first.
- *			Returns the result.
- *
- */
-			.align	5
-			.globl	EXT(hw_atomic_add)
-
-LEXT(hw_atomic_add)
-
-			mr		r6,r3							; Save the area
-
-addtry:		lwarx	r3,0,r6							; Grab the area value
-			add		r3,r3,r4						; Add the value
-			stwcx.	r3,0,r6							; Try to save the new value
-			bne--	addtry							; Didn't get it, try again...
-			blr										; Return...
-
-
-/*
- *		uint32_t hw_atomic_sub(uint32_t *dest, uint32_t delt)
- *
- *			Atomically subtract the second parameter from the first.
- *			Returns the result.
- *
- */
-			.align	5
-			.globl	EXT(hw_atomic_sub)
-
-LEXT(hw_atomic_sub)
-
-			mr		r6,r3							; Save the area
-
-subtry:		lwarx	r3,0,r6							; Grab the area value
-			sub		r3,r3,r4						; Subtract the value
-			stwcx.	r3,0,r6							; Try to save the new value
-			bne--	subtry							; Didn't get it, try again...
-			blr										; Return...
-
-
-/*
- *		uint32_t hw_atomic_or(uint32_t *dest, uint32_t mask)
- *
- *			Atomically ORs the second parameter into the first.
- *			Returns the result.
- */
-			.align	5
-			.globl	EXT(hw_atomic_or)
-LEXT(hw_atomic_or)
-			.globl	EXT(hw_atomic_or_noret)
-LEXT(hw_atomic_or_noret)
-			mr		r6,r3							; Save the area 		
-
-ortry:		lwarx	r3,0,r6							; Grab the area value
-			or		r3,r3,r4						; OR the value 
-			stwcx.	r3,0,r6							; Try to save the new value
-			bne--	ortry							; Did not get it, try again...
-			blr										; Return...
-
-
-/*
- *		uint32_t hw_atomic_and(uint32_t *dest, uint32_t mask)
- *
- *			Atomically ANDs the second parameter with the first.
- *			Returns the result.
- *
- */
-			.align	5
-			.globl	EXT(hw_atomic_and)
-LEXT(hw_atomic_and)
-			.globl	EXT(hw_atomic_and_noret)
-LEXT(hw_atomic_and_noret)
-			mr		r6,r3							; Save the area 		
-
-andtry:		lwarx	r3,0,r6							; Grab the area value
-			and		r3,r3,r4						; AND the value 
-			stwcx.	r3,0,r6							; Try to save the new value
-			bne--	andtry							; Did not get it, try again...
-			blr										; Return...
-
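
A porting footnote worth a sketch: hw_atomic_add and its siblings return the new value (the result of the operation), whereas C11's fetch-and-ops return the old one. Hypothetical shims that preserve the original contract:

    #include <stdatomic.h>
    #include <stdint.h>

    /* hw_atomic_add contract: return the value *after* the addition. */
    static uint32_t hw_atomic_add_like(_Atomic uint32_t *dest, uint32_t delt)
    {
        return atomic_fetch_add_explicit(dest, delt,
                                         memory_order_relaxed) + delt;
    }

    /* hw_atomic_or contract: return the value *after* the OR. */
    static uint32_t hw_atomic_or_like(_Atomic uint32_t *dest, uint32_t mask)
    {
        return atomic_fetch_or_explicit(dest, mask,
                                        memory_order_relaxed) | mask;
    }
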
-
-/*
- *		void hw_queue_atomic(unsigned int * anchor, unsigned int * elem, unsigned int disp)
- *
- *			Atomically inserts the element at the head of the list
- *			anchor is the pointer to the first element
- *			element is the pointer to the element to insert
- *			disp is the displacement into the element to the chain pointer
- */
-			.align	5
-			.globl	EXT(hw_queue_atomic)
-			.globl	EXT(OSEnqueueAtomic)
-
-LEXT(hw_queue_atomic)
-LEXT(OSEnqueueAtomic)
-
-			mr		r7,r4							; Make end point the same as start
-			mr		r8,r5							; Copy the displacement also
-			b		hw_queue_comm					; Join common code...
-
-/*
- *		void hw_queue_atomic_list(unsigned int * anchor, unsigned int * first, unsigned int * last, unsigned int disp)
- *
- *			Atomically inserts the list of elements at the head of the list
- *			anchor is the pointer to the first element
- *			first is the pointer to the first element to insert
- *			last is the pointer to the last element to insert
- *			disp is the displacement into the element to the chain pointer
- */
-			.align	5
-			.globl	EXT(hw_queue_atomic_list)
-
-LEXT(hw_queue_atomic_list)
-
-			mr		r7,r5							; Make end point the same as start
-			mr		r8,r6							; Copy the displacement also
-
-hw_queue_comm:
-			lwarx	r9,0,r3							; Pick up the anchor
-			stwx	r9,r8,r7						; Chain that to the end of the new stuff
-			eieio									; Make sure this store makes it before the anchor update
-			stwcx.	r4,0,r3							; Try to chain into the front
-			bne--	hw_queue_comm					; Didn't make it, try again...
-
-			blr										; Return...
-
-/*
- *		unsigned int *hw_dequeue_atomic(unsigned int *anchor, unsigned int disp)
- *
- *			Atomically removes the first element in a list and returns it.
- *			anchor is the pointer to the first element
- *			disp is the displacement into the element to the chain pointer
- *			Returns element if found, 0 if empty.
- */
-			.align	5
-			.globl	EXT(hw_dequeue_atomic)
-			.globl	EXT(OSDequeueAtomic)
-
-LEXT(hw_dequeue_atomic)
-LEXT(OSDequeueAtomic)
-
-			mr		r5,r3							; Save the anchor
-
-hw_dequeue_comm:
-			lwarx	r3,0,r5							; Pick up the anchor
-			mr.		r3,r3							; Is the list empty?
-			beq--	hdcFail							; Leave if list empty...
-			lwzx	r9,r4,r3						; Get the next in line
-			stwcx.	r9,0,r5							; Try to chain into the front
-			beqlr++									; Got the thing, go away with it...
-			b		hw_dequeue_comm					; Did not make it, try again...
-
-hdcFail:	li		r4,lgKillResv					; Killing field
-			stwcx.	r4,0,r4							; Dump reservation
-			blr										; Leave...
-
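
Together, hw_queue_atomic and hw_dequeue_atomic form an atomic LIFO whose link field sits at a caller-chosen byte displacement inside each element. A CAS-based C sketch follows; note one real divergence: the lwarx/stwcx. reservation in the original defeats the classic ABA hazard on pop, while the plain compare-and-swap below does not, so this pop is safe only with a single consumer (that restriction, like the names, is mine, not the original's).

    #include <stdatomic.h>
    #include <stddef.h>

    /* Push elem onto the LIFO; the next pointer lives at byte offset disp. */
    static void lifo_push(_Atomic(void *) *anchor, void *elem, size_t disp)
    {
        void **link = (void **)((char *)elem + disp);
        void *head = atomic_load_explicit(anchor, memory_order_relaxed);
        do {
            *link = head;               /* chain the old head behind us */
        } while (!atomic_compare_exchange_weak_explicit(anchor, &head, elem,
                     memory_order_release, memory_order_relaxed));
    }

    /* Pop the first element, or NULL if empty.  Single consumer only:
     * a concurrent pop/push pair can recycle head under this CAS (ABA). */
    static void *lifo_pop(_Atomic(void *) *anchor, size_t disp)
    {
        void *head = atomic_load_explicit(anchor, memory_order_acquire);
        while (head != NULL) {
            void *next = *(void **)((char *)head + disp);
            if (atomic_compare_exchange_weak_explicit(anchor, &head, next,
                    memory_order_acquire, memory_order_acquire))
                break;
        }
        return head;
    }
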
-
-/*
- * Routines for mutex lock debugging.
- */
-
-/* 
- * Gets lock check flags in CR6: CR bits 24-27
- */
-#define CHECK_SETUP(rg)											\
-			lbz		rg,lglcksWork(0)				__ASMNL__ 	\
-			mtcrf	2,rg							__ASMNL__ 
-
-
-/*
- * Checks for expected lock type.
- */
-#define	CHECK_MUTEX_TYPE()										\
-			bf		MUTEX_ATTR_DEBUGb,1f			__ASMNL__	\
-			bt		24+disLktypeb,1f				__ASMNL__ 	\
-			lwz		r10,MUTEX_TYPE(r3)				__ASMNL__ 	\
-			cmpwi	r10,MUTEX_TAG					__ASMNL__	\
-			beq++	1f								__ASMNL__	\
-			PROLOG(0)								__ASMNL__	\
-			mr		r4,r11							__ASMNL__	\
-			mr		r5,r10							__ASMNL__	\
-			lis		r3,hi16(not_a_mutex)			__ASMNL__	\
-			ori		r3,r3,lo16(not_a_mutex)			__ASMNL__	\
-			bl		EXT(panic)						__ASMNL__	\
-			BREAKPOINT_TRAP							__ASMNL__	\
-1:
-
-	.data
-not_a_mutex:
-			STRINGD	"mutex (0x%08X) not a mutex type (0x%08X)\n\000"
-			.text
-
-/* 
- * Verifies return to the correct thread in "unlock" situations.
- */
-#define CHECK_THREAD(thread_offset)								\
-			bf		MUTEX_ATTR_DEBUGb,3f			__ASMNL__	\
-			bt		24+disLkThreadb,3f				__ASMNL__ 	\
-			mfsprg	r10,1							__ASMNL__	\
-			lwz		r5,MUTEX_DATA(r3)				__ASMNL__	\
-			rlwinm.	r9,r5,0,0,29					__ASMNL__	\
-			bne++	1f								__ASMNL__	\
-			lis		r3,hi16(not_held) 				__ASMNL__	\
-			ori		r3,r3,lo16(not_held)			__ASMNL__ 	\
-			b		2f								__ASMNL__	\
-1:													__ASMNL__	\
-			cmpw	r9,r10	 						__ASMNL__ 	\
-			beq++	3f 								__ASMNL__ 	\
-			mr		r5,r10							__ASMNL__	\
-			mr		r6,r9							__ASMNL__	\
-			lis		r3,hi16(wrong_thread) 			__ASMNL__	\
-			ori		r3,r3,lo16(wrong_thread)		__ASMNL__ 	\
-2:													__ASMNL__	\
-			mr		r4,r11							__ASMNL__	\
-			PROLOG(0)								__ASMNL__	\
-			bl		EXT(panic)	 					__ASMNL__ 	\
-			BREAKPOINT_TRAP							__ASMNL__	\
-3:
-
-	.data
-not_held:
-	STRINGD	"mutex (0x%08X) not held\n\000"
-wrong_thread:
-	STRINGD	"mutex (0x%08X) unlocked by non-owner(0x%08X), current owner(0x%08X)\n\000"
-	.text
-
-#define CHECK_MYLOCK()											\
-			bf		MUTEX_ATTR_DEBUGb,1f			__ASMNL__	\
-			bt		24+disLkMyLckb,1f				__ASMNL__ 	\
-			mfsprg	r10,1							__ASMNL__	\
-			lwz		r9,MUTEX_DATA(r3)				__ASMNL__	\
-			rlwinm	r9,r9,0,0,29					__ASMNL__	\
-			cmpw	r9,r10	 						__ASMNL__	\
-			bne++	1f 								__ASMNL__	\
-			mr		r4,r11							__ASMNL__	\
-			lis		r3,	hi16(mylock_attempt)		__ASMNL__	\
-			ori		r3,r3,lo16(mylock_attempt)		__ASMNL__	\
-			bl		EXT(panic)	 					__ASMNL__	\
-			BREAKPOINT_TRAP							__ASMNL__	\
-1:	
-	
-	.data
-mylock_attempt:
-	STRINGD	"mutex (0x%08X) recursive lock attempt\n\000"
-	.text
-
-#define	LCK_STACK(lck, stack, lck_stack, frame_cnt, lr_save, tmp)		\
-			bf		24+enaLkExtStckb,3f				__ASMNL__ 	\
-			addi	lck_stack,lck,MUTEX_STACK		__ASMNL__	\
-			li		frame_cnt,MUTEX_FRAMES-1		__ASMNL__	\
-1:													__ASMNL__	\
-			mr		tmp,stack						__ASMNL__	\
-			lwz		stack,0(stack)					__ASMNL__	\
-			xor		tmp,stack,tmp					__ASMNL__	\
-			cmplwi	tmp,8192						__ASMNL__	\
-			bge--	2f								__ASMNL__	\
-			lwz		lr_save,FM_LR_SAVE(stack)		__ASMNL__	\
-			stwu	lr_save,4(lck_stack)			__ASMNL__	\
-			subi	frame_cnt,frame_cnt,1			__ASMNL__	\
-			cmpi	cr0,frame_cnt,0					__ASMNL__	\
-			bne		1b								__ASMNL__	\
-			b		3f								__ASMNL__	\
-2:													__ASMNL__	\
-			li		tmp,0							__ASMNL__	\
-			stwu	tmp,4(lck_stack)				__ASMNL__	\
-			subi	frame_cnt,frame_cnt,1			__ASMNL__	\
-			cmpi	cr0,frame_cnt,0					__ASMNL__	\
-			bne		2b								__ASMNL__	\
-3:	
-
-			.align	5
-			mr		r11,r3							; Save lock addr
-mlckeEnter:
-			lwz		r0,MUTEX_ATTR(r3)
-			mtcrf	1,r0							; Set cr7
-			CHECK_SETUP(r12)	
-			CHECK_MUTEX_TYPE()
-
-			bf		MUTEX_ATTR_DEBUGb,L_mtx_lock_assert_wait_2
-			PROLOG(0)
-			bl		EXT(assert_wait_possible)
-			mr.		r3,r3
-			bne		L_mtx_lock_assert_wait_1
-			lis		r3,hi16(L_mtx_lock_assert_wait_panic_str)
-			ori		r3,r3,lo16(L_mtx_lock_assert_wait_panic_str)
-			bl		EXT(panic)
-			BREAKPOINT_TRAP							; We die here anyway
-
-			.data
-L_mtx_lock_assert_wait_panic_str:
-			STRINGD "mutex lock attempt with assert_wait_possible false\n\000"
-			.text
-
-L_mtx_lock_assert_wait_1:
-			lwz		r3,FM_ARG0(r1)
-			lwz		r11,FM_ARG0+0x04(r1)
-			lwz		r2,(FM_ALIGN(0)+FM_SIZE+FM_CR_SAVE)(r1)
-			mtcr	r2
-			EPILOG
-L_mtx_lock_assert_wait_2:
-
-			mfsprg	r6,1							; load the current thread
-			bf		MUTEX_ATTR_STATb,mlckestatskip	; Branch if no stat
-			lwz		r5,MUTEX_GRP(r3)				; Load lock group
-			li		r7,GRP_MTX_STAT_UTIL+4			; Set stat util offset
-mlckestatloop:
-			lwarx	r8,r7,r5						; Load stat util cnt
-			addi	r8,r8,1							; Increment stat util cnt
-			stwcx.	r8,r7,r5						; Store stat util cnt
-			bne--	mlckestatloop					; Retry if failed
-			mr.		r8,r8							; Test for zero
-			bne++	mlckestatskip					; Skip if stat util cnt did not wrap
-			lwz		r8,GRP_MTX_STAT_UTIL(r5)		; Load upper stat util cnt
-			addi	r8,r8,1							; Increment upper stat util cnt
-			stw		r8,GRP_MTX_STAT_UTIL(r5)		; Store upper stat util cnt
-mlckestatskip:
-			lwz		r5,MUTEX_DATA(r3)				; Get the lock quickly
-			li		r4,0
-			li		r8,0
-			lis		r0,hi16(MASK(MSR_VEC))			; Get vector enable
-			mfmsr	r9								; Get the MSR value
-			ori		r0,r0,lo16(MASK(MSR_FP))		; Get FP enable
-			ori		r7,r0,lo16(MASK(MSR_EE))		; Get EE bit on too
-			andc	r9,r9,r0						; Clear FP and VEC
-			andc	r7,r9,r7						; Clear EE as well
-			mtmsr	r7								; Turn off interruptions 
-			isync									; May have turned off vec and fp here 
-			mr.		r5,r5							; Quick check
-			bne--	mlckespin01						; Cannot get it right now...
-
-mlcketry:
-			lwarx	r5,MUTEX_DATA,r3				; load the mutex lock
-			mr.		r5,r5
-			bne--	mlckespin0						; Cannot get it right now...
-			stwcx.	r6,MUTEX_DATA,r3				; grab the lock
-			bne--	mlcketry						; loop back if failed
-			.globl	EXT(mlckePatch_isync)
-LEXT(mlckePatch_isync)
-			isync									; stop prefetching
-			mflr	r12
-			bf		MUTEX_ATTR_DEBUGb,mlckedebskip
-			mr		r8,r6							; Get the active thread
-			stw		r12,MUTEX_STACK(r3)				; Save our caller
-			stw		r8,MUTEX_THREAD(r3)				; Set the mutex's holding thread
-			mr		r5,r1
-			LCK_STACK(r3,r5,r6,r7,r8,r10)
-mlckedebskip:
-			mtmsr	r9								; Say, any interrupts pending?
-			blr
-
-mlckespin0:
-			li		r5,lgKillResv					; Killing field
-			stwcx.	r5,0,r5							; Kill reservation
-mlckespin01:
-			mflr	r12
-			mtmsr	r9								; Say, any interrupts pending?
-			bl		mlckspin1	
-			mtmsr	r7								; Turn off interruptions, vec and fp off already
-			mtlr	r12
-			b		mlcketry
-
-/*
- *		void lck_mtx_lock(lck_mtx_t*)
- *
- */
-			.align	5
-			.globl	EXT(lck_mtx_lock)
-LEXT(lck_mtx_lock)
-
-			mfsprg	r6,1							; load the current thread
-			lwz		r5,MUTEX_DATA(r3)				; Get the lock quickly
-			mr		r11,r3							; Save lock addr
-			li		r4,0
-			li		r8,0
-			li		r9,0
-			mr.		r5,r5							; Quick check
-			bne--	mlckspin00						; Indirect, or cannot get it right now...
-
-mlcktry:
-			lwarx	r5,MUTEX_DATA,r3				; load the mutex lock
-			mr.		r5,r5
-			bne--	mlckspin01						; Cannot get it right now...
-			stwcx.	r6,MUTEX_DATA,r3				; grab the lock
-			bne--	mlcktry							; loop back if failed
-			.globl	EXT(mlckPatch_isync)
-LEXT(mlckPatch_isync)
-			isync									; stop prefetching
-			blr
-; Need to debug making blr above a patch point and record:
-;			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE)
-
-mlckspin00:
-			cmpli	cr0,r5,MUTEX_IND				; Is it an indirect mutex?
-			bne--	mlckspin02						; No, go handle contention 
-			lwz		r3,MUTEX_PTR(r3)				; load mutex ext pointer
-			b		mlckeEnter
-mlckspin01:
-			li		r5,lgKillResv					; Killing field
-			stwcx.	r5,0,r5							; Kill reservation
-mlckspin02:
-			mflr	r12
-			li		r0,0
-			mtcrf	1,r0							; Set cr7 to zero
-			bl		mlckspin1
-			mtlr	r12
-			b		mlcktry
-
-
-mlckspin1:
-			mr.		r4,r4							; Test timeout value
-			bne++	mlckspin2
-			lis		r4,hi16(EXT(MutexSpin))			; Get the high part 
-			ori		r4,r4,lo16(EXT(MutexSpin))		; And the low part
-			lwz		r4,0(r4)						; Get spin timeout value
-			mr.		r4,r4							; Test spin timeout value
-			bne++	mlckspin2						; Is spin timeout requested
-			crclr	mlckmiss						; Clear miss test
-			b		mlckslow1						; Don't try to spin
-
-mlckspin2:	mr.		r8,r8							; Is this the first spin attempt? (r8 == 0)
-			bne++	mlckspin3						; No, MSR setup is already done...
-			crclr	mlckmiss						; Clear miss test
-			mr.		r9,r9							; Do we already have the MSR value?
-			bne++	mlckspin3						; Yes, r9 holds the saved MSR...
-			lis		r0,hi16(MASK(MSR_VEC))			; Get vector enable
-			mfmsr	r9								; Get the MSR value
-			ori		r0,r0,lo16(MASK(MSR_FP))		; Get FP enable
-			ori		r7,r0,lo16(MASK(MSR_EE))		; Get EE bit on too
-			andc	r9,r9,r0						; Clear FP and VEC
-			andc	r7,r9,r7						; Clear EE as well
-			mtmsr	r7								; Turn off interruptions 
-			isync									; May have turned off vec and fp here 
-			mftb	r8								; Get timestamp on entry
-			b		mlcksniff
-
-mlckspin3:	mtmsr	r7								; Turn off interruptions 
-			mftb	r8								; Get timestamp on entry
-
-mlcksniff:	lwz		r5,MUTEX_DATA(r3)				; Get that lock in here
-			mr.		r5,r5							; Is the lock held
-			beq++	mlckretry						; No, try for it again...
-			rlwinm.	r10,r5,0,0,29					; Extract the lock owner
-			beq++	mlckslow0						; InterLock is held
-			bf		MUTEX_ATTR_STATb,mlStatSkip		; Branch if no stat
-			andi.	r5,r5,ILK_LOCKED				; Is the interlock set?
-			bne		mlStatSkip						; yes, skip
-			bt		mlckmiss,mlStatSkip				; miss already counted
-			crset	mlckmiss						; Remember miss recorded
-			lwz		r5,MUTEX_GRP(r3)				; Load lock group
-			addi	r5,r5,GRP_MTX_STAT_MISS+4			; Add stat miss offset
-mlStatLoop:
-			lwarx	r6,0,r5							; Load stat miss cnt
-			addi	r6,r6,1							; Increment stat miss cnt
-			stwcx.	r6,0,r5							; Update stat miss cnt
-			bne--	mlStatLoop						; Retry if failed
-			mfsprg	r6,1							; Reload current thread
-mlStatSkip:
-			lwz		r2,ACT_MACT_SPF(r10)			; Get the special flags
-			rlwinm. r2,r2,0,OnProcbit,OnProcbit 	; Is OnProcbit set?
-			beq		mlckslow0						; Lock owner isn't running
-			lis		r2,hi16(TH_IDLE)				; Get thread idle state
-			ori		r2,r2,lo16(TH_IDLE)				; Get thread idle state
-			lwz		r10,THREAD_STATE(r10)			; Get the thread state
-			and.	r10,r10,r2						; Is idle set?
-			bne		mlckslow0						; Lock owner is idling
-
-			mftb	r10								; Time stamp us now
-			sub		r10,r10,r8						; Get the elapsed time
-			cmplwi	r10,128							; Have we been spinning for 128 tb ticks?
-			blt++	mlcksniff						; Not yet...
-			
-			mtmsr	r9								; Say, any interrupts pending?
-
-;			The following instructions force the pipeline to be interlocked so that only one
-;			instruction is issued per cycle.  This ensures that we stay enabled for a long enough
-;			time; if it's too short, pending interruptions will not have a chance to be taken
-
-			subi	r4,r4,128						; Back off elapsed time from timeout value
-			or		r4,r4,r4						; Do nothing here but force a single cycle delay
-			mr.		r4,r4							; See if we used the whole timeout
-			or		r4,r4,r4						; Do nothing here but force a single cycle delay
-			
-			ble--	mlckslow1						; We failed
-			b		mlckspin3						; Now that we've opened an enable window, keep trying...
-mlckretry:
-			mtmsr	r9								; Restore interrupt state
-			li		r8,1							; Show already through once
-			blr	
-
-mlckslow0:											; We couldn't get the lock
-			mtmsr	r9								; Restore interrupt state
-
-mlckslow1:
-			mtlr	r12
-
-			PROLOG(0)
-.L_ml_retry:
-			bl		lockDisa						; Go get a lock on the mutex's interlock lock
-			mr.		r4,r3							; Did we get it?
-			lwz		r3,FM_ARG0(r1)					; Restore the lock address
-			bne++	mlGotInt						; We got it just fine...
-			mr		r4,r11							; Saved lock addr
-			lis		r3,hi16(mutex_failed1)			; Get the failed mutex message
-			ori		r3,r3,lo16(mutex_failed1)		; Get the failed mutex message
-			bl		EXT(panic)						; Call panic
-			BREAKPOINT_TRAP							; We die here anyway, cannot get the lock
-	
-			.data
-mutex_failed1:
-			STRINGD	"attempt to interlock mutex (0x%08X) failed on mutex lock\n\000"
-			.text
-			
-mlGotInt:
-			
-;			Note that there is no reason to do a load and reserve here.  We already
-;			hold the interlock lock and no one can touch this field unless they 
-;			have that, so, we're free to play
-
-			lwz		r4,MUTEX_DATA(r3)				; Get the mutex's lock field
-			rlwinm.	r9,r4,30,2,31					; So, can we have it?
-			bne-	mlInUse							; Nope, somebody's playing already...
-
-			bf++		MUTEX_ATTR_DEBUGb,mlDebSkip
-			CHECK_SETUP(r5)
-			mfsprg	r9,1							; Get the current activation
-			lwz		r5,0(r1)						; Get previous save frame
-			lwz		r6,FM_LR_SAVE(r5)				; Get our caller's address
-			mr		r8,r9							; Get the active thread
-			stw		r6,MUTEX_STACK(r3)				; Save our caller
-			stw		r8,MUTEX_THREAD(r3)				; Set the mutex's holding thread
-			LCK_STACK(r3,r5,r6,r7,r8,r10)
-mlDebSkip:
-			mr		r3,r11							; Get the based lock address
-			bl	EXT(lck_mtx_lock_acquire)
-			lwz		r2,(FM_ALIGN(0)+FM_SIZE+FM_CR_SAVE)(r1)
-			mfsprg	r5,1
-			mtcr	r2
-			mr.		r4,r3
-			lwz		r3,FM_ARG0(r1)					; restore r3 (saved in prolog)
-			lwz		r11,FM_ARG0+0x04(r1)			; restore r11 (saved in prolog)
-			beq		mlUnlock
-			ori		r5,r5,WAIT_FLAG
-
-mlUnlock:	eieio	
-			stw	r5,MUTEX_DATA(r3)					; grab the mutexlock and free the interlock
-
-			EPILOG									; Restore all saved registers
-			b		epStart							; Go enable preemption...
-
-;			We come to here when we have a resource conflict.  In other words,
-;			the mutex is held.
-
-mlInUse:
-
-			CHECK_SETUP(r12)	
-			CHECK_MYLOCK()							; Assert we don't own the lock already
-
-;			Note that we come in here with the interlock set.  The wait routine
-;			will unlock it before waiting.
-
-			bf		MUTEX_ATTR_STATb,mlStatSkip2	; Branch if no stat
-			lwz		r5,MUTEX_GRP(r3)				; Load lck group
-			bt		mlckmiss,mlStatSkip1			; Skip miss already counted
-			crset	mlckmiss						; Remember miss recorded
-			li		r9,GRP_MTX_STAT_MISS+4			; Get stat miss offset
-mlStatLoop1:
-			lwarx	r8,r9,r5						; Load stat miss cnt
-			addi	r8,r8,1							; Increment stat miss cnt	
-			stwcx.	r8,r9,r5						; Store stat miss cnt
-			bne--	mlStatLoop1						; Retry if failed
-mlStatSkip1:
-			lwz		r9,GRP_MTX_STAT_WAIT+4(r5)		; Load wait cnt
-			addi	r9,r9,1							; Increment wait cnt
-			stw		r9,GRP_MTX_STAT_WAIT+4(r5)		; Update wait cnt
-mlStatSkip2:
-			ori		r4,r4,WAIT_FLAG					; Set the wait flag
-			stw		r4,MUTEX_DATA(r3)
-			rlwinm	r4,r4,0,0,29					; Extract the lock owner
-			mfcr	r2
-			stw		r2,(FM_ALIGN(0)+FM_SIZE+FM_CR_SAVE)(r1)
-			mr		r3,r11							; Get the based lock address
-			bl		EXT(lck_mtx_lock_wait)			; Wait for our turn at the lock
-			
-			lwz		r3,FM_ARG0(r1)					; restore r3 (saved in prolog)
-			lwz		r11,FM_ARG0+0x04(r1)			; restore r11 (saved in prolog)
-			lwz		r2,(FM_ALIGN(0)+FM_SIZE+FM_CR_SAVE)(r1)
-			mtcr	r2
-			b		.L_ml_retry						; and try again...
-
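
The heart of lck_mtx_lock is the adaptive policy in mlcksniff: before blocking, keep spinning only while the lock's owner is actually running on a processor (OnProc and not idling), and fall into lck_mtx_lock_wait otherwise. A schematic C rendering of just that decision, under assumed structure names:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    struct thread_like { _Atomic bool on_proc; };

    struct mtx_like {
        _Atomic(struct thread_like *) owner;    /* NULL when free */
    };

    /* Illustrative policy only: spin while the owner is on a CPU and the
     * spin budget is unspent; otherwise tell the caller to block. */
    static bool mtx_should_keep_spinning(struct mtx_like *m,
                                         uint64_t spins_left)
    {
        struct thread_like *owner =
            atomic_load_explicit(&m->owner, memory_order_relaxed);
        if (owner == NULL)
            return true;    /* lock looks free: go retry the CAS */
        if (spins_left == 0)
            return false;   /* budget spent: block in the wait path */
        /* Spinning only helps if the owner can make progress. */
        return atomic_load_explicit(&owner->on_proc, memory_order_relaxed);
    }
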
-	
-/*
- *		void lck_mtx_try_lock_ext(lck_mtx_ext_t*)
- *
- */
-			.align	5
-			.globl	EXT(lck_mtx_try_lock_ext)
-LEXT(lck_mtx_try_lock_ext)
-			mr		r11,r3							; Save lock addr
-mlteEnter:
-			lwz		r0,MUTEX_ATTR(r3)
-			mtcrf	1,r0							; Set cr7
-			CHECK_SETUP(r12)	
-			CHECK_MUTEX_TYPE()
-			
-			bf		MUTEX_ATTR_STATb,mlteStatSkip	; Branch if no stat
-			lwz		r5,MUTEX_GRP(r3)				; Load lock group
-			li		r7,GRP_MTX_STAT_UTIL+4			; Set stat util offset
-mlteStatLoop:
-			lwarx	r8,r7,r5						; Load stat util cnt
-			addi	r8,r8,1							; Increment stat util cnt
-			stwcx.	r8,r7,r5						; Store stat util cnt
-			bne--	mlteStatLoop					; Retry if failed
-			mr.		r8,r8							; Test for zero
-			bne++	mlteStatSkip					; Skip if stat util cnt did not wrap
-			lwz		r8,GRP_MTX_STAT_UTIL(r5)		; Load upper stat util cnt
-			addi	r8,r8,1							; Increment upper stat util cnt
-			stw		r8,GRP_MTX_STAT_UTIL(r5)		; Store upper stat util cnt
-mlteStatSkip:
-			mfsprg	r6,1							; load the current thread
-			lwz		r5,MUTEX_DATA(r3)				; Get the lock value
-			mr.		r5,r5							; Quick check
-			bne--	L_mtx_try_slow					; Cannot get it now...
-			mfmsr	r9								; Get the MSR value
-			lis		r0,hi16(MASK(MSR_VEC))			; Get vector enable
-			ori		r0,r0,lo16(MASK(MSR_FP))		; Get FP enable
-			ori		r7,r0,lo16(MASK(MSR_EE))		; Get EE bit on too
-			andc	r9,r9,r0						; Clear FP and VEC
-			andc	r7,r9,r7						; Clear EE as well
-			mtmsr	r7								; Turn off interruptions 
-			isync									; May have turned off vec and fp here 
-
-mlteLoopTry:
-			lwarx	r5,MUTEX_DATA,r3				; load the lock value
-			mr.		r5,r5
-			bne--	mlteSlowX						; branch to the slow path
-			stwcx.	r6,MUTEX_DATA,r3				; grab the lock
-			bne--	mlteLoopTry						; retry if failed
-			.globl	EXT(mltelckPatch_isync)
-LEXT(mltelckPatch_isync)
-			isync									; stop prefetching
-			mflr	r12
-			bf		MUTEX_ATTR_DEBUGb,mlteDebSkip
-			mr		r8,r6							; Get the active thread
-			stw		r12,MUTEX_STACK(r3)				; Save our caller
-			stw		r8,MUTEX_THREAD(r3)				; Set the mutex's holding thread
-			mr		r5,r1
-			LCK_STACK(r3,r5,r6,r7,r8,r10)
-mlteDebSkip:
-			li		r3, 1
-			mtmsr	r9								; Say, any interrupts pending?
-			blr
-mlteSlowX:
-			li		r5,lgKillResv					; Killing field
-			stwcx.	r5,0,r5							; Kill reservation
-			mtmsr	r9								; Say, any interrupts pending?
-			b		L_mtx_try_slow
-
-
-/*
- *		void lck_mtx_try_lock(lck_mtx_t*)
- *
- */
-			.align	5
-			.globl	EXT(lck_mtx_try_lock)
-LEXT(lck_mtx_try_lock)
-
-			mfsprg	r6,1							; load the current thread
-			lwz		r5,MUTEX_DATA(r3)				; Get the lock value
-			mr		r11,r3							; Save lock addr
-			mr.		r5,r5							; Quick check
-			bne--	mltSlow00						; Indirect, or cannot get it now...
-
-mltLoopTry:
-			lwarx	r5,MUTEX_DATA,r3				; load the lock value
-			mr.		r5,r5
-			bne--	mltSlow01						; branch to the slow path
-			stwcx.	r6,MUTEX_DATA,r3				; grab the lock
-			bne--	mltLoopTry						; retry if failed
-			.globl	EXT(mltlckPatch_isync)
-LEXT(mltlckPatch_isync)
-			isync									; stop prefetching
-			li		r3, 1
-			blr
-
-mltSlow00:
-			cmpli	cr0,r5,MUTEX_IND				; Is it an indirect mutex?
-			bne--	mltSlow02						; No, go handle contention 
-			lwz		r3,MUTEX_PTR(r3)				; load mutex ext pointer
-			b		mlteEnter
-mltSlow01:
-			li		r5,lgKillResv					; Killing field
-			stwcx.	r5,0,r5							; Kill reservation
-
-mltSlow02:
-			li		r0,0
-			mtcrf	1,r0							; Set cr7 to zero
-
-L_mtx_try_slow:
-			PROLOG(0)
-	
-			lwz		r6,MUTEX_DATA(r3)				; Quick check
-			rlwinm.	r6,r6,30,2,31					; to see if someone has this lock already
-			bne-	mtFail							; Someone's got it already...
-
-			bl		lockDisa						; Go get a lock on the mutex's interlock lock
-			mr.		r4,r3							; Did we get it?
-			lwz		r3,FM_ARG0(r1)					; Restore the lock address
-			bne++	mtGotInt						; We got it just fine...
-			mr		r4,r11							; Saved lock addr
-			lis		r3,hi16(mutex_failed2)			; Get the failed mutex message
-			ori		r3,r3,lo16(mutex_failed2)		; Get the failed mutex message
-			bl		EXT(panic)						; Call panic
-			BREAKPOINT_TRAP							; We die here anyway, cannot get the lock
-	
-			.data
-mutex_failed2:
-			STRINGD	"attempt to interlock mutex (0x%08X) failed on mutex lock try\n\000"
-			.text
-			
-mtGotInt:
-			
-;			Note that there is no reason to do a load and reserve here.  We already
-;			hold the interlock and no one can touch this field unless they
-;			have that, so, we're free to play 
-			
-			lwz		r4,MUTEX_DATA(r3)				; Get the mutex's lock field
-			rlwinm.	r9,r4,30,2,31					; So, can we have it?
-			bne-	mtInUse							; Nope, somebody's playing already...
-			
-			bf++	MUTEX_ATTR_DEBUGb,mtDebSkip
-			CHECK_SETUP(r5)
-			mfsprg	r9,1							; Get the current activation
-			lwz		r5,0(r1)						; Get previous save frame
-			lwz		r6,FM_LR_SAVE(r5)				; Get our caller's address
-			mr		r8,r9							; Get the active thread
-			stw		r6,MUTEX_STACK(r3)				; Save our caller
-			stw		r8,MUTEX_THREAD(r3)				; Set the mutex's holding thread
-			LCK_STACK(r3,r5,r6,r7,r8,r10)
-mtDebSkip:
-			mr		r3,r11							; Get the based lock address
-			bl	EXT(lck_mtx_lock_acquire)
-			mfsprg	r5,1
-			mr.		r4,r3
-			lwz		r3,FM_ARG0(r1)					; restore r3 (saved in prolog)
-			lwz		r11,FM_ARG0+0x04(r1)			; restore r11 (saved in prolog)
-			beq		mtUnlock
-			ori		r5,r5,WAIT_FLAG
-
-mtUnlock:	eieio
-			stw	r5,MUTEX_DATA(r3)					; grab the mutexlock and free the interlock
-
-			bl		epStart							; Go enable preemption...
-
-			li		r3, 1
-			EPILOG									; Restore all saved registers
-			blr										; Return...
-
-;			We come to here when we have a resource conflict.  In other words,
-;			the mutex is held.
-
-mtInUse:	
-			bf++	MUTEX_ATTR_STATb,mtStatSkip		; Branch if no stat
-			lwz		r5,MUTEX_GRP(r3)				; Load lock group
-			li		r9,GRP_MTX_STAT_MISS+4			; Get stat miss offset
-mtStatLoop:
-			lwarx	r8,r9,r5						; Load stat miss cnt
-			addi	r8,r8,1							; Increment stat miss cnt	
-			stwcx.	r8,r9,r5						; Store stat miss cnt
-			bne--	mtStatLoop						; Retry if failed
-mtStatSkip:
-			rlwinm	r4,r4,0,0,30					; Get the unlock value
-			stw		r4,MUTEX_DATA(r3)				; free the interlock
-			bl		epStart							; Go enable preemption...
-
-mtFail:		li		r3,0							; Set failure code
-			EPILOG									; Restore all saved registers
-			blr										; Return...
-
-		
-
-/*
- *		void lck_mtx_ext_unlock(lck_mtx_ext_t* l)
- *
- */
-			.align	5
-			.globl	EXT(lck_mtx_ext_unlock)
-LEXT(lck_mtx_ext_unlock)
-mlueEnter:
-			.globl	EXT(mulckePatch_isync)
-LEXT(mulckePatch_isync)
-			isync
-			.globl	EXT(mulckePatch_eieio)     
-LEXT(mulckePatch_eieio)
-			eieio
-			mr		r11,r3							; Save lock addr
-mlueEnter1:
-			lwz		r0,MUTEX_ATTR(r3)
-			mtcrf	1,r0							; Set cr7
-			CHECK_SETUP(r12)	
-			CHECK_MUTEX_TYPE()
-			CHECK_THREAD(MUTEX_THREAD)
-
-			lwz		r5,MUTEX_DATA(r3)				; Get the lock
-			rlwinm.	r4,r5,0,30,31					; Quick check
-			bne--	L_mtx_unlock_slow				; Cannot get it now...
-			mfmsr	r9								; Get the MSR value
-			lis		r0,hi16(MASK(MSR_VEC))			; Get vector enable
-			ori		r0,r0,lo16(MASK(MSR_FP))		; Get FP enable
-			ori		r7,r0,lo16(MASK(MSR_EE))		; Get EE bit on too
-			andc	r9,r9,r0						; Clear FP and VEC
-			andc	r7,r9,r7						; Clear EE as well
-			mtmsr	r7								; Turn off interruptions 
-			isync									; May have turned off vec and fp here 
-
-mlueLoop:
-			lwarx	r5,MUTEX_DATA,r3
-			rlwinm.	r4,r5,0,30,31					; Bail if pending waiter or interlock set
-			li		r5,0							; Clear the mutexlock
-			bne--	mlueSlowX
-			stwcx.	r5,MUTEX_DATA,r3
-			bne--	mlueLoop
-			mtmsr	r9								; Say, any interrupts pending?
-			blr
-
-mlueSlowX:
-			li		r5,lgKillResv					; Killing field
-			stwcx.	r5,0,r5							; Dump reservation
-			mtmsr	r9								; Say, any interrupts pending?
-			b		L_mtx_unlock_slow				; Join slow path...
-
-/*
- *		void lck_mtx_unlock(lck_mtx_t* l)
- *
- */
-			.align	5
-			.globl	EXT(lck_mtx_unlock)
-LEXT(lck_mtx_unlock)
-mluEnter:
-			.globl	EXT(mulckPatch_isync)
-LEXT(mulckPatch_isync)
-			isync
-			.globl	EXT(mulckPatch_eieio)     
-LEXT(mulckPatch_eieio)
-			eieio
-			mr		r11,r3							; Save lock addr
-mluEnter1:
-			lwz		r5,MUTEX_DATA(r3)				; Get the lock
-			rlwinm.	r4,r5,0,30,31					; Quick check
-			bne--	mluSlow0						; Indirect, or cannot get it now...
-
-mluLoop:
-			lwarx	r5,MUTEX_DATA,r3
-			rlwinm.	r4,r5,0,30,31					; Bail if pending waiter or interlock set
-			li		r5,0							; Clear the mutexlock
-			bne--	mluSlowX
-			stwcx.	r5,MUTEX_DATA,r3
-			bne--	mluLoop
-#if	CONFIG_DTRACE
-/* lock released - LS_LCK_MTX_UNLOCK_RELEASE */
-			LOCKSTAT_LABEL(_lck_mtx_unlock_lockstat_patch_point)
-			blr
-
-			LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE)
-#endif
-			blr
-
-
-mluSlow0:
-			cmpli	cr0,r5,MUTEX_IND				; Is it an indirect mutex?
-			bne--	L_mtx_unlock_slow				; No, go handle contention 
-			lwz		r3,MUTEX_PTR(r3)				; load mutex ext pointer
-			b		mlueEnter1
-mluSlowX:
-			li		r5,lgKillResv					; Killing field
-			stwcx.	r5,0,r5							; Dump reservation
-
-L_mtx_unlock_slow:
-			
-			PROLOG(0)
-	
-			bl		lockDisa						; Go get a lock on the mutex's interlock lock
-			mr.		r4,r3							; Did we get it?
-			lwz		r3,FM_ARG0(r1)					; Restore the lock address
-			bne++	muGotInt						; We got it just fine...
-			mr		r4,r11							; Saved lock addr
-			lis		r3,hi16(mutex_failed3)			; Get the failed mutex message
-			ori		r3,r3,lo16(mutex_failed3)		; Get the failed mutex message
-			bl		EXT(panic)						; Call panic
-			BREAKPOINT_TRAP							; We die here anyway, cannot get the lock
-	
-			.data
-mutex_failed3:
-			STRINGD	"attempt to interlock mutex (0x%08X) failed on mutex unlock\n\000"
-			.text
-			
-			
-muGotInt:
-			lwz		r4,MUTEX_DATA(r3)
-			andi.	r5,r4,WAIT_FLAG					; are there any waiters ?
-			rlwinm	r4,r4,0,0,29
-			beq+	muUnlock						; Nope, we're done...
-
-			mr		r3,r11							; Get the based lock address
-			bl		EXT(lck_mtx_unlock_wakeup)		; yes, wake a thread
-			lwz		r3,FM_ARG0(r1)					; restore r3 (saved in prolog)
-			lwz		r11,FM_ARG0+0x04(r1)			; restore r11 (saved in prolog)
-			lwz		r5,MUTEX_DATA(r3)				; load the lock
-
-muUnlock:
-			andi.	r5,r5,WAIT_FLAG					; Get the unlock value
-			eieio
-			stw		r5,MUTEX_DATA(r3)				; unlock the interlock and lock
-
-			EPILOG									; Deal with the stack now, enable_preemption doesn't always want one
-			b		epStart							; Go enable preemption...
-
-/*
- *		void lck_mtx_assert(lck_mtx_t* l, unsigned int)
- *
- */
-			.align	5
-			.globl	EXT(lck_mtx_assert)
-LEXT(lck_mtx_assert)
-			mr		r11,r3
-maEnter:
-			lwz		r5,MUTEX_DATA(r3)
-			cmpli	cr0,r5,MUTEX_IND				; Is it an indirect mutex?
-			bne--	maCheck							; No, go check the assertion
-			lwz		r3,MUTEX_PTR(r3)				; load mutex ext pointer
-			b		maEnter
-maCheck:
-			mfsprg	r6,1							; load the current thread
-			rlwinm	r5,r5,0,0,29					; Extract the lock owner
-			cmpwi	r4,MUTEX_ASSERT_OWNED
-			cmplw	cr1,r6,r5						; Is the lock held by current act
-			crandc	cr0_eq,cr0_eq,cr1_eq			; Check owned assertion
-			bne--	maNext
-			mr		r4,r11
-			lis		r3,hi16(mutex_assert1)			; Get the failed mutex message
-			ori		r3,r3,lo16(mutex_assert1)		; Get the failed mutex message
-			b		maPanic							; Panic path
-maNext:
-			cmpwi	r4,MUTEX_ASSERT_NOTOWNED		; Check not owned assertion
-			crand	cr0_eq,cr0_eq,cr1_eq			; Combine with the ownership test
-			bnelr++
-maPanic:
-			PROLOG(0)
-			mr		r4,r11
-			lis		r3,hi16(mutex_assert2)			; Get the failed mutex message
-			ori		r3,r3,lo16(mutex_assert2)		; Get the failed mutex message
-			bl		EXT(panic)						; Call panic
-			BREAKPOINT_TRAP							; We die here anyway
-
-			.data
-mutex_assert1:
-			STRINGD	"mutex (0x%08X) not owned\n\000"
-mutex_assert2:
-			STRINGD	"mutex (0x%08X) owned\n\000"
-			.text
-			
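
lck_mtx_assert boils down to two predicates on the owner field (which in the original shares its word with flag bits, hence the rlwinm masking). A small C analogue with assumed names:

    #include <assert.h>
    #include <stdatomic.h>

    #define MUTEX_ASSERT_OWNED     1
    #define MUTEX_ASSERT_NOTOWNED  2

    struct mtx { _Atomic(void *) owner; };

    /* self is the calling thread's identity, as mfsprg 1 supplies above. */
    static void mtx_assert(struct mtx *m, unsigned kind, void *self)
    {
        void *owner = atomic_load_explicit(&m->owner, memory_order_relaxed);
        if (kind == MUTEX_ASSERT_OWNED)
            assert(owner == self && "mutex not owned");
        else
            assert(owner != self && "mutex owned");
    }
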
-			
-/*
- *		void lck_mtx_ilk_unlock(lck_mtx *lock)
- */
-			.globl	EXT(lck_mtx_ilk_unlock)
-LEXT(lck_mtx_ilk_unlock)
-
-			lwz		r10,MUTEX_DATA(r3)
-			rlwinm	r10,r10,0,0,30
-			eieio
-			stw		r10,MUTEX_DATA(r3)
-
-			b		epStart							; Go enable preemption...
-
-/*		
- *		void _enable_preemption_no_check(void)
- *
- *			This version does not check if we get preempted or not
- */
-			.align	4
-			.globl	EXT(_enable_preemption_no_check)
-
-LEXT(_enable_preemption_no_check)
-
-			cmplw	cr1,r1,r1						; Force zero cr so we know not to check if preempted
-			b		epCommn							; Join up with the other enable code... 
-
-/*		
- *		void _enable_preemption(void)
- *
- *			This version checks if we get preempted or not
- */
-			.align	5
-			.globl	EXT(_enable_preemption)
-
-LEXT(_enable_preemption)
-
-;		Here is where we enable preemption.
-
-epStart:
-			cmplwi	cr1,r1,0						; Force non-zero cr so we know to check if preempted
-
-epCommn:
-			mfsprg	r3,1							; Get current activation
-			li		r8,-1							; Get a decrementer
-			lwz		r5,ACT_PREEMPT_CNT(r3)			; Get the preemption level
-			add.	r5,r5,r8						; Bring down the disable count
-			blt-	epTooFar						; Count went negative, we went too far...
-			stw		r5,ACT_PREEMPT_CNT(r3)			; Save it back
-			crandc	cr0_eq,cr0_eq,cr1_eq
-			beq+	epCheckPreempt					; Go check if we need to be preempted...
-			blr										; Leave...
-epTooFar:	
-			mr		r4,r5
-			lis		r3,hi16(epTooFarStr)			; First half of panic string
-			ori		r3,r3,lo16(epTooFarStr)			; Second half of panic string
-			PROLOG(0)
-			bl		EXT(panic)
-			BREAKPOINT_TRAP							; We die here anyway
-
-			.data
-epTooFarStr:
-			STRINGD	"enable_preemption: preemption_level %d\n\000"
-
-			.text
-			.align	5
-epCheckPreempt:
-			lis		r0,hi16(MASK(MSR_VEC))			; Get vector enable
-			mfmsr	r9								; Get the MSR value
-			ori		r0,r0,lo16(MASK(MSR_FP))		; Get FP enable
-			andi.	r4,r9,lo16(MASK(MSR_EE))		; We cannot preempt if interruptions are off
-			beq+	epCPno							; No preemption here...
-			ori		r7,r0,lo16(MASK(MSR_EE))		; Get EE bit on too
-			andc	r9,r9,r0						; Clear FP and VEC
-			andc	r7,r9,r7						; Clear EE as well
-			mtmsr	r7								; Turn off interruptions 
-			isync									; May have turned off vec and fp here 
-			lwz		r3,ACT_PER_PROC(r3)				; Get the per_proc block
-			lwz		r7,PP_PENDING_AST(r3)			; Get pending AST mask
-			li		r5,AST_URGENT					; Get the requests we do honor
-			lis		r0,hi16(DoPreemptCall)			; Just in case, get the top of firmware call
-			and.	r7,r7,r5						; Should we preempt?
-			ori		r0,r0,lo16(DoPreemptCall)		; Merge in bottom part
-			mtmsr	r9								; Allow interrupts if we can
-epCPno:		
-			beqlr+									; We probably will not preempt...
-			sc										; Do the preemption
-			blr										; Now, go away...
-
-/*
- *		void disable_preemption(void)
- *
- *			Here is where we disable preemption.
- */
-			.align	5
-			.globl	EXT(_disable_preemption)
-
-LEXT(_disable_preemption)
-
-			mfsprg	r6,1							; Get the current activation
-			lwz		r5,ACT_PREEMPT_CNT(r6)			; Get the preemption level
-			addi	r5,r5,1							; Bring up the disable count
-			stw		r5,ACT_PREEMPT_CNT(r6)			; Save it back 
-			blr										; Return...
-
-/*
- *		int get_preemption_level(void)
- *
- *			Return the current preemption level
- */
-			.align	5
-			.globl	EXT(get_preemption_level)
-
-LEXT(get_preemption_level)
- 
-			mfsprg	r6,1							; Get current activation
-			lwz		r3,ACT_PREEMPT_CNT(r6)			; Get the preemption level
-			blr										; Return...
-
-/*
- *		void ppc_usimple_lock_init(simple_lock_t, etap_event_t)
- *
- *			Initialize a simple lock.
- */
-			.align	5
-			.globl	EXT(ppc_usimple_lock_init)
-
-LEXT(ppc_usimple_lock_init)
-
-			li	r0,	0								; set lock to free == 0 
-			stw	r0,	0(r3)							; Initialize the lock 
-			blr
-	
-/*
- *		void lck_spin_lock(lck_spin_t *)
- *		void ppc_usimple_lock(simple_lock_t *)
- *
- */
-			.align	5
-			.globl	EXT(lck_spin_lock)
-LEXT(lck_spin_lock)
-			.globl	EXT(ppc_usimple_lock)
-LEXT(ppc_usimple_lock)
-
-			mfsprg	r6,1							; Get the current activation 
-			lwz		r5,ACT_PREEMPT_CNT(r6)			; Get the preemption level
-			addi	r5,r5,1							; Bring up the disable count
-			stw		r5,ACT_PREEMPT_CNT(r6)			; Save it back 
-			mr		r5,r3							; Get the address of the lock
-			li		r8,0							; Set r8 to zero
-			li		r4,0							; Set r4 to zero
-
-slcktry:	lwarx	r11,SLOCK_ILK,r5				; Grab the lock value
-			andi.	r3,r11,ILK_LOCKED				; Is it locked?
-			ori		r11,r6,ILK_LOCKED				; Set interlock 
-			bne--	slckspin						; Yeah, wait for it to clear...
-			stwcx.	r11,SLOCK_ILK,r5				; Try to seize that there durn lock
-			bne--	slcktry							; Couldn't get it...
-			.globl  EXT(slckPatch_isync)
-LEXT(slckPatch_isync)
-			isync									; Make sure we don't use a speculatively loaded value
-			blr										; Go on home...
-
-slckspin:	li		r11,lgKillResv					; Killing field
-			stwcx.	r11,0,r11						; Kill reservation
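-
-;			A stwcx. to lgKillResv (apparently a dedicated scratch word) cancels any
-;			reservation left by an earlier lwarx, so a stale reservation cannot pair
-;			with a later stwcx.  The same idiom recurs below wherever a spin or
-;			failure path is entered.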
-
-			mr.		r4,r4							; Test timeout value
-			bne++	slockspin0
-			lis		r4,hi16(EXT(LockTimeOut))		; Get the high part 
-			ori		r4,r4,lo16(EXT(LockTimeOut))	; And the low part
-			lwz		r4,0(r4)						; Get the timeout value
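-
-;			The loop below spins with interrupts masked, checking the lock in slices
-;			of 128 timebase ticks; after each slice it briefly restores the saved MSR
-;			so pending interrupts can be taken, charges the 128 ticks against the
-;			LockTimeOut budget, and panics once that budget is exhausted.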
-
-slockspin0:	mr.		r8,r8							; Is r8 still zero (first time through)?
-			bne++	slockspin1						; No, the MSR values are already set up...
-			lis		r0,hi16(MASK(MSR_VEC))			; Get vector enable
-			mfmsr	r9								; Get the MSR value
-			ori		r0,r0,lo16(MASK(MSR_FP))		; Get FP enable
-			ori		r7,r0,lo16(MASK(MSR_EE))		; Get EE bit on too
-			andc	r9,r9,r0						; Clear FP and VEC
-			andc	r7,r9,r7						; Clear EE as well
-			mtmsr	r7								; Turn off interruptions 
-			isync									; May have turned off vec and fp here 
-			mftb	r8								; Get timestamp on entry
-			b		slcksniff
-
-slockspin1:	mtmsr	r7								; Turn off interruptions 
-			mftb	r8								; Get timestamp on entry
-
-slcksniff:	lwz		r3,SLOCK_ILK(r5)				; Get that lock in here
-			andi.	r3,r3,ILK_LOCKED				; Is it free yet?
-			beq++	slckretry						; Yeah, try for it again...
-			
-			mftb	r10								; Time stamp us now
-			sub		r10,r10,r8						; Get the elapsed time
-			cmplwi	r10,128							; Have we been spinning for 128 tb ticks?
-			blt++	slcksniff						; Not yet...
-			
-			mtmsr	r9								; Say, any interrupts pending?
-
-;			The following instructions force the pipeline to be interlocked so that only one
-;			instruction is issued per cycle.  This ensures that we stay enabled for a long enough
-;			time; if it were too short, pending interruptions would not have a chance to be taken
-
-			subi	r4,r4,128						; Back off elapsed time from timeout value
-			or		r4,r4,r4						; Do nothing here but force a single cycle delay
-			mr.		r4,r4							; See if we used the whole timeout
-			li		r3,0							; Assume a timeout return code
-			or		r4,r4,r4						; Do nothing here but force a single cycle delay
-			
-			ble--	slckfail						; We failed
-			b		slockspin1						; Now that we've opened an enable window, keep trying...
-slckretry:
-			mtmsr	r9								; Restore interrupt state
-			li		r8,1							; Show already through once
-			b		slcktry
-slckfail:											; We couldn't get the lock
-			lis		r3,hi16(slckpanic_str)
-			ori		r3,r3,lo16(slckpanic_str)
-			mr		r4,r5
-			mflr	r5
-			PROLOG(0)
-			bl		EXT(panic)
-			BREAKPOINT_TRAP							; We die here anyway
-
-		.data
-slckpanic_str:
-		STRINGD "simple lock (0x%08X) deadlock detection, pc=0x%08X\n\000"
-		.text
-
-/*
- *		boolean_t lck_spin_try_lock(lck_spin_t *)
- *		unsigned int ppc_usimple_lock_try(simple_lock_t *)
- *
- */
-			.align	5
-			.globl	EXT(lck_spin_try_lock)
-LEXT(lck_spin_try_lock)
-			.globl	EXT(ppc_usimple_lock_try)
-LEXT(ppc_usimple_lock_try)
-
-			lis		r0,hi16(MASK(MSR_VEC))			; Get vector enable
-			mfmsr	r9								; Get the MSR value 
-			ori		r0,r0,lo16(MASK(MSR_FP))		; Get FP enable
-			ori		r7,r0,lo16(MASK(MSR_EE))		; Get EE bit on too
-			andc	r9,r9,r0						; Clear FP and VEC
-			andc	r7,r9,r7						; Clear EE as well
-			mtmsr	r7								; Disable interruptions and thus, preemption
-			mfsprg	r6,1							; Get current activation 
-
-			lwz		r11,SLOCK_ILK(r3)				; Get the lock
-			andi.	r5,r11,ILK_LOCKED				; Check it...
-			bne--	slcktryfail						; Quickly fail...
-
-slcktryloop:	
-			lwarx	r11,SLOCK_ILK,r3				; Ld from addr of arg and reserve
-
-			andi.	r5,r11,ILK_LOCKED				; TEST...
-			ori		r5,r6,ILK_LOCKED
-			bne--	slcktryfailX					; Branch if the lock is taken (predicted free)
-	
-			stwcx.	r5,SLOCK_ILK,r3					; And SET (if still reserved)
-			bne--	slcktryloop						; If set failed, loop back 
-			
-			.globl  EXT(stlckPatch_isync)
-LEXT(stlckPatch_isync)
-			isync
-
-			lwz		r5,ACT_PREEMPT_CNT(r6)			; Get the preemption level
-			addi	r5,r5,1							; Bring up the disable count 
-			stw		r5,ACT_PREEMPT_CNT(r6)			; Save it back
-
- 			mtmsr	r9								; Allow interruptions now 
-			li		r3,1							; Set that the lock was free 
-			blr
-
-slcktryfailX:
-			li		r5,lgKillResv					; Killing field
-			stwcx.	r5,0,r5							; Kill reservation
-
-slcktryfail:
- 			mtmsr	r9								; Allow interruptions now 
-			li		r3,0							; FAILURE - lock was taken 
-			blr
-
-
-/*
- *		void lck_spin_unlock(lck_spin_t *)
- *		void ppc_usimple_unlock_rwcmb(simple_lock_t *)
- *
- */
-			.align	5
-			.globl	EXT(lck_spin_unlock)
-LEXT(lck_spin_unlock)
-			.globl	EXT(ppc_usimple_unlock_rwcmb)
-LEXT(ppc_usimple_unlock_rwcmb)
-
-			li		r0,0							; Get the unlock value
-			.globl  EXT(sulckPatch_isync)
-LEXT(sulckPatch_isync)
-			isync
-			.globl  EXT(sulckPatch_eieio)
-LEXT(sulckPatch_eieio)
-			eieio
-			stw		r0, SLOCK_ILK(r3)
-
-			b		epStart							; Go enable preemption...
-
-/*
- *		void ppc_usimple_unlock_rwmb(simple_lock_t *)
- *
- */
-			.align	5
-			.globl	EXT(ppc_usimple_unlock_rwmb)
-
-LEXT(ppc_usimple_unlock_rwmb)
-
-			li		r0,0
-			sync
-			stw		r0, SLOCK_ILK(r3)
-
-			b		epStart							; Go enable preemption...
-
-/*
- *		void lck_rw_lock_exclusive(lck_rw_t*)
- *
- */
-			.align	5
-			.globl	EXT(lck_rw_lock_exclusive)
-LEXT(lck_rw_lock_exclusive)
-#if	!MACH_LDEBUG
-			.globl	EXT(lock_write)
-LEXT(lock_write)
-#endif
-			lis		r7,0xFFFF
-			ori		r7,r7,(WANT_EXCL|WANT_UPGRADE|ILK_LOCKED)
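-;			Lock word layout, as inferred from the masks used here: the upper 16 bits
-;			hold the shared-reader count and the low-order bits hold the flag bits, so
-;			this combined mask rejects the lock whenever it is read-held, wanted or
-;			held exclusive, being upgraded, or interlocked.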
-rwleloop:	lwarx	r5,RW_DATA,r3					; Grab the lock value
-			and.	r8,r5,r7						; Can we have it?
-			ori		r6,r5,WANT_EXCL					; Mark Exclusive
-			bne--	rwlespin						; Branch if cannot be held
-			stwcx.	r6,RW_DATA,r3					; Update lock word
-			bne--	rwleloop
-			.globl  EXT(rwlePatch_isync)
-LEXT(rwlePatch_isync)
-			isync
-			blr
-rwlespin:
-			li		r4,lgKillResv					; Killing field
-			stwcx.	r4,0,r4							; Kill it
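-;			A data word equal to RW_IND marks an indirect lock: the real state lives
-;			in an extension block reached through RW_PTR, so contention is handed to
-;			the _ext variant instead of the _gen one.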
-			cmpli	cr0,r5,RW_IND					; Is it an indirect lock?
-			bne--	rwlespin1						; No, go handle contention 
-			mr		r4,r3							; pass lock pointer
-			lwz		r3,RW_PTR(r3)					; load lock ext pointer
-			b		EXT(lck_rw_lock_exclusive_ext)
-rwlespin1:
-			b		EXT(lck_rw_lock_exclusive_gen)
-
-/*
- *		void lck_rw_lock_shared(lck_rw_t*)
- *
- */
-			.align	5
-			.globl	EXT(lck_rw_lock_shared)
-LEXT(lck_rw_lock_shared)
-#if	!MACH_LDEBUG
-			.globl	EXT(lock_read)
-LEXT(lock_read)
-#endif
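-;			The fast path: addis of 1 adds 0x00010000, incrementing the reader count
-;			kept in the upper halfword of the lock word without disturbing the flags.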
-rwlsloop:	lwarx	r5,RW_DATA,r3					; Grab the lock value
-			andi.	r7,r5,WANT_EXCL|WANT_UPGRADE|ILK_LOCKED	; Can we have it?
-			bne--	rwlsopt							; Branch if cannot be held
-rwlsloopres:
-			addis	r6,r5,1							; Increment read cnt
-			stwcx.	r6,RW_DATA,r3					; Update lock word
-			bne--	rwlsloop
-			.globl  EXT(rwlsPatch_isync)
-LEXT(rwlsPatch_isync)
-			isync
-			blr
-rwlsopt:
-			andi.	r7,r5,PRIV_EXCL|ILK_LOCKED		; Can we have it?
-			bne--	rwlsspin						; Branch if cannot be held
-			lis		r7,0xFFFF						; Get read cnt mask
-			and.	r8,r5,r7						; Is it shared
-			bne		rwlsloopres						; Branch if can be held
-rwlsspin:
-			li		r4,lgKillResv					; Killing field
-			stwcx.	r4,0,r4							; Kill it
-			cmpli	cr0,r5,RW_IND					; Is it an indirect lock?
-			bne--	rwlsspin1						; No, go handle contention 
-			mr		r4,r3							; pass lock pointer
-			lwz		r3,RW_PTR(r3)					; load lock ext pointer
-			b		EXT(lck_rw_lock_shared_ext)
-rwlsspin1:
-			b		EXT(lck_rw_lock_shared_gen)
-
-/*
- *		boolean_t lck_rw_lock_shared_to_exclusive(lck_rw_t*)
- *
- */
-			.align	5
-			.globl	EXT(lck_rw_lock_shared_to_exclusive)
-LEXT(lck_rw_lock_shared_to_exclusive)
-#if	!MACH_LDEBUG
-			.globl	EXT(lock_read_to_write)
-LEXT(lock_read_to_write)
-#endif
-rwlseloop:	lwarx	r5,RW_DATA,r3					; Grab the lock value
-			addis	r6,r5,0xFFFF					; Decrement read cnt
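-;			Adding 0xFFFF0000 is -1 in the upper halfword, dropping this thread's
-;			read reference before the upgrade is attempted.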
-			lis		r8,0xFFFF						; Get read count mask
-			ori		r8,r8,WANT_UPGRADE|ILK_LOCKED	; Include Interlock and upgrade flags
-			and.	r7,r6,r8						; Can we have it?
-			ori		r9,r6,WANT_UPGRADE				; Mark Exclusive
-			bne--	rwlsespin						; Branch if cannot be held
-			stwcx.	r9,RW_DATA,r3					; Update lock word
-			bne--	rwlseloop
-			.globl  EXT(rwlsePatch_isync)
-LEXT(rwlsePatch_isync)
-			isync
-			li		r3,1							; Succeed, return TRUE...
-			blr
-rwlsespin:
-			li		r4,lgKillResv					; Killing field
-			stwcx.	r4,0,r4							; Kill it
-			cmpli	cr0,r5,RW_IND					; Is it an indirect lock?
-			bne--	rwlsespin1						; No, go handle contention 
-			mr		r4,r3							; pass lock pointer
-			lwz		r3,RW_PTR(r3)					; load lock ext pointer
-			b		EXT(lck_rw_lock_shared_to_exclusive_ext)
-rwlsespin1:
-			b		EXT(lck_rw_lock_shared_to_exclusive_gen)
-
-
-
-/*
- *		void lck_rw_lock_exclusive_to_shared(lck_rw_t*)
- *
- */
-			.align	5
-			.globl	EXT(lck_rw_lock_exclusive_to_shared)
-LEXT(lck_rw_lock_exclusive_to_shared)
-#if	!MACH_LDEBUG
-			.globl	EXT(lock_write_to_read)
-LEXT(lock_write_to_read)
-#endif
-			.globl  EXT(rwlesPatch_isync)
-LEXT(rwlesPatch_isync)
-			isync
-			.globl  EXT(rwlesPatch_eieio)
-LEXT(rwlesPatch_eieio)
-			eieio
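-;			Convert the exclusive hold into a single read hold: clear whichever of
-;			the want-exclusive/upgrade bits we own, set the reader count to one, and
-;			then wake any waiters so that blocked readers can proceed.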
-rwlesloop:	lwarx	r5,RW_DATA,r3					; Grab the lock value
-			andi.	r7,r5,ILK_LOCKED				; Test interlock flag
-			bne--	rwlesspin						; Branch if interlocked
-			lis		r6,1							; Get 1 for read count
-			andi.	r10,r5,WANT_UPGRADE				; Is it held with upgrade
-			li		r9,WANT_UPGRADE|WAIT_FLAG		; Get upgrade and wait flags mask
-			bne		rwlesexcl1						; Skip if held with upgrade
-			li		r9,WANT_EXCL|WAIT_FLAG			; Get exclusive and wait flags mask
-rwlesexcl1:
-			andc	r7,r5,r9						; Marked free
-			rlwimi	r6,r7,0,16,31					; Set shared cnt to one
-			stwcx.	r6,RW_DATA,r3					; Update lock word
-			bne--	rwlesloop
-			andi.	r7,r5,WAIT_FLAG					; Test wait flag
-			beqlr++									; Return if no waiters
-			addi	r3,r3,RW_EVENT					; Get lock event address
-			b		EXT(thread_wakeup)				; wakeup waiters
-rwlesspin:
-			li		r4,lgKillResv					; Killing field
-			stwcx.	r4,0,r4							; Kill it
-			cmpli	cr0,r5,RW_IND					; Is it an indirect lock?
-			bne--	rwlesspin1						; No, go handle contention 
-			mr		r4,r3							; pass lock pointer
-			lwz		r3,RW_PTR(r3)					; load lock ext pointer
-			b		EXT(lck_rw_lock_exclusive_to_shared_ext)
-rwlesspin1:
-			b		EXT(lck_rw_lock_exclusive_to_shared_gen)
-
-
-
-/*
- *		boolean_t lck_rw_try_lock_exclusive(lck_rw_t*)
- *
- */
-			.align	5
-			.globl	EXT(lck_rw_try_lock_exclusive)
-LEXT(lck_rw_try_lock_exclusive)
-			lis		r10,0xFFFF						; Load read count mask
-			ori		r10,r10,WANT_EXCL|WANT_UPGRADE	; Include exclusive and upgrade flags
-rwtleloop:	lwarx	r5,RW_DATA,r3					; Grab the lock value
-			andi.	r7,r5,ILK_LOCKED				; Test interlock flag
-			bne--	rwtlespin						; Branch if interlocked
-			and.	r7,r5,r10						; Can we have it
-			ori		r6,r5,WANT_EXCL					; Mark Exclusive
-			bne--	rwtlefail						; Branch if we cannot have it
-			stwcx.	r6,RW_DATA,r3					; Update lock word
-			bne--	rwtleloop
-			.globl  EXT(rwtlePatch_isync)
-LEXT(rwtlePatch_isync)
-			isync
-			li		r3,1							; Return TRUE
-			blr
-rwtlefail:
-			li		r4,lgKillResv					; Killing field
-			stwcx.	r4,0,r4							; Kill it
-			li		r3,0							; Return FALSE
-			blr
-rwtlespin:
-			li		r4,lgKillResv					; Killing field
-			stwcx.	r4,0,r4							; Kill it
-			cmpli	cr0,r5,RW_IND					; Is it an indirect lock?
-			bne--	rwtlespin1						; No, go handle contention 
-			mr		r4,r3							; pass lock pointer
-			lwz		r3,RW_PTR(r3)					; load lock ext pointer
-			b		EXT(lck_rw_try_lock_exclusive_ext)
-rwtlespin1:
-			b		EXT(lck_rw_try_lock_exclusive_gen)
-
-
-/*
- *		boolean_t lck_rw_try_lock_shared(lck_rw_t*)
- *
- */
-			.align	5
-			.globl	EXT(lck_rw_try_lock_shared)
-LEXT(lck_rw_try_lock_shared)
-rwtlsloop:	lwarx	r5,RW_DATA,r3					; Grab the lock value
-			andi.	r7,r5,ILK_LOCKED				; Test interlock flag
-			bne--	rwtlsspin						; Branch if interlocked
-			andi.	r7,r5,WANT_EXCL|WANT_UPGRADE	; So, can we have it?
-			bne--	rwtlsopt						; Branch if held exclusive
-rwtlsloopres:
-			addis	r6,r5,1							; Increment read cnt
-			stwcx.	r6,RW_DATA,r3					; Update lock word
-			bne--	rwtlsloop
-			.globl  EXT(rwtlsPatch_isync)
-LEXT(rwtlsPatch_isync)
-			isync
-			li		r3,1							; Return TRUE
-			blr
-rwtlsopt:
-			andi.	r7,r5,PRIV_EXCL					; Can we have it?
-			bne--	rwtlsfail						; Branch if cannot be held
-			lis		r7,0xFFFF						; Get read cnt mask
-			and.	r8,r5,r7						; Is it shared
-			bne		rwtlsloopres					; Branch if can be held
-rwtlsfail:
-			li		r3,0							; Return FALSE
-			blr
-rwtlsspin:
-			li		r4,lgKillResv					; Killing field
-			stwcx.	r4,0,r4							; Kill it
-			cmpli	cr0,r5,RW_IND					; Is it an indirect lock?
-			bne--	rwtlsspin1						; No, go handle contention 
-			mr		r4,r3							; pass lock pointer
-			lwz		r3,RW_PTR(r3)					; load lock ext pointer
-			b		EXT(lck_rw_try_lock_shared_ext)
-rwtlsspin1:
-			b		EXT(lck_rw_try_lock_shared_gen)
-
-
-
-/*
- *		lck_rw_type_t lck_rw_done(lck_rw_t*)
- *
- */
-			.align	5
-			.globl	EXT(lck_rw_done)
-LEXT(lck_rw_done)
-#if	!MACH_LDEBUG
-			.globl	EXT(lock_done)
-LEXT(lock_done)
-#endif
-			.globl  EXT(rwldPatch_isync)
-LEXT(rwldPatch_isync)
-			isync
-			.globl  EXT(rwldPatch_eieio)
-LEXT(rwldPatch_eieio)
-			eieio
-			li		r10,WAIT_FLAG					; Get wait flag
-			lis		r7,0xFFFF						; Get read cnt mask
-			mr		r12,r3							; Save lock addr
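-;			Drop one reference: if the reader count in the upper halfword is nonzero
-;			this is a shared release, otherwise clear whichever of WANT_UPGRADE or
-;			WANT_EXCL is held.  Waiters are woken only when the last reference goes
-;			away with WAIT_FLAG set.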
-rwldloop:	lwarx	r5,RW_DATA,r3					; Grab the lock value
-			andi.	r8,r5,ILK_LOCKED				; Test interlock flag
-			bne--	rwldspin						; Branch if interlocked
-			and.	r8,r5,r7						; Isolate the reader count
-			cmpi	cr1,r8,0						; Remember in cr1 whether it is held shared
-			beq		cr1,rwldexcl					; No, check exclusive
-			li		r11,RW_SHARED					; Set return value
-			addis	r6,r5,0xFFFF					; Decrement read count
-			and.	r8,r6,r7						; Is it still shared
-			li		r8,0							; Assume no wakeup
-			bne		rwldshared1						; Skip if still held shared
-			and		r8,r6,r10						; Extract wait flag
-			andc	r6,r6,r10						; Clear wait flag
-rwldshared1:
-			b		rwldstore
-rwldexcl:
-			li		r11,RW_EXCL						; Set return value
-			li		r9,WANT_UPGRADE					; Get upgrade flag
-			and.	r6,r5,r9						; Is it held with upgrade
-			li		r9,WANT_UPGRADE|WAIT_FLAG		; Mask upgrade and wait flags
-			bne		rwldexcl1						; Skip if held with upgrade
-			li		r9,WANT_EXCL|WAIT_FLAG			; Mask exclusive and wait flags
-rwldexcl1:
-			andc	r6,r5,r9						; Marked free
-			and		r8,r5,r10						; Null if no waiter
-rwldstore:
-			stwcx.	r6,RW_DATA,r3					; Update lock word
-			bne--	rwldloop
-			mr.		r8,r8							; wakeup needed?
-			mr		r3,r11							; Return lock held type
-			beqlr++
-			mr		r3,r12							; Restore lock address
-			PROLOG(0)
-			addi	r3,r3,RW_EVENT					; Get lock event address
-			bl		EXT(thread_wakeup)				; wakeup threads
-			lwz		r2,(FM_ALIGN(0)+FM_SIZE+FM_CR_SAVE)(r1)
-			mtcr	r2
-			EPILOG
-			li		r3,RW_SHARED					; Assume lock type shared
-			bne		cr1,rwldret						; Branch if it was held shared
-			li		r3,RW_EXCL						; Return lock type exclusive
-rwldret:
-			blr
-rwldspin:
-			li		r4,lgKillResv					; Killing field
-			stwcx.	r4,0,r4							; Kill it
-			cmpli	cr0,r5,RW_IND					; Is it an indirect lock?
-			bne--	rwldspin1						; No, go handle contention 
-			mr		r4,r3							; pass lock pointer
-			lwz		r3,RW_PTR(r3)					; load lock ext pointer
-			b		EXT(lck_rw_done_ext)
-rwldspin1:
-			b		EXT(lck_rw_done_gen)
-
-/*
- *		void lck_rw_ilk_lock(lck_rw_t *lock)
- */
-			.globl	EXT(lck_rw_ilk_lock)
-LEXT(lck_rw_ilk_lock)
-			crclr	hwtimeout						; no timeout option
-			li		r4,0							; request default timeout value
-			li		r12,ILK_LOCKED					; Load bit mask
-			b		lckcomm							; Join on up...
-
-/*
- *		void lck_rw_ilk_unlock(lck_rw_t *lock)
- */
-			.globl	EXT(lck_rw_ilk_unlock)
-LEXT(lck_rw_ilk_unlock)
-			li		r4,1
-			b		EXT(hw_unlock_bit)
diff --git a/osfmk/ppc/hw_lock_types.h b/osfmk/ppc/hw_lock_types.h
deleted file mode 100644
index be6e5568c..000000000
--- a/osfmk/ppc/hw_lock_types.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (C) 1998 Apple Computer
- * All Rights Reserved
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-
-#ifndef	_PPC_HW_LOCK_TYPES_H_
-#define	_PPC_HW_LOCK_TYPES_H_
-
-struct hslock {
-	int		lock_data;
-};
-
-typedef struct hslock hw_lock_data_t, *hw_lock_t;
-
-#define hw_lock_addr(hwl)	(&((hwl).lock_data))
-
-
-#endif	/* _PPC_HW_LOCK_TYPES_H_ */
diff --git a/osfmk/ppc/hw_perfmon.c b/osfmk/ppc/hw_perfmon.c
deleted file mode 100644
index 64d38d1e0..000000000
--- a/osfmk/ppc/hw_perfmon.c
+++ /dev/null
@@ -1,959 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <kern/thread.h>
-#include <kern/ipc_tt.h>
-#include <ppc/exception.h>
-#include <ppc/savearea.h>
-#include <ppc/hw_perfmon.h>
-#include <ppc/hw_perfmon_mmcr.h>
-#include <ppc/trap.h>
-#include <mach/thread_act.h>
-
-decl_simple_lock_data(,hw_perfmon_lock)
-static task_t hw_perfmon_owner = TASK_NULL;
-static int hw_perfmon_thread_count = 0;
-
-/* Notes:
- * -supervisor/user level filtering is unnecessary because of the way PMCs and MMCRs are context switched
- *  (can only count user events anyway)
- * -marked filtering is unnecessary because each thread has its own virtualized set of PMCs and MMCRs
- * -virtual counter PMI is passed up as a breakpoint exception
- */
-
-int perfmon_init(void)
-{
-	simple_lock_init(&hw_perfmon_lock, FALSE);
-	return KERN_SUCCESS;
-}
-
-/* PMC Facility Owner:
- * TASK_NULL - no one owns it
- * kernel_task - owned by hw_perfmon
- * other task - owned by another task
- */
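-
-/* Typical flow (sketch): perfmon_enable() acquires the facility on behalf of
- * kernel_task and bumps hw_perfmon_thread_count for each enabled thread;
- * perfmon_disable() drops the count, and perfmon_release_facility() will only
- * clear the owner once no enabled threads remain.
- */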
-
-int perfmon_acquire_facility(task_t task)
-{
-	kern_return_t retval = KERN_SUCCESS;
-  
-	simple_lock(&hw_perfmon_lock);
-  
-	if(hw_perfmon_owner==task) {
-#ifdef HWPERFMON_DEBUG
-		kprintf("perfmon_acquire_facility - ACQUIRED: already owner\n");
-#endif
-		retval = KERN_SUCCESS;
-		/* already own it */
-	} else if(hw_perfmon_owner==TASK_NULL) { /* no one owns it */
-		hw_perfmon_owner = task;
-		hw_perfmon_thread_count = 0;
-#ifdef HWPERFMON_DEBUG
-		kprintf("perfmon_acquire_facility - ACQUIRED: no current owner - made new owner\n");
-#endif
-		retval = KERN_SUCCESS;
-	} else { /* someone already owns it */
-		if(hw_perfmon_owner==kernel_task) {
-			if(hw_perfmon_thread_count==0) { /* kernel owns it but no threads using it */
-				hw_perfmon_owner = task;
-				hw_perfmon_thread_count = 0;
-#ifdef HWPERFMON_DEBUG
-				kprintf("perfmon_acquire_facility - ACQUIRED: kernel is current owner but no threads using it\n");
-#endif
-				retval = KERN_SUCCESS;
-			} else {
-#ifdef HWPERFMON_DEBUG
-				kprintf("perfmon_acquire_facility - DENIED: kernel is current owner and facility in use\n");
-#endif
-				retval = KERN_RESOURCE_SHORTAGE;
-			}
-		} else { /* non-kernel owner */
-#ifdef HWPERFMON_DEBUG
-			kprintf("perfmon_acquire_facility - DENIED: another active task owns the facility\n");
-#endif
-			retval = KERN_RESOURCE_SHORTAGE;
-		}
-	}
-  
-	simple_unlock(&hw_perfmon_lock);
-	return retval;
-}
-
-int perfmon_release_facility(task_t task)
-{
-	kern_return_t retval = KERN_SUCCESS;
-	task_t old_perfmon_owner = hw_perfmon_owner;
-  
-	simple_lock(&hw_perfmon_lock);
-  
-	if(task!=hw_perfmon_owner) {
-		retval = KERN_NO_ACCESS;
-	} else {
-		if(old_perfmon_owner==kernel_task) {
-			if(hw_perfmon_thread_count>0) {
-#ifdef HWPERFMON_DEBUG
-				kprintf("perfmon_release_facility - NOT RELEASED: kernel task is owner and has active perfmon threads\n");
-#endif
-				retval = KERN_NO_ACCESS;
-			} else {
-#ifdef HWPERFMON_DEBUG
-				kprintf("perfmon_release_facility - RELEASED: kernel task was owner\n");
-#endif
-				hw_perfmon_owner = TASK_NULL;
-				retval = KERN_SUCCESS;
-			}
-		} else {
-#ifdef HWPERFMON_DEBUG
-			kprintf("perfmon_release_facility - RELEASED: user task was owner\n");
-#endif
-			hw_perfmon_owner = TASK_NULL;
-			retval = KERN_SUCCESS;
-		}
-	}
-
-	simple_unlock(&hw_perfmon_lock);
-	return retval;
-}
-
-static int
-perfmon_enable(thread_t thread)
-{
-	struct savearea *sv = thread->machine.pcb;
-	kern_return_t retval = KERN_SUCCESS;
-	int curPMC;
-  
-	if(thread->machine.specFlags & perfMonitor) {
-		return KERN_SUCCESS; /* already enabled */
-	} else if(perfmon_acquire_facility(kernel_task)!=KERN_SUCCESS) {
-		return KERN_RESOURCE_SHORTAGE; /* facility is in use */
-	} else { /* kernel_task owns the facility and this thread has not yet been counted */
-		simple_lock(&hw_perfmon_lock);
-		hw_perfmon_thread_count++;
-		simple_unlock(&hw_perfmon_lock);
-	}
-
-	sv->save_mmcr1 = 0;
-	sv->save_mmcr2 = 0;
-	
-	switch(PerProcTable[0].ppe_vaddr->cpu_subtype) {
-		case CPU_SUBTYPE_POWERPC_750:
-		case CPU_SUBTYPE_POWERPC_7400:
-		case CPU_SUBTYPE_POWERPC_7450:
-			{
-				ppc32_mmcr0_reg_t mmcr0_reg;
-		
-				mmcr0_reg.value = 0;
-				mmcr0_reg.field.disable_counters_always = TRUE;
-				mmcr0_reg.field.disable_counters_supervisor = TRUE; /* no choice */
-				sv->save_mmcr0 = mmcr0_reg.value;
-			}
-			break;
-		case CPU_SUBTYPE_POWERPC_970:
-			{
-				ppc64_mmcr0_reg_t mmcr0_reg;
-		
-				mmcr0_reg.value = 0;
-				mmcr0_reg.field.disable_counters_always = TRUE;
-				mmcr0_reg.field.disable_counters_supervisor = TRUE; /* no choice */
-				sv->save_mmcr0 = mmcr0_reg.value;
-			}
-			break;
-		default:
-			retval = KERN_FAILURE;
-			break;
-	}
-  
-	if(retval==KERN_SUCCESS) {
-		for(curPMC=0; curPMC<MAX_CPUPMC_COUNT; curPMC++) {
-			sv->save_pmc[curPMC] = 0;
-			thread->machine.pmcovfl[curPMC] = 0;
-		}
-		thread->machine.perfmonFlags = 0;
-		thread->machine.specFlags |= perfMonitor; /* enable perf monitor facility for this thread */
-		if(thread==current_thread()) {
-			getPerProc()->spcFlags |= perfMonitor; /* update per_proc */
-		}
-	}
-
-#ifdef HWPERFMON_DEBUG  
-	kprintf("perfmon_enable - mmcr0=0x%llx mmcr1=0x%llx mmcr2=0x%llx\n", sv->save_mmcr0, sv->save_mmcr1, sv->save_mmcr2);
-#endif  
-
-	return retval;
-}
-
-int perfmon_disable(thread_t thread)
-{
-	struct savearea *sv = thread->machine.pcb;
-	int curPMC;
-  
-	if(!(thread->machine.specFlags & perfMonitor)) {
-		return KERN_NO_ACCESS; /* not enabled */
-	} else {
-		simple_lock(&hw_perfmon_lock);
-		hw_perfmon_thread_count--;
-		simple_unlock(&hw_perfmon_lock);
-		perfmon_release_facility(kernel_task); /* will release if hw_perfmon_thread_count is 0 */
-	}
-  
-	thread->machine.specFlags &= ~perfMonitor; /* disable perf monitor facility for this thread */
-	if(thread==current_thread()) {
-		PerProcTable[cpu_number()].ppe_vaddr->spcFlags &= ~perfMonitor; /* update per_proc */
-	}
-	sv->save_mmcr0 = 0;
-	sv->save_mmcr1 = 0;
-	sv->save_mmcr2 = 0;
-  
-	for(curPMC=0; curPMC<MAX_CPUPMC_COUNT; curPMC++) {
-		sv->save_pmc[curPMC] = 0;
-		thread->machine.pmcovfl[curPMC] = 0;
-		thread->machine.perfmonFlags = 0;
-	}
-  
-#ifdef HWPERFMON_DEBUG
-	kprintf("perfmon_disable - mmcr0=0x%llx mmcr1=0x%llx mmcr2=0x%llx\n", sv->save_mmcr0, sv->save_mmcr1, sv->save_mmcr2);
-#endif  
-
-	return KERN_SUCCESS;
-}
-
-static int
-perfmon_clear_counters(thread_t thread)
-{
-	struct savearea *sv = thread->machine.pcb;
-	int curPMC;
-
-#ifdef HWPERFMON_DEBUG
-	kprintf("perfmon_clear_counters (CPU%d)\n", cpu_number());
-#endif  
-
-	/* clear thread copy */
-	for(curPMC=0; curPMC<MAX_CPUPMC_COUNT; curPMC++) {
-		sv->save_pmc[curPMC] = 0;
-		thread->machine.pmcovfl[curPMC] = 0;
-	}
-  
-	return KERN_SUCCESS;
-}
-
-static int
-perfmon_write_counters(thread_t thread, uint64_t *pmcs)
-{
-	struct savearea *sv = thread->machine.pcb;
-	int curPMC;
-  
-#ifdef HWPERFMON_DEBUG
-	kprintf("perfmon_write_counters (CPU%d): mmcr0 = %016llX, pmc1=%llX pmc2=%llX pmc3=%llX pmc4=%llX pmc5=%llX pmc6=%llX pmc7=%llX pmc8=%llX\n", cpu_number(), sv->save_mmcr0, pmcs[PMC_1], pmcs[PMC_2], pmcs[PMC_3], pmcs[PMC_4], pmcs[PMC_5], pmcs[PMC_6], pmcs[PMC_7], pmcs[PMC_8]);
-#endif  
-
-	/* update thread copy */
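-	/* Each 64-bit user value is split: the low 31 bits become the live PMC
-	 * image and the remaining high bits go into the software overflow word,
-	 * matching the (ovfl << 31) | pmc recombination in perfmon_read_counters() */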
-	for(curPMC=0; curPMC<MAX_CPUPMC_COUNT; curPMC++) {
-		sv->save_pmc[curPMC] = pmcs[curPMC] & 0x7FFFFFFF;
-		thread->machine.pmcovfl[curPMC] = (pmcs[curPMC]>>31) & 0xFFFFFFFF;
-	}
-  
-	return KERN_SUCCESS;
-}
-
-static int
-perfmon_read_counters(thread_t thread, uint64_t *pmcs)
-{
-	struct savearea *sv = thread->machine.pcb;
-	int curPMC;
-  
-	/* retrieve from thread copy */
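-	/* Rebuild each 64-bit virtual count: the software overflow word supplies
-	 * the high bits (shifted left 31) and the live PMC the low 31 bits */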
-	for(curPMC=0; curPMC<MAX_CPUPMC_COUNT; curPMC++) {
-		pmcs[curPMC] = thread->machine.pmcovfl[curPMC]; 
-		pmcs[curPMC] = pmcs[curPMC]<<31;
-		pmcs[curPMC] |= (sv->save_pmc[curPMC] & 0x7FFFFFFF);
-	}
-
-	/* zero any unused counters on this platform */
-	switch(PerProcTable[0].ppe_vaddr->cpu_subtype) {
-		case CPU_SUBTYPE_POWERPC_750:
-		case CPU_SUBTYPE_POWERPC_7400:
-		case CPU_SUBTYPE_POWERPC_7450:
-			pmcs[PMC_7] = 0;
-			pmcs[PMC_8] = 0;
-			break;
-		default:
-			break;
-	}
-
-#ifdef HWPERFMON_DEBUG
-	kprintf("perfmon_read_counters (CPU%d): mmcr0 = %016llX pmc1=%llX pmc2=%llX pmc3=%llX pmc4=%llX pmc5=%llX pmc6=%llX pmc7=%llX pmc8=%llX\n", cpu_number(), sv->save_mmcr0, pmcs[PMC_1], pmcs[PMC_2], pmcs[PMC_3], pmcs[PMC_4], pmcs[PMC_5], pmcs[PMC_6], pmcs[PMC_7], pmcs[PMC_8]);
-#endif  
-
-	return KERN_SUCCESS;
-}
-
-static int
-perfmon_start_counters(thread_t thread)
-{
-	struct savearea *sv = thread->machine.pcb;
-	kern_return_t retval = KERN_SUCCESS;
-
-	switch(PerProcTable[0].ppe_vaddr->cpu_subtype) {
-		case CPU_SUBTYPE_POWERPC_750:
-		case CPU_SUBTYPE_POWERPC_7400:
-			{
-				ppc32_mmcr0_reg_t mmcr0_reg;
-				mmcr0_reg.value = sv->save_mmcr0;
-				mmcr0_reg.field.disable_counters_always = FALSE;
-				/* XXXXX PMI broken on 750, 750CX, 750FX, 7400 and 7410 v1.2 and earlier XXXXX */
-				mmcr0_reg.field.on_pmi_stop_counting = FALSE;
-				mmcr0_reg.field.enable_pmi = FALSE; 
-				mmcr0_reg.field.enable_pmi_on_pmc1 = FALSE;
-				mmcr0_reg.field.enable_pmi_on_pmcn = FALSE;
-				sv->save_mmcr0 = mmcr0_reg.value;
-			}
-			break;
-		case CPU_SUBTYPE_POWERPC_7450:
-			{
-				ppc32_mmcr0_reg_t mmcr0_reg;
-				mmcr0_reg.value = sv->save_mmcr0;
-				mmcr0_reg.field.disable_counters_always = FALSE;
-				mmcr0_reg.field.on_pmi_stop_counting = TRUE;
-				mmcr0_reg.field.enable_pmi = TRUE;
-				mmcr0_reg.field.enable_pmi_on_pmc1 = TRUE;
-				mmcr0_reg.field.enable_pmi_on_pmcn = TRUE;
-				sv->save_mmcr0 = mmcr0_reg.value;
-			}
-			break;
-		case CPU_SUBTYPE_POWERPC_970:
-			{
-				ppc64_mmcr0_reg_t mmcr0_reg;
-				mmcr0_reg.value = sv->save_mmcr0;
-				mmcr0_reg.field.disable_counters_always = FALSE;
-				mmcr0_reg.field.on_pmi_stop_counting = TRUE;
-				mmcr0_reg.field.enable_pmi = TRUE;
-				mmcr0_reg.field.enable_pmi_on_pmc1 = TRUE;
-				mmcr0_reg.field.enable_pmi_on_pmcn = TRUE;
-				sv->save_mmcr0 = mmcr0_reg.value;
-			}
-			break;
-		default:
-			retval = KERN_FAILURE;
-			break;
-	}
-
-#ifdef HWPERFMON_DEBUG
-	kprintf("perfmon_start_counters (CPU%d) - mmcr0=0x%llx mmcr1=0x%llx mmcr2=0x%llx\n", cpu_number(), sv->save_mmcr0, sv->save_mmcr1, sv->save_mmcr2);
-#endif
-
-	return retval;
-}
-
-static int
-perfmon_stop_counters(thread_t thread)
-{
-	struct savearea *sv = thread->machine.pcb;
-	kern_return_t retval = KERN_SUCCESS;
-
-	switch(PerProcTable[0].ppe_vaddr->cpu_subtype) {
-		case CPU_SUBTYPE_POWERPC_750:
-		case CPU_SUBTYPE_POWERPC_7400:
-		case CPU_SUBTYPE_POWERPC_7450:
-			{
-				ppc32_mmcr0_reg_t mmcr0_reg;
-				mmcr0_reg.value = sv->save_mmcr0;
-				mmcr0_reg.field.disable_counters_always = TRUE;
-				sv->save_mmcr0 = mmcr0_reg.value;
-			}
-			break;
-		case CPU_SUBTYPE_POWERPC_970:
-			{
-				ppc64_mmcr0_reg_t mmcr0_reg;
-				mmcr0_reg.value = sv->save_mmcr0;
-				mmcr0_reg.field.disable_counters_always = TRUE;
-				sv->save_mmcr0 = mmcr0_reg.value;
-			}
-			break;
-		default:
-			retval = KERN_FAILURE;
-			break;
-	}
-
-#ifdef HWPERFMON_DEBUG
-	kprintf("perfmon_stop_counters (CPU%d) - mmcr0=0x%llx mmcr1=0x%llx mmcr2=0x%llx\n", cpu_number(), sv->save_mmcr0, sv->save_mmcr1, sv->save_mmcr2);
-#endif
-
-	return retval;
-}
-
-static int
-perfmon_set_event(thread_t thread, int pmc, int event)
-{
-	struct savearea *sv = thread->machine.pcb;
-	kern_return_t retval = KERN_SUCCESS;
-
-#ifdef HWPERFMON_DEBUG
-	kprintf("perfmon_set_event b4 (CPU%d) - pmc=%d, event=%d - mmcr0=0x%llx mmcr1=0x%llx mmcr2=0x%llx\n", cpu_number(), pmc, event, sv->save_mmcr0, sv->save_mmcr1, sv->save_mmcr2);
-#endif
- 
-	switch(PerProcTable[0].ppe_vaddr->cpu_subtype) {
-		case CPU_SUBTYPE_POWERPC_750:
-		case CPU_SUBTYPE_POWERPC_7400:
-			{
-				ppc32_mmcr0_reg_t mmcr0_reg;
-				ppc32_mmcr1_reg_t mmcr1_reg;
-		
-				mmcr0_reg.value = sv->save_mmcr0;
-				mmcr1_reg.value = sv->save_mmcr1;
-		
-				switch(pmc) {
-					case PMC_1:
-						mmcr0_reg.field.pmc1_event = event;
-						sv->save_mmcr0 = mmcr0_reg.value;
-						break;
-					case PMC_2:
-						mmcr0_reg.field.pmc2_event = event;
-						sv->save_mmcr0 = mmcr0_reg.value;
-						break;
-					case PMC_3:
-						mmcr1_reg.field.pmc3_event = event;
-						sv->save_mmcr1 = mmcr1_reg.value;
-						break;
-					case PMC_4:
-						mmcr1_reg.field.pmc4_event = event;
-						sv->save_mmcr1 = mmcr1_reg.value;
-						break;
-					default:
-						retval = KERN_FAILURE;
-						break;
-				}
-			}
-			break;
-		case CPU_SUBTYPE_POWERPC_7450:
-			{
-				ppc32_mmcr0_reg_t mmcr0_reg;
-				ppc32_mmcr1_reg_t mmcr1_reg;
-
-				mmcr0_reg.value = sv->save_mmcr0;
-				mmcr1_reg.value = sv->save_mmcr1;
- 
-				switch(pmc) {
-					case PMC_1:
-						mmcr0_reg.field.pmc1_event = event;
-						sv->save_mmcr0 = mmcr0_reg.value;
-						break;
-					case PMC_2:
-						mmcr0_reg.field.pmc2_event = event;
-						sv->save_mmcr0 = mmcr0_reg.value;
-						break;
-					case PMC_3:
-						mmcr1_reg.field.pmc3_event = event;
-						sv->save_mmcr1 = mmcr1_reg.value;
-						break;
-					case PMC_4:
-						mmcr1_reg.field.pmc4_event = event;
-						sv->save_mmcr1 = mmcr1_reg.value;
-						break;
-					case PMC_5:
-						mmcr1_reg.field.pmc5_event = event;
-						sv->save_mmcr1 = mmcr1_reg.value;
-						break;
-					case PMC_6:
-						mmcr1_reg.field.pmc6_event = event;
-						sv->save_mmcr1 = mmcr1_reg.value;
-						break;
-					default:
-						retval = KERN_FAILURE;
-						break;
-				}
-			}
-			break;
-		case CPU_SUBTYPE_POWERPC_970:
-			{
-				ppc64_mmcr0_reg_t mmcr0_reg;
-				ppc64_mmcr1_reg_t mmcr1_reg;
-	  
-				mmcr0_reg.value = sv->save_mmcr0;
-				mmcr1_reg.value = sv->save_mmcr1;
-	  
-				switch(pmc) {
-					case PMC_1:
-						mmcr0_reg.field.pmc1_event = event;
-						sv->save_mmcr0 = mmcr0_reg.value;
-						break;
-					case PMC_2:
-						mmcr0_reg.field.pmc2_event = event;
-						sv->save_mmcr0 = mmcr0_reg.value;
-						break;
-					case PMC_3:
-						mmcr1_reg.field.pmc3_event = event;
-						sv->save_mmcr1 = mmcr1_reg.value;
-						break;
-					case PMC_4:
-						mmcr1_reg.field.pmc4_event = event;
-						sv->save_mmcr1 = mmcr1_reg.value;
-						break;
-					case PMC_5:
-						mmcr1_reg.field.pmc5_event = event;
-						sv->save_mmcr1 = mmcr1_reg.value;
-						break;
-					case PMC_6:
-						mmcr1_reg.field.pmc6_event = event;
-						sv->save_mmcr1 = mmcr1_reg.value;
-						break;
-					case PMC_7:
-						mmcr1_reg.field.pmc7_event = event;
-						sv->save_mmcr1 = mmcr1_reg.value;
-						break;
-					case PMC_8:
-						mmcr1_reg.field.pmc8_event = event;
-						sv->save_mmcr1 = mmcr1_reg.value;
-						break;
-					default:
-						retval = KERN_FAILURE;
-						break;
-				}
-			}
-			break;
-		default:
-			retval = KERN_FAILURE;
-			break;
-	}
-
-#ifdef HWPERFMON_DEBUG
-	kprintf("perfmon_set_event (CPU%d) - pmc=%d, event=%d - mmcr0=0x%llx mmcr1=0x%llx mmcr2=0x%llx\n", cpu_number(), pmc, event, sv->save_mmcr0, sv->save_mmcr1, sv->save_mmcr2);
-#endif
-
-	return retval;
-}
-
-static int
-perfmon_set_event_func(thread_t thread, uint32_t f)
-{
-	struct savearea *sv = thread->machine.pcb;
-	kern_return_t retval = KERN_SUCCESS;
-
-#ifdef HWPERFMON_DEBUG
-	kprintf("perfmon_set_event_func - func=%s\n", 
-		   f==PPC_PERFMON_FUNC_FPU ? "FUNC" :
-		   f==PPC_PERFMON_FUNC_ISU ? "ISU" :
-		   f==PPC_PERFMON_FUNC_IFU ? "IFU" :
-		   f==PPC_PERFMON_FUNC_VMX ? "VMX" :
-		   f==PPC_PERFMON_FUNC_IDU ? "IDU" :
-		   f==PPC_PERFMON_FUNC_GPS ? "GPS" :
-		   f==PPC_PERFMON_FUNC_LSU0 ? "LSU0" :
-		   f==PPC_PERFMON_FUNC_LSU1A ? "LSU1A" :
-		   f==PPC_PERFMON_FUNC_LSU1B ? "LSU1B" :
-		   f==PPC_PERFMON_FUNC_SPECA ? "SPECA" :
-		   f==PPC_PERFMON_FUNC_SPECB ? "SPECB" :
-		   f==PPC_PERFMON_FUNC_SPECC ? "SPECC" :
-		   "UNKNOWN");
-#endif /* HWPERFMON_DEBUG */
-
-	switch(PerProcTable[0].ppe_vaddr->cpu_subtype) {
-		case CPU_SUBTYPE_POWERPC_750:
-		case CPU_SUBTYPE_POWERPC_7400:
-		case CPU_SUBTYPE_POWERPC_7450:
-			retval = KERN_FAILURE; /* event functional unit only applies to 970 */
-			break;
-		case CPU_SUBTYPE_POWERPC_970:
-			{
-				ppc64_mmcr1_reg_t mmcr1_reg;
-				ppc_func_unit_t func_unit;
-
-				func_unit.value = f;
-				mmcr1_reg.value = sv->save_mmcr1;
-
-				mmcr1_reg.field.ttm0_select = func_unit.field.TTM0SEL;
-				mmcr1_reg.field.ttm1_select = func_unit.field.TTM1SEL;
-				mmcr1_reg.field.ttm2_select = 0; /* not used */
-				mmcr1_reg.field.ttm3_select = func_unit.field.TTM3SEL;
-				mmcr1_reg.field.speculative_event = func_unit.field.SPECSEL;
-				mmcr1_reg.field.lane0_select = func_unit.field.TD_CP_DBGxSEL;
-				mmcr1_reg.field.lane1_select = func_unit.field.TD_CP_DBGxSEL;
-				mmcr1_reg.field.lane2_select = func_unit.field.TD_CP_DBGxSEL;
-				mmcr1_reg.field.lane3_select = func_unit.field.TD_CP_DBGxSEL;
-
-				sv->save_mmcr1 = mmcr1_reg.value;
-			}
-			break;
-		default:
-			retval = KERN_FAILURE;
-			break;
-	}
-
-	return retval;
-}
-
-static int
-perfmon_set_threshold(thread_t thread, int threshold)
-{
-	struct savearea *sv = thread->machine.pcb;
-	kern_return_t retval = KERN_SUCCESS;
-
-	switch(PerProcTable[0].ppe_vaddr->cpu_subtype) {
-		case CPU_SUBTYPE_POWERPC_750:
-			{
-				ppc32_mmcr0_reg_t mmcr0_reg;
-
-				mmcr0_reg.value = sv->save_mmcr0;
-
-				if(threshold>63) { /* no multiplier on 750 */
-					int newThreshold = 63;
-#ifdef HWPERFMON_DEBUG
-					kprintf("perfmon_set_threshold - WARNING: supplied threshold (%d) exceeds max threshold value - clamping to %d\n", threshold, newThreshold);
-#endif
-					threshold = newThreshold;
-				}
-				mmcr0_reg.field.threshold_value = threshold;
-
-				sv->save_mmcr0 = mmcr0_reg.value;
-			}
-			break;
-
-		case CPU_SUBTYPE_POWERPC_7400:
-		case CPU_SUBTYPE_POWERPC_7450:
-			{
-				ppc32_mmcr0_reg_t mmcr0_reg;
-				ppc32_mmcr2_reg_t mmcr2_reg;
-
-				mmcr0_reg.value = sv->save_mmcr0;
-				mmcr2_reg.value = sv->save_mmcr2;
-
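-				/* The requested threshold is rounded down to a multiple of the
-				 * selected multiplier (2x up to 2*63, otherwise 32x) and clamped
-				 * at 32*63; the multiplier itself is selected in MMCR2 */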
-				if(threshold<=(2*63)) { /* 2x multiplier */
-					if(threshold%2 != 0) {
-						int newThreshold = 2*(threshold/2);
-#ifdef HWPERFMON_DEBUG
-						kprintf("perfmon_set_threshold - WARNING: supplied threshold (%d) is not evenly divisible by 2x multiplier - using threshold of %d instead\n", threshold, newThreshold);
-#endif
-						threshold = newThreshold;
-					}
-					mmcr2_reg.field.threshold_multiplier = 0;
-				} else if(threshold<=(32*63)) { /* 32x multiplier */
-					if(threshold%32 != 0) {
-						int newThreshold = 32*(threshold/32);
-#ifdef HWPERFMON_DEBUG
-						kprintf("perfmon_set_threshold - WARNING: supplied threshold (%d) is not evenly divisible by 32x multiplier - using threshold of %d instead\n", threshold, newThreshold);
-#endif
-						threshold = newThreshold;
-					}
-					mmcr2_reg.field.threshold_multiplier = 1;
-				} else {
-					int newThreshold = 32*63;
-#ifdef HWPERFMON_DEBUG
-					kprintf("perfmon_set_threshold - WARNING: supplied threshold (%d) exceeds max threshold value - clamping to %d\n", threshold, newThreshold);
-#endif
-					threshold = newThreshold;
-					mmcr2_reg.field.threshold_multiplier = 1;
-				}
-				mmcr0_reg.field.threshold_value = threshold;
-
-				sv->save_mmcr0 = mmcr0_reg.value;
-				sv->save_mmcr2 = mmcr2_reg.value;
-
-			}
-			break;
-		case CPU_SUBTYPE_POWERPC_970:
-			{
-				ppc64_mmcr0_reg_t mmcr0_reg;
-
-				mmcr0_reg.value = sv->save_mmcr0;
-
-				if(threshold>63) { /* the multiplier is in HID1 on the 970; we do not context switch HID1, so it is always 1x */
-					int newThreshold = 63;
-#ifdef HWPERFMON_DEBUG
-					kprintf("perfmon_set_threshold - WARNING: supplied threshold (%d) exceeds max threshold value - clamping to %d\n", threshold, newThreshold);
-#endif
-					threshold = newThreshold;
-				}
-				mmcr0_reg.field.threshold_value = threshold;
-
-				sv->save_mmcr0 = mmcr0_reg.value;
-			}
-			break;
-		default:
-			retval = KERN_FAILURE;
-			break;
-	}
-
-#ifdef HWPERFMON_DEBUG
-	kprintf("perfmon_set_threshold - threshold=%d - mmcr0=0x%llx mmcr1=0x%llx mmcr2=0x%llx\n", threshold, sv->save_mmcr0, sv->save_mmcr1, sv->save_mmcr2);
-#endif
-
-	return retval;
-}
-
-static int
-perfmon_set_tbsel(thread_t thread, int tbsel)
-{
-	struct savearea *sv = thread->machine.pcb;
-	kern_return_t retval = KERN_SUCCESS;
-
-	switch(PerProcTable[0].ppe_vaddr->cpu_subtype) {
-		case CPU_SUBTYPE_POWERPC_750:
-		case CPU_SUBTYPE_POWERPC_7400:
-		case CPU_SUBTYPE_POWERPC_7450:
-			{
-				ppc32_mmcr0_reg_t mmcr0_reg;
-
-				mmcr0_reg.value = sv->save_mmcr0;
-				switch(tbsel) {
-					case 0x0:
-					case 0x1:
-					case 0x2:
-					case 0x3:
-						mmcr0_reg.field.timebase_bit_selector = tbsel;
-						break;
-					default:
-						retval = KERN_FAILURE;
-				}
-				sv->save_mmcr0 = mmcr0_reg.value;
-			}
-			break;
-		case CPU_SUBTYPE_POWERPC_970:
-			{
-				ppc64_mmcr0_reg_t mmcr0_reg;
-
-				mmcr0_reg.value = sv->save_mmcr0;
-				switch(tbsel) {
-					case 0x0:
-					case 0x1:
-					case 0x2:
-					case 0x3:
-						mmcr0_reg.field.timebase_bit_selector = tbsel;
-						break;
-					default:
-						retval = KERN_FAILURE;
-				}
-				sv->save_mmcr0 = mmcr0_reg.value;
-			}
-			break;
-		default:
-			retval = KERN_FAILURE;
-			break;
-	}
-
-#ifdef HWPERFMON_DEBUG
-	kprintf("perfmon_set_tbsel - tbsel=%d - mmcr0=0x%llx mmcr1=0x%llx mmcr2=0x%llx\n", tbsel, sv->save_mmcr0, sv->save_mmcr1, sv->save_mmcr2);
-#endif
-
-	return retval;
-}
-
-int perfmon_control(struct savearea *ssp)
-{
-	mach_port_name_t thr_port = CAST_DOWN(mach_port_name_t, ssp->save_r3); 
-	int action = (int)ssp->save_r4;
-	int pmc = (int)ssp->save_r5;
-	int val = (int)ssp->save_r6;
-	uint64_t *usr_pmcs_p = CAST_DOWN(uint64_t *, ssp->save_r7);
-	thread_t thread = THREAD_NULL;
-	uint64_t kern_pmcs[MAX_CPUPMC_COUNT];
-	kern_return_t retval = KERN_SUCCESS;
-	int error;  
-	boolean_t oldlevel;
-
-	thread = port_name_to_thread(thr_port); // convert user space thread port name to a thread_t
-	if(!thread) {
-		ssp->save_r3 = KERN_INVALID_ARGUMENT;
-		return 1;  /* Return and check for ASTs... */
-	}
-
-	if(thread!=current_thread()) {
-		thread_suspend(thread);
-	}
-
-#ifdef HWPERFMON_DEBUG
-	//  kprintf("perfmon_control: action=0x%x pmc=%d val=%d pmcs=0x%x\n", action, pmc, val, usr_pmcs_p);
-#endif  
-
-	oldlevel = ml_set_interrupts_enabled(FALSE);
-  
-	/* individual actions which do not require perfmon facility to be enabled */
-	if(action==PPC_PERFMON_DISABLE) {
-		retval = perfmon_disable(thread);
-	}
-	else if(action==PPC_PERFMON_ENABLE) {
-		retval = perfmon_enable(thread);
-	}
-  
-	else { /* individual actions which do require perfmon facility to be enabled */
-		if(!(thread->machine.specFlags & perfMonitor)) { /* perfmon not enabled */
-#ifdef HWPERFMON_DEBUG
-			kprintf("perfmon_control: ERROR - perfmon not enabled for this thread\n");
-#endif
-			retval = KERN_NO_ACCESS;
-			goto perfmon_return;
-		}
-	
-		if(action==PPC_PERFMON_SET_EVENT) {
-			retval = perfmon_set_event(thread, pmc, val);
-		}
-		else if(action==PPC_PERFMON_SET_THRESHOLD) {
-			retval = perfmon_set_threshold(thread, val);
-		}
-		else if(action==PPC_PERFMON_SET_TBSEL) {
-			retval = perfmon_set_tbsel(thread, val);
-		}
-		else if(action==PPC_PERFMON_SET_EVENT_FUNC) {
-			retval = perfmon_set_event_func(thread, val);
-		}
-		else if(action==PPC_PERFMON_ENABLE_PMI_BRKPT) {
-			if(val) {
-				thread->machine.perfmonFlags |= PERFMONFLAG_BREAKPOINT_FOR_PMI;
-			} else {
-				thread->machine.perfmonFlags &= ~PERFMONFLAG_BREAKPOINT_FOR_PMI;
-			}
-			retval = KERN_SUCCESS;
-		}
-	
-		/* combinable actions */
-		else {
-			if(action & PPC_PERFMON_STOP_COUNTERS) {
-				error = perfmon_stop_counters(thread);
-				if(error!=KERN_SUCCESS) {
-					retval = error;
-					goto perfmon_return;
-				}
-			}
-			if(action & PPC_PERFMON_CLEAR_COUNTERS) {
-				error = perfmon_clear_counters(thread);
-				if(error!=KERN_SUCCESS) {
-					retval = error;
-					goto perfmon_return;
-				}
-			}
-			if(action & PPC_PERFMON_WRITE_COUNTERS) {
-				if((error = copyin(CAST_USER_ADDR_T(usr_pmcs_p), (void *)kern_pmcs, MAX_CPUPMC_COUNT*sizeof(uint64_t)))) {
-					retval = error;
-					goto perfmon_return;
-				}
-				error = perfmon_write_counters(thread, kern_pmcs);
-				if(error!=KERN_SUCCESS) {
-					retval = error;
-					goto perfmon_return;
-				}
-			}
-			if(action & PPC_PERFMON_READ_COUNTERS) {
-				error = perfmon_read_counters(thread, kern_pmcs);
-				if(error!=KERN_SUCCESS) {
-					retval = error;
-					goto perfmon_return;
-				}
-				if((error = copyout((void *)kern_pmcs, CAST_USER_ADDR_T(usr_pmcs_p), MAX_CPUPMC_COUNT*sizeof(uint64_t)))) {
-					retval = error;
-					goto perfmon_return;
-				}
-			}
-			if(action & PPC_PERFMON_START_COUNTERS) {
-				error = perfmon_start_counters(thread);
-				if(error!=KERN_SUCCESS) {
-					retval = error;
-					goto perfmon_return;
-				}
-			}
-		}
-	}
-  
- perfmon_return:
-	ml_set_interrupts_enabled(oldlevel);
-
-#ifdef HWPERFMON_DEBUG
-	kprintf("perfmon_control (CPU%d): mmcr0 = %016llX, pmc1=%X pmc2=%X pmc3=%X pmc4=%X pmc5=%X pmc6=%X pmc7=%X pmc8=%X\n", cpu_number(), ssp->save_mmcr0, ssp->save_pmc[PMC_1], ssp->save_pmc[PMC_2], ssp->save_pmc[PMC_3], ssp->save_pmc[PMC_4], ssp->save_pmc[PMC_5], ssp->save_pmc[PMC_6], ssp->save_pmc[PMC_7], ssp->save_pmc[PMC_8]);
-#endif  
- 
-	if(thread!=current_thread()) {
-		thread_resume(thread);
-	}
-
-#ifdef HWPERFMON_DEBUG
-	if(retval!=KERN_SUCCESS) {
-		kprintf("perfmon_control - ERROR: retval=%d\n", retval);
-	}
-#endif /* HWPERFMON_DEBUG */
-
-	ssp->save_r3 = retval;
-	return 1;  /* Return and check for ASTs... */
-}
-
-int perfmon_handle_pmi(struct savearea *ssp)
-{
-	int curPMC;
-	kern_return_t retval = KERN_SUCCESS;
-	thread_t thread = current_thread();
-
-#ifdef HWPERFMON_DEBUG
-		kprintf("perfmon_handle_pmi: got rupt\n");
-#endif
-
-	if(!(thread->machine.specFlags & perfMonitor)) { /* perfmon not enabled */
-#ifdef HWPERFMON_DEBUG
-		kprintf("perfmon_handle_pmi: ERROR - perfmon not enabled for this thread\n");
-#endif
-		return KERN_FAILURE;
-	}
-  
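-	/* A set high bit (0x80000000) means the 32-bit counter has overflowed:
-	 * either reflect a breakpoint exception to the thread (when the overflow
-	 * word has been preloaded to 0xFFFFFFFF and PMI breakpoints were requested)
-	 * or absorb the overflow into the software overflow word and restart the
-	 * counter from zero */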
-	for(curPMC=0; curPMC<MAX_CPUPMC_COUNT; curPMC++) {
-		if(thread->machine.pcb->save_pmc[curPMC] & 0x80000000) {
-			if(thread->machine.pmcovfl[curPMC]==0xFFFFFFFF && (thread->machine.perfmonFlags & PERFMONFLAG_BREAKPOINT_FOR_PMI)) {
-				doexception(EXC_BREAKPOINT, EXC_PPC_PERFMON, (unsigned int)ssp->save_srr0); // pass up a breakpoint exception
-				return KERN_SUCCESS;
-			} else {
-				thread->machine.pmcovfl[curPMC]++;
-				thread->machine.pcb->save_pmc[curPMC] = 0;
-			}
-		}
-	}
-  
-	if(retval==KERN_SUCCESS) {
-		switch(PerProcTable[0].ppe_vaddr->cpu_subtype) {
-			case CPU_SUBTYPE_POWERPC_7450:
-				{
-					ppc32_mmcr0_reg_t mmcr0_reg;
-	
-					mmcr0_reg.value = thread->machine.pcb->save_mmcr0;
-					mmcr0_reg.field.disable_counters_always = FALSE;
-					mmcr0_reg.field.enable_pmi = TRUE;
-					thread->machine.pcb->save_mmcr0 = mmcr0_reg.value;
-				}
-				retval = KERN_SUCCESS;
-				break;
-			case CPU_SUBTYPE_POWERPC_970:
-				{
-					ppc64_mmcr0_reg_t mmcr0_reg;
-	
-					mmcr0_reg.value = thread->machine.pcb->save_mmcr0;
-					mmcr0_reg.field.disable_counters_always = FALSE;
-					mmcr0_reg.field.enable_pmi = TRUE;
-					thread->machine.pcb->save_mmcr0 = mmcr0_reg.value;
-				}
-				retval = KERN_SUCCESS;
-				break;
-			default:
-				retval = KERN_FAILURE;
-				break;
-		}
-	}
-
-	return retval;
-}
diff --git a/osfmk/ppc/hw_perfmon.h b/osfmk/ppc/hw_perfmon.h
deleted file mode 100644
index c091d0b7b..000000000
--- a/osfmk/ppc/hw_perfmon.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#ifndef _HW_PERFMON_H_
-#define _HW_PERFMON_H_
-
-#ifndef __ppc__
-#error This file is only useful on PowerPC.
-#endif
-
-#define MAX_CPUPMC_COUNT  8
-
-#define PMC_1    0
-#define PMC_2    1
-#define PMC_3    2
-#define PMC_4    3
-#define PMC_5    4
-#define PMC_6    5
-#define PMC_7    6
-#define PMC_8    7
-
-/* these actions can be combined and simultaneously performed with a single call to perfmon_control() */
-typedef enum {
-	PPC_PERFMON_CLEAR_COUNTERS =   0x0002,
-	PPC_PERFMON_START_COUNTERS =   0x0004,
-	PPC_PERFMON_STOP_COUNTERS  =   0x0008,
-	PPC_PERFMON_READ_COUNTERS  =   0x0010,
-	PPC_PERFMON_WRITE_COUNTERS =   0x0020
-} perfmon_multi_action_t;
-
-/* these actions can not be combined and each requires a separate call to perfmon_control() */
-typedef enum {
-	PPC_PERFMON_ENABLE =           0x00010000,
-	PPC_PERFMON_DISABLE =          0x00020000,
-	PPC_PERFMON_SET_EVENT =        0x00030000,
-	PPC_PERFMON_SET_THRESHOLD =    0x00040000,
-	PPC_PERFMON_SET_TBSEL =        0x00050000,
-	PPC_PERFMON_SET_EVENT_FUNC =   0x00060000,
-	PPC_PERFMON_ENABLE_PMI_BRKPT = 0x00070000
-} perfmon_single_action_t;
-
-/* used to select byte lane and speculative events (currently 970 only) */
-typedef enum {                        /* SPECSEL[0:1]  TD_CP_DBGxSEL[0:1]  TTM3SEL[0:1]  TTM1SEL[0:1]  TTM0SEL[0:1] */
-	PPC_PERFMON_FUNC_FPU =         0,   /*           00                  00            00            00            00 */
-	PPC_PERFMON_FUNC_ISU =         1,   /*           00                  00            00            00            01 */
-	PPC_PERFMON_FUNC_IFU =         2,   /*           00                  00            00            00            10 */
-	PPC_PERFMON_FUNC_VMX =         3,   /*           00                  00            00            00            11 */
-	PPC_PERFMON_FUNC_IDU =        64,   /*           00                  01            00            00            00 */
-	PPC_PERFMON_FUNC_GPS =        76,   /*           00                  01            00            11            00 */
-	PPC_PERFMON_FUNC_LSU0 =      128,   /*           00                  10            00            00            00 */
-	PPC_PERFMON_FUNC_LSU1A =     192,   /*           00                  11            00            00            00 */
-	PPC_PERFMON_FUNC_LSU1B =     240,   /*           00                  11            11            00            00 */
-	PPC_PERFMON_FUNC_SPECA =     256,   /*           01                  00            00            00            00 */
-	PPC_PERFMON_FUNC_SPECB =     512,   /*           10                  00            00            00            00 */
-	PPC_PERFMON_FUNC_SPECC =     768,   /*           11                  00            00            00            00 */
-} perfmon_functional_unit_t;
-
-#ifdef MACH_KERNEL_PRIVATE
-int perfmon_acquire_facility(task_t task);
-int perfmon_release_facility(task_t task);
-
-extern int perfmon_disable(thread_t thr_act);
-extern int perfmon_init(void);
-extern int perfmon_control(struct savearea *save);
-extern int perfmon_handle_pmi(struct savearea *ssp);
-
-/* perfmonFlags */
-#define PERFMONFLAG_BREAKPOINT_FOR_PMI     0x1
-
-#endif /* MACH_KERNEL_PRIVATE */
-
-/* 
- * From user space:
- * 
- * int perfmon_control(thread_t thread, perfmon_action_t action, int pmc, u_int32_t val, u_int64_t *pmcs);
- * 
- * r3: thread
- * r4: action
- * r5: pmc
- * r6: event/threshold/tbsel/count
- * r7: pointer to space for PMC counts: uint64_t[MAX_CPUPMC_COUNT]
- *
- * perfmon_control(thread, PPC_PERFMON_CLEAR_COUNTERS, 0, 0, NULL);
- * perfmon_control(thread, PPC_PERFMON_START_COUNTERS, 0, 0, NULL);
- * perfmon_control(thread, PPC_PERFMON_STOP_COUNTERS, 0, 0, NULL);
- * perfmon_control(thread, PPC_PERFMON_READ_COUNTERS, 0, 0, uint64_t *pmcs);
- * perfmon_control(thread, PPC_PERFMON_WRITE_COUNTERS, 0, 0, uint64_t *pmcs);
- * perfmon_control(thread, PPC_PERFMON_ENABLE, 0, 0, NULL);
- * perfmon_control(thread, PPC_PERFMON_DISABLE, 0, 0, NULL);
- * perfmon_control(thread, PPC_PERFMON_SET_EVENT, int pmc, int event, NULL);
- * perfmon_control(thread, PPC_PERFMON_SET_THRESHOLD, 0, int threshold, NULL);
- * perfmon_control(thread, PPC_PERFMON_SET_TBSEL, 0, int tbsel, NULL);
- * perfmon_control(thread, PPC_PERFMON_SET_EVENT_FUNC, 0, perfmon_functional_unit_t func, NULL);
- * perfmon_control(thread, PPC_PERFMON_ENABLE_PMI_BRKPT, 0, boolean_t enable, NULL);
- *
- */
-
-#endif /* _HW_PERFMON_H_ */
diff --git a/osfmk/ppc/hw_perfmon_mmcr.h b/osfmk/ppc/hw_perfmon_mmcr.h
deleted file mode 100644
index 6dd894d94..000000000
--- a/osfmk/ppc/hw_perfmon_mmcr.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
- 
-#ifndef _HW_PERFMON_MMCR_H_
-#define _HW_PERFMON_MMCR_H_
-
-#ifndef __ppc__
-#error This file is only useful on PowerPC.
-#endif
-
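-/* These unions overlay C bitfields on the architected MMCR layouts; the field
- * order relies on the big-endian, MSB-first bitfield allocation used on PowerPC */
-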
-typedef struct {
-	uint32_t disable_counters_always : 1;     /*     0: disable counters */
-	uint32_t disable_counters_supervisor : 1; /*     1: disable counters (supervisor) */
-	uint32_t disable_counters_user : 1;       /*     2: disable counters (user) */
-	uint32_t disable_counters_marked : 1;     /*     3: disable counters (marked bit == 1) */
-	uint32_t disable_counters_unmarked : 1;   /*     4: disable counters (marked bit == 0) */
-	uint32_t enable_pmi : 1;                  /*     5: performance monitor interrupt enable */
-	uint32_t on_pmi_stop_counting : 1;        /*     6: disable counters (pmi) */
-	uint32_t timebase_bit_selector : 2;       /*   7-8: TBL bit for TB events */
-	uint32_t enable_timebase_pmi : 1;         /*     9: enable pmi on TBL bit transition */
-	uint32_t threshold_value : 6;             /* 10-15: threshold value */
-	uint32_t enable_pmi_on_pmc1 : 1;          /*    16: enable pmi on pmc1 overflow */
-	uint32_t enable_pmi_on_pmcn : 1;          /*    17: enable pmi on any pmc except pmc1 overflow */
-	uint32_t enable_pmi_trigger : 1;          /*    18: enable triggering of pmcn by pmc1 overflow */
-	uint32_t pmc1_event : 7;                  /* 19-25: pmc1 event select */
-	uint32_t pmc2_event : 6;                  /* 26-31: pmc2 event select */
-} ppc32_mmcr0_bits_t;
-
-typedef union {
-	uint32_t value;
-	ppc32_mmcr0_bits_t field;
-} ppc32_mmcr0_reg_t;
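-
-/*
- * Illustrative sketch (editorial addition): the value/field unions in this
- * header exist so that callers can compose a register image with named
- * bitfields and then move the flat word to the SPR in one step.  The layout
- * relies on the compiler packing bitfields most-significant-bit first, as
- * the PowerPC ABI targeted here does; mtspr_mmcr0() is a hypothetical
- * helper, not a function in this file.
- *
- *	ppc32_mmcr0_reg_t mmcr0;
- *
- *	mmcr0.value = 0;
- *	mmcr0.field.disable_counters_supervisor = 1;
- *	mmcr0.field.pmc1_event = event;
- *	mtspr_mmcr0(mmcr0.value);
- */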
-
-typedef struct {
-	uint32_t pmc3_event : 5;
-	uint32_t pmc4_event : 5;
-	uint32_t pmc5_event : 5;
-	uint32_t pmc6_event : 6;
-	uint32_t /*reserved*/ : 11;
-} ppc32_mmcr1_bits_t;
-
-typedef union {
-	uint32_t value;
-	ppc32_mmcr1_bits_t field;
-} ppc32_mmcr1_reg_t;
-
-typedef struct {
-	uint32_t threshold_multiplier : 1;
-	uint32_t /*reserved*/ : 31;
-} ppc32_mmcr2_bits_t;
-
-typedef union {
-	uint32_t value;
-	ppc32_mmcr2_bits_t field;
-} ppc32_mmcr2_reg_t;
-
-typedef struct {
-	uint32_t /* reserved */ : 32;             /*  0-31: reserved */
-	uint32_t disable_counters_always : 1;     /*    32: disable counters */
-	uint32_t disable_counters_supervisor : 1; /*    33: disable counters (supervisor) */
-	uint32_t disable_counters_user : 1;       /*    34: disable counters (user) */
-	uint32_t disable_counters_marked : 1;     /*    35: disable counters (marked bit == 1) */
-	uint32_t disable_counters_unmarked : 1;   /*    36: disable counters (marked bit == 0) */
-	uint32_t enable_pmi : 1;                  /*    37: performance monitor interrupt enable */
-	uint32_t on_pmi_stop_counting : 1;        /*    38: disable counters (pmi) */
-	uint32_t timebase_bit_selector : 2;       /* 39-40: TBL bit for timebase events */
-	uint32_t enable_timebase_pmi : 1;         /*    41: enable pmi on TBL bit transition */
-	uint32_t threshold_value : 6;             /* 42-47: threshold value */
-	uint32_t enable_pmi_on_pmc1 : 1;          /*    48: enable pmi on pmc1 overflow */
-	uint32_t enable_pmi_on_pmcn : 1;          /*    49: enable pmi on any pmc except pmc1 overflow */
-	uint32_t enable_pmi_trigger : 1;          /*    50: enable triggering of pmcn by pmc1 overflow */
-	uint32_t pmc1_event : 5;                  /* 51-55: pmc1 event select */
-	uint32_t perfmon_event_occurred : 1;      /*    56: performance monitor event has occurred */
-	uint32_t /* reserved */ : 1;              /*    57: reserved */
-	uint32_t pmc2_event : 5;                  /* 58-62: pmc2 event select */
-	uint32_t disable_counters_hypervisor : 1; /*    63: disable counters (hypervisor) */
-} ppc64_mmcr0_bits_t;
-
-typedef union {
-	uint64_t value;
-	ppc64_mmcr0_bits_t field;
-} ppc64_mmcr0_reg_t;
-
-typedef struct {
-	uint32_t ttm0_select : 2;                 /*   0-1: FPU/ISU/IFU/VMX unit select */
-	uint32_t /* reserved */ : 1;              /*     2: reserved */
-	uint32_t ttm1_select : 2;                 /*   3-4: IDU/ISU/ISU unit select */
-	uint32_t /* reserved */ : 1;              /*     5: reserved */
-	uint32_t ttm2_select : 2;                 /*   6-7: IFU/LSU0 unit select */
-	uint32_t /* reserved */ : 1;              /*     8: reserved */
-	uint32_t ttm3_select : 2;                 /*  9-10: LSU1 select */
-	uint32_t /* reserved */ : 1;              /*    11: reserved */
-	uint32_t lane0_select : 2;                /* 12-13: Byte lane 0 unit select (TD_CP_DBG0SEL) */
-	uint32_t lane1_select : 2;                /* 14-15: Byte lane 1 unit select (TD_CP_DBG1SEL) */
-	uint32_t lane2_select : 2;                /* 16-17: Byte lane 2 unit select (TD_CP_DBG2SEL) */
-	uint32_t lane3_select : 2;                /* 18-19: Byte lane 3 unit select (TD_CP_DBG3SEL) */
-	uint32_t /* reserved */ : 4;              /* 20-23: reserved */
-	uint32_t pmc1_adder_lane_select : 1;      /*    24: PMC1 Event Adder Lane Select (PMC1_ADDER_SELECT) */
-	uint32_t pmc2_adder_lane_select : 1;      /*    25: PMC2 Event Adder Lane Select (PMC2_ADDER_SELECT) */
-	uint32_t pmc6_adder_lane_select : 1;      /*    26: PMC6 Event Adder Lane Select (PMC6_ADDER_SELECT) */
-	uint32_t pmc5_adder_lane_select : 1;      /*    27: PMC5 Event Adder Lane Select (PMC5_ADDER_SELECT) */
-	uint32_t pmc8_adder_lane_select : 1;      /*    28: PMC8 Event Adder Lane Select (PMC8_ADDER_SELECT) */
-	uint32_t pmc7_adder_lane_select : 1;      /*    29: PMC7 Event Adder Lane Select (PMC7_ADDER_SELECT) */
-	uint32_t pmc3_adder_lane_select : 1;      /*    30: PMC3 Event Adder Lane Select (PMC3_ADDER_SELECT) */
-	uint32_t pmc4_adder_lane_select : 1;      /*    31: PMC4 Event Adder Lane Select (PMC4_ADDER_SELECT) */
-	uint32_t pmc3_event : 5;                  /* 32-36: pmc3 event select */
-	uint32_t pmc4_event : 5;                  /* 37-41: pmc4 event select */
-	uint32_t pmc5_event : 5;                  /* 42-46: pmc5 event select */
-	uint32_t pmc6_event : 5;                  /* 47-51: pmc6 event select */
-	uint32_t pmc7_event : 5;                  /* 52-56: pmc7 event select */
-	uint32_t pmc8_event : 5;                  /* 57-61: pmc8 event select */
-	uint32_t speculative_event : 2;           /* 62-63: SPeCulative count event SELector */
-} ppc64_mmcr1_bits_t;
-
-typedef union {
-	uint64_t value;
-	ppc64_mmcr1_bits_t field;
-} ppc64_mmcr1_reg_t;
-
-typedef struct {
-	uint32_t /* reserved */ : 32;             /*  0-31: reserved */
-	uint32_t siar_sdar_same_instruction : 1;  /*    32: SIAR and SDAR are from same instruction */
-	uint32_t disable_counters_pmc1_pmc4 : 1;  /*    33: disable counters PMC1-PMC4 */
-	uint32_t disable_counters_pmc5_pmc8 : 1;  /*    34: disable counters PMC5-PMC8 */
-	uint32_t problem_state_siar : 1;          /*    35: MSR[PR] bit when SIAR set */
-	uint32_t hypervisor_state_siar : 1;       /*    36: MSR[HV] bit when SIAR set */
-	uint32_t /* reserved */ : 3;              /* 37-39: reserved */
-	uint32_t threshold_start_event : 3;       /* 40-42: threshold start event */
-	uint32_t threshold_end_event : 3;         /* 43-45: threshold end event */
-	uint32_t /* reserved */ : 3;              /* 46-48: reserved */
-	uint32_t imr_select : 1;                  /*    49: imr select */
-	uint32_t imr_mark : 2;                    /* 50-51: imr mark */
-	uint32_t imr_mask : 4;                    /* 52-55: imr mask */
-	uint32_t imr_match : 4;                   /* 56-59: imr match */
-	uint32_t disable_counters_tags_inactive : 1; /* 60: disable counters in tags inactive mode */
-	uint32_t disable_counters_tags_active : 1; /*   61: disable counters in tags active mode */
-	uint32_t disable_counters_wait_state : 1; /*    62: freeze counters in wait state (CNTL[31]=0) */
-	uint32_t sample_enable : 1;               /*    63: sampling enabled */
-} ppc64_mmcra_bits_t;
-
-typedef union {
-	uint64_t value;
-	ppc64_mmcra_bits_t field;
-} ppc64_mmcra_reg_t;
-
-/* PPC_PERFMON_FUNC_* values are taken apart to fill in the appropriate configuration bitfields: */
-typedef struct {
-	uint32_t /* reserved */ : 22;
-	uint32_t SPECSEL : 2;
-	uint32_t TD_CP_DBGxSEL : 2;
-	uint32_t TTM3SEL : 2;
-	uint32_t TTM1SEL : 2;
-	uint32_t TTM0SEL : 2;
-} ppc_func_bits_t;
-
-typedef union {
-	uint32_t value;
-	ppc_func_bits_t field;
-} ppc_func_unit_t;
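-
-/*
- * Illustrative sketch (editorial addition): taking a PPC_PERFMON_FUNC_*
- * selector apart and routing it into the 64-bit MMCR1 fields, per the
- * comment above.  The field-to-field mapping shown is an assumption made
- * for the example; only the types come from this header.
- *
- *	ppc_func_unit_t func;
- *	ppc64_mmcr1_reg_t mmcr1;
- *
- *	func.value = selector;
- *	mmcr1.value = 0;
- *	mmcr1.field.ttm0_select = func.field.TTM0SEL;
- *	mmcr1.field.ttm1_select = func.field.TTM1SEL;
- *	mmcr1.field.ttm3_select = func.field.TTM3SEL;
- *	mmcr1.field.speculative_event = func.field.SPECSEL;
- */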
-
-#endif /* _HW_PERFMON_MMCR_H_ */
diff --git a/osfmk/ppc/hw_vm.s b/osfmk/ppc/hw_vm.s
deleted file mode 100644
index bcad7dad2..000000000
--- a/osfmk/ppc/hw_vm.s
+++ /dev/null
@@ -1,8794 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#include <assym.s>
-#include <debug.h>
-#include <db_machine_commands.h>
-#include <mach_rt.h>
-	
-#include <mach_debug.h>
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <ppc/exception.h>
-#include <ppc/Performance.h>
-#include <ppc/exception.h>
-#include <mach/ppc/vm_param.h>
-	
-			.text
-
-;
-;                                     0        0        1        2        3        4        4        5      6
-;                                     0        8        6        4        2        0        8        6      3 
-;                                    +--------+--------+--------+--------+--------+--------+--------+--------+
-;                                    |00000000|00000SSS|SSSSSSSS|SSSSSSSS|SSSSPPPP|PPPPPPPP|PPPPxxxx|xxxxxxxx|          - EA
-;                                    +--------+--------+--------+--------+--------+--------+--------+--------+
-;
-;                                                       0        0        1
-;                                                       0        8        6      
-;                                                      +--------+--------+--------+
-;                                                      |//////BB|BBBBBBBB|BBBB////|                                     - SID - base
-;                                                      +--------+--------+--------+
-;
-;                                     0        0        1
-;                                     0        8        6      
-;                                    +--------+--------+--------+
-;                                    |////////|11111111|111111//|                                                       - SID - copy 1
-;                                    +--------+--------+--------+
-;
-;                   0        0        1
-;                   0        8        6      
-;                  +--------+--------+--------+
-;                  |////////|//222222|22222222|                                                                         - SID - copy 2
-;                  +--------+--------+--------+
-;
-;          0        0        1
-;          0        8        6      
-;         +--------+--------+--------+
-;         |//////33|33333333|33//////|                                                                                  - SID - copy 3 - not needed
-;         +--------+--------+--------+                                                                                         for 65 bit VPN
-;
-;                   0        0        1        2        3        4        4  5   5  
-;                   0        8        6        4        2        0        8  1   5  
-;                  +--------+--------+--------+--------+--------+--------+--------+
-;                  |00000000|00000002|22222222|11111111|111111BB|BBBBBBBB|BBBB////|                                     - SID Hash - this is all
-;                  +--------+--------+--------+--------+--------+--------+--------+                                           SID copies ORed
-;                   0        0        1        2        3        4        4  5   5  
-;                   0        8        6        4        2        0        8  1   5  
-;                  +--------+--------+--------+--------+--------+--------+--------+
-;                  |00000000|0000000S|SSSSSSSS|SSSSSSSS|SSSSSS00|00000000|0000////|                                      - Shifted high order EA
-;                  +--------+--------+--------+--------+--------+--------+--------+                                           left shifted "segment"
-;                                                                                                                             part of EA to make
-;                                                                                                                             room for SID base
-;
-;
-;                   0        0        1        2        3        4        4  5   5  
-;                   0        8        6        4        2        0        8  1   5  
-;                  +--------+--------+--------+--------+--------+--------+--------+
-;                  |00000000|0000000V|VVVVVVVV|VVVVVVVV|VVVVVVVV|VVVVVVVV|VVVV////|                                     - VSID - SID Hash XORed
-;                  +--------+--------+--------+--------+--------+--------+--------+                                            with shifted EA
-;
-;                   0        0        1        2        3        4        4        5        6        7      7
-;                   0        8        6        4        2        0        8        6        4        2      9
-;                  +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-;                  |00000000|0000000V|VVVVVVVV|VVVVVVVV|VVVVVVVV|VVVVVVVV|VVVVPPPP|PPPPPPPP|PPPPxxxx|xxxxxxxx|          - VPN
-;                  +--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-;
-
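-;
-;			Editorial summary of the diagrams above (added; the constants are
-;			read off the pictures, not re-verified against the code):
-;
-;				sid_hash = base | copy1 | copy2           three shifted copies of
-;				                                          the space ID, ORed
-;				vsid     = sid_hash ^ shifted_high_EA     XORed with the left-shifted
-;				                                          "segment" part of the EA
-;				vpn      = vsid : page : byte             VSID concatenated with the
-;				                                          page index and byte offset
-;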
-
-/*			addr64_t hw_add_map(struct pmap *pmap, struct mapping *mp) - Adds a mapping
- *
- *			Maps a page or block into a pmap
- *
- *			Returns 0 if add worked or the vaddr of the first overlap if not
- *
- * Make mapping - not block or I/O - note: this is low-level; the upper layer should remove duplicates
- *  
- *  1) bump mapping busy count
- *  2) lock pmap share
- *  3) find mapping full path - finds all possible list previous elements
- *  4) upgrade pmap to exclusive
- *  5) add mapping to search list
- *  6) find physent
- *  7) lock physent
- *  8) add to physent
- *  9) unlock physent
- * 10) unlock pmap
- * 11) drop mapping busy count
- * 
- * 
- * Make mapping - block or I/O - note: this is low-level; the upper layer should remove duplicates
- *  
- *  1) bump mapping busy count
- *  2) lock pmap share
- *  3) find mapping full path - finds all possible list previous elements
- *  4) upgrade pmap to exclusive
- *  5) add mapping to search list
- *  6) unlock pmap
- *  7) drop mapping busy count
- * 
- */
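-
-/*			Editorial sketch (added): the non-block insertion protocol above in
- *			C-like pseudocode.  The helper names mirror routines this file really
- *			calls (sxlkShared, sxlkPromote, sxlkConvert, mapSearchFull, mapInsert);
- *			the body is a simplification for reading, not a replacement for the
- *			assembly below.
- *
- *	addr64_t hw_add_map_sketch(pmap_t pmap, mapping_t *mp)
- *	{
- *		mapping_busy_bump(mp);
- *		sxlk_shared(&pmap->pmapSXlk);
- *		for (;;) {
- *			if (map_search_full(pmap, mp))
- *				return overlap_vaddr;
- *			if (sxlk_promote(&pmap->pmapSXlk) == 0)
- *				break;
- *			sxlk_convert(&pmap->pmapSXlk);
- *		}
- *		map_insert(pmap, mp);
- *		physent_lock_chain_unlock(mp);
- *		sxlk_unlock(&pmap->pmapSXlk);
- *		mapping_busy_drop(mp);
- *		return 0;
- *	}
- */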
-
-			.align	5
-			.globl	EXT(hw_add_map)
-
-LEXT(hw_add_map)
- 			
- 			stwu	r1,-(FM_ALIGN((31-17+1)*4)+FM_SIZE)(r1)	; Make some space on the stack
-			mflr	r0							; Save the link register
-			stw		r17,FM_ARG0+0x00(r1)		; Save a register
-			stw		r18,FM_ARG0+0x04(r1)		; Save a register
-			stw		r19,FM_ARG0+0x08(r1)		; Save a register
- 			mfsprg	r19,2						; Get feature flags 
-			stw		r20,FM_ARG0+0x0C(r1)		; Save a register
-			stw		r21,FM_ARG0+0x10(r1)		; Save a register
-			mtcrf	0x02,r19					; move pf64Bit cr6
-			stw		r22,FM_ARG0+0x14(r1)		; Save a register
-			stw		r23,FM_ARG0+0x18(r1)		; Save a register
-			stw		r24,FM_ARG0+0x1C(r1)		; Save a register
-			stw		r25,FM_ARG0+0x20(r1)		; Save a register
-			stw		r26,FM_ARG0+0x24(r1)		; Save a register
-			stw		r27,FM_ARG0+0x28(r1)		; Save a register
-			stw		r28,FM_ARG0+0x2C(r1)		; Save a register
-			stw		r29,FM_ARG0+0x30(r1)		; Save a register
-			stw		r30,FM_ARG0+0x34(r1)		; Save a register
-			stw		r31,FM_ARG0+0x38(r1)		; Save a register
-			stw		r0,(FM_ALIGN((31-17+1)*4)+FM_SIZE+FM_LR_SAVE)(r1)	; Save the return
-
-#if DEBUG
-			lwz		r11,pmapFlags(r3)			; Get pmaps flags
-			rlwinm.	r11,r11,0,pmapVMgsaa		; Is guest shadow assist active?
-			bne		hamPanic					; Call not valid for guest shadow assist pmap
-#endif
-			
-			rlwinm	r11,r4,0,0,19				; Round down to get mapping block address
-  			mr		r28,r3						; Save the pmap
-  			mr		r31,r4						; Save the mapping
-			bt++	pf64Bitb,hamSF1				; skip if 64-bit (only they take the hint)
-			lwz		r20,pmapvr+4(r3)			; Get conversion mask for pmap
-			lwz		r21,mbvrswap+4(r11)			; Get conversion mask for mapping
-
-			b		hamSF1x						; Done...
-			
-hamSF1:		ld		r20,pmapvr(r3)				; Get conversion mask for pmap
-			ld		r21,mbvrswap(r11)			; Get conversion mask for mapping
-
-hamSF1x:	bl		EXT(mapSetUp)				; Turn off interrupts, translation, and possibly enter 64-bit
-			
-			mr		r17,r11						; Save the MSR
-			xor		r28,r28,r20					; Convert the pmap to physical addressing
-			xor		r31,r31,r21					; Convert the mapping to physical addressing
-			
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkShared					; Go get a shared lock on the mapping lists
-			mr.		r3,r3						; Did we get the lock?
-			lwz		r24,mpFlags(r31)			; Pick up the flags
-			bne--	hamBadLock					; Nope...
-
-			li		r21,0						; Remember that we have the shared lock
-			
-;
-;			Note that we do a full search (i.e., no shortcut level skips, etc.)
-;			here so that we will know the previous elements so we can dequeue them
-;			later.
-;
-
-hamRescan:	lwz		r4,mpVAddr(r31)				; Get the new vaddr top half
-			lwz		r5,mpVAddr+4(r31)			; Get the new vaddr bottom half
-			mr		r3,r28						; Pass in pmap to search
-			lhz		r23,mpBSize(r31)			; Get the block size for later
-			mr		r29,r4						; Save top half of vaddr for later
-			mr		r30,r5						; Save bottom half of vaddr for later
-			
-			bl		EXT(mapSearchFull)			; Go see if we can find it
-			
-			li		r22,lo16(0x800C)			; Get 0xFFFF800C
-			rlwinm	r0,r24,mpBSub+1,31,31		; Rotate to get 0 if 4K bsu or 1 if 32MB bsu
-			addi	r23,r23,1					; Get actual length
-			rlwnm	r22,r22,r0,27,31			; Rotate to get 12 or 25
-			lis		r0,0x8000					; Get 0xFFFFFFFF80000000
-			slw		r9,r23,r22					; Isolate the low part
-			rlwnm	r22,r23,r22,22,31			; Extract the high order
-			addic	r23,r9,-4096				; Get the length to the last page
-			add		r0,r0,r0					; Get 0xFFFFFFFF00000000 for 64-bit or 0 for 32-bit
-			addme	r22,r22						; Do high order as well...
-			mr.		r3,r3						; Did we find a mapping here?
-			or		r0,r30,r0					; Fill high word of 64-bit with 1s so we will properly carry
-			bne--	hamOverlay					; We found a mapping, this is no good, can not double map...
-
-			addc	r9,r0,r23					; Add size to get last page in new range
-			or.		r0,r4,r5					; Are we beyond the end?
-			adde	r8,r29,r22					; Add the rest of the length on
-			rlwinm	r9,r9,0,0,31				; Clean top half of sum
-			beq++	hamFits						; We are at the end...
-
-			cmplw	cr1,r9,r5					; Is the bottom part of our end less?
-			cmplw	r8,r4						; Is our end before the next (top part)
-			crand	cr0_eq,cr0_eq,cr1_lt		; Is the second half less and the first half equal?
-			cror	cr0_eq,cr0_eq,cr0_lt		; Or is the top half less
-			
-			bf--	cr0_eq,hamOverlay			; No, we do fit, there is an overlay...
-			
-;
-;			Here we try to convert to an exclusive lock.  This will fail if someone else
-;			has it shared.
-;
-hamFits:	mr.		r21,r21						; Do we already have the exclusive lock?			
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			
-			bne--	hamGotX						; We already have the exclusive...
-			
-			bl		sxlkPromote					; Try to promote shared to exclusive
-			mr.		r3,r3						; Could we?
-			beq++	hamGotX						; Yeah...
-			
-;
-;			Since we could not promote our lock, we need to convert it.
-;			That means that we drop the shared lock and wait to get it
-;			exclusive.  Since we release the lock, we need to do the look up
-;			again.
-;			
-			
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkConvert					; Convert shared to exclusive
-			mr.		r3,r3						; Could we?
-			bne--	hamBadLock					; Nope, we must have timed out...
-			
-			li		r21,1						; Remember that we have the exclusive lock
-			b		hamRescan					; Go look again...
-			
-			.align	5
-
-hamGotX:	mr		r3,r28						; Get the pmap to insert into
-			mr		r4,r31						; Point to the mapping
-			bl		EXT(mapInsert)				; Insert the mapping into the list
-
-			rlwinm	r11,r24,mpPcfgb+2,mpPcfg>>6	; Get the index into the page config table
-			lhz		r8,mpSpace(r31)				; Get the address space
-			lwz		r11,lgpPcfg(r11)			; Get the page config
-			mfsdr1	r7							; Get the hash table base/bounds
-
-			lwz		r4,pmapResidentCnt(r28)		; Get the mapped page count 
-			lwz	r12,pmapResidentMax(r28)		; r12 = pmap->stats.resident_max
-			addi	r4,r4,1						; Bump up the mapped page count
-			stw		r4,pmapResidentCnt(r28)		; Set the mapped page count
-			cmplw	r12,r4					; if pmap->stats.resident_max >= pmap->stats.resident_count
			bge+	hamSkipMax				;	goto hamSkipMax
-			stw	r4,pmapResidentMax(r28)			; pmap->stats.resident_max = pmap->stats.resident_count
-			
-hamSkipMax:		andi.	r0,r24,mpType				; Is this a normal mapping?
-
-			rlwimi	r8,r8,14,4,17				; Double address space
-			rlwinm	r9,r30,0,4,31				; Clear segment
-			rlwinm	r10,r30,18,14,17			; Shift EA[32:35] down to correct spot in VSID (actually shift up 14)
-			rlwimi	r8,r8,28,0,3				; Get the last nybble of the hash
-			rlwimi	r10,r29,18,0,13				; Shift EA[18:31] down to VSID (31-bit math works because of max hash table size)			
-			rlwinm	r7,r7,0,16,31				; Isolate length mask (or count)
-			srw		r9,r9,r11					; Isolate just the page index
-			xor		r10,r10,r8					; Calculate the low 32 bits of the VSID
-
-			xor		r9,r9,r10					; Get the hash to the PTEG
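-
-;
-;			Editorial note (added): in C terms the hash formed above is the
-;			architected primary PTEG hash,
-;
-;				pteg_index = (vsid_low ^ page_index) & table_mask;
-;
-;			with the VSID low word built from the doubled space id XORed with
-;			shifted EA bits, per the diagrams at the top of this file.
-;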
-			
-			bne--	hamDoneNP					; Not a normal mapping, therefore, no physent...
-			
-			bl		mapPhysFindLock				; Go find and lock the physent
-			
-			bt++	pf64Bitb,ham64				; This is 64-bit...
-			
-			lwz		r11,ppLink+4(r3)			; Get the alias chain pointer
-			rlwinm	r7,r7,16,0,15				; Get the PTEG wrap size
-			slwi	r9,r9,6						; Make PTEG offset
-			ori		r7,r7,0xFFC0				; Stick in the bottom part
-			rlwinm	r12,r11,0,~ppFlags			; Clean it up
-			and		r9,r9,r7					; Wrap offset into table
-			mr		r4,r31						; Set the link to install
-			stw		r9,mpPte(r31)				; Point the mapping at the PTEG (exact offset is invalid)
-			stw		r12,mpAlias+4(r31)			; Move to the mapping
-			bl		mapPhyCSet32				; Install the link
-			b		hamDone						; Go finish up...
-			
-			.align	5
-			
-ham64:		li		r0,ppLFAmask				; Get mask to clean up alias pointer
-			subfic	r7,r7,46					; Get number of leading zeros
-			eqv		r4,r4,r4					; Get all ones
-			ld		r11,ppLink(r3)				; Get the alias chain pointer
			rotrdi	r0,r0,ppLFArrot				; Rotate clean up mask to get 0xF00000000000000F
-			srd		r4,r4,r7					; Get the wrap mask
-			sldi	r9,r9,7						; Change hash to PTEG offset
-			andc	r11,r11,r0					; Clean out the lock and flags
-			and		r9,r9,r4					; Wrap to PTEG
-			mr		r4,r31
-			stw		r9,mpPte(r31)				; Point the mapping at the PTEG (exact offset is invalid)
-			std		r11,mpAlias(r31)			; Set the alias pointer in the mapping
-
-			bl		mapPhyCSet64				; Install the link
-						
-hamDone:	bl		mapPhysUnlock				; Unlock the physent chain
-
-hamDoneNP:	la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list
-
-			mr		r3,r31						; Get the mapping pointer
-			bl		mapDropBusy					; Drop the busy count
-			
-			li		r3,0						; Set successful return
-			li		r4,0						; Set successful return
-
-hamReturn:	bt++	pf64Bitb,hamR64				; Yes...
-
-			mtmsr	r17							; Restore enables/translation/etc.
-			isync
-			b		hamReturnC					; Join common...
-
-hamR64:		mtmsrd	r17							; Restore enables/translation/etc.
-			isync								
-			
-hamReturnC:	lwz		r0,(FM_ALIGN((31-17+1)*4)+FM_SIZE+FM_LR_SAVE)(r1)	; Get the return
			lwz		r17,FM_ARG0+0x00(r1)		; Restore a register
			lwz		r18,FM_ARG0+0x04(r1)		; Restore a register
			lwz		r19,FM_ARG0+0x08(r1)		; Restore a register
			lwz		r20,FM_ARG0+0x0C(r1)		; Restore a register
			mtlr	r0							; Restore the return
			lwz		r21,FM_ARG0+0x10(r1)		; Restore a register
			lwz		r22,FM_ARG0+0x14(r1)		; Restore a register
			lwz		r23,FM_ARG0+0x18(r1)		; Restore a register
			lwz		r24,FM_ARG0+0x1C(r1)		; Restore a register
			lwz		r25,FM_ARG0+0x20(r1)		; Restore a register
			lwz		r26,FM_ARG0+0x24(r1)		; Restore a register
			lwz		r27,FM_ARG0+0x28(r1)		; Restore a register
			lwz		r28,FM_ARG0+0x2C(r1)		; Restore a register
			lwz		r29,FM_ARG0+0x30(r1)		; Restore a register
			lwz		r30,FM_ARG0+0x34(r1)		; Restore a register
			lwz		r31,FM_ARG0+0x38(r1)		; Restore a register
-			lwz		r1,0(r1)					; Pop the stack
-			
-			blr									; Leave...
-
-			
-			.align	5
-
-hamOverlay:	lwz		r22,mpFlags(r3)				; Get the overlay flags
-			li		r0,mpC|mpR					; Get a mask to turn off RC bits
-			lwz		r23,mpFlags(r31)			; Get the requested flags
-			lwz		r20,mpVAddr(r3)				; Get the overlay address
-			lwz		r8,mpVAddr(r31)				; Get the requested address
-			lwz		r21,mpVAddr+4(r3)			; Get the overlay address
-			lwz		r9,mpVAddr+4(r31)			; Get the requested address
-			lhz		r10,mpBSize(r3)				; Get the overlay length
-			lhz		r11,mpBSize(r31)			; Get the requested length
-			lwz		r24,mpPAddr(r3)				; Get the overlay physical address
-			lwz		r25,mpPAddr(r31)			; Get the requested physical address
-			andc	r21,r21,r0					; Clear RC bits
-			andc	r9,r9,r0					; Clear RC bits
-
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list
-
-			rlwinm.	r0,r22,0,mpRIPb,mpRIPb		; Are we in the process of removing this one?
-			mr		r3,r20						; Save the top of the colliding address
-			rlwinm	r4,r21,0,0,19				; Save the bottom of the colliding address
-
			bne++	hrmRemv						; Removing, go say so, so we can help...
-			
-			cmplw	r20,r8						; High part of vaddr the same?
-			cmplw	cr1,r21,r9					; Low part?
-			crand	cr5_eq,cr0_eq,cr1_eq		; Remember if same
-			
-			cmplw	r10,r11						; Size the same?
-			cmplw	cr1,r24,r25					; Physical address?
-			crand	cr5_eq,cr5_eq,cr0_eq		; Remember
-			crand	cr5_eq,cr5_eq,cr1_eq		; Remember if same
-			
-			xor		r23,r23,r22					; Compare mapping flag words
-			andi.	r23,r23,mpType|mpPerm		; Are mapping types and attributes the same?
-			crand	cr5_eq,cr5_eq,cr0_eq		; Merge in final check
-			bf--	cr5_eq,hamSmash				; This is not the same, so we return a smash...
-			
-			ori		r4,r4,mapRtMapDup			; Set duplicate
-			b		hamReturn					; And leave...
-			
-hamRemv:	ori		r4,r4,mapRtRemove			; We are in the process of removing the collision
-			b		hamReturn					; Come back yall...
-			
-hamSmash:	ori		r4,r4,mapRtSmash			; Tell caller that it has some clean up to do
-			b		hamReturn					; Join common epilog code
-
-			.align	5
-			
-hamBadLock:	li		r3,0						; Set lock time out error code
-			li		r4,mapRtBadLk				; Set lock time out error code
-			b		hamReturn					; Leave....
-
-hamPanic:	lis		r0,hi16(Choke)				; System abend
-			ori		r0,r0,lo16(Choke)			; System abend
-			li		r3,failMapping				; Show that we failed some kind of mapping thing
-			sc
-
-
-
-
-/*
- *			mapping *hw_rem_map(pmap, vaddr, addr64_t *next) - remove a mapping from the system.
- *
- *			Upon entry, R3 contains a pointer to a pmap.  Since vaddr is
- *			a 64-bit quantity, it is a long long so it is in R4 and R5.
- *			
- *			We return the virtual address of the removed mapping in R3.
- *
- *			Note that this is designed to be called from 32-bit mode with a stack.
- *
- *			We disable translation and all interruptions here.  This keeps us
- *			from having to worry about a deadlock due to having anything locked
- *			and needing it to process a fault.
- *
- *			Note that this must be done with both interruptions off and VM off
- *	
- *  Remove mapping via pmap, regular page, no pte
- * 
- *  1) lock pmap share
- *  2) find mapping full path - finds all possible list previous elements
- *  3) upgrade pmap to exclusive
- *  4) bump mapping busy count
- *  5) remove mapping from search list
- *  6) unlock pmap
- *  7) lock physent
- *  8) remove from physent
- *  9) unlock physent
- * 10) drop mapping busy count
- * 11) drain mapping busy count
- * 
- * 
- * Remove mapping via pmap, regular page, with pte
- * 
- *  1) lock pmap share
- *  2) find mapping full path - finds all possible list previous elements
- *  3) upgrade lock to exclusive
- *  4) bump mapping busy count
- *  5) lock PTEG
- *  6) invalidate pte and tlbie
- *  7) atomic merge rc into physent
- *  8) unlock PTEG
- *  9) remove mapping from search list
- * 10) unlock pmap
- * 11) lock physent
- * 12) remove from physent
- * 13) unlock physent
- * 14) drop mapping busy count
- * 15) drain mapping busy count
- * 
- * 
- * Remove mapping via pmap, I/O or block
- * 
- *  1) lock pmap share
- *  2) find mapping full path - finds all possible list previous elements
- *  3) upgrade lock to exclusive
- *  4) bump mapping busy count
- *	5) mark remove-in-progress
- *	6) check and bump remove chunk cursor if needed
- *	7) unlock pmap
- *	8) if something to invalidate, go to step 11
- *
- *	9) drop busy
- * 10) return with mapRtRemove to force higher level to call again
- *
- * 11) lock PTEG
- * 12) invalidate ptes, no tlbie
- * 13) unlock PTEG
- * 14) repeat 11 - 13 for all pages in chunk
- * 15) if not final chunk, go to step 9
- * 16) invalidate tlb entries for the whole block map but no more than the full tlb
- * 17) lock pmap share
- * 18) find mapping full path - finds all possible list previous elements
- * 19) upgrade lock to exclusive
- * 20) remove mapping from search list
- * 21) drop mapping busy count
- * 22) drain mapping busy count
- *	
- */
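-
-/*			Editorial sketch (added): what step 10 of the block/I-O path means
- *			for callers.  hw_rem_map invalidates at most one chunk of a block
- *			mapping per call and returns mapRtRemove until the final chunk is
- *			done, so the higher level simply calls again.  The loop below is a
- *			simplified picture of that contract, not real kernel code.
- *
- *	addr64_t next, rc;
- *
- *	do {
- *		rc = (addr64_t)hw_rem_map(pmap, vaddr, &next);
- *	} while (rc == mapRtRemove);
- */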
-
-			.align	5
-			.globl	EXT(hw_rem_map)
-
-LEXT(hw_rem_map)
-
-;
-;			NOTE NOTE NOTE - IF WE CHANGE THIS STACK FRAME STUFF WE NEED TO CHANGE
-;			THE HW_PURGE_* ROUTINES ALSO
-;
-
-#define hrmStackSize ((31-15+1)*4)+4
-			stwu	r1,-(FM_ALIGN(hrmStackSize)+FM_SIZE)(r1)	; Make some space on the stack
-			mflr	r0							; Save the link register
-			stw		r15,FM_ARG0+0x00(r1)		; Save a register
-			stw		r16,FM_ARG0+0x04(r1)		; Save a register
-			stw		r17,FM_ARG0+0x08(r1)		; Save a register
-			stw		r18,FM_ARG0+0x0C(r1)		; Save a register
-			stw		r19,FM_ARG0+0x10(r1)		; Save a register
- 			mfsprg	r19,2						; Get feature flags 
-			stw		r20,FM_ARG0+0x14(r1)		; Save a register
-			stw		r21,FM_ARG0+0x18(r1)		; Save a register
-			mtcrf	0x02,r19					; move pf64Bit cr6
-			stw		r22,FM_ARG0+0x1C(r1)		; Save a register
-			stw		r23,FM_ARG0+0x20(r1)		; Save a register
-			stw		r24,FM_ARG0+0x24(r1)		; Save a register
-			stw		r25,FM_ARG0+0x28(r1)		; Save a register
-			stw		r26,FM_ARG0+0x2C(r1)		; Save a register
-			stw		r27,FM_ARG0+0x30(r1)		; Save a register
-			stw		r28,FM_ARG0+0x34(r1)		; Save a register
-			stw		r29,FM_ARG0+0x38(r1)		; Save a register
-			stw		r30,FM_ARG0+0x3C(r1)		; Save a register
-			stw		r31,FM_ARG0+0x40(r1)		; Save a register
-			stw		r6,FM_ARG0+0x44(r1)			; Save address to save next mapped vaddr
-			stw		r0,(FM_ALIGN(hrmStackSize)+FM_SIZE+FM_LR_SAVE)(r1)	; Save the return
-
-#if DEBUG
-			lwz		r11,pmapFlags(r3)			; Get pmaps flags
-			rlwinm.	r11,r11,0,pmapVMgsaa		; Is guest shadow assist active? 
-			bne		hrmPanic					; Call not valid for guest shadow assist pmap
-#endif
-			
- 			bt++	pf64Bitb,hrmSF1				; skip if 64-bit (only they take the hint)
-			lwz		r9,pmapvr+4(r3)				; Get conversion mask
-			b		hrmSF1x						; Done...
-			
-hrmSF1:		ld		r9,pmapvr(r3)				; Get conversion mask
-
-hrmSF1x:	
-			bl		EXT(mapSetUp)				; Turn off interrupts, translation, and possibly enter 64-bit
-			
-			xor		r28,r3,r9					; Convert the pmap to physical addressing
-
-;
-;			Here is where we join in from the hw_purge_* routines
-;
-
-hrmJoin:	lwz		r3,pmapFlags(r28)			; Get pmap's flags
-			mfsprg	r19,2						; Get feature flags again (for alternate entries)
-
-			mr		r17,r11						; Save the MSR
-			mr		r29,r4						; Top half of vaddr
-			mr		r30,r5						; Bottom half of vaddr
-			
-			rlwinm.	r3,r3,0,pmapVMgsaa			; Is guest shadow assist active? 
-			bne--	hrmGuest					; Yes, handle specially
-			
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkShared					; Go get a shared lock on the mapping lists
-			mr.		r3,r3						; Did we get the lock?
-			bne--	hrmBadLock					; Nope...
-			
-;
-;			Note that we do a full search (i.e., no shortcut level skips, etc.)
-;			here so that we will know the previous elements so we can dequeue them
-;			later. Note: we get back mpFlags in R7.
-;
-
-			mr		r3,r28						; Pass in pmap to search
-			mr		r4,r29						; High order of address
-			mr		r5,r30						; Low order of address
-			bl		EXT(mapSearchFull)			; Go see if we can find it
-
-			andi.	r0,r7,mpPerm				; Mapping marked permanent?
-			crmove	cr5_eq,cr0_eq				; Remember permanent marking
-			mr		r20,r7						; Remember mpFlags
-			mr.		r31,r3						; Did we? (And remember mapping address for later)
-			mr		r15,r4						; Save top of next vaddr
-			mr		r16,r5						; Save bottom of next vaddr
-			beq--	hrmNotFound					; Nope, not found...
- 			
-			bf--	cr5_eq,hrmPerm				; This one can't be removed...
-;
-;			Here we try to promote to an exclusive lock.  This will fail if someone else
-;			has it shared.
-;
-			
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkPromote					; Try to promote shared to exclusive
-			mr.		r3,r3						; Could we?
-			beq++	hrmGotX						; Yeah...
-			
-;
-;			Since we could not promote our lock, we need to convert it.
-;			That means that we drop the shared lock and wait to get it
-;			exclusive.  Since we release the lock, we need to do the look up
-;			again.
-;			
-			
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkConvert					; Convert shared to exclusive
-			mr.		r3,r3						; Could we?
-			bne--	hrmBadLock					; Nope, we must have timed out...
-			
-			mr		r3,r28						; Pass in pmap to search
-			mr		r4,r29						; High order of address
-			mr		r5,r30						; Low order of address
-			bl		EXT(mapSearchFull)			; Rescan the list
-			
-			andi.	r0,r7,mpPerm				; Mapping marked permanent?
-			crmove	cr5_eq,cr0_eq				; Remember permanent marking
-			mr.		r31,r3						; Did we lose it when we converted?
-			mr		r20,r7						; Remember mpFlags
-			mr		r15,r4						; Save top of next vaddr
-			mr		r16,r5						; Save bottom of next vaddr
-			beq--	hrmNotFound					; Yeah, we did, someone tossed it for us...
-		
-			bf--	cr5_eq,hrmPerm				; This one can't be removed...
-
-;
-;			We have an exclusive lock on the mapping chain. And we
-;			also have the busy count bumped in the mapping so it can
-;			not vanish on us.
-;
-
-hrmGotX:	mr		r3,r31						; Get the mapping
-			bl		mapBumpBusy					; Bump up the busy count
-			
-;
-;			Invalidate any PTEs associated with this
-;			mapping (more than one if a block) and accumulate the reference
-;			and change bits.
-;
-;			Here is also where we need to split 32- and 64-bit processing
-;
-
-			lwz		r21,mpPte(r31)				; Grab the offset to the PTE
-			rlwinm	r23,r29,0,1,0				; Copy high order vaddr to high if 64-bit machine
-			mfsdr1	r29							; Get the hash table base and size
-
-			rlwinm	r0,r20,0,mpType				; Isolate mapping type
-			cmplwi	cr5,r0,mpBlock				; Remember whether this is a block mapping
-			cmplwi	r0,mpMinSpecial				; cr0_lt <- not a special mapping type
-			
-			rlwinm	r0,r21,0,mpHValidb,mpHValidb	; See if we actually have a PTE
-			ori		r2,r2,0xFFFF				; Get mask to clean out hash table base (works for both 32- and 64-bit)
-			cmpwi	cr1,r0,0					; Have we made a PTE for this yet? 
-			rlwinm	r21,r21,0,~mpHValid			; Clear out valid bit
-			crorc	cr0_eq,cr1_eq,cr0_lt		; No need to look at PTE if none or a special mapping
-			rlwimi	r23,r30,0,0,31				; Insert low under high part of address
-			andc	r29,r29,r2					; Clean up hash table base
-			li		r22,0						; Clear this on out (also sets RC to 0 if we bail)
-			mr		r30,r23						; Move the now merged vaddr to the correct register
-			add		r26,r29,r21					; Point to the PTEG slot
-			
-			bt++	pf64Bitb,hrmSplit64			; Go do 64-bit version...
-			
-			rlwinm	r9,r21,28,4,29				; Convert PTEG to PCA entry
-			beq-	cr5,hrmBlock32				; Go treat block specially...
-			subfic	r9,r9,-4					; Get the PCA entry offset
-			bt-		cr0_eq,hrmPysDQ32			; Skip next if no possible PTE...
-			add		r7,r9,r29					; Point to the PCA slot
-	
-			bl		mapLockPteg					; Go lock up the PTEG (Note: we need to save R6 to set PCA)
-	
-			lwz		r21,mpPte(r31)				; Get the quick pointer again
-			lwz		r5,0(r26)					; Get the top of PTE
-			
-			rlwinm.	r0,r21,0,mpHValidb,mpHValidb	; See if we actually have a PTE
-			rlwinm	r21,r21,0,~mpHValid			; Clear out valid bit
-			rlwinm	r5,r5,0,1,31				; Turn off valid bit in PTE
-			stw		r21,mpPte(r31)				; Make sure we invalidate mpPte, still pointing to PTEG (keep walk_page from making a mistake)
-			beq-	hrmUlckPCA32				; Pte is gone, no need to invalidate...
-			
-			stw		r5,0(r26)					; Invalidate the PTE
-
-			li		r9,tlbieLock				; Get the TLBIE lock
-
-			sync								; Make sure the invalid PTE is actually in memory
-	
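-;
-;			Editorial note (added): the loop below is a plain test-and-set
-;			spinlock built from lwarx/stwcx. - in C-atomics terms roughly
-;
-;				while (atomic_exchange(&tlbieLock, 1) != 0)
-;					continue;
-;
-;			retried whenever another processor steals the lock word between
-;			the load and the conditional store.
-;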
-hrmPtlb32:	lwarx	r5,0,r9						; Get the TLBIE lock 
-			mr.		r5,r5						; Is it locked?
-			li		r5,1						; Get locked indicator
-			bne-	hrmPtlb32					; It is locked, go spin...
-			stwcx.	r5,0,r9						; Try to get it
-			bne-	hrmPtlb32					; We was beat... 
-			
-			rlwinm.	r0,r19,0,pfSMPcapb,pfSMPcapb	; Can this processor do SMP?	
-					
			tlbie	r30							; Invalidate all corresponding TLB entries
-			
-			beq-	hrmNTlbs					; Jump if we can not do a TLBSYNC....
-			
-			eieio								; Make sure that the tlbie happens first
-			tlbsync								; Wait for everyone to catch up
-			sync								; Make sure of it all
-			
-hrmNTlbs:	li		r0,0						; Clear this 
-			rlwinm	r2,r21,29,29,31				; Get slot number (8 byte entries)
-			stw		r0,tlbieLock(0)				; Clear the tlbie lock
-			lis		r0,0x8000					; Get bit for slot 0
-			eieio								; Make sure those RC bit have been stashed in PTE
-			
-			srw		r0,r0,r2					; Get the allocation hash mask
-			lwz		r22,4(r26)					; Get the latest reference and change bits
-			or		r6,r6,r0					; Show that this slot is free
-
-hrmUlckPCA32:			
-			eieio								; Make sure all updates come first
-			stw		r6,0(r7)					; Unlock the PTEG
-		
-;
-;			Now, it is time to remove the mapping and unlock the chain.
-;			But first, we need to make sure no one else is using this 
-;			mapping so we drain the busy now
-;			
-
-hrmPysDQ32:	mr		r3,r31						; Point to the mapping
-			bl		mapDrainBusy				; Go wait until mapping is unused
-
-			mr		r3,r28						; Get the pmap to remove from
-			mr		r4,r31						; Point to the mapping
-			bl		EXT(mapRemove)				; Remove the mapping from the list			
-
-			lwz		r4,pmapResidentCnt(r28)		; Get the mapped page count 
-			rlwinm	r0,r20,0,mpType				; Isolate mapping type
-			cmplwi	cr1,r0,mpMinSpecial			; cr1_lt <- not a special mapping type
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			subi	r4,r4,1						; Drop down the mapped page count
-			stw		r4,pmapResidentCnt(r28)		; Set the mapped page count 
-			bl		sxlkUnlock					; Unlock the search list
-
-			bf--	cr1_lt,hrmRetn32			; This one has no real memory associated with it so we are done...
-
-			bl		mapPhysFindLock				; Go find and lock the physent
-
-			lwz		r9,ppLink+4(r3)				; Get first mapping
-
-			mr		r4,r22						; Get the RC bits we just got
-			bl		mapPhysMerge				; Go merge the RC bits
-			
-			rlwinm	r9,r9,0,~ppFlags			; Clear the flags from the mapping pointer
-			
-			cmplw	r9,r31						; Are we the first on the list?
-			bne-	hrmNot1st					; Nope...
-			
-			li		r9,0						; Get a 0
-			lwz		r4,mpAlias+4(r31)			; Get our new forward pointer
-			stw		r9,mpAlias+4(r31)			; Make sure we are off the chain
-			bl		mapPhyCSet32				; Go set the physent link and preserve flags								
-			
-			b		hrmPhyDQd					; Join up and unlock it all...
-
-			.align	5
-			
-hrmPerm:	li		r8,-4096					; Get the value we need to round down to a page
-			and		r8,r8,r31					; Get back to a page
-			lwz		r8,mbvrswap+4(r8)			; Get last half of virtual to real swap
-			
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list
-			
-			xor		r3,r31,r8					; Flip mapping address to virtual
-			ori		r3,r3,mapRtPerm				; Set permanent mapping error
-			b		hrmErRtn
-			
-hrmBadLock:	li		r3,mapRtBadLk				; Set bad lock
-			b		hrmErRtn
-			
-hrmEndInSight:
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list
-			
-hrmDoneChunk:
-			mr		r3,r31						; Point to the mapping
-			bl		mapDropBusy					; Drop the busy here since we need to come back
-			li		r3,mapRtRemove				; Say we are still removing this
-			b		hrmErRtn
-
-			.align	5
-			
-hrmNotFound:
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list
-			li		r3,mapRtNotFnd				; No mapping found
-
-hrmErRtn:	bt++	pf64Bitb,hrmSF1z			; skip if 64-bit (only they take the hint)
-
-			mtmsr	r17							; Restore enables/translation/etc.
-			isync
-			b		hrmRetnCmn					; Join the common return code...
-
-hrmSF1z:	mtmsrd	r17							; Restore enables/translation/etc.
-			isync
-			b		hrmRetnCmn					; Join the common return code...
-
-			.align	5
-
-hrmNot1st:	mr.		r8,r9						; Remember and test current node
-			beq-	hrmPhyDQd					; Could not find our node, someone must have unmapped us...
-			lwz		r9,mpAlias+4(r9)			; Chain to the next
-			cmplw	r9,r31						; Is this us?
-			bne-	hrmNot1st					; Not us...
-		
-			lwz		r9,mpAlias+4(r9)			; Get our forward pointer
-			stw		r9,mpAlias+4(r8)			; Unchain us
-			
-			nop									; For alignment
-			
-hrmPhyDQd:	bl		mapPhysUnlock				; Unlock the physent chain
-
-hrmRetn32:	rlwinm	r8,r31,0,0,19				; Find start of page
-			mr		r3,r31						; Copy the pointer to the mapping
-			lwz		r8,mbvrswap+4(r8)			; Get last half of virtual to real swap
-			bl		mapDrainBusy				; Go wait until mapping is unused
-
-			xor		r3,r31,r8					; Flip mapping address to virtual
-			
-			mtmsr	r17							; Restore enables/translation/etc.
-			isync
-
-hrmRetnCmn:	lwz		r6,FM_ARG0+0x44(r1)			; Get address to save next mapped vaddr
-			lwz		r0,(FM_ALIGN(hrmStackSize)+FM_SIZE+FM_LR_SAVE)(r1)	; Restore the return
-			lwz		r17,FM_ARG0+0x08(r1)		; Restore a register
-			lwz		r18,FM_ARG0+0x0C(r1)		; Restore a register
-			mr.		r6,r6						; Should we pass back the "next" vaddr?
-			lwz		r19,FM_ARG0+0x10(r1)		; Restore a register
-			lwz		r20,FM_ARG0+0x14(r1)		; Restore a register
-			mtlr	r0							; Restore the return
-			
-			rlwinm	r16,r16,0,0,19				; Clean to a page boundary
-			beq		hrmNoNextAdr				; Do not pass back the next vaddr...
-			stw		r15,0(r6)					; Pass back the top of the next vaddr
-			stw		r16,4(r6)					; Pass back the bottom of the next vaddr
-
-hrmNoNextAdr:
-			lwz		r15,FM_ARG0+0x00(r1)		; Restore a register
-			lwz		r16,FM_ARG0+0x04(r1)		; Restore a register
-			lwz		r21,FM_ARG0+0x18(r1)		; Restore a register
-			rlwinm	r3,r3,0,0,31				; Clear top of register if 64-bit
-			lwz		r22,FM_ARG0+0x1C(r1)		; Restore a register
-			lwz		r23,FM_ARG0+0x20(r1)		; Restore a register
-			lwz		r24,FM_ARG0+0x24(r1)		; Restore a register
-			lwz		r25,FM_ARG0+0x28(r1)		; Restore a register
-			lwz		r26,FM_ARG0+0x2C(r1)		; Restore a register
-			lwz		r27,FM_ARG0+0x30(r1)		; Restore a register
-			lwz		r28,FM_ARG0+0x34(r1)		; Restore a register
-			lwz		r29,FM_ARG0+0x38(r1)		; Restore a register
-			lwz		r30,FM_ARG0+0x3C(r1)		; Restore a register
-			lwz		r31,FM_ARG0+0x40(r1)		; Restore a register
-			lwz		r1,0(r1)					; Pop the stack
-			blr									; Leave...
-
-;
-;			Here is where we come when all is lost.  Somehow, we failed a mapping function
-;			that must work... All hope is gone.  Alas, we die.......
-;
-
-hrmPanic:	lis		r0,hi16(Choke)				; System abend
-			ori		r0,r0,lo16(Choke)			; System abend
-			li		r3,failMapping				; Show that we failed some kind of mapping thing
-			sc
-
-
-;
-;			Invalidate block mappings by invalidating a chunk of autogen PTEs in PTEGs hashed
-;			in the range. Then, if we did not finish, return a code indicating that we need to 
-;			be called again.  Eventually, we will finish and then, we will do a TLBIE for each 
-;			PTEG up to the point where we have cleared it all (64 for 32-bit architecture)
-;
-;			A potential speed up is that we stop the invalidate loop once we have walked through
-;			the hash table once. This really is not worth the trouble because we need to have
-;			mapped 1/2 of physical RAM in an individual block.  Way unlikely.
-;
-;			We should rethink this and see if we think it will be faster to check PTE and
-;			only invalidate the specific PTE rather than all block map PTEs in the PTEG.
-;
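-;
-;			Editorial note (added): the branch-free setup below clamps the work
-;			to one chunk.  In C terms, with left counted zero-based,
-;
-;				mask  = (left - mapRemChunk) >> 31;                 -1 iff left < mapRemChunk
-;				count = ((left - (mapRemChunk-1)) & mask) + (mapRemChunk-1);
-;				      = min(left, mapRemChunk-1);
-;
-;			while cr7 remembers whether this was the final chunk.
-;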
-
-			.align	5
-			
-hrmBlock32:	lis		r29,0xD000					; Get shift to 32MB bsu
-			rlwinm	r24,r20,mpBSub+1+2,29,29	; Rotate to get 0 if 4K bsu or 13 if 32MB bsu
-			lhz		r25,mpBSize(r31)			; Get the number of pages in block
-			lhz		r23,mpSpace(r31)			; Get the address space hash
-			lwz		r9,mpBlkRemCur(r31)			; Get our current remove position
-			rlwnm	r29,r29,r24,28,31			; Rotate to get 0 or 13
-			addi	r25,r25,1					; Account for zero-based counting
-			ori		r0,r20,mpRIP				; Turn on the remove in progress flag
-			slw		r25,r25,r29					; Adjust for 32MB if needed
-			mfsdr1	r29							; Get the hash table base and size
-			rlwinm	r24,r23,maxAdrSpb,32-maxAdrSpb-maxAdrSpb,31-maxAdrSpb	; Get high order of hash
-			subi	r25,r25,1					; Convert back to zero-based counting
-			lwz		r27,mpVAddr+4(r31)			; Get the base vaddr
-			sub		r4,r25,r9					; Get number of pages left
-			cmplw	cr1,r9,r25					; Have we already hit the end?
-			addi	r10,r9,mapRemChunk			; Point to the start of the next chunk
-			addi	r2,r4,-mapRemChunk			; See if mapRemChunk or more
-			rlwinm	r26,r29,16,7,15				; Get the hash table size
-			srawi	r2,r2,31					; We have -1 if less than mapRemChunk or 0 if equal or more
-			stb		r0,mpFlags+3(r31)			; Save the flags with the mpRIP bit on
-			subi	r4,r4,mapRemChunk-1			; Back off for a running start (will be negative for more than mapRemChunk)
-			cmpwi	cr7,r2,0					; Remember if we have finished
-			slwi	r0,r9,12					; Make cursor into page offset
-			or		r24,r24,r23					; Get full hash
-			and		r4,r4,r2					; If more than a chunk, bring this back to 0
-			rlwinm	r29,r29,0,0,15				; Isolate the hash table base
-			add		r27,r27,r0					; Adjust vaddr to start of current chunk
			addi	r4,r4,mapRemChunk-1			; Add mapRemChunk-1 to get min(num left, chunk size)
-			
			bgt-	cr1,hrmEndInSight			; Someone is already doing the last chunk...
-			
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			stw		r10,mpBlkRemCur(r31)		; Set next chunk to do (note: this may indicate after end)
-			bl		sxlkUnlock					; Unlock the search list while we are invalidating
-			
-			rlwinm	r8,r27,4+maxAdrSpb,31-maxAdrSpb-3,31-maxAdrSpb	; Isolate the segment
-			rlwinm	r30,r27,26,6,25				; Shift vaddr to PTEG offset (and remember VADDR in R27)
-			xor		r24,r24,r8					; Get the proper VSID
-			rlwinm	r21,r27,26,10,25			; Shift page index to PTEG offset (and remember VADDR in R27)
-			ori		r26,r26,lo16(0xFFC0)		; Stick in the rest of the length
-			rlwinm	r22,r4,6,10,25				; Shift size to PTEG offset
-			rlwinm	r24,r24,6,0,25				; Shift hash to PTEG units
-			add		r22,r22,r30					; Get end address (in PTEG units)
-			
-hrmBInv32:	rlwinm	r23,r30,0,10,25				; Isolate just the page index 
-			xor		r23,r23,r24					; Hash it
-			and		r23,r23,r26					; Wrap it into the table
-			rlwinm	r3,r23,28,4,29				; Change to PCA offset
-			subfic	r3,r3,-4					; Get the PCA entry offset
-			add		r7,r3,r29					; Point to the PCA slot
-			cmplw	cr5,r30,r22					; Check if we reached the end of the range
-			addi	r30,r30,64					; bump to the next vaddr
-								
-			bl		mapLockPteg					; Lock the PTEG
-					
-			rlwinm.	r4,r6,16,0,7				; Position, save, and test block mappings in PCA
-			add		r5,r23,r29					; Point to the PTEG
-			li		r0,0						; Set an invalid PTE value
-			beq+	hrmBNone32					; No block map PTEs in this PTEG...
-			mtcrf	0x80,r4						; Set CRs to select PTE slots
-			mtcrf	0x40,r4						; Set CRs to select PTE slots
-
-			bf		0,hrmSlot0					; No autogen here
-			stw		r0,0x00(r5)					; Invalidate PTE
-
-hrmSlot0:	bf		1,hrmSlot1					; No autogen here
-			stw		r0,0x08(r5)					; Invalidate PTE
-
-hrmSlot1:	bf		2,hrmSlot2					; No autogen here
-			stw		r0,0x10(r5)					; Invalidate PTE
-
-hrmSlot2:	bf		3,hrmSlot3					; No autogen here
-			stw		r0,0x18(r5)					; Invalidate PTE
-
-hrmSlot3:	bf		4,hrmSlot4					; No autogen here
-			stw		r0,0x20(r5)					; Invalidate PTE
-
-hrmSlot4:	bf		5,hrmSlot5					; No autogen here
-			stw		r0,0x28(r5)					; Invalidate PTE
-
-hrmSlot5:	bf		6,hrmSlot6					; No autogen here
-			stw		r0,0x30(r5)					; Invalidate PTE
-
-hrmSlot6:	bf		7,hrmSlot7					; No autogen here
-			stw		r0,0x38(r5)					; Invalidate PTE
-
-hrmSlot7:	rlwinm	r0,r4,16,16,23				; Move in use to autogen
			or		r6,r6,r4					; Flip on the free bits that correspond to the autogens we cleared
-			andc	r6,r6,r0					; Turn off all the old autogen bits
-
-hrmBNone32:	eieio								; Make sure all updates come first
-
-			stw		r6,0(r7)					; Unlock and set the PCA
-			
-			bne+	cr5,hrmBInv32				; Go invalidate the next...
-
-			bge+	cr7,hrmDoneChunk			; We have not as yet done the last chunk, go tell our caller to call again...
-
-			mr		r3,r31						; Copy the pointer to the mapping
-			bl		mapDrainBusy				; Go wait until we are sure all other removers are done with this one
-
-			sync								; Make sure memory is consistent
-			
-			subi	r5,r25,63					; Subtract TLB size from page count (note we are 0 based here)
-			li		r6,63						; Assume full invalidate for now
-			srawi	r5,r5,31					; Make 0 if we need a full purge, -1 otherwise
-			andc	r6,r6,r5					; Clear max if we have less to do
-			and		r5,r25,r5					; Clear count if we have more than max
-			lwz		r27,mpVAddr+4(r31)			; Get the base vaddr again
-			li		r7,tlbieLock				; Get the TLBIE lock
-			or		r5,r5,r6					; Get number of TLBIEs needed		
-					
-hrmBTLBlck:	lwarx	r2,0,r7						; Get the TLBIE lock
-			mr.		r2,r2						; Is it locked?
-			li		r2,1						; Get our lock value
-			bne-	hrmBTLBlck					; It is locked, go wait...
-			stwcx.	r2,0,r7						; Try to get it
-			bne-	hrmBTLBlck					; We was beat...
-	
-hrmBTLBi:	addic.	r5,r5,-1					; See if we did them all
-			tlbie	r27							; Invalidate it everywhere
-			addi	r27,r27,0x1000				; Up to the next page
-			bge+	hrmBTLBi					; Make sure we have done it all...
-			
-			rlwinm.	r0,r19,0,pfSMPcapb,pfSMPcapb	; Can this processor do SMP?	
-			li		r2,0						; Lock clear value
-			
-			sync								; Make sure all is quiet
-			beq-	hrmBNTlbs					; Jump if we can not do a TLBSYNC....
-			
-			eieio								; Make sure that the tlbie happens first
-			tlbsync								; Wait for everyone to catch up
-			sync								; Wait for quiet again
-
-hrmBNTlbs:	stw		r2,tlbieLock(0)				; Clear the tlbie lock
-			
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkShared					; Go get a shared lock on the mapping lists
-			mr.		r3,r3						; Did we get the lock?
-			bne-	hrmPanic					; Nope...
-			
-			lwz		r4,mpVAddr(r31)				; High order of address
-			lwz		r5,mpVAddr+4(r31)			; Low order of address
-			mr		r3,r28						; Pass in pmap to search
-			mr		r29,r4						; Save this in case we need it (only promote fails)
-			mr		r30,r5						; Save this in case we need it (only promote fails)
-			bl		EXT(mapSearchFull)			; Go see if we can find it
-			
-			mr.		r3,r3						; Did we? (And remember mapping address for later)
-			mr		r15,r4						; Save top of next vaddr
-			mr		r16,r5						; Save bottom of next vaddr
-			beq-	hrmPanic					; Nope, not found...
-			
-			cmplw	r3,r31						; Same mapping?
-			bne-	hrmPanic					; Not good...
-			
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkPromote					; Try to promote shared to exclusive
-			mr.		r3,r3						; Could we?
-			mr		r3,r31						; Restore the mapping pointer
-			beq+	hrmBDone1					; Yeah...
-			
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkConvert					; Convert shared to exclusive
-			mr.		r3,r3						; Could we?
-			bne--	hrmPanic					; Nope, we must have timed out...
-			
-			mr		r3,r28						; Pass in pmap to search
-			mr		r4,r29						; High order of address
-			mr		r5,r30						; Low order of address
-			bl		EXT(mapSearchFull)			; Rescan the list
-			
-			mr.		r3,r3						; Did we lose it when we converted?
-			mr		r15,r4						; Save top of next vaddr
-			mr		r16,r5						; Save bottom of next vaddr
-			beq--	hrmPanic					; Yeah, we did, someone tossed it for us...
-
-hrmBDone1:	bl		mapDrainBusy				; Go wait until mapping is unused
-
-			mr		r3,r28						; Get the pmap to remove from
-			mr		r4,r31						; Point to the mapping
-			bl		EXT(mapRemove)				; Remove the mapping from the list	
-					
-			lwz		r4,pmapResidentCnt(r28)		; Get the mapped page count 
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			subi	r4,r4,1						; Drop down the mapped page count
-			stw		r4,pmapResidentCnt(r28)		; Set the mapped page count 
-			bl		sxlkUnlock					; Unlock the search list
-		
-			b		hrmRetn32					; We are all done, get out...
-
-;
-;			Here we handle the 64-bit version of hw_rem_map
-;
-		
-			.align	5
-		
-hrmSplit64:	rlwinm	r9,r21,27,5,29				; Convert PTEG to PCA entry
-			beq--	cr5,hrmBlock64				; Go treat block specially...
-			subfic	r9,r9,-4					; Get the PCA entry offset
-			bt--	cr0_eq,hrmPysDQ64			; Skip next if no possible PTE...
-			add		r7,r9,r29					; Point to the PCA slot
-			
-			bl		mapLockPteg					; Go lock up the PTEG
-	
-			lwz		r21,mpPte(r31)				; Get the quick pointer again
-			ld		r5,0(r26)					; Get the top of PTE
-			
-			rlwinm.	r0,r21,0,mpHValidb,mpHValidb	; See if we actually have a PTE
-			rlwinm	r21,r21,0,~mpHValid			; Clear out valid bit
-			sldi	r23,r5,16					; Shift AVPN up to EA format
-//			****	Need to adjust above shift based on the page size - large pages need to shift a bit more
-			rldicr	r5,r5,0,62					; Clear the valid bit
-			rldimi	r23,r30,0,36				; Insert the page portion of the VPN
-			stw		r21,mpPte(r31)				; Make sure we invalidate mpPte but keep pointing to PTEG (keep walk_page from making a mistake)
-			beq--	hrmUlckPCA64				; Pte is gone, no need to invalidate...
-			
-			std		r5,0(r26)					; Invalidate the PTE
-
-			li		r9,tlbieLock				; Get the TLBIE lock
-
-			sync								; Make sure the invalid PTE is actually in memory
-
-hrmPtlb64:	lwarx	r5,0,r9						; Get the TLBIE lock 
			rldicl	r23,r23,0,16				; Clear bits 0:15 because the architecture says to
-			mr.		r5,r5						; Is it locked?
-			li		r5,1						; Get locked indicator
-			bne--	hrmPtlb64w					; It is locked, go spin...
-			stwcx.	r5,0,r9						; Try to get it
-			bne--	hrmPtlb64					; We was beat... 
-					
-			tlbie	r23							; Invalidate all corresponding TLB entries
-			
-			eieio								; Make sure that the tlbie happens first
-			tlbsync								; Wait for everyone to catch up
-			
-			ptesync								; Make sure of it all
-			li		r0,0						; Clear this 
-			rlwinm	r2,r21,28,29,31				; Get slot number (16 byte entries)
-			stw		r0,tlbieLock(0)				; Clear the tlbie lock
-			oris	r0,r0,0x8000				; Assume slot 0
-
-			srw		r0,r0,r2					; Get slot mask to deallocate
-
-			lwz		r22,12(r26)					; Get the latest reference and change bits
-			or		r6,r6,r0					; Make the guy we killed free
-			
-hrmUlckPCA64:
-			eieio								; Make sure all updates come first
-
-			stw		r6,0(r7)					; Unlock and change the PCA
-		
-hrmPysDQ64:	mr		r3,r31						; Point to the mapping
-			bl		mapDrainBusy				; Go wait until mapping is unused
-
-			mr		r3,r28						; Get the pmap to remove from
-			mr		r4,r31						; Point to the mapping
-			bl		EXT(mapRemove)				; Remove the mapping from the list			
-
-			rlwinm	r0,r20,0,mpType				; Isolate mapping type
-			cmplwi	cr1,r0,mpMinSpecial			; cr1_lt <- not a special mapping type
-			lwz		r4,pmapResidentCnt(r28)		; Get the mapped page count 
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			subi	r4,r4,1						; Drop down the mapped page count
-			stw		r4,pmapResidentCnt(r28)		; Set the mapped page count 
-			bl		sxlkUnlock					; Unlock the search list
-		
-			bf--	cr1_lt,hrmRetn64			; This one has no real memory associated with it so we are done...
-
-			bl		mapPhysFindLock				; Go find and lock the physent
-
-			li		r0,ppLFAmask				; Get mask to clean up mapping pointer
-			ld		r9,ppLink(r3)				; Get first mapping
			rotrdi	r0,r0,ppLFArrot				; Rotate clean up mask to get 0xF00000000000000F
-			mr		r4,r22						; Get the RC bits we just got
-			
-			bl		mapPhysMerge				; Go merge the RC bits
-			
-			andc	r9,r9,r0					; Clean up the mapping pointer
-			
-			cmpld	r9,r31						; Are we the first on the list?
-			bne--	hrmNot1st64					; Nope...
-			
-			li		r9,0						; Get a 0
-			ld		r4,mpAlias(r31)				; Get our forward pointer
-			
-			std		r9,mpAlias(r31)				; Make sure we are off the chain
-			bl		mapPhyCSet64				; Go set the physent link and preserve flags								
-
-			b		hrmPhyDQd64					; Join up and unlock it all...
-			
-hrmPtlb64w:	li		r5,lgKillResv				; Point to some spare memory
-			stwcx.	r5,0,r5						; Clear the pending reservation			
-						
-			
-hrmPtlb64x:	lwz		r5,0(r9)					; Do a regular load to avoid taking reservation
-			mr.		r5,r5						; is it locked?
-			beq++	hrmPtlb64					; Nope...
-			b		hrmPtlb64x					; Sniff some more...
-		
-			.align	5							
-			
-hrmNot1st64:
-			mr.		r8,r9						; Remember and test current node
-			beq--	hrmPhyDQd64					; Could not find our node...
-			ld		r9,mpAlias(r9)				; Chain to the next
-			cmpld	r9,r31						; Is this us?
-			bne--	hrmNot1st64					; Not us...
-		
-			ld		r9,mpAlias(r9)				; Get our forward pointer
-			std		r9,mpAlias(r8)				; Unchain us
-			
-			nop									; For alignment
-			
-hrmPhyDQd64:	
-			bl		mapPhysUnlock				; Unlock the physent chain
-
-hrmRetn64:	rldicr	r8,r31,0,51					; Find start of page
-			mr		r3,r31						; Copy the pointer to the mapping
-			lwz		r8,mbvrswap+4(r8)			; Get last half of virtual to real swap
-			bl		mapDrainBusy				; Go wait until mapping is unused
-
-			xor		r3,r31,r8					; Flip mapping address to virtual
-			
-			mtmsrd	r17							; Restore enables/translation/etc.
-			isync
-			
-			b		hrmRetnCmn					; Join the common return path...
-
-
-;
-;			Check hrmBlock32 for comments.
-;
-
-			.align	5
-			
-hrmBlock64:	lis		r29,0xD000					; Get shift to 32MB bsu			
-			rlwinm	r10,r20,mpBSub+1+2,29,29	; Rotate to get 0 if 4K bsu or 13 if 32MB bsu
-			lhz		r24,mpSpace(r31)			; Get the address space hash
-			lhz		r25,mpBSize(r31)			; Get the number of pages in block
-			lwz		r9,mpBlkRemCur(r31)			; Get our current remove position
-			rlwnm	r29,r29,r10,28,31			; Rotate to get 0 or 13
-			addi	r25,r25,1					; Account for zero-based counting
-			ori		r0,r20,mpRIP				; Turn on the remove in progress flag
-			slw		r25,r25,r29					; Adjust for 32MB if needed
-			mfsdr1	r29							; Get the hash table base and size
-			ld		r27,mpVAddr(r31)			; Get the base vaddr
-			subi	r25,r25,1					; Convert back to zero-based counting
-			rlwinm	r5,r29,0,27,31				; Isolate the size
-			sub		r4,r25,r9					; Get number of pages left
-			cmplw	cr1,r9,r25					; Have we already hit the end?
-			addi	r10,r9,mapRemChunk			; Point to the start of the next chunk
-			addi	r2,r4,-mapRemChunk			; See if mapRemChunk or more
-			stb		r0,mpFlags+3(r31)			; Save the flags with the mpRIP bit on
-			srawi	r2,r2,31					; We have -1 if less than mapRemChunk or 0 if equal or more
-			subi	r4,r4,mapRemChunk-1			; Back off for a running start (will be negative for more than mapRemChunk)
-			cmpwi	cr7,r2,0					; Remember if we are doing the last chunk
-			and		r4,r4,r2					; If more than a chunk, bring this back to 0
-			srdi	r27,r27,12					; Change address into page index
-			addi	r4,r4,mapRemChunk-1			; Add mapRemChunk-1 to get max(num left,  chunksize)
-			add		r27,r27,r9					; Adjust vaddr to start of current chunk
-			
-			bgt--	cr1,hrmEndInSight			; Someone is already doing the last chunk...
-			
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			stw		r10,mpBlkRemCur(r31)		; Set next chunk to do (note: this may indicate after end)
-			bl		sxlkUnlock					; Unlock the search list while we are invalidating
-			
-			rlwimi	r24,r24,14,4,17				; Insert a copy of space hash
-			eqv		r26,r26,r26					; Get all foxes here
-			rldimi	r24,r24,28,8				; Make a couple copies up higher
-			rldicr	r29,r29,0,47				; Isolate just the hash table base
-			subfic	r5,r5,46					; Get number of leading zeros
-			srd		r26,r26,r5					; Shift the size bits over		
-			mr		r30,r27						; Get start of chunk to invalidate
-			rldicr	r26,r26,0,56				; Make length in PTEG units
-			add		r22,r4,r30					; Get end page number
-									
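-;
-;			Per page, the loop below locates the PTEG and its PCA roughly as
-;			follows (a sketch; the space-hash replication and exact masking
-;			are simplified):
-;
-;				esid = segment_of(va);			// high-order segment bits
-;				vsid = esid ^ space_hash;		// per-space scramble
-;				hash = vsid ^ page_of(va);		// low-order page index
-;				pteg = hash_base + ((hash << 7) & table_mask);
-;				pca  = hash_base - 4 - 4 * pteg_index;	// PCAs grow down from base
-;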
-hrmBInv64:	srdi	r0,r30,2					; Shift page index over to form ESID
-			rldicr	r0,r0,0,49					; Clean all but segment portion
-			rlwinm	r2,r30,0,16,31				; Get the current page index
-			xor		r0,r0,r24					; Form VSID
-			xor		r8,r2,r0					; Hash the vaddr
-			sldi	r8,r8,7						; Make into PTEG offset
-			and		r23,r8,r26					; Wrap into the hash table
-			rlwinm	r3,r23,27,5,29				; Change to PCA offset (table is always 2GB or less so 32-bit instructions work here)
-			subfic	r3,r3,-4					; Get the PCA entry offset
-			add		r7,r3,r29					; Point to the PCA slot
-			
-			cmplw	cr5,r30,r22					; Have we reached the end of the range?
-								
-			bl		mapLockPteg					; Lock the PTEG
-						
-			rlwinm.	r4,r6,16,0,7				; Extract the block mappings in this here PTEG and see if there are any
-			add		r5,r23,r29					; Point to the PTEG
-			li		r0,0						; Set an invalid PTE value
-			beq++	hrmBNone64					; No block map PTEs in this PTEG...
-			mtcrf	0x80,r4						; Set CRs to select PTE slots
-			mtcrf	0x40,r4						; Set CRs to select PTE slots
-
-
-			bf		0,hrmSlot0s					; No autogen here
-			std		r0,0x00(r5)					; Invalidate PTE
-
-hrmSlot0s:	bf		1,hrmSlot1s					; No autogen here
-			std		r0,0x10(r5)					; Invalidate PTE
-
-hrmSlot1s:	bf		2,hrmSlot2s					; No autogen here
-			std		r0,0x20(r5)					; Invalidate PTE
-
-hrmSlot2s:	bf		3,hrmSlot3s					; No autogen here
-			std		r0,0x30(r5)					; Invalidate PTE
-
-hrmSlot3s:	bf		4,hrmSlot4s					; No autogen here
-			std		r0,0x40(r5)					; Invalidate PTE
-
-hrmSlot4s:	bf		5,hrmSlot5s					; No autogen here
-			std		r0,0x50(r5)					; Invalidate PTE
-
-hrmSlot5s:	bf		6,hrmSlot6s					; No autogen here
-			std		r0,0x60(r5)					; Invalidate PTE
-
-hrmSlot6s:	bf		7,hrmSlot7s					; No autogen here
-			std		r0,0x70(r5)					; Invalidate PTE
-
-hrmSlot7s:	rlwinm	r0,r4,16,16,23				; Move in use to autogen
-			or		r6,r6,r4					; Flip on the free bits that correspond to the autogens we cleared
-			andc	r6,r6,r0					; Turn off all the old autogen bits
-
-hrmBNone64:	eieio								; Make sure all updates come first
-			stw		r6,0(r7)					; Unlock and set the PCA
-
-			addi	r30,r30,1					; bump to the next PTEG
-			bne++	cr5,hrmBInv64				; Go invalidate the next...
-
-			bge+	cr7,hrmDoneChunk			; We have not as yet done the last chunk, go tell our caller to call again...
-
-			mr		r3,r31						; Copy the pointer to the mapping
-			bl		mapDrainBusy				; Go wait until we are sure all other removers are done with this one
-
-			sync								; Make sure memory is consistent
-
-			subi	r5,r25,255					; Subtract TLB size from page count (note we are 0 based here)
-			li		r6,255						; Assume full invalidate for now
-			srawi	r5,r5,31					; Make 0 if we need a full purge, -1 otherwise
-			andc	r6,r6,r5					; Clear max if we have less to do
-			and		r5,r25,r5					; Clear count if we have more than max
-			sldi	r24,r24,28					; Get the full XOR value over to segment position
-			ld		r27,mpVAddr(r31)			; Get the base vaddr
-			li		r7,tlbieLock				; Get the TLBIE lock
-			or		r5,r5,r6					; Get number of TLBIEs needed		
-			
-hrmBTLBlcl:	lwarx	r2,0,r7						; Get the TLBIE lock
-			mr.		r2,r2						; Is it locked?
-			li		r2,1						; Get our lock value
-			bne--	hrmBTLBlcm					; It is locked, go wait...
-			stwcx.	r2,0,r7						; Try to get it
-			bne--	hrmBTLBlcl					; We was beat...
-	
-hrmBTLBj:	sldi	r2,r27,maxAdrSpb			; Move to make room for address space ID
-			rldicr	r2,r2,0,35-maxAdrSpb		; Clear out the extra
-			addic.	r5,r5,-1					; See if we did them all
-			xor		r2,r2,r24					; Make the VSID
-			rldimi	r2,r27,0,36					; Insert the page portion of the VPN
-			rldicl	r2,r2,0,16					; Clear bits 0:15 cause they say we gotta
-
-			tlbie	r2							; Invalidate it everywhere
-			addi	r27,r27,0x1000				; Up to the next page
-			bge++	hrmBTLBj					; Make sure we have done it all...
-			
-			eieio								; Make sure that the tlbie happens first
-			tlbsync								; wait for everyone to catch up
-
-			li		r2,0						; Lock clear value
-
-			ptesync								; Wait for quiet again
-
-			stw		r2,tlbieLock(0)				; Clear the tlbie lock
-			
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkShared					; Go get a shared lock on the mapping lists
-			mr.		r3,r3						; Did we get the lock?
-			bne-	hrmPanic					; Nope...
-			
-			lwz		r4,mpVAddr(r31)				; High order of address
-			lwz		r5,mpVAddr+4(r31)			; Low order of address
-			mr		r3,r28						; Pass in pmap to search
-			mr		r29,r4						; Save this in case we need it (only if promote fails)
-			mr		r30,r5						; Save this in case we need it (only if promote fails)
-			bl		EXT(mapSearchFull)			; Go see if we can find it
-			
-			mr.		r3,r3						; Did we? (And remember mapping address for later)
-			mr		r15,r4						; Save top of next vaddr
-			mr		r16,r5						; Save bottom of next vaddr
-			beq-	hrmPanic					; Nope, not found...
-			
-			cmpld	r3,r31						; Same mapping?
-			bne-	hrmPanic					; Not good...
-			
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkPromote					; Try to promote shared to exclusive
-			mr.		r3,r3						; Could we?
-			mr		r3,r31						; Restore the mapping pointer
-			beq+	hrmBDone2					; Yeah...
-			
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkConvert					; Convert shared to exclusive
-			mr.		r3,r3						; Could we?
-			bne--	hrmPanic					; Nope, we must have timed out...
-			
-			mr		r3,r28						; Pass in pmap to search
-			mr		r4,r29						; High order of address
-			mr		r5,r30						; Low order of address
-			bl		EXT(mapSearchFull)			; Rescan the list
-			
-			mr.		r3,r3						; Did we lose it when we converted?
-			mr		r15,r4						; Save top of next vaddr
-			mr		r16,r5						; Save bottom of next vaddr
-			beq--	hrmPanic					; Yeah, we did, someone tossed it for us...
-
-hrmBDone2:	bl		mapDrainBusy				; Go wait until mapping is unused
-
-			mr		r3,r28						; Get the pmap to remove from
-			mr		r4,r31						; Point to the mapping
-			bl		EXT(mapRemove)				; Remove the mapping from the list	
-					
-			lwz		r4,pmapResidentCnt(r28)		; Get the mapped page count 
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			subi	r4,r4,1						; Drop down the mapped page count
-			stw		r4,pmapResidentCnt(r28)		; Set the mapped page count 
-			bl		sxlkUnlock					; Unlock the search list
-		
-			b		hrmRetn64					; We are all done, get out...
-			
-hrmBTLBlcm:	li		r2,lgKillResv				; Get space unreserve line
-			stwcx.	r2,0,r2						; Unreserve it
-						
-hrmBTLBlcn:	lwz		r2,0(r7)					; Get the TLBIE lock
-			mr.		r2,r2						; Is it held?
-			beq++	hrmBTLBlcl					; Nope...
-			b		hrmBTLBlcn					; Yeah...
-
-;
-;			Guest shadow assist -- mapping remove
-;
-;			Method of operation:
-;				o Locate the VMM extension block and the host pmap
-;				o Obtain the host pmap's search lock exclusively
-;				o Locate the requested mapping in the shadow hash table,
-;				  exit if not found
-;				o If connected, disconnect the PTE and gather R&C to physent
-;				o Locate and lock the physent
-;				o Remove mapping from physent's chain
-;				o Unlock physent
-;				o Unlock pmap's search lock
-;
-;			Non-volatile registers on entry:
-;				r17: caller's msr image
-;				r19: sprg2 (feature flags)
-;				r28: guest pmap's physical address
-;				r29: high-order 32 bits of guest virtual address
-;				r30: low-order 32 bits of guest virtual address
-;
-;			Non-volatile register usage:
-;				r26: VMM extension block's physical address
-;				r27: host pmap's physical address
-;				r28: guest pmap's physical address
-;				r29: physent's physical address
-;				r30: guest virtual address
-;				r31: guest mapping's physical address
-;
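-;			A rough C rendering of the flow (types and helper names are
-;			hypothetical, not the real kernel interfaces):
-;
-;				sxlk_exclusive(&host_pmap->pmapSXlk);
-;				mp = shadow_hash_find(guest_pmap->pmapSpace, gva);
-;				if (mp == NULL) { rc = mapRtNotFnd; goto out; }
-;				if (!(mp->mpFlags & mpgDormant))
-;					pte_disconnect(mp);			// gather R&C into physent
-;				pe = physent_lock(mp->mpPAddr);
-;				physent_unchain(pe, mp);
-;				physent_unlock(pe);
-;				mp->mpFlags = (mp->mpFlags & ~mpgFlags) | mpgFree;
-;				rc = mapRtGuest;
-;			out:
-;				sxlk_unlock(&host_pmap->pmapSXlk);
-;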
-			.align	5			
-hrmGuest:
-			rlwinm	r30,r30,0,0xFFFFF000		; Clean up low-order bits of 32-bit guest vaddr
-			bt++	pf64Bitb,hrmG64				; Test for 64-bit machine
-			lwz		r26,pmapVmmExtPhys+4(r28)	; r26 <- VMM pmap extension block paddr
-			lwz		r27,vmxHostPmapPhys+4(r26)	; r27 <- host pmap's paddr
-			b		hrmGStart					; Join common code
-
-hrmG64:		ld		r26,pmapVmmExtPhys(r28)		; r26 <- VMM pmap extension block paddr
-			ld		r27,vmxHostPmapPhys(r26)	; r27 <- host pmap's paddr
-			rldimi	r30,r29,32,0				; Insert high-order 32 bits of 64-bit guest vaddr			
-
-hrmGStart:	la		r3,pmapSXlk(r27)			; r3 <- host pmap's search lock address
-			bl		sxlkExclusive				; Get lock exclusive
-			
-			lwz		r3,vxsGrm(r26)				; Get mapping remove request count
-
-			lwz		r9,pmapSpace(r28)			; r9 <- guest space ID number
-			la		r31,VMX_HPIDX_OFFSET(r26)	; r31 <- base of hash page physical index
-			srwi	r11,r30,12					; Form shadow hash:
-			xor		r11,r9,r11					; 	spaceID ^ (vaddr >> 12) 
-			rlwinm	r12,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK
-												; Form index offset from hash page number
-			add		r31,r31,r12					; r31 <- hash page index entry
-			li		r0,(GV_SLOTS - 1)			; Prepare to iterate over mapping slots
-			mtctr	r0							;  in this group
-			bt++	pf64Bitb,hrmG64Search		; Separate handling for 64-bit search
-			lwz		r31,4(r31)					; r31 <- hash page paddr
-			rlwimi	r31,r11,GV_HGRP_SHIFT,GV_HGRP_MASK
-												; r31 <- hash group paddr
-												
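-;			In C terms, the shadow hash group is found roughly as follows
-;			(the GV_* names are the real constants; the indexing helpers are
-;			illustrative):
-;
-;				hash  = space_id ^ (gva >> 12);
-;				page  = hpidx[rotl(hash, GV_HPAGE_SHIFT) & GV_HPAGE_MASK];
-;				group = page + (rotl(hash, GV_HGRP_SHIFT) & GV_HGRP_MASK);
-;				// then scan the GV_SLOTS mapping slots in this group
-;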
-			addi	r3,r3,1						; Increment remove request count
-			stw		r3,vxsGrm(r26)				; Update remove request count
-
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			lwz		r5,mpVAddr+4(r31)			; r5 <- 1st mapping slot's virtual address
-			b		hrmG32SrchLp				; Let the search begin!
-			
-			.align	5
-hrmG32SrchLp:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrwi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			lwz		r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr
-			rlwinm	r11,r6,0,mpgFree			; Isolate guest free mapping flag
-			xor		r7,r7,r9					; Compare space ID
-			or		r0,r11,r7					; r0 <- !(free && space match)
-			xor		r8,r8,r30					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && space match && virtual addr match
-			beq		hrmGSrchHit					; Join common path on hit (r31 points to guest mapping)
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	hrmG32SrchLp				; Iterate
-
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			clrrwi	r5,r5,12					; Remove flags from virtual address			
-			rlwinm	r11,r6,0,mpgFree			; Isolate guest free mapping flag
-			xor		r4,r4,r9					; Compare space ID
-			or		r0,r11,r4					; r0 <- !(free && space match)
-			xor		r5,r5,r30					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && space match && virtual addr match
-			beq		hrmGSrchHit					; Join common path on hit (r31 points to guest mapping)
-			b		hrmGSrchMiss				; No joy in our hash group
-			
-hrmG64Search:			
-			ld		r31,0(r31)					; r31 <- hash page paddr
-			insrdi	r31,r11,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2)
-												; r31 <- hash group paddr
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			ld		r5,mpVAddr(r31)				; r5 <- 1st mapping slot's virtual address
-			b		hrmG64SrchLp				; Let the search begin!
-			
-			.align	5
-hrmG64SrchLp:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrdi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			ld		r5,mpVAddr+GV_SLOT_SZ(r31)	; r5 <- next mapping slot's virtual addr
-			rlwinm	r11,r6,0,mpgFree			; Isolate guest free mapping flag
-			xor		r7,r7,r9					; Compare space ID
-			or		r0,r11,r7					; r0 <- !(free && space match)
-			xor		r8,r8,r30					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && space match && virtual addr match
-			beq		hrmGSrchHit					; Join common path on hit (r31 points to guest mapping)
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	hrmG64SrchLp				; Iterate
-
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			clrrdi	r5,r5,12					; Remove flags from virtual address			
-			rlwinm	r11,r6,0,mpgFree			; Isolate guest free mapping flag
-			xor		r4,r4,r9					; Compare space ID
-			or		r0,r11,r4					; r0 <- !(free && space match)
-			xor		r5,r5,r30					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && space match && virtual addr match
-			beq		hrmGSrchHit					; Join common path on hit (r31 points to guest mapping)
-hrmGSrchMiss:
-			lwz		r3,vxsGrmMiss(r26)			; Get remove miss count
-			li		r25,mapRtNotFnd				; Return not found
-			addi	r3,r3,1						; Increment miss count
-			stw		r3,vxsGrmMiss(r26)			; Update miss count
-			b		hrmGReturn					; Join guest return
-
-			.align	5			
-hrmGSrchHit:
-			rlwinm.	r0,r6,0,mpgDormant			; Is this entry dormant?
-			bne		hrmGDormant					; Yes, nothing to disconnect
-			
-			lwz		r3,vxsGrmActive(r26)		; Get active hit count
-			addi	r3,r3,1						; Increment active hit count
-			stw		r3,vxsGrmActive(r26)		; Update hit count
-			
-			bt++	pf64Bitb,hrmGDscon64		; Handle 64-bit disconnect separately
-			bl		mapInvPte32					; Disconnect PTE, invalidate, gather ref and change
-												; r31 <- mapping's physical address
-												; r3  -> PTE slot physical address
-												; r4  -> High-order 32 bits of PTE
-												; r5  -> Low-order  32 bits of PTE
-												; r6  -> PCA
-												; r7  -> PCA physical address
-			rlwinm	r2,r3,29,29,31				; Get PTE's slot number in the PTEG (8-byte PTEs)
-			b		hrmGFreePTE					; Join 64-bit path to release the PTE			
-hrmGDscon64:
-			bl		mapInvPte64					; Disconnect PTE, invalidate, gather ref and change
-			rlwinm	r2,r3,28,29,31				; Get PTE's slot number in the PTEG (16-byte PTEs)
-hrmGFreePTE:
-			mr.		r3,r3						; Was there a valid PTE?
-			beq		hrmGDormant					; No valid PTE, we're almost done
-			lis		r0,0x8000					; Prepare free bit for this slot
-			srw		r0,r0,r2					; Position free bit
-			or		r6,r6,r0					; Set it in our PCA image
-			lwz		r8,mpPte(r31)				; Get PTE offset
-			rlwinm	r8,r8,0,~mpHValid			; Make the offset invalid
-			stw		r8,mpPte(r31)				; Save invalidated PTE offset
-			eieio								; Synchronize all previous updates (mapInvPtexx didn't)
-			stw		r6,0(r7)					; Update PCA and unlock the PTEG
-
-hrmGDormant:
-			lwz		r3,mpPAddr(r31)				; r3 <- physical 4K-page number
-			bl		mapFindLockPN				; Find 'n' lock this page's physent
-			mr.		r29,r3						; Got lock on our physent?
-			beq--	hrmGBadPLock				; No, time to bail out
-
-			crset	cr1_eq						; cr1_eq <- previous link is the anchor
-			bt++	pf64Bitb,hrmGRemove64		; Use 64-bit version on 64-bit machine
-			la		r11,ppLink+4(r29)			; Point to chain anchor
-			lwz		r9,ppLink+4(r29)			; Get chain anchor
-			rlwinm.	r9,r9,0,~ppFlags			; Remove flags, yielding 32-bit physical chain pointer
-hrmGRemLoop:
-			beq-	hrmGPEMissMiss				; End of chain, this is not good
-			cmplw	r9,r31						; Is this the mapping to remove?
-			lwz		r8,mpAlias+4(r9)			; Get forward chain pointer
-			bne		hrmGRemNext					; No, chain onward
-			bt		cr1_eq,hrmGRemRetry			; Mapping to remove is chained from anchor
-			stw		r8,0(r11)					; Unchain gpv->phys mapping
-			b		hrmGDelete					; Finish deleting mapping
-hrmGRemRetry:
-			lwarx	r0,0,r11					; Get previous link
-			rlwimi	r0,r8,0,~ppFlags			; Insert new forward pointer whilst preserving flags
-			stwcx.	r0,0,r11					; Update previous link
-			bne-	hrmGRemRetry				; Lost reservation, retry
-			b		hrmGDelete					; Finish deleting mapping
-			
-hrmGRemNext:
-			la		r11,mpAlias+4(r9)			; Point to (soon to be) previous link
-			crclr	cr1_eq						; ~cr1_eq <- Previous link is not the anchor
-			mr.		r9,r8						; Does next entry exist?
-			b		hrmGRemLoop					; Carry on
-
-hrmGRemove64:
-			li		r7,ppLFAmask				; Get mask to clean up mapping pointer
-			rotrdi	r7,r7,ppLFArrot				; Rotate clean up mask to get 0xF00000000000000F
-			la		r11,ppLink(r29)				; Point to chain anchor
-			ld		r9,ppLink(r29)				; Get chain anchor
-			andc.	r9,r9,r7					; Remove flags, yielding 64-bit physical chain pointer
-hrmGRem64Lp:
-			beq--	hrmGPEMissMiss				; End of chain, this is not good
-			cmpld	r9,r31						; Is this the mapping to remove?
-			ld		r8,mpAlias(r9)				; Get forward chain pointer
-			bne		hrmGRem64Nxt				; No mapping to remove, chain on, dude
-			bt		cr1_eq,hrmGRem64Rt			; Mapping to remove is chained from anchor
-			std		r8,0(r11)					; Unchain gpv->phys mapping
-			b		hrmGDelete					; Finish deleting mapping
-hrmGRem64Rt:
-			ldarx	r0,0,r11					; Get previous link
-			and		r0,r0,r7					; Get flags
-			or		r0,r0,r8					; Insert new forward pointer
-			stdcx.	r0,0,r11					; Slam it back in
-			bne--	hrmGRem64Rt					; Lost reservation, retry
-			b		hrmGDelete					; Finish deleting mapping
-
-			.align	5		
-hrmGRem64Nxt:
-			la		r11,mpAlias(r9)				; Point to (soon to be) previous link
-			crclr	cr1_eq						; ~cr1_eq <- Previous link is not the anchor
-			mr.		r9,r8						; Does next entry exist?
-			b		hrmGRem64Lp					; Carry on
-			
-hrmGDelete:
-			mr		r3,r29						; r3 <- physent addr
-			bl		mapPhysUnlock				; Unlock physent chain
-			lwz		r3,mpFlags(r31)				; Get mapping's flags
-			rlwinm	r3,r3,0,~mpgFlags			; Clear all guest flags
-			ori		r3,r3,mpgFree				; Mark mapping free
-			stw		r3,mpFlags(r31)				; Update flags
-			li		r25,mapRtGuest				; Set return code to 'found guest mapping'
-
-hrmGReturn:
-			la		r3,pmapSXlk(r27)			; r3 <- host pmap search lock phys addr
-			bl		sxlkUnlock					; Release host pmap search lock
-			
-			mr		r3,r25						; r3 <- return code
-			bt++	pf64Bitb,hrmGRtn64			; Handle 64-bit separately
-			mtmsr	r17							; Restore 'rupts, translation
-			isync								; Throw a small wrench into the pipeline
-			b		hrmRetnCmn					; Nothing to do now but pop a frame and return
-hrmGRtn64:	mtmsrd	r17							; Restore 'rupts, translation, 32-bit mode
-			b		hrmRetnCmn					; Join common return
-
-hrmGBadPLock:
-hrmGPEMissMiss:
-			lis		r0,hi16(Choke)				; Seen the arrow on the doorpost
-			ori		r0,r0,lo16(Choke)			; Sayin' "THIS LAND IS CONDEMNED"
-			li		r3,failMapping				; All the way from New Orleans
-			sc									; To Jerusalem
-
-
-/*
- *			mapping *hw_purge_phys(physent) - remove a mapping from the system
- *
- *			Upon entry, R3 contains a pointer to a physent.  
- *
- *			This function removes the first mapping from a physical entry
- *			alias list.  It locks the list, extracts the vaddr and pmap from
- *			the first entry.  It then jumps into the hw_rem_map function.
- *			NOTE: since we jump into rem_map, we need to set up the stack
- *			identically.  Also, we set the next parm to 0 so we do not
- *			try to save a next vaddr.
- *			
- *			We return the virtual address of the removed mapping in R3.
- *
- *			Note that this is designed to be called from 32-bit mode with a stack.
- *
- *			We disable translation and all interruptions here.  This keeps us
- *			from having to worry about a deadlock due to having anything locked
- *			and needing it to process a fault.
- *
- *			Note that this must be done with both interruptions off and VM off
- *	
- * 
- * Remove mapping via physical page (mapping_purge)
- * 
- *  1) lock physent
- *  2) extract vaddr and pmap
- *  3) unlock physent
- *  4) do "remove mapping via pmap"
- *  
- *	
- */
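-/*
- *			A minimal C sketch of the flow (pmapTrans is the real space-hash
- *			to pmap translate table; other helpers are illustrative):
- *
- *				physent_lock(pp);
- *				mp = pp->ppLink & ~ppFlags;		// first mapping, flags stripped
- *				if (mp == NULL) { physent_unlock(pp); return mapRtEmpty; }
- *				space = mp->mpSpace;
- *				va    = mp->mpVAddr;
- *				pmap  = pmapTrans[space].pmapPAddr;
- *				physent_unlock(pp);
- *				return hw_rem_map(pmap, va, NULL);	// actually a jump to hrmJoin
- */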
-
-			.align	5
-			.globl	EXT(hw_purge_phys)
-
-LEXT(hw_purge_phys)
-			stwu	r1,-(FM_ALIGN(hrmStackSize)+FM_SIZE)(r1)	; Make some space on the stack
-			mflr	r0							; Save the link register
-			stw		r15,FM_ARG0+0x00(r1)		; Save a register
-			stw		r16,FM_ARG0+0x04(r1)		; Save a register
-			stw		r17,FM_ARG0+0x08(r1)		; Save a register
-			stw		r18,FM_ARG0+0x0C(r1)		; Save a register
-			stw		r19,FM_ARG0+0x10(r1)		; Save a register
-			stw		r20,FM_ARG0+0x14(r1)		; Save a register
-			stw		r21,FM_ARG0+0x18(r1)		; Save a register
-			stw		r22,FM_ARG0+0x1C(r1)		; Save a register
-			stw		r23,FM_ARG0+0x20(r1)		; Save a register
-			stw		r24,FM_ARG0+0x24(r1)		; Save a register
-			stw		r25,FM_ARG0+0x28(r1)		; Save a register
-			li		r6,0						; Set no next address return
-			stw		r26,FM_ARG0+0x2C(r1)		; Save a register
-			stw		r27,FM_ARG0+0x30(r1)		; Save a register
-			stw		r28,FM_ARG0+0x34(r1)		; Save a register
-			stw		r29,FM_ARG0+0x38(r1)		; Save a register
-			stw		r30,FM_ARG0+0x3C(r1)		; Save a register
-			stw		r31,FM_ARG0+0x40(r1)		; Save a register
-			stw		r6,FM_ARG0+0x44(r1)			; Save address to save next mapped vaddr
-			stw		r0,(FM_ALIGN(hrmStackSize)+FM_SIZE+FM_LR_SAVE)(r1)	; Save the return
-
-			bl		EXT(mapSetUp)				; Turn off interrupts, translation, and possibly enter 64-bit
-
-			bl		mapPhysLock					; Lock the physent
-			
- 			bt++	pf64Bitb,hppSF				; skip if 64-bit (only they take the hint)
-		
-			lwz		r12,ppLink+4(r3)			; Grab the pointer to the first mapping
- 			li		r0,ppFlags					; Set the bottom stuff to clear
-			b		hppJoin						; Join the common...
-			
-hppSF:		li		r0,ppLFAmask
-			ld		r12,ppLink(r3)				; Get the pointer to the first mapping
-			rotrdi	r0,r0,ppLFArrot				; Rotate clean up mask to get 0xF00000000000000F
-
-hppJoin:	andc.	r12,r12,r0					; Clean and test link
-			beq--	hppNone						; There are no more mappings on physical page
-			
-			lis		r28,hi16(EXT(pmapTrans))	; Get the top of the start of the pmap hash to pmap translate table
-			lhz		r7,mpSpace(r12)			; Get the address space hash
-			ori		r28,r28,lo16(EXT(pmapTrans))	; Get the top of the start of the pmap hash to pmap translate table
-			slwi	r0,r7,2						; Multiply space by 4
-			lwz		r4,mpVAddr(r12)				; Get the top of the vaddr
-			slwi	r7,r7,3						; Multiply space by 8
-			lwz		r5,mpVAddr+4(r12)			; and the bottom
-			add		r7,r7,r0					; Get correct displacement into translate table
-			lwz		r28,0(r28)					; Get the actual translation map
-	
-			add		r28,r28,r7					; Point to the pmap translation
-					
-			bl		mapPhysUnlock				; Time to unlock the physical entry
-			
- 			bt++	pf64Bitb,hppSF2				; skip if 64-bit (only they take the hint)
-			
-			lwz		r28,pmapPAddr+4(r28)		; Get the physical address of the pmap
-			b		hrmJoin						; Go remove the mapping...
-			
-hppSF2:		ld		r28,pmapPAddr(r28)			; Get the physical address of the pmap
-			b		hrmJoin						; Go remove the mapping...
-
-			.align	5
-			
-hppNone:	bl		mapPhysUnlock				; Time to unlock the physical entry
-
-			bt++	pf64Bitb,hppSF3				; skip if 64-bit (only they take the hint)...
-
-			mtmsr	r11							; Restore enables/translation/etc.
-			isync
-			b		hppRetnCmn					; Join the common return code...
-
-hppSF3:		mtmsrd	r11							; Restore enables/translation/etc.
-			isync
-
-;
-;			NOTE: we have not used any registers other than the volatiles to this point
-;
-
-hppRetnCmn:	lwz		r12,(FM_ALIGN(hrmStackSize)+FM_SIZE+FM_LR_SAVE)(r1)	; Restore the return
-
-			li		r3,mapRtEmpty				; Physent chain is empty
-			mtlr	r12							; Restore the return
-			lwz		r1,0(r1)					; Pop the stack
-			blr									; Leave...
-
-/*
- *			mapping *hw_purge_map(pmap, vaddr, addr64_t *next) - remove a mapping from the system.
- *
- *			Upon entry, R3 contains a pointer to a pmap.  Since vaddr is
- *			a 64-bit quantity, it is passed in R4 (high half) and R5 (low half).
- *			
- *			We return the virtual address of the removed mapping as a 
- *			R3.
- *
- *			Note that this is designed to be called from 32-bit mode with a stack.
- *
- *			We disable translation and all interruptions here.  This keeps us
- *			from having to worry about a deadlock due to having anything locked
- *			and needing it to process a fault.
- *
- *			Note that this must be done with both interruptions off and VM off
- *	
- *  Remove a mapping which can be reestablished by VM
- *
- */
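-/*
- *			In rough C terms (helper predicates are illustrative):
- *
- *				sxlk_exclusive(&pmap->pmapSXlk);
- *				for (;;) {
- *					mp = mapSearchFull(pmap, va, &next_va);
- *					if (mp && is_normal(mp) && !is_permanent(mp) && !is_busy(mp))
- *						break;					// found a removable mapping
- *					if (next_va == 0)
- *						return mapRtNotFnd;		// no more mappings in this pmap
- *					va = next_va;				// keep scanning
- *				}
- *				// falls into the common removal path (hrmGotX)
- */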
-
-			.align	5
-			.globl	EXT(hw_purge_map)
-
-LEXT(hw_purge_map)
-			stwu	r1,-(FM_ALIGN(hrmStackSize)+FM_SIZE)(r1)	; Make some space on the stack
-			mflr	r0							; Save the link register
-			stw		r15,FM_ARG0+0x00(r1)		; Save a register
-			stw		r16,FM_ARG0+0x04(r1)		; Save a register
-			stw		r17,FM_ARG0+0x08(r1)		; Save a register
-			stw		r18,FM_ARG0+0x0C(r1)		; Save a register
-			stw		r19,FM_ARG0+0x10(r1)		; Save a register
- 			mfsprg	r19,2						; Get feature flags 
-			stw		r20,FM_ARG0+0x14(r1)		; Save a register
-			stw		r21,FM_ARG0+0x18(r1)		; Save a register
-			mtcrf	0x02,r19					; move pf64Bit cr6
-			stw		r22,FM_ARG0+0x1C(r1)		; Save a register
-			stw		r23,FM_ARG0+0x20(r1)		; Save a register
-			stw		r24,FM_ARG0+0x24(r1)		; Save a register
-			stw		r25,FM_ARG0+0x28(r1)		; Save a register
-			stw		r26,FM_ARG0+0x2C(r1)		; Save a register
-			stw		r27,FM_ARG0+0x30(r1)		; Save a register
-			stw		r28,FM_ARG0+0x34(r1)		; Save a register
-			stw		r29,FM_ARG0+0x38(r1)		; Save a register
-			stw		r30,FM_ARG0+0x3C(r1)		; Save a register
-			stw		r31,FM_ARG0+0x40(r1)		; Save a register
-			stw		r6,FM_ARG0+0x44(r1)			; Save address to save next mapped vaddr
-			stw		r0,(FM_ALIGN(hrmStackSize)+FM_SIZE+FM_LR_SAVE)(r1)	; Save the return
-
-#if DEBUG
-			lwz		r11,pmapFlags(r3)			; Get pmaps flags
-			rlwinm.	r11,r11,0,pmapVMgsaa		; Is guest shadow assist active? 
-			bne		hpmPanic					; Call not valid for guest shadow assist pmap
-#endif
-			
- 			bt++	pf64Bitb,hpmSF1				; skip if 64-bit (only they take the hint)
-			lwz		r9,pmapvr+4(r3)				; Get conversion mask
-			b		hpmSF1x						; Done...
-			
-hpmSF1:		ld		r9,pmapvr(r3)				; Get conversion mask
-
-hpmSF1x:	
-			bl		EXT(mapSetUp)				; Turn off interrupts, translation, and possibly enter 64-bit
-
-			xor		r28,r3,r9					; Convert the pmap to physical addressing
-
-			mr		r17,r11						; Save the MSR
-
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkExclusive				; Go get an exclusive lock on the mapping lists
-			mr.		r3,r3						; Did we get the lock?
-			bne--	hrmBadLock					; Nope...
-;
-;			Note that we do a full search (i.e., no shortcut level skips, etc.)
-;			here so that we will know the previous elements and can dequeue them
-;			later.
-;
-hpmSearch:
-			mr		r3,r28						; Pass in pmap to search
-			mr		r29,r4						; Top half of vaddr
-			mr		r30,r5						; Bottom half of vaddr
-			bl		EXT(mapSearchFull)			; Rescan the list
-			mr.		r31,r3						; Did we? (And remember mapping address for later)
-			or		r0,r4,r5					; Are we beyond the end?
-			mr		r15,r4						; Save top of next vaddr
-			cmplwi	cr1,r0,0					; See if there is another
-			mr		r16,r5						; Save bottom of next vaddr
-			bne--	hpmGotOne					; We found one, go check it out...
-
-hpmCNext:	bne++	cr1,hpmSearch				; There is another to check...
-			b		hrmNotFound					; No more in pmap to check...
-
-hpmGotOne:	lwz		r20,mpFlags(r3)				; Get the flags
-			andi.	r0,r20,lo16(mpType|mpPerm)	; cr0_eq <- normal mapping && !permanent
-			rlwinm	r21,r20,8,24,31				; Extract the busy count
-			cmplwi	cr2,r21,0					; Is it busy?
-			crand	cr0_eq,cr2_eq,cr0_eq		; not busy and can be removed?
-			beq++	hrmGotX						; Found, branch to remove the mapping...
-			b		hpmCNext					; Nope...
-
-hpmPanic:	lis		r0,hi16(Choke)				; System abend
-			ori		r0,r0,lo16(Choke)			; System abend
-			li		r3,failMapping				; Show that we failed some kind of mapping thing
-			sc
-
-/*
- *			mapping *hw_purge_space(physent, pmap) - remove a mapping from the system based upon address space
- *
- *			Upon entry, R3 contains a pointer to the physent and R4 contains
- *			a pointer to the pmap.
- *
- *			This function removes the first mapping for a specific pmap from a physical entry
- *			alias list.  It locks the list, extracts the vaddr and pmap from
- *			the first appropriate entry.  It then jumps into the hw_rem_map function.
- *			NOTE: since we jump into rem_map, we need to set up the stack
- *			identically.  Also, we set the next parm to 0 so we do not
- *			try to save a next vaddr.
- *			
- *			We return the virtual address of the removed mapping in R3.
- *
- *			Note that this is designed to be called from 32-bit mode with a stack.
- *
- *			We disable translation and all interruptions here.  This keeps us
- *			from having to worry about a deadlock due to having anything locked
- *			and needing it to process a fault.
- *
- *			Note that this must be done with both interruptions off and VM off
- *	
- * 
- * Remove mapping via physical page (mapping_purge)
- * 
- *  1) lock physent
- *  2) extract vaddr and pmap
- *  3) unlock physent
- *  4) do "remove mapping via pmap"
- *  
- *	
- */
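-/*
- *			A C sketch of the scan (list helpers are illustrative):
- *
- *				physent_lock(pp);
- *				for (mp = first_mapping(pp); mp != NULL; mp = next_alias(mp))
- *					if (mp->mpSpace == pmap->pmapSpace)
- *						break;					// first mapping in our space
- *				if (mp == NULL) { physent_unlock(pp); return mapRtEmpty; }
- *				va = mp->mpVAddr;
- *				physent_unlock(pp);
- *				// join the common removal path (hrmJoin) with pmap and va
- */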
-
-			.align	5
-			.globl	EXT(hw_purge_space)
-
-LEXT(hw_purge_space)
-			stwu	r1,-(FM_ALIGN(hrmStackSize)+FM_SIZE)(r1)	; Make some space on the stack
-			mflr	r0							; Save the link register
-			stw		r15,FM_ARG0+0x00(r1)		; Save a register
-			stw		r16,FM_ARG0+0x04(r1)		; Save a register
-			stw		r17,FM_ARG0+0x08(r1)		; Save a register
- 			mfsprg	r2,2						; Get feature flags 
-			stw		r18,FM_ARG0+0x0C(r1)		; Save a register
-			stw		r19,FM_ARG0+0x10(r1)		; Save a register
-			stw		r20,FM_ARG0+0x14(r1)		; Save a register
-			stw		r21,FM_ARG0+0x18(r1)		; Save a register
-			stw		r22,FM_ARG0+0x1C(r1)		; Save a register
-			mtcrf	0x02,r2						; move pf64Bit cr6
-			stw		r23,FM_ARG0+0x20(r1)		; Save a register
-			stw		r24,FM_ARG0+0x24(r1)		; Save a register
-			stw		r25,FM_ARG0+0x28(r1)		; Save a register
-			stw		r26,FM_ARG0+0x2C(r1)		; Save a register
-			stw		r27,FM_ARG0+0x30(r1)		; Save a register
-			li		r6,0						; Set no next address return
-			stw		r28,FM_ARG0+0x34(r1)		; Save a register
-			stw		r29,FM_ARG0+0x38(r1)		; Save a register
-			stw		r30,FM_ARG0+0x3C(r1)		; Save a register
-			stw		r31,FM_ARG0+0x40(r1)		; Save a register
-			stw		r6,FM_ARG0+0x44(r1)			; Save address to save next mapped vaddr
-			stw		r0,(FM_ALIGN(hrmStackSize)+FM_SIZE+FM_LR_SAVE)(r1)	; Save the return
-
-#if DEBUG
-			lwz		r11,pmapFlags(r4)			; Get pmaps flags
-			rlwinm.	r11,r11,0,pmapVMgsaa		; Is guest shadow assist active? 
-			bne		hpsPanic					; Call not valid for guest shadow assist pmap
-#endif
-			
-			bt++	pf64Bitb,hpsSF1				; skip if 64-bit (only they take the hint)
-
-			lwz		r9,pmapvr+4(r4)				; Get conversion mask for pmap
-
-			b		hpsSF1x						; Done...
-			
-hpsSF1:		ld		r9,pmapvr(r4)				; Get conversion mask for pmap
-
-hpsSF1x:	bl		EXT(mapSetUp)				; Turn off interrupts, translation, and possibly enter 64-bit
-			
-			xor		r4,r4,r9					; Convert the pmap to physical addressing
-
-			bl		mapPhysLock					; Lock the physent
-			 
- 			lwz		r8,pmapSpace(r4)			; Get the space hash
- 
- 			bt++	pf64Bitb,hpsSF				; skip if 64-bit (only they take the hint)
-		
-			lwz		r12,ppLink+4(r3)			; Grab the pointer to the first mapping
-			
-hpsSrc32:	rlwinm.	r12,r12,0,~ppFlags			; Clean and test mapping address
-			beq		hpsNone						; Did not find one...
-			
-			lhz		r10,mpSpace(r12)			; Get the space
-			
-			cmplw	r10,r8						; Is this one of ours?
-			beq		hpsFnd						; Yes...
-			
-			lwz		r12,mpAlias+4(r12)			; Chain on to the next
-			b		hpsSrc32					; Check it out...
-
-			.align	5
-		
-hpsSF:		li		r0,ppLFAmask
-			ld		r12,ppLink(r3)				; Get the pointer to the first mapping
-			rotrdi	r0,r0,ppLFArrot				; Rotate clean up mask to get 0xF00000000000000F
-			
-hpsSrc64:	andc.	r12,r12,r0					; Clean and test mapping address
-			beq		hpsNone						; Did not find one...
-			
-			lhz		r10,mpSpace(r12)			; Get the space
-			
-			cmplw	r10,r8						; Is this one of ours?
-			beq		hpsFnd						; Yes...
-			
-			ld		r12,mpAlias(r12)			; Chain on to the next
-			b		hpsSrc64					; Check it out...
-			
-			.align	5
-			
-hpsFnd:		mr		r28,r4						; Set the pmap physical address
-			lwz		r4,mpVAddr(r12)				; Get the top of the vaddr
-			lwz		r5,mpVAddr+4(r12)			; and the bottom
-			
-			bl		mapPhysUnlock				; Time to unlock the physical entry
-			b		hrmJoin						; Go remove the mapping...
-			
-			.align	5
-			
-hpsNone:	bl		mapPhysUnlock				; Time to unlock the physical entry
-
-			bt++	pf64Bitb,hpsSF3				; skip if 64-bit (only they take the hint)...
-
-			mtmsr	r11							; Restore enables/translation/etc.
-			isync
-			b		hpsRetnCmn					; Join the common return code...
-
-hpsSF3:		mtmsrd	r11							; Restore enables/translation/etc.
-			isync
-
-;
-;			NOTE: we have not used any registers other than the volatiles to this point
-;
-
-hpsRetnCmn:	lwz		r12,(FM_ALIGN(hrmStackSize)+FM_SIZE+FM_LR_SAVE)(r1)	; Restore the return
-
-			li		r3,mapRtEmpty				; No mappings for specified pmap on physent chain
-			mtlr	r12							; Restore the return
-			lwz		r1,0(r1)					; Pop the stack
-			blr									; Leave...
-
-hpsPanic:	lis		r0,hi16(Choke)				; System abend
-			ori		r0,r0,lo16(Choke)			; System abend
-			li		r3,failMapping				; Show that we failed some kind of mapping thing
-			sc
-			
-/*
- *			mapping *hw_scrub_guest(physent, pmap) - remove first guest mapping associated with host
- *                                                    on this physent chain
- *
- *			Locates the first guest mapping on the physent chain that is associated with the
- *			specified host pmap. If this succeeds, the mapping is removed by joining the general
- *			remove path; otherwise, we return NULL. The caller is expected to invoke this entry
- *			repeatedly until no additional guest mappings that match our criteria are removed.
- *
- *			Because this entry point exits through hw_rem_map, our prolog pushes its frame.
- *
- *			Parameters:
- *				r3 : physent, 32-bit kernel virtual address
- *				r4 : host pmap, 32-bit kernel virtual address
- *
- *			Volatile register usage (for linkage through hrmJoin):
- *				r4 : high-order 32 bits of guest virtual address
- *				r5 : low-order 32 bits of guest virtual address
- *				r11: saved MSR image
- *
- *			Non-volatile register usage:
- *				r26: VMM extension block's physical address
- *				r27: host pmap's physical address
- *				r28: guest pmap's physical address
- *	
- */
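-/*
- *			In rough C terms (helpers are illustrative; pmapTrans and the
- *			pmapVmmExtPhys linkage are the real structures):
- *
- *				physent_lock(pp);
- *				for (mp = first_mapping(pp); mp != NULL; mp = next_alias(mp)) {
- *					guest = pmapTrans[mp->mpSpace].pmapPAddr;
- *					if (type_of(mp) == mpGuest &&
- *					    guest->pmapVmmExtPhys == host_vmm_ext)
- *						break;				// guest mapping tied to this host
- *				}
- *				if (mp == NULL) { physent_unlock(pp); return mapRtEmpty; }
- *				physent_unlock(pp);
- *				// join the common removal path (hrmJoin)
- */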
-
-			.align	5
-			.globl	EXT(hw_scrub_guest)
-
-LEXT(hw_scrub_guest)
-			stwu	r1,-(FM_ALIGN(hrmStackSize)+FM_SIZE)(r1)	; Make some space on the stack
-			mflr	r0							; Save the link register
-			stw		r15,FM_ARG0+0x00(r1)		; Save a register
-			stw		r16,FM_ARG0+0x04(r1)		; Save a register
-			stw		r17,FM_ARG0+0x08(r1)		; Save a register
- 			mfsprg	r2,2						; Get feature flags 
-			stw		r18,FM_ARG0+0x0C(r1)		; Save a register
-			stw		r19,FM_ARG0+0x10(r1)		; Save a register
-			stw		r20,FM_ARG0+0x14(r1)		; Save a register
-			stw		r21,FM_ARG0+0x18(r1)		; Save a register
-			stw		r22,FM_ARG0+0x1C(r1)		; Save a register
-			mtcrf	0x02,r2						; move pf64Bit cr6
-			stw		r23,FM_ARG0+0x20(r1)		; Save a register
-			stw		r24,FM_ARG0+0x24(r1)		; Save a register
-			stw		r25,FM_ARG0+0x28(r1)		; Save a register
-			stw		r26,FM_ARG0+0x2C(r1)		; Save a register
-			stw		r27,FM_ARG0+0x30(r1)		; Save a register
-			li		r6,0						; Set no next address return
-			stw		r28,FM_ARG0+0x34(r1)		; Save a register
-			stw		r29,FM_ARG0+0x38(r1)		; Save a register
-			stw		r30,FM_ARG0+0x3C(r1)		; Save a register
-			stw		r31,FM_ARG0+0x40(r1)		; Save a register
-			stw		r6,FM_ARG0+0x44(r1)			; Save address to save next mapped vaddr
-			stw		r0,(FM_ALIGN(hrmStackSize)+FM_SIZE+FM_LR_SAVE)(r1)	; Save the return
-
-			lwz		r11,pmapVmmExt(r4)			; get VMM pmap extension block vaddr
-
-			bt++	pf64Bitb,hsg64Salt			; Test for 64-bit machine
-			lwz		r26,pmapVmmExtPhys+4(r4)	; Get VMM pmap extension block paddr
-			lwz		r9,pmapvr+4(r4)				; Get 32-bit virt<->real conversion salt
-			b		hsgStart					; Get to work
-
-hsg64Salt:	ld		r26,pmapVmmExtPhys(r4)		; Get VMM pmap extension block paddr
-			ld		r9,pmapvr+4(r4)				; Get 64-bit virt<->real conversion salt
-			
-hsgStart:	bl		EXT(mapSetUp)				; Disable 'rupts, translation, enter 64-bit mode
-			xor		r27,r4,r9					; Convert host pmap_t virt->real
-			bl		mapPhysLock					; Lock the physent
-
-			bt++	pf64Bitb,hsg64Scan			; Test for 64-bit machine
-
-			lwz		r12,ppLink+4(r3)			; Grab the pointer to the first mapping
-hsg32Loop:	rlwinm.	r12,r12,0,~ppFlags			; Clean and test mapping address
-			beq		hsg32Miss					; Did not find one...
-			lwz		r8,mpFlags(r12)				; Get mapping's flags
-			lhz		r7,mpSpace(r12)				; Get mapping's space id
-			rlwinm	r8,r8,0,mpType				; Extract mapping's type code
-			lis		r28,hi16(EXT(pmapTrans))	; Get the top of the start of the pmap hash to pmap translate table
-			xori	r8,r8,mpGuest				; Is it a guest mapping?
-			ori		r28,r28,lo16(EXT(pmapTrans))	; Get the top of the start of the pmap hash to pmap translate table
-			slwi	r9,r7,2						; Multiply space by 4
-			lwz		r28,0(r28)					; Get the actual translation map
-			lwz		r4,mpVAddr(r12)				; Get the top of the vaddr
-			slwi	r7,r7,3						; Multiply space by 8
-			lwz		r5,mpVAddr+4(r12)			; Get the bottom of the vaddr
-			add		r7,r7,r9					; Get correct displacement into translate table
-			add		r28,r28,r7					; Point to the pmap translation
-			lwz		r28,pmapPAddr+4(r28)		; Get guest pmap paddr
-			lwz		r7,pmapVmmExtPhys+4(r28)	; Get VMM extension block paddr
-			xor		r7,r7,r26					; Is guest associated with specified host?
-			or.		r7,r7,r8					; Guest mapping && associated with host?
-			lwz		r12,mpAlias+4(r12)			; Chain on to the next
-			bne		hsg32Loop					; Try next mapping on alias chain			
-
-hsg32Hit:	bl		mapPhysUnlock				; Unlock physent chain
-			b		hrmJoin						; Join common path for mapping removal
-			
-			.align	5
-hsg32Miss:	bl		mapPhysUnlock				; Unlock physent chain
-			mtmsr	r11							; Restore 'rupts, translation
-			isync								; Throw a small wrench into the pipeline
-			li		r3,mapRtEmpty				; No mappings found matching specified criteria
-			b		hrmRetnCmn					; Exit through common epilog
-			
-			.align	5			
-hsg64Scan:	li		r6,ppLFAmask				; Get lock, flag, attribute mask seed
-			ld		r12,ppLink(r3)				; Grab the pointer to the first mapping
-			rotrdi	r6,r6,ppLFArrot				; Rotate clean up mask to get 0xF00000000000000F
-hsg64Loop:	andc.	r12,r12,r6					; Clean and test mapping address
-			beq		hsg64Miss					; Did not find one...
-			lwz		r8,mpFlags(r12)				; Get mapping's flags
-			lhz		r7,mpSpace(r12)				; Get mapping's space id
-			rlwinm	r8,r8,0,mpType				; Extract mapping's type code
-			lis		r28,hi16(EXT(pmapTrans))	; Get the top of the start of the pmap hash to pmap translate table
-			xori	r8,r8,mpGuest				; Is it a guest mapping?
-			ori		r28,r28,lo16(EXT(pmapTrans))	; Get the top of the start of the pmap hash to pmap translate table
-			slwi	r9,r7,2						; Multiply space by 4
-			lwz		r28,0(r28)					; Get the actual translation map
-			lwz		r4,mpVAddr(r12)				; Get the top of the vaddr
-			slwi	r7,r7,3						; Multiply space by 8
-			lwz		r5,mpVAddr+4(r12)			; Get the bottom of the vaddr
-			add		r7,r7,r9					; Get correct displacement into translate table
-			add		r28,r28,r7					; Point to the pmap translation
-			ld		r28,pmapPAddr(r28)			; Get guest pmap paddr
-			ld		r7,pmapVmmExtPhys(r28)		; Get VMM extension block paddr
-			xor		r7,r7,r26					; Is guest associated with specified host?
-			or.		r7,r7,r8					; Guest mapping && associated with host?
-			ld		r12,mpAlias(r12)			; Chain on to the next
-			bne		hsg64Loop					; Try next mapping on alias chain			
-
-hsg64Hit:	bl		mapPhysUnlock				; Unlock physent chain
-			b		hrmJoin						; Join common path for mapping removal
-			
-			.align	5
-hsg64Miss:	bl		mapPhysUnlock				; Unlock physent chain
-			mtmsrd	r11							; Restore 'rupts, translation
-			li		r3,mapRtEmpty				; No mappings found matching specified criteria
-			b		hrmRetnCmn					; Exit through common epilog
-
-
-/*
- *			mapping *hw_find_space(physent, space) - finds the first mapping on physent for specified space
- *
- *			Upon entry, R3 contains a pointer to a physent.  
- *			space is the space ID from the pmap in question
- *
- *			We return the virtual address of the found mapping in 
- *			R3.  Note that the mapping's busy count is bumped.
- *
- *			Note that this is designed to be called from 32-bit mode with a stack.
- *
- *			We disable translation and all interruptions here.  This keeps us
- *			from having to worry about a deadlock due to having anything locked
- *			and needing it to process a fault.
- *	
- */
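-/*
- *			A C sketch (list helpers are illustrative):
- *
- *				physent_lock(pp);
- *				for (mp = first_mapping(pp); mp != NULL; mp = next_alias(mp))
- *					if (mp->mpSpace == space)
- *						break;
- *				if (mp != NULL)
- *					mapBumpBusy(mp);		// pin it so it cannot disappear
- *				physent_unlock(pp);
- *				return mp;					// (physical->virtual flip omitted)
- */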
-
-			.align	5
-			.globl	EXT(hw_find_space)
-
-LEXT(hw_find_space)
-			stwu	r1,-(FM_SIZE)(r1)			; Make some space on the stack
-			mflr	r0							; Save the link register
-			mr		r8,r4						; Remember the space
-			stw		r0,(FM_SIZE+FM_LR_SAVE)(r1)	; Save the return
-
-			bl		EXT(mapSetUp)				; Turn off interrupts, translation, and possibly enter 64-bit
-
-			bl		mapPhysLock					; Lock the physent
- 
- 			bt++	pf64Bitb,hfsSF				; skip if 64-bit (only they take the hint)
-		
-			lwz		r12,ppLink+4(r3)			; Grab the pointer to the first mapping
-			
-hfsSrc32:	rlwinm.	r12,r12,0,~ppFlags			; Clean and test mapping address
-			beq		hfsNone						; Did not find one...
-			
-			lhz		r10,mpSpace(r12)			; Get the space
-			
-			cmplw	r10,r8						; Is this one of ours?
-			beq		hfsFnd						; Yes...
-			
-			lwz		r12,mpAlias+4(r12)			; Chain on to the next
-			b		hfsSrc32					; Check it out...
-
-			.align	5
-		
-hfsSF:		li		r0,ppLFAmask
-			ld		r12,ppLink(r3)				; Get the pointer to the first mapping
-			rotrdi	r0,r0,ppLFArrot				; Rotate clean up mask to get 0xF00000000000000F
-			
-hfsSrc64:	andc.	r12,r12,r0					; Clean and test mapping address
-			beq		hfsNone						; Did not find one...
-			
-			lhz		r10,mpSpace(r12)			; Get the space
-			
-			cmplw	r10,r8						; Is this one of ours?
-			beq		hfsFnd						; Yes...
-			
-			ld		r12,mpAlias(r12)			; Chain on to the next
-			b		hfsSrc64					; Check it out...
-			
-			.align	5
-			
-hfsFnd:		mr		r8,r3						; Save the physent
-			mr		r3,r12						; Point to the mapping
-			bl		mapBumpBusy					; If we found it, bump up the busy count so the mapping does not disappear
-
-			mr		r3,r8						; Get back the physical entry
-			li		r7,0xFFF					; Get a page size mask
-			bl		mapPhysUnlock				; Time to unlock the physical entry
-		
-			andc	r3,r12,r7					; Move the mapping back down to a page	
-			lwz		r3,mbvrswap+4(r3)			; Get last half of virtual to real swap
-			xor		r12,r3,r12					; Convert to virtual
-			b		hfsRet						; Time to return
-			
-			.align	5
-			
-hfsNone:	bl		mapPhysUnlock				; Time to unlock the physical entry
-			
-hfsRet:		bt++	pf64Bitb,hfsSF3				; skip if 64-bit (only they take the hint)...
-
-			mtmsr	r11							; Restore enables/translation/etc.
-			isync
-			b		hfsRetnCmn					; Join the common return code...
-
-hfsSF3:		mtmsrd	r11							; Restore enables/translation/etc.
-			isync
-
-;
-;			NOTE: we have not used any registers other than the volatiles to this point
-;
-
-hfsRetnCmn:	mr		r3,r12						; Get the mapping or a 0 if we failed
-
-#if DEBUG
-			mr.		r3,r3						; Anything to return?
-			beq		hfsRetnNull					; Nope
-			lwz		r11,mpFlags(r3)				; Get mapping flags
-			rlwinm	r0,r11,0,mpType				; Isolate the mapping type
-			cmplwi	r0,mpGuest					; Shadow guest mapping?
-			beq		hfsPanic					; Yup, kick the bucket
-hfsRetnNull:
-#endif
-
-			lwz		r12,(FM_SIZE+FM_LR_SAVE)(r1)	; Restore the return
-
-			mtlr	r12							; Restore the return
-			lwz		r1,0(r1)					; Pop the stack
-			blr									; Leave...
-
-hfsPanic:	lis		r0,hi16(Choke)				; System abend
-			ori		r0,r0,lo16(Choke)			; System abend
-			li		r3,failMapping				; Show that we failed some kind of mapping thing
-			sc
-
-;
-;			mapping *hw_find_map(pmap, va, *nextva) - Looks up a vaddr in a pmap
-;			Returns 0 if not found or the virtual address of the mapping if
-;			it is.  Also, the mapping has the busy count bumped.
-;
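-;			In C terms, roughly (helpers are illustrative):
-;
-;				sxlk_shared(&pmap->pmapSXlk);
-;				mp = mapSearch(pmap, va, &nextva);
-;				if (mp != NULL && !(mp->mpFlags & mpRIP))
-;					mapBumpBusy(mp);		// pin the mapping
-;				else
-;					mp = NULL;				// not found or being removed
-;				sxlk_unlock(&pmap->pmapSXlk);
-;				*next_va = nextva;
-;				return mp;
-;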
-			.align	5
-			.globl	EXT(hw_find_map)
-
-LEXT(hw_find_map)
- 			stwu	r1,-(FM_ALIGN((31-25+1)*4)+FM_SIZE)(r1)	; Make some space on the stack
-			mflr	r0							; Save the link register
-			stw		r25,FM_ARG0+0x00(r1)		; Save a register
-			stw		r26,FM_ARG0+0x04(r1)		; Save a register
-			mr		r25,r6						; Remember address of next va
-			stw		r27,FM_ARG0+0x08(r1)		; Save a register
-			stw		r28,FM_ARG0+0x0C(r1)		; Save a register
-			stw		r29,FM_ARG0+0x10(r1)		; Save a register
-			stw		r30,FM_ARG0+0x14(r1)		; Save a register
-			stw		r31,FM_ARG0+0x18(r1)		; Save a register
-			stw		r0,(FM_ALIGN((31-25+1)*4)+FM_SIZE+FM_LR_SAVE)(r1)	; Save the return
-
-#if DEBUG
-			lwz		r11,pmapFlags(r3)			; Get pmaps flags
-			rlwinm.	r11,r11,0,pmapVMgsaa		; Is guest shadow assist active? 
-			bne		hfmPanic					; Call not valid for guest shadow assist pmap
-#endif
-			
-			lwz		r6,pmapvr(r3)				; Get the first part of the VR translation for pmap
-			lwz		r7,pmapvr+4(r3)				; Get the second part
-
-
-			bl		EXT(mapSetUp)				; Turn off interrupts, translation, and possibly enter 64-bit
-
-			mr		r27,r11						; Remember the old MSR
-			mr		r26,r12						; Remember the feature bits
-
-			xor		r28,r3,r7					; Change the common 32- and 64-bit half
-
-			bf--	pf64Bitb,hfmSF1				; skip if 32-bit...
-			
-			rldimi	r28,r6,32,0					; Shift the fixed upper part of the physical over and cram in top
-
-hfmSF1:		mr		r29,r4						; Save top half of vaddr
-			mr		r30,r5						; Save the bottom half
-						
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkShared					; Go get a shared lock on the mapping lists
-			mr.		r3,r3						; Did we get the lock?
-			bne--	hfmBadLock					; Nope...
-
-			mr		r3,r28						; get the pmap address
-			mr		r4,r29						; Get bits 0:31 to look for
-			mr		r5,r30						; Get bits 32:64
-			
-			bl		EXT(mapSearch)				; Go see if we can find it (note: R7 comes back with mpFlags)
-
-			rlwinm	r0,r7,0,mpRIPb,mpRIPb		; Find remove in progress bit
-			mr.		r31,r3						; Save the mapping if we found it
-			cmplwi	cr1,r0,0					; Are we removing?
-			mr		r29,r4						; Save next va high half
-			crorc	cr0_eq,cr0_eq,cr1_eq		; Not found or removing
-			mr		r30,r5						; Save next va low half
-			li		r6,0						; Assume we did not find it
-			li		r26,0xFFF					; Get a mask to relocate to start of mapping page
-
-			bt--	cr0_eq,hfmNotFnd			; We did not find it...
-
-			bl		mapBumpBusy					; If we found it, bump up the busy count so the mapping does not disappear
-
-			andc	r4,r31,r26					; Get back to the mapping page start
-
-;			Note: we can treat 32- and 64-bit the same here. Because we are going from
-;			physical to virtual and we only do 32-bit virtual, we only need the low order
-;			word of the xor.
-
-			lwz		r4,mbvrswap+4(r4)			; Get last half of virtual to real swap
-			li		r6,-1						; Indicate we found it and it is not being removed
-			xor		r31,r31,r4					; Flip to virtual
-
-hfmNotFnd:	la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list
-
-			rlwinm	r3,r31,0,0,31				; Move mapping to return register and clear top of register if 64-bit
-			and		r3,r3,r6					; Clear if not found or removing
-
-hfmReturn:	bt++	pf64Bitb,hfmR64				; Yes...
-
-			mtmsr	r27							; Restore enables/translation/etc.
-			isync
-			b		hfmReturnC					; Join common...
-
-hfmR64:		mtmsrd	r27							; Restore enables/translation/etc.
-			isync								
-			
-hfmReturnC:	stw		r29,0(r25)					; Save the top of the next va
-			stw		r30,4(r25)					; Save the bottom of the next va
-			lwz		r0,(FM_ALIGN((31-25+1)*4)+FM_SIZE+FM_LR_SAVE)(r1)	; Restore the return
-			lwz		r25,FM_ARG0+0x00(r1)		; Restore a register
-			lwz		r26,FM_ARG0+0x04(r1)		; Restore a register
-			and		r3,r3,r6					; Clear return if the mapping is being removed
-			lwz		r27,FM_ARG0+0x08(r1)		; Restore a register
-			mtlr	r0							; Restore the return
-			lwz		r28,FM_ARG0+0x0C(r1)		; Restore a register
-			lwz		r29,FM_ARG0+0x10(r1)		; Restore a register
-			lwz		r30,FM_ARG0+0x14(r1)		; Restore a register
-			lwz		r31,FM_ARG0+0x18(r1)		; Restore a register
-			lwz		r1,0(r1)					; Pop the stack
-			blr									; Leave...
-			
-			.align	5
-			
-hfmBadLock:	li		r3,1						; Set lock time out error code
-			b		hfmReturn					; Leave....
-
-hfmPanic:	lis		r0,hi16(Choke)				; System abend
-			ori		r0,r0,lo16(Choke)			; System abend
-			li		r3,failMapping				; Show that we failed some kind of mapping thing
-			sc
-
-
-/*
- *			void hw_clear_maps(void) 
- *
- *			Remove all mappings for all phys entries.
- *	
- * 
- */
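-/*
- *			A sketch of the walk (region and mapping field names are from the
- *			source; the loop helpers are illustrative):
- *
- *				for (rgn = pmap_mem_regions; rgn->mrPhysTab != NULL; rgn++)
- *					for (pp = rgn->mrPhysTab; pp <= last_entry(rgn); pp++)
- *						for (mp = first_mapping(pp); mp; mp = next_alias(mp))
- *							mp->mpPte &= ~mpHValid;	// forget any PTE
- */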
-
-			.align	5
-			.globl	EXT(hw_clear_maps)
-
-LEXT(hw_clear_maps)
-			mflr	r10							; Save the link register
-            mfcr	r9							; Save the condition register
-			bl		EXT(mapSetUp)				; Turn off interrupts, translation, and possibly enter 64-bit
-
-			lis		r5,hi16(EXT(pmap_mem_regions))		; Point to the start of the region table
-			ori		r5,r5,lo16(EXT(pmap_mem_regions))	; Point to the start of the region table			
-
-hcmNextRegion:
-			lwz		r3,mrPhysTab(r5)			; Get the actual table address
-			lwz		r0,mrStart(r5)				; Get start of table entry
-			lwz		r4,mrEnd(r5)				; Get end of table entry
-			addi	r5,r5,mrSize				; Point to the next regions
-
-			cmplwi	r3,0						; No more regions?
-			beq--	hcmDone						; Leave...
-
-			sub		r4,r4,r0					; Calculate physical entry count
-            addi	r4,r4,1
-            mtctr	r4
-
-			bt++	pf64Bitb,hcmNextPhys64		; 64-bit version
-
-
-hcmNextPhys32:
-			lwz		r4,ppLink+4(r3)				; Grab the pointer to the first mapping
-            addi	r3,r3,physEntrySize			; Next phys_entry
-			
-hcmNextMap32:
-			rlwinm.	r4,r4,0,~ppFlags			; Clean and test mapping address
-			beq		hcmNoMap32					; Did not find one...
-
-			lwz		r0,mpPte(r4)				; Grab the offset to the PTE
-			rlwinm	r0,r0,0,~mpHValid			; Clear out valid bit
-			stw		r0,mpPte(r4)				; Put the invalidated pointer back
-
-			lwz		r4,mpAlias+4(r4)			; Chain on to the next
-			b		hcmNextMap32				; Check it out...
-hcmNoMap32:
-            bdnz	hcmNextPhys32
-            b		hcmNextRegion
-
-
-			.align	5
-hcmNextPhys64:
-			li		r0,ppLFAmask				; Get mask to clean up mapping pointer
-			ld		r4,ppLink(r3)				; Get the pointer to the first mapping
-			rotrdi	r0,r0,ppLFArrot				; Rotate clean up mask to get 0xF00000000000000F
-            addi	r3,r3,physEntrySize			; Next phys_entry
-			
-hcmNextMap64:
-			andc.	r4,r4,r0					; Clean and test mapping address
-			beq		hcmNoMap64					; Did not find one...
-
-			lwz		r0,mpPte(r4)				; Grab the offset to the PTE
-			rlwinm	r0,r0,0,~mpHValid			; Clear out valid bit
-			stw		r0,mpPte(r4)				; Put the invalidated pointer back
-
-			ld		r4,mpAlias(r4)				; Chain on to the next
-			li		r0,ppLFAmask				; Get mask to clean up mapping pointer
-			rotrdi	r0,r0,ppLFArrot				; Rotate clean up mask to get 0xF00000000000000F
-			b		hcmNextMap64				; Check it out...
-hcmNoMap64:
-            bdnz	hcmNextPhys64
-            b		hcmNextRegion
-
-
-			.align	5
-hcmDone:
-			mtlr	r10							; Restore the return
-			mtcr	r9							; Restore the condition register
-			bt++	pf64Bitb,hcmDone64			; 64-bit version
-hcmDone32:
-			mtmsr	r11							; Restore translation/mode/etc.
-			isync
-			blr									; Leave...
-
-hcmDone64:
-			mtmsrd	r11							; Restore translation/mode/etc.
-			isync
-			blr									; Leave...
-
-
-
-/*
- *			unsigned int hw_walk_phys(pp, preop, op, postop, parm, opmod) 
- *				walks all mappings for a physical page and performs
- *				specified operations on each.
- *
- *			pp is unlocked physent
- *			preop is operation to perform on physent before walk.  This would be
- *				used to set cache attribute or protection
- *			op is the operation to perform on each mapping during walk
- *			postop is operation to perform on the physent after walk.  This would be
- *				used to set or reset the RC bits.
- *			opmod modifies the action taken on any connected PTEs visited during
- *				the mapping walk.
- *
- *			We return the RC bits from before postop is run.
- *
- *			Note that this is designed to be called from 32-bit mode with a stack.
- *
- *			We disable translation and all interruptions here.  This keeps us
- *			from having to worry about a deadlock due to having anything locked
- *			and needing it to process a fault.
- *
- *			We lock the physent, execute preop, and then walk each mapping in turn. 
- *			If there is a PTE, it is invalidated and the RC merged into the physent.
- *			Then we call the op function.
- *			Then we revalidate the PTE.
- *			Once all mappings are finished, we save the physent RC and call the 
- *			postop routine.  Then we unlock the physent and return the RC.
- *	
- * 
- */
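-/*
- *			A C sketch of the walk (function-pointer dispatch and helpers are
- *			illustrative):
- *
- *				physent_lock(pp);
- *				if (preop(pp) != 0) goto done;		// preop may bail early
- *				for (mp = first_mapping(pp); mp; mp = next_alias(mp)) {
- *					pte = mapInvPte(mp);			// invalidate, merge RC
- *					stop = op(pp, mp, parm);
- *					if (pte != NULL)
- *						pte_revalidate(pte);		// put the PTE back
- *					if (stop) goto done;
- *				}
- *				rc = physent_rc(pp);				// RC before postop runs
- *				postop(pp);
- *			done:
- *				physent_unlock(pp);
- *				return rc;
- */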
-
-			.align	5
-			.globl	EXT(hw_walk_phys)
-
-LEXT(hw_walk_phys)
-			stwu	r1,-(FM_ALIGN((31-24+1)*4)+FM_SIZE)(r1)	; Make some space on the stack
-			mflr	r0							; Save the link register
-			stw		r24,FM_ARG0+0x00(r1)		; Save a register
-			stw		r25,FM_ARG0+0x04(r1)		; Save a register
-			stw		r26,FM_ARG0+0x08(r1)		; Save a register
-			stw		r27,FM_ARG0+0x0C(r1)		; Save a register
-			mr		r24,r8						; Save the parm
-			mr		r25,r7						; Save the parm
-			stw		r28,FM_ARG0+0x10(r1)		; Save a register
-			stw		r29,FM_ARG0+0x14(r1)		; Save a register
-			stw		r30,FM_ARG0+0x18(r1)		; Save a register
-			stw		r31,FM_ARG0+0x1C(r1)		; Save a register
-			stw		r0,(FM_ALIGN((31-24+1)*4)+FM_SIZE+FM_LR_SAVE)(r1)	; Save the return
-
-			bl		EXT(mapSetUp)				; Turn off interrupts, translation, and possibly enter 64-bit
-			
-			mfsprg	r26,0						; (INSTRUMENTATION)
-			lwz		r27,hwWalkPhys(r26)			; (INSTRUMENTATION)
-			addi	r27,r27,1					; (INSTRUMENTATION)
-			stw		r27,hwWalkPhys(r26)			; (INSTRUMENTATION)
-			la		r26,hwWalkFull(r26)			; (INSTRUMENTATION)
-			slwi	r12,r24,2					; (INSTRUMENTATION)
-			lwzx	r27,r26,r12					; (INSTRUMENTATION)
-			addi	r27,r27,1					; (INSTRUMENTATION)
-			stwx	r27,r26,r12					; (INSTRUMENTATION)
-		
-			mr		r26,r11						; Save the old MSR
-			lis		r27,hi16(hwpOpBase)			; Get high order of op base
-			slwi	r4,r4,7						; Convert preop to displacement
-			ori		r27,r27,lo16(hwpOpBase)		; Get low order of op base
-			slwi	r5,r5,7						; Convert op to displacement
-			add		r12,r4,r27					; Point to the preop routine
-			slwi	r28,r6,7					; Convert postop to displacement
-			mtctr	r12							; Set preop routine	
-			add		r28,r28,r27					; Get the address of the postop routine
-			add		r27,r5,r27					; Get the address of the op routine			
-
-			bl		mapPhysLock					; Lock the physent
-
-			mr		r29,r3						; Save the physent address
-			
-			bt++	pf64Bitb,hwp64				; skip if 64-bit (only they take the hint)
-			
-			bctrl								; Call preop routine
-			bne-	hwpEarly32					; preop says to bail now...
-
-			cmplwi	r24,hwpMergePTE				; Classify operation modifier			
- 			mtctr	r27							; Set up the op function address
-			lwz		r31,ppLink+4(r3)			; Grab the pointer to the first mapping
-			blt		hwpSrc32					; Do TLB invalidate/purge/merge/reload for each mapping
-			beq		hwpMSrc32					; Do TLB merge for each mapping
-			
-hwpQSrc32:	rlwinm.	r31,r31,0,~ppFlags			; Clean and test mapping address
-			beq		hwpNone32					; Did not find one...
-			
-			bctrl								; Call the op function
-			
-			bne-	hwpEarly32					; op says to bail now...
-			lwz		r31,mpAlias+4(r31)			; Chain on to the next
-			b		hwpQSrc32					; Check it out...
-
-			.align	5			
-hwpMSrc32:	rlwinm.	r31,r31,0,~ppFlags			; Clean and test mapping address
-			beq		hwpNone32					; Did not find one...
-			
-			bl		mapMergeRC32				; Merge reference and change into mapping and physent
-			bctrl								; Call the op function
-			
-			bne-	hwpEarly32					; op says to bail now...
-			lwz		r31,mpAlias+4(r31)			; Chain on to the next
-			b		hwpMSrc32					; Check it out...
-
-			.align	5			
-hwpSrc32:	rlwinm.	r31,r31,0,~ppFlags			; Clean and test mapping address
-			beq		hwpNone32					; Did not find one...
-						
-;
-;			Note: mapInvPte32 returns the PTE in R3 (or 0 if none), PTE high in R4, 
-;			PTE low in R5.  The PCA address is in R7.  The PTEG comes back locked.
-;			If there is no PTE, PTE low is obtained from mapping
-;
-			bl		mapInvPte32					; Invalidate and lock PTE, also merge into physent
-		
-			bctrl								; Call the op function
-
-			crmove	cr1_eq,cr0_eq				; Save the return code
-						
-			mr.		r3,r3						; Was there a previously valid PTE?
-			beq-	hwpNxt32					; Nope...
-			
-			stw		r5,4(r3)					; Store second half of PTE
-			eieio								; Make sure we do not reorder
-			stw		r4,0(r3)					; Revalidate the PTE
-			
-			eieio								; Make sure all updates come first
-			stw		r6,0(r7)					; Unlock the PCA
-			
-hwpNxt32:	bne-	cr1,hwpEarly32				; op says to bail now...
-			lwz		r31,mpAlias+4(r31)			; Chain on to the next
-			b		hwpSrc32					; Check it out...
-
-			.align	5
-
-hwpNone32:	mtctr	r28							; Get the post routine address
-			
-			lwz		r30,ppLink+4(r29)			; Save the old RC
-			mr		r3,r29						; Get the physent address
-			bctrl								; Call post routine
-
-			bl		mapPhysUnlock				; Unlock the physent
-			
-			mtmsr	r26							; Restore translation/mode/etc.
-			isync
-			
-			b		hwpReturn					; Go restore registers and return...
-
-			.align	5
-
-hwpEarly32:	lwz		r30,ppLink+4(r29)			; Save the old RC
-			mr		r3,r29						; Get the physent address
-			bl		mapPhysUnlock				; Unlock the physent
-			
-			mtmsr	r26							; Restore translation/mode/etc.
-			isync
-			
-			b		hwpReturn					; Go restore registers and return...
-
-			.align	5
-		
-hwp64:		bctrl								; Call preop routine
-			bne--	hwpEarly64					; preop says to bail now...
-			
-			cmplwi	r24,hwpMergePTE				; Classify operation modifier			
- 			mtctr	r27							; Set up the op function address
-			
-			li		r24,ppLFAmask
-			ld		r31,ppLink(r3)				; Get the pointer to the first mapping
-			rotrdi	r24,r24,ppLFArrot			; Rotate clean up mask to get 0xF0000000000000000F
-			blt		hwpSrc64					; Do TLB invalidate/purge/merge/reload for each mapping
-			beq		hwpMSrc64					; Do TLB merge for each mapping
-			
-hwpQSrc64:	andc.	r31,r31,r24					; Clean and test mapping address
-			beq		hwpNone64					; Did not find one...
-
-			bctrl								; Call the op function
-
-			bne--	hwpEarly64					; op says to bail now...
-			ld		r31,mpAlias(r31)			; Chain on to the next
-			b		hwpQSrc64					; Check it out...
-
-			.align	5			
-hwpMSrc64:	andc.	r31,r31,r24					; Clean and test mapping address
-			beq		hwpNone64					; Did not find one...
-
-			bl		mapMergeRC64				; Merge reference and change into mapping and physent
-			bctrl								; Call the op function
-
-			bne--	hwpEarly64					; op says to bail now...
-			ld		r31,mpAlias(r31)			; Chain on to the next
-			b		hwpMSrc64					; Check it out...
-
-			.align	5			
-hwpSrc64:	andc.	r31,r31,r24					; Clean and test mapping address
-			beq		hwpNone64					; Did not find one...
-;
-;			Note: mapInvPte64 returns the PTE in R3 (or 0 if none), PTE high in R4, 
-;			PTE low in R5. PTEG comes back locked if there is one
-;
-			bl		mapInvPte64					; Invalidate and lock PTEG, also merge into physent
-
-			bctrl								; Call the op function
-
-			crmove	cr1_eq,cr0_eq				; Save the return code
-			
-			mr.		r3,r3						; Was there a previously valid PTE?
-			beq--	hwpNxt64					; Nope...
-			
-			std		r5,8(r3)					; Save bottom of PTE
-			eieio								; Make sure we do not reorder 
-			std		r4,0(r3)					; Revalidate the PTE
-			
-			eieio								; Make sure all updates come first
-			stw		r6,0(r7)					; Unlock the PCA
-
-hwpNxt64:	bne--	cr1,hwpEarly64				; op says to bail now...
-			ld		r31,mpAlias(r31)			; Chain on to the next
-			b		hwpSrc64					; Check it out...
-	
-			.align	5
-			
-hwpNone64:	mtctr	r28							; Get the post routine address
-			
-			lwz		r30,ppLink+4(r29)			; Save the old RC
-			mr		r3,r29						; Get the physent address
-			bctrl								; Call post routine
-
-			bl		mapPhysUnlock				; Unlock the physent
-			
-			mtmsrd	r26							; Restore translation/mode/etc.
-			isync
-			b		hwpReturn					; Go restore registers and return...
-
-			.align	5
-
-hwpEarly64:	lwz		r30,ppLink+4(r29)			; Save the old RC
-			mr		r3,r29						; Get the physent address
-			bl		mapPhysUnlock				; Unlock the physent
-			
-			mtmsrd	r26							; Restore translation/mode/etc.
-			isync			
-
-hwpReturn:	lwz		r0,(FM_ALIGN((31-24+1)*4)+FM_SIZE+FM_LR_SAVE)(r1)	; Restore the return
-			lwz		r24,FM_ARG0+0x00(r1)		; Restore a register
-			lwz		r25,FM_ARG0+0x04(r1)		; Restore a register
-			lwz		r26,FM_ARG0+0x08(r1)		; Restore a register
-			mr		r3,r30						; Pass back the RC
-			lwz		r27,FM_ARG0+0x0C(r1)		; Restore a register
-			lwz		r28,FM_ARG0+0x10(r1)		; Restore a register
-			mtlr	r0							; Restore the return
-			lwz		r29,FM_ARG0+0x14(r1)		; Restore a register
-			lwz		r30,FM_ARG0+0x18(r1)		; Restore a register
-			lwz		r31,FM_ARG0+0x1C(r1)		; Restore a register
-			lwz		r1,0(r1)					; Pop the stack
-			blr									; Leave...
-
-
-;
-;			The preop/op/postop function table.
-;			Each function must be 128-byte aligned and be no more than
-;			32 instructions.  If more than 32, we must fix the address calculations
-;			at the start of hwpOpBase
-;
-;			The routine must set CR0_EQ in order to continue scan.
-;			If CR0_EQ is not set, an early return from the function is made.
-;
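-;			A hedged C analogue of the fixed-slot dispatch used here: because
-;			every handler occupies one 128-byte slot, the target address is just
-;			base + (code << 7) with no table of pointers.  hwp_fn and the extern
-;			are illustrative names, not real declarations:
-;
-;				typedef int (*hwp_fn)(void);
-;				extern char hwpOpBase_sketch[];			/* start of the slot array */
-;
-;				static inline hwp_fn hwp_select(unsigned int code)
-;				{
-;					return (hwp_fn)(hwpOpBase_sketch + (code << 7));	/* code * 128 */
-;				}
-;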
-
-			.align	7
-			
-hwpOpBase:
-
-;			Function 0 - No operation
-
-hwpNoop:	cmplw	r0,r0						; Make sure CR0_EQ is set
-			blr									; Just return...
-
-			.align	5
-
-;			This is the continuation of function 4 - Set attributes in mapping
-
-;			We changed the attributes of a mapped page.  Make sure there are no cache paradoxes.
-;			NOTE: Do we have to deal with i-cache here?
-
-hwpSAM:		li		r11,4096					; Get page size
-			
-hwpSAMinvd:	sub.	r11,r11,r9					; Back off a line
-			dcbf	r11,r5						; Flush the line in the data cache
-			bgt++	hwpSAMinvd					; Go do the rest of it...
-			
-			sync								; Make sure it is done
-
-			li		r11,4096					; Get page size
-			
-hwpSAMinvi:	sub.	r11,r11,r9					; Back off a line
-			icbi	r11,r5						; Flush the line in the icache
-			bgt++	hwpSAMinvi					; Go do the rest of it...
-			
-			sync								; Make sure it is done
-
-			cmpw	r0,r0						; Make sure we return CR0_EQ
-			blr									; Return...
-
-
-;			Function 1 - Set protection in physent (obsolete)
-
-			.set	.,hwpOpBase+(1*128)			; Generate error if previous function too long
-
-hwpSPrtPhy: cmplw	r0,r0						; Make sure we return CR0_EQ
-			blr									; Return...
-			
-
-;			Function 2 - Set protection in mapping
-
-;			NOTE: Changes to no-execute permission are ignored
-
-			.set	.,hwpOpBase+(2*128)			; Generate error if previous function too long
-
-hwpSPrtMap:	lwz		r9,mpFlags(r31)				; Get the mapping flags
-			lwz		r8,mpVAddr+4(r31)			; Get the protection part of mapping
-			rlwinm.	r9,r9,0,mpPermb,mpPermb		; Is the mapping permanent?
-			li		r0,lo16(mpPP)				; Get protection bits
-			crnot	cr0_eq,cr0_eq				; Change CR0_EQ to true if mapping is permanent
-			rlwinm	r2,r25,0,mpPP				; Isolate new protection bits 
-			beqlr--								; Leave if permanent mapping (before we trash R5)...
-			andc	r5,r5,r0					; Clear the old prot bits
-			or		r5,r5,r2					; Move in the new prot bits
-			rlwimi	r8,r5,0,20,31				; Copy into the mapping copy
-			cmpw	r0,r0						; Make sure we return CR0_EQ
-			stw		r8,mpVAddr+4(r31)			; Set the flag part of mapping
-			blr									; Leave...
-			
-;			Function 3 - Set attributes in physent
-
-			.set	.,hwpOpBase+(3*128)			; Generate error if previous function too long
-
-hwpSAtrPhy:	li		r5,ppLink					; Get offset for flag part of physent
-
-hwpSAtrPhX:	lwarx	r4,r5,r29					; Get the old flags
-			rlwimi	r4,r25,0,ppIb,ppGb			; Stick in the new attributes
-			stwcx.	r4,r5,r29					; Try to stuff it
-			bne--	hwpSAtrPhX					; Try again...
-;			Note: CR0_EQ is set because of stwcx.
-			blr									; Return...
-			
-;			Function 4 - Set attributes in mapping
-
-			.set	.,hwpOpBase+(4*128)			; Generate error if previous function too long
-
-hwpSAtrMap:	lwz		r9,mpFlags(r31)				; Get the mapping flags
-			lwz		r8,mpVAddr+4(r31)			; Get the attribute part of mapping
-			li		r2,mpM						; Force on coherent
-			rlwinm.	r9,r9,0,mpPermb,mpPermb		; Is the mapping permanent?
-			li		r0,lo16(mpWIMG)				; Get wimg mask		
-			crnot	cr0_eq,cr0_eq				; Change CR0_EQ to true if mapping is permanent
-			rlwimi	r2,r25,32-(mpIb-32-ppIb),mpIb-32,mpIb-32
-												; Copy in the cache inhibited bit
-			beqlr--								; Leave if permanent mapping (before we trash R5)...
-			andc	r5,r5,r0					; Clear the old wimg
-			rlwimi	r2,r25,32-(mpGb-32-ppGb),mpGb-32,mpGb-32
-												; Copy in the guarded bit
-			mfsprg	r9,2						; Feature flags
-			or		r5,r5,r2					; Move in the new wimg
-			rlwimi	r8,r5,0,20,31				; Copy into the mapping copy
-			lwz		r2,mpPAddr(r31)				; Get the physical address
-			li		r0,0xFFF					; Start a mask
-			andi.	r9,r9,pf32Byte+pf128Byte	; Get cache line size
-			rlwinm	r5,r0,0,1,0					; Copy to top half
-			stw		r8,mpVAddr+4(r31)			; Set the flag part of mapping
-			rlwinm	r2,r2,12,1,0				; Copy to top and rotate to make physical address with junk left
-			and		r5,r5,r2					; Clean stuff in top 32 bits
-			andc	r2,r2,r0					; Clean bottom too
-			rlwimi	r5,r2,0,0,31				; Insert low 32 bits to make full physical address
-			b		hwpSAM						; Join common
-			
-;			NOTE: we moved the remainder of the code out of here because it
-;			did not fit in the 128 bytes allotted.  It got stuck into the free space
-;			at the end of the no-op function.
-
-
-
-			
-;			Function 5 - Clear reference in physent
-
-			.set	.,hwpOpBase+(5*128)			; Generate error if previous function too long
-
-hwpCRefPhy:	li		r5,ppLink+4					; Get offset for flag part of physent
-
-hwpCRefPhX:	lwarx	r4,r5,r29					; Get the old flags
-			rlwinm	r4,r4,0,ppRb+1-32,ppRb-1-32	; Clear R
-			stwcx.	r4,r5,r29					; Try to stuff it
-			bne--	hwpCRefPhX					; Try again...
-;			Note: CR0_EQ is set because of stwcx.
-			blr									; Return...
-
-			
-;			Function 6 - Clear reference in mapping 
-
-			.set	.,hwpOpBase+(6*128)			; Generate error if previous function too long
-
-hwpCRefMap:	li		r0,lo16(mpR)				; Get reference bit
-			lwz		r8,mpVAddr+4(r31)			; Get the flag part of mapping
-			andc	r5,r5,r0					; Clear in PTE copy
-			andc	r8,r8,r0					; and in the mapping
-			cmpw	r0,r0						; Make sure we return CR0_EQ
-			stw		r8,mpVAddr+4(r31)			; Set the flag part of mapping
-			blr									; Return...
-
-			
-;			Function 7 - Clear change in physent
-
-			.set	.,hwpOpBase+(7*128)			; Generate error if previous function too long
-
-hwpCCngPhy:	li		r5,ppLink+4					; Get offset for flag part of physent
-
-hwpCCngPhX:	lwarx	r4,r5,r29					; Get the old flags
-			rlwinm	r4,r4,0,ppCb+1-32,ppCb-1-32	; Clear C
-			stwcx.	r4,r5,r29					; Try to stuff it
-			bne--	hwpCCngPhX					; Try again...
-;			Note: CR0_EQ is set because of stwcx.
-			blr									; Return...
-			
-			
-;			Function 8 - Clear change in mapping
-
-			.set	.,hwpOpBase+(8*128)			; Generate error if previous function too long
-
-hwpCCngMap:	li		r0,lo16(mpC)				; Get change bit
-			lwz		r8,mpVAddr+4(r31)			; Get the flag part of mapping
-			andc	r5,r5,r0					; Clear in PTE copy
-			andc	r8,r8,r0					; and in the mapping
-			cmpw	r0,r0						; Make sure we return CR0_EQ
-			stw		r8,mpVAddr+4(r31)			; Set the flag part of mapping
-			blr									; Return...
-
-			
-;			Function 9 - Set reference in physent
-
-			.set	.,hwpOpBase+(9*128)			; Generate error if previous function too long
-
-hwpSRefPhy:	li		r5,ppLink+4					; Get offset for flag part of physent
-
-hwpSRefPhX:	lwarx	r4,r5,r29					; Get the old flags
-			ori		r4,r4,lo16(ppR)				; Set the reference
-			stwcx.	r4,r5,r29					; Try to stuff it
-			bne--	hwpSRefPhX					; Try again...
-;			Note: CR0_EQ is set because of stwcx.
-			blr									; Return...
-
-			
-;			Function 10 - Set reference in mapping
-
-			.set	.,hwpOpBase+(10*128)		; Generate error if previous function too long
-
-hwpSRefMap:	lwz		r8,mpVAddr+4(r31)			; Get the flag part of mapping
-			ori		r8,r8,lo16(mpR)				; Set reference in mapping
-			cmpw	r0,r0						; Make sure we return CR0_EQ
-			stw		r8,mpVAddr+4(r31)			; Set the flag part of mapping
-			blr									; Return...
-			
-;			Function 11 - Set change in physent
-
-			.set	.,hwpOpBase+(11*128)		; Generate error if previous function too long
-
-hwpSCngPhy:	li		r5,ppLink+4					; Get offset for flag part of physent
-
-hwpSCngPhX:	lwarx	r4,r5,r29					; Get the old flags
-			ori		r4,r4,lo16(ppC)				; Set the change bit
-			stwcx.	r4,r5,r29					; Try to stuff it
-			bne--	hwpSCngPhX					; Try again...
-;			Note: CR0_EQ is set because of stwcx.
-			blr									; Return...
-			
-;			Function 12 - Set change in mapping
-
-			.set	.,hwpOpBase+(12*128)		; Generate error if previous function too long
-
-hwpSCngMap:	lwz		r8,mpVAddr+4(r31)			; Get the flag part of mapping
-			ori		r8,r8,lo16(mpC)				; Set change in mapping
-			cmpw	r0,r0						; Make sure we return CR0_EQ
-			stw		r8,mpVAddr+4(r31)			; Set the flag part of mapping
-			blr									; Return...
-
-;			Function 13 - Test reference in physent
-
-			.set	.,hwpOpBase+(13*128)		; Generate error if previous function too long
-			
-hwpTRefPhy:	lwz		r0,ppLink+4(r29)			; Get the flags from physent	
-			rlwinm.	r0,r0,0,ppRb-32,ppRb-32		; Isolate reference bit and see if 0
-			blr									; Return (CR0_EQ set to continue if reference is off)...
-
-
-;			Function 14 - Test reference in mapping
-
-			.set	.,hwpOpBase+(14*128)		; Generate error if previous function too long
-			
-hwpTRefMap:	rlwinm.	r0,r5,0,mpRb-32,mpRb-32		; Isolate reference bit and see if 0
-			blr									; Return (CR0_EQ set to continue if reference is off)...
-
-
-;			Function 15 - Test change in physent
-
-			.set	.,hwpOpBase+(15*128)		; Generate error if previous function too long
-			
-hwpTCngPhy:	lwz		r0,ppLink+4(r29)			; Get the flags from physent	
-			rlwinm.	r0,r0,0,ppCb-32,ppCb-32		; Isolate change bit and see if 0
-			blr									; Return (CR0_EQ set to continue if change is off)...
-
-
-;			Function 16 - Test change in mapping
-
-			.set	.,hwpOpBase+(16*128)		; Generate error if previous function too long
-			
-hwpTCngMap:	rlwinm.	r0,r5,0,mpCb-32,mpCb-32		; Isolate change bit and see if 0
-			blr									; Return (CR0_EQ set to continue if change is off)...
-
-
-;			Function 17 - Test reference and change in physent
-
-			.set	.,hwpOpBase+(17*128)		; Generate error if previous function too long
-
-hwpTRefCngPhy:			
-			lwz		r0,ppLink+4(r29)			; Get the flags from physent	
-			rlwinm	r0,r0,0,ppRb-32,ppCb-32		; Isolate reference and change bits
-			cmplwi	r0,lo16(ppR|ppC)			; cr0_eq <- ((R == 1) && (C == 1))
-			crnot	cr0_eq,cr0_eq				; cr0_eq <- ((R == 0) || (C == 0))
-			blr									; Return (CR0_EQ set to continue if either R or C is off)...
-
-
-;			Function 18 - Test reference and change in mapping
-
-			.set	.,hwpOpBase+(18*128)		; Generate error if previous function too long
-hwpTRefCngMap:
-			rlwinm	r0,r5,0,mpRb-32,mpCb-32		; Isolate reference and change bits from mapping
-			cmplwi	r0,lo16(mpR|mpC)			; cr0_eq <- ((R == 1) && (C == 1))
-			crnot	cr0_eq,cr0_eq				; cr0_eq <- ((R == 0) || (C == 0))
-			blr									; Return (CR0_EQ set to continue if either R or C is off)...
-
-
-;			Function 19 - Clear reference and change in physent
-
-			.set	.,hwpOpBase+(19*128)		; Generate error if previous function too long
-hwpCRefCngPhy:
-			li		r5,ppLink+4					; Get offset for flag part of physent
-
-hwpCRefCngPhX:
-			lwarx	r4,r5,r29					; Get the old flags
-			andc	r4,r4,r25					; Clear R and C as specified by mask
-			stwcx.	r4,r5,r29					; Try to stuff it
-			bne--	hwpCRefCngPhX				; Try again...
-;			Note: CR0_EQ is set because of stwcx.
-			blr									; Return...
-
-
-;			Function 20 - Clear reference and change in mapping
-
-			.set	.,hwpOpBase+(20*128)		; Generate error if previous function too long
-hwpCRefCngMap:
-			srwi	r0,r25,(ppRb - mpRb)		; Align reference/change clear mask (phys->map)
-			lwz		r8,mpVAddr+4(r31)			; Get the flag part of mapping
-			andc	r5,r5,r0					; Clear in PTE copy
-			andc	r8,r8,r0					; and in the mapping
-			cmpw	r0,r0						; Make sure we return CR0_EQ
-			stw		r8,mpVAddr+4(r31)			; Set the flag part of mapping
-			blr									; Return...
-
-
-			.set	.,hwpOpBase+(21*128)		; Generate error if previous function too long
-
-;
-;			unsigned int hw_protect(pmap, va, prot, *nextva) - Changes protection on a specific mapping.
-;			
-;			Returns:
-;				mapRtOK     - if all is ok
-;				mapRtBadLk  - if mapping lock fails
-;				mapRtPerm   - if mapping is permanent
-;				mapRtNotFnd - if mapping is not found
-;				mapRtBlock  - if mapping is a block
-;
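-;			A hedged caller-side sketch; the mapRt names are the ones listed
-;			above, and the addr64_t plumbing is an assumption:
-;
-;				addr64_t next;
-;				switch (hw_protect(pmap, va, prot, &next)) {
-;				case mapRtOK:		break;		/* protection was changed         */
-;				case mapRtBadLk:	break;		/* lock timed out: retry or panic */
-;				case mapRtPerm:		break;		/* permanent mapping: untouched   */
-;				case mapRtNotFnd:	break;		/* nothing mapped at va           */
-;				case mapRtBlock:	break;		/* block mappings are not changed */
-;				}
-;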
-			.align	5
-			.globl	EXT(hw_protect)
-
-LEXT(hw_protect)
- 			stwu	r1,-(FM_ALIGN((31-24+1)*4)+FM_SIZE)(r1)	; Make some space on the stack
-			mflr	r0							; Save the link register
-			stw		r24,FM_ARG0+0x00(r1)		; Save a register
-			stw		r25,FM_ARG0+0x04(r1)		; Save a register
-			mr		r25,r7						; Remember address of next va
-			stw		r26,FM_ARG0+0x08(r1)		; Save a register
-			stw		r27,FM_ARG0+0x0C(r1)		; Save a register
-			stw		r28,FM_ARG0+0x10(r1)		; Save a register
-			mr		r24,r6						; Save the new protection flags
-			stw		r29,FM_ARG0+0x14(r1)		; Save a register
-			stw		r30,FM_ARG0+0x18(r1)		; Save a register
-			stw		r31,FM_ARG0+0x1C(r1)		; Save a register
-			stw		r0,(FM_ALIGN((31-24+1)*4)+FM_SIZE+FM_LR_SAVE)(r1)	; Save the return
-
-#if DEBUG
-			lwz		r11,pmapFlags(r3)			; Get pmaps flags
-			rlwinm.	r11,r11,0,pmapVMgsaa		; Is guest shadow assist active? 
-			bne		hpPanic						; Call not valid for guest shadow assist pmap
-#endif
-			
-			lwz		r6,pmapvr(r3)				; Get the first part of the VR translation for pmap
-			lwz		r7,pmapvr+4(r3)				; Get the second part
-
-
-			bl		EXT(mapSetUp)				; Turn off interrupts, translation, and possibly enter 64-bit
-
-			mr		r27,r11						; Remember the old MSR
-			mr		r26,r12						; Remember the feature bits
-
-			xor		r28,r3,r7					; Change the common 32- and 64-bit half
-
-			bf--	pf64Bitb,hpSF1				; skip if 32-bit...
-			
-			rldimi	r28,r6,32,0					; Shift the fixed upper part of the physical over and cram in top
-
-hpSF1:		mr		r29,r4						; Save top half of vaddr
-			mr		r30,r5						; Save the bottom half
-						
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkShared					; Go get a shared lock on the mapping lists
-			mr.		r3,r3						; Did we get the lock?
-			bne--	hpBadLock					; Nope...
-
-			mr		r3,r28						; get the pmap address
-			mr		r4,r29						; Get bits 0:31 to look for
-			mr		r5,r30						; Get bits 32:64
-			
-			bl		EXT(mapSearch)				; Go see if we can find it (note: R7 comes back with mpFlags)
-
-			rlwinm.	r0,r7,0,mpType				; Is this a normal mapping?
-			crmove	cr1_eq,cr0_eq				; cr1_eq <- this is a normal mapping
-			andi.	r0,r7,mpPerm|mpRIP			; Is it permanent or being removed?
-			crand	cr1_eq,cr0_eq,cr1_eq        ; cr1_eq <- normal mapping and not permanent and not being removed
-			mr.		r31,r3						; Save the mapping if we found it
-			mr		r29,r4						; Save next va high half
-			mr		r30,r5						; Save next va low half
-			
-			beq--	hpNotFound					; Not found...
-
-			bf--	cr1_eq,hpNotAllowed			; Something special is happening...
-			
-			bt++	pf64Bitb,hpDo64				; Split for 64 bit
-			
-			bl		mapInvPte32					; Invalidate and lock PTEG, also merge into physent
-						
-			rlwimi	r5,r24,0,mpPPb-32,mpPPe-32	; Stick in the new pp (note that we ignore no-execute for 32-bit)
-			mr.		r3,r3						; Was there a previously valid PTE?
-
-			stb		r5,mpVAddr+7(r31)			; Set the new pp field (do not muck with the rest)			
-
-			beq--	hpNoOld32					; Nope...
-			
-			stw		r5,4(r3)					; Store second half of PTE
-			eieio								; Make sure we do not reorder
-			stw		r4,0(r3)					; Revalidate the PTE
-
-			eieio								; Make sure all updates come first
-			stw		r6,0(r7)					; Unlock PCA
-		
-hpNoOld32:	la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list
-
-			li		r3,mapRtOK					; Set normal return		
-			b		hpR32						; Join common...
-
-			.align	5			
-			
-			
-hpDo64:		bl		mapInvPte64					; Invalidate and lock PTEG, also merge into physent
-						
-			rldimi	r5,r24,0,mpNb				; Stick in the new no-execute and pp bits
-			mr.		r3,r3						; Was there a previously valid PTE?
-
-			stb		r5,mpVAddr+7(r31)			; Set the new pp field (do not muck with the rest)			
-
-			beq--	hpNoOld64					; Nope...
-			
-			std		r5,8(r3)					; Store second half of PTE
-			eieio								; Make sure we do not reorder
-			std		r4,0(r3)					; Revalidate the PTE
-
-			eieio								; Make sure all updates come first
-			stw		r6,0(r7)					; Unlock PCA
-
-hpNoOld64:	la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list
-
-			li		r3,mapRtOK					; Set normal return		
-			b		hpR64						; Join common...
-
-			.align	5							
-						
-hpReturn:	bt++	pf64Bitb,hpR64				; Yes...
-
-hpR32:		mtmsr	r27							; Restore enables/translation/etc.
-			isync
-			b		hpReturnC					; Join common...
-
-hpR64:		mtmsrd	r27							; Restore enables/translation/etc.
-			isync								
-			
-hpReturnC:	stw		r29,0(r25)					; Save the top of the next va
-			stw		r30,4(r25)					; Save the bottom of the next va
-			lwz		r0,(FM_ALIGN((31-24+1)*4)+FM_SIZE+FM_LR_SAVE)(r1)	; Restore the return
-			lwz		r24,FM_ARG0+0x00(r1)		; Restore a register
-			lwz		r25,FM_ARG0+0x04(r1)		; Restore a register
-			lwz		r26,FM_ARG0+0x08(r1)		; Restore a register
-			mtlr	r0							; Restore the return
-			lwz		r27,FM_ARG0+0x0C(r1)		; Restore a register
-			lwz		r28,FM_ARG0+0x10(r1)		; Restore a register
-			lwz		r29,FM_ARG0+0x14(r1)		; Restore a register
-			lwz		r30,FM_ARG0+0x18(r1)		; Restore a register
-			lwz		r31,FM_ARG0+0x1C(r1)		; Restore a register
-			lwz		r1,0(r1)					; Pop the stack
-			blr									; Leave...
-			
-			.align	5
-			
-hpBadLock:	li		r3,mapRtBadLk				; Set lock time out error code
-			b		hpReturn					; Leave....
-			
-hpNotFound:	la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list
-			
-			li		r3,mapRtNotFnd				; Set that we did not find the requested page
-			b		hpReturn					; Leave....
-			
-hpNotAllowed:	
-			rlwinm.	r0,r7,0,mpRIPb,mpRIPb		; Is it actually being removed?
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bne--	hpNotFound					; Yeah...
-			bl		sxlkUnlock					; Unlock the search list
-			
-			li		r3,mapRtBlock				; Assume it was a block
-			rlwinm	r0,r7,0,mpType				; Isolate mapping type
-			cmplwi	r0,mpBlock					; Is this a block mapping?
-			beq++	hpReturn					; Yes, leave...
-			
-			li		r3,mapRtPerm				; Set that we hit a permanent page
-			b		hpReturn					; Leave....
-
-hpPanic:	lis		r0,hi16(Choke)				; System abend
-			ori		r0,r0,lo16(Choke)			; System abend
-			li		r3,failMapping				; Show that we failed some kind of mapping thing
-			sc
-			
-
-;
-;			int hw_test_rc(pmap, va, reset) - tests RC on a specific va
-;			
-;			Returns the following code ORed with the RC from the mapping
-;				mapRtOK     - if all is ok
-;				mapRtBadLk  - if mapping lock fails
-;				mapRtNotFnd - if mapping is not found
-;
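-;			A hedged caller-side sketch; since the return code and the RC bits
-;			share one word, the caller masks them apart (bit names as above,
-;			assuming the code bits do not overlap mpR/mpC):
-;
-;				unsigned int ret  = hw_test_rc(pmap, va, 1 /* reset RC */);
-;				unsigned int rc   = ret & (mpR | mpC);	/* referenced/changed bits */
-;				unsigned int code = ret & ~(mpR | mpC);	/* mapRtOK and friends     */
-;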
-			.align	5
-			.globl	EXT(hw_test_rc)
-
-LEXT(hw_test_rc)
- 			stwu	r1,-(FM_ALIGN((31-24+1)*4)+FM_SIZE)(r1)	; Make some space on the stack
-			mflr	r0							; Save the link register
-			stw		r24,FM_ARG0+0x00(r1)		; Save a register
-			stw		r25,FM_ARG0+0x04(r1)		; Save a register
-			stw		r26,FM_ARG0+0x08(r1)		; Save a register
-			stw		r27,FM_ARG0+0x0C(r1)		; Save a register
-			stw		r28,FM_ARG0+0x10(r1)		; Save a register
-			mr		r24,r6						; Save the reset request
-			stw		r29,FM_ARG0+0x14(r1)		; Save a register
-			stw		r30,FM_ARG0+0x18(r1)		; Save a register
-			stw		r31,FM_ARG0+0x1C(r1)		; Save a register
-			stw		r0,(FM_ALIGN((31-24+1)*4)+FM_SIZE+FM_LR_SAVE)(r1)	; Save the return
-
-#if DEBUG
-			lwz		r11,pmapFlags(r3)			; Get pmaps flags
-			rlwinm.	r11,r11,0,pmapVMgsaa		; Is guest shadow assist active? 
-			bne		htrPanic					; Call not valid for guest shadow assist pmap
-#endif
-			
-			lwz		r6,pmapvr(r3)				; Get the first part of the VR translation for pmap
-			lwz		r7,pmapvr+4(r3)				; Get the second part
-
-
-			bl		EXT(mapSetUp)				; Turn off interrupts, translation, and possibly enter 64-bit
-
-			mr		r27,r11						; Remember the old MSR
-			mr		r26,r12						; Remember the feature bits
-
-			xor		r28,r3,r7					; Change the common 32- and 64-bit half
-
-			bf--	pf64Bitb,htrSF1				; skip if 32-bit...
-			
-			rldimi	r28,r6,32,0					; Shift the fixed upper part of the physical over and cram in top
-
-htrSF1:		mr		r29,r4						; Save top half of vaddr
-			mr		r30,r5						; Save the bottom half
-						
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkShared					; Go get a shared lock on the mapping lists
-			mr.		r3,r3						; Did we get the lock?
-			li		r25,0						; Clear RC
-			bne--	htrBadLock					; Nope...
-
-			mr		r3,r28						; get the pmap address
-			mr		r4,r29						; Get bits 0:31 to look for
-			mr		r5,r30						; Get bits 32:64
-			
-			bl		EXT(mapSearch)				; Go see if we can find it (R7 comes back with mpFlags)
-
-			rlwinm.	r0,r7,0,mpType				; Is this a normal mapping?
-			crmove	cr1_eq,cr0_eq				; cr1_eq <- this is a normal mapping
-			andi.	r0,r7,mpPerm|mpRIP			; Is it permanent or being removed?
-			crand	cr1_eq,cr0_eq,cr1_eq        ; cr1_eq <- normal mapping and not permanent and not being removed
-			mr.		r31,r3						; Save the mapping if we found it
-			crandc	cr1_eq,cr1_eq,cr0_eq		; cr1_eq <- found & normal & not permanent & not being removed
-			
-			bf--	cr1_eq,htrNotFound			; Not found, something special, or being removed...
-			
-			bt++	pf64Bitb,htrDo64			; Split for 64 bit
-			
-			bl		mapInvPte32					; Invalidate and lock PTEG, also merge into physent
-						
-			cmplwi	cr1,r24,0					; Do we want to clear RC?
-			lwz		r12,mpVAddr+4(r31)			; Get the bottom of the mapping vaddr field
-			mr.		r3,r3						; Was there a previously valid PTE?
-			li		r0,lo16(mpR|mpC)			; Get bits to clear
-
-			and		r25,r5,r0					; Save the RC bits
-			beq++	cr1,htrNoClr32				; Nope...
-			
-			andc	r12,r12,r0					; Clear mapping copy of RC
-			andc	r5,r5,r0					; Clear PTE copy of RC
-			sth		r12,mpVAddr+6(r31)			; Set the new RC			
-
-htrNoClr32:	beq--	htrNoOld32					; No previously valid PTE...
-			
-			sth		r5,6(r3)					; Store updated RC
-			eieio								; Make sure we do not reorder
-			stw		r4,0(r3)					; Revalidate the PTE
-
-			eieio								; Make sure all updates come first
-			stw		r6,0(r7)					; Unlock PCA
-
-htrNoOld32:	la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list
-			li		r3,mapRtOK					; Set normal return		
-			b		htrR32						; Join common...
-
-			.align	5			
-			
-			
-htrDo64:	bl		mapInvPte64					; Invalidate and lock PTEG, also merge into physent
-						
-			cmplwi	cr1,r24,0					; Do we want to clear RC?
-			lwz		r12,mpVAddr+4(r31)			; Get the bottom of the mapping vaddr field
-			mr.		r3,r3						; Was there a previously valid PTE?
-			li		r0,lo16(mpR|mpC)			; Get bits to clear
-
-			and		r25,r5,r0					; Save the RC bits
-			beq++	cr1,htrNoClr64				; Nope...
-			
-			andc	r12,r12,r0					; Clear mapping copy of RC
-			andc	r5,r5,r0					; Clear PTE copy of RC
-			sth		r12,mpVAddr+6(r31)			; Set the new RC			
-
-htrNoClr64:	beq--	htrNoOld64					; Nope, no previous PTE...
-			
-			sth		r5,14(r3)					; Store updated RC
-			eieio								; Make sure we do not reorder
-			std		r4,0(r3)					; Revalidate the PTE
-
-			eieio								; Make sure all updates come first
-			stw		r6,0(r7)					; Unlock PCA
-
-htrNoOld64:	la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list
-			li		r3,mapRtOK					; Set normal return		
-			b		htrR64						; Join common...
-
-			.align	5							
-						
-htrReturn:	bt++	pf64Bitb,htrR64				; Yes...
-
-htrR32:		mtmsr	r27							; Restore enables/translation/etc.
-			isync
-			b		htrReturnC					; Join common...
-
-htrR64:		mtmsrd	r27							; Restore enables/translation/etc.
-			isync								
-			
-htrReturnC:	lwz		r0,(FM_ALIGN((31-24+1)*4)+FM_SIZE+FM_LR_SAVE)(r1)	; Restore the return
-			or		r3,r3,r25					; Send the RC bits back
-			lwz		r24,FM_ARG0+0x00(r1)		; Restore a register
-			lwz		r25,FM_ARG0+0x04(r1)		; Restore a register
-			lwz		r26,FM_ARG0+0x08(r1)		; Restore a register
-			mtlr	r0							; Restore the return
-			lwz		r27,FM_ARG0+0x0C(r1)		; Restore a register
-			lwz		r28,FM_ARG0+0x10(r1)		; Restore a register
-			lwz		r29,FM_ARG0+0x14(r1)		; Restore a register
-			lwz		r30,FM_ARG0+0x18(r1)		; Restore a register
-			lwz		r31,FM_ARG0+0x1C(r1)		; Restore a register
-			lwz		r1,0(r1)					; Pop the stack
-			blr									; Leave...
-			
-			.align	5
-			
-htrBadLock:	li		r3,mapRtBadLk				; Set lock time out error code
-			b		htrReturn					; Leave....
-			
-htrNotFound:	
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list
-			
-			li		r3,mapRtNotFnd				; Set that we did not find the requested page
-			b		htrReturn					; Leave....
-
-htrPanic:	lis		r0,hi16(Choke)				; System abend
-			ori		r0,r0,lo16(Choke)			; System abend
-			li		r3,failMapping				; Show that we failed some kind of mapping thing
-			sc
-			
-
-;
-;
-;			mapFindLockPN - find and lock physent for a given page number
-;
-;
-			.align	5
-mapFindLockPN:
-			lis		r9,hi16(EXT(pmap_mem_regions))		; Point to the start of the region table
-			mr		r2,r3						; Save our target
-			ori		r9,r9,lo16(EXT(pmap_mem_regions))	; Point to the start of the region table			
-
-mapFLPNitr:	lwz		r3,mrPhysTab(r9)			; Get the actual table address
-			lwz		r5,mrStart(r9)				; Get start of table entry
-			lwz		r0,mrEnd(r9)				; Get end of table entry
-			addi	r9,r9,mrSize				; Point to the next slot
-			cmplwi	cr7,r3,0					; Are we at the end of the table?
-			cmplw	r2,r5						; See if we are in this table
-			cmplw	cr1,r2,r0					; Check end also
-			sub		r4,r2,r5					; Calculate index to physical entry
-			beq--	cr7,mapFLPNmiss				; Leave if we did not find an entry...
-			cror	cr0_lt,cr0_lt,cr1_gt		; Set CR0_LT if it is NOT this entry
-			slwi	r4,r4,3						; Get offset to physical entry
-
-			blt--	mapFLPNitr					; Did not find it...
-			
-			add		r3,r3,r4					; Point right to the slot
-			b		mapPhysLock					; Join common lock code
-
-mapFLPNmiss:
-			li		r3,0						; Show that we did not find it
-			blr									; Leave...			
-			
-
-;
-;			mapPhysFindLock - find physent list and lock it
-;			R31 points to mapping
-;
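-;			Hedged C sketch of the lookup below: the bank index kept in the
-;			mapping selects a pmap_mem_regions entry, and (page number - bank
-;			start) indexes that bank's physent array (8-byte entries).  The
-;			field names are invented for illustration:
-;
-;				physent_t *map_phys_find_sketch(mapping_t *mp)
-;				{
-;					mem_region_t *bank = &pmap_mem_regions[mp->bank_index];
-;					return &bank->phys_table[mp->paddr - bank->start];
-;				}
-;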
-			.align	5
-			
-mapPhysFindLock:	
-			lbz		r4,mpFlags+1(r31)			; Get the index into the physent bank table
-			lis		r3,ha16(EXT(pmap_mem_regions))	; Get high order of physent table (note use of ha16 to get value appropriate for an addi of low part)
-			rlwinm	r4,r4,2,24,29				; Mask index bits and convert to byte offset
-			addi	r4,r4,lo16(EXT(pmap_mem_regions))	; Get low part of address of entry
-			add		r3,r3,r4					; Point to table entry
-			lwz		r5,mpPAddr(r31)				; Get physical page number
-			lwz		r7,mrStart(r3)				; Get the start of range
-			lwz		r3,mrPhysTab(r3)			; Get the start of the entries for this bank
-			sub		r6,r5,r7					; Get index to physent
-			rlwinm	r6,r6,3,0,28				; Get offset to physent
-			add		r3,r3,r6					; Point right to the physent
-			b		mapPhysLock					; Join in the lock...
-
-;
-;			mapPhysLock - lock a physent list
-;			R3 contains list header
-;
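-;			A hedged C11 restatement of the protocol below: spin with plain
-;			loads while the lock bit (0x80000000 in the chain header) is set,
-;			then race to set it with a reserved store; the isync on success
-;			plays the role of the acquire barrier.  Illustrative only:
-;
-;				#include <stdatomic.h>
-;				#include <stdint.h>
-;
-;				static void map_phys_lock_sketch(_Atomic uint32_t *link)
-;				{
-;					uint32_t old;
-;					for (;;) {
-;						old = atomic_load_explicit(link, memory_order_relaxed);
-;						if (old & 0x80000000u)
-;							continue;		/* locked: spin on plain load */
-;						if (atomic_compare_exchange_weak_explicit(link, &old,
-;								old | 0x80000000u,
-;								memory_order_acquire, memory_order_relaxed))
-;							return;			/* got the lock               */
-;					}
-;				}
-;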
-			.align	5
-
-mapPhysLockS:
-			li		r2,lgKillResv				; Get a spot to kill reservation
-			stwcx.	r2,0,r2						; Kill it...
-			
-mapPhysLockT:
-			lwz		r2,ppLink(r3)				; Get physent chain header
-			rlwinm.	r2,r2,0,0,0					; Is lock clear?
-			bne--	mapPhysLockT				; Nope, still locked...
-			
-mapPhysLock:	
-			lwarx	r2,0,r3						; Get the lock
-			rlwinm.	r0,r2,0,0,0					; Is it locked?
-			oris	r0,r2,0x8000				; Set the lock bit
-			bne--	mapPhysLockS				; It is locked, spin on it...
-			stwcx.	r0,0,r3						; Try to stuff it back...
-			bne--	mapPhysLock					; Collision, try again...
-			isync								; Clear any speculations
-			blr									; Leave...
-			
-
-;
-;			mapPhysUnlock - unlock a physent list
-;			R3 contains list header
-;
-			.align	5
-			
-mapPhysUnlock:	
-			lwz		r0,ppLink(r3)				; Get physent chain header
-			rlwinm	r0,r0,0,1,31				; Clear the lock bit
-			eieio								; Make sure unlock comes last
-			stw		r0,ppLink(r3)				; Unlock the list
-			blr
-
-;
-;			mapPhysMerge - merge the RC bits into the master copy
-;			R3 points to the physent 
-;			R4 contains the RC bits
-;
-;			Note: we just return if RC is 0
-;
-			.align	5
-			
-mapPhysMerge:	
-			rlwinm.	r4,r4,PTE1_REFERENCED_BIT+(64-ppRb),ppRb-32,ppCb-32	; Isolate RC bits
-			la		r5,ppLink+4(r3)				; Point to the RC field
-			beqlr--								; Leave if RC is 0...
-			
-mapPhysMergeT:
-			lwarx	r6,0,r5						; Get the RC part
-			or		r6,r6,r4					; Merge in the RC
-			stwcx.	r6,0,r5						; Try to stuff it back...
-			bne--	mapPhysMergeT				; Collision, try again...
-			blr									; Leave...
-
-;
-;			Sets the physent link pointer and preserves all flags
-;			The list is locked
-;			R3 points to physent
-;			R4 has link to set
-;
-
-			.align	5
-
-mapPhyCSet32:
-			la		r5,ppLink+4(r3)				; Point to the link word
-
-mapPhyCSetR:
-			lwarx	r2,0,r5						; Get the link and flags
-			rlwimi	r4,r2,0,ppFlags				; Insert the flags
-			stwcx.	r4,0,r5						; Stick them back
-			bne--	mapPhyCSetR					; Someone else did something, try again...
-			blr									; Return...
-
-			.align	5
-
-mapPhyCSet64:
-			li		r0,ppLFAmask				; Get mask to clean up mapping pointer
-			rotrdi	r0,r0,ppLFArrot				; Rotate clean up mask to get 0xF0000000000000000F
-		
-mapPhyCSet64x:
-			ldarx	r2,0,r3						; Get the link and flags
-			and		r5,r2,r0					; Isolate the flags
-			or		r6,r4,r5					; Add them to the link
-			stdcx.	r6,0,r3						; Stick them back
-			bne--	mapPhyCSet64x				; Someone else did something, try again...
-			blr									; Return...						
-
-;
-;			mapBumpBusy - increment the busy count on a mapping
-;			R3 points to mapping
-;
-
-			.align	5
-
-mapBumpBusy:
-			lwarx	r4,0,r3						; Get mpBusy
-			addis	r4,r4,0x0100				; Bump the busy count
-			stwcx.	r4,0,r3						; Save it back
-			bne--	mapBumpBusy					; This did not work, try again...
-			blr									; Leave...
-
-;
-;			mapDropBusy - decrement the busy count on a mapping
-;			R3 points to mapping
-;
-
-			.globl	EXT(mapping_drop_busy)
-			.align	5
-
-LEXT(mapping_drop_busy)
-mapDropBusy:
-			lwarx	r4,0,r3						; Get mpBusy
-			addis	r4,r4,0xFF00				; Drop the busy count
-			stwcx.	r4,0,r3						; Save it back
-			bne--	mapDropBusy					; This did not work, try again...
-			blr									; Leave...
-
-;
-;			mapDrainBusy - drain the busy count on a mapping
-;			R3 points to mapping
-;			Note: we already have a busy for ourselves. Only one
-;			busy per processor is allowed, so we just spin here
-;			waiting for the count to drop to 1.
-;			Also, the mapping cannot be on any lists when we do this,
-;			so all we are doing is waiting until it can be released.
-;
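-;			Hedged sketch: the busy count lives in the top byte of mpFlags, so
-;			draining is just a spin until only our own busy remains:
-;
-;				static void map_drain_busy_sketch(volatile uint32_t *mpFlags)
-;				{
-;					while (((*mpFlags >> 24) & 0xFFu) != 1)
-;						;			/* wait for other CPUs to drop theirs */
-;				}
-;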
-
-			.align	5
-
-mapDrainBusy:
-			lwz		r4,mpFlags(r3)				; Get mpBusy
-			rlwinm	r4,r4,8,24,31				; Clean it up
-			cmplwi	r4,1						; Is it just our busy?
-			beqlr++								; Yeah, it is clear...
-			b		mapDrainBusy				; Try again...
-
-
-	
-;
-;			handleDSeg - handle a data segment fault
-;			handleISeg - handle an instruction segment fault
-;
-;			All that we do here is to map these to DSI or ISI and ensure
-;			that the hash bit is not set.  This forces the fault code
-;			to also handle the missing segment.
-;
-;			At entry R2 contains per_proc, R13 contains the savearea pointer,
-;			and R11 is the exception code.
-;
-
-			.align	5
-			.globl	EXT(handleDSeg)
-
-LEXT(handleDSeg)
-
-			li		r11,T_DATA_ACCESS			; Change fault to DSI
-			stw		r11,saveexception(r13)		; Change the exception code from seg fault to PTE miss
-			b		EXT(handlePF)				; Join common...
-
-			.align	5
-			.globl	EXT(handleISeg)
-
-LEXT(handleISeg)
-
-			li		r11,T_INSTRUCTION_ACCESS	; Change fault to ISI
-			stw		r11,saveexception(r13)		; Change the exception code from seg fault to PTE miss
-			b		EXT(handlePF)				; Join common...
-
-
-/*
- *			handlePF - handle a page fault interruption
- *
- *			At entry R2 contains per_proc, R13 contains the savearea pointer,
- *			and R11 is the exception code.
- *
- *			This first part does a quick check to see if we can handle the fault.
- *			We cannot handle any kind of protection exceptions here, so we pass
- *			them up to the next level.
- *
- *			NOTE: In order for a page-fault redrive to work, the translation miss
- *			bit must be set in the DSISR (or SRR1 for IFETCH).  That must occur
- *			before we come here.
- */
-
-			.align	5
-			.globl	EXT(handlePF)
-
-LEXT(handlePF)
-
- 			mfsprg	r12,2						; Get feature flags 
-			cmplwi	r11,T_INSTRUCTION_ACCESS		; See if this is for the instruction 
-			lwz		r8,savesrr1+4(r13)			; Get the MSR to determine mode
-			mtcrf	0x02,r12					; move pf64Bit to cr6
-			lis		r0,hi16(dsiNoEx|dsiProt|dsiInvMode|dsiAC)	; Get the types that we cannot handle here
-			lwz		r18,SAVflags(r13)			; Get the flags
-			
-			beq--	gotIfetch					; We have an IFETCH here...
-			
-			lwz		r27,savedsisr(r13)			; Get the DSISR
-			lwz		r29,savedar(r13)			; Get the first half of the DAR
-			lwz		r30,savedar+4(r13)			; And second half
-
-			b		ckIfProt					; Go check if this is a protection fault...
-
-gotIfetch:	andis.	r27,r8,hi16(dsiValid)		; Clean this up to construct a DSISR value
-			lwz		r29,savesrr0(r13)			; Get the first half of the instruction address
-			lwz		r30,savesrr0+4(r13)			; And second half
-			stw		r27,savedsisr(r13)			; Save the "constructed" DSISR
-
-ckIfProt:	and.	r4,r27,r0					; Is this a non-handlable exception?
-			li		r20,64						; Set a limit of 64 nests for sanity check
-			bne--	hpfExit						; Yes... (probably not though)
-			
-;
-;			Note: if the RI is on, we are accessing user space from the kernel, therefore we
-;			should be loading the user pmap here.
-;
-
-			andi.	r0,r8,lo16(MASK(MSR_PR)|MASK(MSR_RI))	; Are we addressing user or kernel space?
-			lis		r8,hi16(EXT(kernel_pmap_phys))	; Assume kernel
-			mr		r19,r2						; Remember the per_proc
-			ori		r8,r8,lo16(EXT(kernel_pmap_phys))	; Assume kernel (bottom of address)
-			mr		r23,r30						; Save the low part of faulting address
-			beq--	hpfInKern					; Skip if we are in the kernel
-			la		r8,ppUserPmap(r19)			; Point to the current user pmap
-			
-hpfInKern:	mr		r22,r29						; Save the high part of faulting address
-			
-			bt--	pf64Bitb,hpf64a				; If 64-bit, skip the next bit...
-
-;
-;			On 32-bit machines we emulate a segment exception by loading unused SRs with a
-;			predefined value that corresponds to no address space.  When we see that value
-;			we turn off the PTE miss bit in the DSISR to drive the code later on that will
-;			cause the proper SR to be loaded.
-;
-
-			lwz		r28,4(r8)					; Pick up the pmap
-			rlwinm.	r18,r18,0,SAVredriveb,SAVredriveb	; Was this a redrive?
-			mr		r25,r28						; Save the original pmap (in case we nest)
-			lwz		r0,pmapFlags(r28)			; Get pmap's flags
-			bne		hpfGVtest					; Segs are not ours if so...
-			mfsrin	r4,r30						; Get the SR that was used for translation
-			cmplwi	r4,invalSpace				; Is this a simulated segment fault?
-			bne++	hpfGVtest					; No...
-			
-			rlwinm	r27,r27,0,dsiMissb+1,dsiMissb-1	; Clear the PTE miss bit in DSISR
-			b		hpfGVtest					; Join on up...
-			
-			.align	5
-
-			nop									; Push hpfNest to a 32-byte boundary
-			nop									; Push hpfNest to a 32-byte boundary
-			nop									; Push hpfNest to a 32-byte boundary
-
-hpf64a:		ld		r28,0(r8)					; Get the pmap pointer (64-bit)
-			mr		r25,r28						; Save the original pmap (in case we nest)
-			lwz		r0,pmapFlags(r28)			; Get pmap's flags
-			
-hpfGVtest:	rlwinm.	r0,r0,0,pmapVMgsaa			; Using guest shadow mapping assist?
-			bne		hpfGVxlate					; Yup, do accelerated shadow stuff
-
-;
-;			This is where we loop descending nested pmaps
-;
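-;			A hedged C-shaped view of the loop that follows (helper names are
-;			invented): search the current pmap; a nest or linkage mapping
-;			relocates the vaddr and redirects the search into the target pmap,
-;			to a depth of at most 64:
-;
-;				for (depth = 64; depth > 0; depth--) {
-;					mp = mapSearch(pmap, va);
-;					if (mp == NULL || removing(mp))
-;						return NOT_FOUND;
-;					if (type(mp) != mpNest && type(mp) != mpLinkage)
-;						break;				/* found the real mapping     */
-;					va   += nest_reloc(mp);			/* relocate into nested space */
-;					pmap  = pmap_trans(space(mp));		/* descend one level          */
-;				}
-;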
-
-hpfNest:	la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			addi	r20,r20,-1					; Count nest try
-			bl		sxlkShared					; Go get a shared lock on the mapping lists
-			mr.		r3,r3						; Did we get the lock?
-			bne--	hpfBadLock					; Nope...
-
-			mr		r3,r28						; Get the pmap pointer
-			mr		r4,r22						; Get top of faulting vaddr
-			mr		r5,r23						; Get bottom of faulting vaddr
-			bl		EXT(mapSearch)				; Go see if we can find it (R7 gets mpFlags)
-
-			rlwinm	r0,r7,0,mpRIPb,mpRIPb		; Are we removing this one?
-			mr.		r31,r3						; Save the mapping if we found it
-			cmplwi	cr1,r0,0					; Check for removal
-			crorc	cr0_eq,cr0_eq,cr1_eq		; Merge not found and removing
-			
-			bt--	cr0_eq,hpfNotFound			; Not found or removing...
-
-			rlwinm	r0,r7,0,mpType				; Isolate mapping type
-			cmplwi	r0,mpNest					; Are we again nested?
-			cmplwi	cr1,r0,mpLinkage			; Are we a linkage type?
-			cror	cr0_eq,cr1_eq,cr0_eq		; cr0_eq <- nested or linkage type?
-			mr		r26,r7						; Get the flags for this mapping (passed back from search call)
-			
-			lhz		r21,mpSpace(r31)			; Get the space
-
-			bne++	hpfFoundIt					; No, we found our guy...
-			
-
-#if pmapTransSize != 12
-#error pmapTrans entry size is not 12 bytes!!!!!!!!!!!! It is pmapTransSize
-#endif
-			cmplwi	r0,mpLinkage				; Linkage mapping?
-			cmplwi	cr1,r20,0					; Too many nestings?
-			beq--	hpfSpclNest					; Do we need to do special handling?
-
-hpfCSrch:	lhz		r21,mpSpace(r31)			; Get the space
-			lwz		r8,mpNestReloc(r31)			; Get the vaddr relocation
-			lwz		r9,mpNestReloc+4(r31)		; Get the vaddr relocation bottom half
-			la		r3,pmapSXlk(r28)			; Point to the old pmap search lock
-			lis		r0,0x8000					; Get 0xFFFFFFFF80000000
-			lis		r10,hi16(EXT(pmapTrans))	; Get the translate table
-			add		r0,r0,r0					; Get 0xFFFFFFFF00000000 for 64-bit or 0 for 32-bit
-			blt--	cr1,hpfNestTooMuch			; Too many nestings, must be a loop...
-			or		r23,r23,r0					; Make sure a carry will propagate all the way in 64-bit
-			slwi	r11,r21,3					; Multiply space by 8
-			ori		r10,r10,lo16(EXT(pmapTrans))	; Get the translate table low part
-			addc	r23,r23,r9					; Relocate bottom half of vaddr
-			lwz		r10,0(r10)					; Get the actual translation map
-			slwi	r12,r21,2					; Multiply space by 4
-			add		r10,r10,r11					; Add in the higher part of the index
-			rlwinm	r23,r23,0,0,31				; Clean up the relocated address (does nothing in 32-bit)
-			adde	r22,r22,r8					; Relocate the top half of the vaddr
-			add		r12,r12,r10					; Now we are pointing at the space to pmap translation entry
-			bl		sxlkUnlock					; Unlock the search list
-			
-			bt++	pf64Bitb,hpfGetPmap64		; Separate handling for 64-bit machines
-			lwz		r28,pmapPAddr+4(r12)		; Get the physical address of the new pmap
-			cmplwi	r28,0						; Is the pmap paddr valid?
-			bne+	hpfNest						; Nest into new pmap...
-			b		hpfBadPmap					; Handle bad pmap
-			
-hpfGetPmap64:
-			ld		r28,pmapPAddr(r12)			; Get the physical address of the new pmap
-			cmpldi	r28,0						; Is the pmap paddr valid?
-			bne++	hpfNest						; Nest into new pmap...
-			b		hpfBadPmap					; Handle bad pmap			
-
-
-;
-;			Error condition.  We only allow 64 nestings.  This keeps us from having to 
-;			check for recursive nests when we install them.
-;
-		
-			.align	5
-
-hpfNestTooMuch:
-			lwz		r20,savedsisr(r13)			; Get the DSISR
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list (R3 good from above)
-			ori		r20,r20,1					; Indicate that there was a nesting problem 
-			stw		r20,savedsisr(r13)			; Stash it
-			lwz		r11,saveexception(r13)		; Restore the exception code
-			b		EXT(PFSExit)				; Exit through the common page fault exit...
-
-;
-;			Error condition - lock failed - this is fatal
-;
-		
-			.align	5
-
-hpfBadLock:
-			lis		r0,hi16(Choke)				; System abend
-			ori		r0,r0,lo16(Choke)			; System abend
-			li		r3,failMapping				; Show mapping failure
-			sc
-			
-;
-;			Error condition - space id selected an invalid pmap - fatal
-;
-
-			.align	5
-			
-hpfBadPmap:
-			lis		r0,hi16(Choke)				; System abend
-			ori		r0,r0,lo16(Choke)			; System abend
-			li		r3,failPmap					; Show invalid pmap
-			sc
-			
-;
-;			Did not find any kind of mapping
-;
-
-			.align	5
-			
-hpfNotFound:
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock it
-			lwz		r11,saveexception(r13)		; Restore the exception code
-			
-hpfExit:										; We need this because we cannot do a relative branch
-			b		EXT(PFSExit)				; Exit through the common page fault exit...
-
-
-;
-;			Here is where we handle special mappings.  So far, the only use is to load a 
-;			processor specific segment register for copy in/out handling.
-;
-;			The only (so far implemented) special map is used for copyin/copyout.
-;			We keep a mapping of a "linkage" mapping in the per_proc.
-;			The linkage mapping is basically a nested pmap that is switched in
-;			as part of context switch.  It relocates the appropriate user address
-;			space slice into the right place in the kernel.
-;
-
-			.align	5
-
-hpfSpclNest:	
-			la		r31,ppUMWmp(r19)			; Just point to the mapping
-			oris	r27,r27,hi16(dsiLinkage)	; Show that we had a linkage mapping here
-			b		hpfCSrch					; Go continue search...
-
-
-;
-;			We have now found a mapping for the address we faulted on. 
-;			
-
-;
-;			Here we go about calculating what the VSID should be. We concatenate
-;			the space ID (14 bits wide) 3 times.  We then slide the vaddr over
-;			so that bits 0:35 are in 14:49 (leaves a hole for one copy of the space ID).
-;			Then we XOR the expanded space ID with the shifted vaddr.  This gives us
-;			the VSID.  
-;
-;			This is used both for segment handling and PTE handling
-;
-
-
-#if maxAdrSpb != 14
-#error maxAdrSpb (address space id size) is not 14 bits!!!!!!!!!!!!
-#endif
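-
-;			A hedged 64-bit C condensation of the hash described above (the
-;			assembly does the same work with 32-bit register pairs; masking of
-;			the top bits is omitted here):
-;
-;				static inline uint64_t vsid_sketch(uint64_t space14, uint64_t va)
-;				{
-;					uint64_t esid = va >> 28;			/* va bits 0:35, the ESID */
-;					uint64_t trip = (space14 << 28) | (space14 << 14) | space14;
-;					return (esid << 14) ^ trip;			/* shifted ESID XOR ids   */
-;				}
-;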
-
-;			Important non-volatile registers at this point ('home' means the final pmap/mapping found
-;   		when a multi-level mapping has been successfully searched):
-;				r21: home space id number
-;				r22: relocated high-order 32 bits of vaddr
-;				r23: relocated low-order 32 bits of vaddr 	
-;				r25: pmap physical address
-;				r27: dsisr
-;				r28: home pmap physical address
-;				r29: high-order 32 bits of faulting vaddr
-;				r30: low-order 32 bits of faulting vaddr
-;				r31: mapping's physical address
-
-			.align	5
-			
-hpfFoundIt:	lwz		r12,pmapFlags(r28)			; Get the pmap flags so we can find the keys for this segment
-hpfGVfound:	rlwinm.	r0,r27,0,dsiMissb,dsiMissb	; Did we actually miss the segment?
-			rlwinm	r15,r23,18,14,17			; Shift 32:35 (0:3) of vaddr just above space ID
-			rlwinm	r20,r21,28,22,31			; Shift upper 10 bits of space into high order
-			rlwinm	r14,r22,18,14,31			; Shift 0:17 of vaddr over
-			rlwinm	r0,r27,0,dsiLinkageb,dsiLinkageb	; Isolate linkage mapping flag
-			rlwimi	r21,r21,14,4,17				; Make a second copy of space above first
-			cmplwi	cr5,r0,0					; Did we just do a special nesting?
-			rlwimi	r15,r22,18,0,13				; Shift 18:31 of vaddr just above shifted 32:35	
-			crorc	cr0_eq,cr0_eq,cr5_eq		; Force ourselves through the seg load code if special nest
-			rlwimi	r21,r21,28,0,3				; Get low order of 3rd copy of space at top of register
-			xor		r14,r14,r20					; Calculate the top half of VSID
-			xor		r15,r15,r21					; Calculate the bottom half of the VSID
-			rlwinm	r14,r14,12,15,19			; Slide the top of the VSID over to correct position (trim for 65 bit addressing)
-			rlwinm	r12,r12,9,20,22				; Isolate and position key for cache entry
-			rlwimi	r14,r15,12,20,31			; Slide top of bottom of VSID over into the top
-			rlwinm	r15,r15,12,0,19				; Slide the last nybble into the low order segment position
-			or		r12,r12,r15					; Add key into the bottom of VSID
-;
-;			Note: ESID is in R22:R23 pair; VSID is in R14:R15; cache form VSID is R14:R12
-			
-			bne++	hpfPteMiss					; Nope, normal PTE miss...
-
-;
-;			Here is the only place that we make an entry in the pmap segment cache.
-;
-;			Note that we do not make an entry in the segment cache for special
-;			nested mappings.  This makes the copy in/out segment get refreshed
-;			when switching threads.
-;
-;			The first thing that we do is to look up the ESID we are going to load
-;			into a segment in the pmap cache.  If it is already there, this is
-;			a segment that appeared since the last time we switched address spaces.
-;			If all is correct, then it was another processor that made the cache
-;			entry.  If not, well, it is an error that we should die on, but I have
-;			not figured out a good way to trap it yet.
-;
-;			If we get a hit, we just bail, otherwise, lock the pmap cache, select
-;			an entry based on the generation number, update the cache entry, and 
-;			also update the pmap sub-tag as well.  The sub-tag is a table of 4 bit
-;			entries that correspond to the last 4 bits (32:35 for 64-bit and 
-;			0:3 for 32-bit) of the ESID.
-;
-;			Then we unlock and bail.
-;
-;			First lock it.  Then select a free slot or steal one based on the generation
-;			number. Then store it, update the allocation flags, and unlock.
-;
-;			The cache entry contains an image of the ESID/VSID pair we would load for
-;			64-bit architecture.  For 32-bit, it is a simple transform to an SR image.
-;
-;			Remember, this cache entry goes in the ORIGINAL pmap (saved in R25), not
-;			the current one, which may have changed because we nested.
-;
-;			Also remember that we do not store the valid bit in the ESID.  If we 
-;			do, this will break some other stuff.
-;
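-;			Hedged sketch of the path just described (every name is invented):
-;
-;				if (pmapCacheLookup(pmap, esid) < 0) {		/* miss: make an entry  */
-;					lock(&pmap->pmapCCtl);
-;					slot = free_slot_or_steal_by_generation(pmap);
-;					pmap->segCache[slot].esid = esid;	/* valid bit NOT stored */
-;					pmap->segCache[slot].vsid = vsid | keys;
-;					subtag_set(pmap, esid & 0xF, slot);	/* low 4 ESID bits      */
-;					unlock_bump_generation(&pmap->pmapCCtl);
-;				}
-;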
-
-			bne--	cr5,hpfNoCacheEnt2			; Skip the cache entry if this is a "special nest" fault....
-			
-			mr		r3,r25						; Point to the pmap
-			mr		r4,r29						; ESID high half
-			mr		r5,r30						; ESID low half
-			bl		pmapCacheLookup				; Go see if this is in the cache already
-			
-			mr.		r3,r3						; Did we find it?
-			mr		r4,r11						; Copy this to a different register
-
-			bne--	hpfNoCacheEnt				; Yes, we found it, no need to make another entry...
-			
-			lwz		r10,pmapSCSubTag(r25)		; Get the first part of the sub-tag lookup table
-			lwz		r11,pmapSCSubTag+4(r25)		; Get the second part of the sub-tag lookup table
-			
-			cntlzw	r7,r4						; Find a free slot
-
-			subi	r6,r7,pmapSegCacheUse		; We end up with a negative if we find one
-			rlwinm	r30,r30,0,0,3				; Clean up the ESID
-			srawi	r6,r6,31					; Get 0xFFFFFFFF if we have one, 0 if not
-			addi	r5,r4,1						; Bump the generation number
-			and		r7,r7,r6					; Clear bit number if none empty
-			andc	r8,r4,r6					; Clear generation count if we found an empty
-			rlwimi	r4,r5,0,17,31				; Insert the new generation number into the control word			
-			or		r7,r7,r8					; Select a slot number
-			li		r8,0						; Clear
-			andi.	r7,r7,pmapSegCacheUse-1		; Wrap into the number we are using
-			oris	r8,r8,0x8000				; Get the high bit on
-			la		r9,pmapSegCache(r25)		; Point to the segment cache
-			slwi	r6,r7,4						; Get index into the segment cache
-			slwi	r2,r7,2						; Get index into the segment cache sub-tag index
-			srw		r8,r8,r7					; Get the mask
-			cmplwi	r2,32						; See if we are in the first or second half of sub-tag
-			li		r0,0						; Clear
-			rlwinm	r2,r2,0,27,31				; Wrap shift so we do not shift cache entries 8-F out
-			oris	r0,r0,0xF000				; Get the sub-tag mask
-			add		r9,r9,r6					; Point to the cache slot
-			srw		r0,r0,r2					; Slide sub-tag mask to right slot (shift work for either half)
-			srw		r5,r30,r2					; Slide sub-tag to right slot (shift work for either half)
-			
-			stw		r29,sgcESID(r9)				; Save the top of the ESID
-			andc	r10,r10,r0					; Clear sub-tag slot in case we are in top
-			andc	r11,r11,r0					; Clear sub-tag slot in case we are in bottom
-			stw		r30,sgcESID+4(r9)			; Save the bottom of the ESID
-			or		r10,r10,r5					; Stick in subtag in case top half
-			or		r11,r11,r5					; Stick in subtag in case bottom half
-			stw		r14,sgcVSID(r9)				; Save the top of the VSID
-			andc	r4,r4,r8					; Clear the invalid bit for the slot we just allocated
-			stw		r12,sgcVSID+4(r9)			; Save the bottom of the VSID and the key
-			bge		hpfSCSTbottom				; Go save the bottom part of sub-tag
-			
-			stw		r10,pmapSCSubTag(r25)		; Save the top of the sub-tag
-			b		hpfNoCacheEnt				; Go finish up...
-			
-hpfSCSTbottom:
-			stw		r11,pmapSCSubTag+4(r25)		; Save the bottom of the sub-tag
-
-
-hpfNoCacheEnt:	
-			eieio								; Make sure cache is updated before lock
-			stw		r4,pmapCCtl(r25)			; Unlock, allocate, and bump generation number
-
-
-hpfNoCacheEnt2:
-			lwz		r4,ppMapFlags(r19)			; Get the protection key modifier
-			bt++	pf64Bitb,hpfLoadSeg64		; If 64-bit, go load the segment...
-						
-;
-;			Make and enter 32-bit segment register
-;
-
-			lwz		r16,validSegs(r19)			; Get the valid SR flags
-			xor		r12,r12,r4					; Alter the storage key before loading segment register
-			rlwinm	r2,r30,4,28,31				; Isolate the segment we are setting
-			rlwinm	r6,r12,19,1,3				; Insert the keys and N bit			
-			lis		r0,0x8000					; Set bit 0
-			rlwimi	r6,r12,20,12,31				; Insert 4:23 the VSID
-			srw		r0,r0,r2					; Get bit corresponding to SR
-			rlwimi	r6,r14,20,8,11				; Get the last nybble of the SR contents			
-			or		r16,r16,r0					; Show that SR is valid
-		
-			mtsrin	r6,r30						; Set the actual SR
-			
-			stw		r16,validSegs(r19)			; Set the valid SR flags
-		
-			b		hpfPteMiss					; SR loaded, go do a PTE...
-			
-;
-;			Make and enter 64-bit segment look-aside buffer entry.
-;			Note that the cache entry is the right format except for valid bit.
-;			We also need to convert from long long to 64-bit register values.
-;
-
-
-			.align	5
-			
-hpfLoadSeg64:
-			ld		r16,validSegs(r19)			; Get the valid SLB entry flags
-			sldi	r8,r29,32					; Move high order address over
-			sldi	r10,r14,32					; Move high part of VSID over
-			
-			not		r3,r16						; Make valids be 0s
-			li		r0,1						; Prepare to set bit 0
-			
-			cntlzd	r17,r3						; Find a free SLB	
-			xor		r12,r12,r4					; Alter the storage key before loading segment table entry
-			or		r9,r8,r30					; Form full 64-bit address
-			cmplwi	r17,63						; Did we find a free SLB entry?		
-			sldi	r0,r0,63					; Get bit 0 set
-			or		r10,r10,r12					; Move in low part and keys
-			addi	r17,r17,1					; Skip SLB 0 always
-			blt++	hpfFreeSeg					; Yes, go load it...
-
-;
-;			No free SLB entries, select one that is in use and invalidate it
-;
-			lwz		r4,ppSegSteal(r19)			; Get the next slot to steal
-			addi	r17,r4,pmapSegCacheUse+1	; Select stealee from non-cached slots only
-			addi	r4,r4,1						; Set next slot to steal
-			slbmfee	r7,r17						; Get the entry that is in the selected spot
-			subi	r2,r4,63-pmapSegCacheUse	; Force steal to wrap
-			rldicr	r7,r7,0,35					; Clear the valid bit and the rest
-			srawi	r2,r2,31					; Get -1 if steal index still in range
-			slbie	r7							; Invalidate the in-use SLB entry
-			and		r4,r4,r2					; Reset steal index when it should wrap
-			isync								; 
-			
-			stw		r4,ppSegSteal(r19)			; Set the next slot to steal
-;
-;			We are now ready to stick the SLB entry in the SLB and mark it in use
-;
-
-hpfFreeSeg:	
-			subi	r4,r17,1					; Adjust shift to account for skipping slb 0
-			mr		r7,r9						; Get a copy of the ESID with bits 36:63 clear
-			srd		r0,r0,r4					; Set bit mask for allocation
-			oris	r9,r9,0x0800				; Turn on the valid bit
-			or		r16,r16,r0					; Turn on the allocation flag
-			rldimi	r9,r17,0,58					; Copy in the SLB entry selector
-			
-			beq++	cr5,hpfNoBlow				; Skip blowing away the SLBE if this is not a special nest...
-			slbie	r7							; Blow away a potential duplicate
-			
-hpfNoBlow:	slbmte	r10,r9						; Make that SLB entry
-
-			std		r16,validSegs(r19)			; Mark as valid
-			b		hpfPteMiss					; STE loaded, go do a PTE...
-			
-;
-;			The segment has been set up and loaded if need be.  Now we are ready to build the
-;			PTE and get it into the hash table.
-;
-;			Note that there is actually a race here.  If we start fault processing on
-;			a different pmap, i.e., we have descended into a nested pmap, it is possible
-;			that the nest could have been removed from the original pmap.  We would
-;			succeed with this translation anyway.  I do not think we need to worry
-;			about this (famous last words) because nobody should be unnesting anything 
-;			if there are still people actively using them.  It should be up to the
-;			higher level VM system to put the kibosh on this.
-;
-;			There is also another race here: if we fault on the same mapping on more than
-;			one processor at the same time, we could end up with multiple PTEs for the same
-;			mapping.  This is not a good thing....   We really only need one of the
-;			fault handlers to finish, so what we do is to set a "fault in progress" flag in
-;			the mapping.  If we see that set, we just abandon the handler and hope that by
-;			the time we restore context and restart the interrupted code, the fault has
-;			been resolved by the other guy.  If not, we will take another fault.
-;
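-;
-;			A rough C-style sketch of that handshake (hypothetical helper names;
-;			load_reserved/store_conditional stand in for the lwarx/stwcx. pair):
-;
-;				do {
-;					flags = load_reserved(&mp->mpFlags);
-;					if ((flags & mpFIP) || (mp->mpPte & mpHValid))
-;						goto abandon;			// other CPU owns or already finished it
-;				} while (!store_conditional(&mp->mpFlags, flags | mpFIP));
-;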
-		
-;
-;			NOTE: IMPORTANT - CR7 contains a flag indicating if we have a block mapping or not.
-;			It is required to stay there until after we call mapSelSlot!!!!
-;
-
-			.align	5
-			
-hpfPteMiss:	lwarx	r0,0,r31					; Load the mapping flag field
-			lwz		r12,mpPte(r31)				; Get the quick pointer to PTE
-			li		r3,mpHValid					; Get the PTE valid bit
-			andi.	r2,r0,lo16(mpFIP)			; Are we handling a fault on the other side?
-			ori		r2,r0,lo16(mpFIP)			; Set the fault in progress flag
-			crnot	cr1_eq,cr0_eq				; Remember if FIP was on
-			and.	r12,r12,r3					; Isolate the valid bit
-			crorc	cr0_eq,cr1_eq,cr0_eq		; Bail if FIP is on.  Then, if we already have a PTE, bail...
-			beq--	hpfAbandon					; Yes, other processor is or already has handled this...
-			rlwinm	r0,r2,0,mpType				; Isolate mapping type
-			cmplwi	r0,mpBlock					; Is this a block mapping?
-			crnot	cr7_eq,cr0_eq				; Remember if we have a block mapping
-			stwcx.	r2,0,r31					; Store the flags
-			bne--	hpfPteMiss					; Collision, try again...
-
-			bt++	pf64Bitb,hpfBldPTE64		; Skip down to the 64 bit stuff...
-
-;
-;			At this point we are about to do the 32-bit PTE generation.
-;
-;			The following is the R14:R15 pair that contains the "shifted" VSID:
-;
-;                             1        2        3        4        4        5      6 
-;           0        8        6        4        2        0        8        6      3
-;          +--------+--------+--------+--------+--------+--------+--------+--------+
-;          |00000000|0000000V|VVVVVVVV|VVVVVVVV|VVVVVVVV|VVVVVVVV|VVVV////|////////|    
-;          +--------+--------+--------+--------+--------+--------+--------+--------+                   
-;
-;			The 24 bits of the 32-bit architecture VSID is in the following:
-;
-;                             1        2        3        4        4        5      6 
-;           0        8        6        4        2        0        8        6      3
-;          +--------+--------+--------+--------+--------+--------+--------+--------+
-;          |////////|////////|////////|////VVVV|VVVVVVVV|VVVVVVVV|VVVV////|////////|    
-;          +--------+--------+--------+--------+--------+--------+--------+--------+                   
-;
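-;
-;			For orientation, a worked example (illustrative numbers only): the
-;			32-bit primary hash formed below is
-;
-;				hash = VSID ^ (page index of EA)
-;
-;			so a VSID of 0x00123456 and page index 0x00000345 give 0x00123713,
-;			which is then shifted and wrapped into the hash table size to form
-;			the PTEG offset.
-;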
-
-
-hpfBldPTE32:
-			lwz		r25,mpVAddr+4(r31)			; Grab the base virtual address for the mapping (32-bit portion)	
-			lwz		r24,mpPAddr(r31)			; Grab the base physical page number for the mapping	
-
-			mfsdr1	r27							; Get the hash table base address
-
-			rlwinm	r0,r23,0,4,19				; Isolate just the page index
-			rlwinm	r18,r23,10,26,31			; Extract the API
-			xor		r19,r15,r0					; Calculate hash << 12
-			mr		r2,r25						; Save the flag part of the mapping
-			rlwimi	r18,r14,27,1,4				; Move bits 28:31 of the "shifted" VSID into the PTE image
-			rlwinm	r16,r27,16,7,15				; Extract the hash table size
-			rlwinm	r25,r25,0,0,19				; Clear out the flags
-			slwi	r24,r24,12					; Change ppnum to physical address (note: 36-bit addressing not supported)
-			sub		r25,r23,r25					; Get offset in mapping to page (0 unless block map)
-			ori		r16,r16,lo16(0xFFC0)		; Slap in the bottom of the mask
-			rlwinm	r27,r27,0,0,15				; Extract the hash table base
-			rlwinm	r19,r19,26,6,25				; Shift hash over to make offset into hash table
-			add		r24,r24,r25					; Adjust to true physical address
-			rlwimi	r18,r15,27,5,24				; Move bits 32:51 of the "shifted" VSID into the PTE image
-			rlwimi	r24,r2,0,20,31				; Slap in the WIMG and prot
-			and		r19,r19,r16					; Wrap hash table offset into the hash table
-			ori		r24,r24,lo16(mpR)			; Turn on the reference bit right now
-			rlwinm	r20,r19,28,10,29			; Shift hash over to make offset into PCA
-			add		r19,r19,r27					; Point to the PTEG
-			subfic	r20,r20,-4					; Get negative offset to PCA
-			oris	r18,r18,lo16(0x8000)		; Make sure the valid bit is on
-			add		r20,r20,r27					; Point to the PCA slot
-		
-;
-;			We now have a valid PTE pair in R18/R24.  R18 is PTE upper and R24 is PTE lower.
-;			R19 contains the offset of the PTEG in the hash table. R20 has offset into the PCA.
-;		
-;			We need to check PTE pointer (mpPte) again after we lock the PTEG.  It is possible 
-;			that some other processor beat us and stuck in a PTE or that 
-;			all we had was a simple segment exception and the PTE was there the whole time.
-;			If we find a pointer, we are done.
-;
-
-			mr		r7,r20						; Copy the PCA pointer
-			bl		mapLockPteg					; Lock the PTEG
-	
-			lwz		r12,mpPte(r31)				; Get the offset to the PTE
-			mr		r17,r6						; Remember the PCA image
-			mr		r16,r6						; Prime the post-select PCA image
-			andi.	r0,r12,mpHValid				; Is there a PTE here already?
-			li		r21,8						; Get the number of slots
-
-			bne-	cr7,hpfNoPte32				; Skip this for a block mapping...
-
-			bne-	hpfBailOut					; Someone already did this for us...
-
-;
-;			The mapSelSlot function selects a PTEG slot to use. As input, it uses R6 as a 
-;			pointer to the PCA.  When it returns, R3 contains 0 if an unoccupied slot was
-;			selected, 1 if it stole a non-block PTE, or 2 if it stole a block mapped PTE.
-;			R4 returns the slot index.
-;
-;			REMEMBER: CR7 indicates that we are building a block mapping.
-;
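-;
-;			A C-ish sketch of how the result is consumed (hypothetical helper
-;			names, not original code):
-;
-;				switch (mapSelSlot(pca)) {		// slot index comes back in R4
-;				case 0: insert_pte(); break;		// unoccupied slot
-;				case 2: invalidate_old(); tlbie_it(); insert_pte(); break;	// stole a block PTE
-;				case 1: invalidate_old(); tlbie_it();	// stole a normal PTE, so also
-;					merge_rc_into_physent(); insert_pte(); break;
-;				}
-;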
-
-hpfNoPte32:	subic.	r21,r21,1					; See if we have tried all slots
-			mr		r6,r17						; Get back the original PCA
-			rlwimi	r6,r16,0,8,15				; Insert the updated steal slot
-			blt-	hpfBailOut					; Holy Cow, all slots are locked...
-			
-			bl		mapSelSlot					; Go select a slot (note that the PCA image is already set up)
-
-			cmplwi	cr5,r3,1					; Did we steal a slot?
-			rlwimi	r19,r4,3,26,28				; Insert PTE index into PTEG address yielding PTE address
-			mr		r16,r6						; Remember the PCA image after selection
-			blt+	cr5,hpfInser32				; Nope, no steal...
-			
-			lwz		r6,0(r19)					; Get the old PTE
-			lwz		r7,4(r19)					; Get the real part of the stealee
-			rlwinm	r6,r6,0,1,31				; Clear the valid bit
-			bgt		cr5,hpfNipBM				; Do not try to lock a non-existent physent for a block mapping...
-			srwi	r3,r7,12					; Change phys address to a ppnum
-			bl		mapFindPhyTry				; Go find and try to lock physent (note: if R3 is 0, there is no physent for this page)
-			cmplwi	cr1,r3,0					; Check if this is in RAM
-			bne-	hpfNoPte32					; Could not get it, try for another...
-			
-			crmove	cr5_gt,cr1_eq				; If we did not find a physent, pretend that this is a block map
-			
-hpfNipBM:	stw		r6,0(r19)					; Set the invalid PTE
-
-			sync								; Make sure the invalid is stored
-			li		r9,tlbieLock				; Get the TLBIE lock
-			rlwinm	r10,r6,21,0,3				; Shift last 4 bits of space to segment part
-			
-hpfTLBIE32:	lwarx	r0,0,r9						; Get the TLBIE lock 
-			mfsprg	r4,0						; Get the per_proc
-			rlwinm	r8,r6,25,18,31				; Extract the space ID
-			rlwinm	r11,r6,25,18,31				; Extract the space ID
-			lwz		r7,hwSteals(r4)				; Get the steal count
-			srwi	r2,r6,7						; Align segment number with hash
-			rlwimi	r11,r11,14,4,17				; Get copy above ourselves
-			mr.		r0,r0						; Is it locked? 
-			srwi	r0,r19,6					; Align PTEG offset for back hash
-			xor		r2,r2,r11					; Get the segment number (plus a whole bunch of extra bits)
- 			xor		r11,r11,r0					; Hash backwards to partial vaddr
-			rlwinm	r12,r2,14,0,3				; Shift segment up
-			mfsprg	r2,2						; Get feature flags 
-			li		r0,1						; Get our lock word 
-			rlwimi	r12,r6,22,4,9				; Move up the API
-			bne-	hpfTLBIE32					; It is locked, go wait...
-			rlwimi	r12,r11,12,10,19			; Move in the rest of the vaddr
-			
-			stwcx.	r0,0,r9						; Try to get it
-			bne-	hpfTLBIE32					; We was beat...
-			addi	r7,r7,1						; Bump the steal count
-			
-			rlwinm.	r0,r2,0,pfSMPcapb,pfSMPcapb	; Can this be an MP box?
-			li		r0,0						; Lock clear value 
-
-			tlbie	r12							; Invalidate it everywhere 
-
-			
-			beq-	hpfNoTS32					; Can not have MP on this machine...
-			
-			eieio								; Make sure that the tlbie happens first 
-			tlbsync								; Wait for everyone to catch up 
-			sync								; Make sure of it all
-
-hpfNoTS32:	stw		r0,tlbieLock(0)				; Clear the tlbie lock
-			
-			stw		r7,hwSteals(r4)				; Save the steal count
-			bgt		cr5,hpfInser32				; We just stole a block mapping...
-			
-			lwz		r4,4(r19)					; Get the RC of the just invalidated PTE
-			
-			la		r11,ppLink+4(r3)			; Point to the master RC copy
-			lwz		r7,ppLink+4(r3)				; Grab the pointer to the first mapping
-			rlwinm	r2,r4,27,ppRb-32,ppCb-32	; Position the new RC
-
-hpfMrgRC32:	lwarx	r0,0,r11					; Get the master RC
-			or		r0,r0,r2					; Merge in the new RC
-			stwcx.	r0,0,r11					; Try to stick it back
-			bne-	hpfMrgRC32					; Try again if we collided...
-			
-			
-hpfFPnch:	rlwinm.	r7,r7,0,~ppFlags			; Clean and test mapping address
-			beq-	hpfLostPhys					; We could not find our mapping.  Kick the bucket...
-			
-			lhz		r10,mpSpace(r7)				; Get the space
-			lwz		r9,mpVAddr+4(r7)			; And the vaddr
-			cmplw	cr1,r10,r8					; Is this one of ours?
-			xor		r9,r12,r9					; Compare virtual address
-			cmplwi	r9,0x1000					; See if we really match
-			crand	cr0_eq,cr1_eq,cr0_lt		; See if both space and vaddr match
-			beq+	hpfFPnch2					; Yes, found ours...
-			
-			lwz		r7,mpAlias+4(r7)			; Chain on to the next
-			b		hpfFPnch					; Check it out...
-
-hpfFPnch2:	sub		r0,r19,r27					; Get offset to the PTEG
-			stw		r0,mpPte(r7)				; Invalidate the quick pointer (keep quick pointer pointing to PTEG)
-			bl		mapPhysUnlock				; Unlock the physent now
-			
-hpfInser32:	oris	r18,r18,lo16(0x8000)		; Make sure the valid bit is on
-
-			stw		r24,4(r19)					; Stuff in the real part of the PTE
-			eieio								; Make sure this gets there first
-
-			stw		r18,0(r19)					; Stuff the virtual part of the PTE and make it valid
-			mr		r17,r16						; Get the PCA image to save
-			b		hpfFinish					; Go join the common exit code...
-			
-			
-;
-;			At this point we are about to do the 64-bit PTE generation.
-;
-;			The following is the R14:R15 pair that contains the "shifted" VSID:
-;
-;                             1        2        3        4        4        5      6 
-;           0        8        6        4        2        0        8        6      3
-;          +--------+--------+--------+--------+--------+--------+--------+--------+
-;          |00000000|0000000V|VVVVVVVV|VVVVVVVV|VVVVVVVV|VVVVVVVV|VVVV////|////////|    
-;          +--------+--------+--------+--------+--------+--------+--------+--------+                   
-;
-;
-
-			.align	5
-
-hpfBldPTE64:
-			ld		r10,mpVAddr(r31)			; Grab the base virtual address for the mapping 
-			lwz		r24,mpPAddr(r31)			; Grab the base physical page number for the mapping	
-
-			mfsdr1	r27							; Get the hash table base address
-
-			sldi	r11,r22,32					; Slide top of adjusted EA over
-			sldi	r14,r14,32					; Slide top of VSID over
-			rlwinm	r5,r27,0,27,31				; Isolate the size
-			eqv		r16,r16,r16					; Get all foxes here
-			rlwimi	r15,r23,16,20,24			; Stick in EA[36:40] to make AVPN	
-			mr		r2,r10						; Save the flag part of the mapping
-			or		r11,r11,r23					; Stick in bottom of adjusted EA for full 64-bit value	
-			rldicr	r27,r27,0,45				; Clean up the hash table base
-			or		r15,r15,r14					; Stick in bottom of AVPN for full 64-bit value	
-			rlwinm	r0,r11,0,4,19				; Clear out everything but the page
-			subfic	r5,r5,46					; Get number of leading zeros
-			xor		r19,r0,r15					; Calculate hash
-			ori		r15,r15,1					; Turn on valid bit in AVPN to make top of PTE
-			srd		r16,r16,r5					; Shift over to get length of table
-			srdi	r19,r19,5					; Convert page offset to hash table offset
-			rldicr	r16,r16,0,56				; Clean up lower bits in hash table size			
-			rldicr	r10,r10,0,51				; Clear out flags
-			sldi	r24,r24,12					; Change ppnum to physical address
-			sub		r11,r11,r10					; Get the offset from the base mapping
-			and		r19,r19,r16					; Wrap into hash table
-			add		r24,r24,r11					; Get actual physical address of this page
-			srdi	r20,r19,5					; Convert PTEG offset to PCA offset
-			rldimi	r24,r2,0,52					; Insert the keys, WIMG, RC, etc.
-			subfic	r20,r20,-4					; Get negative offset to PCA
-			ori		r24,r24,lo16(mpR)			; Force on the reference bit
-			add		r20,r20,r27					; Point to the PCA slot		
-			add		r19,r19,r27					; Point to the PTEG
-			
-;
-;			We now have a valid PTE pair in R15/R24.  R15 is PTE upper and R24 is PTE lower.
-;			R19 contains the offset of the PTEG in the hash table. R20 has offset into the PCA.
-;		
-;			We need to check PTE pointer (mpPte) again after we lock the PTEG.  It is possible 
-;			that some other processor beat us and stuck in a PTE or that 
-;			all we had was a simple segment exception and the PTE was there the whole time.
-;			If we find a pointer, we are done.
-;
-			
-			mr		r7,r20						; Copy the PCA pointer
-			bl		mapLockPteg					; Lock the PTEG
-	
-			lwz		r12,mpPte(r31)				; Get the offset to the PTE
-			mr		r17,r6						; Remember the PCA image
-			mr		r18,r6						; Prime post-selection PCA image
-			andi.	r0,r12,mpHValid				; See if we have a PTE now
-			li		r21,8						; Get the number of slots
-		
-			bne--	cr7,hpfNoPte64				; Skip this for a block mapping...
-
-			bne--	hpfBailOut					; Someone already did this for us...
-
-;
-;			The mapSelSlot function selects a PTEG slot to use. As input, it uses R6 as a 
-;			pointer to the PCA.  When it returns, R3 contains 0 if an unoccupied slot was
-;			selected, 1 if it stole a non-block PTE, or 2 if it stole a block mapped PTE.
-;			R4 returns the slot index.
-;
-;			REMEMBER: CR7 indicates that we are building a block mapping.
-;
-
-hpfNoPte64:	subic.	r21,r21,1					; See if we have tried all slots
-			mr		r6,r17						; Restore original state of PCA
-			rlwimi	r6,r18,0,8,15				; Insert the updated steal slot
-			blt-	hpfBailOut					; Holy Cow, all slots are locked...
-			
-			bl		mapSelSlot					; Go select a slot
-
-			cmplwi	cr5,r3,1					; Did we steal a slot?			
-			mr		r18,r6						; Remember the PCA image after selection
-			insrdi	r19,r4,3,57					; Insert slot index into PTEG address bits 57:59, forming the PTE address
-			lwz		r10,hwSteals(r2)			; Get the steal count
-			blt++	cr5,hpfInser64				; Nope, no steal...
-
-			ld		r6,0(r19)					; Get the old PTE
-			ld		r7,8(r19)					; Get the real part of the stealee
-			rldicr	r6,r6,0,62					; Clear the valid bit
-			bgt		cr5,hpfNipBMx				; Do not try to lock a non-existent physent for a block mapping...
-			srdi	r3,r7,12					; Change physical address to a page number (ppnum)
-			bl		mapFindPhyTry				; Go find and try to lock physent (note: if R3 is 0, there is no physent for this page)
-			cmplwi	cr1,r3,0					; Check if this is in RAM
-			bne--	hpfNoPte64					; Could not get it, try for another...
-			
-			crmove	cr5_gt,cr1_eq				; If we did not find a physent, pretend that this is a block map
-			
-hpfNipBMx:	std		r6,0(r19)					; Set the invalid PTE
-			li		r9,tlbieLock				; Get the TLBIE lock
-
-			srdi	r11,r6,5					; Shift VSID over for back hash
-			mfsprg	r4,0						; Get the per_proc
-			xor		r11,r11,r19					; Hash backwards to get low bits of VPN
-			sync								; Make sure the invalid is stored
-			
-			sldi	r12,r6,16					; Move AVPN to EA position
-			sldi	r11,r11,5					; Move this to the page position
-			
-hpfTLBIE64:	lwarx	r0,0,r9						; Get the TLBIE lock 
-			mr.		r0,r0						; Is it locked? 
-			li		r0,1						; Get our lock word
-			bne--	hpfTLBIE65					; It is locked, go wait...
-			
-			stwcx.	r0,0,r9						; Try to get it
-			rldimi	r12,r11,0,41				; Stick the low part of the page number into the AVPN
-			rldicl	r8,r6,52,50					; Isolate the address space ID
-			bne--	hpfTLBIE64					; We was beat...
-			addi	r10,r10,1					; Bump the steal count
-			
-			rldicl	r11,r12,0,16				; Clear cause the book says so
-			li		r0,0						; Lock clear value 
-
-			tlbie	r11							; Invalidate it everywhere 
-
-			mr		r7,r8						; Get a copy of the space ID
-			eieio								; Make sure that the tlbie happens first
-			rldimi	r7,r7,14,36					; Copy address space to make hash value
-			tlbsync								; Wait for everyone to catch up
-			rldimi	r7,r7,28,22					; Add in a 3rd copy of the hash up top
-			srdi	r2,r6,26					; Shift original segment down to bottom
-			
-			ptesync								; Make sure of it all
-			xor		r7,r7,r2					; Compute original segment
-			stw		r0,tlbieLock(0)				; Clear the tlbie lock
-
-			stw		r10,hwSteals(r4)			; Save the steal count
-			bgt		cr5,hpfInser64				; We just stole a block mapping...
-			
-			rldimi	r12,r7,28,0					; Insert decoded segment
-			rldicl	r4,r12,0,13					; Trim to max supported address
-			
-			ld		r12,8(r19)					; Get the RC of the just invalidated PTE			
-
-			la		r11,ppLink+4(r3)			; Point to the master RC copy
-			ld		r7,ppLink(r3)				; Grab the pointer to the first mapping
-			rlwinm	r2,r12,27,ppRb-32,ppCb-32	; Position the new RC
-
-hpfMrgRC64:	lwarx	r0,0,r11					; Get the master RC
-			li		r12,ppLFAmask				; Get mask to clean up alias pointer
-			or		r0,r0,r2					; Merge in the new RC
-			rotrdi	r12,r12,ppLFArrot			; Rotate clean up mask to get 0xF0000000000000000F
-			stwcx.	r0,0,r11					; Try to stick it back
-			bne--	hpfMrgRC64					; Try again if we collided...
-	
-hpfFPnchx:	andc.	r7,r7,r12					; Clean and test mapping address
-			beq--	hpfLostPhys					; We could not find our mapping.  Kick the bucket...
-			
-			lhz		r10,mpSpace(r7)				; Get the space
-			ld		r9,mpVAddr(r7)				; And the vaddr
-			cmplw	cr1,r10,r8					; Is this one of ours?
-			xor		r9,r4,r9					; Compare virtual address
-			cmpldi	r9,0x1000					; See if we really match
-			crand	cr0_eq,cr1_eq,cr0_lt		; See if both space and vaddr match
-			beq++	hpfFPnch2x					; Yes, found ours...
-			
-			ld		r7,mpAlias(r7)				; Chain on to the next
-			b		hpfFPnchx					; Check it out...
-
-			.align	5
-
-hpfTLBIE65:	li		r7,lgKillResv				; Point to the reservation kill area
-			stwcx.	r7,0,r7						; Kill reservation		
-			
-hpfTLBIE63: lwz		r0,0(r9)					; Get the TLBIE lock
-			mr.		r0,r0						; Is it locked?
-			beq++	hpfTLBIE64					; Yup, wait for it...
-			b		hpfTLBIE63					; Nope, try again..
-
-
-
-hpfFPnch2x:	sub		r0,r19,r27					; Get offset to PTEG
-			stw		r0,mpPte(r7)				; Invalidate the quick pointer (keep pointing at PTEG though)
-			bl		mapPhysUnlock				; Unlock the physent now
-			
-
-hpfInser64:	std		r24,8(r19)					; Stuff in the real part of the PTE
-			eieio								; Make sure this gets there first
-			std		r15,0(r19)					; Stuff the virtual part of the PTE and make it valid
-			mr		r17,r18						; Get the PCA image to set
-			b		hpfFinish					; Go join the common exit code...
-
-hpfLostPhys:
-			lis		r0,hi16(Choke)				; System abend - we must find the stolen mapping or we are dead
-			ori		r0,r0,lo16(Choke)			; System abend
-			sc
-			
-;
-;			This is the common code we execute when we are finished setting up the PTE.
-;
-	
-			.align	5
-			
-hpfFinish:	sub		r4,r19,r27					; Get offset of PTE
-			ori		r4,r4,lo16(mpHValid)		; Add valid bit to PTE offset
-			bne		cr7,hpfBailOut				; Do not set the PTE pointer for a block map
-			stw		r4,mpPte(r31)				; Remember our PTE
-			
-hpfBailOut:	eieio								; Make sure all updates come first
-			stw		r17,0(r20)					; Unlock and set the final PCA
-			
-;
-;			This is where we go if we have started processing the fault, but find that someone
-;			else has taken care of it.
-;
-
-hpfIgnore:	lwz		r2,mpFlags(r31)				; Get the mapping flags
-			rlwinm	r2,r2,0,mpFIPb+1,mpFIPb-1	; Clear the "fault in progress" flag
-			sth		r2,mpFlags+2(r31)			; Set it
-			
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list
-
-			li		r11,T_IN_VAIN				; Say that it was handled
-			b		EXT(PFSExit)				; Leave...
-
-;
-;			This is where we go when we find that someone else
-;			is in the process of handling the fault.
-;
-
-hpfAbandon:	li		r3,lgKillResv				; Kill off any reservation
-			stwcx.	r3,0,r3						; Do it
-			
-			la		r3,pmapSXlk(r28)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list
-
-			li		r11,T_IN_VAIN				; Say that it was handled
-			b		EXT(PFSExit)				; Leave...
-			
-;
-;			Guest shadow assist -- page fault handler
-;
-;			Here we handle a fault in a guest pmap that has the guest shadow mapping
-;			assist active. We locate the VMM pmap extension block, which contains an
-;			index over the discontiguous multi-page shadow hash table. The index
-;			corresponding to our vaddr is selected, and the selected group within
-;			that page is searched for a valid and active entry that contains
-;			our vaddr and space id. The search is pipelined, so that we may fetch
-;			the next slot while examining the current slot for a hit. The final
-;			search iteration is unrolled so that we don't fetch beyond the end of
-;			our group, which could have dire consequences depending upon where the
-;			physical hash page is located.
-;
-;			The VMM pmap extension block occupies a page. Beginning at offset 0, we
-;			have the pmap_vmm_ext proper. Aligned at the first 128-byte boundary
-;			after the pmap_vmm_ext is the hash table physical address index, a 
-;			linear list of 64-bit physical addresses of the pages that comprise
-;			the hash table.
-;
-;			In the event that we successfully locate a guest mapping, we re-join
-;			the page fault path at hpfGVfound with the mapping's address in r31;
-;			otherwise, we re-join at hpfNotFound. In either case, we re-join holding
-;			a share of the pmap search lock for the host pmap with the host pmap's
-;			address in r28, the guest pmap's space id in r21, and the guest pmap's
-;			flags in r12.
-;
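-;
-;			A C-ish sketch of the lookup (illustrative; rotl() stands for the
-;			rlwinm rotate-and-mask and hash_page_index is the list described
-;			above):
-;
-;				hash  = space_id ^ (vaddr >> 12);
-;				page  = hash_page_index[rotl(hash, GV_HPAGE_SHIFT) & GV_HPAGE_MASK];
-;				group = page | (rotl(hash, GV_HGRP_SHIFT) & GV_HGRP_MASK);
-;				for (slot = 0; slot < GV_SLOTS; slot++)
-;					if (match(group + slot * GV_SLOT_SZ, space_id, vaddr)) found();
-;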
-
-			.align	5
-hpfGVxlate:
-			bt		pf64Bitb,hpfGV64			; Take 64-bit path for 64-bit machine
-			
-			lwz		r11,pmapVmmExtPhys+4(r28)	; r11 <- VMM pmap extension block paddr
-			lwz		r12,pmapFlags(r28)			; r12 <- guest pmap's flags
-			lwz		r21,pmapSpace(r28)			; r21 <- guest space ID number
-			lwz		r28,vmxHostPmapPhys+4(r11)	; r28 <- host pmap's paddr
-			la		r31,VMX_HPIDX_OFFSET(r11)	; r31 <- base of hash page physical index
-			rlwinm	r10,r30,0,0xFFFFF000		; r10 <- page-aligned guest vaddr
-			lwz		r6,vxsGpf(r11)				; Get guest fault count
-			
-			srwi	r3,r10,12					; Form shadow hash:
-			xor		r3,r3,r21					; 	spaceID ^ (vaddr >> 12) 
-			rlwinm	r4,r3,GV_HPAGE_SHIFT,GV_HPAGE_MASK
-												; Form index offset from hash page number
-			add		r31,r31,r4					; r31 <- hash page index entry
-			lwz		r31,4(r31)					; r31 <- hash page paddr
-			rlwimi	r31,r3,GV_HGRP_SHIFT,GV_HGRP_MASK
-												; r31 <- hash group paddr
-
-			la		r3,pmapSXlk(r28)			; Point to the host pmap's search lock
-			bl		sxlkShared					; Go get a shared lock on the mapping lists
-			mr.		r3,r3						; Did we get the lock?
-			bne-	hpfBadLock					; Nope...
-
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			lwz		r5,mpVAddr+4(r31)			; r5 <- 1st mapping slot's virtual address
-			addi	r6,r6,1						; Increment guest fault count
-			li		r0,(GV_SLOTS - 1)			; Prepare to iterate over mapping slots
-			mtctr	r0							;  in this group
-			stw		r6,vxsGpf(r11)				; Update guest fault count
-			b		hpfGVlp32
-
-			.align	5
-hpfGVlp32:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrwi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			lwz		r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr
-			andi.	r6,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r7,r7,r21					; Compare space ID
-			or		r0,r6,r7					; r0 <- !(!free && !dormant && space match)
-			xor		r8,r8,r10					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		hpfGVfound					; Join common path on hit (r31 points to mapping)
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	hpfGVlp32					; Iterate
-
-			clrrwi	r5,r5,12					; Remove flags from virtual address			
-			andi.	r3,r3,mpgFree+mpgDormant	; Isolate guest free and dormant flag
-			xor		r4,r4,r21					; Compare space ID
-			or		r0,r3,r4					; r0 <- !(!free && !dormant && space match)
-			xor		r5,r5,r10					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		hpfGVfound					; Join common path on hit (r31 points to mapping)
-			
-			b		hpfGVmiss
-
-			.align	5
-hpfGV64:
-			ld		r11,pmapVmmExtPhys(r28)		; r11 <- VMM pmap extension block paddr
-			lwz		r12,pmapFlags(r28)			; r12 <- guest pmap's flags
-			lwz		r21,pmapSpace(r28)			; r21 <- guest space ID number
-			ld		r28,vmxHostPmapPhys(r11)	; r28 <- host pmap's paddr
-			la		r31,VMX_HPIDX_OFFSET(r11)	; r31 <- base of hash page physical index
-			rlwinm	r10,r30,0,0xFFFFF000		; Form 64-bit guest vaddr
-			rldimi	r10,r29,32,0				;  cleaning up low-order 12 bits			
-			lwz		r6,vxsGpf(r11)				; Get guest fault count
-
-			srwi	r3,r10,12					; Form shadow hash:
-			xor		r3,r3,r21					; 	spaceID ^ (vaddr >> 12) 
-			rlwinm	r4,r3,GV_HPAGE_SHIFT,GV_HPAGE_MASK
-												; Form index offset from hash page number
-			add		r31,r31,r4					; r31 <- hash page index entry
-			ld		r31,0(r31)					; r31 <- hash page paddr
-			insrdi	r31,r3,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2)
-												; r31 <- hash group paddr
-												
-			la		r3,pmapSXlk(r28)			; Point to the host pmap's search lock
-			bl		sxlkShared					; Go get a shared lock on the mapping lists
-			mr.		r3,r3						; Did we get the lock?
-			bne--	hpfBadLock					; Nope...
-
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			ld		r5,mpVAddr(r31)				; r5 <- 1st mapping slot's virtual address
-			addi	r6,r6,1						; Increment guest fault count
-			li		r0,(GV_SLOTS - 1)			; Prepare to iterate over mapping slots
-			mtctr	r0							;  in this group
-			stw		r6,vxsGpf(r11)				; Update guest fault count
-			b		hpfGVlp64
-			
-			.align	5
-hpfGVlp64:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrdi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			ld		r5,mpVAddr+GV_SLOT_SZ(r31)	; r5 <- next mapping slot's virtual addr
-			andi.	r6,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flag
-			xor		r7,r7,r21					; Compare space ID
-			or		r0,r6,r7					; r0 <- !(!free && !dormant && space match)
-			xor		r8,r8,r10					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		hpfGVfound					; Join common path on hit (r31 points to mapping)
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	hpfGVlp64					; Iterate
-
-			clrrdi	r5,r5,12					; Remove flags from virtual address			
-			andi.	r3,r3,mpgFree+mpgDormant	; Isolate guest free and dormant flag
-			xor		r4,r4,r21					; Compare space ID
-			or		r0,r3,r4					; r0 <- !(!free && !dormant && space match)
-			xor		r5,r5,r10					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		hpfGVfound					; Join common path on hit (r31 points to mapping)
-
-hpfGVmiss:
-			lwz		r6,vxsGpfMiss(r11)			; Get guest fault miss count
-			addi	r6,r6,1						; Increment miss count
-			stw		r6,vxsGpfMiss(r11)			; Update guest fault miss count
-			b		hpfNotFound
-			
-/*
- *			hw_set_user_space(pmap) 
- *			hw_set_user_space_dis(pmap) 
- *
- * 			Indicate whether memory space needs to be switched.
- *			We really need to turn off interrupts here, because we need to be non-preemptable.
- *
- *			hw_set_user_space_dis is used when interruptions are already disabled. Mind the
- *			register usage here.  The VMM switch code in vmachmon.s that calls this
- *			knows what registers are in use; check it if these change.
- */
-
-
-	
-			.align	5
-			.globl	EXT(hw_set_user_space)
-
-LEXT(hw_set_user_space)
-
-			lis		r8,hi16(MASK(MSR_VEC))		; Get the vector enable
-			mfmsr	r10							; Get the current MSR 
-			ori		r8,r8,lo16(MASK(MSR_FP))	; Add in FP
-			ori		r9,r8,lo16(MASK(MSR_EE))	; Add in the EE
-			andc	r10,r10,r8					; Turn off VEC, FP for good
-			andc	r9,r10,r9					; Turn off EE also
-			mtmsr	r9							; Disable them 
- 			isync								; Make sure FP and vec are off
-			mfsprg	r6,1						; Get the current activation
-			lwz		r6,ACT_PER_PROC(r6)			; Get the per_proc block
-			lwz		r2,ppUserPmapVirt(r6)		; Get our virtual pmap address
-			mfsprg	r4,2						; Get the feature flags
-			lwz		r7,pmapvr(r3)				; Get the v to r translation
- 			lwz		r8,pmapvr+4(r3)				; Get the v to r translation
- 			mtcrf	0x80,r4						; Get the Altivec flag
-			xor		r4,r3,r8					; Get bottom of the real address of bmap anchor
-			cmplw	cr1,r3,r2					; Same address space as before?
-			stw		r7,ppUserPmap(r6)			; Show our real pmap address
-			crorc	cr1_eq,cr1_eq,pfAltivecb	; See if same address space or not altivec machine
-			stw		r4,ppUserPmap+4(r6)			; Show our real pmap address
-			stw		r3,ppUserPmapVirt(r6)		; Show our virtual pmap address
-			mtmsr	r10							; Restore interruptions 
-			beqlr--	cr1							; Leave if the same address space or not Altivec
-
-			dssall								; Need to kill all data streams if adrsp changed
-			sync
-			blr									; Return... 
-	
-			.align	5
-			.globl	EXT(hw_set_user_space_dis)
-
-LEXT(hw_set_user_space_dis)
-
- 			lwz		r7,pmapvr(r3)				; Get the v to r translation
- 			mfsprg	r4,2						; Get the feature flags
-			lwz		r8,pmapvr+4(r3)				; Get the v to r translation
-			mfsprg	r6,1						; Get the current activation
-			lwz		r6,ACT_PER_PROC(r6)			; Get the per_proc block
-			lwz		r2,ppUserPmapVirt(r6)		; Get our virtual pmap address
- 			mtcrf	0x80,r4						; Get the Altivec flag
-			xor		r4,r3,r8					; Get bottom of the real address of bmap anchor
-			cmplw	cr1,r3,r2					; Same address space as before?
-			stw		r7,ppUserPmap(r6)			; Show our real pmap address
-			crorc	cr1_eq,cr1_eq,pfAltivecb	; See if same address space or not altivec machine
-			stw		r4,ppUserPmap+4(r6)			; Show our real pmap address
-			stw		r3,ppUserPmapVirt(r6)		; Show our virtual pmap address
-			beqlr--	cr1							; Leave if the same
-
-			dssall								; Need to kill all data streams if adrsp changed
-			sync
-			blr									; Return...
-	
-/*			int mapalc1(struct mappingblok *mb) - Finds, allocates, and zeros a free 1-bit mapping entry
- *
- *			Lock must already be held on mapping block list
- *			returns 0 if all slots filled.
- *			returns n if a slot is found and it is not the last
- *			returns -n if a slot is found and it is the last
- *			when n and -n are returned, the corresponding bit is cleared
- *			the mapping is zeroed out before return
- *
- */
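-/*
- *			Roughly equivalent C (a sketch; clz() stands in for cntlzw, the two
- *			dcbz's are shown as a bzero, and set bits in mbfree mark free slots):
- *
- *				n = clz(mb->mbfree[0]);
- *				if (n == 32 && (n = 32 + clz(mb->mbfree[1])) == 64) return 0;
- *				mb->mbfree[n >> 5] &= ~(0x80000000U >> (n & 31));
- *				bzero((char *)mb + (n << 6), 64);	// n * mpBasicSize
- *				return (mb->mbfree[0] | mb->mbfree[1]) ? n : -n;
- */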
-
-			.align	5
-			.globl	EXT(mapalc1)
-
-LEXT(mapalc1)
-			lwz		r4,mbfree(r3)				; Get the 1st mask 
-			lis		r0,0x8000					; Get the mask to clear the first free bit
-			lwz		r5,mbfree+4(r3)				; Get the 2nd mask 
-			mr		r12,r3						; Save the block ptr
-			cntlzw	r3,r4						; Get first 1-bit in 1st word
-			srw.	r9,r0,r3					; Get bit corresponding to first free one
-			cntlzw	r10,r5						; Get first free field in second word
-			andc	r4,r4,r9					; Turn 1-bit off in 1st word
-			bne		mapalc1f					; Found one in 1st word
-			
-			srw.	r9,r0,r10					; Get bit corresponding to first free one in 2nd word
-            li		r3,0						; assume failure return
-			andc	r5,r5,r9					; Turn it off
-			beqlr--								; There are no 1 bits left...
-            addi	r3,r10,32					; set the correct number
-            
-mapalc1f:
-            or.		r0,r4,r5					; any more bits set?
-            stw		r4,mbfree(r12)				; update bitmasks
-            stw		r5,mbfree+4(r12)
-            
-            slwi	r6,r3,6						; get (n * mpBasicSize), ie offset of mapping in block
-            addi	r7,r6,32
-            dcbz	r6,r12						; clear the 64-byte mapping
-            dcbz	r7,r12
-            
-            bnelr++								; return if another bit remains set
-            
-            neg		r3,r3						; indicate we just returned the last bit
-            blr
-
-
-/*			int mapalc2(struct mappingblok *mb) - Finds, allocates, and zeros a free 2-bit mapping entry
- *
- *			Lock must already be held on mapping block list
- *			returns 0 if all slots filled.
- *			returns n if a slot is found and it is not the last
- *			returns -n if a slot is found and it is the last
- *			when n and -n are returned, the corresponding bits are cleared
- * 			We find runs of 2 consecutive 1 bits by cntlzw(n & (n<<1)).
- *			the mapping is zeroed out before return
- */
-
-			.align	5
-			.globl	EXT(mapalc2)
-LEXT(mapalc2)
-			lwz		r4,mbfree(r3)				; Get the first mask 
-			lis		r0,0x8000					; Get the mask to clear the first free bit
-			lwz		r5,mbfree+4(r3)				; Get the second mask 
-			mr		r12,r3						; Save the block ptr
-            slwi	r6,r4,1						; shift first word over
-            and		r6,r4,r6					; lite start of double bit runs in 1st word
-            slwi	r7,r5,1						; shift 2nd word over
-			cntlzw	r3,r6						; Get first free 2-bit run in 1st word
-            and		r7,r5,r7					; lite start of double bit runs in 2nd word
-			srw.	r9,r0,r3					; Get bit corresponding to first run in 1st word
-			cntlzw	r10,r7						; Get first free field in second word
-            srwi	r11,r9,1					; shift over for 2nd bit in 1st word
-			andc	r4,r4,r9					; Turn off 1st bit in 1st word
-            andc	r4,r4,r11					; turn off 2nd bit in 1st word
-			bne		mapalc2a					; Found two consecutive free bits in 1st word
-			
-			srw.	r9,r0,r10					; Get bit corresponding to first free one in second word
-            li		r3,0						; assume failure
-            srwi	r11,r9,1					; get mask for 2nd bit
-			andc	r5,r5,r9					; Turn off 1st bit in 2nd word
-            andc	r5,r5,r11					; turn off 2nd bit in 2nd word
-			beq--	mapalc2c					; There are no runs of 2 bits in 2nd word either
-            addi	r3,r10,32					; set the correct number
-            
-mapalc2a:
-            or.		r0,r4,r5					; any more bits set?
-            stw		r4,mbfree(r12)				; update bitmasks
-            stw		r5,mbfree+4(r12)
-            slwi	r6,r3,6						; get (n * mpBasicSize), ie offset of mapping in block
-            addi	r7,r6,32
-            addi	r8,r6,64
-            addi	r9,r6,96
-            dcbz	r6,r12						; zero out the 128-byte mapping
-            dcbz	r7,r12						; we use the slow 32-byte dcbz even on 64-bit machines
-            dcbz	r8,r12						; because the mapping may not be 128-byte aligned
-            dcbz	r9,r12
-            
-            bnelr++								; return if another bit remains set
-            
-            neg		r3,r3						; indicate we just returned the last bit
-            blr
-            
-mapalc2c:
-            rlwinm	r7,r5,1,31,31				; move bit 0 of 2nd word to bit 31
-            and.	r0,r4,r7					; is the 2-bit field that spans the 2 words free?
-            beqlr								; no, we failed
-            rlwinm	r4,r4,0,0,30				; yes, turn off bit 31 of 1st word
-            rlwinm	r5,r5,0,1,31				; turn off bit 0 of 2nd word
-            li		r3,31						; get index of this field
-            b		mapalc2a
-			
-
-;
-;			This routine initializes the hash table and PCA.
-;			It is done here because we may need to be 64-bit to do it.
-;
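-;
-;			In outline (C-ish sketch, illustrative only): the table is zeroed,
-;			then one PCA word per PTEG is initialized, working backwards from
-;			just below the hash table base:
-;
-;				memset(base, 0, size);				// the dcbz loops
-;				for (i = 1; i <= size / 64; i++)		// one word per PTEG
-;					((uint32_t *)base)[-i] = 0xFF010000;	// all 8 slots free, steal at end
-;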
-
-			.align	5
-			.globl	EXT(hw_hash_init)
-
-LEXT(hw_hash_init)
-
- 			mfsprg	r10,2						; Get feature flags 
-			lis		r12,hi16(EXT(hash_table_size))		; Get hash table size address
-			mtcrf	0x02,r10					; move pf64Bit to cr6
-			lis		r11,hi16(EXT(hash_table_base))		; Get hash table base address
-			lis		r4,0xFF01					; Set all slots free and start steal at end
-			ori		r12,r12,lo16(EXT(hash_table_size))	; Get hash table size address
-			ori		r11,r11,lo16(EXT(hash_table_base))	; Get hash table base address
-
-			lwz		r12,0(r12)					; Get hash table size
-			li		r3,0						; Get start
-			bt++	pf64Bitb,hhiSF				; skip if 64-bit (only they take the hint)
-
-			lwz		r11,4(r11)					; Get hash table base
-			
-hhiNext32:	cmplw	r3,r12						; Have we reached the end?
-			bge-	hhiCPCA32					; Yes...			
-			dcbz	r3,r11						; Clear the line
-			addi	r3,r3,32					; Next one...
-			b		hhiNext32					; Go on...
-
-hhiCPCA32:	rlwinm	r12,r12,28,4,29				; Get number of slots * 4
-			li		r3,-4						; Displacement to first PCA entry
-			neg		r12,r12						; Get negative end of PCA	
-			
-hhiNPCA32:	stwx	r4,r3,r11					; Initialize the PCA entry
-			subi	r3,r3,4						; Next slot
-			cmpw	r3,r12						; Have we finished?
-			bge+	hhiNPCA32					; Not yet...
-			blr									; Leave...
-
-hhiSF:		mfmsr	r9							; Save the MSR
-			li		r8,1						; Get a 1
-			mr		r0,r9						; Get a copy of the MSR
-			ld		r11,0(r11)					; Get hash table base
-			rldimi	r0,r8,63,MSR_SF_BIT			; Set SF bit (bit 0)
-			mtmsrd	r0							; Turn on SF
-			isync
-			
-			
-hhiNext64:	cmpld	r3,r12						; Have we reached the end?
-			bge--	hhiCPCA64					; Yes...			
-			dcbz128	r3,r11						; Clear the line
-			addi	r3,r3,128					; Next one...
-			b		hhiNext64					; Go on...
-
-hhiCPCA64:	rlwinm	r12,r12,27,5,29				; Get number of slots * 4
-			li		r3,-4						; Displacement to first PCA entry
-			neg		r12,r12						; Get negative end of PCA	
-		
-hhiNPCA64:	stwx	r4,r3,r11					; Initialize the PCA entry
-			subi	r3,r3,4						; Next slot
-			cmpd	r3,r12						; Have we finished?
-			bge++	hhiNPCA64					; Not yet...
-
-			mtmsrd	r9							; Restore the MSR, turning SF back off if it was off
-			isync
-			blr									; Leave...
-			
-			
-;
-;			This routine sets up the hardware to start translation.
-;			Note that we do NOT start translation.
-;
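-;
-;			Worked example for the 32-bit SDR1 image built below (illustrative):
-;			an 8MB hash table gives size - 1 = 0x007FFFFF, and inserting that
-;			with rlwimi r11,r12,16,23,31 yields HTABMASK = 0x7F, so the SDR1
-;			image is HTABORG | 0x7F for a table aligned on its size.
-;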
-
-			.align	5
-			.globl	EXT(hw_setup_trans)
-
-LEXT(hw_setup_trans)
-
- 			mfsprg	r11,0						; Get the per_proc block
- 			mfsprg	r12,2						; Get feature flags 
- 			li		r0,0						; Get a 0
- 			li		r2,1						; And a 1
-			mtcrf	0x02,r12					; Move pf64Bit to cr6
-			stw		r0,validSegs(r11)			; Make sure we think all SR/STEs are invalid
-			stw		r0,validSegs+4(r11)			; Make sure we think all SR/STEs are invalid, part deux
-			sth		r2,ppInvSeg(r11)			; Force a reload of the SRs
-			sth		r0,ppCurSeg(r11)			; Set that we are starting out in kernel
-			
-			bt++	pf64Bitb,hstSF				; skip if 64-bit (only they take the hint)
-
-			li		r9,0						; Clear out a register
-			sync
-			isync
-			mtdbatu 0,r9						; Invalidate maps
-			mtdbatl 0,r9						; Invalidate maps
-			mtdbatu 1,r9						; Invalidate maps
-			mtdbatl 1,r9						; Invalidate maps
-			mtdbatu 2,r9						; Invalidate maps
-			mtdbatl 2,r9						; Invalidate maps
-			mtdbatu 3,r9						; Invalidate maps
-			mtdbatl 3,r9						; Invalidate maps
-
-			mtibatu 0,r9						; Invalidate maps
-			mtibatl 0,r9						; Invalidate maps
-			mtibatu 1,r9						; Invalidate maps
-			mtibatl 1,r9						; Invalidate maps
-			mtibatu 2,r9						; Invalidate maps
-			mtibatl 2,r9						; Invalidate maps
-			mtibatu 3,r9						; Invalidate maps
-			mtibatl 3,r9						; Invalidate maps
-
-			lis		r11,hi16(EXT(hash_table_base))		; Get hash table base address
-			lis		r12,hi16(EXT(hash_table_size))		; Get hash table size address
-			ori		r11,r11,lo16(EXT(hash_table_base))	; Get hash table base address
-			ori		r12,r12,lo16(EXT(hash_table_size))	; Get hash table size address
-			lwz		r11,4(r11)					; Get hash table base
-			lwz		r12,0(r12)					; Get hash table size
-			subi	r12,r12,1					; Back off by 1
-			rlwimi	r11,r12,16,23,31			; Stick the size into the sdr1 image
-			
-			mtsdr1	r11							; Ok, we now have the hash table set up
-			sync
-			
-			li		r12,invalSpace				; Get the invalid segment value
-			li		r10,0						; Start low
-			
-hstsetsr:	mtsrin	r12,r10						; Set the SR
-			addis	r10,r10,0x1000				; Bump the segment
-			mr.		r10,r10						; Are we finished?
-			bne+	hstsetsr					; Nope...	
-			sync
-			blr									; Return...
-
-;
-;			64-bit version
-;
-
-hstSF:		lis		r11,hi16(EXT(hash_table_base))		; Get hash table base address
-			lis		r12,hi16(EXT(hash_table_size))		; Get hash table size address
-			ori		r11,r11,lo16(EXT(hash_table_base))	; Get hash table base address
-			ori		r12,r12,lo16(EXT(hash_table_size))	; Get hash table size address
-			ld		r11,0(r11)					; Get hash table base
-			lwz		r12,0(r12)					; Get hash table size
-			cntlzw	r10,r12						; Get the number of bits
-			subfic	r10,r10,13					; Get the extra bits we need
-			or		r11,r11,r10					; Add the size field to SDR1
-			
-			mtsdr1	r11							; Ok, we now have the hash table set up
-			sync
-
-			li		r0,0						; Set an SLB slot index of 0
-			slbia								; Trash all SLB entries (except for entry 0 that is)
-			slbmfee	r7,r0						; Get the entry that is in SLB index 0
-			rldicr	r7,r7,0,35					; Clear the valid bit and the rest
-			slbie	r7							; Invalidate it
-
-			blr									; Return...
-
-
-;
-;			This routine turns on translation for the first time on a processor
-;
-
-			.align	5
-			.globl	EXT(hw_start_trans)
-
-LEXT(hw_start_trans)
-
-			
-			mfmsr	r10							; Get the msr
-			ori		r10,r10,lo16(MASK(MSR_IR) | MASK(MSR_DR))	; Turn on translation
-
-			mtmsr	r10							; Everything falls apart here
-			isync
-			
-			blr									; Back to it.
-
-
-
-;
-;			This routine validates a segment register.
-;				hw_map_seg(pmap_t pmap, addr64_t seg, addr64_t va)
-;
-;				r3 = virtual pmap
-;				r4 = segment[0:31]
-;				r5 = segment[32:63]
-;				r6 = va[0:31]
-;				r7 = va[32:63]
-;
-;			Note that we transform the addr64_t (long long) parameters into single 64-bit values.
-;			Note that there is no reason to apply the key modifier here because this is only
-;			used for kernel accesses.
-;
-
-			.align	5
-			.globl	EXT(hw_map_seg)
-
-LEXT(hw_map_seg)
-
-			lwz		r0,pmapSpace(r3)			; Get the space, we will need it soon
-			lwz		r9,pmapFlags(r3)			; Get the flags for the keys now
- 			mfsprg	r10,2						; Get feature flags 
-
-;
-;			Note: the following code would probably be easier to follow if I split it,
-;			but I just wanted to see if I could write this to work on both 32- and 64-bit
-;			machines combined.
-;
-			
-;
-;			Here we enter with va[0:31] in r6[0:31] (or r6[32:63] on 64-bit machines)
-;			and va[32:63] in r7[0:31] (or r7[32:63] on 64-bit machines)
-
-			rlwinm	r4,r4,0,1,0					; Copy seg[0:31] into r4[0;31] - no-op for 32-bit
-			rlwinm	r7,r7,18,14,17				; Slide va[32:35] east to just west of space ID
-			mtcrf	0x02,r10					; Move pf64Bit and pfNoMSRirb to cr5 and 6
-			srwi	r8,r6,14					; Slide va[0:17] east to just west of the rest
-			rlwimi	r7,r6,18,0,13				; Slide va[18:31] east to just west of slid va[32:35]
-			rlwimi	r0,r0,14,4,17				; Dup address space ID above itself
-			rlwinm	r8,r8,0,1,0					; Dup low part into high (does nothing on 32-bit machines)
-			rlwinm	r2,r0,28,0,31				; Rotate low nybble to top of low half
-			rlwimi	r2,r2,0,1,0					; Replicate bottom 32 into top 32
-			rlwimi	r8,r7,0,0,31				; Join va[0:17] with va[18:35] (just like mr on 32-bit machines)			
-
-			rlwimi	r2,r0,0,4,31				; We should now have 4 copies of the space
-												; concatenated together.   There is garbage
-												; at the top for 64-bit but we will clean
-												; that out later.
-			rlwimi	r4,r5,0,0,31				; Copy seg[32:63] into r4[32:63] - just like mr for 32-bit
-
-			
-;
-;			Here we exit with va[0:35] shifted into r8[14:51], zeros elsewhere, or
-;			va[18:35] shifted into r8[0:17], zeros elsewhere on 32-bit machines
-;			
-												
-;
-;			What we have now is:
-;
-;					 0        0        1        2        3        4        4        5      6
-;					 0        8        6        4        2        0        8        6      3	- for 64-bit machines
-;					+--------+--------+--------+--------+--------+--------+--------+--------+
-;			r2 =	|xxxx0000|AAAAAAAA|AAAAAABB|BBBBBBBB|BBBBCCCC|CCCCCCCC|CCDDDDDD|DDDDDDDD|	- hash value
-;					+--------+--------+--------+--------+--------+--------+--------+--------+
-;														 0        0        1        2      3	- for 32-bit machines
-;														 0        8        6        4      1
-;
-;					 0        0        1        2        3        4        4        5      6
-;					 0        8        6        4        2        0        8        6      3	- for 64-bit machines
-;					+--------+--------+--------+--------+--------+--------+--------+--------+
-;			r8 =	|00000000|000000SS|SSSSSSSS|SSSSSSSS|SSSSSSSS|SSSSSSSS|SS000000|00000000|	- shifted and cleaned EA
-;					+--------+--------+--------+--------+--------+--------+--------+--------+
-;														 0        0        1        2      3	- for 32-bit machines
-;														 0        8        6        4      1
-;
-;					 0        0        1        2        3        4        4        5      6
-;					 0        8        6        4        2        0        8        6      3	- for 64-bit machines
-;					+--------+--------+--------+--------+--------+--------+--------+--------+
-;			r4 =	|SSSSSSSS|SSSSSSSS|SSSSSSSS|SSSSSSSS|SSSS0000|00000000|00000000|00000000|	- Segment
-;					+--------+--------+--------+--------+--------+--------+--------+--------+
-;														 0        0        1        2      3	- for 32-bit machines
-;														 0        8        6        4      1
-
-
-			xor		r8,r8,r2					; Calculate VSID
-			
-			bf--	pf64Bitb,hms32bit			; Skip out if 32-bit...
-			mfsprg	r12,0						; Get the per_proc
-			li		r0,1						; Prepare to set bit 0 (also to clear EE)
-			mfmsr	r6							; Get current MSR
-			li		r2,MASK(MSR_IR)|MASK(MSR_DR)	; Get the translation bits
-			mtmsrd	r0,1						; Set only the EE bit to 0
-			rlwinm	r6,r6,0,MSR_EE_BIT,MSR_EE_BIT	; See if EE bit is on
-			mfmsr	r11							; Get the MSR right now, after disabling EE
-			andc	r2,r11,r2					; Turn off translation now
-			rldimi	r2,r0,63,0					; Get the 64-bit mode bit turned on
-			or		r11,r11,r6					; Turn on the EE bit if it was on
-			mtmsrd	r2							; Make sure translation and EE are off and 64-bit is on
-			isync								; Hang out a bit
-						
-			ld		r6,validSegs(r12)			; Get the valid SLB entry flags
-			sldi	r9,r9,9						; Position the key and noex bit
-			
-			rldimi	r5,r8,12,0					; Form the VSID/key
-			
-			not		r3,r6						; Make valids be 0s
-			
-			cntlzd	r7,r3						; Find a free SLB	
-			cmplwi	r7,63						; Did we find a free SLB entry?		
-			
-			slbie	r4							; Since this ESID may still be in an SLBE, kill it
-
-			oris	r4,r4,0x0800				; Turn on the valid bit in ESID
-			addi	r7,r7,1						; Make sure we skip slb 0
-			blt++	hmsFreeSeg					; Yes, go load it...
-
-;
-;			No free SLB entries, select one that is in use and invalidate it
-;
-			lwz		r2,ppSegSteal(r12)			; Get the next slot to steal
-			addi	r7,r2,pmapSegCacheUse+1		; Select stealee from non-cached slots only
-			addi	r2,r2,1						; Set next slot to steal
-			slbmfee	r3,r7						; Get the entry that is in the selected spot
-			subi	r8,r2,64-(pmapSegCacheUse+1)	; Force steal to wrap
-			rldicr	r3,r3,0,35					; Clear the valid bit and the rest
-			srawi	r8,r8,31					; Get -1 if steal index still in range
-			slbie	r3							; Invalidate the in-use SLB entry
-			and		r2,r2,r8					; Reset steal index when it should wrap
-			isync								; 
-			
-			stw		r2,ppSegSteal(r12)			; Set the next slot to steal
-;
-;			We are now ready to stick the SLB entry in the SLB and mark it in use
-;
-
-hmsFreeSeg:	subi	r2,r7,1						; Adjust for skipped slb 0
-			rldimi	r4,r7,0,58					; Copy in the SLB entry selector
-			srd		r0,r0,r2					; Set bit mask for allocation
-			rldicl	r5,r5,0,15					; Clean out the unsupported bits
-			or		r6,r6,r0					; Turn on the allocation flag
-			
-			slbmte	r5,r4						; Make that SLB entry
-
-			std		r6,validSegs(r12)			; Mark as valid
-			mtmsrd	r11							; Restore the MSR
-			isync
-			blr									; Back to it...
-
-			.align	5
-
-hms32bit:
-			mfsprg	r12,1						; Get the current activation
-			lwz		r12,ACT_PER_PROC(r12)		; Get the per_proc block
-			rlwinm	r8,r8,0,8,31				; Clean up the VSID
-			rlwinm	r2,r4,4,28,31				; Isolate the segment we are setting
-			lis		r0,0x8000					; Set bit 0
-			rlwimi	r8,r9,28,1,3				; Insert the keys and N bit			
-			srw		r0,r0,r2					; Get bit corresponding to SR
-			addi	r7,r12,validSegs			; Point to the valid segment flags directly
-		
-			mtsrin	r8,r4						; Set the actual SR	
-			isync								; Need to make sure this is done
-		
-hmsrupt:	lwarx	r6,0,r7						; Get and reserve the valid segment flags
-			or		r6,r6,r0					; Show that SR is valid
-			stwcx.	r6,0,r7						; Set the valid SR flags
-			bne--	hmsrupt						; Had an interrupt, need to get flags again...
-
-			blr									; Back to it...
-
-
-;
-;			This routine invalidates a segment register.
-;
-
-			.align	5
-			.globl	EXT(hw_blow_seg)
-
-LEXT(hw_blow_seg)
-
- 			mfsprg	r10,2						; Get feature flags 
-			mtcrf	0x02,r10					; move pf64Bit and pfNoMSRirb to cr5 and 6
-		
-			rlwinm	r9,r4,0,0,3					; Save low segment address and make sure it is clean
-			
-			bf--	pf64Bitb,hbs32bit			; Skip out if 32-bit...
-			
-			li		r0,1						; Prepare to set bit 0 (also to clear EE)
-			mfmsr	r6							; Get current MSR
-			li		r2,MASK(MSR_IR)|MASK(MSR_DR)	; Get the translation bits
-			mtmsrd	r0,1						; Set only the EE bit to 0
-			rlwinm	r6,r6,0,MSR_EE_BIT,MSR_EE_BIT	; See if EE bit is on
-			mfmsr	r11							; Get the MSR right now, after disabling EE
-			andc	r2,r11,r2					; Turn off translation now
-			rldimi	r2,r0,63,0					; Get the 64-bit mode bit turned on
-			or		r11,r11,r6					; Turn on the EE bit if it was on
-			mtmsrd	r2							; Make sure translation and EE are off and 64-bit is on
-			isync								; Hang out a bit
-
-			rldimi	r9,r3,32,0					; Insert the top part of the ESID
-			
-			slbie	r9							; Invalidate the associated SLB entry
-			
-			mtmsrd	r11							; Restore the MSR
-			isync
-			blr									; Back to it.
-
-			.align	5
-
-hbs32bit:
-			mfsprg	r12,1						; Get the current activation
-			lwz		r12,ACT_PER_PROC(r12)		; Get the per_proc block
-			addi	r7,r12,validSegs			; Point to the valid segment flags directly
-			lwarx	r4,0,r7						; Get and reserve the valid segment flags
-			rlwinm	r6,r9,4,28,31				; Convert segment to number
-			lis		r2,0x8000					; Set up a mask
-			srw		r2,r2,r6					; Make a mask
-			and.	r0,r4,r2					; See if this is even valid
-			li		r5,invalSpace				; Set the invalid address space VSID
-			beqlr								; Leave if already invalid...
-			
-			mtsrin	r5,r9						; Slam the segment register
-			isync								; Need to make sure this is done
-		
-hbsrupt:	andc	r4,r4,r2					; Clear the valid bit for this segment
-			stwcx.	r4,0,r7						; Set the valid SR flags
-			beqlr++								; Stored ok, no interrupt, time to leave...
-			
-			lwarx	r4,0,r7						; Get and reserve the valid segment flags again
-			b		hbsrupt						; Try again...
-
-;
-;			This routine invalidates the entire pmap segment cache
-;
-;			Translation is on, interrupts may or may not be enabled.
-;
-
-			.align	5
-			.globl	EXT(invalidateSegs)
-
-LEXT(invalidateSegs)
-
-			la		r10,pmapCCtl(r3)			; Point to the segment cache control
-			eqv		r2,r2,r2					; Get all foxes
-			
-isInv:		lwarx	r4,0,r10					; Get the segment cache control value
-			rlwimi	r4,r2,0,0,15				; Slam in all invalid bits
-			rlwinm.	r0,r4,0,pmapCCtlLckb,pmapCCtlLckb	; Is it already locked?
-			bne--	isInv0						; Yes, try again...
-			
-			stwcx.	r4,0,r10					; Try to invalidate it
-			bne--	isInv						; Someone else just stuffed it...
-			blr									; Leave...
-			
-
-isInv0:		li		r4,lgKillResv				; Get reservation kill zone
-			stwcx.	r4,0,r4						; Kill reservation
-
-isInv1:		lwz		r4,pmapCCtl(r3)				; Get the segment cache control
-			rlwinm.	r0,r4,0,pmapCCtlLckb,pmapCCtlLckb	; Is it already locked?
-			bne--	isInv						; Nope...
-			b		isInv1						; Still locked, do it again...
-			
-;
-;			This routine switches segment registers between kernel and user.
-;			We have some assumptions and rules:
-;				We are in the exception vectors
-;				pf64Bitb is set up
-;				R3 contains the MSR we are going to
-;				We can not use R4, R13, R20, R21, R25, R26, R29
-;				R13 is the savearea
-;				R29 has the per_proc
-;
-;			We return R3 as 0 if we did not switch between kernel and user
-;			We also maintain and apply the user state key modifier used by VMM support;	
-;			If we go to the kernel it is set to 0, otherwise it follows the bit 
-;			in spcFlags.
-;
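-;
-;			The kernel/user selection below is branch-free; in C-ish terms
-;			(a sketch, not original code):
-;
-;				r2   = going_to_user ? 1 : 0;
-;				mask = 0 - r2;				// 0 if kernel, 0xFFFFFFFF if user
-;				pmap = (kernel_pmap_phys & ~mask) | (user_pmap & mask);
-;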
-
-			.align	5
-			.globl	EXT(switchSegs)
-
-LEXT(switchSegs)
-
-			lwz		r22,ppInvSeg(r29)			; Get the ppInvSeg (force invalidate) and ppCurSeg (user or kernel segments indicator)
-			lwz		r9,spcFlags(r29)			; Pick up the special user state flags
-			rlwinm	r2,r3,MSR_PR_BIT+1,31,31	; Isolate the problem mode bit
-			rlwinm	r3,r3,MSR_RI_BIT+1,31,31	; Isolate the recoverable interrupt bit
-			lis		r8,hi16(EXT(kernel_pmap_phys))	; Assume kernel
-			or		r2,r2,r3					; This will be 1 if we will be using user segments
-			li		r3,0						; Get a selection mask
-			cmplw	r2,r22						; This will be EQ if same state and not ppInvSeg
-			ori		r8,r8,lo16(EXT(kernel_pmap_phys))	; Assume kernel (bottom of address)
-			sub		r3,r3,r2					; Form select mask - 0 if kernel, -1 if user
-			la		r19,ppUserPmap(r29)			; Point to the current user pmap
-
-;			The following line is an exercise of a generally unreadable but recompile-friendly programming practice
-			rlwinm	r30,r9,userProtKeybit+1+(63-sgcVSKeyUsr),sgcVSKeyUsr-32,sgcVSKeyUsr-32	; Isolate the user state protection key 
-
-			andc	r8,r8,r3					; Zero kernel pmap ptr if user, untouched otherwise
-			and		r19,r19,r3					; Zero user pmap ptr if kernel, untouched otherwise
-			and		r30,r30,r3					; Clear key modifier if kernel, leave otherwise
-			or		r8,r8,r19					; Get the pointer to the pmap we are using
-
-			beqlr								; We are staying in the same mode, do not touch segs...
-
-			lwz		r28,0(r8)					; Get top half of pmap address
-			lwz		r10,4(r8)					; Get bottom half
-
-			stw		r2,ppInvSeg(r29)			; Clear request for invalidate and save ppCurSeg
-			rlwinm	r28,r28,0,1,0				; Copy top to top
-			stw		r30,ppMapFlags(r29)			; Set the key modifier
-			rlwimi	r28,r10,0,0,31				; Insert bottom
-			
-			la		r10,pmapCCtl(r28)			; Point to the segment cache control
-			la		r9,pmapSegCache(r28)		; Point to the segment cache
-
-ssgLock:	lwarx	r15,0,r10					; Get and reserve the segment cache control
-			rlwinm.	r0,r15,0,pmapCCtlLckb,pmapCCtlLckb	; Someone have the lock?
-			ori		r16,r15,lo16(pmapCCtlLck)	; Set lock bit
-			bne--	ssgLock0					; Yup, this is in use...
-
-			stwcx.	r16,0,r10					; Try to set the lock
-			bne--	ssgLock						; Did we get contention?
-			
-			not		r11,r15						; Invert the invalids to valids
-			li		r17,0						; Set a mask for the SRs we are loading
-			isync								; Make sure we are all caught up
-
-			bf--	pf64Bitb,ssg32Enter			; If 32-bit, jump into it...
-		
-			li		r0,0						; Clear
-			slbia								; Trash all SLB entries (except for entry 0 that is)
-			li		r17,1						; Get SLB index to load (skip slb 0)
-			oris	r0,r0,0x8000				; Get set for a mask
-			b		ssg64Enter					; Start on a cache line...
-
-			.align	5
-
-ssgLock0:	li		r15,lgKillResv				; Killing field
-			stwcx.	r15,0,r15					; Kill reservation
-
-ssgLock1:	lwz		r15,pmapCCtl(r28)			; Get the segment cache controls
-			rlwinm.	r15,r15,0,pmapCCtlLckb,pmapCCtlLckb	; Someone have the lock?
-			beq++	ssgLock						; Nope, it is free, go try to grab it...
-			b		ssgLock1					; Yup, still in use, try again...
-;
-;			This is the 32-bit address space switch code.
-;			We take a reservation on the segment cache and walk through.
-;			For each entry, we load the specified entries and remember which
-;			we did with a mask.  Then, we figure out which segments should be
-;			invalid and then see which actually are.  Then we load those with the
-;			defined invalid VSID. 
-;			Afterwards, we unlock the segment cache.
-;
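-;			Roughly, in C-style pseudocode (illustrative only):
-;
-;				loaded = 0;
-;				while ((slot = cntlzw(valid)) < pmapSegCacheUse) {
-;					valid &= ~(0x80000000 >> slot);          // this slot is handled
-;					sr = seg_cache[slot].esid >> 28;         // segment number
-;					loaded |= 0x80000000 >> sr;              // note which SR we load
-;					mtsrin(sr_image(seg_cache[slot].vsid ^ key), seg_cache[slot].esid);
-;				}
-;				stale = old_valid & ~loaded;                 // valid before, not now
-;				for (each bit sr set in stale)
-;					mtsrin(invalSpace, sr << 28);            // invalidate leftover SR
-;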
-
-			.align	5
-
-ssg32Enter:	cntlzw	r12,r11						; Find the next slot in use
-			cmplwi	r12,pmapSegCacheUse			; See if we are done
-			slwi	r14,r12,4					; Index to the cache slot
-			lis		r0,0x8000					; Get set for a mask
-			add		r14,r14,r9					; Point to the entry
-		
-			bge-	ssg32Done					; All done...
-		
-			lwz		r5,sgcESID+4(r14)			; Get the ESID part
-			srw		r2,r0,r12					; Form a mask for the one we are loading
-			lwz		r7,sgcVSID+4(r14)			; And get the VSID bottom
-
-			andc	r11,r11,r2					; Clear the bit
-			lwz		r6,sgcVSID(r14)				; And get the VSID top
-
-			rlwinm	r2,r5,4,28,31				; Convert segment to number
-
-			xor		r7,r7,r30					; Modify the key before we actually set it
-			srw		r0,r0,r2					; Get a mask for the SR we are loading
-			rlwinm	r8,r7,19,1,3				; Insert the keys and N bit			
-			or		r17,r17,r0					; Remember the segment
-			rlwimi	r8,r7,20,12,31				; Insert bits 4:23 of the VSID
-			rlwimi	r8,r6,20,8,11				; Get the last nybble of the SR contents			
-
-			mtsrin	r8,r5						; Load the segment
-			b		ssg32Enter					; Go enter the next...
-			
-			.align	5
-			
-ssg32Done:	lwz		r16,validSegs(r29)			; Get the valid SRs flags
-			stw		r15,pmapCCtl(r28)			; Unlock the segment cache controls
-
-			lis		r0,0x8000					; Get set for a mask
-			li		r2,invalSpace				; Set the invalid address space VSID
-
-			nop									; Align loop
-			nop									; Align loop
-			andc	r16,r16,r17					; Get list of SRs that were valid before but not now
-			nop									; Align loop
-
-ssg32Inval:	cntlzw	r18,r16						; Get the first one to invalidate
-			cmplwi	r18,16						; Have we finished?
-			srw		r22,r0,r18					; Get the mask bit
-			rlwinm	r23,r18,28,0,3				; Get the segment register we need
-			andc	r16,r16,r22					; Get rid of the guy we just did
-			bge		ssg32Really					; Yes, we are really done now...
-
-			mtsrin	r2,r23						; Invalidate the SR
-			b		ssg32Inval					; Do the next...
-			
-			.align	5
-
-ssg32Really:
-			stw		r17,validSegs(r29)			; Set the valid SR flags
-			li		r3,1						; Set kernel/user transition
-			blr
-
-;
-;			This is the 64-bit address space switch code.
-;			First we blow away all of the SLB entries, then walk through the
-;			segment cache, loading the SLB.  Afterwards, we release the cache lock.
-;
-;			Note that because we have to treat SLBE 0 specially, we do not ever use it...
-;			It's a performance thing...
-;
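-;			Roughly, in C-style pseudocode (illustrative only):
-;
-;				slbia();                                     // flush SLB (SLBE 0 kept)
-;				next = 1;                                    // never touch SLBE 0
-;				while ((slot = cntlzw(valid)) < pmapSegCacheUse) {
-;					valid &= ~(0x80000000 >> slot);
-;					slbmte(seg_cache[slot].vsid ^ key,       // VSID side, key folded in
-;					       seg_cache[slot].esid | SLB_V | next++);
-;				}
-;				validSegs = ~0ULL << (64 - next);            // one bit per SLBE in use
-;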
-
-			.align	5
-
-ssg64Enter:	cntlzw	r12,r11						; Find the next slot in use
-			cmplwi	r12,pmapSegCacheUse			; See if we are done
-			slwi	r14,r12,4					; Index to the cache slot
-			srw		r16,r0,r12					; Form a mask for the one we are loading
-			add		r14,r14,r9					; Point to the entry
-			andc	r11,r11,r16					; Clear the bit
-			bge--	ssg64Done					; All done...
-
-			ld		r5,sgcESID(r14)				; Get the ESID part
-			ld		r6,sgcVSID(r14)				; And get the VSID part
-			oris	r5,r5,0x0800				; Turn on the valid bit
-			or		r5,r5,r17					; Insert the SLB slot
-			xor		r6,r6,r30					; Modify the key before we actually set it
-			addi	r17,r17,1					; Bump to the next slot
-			slbmte	r6,r5						; Make that SLB entry
-			b		ssg64Enter					; Go enter the next...
-			
-			.align	5
-			
-ssg64Done:	stw		r15,pmapCCtl(r28)			; Unlock the segment cache controls
-
-			eqv		r16,r16,r16					; Load up with all foxes
-			subfic	r17,r17,64					; Get the number of 1 bits we need
-
-			sld		r16,r16,r17					; Get a mask for the used SLB entries
-			li		r3,1						; Set kernel/user transition
-			std		r16,validSegs(r29)			; Set the valid SR flags
-			blr
-
-;
-;			mapSetUp - this function sets initial state for all mapping functions.
-;			We turn off all translations (physical), disable interruptions, and 
-;			enter 64-bit mode if applicable.
-;
-;			We also return the original MSR in r11, the feature flags in R12,
-;			and CR6 set up so we can do easy branches for 64-bit
-;			hw_clear_maps assumes r10, r9 will not be trashed.
-;
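-;			A typical call sequence, mirroring the callers later in this file
-;			(illustrative only):
-;
-;				bl		EXT(mapSetUp)			; old MSR -> R11, features -> R12
-;				mr		r25,r11					; preserve MSR image across the work
-;				...								; do the physical-mode work
-;				bt++	pf64Bitb,restore64		; restore path is mode-specific
-;				mtmsr	r25						; 32-bit: restore 'rupts, translation
-;				isync							; and flush the pipeline
-;				...
-;		restore64:
-;				mtmsrd	r25						; 64-bit: also drops back to 32-bit mode
-;				isync
-;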
-
-			.align	5
-			.globl	EXT(mapSetUp)
-
-LEXT(mapSetUp)
-
-			lis		r0,hi16(MASK(MSR_VEC))		; Get the vector mask
- 			mfsprg	r12,2						; Get feature flags 
- 			ori		r0,r0,lo16(MASK(MSR_FP))	; Get the FP as well
-			mtcrf	0x04,r12					; move pf64Bit and pfNoMSRirb to cr5 and 6
-			mfmsr	r11							; Save the MSR 
-			mtcrf	0x02,r12					; move pf64Bit and pfNoMSRirb to cr5 and 6
-			andc	r11,r11,r0					; Clear VEC and FP for good
-			ori		r0,r0,lo16(MASK(MSR_EE)|MASK(MSR_DR)|MASK(MSR_IR))	; Get rid of EE, IR, and DR
-			li		r2,1						; Prepare for 64 bit
-			andc	r0,r11,r0					; Clear the rest
-			bt		pfNoMSRirb,msuNoMSR			; No MSR...
-			bt++	pf64Bitb,msuSF				; skip if 64-bit (only they take the hint)
-
-			mtmsr	r0							; Translation and all off
-			isync								; Toss prefetch
-			blr									; Return...
-
-			.align	5
-
-msuSF:		rldimi	r0,r2,63,MSR_SF_BIT			; set SF bit (bit 0)
-			mtmsrd	r0							; set 64-bit mode, turn off EE, DR, and IR
-			isync								; synchronize
-			blr									; Return...
-
-			.align	5
-
-msuNoMSR:	mr		r2,r3						; Save R3 across call
-			mr		r3,r0						; Get the new MSR value
-			li		r0,loadMSR					; Get the MSR setter SC
-			sc									; Set it
-			mr		r3,r2						; Restore R3
-			blr									; Go back all set up...
-			
-
-;
-;			Guest shadow assist -- remove all guest mappings
-;
-;			Remove all mappings for a guest pmap from the shadow hash table.
-;
-;			Parameters:
-;				r3 : address of pmap, 32-bit kernel virtual address
-;
-;			Non-volatile register usage:
-;				r24 : host pmap's physical address
-;				r25 : VMM extension block's physical address
-;				r26 : physent address
-;				r27 : guest pmap's space ID number
-;				r28 : current hash table page index
-;				r29 : guest pmap's physical address
-;				r30 : saved msr image
-;				r31 : current mapping
-;
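-;			Overall shape, in rough C-style pseudocode (names illustrative):
-;
-;				lock(host_pmap->pmapSXlk);
-;				for (page = 0; page < GV_HPAGES; page++)
-;					for (each slot in hash table page)
-;						if (!(slot->flags & mpgFree) && slot->space == guest_space) {
-;							if (!(slot->flags & mpgDormant))
-;								invalidate_pte(slot);        // mapInvPte32/64
-;							unchain_from_physent(slot);      // under physent lock
-;							slot->flags = (slot->flags & ~mpgFlags) | mpgFree;
-;						}
-;				unlock(host_pmap->pmapSXlk);
-;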
-			.align	5
-			.globl	EXT(hw_rem_all_gv)
-			
-LEXT(hw_rem_all_gv)
-
-#define graStackSize ((31-24+1)*4)+4
-			stwu	r1,-(FM_ALIGN(graStackSize)+FM_SIZE)(r1)
-												; Mint a new stack frame
-			mflr	r0							; Get caller's return address
-			mfsprg	r11,2						; Get feature flags
-			mtcrf	0x02,r11					; Insert feature flags into cr6
-			stw		r0,(FM_ALIGN(graStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Save caller's return address
-			stw		r31,FM_ARG0+0x00(r1)		; Save non-volatile r31
-			stw		r30,FM_ARG0+0x04(r1)		; Save non-volatile r30
-			stw		r29,FM_ARG0+0x08(r1)		; Save non-volatile r29
-			stw		r28,FM_ARG0+0x0C(r1)		; Save non-volatile r28
-			stw		r27,FM_ARG0+0x10(r1)		; Save non-volatile r27
-			stw		r26,FM_ARG0+0x14(r1)		; Save non-volatile r26
-			stw		r25,FM_ARG0+0x18(r1)		; Save non-volatile r25
-			stw		r24,FM_ARG0+0x1C(r1)		; Save non-volatile r24
-												
-			lwz		r11,pmapVmmExt(r3)			; r11 <- VMM pmap extension block vaddr
-
-			bt++	pf64Bitb,gra64Salt			; Test for 64-bit machine
-			lwz		r25,pmapVmmExtPhys+4(r3)	; r25 <- VMM pmap extension block paddr
-			lwz		r9,pmapvr+4(r3)				; Get 32-bit virt<->real conversion salt
-			lwz		r24,vmxHostPmapPhys+4(r11)	; r24 <- host pmap's paddr
-			b		graStart					; Get to it			
-gra64Salt:	ld		r25,pmapVmmExtPhys(r3)		; r25 <- VMM pmap extension block paddr
-			ld		r9,pmapvr(r3)				; Get 64-bit virt<->real conversion salt
-			ld		r24,vmxHostPmapPhys(r11)	; r24 <- host pmap's paddr
-graStart:	bl		EXT(mapSetUp)				; Disable 'rupts, translation, enter 64-bit mode
-			xor		r29,r3,r9					; Convert pmap_t virt->real
-			mr		r30,r11						; Save caller's msr image
-
-			la		r3,pmapSXlk(r24)			; r3 <- host pmap's search lock
-			bl		sxlkExclusive				; Get lock exclusive
-			
-			lwz		r3,vxsGra(r25)				; Get remove all count
-			addi	r3,r3,1						; Increment remove all count
-			stw		r3,vxsGra(r25)				; Update remove all count
-
-			li		r28,0						; r28 <- first hash page table index to search
-			lwz		r27,pmapSpace(r29)			; r27 <- guest pmap's space ID number
-graPgLoop:	
-			la		r31,VMX_HPIDX_OFFSET(r25)	; Get base of hash page physical index
-			rlwinm	r11,r28,GV_PGIDX_SZ_LG2,GV_HPAGE_MASK
-												; Convert page index into page physical index offset
-			add		r31,r31,r11					; Calculate page physical index entry address
-			bt++	pf64Bitb,gra64Page			; Separate handling for 64-bit
-			lwz		r31,4(r31)					; r31 <- first slot in hash table page to examine
-			b		graLoop						; Examine all slots in this page
-gra64Page:	ld		r31,0(r31)					; r31 <- first slot in hash table page to examine
-			b		graLoop						; Examine all slots in this page
-
-			.align	5
-graLoop:	lwz		r3,mpFlags(r31)				; Get mapping's flags
-			lhz		r4,mpSpace(r31)				; Get mapping's space ID number
-			rlwinm	r6,r3,0,mpgFree				; Isolate guest free mapping flag
-			xor		r4,r4,r27					; Compare space ID number
-			or.		r0,r6,r4					; cr0_eq <- !free && space id match
-			bne		graMiss						; Not one of ours, skip it
-			
-			lwz		r11,vxsGraHits(r25)			; Get remove hit count
-			addi	r11,r11,1					; Increment remove hit count
-			stw		r11,vxsGraHits(r25)			; Update remove hit count
-
-			rlwinm.	r0,r3,0,mpgDormant			; Is this entry dormant?
-			bne		graRemPhys					; Yes, nothing to disconnect
-			
-			lwz		r11,vxsGraActive(r25)		; Get remove active count
-			addi	r11,r11,1					; Increment remove active count
-			stw		r11,vxsGraActive(r25)		; Update remove active count
-
-			bt++	pf64Bitb,graDscon64			; Handle 64-bit disconnect separately
-			bl		mapInvPte32					; Disconnect PTE, invalidate, gather ref and change
-												; r31 <- mapping's physical address
-												; r3  -> PTE slot physical address
-												; r4  -> High-order 32 bits of PTE
-												; r5  -> Low-order  32 bits of PTE
-												; r6  -> PCA
-												; r7  -> PCA physical address
-			rlwinm	r2,r3,29,29,31				; Get PTE's slot number in the PTEG (8-byte PTEs)
-			b		graFreePTE					; Join 64-bit path to release the PTE			
-graDscon64:	bl		mapInvPte64					; Disconnect PTE, invalidate, gather ref and change
-			rlwinm	r2,r3,28,29,31				; Get PTE's slot number in the PTEG (16-byte PTEs)
-graFreePTE: mr.		r3,r3						; Was there a valid PTE?
-			beq-	graRemPhys					; No valid PTE, we're almost done
-			lis		r0,0x8000					; Prepare free bit for this slot
-			srw		r0,r0,r2					; Position free bit
-			or		r6,r6,r0					; Set it in our PCA image
-			lwz		r8,mpPte(r31)				; Get PTE pointer
-			rlwinm	r8,r8,0,~mpHValid			; Make the pointer invalid
-			stw		r8,mpPte(r31)				; Save invalidated PTE pointer
-			eieio								; Synchronize all previous updates (mapInvPtexx doesn't)
-			stw		r6,0(r7)					; Update PCA and unlock the PTEG
-			
-graRemPhys:
-			lwz		r3,mpPAddr(r31)				; r3 <- physical 4K-page number
-			bl		mapFindLockPN				; Find 'n' lock this page's physent
-			mr.		r26,r3						; Got lock on our physent?
-			beq--	graBadPLock					; No, time to bail out
-
-			crset	cr1_eq						; cr1_eq <- previous link is the anchor
-			bt++	pf64Bitb,graRemove64		; Use 64-bit version on 64-bit machine
-			la		r11,ppLink+4(r26)			; Point to chain anchor
-			lwz		r9,ppLink+4(r26)			; Get chain anchor
-			rlwinm.	r9,r9,0,~ppFlags			; Remove flags, yielding 32-bit physical chain pointer
-
-graRemLoop:	beq-	graRemoveMiss				; End of chain, this is not good
-			cmplw	r9,r31						; Is this the mapping to remove?
-			lwz		r8,mpAlias+4(r9)			; Get forward chain pointer
-			bne		graRemNext					; No, chain onward
-			bt		cr1_eq,graRemRetry			; Mapping to remove is chained from anchor
-			stw		r8,0(r11)					; Unchain gpv->phys mapping
-			b		graRemoved					; Exit loop
-graRemRetry:
-			lwarx	r0,0,r11					; Get previous link
-			rlwimi	r0,r8,0,~ppFlags			; Insert new forward pointer whilst preserving flags
-			stwcx.	r0,0,r11					; Update previous link
-			bne-	graRemRetry					; Lost reservation, retry
-			b		graRemoved					; Good work, let's get outta here
-			
-graRemNext:	la		r11,mpAlias+4(r9)			; Point to (soon to be) previous link
-			crclr	cr1_eq						; ~cr1_eq <- Previous link is not the anchor
-			mr.		r9,r8						; Does next entry exist?
-			b		graRemLoop					; Carry on
-
-graRemove64:
-			li		r7,ppLFAmask				; Get mask to clean up mapping pointer
-			rotrdi	r7,r7,ppLFArrot				; Rotate clean up mask to get 0xF00000000000000F
-			la		r11,ppLink(r26)				; Point to chain anchor
-			ld		r9,ppLink(r26)				; Get chain anchor
-			andc.	r9,r9,r7					; Remove flags, yielding 64-bit physical chain pointer
-graRem64Lp:	beq--	graRemoveMiss				; End of chain, this is not good
-			cmpld	r9,r31						; Is this the mapping to remove?
-			ld		r8,mpAlias(r9)				; Get forward chain pointer
-			bne		graRem64Nxt					; Not mapping to remove, chain on, dude
-			bt		cr1_eq,graRem64Rt			; Mapping to remove is chained from anchor
-			std		r8,0(r11)					; Unchain gpv->phys mapping
-			b		graRemoved					; Exit loop
-graRem64Rt:	ldarx	r0,0,r11					; Get previous link
-			and		r0,r0,r7					; Get flags
-			or		r0,r0,r8					; Insert new forward pointer
-			stdcx.	r0,0,r11					; Slam it back in
-			bne--	graRem64Rt					; Lost reservation, retry
-			b		graRemoved					; Good work, let's go home
-		
-graRem64Nxt:
-			la		r11,mpAlias(r9)				; Point to (soon to be) previous link
-			crclr	cr1_eq						; ~cr1_eq <- Previous link is not the anchor
-			mr.		r9,r8						; Does next entry exist?
-			b		graRem64Lp					; Carry on
-
-graRemoved:
-			mr		r3,r26						; r3 <- physent's address
-			bl		mapPhysUnlock				; Unlock the physent (and its chain of mappings)
-			
-			lwz		r3,mpFlags(r31)				; Get mapping's flags
-			rlwinm	r3,r3,0,~mpgFlags			; Clear all guest flags
-			ori		r3,r3,mpgFree				; Mark mapping free
-			stw		r3,mpFlags(r31)				; Update flags
-			
-graMiss:	addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping
-			rlwinm.	r0,r31,0,GV_PAGE_MASK		; End of hash table page?
-			bne		graLoop						; No, examine next slot
-			addi	r28,r28,1					; Increment hash table page index
-			cmplwi	r28,GV_HPAGES				; End of hash table?
-			bne		graPgLoop					; Examine next hash table page
-			
-			la		r3,pmapSXlk(r24)			; r3 <- host pmap's search lock
-			bl		sxlkUnlock					; Release host pmap's search lock
-			
-			bt++	pf64Bitb,graRtn64			; Handle 64-bit separately
-			mtmsr	r30							; Restore 'rupts, translation
-			isync								; Throw a small wrench into the pipeline
-			b		graPopFrame					; Nothing to do now but pop a frame and return
-graRtn64:	mtmsrd	r30							; Restore 'rupts, translation, 32-bit mode
-graPopFrame:		
-			lwz		r0,(FM_ALIGN(graStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Get caller's return address
-			lwz		r31,FM_ARG0+0x00(r1)		; Restore non-volatile r31
-			lwz		r30,FM_ARG0+0x04(r1)		; Restore non-volatile r30
-			lwz		r29,FM_ARG0+0x08(r1)		; Restore non-volatile r29
-			lwz		r28,FM_ARG0+0x0C(r1)		; Restore non-volatile r28
-			mtlr	r0							; Prepare return address
-			lwz		r27,FM_ARG0+0x10(r1)		; Restore non-volatile r27
-			lwz		r26,FM_ARG0+0x14(r1)		; Restore non-volatile r26
-			lwz		r25,FM_ARG0+0x18(r1)		; Restore non-volatile r25
-			lwz		r24,FM_ARG0+0x1C(r1)		; Restore non-volatile r24
-			lwz		r1,0(r1)					; Pop stack frame
-			blr									; Return to caller
-
-graBadPLock:
-graRemoveMiss:
-			lis		r0,hi16(Choke)				; Dmitri, you know how we've always talked about the
-			ori		r0,r0,lo16(Choke)			;  possibility of something going wrong with the bomb?
-			li		r3,failMapping				; The BOMB, Dmitri.
-			sc									; The hydrogen bomb.
-
-
-;
-;			Guest shadow assist -- remove local guest mappings
-;
-;			Remove local mappings for a guest pmap from the shadow hash table.
-;
-;			Parameters:
-;				r3 : address of guest pmap, 32-bit kernel virtual address
-;
-;			Non-volatile register usage:
-;				r20 : current active map word's physical address
-;				r21 : current hash table page address
-;				r22 : updated active map word in process
-;				r23 : active map word in process
-;				r24 : host pmap's physical address
-;				r25 : VMM extension block's physical address
-;				r26 : physent address
-;				r27 : guest pmap's space ID number
-;				r28 : current active map index
-;				r29 : guest pmap's physical address
-;				r30 : saved msr image
-;				r31 : current mapping
-;
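-;			Overall shape, in rough C-style pseudocode (names illustrative):
-;
-;				lock(host_pmap->pmapSXlk);
-;				for (word = 0; word < GV_MAP_WORDS; word++)
-;					for (each bit lit in active_map[word]) {
-;						slot = slot_for(word, bit);
-;						if (slot->space != guest_space || (slot->flags & mpgGlobal))
-;							continue;                        // not a local guest mapping
-;						active_map[word] &= ~bit;            // no longer active
-;						slot->flags |= mpgDormant;           // suspend, do not free
-;						invalidate_pte(slot);                // mapInvPte32/64
-;					}
-;				unlock(host_pmap->pmapSXlk);
-;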
-			.align	5
-			.globl	EXT(hw_rem_local_gv)
-			
-LEXT(hw_rem_local_gv)
-
-#define grlStackSize ((31-20+1)*4)+4
-			stwu	r1,-(FM_ALIGN(grlStackSize)+FM_SIZE)(r1)
-												; Mint a new stack frame
-			mflr	r0							; Get caller's return address
-			mfsprg	r11,2						; Get feature flags
-			mtcrf	0x02,r11					; Insert feature flags into cr6
-			stw		r0,(FM_ALIGN(grlStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Save caller's return address
-			stw		r31,FM_ARG0+0x00(r1)		; Save non-volatile r31
-			stw		r30,FM_ARG0+0x04(r1)		; Save non-volatile r30
-			stw		r29,FM_ARG0+0x08(r1)		; Save non-volatile r29
-			stw		r28,FM_ARG0+0x0C(r1)		; Save non-volatile r28
-			stw		r27,FM_ARG0+0x10(r1)		; Save non-volatile r27
-			stw		r26,FM_ARG0+0x14(r1)		; Save non-volatile r26
-			stw		r25,FM_ARG0+0x18(r1)		; Save non-volatile r25
-			stw		r24,FM_ARG0+0x1C(r1)		; Save non-volatile r24
-			stw		r23,FM_ARG0+0x20(r1)		; Save non-volatile r23
-			stw		r22,FM_ARG0+0x24(r1)		; Save non-volatile r22
-			stw		r21,FM_ARG0+0x28(r1)		; Save non-volatile r21
-			stw		r20,FM_ARG0+0x2C(r1)		; Save non-volatile r20
-												
-			lwz		r11,pmapVmmExt(r3)			; r11 <- VMM pmap extension block vaddr
-
-			bt++	pf64Bitb,grl64Salt			; Test for 64-bit machine
-			lwz		r25,pmapVmmExtPhys+4(r3)	; r25 <- VMM pmap extension block paddr
-			lwz		r9,pmapvr+4(r3)				; Get 32-bit virt<->real conversion salt
-			lwz		r24,vmxHostPmapPhys+4(r11)	; r24 <- host pmap's paddr
-			b		grlStart					; Get to it			
-grl64Salt:	ld		r25,pmapVmmExtPhys(r3)		; r25 <- VMM pmap extension block paddr
-			ld		r9,pmapvr(r3)				; Get 64-bit virt<->real conversion salt
-			ld		r24,vmxHostPmapPhys(r11)	; r24 <- host pmap's paddr
-
-grlStart:	bl		EXT(mapSetUp)				; Disable 'rupts, translation, enter 64-bit mode
-			xor		r29,r3,r9					; Convert pmap_t virt->real
-			mr		r30,r11						; Save caller's msr image
-
-			la		r3,pmapSXlk(r24)			; r3 <- host pmap's search lock
-			bl		sxlkExclusive				; Get lock exclusive
-
-			li		r28,0						; r28 <- index of first active map word to search
-			lwz		r27,pmapSpace(r29)			; r27 <- guest pmap's space ID number
-			b		grlMap1st					; Examine first map word
-
-			.align	5
-grlNextMap:	stw		r22,0(r21)					; Save updated map word
-			addi	r28,r28,1					; Increment map word index
-			cmplwi	r28,GV_MAP_WORDS			; See if we're done
-			beq		grlDone						; Yup, let's get outta here
-
-grlMap1st:	la		r20,VMX_ACTMAP_OFFSET(r25)	; Get base of active map word array
-			rlwinm	r11,r28,GV_MAPWD_SZ_LG2,GV_MAP_MASK
-												; Convert map index into map index offset
-			add		r20,r20,r11					; Calculate map array element address
-			lwz		r22,0(r20)					; Get active map word at index
-			mr.		r23,r22						; Any active mappings indicated?
-			beq		grlNextMap					; Nope, check next word
-			
-			la		r21,VMX_HPIDX_OFFSET(r25)	; Get base of hash page physical index
-			rlwinm	r11,r28,GV_MAP_SHIFT,GV_HPAGE_MASK
-												; Extract page index from map word index and convert
-												;  into page physical index offset
-			add		r21,r21,r11					; Calculate page physical index entry address
-			bt++	pf64Bitb,grl64Page			; Separate handling for 64-bit
-			lwz		r21,4(r21)					; Get selected hash table page's address
-			b		grlLoop						; Examine all slots in this page
-grl64Page:	ld		r21,0(r21)					; Get selected hash table page's address
-			b		grlLoop						; Examine all slots in this page
-			
-			.align	5
-grlLoop:	cntlzw	r11,r23						; Get next active bit lit in map word
-			cmplwi	r11,32						; Any active mappings left in this word?
-			lis		r12,0x8000					; Prepare mask to reset bit
-			srw		r12,r12,r11					; Position mask bit
-			andc	r23,r23,r12					; Reset lit bit
-			beq		grlNextMap					; No bits lit, examine next map word						
-
-			slwi	r31,r11,GV_SLOT_SZ_LG2		; Get slot offset in slot band from lit bit number
-			rlwimi	r31,r28,GV_BAND_SHIFT,GV_BAND_MASK
-												; Extract slot band number from index and insert
-			add		r31,r31,r21					; Add hash page address yielding mapping slot address
-
-			lwz		r3,mpFlags(r31)				; Get mapping's flags
-			lhz		r4,mpSpace(r31)				; Get mapping's space ID number
-			rlwinm	r5,r3,0,mpgGlobal			; Extract global bit
-			xor		r4,r4,r27					; Compare space ID number
-			or.		r4,r4,r5					; (space id miss || global)
-			bne		grlLoop						; Not one of ours, skip it
-			andc	r22,r22,r12					; Reset active bit corresponding to this mapping
-			ori		r3,r3,mpgDormant			; Mark entry dormant
-			stw		r3,mpFlags(r31)				; Update mapping's flags
-
-			bt++	pf64Bitb,grlDscon64			; Handle 64-bit disconnect separately
-			bl		mapInvPte32					; Disconnect PTE, invalidate, gather ref and change
-												; r31 <- mapping's physical address
-												; r3  -> PTE slot physical address
-												; r4  -> High-order 32 bits of PTE
-												; r5  -> Low-order  32 bits of PTE
-												; r6  -> PCA
-												; r7  -> PCA physical address
-			rlwinm	r2,r3,29,29,31				; Get PTE's slot number in the PTEG (8-byte PTEs)
-			b		grlFreePTE					; Join 64-bit path to release the PTE			
-grlDscon64:	bl		mapInvPte64					; Disconnect PTE, invalidate, gather ref and change
-			rlwinm	r2,r3,28,29,31				; Get PTE's slot number in the PTEG (16-byte PTEs)
-grlFreePTE: mr.		r3,r3						; Was there a valid PTE?
-			beq-	grlLoop						; No valid PTE, we're done with this mapping
-			lis		r0,0x8000					; Prepare free bit for this slot
-			srw		r0,r0,r2					; Position free bit
-			or		r6,r6,r0					; Set it in our PCA image
-			lwz		r8,mpPte(r31)				; Get PTE pointer
-			rlwinm	r8,r8,0,~mpHValid			; Make the pointer invalid
-			stw		r8,mpPte(r31)				; Save invalidated PTE pointer
-			eieio								; Synchronize all previous updates (mapInvPtexx doesn't)
-			stw		r6,0(r7)					; Update PCA and unlock the PTEG
-			b		grlLoop						; On to next active mapping in this map word
-						
-grlDone:	la		r3,pmapSXlk(r24)			; r3 <- host pmap's search lock
-			bl		sxlkUnlock					; Release host pmap's search lock
-			
-			bt++	pf64Bitb,grlRtn64			; Handle 64-bit separately
-			mtmsr	r30							; Restore 'rupts, translation
-			isync								; Throw a small wrench into the pipeline
-			b		grlPopFrame					; Nothing to do now but pop a frame and return
-grlRtn64:	mtmsrd	r30							; Restore 'rupts, translation, 32-bit mode
-grlPopFrame:		
-			lwz		r0,(FM_ALIGN(grlStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Get caller's return address
-			lwz		r31,FM_ARG0+0x00(r1)		; Restore non-volatile r31
-			lwz		r30,FM_ARG0+0x04(r1)		; Restore non-volatile r30
-			lwz		r29,FM_ARG0+0x08(r1)		; Restore non-volatile r29
-			lwz		r28,FM_ARG0+0x0C(r1)		; Restore non-volatile r28
-			mtlr	r0							; Prepare return address
-			lwz		r27,FM_ARG0+0x10(r1)		; Restore non-volatile r27
-			lwz		r26,FM_ARG0+0x14(r1)		; Restore non-volatile r26
-			lwz		r25,FM_ARG0+0x18(r1)		; Restore non-volatile r25
-			lwz		r24,FM_ARG0+0x1C(r1)		; Restore non-volatile r24
-			lwz		r23,FM_ARG0+0x20(r1)		; Restore non-volatile r23
-			lwz		r22,FM_ARG0+0x24(r1)		; Restore non-volatile r22
-			lwz		r21,FM_ARG0+0x28(r1)		; Restore non-volatile r21
-			lwz		r20,FM_ARG0+0x2C(r1)		; Restore non-volatile r20
-			lwz		r1,0(r1)					; Pop stack frame
-			blr									; Return to caller
-
-
-;
-;			Guest shadow assist -- resume a guest mapping
-;
-;			Locates the specified dormant mapping, and if it exists validates it and makes it
-;			active.
-;
-;			Parameters:
-;				r3 : address of host pmap, 32-bit kernel virtual address
-;				r4 : address of guest pmap, 32-bit kernel virtual address
-;				r5 : host virtual address, high-order 32 bits
-;				r6 : host virtual address,  low-order 32 bits
-;				r7 : guest virtual address, high-order 32 bits
-;				r8 : guest virtual address,  low-order 32 bits
-;				r9 : guest mapping protection code
-;
-;			Non-volatile register usage:
-;				r23 : VMM extension block's physical address
-;				r24 : physent physical address
-;				r25 : caller's msr image from mapSetUp
-;				r26 : guest mapping protection code
-;				r27 : host pmap physical address
-;				r28 : guest pmap physical address
-;				r29 : host virtual address
-;				r30 : guest virtual address
-;				r31 : gva->phys mapping's physical address
-;
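-;			Overall shape, in rough C-style pseudocode (names illustrative):
-;
-;				lock(host_pmap->pmapSXlk);
-;				slot = search_hash_group(guest_space, gva);  // shadow hash lookup
-;				if (!slot) { unlock; return mapRtNotFnd; }
-;				if (!(slot->flags & mpgDormant))
-;					invalidate_pte(slot);                    // mapInvPte32/64
-;				if (physent_chain_has(host_space, hva)) {    // frame still backed?
-;					slot->prot = new_prot;                   // mpPP bits
-;					slot->flags &= ~mpgDormant;              // mapping is live again
-;					result = mapRtOK;
-;				} else {
-;					unchain_from_physent(slot);              // gva->phys is stale
-;					slot->flags = (slot->flags & ~mpgFlags) | mpgFree;
-;					result = mapRtNotFnd;                    // caller takes long path
-;				}
-;				unlock(host_pmap->pmapSXlk);
-;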
-			.align	5
-			.globl	EXT(hw_res_map_gv)
-			
-LEXT(hw_res_map_gv)
-
-#define grsStackSize ((31-23+1)*4)+4
-
-			stwu	r1,-(FM_ALIGN(grsStackSize)+FM_SIZE)(r1)
-												; Mint a new stack frame
-			mflr	r0							; Get caller's return address
-			mfsprg	r11,2						; Get feature flags
-			mtcrf	0x02,r11					; Insert feature flags into cr6
-			stw		r0,(FM_ALIGN(grsStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Save caller's return address
-			stw		r31,FM_ARG0+0x00(r1)		; Save non-volatile r31
-			stw		r30,FM_ARG0+0x04(r1)		; Save non-volatile r30
-			stw		r29,FM_ARG0+0x08(r1)		; Save non-volatile r29
-			stw		r28,FM_ARG0+0x0C(r1)		; Save non-volatile r28
-			stw		r27,FM_ARG0+0x10(r1)		; Save non-volatile r27
-			stw		r26,FM_ARG0+0x14(r1)		; Save non-volatile r26
-			stw		r25,FM_ARG0+0x18(r1)		; Save non-volatile r25
-			stw		r24,FM_ARG0+0x1C(r1)		; Save non-volatile r24
-			stw		r23,FM_ARG0+0x20(r1)		; Save non-volatile r23
-
-			rlwinm	r29,r6,0,0xFFFFF000			; Clean up low-order 32 bits of host vaddr
-			rlwinm	r30,r8,0,0xFFFFF000			; Clean up low-order 32 bits of guest vaddr
-			mr		r26,r9						; Copy guest mapping protection code
-
-			lwz		r11,pmapVmmExt(r3)			; r11 <- VMM pmap extension block vaddr
-			lwz		r9,pmapSpace(r4)			; r9 <- guest space ID number
-			bt++	pf64Bitb,grs64Salt			; Handle 64-bit machine separately
-			lwz		r23,pmapVmmExtPhys+4(r3)	; r23 <- VMM pmap extension block paddr
-			lwz		r27,pmapvr+4(r3)			; Get 32-bit virt<->real host pmap conversion salt
-			lwz		r28,pmapvr+4(r4)			; Get 32-bit virt<->real guest pmap conversion salt
-			la		r31,VMX_HPIDX_OFFSET(r11)	; r31 <- base of hash page physical index
-			srwi	r11,r30,12					; Form shadow hash:
-			xor		r11,r11,r9					; 	spaceID ^ (vaddr >> 12) 
-			rlwinm	r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK
-												; Form index offset from hash page number
-			add		r31,r31,r10					; r31 <- hash page index entry
-			lwz		r31,4(r31)					; r31 <- hash page paddr
-			rlwimi	r31,r11,GV_HGRP_SHIFT,GV_HGRP_MASK
-												; r31 <- hash group paddr
-			b		grsStart					; Get to it			
-
-grs64Salt:	rldimi	r29,r5,32,0					; Insert high-order 32 bits of 64-bit host vaddr			
-			rldimi	r30,r7,32,0					; Insert high-order 32 bits of 64-bit guest vaddr			
-			ld		r23,pmapVmmExtPhys(r3)		; r23 <- VMM pmap extension block paddr
-			ld		r27,pmapvr(r3)				; Get 64-bit virt<->real host pmap conversion salt
-			ld		r28,pmapvr(r4)				; Get 64-bit virt<->real guest pmap conversion salt
-			la		r31,VMX_HPIDX_OFFSET(r11)	; r31 <- base of hash page physical index
-			srwi	r11,r30,12					; Form shadow hash:
-			xor		r11,r11,r9					; 	spaceID ^ (vaddr >> 12) 
-			rlwinm	r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK
-												; Form index offset from hash page number
-			add		r31,r31,r10					; r31 <- hash page index entry
-			ld		r31,0(r31)					; r31 <- hash page paddr
-			insrdi	r31,r11,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2)
-												; r31 <- hash group paddr
-
-grsStart:	xor		r27,r3,r27					; Convert host pmap_t virt->real
-			xor		r28,r4,r28					; Convert guest pmap_t virt->real
-			bl		EXT(mapSetUp)				; Disable 'rupts, translation, maybe enter 64-bit mode
-			mr		r25,r11						; Save caller's msr image
-
-			la		r3,pmapSXlk(r27)			; r3 <- host pmap's search lock address
-			bl		sxlkExclusive				; Get lock exclusive
-
-			li		r0,(GV_SLOTS - 1)			; Prepare to iterate over mapping slots
-			mtctr	r0							;  in this group
-			bt++	pf64Bitb,grs64Search		; Test for 64-bit machine
-
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			lwz		r5,mpVAddr+4(r31)			; r5 <- 1st mapping slot's virtual address
-			b		grs32SrchLp					; Let the search begin!
-			
-			.align	5
-grs32SrchLp:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrwi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			lwz		r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr
-			rlwinm	r11,r6,0,mpgFree			; Isolate guest free flag
-			xor		r7,r7,r9					; Compare space ID
-			or		r0,r11,r7					; r0 <- !(!free && space match)
-			xor		r8,r8,r30					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && space match && virtual addr match
-			beq		grsSrchHit					; Join common path on hit (r31 points to guest mapping)
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	grs32SrchLp					; Iterate
-
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			clrrwi	r5,r5,12					; Remove flags from virtual address			
-			rlwinm	r11,r6,0,mpgFree			; Isolate guest free flag
-			xor		r4,r4,r9					; Compare space ID
-			or		r0,r11,r4					; r0 <- !(!free && space match)
-			xor		r5,r5,r30					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && space match && virtual addr match
-			beq		grsSrchHit					; Join common path on hit (r31 points to guest mapping)
-			b		grsSrchMiss					; No joy in our hash group
-			
-grs64Search:			
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			ld		r5,mpVAddr(r31)				; r5 <- 1st mapping slot's virtual address
-			b		grs64SrchLp					; Let the search begin!
-			
-			.align	5
-grs64SrchLp:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrdi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			ld		r5,mpVAddr+GV_SLOT_SZ(r31)	; r5 <- next mapping slot's virtual addr
-			rlwinm	r11,r6,0,mpgFree			; Isolate guest free flag
-			xor		r7,r7,r9					; Compare space ID
-			or		r0,r11,r7					; r0 <- !(!free && space match)
-			xor		r8,r8,r30					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && space match && virtual addr match
-			beq		grsSrchHit					; Join common path on hit (r31 points to guest mapping)
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	grs64SrchLp					; Iterate
-
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			clrrdi	r5,r5,12					; Remove flags from virtual address			
-			rlwinm	r11,r6,0,mpgFree			; Isolate guest free flag
-			xor		r4,r4,r9					; Compare space ID
-			or		r0,r11,r4					; r0 <- !(!free && space match)
-			xor		r5,r5,r30					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && space match && virtual addr match
-			bne		grsSrchMiss					; No joy in our hash group
-			
-grsSrchHit:
-			rlwinm.	r0,r6,0,mpgDormant			; Is the mapping dormant?
-			bne		grsFindHost					; Yes, nothing to disconnect
-
-			bt++	pf64Bitb,grsDscon64			; Handle 64-bit disconnect separately
-			bl		mapInvPte32					; Disconnect PTE, invalidate, gather ref and change
-												; r31 <- mapping's physical address
-												; r3  -> PTE slot physical address
-												; r4  -> High-order 32 bits of PTE
-												; r5  -> Low-order  32 bits of PTE
-												; r6  -> PCA
-												; r7  -> PCA physical address
-			rlwinm	r2,r3,29,29,31				; Get PTE's slot number in the PTEG (8-byte PTEs)
-			b		grsFreePTE					; Join 64-bit path to release the PTE			
-grsDscon64:	bl		mapInvPte64					; Disconnect PTE, invalidate, gather ref and change
-			rlwinm	r2,r3,28,29,31				; Get PTE's slot number in the PTEG (16-byte PTEs)
-grsFreePTE: mr.		r3,r3						; Was there a valid PTE?
-			beq-	grsFindHost					; No valid PTE, we're almost done
-			lis		r0,0x8000					; Prepare free bit for this slot
-			srw		r0,r0,r2					; Position free bit
-			or		r6,r6,r0					; Set it in our PCA image
-			lwz		r8,mpPte(r31)				; Get PTE pointer
-			rlwinm	r8,r8,0,~mpHValid			; Make the pointer invalid
-			stw		r8,mpPte(r31)				; Save invalidated PTE pointer
-			eieio								; Synchronize all previous updates (mapInvPtexx didn't)
-			stw		r6,0(r7)					; Update PCA and unlock the PTEG
-
-grsFindHost:
-
-// We now have a dormant guest mapping that matches our space id and virtual address. Our next
-// step is to locate the host mapping that completes the guest mapping's connection to a physical
-// frame. The guest and host mappings must connect to the same physical frame, so they must both
-// be chained on the same physent. We search the physent chain for a host mapping matching our
-// host's space id and the host virtual address. If we succeed, we know that the entire chain
-// of mappings (guest virtual->host virtual->physical) is valid, so the dormant mapping can be
-// resumed. If we fail to find the specified host virtual->physical mapping, it is because the
-// host virtual or physical address has changed since the guest mapping was suspended, so it
-// is no longer valid and cannot be resumed -- we therefore delete the guest mapping and tell
-// our caller that it will have to take its long path, translating the host virtual address
-// through the host's skiplist and installing a new guest mapping.
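-//
-// In rough C terms (illustrative; helper names are descriptive, not the kernel's),
-// the physent chain search amounts to:
-//
-//     for (mp = clean(physent->ppLink); mp != NULL; mp = clean(mp->mpAlias))
-//         if (type(mp) == mpNormal && mp->mpSpace == host_space_id
-//          && (mp->mpVAddr & ~mpHWFlags) == host_vaddr)
-//             break;                      // hit: gva->hva->phys chain is intact
-//     if (mp == NULL)
-//         delete_guest_mapping();         // miss: report mapRtNotFnd to caller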
-
-			lwz		r3,mpPAddr(r31)				; r3 <- physical 4K-page number
-			bl		mapFindLockPN				; Find 'n' lock this page's physent
-			mr.		r24,r3						; Got lock on our physent?
-			beq--	grsBadPLock					; No, time to bail out
-			
-			bt++	pf64Bitb,grsPFnd64			; 64-bit version of physent chain search
-			
-			lwz		r9,ppLink+4(r24)			; Get first mapping on physent
-			lwz		r6,pmapSpace(r27)			; Get host pmap's space id number
-			rlwinm	r9,r9,0,~ppFlags			; Be-gone, unsightly flags
-grsPELoop:	mr.		r12,r9						; Got a mapping to look at?
-			beq-	grsPEMiss					; Nope, we've missed hva->phys mapping
-			lwz		r7,mpFlags(r12)				; Get mapping's flags
-			lhz		r4,mpSpace(r12)				; Get mapping's space id number
-			lwz		r5,mpVAddr+4(r12)			; Get mapping's virtual address
-			lwz		r9,mpAlias+4(r12)			; Next mapping in physent alias chain
-			
-			rlwinm	r0,r7,0,mpType				; Isolate mapping's type
-			rlwinm	r5,r5,0,~mpHWFlags			; Bye-bye unsightly flags
-			xori	r0,r0,mpNormal				; Normal mapping?
-			xor		r4,r4,r6					; Compare w/ host space id number
-			xor		r5,r5,r29					; Compare w/ host virtual address
-			or		r0,r0,r4					; r0 <- (wrong type || !space id)
-			or.		r0,r0,r5					; cr0_eq <- (right type && space id hit && hva hit)
-			beq		grsPEHit					; Hit
-			b		grsPELoop					; Iterate
-			
-grsPFnd64:	li		r0,ppLFAmask				; Get mask to clean up mapping pointer
-			rotrdi	r0,r0,ppLFArrot				; Rotate clean up mask to get 0xF00000000000000F
-			ld		r9,ppLink(r24)				; Get first mapping on physent
-			lwz		r6,pmapSpace(r27)			; Get pmap's space id number
-			andc	r9,r9,r0					; Cleanup mapping pointer
-grsPELp64:	mr.		r12,r9						; Got a mapping to look at?
-			beq--	grsPEMiss					; Nope, we've missed hva->phys mapping
-			lwz		r7,mpFlags(r12)				; Get mapping's flags
-			lhz		r4,mpSpace(r12)				; Get mapping's space id number
-			ld		r5,mpVAddr(r12)				; Get mapping's virtual address
-			ld		r9,mpAlias(r12)				; Next mapping physent alias chain
-			rlwinm	r0,r7,0,mpType				; Isolate mapping's type
-			rldicr	r5,r5,0,mpHWFlagsb-1		; Bye-bye unsightly flags
-			xori	r0,r0,mpNormal				; Normal mapping?
-			xor		r4,r4,r6					; Compare w/ host space id number
-			xor		r5,r5,r29					; Compare w/ host virtual address
-			or		r0,r0,r4					; r0 <- (wrong type || !space id)
-			or.		r0,r0,r5					; cr0_eq <- (right type && space id hit && hva hit)
-			beq		grsPEHit					; Hit
-			b		grsPELp64					; Iterate
-			
-grsPEHit:	lwz		r0,mpVAddr+4(r31)			; Get va byte containing protection bits
-			rlwimi	r0,r26,0,mpPP				; Insert new protection bits
-			stw		r0,mpVAddr+4(r31)			; Write 'em back
-
-			eieio								; Ensure previous mapping updates are visible
-			lwz		r0,mpFlags(r31)				; Get flags
-			rlwinm	r0,r0,0,~mpgDormant			; Turn off dormant flag
-			stw		r0,mpFlags(r31)				; Set updated flags, entry is now valid
-			
-			li		r31,mapRtOK					; Indicate success
-			b		grsRelPhy					; Exit through physent lock release
-
-grsPEMiss:	crset	cr1_eq						; cr1_eq <- previous link is the anchor
-			bt++	pf64Bitb,grsRemove64		; Use 64-bit version on 64-bit machine
-			la		r11,ppLink+4(r24)			; Point to chain anchor
-			lwz		r9,ppLink+4(r24)			; Get chain anchor
-			rlwinm.	r9,r9,0,~ppFlags			; Remove flags, yielding 32-bit physical chain pointer
-grsRemLoop:	beq-	grsPEMissMiss				; End of chain, this is not good
-			cmplw	r9,r31						; Is this the mapping to remove?
-			lwz		r8,mpAlias+4(r9)			; Get forward chain pointer
-			bne		grsRemNext					; No, chain onward
-			bt		cr1_eq,grsRemRetry			; Mapping to remove is chained from anchor
-			stw		r8,0(r11)					; Unchain gpv->phys mapping
-			b		grsDelete					; Finish deleting mapping
-grsRemRetry:
-			lwarx	r0,0,r11					; Get previous link
-			rlwimi	r0,r8,0,~ppFlags			; Insert new forward pointer whilst preserving flags
-			stwcx.	r0,0,r11					; Update previous link
-			bne-	grsRemRetry					; Lost reservation, retry
-			b		grsDelete					; Finish deleting mapping
-			
-			.align	5
-grsRemNext:	la		r11,mpAlias+4(r9)			; Point to (soon to be) previous link
-			crclr	cr1_eq						; ~cr1_eq <- Previous link is not the anchor
-			mr.		r9,r8						; Does next entry exist?
-			b		grsRemLoop					; Carry on
-
-grsRemove64:
-			li		r7,ppLFAmask				; Get mask to clean up mapping pointer
-			rotrdi	r7,r7,ppLFArrot				; Rotate clean up mask to get 0xF00000000000000F
-			la		r11,ppLink(r24)				; Point to chain anchor
-			ld		r9,ppLink(r24)				; Get chain anchor
-			andc.	r9,r9,r7					; Remove flags, yielding 64-bit physical chain pointer
-grsRem64Lp:	beq--	grsPEMissMiss				; End of chain, this is not good
-			cmpld	r9,r31						; Is this the mapping to remove?
-			ld		r8,mpAlias(r9)				; Get forward chain pointer
-			bne		grsRem64Nxt					; Not mapping to remove, chain on, dude
-			bt		cr1_eq,grsRem64Rt			; Mapping to remove is chained from anchor
-			std		r8,0(r11)					; Unchain gpv->phys mapping
-			b		grsDelete					; Finish deleting mapping
-grsRem64Rt:	ldarx	r0,0,r11					; Get previous link
-			and		r0,r0,r7					; Get flags
-			or		r0,r0,r8					; Insert new forward pointer
-			stdcx.	r0,0,r11					; Slam it back in
-			bne--	grsRem64Rt					; Lost reservation, retry
-			b		grsDelete					; Finish deleting mapping
-
-			.align	5		
-grsRem64Nxt:
-			la		r11,mpAlias(r9)				; Point to (soon to be) previous link
-			crclr	cr1_eq						; ~cr1_eq <- Previous link is not the anchor
-			mr.		r9,r8						; Does next entry exist?
-			b		grsRem64Lp					; Carry on
-			
-grsDelete:
-			lwz		r3,mpFlags(r31)				; Get mapping's flags
-			rlwinm	r3,r3,0,~mpgFlags			; Clear all guest flags
-			ori		r3,r3,mpgFree				; Mark mapping free
-			stw		r3,mpFlags(r31)				; Update flags
-
-			li		r31,mapRtNotFnd				; Didn't succeed
-
-grsRelPhy:	mr		r3,r24						; r3 <- physent addr
-			bl		mapPhysUnlock				; Unlock physent chain
-			
-grsRelPmap:	la		r3,pmapSXlk(r27)			; r3 <- host pmap search lock phys addr
-			bl		sxlkUnlock					; Release host pmap search lock
-			
-grsRtn:		mr		r3,r31						; r3 <- result code
-			bt++	pf64Bitb,grsRtn64			; Handle 64-bit separately
-			mtmsr	r25							; Restore 'rupts, translation
-			isync								; Throw a small wrench into the pipeline
-			b		grsPopFrame					; Nothing to do now but pop a frame and return
-grsRtn64:	mtmsrd	r25							; Restore 'rupts, translation, 32-bit mode
-grsPopFrame:		
-			lwz		r0,(FM_ALIGN(grsStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Get caller's return address
-			lwz		r31,FM_ARG0+0x00(r1)		; Restore non-volatile r31
-			lwz		r30,FM_ARG0+0x04(r1)		; Restore non-volatile r30
-			lwz		r29,FM_ARG0+0x08(r1)		; Restore non-volatile r29
-			lwz		r28,FM_ARG0+0x0C(r1)		; Restore non-volatile r28
-			mtlr	r0							; Prepare return address
-			lwz		r27,FM_ARG0+0x10(r1)		; Restore non-volatile r27
-			lwz		r26,FM_ARG0+0x14(r1)		; Restore non-volatile r26
-			lwz		r25,FM_ARG0+0x18(r1)		; Restore non-volatile r25
-			lwz		r24,FM_ARG0+0x1C(r1)		; Restore non-volatile r24
-			lwz		r23,FM_ARG0+0x20(r1)		; Restore non-volatile r23
-			lwz		r1,0(r1)					; Pop stack frame
-			blr									; Return to caller
-
-			.align	5
-grsSrchMiss:
-			li		r31,mapRtNotFnd				; Could not locate requested mapping
-			b		grsRelPmap					; Exit through host pmap search lock release
-
-grsBadPLock:
-grsPEMissMiss:
-			lis		r0,hi16(Choke)				; Dmitri, you know how we've always talked about the
-			ori		r0,r0,lo16(Choke)			;  possibility of something going wrong with the bomb?
-			li		r3,failMapping				; The BOMB, Dmitri.
-			sc									; The hydrogen bomb.
-
-
-;
-;			Guest shadow assist -- add a guest mapping
-;
-;			Adds a mapping to a guest pmap's shadow hash table, stealing an
-;			existing slot if the target hash group is full.
-;
-;			Parameters:
-;				r3 : address of host pmap, 32-bit kernel virtual address
-;				r4 : address of guest pmap, 32-bit kernel virtual address
-;				r5 : guest virtual address, high-order 32 bits
-;				r6 : guest virtual address,  low-order 32 bits (with mpHWFlags)
-;				r7 : new mapping's flags
-;				r8 : physical address, 32-bit page number
-;
-;			Non-volatile register usage:
-;				r22 : hash group's physical address
-;				r23 : VMM extension block's physical address
-;				r24 : mapping's flags
-;				r25 : caller's msr image from mapSetUp
-;				r26 : physent physical address
-;				r27 : host pmap physical address
-;				r28 : guest pmap physical address
-;				r29 : physical address, 32-bit 4k-page number
-;				r30 : guest virtual address
-;				r31 : gva->phys mapping's physical address
-;
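-;			Overall shape, in rough C-style pseudocode (names illustrative):
-;
-;				lock(host_pmap->pmapSXlk);
-;				group = hash_group(guest_space, gva);        // shadow hash
-;				if (search(group, guest_space, gva))         // mapping already present
-;					goto out;                                // let upper level redrive
-;				slot = first free slot scanning from group cursor,
-;				       else first dormant slot seen, else slot at cursor (steal);
-;				if (slot was stolen)                         // live victim: kill its PTE
-;					invalidate_pte(slot);                    // mapInvPte32/64
-;				if (slot was stolen or dormant)
-;					unchain_from_physent(slot);              // old gva->phys linkage
-;				fill(slot, flags, space_id, gva, paddr);     // mpPte marked invalid
-;				chain_at_head_of_physent(slot, paddr);       // mpAlias linkage
-;			out:
-;				unlock(host_pmap->pmapSXlk);
-;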
-			
-			.align	5
-			.globl	EXT(hw_add_map_gv)
-			
-			
-LEXT(hw_add_map_gv)
-
-#define gadStackSize ((31-22+1)*4)+4
-
-			stwu	r1,-(FM_ALIGN(gadStackSize)+FM_SIZE)(r1)
-												; Mint a new stack frame
-			mflr	r0							; Get caller's return address
-			mfsprg	r11,2						; Get feature flags
-			mtcrf	0x02,r11					; Insert feature flags into cr6
-			stw		r0,(FM_ALIGN(gadStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Save caller's return address
-			stw		r31,FM_ARG0+0x00(r1)		; Save non-volatile r31
-			stw		r30,FM_ARG0+0x04(r1)		; Save non-volatile r30
-			stw		r29,FM_ARG0+0x08(r1)		; Save non-volatile r29
-			stw		r28,FM_ARG0+0x0C(r1)		; Save non-volatile r28
-			stw		r27,FM_ARG0+0x10(r1)		; Save non-volatile r27
-			stw		r26,FM_ARG0+0x14(r1)		; Save non-volatile r26
-			stw		r25,FM_ARG0+0x18(r1)		; Save non-volatile r25
-			stw		r24,FM_ARG0+0x1C(r1)		; Save non-volatile r24
-			stw		r23,FM_ARG0+0x20(r1)		; Save non-volatile r23
-			stw		r22,FM_ARG0+0x24(r1)		; Save non-volatile r22
-
-			rlwinm	r30,r5,0,1,0				; Get high-order 32 bits of guest vaddr
-			rlwimi	r30,r6,0,0,31				; Get  low-order 32 bits of guest vaddr
-			mr		r24,r7						; Copy guest mapping's flags
-			mr		r29,r8						; Copy target frame's physical address
-
-			lwz		r11,pmapVmmExt(r3)			; r11 <- VMM pmap extension block vaddr
-			lwz		r9,pmapSpace(r4)			; r9 <- guest space ID number
-			bt++	pf64Bitb,gad64Salt			; Test for 64-bit machine
-			lwz		r23,pmapVmmExtPhys+4(r3)	; r23 <- VMM pmap extension block paddr
-			lwz		r27,pmapvr+4(r3)			; Get 32-bit virt<->real host pmap conversion salt
-			lwz		r28,pmapvr+4(r4)			; Get 32-bit virt<->real guest pmap conversion salt
-			la		r22,VMX_HPIDX_OFFSET(r11)	; r22 <- base of hash page physical index
-			srwi	r11,r30,12					; Form shadow hash:
-			xor		r11,r11,r9					; 	spaceID ^ (vaddr >> 12) 
-			rlwinm	r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK
-												; Form index offset from hash page number
-			add		r22,r22,r10					; r22 <- hash page index entry
-			lwz		r22,4(r22)					; r22 <- hash page paddr
-			rlwimi	r22,r11,GV_HGRP_SHIFT,GV_HGRP_MASK
-												; r22 <- hash group paddr
-			b		gadStart					; Get to it			
-
-gad64Salt:	ld		r23,pmapVmmExtPhys(r3)		; r23 <- VMM pmap extension block paddr
-			ld		r27,pmapvr(r3)				; Get 64-bit virt<->real host pmap conversion salt
-			ld		r28,pmapvr(r4)				; Get 64-bit virt<->real guest pmap conversion salt
-			la		r22,VMX_HPIDX_OFFSET(r11)	; r22 <- base of hash page physical index
-			srwi	r11,r30,12					; Form shadow hash:
-			xor		r11,r11,r9					; 	spaceID ^ (vaddr >> 12) 
-			rlwinm	r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK
-												; Form index offset from hash page number
-			add		r22,r22,r10					; r22 <- hash page index entry
-			ld		r22,0(r22)					; r22 <- hash page paddr
-			insrdi	r22,r11,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2)
-												; r22 <- hash group paddr
-
-gadStart:	xor		r27,r3,r27					; Convert host pmap_t virt->real
-			xor		r28,r4,r28					; Convert guest pmap_t virt->real
-			bl		EXT(mapSetUp)				; Disable 'rupts, translation, maybe enter 64-bit mode
-			mr		r25,r11						; Save caller's msr image
-
-			la		r3,pmapSXlk(r27)			; r3 <- host pmap's search lock address
-			bl		sxlkExclusive				; Get lock exclusive
-
-			mr		r31,r22						; Prepare to search this group
-			li		r0,(GV_SLOTS - 1)			; Prepare to iterate over mapping slots
-			mtctr	r0							;  in this group
-			bt++	pf64Bitb,gad64Search		; Test for 64-bit machine
-
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			lwz		r5,mpVAddr+4(r31)			; r5 <- 1st mapping slot's virtual address
-			clrrwi	r12,r30,12					; r12 <- virtual address we're searching for
-			b		gad32SrchLp					; Let the search begin!
-			
-			.align	5
-gad32SrchLp:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrwi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			lwz		r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr
-			rlwinm	r11,r6,0,mpgFree			; Isolate guest free flag
-			xor		r7,r7,r9					; Compare space ID
-			or		r0,r11,r7					; r0 <- !(!free && space match)
-			xor		r8,r8,r12					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && space match && virtual addr match
-			beq		gadRelPmap					; Join common path on hit (r31 points to guest mapping)
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	gad32SrchLp					; Iterate
-
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			clrrwi	r5,r5,12					; Remove flags from virtual address			
-			rlwinm	r11,r6,0,mpgFree			; Isolate guest free flag
-			xor		r4,r4,r9					; Compare space ID
-			or		r0,r11,r4					; r0 <- !(!free && space match)
-			xor		r5,r5,r12					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && space match && virtual addr match
-			beq		gadRelPmap					; Join common path on hit (r31 points to guest mapping)
-			b		gadScan						; No joy in our hash group
-			
-gad64Search:			
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			ld		r5,mpVAddr(r31)				; r5 <- 1st mapping slot's virtual address
-			clrrdi	r12,r30,12					; r12 <- virtual address we're searching for
-			b		gad64SrchLp					; Let the search begin!
-			
-			.align	5
-gad64SrchLp:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrdi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			ld		r5,mpVAddr+GV_SLOT_SZ(r31)	; r5 <- next mapping slot's virtual addr
-			rlwinm	r11,r6,0,mpgFree			; Isolate guest free flag
-			xor		r7,r7,r9					; Compare space ID
-			or		r0,r11,r7					; r0 <- !(!free && space match)
-			xor		r8,r8,r12					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && space match && virtual addr match
-			beq		gadRelPmap					; Hit, let upper-level redrive sort it out
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	gad64SrchLp					; Iterate
-
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			clrrdi	r5,r5,12					; Remove flags from virtual address			
-			rlwinm	r11,r6,0,mpgFree			; Isolate guest free flag
-			xor		r4,r4,r9					; Compare space ID
-			or		r0,r11,r4					; r0 <- !(!free && space match)
-			xor		r5,r5,r12					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && space match && virtual addr match
-			bne		gadScan						; No joy in our hash group
-			b		gadRelPmap					; Hit, let upper-level redrive sort it out
-			
-gadScan:	lbz		r12,mpgCursor(r22)			; Get group's cursor
-			rlwinm	r12,r12,GV_SLOT_SZ_LG2,(GV_SLOT_MASK << GV_SLOT_SZ_LG2)
-												; Prepare to address slot at cursor
-			li		r0,(GV_SLOTS - 1)			; Prepare to iterate over mapping slots
-			mtctr	r0							;  in this group
-			or		r2,r22,r12					; r2 <- 1st mapping to search
-			lwz		r3,mpFlags(r2)				; r3 <- 1st mapping slot's flags
-			li		r11,0						; No dormant entries found yet
-			b		gadScanLoop					; Let the search begin!
-			
-			.align	5
-gadScanLoop:
-			addi	r12,r12,GV_SLOT_SZ			; Calculate next slot number to search
-			rlwinm	r12,r12,0,(GV_SLOT_MASK << GV_SLOT_SZ_LG2)
-												; Trim off any carry, wrapping into slot number range
-			mr		r31,r2						; r31 <- current mapping's address
-			or		r2,r22,r12					; r2 <- next mapping to search
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags(r2)				; r3 <- next mapping slot's flags
-			rlwinm.	r0,r6,0,mpgFree				; Test free flag
-			bne		gadFillMap					; Join common path on hit (r31 points to free mapping)
-			rlwinm	r0,r6,0,mpgDormant			; Dormant entry?
-			xori	r0,r0,mpgDormant			; Invert dormant flag
-			or.		r0,r0,r11					; Skip all but the first dormant entry we see
-			bne		gadNotDorm					; Not dormant or we've already seen one
-			mr		r11,r31						; We'll use this dormant entry if we don't find a free one first
-gadNotDorm:	bdnz	gadScanLoop					; Iterate
-
-			mr		r31,r2						; r31 <- final mapping's address
-			rlwinm.	r0,r6,0,mpgFree				; Test free flag in final mapping
-			bne		gadFillMap					; Join common path on hit (r31 points to free mapping)
-			rlwinm	r0,r6,0,mpgDormant			; Dormant entry?
-			xori	r0,r0,mpgDormant			; Invert dormant flag
-			or.		r0,r0,r11					; Skip all but the first dormant entry we see
-			bne		gadCkDormant				; Not dormant or we've already seen one
-			mr		r11,r31						; We'll use this dormant entry if we don't find a free one first
-
-gadCkDormant:
-			mr.		r31,r11						; Get dormant mapping, if any, and test
-			bne		gadUpCursor					; Go update the cursor, we'll take the dormant entry
-			
-gadSteal:
-			lbz		r12,mpgCursor(r22)			; Get group's cursor
-			rlwinm	r12,r12,GV_SLOT_SZ_LG2,(GV_SLOT_MASK << GV_SLOT_SZ_LG2)
-												; Prepare to address slot at cursor
-			or		r31,r22,r12					; r31 <- address of mapping to steal
-
-			bt++	pf64Bitb,gadDscon64			; Handle 64-bit disconnect separately
-			bl		mapInvPte32					; Disconnect PTE, invalidate, gather ref and change
-												; r31 <- mapping's physical address
-												; r3  -> PTE slot physical address
-												; r4  -> High-order 32 bits of PTE
-												; r5  -> Low-order  32 bits of PTE
-												; r6  -> PCA
-												; r7  -> PCA physical address
-			rlwinm	r2,r3,29,29,31				; Get PTE's slot number in the PTEG (8-byte PTEs)
-			b		gadFreePTE					; Join 64-bit path to release the PTE			
-gadDscon64:	bl		mapInvPte64					; Disconnect PTE, invalidate, gather ref and change
-			rlwinm	r2,r3,28,29,31				; Get PTE's slot number in the PTEG (16-byte PTEs)
-gadFreePTE: mr.		r3,r3						; Was there a valid PTE?
-			beq-	gadUpCursor					; No valid PTE, we're almost done
-			lis		r0,0x8000					; Prepare free bit for this slot
-			srw		r0,r0,r2					; Position free bit
-			or		r6,r6,r0					; Set it in our PCA image
-			lwz		r8,mpPte(r31)				; Get PTE pointer
-			rlwinm	r8,r8,0,~mpHValid			; Make the pointer invalid
-			stw		r8,mpPte(r31)				; Save invalidated PTE pointer
-			eieio								; Synchronize all previous updates (mapInvPtexx didn't)
-			stw		r6,0(r7)					; Update PCA and unlock the PTEG
-
-gadUpCursor:
-			rlwinm	r12,r31,(32-GV_SLOT_SZ_LG2),GV_SLOT_MASK
-												; Recover slot number from stolen mapping's address
-			addi	r12,r12,1					; Increment slot number
-			rlwinm	r12,r12,0,GV_SLOT_MASK		; Clip to slot number range
-			stb		r12,mpgCursor(r22)			; Update group's cursor
-
-			lwz		r3,mpPAddr(r31)				; r3 <- physical 4K-page number
-			bl		mapFindLockPN				; Find 'n' lock this page's physent
-			mr.		r26,r3						; Got lock on our physent?
-			beq--	gadBadPLock					; No, time to bail out
-
-			crset	cr1_eq						; cr1_eq <- previous link is the anchor
-			bt++	pf64Bitb,gadRemove64		; Use 64-bit version on 64-bit machine
-			la		r11,ppLink+4(r26)			; Point to chain anchor
-			lwz		r9,ppLink+4(r26)			; Get chain anchor
-			rlwinm.	r9,r9,0,~ppFlags			; Remove flags, yielding 32-bit physical chain pointer
-gadRemLoop:	beq-	gadPEMissMiss				; End of chain, this is not good
-			cmplw	r9,r31						; Is this the mapping to remove?
-			lwz		r8,mpAlias+4(r9)			; Get forward chain pointer
-			bne		gadRemNext					; No, chain onward
-			bt		cr1_eq,gadRemRetry			; Mapping to remove is chained from anchor
-			stw		r8,0(r11)					; Unchain gpv->phys mapping
-			b		gadDelDone					; Finish deleting mapping
-gadRemRetry:
-			lwarx	r0,0,r11					; Get previous link
-			rlwimi	r0,r8,0,~ppFlags			; Insert new forward pointer whilst preserving flags
-			stwcx.	r0,0,r11					; Update previous link
-			bne-	gadRemRetry					; Lost reservation, retry
-			b		gadDelDone					; Finish deleting mapping
-			
-gadRemNext:	la		r11,mpAlias+4(r9)			; Point to (soon to be) previous link
-			crclr	cr1_eq						; ~cr1_eq <- Previous link is not the anchor
-			mr.		r9,r8						; Does next entry exist?
-			b		gadRemLoop					; Carry on
-
-gadRemove64:
-			li		r7,ppLFAmask				; Get mask to clean up mapping pointer
-			rotrdi	r7,r7,ppLFArrot				; Rotate clean up mask to get 0xF00000000000000F
-			la		r11,ppLink(r26)				; Point to chain anchor
-			ld		r9,ppLink(r26)				; Get chain anchor
-			andc.	r9,r9,r7					; Remove flags, yielding 64-bit physical chain pointer
-gadRem64Lp:	beq--	gadPEMissMiss				; End of chain, this is not good
-			cmpld	r9,r31						; Is this the mapping to remove?
-			ld		r8,mpAlias(r9)				; Get forward chain pointer
-			bne		gadRem64Nxt					; Not mapping to remove, chain on, dude
-			bt		cr1_eq,gadRem64Rt			; Mapping to remove is chained from anchor
-			std		r8,0(r11)					; Unchain gpv->phys mapping
-			b		gadDelDone					; Finish deleting mapping
-gadRem64Rt:	ldarx	r0,0,r11					; Get previous link
-			and		r0,r0,r7					; Get flags
-			or		r0,r0,r8					; Insert new forward pointer
-			stdcx.	r0,0,r11					; Slam it back in
-			bne--	gadRem64Rt					; Lost reservation, retry
-			b		gadDelDone					; Finish deleting mapping
-
-			.align	5		
-gadRem64Nxt:
-			la		r11,mpAlias(r9)				; Point to (soon to be) previous link
-			crclr	cr1_eq						; ~cr1_eq <- Previous link is not the anchor
-			mr.		r9,r8						; Does next entry exist?
-			b		gadRem64Lp					; Carry on
-			
-gadDelDone:
-			mr		r3,r26						; Get physent address
-			bl		mapPhysUnlock				; Unlock physent chain
-
-gadFillMap:
-			lwz		r12,pmapSpace(r28)			; Get guest space id number
-			li		r2,0						; Get a zero
-			stw		r24,mpFlags(r31)			; Set mapping's flags
-			sth		r12,mpSpace(r31)			; Set mapping's space id number
-			stw		r2,mpPte(r31)				; Set mapping's pte pointer invalid
-			stw		r29,mpPAddr(r31)			; Set mapping's physical address
-			bt++	pf64Bitb,gadVA64			; Use 64-bit version on 64-bit machine
-			stw		r30,mpVAddr+4(r31)			; Set mapping's virtual address (w/flags)
-			b		gadChain					; Continue with chaining mapping to physent
-gadVA64:	std		r30,mpVAddr(r31)			; Set mapping's virtual address (w/flags)
-			
-gadChain:	mr		r3,r29						; r3 <- physical frame address
-			bl		mapFindLockPN				; Find 'n' lock this page's physent
-			mr.		r26,r3						; Got lock on our physent?
-			beq--	gadBadPLock					; No, time to bail out
-
-			bt++	pf64Bitb,gadChain64			; Use 64-bit version on 64-bit machine
-			lwz		r12,ppLink+4(r26)			; Get forward chain
-			rlwinm	r11,r12,0,~ppFlags			; Get physent's forward pointer sans flags
-			rlwimi	r12,r31,0,~ppFlags			; Insert new mapping, preserve physent flags
-			stw		r11,mpAlias+4(r31)			; New mapping will head chain
-			stw		r12,ppLink+4(r26)			; Point physent to new mapping
-			b		gadFinish					; All over now...
-
-gadChain64:	li		r7,ppLFAmask				; Get mask to clean up mapping pointer
-			rotrdi	r7,r7,ppLFArrot				; Rotate cleanup mask to get 0xF00000000000000F
-			ld		r12,ppLink(r26)				; Get forward chain
-			andc	r11,r12,r7					; Get physent's forward chain pointer sans flags
-			and		r12,r12,r7					; Isolate pointer's flags
-			or		r12,r12,r31					; Insert new mapping's address forming pointer
-			std		r11,mpAlias(r31)			; New mapping will head chain
-			std		r12,ppLink(r26)				; Point physent to new mapping
-
-gadFinish:	eieio								; Ensure new mapping is completely visible
-			
-gadRelPhy:	mr		r3,r26						; r3 <- physent addr
-			bl		mapPhysUnlock				; Unlock physent chain
-			
-gadRelPmap:	la		r3,pmapSXlk(r27)			; r3 <- host pmap search lock phys addr
-			bl		sxlkUnlock					; Release host pmap search lock
-			
-			bt++	pf64Bitb,gadRtn64			; Handle 64-bit separately
-			mtmsr	r25							; Restore 'rupts, translation
-			isync								; Throw a small wrench into the pipeline
-			b		gadPopFrame					; Nothing to do now but pop a frame and return
-gadRtn64:	mtmsrd	r25							; Restore 'rupts, translation, 32-bit mode
-gadPopFrame:		
-			lwz		r0,(FM_ALIGN(gadStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Get caller's return address
-			lwz		r31,FM_ARG0+0x00(r1)		; Restore non-volatile r31
-			lwz		r30,FM_ARG0+0x04(r1)		; Restore non-volatile r30
-			lwz		r29,FM_ARG0+0x08(r1)		; Restore non-volatile r29
-			lwz		r28,FM_ARG0+0x0C(r1)		; Restore non-volatile r28
-			mtlr	r0							; Prepare return address
-			lwz		r27,FM_ARG0+0x10(r1)		; Restore non-volatile r27
-			lwz		r26,FM_ARG0+0x14(r1)		; Restore non-volatile r26
-			lwz		r25,FM_ARG0+0x18(r1)		; Restore non-volatile r25
-			lwz		r24,FM_ARG0+0x1C(r1)		; Restore non-volatile r24
-			lwz		r23,FM_ARG0+0x20(r1)		; Restore non-volatile r23
-			lwz		r22,FM_ARG0+0x24(r1)		; Restore non-volatile r22
-			lwz		r1,0(r1)					; Pop stack frame
-			blr									; Return to caller
-
-gadPEMissMiss:
-gadBadPLock:
-			lis		r0,hi16(Choke)				; Dmitri, you know how we've always talked about the
-			ori		r0,r0,lo16(Choke)			;  possibility of something going wrong with the bomb?
-			li		r3,failMapping				; The BOMB, Dmitri.
-			sc									; The hydrogen bomb.
-
-
-;
-;			Guest shadow assist -- suspend a guest mapping
-;
-;			Suspends a guest mapping.
-;
-;			Parameters:
-;				r3 : address of host pmap, 32-bit kernel virtual address
-;				r4 : address of guest pmap, 32-bit kernel virtual address
-;				r5 : guest virtual address, high-order 32 bits
-;				r6 : guest virtual address,  low-order 32 bits
-;
-;			Non-volatile register usage:
-;				r26 : VMM extension block's physical address
-;				r27 : host pmap physical address
-;				r28 : guest pmap physical address
-;				r29 : caller's msr image from mapSetUp
-;				r30 : guest virtual address
-;				r31 : gva->phys mapping's physical address
-;
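-;
-;			In rough C, the slot search and suspend above reduce to the
-;			sketch below. The struct layout, flag values, and GV_SLOTS count
-;			are illustrative stand-ins, not the kernel's real definitions:
-;
-;				#include <stdbool.h>
-;				#include <stdint.h>
-;
-;				#define GV_SLOTS    8           /* assumed slots per hash group */
-;				#define MPG_FREE    0x01u       /* assumed flag encodings */
-;				#define MPG_DORMANT 0x02u
-;
-;				typedef struct {
-;				    uint32_t flags;             /* mpFlags word */
-;				    uint16_t space;             /* guest space ID */
-;				    uint64_t vaddr;             /* guest VA; low 12 bits hold flags */
-;				} gv_slot_t;
-;
-;				/* Mark the active slot matching (space, va) dormant; a miss
-;				   is not an error, matching the assembly's behavior. */
-;				static bool gv_suspend(gv_slot_t grp[GV_SLOTS],
-;				                       uint16_t space, uint64_t va)
-;				{
-;				    for (int i = 0; i < GV_SLOTS; i++) {
-;				        gv_slot_t *s = &grp[i];
-;				        if ((s->flags & (MPG_FREE | MPG_DORMANT)) == 0 &&
-;				            s->space == space &&
-;				            (s->vaddr & ~0xFFFull) == (va & ~0xFFFull)) {
-;				            /* the assembly also tears down the slot's PTE here */
-;				            s->flags |= MPG_DORMANT;
-;				            return true;
-;				        }
-;				    }
-;				    return false;
-;				}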
-
-			.align	5
-			.globl	EXT(hw_susp_map_gv)
-
-LEXT(hw_susp_map_gv)
-
-#define gsuStackSize ((31-26+1)*4)+4
-
-			stwu	r1,-(FM_ALIGN(gsuStackSize)+FM_SIZE)(r1)
-												; Mint a new stack frame
-			mflr	r0							; Get caller's return address
-			mfsprg	r11,2						; Get feature flags
-			mtcrf	0x02,r11					; Insert feature flags into cr6
-			stw		r0,(FM_ALIGN(gsuStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Save caller's return address
-			stw		r31,FM_ARG0+0x00(r1)		; Save non-volatile r31
-			stw		r30,FM_ARG0+0x04(r1)		; Save non-volatile r30
-			stw		r29,FM_ARG0+0x08(r1)		; Save non-volatile r29
-			stw		r28,FM_ARG0+0x0C(r1)		; Save non-volatile r28
-			stw		r27,FM_ARG0+0x10(r1)		; Save non-volatile r27
-			stw		r26,FM_ARG0+0x14(r1)		; Save non-volatile r26
-
-			rlwinm	r30,r6,0,0xFFFFF000			; Clean up low-order 32 bits of guest vaddr
-
-			lwz		r11,pmapVmmExt(r3)			; r11 <- VMM pmap extension block vaddr
-			lwz		r9,pmapSpace(r4)			; r9 <- guest space ID number
-			bt++	pf64Bitb,gsu64Salt			; Test for 64-bit machine
-
-			lwz		r26,pmapVmmExtPhys+4(r3)	; r26 <- VMM pmap extension block paddr
-			lwz		r27,pmapvr+4(r3)			; Get 32-bit virt<->real host pmap conversion salt
-			lwz		r28,pmapvr+4(r4)			; Get 32-bit virt<->real guest pmap conversion salt
-			la		r31,VMX_HPIDX_OFFSET(r11)	; r31 <- base of hash page physical index
-			srwi	r11,r30,12					; Form shadow hash:
-			xor		r11,r11,r9					; 	spaceID ^ (vaddr >> 12) 
-			rlwinm	r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK
-												; Form index offset from hash page number
-			add		r31,r31,r10					; r31 <- hash page index entry
-			lwz		r31,4(r31)					; r31 <- hash page paddr
-			rlwimi	r31,r11,GV_HGRP_SHIFT,GV_HGRP_MASK
-												; r31 <- hash group paddr
-			b		gsuStart					; Get to it			
-gsu64Salt:	rldimi	r30,r5,32,0					; Insert high-order 32 bits of 64-bit guest vaddr
-			ld		r26,pmapVmmExtPhys(r3)		; r26 <- VMM pmap extension block paddr
-			ld		r27,pmapvr(r3)				; Get 64-bit virt<->real host pmap conversion salt
-			ld		r28,pmapvr(r4)				; Get 64-bit virt<->real guest pmap conversion salt
-			la		r31,VMX_HPIDX_OFFSET(r11)	; r31 <- base of hash page physical index
-			srwi	r11,r30,12					; Form shadow hash:
-			xor		r11,r11,r9					; 	spaceID ^ (vaddr >> 12) 
-			rlwinm	r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK
-												; Form index offset from hash page number
-			add		r31,r31,r10					; r31 <- hash page index entry
-			ld		r31,0(r31)					; r31 <- hash page paddr
-			insrdi	r31,r11,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2)
-												; r31 <- hash group paddr
-
-gsuStart:	xor		r27,r3,r27					; Convert host pmap_t virt->real
-			xor		r28,r4,r28					; Convert guest pmap_t virt->real
-			bl		EXT(mapSetUp)				; Disable 'rupts, translation, maybe enter 64-bit mode
-			mr		r29,r11						; Save caller's msr image
-
-			la		r3,pmapSXlk(r27)			; r3 <- host pmap's search lock address
-			bl		sxlkExclusive				; Get lock exclusive
-
-			li		r0,(GV_SLOTS - 1)			; Prepare to iterate over mapping slots
-			mtctr	r0							;  in this group
-			bt++	pf64Bitb,gsu64Search		; Test for 64-bit machine
-
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			lwz		r5,mpVAddr+4(r31)			; r5 <- 1st mapping slot's virtual address
-			b		gsu32SrchLp					; Let the search begin!
-			
-			.align	5
-gsu32SrchLp:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrwi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			lwz		r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r7,r7,r9					; Compare space ID
-			or		r0,r11,r7					; r0 <- !(!free && !dormant && space match)
-			xor		r8,r8,r30					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		gsuSrchHit					; Join common path on hit (r31 points to guest mapping)
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	gsu32SrchLp					; Iterate
-
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			clrrwi	r5,r5,12					; Remove flags from virtual address			
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r4,r4,r9					; Compare space ID
-			or		r0,r11,r4					; r0 <- !(!free && !dormant && space match)
-			xor		r5,r5,r30					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		gsuSrchHit					; Join common path on hit (r31 points to guest mapping)
-			b		gsuSrchMiss					; No joy in our hash group
-			
-gsu64Search:			
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			ld		r5,mpVAddr(r31)				; r5 <- 1st mapping slot's virtual address
-			b		gsu64SrchLp					; Let the search begin!
-			
-			.align	5
-gsu64SrchLp:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrdi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			ld		r5,mpVAddr+GV_SLOT_SZ(r31)	; r5 <- next mapping slot's virtual addr
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r7,r7,r9					; Compare space ID
-			or		r0,r11,r7					; r0 <- !(!free && !dormant && space match)
-			xor		r8,r8,r30					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		gsuSrchHit					; Join common path on hit (r31 points to guest mapping)
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	gsu64SrchLp					; Iterate
-
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			clrrdi	r5,r5,12					; Remove flags from virtual address			
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r4,r4,r9					; Compare space ID
-			or		r0,r11,r4					; r0 <- !(!free && !dormant && space match)
-			xor		r5,r5,r30					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			bne		gsuSrchMiss					; No joy in our hash group
-			
-gsuSrchHit:
-			bt++	pf64Bitb,gsuDscon64			; Handle 64-bit disconnect separately
-			bl		mapInvPte32					; Disconnect PTE, invalidate, gather ref and change
-												; r31 <- mapping's physical address
-												; r3  -> PTE slot physical address
-												; r4  -> High-order 32 bits of PTE
-												; r5  -> Low-order  32 bits of PTE
-												; r6  -> PCA
-												; r7  -> PCA physical address
-			rlwinm	r2,r3,29,29,31				; Get PTE's slot number in the PTEG (8-byte PTEs)
-			b		gsuFreePTE					; Join 64-bit path to release the PTE			
-gsuDscon64:	bl		mapInvPte64					; Disconnect PTE, invalidate, gather ref and change
-			rlwinm	r2,r3,28,29,31				; Get PTE's slot number in the PTEG (16-byte PTEs)
-gsuFreePTE: mr.		r3,r3						; Was there a valid PTE?
-			beq-	gsuNoPTE					; No valid PTE, we're almost done
-			lis		r0,0x8000					; Prepare free bit for this slot
-			srw		r0,r0,r2					; Position free bit
-			or		r6,r6,r0					; Set it in our PCA image
-			lwz		r8,mpPte(r31)				; Get PTE pointer
-			rlwinm	r8,r8,0,~mpHValid			; Make the pointer invalid
-			stw		r8,mpPte(r31)				; Save invalidated PTE pointer
-			eieio								; Synchronize all previous updates (mapInvPtexx didn't)
-			stw		r6,0(r7)					; Update PCA and unlock the PTEG
-			
-gsuNoPTE:	lwz		r3,mpFlags(r31)				; Get mapping's flags
-			ori		r3,r3,mpgDormant			; Mark entry dormant
-			stw		r3,mpFlags(r31)				; Save updated flags
-			eieio								; Ensure update is visible when we unlock
-
-gsuSrchMiss:
-			la		r3,pmapSXlk(r27)			; r3 <- host pmap search lock phys addr
-			bl		sxlkUnlock					; Release host pmap search lock
-			
-			bt++	pf64Bitb,gsuRtn64			; Handle 64-bit separately
-			mtmsr	r29							; Restore 'rupts, translation
-			isync								; Throw a small wrench into the pipeline
-			b		gsuPopFrame					; Nothing to do now but pop a frame and return
-gsuRtn64:	mtmsrd	r29							; Restore 'rupts, translation, 32-bit mode
-gsuPopFrame:		
-			lwz		r0,(FM_ALIGN(gsuStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Get caller's return address
-			lwz		r31,FM_ARG0+0x00(r1)		; Restore non-volatile r31
-			lwz		r30,FM_ARG0+0x04(r1)		; Restore non-volatile r30
-			lwz		r29,FM_ARG0+0x08(r1)		; Restore non-volatile r29
-			lwz		r28,FM_ARG0+0x0C(r1)		; Restore non-volatile r28
-			mtlr	r0							; Prepare return address
-			lwz		r27,FM_ARG0+0x10(r1)		; Restore non-volatile r27
-			lwz		r26,FM_ARG0+0x14(r1)		; Restore non-volatile r26
-			lwz		r1,0(r1)					; Pop stack frame
-			blr									; Return to caller
-
-;
-;			Guest shadow assist -- test guest mapping reference and change bits
-;
-;			Locates the specified guest mapping, and if it exists gathers its reference
-;			and change bits, optionally resetting them.
-;
-;			Parameters:
-;				r3 : address of host pmap, 32-bit kernel virtual address
-;				r4 : address of guest pmap, 32-bit kernel virtual address
-;				r5 : guest virtual address, high-order 32 bits
-;				r6 : guest virtual address,  low-order 32 bits
-;				r7 : reset boolean
-;
-;			Non-volatile register usage:
-;				r24 : VMM extension block's physical address
-;				r25 : return code (w/reference and change bits)
-;				r26 : reset boolean
-;				r27 : host pmap physical address
-;				r28 : guest pmap physical address
-;				r29 : caller's msr image from mapSetUp
-;				r30 : guest virtual address
-;				r31 : gva->phys mapping's physical address
-;
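-;
-;			A minimal C sketch of the gather-and-optionally-reset step once
-;			the mapping is found and its PTE disconnected; the mpR/mpC
-;			encodings are assumptions for illustration:
-;
-;				#include <stdbool.h>
-;				#include <stdint.h>
-;
-;				#define MP_R 0x8u               /* referenced -- assumed value */
-;				#define MP_C 0x4u               /* changed    -- assumed value */
-;
-;				/* pte_lo is the low PTE word returned by mapInvPte32/64;
-;				   map_rc points at the mapping's own copy of the RC bits. */
-;				static uint32_t gv_test_rc(uint32_t *map_rc, uint32_t pte_lo,
-;				                           bool reset)
-;				{
-;				    uint32_t rc = pte_lo & (MP_R | MP_C);  /* gather result */
-;				    if (reset) {
-;				        *map_rc &= ~(MP_R | MP_C);         /* clear mapping copy */
-;				        /* the assembly also clears the PTE's copy before
-;				           revalidating the PTE */
-;				    }
-;				    return rc;                             /* becomes return code */
-;				}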
-
-			.align	5
-			.globl	EXT(hw_test_rc_gv)
-
-LEXT(hw_test_rc_gv)
-
-#define gtdStackSize ((31-24+1)*4)+4
-
-			stwu	r1,-(FM_ALIGN(gtdStackSize)+FM_SIZE)(r1)
-												; Mint a new stack frame
-			mflr	r0							; Get caller's return address
-			mfsprg	r11,2						; Get feature flags
-			mtcrf	0x02,r11					; Insert feature flags into cr6
-			stw		r0,(FM_ALIGN(gtdStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Save caller's return address
-			stw		r31,FM_ARG0+0x00(r1)		; Save non-volatile r31
-			stw		r30,FM_ARG0+0x04(r1)		; Save non-volatile r30
-			stw		r29,FM_ARG0+0x08(r1)		; Save non-volatile r29
-			stw		r28,FM_ARG0+0x0C(r1)		; Save non-volatile r28
-			stw		r27,FM_ARG0+0x10(r1)		; Save non-volatile r27
-			stw		r26,FM_ARG0+0x14(r1)		; Save non-volatile r26
-			stw		r25,FM_ARG0+0x18(r1)		; Save non-volatile r25
-			stw		r24,FM_ARG0+0x1C(r1)		; Save non-volatile r24
-
-			rlwinm	r30,r6,0,0xFFFFF000			; Clean up low-order 32 bits of guest vaddr
-
-			lwz		r11,pmapVmmExt(r3)			; r11 <- VMM pmap extension block vaddr
-			lwz		r9,pmapSpace(r4)			; r9 <- guest space ID number
-
-			bt++	pf64Bitb,gtd64Salt			; Test for 64-bit machine
-
-			lwz		r24,pmapVmmExtPhys+4(r3)	; r24 <- VMM pmap extension block paddr
-			lwz		r27,pmapvr+4(r3)			; Get 32-bit virt<->real host pmap conversion salt
-			lwz		r28,pmapvr+4(r4)			; Get 32-bit virt<->real guest pmap conversion salt
-			la		r31,VMX_HPIDX_OFFSET(r11)	; r31 <- base of hash page physical index
-			srwi	r11,r30,12					; Form shadow hash:
-			xor		r11,r11,r9					; 	spaceID ^ (vaddr >> 12) 
-			rlwinm	r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK
-												; Form index offset from hash page number
-			add		r31,r31,r10					; r31 <- hash page index entry
-			lwz		r31,4(r31)					; r31 <- hash page paddr
-			rlwimi	r31,r11,GV_HGRP_SHIFT,GV_HGRP_MASK
-												; r31 <- hash group paddr
-			b		gtdStart					; Get to it			
-
-gtd64Salt:	rldimi	r30,r5,32,0					; Insert high-order 32 bits of 64-bit guest vaddr
-			ld		r24,pmapVmmExtPhys(r3)		; r24 <- VMM pmap extension block paddr
-			ld		r27,pmapvr(r3)				; Get 64-bit virt<->real host pmap conversion salt
-			ld		r28,pmapvr(r4)				; Get 64-bit virt<->real guest pmap conversion salt
-			la		r31,VMX_HPIDX_OFFSET(r11)	; r31 <- base of hash page physical index
-			srwi	r11,r30,12					; Form shadow hash:
-			xor		r11,r11,r9					; 	spaceID ^ (vaddr >> 12) 
-			rlwinm	r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK
-												; Form index offset from hash page number
-			add		r31,r31,r10					; r31 <- hash page index entry
-			ld		r31,0(r31)					; r31 <- hash page paddr
-			insrdi	r31,r11,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2)
-												; r31 <- hash group paddr
-
-gtdStart:	xor		r27,r3,r27					; Convert host pmap_t virt->real
-			xor		r28,r4,r28					; Convert guest pmap_t virt->real
-			mr		r26,r7						; Save reset boolean
-			bl		EXT(mapSetUp)				; Disable 'rupts, translation, maybe enter 64-bit mode
-			mr		r29,r11						; Save caller's msr image
-
-			la		r3,pmapSXlk(r27)			; r3 <- host pmap's search lock address
-			bl		sxlkExclusive				; Get lock exclusive
-
-			li		r0,(GV_SLOTS - 1)			; Prepare to iterate over mapping slots
-			mtctr	r0							;  in this group
-			bt++	pf64Bitb,gtd64Search		; Test for 64-bit machine
-
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			lwz		r5,mpVAddr+4(r31)			; r5 <- 1st mapping slot's virtual address
-			b		gtd32SrchLp					; Let the search begin!
-			
-			.align	5
-gtd32SrchLp:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrwi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			lwz		r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r7,r7,r9					; Compare space ID
-			or		r0,r11,r7					; r0 <- !(!free && !dormant && space match)
-			xor		r8,r8,r30					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		gtdSrchHit					; Join common path on hit (r31 points to guest mapping)
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	gtd32SrchLp					; Iterate
-
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			clrrwi	r5,r5,12					; Remove flags from virtual address			
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r4,r4,r9					; Compare space ID
-			or		r0,r11,r4					; r0 <- !(!free && !dormant && space match)
-			xor		r5,r5,r30					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		gtdSrchHit					; Join common path on hit (r31 points to guest mapping)
-			b		gtdSrchMiss					; No joy in our hash group
-			
-gtd64Search:			
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			ld		r5,mpVAddr(r31)				; r5 <- 1st mapping slot's virtual address
-			b		gtd64SrchLp					; Let the search begin!
-			
-			.align	5
-gtd64SrchLp:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrdi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			ld		r5,mpVAddr+GV_SLOT_SZ(r31)	; r5 <- next mapping slot's virtual addr
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r7,r7,r9					; Compare space ID
-			or		r0,r11,r7					; r0 <- !(!free && !dormant && space match)
-			xor		r8,r8,r30					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		gtdSrchHit					; Join common path on hit (r31 points to guest mapping)
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	gtd64SrchLp					; Iterate
-
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			clrrdi	r5,r5,12					; Remove flags from virtual address			
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r4,r4,r9					; Compare space ID
-			or		r0,r11,r4					; r0 <- !(!free && !dormant && space match)
-			xor		r5,r5,r30					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			bne		gtdSrchMiss					; No joy in our hash group
-			
-gtdSrchHit:
-			bt++	pf64Bitb,gtdDo64			; Split for 64 bit
-			
-			bl		mapInvPte32					; Invalidate and lock PTEG, also merge into physent
-						
-			cmplwi	cr1,r26,0					; Do we want to clear RC?
-			lwz		r12,mpVAddr+4(r31)			; Get the bottom of the mapping vaddr field
-			mr.		r3,r3						; Was there a previously valid PTE?
-			li		r0,lo16(mpR|mpC)			; Get bits to clear
-
-			and		r25,r5,r0					; Copy RC bits into result
-			beq++	cr1,gtdNoClr32				; Nope...
-			
-			andc	r12,r12,r0					; Clear mapping copy of RC
-			andc	r5,r5,r0					; Clear PTE copy of RC
-			sth		r12,mpVAddr+6(r31)			; Set the new RC in mapping			
-
-gtdNoClr32:	beq--	gtdNoOld32					; No previously valid PTE...
-			
-			sth		r5,6(r3)					; Store updated RC in PTE
-			eieio								; Make sure we do not reorder
-			stw		r4,0(r3)					; Revalidate the PTE
-
-			eieio								; Make sure all updates come first
-			stw		r6,0(r7)					; Unlock PCA
-
-gtdNoOld32:	la		r3,pmapSXlk(r27)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list
-			b		gtdR32						; Join common...
-
-			.align	5			
-			
-			
-gtdDo64:	bl		mapInvPte64					; Invalidate and lock PTEG, also merge into physent
-						
-			cmplwi	cr1,r26,0					; Do we want to clear RC?
-			lwz		r12,mpVAddr+4(r31)			; Get the bottom of the mapping vaddr field
-			mr.		r3,r3						; Was there a previously valid PTE?
-			li		r0,lo16(mpR|mpC)			; Get bits to clear
-
-			and		r25,r5,r0					; Copy RC bits into result
-			beq++	cr1,gtdNoClr64				; Nope...
-			
-			andc	r12,r12,r0					; Clear mapping copy of RC
-			andc	r5,r5,r0					; Clear PTE copy of RC
-			sth		r12,mpVAddr+6(r31)			; Set the new RC			
-
-gtdNoClr64:	beq--	gtdNoOld64					; Nope, no previous PTE...
-			
-			sth		r5,14(r3)					; Store updated RC
-			eieio								; Make sure we do not reorder
-			std		r4,0(r3)					; Revalidate the PTE
-
-			eieio								; Make sure all updates come first
-			stw		r6,0(r7)					; Unlock PCA
-
-gtdNoOld64:	la		r3,pmapSXlk(r27)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list
-			b		gtdR64						; Join common...
-
-gtdSrchMiss:
-			la		r3,pmapSXlk(r27)			; Point to the pmap search lock
-			bl		sxlkUnlock					; Unlock the search list
-			li		r25,mapRtNotFnd				; Get ready to return not found
-			bt++	pf64Bitb,gtdR64				; Test for 64-bit machine
-			
-gtdR32:		mtmsr	r29							; Restore caller's msr image
-			isync
-			b		gtdEpilog
-			
-gtdR64:		mtmsrd	r29							; Restore caller's msr image
-
-gtdEpilog:	lwz		r0,(FM_ALIGN(gtdStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Get caller's return address
-			mr		r3,r25						; Get return code
-			lwz		r31,FM_ARG0+0x00(r1)		; Restore non-volatile r31
-			lwz		r30,FM_ARG0+0x04(r1)		; Restore non-volatile r30
-			lwz		r29,FM_ARG0+0x08(r1)		; Restore non-volatile r29
-			lwz		r28,FM_ARG0+0x0C(r1)		; Restore non-volatile r28
-			mtlr	r0							; Prepare return address
-			lwz		r27,FM_ARG0+0x10(r1)		; Restore non-volatile r27
-			lwz		r26,FM_ARG0+0x14(r1)		; Restore non-volatile r26
-			lwz		r25,FM_ARG0+0x18(r1)		; Restore non-volatile r25
-			lwz		r24,FM_ARG0+0x1C(r1)		; Restore non-volatile r24
-			lwz		r1,0(r1)					; Pop stack frame
-			blr									; Return to caller
-
-;
-;			Guest shadow assist -- convert guest to host virtual address
-;
-;			Locates the specified guest mapping, and if it exists locates the
-;			first mapping belonging to its host on the physical chain and returns
-;			its virtual address.
-;
-;			Note that if there are multiple mappings belonging to this host
-;			chained to the physent to which the guest mapping is chained, then
-;			host virtual aliases exist for this physical address. If host aliases
-;			exist, then we select the first on the physent chain, making it 
-;			unpredictable which of the two or more possible host virtual addresses
-;			will be returned.
-;
-;			Parameters:
-;				r3 : address of guest pmap, 32-bit kernel virtual address
-;				r4 : guest virtual address, high-order 32 bits
-;				r5 : guest virtual address,  low-order 32 bits
-;
-;			Non-volatile register usage:
-;				r24 : physent physical address
-;				r25 : VMM extension block's physical address
-;				r26 : host virtual address
-;				r27 : host pmap physical address
-;				r28 : guest pmap physical address
-;				r29 : caller's msr image from mapSetUp
-;				r30 : guest virtual address
-;				r31 : gva->phys mapping's physical address
-;
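-;
-;			The physent-chain walk at the heart of this routine looks
-;			roughly like the C sketch below; the mapping layout and the
-;			MP_NORMAL value are illustrative stand-ins:
-;
-;				#include <stddef.h>
-;				#include <stdint.h>
-;
-;				#define MP_NORMAL 0u            /* assumed mpNormal encoding */
-;
-;				typedef struct mapping {
-;				    struct mapping *alias;      /* physent alias chain link */
-;				    uint16_t        space;      /* space ID */
-;				    uint32_t        type;       /* mapping type */
-;				    uint64_t        vaddr;      /* VA; low bits hold flags */
-;				} mapping_t;
-;
-;				/* Return the first normal mapping with the host's space ID,
-;				   or all-ones on a miss, as the assembly does. */
-;				static uint64_t gva_to_hva(mapping_t *chain, uint16_t host_space)
-;				{
-;				    for (mapping_t *m = chain; m != NULL; m = m->alias) {
-;				        if (m->type == MP_NORMAL && m->space == host_space)
-;				            return m->vaddr & ~0xFFFull;   /* strip flag bits */
-;				    }
-;				    return ~0ull;
-;				}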
-
-			.align	5
-			.globl	EXT(hw_gva_to_hva)
-
-LEXT(hw_gva_to_hva)
-
-#define gthStackSize ((31-24+1)*4)+4
-
-			stwu	r1,-(FM_ALIGN(gthStackSize)+FM_SIZE)(r1)
-												; Mint a new stack frame
-			mflr	r0							; Get caller's return address
-			mfsprg	r11,2						; Get feature flags
-			mtcrf	0x02,r11					; Insert feature flags into cr6
-			stw		r0,(FM_ALIGN(gthStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Save caller's return address
-			stw		r31,FM_ARG0+0x00(r1)		; Save non-volatile r31
-			stw		r30,FM_ARG0+0x04(r1)		; Save non-volatile r30
-			stw		r29,FM_ARG0+0x08(r1)		; Save non-volatile r29
-			stw		r28,FM_ARG0+0x0C(r1)		; Save non-volatile r28
-			stw		r27,FM_ARG0+0x10(r1)		; Save non-volatile r27
-			stw		r26,FM_ARG0+0x14(r1)		; Save non-volatile r26
-			stw		r25,FM_ARG0+0x18(r1)		; Save non-volatile r25
-			stw		r24,FM_ARG0+0x1C(r1)		; Save non-volatile r24
-
-			rlwinm	r30,r5,0,0xFFFFF000			; Clean up low-order 32 bits of guest vaddr
-
-			lwz		r11,pmapVmmExt(r3)			; r11 <- VMM pmap extension block vaddr
-			lwz		r9,pmapSpace(r3)			; r9 <- guest space ID number
-
-			bt++	pf64Bitb,gth64Salt			; Test for 64-bit machine
-
-			lwz		r25,pmapVmmExtPhys+4(r3)	; r25 <- VMM pmap extension block paddr
-			lwz		r28,pmapvr+4(r3)			; Get 32-bit virt<->real guest pmap conversion salt
-			lwz		r27,vmxHostPmapPhys+4(r11)	; Get host pmap physical address
-			la		r31,VMX_HPIDX_OFFSET(r11)	; r31 <- base of hash page physical index
-			srwi	r11,r30,12					; Form shadow hash:
-			xor		r11,r11,r9					; 	spaceID ^ (vaddr >> 12) 
-			rlwinm	r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK
-												; Form index offset from hash page number
-			add		r31,r31,r10					; r31 <- hash page index entry
-			lwz		r31,4(r31)					; r31 <- hash page paddr
-			rlwimi	r31,r11,GV_HGRP_SHIFT,GV_HGRP_MASK
-												; r31 <- hash group paddr
-			b		gthStart					; Get to it			
-
-gth64Salt:	rldimi	r30,r4,32,0					; Insert high-order 32 bits of 64-bit guest vaddr
-			ld		r25,pmapVmmExtPhys(r3)		; r25 <- VMM pmap extension block paddr
-			ld		r28,pmapvr(r3)				; Get 64-bit virt<->real guest pmap conversion salt
-			ld		r27,vmxHostPmapPhys(r11)	; Get host pmap physical address
-			la		r31,VMX_HPIDX_OFFSET(r11)	; r31 <- base of hash page physical index
-			srwi	r11,r30,12					; Form shadow hash:
-			xor		r11,r11,r9					; 	spaceID ^ (vaddr >> 12) 
-			rlwinm	r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK
-												; Form index offset from hash page number
-			add		r31,r31,r10					; r31 <- hash page index entry
-			ld		r31,0(r31)					; r31 <- hash page paddr
-			insrdi	r31,r11,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2)
-												; r31 <- hash group paddr
-
-gthStart:	xor		r28,r3,r28					; Convert guest pmap_t virt->real
-			bl		EXT(mapSetUp)				; Disable 'rupts, translation, maybe enter 64-bit mode
-			mr		r29,r11						; Save caller's msr image
-
-			la		r3,pmapSXlk(r27)			; r3 <- host pmap's search lock address
-			bl		sxlkExclusive				; Get lock exclusive
-
-			li		r0,(GV_SLOTS - 1)			; Prepare to iterate over mapping slots
-			mtctr	r0							;  in this group
-			bt++	pf64Bitb,gth64Search		; Test for 64-bit machine
-
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			lwz		r5,mpVAddr+4(r31)			; r5 <- 1st mapping slot's virtual address
-			b		gth32SrchLp					; Let the search begin!
-			
-			.align	5
-gth32SrchLp:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrwi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			lwz		r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r7,r7,r9					; Compare space ID
-			or		r0,r11,r7					; r0 <- !(!free && !dormant && space match)
-			xor		r8,r8,r30					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		gthSrchHit					; Join common path on hit (r31 points to guest mapping)
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	gth32SrchLp					; Iterate
-
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			clrrwi	r5,r5,12					; Remove flags from virtual address			
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r4,r4,r9					; Compare space ID
-			or		r0,r11,r4					; r0 <- !(!free && !dormant && space match)
-			xor		r5,r5,r30					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		gthSrchHit					; Join common path on hit (r31 points to guest mapping)
-			b		gthSrchMiss					; No joy in our hash group
-			
-gth64Search:			
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			ld		r5,mpVAddr(r31)				; r5 <- 1st mapping slot's virtual address
-			b		gth64SrchLp					; Let the search begin!
-			
-			.align	5
-gth64SrchLp:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrdi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			ld		r5,mpVAddr+GV_SLOT_SZ(r31)	; r5 <- next mapping slot's virtual addr
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r7,r7,r9					; Compare space ID
-			or		r0,r11,r7					; r0 <- !(!free && !dormant && space match)
-			xor		r8,r8,r30					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		gthSrchHit					; Join common path on hit (r31 points to guest mapping)
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	gth64SrchLp					; Iterate
-
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			clrrdi	r5,r5,12					; Remove flags from virtual address			
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r4,r4,r9					; Compare space ID
-			or		r0,r11,r4					; r0 <- !(!free && !dormant && space match)
-			xor		r5,r5,r30					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			bne		gthSrchMiss					; No joy in our hash group
-			
-gthSrchHit:	lwz		r3,mpPAddr(r31)				; r3 <- physical 4K-page number
-			bl		mapFindLockPN				; Find 'n' lock this page's physent
-			mr.		r24,r3						; Got lock on our physent?
-			beq--	gthBadPLock					; No, time to bail out
-			
-			bt++	pf64Bitb,gthPFnd64			; 64-bit version of physent chain search
-			
-			lwz		r9,ppLink+4(r24)			; Get first mapping on physent
-			lwz		r6,pmapSpace(r27)			; Get host pmap's space id number
-			rlwinm	r9,r9,0,~ppFlags			; Be-gone, unsightly flags
-gthPELoop:	mr.		r12,r9						; Got a mapping to look at?
-			beq-	gthPEMiss					; Nope, we've missed hva->phys mapping
-			lwz		r7,mpFlags(r12)				; Get mapping's flags
-			lhz		r4,mpSpace(r12)				; Get mapping's space id number
-			lwz		r26,mpVAddr+4(r12)			; Get mapping's virtual address
-			lwz		r9,mpAlias+4(r12)			; Next mapping in physent alias chain
-			
-			rlwinm	r0,r7,0,mpType				; Isolate mapping's type
-			rlwinm	r26,r26,0,~mpHWFlags		; Bye-bye unsightly flags
-			xori	r0,r0,mpNormal				; Normal mapping?
-			xor		r4,r4,r6					; Compare w/ host space id number
-			or.		r0,r0,r4					; cr0_eq <- (normal && space id hit)
-			beq		gthPEHit					; Hit
-			b		gthPELoop					; Iterate
-			
-gthPFnd64:	li		r0,ppLFAmask				; Get mask to clean up mapping pointer
-			rotrdi	r0,r0,ppLFArrot				; Rotate cleanup mask to get 0xF00000000000000F
-			ld		r9,ppLink(r24)				; Get first mapping on physent
-			lwz		r6,pmapSpace(r27)			; Get host pmap's space id number
-			andc	r9,r9,r0					; Cleanup mapping pointer
-gthPELp64:	mr.		r12,r9						; Got a mapping to look at?
-			beq--	gthPEMiss					; Nope, we've missed hva->phys mapping
-			lwz		r7,mpFlags(r12)				; Get mapping's flags
-			lhz		r4,mpSpace(r12)				; Get mapping's space id number
-			ld		r26,mpVAddr(r12)			; Get mapping's virtual address
-			ld		r9,mpAlias(r12)				; Next mapping in physent alias chain
-			rlwinm	r0,r7,0,mpType				; Isolate mapping's type
-			rldicr	r26,r26,0,mpHWFlagsb-1		; Bye-bye unsightly flags
-			xori	r0,r0,mpNormal				; Normal mapping?
-			xor		r4,r4,r6					; Compare w/ host space id number
-			or.		r0,r0,r4					; cr0_eq <- (normal && space id hit)
-			beq		gthPEHit					; Hit
-			b		gthPELp64					; Iterate
-
-			.align	5			
-gthPEMiss:	mr		r3,r24						; Get physent's address
-			bl		mapPhysUnlock				; Unlock physent chain
-gthSrchMiss:
-			la		r3,pmapSXlk(r27)			; Get host pmap search lock address
-			bl		sxlkUnlock					; Release host pmap search lock
-			li		r3,-1						; Return 64-bit -1
-			li		r4,-1
-			bt++	pf64Bitb,gthEpi64			; Take 64-bit exit
-			b		gthEpi32					; Take 32-bit exit
-
-			.align	5
-gthPEHit:	mr		r3,r24						; Get physent's address
-			bl		mapPhysUnlock				; Unlock physent chain
-			la		r3,pmapSXlk(r27)			; Get host pmap search lock address
-			bl		sxlkUnlock					; Release host pmap search lock
-
-			bt++	pf64Bitb,gthR64				; Test for 64-bit machine
-			
-gthR32:		li		r3,0						; High-order 32 bits host virtual address
-			mr		r4,r26						; Low-order  32 bits host virtual address
-gthEpi32:	mtmsr	r29							; Restore caller's msr image
-			isync
-			b		gthEpilog
-
-			.align	5			
-gthR64:		srdi	r3,r26,32					; High-order 32 bits host virtual address
-			clrldi	r4,r26,32					; Low-order  32 bits host virtual address
-gthEpi64:	mtmsrd	r29							; Restore caller's msr image
-
-gthEpilog:	lwz		r0,(FM_ALIGN(gthStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Get caller's return address
-			lwz		r31,FM_ARG0+0x00(r1)		; Restore non-volatile r31
-			lwz		r30,FM_ARG0+0x04(r1)		; Restore non-volatile r30
-			lwz		r29,FM_ARG0+0x08(r1)		; Restore non-volatile r29
-			lwz		r28,FM_ARG0+0x0C(r1)		; Restore non-volatile r28
-			mtlr	r0							; Prepare return address
-			lwz		r27,FM_ARG0+0x10(r1)		; Restore non-volatile r27
-			lwz		r26,FM_ARG0+0x14(r1)		; Restore non-volatile r26
-			lwz		r25,FM_ARG0+0x18(r1)		; Restore non-volatile r25
-			lwz		r24,FM_ARG0+0x1C(r1)		; Restore non-volatile r24
-			lwz		r1,0(r1)					; Pop stack frame
-			blr									; Return to caller
-
-gthBadPLock:
-			lis		r0,hi16(Choke)				; Dmitri, you know how we've always talked about the
-			ori		r0,r0,lo16(Choke)			;  possibility of something going wrong with the bomb?
-			li		r3,failMapping				; The BOMB, Dmitri.
-			sc									; The hydrogen bomb.
-
-
-;
-;			Guest shadow assist -- find a guest mapping
-;
-;			Locates the specified guest mapping, and if it exists returns a copy
-;			of it.
-;
-;			Parameters:
-;				r3 : address of guest pmap, 32-bit kernel virtual address
-;				r4 : guest virtual address, high-order 32 bits
-;				r5 : guest virtual address,  low-order 32 bits
-;				r6 : 32 byte copy area, 32-bit kernel virtual address
-;
-;			Non-volatile register usage:
-;				r25 : VMM extension block's physical address
-;				r26 : copy area virtual address
-;				r27 : host pmap physical address
-;				r28 : guest pmap physical address
-;				r29 : caller's msr image from mapSetUp
-;				r30 : guest virtual address
-;				r31 : gva->phys mapping's physical address
-;
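-;
-;			Note the copy is split in two: the eight word loads happen with
-;			translation off while the search lock is held, and the eight
-;			stores happen only after the lock is dropped and translation is
-;			restored (the copy area is a virtual address). A hedged C
-;			equivalent, with the mapRt values assumed:
-;
-;				#include <stdint.h>
-;				#include <string.h>
-;
-;				#define MAP_RT_OK     0         /* assumed return codes */
-;				#define MAP_RT_NOTFND 1
-;
-;				/* snapshot holds the 8 words captured under the lock */
-;				static int gv_find_map(const uint32_t snapshot[8], int hit,
-;				                       uint32_t copy_area[8])
-;				{
-;				    if (!hit)
-;				        return MAP_RT_NOTFND;   /* nothing to copy out */
-;				    memcpy(copy_area, snapshot, 8 * sizeof copy_area[0]);
-;				    return MAP_RT_OK;
-;				}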
-
-			.align	5
-			.globl	EXT(hw_find_map_gv)
-
-LEXT(hw_find_map_gv)
-
-#define gfmStackSize ((31-25+1)*4)+4
-
-			stwu	r1,-(FM_ALIGN(gfmStackSize)+FM_SIZE)(r1)
-												; Mint a new stack frame
-			mflr	r0							; Get caller's return address
-			mfsprg	r11,2						; Get feature flags
-			mtcrf	0x02,r11					; Insert feature flags into cr6
-			stw		r0,(FM_ALIGN(gfmStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Save caller's return address
-			stw		r31,FM_ARG0+0x00(r1)		; Save non-volatile r31
-			stw		r30,FM_ARG0+0x04(r1)		; Save non-volatile r30
-			stw		r29,FM_ARG0+0x08(r1)		; Save non-volatile r29
-			stw		r28,FM_ARG0+0x0C(r1)		; Save non-volatile r28
-			stw		r27,FM_ARG0+0x10(r1)		; Save non-volatile r27
-			stw		r26,FM_ARG0+0x14(r1)		; Save non-volatile r26
-			stw		r25,FM_ARG0+0x18(r1)		; Save non-volatile r25
-
-			rlwinm	r30,r5,0,0xFFFFF000			; Clean up low-order 32 bits of guest vaddr
-			mr		r26,r6						; Copy copy buffer vaddr
-
-			lwz		r11,pmapVmmExt(r3)			; r11 <- VMM pmap extension block vaddr
-			lwz		r9,pmapSpace(r3)			; r9 <- guest space ID number
-
-			bt++	pf64Bitb,gfm64Salt			; Test for 64-bit machine
-
-			lwz		r25,pmapVmmExtPhys+4(r3)	; r25 <- VMM pmap extension block paddr
-			lwz		r28,pmapvr+4(r3)			; Get 32-bit virt<->real guest pmap conversion salt
-			lwz		r27,vmxHostPmapPhys+4(r11)	; Get host pmap physical address
-			la		r31,VMX_HPIDX_OFFSET(r11)	; r31 <- base of hash page physical index
-			srwi	r11,r30,12					; Form shadow hash:
-			xor		r11,r11,r9					; 	spaceID ^ (vaddr >> 12) 
-			rlwinm	r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK
-												; Form index offset from hash page number
-			add		r31,r31,r10					; r31 <- hash page index entry
-			lwz		r31,4(r31)					; r31 <- hash page paddr
-			rlwimi	r31,r11,GV_HGRP_SHIFT,GV_HGRP_MASK
-												; r31 <- hash group paddr
-			b		gfmStart					; Get to it			
-
-gfm64Salt:	rldimi	r30,r4,32,0					; Insert high-order 32 bits of 64-bit guest vaddr
-			ld		r25,pmapVmmExtPhys(r3)		; r25 <- VMM pmap extension block paddr
-			ld		r28,pmapvr(r3)				; Get 64-bit virt<->real guest pmap conversion salt
-			ld		r27,vmxHostPmapPhys(r11)	; Get host pmap physical address
-			la		r31,VMX_HPIDX_OFFSET(r11)	; r31 <- base of hash page physical index
-			srwi	r11,r30,12					; Form shadow hash:
-			xor		r11,r11,r9					; 	spaceID ^ (vaddr >> 12) 
-			rlwinm	r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK
-												; Form index offset from hash page number
-			add		r31,r31,r10					; r31 <- hash page index entry
-			ld		r31,0(r31)					; r31 <- hash page paddr
-			insrdi	r31,r11,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2)
-												; r31 <- hash group paddr
-
-gfmStart:	xor		r28,r3,r28					; Convert guest pmap_t virt->real
-			bl		EXT(mapSetUp)				; Disable 'rupts, translation, maybe enter 64-bit mode
-			mr		r29,r11						; Save caller's msr image
-
-			la		r3,pmapSXlk(r27)			; r3 <- host pmap's search lock address
-			bl		sxlkExclusive				; Get lock exclusive
-
-			li		r0,(GV_SLOTS - 1)			; Prepare to iterate over mapping slots
-			mtctr	r0							;  in this group
-			bt++	pf64Bitb,gfm64Search		; Test for 64-bit machine
-
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			lwz		r5,mpVAddr+4(r31)			; r5 <- 1st mapping slot's virtual address
-			b		gfm32SrchLp					; Let the search begin!
-			
-			.align	5
-gfm32SrchLp:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrwi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			lwz		r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r7,r7,r9					; Compare space ID
-			or		r0,r11,r7					; r0 <- !(!free && !dormant && space match)
-			xor		r8,r8,r30					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		gfmSrchHit					; Join common path on hit (r31 points to guest mapping)
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	gfm32SrchLp					; Iterate
-
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			clrrwi	r5,r5,12					; Remove flags from virtual address			
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r4,r4,r9					; Compare space ID
-			or		r0,r11,r4					; r0 <- !(!free && !dormant && space match)
-			xor		r5,r5,r30					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		gfmSrchHit					; Join common path on hit (r31 points to guest mapping)
-			b		gfmSrchMiss					; No joy in our hash group
-			
-gfm64Search:			
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			ld		r5,mpVAddr(r31)				; r5 <- 1st mapping slot's virtual address
-			b		gfm64SrchLp					; Let the search begin!
-			
-			.align	5
-gfm64SrchLp:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrdi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			ld		r5,mpVAddr+GV_SLOT_SZ(r31)	; r5 <- next mapping slot's virtual addr
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r7,r7,r9					; Compare space ID
-			or		r0,r11,r7					; r0 <- !(!free && !dormant && space match)
-			xor		r8,r8,r30					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		gfmSrchHit					; Join common path on hit (r31 points to guest mapping)
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	gfm64SrchLp					; Iterate
-
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			clrrdi	r5,r5,12					; Remove flags from virtual address			
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r4,r4,r9					; Compare space ID
-			or		r0,r11,r4					; r0 <- !(!free && !dormant && space match)
-			xor		r5,r5,r30					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			bne		gfmSrchMiss					; No joy in our hash group
-			
-gfmSrchHit:	lwz		r5,0(r31)					; Fetch 32 bytes of mapping from physical
-			lwz		r6,4(r31)					;  +4
-			lwz		r7,8(r31)					;  +8
-			lwz		r8,12(r31)					;  +12
-			lwz		r9,16(r31)					;  +16
-			lwz		r10,20(r31)					;  +20
-			lwz		r11,24(r31)					;  +24
-			lwz		r12,28(r31)					;  +28
-			
-			li		r31,mapRtOK					; Return found mapping
-
-			la		r3,pmapSXlk(r27)			; Get host pmap search lock address
-			bl		sxlkUnlock					; Release host pmap search lock
-
-			bt++	pf64Bitb,gfmEpi64			; Test for 64-bit machine
-			
-gfmEpi32:	mtmsr	r29							; Restore caller's msr image
-			isync								; A small wrench
-			b		gfmEpilog					;  and a larger bubble
-
-			.align	5			
-gfmEpi64:	mtmsrd	r29							; Restore caller's msr image
-
-gfmEpilog:	mr.		r3,r31						; Copy and test return code
-			beq		gfmNotFound					; Skip copy if no mapping found
-			
-			stw		r5,0(r26)					; Store 32 bytes of mapping into virtual
-			stw		r6,4(r26)					;  +4
-			stw		r7,8(r26)					;  +8
-			stw		r8,12(r26)					;  +12
-			stw		r9,16(r26)					;  +16
-			stw		r10,20(r26)					;  +20
-			stw		r11,24(r26)					;  +24
-			stw		r12,28(r26)					;  +28
-			
-gfmNotFound:
-			lwz		r0,(FM_ALIGN(gfmStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Get caller's return address
-			lwz		r31,FM_ARG0+0x00(r1)		; Restore non-volatile r31
-			lwz		r30,FM_ARG0+0x04(r1)		; Restore non-volatile r30
-			lwz		r29,FM_ARG0+0x08(r1)		; Restore non-volatile r29
-			lwz		r28,FM_ARG0+0x0C(r1)		; Restore non-volatile r28
-			mtlr	r0							; Prepare return address
-			lwz		r27,FM_ARG0+0x10(r1)		; Restore non-volatile r27
-			lwz		r26,FM_ARG0+0x14(r1)		; Restore non-volatile r26
-			lwz		r25,FM_ARG0+0x18(r1)		; Restore non-volatile r25
-			lwz		r1,0(r1)					; Pop stack frame
-			blr									; Return to caller
-
-			.align	5			
-gfmSrchMiss:
-			li		r31,mapRtNotFnd				; Indicate mapping not found
-			la		r3,pmapSXlk(r27)			; Get host pmap search lock address
-			bl		sxlkUnlock					; Release host pmap search lock
-			bt++	pf64Bitb,gfmEpi64			; Take 64-bit exit
-			b		gfmEpi32					; Take 32-bit exit
-
-
-;
-;			Guest shadow assist -- change guest page protection
-;
-;			Locates the specified guest mapping, and if it is active, changes its
-;			protection.
-;
-;			Parameters:
-;				r3 : address of guest pmap, 32-bit kernel virtual address
-;				r4 : guest virtual address, high-order 32 bits
-;				r5 : guest virtual address,  low-order 32 bits
-;				r6 : guest mapping protection code
-;
-;			Non-volatile register usage:
-;				r25 : caller's msr image from mapSetUp
-;				r26 : guest mapping protection code
-;				r27 : host pmap physical address
-;				r28 : guest pmap physical address
-;				r29 : VMM extension block's physical address
-;				r30 : guest virtual address
-;				r31 : gva->phys mapping's physical address
-;
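-;
-;			The protection change itself is a masked insert into the low
-;			word of mpVAddr after the old PTE is torn down; the next access
-;			faults in a fresh PTE carrying the new bits. A small C sketch,
-;			with the PP field position assumed:
-;
-;				#include <stdint.h>
-;
-;				#define MP_PP 0x7u              /* assumed protection field mask */
-;
-;				static void gv_set_prot(uint32_t *vaddr_lo, uint32_t prot)
-;				{
-;				    *vaddr_lo = (*vaddr_lo & ~MP_PP) | (prot & MP_PP);
-;				}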
-			.align	5
-			.globl	EXT(hw_protect_gv)
-			
-LEXT(hw_protect_gv)
-
-#define gcpStackSize ((31-24+1)*4)+4
-
-			stwu	r1,-(FM_ALIGN(gcpStackSize)+FM_SIZE)(r1)
-												; Mint a new stack frame
-			mflr	r0							; Get caller's return address
-			mfsprg	r11,2						; Get feature flags
-			mtcrf	0x02,r11					; Insert feature flags into cr6
-			stw		r0,(FM_ALIGN(gcpStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Save caller's return address
-			stw		r31,FM_ARG0+0x00(r1)		; Save non-volatile r31
-			stw		r30,FM_ARG0+0x04(r1)		; Save non-volatile r30
-			stw		r29,FM_ARG0+0x08(r1)		; Save non-volatile r29
-			stw		r28,FM_ARG0+0x0C(r1)		; Save non-volatile r28
-			stw		r27,FM_ARG0+0x10(r1)		; Save non-volatile r27
-			stw		r26,FM_ARG0+0x14(r1)		; Save non-volatile r26
-			stw		r25,FM_ARG0+0x18(r1)		; Save non-volatile r25
-
-			rlwinm	r30,r5,0,0xFFFFF000			; Clean up low-order 32 bits of guest vaddr
-			mr		r26,r6						; Copy guest mapping protection code
-
-			lwz		r11,pmapVmmExt(r3)			; r11 <- VMM pmap extension block vaddr
-			lwz		r9,pmapSpace(r3)			; r9 <- guest space ID number
-			bt++	pf64Bitb,gcp64Salt			; Handle 64-bit machine separately
-			lwz		r29,pmapVmmExtPhys+4(r3)	; r29 <- VMM pmap extension block paddr
-			lwz		r27,vmxHostPmapPhys+4(r11)	; r27 <- host pmap paddr
-			lwz		r28,pmapvr+4(r3)			; Get 32-bit virt<->real guest pmap conversion salt
-			la		r31,VMX_HPIDX_OFFSET(r11)	; r31 <- base of hash page physical index
-			srwi	r11,r30,12					; Form shadow hash:
-			xor		r11,r11,r9					; 	spaceID ^ (vaddr >> 12) 
-			rlwinm	r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK
-												; Form index offset from hash page number
-			add		r31,r31,r10					; r31 <- hash page index entry
-			lwz		r31,4(r31)					; r31 <- hash page paddr
-			rlwimi	r31,r11,GV_HGRP_SHIFT,GV_HGRP_MASK
-												; r31 <- hash group paddr
-			b		gcpStart					; Get to it			
-
-gcp64Salt:	rldimi	r30,r4,32,0					; Insert high-order 32 bits of 64-bit guest vaddr			
-			ld		r29,pmapVmmExtPhys(r3)		; r29 <- VMM pmap extension block paddr
-			ld		r27,vmxHostPmapPhys(r11)	; r27 <- host pmap paddr
-			ld		r28,pmapvr(r3)				; Get 64-bit virt<->real guest pmap conversion salt
-			la		r31,VMX_HPIDX_OFFSET(r11)	; r31 <- base of hash page physical index
-			srwi	r11,r30,12					; Form shadow hash:
-			xor		r11,r11,r9					; 	spaceID ^ (vaddr >> 12) 
-			rlwinm	r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK
-												; Form index offset from hash page number
-			add		r31,r31,r10					; r31 <- hash page index entry
-			ld		r31,0(r31)					; r31 <- hash page paddr
-			insrdi	r31,r11,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2)
-												; r31 <- hash group paddr
-
-gcpStart:	xor		r28,r3,r28					; Convert guest pmap_t virt->real
-			bl		EXT(mapSetUp)				; Disable 'rupts, translation, maybe enter 64-bit mode
-			mr		r25,r11						; Save caller's msr image
-
-			la		r3,pmapSXlk(r27)			; r3 <- host pmap's search lock address
-			bl		sxlkExclusive				; Get lock exclusive
-
-			li		r0,(GV_SLOTS - 1)			; Prepare to iterate over mapping slots
-			mtctr	r0							;  in this group
-			bt++	pf64Bitb,gcp64Search		; Test for 64-bit machine
-
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			lwz		r5,mpVAddr+4(r31)			; r5 <- 1st mapping slot's virtual address
-			b		gcp32SrchLp					; Let the search begin!
-			
-			.align	5
-gcp32SrchLp:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrwi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			lwz		r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r7,r7,r9					; Compare space ID
-			or		r0,r11,r7					; r0 <- free || dormant || !space match
-			xor		r8,r8,r30					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		gcpSrchHit					; Join common path on hit (r31 points to guest mapping)
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	gcp32SrchLp					; Iterate
-
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			clrrwi	r5,r5,12					; Remove flags from virtual address			
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r4,r4,r9					; Compare space ID
-			or		r0,r11,r4					; r0 <- free || dormant || !space match
-			xor		r5,r5,r30					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		gcpSrchHit					; Join common path on hit (r31 points to guest mapping)
-			b		gcpSrchMiss					; No joy in our hash group
-			
-gcp64Search:			
-			lwz		r3,mpFlags(r31)				; r3 <- 1st mapping slot's flags
-			lhz		r4,mpSpace(r31)				; r4 <- 1st mapping slot's space ID 
-			ld		r5,mpVAddr(r31)				; r5 <- 1st mapping slot's virtual address
-			b		gcp64SrchLp					; Let the search begin!
-			
-			.align	5
-gcp64SrchLp:
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			lwz		r3,mpFlags+GV_SLOT_SZ(r31)	; r3 <- next mapping slot's flags
-			mr		r7,r4						; r7 <- current mapping slot's space ID
-			lhz		r4,mpSpace+GV_SLOT_SZ(r31)	; r4 <- next mapping slot's space ID
-			clrrdi	r8,r5,12					; r8 <- current mapping slot's virtual addr w/o flags
-			ld		r5,mpVAddr+GV_SLOT_SZ(r31)	; r5 <- next mapping slot's virtual addr
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r7,r7,r9					; Compare space ID
-			or		r0,r11,r7					; r0 <- free || dormant || !space match
-			xor		r8,r8,r30					; Compare virtual address
-			or.		r0,r0,r8					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			beq		gcpSrchHit					; Join common path on hit (r31 points to guest mapping)
-			
-			addi	r31,r31,GV_SLOT_SZ			; r31 <- next mapping slot		
-			bdnz	gcp64SrchLp					; Iterate
-
-			mr		r6,r3						; r6 <- current mapping slot's flags
-			clrrdi	r5,r5,12					; Remove flags from virtual address			
-			andi.	r11,r6,mpgFree+mpgDormant	; Isolate guest free and dormant flags
-			xor		r4,r4,r9					; Compare space ID
-			or		r0,r11,r4					; r0 <- free || dormant || !space match
-			xor		r5,r5,r30					; Compare virtual address
-			or.		r0,r0,r5					; cr0_eq <- !free && !dormant && space match && virtual addr match
-			bne		gcpSrchMiss					; No joy in our hash group
-			
-gcpSrchHit:
-			bt++	pf64Bitb,gcpDscon64			; Handle 64-bit disconnect separately
-			bl		mapInvPte32					; Disconnect PTE, invalidate, gather ref and change
-												; r31 <- mapping's physical address
-												; r3  -> PTE slot physical address
-												; r4  -> High-order 32 bits of PTE
-												; r5  -> Low-order  32 bits of PTE
-												; r6  -> PCA
-												; r7  -> PCA physical address
-			rlwinm	r2,r3,29,29,31				; Get PTE's slot number in the PTEG (8-byte PTEs)
-			b		gcpFreePTE					; Join 64-bit path to release the PTE			
-gcpDscon64:	bl		mapInvPte64					; Disconnect PTE, invalidate, gather ref and change
-			rlwinm	r2,r3,28,29,31				; Get PTE's slot number in the PTEG (16-byte PTEs)
-gcpFreePTE: mr.		r3,r3						; Was there a valid PTE?
-			beq-	gcpSetKey					; No valid PTE, we're almost done
-			lis		r0,0x8000					; Prepare free bit for this slot
-			srw		r0,r0,r2					; Position free bit
-			or		r6,r6,r0					; Set it in our PCA image
-			lwz		r8,mpPte(r31)				; Get PTE pointer
-			rlwinm	r8,r8,0,~mpHValid			; Make the pointer invalid
-			stw		r8,mpPte(r31)				; Save invalidated PTE pointer
-			eieio								; Synchronize all previous updates (mapInvPtexx didn't)
-			stw		r6,0(r7)					; Update PCA and unlock the PTEG
-			
-gcpSetKey:	lwz		r0,mpVAddr+4(r31)			; Get va word containing protection bits
-			rlwimi	r0,r26,0,mpPP				; Insert new protection bits
-			stw		r0,mpVAddr+4(r31)			; Write 'em back
-			eieio								; Ensure previous mapping updates are visible
-			li		r31,mapRtOK					; I'm a success
-
-gcpRelPmap:	la		r3,pmapSXlk(r27)			; r3 <- host pmap search lock phys addr
-			bl		sxlkUnlock					; Release host pmap search lock
-			
-			mr		r3,r31						; r3 <- result code
-			bt++	pf64Bitb,gcpRtn64			; Handle 64-bit separately
-			mtmsr	r25							; Restore 'rupts, translation
-			isync								; Throw a small wrench into the pipeline
-			b		gcpPopFrame					; Nothing to do now but pop a frame and return
-gcpRtn64:	mtmsrd	r25							; Restore 'rupts, translation, 32-bit mode
-gcpPopFrame:		
-			lwz		r0,(FM_ALIGN(gcpStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
-												; Get caller's return address
-			lwz		r31,FM_ARG0+0x00(r1)		; Restore non-volatile r31
-			lwz		r30,FM_ARG0+0x04(r1)		; Restore non-volatile r30
-			lwz		r29,FM_ARG0+0x08(r1)		; Restore non-volatile r29
-			lwz		r28,FM_ARG0+0x0C(r1)		; Restore non-volatile r28
-			mtlr	r0							; Prepare return address
-			lwz		r27,FM_ARG0+0x10(r1)		; Restore non-volatile r27
-			lwz		r26,FM_ARG0+0x14(r1)		; Restore non-volatile r26
-			lwz		r25,FM_ARG0+0x18(r1)		; Restore non-volatile r25
-			lwz		r1,0(r1)					; Pop stack frame
-			blr									; Return to caller
-
-			.align	5
-gcpSrchMiss:
-			li		r31,mapRtNotFnd				; Could not locate requested mapping
-			b		gcpRelPmap					; Exit through host pmap search lock release
-
-
-;
-;			Find the physent based on a physical page and try to lock it (but not too hard) 
-;			Note that this table always has an entry with a 0 table pointer at the end
-;			
-;			R3 contains ppnum on entry
-;			R3 is 0 if no entry was found
-;			R3 is physent if found
-;			cr0_eq is true if lock was obtained or there was no entry to lock
-;			cr0_eq is false if there was an entry and it was already locked
-;	
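-;
-;			In C terms, the table walk and single-shot lock attempt look
-;			roughly like this; the field names, 8-byte entry stride, and
-;			lock bit position are stand-ins inferred from the assembly:
-;
-;				#include <stdatomic.h>
-;				#include <stdbool.h>
-;				#include <stdint.h>
-;
-;				typedef struct {
-;				    _Atomic uint32_t *phys_tab; /* NULL marks end of table */
-;				    uint32_t start, end;        /* ppnum range covered */
-;				} mem_region_t;
-;
-;				static _Atomic uint32_t *
-;				map_find_phys_try(mem_region_t *t, uint32_t ppnum, bool *got_lock)
-;				{
-;				    for (; t->phys_tab != NULL; t++) {
-;				        if (ppnum < t->start || ppnum > t->end)
-;				            continue;           /* not in this region */
-;				        /* 8-byte entries: two 32-bit words per page */
-;				        _Atomic uint32_t *pe = t->phys_tab + 2 * (ppnum - t->start);
-;				        uint32_t old = atomic_load(pe);
-;				        *got_lock = !(old & 0x80000000u) &&
-;				            atomic_compare_exchange_strong(pe, &old,
-;				                                           old | 0x80000000u);
-;				        return pe;              /* entry found, maybe locked */
-;				    }
-;				    *got_lock = true;           /* no entry counts as success */
-;				    return NULL;
-;				}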
-
-			.align	5
-			
-mapFindPhyTry:	
-			lis		r9,hi16(EXT(pmap_mem_regions))		; Point to the start of the region table
-			mr		r2,r3						; Save our target
-			ori		r9,r9,lo16(EXT(pmap_mem_regions))	; Point to the start of the region table			
-
-mapFindPhz:	lwz		r3,mrPhysTab(r9)			; Get the actual table address
-			lwz		r5,mrStart(r9)				; Get start of table entry
-			lwz		r0,mrEnd(r9)				; Get end of table entry
-			addi	r9,r9,mrSize				; Point to the next slot
-			cmplwi	cr2,r3,0					; Are we at the end of the table?
-			cmplw	r2,r5						; See if we are in this table
-			cmplw	cr1,r2,r0					; Check end also
-			sub		r4,r2,r5					; Calculate index to physical entry
-			beq--	cr2,mapFindNo				; Leave if we did not find an entry...
-			cror	cr0_lt,cr0_lt,cr1_gt		; Set CR0_LT if it is NOT this entry
-			slwi	r4,r4,3						; Get offset to physical entry
-
-			blt--	mapFindPhz					; Did not find it...
-			
-			add		r3,r3,r4					; Point right to the slot
-	
-mapFindOv:	lwz		r2,0(r3)					; Get the lock contents right now
-			rlwinm.	r0,r2,0,0,0					; Is it locked?
-			bnelr--								; Yes it is...
-			
-			lwarx	r2,0,r3						; Get the lock
-			rlwinm.	r0,r2,0,0,0					; Is it locked?
-			oris	r0,r2,0x8000				; Set the lock bit
-			bne--	mapFindKl					; It is locked, go get rid of reservation and leave...
-			stwcx.	r0,0,r3						; Try to stuff it back...
-			bne--	mapFindOv					; Collision, try again...
-			isync								; Clear any speculations
-			blr									; Leave...
-
-mapFindKl:	li		r2,lgKillResv				; Killing field
-			stwcx.	r2,0,r2						; Trash reservation...
-			crclr	cr0_eq						; Make sure we do not think we got the lock
-			blr									; Leave...
-
-mapFindNo:	crset	cr0_eq						; Make sure that we set this
-			li		r3,0						; Show that we did not find it
-			blr									; Leave...			
-;
-;			pmapCacheLookup - This function will look up an entry in the pmap segment cache.
-;
-;			How the pmap cache lookup works:
-;
-;			We use a combination of three things: a mask of valid entries, a sub-tag, and the
-;			ESID (aka the "tag").  The mask indicates which of the cache slots actually contain
-;			an entry.  The sub-tag is a 16 entry 4 bit array that contains the low order 4 bits
-;			of the ESID: bits 32:35 of the effective address for 64-bit and 0:3 for 32-bit.  The cache
-;			entry contains the full 36 bit ESID.
-;
-;			The purpose of the sub-tag is to limit the number of searches necessary when looking
-;			for an existing cache entry.  Because there are 16 slots in the cache, we could end up
-;			searching all 16 if a match is not found.  
-;
-;			Essentially, we will search only the slots that have a valid entry and whose sub-tag
-;			matches. More than likely, we will eliminate almost all of the searches.
-;		
-;			Inputs:
-;				R3 = pmap
-;				R4 = ESID high half
-;				R5 = ESID low half
-;
-;			Outputs:
-;				R3 = pmap cache slot if found, 0 if not
-;				R10 = pmapCCtl address
-;				R11 = pmapCCtl image
-;				pmapCCtl locked on exit
-;
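-;			A C sketch of the sub-tag probe (hypothetical names; the real code
-;			below works on the two 32-bit halves with rlwimi/eqv, entirely in
-;			registers):
-;
-;				#include <stdint.h>
-;				static uint32_t subtag_hits(uint64_t subtags, uint32_t esid_lo) {
-;				    uint64_t probe = (esid_lo & 0xFULL) * 0x1111111111111111ULL;
-;				    uint64_t m = ~(subtags ^ probe);  /* eqv: 0xF per matching nibble  */
-;				    m &= m >> 1;                      /* AND nibble bits pairwise...   */
-;				    m &= m >> 2;                      /* ...then all 4 into the low bit */
-;				    uint32_t hits = 0;
-;				    for (int s = 0; s < 16; s++)      /* compress: one bit per slot    */
-;				        hits |= (uint32_t)((m >> (s * 4)) & 1) << s;
-;				    return hits;                      /* then AND with the valid mask  */
-;				}
-;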
-
-			.align	5
-
-pmapCacheLookup:		
-			la		r10,pmapCCtl(r3)			; Point to the segment cache control
-
-pmapCacheLookuq:		
-			lwarx	r11,0,r10					; Get the segment cache control value
-			rlwinm.	r0,r11,0,pmapCCtlLckb,pmapCCtlLckb	; Is it already locked?
-			ori		r0,r11,lo16(pmapCCtlLck)	; Turn on the lock bit
-			bne--	pmapCacheLookur				; Nope...
-			stwcx.	r0,0,r10					; Try to take the lock
-			bne--	pmapCacheLookuq				; Someone else just stuffed it, try again...
-
-			isync								; Make sure we get reservation first
-			lwz		r9,pmapSCSubTag(r3)			; Get the high part of the sub-tag
-			rlwimi	r5,r5,28,4,7				; Copy sub-tag just to right of itself (XX------)
-			lwz		r10,pmapSCSubTag+4(r3)		; And the bottom half
-			rlwimi	r5,r5,24,8,15				; Copy doubled sub-tag to right of itself (XXXX----)
-			lis		r8,0x8888					; Get some eights
-			rlwimi	r5,r5,16,16,31				; Copy quadrupled sub-tags to the right
-			ori		r8,r8,0x8888				; Fill the rest with eights
-
-			eqv		r10,r10,r5					; Get 0xF where we hit in bottom half
-			eqv		r9,r9,r5					; Get 0xF where we hit in top half
-			
-			rlwinm	r2,r10,1,0,30				; Shift over 1
-			rlwinm	r0,r9,1,0,30				; Shift over 1
-			and		r2,r2,r10					; AND the even/odd pair into the even
-			and		r0,r0,r9					; AND the even/odd pair into the even
-			rlwinm	r10,r2,2,0,28				; Shift over 2
-			rlwinm	r9,r0,2,0,28				; Shift over 2
-			and		r10,r2,r10					; AND the even of the ANDed pairs giving the AND of all 4 bits in 0, 4, ...
-			and		r9,r0,r9					; AND the even of the ANDed pairs giving the AND of all 4 bits in 0, 4, ...
-			
-			and		r10,r10,r8					; Clear out extras
-			and		r9,r9,r8					; Clear out extras
-			
-			rlwinm	r0,r10,3,1,28				; Slide adjacent next to each other
-			rlwinm	r2,r9,3,1,28				; Slide adjacent next to each other
-			or		r10,r0,r10					; Merge them
-			or		r9,r2,r9					; Merge them
-			rlwinm	r0,r10,6,2,26				; Slide adjacent pairs next to each other
-			rlwinm	r2,r9,6,2,26				; Slide adjacent pairs next to each other
-			or		r10,r0,r10					; Merge them
-			or		r9,r2,r9					; Merge them
-			rlwimi	r10,r10,12,4,7				; Stick in the low-order adjacent quad
-			rlwimi	r9,r9,12,4,7				; Stick in the low-order adjacent quad
-			not		r6,r11						; Turn invalid into valid
-			rlwimi	r9,r10,24,8,15				; Merge in the adjacent octs giving a hit mask
-			
-			la		r10,pmapSegCache(r3)		; Point at the cache slots
-			and.	r6,r9,r6					; Get mask of valid and hit
-			li		r0,0						; Clear
-			li		r3,0						; Assume not found
-			oris	r0,r0,0x8000				; Start a mask
-			beqlr++								; Leave, should usually be no hits...
-			
-pclNextEnt:	cntlzw	r5,r6						; Find an in use one
-			cmplwi	cr1,r5,pmapSegCacheUse		; Did we find one?
-			rlwinm	r7,r5,4,0,27				; Index to the cache entry
-			srw		r2,r0,r5					; Get validity mask bit
-			add		r7,r7,r10					; Point to the cache slot
-			andc	r6,r6,r2					; Clear the validity bit we just tried
-			bgelr--	cr1							; Leave if there are no more to check...
-			
-			lwz		r5,sgcESID(r7)				; Get the top half
-			
-			cmplw	r5,r4						; Only need to check top because sub-tag is the entire other half
-			
-			bne++	pclNextEnt					; Nope, try again...
-
-			mr		r3,r7						; Point to the slot
-			blr									; Leave....
-
-			.align	5
-
-pmapCacheLookur:
-			li		r11,lgKillResv				; The killing spot
-			stwcx.	r11,0,r11					; Kill the reservation
-			
-pmapCacheLookus:		
-			lwz		r11,pmapCCtl(r3)			; Get the segment cache control
-			rlwinm.	r0,r11,0,pmapCCtlLckb,pmapCCtlLckb	; Is it already locked?
-			beq++	pmapCacheLookup				; Nope...
-			b		pmapCacheLookus				; Yup, keep waiting...
-
-
-;
-;			mapMergeRC -- Given a physical mapping address in R31, locate its
-;           connected PTE (if any) and merge the PTE referenced and changed bits
-;			into the mapping and physent.
-;
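-;			The physent side of the merge is just an atomic OR; as a one-line
-;			C sketch (hypothetical names, with rc_bits already positioned at
-;			the physent's R and C bit positions):
-;
-;				__sync_fetch_and_or(physent_word, rc_bits);
-;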
-
-			.align	5
-			
-mapMergeRC32:
-			lwz		r0,mpPte(r31)				; Grab the PTE offset
-			mfsdr1	r7							; Get the pointer to the hash table
-			lwz		r5,mpVAddr+4(r31)			; Grab the virtual address
-			rlwinm	r10,r7,0,0,15				; Clean up the hash table base
-			andi.	r3,r0,mpHValid				; Is there a possible PTE?
-			srwi	r7,r0,4						; Convert to PCA units
-			rlwinm	r7,r7,0,0,29				; Clean up PCA offset
-			mflr	r2							; Save the return
-			subfic	r7,r7,-4					; Convert to -4 based negative index
-			add		r7,r10,r7					; Point to the PCA directly
-			beqlr--								; There was no PTE to start with...
-			
-			bl		mapLockPteg					; Lock the PTEG
-
-			lwz		r0,mpPte(r31)				; Grab the PTE offset
-			mtlr	r2							; Restore the LR
-			andi.	r3,r0,mpHValid				; Is there a possible PTE?
-			beq-	mMPUnlock					; There is no PTE, someone took it so just unlock and leave...
-
-			rlwinm	r3,r0,0,0,30				; Clear the valid bit
-			add		r3,r3,r10					; Point to actual PTE
-			lwz		r5,4(r3)					; Get the real part of the PTE
-			srwi	r10,r5,12					; Change physical address to a ppnum
-
-mMNmerge:	lbz		r11,mpFlags+1(r31)			; Get the offset to the physical entry table
-			lwz		r0,mpVAddr+4(r31)			; Get the flags part of the field
-			lis		r8,hi16(EXT(pmap_mem_regions))	; Get the top of the region table
-			ori		r8,r8,lo16(EXT(pmap_mem_regions))	; Get the bottom of the region table
-			rlwinm	r11,r11,2,24,29				; Mask index bits and convert to byte offset
-			add		r11,r11,r8					; Point to the bank table
-			lwz		r2,mrPhysTab(r11)			; Get the physical table bank pointer
-			lwz		r11,mrStart(r11)			; Get the start of bank
-			rlwimi	r0,r5,0,mpRb-32,mpCb-32		; Copy in the RC
-			addi	r2,r2,4						; Offset to last half of field
-			stw		r0,mpVAddr+4(r31)			; Set the new RC into the field
-			sub		r11,r10,r11					; Get the index into the table
-			rlwinm	r11,r11,3,0,28				; Get offset to the physent
-
-mMmrgRC:	lwarx	r10,r11,r2					; Get the master RC
-			rlwinm	r0,r5,27,ppRb-32,ppCb-32	; Position the new RC
-			or		r0,r0,r10					; Merge in the new RC
-			stwcx.	r0,r11,r2					; Try to stick it back
-			bne--	mMmrgRC						; Try again if we collided...
-			eieio								; Commit all updates
-
-mMPUnlock:				
-			stw		r6,0(r7)					; Unlock PTEG
-			blr									; Return
-
-;
-;			64-bit version of mapMergeRC
-;
-			.align	5
-
-mapMergeRC64:
-			lwz		r0,mpPte(r31)				; Grab the PTE offset
-			ld		r5,mpVAddr(r31)				; Grab the virtual address
-			mfsdr1	r7							; Get the pointer to the hash table
-			rldicr	r10,r7,0,45					; Clean up the hash table base
-			andi.	r3,r0,mpHValid				; Is there a possible PTE?
-			srdi	r7,r0,5						; Convert to PCA units
-			rldicr	r7,r7,0,61					; Clean up PCA
-			subfic	r7,r7,-4					; Convert to -4 based negative index
-			mflr	r2							; Save the return
-			add		r7,r10,r7					; Point to the PCA directly
-			beqlr--								; There was no PTE to start with...
-			
-			bl		mapLockPteg					; Lock the PTEG
-			
-			lwz		r0,mpPte(r31)				; Grab the PTE offset again
-			mtlr	r2							; Restore the LR
-			andi.	r3,r0,mpHValid				; Is there a possible PTE?
-			beq--	mMPUnlock					; There is no PTE, someone took it so just unlock and leave...
-
-			rlwinm	r3,r0,0,0,30				; Clear the valid bit
-			add		r3,r3,r10					; Point to the actual PTE
-			ld		r5,8(r3)					; Get the real part
-			srdi	r10,r5,12					; Change physical address to a ppnum
-			b		mMNmerge					; Join the common 32-64-bit code...
-
-
-;
-;			This routine, given a mapping, will find and lock the PTEG.
-;			If mpPte does not point to a PTE (checked before and after lock), it will unlock the
-;			PTEG and return.  In this case R4 is undefined, R5 holds only
-;			the low 12 bits of mpVAddr, and R3 will contain 0.
-;
-;			If the mapping is still valid, we will invalidate the PTE and merge
-;			the RC bits into the physent and also save them into the mapping.
-;
-;			We then return with R3 pointing to the PTE slot, R4 is the
-;			top of the PTE and R5 is the bottom.  R6 contains the PCA.
-;			R7 points to the PCA entry.
-;
-;			Note that we should NEVER be called on a block or special mapping.
-;			We could do many bad things.
-;
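-;			The invalidation ordering is the delicate part.  A compressed C
-;			sketch of the 32-bit case (inline asm, hypothetical names; the
-;			real code also serializes tlbie across CPUs with tlbieLock,
-;			omitted here):
-;
-;				#include <stdint.h>
-;				static void pte_invalidate(volatile uint32_t *pte, uint32_t va) {
-;				    pte[0] &= ~0x80000000u;                  /* clear PTE valid bit */
-;				    __asm__ volatile("sync");                /* expose the clear    */
-;				    __asm__ volatile("tlbie %0" :: "r"(va)); /* zap the TLB entry   */
-;				    __asm__ volatile("eieio; tlbsync; sync");/* wait for all CPUs   */
-;				}
-;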
-
-			.align	5
-
-mapInvPte32:
-			lwz		r0,mpPte(r31)				; Grab the PTE offset
-			mfsdr1	r7							; Get the pointer to the hash table
-			lwz		r5,mpVAddr+4(r31)			; Grab the virtual address
-			rlwinm	r10,r7,0,0,15				; Clean up the hash table base
-			andi.	r3,r0,mpHValid				; Is there a possible PTE?
-			srwi	r7,r0,4						; Convert to PCA units
-			rlwinm	r7,r7,0,0,29				; Clean up PCA offset
-			mflr	r2							; Save the return
-			subfic	r7,r7,-4					; Convert to -4 based negative index
-			add		r7,r10,r7					; Point to the PCA directly
-			beqlr--								; There was no PTE to start with...
-			
-			bl		mapLockPteg					; Lock the PTEG
-
-			lwz		r0,mpPte(r31)				; Grab the PTE offset
-			mtlr	r2							; Restore the LR
-			andi.	r3,r0,mpHValid				; Is there a possible PTE?
-			beq-	mIPUnlock					; There is no PTE, someone took it so just unlock and leave...
-
-			rlwinm	r3,r0,0,0,30				; Clear the valid bit
-			add		r3,r3,r10					; Point to actual PTE
-			lwz		r4,0(r3)					; Get the top of the PTE
-			
-			li		r8,tlbieLock				; Get the TLBIE lock
-			rlwinm	r0,r4,0,1,31				; Clear the valid bit
-			stw		r0,0(r3)					; Invalidate the PTE
-
-			sync								; Make sure everyone sees the invalidate
-			
-mITLBIE32:	lwarx	r0,0,r8						; Get the TLBIE lock 
-			mfsprg	r2,2						; Get feature flags 
-			mr.		r0,r0						; Is it locked? 
-			li		r0,1						; Get our lock word 
-			bne-	mITLBIE32					; It is locked, go wait...
-			
-			stwcx.	r0,0,r8						; Try to get it
-			bne-	mITLBIE32					; We was beat...
-			
-			rlwinm.	r0,r2,0,pfSMPcapb,pfSMPcapb	; Can this be an MP box?
-			li		r0,0						; Lock clear value 
-
-			tlbie	r5							; Invalidate it everywhere 
-			
-			beq-	mINoTS32					; Can not have MP on this machine...
-			
-			eieio								; Make sure that the tlbie happens first 
-			tlbsync								; Wait for everyone to catch up 
-			sync								; Make sure of it all
-			
-mINoTS32:	stw		r0,tlbieLock(0)				; Clear the tlbie lock
-			lwz		r5,4(r3)					; Get the real part
-			srwi	r10,r5,12					; Change physical address to a ppnum
-
-mINmerge:	lbz		r11,mpFlags+1(r31)			; Get the offset to the physical entry table
-			lwz		r0,mpVAddr+4(r31)			; Get the flags part of the field
-			lis		r8,hi16(EXT(pmap_mem_regions))	; Get the top of the region table
-			ori		r8,r8,lo16(EXT(pmap_mem_regions))	; Get the bottom of the region table
-			rlwinm	r11,r11,2,24,29				; Mask index bits and convert to byte offset
-			add		r11,r11,r8					; Point to the bank table
-			lwz		r2,mrPhysTab(r11)			; Get the physical table bank pointer
-			lwz		r11,mrStart(r11)			; Get the start of bank
-			rlwimi	r0,r5,0,mpRb-32,mpCb-32		; Copy in the RC
-			addi	r2,r2,4						; Offset to last half of field
-			stw		r0,mpVAddr+4(r31)			; Set the new RC into the field
-			sub		r11,r10,r11					; Get the index into the table
-			rlwinm	r11,r11,3,0,28				; Get offset to the physent
-
-
-mImrgRC:	lwarx	r10,r11,r2					; Get the master RC
-			rlwinm	r0,r5,27,ppRb-32,ppCb-32	; Position the new RC
-			or		r0,r0,r10					; Merge in the new RC
-			stwcx.	r0,r11,r2					; Try to stick it back
-			bne--	mImrgRC						; Try again if we collided...
-			
-			blr									; Leave with the PCA still locked up...
-
-mIPUnlock:	eieio								; Make sure all updates come first
-				
-			stw		r6,0(r7)					; Unlock
-			blr
-
-;
-;			64-bit version
-;
-			.align	5
-
-mapInvPte64:
-			lwz		r0,mpPte(r31)				; Grab the PTE offset
-			ld		r5,mpVAddr(r31)				; Grab the virtual address
-			mfsdr1	r7							; Get the pointer to the hash table
-			rldicr	r10,r7,0,45					; Clean up the hash table base
-			andi.	r3,r0,mpHValid				; Is there a possible PTE?
-			srdi	r7,r0,5						; Convert to PCA units
-			rldicr	r7,r7,0,61					; Clean up PCA
-			subfic	r7,r7,-4					; Convert to -4 based negative index
-			mflr	r2							; Save the return
-			add		r7,r10,r7					; Point to the PCA directly
-			beqlr--								; There was no PTE to start with...
-			
-			bl		mapLockPteg					; Lock the PTEG
-			
-			lwz		r0,mpPte(r31)				; Grab the PTE offset again
-			mtlr	r2							; Restore the LR
-			andi.	r3,r0,mpHValid				; Is there a possible PTE?
-			beq--	mIPUnlock					; There is no PTE, someone took it so just unlock and leave...
-
-			rlwinm	r3,r0,0,0,30				; Clear the valid bit
-			add		r3,r3,r10					; Point to the actual PTE
-			ld		r4,0(r3)					; Get the top of the PTE
-
-			li		r8,tlbieLock				; Get the TLBIE lock
-			rldicr	r0,r4,0,62					; Clear the valid bit
-			std		r0,0(r3)					; Invalidate the PTE
-			
-			rldicr	r2,r4,16,35					; Shift the AVPN over to match VPN
-			sync								; Make sure everyone sees the invalidate
-			rldimi	r2,r5,0,36					; Cram in the page portion of the EA
-			
-mITLBIE64:	lwarx	r0,0,r8						; Get the TLBIE lock 
-			mr.		r0,r0						; Is it locked? 
-			li		r0,1						; Get our lock word 
-			bne--	mITLBIE64a					; It is locked, toss reservation and wait...
-			
-			stwcx.	r0,0,r8						; Try to get it
-			bne--	mITLBIE64					; We was beat...
-
-			rldicl	r2,r2,0,16					; Clear bits 0:15 because we are under orders
-			
-			li		r0,0						; Lock clear value 
-
-			tlbie	r2							; Invalidate it everywhere 
-
-			eieio								; Make sure that the tlbie happens first 
-			tlbsync								; Wait for everyone to catch up 
-			ptesync								; Wait for quiet again
-
-			stw		r0,tlbieLock(0)				; Clear the tlbie lock
-
-			ld		r5,8(r3)					; Get the real part
-			srdi	r10,r5,12					; Change physical address to a ppnum
-			b		mINmerge					; Join the common 32-64-bit code...
-
-mITLBIE64a:	li		r5,lgKillResv				; Killing field
-			stwcx.	r5,0,r5						; Kill reservation
-			
-mITLBIE64b:	lwz		r0,0(r8)					; Get the TLBIE lock
-			mr.		r0,r0						; Is it locked?
-			beq++	mITLBIE64					; Nope, try again...
-			b		mITLBIE64b					; Yup, wait for it...
-
-;
-;			mapLockPteg - Locks a PTEG
-;			R7 points to PCA entry
-;			R6 contains PCA on return
-;
-;
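-;			Roughly, in C (a sketch with the lock bit passed in, since only
-;			the assembly's PCAlock definition knows its position; the real
-;			code kills the lwarx reservation while it spins):
-;
-;				#include <stdint.h>
-;				static uint32_t pca_lock(volatile uint32_t *pca, uint32_t lockbit) {
-;				    for (;;) {
-;				        uint32_t v = *pca;
-;				        if (v & lockbit) continue;       /* spin until it looks free */
-;				        if (__sync_bool_compare_and_swap(pca, v, v | lockbit))
-;				            return v;                    /* unlocked image, like R6  */
-;				    }
-;				}
-;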
-
-			.align	5
-			
-mapLockPteg:
-			lwarx	r6,0,r7						; Pick up the PCA
-			rlwinm.	r0,r6,0,PCAlockb,PCAlockb	; Is the PTEG locked?
-			ori		r0,r6,PCAlock				; Set the lock bit
-			bne--	mLSkill						; It is locked...
-			
-			stwcx.	r0,0,r7						; Try to lock the PTEG
-			bne--	mapLockPteg					; We collided...
-			
-			isync								; Nostradamus lied
-			blr									; Leave...
-				
-mLSkill:	li		r6,lgKillResv				; Get killing field
-			stwcx.	r6,0,r6						; Kill it
-
-mapLockPteh:
-			lwz		r6,0(r7)					; Pick up the PCA
-			rlwinm.	r0,r6,0,PCAlockb,PCAlockb	; Is the PTEG locked?
-			beq++	mapLockPteg					; Nope, try again...
-			b		mapLockPteh					; Yes, wait for it...
-			
-
-;
-;			The mapSelSlot function selects a PTEG slot to use. As input, it expects R6 
-;			to contain the PCA.  When it returns, R3 contains 0 if an unoccupied slot was
-;			selected, 1 if it stole a non-block PTE, or 2 if it stole a block mapped PTE.
-;			R4 returns the slot index.
-;
-;			CR7 also indicates that we have a block mapping
-;
-;			The PTEG allocation controls are a bit map of the state of the PTEG. 
-;			PCAfree indicates that the PTE slot is empty. 
-;			PCAauto means that it comes from an autogen area.  These guys do not
-;			keep track of reference and change and are actually "wired"; they
-;			are easy to maintain.  PCAsteal is a sliding position mask used to
-;			"randomize" PTE slot stealing.  All 4 of these
-;			fields fit in a single word and are loaded and stored under control of the
-;			PTEG control area lock (PCAlock).
-;
-;			Note that PCAauto does not contribute to the steal calculations at all.  Originally
-;			it did; autogens were second in priority.  This can result in a pathological
-;			case where an instruction can not make forward progress, or one PTE slot
-;			thrashes.
-;
-;			Note that the PCA must be locked when we get here.
-;
-;			Physically, the fields are arranged:
-;				0: PCAfree
-;				1: PCAsteal
-;				2: PCAauto
-;				3: PCAmisc
-;				
-;
-;			At entry, R6 contains new unlocked PCA image (real PCA is locked and untouched)
-;
-;			At exit:
-;
-;			R3 = 0 - no steal
-;			R3 = 1 - steal regular
-;			R3 = 2 - steal autogen
-;			R4 contains slot number
-;			R6 contains updated PCA image
-;
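-;			A loose C rendering of the priority (assuming the byte layout
-;			above: PCAfree in the top byte, PCAsteal below it, PCAauto in
-;			byte 2; PCAsteal is never zero, so clz is well defined):
-;
-;				#include <stdint.h>
-;				static int map_sel_slot(uint32_t pca, int *slot) {
-;				    unsigned bit = (unsigned)__builtin_clz(pca); /* free bits first */
-;				    int s = (int)(bit & 7);          /* slot number within the PTEG  */
-;				    *slot = s;
-;				    if (bit < 8) return 0;           /* empty slot taken, no steal   */
-;				    int autogen = (int)((pca >> (15 - s)) & 1); /* PCAauto bit       */
-;				    return autogen ? 2 : 1;          /* 2 = stole autogen, 1 = not   */
-;				}
-;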
-
-			.align	5
-			
-mapSelSlot:	lis		r10,0						; Clear autogen mask
-			li		r9,0						; Start a mask
-			beq		cr7,mSSnotblk				; Skip if this is not a block mapping
-			ori		r10,r10,lo16(0xFFFF)		; Make sure we mark a block mapping (autogen)
-
-mSSnotblk:	rlwinm	r11,r6,16,24,31				; Isolate just the steal mask
-			oris	r9,r9,0x8000				; Get a mask
-			cntlzw	r4,r6						; Find a slot or steal one
-			ori		r9,r9,lo16(0x8000)			; Insure that we have 0x80008000
-			rlwinm	r4,r4,0,29,31				; Isolate bit position
-			rlwimi	r11,r11,8,16,23				; Get set to march a 1 back into top of 8 bit rotate
-			srw		r2,r9,r4					; Get mask to isolate selected inuse and autogen flags
-			srwi	r11,r11,1					; Slide steal mask right
-			and		r8,r6,r2					; Isolate the old in use and autogen bits
-			andc	r6,r6,r2					; Allocate the slot and also clear autogen flag
-			addi	r0,r8,0x7F00				; Push autogen flag to bit 16
-			and		r2,r2,r10					; Keep the autogen part if autogen
-			addis	r8,r8,0xFF00				; Push in use to bit 0 and invert
-			or		r6,r6,r2					; Add in the new autogen bit 
-			rlwinm	r0,r0,17,31,31				; Get a 1 if the old was autogenned (always 0 if not in use)
-			rlwinm	r8,r8,1,31,31				; Isolate old in use
-			rlwimi	r6,r11,16,8,15				; Stick the new steal slot in
-
-			add		r3,r0,r8					; Get 0 if no steal, 1 if steal normal, 2 if steal autogen			
-			blr									; Leave...
-			
-;
-;			Shared/Exclusive locks
-;
-;			A shared/exclusive lock allows multiple shares of a lock to be taken
-;			but only one exclusive.  A shared lock can be "promoted" to exclusive
-;			when it is the only share.  If there are multiple sharers, the lock
-;			must be "converted".  A promotion drops the share and gains exclusive as
-;			an atomic operation.  If anyone else has a share, the operation fails.
-;			A conversion first drops the share and then takes an exclusive lock.
-;
-;			We will want to add a timeout to this eventually.
-;
-;			R3 is set to 0 for success, non-zero for failure
-;
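-;			The promote/convert distinction in C terms (a sketch using GCC
-;			atomic builtins; the share count lives in the low bits and the
-;			exclusive flag is 0x80000000, matching the locked image below):
-;
-;				#include <stdint.h>
-;				#define SXLK_EXCL 0x80000000u
-;				/* promote: succeeds only if we hold the sole share */
-;				static int sxlk_promote(volatile uint32_t *lk) {
-;				    return __sync_bool_compare_and_swap(lk, 1u, SXLK_EXCL) ? 0 : 1;
-;				}
-;				/* convert: drop our share, then take the lock exclusively */
-;				static int sxlk_convert(volatile uint32_t *lk) {
-;				    if (__sync_bool_compare_and_swap(lk, 1u, SXLK_EXCL))
-;				        return 0;                    /* we were the only share   */
-;				    __sync_fetch_and_sub(lk, 1u);    /* drop the share first     */
-;				    while (!__sync_bool_compare_and_swap(lk, 0u, SXLK_EXCL))
-;				        ;                            /* wait until free, take it */
-;				    return 0;
-;				}
-;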
-
-;
-;			Convert a share into an exclusive
-;
-
-			.align	5
-			
-sxlkConvert:
-
-			lis		r0,0x8000					; Get the locked lock image
-#if 0
-			mflr	r0							; (TEST/DEBUG)
-			oris	r0,r0,0x8000				; (TEST/DEBUG)
-#endif
-		
-sxlkCTry:	lwarx	r2,0,r3						; Get the lock word
-			cmplwi	r2,1						; Does it just have our share?
-			subi	r2,r2,1						; Drop our share in case we do not get it
-			bne--	sxlkCnotfree				; No, we need to unlock...
-			stwcx.	r0,0,r3						; Try to take it exclusively
-			bne--	sxlkCTry					; Collision, try again...
-			
-			isync
-			li		r3,0						; Set RC
-			blr									; Leave...
-
-sxlkCnotfree:
-			stwcx.	r2,0,r3						; Try to drop our share...	
-			bne--	sxlkCTry					; Try again if we collided...
-			b		sxlkExclusive				; Go take it exclusively...
-
-;
-;			Promote shared to exclusive
-;
-
-			.align	5
-			
-sxlkPromote:
-			lis		r0,0x8000					; Get the locked lock image
-#if 0
-			mflr	r0							; (TEST/DEBUG)
-			oris	r0,r0,0x8000				; (TEST/DEBUG)
-#endif
-		
-sxlkPTry:	lwarx	r2,0,r3						; Get the lock word
-			cmplwi	r2,1						; Does it just have our share?
-			bne--	sxlkPkill					; No, just fail (R3 is non-zero)...
-			stwcx.	r0,0,r3						; Try to take it exclusively
-			bne--	sxlkPTry					; Collision, try again...
-			
-			isync
-			li		r3,0						; Set RC
-			blr									; Leave...
-
-sxlkPkill:	li		r2,lgKillResv				; Point to killing field
-			stwcx.	r2,0,r2						; Kill reservation
-			blr									; Leave
-
-
-
-;
-;			Take lock exclusively
-;
-
-			.align	5
-			
-sxlkExclusive:
-			lis		r0,0x8000					; Get the locked lock image
-#if 0
-			mflr	r0							; (TEST/DEBUG)
-			oris	r0,r0,0x8000				; (TEST/DEBUG)
-#endif
-		
-sxlkXTry:	lwarx	r2,0,r3						; Get the lock word
-			mr.		r2,r2						; Is it locked?
-			bne--	sxlkXWait					; Yes...
-			stwcx.	r0,0,r3						; Try to take it
-			bne--	sxlkXTry					; Collision, try again...
-			
-			isync								; Toss anything younger than us
-			li		r3,0						; Set RC
-			blr									; Leave...
-			
-			.align	5
-
-sxlkXWait:	li		r2,lgKillResv				; Point to killing field
-			stwcx.	r2,0,r2						; Kill reservation
-			
-sxlkXWaiu:	lwz		r2,0(r3)					; Get the lock again
-			mr.		r2,r2						; Is it free yet?
-			beq++	sxlkXTry					; Yup...
-			b		sxlkXWaiu					; Hang around a bit more...
-
-;
-;			Take a share of the lock
-;
-
-			.align	5
-			
-sxlkShared:	lwarx	r2,0,r3						; Get the lock word
-			rlwinm.	r0,r2,0,0,0					; Is it locked exclusively?
-			addi	r2,r2,1						; Up the share count
-			bne--	sxlkSWait					; Yes...
-			stwcx.	r2,0,r3						; Try to take it
-			bne--	sxlkShared					; Collision, try again...
-			
-			isync								; Toss anything younger than us
-			li		r3,0						; Set RC
-			blr									; Leave...
-			
-			.align	5
-
-sxlkSWait:	li		r2,lgKillResv				; Point to killing field
-			stwcx.	r2,0,r2						; Kill reservation
-
-sxlkSWaiu:	lwz		r2,0(r3)					; Get the lock again
-			rlwinm.	r0,r2,0,0,0					; Is it locked exclusively?
-			beq++	sxlkShared					; Nope...
-			b		sxlkSWaiu					; Hang around a bit more...
-
-;
-;			Unlock either exclusive or shared.
-;
-
-			.align	5
-			
-sxlkUnlock:	eieio								; Make sure we order our stores out
-		
-sxlkUnTry:	lwarx	r2,0,r3						; Get the lock
-			rlwinm.	r0,r2,0,0,0					; Do we hold it exclusively?
-			subi	r2,r2,1						; Remove our share if we have one
-			li		r0,0						; Clear this
-			bne--	sxlkUExclu					; We hold exclusive...
-			
-			stwcx.	r2,0,r3						; Try to lose our share
-			bne--	sxlkUnTry					; Collision...
-			blr									; Leave...
-			
-sxlkUExclu:	stwcx.	r0,0,r3						; Unlock and release reservation
-			beqlr++								; Leave if ok...
-			b		sxlkUnTry					; Could not store, try over...	
-			
-
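-;
-;			fillPage(ppnum, fill) - fill a physical page with a pattern
-;
-;			Replicates the 32-bit fill value, turns translation and
-;			interrupts off, and writes the page a cache line at a time
-;			with dcbz/dcbz128 plus stores.
-;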
-			.align	5
-			.globl	EXT(fillPage)
-
-LEXT(fillPage)
-
- 			mfsprg	r0,2						; Get feature flags 
-			mtcrf	0x02,r0						; move pf64Bit to cr
-
-			rlwinm	r4,r4,0,1,0					; Copy fill to top of 64-bit register
-			lis		r2,0x0200					; Get vec
-			mr		r6,r4						; Copy
-			ori		r2,r2,0x2000				; Get FP
-			mr		r7,r4						; Copy
-			mfmsr	r5							; Get MSR
-			mr		r8,r4						; Copy
-			andc	r5,r5,r2					; Clear out permanent turn-offs
-			mr		r9,r4						; Copy
-			ori		r2,r2,0x8030				; Clear IR, DR and EE
-			mr		r10,r4						; Copy
-			andc	r0,r5,r2					; Kill them
-			mr		r11,r4						; Copy
-			mr		r12,r4						; Copy
-			bt++	pf64Bitb,fpSF1				; skip if 64-bit (only they take the hint)
-			
-			slwi	r3,r3,12					; Make into a physical address
-			mtmsr	r2							; Interrupts and translation off
-			isync
-			
-			li		r2,4096/32					; Get number of cache lines
-			
-fp32again:	dcbz	0,r3						; Clear
-			addic.	r2,r2,-1					; Count down
-			stw		r4,0(r3)					; Fill
-			stw		r6,4(r3)					; Fill
-			stw		r7,8(r3)					; Fill
-			stw		r8,12(r3)					; Fill
-			stw		r9,16(r3)					; Fill
-			stw		r10,20(r3)					; Fill
-			stw		r11,24(r3)					; Fill
-			stw		r12,28(r3)					; Fill
-			addi	r3,r3,32					; Point next
-			bgt+	fp32again					; Keep going
-
-			mtmsr	r5							; Restore all
-			isync
-			blr									; Return...
-			
-			.align	5
-			
-fpSF1:		li		r2,1
-			sldi	r2,r2,63					; Get 64-bit bit
-			or		r0,r0,r2					; Turn on 64-bit
-			sldi	r3,r3,12					; Make into a physical address
-
-			mtmsrd	r0							; Interrupts and translation off
-			isync
-			
-			li		r2,4096/128					; Get number of cache lines
-						
-fp64again:	dcbz128	0,r3						; Clear
-			addic.	r2,r2,-1					; Count down
-			std		r4,0(r3)					; Fill
-			std		r6,8(r3)					; Fill
-			std		r7,16(r3)					; Fill
-			std		r8,24(r3)					; Fill
-			std		r9,32(r3)					; Fill
-			std		r10,40(r3)					; Fill
-			std		r11,48(r3)					; Fill
-			std		r12,56(r3)					; Fill
-			std		r4,64+0(r3)					; Fill
-			std		r6,64+8(r3)					; Fill
-			std		r7,64+16(r3)				; Fill
-			std		r8,64+24(r3)				; Fill
-			std		r9,64+32(r3)				; Fill
-			std		r10,64+40(r3)				; Fill
-			std		r11,64+48(r3)				; Fill
-			std		r12,64+56(r3)				; Fill
-			addi	r3,r3,128					; Point next
-			bgt+	fp64again					; Keep going
-
-			mtmsrd	r5							; Restore all
-			isync
-			blr									; Return...
-			
-			.align	5
-			.globl	EXT(mapLog)
-
-LEXT(mapLog)
-
-			mfmsr	r12							; Save the current MSR
-			lis		r11,hi16(EXT(mapdebug))		; Get the log anchor, high half
-			ori		r11,r11,lo16(EXT(mapdebug))	; And the low half
-			lwz		r10,0(r11)					; Get the current log pointer
-			mr.		r10,r10						; Has logging started yet?
-			bne++	mLxx						; Yes, keep appending...
-			mr		r10,r3						; No, start the log at the supplied address
-mLxx:		rlwinm	r0,r12,0,MSR_DR_BIT+1,MSR_DR_BIT-1	; Clear data translation
-			mtmsr	r0							; Turn it off
-			isync
-			stw		r4,0(r10)					; Log R4...
-			stw		r4,4(r10)					; ...twice...
-			stw		r5,8(r10)					; ...then R5...
-			stw		r6,12(r10)					; ...and R6
-			mtmsr	r12							; Restore the original MSR
-			isync
-			addi	r10,r10,16					; Advance the log pointer
-			stw		r10,0(r11)					; Save it for next time
-			blr
-			
-#if 1
-			.align	5
-			.globl	EXT(checkBogus)
-
-LEXT(checkBogus)
-
-			BREAKPOINT_TRAP
-			blr									; No-op normally
-			
-#endif						
-
-
-
-
diff --git a/osfmk/ppc/instrumentation.h b/osfmk/ppc/instrumentation.h
deleted file mode 100644
index 536d8aa59..000000000
--- a/osfmk/ppc/instrumentation.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-
-/*
- * Here be the instrumentation page layout
- * Lovingly crafted by Bill Angell using traditional methods
- */
-
-#ifndef _INSTRUMENTATION_H_
-#define _INSTRUMENTATION_H_
-
-#define INTRUMENTATION 1
-
-
-#define inBase 0x6000
-
-#define inEntry 0
-#define inAtGetTb 1
-#define inBeforeTrace 2
-#define inAfterSAAlloc 3
-#define inBeforeFilter 4
-#define inEatRuptQfret 5
-#define inEatRuptSAfree 6
-#define inPassupSwtchSeg 7
-#define inExceptionExit 8
-#define inMiddleOfSC 9
-#define inEatRuptSwtchSeg 10
-#define inPassup 11
-#define inCopyout 12
-#define inMUASbefore 13
-#define inMUAS
-
-#endif /* _INSTRUMENTATION_H_ */
diff --git a/osfmk/ppc/interrupt.c b/osfmk/ppc/interrupt.c
deleted file mode 100644
index e1be2769d..000000000
--- a/osfmk/ppc/interrupt.c
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-
-#include <kern/misc_protos.h>
-#include <kern/assert.h>
-#include <kern/thread.h>
-#include <kern/counters.h>
-#include <kern/etimer.h>
-#include <kern/pms.h>
-#include <ppc/misc_protos.h>
-#include <ppc/trap.h>
-#include <ppc/proc_reg.h>
-#include <ppc/exception.h>
-#include <ppc/savearea.h>
-#include <ppc/vmachmon.h>
-#include <ppc/machine_cpu.h>
-#include <pexpert/pexpert.h>
-#include <sys/kdebug.h>
-
-volatile perfCallback perfIntHook;						/* Pointer to CHUD trap hook routine */
-
-#if CONFIG_DTRACE
-#if (DEVELOPMENT || DEBUG )
-#include <mach/sdt.h>
-#endif
-
-extern vm_offset_t dtrace_get_cpu_int_stack_top(void);
-
-vm_offset_t dtrace_get_cpu_int_stack_top(void)
-{
-	return getPerProc()->intstack_top_ss;
-}
-
-/* See <rdar://problem/4613924> */
-perfCallback tempDTraceIntHook; /* Pointer to DTrace fbt int handler */
-#endif
-
-void unresolved_kernel_trap(int trapno,
-				   struct savearea *ssp,
-				   unsigned int dsisr,
-				   addr64_t dar,
-				   const char *message);
-
-unsigned int isync_mfdec(void);
-
-struct savearea * interrupt(
-        int type,
-        struct savearea *ssp,
-	unsigned int dsisr,
-	unsigned int dar)
-{
-	int	current_cpu;
-	struct per_proc_info	*proc_info;
-	uint64_t		now;
-	thread_t		thread;
-
-	disable_preemption();
-
-	perfCallback fn = perfIntHook;
-	if(fn) {							/* Is there a hook? */
-		if(fn(type, ssp, dsisr, dar) == KERN_SUCCESS) return ssp;	/* If it succeeds, we are done... */
-	}
-	
-#if CONFIG_DTRACE
-	if(tempDTraceIntHook) {							/* Is there a hook? */
-		if(tempDTraceIntHook(type, ssp, dsisr, dar) == KERN_SUCCESS) return ssp;	/* If it succeeds, we are done... */
-	}
-#endif
-
-#if 0
-	{
-		extern void fctx_test(void);
-		fctx_test();
-	}
-#endif
-
-
-	current_cpu = cpu_number();
-	proc_info = getPerProc();
-
-	switch (type) {
-
-		case T_DECREMENTER:
-			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0) | DBG_FUNC_NONE,
-				  isync_mfdec(), (unsigned int)ssp->save_srr0, 0, 0, 0);
-	
-			now = mach_absolute_time();				/* Find out what time it is */
-			
-			if(now >= proc_info->pms.pmsPop) {		/* Is it time for power management state change? */
-				pmsStep(1);							/* Yes, advance step */
-				now = mach_absolute_time();			/* Get the time again since we ran a bit */
-			}
-
-			thread = current_thread();					/* Find ourselves */
-			if(thread->machine.qactTimer != 0) {	/* Is the timer set? */
-				if (thread->machine.qactTimer <= now) {	/* It is set, has it popped? */
-					thread->machine.qactTimer = 0;		/* Clear single shot timer */
-					if((unsigned int)thread->machine.vmmControl & 0xFFFFFFFE) {	/* Are there any virtual machines? */
-						vmm_timer_pop(thread);			/* Yes, check them out... */
-					}
-				}
-			}
-
-			etimer_intr(USER_MODE(ssp->save_srr1), ssp->save_srr0);	/* Handle event timer */
-			break;
-	
-		case T_INTERRUPT:
-			/* Call the platform interrupt routine */
-			counter(c_incoming_interrupts++);
-	
-			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_START,
-			   current_cpu, (unsigned int)ssp->save_srr0, 0, 0, 0);
-	
-#if CONFIG_DTRACE && (DEVELOPMENT || DEBUG )
-			DTRACE_INT5(interrupt__start, void *, proc_info->interrupt_nub, int, proc_info->interrupt_source, 
-						void *, proc_info->interrupt_target, IOInterruptHandler, proc_info->interrupt_handler,
-						void *, proc_info->interrupt_refCon);
-#endif
-
-			proc_info->interrupt_handler(
-				proc_info->interrupt_target, 
-				proc_info->interrupt_refCon,
-				proc_info->interrupt_nub, 
-				proc_info->interrupt_source);
-	
-#if CONFIG_DTRACE && (DEVELOPMENT || DEBUG )
-			DTRACE_INT5(interrupt__complete, void *, proc_info->interrupt_nub, int, proc_info->interrupt_source, 
-						void *, proc_info->interrupt_target, IOInterruptHandler, proc_info->interrupt_handler,
-						void *, proc_info->interrupt_refCon);
-#endif
-
-			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END,
-			   0, 0, 0, 0, 0);
-	
-			break;
-	
-		case T_SIGP:
-			/* Did the other processor signal us? */ 
-			cpu_signal_handler();
-			break;
-	
-		case T_SHUTDOWN:
-			cpu_doshutdown();
-			panic("returning from cpu_doshutdown()\n");
-			break;
-	
-				
-		default:
-			if (!Call_Debugger(type, ssp))
-				unresolved_kernel_trap(type, ssp, dsisr, dar, NULL);
-			break;
-	}
-
-	enable_preemption();
-	return ssp;
-}
diff --git a/osfmk/ppc/io_map.c b/osfmk/ppc/io_map.c
deleted file mode 100644
index e30d357ce..000000000
--- a/osfmk/ppc/io_map.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- * 
- */
-
-#include <debug.h>
-#include <mach/vm_param.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_map.h>
-#include <vm/vm_page.h>
-#include <ppc/pmap.h>
-#include <ppc/io_map_entries.h>
-#include <ppc/Firmware.h>
-#include <ppc/mappings.h>
-#include <ppc/proc_reg.h>
-
-extern vm_offset_t	virtual_avail;
-
-/*
- * Allocate and map memory for devices that may need to be mapped 
- * outside the usual physical memory. If phys_addr is NULL then
- * steal the appropriate number of physical pages from the vm
- * system and map them.
- *
- * Note, this will onl
- */
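-/*
- * Hypothetical usage (illustrative only; the physical address is made
- * up): map two pages of device registers, guarded and uncached, and
- * get back the kernel virtual address of the first register.
- *
- *	vm_offset_t regs = io_map((vm_map_offset_t)0xF8001000ULL,
- *		2 * PAGE_SIZE, VM_MEM_GUARDED | VM_MEM_NOT_CACHEABLE);
- */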
-vm_offset_t
-io_map(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags)
-{
-	vm_offset_t	start;
-	vm_size_t	i;
-	unsigned int    mflags;
-	vm_page_t 	m;
-
-	mflags = mmFlgBlock | mmFlgUseAttr | (flags & VM_MEM_GUARDED) | ((flags & VM_MEM_NOT_CACHEABLE) >> 1);	/* Convert to our mapping_make flags */
-
-#if DEBUG
-	assert (kernel_map != VM_MAP_NULL);			/* VM must be initialised */
-#endif
-
-	if (phys_addr != 0) {						/* If they supplied a physical address, use it */
-
-		size = round_page(size + (phys_addr & PAGE_MASK));	/* Make sure we map all of it */
-
-		(void) kmem_alloc_pageable(kernel_map, &start, size);	/* Get some virtual addresses to use */
-		
-		(void)mapping_make(kernel_pmap, (addr64_t)start, (ppnum_t)(phys_addr >> 12), 
-			mflags,					/* Map with requested cache mode */
-			(size >> 12), VM_PROT_READ|VM_PROT_WRITE);
-
-		return (start + (phys_addr & PAGE_MASK));	/* Pass back the virtual address */
-	
-	} else {
-	
-		(void) kmem_alloc_pageable(kernel_map, &start, size);	/* Get some virtual addresses */
-
-		mapping_prealloc(size);					/* Make sure there are enough free mappings */
-
-		for (i = 0; i < size ; i += PAGE_SIZE) {
-			m = VM_PAGE_NULL;
-			while ((m = vm_page_grab()) == VM_PAGE_NULL) {	/* Get a physical page */
-				VM_PAGE_WAIT();					/* Wait if we didn't have one */
-			}
-			vm_page_gobble(m);
-			
-			(void)mapping_make(kernel_pmap, 
-				(addr64_t)(start + i), m->phys_page, 
-				mflags,					/* Map with requested cache mode */
-				1, VM_PROT_READ|VM_PROT_WRITE);	
-			
-		}
-
-		mapping_relpre();						/* Allow mapping release */
-		return start;
-	}
-}
-
-
-/*
- * Allocate and map memory for devices before the VM system comes alive.
- */
-
-vm_offset_t io_map_spec(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags)
-{
-	vm_offset_t	start;
-	unsigned int    mflags;
-
-	if(kernel_map != VM_MAP_NULL) {				/* If VM system is up, redirect to normal routine */
-		
-		return io_map(phys_addr, size, flags);			/* Map the address */
-	
-	}
-
-	mflags = mmFlgBlock | mmFlgUseAttr | (flags & VM_MEM_GUARDED) | ((flags & VM_MEM_NOT_CACHEABLE) >> 1);	/* Convert to our mapping_make flags */
-	
-	size = round_page(size + (phys_addr - (phys_addr & -PAGE_SIZE)));	/* Extend the length to include it all */
-	start = pmap_boot_map(size);				/* Get me some virtual address */
-
-	(void)mapping_make(kernel_pmap, (addr64_t)start, (ppnum_t)(phys_addr >> 12), 
-		mflags,					/* Map with requested cache mode */
-		(size >> 12), VM_PROT_READ|VM_PROT_WRITE);
-
-	return (start + (phys_addr & PAGE_MASK));
-}
diff --git a/osfmk/ppc/io_map_entries.h b/osfmk/ppc/io_map_entries.h
deleted file mode 100644
index 8fceaaf86..000000000
--- a/osfmk/ppc/io_map_entries.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- * 
- */
-#ifdef	KERNEL_PRIVATE
-
-#ifndef	_PPC_IO_MAP_ENTRIES_H_
-#define	_PPC_IO_MAP_ENTRIES_H_
-
-extern vm_offset_t	io_map(
-				vm_map_offset_t		phys_addr,
-				vm_size_t		size,
-				unsigned int            flags);
-extern vm_offset_t io_map_spec(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags);
-
-#endif	/* _PPC_IO_MAP_ENTRIES_H_ */
-
-#endif	/* KERNEL_PRIVATE */
diff --git a/osfmk/ppc/lock.h b/osfmk/ppc/lock.h
deleted file mode 100644
index 0628f554f..000000000
--- a/osfmk/ppc/lock.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * Copyright (C) 1998 Apple Computer
- * All Rights Reserved
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-
-#ifdef	KERNEL_PRIVATE
-
-#ifndef	_PPC_LOCK_H_
-#define	_PPC_LOCK_H_
-
-#ifdef  MACH_KERNEL_PRIVATE
-
-#include <kern/macro_help.h>
-#include <kern/assert.h>
-#include <mach_ldebug.h>
-#include <ppc/locks.h>
-
-#if     !MACH_LDEBUG
-typedef	lck_rw_t	lock_t;
-#else
-typedef	lck_rw_ext_t	lock_t;
-#endif	/* !MACH_LDEBUG */
-
-extern unsigned int LockTimeOut;			/* Number of hardware ticks of a lock timeout */
-
-#endif	/* MACH_KERNEL_PRIVATE */
-
-#endif	/* _PPC_LOCK_H_ */
-
-#endif	/* KERNEL_PRIVATE */
diff --git a/osfmk/ppc/locks.h b/osfmk/ppc/locks.h
deleted file mode 100644
index 639a820a8..000000000
--- a/osfmk/ppc/locks.h
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#ifndef	_PPC_LOCKS_H_
-#define	_PPC_LOCKS_H_
-
-#include <kern/kern_types.h>
-#ifdef	MACH_KERNEL_PRIVATE
-#include <ppc/hw_lock_types.h>
-#endif
-
-
-#ifdef	MACH_KERNEL_PRIVATE
-
-extern	unsigned int	LcksOpts;
-
-#define enaLkDeb		0x00000001	/* Request debug in default attribute */
-#define enaLkStat		0x00000002	/* Request statistic in default attribute */
-
-#define disLkType		0x80000000	/* Disable type checking */
-#define disLktypeb		0
-#define disLkThread		0x40000000	/* Disable ownership checking */
-#define disLkThreadb	1
-#define enaLkExtStck	0x20000000	/* Enable extended backtrace */
-#define enaLkExtStckb	2
-#define disLkMyLck		0x10000000	/* Disable recursive lock detection */
-#define disLkMyLckb		3
-
-#endif
-
-#ifdef	MACH_KERNEL_PRIVATE
-typedef struct {
-	unsigned int		interlock;
-	unsigned int		lck_spin_pad4[2];
-} lck_spin_t;
-
-#define	LCK_SPIN_TAG_DESTROYED		0x00002007	/* lock marked as Destroyed */
-
-#else
-#ifdef	KERNEL_PRIVATE
-typedef struct {
-	unsigned int   		 opaque[3];
-} lck_spin_t;
-#else
-typedef struct __lck_spin_t__	lck_spin_t;
-#endif
-#endif
-
-#ifdef	MACH_KERNEL_PRIVATE
-typedef struct _lck_mtx_ {
-	union {
-		struct {
-			unsigned int			lck_mtxd_data;
-			unsigned short			lck_mtxd_waiters;
-			unsigned short			lck_mtxd_pri;
-			unsigned int			lck_mtxd_pad8;
-		} lck_mtxd;
-		struct {
-			unsigned int			lck_mtxi_tag;
-			struct _lck_mtx_ext_	*lck_mtxi_ptr;
-			unsigned int			lck_mtxi_pad8;
-		} lck_mtxi;
-	} lck_mtx_sw; 
-} lck_mtx_t;
-
-#define	lck_mtx_data	lck_mtx_sw.lck_mtxd.lck_mtxd_data
-#define	lck_mtx_waiters	lck_mtx_sw.lck_mtxd.lck_mtxd_waiters
-#define	lck_mtx_pri		lck_mtx_sw.lck_mtxd.lck_mtxd_pri
-
-#define lck_mtx_tag	lck_mtx_sw.lck_mtxi.lck_mtxi_tag
-#define lck_mtx_ptr		lck_mtx_sw.lck_mtxi.lck_mtxi_ptr
-
-#define	LCK_MTX_TAG_INDIRECT			0x00001007	/* lock marked as Indirect  */
-#define	LCK_MTX_TAG_DESTROYED			0x00002007	/* lock marked as Destroyed */
-
-#define	LCK_FRAMES_MAX	8
-
-typedef struct {
-	unsigned int		type;
-	vm_offset_t			stack[LCK_FRAMES_MAX];
-	vm_offset_t			thread;
-} lck_mtx_deb_t;
-
-#define MUTEX_TAG       0x4d4d
-
-typedef struct {
-	unsigned int		lck_mtx_stat_data;
-} lck_mtx_stat_t;
-
-typedef struct _lck_mtx_ext_ {
-	lck_mtx_t		lck_mtx;
-	struct _lck_grp_	*lck_mtx_grp;
-	unsigned int		lck_mtx_attr;
-	lck_mtx_deb_t		lck_mtx_deb;
-	/* Unused on PowerPC */
-	lck_mtx_stat_t		lck_mtx_stat;
-} lck_mtx_ext_t;
-
-#define	LCK_MTX_ATTR_DEBUG	0x1
-#define	LCK_MTX_ATTR_DEBUGb	31
-#define	LCK_MTX_ATTR_STAT	0x2
-#define	LCK_MTX_ATTR_STATb	30
-
-#else
-#ifdef	KERNEL_PRIVATE
-typedef struct {
-    unsigned int   		 opaque[3];
-} lck_mtx_t;
-
-typedef struct {
-    unsigned int   		 opaque[16];
-} lck_mtx_ext_t;
-#else
-typedef struct __lck_mtx_t__		lck_mtx_t;
-typedef struct __lck_mtx_ext_t__	lck_mtx_ext_t;
-#endif
-#endif
-
-#ifdef	MACH_KERNEL_PRIVATE
-typedef struct {
-	union {
-		struct {
-			unsigned int			lck_rwd_shared_cnt:16,	/* No. of shared requests granted */
-									lck_rwd_priv_excl:1,	/* priority for Writer */
-									lck_rwd_pad17:11,		/* padding */
-									lck_rwd_want_excl:1,	/* Writer is waiting, or locked for write */
-									lck_rwd_want_upgrade:1,	/* Read-to-write upgrade waiting */
-									lck_rwd_waiting:1,		/* Someone is sleeping on lock */
-									lck_rwd_interlock:1;	/* Interlock */
-			unsigned int			lck_rwd_pad4;
-			unsigned int			lck_rwd_pad8;
-		} lck_rwd;
-		struct {
-			unsigned int			lck_rwi_tag;
-			struct _lck_rw_ext_	*lck_rwi_ptr;
-			unsigned int			lck_rwi_pad8;
-		} lck_rwi;
-	} lck_rw_sw; 
-} lck_rw_t;
-
-#define	lck_rw_interlock		lck_rw_sw.lck_rwd.lck_rwd_interlock
-#define	lck_rw_want_upgrade		lck_rw_sw.lck_rwd.lck_rwd_want_upgrade
-#define	lck_rw_want_excl		lck_rw_sw.lck_rwd.lck_rwd_want_excl
-#define	lck_rw_waiting			lck_rw_sw.lck_rwd.lck_rwd_waiting
-#define	lck_rw_priv_excl		lck_rw_sw.lck_rwd.lck_rwd_priv_excl
-#define	lck_rw_shared_cnt		lck_rw_sw.lck_rwd.lck_rwd_shared_cnt
-
-#define lck_rw_tag				lck_rw_sw.lck_rwi.lck_rwi_tag
-#define lck_rw_ptr				lck_rw_sw.lck_rwi.lck_rwi_ptr
-
-typedef struct {
-	unsigned int		type;
-	vm_offset_t			stack[LCK_FRAMES_MAX];
-	thread_t			thread;
-	void 				(*pc_excl)(void);
-	void 				(*pc_done)(void);
-} lck_rw_deb_t;
-
-#define RW_TAG       0x5d5d
-
-typedef struct {
-	unsigned int		lck_rw_stat_data;
-} lck_rw_stat_t;
-
-typedef struct _lck_rw_ext_ {
-	lck_rw_t		lck_rw;
-	struct _lck_grp_	*lck_rw_grp;
-	unsigned int		lck_rw_attr;
-	lck_rw_deb_t		lck_rw_deb;
-	lck_rw_stat_t		lck_rw_stat;
-} lck_rw_ext_t;
-
-#define	LCK_RW_ATTR_DEBUG	0x1
-#define	LCK_RW_ATTR_DEBUGb	31
-#define	LCK_RW_ATTR_STAT	0x2
-#define	LCK_RW_ATTR_STATb	30
-#define	LCK_RW_ATTR_DIS_THREAD	0x40000000
-#define	LCK_RW_ATTR_DIS_THREADb	1
-#define	LCK_RW_ATTR_DIS_MYLOCK	0x10000000
-#define	LCK_RW_ATTR_DIS_MYLOCKb	3
-
-#define	LCK_RW_TAG_INDIRECT			0x00001107	/* lock marked as Indirect  */
-#define	LCK_RW_TAG_DESTROYED		0x00002107	/* lock marked as Destroyed */
-
-#else
-#ifdef	KERNEL_PRIVATE
-typedef struct {
-    unsigned int   		 opaque[3];
-} lck_rw_t;
-#else
-typedef	struct __lck_rw_t__	lck_rw_t;
-#endif
-#endif
-
-#endif	/* _PPC_LOCKS_H_ */
diff --git a/osfmk/ppc/locks_ppc.c b/osfmk/ppc/locks_ppc.c
deleted file mode 100644
index c734043f5..000000000
--- a/osfmk/ppc/locks_ppc.c
+++ /dev/null
@@ -1,2360 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- *	File:	kern/lock.c
- *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
- *	Date:	1985
- *
- *	Locking primitives implementation
- */
-
-#include <mach_kdb.h>
-#include <mach_ldebug.h>
-
-#include <kern/kalloc.h>
-#include <kern/lock.h>
-#include <kern/locks.h>
-#include <kern/misc_protos.h>
-#include <kern/thread.h>
-#include <kern/processor.h>
-#include <kern/sched_prim.h>
-#include <kern/xpr.h>
-#include <kern/debug.h>
-#include <string.h>
-
-#if	MACH_KDB
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_print.h>
-#endif	/* MACH_KDB */
-
-#ifdef __ppc__
-#include <ppc/Firmware.h>
-#endif
-
-#include <sys/kdebug.h>
-
-/*
- * We need only enough declarations from the BSD-side to be able to
- * test if our probe is active, and to call __dtrace_probe().  Setting
- * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
- *
- * Note that if CONFIG_DTRACE is off, the include file below stubs out
- * the code hooks here.
- */
-#if	CONFIG_DTRACE
-#define NEED_DTRACE_DEFS
-#include <../bsd/sys/lockstat.h>
-#endif
-
-#define	LCK_RW_LCK_EXCLUSIVE_CODE	0x100
-#define	LCK_RW_LCK_EXCLUSIVE1_CODE	0x101
-#define	LCK_RW_LCK_SHARED_CODE		0x102
-#define	LCK_RW_LCK_SH_TO_EX_CODE	0x103
-#define	LCK_RW_LCK_SH_TO_EX1_CODE	0x104
-#define	LCK_RW_LCK_EX_TO_SH_CODE	0x105
-
-
-#define	ANY_LOCK_DEBUG	(USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
-
-unsigned int lock_wait_time[2] = { (unsigned int)-1, 0 } ;
-
-/* Forwards */
-
-
-#if	USLOCK_DEBUG
-/*
- *	Perform simple lock checks.
- */
-int	uslock_check = 1;
-int	max_lock_loops	= 100000000;
-decl_simple_lock_data(extern , printf_lock)
-decl_simple_lock_data(extern , panic_lock)
-#if	MACH_KDB
-decl_simple_lock_data(extern , kdb_lock)
-#endif	/* MACH_KDB */
-#endif	/* USLOCK_DEBUG */
-
-
-/*
- *	We often want to know the addresses of the callers
- *	of the various lock routines.  However, this information
- *	is only used for debugging and statistics.
- */
-typedef void	*pc_t;
-#define	INVALID_PC	((void *) VM_MAX_KERNEL_ADDRESS)
-#define	INVALID_THREAD	((void *) VM_MAX_KERNEL_ADDRESS)
-#if	ANY_LOCK_DEBUG
-#define	OBTAIN_PC(pc,l)	((pc) = (void *) GET_RETURN_PC(&(l)))
-#else	/* ANY_LOCK_DEBUG */
-#ifdef	lint
-/*
- *	Eliminate lint complaints about unused local pc variables.
- */
-#define	OBTAIN_PC(pc,l)	++pc
-#else	/* lint */
-#define	OBTAIN_PC(pc,l)
-#endif	/* lint */
-#endif	/* ANY_LOCK_DEBUG */
-
-
-/*
- *	Portable lock package implementation of usimple_locks.
- */
-
-#if	USLOCK_DEBUG
-#define	USLDBG(stmt)	stmt
-void		usld_lock_init(usimple_lock_t, unsigned short);
-void		usld_lock_pre(usimple_lock_t, pc_t);
-void		usld_lock_post(usimple_lock_t, pc_t);
-void		usld_unlock(usimple_lock_t, pc_t);
-void		usld_lock_try_pre(usimple_lock_t, pc_t);
-void		usld_lock_try_post(usimple_lock_t, pc_t);
-int		usld_lock_common_checks(usimple_lock_t, const char *);
-#else	/* USLOCK_DEBUG */
-#define	USLDBG(stmt)
-#endif	/* USLOCK_DEBUG */
-
-/*
- *      Routine:        lck_spin_alloc_init
- */
-lck_spin_t *
-lck_spin_alloc_init(
-	lck_grp_t	*grp,
-	lck_attr_t	*attr) {
-	lck_spin_t	*lck;
-
-	if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0)
-		lck_spin_init(lck, grp, attr);
-		
-	return(lck);
-}
-
-/*
- *      Routine:        lck_spin_free
- */
-void
-lck_spin_free(
-	lck_spin_t	*lck,
-	lck_grp_t	*grp) {
-	lck_spin_destroy(lck, grp);
-	kfree((void *)lck, sizeof(lck_spin_t));
-}
-
-/*
- *      Routine:        lck_spin_init
- */
-void
-lck_spin_init(
-	lck_spin_t		*lck,
-	lck_grp_t		*grp,
-	__unused lck_attr_t	*attr) {
-
-	lck->interlock = 0;
-	lck_grp_reference(grp);
-	lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
-}
-
-/*
- *      Routine:        lck_spin_destroy
- */
-void
-lck_spin_destroy(
-	lck_spin_t	*lck,
-	lck_grp_t	*grp) {
-	if (lck->interlock == LCK_SPIN_TAG_DESTROYED)
-		return;
-	lck->interlock = LCK_SPIN_TAG_DESTROYED;
-	lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
-	lck_grp_deallocate(grp);
-}
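-
-/*
- *	Illustrative sketch (not part of the original source): the spin
- *	lock lifecycle using the routines above.  The group name is
- *	hypothetical; lck_grp_alloc_init(), lck_grp_free(), lck_spin_lock()
- *	and lck_spin_unlock() are provided elsewhere in the lock package.
- */
-#if 0
-static void
-example_lck_spin_usage(void)
-{
-	lck_grp_t	*grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
-	lck_spin_t	*spin = lck_spin_alloc_init(grp, LCK_ATTR_NULL);
-
-	lck_spin_lock(spin);
-	/* ... very short critical section; must not block ... */
-	lck_spin_unlock(spin);
-
-	lck_spin_free(spin, grp);
-	lck_grp_free(grp);
-}
-#endif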
-
-/*
- *	Initialize a usimple_lock.
- *
- *	No change in preemption state.
- */
-void
-usimple_lock_init(
-	usimple_lock_t	l,
-	unsigned short	tag)
-{
-#ifndef	MACHINE_SIMPLE_LOCK
-	USLDBG(usld_lock_init(l, tag));
-	hw_lock_init(&l->interlock);
-#else
-	simple_lock_init((simple_lock_t)l,tag);
-#endif
-}
-
-
-/*
- *	Acquire a usimple_lock.
- *
- *	Returns with preemption disabled.  Note
- *	that the hw_lock routines are responsible for
- *	maintaining preemption state.
- */
-void
-usimple_lock(
-	usimple_lock_t	l)
-{
-#ifndef	MACHINE_SIMPLE_LOCK
-	pc_t		pc;
-
-	OBTAIN_PC(pc, l);
-	USLDBG(usld_lock_pre(l, pc));
-
-	if(!hw_lock_to(&l->interlock, LockTimeOut))	/* Try to get the lock with a timeout */ 
-		panic("simple lock deadlock detection - l=%p, cpu=%d, ret=%p", l, cpu_number(), pc);
-
-	USLDBG(usld_lock_post(l, pc));
-#else
-	simple_lock((simple_lock_t)l);
-#endif
-}
-
-
-/*
- *	Release a usimple_lock.
- *
- *	Returns with preemption enabled.  Note
- *	that the hw_lock routines are responsible for
- *	maintaining preemption state.
- */
-void
-usimple_unlock(
-	usimple_lock_t	l)
-{
-#ifndef	MACHINE_SIMPLE_LOCK
-	pc_t	pc;
-
-	OBTAIN_PC(pc, l);
-	USLDBG(usld_unlock(l, pc));
-	sync();
-	hw_lock_unlock(&l->interlock);
-#else
-	simple_unlock_rwmb((simple_lock_t)l);
-#endif
-}
-
-
-/*
- *	Conditionally acquire a usimple_lock.
- *
- *	On success, returns with preemption disabled.
- *	On failure, returns with preemption in the same state
- *	as when first invoked.  Note that the hw_lock routines
- *	are responsible for maintaining preemption state.
- *
- *	XXX No stats are gathered on a miss; I preserved this
- *	behavior from the original assembly-language code, but
- *	doesn't it make sense to log misses?  XXX
- */
-unsigned int
-usimple_lock_try(
-	usimple_lock_t	l)
-{
-#ifndef	MACHINE_SIMPLE_LOCK
-	pc_t		pc;
-	unsigned int	success;
-
-	OBTAIN_PC(pc, l);
-	USLDBG(usld_lock_try_pre(l, pc));
-	success = hw_lock_try(&l->interlock);
-	if (success)
-		USLDBG(usld_lock_try_post(l, pc));
-	return success;
-#else
-	return(simple_lock_try((simple_lock_t)l));
-#endif
-}
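-
-/*
- *	Illustrative sketch (not part of the original source): typical
- *	usimple_lock usage.  The lock and counter names are hypothetical.
- *	Acquisition returns with preemption disabled, so the critical
- *	section must stay short and must never block.
- */
-#if 0
-decl_simple_lock_data(static, example_lock)	/* hypothetical */
-static unsigned int	example_count;
-
-static void
-example_usimple_usage(void)
-{
-	usimple_lock_init(&example_lock, 0);
-
-	usimple_lock(&example_lock);		/* preemption now disabled */
-	example_count++;
-	usimple_unlock(&example_lock);		/* preemption re-enabled */
-
-	if (usimple_lock_try(&example_lock)) {	/* non-blocking variant */
-		example_count++;
-		usimple_unlock(&example_lock);
-	}
-}
-#endif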
-
-#if	USLOCK_DEBUG
-/*
- *	States of a usimple_lock.  The default when initializing
- *	a usimple_lock is setting it up for debug checking.
- */
-#define	USLOCK_CHECKED		0x0001		/* lock is being checked */
-#define	USLOCK_TAKEN		0x0002		/* lock has been taken */
-#define	USLOCK_INIT		0xBAA0		/* lock has been initialized */
-#define	USLOCK_INITIALIZED	(USLOCK_INIT|USLOCK_CHECKED)
-#define	USLOCK_CHECKING(l)	(uslock_check &&			\
-				 ((l)->debug.state & USLOCK_CHECKED))
-
-/*
- *	Trace activities of a particularly interesting lock.
- */
-void	usl_trace(usimple_lock_t, int, pc_t, const char *);
-
-
-/*
- *	Initialize the debugging information contained
- *	in a usimple_lock.
- */
-void
-usld_lock_init(
-	usimple_lock_t	l,
-	__unused unsigned short	tag)
-{
-	if (l == USIMPLE_LOCK_NULL)
-		panic("lock initialization:  null lock pointer");
-	l->lock_type = USLOCK_TAG;
-	l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
-	l->debug.lock_cpu = l->debug.unlock_cpu = 0;
-	l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
-	l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
-	l->debug.duration[0] = l->debug.duration[1] = 0;
-}
-
-
-/*
- *	These checks apply to all usimple_locks, not just
- *	those with USLOCK_CHECKED turned on.
- */
-int
-usld_lock_common_checks(usimple_lock_t l, const char *caller)
-{
-	if (l == USIMPLE_LOCK_NULL)
-		panic("%s:  null lock pointer", caller);
-	if (l->lock_type != USLOCK_TAG)
-		panic("%s:  0x%x is not a usimple lock", caller, (integer_t) l);
-	if (!(l->debug.state & USLOCK_INIT))
-		panic("%s:  0x%x is not an initialized lock",
-		      caller, (integer_t) l);
-	return USLOCK_CHECKING(l);
-}
-
-
-/*
- *	Debug checks on a usimple_lock just before attempting
- *	to acquire it.
- */
-/* ARGSUSED */
-void
-usld_lock_pre(
-	usimple_lock_t	l,
-	pc_t		pc)
-{
-	const char *caller = "usimple_lock";
-
-	if (!usld_lock_common_checks(l, caller))
-		return;
-
-/*
- *	Note that we have a weird case where we are getting a lock when we are
- *	in the process of putting the system to sleep. We are running with no
- *	current threads, therefore we can't tell if we are trying to retake a lock
- *	we have or someone on the other processor has it.  Therefore we just
- *	ignore this test if the locking thread is 0.
- */
-
-	if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
-	    l->debug.lock_thread == (void *) current_thread()) {
-		printf("%s:  lock 0x%x already locked (at %p) by",
-		      caller, (integer_t) l, l->debug.lock_pc);
-		printf(" current thread %p (new attempt at pc %p)\n",
-		       l->debug.lock_thread, pc);
-		panic("%s", caller);
-	}
-	mp_disable_preemption();
-	usl_trace(l, cpu_number(), pc, caller);
-	mp_enable_preemption();
-}
-
-
-/*
- *	Debug checks on a usimple_lock just after acquiring it.
- *
- *	Preemption has been disabled at this point,
- *	so we are safe in using cpu_number.
- */
-void
-usld_lock_post(
-	usimple_lock_t	l,
-	pc_t		pc)
-{
-	int mycpu;
-	const char *caller = "successful usimple_lock";
-
-
-	if (!usld_lock_common_checks(l, caller))
-		return;
-
-	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
-		panic("%s:  lock 0x%x became uninitialized",
-		      caller, (integer_t) l);
-	if ((l->debug.state & USLOCK_TAKEN))
-		panic("%s:  lock 0x%x became TAKEN by someone else",
-		      caller, (integer_t) l);
-
-	mycpu = cpu_number();
-	l->debug.lock_thread = (void *)current_thread();
-	l->debug.state |= USLOCK_TAKEN;
-	l->debug.lock_pc = pc;
-	l->debug.lock_cpu = mycpu;
-
-	usl_trace(l, mycpu, pc, caller);
-}
-
-
-/*
- *	Debug checks on a usimple_lock just before
- *	releasing it.  Note that the caller has not
- *	yet released the hardware lock.
- *
- *	Preemption is still disabled, so there's
- *	no problem using cpu_number.
- */
-void
-usld_unlock(
-	usimple_lock_t	l,
-	pc_t		pc)
-{
-	int mycpu;
-	const char *caller = "usimple_unlock";
-
-
-	if (!usld_lock_common_checks(l, caller))
-		return;
-
-	mycpu = cpu_number();
-
-	if (!(l->debug.state & USLOCK_TAKEN))
-		panic("%s:  lock 0x%x hasn't been taken",
-		      caller, (integer_t) l);
-	if (l->debug.lock_thread != (void *) current_thread())
-		panic("%s:  unlocking lock 0x%x, owned by thread %p",
-		      caller, (integer_t) l, l->debug.lock_thread);
-	if (l->debug.lock_cpu != mycpu) {
-		printf("%s:  unlocking lock 0x%x on cpu 0x%x",
-		       caller, (integer_t) l, mycpu);
-		printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
-		panic("%s", caller);
-	}
-	usl_trace(l, mycpu, pc, caller);
-
-	l->debug.unlock_thread = l->debug.lock_thread;
-	l->debug.lock_thread = INVALID_THREAD;
-	l->debug.state &= ~USLOCK_TAKEN;
-	l->debug.unlock_pc = pc;
-	l->debug.unlock_cpu = mycpu;
-}
-
-
-/*
- *	Debug checks on a usimple_lock just before
- *	attempting to acquire it.
- *
- *	Preemption isn't guaranteed to be disabled.
- */
-void
-usld_lock_try_pre(
-	usimple_lock_t	l,
-	pc_t		pc)
-{
-	const char *caller = "usimple_lock_try";
-
-	if (!usld_lock_common_checks(l, caller))
-		return;
-	mp_disable_preemption();
-	usl_trace(l, cpu_number(), pc, caller);
-	mp_enable_preemption();
-}
-
-
-/*
- *	Debug checks on a usimple_lock just after
- *	successfully attempting to acquire it.
- *
- *	Preemption has been disabled by the
- *	lock acquisition attempt, so it's safe
- *	to use cpu_number.
- */
-void
-usld_lock_try_post(
-	usimple_lock_t	l,
-	pc_t		pc)
-{
-	int mycpu;
-	const char *caller = "successful usimple_lock_try";
-
-	if (!usld_lock_common_checks(l, caller))
-		return;
-
-	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
-		panic("%s:  lock 0x%x became uninitialized",
-		      caller, (integer_t) l);
-	if ((l->debug.state & USLOCK_TAKEN))
-		panic("%s:  lock 0x%x became TAKEN by someone else",
-		      caller, (integer_t) l);
-
-	mycpu = cpu_number();
-	l->debug.lock_thread = (void *) current_thread();
-	l->debug.state |= USLOCK_TAKEN;
-	l->debug.lock_pc = pc;
-	l->debug.lock_cpu = mycpu;
-
-	usl_trace(l, mycpu, pc, caller);
-}
-
-
-/*
- *	For very special cases, set traced_lock to point to a
- *	specific lock of interest.  The result is a series of
- *	XPRs showing lock operations on that lock.  The lock_seq
- *	value is used to show the order of those operations.
- */
-usimple_lock_t		traced_lock;
-unsigned int		lock_seq;
-
-void
-usl_trace(
-	usimple_lock_t	l,
-	int		mycpu,
-	pc_t		pc,
-	const char *	op_name)
-{
-	if (traced_lock == l) {
-		XPR(XPR_SLOCK,
-		    "seq %d, cpu %d, %s @ %x\n",
-		    (integer_t) lock_seq, (integer_t) mycpu,
-		    (integer_t) op_name, (integer_t) pc, 0);
-		lock_seq++;
-	}
-}
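-
-/*
- *	Illustrative sketch (not part of the original source): to trace a
- *	single suspect lock, point traced_lock at it (typically from the
- *	debugger or bring-up code).  Every subsequent operation on that
- *	lock then emits an XPR record, ordered by lock_seq.
- */
-#if 0
-decl_simple_lock_data(extern, suspect_lock)	/* hypothetical */
-
-static void
-example_trace_one_lock(void)
-{
-	traced_lock = &suspect_lock;
-	lock_seq = 0;
-	/* ... run the workload, then inspect the XPR buffer ... */
-}
-#endif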
-
-
-#endif	/* USLOCK_DEBUG */
-
-/*
- * The C portion of the shared/exclusive locks package.
- */
-
-/*
- * Forward definition 
- */
-
-void lck_rw_lock_exclusive_gen(
-	lck_rw_t	*lck);
-
-lck_rw_type_t lck_rw_done_gen(
-	lck_rw_t	*lck);
-
-void
-lck_rw_lock_shared_gen(
-	lck_rw_t	*lck);
-
-boolean_t
-lck_rw_lock_shared_to_exclusive_gen(
-	lck_rw_t	*lck);
-
-void
-lck_rw_lock_exclusive_to_shared_gen(
-	lck_rw_t	*lck);
-
-boolean_t
-lck_rw_try_lock_exclusive_gen(
-	lck_rw_t	*lck);
-
-boolean_t
-lck_rw_try_lock_shared_gen(
-	lck_rw_t	*lck);
-
-void lck_rw_ext_init(
-	lck_rw_ext_t	*lck,
-	lck_grp_t	*grp,
-	lck_attr_t	*attr);
-
-void lck_rw_ext_backtrace(
-	lck_rw_ext_t	*lck);
-
-void lck_rw_lock_exclusive_ext(
-	lck_rw_ext_t	*lck,
-	lck_rw_t	*rlck);
-
-lck_rw_type_t lck_rw_done_ext(
-	lck_rw_ext_t	*lck,
-	lck_rw_t	*rlck);
-
-void
-lck_rw_lock_shared_ext(
-	lck_rw_ext_t	*lck,
-	lck_rw_t	*rlck);
-
-boolean_t
-lck_rw_lock_shared_to_exclusive_ext(
-	lck_rw_ext_t	*lck,
-	lck_rw_t	*rlck);
-
-void
-lck_rw_lock_exclusive_to_shared_ext(
-	lck_rw_ext_t	*lck,
-	lck_rw_t	*rlck);
-
-boolean_t
-lck_rw_try_lock_exclusive_ext(
-	lck_rw_ext_t	*lck,
-	lck_rw_t	*rlck);
-
-boolean_t
-lck_rw_try_lock_shared_ext(
-	lck_rw_ext_t	*lck,
-	lck_rw_t	*rlck);
-
-void
-lck_rw_ilk_lock(
-	lck_rw_t	*lck);
-
-void
-lck_rw_ilk_unlock(
-	lck_rw_t	*lck);
-
-void
-lck_rw_check_type(
-	lck_rw_ext_t	*lck,
-	lck_rw_t	*rlck);
-
-void
-lck_rw_assert_ext(
-	lck_rw_ext_t	*lck,
-	lck_rw_t	*rlck,
-	unsigned int	type);
-
-/*
- *	Routine:	lock_alloc
- *	Function:
- *		Allocate a lock for external users who cannot
- *		hard-code the structure definition into their
- *		objects.
- *		For now just use kalloc, but a zone is probably
- *		warranted.
- */
-lock_t *
-lock_alloc(
-	boolean_t		can_sleep,
-	__unused unsigned short	tag,
-	__unused unsigned short	tag1)
-{
-	lock_t		*lck;
-
-	if ((lck = (lock_t *)kalloc(sizeof(lock_t))) != 0)
-	  lock_init(lck, can_sleep, tag, tag1);
-	return(lck);
-}
-
-/*
- *	Routine:	lock_init
- *	Function:
- *		Initialize a lock; required before use.
- *		Note that clients declare the "struct lock"
- *		variables and then initialize them, rather
- *		than getting a new one from this module.
- */
-void
-lock_init(
-	lock_t			*lck,
-	boolean_t		can_sleep,
-	__unused unsigned short	tag,
-	__unused unsigned short	tag1)
-{
-	if (!can_sleep)
-		panic("lock_init: sleep mode must be set to TRUE\n");
-
-	(void) memset((void *) lck, 0, sizeof(lock_t));
-#if	MACH_LDEBUG
-	lck->lck_rw_deb.type = RW_TAG;
-	lck->lck_rw_attr |= (LCK_RW_ATTR_DEBUG|LCK_RW_ATTR_DIS_THREAD|LCK_RW_ATTR_DIS_MYLOCK);
-	lck->lck_rw.lck_rw_priv_excl = TRUE;
-#else
-	lck->lck_rw_priv_excl = TRUE;
-#endif
-
-}
-
-
-/*
- *	Routine:	lock_free
- *	Function:
- *		Free a lock allocated for external users.
- *		For now just use kfree, but a zone is probably
- *		warranted.
- */
-void
-lock_free(
-	lock_t	*lck)
-{
-	kfree((void *)lck, sizeof(lock_t));
-}
-
-#if	MACH_LDEBUG
-void
-lock_write(
-	lock_t	*lck)
-{
-	lck_rw_lock_exclusive_ext((lck_rw_ext_t *)lck, (lck_rw_t *)lck);
-}
-
-void
-lock_done(
-	lock_t	*lck)
-{
-	(void)lck_rw_done_ext((lck_rw_ext_t *)lck, (lck_rw_t *)lck);
-}
-
-void
-lock_read(
-	lock_t	*lck)
-{
-	lck_rw_lock_shared_ext((lck_rw_ext_t *)lck, (lck_rw_t *)lck);
-}
-
-boolean_t
-lock_read_to_write(
-	lock_t	*lck)
-{
-	return(lck_rw_lock_shared_to_exclusive_ext((lck_rw_ext_t *)lck, (lck_rw_t *)lck));
-}
-
-void
-lock_write_to_read(
-	register lock_t	*lck)
-{
-	lck_rw_lock_exclusive_to_shared_ext((lck_rw_ext_t *)lck, (lck_rw_t *)lck);
-}
-#endif
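-
-/*
- *	Illustrative sketch (not part of the original source): the external
- *	lock_t interface.  The tags are unused by this implementation and
- *	can_sleep must be TRUE (lock_init() panics otherwise).
- */
-#if 0
-static void
-example_lock_t_usage(void)
-{
-	lock_t	*l = lock_alloc(TRUE, 0, 0);
-
-	lock_write(l);			/* take it exclusive */
-	/* ... modify shared state ... */
-	lock_write_to_read(l);		/* downgrade; never fails */
-	/* ... read shared state ... */
-	lock_done(l);			/* release whatever is held */
-
-	lock_free(l);
-}
-#endif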
-
-/*
- *      Routine:        lck_rw_alloc_init
- */
-lck_rw_t *
-lck_rw_alloc_init(
-	lck_grp_t	*grp,
-	lck_attr_t	*attr) {
-	lck_rw_t	*lck;
-
-	if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0)
-		lck_rw_init(lck, grp, attr);
-		
-	return(lck);
-}
-
-/*
- *      Routine:        lck_rw_free
- */
-void
-lck_rw_free(
-	lck_rw_t	*lck,
-	lck_grp_t	*grp) {
-	lck_rw_destroy(lck, grp);
-	kfree((void *)lck, sizeof(lck_rw_t));
-}
-
-/*
- *      Routine:        lck_rw_init
- */
-void
-lck_rw_init(
-	lck_rw_t		*lck,
-	lck_grp_t		*grp,
-	lck_attr_t		*attr) {
-	lck_rw_ext_t	*lck_ext;
-	lck_attr_t	*lck_attr;
-
-	if (attr != LCK_ATTR_NULL)
-		lck_attr = attr;
-	else
-		lck_attr = &LockDefaultLckAttr;
-
-	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
-		if ((lck_ext = (lck_rw_ext_t *)kalloc(sizeof(lck_rw_ext_t))) != 0) {
-			lck_rw_ext_init(lck_ext, grp, lck_attr);	
-			lck->lck_rw_tag = LCK_RW_TAG_INDIRECT;
-			lck->lck_rw_ptr = lck_ext;
-		}
-	} else {
-		(void) memset((void *) lck, 0, sizeof(lck_rw_t));
-		if ((lck_attr->lck_attr_val)  & LCK_ATTR_RW_SHARED_PRIORITY)
-			lck->lck_rw_priv_excl = FALSE;
-		else
-			lck->lck_rw_priv_excl = TRUE;
-	}
-
-	lck_grp_reference(grp);
-	lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
-}
-
-/*
- *      Routine:        lck_rw_ext_init
- */
-void
-lck_rw_ext_init(
-	lck_rw_ext_t	*lck,
-	lck_grp_t	*grp,
-	lck_attr_t	*attr) {
-
-	bzero((void *)lck, sizeof(lck_rw_ext_t));
-	if ((attr->lck_attr_val)  & LCK_ATTR_RW_SHARED_PRIORITY)
-		lck->lck_rw.lck_rw_priv_excl = FALSE;
-	else
-		lck->lck_rw.lck_rw_priv_excl = TRUE;
-
-	if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
-		lck->lck_rw_deb.type = RW_TAG;
-		lck->lck_rw_attr |= LCK_RW_ATTR_DEBUG;
-	}
-
-	lck->lck_rw_grp = grp;
-
-	if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
-		 lck->lck_rw_attr |= LCK_RW_ATTR_STAT;
-}
-
-/*
- *      Routine:        lck_rw_destroy
- */
-void
-lck_rw_destroy(
-	lck_rw_t	*lck,
-	lck_grp_t	*grp) {
-	boolean_t lck_is_indirect;
-	
-	if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
-		return;
-	lck_is_indirect = (lck->lck_rw_tag == LCK_RW_TAG_INDIRECT);
-	lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
-	if (lck_is_indirect)
-		kfree((void *)lck->lck_rw_ptr, sizeof(lck_rw_ext_t));
-
-	lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
-	lck_grp_deallocate(grp);
-	return;
-}
-
-/*
- *	Routine:	lck_rw_lock
- */
-void
-lck_rw_lock(
-	lck_rw_t	*lck,
-	lck_rw_type_t	lck_rw_type)
-{
-	if (lck_rw_type == LCK_RW_TYPE_SHARED)
-		lck_rw_lock_shared(lck);
-	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
-		lck_rw_lock_exclusive(lck);
-	else
-		panic("lck_rw_lock(): Invalid RW lock type: %d\n", lck_rw_type);
-}
-
-
-/*
- *	Routine:	lck_rw_unlock
- */
-void
-lck_rw_unlock(
-	lck_rw_t	*lck,
-	lck_rw_type_t	lck_rw_type)
-{
-	if (lck_rw_type == LCK_RW_TYPE_SHARED)
-		lck_rw_unlock_shared(lck);
-	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
-		lck_rw_unlock_exclusive(lck);
-	else
-		panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
-}
-
-
-/*
- *	Routine:	lck_rw_unlock_shared
- */
-void
-lck_rw_unlock_shared(
-	lck_rw_t	*lck)
-{
-	lck_rw_type_t	ret;
-
-	ret = lck_rw_done(lck);
-
-	if (ret != LCK_RW_TYPE_SHARED)
-		panic("lck_rw_unlock(): lock held in mode: %d\n", ret);
-}
-
-
-/*
- *	Routine:	lck_rw_unlock_exclusive
- */
-void
-lck_rw_unlock_exclusive(
-	lck_rw_t	*lck)
-{
-	lck_rw_type_t	ret;
-
-	ret = lck_rw_done(lck);
-
-	if (ret != LCK_RW_TYPE_EXCLUSIVE)
-		panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret);
-}
-
-
-/*
- *      Routine:        lck_rw_try_lock
- */
-boolean_t
-lck_rw_try_lock(
-	lck_rw_t	*lck,
-	lck_rw_type_t	lck_rw_type)
-{
-	if (lck_rw_type == LCK_RW_TYPE_SHARED)
-		return(lck_rw_try_lock_shared(lck));
-	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
-		return(lck_rw_try_lock_exclusive(lck));
-	else
-		panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type);
-	return(FALSE);
-}
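-
-/*
- *	Illustrative sketch (not part of the original source): a typical
- *	lck_rw_t lifecycle through the allocation and type-dispatch
- *	routines above.  The group name is hypothetical;
- *	lck_grp_alloc_init() and lck_grp_free() live elsewhere in the
- *	lock package.
- */
-#if 0
-static void
-example_lck_rw_usage(void)
-{
-	lck_grp_t	*grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
-	lck_rw_t	*lck = lck_rw_alloc_init(grp, LCK_ATTR_NULL);
-
-	lck_rw_lock(lck, LCK_RW_TYPE_SHARED);	/* many readers may hold this */
-	/* ... read shared state ... */
-	lck_rw_unlock(lck, LCK_RW_TYPE_SHARED);
-
-	if (lck_rw_try_lock(lck, LCK_RW_TYPE_EXCLUSIVE)) {
-		/* ... modify shared state ... */
-		lck_rw_unlock(lck, LCK_RW_TYPE_EXCLUSIVE);
-	}
-
-	lck_rw_free(lck, grp);
-	lck_grp_free(grp);
-}
-#endif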
-
-
-
-/*
- *      Routine:        lck_rw_lock_exclusive_gen
- */
-void
-lck_rw_lock_exclusive_gen(
-	lck_rw_t	*lck)
-{
-	int	   i;
-	wait_result_t	res;
-#if	CONFIG_DTRACE
-	uint64_t wait_interval = 0;
-	int slept = 0;
-	int readers_at_sleep;
-#endif
-
-	lck_rw_ilk_lock(lck);
-#if	CONFIG_DTRACE
-	readers_at_sleep = lck->lck_rw_shared_cnt;
-#endif
-
-	/*
-	 *	Try to acquire the lck_rw_want_excl bit.
-	 */
-	while (lck->lck_rw_want_excl) {
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
-
-#if	CONFIG_DTRACE
-		if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) {
-			wait_interval = mach_absolute_time();
-		} else {
-			wait_interval = (unsigned) -1;
-		}
-#endif
-
-		i = lock_wait_time[1];
-		if (i != 0) {
-			lck_rw_ilk_unlock(lck);
-			while (--i != 0 && lck->lck_rw_want_excl)
-				continue;
-			lck_rw_ilk_lock(lck);
-		}
-
-		if (lck->lck_rw_want_excl) {
-			lck->lck_rw_waiting = TRUE;
-			res = assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
-			if (res == THREAD_WAITING) {
-				lck_rw_ilk_unlock(lck);
-				res = thread_block(THREAD_CONTINUE_NULL);
-#if	CONFIG_DTRACE
-				slept = 1;
-#endif
-				lck_rw_ilk_lock(lck);
-			}
-		}
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_END, (int)lck, res, 0, 0, 0);
-	}
-	lck->lck_rw_want_excl = TRUE;
-
-	/* Wait for readers (and upgrades) to finish */
-
-	while ((lck->lck_rw_shared_cnt != 0) || lck->lck_rw_want_upgrade) {
-
-		i = lock_wait_time[1];
-
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_START,
-			     (int)lck, lck->lck_rw_shared_cnt, lck->lck_rw_want_upgrade, i, 0);
-#if	CONFIG_DTRACE
-		if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) {
-			wait_interval = mach_absolute_time();
-		} else {
-			wait_interval = (unsigned) -1;
-		}
-#endif
-
-		if (i != 0) {
-			lck_rw_ilk_unlock(lck);
-			while (--i != 0 && (lck->lck_rw_shared_cnt != 0 ||
-					    lck->lck_rw_want_upgrade))
-				continue;
-			lck_rw_ilk_lock(lck);
-		}
-
-		if (lck->lck_rw_shared_cnt != 0 || lck->lck_rw_want_upgrade) {
-			lck->lck_rw_waiting = TRUE;
-			res = assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
-			if (res == THREAD_WAITING) {
-				lck_rw_ilk_unlock(lck);
-				res = thread_block(THREAD_CONTINUE_NULL);
-#if	CONFIG_DTRACE
-				slept = 1;
-#endif
-				lck_rw_ilk_lock(lck);
-			}
-		}
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_END,
-			     (int)lck, lck->lck_rw_shared_cnt, lck->lck_rw_want_upgrade, res, 0);
-	}
-
-	lck_rw_ilk_unlock(lck);
-#if	CONFIG_DTRACE
-	/*
-	 * Decide what latencies we suffered that are DTrace events.
-	 * If we have set wait_interval, then we either spun or slept.
-	 * At least we get out from under the interlock before we record,
-	 * which is the best we can do here to minimize the impact
-	 * of the tracing.
-	 */
-	if (wait_interval != 0 && wait_interval != (unsigned) -1) {
-		if (slept == 0) {
-			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
-			    mach_absolute_time() - wait_interval, 1);
-		} else {
-			/*
-			 * For the blocking case, we also record whether, when we
-			 * blocked, the lock was held for read or write, and how
-			 * many readers there were.  Notice that above we recorded
-			 * this before we dropped the interlock, so the count is
-			 * accurate.
-			 */
-			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
-			    mach_absolute_time() - wait_interval, 1,
-			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
-		}
-	}
-	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
-#endif
-}
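-
-/*
- *	Note on the wait channel used above (added commentary, not original
- *	text): the event passed to assert_wait() and thread_wakeup() is the
- *	address of the last 32-bit word inside the lck_rw_t, so sleepers and
- *	wakers rendezvous on a per-lock channel without extra storage:
- *
- *		(event_t)(((unsigned int *)lck) +
- *			  ((sizeof(lck_rw_t) - 1) / sizeof(unsigned int)))
- */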
-
-
-/*
- *      Routine:        lck_rw_done_gen
- */
-lck_rw_type_t
-lck_rw_done_gen(
-	lck_rw_t	*lck)
-{
-	boolean_t	do_wakeup = FALSE;
-	lck_rw_type_t	lck_rw_type;
-
-
-	lck_rw_ilk_lock(lck);
-
-	if (lck->lck_rw_shared_cnt != 0) {
-		lck_rw_type = LCK_RW_TYPE_SHARED;
-		lck->lck_rw_shared_cnt--;
-	}
-	else {	
-		lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
-		if (lck->lck_rw_want_upgrade) 
-			lck->lck_rw_want_upgrade = FALSE;
-		else 
-			lck->lck_rw_want_excl = FALSE;
-	}
-
-	/*
-	 *	There is no reason to wakeup a lck_rw_waiting thread
-	 *	if the read-count is non-zero.  Consider:
-	 *		we must be dropping a read lock
-	 *		threads are waiting only if one wants a write lock
-	 *		if there are still readers, they can't proceed
-	 */
-
-	if (lck->lck_rw_waiting && (lck->lck_rw_shared_cnt == 0)) {
-		lck->lck_rw_waiting = FALSE;
-		do_wakeup = TRUE;
-	}
-
-	lck_rw_ilk_unlock(lck);
-
-	if (do_wakeup)
-		thread_wakeup((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))));
-	LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lck_rw_type);
-	return(lck_rw_type);
-}
-
-
-/*
- *	Routine:	lck_rw_lock_shared_gen
- */
-void
-lck_rw_lock_shared_gen(
-	lck_rw_t	*lck)
-{
-	int		i;
-	wait_result_t      res;
-#if	CONFIG_DTRACE
-	uint64_t wait_interval = 0;
-	int slept = 0;
-	int readers_at_sleep;
-#endif
-
-	lck_rw_ilk_lock(lck);
-#if	CONFIG_DTRACE
-	readers_at_sleep = lck->lck_rw_shared_cnt;
-#endif
-
-	while ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
-	        ((lck->lck_rw_shared_cnt == 0) || (lck->lck_rw_priv_excl))) {
-		i = lock_wait_time[1];
-
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_START,
-			     (int)lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, i, 0);
-#if	CONFIG_DTRACE
-		if ((lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK]) && wait_interval == 0) {
-			wait_interval = mach_absolute_time();
-		} else {
-			wait_interval = (unsigned) -1;
-		}
-#endif
-
-		if (i != 0) {
-			lck_rw_ilk_unlock(lck);
-			while (--i != 0 && 
-			       (lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
-			       ((lck->lck_rw_shared_cnt == 0) || (lck->lck_rw_priv_excl)))
-				continue;
-			lck_rw_ilk_lock(lck);
-		}
-
-		if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
-		    ((lck->lck_rw_shared_cnt == 0) || (lck->lck_rw_priv_excl))) {
-			lck->lck_rw_waiting = TRUE;
-			res = assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
-			if (res == THREAD_WAITING) {
-				lck_rw_ilk_unlock(lck);
-				res = thread_block(THREAD_CONTINUE_NULL);
-#if	CONFIG_DTRACE
-				slept = 1;
-#endif
-				lck_rw_ilk_lock(lck);
-			}
-		}
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_END,
-			     (int)lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, res, 0);
-	}
-
-	lck->lck_rw_shared_cnt++;
-
-	lck_rw_ilk_unlock(lck);
-#if	CONFIG_DTRACE
-	if (wait_interval != 0 && wait_interval != (unsigned) -1) {
-		if (slept == 0) {
-			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
-		} else {
-			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
-			    mach_absolute_time() - wait_interval, 0,
-			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
-		}
-	}
-	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
-#endif
-}
-
-
-/*
- *	Routine:	lck_rw_lock_shared_to_exclusive_gen
- *	Function:
- *		Improves a read-only lock to one with
- *		write permission.  If another reader has
- *		already requested an upgrade to a write lock,
- *		no lock is held upon return.
- *
- *		Returns FALSE if the upgrade *failed*.
- */
-
-boolean_t
-lck_rw_lock_shared_to_exclusive_gen(
-	lck_rw_t	*lck)
-{
-	int	    i;
-	boolean_t	    do_wakeup = FALSE;
-	wait_result_t      res;
-#if	CONFIG_DTRACE
-	uint64_t wait_interval = 0;
-	int slept = 0;
-	int readers_at_sleep = 0;
-#endif
-
-	lck_rw_ilk_lock(lck);
-
-	lck->lck_rw_shared_cnt--;	
-
-	if (lck->lck_rw_want_upgrade) {
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_START,
-			     (int)lck, lck->lck_rw_shared_cnt, lck->lck_rw_want_upgrade, 0, 0);
-
-		/*
-		 *	Someone else has requested upgrade.
-		 *	Since we've released a read lock, wake
-		 *	him up.
-		 */
-		if (lck->lck_rw_waiting && (lck->lck_rw_shared_cnt == 0)) {
-			lck->lck_rw_waiting = FALSE;
-			do_wakeup = TRUE;
-		}
-
-		lck_rw_ilk_unlock(lck);
-
-		if (do_wakeup)
-			thread_wakeup((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))));
-
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_END,
-			     (int)lck, lck->lck_rw_shared_cnt, lck->lck_rw_want_upgrade, 0, 0);
-
-		return (FALSE);
-	}
-
-	lck->lck_rw_want_upgrade = TRUE;
-
-	while (lck->lck_rw_shared_cnt != 0) {
-		i = lock_wait_time[1];
-
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_START,
-			     (int)lck, lck->lck_rw_shared_cnt, i, 0, 0);
-
-#if	CONFIG_DTRACE
-		readers_at_sleep = lck->lck_rw_shared_cnt;
-		if ((lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK]) && wait_interval == 0) {
-			wait_interval = mach_absolute_time();
-		} else {
-			wait_interval = (unsigned) -1;
-		}
-#endif
-		if (i != 0) {
-			lck_rw_ilk_unlock(lck);
-			while (--i != 0 && lck->lck_rw_shared_cnt != 0)
-				continue;
-			lck_rw_ilk_lock(lck);
-		}
-
-		if (lck->lck_rw_shared_cnt != 0) {
-			lck->lck_rw_waiting = TRUE;
-			res = assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
-			if (res == THREAD_WAITING) {
-				lck_rw_ilk_unlock(lck);
-				res = thread_block(THREAD_CONTINUE_NULL);
-#if	CONFIG_DTRACE
-				slept = 1;
-#endif
-				lck_rw_ilk_lock(lck);
-			}
-		}
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_END,
-			     (int)lck, lck->lck_rw_shared_cnt, 0, 0, 0);
-	}
-
-	lck_rw_ilk_unlock(lck);
-
-#if	CONFIG_DTRACE
-	/*
-	 * We infer whether we took the sleep or the spin path from whether
-	 * readers_at_sleep was set.
-	 */
-	if (wait_interval != 0 && wait_interval != (unsigned) -1 && readers_at_sleep) {
-		if (slept == 0) {
-			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
-		} else {
-			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
-			    mach_absolute_time() - wait_interval, 1,
-			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
-		}
-	}
-#endif
-
-	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
-	return (TRUE);
-}
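-
-/*
- *	Illustrative sketch (not part of the original source): the caller-side
- *	pattern for the upgrade above.  On FALSE the shared hold has already
- *	been dropped, so the caller must re-acquire exclusively and should
- *	revalidate any state it examined under the shared hold.
- */
-#if 0
-static void
-example_upgrade(lck_rw_t *lck)
-{
-	lck_rw_lock_shared(lck);
-	/* ... decide that an update is needed ... */
-	if (!lck_rw_lock_shared_to_exclusive(lck)) {
-		/* The lock was lost: re-acquire and revalidate. */
-		lck_rw_lock_exclusive(lck);
-	}
-	/* ... perform the update ... */
-	lck_rw_unlock_exclusive(lck);
-}
-#endif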
-
-/*
- *      Routine:        lck_rw_lock_exclusive_to_shared_gen
- */
-void
-lck_rw_lock_exclusive_to_shared_gen(
-	lck_rw_t	*lck)
-{
-	boolean_t	   do_wakeup = FALSE;
-
-	lck_rw_ilk_lock(lck);
-
-	lck->lck_rw_shared_cnt++;
-	if (lck->lck_rw_want_upgrade)
-		lck->lck_rw_want_upgrade = FALSE;
-	else
-	 	lck->lck_rw_want_excl = FALSE;
-
-	if (lck->lck_rw_waiting) {
-		lck->lck_rw_waiting = FALSE;
-		do_wakeup = TRUE;
-	}
-
-	lck_rw_ilk_unlock(lck);
-
-	if (do_wakeup)
-		thread_wakeup((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))));
-
-	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
-}
-
-
-/*
- *	Routine:	lck_rw_try_lock_exclusive_gen
- *	Function:
- *		Tries to get a write lock.
- *
- *		Returns FALSE if the lock is not held on return.
- */
-
-boolean_t
-lck_rw_try_lock_exclusive_gen(
-	lck_rw_t	*lck)
-{
-	lck_rw_ilk_lock(lck);
-
-	if (lck->lck_rw_want_excl || lck->lck_rw_want_upgrade || lck->lck_rw_shared_cnt) {
-		/*
-		 *	Can't get lock.
-		 */
-		lck_rw_ilk_unlock(lck);
-		return(FALSE);
-	}
-
-	/*
-	 *	Have lock.
-	 */
-
-	lck->lck_rw_want_excl = TRUE;
-
-	lck_rw_ilk_unlock(lck);
-
-	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lck, 1);
-	return(TRUE);
-}
-
-/*
- *	Routine:	lck_rw_try_lock_shared_gen
- *	Function:
- *		Tries to get a read lock.
- *
- *		Returns FALSE if the lock is not held on return.
- */
-
-boolean_t
-lck_rw_try_lock_shared_gen(
-	lck_rw_t	*lck)
-{
-	lck_rw_ilk_lock(lck);
-
-	if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
-	    ((lck->lck_rw_shared_cnt == 0) || (lck->lck_rw_priv_excl))) {
-		lck_rw_ilk_unlock(lck);
-		return(FALSE);
-	}
-
-	lck->lck_rw_shared_cnt++;
-
-	lck_rw_ilk_unlock(lck);
-
-	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lck, 0);
-	return(TRUE);
-}
-
-
-/*
- *	Routine:	lck_rw_ext_backtrace
- */
-void
-lck_rw_ext_backtrace(
-	lck_rw_ext_t	*lck)
-{
-	unsigned int *stackptr, *stackptr_prev;
-	unsigned int frame;
-
-	/* On PPC, r1 is the stack pointer; the first word of each frame
-	   points to the caller's frame (the back chain). */
-	__asm__ volatile("mr %0,r1" : "=r" (stackptr));
-	frame = 0;
-	while (frame < LCK_FRAMES_MAX) {
-		stackptr_prev = stackptr;
-		stackptr = (unsigned int *)*stackptr;
-		/* Sanity-check the back chain: stop if the hop between
-		   frames exceeds 8KB. */
-		if ((((unsigned int)stackptr_prev) - ((unsigned int)stackptr)) > 8192)
-			break;
-		/* Record the saved LR (two words into the frame). */
-		lck->lck_rw_deb.stack[frame] = *(stackptr + 2);
-		frame++;
-	}
-	/* Zero any unused slots so stale entries don't survive. */
-	while (frame < LCK_FRAMES_MAX) {
-		lck->lck_rw_deb.stack[frame] = 0;
-		frame++;
-	}
-}
-
-
-/*
- *      Routine:        lck_rw_lock_exclusive_ext
- */
-void
-lck_rw_lock_exclusive_ext(
-	lck_rw_ext_t	*lck,
-	lck_rw_t	*rlck)
-{
-	int				i;
-	wait_result_t	res;
-	boolean_t		lock_miss = FALSE;
-	boolean_t		lock_wait = FALSE;
-	boolean_t		lock_stat;
-#if	CONFIG_DTRACE
-	uint64_t wait_interval = 0;
-	int slept = 0;
-	int readers_at_sleep;
-#endif
-
-	lck_rw_check_type(lck, rlck);
-
-	if ( ((lck->lck_rw_attr & (LCK_RW_ATTR_DEBUG|LCK_RW_ATTR_DIS_MYLOCK)) == LCK_RW_ATTR_DEBUG) 
-	     && (lck->lck_rw_deb.thread == current_thread()))
-		panic("rw lock (%p) recursive lock attempt\n", rlck);
-
-	lck_rw_ilk_lock(&lck->lck_rw);
-#if	CONFIG_DTRACE
-	readers_at_sleep = lck->lck_rw.lck_rw_shared_cnt;
-#endif
-
-	lock_stat = (lck->lck_rw_attr & LCK_RW_ATTR_STAT) ? TRUE : FALSE;
-
-	if (lock_stat)
-		lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt++;
-
-	/*
-	 *	Try to acquire the lck_rw.lck_rw_want_excl bit.
-	 */
-	while (lck->lck_rw.lck_rw_want_excl) {
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_START, (int)rlck, 0, 0, 0, 0);
-
-		if (lock_stat && !lock_miss) {
-			lock_miss = TRUE;
-			lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++;
-		}
-#if	CONFIG_DTRACE
-		if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) {
-			wait_interval = mach_absolute_time();
-		} else {
-			wait_interval = (unsigned) -1;
-		}
-#endif
-
-		i = lock_wait_time[1];
-		if (i != 0) {
-			lck_rw_ilk_unlock(&lck->lck_rw);
-			while (--i != 0 && lck->lck_rw.lck_rw_want_excl)
-				continue;
-			lck_rw_ilk_lock(&lck->lck_rw);
-		}
-
-		if (lck->lck_rw.lck_rw_want_excl) {
-			lck->lck_rw.lck_rw_waiting = TRUE;
-			res = assert_wait((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
-			if (res == THREAD_WAITING) {
-				if (lock_stat && !lock_wait) {
-					lock_wait = TRUE;
-					lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt++;
-				}
-				lck_rw_ilk_unlock(&lck->lck_rw);
-				res = thread_block(THREAD_CONTINUE_NULL);
-#if	CONFIG_DTRACE
-				slept = 1;
-#endif
-				lck_rw_ilk_lock(&lck->lck_rw);
-			}
-		}
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_END, (int)rlck, res, 0, 0, 0);
-	}
-	lck->lck_rw.lck_rw_want_excl = TRUE;
-
-	/* Wait for readers (and upgrades) to finish */
-
-	while ((lck->lck_rw.lck_rw_shared_cnt != 0) || lck->lck_rw.lck_rw_want_upgrade) {
-		i = lock_wait_time[1];
-
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_START,
-			     (int)rlck, lck->lck_rw.lck_rw_shared_cnt, lck->lck_rw.lck_rw_want_upgrade, i, 0);
-#if	CONFIG_DTRACE
-		if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) {
-			wait_interval = mach_absolute_time();
-		} else {
-			wait_interval = (unsigned) -1;
-		}
-#endif
-
-		if (lock_stat && !lock_miss) {
-			lock_miss = TRUE;
-			lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++;
-		}
-
-		if (i != 0) {
-			lck_rw_ilk_unlock(&lck->lck_rw);
-			while (--i != 0 && (lck->lck_rw.lck_rw_shared_cnt != 0 ||
-					    lck->lck_rw.lck_rw_want_upgrade))
-				continue;
-			lck_rw_ilk_lock(&lck->lck_rw);
-		}
-
-		if (lck->lck_rw.lck_rw_shared_cnt != 0 || lck->lck_rw.lck_rw_want_upgrade) {
-			lck->lck_rw.lck_rw_waiting = TRUE;
-			res = assert_wait((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
-			if (res == THREAD_WAITING) {
-				if (lock_stat && !lock_wait) {
-					lock_wait = TRUE;
-					lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt++;
-				}
-				lck_rw_ilk_unlock(&lck->lck_rw);
-				res = thread_block(THREAD_CONTINUE_NULL);
-#if	CONFIG_DTRACE
-				slept = 1;
-#endif
-				lck_rw_ilk_lock(&lck->lck_rw);
-			}
-		}
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_END,
-			     (int)rlck, lck->lck_rw.lck_rw_shared_cnt, lck->lck_rw.lck_rw_want_upgrade, res, 0);
-	}
-
-	lck->lck_rw_deb.pc_excl = __builtin_return_address(0);
-	if (LcksOpts & enaLkExtStck)
-		lck_rw_ext_backtrace(lck);
-	lck->lck_rw_deb.thread = current_thread();
-
-	lck_rw_ilk_unlock(&lck->lck_rw);
-#if	CONFIG_DTRACE
-	/*
-	 * Decide what latencies we suffered that are DTrace events.
-	 * If we have set wait_interval, then we either spun or slept.
-	 * At least we get out from under the interlock before we record,
-	 * which is the best we can do here to minimize the impact
-	 * of the tracing.
-	 */
-	if (wait_interval != 0 && wait_interval != (unsigned) -1) {
-		if (slept == 0) {
-			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
-			    mach_absolute_time() - wait_interval, 1);
-		} else {
-			/*
-			 * For the blocking case, we also record whether, when we
-			 * blocked, the lock was held for read or write, and how
-			 * many readers there were.  Notice that above we recorded
-			 * this before we dropped the interlock, so the count is
-			 * accurate.
-			 */
-			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
-			    mach_absolute_time() - wait_interval, 1,
-			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
-		}
-	}
-	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
-#endif
-}
-
-
-/*
- *      Routine:        lck_rw_done_ext
- */
-lck_rw_type_t
-lck_rw_done_ext(
-	lck_rw_ext_t	*lck,
-	lck_rw_t	*rlck)
-{
-	boolean_t	do_wakeup = FALSE;
-	lck_rw_type_t	lck_rw_type;
-
-
-	lck_rw_check_type(lck, rlck);
-
-	lck_rw_ilk_lock(&lck->lck_rw);
-
-	if (lck->lck_rw.lck_rw_shared_cnt != 0) {
-		lck_rw_type = LCK_RW_TYPE_SHARED;
-		lck->lck_rw.lck_rw_shared_cnt--;
-	}
-	else {	
-		lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
-		if (lck->lck_rw.lck_rw_want_upgrade) 
-			lck->lck_rw.lck_rw_want_upgrade = FALSE;
-		else if (lck->lck_rw.lck_rw_want_excl)
-			lck->lck_rw.lck_rw_want_excl = FALSE;
-		else
-			panic("rw lock (%p) bad state (0x%08X) on attempt to release a shared or exlusive right\n",
-				  rlck, lck->lck_rw.lck_rw_tag);
-		if (lck->lck_rw_deb.thread == THREAD_NULL)
-			panic("rw lock (%p) not held\n",
-			      rlck);
-		else if ( ((lck->lck_rw_attr & (LCK_RW_ATTR_DEBUG|LCK_RW_ATTR_DIS_THREAD)) == LCK_RW_ATTR_DEBUG) 
-			 && (lck->lck_rw_deb.thread != current_thread()))
-			panic("rw lock (%p) unlocked by non-owner(%p), current owner(%p)\n",
-				  rlck, current_thread(), lck->lck_rw_deb.thread);
-		lck->lck_rw_deb.thread = THREAD_NULL;
-	}
-
-	if (lck->lck_rw_attr & LCK_RW_ATTR_DEBUG)
-		lck->lck_rw_deb.pc_done = __builtin_return_address(0);
-
-	/*
-	 *	There is no reason to wakeup a waiting thread
-	 *	if the read-count is non-zero.  Consider:
-	 *		we must be dropping a read lock
-	 *		threads are waiting only if one wants a write lock
-	 *		if there are still readers, they can't proceed
-	 */
-
-	if (lck->lck_rw.lck_rw_waiting && (lck->lck_rw.lck_rw_shared_cnt == 0)) {
-		lck->lck_rw.lck_rw_waiting = FALSE;
-		do_wakeup = TRUE;
-	}
-
-	lck_rw_ilk_unlock(&lck->lck_rw);
-
-	if (do_wakeup)
-		thread_wakeup((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))));
-	LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lck_rw_type);
-	return(lck_rw_type);
-}
-
-
-/*
- *	Routine:	lck_rw_lock_shared_ext
- */
-void
-lck_rw_lock_shared_ext(
-	lck_rw_ext_t	*lck,
-	lck_rw_t	*rlck)
-{
-	int				i;
-	wait_result_t	res;
-	boolean_t		lock_miss = FALSE;
-	boolean_t		lock_wait = FALSE;
-	boolean_t		lock_stat;
-#if	CONFIG_DTRACE
-	uint64_t wait_interval = 0;
-	int slept = 0;
-	int readers_at_sleep;
-#endif
-
-	lck_rw_check_type(lck, rlck);
-
-	lck_rw_ilk_lock(&lck->lck_rw);
-#if	CONFIG_DTRACE
-	readers_at_sleep = lck->lck_rw.lck_rw_shared_cnt;
-#endif
-
-	lock_stat = (lck->lck_rw_attr & LCK_RW_ATTR_STAT) ? TRUE : FALSE;
-
-	if (lock_stat)
-		lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt++;
-
-	while ((lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade) &&
-	       ((lck->lck_rw.lck_rw_shared_cnt == 0) || (lck->lck_rw.lck_rw_priv_excl))) {
-		i = lock_wait_time[1];
-
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_START,
-			     (int)rlck, lck->lck_rw.lck_rw_want_excl, lck->lck_rw.lck_rw_want_upgrade, i, 0);
-#if	CONFIG_DTRACE
-		if ((lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK]) && wait_interval == 0) {
-			wait_interval = mach_absolute_time();
-		} else {
-			wait_interval = (unsigned) -1;
-		}
-#endif
-
-		if (lock_stat && !lock_miss) {
-			lock_miss = TRUE;
-			lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++;
-		}
-
-		if (i != 0) {
-			lck_rw_ilk_unlock(&lck->lck_rw);
-			while (--i != 0 && 
-			       (lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade) &&
-	       		       ((lck->lck_rw.lck_rw_shared_cnt == 0) || (lck->lck_rw.lck_rw_priv_excl)))
-				continue;
-			lck_rw_ilk_lock(&lck->lck_rw);
-		}
-
-		if ((lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade)  &&
-		   ((lck->lck_rw.lck_rw_shared_cnt == 0) || (lck->lck_rw.lck_rw_priv_excl))) {
-			lck->lck_rw.lck_rw_waiting = TRUE;
-			res = assert_wait((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
-			if (res == THREAD_WAITING) {
-				if (lock_stat && !lock_wait) {
-					lock_wait = TRUE;
-					lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt++;
-				}
-				lck_rw_ilk_unlock(&lck->lck_rw);
-				res = thread_block(THREAD_CONTINUE_NULL);
-#if	CONFIG_DTRACE
-				slept = 1;
-#endif
-				lck_rw_ilk_lock(&lck->lck_rw);
-			}
-		}
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_END,
-			     (int)rlck, lck->lck_rw.lck_rw_want_excl, lck->lck_rw.lck_rw_want_upgrade, res, 0);
-	}
-
-	lck->lck_rw.lck_rw_shared_cnt++;
-
-	lck_rw_ilk_unlock(&lck->lck_rw);
-#if	CONFIG_DTRACE
-	if (wait_interval != 0 && wait_interval != (unsigned) -1) {
-		if (slept == 0) {
-			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
-		} else {
-			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
-			    mach_absolute_time() - wait_interval, 0,
-			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
-		}
-	}
-	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
-#endif
-}
-
-
-/*
- *	Routine:	lck_rw_lock_shared_to_exclusive_ext
- *	Function:
- *		Improves a read-only lock to one with
- *		write permission.  If another reader has
- *		already requested an upgrade to a write lock,
- *		no lock is held upon return.
- *
- *		Returns FALSE if the upgrade *failed*.
- */
-
-boolean_t
-lck_rw_lock_shared_to_exclusive_ext(
-	lck_rw_ext_t	*lck,
-	lck_rw_t	*rlck)
-{
-	int	    i;
-	boolean_t	    do_wakeup = FALSE;
-	wait_result_t      res;
-	boolean_t		lock_miss = FALSE;
-	boolean_t		lock_wait = FALSE;
-	boolean_t		lock_stat;
-#if	CONFIG_DTRACE
-	uint64_t wait_interval = 0;
-	int slept = 0;
-#endif
-
-	lck_rw_check_type(lck, rlck);
-
-	if (lck->lck_rw_deb.thread == current_thread())
-		panic("rw lock (%p) recursive lock attempt\n", rlck);
-
-	lck_rw_ilk_lock(&lck->lck_rw);
-
-	lock_stat = (lck->lck_rw_attr & LCK_RW_ATTR_STAT) ? TRUE : FALSE;
-
-	if (lock_stat)
-		lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt++;
-
-	lck->lck_rw.lck_rw_shared_cnt--;	
-
-	if (lck->lck_rw.lck_rw_want_upgrade) {
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_START,
-			     (int)rlck, lck->lck_rw.lck_rw_shared_cnt, lck->lck_rw.lck_rw_want_upgrade, 0, 0);
-
-		/*
-		 *	Someone else has requested upgrade.
-		 *	Since we've released a read lock, wake
-		 *	him up.
-		 */
-		if (lck->lck_rw.lck_rw_waiting && (lck->lck_rw.lck_rw_shared_cnt == 0)) {
-			lck->lck_rw.lck_rw_waiting = FALSE;
-			do_wakeup = TRUE;
-		}
-
-		lck_rw_ilk_unlock(&lck->lck_rw);
-
-		if (do_wakeup)
-			thread_wakeup((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))));
-
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_END,
-			     (int)rlck, lck->lck_rw.lck_rw_shared_cnt, lck->lck_rw.lck_rw_want_upgrade, 0, 0);
-
-		return (FALSE);
-	}
-
-	lck->lck_rw.lck_rw_want_upgrade = TRUE;
-
-	while (lck->lck_rw.lck_rw_shared_cnt != 0) {
-		i = lock_wait_time[1];
-
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_START,
-			     (int)rlck, lck->lck_rw.lck_rw_shared_cnt, i, 0, 0);
-
-		if (lock_stat && !lock_miss) {
-			lock_miss = TRUE;
-			lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++;
-		}
-#if	CONFIG_DTRACE
-		if ((lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK]) && wait_interval == 0) {
-			wait_interval = mach_absolute_time();
-		} else {
-			wait_interval = (unsigned) -1;
-		}
-#endif
-
-		if (i != 0) {
-			lck_rw_ilk_unlock(&lck->lck_rw);
-			while (--i != 0 && lck->lck_rw.lck_rw_shared_cnt != 0)
-				continue;
-			lck_rw_ilk_lock(&lck->lck_rw);
-		}
-
-		if (lck->lck_rw.lck_rw_shared_cnt != 0) {
-			lck->lck_rw.lck_rw_waiting = TRUE;
-			res = assert_wait((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
-			if (res == THREAD_WAITING) {
-				if (lock_stat && !lock_wait) {
-					lock_wait = TRUE;
-					lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt++;
-				}
-				lck_rw_ilk_unlock(&lck->lck_rw);
-				res = thread_block(THREAD_CONTINUE_NULL);
-#if	CONFIG_DTRACE
-				slept = 1;
-#endif
-				lck_rw_ilk_lock(&lck->lck_rw);
-			}
-		}
-		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_END,
-			     (int)rlck, lck->lck_rw.lck_rw_shared_cnt, 0, 0, 0);
-	}
-
-	lck->lck_rw_deb.pc_excl = __builtin_return_address(0);
-	if (LcksOpts & enaLkExtStck)
-		lck_rw_ext_backtrace(lck);
-	lck->lck_rw_deb.thread = current_thread();
-
-	lck_rw_ilk_unlock(&lck->lck_rw);
-
-#if	CONFIG_DTRACE
-	/*
-	 * If we've travelled a path with no spin or sleep, then wait_interval
-	 * is still zero.
-	 */
-	if (wait_interval != 0 && wait_interval != (unsigned) -1) {
-		if (slept == 0) {
-			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
-		} else {
-			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck, mach_absolute_time() - wait_interval, 0);
-		}
-	}
-#endif
-
-	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
-
-	return (TRUE);
-}
-
-/*
- *      Routine:        lck_rw_lock_exclusive_to_shared_ext
- */
-void
-lck_rw_lock_exclusive_to_shared_ext(
-	lck_rw_ext_t	*lck,
-	lck_rw_t	*rlck)
-{
-	boolean_t	   do_wakeup = FALSE;
-
-	lck_rw_check_type(lck, rlck);
-
-	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
-			     (int)rlck, lck->lck_rw.lck_rw_want_excl, lck->lck_rw.lck_rw_want_upgrade, 0, 0);
-
-	lck_rw_ilk_lock(&lck->lck_rw);
-
-	lck->lck_rw.lck_rw_shared_cnt++;
-	if (lck->lck_rw.lck_rw_want_upgrade)
-		lck->lck_rw.lck_rw_want_upgrade = FALSE;
-	else if (lck->lck_rw.lck_rw_want_excl)
-	 	lck->lck_rw.lck_rw_want_excl = FALSE;
-	else
-		panic("rw lock (%p) bad state (0x%08X) on attempt to release a shared or exlusive right\n",
-			  rlck, lck->lck_rw.lck_rw_tag);
-	if (lck->lck_rw_deb.thread == THREAD_NULL)
-		panic("rw lock (%p) not held\n",
-		      rlck);
-	else if ( ((lck->lck_rw_attr & (LCK_RW_ATTR_DEBUG|LCK_RW_ATTR_DIS_THREAD)) == LCK_RW_ATTR_DEBUG) 
-		  && (lck->lck_rw_deb.thread != current_thread()))
-		panic("rw lock (%p) unlocked by non-owner(%p), current owner(%p)\n",
-			  rlck, current_thread(), lck->lck_rw_deb.thread);
-
-	lck->lck_rw_deb.thread = THREAD_NULL;
-
-	if (lck->lck_rw.lck_rw_waiting) {
-		lck->lck_rw.lck_rw_waiting = FALSE;
-		do_wakeup = TRUE;
-	}
-
-	lck_rw_ilk_unlock(&lck->lck_rw);
-
-	if (do_wakeup)
-		thread_wakeup((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))));
-
-	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
-			     (int)rlck, lck->lck_rw.lck_rw_want_excl, lck->lck_rw.lck_rw_want_upgrade, lck->lck_rw.lck_rw_shared_cnt, 0);
-
-	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
-}
-
-
-/*
- *	Routine:	lck_rw_try_lock_exclusive_ext
- *	Function:
- *		Tries to get a write lock.
- *
- *		Returns FALSE if the lock is not held on return.
- */
-
-boolean_t
-lck_rw_try_lock_exclusive_ext(
-	lck_rw_ext_t	*lck,
-	lck_rw_t	*rlck)
-{
-	boolean_t		lock_stat;
-
-	lck_rw_check_type(lck, rlck);
-
-	lck_rw_ilk_lock(&lck->lck_rw);
-
-	lock_stat = (lck->lck_rw_attr & LCK_RW_ATTR_STAT) ? TRUE : FALSE;
-
-	if (lock_stat)
-		lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt++;
-
-	if (lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade || lck->lck_rw.lck_rw_shared_cnt) {
-		/*
-		 *	Can't get lock.
-		 */
-		if (lock_stat) {
-			lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++;
-		}
-		lck_rw_ilk_unlock(&lck->lck_rw);
-		return(FALSE);
-	}
-
-	/*
-	 *	Have lock.
-	 */
-
-	lck->lck_rw.lck_rw_want_excl = TRUE;
-	lck->lck_rw_deb.pc_excl = __builtin_return_address(0);
-	if (LcksOpts & enaLkExtStck)
-		lck_rw_ext_backtrace(lck);
-	lck->lck_rw_deb.thread = current_thread();
-
-	lck_rw_ilk_unlock(&lck->lck_rw);
-
-	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lck, 1);
-
-	return(TRUE);
-}
-
-/*
- *	Routine:	lck_rw_try_lock_shared_ext
- *	Function:
- *		Tries to get a read lock.
- *
- *		Returns FALSE if the lock is not held on return.
- */
-
-boolean_t
-lck_rw_try_lock_shared_ext(
-	lck_rw_ext_t	*lck,
-	lck_rw_t	*rlck)
-{
-	boolean_t		lock_stat;
-
-	lck_rw_check_type(lck, rlck);
-
-	lck_rw_ilk_lock(&lck->lck_rw);
-
-	lock_stat = (lck->lck_rw_attr & LCK_RW_ATTR_STAT) ? TRUE : FALSE;
-
-	if (lock_stat)
-		lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt++;
-
-	if ((lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade) &&
-	    ((lck->lck_rw.lck_rw_shared_cnt == 0) || (lck->lck_rw.lck_rw_priv_excl))) {
-		if (lock_stat) {
-			lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++;
-		}
-		lck_rw_ilk_unlock(&lck->lck_rw);
-		return(FALSE);
-	}
-
-	lck->lck_rw.lck_rw_shared_cnt++;
-
-	lck_rw_ilk_unlock(&lck->lck_rw);
-
-	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lck, 0);
-
-	return(TRUE);
-}
-
-void
-lck_rw_check_type(
-	lck_rw_ext_t	*lck,
-	lck_rw_t		*rlck)
-{
-	if (lck->lck_rw_deb.type != RW_TAG)
-		panic("rw lock (%p) not a rw lock type (0x%08X)\n",rlck, lck->lck_rw_deb.type);
-}
-
-void
-lck_rw_assert_ext(
-	lck_rw_ext_t	*lck,
-	lck_rw_t	*rlck,
-	unsigned int	type)
-{
-	lck_rw_check_type(lck, rlck);
-
-	switch (type) {
-	case LCK_RW_ASSERT_SHARED:
-		if (lck->lck_rw.lck_rw_shared_cnt != 0) {
-			return;
-		}
-		break;
-	case LCK_RW_ASSERT_EXCLUSIVE:
-		if ((lck->lck_rw.lck_rw_want_excl ||
-		     lck->lck_rw.lck_rw_want_upgrade) &&
-		    lck->lck_rw.lck_rw_shared_cnt == 0) {
-			return;
-		}
-		break;
-	case LCK_RW_ASSERT_HELD:
-		if (lck->lck_rw.lck_rw_want_excl ||
-		    lck->lck_rw.lck_rw_want_upgrade ||
-		    lck->lck_rw.lck_rw_shared_cnt != 0) {
-			return;
-		}
-		break;
-	default:
-		break;
-	}
-
-	panic("rw lock (%p -> %p) not held (mode=%u)\n", rlck, lck, type);
-}
-
-void
-lck_rw_assert(
-	lck_rw_t	*lck,
-	unsigned int	type)
-{
-	if (lck->lck_rw_tag != LCK_RW_TAG_INDIRECT) {
-		switch (type) {
-		case LCK_RW_ASSERT_SHARED:
-			if (lck->lck_rw_shared_cnt != 0) {
-				return;
-			}
-			break;
-		case LCK_RW_ASSERT_EXCLUSIVE:
-			if (lck->lck_rw_shared_cnt == 0 &&
-			    (lck->lck_rw_want_excl ||
-			     lck->lck_rw_want_upgrade)) {
-				return;
-			}
-			break;
-		case LCK_RW_ASSERT_HELD:
-			if (lck->lck_rw_shared_cnt != 0 ||
-			    lck->lck_rw_want_excl ||
-			    lck->lck_rw_want_upgrade) {
-				return;
-			}
-			break;
-		default:
-			break;
-		}
-		panic("rw lock (%p) not held (mode=%u)\n", lck, type);
-	} else {
-		lck_rw_assert_ext((lck_rw_ext_t *)lck->lck_rw_ptr,
-				  (lck_rw_t *)lck,
-				  type);
-	}
-}
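-
-/*
- *	Illustrative sketch (not part of the original source): guarding an
- *	internal routine with lck_rw_assert().  The routine name is
- *	hypothetical; the assertion panics unless the lock is held in the
- *	stated mode.
- */
-#if 0
-static void
-example_requires_exclusive(lck_rw_t *lck)
-{
-	lck_rw_assert(lck, LCK_RW_ASSERT_EXCLUSIVE);
-	/* ... code that relies on holding the write side ... */
-}
-#endif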
-
-/*
- * The C portion of the mutex package.  These routines are only invoked
- * if the optimized assembler routines can't do the work.
- */
-
-/*
- * Forward definition 
- */
-
-void lck_mtx_ext_init(
-	lck_mtx_ext_t	*lck,
-	lck_grp_t	*grp,
-	lck_attr_t	*attr);
-
-/*
- *      Routine:        lck_mtx_alloc_init
- */
-lck_mtx_t *
-lck_mtx_alloc_init(
-	lck_grp_t	*grp,
-	lck_attr_t	*attr) {
-	lck_mtx_t	*lck;
-
-	if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0)
-		lck_mtx_init(lck, grp, attr);
-		
-	return(lck);
-}
-
-/*
- *      Routine:        lck_mtx_free
- */
-void
-lck_mtx_free(
-	lck_mtx_t	*lck,
-	lck_grp_t	*grp) {
-	lck_mtx_destroy(lck, grp);
-	kfree((void *)lck, sizeof(lck_mtx_t));
-}
-
-/*
- *      Routine:        lck_mtx_init
- */
-void
-lck_mtx_init(
-	lck_mtx_t	*lck,
-	lck_grp_t	*grp,
-	lck_attr_t	*attr) {
-	lck_mtx_ext_t	*lck_ext;
-	lck_attr_t	*lck_attr;
-
-	if (attr != LCK_ATTR_NULL)
-		lck_attr = attr;
-	else
-		lck_attr = &LockDefaultLckAttr;
-
-	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
-		if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) {
-			lck_mtx_ext_init(lck_ext, grp, lck_attr);	
-			lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
-			lck->lck_mtx_ptr = lck_ext;
-		}
-	} else {
-		lck->lck_mtx_data = 0;
-		lck->lck_mtx_waiters = 0;
-		lck->lck_mtx_pri = 0;
-	}
-	lck_grp_reference(grp);
-	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
-}
-
-/*
- *      Routine:        lck_mtx_init_ext
- */
-void
-lck_mtx_init_ext(
-	lck_mtx_t	*lck,
-	lck_mtx_ext_t	*lck_ext,
-	lck_grp_t	*grp,
-	lck_attr_t	*attr)
-{
-	lck_attr_t	*lck_attr;
-
-	if (attr != LCK_ATTR_NULL)
-		lck_attr = attr;
-	else
-		lck_attr = &LockDefaultLckAttr;
-
-	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
-		lck_mtx_ext_init(lck_ext, grp, lck_attr);
-		lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
-		lck->lck_mtx_ptr = lck_ext;
-	} else {
-		lck->lck_mtx_data = 0;
-		lck->lck_mtx_waiters = 0;
-		lck->lck_mtx_pri = 0;
-	}
-	lck_grp_reference(grp);
-	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
-}
-
-/*
- *      Routine:        lck_mtx_ext_init
- */
-void
-lck_mtx_ext_init(
-	lck_mtx_ext_t	*lck,
-	lck_grp_t	*grp,
-	lck_attr_t	*attr) {
-
-	bzero((void *)lck, sizeof(lck_mtx_ext_t));
-
-	if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
-		lck->lck_mtx_deb.type = MUTEX_TAG;
-		lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
-	}
-
-	lck->lck_mtx_grp = grp;
-
-	if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
-		 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
-}
-
-/*
- *      Routine:        lck_mtx_destroy
- */
-void
-lck_mtx_destroy(
-	lck_mtx_t	*lck,
-	lck_grp_t	*grp) {
-	boolean_t lck_is_indirect;
-	
-	if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
-		return;
-	lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT);
-	lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED;
-	if (lck_is_indirect)
-		kfree((void *)lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t));
-
-	lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
-	lck_grp_deallocate(grp);
-	return;
-}
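-
-/*
- *	Illustrative sketch (not part of the original source): a typical
- *	lck_mtx_t lifecycle.  Group and attribute handling mirrors the rw
- *	case; LCK_ATTR_NULL selects LockDefaultLckAttr, and a debug
- *	attribute switches the mutex to the indirect (lck_mtx_ext_t) form.
- */
-#if 0
-static void
-example_lck_mtx_usage(void)
-{
-	lck_grp_t	*grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
-	lck_mtx_t	*mtx = lck_mtx_alloc_init(grp, LCK_ATTR_NULL);
-
-	lck_mtx_lock(mtx);
-	/* ... critical section; may block ... */
-	lck_mtx_unlock(mtx);
-
-	lck_mtx_free(mtx, grp);
-	lck_grp_free(grp);
-}
-#endif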
-
-
-#if	MACH_KDB
-/*
- * Routines to print out simple_locks and mutexes in a nicely-formatted
- * fashion.
- */
-
-const char *simple_lock_labels = "ENTRY    ILK THREAD   DURATION CALLER";
-
-void	db_print_simple_lock(
-			simple_lock_t	addr);
-
-void
-db_show_one_simple_lock (db_expr_t addr, boolean_t have_addr,
-			 __unused db_expr_t count,
-			 __unused char *modif)
-{
-	simple_lock_t	saddr = (simple_lock_t)(unsigned long)addr;
-
-	if (saddr == (simple_lock_t)0 || !have_addr) {
-		db_error ("No simple_lock\n");
-	}
-#if	USLOCK_DEBUG
-	else if (saddr->lock_type != USLOCK_TAG)
-		db_error ("Not a simple_lock\n");
-#endif	/* USLOCK_DEBUG */
-
-	db_printf ("%s\n", simple_lock_labels);
-	db_print_simple_lock (saddr);
-}
-
-void
-db_print_simple_lock (
-	simple_lock_t	addr)
-{
-
-	db_printf ("%08x %3d", addr, *hw_lock_addr(addr->interlock));
-#if	USLOCK_DEBUG
-	db_printf (" %08x", addr->debug.lock_thread);
-	db_printf (" %08x ", addr->debug.duration[1]);
-	db_printsym ((int)addr->debug.lock_pc, DB_STGY_ANY);
-#endif	/* USLOCK_DEBUG */
-	db_printf ("\n");
-}
-
-void
-db_show_one_lock(
-	lock_t  *lock)
-{
-	db_printf("shared_count = 0x%x, %swant_upgrade, %swant_exclusive, ",
-		  lock->lck_rw.lck_rw_shared_cnt,
-		  lock->lck_rw.lck_rw_want_upgrade ? "" : "!",
-		  lock->lck_rw.lck_rw_want_excl ? "" : "!");
-	db_printf("%swaiting\n", 
-		  lock->lck_rw.lck_rw_waiting ? "" : "!");
-	db_printf("%sInterlock\n",
-		  lock->lck_rw.lck_rw_interlock ? "" : "!");
-}
-
-#endif	/* MACH_KDB */
-
diff --git a/osfmk/ppc/low_trace.h b/osfmk/ppc/low_trace.h
deleted file mode 100644
index e5a81d7a9..000000000
--- a/osfmk/ppc/low_trace.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-/*
- *
- *	These are the structures and constants used for the low-level trace
- */
-
-
-
-
-
-
-#ifndef _LOW_TRACE_H_
-#define _LOW_TRACE_H_
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct LowTraceRecord {
-
-	unsigned short	LTR_cpu;			/* 0000 - CPU address */
-	unsigned short	LTR_excpt;			/* 0002 - Exception code */
-	unsigned int	LTR_timeHi;			/* 0004 - High order time */
-	unsigned int	LTR_timeLo;			/* 0008 - Low order time */
-	unsigned int	LTR_cr;				/* 000C - CR */
-	unsigned int	LTR_dsisr;			/* 0010 - DSISR */
-	unsigned int	LTR_rsvd0;			/* 0014 - reserved */
-	uint64_t		LTR_srr0;			/* 0018 - SRR0 */
-
-	uint64_t		LTR_srr1;			/* 0020 - SRR1 */
-	uint64_t		LTR_dar;			/* 0028 - DAR */
-	uint64_t		LTR_save;			/* 0030 - savearea */
-	uint64_t		LTR_lr;				/* 0038 - LR */
-
-	uint64_t		LTR_ctr;			/* 0040 - CTR */
-	uint64_t		LTR_r0;				/* 0048 - R0 */
-	uint64_t		LTR_r1;				/* 0050 - R1 */
-	uint64_t		LTR_r2;				/* 0058 - R2 */
-
-	uint64_t		LTR_r3;				/* 0060 - R3 */
-	uint64_t		LTR_r4;				/* 0068 - R4 */
-	uint64_t		LTR_r5;				/* 0070 - R5 */
-	uint64_t		LTR_r6;				/* 0078 - R6 */
-
-} LowTraceRecord;		
-#pragma pack()
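The hex comments on the right give each field's byte offset inside the packed record. A minimal C sketch of how those documented offsets could be spot-checked, assuming C11 _Static_assert and that this header has been included:

    #include <stddef.h>

    /* Spot-check a few of the annotated offsets of the packed trace record. */
    _Static_assert(offsetof(LowTraceRecord, LTR_timeHi) == 0x04, "0004 comment");
    _Static_assert(offsetof(LowTraceRecord, LTR_srr0)   == 0x18, "0018 comment");
    _Static_assert(offsetof(LowTraceRecord, LTR_r6)     == 0x78, "0078 comment");
    _Static_assert(sizeof(LowTraceRecord)               == 0x80, "two 64-byte lines");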
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct traceWork {
-
-	unsigned int traceCurr;				/* Address of next slot */
-	unsigned int traceMask;				/* Types to be traced */
-	unsigned int traceStart;			/* Start of trace table */
-	unsigned int traceEnd;				/* End of trace table */
-	unsigned int traceMsnd;				/* Saved trace mask */
-	unsigned int traceSize;				/* Size of trace table. Min 1 page */
-	unsigned int traceGas[2];
-} traceWork;
-#pragma pack()
-
-extern traceWork trcWork;
-extern unsigned int lastTrace;			/* Value of low-level exception trace controls */
-
-
-#endif /* ifndef _LOW_TRACE_H_ */
diff --git a/osfmk/ppc/lowglobals.h b/osfmk/ppc/lowglobals.h
deleted file mode 100644
index 20503a89b..000000000
--- a/osfmk/ppc/lowglobals.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- *		Header files for the Low Memory Globals (lg) 
- */
-#ifndef	_LOW_MEMORY_GLOBALS_H_
-#define	_LOW_MEMORY_GLOBALS_H_
-
-#include <mach/mach_types.h>
-#include <mach/vm_types.h>
-#include <mach/machine/vm_types.h>
-#include <mach/vm_prot.h>
-#include <ppc/proc_reg.h>
-#include <ppc/savearea.h>
-#include <ppc/low_trace.h>
-#include <ppc/Diagnostics.h>
-#include <ppc/mappings.h>
-
-/*
- * Don't change these structures unless you change the corresponding assembly code
- * which is in lowmem_vectors.s
- */
- 
-/* 
- *	This is where we put constants, pointers, and data areas that must be accessed
- *	quickly through assembler.  They are designed to be accessed directly with 
- *	absolute addresses, not via a base register.  This is a global area, and not
- *	per processor.
- */
- 
-#pragma pack(4)								/* Make sure the structure stays as we defined it */
-typedef struct lowglo {
-
-	unsigned long	lgForceAddr[5*1024];	/* 0000 Force to page 5 */
-	unsigned char	lgVerCode[8];			/* 5000 System verification code */
-	unsigned long long lgZero;				/* 5008 Double constant 0 */
-	unsigned int	lgPPStart;				/* 5010 Start of per_proc blocks */
-	unsigned int    lgCHUDXNUfnStart;		/* 5014 CHUD XNU function glue table */
-	unsigned int	lgMckFlags;				/* 5018 Machine check flags */
-	unsigned int    lgVersion;				/* 501C Pointer to kernel version string */
-	uint64_t		lgPMWvaddr;				/* 5020 physical memory window virtual address */
-	uint64_t		lgUMWvaddr;				/* 5028 user memory window virtual address */
-	unsigned int	lgVMMforcedFeats;		/* 5030 VMM boot-args forced feature flags */
-	unsigned int	lgMaxDec;				/* 5034 Maximum decrementer we can set */
-	unsigned int	lgPmsCtlp;				/* 5038 Pointer to power management stepper control */
-	unsigned int	lgRsv03C[17];			/* 503C reserved */
-	traceWork		lgTrcWork;				/* 5080 Tracing control block - trcWork */
-	unsigned int	lgRsv0A0[24];			/* 50A0 reserved */
-	struct Saveanchor	lgSaveanchor;		/* 5100 Savearea anchor - saveanchor */
-	unsigned int	lgRsv140[16];			/* 5140 reserved */
-	unsigned int	lgTlbieLck;				/* 5180 TLBIE lock */
-	unsigned int	lgRsv184[31];			/* 5184 reserved - push to next line */
-	struct diagWork	lgdgWork;				/* 5200 Start of diagnostic work area */
-	unsigned int	lglcksWork;				/* 5220 lcks option */
-	unsigned int	lgRsv224[23];			/* 5224 reserved */
-	pcfg 			lgpPcfg[8];				/* 5280 Page configurations */
-	unsigned int	lgRst2A0[24];			/* 52A0 reserved */
-	unsigned int	lgKillResv;				/* 5300 line used to kill reservations */
-	unsigned int	lgKillResvpad[31];		/* 5304 pad reservation kill line */
-
-	unsigned int	lgRsv380[32];			/* 5380 - 5400 reserved  */
-
-	unsigned int	lgRsv400[32];			/* 5400 - 5480 reserved  */
-	uint32_t		lgKmodptr;		/* 0x5480 Pointer to kmod, debugging aid */
-	uint32_t		lgTransOff;		/* 0x5484 Pointer to kdp_trans_off, debugging aid */
-	uint32_t		lgReadIO;		/* 0x5488 Pointer to kdp_read_io, debugging aid */
-	uint32_t		lgDevSlot1;		/* 0x548C For developer use */
-	uint32_t		lgDevSlot2;		/* 0x5490 For developer use */
-	uint32_t		lgOSVersion;		/* 0x5494 Pointer to OS version string */
-	uint32_t		lgRebootFlag;		/* 0x5498 Pointer to debugger reboot trigger */
-	uint32_t		lgManualPktAddr;	/* 0x549C Pointer to manual packet structure */
-	uint32_t		lgRsv49C[728];		/* 0x54A0 Reserved - push to 1 page */
-} lowglo;
-
-extern lowglo lowGlo;
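Since lgForceAddr pushes everything that follows out to page 5 and the comments give absolute low-memory offsets, a field's structure offset doubles as its physical address. A hedged sketch of that arithmetic, assuming a 32-bit build (4-byte unsigned long) and a C11 compiler:

    #include <stddef.h>

    /* 5*1024 longs * 4 bytes = 0x5000, so fields start on page 5 as annotated. */
    _Static_assert(offsetof(lowglo, lgVerCode) == 0x5000, "5000 comment");
    _Static_assert(offsetof(lowglo, lgKmodptr) == 0x5480, "0x5480 comment");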
-
-#endif /* _LOW_MEMORY_GLOBALS_H_ */
diff --git a/osfmk/ppc/lowmem_vectors.s b/osfmk/ppc/lowmem_vectors.s
deleted file mode 100644
index 5e2893d36..000000000
--- a/osfmk/ppc/lowmem_vectors.s
+++ /dev/null
@@ -1,4010 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#include <assym.s>
-#include <debug.h>
-#include <db_machine_commands.h>
-	
-#include <mach_debug.h>
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <ppc/exception.h>
-#include <ppc/Performance.h>
-#include <ppc/savearea.h>
-#include <mach/ppc/vm_param.h>
-
-#define ESPDEBUG 0
-#define INSTRUMENT 0
-
-#define featAltivec 29
-#define wasNapping 30
-
-#define	VECTOR_SEGMENT	.section __VECTORS, __interrupts
-
-			VECTOR_SEGMENT
-
-			.globl	EXT(lowGlo)
-EXT(lowGlo):
-
-			.globl	EXT(ExceptionVectorsStart)
-
-EXT(ExceptionVectorsStart):							/* Used if relocating the exception vectors */
-baseR:												/* Used so we have more readable code */
-
-;
-;			Handle system reset.
-;			We do not ever expect a hard reset so we do not actually check.
-;			When we come here, we check for a RESET_HANDLER_START (which means we are
-;			waking up from sleep), a RESET_HANDLER_BUPOR (which is used for bring-up
-;			when starting directly from a POR), and RESET_HANDLER_IGNORE (which means
-;			ignore the interrupt).
-;
-;			Some machines (so far, 32-bit guys) will always ignore a non-START interrupt.
-;			The ones that do take it check whether the interrupt is to be ignored.  This is
-;			always the case until the previous reset is handled (i.e., we have exited
-;			from the debugger).
-;
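In rough C terms the dispatch below behaves like the following sketch (the struct, helper names, and is_64bit flag are illustrative; the real code also folds the 32-bit always-ignore rule into its condition-register tests):

    struct reset_handler { uint32_t type, call, arg; };

    void handle_reset(struct reset_handler *rh, int is_64bit) {
        if (rh->type == RESET_HANDLER_START) {            /* waking from sleep */
            rh->type = RESET_HANDLER_NULL;
            ((void (*)(uint32_t))rh->call)(rh->arg);      /* branch to saved handler */
        } else if (rh->type == RESET_HANDLER_BUPOR) {
            resetPOR();                                   /* bring-up from power-on reset */
        } else if (rh->type == RESET_HANDLER_IGNORE || !is_64bit) {
            return;                                       /* rfi: drop the reset */
        } else {
            rh->type = RESET_HANDLER_IGNORE;              /* debounce further resets */
            take_exception(T_RESET);                      /* join common entry */
        }
    }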
-			. = 0xf0
-			.globl	EXT(ResetHandler)
-EXT(ResetHandler):
-			.long	0x0
-			.long	0x0
-			.long	0x0
-
-			. = 0x100
-.L_handler100:
-			mtsprg	2,r13			/* Save R13 */
-			mtsprg	3,r11			/* Save R11 */
-			lwz		r13,lo16(EXT(ResetHandler)-EXT(ExceptionVectorsStart)+RESETHANDLER_TYPE)(br0)	; Get reset type
-			mfcr	r11
-			cmpi	cr0,r13,RESET_HANDLER_START
-			bne		resetexc
-
-			li		r11,RESET_HANDLER_NULL
-			stw		r11,lo16(EXT(ResetHandler)-EXT(ExceptionVectorsStart)+RESETHANDLER_TYPE)(br0)	; Clear reset type
-
-			lwz		r4,lo16(EXT(ResetHandler)-EXT(ExceptionVectorsStart)+RESETHANDLER_CALL)(br0)
-			lwz		r3,lo16(EXT(ResetHandler)-EXT(ExceptionVectorsStart)+RESETHANDLER_ARG)(br0)
-			mtlr	r4
-			blr
-
-resetexc:	cmplwi	r13,RESET_HANDLER_BUPOR			; Special bring up POR sequence?
-			bne		resetexc2						; No...
-			lis		r4,hi16(EXT(resetPOR))			; Get POR code
-			ori		r4,r4,lo16(EXT(resetPOR))		; The rest
-			mtlr	r4								; Set it
-			blr										; Jump to it....
-
-resetexc2:	cmplwi	cr1,r13,RESET_HANDLER_IGNORE	; Are we ignoring these? (Software debounce)
-
-			mfsprg	r13,0							; Get per_proc
-			lwz		r13,pfAvailable(r13)			; Get the features
-			rlwinm.	r13,r13,0,pf64Bitb,pf64Bitb		; Is this a 64-bit machine?
-			cror	cr1_eq,cr0_eq,cr1_eq			; See if we want to take this
-			bne--	cr1,rxCont						; Yes, continue...
-			bne--	rxIg64							; 64-bit path...
-
-			mtcr	r11								; Restore the CR
-			mfsprg	r13,2							; Restore R13
-			mfsprg	r11,0							; Get per_proc
-			lwz		r11,pfAvailable(r11)			; Get the features
-			mtsprg	2,r11							; Restore sprg2
-			mfsprg	r11,3							; Restore R11
-			rfi										; Return and ignore the reset
-
-rxIg64:		mtcr	r11								; Restore the CR
-			mfsprg	r11,0							; Get per_proc
-			mtspr	hsprg0,r14						; Save a register
-			ld		r14,UAW(r11)					; Get the User Assist DoubleWord
-			mfsprg	r13,2							; Restore R13
-			lwz		r11,pfAvailable(r11)			; Get the features
-			mtsprg	2,r11							; Restore sprg2
-			mfsprg	r11,3							; Restore R11
-			mtsprg	3,r14							; Set the UAW in sprg3
-			mfspr	r14,hsprg0						; Restore R14
-			rfid									; Return and ignore the reset
-
-rxCont:		mtcr	r11
-			li		r11,RESET_HANDLER_IGNORE		; Get set to ignore
-			stw		r11,lo16(EXT(ResetHandler)-EXT(ExceptionVectorsStart)+RESETHANDLER_TYPE)(br0)	; Start ignoring these
-			mfsprg	r13,1							/* Get the exception save area */
-			li		r11,T_RESET						/* Set 'rupt code */
-			b		.L_exception_entry				/* Join common... */
-
-/*
- * 			Machine check 
- */
-
-			. = 0x200
-.L_handler200:
-			mtsprg	2,r13							; Save R13 
-			mtsprg	3,r11							; Save R11
-
-			.globl	EXT(extPatchMCK)
-LEXT(extPatchMCK)									; This is patched to a nop for 64-bit 
-			b		h200aaa							; Skip 64-bit code... 
-
-;
-;			Fall through here for 970 MCKs.
-;
-
-			li		r11,1							; ?
-			sldi	r11,r11,32+3					; ?
-			mfspr	r13,hid4						; ?
-			or		r11,r11,r13						; ?
-			sync
-			mtspr	hid4,r11						; ?
-			isync
-			li		r11,1							; ?
-			sldi	r11,r11,32+8					; ?
-			andc	r13,r13,r11						; ?
-			lis		r11,0xE000						; Get the unlikeliest ESID possible
-			sync
-			mtspr	hid4,r13						; ?
-			isync									; ?
-			
-			srdi	r11,r11,1						; ?
-			slbie	r11								; ?
-			sync
-			isync
-		
-			li		r11,T_MACHINE_CHECK				; Set rupt code
-			b		.L_exception_entry				; Join common...
-
-;
-;			Preliminary checking of other MCKs
-;
-
-h200aaa:	mfsrr1	r11								; Get the SRR1
-			mfcr	r13								; Save the CR
-			
-			rlwinm.	r11,r11,0,dcmck,dcmck			; ?
-			beq+	notDCache						; ?
-			
-			sync
-			mfspr	r11,msscr0						; ?
-			dssall									; ?
-			sync
-			isync
-
-			oris	r11,r11,hi16(dl1hwfm)			; ?
-			mtspr	msscr0,r11						; ?
-			
-rstbsy:		mfspr	r11,msscr0						; ?
-			
-			rlwinm.	r11,r11,0,dl1hwf,dl1hwf			; ?
-			bne		rstbsy							; ?
-			
-			sync									; ?
-
-			mfsprg	r11,0							; Get the per_proc
-			mtcrf	255,r13							; Restore CRs
-			lwz		r13,hwMachineChecks(r11)		; Get old count
-			addi	r13,r13,1						; Count this one
-			stw		r13,hwMachineChecks(r11)		; Set new count
-			lwz		r11,pfAvailable(r11)			; Get the feature flags
-			mfsprg	r13,2							; Restore R13
-			mtsprg	2,r11							; Set the feature flags
-			mfsprg	r11,3							; Restore R11
-			rfi										; Return
-
-notDCache:	mtcrf	255,r13							; Restore CRs
-			li		r11,T_MACHINE_CHECK				; Set rupt code
-			b		.L_exception_entry				; Join common...
-
-
-/*
- * 			Data access - page fault, invalid memory rights for operation
- */
-
-			. = 0x300
-.L_handler300:
-			mtsprg	2,r13							/* Save R13 */
-			mtsprg	3,r11							/* Save R11 */
-			li		r11,T_DATA_ACCESS				/* Set 'rupt code */
-			b		.L_exception_entry				/* Join common... */
-
-
-/*
- * 			Data segment
- */
-
-			. = 0x380
-.L_handler380:
-			mtsprg	2,r13							; Save R13
-			mtsprg	3,r11							; Save R11
-			li		r11,T_DATA_SEGMENT				; Set rupt code
-			b		.L_exception_entry				; Join common...
-
-/*
- * 			Instruction access - as for data access
- */
-
-			. = 0x400
-.L_handler400:
-			mtsprg	2,r13							; Save R13
-			mtsprg	3,r11							; Save R11
-			li		r11,T_INSTRUCTION_ACCESS		; Set rupt code
-			b		.L_exception_entry				; Join common...
-
-/*
- * 			Instruction segment
- */
-
-			. = 0x480
-.L_handler480:
-			mtsprg	2,r13							; Save R13 
-			mtsprg	3,r11							; Save R11 
-			li		r11,T_INSTRUCTION_SEGMENT		; Set rupt code
-			b		.L_exception_entry				; Join common... 
-
-/*
- * 			External interrupt
- */
-
-			. = 0x500
-.L_handler500:
-			mtsprg	2,r13							; Save R13 
-			mtsprg	3,r11							; Save R11
-			li		r11,T_INTERRUPT					; Set rupt code
-			b		.L_exception_entry				; Join common...
-
-/*
- * 			Alignment - many reasons
- */
-
-			. = 0x600
-.L_handler600:
-			mtsprg	2,r13							/* Save R13 */
-			mtsprg	3,r11							/* Save R11 */
-			li		r11,T_ALIGNMENT|T_FAM			/* Set 'rupt code */
-			b		.L_exception_entry				/* Join common... */
-
-/*
- * 			Program - floating point exception, illegal inst, priv inst, user trap
- */
-
-			. = 0x700
-.L_handler700:
-			mtsprg	2,r13							; Save R13
-			mtsprg	3,r11							; Save R11			
-			li		r11,T_PROGRAM|T_FAM				; Set program interruption code
-			b		.L_exception_entry				; Join common...
-
-/*
- * 			Floating point disabled
- */
-
-			. = 0x800
-.L_handler800:
-			mtsprg	2,r13							/* Save R13 */
-			mtsprg	3,r11							/* Save R11 */
-			li		r11,T_FP_UNAVAILABLE			/* Set 'rupt code */
-			b		.L_exception_entry				/* Join common... */
-
-
-/*
- * 			Decrementer - DEC register has passed zero.
- */
-
-			. = 0x900
-.L_handler900:
-			mtsprg	2,r13							/* Save R13 */
-			mtsprg	3,r11							/* Save R11 */
-			li		r11,T_DECREMENTER				/* Set 'rupt code */
-			b		.L_exception_entry				/* Join common... */
-
-/*
- * 			I/O controller interface error - MACH does not use this
- */
-
-			. = 0xA00
-.L_handlerA00:
-			mtsprg	2,r13							/* Save R13 */
-			mtsprg	3,r11							/* Save R11 */
-			li		r11,T_IO_ERROR					/* Set 'rupt code */
-			b		.L_exception_entry				/* Join common... */
-
-/*
- * 			Reserved
- */
-
-			. = 0xB00
-.L_handlerB00:
-			mtsprg	2,r13							/* Save R13 */
-			mtsprg	3,r11							/* Save R11 */
-			li		r11,T_RESERVED					/* Set 'rupt code */
-			b		.L_exception_entry				/* Join common... */
-
-
-;           System Calls (sc instruction)
-;
-;           The syscall number is in r0.  All we do here is munge the number into an
-;           8-bit index into the "scTable", and dispatch on it to handle the Ultra
-;           Fast Traps (UFTs).  The index is:
-;
-;               0x80 - set if syscall number is 0x80000000 (CutTrace)
-;               0x40 - set if syscall number is 0x00006004
-;               0x20 - set if upper 29 bits of syscall number are 0xFFFFFFF8
-;               0x10 - set if upper 29 bits of syscall number are 0x00007FF0
-;               0x0E - low three bits of syscall number
-;               0x01 - zero, as scTable is an array of shorts
-
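A C sketch of that bit-munging, written longhand (the function name is ours; the assembly below computes the same byte index branch-free):

    #include <stdint.h>

    static unsigned sc_index(uint32_t r0) {
        unsigned idx = 0;
        if (r0 == 0x80000000u)                 idx |= 0x80;  /* CutTrace */
        if (r0 == 0x00006004u)                 idx |= 0x40;
        if ((r0 & 0xFFFFFFF8u) == 0xFFFFFFF8u) idx |= 0x20;
        if ((r0 & 0xFFFFFFF8u) == 0x00007FF0u) idx |= 0x10;
        idx |= (r0 & 0x7u) << 1;     /* low three bits; bit 0 stays 0 (shorts) */
        return idx;
    }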
-			. = 0xC00
-.L_handlerC00:
-			mtsprg	3,r11							; Save R11
-			mtsprg	2,r13							; Save R13
-			rlwinm	r11,r0,0,0xFFFFFFF8				; mask off low 3 bits of syscall number
-			xori	r13,r11,0x7FF0					; start to check for the 0x7FFx traps
-			addi	r11,r11,8						; make a 0 iff this is a 0xFFFFFFF8 trap
-			cntlzw	r13,r13							; set bit 0x20 iff a 0x7FFx trap
-			cntlzw	r11,r11							; set bit 0x20 iff a 0xFFFFFFF8 trap
-			xoris	r0,r0,0x8000					; Flip bit to make 0 iff 0x80000000
-			rlwimi	r11,r13,31,0x10					; move 0x7FFx bit into position
-			cntlzw	r13,r0							; Set bit 0x20 iff 0x80000000
-			xoris	r0,r0,0x8000					; Flip bit to restore R0
-			rlwimi	r11,r13,2,0x80					; Set bit 0x80 iff CutTrace
-			xori	r13,r0,0x6004					; start to check for 0x6004
-			rlwimi	r11,r0,1,0xE					; move in low 3 bits of syscall number
-			cntlzw	r13,r13							; set bit 0x20 iff 0x6004
-			rlwinm	r11,r11,0,0,30					; clear out bit 31
-			rlwimi	r11,r13,1,0x40					; move 0x6004 bit into position
-			lhz		r11,lo16(scTable)(r11)			; get branch address from sc table
-			mfctr	r13								; save callers ctr in r13
-			mtctr	r11								; set up branch to syscall handler
-			mfsprg	r11,0							; get per_proc, which most UFTs use
-			bctr									; dispatch (r11 in sprg3, r13 in sprg2, ctr in r13, per_proc in r11)
-
-/*
- * 			Trace - generated by single stepping
- *				performance monitor BE branch enable tracing/logging
- *				is also done here now.  While this is permanently in the
- *				system, the impact is completely unnoticeable, as this code is
- *				only executed when (a) a single-step or branch exception is
- *				hit, and (b) in the single-step debugger case there is already
- *				so much overhead that the few extra instructions testing for BE
- *				are not even noticeable.
- *
- *			Note that this trace is available only to user state so we do not 
- *			need to set sprg2 before returning.
- */
-
-			. = 0xD00
-.L_handlerD00:
-			mtsprg	3,r11							; Save R11
-			mfsprg	r11,2							; Get the feature flags
-			mtsprg	2,r13							; Save R13
-
-			li		r11,T_TRACE|T_FAM				; Set interrupt code
-			b		.L_exception_entry				; Join common...
-
-/*
- * 			Floating point assist
- */
-
-			. = 0xE00
-.L_handlerE00:
-			mtsprg	2,r13							/* Save R13 */
-			mtsprg	3,r11							/* Save R11 */
-			li		r11,T_FP_ASSIST					/* Set 'rupt code */
-			b		.L_exception_entry				/* Join common... */
-
-
-/*
- *			Performance monitor interruption
- */
-
- 			. = 0xF00
-PMIhandler:
-			mtsprg	2,r13							/* Save R13 */
-			mtsprg	3,r11							/* Save R11 */
-			li		r11,T_PERF_MON					/* Set 'rupt code */
-			b		.L_exception_entry				/* Join common... */
-	
-
-/*
- *			VMX exception
- */
-
- 			. = 0xF20
-VMXhandler:
-			mtsprg	2,r13							/* Save R13 */
-			mtsprg	3,r11							/* Save R11 */
-			li		r11,T_VMX						/* Set 'rupt code */
-			b		.L_exception_entry				/* Join common... */
-
-	
-
-;
-;			Instruction translation miss exception - not supported
-;
-
- 			. = 0x1000
-.L_handler1000:
-			mtsprg	2,r13							; Save R13
-			mtsprg	3,r11							; Save R11
-			li		r11,T_INVALID_EXCP0				; Set rupt code
-			b		.L_exception_entry				; Join common...
-
-	
-
-;
-;			Data load translation miss exception - not supported
-;
-
- 			. = 0x1100
-.L_handler1100:
-			mtsprg	2,r13							; Save R13
-			mtsprg	3,r11							; Save R11
-			li		r11,T_INVALID_EXCP1				; Set rupt code
-			b		.L_exception_entry				; Join common...
-
-	
-
-;
-;			Data store translation miss exception - not supported
-;
-
- 			. = 0x1200
-.L_handler1200:
-			mtsprg	2,r13							; Save R13
-			mtsprg	3,r11							; Save R11
-			li		r11,T_INVALID_EXCP2				; Set rupt code
-			b		.L_exception_entry				; Join common...
-
-	
-/*
- * 			Instruction address breakpoint
- */
-
-			. = 0x1300
-.L_handler1300:
-			mtsprg	2,r13							/* Save R13 */
-			mtsprg	3,r11							/* Save R11 */
-			li		r11,T_INSTRUCTION_BKPT			/* Set 'rupt code */
-			b		.L_exception_entry				/* Join common... */
-
-/*
- * 			System management interrupt
- */
-
-			. = 0x1400
-.L_handler1400:
-			mtsprg	2,r13							/* Save R13 */
-			mtsprg	3,r11							/* Save R11 */
-			li		r11,T_SYSTEM_MANAGEMENT			/* Set 'rupt code */
-			b		.L_exception_entry				/* Join common... */
-
-
-/*
- * 			Soft Patch
- */
-
-			. = 0x1500
-.L_handler1500:
-			mtsprg	2,r13							/* Save R13 */
-			mtsprg	3,r11							/* Save R11 */
-			li		r11,T_SOFT_PATCH				/* Set 'rupt code */
-			b		.L_exception_entry				/* Join common... */
-
-;
-; 			Altivec Java Mode Assist interrupt or Maintenance interrupt
-;
-
-			. = 0x1600
-.L_handler1600:
-			mtsprg	2,r13							/* Save R13 */
-			mtsprg	3,r11							/* Save R11 */
-			li		r11,T_ALTIVEC_ASSIST			/* Set 'rupt code */
-			b		.L_exception_entry				/* Join common... */
-
-;
-; 			Altivec Java Mode Assist interrupt or Thermal interruption 
-;
-
-			. = 0x1700
-.L_handler1700:
-			mtsprg	2,r13							/* Save R13 */
-			mtsprg	3,r11							/* Save R11 */
-			li		r11,T_THERMAL					/* Set 'rupt code */
-			b		.L_exception_entry				/* Join common... */
-
-;
-; 			Thermal interruption - 64-bit
-;
-
-			. = 0x1800
-.L_handler1800:
-			mtsprg	2,r13							/* Save R13 */
-			mtsprg	3,r11							/* Save R11 */
-			li		r11,T_ARCHDEP0					/* Set 'rupt code */
-			b		.L_exception_entry				/* Join common... */
-
-/*
- * There is now a large gap of reserved traps
- */
-
-/*
- * 			Instrumentation interruption
- */
-
-			. = 0x2000
-.L_handler2000:
-			mtsprg	2,r13							/* Save R13 */
-			mtsprg	3,r11							/* Save R11 */
-			li		r11,T_INSTRUMENTATION			/* Set 'rupt code */
-			b		.L_exception_entry				/* Join common... */
-
-
-	
-			.data
-			.align	ALIGN
-			.globl	EXT(exception_entry)
-EXT(exception_entry):
-			.long	.L_exception_entry-EXT(ExceptionVectorsStart) /* phys addr of fn */
-				
-			VECTOR_SEGMENT
-
-/*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
- *
- * First-level syscall dispatch.  The syscall vector maps r0 (the syscall number) into an
- * index into the "scTable" (below), and then branches to one of these routines.  The PPC
- * syscalls come in several varieties, as follows:
- *
- * 1. If (syscall & 0xFFFFF000) == 0x00007000, then it is a PPC Fast Trap or UFT.
- *    The UFTs are dispatched here, the Fast Traps are dispatched in hw_exceptions.s.
- *
- * 2. If (syscall & 0xFFFFF000) == 0x00006000, then it is a PPC-only trap.
- *    One of these (0x6004) is a UFT, but most are dispatched in hw_exceptions.s.  These
- *    are mostly Blue Box or VMM (Virtual Machine) calls.
- *
- * 3. If (syscall & 0xFFFFFFF0) == 0xFFFFFFF0, then it is also a UFT and is dispatched here.
- *
- * 4. If (syscall & 0xFFFFF000) == 0x80000000, then it is a "firmware" call and is dispatched in
- *    Firmware.s, though the special "Cut Trace" trap (0x80000000) is handled here as an ultra
- *    fast trap.
- *
- * 5. If (syscall & 0xFFFFF000) == 0xFFFFF000, and it is not one of the above, then it is a Mach
- *    syscall, which are dispatched in hw_exceptions.s via "mach_trap_table".
- *
- * 6. If (syscall & 0xFFFFF000) == 0x00000000, then it is a BSD syscall, which are dispatched
- *    by "unix_syscall" using the "sysent" table.
- *
- * What distinguishes the UFTs, aside from being ultra fast, is that they cannot rely on translation
- * being on, and so cannot look at the activation or task control block, etc.  We handle them right
- * here, and return to the caller without turning interrupts or translation on.  The UFTs are:
- *
- *      0xFFFFFFFF - BlueBox only - MKIsPreemptiveTask
- *      0xFFFFFFFE - BlueBox only - MKIsPreemptiveTaskEnv
- *      0x00007FF2 - User state only - thread info (32-bit mode)
- *      0x00007FF3 - User state only - floating point / vector facility status
- *      0x00007FF4 - Kernel only - loadMSR - not used on 64-bit machines
- *      0x00006004 - vmm_dispatch (only some of which are UFTs)
- *
- * "scTable" is an array of 2-byte addresses, accessed using a 7-bit index derived from the syscall
- * number as follows:
- *
- *      0x80 (A) - set if syscall number is 0x80000000
- *      0x40 (B) - set if syscall number is 0x00006004
- *      0x20 (C) - set if upper 29 bits of syscall number are 0xFFFFFFF8
- *      0x10 (D) - set if upper 29 bits of syscall number are 0x00007FF0
- *      0x0E (E) - low three bits of syscall number
- *
- * If you define another UFT, try to use a number in one of the currently decoded ranges, i.e., one marked
- * "unassigned" below.  The dispatch table and the UFT handlers must reside in the first 32KB of
- * physical memory.
- */
- 
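Cases 1 through 6 above amount to a range classifier on the syscall number; a hedged C sketch (the enum and function are ours, and the order matters, since the case 3 range also satisfies the case 5 mask):

    #include <stdint.h>

    typedef enum { SC_PPC_FAST, SC_PPC_ONLY, SC_UFT, SC_FIRMWARE, SC_MACH, SC_BSD } sc_kind;

    static sc_kind classify(uint32_t n) {
        if ((n & 0xFFFFF000u) == 0x00007000u) return SC_PPC_FAST;  /* case 1 */
        if ((n & 0xFFFFF000u) == 0x00006000u) return SC_PPC_ONLY;  /* case 2 */
        if ((n & 0xFFFFFFF0u) == 0xFFFFFFF0u) return SC_UFT;       /* case 3 */
        if ((n & 0xFFFFF000u) == 0x80000000u) return SC_FIRMWARE;  /* case 4 */
        if ((n & 0xFFFFF000u) == 0xFFFFF000u) return SC_MACH;      /* case 5 */
        return SC_BSD;                                             /* case 6 */
    }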
-            .align  8                               ; start this table on a 256-byte boundary
-scTable:                                            ; ABCD E
-			.short	uftNormalSyscall-baseR			; 0000 0  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0000 1  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0000 2  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0000 3  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0000 4  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0000 5  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0000 6  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0000 7  these syscalls are not in a reserved range
-
-			.short	uftNormalSyscall-baseR			; 0001 0  0x7FF0 is unassigned
-			.short	uftNormalSyscall-baseR			; 0001 1  0x7FF1 is Set Thread Info Fast Trap (pass up)
-			.short	uftThreadInfo-baseR				; 0001 2  0x7FF2 is Thread Info
-			.short	uftFacilityStatus-baseR			; 0001 3  0x7FF3 is Facility Status
-			.short	uftLoadMSR-baseR				; 0001 4  0x7FF4 is Load MSR
-			.short	uftNormalSyscall-baseR			; 0001 5  0x7FF5 is the Null FastPath Trap (pass up)
-			.short	uftNormalSyscall-baseR			; 0001 6  0x7FF6 is unassigned
-			.short	uftNormalSyscall-baseR			; 0001 7  0x7FF7 is unassigned
-
-			.short	uftNormalSyscall-baseR			; 0010 0  0xFFFFFFF0 is unassigned
-			.short	uftNormalSyscall-baseR			; 0010 1  0xFFFFFFF1 is unassigned
-			.short	uftNormalSyscall-baseR			; 0010 2  0xFFFFFFF2 is unassigned
-			.short	uftNormalSyscall-baseR			; 0010 3  0xFFFFFFF3 is unassigned
-			.short	uftNormalSyscall-baseR			; 0010 4  0xFFFFFFF4 is unassigned
-			.short	uftNormalSyscall-baseR			; 0010 5  0xFFFFFFF5 is unassigned
-			.short	uftIsPreemptiveTaskEnv-baseR	; 0010 6  0xFFFFFFFE is Blue Box uftIsPreemptiveTaskEnv
-			.short	uftIsPreemptiveTask-baseR		; 0010 7  0xFFFFFFFF is Blue Box IsPreemptiveTask
-
-			.short	WhoaBaby-baseR					; 0011 0  impossible combination
-			.short	WhoaBaby-baseR					; 0011 1  impossible combination
-			.short	WhoaBaby-baseR					; 0011 2  impossible combination
-			.short	WhoaBaby-baseR					; 0011 3  impossible combination
-			.short	WhoaBaby-baseR					; 0011 4  impossible combination
-			.short	WhoaBaby-baseR					; 0011 5  impossible combination
-			.short	WhoaBaby-baseR					; 0011 6  impossible combination
-			.short	WhoaBaby-baseR					; 0011 7  impossible combination
-
-			.short	WhoaBaby-baseR					; 0100 0  0x6000 is an impossible index (diagCall)
-			.short	WhoaBaby-baseR					; 0100 1  0x6001 is an impossible index (vmm_get_version)
-			.short	WhoaBaby-baseR					; 0100 2  0x6002 is an impossible index (vmm_get_features)
-			.short	WhoaBaby-baseR					; 0100 3  0x6003 is an impossible index (vmm_init_context)
-			.short	uftVMM-baseR					; 0100 4  0x6004 is vmm_dispatch (only some of which are UFTs)
-			.short	WhoaBaby-baseR					; 0100 5  0x6005 is an impossible index (bb_enable_bluebox)
-			.short	WhoaBaby-baseR					; 0100 6  0x6006 is an impossible index (bb_disable_bluebox)
-			.short	WhoaBaby-baseR					; 0100 7  0x6007 is an impossible index (bb_settaskenv)
-
-			.short	uftNormalSyscall-baseR			; 0101 0  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0101 1  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0101 2  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0101 3  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0101 4  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0101 5  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0101 6  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0101 7  these syscalls are not in a reserved range
-			
-			.short	uftNormalSyscall-baseR			; 0110 0  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0110 1  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0110 2  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0110 3  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0110 4  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0110 5  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0110 6  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0110 7  these syscalls are not in a reserved range
-			
-			.short	uftNormalSyscall-baseR			; 0111 0  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0111 1  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0111 2  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0111 3  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0111 4  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0111 5  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0111 6  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 0111 7  these syscalls are not in a reserved range
-
-			.short	uftCutTrace-baseR				; 1000 0  CutTrace
-			.short	uftNormalSyscall-baseR			; 1000 1  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1000 2  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1000 3  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1000 4  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1000 5  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1000 6  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1000 7  these syscalls are not in a reserved range
-
-			.short	uftNormalSyscall-baseR			; 1001 0  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1001 1  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1001 2  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1001 3  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1001 4  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1001 5  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1001 6  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1001 7  these syscalls are not in a reserved range
-
-			.short	uftNormalSyscall-baseR			; 1010 0  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1010 1  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1010 2  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1010 3  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1010 4  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1010 5  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1010 6  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1010 7  these syscalls are not in a reserved range
-
-			.short	uftNormalSyscall-baseR			; 1011 0  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1011 1  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1011 2  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1011 3  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1011 4  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1011 5  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1011 6  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1011 7  these syscalls are not in a reserved range
-
-			.short	uftNormalSyscall-baseR			; 1100 0  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1100 1  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1100 2  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1100 3  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1100 4  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1100 5  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1100 6  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1100 7  these syscalls are not in a reserved range
-			
-			.short	uftNormalSyscall-baseR			; 1101 0  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1101 1  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1101 2  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1101 3  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1101 4  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1101 5  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1101 6  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1101 7  these syscalls are not in a reserved range
-			
-			.short	uftNormalSyscall-baseR			; 1110 0  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1110 1  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1110 2  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1110 3  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1110 4  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1110 5  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1110 6  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1110 7  these syscalls are not in a reserved range
-			
-			.short	uftNormalSyscall-baseR			; 1111 0  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1111 1  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1111 2  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1111 3  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1111 4  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1111 5  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1111 6  these syscalls are not in a reserved range
-			.short	uftNormalSyscall-baseR			; 1111 7  these syscalls are not in a reserved range
-
-            .align  2                               ; prepare for code
-
-
-/* Ultra Fast Trap (UFT) Handlers:
- *
- * We get here directly from the hw syscall vector via the "scTable" vector (above), 
- * with interrupts and VM off, in 64-bit mode if supported, and with all registers live
- * except the following:
- *
- *        r11 = per_proc ptr (ie, sprg0)
- *        r13 = holds caller's ctr register
- *      sprg2 = holds caller's r13
- *      sprg3 = holds caller's r11
- */
-
-;			Handle "vmm_dispatch" (0x6004), of which only some selectors are UFTs.
-
-uftVMM:
-			mtctr	r13								; restore callers ctr
-			lwz		r11,spcFlags(r11)				; get the special flags word from per_proc
-			mfcr	r13								; save callers entire cr (we use all fields below)
-			rlwinm	r11,r11,16,16,31				; Extract spcFlags upper bits
-			andi.	r11,r11,hi16(runningVM|FamVMena|FamVMmode)
-			cmpwi	cr0,r11,hi16(runningVM|FamVMena|FamVMmode)	; Test in VM FAM
-			bne--	uftNormal80						; not eligible for FAM UFTs
-			cmpwi	cr5,r3,kvmmResumeGuest			; Compare r3 with kvmmResumeGuest
-			cmpwi	cr2,r3,kvmmSetGuestRegister		; Compare r3 with kvmmSetGuestRegister
-			cror	cr1_eq,cr5_lt,cr2_gt			; Set true if out of VMM Fast syscall range
-			bt--	cr1_eq,uftNormalFF				; Exit if out of range (the others are not UFTs)
-			b		EXT(vmm_ufp)					; handle UFT range of vmm_dispatch syscall
-
-			
-;			Handle blue box UFTs (syscalls -1 and -2).
-
-uftIsPreemptiveTask:
-uftIsPreemptiveTaskEnv:
-			mtctr	r13								; restore callers ctr
-			lwz		r11,spcFlags(r11)				; get the special flags word from per_proc
-			mfcr	r13,0x80						; save callers cr0 so we can use it
-			andi.	r11,r11,bbNoMachSC|bbPreemptive ; Clear what we do not need
-			cmplwi	r11,bbNoMachSC					; See if we are trapping syscalls
-			blt--	uftNormal80						; No...
-			cmpwi	r0,-2							; is this call IsPreemptiveTaskEnv?
-			rlwimi	r13,r11,bbPreemptivebit-cr0_eq,cr0_eq,cr0_eq	; Copy preemptive task flag into user cr0_eq
-			mfsprg	r11,0							; Get the per proc once more
-			bne++	uftRestoreThenRFI				; do not load r0 if IsPreemptiveTask
-			lwz		r0,ppbbTaskEnv(r11)				; Get the shadowed taskEnv (only difference)
-			b		uftRestoreThenRFI				; restore modified cr0 and return
-
-
-;			Handle "Thread Info" UFT (0x7FF2)
-
-			.globl	EXT(uft_uaw_nop_if_32bit)
-uftThreadInfo:
-			lwz		r3,UAW+4(r11)					; get user assist word, assuming a 32-bit processor
-LEXT(uft_uaw_nop_if_32bit)
-			ld		r3,UAW(r11)						; get the whole doubleword if 64-bit (patched to nop if 32-bit)
-			mtctr	r13								; restore callers ctr
-			b		uftRFI							; done
-
-
-;			Handle "Facility Status" UFT (0x7FF3)
-
-uftFacilityStatus:
-			lwz		r3,spcFlags(r11)				; get "special flags" word from per_proc
-			mtctr	r13								; restore callers ctr
-			b		uftRFI							; done
-
-
-;			Handle "Load MSR" UFT (0x7FF4).	 This is not used on 64-bit processors, though it would work.
-
-uftLoadMSR:
-			mfsrr1	r11								; get callers MSR
-			mtctr	r13								; restore callers ctr
-			mfcr	r13,0x80						; save callers cr0 so we can test PR
-			rlwinm. r11,r11,0,MSR_PR_BIT,MSR_PR_BIT ; really in the kernel?
-			bne-	uftNormal80						; do not permit from user mode
-			mfsprg	r11,0							; restore per_proc
-			mtsrr1	r3								; Set new MSR
-
-
-;			Return to caller after UFT.	 When called:
-;				r11 = per_proc ptr
-;				r13 = callers cr0 in upper nibble (if uftRestoreThenRFI called)
-;				sprg2 = callers r13
-;				sprg3 = callers r11
-
-uftRestoreThenRFI:									; WARNING: can drop down to here
-			mtcrf	0x80,r13						; restore callers cr0
-uftRFI:
-			.globl	EXT(uft_nop_if_32bit)
-LEXT(uft_nop_if_32bit)
-			b		uftX64							; patched to NOP if 32-bit processor
-			
-uftX32:		lwz		r11,pfAvailable(r11)			; Get the feature flags
-			mfsprg	r13,2							; Restore R13
-			mtsprg	2,r11							; Set the feature flags
-			mfsprg	r11,3							; Restore R11
-			rfi										; Back to our guy...
-			
-uftX64:		mtspr	hsprg0,r14						; Save a register in a Hypervisor SPRG
-			ld		r14,UAW(r11)					; Get the User Assist DoubleWord
-			lwz		r11,pfAvailable(r11)			; Get the feature flags
-			mfsprg	r13,2							; Restore R13
-			mtsprg	2,r11							; Set the feature flags
-			mfsprg	r11,3							; Restore R11
-			mtsprg	3,r14							; Set the UAW in sprg3
-			mfspr	r14,hsprg0						; Restore R14
-			rfid									; Back to our guy...
-
-;
-;			Quickly cut a trace table entry for the CutTrace firmware call.
-;
-;			All registers except R11 and R13 are unchanged.
-;
-;			Note that this code cuts a trace table entry for the CutTrace call only.
-;			An identical entry is made during normal interrupt processing.  Any entry
-;			format entry changes made must be done in both places.
-;
-
-			.align	5
-			
-			.globl	EXT(uft_cuttrace)
-LEXT(uft_cuttrace)
-uftCutTrace:
-			b		uftct64							; patched to NOP if 32-bit processor
-
-			stw		r20,tempr0(r11)					; Save some work registers
-			lwz		r20,dgFlags(0)					; Get the flags
-			stw		r21,tempr1(r11)					; Save some work registers
-			mfsrr1	r21								; Get the SRR1
-			rlwinm	r20,r20,MSR_PR_BIT-enaUsrFCallb,MASK(MSR_PR)	; Shift the validity bit over to pr bit spot
-			stw		r25,tempr2(r11)					; Save some work registers
-			orc		r20,r20,r21						; Get ~PR | FC
-			mfcr	r25								; Save the CR
-			stw		r22,tempr3(r11)					; Save some work registers
-			lhz		r22,PP_CPU_NUMBER(r11)			; Get the logical processor number
-			andi.	r20,r20,MASK(MSR_PR)			; Set cr0_eq if we are in problem state and the validity bit is not set
-			stw		r23,tempr4(r11)					; Save some work registers
-			lwz		r23,traceMask(0)				; Get the trace mask
-			stw		r24,tempr5(r11)					; Save some work registers
-			beq-	ctbail32						; Can not issue from user...
-			
-
-			addi	r24,r22,16						; Get shift to move cpu mask to syscall mask
-			rlwnm	r24,r23,r24,12,12				; Shift cpu mask bit to rupt type mask
-			and.	r24,r24,r23						; See if both are on
-
-;
-;			We select a trace entry using a compare and swap on the next entry field.
-;			Since we do not lock the actual trace buffer, there is a potential that
-;			another processor could wrap and trash our entry.  Who cares?
-;
-
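In C terms, the lwarx/stwcx. sequence below is a compare-and-swap bump of the next-slot pointer, wrapping from traceEnd back to traceStart; a sketch using compiler builtins (the kernel's real atomic primitives differ):

    #include <stdint.h>

    static uint32_t alloc_trace_slot(uint32_t *curr, uint32_t start,
                                     uint32_t end, uint32_t size) {
        uint32_t old, next;
        do {
            old  = *curr;
            next = old + size;
            if (next == end) next = start;      /* wrap the trace table */
        } while (!__atomic_compare_exchange_n(curr, &old, next, 0,
                                              __ATOMIC_RELAXED, __ATOMIC_RELAXED));
        return old;    /* caller owns this slot (unlocked; may be trashed on wrap) */
    }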
-			li		r23,trcWork						; Get the trace work area address
-			lwz		r21,traceStart(0)				; Get the start of trace table
-			lwz		r22,traceEnd(0)					; Get end of trace table
-			
-			beq--	ctdisa32						; Leave because tracing is disabled...					
-
-ctgte32:	lwarx	r20,0,r23						; Get and reserve the next slot to allocate
-			addi	r24,r20,LTR_size				; Point to the next trace entry
-			cmplw	r24,r22							; Do we need to wrap the trace table?
-			bne+	ctgte32s						; No wrap, we got us a trace entry...
-			
-			mr		r24,r21							; Wrap back to start
-
-ctgte32s:	stwcx.	r24,0,r23						; Try to update the current pointer
-			bne-	ctgte32							; Collision, try again...
-			
-#if ESPDEBUG
-			dcbf	0,r23							; Force to memory
-			sync
-#endif
-			
-			dcbz	0,r20							; Clear and allocate first trace line
-			li		r24,32							; Offset to next line
-			
-ctgte32tb:	mftbu	r21								; Get the upper time now
-			mftb	r22								; Get the lower time now
-			mftbu	r23								; Get upper again
-			cmplw	r21,r23							; Has it ticked?
-			bne-	ctgte32tb						; Yes, start again...
-
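The mftbu/mftb/mftbu sequence just above is the standard way to read the 64-bit timebase on a 32-bit PowerPC: re-read the upper half until it is stable across the lower-half read. A C sketch with GCC-style inline assembly:

    #include <stdint.h>

    static inline uint64_t read_timebase32(void) {
        uint32_t hi, lo, hi2;
        do {
            __asm__ volatile ("mftbu %0" : "=r" (hi));
            __asm__ volatile ("mftb  %0" : "=r" (lo));
            __asm__ volatile ("mftbu %0" : "=r" (hi2));
        } while (hi != hi2);                    /* upper word ticked mid-read; retry */
        return ((uint64_t)hi << 32) | lo;
    }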
-			dcbz	r24,r20							; Clean second line
-
-;
-;			Let us cut that trace entry now.
-;
-;			Note that this code cuts a trace table entry for the CutTrace call only.
-;			An identical entry is made during normal interrupt processing.  Any entry
-;			format entry changes made must be done in both places.
-;
-
-			lhz		r24,PP_CPU_NUMBER(r11)			; Get the logical processor number
-			li		r23,T_SYSTEM_CALL				; Get the system call id
-			mtctr	r13								; Restore the callers CTR
-			sth		r24,LTR_cpu(r20)				; Save processor number
-			li		r24,64							; Offset to third line
-			sth		r23,LTR_excpt(r20)				; Set the exception code
-			dcbz	r24,r20							; Clean 3rd line
-			mfspr	r23,dsisr						; Get the DSISR
-			stw		r21,LTR_timeHi(r20)				; Save top of time stamp
-			li		r24,96							; Offset to fourth line
-			mflr	r21								; Get the LR
-			dcbz	r24,r20							; Clean 4th line
-			stw		r22,LTR_timeLo(r20)				; Save bottom of time stamp
-			mfsrr0	r22								; Get SRR0
-			stw		r25,LTR_cr(r20)					; Save CR
-			mfsrr1	r24								; Get the SRR1
-			stw		r23,LTR_dsisr(r20)				; Save DSISR
-			stw		r22,LTR_srr0+4(r20)				; Save SRR0
-			mfdar	r23								; Get DAR
-			stw		r24,LTR_srr1+4(r20)				; Save SRR1
-			stw		r23,LTR_dar+4(r20)				; Save DAR
-			stw		r21,LTR_lr+4(r20)				; Save LR
-
-			stw		r13,LTR_ctr+4(r20)				; Save CTR
-			stw		r0,LTR_r0+4(r20)				; Save register
-			stw		r1,LTR_r1+4(r20)				; Save register
-			stw		r2,LTR_r2+4(r20)				; Save register
-			stw		r3,LTR_r3+4(r20)				; Save register
-			stw		r4,LTR_r4+4(r20)				; Save register
-			stw		r5,LTR_r5+4(r20)				; Save register
-			stw		r6,LTR_r6+4(r20)				; Save register
-
-#if 0
-			lwz		r21,FPUowner(r11)				; (TEST/DEBUG) Get the current floating point owner
-			stw		r21,LTR_rsvd0(r20)				; (TEST/DEBUG) Record the owner
-#endif
-			
-#if ESPDEBUG
-			addi	r21,r20,32						; Second line
-			addi	r22,r20,64						; Third line
-			dcbst	0,r20							; Force to memory
-			dcbst	0,r21							; Force to memory
-			addi	r21,r22,32						; Fourth line
-			dcbst	0,r22							; Force to memory
-			dcbst	0,r21							; Force to memory
-			sync									; Make sure it all goes
-#endif
-
-ctdisa32:	mtcrf	0x80,r25						; Restore the used condition register field
-			lwz		r20,tempr0(r11)					; Restore work register
-			lwz		r21,tempr1(r11)					; Restore work register
-			lwz		r25,tempr2(r11)					; Restore work register
-			mtctr	r13								; Restore the callers CTR
-			lwz		r22,tempr3(r11)					; Restore work register
-			lwz		r23,tempr4(r11)					; Restore work register
-			lwz		r24,tempr5(r11)					; Restore work register
-			b		uftX32							; Go restore the rest and go...
-
-ctbail32:	mtcrf	0x80,r25						; Restore the used condition register field
-			lwz		r20,tempr0(r11)					; Restore work register
-			lwz		r21,tempr1(r11)					; Restore work register
-			lwz		r25,tempr2(r11)					; Restore work register
-			mtctr	r13								; Restore the callers CTR
-			lwz		r22,tempr3(r11)					; Restore work register
-			lwz		r23,tempr4(r11)					; Restore work register
-			b		uftNormalSyscall				; Go pass it on along...
-
-;
-;			This is the 64-bit version.
-;
-
-uftct64:	std		r20,tempr0(r11)					; Save some work registers
-			lwz		r20,dgFlags(0)					; Get the flags
-			std		r21,tempr1(r11)					; Save some work registers
-			mfsrr1	r21								; Get the SRR1
-			rlwinm	r20,r20,MSR_PR_BIT-enaUsrFCallb,MASK(MSR_PR)	; Shift the validity bit over to pr bit spot
-			std		r25,tempr2(r11)					; Save some work registers
-			orc		r20,r20,r21						; Get ~PR | FC
-			mfcr	r25								; Save the CR
-			std		r22,tempr3(r11)					; Save some work registers
-			lhz		r22,PP_CPU_NUMBER(r11)			; Get the logical processor number
-			andi.	r20,r20,MASK(MSR_PR)			; Set cr0_eq when we are in problem state and the validity bit is not set
-			std		r23,tempr4(r11)					; Save some work registers
-			lwz		r23,traceMask(0)				; Get the trace mask
-			std		r24,tempr5(r11)					; Save some work registers
-			beq--	ctbail64						; Can not issue from user...
-
-			addi	r24,r22,16						; Get shift to move cpu mask to syscall mask
-			rlwnm	r24,r23,r24,12,12				; Shift cpu mask bit to rupt type mask
-			and.	r24,r24,r23						; See if both are on
-			
-;
-;			We select a trace entry using a compare and swap on the next entry field.
-;			Since we do not lock the actual trace buffer, there is a potential that
-;			another processor could wrap and trash our entry.  Who cares?
-;
-
-			li		r23,trcWork						; Get the trace work area address
-			lwz		r21,traceStart(0)				; Get the start of trace table
-			lwz		r22,traceEnd(0)					; Get end of trace table
-			
-			beq--	ctdisa64						; Leave because tracing is disabled...					
-
-ctgte64:	lwarx	r20,0,r23						; Get and reserve the next slot to allocate
-			addi	r24,r20,LTR_size				; Point to the next trace entry
-			cmplw	r24,r22							; Do we need to wrap the trace table?
-			bne++	ctgte64s						; No wrap, we got us a trace entry...
-			
-			mr		r24,r21							; Wrap back to start
-
-ctgte64s:	stwcx.	r24,0,r23						; Try to update the current pointer
-			bne--	ctgte64							; Collision, try again...
-			
-#if ESPDEBUG
-			dcbf	0,r23							; Force to memory
-			sync
-#endif
-			
-			dcbz128	0,r20							; Zap the trace entry
-			
-			mftb	r21								; Get the time
-
-;
-;			Let us cut that trace entry now.
-;
-;			Note that this code cuts a trace table entry for the CutTrace call only.
-;			An identical entry is made during normal interrupt processing.  Any entry
-;			format entry changes made must be done in both places.
-;
-
-			lhz		r24,PP_CPU_NUMBER(r11)			; Get the logical processor number
-			li		r23,T_SYSTEM_CALL				; Get the system call id
-			sth		r24,LTR_cpu(r20)				; Save processor number
-			sth		r23,LTR_excpt(r20)				; Set the exception code
-			mfspr	r23,dsisr						; Get the DSISR
-			std		r21,LTR_timeHi(r20)				; Save top of time stamp
-			mflr	r21								; Get the LR
-			mfsrr0	r22								; Get SRR0
-			stw		r25,LTR_cr(r20)					; Save CR
-			mfsrr1	r24								; Get the SRR1
-			stw		r23,LTR_dsisr(r20)				; Save DSISR
-			std		r22,LTR_srr0(r20)				; Save SRR0
-			mfdar	r23								; Get DAR
-			std		r24,LTR_srr1(r20)				; Save SRR1
-			std		r23,LTR_dar(r20)				; Save DAR
-			std		r21,LTR_lr(r20)					; Save LR
-
-			std		r13,LTR_ctr(r20)				; Save CTR
-			std		r0,LTR_r0(r20)					; Save register
-			std		r1,LTR_r1(r20)					; Save register
-			std		r2,LTR_r2(r20)					; Save register
-			std		r3,LTR_r3(r20)					; Save register
-			std		r4,LTR_r4(r20)					; Save register
-			std		r5,LTR_r5(r20)					; Save register
-			std		r6,LTR_r6(r20)					; Save register
-			
-#if 0
-			lwz		r21,FPUowner(r11)				; (TEST/DEBUG) Get the current floating point owner
-			stw		r21,LTR_rsvd0(r20)				; (TEST/DEBUG) Record the owner
-#endif
-
-#if ESPDEBUG
-			dcbf	0,r20							; Force to memory			
-			sync									; Make sure it all goes
-#endif
-
-ctdisa64:	mtcrf	0x80,r25						; Restore the used condition register field
-			ld		r20,tempr0(r11)					; Restore work register
-			ld		r21,tempr1(r11)					; Restore work register
-			ld		r25,tempr2(r11)					; Restore work register
-			mtctr	r13								; Restore the callers CTR
-			ld		r22,tempr3(r11)					; Restore work register
-			ld		r23,tempr4(r11)					; Restore work register
-			ld		r24,tempr5(r11)					; Restore work register
-			b		uftX64							; Go restore the rest and go...
-
-ctbail64:	mtcrf	0x80,r25						; Restore the used condition register field
-			ld		r20,tempr0(r11)					; Restore work register
-			ld		r21,tempr1(r11)					; Restore work register
-			ld		r25,tempr2(r11)					; Restore work register
-			mtctr	r13								; Restore the callers CTR
-			ld		r22,tempr3(r11)					; Restore work register
-			ld		r23,tempr4(r11)					; Restore work register
-			li		r11,T_SYSTEM_CALL|T_FAM			; Set system code call
-			b		extEntry64						; Go straight to the 64-bit code...
-
-
-
-;			Handle a system call that is not a UFT and which thus goes upstairs.
-
-uftNormalFF:										; here with entire cr in r13
-			mtcr	r13								; restore all 8 fields
-			b		uftNormalSyscall1				; Join common...
-			
-uftNormal80:										; here with callers cr0 in r13
-			mtcrf	0x80,r13						; restore cr0
-			b		uftNormalSyscall1				; Join common...
-			
-uftNormalSyscall:									; r13 = callers ctr
-			mtctr	r13								; restore ctr
-uftNormalSyscall1:
-			li		r11,T_SYSTEM_CALL|T_FAM			; this is a system call (and fall through)
-
-
-/*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>*/
-/*
- * .L_exception_entry(type)
- *
- * Come here via branch directly from the vector, or falling down from above, with the following
- * set up:
- *
- * ENTRY:	interrupts off, VM off, in 64-bit mode if supported
- *          Caller's r13 saved in sprg2.
- *          Caller's r11 saved in sprg3.
- *          Exception code (ie, T_SYSTEM_CALL etc) in r11.
- *          All other registers are live.
- *
- */
-
-.L_exception_entry:                                 ; WARNING: can fall through from UFT handler
-			
-/*
- *
- *	Here we will save off a mess of registers, the special ones and R0-R12.  We use the DCBZ
- *	instruction to clear and allocate a line in the cache.  This way we won't take any cache
- *	misses, so these stores won't take all that long.  The first line is the exception,
- *	because we can't do a DCBZ if the L1 D-cache is off.  We skip the rest of the DCBZs
- *	as well if the cache is off.
- * 
- *	Note that if we are attempting to sleep (as opposed to nap or doze) all interruptions
- *	are ignored.
- */
-
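The zero-and-allocate trick the comment describes boils down to one instruction per 32-byte line; a sketch as a C helper (the code that follows issues dcbz directly):

    /* Zero a 32-byte cache line and establish it in the L1 D-cache,
     * so the stores that follow hit instead of miss. */
    static inline void dcbz_line(void *p) {
        __asm__ volatile ("dcbz 0,%0" : : "r" (p) : "memory");
    }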
-
-			.globl	EXT(extPatch32)						
-			
-
-LEXT(extPatch32)
-			b		extEntry64						; Go do 64-bit (patched to a nop if 32-bit)
-			mfsprg  r13,0							; Load per_proc
-			lwz		r13,next_savearea+4(r13)		; Get the exception save area
-			stw		r0,saver0+4(r13)				; Save register 0
-			stw		r1,saver1+4(r13)				; Save register 1
-
-			mfspr	r1,hid0							; Get HID0
-			mfcr	r0								; Save the whole CR
-			
-			mtcrf	0x20,r1							; Get set to test for sleep
-			cror	doze,doze,nap					; Remember if we are napping
-			bf		sleep,notsleep					; Skip if we are not trying to sleep
-			
-			mtcrf	0x20,r0							; Restore the CR
-			lwz		r0,saver0+4(r13)				; Restore R0
-			lwz		r1,saver1+4(r13)				; Restore R1
-			mfsprg	r13,0							; Get the per_proc 
-			lwz		r11,pfAvailable(r13)			; Get back the feature flags
-			mfsprg	r13,2							; Restore R13
-			mtsprg	2,r11							; Set sprg2 to the features
-			mfsprg	r11,3							; Restore R11
-			rfi										; Jump back into sleep code...
-			.long	0								; Leave these here please...
-			.long	0
-			.long	0
-			.long	0
-			.long	0
-			.long	0
-			.long	0
-			.long	0
-			
-
-;
-;			This is the 32-bit context saving stuff
-;
-
-			.align	5
-						
-notsleep:	stw		r2,saver2+4(r13)				; Save this one
-			bf		doze,notspdo					; Skip the next if we are not napping/dozing...
-			rlwinm	r2,r1,0,nap+1,doze-1			; Clear any possible nap and doze bits
-			mtspr	hid0,r2							; Clear the nap/doze bits
-
-notspdo:
-			la		r1,saver4(r13)					; Point to the next line in case we need it
-			crmove	wasNapping,doze					; Remember if we were napping
-			mfsprg	r2,0							; Get the per_proc area
-			dcbz	0,r1							; allocate r4-r7 32-byte line in cache
-			
-;
-;			Remember, we are setting up CR6 with feature flags
-;
-			andi.	r1,r11,T_FAM					; Check FAM bit
-	
-			stw		r3,saver3+4(r13)				; Save this one
-			stw		r4,saver4+4(r13)				; Save this one
-			andc	r11,r11,r1						; Clear FAM bit
-			beq+	noFAM							; Is it FAM intercept
-			mfsrr1	r3								; Load srr1
-			rlwinm.	r3,r3,0,MSR_PR_BIT,MSR_PR_BIT	; Are we trapping from supervisor state?
-			beq+	noFAM							; From supervisor state
-			lwz		r1,spcFlags(r2)					; Load spcFlags 
-			rlwinm	r1,r1,1+FamVMmodebit,30,31		; Extract FamVMenabit and FamVMmodebit
-			cmpwi	cr0,r1,2						; Check FamVMena set without FamVMmode
-			bne+	noFAM							; Can this context be FAM intercept
-			lwz		r4,FAMintercept(r2)				; Load exceptions mask to intercept
-			srwi	r1,r11,2						; Divide r11 by 4
-			lis		r3,0x8000						; Set r3 to 0x80000000
-			srw		r1,r3,r1						; Set bit for current exception
-			and.	r1,r1,r4						; And current exception with the intercept mask
-			beq+	noFAM							; Is it FAM intercept
-			b		EXT(vmm_fam_exc)
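-
-;			The intercept test above, as a rough C sketch (from_user and
-;			excp_code are illustrative names for the SRR1 PR test and R11):
-;
-;				if (from_user && fam_vm_ena_without_mode(spcFlags)) {
-;					if (FAMintercept & (0x80000000u >> (excp_code >> 2)))
-;						vmm_fam_exc();			; hand it to the VMM
-;				}
-;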
-noFAM:
-			lwz		r1,pfAvailable(r2)				; Get the CPU features flags			
-			la		r3,saver8(r13)					; Point to line with r8-r11
-			mtcrf	0xE2,r1							; Put the features flags (that we care about) in the CR
-			dcbz	0,r3							; allocate r8-r11 32-byte line in cache
-            la		r3,saver12(r13)					; point to r12-r15 line
-			lis		r4,hi16(MASK(MSR_VEC)|MASK(MSR_FP)|MASK(MSR_ME))	; Set up the MSR we will use throughout. Note that ME comes on here if MCK
-			stw		r6,saver6+4(r13)				; Save this one
-			ori		r4,r4,lo16(MASK(MSR_VEC)|MASK(MSR_FP)|MASK(MSR_ME))	; Rest of MSR
-			stw		r8,saver8+4(r13)				; Save this one
-			crmove	featAltivec,pfAltivecb			; Set the Altivec flag
-			mtmsr	r4								; Set MSR
-			isync
-			mfsrr0	r6								; Get the interruption SRR0 
-            la		r8,savesrr0(r13)				; point to line with SRR0, SRR1, CR, XER, and LR
-			dcbz	0,r3							; allocate r12-r15 32-byte line in cache
-            la		r3,saver16(r13)					; point to next line
-			dcbz	0,r8							; allocate 32-byte line with SRR0, SRR1, CR, XER, and LR
-			stw		r7,saver7+4(r13)				; Save this one
-			mfsrr1	r7								; Get the interrupt SRR1
-			stw		r6,savesrr0+4(r13)				; Save the SRR0 
-			stw		r5,saver5+4(r13)				; Save this one 
-			mfsprg	r6,2							; Get interrupt time R13
-			mtsprg	2,r1							; Set the feature flags
-			mfsprg	r8,3							; Get rupt time R11
-			stw		r7,savesrr1+4(r13)				; Save SRR1 
-			stw		r8,saver11+4(r13)				; Save rupt time R11
-			stw		r6,saver13+4(r13)				; Save rupt R13
-			dcbz	0,r3							; allocate 32-byte line with r16-r19
-            la		r3,saver20(r13)					; point to next line
-
-getTB:		mftbu	r6								; Get the upper timebase
-			mftb	r7								; Get the lower timebase
-			mftbu	r8								; Get the upper one again
-			cmplw	r6,r8							; Did the top tick?
-			bne-	getTB							; Yeah, need to get it again...
-
-			stw		r8,ruptStamp(r2)				; Save the top of time stamp
-			stw		r8,SAVtime(r13)					; Save the top of time stamp
-			stw		r7,ruptStamp+4(r2)				; Save the bottom of time stamp
-			stw		r7,SAVtime+4(r13)				; Save the bottom of time stamp
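-
-;			The getTB loop is the standard split-timebase read; in rough C
-;			(mftb/mftbu stand for the SPR reads above):
-;
-;				uint32_t hi, lo;
-;				do {
-;					hi = mftbu();				; upper half
-;					lo = mftb();				; lower half
-;				} while (hi != mftbu());		; upper ticked, try again
-;				uint64_t now = ((uint64_t)hi << 32) | lo;
-;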
-
-			dcbz	0,r3							; allocate 32-byte line with r20-r23
-			stw		r9,saver9+4(r13)				; Save this one
-
-			stw		r10,saver10+4(r13)				; Save this one
-			mflr	r4								; Get the LR
-			mfxer	r10								; Get the XER
-			
-			bf+		wasNapping,notNapping			; Skip if not waking up from nap...
-
-			lwz		r6,napStamp+4(r2)				; Pick up low order nap stamp
-			lis		r3,hi16(EXT(machine_idle_ret))	; Get high part of nap/doze return
-			lwz		r5,napStamp(r2)					; and high order
-			subfc	r7,r6,r7						; Subtract low stamp from now
-			lwz		r6,napTotal+4(r2)				; Pick up low total
-			subfe	r5,r5,r8						; Subtract high stamp and borrow from now
-			lwz		r8,napTotal(r2)					; Pick up the high total
-			addc	r6,r6,r7						; Add low to total
-			ori		r3,r3,lo16(EXT(machine_idle_ret))	; Get low part of nap/doze return
-			adde	r8,r8,r5						; Add high and carry to total
-			stw		r6,napTotal+4(r2)				; Save the low total
-			stw		r8,napTotal(r2)					; Save the high total
-			stw		r3,savesrr0+4(r13)				; Modify to return to nap/doze exit
-			
-			rlwinm.	r3,r1,0,pfSlowNapb,pfSlowNapb	; Should HID1 be restored?
-			beq		notInSlowNap
-
-			lwz		r3,pfHID1(r2)					; Get saved HID1 value
-			mtspr	hid1,r3							; Restore HID1
-
-notInSlowNap:
-			rlwinm.	r3,r1,0,pfNoL2PFNapb,pfNoL2PFNapb	; Should MSSCR0 be restored?
-			beq		notNapping
-
-			lwz		r3,pfMSSCR0(r2)					; Get saved MSSCR0 value
-			mtspr	msscr0,r3						; Restore MSSCR0
-			sync
-			isync
-
-notNapping:	stw		r12,saver12+4(r13)				; Save this one
-						
-			stw		r14,saver14+4(r13)				; Save this one
-			stw		r15,saver15+4(r13)				; Save this one 
-			la		r14,saver24(r13)				; Point to the next block to save into
-			mfctr	r6								; Get the CTR 
-			stw		r16,saver16+4(r13)				; Save this one
-            la		r15,savectr(r13)				; point to line with CTR, DAR, DSISR, Exception code, and VRSAVE
-			stw		r4,savelr+4(r13)				; Save rupt LR
-		
-			dcbz	0,r14							; allocate 32-byte line with r24-r27
-            la		r16,saver28(r13)				; point to line with r28-r31
-			dcbz	0,r15							; allocate line with CTR, DAR, DSISR, Exception code, and VRSAVE
-			stw		r17,saver17+4(r13)				; Save this one
-			stw		r18,saver18+4(r13)				; Save this one 
-			stw		r6,savectr+4(r13)				; Save rupt CTR
-			stw		r0,savecr(r13)					; Save rupt CR
-			stw		r19,saver19+4(r13)				; Save this one
-			mfdar	r6								; Get the rupt DAR
-			stw		r20,saver20+4(r13)				; Save this one 
-			dcbz	0,r16							; allocate 32-byte line with r28-r31
-
-			stw		r21,saver21+4(r13)				; Save this one
-			lwz		r21,spcFlags(r2)				; Get the special flags from per_proc
-			stw		r10,savexer+4(r13)				; Save the rupt XER
-			stw		r30,saver30+4(r13)				; Save this one 
-			lhz		r30,pfrptdProc(r2)				; Get the reported processor type
-			stw		r31,saver31+4(r13)				; Save this one 
-			stw		r22,saver22+4(r13)				; Save this one 
-			stw		r23,saver23+4(r13)				; Save this one 
-			stw		r24,saver24+4(r13)				; Save this one 
-			stw		r25,saver25+4(r13)				; Save this one 
-			mfdsisr	r7								; Get the rupt DSISR 
-			stw		r26,saver26+4(r13)				; Save this one		
-			stw		r27,saver27+4(r13)				; Save this one 
-			andis.	r21,r21,hi16(perfMonitor)		; Is the performance monitor enabled?
-			stw		r28,saver28+4(r13)				; Save this one
-			cmpwi	cr1, r30,CPU_SUBTYPE_POWERPC_750	; G3?
-            la		r27,savevscr(r13)				; point to 32-byte line with VSCR and FPSCR
-			cmpwi	cr2,r30,CPU_SUBTYPE_POWERPC_7400	; This guy?
-			stw		r29,saver29+4(r13)				; Save R29
-			stw		r6,savedar+4(r13)				; Save the rupt DAR 
-			li		r10,savepmc						; Point to pmc savearea
-
-			beq+	noPerfMonSave32					; No perfmon on here...
-
-			dcbz	r10,r13							; Clear first part of pmc area
-			li		r10,savepmc+0x20				; Point to pmc savearea second part
-			li		r22,0							; r22:	zero
-			dcbz	r10,r13							; Clear second part of pmc area
-		
-			beq		cr1,perfMonSave32_750			; This is a G3...
-
-			beq		cr2,perfMonSave32_7400			; Regular olde G4...
-
-			mfspr	r24,pmc5						; Here for a 7450
-			mfspr	r25,pmc6
-			stw		r24,savepmc+16(r13)				; Save PMC5
-			stw		r25,savepmc+20(r13)				; Save PMC6
-			mtspr	pmc5,r22						; Leave PMC5 clear
-			mtspr	pmc6,r22						; Leave PMC6 clear
-
-perfMonSave32_7400:		
-			mfspr	r25,mmcr2
-			stw		r25,savemmcr2+4(r13)			; Save MMCR2
-			mtspr	mmcr2,r22						; Leave MMCR2 clear
-
-perfMonSave32_750:		
-			mfspr	r23,mmcr0
-			mfspr	r24,mmcr1
-			stw		r23,savemmcr0+4(r13)			; Save MMCR0
-			stw		r24,savemmcr1+4(r13)			; Save MMCR1 
-			mtspr	mmcr0,r22						; Leave MMCR0 clear
-			mtspr	mmcr1,r22						; Leave MMCR1 clear
-			mfspr	r23,pmc1
-			mfspr	r24,pmc2
-			mfspr	r25,pmc3
-			mfspr	r26,pmc4
-			stw		r23,savepmc+0(r13)				; Save PMC1
-			stw		r24,savepmc+4(r13)				; Save PMC2
-			stw		r25,savepmc+8(r13)				; Save PMC3
-			stw		r26,savepmc+12(r13)				; Save PMC4
-			mtspr	pmc1,r22						; Leave PMC1 clear 
-			mtspr	pmc2,r22						; Leave PMC2 clear
-			mtspr	pmc3,r22						; Leave PMC3 clear 		
-			mtspr	pmc4,r22						; Leave PMC4 clear
-
-noPerfMonSave32:		
-			dcbz	0,r27							; allocate line with VSCR and FPSCR 
-			
-			stw		r7,savedsisr(r13)				; Save the rupt code DSISR
-			stw		r11,saveexception(r13)			; Save the exception code 
-
-
-;
-;			Everything is saved at this point, except for the FPRs and VMX registers.
-;			Time for us to get a new savearea and then trace interrupt if it is enabled.
-;
-
-			lwz		r25,traceMask(0)				; Get the trace mask
-			li		r0,SAVgeneral					; Get the savearea type value
-			lhz		r19,PP_CPU_NUMBER(r2)			; Get the logical processor number											
-			rlwinm	r22,r11,30,0,31					; Divide interrupt code by 4
-			stb		r0,SAVflags+2(r13)				; Mark valid context
-			addi	r22,r22,10						; Adjust code so we shift into CR5
-			li		r23,trcWork						; Get the trace work area address
-			rlwnm	r7,r25,r22,22,22				; Set CR5_EQ bit position to 0 if tracing allowed 
-			li		r26,0x8							; Get start of cpu mask
-			srw		r26,r26,r19						; Get bit position of cpu number
-			mtcrf	0x04,r7							; Set CR5 to show trace or not
-			and.	r26,r26,r25						; See if we trace this cpu
-			crandc	cr5_eq,cr5_eq,cr0_eq			; Turn off tracing if cpu is disabled
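-
-;			Net effect of the juggling above, as a rough C sketch (type_bit()
-;			stands in for the rlwnm positioning of the per-type mask bit):
-;
-;				bool type_on = type_bit(traceMask, excp_code);	; lands in CR5
-;				bool cpu_on  = traceMask & (0x8u >> cpu_number);
-;				if (!(type_on && cpu_on))
-;					skip_trace_cut();			; nothing to cut here
-;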
-;
-;			At this point, we can take another exception and lose nothing.
-;
-
-			bne+	cr5,xcp32xit					; Skip all of this if no tracing here...
-
-;
-;			We select a trace entry using a compare and swap on the next entry field.
-;			Since we do not lock the actual trace buffer, there is a potential that
-;			another processor could wrap and trash our entry.  Who cares?
-;
-
-			lwz		r25,traceStart(0)				; Get the start of trace table
-			lwz		r26,traceEnd(0)					; Get end of trace table
-	
-trcsel:		lwarx	r20,0,r23						; Get and reserve the next slot to allocate
-			
-			addi	r22,r20,LTR_size				; Point to the next trace entry
-			cmplw	r22,r26							; Do we need to wrap the trace table?
-			bne+	gotTrcEnt						; No wrap, we got us a trace entry...
-			
-			mr		r22,r25							; Wrap back to start
-
-gotTrcEnt:	stwcx.	r22,0,r23						; Try to update the current pointer
-			bne-	trcsel							; Collision, try again...
-			
-#if ESPDEBUG
-			dcbf	0,r23							; Force to memory
-			sync
-#endif
-			
-			dcbz	0,r20							; Clear and allocate first trace line
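-
-;			The slot grab above is a wrapping lock-free allocation; in rough
-;			C11 terms (treating trcWork as _Atomic purely to illustrate the
-;			lwarx/stwcx. pair):
-;
-;				LTR *mine, *next;
-;				do {
-;					mine = atomic_load(&trcWork);
-;					next = mine + 1;
-;					if (next == traceEnd) next = traceStart;	; wrap
-;				} while (!atomic_compare_exchange_weak(&trcWork, &mine, next));
-;				/* mine is ours, unless a wrap laps us.  Who cares? */
-;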
-
-;
-;			Let us cut that trace entry now.
-;
-;			Note that this code cuts a trace table entry for everything but the CutTrace call.
-;			An identical entry is made during normal CutTrace processing.  Any entry
-;			format changes made must be done in both places.
-;
-
-			lwz		r16,ruptStamp(r2)				; Get top of time base
-			lwz		r17,ruptStamp+4(r2)				; Get the bottom of time stamp
-
-			li		r14,32							; Offset to second line
-
-			lwz		r0,saver0+4(r13)				; Get back interrupt time R0
-			lwz		r1,saver1+4(r13)				; Get back interrupt time R1
-			lwz		r8,savecr(r13)					; Get the CR value
-			
-			dcbz	r14,r20							; Zap the second line
-			
-			sth		r19,LTR_cpu(r20)				; Stash the cpu number
-			li		r14,64							; Offset to third line
-			sth		r11,LTR_excpt(r20)				; Save the exception type 
-			lwz		r7,saver2+4(r13)				; Get back interrupt time R2
-			lwz		r3,saver3+4(r13)				; Restore this one
-		
-			dcbz	r14,r20							; Zap the third line
-			
-			mfdsisr	r9								; Get the DSISR
-			li		r14,96							; Offset to fourth line
-			stw		r16,LTR_timeHi(r20)				; Set the upper part of TB 
-			stw		r17,LTR_timeLo(r20)				; Set the lower part of TB
-			lwz		r10,savelr+4(r13)				; Get the LR
-			mfsrr0	r17								; Get SRR0 back, it is still good
-			
-			dcbz	r14,r20							; Zap the fourth line
-			lwz		r4,saver4+4(r13)				; Restore this one
-			lwz		r5,saver5+4(r13)				; Restore this one
-			mfsrr1	r18								; SRR1 is still good in here
-
-			stw		r8,LTR_cr(r20)					; Save the CR
-			lwz		r6,saver6+4(r13)				; Get R6
-			mfdar	r16								; Get this back
-			stw		r9,LTR_dsisr(r20)				; Save the DSISR
-			stw		r17,LTR_srr0+4(r20)				; Save the SRR0 
-			
-			stw		r18,LTR_srr1+4(r20)				; Save the SRR1 
-			stw		r16,LTR_dar+4(r20)				; Save the DAR
-			mfctr	r17								; Get the CTR (still good in register)
-			stw		r13,LTR_save+4(r20)				; Save the savearea 
-			stw		r10,LTR_lr+4(r20)				; Save the LR
-			
-			stw		r17,LTR_ctr+4(r20)				; Save off the CTR
-			stw		r0,LTR_r0+4(r20)				; Save off register 0 			
-			stw		r1,LTR_r1+4(r20)				; Save off register 1			
-			stw		r7,LTR_r2+4(r20)				; Save off register 2 	
-					
-		
-			stw		r3,LTR_r3+4(r20)				; Save off register 3
-			stw		r4,LTR_r4+4(r20)				; Save off register 4 
-			stw		r5,LTR_r5+4(r20)				; Save off register 5	
-			stw		r6,LTR_r6+4(r20)				; Save off register 6	
-
-#if ESPDEBUG
-			addi	r17,r20,32						; Second line
-			addi	r16,r20,64						; Third line
-			dcbst	br0,r20							; Force to memory
-			dcbst	br0,r17							; Force to memory
-			addi	r17,r17,32						; Fourth line
-			dcbst	br0,r16							; Force to memory
-			dcbst	br0,r17							; Force to memory
-			
-			sync									; Make sure it all goes
-#endif
-xcp32xit:	mr		r14,r11							; Save the interrupt code across the call
-			bl		EXT(save_get_phys_32)			; Grab a savearea
-			mfsprg	r2,0							; Get the per_proc info
-			li		r10,emfp0						; Point to floating point save
-			mr		r11,r14							; Get the exception code back
-			dcbz	r10,r2							; Clear for speed
-			stw		r3,next_savearea+4(r2)			; Store the savearea for the next rupt
-
-			b		xcpCommon						; Go join the common interrupt processing...
-
-;
-;
-;			This is the 64-bit context saving stuff
-;
-
-			.align	5
-						
-extEntry64:	mfsprg  r13,0							; Load per_proc
-			ld		r13,next_savearea(r13)			; Get the exception save area
-			std		r0,saver0(r13)					; Save register 0
-			lis		r0,hi16(MASK(MSR_VEC)|MASK(MSR_FP)|MASK(MSR_ME))	; Set up the MSR we will use throughout. Note that ME comes on here if MCK
-			std		r1,saver1(r13)					; Save register 1
-			ori		r1,r0,lo16(MASK(MSR_VEC)|MASK(MSR_FP)|MASK(MSR_ME))	; Rest of MSR
-			lis		r0,0x0010						; Get rupt code transform validity mask
-			mtmsr	r1								; Set MSR
-			isync
-		
-			ori		r0,r0,0x0200					; Get rupt code transform validity mask
-			std		r2,saver2(r13)					; Save this one
-			lis		r1,0x00F0						; Top half of xform XOR
-			rlwinm	r2,r11,29,27,31					; Get high 5 bits of rupt code
-			std		r3,saver3(r13)					; Save this one
-			slw		r0,r0,r2						; Move transform validity bit to bit 0
-			std		r4,saver4(r13)					; Save this one
-			std		r5,saver5(r13)					; Save this one 
-			ori		r1,r1,0x04EC					; Bottom half of xform XOR
-			mfxer	r5								; Save the XER because we are about to muck with it
-			rlwinm	r4,r11,1,27,28					; Get bottom of interrupt code * 8
-			lis		r3,hi16(dozem|napm)				; Get the nap and doze bits
-			srawi	r0,r0,31						; Get 0xFFFFFFFF if xform valid, 0 otherwise
-			rlwnm	r4,r1,r4,24,31					; Extract the xform XOR
-			li		r1,saver16						; Point to the next line
-			and		r4,r4,r0						; Only keep transform if we are to use it
-			li		r2,lgKillResv					; Point to the killing field
-			mfcr	r0								; Save the CR
-			stwcx.	r2,0,r2							; Kill any pending reservation
-			dcbz128	r1,r13							; Blow away the line
-			sldi	r3,r3,32						; Position it
-			mfspr	r1,hid0							; Get HID0
-			andc	r3,r1,r3						; Clear nap and doze
-			xor		r11,r11,r4						; Transform 970 rupt code to standard keeping FAM bit
-			cmpld	r3,r1							; See if nap and/or doze was on
-			std		r6,saver6(r13)					; Save this one
-			mfsprg	r2,0							; Get the per_proc area
-			la		r6,savesrr0(r13)				; point to line with SRR0, SRR1, CR, XER, and LR
-			beq++	eE64NoNap						; No nap here,  skip all this...
-		
-			sync									; Make sure we are clean
-			mtspr	hid0,r3							; Set the updated hid0
-			mfspr	r1,hid0							; Yes, this is silly, keep it here
-			mfspr	r1,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r1,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r1,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r1,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r1,hid0							; Yes, this is a duplicate, keep it here
-			
-eE64NoNap:	crnot	wasNapping,cr0_eq				; Remember if we were napping
-			andi.	r1,r11,T_FAM					; Check FAM bit
-			beq++	eEnoFAM							; Is it FAM intercept
-			mfsrr1	r3								; Load srr1
-			andc	r11,r11,r1						; Clear FAM bit
-			rlwinm.	r3,r3,0,MSR_PR_BIT,MSR_PR_BIT	; Are we trapping from supervisor state?
-			beq++	eEnoFAM							; From supervisor state
-			lwz		r1,spcFlags(r2)					; Load spcFlags 
-			rlwinm	r1,r1,1+FamVMmodebit,30,31		; Extract FamVMenabit and FamVMmodebit
-			cmpwi	cr0,r1,2						; Check FamVMena set without FamVMmode
-			bne++	eEnoFAM							; Can this context be FAM intercept
-			lwz		r4,FAMintercept(r2)				; Load exceptions mask to intercept
-			li		r3,0							; Clear
-			srwi	r1,r11,2						; divide r11 by 4
-			oris	r3,r3,0x8000					; Set r3 to 0x80000000
-			srw		r1,r3,r1						; Set bit for current exception
-			and.	r1,r1,r4						; And current exception with the intercept mask
-			beq++	eEnoFAM							; Is it FAM intercept
-			b		EXT(vmm_fam_exc)
-
-			.align	5
-
-eEnoFAM:	lwz		r1,pfAvailable(r2)				; Get the CPU features flags	
-			dcbz128	0,r6							; allocate 128-byte line with SRR0, SRR1, CR, XER, and LR
-			
-;
-;			Remember, we are setting up CR6 with feature flags
-;
-			std		r7,saver7(r13)					; Save this one
-			mtcrf	0x80,r1							; Put the features flags (that we care about) in the CR
-			std		r8,saver8(r13)					; Save this one
-			mtcrf	0x40,r1							; Put the features flags (that we care about) in the CR
-			mfsrr0	r6								; Get the interruption SRR0 
-			mtcrf	0x20,r1							; Put the features flags (that we care about) in the CR
-			mfsrr1	r7								; Get the interrupt SRR1
-			std		r6,savesrr0(r13)				; Save the SRR0 
-			mtcrf	0x02,r1							; Put the features flags (that we care about) in the CR
-			std		r9,saver9(r13)					; Save this one
-			crmove	featAltivec,pfAltivecb			; Set the Altivec flag
-			std		r7,savesrr1(r13)				; Save SRR1 
-			mfsprg	r9,3							; Get rupt time R11
-			std		r10,saver10(r13)				; Save this one
-			mfsprg	r6,2							; Get interrupt time R13
-			std		r9,saver11(r13)					; Save rupt time R11
-			mtsprg	2,r1							; Set the feature flags
-			std		r12,saver12(r13)				; Save this one
-			mflr	r4								; Get the LR
- 			mftb	r7								; Get the timebase
-			std		r6,saver13(r13)					; Save rupt R13
-			std		r7,ruptStamp(r2)				; Save the time stamp
-			std		r7,SAVtime(r13)					; Save the time stamp
-			
-			bf++	wasNapping,notNappingSF			; Skip if not waking up from nap...
-
-			ld		r6,napStamp(r2)					; Pick up nap stamp
-			lis		r3,hi16(EXT(machine_idle_ret))	; Get high part of nap/doze return
-			sub		r7,r7,r6						; Subtract stamp from now
-			ld		r6,napTotal(r2)					; Pick up total
-			add		r6,r6,r7						; Add low to total
-			ori		r3,r3,lo16(EXT(machine_idle_ret))	; Get low part of nap/doze return
-			std		r6,napTotal(r2)					; Save the updated total
-			std		r3,savesrr0(r13)				; Modify to return to nap/doze exit
-			
-notNappingSF:	
-			std		r14,saver14(r13)				; Save this one
-			std		r15,saver15(r13)				; Save this one 
-			stw		r0,savecr(r13)					; Save rupt CR
-			mfctr	r6								; Get the CTR 
-			std		r16,saver16(r13)				; Save this one
-			std		r4,savelr(r13)					; Save rupt LR
-		
-			std		r17,saver17(r13)				; Save this one
-			li		r7,savepmc						; Point to pmc area
-			std		r18,saver18(r13)				; Save this one 
-			lwz		r17,spcFlags(r2)				; Get the special flags from per_proc
-			std		r6,savectr(r13)					; Save rupt CTR
-			std		r19,saver19(r13)				; Save this one
-			mfdar	r6								; Get the rupt DAR
-			std		r20,saver20(r13)				; Save this one 
-
-			dcbz128	r7,r13							; Clear out the pmc spot
-					
-			std		r21,saver21(r13)				; Save this one
-			std		r5,savexer(r13)					; Save the rupt XER
-			std		r22,saver22(r13)				; Save this one 
-			std		r23,saver23(r13)				; Save this one 
-			std		r24,saver24(r13)				; Save this one 
-			std		r25,saver25(r13)				; Save this one 
-			mfdsisr	r7								; Get the rupt DSISR 
-			std		r26,saver26(r13)				; Save this one		
-			andis.	r17,r17,hi16(perfMonitor)		; Is the performance monitor enabled?
-			std		r27,saver27(r13)				; Save this one 
-			li		r10,emfp0						; Point to floating point save
-			std		r28,saver28(r13)				; Save this one
-            la		r27,savevscr(r13)				; point to 32-byte line with VSCR and FPSCR
-			std		r29,saver29(r13)				; Save R29
-			std		r30,saver30(r13)				; Save this one 
-			std		r31,saver31(r13)				; Save this one 
-			std		r6,savedar(r13)					; Save the rupt DAR 
-			stw		r7,savedsisr(r13)				; Save the rupt code DSISR
-			stw		r11,saveexception(r13)			; Save the exception code 
-
-			beq++	noPerfMonSave64					; Performance monitor not on...
-
-			li		r22,0							; r22:	zero
-		
-			mfspr	r23,mmcr0_gp
-			mfspr	r24,mmcr1_gp
-			mfspr	r25,mmcra_gp
-			std		r23,savemmcr0(r13)				; Save MMCR0
-			std		r24,savemmcr1(r13)				; Save MMCR1 
-			std		r25,savemmcr2(r13)				; Save MMCRA
-			mtspr	mmcr0_gp,r22					; Leave MMCR0 clear
-			mtspr	mmcr1_gp,r22					; Leave MMCR1 clear
-			mtspr	mmcra_gp,r22					; Leave MMCRA clear 
-			mfspr	r23,pmc1_gp
-			mfspr	r24,pmc2_gp
-			mfspr	r25,pmc3_gp
-			mfspr	r26,pmc4_gp
-			stw		r23,savepmc+0(r13)				; Save PMC1
-			stw		r24,savepmc+4(r13)				; Save PMC2
-			stw		r25,savepmc+8(r13)				; Save PMC3
-			stw		r26,savepmc+12(r13)				; Save PMC4
-			mfspr	r23,pmc5_gp
-			mfspr	r24,pmc6_gp
-			mfspr	r25,pmc7_gp
-			mfspr	r26,pmc8_gp
-			stw		r23,savepmc+16(r13)				; Save PMC5
-			stw		r24,savepmc+20(r13)				; Save PMC6
-			stw		r25,savepmc+24(r13)				; Save PMC7
-			stw		r26,savepmc+28(r13)				; Save PMC8
-			mtspr	pmc1_gp,r22						; Leave PMC1 clear 
-			mtspr	pmc2_gp,r22						; Leave PMC2 clear
-			mtspr	pmc3_gp,r22						; Leave PMC3 clear 		
-			mtspr	pmc4_gp,r22						; Leave PMC4 clear 
-			mtspr	pmc5_gp,r22						; Leave PMC5 clear 
-			mtspr	pmc6_gp,r22						; Leave PMC6 clear
-			mtspr	pmc7_gp,r22						; Leave PMC7 clear 		
-			mtspr	pmc8_gp,r22						; Leave PMC8 clear 
-
-noPerfMonSave64:		
-
-;
-;			Everything is saved at this point, except for the FPRs and VMX registers.
-;			Time for us to get a new savearea and then trace interrupt if it is enabled.
-;
-
-			lwz		r25,traceMask(0)				; Get the trace mask
-			li		r0,SAVgeneral					; Get the savearea type value
-			lhz		r19,PP_CPU_NUMBER(r2)			; Get the logical processor number											
-			stb		r0,SAVflags+2(r13)				; Mark valid context
-			rlwinm	r22,r11,30,0,31					; Divide interrupt code by 4
-			li		r23,trcWork						; Get the trace work area address
-			addi	r22,r22,10						; Adjust code so we shift into CR5
-			li		r26,0x8							; Get start of cpu mask
-			rlwnm	r7,r25,r22,22,22				; Set CR5_EQ bit position to 0 if tracing allowed 
-			srw		r26,r26,r19						; Get bit position of cpu number
-			mtcrf	0x04,r7							; Set CR5 to show trace or not
-			and.	r26,r26,r25						; See if we trace this cpu
-			crandc	cr5_eq,cr5_eq,cr0_eq			; Turn off tracing if cpu is disabled
-
-			bne++	cr5,xcp64xit					; Skip all of this if no tracing here...
-
-;
-;			We select a trace entry using a compare and swap on the next entry field.
-;			Since we do not lock the actual trace buffer, there is a potential that
-;			another processor could wrap and trash our entry.  Who cares?
-;
-
-			lwz		r25,traceStart(0)				; Get the start of trace table
-			lwz		r26,traceEnd(0)					; Get end of trace table
-
-trcselSF:	lwarx	r20,0,r23						; Get and reserve the next slot to allocate
-			
-			addi	r22,r20,LTR_size				; Point to the next trace entry
-			cmplw	r22,r26							; Do we need to wrap the trace table?
-			bne++	gotTrcEntSF						; No wrap, we got us a trace entry...
-			
-			mr		r22,r25							; Wrap back to start
-
-gotTrcEntSF:	
-			stwcx.	r22,0,r23						; Try to update the current pointer
-			bne-	trcselSF						; Collision, try again...
-			
-#if ESPDEBUG
-			dcbf	0,r23							; Force to memory
-			sync
-#endif
-
-;
-;			Let us cut that trace entry now.
-;
-;			Note that this code cuts a trace table entry for everything but the CutTrace call.
-;			An identical entry is made during normal CutTrace processing.  Any entry
-;			format changes made must be done in both places.
-;
-
-			dcbz128	0,r20							; Zap the trace entry
-
-			lwz		r9,SAVflags(r13)				; Get savearea flags
-
-			ld		r16,ruptStamp(r2)				; Get top of time base
-			ld		r0,saver0(r13)					; Get back interrupt time R0 (we need this whether we trace or not)
-			std		r16,LTR_timeHi(r20)				; Set the upper part of TB 
-			ld		r1,saver1(r13)					; Get back interrupt time R1
-			rlwinm	r9,r9,20,16,23					; Isolate the special flags
-			ld		r18,saver2(r13)					; Get back interrupt time R2
-			std		r0,LTR_r0(r20)					; Save off register 0 			
-			rlwimi	r9,r19,0,24,31					; Slide in the cpu number
-			ld		r3,saver3(r13)					; Restore this one
-			sth		r9,LTR_cpu(r20)					; Stash the cpu number and special flags
-			std		r1,LTR_r1(r20)					; Save off register 1			
-			ld		r4,saver4(r13)					; Restore this one
-			std		r18,LTR_r2(r20)					; Save off register 2 			
-			ld		r5,saver5(r13)					; Restore this one
-			ld		r6,saver6(r13)					; Get R6
-			std		r3,LTR_r3(r20)					; Save off register 3
-			lwz		r16,savecr(r13)					; Get the CR value
-			std		r4,LTR_r4(r20)					; Save off register 4 
-			mfsrr0	r17								; Get SRR0 back, it is still good
-			std		r5,LTR_r5(r20)					; Save off register 5	
-			std		r6,LTR_r6(r20)					; Save off register 6	
-			mfsrr1	r18								; SRR1 is still good in here
-			stw		r16,LTR_cr(r20)					; Save the CR
-			std		r17,LTR_srr0(r20)				; Save the SRR0 
-			std		r18,LTR_srr1(r20)				; Save the SRR1 
-						
-			mfdar	r17								; Get this back
-			ld		r16,savelr(r13)					; Get the LR
-			std		r17,LTR_dar(r20)				; Save the DAR
-			mfctr	r17								; Get the CTR (still good in register)
-			std		r16,LTR_lr(r20)					; Save the LR
-			std		r17,LTR_ctr(r20)				; Save off the CTR
-			mfdsisr	r17								; Get the DSISR
-			std		r13,LTR_save(r20)				; Save the savearea 
-			stw		r17,LTR_dsisr(r20)				; Save the DSISR
-			sth		r11,LTR_excpt(r20)				; Save the exception type 
-#if 0
-			lwz		r17,FPUowner(r2)				; (TEST/DEBUG) Get the current floating point owner
-			stw		r17,LTR_rsvd0(r20)				; (TEST/DEBUG) Record the owner
-#endif
-
-#if ESPDEBUG
-			dcbf	0,r20							; Force to memory			
-			sync									; Make sure it all goes
-#endif
-xcp64xit:	mr		r14,r11							; Save the interrupt code across the call
-			bl		EXT(save_get_phys_64)			; Grab a savearea
-			mfsprg	r2,0							; Get the per_proc info
-			li		r10,emfp0						; Point to floating point save
-			mr		r11,r14							; Get the exception code back
-			dcbz128	r10,r2							; Clear for speed
-			std		r3,next_savearea(r2)			; Store the savearea for the next rupt
-			b		xcpCommon						; Go join the common interrupt processing...
-
-;
-;			All of the context is saved. Now we will get a
-;			fresh savearea.  After this we can take an interrupt.
-;
-
-			.align	5
-
-xcpCommon:
-
-;
-;			Here we will save some floating point and vector status
-;			and we also set a clean default status for a new interrupt level.
-;			Note that we assume that emfp0 is on an altivec boundary
-;			and that R10 points to it (as a displacement from R2).
-;
-;			We need to save the FPSCR as if it is normal context.
-;			This is because pending exceptions will cause an exception even if
-;			FP is disabled. We need to clear the FPSCR when we first start running in the
-;			kernel.
-;
-
-			stfd	f0,emfp0(r2)					; Save FPR0	
-			stfd	f1,emfp1(r2)					; Save FPR1	
-			li		r19,0							; Assume no Altivec
-			mffs	f0								; Get the FPSCR
-			lfd		f1,Zero(0)						; Make a 0			
-			stfd	f0,savefpscrpad(r13)			; Save the FPSCR
-			li		r9,0							; Get set to clear VRSAVE
-			mtfsf	0xFF,f1							; Clear it
-			addi	r14,r10,16						; Displacement to second vector register
-			lfd		f0,emfp0(r2)					; Restore FPR0	
-			la		r28,savevscr(r13)				; Point to the status area
-			lfd		f1,emfp1(r2)					; Restore FPR1	
-
-			bf		featAltivec,noavec				; No Altivec on this CPU...
-			
-			stvxl	v0,r10,r2						; Save a register
-			stvxl	v1,r14,r2						; Save a second register
-			mfspr	r19,vrsave						; Get the VRSAVE register
-			mfvscr	v0								; Get the vector status register
-			vspltish v1,1							; Turn on the non-Java bit and saturate
-			stvxl	v0,0,r28						; Save the vector status
-			vspltisw v0,1							; Turn on the saturate bit
-			vxor	v1,v1,v0						; Turn off saturate	
-			mtvscr	v1								; Set the non-java, no saturate status for new level
-			mtspr	vrsave,r9						; Clear VRSAVE for each interrupt level
-
-			lvxl	v0,r10,r2						; Restore first work register
-			lvxl	v1,r14,r2						; Restore second work register
-
-noavec:		stw		r19,savevrsave(r13)				; Save the vector register usage flags
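-
-;			In outline (rough C; NON_JAVA names the value built with the
-;			vspltish/vxor pair above):
-;
-;				save->fpscr = read_fpscr();		; pending FP exceptions travel
-;				write_fpscr(0);					; clean slate for this level
-;				if (cpu_has_altivec) {
-;					save->vscr   = read_vscr();
-;					save->vrsave = vrsave;
-;					write_vscr(NON_JAVA);		; non-Java mode, saturate off
-;					vrsave = 0;					; no vector regs live yet
-;				}
-;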
-			
-;
-;			We are now done saving all of the context.  Start filtering the interrupts.
-;			Note that a Redrive will count as an actual interrupt.
-;			Note also that we take a lot of system calls so we will start decoding here.
-;
-
-Redrive:	
-			lwz		r22,SAVflags(r13)				; Pick up the flags
-			lwz		r0,saver0+4(r13)				; Get back interrupt time syscall number
-			mfsprg	r2,0							; Restore per_proc
-		
-			lwz		r20,lo16(xcpTable)(r11)         ; Get the interrupt handler (note: xcpTable must be in 1st 32k of physical memory)
-			la		r12,hwCounts(r2)				; Point to the exception count area
-			andis.	r24,r22,hi16(SAVeat)			; Should we eat this one?		
-			rlwinm	r22,r22,SAVredriveb+1,31,31		; Get a 1 if we are redriving
-			add		r12,r12,r11						; Point to the count
-			lwz		r25,0(r12)						; Get the old value
-			lwz		r23,hwRedrives(r2)				; Get the redrive count
-			crmove	cr3_eq,cr0_eq					; Remember if we are ignoring
-			xori	r24,r22,1						; Get the NOT of the redrive
-			mtctr	r20								; Point to the interrupt handler
-			mtcrf	0x80,r0							; Set our CR0 to the high nybble of possible syscall code
-			add		r25,r25,r24						; Count this one if not a redrive
-			add		r23,r23,r22						; Count this one if it is a redrive
-			crandc	cr0_lt,cr0_lt,cr0_gt			; See if we have R0 equal to 0b10xx...x 
-			stw		r25,0(r12)						; Store it back
-			stw		r23,hwRedrives(r2)				; Save the redrive count
-			bne--	cr3,IgnoreRupt					; Interruption is being ignored...
-			bctr									; Go process the exception...
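-
-;			Stripped of the counting, the dispatch above is a table call;
-;			roughly, in C (r11 arrives pre-scaled by 4, hence the divide):
-;
-;				handler = xcpTable[excp_code / 4];
-;				if (redrive) hwRedrives++;
-;				else         hwCounts[excp_code / 4]++;
-;				if (savflags & SAVeat)
-;					goto IgnoreRupt;			; count it as ignored and eat it
-;				handler();						; the bctr
-;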
-	
-
-;
-;			Exception vector filter table (like everything in this file, must be in 1st 32KB of physical memory)
-;
-
-			.align	7
-			
-xcpTable:
-			.long	EatRupt							; T_IN_VAIN			
-			.long	PassUpTrap						; T_RESET				
-			.long	MachineCheck					; T_MACHINE_CHECK		
-			.long	EXT(handlePF)					; T_DATA_ACCESS		
-			.long	EXT(handlePF)					; T_INSTRUCTION_ACCESS
-			.long	PassUpRupt						; T_INTERRUPT		
-			.long	EXT(AlignAssist)				; T_ALIGNMENT			
-			.long	ProgramChk						; T_PROGRAM
-			.long	PassUpFPU						; T_FP_UNAVAILABLE		
-			.long	PassUpRupt						; T_DECREMENTER		
-			.long	PassUpTrap						; T_IO_ERROR			
-			.long	PassUpTrap						; T_RESERVED			
-			.long	xcpSyscall						; T_SYSTEM_CALL			
-			.long	PassUpTrap						; T_TRACE				
-			.long	PassUpTrap						; T_FP_ASSIST			
-			.long	PassUpTrap						; T_PERF_MON				
-			.long	PassUpVMX						; T_VMX					
-			.long	PassUpTrap						; T_INVALID_EXCP0		
-			.long	PassUpTrap						; T_INVALID_EXCP1			
-			.long	PassUpTrap						; T_INVALID_EXCP2		
-			.long	PassUpTrap						; T_INSTRUCTION_BKPT		
-			.long	PassUpRupt						; T_SYSTEM_MANAGEMENT		
-			.long	EXT(AltivecAssist)				; T_ALTIVEC_ASSIST		
-			.long	PassUpRupt						; T_THERMAL				
-			.long	PassUpTrap						; T_INVALID_EXCP5		
-			.long	PassUpTrap						; T_INVALID_EXCP6			
-			.long	PassUpTrap						; T_INVALID_EXCP7			
-			.long	PassUpTrap						; T_INVALID_EXCP8			
-			.long	PassUpTrap						; T_INVALID_EXCP9			
-			.long	PassUpTrap						; T_INVALID_EXCP10		
-			.long	PassUpTrap						; T_INVALID_EXCP11		
-			.long	PassUpTrap						; T_INVALID_EXCP12	
-			.long	PassUpTrap						; T_INVALID_EXCP13		
-
-			.long	PassUpTrap						; T_RUNMODE_TRACE			
-
-			.long	PassUpRupt						; T_SIGP					
-			.long	PassUpTrap						; T_PREEMPT				
-			.long	conswtch						; T_CSWITCH				
-			.long	PassUpRupt						; T_SHUTDOWN				
-			.long	PassUpAbend						; T_CHOKE					
-
-			.long	EXT(handleDSeg)					; T_DATA_SEGMENT			
-			.long	EXT(handleISeg)					; T_INSTRUCTION_SEGMENT	
-
-			.long	WhoaBaby						; T_SOFT_PATCH			
-			.long	WhoaBaby						; T_MAINTENANCE			
-			.long	WhoaBaby						; T_INSTRUMENTATION		
-			.long	WhoaBaby						; T_ARCHDEP0
-			.long	EatRupt							; T_HDEC
-;
-;			Just what the heck happened here???? 
-;           NB: also get here from UFT dispatch table, on bogus index
-;
-			
-WhoaBaby:	b		.								; Open the hood and wait for help
-
-			.align	5
-			
-IgnoreRupt:
-			lwz		r20,hwIgnored(r2)				; Grab the ignored interruption count
-			addi	r20,r20,1						; Count this one
-			stw		r20,hwIgnored(r2)				; Save the ignored count
-			b		EatRupt							; Ignore it...
-
-
-													
-;
-;			System call
-;
-		
-			.align	5
-
-xcpSyscall:	lis		r20,hi16(EXT(shandler))			; Assume this is a normal one, get handler address
-			rlwinm	r6,r0,1,0,31					; Move sign bit to the end 
-			ori		r20,r20,lo16(EXT(shandler))		; Assume this is a normal one, get handler address
-			bnl++	cr0,PassUp						; R0 not 0b10xxx...x, cannot be any kind of magical system call, just pass it up...
-			lwz		r7,savesrr1+4(r13)				; Get the entering MSR (low half)
-			lwz		r1,dgFlags(0)					; Get the flags
-			cmplwi	cr2,r6,1						; See if original R0 had the CutTrace request code in it 
-			
-			rlwinm.	r7,r7,0,MSR_PR_BIT,MSR_PR_BIT	; Did we come from user state?
-			beq++	FCisok							; From supervisor state...
-
-			rlwinm.	r1,r1,0,enaUsrFCallb,enaUsrFCallb	; Are they valid?
-			beq++	PassUp							; No, treat as a normal one...
-
-FCisok:		beq++	cr2,EatRupt						; This is a CutTrace system call, we are done with it...
-			
-;
-;			Here is where we call the firmware.  If it returns T_IN_VAIN, that means
-;			that it has handled the interruption.  Remember: thou shalt not trash R13
-;			while you are away.  Anything else is ok.
-;			
-
-			lwz		r3,saver3+4(r13)				; Restore the first parameter
-			b		EXT(FirmwareCall)				; Go handle the firmware call....
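-
-;			The filtering above, in rough C (magic calls have the top two bits
-;			of R0 equal to 0b10; 0x80000000 itself is the CutTrace code):
-;
-;				if ((r0 & 0xC0000000) != 0x80000000) goto PassUp;
-;				if (from_user && !(dgFlags & enaUsrFCall)) goto PassUp;
-;				if (r0 == 0x80000000) goto EatRupt;	; trace already cut above
-;				FirmwareCall(saved_r3);				; returns via FCReturn
-;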
-
-;
-;			Here is where we return from the firmware call
-;
-
-			.align	5
-			.globl	EXT(FCReturn)
-
-LEXT(FCReturn)
-			cmplwi	r3,T_IN_VAIN					; Was it handled? 
-			beq++	EatRupt							; Interrupt was handled...
-			mr		r11,r3							; Put the rupt code into the right register
-			b		Redrive							; Go through the filter again...
-		
-
-;
-;			Here is where we return from the PTE miss and segment exception handler
-;
-
-			.align	5
-			.globl	EXT(PFSExit)
-
-LEXT(PFSExit)
-
-#if 0
-			mfsprg	r2,0							; (BRINGUP)
-			lwz		r0,savedsisr(r13)				; (BRINGUP)
-			andis.	r0,r0,hi16(dsiAC)				; (BRINGUP)
-			beq++	didnthit						; (BRINGUP)
-			lwz		r0,20(0)						; (BRINGUP)
-			mr.		r0,r0							; (BRINGUP)
-			bne--	didnthit						; (BRINGUP)
-#if 0
-			li		r0,1							; (BRINGUP)
-			stw		r0,20(0)						; (BRINGUP)
-			lis		r0,hi16(Choke)					; (BRINGUP)
-			ori		r0,r0,lo16(Choke)				; (BRINGUP)
-			sc										; (BRINGUP)
-#endif
-			
-			lwz		r4,savesrr0+4(r13)				; (BRINGUP)
-			lwz		r8,savesrr1+4(r13)				; (BRINGUP)
-			lwz		r6,savedar+4(r13)				; (BRINGUP)
-			rlwinm.	r0,r8,0,MSR_IR_BIT,MSR_IR_BIT	; (BRINGUP)
-			mfmsr	r9								; (BRINGUP)
-			ori		r0,r9,lo16(MASK(MSR_DR))		; (BRINGUP)
-			beq--	hghg							; (BRINGUP)
-			mtmsr	r0								; (BRINGUP)
-			isync									; (BRINGUP)
-
-hghg:		lwz		r5,0(r4)						; (BRINGUP)
-			beq--	hghg1							; (BRINGUP)
-			mtmsr	r9								; (BRINGUP)
-			isync									; (BRINGUP)
-
-hghg1:		rlwinm	r7,r5,6,26,31					; (BRINGUP)
-			rlwinm	r27,r5,14,24,28					; (BRINGUP)
-			addi	r3,r13,saver0+4					; (BRINGUP)
-			lwzx	r3,r3,r27						; (BRINGUP)
-			
-#if 0
-			lwz		r27,patcharea+4(r2)				; (BRINGUP)
-			mr.		r3,r3							; (BRINGUP)
-			bne++	nbnbnb							; (BRINGUP)
-			addi	r27,r27,1						; (BRINGUP)
-			stw		r27,patcharea+4(r2)				; (BRINGUP)
-nbnbnb:					
-#endif			
-			
-			rlwinm.	r28,r8,0,MSR_DR_BIT,MSR_DR_BIT	; (BRINGUP)
-			rlwinm	r27,r6,0,0,29					; (BRINGUP)
-			ori		r28,r9,lo16(MASK(MSR_DR))		; (BRINGUP)
-			mfspr	r10,dabr						; (BRINGUP)
-			li		r0,0							; (BRINGUP)
-			mtspr	dabr,r0							; (BRINGUP)
-			cmplwi	cr1,r7,31						; (BRINGUP) 
-			beq--	qqq0							; (BRINGUP)
-			mtmsr	r28								; (BRINGUP)
-qqq0:
-			isync									; (BRINGUP)
-			
-			lwz		r27,0(r27)						; (BRINGUP) - Get original value
-			
-			bne		cr1,qqq1						; (BRINGUP)
-			
-			rlwinm	r5,r5,31,22,31					; (BRINGUP)
-			cmplwi	cr1,r5,151						; (BRINGUP)			
-			beq		cr1,qqq3						; (BRINGUP)
-			cmplwi	cr1,r5,407						; (BRINGUP)			
-			beq		cr1,qqq2						; (BRINGUP)
-			cmplwi	cr1,r5,215						; (BRINGUP)			
-			beq		cr1,qqq0q						; (BRINGUP)
-			cmplwi	cr1,r5,1014						; (BRINGUP)
-			beq		cr1,qqqm1						; (BRINGUP)
-
-			lis		r0,hi16(Choke)					; (BRINGUP)
-			ori		r0,r0,lo16(Choke)				; (BRINGUP)
-			sc										; (BRINGUP)
-			
-qqqm1:		rlwinm	r7,r6,0,0,26					; (BRINGUP)
-			stw		r0,0(r7)						; (BRINGUP)
-			stw		r0,4(r7)						; (BRINGUP)
-			stw		r0,8(r7)						; (BRINGUP)
-			stw		r0,12(r7)						; (BRINGUP)
-			stw		r0,16(r7)						; (BRINGUP)
-			stw		r0,20(r7)						; (BRINGUP)
-			stw		r0,24(r7)						; (BRINGUP)
-			stw		r0,28(r7)						; (BRINGUP)
-			b		qqq9
-		
-qqq1:		cmplwi	r7,38							; (BRINGUP)
-			bgt		qqq2							; (BRINGUP)
-			blt		qqq3							; (BRINGUP)
-
-qqq0q:		stb		r3,0(r6)						; (BRINGUP)
-			b		qqq9							; (BRINGUP)
-			
-qqq2:		sth		r3,0(r6)						; (BRINGUP)
-			b		qqq9							; (BRINGUP)
-			
-qqq3:		stw		r3,0(r6)						; (BRINGUP)
-			
-qqq9:		
-#if 0
-			rlwinm	r7,r6,0,0,29					; (BRINGUP)
-			lwz		r0,0(r7)						; (BRINGUP) - Get newest value
-#else
-			lis		r7,hi16(0x000792B8)				; (BRINGUP)
-			ori		r7,r7,lo16(0x000792B8)			; (BRINGUP)
-			lwz		r0,0(r7)						; (BRINGUP) - Get newest value
-#endif
-			mtmsr	r9								; (BRINGUP)
-			mtspr	dabr,r10						; (BRINGUP)
-			isync									; (BRINGUP)
-
-#if 0
-			lwz		r28,patcharea+12(r2)			; (BRINGUP)
-			mr.		r28,r28							; (BRINGUP)
-			bne++	qqq12							; (BRINGUP)
-			lis		r28,0x4000						; (BRINGUP)
-
-qqq12:		stw		r27,0(r28)						; (BRINGUP)
-			lwz		r6,savedar+4(r13)				; (BRINGUP)
-			stw		r0,4(r28)						; (BRINGUP)
-			stw		r4,8(r28)						; (BRINGUP)
-			stw		r6,12(r28)						; (BRINGUP)
-			addi	r28,r28,16						; (BRINGUP)
-			mr.		r3,r3							; (BRINGUP)
-			stw		r28,patcharea+12(r2)			; (BRINGUP)
-			lwz		r10,patcharea+8(r2)				; (BRINGUP)
-			lwz		r0,patcharea+4(r2)				; (BRINGUP)
-#endif
-
-#if 1
-			stw		r0,patcharea(r2)				; (BRINGUP)
-#endif
-
-#if 0
-			xor		r28,r0,r27						; (BRINGUP) - See how much it changed
-			rlwinm	r28,r28,24,24,31				; (BRINGUP)
-			cmplwi	r28,1							; (BRINGUP)
-
-			ble++	qqq10							; (BRINGUP)
-
-			mr		r7,r0							; (BRINGUP)
-			li		r0,1							; (BRINGUP)
-			stw		r0,20(0)						; (BRINGUP)
-			lis		r0,hi16(Choke)					; (BRINGUP)
-			ori		r0,r0,lo16(Choke)				; (BRINGUP)
-			sc										; (BRINGUP)
-#endif
-
-
-qqq10:		addi	r4,r4,4							; (BRINGUP)
-			stw		r4,savesrr0+4(r13)				; (BRINGUP)
-				
-			li		r11,T_IN_VAIN					; (BRINGUP)
-			b		EatRupt							; (BRINGUP)
-			
-didnthit:											; (BRINGUP)
-#endif
-#if 0
-			lwz		r0,20(0)						; (BRINGUP)
-			mr.		r0,r0							; (BRINGUP)
-			beq++	opopop							; (BRINGUP)
-			li		r0,0							; (BRINGUP)
-			stw		r0,20(0)						; (BRINGUP)
-			lis		r0,hi16(Choke)					; (BRINGUP)
-			ori		r0,r0,lo16(Choke)				; (BRINGUP)
-			sc										; (BRINGUP)
-opopop:
-#endif
-			lwz		r0,savesrr1+4(r13)				; Get the MSR in use at exception time
-			cmplwi	cr1,r11,T_IN_VAIN				; Was it handled?
-			rlwinm.	r4,r0,0,MSR_PR_BIT,MSR_PR_BIT	; Are we trapping from supervisor state?
-			beq++	cr1,EatRupt						; Yeah, just blast back to the user... 
-			beq--	NoFamPf
-			mfsprg	r2,0							; Get back per_proc
-			lwz		r1,spcFlags(r2)					; Load spcFlags
-            rlwinm	r1,r1,1+FamVMmodebit,30,31		; Extract FamVMenabit and FamVMmodebit
-            cmpi	cr0,r1,2						; Check FamVMena set without FamVMmode
-			bne--	cr0,NoFamPf
-            lwz		r6,FAMintercept(r2)				; Load exceptions mask to intercept
-			li		r5,0							; Clear
-			srwi	r1,r11,2						; divide r11 by 4
-            oris	r5,r5,0x8000					; Set r5 to 0x80000000
-            srw		r1,r5,r1						; Set bit for current exception
-            and.	r1,r1,r6						; And current exception with the intercept mask
-            beq++	NoFamPf							; Is it FAM intercept
-			bl		EXT(vmm_fam_pf)
-			b		EatRupt
-
-NoFamPf:	andi.	r4,r0,lo16(MASK(MSR_RI))		; See if the recover bit is on
-			lis		r0,0x8000						; Get 0xFFFFFFFF80000000
-			add		r0,r0,r0						; Get 0xFFFFFFFF00000000
-			beq++	PassUpTrap						; Not on, normal case...
-;
-;			Here is where we handle the "recovery mode" stuff.
-;			This is set by an emulation routine to trap any faults when it is fetching data or
-;			instructions.  
-;
-;			If we get a fault, we turn off RI, set CR0_EQ to false, bump the PC, and set R0
-;			and R1 to the DAR and DSISR, respectively.
-;
-			lwz		r3,savesrr0(r13)				; Get the failing instruction address
-			lwz		r4,savesrr0+4(r13)				; Get the failing instruction address
-			lwz		r5,savecr(r13)					; Get the condition register
-			or		r4,r4,r0						; Fill the high part with foxes
-			lwz		r0,savedar(r13)					; Get the DAR
-			addic	r4,r4,4							; Skip failing instruction
-			lwz		r6,savedar+4(r13)				; Get the DAR
-			addze	r3,r3							; Propagate carry
-			rlwinm	r5,r5,0,3,1						; Clear CR0_EQ to let emulation code know we failed
-			lwz		r7,savedsisr(r13)				; Grab the DSISR
-			stw		r3,savesrr0(r13)				; Save resume address
-			stw		r4,savesrr0+4(r13)				; Save resume address
-			stw		r5,savecr(r13)					; And the resume CR
-			stw		r0,saver0(r13)					; Pass back the DAR
-			stw		r6,saver0+4(r13)				; Pass back the DAR
-			stw		r7,saver1+4(r13)				; Pass back the DSISR
-			b		EatRupt							; Resume emulated code
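-
-;			So an emulator running with RI set sees a fault come back as a
-;			failed call instead of an exception; sketched in C:
-;
-;				if (saved_msr & MASK(MSR_RI)) {	; emulator said "catch me"
-;					srr0 += 4;					; skip the faulting op
-;					cr0_eq = 0;					; tell it we failed
-;					r0 = dar;					; faulting address
-;					r1 = dsisr;					; and the cause
-;					/* eat the exception and resume */
-;				}
-;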
-
-;
-;			Here is where we handle the context switch firmware call.  The old 
-;			context has been saved.  The new savearea is kind of hokey: the high order
-;			half is stored in saver7 and the low half is in saver3. We will just
-;			muck around with the savearea pointers, and then join the exit routine 
-;
-
-			.align	5
-
-conswtch:	
-			li		r0,0xFFF						; Get page boundary
-			mr		r29,r13							; Save the save
-			andc	r30,r13,r0						; Round down to page boundary (64-bit safe)
-			lwz		r5,saver3+4(r13)				; Switch to the new savearea
-			bf--	pf64Bitb,xcswNo64				; Not 64-bit...
-			lwz		r6,saver7+4(r13)				; Get the high order half
-			sldi	r6,r6,32						; Position high half
-			or		r5,r5,r6						; Merge them
-
-xcswNo64:	lwz		r30,SACvrswap+4(r30)			; get real to virtual translation
-			mr		r13,r5							; Switch saveareas
-			li		r0,0							; Clear this
-			xor		r27,r29,r30						; Flip to virtual
-			stw		r0,saver3(r5)					; Push the new virtual savearea to the switch to routine
-			stw		r27,saver3+4(r5)				; Push the new virtual savearea to the switch to routine
-			b		EatRupt							; Start it up... 
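-
-;			That is, roughly (vrswap is the real-to-virtual XOR constant kept
-;			in the savearea control block for the old savearea's page):
-;
-;				new_save = ((uint64_t)old->saver7 << 32) | old->saver3;
-;				new_save->saver3 = old_phys ^ vrswap;	; old area, virtual
-;				current_savearea = new_save;			; then EatRupt
-;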
-
-;
-;			Handle machine check here.
-;
-; ?
-;
-
-			.align	5
-
-MachineCheck:
-
-			bt++	pf64Bitb,mck64					; ?
-			
-			lwz		r27,savesrr1+4(r13)				; Pick up srr1
-
-;
-;			Check if the failure was in 
-;			ml_probe_read.  If so, this is expected, so modify the PC to
-;			ml_probe_read_mck and then eat the exception.
-;
-			lwz		r30,savesrr0+4(r13)				; Get the failing PC
-			lis		r28,hi16(EXT(ml_probe_read_mck))	; High order part
-			lis		r27,hi16(EXT(ml_probe_read))	; High order part
-			ori		r28,r28,lo16(EXT(ml_probe_read_mck))	; Get the low part
-			ori		r27,r27,lo16(EXT(ml_probe_read))	; Get the low part
-			cmplw	r30,r28							; Check highest possible
-			cmplw	cr1,r30,r27						; Check lowest
-			bge-	PassUpTrap						; Outside of range
-			blt-	cr1,PassUpTrap					; Outside of range
-;
-;			We need to fix up the BATs here because the probe
-;			routine messed them all up... As long as we are at it,
-;			fix up to return directly to caller of probe.
-;
-		
-			lis		r11,hi16(EXT(shadow_BAT)+shdDBAT)	; Get shadow address
-			ori		r11,r11,lo16(EXT(shadow_BAT)+shdDBAT)	; Get shadow address
-			
-			lwz		r30,0(r11)						; Pick up DBAT 0 high
-			lwz		r28,4(r11)						; Pick up DBAT 0 low
-			lwz		r27,8(r11)						; Pick up DBAT 1 high
-			lwz		r18,16(r11)						; Pick up DBAT 2 high
-			lwz		r11,24(r11)						; Pick up DBAT 3 high
-			
-			sync
-			mtdbatu	0,r30							; Restore DBAT 0 high
-			mtdbatl	0,r28							; Restore DBAT 0 low
-			mtdbatu	1,r27							; Restore DBAT 1 high
-			mtdbatu	2,r18							; Restore DBAT 2 high
-			mtdbatu	3,r11							; Restore DBAT 3 high 
-			sync
-
-			lwz		r28,savelr+4(r13)				; Get return point
-			lwz		r27,saver0+4(r13)				; Get the saved MSR
-			li		r30,0							; Get a failure RC
-			stw		r28,savesrr0+4(r13)				; Set the return point
-			stw		r27,savesrr1+4(r13)				; Set the continued MSR
-			stw		r30,saver3+4(r13)				; Set return code
-			b		EatRupt							; Yum, yum, eat it all up...
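-
-;			Compressed into C, the fixup above is (a sketch; the fields are
-;			the saved context slots):
-;
-;				if (pc >= ml_probe_read && pc < ml_probe_read_mck) {
-;					restore_shadowed_dbats();	; the probe trashed them
-;					srr0 = saved_lr;			; return to probe's caller
-;					srr1 = saved_r0;			; caller's MSR, parked in R0
-;					r3   = 0;					; report the access failed
-;					/* then eat the machine check */
-;				}
-;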
-
-;
-;			64-bit machine checks
-;
-
-mck64:		
-
-;
-;			NOTE: WE NEED TO RETHINK RECOVERABILITY A BIT - radar 3167190
-;
-
-			ld		r23,savesrr0(r13)				; Grab the SRR0 in case we need bad instruction
-			ld		r20,savesrr1(r13)				; Grab the SRR1 so we can decode the thing
-			lwz		r21,savedsisr(r13)				; We might need this in a bit
-			ld		r22,savedar(r13)				; We might need this in a bit
-
-			lis		r8,AsyMCKSrc					; Get the Async MCK Source register address
-			mfsprg	r19,2							; Get the feature flags
-			ori		r8,r8,0x8000					; Set to read data
-			rlwinm.	r0,r19,0,pfSCOMFixUpb,pfSCOMFixUpb	; Do we need to fix the SCOM data?
-			
-			sync
-
-			mtspr	scomc,r8						; Request the MCK source
-			mfspr	r24,scomd						; Get the source
-			mfspr	r8,scomc						; Get back the status (we just ignore it)
-			sync
-			isync							
-
-			lis		r8,AsyMCKRSrc					; Get the Async MCK Source AND mask address
-			li		r9,0							; Get an AND mask of 0
-			
-			sync
-
-			mtspr	scomd,r9						; Set the AND mask to 0
-			mtspr	scomc,r8						; Write the AND mask and clear conditions
-			mfspr	r8,scomc						; Get back the status (we just ignore it)
-			sync
-			isync							
-
-			lis		r8,cFIR							; Get the Core FIR register address
-			ori		r8,r8,0x8000					; Set to read data
-			
-			sync
-
-			mtspr	scomc,r8						; Request the Core FIR
-			mfspr	r25,scomd						; Get the source
-			mfspr	r8,scomc						; Get back the status (we just ignore it)
-			sync
-			isync							
-			
-			lis		r8,cFIRrst						; Get the Core FIR AND mask address
-			
-			sync
-
-			mtspr	scomd,r9						; Set the AND mask to 0
-			mtspr	scomc,r8						; Write the AND mask and clear conditions
-			mfspr	r8,scomc						; Get back the status (we just ignore it)
-			sync
-			isync							
-
-			lis		r8,l2FIR						; Get the L2 FIR register address
-			ori		r8,r8,0x8000					; Set to read data
-			
-			sync
-
-			mtspr	scomc,r8						; Request the L2 FIR
-			mfspr	r26,scomd						; Get the source
-			mfspr	r8,scomc						; Get back the status (we just ignore it)
-			sync
-			isync							
-			
-			lis		r8,l2FIRrst						; Get the L2 FIR AND mask address
-			
-			sync
-
-			mtspr	scomd,r9						; Set the AND mask to 0
-			mtspr	scomc,r8						; Write the AND mask and clear conditions
-			mfspr	r8,scomc						; Get back the status (we just ignore it)
-			sync
-			isync							
-
-			lis		r8,busFIR						; Get the Bus FIR register address
-			ori		r8,r8,0x8000					; Set to read data
-			
-			sync
-
-			mtspr	scomc,r8						; Request the Bus FIR
-			mfspr	r27,scomd						; Get the source
-			mfspr	r8,scomc						; Get back the status (we just ignore it)
-			sync
-			isync							
-			
-			lis		r8,busFIRrst					; Get the Bus FIR AND mask address
-			
-			sync
-
-			mtspr	scomd,r9						; Set the AND mask to 0
-			mtspr	scomc,r8						; Write the AND mask and clear conditions
-			mfspr	r8,scomc						; Get back the status (we just ignore it)
-			sync
-			isync							
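-
-;			All four blocks above are one idiom: read a FIR through SCOM, then
-;			AND it with zero to clear it.  As a rough C sketch (scom_read and
-;			scom_and stand for the mtspr scomc / mfspr scomd sequences, syncs
-;			implied):
-;
-;				uint64_t scom_read(uint32_t reg) {
-;					mtspr(scomc, reg | 0x8000);	; request a read
-;					uint64_t v = mfspr(scomd);	; collect the data
-;					(void)mfspr(scomc);			; status, ignored
-;					return v;
-;				}
-;				mck_src  = scom_read(AsyMCKSrc);	scom_and(AsyMCKRSrc, 0);
-;				core_fir = scom_read(cFIR);			scom_and(cFIRrst, 0);
-;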
-			
-;			Note: bug in early chips where scom reads are shifted right by 1. We fix that here.
-;			Also note that we will lose bit 63
-
-			beq++	mckNoFix						; No fix up is needed
-			sldi	r24,r24,1						; Shift left 1
-			sldi	r25,r25,1						; Shift left 1
-			sldi	r26,r26,1						; Shift left 1
-			sldi	r27,r27,1						; Shift left 1
-			
-mckNoFix:	std		r24,savexdat0(r13)				; Save the MCK source in case we pass the error
-			std		r25,savexdat1(r13)				; Save the Core FIR in case we pass the error
-			std		r26,savexdat2(r13)				; Save the L2 FIR in case we pass the error
-			std		r27,savexdat3(r13)				; Save the BUS FIR in case we pass the error
-
-			rlwinm.	r0,r20,0,mckIFUE-32,mckIFUE-32	; Is this some kind of uncorrectable?
-			bne		mckUE							; Yeah...
-			
-			rlwinm.	r0,r20,0,mckLDST-32,mckLDST-32	; Some kind of load/store error?
-			bne		mckHandleLDST					; Yes...
-			
-			rldicl.	r0,r20,46,62					; Get the error cause code
-			beq		mckNotSure						; We need some more checks for this one...
-			
-			cmplwi	r0,2							; Check for TLB parity error
-			blt		mckSLBparity					; This is an SLB parity error...
-			bgt		mckhIFUE						; This is an IFetch tablewalk reload UE...
-			
-;			IFetch TLB parity error
-
-			isync
-			tlbiel	r23								; Locally invalidate TLB entry for iaddr
-			sync									; Wait for it
-			b		ceMck							; All recovered...
-			
-;			SLB parity error.  This could be software caused.  We get one if there is
-;			more than 1 valid SLBE with a matching ESID. That one we do not want to
-;			try to recover from.  Search for it and if we get it, panic. 
-
-mckSLBparity:
-			crclr	cr0_eq							; Make sure we are not equal so we take correct exit
-
-			la		r3,emvr0(r2)					; Use this to keep track of valid ESIDs we find
-			li		r5,0							; Start with index 0
-
-mckSLBck:	la		r4,emvr0(r2)					; Use this to keep track of valid ESIDs we find
-			slbmfee	r6,r5							; Get the next SLBE
-			andis.	r0,r6,0x0800					; See if valid bit is on
-			beq		mckSLBnx						; Skip invalid and go to next
-			
-mckSLBck2:	cmpld	r4,r3							; Have we reached the end of the table?
-			beq		mckSLBne						; Yes, go enter this one...
-			ld		r7,0(r4)						; Pick up the saved ESID
-			cmpld	r6,r7							; Is this a match?
-			beq		mckSLBrec						; Whoops, I did bad, recover and pass up...
-			addi	r4,r4,8							; Next table entry
-			b		mckSLBck2						; Check the next...
-
-mckSLBnx:	addi	r5,r5,1							; Point to next SLBE
-			cmplwi	r5,64							; Have we checked all of them?
-			bne++	mckSLBck						; Not yet, check again...
-			b		mckSLBrec						; We looked at them all, go recover...
-			
-mckSLBne:	std		r6,0(r3)						; Save this ESID
-			addi	r3,r3,8							; Point to the new slot
-			b		mckSLBnx						; Go do the next SLBE...
-			
-;			Recover an SLB error
-			
-mckSLBrec:	li		r0,0							; Set an SLB slot index of 0
-			slbia									; Trash all SLB entries (except for entry 0 that is)
-			slbmfee	r7,r0							; Get the entry that is in SLB index 0
-			rldicr	r7,r7,0,35						; Clear the valid bit and the rest
-			slbie	r7								; Invalidate it
-			
-			li		r3,0							; Set the first SLBE
-			
-mckSLBclr:	slbmte	r0,r3							; Clear the whole entry to 0s
-			addi	r3,r3,1							; Bump index
-			cmplwi	cr1,r3,64						; Have we done them all?
-			bne++	cr1,mckSLBclr					; Not yet, clear the next one...
-			
-			sth		r3,ppInvSeg(r2)					; Store non-zero to trigger SLB reload 
-			bne++	ceMck							; This was not a programming error, all recovered...
-			b		ueMck							; Pass the software error up...
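-
-;			The search above keeps a scratch list of the valid ESIDs seen so
-;			far; a repeat means software built a bad SLB.  Roughly:
-;
-;				int n = 0;							; list lives at emvr0
-;				for (int i = 0; i < 64; i++) {
-;					uint64_t e = slbmfee(i);
-;					if (!valid(e)) continue;
-;					for (int j = 0; j < n; j++)
-;						if (esid[j] == e)
-;							goto software_error;	; pass it up
-;					esid[n++] = e;
-;				}
-;				/* either way: slbia, scrub entry 0, zero all 64 slots */
-;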
-
-;
-;			Handle a load/store unit error.  We need to decode the DSISR
-;
-
-mckHandleLDST:
-			rlwinm.	r0,r21,0,mckL1DCPE,mckL1DCPE	; An L1 data cache parity error?
-			bne++	mckL1D							; Yeah, we dealt with this back in the vector...
-		
-			rlwinm.	r0,r21,0,mckL1DTPE,mckL1DTPE	; An L1 tag error?
-			bne++	mckL1T							; Yeah, we dealt with this back in the vector...
-		
-			rlwinm.	r0,r21,0,mckUEdfr,mckUEdfr		; Is this a "deferred" UE?
-			bne		mckDUE							; Yeah, go see if expected...
-		
-			rlwinm.	r0,r21,0,mckUETwDfr,mckUETwDfr	; Is this a "deferred" tablewalk UE?
-			bne		mckDTW							; Yeah, no recovery...
-			
-			rlwinm.	r0,r21,0,mckSLBPE,mckSLBPE		; SLB parity error?
-			bne		mckSLBparity					; Yeah, go attempt recovery....
-			
-;			This is a recoverable D-ERAT or TLB error
-
-			la		r9,hwMckERCPE(r2)				; Get DERAT parity error count
-
-mckInvDAR:	isync
-			tlbiel	r22								; Locally invalidate the TLB entry
-			sync
-			
-			lwz		r21,0(r9)						; Get count
-			addi	r21,r21,1						; Count this one
-			stw		r21,0(r9)						; Stick it back
-			
-			b		ceMck							; All recovered...
-		
-;
-;			When we come here, we are not quite sure what the error is.  We need to
-;			dig a bit further.
-;
-;			R24 is interrupt source
-;			R25 is Core FIR
-;
-;			Note that both have been cleared already.
-;
-
-mckNotSure:
-			rldicl.	r0,r24,AsyMCKfir+1,63			; Something in the FIR?
-			bne--	mckFIR							; Yup, go check some more...
-			
-			rldicl.	r0,r24,AsyMCKhri+1,63			; Hang recovery?
-			bne--	mckHangRcvr						; Yup...
-			
-			rldicl.	r0,r24,AsyMCKext+1,63			; External signal?
-			bne--	mckExtMck						; Yup...
-
-;
-;			We really do not know what this one is or what to do with it...
-;
-			
-mckUnk:		lwz		r21,hwMckUnk(r2)				; Get unknown error count
-			addi	r21,r21,1						; Count it
-			stw		r21,hwMckUnk(r2)				; Stuff it
-			b		ueMck							; Go south, young man...
-
-
-;
-;			Externally signaled MCK.  No recovery for the moment, but this may be
-;			where we handle ml_probe_read problems eventually.
-;			
-mckExtMck:
-			lwz		r21,hwMckHang(r2)				; Get hang recovery count
-			addi	r21,r21,1						; Count this one
-			stw		r21,hwMckHang(r2)				; Stick it back
-			b		ceMck							; All recovered...
-
-;
-;			Machine check cause is in a FIR.  Suss it out here.
-;			Core FIR is in R25 and has been cleared in HW.
-;			
-
-mckFIR:		rldicl.	r0,r25,cFIRICachePE+1,63		; I-Cache parity error?
-			la		r19,hwMckICachePE(r2)			; Point to counter
-			bne		mckInvICache					; Go invalidate I-Cache...
-
-			rldicl.	r0,r25,cFIRITagPE0+1,63			; I-Cache tag parity error?
-			la		r19,hwMckITagPE(r2)				; Point to counter
-			bne		mckInvICache					; Go invalidate I-Cache...
-
-			rldicl.	r0,r25,cFIRITagPE1+1,63			; I-Cache tag parity error?
-			la		r19,hwMckITagPE(r2)				; Point to counter
-			bne		mckInvICache					; Go invalidate I-Cache...
-
-			rldicl.	r0,r25,cFIRIEratPE+1,63			; IERAT parity error?
-			la		r19,hwMckIEratPE(r2)			; Point to counter
-			bne		mckInvERAT						; Go invalidate ERATs...
-
-			rldicl.	r0,r25,cFIRIFUL2UE+1,63			; IFetch got L2 UE?
-			bne		mckhIFUE						; Go count and pass up...
-
-			rldicl.	r0,r25,cFIRDCachePE+1,63		; D-Cache PE?
-			bne		mckL1D							; Handled, just go count...
-
-			rldicl.	r0,r25,cFIRDTagPE+1,63			; D-Cache tag PE?
-			bne		mckL1T							; Handled, just go count...
-
-			rldicl.	r0,r25,cFIRDEratPE+1,63			; DERAT PE?
-			la		r19,hwMckDEratPE(r2)			; Point to counter
-			bne		mckInvERAT						; Go invalidate ERATs...
-
-			rldicl.	r0,r25,cFIRTLBPE+1,63			; TLB PE?
-			la		r9,hwMckTLBPE(r2)				; Get TLB parity error count
-			bne		mckInvDAR						; Go recover...
-
-			rldicl.	r0,r25,cFIRSLBPE+1,63			; SLB PE?
-			bne		mckSLBparity					; Cope with it...
-			
-			b		mckUnk							; Have not a clue...
-
-;
-;			General recovery for I-Cache errors.  Just flush it completely.
-;
-
-			.align	7								; Force into cache line
-
-mckInvICache:
-			lis		r0,0x0080						; Get a 0x0080 (bit 9 >> 32)
-			mfspr	r21,hid1						; Get the current HID1
-			sldi	r0,r0,32						; Get the "forced ICBI match" bit
-			or		r0,r0,r21						; Set forced match
-			
-			isync
-			mtspr	hid1,r0							; Stick it
-			mtspr	hid1,r0							; Stick it again
-			isync
-		
-			li		r6,0							; Start at 0
-			
-mckIcbi:	icbi	0,r6							; Kill I$
-			addi	r6,r6,128						; Next line
-			andis.	r5,r6,1							; Have we done them all?
-			beq++	mckIcbi							; Not yet...
-
-			isync
-			mtspr	hid1,r21						; Restore original HID1
-			mtspr	hid1,r21						; Stick it again
-			isync
-			
-			lwz		r5,0(r19)						; Get the counter
-			addi	r5,r5,1							; Count it
-			stw		r5,0(r19)						; Stuff it back
-			b		ceMck							; All recovered...
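-
-;			With the forced-match bit set, each icbi invalidates whatever line
-;			it indexes regardless of tag, so walking 64KB in 128-byte steps
-;			scrubs the whole I-cache.  Roughly:
-;
-;				hid1 |= FORCED_ICBI_MATCH;			; illustrative bit name
-;				for (uint32_t a = 0; a < 0x10000; a += 128)
-;					icbi(a);
-;				hid1 = saved_hid1;					; back to normal
-;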
-			
-		
-;			General recovery for ERAT problems - handled in exception vector already
-
-mckInvERAT:	lwz		r21,0(r19)						; Get the exception count spot
-			addi	r21,r21,1						; Count this one
-			stw		r21,0(r19)						; Save count
-			b		ceMck							; All recovered...
-			
-;			General hang recovery - this is a notification only, just count.	
-			
-mckHangRcvr:			
-			lwz		r21,hwMckHang(r2)				; Get hang recovery count
-			addi	r21,r21,1						; Count this one
-			stw		r21,hwMckHang(r2)				; Stick it back
-			b		ceMck							; All recovered...
-
-
-;
-;			These are the uncorrectable errors, just count them then pass it along.
-;
-	
-mckUE:		lwz		r21,hwMckUE(r2)					; Get general uncorrectable error count
-			addi	r21,r21,1						; Count it
-			stw		r21,hwMckUE(r2)					; Stuff it
-			b		ueMck							; Go south, young man...
-	
-mckhIFUE:	lwz		r21,hwMckIUEr(r2)				; Get I-Fetch TLB reload uncorrectable error count
-			addi	r21,r21,1						; Count it
-			stw		r21,hwMckIUEr(r2)				; Stuff it
-			b		ueMck							; Go south, young man...
-
-mckDUE:		lwz		r21,hwMckDUE(r2)				; Get deferred uncorrectable error count
-			addi	r21,r21,1						; Count it
-			stw		r21,hwMckDUE(r2)				; Stuff it
-			
-;
-;			Right here is where we end up after a failure in ml_probe_read_64.
-;			We will check if that is the case, and if so, fix everything up and
-;			return from it.
-			
-			lis		r8,hi16(EXT(ml_probe_read_64))	; High of start
-			lis		r9,hi16(EXT(ml_probe_read_mck_64))	; High of end
-			ori		r8,r8,lo16(EXT(ml_probe_read_64))	; Low of start
-			ori		r9,r9,lo16(EXT(ml_probe_read_mck_64))	; Low of end
-			cmpld	r23,r8							; Too soon?
-			cmpld	cr1,r23,r9						; Too late?
-			
-			cror	cr0_lt,cr0_lt,cr1_gt			; Too soon or too late?
-			ld		r3,saver12(r13)					; Get the original MSR
-			ld		r5,savelr(r13)					; Get the return address
-			li		r4,0							; Get fail code
-			blt--	ueMck							; This is a normal machine check, just pass up...
-			std		r5,savesrr0(r13)				; Set the return address
-			
-			std		r3,savesrr1(r13)				; Set the return MSR
-			std		r4,saver3(r13)					; Set failure return code
-			b		ceMck							; All recovered...
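The fixup above, restated as hedged C: if the machine check's PC lies within ml_probe_read_64, rewrite the savearea so the eventual rfi returns to the saved LR with a zero (failure) result instead of passing the check up. The struct is illustrative; the real layout comes from the save* offsets:

/* Hedged sketch of the mckDUE / ml_probe_read_64 recovery. */
#include <stdint.h>
#include <stdbool.h>

typedef struct {
    uint64_t srr0, srr1;       /* return PC and MSR */
    uint64_t lr;               /* LR at interrupt time */
    uint64_t r3, r12;          /* r12 held the probe's saved MSR */
} savearea_t;                  /* illustrative fields only */

extern char ml_probe_read_64[], ml_probe_read_mck_64[];

static bool fixup_probe_read(savearea_t *sv, uintptr_t pc)
{
    if (pc < (uintptr_t)ml_probe_read_64 || pc > (uintptr_t)ml_probe_read_mck_64)
        return false;          /* ordinary machine check: pass it up */
    sv->srr0 = sv->lr;         /* resume at the probe's return address */
    sv->srr1 = sv->r12;        /* with the MSR the probe stashed in r12 */
    sv->r3   = 0;              /* failure return code */
    return true;               /* fully recovered */
}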
-
-mckDTW:		lwz		r21,hwMckDTW(r2)				; Get deferred tablewalk uncorrectable error count
-			addi	r21,r21,1						; Count it
-			stw		r21,hwMckDTW(r2)				; Stuff it
-			b		ueMck							; Go south, young man...
-
-mckL1D:		lwz		r21,hwMckL1DPE(r2)				; Get data cache parity error count
-			addi	r21,r21,1						; Count it
-			stw		r21,hwMckL1DPE(r2)				; Stuff it
-			b		ceMck							; All recovered...
-
-mckL1T:		lwz		r21,hwMckL1TPE(r2)				; Get data cache tag parity error count
-			addi	r21,r21,1						; Count it
-			stw		r21,hwMckL1TPE(r2)				; Stuff it
-
-ceMck:		lwz		r21,mckFlags(0)					; Get the flags
-			li		r0,1							; Set the recovered flag before passing up
-			rlwinm.	r21,r21,0,31,31					; Check if we want to log recoverables
-			stw		r0,savemisc3(r13)				; Set it
-			beq++	EatRupt							; No log of recoverables wanted...
-			b		PassUpTrap						; Go up and log error...
-
-ueMck:		li		r0,0							; Set the unrecovered flag before passing up
-			stw		r0,savemisc3(r13)				; Set it
-			b		PassUpTrap						; Go up and log error and probably panic
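The two exit stubs reduce to one bit of state plus a logging policy; a hedged sketch, with the helpers standing in for EatRupt and PassUpTrap:

/* Hedged sketch of ceMck/ueMck.  Bit 31 of mckFlags (the rlwinm 31,31
 * test above) selects whether recovered machine checks get logged. */
#include <stdint.h>
#include <stdbool.h>

typedef struct { uint32_t misc3; } savearea_t;  /* just the field used here */
extern uint32_t mckFlags;                       /* low-memory cell, see 5018 below */
extern void eat_rupt(savearea_t *sv);           /* hypothetical stand-ins */
extern void pass_up_trap(savearea_t *sv);

static void mck_exit(savearea_t *sv, bool recovered)
{
    sv->misc3 = recovered ? 1 : 0;              /* savemisc3 records the outcome */
    if (recovered && !(mckFlags & 1))
        eat_rupt(sv);                           /* recovered, logging off: just resume */
    else
        pass_up_trap(sv);                       /* log it (and likely panic if unrecovered) */
}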
-			
-;
-;			We come here to handle program exceptions
-;
-;			When the program check is a trap instruction and it happens when
-;			we are executing injected code, we need to check if it is an exit trap.
-;			If it is, we need to populate the current savearea with some of the context from 
-;			the saved pre-inject savearea.  This is needed because the current savearea will be
-;			tossed as part of the pass up code, and because we will not be nullifying
-;			the emulated instruction as we do with any other exception.
-;
-			
-			.align	5
-
-ProgramChk:	lwz		r5,savesrr1+4(r13)				; Get the interrupt SRR1
-			lwz		r3,ijsave(r2)					; Get the inject savearea top
-			lwz		r4,ijsave+4(r2)					; And get the bottom of the inject savearea pointer
-			rlwimi	r5,r5,15,31,31					; Scoot trap flag down to a spare bit
-			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
-			li		r0,0x0023						; Get bits that match scooted trap flag, IR, and RI
-			and		r0,r5,r0						; Clear any extra SRR1 bits
-			rlwimi.	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits and see if ijsave is 0		
-			cmplwi	cr1,r0,1						; Make sure we were IR off, RI off, and got a trap exception
-			crandc	cr0_eq,cr1_eq,cr0_eq			; If we are injecting, ijsave will be non-zero and we had the trap bit set
-			mfsrr0	r4								; Get the PC
-			bne++	cr0,mustem						; This is not an injection exit...
-
-			lwz		r4,0(r4)						; Get the trap instruction
-			lis		r5,hi16(ijtrap)					; Get high half of inject exit trap
-			ori		r5,r5,lo16(ijtrap)				; And the low half
-			cmplw	r4,r5							; Correct trap instruction?
-			bne		mustem							; No, not inject exit...
-
-			lwz		r4,savesrr0(r3)					; Get the original SRR0
-			lwz		r5,savesrr0+4(r3)				; And the rest of it
-			lwz		r6,savesrr1(r3)					; Get the original SRR1
-			stw		r4,savesrr0(r13)				; Set the new SRR0 to the original
-			lwz		r4,savesrr1+4(r13)				; Get the bottom of the new SRR1
-			lwz		r7,savesrr1+4(r3)				; Get the bottom of the original SRR1
-			li		r11,T_INJECT_EXIT				; Set an inject exit exception
-			stw		r5,savesrr0+4(r13)				; Set the new bottom of SRR0 to the original
-			rlwimi	r7,r4,0,MSR_FP_BIT,MSR_FP_BIT	; Make sure we retain the current floating point enable bit
-			stw		r6,savesrr1(r13)				; Save the top half of the original SRR1
-			sth		r7,savesrr1+6(r13)				; And the bottom halfword of the original SRR1
-			stw		r11,saveexception(r13)			; Set the new exception code
-			b		PassUpTrap						; Go pass it on up...
-
-mustem:		b		EXT(Emulate)					; Go try to emulate this one...
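Condensed into hedged C, the exit test above requires: an injection in flight, a trap-type program check taken with IR and RI off, and the faulting instruction equal to the magic exit trap (assembled later in this file as 0x0FFFC9C9):

/* Hedged sketch of the ProgramChk inject-exit test; mask values are
 * inferred from the 0x0023 constant and bit numbering above. */
#include <stdint.h>
#include <stdbool.h>

#define IJTRAP     0x0FFFC9C9u    /* magic exit trap (lis/ori below) */
#define SRR1_TRAP  0x00020000u    /* program check was a trap instruction */
#define MSR_IR     0x00000020u    /* instruction relocation */
#define MSR_RI     0x00000002u    /* recoverable interrupt */

static bool is_inject_exit(uint64_t ijsave, uint32_t srr1, uint32_t insn)
{
    if (ijsave == 0)              return false;  /* no injection in flight */
    if (!(srr1 & SRR1_TRAP))      return false;  /* not a trap */
    if (srr1 & (MSR_IR | MSR_RI)) return false;  /* must be IR off, RI off */
    return insn == IJTRAP;        /* and the magic exit trap itself */
}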
-
-
-/*
- *			Here's where we come back from some instruction emulator.  If we come back with
- *			T_IN_VAIN, the emulation is done and we should just reload state and directly
- *			go back to the interrupted code. Otherwise, we'll check to see if
- *			we need to redrive with a different interrupt, i.e., DSI.
- *			Note that we are actually not redriving the rupt, rather changing it
- *			into a different one.  Thus we clear the redrive bit.
- */
- 
-			.align	5
-			.globl	EXT(EmulExit)
-
-LEXT(EmulExit)
-
-			cmplwi	cr1,r11,T_IN_VAIN				; Was it emulated?
-			lis		r1,hi16(SAVredrive)				; Get redrive request
-			beq++	cr1,EatRupt						; Yeah, just blast back to the user...
-			lwz		r4,SAVflags(r13)				; Pick up the flags
-
-			and.	r0,r4,r1						; Check if redrive requested
-
-			beq++	PassUpTrap						; No redrive, just keep on going...
-
-			b		Redrive							; Redrive the exception...
-		
-;
-; 			Jump into main handler code switching on VM at the same time.
-;
-; 			We assume kernel data is mapped contiguously in physical
-; 			memory, otherwise we would need to switch on (at least) virtual data.
-;			SRs are already set up.
-;
-	
-			.align	5
-	
-PassUpTrap:	lis		r20,hi16(EXT(thandler))			; Get thandler address
-			ori		r20,r20,lo16(EXT(thandler))		; Get thandler address
-			b		PassUp							; Go pass it up...
-	
-PassUpRupt:	lis		r20,hi16(EXT(ihandler))			; Get ihandler address
-			ori		r20,r20,lo16(EXT(ihandler))		; Get ihandler address
-			b		PassUp							; Go pass it up...
-	
-			.align	5
-	
-PassUpFPU:	lis		r20,hi16(EXT(fpu_switch))		; Get FPU switcher address
-			ori		r20,r20,lo16(EXT(fpu_switch))	; Get FPU switcher address
-			b		PassUp							; Go pass it up...
-
-			.align	5
-
-PassUpVMX:	lis		r20,hi16(EXT(vec_switch))		; Get VMX switcher address
-			ori		r20,r20,lo16(EXT(vec_switch))	; Get VMX switcher address
-			bt++	featAltivec,PassUp				; We have VMX on this CPU...
-			li		r11,T_PROGRAM					; Say that it is a program exception
-			li		r20,8							; Set invalid instruction
-			stw		r11,saveexception(r13)			; Set the new exception code
-			sth		r20,savesrr1+4(r13)				; Set the invalid instruction SRR code
-			
-			b		PassUpTrap						; Go pass it up...
-	
-			.align	5
-	
-PassUpAbend:	
-			lis		r20,hi16(EXT(chandler))			; Get choke handler address
-			ori		r20,r20,lo16(EXT(chandler))		; Get choke handler address
-			b		PassUp							; Go pass it up...
-
-			.align	5
-
-PassUp:		
-			mfsprg	r29,0							; Get the per_proc block back
-			
-			cmplwi	cr1,r11,T_INJECT_EXIT			; Are we exiting from an injection?
-			lwz		r3,ijsave(r29)					; Get the inject savearea top
-			lwz		r4,ijsave+4(r29)				; And get the bottom of the inject savearea pointer
-			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
-			rlwimi.	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits and see if ijsave is 0		
-			beq++	notaninjct						; Skip tossing savearea if no injection...
-
-			beq--	cr1,nonullify					; Inject exit, the injected code finished, no need to nullify...
-			
-			lwz		r4,savesrr1+4(r3)				; Get the interrupt modifiers from the original SRR1
-			lwz		r5,savesrr1+4(r13)				; Get the interrupt modifiers from the new SRR1
-			lwz		r6,savedar(r13)					; Get the top of the DAR
-			rlwimi	r4,r5,0,0,15					; copy the new top to the original SRR1
-			lwz		r7,savedar+4(r13)				; Get the bottom of the DAR
-			rlwimi	r4,r5,0,MSR_FP_BIT,MSR_FP_BIT	; Copy the new FP enable bit into the old SRR1
-			stw		r4,savesrr1+4(r3)				; Save the updated SRR1
-			lwz		r5,savedsisr(r13)				; Grab the new DSISR
-			
-			mr		r4,r13							; Save the new savearea pointer
-			mr		r13,r3							; Point to the old savearea we are keeping
-			stw		r6,savedar(r13)					; Save top of new DAR
-			stw		r7,savedar+4(r13)				; Save bottom of new DAR
-			stw		r5,savedsisr(r13)				; Set the new DSISR
-			stw		r11,saveexception(r13)			; Set the new exception code
-			mr		r3,r4							; Point to the new savearea in order to toss it
-			
-nonullify:	li		r0,0							; Get a zero
-			stw		r0,ijsave(r29)					; Clear the pointer to the saved savearea
-			stw		r0,ijsave+4(r29)				; Clear the pointer to the saved savearea
-			
-			bl		EXT(save_ret_phys)				; Dump that pesky extra savearea			
-			
-notaninjct:	lwz		r10,SAVflags(r13)				; Pick up the flags
-
-			li		r0,0xFFF						; Get a page mask
-			li		r2,MASK(MSR_BE)|MASK(MSR_SE)	; Get the mask to save trace bits
-			andc	r5,r13,r0						; Back off to the start of savearea block
-			mfmsr	r3								; Get our MSR
-			rlwinm	r10,r10,0,SAVredriveb+1,SAVredriveb-1	; Clear the redrive before we pass it up
-			li		r21,MSR_SUPERVISOR_INT_OFF		; Get our normal MSR value
-			and		r3,r3,r2						; Clear all but trace
-			lwz		r5,SACvrswap+4(r5)				; Get real to virtual conversion			
-			or		r21,r21,r3						; Keep the trace bits if they are on
-			stw		r10,SAVflags(r13)				; Set the flags with the cleared redrive flag
-
-			xor		r4,r13,r5						; Pass up the virtual address of context savearea
-			rlwinm	r4,r4,0,0,31					; Clean top half of virtual savearea if 64-bit
-
-			mr		r3,r21							; Pass in the MSR we will go to
-			bl		EXT(switchSegs)					; Go handle the segment registers/STB
-
-			lwz		r3,saveexception(r13)			; Recall the exception code
-			
-			mtsrr0	r20								; Set up the handler address
-			mtsrr1	r21								; Set up our normal MSR value
-
-			bt++	pf64Bitb,puLaunch				; Handle 64-bit machine...
-
-			rfi										; Launch the exception handler
-			
-puLaunch:	rfid									; Launch the exception handler
-
-/*
- *			This routine is the main place where we return from an interruption.
- *
- *			This is also where we release the quickfret list.  These are saveareas
- *			that were released as part of the exception exit path in hw_exceptions.
- *			In order to save an atomic operation (which actually will not work
- *			properly on a 64-bit machine) we use holdQFret to indicate that the list
- *			is in flux and should not be looked at here.  This comes into play only
- *			when we take a PTE miss when we are queuing a savearea onto qfret.
- *			Quite rare but could happen.  If the flag is set, this code does not
- *			release the list and waits until next time.
- *
- *			All we need to remember here is that R13 must point to the savearea
- *			that has the context we need to load up. Translation and interruptions
- *			must be disabled.
- *
- *			This code always loads the context in the savearea pointed to
- *			by R13.  In the process, it throws away the savearea.  If there 
- *			is any tomfoolery with savearea stacks, it must be taken care of 
- *			before we get here.
- *
- */
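A hedged C restatement of the quickfret drain that follows; quickfret and holdQFret mirror the per_proc fields named above, and save_ret_phys is the real release routine:

/* Hedged sketch of the quickfret drain in EatRupt; structures are
 * illustrative.  If holdQFret is set the list is in flux, so we skip
 * the drain entirely and catch the saveareas on a later exit. */
typedef struct savearea savearea;
struct savearea { savearea *prev; };       /* SAVprev chain */

struct per_proc {
    unsigned  holdQFret;                   /* nonzero: list in flux */
    savearea *quickfret;                   /* saveareas queued for release */
};

extern void save_ret_phys(savearea *sv);   /* real free-list release */

static void drain_quickfret(struct per_proc *pp)
{
    if (pp->holdQFret)
        return;                            /* in flux: wait until next time */
    while (pp->quickfret != 0) {
        savearea *sv = pp->quickfret;
        pp->quickfret = sv->prev;          /* chain back, then release */
        save_ret_phys(sv);
    }
}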
- 
- 			.align	5
- 
-EatRupt:	mfsprg	r29,0							; Get the per_proc block back
-			mr		r31,r13							; Move the savearea pointer to the far end of the register set
-			mfsprg	r27,2							; Get the processor features
-			
-			lwz		r3,holdQFret(r29)				; Get the release hold off flag
-
-			bt++	pf64Bitb,eat64a					; Skip down to the 64-bit version of this
-
-;
-;			This starts the 32-bit version
-;
-
-			mr.		r3,r3							; Should we hold off the quick release?
-			lwz		r30,quickfret+4(r29)			; Pick up the quick fret list, if any
-			la		r21,saver0(r31)					; Point to the first thing we restore
-			bne-	ernoqfret						; Hold off set, do not release just now...
-			
-erchkfret:	mr.		r3,r30							; Any savearea to quickly release?
-			beq+	ernoqfret						; No quickfrets...
-			lwz		r30,SAVprev+4(r30)				; Chain back now
-			
-			bl		EXT(save_ret_phys)				; Put it on the free list			
-			stw		r30,quickfret+4(r29)			; Dequeue previous guy (really, it is ok to wait until after the release)
-			b		erchkfret						; Try the next one...
-
-			.align	5
-			
-ernoqfret:	
-			lwz		r30,SAVflags(r31)				; Pick up the flags
-			lis		r0,hi16(SAVinject)				; Get inject flag
-			dcbt	0,r21							; Touch in the first thing we need
-			
-;
-;			Here we release the savearea.
-;
-;			Important!!!!  The savearea is released before we are done with it. When the
-;			local free savearea list (anchored at lclfree) gets too long, save_ret_phys
-;			will trim the list, making the extra saveareas allocatable by another processor.
-;			The code in there must ALWAYS leave our savearea on the local list, otherwise
-;			we could be very, very unhappy.  The code there always queues the "just released"
-;			savearea to the head of the local list.  Then, if it needs to trim, it will
-;			start with the SECOND savearea, leaving ours intact.
-;
-;			If we are going to inject code here, we must not toss the savearea because
-;			we will continue to use it.  The code stream to inject is in it and we 
-;			use it to hold the pre-inject context so that we can merge that with the
-;			post-inject context.  The field ijsave in the per-proc is used to point to the savearea.
-;
-;			Note that we will NEVER pass an interrupt up without first dealing with this savearea.
-;			
-;			All permanent interruptions (i.e., not denorm, alignment, or handled page and segment faults)
-;			will nullify any injected code and pass the interrupt up in the original savearea.  A normal
-;			inject completion will merge the original context into the new savearea and pass that up.
-;			
-;			Note that the following code which sets up the injection will only be executed when
-;			SAVinject is set.  That means that it will not run if we are returning from an alignment
-;			or denorm exception, or from a handled page or segment fault.
-;
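In hedged C, the setup that follows copies the staged instructions into this CPU's 64-byte ijcode slot, appends the 0x0FFFC9C9 exit trap, and forces the stores out ahead of instruction fetch:

/* Hedged sketch of the SAVinject setup; sync_icache() stands in for the
 * dcbf/sync/icbi/isync sequence below, ijcode is the real low-memory buffer. */
#include <stdint.h>
#include <string.h>

#define IJTRAP        0x0FFFC9C9u          /* magic exit trap */
#define IJSLOT_WORDS  16                   /* one 64-byte slot per CPU */

extern uint32_t ijcode[];                  /* injection buffer, see 6800 below */
extern void sync_icache(void *addr, unsigned len);  /* hypothetical flush */

static uint32_t *stage_injection(unsigned cpu, const uint32_t *insns, unsigned n)
{
    uint32_t *slot = ijcode + cpu * IJSLOT_WORDS;
    memcpy(slot, insns, n * sizeof(uint32_t));  /* copy the staged stream */
    slot[n] = IJTRAP;                           /* terminate with the exit trap */
    sync_icache(slot, IJSLOT_WORDS * 4);        /* make ifetch see the stores */
    return slot;                                /* SRR0 will point here */
}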
-
-			andc	r0,r30,r0						; Clear the inject flag
-			cmplw	cr4,r0,r30						; Remember if we need to inject
-			mr		r3,r31							; Get the exiting savearea in parm register
-			beq+	cr4,noinject					; No, we are not going to inject instructions...	
-			
-			stw		r0,SAVflags(r31)				; Yes we are, clear the request...
-			
-			lhz		r26,PP_CPU_NUMBER(r29)			; Get the cpu number
-			lwz		r25,saveinstr(r31)				; Get the instruction count
-			la		r3,saveinstr+4(r31)				; Point to the instruction stream
-			slwi	r26,r26,6						; Get offset to the inject code stream for this processor
-			li		r5,0							; Get the current instruction offset
-			ori		r26,r26,lo16(EXT(ijcode))		; Get the base of the inject buffer for this processor (always < 64K)
-			slwi	r25,r25,2						; Multiply by 4
-			
-injctit:	lwzx	r6,r5,r3						; Pick up the instruction
-			stwx	r6,r5,r26						; Inject into code buffer
-			addi	r5,r5,4							; Bump offset
-			cmplw	r5,r25							; Have we hit the end?
-			blt-	injctit							; Continue until we have copied all...
-			
-			lis		r3,0x0FFF						; Build our magic trap
-			ori		r3,r3,0xC9C9					; Build our magic trap
-			stw		r31,ijsave+4(r29)				; Save the original savearea for injection
-			stwx	r3,r5,r26						; Save the magic trap
-
-			li		r3,32							; Get cache line size
-			dcbf	0,r26							; Flush first line
-			dcbf	r3,r26							; And the second
-			sync									; Hang on until it's done
-			
-			icbi	0,r26							; Flush instructions in the first line
-			icbi	r3,r26							; And the second
-			isync									; Throw anything stale away
-			sync									; Hang on until it's done
-			b		injected						; Skip the savearea release...
-			
-noinject:	bl		EXT(save_ret_phys)				; Put old savearea on the free list			
-
-injected:	lwz		r3,savesrr1+4(r31)				; Pass in the MSR we are going to
-			bl		EXT(switchSegs)					; Go handle the segment registers/STB
-
-			li		r3,savesrr1+4					; Get offset to the srr1 value
-			lwarx	r8,r3,r31						; Get destination MSR and take reservation along the way (just so we can blow it away)
-			cmplw	cr3,r14,r14						; Set that we do not need to stop streams
-
-			li		r21,emfp0						; Point to the fp savearea
-			stwcx.	r8,r3,r31						; Blow away any reservations we hold
-
-			lwz		r25,savesrr0+4(r31)				; Get the SRR0 to use
-			
-			la		r28,saver4(r31)					; Point to the 32-byte line with r4-r7
-			dcbz	r21,r29							; Clear a work area
-			lwz		r0,saver0+4(r31)				; Restore R0			
-			dcbt	0,r28							; Touch in r4-r7 
-			lwz		r1,saver1+4(r31)				; Restore R1	
-			
-			beq+	cr4,noinject2					; No code injection here...
-			
-;
-;			If we are injecting, we need to stay in supervisor state with instruction
-;			address translation off.  We also need to have as few potential interruptions as
-;			possible.  Therefore, we turn off external interruptions and tracing (which doesn't
-;			make much sense anyway).
-;
-			ori		r8,r8,lo16(ijemoff)				; Force the need-to-be-off bits on
-			mr		r25,r26							; Get the injected code address
-			xori	r8,r8,lo16(ijemoff)				; Turn off all of the need-to-be-off bits
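The ori/xori pair is a two-instruction idiom for clearing a bit set given only as an immediate: OR forces the bits on, then XOR with the same mask toggles exactly those bits off, so no register-held mask is needed for andc. In C:

/* The ori/xori idiom above; IJEMOFF is a hypothetical stand-in for the
 * ijemoff mask of bits that must be off while injected code runs. */
#include <stdint.h>

#define IJEMOFF 0x0430u        /* hypothetical mask value */

static inline uint32_t clear_ijemoff(uint32_t msr)
{
    msr |= IJEMOFF;            /* force the need-to-be-off bits on... */
    msr ^= IJEMOFF;            /* ...then toggle exactly those bits off */
    return msr;                /* same result as msr & ~IJEMOFF */
}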
-			
-noinject2:	lwz		r2,saver2+4(r31)				; Restore R2	
-			la		r28,saver8(r31)					; Point to the 32-byte line with r8-r11
-			lwz		r3,saver3+4(r31)				; Restore R3
-            andis.	r6,r27,hi16(pfAltivec)			; Do we have altivec on the machine?
-            dcbt	0,r28							; touch in r8-r11
-			lwz		r4,saver4+4(r31)				; Restore R4
-            la		r28,saver12(r31)				; Point to the 32-byte line with r12-r15
-			mtsrr0	r25								; Restore the SRR0 now
-			lwz		r5,saver5+4(r31)				; Restore R5
-			mtsrr1	r8								; Restore the SRR1 now 
-			lwz		r6,saver6+4(r31)				; Restore R6			
-			
-			dcbt	0,r28							; touch in r12-r15
-			la		r28,saver16(r31)
-			
-			lwz		r7,saver7+4(r31)				; Restore R7
-			lwz		r8,saver8+4(r31)				; Restore R8	
-			lwz		r9,saver9+4(r31)				; Restore R9
-            
-            dcbt	0,r28							; touch in r16-r19
-            la		r28,saver20(r31)			
-            		
-			lwz		r10,saver10+4(r31)				; Restore R10
-			lwz		r11,saver11+4(r31)				; Restore R11			
-			
-			dcbt	0,r28							; touch in r20-r23
-			la		r28,savevscr(r31)				; Point to the status area
-			
-			lwz		r12,saver12+4(r31)				; Restore R12
-			lwz		r13,saver13+4(r31)				; Restore R13			
-
-            la		r14,savectr+4(r31)
-			dcbt	0,r28							; Touch in VSCR and FPSCR
-            dcbt	0,r14							; touch in CTR, DAR, DSISR, VRSAVE, and Exception code
-
-			lwz		r26,next_savearea+4(r29)		; Get the exception save area
-			la		r28,saver24(r31)
-
-			lwz		r14,saver14+4(r31)				; Restore R14	
-			lwz		r15,saver15+4(r31)				; Restore R15			
-
-
-			stfd	f0,emfp0(r29)					; Save FP0
-			lwz		r27,savevrsave(r31)				; Get the vrsave
-            dcbt	0,r28							; touch in r24-r27
-			la		r28,savevscr(r31)				; Point to the status area
-			lfd		f0,savefpscrpad(r31)			; Get the fpscr
-            la		r22,saver28(r31)
-			mtfsf	0xFF,f0							; Restore fpscr		
-			lfd		f0,emfp0(r29)					; Restore the used register
-
-			beq		noavec3							; No Altivec on this CPU...
-			
-			stvxl	v0,r21,r29						; Save a vector register
-			lvxl	v0,0,r28						; Get the vector status
-			mtspr	vrsave,r27						; Set the vrsave
-			mtvscr	v0								; Set the vector status
-			lvxl	v0,r21,r29						; Restore work vector register
-
-noavec3:	dcbt	0,r22							; touch in r28-r31
-           	
- 			lwz		r23,spcFlags(r29)				; Get the special flags from per_proc
-            la		r17,savesrr0(r31)
-			la		r26,saver0(r26)					; Point to the first part of the next savearea
-            dcbt	0,r17							; touch in SRR0, SRR1, CR, XER, LR 
-			lhz		r28,pfrptdProc(r29)				; Get the reported processor type
-
-			lwz		r16,saver16+4(r31)				; Restore R16
-			lwz		r17,saver17+4(r31)				; Restore R17
-			lwz		r18,saver18+4(r31)				; Restore R18	
-			lwz		r19,saver19+4(r31)				; Restore R19	
-			lwz		r20,saver20+4(r31)				; Restore R20
-			lwz		r21,saver21+4(r31)				; Restore R21
-			lwz		r22,saver22+4(r31)				; Restore R22
-
-			cmpwi	cr1,r28,CPU_SUBTYPE_POWERPC_750	; G3?
-
-			dcbz	0,r26							; Clear and allocate the next savearea on the off chance it is still cached when we next interrupt
-
-			andis.	r23,r23,hi16(perfMonitor)		; Is the performance monitor enabled?
-			lwz		r23,saver23+4(r31)				; Restore R23
-			cmpwi	cr2,r28,CPU_SUBTYPE_POWERPC_7400	; Yer standard G4?
-			lwz		r24,saver24+4(r31)				; Restore R24			
-			lwz		r25,saver25+4(r31)				; Restore R25			
-			lwz		r26,saver26+4(r31)				; Restore R26		
-			lwz		r27,saver27+4(r31)				; Restore R27			
-
-			beq+	noPerfMonRestore32				; No perf monitor... 
-
-			beq-	cr1,perfMonRestore32_750		; This is a G3...
-			beq-	cr2,perfMonRestore32_7400		; Standard G4...
-		
-			lwz		r28,savepmc+16(r31)
-			lwz		r29,savepmc+20(r31)
-			mtspr	pmc5,r28						; Restore PMC5
-			mtspr	pmc6,r29						; Restore PMC6
-
-perfMonRestore32_7400:
-			lwz		r28,savemmcr2+4(r31)
-			mtspr	mmcr2,r28						; Restore MMCR2
-
-perfMonRestore32_750:
-			lwz		r28,savepmc+0(r31)
-			lwz		r29,savepmc+4(r31)
-			mtspr	pmc1,r28						; Restore PMC1 
-			mtspr	pmc2,r29						; Restore PMC2 
-			lwz		r28,savepmc+8(r31)
-			lwz		r29,savepmc+12(r31)
-			mtspr	pmc3,r28						; Restore PMC3
-			mtspr	pmc4,r29						; Restore PMC4
-			lwz		r28,savemmcr1+4(r31)
-			lwz		r29,savemmcr0+4(r31)
-			mtspr	mmcr1,r28						; Restore MMCR1
-			mtspr	mmcr0,r29						; Restore MMCR0
-
-noPerfMonRestore32:		
-			lwz		r28,savecr(r31)					; Get CR to restore
-			lwz		r29,savexer+4(r31)				; Get XER to restore
-			mtcr	r28								; Restore the CR
-			lwz		r28,savelr+4(r31)				; Get LR to restore
-			mtxer	r29								; Restore the XER
-			lwz		r29,savectr+4(r31)				; Get the CTR to restore
-			mtlr	r28								; Restore the LR 
-			lwz		r28,saver30+4(r31)				; Get R30
-			mtctr	r29								; Restore the CTR
-			lwz		r29,saver31+4(r31)				; Get R31
-			mtsprg	2,r28							; Save R30 for later
-			lwz		r28,saver28+4(r31)				; Restore R28			
-			mtsprg	3,r29							; Save R31 for later
-			lwz		r29,saver29+4(r31)				; Restore R29
-
-			mfsprg	r31,0							; Get per_proc
-			mfsprg	r30,2							; Restore R30 
-			lwz		r31,pfAvailable(r31)			; Get the feature flags
-			mtsprg	2,r31							; Set the feature flags
-			mfsprg	r31,3							; Restore R31
-
-			rfi										; Click heels three times and think very hard that there is no place like home...
-
-			.long	0								; Leave this here
-			.long	0
-			.long	0
-			.long	0
-			.long	0
-			.long	0
-			.long	0
-			.long	0
-
-
-;
-;			This starts the 64-bit version
-;
-
-			.align	7
-
-eat64a:		ld		r30,quickfret(r29)				; Pick up the quick fret list, if any
-
-			mr.		r3,r3							; Should we hold off the quick release?
-			la		r21,saver0(r31)					; Point to the first thing we restore
-			bne--	ernoqfre64						; Hold off set, do not release just now...
-			
-erchkfre64:	mr.		r3,r30							; Any savearea to quickly release?
-			beq+	ernoqfre64						; No quickfrets...
-			ld		r30,SAVprev(r30)				; Chain back now
-			
-			bl		EXT(save_ret_phys)				; Put it on the free list			
-
-			std		r30,quickfret(r29)				; Dequeue previous guy (really, it is ok to wait until after the release)
-			b		erchkfre64						; Try the next one...
-
-			.align	7
-			
-ernoqfre64:	lwz		r30,SAVflags(r31)				; Pick up the flags
-			lis		r0,hi16(SAVinject)				; Get inject flag
-			dcbt	0,r21							; Touch in the first thing we need
-			
-;
-;			Here we release the savearea.
-;
-;			Important!!!!  The savearea is released before we are done with it. When the
-;			local free savearea list (anchored at lclfree) gets too long, save_ret_phys
-;			will trim the list, making the extra saveareas allocatable by another processor.
-;			The code in there must ALWAYS leave our savearea on the local list, otherwise
-;			we could be very, very unhappy.  The code there always queues the "just released"
-;			savearea to the head of the local list.  Then, if it needs to trim, it will
-;			start with the SECOND savearea, leaving ours intact.
-;
-;			If we are going to inject code here, we must not toss the savearea because
-;			we will continue to use it.  The code stream to inject is in it and we 
-;			use it to hold the pre-inject context so that we can merge that with the
-;			post-inject context.  The field ijsave in the per-proc is used to point to the savearea.
-;
-;			Note that we will NEVER pass an interrupt up without first dealing with this savearea.
-;			
-;			All permanent interruptions (i.e., not denorm, alignment, or handled page and segment faults)
-;			will nullify any injected code and pass the interrupt up in the original savearea.  A normal
-;			inject completion will merge the original context into the new savearea and pass that up.
-;			
-;			Note that the following code which sets up the injection will only be executed when
-;			SAVinject is set.  That means that it will not run if we are returning from an alignment
-;			or denorm exception, or from a handled page or segment fault.
-;
-
-
-			li		r3,lgKillResv					; Get spot to kill reservation
-			andc	r0,r30,r0						; Clear the inject flag
-			stdcx.	r3,0,r3							; Blow away any reservations we hold
-			cmplw	cr4,r0,r30						; Remember if we need to inject
-			mr		r3,r31							; Get the exiting savearea in parm register
-			beq++	cr4,noinject3					; No, we are not going to inject instructions...	
-			
-			stw		r0,SAVflags(r31)				; Yes we are, clear the request...
-
-			lhz		r26,PP_CPU_NUMBER(r29)			; Get the cpu number
-			lwz		r25,saveinstr(r31)				; Get the instruction count
-			la		r3,saveinstr+4(r31)				; Point to the instruction stream
-			slwi	r26,r26,6						; Get offset to the inject code stream for this processor
-			li		r5,0							; Get the current instruction offset
-			ori		r26,r26,lo16(EXT(ijcode))		; Get the base of the inject buffer for this processor (always < 64K)
-			slwi	r25,r25,2						; Multiply by 4
-			
-injctit2:	lwzx	r6,r5,r3						; Pick up the instruction
-			stwx	r6,r5,r26						; Inject into code buffer
-			addi	r5,r5,4							; Bump offset
-			cmplw	r5,r25							; Have we hit the end?
-			blt--	injctit2						; Continue until we have copied all...
-			
-			lis		r3,0x0FFF						; Build our magic trap
-			ori		r3,r3,0xC9C9					; Build our magic trap
-			std		r31,ijsave(r29)					; Save the original savearea for injection
-			stwx	r3,r5,r26						; Save the magic trap
-
-			dcbf	0,r26							; Flush the line
-			sync									; Hang on until it's done
-			
-			icbi	0,r26							; Flush instructions in the line
-			isync									; Throw anything stale away
-			sync									; Hang on until it's done
-			b		injected2						; Skip the savearea release...
-			
-noinject3:	bl		EXT(save_ret_phys)				; Put it on the free list			
-
-injected2:	lwz		r3,savesrr1+4(r31)				; Pass in the MSR we will be going to
-			bl		EXT(switchSegs)					; Go handle the segment registers/STB
-
-			ld		r8,savesrr1(r31)				; Get destination MSR
-			cmplw	cr3,r14,r14						; Set that we do not need to stop streams
-			li		r21,emfp0						; Point to a workarea
-
-			ld		r25,savesrr0(r31)				; Get the SRR0 to use
-			la		r28,saver16(r31)				; Point to the 128-byte line with r16-r31
-			dcbz128	r21,r29							; Clear a work area
-			ld		r0,saver0(r31)					; Restore R0			
-			dcbt	0,r28							; Touch in r16-r31 
-			ld		r1,saver1(r31)					; Restore R1	
-			
-			beq++	cr4,noinject4					; No code injection here...
-			
-;
-;			If we are injecting, we need to stay in supervisor state with instruction
-;			address translation off.  We also need to have as few potential interruptions as
-;			possible.  Therefore, we turn off external interruptions and tracing (which doesn't
-;			make much sense anyway).
-;
-			ori		r8,r8,lo16(ijemoff)				; Force the need-to-be-off bits on
-			mr		r25,r26							; Point pc to injection code buffer
-			xori	r8,r8,lo16(ijemoff)				; Turn off all of the need-to-be-off bits
-			
-noinject4:	ld		r2,saver2(r31)					; Restore R2	
-			ld		r3,saver3(r31)					; Restore R3
-			mtcrf	0x80,r27						; Get facility availability flags (do not touch CR1-7)
-			ld		r4,saver4(r31)					; Restore R4
-			mtsrr0	r25								; Restore the SRR0 now
-			ld		r5,saver5(r31)					; Restore R5
-			mtsrr1	r8								; Restore the SRR1 now 
-			ld		r6,saver6(r31)					; Restore R6			
-						
-			ld		r7,saver7(r31)					; Restore R7
-			ld		r8,saver8(r31)					; Restore R8	
-			ld		r9,saver9(r31)					; Restore R9
-            
-			la		r28,savevscr(r31)				; Point to the status area
-            		
-			ld		r10,saver10(r31)				; Restore R10
-			ld		r11,saver11(r31)				; Restore R11			
-			ld		r12,saver12(r31)				; Restore R12
-			ld		r13,saver13(r31)				; Restore R13			
-
-			ld		r26,next_savearea(r29)			; Get the exception save area
-
-			ld		r14,saver14(r31)				; Restore R14	
-			ld		r15,saver15(r31)				; Restore R15			
-			lwz		r27,savevrsave(r31)				; Get the vrsave
-			
-			bf--	pfAltivecb,noavec2s				; Skip if no VMX...
-			
-			stvxl	v0,r21,r29						; Save a vector register
-			lvxl	v0,0,r28						; Get the vector status
-			mtvscr	v0								; Set the vector status
-
-			lvxl	v0,r21,r29						; Restore work vector register
-		
-noavec2s:	mtspr	vrsave,r27						; Set the vrsave
-
-			lwz		r28,saveexception(r31)			; Get exception type
-			stfd	f0,emfp0(r29)					; Save FP0
-			lfd		f0,savefpscrpad(r31)			; Get the fpscr
-			mtfsf	0xFF,f0							; Restore fpscr		
-			lfd		f0,emfp0(r29)					; Restore the used register
-			ld		r16,saver16(r31)				; Restore R16
-			lwz		r30,spcFlags(r29)				; Get the special flags from per_proc
-			ld		r17,saver17(r31)				; Restore R17
-			ld		r18,saver18(r31)				; Restore R18	
-			cmplwi	cr1,r28,T_RESET					; Are we returning from a reset?
-			ld		r19,saver19(r31)				; Restore R19	
-			ld		r20,saver20(r31)				; Restore R20
-			li		r27,0							; Get a zero
-			ld		r21,saver21(r31)				; Restore R21
-			la		r26,saver0(r26)					; Point to the first part of the next savearea
-			andis.	r30,r30,hi16(perfMonitor)		; Is the performance monitor enabled?
-			ld		r22,saver22(r31)				; Restore R22
-			ld		r23,saver23(r31)				; Restore R23
-			bne++	cr1,er64rrst					; We are not returning from a reset...
-			stw		r27,lo16(EXT(ResetHandler)-EXT(ExceptionVectorsStart)+RESETHANDLER_TYPE)(br0)	; Allow resets again
-
-er64rrst:	ld		r24,saver24(r31)				; Restore R24			
-
-			dcbz128	0,r26							; Clear and allocate the next savearea on the off chance it is still cached when we next interrupt
-
-			ld		r25,saver25(r31)				; Restore R25			
-			ld		r26,saver26(r31)				; Restore R26		
-			ld		r27,saver27(r31)				; Restore R27			
-
-			beq++	noPerfMonRestore64				; Nope... 
-
-			lwz		r28,savepmc+0(r31)
-			lwz		r29,savepmc+4(r31)
-			mtspr	pmc1_gp,r28						; Restore PMC1 
-			mtspr	pmc2_gp,r29						; Restore PMC2 
-			lwz		r28,savepmc+8(r31)
-			lwz		r29,savepmc+12(r31)
-			mtspr	pmc3_gp,r28						; Restore PMC3
-			mtspr	pmc4_gp,r29						; Restore PMC4
-			lwz		r28,savepmc+16(r31)
-			lwz		r29,savepmc+20(r31)
-			mtspr	pmc5_gp,r28						; Restore PMC5 
-			mtspr	pmc6_gp,r29						; Restore PMC6 
-			lwz		r28,savepmc+24(r31)
-			lwz		r29,savepmc+28(r31)
-			mtspr	pmc7_gp,r28						; Restore PMC7
-			mtspr	pmc8_gp,r29						; Restore PMC8
-			ld		r28,savemmcr1(r31)
-			ld		r29,savemmcr2(r31)
-			mtspr	mmcr1_gp,r28					; Restore MMCR1
-			mtspr	mmcra_gp,r29					; Restore MMCRA
-			ld		r28,savemmcr0(r31)
-			
-			mtspr	mmcr0_gp,r28					; Restore MMCR0
-
-noPerfMonRestore64:		
-			mfsprg	r30,0							; Get per_proc
-			lwz		r28,savecr(r31)					; Get CR to restore
-			ld		r29,savexer(r31)				; Get XER to restore
-			mtcr	r28								; Restore the CR
-			ld		r28,savelr(r31)					; Get LR to restore
-			mtxer	r29								; Restore the XER
-			ld		r29,savectr(r31)				; Get the CTR to restore
-			mtlr	r28								; Restore the LR 
-			ld		r28,saver30(r31)				; Get R30
-			mtctr	r29								; Restore the CTR
-			ld		r29,saver31(r31)				; Get R31
-			mtspr	hsprg0,r28						; Save R30 for later
-			ld		r28,saver28(r31)				; Restore R28			
-			mtsprg	3,r29							; Save R31 for later
-			ld		r29,saver29(r31)				; Restore R29
-
-			lwz		r31,pfAvailable(r30)			; Get the feature flags
-			ld		r30,UAW(r30)					; Get the User Assist DoubleWord
-			mtsprg	2,r31							; Set the feature flags
-			mfsprg	r31,3							; Restore R31
-			mtsprg	3,r30							; Set the UAW
-			mfspr	r30,hsprg0						; Restore R30
-
-			rfid									; Click heels three times and think very hard that there is no place like home...
-
-
-	
-/*
- * exception_exit(savearea *)
- *
- *
- * ENTRY :	IR and/or DR and/or interruptions can be on
- *			R3 points to the virtual address of a savearea
- */
-	
-			.align	5
-			.globl	EXT(exception_exit)
-
-LEXT(exception_exit)
-
-			mfsprg	r29,2							; Get feature flags
-			mr		r31,r3							; Get the savearea in the right register 
-			mtcrf	0x04,r29						; Set the features			
-			li		r0,1							; Get this just in case		
-			mtcrf	0x02,r29						; Set the features			
-			lis		r30,hi16(MASK(MSR_VEC)|MASK(MSR_FP)|MASK(MSR_ME))	; Set up the MSR we will use throughout. Note that ME comes on here if MCK
-			rlwinm	r4,r3,0,0,19					; Round down to savearea block base
-			lis		r1,hi16(SAVredrive)				; Get redrive request
-			mfsprg	r2,0							; Get the per_proc block
-			ori		r30,r30,lo16(MASK(MSR_VEC)|MASK(MSR_FP)|MASK(MSR_ME))	; Rest of MSR
-			bt++	pf64Bitb,eeSixtyFour			; We are 64-bit...
-			
-			lwz		r4,SACvrswap+4(r4)				; Get the virtual to real translation
-			
-			bt		pfNoMSRirb,eeNoMSR				; No MSR...
-
-			mtmsr	r30								; Translation and all off
-			isync									; Toss prefetch
-			b		eeNoMSRx
-			
-			.align	5
-			
-eeSixtyFour:
-			ld		r4,SACvrswap(r4)				; Get the virtual to real translation
-			rldimi	r30,r0,63,MSR_SF_BIT			; Set SF bit (bit 0)
-			mtmsrd	r30								; Set 64-bit mode, turn off EE, DR, and IR
-			isync									; Toss prefetch
-			b		eeNoMSRx
-			
-			.align	5
-			
-eeNoMSR:	li		r0,loadMSR						; Get the MSR setter SC
-			mr		r3,r30							; Get new MSR
-			sc										; Set it
-
-eeNoMSRx:	xor		r31,r31,r4						; Convert the savearea to physical addressing
-			lwz		r4,SAVflags(r31)				; Pick up the flags
-			mr		r13,r31							; Put savearea here also
-
-			and.	r0,r4,r1						; Check if redrive requested
-			
-			dcbt	br0,r2							; We will need this in just a sec
-
-			beq+	EatRupt							; No redrive, just exit...
-
-0:			mftbu	r2								; Avoid using an obsolete timestamp for the redrive
-			mftb	r4
-			mftbu	r0
-			cmplw	r0,r2
-			bne--	0b
-
-			stw		r2,SAVtime(r13)
-			stw		r4,SAVtime+4(r13)
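The mftbu/mftb/mftbu loop is the standard atomic 64-bit timebase read on 32-bit PowerPC: sample upper, lower, then upper again, and retry if the upper half changed underneath the lower sample. A hedged C equivalent:

/* Hedged sketch of the timebase snapshot above; mftbu()/mftb() are
 * hypothetical wrappers for the corresponding instructions. */
#include <stdint.h>

extern uint32_t mftbu(void);   /* timebase upper */
extern uint32_t mftb(void);    /* timebase lower */

static uint64_t read_timebase(void)
{
    uint32_t hi, lo, hi2;
    do {
        hi  = mftbu();
        lo  = mftb();
        hi2 = mftbu();
    } while (hi != hi2);       /* lower half carried into upper: retry */
    return ((uint64_t)hi << 32) | lo;
}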
-
-			lwz		r11,saveexception(r13)			; Restore exception code
-			b		Redrive							; Redrive the exception...
-
-
-		
-			.align	12								; Force page alignment
-
-			.globl EXT(ExceptionVectorsEnd)
-EXT(ExceptionVectorsEnd):							/* Used if relocating the exception vectors */
-
-
-
-
-;
-;			Here is where we keep the low memory globals
-;
-
-			. = 0x5000
-			
-			.ascii	"Hagfish "						; 5000 Unique eyecatcher
-			.long	0								; 5008 Zero
-			.long	0								; 500C Zero cont...
-			.long	EXT(PerProcTable)				; 5010 pointer to per_proc_entry table
-			.long	0								; 5014 Zero
-
-			.globl	EXT(mckFlags)
-EXT(mckFlags):
-			.long	0								; 5018 Machine check flags
-			
-			.long	EXT(version)					; 501C Pointer to kernel version string
-			.long	0								; 5020 physical memory window virtual address
-			.long	0								; 5024 physical memory window virtual address
-			.long	0								; 5028 user memory window virtual address
-			.long	0								; 502C user memory window virtual address
-			.long	0								; 5030 VMM boot-args forced feature flags
-
-			.globl	EXT(maxDec)
-EXT(maxDec):
-			.long	0x7FFFFFFF						; 5034 maximum decrementer value
-			
-
-			.globl	EXT(pmsCtlp)
-EXT(pmsCtlp):
-			.long	0								; 5038 Pointer to power management stepper control
-			
-			.long	0								; 503C reserved
-			.long	0								; 5040 reserved
-			.long	0								; 5044 reserved
-			.long	0								; 5048 reserved
-			.long	0								; 504C reserved
-			.long	0								; 5050 reserved
-			.long	0								; 5054 reserved
-			.long	0								; 5058 reserved
-			.long	0								; 505C reserved
-			.long	0								; 5060 reserved
-			.long	0								; 5064 reserved
-			.long	0								; 5068 reserved
-			.long	0								; 506C reserved
-			.long	0								; 5070 reserved
-			.long	0								; 5074 reserved
-			.long	0								; 5078 reserved
-			.long	0								; 507C reserved
-
-			.globl	EXT(trcWork)
-EXT(trcWork):
-			.long	0								; 5080 The next trace entry to use
-#if DEBUG
-			.long	0xFFFFFFFF 						; 5084 All enabled 
-#else
-			.long	0x00000000						; 5084 All disabled on non-debug systems
-#endif
-			.long	0								; 5088 Start of the trace table
-			.long	0								; 508C End (wrap point) of the trace
-			.long	0								; 5090 Saved mask while in debugger
-			.long	0								; 5094 Size of trace table (1 - 256 pages)
-			.long	0								; 5098 traceGas[0]
-			.long	0								; 509C traceGas[1]
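For orientation, a hedged C view of the head of this low-memory block; the offsets are the ones in the comments above, but the struct and field names are invented (the layout is defined only by this assembly):

/* Illustrative only; names are hypothetical, offsets follow the comments. */
#include <stdint.h>

struct lowglo {
    char      eyecatcher[8];   /* 5000 "Hagfish " */
    uint64_t  zero;            /* 5008 */
    uint32_t  per_proc_table;  /* 5010 pointer to per_proc_entry table */
    uint32_t  zero2;           /* 5014 */
    uint32_t  mck_flags;       /* 5018 machine check flags */
    uint32_t  version;         /* 501C pointer to kernel version string */
    uint32_t  phys_window[2];  /* 5020 physical memory window */
    uint32_t  user_window[2];  /* 5028 user memory window */
    uint32_t  vmm_flags;       /* 5030 VMM forced feature flags */
    uint32_t  max_dec;         /* 5034 maximum decrementer value */
    uint32_t  pms_ctlp;        /* 5038 power management stepper control */
    uint32_t  rsvd[17];        /* 503C..507C reserved */
    uint32_t  trc_work[8];     /* 5080 trace table controls */
};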
-
-			.long	0								; 50A0 reserved			
-			.long	0								; 50A4 reserved			
-			.long	0								; 50A8 reserved			
-			.long	0								; 50AC reserved			
-			.long	0								; 50B0 reserved			
-			.long	0								; 50B4 reserved			
-			.long	0								; 50B8 reserved			
-			.long	0								; 50BC reserved			
-			.long	0								; 50C0 reserved			
-			.long	0								; 50C4 reserved			
-			.long	0								; 50C8 reserved			
-			.long	0								; 50CC reserved			
-			.long	0								; 50D0 reserved			
-			.long	0								; 50D4 reserved			
-			.long	0								; 50D8 reserved			
-			.long	0								; 50DC reserved			
-			.long	0								; 50E0 reserved			
-			.long	0								; 50E4 reserved			
-			.long	0								; 50E8 reserved			
-			.long	0								; 50EC reserved			
-			.long	0								; 50F0 reserved			
-			.long	0								; 50F4 reserved			
-			.long	0								; 50F8 reserved			
-			.long	0								; 50FC reserved			
-
-			.globl	EXT(saveanchor)
-
-EXT(saveanchor):									; 5100 saveanchor
-			.set	.,.+SVsize
-			
-			.long	0								; 5140 reserved
-			.long	0								; 5144 reserved
-			.long	0								; 5148 reserved
-			.long	0								; 514C reserved
-			.long	0								; 5150 reserved
-			.long	0								; 5154 reserved
-			.long	0								; 5158 reserved
-			.long	0								; 515C reserved
-			.long	0								; 5160 reserved
-			.long	0								; 5164 reserved
-			.long	0								; 5168 reserved
-			.long	0								; 516C reserved
-			.long	0								; 5170 reserved
-			.long	0								; 5174 reserved
-			.long	0								; 5178 reserved
-			.long	0								; 517C reserved
-			
-			.long	0								; 5180 tlbieLock
-
-			.long	0								; 5184 reserved
-			.long	0								; 5188 reserved
-			.long	0								; 518C reserved
-			.long	0								; 5190 reserved
-			.long	0								; 5194 reserved
-			.long	0								; 5198 reserved
-			.long	0								; 519C reserved
-			.long	0								; 51A0 reserved			
-			.long	0								; 51A4 reserved			
-			.long	0								; 51A8 reserved			
-			.long	0								; 51AC reserved			
-			.long	0								; 51B0 reserved			
-			.long	0								; 51B4 reserved			
-			.long	0								; 51B8 reserved			
-			.long	0								; 51BC reserved			
-			.long	0								; 51C0 reserved			
-			.long	0								; 51C4 reserved			
-			.long	0								; 51C8 reserved			
-			.long	0								; 51CC reserved			
-			.long	0								; 51D0 reserved			
-			.long	0								; 51D4 reserved			
-			.long	0								; 51D8 reserved			
-			.long	0								; 51DC reserved			
-			.long	0								; 51E0 reserved			
-			.long	0								; 51E4 reserved			
-			.long	0								; 51E8 reserved			
-			.long	0								; 51EC reserved			
-			.long	0								; 51F0 reserved			
-			.long	0								; 51F4 reserved			
-			.long	0								; 51F8 reserved			
-			.long	0								; 51FC reserved	
-			
-			.globl	EXT(dgWork)
-			
-EXT(dgWork):
-			.long	0								; 5200 dgLock
-			.long	0								; 5204 dgFlags		
-			.long	0								; 5208 dgMisc0		
-			.long	0								; 520C dgMisc1		
-			.long	0								; 5210 dgMisc2		
-			.long	0								; 5214 dgMisc3		
-			.long	0								; 5218 dgMisc4		
-			.long	0								; 521C dgMisc5	
-
-			.globl	EXT(LcksOpts)
-EXT(LcksOpts):
-			.long	0								; 5220 lcksWork
-			.long	0								; 5224 reserved
-			.long	0								; 5228 reserved
-			.long	0								; 522C reserved
-			.long	0								; 5230 reserved
-			.long	0								; 5234 reserved
-			.long	0								; 5238 reserved
-			.long	0								; 523C reserved
-			.long	0								; 5240 reserved
-			.long	0								; 5244 reserved
-			.long	0								; 5248 reserved
-			.long	0								; 524C reserved
-			.long	0								; 5250 reserved
-			.long	0								; 5254 reserved
-			.long	0								; 5258 reserved
-			.long	0								; 525C reserved
-			.long	0								; 5260 reserved
-			.long	0								; 5264 reserved
-			.long	0								; 5268 reserved
-			.long	0								; 526C reserved
-			.long	0								; 5270 reserved
-			.long	0								; 5274 reserved
-			.long	0								; 5278 reserved
-			.long	0								; 527C reserved
-			
-			.globl	EXT(pPcfg)
-EXT(pPcfg):
-			.long	0x80000000 | (12 << 8) | 12		; 5280 pcfDefPcfg - 4k
-			.long	0								; 5284 pcfLargePcfg
-			.long	0								; 5288 Non-primary page configurations
-			.long	0								; 528C Non-primary page configurations
-			.long	0								; 5290 Non-primary page configurations
-			.long	0								; 5294 Non-primary page configurations
-			.long	0								; 5298 Non-primary page configurations
-			.long	0								; 529C Non-primary page configurations
-			
-			.long	0								; 52A0 reserved			
-			.long	0								; 52A4 reserved			
-			.long	0								; 52A8 reserved			
-			.long	0								; 52AC reserved			
-			.long	0								; 52B0 reserved			
-			.long	0								; 52B4 reserved			
-			.long	0								; 52B8 reserved			
-			.long	0								; 52BC reserved			
-			.long	0								; 52C0 reserved			
-			.long	0								; 52C4 reserved			
-			.long	0								; 52C8 reserved			
-			.long	0								; 52CC reserved			
-			.long	0								; 52D0 reserved			
-			.long	0								; 52D4 reserved			
-			.long	0								; 52D8 reserved			
-			.long	0								; 52DC reserved			
-			.long	0								; 52E0 reserved			
-			.long	0								; 52E4 reserved			
-			.long	0								; 52E8 reserved			
-			.long	0								; 52EC reserved			
-			.long	0								; 52F0 reserved			
-			.long	0								; 52F4 reserved			
-			.long	0								; 52F8 reserved			
-			.long	0								; 52FC reserved	
-
-			.globl	EXT(killresv)
-EXT(killresv):
-
-			.long	0								; 5300 Used to kill reservations
-			.long	0								; 5304 Used to kill reservations
-			.long	0								; 5308 Used to kill reservations
-			.long	0								; 530C Used to kill reservations
-			.long	0								; 5310 Used to kill reservations
-			.long	0								; 5314 Used to kill reservations
-			.long	0								; 5318 Used to kill reservations
-			.long	0								; 531C Used to kill reservations
-			.long	0								; 5320 Used to kill reservations
-			.long	0								; 5324 Used to kill reservations
-			.long	0								; 5328 Used to kill reservations
-			.long	0								; 532C Used to kill reservations
-			.long	0								; 5330 Used to kill reservations
-			.long	0								; 5334 Used to kill reservations
-			.long	0								; 5338 Used to kill reservations
-			.long	0								; 533C Used to kill reservations
-			.long	0								; 5340 Used to kill reservations
-			.long	0								; 5344 Used to kill reservations
-			.long	0								; 5348 Used to kill reservations
-			.long	0								; 534C Used to kill reservations
-			.long	0								; 5350 Used to kill reservations
-			.long	0								; 5354 Used to kill reservations
-			.long	0								; 5358 Used to kill reservations
-			.long	0								; 535C Used to kill reservations
-			.long	0								; 5360 Used to kill reservations
-			.long	0								; 5364 Used to kill reservations
-			.long	0								; 5368 Used to kill reservations
-			.long	0								; 536C Used to kill reservations
-			.long	0								; 5370 Used to kill reservations
-			.long	0								; 5374 Used to kill reservations
-			.long	0								; 5378 Used to kill reservations
-			.long	0								; 537C Used to kill reservations
-			
-			.long	0								; 5380 reserved
-			.long	0								; 5384 reserved
-			.long	0								; 5388 reserved
-			.long	0								; 538C reserved
-			.long	0								; 5390 reserved
-			.long	0								; 5394 reserved
-			.long	0								; 5398 reserved
-			.long	0								; 539C reserved
-			.long	0								; 53A0 reserved			
-			.long	0								; 53A4 reserved			
-			.long	0								; 53A8 reserved			
-			.long	0								; 53AC reserved			
-			.long	0								; 53B0 reserved			
-			.long	0								; 53B4 reserved			
-			.long	0								; 53B8 reserved			
-			.long	0								; 53BC reserved			
-			.long	0								; 53C0 reserved			
-			.long	0								; 53C4 reserved			
-			.long	0								; 53C8 reserved			
-			.long	0								; 53CC reserved			
-			.long	0								; 53D0 reserved			
-			.long	0								; 53D4 reserved			
-			.long	0								; 53D8 reserved			
-			.long	0								; 53DC reserved			
-			.long	0								; 53E0 reserved			
-			.long	0								; 53E4 reserved			
-			.long	0								; 53E8 reserved			
-			.long	0								; 53EC reserved			
-			.long	0								; 53F0 reserved			
-			.long	0								; 53F4 reserved			
-			.long	0								; 53F8 reserved			
-			.long	0								; 53FC reserved	
-			.long	0								; 5400 reserved
-			.long	0								; 5404 reserved
-			.long	0								; 5408 reserved
-			.long	0								; 540C reserved
-			.long	0								; 5410 reserved
-			.long	0								; 5414 reserved
-			.long	0								; 5418 reserved
-			.long	0								; 541C reserved
-			.long	0								; 5420 reserved			
-			.long	0								; 5424 reserved			
-			.long	0								; 5428 reserved			
-			.long	0								; 542C reserved			
-			.long	0								; 5430 reserved			
-			.long	0								; 5434 reserved			
-			.long	0								; 5438 reserved			
-			.long	0								; 543C reserved			
-			.long	0								; 5440 reserved			
-			.long	0								; 5444 reserved			
-			.long	0								; 5448 reserved			
-			.long	0								; 544C reserved			
-			.long	0								; 5450 reserved			
-			.long	0								; 5454 reserved			
-			.long	0								; 5458 reserved			
-			.long	0								; 545C reserved			
-			.long	0								; 5460 reserved			
-			.long	0								; 5464 reserved			
-			.long	0								; 5468 reserved			
-			.long	0								; 546C reserved			
-			.long	0								; 5470 reserved			
-			.long	0								; 5474 reserved			
-			.long	0								; 5478 reserved			
-			.long	0								; 547C reserved
-			.long	EXT(kmod)						; 5480 Pointer to kmod, debugging aid
-			.long	EXT(kdp_trans_off)				; 5484 Pointer to kdp_trans_off, debugging aid
-			.long	EXT(kdp_read_io)				; 5488 Pointer to kdp_read_io, debugging aid
-			.long	0								; 548C Reserved for developer use
-			.long	0								; 5490 Reserved for developer use
-			.long	EXT(osversion)					; 5494	Pointer to osversion string, debugging aid
-			.long	EXT(flag_kdp_trigger_reboot)	; 5498	Pointer to KDP reboot trigger, debugging aid
-			.long	EXT(manual_pkt)					; 549C	Pointer to KDP manual packet, debugging aid
-
-;
-;	The "shared page" is used for low-level debugging and is actually 1/2 page long
-;
-
-			. = 0x6000
-			.globl	EXT(sharedPage)
-
-EXT(sharedPage):									; This is a debugging page shared by all processors
-			.long	0xC24BC195						; Comm Area validity value 
-			.long	0x87859393						; Comm Area validity value 
-			.long	0xE681A2C8						; Comm Area validity value 
-			.long	0x8599855A						; Comm Area validity value 
-			.long	0xD74BD296						; Comm Area validity value 
-			.long	0x8388E681						; Comm Area validity value 
-			.long	0xA2C88599						; Comm Area validity value 
-			.short	0x855A							; Comm Area validity value 
-			.short	1								; Comm Area version number
-			.fill	504*4,1,0						; (filled with 0s)
-
-;
-;	The ijcode area is used for code injection.  It is 1/2 page long and will allow 32 processors to inject
-;	16 instructions each concurrently.
-;
-
-			.globl	EXT(ijcode)
-
-EXT(ijcode):										; Code injection area
-			.fill	512*4,1,0						; 6800 32x64 slots for code injection streams
-
-	.data
-	.align	ALIGN
-	.globl	EXT(exception_end)
-EXT(exception_end):
-	.long	EXT(ExceptionVectorsEnd) -EXT(ExceptionVectorsStart) /* phys fn */
-
-
-
diff --git a/osfmk/ppc/machine_routines.c b/osfmk/ppc/machine_routines.c
deleted file mode 100644
index d4fb8e1ca..000000000
--- a/osfmk/ppc/machine_routines.c
+++ /dev/null
@@ -1,847 +0,0 @@
-/*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <mach/mach_types.h>
-
-#include <ppc/machine_routines.h>
-#include <ppc/cpu_internal.h>
-#include <ppc/exception.h>
-#include <ppc/io_map_entries.h>
-#include <ppc/misc_protos.h>
-#include <ppc/savearea.h>
-#include <ppc/Firmware.h>
-#include <ppc/pmap.h>
-#include <ppc/mem.h>
-#include <ppc/new_screen.h>
-#include <ppc/proc_reg.h>
-#include <ppc/machine_cpu.h> /* for cpu_signal_handler() */
-#include <ppc/fpu_protos.h>
-#include <kern/kern_types.h>
-#include <kern/processor.h>
-#include <kern/machine.h>
-
-#include <vm/vm_page.h>
-
-unsigned int		LockTimeOut = 1250000000;
-unsigned int		MutexSpin = 0;
-
-static int max_cpus_initialized = 0;
-
-uint32_t warFlags = 0;
-#define warDisMBpoff	0x80000000
-#define	MAX_CPUS_SET	0x01
-#define	MAX_CPUS_WAIT	0x02
-
-decl_simple_lock_data(, spsLock);
-unsigned int spsLockInit = 0;
-
-extern unsigned int hwllckPatch_isync;
-extern unsigned int hwulckPatch_isync;
-extern unsigned int hwulckbPatch_isync;
-extern unsigned int hwlmlckPatch_isync;
-extern unsigned int hwltlckPatch_isync;
-extern unsigned int hwcsatomicPatch_isync;
-extern unsigned int mlckePatch_isync;
-extern unsigned int mlckPatch_isync;
-extern unsigned int mltelckPatch_isync;
-extern unsigned int mltlckPatch_isync;
-extern unsigned int mulckePatch_isync;
-extern unsigned int mulckPatch_isync;
-extern unsigned int slckPatch_isync;
-extern unsigned int stlckPatch_isync;
-extern unsigned int sulckPatch_isync;
-extern unsigned int rwlePatch_isync;
-extern unsigned int rwlsPatch_isync;
-extern unsigned int rwlsePatch_isync;
-extern unsigned int rwlesPatch_isync;
-extern unsigned int rwtlePatch_isync;
-extern unsigned int rwtlsPatch_isync;
-extern unsigned int rwldPatch_isync;
-extern unsigned int hwulckPatch_eieio;
-extern unsigned int mulckPatch_eieio;
-extern unsigned int mulckePatch_eieio;
-extern unsigned int sulckPatch_eieio;
-extern unsigned int rwlesPatch_eieio;
-extern unsigned int rwldPatch_eieio;
-
-struct patch_up {
-        unsigned int    *addr;
-        unsigned int    data;
-};
-
-typedef struct patch_up patch_up_t;
-
-patch_up_t patch_up_table[] = {
-	{&hwllckPatch_isync,		0x60000000},
-	{&hwulckPatch_isync,		0x60000000},
-	{&hwulckbPatch_isync,		0x60000000},
-	{&hwlmlckPatch_isync,		0x60000000},
-	{&hwltlckPatch_isync,		0x60000000},
-	{&hwcsatomicPatch_isync,	0x60000000},
-	{&mlckePatch_isync,		0x60000000},
-	{&mlckPatch_isync,		0x60000000},
-	{&mltelckPatch_isync,		0x60000000},
-	{&mltlckPatch_isync,		0x60000000},
-	{&mulckePatch_isync,		0x60000000},
-	{&mulckPatch_isync,		0x60000000},
-	{&slckPatch_isync,		0x60000000},
-	{&stlckPatch_isync,		0x60000000},
-	{&sulckPatch_isync,		0x60000000},
-	{&rwlePatch_isync,		0x60000000},
-	{&rwlsPatch_isync,		0x60000000},
-	{&rwlsePatch_isync,		0x60000000},
-	{&rwlesPatch_isync,		0x60000000},
-	{&rwtlePatch_isync,		0x60000000},
-	{&rwtlsPatch_isync,		0x60000000},
-	{&rwldPatch_isync,		0x60000000},
-	{&hwulckPatch_eieio,		0x60000000},
-	{&mulckPatch_eieio,		0x60000000},
-	{&mulckePatch_eieio,		0x60000000},
-	{&sulckPatch_eieio,		0x60000000},
-	{&rwlesPatch_eieio,		0x60000000},
-	{&rwldPatch_eieio,		0x60000000},
-	{NULL,				0x00000000}
-};
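0x60000000 is the PowerPC no-op (ori 0,0,0), so the table lists isync/eieio sites that can be patched to nops on uniprocessor boots, where the barriers buy nothing. The consumer lives elsewhere in xnu; a hedged sketch of what it presumably does:

/* Hedged sketch of a patch_up_table consumer; flush_patched_insn() is a
 * hypothetical stand-in for the dcbf/sync/icbi/isync a code store needs. */
#include <stddef.h>

extern void flush_patched_insn(unsigned int *addr);

static void apply_uniprocessor_patches(void)
{
    patch_up_t *p;
    for (p = patch_up_table; p->addr != NULL; p++) {
        *p->addr = p->data;           /* overwrite barrier with nop */
        flush_patched_insn(p->addr);  /* make ifetch see the new insn */
    }
}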
-
-extern int			forcenap;
-extern boolean_t	pmap_initialized;
-
-/* Map memory map IO space */
-vm_offset_t 
-ml_io_map(
-	vm_offset_t phys_addr, 
-	vm_size_t size)
-{
-	return(io_map(phys_addr,size,VM_WIMG_IO));
-}
-
-
-void ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size)
-{
-	*phys_addr = 0;
-	*size      = 0;
-}
-
-
-/*
- *	Routine:        ml_static_malloc
- *	Function: 	static memory allocation
- */
-vm_offset_t 
-ml_static_malloc(
-	vm_size_t size)
-{
-	vm_offset_t vaddr;
-
-	if (pmap_initialized)
-		return((vm_offset_t)NULL);
-	else {
-		vaddr = static_memory_end;
-		static_memory_end = round_page(vaddr+size);
-		return(vaddr);
-	}
-}
-
-/*
- *	Routine:        ml_static_ptovirt
- *	Function:
- */
-vm_offset_t
-ml_static_ptovirt(
-	vm_offset_t paddr)
-{
-	vm_offset_t vaddr;
-
-	/* Static memory is map V=R */
-	vaddr = paddr;
-	if ( (vaddr < static_memory_end) && (pmap_extract(kernel_pmap, vaddr)==paddr) )
-		return(vaddr);
-	else
-		return((vm_offset_t)NULL);
-}
-
-/*
- *	Routine:        ml_static_mfree
- *	Function:
- */
-void
-ml_static_mfree(
-	vm_offset_t vaddr,
-	vm_size_t size)
-{
-	vm_offset_t paddr_cur, vaddr_cur;
-
-	for (vaddr_cur = round_page_32(vaddr);
-	     vaddr_cur < trunc_page_32(vaddr+size);
-	     vaddr_cur += PAGE_SIZE) {
-		paddr_cur = pmap_extract(kernel_pmap, vaddr_cur);
-		if (paddr_cur != (vm_offset_t)NULL) {
-			vm_page_wire_count--;
-			pmap_remove(kernel_pmap, (addr64_t)vaddr_cur, (addr64_t)(vaddr_cur+PAGE_SIZE));
-			vm_page_create(paddr_cur>>12,(paddr_cur+PAGE_SIZE)>>12);
-		}
-	}
-}
-
-/*
- *	Routine:        ml_vtophys
- *	Function:	virtual to physical on static pages
- */
-vm_offset_t ml_vtophys(
-	vm_offset_t vaddr)
-{
-	return(pmap_extract(kernel_pmap, vaddr));
-}
-
-/*
- *	Routine:        ml_install_interrupt_handler
- *	Function:	Initialize Interrupt Handler
- */
-void ml_install_interrupt_handler(
-	void *nub,
-	int source,
-	void *target,
-	IOInterruptHandler handler,
-	void *refCon)
-{
-	struct per_proc_info	*proc_info;
-	boolean_t		current_state;
-
-	current_state = ml_get_interrupts_enabled();
-	proc_info = getPerProc();
-
-	proc_info->interrupt_nub     = nub;
-	proc_info->interrupt_source  = source;
-	proc_info->interrupt_target  = target;
-	proc_info->interrupt_handler = handler;
-	proc_info->interrupt_refCon  = refCon;
-
-	proc_info->interrupts_enabled = TRUE;  
-	(void) ml_set_interrupts_enabled(current_state);
-
-	initialize_screen(NULL, kPEAcquireScreen);
-}
-
-/*
- *	Routine:        ml_nofault_copy
- *	Function:	Perform a physical mode copy if the source and
- *			destination have valid translations in the kernel pmap.
- *			If translations are present, they are assumed to
- *			be wired; i.e. no attempt is made to guarantee that the
- *			translations obtained remained valid for
- *			the duration of their use.
- */
-
-vm_size_t ml_nofault_copy(
-	vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size)
-{
-	addr64_t cur_phys_dst, cur_phys_src;
-	uint32_t count, pindex, nbytes = 0;
-
-	while (size > 0) {
-		if (!(cur_phys_src = kvtophys(virtsrc)))
-			break;
-		if (!(cur_phys_dst = kvtophys(virtdst)))
-			break;
-		if (!mapping_phys_lookup((cur_phys_src>>12), &pindex) ||
-		    !mapping_phys_lookup((cur_phys_dst>>12), &pindex))
-			break;
-		count = PAGE_SIZE - (cur_phys_src & PAGE_MASK);
-		if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK)))
-			count = PAGE_SIZE - (cur_phys_dst & PAGE_MASK);
-		if (count > size)
-			count = size;
-
-		bcopy_phys(cur_phys_src, cur_phys_dst, count);
-
-		nbytes += count;
-		virtsrc += count;
-		virtdst += count;
-		size -= count;
-	}
-
-	return nbytes;
-}
-
-/*
- *	Routine:        ml_init_interrupt
- *	Function:	Initialize Interrupts
- */
-void ml_init_interrupt(void)
-{
-	boolean_t current_state;
-
-	current_state = ml_get_interrupts_enabled();
-
-	getPerProc()->interrupts_enabled = TRUE;  
-	(void) ml_set_interrupts_enabled(current_state);
-}
-
-/*
- *	Routine:        ml_get_interrupts_enabled
- *	Function:	Get Interrupts Enabled
- */
-boolean_t ml_get_interrupts_enabled(void)
-{
-	return((mfmsr() & MASK(MSR_EE)) != 0);
-}
-
-/*
- *	Routine:        ml_at_interrupt_context
- *	Function:	Check if running at interrupt context
- */
-boolean_t ml_at_interrupt_context(void)
-{
-	boolean_t	ret;
-	boolean_t	current_state;
-
-	current_state = ml_set_interrupts_enabled(FALSE);
- 	ret = (getPerProc()->istackptr == 0);	
-	ml_set_interrupts_enabled(current_state);
-	return(ret);
-}
-
-/*
- *	Routine:        ml_cause_interrupt
- *	Function:	Generate a fake interrupt
- */
-void ml_cause_interrupt(void)
-{
-	CreateFakeIO();
-}
-
-/*
- *	Routine:        ml_thread_policy
- *	Function:
- */
-void ml_thread_policy(
-	thread_t thread,
-__unused	unsigned policy_id,
-	unsigned policy_info)
-{
-	if (policy_info & MACHINE_NETWORK_WORKLOOP) {
-		spl_t		s = splsched();
-
-		thread_lock(thread);
-
-		set_priority(thread, thread->priority + 1);
-
-		thread_unlock(thread);
-		splx(s);
-	}
-}
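-
-/*
- *	Illustrative sketch (hypothetical caller): how a networking workloop
- *	thread would request the one-step priority boost implemented above.
- */
-static void
-example_network_thread_setup(thread_t thread)
-{
-	ml_thread_policy(thread, MACHINE_GROUP,
-			 MACHINE_NETWORK_GROUP | MACHINE_NETWORK_WORKLOOP);
-}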
-
-/*
- *	Routine:        machine_signal_idle
- *	Function:
- */
-void
-machine_signal_idle(
-	processor_t processor)
-{
-	struct per_proc_info	*proc_info;
-
-	proc_info = PROCESSOR_TO_PER_PROC(processor);
-
-	if (proc_info->pf.Available & (pfCanDoze|pfWillNap))
-		(void)cpu_signal(proc_info->cpu_number, SIGPwake, 0, 0);
-}
-
-/*
- *	Routine:        ml_processor_register
- *	Function:
- */
-kern_return_t
-ml_processor_register(
-	ml_processor_info_t 	*in_processor_info,
-	processor_t				*processor_out,
-	ipi_handler_t			*ipi_handler)
-{
-	struct per_proc_info	*proc_info;
-	int						donap;
-	boolean_t				current_state;
-	boolean_t				boot_processor;
-
-	if (in_processor_info->boot_cpu == FALSE) {
-		if (spsLockInit == 0) {
-			spsLockInit = 1;
-			simple_lock_init(&spsLock, 0);
-		}
-		boot_processor = FALSE;
-		proc_info = cpu_per_proc_alloc();
-		if (proc_info == (struct per_proc_info *)NULL)
-			return KERN_FAILURE;
-		proc_info->pp_cbfr = console_per_proc_alloc(FALSE);
-		if (proc_info->pp_cbfr == (void *)NULL)
-			goto	processor_register_error;
-	} else {
-		boot_processor = TRUE;
-		proc_info =  PerProcTable[master_cpu].ppe_vaddr;
-	}
-
-	proc_info->pp_chud = chudxnu_per_proc_alloc(boot_processor);
-	if (proc_info->pp_chud == (void *)NULL)
-		goto	processor_register_error;
-
-	if (!boot_processor)
-		if (cpu_per_proc_register(proc_info) != KERN_SUCCESS)
-			goto	processor_register_error;
-
-	proc_info->cpu_id = in_processor_info->cpu_id;
-	proc_info->start_paddr = in_processor_info->start_paddr;
-	if(in_processor_info->time_base_enable !=  (void(*)(cpu_id_t, boolean_t ))NULL)
-		proc_info->time_base_enable = in_processor_info->time_base_enable;
-	else
-		proc_info->time_base_enable = (void(*)(cpu_id_t, boolean_t ))NULL;
-
-	if((proc_info->pf.pfPowerModes & pmType) == pmPowerTune) {
-		proc_info->pf.pfPowerTune0 = in_processor_info->power_mode_0;
-		proc_info->pf.pfPowerTune1 = in_processor_info->power_mode_1;
-	}
-
-	donap = in_processor_info->supports_nap;	/* Assume we use requested nap */
-	if(forcenap) donap = forcenap - 1;		/* If there was an override, use that */
-
-	if((proc_info->pf.Available & pfCanNap)
-	   && (donap)) {
-		proc_info->pf.Available |= pfWillNap;
-		current_state = ml_set_interrupts_enabled(FALSE);
-		if(proc_info == getPerProc()) 
-			__asm__ volatile("mtsprg 2,%0" : : "r" (proc_info->pf.Available));	/* Set live value */
-		(void) ml_set_interrupts_enabled(current_state);
-	}
-
-	if (!boot_processor) {
-		(void)hw_atomic_add(&saveanchor.savetarget, FreeListMin);   /* saveareas for this processor */
-		processor_init((struct processor *)proc_info->processor,
-								proc_info->cpu_number, processor_pset(master_processor));
-	}
-
-	*processor_out = (struct processor *)proc_info->processor;
-	*ipi_handler = cpu_signal_handler;
-
-	return KERN_SUCCESS;
-
-processor_register_error:
-	if (proc_info->pp_cbfr != (void *)NULL)
-		console_per_proc_free(proc_info->pp_cbfr);
-	if (proc_info->pp_chud != (void *)NULL)
-		chudxnu_per_proc_free(proc_info->pp_chud);
-	if (!boot_processor)
-		cpu_per_proc_free(proc_info);
-	return KERN_FAILURE;
-}
-
-/*
- *	Routine:        ml_enable_nap
- *	Function:
- */
-boolean_t
-ml_enable_nap(int target_cpu, boolean_t nap_enabled)
-{
-	struct per_proc_info	*proc_info;
-	boolean_t				prev_value;
-	boolean_t				current_state;
-
-	proc_info = PerProcTable[target_cpu].ppe_vaddr;
-
-	prev_value = (proc_info->pf.Available & pfCanNap) && (proc_info->pf.Available & pfWillNap);
-
-	if(forcenap) nap_enabled = forcenap - 1;				/* If we are to force nap on or off, do it */
-
-	if(proc_info->pf.Available & pfCanNap) {				/* Can the processor nap? */
-		if (nap_enabled) proc_info->pf.Available |= pfWillNap;	/* Set the will-nap bit if enabled */
-		else proc_info->pf.Available &= ~pfWillNap;			/* Clear if not */
-	}
-
-	current_state = ml_set_interrupts_enabled(FALSE);
-	if(proc_info == getPerProc())
-		__asm__ volatile("mtsprg 2,%0" : : "r" (proc_info->pf.Available));	/* Set live value */
-	(void) ml_set_interrupts_enabled(current_state);
-
-	return (prev_value);
-}
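-
-/*
- *	Illustrative sketch (hypothetical caller): since ml_enable_nap()
- *	returns the previous setting, napping can be disabled around a
- *	timing-critical section and then restored.
- */
-static void
-example_no_nap_section(int cpu)
-{
-	boolean_t was_enabled;
-
-	was_enabled = ml_enable_nap(cpu, FALSE);	/* Disable napping, remember old state */
-	/* ... timing-critical work ... */
-	(void) ml_enable_nap(cpu, was_enabled);		/* Restore the previous setting */
-}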
-
-/*
- *	Routine:        ml_init_max_cpus
- *	Function:
- */
-void
-ml_init_max_cpus(unsigned int max_cpus)
-{
-	boolean_t current_state;
-
-	current_state = ml_set_interrupts_enabled(FALSE);
-	if (max_cpus_initialized != MAX_CPUS_SET) {
-		if (max_cpus > 0 && max_cpus <= MAX_CPUS) {
-			/*
-			 * Note: max_ncpus is the maximum number
-			 * that the kernel supports or that the "cpus="
-			 * boot-arg has set. Here we take the minimum.
-			 */
-			machine_info.max_cpus = MIN(max_cpus, max_ncpus);
-			machine_info.physical_cpu_max = max_cpus;
-			machine_info.logical_cpu_max = max_cpus;
-		}
-		if (max_cpus_initialized == MAX_CPUS_WAIT)
-			wakeup((event_t)&max_cpus_initialized);
-		max_cpus_initialized = MAX_CPUS_SET;
-	}
-	
-	if (machine_info.logical_cpu_max == 1) {
-		struct patch_up *patch_up_ptr = &patch_up_table[0];
-
-		while (patch_up_ptr->addr != NULL) {
-			/*
-			 * Patch for V=R kernel text section
-			 */
-			bcopy_phys((addr64_t)((unsigned int)(&patch_up_ptr->data)), 
-				   (addr64_t)((unsigned int)(patch_up_ptr->addr)), 4);
-			sync_cache64((addr64_t)((unsigned int)(patch_up_ptr->addr)),4);
-			patch_up_ptr++;
-		}
-	}
-	
-	(void) ml_set_interrupts_enabled(current_state);	
-}
-
-/*
- *	Routine:        ml_get_max_cpus
- *	Function:
- */
-unsigned int
-ml_get_max_cpus(void)
-{
-	boolean_t current_state;
-
-	current_state = ml_set_interrupts_enabled(FALSE);
-	if (max_cpus_initialized != MAX_CPUS_SET) {
-		max_cpus_initialized = MAX_CPUS_WAIT;
-		assert_wait((event_t)&max_cpus_initialized, THREAD_UNINT);
-		(void)thread_block(THREAD_CONTINUE_NULL);
-	}
-	(void) ml_set_interrupts_enabled(current_state);
-	return(machine_info.max_cpus);
-}
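-
-/*
- *	Illustrative sketch (hypothetical caller): consumers simply call
- *	ml_get_max_cpus(); if the platform expert has not yet called
- *	ml_init_max_cpus(), the call blocks on &max_cpus_initialized until
- *	the wakeup() above fires.
- */
-static unsigned int
-example_max_cpus(void)
-{
-	return ml_get_max_cpus();			/* May block early in boot */
-}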
-
-/*
- * This is called from the machine-independent routine cpu_up()
- * to perform machine-dependent info updates.
- */
-void
-ml_cpu_up(void)
-{
-	(void)hw_atomic_add(&machine_info.physical_cpu, 1);
-	(void)hw_atomic_add(&machine_info.logical_cpu, 1);
-}
-
-/*
- * This is called from the machine-independent routine cpu_down()
- * to perform machine-dependent info updates.
- */
-void
-ml_cpu_down(void)
-{
-	(void)hw_atomic_sub(&machine_info.physical_cpu, 1);
-	(void)hw_atomic_sub(&machine_info.logical_cpu, 1);
-}
-
-/*
- *	Routine:        ml_cpu_get_info
- *	Function:
- */
-void
-ml_cpu_get_info(ml_cpu_info_t *ml_cpu_info)
-{
-  struct per_proc_info	*proc_info;
-
-  if (ml_cpu_info == NULL) return;
-  
-  proc_info = PerProcTable[master_cpu].ppe_vaddr;
-  ml_cpu_info->vector_unit = (proc_info->pf.Available & pfAltivec) != 0;
-  ml_cpu_info->cache_line_size = proc_info->pf.lineSize;
-  ml_cpu_info->l1_icache_size = proc_info->pf.l1iSize;
-  ml_cpu_info->l1_dcache_size = proc_info->pf.l1dSize;
-  
-  if (proc_info->pf.Available & pfL2) {
-    ml_cpu_info->l2_settings = proc_info->pf.l2cr;
-    ml_cpu_info->l2_cache_size = proc_info->pf.l2Size;
-  } else {
-    ml_cpu_info->l2_settings = 0;
-    ml_cpu_info->l2_cache_size = 0xFFFFFFFF;
-  }
-  if (proc_info->pf.Available & pfL3) {
-    ml_cpu_info->l3_settings = proc_info->pf.l3cr;
-    ml_cpu_info->l3_cache_size = proc_info->pf.l3Size;
-  } else {
-    ml_cpu_info->l3_settings = 0;
-    ml_cpu_info->l3_cache_size = 0xFFFFFFFF;
-  }
-}
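-
-/*
- *	Illustrative sketch (hypothetical caller): note that a missing L2 or
- *	L3 is reported by ml_cpu_get_info() as a size of 0xFFFFFFFF, not 0.
- */
-static unsigned long
-example_l2_size_or_zero(void)
-{
-	ml_cpu_info_t info;
-
-	ml_cpu_get_info(&info);
-	return (info.l2_cache_size == 0xFFFFFFFF) ? 0 : info.l2_cache_size;
-}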
-
-/*
- *	Routine:        ml_enable_cache_level
- *	Function:
- */
-#define l2em 0x80000000
-#define l3em 0x80000000
-int
-ml_enable_cache_level(int cache_level, int enable)
-{
-  int old_mode;
-  unsigned long available, ccr;
-  struct per_proc_info	*proc_info;
-  
-  if (real_ncpus != 1) return -1;	/* XXX: This test is not safe */
-  
-  proc_info = PerProcTable[master_cpu].ppe_vaddr;
-  available = proc_info->pf.Available;
-  
-  if ((cache_level == 2) && (available & pfL2)) {
-    ccr = proc_info->pf.l2cr;
-    old_mode = (ccr & l2em) ? TRUE : FALSE;
-    if (old_mode != enable) {
-      if (enable) ccr = proc_info->pf.l2crOriginal;
-      else ccr = 0;
-      proc_info->pf.l2cr = ccr;
-      cacheInit();
-    }
-    
-    return old_mode;
-  }
-  
-  if ((cache_level == 3) && (available & pfL3)) {
-    ccr = proc_info->pf.l3cr;
-    old_mode = (ccr & l3em) ? TRUE : FALSE;
-    if (old_mode != enable) {
-      if (enable) ccr = proc_info->pf.l3crOriginal;
-      else ccr = 0;
-      proc_info->pf.l3cr = ccr;
-      cacheInit();
-    }
-    
-    return old_mode;
-  }
-  
-  return -1;
-}
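-
-/*
- *	Illustrative sketch (hypothetical caller): ml_enable_cache_level()
- *	returns the previous enable state, or -1 on failure, allowing the
- *	usual disable/restore pairing.
- */
-static void
-example_toggle_l2(void)
-{
-	int old_mode;
-
-	old_mode = ml_enable_cache_level(2, FALSE);	/* Disable L2, remember state */
-	if (old_mode < 0)
-		return;					/* No L2, or not a single-CPU system */
-	/* ... run with the L2 disabled ... */
-	(void) ml_enable_cache_level(2, old_mode);	/* Put it back */
-}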
-
-
-/*
- *      Routine:        ml_set_processor_speed
- *      Function:
- */
-void
-ml_set_processor_speed(unsigned long speed)
-{
-	struct per_proc_info    *proc_info;
-	uint32_t                cpu;
-	kern_return_t           result;
-	boolean_t		current_state;
-	unsigned int		i;
-  
-	proc_info = PerProcTable[master_cpu].ppe_vaddr;
-
-	switch (proc_info->pf.pfPowerModes & pmType) {	/* Figure specific type */
-		case pmDualPLL:
-
-			ml_set_processor_speed_dpll(speed);
-			break;
-			
-		case pmDFS:
-
-			for (cpu = 0; cpu < real_ncpus; cpu++) {
-				/*
-				 * cpu_signal() returns after .5ms if it fails to signal a running cpu;
-				 * retry cpu_signal() for .1s to deal with long interrupt latency at boot.
-				 */
-				for (i=200; i>0; i--) {
-					current_state = ml_set_interrupts_enabled(FALSE);
-					if (cpu != (unsigned)cpu_number()) {
-						if (!(PerProcTable[cpu].ppe_vaddr->cpu_flags & SignalReady))
-							/*
-							 * Target cpu is off-line, skip
-							 */
-							result = KERN_SUCCESS;
-						else {
-							simple_lock(&spsLock);
-							result = cpu_signal(cpu, SIGPcpureq, CPRQsps, speed);	
-							if (result == KERN_SUCCESS) 
-								thread_sleep_simple_lock(&spsLock, &spsLock, THREAD_UNINT);
-							simple_unlock(&spsLock);
-						}
-					} else {
-						ml_set_processor_speed_dfs(speed);
-						result = KERN_SUCCESS;
-					}
-					(void) ml_set_interrupts_enabled(current_state);
-					if (result == KERN_SUCCESS)
-						break;
-				}
-				if (result != KERN_SUCCESS)
-					panic("ml_set_processor_speed(): Failed to set cpu%d speed\n", cpu);
-			}
-			break;
-			
-		case pmPowerTune:
-	
-			ml_set_processor_speed_powertune(speed);
-			break;
-			
-		default:					
-			break;
-
-	}
-	return;
-}
-
-/*
- *      Routine:        ml_set_processor_speed_slave
- *      Function:
- */
-void
-ml_set_processor_speed_slave(unsigned long speed)
-{
-  ml_set_processor_speed_dfs(speed);
-  
-  simple_lock(&spsLock);
-  thread_wakeup(&spsLock);
-  simple_unlock(&spsLock);
-}
-
-/*
- *	Routine:        ml_init_lock_timeout
- *	Function:
- */
-void
-ml_init_lock_timeout(void)
-{
-	uint64_t	abstime;
-	uint32_t	mtxspin; 
-
-	nanoseconds_to_absolutetime(NSEC_PER_SEC>>2, &abstime);
-	LockTimeOut = (unsigned int)abstime;
-
-	if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof (mtxspin))) {
-		if (mtxspin > USEC_PER_SEC>>4)
-			mtxspin =  USEC_PER_SEC>>4;
-		nanoseconds_to_absolutetime(mtxspin*NSEC_PER_USEC, &abstime);
-	} else {
-		nanoseconds_to_absolutetime(10*NSEC_PER_USEC, &abstime);
-	}
-	MutexSpin = (unsigned int)abstime;
-}
-
-/*
- *	Routine:        init_ast_check
- *	Function:
- */
-void
-init_ast_check(
-	__unused processor_t	processor)
-{}
-
-/*
- *	Routine:        cause_ast_check
- *	Function:
- */
-void
-cause_ast_check(
-	processor_t		processor)
-{
-	struct per_proc_info	*proc_info;
-
-	proc_info = PROCESSOR_TO_PER_PROC(processor);
-
-	if (proc_info != getPerProc()
-	    && proc_info->interrupts_enabled == TRUE)
-		cpu_signal(proc_info->cpu_number, SIGPast, (unsigned int)NULL, (unsigned int)NULL);
-}
-              
-/*
- *	Routine:        machine_processor_shutdown
- *	Function:
- */
-thread_t        
-machine_processor_shutdown(
-	__unused thread_t		thread,
-	__unused void			(*doshutdown)(processor_t),
-	__unused processor_t	processor)
-{
-	CreateShutdownCTX();   
-	return((thread_t)(getPerProc()->old_thread));
-}
-
-
-void ml_mem_backoff(void) {
-
-	if(warFlags & warDisMBpoff) return;					/* If backoff disabled, exit */
-
-	__asm__ volatile("sync");
-	__asm__ volatile("isync");
-	
-	return;
-}
-
-
-
-/*
- * Stubs for CPU Stepper
- */
-void
-machine_run_count(__unused uint32_t count)
-{
-}
-
-boolean_t
-machine_processor_is_inactive(__unused processor_t processor)
-{
-    return(FALSE);
-}
-
-processor_t
-machine_choose_processor(__unused processor_set_t pset, processor_t processor)
-{
-    return (processor);
-}
-
-vm_offset_t ml_stack_remaining(void)
-{
-	uintptr_t local = (uintptr_t) &local;
-
-	if (ml_at_interrupt_context()) {
-	    return (local - (getPerProc()->intstack_top_ss - INTSTACK_SIZE));
-	} else {
-	    return (local - current_thread()->kernel_stack);
-	}
-}
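-
-/*
- *	Illustrative sketch (hypothetical caller): guarding deep recursion
- *	with the headroom reported above; the 4K threshold is an arbitrary
- *	example value.
- */
-static boolean_t
-example_stack_ok(void)
-{
-	return (ml_stack_remaining() > 4096);
-}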
-
-boolean_t machine_timeout_suspended(void) {
-	return FALSE;
-}
diff --git a/osfmk/ppc/machine_routines.h b/osfmk/ppc/machine_routines.h
deleted file mode 100644
index 47b12432d..000000000
--- a/osfmk/ppc/machine_routines.h
+++ /dev/null
@@ -1,338 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifndef	_PPC_MACHINE_ROUTINES_H_
-#define	_PPC_MACHINE_ROUTINES_H_
-
-#include <mach/mach_types.h>
-#include <mach/boolean.h>
-#include <kern/kern_types.h>
-#include <pexpert/pexpert.h>
-
-#include <sys/cdefs.h>
-#include <sys/appleapiopts.h>
-
-__BEGIN_DECLS
-
-/* Get Interrupts Enabled */
-extern boolean_t	ml_get_interrupts_enabled(
-						void);
-
-/* Set Interrupts Enabled */
-extern boolean_t	ml_set_interrupts_enabled(
-						boolean_t				enable);
-
-/* Check if running at interrupt context */
-extern boolean_t	ml_at_interrupt_context(
-						void);
-
-#ifdef KERNEL_PRIVATE
-
-/* Generate a fake interrupt */
-extern void			ml_cause_interrupt(
-						void);
-
-/* Type for the IPI Handler */
-typedef void (*ipi_handler_t)(void);
-
-/* Type for the Time Base Enable function */
-typedef void (*time_base_enable_t)(cpu_id_t cpu_id, boolean_t enable);
-
-/* Enables (or disables) processor nap mode; returns the previous value */
-extern boolean_t	ml_enable_nap(
-						int						target_cpu,
-						boolean_t				nap_enabled);
-
-/* Put the processor to sleep */
-extern void			ml_ppc_sleep(
-						void);
-
-extern void			ml_get_timebase(
-						unsigned long long		*timestamp);
-
-extern int			ml_enable_cache_level(
-						int						cache_level,
-						int						enable);
-
-extern void			ml_static_mfree(
-						vm_offset_t				vaddr,
-						vm_size_t				size);
-        
-/* Init Interrupts */
-extern void			ml_install_interrupt_handler(
-						void					*nub,
-						int						source,
-						void					*target,
-						IOInterruptHandler		handler,
-						void					*refCon);
-               
-extern vm_offset_t		ml_static_ptovirt(
-							vm_offset_t			paddr);
-
-/* virtual to physical on wired pages */
-extern vm_offset_t		ml_vtophys(
-							vm_offset_t			vaddr);
-
-vm_size_t ml_nofault_copy(
-	vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size);
-
-/* PCI config cycle probing */
-extern boolean_t		ml_probe_read(
-							vm_offset_t			paddr,
-							unsigned int		*val);
-
-extern boolean_t		ml_probe_read_64(
-							addr64_t			paddr,
-							unsigned int		*val);
-
-/* Read physical address byte */
-extern unsigned int		ml_phys_read_byte(
-							vm_offset_t			paddr);
-
-extern unsigned int		ml_phys_read_byte_64(
-							addr64_t			paddr);
-
-/* Read physical address half word */
-extern unsigned int		ml_phys_read_half(
-							vm_offset_t			paddr);
-
-extern unsigned int		ml_phys_read_half_64(
-							addr64_t			paddr);
-
-/* Read physical address word*/
-extern unsigned int		ml_phys_read(
-							vm_offset_t			paddr);
-
-extern unsigned int		ml_phys_read_64(
-							addr64_t			paddr);
-
-extern unsigned int		ml_phys_read_word(
-							vm_offset_t			paddr);
-
-extern unsigned int		ml_phys_read_word_64(
-							addr64_t			paddr);
-
-/* Read physical address double word */
-extern unsigned long long ml_phys_read_double(
-							vm_offset_t			paddr);
-
-extern unsigned long long ml_phys_read_double_64(
-							addr64_t			paddr);
-
-/* Write physical address byte */
-extern void				ml_phys_write_byte(
-							vm_offset_t			paddr,
-							unsigned	int		data);
-
-extern void				ml_phys_write_byte_64(
-								addr64_t		paddr,
-								unsigned int	data);
-
-/* Write physical address half word */
-extern void				ml_phys_write_half(
-							vm_offset_t			paddr,
-							unsigned int		data);
-
-extern void				ml_phys_write_half_64(
-							addr64_t			paddr,
-							unsigned int		data);
-
-/* Write physical address word */
-extern void				ml_phys_write(
-							vm_offset_t			paddr,
-							unsigned int		data);
-
-extern void				ml_phys_write_64(
-							addr64_t			paddr,
-							unsigned int		data);
-
-extern void				ml_phys_write_word(
-							vm_offset_t			paddr,
-							unsigned int		data);
-
-extern void				ml_phys_write_word_64(
-							addr64_t			paddr,
-							unsigned int		data);
-
-/* Write physical address double word */
-extern void				 ml_phys_write_double(
-							vm_offset_t			paddr,
-							unsigned long long	data);
-
-extern void				ml_phys_write_double_64(
-							addr64_t paddr,
-							unsigned long long	 data);
-
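-/*
- *	Illustrative sketch (hypothetical caller): the physical accessors
- *	above take physical addresses directly; a common pattern is to
- *	translate a kernel virtual address first.
- */
-static inline unsigned int
-example_phys_read_of_vaddr(vm_offset_t vaddr)
-{
-	return ml_phys_read(ml_vtophys(vaddr));	/* Word read of normal, cacheable memory */
-}
-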
-/* Struct for ml_processor_register */
-struct ml_processor_info {
-	cpu_id_t			cpu_id;
-	boolean_t			boot_cpu;
-	vm_offset_t			start_paddr;
-	boolean_t			supports_nap;
-	unsigned long			l2cr_value;
-	time_base_enable_t		time_base_enable;
-	uint32_t			power_mode_0;
-	uint32_t			power_mode_1;
-};
-
-typedef struct ml_processor_info ml_processor_info_t;
-
-/* Register a processor */
-extern kern_return_t	ml_processor_register(
-							ml_processor_info_t *ml_processor_info,
-							processor_t			*processor,
-							ipi_handler_t		*ipi_handler);
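-
-/*
- *	Illustrative sketch (hypothetical caller): how a platform expert
- *	might fill in ml_processor_info and register a secondary CPU. The
- *	field values here are assumptions for illustration only.
- */
-static inline kern_return_t
-example_register_cpu(cpu_id_t id, vm_offset_t start)
-{
-	ml_processor_info_t	info;
-	processor_t		proc;
-	ipi_handler_t		ipi;
-
-	info.cpu_id = id;
-	info.boot_cpu = FALSE;			/* Secondary processor */
-	info.start_paddr = start;
-	info.supports_nap = TRUE;
-	info.l2cr_value = 0;
-	info.time_base_enable = NULL;
-	info.power_mode_0 = 0;
-	info.power_mode_1 = 0;
-
-	return ml_processor_register(&info, &proc, &ipi);
-}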
-
-/* Zero bytes starting at a physical address */
-extern void				bzero_phys(
-							addr64_t			phys_address,
-							uint32_t			length);
-
-/* Zero bytes starting at a physical address that's uncacheable */
-extern void				bzero_phys_nc(
-							addr64_t			phys_address,
-							uint32_t			length);
-
-/* Bytes available on current stack */
-vm_offset_t ml_stack_remaining(void);
-
-#endif /* KERNEL_PRIVATE */
-
-#ifdef	XNU_KERNEL_PRIVATE
-#if	defined(PEXPERT_KERNEL_PRIVATE) || defined(MACH_KERNEL_PRIVATE)
-
-/* Map memory-mapped IO space */
-extern vm_offset_t		ml_io_map(
-							vm_offset_t			phys_addr, 
-							vm_size_t			size);
-
-void	ml_get_bouncepool_info(
-			vm_offset_t			*phys_addr,
-			vm_size_t			*size);
-
-
-/* boot memory allocation */
-extern vm_offset_t		ml_static_malloc(
-							vm_size_t			size);
-
-#endif /* PEXPERT_KERNEL_PRIVATE || MACH_KERNEL_PRIVATE */
-
-
-#ifdef	MACH_KERNEL_PRIVATE
-extern void				ml_init_interrupt(
-							void);
-
-extern void				cacheInit(
-							void);
-
-extern void				cacheDisable(
-							void);
-
-extern void				ml_init_lock_timeout(
-							void);
-
-void ml_ppc_do_sleep(void);
-
-boolean_t machine_timeout_suspended(void);
-#endif /* MACH_KERNEL_PRIVATE */
-#endif /* XNU_KERNEL_PRIVATE */
-
-#ifdef  KERNEL_PRIVATE
-extern void		ml_thread_policy(
-				thread_t	thread,
-				unsigned	policy_id,
-				unsigned	policy_info);
-
-#define MACHINE_GROUP				0x00000001
-#define MACHINE_NETWORK_GROUP		0x10000000 
-#define MACHINE_NETWORK_WORKLOOP	0x00000001
-#define MACHINE_NETWORK_NETISR		0x00000002
-
-/* Initialize the maximum number of CPUs */
-extern void				ml_init_max_cpus(
-							unsigned int		max_cpus);
-
-/* Return the maximum number of CPUs set by ml_init_max_cpus() */
-extern unsigned int		ml_get_max_cpus(
-							void);
-
-extern void			ml_cpu_up(void);
-extern void			ml_cpu_down(void);
-
-/* Struct for ml_cpu_get_info */
-struct ml_cpu_info {
-	unsigned long		vector_unit;
-	unsigned long		cache_line_size;
-	unsigned long		l1_icache_size;
-	unsigned long		l1_dcache_size;
-	unsigned long		l2_settings;
-	unsigned long		l2_cache_size;
-	unsigned long		l3_settings;
-	unsigned long		l3_cache_size;
-};
-
-typedef struct ml_cpu_info ml_cpu_info_t;
-
-/* Get processor info */
-extern void				ml_cpu_get_info(
-							ml_cpu_info_t		*ml_cpu_info);
-
-extern void				ml_set_processor_speed(
-							unsigned long		speed);
-extern void				ml_set_processor_speed_slave(
-							unsigned long		speed);
-extern void				ml_set_processor_speed_dpll(
-							unsigned long		speed);
-extern void				ml_set_processor_speed_dfs(
-							unsigned long		speed);
-extern void				ml_set_processor_speed_powertune(
-							unsigned long		speed);
-
-extern void				ml_set_processor_voltage(
-							unsigned long		voltage);
-
-extern unsigned int		ml_scom_write(
-							uint32_t			reg,
-							uint64_t			data);
-
-extern unsigned int		ml_scom_read(
-							uint32_t			reg,
-							uint64_t			*data);
-
-extern uint32_t 		ml_hdec_ratio(void);
-
-extern int boffSettingsInit;
-
-#endif /* KERNEL_PRIVATE */
-
-__END_DECLS
-
-#endif /* _PPC_MACHINE_ROUTINES_H_ */
diff --git a/osfmk/ppc/machine_routines_asm.s b/osfmk/ppc/machine_routines_asm.s
deleted file mode 100644
index 45bfb5b5e..000000000
--- a/osfmk/ppc/machine_routines_asm.s
+++ /dev/null
@@ -1,2345 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <assym.s>
-#include <debug.h>
-#include <mach/ppc/vm_param.h>
-#include <ppc/exception.h>
-	
-    
-/*
- * ml_set_physical()		 	-- turn off DR and (if 64-bit) turn SF on;
- *								   it is assumed that pf64Bit is already in cr6
- * ml_set_physical_get_ffs() 	-- turn DR off, SF on, and get feature flags 
- * ml_set_physical_disabled()	-- turn DR and EE off, SF on, get feature flags
- * ml_set_translation_off()		-- turn DR, IR, and EE off, SF on, get feature flags
- *
- * Callable only from assembler, these return:
- *	 r2 -- new MSR
- *	r11 -- old MSR
- *	r10 -- feature flags (pf64Bit etc, ie SPRG 2)
- *	cr6 -- feature flags 24-27, ie pf64Bit, pf128Byte, and pf32Byte
- *
- * Uses r0 and r2.  ml_set_translation_off also uses r3 and cr5.
- */
-
-        .align	4
-        .globl	EXT(ml_set_translation_off)
-LEXT(ml_set_translation_off)
-        mfsprg	r10,2						// get feature flags
-       	li		r0,0						; Clear this
-        mtcrf	0x02,r10					// move pf64Bit etc to cr6
-        ori		r0,r0,lo16(MASK(MSR_EE)+MASK(MSR_FP)+MASK(MSR_IR)+MASK(MSR_DR)) // turn off all 4
-        mfmsr	r11							// get MSR
-		oris	r0,r0,hi16(MASK(MSR_VEC))	// Turn off vector too
-        mtcrf	0x04,r10					// move pfNoMSRir etc to cr5
-        andc	r2,r11,r0					// turn off EE, IR, and DR
-        bt++	pf64Bitb,ml_set_physical_64	// skip if 64-bit (only they take the hint)
-        bf		pfNoMSRirb,ml_set_physical_32	// skip if we can load MSR directly
-        li		r0,loadMSR					// Get the MSR setter SC
-        mr		r3,r2						// copy new MSR to r2
-        sc									// Set it
-        blr
-        
-		.align	4
-		.globl	EXT(ml_set_physical_disabled)
-
-LEXT(ml_set_physical_disabled)
-		li		r0,0						; Clear
-        mfsprg	r10,2						// get feature flags
-        ori		r0,r0,lo16(MASK(MSR_EE))	// put EE in the clear mask (DR/FP/VEC added at the join)
-        mtcrf	0x02,r10					// move pf64Bit etc to cr6
-        b		ml_set_physical_join
-
-		.align	5
-		.globl	EXT(ml_set_physical_get_ffs)
-
-LEXT(ml_set_physical_get_ffs)
-        mfsprg	r10,2						// get feature flags
-        mtcrf	0x02,r10					// move pf64Bit etc to cr6
-
-		.globl	EXT(ml_set_physical)
-LEXT(ml_set_physical)
-
-        li		r0,0						// do not turn off interrupts
-
-ml_set_physical_join:
-		oris	r0,r0,hi16(MASK(MSR_VEC))	// Always gonna turn off vectors
-        mfmsr	r11							// get MSR
-        ori		r0,r0,lo16(MASK(MSR_DR)+MASK(MSR_FP))	// always turn off DR and FP bit
-        andc	r2,r11,r0					// turn off DR and maybe EE
-        bt++	pf64Bitb,ml_set_physical_64	// skip if 64-bit (only they take the hint)
-ml_set_physical_32:
-        mtmsr	r2							// turn off translation
-        isync
-        blr
-        
-ml_set_physical_64:
-        li		r0,1						// get a 1 to slam into SF
-        rldimi	r2,r0,63,MSR_SF_BIT			// set SF bit (bit 0)
-        mtmsrd	r2							// set 64-bit mode, turn off data relocation
-        isync								// synchronize
-        blr
-    
-
-/*
- * ml_restore(old_MSR)
- *
- * Callable only from assembler, restores the MSR in r11 saved by ml_set_physical.
- * We assume cr6 and r11 are as set by ml_set_physical, ie:
- *	cr6 - pf64Bit flag (feature flags 24-27)
- *	r11 - old MSR
- */
- 
-		.align	5
-		.globl	EXT(ml_restore)
-
-LEXT(ml_restore)
-        bt++	pf64Bitb,ml_restore_64		// handle 64-bit cpus (only they take the hint)
-        mtmsr	r11							// restore a 32-bit MSR
-        isync
-        blr
-        
-ml_restore_64:
-        mtmsrd	r11							// restore a 64-bit MSR
-        isync
-        blr
-
-    
-/* PCI config cycle probing
- *
- *	boolean_t ml_probe_read(vm_offset_t paddr, unsigned int *val)
- *
- *	Read the memory location at physical address paddr.
- *  This is a part of a device probe, so there is a good chance we will
- *  have a machine check here. So we have to be able to handle that.
- *  We assume that machine checks are enabled both in MSR and HIDs
- */
-
-;			Force a line boundary here
-			.align	5
-			.globl	EXT(ml_probe_read)
-
-LEXT(ml_probe_read)
-
-			mfsprg	r9,2							; Get feature flags
-			
-			rlwinm.	r0,r9,0,pf64Bitb,pf64Bitb		; Are we on a 64-bit machine?
-			rlwinm	r3,r3,0,0,31					; Clean up for 64-bit machines
-			bne++	mpr64bit						; Go do this the 64-bit way...
-
-mpr32bit:	lis		r8,hi16(MASK(MSR_VEC))			; Get the vector flag
-			mfmsr	r0								; Save the current MSR
-			ori		r8,r8,lo16(MASK(MSR_FP))		; Add the FP flag
-
-			neg		r10,r3							; Number of bytes to end of page
-			andc	r0,r0,r8						; Clear VEC and FP
-			rlwinm.	r10,r10,0,20,31					; Clear excess junk and test for page boundary
-			ori		r8,r8,lo16(MASK(MSR_EE)|MASK(MSR_IR)|MASK(MSR_DR))		; Drop EE, IR, and DR
-			mr		r12,r3							; Save the load address
-			andc	r2,r0,r8						; Clear VEC, FP, and EE
-			mtcrf	0x04,r9							; Set the features			
-			cmplwi	cr1,r10,4						; At least 4 bytes left in page?
-			beq-	mprdoit							; We are right on the boundary...
-			li		r3,0
-			bltlr-	cr1								; No, just return failure...
-
-mprdoit:
-
-			bt		pfNoMSRirb,mprNoMSR				; No MSR...
-
-			mtmsr	r2								; Translation and all off
-			isync									; Toss prefetch
-			b		mprNoMSRx
-			
-mprNoMSR:	
-			mr		r5,r0
-			li		r0,loadMSR						; Get the MSR setter SC
-			mr		r3,r2							; Get new MSR
-			sc										; Set it
-			mr		r0,r5
-			li		r3,0
-mprNoMSRx:
-
-			mfspr		r6, hid0					; Get a copy of hid0
-			
-			rlwinm.		r5, r9, 0, pfNoMuMMCKb, pfNoMuMMCKb		; Check for NoMuMMCK
-			bne		mprNoMuM
-			
-			rlwinm		r5, r6, 0, ice+1, ice-1				; Turn off L1 I-Cache
-			mtspr		hid0, r5
-			isync								; Wait for I-Cache off
-			rlwinm		r5, r6, 0, mum+1, mum-1				; Turn off MuM w/ I-Cache on
-			mtspr		hid0, r5
-mprNoMuM:
-
-;
-;			We need to ensure that there is no more than 1 BAT register that
-;			can get a hit. There could be repercussions beyond the ken
-;			of mortal man. It is best not to tempt fate.
-;
-
-;			Note: we will reload these from the shadow BATs later
-
-			li		r10,0							; Clear a register
-			
-			sync									; Make sure all is well
-
-			mtdbatu	1,r10							; Invalidate DBAT 1 
-			mtdbatu	2,r10							; Invalidate DBAT 2 
-			mtdbatu	3,r10							; Invalidate DBAT 3  
-			
-			rlwinm	r10,r12,0,0,14					; Round down to a 128k boundary
-			ori		r11,r10,0x32					; Set uncached, coherent, R/W
-			ori		r10,r10,2						; Make the upper half (128k, valid supervisor)
-			mtdbatl	0,r11							; Set lower BAT first
-			mtdbatu	0,r10							; Now the upper
-			sync									; Just make sure
-			
-			dcbf	0,r12							; Make sure we kill the cache to avoid paradoxes
-			sync
-			
-			ori		r11,r2,lo16(MASK(MSR_DR))		; Turn on data translation
-			mtmsr	r11								; Do it for real
-			isync									; Make sure of it
-			
-			eieio									; Make sure of all previous accesses
-			sync									; Make sure it is all caught up
-			
-			lwz		r11,0(r12)						; Get it and maybe machine check here
-			
-			eieio									; Make sure of ordering again
-			sync									; Get caught up yet again
-			isync									; Do not go further till we are here
-			
-			mtmsr	r2								; Turn translation back off
-			isync
-			
-			lis		r10,hi16(EXT(shadow_BAT)+shdDBAT)	; Get shadow address
-			ori		r10,r10,lo16(EXT(shadow_BAT)+shdDBAT)	; Get shadow address
-			
-			lwz		r5,0(r10)						; Pick up DBAT 0 high
-			lwz		r6,4(r10)						; Pick up DBAT 0 low
-			lwz		r7,8(r10)						; Pick up DBAT 1 high
-			lwz		r8,16(r10)						; Pick up DBAT 2 high
-			lwz		r9,24(r10)						; Pick up DBAT 3 high
-			
-			mtdbatu	0,r5							; Restore DBAT 0 high
-			mtdbatl	0,r6							; Restore DBAT 0 low
-			mtdbatu	1,r7							; Restore DBAT 1 high
-			mtdbatu	2,r8							; Restore DBAT 2 high
-			mtdbatu	3,r9							; Restore DBAT 3 high 
-			sync
-			
-			li		r3,1							; We made it
-			
-			mtmsr	r0								; Restore translation and exceptions
-			isync									; Toss speculations
-			
-			stw		r11,0(r4)						; Save the loaded value
-			blr										; Return...
-			
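-/*
- *	Illustrative sketch (hypothetical caller, shown in C): ml_probe_read
- *	is meant for probing possibly-absent devices, where the access may
- *	machine check:
- *
- *		unsigned int val;
- *		if (ml_probe_read(paddr, &val))
- *			... device responded, val holds the data ...
- *		else
- *			... nothing there; the machine check was absorbed ...
- */
-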
-;			Force a line boundary here. This means we will be able to check addresses better
-			.align	5
-			.globl	EXT(ml_probe_read_mck)
-LEXT(ml_probe_read_mck)
-
-    
-/* PCI config cycle probing - 64-bit
- *
- *	boolean_t ml_probe_read_64(addr64_t paddr, unsigned int *val)
- *
- *	Read the memory location at physical address paddr.
- *  This is a part of a device probe, so there is a good chance we will
- *  have a machine check here. So we have to be able to handle that.
- *  We assume that machine checks are enabled both in MSR and HIDs
- */
-
-;			Force a line boundary here
-			.align	6
-			.globl	EXT(ml_probe_read_64)
-
-LEXT(ml_probe_read_64)
-
-			mfsprg	r9,2							; Get feature flags
-			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
-			rlwinm.	r0,r9,0,pf64Bitb,pf64Bitb		; Are we on a 64-bit machine?
-			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
-			
-			mr		r4,r5							; Move result to common register
-			beq--	mpr32bit						; Go do this the 32-bit way...
-
-mpr64bit:	andi.	r0,r3,3							; Check if we are on a word boundary
-			li		r0,0							; Clear the EE bit (and everything else for that matter)
-			bne--	mprFail							; Boundary not good...
-			mfmsr	r11								; Get the MSR
-			mtmsrd	r0,1							; Set the EE bit only (do not care about RI)
-			rlwinm	r11,r11,0,MSR_EE_BIT,MSR_EE_BIT	; Isolate just the EE bit
-			mfmsr	r10								; Refresh our view of the MSR (VMX/FP may have changed)
-			or		r12,r10,r11						; Turn on EE if on before we turned it off
-			ori		r0,r0,lo16(MASK(MSR_IR)|MASK(MSR_DR))	; Get the IR and DR bits
-			li		r2,1							; Get a 1
-			sldi	r2,r2,63						; Get the 64-bit bit
-			andc	r10,r10,r0						; Clear IR and DR
-			or		r10,r10,r2						; Set 64-bit
-			
-			li		r0,1							; Get a 1
-			mtmsrd	r10								; Translation and EE off, 64-bit on
-			isync			
-			
-			sldi	r0,r0,32+8						; Get the right bit to inhibit caching
-
-			mfspr	r8,hid4							; Get HID4
-			or		r2,r8,r0						; Set bit to make real accesses cache-inhibited
-			sync									; Sync up
-			mtspr	hid4,r2							; Make real accesses cache-inhibited
-			isync									; Toss prefetches
-			
-			lis		r7,0xE000						; Get the unlikeliest ESID possible
-			srdi	r7,r7,1							; Make 0x7FFFFFFFF0000000
-			slbie	r7								; Make sure the ERAT is cleared 
-			
-			sync
-			isync
-
-			eieio									; Make sure of all previous accesses
-			
-			lwz		r11,0(r3)						; Get it and maybe machine check here
-			
-			eieio									; Make sure of ordering again
-			sync									; Get caught up yet again
-			isync									; Do not go further till we are here
-
-			sync									; Sync up
-			mtspr	hid4,r8							; Make real accesses not cache-inhibited
-			isync									; Toss prefetches
-
-			lis		r7,0xE000						; Get the unlikeliest ESID possible
-			srdi	r7,r7,1							; Make 0x7FFFFFFFF0000000
-			slbie	r7								; Make sure the ERAT is cleared 
-
-			mtmsrd	r12								; Restore entry MSR
-			isync
-			
-			stw		r11,0(r4)						; Pass back the result
-			li		r3,1							; Indicate success
-			blr										; Leave...
-
-mprFail:	li		r3,0							; Set failure
-			blr										; Leave...
-
-;			Force a line boundary here. This means we will be able to check addresses better
-			.align	6
-			.globl	EXT(ml_probe_read_mck_64)
-LEXT(ml_probe_read_mck_64)
-
-
-/* Read physical address byte
- *
- *	unsigned int ml_phys_read_byte(vm_offset_t paddr)
- *	unsigned int ml_phys_read_byte_64(addr64_t paddr)
- *
- *	Read the byte at physical address paddr. Memory should not be cache inhibited.
- */
-
-;			Force a line boundary here
-
-			.align	5
-			.globl	EXT(ml_phys_read_byte_64)
-
-LEXT(ml_phys_read_byte_64)
-
-			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
-			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits
-            b		ml_phys_read_byte_join			
-
-			.globl	EXT(ml_phys_read_byte)
-
-LEXT(ml_phys_read_byte)
-            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
-ml_phys_read_byte_join:								; r3 = address to read (reg64_t)
-			mflr	r11								; Save the return
-			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
-			
-			lbz		r3,0(r3)						; Get the byte
-			b		rdwrpost						; Clean up and leave...
-
-
-/* Read physical address half word
- *
- *	unsigned int ml_phys_read_half(vm_offset_t paddr)
- *	unsigned int ml_phys_read_half_64(addr64_t paddr)
- *
- *	Read the half word at physical address paddr. Memory should not be cache inhibited.
- */
-
-;			Force a line boundary here
-
-			.align	5
-			.globl	EXT(ml_phys_read_half_64)
-
-LEXT(ml_phys_read_half_64)
-
-			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
-			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits
-            b		ml_phys_read_half_join		
-
-			.globl	EXT(ml_phys_read_half)
-
-LEXT(ml_phys_read_half)
-            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
-ml_phys_read_half_join:								; r3 = address to read (reg64_t)
-			mflr	r11								; Save the return
-			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
-			
-			lhz		r3,0(r3)						; Get the half word
-			b		rdwrpost						; Clean up and leave...
-
-
-/* Read physical address word
- *
- *	unsigned int ml_phys_read(vm_offset_t paddr)
- *	unsigned int ml_phys_read_64(addr64_t paddr)
- *	unsigned int ml_phys_read_word(vm_offset_t paddr)
- *	unsigned int ml_phys_read_word_64(addr64_t paddr)
- *
- *	Read the word at physical address paddr. Memory should not be cache inhibited.
- */
-
-;			Force a line boundary here
-
-			.align	5
-			.globl	EXT(ml_phys_read_64)
-			.globl	EXT(ml_phys_read_word_64)
-
-LEXT(ml_phys_read_64)
-LEXT(ml_phys_read_word_64)
-
-			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
-			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits
-            b		ml_phys_read_word_join		
-
-			.globl	EXT(ml_phys_read)
-			.globl	EXT(ml_phys_read_word)
-
-LEXT(ml_phys_read)
-LEXT(ml_phys_read_word)
-            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
-ml_phys_read_word_join:								; r3 = address to read (reg64_t)
-			mflr	r11								; Save the return
-			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
-			
-			lwz		r3,0(r3)						; Get the word
-			b		rdwrpost						; Clean up and leave...
-
-
-/* Read physical address double word
- *
- *	unsigned long long ml_phys_read_double(vm_offset_t paddr)
- *	unsigned long long ml_phys_read_double_64(addr64_t paddr)
- *
- *	Read the double word at physical address paddr. Memory should not be cache inhibited.
- */
-
-;			Force a line boundary here
-
-			.align	5
-			.globl	EXT(ml_phys_read_double_64)
-
-LEXT(ml_phys_read_double_64)
-
-			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
-			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
-            b		ml_phys_read_double_join		
-
-			.globl	EXT(ml_phys_read_double)
-
-LEXT(ml_phys_read_double)
-            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
-ml_phys_read_double_join:							; r3 = address to read (reg64_t)
-			mflr	r11								; Save the return
-			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
-			
-			lwz		r4,4(r3)						; Get the low word
-			lwz		r3,0(r3)						; Get the high word
-			b		rdwrpost						; Clean up and leave...
-
-
-/* Write physical address byte
- *
- *	void ml_phys_write_byte(vm_offset_t paddr, unsigned int data)
- *	void ml_phys_write_byte_64(addr64_t paddr, unsigned int data)
- *
- *	Write the byte at physical address paddr. Memory should not be cache inhibited.
- */
-
-			.align	5
-			.globl	EXT(ml_phys_write_byte_64)
-
-LEXT(ml_phys_write_byte_64)
-
-			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
-			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
-			mr		r4,r5							; Copy over the data
-            b		ml_phys_write_byte_join
-
-			.globl	EXT(ml_phys_write_byte)
-
-LEXT(ml_phys_write_byte)
-            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
-ml_phys_write_byte_join:							; r3 = address to write (reg64_t), r4 = data
-			mflr	r11								; Save the return
-			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
-			
-			stb		r4,0(r3)						; Set the byte
-			b		rdwrpost						; Clean up and leave...
-
-
-/* Write physical address half word
- *
- *	void ml_phys_write_half(vm_offset_t paddr, unsigned int data)
- *	void ml_phys_write_half_64(addr64_t paddr, unsigned int data)
- *
- *	Write the half word at physical address paddr. Memory should not be cache inhibited.
- */
-
-			.align	5
-			.globl	EXT(ml_phys_write_half_64)
-
-LEXT(ml_phys_write_half_64)
-
-			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
-			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
-			mr		r4,r5							; Copy over the data
-            b		ml_phys_write_half_join
-
-			.globl	EXT(ml_phys_write_half)
-
-LEXT(ml_phys_write_half)
-            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
-ml_phys_write_half_join:							; r3 = address to write (reg64_t), r4 = data
-			mflr	r11								; Save the return
-			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
-			
-			sth		r4,0(r3)						; Set the half word
-			b		rdwrpost						; Clean up and leave...
-
-
-/* Write physical address word
- *
- *	void ml_phys_write(vm_offset_t paddr, unsigned int data)
- *	void ml_phys_write_64(addr64_t paddr, unsigned int data)
- *	void ml_phys_write_word(vm_offset_t paddr, unsigned int data)
- *	void ml_phys_write_word_64(addr64_t paddr, unsigned int data)
- *
- *	Write the word at physical address paddr. Memory should not be cache inhibited.
- */
-
-			.align	5
-			.globl	EXT(ml_phys_write_64)
-			.globl	EXT(ml_phys_write_word_64)
-
-LEXT(ml_phys_write_64)
-LEXT(ml_phys_write_word_64)
-
-			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
-			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
-			mr		r4,r5							; Copy over the data
-            b		ml_phys_write_word_join
-
-			.globl	EXT(ml_phys_write)
-			.globl	EXT(ml_phys_write_word)
-
-LEXT(ml_phys_write)
-LEXT(ml_phys_write_word)
-            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
-ml_phys_write_word_join:							; r3 = address to write (reg64_t), r4 = data
-			mflr	r11								; Save the return
-			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
-			
-			stw		r4,0(r3)						; Set the word
-			b		rdwrpost						; Clean up and leave...
-
-
-/* Write physical address double word
- *
- *	void ml_phys_write_double(vm_offset_t paddr, unsigned long long data)
- *	void ml_phys_write_double_64(addr64_t paddr, unsigned long long data)
- *
- *	Write the double word at physical address paddr. Memory should not be cache inhibited.
- */
-
-			.align	5
-			.globl	EXT(ml_phys_write_double_64)
-
-LEXT(ml_phys_write_double_64)
-
-			rlwinm	r3,r3,0,1,0						; Copy low 32 bits to top 32
-			rlwimi	r3,r4,0,0,31					; Insert low part of 64-bit address in bottom 32 bits			
-			mr		r4,r5							; Copy over the high data
-			mr		r5,r6							; Copy over the low data
-            b		ml_phys_write_double_join
-
-			.globl	EXT(ml_phys_write_double)
-
-LEXT(ml_phys_write_double)
-            rlwinm   r3,r3,0,0,31    				; truncate address to 32-bits
-ml_phys_write_double_join:							; r3 = address to write (reg64_t), r4,r5 = data (long long)
-			mflr	r11								; Save the return
-			bl		rdwrpre							; Get set up, translation/interrupts off, 64-bit on, etc.
-			
-			stw		r4,0(r3)						; Set the high word
-			stw		r5,4(r3)						; Set the low word
-			b		rdwrpost						; Clean up and leave...
-
-
-			.align	5
-
-rdwrpre:	mfsprg	r12,2							; Get feature flags 
-			lis		r8,hi16(MASK(MSR_VEC))			; Get the vector flag
-			mfmsr	r10								; Save the MSR 
-			ori		r8,r8,lo16(MASK(MSR_FP))		; Add the FP flag
-			mtcrf	0x02,r12						; move pf64Bit
-			andc	r10,r10,r8						; Clear VEC and FP
-			ori		r9,r8,lo16(MASK(MSR_EE)|MASK(MSR_IR)|MASK(MSR_DR))		; Drop EE, DR, and IR
-			li		r2,1							; Prepare for 64 bit
-			andc	r9,r10,r9						; Clear VEC, FP, DR, and EE
-			bf--	pf64Bitb,rdwrpre32				; Join 32-bit code...
-			
-			srdi	r7,r3,31						; Get a 1 if address is in I/O memory
-			rldimi	r9,r2,63,MSR_SF_BIT				; set SF bit (bit 0)
-			cmpldi	cr7,r7,1						; Is source in I/O memory?
-			mtmsrd	r9								; set 64-bit mode, turn off EE, DR, and IR
-			isync									; synchronize
-
-			sldi	r0,r2,32+8						; Get the right bit to turn off caching
-			
-			bnelr++	cr7								; We are not in the I/O area, all ready...
-			
-			mfspr	r8,hid4							; Get HID4
-			or		r2,r8,r0						; Set bit to make real accesses cache-inhibited
-			sync									; Sync up
-			mtspr	hid4,r2							; Make real accesses cache-inhibited
-			isync									; Toss prefetches
-			
-			lis		r7,0xE000						; Get the unlikeliest ESID possible
-			srdi	r7,r7,1							; Make 0x7FFFFFFFF0000000
-			slbie	r7								; Make sure the ERAT is cleared 
-			
-			sync
-			isync
-			blr										; Finally,  all ready...
-	
-			.align	5
-			
-rdwrpre32:	rlwimi	r9,r10,0,MSR_IR_BIT,MSR_IR_BIT	; Leave the IR bit unchanged
-			mtmsr	r9								; Drop EE, DR, and leave IR unchanged
-			isync
-			blr										; All set up, leave...
-			
-			.align	5
-			
-rdwrpost:	mtlr	r11								; Restore the return
-			bt++	pf64Bitb,rdwrpost64				; Join 64-bit code...
-			
-			mtmsr	r10								; Restore entry MSR (sans FP and VEC)
-			isync
-			blr										; Leave...
-			
-rdwrpost64:	bne++	cr7,rdwrpcok					; Skip enabling real mode caching if we did not change it...
-
-			sync									; Sync up
-			mtspr	hid4,r8							; Make real accesses not cache-inhibited
-			isync									; Toss prefetches
-
-			lis		r7,0xE000						; Get the unlikeliest ESID possible
-			srdi	r7,r7,1							; Make 0x7FFFFFFFF0000000
-			slbie	r7								; Make sure the ERAT is cleared 
-
-rdwrpcok:	mtmsrd	r10								; Restore entry MSR (sans FP and VEC)
-			isync
-			blr										; Leave...
-
-
-/* set interrupts enabled or disabled
- *
- *	boolean_t set_interrupts_enabled(boolean_t enable)
- *
- *	Set EE bit to "enable" and return old value as boolean
- */
-
-;			Force a line boundary here
-			.align  5
-			.globl  EXT(ml_set_interrupts_enabled)
- 
-LEXT(ml_set_interrupts_enabled)
-
-			andi.   r4,r3,1							; Are we turning interruptions on?
-			lis		r0,hi16(MASK(MSR_VEC))			; Get vector enable
-			mfmsr	r5								; Get the current MSR
-			ori		r0,r0,lo16(MASK(MSR_EE)|MASK(MSR_FP))	; Get float enable and EE enable
-			rlwinm	r3,r5,17,31,31					; Set return value
-			andc	r5,r5,r0						; Force VEC and FP off
-			bne	    CheckPreemption					; Interrupts going on, check ASTs...
-
-			mtmsr   r5                              ; Slam disable (always going disabled here)
-			isync									; Need this because FP/Vec might go off
-			blr
-
-			.align	5
-
-CheckPreemption:
-			mfsprg	r9,1							; Get current activation
-			lwz		r7,ACT_PER_PROC(r9)				; Get the per_proc block
-			ori		r5,r5,lo16(MASK(MSR_EE))		; Turn on the enable
-			lwz		r8,PP_PENDING_AST(r7)			; Get pending AST mask
-			li		r6,AST_URGENT					; Get the type we will preempt for 
-			lwz		r7,ACT_PREEMPT_CNT(r9)			; Get preemption count
-			lis		r0,hi16(DoPreemptCall)			; High part of Preempt FW call
-			cmpwi	cr1,r7,0						; Are preemptions masked off?
-			and.	r8,r8,r6						; Are we urgent?
-			crorc	cr1_eq,cr0_eq,cr1_eq			; Remember if preemptions are masked or not urgent
-			ori		r0,r0,lo16(DoPreemptCall)   	; Bottom of FW call
-
-			mtmsr	r5								; Restore the MSR now, before we can preempt
-			isync									; Need this because FP/Vec might go off
-
-			beqlr++	cr1								; Return if no preemption...
-			sc										; Preempt
-			blr
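-
-/*
- *	Illustrative sketch (hypothetical caller, shown in C): the usual
- *	save/restore pairing for this routine:
- *
- *		boolean_t prev = ml_set_interrupts_enabled(FALSE);
- *		... critical section ...
- *		(void) ml_set_interrupts_enabled(prev);	// may preempt here
- */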
-
-;			Force a line boundary here
-			.align  5
-			.globl  EXT(timer_update)
- 
-LEXT(timer_update)
-			stw		r4,TIMER_HIGHCHK(r3)
-			eieio
-			stw		r5,TIMER_LOW(r3)
-			eieio
-	 		stw		r4,TIMER_HIGH(r3)
-			blr
-
-;			Force a line boundary here
-			.align  5
-			.globl  EXT(timer_grab)
- 
-LEXT(timer_grab)
-0:			lwz		r11,TIMER_HIGH(r3)
-			lwz		r4,TIMER_LOW(r3)
-			isync
-			lwz		r9,TIMER_HIGHCHK(r3)
-			cmpw	r11,r9
-			bne--	0b
-			mr		r3,r11
-			blr
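-
-/*
- *	Illustrative sketch (in C) of the protocol implemented by
- *	timer_update/timer_grab above: the writer stores HIGHCHK, then LOW,
- *	then HIGH, with eieio ordering each store; the reader retries until
- *	HIGH equals HIGHCHK, proving LOW was not torn by a concurrent update:
- *
- *		do {
- *			high = timer->high;
- *			low  = timer->low;
- *		} while (high != timer->highchk);
- *		return ((uint64_t)high << 32) | low;
- */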
-
-;			Force a line boundary here
-			.align  5
-			.globl  EXT(thread_timer_event)
- 
-LEXT(thread_timer_event)
-			mfsprg	r10,1							; Get the current activation
-			lwz		r10,ACT_PER_PROC(r10)			; Get the per_proc block
-			addi	r10,r10,PP_PROCESSOR
-			lwz		r11,THREAD_TIMER(r10)
-
-			lwz		r9,TIMER_LOW(r11)
-			lwz		r7,TIMER_TSTAMP(r11)
-			lwz		r8,TIMER_TSTAMP+4(r11)
-			subfc	r8,r8,r4
-			subfe	r7,r7,r3
-			addc	r8,r8,r9
-			addze.	r7,r7
-			beq++	0f
-
-			lwz		r6,TIMER_HIGH(r11)
-			add		r7,r7,r6
-			stw		r7,TIMER_HIGHCHK(r11)
-			eieio
-			stw		r8,TIMER_LOW(r11)
-			eieio
-	 		stw		r7,TIMER_HIGH(r11)
-			b		1f
-
-0:			stw		r8,TIMER_LOW(r11)
-
-1:			stw		r5,THREAD_TIMER(r10)
-			stw		r3,TIMER_TSTAMP(r5)
-			stw		r4,TIMER_TSTAMP+4(r5)
-			blr
-
-;			Force a line boundary here
-			.align  5
-			.globl  EXT(state_event)
- 
-LEXT(state_event)
-			mfsprg	r10,1							; Get the current activation
-			lwz		r10,ACT_PER_PROC(r10)			; Get the per_proc block
-			addi	r10,r10,PP_PROCESSOR
-			lwz		r11,CURRENT_STATE(r10)
-
-			lwz		r9,TIMER_LOW(r11)
-			lwz		r7,TIMER_TSTAMP(r11)
-			lwz		r8,TIMER_TSTAMP+4(r11)
-			subfc	r8,r8,r4
-			subfe	r7,r7,r3
-			addc	r8,r8,r9
-			addze.	r7,r7
-			beq++	0f
-
-			lwz		r6,TIMER_HIGH(r11)
-			add		r7,r7,r6
-			stw		r7,TIMER_HIGHCHK(r11)
-			eieio
-			stw		r8,TIMER_LOW(r11)
-			eieio
-	 		stw		r7,TIMER_HIGH(r11)
-			b		1f
-
-0:			stw		r8,TIMER_LOW(r11)
-
-1:			stw		r5,CURRENT_STATE(r10)
-			stw		r3,TIMER_TSTAMP(r5)
-			stw		r4,TIMER_TSTAMP+4(r5)
-			blr
-
-/*  Set machine into idle power-saving mode. 
- *
- *	void machine_idle(void)
- *
- *	We will use the PPC NAP or DOZE for this. 
- *	This call always returns.  Must be called with spllo (i.e., interruptions
- *	enabled).
- *
- */
-
-;			Force a line boundary here
-			.align	5
-			.globl	EXT(machine_idle)
-
-LEXT(machine_idle)
-
-			mfsprg	r12,1							; Get the current activation
-			lwz		r12,ACT_PER_PROC(r12)			; Get the per_proc block
-			lhz		r10,PP_CPU_FLAGS(r12)			; Get the flags
-			lwz		r11,PP_INTS_ENABLED(r12)		; Get interrupt enabled state
-			andi.	r10,r10,SignalReady				; Is Signal ready?
-			cmpwi	cr1,r11,0						; Are interrupts disabled?
-			cror	cr0_eq, cr1_eq, cr0_eq			; Interrupts disabled or Signal not ready?
-			mfmsr	r3								; Save the MSR 
-			
-			beq--	nonap							; Yes, return after re-enabling interrupts
-			lis		r0,hi16(MASK(MSR_VEC))			; Get the vector flag
-			ori		r0,r0,lo16(MASK(MSR_FP))		; Add the FP flag
-			andc	r3,r3,r0						; Clear VEC and FP
-			ori		r0,r0,lo16(MASK(MSR_EE))		; Drop EE also
-			andc	r5,r3,r0						; Clear VEC, FP, DR, and EE
-
-			mtmsr	r5								; Hold up interruptions for now
-			isync									; May have messed with fp/vec
-			mfsprg	r11,2							; Get CPU specific features
-			mfspr	r6,hid0							; Get the current power-saving mode
-			mtcrf	0xC7,r11						; Get the facility flags
-
-			lis		r4,hi16(napm)					; Assume we can nap
-			bt		pfWillNapb,yesnap				; Yeah, nap is ok...
-			
-			lis		r4,hi16(dozem)					; Assume we can doze
-			bt		pfCanDozeb,yesnap				; We can sleep or doze on this machine...
-
-nonap:		ori		r3,r3,lo16(MASK(MSR_EE))		; Flip on EE
-			
-			mtmsr	r3								; Turn interruptions back on
-			blr										; Leave...
-
-yesnap:		mftbu	r9								; Get the upper timebase
-			mftb	r7								; Get the lower timebase
-			mftbu	r8								; Get the upper one again
-			cmplw	r9,r8							; Did the top tick?
-			bne--	yesnap							; Yeah, need to get it again...
-			stw		r8,napStamp(r12)				; Set high order time stamp
-			stw		r7,napStamp+4(r12)				; Set low order nap stamp
-
-			rlwinm.	r0,r11,0,pfAltivecb,pfAltivecb	; Do we have altivec?
-			beq--	minovec							; No...
-			dssall									; Stop the streams before we nap/doze
-			sync
-			lwz		r8,napStamp(r12)				; Reload high order time stamp
-clearpipe:
-			cmplw	r8,r8
-			bne-	clearpipe
-			isync
-
-minovec:	rlwinm.	r7,r11,0,pfNoL2PFNapb,pfNoL2PFNapb	; Turn off L2 Prefetch before nap?
-			beq++	miL2PFok
-
-			mfspr	r7,msscr0						; Get current MSSCR0 value
-			rlwinm	r7,r7,0,0,l2pfes-1				; Disable L2 Prefetch
-			mtspr	msscr0,r7						; Update MSSCR0 value
-			sync
-			isync
-
-miL2PFok:
-			rlwinm.	r7,r11,0,pfSlowNapb,pfSlowNapb	; Should nap at slow speed?
-			beq	minoslownap
-
-			mfspr	r7,hid1							; Get current HID1 value
-			oris	r7,r7,hi16(hid1psm)				; Select PLL1
-			mtspr	hid1,r7							; Update HID1 value
-
-
-;
-;			We have to open up interruptions here because book 4 says that we should
-;			turn on only the POW bit and that we should have interrupts enabled.
-;			The interrupt handler will detect that nap or doze is set if an interrupt
-;			is taken and set everything up to return directly to machine_idle_ret.
-;			So, make sure everything we need there is already set up...
-;
-
-minoslownap:
-			lis		r10,hi16(dozem|napm|sleepm)		; Mask of power management bits
-		
-			bf--	pf64Bitb,mipNSF1				; skip if 32-bit...
-			
-			sldi	r4,r4,32						; Position the flags
-			sldi	r10,r10,32						; Position the masks
-
-mipNSF1:	li		r2,lo16(MASK(MSR_DR)|MASK(MSR_IR))	; Get the translation mask
-			andc	r6,r6,r10						; Clean up the old power bits		
-			ori		r7,r5,lo16(MASK(MSR_EE))		; Flip on EE to make exit msr
-			andc	r5,r5,r2						; Clear IR and DR from current MSR
-			or		r6,r6,r4						; Set nap or doze
-			ori		r5,r5,lo16(MASK(MSR_EE))		; Flip on EE to make nap msr
-			oris	r2,r5,hi16(MASK(MSR_POW))		; Turn on power management in next MSR
-			
-			sync
-			mtspr	hid0,r6							; Set up the HID for nap/doze
-			mfspr	r6,hid0							; Yes, this is silly, keep it here
-			mfspr	r6,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r6,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r6,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r6,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r6,hid0							; Yes, this is a duplicate, keep it here
-			isync									; Make sure it is set
-
-
-;
-;			Turn translation off to nap
-;
-
-			bt		pfNoMSRirb,miNoMSR				; Jump if we need to use SC for this...
-			mtmsr	r5								; Turn translation off, interrupts on
-			isync									; Wait for it
-			b		miNoMSRx						; Jump back in line...
-			
-miNoMSR:	mr		r3,r5							; Pass in the new MSR value 
-			li		r0,loadMSR						; MSR setter ultrafast
-			sc										; Do it to it like you never done before...
-
-miNoMSRx:	bf--	pf64Bitb,mipowloop				; skip if 32-bit...
-			
-			li		r3,0x10							; Fancy nap threshold is 0x10 ticks
-			mftb	r8								; Get the low half of the time base
-			mfdec	r4								; Get the decrementer ticks
-			cmplw	r4,r3							; Less than threshold?
-			blt		mipowloop
-			
-			mtdec	r3								; Load decrementer with threshold
-			isync									; and make sure,
-			mfdec	r3								; really sure, it gets there
-			
-			rlwinm	r6,r2,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Clear out the EE bit
-			sync									; Make sure queues are clear
-			mtmsr	r6								; Set MSR with EE off but POW on
-			isync									; Make sure this takes before we proceed
-			
-			mftb	r9								; Get the low half of the time base
-			sub		r9,r9,r8						; Get the number of ticks spent waiting
-			sub		r4,r4,r9						; Adjust the decrementer value
-			
-			mtdec	r4								; Load decrementer with the rest of the timeout
-			isync									; and make sure,
-			mfdec	r4								; really sure, it gets there
-			
-mipowloop:
-			sync									; Make sure queues are clear
-			mtmsr	r2								; Nap or doze, MSR with POW, EE set, translation off
-			isync									; Make sure this takes before we proceed
-			b		mipowloop						; loop if POW does not take
-
-;
-;			Note that the interrupt handler will turn off the nap/doze bits in the hid.
-;			Also remember that the interrupt handler will force return to here whenever
-;			the nap/doze bits are set.
-;
-			.globl	EXT(machine_idle_ret)
-LEXT(machine_idle_ret)
-			mtmsr	r7								; Make sure the MSR is what we want
-			isync									; In case we turn on translation
-;
-;			Protect against a lost decrementer trap: if the current decrementer value is negative
-;			by more than 10 ticks, re-arm it, since it is unlikely to fire at this point...
-;			A hardware interrupt got us out of machine_idle and may also have contributed to this state
-; 
-			mfdec	r6								; Get decrementer
-			cmpwi	cr0,r6,-10						; Compare decrementer with -10
-			bgelr++									; Return if greater
-			li		r0,1							; Load 1
-			mtdec	r0								; Set decrementer to 1
-			blr										; Return...
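-
-;			The guard above, restated in C (a minimal sketch; read_dec() and
-;			set_dec() stand in for mfdec/mtdec, they are not kernel routines):
-;
-;				if (read_dec() < -10)			/* the pop was probably lost */
-;					set_dec(1);					/* force an imminent pop */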
-
-/*  Put machine to sleep. 
- *	This call never returns. We always exit sleep via a soft reset.
- *	All external interruptions must be drained at this point and disabled.
- *
- *	void ml_ppc_do_sleep(void)
- *
- *	We will use the PPC SLEEP for this. 
- *
- *	There is one bit of hackery in here: we need to enable for
- *	interruptions when we go to sleep and there may be a pending
- *	decrementer interrupt.  So we make the decrementer 0x7FFFFFFF and enable for
- *	interruptions. The decrementer interrupt vector recognizes this and returns
- *	directly back here.
- *
- */
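-
-;			In outline, the sequence below (a hedged C-style sketch; the names
-;			are illustrative, not the kernel's):
-;
-;				set_dec(0x7FFFFFFF);			/* park the decrementer */
-;				hid0 |= SLEEP;					/* nap bit on 64-bit machines */
-;				enable_interrupts();			/* drain anything pending */
-;				disable_interrupts();
-;				for (;;)
-;					set_msr(msr | POW | EE);	/* power down; exit is via reset */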
-
-;			Force a line boundary here
-			.align	5
-			.globl	EXT(ml_ppc_do_sleep)
-
-LEXT(ml_ppc_do_sleep)
-
-#if 0
-			mfmsr	r5								; Hack to spin instead of sleep 
-			rlwinm	r5,r5,0,MSR_DR_BIT+1,MSR_IR_BIT-1	; Turn off translation	
-			rlwinm	r5,r5,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Turn off interruptions
-			mtmsr	r5								; No talking
-			isync
-			
-deadsleep:	addi	r3,r3,1							; Make analyzer happy
-			addi	r3,r3,1
-			addi	r3,r3,1
-			b		deadsleep						; Die the death of 1000 joys...
-#endif	
-			
-			mfsprg	r12,1							; Get the current activation
-			lwz		r12,ACT_PER_PROC(r12)			; Get the per_proc block
-			mfsprg	r11,2							; Get CPU specific features
-			eqv		r10,r10,r10						; Get all foxes
-			mtcrf	0x04,r11						; move pfNoMSRirb to cr5
-			mfspr	r4,hid0							; Get the current power-saving mode
-			mtcrf	0x02,r11						; move pf64Bit to cr6
-
-			rlwinm.	r5,r11,0,pfNoL2PFNapb,pfNoL2PFNapb	; Turn off L2 Prefetch before sleep?
-			beq	mpsL2PFok
-
-			mfspr	r5,msscr0						; Get current MSSCR0 value
-			rlwinm	r5,r5,0,0,l2pfes-1				; Disable L2 Prefetch
-			mtspr	msscr0,r5						; Update MSSCR0 value
-			sync
-			isync
-
-mpsL2PFok:
-			bt++	pf64Bitb,mpsPF64bit				; PM bits are shifted on 64bit systems.
-
-			rlwinm	r4,r4,0,sleep+1,doze-1			; Clear all possible power-saving modes (not DPM though)
-			oris	r4,r4,hi16(sleepm)				; Set sleep
-			b		mpsClearDEC
-
-mpsPF64bit:
-			lis		r5, hi16(dozem|napm|sleepm)		; Clear all possible power-saving modes (not DPM though)
-			sldi	r5, r5, 32
-			andc	r4, r4, r5
-			lis		r5, hi16(napm)					; Set nap (sleep is entered via nap on 64-bit)
-			sldi	r5, r5, 32
-			or		r4, r4, r5
-
-mpsClearDEC:
-			mfmsr	r5								; Get the current MSR
-			rlwinm	r10,r10,0,1,31					; Make 0x7FFFFFFF
-			mtdec	r10								; Load decrementer with 0x7FFFFFFF
-			isync									; and make sure,
-			mfdec	r9								; really sure, it gets there
-			
-			li		r2,1							; Prepare for 64 bit
-			rlwinm	r5,r5,0,MSR_DR_BIT+1,MSR_IR_BIT-1	; Turn off translation		
-;
-;			Note that we need translation off before we set the HID to sleep.  Otherwise
-;			we will ignore any PTE misses that occur and cause an infinite loop.
-;
-			bf++	pf64Bitb,mpsCheckMSR			; check 64-bit processor
-			rldimi	r5,r2,63,MSR_SF_BIT				; set SF bit (bit 0)
-			mtmsrd	r5								; set 64-bit mode, turn off EE, DR, and IR
-			isync									; Toss prefetch                           
-			b		mpsNoMSRx
-
-mpsCheckMSR:
-			bt		pfNoMSRirb,mpsNoMSR				; No MSR...
-
-			mtmsr	r5								; Translation off
-			isync									; Toss prefetch
-			b		mpsNoMSRx
-			
-mpsNoMSR:	
-			li		r0,loadMSR						; Get the MSR setter SC
-			mr		r3,r5							; Get new MSR
-			sc										; Set it
-mpsNoMSRx:
-
-			ori		r3,r5,lo16(MASK(MSR_EE))		; Flip on EE
-			sync
-			mtspr	hid0,r4							; Set up the HID to sleep
-			mfspr	r4,hid0							; Yes, this is silly, keep it here
-			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
-
-			mtmsr	r3								; Enable for interrupts to drain decrementer
-				
-			add		r6,r4,r5						; Just waste time
-			add		r6,r6,r4						; A bit more
-			add		r6,r6,r5						; A bit more
-
-			mtmsr	r5								; Interruptions back off
-			isync									; Toss prefetch
-
-;
-;			We are here with translation off, interrupts off, all possible
-;			interruptions drained off, and a decrementer that will not pop.
-;
-
-			bl		EXT(cacheInit)					; Clear out the caches.  This will leave them on
-			bl		EXT(cacheDisable)				; Turn off all caches
-			
-			mfmsr	r5								; Get the current MSR
-			oris	r5,r5,hi16(MASK(MSR_POW))		; Turn on power management in next MSR
-													; Leave EE off because power goes off shortly
-			mfsprg	r12,0							; Get the per_proc_info
-			li		r10,PP_CPU_FLAGS
-			lhz		r11,PP_CPU_FLAGS(r12)			; Get the flags
-			ori		r11,r11,SleepState				; Mark SleepState
-			sth		r11,PP_CPU_FLAGS(r12)			; Set the flags
-			dcbf	r10,r12
-			
-			mfsprg	r11,2							; Get CPU specific features
-			rlwinm.	r0,r11,0,pf64Bitb,pf64Bitb		; Test for 64 bit processor
-			eqv		r4,r4,r4						; Get all foxes
-			rlwinm	r4,r4,0,1,31					; Make 0x7FFFFFFF
-			beq		slSleepNow						; skip if 32-bit...
-			li		r3, 0x4000						; Cause decrementer to roll over soon
-			mtdec	r3								; Load decrementer with 0x00004000
-			isync									; and make sure,
-			mfdec	r3								; really sure, it gets there
-			
-slSleepNow:
-			sync									; Sync it all up
-			mtmsr	r5								; Do sleep with interruptions enabled
-			isync									; Take a pill
-			mtdec	r4								; Load decrementer with 0x7FFFFFFF
-			isync									; and make sure,
-			mfdec	r3								; really sure, it gets there
-			b		slSleepNow						; Go back to sleep if we wake up...
-			
-
-
-/*  Initialize all caches including the TLBs
- *
- *	void cacheInit(void)
- *
- *	This is used to force the caches to an initial clean state.  First, we
- *	check if the cache is on; if so, we flush the contents to memory.
- *	Then we invalidate the L1. Next, we configure and invalidate the L2, etc.
- *	Finally, we turn all of the caches back on.
- *
- *	Note that if translation is not disabled when this is called, the TLB will not
- *	be completely clear after return.
- *
- */
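-
-;			The overall ordering, as a C-style sketch (illustrative only; the
-;			helper names are not the kernel's):
-;
-;				purge_tlbs();						/* cipurgeTLB below */
-;				if (l1_enabled()) flush_l1();		/* hw assist or displacement */
-;				invalidate_l1();
-;				flush_and_invalidate_l2();			/* if present */
-;				flush_and_invalidate_l3();			/* if present */
-;				enable_caches();					/* restore configured state */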
-
-;			Force a line boundary here
-			.align	5
-			.globl	EXT(cacheInit)
-
-LEXT(cacheInit)
-
-			mfsprg	r12,0							; Get the per_proc_info
-			mfspr	r9,hid0							; Get the current power-saving mode
-			
-			mfsprg	r11,2							; Get CPU specific features
-			mfmsr	r7								; Get the current MSR
-			rlwinm	r7,r7,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
-			rlwinm	r7,r7,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
-			rlwimi	r11,r11,pfLClckb+1,31,31		; Move pfLClck to another position (to keep from using non-volatile CRs)
-			rlwinm	r5,r7,0,MSR_DR_BIT+1,MSR_IR_BIT-1	; Turn off translation		
-			rlwinm	r5,r5,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Turn off interruptions
-			mtcrf	0x87,r11						; Get the feature flags
-			lis		r10,hi16(dozem|napm|sleepm|dpmm)	; Mask of power management bits
-			bf--	pf64Bitb,cIniNSF1				; Skip if 32-bit...
-			
-			sldi	r10,r10,32						; Position the masks
-
-cIniNSF1:	andc	r4,r9,r10						; Clean up the old power bits		
-			mtspr	hid0,r4							; Set up the HID
-			mfspr	r4,hid0							; Yes, this is silly, keep it here
-			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r4,hid0							; Yes, this is a duplicate, keep it here
-
-			bt		pfNoMSRirb,ciNoMSR				; No MSR...
-
-			mtmsr	r5								; Translation and all off
-			isync									; Toss prefetch
-			b		ciNoMSRx
-			
-ciNoMSR:	
-			li		r0,loadMSR						; Get the MSR setter SC
-			mr		r3,r5							; Get new MSR
-			sc										; Set it
-ciNoMSRx:
-			
-			bf		pfAltivecb,cinoDSS				; No Altivec here...
-			
-			dssall									; Stop streams
-			sync
-
-cinoDSS:	li		r5,tlbieLock					; Get the TLBIE lock
-			li		r0,128							; Get number of TLB entries
-			
-			li		r6,0							; Start at 0
-			bf--	pf64Bitb,citlbhang				; Skip if 32-bit...
-			li		r0,1024							; Get the number of TLB entries
-
-citlbhang:	lwarx	r2,0,r5							; Get the TLBIE lock
-			mr.		r2,r2							; Is it locked?
-			bne-	citlbhang						; It is locked, go wait...
-			stwcx.	r0,0,r5							; Try to get it
-			bne-	citlbhang						; We was beat...
-
-			mtctr	r0								; Set the CTR
-			
-cipurgeTLB:	tlbie	r6								; Purge this entry
-			addi	r6,r6,4096						; Next page
-			bdnz	cipurgeTLB						; Do them all...
-			
-			mtcrf	0x80,r11						; Set SMP capability
-			sync									; Make sure all TLB purges are done
-			eieio									; Order, order in the court
-			
-			bf		pfSMPcapb,cinoSMP				; SMP incapable...
-			
-			tlbsync									; Sync all TLBs
-			sync
-			isync
-			
-			bf--	pf64Bitb,cinoSMP				; Skip if 32-bit...
-			ptesync									; Wait for quiet again
-			sync
-			
-cinoSMP:	stw		r2,tlbieLock(0)					; Unlock TLBIE lock
-
-			bt++	pf64Bitb,cin64					; Skip if 64-bit...
-
-			rlwinm.	r0,r9,0,ice,dce					; Were either of the level 1s on?
-			beq-	cinoL1							; No, no need to flush...
-			
-            rlwinm.	r0,r11,0,pfL1fab,pfL1fab		; do we have L1 flush assist?
-			beq		ciswdl1							; If no hw flush assist, go do by software...
-			
-			mfspr	r8,msscr0						; Get the memory system control register
-			oris	r8,r8,hi16(dl1hwfm)				; Turn on the hardware flush request
-			
-			mtspr	msscr0,r8						; Start the flush operation
-			
-ciwdl1f:	mfspr	r8,msscr0						; Get the control register again
-			
-			rlwinm.	r8,r8,0,dl1hwf,dl1hwf			; Has the flush request been reset yet?
-			bne		ciwdl1f							; No, flush is still in progress...
-			b		ciinvdl1						; Go invalidate l1...
-			
-;
-;			We need either to make this very complicated or to use ROM for
-;			the flush.  The problem is that if, during the following sequence, a
-;			snoop occurs that invalidates one of the lines in the cache, the
-;			PLRU sequence will be altered, making it possible to miss lines
-;			during the flush.  So, we need either to dedicate an area of RAM
-;			to each processor, lock use of a RAM area, or use ROM.  ROM is
-;			by far the easiest. Note that this is not an issue for machines
-;			that have hardware flush assists.
-;
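-;			For illustration, that displacement flush in C (a sketch; the
-;			constants come from the code below: ROM at 0xFFF00000, 32-byte
-;			lines, 3/2 of the L1 size):
-;
-;				volatile char *rom = (char *)0xFFF00000;
-;				for (off = 0; off < (3 * l1_size) / 2; off += 32)
-;					(void)rom[off];				/* displace every line */
-;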
-
-ciswdl1:	lwz		r0,pfl1dSize(r12)				; Get the level 1 cache size
-					
-			bf		31,cisnlck						; Skip if pfLClck not set...
-			
-			mfspr	r4,msscr0						; ?
-			rlwinm	r6,r4,0,0,l2pfes-1				; ?
-			mtspr	msscr0,r6						; Set it
-			sync
-			isync
-			
-			mfspr	r8,ldstcr						; Save the LDSTCR
-			li		r2,1							; Get a mask of 0x01
-			lis		r3,0xFFF0						; Point to ROM
-			rlwinm	r11,r0,29,3,31					; Get the amount of memory to handle all indexes
-
-			li		r6,0							; Start here
-			
-cisiniflsh:	dcbf	r6,r3							; Flush each line of the range we use
-			addi	r6,r6,32						; Bump to the next
-			cmplw	r6,r0							; Have we reached the end?
-			blt+	cisiniflsh						; Nope, continue initial flush...
-			
-			sync									; Make sure it is done
-	
-			addi	r11,r11,-1						; Get mask for index wrap	
-			li		r6,0							; Get starting offset
-						
-cislckit:	not		r5,r2							; Lock all but 1 way
-			rlwimi	r5,r8,0,0,23					; Build LDSTCR
-			mtspr	ldstcr,r5						; Lock a way
-			sync									; Clear out memory accesses
-			isync									; Wait for all
-			
-			
-cistouch:	lwzx	r10,r3,r6						; Pick up some trash
-			addi	r6,r6,32						; Go to the next index
-			and.	r0,r6,r11						; See if we are about to do next index
-			bne+	cistouch						; Nope, do more...
-			
-			sync									; Make sure it is all done
-			isync									
-			
-			sub		r6,r6,r11						; Back up to start + 1
-			addi	r6,r6,-1						; Get it right
-			
-cisflush:	dcbf	r3,r6							; Flush everything out
-			addi	r6,r6,32						; Go to the next index
-			and.	r0,r6,r11						; See if we are about to do next index
-			bne+	cisflush						; Nope, do more...
-
-			sync									; Make sure it is all done
-			isync									
-			
-			
-			rlwinm.	r2,r2,1,24,31					; Shift to next way
-			bne+	cislckit						; Do this for all ways...
-
-			mtspr	ldstcr,r8						; Slam back to original
-			sync
-			isync
-			
-			mtspr	msscr0,r4						; ?
-			sync
-			isync
-
-			b		cinoL1							; Go on to level 2...
-			
-
-cisnlck:	rlwinm	r2,r0,0,1,30					; Double cache size
-			add		r0,r0,r2						; Get 3 times cache size
-			rlwinm	r0,r0,26,6,31					; Get 3/2 number of cache lines
-			lis		r3,0xFFF0						; Dead-reckoned ROM address for now
-			mtctr	r0								; Number of lines to flush
-
-ciswfldl1a:	lwz		r2,0(r3)						; Flush anything else
-			addi	r3,r3,32						; Next line
-			bdnz	ciswfldl1a						; Flush the lot...
-			
-ciinvdl1:	sync									; Make sure all flushes have been committed
-
-			mfspr	r8,hid0							; Get the HID0 bits
-			rlwinm	r8,r8,0,dce+1,ice-1				; Clear cache enables
-			mtspr	hid0,r8							; and turn off L1 cache
-			sync									; Make sure all is done
-			isync
-
-			ori		r8,r8,lo16(icem|dcem|icfim|dcfim)	; Set the HID0 bits for enable, and invalidate
-			sync
-			isync										
-			
-			mtspr	hid0,r8							; Start the invalidate and turn on cache	
-			rlwinm	r8,r8,0,dcfi+1,icfi-1			; Turn off the invalidate bits
-			mtspr	hid0,r8							; Turn off the invalidate (needed for some older machines)
-			sync
-
-			
-cinoL1:
-;
-;			Flush and disable the level 2
-;
-            mfsprg	r10,2							; need to check 2 features we did not put in CR
-            rlwinm.	r0,r10,0,pfL2b,pfL2b			; do we have L2?
-			beq		cinol2							; No level 2 cache to flush
-
-			mfspr	r8,l2cr							; Get the L2CR
-			lwz		r3,pfl2cr(r12)					; Get the L2CR value
-			rlwinm.	r0,r8,0,l2e,l2e					; Was the L2 enabled?
-			bne		ciflushl2						; Yes, force flush
-			cmplwi	r8, 0							; Was the L2 all the way off?
-			beq		ciinvdl2						; Yes, force invalidate
-			lis		r0,hi16(l2sizm|l2clkm|l2ramm|l2ohm)	; Get configuration bits
-			xor		r2,r8,r3						; Get changing bits?
-			ori		r0,r0,lo16(l2slm|l2dfm|l2bypm)	; More config bits
-			and.	r0,r0,r2						; Did any change?
-			bne-	ciinvdl2						; Yes, just invalidate and get PLL synced...		
-			
-ciflushl2:
-            rlwinm.	r0,r10,0,pfL2fab,pfL2fab		; hardware-assisted L2 flush?
-			beq		ciswfl2							; Flush not in hardware...
-			
-			mr		r10,r8							; Take a copy now
-			
-			bf		31,cinol2lck					; Skip if pfLClck not set...
-			
-			oris	r10,r10,hi16(l2ionlym|l2donlym)	; Set both instruction- and data-only
-			sync
-			mtspr	l2cr,r10						; Lock out the cache
-			sync
-			isync
-			
-cinol2lck:	ori		r10,r10,lo16(l2hwfm)			; Request flush
-			sync									; Make sure everything is done
-			
-			mtspr	l2cr,r10						; Request flush
-			
-cihwfl2:	mfspr	r10,l2cr						; Get back the L2CR
-			rlwinm.	r10,r10,0,l2hwf,l2hwf			; Is the flush over?
-			bne+	cihwfl2							; Nope, keep going...
-			b		ciinvdl2						; Flush done, go invalidate L2...
-			
-ciswfl2:
-			lwz		r0,pfl2Size(r12)				; Get the L2 size
-			oris	r2,r8,hi16(l2dom)				; Set L2 to data only mode
-
-			b		ciswfl2doa					; Branch to next line...
-
-			.align  5
-ciswfl2doc:
-			mtspr	l2cr,r2							; Disable L2
-			sync
-			isync
-			b		ciswfl2dod					; It is off, go invalidate it...
-
-ciswfl2doa:
-			b		ciswfl2dob					; Branch to next...
-
-ciswfl2dob:
-			sync								; Finish memory stuff
-			isync								; Stop speculation
-			b		ciswfl2doc					; Jump back up and turn on data only...
-ciswfl2dod:
-			rlwinm	r0,r0,27,5,31					; Get the number of lines
-			lis		r10,0xFFF0						; Dead-reckoned ROM for now
-			mtctr	r0								; Set the number of lines
-			
-ciswfldl2a:	lwz		r0,0(r10)						; Load something to flush something
-			addi	r10,r10,32						; Next line
-			bdnz	ciswfldl2a						; Do the lot...
-			
-ciinvdl2:	rlwinm	r8,r3,0,l2e+1,31				; Clear the enable bit
-			b		cinla							; Branch to next line...
-
-			.align  5
-cinlc:		mtspr	l2cr,r8							; Disable L2
-			sync
-			isync
-			b		ciinvl2							; It is off, go invalidate it...
-			
-cinla:		b		cinlb							; Branch to next...
-
-cinlb:		sync									; Finish memory stuff
-			isync									; Stop speculation
-			b		cinlc							; Jump back up and turn off cache...
-			
-ciinvl2:	sync
-			isync
-
-			cmplwi	r3, 0							; Should the L2 be all the way off?
-			beq	cinol2							; Yes, done with L2
-
-			oris	r2,r8,hi16(l2im)				; Get the invalidate flag set
-			
-			mtspr	l2cr,r2							; Start the invalidate
-			sync
-			isync
-ciinvdl2a:	mfspr	r2,l2cr							; Get the L2CR
-            mfsprg	r0,2							; need to check a feature in "non-volatile" set
-            rlwinm.	r0,r0,0,pfL2ib,pfL2ib			; flush in HW?
-			beq		ciinvdl2b						; Flush not in hardware...
-			rlwinm.	r2,r2,0,l2i,l2i					; Is the invalidate still going?
-			bne+	ciinvdl2a						; Assume so, this will take a looong time...
-			sync
-			b		cinol2							; No level 2 cache to flush
-ciinvdl2b:
-			rlwinm.	r2,r2,0,l2ip,l2ip				; Is the invalidate still going?
-			bne+	ciinvdl2a						; Assume so, this will take a looong time...
-			sync
-			mtspr	l2cr,r8							; Turn off the invalidate request
-			
-cinol2:
-			
-;
-;			Flush and enable the level 3
-;
-			bf		pfL3b,cinol3					; No level 3 cache to flush
-
-			mfspr	r8,l3cr							; Get the L3CR
-			lwz		r3,pfl3cr(r12)					; Get the L3CR value
-			rlwinm.	r0,r8,0,l3e,l3e					; Was the L3 enabled?
-			bne		ciflushl3						; Yes, force flush
-			cmplwi	r8, 0							; Was the L3 all the way off?
-			beq		ciinvdl3						; Yes, force invalidate
-			lis		r0,hi16(l3pem|l3sizm|l3dxm|l3clkm|l3spom|l3ckspm)	; Get configuration bits
-			xor		r2,r8,r3						; Get changing bits?
-			ori		r0,r0,lo16(l3pspm|l3repm|l3rtm|l3cyam|l3dmemm|l3dmsizm)	; More config bits
-			and.	r0,r0,r2						; Did any change?
-			bne-	ciinvdl3						; Yes, just invalidate and get PLL synced...
-			
-ciflushl3:
-			sync									; 7450 book says do this even though not needed
-			mr		r10,r8							; Take a copy now
-			
-			bf		31,cinol3lck					; Skip if pfLClck not set...
-			
-			oris	r10,r10,hi16(l3iom)				; Set instruction-only
-			ori		r10,r10,lo16(l3donlym)			; Set data-only
-			sync
-			mtspr	l3cr,r10						; Lock out the cache
-			sync
-			isync
-			
-cinol3lck:	ori		r10,r10,lo16(l3hwfm)			; Request flush
-			sync									; Make sure everything is done
-			
-			mtspr	l3cr,r10						; Request flush
-			
-cihwfl3:	mfspr	r10,l3cr						; Get back the L3CR
-			rlwinm.	r10,r10,0,l3hwf,l3hwf			; Is the flush over?
-			bne+	cihwfl3							; Nope, keep going...
-
-ciinvdl3:	rlwinm	r8,r3,0,l3e+1,31				; Clear the enable bit
-			sync									; Make sure of life, liberty, and justice
-			mtspr	l3cr,r8							; Disable L3
-			sync
-
-			cmplwi	r3, 0							; Should the L3 be all the way off?
-			beq	cinol3							; Yes, done with L3
-
-			ori		r8,r8,lo16(l3im)				; Get the invalidate flag set
-
-			mtspr	l3cr,r8							; Start the invalidate
-
-ciinvdl3b:	mfspr	r8,l3cr							; Get the L3CR
-			rlwinm.	r8,r8,0,l3i,l3i					; Is the invalidate still going?
-			bne+	ciinvdl3b						; Assume so...
-			sync
-
-			lwz		r10,pfBootConfig(r12)			; ?
-			rlwinm.	r10,r10,24,28,31				; ?
-			beq		ciinvdl3nopdet					; ?
-
-			mfspr	r8,l3pdet						; ?
-			srw		r2,r8,r10						; ?
-			rlwimi	r2,r8,0,24,31					; ?
-			subfic	r10,r10,32						; ?
-			li		r8,-1							; ?
-			ori		r2,r2,0x0080					; ?
-			slw		r8,r8,r10						; ?
-			or		r8,r2,r8						; ?
-			mtspr	l3pdet,r8						; ?
-			isync
-
-ciinvdl3nopdet:
-			mfspr	r8,l3cr							; Get the L3CR
-			rlwinm	r8,r8,0,l3clken+1,l3clken-1		; Clear the clock enable bit
-			mtspr	l3cr,r8							; Disable the clock
-
-			li		r2,128							; ?
-ciinvdl3c:	addi	r2,r2,-1						; ?
-			cmplwi	r2,0							; ?
-			bne+	ciinvdl3c
-
-			mfspr	r10,msssr0						; ?
-			rlwinm	r10,r10,0,vgL3TAG+1,vgL3TAG-1	; ?
-			mtspr	msssr0,r10						; ?
-			sync
-
-			mtspr	l3cr,r3							; Enable it as desired
-			sync
-cinol3:
-            mfsprg	r0,2							; need to check a feature in "non-volatile" set
-            rlwinm.	r0,r0,0,pfL2b,pfL2b				; is there an L2 cache?
-			beq		cinol2a							; No level 2 cache to enable
-
-			lwz		r3,pfl2cr(r12)					; Get the L2CR value
-			cmplwi		r3, 0						; Should the L2 be all the way off?
-			beq		cinol2a							; Yes, done with L2
-			mtspr	l2cr,r3							; Enable it as desired
-			sync
-
-;
-;			Invalidate and turn on L1s
-;
-
-cinol2a:	
-			bt		31,cinoexit						; Skip if pfLClck set...
-
-			rlwinm	r8,r9,0,dce+1,ice-1				; Clear the I- and D- cache enables
-			mtspr	hid0,r8							; Turn off dem caches
-			sync
-			
-			ori		r8,r9,lo16(icem|dcem|icfim|dcfim)	; Set the HID0 bits for enable, and invalidate
-			rlwinm	r9,r8,0,dcfi+1,icfi-1			; Turn off the invalidate bits
-			sync
-			isync											
-
-			mtspr	hid0,r8							; Start the invalidate and turn on L1 cache	
-
-cinoexit:	mtspr	hid0,r9							; Turn off the invalidate (needed for some older machines) and restore entry conditions
-			sync
-			mtmsr	r7								; Restore MSR to entry
-			isync
-			blr										; Return...
-
-
-;
-;			Handle 64-bit architecture
-;			This processor cannot run without caches, so we just push everything out
-;			and flush.  It will be relatively clean afterwards
-;
-			
-			.align	5
-			
-cin64:		
-			mfspr	r10,hid1						; Save hid1
-			mfspr	r4,hid4							; Save hid4
-			mr		r12,r10							; Really save hid1
-			mr		r11,r4							; Get a working copy of hid4
-
-			li		r0,0							; Get a 0
-			eqv		r2,r2,r2						; Get all foxes
-			
-			rldimi	r10,r0,55,7						; Clear I$ prefetch bits (7:8)
-			
-			isync
-			mtspr	hid1,r10						; Stick it
-			mtspr	hid1,r10						; Stick it again
-			isync
-
-			rldimi	r11,r2,38,25					; Disable D$ prefetch (25:25)
-			
-			sync
-			mtspr	hid4,r11						; Stick it
-			isync
-
-			li		r3,8							; Set bit 28+32
-			sldi	r3,r3,32						; Make it bit 28
-			or		r3,r3,r11						; Turn on the flash invalidate L1D$
-			
-			oris	r5,r11,0x0600					; Set disable L1D$ bits		
-			sync
-			mtspr	hid4,r3							; Invalidate
-			isync
-	
-			mtspr	hid4,r5							; Un-invalidate and disable L1D$
-			isync
-			
-			lis		r8,GUSModeReg					; Get the GUS mode ring address
-			mfsprg	r0,2							; Get the feature flags
-			ori		r8,r8,0x8000					; Set to read data
-			rlwinm.	r0,r0,pfSCOMFixUpb+1,31,31		; Set shift if we need a fix me up
-
-			sync
-
-			mtspr	scomc,r8						; Request the GUS mode
-			mfspr	r11,scomd						; Get the GUS mode
-			mfspr	r8,scomc						; Get back the status (we just ignore it)
-			sync
-			isync							
-
-			sld		r11,r11,r0						; Fix up if needed
-
-			ori		r6,r11,lo16(GUSMdmapen)			; Set the bit that means direct L2 cache address
-			lis		r8,GUSModeReg					; Get GUS mode register address
-				
-			sync
-
-			mtspr	scomd,r6						; Set that we want direct L2 mode
-			mtspr	scomc,r8						; Tell GUS we want direct L2 mode
-			mfspr	r3,scomc						; Get back the status
-			sync
-			isync							
-
-			li		r3,0							; Clear start point
-		
-cflushlp:	lis		r6,0x0040						; Pick 4MB line as our target
-			or		r6,r6,r3						; Put in the line offset
-			lwz		r5,0(r6)						; Load a line
-			addis	r6,r6,8							; Roll bit 42:44
-			lwz		r5,0(r6)						; Load a line
-			addis	r6,r6,8							; Roll bit 42:44
-			lwz		r5,0(r6)						; Load a line
-			addis	r6,r6,8							; Roll bit 42:44
-			lwz		r5,0(r6)						; Load a line
-			addis	r6,r6,8							; Roll bit 42:44
-			lwz		r5,0(r6)						; Load a line
-			addis	r6,r6,8							; Roll bit 42:44
-			lwz		r5,0(r6)						; Load a line
-			addis	r6,r6,8							; Roll bit 42:44
-			lwz		r5,0(r6)						; Load a line
-			addis	r6,r6,8							; Roll bit 42:44
-			lwz		r5,0(r6)						; Load a line
-
-			addi	r3,r3,128						; Next line
-			andis.	r5,r3,8							; Have we done enough?
-			beq++	cflushlp						; Not yet...
-			
-			sync
-
-			lis		r6,0x0040						; Pick 4MB line as our target
-
-cflushx:	dcbf	0,r6							; Flush line and invalidate
-			addi	r6,r6,128						; Next line
-			andis.	r5,r6,0x0080					; Have we done enough?
-			beq++	cflushx							; Keep on flushing...
-
-			mr		r3,r10							; Copy current hid1
-			rldimi	r3,r2,54,9						; Set force icbi match mode
-			
-			li		r6,0							; Set start of ICBI range
-			isync
-			mtspr	hid1,r3							; Stick it
-			mtspr	hid1,r3							; Stick it again
-			isync
-
-cflicbi:	icbi	0,r6							; Kill I$
-			addi	r6,r6,128						; Next line
-			andis.	r5,r6,1							; Have we done them all?
-			beq++	cflicbi							; Not yet...
-
-			lis		r8,GUSModeReg					; Get GUS mode register address
-				
-			sync
-
-			mtspr	scomd,r11						; Set that we do not want direct mode
-			mtspr	scomc,r8						; Tell GUS we do not want direct mode
-			mfspr	r3,scomc						; Get back the status
-			sync
-			isync							
-
-			isync
-			mtspr	hid0,r9							; Restore entry hid0
-			mfspr	r9,hid0							; Yes, this is silly, keep it here
-			mfspr	r9,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r9,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r9,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r9,hid0							; Yes, this is a duplicate, keep it here
-			mfspr	r9,hid0							; Yes, this is a duplicate, keep it here
-			isync
-
-			isync
-			mtspr	hid1,r12						; Restore entry hid1
-			mtspr	hid1,r12						; Stick it again
-			isync
-		
-			sync
-			mtspr	hid4,r4							; Restore entry hid4
-			isync
-
-			sync
-			mtmsr	r7								; Restore MSR to entry
-			isync
-			blr										; Return...
-			
-			
-
-/*  Disables all caches
- *
- *	void cacheDisable(void)
- *
- *	Turns off all caches on the processor. They are not flushed.
- *
- */
-
-;			Force a line boundary here
-			.align	5
-			.globl	EXT(cacheDisable)
-
-LEXT(cacheDisable)
-
-			mfsprg	r11,2							; Get CPU specific features
-			mtcrf	0x83,r11						; Set feature flags
-			
-			bf		pfAltivecb,cdNoAlt				; No vectors...
-			
-			dssall									; Stop streams
-			
-cdNoAlt:	sync
-			
-			btlr	pf64Bitb						; No way to disable a 64-bit machine...
-			
-			mfspr	r5,hid0							; Get the hid
-			rlwinm	r5,r5,0,dce+1,ice-1				; Clear the I- and D- cache enables
-			mtspr	hid0,r5							; Turn off dem caches
-			sync
-
-            rlwinm.	r0,r11,0,pfL2b,pfL2b			; is there an L2?
-			beq		cdNoL2							; Skip if no L2...
-
-			mfspr	r5,l2cr							; Get the L2
-			rlwinm	r5,r5,0,l2e+1,31				; Turn off enable bit
-
-			b		cinlaa							; Branch to next line...
-
-			.align  5
-cinlcc:		mtspr	l2cr,r5							; Disable L2
-			sync
-			isync
-			b		cdNoL2							; It is off, we are done...
-			
-cinlaa:		b		cinlbb							; Branch to next...
-
-cinlbb:		sync									; Finish memory stuff
-			isync									; Stop speculation
-			b		cinlcc							; Jump back up and turn off cache...
-
-cdNoL2:
-
-			bf		pfL3b,cdNoL3					; Skip down if no L3...
-			
-			mfspr	r5,l3cr							; Get the L3
-			rlwinm	r5,r5,0,l3e+1,31				; Turn off enable bit
-			rlwinm	r5,r5,0,l3clken+1,l3clken-1		; Turn off cache enable bit
-			mtspr	l3cr,r5							; Disable the caches
-			sync
-			
-cdNoL3:
-			blr										; Leave...
-
-
-/*  Initialize processor thermal monitoring  
- *	void ml_thrm_init(void)
- *
- *	Obsolete, deprecated and will be removed.
- */
-
-;			Force a line boundary here
-			.align	5
-			.globl	EXT(ml_thrm_init)
-
-LEXT(ml_thrm_init)
-			blr
-
-/*  Set thermal monitor bounds 
- *	void ml_thrm_set(unsigned int low, unsigned int high)
- *
- *	Obsolete, deprecated and will be removed.
- */
-
-;			Force a line boundary here
-			.align	5
-			.globl	EXT(ml_thrm_set)
-
-LEXT(ml_thrm_set)
-			blr
-
-/*  Read processor temperature  
- *	unsigned int ml_read_temp(void)
- *
- *	Obsolete, deprecated and will be removed.
- */
-
-;			Force a line boundary here
-			.align	5
-			.globl	EXT(ml_read_temp)
-
-LEXT(ml_read_temp)
-			li		r3,-1
-			blr
-
-/*  Throttle processor speed up or down
- *	unsigned int ml_throttle(unsigned int step)
- *
- *	Returns old speed and sets new.  Both step and return are values from 0 to
- *	255 that define the number of throttle steps, 0 being off and the maximum
- *	being "ictcfim" * 2.
- *
- *	Obsolete, deprecated and will be removed.
- */
-
-;			Force a line boundary here
-			.align	5
-			.globl	EXT(ml_throttle)
-
-LEXT(ml_throttle)
-			li		r3,0
-			blr
-
-/*
-**      ml_get_timebase()
-**
-**      Entry   - R3 contains a pointer to a 64-bit structure.
-**
-**      Exit    - 64-bit structure filled in.
-**
-*/
-;			Force a line boundary here
-			.align	5
-			.globl	EXT(ml_get_timebase)
-
-LEXT(ml_get_timebase)
-
-loop:
-			mftbu   r4
-			mftb    r5
-			mftbu   r6
-			cmpw    r6, r4
-			bne-    loop
-			
-			stw     r4, 0(r3)
-			stw     r5, 4(r3)
-			
-			blr
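-
-/*
-**      The loop above guards against a carry from TBL into TBU between the
-**      two reads.  The same idiom in C (a sketch; __mftbu() and __mftb()
-**      stand in for the mfspr instructions, they are assumed intrinsics):
-**
-**          do {
-**              hi = __mftbu();                     (upper 32 bits)
-**              lo = __mftb();                      (lower 32 bits)
-**          } while (hi != __mftbu());              (retry if upper ticked)
-**
-*/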
-
-/*
- *		unsigned int cpu_number(void)
- *
- *			Returns the current cpu number. 
- */
-
-			.align	5
-			.globl	EXT(cpu_number)
-
-LEXT(cpu_number)
-			mfsprg	r4,1							; Get the current activation
-			lwz		r4,ACT_PER_PROC(r4)				; Get the per_proc block
-			lhz		r3,PP_CPU_NUMBER(r4)			; Get CPU number 
-			blr										; Return...
-
-/*
- *		processor_t current_processor(void)
- *
- *			Returns the current processor. 
- */
-
-			.align	5
-			.globl	EXT(current_processor)
-
-LEXT(current_processor)
-			mfsprg	r3,1							; Get the current activation
-			lwz		r3,ACT_PER_PROC(r3)				; Get the per_proc block
-			addi	r3,r3,PP_PROCESSOR
-			blr
-
-#if	PROCESSOR_SIZE > PP_PROCESSOR_SIZE
-#error processor overflows per_proc
-#endif
-
-/*
- *		ast_t	*ast_pending(void)
- *
- *		Returns the address of the pending AST mask for the current processor.
- */
-
-			.align	5
-			.globl	EXT(ast_pending)
-
-LEXT(ast_pending)
-			mfsprg	r3,1							; Get the current activation
-			lwz		r3,ACT_PER_PROC(r3)				; Get the per_proc block
-			addi	r3,r3,PP_PENDING_AST
-			blr										; Return...
-
-/*
- *		void machine_set_current_thread(thread_t)
- *
- *			Set the current thread
- */
-			.align	5
-			.globl	EXT(machine_set_current_thread)
-
-LEXT(machine_set_current_thread)
-
-			mfsprg	r4,1							; Get spr1
-			lwz		r5,ACT_PER_PROC(r4)				; Get the PerProc from the previous active thread
-			stw		r5,ACT_PER_PROC(r3)				; Set the PerProc in the active thread
-			mtsprg	1,r3							; Set spr1 with the active thread
-			blr										; Return...
-
-/*
- *		thread_t current_thread(void)
- *		thread_t current_act(void)
- *
- *			Return the current thread for outside components.
- */
-			.align	5
-			.globl	EXT(current_thread)
-			.globl	EXT(current_act)
-
-LEXT(current_thread)
-LEXT(current_act)
-
-			mfsprg	r3,1
-			blr
-		
-			.align	5
-			.globl	EXT(mach_absolute_time)
-LEXT(mach_absolute_time)
-1:			mftbu	r3
-			mftb	r4
-			mftbu	r0
-			cmpw	r0,r3
-			bne--	1b  
-			blr
-
-/*
-**      ml_sense_nmi()
-**
-*/
-;			Force a line boundary here
-			.align	5
-			.globl	EXT(ml_sense_nmi)
-
-LEXT(ml_sense_nmi)
-
-			blr										; Leave...
-
-/*
-**      ml_set_processor_speed_powertune()
-**
-*/
-;			Force a line boundary here
-			.align	5
-			.globl	EXT(ml_set_processor_speed_powertune)
-
-LEXT(ml_set_processor_speed_powertune)
-			mflr	r0										; Save the link register
-			stwu    r1, -(FM_ALIGN(4*4)+FM_SIZE)(r1)		; Make some space on the stack
-			stw		r28, FM_ARG0+0x00(r1)					; Save a register
-			stw		r29, FM_ARG0+0x04(r1)					; Save a register
-			stw		r30, FM_ARG0+0x08(r1)					; Save a register
-			stw		r31, FM_ARG0+0x0C(r1)					; Save a register
-			stw		r0, (FM_ALIGN(4*4)+FM_SIZE+FM_LR_SAVE)(r1)	; Save the return
-
-			mfsprg	r31,1									; Get the current activation
-			lwz		r31,ACT_PER_PROC(r31)					; Get the per_proc block
-
-			rlwinm	r28, r3, 31-dnap, dnap, dnap			; Shift the 1 bit to the dnap+32 bit
-			rlwinm	r3, r3, 2, 29, 29						; Shift the 1 to a 4 and mask
-			addi	r3, r3, pfPowerTune0					; Add in the pfPowerTune0 offset
-			lwzx	r29, r31, r3							; Load the PowerTune number 0 or 1
-
-			sldi	r28, r28, 32							; Shift to the top half
-			ld		r3, pfHID0(r31)							; Load the saved hid0 value
-			and		r28, r28, r3							; Save the dnap bit
-			lis		r4, hi16(dnapm)							; Make a mask for the dnap bit
-			sldi	r4, r4, 32								; Shift to the top half
-			andc	r3, r3, r4								; Clear the dnap bit
-			or		r28, r28, r3							; Insert the dnap bit as needed for later
-
-			sync
-			mtspr	hid0, r3								; Turn off dnap in hid0
-			mfspr	r3, hid0								; Yes, this is silly, keep it here
-			mfspr	r3, hid0								; Yes, this is a duplicate, keep it here
-			mfspr	r3, hid0								; Yes, this is a duplicate, keep it here
-			mfspr	r3, hid0								; Yes, this is a duplicate, keep it here
-			mfspr	r3, hid0								; Yes, this is a duplicate, keep it here
-			mfspr	r3, hid0								; Yes, this is a duplicate, keep it here
-			isync											; Make sure it is set
-
-			lis		r3, hi16(PowerTuneControlReg)			; Write zero to the PCR
-			ori		r3, r3, lo16(PowerTuneControlReg)
-			li		r4, 0
-			li		r5, 0
-			bl		_ml_scom_write
-
-			lis		r3, hi16(PowerTuneControlReg)			; Write the PowerTune value to the PCR
-			ori		r3, r3, lo16(PowerTuneControlReg)
-			li		r4, 0
-			mr		r5, r29
-			bl		_ml_scom_write
-
-			rlwinm	r29, r29, 13-6, 6, 7					; Move to PSR speed location and isolate the requested speed
-spsPowerTuneLoop:
-			lis		r3, hi16(PowerTuneStatusReg)			; Read the status from the PSR
-			ori		r3, r3, lo16(PowerTuneStatusReg)
-			li		r4, 0
-			bl		_ml_scom_read
-			srdi	r5, r5, 32
-			rlwinm  r0, r5, 0, 6, 7							; Isolate the current speed
-			rlwimi	r0, r5, 0, 2, 2							; Copy in the change in progress bit
-			cmpw	r0, r29									; Compare the requested and current speeds
-			beq		spsPowerTuneDone
-			rlwinm.	r0, r5, 0, 3, 3
-			beq		spsPowerTuneLoop
-
-spsPowerTuneDone:
-			sync
-			mtspr	hid0, r28								; Turn on dnap in hid0 if needed
-			mfspr	r28, hid0								; Yes, this is silly, keep it here
-			mfspr	r28, hid0								; Yes, this is a duplicate, keep it here
-			mfspr	r28, hid0								; Yes, this is a duplicate, keep it here
-			mfspr	r28, hid0								; Yes, this is a duplicate, keep it here
-			mfspr	r28, hid0								; Yes, this is a duplicate, keep it here
-			mfspr	r28, hid0								; Yes, this is a duplicate, keep it here
-			isync											; Make sure it is set
-
-			lwz		r0, (FM_ALIGN(4*4)+FM_SIZE+FM_LR_SAVE)(r1)	; Get the return
-			lwz		r28, FM_ARG0+0x00(r1)					; Restore a register
-			lwz		r29, FM_ARG0+0x04(r1)					; Restore a register
-			lwz		r30, FM_ARG0+0x08(r1)					; Restore a register
-			lwz		r31, FM_ARG0+0x0C(r1)					; Restore a register
-			lwz		r1, FM_BACKPTR(r1)						; Pop the stack
-			mtlr	r0
-			blr
-
-/*
-**      ml_set_processor_speed_dpll()
-**
-*/
-;			Force a line boundary here
-			.align	5
-			.globl	EXT(ml_set_processor_speed_dpll)
-
-LEXT(ml_set_processor_speed_dpll)
-			mfsprg	r5,1									; Get the current activation
-			lwz		r5,ACT_PER_PROC(r5)						; Get the per_proc block
-			
-			cmplwi	r3, 0									; Turn off BTIC before low speed
-			beq		spsDPLL1
-			mfspr	r4, hid0								; Get the current hid0 value
-			rlwinm	r4, r4, 0, btic+1, btic-1				; Clear the BTIC bit
-			sync
-			mtspr	hid0, r4								; Set the new hid0 value
-			isync
-			sync
-
-spsDPLL1:
-			mfspr	r4, hid1								; Get the current PLL settings
-			rlwimi  r4, r3, 31-hid1ps, hid1ps, hid1ps		; Copy the PLL Select bit
-			stw		r4, pfHID1(r5)							; Save the new hid1 value
-			mtspr	hid1, r4								; Select desired PLL
-
-			cmplwi	r3, 0									; Restore BTIC after high speed
-			bne		spsDPLL2
-			lwz		r4, pfHID0(r5)							; Load the hid0 value
-			sync
-			mtspr	hid0, r4								; Set the hid0 value
-			isync
-			sync
-spsDPLL2:
-			blr
-
-
-/*
-**      ml_set_processor_speed_dfs(divideby)
-**			divideby == 0 then divide by 1 (full speed)
-**			divideby == 1 then divide by 2 (half speed)
-**			divideby == 2 then divide by 4 (quarter speed)
-**			divideby == 3 then divide by 4 (quarter speed) - preferred
-**
-*/
-;			Force a line boundary here
-			.align	5
-			.globl	EXT(ml_set_processor_speed_dfs)
-
-LEXT(ml_set_processor_speed_dfs)
-
-			mfspr	r4,hid1									; Get the current HID1
-			mfsprg	r5,0									; Get the per_proc_info
-			rlwimi	r4,r3,31-hid1dfs1,hid1dfs0,hid1dfs1		; Stick the new divider bits in
-			stw		r4,pfHID1(r5)							; Save the new hid1 value
-			sync
-			mtspr	hid1,r4									; Set the new HID1
-			sync
-			isync
-			blr
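-
-/*
-**      The divideby encoding above, restated in C (an illustrative sketch):
-**
-**          static unsigned int dfs_divisor(unsigned int divideby)
-**          {
-**              return (divideby == 0) ? 1 :
-**                     (divideby == 1) ? 2 : 4;     (2 and 3 both mean /4)
-**          }
-*/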
-
-
-/*
-**      ml_set_processor_voltage()
-**
-*/
-;			Force a line boundary here
-			.align	5
-			.globl	EXT(ml_set_processor_voltage)
-
-LEXT(ml_set_processor_voltage)
-			mfsprg	r5,1									; Get the current activation
-			lwz		r5,ACT_PER_PROC(r5)						; Get the per_proc block
-
-			lwz		r6, pfPowerModes(r5)					; Get the supported power modes
-
-			rlwinm.	r0, r6, 0, pmDPLLVminb, pmDPLLVminb		; Is DPLL Vmin supported
-			beq		spvDone
-
-			mfspr	r4, hid2								; Get HID2 value
-			rlwimi	r4, r3, 31-hid2vmin, hid2vmin, hid2vmin	; Insert the voltage mode bit
-			mtspr	hid2, r4								; Set the voltage mode
-			sync											; Make sure it is done
-
-spvDone:
-			blr
-
-
-;
-;			unsigned int ml_scom_write(unsigned int reg, unsigned long long data)
-;			64-bit machines only
-;			returns status
-;
-
-			.align	5
-			.globl	EXT(ml_scom_write)
-
-LEXT(ml_scom_write)
-
-			rldicr	r3,r3,8,47							; Align the register address correctly
-			rldimi	r5,r4,32,0							; Merge the high part of data
-			sync										; Clean up everything
-			
-			mtspr	scomd,r5							; Stick in the data
-			mtspr	scomc,r3							; Set write to register
-			sync
-			isync					
-
-			mfspr	r3,scomc							; Read back status
-			blr											; Leave...
-
-;
-;			unsigned int ml_scom_read(unsigned int reg, unsigned long long *data)
-;			64-bit machines only
-;			returns status
-;			ASM Callers: data (r4) can be zero and the 64-bit data will be returned in r5
-;
-
-			.align	5
-			.globl	EXT(ml_scom_read)
-
-LEXT(ml_scom_read)
-
-			mfsprg	r0,2								; Get the feature flags
-			rldicr	r3,r3,8,47							; Align the register address correctly
-			rlwinm	r0,r0,pfSCOMFixUpb+1,31,31			; Set shift if we need a fix me up
-			
-			ori		r3,r3,0x8000						; Set to read data
-			sync
-
-			mtspr	scomc,r3							; Request the register
-			mfspr	r5,scomd							; Get the register contents
-			mfspr	r3,scomc							; Get back the status
-			sync
-			isync							
-
-			sld		r5,r5,r0							; Fix up if needed
-
-			cmplwi	r4, 0								; If data pointer is null, just return
-			beqlr										; the received data in r5
-			std		r5,0(r4)							; Pass back the received data			
-			blr											; Leave...
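-
-;			Typical C usage of the pair above (a sketch; PowerTuneControlReg is
-;			the register used earlier in this file, "value" is illustrative):
-;
-;				unsigned long long data;
-;				unsigned int stat;
-;
-;				stat = ml_scom_write(PowerTuneControlReg, value);	/* write 64 bits */
-;				stat = ml_scom_read(PowerTuneControlReg, &data);	/* read them back */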
-
-;
-;			Calculates the hdec to dec ratio
-;
-
-			.align	5
-			.globl	EXT(ml_hdec_ratio)
-
-LEXT(ml_hdec_ratio)
-
-			li		r0,0								; Clear the EE bit (and everything else for that matter)
-			mfmsr	r11									; Get the MSR
-			mtmsrd	r0,1								; Update only the EE bit, clearing it (do not care about RI)
-			rlwinm	r11,r11,0,MSR_EE_BIT,MSR_EE_BIT		; Isolate just the EE bit
-			mfmsr	r10									; Refresh our view of the MSR (VMX/FP may have changed)
-			or		r12,r10,r11							; Turn on EE if on before we turned it off
-
-			mftb	r9									; Get time now
-			mfspr	r2,hdec								; Save hdec
-
-mhrcalc:	mftb	r8									; Get time now
-			sub		r8,r8,r9							; How many ticks?
-			cmplwi	r8,10000							; 10000 yet?
-			blt		mhrcalc								; Nope...
-
-			mfspr	r9,hdec								; Get hdec now
-			sub		r3,r2,r9							; How many ticks?
-			mtmsrd	r12,1								; Flip EE on if needed
-			blr											; Leave...
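-
-;			The calibration above, sketched in C (read_tb() and read_hdec() are
-;			assumed accessors for the timebase and hypervisor decrementer):
-;
-;				uint32_t tb0 = read_tb(), hd0 = read_hdec();
-;				while (read_tb() - tb0 < 10000)
-;					;								/* spin for 10000 TB ticks */
-;				ratio = hd0 - read_hdec();			/* HDEC ticks per 10000 TB ticks */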
-
-
-;
-;			int setPop(time)
-;	
-;			Calculates the number of ticks to the supplied event and
-;			sets the decrementer.  Never sets the time for less than the
-;			minimum, which is 10, nor for more than maxDec, which is usually
-;			0x7FFFFFFF but can be tuned by root.
-;
-;
-
-			.align	7
-			.globl	EXT(setPop)
-
-#define kMin	10
-
-LEXT(setPop)
-
-spOver:		mftbu	r8									; Get upper time
-			addic	r2,r4,-kMin							; Subtract minimum from target
-			mftb	r9									; Get lower
-			addme	r11,r3								; Do you have any bits I could borrow?
-			mftbu	r10									; Get upper again
-			subfe	r0,r0,r0							; Get -1 if we went negative 0 otherwise
-			subc	r7,r2,r9							; Subtract bottom and get carry
-			cmplw	r8,r10								; Did timebase upper tick?
-			subfe	r6,r8,r11							; Get the upper difference accounting for borrow
-			lwz		r12,maxDec(0)						; Get the maximum decrementer size 
-			addme	r0,r0								; Get -1 or -2 if anything negative, 0 otherwise
-			addic	r2,r6,-1							; Set carry if diff < 2**32
-			srawi	r0,r0,1								; Make all foxes
-			subi	r10,r12,kMin						; Adjust maximum for minimum adjust
-			andc	r7,r7,r0							; Pin time at 0 if under minimum
-			subfe	r2,r2,r2							; 0 if diff > 2**32, -1 otherwise		
-			sub		r7,r7,r10							; Negative if duration is less than (max - min)
-			or		r2,r2,r0							; If the duration is negative, it is not too big
-			srawi	r0,r7,31							; -1 if duration is too small
-			and		r7,r7,r2							; Clear duration if high part too big
-			and		r7,r7,r0							; Clear duration if low part too big
-			bne--	spOver								; Timer ticked...
-			add		r3,r7,r12							; Add back the max for total				
-			mtdec	r3									; Set the decrementer
-			blr											; Leave...
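-
-;			The clamping policy, restated in C (a sketch; read_tb64() is an
-;			assumed 64-bit timebase accessor, set_dec() wraps mtdec):
-;
-;				int64_t delta = target - read_tb64();
-;				if (delta < kMin)   delta = kMin;	/* never less than the minimum */
-;				if (delta > maxDec) delta = maxDec;	/* never more than maxDec */
-;				set_dec((int32_t)delta);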
-
-
diff --git a/osfmk/ppc/machine_task.c b/osfmk/ppc/machine_task.c
deleted file mode 100644
index 5decd0ce2..000000000
--- a/osfmk/ppc/machine_task.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-
-#include <kern/task.h>
-#include <kern/thread.h>
-
-kern_return_t
-machine_task_set_state(
-		__unused task_t task, 
-		__unused int flavor,
-		__unused thread_state_t state, 
-		__unused mach_msg_type_number_t state_count)
-{
-	return KERN_FAILURE;
-}
-
-kern_return_t 	
-machine_task_get_state(__unused task_t task, 
-		__unused int flavor, 
-		__unused thread_state_t state,
-		__unused mach_msg_type_number_t *state_count)
-{
-	return KERN_FAILURE;
-}
-
-kern_return_t
-machine_thread_inherit_taskwide(
-				__unused thread_t thread,
-				__unused task_t parent_task)
-{
-	return KERN_FAILURE;
-}
diff --git a/osfmk/ppc/machlimits.h b/osfmk/ppc/machlimits.h
deleted file mode 100644
index b43f64958..000000000
--- a/osfmk/ppc/machlimits.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:41  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:02  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.2.1  1996/12/09  16:55:05  stephen
- * 	nmklinux_1.0b3_shared into pmk1.1
- * 	New file based on hp_pa
- * 	[1996/12/09  11:09:22  stephen]
- *
- * $EndLog$
- */
-/*
- * Copyright (c) 1988 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms are permitted
- * provided that the above copyright notice and this paragraph are
- * duplicated in all such forms and that any documentation,
- * advertising materials, and other materials related to such
- * distribution and use acknowledge that the software was developed
- * by the University of California, Berkeley.  The name of the
- * University may not be used to endorse or promote products derived
- * from this software without specific prior written permission.
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
- *
- *	@(#)machlimits.h	7.1 (Berkeley) 2/15/89
- */
-#ifndef _MACH_MACHLIMITS_H_
-#define _MACH_MACHLIMITS_H_
-
-#define	CHAR_BIT	8		/* number of bits in a char */
-
-#define	SCHAR_MAX	127		/* max value for a signed char */
-#define	SCHAR_MIN	(-128)		/* min value for a signed char */
-
-#define	UCHAR_MAX	255U		/* max value for an unsigned char */
-#define	CHAR_MAX	127		/* max value for a char */
-#define	CHAR_MIN	(-128)		/* min value for a char */
-
-#define	USHRT_MAX	65535U		/* max value for an unsigned short */
-#define	SHRT_MAX	32767		/* max value for a short */
-#define	SHRT_MIN	(-32768)	/* min value for a short */
-
-#define	UINT_MAX	0xFFFFFFFFU	/* max value for an unsigned int */
-#define	INT_MAX		2147483647	/* max value for an int */
-#define	INT_MIN		(-2147483647-1)	/* min value for an int */
-
-#define	ULONG_MAX	UINT_MAX	/* max value for an unsigned long */
-#define	LONG_MAX	INT_MAX		/* max value for a long */
-#define	LONG_MIN	INT_MIN		/* min value for a long */
-
-/* Must be at least two, for internationalization (NLS/KJI) */
-#define MB_LEN_MAX	4		/* multibyte characters */
-
-#endif /* _MACH_MACHLIMITS_H_ */
diff --git a/osfmk/ppc/machparam.h b/osfmk/ppc/machparam.h
deleted file mode 100644
index b5f5374cd..000000000
--- a/osfmk/ppc/machparam.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Copyright (c) 1990, 1991 The University of Utah and
- * the Center for Software Science at the University of Utah (CSS).
- * All rights reserved.
- *
- * Permission to use, copy, modify and distribute this software is hereby
- * granted provided that (1) source code retains these copyright, permission,
- * and disclaimer notices, and (2) redistributions including binaries
- * reproduce the notices in supporting documentation, and (3) all advertising
- * materials mentioning features or use of this software display the following
- * acknowledgement: ``This product includes software developed by the Center
- * for Software Science at the University of Utah.''
- *
- * THE UNIVERSITY OF UTAH AND CSS ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
- * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSS DISCLAIM ANY LIABILITY OF
- * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * CSS requests users of this software to return to css-dist@cs.utah.edu any
- * improvements that they make and grant CSS redistribution rights.
- *
- * 	Utah $Hdr: machparam.h 1.7 92/05/22$
- */
-
-#ifndef _PPC_MACHPARAM_H_
-#define _PPC_MACHPARAM_H_
-
-/*
- * Machine dependent constants for ppc. 
- * Added as needed (for device drivers).
- */
-#define	NBPG	4096		/* bytes/page */
-#define	PGSHIFT	12		/* LOG2(NBPG) */
-
-#define DEV_BSHIFT      10               /* log2(DEV_BSIZE) */
-
-/*
- * Disk devices do all IO in 1024-byte blocks.
- */
-#define	DEV_BSIZE	1024
-
-#define	btop(x)	((x)>>PGSHIFT)
-#define	ptob(x)	((x)<<PGSHIFT)
-
-/* Clicks to disk blocks */
-#define ctod(x) ((x)<<(PGSHIFT-DEV_BSHIFT))
-
-/* Disk blocks to clicks */
-#define       dtoc(x) ((x)>>(PGSHIFT-DEV_BSHIFT))
-
-/* clicks to bytes */
-#define       ctob(x) ((x)<<PGSHIFT)
-
-/* bytes to clicks */
-#define       btoc(x) (((unsigned)(x)+(NBPG-1))>>PGSHIFT)
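-
-/* For example, with NBPG = 4096: btoc(5000) rounds up to 2 clicks, and
-   ptob(2) gives back 8192 bytes. */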
-
-#endif /* _PPC_MACHPARAM_H_ */
diff --git a/osfmk/ppc/mappings.c b/osfmk/ppc/mappings.c
deleted file mode 100644
index 5da3b85d7..000000000
--- a/osfmk/ppc/mappings.c
+++ /dev/null
@@ -1,1805 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- *	This file is used to maintain the virtual to real mappings for a PowerPC machine.
- *	The code herein is primarily used to bridge between the pmap layer and the hardware layer.
- *	Currently, some of the function of this module is contained within pmap.c.  We may want to move
- *	all of this into it (or most anyway) for the sake of performance.  We shall see as we write it.
- *
- *	We also depend upon the structure of the phys_entry control block.  We do put some processor 
- *	specific stuff in there.
- *
- */
-
-#include <debug.h>
-#include <mach_kgdb.h>
-#include <mach_vm_debug.h>
-#include <db_machine_commands.h>
-
-#include <mach/mach_types.h>
-#include <mach/vm_attributes.h>
-#include <mach/vm_param.h>
-
-#include <kern/kern_types.h>
-#include <kern/thread.h>
-#include <kern/spl.h>
-#include <kern/misc_protos.h>
-
-#include <vm/vm_fault.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_map.h>
-#include <vm/vm_page.h>
-#include <vm/pmap.h>
-
-#include <ppc/exception.h>
-#include <ppc/misc_protos.h>
-#include <ppc/proc_reg.h>
-#include <ppc/pmap.h>
-#include <ppc/mem.h>
-#include <ppc/new_screen.h>
-#include <ppc/Firmware.h>
-#include <ppc/mappings.h>
-#include <ddb/db_output.h>
-
-#include <console/video_console.h>		/* (TEST/DEBUG) */
-
-#define PERFTIMES 0
-
-vm_map_t        mapping_map = VM_MAP_NULL;
-
-unsigned int	incrVSID = 0;						/* VSID increment value */
-unsigned int	mappingdeb0 = 0;						
-unsigned int	mappingdeb1 = 0;
-int ppc_max_adrsp;									/* Maximum address spaces */			
-				
-addr64_t		*mapdebug;							/* (BRINGUP) */
-extern unsigned int DebugWork;						/* (BRINGUP) */
-						
-void mapping_verify(void);
-void mapping_phys_unused(ppnum_t pa);
-
-int nx_enabled = 1;			/* enable no-execute protection */
-int allow_data_exec  = VM_ABI_32;	/* 32-bit apps may execute data by default, 64-bit apps may not */
-int allow_stack_exec = VM_ABI_32;	/* 32-bit apps may execute from the stack by default, 64-bit apps may not */
-
-/*
- *  ppc_prot translates Mach's representation of protections to that of the PPC hardware.
- *  For Virtual Machines (VMM), we also provide translation entries where the output is
- *  the same as the input, allowing direct specification of PPC protections. Mach's 
- *	representations are always in the range 0..7, so they always fall into the first
- *	8 table entries; direct translations are placed in the range 8..15, so they fall into
- *  the second half of the table.
- *
- */
- 
-unsigned char ppc_prot[16] = { 4, 7, 6, 6, 3, 3, 2, 2,		/* Mach -> PPC translations */
-                               0, 1, 2, 3, 4, 5, 6, 7 };	/* VMM direct  translations */
-
-
-
-vm_prot_t getProtPPC(int key, boolean_t disable_NX) {
-	vm_prot_t prot;
-
-	prot = ppc_prot[key & 0xF];
-
-	if (key <= 7 && disable_NX == TRUE)
-		prot &= ~mpN;
-
-	return (prot);
-}
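-
-/*
- *  Usage sketch (illustrative only): a Mach read/write request (key 3)
- *  indexes the first half of the table, while keys 8..15 pass PPC bits
- *  through directly:
- *
- *		vm_prot_t p1 = getProtPPC(VM_PROT_READ | VM_PROT_WRITE, FALSE);	(key 3 -> 6)
- *		vm_prot_t p2 = getProtPPC(8 + 2, FALSE);						(direct -> 2)
- */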
-
-
-/*
- *			About PPC VSID generation:
- *
- *			This function is called to generate an address space ID. This space ID must be unique within
- *			the system.  For the PowerPC, it is used to build the VSID.  We build a VSID in the following
- *			way:  space ID << 4 | segment.  Since a VSID is 24 bits, and out of that, we reserve the last
- *			4, so we can have 2^20 (1M) unique IDs.  Each pmap has a unique space ID, so in principle we
- *			could have 1M pmaps at a time, though we would run out of memory long before then.  The
- *			problem is that only a certain number of pmaps are kept in a free list and, if that is full,
- *			they are released.  This causes us to lose track of which space IDs are free to be reused.
- *			We can do 4 things: 1) not worry about it, 2) keep all free pmaps, 3) rebuild all mappings
- *			when the space ID wraps, or 4) scan the list of pmaps and find a free one.
- *
- *			Yet another consideration is the hardware use of the VSID.  It is used as part of the hash
- *			calculation for virtual address lookup.  An improperly chosen value could potentially cause
- *			too many hashes to hit the same bucket, causing PTEG overflows.  The actual hash function
- *			is (page index XOR vsid) mod number of ptegs. For a 32MB machine, using the suggested
- *			hash table size, there are 2^13 (8192) PTEGs.  Remember, though, that the bottom 4 bits
- *			are reserved for the segment number, which means that we really have 2^(13-4) = 512 space IDs
- *			before we start hashing to the same buckets with the same vaddrs. Also, within a space ID,
- *			every 8192 pages (32MB) within a segment will hash to the same bucket.  That's 8 collisions
- *			per segment.  So, a scan of every page for 256MB would fill 32 PTEGs completely, but
- *			with no overflow.  I don't think that this is a problem.
- *
- *			There may be a problem with the space ID, though. A new space ID is generated (mainly) 
- *			whenever there is a fork.  There shouldn't really be any problem because (for a 32MB
- *			machine) we can have 512 pmaps and still not have hash collisions for the same address.
- *			The potential problem, though, is if we get long-term pmaps that have space IDs that are
- *			the same modulo 512.  We can reduce this problem by having the segment number be bits
- *			0-3 of the space ID rather than 20-23.  Doing this means that, in effect, corresponding
- *			vaddrs in different segments hash to the same PTEG. While this is somewhat of a problem,
- *			I don't think that it is as significant as the other, so I'll make the space ID
- *			with segment first.
- *
- *			The final, and biggest problem is the wrap, which will happen every 2^20 space IDs.
- *			While this is a problem that should only happen in periods counted in weeks, it can and
- *			will happen.  This is assuming a monotonically increasing space ID. If we were to search
- *			for an inactive space ID, there could not be a wrap until there were 2^20 concurrent space IDs.
- *			That's pretty unlikely to happen; there couldn't be enough storage to support a million tasks.
- *
- *			So, what we do is to keep all active pmaps in a chain (anchored from kernel_pmap and
- *			locked by free_pmap_lock) that is sorted in VSID sequence order.
- *
- *			Whenever we need a VSID, we walk the list looking for the next in the sequence from
- *			the last that was freed.  Then we allocate that.
- *
- *			NOTE: We must be called with interruptions off and free_pmap_lock held.
- *
- */
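-
-/*
- *			As a concrete sketch of the layout described above (hypothetical
- *			values): for space ID 0x12345 and a 32-bit effective address ea,
- *
- *				seg  = (ea >> 28) & 0xF;			// segment number
- *				vsid = (0x12345 << 4) | seg;		// 24-bit VSID, segment in bits 0-3
- *
- *			so consecutive space IDs differ in VSID by 16, and spreading newly
- *			issued IDs by incrVSID (see mapping_init below) keeps them from
- *			clustering in the PTEG hash.
- */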
-
-/*
- *		mapping_init();
- *			Do anything that needs to be done before the mapping system can be used.
- *			Hash table must be initialized before we call this.
- *
- *			Calculate the SID increment.  Currently we use size^(1/2) + size^(1/4) + 1;
- */
-
-void mapping_init(void) {
-
-	unsigned int tmp, maxeff, rwidth;
-	
-	ppc_max_adrsp = maxAdrSp;									/* Set maximum address spaces */			
-	
-	maxeff = 32;												/* Assume 32-bit */
-	if(PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit) maxeff = 64;	/* Is this a 64-bit machine? */
-	
-	rwidth = PerProcTable[0].ppe_vaddr->pf.pfMaxVAddr - maxAdrSpb;		/* Reduce address width by width of address space ID */
-	if(rwidth > maxeff) rwidth = maxeff;						/* If we still have more virtual than effective, clamp at effective */
-	
-	vm_max_address = 0xFFFFFFFFFFFFFFFFULL >> (64 - rwidth);		/* Get maximum effective address supported */
-	vm_max_physical = 0xFFFFFFFFFFFFFFFFULL >> (64 - PerProcTable[0].ppe_vaddr->pf.pfMaxPAddr);	/* Get maximum physical address supported */
-	
-	if(PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit) {				/* Are we 64 bit? */
-		tmp = 12;												/* Size of hash space */
-	}
-	else {
-		__asm__ volatile("cntlzw %0, %1" : "=r" (tmp) : "r" (hash_table_size));	/* Get number of leading 0s */
-		tmp = 32 - tmp;											/* Size of hash space */
-	}
-
-	incrVSID = 1 << ((tmp + 1) >> 1);							/* Get ceiling of sqrt of table size */
-	incrVSID |= 1 << ((tmp + 1) >> 2);							/* Get ceiling of fourth root of table size */
-	incrVSID |= 1;												/* Set the low bit (the +1 term) */
-
-	return;
-
-}
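-
-/*
- *		Worked example of the increment computed above: on a 64-bit machine
- *		tmp is 12, so
- *
- *			incrVSID = (1 << ((12+1) >> 1)) | (1 << ((12+1) >> 2)) | 1
- *			         = 64 | 8 | 1 = 73
- *
- *		which is exactly sqrt(4096) + 4096^(1/4) + 1; since the three terms are
- *		distinct powers of two, the ORs are equivalent to a sum.
- */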
-
-
-/*
- *		mapping_remove(pmap_t pmap, addr64_t va);
- *			Given a pmap and virtual address, this routine finds the mapping and unmaps it.
- *			The mapping block will be added to the free list.  If the free list threshold is
- *			reached, garbage collection will happen.
- *
- *			We also pass back the next higher mapped address. This is done so that the higher level
- *			pmap_remove function can release a range of addresses simply by calling mapping_remove
- *			in a loop until it finishes the range or is returned a vaddr of 0.
- *
- *			Note that if the mapping is not found, we return the next VA ORed with 1
- *
- */
-
-addr64_t mapping_remove(pmap_t pmap, addr64_t va) {	/* Remove a single mapping for this VADDR;
-														   returns the next mapped VA, ORed with 1 if no mapping was found */
-
-	mapping_t	*mp;
-	addr64_t	nextva;
-	ppnum_t		pgaddr;
-	
-	va &= ~PAGE_MASK;									/* Scrub noise bits */
-	
-	do {												/* Keep trying until we truly fail */
-		mp = hw_rem_map(pmap, va, &nextva);				/* Remove a mapping from this pmap */
-	} while (mapRtRemove == ((unsigned int)mp & mapRetCode));
-	
-	switch ((unsigned int)mp & mapRetCode) {
-		case mapRtOK:
-			break;										/* Mapping removed */
-		case mapRtNotFnd:
-			return (nextva | 1);						/* Nothing found to unmap */
-		default:
-			panic("mapping_remove: hw_rem_map failed - pmap = %p, va = %016llX, code = %p\n",
-				pmap, va, mp);
-			break;
-	}
-
-	pgaddr = mp->mpPAddr;								/* Get page number from mapping */
-	
-	mapping_free(mp);									/* Add mapping to the free list */
-	
-	if ((pmap->pmapFlags & pmapVMhost) && pmap->pmapVmmExt) {
-														/* If this is an assisted host, scrub any guest mappings */
-		unsigned int  idx;
-		phys_entry_t *physent = mapping_phys_lookup(pgaddr, &idx);
-														/* Get physent for our physical page */
-		if (!physent) {									/* No physent, could be in I/O area, so exit */
-			return (nextva);
-		}
-		
-		do {											/* Iterate until all guest mappings are gone */
-			mp = hw_scrub_guest(physent, pmap);			/* Attempt to scrub a guest mapping */
-			switch ((unsigned int)mp & mapRetCode) {
-				case mapRtGuest:						/* Found a guest mapping */
-				case mapRtNotFnd:						/* Mapping was there, but disappeared, must retry */
-				case mapRtEmpty:						/* No guest mappings left to scrub */
-					break;
-				default:
-					panic("mapping_remove: hw_scrub_guest failed - physent = %p, code = %p\n",
-						physent, mp);					/* Cry havoc, cry wrack,
-															at least we die with harness on our backs */
-					break;
-			}
-		} while (mapRtEmpty != ((unsigned int)mp & mapRetCode));
-	}
-
-	return nextva;										/* Tell them we did it */
-}
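-
-/*
- *		A minimal sketch of the range-removal pattern described above (a
- *		hypothetical caller; the real pmap_remove adds its own locking):
- *
- *			addr64_t next = start;
- *			while(next && (next < end)) {
- *				next = mapping_remove(pmap, next) & ~1ULL;	// strip the "not found" flag in bit 0
- *			}
- *
- *		The loop ends when the range is exhausted or a returned vaddr of 0
- *		says nothing is mapped above the current address.
- */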
-
-/*
- *		mapping_make(pmap, va, pa, flags, size, prot) - map a virtual address to a real one 
- *
- *		This routine takes the given parameters, builds a mapping block, and queues it into the 
- *		correct lists.
- *		
- *		pmap (virtual address)		is the pmap to map into
- *		va   (virtual address)		is the 64-bit virtual address that is being mapped
- *		pa	(physical page number)	is the physical page number (i.e., physical address >> 12). This is
- *									a 32-bit quantity.
- *		Flags:
- *			block					if 1, the mapping is a block and the size parameter is used. Note: we do not keep 
- *									reference and change information or allow protection changes of blocks;
- *									any changes must first unmap and then remap the area.
- *			use attribute			Use specified attributes for map, not defaults for physical page
- *			perm					Mapping is permanent
- *			cache inhibited			Cache inhibited (used if use attribute or block set)
- *			guarded					Guarded access (used if use attribute or block set)
- *		size						size of block in pages - 1 (not used if not block)
- *		prot						VM protection bits
- *		attr						Cachability/Guardedness    
- *
- *		Returns 0 if the mapping was successful.  Returns the overlapping/colliding vaddr on a
- *		collision, and a nonzero code (e.g., mapRtBadSz) for any other failure.
- *
- *		Note that we make the assumption that all memory in the range of 0x0000000080000000 to 0x00000000FFFFFFFF is reserved
- *		for I/O and default the cache attributes appropriately.  The caller is free to override them, however.
- *
- *		If there is any physical page that is not found in the physent table, the mapping is forced to be a
- *		block mapping of length 1.  This keeps us from trying to update a physent during later mapping use,
- *		e.g., fault handling.
- *
- *
- */
- 
-addr64_t mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags, unsigned int size, vm_prot_t prot) {	/* Make an address mapping */
-
-	register mapping_t *mp;
-	addr64_t colladdr, psmask;
-	unsigned int pindex, mflags, pattr, wimg, rc;
-	phys_entry_t *physent;
-	int nlists, pcf;
-	boolean_t disable_NX = FALSE;
-
-	pindex = 0;
-	
-	mflags = 0x01000000;										/* Start building mpFlags field (busy count = 1) */
-
-	pcf = (flags & mmFlgPcfg) >> 24;							/* Get the physical page config index */
-	if(!(pPcfg[pcf].pcfFlags)) {								/* Validate requested physical page configuration */
-		panic("mapping_make: invalid physical page configuration request - pmap = %p, va = %016llX, cfg = %d\n",
-			pmap, va, pcf);
-	}
-	
-	psmask = (1ULL << pPcfg[pcf].pcfPSize) - 1;					/* Mask to isolate any offset into a page */
-	if(va & psmask) {											/* Make sure we are page aligned on virtual */
-		panic("mapping_make: attempt to map unaligned vaddr - pmap = %p, va = %016llX, cfg = %d\n",
-			pmap, va, pcf);
-	}
-	if(((addr64_t)pa << 12) & psmask) {							/* Make sure we are page aligned on physical */
-		panic("mapping_make: attempt to map unaligned paddr - pmap = %p, pa = %08X, cfg = %d\n",
-			pmap, pa, pcf);
-	}
-	
-	mflags |= (pcf << (31-mpPcfgb));							/* Insert physical page configuration index */
-
-	if(!(flags & mmFlgBlock)) {									/* Is this a block map? */
-
-		size = 1;												/* Set size to 1 page if not block */
-	 
-		physent = mapping_phys_lookup(pa, &pindex);				/* Get physical entry */
-		if(!physent) {											/* Did we find the physical page? */
-			mflags |= mpBlock;									/* Force this to a block if no physent */
-			pattr = 0;											/* Assume normal, non-I/O memory */
-			if((pa & 0xFFF80000) == 0x00080000) pattr = mmFlgCInhib | mmFlgGuarded;	/* If this page is in I/O range, set I/O attributes */
-		}
-		else pattr = ((physent->ppLink & (ppI | ppG)) >> 60);	/* Get the default attributes from physent */
-		
-		if(flags & mmFlgUseAttr) pattr = flags & (mmFlgCInhib | mmFlgGuarded);	/* Use requested attributes */
-	}
-	else {														/* This is a block */
-		 
-		pattr = flags & (mmFlgCInhib | mmFlgGuarded);			/* Use requested attributes */
-		mflags |= mpBlock;										/* Show that this is a block */
-	
-		if(size > pmapSmallBlock) {								/* Is it bigger than a small block? */
-			if(size & 0x00001FFF) return mapRtBadSz;			/* Fail if bigger than 256MB and not a 32MB multiple */
-			size = size >> 13;									/* Convert to 32MB chunks */
-			mflags = mflags | mpBSu;							/* Show 32MB basic size unit */
-		}
-	}
-	
-	wimg = 0x2;													/* Set basic PPC wimg to 0b0010 - Coherent */
-	if(pattr & mmFlgCInhib) wimg |= 0x4;						/* Add cache inhibited if we need to */
-	if(pattr & mmFlgGuarded) wimg |= 0x1;						/* Add guarded if we need to */
-	
-	mflags = mflags | (pindex << 16);							/* Stick in the physical entry table index */
-	
-	if(flags & mmFlgPerm) mflags |= mpPerm;						/* Set permanent mapping */
-	
-	size = size - 1;											/* Change size to offset */
-	if(size > 0xFFFF) return mapRtBadSz;						/* Leave if size is too big */
-	
-	nlists = mapSetLists(pmap);									/* Set number of lists this will be on */
-	
-	mp = mapping_alloc(nlists);									/* Get a spare mapping block with this many lists */
-
-                                                                /* the mapping is zero except that the mpLists field is set */
-	mp->mpFlags |= mflags;										/* Add in the rest of the flags to mpLists */
-	mp->mpSpace = pmap->space;									/* Set the address space/pmap lookup ID */
-	mp->u.mpBSize = size;										/* Set the size */
-	mp->mpPte = 0;												/* Set the PTE invalid */
-	mp->mpPAddr = pa;											/* Set the physical page number */
-
-	if ( !nx_enabled || (pmap->pmapFlags & pmapNXdisabled) )
-	        disable_NX = TRUE;
-
-	mp->mpVAddr = (va & ~mpHWFlags) | (wimg << 3) | getProtPPC(prot, disable_NX);			/* Add the protection and attributes to the field */
-	  
-	while(1) {													/* Keep trying... */
-		colladdr = hw_add_map(pmap, mp);						/* Go add the mapping to the pmap */
-		rc = colladdr & mapRetCode;								/* Separate return code */
-		colladdr &= ~mapRetCode;								/* Clean up collision effective address */
-		
-		switch (rc) {
-			case mapRtOK:
-				return mapRtOK;									/* Mapping added successfully */
-				
-			case mapRtRemove:									/* Remove in progress */
-				(void)mapping_remove(pmap, colladdr);			/* Lend a helping hand to another CPU doing block removal */
-				continue;										/* Retry mapping add */
-				
-			case mapRtMapDup:									/* Identical mapping already present */
-				mapping_free(mp);								/* Free duplicate mapping */
-				return mapRtOK;								/* Return success */
-				
-			case mapRtSmash:									/* Mapping already present but does not match new mapping */
-				mapping_free(mp);								/* Free duplicate mapping */
-				return (colladdr | mapRtSmash);					/* Return colliding address, with some dirt added to avoid
-																   confusion if effective address is 0 */
-			default:
-				panic("mapping_make: hw_add_map failed - collision addr = %016llX, code = %02X, pmap = %p, va = %016llX, mapping = %p\n",
-					colladdr, rc, pmap, va, mp);				/* Die dead */
-		}
-		
-	}
-	
-	return 1;													/* Unreachable, but pleases compiler */
-}
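-
-/*
- *		For instance, a hypothetical caller mapping a single cache-inhibited,
- *		guarded I/O page might look like (sketch only; error handling elided):
- *
- *			colladdr = mapping_make(pmap, va, pa,
- *				mmFlgUseAttr | mmFlgCInhib | mmFlgGuarded, 0,
- *				VM_PROT_READ | VM_PROT_WRITE);
- *			if(colladdr) ...						// collision or mapRtBadSz-style failure
- *
- *		Because the block flag is off, the size parameter is ignored and the
- *		mapping is forced to a single page.
- */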
-
-
-/*
- *		mapping *mapping_find(pmap, va, *nextva, full) - Finds a mapping 
- *
- *		Looks up the vaddr and returns the mapping and the next mapped va
- *		If full is true, it will descend through all nested pmaps to find the actual mapping
- *
- *		Must be called with interruptions disabled or we can hang trying to remove found mapping.
- *
- *		Returns 0 if not found, or a pointer to the mapping if it is.
- *		Note that the mapping's busy count is bumped. It is the responsibility of the caller
- *		to drop the count.  If this is not done, any attempt to remove the mapping will hang.
- *
- *		NOTE: The nextva field is not valid when full is TRUE.
- *
- *
- */
- 
-mapping_t *mapping_find(pmap_t pmap, addr64_t va, addr64_t *nextva, int full) {	/* Find an address mapping */
-
-	register mapping_t *mp;
-	addr64_t	curva;
-	pmap_t	curpmap;
-	int	nestdepth;
-
-	curpmap = pmap;												/* Remember entry */
-	nestdepth = 0;												/* Set nest depth */
-	curva = (addr64_t)va;										/* Set current va */
-
-	while(1) {
-
-		mp = hw_find_map(curpmap, curva, nextva);				/* Find the mapping for this address */
-		if((unsigned int)mp == mapRtBadLk) {					/* Did we lock up ok? */
-			panic("mapping_find: pmap lock failure - rc = %p, pmap = %p\n", mp, curpmap);	/* Die... */
-		}
-		
-		if(!mp || ((mp->mpFlags & mpType) < mpMinSpecial) || !full) break;		/* Are we done looking? */
-
-		if((mp->mpFlags & mpType) != mpNest) {					/* Don't chain through anything other than a nested pmap */
-			mapping_drop_busy(mp);								/* We have everything we need from the mapping */
-			mp = NULL;										/* Set not found */
-			break;
-		}
-
-		if(nestdepth++ > 64) {									/* Have we nested too far down? */
-			panic("mapping_find: too many nested pmaps - va = %016llX, curva = %016llX, pmap = %p, curpmap = %p\n",
-				va, curva, pmap, curpmap);
-		}
-		
-		curva = curva + mp->mpNestReloc;						/* Relocate va to new pmap */
-		curpmap = (pmap_t) pmapTrans[mp->mpSpace].pmapVAddr;	/* Get the address of the nested pmap */
-		mapping_drop_busy(mp);									/* We have everything we need from the mapping */
-		
-	}
-
-	return mp;													/* Return the mapping if we found one */
-}
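-
-/*
- *		Typical (hypothetical) use, honoring the busy-count rule above:
- *
- *			s = splhigh();							// interruptions must be disabled
- *			mp = mapping_find(pmap, va, &nextva, 0);
- *			if(mp) {
- *				pa = mp->mpPAddr;					// ...examine the mapping...
- *				mapping_drop_busy(mp);				// caller must drop the busy count
- *			}
- *			splx(s);
- */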
-
-/*
- *		void mapping_protect(pmap_t pmap, addr64_t va, vm_prot_t prot, addr64_t *nextva) - change the protection of a virtual page
- *
- *		This routine takes a pmap and virtual address and changes
- *		the protection.  If there are PTEs associated with the mappings, they will be invalidated before
- *		the protection is changed. 
- *
- *		We succeed if we change the protection or if there is no page mapped at va.  The request is
- *		silently ignored if the va corresponds to a block mapped area or a nested pmap; any other
- *		failure from hw_protect panics.
- *
- *
- */
-
-void
-mapping_protect(pmap_t pmap, addr64_t va, vm_prot_t prot, addr64_t *nextva) {	/* Change protection of a virtual page */
-
-	int	ret;
-	boolean_t disable_NX = FALSE;
-
-	if ( !nx_enabled || (pmap->pmapFlags & pmapNXdisabled) )
-	        disable_NX = TRUE;
-
-	ret = hw_protect(pmap, va, getProtPPC(prot, disable_NX), nextva);		/* Try to change the protect here */
-
-	switch (ret) {								/* Decode return code */
-	
-		case mapRtOK:							/* Changed */
-		case mapRtNotFnd:						/* Didn't find it */
-		case mapRtBlock:						/* Block map, just ignore request */
-		case mapRtNest:							/* Nested pmap, just ignore request */
-			break;
-			
-		default:
-			panic("mapping_protect: hw_protect failed - rc = %d, pmap = %p, va = %016llX\n", ret, pmap, va);
-		
-	}
-
-}
-
-/*
- *		void mapping_protect_phys(ppnum_t pa, vm_prot_t prot) - change the protection of a physical page
- *
- *		This routine takes a physical entry and runs through all mappings attached to it and changes
- *		the protection.  If there are PTEs associated with the mappings, they will be invalidated before
- *		the protection is changed.  There is no limitation on changes, e.g., higher to lower, lower to
- *		higher; however, changes to execute protection are ignored.
- *
- *		Any mapping that is marked permanent is not changed
- *
- *		Phys_entry is unlocked.
- */
-
-void mapping_protect_phys(ppnum_t pa, vm_prot_t prot) {	/* Change protection of all mappings to page */
-	
-	unsigned int pindex;
-	phys_entry_t *physent;
-
-	physent = mapping_phys_lookup(pa, &pindex);					/* Get physical entry */
-	if(!physent) {												/* Did we find the physical page? */
-		panic("mapping_protect_phys: invalid physical page %08X\n", pa);
-	}
-
-	hw_walk_phys(physent, hwpNoop, hwpSPrtMap, hwpNoop,
-		     getProtPPC(prot, FALSE), hwpPurgePTE);			/* Set the new protection for page and mappings */
-
-	return;									/* Leave... */
-}
-
-
-/*
- *		void mapping_clr_mod(ppnum_t pa) - clears the change bit of a physical page
- *
- *		This routine takes a physical entry and runs through all mappings attached to it and turns
- *		off the change bit. 
- */
-
-void mapping_clr_mod(ppnum_t pa) {								/* Clears the change bit of a physical page */
-
-	unsigned int pindex;
-	phys_entry_t *physent;
-	
-	physent = mapping_phys_lookup(pa, &pindex);					/* Get physical entry */
-	if(!physent) {												/* Did we find the physical page? */
-		panic("mapping_clr_mod: invalid physical page %08X\n", pa);
-	}
-
-	hw_walk_phys(physent, hwpNoop, hwpCCngMap, hwpCCngPhy,
-				 0, hwpPurgePTE);								/* Clear change for page and mappings */
-	return;														/* Leave... */
-}
-
-
-/*
- *		void mapping_set_mod(ppnum_t pa) - set the change bit of a physical page
- *
- *		This routine takes a physical entry and runs through all mappings attached to it and turns
- *		on the change bit.  
- */
-
-void mapping_set_mod(ppnum_t pa) {								/* Sets the change bit of a physical page */
-
-	unsigned int pindex;
-	phys_entry_t *physent;
-	
-	physent = mapping_phys_lookup(pa, &pindex);					/* Get physical entry */
-	if(!physent) {												/* Did we find the physical page? */
-		panic("mapping_set_mod: invalid physical page %08X\n", pa);
-	}
-
-	hw_walk_phys(physent, hwpNoop, hwpSCngMap, hwpSCngPhy,
-				 0, hwpNoopPTE);								/* Set change for page and mappings */
-	return;														/* Leave... */
-}
-
-
-/*
- *		void mapping_clr_ref(ppnum_t pa) - clears the reference bit of a physical page
- *
- *		This routine takes a physical entry and runs through all mappings attached to it and turns
- *		off the reference bit.  
- */
-
-void mapping_clr_ref(ppnum_t pa) {								/* Clears the reference bit of a physical page */
-
-	unsigned int pindex;
-	phys_entry_t *physent;
-	
-	physent = mapping_phys_lookup(pa, &pindex);					/* Get physical entry */
-	if(!physent) {												/* Did we find the physical page? */
-		panic("mapping_clr_ref: invalid physical page %08X\n", pa);
-	}
-
-	hw_walk_phys(physent, hwpNoop, hwpCRefMap, hwpCRefPhy,
-				 0, hwpPurgePTE);								/* Clear reference for page and mappings */
-	return;														/* Leave... */
-}
-
-
-/*
- *		void mapping_set_ref(ppnum_t pa) - set the reference bit of a physical page
- *
- *		This routine takes a physical entry and runs through all mappings attached to it and turns
- *		on the reference bit. 
- */
-
-void mapping_set_ref(ppnum_t pa) {								/* Sets the reference bit of a physical page */
-
-	unsigned int pindex;
-	phys_entry_t *physent;
-	
-	physent = mapping_phys_lookup(pa, &pindex);					/* Get physical entry */
-	if(!physent) {												/* Did we find the physical page? */
-		panic("mapping_set_ref: invalid physical page %08X\n", pa);
-	}
-
-	hw_walk_phys(physent, hwpNoop, hwpSRefMap, hwpSRefPhy,
-				 0, hwpNoopPTE);								/* Set reference for page and mappings */
-	return;														/* Leave... */
-}
-
-
-/*
- *		boolean_t mapping_tst_mod(ppnum_t pa) - test the change bit of a physical page
- *
- *		This routine takes a physical entry and runs through all mappings attached to it and tests
- *		the changed bit. 
- */
-
-boolean_t mapping_tst_mod(ppnum_t pa) {							/* Tests the change bit of a physical page */
-
-	unsigned int pindex, rc;
-	phys_entry_t *physent;
-	
-	physent = mapping_phys_lookup(pa, &pindex);					/* Get physical entry */
-	if(!physent) {												/* Did we find the physical page? */
-		panic("mapping_tst_mod: invalid physical page %08X\n", pa);
-	}
-
-	rc = hw_walk_phys(physent, hwpTCngPhy, hwpTCngMap, hwpNoop,
-					  0, hwpMergePTE);							/* Test change bit for page and mappings */
-	return ((rc & (unsigned long)ppC) != 0);					/* Leave with change bit */
-}
-
-
-/*
- *		boolean_t mapping_tst_ref(ppnum_t pa) - tests the reference bit of a physical page
- *
- *		This routine takes a physical entry and runs through all mappings attached to it and tests
- *		the reference bit. 
- */
-
-boolean_t mapping_tst_ref(ppnum_t pa) {							/* Tests the reference bit of a physical page */
-
-	unsigned int pindex, rc;
-	phys_entry_t *physent;
-	
-	physent = mapping_phys_lookup(pa, &pindex);					/* Get physical entry */
-	if(!physent) {												/* Did we find the physical page? */
-		panic("mapping_tst_ref: invalid physical page %08X\n", pa);
-	}
-
-	rc = hw_walk_phys(physent, hwpTRefPhy, hwpTRefMap, hwpNoop,
-	                  0, hwpMergePTE);							/* Test reference for page and mappings */
-	return ((rc & (unsigned long)ppR) != 0);					/* Leave with reference bit */
-}
-
-
-/*
- *		unsigned int mapping_tst_refmod(ppnum_t pa) - tests the reference and change bits of a physical page
- *
- *		This routine takes a physical entry and runs through all mappings attached to it and tests
- *		their reference and changed bits. 
- */
-
-unsigned int mapping_tst_refmod(ppnum_t pa) {					/* Tests the reference and change bits of a physical page */
-	
-	unsigned int  pindex, rc;
-	phys_entry_t *physent;
-	
-	physent = mapping_phys_lookup(pa, &pindex);					/* Get physical entry */
-	if (!physent) {												/* Did we find the physical page? */
-		panic("mapping_tst_refmod: invalid physical page %08X\n", pa);
-	}
-
-	rc = hw_walk_phys(physent, hwpTRefCngPhy, hwpTRefCngMap, hwpNoop,
-					  0, hwpMergePTE);							/* Test reference and change bits in page and mappings */
-	return (((rc & ppC)? VM_MEM_MODIFIED : 0) | ((rc & ppR)? VM_MEM_REFERENCED : 0));
-																/* Convert bits to generic format and return */
-	
-}
-
-
-/*
- *		void mapping_clr_refmod(ppnum_t pa, unsigned int mask) - clears the reference and change bits specified
- *        by mask of a physical page
- *
- *		This routine takes a physical entry and runs through all mappings attached to it and turns
- *		off the reference and change bits specified by mask.  
- */
-
-void mapping_clr_refmod(ppnum_t pa, unsigned int mask) {		/* Clears the reference and change bits of a physical page */
-
-	unsigned int  pindex;
-	phys_entry_t *physent;
-	unsigned int  ppcMask;
-	
-	physent = mapping_phys_lookup(pa, &pindex);					/* Get physical entry */
-	if(!physent) {												/* Did we find the physical page? */
-		panic("mapping_clr_refmod: invalid physical page %08X\n", pa);
-	}
-
-	ppcMask = (((mask & VM_MEM_MODIFIED)? ppC : 0) | ((mask & VM_MEM_REFERENCED)? ppR : 0));
-																/* Convert mask bits to PPC-specific format */
-	hw_walk_phys(physent, hwpNoop, hwpCRefCngMap, hwpCRefCngPhy,
-	             ppcMask, hwpPurgePTE);							/* Clear reference and change bits for page and mappings */
-	return;														/* Leave... */
-}
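-
-/*
- *		These R/C primitives compose in the obvious way.  A hypothetical
- *		pageout-style harvest of a page's state would be:
- *
- *			bits = mapping_tst_refmod(pa);
- *			mapping_clr_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
- *
- *		where bits holds VM_MEM_REFERENCED and/or VM_MEM_MODIFIED in the
- *		generic (machine-independent) format.
- */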
-
-
-
-/*
- *		phys_ent  *mapping_phys_lookup(ppnum_t pp, unsigned int *pindex) - finds the physical entry for a page
- *
- *		This routine takes a physical page number and returns the phys_entry associated with it.  It also
- *		returns, through pindex, the word index of the memory region bank that contains
- *		the entry. 
- */
-
-phys_entry_t *
-mapping_phys_lookup(ppnum_t pp, unsigned int *pindex)
-{	/* Finds the physical entry for the page */
-	unsigned int i;
-	
-	for(i = 0; i < pmap_mem_regions_count; i++) {				/* Walk through the list */
-		if(!(unsigned int)pmap_mem_regions[i].mrPhysTab) continue;	/* Skip any empty lists */
-		if((pp < pmap_mem_regions[i].mrStart) || (pp > pmap_mem_regions[i].mrEnd)) continue;	/* This isn't ours */
-		
-		*pindex = (i * sizeof(mem_region_t)) / 4;				/* Make the word index to this list */
-		
-		return &pmap_mem_regions[i].mrPhysTab[pp - pmap_mem_regions[i].mrStart];	/* Return the physent pointer */
-	}
-	
-	return (phys_entry_t *)0;										/* Shucks, can't find it... */
-	
-}
-
-boolean_t
-pmap_valid_page(ppnum_t pn) {
-	unsigned int tmp;
-
-	return (mapping_phys_lookup(pn, &tmp) != 0);
-}
-
-
-/*
- *		mapping_adjust(void) - Releases free mapping blocks and/or allocates new ones 
- *
- *		This routine frees any mapping blocks queued to mapCtl.mapcrel. It also checks
- *		the number of free mappings remaining, and if below a threshold, replenishes them.
- *		The list will be replenished from mapCtl.mapcrel if there are enough.  Otherwise,
- *		a new one is allocated.
- *
- *		This routine allocates and/or frees memory and must be called from a safe place. 
- *		Currently, vm_pageout_scan is the safest place. 
- */
-
-thread_call_t				mapping_adjust_call;
-static thread_call_data_t	mapping_adjust_call_data;
-
-void mapping_adjust(void) {										/* Adjust free mappings */
-
-	kern_return_t	retr = KERN_SUCCESS;
-	mappingblok_t	*mb, *mbn;
-	spl_t			s;
-	int				allocsize;
-
-	if(mapCtl.mapcmin <= MAPPERBLOK) {
-		mapCtl.mapcmin = (sane_size / PAGE_SIZE) / 16;
-
-#if DEBUG
-		kprintf("mapping_adjust: minimum entries rqrd = %08X\n", mapCtl.mapcmin);
-		kprintf("mapping_adjust: free = %08X; in use = %08X; release = %08X\n",
-		  mapCtl.mapcfree, mapCtl.mapcinuse, mapCtl.mapcreln);
-#endif
-	}
-
-	s = splhigh();												/* Don't bother from now on */
-	if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) {	/* Lock the control header */ 
-		panic("mapping_adjust - timeout getting control lock (1)\n");	/* Tell all and die */
-	}
-	
-	if (mapping_adjust_call == NULL) {
-		thread_call_setup(&mapping_adjust_call_data, 
-		                  (thread_call_func_t)mapping_adjust, 
-		                  (thread_call_param_t)NULL);
-		mapping_adjust_call = &mapping_adjust_call_data;
-	}
-
-	while(1) {													/* Keep going until we've got enough */
-		
-		allocsize = mapCtl.mapcmin - mapCtl.mapcfree;			/* Figure out how much we need */
-		if(allocsize < 1) break;								/* Leave if we have all we need */
-		
-		if((unsigned int)(mbn = mapCtl.mapcrel)) {				/* Can we rescue a free one? */
-			mapCtl.mapcrel = mbn->nextblok;						/* Dequeue it */
-			mapCtl.mapcreln--;									/* Back off the count */
-			allocsize = MAPPERBLOK;								/* Show we allocated one block */			
-		}
-        else {													/* No free ones, try to get it */
-			
-			allocsize = (allocsize + MAPPERBLOK - 1) / MAPPERBLOK;	/* Get the number of pages we need */
-			
-			hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);		/* Unlock our stuff */
-			splx(s);											/* Restore 'rupts */
-
-			for(; allocsize > 0; allocsize >>= 1) {				/* Try allocating in descending halves */ 
-				retr = kmem_alloc_kobject(mapping_map, (vm_offset_t *)&mbn, PAGE_SIZE * allocsize);	/* Find a virtual address to use */
-				if((retr != KERN_SUCCESS) && (allocsize == 1)) {	/* Did we find any memory at all? */
-					break;
-				}
-				if(retr == KERN_SUCCESS) break;					/* We got some memory, bail out... */
-			}
-		
-			allocsize = allocsize * MAPPERBLOK;					/* Convert pages to number of maps allocated */
-			s = splhigh();										/* Don't bother from now on */
-			if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) {	/* Lock the control header */ 
-				panic("mapping_adjust - timeout getting control lock (2)\n");	/* Tell all and die */
-			}
-		}
-
-		if (retr != KERN_SUCCESS)
-			break;												/* Failed to allocate, bail out... */
-		for(; allocsize > 0; allocsize -= MAPPERBLOK) {			/* Release one block at a time */
-			mapping_free_init((vm_offset_t)mbn, 0, 1);			/* Initialize a non-permanent block */
-			mbn = (mappingblok_t *)((unsigned int)mbn + PAGE_SIZE);	/* Point to the next slot */
-		}
-
-		if ((mapCtl.mapcinuse + mapCtl.mapcfree + (mapCtl.mapcreln * (MAPPERBLOK + 1))) > mapCtl.mapcmaxalloc)
-		        mapCtl.mapcmaxalloc = mapCtl.mapcinuse + mapCtl.mapcfree + (mapCtl.mapcreln * (MAPPERBLOK + 1));
-	}
-
-	if(mapCtl.mapcholdoff) {									/* Should we hold off this release? */
-		mapCtl.mapcrecurse = 0;									/* We are done now */
-		hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);			/* Unlock our stuff */
-		splx(s);												/* Restore 'rupts */
-		return;													/* Return... */
-	}
-
-	mbn = mapCtl.mapcrel;										/* Get first pending release block */
-	mapCtl.mapcrel = NULL;											/* Dequeue them */
-	mapCtl.mapcreln = 0;										/* Set count to 0 */
-
-	hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);				/* Unlock our stuff */
-	splx(s);													/* Restore 'rupts */
-
-	while((unsigned int)mbn) {									/* Toss 'em all */
-		mb = mbn->nextblok;										/* Get the next */
-		
-		kmem_free(mapping_map, (vm_offset_t) mbn, PAGE_SIZE);	/* Release this mapping block */
-	
-		mbn = mb;												/* Chain to the next */
-	}
-
-	__asm__ volatile("eieio");									/* Make sure all is well */
-	mapCtl.mapcrecurse = 0;										/* We are done now */
-	return;
-}
-
-/*
- *		mapping_free(mapping *mp) - release a mapping to the free list 
- *
- *		This routine takes a mapping and adds it to the free list.
- *		If this free gives a previously full block a free entry, we queue the block back onto the free list.
- *		NOTE: we might want to queue it to the end to quell the pathological
- *		case where we get a mapping and free it repeatedly, causing the block to chain and unchain.
- *		If this release fills a block and we are above the threshold, we release the block
- */
-
-void mapping_free(struct mapping *mp) {							/* Release a mapping */
-
-	mappingblok_t	*mb, *mbn;
-	spl_t			s;
-	unsigned int	full, mindx, lists;
-
-	mindx = ((unsigned int)mp & (PAGE_SIZE - 1)) >> 6;			/* Get index to mapping */
-	mb = (mappingblok_t *)((unsigned int)mp & -PAGE_SIZE);		/* Point to the mapping block */
-    lists = (mp->mpFlags & mpLists);							/* get #lists */
-    if ((lists == 0) || (lists > kSkipListMaxLists)) 			/* panic if out of range */
-        panic("mapping_free: mpLists invalid\n");
-
-#if 0
-	mp->mpFlags = 0x99999999;									/* (BRINGUP) */	
-	mp->mpSpace = 0x9999;										/* (BRINGUP) */	
-	mp->u.mpBSize = 0x9999;										/* (BRINGUP) */	
-	mp->mpPte   = 0x99999998;									/* (BRINGUP) */	
-	mp->mpPAddr = 0x99999999;									/* (BRINGUP) */	
-	mp->mpVAddr = 0x9999999999999999ULL;						/* (BRINGUP) */	
-	mp->mpAlias = 0x9999999999999999ULL;						/* (BRINGUP) */	
-	mp->mpList0 = 0x9999999999999999ULL;						/* (BRINGUP) */	
-	mp->mpList[0] = 0x9999999999999999ULL;						/* (BRINGUP) */	
-	mp->mpList[1] = 0x9999999999999999ULL;						/* (BRINGUP) */	
-	mp->mpList[2] = 0x9999999999999999ULL;						/* (BRINGUP) */	
-
-	if(lists > mpBasicLists) {									/* (BRINGUP) */	
-		mp->mpList[3] = 0x9999999999999999ULL;					/* (BRINGUP) */	
-		mp->mpList[4] = 0x9999999999999999ULL;					/* (BRINGUP) */	
-		mp->mpList[5] = 0x9999999999999999ULL;					/* (BRINGUP) */	
-		mp->mpList[6] = 0x9999999999999999ULL;					/* (BRINGUP) */	
-		mp->mpList[7] = 0x9999999999999999ULL;					/* (BRINGUP) */	
-		mp->mpList[8] = 0x9999999999999999ULL;					/* (BRINGUP) */	
-		mp->mpList[9] = 0x9999999999999999ULL;					/* (BRINGUP) */	
-		mp->mpList[10] = 0x9999999999999999ULL;					/* (BRINGUP) */	
-	}
-#endif	
-	
-
-	s = splhigh();												/* Don't bother from now on */
-	if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) {	/* Lock the control header */ 
-		panic("mapping_free - timeout getting control lock\n");	/* Tell all and die */
-	}
-	
-	full = !(mb->mapblokfree[0] | mb->mapblokfree[1]);			/* See if full now */ 
-	mb->mapblokfree[mindx >> 5] |= (0x80000000 >> (mindx & 31));	/* Flip on the free bit */
-    if ( lists > mpBasicLists ) {								/* if big block, light the 2nd bit too */
-        mindx++;
-        mb->mapblokfree[mindx >> 5] |= (0x80000000 >> (mindx & 31));
-        mapCtl.mapcfree++;
-        mapCtl.mapcinuse--;
-    }
-	
-	if(full) {													/* If it was full before this: */
-		mb->nextblok = mapCtl.mapcnext;							/* Move head of list to us */
-		mapCtl.mapcnext = mb;									/* Chain us to the head of the list */
-		if(!((unsigned int)mapCtl.mapclast))
-			mapCtl.mapclast = mb;
-	}
-
-	mapCtl.mapcfree++;											/* Bump free count */
-	mapCtl.mapcinuse--;											/* Decrement in use count */
-	
-	mapCtl.mapcfreec++;											/* Count total calls */
-
-	if(mapCtl.mapcfree > mapCtl.mapcmin) {						/* Should we consider releasing this? */
-		if(((mb->mapblokfree[0] | 0x80000000) & mb->mapblokfree[1]) == 0xFFFFFFFF) {	/* See if empty now */ 
-
-			if(mapCtl.mapcnext == mb) {							/* Are we first on the list? */
-				mapCtl.mapcnext = mb->nextblok;					/* Unchain us */
-				if(!((unsigned int)mapCtl.mapcnext)) mapCtl.mapclast = NULL;	/* If last, remove last */
-			}
-			else {												/* We're not first */
-				for(mbn = mapCtl.mapcnext; mbn != 0; mbn = mbn->nextblok) {	/* Search for our block */
-					if(mbn->nextblok == mb) break;				/* Is the next one ours? */
-				}
-				if(!mbn) panic("mapping_free: attempt to release mapping block (%p) not on list\n", mp);
-				mbn->nextblok = mb->nextblok;					/* Dequeue us */
-				if(mapCtl.mapclast == mb) mapCtl.mapclast = mbn;	/* If last, make our predecessor last */
-			}
-			
-			if(mb->mapblokflags & mbPerm) {						/* Is this permanently assigned? */
-				mb->nextblok = mapCtl.mapcnext;					/* Move chain head to us */
-				mapCtl.mapcnext = mb;							/* Chain us to the head */
-				if(!((unsigned int)mb->nextblok)) mapCtl.mapclast = mb;	/* If last, make us so */
-			}
-			else {
-				mapCtl.mapcfree -= MAPPERBLOK;					/* Remove the block from the free count */
-				mapCtl.mapcreln++;								/* Count on release list */
-				mb->nextblok = mapCtl.mapcrel;					/* Move pointer */
-				mapCtl.mapcrel = mb;							/* Chain us in front */
-			}
-		}
-	}
-
-	if(mapCtl.mapcreln > MAPFRTHRSH) {							/* Do we have way too many releasable mappings? */
-		if(hw_compare_and_store(0, 1, &mapCtl.mapcrecurse)) {	/* Make sure we aren't recursing */
-			thread_call_enter(mapping_adjust_call);				/* Go toss some */
-		}
-	}
-	hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);				/* Unlock our stuff */
-	splx(s);													/* Restore 'rupts */
-
-	return;														/* Bye, dude... */
-}
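-
-/*
- *		Slot arithmetic sketch (hypothetical offset): with 4096-byte pages and
- *		64-byte mappings there are 64 slots per block, slot 0 being the header.
- *		A mapping at page offset 0x9C0 is slot 0x9C0 >> 6 = 39, so its free bit
- *		is (0x80000000 >> (39 & 31)) = 0x01000000 in mapblokfree[39 >> 5], i.e.,
- *		mapblokfree[1].
- */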
-
-
-/*
- *		mapping_alloc(lists) - obtain a mapping from the free list 
- *
- *		This routine takes a mapping off of the free list and returns its address.
- *		The mapping is zeroed, and its mpLists count is set.  The caller passes in
- *		the number of skiplists it would prefer; if this number is greater than 
- *		mpBasicLists (ie, 4) then we need to allocate a 128-byte mapping, which is
- *		just two consecutive free entries coalesced into one.  If we cannot find
- *		two consecutive free entries, we clamp the list count down to mpBasicLists
- *		and return a basic 64-byte node.  Our caller never knows the difference.
- *
- *		If this allocation empties a block, we remove it from the free list.
- *		If this allocation drops the total number of free entries below a threshold,
- *		we allocate a new block.
- *
- */
-decl_simple_lock_data(extern,free_pmap_lock)
-
-mapping_t *
-mapping_alloc(int lists) {								/* Obtain a mapping */
-
-	register mapping_t *mp;
-	mappingblok_t	*mb, *mbn;
-	spl_t			s;
-	int				mindx;
-    int				big = (lists > mpBasicLists);				/* set flag if big block req'd */
-	pmap_t			refpmap, ckpmap;
-	unsigned int	space, i;
-	addr64_t		va, nextva;
-	boolean_t		found_mapping;
-	boolean_t		do_rescan;
-    
-	s = splhigh();												/* Don't bother from now on */
-	if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) {	/* Lock the control header */ 
-		panic("mapping_alloc - timeout getting control lock\n");	/* Tell all and die */
-	}
-
-	if(!((unsigned int)mapCtl.mapcnext)) {						/* Are there any free mappings? */
-	
-/*
- *		No free mappings.  First, there may be some mapping blocks on the "to be released"
- *		list.  If so, rescue one.  Otherwise, try to steal a couple of blocks' worth.
- */
-
-		if((mbn = mapCtl.mapcrel) != 0) {						/* Try to rescue a block from impending doom */
-			mapCtl.mapcrel = mbn->nextblok;						/* Pop the queue */
-			mapCtl.mapcreln--;									/* Back off the count */
-			mapping_free_init((vm_offset_t)mbn, 0, 1);			/* Initialize a non-permanent block */
-			goto rescued;
-		}
-
-		hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);
-
-		simple_lock(&free_pmap_lock);
-
-		if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) {	/* Lock the control header */ 
-			panic("mapping_alloc - timeout getting control lock\n");	/* Tell all and die */
-		}
-
-		if (!((unsigned int)mapCtl.mapcnext)) {
-
-			refpmap = (pmap_t)cursor_pmap->pmap_link.next;
-			space = mapCtl.mapcflush.spacenum;
-			while (refpmap != cursor_pmap) {
-				if(((pmap_t)(refpmap->pmap_link.next))->spaceNum > space) break;
-				refpmap = (pmap_t)refpmap->pmap_link.next;
-			}
-
-			ckpmap = refpmap;
-			va = mapCtl.mapcflush.addr;
-			found_mapping = FALSE;
-
-			while (mapCtl.mapcfree <= (MAPPERBLOK*2)) {
-
-				hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);
-
-				ckpmap = (pmap_t)ckpmap->pmap_link.next;
-
-				/* We don't steal mappings from the kernel pmap, a VMM host pmap, or a VMM guest pmap with guest
-				   shadow assist active.
-				 */
-				if ((ckpmap->stats.resident_count != 0) && (ckpmap != kernel_pmap)
-														&& !(ckpmap->pmapFlags & (pmapVMgsaa|pmapVMhost))) {
-					do_rescan = TRUE;
-					for (i=0;i<8;i++) {
-						mp = hw_purge_map(ckpmap, va, &nextva);
-
-						switch ((unsigned int)mp & mapRetCode) {
-							case mapRtOK:
-								mapping_free(mp);
-								found_mapping = TRUE;
-								break;
-							case mapRtNotFnd:
-								break;
-							default:
-								panic("mapping_alloc: hw_purge_map failed - pmap = %p, va = %16llX, code = %p\n", ckpmap, va, mp);
-								break;
-						}
-
-						if (mapRtNotFnd == ((unsigned int)mp & mapRetCode)) {
-							if (do_rescan)
-								do_rescan = FALSE;
-							else
-								break;
-						}
-
-						va = nextva;
-					}
-				}
-
-				if (ckpmap == refpmap) {
-					if (found_mapping == FALSE)
-						panic("no valid pmap to purge mappings\n");
-					else
-						found_mapping = FALSE;
-				}
-
-				if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) {	/* Lock the control header */ 
-					panic("mapping_alloc - timeout getting control lock\n");	/* Tell all and die */
-				}
-
-			}
-
-			mapCtl.mapcflush.spacenum = ckpmap->spaceNum;
-			mapCtl.mapcflush.addr = nextva;
-		}
-
-		simple_unlock(&free_pmap_lock);
-	}
-
-rescued:
-
-	mb = mapCtl.mapcnext;
-    
-    if ( big ) {												/* if we need a big (128-byte) mapping */
-        mapCtl.mapcbig++;										/* count attempts to allocate a big mapping */
-        mbn = NULL;												/* this will be prev ptr */
-        mindx = 0;
-        while( mb ) {											/* loop over mapping blocks with free entries */
-            mindx = mapalc2(mb);								/* try for 2 consecutive free bits in this block */
-
-            if ( mindx ) break;									/* exit loop if we found them */
-            mbn = mb;											/* remember previous block */
-            mb = mb->nextblok;									/* move on to next block */
-        }
-        if ( mindx == 0 ) {										/* if we couldn't find 2 consecutive bits... */
-            mapCtl.mapcbigfails++;								/* count failures */
-            big = 0;											/* forget that we needed a big mapping */
-            lists = mpBasicLists;								/* clamp list count down to the max in a 64-byte mapping */
-            mb = mapCtl.mapcnext;								/* back to the first block with a free entry */
-        }
-        else {													/* if we did find a big mapping */
-            mapCtl.mapcfree--;									/* Decrement free count twice */
-            mapCtl.mapcinuse++;									/* Bump in use count twice */
-            if ( mindx < 0 ) {									/* if we just used the last 2 free bits in this block */
-                if (mbn) {										/* if this wasn't the first block */
-                    mindx = -mindx;								/* make positive */
-                    mbn->nextblok = mb->nextblok;				/* unlink this one from the middle of block list */
-                    if (mb ==  mapCtl.mapclast)	{				/* if we emptied last block */
-                        mapCtl.mapclast = mbn;					/* then prev block is now last */
-                    }
-                }
-            }
-        }
-    }
-    
-    if ( !big ) {												/* if we need a small (64-byte) mapping */
-        if(!(mindx = mapalc1(mb))) 								/* Allocate a 1-bit slot */
-            panic("mapping_alloc - empty mapping block detected at %p\n", mb);
-    }
-	
-	if(mindx < 0) {												/* Did we just take the last one */
-		mindx = -mindx;											/* Make positive */
-		mapCtl.mapcnext = mb->nextblok;							/* Remove us from the list */
-		if(!((unsigned int)mapCtl.mapcnext)) mapCtl.mapclast = NULL;	/* Removed the last one */
-	}
-	
-	mapCtl.mapcfree--;											/* Decrement free count */
-	mapCtl.mapcinuse++;											/* Bump in use count */
-	
-	mapCtl.mapcallocc++;										/* Count total calls */
-
-/*
- *	Note: in the following code, we will attempt to rescue blocks only one at a time.
- *	Eventually, after a few more mapping_alloc calls, we will catch up.  If there are none
- *	rescuable, we will kick the misc scan, which will allocate some for us.  We only do this
- *	if we haven't already done it.
- *	For early boot, we are set up to only rescue one block at a time.  This is because we prime
- *	the release list with as much as we need until threads start.
- */
-
-	if(mapCtl.mapcfree < mapCtl.mapcmin) {						/* See if we need to replenish */
-		if((mbn = mapCtl.mapcrel) != 0) {						/* Try to rescue a block from impending doom */
-			mapCtl.mapcrel = mbn->nextblok;						/* Pop the queue */
-			mapCtl.mapcreln--;									/* Back off the count */
-			mapping_free_init((vm_offset_t)mbn, 0, 1);			/* Initialize a non-permanent block */
-		}
-		else {													/* We need to replenish */
-			if (mapCtl.mapcfree < (mapCtl.mapcmin / 4)) {
-				if(hw_compare_and_store(0, 1, &mapCtl.mapcrecurse)) {	/* Make sure we aren't recursing */
-					thread_call_enter(mapping_adjust_call);		/* Go allocate some more */
-				}
-			}
-		}
-	}
-
-	hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);				/* Unlock our stuff */
-	splx(s);													/* Restore 'rupts */
-	
-	mp = &((mapping_t *)mb)[mindx];								/* Point to the allocated mapping */
-    mp->mpFlags = lists;										/* set the list count */
-
-
-	return mp;													/* Send it back... */
-}
-
-
-void
-consider_mapping_adjust(void)
-{
-	spl_t			s;
-
-	s = splhigh();												/* Don't bother from now on */
-	if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) {	/* Lock the control header */ 
-		panic("consider_mapping_adjust -- lock timeout\n");
-	}
-
-	if (mapCtl.mapcfree < (mapCtl.mapcmin / 4)) {
-		if(hw_compare_and_store(0, 1, &mapCtl.mapcrecurse)) {	/* Make sure we aren't recursing */
-			thread_call_enter(mapping_adjust_call);				/* Go allocate some more */
-		}
-	}
-
-	hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);				/* Unlock our stuff */
-	splx(s);													/* Restore 'rupts */
-	
-}
-
-
-
-/*
- *		void mapping_free_init(mb, perm) - Adds a block of storage to the free mapping list
- *
- *		The mapping block is a page size area on a page boundary.  It contains 1 header and 63
- *		mappings.  This call adds and initializes a block for use.  Mappings come in two sizes,
- *		64 and 128 bytes (the only difference is the number of skip-lists.)  When we allocate a
- *		128-byte mapping we just look for two consecutive free 64-byte mappings, so most of the
- *		code only deals with "basic" 64-byte mappings.  This works for two reasons:
- *			- Only one in 256 mappings is big, so they are rare.
- *			- If we cannot find two consecutive free mappings, we just return a small one.
- *			  There is no problem with doing this, except a minor performance degradation.
- *		Therefore, all counts etc in the mapping control structure are in units of small blocks.
- *	
- *		The header contains a chain link, bit maps, a virtual to real translation mask, and
- *		some statistics. Bit maps map each slot on the page (bit 0 is not used because it 
- *		corresponds to the header).  The translation mask is the XOR of the virtual and real
- *		addresses (needless to say, the block must be wired).
- *
- *		We handle these mappings the same way as saveareas: the block is only on the chain so
- *		long as there are free entries in it.
- *
- *		Empty blocks are garbage collected when there are at least mapCtl.mapcmin pages worth of free 
- *		mappings. Blocks marked PERM won't ever be released.
- *
- *		If perm is negative, the mapping is initialized, but immediately queued to the mapCtl.mapcrel
- *		list.  We do this only at start up time. This is done because we only allocate blocks 
- *		in the pageout scan and it doesn't start up until after we run out of the initial mappings.
- *		Therefore, we need to preallocate a bunch, but we don't want them to be permanent.  If we put
- *		them on the release queue, the allocate routine will rescue them.  Then when the
- *		pageout scan starts, all extra ones will be released.
- *
- */
-
-
-void mapping_free_init(vm_offset_t mbl, int perm, boolean_t locked) {
-															/* Sets up a block of mappings;
-															   perm indicates if the block can be released 
-															   or goes straight to the release queue;
-															   locked indicates if the lock is held already */
-														   
-	mappingblok_t	*mb;
-	spl_t		s;
-	addr64_t	raddr;
-	ppnum_t		pp;
-
-	mb = (mappingblok_t *)mbl;								/* Start of area */	
-	
-	if(perm >= 0) {											/* See if we need to initialize the block */
-		if(perm) {
-			raddr = (addr64_t)((unsigned int)mbl);			/* Perm means V=R */
-			mb->mapblokflags = mbPerm;						/* Set perm */
-//			mb->mapblokflags |= (unsigned int)mb;			/* (BRINGUP) */
-		}
-		else {
-			pp = pmap_find_phys(kernel_pmap, (addr64_t)mbl);	/* Get the physical page */
-			if(!pp) {										/* What gives?  Where's the page? */
-				panic("mapping_free_init: could not find translation for vaddr %016llX\n", (addr64_t)mbl);
-			}
-			
-			raddr = (addr64_t)pp << 12;						/* Convert physical page to physical address */
-			mb->mapblokflags = 0;							/* Set not perm */
-//			mb->mapblokflags |= (unsigned int)mb;			/* (BRINGUP) */
-		}
-		
-		mb->mapblokvrswap = raddr ^ (addr64_t)((unsigned int)mbl);		/* Form translation mask */
-		
-		mb->mapblokfree[0] = 0x7FFFFFFF;					/* Set first 32 (minus 1) free */
-		mb->mapblokfree[1] = 0xFFFFFFFF;					/* Set next 32 free */
-	}
-	
-	s = splhigh();											/* Don't bother from now on */
-	if(!locked) {											/* Do we need the lock? */
-		if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) {		/* Lock the control header */ 
-			panic("mapping_free_init: timeout getting control lock\n");	/* Tell all and die */
-		}
-	}
-	
-	if(perm < 0) {											/* Direct to release queue? */
-		mb->nextblok = mapCtl.mapcrel;						/* Move forward pointer */
-		mapCtl.mapcrel = mb;								/* Queue us on in */
-		mapCtl.mapcreln++;									/* Count the free block */
-	}
-	else {													/* Add to the free list */
-		
-		mb->nextblok = NULL;									/* We always add to the end */
-		mapCtl.mapcfree += MAPPERBLOK;						/* Bump count */
-		
-		if(!((unsigned int)mapCtl.mapcnext)) {				/* First entry on list? */
-			mapCtl.mapcnext = mapCtl.mapclast = mb;			/* Chain to us */
-		}
-		else {												/* We are not the first */
-			mapCtl.mapclast->nextblok = mb;					/* Point the last to us */
-			mapCtl.mapclast = mb;							/* We are now last */
-		}
-	}
-		
-	if(!locked) {											/* Do we need to unlock? */
-		hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);		/* Unlock our stuff */
-	}
-
-	splx(s);												/* Restore 'rupts */
-	return;													/* All done, leave... */
-}
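-
-/*
- *		The mapblokvrswap mask works because XOR is self-inverse: for a block
- *		with kernel virtual address v and physical address r, the mask is v ^ r,
- *		so r == v ^ mapblokvrswap and v == r ^ mapblokvrswap.  In the perm
- *		(V=R) case the mask is simply 0.
- */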
-
-
-/*
- *		void mapping_prealloc(unsigned int) - Preallocates mappings for a large request
- *	
- *		No locks can be held, because we allocate memory here.
- *		This routine needs a corresponding mapping_relpre call to remove the
- *		hold off flag so that the adjust routine will free the extra mapping
- *		blocks on the release list.  I don't like this, but I don't know
- *		how else to do this for now...
- *
- */
-
-void mapping_prealloc(unsigned int size) {					/* Preallocates mappings for a large request */
-
-	int	nmapb, i;
-	kern_return_t	retr;
-	mappingblok_t	*mbn;
-	spl_t		s;
-
-	s = splhigh();											/* Don't bother from now on */
-	if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) {		/* Lock the control header */ 
-		panic("mapping_prealloc - timeout getting control lock\n");	/* Tell all and die */
-	}
-
-	nmapb = (size >> 12) + mapCtl.mapcmin;					/* Get number of entries needed for this and the minimum */
-	
-	mapCtl.mapcholdoff++;									/* Bump the hold off count */
-	
-	if((nmapb = (nmapb - mapCtl.mapcfree)) <= 0) {			/* Do we already have enough? */
-		hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);		/* Unlock our stuff */
-		splx(s);											/* Restore 'rupts */
-		return;
-	}
-	if (!hw_compare_and_store(0, 1, &mapCtl.mapcrecurse)) {	    /* Make sure we aren't recursing */
-		hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);			/* Unlock our stuff */
-		splx(s);											/* Restore 'rupts */
-		return;
-	}
-	nmapb = (nmapb + MAPPERBLOK - 1) / MAPPERBLOK;			/* Get number of blocks to get */
-	
-	hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);			/* Unlock our stuff */
-	splx(s);												/* Restore 'rupts */
-	
-	for(i = 0; i < nmapb; i++) {							/* Allocate 'em all */
-		retr = kmem_alloc_kobject(mapping_map, (vm_offset_t *)&mbn, PAGE_SIZE);	/* Find a virtual address to use */
-		if(retr != KERN_SUCCESS) 							/* Did we get some memory? */
-			break;
-		mapping_free_init((vm_offset_t)mbn, -1, 0);			/* Initialize onto the release queue */
-	}
-	if ((mapCtl.mapcinuse + mapCtl.mapcfree + (mapCtl.mapcreln * (MAPPERBLOK + 1))) > mapCtl.mapcmaxalloc)
-	        mapCtl.mapcmaxalloc = mapCtl.mapcinuse + mapCtl.mapcfree + (mapCtl.mapcreln * (MAPPERBLOK + 1));
-
-	mapCtl.mapcrecurse = 0;										/* We are done now */
-}
-
-/*
- *		void mapping_relpre(void) - Releases preallocation release hold off
- *	
- *		This routine removes the
- *		hold off flag so that the adjust routine will free the extra mapping
- *		blocks on the release list.  I don't like this, but I don't know
- *		how else to do this for now...
- *
- */
-
-void mapping_relpre(void) {									/* Releases release hold off */
-
-	spl_t		s;
-
-	s = splhigh();											/* Don't bother from now on */
-	if(!hw_lock_to((hw_lock_t)&mapCtl.mapclock, LockTimeOut)) {		/* Lock the control header */ 
-		panic("mapping_relpre - timeout getting control lock\n");	/* Tell all and die */
-	}
-	if(--mapCtl.mapcholdoff < 0) {							/* Back down the hold off count */
-		panic("mapping_relpre: hold-off count went negative\n");
-	}
-
-	hw_lock_unlock((hw_lock_t)&mapCtl.mapclock);			/* Unlock our stuff */
-	splx(s);												/* Restore 'rupts */
-}
-
-/*
- *		void mapping_free_prime(void) - Primes the mapping block release list
- *
- *		See mapping_free_init.
- *		No locks can be held, because we allocate memory here.
- *		One processor running only.
- *
- */
-
-void mapping_free_prime(void) {									/* Primes the mapping block release list */
-
-	int	nmapb, i;
-	kern_return_t	retr;
-	mappingblok_t	*mbn;
-	vm_offset_t     mapping_min;
-	
-	retr = kmem_suballoc(kernel_map, &mapping_min, sane_size / 16,
-			     FALSE, VM_FLAGS_ANYWHERE, &mapping_map);
-
-	if (retr != KERN_SUCCESS)
-	        panic("mapping_free_prime: kmem_suballoc failed");
-
-
-	nmapb = (mapCtl.mapcfree + mapCtl.mapcinuse + MAPPERBLOK - 1) / MAPPERBLOK;	/* Get permanent allocation */
-	nmapb = nmapb * 4;											/* Get 4 times our initial allocation */
-
-#if DEBUG
-	kprintf("mapping_free_prime: free = %08X; in use = %08X; priming = %08X\n", 
-	  mapCtl.mapcfree, mapCtl.mapcinuse, nmapb);
-#endif
-	
-	for(i = 0; i < nmapb; i++) {								/* Allocate 'em all */
-		retr = kmem_alloc_kobject(mapping_map, (vm_offset_t *)&mbn, PAGE_SIZE);	/* Find a virtual address to use */
-		if(retr != KERN_SUCCESS) {								/* Did we get some memory? */
-			panic("Whoops...  Not a bit of wired memory left for anyone\n");
-		}
-		mapping_free_init((vm_offset_t)mbn, -1, 0);				/* Initialize onto release queue */
-	}
-	if ((mapCtl.mapcinuse + mapCtl.mapcfree + (mapCtl.mapcreln * (MAPPERBLOK + 1))) > mapCtl.mapcmaxalloc)
-	        mapCtl.mapcmaxalloc = mapCtl.mapcinuse + mapCtl.mapcfree + (mapCtl.mapcreln * (MAPPERBLOK + 1));
-}
-
-
-void
-mapping_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size,
-  		       vm_size_t *alloc_size, int *collectable, int *exhaustable)
-{
-        *count      = mapCtl.mapcinuse;
-	*cur_size   = ((PAGE_SIZE / (MAPPERBLOK + 1)) * (mapCtl.mapcinuse + mapCtl.mapcfree)) + (PAGE_SIZE * mapCtl.mapcreln);
-	*max_size   = (PAGE_SIZE / (MAPPERBLOK + 1)) * mapCtl.mapcmaxalloc;
-	*elem_size  = (PAGE_SIZE / (MAPPERBLOK + 1));
-	*alloc_size = PAGE_SIZE;
-
-	*collectable = 1;
-	*exhaustable = 0;
-}
-
-
-/*
- *		addr64_t	mapping_p2v(pmap_t pmap, ppnum_t pa) - Finds first virtual mapping of a physical page in a space
- *
- *		First looks up the physical entry associated with the physical page.  Then searches the alias
- *		list for a matching pmap.  It grabs the virtual address from the mapping, drops busy, and returns 
- *		that.
- *
- */
-
-addr64_t	mapping_p2v(pmap_t pmap, ppnum_t pa) {				/* Finds first virtual mapping of a physical page in a space */
-
-	spl_t s;
-	mapping_t *mp;
-	unsigned int pindex;
-	phys_entry_t *physent;
-	addr64_t va;
-
-	physent = mapping_phys_lookup(pa, &pindex);					/* Get physical entry */
-	if(!physent) {												/* Did we find the physical page? */
-		panic("mapping_p2v: invalid physical page %08X\n", pa);
-	}
-
-	s = splhigh();											/* Make sure interruptions are disabled */
-
-	mp = hw_find_space(physent, pmap->space);				/* Go find the first mapping to the page from the requested pmap */
-
-	if(mp) {												/* Did we find one? */
-		va = mp->mpVAddr & -4096;							/* If so, get the cleaned up vaddr */
-		mapping_drop_busy(mp);								/* Go ahead and release the mapping now */
-	}
-	else va = 0;											/* Return failure */
-
-	splx(s);												/* Restore 'rupts */
-	
-	return va;												/* Bye, bye... */
-	
-}
-
-
-/*
- *	kvtophys(addr)
- *
- *	Convert a kernel virtual address to a physical address
- */
-addr64_t kvtophys(vm_offset_t va) {
-
-	return pmap_extract(kernel_pmap, va);					/* Find mapping and lock the physical entry for this mapping */
-
-}
-
-/*
- *	kvtophys64(addr)
- *
- *	Convert a kernel virtual address to a 64-bit physical address
- */
-vm_map_offset_t kvtophys64(vm_map_offset_t va) {
-
-	ppnum_t pa = pmap_find_phys(kernel_pmap, (addr64_t)va);
-
-	if (!pa)
-		return 0;
-	return (((vm_map_offset_t)pa) << 12) | (va & 0xfff);
-
-}
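-
-/*
- *	Worked example (illustrative, not from the original source): ppnum_t is a
- *	4KB page number, so if pmap_find_phys() returns 0x12345 for
- *	va = 0xE0001234, the result is (0x12345 << 12) | (0xE0001234 & 0xfff),
- *	that is, 0x12345000 | 0x234 = 0x12345234.  A page number of zero is the
- *	"not mapped" sentinel, hence the early return of 0 above.
- */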
-
-/*
- *		void ignore_zero_fault(boolean_t) - Sets up to ignore or honor any fault on 
- *		page 0 access for the current thread.
- *
- *		If parameter is TRUE, faults are ignored
- *		If parameter is FALSE, faults are honored
- *
- */
-
-void ignore_zero_fault(boolean_t type) {				/* Sets up to ignore or honor any fault on page 0 access for the current thread */
-
-	if(type) current_thread()->machine.specFlags |= ignoreZeroFault;	/* Ignore faults on page 0 */
-	else     current_thread()->machine.specFlags &= ~ignoreZeroFault;	/* Honor faults on page 0 */
-	
-	return;												/* Return */
-}
-
-/*
- * no-op in current ppc implementation
- */
-void inval_copy_windows(__unused thread_t th)
-{
-}
-
-
-/* 
- *		Copies data between a physical page and a virtual page, or between two physical pages.  This is used to 
- *		move data from the kernel to user state. Note that the "which" parm
- *		says which of the parameters is physical and if we need to flush sink/source.  
- *		Note that both addresses may be physical, but only one may be virtual.
- *
- *		The rules are that the size can be anything.  Either address can be on any boundary
- *		and span pages.  The physical data must be contiguous as must the virtual.
- *
- *		We can block when we try to resolve the virtual address at each page boundary.
- *		We don't check protection on the physical page.
- *
- *		Note that we will not check the entire range and if a page translation fails,
- *		we will stop with partial contents copied.
- *
- */
- 
-kern_return_t
-hw_copypv_32(addr64_t source, addr64_t sink, unsigned int size, int which)
-{
-	vm_map_t map;
-	kern_return_t ret;
-	addr64_t nextva, vaddr = 0, paddr;
-	mapping_t *mp = NULL;
-	spl_t s;
-	unsigned int lop, csize;
-	int needtran, bothphys;
-	unsigned int pindex;
-	phys_entry_t *physent;
-	vm_prot_t prot = 0;
-	int orig_which;
-
-	orig_which = which;
-
-	map = (which & cppvKmap) ? kernel_map : current_map_fast();
-
-	if((which & (cppvPsrc | cppvPsnk)) == 0 ) {		/* Make sure that only one is virtual */
-		panic("copypv: no more than 1 parameter may be virtual\n");	/* Not allowed */
-	}
-	
-	bothphys = 1;									/* Assume both are physical */
-	
-	if(!(which & cppvPsnk)) {						/* Is sink page virtual? */
-		vaddr = sink;								/* Sink side is virtual */
-		bothphys = 0;								/* Show both aren't physical */
-		prot = VM_PROT_READ | VM_PROT_WRITE;		/* Sink always must be read/write */
-	} else if (!(which & cppvPsrc)) {				/* Is source page virtual? */
-		vaddr = source;								/* Source side is virtual */
-		bothphys = 0;								/* Show both aren't physical */
-		prot = VM_PROT_READ; 						/* Virtual source is always read only */
-	}
-
-	needtran = 1;									/* Show we need to map the virtual the first time */
-	s = splhigh();									/* Don't bother me */
-
-	while(size) {
-
-		if(!bothphys && (needtran || !(vaddr & 4095LL))) {	/* If first time or we stepped onto a new page, we need to translate */
-			if(!needtran) {							/* If this is not the first translation, we need to drop the old busy */
-				mapping_drop_busy(mp);				/* Release the old mapping now */
-			}
-			needtran = 0;
-			
-			while(1) {
-				mp = mapping_find(map->pmap, vaddr, &nextva, 1);	/* Find and busy the mapping */
-				if(!mp) {							/* Was it there? */
-					if(getPerProc()->istackptr == 0)
-						panic("copypv: No valid mapping on memory %s %16llx", "RD", vaddr);
-
-					splx(s);						/* Restore the interrupt level */
-					ret = vm_fault(map, vm_map_trunc_page(vaddr), prot, FALSE, THREAD_UNINT, NULL, 0);	/* Didn't find it, try to fault it in... */
-				
-					if(ret != KERN_SUCCESS)return KERN_FAILURE;	/* Didn't find any, return no good... */
-					
-					s = splhigh();					/* Don't bother me */
-					continue;						/* Go try for the map again... */
-	
-				}
-				if (mp->mpVAddr & mpI) {                 /* cache inhibited, so force the appropriate page to be flushed before */
-				        if (which & cppvPsrc)            /* and after the copy to avoid cache paradoxes */
-					        which |= cppvFsnk;
-					else
-					        which |= cppvFsrc;
-				} else
-				        which = orig_which;
-
-				/* Note that we have to have the destination writable.  So, if we already have it, or we are mapping the source,
-					we can just leave.
-				*/		
-				if((which & cppvPsnk) || !(mp->mpVAddr & 1)) break;		/* We got it mapped R/W or the source is not virtual, leave... */
-			
-				mapping_drop_busy(mp);				/* Go ahead and release the mapping for now */
-				if(getPerProc()->istackptr == 0)
-					panic("copypv: No valid mapping on memory %s %16llx", "RDWR", vaddr);
-				splx(s);							/* Restore the interrupt level */
-				
-				ret = vm_fault(map, vm_map_trunc_page(vaddr), VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0);	/* check for a COW area */
-				if (ret != KERN_SUCCESS) return KERN_FAILURE;	/* We couldn't get it R/W, leave in disgrace... */
-				s = splhigh();						/* Don't bother me */
-			}
-			paddr = ((addr64_t)mp->mpPAddr << 12) + (vaddr - (mp->mpVAddr & -4096LL));        /* construct the physical address... this calculation works */
-			                                                                                  /* properly on both single page and block mappings */
-			if(which & cppvPsrc) sink = paddr;		/* If source is physical, then the sink is virtual */
-		else source = paddr;					/* Otherwise the source was the virtual one */
-		}
-			
-		lop = (unsigned int)(4096LL - (sink & 4095LL));		/* Assume sink smallest */
-		if(lop > (unsigned int)(4096LL - (source & 4095LL))) lop = (unsigned int)(4096LL - (source & 4095LL));	/* No, source is smaller */
-		
-		csize = size;								/* Assume we can copy it all */
-		if(lop < size) csize = lop;					/* Nope, we can't do it all */
-		
-		if(which & cppvFsrc) flush_dcache64(source, csize, 1);	/* If requested, flush source before move */
-		if(which & cppvFsnk) flush_dcache64(sink, csize, 1);	/* If requested, flush sink before move */
-
-		bcopy_physvir_32(source, sink, csize);			/* Do a physical copy, virtually */
-		
-		if(which & cppvFsrc) flush_dcache64(source, csize, 1);	/* If requested, flush source after move */
-		if(which & cppvFsnk) flush_dcache64(sink, csize, 1);	/* If requested, flush sink after move */
-
-/*
- *		Note that for certain ram disk flavors, we may be copying outside of known memory.
- *		Therefore, before we try to mark it modified, we check if it exists.
- */
-
-		if( !(which & cppvNoModSnk)) {
-		        physent = mapping_phys_lookup(sink >> 12, &pindex);	/* Get physical entry for sink */
-			if(physent) mapping_set_mod((ppnum_t)(sink >> 12));		/* Make sure we know that it is modified */
-		}
-		if( !(which & cppvNoRefSrc)) {
-		        physent = mapping_phys_lookup(source >> 12, &pindex);	/* Get physical entry for source */
-			if(physent) mapping_set_ref((ppnum_t)(source >> 12));		/* Make sure we know that it is referenced */
-		}
-		size = size - csize;						/* Calculate what is left */
-		vaddr = vaddr + csize;						/* Move to next sink address */
-		source = source + csize;					/* Bump source to next physical address */
-		sink = sink + csize;						/* Bump sink to next physical address */
-	}
-	
-	if(!bothphys) mapping_drop_busy(mp);			/* Go ahead and release the mapping of the virtual page if any */
-	splx(s);										/* Open up for interrupts */
-
-	return KERN_SUCCESS;
-}
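-
-/*
- *	Hypothetical usage sketch (not part of the original source): copy one page
- *	from a physical source into a kernel virtual sink.  cppvPsrc marks the
- *	source address as physical and cppvKmap selects kernel_map for the
- *	virtual side; reference/modify tracking is left enabled.
- */
-static kern_return_t
-example_copy_phys_to_kernel(addr64_t src_paddr, vm_offset_t dst_kva)
-{
-	return hw_copypv_32(src_paddr, (addr64_t)dst_kva, PAGE_SIZE,
-		cppvPsrc | cppvKmap);					/* Source physical, sink kernel virtual */
-}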
-
-
-/*
- *	Debug code 
- */
-
-void mapping_verify(void) {
-
-	spl_t		s;
-	mappingblok_t	*mb, *mbn;
-	unsigned int	relncnt;
-	unsigned int	dumbodude;
-
-	dumbodude = 0;
-	
-	s = splhigh();											/* Don't bother from now on */
-
-	mbn = NULL;												/* Start with none */
-	for(mb = mapCtl.mapcnext; mb; mb = mb->nextblok) {		/* Walk the free chain */
-		if((mappingblok_t *)(mb->mapblokflags & 0x7FFFFFFF) != mb) {	/* Is tag ok? */
-			panic("mapping_verify: flags tag bad, free chain; mb = %p, tag = %08X\n", mb, mb->mapblokflags);
-		}
-		mbn = mb;											/* Remember the last one */
-	}
-	
-	if(mapCtl.mapcnext && (mapCtl.mapclast != mbn)) {		/* Do we point to the last one? */
-		panic("mapping_verify: last pointer bad; mb = %p, mapclast = %p\n", mb, mapCtl.mapclast);
-	}
-	
-	relncnt = 0;											/* Clear count */
-	for(mb = mapCtl.mapcrel; mb; mb = mb->nextblok) {		/* Walk the release chain */
-		dumbodude |= mb->mapblokflags;						/* Just touch it to make sure it is mapped */
-		relncnt++;											/* Count this one */
-	}
-	
-	if(mapCtl.mapcreln != relncnt) {							/* Is the count on release queue ok? */
-		panic("mapping_verify: bad release queue count; mapcreln = %d, cnt = %d, ignore this = %08X\n", mapCtl.mapcreln, relncnt, dumbodude);
-	}
-
-	splx(s);												/* Restore 'rupts */
-
-	return;
-}
-
-void mapping_phys_unused(ppnum_t pa) {
-
-	unsigned int pindex;
-	phys_entry_t *physent;
-
-	physent = mapping_phys_lookup(pa, &pindex);				/* Get physical entry */
-	if(!physent) return;									/* Did we find the physical page? */
-
-	if(!(physent->ppLink & ~(ppLock | ppFlags))) return;	/* No one else is here */
-	
-	panic("mapping_phys_unused: physical page (%08X) in use, physent = %p\n", pa, physent);
-	
-}
-	
-void
-mapping_hibernate_flush(void)
-{
-    unsigned int page, bank;
-    struct phys_entry * entry;
-
-    for (bank = 0; bank < pmap_mem_regions_count; bank++)
-    {
-	entry = (struct phys_entry *) pmap_mem_regions[bank].mrPhysTab;
-	for (page = pmap_mem_regions[bank].mrStart; page <= pmap_mem_regions[bank].mrEnd; page++)
-	{
-	    hw_walk_phys(entry, hwpNoop, hwpNoop, hwpNoop, 0, hwpPurgePTE);
-	    entry++;
-	}
-    }
-}
-
-
-
-
-
-
diff --git a/osfmk/ppc/mappings.h b/osfmk/ppc/mappings.h
deleted file mode 100644
index 0777f1b6f..000000000
--- a/osfmk/ppc/mappings.h
+++ /dev/null
@@ -1,499 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- *		Header files for the hardware virtual memory mapping stuff 
- */
-#ifdef	XNU_KERNEL_PRIVATE
-
-#ifndef	_PPC_MAPPINGS_H_
-#define	_PPC_MAPPINGS_H_
-
-#include <mach/mach_types.h>
-#include <mach/vm_types.h>
-#include <mach/machine/vm_types.h>
-#include <mach/vm_prot.h>
-#include <mach/vm_statistics.h>
-#include <kern/assert.h>
-#include <kern/cpu_number.h>
-#include <kern/lock.h>
-#include <kern/queue.h>
-#include <ppc/proc_reg.h>
-
-/*
- * Don't change these structures unless you change the assembly code
- */
-
-/*
- *	This control block serves as anchor for all virtual mappings of the same physical
- *	page, i.e., aliases.  There is a table for each bank (mem_region).  All tables
- *	must reside in V=R storage and within the first 2GB of memory. Also, the
- *	mappings to which it points must be on at least a 64-byte boundary. These 
- *	requirements allow a total of 2 bits for status and flags, and allow all address
- *	calculations to be 32-bit.
- */
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct phys_entry {
-	addr64_t	ppLink;				/* Physical pointer to aliased mappings and flags */
-#define		ppLock		0x8000000000000000LL	/* Lock for alias chain */
-#define		ppFlags		0x700000000000000FLL	/* Status and flags */
-#define		ppI			0x2000000000000000LL	/* Cache inhibited */
-#define		ppIb		2						/* Cache inhibited */
-#define		ppG			0x1000000000000000LL	/* Guarded */
-#define		ppGb		3						/* Guarded */
-#define		ppR			0x0000000000000008LL	/* Referenced */
-#define		ppRb		60						/* Referenced */
-#define		ppC			0x0000000000000004LL	/* Changed */
-#define		ppCb		61						/* Changed */
-
-/* The lock, attribute, and flag bits are arranged so that their positions may be
- * described by a contiguous mask of one bits wrapping from bit position 63 to 0.
- * In assembly language, we can then rapidly produce this mask with:
- *		li		r0,ppLFAmask		; r0 <- 0x00000000000000FF
- *		rotrdi	r0,r0,ppLFArrot		; r0 <- 0xF00000000000000F
- */
-#define		ppLFAmask	0x00FF					/* One bit for each lock, attr, or flag bit */
-#define		ppLFArrot	4						/* Right-rotate count to obtain 64-bit mask */
-} phys_entry_t;
-#pragma pack()
-#define physEntrySize sizeof(phys_entry_t)
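-
-/*
- *	Illustrative check (not in the original header): rotating ppLFAmask right
- *	by ppLFArrot bits in 64-bit arithmetic, as the assembly sequence above
- *	describes, reproduces exactly the lock, attribute, and flag bits:
- *	(0x00FFULL >> 4) | (0x00FFULL << 60) == 0xF00000000000000F == ppLock | ppFlags.
- */
-typedef char ppLFA_rotate_check[
-	((((unsigned long long)ppLFAmask >> ppLFArrot) |
-	  ((unsigned long long)ppLFAmask << (64 - ppLFArrot))) == (ppLock | ppFlags)) ? 1 : -1];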
-
-/* Memory may be non-contiguous. This data structure contains info
- * for mapping this non-contiguous space into the contiguous
- * physical->virtual mapping tables. An array of this type is
- * provided to the pmap system at bootstrap by ppc_vm_init.
- *
- */
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct mem_region {
-	phys_entry_t	   *mrPhysTab;	/* Base of region table */
-	ppnum_t				mrStart;	/* Start of region */
-	ppnum_t				mrEnd;		/* Last page in region */
-	ppnum_t				mrAStart;	/* Next page in region to allocate */
-	ppnum_t				mrAEnd;		/* Last page in region to allocate */
-} mem_region_t;
-#pragma pack()
-
-#define mrSize sizeof(mem_region_t)
-#define PMAP_MEM_REGION_MAX 11
-
-extern mem_region_t pmap_mem_regions[PMAP_MEM_REGION_MAX + 1];
-extern unsigned int pmap_mem_regions_count;
-
-/* Prototypes */
-
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct PCA {					/* PTEG Control Area */
-	union flgs {
-		unsigned int	PCAallo;		/* Allocation controls */
-		struct PCAalflgs {				/* Keep these in order!!! */
-			unsigned char	PCAfree;	/* Indicates the slot is free */
-			unsigned char	PCAsteal;	/* Steal scan start position */
-			unsigned char	PCAauto;	/* Indicates that the PTE was autogenned */
-			unsigned char	PCAmisc;	/* Misc. flags */
-#define PCAlock 1						/* This locks up the associated PTEG */
-#define PCAlockb 31
-		} PCAalflgs;
-	} flgs;
-} PCA_t;
-#pragma pack()
-
-/* The hash table is composed of mappings organized into G groups of S slots
- * each. In the macros below, GV_GROUPS_LG2, GV_SLOT_SZ_LG2, and GV_SLOTS_LG2 give the number
- * of groups, the size (in bytes) of a slot, and the number of slots in a group. Since these
- * values are given as log2, they're restricted to powers of two. Fast operation and all that.
- * 
- * This batch of macros defines all of the hash table's metrics and handy masks. It's a
- * build-time thing because it's faster that way. Only the first group of values may
- * be adjusted.
- */
-#define GV_GROUPS_LG2	10	/* 1024 groups per hash table (log2(max) is 14, viz. 16K groups) */
-#define GV_SLOTS_LG2	3	/* 8 slots per group (log2(max) is 8, viz. 256 slots) */
-
-#define GV_SLOT_SZ_LG2	5	/* 32 bytes per slot (mapping size) */
-#define GV_PGIDX_SZ_LG2	3	/* 64-bit Hash-table-page physical-address index entry size */
-#define GV_PAGE_SZ_LG2	12	/* 4k-byte hash-table-page size */
-
-#define GV_GROUPS		(1 << GV_GROUPS_LG2)
-#define GV_SLOT_SZ		(1 << GV_SLOT_SZ_LG2)
-#define GV_SLOTS		(1 << GV_SLOTS_LG2)
-#define GV_PAGE_SZ		(1 << GV_PAGE_SZ_LG2)
-#define GV_GRP_MASK		(GV_GROUPS - 1)
-#define GV_SLOT_MASK	(GV_SLOTS - 1)
-#define GV_PAGE_MASK	(GV_PAGE_SZ - 1)
-#define GV_HPAGES		(1 << (GV_GROUPS_LG2 + GV_SLOT_SZ_LG2 + GV_SLOTS_LG2 - GV_PAGE_SZ_LG2))
-#define GV_GRPS_PPG_LG2	(GV_PAGE_SZ_LG2 - (GV_SLOT_SZ_LG2 + GV_SLOTS_LG2))
-#define GV_GRPS_PPG		(1 << GV_GRPS_PPG_LG2)
-#define GV_SLTS_PPG_LG2 (GV_PAGE_SZ_LG2 - GV_SLOT_SZ_LG2)
-#define GV_SLTS_PPG		(1 << GV_SLTS_PPG_LG2)
-
-#define GV_HPAGE_SHIFT	(GV_PGIDX_SZ_LG2 - GV_GRPS_PPG_LG2)
-#define GV_HPAGE_MASK	((GV_HPAGES - 1) << GV_PGIDX_SZ_LG2)
-#define GV_HGRP_SHIFT	(GV_SLOT_SZ_LG2 + GV_SLOTS_LG2)
-#define GV_HGRP_MASK	((GV_GRPS_PPG - 1) << GV_HGRP_SHIFT)
-
-#define GV_MAPWD_BITS_LG2	5	/* 32-bit active map word size */
-#define GV_MAPWD_SZ_LG2	(GV_MAPWD_BITS_LG2 - 3)
-#define GV_BAND_SHIFT	(GV_MAPWD_BITS_LG2 + GV_SLOT_SZ_LG2)
-#define GV_BAND_SZ_LG2	(GV_PAGE_SZ_LG2 - GV_SLOT_SZ_LG2 - GV_MAPWD_BITS_LG2)
-#define GV_BAND_MASK	(((1 << GV_BAND_SZ_LG2) - 1) << GV_BAND_SHIFT)
-#define GV_MAP_WORDS	(1 << (GV_GROUPS_LG2 + GV_SLOTS_LG2 - GV_MAPWD_BITS_LG2))
-#define GV_MAP_MASK		((GV_MAP_WORDS - 1) << GV_MAPWD_SZ_LG2)
-#define GV_MAP_SHIFT	(GV_PGIDX_SZ_LG2 - GV_BAND_SZ_LG2)
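-
-/*
- *	Worked example (illustrative, not part of the original header): with the
- *	default GV_GROUPS_LG2 = 10 and GV_SLOTS_LG2 = 3 above, the table holds
- *	1024 groups * 8 slots * 32 bytes = 256KB, so GV_HPAGES is 64 4KB pages
- *	and GV_GRPS_PPG is 16 groups per page.  Compile-time checks pin this down:
- */
-typedef char gv_hpages_check[(GV_HPAGES == 64) ? 1 : -1];
-typedef char gv_grps_ppg_check[(GV_GRPS_PPG == 16) ? 1 : -1];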
-
-
-/* Mappings currently come in two sizes: 64 and 128 bytes.  The only difference is the
- * number of skiplists (ie, mpLists): 64-byte mappings have 1-4 lists and 128-byte mappings
- * have from 5-12.  Only 1 in 256 mappings is large, so an average mapping is 64.25 bytes.
- * All mappings are 64-byte aligned.
- *
- * Special note on mpFIP and mpRIP:
- *	These flags are manipulated under various locks.  RIP is always set under an
- *	exclusive lock while FIP is shared.  The only worry is that there is a possibility that
- *	FIP could be attempted by more than 1 processor at a time.  Obviously, one will win.
- *	The other(s) bail all the way to user state and may refault (or not).  There are only
- *	a few things in mpFlags that are not static, mpFIP, mpRIP, and mpBusy.
- *	
- *	We organize these so that mpFIP is in a byte with static data and mpRIP is in another. 
- *	That means that we can use a store byte to update the guys without worrying about load
- *  and reserve. Note that mpFIP must be set atomically because it is under a share lock;
- *  but, it may be cleared with a simple store byte. Because mpRip is set once and then never
- *  but, it may be cleared with a simple store byte. Because mpRIP is set once and then never
- *	
- */   
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct mapping {
-	unsigned int		mpFlags;		/* 0x000 - Various flags, lock bit. These are static except for lock */
-#define	mpBusy				0xFF000000	/*         Busy count */
-#define mpPrevious			0x00800000	/*		   A previous mapping exists in a composite */
-#define mpNext				0x00400000	/*		   A next mapping exists in a composite */
-#define	mpPIndex			0x003F0000	/*         Index into physical table (in words) */
-#define mpType				0x0000F000	/*		   Mapping type: */
-#define mpNormal			0x00000000	/*			Normal logical page - backed by RAM, RC maintained, logical page size == physical page size */
-										/*			DO NOT CHANGE THIS CODE */
-#define mpBlock				0x00001000	/*			Block mapping - used for I/O memory or non-RC maintained RAM, logical page size is independent from physical */
-#define mpMinSpecial		0x00002000	/*			Any mapping with this type or above has extra special handling */
-#define mpNest				0x00002000	/*			Forces transition to an alternate address space after applying relocation */
-#define mpLinkage			0x00003000	/*			Transition to current user address space with relocation - used for copyin/out/pv */
-#define mpACID				0x00004000	/*			Address Chunk ID - provides the address space ID for VSID calculation.  Normally mapped at chunk size - 2KB */
-#define mpGuest				0x00005000	/*			Guest->physical shadow mapping */
-/*							0x00006000 - 0x0000F000	Reserved */
-#define	mpFIP				0x00000800	/*         Fault in progress */
-#define	mpFIPb				20			/*         Fault in progress */
-#define mpPcfg				0x00000700	/*		   Physical Page configuration */
-#define mpPcfgb				23			/*		   Physical Page configuration index bit */
-#define mpRIP				0x00000080	/*         Remove in progress - DO NOT MOVE */
-#define mpRIPb				24			/*         Remove in progress */
-#define mpPerm				0x00000040	/*         Mapping is permanent - DO NOT MOVE */
-#define mpPermb				25			/*         Mapping is permanent */
-#define mpBSu				0x00000020	/*         Basic Size unit - 0 = 4KB, 1 = 32MB */
-#define mpBSub				26			/*         Basic Size unit - 0 = 4KB, 1 = 32MB */
-#define mpLists				0x0000001F	/*         Number of skip lists mapping is on, max of 27 */
-#define mpListsb			27			/*         Number of skip lists mapping is on, max of 27 */
-#define mpgFlags			0x0000001F	/*	Shadow cache mappings re-use mpLists for flags: */
-#define mpgGlobal			0x00000004	/*         Mapping is global (1) or local (0) */
-#define mpgFree				0x00000002	/*		   Mapping is free */
-#define mpgDormant			0x00000001	/*		   Mapping is dormant */
-
-	unsigned short		mpSpace;		/* 0x004 - Address space hash */
-	union {	
-		unsigned short	mpBSize;		/* 0x006 - Block size - 1 in pages - max block size 256MB */
-		unsigned char	mpgCursor;		/* 0x006 - Shadow-cache group allocation cursor (first mapping in the group) */
-	} u;
-	
-	unsigned int		mpPte;			/* 0x008 - Offset to PTEG in hash table. Offset to exact PTE if mpHValid set - NOTE: this MUST be 0 for block mappings */
-#define mpHValid			0x00000001	/* PTE is entered in hash table */
-#define mpHValidb			31			/* PTE is entered in hash table */
-	ppnum_t				mpPAddr;		/* 0x00C - Physical page number */
-	addr64_t			mpVAddr;		/* 0x010 - Starting virtual address */
-#define mpHWFlags			0x0000000000000FFFULL	/* Reference/Change, WIMG, AC, N, protection flags from PTE */
-#define mpHWFlagsb			52
-#define mpN					0x0000000000000004ULL	/* Page-level no-execute (PowerAS machines) */
-#define mpNb				61
-#define mpPP				0x0000000000000003ULL	/* Protection flags */
-#define mpPPb				62
-#define mpPPe				63
-#define mpKKN				0x0000000000000007ULL	/* Segment key and no execute flag (nested pmap) */
-#define mpKKNb				61
-#define mpWIMG				0x0000000000000078ULL	/* Attribute bits */
-#define mpWIMGb				57
-#define mpW					0x0000000000000040ULL
-#define mpWb				57
-#define mpI					0x0000000000000020ULL
-#define mpIb				58
-#define mpM					0x0000000000000010ULL
-#define mpMb				59
-#define mpG					0x0000000000000008ULL
-#define mpGb				60
-#define mpWIMGe				60
-#define mpC					0x0000000000000080ULL	/* Change bit */
-#define mpCb				56
-#define mpR					0x0000000000000100ULL	/* Reference bit */
-#define mpRb				55
-	addr64_t			mpAlias;		/* 0x018 - Pointer to alias mappings of physical page */
-#define mpNestReloc			mpAlias		/* 0x018 - Redefines mpAlias relocation value of vaddr to nested pmap value */
-#define mpBlkRemCur			mpAlias		/* 0x018 - Next offset in block map to remove (this is 4 bytes) */
-	addr64_t			mpList0;		/* 0x020 - Forward chain of mappings. This one is always used */
-	addr64_t			mpList[3];		/* 0x028 - Forward chain of mappings. Next higher order */
-/*										   0x040 - End of basic mapping */
-#define	mpBasicSize			64
-#define	mpBasicLists		4
-/* note the dependence on kSkipListMaxLists, which must be <= #lists in a 256-byte mapping (ie, <=28) */
-/*	addr64_t			mpList4[8];		   0x040 - First extended list entries */
-/*										   0x080 - End of first extended mapping */
-/*	addr64_t			mpList12[8];	   0x080 - Second extended list entries */
-/*										   0x0C0 - End of second extended mapping */
-/*	addr64_t			mpList20[8];	   0x0C0 - Third extended list entries */
-/*										   0x100 - End of third extended mapping */
-
-} mapping_t;
-#pragma pack()
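-
-/*
- *	Illustrative check (not in the original header): the offsets annotated
- *	above run from mpFlags at 0x000 through mpList[3] ending at 0x040, so a
- *	basic mapping with mpBasicLists skip lists is exactly mpBasicSize bytes.
- */
-typedef char mapping_basic_size_check[(sizeof(mapping_t) == mpBasicSize) ? 1 : -1];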
-
-#define MAPPING_NULL	((struct mapping *) 0)
-
-#define mapDirect 0x08
-#define mapRWNA   0x00000000
-#define mapRWRO   0x00000001
-#define mapRWRW   0x00000002
-#define mapRORO   0x00000003
-
-/* All counts are in units of basic 64-byte mappings.  A 128-byte mapping is
- * just two adjacent 64-byte entries.
- */
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-
-typedef struct mappingflush {
-	addr64_t			addr;			/* Start address to search mapping */
-	unsigned int		spacenum;		/* Last space num to search pmap */
-	unsigned int		mapfgas[1];		/* Pad to 64 bytes */
-} mappingflush_t;
-
-typedef struct mappingctl {
-	unsigned int		mapclock;		/* Mapping allocation lock */
-	unsigned int		mapcrecurse;	/* Mapping allocation recursion control */
-	struct mappingblok	*mapcnext;		/* First mapping block with free entries */
-	struct mappingblok	*mapclast;		/* Last mapping block with free entries */
-	struct mappingblok	*mapcrel;		/* List of deferred block releases */
-	unsigned int		mapcfree;		/* Total free entries on list */
-	unsigned int		mapcinuse;		/* Total entries in use */
-	unsigned int		mapcreln;		/* Total blocks on pending release list */
-	int					mapcholdoff;	/* Hold off clearing release list */
-	unsigned int		mapcfreec;		/* Total calls to mapping free */
-	unsigned int		mapcallocc;		/* Total calls to mapping alloc */
-    unsigned int		mapcbig;		/* Count times a big mapping was requested of mapping_alloc */
-    unsigned int		mapcbigfails;	/* Times caller asked for a big one but we gave 'em a small one */
-	unsigned int		mapcmin;		/* Minimum free mappings to keep */
-	unsigned int		mapcmaxalloc;	/* Maximum number of mappings allocated at one time */
-	unsigned int		mapcgas[1];		/* Pad to 64 bytes */
-	struct mappingflush	mapcflush;
-} mappingctl_t;
-#pragma pack()
-
-/* MAPPERBLOK is the number of basic 64-byte mappings per block (ie, per page.) */
-#define MAPPERBLOK 63
-#define MAPALTHRSH (4*MAPPERBLOK)
-#define MAPFRTHRSH (2 * ((MAPALTHRSH + MAPPERBLOK - 1) / MAPPERBLOK))
-typedef struct mappingblok {
-	unsigned int		mapblokfree[2];	/* Bit map of free mapping entries */
-	addr64_t			mapblokvrswap;	/* Virtual address XORed with physical address */
-	unsigned int		mapblokflags;	/* Various flags */
-#define mbPerm 0x80000000				/* Block is permanent */
-	struct mappingblok	*nextblok;		/* Pointer to the next mapping block */
-} mappingblok_t;
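-
-/*
- *	Worked example (illustrative, not from the original header): a 4KB page
- *	holds 4096 / 64 = 64 basic mapping slots, and slot 0 is taken by the
- *	mappingblok header itself, which is why MAPPERBLOK is 63 rather than 64.
- */
-typedef char mapperblok_check[(MAPPERBLOK == (4096 / 64) - 1) ? 1 : -1];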
-
-#define mapRemChunk 128
-
-#define mapRetCode	0xF
-#define mapRtOK		0
-#define mapRtBadLk	1
-#define mapRtPerm	2
-#define mapRtNotFnd	3
-#define mapRtBlock	4
-#define mapRtNest	5
-#define mapRtRemove	6
-#define mapRtMapDup	7
-#define mapRtGuest	8
-#define mapRtEmpty	9
-#define mapRtSmash	10					/* Mapping already exists and doesn't match new mapping */
-#define mapRtBadSz	11					/* Requested size too big - more than 256MB - or not a multiple of 32MB */
-
-/*
- *	This struct describes available physical page configurations
- *	Note:
- *		Index 0 is required and is the primary page configuration (4K, non-large)
- *		Index 1 is the primary large page config if supported by hw (16M, large page)
- */
- 
-typedef struct pcfg {
-	uint8_t				pcfFlags;		/* Flags */
-#define pcfValid		0x80			/* Configuration is valid */
-#define pcfLarge		0x40			/* Large page */
-#define pcfDedSeg		0x20			/* Requires dedicated segment */
-	uint8_t				pcfEncode;		/* Implementation specific PTE encoding */
-	uint8_t				pcfPSize;		/* Page size in powers of 2 */
-	uint8_t				pcfShift;		/* Shift for PTE construction */
-} pcfg;
-
-#define pcfDefPcfg		0				/* Primary page configuration */
-#define pcfLargePcfg	1				/* Primary large page configuration */
-
-extern pcfg pPcfg[8];					/* Supported page configurations */
-
-extern mappingctl_t	mapCtl;				/* Mapping allocation control */
-
-extern unsigned char ppc_prot[];		/* Mach -> PPC protection translation table */
-
-vm_prot_t getProtPPC(int, boolean_t);
-										/* Safe Mach -> PPC protection key conversion */
-
-extern addr64_t 	mapping_remove(pmap_t pmap, addr64_t va);	/* Remove a single mapping for this VADDR */
-extern mapping_t 	*mapping_find(pmap_t pmap, addr64_t va, addr64_t *nextva, int full);	/* Finds a mapping */
-extern void 		mapping_free_init(vm_offset_t mbl, int perm, boolean_t locked);	/* Sets start and end of a block of mappings */
-extern void 		mapping_prealloc(unsigned int);				/* Preallocate mappings for large use */
-extern void 		mapping_relpre(void);						/* Releases preallocate request */
-extern void 		mapping_init(void);							/* Do initial stuff */
-extern mapping_t    *mapping_alloc(int lists);					/* Obtain a mapping */
-extern void 		mapping_free(struct mapping *mp);			/* Release a mapping */
-extern boolean_t 	mapping_tst_ref(ppnum_t pa);				/* Tests the reference bit of a physical page */
-extern boolean_t 	mapping_tst_mod(ppnum_t pa);				/* Tests the change bit of a physical page */
-extern void 		mapping_set_ref(ppnum_t pa);				/* Sets the reference bit of a physical page */
-extern void 		mapping_clr_ref(ppnum_t pa);				/* Clears the reference bit of a physical page */
-extern void 		mapping_set_mod(ppnum_t pa);				/* Sets the change bit of a physical page */
-extern void 		mapping_clr_mod(ppnum_t pa);				/* Clears the change bit of a physical page */
-extern unsigned int mapping_tst_refmod(ppnum_t pa);				/* Tests the reference and change bits of a physical page */
-extern void			mapping_clr_refmod(ppnum_t pa, unsigned int mask);	/* Clears the reference and change bits of a physical page */
-extern void 		mapping_protect_phys(ppnum_t pa, vm_prot_t prot);	/* Change protection of all mappings to page */
-extern void	 	mapping_protect(pmap_t pmap, addr64_t va, vm_prot_t prot, addr64_t *nextva);	/* Change protection of a single mapping to page */
-extern addr64_t		mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags, unsigned int size, vm_prot_t prot); /* Make a mapping */
-/* Flags for mapping_make */
-#define mmFlgBlock		0x80000000	/* This is a block map, use size for number of pages covered */
-#define mmFlgUseAttr	0x40000000	/* Use specified attributes */
-#define mmFlgPerm		0x20000000	/* Mapping is permanent */
-#define mmFlgPcfg		0x07000000	/* Physical page configuration index */
-#define mmFlgCInhib		0x00000002	/* Caching inhibited - use if mmFlgUseAttr set or block */
-#define mmFlgGuarded	0x00000001	/* Access guarded - use if mmFlgUseAttr set or block */
-extern void 		mapping_purge(ppnum_t pa);		/* Remove all mappings for this physent */
-extern addr64_t		mapping_p2v(pmap_t pmap, ppnum_t pa);	/* Finds first virtual mapping of a physical page in a space */
-extern void			mapping_drop_busy(struct mapping *mapping);	/* Drops busy count on mapping */
-extern phys_entry_t  *mapping_phys_lookup(ppnum_t pp, unsigned int *pindex);	/* Finds the physical entry for the page */
-extern int			mapalc1(struct mappingblok *mb);			/* Finds and allocates a 1-bit mapping entry */
-extern int			mapalc2(struct mappingblok *mb);			/* Finds and allocates a 2-bit mapping entry */
-extern void			ignore_zero_fault(boolean_t type);			/* Sets up to ignore or honor any fault on page 0 access for the current thread */
-extern void			mapping_hibernate_flush(void);
-
-extern void			mapping_fake_zone_info(		/* return mapping usage stats as a fake zone info */
-						int *count,
-						vm_size_t *cur_size,
-						vm_size_t *max_size,
-						vm_size_t *elem_size,
-						vm_size_t *alloc_size,
-						int *collectable,
-						int *exhaustable);
-
-extern mapping_t 	*hw_rem_map(pmap_t pmap, addr64_t va, addr64_t *next);	/* Remove a mapping from the system */
-extern mapping_t	*hw_purge_map(pmap_t pmap, addr64_t va, addr64_t *next);	/* Remove a regular mapping from the system */
-extern mapping_t	*hw_purge_space(struct phys_entry *pp, pmap_t pmap);	/* Remove the first mapping for a specific pmap from physentry */
-extern mapping_t	*hw_purge_phys(struct phys_entry *pp);		/* Remove the first mapping for a physentry */
-extern mapping_t	*hw_scrub_guest(struct phys_entry *pp, pmap_t pmap);	/* Scrub first guest mapping belonging to this host */ 
-extern mapping_t	*hw_find_map(pmap_t pmap, addr64_t va, addr64_t *nextva);	/* Finds a mapping */
-extern mapping_t	*hw_find_space(struct phys_entry *pp, unsigned int space);	/* Given a phys_entry, find its first mapping in the specified space */
-extern addr64_t		hw_add_map(pmap_t pmap, struct mapping *mp);	/* Add a mapping to a pmap */
-extern unsigned int	hw_protect(pmap_t pmap, addr64_t va, vm_prot_t prot, addr64_t *nextva);	/* Change the protection of a virtual page */
-extern unsigned int	hw_test_rc(pmap_t pmap, addr64_t va, boolean_t reset);	/* Test and optionally reset the RC bit of specific mapping */
-
-extern unsigned int	hw_clear_maps(void);
-
-extern unsigned int	hw_walk_phys(struct phys_entry *pp, unsigned int preop, unsigned int op, /* Perform function on all mappings on a physical page */
-	unsigned int postop, unsigned int parm, unsigned int opmod);	
-/* Opcodes for hw_walk_phys */
-#define hwpNoop			0	/* No operation */
-#define hwpSPrtPhy		1	/* Sets protection in physent (obsolete)  */
-#define hwpSPrtMap		2	/* Sets protection in mapping  */
-#define hwpSAtrPhy		3	/* Sets attributes in physent  */
-#define hwpSAtrMap		4	/* Sets attributes in mapping  */
-#define hwpCRefPhy		5	/* Clears reference in physent  */
-#define hwpCRefMap		6	/* Clears reference in mapping  */
-#define hwpCCngPhy		7	/* Clears change in physent  */
-#define hwpCCngMap		8	/* Clears change in mapping  */
-#define hwpSRefPhy		9	/* Sets reference in physent  */
-#define hwpSRefMap		10	/* Sets reference in mapping  */
-#define hwpSCngPhy		11	/* Sets change in physent  */
-#define hwpSCngMap		12	/* Sets change in mapping  */
-#define hwpTRefPhy		13	/* Tests reference in physent  */
-#define hwpTRefMap		14	/* Tests reference in mapping  */
-#define hwpTCngPhy		15	/* Tests change in physent  */
-#define hwpTCngMap		16	/* Tests change in mapping  */
-#define hwpTRefCngPhy	17  /* Tests reference and change in physent */
-#define hwpTRefCngMap	18	/* Tests reference and change in mapping */
-#define hwpCRefCngPhy	19  /* Clears reference and change in physent */
-#define hwpCRefCngMap	20	/* Clears reference and change in mapping */
-/* Operation modifiers for connected PTE visits for hw_walk_phys */
-#define hwpPurgePTE		0	/* Invalidate/purge PTE and merge RC bits for each connected mapping */
-#define hwpMergePTE		1	/* Merge RC bits for each connected mapping */
-#define hwpNoopPTE		2	/* Take no additional action for each connected mapping */
-
-extern void 		hw_set_user_space(pmap_t pmap);				/* Indicate we need a space switch */
-extern void 		hw_set_user_space_dis(pmap_t pmap);			/* Indicate we need a space switch (already disabled) */
-extern void 		hw_setup_trans(void);						/* Setup hardware for translation */
-extern void 		hw_start_trans(void);						/* Start translation for the first time */
-extern void 		hw_map_seg(pmap_t pmap, addr64_t seg, addr64_t va);		/* Validate a segment */
-extern void 		hw_blow_seg(addr64_t seg);					/* Invalidate a segment */
-extern void 		invalidateSegs(pmap_t pmap);				/* Invalidate the segment cache */
-extern struct phys_entry *pmap_find_physentry(ppnum_t pa);
-extern void			mapLog(unsigned int laddr, unsigned int type, addr64_t va);
-extern unsigned int	mapSkipListVerifyC(pmap_t pmap, unsigned long long *dumpa);
-extern kern_return_t hw_copypv_32(addr64_t source, addr64_t sink, unsigned int size, int which);
-
-extern void			hw_rem_all_gv(pmap_t pmap);					/* Remove all of a guest's mappings */
-extern void			hw_rem_local_gv(pmap_t gpmap);				/* Remove guest local mappings */
-extern unsigned int hw_res_map_gv(pmap_t hpmap, pmap_t gpmap, addr64_t hva, addr64_t gva, vm_prot_t prot);
-																/* Resume a guest mapping */
-extern void			hw_add_map_gv(pmap_t hpmap, pmap_t gpmap, addr64_t gva, unsigned int mflags, ppnum_t pa);
-																/* Add a guest mapping */
-extern void			hw_susp_map_gv(pmap_t hpmap, pmap_t gpmap, addr64_t gva);
-																/* Suspend a guest mapping */
-extern unsigned int hw_test_rc_gv(pmap_t hpmap, pmap_t gpmap, addr64_t gva, unsigned int reset);
-																/* Test/reset mapping ref and chg */
-extern unsigned int	hw_protect_gv(pmap_t gpmap, addr64_t va, vm_prot_t prot);
-																/* Change the protection of a guest page */
-extern addr64_t		hw_gva_to_hva(pmap_t gpmap, addr64_t gva);	/* Convert guest to host virtual address */
-extern unsigned int hw_find_map_gv(pmap_t gpmap, addr64_t gva, void *mpbuf);
-																/* Find and copy guest mapping into buffer */
-
-extern unsigned int	mappingdeb0;								/* (TEST/DEBUG) */
-extern unsigned int	incrVSID;									/* VSID increment value */
-
-extern int mapSetLists(pmap_t);
-extern void consider_mapping_adjust(void);
-
-#endif /* _PPC_MAPPINGS_H_ */
-
-#endif /* XNU_KERNEL_PRIVATE */
diff --git a/osfmk/ppc/mcount.s b/osfmk/ppc/mcount.s
deleted file mode 100644
index fd2518567..000000000
--- a/osfmk/ppc/mcount.s
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <assym.s>
-#include <debug.h>
-#include <mach/ppc/vm_param.h>
-#include <ppc/exception.h>
-
-
-/*
- * The compiler generates calls to this function and passes address
- * of caller of the function [ from which mcount is called ] as the
- * first parameter.
- * mcount disables interrupts prior to calling _mcount() and restores 
- * them upon return.
- * To prevent recursive calls to mcount(), a flag, mcountOff, is set 
- * in the per_proc cpu_flags.
- */
-
-			.align 4
-			.globl mcount
-mcount:
-		mflr r0										; Load lr
-		stw r0,8(r1)								; Save lr on the stack
-		stwu r1,-64(r1)								; Get a stack frame 
-		mfmsr	r9									; Get msr
-		rlwinm	r9,r9,0,MSR_FP_BIT+1,MSR_FP_BIT-1	; Force floating point off
-		rlwinm	r9,r9,0,MSR_VEC_BIT+1,MSR_VEC_BIT-1	; Force vectors off
-		rlwinm	r8,r9,0,MSR_EE_BIT+1,MSR_EE_BIT-1	; Turn off interruptions
-		mtmsr	r8									; Update msr	
-		isync		
-		mfsprg	r7,1								; Get the current activation
-		lwz		r7,ACT_PER_PROC(r7)					; Get the per_proc block
-		lhz		r6,PP_CPU_FLAGS(r7)					; Get  cpu flags 
-		ori		r5,r6,mcountOff						; 
-		cmplw	r5,r6								; is mcount off?
-		beq		mcount_ret							; return if off
-		sth		r5,PP_CPU_FLAGS(r7)					; Update cpu_flags
-		stw	r9,FM_ARG0(r1)							; Save MSR
-		mr r4, r0
-		bl	_mcount									; Call the C routine
-		lwz	r9,FM_ARG0(r1)
-		mfsprg	r7,1								; Get the current activation
-		lwz		r7,ACT_PER_PROC(r7)					; Get the per_proc block
-		lhz		r6,PP_CPU_FLAGS(r7)					; Get cpu flags 
-		li		r5,mcountOff						; 
-		andc		r6,r6,r5						; Clear mcount_off
-		sth		r6,PP_CPU_FLAGS(r7)					; Save cpu_flags
-mcount_ret:
-		addi r1,r1,64
-		mtmsr	r9									; Restore MSR
-		lwz r0,8(r1)
-		mtlr r0
-		blr
-
diff --git a/osfmk/ppc/mem.h b/osfmk/ppc/mem.h
deleted file mode 100644
index e4ee80c0e..000000000
--- a/osfmk/ppc/mem.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifndef _PPC_MEM_H_
-#define _PPC_MEM_H_
-
-#include <mach_kdb.h>
-#include <mach_kgdb.h>
-
-#include <ppc/proc_reg.h>
-#include <ppc/pmap.h>
-#include <mach/vm_types.h>
-
-extern vm_offset_t	static_memory_end;
-
-extern addr64_t		hash_table_base;
-extern unsigned int	hash_table_size;
-extern int          hash_table_shift;   /* size adjustment: bigger if >0, smaller if <0 */
-
-void hash_table_init(vm_offset_t base, vm_offset_t size);
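-
-/* One plausible reading of hash_table_shift (a sketch, not from the original
- * header): it acts as a log2 adjustment on the nominal hash table size,
- * along the lines of:
- *
- *	if (hash_table_shift >= 0)
- *		size = nominal_size << hash_table_shift;	// bigger
- *	else
- *		size = nominal_size >> -hash_table_shift;	// smaller
- */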
-
-#define MAX_BAT		4
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct ppcBAT {
-	unsigned int	upper;	/* Upper half of BAT */
-	unsigned int	lower;	/* Lower half of BAT */
-} ppcBAT;
-#pragma pack()
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-struct shadowBAT {
-	ppcBAT	IBATs[MAX_BAT];	/* Instruction BATs */
-	ppcBAT	DBATs[MAX_BAT];	/* Data BAT */
-};
-#pragma pack()
-
-extern struct shadowBAT shadow_BAT;     
-
-#endif /* _PPC_MEM_H_ */
diff --git a/osfmk/ppc/misc.c b/osfmk/ppc/misc.c
deleted file mode 100644
index 807c03512..000000000
--- a/osfmk/ppc/misc.c
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-#if 0  // dead code
-#include <debug.h>
-#include <mach_debug.h>
-
-#include <mach/ppc/thread_status.h>
-#include <mach/vm_types.h>
-#include <kern/thread.h>
-#include <kern/misc_protos.h>
-#include <ppc/proc_reg.h>
-#include <ppc/pmap.h>
-#include <ppc/misc_protos.h>
-#include <ppc/exception.h>
-
-/*
- * copyin/out_multiple - the assembler copyin/out functions jump to C for
- * help when the copyin lies over a segment boundary. The C breaks
- * down the copy into two sub-copies and re-calls the assembler with
- * these sub-copies. Very rare occurrence. Warning: These functions are
- * called whilst active_thread->thread_recover is still set.
- */
-
-extern boolean_t copyin_multiple(const char *src,
-				 char *dst,
-				 vm_size_t count);
-
-boolean_t copyin_multiple(const char *src,
-			  char *dst,
-			  vm_size_t count)
-{
-	const char *midpoint;
-	vm_size_t first_count;
-	boolean_t first_result;
-
-	/* Assert that we've been called because of a segment boundary,
-	 * this function is more expensive than the assembler, and should
-	 * only be called in this difficult case.
-	 */
-	assert(((vm_offset_t)src & 0xF0000000) !=
-	       ((vm_offset_t)(src + count -1) & 0xF0000000));
-
-	/* TODO NMGS define sensible constants for segments, and apply
-	 * to C and assembler (assembler is much harder)
-	 */
-	midpoint = (const char*) ((vm_offset_t)(src + count) & 0xF0000000);
-	first_count = (midpoint - src);
-
-	first_result = copyin(CAST_USER_ADDR_T(src), dst, first_count);
-	
-	/* If there was an error, stop now and return error */
-	if (first_result != 0)
-		return first_result;
-
-	/* otherwise finish the job and return result */
-	return copyin(CAST_USER_ADDR_T(midpoint), dst + first_count, count-first_count);
-}
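-
-/*
- *	Worked example (illustrative, not from the original source): for
- *	src = 0x1FFFFFF0 and count = 0x20 the copy crosses the 256MB segment
- *	boundary at 0x20000000.  midpoint = (0x1FFFFFF0 + 0x20) & 0xF0000000 =
- *	0x20000000, so the first copyin() moves first_count = 0x10 bytes up to
- *	the boundary and the second moves the remaining 0x10 bytes.
- */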
-
-extern int copyout_multiple(const char *src, char *dst, vm_size_t count);
-
-int copyout_multiple(const char *src, char *dst, vm_size_t count)
-{
-	char *midpoint;
-	vm_size_t first_count;
-	boolean_t first_result;
-
-	/* Assert that we've been called because of a segment boundary,
-	 * this function is more expensive than the assembler, and should
-	 * only be called in this difficult case. For copyout, the
-	 * segment boundary is on the dst
-	 */
-	assert(((vm_offset_t)dst & 0xF0000000) !=
-	       ((vm_offset_t)(dst + count - 1) & 0xF0000000));
-
-	/* TODO NMGS define sensible constants for segments, and apply
-	 * to C and assembler (assembler is much harder)
-	 */
-	midpoint = (char *) ((vm_offset_t)(dst + count) & 0xF0000000);
-	first_count = (midpoint - dst);
-
-	first_result = copyout(src, CAST_USER_ADDR_T(dst), first_count);
-	
-	/* If there was an error, stop now and return error */
-	if (first_result != 0)
-		return first_result;
-
-	/* otherwise finish the job and return result */
-
-	return copyout(src + first_count, CAST_USER_ADDR_T(midpoint), count-first_count);
-}
-#endif // dead code
-
diff --git a/osfmk/ppc/misc_asm.s b/osfmk/ppc/misc_asm.s
deleted file mode 100644
index 32d4b58df..000000000
--- a/osfmk/ppc/misc_asm.s
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#include <mach_kdb.h>
-#include <mach_kgdb.h>
-#include <mach_debug.h>
-#include <assym.s>
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <mach/ppc/vm_param.h>
-
-/*
- * vm_offset_t getrpc(void) - Return address of the function
- *	                      that called the current function
- */
-
-/* By using this function, we force the caller to save its LR in a known
- * location, which we can pick up and return. See PowerPC ELF specs.
- */
-ENTRY(getrpc, TAG_NO_FRAME_USED)
-	lwz	ARG0,	FM_BACKPTR(r1)		/* Load our backchain ptr */
-	lwz	ARG0,	FM_LR_SAVE(ARG0)	/* Load previously saved LR */
-	blr					/* And return */
-
-
-/*
- *	General entry for all debuggers.  This gets us onto the debug stack and
- *	then back off at exit. We need to pass back R3 to caller.
- */
- 
-ENTRY(Call_Debugger, TAG_NO_FRAME_USED)
-
-
-			lis		r8,hi16(MASK(MSR_VEC))			; Get the vector flag
-			mfmsr	r7				; Get the current MSR
-			ori		r8,r8,lo16(MASK(MSR_EE)|MASK(MSR_FP))	; Add the FP flag
-			mflr	r0				; Save the return
-			andc	r7,r7,r8						; Clear VEC and FP
-			mtmsr	r7				; Do it 
-			isync
-			mfsprg	r8,1					; Get the current activation
-			lwz		r8,ACT_PER_PROC(r8)		; Get the per_proc block
-			stw		r0,FM_LR_SAVE(r1)	; Save return on current stack
-			
-			lwz		r9,PP_DEBSTACKPTR(r8)	; Get the debug stack
-			cmpwi	r9,0			; Are we already on it?
-			bne		cdNewDeb		; No...
-		
-			mr		r9,r1 			; We are already on the stack, so use the current value
-			subi	r9,r9,FM_REDZONE+FM_SIZE	; Carve some extra space here
-		
-cdNewDeb:	li		r0,0			; Clear this out
-			stw		r1,FM_ARG0(r9)	; Save the old stack pointer as if it were the first arg
-
-			stw		r0,PP_DEBSTACKPTR(r8)	; Mark debug stack as busy
-			
-			subi	r1,r9,FM_SIZE	; Carve a new frame
-			stw		r0,FM_BACKPTR(r1)	; Chain back
-			
-			bl		EXT(Call_DebuggerC)	; Call the "C" phase of this
-		
-			lis		r8,hi16(MASK(MSR_VEC))			; Get the vector flag
-			mfmsr	r0				; Get the current MSR
-			ori		r8,r8,lo16(MASK(MSR_EE)|MASK(MSR_FP))	; Add the FP flag
-			addi	r1,r1,FM_SIZE	; Pop off first stack frame
-			andc	r0,r0,r8		; Turn off all the interesting stuff
-			mtmsr	r0
-		
-			mfsprg	r8,1					; Get the current activation
-			lwz		r8,ACT_PER_PROC(r8)		; Get the per_proc block
-			
-			lwz		r9,PP_DEBSTACK_TOP_SS(r8)	; Get the top of the stack
-			cmplw	r1,r9			; Have we hit the bottom of the debug stack?
-			lwz		r1,FM_ARG0(r1)	; Get previous stack frame
-			lwz		r0,FM_LR_SAVE(r1)	; Get return address
-			mtlr	r0				; Set the return point
-			bnelr					; Return if still on debug stack
-
-			stw		r9,PP_DEBSTACKPTR(r8)	; Mark debug stack as free		
-			blr
- 
-
-/* The following routines are for C-support. They are usually
- * inlined into the C using the specifications in proc_reg.h,
- * but if optimisation is switched off, the inlining doesn't work
- */
-
-ENTRY(get_got, TAG_NO_FRAME_USED)
-	mr	ARG0,	r2
-	blr
-	
-ENTRY(mflr, TAG_NO_FRAME_USED)
-	mflr	ARG0
-	blr
-
-ENTRY(mfpvr, TAG_NO_FRAME_USED)
-	mfpvr	ARG0
-	blr
-
-ENTRY(mtmsr, TAG_NO_FRAME_USED)
-	mtmsr	ARG0
-	isync
-	blr
-
-ENTRY(mfmsr, TAG_NO_FRAME_USED)
-	mfmsr	ARG0
-	blr
-
-ENTRY(mtsrin, TAG_NO_FRAME_USED)
-	isync
-	mtsrin	ARG0,	ARG1
-	isync
-	blr
-
-ENTRY(mfsrin, TAG_NO_FRAME_USED)
-	mfsrin	ARG0,	ARG0
-	blr
-
-ENTRY(mtsdr1, TAG_NO_FRAME_USED)
-	mtsdr1	ARG0
-	blr
-
-ENTRY(mtdar, TAG_NO_FRAME_USED)
-	mtdar	ARG0
-	blr
-
-ENTRY(mfdar, TAG_NO_FRAME_USED)
-	mfdar	ARG0
-	blr
-
-ENTRY(mtdec, TAG_NO_FRAME_USED)
-	mtdec	ARG0
-	blr
-
-ENTRY(cntlzw, TAG_NO_FRAME_USED)
-	cntlzw	r3,r3
-	blr
-
-/* Decrementer frequency and realtime|timebase processor registers
- * are different between ppc601 and ppc603/4, we define them all.
- */
-
-ENTRY(isync_mfdec, TAG_NO_FRAME_USED)
-	isync
-	mfdec	ARG0
-	blr
-
-
-ENTRY(mftb, TAG_NO_FRAME_USED)
-	mftb	ARG0
-	blr
-
-ENTRY(mftbu, TAG_NO_FRAME_USED)
-	mftbu	ARG0
-	blr
-
-ENTRY(mfrtcl, TAG_NO_FRAME_USED)
-	mfspr	ARG0,	5
-	blr
-
-ENTRY(mfrtcu, TAG_NO_FRAME_USED)
-	mfspr	ARG0,	4
-	blr
-
-ENTRY(tlbie, TAG_NO_FRAME_USED)
-	tlbie	ARG0
-	blr
-
-
-/*
- * Performance Monitor Register Support
- */	
-
-ENTRY(mfmmcr0, TAG_NO_FRAME_USED)	
-	mfspr	r3,mmcr0
-	blr
-
-ENTRY(mtmmcr0, TAG_NO_FRAME_USED)
-	mtspr	mmcr0,r3
-	blr								
-
-ENTRY(mfmmcr1, TAG_NO_FRAME_USED)
-	mfspr	r3,mmcr1
-	blr								
-
-ENTRY(mtmmcr1, TAG_NO_FRAME_USED)
-	mtspr	mmcr1,r3
-	blr
-
-ENTRY(mfmmcr2, TAG_NO_FRAME_USED)
-	mfspr	r3,mmcr2
-	blr								
-
-ENTRY(mtmmcr2, TAG_NO_FRAME_USED)
-	mtspr	mmcr2,r3
-	blr
-
-ENTRY(mfpmc1, TAG_NO_FRAME_USED)
-	mfspr	r3,pmc1
-	blr
-
-ENTRY(mtpmc1, TAG_NO_FRAME_USED)
-	mtspr	pmc1,r3
-	blr								
-
-ENTRY(mfpmc2, TAG_NO_FRAME_USED)
-	mfspr	r3,pmc2
-	blr
-
-ENTRY(mtpmc2, TAG_NO_FRAME_USED)
-	mtspr	pmc2,r3
-	blr								
-
-ENTRY(mfpmc3, TAG_NO_FRAME_USED)
-	mfspr	r3,pmc3
-	blr
-
-ENTRY(mtpmc3, TAG_NO_FRAME_USED)
-	mtspr	pmc3,r3
-	blr								
-
-ENTRY(mfpmc4, TAG_NO_FRAME_USED)
-	mfspr	r3,pmc4
-	blr
-
-ENTRY(mtpmc4, TAG_NO_FRAME_USED)
-	mtspr	pmc4,r3
-	blr			
-						
-ENTRY(mfsia, TAG_NO_FRAME_USED)
-	mfspr	r3,sia
-	blr
-
-ENTRY(mfsda, TAG_NO_FRAME_USED)
-	mfspr	r3,sda
-	blr
-
-	.globl	EXT(hid1get)
-LEXT(hid1get)
-
-	mfspr	r3,hid1					; Get the HID1
-	blr
-
-	.globl	EXT(hid0get64)
-LEXT(hid0get64)
-
-	mfspr	r4,hid0					; Get the HID0
-	srdi	r3,r4,32				; Move top down
-	rlwinm	r4,r4,0,0,31			; Clean top
-	blr
-
-	.globl	EXT(hid5set64)
-LEXT(hid5set64)
-
-	rlwinm	r3,r3,0,1,0				; Copy low 32 into high 32
-	rlwimi	r3,r4,0,0,31			; Insert the low part behind the top
-	mtspr	hid5,r3					; Set it
-	isync							; Wait for it
-	blr
diff --git a/osfmk/ppc/misc_protos.h b/osfmk/ppc/misc_protos.h
deleted file mode 100644
index d3eddc42a..000000000
--- a/osfmk/ppc/misc_protos.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifndef _PPC_MISC_PROTOS_H_
-#define _PPC_MISC_PROTOS_H_
-
-#include <debug.h>
-#include <mach_kdb.h>
-#include <mach_kgdb.h>
-#include <mach_kdp.h>
-#include <mach_debug.h>
-
-#include <ppc/boot.h>
-#include <kern/thread.h>
-#include <mach/vm_types.h>
-#include <kern/cpu_data.h>
-#include <ppc/savearea.h>
-#include <mach/ppc/thread_status.h>
-#include <stdarg.h>
-#include <string.h>
-
-/* uncached-safe */
-extern void		bzero_nc(
-					char				*buf, 
-					int					size);
-
-/* uncached-safe */
-void bcopy_nc(const char *, char *, int);
-
-/* Physical to physical copy (ints must be disabled) */
-extern void		bcopy_phys(
-					addr64_t			from,
-					addr64_t			to,
-					int					size);
-
-/* Physical to physical copy virtually (ints must be disabled) */
-extern void		bcopy_physvir_32(
-					addr64_t			from,
-					addr64_t			to,
-					int					size);
-
-extern void		phys_copy(
-					addr64_t			from,
-					addr64_t			to,
-					vm_size_t			size); 
-
-extern void		machine_conf(
-					void);
-
-extern void		machine_startup(void);
-
-extern void		ppc_vm_init(
-					uint64_t			ppc_mem_size,
-					boot_args			*args);
-
-extern int		ppcNull(
-					struct savearea		*asavearea);
-
-extern int		ppcNullinst(
-					struct savearea		*asavearea);
-
-extern void		disable_bluebox_internal(
-					thread_t		act);
-
-extern uint64_t	hid0get64(
-					void);
-
-extern void		hid5set64(
-					uint64_t);
-
-extern void		Load_context(
-					thread_t			th);
-
-thread_t Switch_context(thread_t, thread_continue_t, thread_t);
-
-extern void		vec_save(
-					struct facility_context *vec_fc);
-
-extern void		toss_live_fpu(
-					struct facility_context *fpu_fc);
-
-extern void		toss_live_vec(
-					struct facility_context *vec_fc);
-
-extern struct savearea *enterDebugger(unsigned int, struct savearea *,
-		unsigned int);
-
-extern void		draw_panic_dialog(
-					void);
-
-extern void		commit_paniclog(
-					void);
-#if	DEBUG
-#define DPRINTF(x) { printf("%s : ",__FUNCTION__);printf x; }
-#endif	/* DEBUG */
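-
-/* Illustrative use only (this call is hypothetical, not part of the
- * original header): the double parentheses pass a whole printf argument
- * list through the macro's single parameter, e.g.
- *
- *	DPRINTF(("bad savearea %p\n", sv));
- *
- * which prints the calling function's name followed by the message.
- */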
-
-#if MACH_ASSERT
-extern void		dump_thread(
-					thread_t			th);
-#endif /* MACH_ASSERT */
-
-/* XXX move to osfmk/ppc/debug.h or some other debug header */
-void print_backtrace(struct savearea *);
-int Call_Debugger(int, struct savearea *);
-int kdp_dump_trap(int, struct savearea *);
-void SysChoked(unsigned int, struct savearea *);
-int Call_DebuggerC(unsigned int, struct savearea *);
-void kdp_trap(unsigned int, struct savearea *);
-
-#endif /* _PPC_MISC_PROTOS_H_ */
diff --git a/osfmk/ppc/model_dep.c b/osfmk/ppc/model_dep.c
deleted file mode 100644
index 9eff5b0bb..000000000
--- a/osfmk/ppc/model_dep.c
+++ /dev/null
@@ -1,1045 +0,0 @@
-/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-/*
- *  (c) Copyright 1988 HEWLETT-PACKARD COMPANY
- *
- *  To anyone who acknowledges that this file is provided "AS IS"
- *  without any express or implied warranty:
- *      permission to use, copy, modify, and distribute this file
- *  for any purpose is hereby granted without fee, provided that
- *  the above copyright notice and this notice appears in all
- *  copies, and that the name of Hewlett-Packard Company not be
- *  used in advertising or publicity pertaining to distribution
- *  of the software without specific, written prior permission.
- *  Hewlett-Packard Company makes no representations about the
- *  suitability of this software for any purpose.
- */
-/*
- * Copyright (c) 1990,1991,1992,1994 The University of Utah and
- * the Computer Systems Laboratory (CSL).  All rights reserved.
- *
- * THE UNIVERSITY OF UTAH AND CSL PROVIDE THIS SOFTWARE IN ITS "AS IS"
- * CONDITION, AND DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
- * WHATSOEVER RESULTING FROM ITS USE.
- *
- * CSL requests users of this software to return to csl-dist@cs.utah.edu any
- * improvements that they make and grant CSL redistribution rights.
- *
- * 	Utah $Hdr: model_dep.c 1.34 94/12/14$
- */
-/*
- * NOTICE: This file was modified by McAfee Research in 2004 to introduce
- * support for mandatory and extensible security protections.  This notice
- * is included in support of clause 2.2 (b) of the Apple Public License,
- * Version 2.0.
- */
-
-#include <debug.h>
-#include <mach_kdb.h>
-#include <mach_kdp.h>
-#include <db_machine_commands.h>
-
-#include <kern/thread.h>
-#include <machine/pmap.h>
-#include <device/device_types.h>
-
-#include <libkern/OSKextLibPrivate.h>
-
-#include <mach/vm_param.h>
-#include <mach/clock_types.h>
-#include <mach/machine.h>
-#include <mach/kmod.h>
-#include <ppc/boot.h>
-
-#include <kern/misc_protos.h>
-#include <kern/startup.h>
-#include <ppc/misc_protos.h>
-#include <ppc/proc_reg.h>
-#include <ppc/thread.h>
-#include <ppc/asm.h>
-#include <ppc/mem.h>
-#include <ppc/Firmware.h>
-#include <ppc/low_trace.h>
-#include <ppc/mappings.h>
-#include <ppc/FirmwareCalls.h>
-#include <ppc/cpu_internal.h>
-#include <ppc/exception.h>
-#include <ppc/hw_perfmon.h>
-#include <ppc/lowglobals.h>
-#include <ppc/machine_cpu.h>
-#include <ppc/db_machdep.h>
-
-#include <kern/clock.h>
-#include <kern/debug.h>
-#include <machine/trap.h>
-#include <kern/spl.h>
-#include <pexpert/pexpert.h>
-#include <kern/sched.h>
-#include <kern/task.h>
-#include <kern/machine.h>
-#include <vm/vm_map.h>
-
-#include <IOKit/IOPlatformExpert.h>
-
-#include <mach/vm_prot.h>
-#include <vm/pmap.h>
-#include <mach/time_value.h>
-#include <mach/mach_types.h>
-#include <mach/mach_vm.h>
-#include <machine/machparam.h>	/* for btop */
-
-#if	MACH_KDB
-#include <ddb/db_aout.h>
-#include <ddb/db_output.h>
-#include <ddb/db_command.h>
-#include <machine/db_machdep.h>
-
-extern struct db_command ppc_db_commands[];
-#endif	/* MACH_KDB */
-
-char kernel_args_buf[256] = "/mach_kernel";
-char boot_args_buf[256] = "/mach_servers/bootstrap";
-char env_buf[256];
-
-#define TRAP_DEBUGGER	__asm__ volatile("tw 4,r3,r3");
-#define TRAP_DEBUGGER_INST	0x7c831808
-#define TRAP_DIRECT	__asm__ volatile("tw 4,r4,r4");
-#define TRAP_DIRECT_INST	0x7c842008
-#define TRAP_INST_SIZE	4
-#define BREAK_TO_KDP0 0x7fe00008
-#define BREAK_TO_KDP1 0x7c800008
-#define BREAK_TO_KDB0 0x7c810808
-
-/*
- * Code used to synchronize debuggers among all cpus, one active at a time; switch
- * from one to another using kdb_on! #cpu or cpu #cpu
- */
-
-hw_lock_data_t debugger_lock;	/* debugger lock */
-hw_lock_data_t pbtlock;		/* backtrace print lock */
-
-unsigned int debugger_cpu = (unsigned)-1; /* current cpu running debugger	*/
-int			debugger_debug = 0;			/* Debug debugger */
-int 		db_run_mode;				/* Debugger run mode */
-unsigned int debugger_sync = 0;			/* Cross processor debugger entry sync */
-extern 		unsigned int NMIss;			/* NMI debounce switch */
-
-extern volatile int panicwait;
-volatile unsigned int pbtcnt = 0;
-volatile unsigned int pbtcpu = -1;
-
-unsigned int lastTrace;					/* Value of low-level exception trace controls */
-
-
-volatile unsigned int	cpus_holding_bkpts;	/* counter for number of cpus holding
-											   breakpoints (i.e., cpus that did not
-											   put breakpoints back) */
-void unlock_debugger(void);
-void lock_debugger(void);
-void dump_backtrace(struct savearea *sv,
-		    unsigned int stackptr,
-		    unsigned int fence);
-void dump_savearea(struct savearea *sv,
-		   unsigned int fence);
-
-#if !MACH_KDB
-boolean_t	db_breakpoints_inserted = TRUE;
-jmp_buf_t *db_recover;
-#endif
-
-#if	MACH_KDB
-#include <ddb/db_run.h>
-int	kdb_flag=0;
-extern boolean_t db_breakpoints_inserted;
-extern jmp_buf_t *db_recover;
-#define	KDB_READY	0x1
-#endif
-
-#if	MACH_KDP
-extern int 	kdp_flag;
-#define	KDP_READY	0x1
-#endif
-
-unsigned int db_im_stepping = 0xFFFFFFFF; /* Remember if we were stepping */
-
-
-const char *failNames[] = {	
-	"Debugging trap",			/* failDebug */
-	"Corrupt stack",			/* failStack */
-	"Corrupt mapping tables",	/* failMapping */
-	"Corrupt context",			/* failContext */
-	"No saveareas",				/* failNoSavearea */
-	"Savearea corruption",		/* failSaveareaCorr */
-	"Invalid live context",		/* failBadLiveContext */
-	"Corrupt skip lists",		/* failSkipLists */
-	"Unaligned stack",			/* failUnalignedStk */
-	"Invalid pmap",				/* failPmap */
-	"Lock timeout",				/* failTimeout */
-	"Unknown failure code"		/* Unknown failure code - must always be last */
-};
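-
-/* SysChoked() below indexes this table with the failure code from
- * save_r3, clamping any out-of-range code to failUnknown, so the
- * "Unknown failure code" sentinel must remain the last entry. */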
-
-const char *invxcption = "Unknown code";
-
-static unsigned	commit_paniclog_to_nvram;
-
-#if !MACH_KDB
-void kdb_trap(__unused int type, __unused struct savearea *regs) {}
-#endif /* !MACH_KDB */
-
-#if !MACH_KDP
-void kdp_trap(__unused int type, __unused struct savearea *regs) {}
-#endif /* !MACH_KDP */
-
-extern int default_preemption_rate;
-extern int max_unsafe_quanta;
-extern int max_poll_quanta;
-
-void
-machine_startup(void)
-{
-	int	boot_arg;
-	unsigned int wncpu;
-
-	if (PE_parse_boot_argn("cpus", &wncpu, sizeof (wncpu))) {
-		if ((wncpu > 0) && (wncpu < MAX_CPUS))
-                        max_ncpus = wncpu;
-	}
-
-	if( PE_get_hotkey( kPEControlKey ))
-            halt_in_debugger = halt_in_debugger ? 0 : 1;
-
-	if (PE_parse_boot_argn("debug", &boot_arg, sizeof (boot_arg))) {
-		if (boot_arg & DB_HALT) halt_in_debugger=1;
-		if (boot_arg & DB_PRT) disable_debug_output=FALSE; 
-		if (boot_arg & DB_SLOG) systemLogDiags=TRUE; 
-		if (boot_arg & DB_NMI) panicDebugging=TRUE; 
-		if (boot_arg & DB_LOG_PI_SCRN) logPanicDataToScreen=TRUE; 
-	}
-	
-	if (!PE_parse_boot_argn("nvram_paniclog", &commit_paniclog_to_nvram, sizeof (commit_paniclog_to_nvram)))
-		commit_paniclog_to_nvram = 1;
-
-	PE_parse_boot_argn("vmmforce", &lowGlo.lgVMMforcedFeats, sizeof (lowGlo.lgVMMforcedFeats));
-
-	hw_lock_init(&debugger_lock);				/* initialize debugger lock */
-	hw_lock_init(&pbtlock);						/* initialize print backtrace lock */
-
-#if	MACH_KDB
-	/*
-	 * Initialize KDB
-	 */
-#if	DB_MACHINE_COMMANDS
-	db_machine_commands_install(ppc_db_commands);
-#endif	/* DB_MACHINE_COMMANDS */
-	ddb_init();
-
-	if (boot_arg & DB_KDB)
-		current_debugger = KDB_CUR_DB;
-
-	/*
-	 * Cause a breakpoint trap to the debugger before proceeding
-	 * any further if the proper option bit was specified in
-	 * the boot flags.
-	 */
-	if (halt_in_debugger && (current_debugger == KDB_CUR_DB)) {
-	        Debugger("inline call to debugger(machine_startup)");
-		halt_in_debugger = 0;
-		active_debugger =1;
-	}
-#endif /* MACH_KDB */
-	if (PE_parse_boot_argn("preempt", &boot_arg, sizeof (boot_arg))) {
-		default_preemption_rate = boot_arg;
-	}
-	if (PE_parse_boot_argn("unsafe", &boot_arg, sizeof (boot_arg))) {
-		max_unsafe_quanta = boot_arg;
-	}
-	if (PE_parse_boot_argn("poll", &boot_arg, sizeof (boot_arg))) {
-		max_poll_quanta = boot_arg;
-	}
-	if (PE_parse_boot_argn("yield", &boot_arg, sizeof (boot_arg))) {
-		sched_poll_yield_shift = boot_arg;
-	}
-
-	machine_conf();
-
-	/*
-	 * Kick off the kernel bootstrap.
-	 */
-	kernel_bootstrap();
-	/*NOTREACHED*/
-}
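-
-/* For reference: the DB_* masks tested above are bits in the "debug"
- * boot-arg (defined in kern/debug.h in this era of xnu).  As a
- * hypothetical example, booting with debug=0x1 (DB_HALT) requests a halt
- * into the debugger before the kernel bootstrap proceeds. */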
-
-char *
-machine_boot_info(__unused char *buf, __unused vm_size_t size)
-{
-	return(PE_boot_args());
-}
-
-void
-machine_conf(void)
-{
-	machine_info.memory_size = mem_size;	/* Note that this will be 2 GB for >= 2 GB machines */
-}
-
-void
-machine_init(void)
-{
-	debug_log_init();
-	clock_config();
-/*	Note that we must initialize the stepper tables AFTER the clock is configured!!!!! */
-	if(pmsExperimental & 1) pmsCPUConf();	/* (EXPERIMENTAL) Initialize the stepper tables */
-	perfmon_init();
-	return;
-
-}
-
-void
-slave_machine_init(__unused void *param)
-{
-	cpu_machine_init();			/* Initialize the processor */
-	clock_init();				/* Init the clock */
-}                               
-
-void
-halt_all_cpus(boolean_t	reboot)
-{
-	if(reboot)
-	{
-		printf("MACH Reboot\n");
-		PEHaltRestart(kPERestartCPU);
-	}
-	else
-	{
-		printf("CPU halted\n");
-		PEHaltRestart(kPEHaltCPU);
-	} 
-	while(1);
-}
-
-void
-halt_cpu(void)
-{
-        halt_all_cpus(FALSE);
-}
-
-#if	MACH_ASSERT
-/*
- * Machine-dependent routine to fill in an array with up to callstack_max
- * levels of return pc information.
- */
-void
-machine_callstack(__unused natural_t *buf, __unused vm_size_t callstack_max)
-{
-}
-#endif	/* MACH_ASSERT */
-
-void
-print_backtrace(struct savearea *ssp)
-{
-	unsigned int stackptr, fence;
-	struct savearea *sv, *svssp, *psv;
-	unsigned int cpu;
-
-/*
- *	We need this lock to make sure we don't hang up when we double panic on an MP.
- */
-
-	cpu  = cpu_number();					/* Just who are we anyways? */
-	if(pbtcpu != cpu) {						/* Allow recursion */
-		(void)hw_atomic_add(&pbtcnt, 1); /* Remember we are trying */
-		while(!hw_lock_try(&pbtlock));		/* Spin here until we can get in. If we never do, well, we're crashing anyhow... */	
-		pbtcpu = cpu;						/* Mark it as us */	
-	}	
-
-	svssp = (struct savearea *)ssp;				/* Make this easier */
-	sv = NULL;
-	if(current_thread())
-		sv = (struct savearea *)current_thread()->machine.pcb;	/* Find most current savearea if system has started */
-
-	fence = 0xFFFFFFFF;						/* Show we go all the way */
-	if(sv) fence = (unsigned int)sv->save_r1;	/* Stop at previous exception point */
-	
-	if(!svssp) {							/* Should we start from stack? */
-		kdb_printf("Latest stack backtrace for cpu %d:\n", cpu_number());
-		__asm__ volatile("mr %0,r1" : "=r" (stackptr));	/* Get current stack */
-		dump_backtrace((struct savearea *)0,stackptr, fence);	/* Dump the backtrace */
-		if(!sv) {							/* Leave if no saveareas */
-			hw_lock_unlock(&pbtlock);		/* Allow another back trace to happen */
-			goto pbt_exit;
-		}
-	}
-	else {									/* Were we passed an exception? */
-		fence = 0xFFFFFFFF;					/* Show we go all the way */
-		if(svssp->save_hdr.save_prev) {
-			if((svssp->save_hdr.save_prev <= vm_last_addr) && ((unsigned int)pmap_find_phys(kernel_pmap, (addr64_t)svssp->save_hdr.save_prev))) {	/* Valid address? */	
-				psv = (struct savearea *)((unsigned int)svssp->save_hdr.save_prev);	/* Get the 64-bit back chain converted to a regular pointer */
-				fence = (unsigned int)psv->save_r1;	/* Stop at previous exception point */
-			}
-		}
-	
-		kdb_printf("Latest crash info for cpu %d:\n", cpu_number());
-		kdb_printf("   Exception state (sv=%p)\n", svssp);
-		dump_savearea(svssp, fence);		/* Dump this savearea */	
-	}
-
-	if(!sv) {								/* Leave if no saveareas */
-		hw_lock_unlock(&pbtlock);			/* Allow another back trace to happen */
-		goto pbt_exit;
-	}
-	
-	kdb_printf("Proceeding back via exception chain:\n");
-
-	while(sv) {								/* Do them all... */
-		if(!(((addr64_t)((uintptr_t)sv) <= vm_last_addr) && 
-			(unsigned int)pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)sv)))) {	/* Valid address? */	
-			kdb_printf("   Exception state (sv=%p) Not mapped or invalid. stopping...\n", sv);
-			break;
-		}
-		
-		kdb_printf("   Exception state (sv=%p)\n", sv);
-		if(sv == svssp) {					/* Did we dump it already? */
-			kdb_printf("      previously dumped as \"Latest\" state. skipping...\n");
-		}
-		else {
-			fence = 0xFFFFFFFF;				/* Show we go all the way */
-			if(sv->save_hdr.save_prev) {
-				if((sv->save_hdr.save_prev <= vm_last_addr) && ((unsigned int)pmap_find_phys(kernel_pmap, (addr64_t)sv->save_hdr.save_prev))) {	/* Valid address? */	
-					psv = (struct savearea *)((unsigned int)sv->save_hdr.save_prev);	/* Get the 64-bit back chain converted to a regular pointer */
-					fence = (unsigned int)psv->save_r1;	/* Stop at previous exception point */
-				}
-			}
-			dump_savearea(sv, fence);		/* Dump this savearea */	
-		}	
-		
-		sv = CAST_DOWN(struct savearea *, sv->save_hdr.save_prev);	/* Back chain */ 
-	}
-	
-
-	pbtcpu = -1;							/* Mark as unowned */
-	hw_lock_unlock(&pbtlock);				/* Allow another back trace to happen */
-	(void)hw_atomic_sub(&pbtcnt, 1);  /* Show we are done */
-
-	while(pbtcnt);							/* Wait for completion */
-pbt_exit:
-    panic_display_system_configuration();
-    return;
-}
-
-void
-dump_savearea(struct savearea *sv, unsigned int fence)
-{
-	const char *xcode;
-	
-	if(sv->save_exception > T_MAX)
-		xcode = invxcption;	/* Too big for table */
-	else
-		xcode = trap_type[sv->save_exception / 4];		/* Point to the type */
-	
-	kdb_printf("      PC=0x%08X; MSR=0x%08X; DAR=0x%08X; DSISR=0x%08X; LR=0x%08X; R1=0x%08X; XCP=0x%08X (%s)\n",
-		(unsigned int)sv->save_srr0, (unsigned int)sv->save_srr1, (unsigned int)sv->save_dar, sv->save_dsisr,
-		(unsigned int)sv->save_lr, (unsigned int)sv->save_r1, sv->save_exception, xcode);
-	
-	if(!(sv->save_srr1 & MASK(MSR_PR))) {		/* Are we in the kernel? */
-		dump_backtrace(sv, (unsigned int)sv->save_r1, fence);	/* Dump the stack back trace from  here if not user state */
-	}
-	
-	return;
-}
-
-#define DUMPFRAMES 34
-#define LRindex 2
-
-void dump_backtrace(struct savearea *sv, unsigned int stackptr, unsigned int fence) {
-
-	unsigned int bframes[DUMPFRAMES];
-	unsigned int  sframe[8], raddr, dumbo;
-	int i, index=0;
-//	char syminfo[80];
-	
-	kdb_printf("      Backtrace:\n");
-	if (sv != (struct savearea *)0) {
-		bframes[0] = (unsigned int)sv->save_srr0;
-		bframes[1] = (unsigned int)sv->save_lr;
-		index = 2;
-	}
-	for(i = index; i < DUMPFRAMES; i++) {			/* Dump up to max frames */
-	
-		if(!stackptr || (stackptr == fence)) break;		/* Hit stop point or end... */
-		
-		if(stackptr & 0x0000000F) {				/* Is stack pointer valid? */
-			kdb_printf("\n         backtrace terminated - unaligned frame address: 0x%08X\n", stackptr);	/* No, tell 'em */
-			break;
-		}
-
-		raddr = (unsigned int)pmap_find_phys(kernel_pmap, (addr64_t)stackptr);	/* Get physical frame address */
-		if(!raddr || (stackptr > vm_last_addr)) {		/* Is it mapped? */
-			kdb_printf("\n         backtrace terminated - frame not mapped or invalid: 0x%08X\n", stackptr);	/* No, tell 'em */
-			break;
-		}
-	
-		if(!mapping_phys_lookup(raddr, &dumbo)) {	/* Is it within physical RAM? */
-			kdb_printf("\n         backtrace terminated - frame outside of RAM: v=0x%08X, p=%08X\n", stackptr, raddr);	/* No, tell 'em */
-			break;
-		}
-	
-		ReadReal((addr64_t)((raddr << 12) | (stackptr & 4095)), &sframe[0]);	/* Fetch the stack frame */
-
-		bframes[i] = sframe[LRindex];				/* Save the link register */
-		
-//		syms_formataddr((vm_offset_t)bframes[i], syminfo, sizeof (syminfo));
-//		kdb_printf("        %s\n", syminfo);
-		if(!i) kdb_printf("         ");				/* Indent first time */
-		else if(!(i & 7)) kdb_printf("\n         ");	/* Skip to new line every 8 */
-		kdb_printf("0x%08X ", bframes[i]);			/* Dump the link register */
-		
-		stackptr = sframe[0];						/* Chain back */
-	}
-	kdb_printf("\n");
-	if(i >= DUMPFRAMES) kdb_printf("      backtrace continues...\n");	/* Say we terminated early */
-	if(i) kmod_panic_dump((vm_offset_t *)&bframes[0], i);	/* Show what kmods are in trace */
-	
-}
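-
-/* The walk above is the standard 32-bit PowerPC stack discipline: word 0
- * of each frame is the caller's back chain and word LRindex (2) holds the
- * saved LR.  A minimal sketch of the same walk over an ordinary mapped
- * stack (record() is hypothetical), without the physical ReadReal() step:
- *
- *	unsigned int *frame = (unsigned int *)stackptr;
- *	while (frame && ((unsigned int)frame != fence)) {
- *		record(frame[LRindex]);			// saved LR
- *		frame = (unsigned int *)frame[0];	// back chain
- *	}
- */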
-	
-void commit_paniclog(void) {
-	unsigned long pi_size = 0;
-
-	if (debug_buf_size > 0)	{
-		if (commit_paniclog_to_nvram) {
-			unsigned int bufpos;
-			
-			/* XXX Consider using the WKdm compressor in the
-			 * future, rather than just packing - would need to
-			 * be co-ordinated with crashreporter, which decodes
-			 * this post-restart. The compressor should be
-			 * capable of in-place compression.
-			 */
-			bufpos = packA(debug_buf, (unsigned) (debug_buf_ptr - debug_buf), debug_buf_size);
-			/* If compression was successful,
-			 * use the compressed length
-			 */
-			pi_size = bufpos ? bufpos : (unsigned) (debug_buf_ptr - debug_buf);
-
-			/* Truncate if the buffer is larger than a
-			 * certain magic size - this really ought to
-			 * be some appropriate fraction of the NVRAM
-			 * image buffer, and is best done in the
-			 * savePanicInfo() or PESavePanicInfo() calls
-			 * This call must save data synchronously,
-			 * since we can subsequently halt the system.
-			 */
-			kprintf("Attempting to commit panic log to NVRAM\n");
-			/* N.B.: This routine (currently an IOKit wrapper that
-			 * calls through to the appropriate platform NVRAM
-			 * driver) must be panic context safe, i.e.
-			 * acquire no locks and require no kernel services.
-			 * This does not appear to be the case currently
-			 * on some platforms, unfortunately (the driver
-			 * relies on command gate serialization).
-			 */
-			pi_size = PESavePanicInfo((unsigned char *)debug_buf,
-			    ((pi_size > 2040) ? 2040 : pi_size));
-			/* Uncompress in-place, to allow debuggers to examine
-			 * the panic log.
-			 */
-			if (bufpos) 
-				unpackA(debug_buf, bufpos);
-		}
-	}
-}
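-
-/* Sequence summary: packA() compresses debug_buf in place, the (possibly
- * compressed) log is pushed to NVRAM via PESavePanicInfo() capped at 2040
- * bytes, and unpackA() then restores the plain text so a debugger
- * examining memory afterwards still sees a readable log. */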
-
-void 
-Debugger(const char	*message) {
-
-	spl_t spl;
-	
-	spl = splhigh();								/* No interruptions from here on */
-	
-/*
- *	backtrace for Debugger() call  from panic() if no current debugger
- *	backtrace and return for double panic() call
- */
-	if ((panicstr != (char *)0) && 
-	  (((nestedpanic != 0) && (current_debugger == 1)) || (active_debugger == 0))) {
-		print_backtrace(NULL);
-		if (nestedpanic != 0)  {
-			splx(spl);
-			return;									/* Yeah, don't enter again... */
-		}
-	}
-	
-	if (debug_mode && getPerProc()->debugger_active) {	/* Are we already in the debugger on this processor? */
-		splx(spl);
-		return;										/* Yeah, don't do it again... */
-	}
-
-
-/*
- * The above stuff catches the double panic case so we shouldn't have to worry about that here.
- */
-	if ( panicstr != (char *)0 )
-	{
-		disable_preemption();
-		/* Commit the panic log buffer to NVRAM, unless otherwise
-		 * specified via a boot-arg.
-		 */
-		commit_paniclog();
-		if(!panicDebugging) {
-			unsigned int my_cpu, tcpu;
-
-			my_cpu = cpu_number();
-			debugger_cpu = my_cpu;
-
-			(void)hw_atomic_add(&debug_mode, 1);
-			PerProcTable[my_cpu].ppe_vaddr->debugger_active++;
-			lock_debugger();
-
-			for(tcpu = 0; tcpu < real_ncpus; tcpu++) {
-				if(tcpu == my_cpu) continue;
-				(void)hw_atomic_add(&debugger_sync, 1);
-				(void)cpu_signal(tcpu, SIGPdebug, 0 ,0);
-			}
-			(void)hw_cpu_sync(&debugger_sync, LockTimeOut);
-			debugger_sync = 0;
-		}
-
-		draw_panic_dialog();
-		
-		if(!panicDebugging) {
-#if CONFIG_EMBEDDED
-					PEHaltRestart(kPEPanicRestartCPU);
-#else
-					PEHaltRestart( kPEHangCPU );
-#endif
-		}
-
-		enable_preemption();
-	}
-
-	if ((current_debugger != NO_CUR_DB)) {			/* If there is a debugger configured, enter it */
-		printf("Debugger(%s)\n", message);
-		TRAP_DEBUGGER;
-		splx(spl);
-		return;										/* Done debugging for a while */
-	}
-
-	printf("\nNo debugger configured - dumping debug information\n");
-	printf("MSR=%08X\n",mfmsr());
-	print_backtrace(NULL);
-	splx(spl);
-	return;
-}
-
-/*
- *		Here's where we attempt to get some diagnostic information dumped out
- *		when the system is really confused.  We will try to get into the 
- *		debugger as well.
- *
- *		We are here with interrupts disabled and on the debug stack.  The savearea
- *		that was passed in is NOT chained to the activation.
- *
- *		save_r3 contains the failure reason code.
- */
-
-void
-SysChoked(unsigned int type, struct savearea *sv)
-{
-	unsigned int failcode;
-	const char * const pmsg = "System Failure: cpu=%d; code=%08X (%s)\n";
-	mp_disable_preemption();
-	disable_debug_output = FALSE;
-	debug_mode = TRUE;
-
-	failcode = (unsigned int)sv->save_r3;			/* Get the failure code */
-	if(failcode > failUnknown) failcode = failUnknown;	/* Clamp to the unknown failure code */
-	
-	kprintf(pmsg, cpu_number(), (unsigned int)sv->save_r3, failNames[failcode]);
-	kdb_printf(pmsg, cpu_number(), (unsigned int)sv->save_r3, failNames[failcode]);
-
-	print_backtrace(sv);							/* Attempt to print backtrace */
-
-	/* Commit the panic log buffer to NVRAM, unless otherwise
-	 * specified via a boot-arg. For certain types of panics
-	 * which result in a "choke" exception, this may well
-	 * be inadvisable, and setting the nvram_paniclog=0
-	 * boot-arg may be useful.
-	 */
-
-	if (panicDebugging)
-		commit_paniclog();
-
-	Call_DebuggerC(type, sv);						/* Attempt to get into debugger */
-
-	if ((current_debugger != NO_CUR_DB))
-		Call_DebuggerC(type, sv);	/* Attempt to get into debugger */
-	panic_plain(pmsg, cpu_number(), (unsigned int)sv->save_r3, failNames[failcode]);
-}
-
-
-
-/*
- *	When we get here, interruptions are disabled and we are on the debugger stack
- *	Never, ever, ever, ever enable interruptions from here on
- */
-
-int
-Call_DebuggerC(unsigned int type, struct savearea *saved_state)
-{
-	int				directcall, wait;
-	addr64_t		instr_ptr = 0ULL;
-	ppnum_t			instr_pp;
-	unsigned int 	instr, tcpu, my_cpu;
-	int 			wasdebugger;
-
-	my_cpu = cpu_number();								/* Get our CPU */
-
-#if	MACH_KDB
-	if((debugger_cpu == my_cpu) && 						/* Do we already own debugger? */
-	  PerProcTable[my_cpu].ppe_vaddr->debugger_active && 						/* and are we really active? */
-	  db_recover && 									/* and have we set up recovery? */
-	  (current_debugger == KDB_CUR_DB)) {				/* and are we in KDB (only it handles recovery) */
-		kdb_trap(type, saved_state);					/* Then reenter it... */
-	}
-#endif
-	
-	(void)hw_atomic_add(&debug_mode, 1); /* Indicate we are in debugger */
-	PerProcTable[my_cpu].ppe_vaddr->debugger_active++;	/* Show active on our CPU */
-	
-	lock_debugger();									/* Ensure that only one CPU is in the debugger */
-
-	if(db_im_stepping == my_cpu) {						/* Are we just back from a step? */
-		enable_preemption_no_check();					/* Enable preemption now */
-		db_im_stepping = 0xFFFFFFFF;					/* Nobody stepping right now */
-	}
-
-	if (debugger_debug) {
-#if 0
-		kprintf("Call_DebuggerC(%d): %08X %08X, debact = %d\n", my_cpu, type, (uint32_t)saved_state, debug_mode);	/* (TEST/DEBUG) */
-#endif
-		printf("Call_Debugger: enter - cpu %d, is_slave %d, debugger_cpu %d, pc %08llX\n",
-		   my_cpu, PerProcTable[my_cpu].ppe_vaddr->debugger_is_slave, debugger_cpu, saved_state->save_srr0);
-	}
-	
-	instr_pp = (vm_offset_t)pmap_find_phys(kernel_pmap, (addr64_t)(saved_state->save_srr0));
-
-	if (instr_pp) {
-		instr_ptr = (addr64_t)(((addr64_t)instr_pp << 12) | (saved_state->save_srr0 & 0xFFF));	/* Make physical address */
-		instr = ml_phys_read_64(instr_ptr);				/* Get the trap that caused entry */
-	} 
-	else instr = 0;
-
-#if 0
-	if (debugger_debug) kprintf("Call_DebuggerC(%d): instr_pp = %08X, instr_ptr = %016llX, instr = %08X\n", my_cpu, instr_pp, instr_ptr, instr);	/* (TEST/DEBUG) */
-#endif
-
-	if (db_breakpoints_inserted) cpus_holding_bkpts++;	/* Bump up the holding count */
-	if ((debugger_cpu == (unsigned)-1) &&
-		!PerProcTable[my_cpu].ppe_vaddr->debugger_is_slave) {
-#if 0
-		if (debugger_debug) kprintf("Call_DebuggerC(%d): lasttrace = %08X\n", my_cpu, lastTrace);	/* (TEST/DEBUG) */
-#endif
-		debugger_cpu = my_cpu;							/* Show that we are debugger */
-
-
-		lastTrace = LLTraceSet(0);						/* Disable low-level tracing */
-
-		for(tcpu = 0; tcpu < real_ncpus; tcpu++) {		/* Stop all the other guys */
-			if(tcpu == my_cpu) continue;				/* Don't diddle ourselves */
-			(void)hw_atomic_add(&debugger_sync, 1); /* Count signal sent */
-			(void)cpu_signal(tcpu, SIGPdebug, 0 ,0);	/* Tell 'em to enter debugger */
-		}
-		(void)hw_cpu_sync(&debugger_sync, LockTimeOut);	/* Wait for the other processors to enter debug */
-		debugger_sync = 0;								/* We're done with it */
-	} 
-	else if (debugger_cpu != my_cpu)  goto debugger_exit;	/* We are not debugger, don't continue... */
-	
-
-	if (instr == TRAP_DIRECT_INST) {
-		disable_debug_output = FALSE;
-		print_backtrace(saved_state);
-	}
-
-	switch_debugger = 0;								/* Make sure switch request is off */
-	directcall = 1;										/* Assume direct call */
-
-	if (saved_state->save_srr1 & MASK(SRR1_PRG_TRAP)) {	/* Trap instruction? */
-		
-		directcall = 0;									/* We had a trap not a direct call */
-
-		switch (instr) {								/* Select trap type */
-
-#if	MACH_KDP
-			case BREAK_TO_KDP0:							/* Breakpoint into KDP? */
-			case BREAK_TO_KDP1:							/* Breakpoint into KDP? */
-				current_debugger = KDP_CUR_DB;			/* Yes, set KDP */
-				kdp_trap(type, saved_state);			/* Enter it */
-				break;
-#endif
-	
-#if	MACH_KDB
-			case BREAK_TO_KDB0: 						/* Breakpoint to KDB (the "good" debugger)? */
-				current_debugger = KDB_CUR_DB;			/* Yes, set it */
-				kdb_trap(type, saved_state);			/* Enter it */
-				break;
-#endif
-				
-			case TRAP_DEBUGGER_INST:					/* Should we enter the current debugger? */
-			case TRAP_DIRECT_INST:						/* Should we enter the current debugger? */
-				if (current_debugger == KDP_CUR_DB) 	/* Is current KDP? */
-					kdp_trap(type, saved_state);		/* Yes, enter it */
-				else if (current_debugger == KDB_CUR_DB) 	/* Is this KDB? */
-					kdb_trap(type, saved_state);		/* Yes, go ahead and enter */
-				else goto debugger_error;				/* No debugger active */
-				break;
-				
-			default:									/* Unknown/bogus trap type */
-				goto debugger_error;
-		}
-	}
-
-	while(1) {											/* We are here to handle debugger switches */
-		
-		if(!directcall) {								/* Was this a direct call? */
-			if(!switch_debugger) break;					/* No, then leave if no switch requested... */
-
-/*
- *			Note: we can only switch to a debugger we have.  Ignore bogus switch requests.
- */
-#if 0
-			if (debugger_debug) kprintf("Call_DebuggerC(%d): switching debuggers\n", my_cpu);	/* (TEST/DEBUG) */
-#endif
-#if MACH_KDB
-			if(current_debugger == KDP_CUR_DB) current_debugger = KDB_CUR_DB; /* Switch to KDB */
-#if MACH_KDP
-			else 
-#endif
-#endif
-#if MACH_KDP
-			if(current_debugger == KDB_CUR_DB) current_debugger = KDP_CUR_DB;		/* Switch to KDP */
-#endif
-		}
-		
-		switch_debugger = 0;							/* Clear request */
-		directcall = 0;									/* Clear first-time direct call indication */
-
-		switch (current_debugger) {						/* Enter correct debugger */
-		
-			case KDP_CUR_DB:							/* Enter KDP */
-				kdp_trap(type, saved_state);
-				break;
-				
-			case KDB_CUR_DB:							/* Enter KDB */
-				kdb_trap(type, saved_state);
-				break;
-				
-			default:									/* No debugger installed */
-				goto debugger_error;
-				break;
-		}
-	}
-
-debugger_exit:
-#if 0
-	if (debugger_debug) kprintf("Call_DebuggerC(%d): exit - inst = %08X, cpu=%d(%d), run=%d\n", my_cpu, 
-		instr, my_cpu, debugger_cpu, db_run_mode);	/* (TEST/DEBUG) */
-#endif
-	if ((instr == TRAP_DEBUGGER_INST) ||				/* Did we trap to enter debugger? */
-		(instr == TRAP_DIRECT_INST)) saved_state->save_srr0 += TRAP_INST_SIZE;	/* Yes, point past trap */
-
-	wasdebugger = 0;									/* Assume not debugger */
-	if(debugger_cpu == my_cpu) {						/* Are we the debugger processor? */
-		wasdebugger = 1;								/* Remember that we were the debugger */
-		LLTraceSet(lastTrace);							/* Enable tracing on the way out if we are debugger */
-	}
-
-	wait = FALSE;										/* Assume we are not going to wait */
-	if (db_run_mode == STEP_CONTINUE) {					/* Are we going to run? */
-		wait = TRUE;									/* Yeah, remember to wait for breakpoints to clear */
-		debugger_cpu = -1;								/* Release other processor's debuggers */
-		for(tcpu = 0; tcpu < real_ncpus; tcpu++)
-			PerProcTable[tcpu].ppe_vaddr->debugger_pending = 0;	/* Release request (this is a HACK) */
-		NMIss = 0;										/* Let NMI bounce */
-	}
-	
-	if(db_run_mode == STEP_ONCE) {						/* Are we about to step? */
-		disable_preemption();							/* Disable preemption for the step */
-		db_im_stepping = my_cpu;						/* Remember that I am about to step */
-	}
-
-	if (db_breakpoints_inserted) cpus_holding_bkpts--;	/* If any breakpoints, back off count */
-	if (PerProcTable[my_cpu].ppe_vaddr->debugger_is_slave) PerProcTable[my_cpu].ppe_vaddr->debugger_is_slave--;	/* If we were a slave, uncount us */
-	if (debugger_debug)
-		printf("Call_Debugger: exit - cpu %d, debugger_cpu %d, run_mode %d holds %d\n",
-			  my_cpu, debugger_cpu, db_run_mode,
-			  cpus_holding_bkpts);
-
-	unlock_debugger();									/* Release the lock */
-	PerProcTable[my_cpu].ppe_vaddr->debugger_active--;	/* Say we aren't active anymore */
-
-	if (wait) while(cpus_holding_bkpts);				/* Wait for breakpoints to clear */
-
-
-	(void)hw_atomic_sub(&debug_mode, 1); /* Set out of debug now */
-
-	return(1);											/* Exit debugger normally */
-
-debugger_error:
-	if(db_run_mode != STEP_ONCE) enable_preemption_no_check();	/* Enable preemption, but don't preempt here */
-	(void)hw_atomic_sub(&debug_mode, 1); /* Set out of debug now */
-	return(0);											/* Return in shame... */
-
-}
-
-void
-lock_debugger(void)
-{
-	unsigned int my_cpu;
-
-	my_cpu = cpu_number();								/* Get our CPU number */
-
-	while(1) { /* Check until we get it */
-		if (debugger_cpu != (unsigned)-1 && debugger_cpu != my_cpu)
-			continue;	/* Someone, not us, is debugger... */
-		if (hw_lock_try(&debugger_lock)) { /* Get the debug lock */			
-			if (debugger_cpu == (unsigned)-1 || debugger_cpu == my_cpu)
-				break;	/* Is it us? */
-			hw_lock_unlock(&debugger_lock); /* Not us, release lock */
-		}
-	} 
-}
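-
-/* The debugger_cpu test before hw_lock_try() is only an optimization:
- * debugger_cpu can change before the lock is won, so ownership is checked
- * again after a successful try, and the lock is dropped if another CPU
- * claimed the debugger first. */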
-
-void unlock_debugger(void) {
-
-	hw_lock_unlock(&debugger_lock);
-
-}
-
-int patchInst(task_t task, addr64_t vaddr, uint32_t inst);
-int patchInst(task_t task, addr64_t vaddr, uint32_t inst)
-{
-	vm_map_t map;
-	addr64_t paddr;
-	uint32_t instr, nestingDepth;
-	kern_return_t ret;
-	vm_region_submap_short_info_data_64_t info;
-	mach_msg_type_number_t count;
-	mach_vm_address_t address;
-	mach_vm_size_t sizeOfRegion;
-	vm_prot_t reprotect;
-
-	if(task == TASK_NULL) return -1;		/* Leave if task is bogus... */
-
-	task_lock(task);						/* Make sure the task doesn't go anywhere */
-	if (!task->active) {					/* Is it alive? */
-		task_unlock(task);					/* Nope, unlock */
-		return -1;							/* Not an active task, fail... */
-	}
-	map = task->map;						/* Get his map */
-	vm_map_reference_swap(map);				/* Don't let it go away */
-	task_unlock(task);						/* Unleash the task */
-
-	/* Find the memory permissions. */
-	nestingDepth=999999;					/* Limit recursion */
-	
-	count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
-	address = (mach_vm_address_t)vaddr;
-	sizeOfRegion = (mach_vm_size_t)4;
-
-	ret = mach_vm_region_recurse(map, &address, &sizeOfRegion, &nestingDepth, (vm_region_recurse_info_t)&info, &count);
-	if (ret != KERN_SUCCESS) {				/* Leave if it didn't work */
-		vm_map_deallocate(map);				/* Drop reference on map */
-		return (-1);			
-	}
-
-/*
- *	We need to be careful here: dtrace probes may be removed while the code they patch is being
- *	executed.  This sequence may leave us with no-execute turned on temporarily when we execute
- *	through it.
- */
- 
-	if (!(info.protection & VM_PROT_WRITE)) {
-		/* Save the original protection values for restoration later */
-		reprotect = info.protection;
-
-		if (info.max_protection & VM_PROT_WRITE) {
-			/* The memory is not currently writable, but can be made writable. */
-			ret = mach_vm_protect(map, (mach_vm_offset_t)vaddr, (mach_vm_size_t)4, 0, reprotect | VM_PROT_WRITE);
-		} 
-		else {
-			/*
-			 * The memory is not currently writable, and cannot be made writable. We need to COW this memory.
-			 *
-			 * Strangely, we can't just say "reprotect | VM_PROT_COPY"; that fails.
-			 */
-			ret = mach_vm_protect(map, (mach_vm_offset_t)vaddr, (mach_vm_size_t)4, 0, VM_PROT_COPY | VM_PROT_READ | VM_PROT_WRITE);
-		}
-
-		if (ret != KERN_SUCCESS) {
-			vm_map_deallocate(map);			/* Drop reference on map */
-			return (-1);		
-		}
-		
-	} 
-	else {
-		/* The memory was already writable. */
-		reprotect = VM_PROT_NONE;
-	}
-
-	instr = inst;							/* Place instruction in local memory */
-	ret = vm_map_write_user(map, &inst, (vm_map_address_t)vaddr, (vm_size_t)4);	/* Write the instruction */
-	if (ret != KERN_SUCCESS) {				/* Leave if it didn't work */
-	
-		if (reprotect != VM_PROT_NONE) {
-			ret = mach_vm_protect (map, (mach_vm_offset_t)vaddr, (mach_vm_size_t)4, 0, reprotect);
-		}
-
-		vm_map_deallocate(map);				/* Drop reference on map */
-		return (-1);			
-	}
-
-	paddr = (addr64_t)pmap_find_phys(map->pmap, vaddr) << 12;	/* Find the physical address of the patched address */
-	if(!paddr) {							/* Is address mapped now? */
-		vm_map_deallocate(map);				/* Drop reference on map */
-		return 0;							/* Leave... */
-	}
-	paddr = paddr | (vaddr & 4095);			/* Construct physical address */
-	invalidate_icache64(paddr, 4, 1);		/* Flush out the instruction cache here */
-
-	if (reprotect != VM_PROT_NONE) {
-		ret = mach_vm_protect(map, (mach_vm_offset_t)vaddr, (mach_vm_size_t)4, 0, reprotect);
-	}
-
-	vm_map_deallocate(map);
-
-	return (0);
-}
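-
-/* Hypothetical caller sketch (not from this file): a probe facility would
- * save the original instruction, patch in a trap encoding, and later
- * restore it, e.g.
- *
- *	uint32_t saved = original_instruction_at(vaddr);	// hypothetical helper
- *	(void)patchInst(task, vaddr, TRAP_DEBUGGER_INST);	// arm the probe
- *	...
- *	(void)patchInst(task, vaddr, saved);			// disarm it
- */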
diff --git a/osfmk/ppc/movc.s b/osfmk/ppc/movc.s
deleted file mode 100644
index 2e100071b..000000000
--- a/osfmk/ppc/movc.s
+++ /dev/null
@@ -1,1303 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-#include <debug.h>
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <mach/ppc/vm_param.h>
-#include <assym.s>
-#include <sys/errno.h>
-
-#define INSTRUMENT 0
-
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
-/*
- * void pmap_zero_page(vm_offset_t pa)
- *
- * Zero a page of physical memory.  This routine runs in 32 or 64-bit mode,
- * and handles 32 and 128-byte cache lines.
- */
-
-
-		.align	5
-		.globl	EXT(pmap_zero_page)
-
-LEXT(pmap_zero_page)
-
-        mflr	r12								// save return address
-        bl		EXT(ml_set_physical_disabled)	// turn DR and EE off, SF on, get features in r10
-        mtlr	r12								// restore return address
-        andi.	r9,r10,pf32Byte+pf128Byte		// r9 <- cache line size
-
-        subfic	r4,r9,PPC_PGBYTES				// r4 <- starting offset in page
-		
-		bt++	pf64Bitb,page0S4				// Go do the big guys...
-		
-		slwi	r3,r3,12						// get page address from page num
-		b		page_zero_1						// Jump to line aligned loop...
-
-        .align	5
-
-		nop
-		nop
-		nop
-		nop
-		nop
-		nop
-		nop
-		
-page0S4:
-		sldi	r3,r3,12						// get page address from page num
-
-page_zero_1:									// loop zeroing cache lines
-        sub.	r5,r4,r9						// more to go?
-        dcbz128	r3,r4							// zero either 32 or 128 bytes
-        sub		r4,r5,r9						// generate next offset
-        dcbz128	r3,r5
-        bne--	page_zero_1
-        
-        b		EXT(ml_restore)					// restore MSR and do the isync
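-
-// Shape of the loop above: two cache lines are zeroed per iteration,
-// walking down from the top of the page, and dcbz128 zeroes whichever
-// line size (32 or 128 bytes) the running processor uses.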
-
-
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
-/* void
- * phys_copy(src, dst, bytecount)
- *      addr64_t 	    src;
- *      addr64_t 	    dst;
- *      int             bytecount
- *
- * This routine will copy bytecount bytes from physical address src to physical
- * address dst.  It runs in 64-bit mode if necessary, but does not handle
- * overlap or make any attempt to be optimal.  Length must be a signed word.
- * Not performance critical.
- */
-
-
-		.align	5
-		.globl	EXT(phys_copy)
-
-LEXT(phys_copy)
-
-		rlwinm	r3,r3,0,1,0					; Duplicate high half of long long paddr into top of reg
-        mflr	r12								// get return address
-		rlwimi	r3,r4,0,0,31				; Combine bottom of long long to full 64-bits
-		rlwinm	r4,r5,0,1,0					; Duplicate high half of long long paddr into top of reg
-        bl		EXT(ml_set_physical_disabled)	// turn DR and EE off, SF on, get features in r10
-		rlwimi	r4,r6,0,0,31				; Combine bottom of long long to full 64-bits
-        mtlr	r12								// restore return address
-        subic.	r5,r7,4							// a word to copy?
-        b		phys_copy_2
-        
-		.align	5
-         
-phys_copy_1:									// loop copying words
-        subic.	r5,r5,4							// more to go?
-        lwz		r0,0(r3)
-        addi	r3,r3,4
-        stw		r0,0(r4)
-        addi	r4,r4,4
-phys_copy_2:
-        bge		phys_copy_1
-        addic.	r5,r5,4							// restore count
-        ble		phys_copy_4						// no more
-        
-        										// Loop is aligned here
-        
-phys_copy_3:									// loop copying bytes
-        subic.	r5,r5,1							// more to go?
-        lbz		r0,0(r3)
-        addi	r3,r3,1
-        stb		r0,0(r4)
-        addi	r4,r4,1
-        bgt		phys_copy_3
-phys_copy_4:        
-        b		EXT(ml_restore)					// restore MSR and do the isync
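-
-// C-level shape of phys_copy, as a sketch (not the compiled source), with
-// src/dst as byte pointers and count signed:
-//	while ((count -= 4) >= 0) { *(uint32_t *)dst = *(uint32_t *)src; src += 4; dst += 4; }
-//	count += 4;
-//	while (count-- > 0) { *dst++ = *src++; }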
-
-
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
-/* void
- * pmap_copy_page(src, dst)
- *      ppnum_t     src;
- *      ppnum_t     dst;
- *
- * This routine will copy the physical page src to physical page dst
- * 
- * This routine assumes that the src and dst are page numbers and that the
- * destination is cached.  It runs on 32 and 64 bit processors, with and
- * without altivec, and with 32 and 128 byte cache lines.
- * We also must assume that no-one will be executing within the destination
- * page, and that this will be used for paging.  Because this
- * is a common routine, we have tuned loops for each processor class.
- *
- */
-#define	kSFSize	(FM_SIZE+160)
-
-ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)
-
-		lis		r2,hi16(MASK(MSR_VEC))			; Get the vector flag
-        mflr	r0								// get return
- 		ori		r2,r2,lo16(MASK(MSR_FP))		; Add the FP flag
-		stw		r0,8(r1)						// save
-        stwu	r1,-kSFSize(r1)					// set up a stack frame for VRs or FPRs
-        mfmsr	r11								// save MSR at entry
-        mfsprg	r10,2							// get feature flags
-        andc	r11,r11,r2						// Clear out vec and fp
-        ori		r2,r2,lo16(MASK(MSR_EE))		// Get EE on also
-        andc	r2,r11,r2						// Clear out EE as well
-        mtcrf	0x02,r10						// we need to test pf64Bit
-        ori		r2,r2,MASK(MSR_FP)				// must enable FP for G3...
-        mtcrf	0x80,r10						// we need to test pfAltivec too
-        oris	r2,r2,hi16(MASK(MSR_VEC))		// enable altivec for G4 (ignored if G3)
-        mtmsr	r2								// turn EE off, FP and VEC on
-        isync
-        bt++	pf64Bitb,pmap_copy_64			// skip if 64-bit processor (only they take hint)
- 		slwi	r3,r3,12						// get page address from page num
-		slwi	r4,r4,12						// get page address from page num
-        rlwinm	r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1	// get ready to turn off DR
-        bt		pfAltivecb,pmap_copy_g4			// altivec but not 64-bit means G4
-        
-        
-        // G3 -- copy using FPRs
-        
-        stfd	f0,FM_SIZE+0(r1)				// save the 4 FPRs we use to copy
-        stfd	f1,FM_SIZE+8(r1)
-        li		r5,PPC_PGBYTES/32				// count of cache lines in a page
-        stfd	f2,FM_SIZE+16(r1)
-        mtctr	r5
-        stfd	f3,FM_SIZE+24(r1)
-        mtmsr	r12								// turn off DR after saving FPRs on stack
-        isync
-        
-pmap_g3_copy_loop:								// loop over 32-byte cache lines
-        dcbz	0,r4							// avoid read of dest line
-        lfd		f0,0(r3)
-        lfd		f1,8(r3)
-        lfd		f2,16(r3)
-        lfd		f3,24(r3)
-        addi	r3,r3,32
-        stfd	f0,0(r4)
-        stfd	f1,8(r4)
-        stfd	f2,16(r4)
-        stfd	f3,24(r4)
-        dcbst	0,r4							// flush dest line to RAM
-        addi	r4,r4,32
-        bdnz	pmap_g3_copy_loop
-        
-        sync									// wait for stores to take
-        subi	r4,r4,PPC_PGBYTES				// restore ptr to destination page
-        li		r6,PPC_PGBYTES-32				// point to last line in page
-pmap_g3_icache_flush:
-        subic.	r5,r6,32						// more to go?
-        icbi	r4,r6							// flush another line in icache
-        subi	r6,r5,32						// get offset to next line
-        icbi	r4,r5
-        bne		pmap_g3_icache_flush
-        
-        sync
-        mtmsr	r2								// turn DR back on
-        isync
-        lfd		f0,FM_SIZE+0(r1)				// restore the FPRs
-        lfd		f1,FM_SIZE+8(r1)
-        lfd		f2,FM_SIZE+16(r1)
-        lfd		f3,FM_SIZE+24(r1)        
-        
-        b		pmap_g4_restore					// restore MSR and done
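-
-        // The icbi sweep above is what makes the copy safe if the
-        // destination page is ever executed later: dcbst pushed each
-        // line out to memory, and icbi discards any stale icache copies.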
-
-        
-        // G4 -- copy using VRs
-
-pmap_copy_g4:									// r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
-        la		r9,FM_SIZE+16(r1)				// place where we save VRs to r9
-        li		r5,16							// load x-form offsets into r5-r9
-        li		r6,32							// another offset
-        stvx	v0,0,r9							// save some VRs so we can use to copy
-        li		r7,48							// another offset
-        stvx	v1,r5,r9
-        li		r0,PPC_PGBYTES/64				// we loop over 64-byte chunks
-        stvx	v2,r6,r9
-        mtctr	r0
-        li		r8,96							// get look-ahead for touch
-        stvx	v3,r7,r9
-        li		r9,128
-        mtmsr	r12								// now we've saved VRs on stack, turn off DR
-        isync									// wait for it to happen
-        b		pmap_g4_copy_loop
-        
-        .align	5								// align inner loops
-pmap_g4_copy_loop:								// loop over 64-byte chunks
-        dcbt	r3,r8							// touch 3 lines ahead
-        nop										// avoid a 17-word loop...
-        dcbt	r3,r9							// touch 4 lines ahead
-        nop										// more padding
-        dcba	0,r4							// avoid pre-fetch of 1st dest line
-        lvx		v0,0,r3							// offset 0
-        lvx		v1,r5,r3						// offset 16
-        lvx		v2,r6,r3						// offset 32
-        lvx		v3,r7,r3						// offset 48
-        addi	r3,r3,64
-        dcba	r6,r4							// avoid pre-fetch of 2nd line
-        stvx	v0,0,r4							// offset 0
-        stvx	v1,r5,r4						// offset 16
-        stvx	v2,r6,r4						// offset 32
-        stvx	v3,r7,r4						// offset 48
-        dcbf	0,r4							// push line 1
-        dcbf	r6,r4							// and line 2
-        addi	r4,r4,64
-        bdnz	pmap_g4_copy_loop
-
-        sync									// wait for stores to take
-        subi	r4,r4,PPC_PGBYTES				// restore ptr to destination page
-        li		r8,PPC_PGBYTES-32				// point to last line in page
-pmap_g4_icache_flush:
-        subic.	r9,r8,32						// more to go?
-        icbi	r4,r8							// flush from icache
-        subi	r8,r9,32						// get offset to next line
-        icbi	r4,r9
-        bne		pmap_g4_icache_flush
-        
-        sync
-        mtmsr	r2								// turn DR back on
-        isync
-        la		r9,FM_SIZE+16(r1)				// get base of VR save area
-        lvx		v0,0,r9							// restore the VRs
-        lvx		v1,r5,r9
-        lvx		v2,r6,r9
-        lvx		v3,r7,r9        
-        
-pmap_g4_restore:								// r11=MSR
-        mtmsr	r11								// turn EE on, VEC and FR off
-        isync									// wait for it to happen
-        addi	r1,r1,kSFSize					// pop off our stack frame
-        lwz		r0,8(r1)						// restore return address
-        mtlr	r0
-        blr
-        
-        
-        // 64-bit/128-byte processor: copy using VRs
-        
-pmap_copy_64:									// r10=features, r11=old MSR
- 		sldi	r3,r3,12						// get page address from page num
-		sldi	r4,r4,12						// get page address from page num
-		la		r9,FM_SIZE+16(r1)				// get base of VR save area
-        li		r5,16							// load x-form offsets into r5-r9
-        li		r6,32							// another offset
-        bf		pfAltivecb,pmap_novmx_copy		// altivec suppressed...
-        stvx	v0,0,r9							// save 8 VRs so we can copy without bubbles
-        stvx	v1,r5,r9
-        li		r7,48							// another offset
-        li		r0,PPC_PGBYTES/128				// we loop over 128-byte chunks
-        stvx	v2,r6,r9
-        stvx	v3,r7,r9
-        addi	r9,r9,64						// advance base ptr so we can store another 4
-        mtctr	r0
-        li		r0,MASK(MSR_DR)					// get DR bit
-        stvx	v4,0,r9
-        stvx	v5,r5,r9
-        andc	r12,r2,r0						// turn off DR bit
-        li		r0,1							// get a 1 to slam into SF
-        stvx	v6,r6,r9
-        stvx	v7,r7,r9
-        rldimi	r12,r0,63,MSR_SF_BIT			// set SF bit (bit 0)
-        li		r8,-128							// offset so we can reach back one line
-        mtmsrd	r12								// now we've saved VRs, turn DR off and SF on
-        isync									// wait for it to happen
-        dcbt128	0,r3,1							// start a forward stream
-        b		pmap_64_copy_loop
-        
-        .align	5								// align inner loops
-pmap_64_copy_loop:								// loop over 128-byte chunks
-        dcbz128	0,r4							// avoid read of destination line
-        lvx		v0,0,r3							// offset 0
-        lvx		v1,r5,r3						// offset 16
-        lvx		v2,r6,r3						// offset 32
-        lvx		v3,r7,r3						// offset 48
-        addi	r3,r3,64						// don't have enough GPRs so add 64 2x
-        lvx		v4,0,r3							// offset 64
-        lvx		v5,r5,r3						// offset 80
-        lvx		v6,r6,r3						// offset 96
-        lvx		v7,r7,r3						// offset 112
-        addi	r3,r3,64
-        stvx	v0,0,r4							// offset 0
-        stvx	v1,r5,r4						// offset 16
-        stvx	v2,r6,r4						// offset 32
-        stvx	v3,r7,r4						// offset 48
-        addi	r4,r4,64
-        stvx	v4,0,r4							// offset 64
-        stvx	v5,r5,r4						// offset 80
-        stvx	v6,r6,r4						// offset 96
-        stvx	v7,r7,r4						// offset 112
-        addi	r4,r4,64
-        dcbf	r8,r4							// flush the line we just wrote
-        bdnz	pmap_64_copy_loop
-
-        sync									// wait for stores to take
-        subi	r4,r4,PPC_PGBYTES				// restore ptr to destination page
-        li		r8,PPC_PGBYTES-128				// point to last line in page
-pmap_64_icache_flush:
-        subic.	r9,r8,128						// more to go?
-        icbi	r4,r8							// flush from icache
-        subi	r8,r9,128						// get offset to next line
-        icbi	r4,r9
-        bne		pmap_64_icache_flush
-        
-        sync
-        mtmsrd	r2								// turn DR back on, SF off
-        isync
-        la		r9,FM_SIZE+16(r1)				// get base address of VR save area on stack
-        lvx		v0,0,r9							// restore the VRs
-        lvx		v1,r5,r9
-        lvx		v2,r6,r9
-        lvx		v3,r7,r9
-        addi	r9,r9,64        
-        lvx		v4,0,r9
-        lvx		v5,r5,r9
-        lvx		v6,r6,r9
-        lvx		v7,r7,r9
-
-        b		pmap_g4_restore					// restore lower half of MSR and return
-
- //
- //		Copy on 64-bit without VMX
- //
-
-pmap_novmx_copy:        
-		li		r0,PPC_PGBYTES/128				// we loop over 128-byte chunks
-		mtctr	r0
-		li		r0,MASK(MSR_DR)					// get DR bit
-		andc	r12,r2,r0						// turn off DR bit
-		li		r0,1							// get a 1 to slam into SF
-		rldimi	r12,r0,63,MSR_SF_BIT			// set SF bit (bit 0)
-		mtmsrd	r12								// now we've saved VRs, turn DR off and SF on
-		isync									// wait for it to happen
-		dcbt128	0,r3,1							// start a forward stream 
-       
-pmap_novmx_copy_loop:							// loop over 128-byte cache lines
-        dcbz128	0,r4							// avoid read of dest line
-        
-        ld		r0,0(r3)						// Load half a line
-        ld		r12,8(r3)
-        ld		r5,16(r3)
-        ld		r6,24(r3)
-        ld		r7,32(r3)
-        ld		r8,40(r3)
-        ld		r9,48(r3)
-        ld		r10,56(r3)
-        
-        std		r0,0(r4)						// Store half a line
-        std		r12,8(r4)
-        std		r5,16(r4)
-        std		r6,24(r4)
-        std		r7,32(r4)
-        std		r8,40(r4)
-        std		r9,48(r4)
-        std		r10,56(r4)
-        
-        ld		r0,64(r3)						// Load half a line
-        ld		r12,72(r3)
-        ld		r5,80(r3)
-        ld		r6,88(r3)
-        ld		r7,96(r3)
-        ld		r8,104(r3)
-        ld		r9,112(r3)
-        ld		r10,120(r3)
-        
-        addi	r3,r3,128
- 
-        std		r0,64(r4)						// Store half a line
-        std		r12,72(r4)
-        std		r5,80(r4)
-        std		r6,88(r4)
-        std		r7,96(r4)
-        std		r8,104(r4)
-        std		r9,112(r4)
-        std		r10,120(r4)
-        
-        dcbf	0,r4							// flush the line we just wrote
-		addi	r4,r4,128
-        bdnz	pmap_novmx_copy_loop
-
-        sync									// wait for stores to take
-        subi	r4,r4,PPC_PGBYTES				// restore ptr to destination page
-        li		r8,PPC_PGBYTES-128				// point to last line in page
-
-pmap_novmx_icache_flush:
-        subic.	r9,r8,128						// more to go?
-        icbi	r4,r8							// flush from icache
-        subi	r8,r9,128						// get offset to next line
-        icbi	r4,r9
-        bne		pmap_novmx_icache_flush
-        
-        sync
-        mtmsrd	r2								// turn DR back on, SF off
-        isync
-
-        b		pmap_g4_restore					// restore lower half of MSR and return
-
-
-
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>	
-		
-// Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
-// These routines all run both on 32 and 64-bit machines, though because they are called
-// by the BSD kernel they are always in 32-bit mode when entered.  The mapped ptr returned
-// by MapUserMemoryWindow will be 64 bits however on 64-bit machines.  Beware to avoid
-// using compare instructions on this ptr.  This mapped ptr is kept globally in r31, so there
-// is no need to store or load it, which are mode-dependent operations since it could be
-// 32 or 64 bits.
-
-#define	kkFrameSize	(FM_SIZE+32)
-
-#define	kkBufSize	(FM_SIZE+0)
-#define	kkCR3		(FM_SIZE+4)
-#define	kkSource	(FM_SIZE+8)
-#define	kkDest		(FM_SIZE+12)
-#define	kkCountPtr	(FM_SIZE+16)
-#define	kkR31Save	(FM_SIZE+20)
-#define	kkThrErrJmp	(FM_SIZE+24)
- 
- 
-// nonvolatile CR bits we use as flags in cr3
-
-#define	kk64bit		12
-#define	kkNull		13
-#define	kkIn		14
-#define	kkString	15
-#define	kkZero		15
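-
-// kkString and kkZero share CR bit 15; the string flag and the zero flag
-// are evidently never needed by the same routine, so one nonvolatile bit
-// serves both (an inference from the definitions above, not a comment in
-// the original source).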
-
-
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
-/*
- * int
- * copyoutstr(src, dst, maxcount, count)
- *	vm_offset_t	src;        // r3
- *	addr64_t	dst;        // r4 and r5
- *	vm_size_t	maxcount;   // r6
- *	vm_size_t*	count;      // r7
- *
- * Set *count to the number of bytes copied.
- */
-
-ENTRY(copyoutstr, TAG_NO_FRAME_USED)
-        mfcr	r2,0x10                         // save caller's cr3, which we use for flags
-        mr      r10,r4                          // move high word of 64-bit user address to r10
-        li		r0,0
-        crset	kkString						// flag as a string op
-        mr      r11,r5                          // move low word of 64-bit user address to r11
-        stw		r0,0(r7)						// initialize #bytes moved
-        crclr	kkIn							// flag as copyout
-        b		copyJoin
-
-
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
-/*
- * int
- * copyinstr(src, dst, maxcount, count)
- *	addr64_t	src;        // r3 and r4
- *	vm_offset_t	dst;        // r5
- *	vm_size_t	maxcount;   // r6
- *	vm_size_t*	count;      // r7
- *
- * Set *count to the number of bytes copied
- * If dst == NULL, don't copy, just count bytes.
- * Only currently called from klcopyinstr. 
- */
-
-ENTRY(copyinstr, TAG_NO_FRAME_USED)
-        mfcr	r2,0x10                         // save caller's cr3, which we use for flags
-        cmplwi	r5,0							// dst==NULL?
-        mr      r10,r3                          // move high word of 64-bit user address to r10
-        li		r0,0
-        crset	kkString						// flag as a string op
-        mr      r11,r4                          // move low word of 64-bit user address to r11
-        crmove	kkNull,cr0_eq					// remember if (dst==NULL)
-        stw		r0,0(r7)						// initialize #bytes moved
-        crset	kkIn							// flag as copyin (rather than copyout)
-        b		copyJoin1						// skip over the "crclr kkNull"
-
-
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
-/*
- * int
- * copyout(src, dst, count)
- *	vm_offset_t	src;        // r3
- *	addr64_t	dst;        // r4 and r5
- *	size_t		count;      // r6
- */
-
-			.align	5
-			.globl	EXT(copyout)
-			.globl	EXT(copyoutmsg)
-
-LEXT(copyout)
-LEXT(copyoutmsg)
-
-#if INSTRUMENT
-        mfspr	r12,pmc1						; INSTRUMENT - saveinstr[12] - Take stamp at copyout
-        stw		r12,0x6100+(12*16)+0x0(0)		; INSTRUMENT - Save it
-        mfspr	r12,pmc2						; INSTRUMENT - Get stamp
-        stw		r12,0x6100+(12*16)+0x4(0)		; INSTRUMENT - Save it
-        mfspr	r12,pmc3						; INSTRUMENT - Get stamp
-        stw		r12,0x6100+(12*16)+0x8(0)		; INSTRUMENT - Save it
-        mfspr	r12,pmc4						; INSTRUMENT - Get stamp
-        stw		r12,0x6100+(12*16)+0xC(0)		; INSTRUMENT - Save it
-#endif			
-        mfcr	r2,0x10                         // save caller's cr3, which we use for flags
-        mr      r10,r4                          // move high word of 64-bit user address to r10
-        crclr	kkString						// not a string version
-        mr      r11,r5                          // move low word of 64-bit user address to r11
-        crclr	kkIn							// flag as copyout
-        b		copyJoin
-        
-
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
-/*
- * int
- * copyin(src, dst, count)
- *	addr64_t	src;        // r3 and r4
- *	vm_offset_t	dst;        // r5
- *	size_t		count;      // r6
- */
-
-
-			.align	5
-			.globl	EXT(copyin)
-			.globl	EXT(copyinmsg)
-
-LEXT(copyin)
-LEXT(copyinmsg)
-
-        mfcr	r2,0x10                         // save caller's cr3, which we use for flags
-        mr      r10,r3                          // move high word of 64-bit user address to r10
-        crclr	kkString						// not a string version
-        mr      r11,r4                          // move low word of 64-bit user address to r11
-        crset	kkIn							// flag as copyin
-        
-        
-// Common code to handle setup for all the copy variants:
-//		r2 = caller's cr3
-//      r3 = source if copyout
-//      r5 = dest if copyin
-//      r6 = buffer length or count
-//      r7 = count output ptr (if kkString set)
-//	   r10 = high word of 64-bit user-space address (source if copyin, dest if copyout)
-//	   r11 = low word of 64-bit user-space address
-//     cr3 = kkIn, kkString, kkNull flags
-
-copyJoin:
-        crclr	kkNull							// (dst==NULL) convention not used with this call
-copyJoin1:										// enter from copyinstr with kkNull set
-		mflr	r0								// get return address
-        cmplwi	r6,0							// buffer length 0?
-        lis		r9,0x1000						// r9 <- 0x10000000 (256MB)
-		stw		r0,FM_LR_SAVE(r1)				// save return
-        cmplw	cr1,r6,r9						// buffer length > 256MB ?
-        mfsprg	r8,2							// get the features
-        beq--	copyinout_0						// 0 length is degenerate case
-		stwu	r1,-kkFrameSize(r1)				// set up stack frame
-        stw		r2,kkCR3(r1)                    // save caller's cr3, which we use for flags
-        mtcrf	0x02,r8							// move pf64Bit to cr6
-        stw		r3,kkSource(r1)					// save args across MapUserMemoryWindow
-        stw		r5,kkDest(r1)
-        stw		r6,kkBufSize(r1)
-        crmove	kk64bit,pf64Bitb				// remember if this is a 64-bit processor
-        stw		r7,kkCountPtr(r1)
-        stw		r31,kkR31Save(r1)				// we use r31 globally for mapped user ptr
-
-        
-        
-// Handle buffer length > 256MB.  This is an error (ENAMETOOLONG) on copyin and copyout.
-// The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
-// the buffer length to 256MB.  This isn't an issue if the string is less than 256MB
-// (as most are!), but if they are >256MB we eventually return ENAMETOOLONG.  This restriction
-// is due to MapUserMemoryWindow; we don't want to consume more than two segments for
-// the mapping. 
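-//
-// In C terms, a hedged sketch of this policy (not part of the original source; len and
-// string_op stand in for the saved buffer length and the kkString flag):
-//
-//     if (len > 0x10000000) {                 // 256MB limit imposed by MapUserMemoryWindow
-//         if (!string_op)
-//             return ENAMETOOLONG;            // copyin/copyout: reject outright
-//         len = 0x10000000;                   // copyinstr/copyoutstr: silently clamp
-//     }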
-
-        ble++	cr1,copyin0						// skip if buffer length <= 256MB
-        bf		kkString,copyinout_too_big		// error if not string op
-        mr		r6,r9							// silently clamp buffer length to 256MB
-        stw		r9,kkBufSize(r1)				// update saved copy too
-
-
-// Set up thread_recover in case we hit an illegal address.
-
-copyin0:
-		li		r31,0							// no mapped ptr yet
-		mfsprg  r8,1							// Get the current thread 
-		lis		r2,hi16(copyinout_error)
-		ori		r2,r2,lo16(copyinout_error)
-		lwz		r4,THREAD_RECOVER(r8)
-		lwz		r3,ACT_VMMAP(r8)				// r3 <- vm_map virtual address
-		stw		r2,THREAD_RECOVER(r8)
-		stw		r4,kkThrErrJmp(r1)
-
-
-// Map user segment into kernel map, turn on 64-bit mode.  At this point:
-//		r3 = vm map
-//		r6 = buffer length
-// r10/r11 = 64-bit user-space ptr (source if copyin, dest if copyout)
-//
-// When we call MapUserMemoryWindow, we pass:
-//      r3 = vm map ptr
-//   r4/r5 = 64-bit user space address as an addr64_t
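-//
-// Conceptually (an illustrative sketch, not the original source), this register interface
-// amounts to:
-//
-//     addr64_t kva = MapUserMemoryWindow(map, user_addr);  // map in r3, user_addr in r4/r5,
-//                                                          // 64-bit result returned in r3/r4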
-        
-        mr      r4,r10                          // copy user ptr into r4/r5
-        mr      r5,r11
-#if INSTRUMENT
-        mfspr	r12,pmc1						; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace
-        stw		r12,0x6100+(13*16)+0x0(0)		; INSTRUMENT - Save it
-        mfspr	r12,pmc2						; INSTRUMENT - Get stamp
-        stw		r12,0x6100+(13*16)+0x4(0)		; INSTRUMENT - Save it
-        mfspr	r12,pmc3						; INSTRUMENT - Get stamp
-        stw		r12,0x6100+(13*16)+0x8(0)		; INSTRUMENT - Save it
-        mfspr	r12,pmc4						; INSTRUMENT - Get stamp
-        stw		r12,0x6100+(13*16)+0xC(0)		; INSTRUMENT - Save it
-#endif			
-        bl		EXT(MapUserMemoryWindow)		// get r3/r4 <- 64-bit address in kernel map of user operand
-#if INSTRUMENT
-        mfspr	r12,pmc1						; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace
-        stw		r12,0x6100+(14*16)+0x0(0)		; INSTRUMENT - Save it
-        mfspr	r12,pmc2						; INSTRUMENT - Get stamp
-        stw		r12,0x6100+(14*16)+0x4(0)		; INSTRUMENT - Save it
-        mfspr	r12,pmc3						; INSTRUMENT - Get stamp
-        stw		r12,0x6100+(14*16)+0x8(0)		; INSTRUMENT - Save it
-        mfspr	r12,pmc4						; INSTRUMENT - Get stamp
-        stw		r12,0x6100+(14*16)+0xC(0)		; INSTRUMENT - Save it
-#endif			
-        mr		r31,r4							// r31 <- mapped ptr into user space (may be 64-bit)
-        bf--	kk64bit,copyin1					// skip if a 32-bit processor
- 
- 		rldimi	r31,r3,32,0						// slam high-order bits into mapped ptr
-        mfmsr	r4								// if 64-bit, turn on SF so we can use returned ptr
-        li		r0,1
-        rldimi	r4,r0,63,MSR_SF_BIT				// light bit 0
-        mtmsrd	r4								// turn on 64-bit mode
-        isync									// wait for mode to change
-        
-        
-// Load r3-r5, substituting mapped ptr as appropriate.
-
-copyin1:
-        lwz		r5,kkBufSize(r1)				// restore length to copy
-        bf		kkIn,copyin2					// skip if copyout
-        lwz		r4,kkDest(r1)					// copyin: dest is kernel ptr
-        mr		r3,r31							// source is mapped ptr
-        b		copyin3
-copyin2:										// handle copyout
-        lwz		r3,kkSource(r1)					// source is kernel buffer (r3 at entry)
-        mr		r4,r31							// dest is mapped ptr into user space
-        
-        
-// Finally, all set up to copy:
-//		r3 = source ptr (mapped if copyin)
-//		r4 = dest ptr (mapped if copyout)
-//		r5 = length
-//	   r31 = mapped ptr returned by MapUserMemoryWindow
-//	   cr3 = kkIn, kkString, kk64bit, and kkNull flags
-
-copyin3:
-        bt		kkString,copyString				// handle copyinstr and copyoutstr
-        bl		EXT(bcopy)						// copyin and copyout: let bcopy do the work
-        li		r3,0							// return success
-        
-        
-// Main exit point for copyin, copyout, copyinstr, and copyoutstr.  Also reached
-// from error recovery if we get a DSI accessing user space.  Clear recovery ptr, 
-// and pop off frame.
-//		r3 = 0, EFAULT, or ENAMETOOLONG
-
-copyinx: 
-        lwz		r2,kkCR3(r1)                    // get caller's cr3
-		mfsprg  r6,1							// Get the current thread 
-        bf--	kk64bit,copyinx1				// skip if 32-bit processor
-        mfmsr	r12
-        rldicl	r12,r12,0,MSR_SF_BIT+1			// if 64-bit processor, turn 64-bit mode off
-        mtmsrd	r12								// turn SF off
-        isync									// wait for the mode to change
-copyinx1:
-		lwz		r0,FM_LR_SAVE+kkFrameSize(r1)   // get return address
-        lwz		r31,kkR31Save(r1)				// restore callers r31
-        lwz		r4,kkThrErrJmp(r1)				// load saved thread recover
-        addi	r1,r1,kkFrameSize				// pop off our stack frame
-		mtlr	r0
-		stw		r4,THREAD_RECOVER(r6)			// restore thread recover
-        mtcrf	0x10,r2							// restore cr3
-		blr
-
-
-/* We get here via the exception handler if an illegal
- * user memory reference was made.  This error handler is used by
- * copyin, copyout, copyinstr, and copyoutstr.  Registers are as
- * they were at point of fault, so for example cr3 flags are valid.
- */
-
-copyinout_error:
-        li		r3,EFAULT						// return error
-        b		copyinx
-
-copyinout_0:									// degenerate case: 0-length copy
-		mtcrf	0x10,r2							// restore cr3
-        li		r3,0							// return success
-        blr
-        
-copyinout_too_big:								// degenerate case
-        mtcrf	0x10,r2							// restore cr3
-        lwz		r1,0(r1)						// pop off stack frame
-        li		r3,ENAMETOOLONG
-        blr
-        
-
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
-// Handle copyinstr and copyoutstr.  At this point the stack frame is set up,
-// the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode
-// if necessary, and:
-//		r3 = source ptr, mapped if copyinstr
-//		r4 = dest ptr, mapped if copyoutstr
-//		r5 = buffer length
-//	   r31 = mapped ptr returned by MapUserMemoryWindow
-//     cr3 = kkIn, kkString, kkNull, and kk64bit flags
-// We do word copies unless the buffer is very short, in which case we use a byte copy
-// loop for the leftovers.  The crossover at which the word loop becomes faster is
-// about seven bytes, counting the zero.
-//
-// We first must word-align the source ptr, in order to avoid taking a spurious
-// page fault.
-
-copyString:
-        cmplwi	cr1,r5,15						// is buffer very short?
-        mr      r12,r3                          // remember ptr to 1st source byte
-        mtctr	r5								// assuming short, set up loop count for bytes
-        blt--   cr1,copyinstr8					// too short for word loop
-        rlwinm  r2,r3,0,0x3                     // get byte offset of 1st byte within word
-        rlwinm  r9,r3,3,0x18                    // get bit offset of 1st byte within word
-        li      r7,-1
-        sub     r3,r3,r2                        // word-align source address
-        add     r6,r5,r2                        // get length starting at byte 0 in word
-        srw     r7,r7,r9                        // get mask for bytes in first word
-        srwi	r0,r6,2							// get #words in buffer
-        lwz     r5,0(r3)                        // get aligned word with first source byte
-        lis		r10,hi16(0xFEFEFEFF)			// load magic constants into r10 and r11
-        lis		r11,hi16(0x80808080)
-        mtctr	r0								// set up word loop count
-        addi    r3,r3,4                         // advance past the source word
-        ori		r10,r10,lo16(0xFEFEFEFF)
-        ori		r11,r11,lo16(0x80808080)
-        orc     r8,r5,r7                        // map bytes preceding first source byte into 0xFF
-        bt--	kkNull,copyinstr5enter          // enter loop that just counts
-        
-// Special case the 1st word, which has been 0xFF-filled on the left.  Note that we use
-// "and.", even though this code runs in both 32 and 64-bit mode.  This is OK.
-
-        slw     r5,r5,r9                        // left justify payload bytes
-        add		r9,r10,r8						// r9 =  data + 0xFEFEFEFF
-        andc	r7,r11,r8						// r7 = ~data & 0x80808080
-		subfic  r0,r2,4							// get r0 <- #payload bytes in 1st word
-        and.    r7,r9,r7						// if r7==0, then all bytes in r8 are nonzero
-        stw     r5,0(r4)                        // copy payload bytes to dest buffer
-        add		r4,r4,r0						// then point to next byte in dest buffer
-        bdnzt   cr0_eq,copyinstr6               // use loop that copies if 0 not found
-        
-        b		copyinstr7                      // 0 found (buffer can't be full)
-        
-        
-// Word loop(s).  They do a word-parallel search for 0s, using the following
-// non-obvious but very efficient test:
-//		y =  data + 0xFEFEFEFF
-//		z = ~data & 0x80808080
-// If (y & z)==0, then all bytes in dataword are nonzero.  There are two copies
-// of this loop, one that just counts and another that copies.
-//		r3 = ptr to next word of source (word aligned)
-//		r4 = ptr to next byte in buffer
-//      r6 = original buffer length (adjusted to be word origin)
-//     r10 = 0xFEFEFEFF
-//     r11 = 0x80808080
-//     r12 = ptr to 1st source byte (used to determine string length)
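-//
-// The same test as a hedged C sketch (not part of the original source; 'data' stands for
-// the word in r8):
-//
-//     uint32_t y = data + 0xFEFEFEFF;         // equivalent to data - 0x01010101
-//     uint32_t z = ~data & 0x80808080;        // 0x80 wherever a byte's high bit is clear
-//     if ((y & z) == 0) {
-//         /* all four bytes of data are nonzero */
-//     }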
-
-        .align	5								// align inner loops for speed
-copyinstr5:										// version that counts but does not copy
-        lwz     r8,0(r3)						// get next word of source
-        addi    r3,r3,4                         // advance past it
-copyinstr5enter:
-        add		r9,r10,r8						// r9 =  data + 0xFEFEFEFF
-        andc	r7,r11,r8						// r7 = ~data & 0x80808080
-        and.    r7,r9,r7                        // r7 = r9 & r7 ("." ok even in 64-bit mode)
-        bdnzt   cr0_eq,copyinstr5				// if r7==0, then all bytes in r8 are nonzero
-
-        b		copyinstr7
-
-        .align	5								// align inner loops for speed
-copyinstr6:										// version that counts and copies
-        lwz     r8,0(r3)						// get next word of source
-        addi    r3,r3,4                         // advance past it
-        addi	r4,r4,4							// increment dest ptr while we wait for data
-        add		r9,r10,r8						// r9 =  data + 0xFEFEFEFF
-        andc	r7,r11,r8						// r7 = ~data & 0x80808080
-        and.    r7,r9,r7                        // r7 = r9 & r7 ("." ok even in 64-bit mode)
-        stw		r8,-4(r4)						// pack all 4 bytes into buffer
-        bdnzt	cr0_eq,copyinstr6				// if r7==0, then all bytes are nonzero
-
-
-// Either 0 found or buffer filled.  The above algorithm has mapped nonzero bytes to 0
-// and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also
-// mapped to 0x80.  We must mask out these false hits before searching for an 0x80 byte.
-//		r3 = word aligned ptr to next word of source (ie, r8==mem(r3-4))
-//      r6 = original buffer length (adjusted to be word origin)
-//      r7 = computed vector of 0x00 and 0x80 bytes
-//      r8 = original source word, coming from -4(r3), possibly padded with 0xFFs on left if 1st word
-//     r12 = ptr to 1st source byte (used to determine string length)
-//     cr0 = beq set iff 0 not found
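-//
-// A hedged C sketch of this cleanup (not part of the original source; 'hits' stands for r7,
-// 'data' for r8, and big-endian byte order is assumed):
-//
-//     hits &= ~(data << 7);                   // clear false hits from 0x01 bytes (the asm
-//                                             // rotates; a shift suffices for these positions)
-//     hits >>= 8;                             // count the 0 itself as transferred
-//     unsigned n = __builtin_clz(hits) >> 3;  // 1..4 = #bytes moved including the 0; note the
-//                                             // hw cntlzw(0) == 32 (n = 4), whereas C's
-//                                             // __builtin_clz(0) is undefined and would need
-//                                             // a special case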
-
-copyinstr7:
-        rlwinm	r2,r8,7,0,31					// move 0x01 bits to 0x80 position
-		rlwinm  r6,r6,0,0x3						// mask down to partial byte count in last word
-        andc	r7,r7,r2						// turn off false hits from 0x0100 worst case
-        crnot	kkZero,cr0_eq					// 0 found iff cr0_eq is off
-        srwi    r7,r7,8                         // we want to count the 0 as a byte xferred
-		cmpwi   r6,0							// any bytes left over in last word?
-        cntlzw	r7,r7							// now we can find the 0 byte (ie, the 0x80)
-        subi    r3,r3,4                         // back up r3 to point to 1st byte in r8
-        srwi	r7,r7,3							// convert 8,16,24,32 to 1,2,3,4
-        add     r3,r3,r7                        // now r3 points one past 0 byte, or at 1st byte not xferred
-        bt++	kkZero,copyinstr10				// 0 found, so done
-        
-        beq		copyinstr10						// r6==0, so buffer truly full
-        mtctr	r6								// 0 not found, loop over r6 bytes
-        b		copyinstr8						// enter byte loop for last 1-3 leftover bytes
-        
-
-// Byte loop.  This is used for very small buffers and for the odd bytes left over
-// after searching and copying words at a time.
-//      r3 = ptr to next byte of source
-//      r4 = ptr to next dest byte
-//     r12 = ptr to first byte of source
-//     ctr = count of bytes to check
-    
-        .align	5								// align inner loops for speed
-copyinstr8:										// loop over bytes of source
-        lbz		r0,0(r3)						// get next byte of source
-        addi	r3,r3,1
-        addi	r4,r4,1							// increment dest addr whether we store or not
-        cmpwi	r0,0							// is this the terminating 0?
-        bt--	kkNull,copyinstr9				// don't store if copyinstr with NULL ptr
-        stb		r0,-1(r4)
-copyinstr9:
-        bdnzf	cr0_eq,copyinstr8				// loop if byte not 0 and more room in buffer
-        
-        crmove	kkZero,cr0_eq					// remember if 0 found or buffer filled
-
-        
-// Buffer filled or 0 found.  Unwind and return.
-//      r3 = ptr to 1st source byte not transferred
-//     r12 = ptr to 1st source byte
-//     r31 = mapped ptr returned by MapUserMemoryWindow
-//     cr3 = kkZero set iff 0 found
-
-copyinstr10:
-        lwz		r9,kkCountPtr(r1)				// get ptr to place to store count of bytes moved
-        sub     r2,r3,r12                       // compute #bytes copied (including the 0)
-        li		r3,0							// assume success return status
-        stw		r2,0(r9)						// store #bytes moved
-        bt++	kkZero,copyinx					// we did find the 0 so return 0
-        li		r3,ENAMETOOLONG					// buffer filled
-        b		copyinx							// join main exit routine
-
-//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
-/*
- * int
- * copypv(source, sink, size, which)
- *	addr64_t	src;        // r3 and r4
- *	addr64_t	dst;        // r5 and r6
- *	size_t		size;		// r7
- *	int			which;		// r8
- *
- * Operand size bytes are copied from operand src into operand dst. The source and
- * destination operand addresses are given as addr64_t, and may designate starting
- * locations in physical or virtual memory in any combination except where both are
- * virtual. Virtual memory locations may be in either the kernel or the current thread's
- * address space. Operand size may be up to 256MB.
- *
- * Operation is controlled by operand which, which offers these options:
- *		cppvPsrc : source operand is (1) physical or (0) virtual
- *		cppvPsnk : destination operand is (1) physical or (0) virtual
- *		cppvKmap : virtual operand is in (1) kernel or (0) current thread
- *		cppvFsnk : (1) flush destination before and after transfer
- *		cppvFsrc : (1) flush source before and after transfer
- *		cppvNoModSnk : (1) don't set destination operand's changed bit(s)
- *		cppvNoRefSrc : (1) don't set source operand's referenced bit(s)
- *
- * Implementation is now split into this new 64-bit path and the old path, hw_copypv_32().
- * This section describes the operation of the new 64-bit path.
- *
- * The 64-bit path utilizes the more capacious 64-bit kernel address space to create a
- * window in the kernel address space into all of physical RAM plus the I/O hole. Since
- * the window's mappings specify the proper access policies for the underlying memory,
- * the new path does not have to flush caches to avoid a cache paradox, so cppvFsnk
- * and cppvFsrc are ignored. Physical operand addresses are relocated into the physical
- * memory window, and are accessed with data relocation on. Virtual addresses are either
- * within the kernel, or are mapped into the kernel address space through the user memory
- * window. Because accesses to a virtual operand are performed with data relocation on,
- * the new path does not have to translate the address, disable/enable interrupts, lock
- * the mapping, or update referenced and changed bits.
- *
- * The IBM 970 (a.k.a. G5) processor treats real-mode accesses as guarded, so there is
- * a substantial performance penalty for copypv operating in real mode. Utilizing the
- * new 64-bit path, transfer performance increases >100% on the G5.
- *
- * The attentive reader may notice that mtmsrd ops are not followed by isync ops as 
- * might be expected. The 970 follows PowerPC architecture version 2.01, which defines
- * mtmsrd with L=0 as a context synchronizing op, so a following isync is no longer
- * required.
- *
- * To keep things exciting, we develop 64-bit values in non-volatiles, but we also need
- * to call 32-bit functions, which would lead to the high-order 32 bits of our values
- * getting clobbered unless we do something special. So, we preserve our 64-bit non-volatiles
- * in our own stack frame across calls to 32-bit functions.
- *		
- */
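-
-// A hedged usage sketch (not part of the original source): copying one page from a physical
-// source into a kernel-virtual buffer might look like
-//
-//     int err = copypv(src_phys, dst_kva, PAGE_SIZE, cppvPsrc | cppvKmap);
-//
-// where cppvPsrc marks the source as physical, cppvKmap places the virtual (destination)
-// operand in the kernel map, and src_phys/dst_kva are hypothetical addr64_t values.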
-
-// Map operand which bits into non-volatile CR2 and CR3 bits.
-#define whichAlign	((3+1)*4)
-#define whichMask	0x007F0000
-#define pvPsnk		(cppvPsnkb - whichAlign)
-#define pvPsrc		(cppvPsrcb - whichAlign)
-#define pvFsnk		(cppvFsnkb - whichAlign)
-#define pvFsrc		(cppvFsrcb - whichAlign)
-#define pvNoModSnk	(cppvNoModSnkb - whichAlign)
-#define pvNoRefSrc	(cppvNoRefSrcb - whichAlign)
-#define pvKmap		(cppvKmapb - whichAlign)
-#define pvNoCache	cr2_lt
-
-		.align	5
-		.globl	EXT(copypv)
-
-LEXT(copypv)
-        mfsprg	r10,2							// get feature flags
-        mtcrf	0x02,r10						// we need to test pf64Bit
-        bt++	pf64Bitb,copypv_64				// skip if 64-bit processor (only they take hint)
-        
-        b		EXT(hw_copypv_32)				// carry on with 32-bit copypv
-
-// Push a 32-bit ABI-compliant stack frame and preserve all non-volatiles that we'll clobber.        
-copypv_64:
-		mfsprg	r9,1							// get current thread
-		stwu	r1,-(FM_ALIGN((31-26+11)*4)+FM_SIZE)(r1)
-												// allocate stack frame and link it
-		mflr	r0								// get return address
-		mfcr	r10								// get cr2 and cr3
-		lwz		r12,THREAD_RECOVER(r9)			// get error callback
-		stw		r26,FM_ARG0+0x00(r1)			// save non-volatile r26
-		stw		r27,FM_ARG0+0x04(r1)			// save non-volatile r27
-		stw		r28,FM_ARG0+0x08(r1)			// save non-volatile r28
-		stw		r29,FM_ARG0+0x0C(r1)			// save non-volatile r29
-		stw		r30,FM_ARG0+0x10(r1)			// save non-volatile r30
-		stw		r31,FM_ARG0+0x14(r1)			// save non-volatile r31
-		stw		r12,FM_ARG0+0x20(r1)			// save error callback
-		stw		r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
-												// save return address
-		stw		r10,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
-												// save non-volatile cr2 and cr3
-
-// Non-volatile register usage in this routine is:
-//	r26: saved msr image
-//	r27: current pmap_t / virtual source address
-//	r28: destination virtual address
-//	r29: source address
-//	r30: destination address
-//	r31: byte count to copy
-//	cr2/3: parameter 'which' bits
-
-		rlwinm	r8,r8,whichAlign,whichMask		// align and mask which bits
-		mr		r31,r7							// copy size to somewhere non-volatile
-		mtcrf	0x20,r8							// insert which bits into cr2 and cr3
-		mtcrf	0x10,r8							// insert which bits into cr2 and cr3
-		rlwinm	r29,r3,0,1,0					// form source address high-order bits
-		rlwinm	r30,r5,0,1,0					// form destination address high-order bits
-		rlwimi	r29,r4,0,0,31					// form source address low-order bits
-		rlwimi	r30,r6,0,0,31					// form destination address low-order bits
-		crand	cr7_lt,pvPsnk,pvPsrc			// are both operand addresses physical?
-		cntlzw	r0,r31							// count leading zeroes in byte count
-		cror	cr7_eq,pvPsnk,pvPsrc			// cr7_eq <- source or destination is physical
-		bf--	cr7_eq,copypv_einval			// both operands may not be virtual
-		cmplwi	r0,4							// byte count greater than or equal to 256MB (2**28)?
-		blt--	copypv_einval					// byte count too big, give EINVAL
-		cmplwi	r31,0							// byte count zero?
-		beq--	copypv_zero						// early out
-		bt		cr7_lt,copypv_phys				// both operand addresses are physical
-		mr		r28,r30							// assume destination is virtual
-		bf		pvPsnk,copypv_dv				// is destination virtual?
-		mr		r28,r29							// no, so source must be virtual
-copypv_dv:
-		lis		r27,ha16(EXT(kernel_pmap))		// get kernel's pmap_t *, high-order
-		lwz		r27,lo16(EXT(kernel_pmap))(r27) // get kernel's pmap_t
-		bt		pvKmap,copypv_kern				// virtual address in kernel map?
-		lwz		r3,ACT_VMMAP(r9)				// get user's vm_map *
-		rldicl	r4,r28,32,32					// r4, r5 <- addr64_t virtual address 
-		rldicl	r5,r28,0,32
-		std		r29,FM_ARG0+0x30(r1)			// preserve 64-bit r29 across 32-bit call
-		std		r30,FM_ARG0+0x38(r1)			// preserve 64-bit r30 across 32-bit call
-		bl		EXT(MapUserMemoryWindow)		// map slice of user space into kernel space
-		ld		r29,FM_ARG0+0x30(r1)			// restore 64-bit r29
-		ld		r30,FM_ARG0+0x38(r1)			// restore 64-bit r30
-		rlwinm	r28,r3,0,1,0					// convert relocated addr64_t virtual address 
-		rlwimi	r28,r4,0,0,31					//  into a single 64-bit scalar
-copypv_kern:
-
-// Since we'll be accessing the virtual operand with data-relocation on, we won't need to 
-// update the referenced and changed bits manually after the copy. So, force the appropriate
-// flag bit on for the virtual operand.
-		crorc	pvNoModSnk,pvNoModSnk,pvPsnk	// for virtual dest, let hardware do ref/chg bits
-		crorc	pvNoRefSrc,pvNoRefSrc,pvPsrc	// for virtual source, let hardware do ref bit
-		
-// We'll be finding a mapping and looking at it, so we need to disable 'rupts.
-		lis		r0,hi16(MASK(MSR_VEC))			// get vector mask
-		ori		r0,r0,lo16(MASK(MSR_FP))		// insert fp mask
-		mfmsr	r26								// save current msr
-		andc	r26,r26,r0						// turn off VEC and FP in saved copy
-		ori		r0,r0,lo16(MASK(MSR_EE))		// add EE to our mask
-		andc	r0,r26,r0						// disable EE in our new msr image
-		mtmsrd	r0								// introduce new msr image
-
-// We're now holding the virtual operand's pmap_t in r27 and its virtual address in r28. We now
-// try to find a mapping corresponding to this address in order to determine whether the address
-// is cacheable. If we don't find a mapping, we can safely assume that the operand is cacheable
-// (a non-cacheable operand must be a block mapping, which will always exist); otherwise, we
-// examine the mapping's caching-inhibited bit.
-		mr		r3,r27							// r3 <- pmap_t pmap
-		rldicl	r4,r28,32,32					// r4, r5 <- addr64_t va
-		rldicl	r5,r28,0,32
-		la		r6,FM_ARG0+0x18(r1)				// r6 <- addr64_t *nextva
-		li		r7,1							// r7 <- int full, search nested mappings
-		std		r26,FM_ARG0+0x28(r1)			// preserve 64-bit r26 across 32-bit calls
-		std		r28,FM_ARG0+0x30(r1)			// preserve 64-bit r28 across 32-bit calls
-		std		r29,FM_ARG0+0x38(r1)			// preserve 64-bit r29 across 32-bit calls
-		std		r30,FM_ARG0+0x40(r1)			// preserve 64-bit r30 across 32-bit calls
-		bl		EXT(mapping_find)				// find mapping for virtual operand
-		mr.		r3,r3							// did we find it?
-		beq		copypv_nomapping				// nope, so we'll assume it's cacheable
-		lwz		r4,mpVAddr+4(r3)				// get low half of virtual addr for hw flags
-		rlwinm.	r4,r4,0,mpIb-32,mpIb-32			// caching-inhibited bit set?
-		crnot	pvNoCache,cr0_eq				// if it is, use bcopy_nc
-		bl		EXT(mapping_drop_busy)			// drop busy on the mapping
-copypv_nomapping:
-		ld		r26,FM_ARG0+0x28(r1)			// restore 64-bit r26
-		ld		r28,FM_ARG0+0x30(r1)			// restore 64-bit r28
-		ld		r29,FM_ARG0+0x38(r1)			// restore 64-bit r29
-		ld		r30,FM_ARG0+0x40(r1)			// restore 64-bit r30
-		mtmsrd	r26								// restore msr to its previous state
-
-// Set both the source and destination virtual addresses to the virtual operand's address --
-// we'll overlay one of them with the physical operand's address.
-		mr		r27,r28							// make virtual operand BOTH source AND destination
-
-// Now we're ready to relocate the physical operand address(es) into the physical memory window.
-// Recall that we've mapped physical memory (including the I/O hole) into the kernel's address
-// space somewhere at or over the 2**32 line. If one or both of the operands are in the I/O hole,
-// we'll set the pvNoCache flag, forcing use of non-caching bcopy_nc() to do the copy.
-copypv_phys:
-		ld		r6,lgPMWvaddr(0)				// get physical memory window virtual address
-		bf		pvPsnk,copypv_dstvirt			// is destination address virtual?
-		cntlzd	r4,r30							// count leading zeros in destination address
-		cmplwi	r4,32							// if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
-		cror	pvNoCache,cr0_eq,pvNoCache		// use bcopy_nc for I/O hole locations		
-		add		r28,r30,r6						// relocate physical destination into physical window
-copypv_dstvirt:
-		bf		pvPsrc,copypv_srcvirt			// is source address virtual?
-		cntlzd	r4,r29							// count leading zeros in source address
-		cmplwi	r4,32							// if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
-		cror	pvNoCache,cr0_eq,pvNoCache		// use bcopy_nc for I/O hole locations		
-		add		r27,r29,r6						// relocate physical source into physical window
-copypv_srcvirt:
-
-// Once the copy is under way (bcopy or bcopy_nc), we will want to get control if anything
-// funny happens during the copy. So, we set a pointer to our error handler in the per-thread
-// control block.
-		mfsprg	r8,1							// get current thread's stuff
-		lis		r3,hi16(copypv_error)			// get our error callback's address, high
-		ori		r3,r3,lo16(copypv_error)		// get our error callback's address, low
-		stw		r3,THREAD_RECOVER(r8)			// set our error callback
-		
-// Since our physical operand(s) are relocated at or above the 2**32 line, we must enter
-// 64-bit mode.
-		li		r0,1							// get a handy one bit
-		mfmsr	r3								// get current msr
-		rldimi	r3,r0,63,MSR_SF_BIT				// set SF bit on in our msr copy
-		mtmsrd	r3								// enter 64-bit mode
-
-// If requested, flush the data cache.
-// Note that we don't actually flush; the code is retained "just in case".
-#if 0
-		bf		pvFsrc,copypv_nfs				// do we flush the source?
-		rldicl	r3,r27,32,32					// r3, r4 <- addr64_t source virtual address
-		rldicl	r4,r27,0,32
-		mr		r5,r31							// r5 <- count (in bytes)
-		li		r6,0							// r6 <- boolean phys (false, not physical)
-		bl		EXT(flush_dcache)				// flush the source operand
-copypv_nfs:
-		bf		pvFsnk,copypv_nfdx				// do we flush the destination?
-		rldicl	r3,r28,32,32					// r3, r4 <- addr64_t destination virtual address
-		rldicl	r4,r28,0,32
-		mr		r5,r31							// r5 <- count (in bytes)
-		li		r6,0							// r6 <- boolean phys (false, not physical)
-		bl		EXT(flush_dcache)				// flush the destination operand
-copypv_nfdx:
-#endif
-
-// Call bcopy or bcopy_nc to perform the copy.
-		mr		r3,r27							// r3 <- source virtual address
-		mr		r4,r28							// r4 <- destination virtual address
-		mr		r5,r31							// r5 <- bytes to copy
-		bt		pvNoCache,copypv_nc				// take non-caching route
-		bl		EXT(bcopy)						// call bcopy to do the copying
-		b		copypv_copydone
-copypv_nc:
-		bl		EXT(bcopy_nc)					// call bcopy_nc to do the copying
-copypv_copydone:
-
-// If requested, flush the data cache.
-// Note that we don't actually flush; the code is retained "just in case".
-#if 0
-		bf		pvFsrc,copypv_nfsx				// do we flush the source?
-		rldicl	r3,r27,32,32					// r3, r4 <- addr64_t source virtual address
-		rldicl	r4,r27,0,32
-		mr		r5,r31							// r5 <- count (in bytes)
-		li		r6,0							// r6 <- boolean phys (false, not physical)
-		bl		EXT(flush_dcache)				// flush the source operand
-copypv_nfsx:
-		bf		pvFsnk,copypv_nfd				// do we flush the destination?
-		rldicl	r3,r28,32,32					// r3, r4 <- addr64_t destination virtual address
-		rldicl	r4,r28,0,32
-		mr		r5,r31							// r5 <- count (in bytes)
-		li		r6,0							// r6 <- boolean phys (false, not physical)
-		bl		EXT(flush_dcache)				// flush the destination operand
-copypv_nfd:
-#endif
-
-// Leave 64-bit mode.
-		mfmsr	r3								// get current msr
-		rldicl	r3,r3,0,MSR_SF_BIT+1			// clear SF bit in our copy
-		mtmsrd	r3								// leave 64-bit mode
-
-// If requested, set ref/chg on source/dest physical operand(s). It is possible that the copy is
-// from/to a RAM disk situated outside of mapped physical RAM, so we check each page by calling
-// mapping_phys_lookup() before we try to set its ref/chg bits; otherwise, we might panic.
-// Note that this code is page-size sensitive, so it should probably be a part of our low-level
-// code in hw_vm.s.
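-// As a hedged aside (not part of the original source), the number of 4K pages touched by a
-// transfer of 'count' bytes starting at 'addr' works out to
-//
-//     npages = ((addr & 0xFFF) + count + 0xFFF) >> 12;
-//
-// which the fragment-plus-whole-pages accounting below approximates.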
-		bt		pvNoModSnk,copypv_nomod			// skip destination update if not requested
-		std		r29,FM_ARG0+0x30(r1)			// preserve 64-bit r29 across 32-bit calls
-		li		r26,1							// r26 <- 4K-page count						
-		mr		r27,r31							// r27 <- byte count
-		rlwinm	r3,r30,0,20,31					// does destination cross a page boundary?
-		subfic	r3,r3,4096						// r3 <- 4096 - (dest & 0xFFF), bytes left in first page
-		cmplw	r3,r27							// compare to byte count
-		blt		copypv_modnox					// skip if not crossing case
-		subf	r27,r3,r27						// r27 <- byte count less initial fragment
-		addi	r26,r26,1						// increment page count
-copypv_modnox:
-		srdi	r3,r27,12						// pages to update (not including crosser)
-		add		r26,r26,r3						// add in crosser
-		srdi	r27,r30,12						// r27 <- destination page number
-copypv_modloop:
-		mr		r3,r27							// r3 <- destination page number				
-		la		r4,FM_ARG0+0x18(r1)				// r4 <- unsigned int *pindex
-		bl		EXT(mapping_phys_lookup)		// see if page is really there
-		mr.		r3,r3							// is it?
-		beq--	copypv_modend					// nope, break out of modify loop
-		mr		r3,r27							// r3 <- destination page number
-		bl		EXT(mapping_set_mod)			// set page changed status
-		subi	r26,r26,1						// decrement page count
-		cmpwi	r26,0							// done yet?
-		bgt		copypv_modloop					// nope, iterate
-copypv_modend:
-		ld		r29,FM_ARG0+0x30(r1)			// restore 64-bit r29
-copypv_nomod:
-		bt		pvNoRefSrc,copypv_done			// skip source update if not requested
-copypv_debugref:
-		li		r26,1							// r26 <- 4K-page count						
-		mr		r27,r31							// r27 <- byte count
-		rlwinm	r3,r29,0,20,31					// does source cross a page boundary?
-		subfic	r3,r3,4096						// r3 <- 4096 - (src & 0xFFF), bytes left in first page
-		cmplw	r3,r27							// compare to byte count
-		blt		copypv_refnox					// skip if not crossing case
-		subf	r27,r3,r27						// r27 <- byte count less initial fragment
-		addi	r26,r26,1						// increment page count
-copypv_refnox:
-		srdi	r3,r27,12						// pages to update (not including crosser)
-		add		r26,r26,r3						// add in crosser
-		srdi	r27,r29,12						// r27 <- source page number
-copypv_refloop:
-		mr		r3,r27							// r3 <- source page number
-		la		r4,FM_ARG0+0x18(r1)				// r4 <- unsigned int *pindex
-		bl		EXT(mapping_phys_lookup)		// see if page is really there
-		mr.		r3,r3							// is it?
-		beq--	copypv_done						// nope, break out of reference loop
-		mr		r3,r27							// r3 <- source page number
-		bl		EXT(mapping_set_ref)			// set page referenced status
-		subi	r26,r26,1						// decrement page count
-		cmpwi	r26,0							// done yet?
-		bgt		copypv_refloop					// nope, iterate
-		
-// Return, indicating success.
-copypv_done:
-copypv_zero:
-		li		r3,0							// our efforts were crowned with success
-
-// Pop frame, restore caller's non-volatiles, clear recovery routine pointer.
-copypv_return:
-		mfsprg	r9,1							// get current thread's stuff
-		lwz		r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
-												// get return address
-		lwz		r4,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
-												// get non-volatile cr2 and cr3
-		lwz		r26,FM_ARG0+0x00(r1)			// restore non-volatile r26
-		lwz		r27,FM_ARG0+0x04(r1)			// restore non-volatile r27
-		mtlr	r0								// restore return address
-		lwz		r28,FM_ARG0+0x08(r1)			// restore non-volatile r28
-		mtcrf	0x20,r4							// restore non-volatile cr2
-		mtcrf	0x10,r4							// restore non-volatile cr3
-		lwz		r11,FM_ARG0+0x20(r1)			// get saved error callback
-		lwz		r29,FM_ARG0+0x0C(r1)			// restore non-volatile r29
-		lwz		r30,FM_ARG0+0x10(r1)			// restore non-volatile r30
-		lwz		r31,FM_ARG0+0x14(r1)			// restore non-volatile r31
-		stw		r11,THREAD_RECOVER(r9)			// restore our error callback
-		lwz		r1,0(r1)						// release stack frame
-												
-		blr										// y'all come back now
-
-// Invalid argument handler.
-copypv_einval:
-		li		r3,EINVAL						// invalid argument
-		b		copypv_return					// return
-
-// Error encountered during bcopy or bcopy_nc.		
-copypv_error:
-		mfmsr	r3								// get current msr
-		rldicl	r3,r3,0,MSR_SF_BIT+1			// clear SF bit in our copy
-		mtmsrd	r3								// leave 64-bit mode
-		li		r3,EFAULT						// it was all his fault
-		b		copypv_return					// return
diff --git a/osfmk/ppc/new_screen.h b/osfmk/ppc/new_screen.h
deleted file mode 100644
index ba84184ef..000000000
--- a/osfmk/ppc/new_screen.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifndef _NEW_SCREEN_H_
-#define _NEW_SCREEN_H_
-
-#include <ppc/boot.h>
-
-/* AV and HPV cards */
-#define	AV_BUFFER_START	   0xE0000000
-#define	AV_BUFFER_END	   0xE0500000
-#define	HPV_BUFFER_START   0xFE000000
-#define	HPV_BUFFER_END	   0xFF000000
-
-extern void clear_RGB16(int color);
-extern void adj_position(unsigned char C);
-extern void put_cursor(int color);
-extern void screen_put_char(unsigned char C);
-
-#endif /* _NEW_SCREEN_H_ */
diff --git a/osfmk/ppc/pcb.c b/osfmk/ppc/pcb.c
deleted file mode 100644
index a38687b14..000000000
--- a/osfmk/ppc/pcb.c
+++ /dev/null
@@ -1,672 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Copyright (c) 1990,1991,1992 The University of Utah and
- * the Center for Software Science (CSS).  All rights reserved.
- *
- * Permission to use, copy, modify and distribute this software is hereby
- * granted provided that (1) source code retains these copyright, permission,
- * and disclaimer notices, and (2) redistributions including binaries
- * reproduce the notices in supporting documentation, and (3) all advertising
- * materials mentioning features or use of this software display the following
- * acknowledgement: ``This product includes software developed by the Center
- * for Software Science at the University of Utah.''
- *
- * THE UNIVERSITY OF UTAH AND CSS ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
- * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSS DISCLAIM ANY LIABILITY OF
- * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * CSS requests users of this software to return to css-dist@cs.utah.edu any
- * improvements that they make and grant CSS redistribution rights.
- *
- * 	Utah $Hdr: pcb.c 1.23 92/06/27$
- */
-
-#include <debug.h>
-
-#include <types.h>
-
-#include <mach/mach_types.h>
-#include <mach/thread_status.h>
-
-#include <kern/kern_types.h>
-#include <kern/task.h>
-#include <kern/thread.h>
-#include <kern/misc_protos.h>
-#include <kern/mach_param.h>
-#include <kern/spl.h>
-#include <kern/machine.h>
-
-#include <vm/vm_map.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_protos.h>
-
-#include <ppc/misc_protos.h>
-#include <ppc/cpu_internal.h>
-#include <ppc/exception.h>
-#include <ppc/proc_reg.h>
-#include <ppc/pmap.h>
-#include <ppc/trap.h>
-#include <ppc/mappings.h>
-#include <ppc/savearea.h>
-#include <ppc/Firmware.h>
-#include <ppc/asm.h>
-#include <ppc/thread.h>
-#include <ppc/vmachmon.h>
-#include <ppc/low_trace.h>
-#include <ppc/lowglobals.h>
-#include <ppc/fpu_protos.h>
-
-#include <sys/kdebug.h>
-
-void	machine_act_terminate(thread_t);
-
-/*
- * These constants are dumb. They should not be in asm.h!
- */
-
-#define KF_SIZE		(FM_SIZE+ARG_SIZE+FM_REDZONE)
-
-#if DEBUG
-int   fpu_trap_count = 0;
-int   fpu_switch_count = 0;
-int   vec_trap_count = 0;
-int   vec_switch_count = 0;
-#endif
-
-/*
- * consider_machine_collect: try to collect machine-dependent pages
- */
-void
-consider_machine_collect(void)
-{
-    /*
-     * XXX none currently available
-     */
-}
-
-void
-consider_machine_adjust(void)
-{
-        consider_mapping_adjust();
-}
-
-/*
- * switch_context: Switch from one thread to another, including any
- * 		   needed switching of address space
- * 
- */
-thread_t
-machine_switch_context(
-	thread_t			old,
-	thread_continue_t	continuation,
-	thread_t			new)
-{
-	register thread_t retval;
-	pmap_t	new_pmap;
-	facility_context *fowner;
-	struct per_proc_info *ppinfo;
-
-	if (old == new)
-		panic("machine_switch_context");
-
-	ppinfo = getPerProc();								/* Get our processor block */
-
-	ppinfo->old_thread = (unsigned int)old;
-	       
-	/* Our context might wake up on another processor, so we must
-	 * not keep hot state in our FPU; it must go back to the pcb
-	 * so that it can be found by the other processor if needed
-	 */
-	if(real_ncpus > 1) {								/* This is potentially slow, so only do when actually SMP */
-		fowner = ppinfo->FPU_owner;						/* Cache this because it may change */
-		if(fowner) {									/* Is there any live context? */
-			if(fowner->facAct == old) {		/* Is it for us? */
-				fpu_save(fowner);						/* Yes, save it */
-			}
-		}
-		fowner = ppinfo->VMX_owner;						/* Cache this because it may change */
-		if(fowner) {									/* Is there any live context? */
-			if(fowner->facAct == old) {		/* Is it for us? */
-				vec_save(fowner);						/* Yes, save it */
-			}
-		}
-	}
-
-	/*
-	 * If old thread is running VM, save per proc userProtKey and FamVMmode spcFlags bits in the thread spcFlags
- 	 * These bits can be modified in the per proc without updating the thread spcFlags
-	 */
-	if(old->machine.specFlags & runningVM) {
-		old->machine.specFlags &=  ~(userProtKey|FamVMmode);
-		old->machine.specFlags |= (ppinfo->spcFlags) & (userProtKey|FamVMmode);
-	}
-	old->machine.specFlags &= ~OnProc;
-	new->machine.specFlags |= OnProc;
-
-	/*
-	 * We do not have to worry about the PMAP module, so switch.
-	 *
-	 * We must not use thread->map since this may not be the actual
-	 * task map, but the map being used for a klcopyin/out.
-	 */
-
-	if(new->machine.specFlags & runningVM) {			/* Is the new guy running a VM? */
-		pmap_switch(new->machine.vmmCEntry->vmmPmap);	/* Switch to the VM's pmap */
-		ppinfo->VMMareaPhys = new->machine.vmmCEntry->vmmContextPhys;
-		ppinfo->VMMXAFlgs = new->machine.vmmCEntry->vmmXAFlgs;
-		ppinfo->FAMintercept = new->machine.vmmCEntry->vmmFAMintercept;
-	}
-	else {												/* otherwise, we use the task's pmap */
-		new_pmap = new->task->map->pmap;
-		if ((old->task->map->pmap != new_pmap) || (old->machine.specFlags & runningVM)) {
-			pmap_switch(new_pmap);						/* Switch if there is a change */
-		}
-	}
-
-	if(old->machine.umwSpace != invalSpace) {			/* Does our old guy have an active window? */
-		old->machine.umwSpace |= umwSwitchAway;			/* Show we switched away from this guy */
-		hw_blow_seg(lowGlo.lgUMWvaddr);					/* Blow off the first segment */
-		hw_blow_seg(lowGlo.lgUMWvaddr + 0x10000000ULL);	/* Blow off the second segment */
-	}
-
-	retval = Switch_context(old, continuation, new);
-	assert(retval != NULL);
-
-	/* We've returned from having switched context, so we should be
-	 * back in the original context.
-	 */
-
-	return retval;
-}
-
-/*
- * Initialize the machine-dependent state for a new thread.
- */
-kern_return_t
-machine_thread_create(
-	thread_t		thread,
-	task_t			task)
-{
-	struct savearea		*sv;									/* Pointer to newly allocated savearea */
-
-	(void)hw_atomic_add(&saveanchor.savetarget, 4);	/* Account for the number of saveareas we think we "need"
-															   for this activation */
-	assert(thread->machine.pcb == (struct savearea *)0);				/* Make sure there was no previous savearea */
-	
-	sv = save_alloc();										/* Go get us a savearea */
-		
-	bzero((char *)((unsigned int)sv + sizeof(savearea_comm)), (sizeof(struct savearea) - sizeof(savearea_comm)));	/* Clear it */
-		
-	sv->save_hdr.save_prev = 0;								/* Clear the back pointer */
-	sv->save_hdr.save_flags = (sv->save_hdr.save_flags & ~SAVtype) | (SAVgeneral << SAVtypeshft);	/* Mark as in use */
-	sv->save_hdr.save_act = thread;	/* Set who owns it */
-	thread->machine.pcb = sv;									/* Point to the save area */
-	thread->machine.curctx = &thread->machine.facctx;			/* Initialize facility context */
-	thread->machine.facctx.facAct = thread;						/* Initialize facility context pointer to activation */
-	thread->machine.umwSpace = invalSpace;						/* Initialize user memory window space to invalid */
-	thread->machine.preemption_count = 0;						/* Initialize preemption counter */
-
-	/*
-	 * User threads will pull their context from the pcb when first
-	 * returning to user mode, so fill in all the necessary values.
-	 * Kernel threads are initialized from the save state structure 
-	 * at the base of the kernel stack (see stack_attach()).
-	 */
-
-	thread->machine.upcb = sv;								/* Set user pcb */
-	sv->save_srr1 = (uint64_t)MSR_EXPORT_MASK_SET;			/* Set the default user MSR */
-	if(task_has_64BitAddr(task)) sv->save_srr1 |= (uint64_t)MASK32(MSR_SF) << 32;	/* If 64-bit task, force 64-bit mode */
-	sv->save_fpscr = 0;										/* Clear all floating point exceptions */
-	sv->save_vrsave = 0;									/* Set the vector save state */
-	sv->save_vscr[0] = 0x00000000;					
-	sv->save_vscr[1] = 0x00000000;					
-	sv->save_vscr[2] = 0x00000000;					
-	sv->save_vscr[3] = 0x00010000;							/* Disable java mode and clear saturated */
-	
-    return(KERN_SUCCESS);
-}
-
-/*
- * Machine-dependent cleanup prior to destroying a thread
- */
-void
-machine_thread_destroy(
-	thread_t		thread)
-{
-	struct savearea *local_pcb, *ppsv;
-	savearea_vec *vsv, *vpsv;
-	savearea_fpu *fsv, *fpsv;
- 	boolean_t intr;
-
-/*
- *	This function will release all context.
- */
-
-	machine_act_terminate(thread);							/* Make sure all virtual machines are dead first */
- 
-/*
- *
- *	Walk through and release all floating point and vector contexts. Also kill live context.
- *
- */
-
-	intr = ml_set_interrupts_enabled(FALSE);				/* Disable for interruptions */
- 
- 	toss_live_vec(thread->machine.curctx);					/* Dump live vectors */
-
-	vsv = thread->machine.curctx->VMXsave;					/* Get the top vector savearea */
-	
-	while(vsv) {											/* Any VMX saved state? */
-		vpsv = vsv;											/* Remember so we can toss this */
-		/* XXX save_prev should be a void * 4425537 */
-		vsv = CAST_DOWN(savearea_vec *, vsv->save_hdr.save_prev);  /* Get one underneath ours */
-		save_release((struct savearea *)vpsv);						/* Release it */
-	}
-	
-	thread->machine.curctx->VMXsave = NULL;					/* Kill chain */
- 
- 	toss_live_fpu(thread->machine.curctx);					/* Dump live float */
-
-	fsv = thread->machine.curctx->FPUsave;					/* Get the top float savearea */
-	
-	while(fsv) {											/* Any float saved state? */
-		fpsv = fsv;											/* Remember so we can toss this */
-		/* XXX save_prev should be a void * 4425537 */
-		fsv = CAST_DOWN(savearea_fpu *, fsv->save_hdr.save_prev);   /* Get one underneath ours */
-		save_release((struct savearea *)fpsv);						/* Release it */
-	}
-	
-	thread->machine.curctx->FPUsave = NULL;					/* Kill chain */
-
-/*
- * free all regular saveareas.
- */
-
-	local_pcb = thread->machine.pcb;								/* Get the general savearea */
-	
-	while(local_pcb) {											/* Any general saved state? */
-		ppsv = local_pcb;											/* Remember so we can toss this */
-		/* XXX save_prev should be a void * 4425537 */
-		local_pcb = CAST_DOWN(struct savearea *, local_pcb->save_hdr.save_prev);  /* Get one underneath ours */ 
-		save_release(ppsv);									/* Release it */
-	}
-	
-	(void)hw_atomic_sub(&saveanchor.savetarget, 4);	/* Unaccount for the number of saveareas we think we "need" */
-
-	(void) ml_set_interrupts_enabled(intr);					/* Restore interrupts if enabled */
-
-}
-
-/*
- * act_machine_sv_free
- * release saveareas associated with a thread. if flag is true, release
- * user level savearea(s) too, else don't
- *
- * This code must run with interruptions disabled because an interrupt handler
- * could use floating point and/or vectors.  If this happens and the thread we
- * are blowing off owns the facility, we can deadlock.
- */
-void
-act_machine_sv_free(thread_t act,  __unused int flag)
-{
-	struct savearea *local_pcb, *userpcb;
-	register savearea_vec *vsv, *vpst, *vsvt;
-	register savearea_fpu *fsv, *fpst, *fsvt;
-	struct savearea *svp;
- 	boolean_t intr;
-
-/*
- *	This function will release all non-user state context.
- */
- 
-/*
- *
- *	Walk through and release all floating point and vector contexts that are not
- *	user state.  We will also blow away live context if it belongs to non-user state.
- *	Note that the level can not change while we are in this code.  Nor can another
- *	context be pushed on the stack.
- *
- *	We do nothing here if the current level is user.  Otherwise,
- *	the live context is cleared.  Then we find the user saved context.
- *	Next,  we take the sync lock (to keep us from munging things in *_switch).
- *	The level is set to 0 and all stacked context other than user is dequeued.
- *	Then we unlock.  Next, all of the old kernel contexts are released.
- *
- */
-
-	intr = ml_set_interrupts_enabled(FALSE);				/* Disable for interruptions */
-
- 	if(act->machine.curctx->VMXlevel) {						/* Is the current level user state? */
- 		
- 		toss_live_vec(act->machine.curctx);					/* Dump live vectors if not user */
-		
-		if(!hw_lock_to((hw_lock_t)&act->machine.curctx->VMXsync, LockTimeOut)) {	/* Get the sync lock */ 
-			panic("act_machine_sv_free - timeout getting VMX sync lock\n");	/* Tell all and die */
-		}
-	
-		vsv = act->machine.curctx->VMXsave;					/* Get the top vector savearea */
-		while(vsv && vsv->save_hdr.save_level)	/* Find user context if any */
-			/* XXX save_prev should be a void * 4425537 */
-			vsv = CAST_DOWN(savearea_vec *,
-					vsv->save_hdr.save_prev);
-		
-		vsvt = act->machine.curctx->VMXsave;				/* Get the top of the chain */
-		act->machine.curctx->VMXsave = vsv;					/* Point to the user context */
-		act->machine.curctx->VMXlevel = NULL;					/* Set the level to user */
-		hw_lock_unlock((hw_lock_t)&act->machine.curctx->VMXsync);	/* Unlock */
-		
-		while(vsvt) {										/* Clear any VMX saved state */
-			if (vsvt == vsv) break;   						/* Done when hit user if any */
-			vpst = vsvt;									/* Remember so we can toss this */
-			/* XXX save_prev should be a void * 4425537 */
-			vsvt = CAST_DOWN(savearea_vec *, vsvt->save_hdr.save_prev);	/* Get one underneath ours */		
-			save_ret((struct savearea *)vpst);						/* Release it */
-		}
-		
-	}
- 
- 	if(act->machine.curctx->FPUlevel) {						/* Is the current level user state? */
- 		
- 		toss_live_fpu(act->machine.curctx);					/* Dump live floats if not user */
-
-		if(!hw_lock_to((hw_lock_t)&act->machine.curctx->FPUsync, LockTimeOut)) {	/* Get the sync lock */ 
-			panic("act_machine_sv_free - timeout getting FPU sync lock\n");	/* Tell all and die */
-		}
-		
-		fsv = act->machine.curctx->FPUsave;					/* Get the top floats savearea */
-		while(fsv && fsv->save_hdr.save_level)	/* Find user context if any */
-			/* XXX save_prev should be a void * */
-			fsv = CAST_DOWN(savearea_fpu *, fsv->save_hdr.save_prev);
-		
-		fsvt = act->machine.curctx->FPUsave;				/* Get the top of the chain */
-		act->machine.curctx->FPUsave = fsv;					/* Point to the user context */
-		act->machine.curctx->FPUlevel = NULL;					/* Set the level to user */
-		hw_lock_unlock((hw_lock_t)&act->machine.curctx->FPUsync);	/* Unlock */
-		
-		while(fsvt) {										/* Clear any FPU saved state */
-			if (fsvt == fsv) break;   						/* Done when hit user if any */
-			fpst = fsvt;									/* Remember so we can toss this */
-			/* XXX save_prev should be a void * 4425537 */
-			fsvt = CAST_DOWN(savearea_fpu *, fsvt->save_hdr.save_prev);	/* Get one underneath ours */		
-			save_ret((struct savearea *)fpst);						/* Release it */
-		}
-		
-	}
-
-/*
- * free all regular saveareas except a user savearea, if any
- */
-
-	local_pcb = act->machine.pcb;									/* Get the general savearea */
-	userpcb = NULL;											/* Assume no user context for now */
-	
-	while(local_pcb) {											/* Any general saved state? */
-		if (local_pcb->save_srr1 & MASK(MSR_PR)) {				/* Is this a user savearea? */
-			userpcb = local_pcb;									/* Remember so we can toss this */
-			break;
-		}
-		svp = local_pcb;											/* Remember this */
-		/* XXX save_prev should be a void * 4425537 */
-		local_pcb = CAST_DOWN(struct savearea *, local_pcb->save_hdr.save_prev);  /* Get one underneath ours */ 
-		save_ret(svp);										/* Release it */
-	}
-	
-	act->machine.pcb = userpcb;								/* Chain in the user if there is one, or 0 if not */
-	(void) ml_set_interrupts_enabled(intr);					/* Restore interrupts if enabled */
-
-}
-
-void
-machine_act_terminate(
-	thread_t	act)
-{
-	if(act->machine.bbDescAddr) {							/* Check if the Blue box assist is active */
-		disable_bluebox_internal(act);						/* Kill off bluebox */
-	}
-	
-	if(act->machine.vmmControl) {							/* Check if VMM is active */
-		vmm_tear_down_all(act);								/* Kill off all VMM contexts */
-	}
-}
-
-void
-machine_thread_terminate_self(void)
-{
-	machine_act_terminate(current_thread());
-}
-
-void
-machine_thread_init(void)
-{
-#ifdef	MACHINE_STACK
-#if KERNEL_STACK_SIZE > PPC_PGBYTES
-	panic("KERNEL_STACK_SIZE can't be greater than PPC_PGBYTES\n");
-#endif
-#endif
-}
-
-#if MACH_ASSERT
-void
-dump_thread(thread_t th)
-{
-	printf(" thread @ %p:\n", th);
-}
-#endif /* MACH_ASSERT */
-
-user_addr_t 
-get_useraddr(void)
-{
-	return(current_thread()->machine.upcb->save_srr0);
-}
-
-/*
- * detach and return a kernel stack from a thread
- */
-
-vm_offset_t
-machine_stack_detach(
-	thread_t		thread)
-{
-  vm_offset_t stack;
-
-  KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_DETACH),
-											(uintptr_t)thread_tid(thread), thread->priority,
-											thread->sched_pri, 0, 0);
-
-  act_machine_sv_free(thread, 0);	/* XXX flag == 0 OK? */
-
-  stack = thread->kernel_stack;
-  thread->kernel_stack = 0;
-  return(stack);
-}
-
-/*
- * attach a kernel stack to a thread and initialize it
- *
- * Attaches a stack to a thread. If there is no save
- * area we allocate one.  The top save area is then
- * loaded with the pc (continuation address), the initial
- * stack pointer, and a standard kernel MSR. If the top
- * save area is the user save area, bad things will
- * happen.
- *
- */
-
-void
-machine_stack_attach(
-	thread_t		thread,
-	vm_offset_t		stack)
-{
-  unsigned int *kss;
-  struct savearea *sv;
-
-        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_ATTACH),
-            (uintptr_t)thread_tid(thread), thread->priority,
-            thread->sched_pri, 0, 0);
-
-  assert(stack);
-  kss = (unsigned int *)STACK_IKS(stack);
-  thread->kernel_stack = stack;
-
-  /* during initialization we sometimes do not have an
-     activation. in that case do not do anything */
-  sv = save_get();  /* cannot block */
-  sv->save_hdr.save_flags = (sv->save_hdr.save_flags & ~SAVtype) | (SAVgeneral << SAVtypeshft);	/* Mark as in use */
-  sv->save_hdr.save_act = thread;
-  sv->save_hdr.save_prev = (addr64_t)((uintptr_t)thread->machine.pcb);
-  thread->machine.pcb = sv;
-
-  sv->save_srr0 = (unsigned int)thread_continue;
-  /* sv->save_r3 = ARG ? */
-  sv->save_r1 = (vm_offset_t)((int)kss - KF_SIZE);
-  sv->save_srr1 = MSR_SUPERVISOR_INT_OFF;
-  sv->save_fpscr = 0;									/* Clear all floating point exceptions */
-  sv->save_vrsave = 0;								/* Set the vector save state */
-  sv->save_vscr[3] = 0x00010000;						/* Suppress Java mode */
-  *(CAST_DOWN(int *, sv->save_r1)) = 0;
-
-  thread->machine.ksp = 0;			      
-}
-
-/*
- * move a stack from old to new thread
- */
-
-void
-machine_stack_handoff(
-	thread_t		old,
-	thread_t		new)
-{
-
-	vm_offset_t stack;
-	pmap_t new_pmap;
-	facility_context *fowner;
-	mapping_t *mp;
-	struct per_proc_info *ppinfo;
-	
-	assert(new);
-	assert(old);
-
-	if (old == new)
-		panic("machine_stack_handoff");
-	
-	stack = machine_stack_detach(old);
-	new->kernel_stack = stack;
-	if (stack == old->reserved_stack) {
-		assert(new->reserved_stack);
-		old->reserved_stack = new->reserved_stack;
-		new->reserved_stack = stack;
-	}
-
-	ppinfo = getPerProc();								/* Get our processor block */
-
-	if(real_ncpus > 1) {								/* This is potentially slow, so only do when actually SMP */
-		fowner = ppinfo->FPU_owner;						/* Cache this because it may change */
-		if(fowner) {									/* Is there any live context? */
-			if(fowner->facAct == old) {		/* Is it for us? */
-				fpu_save(fowner);						/* Yes, save it */
-			}
-		}
-		fowner = ppinfo->VMX_owner;						/* Cache this because it may change */
-		if(fowner) {									/* Is there any live context? */
-			if(fowner->facAct == old) {		/* Is it for us? */
-				vec_save(fowner);						/* Yes, save it */
-			}
-		}
-	}
-
-	/*
-	 * If the old thread is running a VM, save the per_proc userProtKey and FamVMmode spcFlags bits in the thread spcFlags.
- 	 * These bits can be modified in the per_proc without updating the thread spcFlags.
-	 */
-	if(old->machine.specFlags & runningVM) {			/* Is the current thread running a VM? */
-		old->machine.specFlags &= ~(userProtKey|FamVMmode);
-		old->machine.specFlags |= (ppinfo->spcFlags) & (userProtKey|FamVMmode);
-	}
-	old->machine.specFlags &= ~OnProc;
-	new->machine.specFlags |= OnProc;
-
-	if(new->machine.specFlags & runningVM) {	/* Is the new guy running a VM? */
-		pmap_switch(new->machine.vmmCEntry->vmmPmap);	/* Switch to the VM's pmap */
-		ppinfo->VMMareaPhys = new->machine.vmmCEntry->vmmContextPhys;
-		ppinfo->VMMXAFlgs = new->machine.vmmCEntry->vmmXAFlgs;
-		ppinfo->FAMintercept = new->machine.vmmCEntry->vmmFAMintercept;
-	}
-	else {											/* otherwise, we use the task's pmap */
-		new_pmap = new->task->map->pmap;
-		if ((old->task->map->pmap != new_pmap) || (old->machine.specFlags & runningVM)) {
-			pmap_switch(new_pmap);
-		}
-	}
-
-	machine_set_current_thread(new);
-	ppinfo->Uassist = new->machine.cthread_self;
-
-	ppinfo->ppbbTaskEnv = new->machine.bbTaskEnv;
-	ppinfo->spcFlags = new->machine.specFlags;
-	
-	old->machine.umwSpace |= umwSwitchAway;			/* Show we switched away from this guy */
-	mp = (mapping_t *)&ppinfo->ppUMWmp;
-	mp->mpSpace = invalSpace;						/* Since we can't handoff in the middle of copy in/out, just invalidate */
-
-	if(trcWork.traceMask) dbgTrace(0x9903, (unsigned int)old, (unsigned int)new, 0, 0);	/* Cut trace entry if tracing */    
-    
-  return;
-}
-
-void Call_continuation(thread_continue_t, void *, wait_result_t, vm_offset_t);
-
-/*
- * clean and initialize the current kernel stack and go to
- * the given continuation routine
- */
-
-void
-call_continuation(
-	thread_continue_t	continuation,
-	void				*parameter,
-	wait_result_t		wresult)
-{
-	thread_t		self = current_thread();
-	unsigned int	*kss;
-	vm_offset_t		tsp;
-
-	assert(self->kernel_stack);
-	kss = (unsigned int *)STACK_IKS(self->kernel_stack);
-	assert(continuation);
-
-	tsp = (vm_offset_t)((int)kss - KF_SIZE);
-	assert(tsp);
-	*((int *)tsp) = 0;
-
-	Call_continuation(continuation, parameter, wresult, tsp);
-}
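-
-/*
- *	Illustrative only: a minimal sketch of how a blocking kernel path might
- *	use a continuation so its stack can be reclaimed while it waits.  The
- *	example_* names are hypothetical, not part of this file.
- */
-#if 0	/* sketch, not compiled */
-static void
-example_continue(void *parameter, wait_result_t wresult)
-{
-	if (wresult == THREAD_AWAKENED)
-		example_handle_event(parameter);	/* hypothetical event handler */
-	(void) thread_block(example_continue);	/* wait again without holding a stack */
-}
-#endif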
diff --git a/osfmk/ppc/pmap.c b/osfmk/ppc/pmap.c
deleted file mode 100644
index 90940a9ab..000000000
--- a/osfmk/ppc/pmap.c
+++ /dev/null
@@ -1,2121 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * Mach Operating System
- * Copyright (c) 1990,1991,1992 The University of Utah and
- * the Center for Software Science (CSS).
- * Copyright (c) 1991,1987 Carnegie Mellon University.
- * All rights reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation,
- * and that all advertising materials mentioning features or use of
- * this software display the following acknowledgement: ``This product
- * includes software developed by the Center for Software Science at
- * the University of Utah.''
- *
- * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSS ALLOW FREE USE OF
- * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
- * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
- * THIS SOFTWARE.
- *
- * CSS requests users of this software to return to css-dist@cs.utah.edu any
- * improvements that they make and grant CSS redistribution rights.
- *
- * Carnegie Mellon requests users of this software to return to
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- *
- * 	Utah $Hdr: pmap.c 1.28 92/06/23$
- *	Author: Mike Hibler, Bob Wheeler, University of Utah CSS, 10/90
- */
- 
-/*
- *	Manages physical address maps for powerpc.
- *
- *	In addition to hardware address maps, this
- *	module is called upon to provide software-use-only
- *	maps which may or may not be stored in the same
- *	form as hardware maps.  These pseudo-maps are
- *	used to store intermediate results from copy
- *	operations to and from address spaces.
- *
- *	Since the information managed by this module is
- *	also stored by the logical address mapping module,
- *	this module may throw away valid virtual-to-physical
- *	mappings at almost any time.  However, invalidations
- *	of virtual-to-physical mappings must be done as
- *	requested.
- *
- *	In order to cope with hardware architectures which
- *	make virtual-to-physical map invalidates expensive,
- *	this module may delay invalidate or reduced-protection
- *	operations until such time as they are actually
- *	necessary.  This module is given full information as
- *	to when physical maps must be made correct.
- *	
- */
-
-#include <zone_debug.h>
-#include <debug.h>
-#include <mach_kgdb.h>
-#include <mach_vm_debug.h>
-#include <db_machine_commands.h>
-
-#include <kern/thread.h>
-#include <kern/simple_lock.h>
-#include <mach/vm_attributes.h>
-#include <mach/vm_param.h>
-#include <vm/vm_kern.h>
-#include <kern/spl.h>
-
-#include <kern/misc_protos.h>
-#include <ppc/misc_protos.h>
-#include <ppc/proc_reg.h>
-
-#include <vm/pmap.h>
-#include <vm/vm_map.h>
-#include <vm/vm_page.h>
-
-#include <ppc/pmap.h>
-#include <ppc/mem.h>
-#include <ppc/mappings.h>
-
-#include <ppc/new_screen.h>
-#include <ppc/Firmware.h>
-#include <ppc/savearea.h>
-#include <ppc/cpu_internal.h>
-#include <ppc/exception.h>
-#include <ppc/low_trace.h>
-#include <ppc/lowglobals.h>
-#include <ppc/limits.h>
-#include <ddb/db_output.h>
-#include <machine/cpu_capabilities.h>
-
-#include <vm/vm_protos.h> /* must be last */
-
-
-extern unsigned int	avail_remaining;
-unsigned int 	debugbackpocket;							/* (TEST/DEBUG) */
-
-vm_offset_t		first_free_virt;
-unsigned int current_free_region;						/* Used in pmap_next_page */
-
-pmapTransTab *pmapTrans;									/* Point to the hash to pmap translations */
-struct phys_entry *phys_table;
-
-/* forward */
-static void pmap_map_physical(void);
-static void pmap_map_iohole(addr64_t paddr, addr64_t size);
-void pmap_activate(pmap_t pmap, thread_t th, int which_cpu);
-void pmap_deactivate(pmap_t pmap, thread_t th, int which_cpu);
-
-extern void hw_hash_init(void);
-
-/*  NOTE:  kernel_pmap_store must be in V=R storage and aligned!!!!!!!!!!!!!! */
-
-extern struct pmap	kernel_pmap_store;
-pmap_t		kernel_pmap;			/* Pointer to kernel pmap and anchor for in-use pmaps */		
-addr64_t	kernel_pmap_phys;		/* Pointer to kernel pmap and anchor for in-use pmaps, physical address */		
-pmap_t		cursor_pmap;			/* Pointer to last pmap allocated or previous if removed from in-use list */
-pmap_t		sharedPmap;				/* Pointer to common pmap for 64-bit address spaces */
-struct zone	*pmap_zone;				/* zone of pmap structures */
-boolean_t	pmap_initialized = FALSE;
-
-int ppc_max_pmaps;					/* Maximum number of concurrent address spaces allowed. This is machine dependent */	
-addr64_t vm_max_address;			/* Maximum effective address supported */
-addr64_t vm_max_physical;			/* Maximum physical address supported */
-
-/*
- * Physical-to-virtual translations are handled by inverted page table
- * structures, phys_tables.  Multiple mappings of a single page are handled
- * by linking the affected mapping structures. We initialize one region
- * for phys_tables of the physical memory we know about, but more may be
- * added as it is discovered (eg. by drivers).
- */
-
-/*
- *	Free pmap list. Caches the first free_pmap_max pmaps that are freed up.
- */
-int		free_pmap_max = 32;
-int		free_pmap_count;
-pmap_t	free_pmap_list;
-decl_simple_lock_data(,free_pmap_lock)
-
-/*
- * Function to get index into phys_table for a given physical address
- */
-
-struct phys_entry *pmap_find_physentry(ppnum_t pa)
-{
-	int i;
-	unsigned int entry;
-
-	for (i = pmap_mem_regions_count - 1; i >= 0; i--) {
-		if (pa < pmap_mem_regions[i].mrStart) continue;	/* See if we fit in this region */
-		if (pa > pmap_mem_regions[i].mrEnd) continue;	/* Check the end too */
-		
-		entry = (unsigned int)pmap_mem_regions[i].mrPhysTab + ((pa - pmap_mem_regions[i].mrStart) * sizeof(phys_entry_t));
-		return (struct phys_entry *)entry;
-	}
-//	kprintf("DEBUG - pmap_find_physentry: page 0x%08X not found\n", pa);
-	return NULL;
-}
-
-/*
- * kern_return_t
- * pmap_add_physical_memory(vm_offset_t spa, vm_offset_t epa,
- *                          boolean_t available, unsigned int attr)
- *
- *	THIS IS NOT SUPPORTED
- */
-kern_return_t
-pmap_add_physical_memory(
-	__unused vm_offset_t spa, 
-	__unused vm_offset_t epa,
-	__unused boolean_t available,
-	__unused unsigned int attr)
-{
-	
-	panic("Forget it! You can't map no more memory, you greedy puke!\n");
-	return KERN_SUCCESS;
-}
-
-/*
- * pmap_map(va, spa, epa, prot)
- *	is called during boot to map memory in the kernel's address map.
- *	A virtual address range starting at "va" is mapped to the physical
- *	address range "spa" to "epa" with machine independent protection
- *	"prot".
- *
- *	"va", "spa", and "epa" are byte addresses and must be on machine
- *	independent page boundaries.
- *
- *	The pages form a contiguous virtual address range with the same protection
- *	and attributes; therefore, we map them with a single block.
- *
- *	Note that this call will only map into 32-bit space
- *
- */
-
-vm_offset_t
-pmap_map(
-	vm_offset_t va,
-	vm_offset_t spa,
-	vm_offset_t epa,
-	vm_prot_t prot,
-	unsigned int flags)
-{
-	unsigned int mflags;
-	addr64_t colladr;
-	mflags = 0;										/* Make sure this is initialized to nothing special */
-	if(!(flags & VM_WIMG_USE_DEFAULT)) {			/* Are they supplying the attributes? */
-		mflags = mmFlgUseAttr | (flags & VM_MEM_GUARDED) | ((flags & VM_MEM_NOT_CACHEABLE) >> 1);	/* Convert to our mapping_make flags */
-	}
-	
-	if (spa == epa) return(va);
-
-	assert(epa > spa);
-
-	colladr = mapping_make(kernel_pmap, (addr64_t)va, (ppnum_t)(spa >> 12),
-			       (mmFlgBlock | mmFlgPerm), (epa - spa) >> 12, (prot & VM_PROT_ALL) );
-
-	if(colladr) {											/* Was something already mapped in the range? */
-		panic("pmap_map: attempt to map previously mapped range - va = %08X, pa = %08X, epa = %08X, collision = %016llX\n",
-			va, spa, epa, colladr);
-	}				
-	return(va);
-}
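-
-/*
- *	Illustrative only: a boot-time caller might wire down a V=R range like
- *	this (the base address is made up for the sketch).
- */
-#if 0	/* sketch, not compiled */
-	vm_offset_t base = 0x00100000;				/* hypothetical page-aligned region */
-	(void) pmap_map(base, base, base + (16 * PAGE_SIZE),
-		VM_PROT_READ | VM_PROT_WRITE, VM_WIMG_USE_DEFAULT);
-#endif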
-
-/*
- * pmap_map_physical()
- *	Maps physical memory into the kernel's address map beginning at lgPMWvaddr, the
- *  physical memory window.
- *
- */
-void
-pmap_map_physical(void)
-{
-	unsigned region;
-	uint64_t msize, size;
-	addr64_t paddr, vaddr, colladdr;
-
-	/* Iterate over physical memory regions, block mapping each into the kernel's address map */	
-	for (region = 0; region < (unsigned)pmap_mem_regions_count; region++) {
-		paddr = ((addr64_t)pmap_mem_regions[region].mrStart << 12);	/* Get starting physical address */
-		size  = (((addr64_t)pmap_mem_regions[region].mrEnd + 1) << 12) - paddr;
-
-		vaddr = paddr + lowGlo.lgPMWvaddr;					/* Get starting virtual address */
-
-		while (size > 0) {
-			
-			msize = ((size > 0x0000020000000000ULL) ? 0x0000020000000000ULL : size);	/* Get size, but no more than 2TBs */
-			
-			colladdr = mapping_make(kernel_pmap, vaddr, (paddr >> 12),
-				(mmFlgBlock | mmFlgPerm), (msize >> 12),
-				(VM_PROT_READ | VM_PROT_WRITE));
-			if (colladdr) {
-				panic ("pmap_map_physical: mapping failure - va = %016llX, pa = %016llX, size = %016llX, collision = %016llX\n",
-					   vaddr, (paddr >> 12), (msize >> 12), colladdr);
-			}
-
-			vaddr = vaddr + (uint64_t)msize;				/* Point to the next virtual addr */
-			paddr = paddr + (uint64_t)msize;				/* Point to the next physical addr */
-			size  -= msize;
-		}
-	}
-}
-
-/*
- * pmap_map_iohole(addr64_t paddr, addr64_t size)
- *	Maps an I/O hole into the kernel's address map at its proper offset in
- *	the physical memory window.
- *
- */
-void
-pmap_map_iohole(addr64_t paddr, addr64_t size)
-{
-
-	addr64_t vaddr, colladdr, msize;
-
-	vaddr = paddr + lowGlo.lgPMWvaddr;						/* Get starting virtual address */		
-
-	while (size > 0) {
-
-		msize = ((size > 0x0000020000000000ULL) ? 0x0000020000000000ULL : size);	/* Get size, but no more than 2TBs */
-		
-		colladdr = mapping_make(kernel_pmap, vaddr, (paddr >> 12),
-			(mmFlgBlock | mmFlgPerm | mmFlgGuarded | mmFlgCInhib), (msize >> 12),
-			(VM_PROT_READ | VM_PROT_WRITE));
-		if (colladdr) {
-			panic ("pmap_map_iohole: mapping failed - va = %016llX, pa = %016llX, size = %016llX, collision = %016llX\n",
-				   vaddr, (paddr >> 12), (msize >> 12), colladdr);
-		}
-
-		vaddr = vaddr + (uint64_t)msize;					/* Point to the next virtual addr */
-		paddr = paddr + (uint64_t)msize;					/* Point to the next physical addr */
-		size  -= msize;
-	}	
-}
-
-/*
- *	Bootstrap the system enough to run with virtual memory.
- *	Map the kernel's code and data, and allocate the system page table.
- *	Called with mapping done by BATs. Page_size must already be set.
- *
- *	Parameters:
- *	msize:	Total memory present
- *	first_avail:	First virtual address available
- *	kmapsize:	Size of kernel text and data
- */
-void
-pmap_bootstrap(uint64_t msize, vm_offset_t *first_avail, unsigned int kmapsize)
-{
-	vm_offset_t 	addr;
-	vm_size_t 		size;
-	unsigned int 	i, num, mapsize, vmpagesz, vmmapsz, nbits;
-	signed			bank;
-	uint64_t		tmemsize;
-	uint_t			htslop;
-	vm_offset_t		first_used_addr, PCAsize;
-	struct phys_entry *phys_entry;
-
-	*first_avail = round_page(*first_avail);				/* Make sure we start out on a page boundary */
-	vm_last_addr = VM_MAX_KERNEL_ADDRESS;					/* Set the highest address known to VM */
-
-	/*
-	 * Initialize kernel pmap
-	 */
-	kernel_pmap = &kernel_pmap_store;
-	kernel_pmap_phys = (addr64_t)(uintptr_t)&kernel_pmap_store;
-	cursor_pmap = &kernel_pmap_store;
-
-	kernel_pmap->pmap_link.next = (queue_t)kernel_pmap;		/* Set up anchor forward */
-	kernel_pmap->pmap_link.prev = (queue_t)kernel_pmap;		/* Set up anchor reverse */
-	kernel_pmap->ref_count = 1;
-	kernel_pmap->pmapFlags = pmapKeyDef;					/* Set the default keys */
-	kernel_pmap->pmapFlags |= pmapNXdisabled;
-	kernel_pmap->pmapCCtl = pmapCCtlVal;					/* Initialize cache control */
-	kernel_pmap->space = PPC_SID_KERNEL;
-	kernel_pmap->pmapvr = 0;								/* Virtual = Real  */
-
-/*
- *  IBM's recommended hash table size is one PTEG for every 2 physical pages.
- *  However, we have found that OSX rarely uses more than 4 PTEs in a PTEG
- *  with this size table.  Therefore, by default we allocate a hash table
- *  one half of IBM's recommended size, i.e. one PTEG per 4 pages.  The "ht_shift" boot-arg
- *  can be used to override the default hash table size.
- *	We will allocate the hash table in physical RAM, outside of kernel virtual memory,
- *	at the top of the highest bank that will contain it.
- *	Note that "bank" doesn't refer to a physical memory slot here, it is a range of
- *	physically contiguous memory.
- *
- *	The PCA will go there as well, immediately before the hash table.
- */
- 
-	nbits = cntlzw(((msize << 1) - 1) >> 32);				/* Get first bit in upper half */
-	if (nbits == 32)                                        /* If upper half was empty, find bit in bottom half */
-        nbits = nbits + cntlzw((uint_t)((msize << 1) - 1));
- 	tmemsize = 0x8000000000000000ULL >> nbits;              /* Get memory size rounded up to power of 2 */
- 	
-    /* Calculate hash table size:  First, make sure we don't overflow 32-bit arithmetic. */
- 	if (tmemsize > 0x0000002000000000ULL)
-        tmemsize = 0x0000002000000000ULL;
-
-    /* Second, calculate the IBM-recommended hash table size, i.e. one PTEG per 2 physical pages */
- 	hash_table_size = (uint_t)(tmemsize >> 13) * PerProcTable[0].ppe_vaddr->pf.pfPTEG;
-    
-    /* Third, cut this in half to produce the OSX default, i.e. one PTEG per 4 physical pages */
-    hash_table_size >>= 1;
-    
-    /* Fourth, adjust default size per "ht_shift" boot arg */
-    if (hash_table_shift >= 0)                              /* if positive, make size bigger */
-        hash_table_size <<= hash_table_shift;
-    else                                                    /* if "ht_shift" is negative, make smaller */
-        hash_table_size >>= (-hash_table_shift);
-    
-    /* Fifth, make sure we are at least minimum size */
- 	if (hash_table_size < (256 * 1024))
-        hash_table_size = (256 * 1024);
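-
-/*
- *	Worked example (illustrative, assuming 512MB of RAM and 128-byte PTEGs,
- *	i.e. pf.pfPTEG == 128): tmemsize rounds to 2^29, so the IBM-recommended
- *	size is (2^29 >> 13) * 128 = 8MB; halving gives the 4MB OSX default, and
- *	an "ht_shift" of 1 or -1 would yield 8MB or 2MB respectively, never
- *	dropping below the 256KB floor enforced above.
- */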
-
-	while(1) {												/* Try to fit hash table in PCA into contiguous memory */
-
-		if(hash_table_size < (256 * 1024)) {				/* Have we dropped too short? This should never, ever happen */
-			panic("pmap_bootstrap: Can't find space for hash table\n");	/* This will never print, system isn't up far enough... */
-		}
-
-		PCAsize = (hash_table_size / PerProcTable[0].ppe_vaddr->pf.pfPTEG) * sizeof(PCA_t);	/* Get total size of PCA table */
-		PCAsize = round_page(PCAsize);					/* Make sure it is at least a page long */
-	
-		for(bank = pmap_mem_regions_count - 1; bank >= 0; bank--) {	/* Search backwards through banks */
-			
-			hash_table_base = ((addr64_t)pmap_mem_regions[bank].mrEnd << 12) - hash_table_size + PAGE_SIZE;	/* Get tentative address */
-			
-			htslop = hash_table_base & (hash_table_size - 1);	/* Get the extra that we will round down when we align */
-			hash_table_base = hash_table_base & -(addr64_t)hash_table_size;	/* Round down to correct boundary */
-			
-			if((hash_table_base - round_page(PCAsize)) >= ((addr64_t)pmap_mem_regions[bank].mrStart << 12)) break;	/* Leave if we fit */
-		}
-		
-		if(bank >= 0) break;								/* We are done if we found a suitable bank */
-		
-		hash_table_size = hash_table_size >> 1;				/* Try the next size down */
-	}
-
-	if(htslop) {											/* If there was slop (i.e., wasted pages for alignment) add a new region */
-		for(i = pmap_mem_regions_count - 1; i >= (unsigned)bank; i--) {	/* Copy from end to our bank, including our bank */
-			pmap_mem_regions[i + 1].mrStart  = pmap_mem_regions[i].mrStart;	/* Set the start of the bank */
-			pmap_mem_regions[i + 1].mrAStart = pmap_mem_regions[i].mrAStart;	/* Set the start of allocatable area */
-			pmap_mem_regions[i + 1].mrEnd    = pmap_mem_regions[i].mrEnd;	/* Set the end address of bank */
-			pmap_mem_regions[i + 1].mrAEnd   = pmap_mem_regions[i].mrAEnd;	/* Set the end address of allocatable area */
-		}
-		
-		pmap_mem_regions[i + 1].mrStart  = (hash_table_base + hash_table_size) >> 12;	/* Set the start of the next bank to the start of the slop area */
-		pmap_mem_regions[i + 1].mrAStart = (hash_table_base + hash_table_size) >> 12;	/* Set the start of allocatable area to the start of the slop area */
-		pmap_mem_regions[i].mrEnd        = (hash_table_base + hash_table_size - 4096) >> 12;	/* Set the end of our bank to the end of the hash table */
-		
-	}		
-	
-	pmap_mem_regions[bank].mrAEnd = (hash_table_base - PCAsize - 4096) >> 12;	/* Set the maximum allocatable in this bank */
-	
-	hw_hash_init();											/* Initialize the hash table and PCA */
-	hw_setup_trans();										/* Set up hardware registers needed for translation */
-	
-/*
- *	The hash table is now all initialized and so is the PCA.  Go on to do the rest of it.
- *	This allocation is from the bottom up.
- */	
-	
-	num = atop_64(msize);										/* Get number of pages in all of memory */
-
-/* Figure out how much we need to allocate */
-
-	size = (vm_size_t) (
-		(InitialSaveBloks * PAGE_SIZE) +					/* Allow space for the initial context saveareas */
-		(BackPocketSaveBloks * PAGE_SIZE) +					/* For backpocket saveareas */
-		trcWork.traceSize +								/* Size of trace table */
-		((((1 << maxAdrSpb) * sizeof(pmapTransTab)) + 4095) & -4096) +	/* Size of pmap translate table */
-		(((num * sizeof(struct phys_entry)) + 4095) & -4096) 	/* For the physical entries */
-	);
-
-	mapsize = size = round_page(size);						/* Get size of area to map that we just calculated */
-	mapsize = mapsize + kmapsize;							/* Account for the kernel text size */
-
-	vmpagesz = round_page(num * sizeof(struct vm_page));	/* Allow for all vm_pages needed to map physical mem */
-	vmmapsz = round_page((num / 8) * sizeof(struct vm_map_entry));	/* Allow for vm_maps */
-	
-	mapsize = mapsize + vmpagesz + vmmapsz;					/* Add the VM system estimates into the grand total */
-
-	mapsize = mapsize + (4 * 1024 * 1024);					/* Allow for 4 meg of extra mappings */
-	mapsize = ((mapsize / PAGE_SIZE) + MAPPERBLOK - 1) / MAPPERBLOK;	/* Get number of blocks of mappings we need */
-	mapsize = mapsize + ((mapsize  + MAPPERBLOK - 1) / MAPPERBLOK);	/* Account for the mappings themselves */
-
-	size = size + (mapsize * PAGE_SIZE);					/* Get the true size we need */
-
-	/* hash table must be aligned to its size */
-
-	addr = *first_avail;									/* Set the address to start allocations */
-	first_used_addr = addr;									/* Remember where we started */
-
-	bzero((char *)addr, size);								/* Clear everything that we are allocating */
-
- 	savearea_init(addr);									/* Initialize the savearea chains and data */
-
-	addr = (vm_offset_t)((unsigned int)addr + ((InitialSaveBloks + BackPocketSaveBloks) * PAGE_SIZE));	/* Point past saveareas */
-
-	trcWork.traceCurr = (unsigned int)addr;					/* Set first trace slot to use */
-	trcWork.traceStart = (unsigned int)addr;				/* Set start of trace table */
-	trcWork.traceEnd = (unsigned int)addr + trcWork.traceSize;		/* Set end of trace table */
-
-	addr = (vm_offset_t)trcWork.traceEnd;					/* Set next allocatable location */
-		
-	pmapTrans = (pmapTransTab *)addr;						/* Point to the pmap to hash translation table */
-		
-	pmapTrans[PPC_SID_KERNEL].pmapPAddr = (addr64_t)((uintptr_t)kernel_pmap);	/* Initialize the kernel pmap in the translate table */
-	pmapTrans[PPC_SID_KERNEL].pmapVAddr = CAST_DOWN(unsigned int, kernel_pmap);  /* Initialize the kernel pmap in the translate table */
-		
-	addr += ((((1 << maxAdrSpb) * sizeof(pmapTransTab)) + 4095) & -4096);	/* Point past pmap translate table */
-
-/*	NOTE: the phys_table must be within the first 2GB of physical RAM. This makes sure we only need to do 32-bit arithmetic */
-
-	phys_entry = (struct phys_entry *) addr;				/* Get pointer to physical table */
-
-	for (bank = 0; (unsigned)bank < pmap_mem_regions_count; bank++) {	/* Set pointer and initialize all banks of ram */
-		
-		pmap_mem_regions[bank].mrPhysTab = phys_entry;		/* Set pointer to the physical table for this bank */
-		
-		phys_entry = phys_entry + (pmap_mem_regions[bank].mrEnd - pmap_mem_regions[bank].mrStart + 1);	/* Point to the next */
-	}
-
-	addr += (((num * sizeof(struct phys_entry)) + 4095) & -4096);	/* Step on past the physical entries */
-	
-/*
- * 		Remaining space is for mapping entries.  Tell the initializer routine that
- * 		the mapping system can't release this block because it's permanently assigned
- */
-
-	mapping_init();											/* Initialize the mapping tables */
-
-	for(i = addr; i < first_used_addr + size; i += PAGE_SIZE) {	/* Add initial mapping blocks */
-		mapping_free_init(i, 1, 0);							/* Pass block address and say that this one is not releasable */
-	}
-	mapCtl.mapcmin = MAPPERBLOK;							/* Make sure we only adjust one at a time */
-
-	/* Map V=R the page tables */
-	pmap_map(first_used_addr, first_used_addr,
-		 round_page(first_used_addr + size), VM_PROT_READ | VM_PROT_WRITE, VM_WIMG_USE_DEFAULT);
-
-	*first_avail = round_page(first_used_addr + size);		/* Set next available page */
-	first_free_virt = *first_avail;							/* Ditto */
-	
-	/* For 64-bit machines, block map physical memory and the I/O hole into kernel space */
-	if(BootProcInfo.pf.Available & pf64Bit) {				/* Are we on a 64-bit machine? */
-		lowGlo.lgPMWvaddr = PHYS_MEM_WINDOW_VADDR;			/* Initialize the physical memory window's virtual address */
-
-		pmap_map_physical();								/* Block map physical memory into the window */
-		
-		pmap_map_iohole(IO_MEM_WINDOW_VADDR, IO_MEM_WINDOW_SIZE);
-															/* Block map the I/O hole */
-	}
-
-	/* All the rest of memory is free - add it to the free
-	 * regions so that it can be allocated by pmap_steal
-	 */
-
-	pmap_mem_regions[0].mrAStart = (*first_avail >> 12);	/* Set up the free area to start allocations (always in the first bank) */
-
-	current_free_region = 0;								/* Set that we will start allocating in bank 0 */
-	avail_remaining = 0;									/* Clear free page count */
-	for(bank = 0; (unsigned)bank < pmap_mem_regions_count; bank++) {	/* Total up all of the pages in the system that are available */
-		avail_remaining += (pmap_mem_regions[bank].mrAEnd - pmap_mem_regions[bank].mrAStart) + 1;	/* Add in allocatable pages in this bank */
-	}
-
-
-}
-
-/*
- * pmap_init(spa, epa)
- *	finishes the initialization of the pmap module.
- *	This procedure is called from vm_mem_init() in vm/vm_init.c
- *	to initialize any remaining data structures that the pmap module
- *	needs to map virtual memory (VM is already ON).
- *
- *	Note that the pmap needs to be sized and aligned to
- *	a power of two.  This is because it is used both in virtual and
- *	real so it can't span a page boundary.
- */
-
-void
-pmap_init(void)
-{
-
-	pmap_zone = zinit(pmapSize, 400 * pmapSize, 4096, "pmap");
-#if	ZONE_DEBUG
-	zone_debug_disable(pmap_zone);		/* Can't debug this one 'cause it messes with size and alignment */
-#endif	/* ZONE_DEBUG */
-
-	pmap_initialized = TRUE;
-
-	/*
-	 *	Initialize list of freed up pmaps
-	 */
-	free_pmap_list = NULL;					/* Set that there are no free pmaps */
-	free_pmap_count = 0;
-	simple_lock_init(&free_pmap_lock, 0);
-	
-}
-
-unsigned int pmap_free_pages(void)
-{
-	return avail_remaining;
-}
-
-/*
- *	This function allocates physical pages.
- */
-
-boolean_t
-pmap_next_page_hi(ppnum_t * pnum)
-{
-	return pmap_next_page(pnum);
-}
-
-
-/* Non-optimal, but only used for virtual memory startup.
- * Allocate memory from a table of free physical addresses.
- * If there are no more free entries, too bad.
- */
-
-boolean_t
-pmap_next_page(ppnum_t *addrp)
-{
-	unsigned int i;
-
-	if(current_free_region >= pmap_mem_regions_count) return FALSE;	/* Return failure if we have used everything... */
-
-	for(i = current_free_region; i < pmap_mem_regions_count; i++) {	/* Find the next bank with free pages */
-		if(pmap_mem_regions[i].mrAStart <= pmap_mem_regions[i].mrAEnd) break;	/* Found one */
-	}
-
-	current_free_region = i;										/* Set our current bank */
-	if(i >= pmap_mem_regions_count) return FALSE;					/* Couldn't find a free page */
-
-	*addrp = pmap_mem_regions[i].mrAStart;					/* Allocate the page */
-	pmap_mem_regions[i].mrAStart = pmap_mem_regions[i].mrAStart + 1;	/* Set the next one to go */
-	avail_remaining--;												/* Drop free count */
-
-	return TRUE;
-}
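-
-/*
- *	Illustrative only: early VM startup might drain the free regions one
- *	page at a time like this (loop and consumer are hypothetical).
- */
-#if 0	/* sketch, not compiled */
-	ppnum_t pn;
-	while (pmap_next_page(&pn))
-		example_add_page(pn);					/* hypothetical consumer */
-#endif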
-
-void pmap_virtual_space(
-	vm_offset_t *startp,
-	vm_offset_t *endp)
-{
-	*startp = round_page(first_free_virt);
-	*endp   = vm_last_addr;
-}
-
-/*
- * pmap_create
- *
- * Create and return a physical map.
- *
- * If the size specified for the map is zero, the map is an actual physical
- * map, and may be referenced by the hardware.
- *
- * A pmap is either in the free list or in the in-use list.  The only use
- * of the in-use list (aside from debugging) is to handle the VSID wrap situation.
- * Whenever a new pmap is allocated (i.e., not recovered from the free list), the
- * in-use list is searched until a hole in the VSID sequence is found. (Note
- * that the in-use pmaps are queued in VSID sequence order.) This is all done
- * while free_pmap_lock is held.
- *
- * If the size specified is non-zero, the map will be used in software 
- * only, and is bounded by that size.
- */
-pmap_t
-pmap_create(vm_map_size_t size, __unused boolean_t is_64bit)
-{
-	pmap_t pmap, ckpmap, fore;
-	int s;
-	unsigned int currSID;
-	addr64_t physpmap;
-
-	/*
-	 * A software use-only map doesn't even need a pmap structure.
-	 */
-	if (size)
-		return(PMAP_NULL);
-
-	/* 
-	 * If there is a pmap in the pmap free list, reuse it. 
-	 * Note that we use free_pmap_list for all chaining of pmaps, both to
-	 * the free list and the in use chain (anchored from kernel_pmap).
-	 */
-	s = splhigh();
-	simple_lock(&free_pmap_lock);
-	
-	if(free_pmap_list) {							/* Any free? */
-		pmap = free_pmap_list;						/* Yes, allocate it */
-		free_pmap_list = (pmap_t)pmap->freepmap;	/* Dequeue this one (we chain free ones through freepmap) */
-		free_pmap_count--;
-	}
-	else {
-		simple_unlock(&free_pmap_lock);				/* Unlock just in case */
-		splx(s);
-
-		pmap = (pmap_t) zalloc(pmap_zone);			/* Get one */
-		if (pmap == PMAP_NULL) return(PMAP_NULL);	/* Handle out-of-memory condition */
-		
-		bzero((char *)pmap, pmapSize);				/* Clean up the pmap */
-		
-		s = splhigh();
-		simple_lock(&free_pmap_lock);				/* Lock it back up	*/
-		
-		ckpmap = cursor_pmap;						/* Get starting point for free ID search */
-		currSID = ckpmap->spaceNum;					/* Get the actual space ID number */
-
-		while(1) {									/* Keep trying until something happens */
-		
-			currSID = (currSID + 1) & (maxAdrSp - 1);	/* Get the next in the sequence */
-			if(((currSID * incrVSID) & (maxAdrSp - 1)) == invalSpace) continue;	/* Skip the space we have reserved */
-			ckpmap = (pmap_t)ckpmap->pmap_link.next;	/* On to the next in-use pmap */
-	
-			if(ckpmap->spaceNum != currSID) break;	/* If we are out of sequence, this is free */
-			
-			if(ckpmap == cursor_pmap) {				/* See if we have 2^20 already allocated */
-				panic("pmap_create: Maximum number (%d) active address spaces reached\n", maxAdrSp);	/* Die pig dog */
-			}
-		}
-
-		pmap->space = (currSID * incrVSID) & (maxAdrSp - 1);	/* Calculate the actual VSID */
-		pmap->spaceNum = currSID;					/* Set the space ID number */
-/*
- *		Now we link into the chain just before the out of sequence guy.
- */
-
-		fore = (pmap_t)ckpmap->pmap_link.prev;		/* Get the current's previous */
-		pmap->pmap_link.next = (queue_t)ckpmap;		/* My next points to the current */
-		fore->pmap_link.next = (queue_t)pmap;		/* Current's previous's next points to me */
-		pmap->pmap_link.prev = (queue_t)fore;		/* My prev points to what the current pointed to */
-		ckpmap->pmap_link.prev = (queue_t)pmap;		/* Current's prev points to me */
-		
-		physpmap = ((addr64_t)pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)pmap)) << 12) | (addr64_t)((unsigned int)pmap & 0xFFF);	/* Get the physical address of the pmap */
-		
-		pmap->pmapvr = (addr64_t)((uintptr_t)pmap) ^ physpmap;	/* Make V to R translation mask */
-		
-		pmapTrans[pmap->space].pmapPAddr = physpmap;	/* Set translate table physical to point to us */
-		pmapTrans[pmap->space].pmapVAddr = CAST_DOWN(unsigned int, pmap);	/* Set translate table virtual to point to us */
-	}
-
-	pmap->pmapVmmExt = NULL;						/* Clear VMM extension block vaddr */
-	pmap->pmapVmmExtPhys = 0;						/*  and the paddr, too */
-	pmap->pmapFlags = pmapKeyDef;					/* Set default key */
-	pmap->pmapCCtl = pmapCCtlVal;					/* Initialize cache control */
-	pmap->ref_count = 1;
-	pmap->stats.resident_count = 0;
-	pmap->stats.wired_count = 0;
-	pmap->pmapSCSubTag = 0x0000000000000000ULL;		/* Make sure this is clean and tidy */
-	simple_unlock(&free_pmap_lock);
-
-	splx(s);
-	return(pmap);
-}
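-
-/*
- *	Illustrative only: the expected lifecycle is create, reference, destroy;
- *	a size of zero requests a real, hardware-referenced map (sketch).
- */
-#if 0	/* sketch, not compiled */
-	pmap_t map = pmap_create(0, FALSE);			/* real hardware map */
-	if (map != PMAP_NULL) {
-		pmap_reference(map);					/* a second holder */
-		pmap_destroy(map);						/* count drops to 1, map stays */
-		pmap_destroy(map);						/* count drops to 0, map is freed */
-	}
-#endif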
-
-/* 
- * pmap_destroy
- * 
- * Gives up a reference to the specified pmap.  When the reference count 
- * reaches zero the pmap structure is added to the pmap free list.
- *
- * Should only be called if the map contains no valid mappings.
- */
-void
-pmap_destroy(pmap_t pmap)
-{
-	uint32_t ref_count;
-	spl_t s;
-	pmap_t fore, aft;
-
-	if (pmap == PMAP_NULL)
-		return;
-
-	if ((ref_count = hw_atomic_sub(&pmap->ref_count, 1)) == UINT_MAX) /* underflow */
-		panic("pmap_destroy(): ref_count < 0");
-	
-	if (ref_count > 0)
-		return; /* Still more users, leave now... */
-
-	if (!(pmap->pmapFlags & pmapVMgsaa)) {					/* Don't try this for a shadow assist guest */
-		pmap_unmap_sharedpage(pmap);						/* Remove any mapping of page -1 */
-	}
-	
-#ifdef notdef
-	if(pmap->stats.resident_count != 0)
-		panic("PMAP_DESTROY: pmap not empty");
-#else
-	if(pmap->stats.resident_count != 0) {
-		pmap_remove(pmap, 0, 0xFFFFFFFFFFFFF000ULL);
-	}
-#endif
-
-	/* 
-	 * Add the pmap to the pmap free list. 
-	 */
-
-	s = splhigh();
-	simple_lock(&free_pmap_lock);
-	
-	if (free_pmap_count <= free_pmap_max) {		/* Do we have enough spares? */
-		
-		pmap->freepmap = free_pmap_list;		/* Queue in front */
-		free_pmap_list = pmap;
-		free_pmap_count++;
-		simple_unlock(&free_pmap_lock);
-
-	} else {
-		if(cursor_pmap == pmap) cursor_pmap = (pmap_t)pmap->pmap_link.prev;	/* If we are releasing the cursor, back up */
-		fore = (pmap_t)pmap->pmap_link.prev;
-		aft  = (pmap_t)pmap->pmap_link.next;
-		fore->pmap_link.next = pmap->pmap_link.next;	/* My previous's next is my next */
-		aft->pmap_link.prev = pmap->pmap_link.prev;		/* My next's previous is my previous */	
-		simple_unlock(&free_pmap_lock);
-		pmapTrans[pmap->space].pmapPAddr = -1;			/* Invalidate the translate table physical */
-		pmapTrans[pmap->space].pmapVAddr = -1;			/* Invalidate the translate table virtual */
-		zfree(pmap_zone, pmap);
-	}
-	splx(s);
-}
-
-/*
- * pmap_reference(pmap)
- *	gains a reference to the specified pmap.
- */
-void
-pmap_reference(pmap_t pmap)
-{
-	if (pmap != PMAP_NULL)
-		(void)hw_atomic_add(&pmap->ref_count, 1); /* Bump the count */
-}
-
-/*
- * pmap_remove_some_phys
- *
- *	Removes mappings of the associated page from the specified pmap
- *
- */
-void pmap_remove_some_phys(
-	     pmap_t pmap,
-	     vm_offset_t pa)
-{
-	register struct phys_entry 	*pp;
-	register struct mapping 	*mp;
-	unsigned int pindex;
-
-	if (pmap == PMAP_NULL) {					/* This should never be called with a null pmap */
-		panic("pmap_remove_some_phys: null pmap\n");
-	}
-
-	pp = mapping_phys_lookup(pa, &pindex);		/* Get physical entry */
-	if (pp == 0) return;						/* Leave if not in physical RAM */
-
-	do {										/* Keep going until we toss all pages from this pmap */
-		if (pmap->pmapFlags & pmapVMhost) {
-			mp = hw_purge_phys(pp);				/* Toss a map */
-			switch ((unsigned int)mp & mapRetCode) {
-				case mapRtOK:
-					mapping_free(mp);			/* Return mapping to free inventory */
-					break;
-				case mapRtGuest:
-					break;						/* Don't try to return a guest mapping */
-				case mapRtEmpty:
-					break;						/* Physent chain empty, we're done */
-				case mapRtNotFnd:				
-					break;						/* Mapping disappeared on us, retry */	
-				default:
-					panic("pmap_remove_some_phys: hw_purge_phys failed - pp = %p, pmap = %p, code = %p\n",
-							pp, pmap, mp);		/* Handle failure with our usual lack of tact */
-			}
-		} else { 
-			mp = hw_purge_space(pp, pmap);		/* Toss a map */
-			switch ((unsigned int)mp & mapRetCode) {
-				case mapRtOK:
-					mapping_free(mp);			/* Return mapping to free inventory */
-					break;
-				case mapRtEmpty:
-					break;						/* Physent chain empty, we're done */
-				case mapRtNotFnd:				
-					break;						/* Mapping disappeared on us, retry */	
-				default:
-					panic("pmap_remove_some_phys: hw_purge_phys failed - pp = %p, pmap = %p, code = %p\n",
-							pp, pmap, mp);		/* Handle failure with our usual lack of tact */
-			}
-		}
-	} while (mapRtEmpty != ((unsigned int)mp & mapRetCode));
-
-#if DEBUG	
-	if ((pmap->pmapFlags & pmapVMhost) && !pmap_verify_free(pa)) 
-		panic("pmap_remove_some_phys: cruft left behind - pa = %08X, pmap = %p\n", pa, pmap);
-#endif
-
-	return;										/* Leave... */
-}
-
-/*
- * pmap_remove(pmap, s, e)
- *	unmaps all virtual addresses v in the virtual address
- *	range determined by [s, e) and pmap.
- *	s and e must be on machine independent page boundaries and
- *	s must be less than or equal to e.
- *
- *	Note that pmap_remove does not remove any mappings in nested pmaps. We just 
- *	skip those segments.
- */
-void
-pmap_remove(
-	    pmap_t pmap,
-	    addr64_t sva,
-	    addr64_t eva)
-{
-	addr64_t		va, endva;
-
-	if (pmap == PMAP_NULL) return;					/* Leave if software pmap */
-
-
-	/* It is just possible that eva might have wrapped around to zero,
-	 * and sometimes we get asked to liberate something of size zero
- * even though it's dumb (e.g. after zero-length read_overwrites)
-	 */
-	assert(eva >= sva);
-
-	/* If these are not page aligned the loop might not terminate */
-	assert((sva == trunc_page_64(sva)) && (eva == trunc_page_64(eva)));
-
-	va = sva & -4096LL;							/* Round start down to a page */
-	endva = eva & -4096LL;						/* Round end down to a page */
-
-	while(1) {									/* Go until we finish the range */
-		va = mapping_remove(pmap, va);			/* Remove the mapping and see what's next */
-		va = va & -4096LL;						/* Make sure the "not found" indication is clear */
-		if((va == 0) || (va >= endva)) break;	/* End loop if we finish range or run off the end */
-	}
-
-}
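-
-/*
- *	Illustrative only: callers must supply page-aligned bounds, e.g. to
- *	unmap a hypothetical [va, va + len) range:
- */
-#if 0	/* sketch, not compiled */
-	pmap_remove(map, trunc_page_64(va), round_page_64(va + len));
-#endif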
-
-/*
- *	Routine:
- *		pmap_page_protect
- *
- *	Function:
- *		Lower the permission for all mappings to a given page.
- */
-void
-pmap_page_protect(
-	ppnum_t pa,
-	vm_prot_t prot)
-{
-	register struct phys_entry 	*pp;
-	boolean_t 			remove;
-	unsigned int		pindex;
-	mapping_t			*mp;
-
-
-	switch (prot & VM_PROT_ALL) {
-		case VM_PROT_READ:
-		case VM_PROT_READ|VM_PROT_EXECUTE:
-			remove = FALSE;
-			break;
-		case VM_PROT_ALL:
-			return;
-		default:
-			remove = TRUE;
-			break;
-	}
-
-
-	pp = mapping_phys_lookup(pa, &pindex);		/* Get physical entry */
-	if (pp == 0) return;						/* Leave if not in physical RAM */
-
-	if (remove) {								/* If the protection was set to none, we'll remove all mappings */
-		
-		do {									/* Keep going until we toss all pages from this physical page */
-			mp = hw_purge_phys(pp);				/* Toss a map */
-			switch ((unsigned int)mp & mapRetCode) {
-				case mapRtOK:
-							mapping_free(mp);	/* Return mapping to free inventory */
-							break;
-				case mapRtGuest:
-							break;				/* Don't try to return a guest mapping */
-				case mapRtNotFnd:
-							break;				/* Mapping disappeared on us, retry */
-				case mapRtEmpty:
-							break;				/* Physent chain empty, we're done */
-				default:	panic("pmap_page_protect: hw_purge_phys failed - pp = %p, code = %p\n",
-								  pp, mp);		/* Handle failure with our usual lack of tact */
-			}
-		} while (mapRtEmpty != ((unsigned int)mp & mapRetCode));
-
-#if DEBUG
-		if (!pmap_verify_free(pa)) 
-			panic("pmap_page_protect: cruft left behind - pa = %08X\n", pa);
-#endif
-
-		return;									/* Leave... */
-	}
-
-/*	When we get here, it means that we are to change the protection for a 
- *	physical page.  
- */
- 
-	mapping_protect_phys(pa, (prot & VM_PROT_ALL) );		/* Change protection of all mappings to page. */
-
-}
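-
-/*
- *	Illustrative only: VM_PROT_NONE strips every mapping of the page, while
- *	a read-only request merely downgrades them (pn is hypothetical).
- */
-#if 0	/* sketch, not compiled */
-	pmap_page_protect(pn, VM_PROT_READ);		/* write-protect all mappings */
-	pmap_page_protect(pn, VM_PROT_NONE);		/* remove all mappings */
-#endif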
-
-/*
- *	Routine:
- *		pmap_disconnect
- *
- *	Function:
- *		Disconnect all mappings for this page and return reference and change status
- *		in generic format.
- *
- */
-unsigned int pmap_disconnect(
-	ppnum_t pa)
-{
-	register struct phys_entry *pp;
-	unsigned int				pindex;
-	mapping_t				   *mp;
-	
-	pp = mapping_phys_lookup(pa, &pindex);		/* Get physical entry */
-	if (pp == 0) return (0);					/* Return null ref and chg if not in physical RAM */
-	do {										/* Iterate until all mappings are dead and gone */
-		mp = hw_purge_phys(pp);					/* Disconnect a mapping */
-		if (!mp) break;							/* All mappings are gone, leave the loop */
-		switch ((unsigned int)mp & mapRetCode) {
-			case mapRtOK:
-						mapping_free(mp);		/* Return mapping to free inventory */
-						break;
-			case mapRtGuest:
-						break;					/* Don't try to return a guest mapping */
-			case mapRtNotFnd:
-						break;					/* Mapping disappeared on us, retry */
-			case mapRtEmpty:
-						break;					/* Physent chain empty, we're done */
-			default:	panic("hw_purge_phys: hw_purge_phys failed - pp = %p, code = %p\n",
-							  pp, mp);			/* Handle failure with our usual lack of tact */
-		}
-	} while (mapRtEmpty != ((unsigned int)mp & mapRetCode));
-
-#if DEBUG
-	if (!pmap_verify_free(pa)) 
-		panic("pmap_disconnect: cruft left behind - pa = %08X\n", pa);
-#endif
-
-	return (mapping_tst_refmod(pa));			/* Return page ref and chg in generic format */
-}
-
-
-boolean_t
-pmap_is_noencrypt(__unused ppnum_t pn)
-{
-	return (FALSE);
-}
-
-void
-pmap_set_noencrypt(__unused ppnum_t pn)
-{
-}
-
-void
-pmap_clear_noencrypt(__unused ppnum_t pn)
-{
-}
-
-
-/*
- * pmap_protect(pmap, s, e, prot)
- *	changes the protection on all virtual addresses v in the 
- *	virtual address range determined by [s, e] and pmap to prot.
- *	s and e must be on machine independent page boundaries and
- *	s must be less than or equal to e.
- *
- *	Note that any requests to change the protection of a nested pmap are
- *	ignored. Those changes MUST be done by calling this with the correct pmap.
- */
-void pmap_protect(
-	     pmap_t pmap,
-	     vm_map_offset_t sva, 
-	     vm_map_offset_t eva,
-	     vm_prot_t prot)
-{
-
-	addr64_t va, endva;
-
-	if (pmap == PMAP_NULL) return;				/* Do nothing if no pmap */
-
-	if (prot == VM_PROT_NONE) {					/* Should we kill the address range?? */
-		pmap_remove(pmap, (addr64_t)sva, (addr64_t)eva);	/* Yeah, dump 'em */
-		return;									/* Leave... */
-	}
-
-	va = sva & -4096LL;							/* Round start down to a page */
-	endva = eva & -4096LL;						/* Round end down to a page */
-
-	while(1) {									/* Go until we finish the range */
-		mapping_protect(pmap, va, (prot & VM_PROT_ALL), &va);	/* Change the protection and see what's next */
-		if((va == 0) || (va >= endva)) break;	/* End loop if we finish range or run off the end */
-	}
-
-}
-
-
-
-/*
- * pmap_enter
- *
- * Create a translation for the virtual address (virt) to the physical
- * address (phys) in the pmap with the protection requested. If the
- * translation is wired, then we cannot allow a full page fault, i.e., 
- * the mapping control block is not eligible to be stolen in a low memory
- * condition.
- *
- * NB: This is the only routine which MAY NOT lazy-evaluate
- *     or lose information.  That is, this routine must actually
- *     insert this page into the given map NOW.
- */
-void
-pmap_enter(pmap_t pmap, vm_map_offset_t va, ppnum_t pa, vm_prot_t prot, 
-		unsigned int flags, __unused boolean_t wired)
-{
-	unsigned int		mflags;
-	addr64_t			colva;
-	
-	if (pmap == PMAP_NULL) return;					/* Leave if software pmap */
-
-	mflags = 0;										/* Make sure this is initialized to nothing special */
-	if(!(flags & VM_WIMG_USE_DEFAULT)) {			/* Are they supplying the attributes? */
-		mflags = mmFlgUseAttr | (flags & VM_MEM_GUARDED) | ((flags & VM_MEM_NOT_CACHEABLE) >> 1);	/* Convert to our mapping_make flags */
-	}
-	
-/*
- *	It is possible to hang here if another processor is remapping any pages we collide with and are removing
- */ 
-
-	while(1) {										/* Keep trying the enter until it goes in */
-	
-		colva = mapping_make(pmap, va, pa, mflags, 1, (prot & VM_PROT_ALL) );		/* Enter the mapping into the pmap */
-		
-		if(!colva) break;							/* If there were no collisions, we are done... */
-		
-		mapping_remove(pmap, colva);				/* Remove the mapping that collided */
-	}
-}
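-
-/*
- *	Illustrative only: entering a single page with explicit cache attributes
- *	(map, va and pn are made up; omitting VM_WIMG_USE_DEFAULT means the
- *	flags are taken as attributes, per the conversion above).
- */
-#if 0	/* sketch, not compiled */
-	pmap_enter(map, va, pn, VM_PROT_READ | VM_PROT_WRITE,
-		VM_MEM_GUARDED | VM_MEM_NOT_CACHEABLE, TRUE);	/* I/O-style page */
-#endif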
-
-/*
- *		Enters translations for odd-sized V=F blocks.
- *
- *		The higher level VM map should be locked to ensure that we don't have a
- *		double diddle here.
- *
- *		We panic if we get a block that overlaps with another. We do not merge adjacent
- *		blocks, because removing any address within a block removes the entire block, and it
- *		would really mess things up if we trashed too much.
- *
- *		Once a block is mapped, it is immutable; that is, protection, cache mode, etc. can
- *		not be changed.  The block must be unmapped and then remapped with the new stuff.
- *		We also do not keep track of reference or change flags.
- *
- *		Any block that is larger than 256MB must be a multiple of 32MB.  We panic if it is not.
- *
- *		Note that pmap_map_block_rc is the same but doesn't panic if collision.
- *
- */
- 
-void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, uint32_t size, vm_prot_t prot, int attr, unsigned int flags) {	/* Map an autogenned block */
-
-	unsigned int		mflags;
-	addr64_t			colva;
-
-	
-	if (pmap == PMAP_NULL) {						/* Did they give us a pmap? */
-		panic("pmap_map_block: null pmap\n");		/* No, like that's dumb... */
-	}
-
-//	kprintf("pmap_map_block: (%08X) va = %016llX, pa = %08X, size = %08X, prot = %08X, attr = %08X, flags = %08X\n", 	/* (BRINGUP) */
-//		current_thread(), va, pa, size, prot, attr, flags);	/* (BRINGUP) */
-
-	mflags = mmFlgBlock | mmFlgUseAttr | (attr & VM_MEM_GUARDED) | ((attr & VM_MEM_NOT_CACHEABLE) >> 1);	/* Convert to our mapping_make flags */
-	if(flags) mflags |= mmFlgPerm;					/* Mark permanent if requested */
-	
-	colva = mapping_make(pmap, va, pa, mflags, size, prot);	/* Enter the mapping into the pmap */
-	
-	if(colva) {										/* If there was a collision, panic */
-		panic("pmap_map_block: mapping error %d, pmap = %p, va = %016llX\n", (uint32_t)(colva & mapRetCode), pmap, va);
-	}
-	
-	return;											/* Return */
-}
-
-int pmap_map_block_rc(pmap_t pmap, addr64_t va, ppnum_t pa, uint32_t size, vm_prot_t prot, int attr, unsigned int flags) {	/* Map an autogenned block */
-
-	unsigned int		mflags;
-	addr64_t			colva;
-
-	
-	if (pmap == PMAP_NULL) {						/* Did they give us a pmap? */
-		panic("pmap_map_block_rc: null pmap\n");	/* No, like that's dumb... */
-	}
-
-	mflags = mmFlgBlock | mmFlgUseAttr | (attr & VM_MEM_GUARDED) | ((attr & VM_MEM_NOT_CACHEABLE) >> 1);	/* Convert to our mapping_make flags */
-	if(flags) mflags |= mmFlgPerm;					/* Mark permanent if requested */
-
-	colva = mapping_make(pmap, va, pa, mflags, size, prot);	/* Enter the mapping into the pmap */
-	
-	if(colva) return 0;								/* If there was a collision, fail */
-	
-	return 1;										/* Return true if we worked */
-}
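-
-/*
- *	Illustrative only: a driver-style caller might block-map a framebuffer
- *	this way; "size" counts 4KB pages, as in pmap_map above.  All values
- *	here are hypothetical.
- */
-#if 0	/* sketch, not compiled */
-	if (!pmap_map_block_rc(kernel_pmap, fb_va, fb_ppnum, 256,	/* 1MB */
-			VM_PROT_READ | VM_PROT_WRITE,
-			VM_MEM_NOT_CACHEABLE | VM_MEM_GUARDED, 0))
-		printf("pmap_map_block_rc: collision\n");
-#endif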
-
-/*
- * pmap_extract(pmap, va)
- *	returns the physical address corresponding to the 
- *	virtual address specified by pmap and va if the
- *	virtual address is mapped and 0 if it is not.
- *	Note: we assume nothing is ever mapped to phys 0.
- *
- *	NOTE: This call always will fail for physical addresses greater than 0xFFFFF000.
- */
-vm_offset_t pmap_extract(pmap_t pmap, vm_map_offset_t va) {
-
-	spl_t					spl;
-	register struct mapping	*mp;
-	register vm_offset_t	pa;
-	addr64_t				nextva;
-	ppnum_t					ppoffset;
-	unsigned int			gva;
-
-#ifdef BOGUSCOMPAT
-	panic("pmap_extract: THIS CALL IS BOGUS. NEVER USE IT EVER. So there...\n");	/* Don't use this */
-#else
-
-	gva = (unsigned int)va;							/* Make sure we don't have a sign */
-
-	spl = splhigh();								/* We can't allow any loss of control here */
-	
-	mp = mapping_find(pmap, (addr64_t)gva, &nextva,1);	/* Find the mapping for this address */
-	
-	if(!mp) {										/* Is the page mapped? */
-		splx(spl);									/* Enable interrupts */
-		return 0;									/* Pass back 0 if not found */
-	}
-
-	ppoffset = (ppnum_t)(((gva & -4096LL) - (mp->mpVAddr & -4096LL)) >> 12);	/* Get offset from va to base va */
-	
-	
-	pa = mp->mpPAddr + ppoffset;					/* Remember ppage because mapping may vanish after drop call */
-			
-	mapping_drop_busy(mp);							/* We have everything we need from the mapping */
-	splx(spl);										/* Restore 'rupts */
-
-	if(pa > maxPPage32) return 0;					/* Force large addresses to fail */
-	
-	pa = (pa << 12) | (va & 0xFFF);					/* Convert physical page number to address */
-	
-#endif
-	return pa;										/* Return physical address or 0 */
-}
-
-/*
- * ppnum_t pmap_find_phys(pmap, addr64_t va)
- *	returns the physical page corresponding to the 
- *	virtual address specified by pmap and va if the
- *	virtual address is mapped and 0 if it is not.
- *	Note: we assume nothing is ever mapped to phys 0.
- *
- */
-ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va) {
-
-	spl_t					spl;
-	register struct mapping	*mp;
-	ppnum_t					pa, ppoffset;
-	addr64_t				nextva;
-
-	spl = splhigh();								/* We can't allow any loss of control here */
-	
-	mp = mapping_find(pmap, va, &nextva, 1);		/* Find the mapping for this address */
-	
-	if(!mp) {										/* Is the page mapped? */
-		splx(spl);									/* Enable interrupts */
-		return 0;									/* Pass back 0 if not found */
-	}
-		
-	
-	ppoffset = (ppnum_t)(((va & -4096LL) - (mp->mpVAddr & -4096LL)) >> 12);	/* Get offset from va to base va */
-	
-	pa = mp->mpPAddr + ppoffset;					/* Get the actual physical address */
-
-	mapping_drop_busy(mp);							/* We have everything we need from the mapping */
-
-	splx(spl);										/* Restore 'rupts */
-	return pa;										/* Return physical address or 0 */
-}
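-
-/*
- *	Illustrative only: pmap_find_phys is the 64-bit-clean translation path;
- *	pmap_extract above returns a byte address and always fails for physical
- *	pages above 0xFFFFF000.
- */
-#if 0	/* sketch, not compiled */
-	ppnum_t pn = pmap_find_phys(kernel_pmap, (addr64_t)va);
-	if (pn != 0)
-		paddr = ((addr64_t)pn << 12) | (va & 0xFFF);	/* full 64-bit address */
-#endif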
-
-
-/*
- *	pmap_attributes:
- *
- *	Set/Get special memory attributes; not implemented.
- *
- *	Note: 'VAL_GET_INFO' is used to return info about a page.
- *	  If less than 1 page is specified, return the physical page
- *	  mapping and a count of the number of mappings to that page.
- *	  If more than one page is specified, return the number
- *	  of resident pages and the number of shared (more than
- *	  one mapping) pages in the range.
- *
- *
- */
-kern_return_t
-pmap_attribute(
-	__unused pmap_t				pmap,
-	__unused vm_map_offset_t		address,
-	__unused vm_map_size_t			size,
-	__unused vm_machine_attribute_t		attribute,
-	__unused vm_machine_attribute_val_t*	value)	
-{
-	
-	return KERN_INVALID_ARGUMENT;
-
-}
-
-
-
-unsigned int pmap_cache_attributes(ppnum_t pgn) {
-
-        unsigned int	flags;
-	struct phys_entry * pp;
-
-	// Find physical address
-	if ((pp = pmap_find_physentry(pgn))) {
-	        // Use physical attributes as default
-	        // NOTE: DEVICE_PAGER_FLAGS are made to line up
-	        flags = VM_MEM_COHERENT;				/* We only support coherent memory */
-		if (pp->ppLink & ppG) flags |= VM_MEM_GUARDED;		/* Add in guarded if it is */
-		if (pp->ppLink & ppI) flags |= VM_MEM_NOT_CACHEABLE;	/* Add in cache inhibited if so */
-	} else
-	        // If no physical, just hard code attributes
-	        flags = VM_WIMG_IO;
-
-	return (flags);
-}
-
-
-
-/*
- * pmap_attribute_cache_sync(ppnum_t pp, vm_size_t size, ...)
- * 
- * Invalidates all of the instruction cache on a physical page and
- * pushes any dirty data from the data cache for the same physical page
- */
- 
-kern_return_t pmap_attribute_cache_sync(ppnum_t pp, vm_size_t size,
-				__unused vm_machine_attribute_t  attribute,
-				__unused vm_machine_attribute_val_t* value) {
-	
-	spl_t s;
-	unsigned int i, npages;
-	
-	npages = round_page(size) >> 12;			/* Get the number of pages to do */
-	
-	for(i = 0; i < npages; i++) {				/* Do all requested pages */
-		s = splhigh();							/* No interruptions here */
-		sync_ppage(pp + i);						/* Go flush data cache and invalidate icache */
-		splx(s);								/* Allow interruptions */
-	}
-	
-	return KERN_SUCCESS;
-}
-
-/*
- * pmap_sync_page_data_phys(ppnum_t pa)
- * 
- * Invalidates all of the instruction cache on a physical page and
- * pushes any dirty data from the data cache for the same physical page
- */
- 
-void pmap_sync_page_data_phys(ppnum_t pa) {
-	
-	spl_t s;
-	
-	s = splhigh();								/* No interruptions here */
-	sync_ppage(pa);								/* Sync up dem caches */
-	splx(s);									/* Allow interruptions */
-	return;
-}
-
-void
-pmap_sync_page_attributes_phys(ppnum_t pa)
-{
-	pmap_sync_page_data_phys(pa);
-}
-
-#ifdef CURRENTLY_UNUSED_AND_UNTESTED
-/*
- * pmap_collect
- * 
- * Garbage collects the physical map system for pages that are no longer used.
- * It isn't implemented or needed or wanted.
- */
-void
-pmap_collect(__unused pmap_t pmap)
-{
-	return;
-}
-#endif
-
-/*
- *	Routine:	pmap_activate
- *	Function:
- *		Binds the given physical map to the given
- *		processor, and returns a hardware map description.
- *		It isn't implemented or needed or wanted.
- */
-void
-pmap_activate(
-	__unused pmap_t pmap,
-	__unused thread_t th,
-	__unused int which_cpu)
-{
-	return;
-}
-/*
- * pmap_deactivate:
- * It isn't implemented or needed or wanted.
- */
-void
-pmap_deactivate(
-	__unused pmap_t pmap,
-	__unused thread_t th,
-	__unused int which_cpu)
-{
-	return;
-}
-
-
-/*
- * pmap_pageable(pmap, s, e, pageable)
- *	Make the specified pages (by pmap, offset)
- *	pageable (or not) as requested.
- *
- *	A page which is not pageable may not take
- *	a fault; therefore, its page table entry
- *	must remain valid for the duration.
- *
- *	This routine is merely advisory; pmap_enter()
- *	will specify that these pages are to be wired
- *	down (or not) as appropriate.
- *
- *	(called from vm/vm_fault.c).
- */
-void
-pmap_pageable(
-	__unused pmap_t				pmap,
-	__unused vm_map_offset_t	start,
-	__unused vm_map_offset_t	end,
-	__unused boolean_t			pageable)
-{
-
-	return;												/* This is not used... */
-
-}
-/*
- *	Routine:	pmap_change_wiring
- *	NOT USED ANYMORE.
- */
-void
-pmap_change_wiring(
-	__unused pmap_t				pmap,
-	__unused vm_map_offset_t	va,
-	__unused boolean_t			wired)
-{
-	return;												/* This is not used... */
-}
-
-/*
- * pmap_clear_modify(phys)
- *	clears the hardware modified ("dirty") bit for one
- *	machine independent page starting at the given
- *	physical address.  phys must be aligned on a machine
- *	independent page boundary.
- */
-void
-pmap_clear_modify(ppnum_t pa)
-{
-
-	mapping_clr_mod(pa);				/* Clear all change bits for physical page */
-
-}
-
-/*
- * pmap_is_modified(phys)
- *	returns TRUE if the given physical page has been modified 
- *	since the last call to pmap_clear_modify().
- */
-boolean_t
-pmap_is_modified(register ppnum_t pa)
-{
-	return mapping_tst_mod(pa);	/* Check for modified */
-	
-}
-
-/*
- * pmap_clear_reference(phys)
- *	clears the hardware referenced bit in the given machine
- *	independent physical page.
- *
- */
-void
-pmap_clear_reference(ppnum_t pa)
-{
-	mapping_clr_ref(pa);			/* Clear the reference bit for the physical page */
-}
-
-/*
- * pmap_is_referenced(phys)
- *	returns TRUE if the given physical page has been referenced 
- *	since the last call to pmap_clear_reference().
- */
-boolean_t
-pmap_is_referenced(ppnum_t pa)
-{
-	return mapping_tst_ref(pa);	/* Check for referenced */
-}
-
-/*
- * pmap_get_refmod(phys)
- *  returns the referenced and modified bits of the specified
- *  physical page.
- */
-unsigned int
-pmap_get_refmod(ppnum_t pa)
-{
-	return (mapping_tst_refmod(pa));
-}
-
-/*
- * pmap_clear_refmod(phys, mask)
- *  clears the referenced and modified bits as specified by the mask
- *  of the specified physical page.
- */
-void
-pmap_clear_refmod(ppnum_t pa, unsigned int mask)
-{
-	mapping_clr_refmod(pa, mask);
-}
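-
-#if 0	/* Illustrative sketch, not part of the original source: the refmod
-	   calls above compose in the obvious way.  A hypothetical caller that
-	   snapshots and then clears both bits for a page might look like this;
-	   VM_MEM_MODIFIED and VM_MEM_REFERENCED are the returned mask bits. */
-static unsigned int example_refmod_snapshot(ppnum_t pa) {
-
-	unsigned int bits;
-
-	bits = pmap_get_refmod(pa);						/* Read the referenced and modified bits */
-	pmap_clear_refmod(pa, VM_MEM_MODIFIED | VM_MEM_REFERENCED);	/* Reset both for the next interval */
-	return bits;
-}
-#endif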
-
-/*
- * pmap_eligible_for_execute(ppnum_t pa)
- *	return true if physical address is eligible to contain executable code;
- *  otherwise, return false
- */
-boolean_t
-pmap_eligible_for_execute(ppnum_t pa)
-{
-	phys_entry_t *physent;
-	unsigned int  pindex;
-
-	physent = mapping_phys_lookup(pa, &pindex);				/* Get physical entry */
-
-	if((!physent) || (physent->ppLink & ppG))
-		return 0;											/* If there is no physical entry or marked guarded,
-		                                                       the entry is not eligible for execute */
-
-	return 1;												/* Otherwise, entry is eligible for execute */
-}
-
-#if	MACH_VM_DEBUG
-int
-pmap_list_resident_pages(
-	__unused pmap_t		pmap,
-	__unused vm_offset_t	*listp,
-	__unused int		space)
-{
-	return 0;
-}
-#endif	/* MACH_VM_DEBUG */
-
-/*
- * Locking:
- *	spl: VM
- */
-void
-pmap_copy_part_page(
-	vm_offset_t	src,
-	vm_offset_t	src_offset,
-	vm_offset_t	dst,
-	vm_offset_t	dst_offset,
-	vm_size_t	len)
-{
-	addr64_t fsrc, fdst;
-
-	assert((((dst << 12) & PAGE_MASK) + dst_offset + len) <= PAGE_SIZE);
-	assert((((src << 12) & PAGE_MASK) + src_offset + len) <= PAGE_SIZE);
-
-	fsrc = ((addr64_t)src << 12) + src_offset;
-	fdst = ((addr64_t)dst << 12) + dst_offset;
-
-	phys_copy(fsrc, fdst, len);								/* Copy the stuff physically */
-}
-
-void
-pmap_zero_part_page(
-	__unused vm_offset_t		p,
-	__unused vm_offset_t    offset,
-	__unused vm_size_t      len)
-{
-    panic("pmap_zero_part_page");
-}
-
-boolean_t pmap_verify_free(ppnum_t pa) {
-
-	struct phys_entry	*pp;
-	unsigned int pindex;
-
-	pp = mapping_phys_lookup(pa, &pindex);	/* Get physical entry */
-	if (pp == 0) return FALSE;					/* If there isn't one, say it isn't free... */
-
-	if(pp->ppLink & ~(ppLock | ppFlags)) return FALSE;	/* We have at least one mapping */
-	return TRUE;								/* No mappings */
-}
-
-
-/* Determine if we need to switch space and set up for it if so */
-
-void pmap_switch(pmap_t map)
-{
-	hw_blow_seg(lowGlo.lgUMWvaddr);					/* Blow off the first segment */
-	hw_blow_seg(lowGlo.lgUMWvaddr + 0x10000000ULL);	/* Blow off the second segment */
-
-/* when changing to kernel space, don't bother
- * doing anything, the kernel is mapped from here already.
- */
-	if (map->space == PPC_SID_KERNEL) {			/* Are we switching into kernel space? */
-		return;									/* If so, we don't do anything... */
-	}
-	
-	hw_set_user_space(map);						/* Indicate if we need to load the SRs or not */
-	return;										/* Bye, bye, butterfly... */
-}
-
-
-/*
- * The PPC pmap can only nest segments of 256MB, aligned on a 256MB boundary.
- */
-uint64_t pmap_nesting_size_min = 0x10000000ULL;
-uint64_t pmap_nesting_size_max = 0x10000000ULL;
-
-/*
- *	kern_return_t pmap_nest(grand, subord, vstart, nstart, size)
- *
- *	grand  = the pmap that we will nest subord into
- *	subord = the pmap that goes into the grand
- *	vstart  = start of range in pmap to be inserted
- *	nstart  = start of range in the nested pmap
- *	size   = Size of nest area (up to 2TB)
- *
- *	Inserts a pmap into another.  This is used to implement shared segments.
- *	On the current PPC processors, this is limited to segment (256MB) aligned
- *	segment sized ranges.
- *
- *	We actually kinda allow recursive nests.  The gating factor is that we do not allow 
- *	nesting on top of something that is already mapped, i.e., the range must be empty.
- *
- *	Note that we depend upon higher level VM locks to ensure that things don't change while
- *	we are doing this.  For example, VM should not do any pmap enters while it is nesting,
- *	nor should it do two nests at once.
- */
-
-kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t nstart, uint64_t size) {
-		
-	addr64_t vend, colladdr;
-	unsigned int msize;
-	int nlists;
-	mapping_t *mp;
-	
-	if(size & 0x0FFFFFFFULL) return KERN_INVALID_VALUE;	/* We can only do this for multiples of 256MB */
-	if((size >> 25) > 65536)  return KERN_INVALID_VALUE;	/* Max size we can nest is 2TB */
-	if(vstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE;	/* We can only do this aligned to 256MB */
-	if(nstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE;	/* We can only do this aligned to 256MB */
-	
-	if(size == 0) {								/*	Is the size valid? */
-		panic("pmap_nest: size is invalid - %016llX\n", size);
-	}
-	
-	msize = (size >> 25) - 1;							/* Change size to blocks of 32MB */
-	
-	nlists = mapSetLists(grand);						/* Set number of lists this will be on */
-
-	mp = mapping_alloc(nlists);							/* Get a spare mapping block */
-	
-	mp->mpFlags = 0x01000000 | mpNest | mpPerm | mpBSu | nlists;	/* Make this a permanent nested pmap with a 32MB basic size unit */
-														/* Set the flags. Make sure busy count is 1 */
-	mp->mpSpace = subord->space;						/* Set the address space/pmap lookup ID */
-	mp->u.mpBSize = msize;								/* Set the size */
-	mp->mpPte = 0;										/* Set the PTE invalid */
-	mp->mpPAddr = 0;									/* Set the physical page number */
-	mp->mpVAddr = vstart;								/* Set the address */
-	mp->mpNestReloc = nstart - vstart;					/* Set grand to nested vaddr relocation value */
-	
-	colladdr = hw_add_map(grand, mp);					/* Go add the mapping to the pmap */
-	
-	if(colladdr) {										/* Did it collide? */
-		vend = vstart + size - 4096;					/* Point to the last page we would cover in nest */	
-		panic("pmap_nest: attempt to nest into a non-empty range - pmap = %p, start = %016llX, end = %016llX\n",
-			grand, vstart, vend);
-	}
-	
-	return KERN_SUCCESS;
-}
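-
-#if 0	/* Illustrative sketch, not part of the original source: how a
-	   hypothetical caller might nest one 256MB shared segment.  The names
-	   example_nest, task_pmap, and shared_pmap are assumptions for the
-	   example; the alignment and size restrictions are the ones described
-	   above. */
-static void example_nest(pmap_t task_pmap, pmap_t shared_pmap) {
-
-	kern_return_t ret;
-
-	ret = pmap_nest(task_pmap, shared_pmap,			/* Nest shared_pmap into task_pmap */
-		0x0000000090000000ULL,						/* vstart - 256MB-aligned slot in the grand pmap */
-		0x0000000000000000ULL,						/* nstart - 256MB-aligned origin in the nested pmap */
-		0x0000000010000000ULL);						/* size - exactly one 256MB segment */
-	if(ret != KERN_SUCCESS) panic("example_nest: nest failed\n");
-
-	(void)pmap_unnest(task_pmap, 0x0000000090000000ULL, 0x0000000010000000ULL);	/* The matching unnest */
-}
-#endif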
-
-/*
- *	kern_return_t pmap_unnest(grand, vaddr, size)
- *
- *	grand  = the pmap that the nested range will be removed from
- *	vaddr  = start of range in pmap to be unnested
- *	size   = size of range in pmap to be unnested
- *
- *	Removes a pmap from another.  This is used to implement shared segments.
- *	On the current PPC processors, this is limited to segment (256MB) aligned
- *	segment sized ranges.
- */
-
-kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
-			
-	unsigned int tstamp, i, mycpu;
-	addr64_t nextva;
-	spl_t s;
-	mapping_t *mp;
-		
-	if (size != pmap_nesting_size_min ||
-	    (vaddr & (pmap_nesting_size_min-1))) {
-		panic("pmap_unnest(vaddr=0x%016llx, size=0x016%llx): "
-		      "must be 256MB and aligned\n",
-		      vaddr, size);
-	}
-
-	s = splhigh();										/* Make sure interruptions are disabled */
-
-	mp = mapping_find(grand, vaddr, &nextva, 0);		/* Find the nested map */
-
-	if(((unsigned int)mp & mapRetCode) != mapRtOK) {	/* See if it was even nested */
-		panic("pmap_unnest: Attempt to unnest an unnested segment - va = %016llX\n", vaddr);
-	}
-
-	if((mp->mpFlags & mpType) != mpNest) {				/* Did we find something other than a nest? */
-		panic("pmap_unnest: Attempt to unnest something that is not a nest - va = %016llX\n", vaddr);
-	}
-	
-	if(mp->mpVAddr != vaddr) {							/* Make sure the address is the same */
-		panic("pmap_unnest: Attempt to unnest something that is not at start of nest - va = %016llX\n", vaddr);
-	}
-
-	hw_atomic_and_noret(&mp->mpFlags, ~mpPerm);			/* Show that this mapping is now removable */
-	
-	mapping_drop_busy(mp);								/* Go ahead and release the mapping now */
-
-	splx(s);											/* Restore 'rupts */
-		
-	(void)mapping_remove(grand, vaddr);					/* Toss the nested pmap mapping */
-	
-	invalidateSegs(grand);								/* Invalidate the pmap segment cache */
-	
-/*
- *	Note that the following will force the segment registers to be reloaded 
- *	on all processors (if they are using the pmap we just changed) before returning.
- *
- *	This is needed.  The reason is that until the segment register is 
- *	reloaded, another thread in the same task on a different processor will
- *	be able to access memory that it isn't allowed to anymore.  That can happen
- *	because access to the subordinate pmap is being removed, but the pmap is still
- *	valid.
- *
- *	Note that we only kick the other processor if we see that it was using the pmap while we
- *	were changing it.
- */
-
-
-	for(i=0; i < real_ncpus; i++) {						/* Cycle through processors */
-		disable_preemption();
-		mycpu = cpu_number();								/* Who am I? Am I just a dream? */
-		if((unsigned int)grand == PerProcTable[i].ppe_vaddr->ppUserPmapVirt) {	/* Is this guy using the changed pmap? */
-			
-			PerProcTable[i].ppe_vaddr->ppInvSeg = 1;	/* Show that we need to invalidate the segments */
-			
-			if(i != mycpu) {
-		
-				tstamp = PerProcTable[i].ppe_vaddr->ruptStamp[1];		/* Save the processor's last interrupt time stamp */
-				if(cpu_signal(i, SIGPcpureq, CPRQsegload, 0) == KERN_SUCCESS) {	/* Make sure we see the pmap change */
-					if(!hw_cpu_wcng(&PerProcTable[i].ppe_vaddr->ruptStamp[1], tstamp, LockTimeOut)) {	/* Wait for the other processors to enter debug */
-						panic("pmap_unnest: Other processor (%d) did not see interruption request\n", i);
-					}
-				}
-			}
-		}
-		enable_preemption();
-	}
-
-	return KERN_SUCCESS;								/* Bye, bye, butterfly... */
-}
-
-boolean_t pmap_adjust_unnest_parameters(__unused pmap_t p, __unused vm_map_offset_t *s, __unused vm_map_offset_t *e) {
-	return FALSE; /* Not implemented on PowerPC */
-}
-
-/*
- *	void MapUserMemoryWindowInit(void)
- *
- *	Initialize anything we need to in order to map user address space slices into
- *	the kernel.  Primarily used for copy in/out.
- *
- *	Currently we only support one 512MB slot for this purpose.  There are two special
- *	mappings defined for the purpose: the special pmap nest, and linkage mapping.
- *
- *	The special pmap nest (which is allocated in this function) is used as a place holder
- *	in the kernel's pmap search list. It is 512MB long and covers the address range
- *	starting at lgUMWvaddr.  It points to no actual memory and when the fault handler 
- *	hits in it, it knows to look in the per_proc and start using the linkage
- *	mapping contained therein.
- *
- *	The linkage mapping is used to glue the user address space slice into the 
- *	kernel.  It contains the relocation information used to transform the faulting
- *	kernel address into the user address space.  It also provides the link to the
- *	user's pmap.  This is pointed to by the per_proc and is switched in and out
- *	whenever there is a context switch.
- *
- */
-
-void MapUserMemoryWindowInit(void) {
-		
-	addr64_t colladdr;
-	int nlists;
-	mapping_t *mp;
-	
-	nlists = mapSetLists(kernel_pmap);					/* Set number of lists this will be on */
-	
-	mp = mapping_alloc(nlists);							/* Get a spare mapping block */
-
-	mp->mpFlags = 0x01000000 | mpLinkage | mpPerm | mpBSu | nlists;	/* Make this a permanent linkage mapping with a 32MB basic size unit */
-														/* Set the flags. Make sure busy count is 1 */
-	mp->mpSpace = kernel_pmap->space;					/* Set the address space/pmap lookup ID */
-	mp->u.mpBSize = 15;									/* Size is 512MB (two segments) in 32MB chunks, less 1 */
-	mp->mpPte = 0;										/* Means nothing */
-	mp->mpPAddr = 0;									/* Means nothing */
-	mp->mpVAddr = lowGlo.lgUMWvaddr;					/* Set the address range we cover */
-	mp->mpNestReloc = 0;								/* Means nothing */
-	
-	colladdr = hw_add_map(kernel_pmap, mp);				/* Go add the mapping to the pmap */
-	
-	if(colladdr) {										/* Did it collide? */
-		panic("MapUserMemoryWindowInit: MapUserMemoryWindow range already mapped\n");
-	}
-	
-	return;
-}
-
-/*
- *	addr64_t MapUserMemoryWindow(vm_map_t map, vm_offset_t va, size)
- *
- *	map  = the vm_map that we are mapping into the kernel
- *	va = start of the address range we are mapping
- *	Note that we do not test validity, we choose to trust our fellows...
- *
- *	Maps a 512M slice of a user address space into a predefined kernel range
- *	on a per-thread basis. We map only the first 256M segment, allowing the
- *  second 256M segment to fault in as needed. This allows our clients to access
- *  an arbitrarily aligned operand up to 256M in size.
- *
- *  In the future, the restriction of a predefined range may be loosened.
- *
- *	Builds the proper linkage map to map the user range
- *  We will round this down to the previous segment boundary and calculate
- *	the relocation to the kernel slot
- *
- *	We always make a segment table entry here if we need to.  This is mainly because of
- *	copyin/out and if we don't, there will be multiple segment faults for
- *	each system call.  I have seen upwards of 30000 per second.
- *
- *	We do check, however, to see if the slice is already mapped and if so,
- *	we just exit.  This is done for performance reasons.  It was found that 
- *	there was a considerable boost in copyin/out performance if we did not
- *	invalidate the segment at ReleaseUserAddressSpace time, so we dumped the
- *	restriction that you had to bracket MapUserMemoryWindow.  There is a
- *	further boost still if you didn't need to map it each time.  The theory
- *	behind this is that many times copies are to or from the same segment and
- *	done multiple times within the same system call.  To take advantage of that,
- *	we check umwSpace and umwRelo to see if we've already got it.  
- *
- *	We also need to half-invalidate the slice when we context switch or go
- *	back to user state.  A half-invalidate does not clear the actual mapping,
- *	but it does force the MapUserMemoryWindow function to reload the segment
- *	register/SLBE.  If this is not done, we can end up with some pretty severe
- *	performance penalties. If we map a slice, and the cached space/relocation is
- *	the same, we won't reload the segment registers.  However, since we ran someone else,
- *	our SR is cleared and we will take a fault.  This is reasonable if we block
- *	while copying (e.g., we took a page fault), but it is not reasonable when we 
- *	just start.  For this reason, we half-invalidate to make sure that the SR is
- *	explicitly reloaded.
- *	 
- *	Note that we do not go to the trouble of making a pmap segment cache
- *	entry for these guys because they are very short term -- 99.99% of the time
- *	they will be unmapped before the next context switch.
- *
- */
-
-addr64_t MapUserMemoryWindow(
-	vm_map_t map,
-	addr64_t va) {
-		
-	addr64_t baddrs, reladd;
-	thread_t thread;
-	mapping_t *mp;
-	
-	baddrs = va & 0xFFFFFFFFF0000000ULL;				/* Isolate the segment */
-	thread = current_thread();							/* Remember our activation */
-
-	reladd = baddrs - lowGlo.lgUMWvaddr;				/* Get the relocation from user to kernel */
-	
-	if((thread->machine.umwSpace == map->pmap->space) && (thread->machine.umwRelo == reladd)) {	/* Already mapped? */
-		return ((va & 0x0FFFFFFFULL) | lowGlo.lgUMWvaddr);	/* Pass back the kernel address we are to use */
-	}
-
-	disable_preemption();								/* Don't move... */	
-	
-	mp = (mapping_t *)&(getPerProc()->ppUMWmp);			/* Make up for C */
-	thread->machine.umwRelo = reladd;					/* Relocation from user to kernel */
-	mp->mpNestReloc = reladd;							/* Relocation from user to kernel */
-	
-	thread->machine.umwSpace = map->pmap->space;		/* Set the address space/pmap lookup ID */
-	mp->mpSpace = map->pmap->space;						/* Set the address space/pmap lookup ID */
-	
-/*
- *	Here we make an assumption that we are going to be using the base pmap's address space.
- *	If we are wrong, and that would be very, very, very rare, the fault handler will fix us up.
- */ 
-
-	hw_map_seg(map->pmap,  lowGlo.lgUMWvaddr, baddrs);	/* Make the entry for the first segment */
-
-	enable_preemption();								/* Let's move */
-	return ((va & 0x0FFFFFFFULL) | lowGlo.lgUMWvaddr);	/* Pass back the kernel address we are to use */
-}
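-
-#if 0	/* Illustrative sketch, not part of the original source: how a
-	   hypothetical copyin-style client might use the window.  It maps the
-	   user slice, derives the aliased kernel address, and copies through
-	   it; faults inside the window are resolved by the fault handler via
-	   the linkage mapping described above. */
-static void example_window_copy(vm_map_t map, addr64_t user_va, void *kbuf, unsigned int len) {
-
-	addr64_t kern_va;
-
-	kern_va = MapUserMemoryWindow(map, user_va);	/* Map the slice and get the kernel alias for user_va */
-	bcopy((void *)(uintptr_t)kern_va, kbuf, len);	/* Touch the user data through the kernel window */
-}
-#endif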
-
-#if CONFIG_DTRACE
-/*
- * Constrain DTrace copyin/copyout actions
- */
-extern kern_return_t dtrace_copyio_preflight(addr64_t);
-extern kern_return_t dtrace_copyio_postflight(addr64_t);
-
-kern_return_t dtrace_copyio_preflight(__unused addr64_t va)
-{
-	if (current_map() == kernel_map)
-		return KERN_FAILURE;
-	else
-		return KERN_SUCCESS;
-}
- 
-kern_return_t dtrace_copyio_postflight(__unused addr64_t va)
-{
-	thread_t thread = current_thread();
-
-	thread->machine.umwSpace |= umwSwitchAway;
-	return KERN_SUCCESS;
-}
-#endif /* CONFIG_DTRACE */
-
-/*
- *	kern_return_t pmap_boot_map(size)
- *
- *	size   = size of virtual address range to be mapped
- *
- *	This function is used to assign a range of virtual addresses before VM is
- *	initialized.  It starts at VM_MAX_KERNEL_ADDRESS and works downward.
- *	The variable vm_last_addr contains the current highest possible VM
- *	assignable address.  It is a panic to attempt to call this after VM has
- *	started up.  The only problem is that we may not have the serial or
- *	framebuffer mapped, so we'll never know we died.........
- */
-
-vm_offset_t pmap_boot_map(vm_size_t size) {
-			
-	if(kernel_map != VM_MAP_NULL) {				/* Has VM already started? */
-		panic("pmap_boot_map: VM started\n");
-	}
-	
-	size = round_page(size);					/* Make sure this is in pages */
-	vm_last_addr = vm_last_addr - size;			/* Allocate the memory */
-	return (vm_last_addr + 1);					/* Return the vaddr we just allocated */
-
-}
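-
-#if 0	/* Illustrative sketch, not part of the original source: a worked
-	   example of the downward allocation above, with assumed numbers.  If
-	   vm_last_addr is 0xDFFFFFFF and a caller asks for 0x2100 bytes, the
-	   size rounds up to 0x3000, vm_last_addr drops to 0xDFFFCFFF, and the
-	   caller gets 0xDFFFD000 as the base of its three pages. */
-static vm_offset_t example_boot_map(void) {
-	return pmap_boot_map(0x2100);					/* Returns the new base, 0xDFFFD000 in the example */
-}
-#endif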
-
-
-/*
- *	void pmap_init_sharedpage(void);
- *
- *	Hack map for the 64-bit commpage
- */
-
-void pmap_init_sharedpage(vm_offset_t cpg){
-	
-	addr64_t cva, cpoff;
-	ppnum_t cpphys;
-	
-	sharedPmap = pmap_create(0, FALSE);				/* Get a pmap to hold the common segment */
-	if(!sharedPmap) {							/* Check for errors */
-		panic("pmap_init_sharedpage: couldn't make sharedPmap\n");
-	}
-
-	for(cpoff = 0; cpoff < _COMM_PAGE_AREA_USED; cpoff += 4096) {	/* Step along now */
-	
-		cpphys = pmap_find_phys(kernel_pmap, (addr64_t)cpg + cpoff);
-		if(!cpphys) {
-			panic("pmap_init_sharedpage: compage %016llX not mapped in kernel\n", cpg + cpoff);
-		}
-		
-		cva = mapping_make(sharedPmap, (addr64_t)((uint32_t)_COMM_PAGE_BASE_ADDRESS) + cpoff,
-			cpphys, mmFlgPerm, 1, VM_PROT_READ | VM_PROT_EXECUTE);		/* Map the page read/execute only */
-		if(cva) {								/* Check for errors */
-			panic("pmap_init_sharedpage: couldn't map commpage page - cva = %016llX\n", cva);
-		}
-	
-	}
-		
-	return;
-}
-
-
-/*
- *	void pmap_map_sharedpage(task_t task, pmap_t pmap);
- *
- *	Maps the last segment in a 64-bit address space
- *
- *	
- */
-
-void pmap_map_sharedpage(task_t task, pmap_t pmap){
-	
-	kern_return_t ret;
-
-	if(task_has_64BitAddr(task) || _cpu_capabilities & k64Bit) {	/* Should we map the 64-bit page -1? */
-		ret = pmap_nest(pmap, sharedPmap, 0xFFFFFFFFF0000000ULL, 0x00000000F0000000ULL,
-			0x0000000010000000ULL);				/* Nest the highest possible segment to map comm page */
-		if(ret != KERN_SUCCESS) {				/* Did it work? */
-			panic("pmap_map_sharedpage: couldn't nest shared page - ret = %08X\n", ret);
-		}
-	}
-
-	return;
-}
-
-
-/*
- *	void pmap_unmap_sharedpage(pmap_t pmap);
- *
- *	Unmaps the last segment in a 64-bit address space
- *
- */
-
-void pmap_unmap_sharedpage(pmap_t pmap){
-	
-	kern_return_t ret;
-	mapping_t *mp;
-	boolean_t inter;
-	int gotnest;
-	addr64_t nextva;
-
-	if(BootProcInfo.pf.Available & pf64Bit) {		/* Are we on a 64-bit machine? */
-		
-		inter  = ml_set_interrupts_enabled(FALSE);	/* Disable interruptions for now */
-		mp = hw_find_map(pmap, 0xFFFFFFFFF0000000ULL, &nextva);	/* Find the mapping for this address */
-		if((unsigned int)mp == mapRtBadLk) {		/* Did we lock up ok? */
-			panic("pmap_unmap_sharedpage: mapping lock failure - rc = %p, pmap = %p\n", mp, pmap);	/* Die... */
-		}
-		
-		gotnest = 0;								/* Assume nothing here */
-		if(mp) {
-			gotnest = ((mp->mpFlags & mpType) == mpNest);
-													/* Remember if we have a nest here */
-			mapping_drop_busy(mp);					/* We have everything we need from the mapping */
-		}
-		ml_set_interrupts_enabled(inter);			/* Put interrupts back to what they were */
-		
-		if(!gotnest) return;						/* Leave if there isn't any nesting here */
-		
-		ret = pmap_unnest(pmap, 0xFFFFFFFFF0000000ULL, 0x0000000010000000ULL);	/* Unnest the max 64-bit page */
-		
-		if(ret != KERN_SUCCESS) {					/* Did it work? */
-			panic("pmap_unmap_sharedpage: couldn't unnest shared page - ret = %08X\n", ret);
-		}
-	}
-	
-	return;
-}
-
-
-/* temporary workaround */
-boolean_t
-coredumpok(
-	__unused vm_map_t map,
-	__unused vm_offset_t va)
-{
-	return TRUE;
-}
-
-
-/*
- * disable no-execute capability on
- * the specified pmap
- */
-void pmap_disable_NX(pmap_t pmap) {
-
-	pmap->pmapFlags |= pmapNXdisabled;
-}
-
diff --git a/osfmk/ppc/pmap.h b/osfmk/ppc/pmap.h
deleted file mode 100644
index 24db51ea2..000000000
--- a/osfmk/ppc/pmap.h
+++ /dev/null
@@ -1,338 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * Copyright (c) 1990 The University of Utah and
- * the Center for Software Science at the University of Utah (CSS).
- * All rights reserved.
- *
- * Permission to use, copy, modify and distribute this software is hereby
- * granted provided that (1) source code retains these copyright, permission,
- * and disclaimer notices, and (2) redistributions including binaries
- * reproduce the notices in supporting documentation, and (3) all advertising
- * materials mentioning features or use of this software display the following
- * acknowledgement: ``This product includes software developed by the Center
- * for Software Science at the University of Utah.''
- *
- * THE UNIVERSITY OF UTAH AND CSS ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
- * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSS DISCLAIM ANY LIABILITY OF
- * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * CSS requests users of this software to return to css-dist@cs.utah.edu any
- * improvements that they make and grant CSS redistribution rights.
- *
- * 	Utah $Hdr: pmap.h 1.13 91/09/25$
- *	Author: Mike Hibler, Bob Wheeler, University of Utah CSS, 9/90
- */
-
-#ifndef	_PPC_PMAP_H_
-#define	_PPC_PMAP_H_
-
-#include <mach/vm_types.h>
-#include <mach/machine/vm_types.h>
-#include <mach/vm_prot.h>
-#include <mach/vm_statistics.h>
-#include <kern/queue.h>
-#include <vm/pmap.h>
-#include <ppc/mappings.h>
-
-#define maxPPage32 0x000FFFFF			/* Maximum page number in 32-bit machines */
-
-typedef uint32_t shexlock;
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-
-struct sgc {
-	uint64_t	sgcESID;				/* ESID portion of segment cache */
-#define sgcESmsk	0xFFFFFFFFF0000000ULL	/* ESID portion of segment register cache */
-	uint64_t	sgcVSID;				/* VSID portion of segment cache */
-#define sgcVSmsk	0xFFFFFFFFFFFFF000ULL	/* VSID mask */
-#define sgcVSKeys	0x0000000000000C00ULL	/* Protection keys */
-#define sgcVSKeyUsr	53					/* User protection key */
-#define sgcVSNoEx	0x0000000000000200ULL	/* No execute */
-};
-#pragma pack()
-
-typedef struct sgc sgc;
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-struct pmap_vmm_stats {
-	unsigned int	vxsGpf;				/* Guest faults */
-	unsigned int	vxsGpfMiss;			/* Faults that miss in hash table */
-	
-	unsigned int	vxsGrm;				/* Guest mapping remove requests */
-	unsigned int	vxsGrmMiss;			/* Remove misses in hash table */
-	unsigned int	vxsGrmActive;		/* Remove hits that are active */
-	
-	unsigned int	vxsGra;				/* Guest remove all mappings requests */
-	unsigned int	vxsGraHits;			/* Remove hits in hash table */
-	unsigned int	vxsGraActive;		/* Remove hits that are active */
-	
-	unsigned int	vxsGrl;				/* Guest remove local mappings requests */
-	unsigned int	vxsGrlActive;		/* Active mappings removed */
-
-	unsigned int	vxsGrs;				/* Guest mapping resumes */
-	unsigned int	vxsGrsHitAct;		/* Resume hits active entry */
-	unsigned int	vxsGrsHitSusp;		/* Resume hits suspended entry */
-	unsigned int	vxsGrsMissGV;		/* Resume misses on guest virtual */
-	unsigned int	vxsGrsHitPE;		/* Resume hits on host virtual */
-	unsigned int	vxsGrsMissPE;		/* Resume misses on host virtual */
-
-	unsigned int	vxsGad;				/* Guest mapping adds */
-	unsigned int	vxsGadHit;			/* Add hits entry (active or dormant) */
-	unsigned int	vxsGadFree;			/* Add takes free entry in group */
-	unsigned int	vxsGadDormant;		/* Add steals dormant entry in group */
-	unsigned int	vxsGadSteal;		/* Add steals active entry in group */
-	
-	unsigned int	vxsGsu;				/* Guest mapping suspends */
-	unsigned int	vxsGsuHit;			/* Suspend hits entry (active only) */
-	unsigned int	vxsGsuMiss;			/* Suspend misses entry */
-	
-	unsigned int	vxsGtd;				/* Guest test ref&chg */
-	unsigned int	vxsGtdHit;			/* Test r&c hits entry (active only) */
-	unsigned int	vxsGtdMiss;			/* Test r&c misses entry */
-};
-#pragma pack()
-typedef struct pmap_vmm_stats pmap_vmm_stats;
-
-/* Not wanting to tax all of our customers for the sins of those that use virtual operating
-   systems, we've built the hash table from its own primitive virtual memory. We first
-   allocate a pmap_vmm_ext with sufficient space following to accommodate the hash table 
-   index (one 64-bit physical address per 4k-byte page of hash table). The allocation 
-   must not cross a 4k-byte page boundary (we'll be accessing the block with relocation
-   off), so we'll try a couple of times, then just burn a whole page. We stuff the effective
-   address of the cache-aligned index into hIdxBase; the physical-mode code locates the index
-   by adding the size of a pmap_vmm_extension to its translated physical address, then rounding
-   up to the next 32-byte boundary. Now we grab enough virtual pages to contain the hash table,
-   and fill in the index with the page's physical addresses. For the final touch that's sure
-   to please, we initialize the hash table. Mmmmm, golden brown perfection.
- */
-
-#pragma pack(4)
-struct pmap_vmm_ext {
-	addr64_t		vmxSalt;			/* This block's virt<->real conversion salt */
-	addr64_t		vmxHostPmapPhys;	/* Host pmap physical address */
-	struct pmap		*vmxHostPmap;		/* Host pmap effective address */
-	addr64_t		*vmxHashPgIdx;		/* Hash table physical index base address */
-	vm_offset_t		*vmxHashPgList;		/* List of virtual pages comprising the hash table */
-	unsigned int	*vmxActiveBitmap;	/* Bitmap of active mappings in hash table */
-	pmap_vmm_stats	vmxStats;			/* Stats for VMM assists */
-#define VMX_HPIDX_OFFSET ((sizeof(pmap_vmm_ext) + 127) & ~127)
-										/* The hash table physical index begins at the first
-										   128-byte boundary after the pmap_vmm_ext struct */
-#define VMX_HPLIST_OFFSET (VMX_HPIDX_OFFSET + (GV_HPAGES * sizeof(addr64_t)))
-#define VMX_ACTMAP_OFFSET (VMX_HPLIST_OFFSET + (GV_HPAGES * sizeof(vm_offset_t)))
-};
-#pragma pack()
-typedef struct pmap_vmm_ext pmap_vmm_ext;
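-
-#if 0	/* Illustrative sketch, not part of the original source: locating the
-	   hash page index from an effective pmap_vmm_ext address using the
-	   offsets defined above.  The physical-mode code does the equivalent
-	   arithmetic on the translated physical address. */
-static addr64_t *example_hash_index(pmap_vmm_ext *vmxe) {
-	return (addr64_t *)((char *)vmxe + VMX_HPIDX_OFFSET);	/* One 64-bit physical address per 4KB hash page */
-}
-#endif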
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-struct pmap {
-	queue_head_t	pmap_link;			/* MUST BE FIRST */
-	addr64_t		pmapvr;				/* Virtual to real conversion mask */
-	shexlock		pmapSXlk;			/* Shared/Exclusive lock for mapping changes */
-	unsigned int	space;				/* space for this pmap */
-#define invalSpace 0x00000001			/* Predefined always invalid space */
-	uint32_t	ref_count;			/* reference count */
-	unsigned int	pmapFlags;			/* Flags */
-#define pmapKeys	0x00000007			/* Keys and no execute bit to use with this pmap */
-#define pmapKeyDef	0x00000006			/* Default keys - Sup = 1, user = 1, no ex = 0 */
-#define pmapVMhost	0x00000010			/* pmap with Virtual Machines attached to it */
-#define pmapVMgsaa	0x00000020			/* Guest shadow assist active */
-#define pmapNXdisabled	0x00000040			/* no-execute disabled for this pmap */
-	unsigned int	spaceNum;			/* Space number */
-	unsigned int	pmapCCtl;			/* Cache control */
-#define pmapCCtlVal	0xFFFF0000			/* Valid entries */
-#define pmapCCtlLck	0x00008000			/* Lock bit */
-#define pmapCCtlLckb	16				/* Lock bit */
-#define pmapCCtlGen	0x00007FFF			/* Generation number */
-
-#define pmapSegCacheCnt 16				/* Maximum number of cache entries */
-#define pmapSegCacheUse	16				/* Number of cache entries to use */
-
-	struct pmap		*freepmap;			/* Free pmaps */
-	pmap_vmm_ext   *pmapVmmExt;			/* VMM extension block, for VMM host and guest pmaps */
-	addr64_t		pmapVmmExtPhys;		/* VMM extension block physical address */
-/*											0x038 */
-	uint64_t		pmapSCSubTag;		/* Segment cache sub-tags. This is a 16 entry 4 bit array */
-/*											0x040 */
-	sgc			pmapSegCache[pmapSegCacheCnt];	/* SLD values cached for quick load */
-
-/*											0x140 */	
-/* if fanout is 4, then shift is 1, if fanout is 8 shift is 2, etc */
-#define	kSkipListFanoutShift	1
-/* with n lists, we can handle (fanout**n) pages optimally */
-#define	kSkipListMaxLists		12    
-    unsigned char	pmapCurLists;		/*  0x140 - max #lists any mapping in this pmap currently has */
-    unsigned char	pmapRsv2[3];
-    uint32_t		pmapRandNum;		/* 0x144 - used by mapSetLists() as a random number generator */
-    addr64_t		pmapSkipLists[kSkipListMaxLists];	/* 0x148 - the list headers */
-/* following statistics conditionally gathered */
-    uint64_t		pmapSearchVisits;	/* 0x1A8 - nodes visited searching pmaps */
-    uint32_t		pmapSearchCnt;		/* 0x1B0 - number of calls to mapSearch or mapSearchFull */
-
-	unsigned int	pmapRsv3[3];
-
-/*											0x1C0 */	
-
-	struct pmap_statistics	stats;		/* statistics */
-	
-/* Need to pad out to a power of 2 - right now it is 512 bytes */
-#define pmapSize 512
-};
-#pragma pack()
-
-#pragma pack(4)
-struct pmapTransTab {
-	addr64_t		pmapPAddr;			/* Physical address of pmap */
-	unsigned int	pmapVAddr;			/* Virtual address of pmap */
-};
-#pragma pack()							/* Make sure the structure stays as we defined it */
-
-typedef struct pmapTransTab pmapTransTab;
-
-/*
- *	Address Chunk IDentified Table
- */
- 
-struct acidTabEnt {
-	unsigned int	acidVAddr;			/* Virtual address of pmap or pointer to next free entry */
-	unsigned int	acidGas;			/* reserved */
-	addr64_t		acidPAddr;			/* Physical address of pmap */
-};
-
-typedef struct acidTabEnt acidTabEnt;
-
-extern acidTabEnt *acidTab;				/* Pointer to acid table */
-extern acidTabEnt *acidFree;			/* List of free acid entries */
-
-#define PMAP_NULL  ((pmap_t) 0)
-
-extern pmap_t	cursor_pmap;			/* The pmap to start allocations with */
-extern pmap_t	sharedPmap;
-extern unsigned int sharedPage;
-extern int ppc_max_adrsp;				/* Maximum number of concurrent address spaces allowed. */	
-extern addr64_t vm_max_address;			/* Maximum effective address supported */
-extern addr64_t vm_max_physical;		/* Maximum physical address supported */
-extern pmapTransTab *pmapTrans;			/* Space to pmap translate table */
-#define	PMAP_SWITCH_USER(th, map, my_cpu) th->map = map;	
-
-#define PMAP_CONTEXT(pmap,th)
-
-#define pmap_kernel_va(VA)	\
-	(((VA) >= VM_MIN_KERNEL_ADDRESS) && ((VA) <= vm_last_addr))
-
-#define	PPC_SID_KERNEL  0       /* Must change KERNEL_SEG_REG0_VALUE if !0 */
-
-#define maxAdrSp 16384
-#define maxAdrSpb 14
-#define USER_MEM_WINDOW_VADDR	0x00000000E0000000ULL
-#define PHYS_MEM_WINDOW_VADDR	0x0000000100000000ULL
-#define IO_MEM_WINDOW_VADDR		0x0000000080000000ULL
-#define IO_MEM_WINDOW_SIZE		0x0000000080000000ULL
-#define pmapSmallBlock 65536
-
-#define pmap_kernel()			(kernel_pmap)
-#define	pmap_resident_count(pmap)	((pmap)->stats.resident_count)
-#define	pmap_resident_max(pmap)		((pmap)->stats.resident_max)
-#define pmap_remove_attributes(pmap,start,end)
-#define pmap_copy(dpmap,spmap,da,len,sa)
-#define	pmap_update()
-
-#define PMAP_DEFAULT_CACHE	0
-#define PMAP_INHIBIT_CACHE	1
-#define PMAP_GUARDED_CACHE	2
-#define PMAP_ACTIVATE_CACHE	4
-#define PMAP_NO_GUARD_CACHE	8
-
-/* corresponds to cached, coherent, not writethru, not guarded */
-#define VM_WIMG_DEFAULT		(VM_MEM_COHERENT)
-#define	VM_WIMG_COPYBACK	(VM_MEM_COHERENT)
-#define VM_WIMG_IO		(VM_MEM_COHERENT | 	\
-				VM_MEM_NOT_CACHEABLE | VM_MEM_GUARDED)
-#define VM_WIMG_WTHRU		(VM_MEM_WRITE_THROUGH | VM_MEM_COHERENT | VM_MEM_GUARDED)
-/* write combining mode, aka store gather */
-#define VM_WIMG_WCOMB		(VM_MEM_NOT_CACHEABLE | VM_MEM_COHERENT) 
-
-/* superpages */
-#define SUPERPAGE_NBASEPAGES 1	/* we don't support superpages on PowerPC */
-
-/* 
- * prototypes.
- */
-extern addr64_t	   	kvtophys(vm_offset_t va);				/* Get physical address from kernel virtual */
-extern vm_map_offset_t kvtophys64(vm_map_offset_t va);				/* Get 64-bit physical address from kernel virtual */
-extern vm_offset_t	pmap_map(vm_offset_t va,
-				 vm_offset_t spa,
-				 vm_offset_t epa,
-				 vm_prot_t prot,
-				 unsigned int flags);
-extern kern_return_t    pmap_add_physical_memory(vm_offset_t spa,
-						 vm_offset_t epa,
-						 boolean_t available,
-						 unsigned int attr);
-extern void		pmap_bootstrap(uint64_t msize,
-				       vm_offset_t *first_avail,
-				       unsigned int kmapsize);
-
-extern vm_offset_t pmap_boot_map(vm_size_t size);
-
-extern void sync_cache64(addr64_t pa, unsigned length);
-extern void sync_ppage(ppnum_t pa);
-extern void	sync_cache_virtual(vm_offset_t va, unsigned length);
-extern void flush_dcache(vm_offset_t va, unsigned length, boolean_t phys);
-extern void flush_dcache64(addr64_t va, unsigned length, boolean_t phys);
-extern void invalidate_dcache(vm_offset_t va, unsigned length, boolean_t phys);
-extern void invalidate_dcache64(addr64_t va, unsigned length, boolean_t phys);
-extern void invalidate_icache(vm_offset_t va, unsigned length, boolean_t phys);
-extern void invalidate_icache64(addr64_t va, unsigned length, boolean_t phys);
-extern void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, uint32_t size, vm_prot_t prot, int attr, unsigned int flags);
-extern int pmap_map_block_rc(pmap_t pmap, addr64_t va, ppnum_t pa, uint32_t size, vm_prot_t prot, int attr, unsigned int flags);
-
-extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va);
-extern void MapUserMemoryWindowInit(void);
-extern addr64_t MapUserMemoryWindow(vm_map_t map, addr64_t va);
-extern boolean_t pmap_eligible_for_execute(ppnum_t pa);
-extern int pmap_list_resident_pages(
-	struct pmap	*pmap,
-	vm_offset_t	*listp,
-	int		space);
-extern void pmap_init_sharedpage(vm_offset_t cpg);
-extern void pmap_disable_NX(pmap_t pmap);
-
-extern boolean_t	pmap_valid_page(
-				ppnum_t	pn);
-
-/* Not required for ppc: */
-static inline void pmap_set_4GB_pagezero(__unused pmap_t pmap) {}
-static inline void pmap_clear_4GB_pagezero(__unused pmap_t pmap) {}
-
-#endif /* _PPC_PMAP_H_ */
-
diff --git a/osfmk/ppc/pms.c b/osfmk/ppc/pms.c
deleted file mode 100644
index fb69f7618..000000000
--- a/osfmk/ppc/pms.c
+++ /dev/null
@@ -1,743 +0,0 @@
-/*
- * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#include <machine/machine_routines.h>
-#include <machine/machine_cpu.h>
-#ifdef __ppc__
-# include <ppc/exception.h>
-# include <ppc/misc_protos.h>
-# include <ppc/cpu_internal.h>
-#else
-# include <i386/cpu_data.h>
-# include <i386/misc_protos.h>
-#endif
-#include <machine/pmap.h>
-#include <kern/pms.h>
-#include <kern/processor.h>
-#include <kern/kalloc.h>
-#include <vm/vm_protos.h>
-
-extern int is_suser(void);
-
-static uint32_t pmsSyncrolator = 0;					/* Only one control operation at a time please */
-uint32_t pmsBroadcastWait = 0;						/* Number of outstanding broadcasts */
-
-int pmsInstalled = 0;								/* Power Management Stepper can run and has table installed */
-int pmsExperimental = 0;							/* Power Management Stepper in experimental mode */
-decl_simple_lock_data(,pmsBuildLock)				/* Make sure only one guy can replace the table at a time */
-
-static pmsDef *altDpmsTab;						/* Alternate step definition table */
-static uint32_t altDpmsTabSize = 0;					/* Size of alternate step definition table */
-
-pmsDef pmsDummy = {									/* This is the dummy step for initialization.  All it does is to park */
-	.pmsLimit = 0,									/* Time doesn't matter for a park */
-	.pmsStepID = pmsMaxStates - 1,					/* Use the very last ID number for the dummy */
-	.pmsSetCmd = pmsParkIt,							/* Force us to be parked */
-	.sf.pmsSetFuncInd = 0,							/* No platform call for this one */
-	.pmsDown = pmsPrepSleep,						/* We always park */
-	.pmsNext = pmsPrepSleep							/* We always park */
-};
-
-pmsStat pmsStatsd[4][pmsMaxStates];					/* Generate enough statistics blocks for 4 processors */
-
-pmsCtl pmsCtls = {									/* Power Management Stepper control */
-	.pmsStats = pmsStatsd,
-};
-
-pmsSetFunc_t pmsFuncTab[pmsSetFuncMax] = {NULL};		/* This is the function index table */
-pmsQueryFunc_t pmsQueryFunc;					/* Pointer to pmsQuery function */
-uint32_t pmsPlatformData = 0;						/* Data provided by and passed to platform functions */
-
-#ifdef __ppc__
-# define PER_PROC_INFO		struct per_proc_info
-# define GET_PER_PROC_INFO()	getPerProc()
-#else
-# define PER_PROC_INFO 		cpu_data_t
-# define GET_PER_PROC_INFO()	current_cpu_datap()
-#endif
-
-
-
-/*
- *	Do any initialization needed
- */
- 
-void
-pmsInit(void)
-{
-	int i;
-	
-	simple_lock_init(&pmsBuildLock, 0);				/* Initialize the build lock */
-	for(i = 0; i < pmsMaxStates; i++) pmsCtls.pmsDefs[i] = &pmsDummy;	/* Initialize the table to dummy steps */
-
-	pmsCPUMachineInit();
-}
-
-
-/*
- *	Start the power management stepper on all processors
- *
- *	All processors must be parked.  This should be called when the hardware
- *	is ready to step.  Probably only at boot and after wake from sleep.
- *
- */
- 
-void
-pmsStart(void)
-{
-	boolean_t	intr;
-
-	if(!pmsInstalled)
-		return;						/* We can't do this if no table installed */
-
-	intr = ml_set_interrupts_enabled(FALSE);		/* No interruptions in here */
-	pmsRun(pmsStartUp);								/* Start running the stepper everywhere */
-	(void)ml_set_interrupts_enabled(intr);			/* Restore interruptions */
-}
- 
-
-/*
- *	Park the stepper execution.  This will force the stepper on this
- *	processor to abandon its current step and stop.  No changes to the
- *	hardware state are made and any previous step is lost.
- *	
- *	This is used as the initial state at startup and when the step table
- *	is being changed.
- *
- */
- 
-void
-pmsPark(void)
-{
-	boolean_t	intr;
-
-	if(!pmsInstalled)
-		return;						/* We can't do this if no table installed */
-
-	intr = ml_set_interrupts_enabled(FALSE);		/* No interruptions in here */
-	pmsSetStep(pmsParked, 0);						/* Park the stepper */
-	(void)ml_set_interrupts_enabled(intr);			/* Restore interruptions */
-}
- 
-
-/*
- *	Steps down to a lower power.
- *	Interrupts must be off...
- */
-
-void
-pmsDown(void)
-{
-	PER_PROC_INFO *pp;
-	uint32_t nstate;
-	
-	pp = GET_PER_PROC_INFO();								/* Get our per_proc */
-	
-	if(!pmsInstalled || pp->pms.pmsState == pmsParked)
-		return;		/* No stepping if parked or not installed */
-	
-	nstate = pmsCtls.pmsDefs[pp->pms.pmsState]->pmsDown;	/* Get the downward step */
-	pmsSetStep(nstate, 0);							/* Step to it */
-}
-
-
-/*
- *	Steps up to a higher power.  The "timer" parameter is true if the
- *	step was driven due to the pms timer expiring.
- *
- *	Interrupts must be off...
- */
-
-int pmsStepIdleSneaks;
-int pmsStepIdleTries;
- 
-void
-pmsStep(int timer)
-{
-	PER_PROC_INFO	*pp;
-	uint32_t	nstate;
-	uint32_t	tstate;
-	uint32_t	pkgstate;
-	int		dir;
-	int		i;
-	
-	pp = GET_PER_PROC_INFO();								/* Get our per_proc */
-
-	if(!pmsInstalled || pp->pms.pmsState == pmsParked)
-		return;	/* No stepping if parked or not installed */
-	
-	/*
-	 * Assume a normal step.
-	 */
-	nstate = pmsCtls.pmsDefs[pp->pms.pmsState]->pmsNext;
-
-	/*
-	 * If we are idling and being asked to step up, check to see whether
-	 * the package we're in is already at a non-idle power state.  If so,
-	 * attempt to work out what state that is, and go there directly to
-	 * avoid wasting time ramping up.
-	 */
-	if ((pp->pms.pmsState == pmsIdle)
-	    && ((pkgstate = pmsCPUPackageQuery()) != ~(uint32_t)0)) {
-		/*
-		 * Search forward through the stepper program,
-		 * avoid looping for too long.
-		 */
-		tstate = nstate;
-		pmsStepIdleTries++;
-		for (i = 0; i < 32; i++) {
-		    /*
-		     * Compare command with current package state
-		     */
-		    if ((pmsCtls.pmsDefs[tstate]->pmsSetCmd & pmsCPU) == pkgstate) {
-			nstate = tstate;
-			pmsStepIdleSneaks++;
-			break;
-		    }
-
-		    /*
-		     * Advance to the next step in the program.
-		     */
-		    if (pmsCtls.pmsDefs[tstate]->pmsNext == tstate)
-			break;	/* infinite loop */
-		    tstate = pmsCtls.pmsDefs[tstate]->pmsNext;
-		}
-	}
-
-	/*
-	 * Default to a step up.
-	 */
-	dir = 1;
-
-	/*
-	 * If we are stepping as a consequence of timer expiry, select the
-	 * alternate exit path and note this as downward step for accounting
-	 * purposes.
-	 */
-	if (timer
-	    && (pmsCtls.pmsDefs[pp->pms.pmsState]->pmsSetCmd == pmsDelay)) {
-	    nstate = pmsCtls.pmsDefs[pp->pms.pmsState]->pmsTDelay;
-
-	    /*
-	     * Delayed steps are a step down for accounting purposes.
-	     */
-	    dir = 0;
-	}
-
-	pmsSetStep(nstate, dir);
-}
-
-
-/*
- *	Set a specific step
- *
- *	We do not do statistics if exiting park
- *	Interrupts must be off...
- *
- */
-
-void
-pmsSetStep(uint32_t nstep, int dir)
-{
-	PER_PROC_INFO *pp;
-	uint32_t pstate, nCSetCmd, mCSetCmd;
-	pmsDef *pnstate, *pcstate;
-	uint64_t tb, dur;
-	int cpu;
-
-	pp = GET_PER_PROC_INFO();								/* Get our per_proc */
-	cpu = cpu_number();								/* Get our processor */
-	
-	while(1) {										/* Keep stepping until we get a delay */
-		
-		if(pp->pms.pmsCSetCmd & pmsMustCmp) {		/* Do we have to finish the delay before changing? */
-			while(mach_absolute_time() < pp->pms.pmsPop);	/* Yes, spin here... */
-		}
-		
-		if((nstep == pmsParked) || ((uint32_t)pmsCtls.pmsDefs[nstep]->pmsSetCmd == pmsParkIt)) {	/* Are we parking? */
-			
-			tb = mach_absolute_time();				/* What time is it? */
-			pp->pms.pmsStamp = tb;					/* Show transition now */
-			pp->pms.pmsPop = HalfwayToForever;		/* Set the pop way into the future */
-			pp->pms.pmsState = pmsParked;			/* Make sure we are parked */
-			etimer_resync_deadlines();							/* Cancel our timer if going */
-			return;
-		}
-
-		pnstate = pmsCtls.pmsDefs[nstep];			/* Point to the state definition */ 
-		pstate = pp->pms.pmsState;					/* Save the current step */
-		pp->pms.pmsState = nstep;					/* Set the current to the next step */
-
-		if(pnstate->pmsSetCmd != pmsDelay) {		/* If this is not a delayed state, change the actual hardware now */
-			if(pnstate->pmsSetCmd & pmsCngCPU) pmsCPUSet(pnstate->pmsSetCmd);	/* We have some CPU work to do... */
-			if((uint32_t)pnstate->sf.pmsSetFunc) pnstate->sf.pmsSetFunc(pnstate->pmsSetCmd, cpu, pmsPlatformData);	/* Tell the platform to set power mode */
-	
-			mCSetCmd = pnstate->pmsSetCmd & (pmsCngXClk | pmsCngCPU | pmsCngVolt);	/* Isolate just the change flags */
-			mCSetCmd = (mCSetCmd - (mCSetCmd >> 7)) | pmsSync | pmsMustCmp | pmsPowerID;	/* Form mask of bits that come from new command */
-			nCSetCmd = pp->pms.pmsCSetCmd & ~mCSetCmd;	/* Clear changing bits */
-			nCSetCmd = nCSetCmd | (pnstate->pmsSetCmd & mCSetCmd);	/* Flip on the changing bits and the always copy bits */
-	
-			pp->pms.pmsCSetCmd = nCSetCmd;			/* Set it for real */
-		}
-	
-		tb = mach_absolute_time();					/* What time is it? */
-		pp->pms.pmsPop = tb + pnstate->pmsLimit;	/* Set the next pop */
-	
-		if((pnstate->pmsSetCmd != pmsDelay) && (pp->pms.pmsCSetCmd & pmsSync) && (pnstate->pmsLimit != 0)) {	/* Is this a synchronous command with a delay? */
-			while(mach_absolute_time() < pp->pms.pmsPop);	/* Yes, spin here and wait it out... */
-		}
-
-/*
- *		Gather some statistics
- */
-	  
-		dur = tb - pp->pms.pmsStamp;				/* Get the amount of time we were in the old step */
-		pp->pms.pmsStamp = tb;						/* Set the new timestamp */
-		if(!(pstate == pmsParked)) {				/* Only take stats if we were not parked */
-			pcstate = pmsCtls.pmsDefs[pstate];		/* Get the previous step */
-			pmsCtls.pmsStats[cpu][pcstate->pmsStepID].stTime[dir] += dur;	/* Accumulate the total time in the old step */	
-			pmsCtls.pmsStats[cpu][pcstate->pmsStepID].stCnt[dir] += 1;	/* Count transitions */
-		}
-
-/*
- *		See if we are done chaining steps
- */
- 
-		if((pnstate->pmsSetCmd == pmsDelay) 
-			|| (!(pp->pms.pmsCSetCmd & pmsSync) && (pnstate->pmsLimit != 0))) {	/* Is this a delayed step, or not synchronous with a non-zero delay? */
-			etimer_resync_deadlines();							/* Start the timers ticking */
-			break;									/* We've stepped as far as we're going to... */
-		}
-		
-		nstep = pnstate->pmsNext;					/* Chain on to the next */
-	}
-}
-
-/*
- *	Either park the stepper or force the step on a parked stepper for local processor only
- *
- */
- 
-void
-pmsRunLocal(uint32_t nstep)
-{
-	PER_PROC_INFO *pp;
-	uint32_t lastState;
-	int cpu, i;
-	boolean_t	intr;
-
-	if(!pmsInstalled) /* Ignore this if no step programs installed... */
-		return;
-
-	intr = ml_set_interrupts_enabled(FALSE);		/* No interruptions in here */
-
-	pp = GET_PER_PROC_INFO();								/* Get our per_proc */
-
-	if(nstep == pmsStartUp) {						/* Should we start up? */
-		pmsCPUInit();								/* Get us up to full with high voltage and park */
-		nstep = pmsNormHigh;						/* Change request to transition to normal high */
-	}
-
-	lastState = pp->pms.pmsState;					/* Remember if we are parked now */
-
-	pmsSetStep(nstep, 1);							/* Step to the new state */
-	
-	if((lastState == pmsParked) && (pp->pms.pmsState != pmsParked)) {	/* Did we just unpark? */
-		cpu = cpu_number();							/* Get our processor */
-		for(i = 0; i < pmsMaxStates; i++) {			/* Step through the steps and clear the statistics since we were parked */
-			pmsCtls.pmsStats[cpu][i].stTime[0] = 0;	/* Clear accumulated time - downward */	
-			pmsCtls.pmsStats[cpu][i].stTime[1] = 0;	/* Clear accumulated time - forward */	
-			pmsCtls.pmsStats[cpu][i].stCnt[0] = 0;	/* Clear transition count - downward */
-			pmsCtls.pmsStats[cpu][i].stCnt[1] = 0;	/* Clear transition count - forward */
-		}
-	}
-
-	(void)ml_set_interrupts_enabled(intr);			/* Restore interruptions */
-}
-
-/*
- *	Control the Power Management Stepper.
- *	Called from user state by the superuser.
- *	Interruptions disabled.
- *
- */
-kern_return_t
-pmsControl(uint32_t request, user_addr_t reqaddr, uint32_t reqsize)
-{
-	uint32_t nstep = 0, result, presult;
-	int ret, cpu;
-	kern_return_t kret = KERN_SUCCESS;
-	pmsDef *ndefs;
-	PER_PROC_INFO *pp;
-
-	pp = GET_PER_PROC_INFO();								/* Get our per_proc */
-	cpu = cpu_number();								/* Get our processor */
-	
-	if(!is_suser()) {								/* We are better than most, */
-		kret = KERN_FAILURE;
-		goto out;
-	}
-
-	if(request >= pmsCFree) {					/* Can we understand the request? */
-		kret = KERN_INVALID_ARGUMENT;
-		goto out;
-	}
-	
-	if(request == pmsCQuery) {						/* Are we just checking? */
-		result = pmsCPUQuery() & pmsCPU;			/* Get the processor data and make sure there is no slop */
-		presult = 0;								/* Assume nothing */
-		if((uint32_t)pmsQueryFunc)
-			presult = pmsQueryFunc(cpu, pmsPlatformData);	/* Go get the platform state */
-		kret = result | (presult & (pmsXClk | pmsVoltage | pmsPowerID));	/* Merge the platform state with no slop */
-		goto out;
-	}
-	
-	if(request == pmsCExperimental) {				/* Enter experimental mode? */
-	
-		if(pmsInstalled || (pmsExperimental & 1)) {	/* Are we already running or in experimental? */
-			kret = KERN_FAILURE;
-			goto out;
-		}
-		
-		pmsExperimental |= 1;						/* Flip us into experimental but don't change other flags */
-		
-		pmsCPUConf();								/* Configure for this machine */
-		pmsStart();									/* Start stepping */
-		goto out;
-	}
-
-	if(request == pmsCCnfg) {						/* Do some up-front checking before we commit to doing this */
-		if((reqsize > (pmsMaxStates * sizeof(pmsDef))) || (reqsize < (pmsFree * sizeof(pmsDef)))) {	/* Check that the size is reasonable */
-			kret = KERN_NO_SPACE;
-			goto out;
-		}
-	}
-
-	if (request == pmsGCtls) {
-		if (reqsize != sizeof(pmsCtls)) {
-			kret = KERN_FAILURE;
-			goto out;
-		}
-		ret = copyout(&pmsCtls, reqaddr, reqsize);
-		goto out;
-	}
-			
-	if (request == pmsGStats) {
-		if (reqsize != sizeof(pmsStatsd)) { /* request size is fixed */
-			kret = KERN_FAILURE;
-			goto out;
-		}
-		ret = copyout(&pmsStatsd, reqaddr, reqsize);
-		goto out;
-	}
-
-/*
- *	We are committed after here.  If there are any errors detected, we shouldn't die, but we
- *	will be stuck in park.
- *
- *	Also, we can possibly end up on another processor after the broadcast.
- *
- */
- 		
-	if(!hw_compare_and_store(0, 1, &pmsSyncrolator)) {	/* Are we already doing this? */
-		/* Tell them that we are already busy and to try again */
-		kret = KERN_RESOURCE_SHORTAGE;
-		goto out;
-	}
-
-//	NOTE:  We will block in the following code until everyone has finished the prepare
-
-	pmsRun(pmsPrepCng);								/* Get everyone parked and in a proper state for step table changes, including me */
-	
-	if(request == pmsCPark) {						/* Is all we're supposed to do park? */
-		pmsSyncrolator = 0;							/* Free us up */
-		goto out;
-	}
-	
-	switch(request) {								/* Select the routine */
-
-		case pmsCStart:								/* Starts normal stepping */
-			nstep = pmsNormHigh;					/* Set the request */
-			break;
-
-		case pmsCFLow:								/* Forces low power */
-			nstep = pmsLow;							/* Set request */
-			break;
-
-		case pmsCFHigh:								/* Forces high power */
-			nstep = pmsHigh;						/* Set request */
-			break;
-
-		case pmsCCnfg:								/* Loads new stepper program */
-			
-			if(!(ndefs = (pmsDef *)kalloc(reqsize))) {	/* Get memory for the whole thing */
-				pmsSyncrolator = 0;					/* Free us up */
-				kret = KERN_INVALID_ADDRESS;
-				goto out;
-			}
-			
-			ret = copyin(reqaddr, (void *)ndefs, reqsize);	/* Get the new config table */
-			if(ret) {								/* Hmmm, something went wrong with the copyin */
-				kfree(ndefs, reqsize);	/* Free up the copied in data */
-				pmsSyncrolator = 0;					/* Free us up */
-				kret = KERN_INVALID_ADDRESS;
-				goto out;
-			}
-
-			kret = pmsBuild(ndefs, reqsize, NULL, 0, NULL);	/* Go build and replace the tables.  Make sure we keep the old platform stuff */
-			if(kret) {								/* Hmmm, something went wrong with the compilation */
-				kfree(ndefs, reqsize);	/* Free up the copied in data */
-				pmsSyncrolator = 0;					/* Free us up */
-				goto out;
-			}
-
-			nstep = pmsNormHigh;					/* Set the request */
-			break;
-
-		default:
-			panic("pmsCntrl: stepper control is so very, very confused = %08X\n", request);
-	
-	}
-
-	pmsRun(nstep);									/* Get everyone into step */
-	pmsSyncrolator = 0;								/* Free us up */
-out:
-	return kret;
-
-}
-
-/*
- *	Broadcast a change to all processors including ourselves.
- *
- *	Interruptions are disabled.
- */
- 
-void
-pmsRun(uint32_t nstep)
-{
-	pmsCPURun(nstep);
-}
-
-
-/*
- *	Build the tables needed for the stepper.  This includes both the step definitions and the step control table.
- *
- *	We most absolutely need to be parked before this happens because we're gonna change the table.
- *	We're going to have to be pretty complete about checking for errors.
- *	Also, a copy is always made because we don't want to be crippled by not being able to change
- *	the table or description formats.
- *
- *	We pass in a table of external functions and the new stepper def uses the corresponding 
- *	indexes rather than actual function addresses.  This is done so that a proper table can be
- *	built with the control syscall.  It can't supply addresses, so the index has to do.  We
- *	internalize the table so our caller does not need to keep it.  Note that passing in a 0
- *	will use the current function table.  Also note that entry 0 is reserved and must be 0,
- *	we will check and fail the build.
- *
- *	The platformData parameter is a 32-bit word of data that is passed unaltered to the set function.
- *
- *	The queryFunc parameter is the address of a function that will return the current state of the platform.
- *	The format of the data returned is the same as the platform specific portions of pmsSetCmd, i.e., pmsXClk,
- *	pmsVoltage, and any part of pmsPowerID that is maintained by the platform hardware (an example would be
- *	the values of the gpios that correspond to pmsPowerID).  The value should be constructed by querying
- *	hardware rather than returning a value cached by software. One of the intents of this function is to 
- *	help recover lost or determine initial power states.
- *
- */
- 
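-#if 0	/* Illustrative sketch, not part of the original source: a hypothetical
-	   platform driver handing pmsBuild a schematic two-step table.  Real
-	   tables need at least pmsFree entries with meaningful commands, so
-	   this would be rejected as-is; myFuncTab and myQuery are assumptions
-	   for the example, and entry 0 of the function table must be 0. */
-static kern_return_t example_install_steps(pmsSetFunc_t *myFuncTab, pmsQueryFunc_t myQuery) {
-
-	static pmsDef mySteps[2] = {
-		{ .pmsStepID = 0, .pmsSetCmd = pmsParkIt, .pmsLimit = 0,	/* Step IDs must equal table indexes */
-		  .sf.pmsSetFuncInd = 0, .pmsDown = pmsParked, .pmsNext = pmsParked },
-		{ .pmsStepID = 1, .pmsSetCmd = pmsParkIt, .pmsLimit = 0,
-		  .sf.pmsSetFuncInd = 0, .pmsDown = pmsParked, .pmsNext = pmsParked },
-	};
-
-	return pmsBuild(&mySteps[0], sizeof(mySteps), myFuncTab, 0, myQuery);	/* Verify, copy, and install the table */
-}
-#endif
-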
-kern_return_t pmsBuild(pmsDef *pd, uint32_t pdsize, pmsSetFunc_t *functab, uint32_t platformData, pmsQueryFunc_t queryFunc) {
-
-	int newsize, cstp, oldAltSize, xdsply;
-	uint32_t setf, steps, i, nstps;
-	uint64_t nlimit;
-	pmsDef *newpd, *oldAlt;
-	boolean_t intr;
-
-	xdsply = (pmsExperimental & 3) != 0;			/* Turn on kprintfs if requested or in experimental mode */
-
-	if(pdsize % sizeof(pmsDef))
-		return KERN_INVALID_ARGUMENT;	/* Length not multiple of definition size */
-	
-	steps = pdsize / sizeof(pmsDef);				/* Get the number of steps supplied */
-
-	if((steps >= pmsMaxStates) || (steps < pmsFree))	/* Complain if too big or too small */
-		return KERN_INVALID_ARGUMENT;			/* Squeak loudly!!! */
-			
-	if((uint32_t)functab && (uint32_t)functab[0])	/* Verify that if they supplied a new function table, entry 0 is 0 */
-		return KERN_INVALID_ARGUMENT;				/* Fail because they didn't reserve entry 0 */
-			
-	if(xdsply) kprintf("\n  StepID   Down   Next    HWSel  HWfun                Limit\n");
-
-	for(i = 0; i < steps; i++) {					/* Step through and verify the definitions */
-
-		if(xdsply) kprintf("  %6d %6d %6d %08X %6d %20lld\n", pd[i].pmsStepID, pd[i].pmsDown, 
-			pd[i].pmsNext, pd[i].pmsSetCmd,
-			pd[i].sf.pmsSetFuncInd, pd[i].pmsLimit);
-
-		if((pd[i].pmsLimit != 0) && (pd[i].pmsLimit < 100ULL)) {
-			if(xdsply) kprintf("error step %3d: pmsLimit too small/n", i);
-			return KERN_INVALID_ARGUMENT;	/* Has to be 100µS or more */
-		}
-		
-		if((pd[i].pmsLimit != 0xFFFFFFFFFFFFFFFFULL) && (pd[i].pmsLimit > (HalfwayToForever / 1000ULL))) {
-			if(xdsply) kprintf("error step %3d: pmsLimit too big\n", i);
-			return KERN_INVALID_ARGUMENT;			/* Can't be too big */
-		}
-		
-		if(pd[i].pmsStepID != i) {
-			if(xdsply) kprintf("error step %3d: step ID does not match (%d)\n", i, pd[i].pmsStepID);
-			return KERN_INVALID_ARGUMENT;	/* ID must match */
-		}
-
-		if(pd[i].sf.pmsSetFuncInd >= pmsSetFuncMax) {
-			if(xdsply) kprintf("error step %3d: function invalid (%d)\n", i, pd[i].sf.pmsSetFuncInd);
-			return KERN_INVALID_ARGUMENT;	/* Fail if this function is not in the table */
-		}
-		
-		if((pd[i].pmsDown != pmsParked) && pd[i].pmsDown >= steps) {
-			if(xdsply) kprintf("error step %3d: pmsDown out of range (%d)\n", i, pd[i].pmsDown);
-			return KERN_INVALID_ARGUMENT;	/* Step down must be in the table or park */
-		}
-		
-		if((pd[i].pmsNext != pmsParked) && pd[i].pmsNext >= steps) {
-			if(xdsply) kprintf("error step %3d: pmsNext out of range (%d)\n", i, pd[i].pmsNext);
-			return KERN_INVALID_ARGUMENT;	/* Step up must be in the table or park */
-		}
-		
-		if((pd[i].pmsSetCmd == pmsDelay) && (pd[i].pmsTDelay >= steps)) {
-			if(xdsply) kprintf("error step %3d: pmsTDelay out of range (%d)\n", i, pd[i].pmsTDelay);
-			return KERN_INVALID_ARGUMENT;	/* Delayed step must be in the table */
-		}
-		
-		if((pd[i].pmsSetCmd == pmsDelay) && (pd[i].pmsLimit == 0xFFFFFFFFFFFFFFFFULL)) {
-			if(xdsply) kprintf("error step %3d: delay time limit must not be infinite\n", i);
-			return KERN_INVALID_ARGUMENT;	/* Delayed step must have a time limit */
-		}
-		
-	}
-	
-/*
- *	Verify that there are no infinite synchronous forward loops in the table
- */
- 
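-/*
- *	Why the bound below suffices (comment added for clarity): every link in a
- *	chain either terminates it (a park or delay command, a timed
- *	non-synchronous step, or a parked pmsNext) or advances to one of the
- *	`steps' entries.  Following pmsNext more than `steps' times must
- *	therefore revisit an entry, i.e., the chain cycles through synchronous,
- *	unlimited steps forever -- exactly the case we reject.
- */
- 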
-	if(xdsply) kprintf("\nInitial scan passed, start in loop check\n");
-	for(i = 0; i < steps; i++) {					/* Start with each step. Inefficient, but who cares */
- 
-		cstp = i;									/* Set starting point */
-		nstps = 0;									/* Initialize chain length counter */
-		while(1) {									/* Do until we hit the end */
-			if(pd[cstp].pmsSetCmd == pmsParkIt) break;	/* Parking always terminates a chain so no endless loop here */
-			if(pd[cstp].pmsSetCmd == pmsDelay) break;	/* Delayed steps always terminate a chain so no endless loop here */
-			if((pd[cstp].pmsLimit != 0) && ((pd[cstp].pmsSetCmd & pmsSync) != pmsSync)) break;	/* If the time limit is not 0 and the step is not synchronous, no endless loop */
-			if(pd[cstp].pmsNext == pmsParked) break;	/* If the next step is parked, no endless loop */
-			
- 			cstp = pd[cstp].pmsNext;				/* Chain to the next */
- 			nstps = nstps + 1;						/* Count this step */
- 			if(nstps >= steps) {					/* We've stepped for more steps than we have, must be an endless loop! */
-				if(xdsply) kprintf("error step %3d: infinite pmsNext loop\n", i);
-		 		return KERN_INVALID_ARGUMENT;		/* Suggest to our caller that they can't program... */
- 			}
- 		}
-	}
-	
-	if((pmsExperimental & 4) && (pmsInstalled) && ((uint32_t)functab != 0)) {	/* If experimental mode is locked in, we are already initialized, and a driver supplied a function table */
-		if(xdsply) kprintf("Experimental locked, ignoring driver pmsBuild\n");
-		return KERN_RESOURCE_SHORTAGE;				/* Just ignore the request. */
-	}
-	
-	
-	
-/*
- *	Well, things look ok, let's do it to it...
- */
-
-	if(xdsply) kprintf("Loop check passed, building and installing table\n");
-
-	newsize = steps * sizeof(pmsDef);				/* Get the size needed for the definition blocks */
-
-	if(!(newpd = (pmsDef *)kalloc(newsize))) {		/* Get memory for the whole thing */
-		return KERN_RESOURCE_SHORTAGE;				/* No storage... */
-	}
-	
-	bzero((void *)newpd, newsize);					/* Make it pretty */
-	
-/*
- *	Ok, this is it, finish initializing, switch the tables, and pray...
- *	We want no interruptions at all and we need to lock the table.  Everybody should be parked,
- *	so no one should ever touch this.  The lock is to keep multiple builders safe.  It probably
- *	will never ever happen, but paranoia is a good thing...
- */
- 
-	intr = ml_set_interrupts_enabled(FALSE);		/* No interruptions in here */
-	simple_lock(&pmsBuildLock);						/* Lock out everyone... */
-	
-	if(platformData) pmsPlatformData = platformData;	/* Remember the platform data word passed in if any was... */
-	if((uint32_t)queryFunc) pmsQueryFunc = queryFunc;	/* Remember the query function passed in, if it was... */
-	
-	oldAlt = altDpmsTab;							/* Remember any old alternate we had */
-	oldAltSize = altDpmsTabSize;					/* Remember its size */
-
-	altDpmsTab = newpd;								/* Point to the new table */
-	altDpmsTabSize = newsize;						/* Set the size */
-	
-	if((uint32_t)functab) {							/* Did we get a new function table? */
-		for(i = 0; i < pmsSetFuncMax; i++) pmsFuncTab[i] = functab[i];	/* Copy in the new table */
-	}
-
-	for(i = 0; i < pmsMaxStates; i++) pmsCtls.pmsDefs[i] = &pmsDummy;	/* Initialize the table to point to the dummy step */
-
-	for(i = 0; i < steps; i++) {					/* Replace the step table entries */
-		if(pd[i].pmsLimit == 0xFFFFFFFFFFFFFFFFULL) nlimit = century;	/* Default to 100 years */
-		else nlimit = pd[i].pmsLimit;				/* Otherwise use what was supplied */
-		
-		nanoseconds_to_absolutetime(nlimit * 1000ULL, &newpd[i].pmsLimit);	/* Convert microseconds to nanoseconds and then to ticks */
-	
-		setf = pd[i].sf.pmsSetFuncInd;					/* Make a convenient local copy */
-		newpd[i].sf.pmsSetFunc = pmsFuncTab[setf];		/* Replace the index with the function address */
-	 
-		newpd[i].pmsStepID  = pd[i].pmsStepID;		/* Set the step ID */ 
-		newpd[i].pmsSetCmd  = pd[i].pmsSetCmd;		/* Set the hardware selector ID */
-		newpd[i].pmsDown    = pd[i].pmsDown;		/* Set the downward step */
-		newpd[i].pmsNext    = pd[i].pmsNext;		/* Set the next step */
-		newpd[i].pmsTDelay  = pd[i].pmsTDelay;		/* Set the delayed step */
-		pmsCtls.pmsDefs[i]  = &newpd[i];			/* Copy it in */
-	}
-#ifdef __ppc__	
-	pmsCtlp = (uint32_t)&pmsCtls;					/* Point to the new pms table */
-#endif
- 	pmsInstalled = 1;								/* The stepper has been born or born again... */
-
-	simple_unlock(&pmsBuildLock);					/* Free play! */
-	(void)ml_set_interrupts_enabled(intr);			/* Interrupts back the way they were */
-
-	if((uint32_t)oldAlt) /* If we already had an alternate, free it */
-		kfree(oldAlt, oldAltSize);
-
-	if(xdsply) kprintf("Stepper table installed\n");
-	
-	return KERN_SUCCESS;							/* We're in fate's hands now... */
-}
diff --git a/osfmk/ppc/pmsCPU.c b/osfmk/ppc/pmsCPU.c
deleted file mode 100644
index 0b12f2d31..000000000
--- a/osfmk/ppc/pmsCPU.c
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#include <ppc/machine_routines.h>
-#include <ppc/machine_cpu.h>
-#include <ppc/exception.h>
-#include <ppc/misc_protos.h>
-#include <ppc/Firmware.h>
-#include <ppc/pmap.h>
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <kern/pms.h>
-#include <ppc/savearea.h>
-#include <ppc/Diagnostics.h>
-#include <kern/processor.h>
-
-
-static void pmsCPURemote(uint32_t nstep);
-
-
-pmsDef pmsDefault[] = {
-	{
-		.pmsLimit = century,							/* We can normally stay here for 100 years */
-		.pmsStepID = pmsIdle,							/* Unique identifier to this step */
-		.pmsSetCmd = 0,									/* Dummy platform power level */
-		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
-		.pmsDown = pmsIdle,								/* We stay here */
-		.pmsNext = pmsNorm								/* Next step */
-	},
-	{
-		.pmsLimit = century,							/* We can normally stay here for 100 years */
-		.pmsStepID = pmsNorm,							/* Unique identifier to this step */
-		.pmsSetCmd = 0,									/* Dummy platform power level */
-		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
-		.pmsDown = pmsIdle,								/* Down to idle */
-		.pmsNext = pmsNorm								/* Next step */
-	},
-	{
-		.pmsLimit = century,							/* We can normally stay here for 100 years */
-		.pmsStepID = pmsNormHigh,						/* Unique identifier to this step */
-		.pmsSetCmd = 0,									/* Dummy platform power level */
-		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
-		.pmsDown = pmsIdle,								/* Down to idle */
-		.pmsNext = pmsNormHigh							/* Next step */
-	},
-	{
-		.pmsLimit = century,							/* We can normally stay here for 100 years */
-		.pmsStepID = pmsBoost,							/* Unique identifier to this step */
-		.pmsSetCmd = 0,									/* Dummy platform power level */
-		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
-		.pmsDown = pmsIdle,								/* Step down */
-		.pmsNext = pmsBoost								/* Next step */
-	},	
-	{	
-		.pmsLimit = century,							/* We can normally stay here for 100 years */
-		.pmsStepID = pmsLow,							/* Unique identifier to this step */
-		.pmsSetCmd = 0,									/* Dummy platform power level */
-		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
-		.pmsDown = pmsLow,								/* We always stay here */
-		.pmsNext = pmsLow								/* We always stay here */
-	},	
-	{	
-		.pmsLimit = century,							/* We can normally stay here for 100 years */
-		.pmsStepID = pmsHigh,							/* Unique identifier to this step */
-		.pmsSetCmd = 0,									/* Dummy platform power level */
-		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
-		.pmsDown = pmsHigh,								/* We always stay here */
-		.pmsNext = pmsHigh								/* We always stay here */
-	},	
-	{	
-		.pmsLimit = 0,									/* Time doesn't matter for a prepare for change */
-		.pmsStepID = pmsPrepCng,						/* Unique identifier to this step */
-		.pmsSetCmd = pmsParkIt,							/* Force us to be parked */
-		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
-		.pmsDown = pmsPrepCng,							/* We always stay here */
-		.pmsNext = pmsPrepCng							/* We always stay here */
-	},	
-	{	
-		.pmsLimit = 0,									/* Time doesn't matter for a prepare for sleep */
-		.pmsStepID = pmsPrepSleep,						/* Unique identifier to this step */
-		.pmsSetCmd = pmsParkIt,							/* Force us to be parked */
-		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
-		.pmsDown = pmsPrepSleep,						/* We always stay here */
-		.pmsNext = pmsPrepSleep							/* We always stay here */
-	},	
-	{	
-		.pmsLimit = 0,									/* Time doesn't matter for a prepare for sleep */
-		.pmsStepID = pmsOverTemp,						/* Unique identifier to this step */
-		.pmsSetCmd = 0,									/* Dummy platform power level */
-		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
-		.pmsDown = pmsOverTemp,							/* We always stay here */
-		.pmsNext = pmsOverTemp							/* We always stay here */
-	}	
-};
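-
-/*
- *	Shape of the default table (summary added for clarity): pmsIdle steps up
- *	to pmsNorm; pmsNorm, pmsNormHigh, and pmsBoost step down to pmsIdle and
- *	hold at themselves via pmsNext; pmsLow, pmsHigh, and pmsOverTemp point
- *	only at themselves; and the two Prep steps use pmsParkIt, so entering
- *	them parks the stepper.
- */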
-
-
-
-/*
- *	This is where the CPU part of the stepper code lives.   
- *
- *	It also contains the "hacked kext" experimental code.  This is/was used for
- *	experimentation and bringup.  It should neither live long nor prosper.
- *
- */
-
-/*
- *	Set the processor frequency and stuff
- */
-
-void pmsCPUSet(uint32_t sel) {
-	int nfreq;
-	struct per_proc_info *pp;
-
-	pp = getPerProc();									/* Get our per_proc */
-
-	if(!((sel ^ pp->pms.pmsCSetCmd) & pmsCPU)) return;	/* If there aren't any changes, bail now... */
-
-	nfreq = (sel & pmsCPU) >> 16;						/* Isolate the new frequency */
-	
-	switch(pp->pf.pfPowerModes & pmType) {				/* Figure out what type to do */
-	
-		case pmDFS:										/* This is a DFS machine */
-			ml_set_processor_speed_dfs(nfreq);			/* Yes, set it */
-			break;
-	
-		case pmDualPLL:
-			ml_set_processor_speed_dpll(nfreq);			/* THIS IS COMPLETELY UNTESTED!!! */
-			break;
-
-		case pmPowerTune:								/* This is a PowerTune machine */
-			ml_set_processor_speed_powertune(nfreq);	/* Diddle the deal */
-			break;
-			
-		default:										/* Not this time dolt!!! */
-			panic("pmsCPUSet: unsupported power manager type: %08X\n", pp->pf.pfPowerModes);
-			break;
-	
-	}
-	
-}
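-
-/*
- *	Sketch (added; an assumption for illustration): pmsCPUSet reacts only when
- *	the pmsCPU bits of the selector change.  If pmsCPU covers the bits around
- *	0x00030000, as the alignment in pmsCPUQuery below suggests, then:
- *
- *		nfreq = (0x00010000 & pmsCPU) >> 16;		// yields 1
- *
- *	so the frequency selector is simply the field starting at bit 16.
- */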
-
-/*
- *	This code configures the initial step tables.  It should be called after the timebase frequency is initialized.
- */
-
-void pmsCPUConf(void) {
-
-	int i;
-	kern_return_t ret;
-	pmsSetFunc_t pmsDfltFunc[pmsSetFuncMax];			/* List of functions for the external power control to use */
-
-	for(i = 0; i < pmsSetFuncMax; i++) pmsDfltFunc[i] = NULL;	/* Clear this */
-
-
-	ret = pmsBuild((pmsDef *)&pmsDefault, sizeof(pmsDefault), pmsDfltFunc, 0, (pmsQueryFunc_t)0);	/* Configure the default stepper */
-
-	if(ret != KERN_SUCCESS) {							/* Some screw up? */
-		panic("pmsCPUConf: initial stepper table build failed, ret = %08X\n", ret);	/* Squeal */
-	}
-	
-	pmsSetStep(pmsHigh, 1);								/* Slew to high speed */
-	pmsPark();											/* Then park */
-	return;
-}
-
-/*
- * Machine-dependent initialization
- */
-void
-pmsCPUMachineInit(void)
-{
-	return;
-}
-
-/*
- *	This function should be called once for each processor to force the
- *	processor to the correct voltage and frequency.
- */
- 
-void pmsCPUInit(void) {
-
-	int cpu;
-
-	cpu = cpu_number();									/* Who are we? */
-	
-	kprintf("************ Initializing stepper hardware, cpu %d ******************\n", cpu);	/* (BRINGUP) */
-	
-	pmsSetStep(pmsHigh, 1);								/* Slew to high speed */
-	pmsPark();											/* Then park */
-
-	kprintf("************ Stepper hardware initialized, cpu %d ******************\n", cpu);	/* (BRINGUP) */
-}
-
-extern uint32_t hid1get(void);
-
-uint32_t
-pmsCPUQuery(void)
-{
-	uint32_t result;
-	struct per_proc_info *pp;
-	uint64_t scdata;
-
-	pp = getPerProc();									/* Get our per_proc */
-
-	switch(pp->pf.pfPowerModes & pmType) {				/* Figure out what type to do */
-	
-		case pmDFS:										/* This is a DFS machine */
-			result = hid1get();							/* Get HID1 */
-			result = (result >> 6) & 0x00030000;		/* Isolate the DFS bits */
-			break;
-			
-		case pmPowerTune:								/* This is a PowerTune machine */		
-			(void)ml_scom_read(PowerTuneStatusReg, &scdata);	/* Get the current power level */
-			result = (scdata >> (32 + 8)) & 0x00030000;	/* Shift the data to align with the set command */
-			break;
-			
-		default:										/* Query not supported for this kind */
-			result = 0;									/* Return highest if not supported */
-			break;
-	
-	}
-
-	return result;
-}
-
-/*
- *	These are not implemented for PPC.
- */
-void pmsCPUYellowFlag(void) {
-}
-
-void pmsCPUGreenFlag(void) {
-}
-
-uint32_t pmsCPUPackageQuery(void)
-{
-	/* Multi-core CPUs are not supported. */
-	return(~(uint32_t)0);
-}
-
-/*
- *	Broadcast a change to all processors including ourselves.
- *	We must transition ourselves before broadcasting because we may block and end up on a different processor.
- *
- *	This waits until every processor has transitioned, so it can block.
- *
- *	Called with interruptions disabled.
- *
- */
- 
-void pmsCPURun(uint32_t nstep) {
-
-	pmsRunLocal(nstep);								/* Transition ourselves (if we are parking, we are already parked and this is a no-op) */
-	(void)cpu_broadcast(&pmsBroadcastWait, pmsCPURemote, nstep);	/* Tell everyone else to do it too */
-
-	return;
-	
-}
-
-/*
- *	Receive a broadcast and react.
- *	This is called from the interprocessor signal handler.
- *	We wake up the initiator after we are finished.
- *
- */
-	
-static void pmsCPURemote(uint32_t nstep) {
-
-	pmsRunLocal(nstep);								/* Go set the step */
-	if(!hw_atomic_sub(&pmsBroadcastWait, 1)) {		/* Drop the wait count */
-		thread_wakeup((event_t)&pmsBroadcastWait);	/* If we were the last, wake up the signaller */
-	}
-	return;
-}	
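-
-/*
- *	Shape of the handshake (summary added for clarity; "presumably" marks an
- *	assumption about cpu_broadcast): cpu_broadcast presumably primes
- *	pmsBroadcastWait with the number of processors signalled; each remote
- *	runs pmsRunLocal and decrements the counter with hw_atomic_sub; whichever
- *	processor drops it to zero issues the thread_wakeup that releases the
- *	waiting initiator in pmsCPURun.
- */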
-
-/*
- *	Control the Power Management Stepper.
- *	Called from user state by the superuser via a ppc system call.
- *	Interruptions disabled.
- *
- */
-int pmsCntrl(struct savearea *save) {
-	save->save_r3 = pmsControl(save->save_r3, (user_addr_t)(uintptr_t)save->save_r4, save->save_r5);
-	return 1;
-}
-
-
-
diff --git a/osfmk/ppc/ppc_disasm.i b/osfmk/ppc/ppc_disasm.i
deleted file mode 100644
index 688f81bbc..000000000
--- a/osfmk/ppc/ppc_disasm.i
+++ /dev/null
@@ -1,234 +0,0 @@
-# @OSF_COPYRIGHT@
-# 
-
-# ppc.i - PowerPC instructions
-#
-# By Eamonn McManus <emcmanus@gr.osf.org>, 1995.
-
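-# Notation note (added for clarity; an informal reading of the entries below):
-# each `in' line pairs a 32-bit pattern with a mnemonic template.  Literal 0/1
-# bits must match exactly, and a run of one letter names a bit field (d, a, b,
-# i, ...) referenced from the template as $letter.  {x|y}[$f] appears to pick
-# between the alternatives x and y according to the value of field f.
-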
-# simplified mnemonics
-# ori 0,0,0 
-in 01100000000000000000000000000000 nop
-# addi[s] rD,0,value
-in 00111sddddd00000iiiiiiiiiiiiiiii li{|s}[$s] \
-				    $reg($d),{$simm16($i)|$shifted16($i)}[$s]
-# or rA,rS,rS
-in 011111dddddaaaaabbbbb0110111100r {or{|.}[$r] $reg($a),$reg($b),$reg($d)|\
-				     mr{|.}[$r] $reg($a),$reg($d)}[$b == $d]
-in 011111dddddaaaaabbbbb0100111100r xor{|.}[$r] $reg($a),$reg($b),$reg($d)
-
-# mtcrf 0xFF,rS
-in 011111ddddd011111111000100100000 mtcr $reg($d)
-
-in 00001Dcccccaaaaaiiiiiiiiiiiiiiii t{d|w}[$D]$tcond($c)i $reg($a),$simm16($i)
-in 000111dddddaaaaaiiiiiiiiiiiiiiii mulli $reg($d),$reg($a),$simm16($i)
-in 001000dddddaaaaaiiiiiiiiiiiiiiii subfic $reg($d),$reg($a),$simm16($i)
-in 00101Uddd0laaaaaiiiiiiiiiiiiiiii cmp{l|}[$U]i \
-				    $crcom($d){|1,}[$l]$reg($a),$simm16($i)
-in 00110rdddddaaaaaiiiiiiiiiiiiiiii addic{|.}[$r] $reg($d),$reg0($a),$simm16($i)
-in 00111sdddddaaaaaiiiiiiiiiiiiiiii addi{|s}[$s] $reg($d),$reg0($a),\
-				    {$simm16($i)|$shifted16($i)}[$s]
-in 010000cccccccccciiiiiiiiiiiiiial $br($c,$a,$l,,1)\
-				    {$brdispl($i,14)|$brabs($i)}[$a]
-in 01000100000000000000000000000010 sc
-in 010010iiiiiiiiiiiiiiiiiiiiiiiial b{|l}[$l]{|a}[$a] \
-				    {$brdispl($i,24)|$brabs($i)}[$a]
-in 010011ddd00sss000000000000000000 mcrf $crf($d),$crf($s)
-in 010011cccccccccc000000000010000l $br($c,0,$l,lr,0)
-in 010011dddddaaaaabbbbb0oooo000010 cr$crop($o) $crb($d),$crb($a),$crb($b)
-in 01001100000000000000000001100100 rfi
-in 01001100000000000000000000100100 rfid
-in 01001100000000000000001000100100 hrfid
-in 01001100000000000000000100101100 isync
-in 010011cccccccccc000001000010000l $br($c,0,$l,ctr,0)
-in 010111dddddaaaaabbbbbffffftttttr rlwnm{|.}[$r] \
-				    $reg($a),$reg($d),$reg($b),$dec($f),$dec($t)
-in 0101xxdddddaaaaasssssffffftttttr rl{wimi|winm|?|?}[$x]{|.}[$r] \
-				    $reg($a),$reg($d),$dec($s),$dec($f),$dec($t)
-in 011110dddddaaaaasssssffffff0xxSr rld{icl|icr|ic|imi}[$x]{|.}[$r] \
-				    $reg($a),$reg($d),$dec($[Ssssss]),$dec($f)
-in 011110dddddaaaaabbbbbffffff100xr rldc{l|r}[$x]{|.}[$r] \
-				    $reg($a),$reg($d),$reg($b),$dec($f)
-in 011111ddd0laaaaabbbbb0000u000000 cmp{|l}[$u] \
-				    $crcom($d){|1,}[$l]$reg($a),$reg($b)
-in 011111cccccaaaaabbbbb000w0001000 t{w|d}[$w]$tcond($c) $reg($a),$reg($b)
-in 011111dddddaaaaabbbbbo000C01000r subf{c|}[$C]{|o}[$o]{|.}[$r] \
-				    $reg($d),$reg($a),$reg($b)
-in 011111dddddaaaaabbbbb000u0010w1r mulh{d|w}[$w]{u|}[$u]{|.}[$r] \
-				    $reg($d),$reg($a),$reg($b)
-in 011111dddddaaaaabbbbbott0001010r add{c|e||?}[$t]{|o}[$o]{|.}[$r] \
-				    $reg($d),$reg($a),$reg($b)
-in 011111ddddd0000000000000m0100110 mf{cr|msr}[$m] $reg($d)
-in 011111ddddd0ffffffff000000100110 mfcr $hex($f),$reg($d)
-in 011111dddddaaaaabbbbb000w0101000 l{w|d}[$w]arx $reg($d),$reg0($a),$reg($b)
-in 011111dddddaaaaabbbbb0000u101010 ld{|u}[$u]x $reg($d),$reg0($a),$reg($b)
-in 011111dddddaaaaabbbbb0ooou101110 $ldst($o){|u}[$u]x \
-				    $reg($d),$reg($a),$reg($b)
-in 011111dddddaaaaabbbbb0000011A00r {slw|and}[$A]{|.}[$r] \
-				    $reg($a),$reg($d),$reg($b)
-in 011111dddddaaaaa000000000w11010r cntlz{w|d}[$w]{|.}[$r] $reg($a),$reg($d)
-in 011111dddddaaaaabbbbb0000011011r sld{|.}[$r] $reg($a),$reg($d),$reg($b)
-in 01111100000aaaaabbbbb00001101100 dcbst $reg($a),$reg($b)
-in 011111dddddaaaaabbbbb0000111100r andc{|.}[$r] $reg($a),$reg($d),$reg($b)
-in 01111100000aaaaabbbbb00010101100 dcbf $reg($a),$reg($b)
-in 011111dddddaaaaa00000o001101000r neg{|o}[$o]{|.}[$r] $reg($d),$reg($a)
-in 011111dddddaaaaabbbbb0001111100r nor{|.}[$r] $reg($a),$reg($d),$reg($b)
-in 011111dddddaaaaabbbbbo01z001000r subf{|z}[$z]e{|o}[$o]{|.}[$r] \
-				    $reg($d),$reg($a)
-in 011111ddddd0ffffffff000100100m00 mt{crf $hex($f),|msr}[$m] $reg($d)
-in 011111ddddd000000000000101100100 mtmsrd $reg($d)
-in 011111sssssaaaaabbbbb0010u101010 std{|u}[$u]x $reg($s),$reg0($a),$reg($b)
-in 011111sssssaaaaabbbbb001w0101101 st{w|d}[$w]cx. $reg($s),$reg0($a),$reg($b)
-in 011111dddddaaaaa00000o011001010r addze{|o}[$o]{|.}[$r] $reg($d),$reg($a)
-in 011111sssss0rrrr0000000110100100 mtsr $dec($r),$reg($s)
-in 011111dddddaaaaa00000o0111010x0r {subf|add}[$x]me{|o}[$o]{|.}[$r] \
-				    $reg($d),$reg($a)
-in 011111dddddaaaaabbbbbo0111010w1r mull{w|d}[$w]{|o}[$o]{|.}[$r] \
-				    $reg($d),$reg($a),$reg($b)
-in 011111sssss00000bbbbb00111100100 mtsrin $reg($s),$reg($b)
-in 01111100000aaaaabbbbb00111101100 dcbtst $reg0($a),$reg($b)
-in 01111100000aaaaabbbbb01000101100 dcbt $reg0($a),$reg($b)
-in 011111sssssaaaaabbbbb0100011100r eqv{|.}[$r] $reg($a),$reg($s),$reg($b)
-in 0111110000000000bbbbb01001100100 tlbie $reg($b)
-in 011111dddddaaaaabbbbb01i01101100 ec{i|o}[$i]wx $reg($d),$reg0($a),$reg($b)
-in 011111dddddrrrrrrrrrr01t10100110 m{f|t}[$t]spr $reg($d),$spr($r)
-in 011111dddddaaaaabbbbb0101u101010 lwa{|u}[$u]x $reg($d),$reg($a),$reg($b)
-in 01111100000000000000001011100100 tlbia
-in 011111dddddtttttttttt01011100110 mftb $reg($d),$dec($t)
-in 011111sssssaaaaabbbbb0110011100r orc{|.}[$r] $reg($a),$reg($s),$reg($b)
-in 0111110000000000bbbbb01101100100 slbie $reg($b)
-in 011111dddddaaaaabbbbbo111u010w1r div{d|w}[$w]{u|}[$u]{|o}[$o]{|.}[$r] \
-				    $reg($d),$reg($a),$reg($b)
-in 01111100000aaaaabbbbb01110101100 dcbi $reg0($a),$reg($b)
-in 011111sssssaaaaabbbbb0111011100r nand{|.}[$r] $reg($a),$reg($s),$reg($b)
-in 01111100000000000000001111100100 slbia
-in 011111ddddd00000bbbbb01100100100 slbmte $reg($d),$reg($b)
-in 011111ddddd00000bbbbb11010100110 slbmfev $reg($d),$reg($b)
-in 011111ddddd00000bbbbb11100100110 slbmfee $reg($d),$reg($b)
-in 011111ddd00000000000010000000000 mcrxr $crf($d)
-in 011111dddddaaaaabbbbb10000101010 lswx $reg($d),$reg0($a),$reg($b)
-in 011111dddddaaaaabbbbb1w000101100 l{w|h}[$w]brx $reg($d),$reg0($a),$reg($b)
-in 011111dddddaaaaabbbbb100su101110 lf{s|d}[$s]{|u}[$u]x \
-				    $fr($d),$reg0($a),$reg($b)
-in 011111sssssaaaaabbbbb1x000110w0r sr{|a}[$x]{w|d}[$w]{|.}[$r] \
-				    $reg($a),$reg($s),$reg($b)
-in 011111sssssaaaaabbbbb1000011011r srd{|.}[$r] $reg($a),$reg($s),$reg($b)
-in 01111100000000000000010001101100 tlbsync
-in 011111ddddd0rrrr0000010010101100 mfsr $reg($d),$dec($r)
-in 011111dddddaaaaannnnn10010101010 lswi $reg($d),$reg0($a),$dec($n)
-in 011111000ll000000000010010101100 {sync|?|ptesync|?}[$l]
-in 011111ddddd00000bbbbb10100100110 mfsrin $reg($d),$reg($b)
-in 011111sssssaaaaabbbbb10100101010 stswx $reg($s),$reg0($a),$reg($b)
-in 011111sssssaaaaabbbbb1w100101100 st{w|h}[$w]brx $reg($s),$reg0($a),$reg($b)
-in 011111sssssaaaaabbbbb101du101110 stf{s|d}[$d]{|u}[$u]x \
-				    $fr($s),{$reg0($a)|$reg($a)}[$u],$reg($b)
-in 011111sssssaaaaannnnn10110101010 stswi $reg($s),$reg0($a),$dec($n)
-in 011111dddddaaaaasssss1100111000r srawi{|.}[$r] $reg($a),$reg($d),$dec($s)
-in 011111dddddaaaaasssss110011101Sr sradi{|.}[$r] $reg($a),$reg($d),$dec($[Ssssss])
-in 01111100000000000000011010101100 eieio
-in 00000000000000000000001000000000 attn
-in 011111sssssaaaaa00000111xx11010r exts{h|b|w|?}[$x]{|.}[$r] $reg($a),$reg($s)
-in 01111100000aaaaabbbbb11110101100 icbi $reg0($a),$reg($b)
-in 011111sssssaaaaabbbbb11110101110 stfiwx $fr($s),$reg0($a),$reg($b)
-in 01111100000aaaaabbbbb11111101100 dcbz $reg0($a),$reg($b)
-in 011Axsaaaaadddddiiiiiiiiiiiiiiii {{|x}[$x]or|{and|?}[$x]}[$A]i{|s}[$s]\
-				    {|.}[$A] $reg($d),$reg($a),\
-				    {$hex($i)|$shifted16($i)}[$s]
-# Grouping andi with xori and ori may not be such a brilliant idea, since it
-# gets invoked as a catch-all for the 011111 instructions below.  But that
-# just means that we get a different sort of undefined instruction.
-in 10111sdddddaaaaaiiiiiiiiiiiiiiii {l|st}[$s]mw \
-				    $reg($d),$simm16($i)($reg0($a))
-in 10oooudddddaaaaaiiiiiiiiiiiiiiii $ldst($o){|u}[$u] \
-				    $reg($d),$simm16($i)($reg0($a))
-in 110sDudddddaaaaaiiiiiiiiiiiiiiii {l|st}[$s]f{s|d}[$D]{|u}[$u] \
-				    $fr($d),$simm16($i)($reg0($a))
-in 111010dddddaaaaaiiiiiiiiiiiiiixy l{d{|u}[$y]|{|?}[$y]w}[$x] \
-				    $reg($d),$simm16($i)($reg0($a))
-in 111s11dddddaaaaabbbbb0000010010r fdiv{s|}[$s]{|.}[$r] \
-				    $fr($d),$fr($a),$fr($b) 
-in 111s11dddddaaaaabbbbb000001010xr f{sub|add}[$x]{s|}[$s]{|.}[$r] \
-				    $fr($d),$fr($a),$fr($b) 
-in 111s11ddddd00000bbbbb0000010110r fsqrt{s|}[$s]{|.}[$r] $fr($d),$fr($b)
-in 111011ddddd00000bbbbb0000011000r fres{|.}[$r] $fr($d),$fr($b)
-in 111s11dddddaaaaa00000ccccc11001r fmul{s|}[$s]{|.}[$r] \
-				    $fr($d),$fr($a),$fr($c) 
-in 111s11dddddaaaaabbbbbccccc111nxr f{|n}[$n]m{sub|add}[$x]{s|}[$s]{|.}[$r] \
-				    $fr($d),$fr($a),$fr($c),$fr($b)
-in 111110sssssaaaaaiiiiiiiiiiiiii0u std{|u}[$u] \
-				    $reg($s),$simm16($i)({$reg0($a)|$reg($a)}[$u])
-in 111111ccc00aaaaabbbbb0000o000000 fcmp{u|o}[$o] $crf($c),$fr($a),$fr($b)
-in 111111ddddd00000bbbbb0000001100r frsp{|.}[$r] $fr($d),$fr($b)
-in 111111ddddd00000bbbbb000000111zr fctiw{|z}[$z]{|.}[$r] $fr($d),$fr($b)
-in 111111dddddaaaaabbbbbccccc10111r fsel{|.}[$r] \
-				    $fr($d),$fr($a),$fr($c),$fr($b)
-in 111111ddddd00000bbbbb0000011010r frsqrte{|.}[$r] $fr($d),$fr($b)
-in 111111ddddd0000000000000xx00110r mtfsb{?|1|0|?}[$x]{|.}[$r] $fcond($d)
-in 111111ddddd00000bbbbb0000101000r fneg{|.}[$r] $fr($d),$fr($b)
-in 111111ddd00sss000000000010000000 mcrfs $crf($d),$crf($s)
-in 111111ddddd00000bbbbb0001001000r fmr{|.}[$r] $fr($d),$fr($b)
-in 111111ddd0000000iiii00010000110r mtfsfi{|.}[$r] $crf($d),$simm16($i)
-in 111111ddddd00000bbbbb0010001000r fnabs{|.}[$r] $fr($d),$fr($b)
-in 111111ddddd00000bbbbb0100001000r fabs{|.}[$r] $fr($d),$fr($b)
-in 111111ddddd00000000001001000111r mffs{|.}[$r] $fr($d)
-in 1111110ffffffff0bbbbb1011000111r mtfsf{|.}[$r] $hex($f),$fr($b)
-in 111111ddddd00000bbbbb110010111zr fctid{|z}[$z]{|.}[$r] $fr($d),$fr($b)
-in 111111ddddd00000bbbbb1101001110r fcfid{|.}[$r] $fr($d),$fr($b)
-
-in xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ?
-
-
-ldst ooo {lwz|lbz|stw|stb|lhz|lha|sth|?}[$o]
-br utdzyrrrcc(%a,%l,s,%C) b{d{nz|z}[$z]|{|?}[$z]}[$d]{c|}[$u]\
-			  {|l}[$l]{|a}[$a]$s \
-			  {$crcom($r)$cond($[cct]){|,}[$C]|}[$u]
-cond ccc {ge|lt|le|gt|ne|eq|ns|so}[$c]
-fcond ccc $hex($c)
-crb rrrcc $cr($r):$cond($[cc1])
-crop oooo {?|nor|?|?|andc|?|xor|nand|and|eqv|?|?|?|orc|or|?}[$o]
-tcond ccccc {?|lgt|llt|?|eq|lge|lle|?|?|?|?|?|ge|?|?|?|lt|?|?|?|le|?|?|?|ne|?|?|?|?|?|?|a}[$c]
-
-spr 0000000000 mq
-spr 0000100000 xer
-spr 0010l00000 rtc{u|l}[$l]
-spr s011000000 dec{u|s}[$s]
-spr 0100000000 lr
-spr 0100100000 ctr
-spr 1001000000 dsisr
-spr 1001100000 dar
-spr 1100100000 sdr1
-spr 1101n00000 srr$dec($n)
-spr 100nn01000 sprg$dec($n)
-spr 1101001000 ear
-spr 1101101000 pvr
-spr 10nnl10000 ibat$dec($n){u|l}[$l]
-spr 1000n11111 hid$dec($n)
-spr 1001011111 iabr
-spr 1010111111 dabr
-spr 1111111111 pir
-spr 0000110000 hspr0
-spr 0000110001 hspr1
-spr 0000110110 hdec0
-spr 0000111010 hsrr0
-spr 0000111011 hsrr1
-spr xxxxxxxxxx ?
-
-reg0 00000 0
-reg0 nnnnn $reg($n)
-
-reg (%n) r$dec($n)
-fr (%n) fr$dec($n)
-cr (%n) cr$dec($n)
-crf (%n) crf$dec($n)
-crcom 000
-crcom nnn $cr($n),
-
-simm16 snnnnnnnnnnnnnnn {$hex($n)|-$hex((1 << 15) - $n)}[$s]
-
-shifted16 (%n) $hex($n << 16)
-
-brabs (%n) $hex($n << 2)
-
-hex (%n) :
-dec (%n) :
-mbz (%n) :
-brdispl (%d,%n) :
diff --git a/osfmk/ppc/ppc_init.c b/osfmk/ppc/ppc_init.c
deleted file mode 100644
index 9be44aed8..000000000
--- a/osfmk/ppc/ppc_init.c
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#include <debug.h>
-#include <mach_ldebug.h>
-#include <mach_kdb.h>
-#include <mach_kdp.h>
-
-#include <kern/misc_protos.h>
-#include <kern/thread.h>
-#include <kern/processor.h>
-#include <kern/startup.h>
-#include <machine/machine_routines.h>
-#include <ppc/boot.h>
-#include <ppc/proc_reg.h>
-#include <ppc/misc_protos.h>
-#include <ppc/pmap.h>
-#include <ppc/new_screen.h>
-#include <ppc/exception.h>
-#include <ppc/asm.h>
-#include <ppc/Firmware.h>
-#include <ppc/savearea.h>
-#include <ppc/low_trace.h>
-#include <ppc/Diagnostics.h>
-#include <ppc/cpu_internal.h>
-#include <ppc/mem.h>
-#include <ppc/mappings.h>
-#include <ppc/locks.h>
-#include <kern/pms.h>
-#include <ppc/rtclock.h>
-
-#include <pexpert/pexpert.h>
-
-extern unsigned int mckFlags;
-extern vm_offset_t	intstack;
-extern vm_offset_t	debstack;  
-
-extern unsigned int extPatchMCK;
-extern unsigned int extPatch32;
-extern unsigned int hwulckPatch_isync;
-extern unsigned int hwulckPatch_eieio;
-extern unsigned int hwulckbPatch_isync;
-extern unsigned int hwulckbPatch_eieio;
-extern unsigned int mulckPatch_isync;
-extern unsigned int mulckPatch_eieio;
-extern unsigned int mulckePatch_isync;
-extern unsigned int mulckePatch_eieio;
-extern unsigned int sulckPatch_isync;
-extern unsigned int sulckPatch_eieio;
-extern unsigned int rwlesPatch_isync;
-extern unsigned int rwlesPatch_eieio;
-extern unsigned int rwldPatch_isync;
-extern unsigned int rwldPatch_eieio;
-extern unsigned int bcopy_nop_if_32bit;
-extern unsigned int bcopy_nc_nop_if_32bit;
-extern unsigned int memcpy_nop_if_32bit;
-extern unsigned int xsum_nop_if_32bit;
-extern unsigned int uft_nop_if_32bit;
-extern unsigned int uft_uaw_nop_if_32bit;
-extern unsigned int uft_cuttrace;
-
-int forcenap = 0;
-int wcte = 0;				/* Non-cache gather timer disabled */
-
-int debug_task;
-
-patch_entry_t patch_table[] = {
-	{&extPatch32,			0x60000000, PATCH_FEATURE,		PatchExt32},
-	{&extPatchMCK,			0x60000000, PATCH_PROCESSOR,	CPU_SUBTYPE_POWERPC_970},
-	{&hwulckPatch_isync,	0x60000000, PATCH_FEATURE,		PatchLwsync},
-	{&hwulckPatch_eieio,	0x7c2004ac, PATCH_FEATURE,		PatchLwsync},
-	{&hwulckbPatch_isync,   0x60000000, PATCH_FEATURE,		PatchLwsync},
-	{&hwulckbPatch_eieio,   0x7c2004ac, PATCH_FEATURE,		PatchLwsync},
-	{&mulckPatch_isync,		0x60000000, PATCH_FEATURE,		PatchLwsync},
-	{&mulckPatch_eieio,		0x7c2004ac, PATCH_FEATURE,		PatchLwsync},
-	{&mulckePatch_isync,	0x60000000, PATCH_FEATURE,		PatchLwsync},
-	{&mulckePatch_eieio,	0x7c2004ac, PATCH_FEATURE,		PatchLwsync},
-	{&sulckPatch_isync,		0x60000000, PATCH_FEATURE,		PatchLwsync},
-	{&sulckPatch_eieio,		0x7c2004ac, PATCH_FEATURE,		PatchLwsync},
-	{&rwlesPatch_isync,		0x60000000, PATCH_FEATURE,		PatchLwsync},
-	{&rwlesPatch_eieio,		0x7c2004ac, PATCH_FEATURE,		PatchLwsync},
-	{&rwldPatch_isync,		0x60000000, PATCH_FEATURE,		PatchLwsync},
-	{&rwldPatch_eieio,		0x7c2004ac, PATCH_FEATURE,		PatchLwsync},
-	{&bcopy_nop_if_32bit,	0x60000000, PATCH_FEATURE,		PatchExt32},
-	{&bcopy_nc_nop_if_32bit,0x60000000, PATCH_FEATURE,		PatchExt32},
-	{&memcpy_nop_if_32bit,	0x60000000, PATCH_FEATURE,		PatchExt32},
-	{&xsum_nop_if_32bit,	0x60000000,	PATCH_FEATURE,		PatchExt32},
-	{&uft_nop_if_32bit,		0x60000000,	PATCH_FEATURE,		PatchExt32},
-	{&uft_uaw_nop_if_32bit,	0x60000000,	PATCH_FEATURE,		PatchExt32},
-	{&uft_cuttrace,			0x60000000,	PATCH_FEATURE,		PatchExt32},
-    {NULL,                  0x00000000, PATCH_END_OF_TABLE, 0}
-	};
-
-
-/*
- * Forward definition
- */
-void	ppc_init(
-			boot_args	*args);
-
-void	ppc_init_cpu(
-			struct per_proc_info *proc_info);
-
-	
-/*
- *		Routine:		ppc_init
- *		Function:
- */
-void
-ppc_init(
-		boot_args *args)
-{
-	unsigned int		maxmem;
-	uint64_t			xmaxmem;
-	uint64_t			newhid;
-	unsigned int		cputrace;
-	unsigned int		novmx;
-	unsigned int		mcksoft;
-	thread_t			thread;
-	mapping_t			*mp;
-	uint64_t 			scdata;
-
-
-	/*
-	 * Setup per_proc info for first cpu.
-	 */
-
-	BootProcInfo.cpu_number = 0;
-	BootProcInfo.cpu_flags = 0;
-	BootProcInfo.istackptr = 0;							/* we're on the interrupt stack */
-	BootProcInfo.intstack_top_ss = (vm_offset_t)&intstack + INTSTACK_SIZE - FM_SIZE;
-	BootProcInfo.debstack_top_ss = (vm_offset_t)&debstack + kernel_stack_size - FM_SIZE;
-	BootProcInfo.debstackptr = BootProcInfo.debstack_top_ss;
-	BootProcInfo.interrupts_enabled = 0;
-	BootProcInfo.pending_ast = AST_NONE;
-	BootProcInfo.FPU_owner = NULL;
-	BootProcInfo.VMX_owner = NULL;
-	BootProcInfo.pp_cbfr = console_per_proc_alloc(TRUE);
-	BootProcInfo.rtcPop = EndOfAllTime;
-	queue_init(&BootProcInfo.rtclock_timer.queue);
-	BootProcInfo.rtclock_timer.deadline = EndOfAllTime;
-	BootProcInfo.pp2ndPage = (addr64_t)(uintptr_t)&BootProcInfo;	/* Initial physical address of the second page */
-
- 	BootProcInfo.pms.pmsStamp = 0;						/* Dummy transition time */
- 	BootProcInfo.pms.pmsPop = EndOfAllTime;				/* Set the pop way into the future */
- 	
- 	BootProcInfo.pms.pmsState = pmsParked;				/* Park the power stepper */
-	BootProcInfo.pms.pmsCSetCmd = pmsCInit;				/* Set dummy initial hardware state */
-	
-	mp = (mapping_t *)BootProcInfo.ppUMWmp;
-	mp->mpFlags = 0x01000000 | mpLinkage | mpPerm | 1;
-	mp->mpSpace = invalSpace;
-
-	pmsInit();											/* Initialize the stepper */
-
-	thread_bootstrap();
-
-	thread = current_thread();
-	thread->machine.curctx = &thread->machine.facctx;
-	thread->machine.facctx.facAct = thread;
-	thread->machine.umwSpace = invalSpace;				/* Initialize user memory window space to invalid */
-	thread->machine.preemption_count = 1;
-
-	cpu_bootstrap();
-	cpu_init();
-
-	master_cpu = 0;
-	processor_bootstrap();
-
-	timer_start(&thread->system_timer, mach_absolute_time());
-	PROCESSOR_DATA(master_processor, kernel_timer) =
-				PROCESSOR_DATA(master_processor, thread_timer) = &thread->system_timer;
-
-	static_memory_end = round_page(args->topOfKernelData);
-      
-	PE_init_platform(FALSE, args);						/* Get platform expert set up */
-
-	if (!PE_parse_boot_argn("novmx", &novmx, sizeof (novmx))) novmx=0;	/* Special run without VMX? */
-	if(novmx) {											/* Yeah, turn it off */
-		BootProcInfo.pf.Available &= ~pfAltivec;		/* Turn off Altivec available */
-		__asm__ volatile("mtsprg 2,%0" : : "r" (BootProcInfo.pf.Available));	/* Set live value */
-	}
-
-	if (!PE_parse_boot_argn("fn", &forcenap, sizeof (forcenap))) forcenap = 0;	/* If force nap not set, make 0 */
-	else {
-		if(forcenap < 2) forcenap = forcenap + 1;		/* Else set 1 for off, 2 for on */
-		else forcenap = 0;								/* Clear for error case */
-	}
-	
-	if (!PE_parse_boot_argn("pmsx", &pmsExperimental, sizeof (pmsExperimental))) pmsExperimental = 0;	/* Check if we should start in experimental power management stepper mode */
-	if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts))) LcksOpts = 0;	/* Set lcks options */
-	if (!PE_parse_boot_argn("diag", &dgWork.dgFlags, sizeof (dgWork.dgFlags))) dgWork.dgFlags = 0;	/* Set diagnostic flags */
-	if(dgWork.dgFlags & enaExpTrace) trcWork.traceMask = 0xFFFFFFFF;	/* If tracing requested, enable it */
-
-	if(PE_parse_boot_argn("ctrc", &cputrace, sizeof (cputrace))) {			/* See if tracing is limited to a specific cpu */
-		trcWork.traceMask = (trcWork.traceMask & 0xFFFFFFF0) | (cputrace & 0xF);	/* Limit to the low 4 bits */
-	}
-
-	if(!PE_parse_boot_argn("tb", &trcWork.traceSize, sizeof (trcWork.traceSize))) {	/* See if non-default trace buffer size */
-#if DEBUG
-		trcWork.traceSize = 32;							/* Default 32 page trace table for DEBUG */
-#else
-		trcWork.traceSize = 8;							/* Default 8 page trace table for RELEASE */
-#endif
-	}
-
-	if(trcWork.traceSize < 1) trcWork.traceSize = 1;	/* Minimum size of 1 page */
-	if(trcWork.traceSize > 256) trcWork.traceSize = 256;	/* Maximum size of 256 pages */
-	trcWork.traceSize = trcWork.traceSize * 4096;		/* Change page count to size */
-
-	if (!PE_parse_boot_argn("maxmem", &maxmem, sizeof (maxmem)))
-		xmaxmem=0;
-	else
-		xmaxmem = (uint64_t)maxmem * (1024 * 1024);
-
-	if (!PE_parse_boot_argn("wcte", &wcte, sizeof (wcte))) wcte = 0;	/* If write combine timer enable not supplied, make 0 */
-	else wcte = (wcte != 0);							/* Force to 0 or 1 */
-
-	if (!PE_parse_boot_argn("mcklog", &mckFlags, sizeof (mckFlags))) mckFlags = 0;	/* If machine check flags not specified, clear */
-	else if(mckFlags > 1) mckFlags = 0;					/* If bogus, clear */
-    
-    if (!PE_parse_boot_argn("ht_shift", &hash_table_shift, sizeof (hash_table_shift)))  /* should we use a non-default hash table size? */
-        hash_table_shift = 0;                           /* no, use default size */
-
-	/*   
-	 * VM initialization, after this we're using page tables...
-	 */
-
-	ppc_vm_init(xmaxmem, args);
-	
-	if(BootProcInfo.pf.Available & pf64Bit) {			/* Are we on a 64-bit machine? */
-		
-		if(!wcte) {
-			(void)ml_scom_read(GUSModeReg << 8, &scdata);	/* Get GUS mode register */
-			scdata = scdata | GUSMstgttoff;					/* Disable the NCU store gather timer */
-			(void)ml_scom_write(GUSModeReg << 8, scdata);	/* Set GUS mode register */
-		}
-		
-		if(PE_parse_boot_argn("mcksoft", &mcksoft, sizeof (mcksoft))) {	/* Have they supplied "machine check software recovery"? */
-			newhid = BootProcInfo.pf.pfHID5;			/* Get the old HID5 */
-			if(mcksoft < 2) {
-				newhid &= 0xFFFFFFFFFFFFDFFFULL;		/* Clear the old one */
-				newhid |= (mcksoft & 1) << 13;			/* Set new value to enable machine check recovery */
-				BootProcInfo.pf.pfHID5 = newhid;		/* Set the new one */
-				hid5set64(newhid);						/* Set the hid for this processor */
-			}
-		}
-	}
-		
-	machine_startup();
-}
-
-/*
- *		Routine:		ppc_init_cpu
- *		Function:
- */
-void
-ppc_init_cpu(
-		struct per_proc_info	*proc_info) 
-{
-	uint64_t scdata;
-
-	proc_info->cpu_flags &= ~SleepState;
-
-	if((BootProcInfo.pf.Available & pf64Bit) && !wcte) {	/* Should we disable the store gather timer? */
-		(void)ml_scom_read(GUSModeReg << 8, &scdata);	/* Get GUS mode register */
-		scdata = scdata | GUSMstgttoff;					/* Disable the NCU store gather timer */
-		(void)ml_scom_write(GUSModeReg << 8, scdata);	/* Set GUS mode register */
-	}
-
-	cpu_init();
-	
-	slave_main(NULL);
-}
diff --git a/osfmk/ppc/ppc_vm_init.c b/osfmk/ppc/ppc_vm_init.c
deleted file mode 100644
index e94b6b545..000000000
--- a/osfmk/ppc/ppc_vm_init.c
+++ /dev/null
@@ -1,427 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-
-#include <mach_debug.h>
-#include <mach_kdb.h>
-#include <mach_kdp.h>
-#include <debug.h>
-
-#include <mach/vm_types.h>
-#include <mach/vm_param.h>
-#include <mach/thread_status.h>
-#include <kern/misc_protos.h>
-#include <kern/assert.h>
-#include <kern/cpu_number.h>
-#include <kern/thread.h>
-#include <console/serial_protos.h>
-
-#include <ppc/proc_reg.h>
-#include <ppc/Firmware.h>
-#include <ppc/boot.h>
-#include <ppc/misc_protos.h>
-#include <ppc/pmap.h>
-#include <ppc/mem.h>
-#include <ppc/mappings.h>
-#include <ppc/exception.h>
-#include <ppc/lowglobals.h>
-#include <ppc/serial_io.h>
-
-#include <libkern/kernel_mach_header.h>
-
-extern const char version[];
-extern const char version_variant[];
-
-addr64_t hash_table_base;				/* Hash table base */
-unsigned int hash_table_size;			/* Hash table size */
-int         hash_table_shift;           /* "ht_shift" boot arg, used to scale hash_table_size */
-vm_offset_t taproot_addr;				/* (BRINGUP) */
-unsigned int taproot_size;				/* (BRINGUP) */
-extern int disableConsoleOutput;
-
-struct shadowBAT shadow_BAT;
-
-
-
-/*
- *	NOTE: mem_size is bogus on large memory machines.  We will pin it to 0x80000000 if there is more than 2 GB
- *	This is left only for compatibility and max_mem should be used.
- */
-vm_offset_t mem_size;					/* Size of actual physical memory present
-										   minus any performance buffer and possibly limited
-										   by mem_limit in bytes */
-uint64_t	mem_actual;					/* The "One True" physical memory size;
-						  				   actually, it is the highest physical address + 1 */
-uint64_t	max_mem;					/* Size of physical memory (bytes), adjusted by maxmem */
-uint64_t	sane_size;					/* Memory size to use for defaults calculations */
-						  
-
-mem_region_t pmap_mem_regions[PMAP_MEM_REGION_MAX + 1];
-unsigned int  pmap_mem_regions_count;		/* Assume no non-contiguous memory regions */
-
-unsigned int avail_remaining = 0;
-vm_offset_t first_avail;
-vm_offset_t static_memory_end;
-addr64_t vm_last_addr = VM_MAX_KERNEL_ADDRESS;	/* Highest kernel virtual address known to the VM system */
-
-vm_offset_t sectTEXTB;
-unsigned long sectSizeTEXT;
-vm_offset_t sectDATAB;
-unsigned long sectSizeDATA;
-vm_offset_t sectLINKB;
-unsigned long sectSizeLINK;
-vm_offset_t sectKLDB;
-unsigned long sectSizeKLD;
-vm_offset_t sectPRELINKB;
-unsigned long sectSizePRELINK;
-vm_offset_t sectHIBB;
-unsigned long sectSizeHIB;
-
-vm_offset_t end, etext, edata;
-
-extern unsigned long exception_entry;
-extern unsigned long exception_end;
-
-
-void ppc_vm_init(uint64_t mem_limit, boot_args *args)
-{
-	unsigned int i, kmapsize, pvr;
-	vm_offset_t  addr;
-	unsigned int *xtaproot, bank_shift;
-	uint64_t	cbsize, xhid0;
-
-
-/*
- *	Invalidate all shadow BATs
- */
-
-	/* Initialize shadow IBATs */
-	shadow_BAT.IBATs[0].upper=BAT_INVALID;
-	shadow_BAT.IBATs[0].lower=BAT_INVALID;
-	shadow_BAT.IBATs[1].upper=BAT_INVALID;
-	shadow_BAT.IBATs[1].lower=BAT_INVALID;
-	shadow_BAT.IBATs[2].upper=BAT_INVALID;
-	shadow_BAT.IBATs[2].lower=BAT_INVALID;
-	shadow_BAT.IBATs[3].upper=BAT_INVALID;
-	shadow_BAT.IBATs[3].lower=BAT_INVALID;
-
-	/* Initialize shadow DBATs */
-	shadow_BAT.DBATs[0].upper=BAT_INVALID;
-	shadow_BAT.DBATs[0].lower=BAT_INVALID;
-	shadow_BAT.DBATs[1].upper=BAT_INVALID;
-	shadow_BAT.DBATs[1].lower=BAT_INVALID;
-	shadow_BAT.DBATs[2].upper=BAT_INVALID;
-	shadow_BAT.DBATs[2].lower=BAT_INVALID;
-	shadow_BAT.DBATs[3].upper=BAT_INVALID;
-	shadow_BAT.DBATs[3].lower=BAT_INVALID;
-
-
-	/*
-	 * Go through the list of memory regions passed in via the boot_args
-	 * and copy valid entries into the pmap_mem_regions table, adding
-	 * further calculated entries.
-	 *
-	 * boot_args version 1 has address instead of page numbers
-	 * in the PhysicalDRAM banks, set bank_shift accordingly.
-	 */
-	
-	bank_shift = 0;
-	if (args->Version == kBootArgsVersion1) bank_shift = 12;
-	
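-	/*
-	 * Worked example (added for illustration): with current boot_args
-	 * (bank_shift = 0) a bank with base = 0x10000 and size = 0x20000 is in
-	 * pages, so cbsize = 0x20000 << 12 = 512MB and mrStart = 0x10000 is
-	 * already a page number.  With version 1 args (bank_shift = 12) the same
-	 * fields arrive as byte values: cbsize is used as-is and
-	 * mrStart = base >> 12 converts the byte address to a page number.
-	 */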
-	pmap_mem_regions_count = 0;
-	max_mem = 0;   															/* Will be used to total memory found so far */
-	mem_actual = 0;															/* Actual size of memory */
-	
-	if (mem_limit == 0) mem_limit = 0xFFFFFFFFFFFFFFFFULL;					/* If there is no set limit, use all */
-	
-	for (i = 0; i < kMaxDRAMBanks; i++) {									/* Look at all of the banks */
-		
-		cbsize = (uint64_t)args->PhysicalDRAM[i].size << (12 - bank_shift);	/* Remember current size */
-		
-		if (!cbsize) continue;												/* Skip if the bank is empty */
-		
-		mem_actual = mem_actual + cbsize;									/* Get true memory size */
-
-		if(mem_limit == 0) continue;										/* If we have hit the restriction, just keep counting */
-
-		if (cbsize > mem_limit) cbsize = mem_limit;							/* Trim to max allowed */
-		max_mem += cbsize;													/* Total up what we have so far */
-		mem_limit = mem_limit - cbsize;										/* Calculate amount left to do */
-		
-		pmap_mem_regions[pmap_mem_regions_count].mrStart  = args->PhysicalDRAM[i].base >> bank_shift;	/* Set the start of the bank */
-		pmap_mem_regions[pmap_mem_regions_count].mrAStart = pmap_mem_regions[pmap_mem_regions_count].mrStart;		/* Set the start of allocatable area */
-		pmap_mem_regions[pmap_mem_regions_count].mrEnd    = ((uint64_t)args->PhysicalDRAM[i].base >> bank_shift) + (cbsize >> 12) - 1;	/* Set the end address of bank */
-		pmap_mem_regions[pmap_mem_regions_count].mrAEnd   = pmap_mem_regions[pmap_mem_regions_count].mrEnd;	/* Set the end address of allocatable area */
-
-		/* Regions must be provided in ascending order */
-		assert ((pmap_mem_regions_count == 0) ||
-			pmap_mem_regions[pmap_mem_regions_count].mrStart >
-			pmap_mem_regions[pmap_mem_regions_count-1].mrStart);
-
-		pmap_mem_regions_count++;											/* Count this region */
-	}
-
-	mem_size = (unsigned int)max_mem;										/* Get size of memory */
-	if(max_mem > 0x0000000080000000ULL) mem_size = 0x80000000;				/* Pin at 2 GB */
-
-	sane_size = max_mem;													/* Calculate a sane value to use for init */
-	if(sane_size > (addr64_t)(VM_MAX_KERNEL_ADDRESS + 1)) 
-		sane_size = (addr64_t)(VM_MAX_KERNEL_ADDRESS + 1);					/* If flush with RAM, use the addressable portion */
-
-
-/* 
- * Initialize the pmap system, using space above `first_avail'
- * for the necessary data structures.
- * NOTE : assume that we'll have enough space mapped in already
- */
-
-	first_avail = static_memory_end;
-
-	/*
-	 * Now retrieve addresses for end, edata, and etext 
-	 * from Mach-O headers for the currently running 32-bit kernel.
-	 */
-	/* XXX fix double casts for 64 bit kernel */
-	sectTEXTB = (vm_offset_t)(uint32_t *)getsegdatafromheader(
-		&_mh_execute_header, "__TEXT", &sectSizeTEXT);
-	sectDATAB = (vm_offset_t)(uint32_t *)getsegdatafromheader(
-		&_mh_execute_header, "__DATA", &sectSizeDATA);
-	sectLINKB = (vm_offset_t)(uint32_t *)getsegdatafromheader(
-		&_mh_execute_header, "__LINKEDIT", &sectSizeLINK);
-	sectKLDB = (vm_offset_t)(uint32_t *)getsegdatafromheader(
-		&_mh_execute_header, "__KLD", &sectSizeKLD);
-	sectHIBB = (vm_offset_t)(uint32_t *)getsegdatafromheader(
-		&_mh_execute_header, "__HIB", &sectSizeHIB);
-	sectPRELINKB = (vm_offset_t)(uint32_t *)getsegdatafromheader(
-		&_mh_execute_header, "__PRELINK_TEXT", &sectSizePRELINK);
-
-	etext = (vm_offset_t) sectTEXTB + sectSizeTEXT;
-	edata = (vm_offset_t) sectDATAB + sectSizeDATA;
-	end = round_page(getlastaddr());					/* Force end to next page */
-	
-	kmapsize = (round_page(exception_end) - trunc_page(exception_entry)) +	/* Get size we will map later */
-		(round_page(sectTEXTB+sectSizeTEXT) - trunc_page(sectTEXTB)) +
-		(round_page(sectDATAB+sectSizeDATA) - trunc_page(sectDATAB)) +
-		(round_page(sectLINKB+sectSizeLINK) - trunc_page(sectLINKB)) +
-		(round_page(sectKLDB+sectSizeKLD) - trunc_page(sectKLDB)) +
-		(round_page_32(sectHIBB+sectSizeHIB) - trunc_page_32(sectHIBB)) +
-		(round_page(sectPRELINKB+sectSizePRELINK) - trunc_page(sectPRELINKB)) +
-		(round_page(static_memory_end) - trunc_page(end));
-
-	pmap_bootstrap(max_mem, &first_avail, kmapsize);
-
-	pmap_map(trunc_page(exception_entry), trunc_page(exception_entry), 
-		round_page(exception_end), VM_PROT_READ|VM_PROT_EXECUTE, VM_WIMG_USE_DEFAULT);
-
-	pmap_map(trunc_page(sectTEXTB), trunc_page(sectTEXTB), 
-		round_page(sectTEXTB+sectSizeTEXT), VM_PROT_READ|VM_PROT_EXECUTE, VM_WIMG_USE_DEFAULT);
-
-	pmap_map(trunc_page(sectDATAB), trunc_page(sectDATAB), 
-		round_page(sectDATAB+sectSizeDATA), VM_PROT_READ|VM_PROT_WRITE, VM_WIMG_USE_DEFAULT);
-
-/*
- *	The KLD and LINKEDIT segments are unloaded in toto after boot completes,
- *	but via ml_static_mfree(), through IODTFreeLoaderInfo().  Hence, we have
- *	to map both segments page-by-page.
- */
-	
-	for (addr = trunc_page(sectPRELINKB);
-             addr < round_page(sectPRELINKB+sectSizePRELINK);
-             addr += PAGE_SIZE) {
-
-            pmap_enter(kernel_pmap, (vm_map_offset_t)addr, (ppnum_t)(addr>>12), 
-			VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, 
-			VM_WIMG_USE_DEFAULT, TRUE);
-
-	}
-
-	for (addr = trunc_page(sectKLDB);
-             addr < round_page(sectKLDB+sectSizeKLD);
-             addr += PAGE_SIZE) {
-
-            pmap_enter(kernel_pmap, (vm_map_offset_t)addr, (ppnum_t)(addr>>12), 
-			VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, 
-			VM_WIMG_USE_DEFAULT, TRUE);
-
-	}
-
-	for (addr = trunc_page(sectLINKB);
-             addr < round_page(sectLINKB+sectSizeLINK);
-             addr += PAGE_SIZE) {
-
-           pmap_enter(kernel_pmap, (vm_map_offset_t)addr,
-			(ppnum_t)(addr>>12), 
-			VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, 
-			VM_WIMG_USE_DEFAULT, TRUE);
-
-	}
-
-	for (addr = trunc_page_32(sectHIBB);
-             addr < round_page_32(sectHIBB+sectSizeHIB);
-             addr += PAGE_SIZE) {
-
-            pmap_enter(kernel_pmap, (vm_map_offset_t)addr, (ppnum_t)(addr>>12), 
-			VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, 
-			VM_WIMG_USE_DEFAULT, TRUE);
-
-	}
-
-	pmap_enter(kernel_pmap, (vm_map_offset_t)(uintptr_t)&sharedPage,
-		(ppnum_t)&sharedPage >> 12, /* Make sure the sharedPage is mapped */
-		VM_PROT_READ|VM_PROT_WRITE, 
-		VM_WIMG_USE_DEFAULT, TRUE);
-
-	pmap_enter(kernel_pmap, (vm_map_offset_t)(uintptr_t)&lowGlo.lgVerCode,
-		(ppnum_t)&lowGlo.lgVerCode >> 12,	/* Make sure the low memory globals are mapped */
-		VM_PROT_READ|VM_PROT_WRITE, 
-		VM_WIMG_USE_DEFAULT, TRUE);
-		
-/*
- *	We need to map the remainder page-by-page because some of this will
- *	be released later, but not all.  Ergo, no block mapping here 
- */
-
-	for(addr = trunc_page(end); addr < round_page(static_memory_end); addr += PAGE_SIZE) {
-
-		pmap_enter(kernel_pmap, (vm_map_address_t)addr, (ppnum_t)addr>>12, 
-			VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, 
-			VM_WIMG_USE_DEFAULT, TRUE);
-
-	}
-	
-/*
- *	Here we map a window into the kernel address space that will be used to
- *  access a slice of a user address space. Clients for this service include
- *  copyin/out and copypv.
- */
-
-	lowGlo.lgUMWvaddr = USER_MEM_WINDOW_VADDR;
-										/* Initialize user memory window base address */
-	MapUserMemoryWindowInit();			/* Go initialize user memory window */
-
-/*
- *	At this point, there is enough mapped memory and all hw mapping structures are
- *	allocated and initialized.  Here is where we turn on translation for the
- *	VERY first time....
- *
- *	NOTE: Here is where our very first interruption will happen.
- *
- */
-
-	hw_start_trans();					/* Start translating */
-	PE_init_platform(TRUE, args);		/* Initialize this right off the bat */
-
-
-#if 0
-	GratefulDebInit((bootBumbleC *)&(args->Video));	/* Initialize the GratefulDeb debugger */
-#endif
-
-
-	printf_init();						/* Init this in case we need debugger */
-	panic_init();						/* Init this in case we need debugger */
-	PE_init_kprintf(TRUE);				/* Note on PPC we only call this after VM is set up */
-
-	kprintf("kprintf initialized\n");
-
-	serialmode = 0;						/* Assume normal keyboard and console */
-	if(PE_parse_boot_argn("serial", &serialmode, sizeof (serialmode))) {		/* Do we want a serial keyboard and/or console? */
-		kprintf("Serial mode specified: %08X\n", serialmode);
-	}
-	if(serialmode & 1) {				/* Start serial if requested */
-		(void)switch_to_serial_console();	/* Switch into serial mode */
-		disableConsoleOutput = FALSE;	/* Allow printfs to happen */
-	}
-	
-	kprintf("max_mem: %ld M\n", (unsigned long)(max_mem >> 20));
-	kprintf("version_variant = %s\n", version_variant);
-	kprintf("version         = %s\n\n", version);
-	__asm__ ("mfpvr %0" : "=r" (pvr));
-	kprintf("proc version    = %08x\n", pvr);
-	if(getPerProc()->pf.Available & pf64Bit) {	/* 64-bit processor? */
-		xhid0 = hid0get64();			/* Get the hid0 */
-		if(xhid0 & (1ULL << (63 - 19))) kprintf("Time base is externally clocked\n");
-		else kprintf("Time base is internally clocked\n");
-	}
-
-
-	taproot_size = PE_init_taproot(&taproot_addr);	/* (BRINGUP) See if there is a taproot */
-	if(taproot_size) {					/* (BRINGUP) */
-		kprintf("TapRoot card configured to use vaddr = %08X, size = %08X\n", taproot_addr, taproot_size);
-		bcopy_nc(version, (void *)(taproot_addr + 16), strlen(version));	/* (BRINGUP) Pass it our kernel version */
-		__asm__ volatile("eieio");		/* (BRINGUP) */
-		xtaproot = (unsigned int *)taproot_addr;	/* (BRINGUP) */
-		xtaproot[0] = 1;				/* (BRINGUP) */
-		__asm__ volatile("eieio");		/* (BRINGUP) */
-	}
-
-	PE_create_console();				/* create the console for verbose or pretty mode */
-
-	/* setup console output */
-	PE_init_printf(FALSE);
-
-#if DEBUG
-	printf("\n\n\nThis program was compiled using gcc %d.%d for powerpc\n",
-	       __GNUC__,__GNUC_MINOR__);
-
-
-	/* Processor version information */
-	__asm__ ("mfpvr %0" : "=r" (pvr));
-	printf("processor version register : %08X\n", pvr);
-
-	kprintf("Args at %p\n", args);
-	for (i = 0; i < pmap_mem_regions_count; i++) {
-			printf("DRAM at %08lX size %08lX\n",
-			       args->PhysicalDRAM[i].base,
-			       args->PhysicalDRAM[i].size);
-	}
-#endif /* DEBUG */
-
-#if DEBUG
-	kprintf("Mapped memory:\n");
-	kprintf("   exception vector: %08X, %08X - %08X\n", trunc_page(exception_entry), 
-		trunc_page(exception_entry), round_page(exception_end));
-	kprintf("          sectTEXTB: %08X, %08X - %08X\n", trunc_page(sectTEXTB), 
-		trunc_page(sectTEXTB), round_page(sectTEXTB+sectSizeTEXT));
-	kprintf("          sectDATAB: %08X, %08X - %08X\n", trunc_page(sectDATAB), 
-		trunc_page(sectDATAB), round_page(sectDATAB+sectSizeDATA));
-	kprintf("          sectLINKB: %08X, %08X - %08X\n", trunc_page(sectLINKB), 
-		trunc_page(sectLINKB), round_page(sectLINKB+sectSizeLINK));
-	kprintf("           sectKLDB: %08X, %08X - %08X\n", trunc_page(sectKLDB), 
-		trunc_page(sectKLDB), round_page(sectKLDB+sectSizeKLD));
-	kprintf("               end: %08X, %08X - %08X\n", trunc_page(end), 
-		trunc_page(end), static_memory_end);
-
-#endif
-
-	return;
-}
-
diff --git a/osfmk/ppc/proc_reg.h b/osfmk/ppc/proc_reg.h
deleted file mode 100644
index 6fb49c613..000000000
--- a/osfmk/ppc/proc_reg.h
+++ /dev/null
@@ -1,403 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifndef _PPC_PROC_REG_H_
-#define _PPC_PROC_REG_H_
-
-#include <mach/boolean.h>
-
-/* Define some useful masks that convert from bit numbers */
-
-#if __PPC__
-#ifdef __BIG_ENDIAN__
-#ifndef ENDIAN_MASK
-#define ENDIAN_MASK(val,size) (1 << ((size-1) - val))
-#endif
-#else
-#error code not ported to little endian targets yet
-#endif /* __BIG_ENDIAN__ */
-#endif /* __PPC__ */
-
-#define MASK32(PART)	ENDIAN_MASK(PART ## _BIT, 32)
-#define MASK16(PART)	ENDIAN_MASK(PART ## _BIT, 16)
-#define MASK8(PART)	ENDIAN_MASK(PART ## _BIT, 8)
-
-#undef MASK
-#define MASK(PART)	MASK32(PART)
-
-#define BITS_PER_WORD	32
-#define BITS_PER_WORD_POW2 5
-
-/* Defines for decoding the MSR bits */
-
-#define MSR_SF_BIT		0
-#define MSR_HV_BIT		3
-#define MSR_RES1_BIT	1
-#define MSR_RES2_BIT	2
-#define MSR_RES3_BIT	3
-#define MSR_RES4_BIT	4
-#define MSR_RES5_BIT	5
-#define MSR_VEC_BIT		6
-#define MSR_RES7_BIT	7
-#define MSR_RES8_BIT	8
-#define MSR_RES9_BIT	9
-#define MSR_RES10_BIT	10
-#define MSR_RES11_BIT	11
-#define MSR_KEY_BIT	12	/* Key bit on 603e (not on 603) */
-#define	MSR_POW_BIT	13
-#define MSR_TGPR_BIT	14	/* Temporary GPR mappings on 603/603e */
-#define MSR_ILE_BIT	15
-#define	MSR_EE_BIT	16
-#define	MSR_PR_BIT	17
-#define MSR_FP_BIT	18
-#define MSR_ME_BIT	19
-#define MSR_FE0_BIT	20
-#define MSR_SE_BIT	21
-#define	MSR_BE_BIT	22
-#define MSR_FE1_BIT	23
-#define MSR_RES24_BIT	24	/* AL bit in power architectures */
-#define MSR_IP_BIT      25
-#define MSR_IR_BIT      26
-#define MSR_DR_BIT      27
-#define MSR_RES28_BIT	28
-#define MSR_PM_BIT	29
-#define	MSR_RI_BIT	30
-#define MSR_LE_BIT	31
-
-/* MSR for kernel mode, interrupts disabled, running in virtual mode */
-#define MSR_SUPERVISOR_INT_OFF (MASK(MSR_ME) | MASK(MSR_IR) | MASK(MSR_DR))  
-
-/* MSR for above but with interrupts enabled */
-#define MSR_SUPERVISOR_INT_ON (MSR_SUPERVISOR_INT_OFF | MASK(MSR_EE))
-
-/* MSR for physical mode code */
-#define MSR_VM_OFF     (MASK(MSR_ME))
-
-/* MSR for physical instruction, virtual data */
-#define MSR_PHYS_INST_VIRT_DATA     (MASK(MSR_ME) | MASK(MSR_IR))
-
-/* MSR mask for user-exported bits - identify bits that must be set/reset */
-
-/* SET - external exceptions, machine check, vm on, user-level privs */
-#define MSR_EXPORT_MASK_SET	(MASK(MSR_EE)| MASK(MSR_ME)| \
-				 MASK(MSR_IR)|MASK(MSR_DR)|MASK(MSR_PR))
-
-/* only the following bits may be changed by a task */
-#define MSR_IMPORT_BITS (MASK(MSR_FE0)|MASK(MSR_SE)|MASK(MSR_BE)| \
-			 MASK(MSR_FE1)| MASK(MSR_PM) | MASK(MSR_LE))
-
-#define MSR_PREPARE_FOR_IMPORT(origmsr, newmsr) \
-	((origmsr & ~MSR_IMPORT_BITS) | (newmsr & MSR_IMPORT_BITS))
-
-#define MSR_VEC_ON	(MASK(MSR_VEC))
-
-#define USER_MODE(msr) (msr & MASK(MSR_PR) ? TRUE : FALSE)
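-
-/*	For illustration, MASK(PART) expands to (1 << (31 - PART##_BIT)), so
- *	with the bit numbers above:
- *
- *		MASK(MSR_EE) == 0x00008000	MASK(MSR_PR) == 0x00004000
- *		MASK(MSR_ME) == 0x00001000	MASK(MSR_IR) == 0x00000020
- *		MASK(MSR_DR) == 0x00000010
- *
- *	and hence MSR_SUPERVISOR_INT_OFF == 0x00001030 and
- *	MSR_SUPERVISOR_INT_ON == 0x00009030.
- */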
-
-/* seg reg values must be simple expressions so that assembler can cope */
-#define SEG_REG_INVALID 0x0000
-#define KERNEL_SEG_REG0_VALUE 0x20000000 /* T=0,Ks=0,Ku=1 PPC_SID_KERNEL=0*/
-
-/* For SEG_REG_PROT we have T=0, Ks=0, Ku=1 */
-#define SEG_REG_PROT	0x20000000   /* seg regs should have these bits set */
-
-/* SR_COPYIN is used for copyin/copyout+remapping and must be
- * saved and restored in the thread context.
- */
-/* SR_UNUSED_BY_KERN is unused by the kernel, and thus contains
- * the space ID of the currently interrupted user task immediately
- * after an exception and before interrupts are reenabled. It's used
- * purely for an assert.
- */
-
-/* SR_KERNEL used for asserts... */
-
-#define SR_COPYIN	sr14
-#define SR_UNUSED_BY_KERN sr13
-#define SR_KERNEL 	sr0
-
-#define SR_UNUSED_BY_KERN_NUM 13
-#define SR_COPYIN_NAME	sr14
-#define SR_COPYIN_NUM	14
-#define BAT_INVALID 0
-
-
-/* DSISR bits on data access exceptions */
-
-#define DSISR_IO_BIT		0	/* NOT USED on 601 */
-#define DSISR_HASH_BIT		1
-#define DSISR_NOEX_BIT		3
-#define DSISR_PROT_BIT		4
-#define DSISR_IO_SPC_BIT	5
-#define DSISR_WRITE_BIT		6
-#define DSISR_WATCH_BIT		9
-#define DSISR_EIO_BIT		11
-
-#define dsiMiss 			0x40000000
-#define dsiMissb 			1
-#define dsiNoEx				0x10000000
-#define dsiProt				0x08000000
-#define dsiInvMode			0x04000000
-#define dsiStore			0x02000000
-#define dsiAC				0x00400000
-#define dsiSeg				0x00200000
-#define dsiValid			0x5E600000
-#define dsiLinkage			0x00010000	/* Linkage mapping type - software flag */
-#define dsiLinkageb			15			/* Linkage mapping type - software flag */
-#define dsiSoftware			0x0000FFFF
-
-/* SRR1 bits on data/instruction translation exceptions */
-
-#define SRR1_TRANS_HASH_BIT	1
-#define SRR1_TRANS_IO_BIT	3
-#define SRR1_TRANS_PROT_BIT	4
-#define SRR1_TRANS_NO_PTE_BIT	10
-
-/* SRR1 bits on program exceptions */
-
-#define SRR1_PRG_FE_BIT		11
-#define SRR1_PRG_ILL_INS_BIT	12
-#define SRR1_PRG_PRV_INS_BIT	13
-#define SRR1_PRG_TRAP_BIT	14
-
-/*
- * Virtual to physical mapping macros/structures.
- * IMPORTANT NOTE: there is one mapping per HW page, not per MACH page.
- */
-
-#define PTE1_WIMG_GUARD_BIT	28	/* Needed for assembler */
-#define PTE1_REFERENCED_BIT	23	/* ditto */
-#define PTE1_CHANGED_BIT	24
-#define PTE0_HASH_ID_BIT	25
-
-#define PTE_WIMG_CB_CACHED_COHERENT		0 	/* cached, writeback, coherent (default) */
-#define PTE_WIMG_CB_CACHED_COHERENT_GUARDED	1 	/* cached, writeback, coherent, guarded */
-#define PTE_WIMG_UNCACHED_COHERENT		2	/* uncached, coherent */
-#define PTE_WIMG_UNCACHED_COHERENT_GUARDED	3	/* uncached, coherent, guarded */
-
-#define PTE_WIMG_DEFAULT 	PTE_WIMG_CB_CACHED_COHERENT
-#define PTE_WIMG_IO		PTE_WIMG_UNCACHED_COHERENT_GUARDED
-
-
-
-#ifndef ASSEMBLER
-#ifdef __GNUC__
-
-/* Structures and types for machine registers */
-
-
-/*
- * C-helper inline functions for accessing machine registers follow.
- */
-
-
-/*
- * Various memory/IO synchronisation instructions
- */
-
-        /*	Use eieio as a memory barrier to order stores.
-         *	Useful for device control and PTE maintenance.
-         */
-
-#define eieio() \
-        __asm__ volatile("eieio")
-
-        /* 	Use sync to ensure previous stores have completed.
-        	This is required when manipulating locks and/or
-        	maintaining PTEs or other shared structures on SMP 
-        	machines.
-        */
-
-#define sync() \
-        __asm__ volatile("sync")
-
-        /*	Use isync to synchronize context; that is, to ensure
-        	that instructions following the isync are not
-        	prefetched before it completes.
-        */
-
-#define isync() \
-        __asm__ volatile("isync")
-
-
-/*
- * Access to various system registers
- */
-
-extern unsigned int mflr(void);
-
-extern __inline__ unsigned int mflr(void)
-{
-        unsigned int result;
-        __asm__ volatile("mflr %0" : "=r" (result));
-        return result;
-}
-
-extern unsigned int mfpvr(void);
-
-extern __inline__ unsigned int mfpvr(void)
-{
-        unsigned int result;
-        __asm__ ("mfpvr %0" : "=r" (result));
-        return result;
-}
-
-/* mtmsr may need syncs etc. around it, so no simple
- * inline macro is provided
- */
-
-extern unsigned int mfmsr(void);
-
-extern __inline__ unsigned int mfmsr(void)
-{
-        unsigned int result;
-        __asm__ volatile("mfmsr %0" : "=r" (result));
-        return result;
-}
-
-
-extern unsigned int mfdar(void);
-
-extern __inline__ unsigned int mfdar(void)
-{
-        unsigned int result;
-        __asm__ volatile("mfdar %0" : "=r" (result));
-        return result;
-}
-
-extern void mtdec(unsigned int val);
-
-extern __inline__ void mtdec(unsigned int val)
-{
-        __asm__ volatile("mtdec %0" : : "r" (val));
-        return;
-}
-
-extern void mttb(unsigned int val);
-
-extern __inline__ void mttb(unsigned int val)
-{
-        __asm__ volatile("mtspr tbl, %0" : : "r" (val));
-        return;
-}
-
-extern unsigned int mftb(void);
-
-extern __inline__ unsigned int mftb(void)
-{
-        unsigned int result;
-        __asm__ volatile("mftb %0" : "=r" (result));
-        return result;
-}
-
-extern void mttbu(unsigned int val);
-
-extern __inline__ void mttbu(unsigned int val)
-{
-        __asm__ volatile("mtspr tbu, %0" : : "r" (val));
-        return;
-}
-
-extern unsigned int mftbu(void);
-
-extern __inline__ unsigned int mftbu(void)
-{
-        unsigned int result;
-        __asm__ volatile("mftbu %0" : "=r" (result));
-        return result;
-}
-
-extern unsigned int mfl2cr(void);
-
-extern __inline__ unsigned int mfl2cr(void)
-{
-  unsigned int result;
-  __asm__ volatile("mfspr %0, l2cr" : "=r" (result));
-  return result;
-}
-
-extern unsigned int cntlzw(unsigned int num);
-
-extern __inline__ unsigned int cntlzw(unsigned int num)
-{
-  unsigned int result;
-  __asm__ volatile("cntlzw %0, %1" : "=r" (result) : "r" (num));
-  return result;
-}
-
-
-/* functions for doing byte reversed loads and stores */
-
-extern unsigned int lwbrx(unsigned int addr);
-
-extern __inline__ unsigned int lwbrx(unsigned int addr)
-{
-  unsigned int result;
-  __asm__ volatile("lwbrx %0, 0, %1" : "=r" (result) : "r" (addr));
-  return result;
-}
-
-extern void stwbrx(unsigned int data, unsigned int addr);
-
-extern __inline__ void stwbrx(unsigned int data, unsigned int addr)
-{
-  __asm__ volatile("stwbrx %0, 0, %1" : : "r" (data), "r" (addr));
-}
-
-/* Performance Monitor Register access routines */
-extern unsigned long	mfmmcr0(void);
-extern void		mtmmcr0(unsigned long);
-extern unsigned long	mfmmcr1(void);
-extern void		mtmmcr1(unsigned long);
-extern unsigned long	mfmmcr2(void);
-extern void		mtmmcr2(unsigned long);
-extern unsigned long	mfpmc1(void);
-extern void		mtpmc1(unsigned long);
-extern unsigned long	mfpmc2(void);
-extern void		mtpmc2(unsigned long);
-extern unsigned long	mfpmc3(void);
-extern void		mtpmc3(unsigned long);
-extern unsigned long	mfpmc4(void);
-extern void		mtpmc4(unsigned long);
-extern unsigned long	mfsia(void);
-extern unsigned long	mfsda(void);
-
-/* macros since the argument n is a hard-coded constant */
-
-#define mtsprg(n, reg)  __asm__ volatile("mtsprg  " # n ", %0" : : "r" (reg))
-#define mfsprg(reg, n)  __asm__ volatile("mfsprg  %0, " # n : "=r" (reg))
-
-#define mtspr(spr, val)  __asm__ volatile("mtspr  " # spr ", %0" : : "r" (val))
-#define mfspr(reg, spr)  __asm__ volatile("mfspr  %0, " # spr : "=r" (reg))
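-
-/*	Usage sketch (illustrative): because the SPR number is pasted into the
- *	instruction text, these must be invoked as statements with literal
- *	constants, e.g.:
- *
- *		unsigned int tmp;
- *		mfsprg(tmp, 0);		(tmp <- SPRG0)
- *		mtsprg(1, tmp);		(SPRG1 <- tmp)
- */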
-
-#endif /* __GNUC__ */
-#endif /* !ASSEMBLER */
-
-#endif /* _PPC_PROC_REG_H_ */
diff --git a/osfmk/ppc/rtclock.c b/osfmk/ppc/rtclock.c
deleted file mode 100644
index 7c1222bd0..000000000
--- a/osfmk/ppc/rtclock.c
+++ /dev/null
@@ -1,306 +0,0 @@
-/*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-/*
- *	File:		rtclock.c
- *	Purpose:	Routines for handling the machine dependent
- *				real-time clock.
- */
-
-#include <mach/mach_types.h>
-
-#include <kern/clock.h>
-#include <kern/thread.h>
-#include <kern/processor.h>
-#include <kern/macro_help.h>
-#include <kern/spl.h>
-#include <kern/pms.h>
-
-#include <machine/commpage.h>
-#include <machine/machine_routines.h>
-#include <ppc/exception.h>
-#include <ppc/proc_reg.h>
-#include <ppc/rtclock.h>
-
-#include <sys/kdebug.h>
-
-int		rtclock_config(void);
-
-int		rtclock_init(void);
-
-#define NSEC_PER_HZ		(NSEC_PER_SEC / 100)
-
-static uint32_t			rtclock_sec_divisor;
-
-static mach_timebase_info_data_t	rtclock_timebase_const;
-
-static boolean_t		rtclock_timebase_initialized;
-
-decl_simple_lock_data(static,rtclock_lock)
-
-/*
- *	Macros to lock/unlock real-time clock device.
- */
-#define LOCK_RTC(s)					\
-MACRO_BEGIN							\
-	(s) = splclock();				\
-	simple_lock(&rtclock_lock);		\
-MACRO_END
-
-#define UNLOCK_RTC(s)				\
-MACRO_BEGIN							\
-	simple_unlock(&rtclock_lock);	\
-	splx(s);						\
-MACRO_END
-
-static void
-timebase_callback(
-	struct timebase_freq_t	*freq)
-{
-	uint32_t	numer, denom;
-	spl_t		s;
-
-	if (	freq->timebase_den < 1 || freq->timebase_den > 4	||
-			freq->timebase_num < freq->timebase_den				)			
-		panic("rtclock timebase_callback: invalid constant %lu / %lu",
-					freq->timebase_num, freq->timebase_den);
-
-	denom = freq->timebase_num;
-	numer = freq->timebase_den * NSEC_PER_SEC;
-
-	LOCK_RTC(s);
-	if (!rtclock_timebase_initialized) {
-		commpage_set_timestamp(0,0,0);
-
-		rtclock_timebase_const.numer = numer;
-		rtclock_timebase_const.denom = denom;
-		rtclock_sec_divisor = freq->timebase_num / freq->timebase_den;
-
-		ml_init_lock_timeout();
-	}
-	else {
-		UNLOCK_RTC(s);
-		printf("rtclock timebase_callback: late old %d / %d new %d / %d\n",
-					rtclock_timebase_const.numer, rtclock_timebase_const.denom,
-							numer, denom);
-		return;
-	}
-	UNLOCK_RTC(s);
-
-	clock_timebase_init();
-}
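-
-/*
- *	Worked example (hypothetical 25 MHz timebase, i.e. timebase_num =
- *	25000000 and timebase_den = 1): the callback above yields
- *	rtclock_sec_divisor = 25000000 ticks per second and
- *	rtclock_timebase_const = { numer = 1000000000, denom = 25000000 },
- *	so nanoseconds = ticks * numer / denom = ticks * 40.
- */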
-
-/*
- * Configure the system clock device.
- */
-int
-rtclock_config(void)
-{
-	simple_lock_init(&rtclock_lock, 0);
-
-	PE_register_timebase_callback(timebase_callback);
-
-	return (1);
-}
-
-/*
- * Initialize the system clock device.
- */
-int
-rtclock_init(void)
-{
-	etimer_resync_deadlines();			/* Start the timers going */
-
-	return (1);
-}
-
-void
-clock_get_system_microtime(
-	uint32_t			*secs,
-	uint32_t			*microsecs)
-{
-	uint64_t	now, t64;
-	uint32_t	divisor;
-
-	now = mach_absolute_time();
-
-	*secs = t64 = now / (divisor = rtclock_sec_divisor);
-	now -= (t64 * divisor);
-	*microsecs = (now * USEC_PER_SEC) / divisor;
-}
-
-void
-clock_get_system_nanotime(
-	uint32_t			*secs,
-	uint32_t			*nanosecs)
-{
-	uint64_t	now, t64;
-	uint32_t	divisor;
-
-	now = mach_absolute_time();
-
-	*secs = t64 = now / (divisor = rtclock_sec_divisor);
-	now -= (t64 * divisor);
-	*nanosecs = (now * NSEC_PER_SEC) / divisor;
-}
-
-void
-clock_gettimeofday_set_commpage(
-	uint64_t				abstime,
-	uint64_t				epoch,
-	uint64_t				offset,
-	uint32_t				*secs,
-	uint32_t				*microsecs)
-{
-	uint64_t				t64, now = abstime;
-
-	simple_lock(&rtclock_lock);
-
-	now += offset;
-
-	*secs = t64 = now / rtclock_sec_divisor;
-	now -= (t64 * rtclock_sec_divisor);
-	*microsecs = (now * USEC_PER_SEC) / rtclock_sec_divisor;
-
-	*secs += epoch;
-
-	commpage_set_timestamp(abstime - now, *secs, rtclock_sec_divisor);
-
-	simple_unlock(&rtclock_lock);
-}
-
-void
-clock_timebase_info(
-	mach_timebase_info_t	info)
-{
-	spl_t		s;
-
-	LOCK_RTC(s);
-	*info = rtclock_timebase_const;
-	rtclock_timebase_initialized = TRUE;
-	UNLOCK_RTC(s);
-}	
-
-void
-clock_interval_to_absolutetime_interval(
-	uint32_t			interval,
-	uint32_t			scale_factor,
-	uint64_t			*result)
-{
-	uint64_t		nanosecs = (uint64_t)interval * scale_factor;
-	uint64_t		t64;
-	uint32_t		divisor;
-
-	*result = (t64 = nanosecs / NSEC_PER_SEC) *
-							(divisor = rtclock_sec_divisor);
-	nanosecs -= (t64 * NSEC_PER_SEC);
-	*result += (nanosecs * divisor) / NSEC_PER_SEC;
-}
-
-void
-absolutetime_to_microtime(
-	uint64_t			abstime,
-	uint32_t			*secs,
-	uint32_t			*microsecs)
-{
-	uint64_t	t64;
-	uint32_t	divisor;
-
-	*secs = t64 = abstime / (divisor = rtclock_sec_divisor);
-	abstime -= (t64 * divisor);
-	*microsecs = (abstime * USEC_PER_SEC) / divisor;
-}
-
-void
-absolutetime_to_nanotime(
-	uint64_t			abstime,
-	uint32_t			*secs,
-	uint32_t			*nanosecs)
-{
-	uint64_t	t64;
-	uint32_t	divisor;
-
-	*secs = t64 = abstime / (divisor = rtclock_sec_divisor);
-	abstime -= (t64 * divisor);
-	*nanosecs = (abstime * NSEC_PER_SEC) / divisor;
-}
-
-void
-nanotime_to_absolutetime(
-	uint32_t			secs,
-	uint32_t			nanosecs,
-	uint64_t			*result)
-{
-	uint32_t	divisor = rtclock_sec_divisor;
-
-	*result = ((uint64_t)secs * divisor) +
-				((uint64_t)nanosecs * divisor) / NSEC_PER_SEC;
-}
-
-void
-absolutetime_to_nanoseconds(
-	uint64_t			abstime,
-	uint64_t			*result)
-{
-	uint64_t		t64;
-	uint32_t		divisor;
-
-	*result = (t64 = abstime / (divisor = rtclock_sec_divisor)) * NSEC_PER_SEC;
-	abstime -= (t64 * divisor);
-	*result += (abstime * NSEC_PER_SEC) / divisor;
-}
-
-void
-nanoseconds_to_absolutetime(
-	uint64_t			nanosecs,
-	uint64_t			*result)
-{
-	uint64_t		t64;
-	uint32_t		divisor;
-
-	*result = (t64 = nanosecs / NSEC_PER_SEC) *
-							(divisor = rtclock_sec_divisor);
-	nanosecs -= (t64 * NSEC_PER_SEC);
-	*result += (nanosecs * divisor) / NSEC_PER_SEC;
-}
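-
-/*
- *	Note on the split computations above: a single expression such as
- *	(abstime * NSEC_PER_SEC) / divisor would overflow 64 bits after about
- *	2^64 / 10^9 (roughly 1.8e10) ticks -- minutes at typical timebase
- *	rates -- so whole seconds are converted first and only the sub-second
- *	remainder is scaled.
- */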
-
-void
-machine_delay_until(
-	uint64_t		deadline)
-{
-	uint64_t		now;
-
-	do {
-		now = mach_absolute_time();
-	} while (now < deadline);
-}
diff --git a/osfmk/ppc/savearea.c b/osfmk/ppc/savearea.c
deleted file mode 100644
index 0e95c5ff1..000000000
--- a/osfmk/ppc/savearea.c
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- *	This file is used to maintain the exception save areas
- *
- */
-
-#include <debug.h>
-#include <mach_kgdb.h>
-#include <mach_vm_debug.h>
-
-#include <kern/thread.h>
-#include <mach/vm_attributes.h>
-#include <mach/vm_param.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_map.h>
-#include <vm/vm_page.h>
-#include <mach/ppc/thread_status.h>
-#include <kern/spl.h>
-#include <kern/simple_lock.h>
-
-#include <kern/misc_protos.h>
-#include <ppc/misc_protos.h>
-#include <ppc/proc_reg.h>
-#include <ppc/mem.h>
-#include <ppc/pmap.h>
-#include <ppc/Firmware.h>
-#include <ppc/mappings.h>
-#include <ppc/exception.h>
-#include <ppc/savearea.h>
-#include <ddb/db_output.h>
-
-
-struct Saveanchor backpocket;									/* Emergency saveareas */
-unsigned int	debsave0 = 0;									/* Debug flag */
-unsigned int	backchain = 0;									/* Debug flag */
-
-/*
- *		These routines keep track of exception save areas and keep the count within specific limits.  If there are
- *		too few, more are allocated; if there are too many, some are released. The saveareas are where the PCBs are
- *		stored.  They never span a page boundary and are referenced by both virtual and real addresses.
- *		Within the interrupt vectors, the real address is used because at that level, no exceptions
- *		can be tolerated.  Save areas can be dynamic or permanent.  Permanent saveareas are allocated
- *		at boot time and must be in place before any type of exception occurs.  These are never released,
- *		and the number is based upon some arbitrary (yet to be determined) amount times the number of
- *		processors.  This represents the minimum number required to process a total system failure without
- *		destroying valuable and ever-so-handy system debugging information.
- *
- *		We keep two global free lists (the savearea free pool and the savearea free list) and one local
- *		list per processor.
- *
- *		The local lists are small and require no locked access.  They are chained using physical addresses
- *		and no interruptions are allowed when adding to or removing from the list. Also known as the 
- *		qfret list. This list is local to a processor and is intended for use only by very low level
- *		context handling code. 
- *
- *		The savearea free list is a medium size list that is globally accessible.  It is updated
- *		while holding a simple lock. The length of time that the lock is held is kept short.  The
- *		longest period of time is when the list is trimmed. Like the qfret lists, this is chained physically
- *		and must be accessed with translation and interruptions disabled. This is where the bulk
- *		of the free entries are located.
- *
- *		The saveareas are allocated from full pages.  A pool element is marked
- *		with an allocation map that shows which "slots" are free.  These pages are allocated via the
- *		normal kernel memory allocation functions. Queueing is with physical addresses.  The enqueue,
- *		dequeue, and search for free blocks are done under the free list lock.  A page stays on the
- *		free pool list only if there are empty slots in it.
- *
- *		Saveareas are counted as "in use" once they are removed from the savearea free list.
- *		This means that all areas on the local qfret list are considered in use.
- *
- *		There are two methods of obtaining a savearea.  The save_get function (which is also inlined
- *		in the low-level exception handler) attempts to get an area from the local qfret list.  This is
- *		done completely without locks.  If qfret is exhausted (or maybe just too low) an area is allocated
- *		from the savearea free list. If the free list is empty, we install the back pocket areas and
- *		panic.
- *
- *		The save_alloc function is designed to be called by high level routines, e.g., thread creation,
- *		etc.  It will allocate from the free list.  After allocation, it will compare the free count
- *		to the target value.  If outside of the range, it will adjust the size either upwards or
- *		downwards.
- *
- *		If we need to shrink the list, it will be trimmed to the target size and unlocked.  The code
- *		will walk the chain and return each savearea to its pool page.  If a pool page becomes
- *		completely empty, it is dequeued from the free pool list and enqueued (atomic queue
- *		function) to be released.
- *
- *		Once the trim list is finished, the pool release queue is checked to see if there are pages
- *		waiting to be released. If so, they are released one at a time.
- *
- *		If the free list needed to be grown rather than shrunken, we will first attempt to recover
- *		a page from the pending release queue (built when we trim the free list).  If we find one,
- *		it is allocated, otherwise, a page of kernel memory is allocated.  This loops until there are
- *		enough free saveareas.
- *		
- */
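-
-/*
- *	A minimal C sketch of the two-level scheme described above
- *	(illustrative only: the real fast path is hand-tuned assembler in
- *	savearea_asm.s and chains physical addresses; qfret_pop and
- *	freelist_pop_locked are hypothetical helpers):
- *
- *		struct savearea *sketch_save_get(void) {
- *			struct savearea *sv = qfret_pop();	(local, lock-free)
- *			if (sv == NULL)
- *				sv = freelist_pop_locked();	(global, simple lock)
- *			if (sv == NULL)
- *				panic("saveareas exhausted");	(after back pocket)
- *			return sv;
- *		}
- */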
-
-
-
-/*
- *		Allocate our initial context save areas.  As soon as we do this,
- *		we can take an interrupt. We do the saveareas here, 'cause they're guaranteed
- *		to be at least page aligned.
- *
- *		Note: these initial saveareas are all to be allocated from V=R, less than 4GB
- *		space.
- */
-
-
-void savearea_init(vm_offset_t addr) {
-
-	savearea_comm	*savec;
-	vm_offset_t	save;
-	unsigned int i;
-
-	
-	saveanchor.savetarget	= InitialSaveTarget;		/* Initial target value */
-	saveanchor.saveinuse	= 0;						/* Number of areas in use */
-
-	saveanchor.savefree    = 0;							/* Remember the start of the free chain */
-	saveanchor.savefreecnt = 0;							/* Remember the length */
-	saveanchor.savepoolfwd = (addr64_t)(uintptr_t)&saveanchor;		/* Remember pool forward */
-	saveanchor.savepoolbwd = (addr64_t)(uintptr_t)&saveanchor;		/* Remember pool backward */
-
-	save = 	addr;										/* Point to the whole block of blocks */	
-
-/*
- *	First we allocate the back pocket in case of emergencies
- */
-
-
-	for(i=0; i < BackPocketSaveBloks; i++) {			/* Initialize the back pocket saveareas */
-
-		savec = (savearea_comm *)save;					/* Get the control area for this one */
-
-		savec->sac_alloc = 0;							/* Mark it allocated */
-		savec->sac_vrswap = 0;							/* V=R, so the translation factor is 0 */
-		savec->sac_flags = sac_perm;					/* Mark it permanent */
-		savec->sac_flags |= 0x0000EE00;					/* Debug eyecatcher */
-		save_queue((uint32_t)savec >> 12);				/* Add page to savearea lists */
-		save += PAGE_SIZE;								/* Jump up to the next one now */
-	
-	}
-
-	backpocket = saveanchor;							/* Save this for emergencies */
-
-
-/*
- *	We've saved away the back pocket savearea info, so reset it all and
- *	now allocate for real
- */
-
-
-	saveanchor.savefree = 0;							/* Remember the start of the free chain */
-	saveanchor.savefreecnt = 0;							/* Remember the length */
-	saveanchor.saveadjust = 0;							/* Set none needed yet */
-	saveanchor.savepoolfwd = (addr64_t)(uintptr_t)&saveanchor;		/* Remember pool forward */
-	saveanchor.savepoolbwd = (addr64_t)(uintptr_t)&saveanchor;		/* Remember pool backward */
-
-	for(i=0; i < InitialSaveBloks; i++) {				/* Initialize the saveareas */
-
-		savec = (savearea_comm *)save;					/* Get the control area for this one */
-
-		savec->sac_alloc = 0;							/* Mark it allocated */
-		savec->sac_vrswap = 0;							/* V=R, so the translation factor is 0 */
-		savec->sac_flags = sac_perm;					/* Mark it permanent */
-		savec->sac_flags |= 0x0000EE00;					/* Debug eyecatcher */
-		save_queue((uint32_t)savec >> 12);				/* Add page to savearea lists */
-		save += PAGE_SIZE;								/* Jump up to the next one now */
-	
-	}
-
-/*
- *	We now have a free list that has our initial number of entries  
- *	The local qfret list is empty.  When we call save_get below it will see that
- *	the local list is empty and fill it for us.
- *
- *	It is ok to call save_get here because all initial saveareas are V=R in less
- *  than 4GB space, so 32-bit addressing is ok.
- *
- */
-
-/*
- * This will populate the local list  and get the first one for the system
- */ 	
-	/* XXX next_savearea should be a void * 4425541 */
-	getPerProc()->next_savearea = (unsigned long)(void *)save_get();
-
-/*
- *	The system is now able to take interruptions
- */
-}
-
-
-
-
-/*
- *		Obtains a savearea.  If the free list needs size adjustment it happens here.
- *		Don't actually allocate the savearea until after the adjustment is done.
- */
-
-struct savearea	*save_alloc(void) {						/* Reserve a save area */
-	
-	
-	if(saveanchor.saveadjust) save_adjust();			/* If size need adjustment, do it now */
-	
-	return save_get();									/* Pass the baby... */
-}
-
-
-/*
- * This routine releases a save area to the free queue.  If after that,
- * we have more than our maximum target, we start releasing what we can
- * until we hit the normal target. 
- */
-
-void
-save_release(struct savearea *save)
-{
-	/* Return a savearea to the free list */
-	save_ret(save);
-
-	/* Adjust the savearea free list and pool size if needed */
-	if(saveanchor.saveadjust)
-		save_adjust(); 
-}
-
-/*
- *		Adjusts the size of the free list.  Can either release or allocate full pages
- *		of kernel memory.  This can block.
- *
- *		Note that we will only run one adjustment and the amount needed may change
- *		while we are executing.
- *
- *		Calls to this routine are triggered by saveanchor.saveadjust.  This value is always calculated just before
- *		we unlock the saveanchor lock (this keeps it pretty accurate).  If the total of savefreecnt and saveinuse
- *		is within the hysteresis range, it is set to 0.  If outside, it is set to the number needed to bring
- *		the total to the target value.  Note that there is a minimum size to the free list (FreeListMin) and if
- *		savefreecnt falls below that, saveadjust is set to the number needed to bring it to that.
- */
-
-
-void save_adjust(void) {
-	
-	savearea_comm	*sctl, *sctlnext, *freepage;
-	kern_return_t ret;
-	ppnum_t physpage;
-
-	if(saveanchor.saveadjust < 0) 					{	/* Do we need to adjust down? */
-			
-		sctl = (savearea_comm *)save_trim_free();		/* Trim list to the need count, return start of trim list */
-				
-		while(sctl) {									/* Release the free pages back to the kernel */
-			sctlnext = CAST_DOWN(savearea_comm *, sctl->save_prev);	/* Get next in list */  
-			kmem_free(kernel_map, (vm_offset_t) sctl, PAGE_SIZE);	/* Release the page */
-			sctl = sctlnext;							/* Chain onwards */
-		}
-	}
-	else {												/* We need more... */
-
-		if(save_recover()) return;						/* If we can recover enough from the pool, return */
-		
-		while(saveanchor.saveadjust > 0) {				/* Keep going until we have enough */
-
-			ret = kmem_alloc_kobject(kernel_map, (vm_offset_t *)&freepage, PAGE_SIZE);	/* Get a page for free pool */
-			if(ret != KERN_SUCCESS) {					/* Did we get some memory? */
-				panic("Whoops...  Not a bit of wired memory left for saveareas\n");
-			}
-			
-			physpage = pmap_find_phys(kernel_pmap, (vm_offset_t)freepage);	/* Find physical page */
-			if(!physpage) {								/* See if we actually have this mapped*/
-				panic("save_adjust: wired page not mapped - va = %p\n", freepage);	/* Die */
-			}
-			
-			bzero((void *)freepage, PAGE_SIZE);			/* Clear it all to zeros */
-			freepage->sac_alloc = 0;					/* Mark all entries taken */
-			freepage->sac_vrswap = ((uint64_t)physpage << 12) ^ (uint64_t)((uintptr_t)freepage);	/* XOR to calculate conversion mask */
-	
-			freepage->sac_flags |= 0x0000EE00;			/* Set debug eyecatcher */
-						
-			save_queue(physpage);						/* Add all saveareas on page to free list */
-		}
-	}
-}
-
-/*
- *		Fake up information to make the saveareas look like a zone
- */
-void
-save_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size,
-		    vm_size_t *alloc_size, int *collectable, int *exhaustable)
-{
-	*count      = saveanchor.saveinuse;
-	*cur_size   = (saveanchor.savefreecnt + saveanchor.saveinuse) * (PAGE_SIZE / sac_cnt);
-	*max_size   = saveanchor.savemaxcount * (PAGE_SIZE / sac_cnt);
-	*elem_size  = sizeof(struct savearea);
-	*alloc_size = PAGE_SIZE;
-	*collectable = 1;
-	*exhaustable = 0;
-}
-
-
diff --git a/osfmk/ppc/savearea.h b/osfmk/ppc/savearea.h
deleted file mode 100644
index 496591cb6..000000000
--- a/osfmk/ppc/savearea.h
+++ /dev/null
@@ -1,393 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#ifdef	XNU_KERNEL_PRIVATE
-
-#ifndef _PPC_SAVEAREA_H_
-#define _PPC_SAVEAREA_H_
-
-#ifndef ASSEMBLER
-
-#include <sys/appleapiopts.h>
-
-#ifdef __APPLE_API_PRIVATE
-
-#if	defined(MACH_KERNEL_PRIVATE) || defined(BSD_KERNEL_PRIVATE)
-#include <stdint.h>
-#include <mach/vm_types.h>
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct savearea_comm {
-
-/*
- *	The following fields are common to all saveareas and are used to manage individual
- *	contexts.
- *	
- *	Fields that start with "save" are part of the individual saveareas.  Those that
- *	start with "sac" pertain to the free pool stuff and are valid only on the first slot
- *	in the page.
- */
-
-
-/*	Keep the save_prev, sac_next, and sac_prev in these positions; some assembler code depends on them to
- *	match up with fields in saveanchor.
- */
-                                                /* offset 0x000 */
-	addr64_t		save_prev;					/* The address of the previous (or next) savearea */
-	addr64_t		sac_next;					/* Points to next savearea page that has a free slot  - real */
-	addr64_t		sac_prev;					/* Points to previous savearea page that has a free slot  - real */
-	unsigned int	save_level;					/* Context ID */
-	unsigned int	save_01C;
-
-												/*	 0x20 */
-	unsigned int	save_time[2];				/* Context save time - for debugging or performance */
-	struct thread	*save_act;					/* Associated thread */
-    unsigned int	save_02c;
-	uint64_t		sac_vrswap;					/* XOR mask to swap V to R or vice versa */
-	unsigned int	save_flags;					/* Various flags */
-	unsigned int	sac_flags;					/* Various flags */
-    
-                                                /* offset 0x040 */
-	uint64_t		save_misc0;					/* Various stuff */
-	uint64_t		save_misc1;					/* Various stuff - snapshot chain during hibernation */
-	unsigned int	sac_alloc;					/* Bitmap of allocated slots */
-    unsigned int	save_054;
-    unsigned int	save_misc2;
-    unsigned int	save_misc3;
-
-												/* offset 0x0060 */
-} savearea_comm;
-#pragma pack()
-
-/*
- *	This type of savearea contains all of the general context.
- */
- 
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct savearea {
-
-	savearea_comm	save_hdr;					/* Stuff common to all saveareas */
-
-	uint64_t		save_xdat0;					/* Exception data 0 */
-	uint64_t		save_xdat1;					/* Exception data 1 */
-	uint64_t		save_xdat2;					/* Exception data 2 */
-	uint64_t		save_xdat3;					/* Exception data 3 */
-                                                /* offset 0x0080 */
-	uint64_t	 	save_r0;
-	uint64_t	 	save_r1;
-	uint64_t	 	save_r2;
-	uint64_t	 	save_r3;
-                                                /* offset 0x0A0 */
-	uint64_t	 	save_r4;
-	uint64_t	 	save_r5;
-	uint64_t	 	save_r6;
-	uint64_t	 	save_r7;
-                                                /* offset 0x0C0 */
-	uint64_t	 	save_r8;
-	uint64_t	 	save_r9;
-	uint64_t	 	save_r10;
-	uint64_t	 	save_r11;
-                                                /* offset 0x0E0 */
-	uint64_t	 	save_r12;
-	uint64_t	 	save_r13;
-	uint64_t	 	save_r14;
-	uint64_t	 	save_r15;
-                                                /* offset 0x100 */
-	uint64_t	 	save_r16;
-	uint64_t	 	save_r17;
-	uint64_t	 	save_r18;
-	uint64_t	 	save_r19;
-                                                /* offset 0x120 */
-	uint64_t	 	save_r20;
-	uint64_t	 	save_r21;
-	uint64_t	 	save_r22;
-	uint64_t	 	save_r23;
-                                                /* offset 0x140 */
-	uint64_t	 	save_r24;
-	uint64_t	 	save_r25;
-	uint64_t	 	save_r26;	
-	uint64_t	 	save_r27;
-                                                /* offset 0x160 */
-	uint64_t	 	save_r28;
-	uint64_t		save_r29;
-	uint64_t	 	save_r30;
-	uint64_t	 	save_r31;
-                                                /* offset 0x180 */
-	uint64_t	 	save_srr0;
- 	uint64_t	 	save_srr1;
-	uint64_t	 	save_xer;
-	uint64_t	 	save_lr;
-                                                /* offset 0x1A0 */
-	uint64_t	 	save_ctr;
-	uint64_t	 	save_dar;
-	unsigned int	save_cr;
-	unsigned int 	save_dsisr;
-	unsigned int	save_exception; 
-	unsigned int	save_vrsave;
-                                                /* offset 0x1C0 */
-	unsigned int	save_vscr[4];
-	unsigned int	save_fpscrpad;
-	unsigned int	save_fpscr;
-    unsigned int	save_1d8[2];
-                                                /* offset 0x1E0 */
-	unsigned int	save_1E0[8];
-                                                /* offset 0x200 - keep on 128 byte bndry */
-    uint32_t        save_pmc[8]; 
-    uint64_t        save_mmcr0;					/* offset 0x220 */
-    uint64_t        save_mmcr1;
-    uint64_t        save_mmcr2;
-
-	unsigned int	save_238[2];
-												/* offset 0x240 */
-	unsigned int	save_instr[16];				/* Instrumentation or emulation. Note: save_instr[0] is number of instructions */
-												/* offset 0x280 */
-} savearea_t;
-#pragma pack()
-
-
-/*
- *	This type of savearea contains all of the floating point context.
- */
- 
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct savearea_fpu {
-
-	savearea_comm	save_hdr;					/* Stuff common to all saveareas */
-
-	unsigned int	save_060[8];				/* Fill 32 bytes */
-												/* offset 0x0080 */
-	double			save_fp0;
-	double			save_fp1;
-	double			save_fp2;
-	double			save_fp3;
-
-	double			save_fp4;
-	double			save_fp5;
-	double			save_fp6;
-	double			save_fp7;
-
-	double			save_fp8;
-	double			save_fp9;
-	double			save_fp10;
-	double			save_fp11;
-	
-	double			save_fp12;
-	double			save_fp13;
-	double			save_fp14;
-	double			save_fp15;
-	
-	double			save_fp16;
-	double			save_fp17;
-	double			save_fp18;
-	double			save_fp19;
-
-	double			save_fp20;
-	double			save_fp21;
-	double			save_fp22;
-	double			save_fp23;
-	
-	double			save_fp24;
-	double			save_fp25;
-	double			save_fp26;
-	double			save_fp27;
-	
-	double			save_fp28;
-	double			save_fp29;
-	double			save_fp30;
-	double			save_fp31;
-												/* offset 0x180 */
-	unsigned int	save_180[8];
-	unsigned int	save_1A0[8];
-	unsigned int	save_1C0[8];
-	unsigned int	save_1E0[8];
-	unsigned int	save_200[8];
-	unsigned int	save_220[8];
-	unsigned int	save_240[8];
-	unsigned int	save_260[8];
-
-												/* offset 0x280 */
-} savearea_fpu;
-#pragma pack()
-
-	
-
-/*
- *	This type of savearea contains all of the vector context.
- */
- 
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct savearea_vec {
-
-	savearea_comm	save_hdr;					/* Stuff common to all saveareas */
-
-	unsigned int	save_060[7];				/* Fill 32 bytes */
-	unsigned int	save_vrvalid;				/* Valid registers in saved context */
-
-												/* offset 0x0080 */
-	unsigned int	save_vr0[4];
-	unsigned int	save_vr1[4];
-	unsigned int	save_vr2[4];
-	unsigned int	save_vr3[4];
-	unsigned int	save_vr4[4];
-	unsigned int	save_vr5[4];
-	unsigned int	save_vr6[4];
-	unsigned int	save_vr7[4];
-	unsigned int	save_vr8[4];
-	unsigned int	save_vr9[4];
-	unsigned int	save_vr10[4];
-	unsigned int	save_vr11[4];
-	unsigned int	save_vr12[4];
-	unsigned int	save_vr13[4];
-	unsigned int	save_vr14[4];
-	unsigned int	save_vr15[4];
-	unsigned int	save_vr16[4];
-	unsigned int	save_vr17[4];
-	unsigned int	save_vr18[4];
-	unsigned int	save_vr19[4];
-	unsigned int	save_vr20[4];
-	unsigned int	save_vr21[4];
-	unsigned int	save_vr22[4];
-	unsigned int	save_vr23[4];
-	unsigned int	save_vr24[4];
-	unsigned int	save_vr25[4];
-	unsigned int	save_vr26[4];
-	unsigned int	save_vr27[4];
-	unsigned int	save_vr28[4];
-	unsigned int	save_vr29[4];
-	unsigned int	save_vr30[4];
-	unsigned int	save_vr31[4];
-
-												/* offset 0x280 */
-} savearea_vec;
-#pragma pack()
-#endif /* MACH_KERNEL_PRIVATE || BSD_KERNEL_PRIVATE */
-
-#ifdef	MACH_KERNEL_PRIVATE
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-struct Saveanchor {
-
-/*	
- *	Note that this is force-aligned in aligned_data.s and must be in V=R storage.
- *	Also, all addresses in chains are physical.  This structure can only be 
- *	updated with translation and interrupts disabled. This is because it is 
- *	locked during exception processing and if we were to take a PTE miss while the
- *	lock were held, well, that would be very bad now wouldn't it? 
- *  Note that the first 24 bytes must be the same format as a savearea header.
- */
-
-	unsigned int			savelock;		/* 000 Lock word for savearea free list manipulation */
-    int						saveRSVD4;		/* 004 reserved */
-	addr64_t				savepoolfwd;	/* 008 Forward anchor for the free pool */
-	addr64_t				savepoolbwd;	/* 010 Backward anchor for the free pool */
-	volatile addr64_t		savefree;		/* 018 Anchor for the global free list */
-	volatile unsigned int	savefreecnt;	/* 020 Number of saveareas on global free list */
-	volatile int			saveadjust;		/* 024 If 0, number of saveareas is ok; otherwise # to change (pos means grow, neg means shrink) */
-	volatile int			saveinuse;		/* 028 Number of areas in use counting those on the local free list */
-	unsigned int			savetarget;		/* 02C Number of saveareas needed */
-	int						savemaxcount;	/* 030 Maximum saveareas ever allocated */
-	unsigned int			saveinusesnapshot;		/* 034 snapshot inuse count */
-	volatile addr64_t		savefreesnapshot;		/* 038 snapshot global free list header */
-/*											   040 */
-};
-#pragma pack()
-
-extern struct Saveanchor	saveanchor;			/* Aligned savearea anchor */
-
-#define sac_cnt		(4096 / sizeof(struct savearea))	/* Number of saveareas per page */
-#define sac_empty	(0xFFFFFFFF << (32 - sac_cnt))	/* Mask with all entries empty */
-#define sac_perm	0x40000000				/* Page permanently assigned */
-#define sac_permb	1						/* Page permanently assigned - bit position */
-
-#define LocalSaveTarget	(((8 + sac_cnt - 1) / sac_cnt) * sac_cnt)	/* Target for size of local savearea free list */
-#define LocalSaveMin	(LocalSaveTarget / 2)	/* Min size of local savearea free list before we grow */
-#define LocalSaveMax	(LocalSaveTarget * 2)	/* Max size of local savearea free list before we trim */
-
-#define FreeListMin		(2 * LocalSaveTarget)	/* Always make sure there are enough to fill local list twice per processor */
-#define SaveLowHysteresis	LocalSaveTarget		/* The number off from target before we adjust upwards */
-#define SaveHighHysteresis	(2 * FreeListMin)	/* The number off from target before we adjust downwards */
-#define InitialSaveAreas 	(2 * FreeListMin)	/* The number of saveareas to make at boot time */
-#define InitialSaveTarget	FreeListMin			/* The number of saveareas for an initial target. This should be the minimum ever needed. */
-#define	InitialSaveBloks	((InitialSaveAreas + sac_cnt - 1) / sac_cnt)	/* The number of savearea blocks to allocate at boot */
-#define BackPocketSaveBloks	8				/* Number of pages of back pocket saveareas */
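-
-/*
- *	Worked example, assuming sizeof(struct savearea) == 0x280 (640 bytes)
- *	as the offset comments above indicate: sac_cnt = 4096/640 = 6 saveareas
- *	per page, sac_empty = 0xFC000000, LocalSaveTarget = 12, FreeListMin = 24,
- *	InitialSaveAreas = 48, InitialSaveTarget = 24, and InitialSaveBloks = 8
- *	pages at boot, plus BackPocketSaveBloks = 8 pages of emergency saveareas.
- */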
-
-void			save_queue(ppnum_t);		/* Add a new savearea block to the free list */
-addr64_t		save_get_init(void);		/* special savearea-get for cpu initialization (returns physical address) */
-struct savearea	*save_get(void);			/* Obtains a savearea from the free list (returns virtual address) */
-reg64_t			save_get_phys_32(void);		/* Obtains a savearea from the free list (returns phys addr in r3) */
-reg64_t			save_get_phys_64(void);		/* Obtains a savearea from the free list (returns phys addr in r3) */
-struct savearea	*save_alloc(void);			/* Obtains a savearea and allocates blocks if needed */
-struct savearea	*save_cpv(addr64_t);		/* Converts a physical savearea address to virtual */
-void			save_ret(struct savearea *);	/* Returns a savearea to the free list by virtual address */
-void			save_ret_wMSR(struct savearea *, reg64_t);	/* returns a savearea and restores an MSR */
-void			save_ret_phys(reg64_t);		/* Returns a savearea to the free list by physical address */
-void			save_adjust(void);			/* Adjust size of the global free list */
-struct savearea_comm	*save_trim_free(void);	/* Remove free pages from savearea pool */
-int				save_recover(void);			/* returns nonzero if we can recover enough from the free pool */
-void 			savearea_init(vm_offset_t addr);	/* Boot-time savearea initialization */
-
-void 			save_fake_zone_info(		/* report savearea usage statistics as fake zone info */
-					int *count,
-					vm_size_t *cur_size,
-					vm_size_t *max_size,
-					vm_size_t *elem_size,
-					vm_size_t *alloc_size, 
-					int *collectable, 
-					int *exhaustable);
-
-void			save_snapshot(void);
-void			save_snapshot_restore(void);
-void save_release(struct savearea *);
-
-#endif /* MACH_KERNEL_PRIVATE */
-#endif /* __APPLE_API_PRIVATE */
-
-#endif /* ndef ASSEMBLER */
-
-#define SAVattach	0x80000000				/* Savearea has valid context */
-#define SAVrststk	0x00010000				/* Indicates that the current stack should be reset to empty */
-#define SAVsyscall	0x00020000				/* Indicates that the savearea is associated with a syscall */
-#define SAVredrive	0x00040000				/* Indicates that the exception should be redriven through the low-level fault handler */
-#define SAVredriveb	13						/* Indicates that the exception should be redriven - bit position */
-#define	SAVinstrument 0x00080000			/* Indicates that we should return instrumentation data */
-#define	SAVinstrumentb 12					/* Indicates that we should return instrumentation data */
-#define	SAVeat 		0x00100000				/* Indicates that interruption should be ignored */
-#define	SAVeatb 	11						/* Indicates that interruption should be ignored */
-#define	SAVinject 	0x00200000				/* Indicates that save_instr contains code to inject */
-#define	SAVinjectb 	10						/* Indicates that save_instr contains code to inject */
-#define SAVtype		0x0000FF00				/* Shows type of savearea */
-#define SAVtypeshft	8						/* Shift to position type */
-#define SAVempty	0x86					/* Savearea is on free list */
-#define SAVgeneral	0x01					/* Savearea contains general context */
-#define SAVfloat	0x02					/* Savearea contains floating point context */
-#define SAVvector	0x03					/* Savearea contains vector context */
-
-
-
-#endif /* _PPC_SAVEAREA_H_ */
-
-#endif	/* XNU_KERNEL_PRIVATE */
diff --git a/osfmk/ppc/savearea_asm.s b/osfmk/ppc/savearea_asm.s
deleted file mode 100644
index 6b42c7dd3..000000000
--- a/osfmk/ppc/savearea_asm.s
+++ /dev/null
@@ -1,1621 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#define FPVECDBG 0
-
-#include <assym.s>
-#include <debug.h>
-#include <db_machine_commands.h>
-#include <mach_rt.h>
-	
-#include <mach_debug.h>
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <ppc/exception.h>
-#include <ppc/Performance.h>
-#include <ppc/exception.h>
-#include <ppc/savearea.h>
-#include <mach/ppc/vm_param.h>
-	
-			.text
-
-/* Register usage conventions in this code:
- *	r9 = return address
- * r10 = per-proc ptr
- * r11 = MSR at entry
- * cr6 = feature flags (i.e., pf64Bit)
- *
- * Because much of this code deals with physical addresses,
- * there are parallel paths for 32- and 64-bit machines.
- */
- 
-
-/*
- * *****************************
- * * s a v e _ s n a p s h o t *
- * *****************************
- *
- *	void save_snapshot();
- *
- *			Link the current free list & processor local list on an independent list.
- */
-			.align	5
-			.globl	EXT(save_snapshot)
-
-LEXT(save_snapshot)
-            mflr	r9							; get return address
-            bl		saveSetup					; turn translation off, 64-bit on, load many regs
-            bf--	pf64Bitb,save_snapshot32 	; skip if 32-bit processor
-
-            ; Handle 64-bit processor.
-
-save_snapshot64:
-
-			ld		r8,next_savearea(r10)		; Start with the current savearea
-			std		r8,SVsavefreesnapshot(0)	; Make it the restore list anchor
-			ld		r5,SVfree(0)				; Get free save area list anchor 
-
-save_snapshot64nextfree:
-            mr		r7,r5
-			std		r7,savemisc1(r8)  			; Link this one
-			ld		r5,SAVprev(r7)				; Get the next 
-            mr		r8,r7
-            mr.		r0,r5
-            bne		save_snapshot64nextfree
-
-			lwz		r6,SVinuse(0)				; Get inuse count
-			ld		r5,lclfree(r10)				; Get the local savearea list
-            subi	r6,r6,1						; Count the first as free
-
-save_snapshot64nextlocalfree:
-            subi	r6,r6,1						; Count as free
-            mr		r7,r5
-			std		r7,savemisc1(r8)	 		; Link this one
-			ld		r5,SAVprev(r7)				; Get the next 
-            mr		r8,r7
-            mr.		r0,r5
-            bne		save_snapshot64nextlocalfree
-
-			std		r5,savemisc1(r8)	   		; End the list
-			stw		r6,SVsaveinusesnapshot(0)	; Save the new number of inuse saveareas
-
-			mtlr	r9							; Restore the return
-            b		saveRestore64				; Restore interrupts and translation
-
-            ; Handle 32-bit processor.
-
-save_snapshot32:
-			lwz		r8,next_savearea+4(r10)		; Start with the current savearea
-			stw		r8,SVsavefreesnapshot+4(0)	; Make it the restore list anchor
-			lwz		r5,SVfree+4(0)				; Get free save area list anchor 
-
-save_snapshot32nextfree:
-            mr		r7,r5
-			stw		r7,savemisc1+4(r8)  		; Link this one
-			lwz		r5,SAVprev+4(r7)			; Get the next 
-            mr		r8,r7
-            mr.		r0,r5
-            bne		save_snapshot32nextfree
-
-			lwz		r6,SVinuse(0)				; Get inuse count
-			lwz		r5,lclfree+4(r10)			; Get the local savearea list
-            subi	r6,r6,1						; Count the first as free
-
-save_snapshot32nextlocalfree:
-            subi	r6,r6,1						; Count as free
-            mr		r7,r5
-			stw		r7,savemisc1+4(r8)	 		; Link this one
-			lwz		r5,SAVprev+4(r7)			; Get the next 
-            mr		r8,r7
-            mr.		r0,r5
-            bne		save_snapshot32nextlocalfree
-
-			stw		r5,savemisc1+4(r8)	   		; End the list
-			stw		r6,SVsaveinusesnapshot(0)	; Save the new number of inuse saveareas
-
-			mtlr	r9							; Restore the return
-            b		saveRestore32				; Restore interrupts and translation
-
-/*
- * *********************************************
- * * s a v e _ s n a p s h o t _ r e s t o r e *
- * *********************************************
- *
- *	void save_snapshot_restore();
- *
- *			Restore the free list from the snapshot list, and reset the processors next savearea.
- */
-			.align	5
-			.globl	EXT(save_snapshot_restore)
-
-LEXT(save_snapshot_restore)
-            mflr	r9							; get return address
-            bl		saveSetup					; turn translation off, 64-bit on, load many regs
-            bf--	pf64Bitb,save_snapshot_restore32 	; skip if 32-bit processor
-
-            ; Handle 64-bit processor.
-
-save_snapshot_restore64:
-  			lwz		r7,SVsaveinusesnapshot(0)
-			stw		r7,SVinuse(0)				; Set the new inuse count
-
-            li		r6,0
-            stw		r6,lclfreecnt(r10)			; None local now
-			std		r6,lclfree(r10)				; None local now
-
-			ld		r8,SVsavefreesnapshot(0)	; Get the restore list anchor 
-			std		r8,SVfree(0)				; Make it the free list anchor
-			li		r5,SAVempty					; Get marker for free savearea
-
-save_snapshot_restore64nextfree:
-            addi	r6,r6,1						; Count as free
-			stb		r5,SAVflags+2(r8)			; Mark savearea free
-			ld		r7,savemisc1(r8)			; Get the next 
-			std		r7,SAVprev(r8)		   		; Set the next in free list
-            mr.		r8,r7
-            bne		save_snapshot_restore64nextfree
-
-            stw		r6,SVfreecnt(0)				; Set the new free count
-
-            bl		saveGet64
-            std		r3,next_savearea(r10)		; Get the next savearea 
-
-			mtlr	r9							; Restore the return
-            b		saveRestore64				; Restore interrupts and translation
-
-            ; Handle 32-bit processor.
-
-save_snapshot_restore32:
-  			lwz		r7,SVsaveinusesnapshot(0)
-			stw		r7,SVinuse(0)				; Set the new inuse count
-
-            li		r6,0
-            stw		r6,lclfreecnt(r10)			; None local now
-			stw		r6,lclfree+4(r10)			; None local now
-
-			lwz		r8,SVsavefreesnapshot+4(0)	; Get the restore list anchor 
-			stw		r8,SVfree+4(0)				; Make it the free list anchor
-			li		r5,SAVempty					; Get marker for free savearea
-
-save_snapshot_restore32nextfree:
-            addi	r6,r6,1						; Count as free
-			stb		r5,SAVflags+2(r8)			; Mark savearea free
-			lwz		r7,savemisc1+4(r8)			; Get the next 
-			stw		r7,SAVprev+4(r8)	   		; Set the next in free list
-            mr.		r8,r7
-            bne		save_snapshot_restore32nextfree
-
-            stw		r6,SVfreecnt(0)				; Set the new free count
-
-            bl		saveGet32
-            stw		r3,next_savearea+4(r10)		; Get the next savearea 
-
-			mtlr	r9							; Restore the return
-            b		saveRestore32				; Restore interrupts and translation
-
-/*
- * ***********************
- * * s a v e _ q u e u e *
- * ***********************
- *
- *	void save_queue(ppnum_t pagenum);
- *
- *			This routine will add a savearea block to the free list.
- *			We also queue the block to the free pool list.  This is a
- *			circular double linked list. Because this block has no free entries,
- *			it gets queued to the end of the list
- */
-			.align	5
-			.globl	EXT(save_queue)
-
-LEXT(save_queue)
-            mflr	r9							; get return address
-            mr		r8,r3						; move pagenum out of the way
-            bl		saveSetup					; turn translation off, 64-bit on, load many regs
-            bf--	pf64Bitb,saveQueue32		; skip if 32-bit processor
-            
-            sldi	r2,r8,12					; r2 <-- phys address of page
-			li		r8,sac_cnt					; Get the number of saveareas per page
-			mr		r4,r2						; Point to start of chain
-			li		r0,SAVempty					; Get empty marker
-
-saveQueue64a:	
-            addic.	r8,r8,-1					; Keep track of how many we did
-			stb		r0,SAVflags+2(r4)			; Set empty
-			addi	r7,r4,SAVsize				; Point to the next slot
-			ble-	saveQueue64b				; We are done with the chain
-			std		r7,SAVprev(r4)				; Set this chain
-			mr		r4,r7						; Step to the next
-			b		saveQueue64a				; Fill the whole block...
-
-saveQueue64b:
-			bl		savelock					; Go lock the save anchor 
-
-			ld		r7,SVfree(0)				; Get the free save area list anchor 
-			lwz		r6,SVfreecnt(0)				; Get the number of free saveareas
-
-			std		r2,SVfree(0)				; Queue in the new one 
-			addi	r6,r6,sac_cnt				; Count the ones we are linking in 
-			std		r7,SAVprev(r4)				; Queue the old first one off of us
-			stw		r6,SVfreecnt(0)				; Save the new count
-			b		saveQueueExit
-
-            ; Handle 32-bit processor.
-            
-saveQueue32:            
-            slwi	r2,r8,12					; r2 <-- phys address of page
-			li		r8,sac_cnt					; Get the number of saveareas per page
-			mr		r4,r2						; Point to start of chain
-			li		r0,SAVempty					; Get empty marker
-
-saveQueue32a:	
-            addic.	r8,r8,-1					; Keep track of how many we did
-			stb		r0,SAVflags+2(r4)			; Set empty
-			addi	r7,r4,SAVsize				; Point to the next slot
-			ble-	saveQueue32b				; We are done with the chain
-			stw		r7,SAVprev+4(r4)			; Set this chain
-			mr		r4,r7						; Step to the next
-			b		saveQueue32a				; Fill the whole block...
-
-saveQueue32b:
-			bl		savelock					; Go lock the save anchor 
-
-			lwz		r7,SVfree+4(0)				; Get the free save area list anchor 
-			lwz		r6,SVfreecnt(0)				; Get the number of free saveareas
-
-			stw		r2,SVfree+4(0)				; Queue in the new one 
-			addi	r6,r6,sac_cnt				; Count the ones we are linking in 
-			stw		r7,SAVprev+4(r4)			; Queue the old first one off of us
-			stw		r6,SVfreecnt(0)				; Save the new count
-
-saveQueueExit:									; join here from 64-bit path		
-			bl		saveunlock					; Unlock the list and set the adjust count
-			mtlr	r9							; Restore the return
-
-#if FPVECDBG
-			mfsprg	r2,1						; (TEST/DEBUG)
-			mr.		r2,r2						; (TEST/DEBUG)
-			beq--	saveRestore					; (TEST/DEBUG)
-			lis		r0,hi16(CutTrace)			; (TEST/DEBUG)
-			li		r2,0x2201					; (TEST/DEBUG)
-			oris	r0,r0,lo16(CutTrace)		; (TEST/DEBUG)
-			sc									; (TEST/DEBUG)
-#endif
-            b		saveRestore					; Restore interrupts and translation
-
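-/*
- *			In outline, save_queue does the following (hedged C sketch, using
- *			the "_sk" conventions from the sketch above; the locking helpers
- *			stand in for savelock/saveunlock):
- *
- *				void save_queue_sketch(struct savearea_sk *block)
- *				{
- *					struct savearea_sk *s = block;
- *					for (int i = 0; i < sac_cnt_sk - 1; i++) {	// chain the block
- *						s->SAVflags2 = SAVempty_sk;
- *						s->SAVprev = (struct savearea_sk *)((char *)s + SAVsize_sk);
- *						s = s->SAVprev;
- *					}
- *					s->SAVflags2 = SAVempty_sk;		// the last slot ends the chain
- *					savelock_sk();
- *					s->SAVprev = SVfree_sk;			// old head hangs off our tail
- *					SVfree_sk = block;				// the new block becomes the head
- *					SVfreecnt_sk += sac_cnt_sk;
- *					saveunlock_sk();				// also recomputes SVadjust
- *				}
- */
-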
-/*
- * *****************************
- * * s a v e _ g e t _ i n i t *
- * *****************************
- *
- *	addr64_t  save_get_init(void);
- *
- *			Note that save_get_init is used in initial processor startup only.  It
- *			is used because translation is on, but no tables exist yet and we have
- *			no V=R BAT registers that cover the entire physical memory.
- */
-			.align	5
-			.globl	EXT(save_get_init)
-
-LEXT(save_get_init)
-            mflr	r9							; get return address
-            bl		saveSetup					; turn translation off, 64-bit on, load many regs
-            bfl--	pf64Bitb,saveGet32			; Get r3 <- savearea, r5 <- page address (with SAC)
-            btl++	pf64Bitb,saveGet64			; get one on a 64-bit machine
-            bl		saveRestore					; restore translation etc
-            mtlr	r9
-            
-            ; unpack the physaddr in r3 into a long long in (r3,r4)
-            
-            mr		r4,r3						; copy low word of phys address to r4
-            li		r3,0						; assume upper word was 0
-            bflr--	pf64Bitb					; if 32-bit processor, return
-            srdi	r3,r4,32					; unpack reg64_t to addr64_t on 64-bit machine
-            rlwinm	r4,r4,0,0,31
-            blr
-            
-
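-/*
- *			The tail of save_get_init merely repacks the physical address for
- *			the 32-bit PowerPC ABI, which returns a 64-bit value in the r3/r4
- *			pair (r3 = high word, r4 = low word).  Equivalently (sketch only):
- *
- *				typedef unsigned long long addr64_sk;
- *
- *				static addr64_sk pack_addr64_sketch(unsigned int hi, unsigned int lo)
- *				{
- *					return ((addr64_sk)hi << 32) | lo;	// hi is 0 on 32-bit CPUs
- *				}
- */
-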
-/*
- * *******************
- * * s a v e _ g e t *
- * *******************
- *
- *	savearea *save_get(void);
- *
- *			Allocate a savearea, returning a virtual address.  NOTE: we must preserve
- *			r0, r2, and r12.  Our callers in cswtch.s depend on this.
- */
-			.align	5
-			.globl	EXT(save_get)
-
-LEXT(save_get)
-            mflr	r9							; get return address
-            mr		r5,r0						; copy regs before saveSetup nails them
-            bl		saveSetup					; turn translation off, 64-bit on, load many regs
-            bf--	pf64Bitb,svgt1				; skip if 32-bit processor
-            
-            std		r5,tempr0(r10)				; save r0 in per-proc across call to saveGet64
-            std		r2,tempr2(r10)				; and r2
-            std		r12,tempr4(r10)				; and r12
-            bl		saveGet64					; get r3 <- savearea, r5 <- page address (with SAC)
-            ld		r0,tempr0(r10)				; restore callers regs
-            ld		r2,tempr2(r10)
-            ld		r12,tempr4(r10)
-            b		svgt2
-            
-svgt1:											; handle 32-bit processor
-            stw		r5,tempr0+4(r10)			; save r0 in per-proc across call to saveGet32
-            stw		r2,tempr2+4(r10)			; and r2
-            stw		r12,tempr4+4(r10)			; and r12
-            bl		saveGet32					; get r3 <- savearea, r5 <- page address (with SAC)
-            lwz		r0,tempr0+4(r10)			; restore callers regs
-            lwz		r2,tempr2+4(r10)
-            lwz		r12,tempr4+4(r10)
-            
-svgt2:
-			lwz		r5,SACvrswap+4(r5)			; Get the virtual to real translation (only need low word)
-            mtlr	r9							; restore return address
-            xor		r3,r3,r5					; convert physaddr to virtual
-            rlwinm	r3,r3,0,0,31				; 0 upper word if a 64-bit machine
-
-#if FPVECDBG
-            mr		r6,r0						; (TEST/DEBUG)
-            mr		r7,r2						; (TEST/DEBUG)
-			mfsprg	r2,1						; (TEST/DEBUG)
-			mr.		r2,r2						; (TEST/DEBUG)
-			beq--	svgDBBypass					; (TEST/DEBUG)
-			lis		r0,HIGH_ADDR(CutTrace)		; (TEST/DEBUG)
-			li		r2,0x2203					; (TEST/DEBUG)
-			oris	r0,r0,LOW_ADDR(CutTrace)	; (TEST/DEBUG)
-			sc									; (TEST/DEBUG) 
-svgDBBypass:									; (TEST/DEBUG)
-            mr		r0,r6						; (TEST/DEBUG)
-            mr		r2,r7						; (TEST/DEBUG) 
-#endif			
-            b		saveRestore					; restore MSR and return to our caller
-            
-            
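-/*
- *			The conversion at svgt2 relies on SACvrswap, which (as used here)
- *			holds the XOR of the pool page's virtual and physical addresses,
- *			so a single xor converts in either direction.  Hedged C sketch:
- *
- *				// vrswap comes from the page's SAC area; helper name is assumed
- *				static void *phys_to_virt_sketch(unsigned long pa, unsigned long vrswap)
- *				{
- *					return (void *)(pa ^ vrswap);	// same xor also maps virt->phys
- *				}
- *
- *			save_ret, save_cpv, and save_trim_free below use the same trick.
- */
-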
-/*
- * ***********************************
- * * s a v e _ g e t _ p h y s _ 3 2 *
- * ***********************************
- *
- *	reg64_t	save_get_phys(void);
- *
- * 			This is the entry normally called from lowmem_vectors.s with
- *			translation and interrupts already off.
- *			MUST NOT TOUCH CR7
- */
-			.align	5
-			.globl	EXT(save_get_phys_32)
-
-LEXT(save_get_phys_32)
-            mfsprg	r10,0						; get the per-proc ptr
-			b		saveGet32					; Get r3 <- savearea, r5 <- page address (with SAC)
-
-
-/*
- * ***********************************
- * * s a v e _ g e t _ p h y s _ 6 4 *
- * ***********************************
- *
- *	reg64_t	save_get_phys_64(void);
- *
- * 			This is the entry normally called from lowmem_vectors.s with
- *			translation and interrupts already off, and in 64-bit mode.
- *			MUST NOT TOUCH CR7
- */
-			.align	5
-			.globl	EXT(save_get_phys_64)
-
-LEXT(save_get_phys_64)
-            mfsprg	r10,0						; get the per-proc ptr
- 			b		saveGet64					; Get r3 <- savearea, r5 <- page address (with SAC)
-            
-
-/*
- * *********************
- * * s a v e G e t 6 4 *
- * *********************
- *
- *			This is the internal routine to allocate a savearea on a 64-bit processor.  
- *			Note that we must not take any exceptions of any kind, including PTE misses, as that
- *			would deadlock trying to reenter this routine.  We pass back the 64-bit physical address.
- *			First we try the local list.  If that is below a threshold, we try the global free list,
- *			which requires taking a lock, and replenish.  If there are no saveareas in either list,
- *			we will install the  backpocket and choke.  This routine assumes that the caller has
- *			turned translation off, masked interrupts,  turned on 64-bit mode, and set up:
- *				r10 = per-proc ptr
- *
- *			We return:
- *				r3 = 64-bit physical address of the savearea
- *				r5 = 64-bit physical address of the page the savearea is in, with SAC
- *
- *			We destroy:
- *				r2-r8.
- *		
- * 			MUST NOT TOUCH CR7
- */
-
-saveGet64:            
-			lwz		r8,lclfreecnt(r10)			; Get the count
-			ld		r3,lclfree(r10)				; Get the start of local savearea list
-			cmplwi	r8,LocalSaveMin				; Are we too low?
-			ble--	saveGet64GetGlobal			; We are too low and need to grow list...
-
-            ; Get it from the per-processor local list.
-            
-saveGet64GetLocal:
-            li		r2,0x5555					; get r2 <-- 0x55555555 55555555, our bugbug constant
-			ld		r4,SAVprev(r3)				; Chain to the next one
-			oris	r2,r2,0x5555
-			subi	r8,r8,1						; Back down count
-            rldimi	r2,r2,32,0
-
-			std		r2,SAVprev(r3)				; bug next ptr
-			stw		r2,SAVlevel(r3)				; bug context ID
-            li		r6,0
-			std		r4,lclfree(r10)				; Unchain first savearea
-			stw		r2,SAVact(r3)				; bug activation ptr
-			rldicr	r5,r3,0,51					; r5 <-- page ptr, where SAC is kept
-			stw		r8,lclfreecnt(r10)			; Set new count
-			stw		r6,SAVflags(r3)				; clear the flags
-
-            blr
-
-            ; Local list was low so replenish from global list.
-            ;	 r7 = return address to caller of saveGet64
-            ;	 r8 = lclfreecnt
-            ;	r10 = per-proc ptr
-            
-saveGet64GetGlobal:
-            mflr	r7							; save return address
-			subfic	r5,r8,LocalSaveTarget		; Get the number of saveareas we need to grab to get to target
-			bl		savelock					; Go lock up the anchor
-			
-			lwz		r2,SVfreecnt(0)				; Get the number on this list
-			ld		r8,SVfree(0)				; Get the head of the save area list 
-			
-			sub		r3,r2,r5					; Get number left after we swipe enough for local list
-			sradi	r3,r3,63					; Get 0 if enough or -1 if not
-			andc	r4,r5,r3					; Get number to get if there are enough, 0 otherwise
-			and		r5,r2,r3					; Get 0 if there are enough, number on list otherwise
-			or.		r5,r4,r5					; r5 <- number we will move from global to local list
-			beq--	saveGet64NoFree				; There are none to get...
-			
-			mtctr	r5							; Get loop count
-			mr		r6,r8						; Remember the first in the list
-
-saveGet64c:
-            bdz		saveGet64d					; Count down and branch when we hit 0...
-			ld		r8,SAVprev(r8)				; Get the next
-			b		saveGet64c					; Keep going...
-
-saveGet64d:			
-            ld		r3,SAVprev(r8)				; Get the next one
-			lwz		r4,SVinuse(0)				; Get the in use count
-			sub		r2,r2,r5					; Count down what we stole
-			std		r3,SVfree(0)				; Set the new first in list
-			add		r4,r4,r5					; Count the ones we just put in the local list as "in use"
-			stw		r2,SVfreecnt(0)				; Set the new count
-			stw		r4,SVinuse(0)				; Set the new in use count
-			
-			ld		r4,lclfree(r10)				; Get the old head of list
-			lwz		r3,lclfreecnt(r10)			; Get the old count
-			std		r6,lclfree(r10)				; Set the new head of the list
-			add		r3,r3,r5					; Get the new count
-			std		r4,SAVprev(r8)				; Point to the old head
-			stw		r3,lclfreecnt(r10)			; Set the new count
-
-			bl		saveunlock					; Update the adjust field and unlock
-            mtlr	r7							; restore return address
-			b		saveGet64					; Start over and finally allocate the savearea...
-			
-            ; The local list is below the repopulate threshold and the global list is empty.
-            ; First we check if there are any left in the local list and if so, we allow
-            ; them to be allocated.  If not, we release the backpocket list and choke.  
-            ; There is nothing more that we can do at this point.  Hopefully we stay alive
-            ; long enough to grab some much-needed panic information.
-            ;	 r7 = return address to caller of saveGet64 
-            ;	r10 = per-proc ptr
-
-saveGet64NoFree:			
-			lwz		r8,lclfreecnt(r10)			; Get the count
-			mr.		r8,r8						; Are there any reserve to get?
-			beq--	saveGet64Choke				; No, go choke and die...
-			bl		saveunlock					; Update the adjust field and unlock
-			ld		r3,lclfree(r10)				; Get the start of local savearea list
-			lwz		r8,lclfreecnt(r10)			; Get the count
-            mtlr	r7							; restore return address
-			b		saveGet64GetLocal			; We have some left, dip on in...
-			
-;			We who are about to die salute you.  The savearea chain is messed up or
-;			empty.  Add in a few so we have enough to take down the system.
-
-saveGet64Choke:
-            lis		r9,hi16(EXT(backpocket))	; Get high order of back pocket
-			ori		r9,r9,lo16(EXT(backpocket))	; and low part
-			
-			lwz		r8,SVfreecnt-saveanchor(r9)	; Get the new number of free elements
-			ld		r7,SVfree-saveanchor(r9)	; Get the head of the chain
-			lwz		r6,SVinuse(0)				; Get total in the old list
-
-			stw		r8,SVfreecnt(0)				; Set the new number of free elements
-			add		r6,r6,r8					; Add in the new ones
-			std		r7,SVfree(0)				; Set the new head of the chain
-			stw		r6,SVinuse(0)				; Set total in the new list
-
-saveGetChokeJoin:								; join in the fun from 32-bit mode
-			lis		r0,hi16(Choke)				; Set choke firmware call
-			li		r7,0						; Get a clear register to unlock
-			ori		r0,r0,lo16(Choke)			; Set the rest of the choke call
-			li		r3,failNoSavearea			; Set failure code
-
-			eieio								; Make sure all is committed
-			stw		r7,SVlock(0)				; Unlock the free list
-			sc									; System ABEND
-
-
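-/*
- *			The allocation policy shared by saveGet64 and saveGet32 (below), as
- *			a hedged C sketch.  The types, constants, and helpers suffixed
- *			"_sk" are illustrative assumptions, not kernel interfaces; the real
- *			code can use the local list without a lock because translation and
- *			interrupts are already off:
- *
- *				struct per_proc_sk { struct savearea_sk *lclfree; unsigned int lclfreecnt; };
- *
- *				struct savearea_sk *saveGet_sketch(struct per_proc_sk *pp)
- *				{
- *					while (pp->lclfreecnt <= LocalSaveMin_sk) {
- *						savelock_sk();
- *						// the asm computes this min branchlessly (sradi/andc/and/or)
- *						unsigned int need = LocalSaveTarget_sk - pp->lclfreecnt;
- *						unsigned int grab = (SVfreecnt_sk < need) ? SVfreecnt_sk : need;
- *						if (grab == 0) {						// global list is empty
- *							if (pp->lclfreecnt == 0)
- *								choke_with_backpocket_sk();		// install backpocket, panic
- *							saveunlock_sk();
- *							break;								// dip into the local reserve
- *						}
- *						splice_global_to_local_sk(pp, grab);	// move entries, fix counts
- *						saveunlock_sk();
- *					}
- *					struct savearea_sk *s = pp->lclfree;		// pop the local list
- *					pp->lclfree = s->SAVprev;
- *					pp->lclfreecnt--;
- *					s->SAVprev = BUGBUG_PTR_sk;					// 0x5555... debug poison
- *					return s;
- *				}
- */
-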
-/*
- * *********************
- * * s a v e G e t 3 2 *
- * *********************
- *
- *			This is the internal routine to allocate a savearea on a 32-bit processor.  
- *			Note that we must not take any exceptions of any kind, including PTE misses, as that
- *			would deadlock trying to reenter this routine.  We pass back the 32-bit physical address.
- *			First we try the local list.  If that is below a threshold, we try the global free list,
- *			which requires taking a lock, and replenish.  If there are no saveareas in either list,
- *			we will install the  backpocket and choke.  This routine assumes that the caller has
- *			turned translation off, masked interrupts, and set up:
- *				r10 = per-proc ptr
- *
- *			We return:
- *				r3 = 32-bit physical address of the savearea
- *				r5 = 32-bit physical address of the page the savearea is in, with SAC
- *
- *			We destroy:
- *				r2-r8.
- */
-
-saveGet32:            
-			lwz		r8,lclfreecnt(r10)			; Get the count
-			lwz		r3,lclfree+4(r10)			; Get the start of local savearea list
-			cmplwi	r8,LocalSaveMin				; Are we too low?
-			ble-	saveGet32GetGlobal			; We are too low and need to grow list...
-
-            ; Get savearea from per-processor local list.
-            
-saveGet32GetLocal:
-            li		r2,0x5555					; get r2 <-- 0x55555555, our bugbug constant
-			lwz		r4,SAVprev+4(r3)			; Chain to the next one
-			oris	r2,r2,0x5555
-			subi	r8,r8,1						; Back down count
-
-			stw		r2,SAVprev+4(r3)			; bug next ptr
-			stw		r2,SAVlevel(r3)				; bug context ID
-            li		r6,0
-			stw		r4,lclfree+4(r10)			; Unchain first savearea
-			stw		r2,SAVact(r3)				; bug activation ptr
-			rlwinm	r5,r3,0,0,19				; r5 <-- page ptr, where SAC is kept
-			stw		r8,lclfreecnt(r10)			; Set new count
-			stw		r6,SAVflags(r3)				; clear the flags
-
-            blr
-
-            ; Local list was low so replenish from global list.
-            ;	 r7 = return address to caller of saveGet32
-            ;	 r8 = lclfreecnt
-            ;	r10 = per-proc ptr
-            
-saveGet32GetGlobal:
-            mflr	r7							; save return address
-			subfic	r5,r8,LocalSaveTarget		; Get the number of saveareas we need to grab to get to target
-			bl		savelock					; Go lock up the anchor
-			
-			lwz		r2,SVfreecnt(0)				; Get the number on this list
-			lwz		r8,SVfree+4(0)				; Get the head of the save area list 
-			
-			sub		r3,r2,r5					; Get number left after we swipe enough for local list
-			srawi	r3,r3,31					; Get 0 if enough or -1 if not
-			andc	r4,r5,r3					; Get number to get if there are enough, 0 otherwise
-			and		r5,r2,r3					; Get 0 if there are enough, number on list otherwise
-			or.		r5,r4,r5					; r5 <- number we will move from global to local list
-			beq-	saveGet32NoFree				; There are none to get...
-			
-			mtctr	r5							; Get loop count
-			mr		r6,r8						; Remember the first in the list
-
-saveGet32c:
-            bdz		saveGet32d					; Count down and branch when we hit 0...
-			lwz		r8,SAVprev+4(r8)			; Get the next
-			b		saveGet32c					; Keep going...
-
-saveGet32d:			
-            lwz		r3,SAVprev+4(r8)			; Get the next one
-			lwz		r4,SVinuse(0)				; Get the in use count
-			sub		r2,r2,r5					; Count down what we stole
-			stw		r3,SVfree+4(0)				; Set the new first in list
-			add		r4,r4,r5					; Count the ones we just put in the local list as "in use"
-			stw		r2,SVfreecnt(0)				; Set the new count
-			stw		r4,SVinuse(0)				; Set the new in use count
-			
-			lwz		r4,lclfree+4(r10)			; Get the old head of list
-			lwz		r3,lclfreecnt(r10)			; Get the old count
-			stw		r6,lclfree+4(r10)			; Set the new head of the list
-			add		r3,r3,r5					; Get the new count
-			stw		r4,SAVprev+4(r8)			; Point to the old head
-			stw		r3,lclfreecnt(r10)			; Set the new count
-
-			bl		saveunlock					; Update the adjust field and unlock
-            mtlr	r7							; restore return address
-			b		saveGet32					; Start over and finally allocate the savearea...
-			
-            ; The local list is below the repopulate threshold and the global list is empty.
-            ; First we check if there are any left in the local list and if so, we allow
-            ; them to be allocated.  If not, we release the backpocket list and choke.  
-            ; There is nothing more that we can do at this point.  Hopefully we stay alive
-            ; long enough to grab some much-needed panic information.
-            ;	 r7 = return address to caller of saveGet32
-            ;	r10 = per-proc ptr
-
-saveGet32NoFree:			
-			lwz		r8,lclfreecnt(r10)			; Get the count
-			mr.		r8,r8						; Are there any reserve to get?
-			beq-	saveGet32Choke				; No, go choke and die...
-			bl		saveunlock					; Update the adjust field and unlock
-			lwz		r3,lclfree+4(r10)			; Get the start of local savearea list
-			lwz		r8,lclfreecnt(r10)			; Get the count
-            mtlr	r7							; restore return address
-			b		saveGet32GetLocal			; We have some left, dip on in...
-			
-;			We who are about to die salute you.  The savearea chain is messed up or
-;			empty.  Add in a few so we have enough to take down the system.
-
-saveGet32Choke:
-            lis		r9,hi16(EXT(backpocket))	; Get high order of back pocket
-			ori		r9,r9,lo16(EXT(backpocket))	; and low part
-			
-			lwz		r8,SVfreecnt-saveanchor(r9)	; Get the new number of free elements
-			lwz		r7,SVfree+4-saveanchor(r9)	; Get the head of the chain
-			lwz		r6,SVinuse(0)				; Get total in the old list
-
-			stw		r8,SVfreecnt(0)				; Set the new number of free elements
-			add		r6,r6,r8					; Add in the new ones (why?)
-			stw		r7,SVfree+4(0)				; Set the new head of the chain
-			stw		r6,SVinuse(0)				; Set total in the new list
-            
-            b		saveGetChokeJoin
-
-
-/*
- * *******************
- * * s a v e _ r e t *
- * *******************
- *
- *	void	save_ret(struct savearea *);				// normal call
- *	void	save_ret_wMSR(struct savearea *,reg64_t); 	// passes MSR to restore as 2nd arg
- *
- *			Return a savearea passed by virtual address to the free list.
- *			Note really well: we can take NO exceptions of any kind,
- *			including a PTE miss once the savearea lock is held. That's
- *			a guaranteed deadlock.  That means we must disable for interruptions
- *			and turn all translation off.
- */
-            .globl	EXT(save_ret_wMSR)			; alternate entry pt w MSR to restore in r4
-            
-LEXT(save_ret_wMSR)
-            crset	31							; set flag for save_ret_wMSR
-            b		svrt1						; join common code
-            
-            .align	5
-            .globl	EXT(save_ret)
-            
-LEXT(save_ret)
-            crclr	31							; clear flag for save_ret_wMSR
-svrt1:											; join from save_ret_wMSR
-            mflr	r9							; get return address
-            rlwinm	r7,r3,0,0,19				; get virtual address of SAC area at start of page
-            mr		r8,r3						; save virtual address
-            lwz		r5,SACvrswap+0(r7)			; get 64-bit converter from V to R
-            lwz		r6,SACvrswap+4(r7)			; both halves, though only bottom used on 32-bit machine
-#if FPVECDBG
-			lis		r0,HIGH_ADDR(CutTrace)		; (TEST/DEBUG)
-			li		r2,0x2204					; (TEST/DEBUG)
-			oris	r0,r0,LOW_ADDR(CutTrace)	; (TEST/DEBUG) 
-			sc									; (TEST/DEBUG) 
-#endif
-            bl		saveSetup					; turn translation off, 64-bit on, load many regs
-            bf++	31,svrt3					; skip if not save_ret_wMSR
-            mr		r11,r4						; was save_ret_wMSR, so overwrite saved MSR
-svrt3:
-            bf--	pf64Bitb,svrt4				; skip if a 32-bit processor
-            
-            ; Handle 64-bit processor.
-            
-            rldimi	r6,r5,32,0					; merge upper and lower halves of SACvrswap together
-            xor		r3,r8,r6					; get r3 <- 64-bit physical address of this savearea
-            bl		saveRet64					; return it
-            mtlr	r9							; restore return address
-            b		saveRestore64				; restore MSR
-            
-            ; Handle 32-bit processor.
-            
-svrt4:
-            xor		r3,r8,r6					; get r3 <- 32-bit physical address of this savearea
-            bl		saveRet32					; return it
-            mtlr	r9							; restore return address
-            b		saveRestore32				; restore MSR
- 
-
-/*
- * *****************************
- * * s a v e _ r e t _ p h y s *
- * *****************************
- *
- *	void	save_ret_phys(reg64_t);
- *
- *			Called from lowmem vectors to return (ie, free) a savearea by physical address.
- *			Translation and interrupts are already off, and 64-bit mode is set if defined.
- *			We can take _no_ exceptions of any kind in this code, including PTE miss, since
- *			that would result in a deadlock.  We expect:
- *				r3 = phys addr of savearea
- *			   msr = IR, DR, and EE off, SF on
- *             cr6 = pf64Bit flag
- *			We destroy:
- *				r0,r2-r10.
- */
-			.align	5
-			.globl	EXT(save_ret_phys)
-
-LEXT(save_ret_phys)
-            mfsprg	r10,0						; get the per-proc ptr
-            bf--	pf64Bitb,saveRet32			; handle 32-bit machine
-            b		saveRet64					; handle 64-bit machine
-            
-
-/*
- * *********************
- * * s a v e R e t 6 4 *
- * *********************
- *
- *			This is the internal routine to free a savearea, passed by 64-bit physical
- *			address.  We assume that IR, DR, and EE are all off, that SF is on, and:
- *				r3 = phys address of the savearea
- *			   r10 = per-proc ptr
- *			We destroy:
- *				r0,r2-r8.
- */
-            .align	5
- saveRet64:
-			li		r0,SAVempty					; Get marker for free savearea
-			lwz		r7,lclfreecnt(r10)			; Get the local count
-			ld		r6,lclfree(r10)				; Get the old local header
-			addi	r7,r7,1						; Pop up the free count
-			std		r6,SAVprev(r3)				; Plant free chain pointer
-			cmplwi	r7,LocalSaveMax				; Has the list gotten too long?
-			stb		r0,SAVflags+2(r3)			; Mark savearea free
-			std		r3,lclfree(r10)				; Chain us on in
-			stw		r7,lclfreecnt(r10)			; Bump up the count
-			bltlr++								; List not too long, so done
-			
-/*			The local savearea chain has gotten too long.  Trim it down to the target.
- *			Here's a tricky and important bit:
- *
- *			When we trim the list, we NEVER trim the very first one.  This is because that is
- *			the very last one released and the exception exit code will release the savearea
- *			BEFORE it is done using it. Wouldn't be too good if another processor started
- *			using it, eh?  So for this case, we are safe so long as the savearea stays on
- *			the local list.  (Note: the exit routine needs to do this because it is in the 
- *			process of restoring all context and it needs to keep it until the last second.)
- */
-
-            mflr	r0							; save return to caller of saveRet64
-			mr		r2,r3						; r2 <- 1st one on local list, which must not be trimmed
-			ld		r3,SAVprev(r3)				; Skip over the first
-			subi	r7,r7,LocalSaveTarget		; Figure out how much to trim	
-			mr		r6,r3						; r6 <- first one to trim
-			mr		r5,r7						; Save the number we are trimming
-			
-saveRet64a:
-            addic.	r7,r7,-1					; Any left to do?
-			ble--	saveRet64b					; Nope...
-			ld		r3,SAVprev(r3)				; Skip to the next one
-			b		saveRet64a					; Keep going...
-			
-saveRet64b:										; r3 <- last one to trim
-			ld		r7,SAVprev(r3)				; Point to the first one not to trim
-			li		r4,LocalSaveTarget			; Set the target count
-			std		r7,SAVprev(r2)				; Trim stuff leaving the one just released as first
-			stw		r4,lclfreecnt(r10)			; Set the current count
-			
-			bl		savelock					; Lock up the anchor
-			
-			ld		r8,SVfree(0)				; Get the old head of the free list
-			lwz		r4,SVfreecnt(0)				; Get the number of free ones
-			lwz		r7,SVinuse(0)				; Get the number that are in use
-			std		r6,SVfree(0)				; Point to the first trimmed savearea
-			add		r4,r4,r5					; Add number trimmed to free count
-			std		r8,SAVprev(r3)				; Chain the old head to the tail of the trimmed guys
-			sub		r7,r7,r5					; Remove the trims from the in use count
-			stw		r4,SVfreecnt(0)				; Set new free count
-			stw		r7,SVinuse(0)				; Set new in use count
-
-			mtlr	r0							; Restore the return to our caller
-			b		saveunlock					; Set adjust count, unlock the saveanchor, and return
-            
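-/*
- *			The release/trim policy shared by saveRet64 and saveRet32 (below),
- *			as a hedged C sketch (same "_sk" conventions as the saveGet sketch
- *			above).  Note how the just-freed head stays on the local list, as
- *			the comment above requires:
- *
- *				void saveRet_sketch(struct per_proc_sk *pp, struct savearea_sk *s)
- *				{
- *					s->SAVflags2 = SAVempty_sk;				// mark savearea free
- *					s->SAVprev = pp->lclfree;				// push on the local list
- *					pp->lclfree = s;
- *					if (++pp->lclfreecnt < LocalSaveMax_sk)
- *						return;								// list not too long, done
- *
- *					unsigned int ntrim = pp->lclfreecnt - LocalSaveTarget_sk;
- *					struct savearea_sk *first = s->SAVprev;	// first to trim: never s itself
- *					struct savearea_sk *last = first;
- *					for (unsigned int i = 1; i < ntrim; i++)
- *						last = last->SAVprev;				// find the last one to trim
- *					s->SAVprev = last->SAVprev;				// unlink the trimmed run
- *					pp->lclfreecnt = LocalSaveTarget_sk;
- *
- *					savelock_sk();
- *					last->SAVprev = SVfree_sk;				// splice run onto global list
- *					SVfree_sk = first;
- *					SVfreecnt_sk += ntrim;
- *					SVinuse_sk -= ntrim;
- *					saveunlock_sk();						// recompute SVadjust, unlock
- *				}
- */
-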
-
-/*
- * *********************
- * * s a v e R e t 3 2 *
- * *********************
- *
- *			This is the internal routine to free a savearea, passed by 32-bit physical
- *			address.  We assume that IR, DR, and EE are all off, and:
- *				r3 = phys address of the savearea
- *			   r10 = per-proc ptr
- *			We destroy:
- *				r0,r2-r8.
- */
-            .align	5
- saveRet32:
-			li		r0,SAVempty					; Get marker for free savearea
-			lwz		r7,lclfreecnt(r10)			; Get the local count
-			lwz		r6,lclfree+4(r10)			; Get the old local header
-			addi	r7,r7,1						; Pop up the free count
-			stw		r6,SAVprev+4(r3)			; Plant free chain pointer
-			cmplwi	r7,LocalSaveMax				; Has the list gotten too long?
-			stb		r0,SAVflags+2(r3)			; Mark savearea free
-			stw		r3,lclfree+4(r10)			; Chain us on in
-			stw		r7,lclfreecnt(r10)			; Bump up the count
-			bltlr+								; List not too long, so done
-			
-/*			The local savearea chain has gotten too long.  Trim it down to the target.
- *			Here's a tricky and important bit:
- *
- *			When we trim the list, we NEVER trim the very first one.  This is because that is
- *			the very last one released and the exception exit code will release the savearea
- *			BEFORE it is done using it. Wouldn't be too good if another processor started
- *			using it, eh?  So for this case, we are safe so long as the savearea stays on
- *			the local list.  (Note: the exit routine needs to do this because it is in the 
- *			process of restoring all context and it needs to keep it until the last second.)
- */
-
-            mflr	r0							; save return to caller of saveRet32
-			mr		r2,r3						; r2 <- 1st one on local list, which must not be trimmed
-			lwz		r3,SAVprev+4(r3)			; Skip over the first
-			subi	r7,r7,LocalSaveTarget		; Figure out how much to trim	
-			mr		r6,r3						; r6 <- first one to trim
-			mr		r5,r7						; Save the number we are trimming
-			
-saveRet32a:
-            addic.	r7,r7,-1					; Any left to do?
-			ble-	saveRet32b					; Nope...
-			lwz		r3,SAVprev+4(r3)			; Skip to the next one
-			b		saveRet32a					; Keep going...
-			
-saveRet32b:										; r3 <- last one to trim
-			lwz		r7,SAVprev+4(r3)			; Point to the first one not to trim
-			li		r4,LocalSaveTarget			; Set the target count
-			stw		r7,SAVprev+4(r2)			; Trim stuff leaving the one just released as first
-			stw		r4,lclfreecnt(r10)			; Set the current count
-			
-			bl		savelock					; Lock up the anchor
-			
-			lwz		r8,SVfree+4(0)				; Get the old head of the free list
-			lwz		r4,SVfreecnt(0)				; Get the number of free ones
-			lwz		r7,SVinuse(0)				; Get the number that are in use
-			stw		r6,SVfree+4(0)				; Point to the first trimmed savearea
-			add		r4,r4,r5					; Add number trimmed to free count
-			stw		r8,SAVprev+4(r3)			; Chain the old head to the tail of the trimmed guys
-			sub		r7,r7,r5					; Remove the trims from the in use count
-			stw		r4,SVfreecnt(0)				; Set new free count
-			stw		r7,SVinuse(0)				; Set new in use count
-
-			mtlr	r0							; Restore the return to our caller
-			b		saveunlock					; Set adjust count, unlock the saveanchor, and return
-
-
-/*
- * *******************************
- * * s a v e _ t r i m _ f r e e *
- * *******************************
- *
- *	struct savearea_comm	*save_trim_free(void);
- *
- *			Trim the free list down to the target count, ie by -(SVadjust) save areas.
- *			It trims the list and, if a pool page was fully allocated, puts that page on 
- *			the start of the pool list.
- *
- *			If the savearea being released is the last on a pool page (i.e., all entries
- *			are released), the page is dequeued from the pool and queued to any other 
- *			found during this scan.  Note that this queue is maintained virtually.
- *
- *			When the scan is done, the saveanchor lock is released and the list of
- *			freed pool pages is returned to our caller.
- *
- *			For latency's sake we may want to revisit this code. If we are trimming a
- *			large number of saveareas, we could be disabled and holding the savearea lock
- *			for quite a while.  It may be that we want to break the trim down into parts.
- *			Possibly trimming the free list, then individually pushing them into the free pool.
- *
- *			This function expects to be called with translation on and a valid stack.
- *			It uses the standard ABI, ie we destroy r2 and r3-r11, and return the ptr in r3.
- */
-			.align	5
-			.globl	EXT(save_trim_free)
-
-LEXT(save_trim_free)
-
-			subi	r1,r1,(FM_ALIGN(16)+FM_SIZE)	; Make space for 4 registers on stack
-            mflr	r9							; save our return address
-			stw		r28,FM_SIZE+0(r1)			; Save R28
-			stw		r29,FM_SIZE+4(r1)			; Save R29
-			stw		r30,FM_SIZE+8(r1)			; Save R30
-			stw		r31,FM_SIZE+12(r1)			; Save R31
-
-            bl		saveSetup					; turn off translation and interrupts, load many regs
-            bl		savelock					; Go lock up the anchor
-
-			lwz		r8,SVadjust(0)				; How many do we need to clear out?
-			li		r3,0						; Get a 0
-			neg.	r8,r8						; Get the actual we need to toss (adjust is neg if too many)
-            ble-	save_trim_free1				; skip if no trimming needed anymore
-            bf--	pf64Bitb,saveTrim32			; handle 32-bit processors
-            b		saveTrim64					; handle 64-bit processors
-
-save_trim_free1:								; by the time we were called, no need to trim anymore			
-			stw		r3,SVlock(0)				; Quick unlock (no need for sync or to set adjust, nothing changed)
-			mtlr	r9							; Restore return
-	
-#if FPVECDBG
-			lis		r0,HIGH_ADDR(CutTrace)		; (TEST/DEBUG)
-			li		r2,0x2206					; (TEST/DEBUG)
-			oris	r0,r0,LOW_ADDR(CutTrace)	; (TEST/DEBUG) 
-			sc									; (TEST/DEBUG) 
-#endif
-			addi	r1,r1,(FM_ALIGN(16)+FM_SIZE); Pop stack - have not trashed registers so no need to reload
-			b		saveRestore					; restore translation and EE, turn SF off, return to our caller
-
-
-/*
- * ***********************
- * * s a v e T r i m 3 2 *
- * ***********************
- *
- *	Handle "save_trim_free" on 32-bit processors.  At this point, translation and interrupts
- *  are off, the savearea anchor is locked, and:
- *		 r8 = #pages to trim (>0)
- *	     r9 = return address
- *	 	r10 = per-proc ptr
- *		r11 = MSR at entry
- */
-
-saveTrim32:	
-			lwz		r7,SVfree+4(0)				; Get the first on the free list
-            mr		r6,r7						; Save the first one 
-			mr		r5,r8						; Save the number we are trimming
-			
-sttrimming:	addic.	r5,r5,-1					; Any left to do?
-			ble-	sttrimmed					; Nope...
-			lwz		r7,SAVprev+4(r7)			; Skip to the next one
-			b		sttrimming					; Keep going...
-
-sttrimmed:	lwz		r5,SAVprev+4(r7)			; Get the next one (for new head of free list)
-			lwz		r4,SVfreecnt(0)				; Get the free count
-			stw		r5,SVfree+4(0)				; Set new head
-			sub		r4,r4,r8					; Calculate the new free count
-			li		r31,0						; Show we have no free pool blocks yet
-			crclr	cr1_eq						; don't exit loop before 1st iteration
-			stw		r4,SVfreecnt(0)				; Set new free count
-			lis		r30,hi16(sac_empty)			; Get what empty looks like
-			
-;			NOTE: The savearea size must be 640 (0x280).  We are doing a divide by shifts and stuff
-;			here.
-;
-#if SAVsize != 640
-#error Savearea size is not 640!!!!!!!!!!!!
-#endif
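-
-/*
- *			In other words, the bit-twiddling below recovers, without a divide,
- *			which savearea within its pool page is being freed (hedged sketch;
- *			PAGE_MASK_sk is an assumption):
- *
- *				unsigned int index = (addr & PAGE_MASK_sk) / 640;	// slot within page
- *				unsigned int bit   = 0x80000000u >> index;			// its SACalloc bit
- *
- *			The packed constant 0x00442200 acts as the lookup table for the
- *			divide, since 640 is not a power of two.
- */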
-
-            ; Loop over each savearea we are trimming.
-            ;	 r6 = next savearea to trim
-            ;	 r7 = last savearea to trim
-            ;	 r8 = #pages to trim (>0)
-            ;    r9 = return address
-            ;	r10 = per-proc ptr
-            ;	r11 = MSR at entry
-            ;	r30 = what SACalloc looks like when all saveareas are free
-            ;	r31 = free pool block list
-            ;	cr1 = beq set if we just trimmed the last, ie if we are done
-
-sttoss:		beq+	cr1,stdone					; All done now...
-
-			cmplw	cr1,r6,r7					; Have we finished the loop?
-
-			lis		r0,0x0044					; Get top of table	
-			rlwinm	r2,r6,0,0,19				; Back down to the savearea control stuff
-			ori		r0,r0,0x2200				; Finish shift table
-			rlwinm	r4,r6,25,27,30				; Get (addr >> 7) & 0x1E (same as twice high nybble)
-			lwz		r5,SACalloc(r2)				; Get the allocation bits
-			addi	r4,r4,1						; Shift 1 extra
-			rlwinm	r3,r6,25,31,31				; Get (addr >> 7) & 1
-			rlwnm	r0,r0,r4,29,31				; Get partial index
-			lis		r4,lo16(0x8000)				; Get the bit mask
-			add		r0,r0,r3					; Make the real index
-			srw		r4,r4,r0					; Get the allocation mask
-			or		r5,r5,r4					; Free this entry
-			cmplw	r5,r4						; Is this the only free entry?
-			lwz		r6,SAVprev+4(r6)			; Chain to the next trimmed savearea
-			cmplw	cr7,r30,r5					; Does this look empty?
-			stw		r5,SACalloc(r2)				; Save back the allocation bits
-			beq-	stputpool					; First free entry, go put it into the pool...
-			bne+	cr7,sttoss					; Not an empty block
-			
-;
-;			We have an empty block.  Remove it from the pool list.
-;
-			
-			lwz		r29,SACflags(r2)			; Get the flags
-			cmplwi	cr5,r31,0					; Is this guy on the release list?
-			lwz		r28,SACnext+4(r2)			; Get the forward chain
-
-			rlwinm.	r0,r29,0,sac_permb,sac_permb	; Is this a permanently allocated area? (also sets 0 needed below)
-			bne-	sttoss						; This is permanent entry, do not try to release...
-
-			lwz		r29,SACprev+4(r2)			; and the previous
-			beq-	cr5,stnot1st				; Not first
-			lwz		r0,SACvrswap+4(r31)			; Load the previous pool page vr conversion
-			
-stnot1st:	stw		r28,SACnext+4(r29)			; Previous guy points to my next
-			xor		r0,r0,r31					; Make the last guy virtual
-			stw		r29,SACprev+4(r28)			; Next guy points back to my previous 			
-			stw		r0,SAVprev+4(r2)			; Store the old top virtual as my back chain
-			mr		r31,r2						; My physical is now the head of the chain
-			b		sttoss						; Get the next one...
-			
-;
-;			A pool block that had no free entries now has one.  Stick it on the pool list.
-;
-			
-stputpool:	lwz		r28,SVpoolfwd+4(0)			; Get the first guy on the list
-			li		r0,saveanchor				; Point to the saveanchor
-			stw		r2,SVpoolfwd+4(0)			; Put us on the top of the list
-			stw		r28,SACnext+4(r2)			; We point to the old top
-			stw		r2,SACprev+4(r28)			; Old top guy points back to us
-			stw		r0,SACprev+4(r2)			; Our back points to the anchor
-			b		sttoss						; Go on to the next one...
-
-
-/*
- * ***********************
- * * s a v e T r i m 6 4 *
- * ***********************
- *
- *	Handle "save_trim_free" on 64-bit processors.  At this point, translation and interrupts
- *  are off, SF is on, the savearea anchor is locked, and:
- *		 r8 = #pages to trim (>0)
- *	     r9 = return address
- *	 	r10 = per-proc ptr
- *		r11 = MSR at entry
- */
-
-saveTrim64:	
-			ld		r7,SVfree(0)				; Get the first on the free list
-            mr		r6,r7						; Save the first one 
-			mr		r5,r8						; Save the number we are trimming
-			
-sttrimming64:	
-            addic.	r5,r5,-1					; Any left to do?
-			ble--	sttrimmed64					; Nope...
-			ld		r7,SAVprev(r7)				; Skip to the next one
-			b		sttrimming64				; Keep going...
-
-sttrimmed64:
-            ld		r5,SAVprev(r7)				; Get the next one (for new head of free list)
-			lwz		r4,SVfreecnt(0)				; Get the free count
-			std		r5,SVfree(0)				; Set new head
-			sub		r4,r4,r8					; Calculate the new free count
-			li		r31,0						; Show we have no free pool blocks yet
-			crclr	cr1_eq						; don't exit loop before 1st iteration
-			stw		r4,SVfreecnt(0)				; Set new free count
-			lis		r30,hi16(sac_empty)			; Get what empty looks like
-			
-
-            ; Loop over each savearea we are trimming.
-            ;	 r6 = next savearea to trim
-            ;	 r7 = last savearea to trim
-            ;	 r8 = #pages to trim (>0)
-            ;    r9 = return address
-            ;	r10 = per-proc ptr
-            ;	r11 = MSR at entry
-            ;	r30 = what SACalloc looks like when all saveareas are free
-            ;	r31 = free pool block list
-            ;	cr1 = beq set if we just trimmed the last, ie if we are done
-            ;
-            ; WARNING: as in the 32-bit path, this code is doing a divide by 640 (SAVsize).
-
-sttoss64:
-            beq++	cr1,stdone					; All done now...
-
-			cmpld	cr1,r6,r7					; Have we finished the loop?
-
-			lis		r0,0x0044					; Get top of table	
-			rldicr	r2,r6,0,51					; r2 <- phys addr of savearea block (with control area)
-			ori		r0,r0,0x2200				; Finish shift table
-			rlwinm	r4,r6,25,27,30				; Get (addr >> 7) & 0x1E (same as twice high nybble)
-			lwz		r5,SACalloc(r2)				; Get the allocation bits
-			addi	r4,r4,1						; Shift 1 extra
-			rlwinm	r3,r6,25,31,31				; Get (addr >> 7) & 1
-			rlwnm	r0,r0,r4,29,31				; Get partial index
-			lis		r4,lo16(0x8000)				; Get the bit mask
-			add		r0,r0,r3					; Make the real index
-			srw		r4,r4,r0					; Get the allocation mask
-			or		r5,r5,r4					; Free this entry
-			cmplw	r5,r4						; Is this the only free entry?
-			ld		r6,SAVprev(r6)				; Chain to the next trimmed savearea
-			cmplw	cr7,r30,r5					; Does this look empty?
-			stw		r5,SACalloc(r2)				; Save back the allocation bits
-			beq--	stputpool64					; First free entry, go put it into the pool...
-			bne++	cr7,sttoss64				; Not an empty block
-			
-;			We have an empty block.  Remove it from the pool list.
-			
-			lwz		r29,SACflags(r2)			; Get the flags
-			cmpldi	cr5,r31,0					; Is this guy on the release list?
-			ld		r28,SACnext(r2)				; Get the forward chain
-
-			rlwinm.	r0,r29,0,sac_permb,sac_permb	; Is this a permanently allocated area? (also sets 0 needed below)
-			bne--	sttoss64					; This is permanent entry, do not try to release...
-
-			ld		r29,SACprev(r2)				; and the previous
-			beq--	cr5,stnot1st64				; Not first
-			ld		r0,SACvrswap(r31)			; Load the previous pool page vr conversion
-			
-stnot1st64:	
-            std		r28,SACnext(r29)			; Previous guy points to my next
-			xor		r0,r0,r31					; Make the last guy virtual
-			std		r29,SACprev(r28)			; Next guy points back to my previous 			
-			std		r0,SAVprev(r2)				; Store the old top virtual as my back chain
-			mr		r31,r2						; My physical is now the head of the chain
-			b		sttoss64					; Get the next one...
-			
-;			A pool block that had no free entries now has one.  Stick it on the pool list.
-			
-stputpool64:
-            ld		r28,SVpoolfwd(0)			; Get the first guy on the list
-			li		r0,saveanchor				; Point to the saveanchor
-			std		r2,SVpoolfwd(0)				; Put us on the top of the list
-			std		r28,SACnext(r2)				; We point to the old top
-			std		r2,SACprev(r28)				; Old top guy points back to us
-			std		r0,SACprev(r2)				; Our back points to the anchor
-			b		sttoss64					; Go on to the next one...
-			
-
-;			We are all done.  Relocate pool release head, restore all, and go.  This code
-;			is used both by the 32 and 64-bit paths.
-;  				 r9 = return address
-;				r10 = per-proc ptr
-;				r11 = MSR at entry
-;				r31 = free pool block list
-
-stdone:		bl		saveunlock					; Unlock the saveanchor and set adjust field
-
-			mr.		r3,r31						; Move release chain and see if there are any
-			li		r5,0						; Assume either V=R or no release chain
-			beq-	stnorel						; Nothing to release...
-			lwz		r5,SACvrswap+4(r31)			; Get the vr conversion (only need low half if 64-bit)
-
-stnorel:	
-            bl		saveRestore					; restore translation and exceptions, turn off SF
-			mtlr	r9							; Restore the return
-			
-			lwz		r28,FM_SIZE+0(r1)			; Restore R28
-			lwz		r29,FM_SIZE+4(r1)			; Restore R29
-			lwz		r30,FM_SIZE+8(r1)			; Restore R30
-			lwz		r31,FM_SIZE+12(r1)			; Restore R31
-			addi	r1,r1,(FM_ALIGN(16)+FM_SIZE)	; Pop the stack
-			xor		r3,r3,r5					; Convert release chain address to virtual
-            rlwinm	r3,r3,0,0,31				; if 64-bit, clear upper half of virtual address
-							
-#if FPVECDBG
-			lis		r0,HIGH_ADDR(CutTrace)		; (TEST/DEBUG)
-			li		r2,0x2207					; (TEST/DEBUG)
-			oris	r0,r0,LOW_ADDR(CutTrace)	; (TEST/DEBUG) 
-			sc									; (TEST/DEBUG) 
-#endif
-			blr									; Return...
-            
-            
-/*
- * ***************************
- * * s a v e _ r e c o v e r *
- * ***************************
- *
- *	int save_recover(void);
- *
- *	Returns nonzero if we can get enough saveareas to hit the target.  We scan the free
- * 	pool.  If we empty a pool block, we remove it from the pool list.
- */			
-			
-			.align	5
-			.globl	EXT(save_recover)
-
-LEXT(save_recover)
-            mflr	r9							; save return address
-            bl		saveSetup					; turn translation and interrupts off, SF on, load many regs
-            bl		savelock					; lock the savearea anchor
-
-			lwz		r8,SVadjust(0)				; How many do we need to get?
-			li		r3,0						; Get a 0
-			mr.		r8,r8						; Do we need any?
-            ble--	save_recover1				; not any more
-            bf--	pf64Bitb,saveRecover32		; handle 32-bit processor
-            b		saveRecover64				; handle 64-bit processor
-            
-save_recover1:									; by the time we locked the anchor, no longer short
-			mtlr	r9							; Restore return
-			stw		r3,SVlock(0)				; Quick unlock (no need for sync or to set adjust, nothing changed)
-#if FPVECDBG
-			lis		r0,HIGH_ADDR(CutTrace)		; (TEST/DEBUG)
-			li		r2,0x2208					; (TEST/DEBUG)
-			oris	r0,r0,LOW_ADDR(CutTrace)	; (TEST/DEBUG) 
-			sc									; (TEST/DEBUG) 
-#endif
-			b		saveRestore					; turn translation etc back on, return to our caller
-
-
-/*
- * *****************************
- * * s a v e R e c o v e r 3 2 *
- * *****************************
- *
- *	Handle "save_recover" on 32-bit processors.  At this point, translation and interrupts
- *  are off, the savearea anchor is locked, and:
- *		 r8 = #pages to recover
- *	     r9 = return address
- *	 	r10 = per-proc ptr
- *		r11 = MSR at entry
- */
-
-saveRecover32:
-			li		r6,saveanchor				; Start at pool anchor
-			crclr	cr1_eq						; initialize the loop test					
-			lwz		r7,SVfreecnt(0)				; Get the current free count
-
-
-; Loop over next block in free pool.  r6 is the ptr to the last block we looked at.
-
-srcnpool:	lwz		r6,SACnext+4(r6)			; Point to the next one
-			cmplwi	r6,saveanchor				; Have we wrapped?
-			beq-	srcdone						; Yes, did not have enough...
-			
-			lwz		r5,SACalloc(r6)				; Pick up the allocation for this pool block
-			
-;
-;			NOTE: The savearea size must be 640 (0x280).  We are doing a multiply by shifts and add.
-;			offset = (index << 9) + (index << 7)
-;
-#if SAVsize != 640
-#error Savearea size is not 640!!!!!!!!!!!!
-#endif
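-
-/*
- *			That is, with SAVsize fixed at 640 the multiply reduces to two
- *			shifts and an add (sketch):
- *
- *				unsigned int offset = (index << 9) + (index << 7);	// index * 640
- *
- *			since 640 == 512 + 128.
- */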
-
-; Loop over free savearea in current block.
-;		 r5 = bitmap of free saveareas in block at r6 (ie, SACalloc)
-;		 r6 = ptr to current free pool block
-;		 r7 = free count
-;		 r8 = #pages more we still need to recover
-;	     r9 = return address
-;	 	r10 = per-proc ptr
-;		r11 = MSR at entry
-;		cr1 = beq if (r8==0)
-
-srcnext:	beq-	cr1,srcdone					; We have no more to get...
-
-			lis		r3,0x8000					; Get the top bit on
-			cntlzw	r4,r5						; Find a free slot
-			addi	r7,r7,1						; Bump up the free count
-			srw		r3,r3,r4					; Make a mask
-			slwi	r0,r4,7						; First multiply by 128
-			subi	r8,r8,1						; Decrement the need count
-			slwi	r2,r4,9						; Then multiply by 512
-			andc.	r5,r5,r3					; Clear out the "free" bit
-			add		r2,r2,r0					; Sum to multiply by 640	
-			
-			stw		r5,SACalloc(r6)				; Set new allocation bits
-			
-			add		r2,r2,r6					; Get the actual address of the savearea
-			lwz		r3,SVfree+4(0)				; Get the head of the chain
-			cmplwi	cr1,r8,0					; Do we actually need any more?
-			stw		r2,SVfree+4(0)				; Push ourselves in the front
-			stw		r3,SAVprev+4(r2)			; Chain the rest of the list behind 
-			
-			bne+	srcnext						; The pool block is not empty yet, try for another...
-			
-			lwz		r2,SACnext+4(r6)			; Get the next pointer
-			lwz		r3,SACprev+4(r6)			; Get the previous pointer
-			stw		r3,SACprev+4(r2)			; The previous of my next points to my previous
-			stw		r2,SACnext+4(r3)			; The next of my previous points to my next
-			bne+	cr1,srcnpool				; We still have more to do...
-
-
-; Join here from 64-bit path when we have recovered all the saveareas we need to.
-
-srcdone:	stw		r7,SVfreecnt(0)				; Set the new free count
-			bl		saveunlock					; Unlock the save and set adjust field
-
-			mtlr	r9							; Restore the return
-#if FPVECDBG
-			lis		r0,HIGH_ADDR(CutTrace)		; (TEST/DEBUG)
-			li		r2,0x2209					; (TEST/DEBUG)
-			oris	r0,r0,LOW_ADDR(CutTrace)	; (TEST/DEBUG) 
-			sc									; (TEST/DEBUG) 
-#endif
-			b		saveRestore					; turn xlate and EE back on, SF off, and return to our caller
-
-
-/*
- * *****************************
- * * s a v e R e c o v e r 6 4 *
- * *****************************
- *
- *	Handle "save_recover" on 64-bit processors.  At this point, translation and interrupts
- *  are off, the savearea anchor is locked, and:
- *		 r8 = #pages to recover
- *	     r9 = return address
- *	 	r10 = per-proc ptr
- *		r11 = MSR at entry
- */
-
-saveRecover64:
-			li		r6,saveanchor				; Start at pool anchor
-			crclr	cr1_eq						; initialize the loop test					
-			lwz		r7,SVfreecnt(0)				; Get the current free count
-
-
-; Loop over next block in free pool.  r6 is the ptr to the last block we looked at.
-
-srcnpool64:	
-            ld		r6,SACnext(r6)				; Point to the next one
-			cmpldi	r6,saveanchor				; Have we wrapped?
-			beq--	srcdone						; Yes, did not have enough...
-			
-			lwz		r5,SACalloc(r6)				; Pick up the allocation for this pool block
-			
-
-; Loop over free savearea in current block.
-;		 r5 = bitmap of free saveareas in block at r6 (ie, SACalloc)
-;		 r6 = ptr to current free pool block
-;		 r7 = free count
-;		 r8 = #pages more we still need to recover
-;	     r9 = return address
-;	 	r10 = per-proc ptr
-;		r11 = MSR at entry
-;		cr1 = beq if (r8==0)
-;
-; WARNING: as in the 32-bit path, we depend on (SAVsize==640)
-
-srcnext64:	
-            beq--	cr1,srcdone					; We have no more to get...
-
-			lis		r3,0x8000					; Get the top bit on
-			cntlzw	r4,r5						; Find a free slot
-			addi	r7,r7,1						; Bump up the free count
-			srw		r3,r3,r4					; Make a mask
-			slwi	r0,r4,7						; First multiply by 128
-			subi	r8,r8,1						; Decrement the need count
-			slwi	r2,r4,9						; Then multiply by 512
-			andc.	r5,r5,r3					; Clear out the "free" bit
-			add		r2,r2,r0					; Sum to multiply by 640	
-			
-			stw		r5,SACalloc(r6)				; Set new allocation bits
-			
-			add		r2,r2,r6					; Get the actual address of the savearea
-			ld		r3,SVfree(0)				; Get the head of the chain
-			cmplwi	cr1,r8,0					; Do we actually need any more?
-			std		r2,SVfree(0)				; Push ourselves in the front
-			std		r3,SAVprev(r2)				; Chain the rest of the list behind 
-			
-			bne++	srcnext64					; The pool block is not empty yet, try for another...
-			
-			ld		r2,SACnext(r6)				; Get the next pointer
-			ld		r3,SACprev(r6)				; Get the previous pointer
-			std		r3,SACprev(r2)				; The previous of my next points to my previous
-			std		r2,SACnext(r3)				; The next of my previous points to my next
-			bne++	cr1,srcnpool64				; We still have more to do...
-            
-            b		srcdone
-
-
-/* 
- * *******************
- * * s a v e l o c k *
- * *******************
- *
- *			Lock the savearea anchor, so we can manipulate the free list.
- *              msr = interrupts and translation off
- *			We destroy:
- *				r8, r3, r12
- */			
-			.align	5
-
-savelock:	lwz		r8,SVlock(0)				; See if lock is held
-            cmpwi	r8,0
-			li		r12,saveanchor				; Point to the saveanchor
-			bne--	savelock					; loop until lock released...
-		
-savelock0:	lwarx	r8,0,r12					; Grab the lock value 
-			cmpwi	r8,0						; taken?
-            li		r8,1						; get nonzero to lock it with
-			bne--	savelock1					; already locked, wait for it to clear...
-			stwcx.	r8,0,r12					; Try to seize that there durn lock
-            isync								; assume we got it
-            beqlr++								; reservation not lost, so we have the lock
-			b		savelock0					; Try again...
-			
-savelock1:	li		r8,lgKillResv				; Point to killing field
-			stwcx.	r8,0,r8						; Kill reservation
-			b		savelock					; Start over....
-		
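-/*
- *			savelock is a classic test-and-test-and-set spinlock built on
- *			lwarx/stwcx., with the reservation explicitly killed while waiting.
- *			Approximately, in C11 atomics (a sketch, not the kernel's lock API):
- *
- *				#include <stdatomic.h>
- *
- *				static void savelock_sketch(atomic_uint *lock)
- *				{
- *					for (;;) {
- *						while (atomic_load_explicit(lock, memory_order_relaxed))
- *							;								// spin without reserving
- *						unsigned int expected = 0;
- *						if (atomic_compare_exchange_weak_explicit(lock, &expected, 1,
- *						        memory_order_acquire, memory_order_relaxed))
- *							return;							// the isync provides acquire
- *					}
- *				}
- */
-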
-
-/*
- * ***********************
- * * s a v e u n l o c k *
- * ***********************
- *
- *			This is the common routine that sets the saveadjust field and unlocks the savearea 
- *			anchor.
- *				msr = interrupts and translation off
- *			We destroy:
- *				r2, r5, r6, r8.
- */
-			.align	5
-saveunlock:
-			lwz		r6,SVfreecnt(0)				; and the number on the free list
-			lwz		r5,SVinuse(0)				; Pick up the in use count
-            subic.	r8,r6,FreeListMin			; do we have at least the minimum?
-			lwz		r2,SVtarget(0)				; Get the target
-            neg		r8,r8						; assuming we are short, get r8 <- shortfall
-            blt--	saveunlock1					; skip if fewer than minimum on free list
-			
-			add		r6,r6,r5					; Get the total number of saveareas
-			addi	r5,r2,-SaveLowHysteresis	; Find low end of acceptable range
-			sub		r5,r6,r5					; Make everything below hysteresis negative
-			sub		r2,r2,r6					; Get the distance from the target
-			addi	r5,r5,-(SaveLowHysteresis + SaveHighHysteresis + 1)	; Subtract full hysteresis range
-			srawi	r5,r5,31					; Get 0xFFFFFFFF if outside range or 0 if inside
-			and		r8,r2,r5					; r8 <- 0 if in range or distance to target if not
-
-saveunlock1:
-			li		r5,0						; Set a clear value
-			stw		r8,SVadjust(0)				; Set the adjustment value			
-			eieio								; Make sure everything is done
-			stw		r5,SVlock(0)				; Unlock the savearea chain 
-			blr
-
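-/*
- *			The intent of the SVadjust computation above, written out with
- *			branches (hedged sketch; the asm folds the range test into
- *			srawi/and to stay branchless):
- *
- *				int compute_adjust_sketch(int freecnt, int inuse, int target)
- *				{
- *					if (freecnt < FreeListMin_sk)
- *						return FreeListMin_sk - freecnt;	// grow right away
- *					int total = freecnt + inuse;
- *					if (total >= target - SaveLowHysteresis_sk &&
- *					    total <= target + SaveHighHysteresis_sk)
- *						return 0;							// inside the dead band
- *					return target - total;					// >0 grow, <0 trim
- *				}
- */
-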
-
-/*
- * *******************
- * * s a v e _ c p v *
- * *******************
- *
- *	struct savearea	*save_cpv(addr64_t saveAreaPhysAddr);
- *
- *          Converts a physical savearea address to virtual.  Called with translation on
- *			and in 32-bit mode.  Note that the argument is passed as a long long in (r3,r4).
- */
-
-			.align	5
-			.globl	EXT(save_cpv)
-
-LEXT(save_cpv)
-            mflr	r9							; save return address
-            mr		r8,r3						; save upper half of phys address here
-            bl		saveSetup					; turn off translation and interrupts, turn SF on
-			rlwinm	r5,r4,0,0,19				; Round back to the start of the physical savearea block
-            bf--	pf64Bitb,save_cpv1			; skip if 32-bit processor
-            rldimi	r5,r8,32,0					; r5 <- 64-bit phys address of block
-save_cpv1:
-			lwz		r6,SACvrswap+4(r5)			; Get the conversion to virtual (only need low half if 64-bit)
-            mtlr	r9							; restore return address
-            xor		r3,r4,r6					; convert phys to virtual
-            rlwinm	r3,r3,0,0,31				; if 64-bit, zero upper half of virtual address
-            b		saveRestore					; turn translation etc back on, SF off, and return r3
-				
-			
-/*
- * *********************
- * * s a v e S e t u p *
- * *********************
- *
- * This routine is called at the start of all the save-area subroutines.
- * It turns off translation, disables interrupts, turns on 64-bit mode,
- * and sets up cr6 with the feature flags (especially pf64Bit).
- * 
- * Note that most save-area routines cannot take _any_ interrupt (such as a
- * PTE miss) once the savearea anchor is locked, since that would result in
- * instant deadlock as we need a save-area to process any exception.
- * We set up:
- *		r10 = per-proc ptr
- *		r11 = old MSR
- *		cr5 = pfNoMSRir feature flag
- *		cr6 = pf64Bit   feature flag
- *
- * We use r0, r3, r10, and r11.
- */
- 
-saveSetup:
-            mfmsr	r11							; get msr
-			mfsprg	r3,2						; get feature flags
-			li		r0,0
-            mtcrf	0x2,r3						; copy pf64Bit to cr6
-            ori		r0,r0,lo16(MASK(MSR_IR)+MASK(MSR_DR)+MASK(MSR_EE))
-            mtcrf	0x4,r3						; copy pfNoMSRir to cr5
-            andc	r3,r11,r0					; turn off IR, DR, and EE
-            li		r0,1						; get a 1 in case its a 64-bit machine
-            bf--	pf64Bitb,saveSetup1			; skip if not a 64-bit machine
-			rldimi	r3,r0,63,MSR_SF_BIT			; turn SF (bit 0) on
-            mtmsrd	r3							; turn translation and interrupts off, 64-bit mode on
-            isync								; wait for it to happen
-            mfsprg	r10,0						; get per-proc ptr
-            blr
-saveSetup1:										; here on 32-bit machines
-            bt-		pfNoMSRirb,saveSetup2		; skip if cannot turn off IR with a mtmsr
-            mtmsr	r3							; turn translation and interrupts off
-            isync								; wait for it to happen
-            mfsprg	r10,0						; get per-proc ptr
-            blr
-saveSetup2:										; here if pfNoMSRir set for this machine
-            li		r0,loadMSR					; we will "mtmsr r3" via system call
-            sc
-            mfsprg	r10,0						; get per-proc ptr
-            blr
-        
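-/*
- * In effect (hedged sketch; MASK() and the MSR bit names are the surrounding
- * kernel's, the "_sk" helpers are illustrative only):
- *
- *		unsigned long long old = mfmsr_sk();	// kept in r11 for saveRestore
- *		unsigned long long new = old & ~(MASK(MSR_IR) | MASK(MSR_DR) | MASK(MSR_EE));
- *		if (is64bit)
- *			new |= 1ULL << 63;	// SF: 64-bit mode (MSR bit 0, IBM numbering)
- *		mtmsr_sk(new);			// via mtmsrd, mtmsr, or the loadMSR syscall
- */
-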
-			
-/*
- * *************************
- * * s a v e R e s t o r e *
- * *************************
- *
- * Undoes the effect of calling "saveSetup", ie it turns relocation and interrupts back on,
- * and turns 64-bit mode back off.
- *		r11 = old MSR
- *		cr6 = pf64Bit   feature flag
- */
- 
-saveRestore:
-            bt++	pf64Bitb,saveRestore64		; handle a 64-bit processor
-saveRestore32:
-            mtmsr	r11							; restore MSR
-            isync								; wait for translation to start up
-            blr
-saveRestore64:									; 64-bit processor
-            mtmsrd	r11							; restore MSR
-            isync								; wait for changes to happen
-            blr
-
diff --git a/osfmk/ppc/scc_8530.h b/osfmk/ppc/scc_8530.h
deleted file mode 100644
index 2fcdfeb80..000000000
--- a/osfmk/ppc/scc_8530.h
+++ /dev/null
@@ -1,428 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *	File: scc_8530.h
- * 	Author: Alessandro Forin, Carnegie Mellon University
- *	Date:	6/91
- *
- *	Definitions for the Zilog Z8530 SCC serial line chip
- */
-
-#ifndef	_SCC_8530_H_
-#define	_SCC_8530_H_
-
-/*
- * Register map, needs definition of the alignment
- * used on the specific machine.
- * #define the 'scc_register_t' data type before
- * including this header file.  For restrictions on
- * access modes define the set/get_datum macros.
- * We provide defaults if not.
- */
-
-
-#define	SCC_CHANNEL_A	1
-#define	SCC_CHANNEL_B	0
-
-#define	SCC_MODEM	SCC_CHANNEL_A
-#define	SCC_PRINTER	SCC_CHANNEL_B
-
-#define	SCC_DATA_OFFSET	4
-
-typedef unsigned char *scc_regmap_t;
-
-extern void	powermac_scc_set_datum(scc_regmap_t regs, unsigned int offset, unsigned char value);
-extern unsigned char powermac_scc_get_datum(scc_regmap_t regs, unsigned int offset);
-
-#define scc_set_datum(regs, d, v)	powermac_scc_set_datum(regs, (d), (v))
-#define	scc_get_datum(regs, d, v)	(v) = powermac_scc_get_datum(regs, (d));
-
-#define	scc_init_reg(regs,chan)		{ \
-		char tmp; \
-		scc_get_datum(regs, ((chan)<<1),tmp); \
-		scc_get_datum(regs, ((chan)<<1),tmp); \
-	}
-
-#define	scc_read_reg(regs,chan,reg,val)	{ \
-		scc_set_datum(regs, ((chan)<<1),reg); \
-		scc_get_datum(regs, ((chan)<<1),val); \
-	}
-
-#define	scc_read_reg_zero(regs,chan,val)	{ \
-		scc_get_datum(regs, ((chan)<<1),val); \
-	}
-
-#define	scc_write_reg(regs,chan,reg,val)	{ \
-		scc_set_datum(regs, ((chan)<<1),reg); \
-		scc_set_datum(regs, ((chan)<<1),val); \
-	}
-
-#define	scc_write_reg_zero(regs,chan,val) { \
-		scc_set_datum(regs, ((chan)<<1),val); \
-	}
-
-#define	scc_read_data(regs,chan,val)	{ \
-		scc_get_datum(regs, ((chan)<<1)+SCC_DATA_OFFSET,val); \
-	}
-
-#define	scc_write_data(regs,chan,val) { \
-		scc_set_datum(regs, ((chan)<<1)+SCC_DATA_OFFSET,val); \
-	}
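
A minimal sketch of how these macros compose -- the helper itself is
hypothetical, but SCC_RR0 and SCC_RR0_TX_EMPTY are defined just below.
Each scc_read_reg is two bus cycles: write the register number to the
channel's command port, then move the datum through the same port.

	/* Spin until the transmitter reports empty (cf. scc_putc). */
	static void scc_wait_tx_idle(scc_regmap_t regs, int chan)
	{
		unsigned char rr0;

		do {
			scc_read_reg(regs, chan, SCC_RR0, rr0);
		} while ((rr0 & SCC_RR0_TX_EMPTY) == 0);
	}
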
-
-
-/*
- * Addressable registers
- */
-
-#define	SCC_RR0		0	/* status register */
-#define	SCC_RR1		1	/* special receive conditions */
-#define	SCC_RR2		2	/* (modified) interrupt vector */
-#define	SCC_RR3		3	/* interrupts pending (chan A only) */
-#define	SCC_RR8		8	/* recv buffer (alias for data) */
-#define	SCC_RR10	10	/* sdlc status */
-#define	SCC_RR12	12	/* BRG constant, low part */
-#define	SCC_RR13	13	/* BRG constant, high part */
-#define	SCC_RR15	15	/* interrupts currently enabled */
-
-#define	SCC_WR0		0	/* reg select, and commands */
-#define	SCC_WR1		1	/* interrupt and DMA enables */
-#define	SCC_WR2		2	/* interrupt vector */
-#define	SCC_WR3		3	/* receiver params and enables */
-#define	SCC_WR4		4	/* clock/char/parity params */
-#define	SCC_WR5		5	/* xmit params and enables */
-#define	SCC_WR6		6	/* synchr SYNCH/address */
-#define	SCC_WR7		7	/* synchr SYNCH/flag */
-#define	SCC_WR8		8	/* xmit buffer (alias for data) */
-#define	SCC_WR9		9	/* vectoring and resets */
-#define	SCC_WR10	10	/* synchr params */
-#define	SCC_WR11	11	/* clocking definitions */
-#define	SCC_WR12	12	/* BRG constant, low part */
-#define	SCC_WR13	13	/* BRG constant, high part */
-#define	SCC_WR14	14	/* BRG enables and commands */
-#define	SCC_WR15	15	/* interrupt enables */
-
-/*
- * Read registers defines
- */
-
-#define	SCC_RR0_BREAK		0x80	/* break detected (rings twice), or */
-#define	SCC_RR0_ABORT		0x80	/* abort (synchr) */
-#define	SCC_RR0_TX_UNDERRUN	0x40	/* xmit buffer empty/end of message */
-#define	SCC_RR0_CTS		0x20	/* clear-to-send pin active (sampled
-					   only on intr and after RESI cmd) */
-#define	SCC_RR0_SYNCH		0x10	/* SYNCH found/still hunting */
-#define	SCC_RR0_DCD		0x08	/* carrier-detect (same as CTS) */
-#define	SCC_RR0_TX_EMPTY	0x04	/* xmit buffer empty */
-#define	SCC_RR0_ZERO_COUNT	0x02	/* ? */
-#define	SCC_RR0_RX_AVAIL	0x01	/* recv fifo not empty */
-
-#define	SCC_RR1_EOF		0x80	/* end-of-frame, SDLC mode */
-#define	SCC_RR1_CRC_ERR		0x40	/* incorrect CRC or.. */
-#define	SCC_RR1_FRAME_ERR	0x40	/* ..bad frame */
-#define	SCC_RR1_RX_OVERRUN	0x20	/* rcv fifo overflow */
-#define	SCC_RR1_PARITY_ERR	0x10	/* incorrect parity in data */
-#define	SCC_RR1_RESIDUE0	0x08
-#define	SCC_RR1_RESIDUE1	0x04
-#define	SCC_RR1_RESIDUE2	0x02
-#define	SCC_RR1_ALL_SENT	0x01
-
-/* RR2 contains the interrupt vector unmodified (channel A) or
-   modified as follows (channel B, if vector-include-status) */
-
-#define	SCC_RR2_STATUS(val)	((val)&0xe)	/* 11/7/95 used to be 0xf */
-
-#define	SCC_RR2_B_XMIT_DONE	0x0
-#define	SCC_RR2_B_EXT_STATUS	0x2
-#define	SCC_RR2_B_RECV_DONE	0x4
-#define	SCC_RR2_B_RECV_SPECIAL	0x6
-#define	SCC_RR2_A_XMIT_DONE	0x8
-#define	SCC_RR2_A_EXT_STATUS	0xa
-#define	SCC_RR2_A_RECV_DONE	0xc
-#define	SCC_RR2_A_RECV_SPECIAL	0xe
-
-/* Interrupts pending, to be read from channel A only (B raz) */
-#define	SCC_RR3_zero		0xc0
-#define	SCC_RR3_RX_IP_A		0x20
-#define	SCC_RR3_TX_IP_A		0x10
-#define	SCC_RR3_EXT_IP_A	0x08
-#define	SCC_RR3_RX_IP_B		0x04
-#define	SCC_RR3_TX_IP_B		0x02
-#define	SCC_RR3_EXT_IP_B	0x01
-
-/* RR8 is the receive data buffer, a 3 deep FIFO */
-#define	SCC_RECV_BUFFER		SCC_RR8
-#define	SCC_RECV_FIFO_DEEP	3
-
-#define	SCC_RR10_1CLKS		0x80
-#define	SCC_RR10_2CLKS		0x40
-#define	SCC_RR10_zero		0x2d
-#define	SCC_RR10_LOOP_SND	0x10
-#define	SCC_RR10_ON_LOOP	0x02
-
-/* RR12/RR13 hold the timing base, upper byte in RR13 */
-
-#define	scc_get_timing_base(scc,chan,val)	{ \
-		register char	tmp;	\
-		scc_read_reg(scc,chan,SCC_RR12,val);\
-		scc_read_reg(scc,chan,SCC_RR13,tmp);\
-		(val) = (((tmp)&0xff)<<8)|((val)&0xff);\
-	}
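
A hypothetical round trip with the two timing-base macros (scc_set_timing_base
appears further down, next to the WR12/WR13 defines); regs and chan stand for
any valid register map and channel:

	int brg;

	scc_set_timing_base(regs, chan, 0x018E);	/* RR12 <- 0x8E, RR13 <- 0x01 */
	scc_get_timing_base(regs, chan, brg);		/* reassembles brg == 0x018E */
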
-
-#define	SCC_RR15_BREAK_IE	0x80
-#define	SCC_RR15_TX_UNDERRUN_IE	0x40
-#define	SCC_RR15_CTS_IE		0x20
-#define	SCC_RR15_SYNCH_IE	0x10
-#define	SCC_RR15_DCD_IE		0x08
-#define	SCC_RR15_zero		0x05
-#define	SCC_RR15_ZERO_COUNT_IE	0x02
-
-
-/*
- * Write registers defines
- */
-
-/* WR0 is used for commands too */
-#define	SCC_RESET_TXURUN_LATCH	0xc0
-#define	SCC_RESET_TX_CRC	0x80
-#define	SCC_RESET_RX_CRC	0x40
-#define	SCC_RESET_HIGHEST_IUS	0x38	/* channel A only */
-#define	SCC_RESET_ERROR		0x30
-#define	SCC_RESET_TX_IP		0x28
-#define	SCC_IE_NEXT_CHAR	0x20
-#define	SCC_SEND_SDLC_ABORT	0x18
-#define	SCC_RESET_EXT_IP	0x10
-
-#define	SCC_WR1_DMA_ENABLE	0x80	/* dma control */
-#define	SCC_WR1_DMA_MODE	0x40	/* drive ~req for DMA controller */
-#define	SCC_WR1_DMA_RECV_DATA	0x20	/* from wire to host memory */
-					/* interrupt enable/conditions */
-#define	SCC_WR1_RXI_SPECIAL_O	0x18	/* on special only */
-#define	SCC_WR1_RXI_ALL_CHAR	0x10	/* on each char, or special */
-#define	SCC_WR1_RXI_FIRST_CHAR	0x08	/* on first char, or special */
-#define	SCC_WR1_RXI_DISABLE	0x00	/* never on recv */
-#define	SCC_WR1_PARITY_IE	0x04	/* on parity errors */
-#define	SCC_WR1_TX_IE		0x02
-#define	SCC_WR1_EXT_IE		0x01
-
-/* WR2 is common and contains the interrupt vector (high nibble) */
-
-#define	SCC_WR3_RX_8_BITS	0xc0
-#define	SCC_WR3_RX_6_BITS	0x80
-#define	SCC_WR3_RX_7_BITS	0x40
-#define	SCC_WR3_RX_5_BITS	0x00
-#define	SCC_WR3_AUTO_ENABLE	0x20
-#define	SCC_WR3_HUNT_MODE	0x10
-#define	SCC_WR3_RX_CRC_ENABLE	0x08
-#define	SCC_WR3_SDLC_SRCH	0x04
-#define	SCC_WR3_INHIBIT_SYNCH	0x02
-#define	SCC_WR3_RX_ENABLE	0x01
-
-/* Should be re-written after reset */
-#define	SCC_WR4_CLK_x64		0xc0	/* clock divide factor */
-#define	SCC_WR4_CLK_x32		0x80
-#define	SCC_WR4_CLK_x16		0x40
-#define	SCC_WR4_CLK_x1		0x00
-#define	SCC_WR4_EXT_SYNCH_MODE	0x30	/* synch modes */
-#define	SCC_WR4_SDLC_MODE	0x20
-#define	SCC_WR4_16BIT_SYNCH	0x10
-#define	SCC_WR4_8BIT_SYNCH	0x00
-#define	SCC_WR4_2_STOP		0x0c	/* asynch modes */
-#define	SCC_WR4_1_5_STOP	0x08
-#define	SCC_WR4_1_STOP		0x04
-#define	SCC_WR4_SYNCH_MODE	0x00
-#define	SCC_WR4_EVEN_PARITY	0x02
-#define	SCC_WR4_PARITY_ENABLE	0x01
-
-#define	SCC_WR5_DTR		0x80	/* drive DTR pin */
-#define	SCC_WR5_TX_8_BITS	0x60
-#define	SCC_WR5_TX_6_BITS	0x40
-#define	SCC_WR5_TX_7_BITS	0x20
-#define	SCC_WR5_TX_5_BITS	0x00
-#define	SCC_WR5_SEND_BREAK	0x10
-#define	SCC_WR5_TX_ENABLE	0x08
-#define	SCC_WR5_CRC_16		0x04	/* CRC if non zero, .. */
-#define	SCC_WR5_SDLC		0x00	/* ..SDLC otherwise  */
-#define	SCC_WR5_RTS		0x02	/* drive RTS pin */
-#define	SCC_WR5_TX_CRC_ENABLE	0x01
-
-/* Registers WR6 and WR7 are for synch modes data, with among other things: */
-
-#define	SCC_WR6_BISYNCH_12	0x0f
-#define	SCC_WR6_SDLC_RANGE_MASK	0x0f
-#define	SCC_WR7_SDLC_FLAG	0x7e
-
-/* Register WR7' (prime) controls some ESCC features */
-#define SCC_WR7P_RX_FIFO	0x08	/* Enable interrupt on FIFO 1/2 full */
-
-/* WR8 is the transmit data buffer (no FIFO) */
-#define	SCC_XMT_BUFFER		SCC_WR8
-
-#define	SCC_WR9_HW_RESET	0xc0	/* force hardware reset */
-#define	SCC_WR9_RESET_CHA_A	0x80
-#define	SCC_WR9_RESET_CHA_B	0x40
-#define	SCC_WR9_NON_VECTORED	0x20	/* mbz for Zilog chip */
-#define	SCC_WR9_STATUS_HIGH	0x10
-#define	SCC_WR9_MASTER_IE	0x08
-#define	SCC_WR9_DLC		0x04	/* disable-lower-chain */
-#define	SCC_WR9_NV		0x02	/* no vector */
-#define	SCC_WR9_VIS		0x01	/* vector-includes-status */
-
-#define	SCC_WR10_CRC_PRESET	0x80
-#define	SCC_WR10_FM0		0x60
-#define	SCC_WR10_FM1		0x40
-#define	SCC_WR10_NRZI		0x20
-#define	SCC_WR10_NRZ		0x00
-#define	SCC_WR10_ACTIVE_ON_POLL	0x10
-#define	SCC_WR10_MARK_IDLE	0x08	/* flag if zero */
-#define	SCC_WR10_ABORT_ON_URUN	0x04	/* flag if zero */
-#define	SCC_WR10_LOOP_MODE	0x02
-#define	SCC_WR10_6BIT_SYNCH	0x01
-#define	SCC_WR10_8BIT_SYNCH	0x00
-
-#define	SCC_WR11_RTxC_XTAL	0x80	/* RTxC pin is input (ext oscill) */
-#define	SCC_WR11_RCLK_DPLL	0x60	/* clock received data on dpll */
-#define	SCC_WR11_RCLK_BAUDR	0x40	/* .. on BRG */
-#define	SCC_WR11_RCLK_TRc_PIN	0x20	/* .. on TRxC pin */
-#define	SCC_WR11_RCLK_RTc_PIN	0x00	/* .. on RTxC pin */
-#define	SCC_WR11_XTLK_DPLL	0x18
-#define	SCC_WR11_XTLK_BAUDR	0x10
-#define	SCC_WR11_XTLK_TRc_PIN	0x08
-#define	SCC_WR11_XTLK_RTc_PIN	0x00
-#define	SCC_WR11_TRc_OUT	0x04	/* drive TRxC pin as output from..*/
-#define	SCC_WR11_TRcOUT_DPLL	0x03	/* .. the dpll */
-#define	SCC_WR11_TRcOUT_BAUDR	0x02	/* .. the BRG */
-#define	SCC_WR11_TRcOUT_XMTCLK	0x01	/* .. the xmit clock */
-#define	SCC_WR11_TRcOUT_XTAL	0x00	/* .. the external oscillator */
-
-/* WR12/WR13 are for timing base preset */
-#define	scc_set_timing_base(scc,chan,val)	{ \
-		scc_write_reg(scc,chan,SCC_RR12,val);\
-		scc_write_reg(scc,chan,SCC_RR13,(val)>>8);\
-	}
-
-/* More commands in this register */
-#define	SCC_WR14_NRZI_MODE	0xe0	/* synch modulations */
-#define	SCC_WR14_FM_MODE	0xc0
-#define	SCC_WR14_RTc_SOURCE	0xa0	/* clock is from pin .. */
-#define	SCC_WR14_BAUDR_SOURCE	0x80	/* .. or internal BRG */
-#define	SCC_WR14_DISABLE_DPLL	0x60
-#define	SCC_WR14_RESET_CLKMISS	0x40
-#define	SCC_WR14_SEARCH_MODE	0x20
-/* ..and more bitsy */
-#define	SCC_WR14_LOCAL_LOOPB	0x10
-#define	SCC_WR14_AUTO_ECHO	0x08
-#define	SCC_WR14_DTR_REQUEST	0x04
-#define	SCC_WR14_BAUDR_SRC	0x02
-#define	SCC_WR14_BAUDR_ENABLE	0x01
-
-#define	SCC_WR15_BREAK_IE	0x80
-#define	SCC_WR15_TX_UNDERRUN_IE	0x40
-#define	SCC_WR15_CTS_IE		0x20
-#define	SCC_WR15_SYNCHUNT_IE	0x10
-#define	SCC_WR15_DCD_IE		0x08
-#define	SCC_WR15_zero		0x05
-#define	SCC_WR15_ZERO_COUNT_IE	0x02
-#define SCC_WR15_ENABLE_ESCC	0x01	/* Enable some ESCC registers */
-
-#define	NSCC_LINE		2	/* How many lines are supported per 8530 */
-/*
- * Driver status
- */
-
-#define	SCC_FLAGS_DMA_PAUSED	0x00001		/* DMA has been paused because of XON/XOFF */
-#define	SCC_FLAGS_DMA_TX_BUSY	0x00002		/* Ongoing DMA operation.. */
-
-struct scc_softreg {
-	unsigned char	wr1;
-	unsigned char	wr4;
-	unsigned char	wr5;
-	unsigned char	wr14;
-
-	unsigned long	speed;
-	unsigned long	flags;
-	unsigned long	dma_flags;
-};
-
-
-struct scc_softc {
-	scc_regmap_t		regs;
-	struct scc_dma_ops	*dma_ops;
-
-	/* software copy of some write regs, for reg |= */
-	struct scc_softreg softr[NSCC_LINE];
-
-	int		flags;
-	int		modem[NSCC_LINE]; /* Mach modem bits (TM_DTR etc). */
-	int		dcd_timer[NSCC_LINE];
-	int		dma_initted;
-
-	char		polling_mode;
-	char		probed_once;
-
-	boolean_t		full_modem;
-};
-
-#define DCD_TIMEOUT 4
-
-typedef struct scc_softc *scc_softc_t;
-extern struct scc_softc		scc_softc[];
-
-#endif	/*_SCC_8530_H_*/
diff --git a/osfmk/ppc/sched_param.h b/osfmk/ppc/sched_param.h
deleted file mode 100644
index eefe3303f..000000000
--- a/osfmk/ppc/sched_param.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon 
- * the rights to redistribute these changes.
- */
-
-/*
- */
-
-/*
- *	Scheduler parameters.
- */
-
-#ifndef	_PPC_SCHED_PARAM_H_
-#define	_PPC_SCHED_PARAM_H_
-
-#include <ppc/exception.h>
-#include <ppc/savearea.h>
-
-#endif	/* _PPC_SCHED_PARAM_H_ */
diff --git a/osfmk/ppc/screen_switch.h b/osfmk/ppc/screen_switch.h
deleted file mode 100644
index 956d1ac84..000000000
--- a/osfmk/ppc/screen_switch.h
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *	File: screen_switch.h
- * 	Author: Alessandro Forin, Carnegie Mellon University
- *	Date:	10/90
- *
- *	Definitions of things that must be tailored to
- *	specific hardware boards for the Generic Screen Driver.
- */
-
-#ifndef	SCREEN_SWITCH_H
-#define	SCREEN_SWITCH_H	1
-
-#include <mach/boolean.h>
-
-/*
- *	List of probe routines, scanned at cold-boot time
- *	to see which, if any, graphic display is available.
- *	This is done before autoconf, so that printing on
- *	the console works early on.  The alloc routine is
- *	called only on the first device that answers.
- *	Ditto for the setup routine, called later on.
- */
-struct screen_probe_vector {
-	int		(*probe)(void);
-	unsigned int	(*alloc)(void);
-	int		(*setup)(int, user_info_t);
-};
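
The cold-boot scan the comment above describes would look roughly like this;
the termination convention of screen_probe_vector[] is an assumption here (a
null probe pointer), since this header does not spell it out:

	static int screen_cold_scan(void)
	{
		struct screen_probe_vector *p;

		for (p = screen_probe_vector; p->probe != 0; p++)
			if ((*p->probe)()) {
				(*p->alloc)();	/* only the first device that answers */
				return 1;
			}
		return 0;		/* no display found */
	}
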
-
-/*
- *	Low-level operations on the graphic device, used
- *	by the otherwise device-independent interface code
- */
-
-/* Forward declaration of screen_softc_t */
-typedef struct screen_softc *screen_softc_t;
-
-struct screen_switch {
-	int	(*graphic_open)(void);			/* when X11 opens */
-	int	(*graphic_close)(screen_softc_t);	/* .. or closes */
-	int	(*set_status)(screen_softc_t,
-			      dev_flavor_t,
-			      dev_status_t,
-			      natural_t);		/* dev-specific ops */
-	int	(*get_status)(screen_softc_t,
-			      dev_flavor_t,
-			      dev_status_t,
-			      natural_t*);		/* dev-specific ops */
-	int	(*char_paint)(screen_softc_t,
-			      int,
-			      int,
-			      int);			/* blitc */
-	int	(*pos_cursor)(void*,
-			      int,
-			      int);			/* cursor positioning*/
-	int	(*insert_line)(screen_softc_t,
-			       short);			/* ..and scroll down */
-	int	(*remove_line)(screen_softc_t,
-			       short);			/* ..and scroll up */
-	int	(*clear_bitmap)(screen_softc_t);	/* blank screen */
-	int	(*video_on)(void*,
-			    user_info_t*);		/* screen saver */
-	int	(*video_off)(void*,
-			     user_info_t*);
-	int	(*intr_enable)(void*,
-			       boolean_t);
-	int	(*map_page)(screen_softc_t,
-			    vm_offset_t,
-			    int);			/* user-space mapping*/
-};
-
-/*
- *	Each graphic device needs page-aligned memory
- *	to be mapped in user space later (for events
- *	and such).  Size and content of this memory
- * is unfortunately device-dependent, even
- * though it need not be.
- */
-extern char  *screen_data;
-
-extern struct screen_probe_vector screen_probe_vector[];
-
-extern int screen_noop(void), screen_find(void);
-
-#endif	/* SCREEN_SWITCH_H */
diff --git a/osfmk/ppc/serial_defs.h b/osfmk/ppc/serial_defs.h
deleted file mode 100644
index e18994c35..000000000
--- a/osfmk/ppc/serial_defs.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *	File: serial_defs.h
- * 	Author: Alessandro Forin, Carnegie Mellon University
- *	Date:	7/91
- *
- *	Generic console driver for serial-line based consoles.
- */
-
-#ifndef	_PPC_SERIAL_DEFS_
-#define	_PPC_SERIAL_DEFS_
-
-#include <device/tty.h>
-/*
- * Common defs
- */
-
-
-#define	CONS_ERR_PARITY		0x1000
-#define	CONS_ERR_BREAK		0x2000
-#define	CONS_ERR_OVERRUN	0x4000
-
-
-#endif	/* _PPC_SERIAL_DEFS_ */
diff --git a/osfmk/ppc/serial_io.c b/osfmk/ppc/serial_io.c
deleted file mode 100644
index 2f03aa110..000000000
--- a/osfmk/ppc/serial_io.c
+++ /dev/null
@@ -1,659 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- */
-/*
- *	File: scc_8530_hdw.c
- * 	Author: Alessandro Forin, Carnegie Mellon University
- *	Date:	6/91
- *
- *	Hardware-level operations for the SCC Serial Line Driver
- */
-
-#define	NSCC	1	/* Number of serial chips, two ports per chip. */
-#if	NSCC > 0
-
-#include <mach_kdb.h>
-#include <platforms.h>
-#include <kern/spl.h>
-#include <mach/std_types.h>
-#include <types.h>
-#include <sys/syslog.h>
-#include <kern/thread.h>
-#include <ppc/misc_protos.h>
-#include <ppc/proc_reg.h>
-#include <ppc/exception.h>
-#include <ppc/Firmware.h>
-#include <ppc/serial_io.h>
-#include <ppc/scc_8530.h>
-
-#if	MACH_KDB
-#include <machine/db_machdep.h>
-#endif	/* MACH_KDB */
-
-#define	kdebug_state()	(1)
-#define	delay(x)	{ volatile int _d_; for (_d_ = 0; _d_ < (10000*x); _d_++) ; }
-
-#define	NSCC_LINE	2	/* 2 ttys per chip */
-
-#define	SCC_DMA_TRANSFERS	0
-  
-struct scc_tty scc_tty[NSCC_LINE];
-
-#define scc_tty_for(chan)	(&scc_tty[chan])
-/* #define scc_unit(dev_no)	(dev_no) */
-
-#define scc_dev_no(chan) ((chan)^0x01)
-#define scc_chan(dev_no) ((dev_no)^0x01)
-
-int	serial_initted = 0;
-unsigned int scc_parm_done = 0;
-
-static struct scc_byte {
-	unsigned char	reg;
-	unsigned char	val;
-} scc_init_hw[] = {
-	
-	{9, 0x80},
-	{4, 0x44},
-	{3, 0xC0},
-	{5, 0xE2},
-	{2, 0x00},
-	{10, 0x00},
-	{11, 0x50},
-	{12, 0x0A},
-	{13, 0x00},
-	{3, 0xC1},
-	{5, 0xEA},
-	{14, 0x01},
-	{15, 0x00},
-	{0, 0x10},
-	{0, 0x10},
-#if 0
-	{1, 0x12},			/* int on Rx, Tx int enable */
-#else
-	{1, 0x10},			/* int on Rx, no Tx int enable */
-#endif
-	{9, 0x0A}
-};
-
-static int	scc_init_hw_count = sizeof(scc_init_hw)/sizeof(scc_init_hw[0]);
-
-enum scc_error {SCC_ERR_NONE, SCC_ERR_PARITY, SCC_ERR_BREAK, SCC_ERR_OVERRUN};
-
-
-/*
- * BRG formula is:
- *				ClockFrequency (115200 for Power Mac)
- *	BRGconstant = 	---------------------------  -  2
- *			      BaudRate
- */
-
-#define SERIAL_CLOCK_FREQUENCY (115200*2) /* Power Mac value */
-#define	convert_baud_rate(rate)	((((SERIAL_CLOCK_FREQUENCY) + (rate)) / (2 * (rate))) - 2)
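
Worked through in integer arithmetic (SERIAL_CLOCK_FREQUENCY == 230400; adding
the rate before dividing rounds to the nearest constant):

	convert_baud_rate(9600)   == (240000 /  19200) - 2 == 12 - 2 == 10
	convert_baud_rate(57600)  == (288000 / 115200) - 2 ==  2 - 2 ==  0
	convert_baud_rate(115200) == (345600 / 230400) - 2 ==  1 - 2	/* 0xffff as unsigned short */

The 115200 case underflows, which is why scc_param() below checks for a
speed_value of 0xffff and clamps it back to zero before programming the BRG.
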
-
-#define DEFAULT_SPEED 57600
-#define DEFAULT_PORT0_SPEED 1200
-#define DEFAULT_FLAGS (TF_LITOUT|TF_ECHO)
-
-int	scc_param(struct scc_tty *tp);
-
-
-struct scc_softc	scc_softc[NSCC];
-caddr_t	scc_std[NSCC] = { (caddr_t) 0};
-
-
-#define SCC_RR1_ERRS (SCC_RR1_FRAME_ERR|SCC_RR1_RX_OVERRUN|SCC_RR1_PARITY_ERR)
-#define SCC_RR3_ALL (SCC_RR3_RX_IP_A|SCC_RR3_TX_IP_A|SCC_RR3_EXT_IP_A|\
-                     SCC_RR3_RX_IP_B|SCC_RR3_TX_IP_B|SCC_RR3_EXT_IP_B)
-
-#define DEBUG_SCC
-#undef  DEBUG_SCC
-
-#ifdef DEBUG_SCC
-static int total_chars, total_ints, total_overruns, total_errors, num_ints, max_chars;
-static int chars_received[8];
-static int __SCC_STATS = 0;
-static int max_in_q = 0;
-static int max_out_q = 0;
-#endif
-
-DECL_FUNNEL(, scc_funnel)	/* funnel to serialize the SCC driver */
-boolean_t scc_funnel_initted = FALSE;
-#define SCC_FUNNEL		scc_funnel
-#define SCC_FUNNEL_INITTED	scc_funnel_initted
-
-
-/*
- * Adapt/Probe/Attach functions
- */
-boolean_t	scc_uses_modem_control = FALSE;/* patch this with adb */
-decl_simple_lock_data(,scc_stomp)
-
-/* This is called VERY early on in the init and therefore has to have
- * hardcoded addresses of the serial hardware control registers. The
- * serial line may be needed for console and debugging output before
- * anything else takes place
- */
-
-void
-initialize_serial( caddr_t scc_phys_base, int32_t serial_baud )
-{
-	int i, chan, bits;
-	scc_regmap_t	regs;
-	DECL_FUNNEL_VARS
-
-	assert( scc_phys_base );
-
-	if (!SCC_FUNNEL_INITTED) {
-		FUNNEL_INIT(&SCC_FUNNEL, master_processor);
-		SCC_FUNNEL_INITTED = TRUE;
-	}
-	FUNNEL_ENTER(&SCC_FUNNEL);
-
-	if (serial_initted) {
-		FUNNEL_EXIT(&SCC_FUNNEL);
-		return;
-	}
-
-	simple_lock_init(&scc_stomp, FALSE);
-	
-	if (serial_baud == -1) serial_baud = DEFAULT_SPEED;
-	
-	scc_softc[0].full_modem = TRUE;
-
-        scc_std[0] = scc_phys_base;
-
-	regs = scc_softc[0].regs = (scc_regmap_t)scc_std[0];
-
-	for (chan = 0; chan < NSCC_LINE; chan++) {
-		if (chan == 1)
-			scc_init_hw[0].val = 0x80;
-
-		for (i = 0; i < scc_init_hw_count; i++) {
-			scc_write_reg(regs, chan,
-				      scc_init_hw[i].reg, scc_init_hw[i].val);
-		}
-	}
-
-	/* Call probe so we are ready very early for remote gdb and for serial
-	   console output if appropriate.  */
-	if (scc_probe(serial_baud)) {
-		for (i = 0; i < NSCC_LINE; i++) {
-			scc_softc[0].softr[i].wr5 = SCC_WR5_DTR | SCC_WR5_RTS;
-			scc_param(scc_tty_for(i));
-	/* Enable SCC interrupts (how many interrupts are to this thing?!?) */
-			scc_write_reg(regs,  i,  9, SCC_WR9_NV);
-
-			scc_read_reg_zero(regs, 0, bits);/* Clear the status */
-		}
-		scc_parm_done = 1;
-	}
-
-	serial_initted = TRUE;
-
-	FUNNEL_EXIT(&SCC_FUNNEL);
-	return;
-}
-
-int
-scc_probe(int32_t serial_baud)
-{
-	scc_softc_t     scc;
-	int i;
-	scc_regmap_t regs;
-	spl_t	s;
-	DECL_FUNNEL_VARS
-
-	if (!SCC_FUNNEL_INITTED) {
-		FUNNEL_INIT(&SCC_FUNNEL, master_processor);
-		SCC_FUNNEL_INITTED = TRUE;
-	}
-	FUNNEL_ENTER(&SCC_FUNNEL);
-
-	/* Readjust the I/O address to handle
-	 * new memory mappings.
-	 */
-
-	regs = (scc_regmap_t)scc_std[0];
-
-	if (regs == (scc_regmap_t) 0) {
-		FUNNEL_EXIT(&SCC_FUNNEL);
-		return 0;
-	}
-
-	scc = &scc_softc[0];
-	scc->regs = regs;
-
-	s = splhigh();
-
-	for (i = 0; i < NSCC_LINE; i++) {
-		register struct scc_tty	*tp;
-		tp = scc_tty_for(i);
-		tp->t_addr = (char*)(0x80000000L + (i&1));
-		/* Set default values.  These will be overridden on
-		   open but are needed if the port will be used
-		   independently of the Mach interfaces, e.g., for
-		   gdb or for a serial console.  */
-		if (i == 0) {
-		  tp->t_ispeed = DEFAULT_PORT0_SPEED;
-		  tp->t_ospeed = DEFAULT_PORT0_SPEED;
-		} else {
-		  tp->t_ispeed = serial_baud;
-		  tp->t_ospeed = serial_baud;
-		}
-		tp->t_flags = DEFAULT_FLAGS;
-		scc->softr[i].speed = -1;
-
-		/* do min buffering */
-		tp->t_state |= TS_MIN;
-
-		tp->t_dev = scc_dev_no(i);
-	}
-
-	splx(s);
-
-	FUNNEL_EXIT(&SCC_FUNNEL);
-	return 1;
-}
-
-/*
- * Get a char from a specific SCC line
- * [this is only used for console & screen purposes]
- * Must run at splhigh, since it may be called from another routine under spl.
- */
-
-int
-scc_getc(__unused int unit, int line, boolean_t wait, __unused boolean_t raw)
-{
-	scc_regmap_t	regs;
-	unsigned char   c, value;
-	int             rcvalue;
-	spl_t		s = splhigh();
-	DECL_FUNNEL_VARS
-
-	FUNNEL_ENTER(&SCC_FUNNEL);
-
-
-	simple_lock(&scc_stomp);
-	regs = scc_softc[0].regs;
-
-	/*
-	 * wait till something is available
-	 *
-	 */
-again:
-	rcvalue = 0;
-	while (1) {
-		scc_read_reg_zero(regs, line, value);
-
-		if (value & SCC_RR0_RX_AVAIL)
-			break;
-
-		if (!wait) {
-			simple_unlock(&scc_stomp);
-			splx(s);
-			FUNNEL_EXIT(&SCC_FUNNEL);
-			return -1;
-		}
-	}
-
-	/*
-	 * something arrived: read the error latch (RR1), then the char
-	 */
-
-	scc_read_reg(regs, line, SCC_RR1, value);
-	scc_read_data(regs, line, c);
-
-#if	MACH_KDB
-	if (console_is_serial() &&
-	    c == ('_' & 0x1f)) {
-		/* Drop into the debugger */
-		simple_unlock(&scc_stomp);
-		Debugger("Serial Line Request");
-		simple_lock(&scc_stomp);
-		scc_write_reg(regs, line, SCC_RR0, SCC_RESET_HIGHEST_IUS);
-		if (wait) {
-			goto again;
-		}
-		simple_unlock(&scc_stomp);
-		splx(s);
-		FUNNEL_EXIT(&SCC_FUNNEL);
-		return -1;
-	}
-#endif	/* MACH_KDB */
-
-	/*
-	 * bad chars not ok
-	 */
-	if (value&(SCC_RR1_PARITY_ERR | SCC_RR1_RX_OVERRUN | SCC_RR1_FRAME_ERR)) {
-		scc_write_reg(regs, line, SCC_RR0, SCC_RESET_ERROR);
-
-		if (wait) {
-			scc_write_reg(regs, line, SCC_RR0, SCC_RESET_HIGHEST_IUS);
-			goto again;
-		}
-	}
-
-	scc_write_reg(regs, line, SCC_RR0, SCC_RESET_HIGHEST_IUS);
-
-	simple_unlock(&scc_stomp);
-	splx(s);
-
-	FUNNEL_EXIT(&SCC_FUNNEL);
-	return c;
-}
-
-
-/*
- *	This front-ends scc_getc to make some intel changes easier
- */
- 
-int _serial_getc(int unit, int line, boolean_t wait, boolean_t raw) {
-
-	return(scc_getc(unit, line, wait, raw));
-
-}
-
-/*
- * Put a char on a specific SCC line
- * use splhigh since we might be doing a printf in high spl'd code
- */
-
-void
-scc_putc(__unused int unit, int line, int c)
-{
-	scc_regmap_t	regs;
-	spl_t            s;
-	unsigned char	 value;
-	DECL_FUNNEL_VARS
-
-
-	if (disable_serial_output)
-		return;
-
-	s = splhigh();
-	FUNNEL_ENTER(&SCC_FUNNEL);
-	simple_lock(&scc_stomp);		
-
-	regs = scc_softc[0].regs;
-
-	do {
-		scc_read_reg(regs, line, SCC_RR0, value);
-		if (value & SCC_RR0_TX_EMPTY)
-			break;
-		delay(1);
-	} while (1);
-
-	scc_write_data(regs, line, c);
-/* wait for it to swallow the char ? */
-
-	do {
-		scc_read_reg(regs, line, SCC_RR0, value);
-		if (value & SCC_RR0_TX_EMPTY)
-			break;
-	} while (1);
-	scc_write_reg(regs, line, SCC_RR0, SCC_RESET_HIGHEST_IUS);
-	simple_unlock(&scc_stomp);		
-
-	splx(s);
-
-	FUNNEL_EXIT(&SCC_FUNNEL);
-}
-
-
-void
-powermac_scc_set_datum(scc_regmap_t regs, unsigned int offset, unsigned char value)
-{
-	volatile unsigned char *address = (unsigned char *) regs + offset;
-  
-	assert(FUNNEL_IN_USE(&SCC_FUNNEL));
-
-	*address = value;
-	eieio();
-
-	assert(FUNNEL_IN_USE(&SCC_FUNNEL));
-}
-  
-unsigned char
-powermac_scc_get_datum(scc_regmap_t regs, unsigned int offset)
-{
-	volatile unsigned char *address = (unsigned char *) regs + offset;
-	unsigned char	value;
-  
-	assert(FUNNEL_IN_USE(&SCC_FUNNEL));
-
-	value = *address; eieio();
-
-	assert(FUNNEL_IN_USE(&SCC_FUNNEL));
-	return value;
-}
-
-int
-scc_param(struct scc_tty *tp)
-{
-	scc_regmap_t	regs;
-	unsigned char	value;
-	unsigned short	speed_value;
-	int		bits, chan;
-	spl_t		s;
-	struct scc_softreg	*sr;
-	scc_softc_t	scc;
-
-	assert(FUNNEL_IN_USE(&SCC_FUNNEL));
-	
-	s = splhigh();
-	simple_lock(&scc_stomp);
-
-	chan = scc_chan(tp->t_dev);
-	scc = &scc_softc[0];
-	regs = scc->regs;
-
-	sr = &scc->softr[chan];
-	
-	/* Do a quick check to see if the hardware needs to change */
-	if ((sr->flags & (TF_ODDP|TF_EVENP)) == (tp->t_flags & (TF_ODDP|TF_EVENP))
-	    && sr->speed == (unsigned long)tp->t_ispeed) {
-		assert(FUNNEL_IN_USE(&SCC_FUNNEL));
-		simple_unlock(&scc_stomp);
-		splx(s);
-		return 0;
-	}
-
-	if (scc_parm_done) {
-		
-		scc_write_reg(regs,  chan,  3, SCC_WR3_RX_8_BITS|SCC_WR3_RX_ENABLE);
-		sr->wr1 = SCC_WR1_RXI_FIRST_CHAR | SCC_WR1_EXT_IE;
-		scc_write_reg(regs,  chan,  1, sr->wr1);
-       	scc_write_reg(regs,  chan, 15, SCC_WR15_ENABLE_ESCC);
-		scc_write_reg(regs,  chan,  7, SCC_WR7P_RX_FIFO);
-		scc_write_reg(regs,  chan,  0, SCC_IE_NEXT_CHAR);
-		scc_write_reg(regs,  chan,  0, SCC_RESET_EXT_IP);
-		scc_write_reg(regs,  chan,  0, SCC_RESET_EXT_IP);
-		scc_write_reg(regs,  chan,  9, SCC_WR9_MASTER_IE|SCC_WR9_NV);
-		scc_read_reg_zero(regs, 0, bits);
-		sr->wr1 = SCC_WR1_RXI_FIRST_CHAR | SCC_WR1_EXT_IE;
-		scc_write_reg(regs,  chan,  1, sr->wr1);
-		scc_write_reg(regs,  chan,  0, SCC_IE_NEXT_CHAR);
-		simple_unlock(&scc_stomp);
-		splx(s);
-		return 0;
-	}
-	
-	sr->flags = tp->t_flags;
-	sr->speed = tp->t_ispeed;
-
-
-	if (tp->t_ispeed == 0) {
-		sr->wr5 &= ~SCC_WR5_DTR;
-		scc_write_reg(regs,  chan, 5, sr->wr5);
-		simple_unlock(&scc_stomp);
-		splx(s);
-
-		assert(FUNNEL_IN_USE(&SCC_FUNNEL));
-		return 0;
-	}
-	
-
-#if	SCC_DMA_TRANSFERS
-	if (scc->dma_initted & (1<<chan)) 
-		scc->dma_ops->scc_dma_reset_rx(chan);
-#endif
-
-	value = SCC_WR4_1_STOP;
-
-	/* 
-	 * For 115K the clock divide changes to 64, while 230K will
-	 * start at the normal clock divide of 16.
-	 *
-	 * However, both speeds will pull from a different clocking
-	 * source
-	 */
-
-	if (tp->t_ispeed == 115200)
-		value |= SCC_WR4_CLK_x32;
-	else	
-		value |= SCC_WR4_CLK_x16 ;
-
-	/* .. and parity */
-	if ((tp->t_flags & (TF_ODDP | TF_EVENP)) == TF_EVENP)
-		value |= (SCC_WR4_EVEN_PARITY |  SCC_WR4_PARITY_ENABLE);
-	else if ((tp->t_flags & (TF_ODDP | TF_EVENP)) == TF_ODDP)
-		value |= SCC_WR4_PARITY_ENABLE;
-
-	/* set it now, remember it must be first after reset */
-	sr->wr4 = value;
-
-	/* Program Parity, and Stop bits */
-	scc_write_reg(regs,  chan, 4, sr->wr4);
-
-	/* Setup for 8 bits */
-	scc_write_reg(regs,  chan, 3, SCC_WR3_RX_8_BITS);
-
-	// Set DTR, RTS, and transmitter bits/character.
-	sr->wr5 = SCC_WR5_TX_8_BITS | SCC_WR5_RTS | SCC_WR5_DTR;
-
-	scc_write_reg(regs,  chan, 5, sr->wr5);
-	
-	scc_write_reg(regs, chan, 14, 0);	/* Disable baud rate */
-
-	/* Set up the baud rate; 57.6Kbps, 115K, and 230K should all yield
-	 * a converted baud rate of zero.
-	 */
-	speed_value = convert_baud_rate(tp->t_ispeed);
-
-	if (speed_value == 0xffff)
-		speed_value = 0;
-
-	scc_set_timing_base(regs, chan, speed_value);
-	
-	if (tp->t_ispeed == 115200 || tp->t_ispeed == 230400) {
-		/* Special case here.. change the clock source*/
-		scc_write_reg(regs, chan, 11, 0);
-		/* Baud rate generator is disabled.. */
-	} else {
-		scc_write_reg(regs, chan, 11, SCC_WR11_RCLK_BAUDR|SCC_WR11_XTLK_BAUDR);
-		/* Enable the baud rate generator */
-		scc_write_reg(regs,  chan, 14, SCC_WR14_BAUDR_ENABLE);
-	}
-
-
-	scc_write_reg(regs,  chan,  3, SCC_WR3_RX_8_BITS|SCC_WR3_RX_ENABLE);
-
-
-	sr->wr1 = SCC_WR1_RXI_FIRST_CHAR | SCC_WR1_EXT_IE;
-	scc_write_reg(regs,  chan,  1, sr->wr1);
-       	scc_write_reg(regs,  chan, 15, SCC_WR15_ENABLE_ESCC);
-	scc_write_reg(regs,  chan,  7, SCC_WR7P_RX_FIFO);
-	scc_write_reg(regs,  chan,  0, SCC_IE_NEXT_CHAR);
-
-
-	/* Clear out any pending external or status interrupts */
-	scc_write_reg(regs,  chan,  0, SCC_RESET_EXT_IP);
-	scc_write_reg(regs,  chan,  0, SCC_RESET_EXT_IP);
-	//scc_write_reg(regs,  chan,  0, SCC_RESET_ERROR);
-
-	/* Enable SCC interrupts (how many interrupts are to this thing?!?) */
-	scc_write_reg(regs,  chan,  9, SCC_WR9_MASTER_IE|SCC_WR9_NV);
-
-	scc_read_reg_zero(regs, 0, bits);/* Clear the status */
-
-#if	SCC_DMA_TRANSFERS
-	if (scc->dma_initted & (1<<chan))  {
-		scc->dma_ops->scc_dma_start_rx(chan);
-		scc->dma_ops->scc_dma_setup_8530(chan);
-	} else
-#endif
-	{
-		sr->wr1 = SCC_WR1_RXI_FIRST_CHAR | SCC_WR1_EXT_IE;
-		scc_write_reg(regs, chan, 1, sr->wr1);
-		scc_write_reg(regs, chan, 0, SCC_IE_NEXT_CHAR);
-	}
-
-	sr->wr5 |= SCC_WR5_TX_ENABLE;
-	scc_write_reg(regs,  chan,  5, sr->wr5);
-
-	simple_unlock(&scc_stomp);
-	splx(s);
-
-	assert(FUNNEL_IN_USE(&SCC_FUNNEL));
-	return 0;
-
-}
-#endif	/* NSCC > 0 */
diff --git a/osfmk/ppc/serial_io.h b/osfmk/ppc/serial_io.h
deleted file mode 100644
index a280fa1a4..000000000
--- a/osfmk/ppc/serial_io.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * @APPLE_FREE_COPYRIGHT@
- */
-
-#ifndef _PPC_SERIAL_IO_H_
-#define _PPC_SERIAL_IO_H_
-
-#include <console/serial_protos.h>
-#include <device/device_types.h>
-#include <mach_kdp.h>
-
-/*
- *	Console is on the Printer Port (chip channel 0)
- *	Debugger is on the Modem Port (chip channel 1)
- */
-
-#define	CONSOLE_PORT	1
-
-struct scc_tty {
-	char *		t_addr;		/* device pointer */
-	int		t_dev;		/* device number */
-	int		t_ispeed;	/* input speed */
-	int		t_ospeed;	/* output speed */
-	char		t_breakc;	/* character to deliver when 'break'
-					   condition received */
-	int		t_flags;	/* mode flags */
-	int		t_state;	/* current state */
-	int		t_line;		/* fake line discipline number,
-					   for old drivers - always 0 */
-	int		t_outofband;	/* current out-of-band events */
-	int		t_outofbandarg;	/* arg to first out-of-band event */
-	int		t_nquoted;	/* number of quoted chars in inq */
-	int		t_hiwater;	/* baud-rate limited high water mark */
-	int		t_lowater;	/* baud-rate limited low water mark */
-};
-typedef struct scc_tty	*scc_tty_t;
-
-/*
- * function declarations for performing serial i/o
- * other functions below are declared in kern/misc_protos.h
- *    cnputc, cngetc, cnmaygetc
- */
-
-void initialize_serial(caddr_t scc_phys_base, int32_t serial_baud);
-
-extern int		scc_probe(int32_t serial_baud);
-
-#if 0
-extern int		scc_open(
-				dev_t		dev,
-				dev_mode_t	flag,
-				io_req_t	ior);
-
-extern void		scc_close(
-				dev_t		dev);
-
-extern int		scc_read(
-				dev_t		dev,
-				io_req_t	ior);
-
-extern io_return_t	scc_write(
-				dev_t		dev,
-				io_req_t	ior);
-
-extern io_return_t	scc_get_status(
-				dev_t			dev,
-				dev_flavor_t		flavor,
-				dev_status_t		data,
-				mach_msg_type_number_t	*status_count);
-
-extern io_return_t	scc_set_status(
-				dev_t			dev,
-				dev_flavor_t		flavor,
-				dev_status_t		data,
-				mach_msg_type_number_t	status_count);
-
-extern boolean_t	scc_portdeath(
-				dev_t		dev,
-				ipc_port_t	port);
-
-#endif /* 0 */
-
-extern void	 	scc_putc(
-				int			unit,
-				int			line,
-				int			c);
-
-extern int		scc_getc(
-				int			unit,
-				int			line,
-				boolean_t		wait,
-				boolean_t		raw);
-
-/*
- * JMM - We are not really going to support this driver in SMP (we barely
- * support it now) - so just pick up the stubbed-out versions.
- */
-#define DECL_FUNNEL(class,f)
-#define DECL_FUNNEL_VARS
-#define FUNNEL_INIT(f,p)
-#define FUNNEL_ENTER(f)
-#define FUNNEL_EXIT(f)
-#define FUNNEL_ESCAPE(f)		(1)
-#define FUNNEL_REENTER(f,count)
-#define FUNNEL_IN_USE(f)		(TRUE)
-
-/*
- * Flags
- */
-#define	TF_ODDP		0x00000002	/* get/send odd parity */
-#define	TF_EVENP	0x00000004	/* get/send even parity */
-#define	TF_ANYP		(TF_ODDP|TF_EVENP)
-					/* get any parity/send none */
-#define	TF_LITOUT	0x00000008	/* output all 8 bits
-					   otherwise, characters >= 0x80
-					   are time delays	XXX */
-#define	TF_ECHO		0x00000080	/* device wants user to echo input */
-#define	TS_MIN		0x00004000	/* buffer input chars, if possible */
-
-#endif /* _PPC_SERIAL_IO_H_ */
diff --git a/osfmk/ppc/setjmp.h b/osfmk/ppc/setjmp.h
deleted file mode 100644
index 2c7b1b9fc..000000000
--- a/osfmk/ppc/setjmp.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifndef	_PPC_SETJMP_H_
-#define	_PPC_SETJMP_H_
-
-/*
- * We save the following registers (marked as non-volatile in the ELF spec)
- *
- * r1      - stack pointer
- * r13     - small data area pointer
- * r14-r30 - local variables
- * r31     - local variable/environment pointer
- * 
- * cr      - condition register
- * lr      - link register (to know where to jump back to)
- * xer     - fixed point exception register
- *
- * fpscr   - floating point status and control
- * f14-f31 - local variables
- *
- * which comes to 57 words. We round up to 64 for good measure.
- */
-
-typedef	struct jmp_buf {
-	int	jmp_buf[64];
-} jmp_buf_t;
-
-#endif	/* _PPC_SETJMP_H_ */
diff --git a/osfmk/ppc/simple_lock.h b/osfmk/ppc/simple_lock.h
deleted file mode 100644
index 80be1e6ff..000000000
--- a/osfmk/ppc/simple_lock.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-
-#ifdef	KERNEL_PRIVATE
-
-#ifndef	_PPC_SIMPLE_LOCK_TYPES_H_
-#define	_PPC_SIMPLE_LOCK_TYPES_H_
-
-#ifdef	KERNEL_PRIVATE
-#include <mach/boolean.h>
-#include <kern/kern_types.h>
-
-#include <sys/appleapiopts.h>
-#ifdef  MACH_KERNEL_PRIVATE
-#include <ppc/hw_lock_types.h>
-#include <ppc/locks.h>
-#include <mach_ldebug.h>
-#endif
-
-#ifdef MACH_KERNEL_PRIVATE
-
-#if MACH_LDEBUG
-#define USLOCK_DEBUG 1
-#else
-#define USLOCK_DEBUG 0
-#endif
-
-#if     !USLOCK_DEBUG
-
-typedef lck_spin_t usimple_lock_data_t, *usimple_lock_t;
-
-#else
-
-typedef struct uslock_debug {
-	void			*lock_pc;	/* pc where lock operation began    */
-	void			*lock_thread;	/* thread that acquired lock */
-	unsigned long	duration[2];
-	unsigned short	state;
-	unsigned char	lock_cpu;
-	void			*unlock_thread;	/* last thread to release lock */
-	unsigned char	unlock_cpu;
-	void			*unlock_pc;	/* pc where lock operation ended    */
-} uslock_debug;
-
-typedef struct {
-	hw_lock_data_t	interlock;	/* must be first... see lock.c */
-	unsigned short	lock_type;	/* must be second... see lock.c */
-#define USLOCK_TAG	0x5353
-	uslock_debug	debug;
-} usimple_lock_data_t, *usimple_lock_t;
-
-#endif	/* USLOCK_DEBUG */
-
-#else
-
-typedef	struct slock {
-	unsigned int	lock_data[10];
-} usimple_lock_data_t, *usimple_lock_t;
-
-#endif	/* MACH_KERNEL_PRIVATE */
-
-#define	USIMPLE_LOCK_NULL	((usimple_lock_t) 0)
-
-#if !defined(decl_simple_lock_data)
-
-typedef usimple_lock_data_t	*simple_lock_t;
-typedef usimple_lock_data_t	simple_lock_data_t;
-
-#define	decl_simple_lock_data(class,name) \
-	class	simple_lock_data_t	name;
-
-#endif	/* !defined(decl_simple_lock_data) */
-
-#ifdef	MACH_KERNEL_PRIVATE
-#if	!MACH_LDEBUG
-
-#define MACHINE_SIMPLE_LOCK
-
-extern void						ppc_usimple_lock_init(simple_lock_t,unsigned short);
-extern void						ppc_usimple_lock(simple_lock_t);
-extern void						ppc_usimple_unlock_rwmb(simple_lock_t);
-extern void						ppc_usimple_unlock_rwcmb(simple_lock_t);
-extern unsigned int				ppc_usimple_lock_try(simple_lock_t);
-
-#define simple_lock_init(l,t)	ppc_usimple_lock_init(l,t)
-#define simple_lock(l)			ppc_usimple_lock(l)
-#define simple_unlock(l)		ppc_usimple_unlock_rwcmb(l)
-#define simple_unlock_rwmb(l)	ppc_usimple_unlock_rwmb(l)
-#define simple_lock_try(l)		ppc_usimple_lock_try(l)
-#define simple_lock_addr(l)		(&(l))
-#define thread_sleep_simple_lock(l, e, i) \
-								thread_sleep_fast_usimple_lock((l), (e), (i))
-#endif	/* !MACH_LDEBUG */
-
-extern unsigned int		hw_lock_bit(
-					unsigned int *,
-					unsigned int,
-					unsigned int);
-
-extern unsigned int		hw_cpu_sync(
-					unsigned int *,
-					unsigned int);
-
-extern unsigned int		hw_cpu_wcng(
-					unsigned int *,
-					unsigned int,
-					unsigned int);
-
-extern unsigned int		hw_lock_mbits(
-					unsigned int *,
-					unsigned int,
-					unsigned int,
-					unsigned int,
-					unsigned int);
-
-void				hw_unlock_bit(
-					unsigned int *,
-					unsigned int);
-
-#endif	/* MACH_KERNEL_PRIVATE */
-#endif	/* KERNEL_PRIVATE */
-
-#endif /* !_PPC_SIMPLE_LOCK_TYPES_H_ */
-
-#endif	/* KERNEL_PRIVATE */
diff --git a/osfmk/ppc/skiplists.s b/osfmk/ppc/skiplists.s
deleted file mode 100644
index 69a9dccbb..000000000
--- a/osfmk/ppc/skiplists.s
+++ /dev/null
@@ -1,1297 +0,0 @@
-/*
- * Copyright (c) 2002-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-/* skiplists.s
- *
- * These are the subroutines that manage the skip-list data structures used for the
- * resident mappings for each pmap.  We used to use a much simpler hash-based scheme,
- * but it didn't scale well for 64-bit address spaces and multi-GB real memories.
- * Here's a brief tutorial on skip-lists:
- *
- * The basic idea is that each mapping is on one or more singly-linked lists, sorted
- * in increasing order by virtual address.  The number of lists a mapping is on is an
- * invariant property determined when the mapping is created, using an exponentially-
- * distributed random number.  Every mapping is on the first list.  Ideally, each
- * successive list has only 1/F as many nodes on it as the previous, where F is the
- * "fanout."  With a max of n lists, up to F**n nodes can be handled optimally.
- *
- * Searching, adding, and deleting from a skip-list can all be done in O(ln(n)) time.
- * Because the first skip-list is just a sorted list of all mappings, it is also
- * efficient to purge a sparsely populated pmap of all the mappings in a large range,
- * for example when tearing down an address space.  Large-range deletes are the
- * primary advantage of skip-lists over a hash, btw.
- *
- * We currently use a fanout of 4 and a maximum of 12 lists (cf kSkipListFanoutShift
- * and kSkipListMaxLists.)  Thus, we can optimally handle pmaps with as many as 4**12 
- * pages, which is 64GB of resident physical memory per pmap.  Pmaps can be larger than
- * this, albeit with diminishing efficiency.
- *
- * The major problem with skip-lists is that we could waste a lot of space with 12
- * 64-bit link fields in every mapping.  So we currently have two sizes of mappings:
- * 64-byte nodes with 4 list links, and 128-byte nodes with 12.  Only one in every
- * (4**4)==256 mappings requires the larger node, so the average size is 64.25 bytes.
- * In practice, the additional complexity of the variable node size is entirely
- * contained in the allocate and free routines.
- *
- * The other, mostly theoretical problem with skip-lists is that they have worst cases
- * where performance becomes nearly linear.  These worst-cases are quite rare but there
- * is no practical way to prevent them.
- */   
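
The assembly below implements the textbook skip-list walk.  As a reading aid,
here is the same search schematically in C; the node layout is illustrative
only (real mappings keep their links at mpList0 and come in the two sizes
described above):

	#define kSkipListMaxLists	12

	typedef struct mapping {
		unsigned long long	va;		/* sort key: virtual address */
		struct mapping		*links[kSkipListMaxLists];
	} mapping_t;

	/* Return the mapping with va == key, else the nearest predecessor,
	 * whose flags the caller then checks for block/nested-pmap coverage. */
	static mapping_t *
	skiplist_search(mapping_t **heads, int cur_lists, unsigned long long key)
	{
		mapping_t	*prev = 0;
		mapping_t	**next = heads;	/* start in the pmap's own vector */
		int		list;

		for (list = cur_lists - 1; list >= 0; list--) {
			mapping_t *m = next[list];
			while (m != 0 && m->va < key) {
				prev = m;		/* passed this node... */
				next = m->links;	/* ...so descend from its vector */
				m = m->links[list];
			}
			if (m != 0 && m->va == key)
				return m;		/* exact match */
		}
		return prev;				/* miss: predecessor (or 0) */
	}
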
- 
-
-; set nonzero to accumulate skip-list stats on a per-map basis:
-#define	SKIPLISTSTATS	1
-
-; cr7 bit set when mapSearchFull() finds a match on a high list:
-#define	bFullFound	28
-
-#include <assym.s>
-#include <debug.h>
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <ppc/exception.h>
-
-
-/*
- *  *********************
- * 	* m a p S e a r c h *
- *	*********************
- *
- * Given a pmap and a virtual address (VA), find the mapping for that address.
- * This is the fast call, that does not set up the previous-ptr vector or make
- * consistency checks.  When called:
- *		the pmap is locked (shared or exclusive)
- *		translation is off, interrupts masked
- *		64-bit mode is enabled (if on a 64-bit machine)
- *		cr6 is loaded with the corresponding feature flags (in particular, pf64Bit)
- *		r3 = pmap ptr
- *		r4 = high 32 bits of key to search for (0 if a 32-bit processor)
- *		r5 = low 32 bits of key (low 12 bits may be nonzero garbage)
- *		r7 = mpFlags field if found.  Undefined if not
- *
- * We return the mapping ptr (or 0) in r3, and the next VA (or 0 if no more) in r4 and r5.
- * Except for cr6 (which is global), we trash nonvolatile regs.  Called both on 32- and 64-bit
- * machines, though we quickly branch into parallel code paths.
- */ 
-            .text
-			.align	5
-            .globl	EXT(mapSearch)
-LEXT(mapSearch)
-            lbz		r7,pmapCurLists(r3)		; get largest #lists any mapping is on
-            la		r8,pmapSkipLists+4(r3)	; point to lists in pmap, assuming 32-bit machine
-            rlwinm	r5,r5,0,0,19			; zero low 12 bits of key
-            mr		r6,r3					; save pmap ptr here so we can accumulate statistics
-            li		r9,0					; initialize prev ptr
-            addic.	r7,r7,-1				; get base-0 number of last list, and test for 0
-            li		r2,0					; initialize count of mappings visited
-            slwi	r7,r7,3					; get offset of last list in use
-            blt--	mapSrchPmapEmpty		; pmapCurLists==0 (ie, no mappings)
-            lwzx	r3,r8,r7				; get 32-bit ptr to 1st mapping in highest list
-            bf--	pf64Bitb,mapSrch32c		; skip if 32-bit processor
-            subi	r8,r8,4					; we use all 64 bits of ptrs
-            rldimi	r5,r4,32,0				; r5 <- 64-bit va
-            ldx		r3,r8,r7				; get 64-bit ptr to 1st mapping in highest list
-            b		mapSrch64c				; enter 64-bit search loop
-
-            
-            ; 64-bit processors.  Check next mapping.
-            ;   r2 = count of mappings visited so far
-            ;	r3 = current mapping ptr
-            ;	r4 = va of current mapping (ie, of r3)
-            ;	r5 = va to search for (the "key") (low 12 bits are 0)
-            ;	r6 = pmap ptr
-            ;	r7 = current skip list number * 8
-            ;	r8 = ptr to skip list vector of mapping pointed to by r9 (or pmap, if r9==0)
-            ;	r9 = prev ptr, or 0 if none
-            
-            .align	5
-mapSrch64a:									; loop over each mapping
-            ld		r4,mpVAddr(r3)			; get va for this mapping (plus flags in low 12 bits)
-            addi	r2,r2,1					; count mappings visited
-            rldicr	r4,r4,0,51				; zero low 12 bits of mapping va
-            cmpld	cr1,r5,r4				; compare the vas
-            blt		cr1,mapSrch64d			; key is less, try next list
-            la		r8,mpList0(r3)			; point to skip list vector in this mapping
-            mr		r9,r3					; remember prev ptr
-            beq--	cr1,mapSrch64Found		; this is the correct mapping
-            ldx		r3,r7,r8				; get ptr to next mapping in current list
-mapSrch64c:
-            mr.		r3,r3					; was there another mapping on current list?
-            bne++	mapSrch64a				; was another, so loop
-mapSrch64d:
-            subic.	r7,r7,8					; move on to next list offset
-            ldx		r3,r7,r8				; get next mapping on next list (if any)
-            bge++	mapSrch64c				; loop to try next list
-          
-            ; Mapping not found, check to see if prev node was a block mapping or nested pmap.
-            ; If not, or if our address is not covered by the block or nested map, return 0.
-            ; Note the advantage of keeping the check for block mappings (and nested pmaps)
-            ; out of the inner loop; we do the special case work at most once per search, and
-            ; never for the most-common case of finding a scalar mapping.  The full searches
-            ; must check _in_ the inner loop, to get the prev ptrs right.
-
-			mr.		r9,r9					; was there a prev ptr?
-			li		r3,0					; assume we are going to return null
-			ld		r4,pmapSkipLists(r6)	; assume prev ptr null... so next is first
-			beq--	mapSrch64Exit			; prev ptr was null, search failed
-			lwz		r0,mpFlags(r9)			; get flag bits from prev mapping
-			lhz		r11,mpBSize(r9)			; get #pages/#segments in block/submap mapping
-			
-			rlwinm	r0,r0,mpBSub+1,31,31	; 0 if 4K bsu or 1 if 32MB bsu
-			ld		r10,mpVAddr(r9)			; re-fetch base address of prev ptr
-			ori		r0,r0,0x3216			; OR in 0x00003216 (0x3200 and a base rotate of 22)
-			addi	r11,r11,1				; Convert 0-based to 1-based
-			rlwnm	r0,r0,r0,27,31			; Rotate to get 12 or 25
-			ld		r4,mpList0(r9)			; get 64-bit ptr to next mapping, if any
-			sld		r11,r11,r0				; Get the length in bytes
-			rldicr	r10,r10,0,51			; zero low 12 bits of mapping va
-			subi	r0,r11,4096				; get offset last page in mapping
-			add		r10,r10,r0				; r10 <- last page in this mapping
-			cmpld	r5,r10					; does this mapping cover our page?
-			bgt		mapSrch64Exit			; no, search failed
-			mr		r3,r9					; yes, we found it
-
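-            ; In rough C, the fixup above is (reusing the hypothetical mapping_t from
-            ; the sketch at the top of the file, with made-up bsize/big fields standing
-            ; in for mpBSize and the mpBSub flag):
-            ;
-            ;	if (prev == NULL)
-            ;		return NULL;			// nothing precedes the key
-            ;	uint64_t base  = prev->va & ~0xFFFULL;	// low 12 bits hold flags
-            ;	uint64_t bytes = (uint64_t)(prev->bsize + 1) << (prev->big ? 25 : 12);
-            ;	if (key <= base + bytes - 4096)		// key within block/nested pmap?
-            ;		return prev;			// yes, the block covers it
-            ;	return NULL;				// no, search failed
-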
-            ; found the mapping
-            ;   r2 = count of nodes visited
-            ;	r3 = the mapping
-            ;	r6 = pmap ptr
-            
-mapSrch64Found:								; WARNING: can drop down to here
-            ld		r4,mpList0(r3)			; get ptr to next mapping
-            lwz		r7,mpFlags(r3)			; Get the flags for our caller
-            
-            ;   r2 = count of nodes visited
-            ;	r3 = return value (ie, found mapping or 0)
-            ;   r4 = next mapping (or 0 if none)
-            ;	r6 = pmap ptr
-            ;	r7 = mpFlags
-            
-mapSrch64Exit:								; WARNING: can drop down to here
-            mr.		r5,r4					; next ptr null?
-#if	SKIPLISTSTATS
-            lwz		r10,pmapSearchCnt(r6)	; prepare to accumulate statistics
-            ld		r8,pmapSearchVisits(r6)
-            addi	r10,r10,1				; count searches
-            add		r8,r8,r2				; count nodes visited
-            stw		r10,pmapSearchCnt(r6)
-            std		r8,pmapSearchVisits(r6)
-#endif
-            beqlr-							; next ptr was null, so return 0 in r4 and r5
-            lwz		r5,mpVAddr+4(r4)		; get VA of next node
-            lwz		r4,mpVAddr+0(r4)
-            blr
-
-            
-            ; 32-bit processors.  Check next mapping.
-            ;   r2 = count of mappings visited so far
-            ;	r3 = current mapping ptr
-            ;	r4 = va of current mapping (ie, of r3)
-            ;	r5 = va to search for (the "key") (low 12 bits are 0)
-            ;	r6 = pmap ptr
-            ;	r7 = current skip list number * 8
-            ;	r8 = ptr to skip list vector of mapping pointed to by r9 (or pmap, if r9==0)
-            ;	r9 = prev ptr, or 0 if none
-            
-            .align	4
-mapSrch32a:									; loop over each mapping
-            lwz		r4,mpVAddr+4(r3)		; get va for this mapping (plus flags in low 12 bits)
-            addi	r2,r2,1					; count mappings visited
-            rlwinm	r4,r4,0,0,19			; zero low 12 bits of mapping va
-            cmplw	cr1,r5,r4				; compare the vas
-            blt		cr1,mapSrch32d			; key is less, try next list
-            la		r8,mpList0+4(r3)		; point to skip list vector in this mapping
-            mr		r9,r3					; remember prev ptr
-            beq-	cr1,mapSrch32Found		; this is the correct mapping
-            lwzx	r3,r7,r8				; get ptr to next mapping in current list
-mapSrch32c:
-            mr.		r3,r3					; was there another mapping on current list?
-            bne+	mapSrch32a				; was another, so loop
-mapSrch32d:
-            subic.	r7,r7,8					; move on to next list offset
-            lwzx	r3,r7,r8				; get next mapping on next list (if any)
-            bge+	mapSrch32c				; loop to try next list
-          
-            ; Mapping not found, check to see if prev node was a block mapping or nested pmap.
-            ; If not, or if our address is not covered by the block or nested map, return 0.
-            ; Note the advantage of keeping the check for block mappings (and nested pmaps)
-            ; out of the inner loop; we do the special case work at most once per search, and
-            ; never for the most-common case of finding a scalar mapping.  The full searches
-            ; must check _in_ the inner loop, to get the prev ptrs right.
-
-			mr.		r9,r9					; was there a prev ptr?
-			li		r3,0					; assume we are going to return null
-			lwz		r4,pmapSkipLists+4(r6)	; assume prev ptr null... so next is first
-			beq-	mapSrch32Exit			; prev ptr was null, search failed
-			lwz		r0,mpFlags(r9)			; get flag bits from prev mapping
-			lhz		r11,mpBSize(r9)			; get #pages/#segments in block/submap mapping
-			lwz		r10,mpVAddr+4(r9)		; re-fetch base address of prev ptr
-			
-			rlwinm	r0,r0,mpBSub+1,31,31	; Rotate to get 0 if 4K bsu or 1 if 32MB bsu
-			addi	r11,r11,1				; Convert 0-based to 1-based
-			ori		r0,r0,0x3216			; OR in 0x00003216 (0x3200 and a base rotate of 22)
-			rlwnm	r0,r0,r0,27,31			; Rotate to get 12 or 25
-			lwz		r4,mpList0+4(r9)		; get ptr to next mapping, if any
-			slw		r11,r11,r0				; Get length in bytes
-			rlwinm	r10,r10,0,0,19			; zero low 12 bits of block mapping va
-			subi	r0,r11,4096				; get address of last page in submap
-			add		r10,r10,r0				; r10 <- last page in this mapping
-			cmplw	r5,r10					; does this mapping cover our page?
-			bgt		mapSrch32Exit			; no, search failed
-			mr		r3,r9					; yes, we found it
-
-            ; found the mapping
-            ;   r2 = count of nodes visited
-            ;	r3 = the mapping
-            ;	r6 = pmap ptr
-            
-mapSrch32Found:								; WARNING: can drop down to here
-            lwz		r4,mpList0+4(r3)		; get ptr to next mapping
-            lwz		r7,mpFlags(r3)			; Get mpFlags for our caller
-            ;   r2 = count of nodes visited
-            ;	r3 = return value (ie, found mapping or 0)
-            ;   r4 = next mapping (or 0 if none)
-            ;	r6 = pmap ptr
-            ;	r7 = mpFlags
-            
-mapSrch32Exit:
-            mr.		r5,r4					; next ptr null?
-#if	SKIPLISTSTATS
-            lwz		r10,pmapSearchCnt(r6)	; prepare to accumulate statistics
-            lwz		r8,pmapSearchVisits(r6)
-            lwz		r9,pmapSearchVisits+4(r6)
-            addi	r10,r10,1				; count searches
-            addc	r9,r9,r2				; count nodes visited
-            addze	r8,r8
-            stw		r10,pmapSearchCnt(r6)
-            stw		r8,pmapSearchVisits(r6)
-            stw		r9,pmapSearchVisits+4(r6)
-#endif
-            beqlr-							; next ptr was null, so return 0 in r4 and r5
-            lwz		r5,mpVAddr+4(r4)		; get VA of next node
-            lwz		r4,mpVAddr+0(r4)
-            blr
-
-            ; Here when the pmap is empty (ie, pmapCurLists==0), in both 32- and 64-bit mode,
-            ; and from both mapSearch and mapSearchFull.
-            ;	r6 = pmap ptr
-            
-mapSrchPmapEmpty:
-            li		r3,0					; return null
-            li		r4,0					; return 0 as virtual address of next node
-            li		r5,0
-#if	SKIPLISTSTATS
-            lwz		r7,pmapSearchCnt(r6)	; prepare to accumulate statistics
-            addi	r7,r7,1					; count searches
-            stw		r7,pmapSearchCnt(r6)
-#endif
-            blr
-            
-
-/*
- *  *****************************
- * 	* m a p S e a r c h F u l l *
- *	*****************************
- *
- * Given a pmap and a virtual address (VA), find the mapping for that address.
- * This is the "full" call, which sets up a vector of ptrs to the previous node
- * (or to the pmap, if there is no previous node) for each list that the mapping
- * is on.  We also make consistency checks on the skip-lists.  When called:
- *		the pmap is locked (shared or exclusive)
- *		translation is off, interrupts masked
- *		64-bit mode is enabled (if on a 64-bit machine)
- *		cr6 is loaded with the corresponding feature flags (in particular, pf64Bit)
- *		r3 = pmap ptr
- *		r4 = high 32 bits of key to search for (0 if a 32-bit processor)
- *		r5 = low 32 bits of key (low 12 bits may be nonzero garbage)
- *
- * We return the mapping ptr (or 0) in r3, and the next VA (or 0 if no more) in r4 and r5.
- * Except for cr6 (which is global), we trash nonvolatile regs.  Called both on 32- and 64-bit
- * machines, though we quickly branch into parallel code paths.
- */ 
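-/*
- * The essential difference from mapSearch, again in rough C (hypothetical names;
- * the prevs vector stands in for the per-proc skipListPrev array, and the
- * block-mapping coverage check and consistency checks are omitted):
- *
- *	mapping_t *search_full(pmap_t *pmap, uint64_t key, mapping_t **prevs) {
- *		mapping_t **links = pmap->heads;
- *		mapping_t *prev = (mapping_t *)pmap;	// fake mapping ptr, mirroring
- *		mapping_t *found = NULL;		// the trick used below
- *		for (int list = pmap->curlists - 1; list >= 0; list--) {
- *			mapping_t *m = links[list];
- *			while (m != NULL && m->va < key) {
- *				prev = m;		// highest node before the key
- *				links = m->link;
- *				m = m->link[list];
- *			}
- *			if (m != NULL && m->va == key)
- *				found = m;		// remember, but keep descending
- *			prevs[list] = prev;		// record a prev for every list
- *		}
- *		return found;			// prevs[] now valid for insert/remove
- *	}
- */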
-            .text
-			.align	5
-            .globl	EXT(mapSearchFull)
-LEXT(mapSearchFull)
-            lbz		r7,pmapCurLists(r3)		; get largest #lists any mapping is on
-            la		r8,pmapSkipLists+4(r3)	; point to lists in pmap, assuming 32-bit machine
-            rlwinm	r5,r5,0,0,19			; zero low 12 bits of key
-            mr		r6,r3					; save pmap ptr here so we can accumulate statistics
-            li		r2,0					; initialize count of mappings visited
-            mfsprg	r12,0					; get the per-proc data ptr
-            crclr	bFullFound				; we have not found the mapping yet
-            addic.	r7,r7,-1				; get base-0 number of last list, and test for 0
-            subi	r9,r8,mpList0+4			; initialize prev ptr to be a fake mapping
-            slwi	r7,r7,3					; get (offset*8) of last list
-            la		r12,skipListPrev+4(r12)	; point to vector of prev ptrs, assuming 32-bit machine
-            blt--	mapSrchPmapEmpty		; pmapCurLists==0 (ie, no mappings)
-            lwzx	r3,r8,r7				; get 32-bit ptr to 1st mapping in highest list
-            li		r10,0					; initialize prev ptrs VA to 0 too
-            bf--	pf64Bitb,mapSrchFull32c	; skip if 32-bit processor
-            subi	r8,r8,4					; we use all 64 bits of ptrs
-            subi	r12,r12,4
-            rldimi	r5,r4,32,0				; r5 <- 64-bit va
-            ldx		r3,r8,r7				; get 64-bit ptr to 1st mapping in highest list
-            b		mapSrchFull64c			; enter 64-bit search loop
-
-            
-            ; 64-bit processors.  Check next mapping.
-            ;   r2 = count of mappings visited so far
-            ;	r3 = current mapping ptr
-            ;	r4 = va of current mapping (ie, of r3)
-            ;	r5 = va to search for (the "key") (low 12 bits are 0)
-            ;	r6 = pmap ptr
-            ;	r7 = current skip list number * 8
-            ;	r8 = ptr to skip list vector of mapping pointed to by r9
-            ;	r9 = prev ptr, ie highest mapping that comes before search target (initially the pmap)
-            ;  r10 = lowest expected next va, 0 at the beginning of the search
-            ;  r12 = ptr to the skipListPrev vector in the per-proc
-            
-            .align	5
-mapSrchFull64a:								; loop over each mapping
-			addi	r2,r2,1					; count mappings visited
-			lwz		r0,mpFlags(r3)			; get mapping flag bits
-			lhz		r11,mpBSize(r3)			; get #pages/#segments in block/submap mapping
-			ld		r4,mpVAddr(r3)			; get va for this mapping (plus flags in low 12 bits)
-
-			rlwinm	r0,r0,mpBSub+1,31,31	; Rotate to get 0 if 4K bsu or 1 if 32MB bsu
-			addi	r11,r11,1				; Convert 0-based to 1-based
-			ori		r0,r0,0x3216			; OR in 0x00003216 (0x3200 and a base rotate of 22)
-			rlwnm	r0,r0,r0,27,31			; Rotate to get 12 or 25
-			sld		r11,r11,r0				; Get the length in bytes
-            rldicr	r4,r4,0,51				; zero low 12 bits of mapping va
-            addic.	r0,r11,-4096			; get offset last page in mapping (set cr0_eq if 1 page)
-
-            cmpld	cr5,r10,r4				; make sure VAs come in strictly ascending order
-            cmpld	cr1,r5,r4				; compare the vas
-            bgt--	cr5,mapSkipListPanic	; die if keys are out of order
-
-            blt		cr1,mapSrchFull64d		; key is less, try next list
-            beq		cr1,mapSrchFull64Found	; this is the correct mapping
-            bne--	cr0,mapSrchFull64e		; handle mapping larger than one page
-mapSrchFull64b:
-            la		r8,mpList0(r3)			; point to skip list vector in this mapping
-            mr		r9,r3					; current becomes previous
-            ldx		r3,r7,r8				; get ptr to next mapping in current list
-            addi	r10,r4,0x1000			; Get the lowest VA we can get next
-mapSrchFull64c:
-            mr.		r3,r3					; was there another mapping on current list?
-            bne++	mapSrchFull64a			; was another, so loop
-mapSrchFull64d:
-            stdx	r9,r7,r12				; save prev ptr in per-proc vector
-            subic.	r7,r7,8					; move on to next list offset
-            ldx		r3,r7,r8				; get next mapping on next list (if any)
-            bge++	mapSrchFull64c			; loop to try next list
-          
-            ; Mapping not found, return 0 and next higher key
-
-            li		r3,0					; return null
-            bt--	bFullFound,mapSkipListPanic	; panic if it was on earlier list
-            ld		r4,mpList0(r9)			; get 64-bit ptr to next mapping, if any
-            b		mapSrch64Exit
-            
-            ; Block mapping or nested pmap, and key > base.  We must compute the va of
-            ; the end of the block to see if key fits within it.
-
-mapSrchFull64e:            
-            add		r4,r4,r0				; r4 <- last page in this mapping
-            cmpld	r5,r4					; does this mapping cover our page?
-            bgt		mapSrchFull64b			; no, try next mapping (r4 is advanced to end of range)
-
-
-            ; found the mapping
-            ;   r2 = count of nodes visited
-            ;	r3 = the mapping
-            ;	r6 = pmap ptr
-            ;	r7 = current skip list number * 8
-            ;	r8 = ptr to prev mappings (ie, r9) skip-list vector
-            ;	r9 = prev ptr, ie highest mapping that comes before search target
-            ;  r10 = prev mappings va
-            ;  r12 = ptr to the skipListPrev vector in the per-proc
-            
-mapSrchFull64Found:							; WARNING: can drop down to here
-            cmpwi	r7,0					; are we in the last skip-list?
-            crset	bFullFound				; remember that we found the mapping
-            bne		mapSrchFull64d			; mapSearchFull must search all lists to get prev ptrs
-            ld		r4,mpList0(r3)			; get ptr to next mapping
-            stdx	r9,r7,r12				; save prev ptr in last list
-            lwz		r7,mpFlags(r3)			; Get the flags for our caller
-            b		mapSrch64Exit
-
-            
-            ; 32-bit processors.  Check next mapping.
-            ;   r2 = count of nodes visited
-            ;	r3 = ptr to next mapping in current list
-            ;	r5 = va to search for (the "key") (low 12 bits are 0)
-            ;	r6 = pmap ptr
-            ;	r7 = current skip list number * 8
-            ;	r8 = ptr to skip list vector of mapping pointed to by r9
-            ;	r9 = prev ptr, ie highest mapping that comes before search target (initially the pmap)
-            ;  r10 = lowest expected next va, 0 at the beginning of the search
-            ;  r12 = ptr to the skipListPrev vector in the per-proc
-            
-            .align	4
-mapSrchFull32a:								; loop over each mapping
-			addi	r2,r2,1					; count mappings visited
-			lwz		r0,mpFlags(r3)			; get mapping flag bits
-			lhz		r11,mpBSize(r3)			; get #pages/#segments in block/submap mapping
-			lwz		r4,mpVAddr+4(r3)		; get va for this mapping (plus flags in low 12 bits)
-						
-			rlwinm	r0,r0,mpBSub+1,31,31	; Rotate to get 0 if 4K bsu or 1 if 32MB bsu
-			addi	r11,r11,1				; Convert 0-based to 1-based
-			ori		r0,r0,0x3216			; OR in 0x00003216 (0x3200 and a base rotate of 22)
-			rlwnm	r0,r0,r0,27,31			; Rotate to get 12 or 25
-			slw		r11,r11,r0				; Get the length in bytes
-			rlwinm	r4,r4,0,0,19			; zero low 12 bits of mapping va
-            addic.	r0,r11,-4096			; get offset last page in mapping (set cr0_eq if 1 page)
-
-			cmplw	cr0,r10,r4				; make sure VAs come in strictly ascending order
-			cmplw	cr1,r5,r4				; compare the vas
-			bgt-	cr0,mapSkipListPanic	; die if keys are out of order
-			
-			blt		cr1,mapSrchFull32d		; key is less than this va, try next list
-			beq		cr1,mapSrchFull32Found	; this is the correct mapping
-			bne-	cr0,mapSrchFull32e		; handle mapping larger than one page
-mapSrchFull32b:
-            la		r8,mpList0+4(r3)		; point to skip list vector in this mapping
-            mr		r9,r3					; current becomes previous
-            lwzx	r3,r7,r8				; get ptr to next mapping in current list
-            addi	r10,r4,0x1000			; Get the lowest VA we can get next
-mapSrchFull32c:
-            mr.		r3,r3					; next becomes current
-            bne+	mapSrchFull32a			; was another, so loop
-mapSrchFull32d:
-            stwx	r9,r7,r12				; save prev ptr in per-proc vector
-            subic.	r7,r7,8					; move on to next list offset
-            lwzx	r3,r7,r8				; get next mapping on lower list (if any)
-            bge+	mapSrchFull32c			; loop to try next list
-
-            ; mapping not found, return 0 and next-key
-            
-            li		r3,0					; return null
-            bt-		bFullFound,mapSkipListPanic	; panic if it was on an earlier list
-            lwz		r4,mpList0+4(r9)		; get ptr to next mapping
-            b		mapSrch32Exit
-            
-            ; Block mapping or nested pmap, and key > base.  We must compute the va of
-            ; the end of the block to see if our key fits within it.
-
-mapSrchFull32e:            
-            add		r4,r4,r0				; r4 <- last page in this mapping
-            cmplw	r5,r4					; does this mapping cover our page?
-            bgt		mapSrchFull32b			; no, try next mapping
-            
-            
-            ; found the mapping
-            ;   r2 = count of nodes visited
-            ;	r3 = the mapping
-            ;	r6 = pmap ptr
-            ;	r7 = current skip list number * 8
-            ;	r9 = prev ptr, ie highest mapping that comes before search target, or 0
-            ;  r10 = prev mappings va
-            ;  r12 = ptr to the skipListPrev vector in the per-proc
-            
-mapSrchFull32Found:							; WARNING: can drop down to here
-            cmpwi	r7,0					; are we in the last skip-list?
-            crset	bFullFound				; remember that we found the mapping
-            bne		mapSrchFull32d			; mapSearchFull must search all lists to get prev ptrs
-            lwz		r4,mpList0+4(r3)		; get ptr to next mapping
-            stwx	r9,r7,r12				; save prev ptr in last list
-            lwz		r7,mpFlags(r3)			; Get mpFlags for our caller
-            b		mapSrch32Exit
-
-
-/*
- * 	*********************
- * 	* m a p I n s e r t *
- *	*********************
- *
- * Insert a mapping into pmap skip-lists.  The caller has already called mapSearchFull to 
- * determine that this mapping does not overlap other mappings in the pmap.  As a side effect 
- * of calling mapSearchFull, the per-proc skipListPrev array is set up with a vector of the 
- * previous ptrs for each skip list.  When called:
- *		the pmap is locked (exclusive)
- *		translation is off, interrupts masked
- *		64-bit mode is enabled (if on a 64-bit machine)
- *		mapSearchFull has just been called for this mappings key
- *		cr6 is loaded with the corresponding feature flags (in particular, pf64Bit)
- *		r3 = pmap ptr
- *		r4 = mapping ptr
- *
- * There is no return value.  Except for cr6 (which is global), we trash nonvolatile regs.
- */ 
-
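-/*
- * With skipListPrev set up by mapSearchFull, the insert itself is the textbook
- * skip-list splice; in rough C (hypothetical names; growing pmapCurLists, which
- * is the one wrinkle, is handled in the code below):
- *
- *	void skiplist_insert(mapping_t **prevs, mapping_t *newmap, int nlists) {
- *		for (int list = nlists - 1; list >= 0; list--) {
- *			newmap->link[list] = prevs[list]->link[list];	// new -> old next
- *			prevs[list]->link[list] = newmap;		// prev -> new
- *		}
- *	}
- */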
-			.align	5
-			.globl	EXT(mapInsert)
-LEXT(mapInsert)
-            lwz		r8,mpFlags(r4)			; get this mappings flags
-            lbz		r7,pmapCurLists(r3)		; get current max# lists any mapping is on
-            la		r10,pmapSkipLists+4(r3)	; r10 <-- base of pmap list headers, assuming 32-bit machine
-            la		r11,mpList0+4(r4)		; r11 <-- base of this mappings list vector
-            mfsprg	r12,0					; get ptr to our per-proc
-            andi.	r9,r8,mpLists			; get #lists this mapping is on (1<=n<=27)
-            la		r12,skipListPrev+4(r12)	; r12 <-- base of prev ptr vector
-            sub.	r6,r9,r7				; is this mapping on more lists than any other?
-            slwi	r8,r9,3					; get #lists * 8
-            subi	r8,r8,8					; get offset to topmost (last) list in use
-            bf--	pf64Bitb,mapIns32		; handle 32-bit processor
-            subi	r10,r10,4				; we use all 8 bytes of the ptr fields
-            subi	r11,r11,4
-            subi	r12,r12,4
-            ble++	mapIns64a				; not new max #lists
-            
-            ; 64-bit processor: We must increase pmapCurLists.  Since mapSearchFull() only
-            ; sets up the first pmapCurLists prev ptrs, we must initialize the new ones to
-            ; point to the pmap.  While we are at it, we verify that the unused list hdrs in
-            ; the pmap are 0.
-            
-            cmpwi	r9,kSkipListMaxLists	; in range?
-            stb		r9,pmapCurLists(r3)		; remember new max
-            mtctr	r6						; set up count of new lists
-            mr		r5,r8					; copy offset to last list
-            subi	r0,r10,mpList0			; r0 <-- fake mapping ptr (to pmap) for null prev ptrs
-            bgt--	mapSkipListPanic		; choke if this mapping is on too many lists
-mapIns64NewList:
-            ldx		r6,r5,r10				; get pmap list head
-            stdx	r0,r5,r12				; initialize prev ptr
-            subi	r5,r5,8					; get next list offset
-            cmpdi	r6,0					; was list hdr null?
-            bdnzt	cr0_eq,mapIns64NewList	; loop if more lists to initialize and list hdr was 0
-            bne--	mapSkipListPanic		; die if pmap list hdr was not null
-            b		mapIns64a
-            
-            ; 64-bit processor: loop over each list this mapping is on
-            ;	 r4 = mapping
-            ;	 r8 = next list offset
-            ;	r10 = ptr to base of pmap list header vector
-            ;	r11 = ptr to base of new mappings list vector
-            ;	r12 = ptr to base of prev ptr vector in per-proc
-            
-            .align	5
-mapIns64a:
-            ldx		r5,r8,r12				; get prev ptr from per-proc vector
-            cmpwi	cr1,r8,0				; more to go?
-            la		r7,mpList0(r5)			; get base of prev mappings list vector
-            ldx		r9,r8,r7				; ***
-            stdx	r4,r8,r7				; * insert new mapping in middle of this list
-            stdx	r9,r8,r11				; ***
-            subi	r8,r8,8					; get next list offset
-            bne++	cr1,mapIns64a			; more lists to go
-            blr								; done		
-
-            ; Handle 32-bit processor.  First, increase pmapCurLists if necessary; cr0 is bgt
-            ; iff the new mapping has more lists.  Since mapSearchFull() only sets up the first
-            ; pmapCurLists prev ptrs, we must initialize any new ones to point to the pmap.
-            ; While we are at it, we verify that the unused list hdrs in the pmap are 0.
-            
-mapIns32:
-            ble+	mapIns32a				; skip if new mapping does not use extra lists
-            cmpwi	r9,kSkipListMaxLists	; in range?
-            stb		r9,pmapCurLists(r3)		; remember new max
-            mtctr	r6						; set up count of new lists
-            mr		r5,r8					; copy offset to last list
-            subi	r0,r10,mpList0+4		; r0 <-- fake mapping ptr (to pmap) for null prev ptrs
-            bgt-	mapSkipListPanic		; choke if this mapping is on too many lists
-mapIns32NewList:
-            lwzx	r6,r5,r10				; get pmap list head
-            stwx	r0,r5,r12				; initialize prev ptr
-            subi	r5,r5,8					; get next list offset
-            cmpwi	r6,0					; was list hdr null?
-            bdnzt	cr0_eq,mapIns32NewList	; loop if more lists to initialize and list hdr was 0
-            bne-	mapSkipListPanic		; die if pmap list hdr was not null
-            b		mapIns32a
-            
-            ; 32-bit processor: loop over each list this mapping is on
-            ;	 r4 = mapping
-            ;	 r8 = next list offset
-            ;	r10 = ptr to base of pmap list header vector
-            ;	r11 = ptr to base of new mappings list vector
-            ;	r12 = ptr to base of prev ptr vector
-            
-            .align	4
-mapIns32a:
-            lwzx	r5,r8,r12				; get prev ptr from per-proc vector
-            cmpwi	cr1,r8,0				; more to go?
-            la		r7,mpList0+4(r5)		; get base of prev mappings list vector
-            lwzx	r9,r8,r7				; ***
-            stwx	r4,r8,r7				; * insert new mapping in middle of this list
-            stwx	r9,r8,r11				; ***
-            subi	r8,r8,8					; get next list offset
-            bne+	cr1,mapIns32a			; more lists to go
-            blr								; done		
-
-
-/*
- * 	*********************
- * 	* m a p R e m o v e *
- *	*********************
- *
- * Remove a mapping from pmap skip-lists.  The caller has already called mapSearchFull to 
- * find the mapping, which sets up the skipListPrev array with a vector of the previous
- * ptrs for each skip list.  When called:
- *		the pmap is locked (exclusive)
- *		translation is off, interrupts masked
- *		64-bit mode is enabled (if on a 64-bit machine)
- *		mapSearchFull has just been called for this mappings key
- *		cr6 is loaded with the corresponding feature flags (in particular, pf64Bit)
- *		r3 = pmap ptr
- *		r4 = mapping ptr
- *
- * There is no return value.  Except for cr6 (which is global), we trash nonvolatile regs.
- */ 
-
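-/*
- * The unlink is the mirror image of the insert; in rough C (hypothetical names,
- * again relying on the prev ptrs recorded by mapSearchFull):
- *
- *	void skiplist_remove(pmap_t *pmap, mapping_t **prevs, mapping_t *old, int nlists) {
- *		for (int list = nlists - 1; list >= 0; list--)
- *			prevs[list]->link[list] = old->link[list];	// bypass old
- *		while (pmap->curlists > 0 &&
- *		       pmap->heads[pmap->curlists - 1] == NULL)
- *			pmap->curlists--;	// emptied the top list(s): shrink count
- *	}
- */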
-			.align	5
-			.globl	EXT(mapRemove)
-LEXT(mapRemove)
-            lwz		r8,mpFlags(r4)			; get this mappings flags
-            lbz		r10,pmapCurLists(r3)	; get current #lists in use
-            la		r11,mpList0+4(r4)		; r11 <-- base of this mappings list vector
-            mfsprg	r12,0					; get ptr to our per-proc
-            andi.	r9,r8,mpLists			; get #lists this mapping is on (1<=n<=27)
-            slwi	r8,r9,3					; get #lists * 8
-            cmpw	cr5,r9,r10				; compare mpLists to pmapCurLists
-            la		r12,skipListPrev+4(r12)	; r12 <-- base of prev ptr vector
-            bgt--	cr5,mapSkipListPanic	; die if mpLists > pmapCurLists
-            subi	r8,r8,8					; get offset to topmost (last) list this mapping is in
-            bf--	pf64Bitb,mapRem32a		; skip if 32-bit processor
-            subi	r11,r11,4				; we use all 64 bits of list links on 64-bit machines
-            subi	r12,r12,4
-            b		mapRem64a
-
-            ; 64-bit processor: loop over each list this mapping is on
-            ;	 r3 = pmap
-            ;	 r4 = mapping
-            ;	 r8 = offset to next list
-            ;	r10 = pmapCurLists
-            ;	r11 = ptr to base of mapping list vector
-            ;	r12 = ptr to base of prev ptr vector in per-proc
-            ;	cr5 = beq if (mpLists == pmapCurLists)
-
-            .align	5
-mapRem64a:
-            ldx		r5,r8,r12				; get prev ptr from per-proc vector
-            ldx		r9,r8,r11				; get next ptr from mapping
-            cmpwi	cr1,r8,0				; more to go?
-            la		r7,mpList0(r5)			; get base of prev mappings list vector
-            stdx	r9,r8,r7				; point to next from prev
-            subi	r8,r8,8					; get next list offset
-            bne++	cr1,mapRem64a			; loop if another list to unlink from
-            
-            ; Did we reduce #lists in use by removing last mapping in last list?
-            
-            bnelr++	cr5						; if (mpLists!=pmapCurLists) cannot have removed last map
-            la		r5,pmapSkipLists(r3)	; point to vector of list hdrs
-mapRem64b:
-            subic.	r10,r10,1				; get base-0 list#
-            slwi	r8,r10,3				; get offset to last list
-            ldx		r0,r8,r5				; get last list ptr
-            cmpdi	cr1,r0,0				; null?
-            bnelr	cr1						; not null, so we are done
-            stb		r10,pmapCurLists(r3)	; was null, so decrement pmapCurLists
-            bgt		mapRem64b				; loop to see if more than one list was emptied
-            blr
-            
-            
-            ; 32-bit processor: loop over each list this mapping is on
-            ;	 r3 = pmap
-            ;	 r4 = mapping
-            ;	 r8 = offset to next list
-            ;	r10 = pmapCurLists
-            ;	r11 = ptr to base of mapping list vector
-            ;	r12 = ptr to base of prev ptr vector in per-proc
-            ;	cr5 = beq if (mpLists == pmapCurLists)
-            
-            .align	4
-mapRem32a:
-            lwzx	r5,r8,r12				; get prev ptr from per-proc vector
-            lwzx	r9,r8,r11				; get next ptr from mapping
-            cmpwi	cr1,r8,0				; more to go?
-            la		r7,mpList0+4(r5)		; get base of prev mappings list vector
-            stwx	r9,r8,r7				; point to next from prev
-            subi	r8,r8,8					; get next list offset
-            bne+	cr1,mapRem32a			; loop if another list to unlink from
-            
-            ; Did we reduce #lists in use by removing last mapping in last list?
-            
-            bnelr+	cr5						; if (mpLists!=pmapCurLists) cannot have removed last map
-            la		r5,pmapSkipLists+4(r3)	; point to vector of list hdrs
-mapRem32b:
-            subic.	r10,r10,1				; get base-0 list#
-            slwi	r8,r10,3				; get offset to last list
-            lwzx	r0,r8,r5				; get last list ptr
-            cmpwi	cr1,r0,0				; null?
-            bnelr	cr1						; not null, so we are done
-            stb		r10,pmapCurLists(r3)	; was null, so decrement pmapCurLists
-            bgt		mapRem32b				; loop to see if more than one list was emptied
-            blr
-            
-
-/*
- * *************************
- * * m a p S e t L i s t s *
- * *************************
- *
- * Called to decide how many skip-lists the next mapping will be on.  For each pmap,
- * we maintain a pseudo-random sequence based on a linear feedback shift register.  The
- * next number is generated by rotating the old value left by 1, XORing with a
- * polynomial (actually 4 8-bit polynomials concatenated), and adding 1.
- * The simple (unclamped) number of lists a mapping is on is the number of trailing 0s
- * in the pseudo-random number, shifted right by (log2(F)-1) where F is the fanout, plus one.
- * This seems to give us a near-perfect distribution, in the sense that about F times more nodes
- * are allocated on n lists than are on (n+1) lists.
- *
- * At one point we used a simple counter to assign lists.  While this gave perfect
- * distribution, there were certain access patterns that would drive a worst-case
- * distribution (e.g., insert low, then high, then low, etc.).  Unfortunately,
- * these patterns were not too uncommon.  We changed to a less-than-perfect assignment,
- * but one that works consistently across all known access patterns.
- *
- * Also, we modify the "simple" trailing-0-based list count, to account for an important
- * observation: because VM does a lot of removing and restoring of mappings in the process of
- * doing copy-on-write etc, it is common to have the pmap's "random number" (ie, the
- * count of created mappings) be much larger than the number of mappings currently in the
- * pmap.  This means the simple list count will often be larger than justified by the number of 
- * mappings in the pmap.  To avoid this common situation, we clamp the list count to be no more
- * than ceil(logBaseF(pmapResidentCnt)).
- *
- * Finally, we also clamp the list count to kSkipListMaxLists.
- *
- * We are passed the pmap ptr in r3.  Called with translation on, interrupts enabled,
- * and in 32-bit mode.
- */
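-/*
- * A rough C model of the computation (hypothetical names; the |1 and the
- * mask==0 test guard cases the PPC shift/count instructions handle for free):
- *
- *	int map_set_lists(pmap_t *pmap) {
- *		uint32_t r = pmap->randnum;
- *		r = ((r << 1) | (r >> 31)) ^ 0xA7CBF5B9;	// rotate left 1, XOR poly
- *		r = r + 1;					// ...and add 1
- *		pmap->randnum = r;				// update "random number"
- *		uint32_t mask = ~r & (r - 1);			// mask of trailing 0s of r
- *		mask &= 0xFFFFFFFFu >> __builtin_clz(pmap->resident_cnt | 1);	// clamp to magnitude of resident count
- *		int tz = (mask == 0) ? 0 : 32 - __builtin_clz(mask);	// trailing-0 count
- *		int max = kSkipListMaxLists * (kSkipListFanoutShift + 1) - 1;
- *		if (tz > max)
- *			tz = max;			// clamp to kSkipListMaxLists lists
- *		return (tz >> kSkipListFanoutShift) + 1;	// every mapping is on >= 1 list
- *	}
- */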
-            .align	5
-			.globl	EXT(mapSetLists)
-LEXT(mapSetLists)
-            lwz		r5,pmapRandNum(r3)		; get the per-pmap counter of mapping creates
-            lwz		r4,pmapResidentCnt(r3)	; get number of mappings in this pmap
-			lis		r11,hi16(0xA7CBF5B9)	; Get polynomial (I just made this up...)
-			li		r0,-1					; get a mask of 1s
-			ori		r11,r11,lo16(0xA7CBF5B9)	; Get polynomial (I just made this up...)
-			rlwinm	r5,r5,1,0,31			; Rotate
-			cntlzw	r7,r4					; get magnitude of pmapResidentCnt
-			xor		r5,r5,r11				; Munge with poly
-			srw		r7,r0,r7				; r7 <- mask for magnitude of pmapResidentCnt
-			addi	r6,r5,1					; increment pmapRandNum non-atomically
-            andc	r8,r5,r6				; get a mask for trailing zeroes in pmapRandNum
-            stw		r6,pmapRandNum(r3)		; update "random number"
-			and		r8,r8,r7				; clamp trailing 0s to magnitude of pmapResidentCnt
-            rlwinm	r8,r8,0,32-(kSkipListMaxLists*(kSkipListFanoutShift+1))+1,31 ; clamp to kSkipListMaxLists
-            cntlzw	r9,r8					; count leading 0s in the mask
-            subfic	r10,r9,32				; r10 <- trailing zero count
-            srwi	r11,r10,kSkipListFanoutShift ; shift by 1 if fanout is 4, 2 if 8, etc
-            addi	r3,r11,1				; every mapping is on at least one list
-            blr
-            
-
-/*
- * *************************************
- * * m a p S k i p L i s t V e r i f y *
- * *************************************
- *
- * This does a fairly thorough sweep through a pmaps skip-list data structure, doing
- * consistency checks.  It is typically called (from hw_exceptions.s) from debug or
- * instrumented builds.  It is probably not a good idea to call this in production builds,
- * as it must run with exceptions disabled and can take a long time to verify a big pmap.
- * It runs in O(n*ln(n)).
- *
- * Called on a bl, with the pmap ptr in r20.  We assume the pmap is locked (shared) and
- * that EE and DR are off.  We check all 64 bits of ptrs even on 32-bit machines.
- * We use r20-r31, cr0, cr1, and cr7.  If we return, no inconsistencies were found.
- *
- * You will notice we make little attempt to schedule the code; clarity is deemed more
- * important than speed.
- */
- 
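-/*
- * Conceptually, the verifier just re-walks every list and re-checks the
- * invariants; in rough C (hypothetical names, and ignoring block mappings,
- * whose end addresses are what the real code compares):
- *
- *	uint64_t budget = (uint64_t)pmap->curlists * pmap->resident_cnt;
- *	for (int list = kSkipListMaxLists; list >= 1; list--) {
- *		mapping_t *m = pmap->heads[list - 1];
- *		if ((m == NULL) != (list > pmap->curlists))
- *			panic();	// exactly the lists above pmapCurLists are empty
- *		for (; m != NULL; m = m->link[list - 1]) {
- *			if (m->nlists < list || m->nlists > kSkipListMaxLists)
- *				panic();	// node is on a list it should not be on
- *			mapping_t *n = m->link[list - 1];
- *			if (n != NULL && n->va <= m->va)
- *				panic();	// keys must strictly ascend
- *			if (budget-- == 0)
- *				panic();	// more visits than possible: a cycle
- *		}
- *	}
- */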
- 
- /*
-  *			mapSkipListVerifyC is a version that is callable from C.
-  *			This should be called only from the debugger, IT DOES NOT LOCK THE PMAP!!!!
-  */
- 
-			.globl	EXT(mapSkipListVerifyC)
-LEXT(mapSkipListVerifyC)
-
- 			stwu	r1,-(FM_ALIGN((31-13+1)*4)+FM_SIZE)(r1)	; Make some space on the stack
-			mflr	r0							; Save the link register
-			stmw	r13,FM_ARG0(r1)				; Save all registers
-			stw		r0,(FM_ALIGN((31-13+1)*4)+FM_SIZE+FM_LR_SAVE)(r1)	; Save the return
-			
-			lwz		r15,pmapvr(r3)				; Get the V to R translation
-			lwz		r16,pmapvr+4(r3)			; Get the V to R translation
-			mr		r19,r4						; Save register dump area
-			
-			bl		EXT(mapSetUp)				; Get set up
-			
-			mr		r17,r11
-			xor		r20,r3,r16					; Translate 32-bit portion
-			bf--	pf64Bitb,mslvc32a			; Skip if 32-bit...
-			
-			rldimi	r20,r15,32,0				; Shift the fixed upper part of the physical over and cram in top
-			
-mslvc32a:	lis		r18,hi16(EXT(DebugWork))
-			ori		r18,r18,lo16(EXT(DebugWork))
-			li		r0,0x4262
-			stw		r0,4(r18)					; Make sure the test knows to run
-			
-			bl		EXT(mapSkipListVerify)		; Run the test
-
-			li		r0,0						
-			stw		r0,4(r18)					; Remove explicit call flag
-
-			bt++	pf64Bitb,mslvc64a			; This is 64-bit...
-
-			mtmsr	r17							; Restore enables/translation/etc.
-			isync
-			
-			li		r0,0
-			stw		r0,0x000+0(r19)
-			stw		r0,0x000+4(r19)
-			stw		r0,0x008+0(r19)
-			stw		r1,0x008+4(r19)
-			stw		r0,0x010+0(r19)
-			stw		r2,0x010+4(r19)
-			stw		r0,0x018+0(r19)
-			stw		r3,0x018+4(r19)
-			stw		r0,0x020+0(r19)
-			stw		r4,0x020+4(r19)
-			stw		r0,0x028+0(r19)
-			stw		r5,0x028+4(r19)
-			stw		r0,0x030+0(r19)
-			stw		r6,0x030+4(r19)
-			stw		r0,0x038+0(r19)
-			stw		r7,0x038+4(r19)
-			stw		r0,0x040+0(r19)
-			stw		r8,0x040+4(r19)
-			stw		r0,0x048+0(r19)
-			stw		r9,0x048+4(r19)
-			stw		r0,0x050+0(r19)
-			stw		r10,0x050+4(r19)
-			stw		r0,0x058+0(r19)
-			stw		r11,0x058+4(r19)
-			stw		r0,0x060+0(r19)
-			stw		r12,0x060+4(r19)
-			stw		r0,0x068+0(r19)
-			stw		r13,0x068+4(r19)
-			stw		r0,0x070+0(r19)
-			stw		r14,0x070+4(r19)
-			stw		r0,0x078+0(r19)
-			stw		r15,0x078+4(r19)
-			stw		r0,0x080+0(r19)
-			stw		r16,0x080+4(r19)
-			stw		r0,0x088+0(r19)
-			stw		r17,0x088+4(r19)
-			stw		r0,0x090+0(r19)
-			stw		r18,0x090+4(r19)
-			stw		r0,0x098+0(r19)
-			stw		r19,0x098+4(r19)
-			stw		r0,0x0A0+0(r19)
-			stw		r20,0x0A0+4(r19)
-			stw		r0,0x0A8+0(r19)
-			stw		r21,0x0A8+4(r19)
-			stw		r0,0x0B0+0(r19)
-			stw		r22,0x0B0+4(r19)
-			stw		r0,0x0B8+0(r19)
-			stw		r23,0x0B8+4(r19)
-			stw		r0,0x0C0+0(r19)
-			stw		r24,0x0C0+4(r19)
-			stw		r0,0x0C8+0(r19)
-			stw		r25,0x0C8+4(r19)
-			stw		r0,0x0D0+0(r19)
-			stw		r26,0x0D0+4(r19)
-			stw		r0,0x0D8+0(r19)
-			stw		r27,0x0D8+4(r19)
-			stw		r0,0x0E0+0(r19)
-			stw		r28,0x0E0+4(r19)
-			stw		r0,0x0E8+0(r19)
-			stw		r29,0x0E8+4(r19)
-			stw		r0,0x0F0+0(r19)
-			stw		r30,0x0F0+4(r19)
-			stw		r0,0x0F8+0(r19)
-			stw		r31,0x0F8+4(r19)
-			
-			b		mslvcreturn					; Join common...
-
-mslvc64a:	mtmsrd	r17							; Restore enables/translation/etc.
-			isync								
-			
-			std		r0,0x000(r19)
-			std		r1,0x008(r19)
-			std		r2,0x010(r19)
-			std		r3,0x018(r19)
-			std		r4,0x020(r19)
-			std		r5,0x028(r19)
-			std		r6,0x030(r19)
-			std		r7,0x038(r19)
-			std		r8,0x040(r19)
-			std		r9,0x048(r19)
-			std		r10,0x050(r19)
-			std		r11,0x058(r19)
-			std		r12,0x060(r19)
-			std		r13,0x068(r19)
-			std		r14,0x070(r19)
-			std		r15,0x078(r19)
-			std		r16,0x080(r19)
-			std		r17,0x088(r19)
-			std		r18,0x090(r19)
-			std		r19,0x098(r19)
-			std		r20,0x0A0(r19)
-			std		r21,0x0A8(r19)
-			std		r22,0x0B0(r19)
-			std		r23,0x0B8(r19)
-			std		r24,0x0C0(r19)
-			std		r25,0x0C8(r19)
-			std		r26,0x0D0(r19)
-			std		r27,0x0D8(r19)
-			std		r28,0x0E0(r19)
-			std		r29,0x0E8(r19)
-			std		r30,0x0F0(r19)
-			std		r31,0x0F8(r19)
-			
-			
-mslvcreturn:
-			lwz		r0,(FM_ALIGN((31-13+1)*4)+FM_SIZE+FM_LR_SAVE)(r1)	; Get the return
-			lmw		r13,FM_ARG0(r1)				; Get the registers
-			mtlr	r0							; Restore the return
-			lwz		r1,0(r1)					; Pop the stack
-			blr
-
- 
-			.globl	EXT(mapSkipListVerify)
-LEXT(mapSkipListVerify)
-            mflr	r31						; save LR so we can bl to mapVerifyDie
-            
-            ; If we have already found an inconsistency and died, do not do so again, to
-            ; avoid a loop.
-            
-			lis		r27,hi16(EXT(DebugWork))
-			ori		r27,r27,lo16(EXT(DebugWork))
-			lwz		r0,4(r27)				; Get the explicit entry flag
-			lwz		r27,0(r27)				; Get lockout
-			cmplwi	r0,0x4262				; Should we run anyway?
-			beq--	mslvAnyway				; Yes...
-            cmpwi	r27,0					; have we already found an error?
-            bnelr--							; yes, just return without checking again
-
-mslvAnyway:           
-            ; Not recursive call, so initialize.
-            
-            mfsprg	r23,2					; get the feature flags
-            mtcrf	0x02,r23				; put pf64Bit where we can test it
-            lbz		r26,pmapCurLists(r20)	; get #lists that are in use
-            lwz		r21,pmapResidentCnt(r20); get #mappings in this pmap
-            cmpwi	r26,kSkipListMaxLists	; in range?
-            bgtl--	mapVerifyDie			; pmapCurLists is too big
-            
-            ; To prevent infinite loops, set limit of (pmapCurLists*pmapResidentCnt) iterations.
-            ; Since we walk each list this is the max number of mappings we could visit.
-            
-            li		r23,0					; initialize count
-mapVer0:
-            subic.	r26,r26,1				; loop pmapCurLists times (but at least once)
-            add		r23,r23,r21				; compute (pmapCurLists*pmapResidentCnt) 
-            bgt		mapVer0					; this will be a 64-bit qty on 64-bit machines
-            
-            li		r22,kSkipListMaxLists	; initialize list#
-            bf--	pf64Bitb,mapVer32		; go handle a 32-bit processor
-            
-            ; 64-bit machine.
-            ;
-            ; Loop over each list, counting mappings in each.  We first check whether or not
-            ; the list is empty (ie, if the pmapSkipLists ptr is null.)  All lists above
-            ; pmapCurLists should be empty, and no list at or below pmapCurLists should be.
-            ;	r20 = pmap ptr
-            ;	r21 = decrementing counter of mappings in this pmap
-            ;	r22 = next list# (1...kSkipListMaxLists)
-            ;	r23 = decrementing counter for infinite loop check
-            
-mapVer64:
-            slwi	r25,r22,3				; get offset to next skiplist
-            la		r26,pmapSkipLists(r20)	; get ptr to base of skiplist vector
-            subi	r25,r25,8
-            ldx		r26,r25,r26				; get 1st mapping on this list, if any
-            lbz		r28,pmapCurLists(r20)	; get #lists in use
-            cmpdi	cr6,r26,0				; set cr6_eq if this list is null ("null")
-            cmpw	cr7,r22,r28				; set cr7_gt if this list is > pmapCurLists ("high")
-            crxor	cr0_eq,cr6_eq,cr7_gt	; cr0_eq <-- (null & !high) | (!null & high)
-            beql--	mapVerifyDie			; die if this list is null when it should not be, etc
-            b		mapVer64g
-           
-            ; Loop over each node in the list.
-            ;	r20 = pmap ptr
-            ;	r21 = decrementing counter of mappings in this pmap
-            ;	r22 = this list# (1...kSkipListMaxLists)
-            ;	r23 = decrementing counter for infinite loop check
-            ;	r25 = offset to this skiplist (ie, ((r22<<3)-8))
-            ;	r26 = mapping
-            
-mapVer64a:
-            lwz		r29,mpFlags(r26)		; get bits for this mapping
-            ld		r28,mpVAddr(r26)		; get key
-            subic.	r23,r23,1				; check for loops
-            bltl--	mapVerifyDie			; we have visited > (pmapCurLists*pmapResidentCnt) nodes
-            andi.	r30,r26,mpBasicSize-1	; test address for alignment
-            bnel--	mapVerifyDie			; not aligned
-            andi.	r27,r29,mpLists			; get #lists this mapping is supposed to be on
-            cmpw	cr1,r27,r22				; is it supposed to be on this list?
-            bltl--	cr1,mapVerifyDie		; mappings mpLists is too low
-            cmpwi	r27,kSkipListMaxLists	; too big?
-            bgtl--	mapVerifyDie			; mappings mpLists > max
-            rldicr	r28,r28,0,51			; clear low 12 bits of va
-            bne++	cr1,mapVer64f			; jump if this is not highest list for this node
-            
-            ; This is the "highest" (last) list this mapping is on.
-            ; Do some additional checks (so we only do them once per mapping.)
-            ; First, if a block mapping or nested pmap, compute block end.
-            
-			lhz		r27,mpBSize(r26)		; get #pages or #segments
-			rlwinm	r29,r29,mpBSub+1,31,31	; Rotate to get 0 if 4K bsu or 1 if 32MB bsu
-			addi	r27,r27,1				; units of nested pmap are (#segs-1)
-			ori		r29,r29,0x3216			; OR in 0x00003216 (0x3200 and a base rotate of 22)
-			rlwnm	r29,r29,r29,27,31		; Rotate to get 12 or 25
-			subi	r21,r21,1				; count mappings in this pmap
-			sld		r29,r27,r29				; Get the length in bytes
-			subi	r29,r29,4096			; get offset to last byte in nested pmap
-            
-            ; Here with r29 = size of block - 4k, or 0 if mapping is a scalar page.
-
-            add		r24,r28,r29				; r24 <- address of last valid page in this mapping
-            la		r28,mpList0(r26)		; get base of this mappings vector            
-            lwz		r27,mpFlags(r26)		; Get the number of lists
-            andi.	r27,r27,mpLists			; get #lists this mapping is on (1<=n<=27)
-            cmplwi	r27,mpBasicLists		; Into bigger mapping?
-            li		r27,mpBasicLists*8-8	; Assume normal
-            ble+	mapVer64c				; It is...
-            li		r27,kSkipListMaxLists*8-8	; initialize list offset for inner loop
-            
-            ; Inner loop over each list link in this mappings mpList vector.
-            ;	r24 = address of last valid page in this mapping
-            ;	r27 = offset for next list in inner loop
-            ;	r28 = base of this mappings list links
-            
-mapVer64c:
-            cmpw	cr1,r27,r25				; higher, lower, or same?
-            ldx		r29,r27,r28				; get link to next mapping at this level
-            mr.		r29,r29					; null?
-            beq		mapVer64d				; link null, which is always OK
-            bgtl--	cr1,mapVerifyDie		; a mapping has a non-null list higher than its mpLists
-            ld		r30,mpVAddr(r29)		; get next mappings va
-            rldicr	r30,r30,0,51			; zero low 12 bits
-            cmpld	r30,r24					; compare next key with ours
-            blel--	mapVerifyDie			; a next node has key <= to ours
-mapVer64d:
-            subic.	r27,r27,8				; move on to next list
-            bne++	mapVer64c				; loop if more to go
-            
-            ; Next node on current list, or next list if current done, or return if no more lists.
-            
-mapVer64f:
-            la		r28,mpList0(r26)		; get base of this mappings vector
-            ldx		r26,r25,r28				; get next mapping on this list
-mapVer64g:
-            mr.		r26,r26					; is there one?
-            bne++	mapVer64a				; yes, handle
-            subic.	r22,r22,1				; is there another list?
-            bgt++	mapVer64				; loop if so
-            
-            cmpwi	r21,0					; did we find all the mappings in the pmap?
-            bnel--	mapVerifyDie			; no
-            mtlr	r31						; restore return address
-            li		r3,0
-            blr
-            
-            
-            ; Handle 32-bit machine.
-            
-mapVer32:
-            lwz		r24,mpFlags(r20)		; Get number of lists
-            la		r30,pmapSkipLists(r20)	; first, check the pmap list hdrs
-            andi.	r24,r24,mpLists			; Clean the number of lists
-            bl		mapVerUpperWordsAre0	; are the upper words of each list all 0?
-            
-            ; Loop over each list, counting mappings in each.  We first check whether or not
-            ; the list is empty.  All lists above pmapCurLists should be empty, and no list
-            ; at or below pmapCurLists should be.
-            ;
-            ;	r20 = pmap ptr
-            ;	r21 = decrementing counter of mappings in this pmap
-            ;	r22 = next list# (1...kSkipListMaxLists)
-            ;	r23 = decrementing counter for infinite loop check
-            
-mapVer32NextList:
-            lbz		r28,pmapCurLists(r20)	; get #lists in use
-            slwi	r25,r22,3				; get offset to next skiplist
-            la		r26,pmapSkipLists+4(r20) ; get ptr to base of skiplist vector
-            subi	r25,r25,8
-            lwzx	r26,r25,r26				; get the 1st mapping on this list, or 0
-            cmpw	cr7,r22,r28				; set cr7_gt if this list is > pmapCurLists ("high")
-            cmpwi	cr6,r26,0				; set cr6_eq if this list is null ("null")
-            crxor	cr0_eq,cr6_eq,cr7_gt	; cr0_eq <-- (null & !high) | (!null & high)
-            beql-	mapVerifyDie			; die if this list is null when it should not be, etc
-            b		mapVer32g
-           
-            ; Loop over each node in the list.
-            ;	r20 = pmap ptr
-            ;	r21 = decrementing counter of mappings in this pmap
-            ;	r22 = this list# (1...kSkipListMaxLists)
-            ;	r23 = decrementing counter for infinite loop check
-            ;	r25 = offset to this skiplist (ie, ((r22<<3)-8))
-            ;	r26 = mapping
-            
-mapVer32a:
-            lwz		r29,mpFlags(r26)		; get bits for this mapping
-            andi.	r30,r26,mpBasicSize-1	; test address for alignment
-            lwz		r24,mpVAddr+0(r26)		; get upper word of key
-            bnel-	mapVerifyDie			; mapping address not 64-byte aligned
-            lwz		r28,mpVAddr+4(r26)		; get lower word of key
-            subic.	r23,r23,1				; check for loops
-            bltl-	mapVerifyDie			; we have visited > (pmapCurLists*pmapResidentCnt) nodes
-            cmpwi	r24,0					; upper word of key (ie, va) should be 0
-            bnel-	mapVerifyDie			; was not
-            andi.	r27,r29,mpLists			; get #lists this mapping is supposed to be on
-            cmpw	cr1,r27,r22				; is it supposed to be on this list?
-            bltl-	cr1,mapVerifyDie		; mappings mpLists is too low
-            cmpwi	r27,kSkipListMaxLists	; too big?
-            bgtl-	mapVerifyDie			; mappings mpLists > max
-            rlwinm	r28,r28,0,0,19			; clear low 12 bits of va
-            bne+	cr1,mapVer32f			; jump if this is not highest list for this node
-            
-            ; This is the "highest" (last) list this mapping is on.
-            ; Do some additional checks (so we only do them once per mapping.)
-            ; First, make sure upper words of the mpList vector are 0.
-
-			lhz		r27,mpBSize(r26)		; get #blocks
-			rlwinm	r29,r29,mpBSub+1,31,31	; Rotate to get 0 if 4K bsu or 1 if 32MB bsu
-			addi	r27,r27,1				; units of nested pmap are (#segs-1)
-			ori		r29,r29,0x3216			; OR in 0x00003216 (0x3200 and a base rotate of 22)
-			rlwnm	r29,r29,r29,27,31		; Rotate to get 12 or 25
-			subi	r21,r21,1				; count mappings in this pmap
-			slw		r29,r27,r29				; Get the length in bytes
-			subi	r29,r29,4096			; get offset to last byte in nested pmap
-
-            lwz		r24,mpFlags(r26)		; Get number of lists
-            la		r30,mpList0(r26)		; point to base of skiplist vector
-			andi.	r24,r24,mpLists			; Clean the number of lists
-			bl		mapVerUpperWordsAre0	; make sure upper words are all 0 (uses r24 and r27)
-                        
-            ; Here with r29 = size of block - 4k, or 0 if mapping is a scalar page.
-
-            add		r24,r28,r29				; r24 <- address of last valid page in this mapping
-            la		r28,mpList0+4(r26)		; get base of this mappings vector            
-            lwz		r27,mpFlags(r26)		; Get the number of lists
-            andi.	r27,r27,mpLists			; get #lists this mapping is on (1<=n<=27)
-            cmplwi	r27,mpBasicLists		; Into bigger mapping?
-            li		r27,mpBasicLists*8-8	; Assume normal
-            ble+	mapVer32c				; It is...
-            li		r27,kSkipListMaxLists*8-8	; initialize list offset for inner loop
-            
-            ; Inner loop over each list in this mappings mpList vector.
-            ;	r24 = address of last valid page in this mapping
-            ;	r27 = offset for next list in inner loop
-            ;	r28 = base of this mappings list links
-            
-mapVer32c:
-            cmpw	cr1,r27,r25				; higher, lower, or same?
-            lwzx	r29,r27,r28				; get link to next mapping at this level
-            mr.		r29,r29					; null?
-            beq		mapVer32d				; link null, which is always OK
-           
-           
-            bgtl-	cr1,mapVerifyDie		; a mapping has a non-null list higher than its mpLists
-            lwz		r30,mpVAddr+4(r29)		; get next mappings va
-            rlwinm	r30,r30,0,0,19			; zero low 12 bits
-            cmplw	r30,r24					; compare next key with ours
-            blel-	mapVerifyDie			; a next node has key <= to ours
-mapVer32d:
-            subic.	r27,r27,8				; move on to next list
-            bne+	mapVer32c				; loop if more to go
-            
-            ; Next node on current list, or next list if current done, or return if no more lists.
-            
-mapVer32f:
-            la		r28,mpList0+4(r26)		; get base of this mappings vector again
-            lwzx	r26,r25,r28				; get next mapping on this list
-mapVer32g:
-            mr.		r26,r26					; is there one?
-            bne+	mapVer32a				; yes, handle
-            subic.	r22,r22,1				; is there another list?
-            bgt+	mapVer32NextList		; loop if so
-            
-            cmpwi	r21,0					; did we find all the mappings in the pmap?
-            bnel-	mapVerifyDie			; no
-            mtlr	r31						; restore return address
-            li		r3,0
-            blr
-
-            ; Subroutine to verify that the upper words of a vector of kSkipListMaxLists
-            ; doublewords are 0.
-            ;	r30 = ptr to base of vector
-            ; Uses r24 and r27.
-            
-mapVerUpperWordsAre0:
-			cmplwi	r24,mpBasicLists		; Do we have more than basic?
-            li		r24,mpBasicLists*8		; Assume basic
-            ble++	mapVerUpper1			; We have the basic size
-            li		r24,kSkipListMaxLists*8	; Use max size
-            
-mapVerUpper1:
-            subic.	r24,r24,8				; get offset to next doubleword
-            lwzx	r27,r24,r30				; get upper word
-            cmpwi	cr1,r27,0				; 0 ?
-            bne-	cr1,mapVerifyDie		; die if not, passing callers LR
-            bgt+	mapVerUpper1			; loop if more to go
-            blr
-            
-            ; bl here if mapSkipListVerify detects an inconsistency.
-
-mapVerifyDie:
-			mflr	r3
-			mtlr	r31						; Restore return
-			lis		r31,hi16(EXT(DebugWork))
-			ori		r31,r31,lo16(EXT(DebugWork))
-			lwz		r0,4(r31)				; Get the explicit entry flag
-			cmplwi	r0,0x4262				; Should we run anyway?
-			beqlr--							; Explicit call, return...
-			
-            li		r0,1
-			stw		r0,0(r31)				; Lock out further calls
-            BREAKPOINT_TRAP					; hopefully, enter debugger
-            b		.-4
-            
-            
-/*
- * Panic (choke, to be exact) because of messed up skip lists.  The LR points back
- * to the original caller of the skip-list function.
- */
- 
-mapSkipListPanic:							; skip-lists are screwed up
-            lis		r0,hi16(Choke)
-            ori		r0,r0,lo16(Choke)
-            li      r3,failSkipLists		; get choke code
-            sc								; choke
-            b		.-4
-            
-
diff --git a/osfmk/ppc/spec_reg.h b/osfmk/ppc/spec_reg.h
deleted file mode 100644
index 46a13bdf6..000000000
--- a/osfmk/ppc/spec_reg.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifndef _PPC_SPEC_REG_H_
-#define _PPC_SPEC_REG_H_
-
-/* Defines for PVRs */
-#define PROCESSOR_VERSION_750		8	/* ? */
-#define PROCESSOR_VERSION_750FX		0x7000  /* ? */
-#define PROCESSOR_VERSION_7400		12	/* ? */
-#define PROCESSOR_VERSION_7410		0x800C	/* ? */
-#define PROCESSOR_VERSION_7450		0x8000	/* ? */
-#define PROCESSOR_VERSION_7455		0x8001	/* ? */
-#define PROCESSOR_VERSION_7457		0x8002	/* ? */
-#define PROCESSOR_VERSION_7447A		0x8003	/* ? */
-#define PROCESSOR_VERSION_970		0x0039	/* ? */
-#define PROCESSOR_VERSION_970FX		0x003C	/* ? */
-
-#endif /* _PPC_SPEC_REG_H_ */
diff --git a/osfmk/ppc/start.s b/osfmk/ppc/start.s
deleted file mode 100644
index 34ebea8ac..000000000
--- a/osfmk/ppc/start.s
+++ /dev/null
@@ -1,1283 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#define __APPLE_API_PRIVATE
-
-#include <mach_kdb.h>
-#include <mach_kdp.h>
-#include <mach_kgdb.h>
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <ppc/spec_reg.h>
-#include <machine/cpu_capabilities.h>
-#include <mach/ppc/vm_param.h>
-#include <assym.s>
-
-
-; Definitions of the processor type table format, which drives this code.
-; The table ("processor_types") is assembled at the end of this file.
-	
-#define ptFilter	0
-#define ptVersion	4
-#define ptRevision	6
-#define ptFeatures	8
-#define ptCPUCap	12
-#define ptPwrModes	16
-#define ptPatch		20
-#define ptInitRout	24
-#define ptRptdProc	28
-#define ptLineSize	32
-#define ptl1iSize	36
-#define ptl1dSize	40
-#define ptPTEG		44
-#define ptMaxVAddr	48
-#define ptMaxPAddr	52
-#define ptSize		56
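For readability, the same layout as a hypothetical C struct (the kernel itself only uses the byte offsets above; the field comments restate the format description that precedes the table at the end of this file):

    struct processor_type {   /* total size: ptSize = 56 bytes                */
        uint32_t ptFilter;    /*  0: mask of significant PVR bits             */
        uint16_t ptVersion;   /*  4: PVR version code to match                */
        uint16_t ptRevision;  /*  6: PVR revision code (0 = generic)          */
        uint32_t ptFeatures;  /*  8: available feature flags                  */
        uint32_t ptCPUCap;    /* 12: default _cpu_capabilities value          */
        uint32_t ptPwrModes;  /* 16: supported power-management modes         */
        uint32_t ptPatch;     /* 20: patch-table feature selector             */
        uint32_t ptInitRout;  /* 24: processor-specific init routine          */
        uint32_t ptRptdProc;  /* 28: processor type reported (CPU subtype)    */
        uint32_t ptLineSize;  /* 32: L1 cache line size, bytes                */
        uint32_t ptl1iSize;   /* 36: L1 I-cache size, bytes                   */
        uint32_t ptl1dSize;   /* 40: L1 D-cache size, bytes                   */
        uint32_t ptPTEG;      /* 44: PTEG size                                */
        uint32_t ptMaxVAddr;  /* 48: maximum effective-address bits           */
        uint32_t ptMaxPAddr;  /* 52: maximum physical-address bits            */
    };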
-
-
-; We use cr2 for flags:
-
-#define bootCPU 10
-#define firstInit 9
-#define firstBoot 8
-
-/*
- * Interrupt and bootup stack for initial processor
- */
-
-	.file	"start.s"
-	
-/*
- * All CPUs start here.
- *
- * This code is called from SecondaryLoader
- *
- * Various arguments are passed via a table:
- *   R3 = pointer to other startup parameters
- */
-	.text
-
-ENTRY(resetPOR,TAG_NO_FRAME_USED)
-
-			li		r12,0								; Get a 0
-			stw		r12,0xF0(0)							; Make sure the special flag is clear
-			mtmsrd	r12									; Make sure we are in 32-bit mode
-			isync										; Really make sure
-			lwz		r3,0xF4(0)							; Get the boot_args pointer
-			b		startJoin							; Join up...
-
-
-ENTRY(_start_cpu,TAG_NO_FRAME_USED)
-			crclr	bootCPU								; Set non-boot processor
-			crclr	firstInit							; Set not first time init
-			lwz		r30,ppe_paddr(r3)					; Set current per_proc
-			lwz		r28,ppe_paddr+4(r3)					; Set current per_proc
-			rlwinm	r30,r30,0,1,0						; Copy low 32 bits to top 32 
-			rlwimi	r30,r28,0,0,31						; Insert low part of 64-bit address in bottom 32 bits
-			subi	r29,r3,(ACT_PER_PROC-ppe_vaddr)		; Subtract mact.PerProc offset
-			mr		r3,r30								; Set current per_proc
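In C terms, the rlwinm/rlwimi pair above merges the two 32-bit halves of the 64-bit physical per_proc address (a sketch; hi32 and lo32 stand for the words loaded from ppe_paddr and ppe_paddr+4):

    uint64_t paddr = ((uint64_t)hi32 << 32) | lo32;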
-			
-;
-;			Note that we are just trying to get close.  The real TB sync will take
-;			place later.  The value we are loading is set in two places.  For the 
-;			main processor, it will be the TB at the last interrupt before we went
-;			to sleep.  For the others, it will be the time just before the main
-;			processor woke us up.
-;
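The three timebase writes below are ordered so no carry can occur while the register is half-written: TBL is cleared first so TBU cannot tick over, then TBU and TBL are loaded. A C-style sketch (mtspr() here is a stand-in for the instruction, not a kernel helper):

    mtspr(TBL, 0);     /* stop the low half: no carry into TBU while we work */
    mtspr(TBU, hi);    /* upper 32 bits, from ruptStamp                      */
    mtspr(TBL, lo);    /* lower 32 bits, from ruptStamp+4                    */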
-
-			lwz		r15,ruptStamp(r3)					; Get the timebase from the other processor
-			li		r17,0								; Clear this out
-			lwz		r16,ruptStamp+4(r3)					; Get the timebase from the other processor
-			mtspr	tbl,r17								; Clear bottom so we do not tick
-			mtspr	tbu,r15								; Set top
-			mtspr	tbl,r16								; Then bottom again
-			b		allstart
-			
-ENTRY(_start,TAG_NO_FRAME_USED)
-
-startJoin:
-			mflr		r2					; Save the return address
-			lis		r28,hi16(EXT(PerProcTable))			; Set PerProcTable
-			lis		r30,hi16(EXT(BootProcInfo))			; Set current per_proc
-			ori		r28,r28,lo16(EXT(PerProcTable))		; Set PerProcTable
-			ori		r30,r30,lo16(EXT(BootProcInfo))		; Set current per_proc
-			stw		r30,ppe_paddr+4(r28)				; Set per_proc_entry
-			stw		r30,ppe_vaddr(r28)					; Set per_proc_entry
-			subi	r29,r28,(ACT_PER_PROC-ppe_vaddr)	; Subtract mact.PerProc offset
-			crset	bootCPU								; Set boot processor
-			
-			lwz		r17,pfAvailable(r30)				; Get the available bits
-			rlwinm.	r0,r17,0,pfValidb,pfValidb			; Have we initialized the feature flags yet?
-			crmove	firstInit,cr0_eq					; Set if we are doing first time init
-			bne		allstart							; Yeah, we must be waking up from sleep...
-			
-;
-;			Here is where we do any one time general low-level initialization
-
-			lis		r20,HIGH_ADDR(fwdisplock)			; Get address of the firmware display lock
-			li		r19,0								; Zorch a register
-			ori		r20,r20,LOW_ADDR(fwdisplock)		; Get address of the firmware display lock
-			stw		r19,0(r20)							; Make sure the lock is free
-			
-allstart:
-			mr		r31,r3								; Save away arguments
-
-			crand	firstBoot,bootCPU,firstInit			; Indicate if we are on the initial first processor startup
-
-			mtsprg	0,r30								; Set per_proc paddr
-			mtsprg	1,r29								; Set spr1
-
-			li		r9,0								; Clear out a register
-			mtsprg	2,r9
-			mtsprg	3,r9
-
-			li		r7,MSR_VM_OFF						; Get real mode MSR			
-			mtmsr	r7									; Set the real mode SRR
-			isync					
-
-			lis		r26,hi16(processor_types)			; Point to processor table
-			ori		r26,r26,lo16(processor_types)		; Other half
-			mfpvr	r10									; Get the PVR
-			
-nextPVR:	lwz		r28,ptFilter(r26)					; Get the filter
-			lwz		r27,ptVersion(r26)					; Get the version and revision codes
-			and		r28,r10,r28							; Throw away don't-care bits
-			cmplw	r27,r28								; Is this the right set?
-			beq		donePVR								; We have the right one...
-			addi	r26,r26,ptSize						; Point to the next type
-			b		nextPVR								; Check it out...
-			
-donePVR:	lwz		r20,ptInitRout(r26)					; Grab the special init routine
-			mtlr	r20									; Setup to call the init
-
-			bf		firstBoot,notFirst					; Not first boot, go...
-			
-;			
-;			The following code does a general initialization of the features just
-;			after the initial first-time boot.  This is not done after waking up or on
-;			any "secondary" processor.  Just after the boot-processor init, we copy the
-;			features to any possible per_proc.
-;
-;			We are just setting defaults.   The specific initialization code will modify these
-;			if necessary. 
-;			
-			lis		r18,hi16(EXT(_cpu_capabilities))	; Get the address of _cpu_capabilities
-			ori		r18,r18,lo16(EXT(_cpu_capabilities))
-			lwz		r17,ptCPUCap(r26)					; Get the default cpu capabilities
-			stw		r17, 0(r18)							; Save the default value in _cpu_capabilities
-			
-			lwz		r17,ptFeatures(r26)					; Pick up the features
-			
-			lwz		r18,ptRptdProc(r26)					; Get the reported processor
-			sth		r18,pfrptdProc(r30)					; Set the reported processor
-			
-			lwz		r13,ptPwrModes(r26)					; Get the supported power modes
-			stw		r13,pfPowerModes(r30)				; Set the supported power modes
-			
-			lwz		r13,ptLineSize(r26)					; Get the cache line size
-			sth		r13,pflineSize(r30)					; Save it
-			lwz		r13,ptl1iSize(r26)					; Get icache size
-			stw		r13,pfl1iSize(r30)					; Save it
-			lwz		r13,ptl1dSize(r26)					; Get dcache size
-			stw		r13,pfl1dSize(r30)					; Save it
-			lwz		r13,ptPTEG(r26)						; Get PTEG size address
-			stw		r13,pfPTEG(r30)						; Save it
-			lwz		r13,ptMaxVAddr(r26)					; Get max virtual address
-			stw		r13,pfMaxVAddr(r30)					; Save it
-			lwz		r13,ptMaxPAddr(r26)					; Get max physical address
-			stw		r13,pfMaxPAddr(r30)					; Save it
-
-            
-;           Go through the patch table, changing performance-sensitive kernel routines based on the
-;           processor type or other things.
-
-			lis		r11,hi16(EXT(patch_table))
-			ori		r11,r11,lo16(EXT(patch_table))
-			lwz		r19,ptPatch(r26)					; Get ptPatch field
-patch_loop:
-			lwz		r16,patchType(r11)					; Load the patch type
-			lwz		r15,patchValue(r11)					; Load the patch value
-			cmplwi	cr1,r16,PATCH_FEATURE				; Is it a patch feature entry
-            cmplwi  cr7,r16,PATCH_END_OF_TABLE          ; end of table?
-			and.	r14,r15,r19							; Is it set in the patch feature
-			crandc	cr0_eq,cr1_eq,cr0_eq				; Do we have a match
-            beq     cr7,doOurInit                       ; end of table, Go do processor specific initialization
-			beq		patch_apply							; proc feature matches, so patch memory
-			cmplwi	cr1,r16,PATCH_PROCESSOR				; Is it a patch processor entry
-			cmplw	cr0,r15,r18							; Check matching processor
-			crand	cr0_eq,cr1_eq,cr0_eq				; Do we have a match
-			bne		patch_skip							; No, skip patch memory
-patch_apply:
-			lwz		r13,patchAddr(r11)					; Load the address to patch
-			lwz		r14,patchData(r11)					; Load the patch data
-			stw		r14,0(r13)							; Patch the location
-			dcbf	0,r13								; Flush the old one
-			sync										; Make sure we see it all
-			icbi	0,r13								; Flush the i-cache
-			isync										; Hang out
-			sync										; Hang out some more...
-patch_skip:
-			addi	r11,r11,peSize						; Point to the next patch entry
-			b       patch_loop							; handle next
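The loop above, restated as C for clarity (a sketch; the struct and helper names are illustrative, not the kernel's): an entry applies when it is a feature patch whose value intersects ptPatch, or a processor patch whose value equals the reported processor; PATCH_END_OF_TABLE terminates the walk.

    struct patch_entry *pe;
    for (pe = patch_table; pe->patchType != PATCH_END_OF_TABLE; pe++) {
        int apply =
            (pe->patchType == PATCH_FEATURE   && (pe->patchValue & ptPatch)) ||
            (pe->patchType == PATCH_PROCESSOR && pe->patchValue == rptdProc);
        if (apply) {
            *pe->patchAddr = pe->patchData;   /* overwrite the instruction   */
            flush_dcache_line(pe->patchAddr); /* dcbf; sync                  */
            kill_icache_line(pe->patchAddr);  /* icbi; isync; sync           */
        }
    }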
-
-
-;           Additional processors join here after skipping above code.
-
-notFirst:	lwz		r17,pfAvailable(r30)				; Get our features
-
-doOurInit:	mr.		r20,r20								; See if there is an initialization routine
-			crand	firstBoot,bootCPU,firstInit			; Indicate if we are on the initial first processor startup
-			bnelrl										; Do the initialization
-			
-			ori		r17,r17,lo16(pfValid)				; Set the valid bit
-			stw		r17,pfAvailable(r30)				; Set the available features
-
-			rlwinm.	r0,r17,0,pf64Bitb,pf64Bitb			; Is this a 64-bit machine?
-			mtsprg	2,r17								; Remember the feature flags
-
-			bne++	start64								; Skip following if 64-bit...
-
-			mfspr	r6,hid0								; Get the HID0
-			rlwinm	r6,r6,0,sleep+1,doze-1				; Remove any vestiges of sleep
-			mtspr	hid0,r6								; Set the insomniac HID0
-			isync					
-		
-;			Clear the BAT registers
-
-			li		r9,0								; Clear out a register
-			sync
-			isync
-			mtdbatu 0,r9								; Invalidate maps
-			mtdbatl 0,r9								; Invalidate maps
-			mtdbatu 1,r9								; Invalidate maps
-			mtdbatl 1,r9								; Invalidate maps
-			mtdbatu 2,r9								; Invalidate maps
-			mtdbatl 2,r9								; Invalidate maps
-			mtdbatu 3,r9								; Invalidate maps
-			mtdbatl 3,r9								; Invalidate maps
-			sync
-			isync
-			mtibatu 0,r9								; Invalidate maps
-			mtibatl 0,r9								; Invalidate maps
-			mtibatu 1,r9								; Invalidate maps
-			mtibatl 1,r9								; Invalidate maps
-			mtibatu 2,r9								; Invalidate maps
-			mtibatl 2,r9								; Invalidate maps
-			mtibatu 3,r9								; Invalidate maps
-			mtibatl 3,r9								; Invalidate maps
-			sync
-			isync
-			b		startcommon							; Go join up the common start routine
-			
-start64:	lis		r5,hi16(startcommon)				; Get top of address of continue point
-			mfspr	r6,hid0								; Get the HID0
-			ori		r5,r5,lo16(startcommon)				; Get low of address of continue point
-			lis		r9,hi16(MASK(MSR_HV)|MASK(MSR_SF))	; ?
-			lis		r20,hi16(dozem|napm|sleepm)			; Get mask of power saving features	
-			ori		r20,r20,lo16(1)						; Disable the attn instruction
-			li		r7,MSR_VM_OFF						; Get real mode MSR
-			sldi	r9,r9,32							; Slide into position
-			sldi	r20,r20,32							; Slide power stuff into position
-			or		r9,r9,r7							; Form initial MSR
-			andc	r6,r6,r20							; Remove any vestiges of sleep
-			isync
-			mtspr	hid0,r6								; Set the insomniac HID0
-			mfspr	r6,hid0								; Get it
-			mfspr	r6,hid0								; Get it
-			mfspr	r6,hid0								; Get it
-			mfspr	r6,hid0								; Get it
-			mfspr	r6,hid0								; Get it
-			mfspr	r6,hid0								; Get it
-			isync
-			mtsrr0	r5									; Set the continue point
-			mtsrr1	r9									; Set our normal disabled MSR				
-			rfid										; Tally ho...
-			
-			.align	5					
-
-startcommon:
-			rlwinm.	r0,r17,0,pfFloatb,pfFloatb			; See if there is floating point
-			beq-	noFloat								; Nope, this is a really stupid machine...
-			
-			li		r0,MSR_VM_OFF|MASK(MSR_FP)			; Enable for floating point
-			mtmsr	r0									/* Set the standard MSR values */
-			isync
-			
-			lis		r5,HIGH_ADDR(EXT(FloatInit))		/* Get top of floating point init value */
-			ori		r5,r5,LOW_ADDR(EXT(FloatInit))		/* Slam bottom */
-			lfd		f0,0(r5)							/* Initialize FP0 */
-			fmr		f1,f0								/* Ours is not */
-			fmr		f2,f0								/* to wonder why, */
-			fmr		f3,f0								/* ours is but to */
-			fmr		f4,f0								/* do or die! */
-			fmr		f5,f0						
-			fmr		f6,f0						
-			fmr		f7,f0						
-			fmr		f8,f0						
-			fmr		f9,f0						
-			fmr		f10,f0						
-			fmr		f11,f0						
-			fmr		f12,f0						
-			fmr		f13,f0						
-			fmr		f14,f0						
-			fmr		f15,f0						
-			fmr		f16,f0						
-			fmr		f17,f0						
-			fmr		f18,f0						
-			fmr		f19,f0						
-			fmr		f20,f0						
-			fmr		f21,f0						
-			fmr		f22,f0						
-			fmr		f23,f0						
-			fmr		f24,f0						
-			fmr		f25,f0						
-			fmr		f26,f0						
-			fmr		f27,f0						
-			fmr		f28,f0						
-			fmr		f29,f0						
-			fmr		f30,f0						
-			fmr		f31,f0						
-		
-			li		r0,	MSR_VM_OFF						; Turn off floating point
-			mtmsr	r0
-			isync
-
-noFloat:	rlwinm.	r0,r17,0,pfAltivecb,pfAltivecb		; See if there is Altivec
-			beq-	noVector							; Nope...
-		
-			li		r0,0								; Clear out a register
-			
-			lis		r7,hi16(MSR_VEC_ON)					; Get real mode MSR + Altivec
-			ori		r7,r7,lo16(MSR_VM_OFF)				; Get real mode MSR + Altivec
-			mtmsr	r7									; Set the real mode SRR
-			isync										; Make sure it has happened									
-		
-			lis		r5,hi16(EXT(QNaNbarbarian))			; Altivec initializer
-			ori		r5,r5,lo16(EXT(QNaNbarbarian))		; Altivec initializer
-
-			mtspr	vrsave,r0							; Set that no VRs are used yet
-			
-			vspltish v1,1								; Turn on the non-Java bit and saturate
-			vspltisw v0,1								; Turn on the saturate bit
-			vxor	v1,v1,v0							; Turn off saturate	and leave non-Java set
-			lvx		v0,br0,r5							; Initialize VR0
-			mtvscr	v1									; Initialize the VSCR (non-Java mode, saturate off)
-			vor		v2,v0,v0							; Copy into the next register
-			vor		v1,v0,v0							; Copy into the next register
-			vor		v3,v0,v0							; Copy into the next register
-			vor		v4,v0,v0							; Copy into the next register
-			vor		v5,v0,v0							; Copy into the next register
-			vor		v6,v0,v0							; Copy into the next register
-			vor		v7,v0,v0							; Copy into the next register
-			vor		v8,v0,v0							; Copy into the next register
-			vor		v9,v0,v0							; Copy into the next register
-			vor		v10,v0,v0							; Copy into the next register
-			vor		v11,v0,v0							; Copy into the next register
-			vor		v12,v0,v0							; Copy into the next register
-			vor		v13,v0,v0							; Copy into the next register
-			vor		v14,v0,v0							; Copy into the next register
-			vor		v15,v0,v0							; Copy into the next register
-			vor		v16,v0,v0							; Copy into the next register
-			vor		v17,v0,v0							; Copy into the next register
-			vor		v18,v0,v0							; Copy into the next register
-			vor		v19,v0,v0							; Copy into the next register
-			vor		v20,v0,v0							; Copy into the next register
-			vor		v21,v0,v0							; Copy into the next register
-			vor		v22,v0,v0							; Copy into the next register
-			vor		v23,v0,v0							; Copy into the next register
-			vor		v24,v0,v0							; Copy into the next register
-			vor		v25,v0,v0							; Copy into the next register
-			vor		v26,v0,v0							; Copy into the next register
-			vor		v27,v0,v0							; Copy into the next register
-			vor		v28,v0,v0							; Copy into the next register
-			vor		v29,v0,v0							; Copy into the next register
-			vor		v30,v0,v0							; Copy into the next register
-			vor		v31,v0,v0							; Copy into the next register
-		
-			li		r0,	MSR_VM_OFF						; Turn off vectors
-			mtmsr	r0
-			isync
-
-noVector:
-			bl		EXT(cacheInit)						; Initializes all caches (including the TLB)
-
-			bt		bootCPU,run32					
-
-			mfsprg	r30,0								; Phys per proc
-			lwz		r29,PP_HIBERNATE(r30)
-            andi.	r29, r29, 1
-			beq		noHashTableInit						; Skip following if not waking from hibernate
-			bl		EXT(hw_clear_maps)					; Mark all maps as absent from hash table
-			bl		EXT(hw_hash_init)					; Clear hash table
-			bl		EXT(save_snapshot_restore)			; Reset save area chains
-noHashTableInit:
-			bl	EXT(hw_setup_trans)						; Set up hardware needed for translation
-			bl	EXT(hw_start_trans)						; Start translating 
-
-run32:
-			rlwinm.	r0,r17,0,pf64Bitb,pf64Bitb			; Is this a 64-bit machine?
-			beq++	isnot64								; Skip following if not 64-bit...
-			
-			mfmsr	r29									; Get the MSR
-			rldicl	r29,r29,0,MSR_SF_BIT+1				; turn 64-bit mode off
-			mtmsrd	r29									; Set it
-			isync										; Make sure
-			
-isnot64:	bf		bootCPU,callcpu					
-
-			lis		r29,HIGH_ADDR(EXT(intstack))		; move onto interrupt stack
-			ori		r29,r29,LOW_ADDR(EXT(intstack))
-			addi	r29,r29,INTSTACK_SIZE-FM_SIZE
-
-			li		r28,0
-			stw		r28,FM_BACKPTR(r29) 				; store a null frame backpointer
-	
-			mr		r1,r29
-			mr		r3,r31								; Restore any arguments we may have trashed
-
-;			Note that we exit from here with translation still off
-
-			bl	EXT(ppc_init)							; Jump into boot init code
-			BREAKPOINT_TRAP
-
-callcpu:
-			mfsprg	r31,1								; Fake activation pointer
-			lwz		r31,ACT_PER_PROC(r31)				; Load per_proc
-			lwz		r29,PP_INTSTACK_TOP_SS(r31)			; move onto interrupt stack
-
-			li		r28,0
-			stw		r28,FM_BACKPTR(r29) 				; store a null frame backpointer
-
-			mr		r1,r29								; move onto new stack
-			mr		r3,r31								; Restore any arguments we may have trashed
-
-;			Note that we exit from here with translation on
-
-			bl		EXT(ppc_init_cpu)					; Jump into cpu init code
-			BREAKPOINT_TRAP								; Should never return
-
-;
-;			Specific processor initialization routines
-;
-
-;			750
-
-init750:
-			bf	firstBoot, init750nb						; No init for wakeup....
-
-			mfspr	r13,l2cr							; Get the L2CR
-			rlwinm.	r0,r13,0,l2e,l2e					; Any L2?
-			bne+	i750hl2								; Yes...
-			rlwinm	r17,r17,0,pfL2b+1,pfL2b-1			; No L2, turn off feature
-			
-i750hl2:
-			lis	r14,hi16(256*1024)						; Base L2 size
-			addis	r15,r13,0x3000							; Hah... Figure this one out...
-			rlwinm	r15,r15,4,30,31							; Isolate
-			rlwinm.	r8,r13,0,l2siz,l2sizf						; Was size valid?
-			slw	r14,r14,r15							; Set 256KB, 512KB, or 1MB
-			beq-	init750l2none							; Not a valid setting...
-			
-			stw	r13,pfl2crOriginal(r30)						; Shadow the L2CR
-			stw	r13,pfl2cr(r30)							; Shadow the L2CR
-			stw	r14,pfl2Size(r30)						; Set the L2 size
-			b	init750l2done							; Done with L2
-			
-init750l2none:
-			rlwinm	r17,r17,0,pfL2b+1,pfL2b-1					; No level 2 cache
-			
-init750l2done:
-			mfspr	r11,hid0							; Get the current HID0
-			stw	r11,pfHID0(r30)							; Save the HID0 value
-			blr									; Return...
-			
-init750nb:
-			lwz	r11,pfHID0(r30)							; Get HID0
-			sync
-			mtspr	hid0,r11							; Set the HID
-			isync
-			sync
-			blr
-
-;			750CX
-
-init750CX:
-			bf	firstBoot, init750						; No init for wakeup....
-			mfspr	r13,hid1							; Get HID1
-			li	r14,lo16(0xFD5F)						; Get valid
-			rlwinm	r13,r13,4,28,31						; Isolate
-			slw	r14,r14,r13								; Position
-			rlwimi	r17,r14,15-pfCanNapb,pfCanNapb,pfCanNapb	; Set it			
-			b	init750									; Join common...
-
-
-;			750FX
-
-init750FX:
-			bf	firstBoot, init750FXnb
-			mfspr	r11, hid1
-			stw	r11, pfHID1(r30)						; Save the HID1 value
-			b	init750
-
-init750FXnb:
-			lwz	r13, pfHID0(r30)						; Get HID0
-			lwz	r11, pfHID1(r30)						; Get HID1
-
-			rlwinm.	r0, r11, 0, hid1ps, hid1ps			; Isolate the hid1ps bit
-			beq	init750FXnb2							; Skip the BTIC clear if hid1ps is clear
-			rlwinm	r13, r13, 0, btic+1, btic-1			; Clear the BTIC bit
-
-init750FXnb2:
-			sync
-			mtspr	hid0, r13							; Set the HID
-			isync
-			sync
-
-			rlwinm  r12, r11, 0, hid1ps+1, hid1ps-1		; Select PLL0
-			mtspr	hid1, r12							; Restore PLL config
-			mftb	r13									; Wait 5000 ticks (> 200 us)
-
-init750FXnbloop:
-			mftb	r14
-			sub	r14, r14, r13
-			cmpli	cr0, r14, 5000
-			ble	init750FXnbloop
-			mtspr	hid1, r11							; Select the desired PLL
-			blr
-
-;			750FX vers 2.0 or later
-init750FXV2:
-			bf	firstBoot, init750FXV2nb					; Wake from sleep
-
-			mfspr	r11, hid2
-			stw	r11, pfHID2(r30)						; Save the HID2 value
-			b	init750FX							; Continue with 750FX init
-
-init750FXV2nb:
-			lwz	r13, pfHID2(r30)						; Get HID2
-			rlwinm	r13, r13, 0, hid2vmin+1, hid2vmin-1				; Clear the vmin bit
-			mtspr	hid2, r13							; Restore HID2 value
-			sync									; Wait for it to be done
-			b	init750FX
-
-;			7400
-
-init7400:	bf		firstBoot,i7400nb					; Do different if not initial boot...
-			mfspr	r13,l2cr							; Get the L2CR
-			rlwinm.	r0,r13,0,l2e,l2e					; Any L2?
-			bne+	i7400hl2							; Yes...
-			rlwinm	r17,r17,0,pfL2b+1,pfL2b-1			; No L2, turn off feature
-			
-i7400hl2:	lis		r14,hi16(256*1024)					; Base L2 size
-			addis	r15,r13,0x3000						; Hah... Figure this one out...
-			rlwinm	r15,r15,4,30,31						 
-			slw		r14,r14,r15							; Set 256KB, 512KB, 1MB, or 2MB
-			
-			stw		r13,pfl2crOriginal(r30)				; Shadow the L2CR
-			stw		r13,pfl2cr(r30)						; Shadow the L2CR
-			stw		r14,pfl2Size(r30)					; Set the L2 size
-			
-			mfspr	r11,hid0							; Get the current HID0
-			oris	r11,r11,hi16(emcpm|eiecm)			; ?
-			mtspr	hid0,r11							; ?
-			isync
-			stw		r11,pfHID0(r30)						; Save the HID0 value
-
-			mfspr	r11,msscr0							; Get the msscr0 register
-			stw		r11,pfMSSCR0(r30)					; Save the MSSCR0 value
-			mfspr	r11,msscr1							; Get the msscr1 register
-			stw		r11,pfMSSCR1(r30)					; Save the MSSCR1 value
-			blr											; Return...
-			
-i7400nb:
-			li		r11,0
-			mtspr	l2cr,r11							; Make sure L2CR is zero
-			lwz		r11,pfHID0(r30)						; Get HID0
-			sync
-			mtspr	hid0,r11							; Set the HID
-			isync
-			sync			
-			lwz		r11,pfMSSCR0(r30)					; Get MSSCR0
-			isync
-			sync
-			mtspr	msscr0,r11							; Set the MSSCR0
-			lwz		r11,pfMSSCR1(r30)					; Get msscr1
-			isync
-			sync
-			mtspr	msscr1,r11							; Set the msscr1
-			isync
-			sync
-			blr
-
-;			7400 (ver 2.0 - ver 2.7)
-
-init7400v2_7:
-			bf	firstBoot, init7400
-			mfspr	r13, hid0							; Get the HID0
-			ori	r13, r13, nopdstm						; ?
-			mtspr	hid0, r13							; Set the HID0
-			isync
-			sync
-			b	init7400
-
-;			7410
-;			Note that this is the same as 7400 except we initialize the l2cr2 register
-
-init7410:	li		r13,0								; Clear
-			mtspr	1016,r13							; Turn off direct cache
-			b		init7400							; Join up with common....
-
-
-;			745X - Any 7450 family processor
-
-init745X:
-			bf		firstBoot,init745Xnb				; Do different if not initial boot...
-
-			mfspr	r13,l2cr							; Get the L2CR
-			rlwinm.	r0,r13,0,l2e,l2e					; Any L2?
-			bne+	init745Xhl2							; Yes...
-			rlwinm	r17,r17,0,pfL2b+1,pfL2b-1			; No L2, turn off feature
-			
-init745Xhl2:
-			mfpvr	r14									; Get processor version
-			rlwinm	r14,r14,16,16,31					; Isolate processor version
-			cmpli	cr0, r14, PROCESSOR_VERSION_7457	; Test for 7457 or
-			cmpli	cr1, r14, PROCESSOR_VERSION_7447A	; 7447A
-			cror	cr0_eq, cr1_eq, cr0_eq
-			lis		r14,hi16(512*1024)					; 512KB L2
-			beq		init745Xhl2_2
-
-			lis		r14,hi16(256*1024)					; Base L2 size
-			rlwinm	r15,r13,22,12,13					; Convert to 256k, 512k, or 768k
-			add		r14,r14,r15							; Add in minimum
-
-init745Xhl2_2:
-			stw		r13,pfl2crOriginal(r30)				; Shadow the L2CR
-			stw		r13,pfl2cr(r30)						; Shadow the L2CR
-			stw		r14,pfl2Size(r30)					; Set the L2 size
-				
-;			Take care of level 3 cache
-
-			mfspr	r13,l3cr							; Get the L3CR
-			rlwinm.	r0,r13,0,l3e,l3e					; Any L3?
-			bne+	init745Xhl3							; Yes...
-			rlwinm	r17,r17,0,pfL3b+1,pfL3b-1			; No L3, turn off feature
-
-init745Xhl3:	cmplwi	cr0,r13,0						; No L3 if L3CR is zero
-			beq-	init745Xnone						; Go turn off the features...
-			lis		r14,hi16(1024*1024)					; Base L3 size
-			rlwinm	r15,r13,4,31,31						; Get size multiplier
-			slw		r14,r14,r15							; Set 1 or 2MB
-			
-			stw		r13,pfl3crOriginal(r30)				; Shadow the L3CR
-			stw		r13,pfl3cr(r30)						; Shadow the L3CR
-			stw		r14,pfl3Size(r30)					; Set the L3 size
-			b		init745Xfin							; Return....
-				
-init745Xnone:
-			rlwinm	r17,r17,0,pfL3fab+1,pfL3b-1			; No 3rd level cache or assist
-			rlwinm	r11,r17,pfWillNapb-pfCanNapb,pfCanNapb,pfCanNapb		; Set pfCanNap if pfWillNap is set
-			or	r17,r17,r11
-
-init745Xfin:
-			rlwinm	r17,r17,0,pfWillNapb+1,pfWillNapb-1	; Make sure pfWillNap is not set
-
-			mfspr	r11,hid0							; Get the current HID0
-			stw		r11,pfHID0(r30)						; Save the HID0 value
-			mfspr	r11,hid1							; Get the current HID1
-			stw		r11,pfHID1(r30)						; Save the HID1 value
-			mfspr	r11,msscr0							; Get the msscr0 register
-			stw		r11,pfMSSCR0(r30)					; Save the MSSCR0 value
-			mfspr	r11,msscr1							; Get the msscr1 register
-			stw		r11,pfMSSCR1(r30)					; Save the MSSCR1 value
-			mfspr	r11,ictrl							; Get the ictrl register
-			stw		r11,pfICTRL(r30)					; Save the ICTRL value
-			mfspr	r11,ldstcr							; Get the ldstcr register
-			stw		r11,pfLDSTCR(r30)					; Save the LDSTCR value
-			mfspr	r11,ldstdb							; Get the ldstdb register
-			stw		r11,pfLDSTDB(r30)					; Save the LDSTDB value
-			mfspr	r11,pir								; Get the pir register
-			stw		r11,pfBootConfig(r30)					; Save the BootConfig value
-			blr											; Return....
-
-
-init745Xnb:	lwz		r11,pfHID0(r30)						; Get HID0
-			sync
-			mtspr	hid0,r11							; Set the HID
-			isync
-			lwz		r11,pfHID1(r30)						; Get HID1
-			sync
-			mtspr	hid1,r11							; Set the HID
-			isync
-			lwz		r11,pfMSSCR0(r30)					; Get MSSCR0
-			sync
-			mtspr	msscr0,r11							; Set the MSSCR0
-			isync
-			sync
-			lwz		r11,pfICTRL(r30)					; Get ICTRL
-			sync
-			mtspr	ictrl,r11							; Set the ICTRL
-			isync
-			sync
-			lwz		r11,pfLDSTCR(r30)					; Get LDSTCR
-			sync
-			mtspr	ldstcr,r11							; Set the LDSTCR
-			isync
-			sync
-			lwz		r11,pfLDSTDB(r30)					; Get LDSTDB
-			sync
-			mtspr	ldstdb,r11							; Set the LDSTDB
-			isync
-			sync
-			blr
-
-;			7450 - Specific
-
-init7450:
-			bf	firstBoot, init745X						; Not boot, use standard init
-			
-			mfspr	r13, pir							; Get BootConfig from PIR
-			rlwinm.	r14, r13, 0, 20, 23						; Is the pdet value zero
-			bne	init7450done							; No, done for now
-			
-			ori	r13, r13, 0x0400						; Force pdet value to 4
-			mtspr	pir, r13							; Write back the BootConfig
-			
-init7450done:
-			b	init745X							; Continue with standard init
-
-
-init970:
-			lis		r20,8								; Set up for 512K L2
-init970x:
-			li		r0,0								; Clear this
-			mtspr	hior,r0								; Make sure that 0 is interrupt prefix
-			bf		firstBoot,init970nb					; No init for wakeup or second processor....
-
-
-;
-;			We cannot query or change the L2 size.  We just
-;			phony up an L2CR to keep sysctl "happy" and report the
-;			L2 size as 512K.
-;
-
-			lis		r0,0x8000							; Synthesize a "valid" but non-existent L2CR
-			stw		r0,pfl2crOriginal(r30)				; Set a dummy L2CR
-			stw		r0,pfl2cr(r30)						; Set a dummy L2CR
-			stw		r20,pfl2Size(r30)					; Set the L2 size
-
-			mfspr	r11,hid0							; Get original hid0
-			std		r11,pfHID0(r30)						; Save original
-			mfspr	r11,hid1							; Get original hid1
-			std		r11,pfHID1(r30)						; Save original
-			mfspr	r11,hid4							; Get original hid4
-			std		r11,pfHID4(r30)						; Save original
-			mfspr	r11,hid5							; Get original hid5
-			std		r11,pfHID5(r30)						; Save original
-
-			lis		r0, hi16(dnapm)						; Create a mask for the dnap bit
-			sldi	r0, r0, 32							; Shift to the top half
-			ld		r11,pfHID0(r30)						; Load the hid0 value
-			andc	r11, r11, r0						; Clear the dnap bit
-			isync
-			mtspr	hid0,r11							; Stuff it
-			mfspr	r11,hid0							; Get it
-			mfspr	r11,hid0							; Get it
-			mfspr	r11,hid0							; Get it
-			mfspr	r11,hid0							; Get it
-			mfspr	r11,hid0							; Get it
-			mfspr	r11,hid0							; Get it
-			isync
-
-			lis		r0,(pcfValid|pcfLarge|pcfDedSeg)<<8	; Set the valid bit, dedicated segment, and large page flags
-			ori		r0,r0,(24<<8)|24					; Add in the 16M page size
-			stw		r0,lgpPcfg+(pcfSize*pcfLargePcfg)(0)	; Set the 16M primary large page configuration entry
-
-			blr
-			
-;
-;			Start up code for second processor or wake up from sleep
-;
-			
-init970nb:
-			lis		r0, hi16(dnapm)						; Create a mask for the dnap bit
-			sldi	r0, r0, 32							; Shift to the top half
-			ld		r11,pfHID0(r30)						; Load the hid0 value
-			andc	r11, r11, r0						; Clear the dnap bit
-			isync
-			mtspr	hid0,r11							; Stuff it
-			mfspr	r11,hid0							; Get it
-			mfspr	r11,hid0							; Get it
-			mfspr	r11,hid0							; Get it
-			mfspr	r11,hid0							; Get it
-			mfspr	r11,hid0							; Get it
-			mfspr	r11,hid0							; Get it
-			isync
-		
-			ld		r20,pfHID1(r30)						; Get it
-			isync
-			mtspr	hid1,r20							; Stick it
-			mtspr	hid1,r20							; Stick it again
-			isync
-		
-			ld		r11,pfHID4(r30)						; Get it
-			sync
-			mtspr	hid4,r11							; Stick it
-			isync
-
-			lis		r11,0xE000							; Get the unlikeliest ESID possible
-			srdi	r11,r11,1							; Make 0x7FFFFFFFF0000000
-			slbie	r11									; Make sure the ERAT is cleared 
-			
-			ld		r11,pfHID5(r30)						; Get it
-			mtspr	hid5,r11							; Set it
-			isync
-;
-;			May have changed dcbz mode so kill icache
-;
-
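What follows is the I-cache kill: with force-icbi-match mode set in HID1, icbi is issued for every 128-byte line in the first 64KB of addresses, one pass over the 970's 64KB I-cache. In C terms (a sketch; __icbi() stands in for the icbi instruction):

    uint32_t ea;
    for (ea = 0; ea < 0x10000; ea += 128)   /* 64KB in 128-byte lines */
        __icbi((const void *)ea);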
-			eqv		r13,r13,r13							; Get a constant -1
-			mr		r14,r20								; Save HID1
-			rldimi	r14,r13,54,9						; Set force icbi match mode
-			
-			li		r11,0								; Set start of ICBI range
-			isync
-			mtspr	hid1,r14							; Stick it
-			mtspr	hid1,r14							; Stick it again
-			isync
-
-inin970ki:	icbi	0,r11								; Kill I$
-			addi	r11,r11,128							; Next line
-			andis.	r0,r11,1							; Have we done them all?
-			beq++	inin970ki							; Not yet...
-
-			isync
-			mtspr	hid1,r20							; Stick it
-			mtspr	hid1,r20							; Stick it again
-			isync
-
-			blr											; Leave...
-
-
-
-;			Unsupported Processors
-initUnsupported:
-			mtlr	r2					; Restore the return address
-			blr						; Return to the booter
-
-
-;
-;	Processor to feature table
-
-;	.align	2				- Always on word boundary
-;	.long	ptFilter		- Mask of significant bits in the Version/Revision code
-;							- NOTE: Always order from most restrictive to least restrictive matching
-;	.short	ptVersion		- Version code from PVR.  Always start with 0 which is default
-;	.short	ptRevision		- Revision code from PVR.  A zero value denotes the generic attributes used when no specific revision matches
-;	.long	ptFeatures		- Available features
-;	.long	ptCPUCap		- Default value for _cpu_capabilities
-;	.long	ptPwrModes		- Available power management features
-;	.long	ptPatch			- Patch features
-;	.long	ptInitRout		- Initialization routine.  Can modify any of the other attributes.
-;	.long	ptRptdProc		- Processor type reported
-;	.long	ptLineSize		- Level 1 cache line size
-;	.long	ptl1iSize		- Level 1 instruction cache size
-;	.long	ptl1dSize		- Level 1 data cache size
-;	.long	ptPTEG			- Size of PTEG
-;	.long	ptMaxVAddr		- Maximum effective address
-;	.long	ptMaxPAddr		- Maximum physical address
-;
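Matching, as the nextPVR loop implements it, masks the PVR with ptFilter and compares against the packed version/revision word; because entries are ordered most-restrictive first, the catch-all entry (filter 0) is reached only when nothing else matches. In C (a sketch; mfpvr() stands in for the instruction):

    uint32_t pvr = mfpvr();
    int matches  = (pvr & e->ptFilter) ==
                   (((uint32_t)e->ptVersion << 16) | e->ptRevision);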
-	
-	.align	2
-processor_types:
-
-;       750CX (ver 2.x)
-
-			.align  2
-			.long   0xFFFF0F00              ; 2.x vers
-			.short  PROCESSOR_VERSION_750
-			.short  0x0200
-			.long   pfFloat | pfCanSleep | pfCanNap | pfCanDoze | pf32Byte | pfL2
-			.long   kCache32 | kHasGraphicsOps | kHasStfiwx
-			.long   0
-			.long	PatchExt32
-			.long   init750CX
-			.long   CPU_SUBTYPE_POWERPC_750
-			.long   32
-			.long   32*1024
-			.long   32*1024
-			.long	64
-			.long	52
-			.long	32
-
-;	750 (generic)
-
-			.align	2
-			.long	0xFFFF0000		; All revisions
-			.short	PROCESSOR_VERSION_750
-			.short	0
-			.long	pfFloat | pfCanSleep | pfCanNap | pfCanDoze | pf32Byte | pfL2
-			.long   kCache32 | kHasGraphicsOps | kHasStfiwx
-			.long   0
-			.long	PatchExt32
-			.long	init750
-			.long	CPU_SUBTYPE_POWERPC_750
-			.long	32
-			.long	32*1024
-			.long	32*1024
-			.long	64
-			.long	52
-			.long	32
-
-;       750FX (ver 1.x)
-
-			.align  2
-			.long   0xFFFF0F00              ; 1.x vers
-			.short  PROCESSOR_VERSION_750FX
-			.short  0x0100
-			.long   pfFloat | pfCanSleep | pfCanNap | pfCanDoze | pfSlowNap | pfNoMuMMCK | pf32Byte | pfL2
-			.long   kCache32 | kHasGraphicsOps | kHasStfiwx
-			.long   pmDualPLL
-			.long	PatchExt32
-			.long   init750FX
-			.long   CPU_SUBTYPE_POWERPC_750
-			.long   32
-			.long   32*1024
-			.long   32*1024
-			.long	64
-			.long	52
-			.long	32
-
-;       750FX (generic)
-
-			.align  2
-			.long   0xFFFF0000              ; All revisions
-			.short  PROCESSOR_VERSION_750FX
-			.short  0
-			.long   pfFloat | pfCanSleep | pfCanNap | pfCanDoze | pfSlowNap | pfNoMuMMCK | pf32Byte | pfL2
-			.long   kCache32 | kHasGraphicsOps | kHasStfiwx
-			.long   pmDualPLL | pmDPLLVmin
-			.long	PatchExt32
-			.long   init750FXV2
-			.long   CPU_SUBTYPE_POWERPC_750
-			.long   32
-			.long   32*1024
-			.long   32*1024
-			.long	64
-			.long	52
-			.long	32
-
-;	7400 (ver 2.0 - ver 2.7)
-
-			.align	2
-			.long	0xFFFFFFF8		; ver 2.0 - 2.7
-			.short	PROCESSOR_VERSION_7400
-			.short	0x0200
-			.long	pfFloat | pfAltivec | pfSMPcap | pfCanSleep | pfCanNap | pfCanDoze | pf32Byte | pfL1fa | pfL2 | pfL2fa | pfHasDcba
-			.long   kHasAltivec | kCache32 | kDcbaAvailable | kDataStreamsAvailable | kHasGraphicsOps | kHasStfiwx
-			.long	0
-			.long	PatchExt32
-			.long	init7400v2_7
-			.long	CPU_SUBTYPE_POWERPC_7400
-			.long	32
-			.long	32*1024
-			.long	32*1024
-			.long	64
-			.long	52
-			.long	32
-
-;	7400 (generic)
-
-			.align	2
-			.long	0xFFFF0000		; All revisions
-			.short	PROCESSOR_VERSION_7400
-			.short	0
-			.long	pfFloat | pfAltivec | pfSMPcap | pfCanSleep | pfCanNap | pfCanDoze | pf32Byte | pfL1fa | pfL2 | pfL2fa | pfHasDcba
-			.long   kHasAltivec | kCache32 | kDcbaAvailable | kDataStreamsRecommended | kDataStreamsAvailable | kHasGraphicsOps | kHasStfiwx
-			.long	0
-			.long	PatchExt32
-			.long	init7400
-			.long	CPU_SUBTYPE_POWERPC_7400
-			.long	32
-			.long	32*1024
-			.long	32*1024
-			.long	64
-			.long	52
-			.long	36
-
-;	7410 (ver 1.1)
-
-			.align	2
-			.long	0xFFFFFFFF		; Exact match
-			.short	PROCESSOR_VERSION_7400
-			.short	0x1101
-			.long	pfFloat | pfAltivec | pfSMPcap | pfCanSleep | pfCanNap | pfCanDoze | pf32Byte | pfL1fa | pfL2 | pfL2fa | pfHasDcba
-			.long   kHasAltivec | kCache32 | kDcbaAvailable | kDataStreamsRecommended | kDataStreamsAvailable | kHasGraphicsOps | kHasStfiwx
-			.long	0
-			.long	PatchExt32
-			.long	init7410
-			.long	CPU_SUBTYPE_POWERPC_7400
-			.long	32
-			.long	32*1024
-			.long	32*1024
-			.long	64
-			.long	52
-			.long	36
-
-;	7410 (generic)
-
-			.align	2
-			.long	0xFFFF0000		; All other revisions
-			.short	PROCESSOR_VERSION_7410
-			.short	0
-			.long	pfFloat | pfAltivec | pfSMPcap | pfCanSleep | pfCanNap | pfCanDoze | pf32Byte | pfL1fa | pfL2 | pfL2fa | pfHasDcba
-			.long   kHasAltivec | kCache32 | kDcbaAvailable | kDataStreamsRecommended | kDataStreamsAvailable | kHasGraphicsOps | kHasStfiwx
-			.long	0
-			.long	PatchExt32
-			.long	init7410
-			.long	CPU_SUBTYPE_POWERPC_7400
-			.long	32
-			.long	32*1024
-			.long	32*1024
-			.long	64
-			.long	52
-			.long	36
-
-;	7450 (ver 1.xx)
-
-			.align	2
-			.long	0xFFFFFF00		; Just revisions 1.xx
-			.short	PROCESSOR_VERSION_7450
-			.short	0x0100
-			.long	pfFloat | pfAltivec | pfSMPcap | pfCanSleep | pfNoMSRir | pfNoL2PFNap | pfLClck | pf32Byte | pfL2 | pfL2fa | pfL2i | pfL3 | pfL3fa  | pfHasDcba
-			.long   kHasAltivec | kCache32 | kDcbaAvailable | kDataStreamsRecommended | kDataStreamsAvailable | kHasGraphicsOps | kHasStfiwx
-			.long	0
-			.long	PatchExt32
-			.long	init7450
-			.long	CPU_SUBTYPE_POWERPC_7450
-			.long	32
-			.long	32*1024
-			.long	32*1024
-			.long	64
-			.long	52
-			.long	36
-
-;	7450 (2.0)
-
-			.align	2
-			.long	0xFFFFFFFF		; Just revision 2.0
-			.short	PROCESSOR_VERSION_7450
-			.short	0x0200
-			.long	pfFloat | pfAltivec | pfSMPcap | pfCanSleep | pfNoMSRir | pfNoL2PFNap | pfLClck | pf32Byte | pfL2 | pfL2fa | pfL2i | pfL3 | pfL3fa | pfHasDcba
-			.long   kHasAltivec | kCache32 | kDcbaAvailable | kDataStreamsRecommended | kDataStreamsAvailable | kHasGraphicsOps | kHasStfiwx
-			.long	0
-			.long	PatchExt32
-			.long	init7450
-			.long	CPU_SUBTYPE_POWERPC_7450
-			.long	32
-			.long	32*1024
-			.long	32*1024
-			.long	64
-			.long	52
-			.long	36
-
-;	7450 (2.1)
-
-			.align	2
-			.long	0xFFFF0000		; All other revisions
-			.short	PROCESSOR_VERSION_7450
-			.short	0
-			.long	pfFloat | pfAltivec | pfSMPcap | pfCanSleep | pfWillNap | pfNoMSRir | pfNoL2PFNap | pfLClck | pf32Byte | pfL2 | pfL2fa | pfL2i | pfL3 | pfL3fa | pfHasDcba
-			.long   kHasAltivec | kCache32 | kDcbaAvailable | kDataStreamsRecommended | kDataStreamsAvailable | kHasGraphicsOps | kHasStfiwx
-			.long	0
-			.long	PatchExt32
-			.long	init7450
-			.long	CPU_SUBTYPE_POWERPC_7450
-			.long	32
-			.long	32*1024
-			.long	32*1024
-			.long	64
-			.long	52
-			.long	36
-
-;	7455 (1.xx)  Just like 7450 2.0
-
-			.align	2
-			.long	0xFFFFFF00		; Just revisions 1.xx
-			.short	PROCESSOR_VERSION_7455
-			.short	0x0100
-			.long	pfFloat | pfAltivec | pfSMPcap | pfCanSleep | pfNoMSRir | pfNoL2PFNap | pfLClck | pf32Byte | pfL2 | pfL2fa | pfL2i | pfL3 | pfL3fa | pfHasDcba
-			.long   kHasAltivec | kCache32 | kDcbaAvailable | kDataStreamsRecommended | kDataStreamsAvailable | kHasGraphicsOps | kHasStfiwx
-			.long	0
-			.long	PatchExt32
-			.long	init745X
-			.long	CPU_SUBTYPE_POWERPC_7450
-			.long	32
-			.long	32*1024
-			.long	32*1024
-			.long	64
-			.long	52
-			.long	36
-
-;	7455 (2.0)
-
-			.align	2
-			.long	0xFFFFFFFF		; Just revision 2.0
-			.short	PROCESSOR_VERSION_7455
-			.short	0x0200
-			.long	pfFloat | pfAltivec | pfSMPcap | pfCanSleep | pfWillNap | pfNoMSRir | pfNoL2PFNap | pfLClck | pf32Byte | pfL2 | pfL2fa | pfL2i | pfL3 | pfL3fa | pfHasDcba
-			.long   kHasAltivec | kCache32 | kDcbaAvailable | kDataStreamsRecommended | kDataStreamsAvailable | kHasGraphicsOps | kHasStfiwx
-			.long	0
-			.long	PatchExt32
-			.long	init745X
-			.long	CPU_SUBTYPE_POWERPC_7450
-			.long	32
-			.long	32*1024
-			.long	32*1024
-			.long	64
-			.long	52
-			.long	36
-
-;	7455 (2.1)
-
-			.align	2
-			.long	0xFFFF0000		; All other revisions
-			.short	PROCESSOR_VERSION_7455
-			.short	0
-			.long	pfFloat | pfAltivec | pfSMPcap | pfCanSleep | pfCanNap | pfNoMSRir | pfNoL2PFNap | pfLClck | pf32Byte | pfL2 | pfL2fa | pfL2i | pfL3 | pfL3fa | pfHasDcba
-			.long   kHasAltivec | kCache32 | kDcbaAvailable | kDataStreamsRecommended | kDataStreamsAvailable | kHasGraphicsOps | kHasStfiwx
-			.long	0
-			.long	PatchExt32
-			.long	init745X
-			.long	CPU_SUBTYPE_POWERPC_7450
-			.long	32
-			.long	32*1024
-			.long	32*1024
-			.long	64
-			.long	52
-			.long	36
-
-;	7457
-
-			.align	2
-			.long	0xFFFF0000		; All revisions
-			.short	PROCESSOR_VERSION_7457
-			.short	0
-			.long	pfFloat | pfAltivec | pfSMPcap | pfCanSleep | pfCanNap | pfNoMSRir | pfNoL2PFNap | pfLClck | pf32Byte | pfL2 | pfL2fa | pfL2i | pfL3 | pfL3fa | pfHasDcba
-			.long   kHasAltivec | kCache32 | kDcbaAvailable | kDataStreamsRecommended | kDataStreamsAvailable | kHasGraphicsOps | kHasStfiwx
-			.long	0
-			.long	PatchExt32
-			.long	init745X
-			.long	CPU_SUBTYPE_POWERPC_7450
-			.long	32
-			.long	32*1024
-			.long	32*1024
-			.long	64
-			.long	52
-			.long	36
-
-;	7447A
-
-			.align	2
-			.long	0xFFFF0000		; All revisions
-			.short	PROCESSOR_VERSION_7447A
-			.short	0
-			.long	pfFloat | pfAltivec | pfSMPcap | pfCanSleep | pfCanNap | pfNoMSRir | pfNoL2PFNap | pfLClck | pf32Byte | pfL2 | pfL2fa | pfL2i | pfL3 | pfL3fa | pfHasDcba
-			.long   kHasAltivec | kCache32 | kDcbaAvailable | kDataStreamsRecommended | kDataStreamsAvailable | kHasGraphicsOps | kHasStfiwx
-			.long	pmDFS
-			.long	PatchExt32
-			.long	init745X
-			.long	CPU_SUBTYPE_POWERPC_7450
-			.long	32
-			.long	32*1024
-			.long	32*1024
-			.long	64
-			.long	52
-			.long	36
-
-;	970
-
-			.align	2
-			.long	0xFFFF0000		; All versions so far
-			.short	PROCESSOR_VERSION_970
-			.short	0
-			.long	pfFloat | pfAltivec | pfSMPcap | pfCanSleep | pfCanNap | pf128Byte | pf64Bit | pfL2 | pfSCOMFixUp
-			.long   kHasAltivec | k64Bit | kCache128 | kDataStreamsAvailable | kDcbtStreamsRecommended | kDcbtStreamsAvailable | kHasGraphicsOps | kHasStfiwx | kHasFsqrt
-			.long	0
-			.long	PatchLwsync
-			.long	init970
-			.long	CPU_SUBTYPE_POWERPC_970
-			.long	128
-			.long	64*1024
-			.long	32*1024
-			.long	128
-			.long	65
-			.long	42
-
-;	970FX
-
-			.align	2
-			.long	0xFFFF0000		; All versions so far
-			.short	PROCESSOR_VERSION_970FX
-			.short	0
-			.long	pfFloat | pfAltivec | pfSMPcap | pfCanSleep | pfCanNap | pf128Byte | pf64Bit | pfL2
-			.long   kHasAltivec | k64Bit | kCache128 | kDataStreamsAvailable | kDcbtStreamsRecommended | kDcbtStreamsAvailable | kHasGraphicsOps | kHasStfiwx | kHasFsqrt
-			.long	pmPowerTune
-			.long	PatchLwsync
-			.long	init970
-			.long	CPU_SUBTYPE_POWERPC_970
-			.long	128
-			.long	64*1024
-			.long	32*1024
-			.long	128
-			.long	65
-			.long	42
-
-
-;	All other processors are not supported
-
-			.align	2
-			.long	0x00000000		; Matches everything
-			.short	0
-			.short	0
-			.long	pfFloat | pf32Byte
-			.long   kCache32 | kHasGraphicsOps | kHasStfiwx
-			.long	0
-			.long	PatchExt32
-			.long	initUnsupported
-			.long	CPU_SUBTYPE_POWERPC_ALL
-			.long	32
-			.long	32*1024
-			.long	32*1024
-			.long	64
-			.long	52
-			.long	32
-
diff --git a/osfmk/ppc/status.c b/osfmk/ppc/status.c
deleted file mode 100644
index 50fee6015..000000000
--- a/osfmk/ppc/status.c
+++ /dev/null
@@ -1,1820 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- * 
- */
-
-#include <kern/thread.h>
-#include <kern/misc_protos.h>
-#include <mach/ppc/thread_status.h>
-#include <ppc/proc_reg.h>
-#include <ppc/cpu_internal.h>
-#include <ppc/exception.h>
-#include <ppc/misc_protos.h>
-#include <ppc/fpu_protos.h>
-#include <ppc/savearea.h>
-#include <ppc/thread.h>
-#include <ppc/Firmware.h>
-
-typedef	unsigned int fixpt_t;	/* XXX <sys/resource.h> not self contained */
-#include <ppc/vmparam.h>	/* USRSTACK, etc. */
-
-#include <vm/vm_map.h>
-
-extern unsigned int killprint;
-extern double FloatInit;
-extern unsigned long QNaNbarbarian[4];
-
-kern_return_t
-thread_userstack(
-    thread_t,
-    int,
-    thread_state_t,
-    unsigned int,
-	mach_vm_offset_t *,
-	int *
-);
-
-kern_return_t
-thread_entrypoint(
-    thread_t,
-    int,
-    thread_state_t,
-    unsigned int,
-    mach_vm_offset_t *
-); 
-
-unsigned int get_msr_exportmask(void);
-unsigned int get_msr_nbits(void);
-unsigned int get_msr_rbits(void);
-void ppc_checkthreadstate(void *, int);
-void thread_set_child(thread_t child, int pid);
-void thread_set_parent(thread_t parent, int pid);
-void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
-		
-/*
- * Maps state flavor to number of words in the state:
- */
-/* __private_extern__ */
-unsigned int _MachineStateCount[] = {
-	/* FLAVOR_LIST */ 0,
-	PPC_THREAD_STATE_COUNT,
-	PPC_FLOAT_STATE_COUNT,
-	PPC_EXCEPTION_STATE_COUNT,
-	PPC_VECTOR_STATE_COUNT,
-	PPC_THREAD_STATE64_COUNT,
-	PPC_EXCEPTION_STATE64_COUNT,
-};
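A table like this is typically consulted to bounds-check a flavor before copying state out (a sketch only; the handlers below actually compare against the explicit *_COUNT constants):

    if (flavor < sizeof(_MachineStateCount) / sizeof(_MachineStateCount[0]) &&
        *count >= _MachineStateCount[flavor]) {
        /* flavor is known and the caller's buffer is big enough */
    }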
-
-/*
- * thread_getstatus:
- *
- * Get the status of the specified thread.
- */
-
-kern_return_t 
-machine_thread_get_state(
-	thread_t				thread,
-	thread_flavor_t			flavor,
-	thread_state_t			tstate,
-	mach_msg_type_number_t	*count)
-{
-	
-	register struct savearea *sv;						/* Pointer to the context savearea */
-	register savearea_fpu *fsv;
-	register savearea_vec *vsv;
-	struct savearea *genuser;
-	int i, j;
-	unsigned int vrvalidwrk;
-
-	register struct ppc_thread_state *ts;
-	register struct ppc_thread_state64 *xts;
-	register struct ppc_exception_state *es;
-	register struct ppc_exception_state64 *xes;
-	register struct ppc_float_state *fs;
-	register struct ppc_vector_state *vs;
-	
-	genuser = find_user_regs(thread);
-
-	switch (flavor) {
-		
-		case THREAD_STATE_FLAVOR_LIST:
-			
-			if (*count < 6)  {
-				return (KERN_INVALID_ARGUMENT);
-			}
-		
-			tstate[0] = PPC_THREAD_STATE;
-			tstate[1] = PPC_FLOAT_STATE;
-			tstate[2] = PPC_EXCEPTION_STATE;
-			tstate[3] = PPC_VECTOR_STATE;
-			tstate[4] = PPC_THREAD_STATE64;
-			tstate[5] = PPC_EXCEPTION_STATE64;
-			*count = 6;
-		
-			return KERN_SUCCESS;
-	
-		case PPC_THREAD_STATE:
-	
-			if (*count < PPC_THREAD_STATE_COUNT) {			/* Is the count ok? */
-				return KERN_INVALID_ARGUMENT;
-			}
-		
-			ts = (struct ppc_thread_state *) tstate;
-
-			sv = genuser;									/* Copy this over */
-			
-			if(sv) {										/* Is there a save area yet? */
-				ts->r0	= (unsigned int)sv->save_r0;
-				ts->r1	= (unsigned int)sv->save_r1;
-				ts->r2	= (unsigned int)sv->save_r2;
-				ts->r3	= (unsigned int)sv->save_r3;
-				ts->r4	= (unsigned int)sv->save_r4;
-				ts->r5	= (unsigned int)sv->save_r5;
-				ts->r6	= (unsigned int)sv->save_r6;
-				ts->r7	= (unsigned int)sv->save_r7;
-				ts->r8	= (unsigned int)sv->save_r8;
-				ts->r9	= (unsigned int)sv->save_r9;
-				ts->r10	= (unsigned int)sv->save_r10;
-				ts->r11	= (unsigned int)sv->save_r11;
-				ts->r12	= (unsigned int)sv->save_r12;
-				ts->r13	= (unsigned int)sv->save_r13;
-				ts->r14	= (unsigned int)sv->save_r14;
-				ts->r15	= (unsigned int)sv->save_r15;
-				ts->r16	= (unsigned int)sv->save_r16;
-				ts->r17	= (unsigned int)sv->save_r17;
-				ts->r18	= (unsigned int)sv->save_r18;
-				ts->r19	= (unsigned int)sv->save_r19;
-				ts->r20	= (unsigned int)sv->save_r20;
-				ts->r21	= (unsigned int)sv->save_r21;
-				ts->r22	= (unsigned int)sv->save_r22;
-				ts->r23	= (unsigned int)sv->save_r23;
-				ts->r24	= (unsigned int)sv->save_r24;
-				ts->r25	= (unsigned int)sv->save_r25;
-				ts->r26	= (unsigned int)sv->save_r26;
-				ts->r27	= (unsigned int)sv->save_r27;
-				ts->r28	= (unsigned int)sv->save_r28;
-				ts->r29	= (unsigned int)sv->save_r29;
-				ts->r30	= (unsigned int)sv->save_r30;
-				ts->r31	= (unsigned int)sv->save_r31;
-				ts->cr	= (unsigned int)sv->save_cr;
-				ts->xer	= (unsigned int)sv->save_xer;
-				ts->lr	= (unsigned int)sv->save_lr;
-				ts->ctr	= (unsigned int)sv->save_ctr;
-				ts->srr0 = (unsigned int)sv->save_srr0;
-				ts->srr1 = (unsigned int)sv->save_srr1;
-				ts->mq	= 0;							/* MQ register (601 only) */
-				ts->vrsave	= (unsigned int)sv->save_vrsave;			/* VRSAVE register (Altivec only) */
-			}
-			else {										/* No user state yet. Save seemingly random values. */
-						
-				for(i=0; i < 32; i+=2) {				/* Fill up with defaults */
-					((unsigned int *)&ts->r0)[i] = ((unsigned int *)&FloatInit)[0];
-					((unsigned int *)&ts->r0)[i+1] = ((unsigned int *)&FloatInit)[1];
-				}
-				ts->cr	= 0;
-				ts->xer	= 0;
-				ts->lr	= ((unsigned int *)&FloatInit)[0];
-				ts->ctr	= ((unsigned int *)&FloatInit)[1];
-				ts->srr0	= ((unsigned int *)&FloatInit)[0];
-				ts->srr1 = MSR_EXPORT_MASK_SET;
-				ts->mq	= 0;
-				ts->vrsave	= 0;						/* VRSAVE register (Altivec only) */
-			}
-		
-			*count = PPC_THREAD_STATE_COUNT;			/* Pass back the amount we actually copied */
-			return KERN_SUCCESS;
-	
-	
-		case PPC_THREAD_STATE64:
-	
-			if (*count < PPC_THREAD_STATE64_COUNT) {	/* Is the count ok? */
-				return KERN_INVALID_ARGUMENT;
-			}
-		
-			xts = (struct ppc_thread_state64 *) tstate;
-
-			sv = genuser;								/* Copy this over */
-			
-			if(sv) {									/* Is there a save area yet? */
-				xts->r0		= sv->save_r0;
-				xts->r1		= sv->save_r1;
-				xts->r2		= sv->save_r2;
-				xts->r3		= sv->save_r3;
-				xts->r4		= sv->save_r4;
-				xts->r5		= sv->save_r5;
-				xts->r6		= sv->save_r6;
-				xts->r7		= sv->save_r7;
-				xts->r8		= sv->save_r8;
-				xts->r9		= sv->save_r9;
-				xts->r10	= sv->save_r10;
-				xts->r11	= sv->save_r11;
-				xts->r12	= sv->save_r12;
-				xts->r13	= sv->save_r13;
-				xts->r14	= sv->save_r14;
-				xts->r15	= sv->save_r15;
-				xts->r16	= sv->save_r16;
-				xts->r17	= sv->save_r17;
-				xts->r18	= sv->save_r18;
-				xts->r19	= sv->save_r19;
-				xts->r20	= sv->save_r20;
-				xts->r21	= sv->save_r21;
-				xts->r22	= sv->save_r22;
-				xts->r23	= sv->save_r23;
-				xts->r24	= sv->save_r24;
-				xts->r25	= sv->save_r25;
-				xts->r26	= sv->save_r26;
-				xts->r27	= sv->save_r27;
-				xts->r28	= sv->save_r28;
-				xts->r29	= sv->save_r29;
-				xts->r30	= sv->save_r30;
-				xts->r31	= sv->save_r31;
-				xts->cr		= sv->save_cr;
-				xts->xer	= sv->save_xer;
-				xts->lr		= sv->save_lr;
-				xts->ctr	= sv->save_ctr;
-				xts->srr0 	= sv->save_srr0;
-				xts->srr1 	= sv->save_srr1;
-				xts->vrsave	= sv->save_vrsave;			/* VRSAVE register (Altivec only) */
-			}
-			else {										/* No user state yet. Save seemingly random values. */
-						
-				for(i=0; i < 32; i++) {					/* Fill up with defaults */
-					((unsigned long long *)&xts->r0)[i] = ((unsigned long long *)&FloatInit)[0];
-				}
-				xts->cr		= 0;
-				xts->xer	= 0;
-				xts->lr		= ((unsigned long long *)&FloatInit)[0];
-				xts->ctr	= ((unsigned long long *)&FloatInit)[0];
-				xts->srr0	= ((unsigned long long *)&FloatInit)[0];
-				xts->srr1 	= MSR_EXPORT_MASK_SET;
-				if(task_has_64BitAddr(thread->task)) 
-					xts->srr1 |= (uint64_t)MASK32(MSR_SF) << 32;	/* If 64-bit task, force 64-bit mode */
-				xts->vrsave	= 0;						/* VRSAVE register (Altivec only) */
-			}
-		
-			*count = PPC_THREAD_STATE64_COUNT;			/* Pass back the amount we actually copied */
-			return KERN_SUCCESS;
-	
-		case PPC_EXCEPTION_STATE:
-	
-			if (*count < PPC_EXCEPTION_STATE_COUNT) {
-				return KERN_INVALID_ARGUMENT;
-			}
-		
-			es = (struct ppc_exception_state *) tstate;
-			sv = genuser;								/* Copy this over */
-		
-			if(sv) {									/* See if valid state yet */
-				es->dar = (unsigned int)sv->save_dar;
-				es->dsisr = sv->save_dsisr;
-				es->exception = sv->save_exception;
-			}
-			else {										/* Nope, not yet */
-				es->dar = 0;
-				es->dsisr = 0;
-				es->exception = ((unsigned int *)&FloatInit)[0];
-			}
-		
-			*count = PPC_EXCEPTION_STATE_COUNT;
-			return KERN_SUCCESS;
-	
-		case PPC_EXCEPTION_STATE64:
-	
-			if (*count < PPC_EXCEPTION_STATE64_COUNT) {
-				return KERN_INVALID_ARGUMENT;
-			}
-		
-			xes = (struct ppc_exception_state64 *) tstate;
-			sv = genuser;								/* Copy this over */
-		
-			if(sv) {									/* See if valid state yet */
-				xes->dar = sv->save_dar;
-				xes->dsisr = sv->save_dsisr;
-				xes->exception = sv->save_exception;
-			}
-			else {										/* Nope, not yet */
-				xes->dar = 0;
-				xes->dsisr = 0;
-				xes->exception = ((unsigned int *)&FloatInit)[0];
-			}
-		
-			*count = PPC_EXCEPTION_STATE64_COUNT;
-			return KERN_SUCCESS;
-	
-		case PPC_FLOAT_STATE: 
-		
-			if (*count < PPC_FLOAT_STATE_COUNT)  {
-				return KERN_INVALID_ARGUMENT;
-			}
-		
-			fpu_save(thread->machine.curctx);				/* Just in case it's live, save it */
-		
-			fs = (struct ppc_float_state *) tstate;		/* Point to destination */
-			
-			fsv = find_user_fpu(thread);				/* Get the user's fpu savearea */
-			
-			if(fsv) {									/* See if we have any */
-				bcopy((char *)&fsv->save_fp0, (char *)fs, 32*8); /* 32 registers  */
-				fs->fpscr_pad	= 0;					/* Be clean and tidy */
-				if(genuser) fs->fpscr = genuser->save_fpscr;	/* Set the fpscr value to general */
-				else fs->fpscr = 0;						/* If no user, initialize this */
-			}
-			else {										/* No floating point yet */
-			
-				for(i=0; i < 32; i++) {					/* Initialize floating points */
-					fs->fpregs[i] = FloatInit;			/* Initial value */
-				}
-				fs->fpscr_pad	= 0;					/* Initial value */
-				fs->fpscr 		= 0;					/* Initial value */
-			}
-			
-			*count = PPC_FLOAT_STATE_COUNT;
-			
-			return KERN_SUCCESS;
-	
-		case PPC_VECTOR_STATE: 
-			
-			if (*count < PPC_VECTOR_STATE_COUNT)  {
-				return KERN_INVALID_ARGUMENT;
-			}
-		
-			vec_save(thread->machine.curctx);				/* Just in case it's live, save it */
-		
-			vs = (struct ppc_vector_state *) tstate;	/* Point to destination */
-			
-			vsv = find_user_vec(thread);				/* Find the vector savearea */
-			
-			if(vsv) {									/* See if we have any */
-				
-				vrvalidwrk = vsv->save_vrvalid;			/* Get the valid flags */
-				vs->save_vrvalid = vsv->save_vrvalid;	/* Set the valid flags */
-				if(genuser) for(j=0; j < 4; j++) vs->save_vscr[j] = genuser->save_vscr[j];	/* Set value for vscr */
-				else {
-					vs->save_vscr[0] = 0;				/* Set an initial value if no general user yet */
-					vs->save_vscr[1] = 0;
-					vs->save_vscr[2] = 0;
-					vs->save_vscr[3] = 0x00010000;		/* Always start with Java mode off */
-				}
-				for(i=0; i < 32; i++) {					/* Copy the saved registers and invalidate the others */
-					for(j=0; j < 4; j++) {
-						if(vrvalidwrk & 0x80000000) (vs->save_vr)[i][j] = 
-							((unsigned int *)&(vsv->save_vr0))[(i * 4) + j];	/* We have this register saved */
-						else vs->save_vr[i][j] = QNaNbarbarian[j];	/* Set invalid value */
-					}
-					vrvalidwrk = vrvalidwrk << 1;		/* Shift over to the next */
-				}
-			}
-			else {										/* No vector yet */
-			
-				for(i=0; i < 32; i++) {					/* Initialize vector registers */
-					for(j=0; j < 4; j++) vs->save_vr[i][j] = QNaNbarbarian[j];		/* Initial value */
-				}
-				
-				if(genuser) for(j=0; j < 4; j++) vs->save_vscr[j] = genuser->save_vscr[j];	/* Set value for vscr */
-				else {
-					vs->save_vscr[0] = 0;				/* Set an initial value if no general user yet */
-					vs->save_vscr[1] = 0;
-					vs->save_vscr[2] = 0;
-					vs->save_vscr[3] = 0x00010000;		/* Always start with Java mode off */
-				}
-				vs->save_vrvalid = 0;					/* Clear the valid flags */
-			}
-			
-			for (i=0; i < 4; i++) vs->save_pad5[i] = 0;	/* Clear cruft */
-			for (i=0; i < 7; i++) vs->save_pad6[i] = 0;	/* Clear cruft */
-			
-			*count = PPC_VECTOR_STATE_COUNT;
-			return KERN_SUCCESS;
-	
-		default:
-			return KERN_INVALID_ARGUMENT;
-	}
-}
-/* Close cousin of machine_thread_get_state(). 
- * This function is currently incomplete since we don't really need vector
- * or FP for the core dump (the save area can be accessed directly if the 
- * user is so inclined). Also the function name is something of a misnomer,
- * see the comment above find_kern_regs(). 
- */
-
-kern_return_t 
-machine_thread_get_kern_state(
-	thread_t				thread,
-	thread_flavor_t			flavor,
-	thread_state_t			tstate,
-	mach_msg_type_number_t	*count)
-{
-	
-	register struct savearea *sv;						/* Pointer to the context savearea */
-	struct savearea *genkern;
-	int i;
-
-	register struct ppc_thread_state *ts;
-	register struct ppc_thread_state64 *xts;
-	register struct ppc_exception_state *es;
-	register struct ppc_exception_state64 *xes;
-	
-	genkern = find_kern_regs(thread);
-
-	switch (flavor) {
-		
-		case THREAD_STATE_FLAVOR_LIST:
-			
-			if (*count < 6)  {
-				return (KERN_INVALID_ARGUMENT);
-			}
-		
-			tstate[0] = PPC_THREAD_STATE;
-			tstate[1] = PPC_FLOAT_STATE;
-			tstate[2] = PPC_EXCEPTION_STATE;
-			tstate[3] = PPC_VECTOR_STATE;
-			tstate[4] = PPC_THREAD_STATE64;
-			tstate[5] = PPC_EXCEPTION_STATE64;
-			*count = 6;
-		
-			return KERN_SUCCESS;
-	
-		case PPC_THREAD_STATE:
-	
-			if (*count < PPC_THREAD_STATE_COUNT) {			/* Is the count ok? */
-				return KERN_INVALID_ARGUMENT;
-			}
-		
-			ts = (struct ppc_thread_state *) tstate;
-
-			sv = genkern;									/* Copy this over */
-			
-			if(sv) {										/* Is there a save area yet? */
-				ts->r0	= (unsigned int)sv->save_r0;
-				ts->r1	= (unsigned int)sv->save_r1;
-				ts->r2	= (unsigned int)sv->save_r2;
-				ts->r3	= (unsigned int)sv->save_r3;
-				ts->r4	= (unsigned int)sv->save_r4;
-				ts->r5	= (unsigned int)sv->save_r5;
-				ts->r6	= (unsigned int)sv->save_r6;
-				ts->r7	= (unsigned int)sv->save_r7;
-				ts->r8	= (unsigned int)sv->save_r8;
-				ts->r9	= (unsigned int)sv->save_r9;
-				ts->r10	= (unsigned int)sv->save_r10;
-				ts->r11	= (unsigned int)sv->save_r11;
-				ts->r12	= (unsigned int)sv->save_r12;
-				ts->r13	= (unsigned int)sv->save_r13;
-				ts->r14	= (unsigned int)sv->save_r14;
-				ts->r15	= (unsigned int)sv->save_r15;
-				ts->r16	= (unsigned int)sv->save_r16;
-				ts->r17	= (unsigned int)sv->save_r17;
-				ts->r18	= (unsigned int)sv->save_r18;
-				ts->r19	= (unsigned int)sv->save_r19;
-				ts->r20	= (unsigned int)sv->save_r20;
-				ts->r21	= (unsigned int)sv->save_r21;
-				ts->r22	= (unsigned int)sv->save_r22;
-				ts->r23	= (unsigned int)sv->save_r23;
-				ts->r24	= (unsigned int)sv->save_r24;
-				ts->r25	= (unsigned int)sv->save_r25;
-				ts->r26	= (unsigned int)sv->save_r26;
-				ts->r27	= (unsigned int)sv->save_r27;
-				ts->r28	= (unsigned int)sv->save_r28;
-				ts->r29	= (unsigned int)sv->save_r29;
-				ts->r30	= (unsigned int)sv->save_r30;
-				ts->r31	= (unsigned int)sv->save_r31;
-				ts->cr	= (unsigned int)sv->save_cr;
-				ts->xer	= (unsigned int)sv->save_xer;
-				ts->lr	= (unsigned int)sv->save_lr;
-				ts->ctr	= (unsigned int)sv->save_ctr;
-				ts->srr0 = (unsigned int)sv->save_srr0;
-				ts->srr1 = (unsigned int)sv->save_srr1;
-				ts->mq	= 0;							/* MQ register (601 only) */
-				ts->vrsave	= (unsigned int)sv->save_vrsave;			/* VRSAVE register (Altivec only) */
-			}
-			else {										/* No state yet. Save seemingly random values. */
-						
-				for(i=0; i < 32; i+=2) {				/* Fill up with defaults */
-					((unsigned int *)&ts->r0)[i] = ((unsigned int *)&FloatInit)[0];
-					((unsigned int *)&ts->r0)[i+1] = ((unsigned int *)&FloatInit)[1];
-				}
-				ts->cr	= 0;
-				ts->xer	= 0;
-				ts->lr	= ((unsigned int *)&FloatInit)[0];
-				ts->ctr	= ((unsigned int *)&FloatInit)[1];
-				ts->srr0	= ((unsigned int *)&FloatInit)[0];
-				ts->srr1 = MSR_EXPORT_MASK_SET;
-				ts->mq	= 0;
-				ts->vrsave	= 0;						/* VRSAVE register (Altivec only) */
-			}
-		
-			*count = PPC_THREAD_STATE_COUNT;			/* Pass back the amount we actually copied */
-			return KERN_SUCCESS;
-	
-	
-		case PPC_THREAD_STATE64:
-	
-			if (*count < PPC_THREAD_STATE64_COUNT) {	/* Is the count ok? */
-				return KERN_INVALID_ARGUMENT;
-			}
-		
-			xts = (struct ppc_thread_state64 *) tstate;
-
-			sv = genkern;								/* Copy this over */
-			
-			if(sv) {									/* Is there a save area yet? */
-				xts->r0		= sv->save_r0;
-				xts->r1		= sv->save_r1;
-				xts->r2		= sv->save_r2;
-				xts->r3		= sv->save_r3;
-				xts->r4		= sv->save_r4;
-				xts->r5		= sv->save_r5;
-				xts->r6		= sv->save_r6;
-				xts->r7		= sv->save_r7;
-				xts->r8		= sv->save_r8;
-				xts->r9		= sv->save_r9;
-				xts->r10	= sv->save_r10;
-				xts->r11	= sv->save_r11;
-				xts->r12	= sv->save_r12;
-				xts->r13	= sv->save_r13;
-				xts->r14	= sv->save_r14;
-				xts->r15	= sv->save_r15;
-				xts->r16	= sv->save_r16;
-				xts->r17	= sv->save_r17;
-				xts->r18	= sv->save_r18;
-				xts->r19	= sv->save_r19;
-				xts->r20	= sv->save_r20;
-				xts->r21	= sv->save_r21;
-				xts->r22	= sv->save_r22;
-				xts->r23	= sv->save_r23;
-				xts->r24	= sv->save_r24;
-				xts->r25	= sv->save_r25;
-				xts->r26	= sv->save_r26;
-				xts->r27	= sv->save_r27;
-				xts->r28	= sv->save_r28;
-				xts->r29	= sv->save_r29;
-				xts->r30	= sv->save_r30;
-				xts->r31	= sv->save_r31;
-				xts->cr		= sv->save_cr;
-				xts->xer	= sv->save_xer;
-				xts->lr		= sv->save_lr;
-				xts->ctr	= sv->save_ctr;
-				xts->srr0 	= sv->save_srr0;
-				xts->srr1 	= sv->save_srr1;
-				xts->vrsave	= sv->save_vrsave;			/* VRSAVE register (Altivec only) */
-			}
-			else {										/* No state yet. Save seemingly random values. */
-						
-				for(i=0; i < 32; i++) {					/* Fill up with defaults */
-					((unsigned long long *)&xts->r0)[i] = ((unsigned long long *)&FloatInit)[0];
-				}
-				xts->cr		= 0;
-				xts->xer	= 0;
-				xts->lr		= ((unsigned long long *)&FloatInit)[0];
-				xts->ctr	= ((unsigned long long *)&FloatInit)[0];
-				xts->srr0	= ((unsigned long long *)&FloatInit)[0];
-				xts->srr1 	= MSR_EXPORT_MASK_SET;
-				xts->vrsave	= 0;						/* VRSAVE register (Altivec only) */
-			}
-		
-			*count = PPC_THREAD_STATE64_COUNT;			/* Pass back the amount we actually copied */
-			return KERN_SUCCESS;
-	
-		case PPC_EXCEPTION_STATE:
-	
-			if (*count < PPC_EXCEPTION_STATE_COUNT) {
-				return KERN_INVALID_ARGUMENT;
-			}
-		
-			es = (struct ppc_exception_state *) tstate;
-			sv = genkern;								/* Copy this over */
-		
-			if(sv) {									/* See if valid state yet */
-				es->dar = (unsigned int)sv->save_dar;
-				es->dsisr = sv->save_dsisr;
-				es->exception = sv->save_exception;
-			}
-			else {										/* Nope, not yet */
-				es->dar = 0;
-				es->dsisr = 0;
-				es->exception = ((unsigned int *)&FloatInit)[0];
-			}
-		
-			*count = PPC_EXCEPTION_STATE_COUNT;
-			return KERN_SUCCESS;
-	
-		case PPC_EXCEPTION_STATE64:
-	
-			if (*count < PPC_EXCEPTION_STATE64_COUNT) {
-				return KERN_INVALID_ARGUMENT;
-			}
-		
-			xes = (struct ppc_exception_state64 *) tstate;
-			sv = genkern;								/* Copy this over */
-		
-			if(sv) {									/* See if valid state yet */
-				xes->dar = sv->save_dar;
-				xes->dsisr = sv->save_dsisr;
-				xes->exception = sv->save_exception;
-			}
-			else {										/* Nope, not yet */
-				xes->dar = 0;
-				xes->dsisr = 0;
-				xes->exception = ((unsigned int *)&FloatInit)[0];
-			}
-		
-			*count = PPC_EXCEPTION_STATE64_COUNT;
-			return KERN_SUCCESS;
-	
-		default:
-			return KERN_INVALID_ARGUMENT;
-	}
-}
-
-
-/*
- * thread_setstatus:
- *
- * Set the status of the specified thread.
- */
-kern_return_t 
-machine_thread_set_state(
-	thread_t				thread,
-	thread_flavor_t			flavor,
-	thread_state_t			tstate,
-	mach_msg_type_number_t	count)
-{
-  
-  	struct savearea		*genuser;
-  	savearea_fpu	*fsv, *fsvn, *fsvo;
-  	savearea_vec	*vsv, *vsvn, *vsvo;
-	unsigned int	i;
-	unsigned int	clgn;
-	register struct ppc_thread_state *ts;
-	register struct ppc_thread_state64 *xts;
-	register struct ppc_exception_state *es;
-	register struct ppc_exception_state64 *xes;
-	register struct ppc_float_state *fs;
-	register struct ppc_vector_state *vs;
-	
-//	dbgTrace((unsigned int)thr_act, (unsigned int)0 /*sv: was never set*/, flavor);	/* (TEST/DEBUG) */
-
-	clgn = count;											/* Get the count */
-	
-	switch (flavor) {										/* Validate the count before we do anything else */
-		case PPC_THREAD_STATE:
-			
-			if (clgn < PPC_THREAD_STATE_COUNT)  {			/* Is it too short? */
-				return KERN_INVALID_ARGUMENT;				/* Yeah, just leave... */
-			}
-			break;
-	
-		case PPC_THREAD_STATE64:
-			
-			if (clgn < PPC_THREAD_STATE64_COUNT)  {			/* Is it too short? */
-				return KERN_INVALID_ARGUMENT;				/* Yeah, just leave... */
-			}
-			break;
-			
-		case PPC_EXCEPTION_STATE:
-			
-			if (clgn < PPC_EXCEPTION_STATE_COUNT)  {		/* Is it too short? */
-				return KERN_INVALID_ARGUMENT;				/* Yeah, just leave... */
-			}
-			break;
-			
-		case PPC_EXCEPTION_STATE64:
-			
-			if (clgn < PPC_EXCEPTION_STATE64_COUNT)  {		/* Is it too short? */
-				return KERN_INVALID_ARGUMENT;				/* Yeah, just leave... */
-			}
-			
-			break;
-			
-		case PPC_FLOAT_STATE:
-			
-			if (clgn < PPC_FLOAT_STATE_COUNT)  {			/* Is it too short? */
-				return KERN_INVALID_ARGUMENT;				/* Yeah, just leave... */
-			}
-			
-			break;
-			
-
-		case PPC_VECTOR_STATE:
-			
-			if (clgn < PPC_VECTOR_STATE_COUNT)  {			/* Is it too short? */
-				return KERN_INVALID_ARGUMENT;				/* Yeah, just leave... */
-			}
-			
-			break;
-			
-		default:
-			return KERN_INVALID_ARGUMENT;
-	}
-	
-	genuser = get_user_regs(thread);						/* Find or allocate and initialize one */
-
-	switch (flavor) {
-		
-		case PPC_THREAD_STATE:
-				
-			ts = (struct ppc_thread_state *)tstate;
-
-			genuser->save_r0	= (uint64_t)ts->r0;
-			genuser->save_r1	= (uint64_t)ts->r1;
-			genuser->save_r2	= (uint64_t)ts->r2;
-			genuser->save_r3	= (uint64_t)ts->r3;
-			genuser->save_r4	= (uint64_t)ts->r4;
-			genuser->save_r5	= (uint64_t)ts->r5;
-			genuser->save_r6	= (uint64_t)ts->r6;
-			genuser->save_r7	= (uint64_t)ts->r7;
-			genuser->save_r8	= (uint64_t)ts->r8;
-			genuser->save_r9	= (uint64_t)ts->r9;
-			genuser->save_r10	= (uint64_t)ts->r10;
-			genuser->save_r11	= (uint64_t)ts->r11;
-			genuser->save_r12	= (uint64_t)ts->r12;
-			genuser->save_r13	= (uint64_t)ts->r13;
-			genuser->save_r14	= (uint64_t)ts->r14;
-			genuser->save_r15	= (uint64_t)ts->r15;
-			genuser->save_r16	= (uint64_t)ts->r16;
-			genuser->save_r17	= (uint64_t)ts->r17;
-			genuser->save_r18	= (uint64_t)ts->r18;
-			genuser->save_r19	= (uint64_t)ts->r19;
-			genuser->save_r20	= (uint64_t)ts->r20;
-			genuser->save_r21	= (uint64_t)ts->r21;
-			genuser->save_r22	= (uint64_t)ts->r22;
-			genuser->save_r23	= (uint64_t)ts->r23;
-			genuser->save_r24	= (uint64_t)ts->r24;
-			genuser->save_r25	= (uint64_t)ts->r25;
-			genuser->save_r26	= (uint64_t)ts->r26;
-			genuser->save_r27	= (uint64_t)ts->r27;
-			genuser->save_r28	= (uint64_t)ts->r28;
-			genuser->save_r29	= (uint64_t)ts->r29;
-			genuser->save_r30	= (uint64_t)ts->r30;
-			genuser->save_r31	= (uint64_t)ts->r31;
-		
-			genuser->save_cr	= ts->cr;
-			genuser->save_xer	= (uint64_t)ts->xer;
-			genuser->save_lr	= (uint64_t)ts->lr;
-			genuser->save_ctr	= (uint64_t)ts->ctr;
-			genuser->save_srr0	= (uint64_t)ts->srr0;
-			genuser->save_vrsave	= ts->vrsave;					/* VRSAVE register (Altivec only) */
-
-			genuser->save_srr1 = MSR_PREPARE_FOR_IMPORT(genuser->save_srr1, ts->srr1);	/* Set the bits we can change */
-
-			genuser->save_srr1 |= MSR_EXPORT_MASK_SET;
-		
-			genuser->save_srr1 &= ~(MASK(MSR_FP) | MASK(MSR_VEC));	/* Make sure we don't enable the floating point or vector unit */
-			
-			if(task_has_64BitAddr(thread->task)) 
-				genuser->save_srr1 |= (uint64_t)MASK32(MSR_SF) << 32;	/* If 64-bit task, force 64-bit mode */
-			else
-				genuser->save_srr1 &= ~((uint64_t)MASK32(MSR_SF) << 32);	/* else 32-bit mode */
-		
-			return KERN_SUCCESS;
-
-
-		case PPC_THREAD_STATE64:
-				
-			xts = (struct ppc_thread_state64 *)tstate;
-
-			genuser->save_r0	= xts->r0;
-			genuser->save_r1	= xts->r1;
-			genuser->save_r2	= xts->r2;
-			genuser->save_r3	= xts->r3;
-			genuser->save_r4	= xts->r4;
-			genuser->save_r5	= xts->r5;
-			genuser->save_r6	= xts->r6;
-			genuser->save_r7	= xts->r7;
-			genuser->save_r8	= xts->r8;
-			genuser->save_r9	= xts->r9;
-			genuser->save_r10	= xts->r10;
-			genuser->save_r11	= xts->r11;
-			genuser->save_r12	= xts->r12;
-			genuser->save_r13	= xts->r13;
-			genuser->save_r14	= xts->r14;
-			genuser->save_r15	= xts->r15;
-			genuser->save_r16	= xts->r16;
-			genuser->save_r17	= xts->r17;
-			genuser->save_r18	= xts->r18;
-			genuser->save_r19	= xts->r19;
-			genuser->save_r20	= xts->r20;
-			genuser->save_r21	= xts->r21;
-			genuser->save_r22	= xts->r22;
-			genuser->save_r23	= xts->r23;
-			genuser->save_r24	= xts->r24;
-			genuser->save_r25	= xts->r25;
-			genuser->save_r26	= xts->r26;
-			genuser->save_r27	= xts->r27;
-			genuser->save_r28	= xts->r28;
-			genuser->save_r29	= xts->r29;
-			genuser->save_r30	= xts->r30;
-			genuser->save_r31	= xts->r31;
-		
-			genuser->save_cr	= xts->cr;
-			genuser->save_xer	= xts->xer;
-			genuser->save_lr	= xts->lr;
-			genuser->save_ctr	= xts->ctr;
-			genuser->save_srr0	= xts->srr0;
-			genuser->save_vrsave	= xts->vrsave;					/* VRSAVE register (Altivec only) */
-
-			genuser->save_srr1 = MSR_PREPARE_FOR_IMPORT(genuser->save_srr1, xts->srr1);	/* Set the bits we can change */
-
-			genuser->save_srr1 |= MSR_EXPORT_MASK_SET;
-		
-			genuser->save_srr1 &= ~(MASK(MSR_FP) | MASK(MSR_VEC));	/* Make sure we don't enable the floating point or vector unit */
-			
-			if(task_has_64BitAddr(thread->task)) 
-				genuser->save_srr1 |= (uint64_t)MASK32(MSR_SF) << 32;	/* If 64-bit task, force 64-bit mode */
-			else
-				genuser->save_srr1 &= ~((uint64_t)MASK32(MSR_SF) << 32);	/* else 32-bit mode */
-		
-			return KERN_SUCCESS;
-				
-				
-		case PPC_EXCEPTION_STATE:
-			
-			es = (struct ppc_exception_state *) tstate;
-		
-			genuser->save_dar = (uint64_t)es->dar;
-			genuser->save_dsisr = es->dsisr;
-			genuser->save_exception = es->exception;
-
-			return KERN_SUCCESS;
-	
-/*
- *		It's pretty worthless to try to change this stuff, but we'll do it anyway.
- */
- 
-		case PPC_EXCEPTION_STATE64:
-			
-			xes = (struct ppc_exception_state64 *) tstate;
-		
-			genuser->save_dar 	= xes->dar;
-			genuser->save_dsisr = xes->dsisr;
-			genuser->save_exception = xes->exception;
-
-			return KERN_SUCCESS;
-	
-		case PPC_FLOAT_STATE:
-
-			toss_live_fpu(thread->machine.curctx);			/* Toss my floating point if live anywhere */
-			
-			fsv = find_user_fpu(thread);					/* Get the user's floating point context */
-		
-			if(!fsv) {										/* Do we have one yet? */
-				fsv = (savearea_fpu *)save_alloc();			/* If we still don't have one, get a new one */
-				fsv->save_hdr.save_flags = (fsv->save_hdr.save_flags & ~SAVtype) | (SAVfloat << SAVtypeshft);	/* Mark as in use as float */
-				fsv->save_hdr.save_act = thread;
-				fsv->save_hdr.save_prev = 0;				/* Mark no more */
-				fsv->save_hdr.save_level = 0;				/* Mark user state */
-				
-				if(!thread->machine.curctx->FPUsave) thread->machine.curctx->FPUsave = fsv;	/* If no floating point, chain us first */
-				else {
-				
-					fsvn = fsvo = thread->machine.curctx->FPUsave;	/* Remember first one */
-					
-					while (fsvn) {							/* Go until we hit the end */
-						fsvo = fsvn;						/* Remember the previous one */
-						fsvn = CAST_DOWN(savearea_fpu *, fsvo->save_hdr.save_prev);	/* Skip on to the next */
-					}
-					
-					fsvo->save_hdr.save_prev = (addr64_t)((uintptr_t)fsv);		/* Queue us on in */
-				}
-				
-			}
-			
-			fs = (struct ppc_float_state *) tstate;			/* Point to source */
-
-		
-			bcopy((char *)fs, (char *)&fsv->save_fp0, 32*8);	/* Move in the 32 registers */
-			
-			genuser->save_fpscr = fs->fpscr;				/* Copy the fpscr value to normal */	
-			
-			return KERN_SUCCESS;
-			
-	
-		case PPC_VECTOR_STATE:
-
-			toss_live_vec(thread->machine.curctx);			/* Toss my vector if live anywhere */
-			
-			vsv = find_user_vec(thread);					/* Get the user's vector context */
-		
-			if(!vsv) {										/* Do we have one yet? */
-				vsv = (savearea_vec *)save_alloc();			/* If we still don't have one, get a new one */
-				vsv->save_hdr.save_flags = (vsv->save_hdr.save_flags & ~SAVtype) | (SAVvector << SAVtypeshft);	/* Mark as in use as vector */
-				vsv->save_hdr.save_act = thread;
-				vsv->save_hdr.save_prev = 0;				/* Mark no more */
-				vsv->save_hdr.save_level = 0;				/* Mark user state */
-				
-				if(!thread->machine.curctx->VMXsave) thread->machine.curctx->VMXsave = vsv;	/* If no vector, chain us first */
-				else {
-				
-					vsvn = vsvo = thread->machine.curctx->VMXsave;	/* Remember first one */
-					
-					while (vsvn) {							/* Go until we hit the end */
-						vsvo = vsvn;						/* Remember the previous one */
-						vsvn = CAST_DOWN(savearea_vec *, vsvo->save_hdr.save_prev);	/* Skip on to the next */
-					}
-					
-					vsvo->save_hdr.save_prev = (addr64_t)((uintptr_t)vsv);	/* Queue us on in */
-				}
-				
-			}
-			
-			vs = (struct ppc_vector_state *) tstate;		/* Point to source */
-		
-			bcopy((char *)vs, (char *)&vsv->save_vr0, 32*16);	/* Move in the 32 vector registers */
-			vsv->save_vrvalid = vs->save_vrvalid;			/* Set validity bits */
-			
-			
-			for(i = 0; i < 4; i++) genuser->save_vscr[i] = vs->save_vscr[i];	/* Set value for vscr */
-		
-			return KERN_SUCCESS;
-			
-		
-		default:
-			return KERN_INVALID_ARGUMENT;
-    }
-}
-
-
-void
-thread_set_wq_state64(thread_t thread, thread_state_t tstate)
-{
-        struct ppc_thread_state64 *ts;
-  	struct savearea		*genuser;
-	thread_t curth = current_thread();
-
-	genuser = get_user_regs(thread);					/* Find or allocate and initialize one */
-	ts = (struct ppc_thread_state64 *)tstate;
-
-	if (curth != thread)
-	        thread_lock(thread);
-
-	genuser->save_r1	= ts->r1;
-	genuser->save_r3	= ts->r3;
-	genuser->save_r4	= ts->r4;
-	genuser->save_r5	= ts->r5;
-	genuser->save_r6	= ts->r6;
-	genuser->save_r7	= ts->r7;
-	genuser->save_r8	= ts->r8;
-	genuser->save_srr0	= ts->srr0;
-
-        genuser->save_srr1 = (uint64_t)MSR_EXPORT_MASK_SET;
-
-        if (task_has_64BitAddr(thread->task))
-	        genuser->save_srr1 |= (uint64_t)MASK32(MSR_SF) << 32;		/* If 64-bit task, force 64-bit mode */
-
-	if (curth != thread)
-	        thread_unlock(thread);
-}
-
-
-/*
- * This is where registers that are not normally specified by the mach-o
- * file on an execve should be nullified, perhaps to avoid a covert channel.
- * We've never bothered to clear FPRs or VRs, but it is important to clear
- * the FPSCR, which is kept in the general state but not set by the general
- * flavor (i.e., PPC_THREAD_STATE or PPC_THREAD_STATE64).
- */
-kern_return_t
-machine_thread_state_initialize(
-	thread_t thread)
-{
-  	struct savearea		*sv;
-	
-	sv = get_user_regs(thread);						/* Find or allocate and initialize one */
-
-	sv->save_fpscr = 0;								/* Clear all floating point exceptions */
-	sv->save_vrsave = 0;							/* Set the vector save state */
-	sv->save_vscr[0] = 0x00000000;					
-	sv->save_vscr[1] = 0x00000000;					
-	sv->save_vscr[2] = 0x00000000;					
-	sv->save_vscr[3] = 0x00010000;					/* Disable Java mode and clear saturated */
-
-    return  KERN_SUCCESS;
-}
-
-
-/*
- *		Duplicates the context of one thread into a new one.
- *		The target thread is assumed to be freshly created, with no user state contexts except possibly a general one.
- *		We also assume that the old thread can't be running anywhere.
- *
- *		We're only going to be duplicating user context here.  That means that we will have to 
- *		eliminate any floating point or vector kernel contexts and carry across the user state ones.
- */
-
-kern_return_t
-machine_thread_dup(
-	thread_t		self,
-	thread_t		target)
-{
-  	struct savearea		*sv, *osv; 
-  	savearea_fpu	*fsv, *fsvn;
-  	savearea_vec	*vsv, *vsvn;
-	
-	fpu_save(self->machine.curctx);						/* Make certain floating point state is all saved */
-	vec_save(self->machine.curctx);						/* Make certain the vector state is all saved */
-	
-	sv = get_user_regs(target);						/* Allocate and initialize context in the new activation */
-	
-	osv = find_user_regs(self);						/* Find the original context */
-	if(!osv)
-		return (KERN_FAILURE);
-	
-	bcopy((char *)((unsigned int)osv + sizeof(savearea_comm)),	/* Copy everything but the headers */
-		(char *)((unsigned int)sv + sizeof(savearea_comm)), 
-		sizeof(struct savearea) - sizeof(savearea_comm));
-	
-	sv->save_srr1 &= (uint64_t)(~(MASK(MSR_FP) | MASK(MSR_VEC)));	/* Make certain that floating point and vector are turned off */
-
-	fsv = find_user_fpu(self);						/* Get any user floating point */
-	
-	target->machine.curctx->FPUsave = NULL;					/* Assume no floating point */
-
-	if(fsv) {										/* Did we find one? */
-		fsvn = (savearea_fpu *)save_alloc();		/* Get a new savearea for the copy */
-		fsvn->save_hdr.save_flags = (fsvn->save_hdr.save_flags & ~SAVtype) | (SAVfloat << SAVtypeshft);	/* Mark as in use as float */
-		fsvn->save_hdr.save_act = target;
-		fsvn->save_hdr.save_prev = 0;				/* Mark no more */
-		fsvn->save_hdr.save_level = 0;				/* Mark user state */
-
-		target->machine.curctx->FPUsave = fsvn;			/* Chain in the floating point */
-
-		bcopy((char *)((unsigned int)fsv + sizeof(savearea_comm)),	/* Copy everything but the headers */
-			(char *)((unsigned int)fsvn + sizeof(savearea_comm)), 
-			sizeof(struct savearea) - sizeof(savearea_comm));
-	}
-
-	vsv = find_user_vec(self);						/* Get any user vector */
-	
-	target->machine.curctx->VMXsave = NULL;					/* Assume no vector */
-
-	if(vsv) {										/* Did we find one? */
-		vsvn = (savearea_vec *)save_alloc();		/* Get a new savearea for the copy */
-		vsvn->save_hdr.save_flags = (vsvn->save_hdr.save_flags & ~SAVtype) | (SAVvector << SAVtypeshft);	/* Mark as in use as vector */
-		vsvn->save_hdr.save_act = target;
-		vsvn->save_hdr.save_prev = 0;				/* Mark no more */
-		vsvn->save_hdr.save_level = 0;				/* Mark user state */
-
-		target->machine.curctx->VMXsave = vsvn;			/* Chain in the vector */
-
-		bcopy((char *)((unsigned int)vsv + sizeof(savearea_comm)),	/* Copy everything but the headers */
-			(char *)((unsigned int)vsvn + sizeof(savearea_comm)), 
-			sizeof(struct savearea) - sizeof(savearea_comm));
-	}
-
-	return (KERN_SUCCESS);
-}
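-
-#if 0	/* Illustrative sketch (a hypothetical helper, not part of this file):
-	 * the bcopy idiom used above -- copying a savearea's register body while
-	 * leaving its header intact -- could be written once as: */
-static void
-save_copy_body(struct savearea *from, struct savearea *to)
-{
-	bcopy((char *)((unsigned int)from + sizeof(savearea_comm)),	/* Skip past the common header */
-		(char *)((unsigned int)to + sizeof(savearea_comm)),
-		sizeof(struct savearea) - sizeof(savearea_comm));		/* Copy only the register body */
-}
-#endif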
-
-/*
- *		Initializes a fresh set of user state values.  If there is no user state context,
- *		one is created. Floats and VMX are not created. 
- *		
- *		We only set initial values if there was no context found.
- */
-
-struct savearea *
-get_user_regs(
-	thread_t	 thread)
-{
-  	struct savearea		*sv, *osv;
-	unsigned int	i;
-
-	if (thread->machine.upcb)
-		return	thread->machine.upcb;
-
-	sv = thread->machine.pcb;								/* Get the top savearea on the stack */
-	osv = NULL;										/* Set no user savearea yet */	
-	
-	while(sv) {										/* Find the user context */
-		osv = sv;									/* Save the last one */
-		sv = CAST_DOWN(struct savearea *, sv->save_hdr.save_prev);	/* Get the previous context */ 
-	}
-
-	sv = save_alloc();								/* Get one */
-	sv->save_hdr.save_flags = (sv->save_hdr.save_flags & ~SAVtype) | (SAVgeneral << SAVtypeshft);	/* Mark as in use as general */
-	sv->save_hdr.save_act = thread;
-	sv->save_hdr.save_prev = 0;						/* Mark no more */
-	sv->save_hdr.save_level = 0;					/* Mark user state */
-	
-	if(osv) {										/* Did we already have one? */
-		osv->save_hdr.save_prev = (addr64_t)((uintptr_t)sv);		/* Chain us on the end */
-	}
-	else {											/* We are the first */
-		thread->machine.pcb = sv;							/* Put it there */
-	}
-	thread->machine.upcb = sv;							/* Set user pcb */
-
-	for(i=0; i < 32; i+=2) {						/* Fill up with defaults */
-		((unsigned int *)&sv->save_r0)[i] = ((unsigned int *)&FloatInit)[0];
-		((unsigned int *)&sv->save_r0)[i+1] = ((unsigned int *)&FloatInit)[1];
-	}
-	sv->save_cr	= 0;
-	sv->save_xer	= 0;
-	sv->save_lr	= (uint64_t)FloatInit;
-	sv->save_ctr	= (uint64_t)FloatInit;
-	sv->save_srr0	= (uint64_t)FloatInit;
-	sv->save_srr1 = (uint64_t)MSR_EXPORT_MASK_SET;
-	if(task_has_64BitAddr(thread->task)) 
-		sv->save_srr1 |= (uint64_t)MASK32(MSR_SF) << 32;	/* If 64-bit task, force 64-bit mode */
-
-	sv->save_fpscr = 0;								/* Clear all floating point exceptions */
-
-	sv->save_vrsave = 0;							/* Set the vector save state */
-	sv->save_vscr[0] = 0x00000000;					
-	sv->save_vscr[1] = 0x00000000;					
-	sv->save_vscr[2] = 0x00000000;					
-	sv->save_vscr[3] = 0x00010000;					/* Disable java mode and clear saturated */
-	
-	return sv;										/* Bye bye... */
-}
-
-/*
- *		Find the user state context.  If there is no user state context,
- *		we just return a 0.
- */
-
-struct savearea *
-find_user_regs(
-	thread_t	thread)
-{
-	return thread->machine.upcb;
-}
-
-/* The name of this call is something of a misnomer since the machine.pcb can
- * contain chained saveareas, but it will do for now.
- */
-struct savearea *
-find_kern_regs(
-	thread_t	thread)
-{
-        return thread->machine.pcb;
-}
-
-/*
- *		Find the user state floating point context.  If there is no user state context,
- *		we just return a 0.
- */
-
-savearea_fpu *
-find_user_fpu(
-	thread_t	thread)
-{
-  	savearea_fpu	*fsv;
-	boolean_t		intr;
-
-	intr = ml_set_interrupts_enabled(FALSE);
-	fsv = thread->machine.curctx->FPUsave;				/* Get the start of the floating point chain */
-	
-	while(fsv) {									/* Look until the end or we find it */
-		if(!(fsv->save_hdr.save_level)) break;		/* Is this the user state? (the level is 0 if so) */
-		fsv = CAST_DOWN(savearea_fpu *, fsv->save_hdr.save_prev);	/* Try the previous one */ 
-	}
-	(void) ml_set_interrupts_enabled(intr);
-	
-	return fsv;										/* Bye bye... */
-}
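-
-/* Illustrative picture (an assumption drawn from the walks above, not a
- * statement from the original source): the FPU and VMX saveareas hang off
- * the facility context as a singly linked chain through save_hdr.save_prev,
- * with kernel levels toward the head and the user state entry
- * (save_level == 0) at the tail:
- *
- *	curctx->FPUsave -> [level n] -> ... -> [level 0 == user] -> 0
- */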
-
-/*
- *		Find the user state vector context.  If there is no user state context,
- *		we just return a 0.
- */
-
-savearea_vec *
-find_user_vec(
-	thread_t	thread)
-{
-  	savearea_vec	*vsv;
-	boolean_t		intr;
-
-	intr = ml_set_interrupts_enabled(FALSE);
-	vsv = thread->machine.curctx->VMXsave;				/* Get the start of the vector chain */
-	
-	while(vsv) {									/* Look until the end or we find it */
-		if(!(vsv->save_hdr.save_level)) break;		/* Is this the user state? (the level is 0 if so) */
-		vsv = CAST_DOWN(savearea_vec *, vsv->save_hdr.save_prev);	/* Try the previous one */ 
-	}
-	(void) ml_set_interrupts_enabled(intr);
-	
-	return vsv;										/* Bye bye... */
-}
-/*
- *		Find the user state vector context for the current thread.  If there is no user state context,
- *		we just return a 0.
- */
-
-savearea_vec *find_user_vec_curr(void) {
-
-  	savearea_vec	*vsv;
-	thread_t		thread = current_thread();
-	boolean_t		intr;
-	
-	vec_save(thread->machine.curctx);						/* Force save if live */
-
-	intr = ml_set_interrupts_enabled(FALSE);
-	vsv = thread->machine.curctx->VMXsave;				/* Get the start of the vector chain */
-	
-	while(vsv) {									/* Look until the end or we find it */
-		if(!(vsv->save_hdr.save_level)) break;		/* Is this the user state? (the level is 0 if so) */
-		vsv = CAST_DOWN(savearea_vec *, vsv->save_hdr.save_prev);	/* Try the previous one */ 
-	}
-	(void) ml_set_interrupts_enabled(intr);
-	
-	return vsv;										/* Bye bye... */
-}
-
-/*
- * thread_userstack:
- *
- * Return the user stack pointer from the machine
- * dependent thread state info.
- */
-kern_return_t
-thread_userstack(
-    __unused thread_t	thread,
-    int                 flavor,
-    thread_state_t      tstate,
-    unsigned int        count,
-    mach_vm_offset_t	*user_stack,
-	int					*customstack
-)
-{
-
-        switch (flavor) {
-        case PPC_THREAD_STATE:
-		{
-			struct ppc_thread_state *state;
-
-                if (count < PPC_THREAD_STATE_COUNT)
-                        return (KERN_INVALID_ARGUMENT);
- 
-                state = (struct ppc_thread_state *) tstate;
-    
-                /*
-                 * If a valid user stack is specified, use it.
-                 */
-			if (state->r1) {
-				*user_stack = CAST_USER_ADDR_T(state->r1);
-				if (customstack)
-					*customstack = 1;
-			} else {
-				*user_stack = CAST_USER_ADDR_T(USRSTACK);
-				if (customstack)
-					*customstack = 0;
-			}
-		}
-                break;
-					
-	case PPC_THREAD_STATE64:
-		{
-			struct ppc_thread_state64 *state64;
-					
-			if (count < PPC_THREAD_STATE64_COUNT)
-				return (KERN_INVALID_ARGUMENT);
-
-			state64 = (struct ppc_thread_state64 *)tstate;
-
-			/*
-			 * If a valid user stack is specified, use it.
-			 */
-			if (state64->r1 != MACH_VM_MIN_ADDRESS) {
-				*user_stack = state64->r1;
-				if (customstack)
-					*customstack = 1;
-			} else {
-				*user_stack = USRSTACK64;
-				if (customstack)
-					*customstack = 0;
-			}
-		}
-                break;
-		
-        default :
-                return (KERN_INVALID_ARGUMENT);
-        }
-                
-        return (KERN_SUCCESS);
-}    
-
-
-/*
- * thread_setuserstack:
- *
- * Sets the user stack pointer into the machine
- * dependent thread state info.
- */
-void
-thread_setuserstack(thread_t thread, mach_vm_address_t user_stack)
-{
-	struct savearea *sv;
-	
-	sv = get_user_regs(thread);	/* Get the user state registers */
-	
-	sv->save_r1 = user_stack;
-	
-	return;
-}    
-
-void 
-thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64)
-{
-	struct savearea *sv;
-	
-	if (isLP64 == 0) {
-		thread->machine.cthread_self = pself;
-	} else {
-		sv = get_user_regs(thread);	/* Get the user state registers */
-
-		thread->machine.cthread_self = pself;
-		sv->save_r13 = pself;
-	}
-}
-
-
-/*
- * thread_adjuserstack:
- *
- * Returns the adjusted user stack pointer from the machine
- * dependent thread state info.  Used for small (<2G) deltas.
- */
-uint64_t
-thread_adjuserstack(thread_t thread, int adjust)
-{
-	struct savearea *sv;
-	
-	sv = get_user_regs(thread);	/* Get the user state registers */
-	
-	sv->save_r1 += adjust;		/* Adjust the stack */
-	
-	return sv->save_r1;		/* Return the adjusted stack */
-	
-}    
-
-kern_return_t
-thread_setsinglestep(thread_t thread, int on)
-{
-	struct savearea *sv;
-	
-	sv = get_user_regs(thread);	/* Get the user state registers */
-	
-	if (on)
-	        sv->save_srr1 |= MASK(MSR_SE);
-	else
-	        sv->save_srr1 &= ~MASK(MSR_SE);
-	
-	return (KERN_SUCCESS);
-}    
-
-/*
- * thread_setentrypoint:
- *
- * Sets the user PC into the machine
- * dependent thread state info.
- */
-
-void
-thread_setentrypoint(thread_t thread, uint64_t entry)
-{
-	struct savearea *sv;
-	
-	sv = get_user_regs(thread);	/* Get the user state registers */
-	
-	sv->save_srr0 = entry;
-}    
-
-kern_return_t
-thread_entrypoint(
-    __unused thread_t	thread,
-    int                 flavor,
-    thread_state_t      tstate,
-    unsigned int        count,
-    mach_vm_offset_t	*entry_point
-)
-{ 
-#if 0
-	/* Silly code: "if *entry_point is 0, make it 0" */
-    /*
-     * Set a default.
-     */
-    if (*entry_point == 0ULL)
-        *entry_point = MACH_VM_MIN_ADDRESS;
-#endif
-    
-    switch (flavor) {   
-    case PPC_THREAD_STATE:
-    	{
-	    struct ppc_thread_state     *state;
-
-        if (count < PPC_THREAD_STATE_COUNT)
-            return (KERN_INVALID_ARGUMENT);
-
-        state = (struct ppc_thread_state *) tstate;
-
-        /* 
-         * If a valid entry point is specified, use it.
-         */     
-	    if (state->srr0) {
-		*entry_point = CAST_USER_ADDR_T(state->srr0);
-	    } else {
-		*entry_point = CAST_USER_ADDR_T(VM_MIN_ADDRESS);
-	    }
-	}
-        break; 
-
-    case PPC_THREAD_STATE64:
-    	{
-	    struct ppc_thread_state64     *state64;
-
-	    if (count < PPC_THREAD_STATE64_COUNT)
-		return (KERN_INVALID_ARGUMENT);
-
-	    state64 = (struct ppc_thread_state64 *)tstate;
-
-	    /* 
-	     * If a valid entry point is specified, use it.
-	     */     
-	    if (state64->srr0) {
-		*entry_point = state64->srr0;
-	    } else {
-		*entry_point = MACH_VM_MIN_ADDRESS;
-	    }
-	}
-        break; 
-
-    default: 
-        return (KERN_INVALID_ARGUMENT);
-    }           
- 
-    return (KERN_SUCCESS);
-}   
-
-unsigned int get_msr_exportmask(void)
-{
-        return (MSR_EXPORT_MASK_SET);
-}
-
-unsigned int get_msr_nbits(void)
-{
-        return (MASK(MSR_POW)|MASK(MSR_ILE)|MASK(MSR_IP)|MASK(MSR_LE));
-}
-unsigned int get_msr_rbits(void)
-{
-	return (MASK(MSR_PR)|MASK(MSR_ME)|MASK(MSR_IR)|MASK(MSR_DR)|MASK(MSR_EE));
-}
-
-void ppc_checkthreadstate(void * tsptr, int flavor)
-{
-	if (flavor == PPC_THREAD_STATE64) {
-		struct ppc_thread_state64 *ts64 =(struct ppc_thread_state64 *)tsptr;
-
-		/* Make sure naughty bits are off and necessary bits are on */
-		ts64->srr1 &= ~(MASK(MSR_POW)|MASK(MSR_ILE)|MASK(MSR_IP)|MASK(MSR_LE));
-		ts64->srr1 |= (MASK(MSR_PR)|MASK(MSR_ME)|MASK(MSR_IR)|MASK(MSR_DR)|MASK(MSR_EE));
-	} else {
-		struct ppc_thread_state *ts =(struct ppc_thread_state *)tsptr;
-
-		/* Make sure naughty bits are off and necessary bits are on */
-		ts->srr1 &= ~(MASK(MSR_POW)|MASK(MSR_ILE)|MASK(MSR_IP)|MASK(MSR_LE));
-		ts->srr1 |= (MASK(MSR_PR)|MASK(MSR_ME)|MASK(MSR_IR)|MASK(MSR_DR)|MASK(MSR_EE));
-	}
-	return;
-}
-
-void
-thread_set_child(
-	thread_t	child,
-	int			pid)
-{
-	struct savearea *child_state;
-	
-	child_state = get_user_regs(child);
-	
-	child_state->save_r3 = (uint64_t)pid;
-	child_state->save_r4 = 1ULL;
-}
-void
-thread_set_parent(
-	thread_t	parent,
-	int			pid)
-{
-	struct savearea *parent_state;
-	
-	parent_state = get_user_regs(parent);
-	
-	parent_state->save_r3 = (uint64_t)pid;
-	parent_state->save_r4 = 0;
-}
-
-/*
- *		Saves the complete context (general, floating point, and vector) of the current activation.
- *		We will collect everything into an opaque block of 1 to 3 saveareas and pass back a 
- *		pointer to that.
- *
- *		The savearea is made to look like it belongs to the source activation.  This needs to 
- *		be adjusted when these contexts are attached to a new activation.
- *
- */
-
-void *act_thread_csave(void) {
-
-  	struct savearea		*sv, *osv;
-  	savearea_fpu	*fsv, *ofsv;
-  	savearea_vec	*vsv, *ovsv;
-	
-	thread_t thread;	
-	
-	thread = current_thread();
-	
-	fpu_save(thread->machine.curctx);						/* Make certain floating point state is all saved */
-	vec_save(thread->machine.curctx);						/* Make certain the vector state is all saved */
-
-	osv = find_user_regs(thread);						/* Get our savearea */
-
-	if(!osv) {
-		panic("act_thread_csave: attempting to preserve the context of an activation with none (%p)\n", thread);
-	}
-	
-	sv = save_alloc();								/* Get a fresh save area to save into */
-	sv->save_hdr.save_flags = (sv->save_hdr.save_flags & ~SAVtype) | (SAVgeneral << SAVtypeshft);	/* Mark as in use as general */
-	sv->save_hdr.save_act = thread;
-	sv->save_hdr.save_prev = 0;						/* Mark no more */
-	sv->save_hdr.save_level = 0;					/* Mark user state */
-	
-	
-	bcopy((char *)((unsigned int)osv + sizeof(savearea_comm)),	/* Copy everything but the headers */
-		(char *)((unsigned int)sv + sizeof(savearea_comm)), 
-		sizeof(struct savearea) - sizeof(savearea_comm));
-	
-	sv->save_srr1 &= (uint64_t)(~(MASK(MSR_FP) | MASK(MSR_VEC)));	/* Make certain that floating point and vector are turned off */	
-	
-	sv->save_hdr.save_misc2 = 0xDEBB1ED0;			/* Eye catcher for debug */
-	sv->save_hdr.save_misc3 = 0xE5DA11A5;			/* Eye catcher for debug */
-	
-
-	ofsv = find_user_fpu(thread);						/* Get any user floating point */
-
-	sv->save_hdr.save_misc0 = 0;					/* Assume no floating point */
-
-	if(ofsv) {										/* Did we find one? */
-		fsv = (savearea_fpu *)save_alloc();			/* Get a new savearea for the copy */
-		fsv->save_hdr.save_flags = (fsv->save_hdr.save_flags & ~SAVtype) | (SAVfloat << SAVtypeshft);	/* Mark as in use as float */
-		fsv->save_hdr.save_act = thread;
-		fsv->save_hdr.save_prev = 0;				/* Mark no more */
-		fsv->save_hdr.save_level = 0;				/* Mark user state */
-		fsv->save_hdr.save_misc2 = 0xDEBB1ED0;		/* Eye catcher for debug */
-		fsv->save_hdr.save_misc3 = 0xE5DA11A5;		/* Eye catcher for debug */
-
-		sv->save_hdr.save_misc0 = (uint64_t)((uintptr_t)fsv);	/* Remember this one */
-
-		bcopy((char *)((unsigned int)ofsv + sizeof(savearea_comm)),	/* Copy everything but the headers */
-			(char *)((unsigned int)fsv + sizeof(savearea_comm)), 
-			sizeof(struct savearea) - sizeof(savearea_comm));
-	}
-
-	ovsv = find_user_vec(thread);						/* Get any user vector */
-	
-	sv->save_hdr.save_misc1 = 0;					/* Assume no vector */
-
-	if(ovsv) {										/* Did we find one? */
-		vsv = (savearea_vec *)save_alloc();			/* Get a new savearea for the copy */
-		vsv->save_hdr.save_flags = (vsv->save_hdr.save_flags & ~SAVtype) | (SAVvector << SAVtypeshft);	/* Mark as in use as vector */
-		vsv->save_hdr.save_act = thread;
-		vsv->save_hdr.save_prev = 0;				/* Mark no more */
-		vsv->save_hdr.save_level = 0;				/* Mark user state */
-		vsv->save_hdr.save_misc2 = 0xDEBB1ED0;		/* Eye catcher for debug */
-		vsv->save_hdr.save_misc3 = 0xE5DA11A5;		/* Eye catcher for debug */
-
-		sv->save_hdr.save_misc1 = (uint64_t)((uintptr_t)vsv);	/* Remember this one */
-
-		bcopy((char *)((unsigned int)ovsv + sizeof(savearea_comm)),	/* Copy everything but the headers */
-			(char *)((unsigned int)vsv + sizeof(savearea_comm)), 
-			sizeof(struct savearea) - sizeof(savearea_comm));
-	}
-
-	return (void *)sv;								/* Bye bye... */
-}
-
-
-
-/*
- *		Attaches saved user state context to an activation.  We will replace any
- *		user state context with what is passed in.  The saved context consists of a
- *		savearea that was set up by act_thread_csave().
- *
- *		The savearea is made to look like it belongs to the source activation.  This needs to 
- *		be adjusted when these contexts are attached to a new activation.
- *
- */
-
-void act_thread_catt(void *ctx) {
-
-  	struct savearea		*sv, *osv, *psv;
-  	savearea_fpu	*fsv, *ofsv, *pfsv;
-  	savearea_vec	*vsv, *ovsv, *pvsv;
-	unsigned int	spc;
-	thread_t thread;	
-	
-	sv = (struct savearea *)ctx;							/* Make this easier for C */
-	
-	fsv = CAST_DOWN(savearea_fpu *, sv->save_hdr.save_misc0);	/* Get a possible floating point savearea */ 
-	vsv = CAST_DOWN(savearea_vec *, sv->save_hdr.save_misc1);	/* Get a possible vector savearea */ 
-	
-	if((sv->save_hdr.save_misc2 != 0xDEBB1ED0) || (sv->save_hdr.save_misc3 != 0xE5DA11A5)) {	/* See if valid savearea */
-		panic("act_thread_catt: attempt to attach invalid general context savearea - %p\n", sv);	/* Die */
-	}
-
-	if(fsv && ((fsv->save_hdr.save_misc2 != 0xDEBB1ED0) || (fsv->save_hdr.save_misc3 != 0xE5DA11A5))) {	/* See if valid savearea */
-		panic("act_thread_catt: attempt to attach invalid float context savearea - %p\n", fsv);	/* Die */
-	}
-
-	if(vsv && ((vsv->save_hdr.save_misc2 != 0xDEBB1ED0) || (vsv->save_hdr.save_misc3 != 0xE5DA11A5))) {	/* See if valid savearea */
-		panic("act_thread_catt: attempt to attach invalid vector context savearea - %p\n", vsv);	/* Die */
-	}
-
-	thread = current_thread();
-
-	act_machine_sv_free(thread, 0);					/* Blow away any current kernel FP or vector.
-													   We do not support those across a vfork */
-	toss_live_fpu(thread->machine.curctx);			/* Toss my floating point if live anywhere */
-	toss_live_vec(thread->machine.curctx);			/* Toss my vector if live anywhere */
-		
-	sv->save_hdr.save_misc2 = 0;					/* Eye catcher for debug */
-	sv->save_hdr.save_misc3 = 0;					/* Eye catcher for debug */
-	sv->save_hdr.save_act = thread;
-	
-	spc = (unsigned int)thread->map->pmap->space;	/* Get the space we're in */
-	
-	osv = thread->machine.pcb;						/* Get the top general savearea */
-	psv = NULL;
-	while(osv) {									/* Any saved state? */
-		if(osv->save_srr1 & MASK(MSR_PR)) break;	/* Leave if this is user state */
-		psv = osv;									/* Save previous savearea address */
-		osv = CAST_DOWN(struct savearea *, osv->save_hdr.save_prev);	/* Get the one underneath ours */
-	}
-	
-	if(osv) {										/* Did we find one? */
-		if(psv) psv->save_hdr.save_prev = 0;		/* Yes, clear pointer to it (it should always be last) or */	
-		else thread->machine.pcb = NULL;					/* to the start if the only one */
-
-		save_release(osv);							/* Release it */
-		
-	}
-
-	if(psv)	psv->save_hdr.save_prev = (addr64_t)((uintptr_t)sv);	/* Chain us to the end or */
-	else thread->machine.pcb = (pcb_t)sv;					/* to the start if the only one */
-	thread->machine.upcb = (pcb_t)sv;						/* Set the user pcb */
-	
-	ovsv = thread->machine.curctx->VMXsave;				/* Get the top vector savearea */
-	
-	pvsv = NULL;
-	while(ovsv) {									/* Any VMX saved state? */
-		if(!(ovsv->save_hdr.save_level)) break;		/* Leave if this is user state */
-		pvsv = ovsv;								/* Save previous savearea address */
-		ovsv = CAST_DOWN(savearea_vec *, ovsv->save_hdr.save_prev);	/* Get the one underneath ours */
-	}
-	
-	if(ovsv) {										/* Did we find one? */
-		if(pvsv) pvsv->save_hdr.save_prev = 0;		/* Yes, clear pointer to it (it should always be last) or */	
-		else thread->machine.curctx->VMXsave = NULL;	/* to the start if the only one */
-
-		save_release((struct savearea *)ovsv);				/* Release it */
-	}
-	
-	if(vsv) {										/* Are we sticking any vector on this one? */
-		if(pvsv) pvsv->save_hdr.save_prev = (addr64_t)((uintptr_t)vsv);	/* Yes, chain us to the end or */
-		else {
-			thread->machine.curctx->VMXsave = vsv;	/* to the start if the only one */
-			thread->machine.curctx->VMXlevel = NULL;	/* Ensure that we don't have a leftover level */
-		}
-
-		vsv->save_hdr.save_misc2 = 0;				/* Eye catcher for debug */
-		vsv->save_hdr.save_misc3 = 0;				/* Eye catcher for debug */
-		vsv->save_hdr.save_act = thread;
-	}
-	
-	ofsv = thread->machine.curctx->FPUsave;			/* Get the top float savearea */
-	
-	pfsv = NULL;
-	while(ofsv) {									/* Any float saved state? */
-		if(!(ofsv->save_hdr.save_level)) break;		/* Leave if this is user state */
-		pfsv = ofsv;								/* Save previous savearea address */
-		ofsv = CAST_DOWN(savearea_fpu *, ofsv->save_hdr.save_prev);	/* Get the one underneath ours */
-	}
-	
-	if(ofsv) {										/* Did we find one? */
-		if(pfsv) pfsv->save_hdr.save_prev = 0;		/* Yes, clear pointer to it (it should always be last) or */	
-		else thread->machine.curctx->FPUsave = NULL;	/* to the start if the only one */
-
-		save_release((struct savearea *)ofsv);				/* Release it */
-	}
-	
-	if(fsv) {										/* Are we sticking any floating point on this one? */
-		if(pfsv) pfsv->save_hdr.save_prev = (addr64_t)((uintptr_t)fsv);	/* Yes, chain us to the end or */
-		else {
-			thread->machine.curctx->FPUsave = fsv;	/* to the start if the only one */
-			thread->machine.curctx->FPUlevel = NULL;	/* Ensure that we don't have a leftover level */
-		}
-
-		fsv->save_hdr.save_misc2 = 0;				/* Eye catcher for debug */
-		fsv->save_hdr.save_misc3 = 0;				/* Eye catcher for debug */
-		fsv->save_hdr.save_act = thread;
-	}
-	
-}
-
-
-
-/*
- *		Releases a saved context.  We need this because the saved context is opaque.
- *
- */
-
-void
-act_thread_cfree(void *ctx)
-{
-
-  	struct savearea	*sv;
-  	savearea_fpu	*fsv;
-  	savearea_vec	*vsv;
-
-	sv = (struct savearea *)ctx;							/* Make this easier for C */
-	
-	fsv = CAST_DOWN(savearea_fpu *, sv->save_hdr.save_misc0);	/* Get a possible floating point savearea */ 
-	vsv = CAST_DOWN(savearea_vec *, sv->save_hdr.save_misc1);	/* Get a possible vector savearea */ 
-	
-	if((sv->save_hdr.save_misc2 != 0xDEBB1ED0) || (sv->save_hdr.save_misc3 != 0xE5DA11A5)) {	/* See if valid savearea */
-		panic("act_thread_cfree: attempt to detach invalid general context savearea - %p\n", sv);	/* Die */
-	}
-	
-	save_release(sv);								/* Toss the general savearea */
-
-	if(fsv) {										/* See if there is any saved floating point */ 
-		if((fsv->save_hdr.save_misc2 != 0xDEBB1ED0) || (fsv->save_hdr.save_misc3 != 0xE5DA11A5)) {	/* See if valid savearea */
-			panic("act_thread_cfree: attempt to detach invalid float context savearea - %p\n", fsv);	/* Die */
-		}
-		
-		save_release((struct savearea *)fsv);				/* Toss saved context */
-	}
-
-	if(vsv) {										/* See if there is any saved vector */
-		if((vsv->save_hdr.save_misc2 != 0xDEBB1ED0) || (vsv->save_hdr.save_misc3 != 0xE5DA11A5)) {	/* See if valid savearea */
-			panic("act_thread_cfree: attempt to detach invalid vector context savearea - %p\n", vsv);	/* Die */
-		}
-		
-		save_release((struct savearea *)vsv);				/* Toss saved context */
-	}
-	
-	return;
-}
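-
-#if 0	/* Illustrative sketch (not compiled): the intended lifecycle of an
-	 * opaque saved context, e.g. across a vfork-style handoff.  The three
-	 * calls are the functions above; the surrounding flow is assumed. */
-	void *ctx;
-
-	ctx = act_thread_csave();		/* Snapshot the current activation's user context */
-	/* ... let something else run on this activation ... */
-	act_thread_catt(ctx);			/* Reattach the snapshot; ctx is consumed */
-	/* ... or, if the snapshot is no longer wanted, release it instead: */
-	/* act_thread_cfree(ctx); */
-#endif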
-
-/*
- * thread_enable_fpe:
- *
- * enables or disables floating point exceptions for the thread.
- * returns old state
- */
-int thread_enable_fpe(
-	thread_t		thread,
-	int				onoff)
-{
-        struct savearea *sv;
-        uint64_t oldmsr;
-
-        sv = find_user_regs(thread);										/* Find the user registers */
-        if(!sv) sv = get_user_regs(thread);									/* Didn't find any, allocate and initialize one */
-
-        oldmsr = sv->save_srr1;												/* Get the old msr */
-
-        if(onoff) sv->save_srr1 = oldmsr | (uint64_t)(MASK(MSR_FE0) | MASK(MSR_FE1));	/* Flip on precise FP exceptions */
-        else sv->save_srr1 = oldmsr & (uint64_t)(~(MASK(MSR_FE0) | MASK(MSR_FE1)));		/* Flip off precise FP exceptions */
-
-        return ((oldmsr & (MASK(MSR_FE0) | MASK(MSR_FE1))) != 0);			/* Return if it was enabled or not */
-}   
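-
-#if 0	/* Illustrative sketch (not compiled): enable precise FP exceptions
-	 * around a region, then restore the previous setting.  The thread_t
-	 * value is assumed to come from the caller. */
-	int old;
-
-	old = thread_enable_fpe(thread, 1);		/* Turn FE0/FE1 on, remember the old state */
-	/* ... code that wants precise floating point exceptions ... */
-	(void) thread_enable_fpe(thread, old);	/* Put it back the way it was */
-#endif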
diff --git a/osfmk/ppc/task.h b/osfmk/ppc/task.h
deleted file mode 100644
index 3c9ad4164..000000000
--- a/osfmk/ppc/task.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990,1989 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-
-/*
- * No machine dependent task fields
- */
-
-#define MACHINE_TASK
-
diff --git a/osfmk/ppc/thread.h b/osfmk/ppc/thread.h
deleted file mode 100644
index d3e4b1109..000000000
--- a/osfmk/ppc/thread.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-/*
- *	File:	machine/thread.h
- *
- *	This file contains the structure definitions for the thread
- *	state as applied to PPC processors.
- */
-
-#ifndef	_PPC_THREAD_H_
-#define _PPC_THREAD_H_
-
-#include <mach/boolean.h>
-#include <mach/ppc/vm_types.h>
-#include <mach/thread_status.h>
-#include <kern/lock.h>
-#include <kern/clock.h>
-#include <ppc/savearea.h>
-
-/*
- * Kernel state structure
- *
- * This holds the kernel state that is saved and restored across context
- * switches. 
- */
-
-/*
- * PPC process control block
- *
- * The PCB holds normal context.  It does not contain vector or floating point 
- * registers.
- *
- */
-
-typedef struct savearea pcb;
-typedef struct savearea *pcb_t;
-
-struct facility_context {
-
-	savearea_fpu	*FPUsave;		/* The floating point savearea */
-	struct savearea		*FPUlevel;		/* The floating point context level */
-	unsigned int	FPUcpu;			/* The last processor to enable floating point */
-	unsigned int	FPUsync;		/* Sync lock */
-	savearea_vec	*VMXsave;		/* The VMX savearea */
-	struct savearea		*VMXlevel;		/* The VMX context level */
-	unsigned int	VMXcpu;			/* The last processor to enable vector */
-	unsigned int	VMXsync;		/* Sync lock */
-	struct thread	*facAct;
-};
-
-typedef struct facility_context facility_context;
-
-/*
- * Maps state flavor to number of words in the state:
- */
-__private_extern__ unsigned int _MachineStateCount[];
-
-#define USER_REGS(ThrAct)	((ThrAct)->machine.pcb)
-
-#define	user_pc(ThrAct)		((ThrAct)->machine.pcb->save_srr0)
-
-#define act_machine_state_ptr(ThrAct)	(thread_state_t)USER_REGS(ThrAct)
-
-struct machine_thread {
-	/*
-	 * Pointers to process control blocks.  Potentially one for each
-	 * active facility context.  They may point to the same saveareas.
-	 */
-	struct savearea		*pcb;			/* The "normal" savearea */
-	struct savearea		*upcb;			/* The "normal" user savearea */
-	facility_context *curctx;		/* Current facility context */
-	facility_context *deferctx;		/* Deferred facility context */
-	facility_context facctx;		/* "Normal" facility context */
-	struct vmmCntrlEntry *vmmCEntry;	/* Pointer current emulation context or 0 */
-	struct vmmCntrlTable *vmmControl;	/* Pointer to virtual machine monitor control table */
-	uint64_t		qactTimer;		/* Time thread needs to interrupt. This is a single-shot timer. Zero is unset */
-	unsigned int	umwSpace;		/* Address space ID for user memory window */
-#define	umwSwitchAway 0x80000000	/* Context switched away from thread since MapUserAddressWindow */
-#define umwSwitchAwayb 0
-	addr64_t		umwRelo;		/* Relocation value for user memory window */
-	unsigned int	ksp;			/* points to TOP OF STACK or zero */
-	unsigned int	preemption_count;	/* preemption count */
-	struct per_proc_info	*PerProc;	/* current per processor data */
-	unsigned int	bbDescAddr;		/* Points to Blue Box Trap descriptor area in kernel (page aligned) */
-	unsigned int	bbUserDA;		/* Points to Blue Box Trap descriptor area in user (page aligned) */
-	unsigned int	bbTableStart;	/* Points to Blue Box Trap dispatch area in user */
-	unsigned int	emPendRupts;	/* Number of pending emulated interruptions */
-	unsigned int	bbTaskID;		/* Opaque task ID for Blue Box threads */
-	unsigned int	bbTaskEnv;		/* Opaque task data reference for Blue Box threads */
-	unsigned int	specFlags;		/* Special flags */
-    unsigned int    pmcovfl[8];     /* PMC overflow count */
-    unsigned int    perfmonFlags;   /* Perfmon facility flags */
-    unsigned int	bbTrap;			/* Blue Box trap vector */
-    unsigned int	bbSysCall;		/* Blue Box syscall vector */
-    unsigned int	bbInterrupt;	/* Blue Box interrupt vector */
-    unsigned int	bbPending;		/* Blue Box pending interrupt vector */
-
-/* special flags bits */
-
-#define ignoreZeroFaultbit		0
-#define floatUsedbit			1
-#define vectorUsedbit			2
-#define runningVMbit			4
-#define floatCngbit				5
-#define vectorCngbit			6
-#define timerPopbit				7
-#define userProtKeybit			8
-#define FamVMenabit		 	    11
-#define FamVMmodebit			12
-#define perfMonitorbit          13
-#define OnProcbit				14
-/*	NOTE: Do not move or assign bit 31 without changing exception vector ultra fast path code */
-#define bbThreadbit				28
-#define bbNoMachSCbit	 		29
-#define bbPreemptivebit			30
-#define spfReserved1			31	/* See note above */
-
-#define ignoreZeroFault		0x80000000  /* (1<<(31-ignoreZeroFaultbit)) */
-#define floatUsed			0x40000000  /* (1<<(31-floatUsedbit)) */
-#define vectorUsed			0x20000000  /* (1<<(31-vectorUsedbit)) */
-
-#define runningVM			0x08000000  /* (1<<(31-runningVMbit)) */
-#define floatCng			0x04000000  /* (1<<(31-floatCngbit)) */
-#define vectorCng			0x02000000  /* (1<<(31-vectorCngbit)) */
-#define timerPop			0x01000000  /* (1<<(31-timerPopbit)) */
-
-#define userProtKey			0x00800000  /* (1<<(31-userProtKeybit)) */
-
-#define	FamVMena			0x00100000  /* (1<<(31-FamVMenabit)) */
-#define	FamVMmode			0x00080000  /* (1<<(31-FamVMmodebit)) */
-#define perfMonitor         0x00040000  /* (1<<(31-perfMonitorbit)) */
-#define	OnProc				0x00020000  /* (1<<(31-OnProcbit)) */
-
-#define bbThread			0x00000008  /* (1<<(31-bbThreadbit)) */
-#define bbNoMachSC			0x00000004  /* (1<<(31-bbNoMachSCbit)) */
-#define bbPreemptive		0x00000002  /* (1<<(31-bbPreemptivebit)) */
-
-#define fvChkb 0
-#define fvChk 0x80000000
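-
-/* Illustrative note: PowerPC numbers bits from the most significant end, so
- * bit 0 is the 0x80000000 bit and each mask above is (1 << (31 - bit)).
- * For example, floatUsedbit is 1, giving 1 << (31 - 1) == 0x40000000. */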
-
-#ifdef	MACH_BSD
-	uint64_t        cthread_self;	/* for use of cthread package  */
-#endif
-
-};
-
-extern struct savearea *find_user_regs(thread_t);
-extern struct savearea *get_user_regs(thread_t);
-extern struct savearea_fpu *find_user_fpu(thread_t);
-extern struct savearea_vec *find_user_vec(thread_t);
-extern struct savearea_vec *find_user_vec_curr(void);
-extern int thread_enable_fpe(thread_t act, int onoff);
-
-extern struct savearea *find_kern_regs(thread_t);
-
-extern void *act_thread_csave(void);
-extern void act_thread_catt(void *ctx);
-extern void act_thread_cfree(void *ctx);
-
-/*
- * Return address of the function that called current function, given
- *	address of the first parameter of current function. We can't
- *      do it this way, since parameter was copied from a register
- *      into a local variable. Call an assembly sub-function to 
- *      return this.
- */
-
-extern vm_offset_t getrpc(void);
-#define	GET_RETURN_PC(addr)	getrpc()
-
-#define STACK_IKS(stack)		\
-	((vm_offset_t)(((vm_offset_t)stack)+KERNEL_STACK_SIZE)-FM_SIZE)
-
-/*
- * Defining this indicates that MD code will supply an exception()
- * routine, conformant with kern/exception.c (dependency alert!)
- * but which does wonderfully fast, machine-dependent magic.
- */
-
-#define MACHINE_FAST_EXCEPTION 1
-
-#endif	/* _PPC_THREAD_H_ */
diff --git a/osfmk/ppc/trap.c b/osfmk/ppc/trap.c
deleted file mode 100644
index c30bf7381..000000000
--- a/osfmk/ppc/trap.c
+++ /dev/null
@@ -1,1012 +0,0 @@
-/*
- * Copyright (c) 2000-2007 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#include <mach_kdb.h>
-#include <mach_kdp.h>
-#include <debug.h>
-
-#include <mach/mach_types.h>
-#include <mach/mach_traps.h>
-#include <mach/thread_status.h>
-
-#include <kern/processor.h>
-#include <kern/thread.h>
-#include <kern/exception.h>
-#include <kern/syscall_sw.h>
-#include <kern/cpu_data.h>
-#include <kern/debug.h>
-
-#include <vm/vm_fault.h>
-#include <vm/vm_kern.h> 	/* For kernel_map */
-
-#include <ppc/misc_protos.h>
-#include <ppc/trap.h>
-#include <ppc/exception.h>
-#include <ppc/proc_reg.h>	/* for SR_xxx definitions */
-#include <ppc/pmap.h>
-#include <ppc/mem.h>
-#include <ppc/mappings.h>
-#include <ppc/Firmware.h>
-#include <ppc/low_trace.h>
-#include <ppc/Diagnostics.h>
-#include <ppc/hw_perfmon.h>
-#include <ppc/fpu_protos.h>
-
-#include <sys/kdebug.h>
-
-volatile perfCallback perfTrapHook; /* Pointer to CHUD trap hook routine */
-volatile perfCallback perfASTHook;  /* Pointer to CHUD AST hook routine */
-
-#if CONFIG_DTRACE
-extern kern_return_t dtrace_user_probe(ppc_saved_state_t *sv);
-
-/* See <rdar://problem/4613924> */
-perfCallback tempDTraceTrapHook = NULL; /* Pointer to DTrace fbt trap hook routine */
-
-extern boolean_t dtrace_tally_fault(user_addr_t);
-#endif
-
-#if	MACH_KDB
-#include <ddb/db_watch.h>
-#include <ddb/db_run.h>
-#include <ddb/db_break.h>
-#include <ddb/db_trap.h>
-
-boolean_t let_ddb_vm_fault = FALSE;
-boolean_t	debug_all_traps_with_kdb = FALSE;
-extern struct db_watchpoint *db_watchpoint_list;
-extern boolean_t db_watchpoints_inserted;
-extern boolean_t db_breakpoints_inserted;
-
-
-
-#endif	/* MACH_KDB */
-
-extern task_t bsd_init_task;
-extern char init_task_failure_data[];
-extern int not_in_kdp;
-
-#define	PROT_EXEC	(VM_PROT_EXECUTE)
-#define PROT_RO		(VM_PROT_READ)
-#define PROT_RW		(VM_PROT_READ|VM_PROT_WRITE)
-
-
-/* A useful macro to update the ppc_exception_state in the PCB
- * before calling doexception
- */
-#define UPDATE_PPC_EXCEPTION_STATE {							\
-	thread_t _thread = current_thread();							\
-	_thread->machine.pcb->save_dar = (uint64_t)dar;					\
-	_thread->machine.pcb->save_dsisr = dsisr;						\
-	_thread->machine.pcb->save_exception = trapno / T_VECTOR_SIZE;	/* back to powerpc */ \
-}
-
-void unresolved_kernel_trap(int trapno,
-				   struct savearea *ssp,
-				   unsigned int dsisr,
-				   addr64_t dar,
-				   const char *message);
-
-static void handleMck(struct savearea *ssp);		/* Common machine check handler */
-
-#ifdef MACH_BSD
-extern void get_procrustime(time_value_t *);
-extern void bsd_uprofil(time_value_t *, user_addr_t);
-#endif /* MACH_BSD */
-
-
-struct savearea *trap(int trapno,
-			     struct savearea *ssp,
-			     unsigned int dsisr,
-			     addr64_t dar)
-{
-	int exception;
-	mach_exception_code_t code = 0;
-	mach_exception_subcode_t subcode = 0;
-	vm_map_t map;
-	vm_map_offset_t offset;
-	thread_t thread = current_thread();
-	boolean_t intr;
-	ast_t *myast;
-	int ret;
-	
-#ifdef MACH_BSD
-	time_value_t tv;
-#endif /* MACH_BSD */
-
-	myast = ast_pending();
-	perfCallback fn = perfASTHook;
-	if(fn) {
-		if(*myast & AST_CHUD_ALL) {
-			fn(trapno, ssp, dsisr, (unsigned int)dar);
-		}
-	} else {
-		*myast &= ~AST_CHUD_ALL;
-	}
-
-	fn = perfTrapHook;
-	if(fn) {							/* Is there a hook? */
-		if(fn(trapno, ssp, dsisr, (unsigned int)dar) == KERN_SUCCESS) return ssp;	/* If it succeeds, we are done... */
-	}
-
-#if CONFIG_DTRACE
-	if(tempDTraceTrapHook) {							/* Is there a hook? */
-		if(tempDTraceTrapHook(trapno, ssp, dsisr, (unsigned int)dar) == KERN_SUCCESS) return ssp;	/* If it succeeds, we are done... */
-	}
-#endif
-
-#if 0
-	{
-		extern void fctx_test(void);
-		fctx_test();
-	}
-#endif
-
-	exception = 0;								/* Clear exception for now */
-
-/*
- *	Remember that we are disabled for interruptions when we come in here.  Because
- *	of latency concerns, we need to re-enable interruptions as soon as we can
- *	if the interrupted context had them enabled itself.
- */
-
-	intr = (ssp->save_srr1 & MASK(MSR_EE)) != 0;	/* Remember if we were enabled */
-
-	/* Handle kernel traps first */
-
-	if (!USER_MODE(ssp->save_srr1)) {
-		/*
-		 * Trap came from kernel
-		 */
-		switch (trapno) {
-
-		case T_PREEMPT:			/* Handle a preempt trap */
-			ast_taken(AST_PREEMPTION, FALSE);
-			break;	
-
-		case T_PERF_MON:
-			perfmon_handle_pmi(ssp);
-			break;
-
-		case T_RESET:					/* Reset interruption */
-			if (!Call_Debugger(trapno, ssp))
-				unresolved_kernel_trap(trapno, ssp, dsisr, dar, NULL);
-			break;						/* We just ignore these */
-		
-		/*
-		 * These trap types should never be seen by trap()
-		 * in kernel mode, anyway.
-		 * Some are interrupts that should be seen by
-		 * interrupt() others just don't happen because they
-		 * are handled elsewhere. Some could happen but are
-		 * considered to be fatal in kernel mode.
-		 */
-		case T_DECREMENTER:
-		case T_IN_VAIN:			/* Shouldn't ever see this, lowmem_vectors eats it */
-		case T_SYSTEM_MANAGEMENT:
-		case T_ALTIVEC_ASSIST:
-		case T_INTERRUPT:
-		case T_FP_UNAVAILABLE:
-		case T_IO_ERROR:
-		case T_RESERVED:
-		default:
-			unresolved_kernel_trap(trapno, ssp, dsisr, dar, NULL);
-			break;
-
-
-/*
- *			Here we handle a machine check in the kernel
- */
-
-		case T_MACHINE_CHECK:
-			handleMck(ssp);						/* Common to both user and kernel */
-			break;
-
-
-		case T_ALIGNMENT:
-/*
-*			If enaNotifyEMb is set, we get here, and
-*			we have actually already emulated the unaligned access.
-*			All that we want to do here is to ignore the interrupt. This is to allow logging or
-*			tracing of unaligned accesses.  
-*/
-			
-			if(ssp->save_hdr.save_misc3) {				/* Was it a handled exception? */
-				unresolved_kernel_trap(trapno, ssp, dsisr, dar, NULL);	/* Go panic */
-				break;
-			}
-			KERNEL_DEBUG_CONSTANT(
-				MACHDBG_CODE(DBG_MACH_EXCP_ALNG, 0) | DBG_FUNC_NONE,
-				(int)ssp->save_srr0 - 4, (int)dar, (int)dsisr, (int)ssp->save_lr, 0);
-			break;
-
-		case T_EMULATE:
-/*
-*			If enaNotifyEMb is set we get here, and
-*			we have actually already emulated the instruction.
-*			All that we want to do here is to ignore the interrupt. This is to allow logging or
-*			tracing of emulated instructions.  
-*/
-
-			KERNEL_DEBUG_CONSTANT(
-				MACHDBG_CODE(DBG_MACH_EXCP_EMUL, 0) | DBG_FUNC_NONE,
-				(int)ssp->save_srr0 - 4, (int)((savearea_comm *)ssp)->save_misc2, (int)dsisr, (int)ssp->save_lr, 0);
-			break;
-
-
-
-
-			
-		case T_TRACE:
-		case T_RUNMODE_TRACE:
-		case T_INSTRUCTION_BKPT:
-			if (!Call_Debugger(trapno, ssp))
-				unresolved_kernel_trap(trapno, ssp, dsisr, dar, NULL);
-			break;
-
-		case T_PROGRAM:
-			if (ssp->save_srr1 & MASK(SRR1_PRG_TRAP)) {
-				if (!Call_Debugger(trapno, ssp))
-					unresolved_kernel_trap(trapno, ssp, dsisr, dar, NULL);
-			} else {
-				unresolved_kernel_trap(trapno, ssp, 
-							dsisr, dar, NULL);
-			}
-			break;
-
-		case T_DATA_ACCESS:
-#if	MACH_KDB
-			mp_disable_preemption();
-			if (debug_mode
-			    && getPerProc()->debugger_active
-			    && !let_ddb_vm_fault) {
-				/*
-				 * Force kdb to handle this one.
-				 */
-				kdb_trap(trapno, ssp);
-			}
-			mp_enable_preemption();
-#endif	/* MACH_KDB */
-			/* can we take this during normal panic dump operation? */
-			if (debug_mode
-			    && getPerProc()->debugger_active
-			    && !not_in_kdp) {
-			        /* 
-				 * Access fault while in kernel core dump.
-				 */
-			        kdp_dump_trap(trapno, ssp); 
-			}
-
-
-			if(ssp->save_dsisr & dsiInvMode) {			/* Did someone try to reserve cache inhibited? */
-				panic("trap: disallowed access to cache inhibited memory - %016llX\n", dar);
-			}
-
-			if(intr) ml_set_interrupts_enabled(TRUE);	/* Enable if we were */
-			
-			if(((dar >> 28) < 0xE) || ((dar >> 28) > 0xF))  {	/* User memory window access? */
-			
-				offset = (vm_map_offset_t)dar;				/* Set the failing address */
-				map = kernel_map;						/* No, this is a normal kernel access */
-				
-/*
- *	Note: Some ROM device drivers will access page 0 when they start.  The IOKit will 
- *	set a flag to tell us to ignore any access fault on page 0.  After the driver is
- *	opened, it will clear the flag.
- */
-				if((0 == (offset & -PAGE_SIZE)) && 		/* Check for access of page 0 and */
-				  ((thread->machine.specFlags) & ignoreZeroFault)) {	/* special case of ignoring page zero faults */
-					ssp->save_srr0 += 4;				/* Point to next instruction */
-					break;
-				}
-
-#if CONFIG_DTRACE
-				if (thread->options & TH_OPT_DTRACE) {	/* Executing under dtrace_probe? */
-					if (dtrace_tally_fault(dar)) { /* Should a fault under dtrace be ignored? */
-						ssp->save_srr0 += 4;                /* Point to next instruction */
-						break;
-					} else {
-						unresolved_kernel_trap(trapno, ssp, dsisr, dar, "Unexpected page fault under dtrace_probe");
-					}
-				}
-#endif
-
-				code = vm_fault(map, vm_map_trunc_page(offset),
-						dsisr & MASK(DSISR_WRITE) ? PROT_RW : PROT_RO,
-						FALSE, THREAD_UNINT, NULL, vm_map_trunc_page(0));
-
-				if (code != KERN_SUCCESS) {
-					unresolved_kernel_trap(trapno, ssp, dsisr, dar, NULL);
-				} else { 
-					ssp->save_hdr.save_flags |= SAVredrive;	/* Tell low-level to re-try fault */
-					ssp->save_dsisr = (ssp->save_dsisr & 
-						~((MASK(DSISR_NOEX) | MASK(DSISR_PROT)))) | MASK(DSISR_HASH);	/* Make sure this is marked as a miss */
-				}
-				break;
-			}
-
-			/* If we get here, the fault was due to a user memory window access */
-
-#if CONFIG_DTRACE
-			if (thread->options & TH_OPT_DTRACE) {	/* Executing under dtrace_probe? */
-				if (dtrace_tally_fault(dar)) { /* Should a user memory window access fault under dtrace be ignored? */
-					if (thread->recover) {
-						ssp->save_srr0 = thread->recover;
-						thread->recover = (vm_offset_t)NULL;
-					} else {
-						unresolved_kernel_trap(trapno, ssp, dsisr, dar, "copyin/out has no recovery point");
-					}
-					break;
-				} else {
-					unresolved_kernel_trap(trapno, ssp, dsisr, dar, "Unexpected UMW page fault under dtrace_probe");
-				}
-			}
-#endif
-
-			map = thread->map;
-			
-			offset = (vm_map_offset_t)(thread->machine.umwRelo + dar);	/* Compute the user space address */
-
-			code = vm_fault(map, vm_map_trunc_page(offset),
-					dsisr & MASK(DSISR_WRITE) ? PROT_RW : PROT_RO,
-					FALSE, THREAD_UNINT, NULL, vm_map_trunc_page(0));
-
-			/* If we failed, there should be a recovery
-			 * spot to rfi to.
-			 */
-			if (code != KERN_SUCCESS) {
-				if (thread->recover) {
-					ssp->save_srr0 = thread->recover;
-					thread->recover = (vm_offset_t)NULL;
-				} else {
-					unresolved_kernel_trap(trapno, ssp, dsisr, dar, "copyin/out has no recovery point");
-				}
-			}
-			else { 
-				ssp->save_hdr.save_flags |= SAVredrive;	/* Tell low-level to re-try fault */
-				ssp->save_dsisr = (ssp->save_dsisr & 
-					~((MASK(DSISR_NOEX) | MASK(DSISR_PROT)))) | MASK(DSISR_HASH);	/* Make sure this is marked as a miss */
-			}
-			
-			break;
-			
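-		/* A note on the "redrive" pattern used above and in the fault
-		 * cases below: once vm_fault() has resolved the page, the
-		 * SAVredrive flag asks the low-level handler to restart the
-		 * access, and DSISR is rewritten to clear the protection and
-		 * no-execute bits and set the hash-miss bit, so the restarted
-		 * access is treated as a plain PTE miss.
-		 */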
-		case T_INSTRUCTION_ACCESS:
-
-#if	MACH_KDB
-			if (debug_mode
-			    && getPerProc()->debugger_active
-			    && !let_ddb_vm_fault) {
-				/*
-				 * Force kdb to handle this one.
-				 */
-				kdb_trap(trapno, ssp);
-			}
-#endif	/* MACH_KDB */
-
-			/* Same as for data access, except fault type
-			 * is PROT_EXEC and addr comes from srr0
-			 */
-
-			if(intr) ml_set_interrupts_enabled(TRUE);	/* Enable if we were */
-
-			map = kernel_map;
-			
-			code = vm_fault(map, vm_map_trunc_page(ssp->save_srr0),
-					(PROT_EXEC | PROT_RO), FALSE, THREAD_UNINT, NULL, vm_map_trunc_page(0));
-
-			if (code != KERN_SUCCESS) {
-				unresolved_kernel_trap(trapno, ssp, dsisr, dar, NULL);
-			} else { 
-				ssp->save_hdr.save_flags |= SAVredrive;	/* Tell low-level to re-try fault */
-				ssp->save_srr1 = (ssp->save_srr1 & 
-					~((unsigned long long)(MASK(DSISR_NOEX) | MASK(DSISR_PROT)))) | MASK(DSISR_HASH);		/* Make sure this is marked as a miss */
-			}
-			break;
-
-		/* Usually shandler handles all the system calls, but the
-		 * atomic thread switcher may throw up (via thandler) and
-		 * have to pass it up to the exception handler.
-		 */
-
-		case T_SYSTEM_CALL:
-			unresolved_kernel_trap(trapno, ssp, dsisr, dar, NULL);
-			break;
-
-		case T_AST:
-			unresolved_kernel_trap(trapno, ssp, dsisr, dar, NULL);
-			break;
-		}
-	} else {
-
-		/* 
-		 * Processing for user state traps with interrupt enabled
-		 * For T_AST, interrupts are enabled in the AST delivery
-		 */
-		if (trapno != T_AST) 
-			ml_set_interrupts_enabled(TRUE);
-
-#ifdef MACH_BSD
-		{
-			get_procrustime(&tv);
-		}
-#endif /* MACH_BSD */
-
-	
-		/*
-		 * Trap came from user task
-		 */
-
-		switch (trapno) {
-	
-			case T_PREEMPT:
-				unresolved_kernel_trap(trapno, ssp, dsisr, dar, NULL);
-				break;	
-	
-			case T_PERF_MON:
-				perfmon_handle_pmi(ssp);
-				break;
-	
-				/*
-				 * These trap types should never be seen by trap()
-				 * Some are interrupts that should be seen by
-				 * interrupt() others just don't happen because they
-				 * are handled elsewhere.
-				 */
-			case T_DECREMENTER:
-			case T_IN_VAIN:								/* Shouldn't ever see this, lowmem_vectors eats it */
-			case T_INTERRUPT:
-			case T_FP_UNAVAILABLE:
-			case T_SYSTEM_MANAGEMENT:
-			case T_RESERVED:
-			case T_IO_ERROR:
-				
-			default:
-	
-				ml_set_interrupts_enabled(FALSE);		/* Turn off interruptions */
-	
-				panic("Unexpected user state trap(cpu %d): 0x%08X DSISR=0x%08X DAR=0x%016llX PC=0x%016llX, MSR=0x%016llX\n",
-					   cpu_number(), trapno, dsisr, dar, ssp->save_srr0, ssp->save_srr1);
-				break;
-	
-	
-	/*
-	 *			Here we handle a machine check in user state
-	 */
-	
-			case T_MACHINE_CHECK:
-				handleMck(ssp);							/* Common to both user and kernel */
-				break;
-	
-			case T_RESET:
-				ml_set_interrupts_enabled(FALSE);		/* Turn off interruptions */
-				if (!Call_Debugger(trapno, ssp))
-					panic("Unexpected Reset exception: srr0 = %016llx, srr1 = %016llx\n",
-						ssp->save_srr0, ssp->save_srr1);
-				break;									/* We just ignore these */
-	
-			case T_ALIGNMENT:
-	/*
-	*			If enaNotifyEMb is set, we get here, and
-	*			we have actually already emulated the unaligned access.
-	*			All that we want to do here is to ignore the interrupt. This is to allow logging or
-	*			tracing of unaligned accesses.  
-	*/
-				
-				KERNEL_DEBUG_CONSTANT(
-					MACHDBG_CODE(DBG_MACH_EXCP_ALNG, 0) | DBG_FUNC_NONE,
-					(int)ssp->save_srr0 - 4, (int)dar, (int)dsisr, (int)ssp->save_lr, 0);
-				
-				if(ssp->save_hdr.save_misc3) {			/* Was it a handled exception? */
-					exception = EXC_BAD_ACCESS;			/* Yes, throw exception */
-					code = EXC_PPC_UNALIGNED;
-					subcode = dar;
-				}
-				break;
-	
-			case T_EMULATE:
-	/*
-	*			If enaNotifyEMb is set we get here, and
-	*			we have actually already emulated the instruction.
-	*			All that we want to do here is to ignore the interrupt. This is to allow logging or
-	*			tracing of emulated instructions.  
-	*/
-	
-				KERNEL_DEBUG_CONSTANT(
-					MACHDBG_CODE(DBG_MACH_EXCP_EMUL, 0) | DBG_FUNC_NONE,
-					(int)ssp->save_srr0 - 4, (int)((savearea_comm *)ssp)->save_misc2, (int)dsisr, (int)ssp->save_lr, 0);
-				break;
-	
-			case T_TRACE:			/* Real PPC chips */
-			case T_INSTRUCTION_BKPT:
-				exception = EXC_BREAKPOINT;
-				code = EXC_PPC_TRACE;
-				subcode = ssp->save_srr0;
-				break;
-	
-			case T_PROGRAM:
-				if (ssp->save_srr1 & MASK(SRR1_PRG_FE)) {
-					fpu_save(thread->machine.curctx);
-					UPDATE_PPC_EXCEPTION_STATE;
-					exception = EXC_ARITHMETIC;
-					code = EXC_ARITHMETIC;
-				
-					mp_disable_preemption();
-					subcode = ssp->save_fpscr;
-					mp_enable_preemption();
-				} 	
-				else if (ssp->save_srr1 & MASK(SRR1_PRG_ILL_INS)) {
-					
-					UPDATE_PPC_EXCEPTION_STATE
-					exception = EXC_BAD_INSTRUCTION;
-					code = EXC_PPC_UNIPL_INST;
-					subcode = ssp->save_srr0;
-				} else if ((unsigned int)ssp->save_srr1 & MASK(SRR1_PRG_PRV_INS)) {
-	
-					UPDATE_PPC_EXCEPTION_STATE;
-					exception = EXC_BAD_INSTRUCTION;
-					code = EXC_PPC_PRIVINST;
-					subcode = ssp->save_srr0;
-				} else if (ssp->save_srr1 & MASK(SRR1_PRG_TRAP)) {
-					unsigned int inst;
-	
-					if (copyin(ssp->save_srr0, (char *) &inst, 4 )) panic("copyin failed\n");
-					
-					if(dgWork.dgFlags & enaDiagTrap) {	/* Is the diagnostic trap enabled? */
-						if((inst & 0xFFFFFFF0) == 0x0FFFFFF0) {	/* Is this a TWI 31,R31,0xFFFx? */
-							if(diagTrap(ssp, inst & 0xF)) {	/* Call the trap code */
-								ssp->save_srr0 += 4ULL;		/* If we eat the trap, bump pc */
-								exception = 0;				/* Clear exception */
-								break;						/* All done here */
-							}
-						}
-					}
-					
-#if CONFIG_DTRACE
-					if(inst == 0x0FFFDDDD) {				/* Is this the dtrace trap? */
-						ret = dtrace_user_probe((ppc_saved_state_t *)ssp);	/* Go check if it is for real and process if so... */
-						if(ret == KERN_SUCCESS) {			/* Was it really? */
-							exception = 0;					/* Clear the exception */
-							break;							/* Go flow through and out... */
-						}
-					}
-#endif				
-					
-					UPDATE_PPC_EXCEPTION_STATE;
-					
-					if (inst == 0x7FE00008) {
-						exception = EXC_BREAKPOINT;
-						code = EXC_PPC_BREAKPOINT;
-					} else {
-						exception = EXC_SOFTWARE;
-						code = EXC_PPC_TRAP;
-					}
-					subcode = ssp->save_srr0;
-				}
-				break;
-
-#if CONFIG_DTRACE
-			case T_DTRACE_RET:								/* Are we returning from a dtrace injection? */	
-				ret = dtrace_user_probe((ppc_saved_state_t *)ssp);	/* Call the probe function if so... */
-				if(ret == KERN_SUCCESS) {					/* Did this actually work? */
-					exception = 0;							/* Clear the exception */
-					break;									/* Go flow through and out... */
-				}
-				break;
-#endif				
-				
-			case T_ALTIVEC_ASSIST:
-				UPDATE_PPC_EXCEPTION_STATE;
-				exception = EXC_ARITHMETIC;
-				code = EXC_PPC_ALTIVECASSIST;
-				subcode = ssp->save_srr0;
-				break;
-	
-			case T_DATA_ACCESS:
-				map = thread->map;
-	
-				if(ssp->save_dsisr & dsiInvMode) {			/* Did someone try to reserve cache inhibited? */
-					UPDATE_PPC_EXCEPTION_STATE;				/* Don't even bother VM with this one */
-					exception = EXC_BAD_ACCESS;
-					subcode = dar;
-					break;
-				}
-				
-				code = vm_fault(map, vm_map_trunc_page(dar),
-					 dsisr & MASK(DSISR_WRITE) ? PROT_RW : PROT_RO,
-					 FALSE, THREAD_ABORTSAFE, NULL, vm_map_trunc_page(0));
-	
-				if ((code != KERN_SUCCESS) && (code != KERN_ABORTED)) {
-					UPDATE_PPC_EXCEPTION_STATE;
-					exception = EXC_BAD_ACCESS;
-					subcode = dar;
-				} else { 
-					ssp->save_hdr.save_flags |= SAVredrive;	/* Tell low-level to retry fault */
-					ssp->save_dsisr = (ssp->save_dsisr & 
-						~((MASK(DSISR_NOEX) | MASK(DSISR_PROT)))) | MASK(DSISR_HASH);	/* Make sure this is marked as a miss */
-				}
-				break;
-				
-			case T_INSTRUCTION_ACCESS:
-				/* Same as for data access, except fault type
-				 * is PROT_EXEC and addr comes from srr0
-				 */
-				map = thread->map;
-				
-				code = vm_fault(map, vm_map_trunc_page(ssp->save_srr0),
-						(PROT_EXEC | PROT_RO), FALSE, THREAD_ABORTSAFE, NULL, vm_map_trunc_page(0));
-	
-				if ((code != KERN_SUCCESS) && (code != KERN_ABORTED)) {
-					UPDATE_PPC_EXCEPTION_STATE;
-					exception = EXC_BAD_ACCESS;
-					subcode = ssp->save_srr0;
-				} else { 
-					ssp->save_hdr.save_flags |= SAVredrive;	/* Tell low-level to re-try fault */
-					ssp->save_srr1 = (ssp->save_srr1 & 
-						~((unsigned long long)(MASK(DSISR_NOEX) | MASK(DSISR_PROT)))) | MASK(DSISR_HASH);		/* Make sure this is marked as a miss */
-				}
-				break;
-	
-			case T_AST:
-				/* AST delivery is done below */
-				break;
-			
-		}
-		
-#ifdef MACH_BSD
-		{
-		bsd_uprofil(&tv, ssp->save_srr0);
-		}
-#endif /* MACH_BSD */
-	}
-
-	if (exception) {
-		/* if this is the init task, save the exception information */
-		/* this probably is a fatal exception */
-#if 0
-		if(bsd_init_task == current_task()) {
-			char *buf;
-        		int i;
-
-			buf = init_task_failure_data;
-
-
-			buf += sprintf(buf, "Exception Code = 0x%x, Subcode = 0x%x\n", code, subcode);
-			buf += sprintf(buf, "DSISR = 0x%08x, DAR = 0x%016llx\n"
-								, dsisr, dar);
-
-			for (i=0; i<32; i++) {
-		       		if ((i % 8) == 0) {
-					buf += sprintf(buf, "\n%4d :",i);
-				}
-				buf += sprintf(buf, " %08x",*(&ssp->save_r0+i));
-			}
-
-        		buf += sprintf(buf, "\n\n");
-        		buf += sprintf(buf, "cr        = 0x%08X\t\t",ssp->save_cr);
-        		buf += sprintf(buf, "xer       = 0x%08X\n",ssp->save_xer);
-        		buf += sprintf(buf, "lr        = 0x%016llX\t\t",ssp->save_lr);
-        		buf += sprintf(buf, "ctr       = 0x%016llX\n",ssp->save_ctr); 
-        		buf += sprintf(buf, "srr0(iar) = 0x%016llX\t\t",ssp->save_srr0);
-        		buf += sprintf(buf, "srr1(msr) = 0x%016llX\n",ssp->save_srr1);
-        		buf += sprintf(buf, "\n\n");
-
-        		/* generate some stack trace */
-        		buf += sprintf(buf, "Application level back trace:\n");
-        		if (ssp->save_srr1 & MASK(MSR_PR)) { 
-                	   char *addr = (char*)ssp->save_r1;
-                	   unsigned int stack_buf[3];
-                	   for (i = 0; i < 8; i++) {
-                        	if (addr == (char*)NULL)
-                               		break;
-                        	if (!copyin(ssp->save_r1,(char*)stack_buf, 
-							3 * sizeof(int))) {
-                               		buf += sprintf(buf, "0x%08X : 0x%08X\n"
-						,addr,stack_buf[2]);
-                               		addr = (char*)stack_buf[0];
-                        	} else {
-                               		break;
-                       	   	}
-                	   }
-        		}
-			buf[0] = '\0';
-		}
-#endif
-		doexception(exception, code, subcode);
-	}
-	/* AST delivery
-	 * Check to see if we need an AST, if so take care of it here
-	 */
-	ml_set_interrupts_enabled(FALSE);
-
-	if (USER_MODE(ssp->save_srr1)) {
-		myast = ast_pending();
-		while (*myast & AST_ALL) {
-			ast_taken(AST_ALL, intr);
-			ml_set_interrupts_enabled(FALSE);
-			myast = ast_pending();
-		}
-	}
-
-	return ssp;
-}
-
-/* This routine is called from assembly before each and every system call.
- * It must preserve r3.
- */
-
-extern int syscall_trace(int, struct savearea *);
-
-
-extern int pmdebug;
-
-int syscall_trace(int retval, struct savearea *ssp)
-{
-	int i, argc;
-	int kdarg[3];
-/* Always prepare to trace mach system calls */
-
-	kdarg[0]=0;
-	kdarg[1]=0;
-	kdarg[2]=0;
-	
-	argc = mach_trap_table[-((unsigned int)ssp->save_r0)].mach_trap_arg_count;
-	
-	if (argc > 3)
-		argc = 3;
-	
-	for (i=0; i < argc; i++)
-		kdarg[i] = (int)*(&ssp->save_r3 + i);
-	
-	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (-(ssp->save_r0))) | DBG_FUNC_START,
-		kdarg[0], kdarg[1], kdarg[2], 0, 0);
-
-	return retval;
-}
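-
-/* Note: Mach traps enter with a negative number in r0 (BSD syscalls use
- * positive numbers), so the table index is recovered by negation, as
- * above. A minimal sketch of the convention (the specific trap number is
- * for illustration):
- *
- *	int r0 = -31;				// mach_msg_trap
- *	unsigned int idx = -((unsigned int)r0);	// 31, index into mach_trap_table
- */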
-
-/* This routine is called from assembly after each mach system call
- * It must preserve r3.
- */
-
-extern int syscall_trace_end(int, struct savearea *);
-
-int syscall_trace_end(int retval, struct savearea *ssp)
-{
-	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,(-((unsigned int)ssp->save_r0))) | DBG_FUNC_END,
-		retval, 0, 0, 0, 0);
-	return retval;
-}
-
-/*
- * called from syscall if there is an error
- */
-
-int syscall_error(
-	int exception,
-	mach_exception_code_t code,
-	mach_exception_subcode_t subcode,
-	struct savearea *ssp)
-{
-	register thread_t thread;
-
-	thread = current_thread();
-
-	if (thread == 0)
-	    panic("syscall error in boot phase");
-
-	if (!USER_MODE(ssp->save_srr1))
-		panic("system call called from kernel");
-
-	doexception(exception, code, subcode);
-
-	return 0;
-}
-
-/* Pass up a server syscall/exception */
-void
-doexception(
-	    int exc,
-	    mach_exception_code_t code,
-	    mach_exception_subcode_t sub)
-{
-	mach_exception_data_type_t   codes[EXCEPTION_CODE_MAX];
-
-	codes[0] = code;	
-	codes[1] = sub;
-	exception_triage(exc, codes, 2);
-}
-
-const char *trap_type[] = {
-	"Unknown",
-	"0x100 - System reset",
-	"0x200 - Machine check",
-	"0x300 - Data access",
-	"0x400 - Inst access",
-	"0x500 - Ext int",
-	"0x600 - Alignment",
-	"0x700 - Program",
-	"0x800 - Floating point",
-	"0x900 - Decrementer",
-	"0xA00 - n/a",
-	"0xB00 - n/a",
-	"0xC00 - System call",
-	"0xD00 - Trace",
-	"0xE00 - FP assist",
-	"0xF00 - Perf mon",
-	"0xF20 - VMX",
-	"INVALID EXCEPTION",
-	"INVALID EXCEPTION",
-	"INVALID EXCEPTION",
-	"0x1300 - Inst bkpnt",
-	"0x1400 - Sys mgmt",
-	"0x1600 - Altivec Assist",
-	"0x1700 - Thermal",
-	"INVALID EXCEPTION",
-	"INVALID EXCEPTION",
-	"INVALID EXCEPTION",
-	"INVALID EXCEPTION",
-	"INVALID EXCEPTION",
-	"INVALID EXCEPTION",
-	"INVALID EXCEPTION",
-	"INVALID EXCEPTION",
-	"Emulate",
-	"0x2000 - Run Mode/Trace",
-	"Signal Processor",
-	"Preemption",
-	"Context Switch",
-	"Shutdown",
-	"System Failure"
-};
-int TRAP_TYPES = sizeof (trap_type) / sizeof (trap_type[0]);
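-
-/* Illustrative sketch of the indexing used below, assuming (as in
- * ppc/exception.h) that trap codes are defined as
- * vector_index * T_VECTOR_SIZE:
- *
- *	// T_DATA_ACCESS == 0x03 * T_VECTOR_SIZE, so
- *	// trap_type[T_DATA_ACCESS / T_VECTOR_SIZE] == trap_type[3]
- *	//	== "0x300 - Data access"
- */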
-
-void unresolved_kernel_trap(int trapno,
-			    struct savearea *ssp,
-			    __unused unsigned int dsisr,
-			    addr64_t dar,
-			    const char *message)
-{
-	const char *trap_name;
-
-	ml_set_interrupts_enabled(FALSE);					/* Turn off interruptions */
-	lastTrace = LLTraceSet(0);							/* Disable low-level tracing */
-	
-#if 0
-	{
-		struct per_proc_info *pp;
-		kprintf("  srr0: %016llX\n", ssp->save_srr0);	/* (TEST/DEBUG) */
-		kprintf("  srr1: %016llX\n", ssp->save_srr1);	/* (TEST/DEBUG) */
-		kprintf("   dar: %016llX\n", ssp->save_dar);	/* (TEST/DEBUG) */
-		kprintf("   xcp: %08X\n", ssp->save_exception);	/* (TEST/DEBUG) */
-		kprintf("  ins0: %08X\n", ssp->save_instr[0]);	/* (TEST/DEBUG) */
-		kprintf("  ins1: %08X\n", ssp->save_instr[1]);	/* (TEST/DEBUG) */
-		kprintf("  ins2: %08X\n", ssp->save_instr[2]);	/* (TEST/DEBUG) */
-		kprintf("  ins3: %08X\n", ssp->save_instr[3]);	/* (TEST/DEBUG) */
-		kprintf("  ins4: %08X\n", ssp->save_instr[4]);	/* (TEST/DEBUG) */
-		kprintf("  ins5: %08X\n", ssp->save_instr[5]);	/* (TEST/DEBUG) */
-		kprintf("  ins6: %08X\n", ssp->save_instr[6]);	/* (TEST/DEBUG) */
-		kprintf("  ins7: %08X\n", ssp->save_instr[7]);	/* (TEST/DEBUG) */
-		pp = getPerProc();								/* (TEST/DEBUG) */
-		kprintf("ijsave: %016llX\n", pp->ijsave);		/* (TEST/DEBUG) */
-	}
-#endif
-
-	if( logPanicDataToScreen )
-		disable_debug_output = FALSE;
-
-	debug_mode++;
-	if ((unsigned)trapno <= T_MAX)
-		trap_name = trap_type[trapno / T_VECTOR_SIZE];
-	else
-		trap_name = "???? unrecognized exception";
-	if (message == NULL)
-		message = trap_name;
-
-	kdb_printf("\n\nUnresolved kernel trap(cpu %d): %s DAR=0x%016llX PC=0x%016llX\n",
-	       cpu_number(), trap_name, dar, ssp->save_srr0);
-
-	print_backtrace(ssp);
-
-	panic_caller = (0xFFFF0000 | (trapno / T_VECTOR_SIZE) );
-	/* Commit the panic log buffer to NVRAM, unless otherwise
-	 * specified via a boot-arg.
-	 */
-	if (panicDebugging)
-		commit_paniclog();
-
-	draw_panic_dialog();
-	/* XXX: This is yet another codepath into the debugger, which should
-	 * be reworked to enter the primary panic codepath instead.
-	 * The idea appears to be to enter the debugger (performing a
-	 * stack switch) as soon as possible, but we do have a 
-	 * savearea encapsulating state (accessible by walking the savearea
-	 * chain), so that's superfluous.
-	 */
-	if( panicDebugging )
-		(void)Call_Debugger(trapno, ssp);
-	panic_plain("%s", message);
-}
-
-const char *corr[2] = {"uncorrected", "corrected  "};
-
-void handleMck(struct savearea *ssp) {					/* Common machine check handler */
-
-	int cpu;
-	
-	cpu = cpu_number();
-
-	printf("Machine check (%d) - %s - pc = %016llX, msr = %016llX, dsisr = %08X, dar = %016llX\n",
-		cpu, corr[ssp->save_hdr.save_misc3], ssp->save_srr0, ssp->save_srr1, ssp->save_dsisr, ssp->save_dar);		/* Tell us about it */
-	printf("Machine check (%d) -   AsyncSrc = %016llX, CoreFIR = %016llx\n", cpu, ssp->save_xdat0, ssp->save_xdat1);
-	printf("Machine check (%d) -      L2FIR = %016llX,  BusFir = %016llx\n", cpu, ssp->save_xdat2, ssp->save_xdat3);
-	
-	if(ssp->save_hdr.save_misc3) return;				/* Leave if the machine check was recovered */
-
-	panic("Uncorrectable machine check: pc = %016llX, msr = %016llX, dsisr = %08X, dar = %016llX\n"
-	      "  AsyncSrc = %016llX, CoreFIR = %016llx\n"
-	      "     L2FIR = %016llX,  BusFir = %016llx\n",
-		  ssp->save_srr0, ssp->save_srr1, ssp->save_dsisr, ssp->save_dar, 
-		  ssp->save_xdat0, ssp->save_xdat1, ssp->save_xdat2, ssp->save_xdat3);
-	
-	return;
-}
-
-void
-thread_syscall_return(
-        kern_return_t ret)
-{
-        register thread_t   thread = current_thread();
-        register struct savearea *regs = USER_REGS(thread);
-
-	if (kdebug_enable && ((unsigned int)regs->save_r0 & 0x80000000)) {
-	  /* Mach trap */
-	  KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,(-(regs->save_r0))) | DBG_FUNC_END,
-		       ret, 0, 0, 0, 0);
-	}	    
-        regs->save_r3 = ret;
-
-        thread_exception_return();
-        /*NOTREACHED*/
-}
-
-
-#if	MACH_KDB
-void
-thread_kdb_return(void)
-{
-	register thread_t	thread = current_thread();
-	register struct savearea *regs = USER_REGS(thread);
-
-	Call_Debugger(thread->machine.pcb->save_exception, regs);
-	thread_exception_return();
-	/*NOTREACHED*/
-}
-#endif	/* MACH_KDB */
diff --git a/osfmk/ppc/trap.h b/osfmk/ppc/trap.h
deleted file mode 100644
index 2a4a33ca8..000000000
--- a/osfmk/ppc/trap.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/* 
- * Mach Operating System
- * Copyright (c) 1991,1990 Carnegie Mellon University
- * All Rights Reserved.
- * 
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- * 
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
- * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- * 
- * Carnegie Mellon requests users of this software to return to
- * 
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- * 
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-
-#ifndef	_PPC_TRAP_H_
-#define	_PPC_TRAP_H_
-
-/* maximum number of arguments to a syscall trap */
-#define NARGS	12
-/* Size to reserve in frame for arguments - first 8 are in registers */
-#define ARG_SIZE FM_ALIGN((NARGS-8)*4)
-#define MUNGE_ARGS_SIZE FM_ALIGN(8*8)
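-
-/* A worked instance of the ARG_SIZE computation above: the PowerPC ABI
- * passes the first 8 integer arguments in r3-r10, so with NARGS = 12 the
- * frame must hold the remaining 4 words, and
- * ARG_SIZE = FM_ALIGN((12-8)*4) = FM_ALIGN(16) bytes.
- */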
-
-/*
- * Hardware exception vectors for powerpc are in exception.h
- */
-
-#ifndef	ASSEMBLER
-
-#include <mach/thread_status.h>
-#include <mach/boolean.h>
-#include <mach/kern_return.h>
-#include <ppc/thread.h>
-
-extern void			doexception(int exc, mach_exception_code_t code,
-					mach_exception_subcode_t sub);
-
-extern struct savearea*	trap(int trapno,
-				     struct savearea *ss,
-				     unsigned int dsisr,
-				     addr64_t dar);
-
-typedef kern_return_t (*perfCallback)(int trapno, struct savearea *ss, 
-	unsigned int dsisr, addr64_t dar);
-
-extern volatile perfCallback perfTrapHook;
-extern volatile perfCallback perfASTHook;
-extern volatile perfCallback perfIntHook;
-
-extern struct savearea* interrupt(int intno,
-					 struct savearea *ss,
-					 unsigned int dsisr,
-					 unsigned int dar);
-
-extern int			syscall_error(int exception,
-					      int64_t code,
-					      int64_t subcode,
-					      struct savearea *ss);
-
-
-#endif	/* ASSEMBLER */
-
-#endif	/* _PPC_TRAP_H_ */
diff --git a/osfmk/ppc/vm_tuning.h b/osfmk/ppc/vm_tuning.h
deleted file mode 100644
index 6cf00baeb..000000000
--- a/osfmk/ppc/vm_tuning.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-#ifndef _MACHINE_VM_TUNING_H_
-#define _MACHINE_VM_TUNING_H_
-
-#endif /* _MACHINE_VM_TUNING_H_ */
diff --git a/osfmk/ppc/vmachmon.c b/osfmk/ppc/vmachmon.c
deleted file mode 100644
index f8d7caac6..000000000
--- a/osfmk/ppc/vmachmon.c
+++ /dev/null
@@ -1,2024 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*-----------------------------------------------------------------------
-** vmachmon.c
-**
-** C routines that we are adding to the MacOS X kernel.
-**
------------------------------------------------------------------------*/
-
-#include <mach/mach_types.h>
-#include <mach/kern_return.h>
-#include <mach/host_info.h>
-#include <kern/kern_types.h>
-#include <kern/kalloc.h>
-#include <kern/host.h>
-#include <kern/task.h>
-#include <kern/thread.h>
-#include <ppc/exception.h>
-#include <ppc/mappings.h>
-#include <ppc/thread.h>
-#include <ppc/savearea.h>
-#include <ppc/misc_protos.h>
-#include <ppc/fpu_protos.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_fault.h>
-
-#include <ppc/vmachmon.h>
-#include <ppc/lowglobals.h>
-
-extern double FloatInit;
-extern unsigned long QNaNbarbarian[4];
-
-/*************************************************************************************
-	Virtual Machine Monitor Internal Routines
-**************************************************************************************/
-
-/*-----------------------------------------------------------------------
-** vmm_get_entry
-**
-** This function verifies and returns a vmm context entry index
-**
-** Inputs:
-**		act - pointer to current thread activation
-**		index - index into vmm control table (this is a "one based" value)
-**
-** Outputs:
-**		address of a vmmCntrlEntry or 0 if not found
------------------------------------------------------------------------*/
-
-static vmmCntrlEntry *vmm_get_entry(
-	thread_t			act,
-	vmm_thread_index_t 	index)
-{
-	vmmCntrlTable *CTable;
-	vmmCntrlEntry *CEntry;
-
-	index = index & vmmTInum;								/* Clean up the index */
-
-	if (act->machine.vmmControl == 0) return NULL;			/* No control table means no vmm */
-	if ((index - 1) >= kVmmMaxContexts) return NULL;		/* Index not in range */	
-
-	CTable = act->machine.vmmControl;						/* Make the address a bit more convenient */
-	CEntry = &CTable->vmmc[index - 1];						/* Point to the entry */
-	
-	if (!(CEntry->vmmFlags & vmmInUse)) return NULL;		/* See if the slot is actually in use */
-	
-	return CEntry;
-}
-
-/*-----------------------------------------------------------------------
-** vmm_get_adsp
-**
-** This function verifies and returns the pmap for an address space.
-** If there is none and the request is valid, a pmap will be created.
-**
-** Inputs:
-**		act - pointer to current thread activation
-**		index - index into vmm control table (this is a "one based" value)
-**
-** Outputs:
-**		address of a pmap or 0 if not found or could not be created
-**		Note that if there is no pmap for the address space it will be created.
------------------------------------------------------------------------*/
-
-static pmap_t vmm_get_adsp(thread_t act, vmm_thread_index_t index)
-{
-	pmap_t pmap;
-
-	if (act->machine.vmmControl == 0) return NULL;			/* No control table means no vmm */
-	if ((index - 1) >= kVmmMaxContexts) return NULL;		/* Index not in range */	
-
-	pmap = act->machine.vmmControl->vmmAdsp[index - 1];		/* Get the pmap */
-	return (pmap);											/*  and return it. */
-}
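-
-/* Note: in both lookup routines above, the one-based index is
- * range-checked as (index - 1) >= kVmmMaxContexts; assuming the index
- * type is unsigned, an index of 0 wraps to a huge value and is rejected
- * by the same comparison.
- */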
-
-/*-----------------------------------------------------------------------
-** vmm_build_shadow_hash
-**
-** Allocate and initialize a shadow hash table.
-**
-** This function assumes that PAGE_SIZE is 4k-bytes.
-**
------------------------------------------------------------------------*/
-static pmap_vmm_ext *vmm_build_shadow_hash(pmap_t pmap)
-{
-	pmap_vmm_ext   *ext;									/* VMM pmap extension we're building */
-	ppnum_t			extPP;									/* VMM pmap extension physical page number */
-	kern_return_t	ret;									/* Return code from various calls */
-	uint32_t		pages = GV_HPAGES;						/* Number of pages in the hash table */
-	vm_offset_t		free = VMX_HPIDX_OFFSET;				/* Offset into extension page of free area (128-byte aligned) */
-	uint32_t		freeSize  = PAGE_SIZE - free;			/* Number of free bytes in the extension page */
-	uint32_t	idx;
-															
-	if ((pages * sizeof(addr64_t)) + (pages * sizeof(vm_offset_t)) > freeSize) {
-		panic("vmm_build_shadow_hash: too little pmap_vmm_ext free space\n");
-	}
-	
-	ret = kmem_alloc_kobject(kernel_map, (vm_offset_t *)&ext, PAGE_SIZE);
-															/* Allocate a page-sized extension block */
-	if (ret != KERN_SUCCESS) return (NULL);					/* Return NULL for failed allocate */
-	bzero((char *)ext, PAGE_SIZE);							/* Zero the entire extension block page */
-	
-	extPP = pmap_find_phys(kernel_pmap, (vm_offset_t)ext);
-															/* Get extension block's physical page number */
-	if (!extPP) {											/* This should not fail, but then again... */
-		panic("vmm_build_shadow_hash: could not translate pmap_vmm_ext vaddr %p\n", ext);
-	}
-	
-	ext->vmxSalt	     = (addr64_t)(vm_offset_t)ext ^ ptoa_64(extPP);
-															/* Set effective<->physical conversion salt */
-	ext->vmxHostPmapPhys = (addr64_t)(vm_offset_t)pmap ^ pmap->pmapvr;
-															/* Set host pmap's physical address */
-	ext->vmxHostPmap     = pmap;							/* Set host pmap's effective address */
-	ext->vmxHashPgIdx    = (addr64_t *)((vm_offset_t)ext + VMX_HPIDX_OFFSET);
-															/* Allocate physical index */
-	ext->vmxHashPgList	 = (vm_offset_t *)((vm_offset_t)ext + VMX_HPLIST_OFFSET);
-															/* Allocate page list */
-	ext->vmxActiveBitmap = (vm_offset_t *)((vm_offset_t)ext + VMX_ACTMAP_OFFSET);
-															/* Allocate active mapping bitmap */
-	
-	/* The hash table is typically larger than a single page, but we don't require it to be in a
-	   contiguous virtual or physical chunk. So, we allocate it page by page, noting the effective and
-	   physical address of each page in vmxHashPgList and vmxHashPgIdx, respectively. */
-	for (idx = 0; idx < pages; idx++) {
-		mapping_t *map;
-		uint32_t mapIdx;
-		ret = kmem_alloc_kobject(kernel_map, &ext->vmxHashPgList[idx], PAGE_SIZE);
-															/* Allocate a hash-table page */
-		if (ret != KERN_SUCCESS) goto fail;					/* Allocation failed, exit through cleanup */
-		bzero((char *)ext->vmxHashPgList[idx], PAGE_SIZE);	/* Zero the page */
-		ext->vmxHashPgIdx[idx] = ptoa_64(pmap_find_phys(kernel_pmap, (addr64_t)ext->vmxHashPgList[idx]));
-															/* Put page's physical address into index */
-		if (!ext->vmxHashPgIdx[idx]) {						/* Hash-table page's LRA failed */
-			panic("vmm_build_shadow_hash: could not translate hash-table vaddr %08X\n", ext->vmxHashPgList[idx]);
-		}
-		map = (mapping_t *)ext->vmxHashPgList[idx];
-		for (mapIdx = 0; mapIdx < GV_SLTS_PPG; mapIdx++) {	/* Iterate over mappings in this page */
-			map->mpFlags = (mpGuest | mpgFree);				/* Mark guest type and free */
-			map = (mapping_t *)((char *)map + GV_SLOT_SZ);	/* Next slot-sized mapping */
-		}
-	}
-	
-	return (ext);											/* Return newly-minted VMM pmap extension */
-	
-fail:
-	for (idx = 0; idx < pages; idx++) {						/* De-allocate any pages we managed to allocate */
-		if (ext->vmxHashPgList[idx]) {
-			kmem_free(kernel_map, ext->vmxHashPgList[idx], PAGE_SIZE);
-		}
-	}
-	kmem_free(kernel_map, (vm_offset_t)ext, PAGE_SIZE);		/* Release the VMM pmap extension page */
-	return (NULL);											/* Return NULL for failure */
-}
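-
-/* Illustrative sketch of the vmxSalt convention established above:
- * because salt = effective ^ physical, a single XOR converts between the
- * two views of the extension block. Hypothetical helper, not original
- * code:
- */
-#if 0
-static inline addr64_t vmx_eff_to_phys(pmap_vmm_ext *ext)
-{
-	return ((addr64_t)(vm_offset_t)ext) ^ ext->vmxSalt;	/* == ptoa_64(extPP) */
-}
-#endif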
-
-
-/*-----------------------------------------------------------------------
-** vmm_release_shadow_hash
-**
-** Release shadow hash table and VMM extension block
-**
------------------------------------------------------------------------*/
-static void vmm_release_shadow_hash(pmap_vmm_ext *ext)
-{
-	uint32_t		idx;
-
-	for (idx = 0; idx < GV_HPAGES; idx++) {					/* Release the hash table page by page */
-		kmem_free(kernel_map, ext->vmxHashPgList[idx], PAGE_SIZE);
-	}
-
-	kmem_free(kernel_map, (vm_offset_t)ext, PAGE_SIZE);		/* Release the VMM pmap extension page */
-}
-
-/*-----------------------------------------------------------------------
-** vmm_activate_gsa
-**
-** Activate guest shadow assist
-**
------------------------------------------------------------------------*/
-static kern_return_t vmm_activate_gsa(
-	thread_t			act,
-	vmm_thread_index_t	index)
-{
-	vmmCntrlTable	*CTable = act->machine.vmmControl;		/* Get VMM control table */
-	vmmCntrlEntry *CEntry;
-	pmap_t hpmap;
-	pmap_t gpmap;
-	if (!CTable) {											/* Caller guarantees that this will work */
-		panic("vmm_activate_gsa: VMM control table not present; act = %p, idx = %lu\n",
-			act, index);
-		return KERN_FAILURE;
-	}
-	CEntry = vmm_get_entry(act, index);	/* Get context from index */
-	if (!CEntry) {											/* Caller guarantees that this will work */
-		panic("vmm_activate_gsa: Unexpected failure of vmm_get_entry; act = %p, idx = %lu\n",
-			act, index);
-		return KERN_FAILURE;
-	}
-
-	hpmap = act->map->pmap;							/* Get host pmap */
-	gpmap = vmm_get_adsp(act, index);				/* Get guest pmap */
-	if (!gpmap) {											/* Caller guarantees that this will work */
-		panic("vmm_activate_gsa: Unexpected failure of vmm_get_adsp; act = %p, idx = %lu\n",
-			act, index);
-		return KERN_FAILURE;
-	}
-	
-	if (!hpmap->pmapVmmExt) {								/* If there's no VMM extension for this host, create one */
-		hpmap->pmapVmmExt = vmm_build_shadow_hash(hpmap);	/* Build VMM extension plus shadow hash and attach */
-		if (hpmap->pmapVmmExt) {							/* See if we succeeded */
-			hpmap->pmapVmmExtPhys = (addr64_t)(vm_offset_t)hpmap->pmapVmmExt ^ hpmap->pmapVmmExt->vmxSalt;
-															/* Get VMM extensions block physical address */
-		} else {
-			return KERN_RESOURCE_SHORTAGE;					/* Not enough mojo to go */
-		}
-	}
-	gpmap->pmapVmmExt = hpmap->pmapVmmExt;					/* Copy VMM extension block virtual address into guest */
-	gpmap->pmapVmmExtPhys = hpmap->pmapVmmExtPhys;			/*  and its physical address, too */
-	gpmap->pmapFlags |= pmapVMgsaa;							/* Enable GSA for this guest */
-	CEntry->vmmXAFlgs |= vmmGSA;							/* Show GSA active here, too */
-
-	return KERN_SUCCESS;
-}
-
-
-/*-----------------------------------------------------------------------
-** vmm_deactivate_gsa
-**
-** Deactivate guest shadow assist
-**
------------------------------------------------------------------------*/
-static void
-vmm_deactivate_gsa(
-	thread_t			act,
-	vmm_thread_index_t	index)
-{
-	vmmCntrlEntry	*CEntry = vmm_get_entry(act, index);	/* Get context from index */
-	pmap_t	gpmap;
-	if (!CEntry) {											/* Caller guarantees that this will work */
-		panic("vmm_deactivate_gsa: Unexpected failure of vmm_get_entry; act = %p, idx = %lu\n",
-			act, index);
-	}
-
-	gpmap = vmm_get_adsp(act, index);				/* Get guest pmap */
-	if (!gpmap) {											/* Caller guarantees that this will work */
-		panic("vmm_deactivate_gsa: Unexpected failure of vmm_get_adsp; act = %p, idx = %lu\n",
-			act, index);
-	}
-	
-	gpmap->pmapFlags &= ~pmapVMgsaa;						/* Deactivate GSA for this guest */
-	CEntry->vmmXAFlgs &= ~vmmGSA;							/* Show GSA deactivated here, too */
-}
-
-
-/*-----------------------------------------------------------------------
-** vmm_flush_context
-**
-** Flush specified guest context, purging all guest mappings and clearing
-** the context page.
-**
------------------------------------------------------------------------*/
-static void vmm_flush_context(
-	thread_t			act,
-	vmm_thread_index_t	index)
-{
-	vmmCntrlEntry 		*CEntry;
-	vmmCntrlTable		*CTable;
-	vmm_state_page_t 	*vks;
-	vmm_version_t 		version;
-
-	CEntry = vmm_get_entry(act, index);				/* Convert index to entry */
-	if (!CEntry) {									/* Caller guarantees that this will work */
-		panic("vmm_flush_context: Unexpected failure of vmm_get_entry; act = %p, idx = %lu\n",
-			act, index);
-		return;
-	}
-
-	if(CEntry->vmmFacCtx.FPUsave) {					/* Is there any floating point context? */
-		toss_live_fpu(&CEntry->vmmFacCtx);			/* Get rid of any live context here */
-		save_release((struct savearea *)CEntry->vmmFacCtx.FPUsave);	/* Release it */
-	}
-
-	if(CEntry->vmmFacCtx.VMXsave) {					/* Is there any vector context? */
-		toss_live_vec(&CEntry->vmmFacCtx);			/* Get rid of any live context here */
-		save_release((struct savearea *)CEntry->vmmFacCtx.VMXsave);	/* Release it */
-	}
-	
-	vmm_unmap_all_pages(act, index);				/* Blow away all mappings for this context */
-
-	CTable = act->machine.vmmControl;				/* Get the control table address */
-	CTable->vmmGFlags = CTable->vmmGFlags & ~vmmLastAdSp;	/* Make sure we don't try to automap into this */
-	
-	CEntry->vmmFlags &= vmmInUse;					/* Clear out all of the flags for this entry except in use */
-	CEntry->vmmFacCtx.FPUsave = NULL;					/* Clear facility context control */
-	CEntry->vmmFacCtx.FPUlevel = NULL;					/* Clear facility context control */
-	CEntry->vmmFacCtx.FPUcpu = 0;					/* Clear facility context control */
-	CEntry->vmmFacCtx.VMXsave = NULL;					/* Clear facility context control */
-	CEntry->vmmFacCtx.VMXlevel = NULL;					/* Clear facility context control */
-	CEntry->vmmFacCtx.VMXcpu = 0;					/* Clear facility context control */
-	
-	vks = CEntry->vmmContextKern;					/* Get address of the context page */
-	version = vks->interface_version;				/* Save the version code */
-	bzero((char *)vks, 4096);						/* Clear all */
-
-	vks->interface_version = version;				/* Set our version code */
-	vks->thread_index = index % vmmTInum;			/* Tell the user the index for this virtual machine */
-		
-	/* Context is now flushed */
-}
-
-
-/*************************************************************************************
-	Virtual Machine Monitor Exported Functionality
-	
-	The following routines are used to implement a quick-switch mechanism for
-	virtual machines that need to execute within their own processor environment
-	(including register and MMU state).
-**************************************************************************************/
-
-/*-----------------------------------------------------------------------
-** vmm_get_version
-**
-** This function returns the current version of the virtual machine
-** interface. It is divided into two portions. The top 16 bits
-** represent the major version number, and the bottom 16 bits
-** represent the minor version number. Clients using the Vmm
-** functionality should make sure they are using a version new
-** enough for them.
-**
-** Inputs:
-**		none
-**
-** Outputs:
-**		32-bit number representing major/minor version of 
-**				the Vmm module
------------------------------------------------------------------------*/
-
-int vmm_get_version(struct savearea *save)
-{
-	save->save_r3 = kVmmCurrentVersion;		/* Return the version */
-	return 1;
-}
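-
-/* Illustrative sketch of how a client might decompose the returned
- * version, per the 16/16 split described above (hypothetical helper
- * names):
- */
-#if 0
-static unsigned int vmm_version_major(unsigned int v) { return v >> 16; }
-static unsigned int vmm_version_minor(unsigned int v) { return v & 0xFFFF; }
-#endif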
-
-
-/*-----------------------------------------------------------------------
-** Vmm_get_features
-**
-** This function returns a set of flags that represents the functionality
-** supported by the current version of the Vmm interface. Clients should
-** use this to determine whether they can run on this system.
-**
-** Inputs:
-**		none
-**
-** Outputs:
-**		32-bit number representing functionality supported by this
-**				version of the Vmm module
------------------------------------------------------------------------*/
-
-int vmm_get_features(struct savearea *save)
-{
-	save->save_r3 = kVmmCurrentFeatures;		/* Return the features */
-	if(getPerProc()->pf.Available & pf64Bit) {
-		save->save_r3 &= ~kVmmFeature_LittleEndian;	/* No little endian here */
-		save->save_r3 |= kVmmFeature_SixtyFourBit;	/* Set that we can do 64-bit */
-	}
-	return 1;
-}
-
-
-/*-----------------------------------------------------------------------
-** vmm_max_addr
-**
-** This function returns the maximum addressable virtual address supported
-**
-** Outputs:
-**		Returns max address
------------------------------------------------------------------------*/
-
-addr64_t
-vmm_max_addr(__unused thread_t act) 
-{
-	return vm_max_address;							/* Return the maximum address */
-}
-
-/*-----------------------------------------------------------------------
-** vmm_get_XA
-**
-** This function retrieves the eXtended Architecture flags for the specified VM.
-** 
-** We need to return the result in the return code rather than in the return parameters
-** because we need an architecture independent format so the results are actually 
-** usable by the host. For example, the return parameters for 64-bit are 8 bytes wide vs.
-** 4 for 32-bit. 
-** 
-**
-** Inputs:
-**		act - pointer to current thread activation structure
-**		index - index returned by vmm_init_context
-**
-** Outputs:
-**		Return code is set to the XA flags.  If the index is invalid or the
-**		context has not been created, we return 0.
------------------------------------------------------------------------*/
-
-unsigned int vmm_get_XA(
-	thread_t	 		act,
-	vmm_thread_index_t 	index)
-{
-	vmmCntrlEntry 		*CEntry;
-
-	CEntry = vmm_get_entry(act, index);				/* Convert index to entry */		
-	if (CEntry == NULL) return 0;					/* Either this isn't a vmm or the index is bogus */
-	
-	return CEntry->vmmXAFlgs;						/* Return the flags */
-}
-
-/*-----------------------------------------------------------------------
-** vmm_init_context
-**
-** This function initializes an emulation context. It allocates
-** a new pmap (address space) and fills in the initial processor
-** state within the specified structure. The structure, mapped
-** into the client's logical address space, must be page-aligned.
-**
-** Inputs:
-**		act - pointer to current thread activation
-**		version - requested version of the Vmm interface (allowing
-**			future versions of the interface to change, but still
-**			support older clients)
-**		vmm_user_state - pointer to a logical page within the
-**			client's address space
-**
-** Outputs:
-**		kernel return code indicating success or failure
------------------------------------------------------------------------*/
-
-int vmm_init_context(struct savearea *save)
-{
-
-	thread_t			act;
-	vmm_version_t 		version;
-	vmm_state_page_t *	vmm_user_state;
-	vmmCntrlTable		*CTable;
-	vm_offset_t			conkern;
-	vmm_state_page_t *	vks;
-	ppnum_t				conphys;
-	kern_return_t 		ret;
-	int					cvi, i;
-    task_t				task;
-    thread_t			fact, gact;
-	pmap_t hpmap;
-	pmap_t gpmap;
-
-	vmm_user_state = CAST_DOWN(vmm_state_page_t *, save->save_r4);  /* Get the user address of the comm area */
-	if ((unsigned int)vmm_user_state & (PAGE_SIZE - 1)) {	/* Make sure the comm area is page aligned */
-		save->save_r3 = KERN_FAILURE;			/* Return failure */
-		return 1;
-	}
-
-	/* Make sure that the version requested is supported */
-	version = save->save_r3;					/* Pick up passed in version */
-	if (((version >> 16) < kVmmMinMajorVersion) || ((version >> 16) > (kVmmCurrentVersion >> 16))) {
-		save->save_r3 = KERN_FAILURE;			/* Return failure */
-		return 1;
-	}
-
-	if((version & 0xFFFF) > kVmmCurMinorVersion) {	/* Check for valid minor */
-		save->save_r3 = KERN_FAILURE;			/* Return failure */
-		return 1;
-	}
-
-	act = current_thread();						/* Pick up our activation */
-	
-	ml_set_interrupts_enabled(TRUE);			/* This can take a bit of time so pass interruptions */
-	
-	task = current_task();						/* Figure out who we are */
-
-	task_lock(task);							/* Lock our task */
-
-	fact = (thread_t)task->threads.next;	/* Get the first activation on task */
-	gact = NULL;									/* Pretend we didn't find it yet */
-
-	for(i = 0; i < task->thread_count; i++) {	/* All of the activations */
-		if(fact->machine.vmmControl) {				/* Is this a virtual machine monitor? */
-			gact = fact;						/* Yeah... */
-			break;								/* Bail the loop... */
-		}
-		fact = (thread_t)fact->task_threads.next;	/* Go to the next one */
-	}
-	
-
-/*
- *	We only allow one thread per task to be a virtual machine monitor right now.  This solves
- *	a number of potential problems that I can't put my finger on right now.
- *
- *	Ultimately, I think we want to move the controls and make all this task-based instead of
- *	thread-based.  That would allow an emulator architecture to spawn a kernel thread for each
- *	VM (if they want) rather than hand dispatch contexts.
- */
-
-	if(gact && (gact != act)) {					/* Check if another thread is a vmm or trying to be */
-		task_unlock(task);						/* Release task lock */
-		ml_set_interrupts_enabled(FALSE);		/* Set back interruptions */
-		save->save_r3 = KERN_FAILURE;			/* We must play alone... */
-		return 1;
-	}
-	
-	if(!gact) act->machine.vmmControl = (vmmCntrlTable *)1;	/* Temporarily mark that we are the vmm thread */
-
-	task_unlock(task);							/* Safe to release now (because we've marked ourselves) */
-
-	CTable = act->machine.vmmControl;				/* Get the control table address */
-	if ((unsigned int)CTable == 1) {			/* If we are marked, try to allocate a new table, otherwise we have one */
-		if(!(CTable = (vmmCntrlTable *)kalloc(sizeof(vmmCntrlTable)))) {	/* Get a fresh emulation control table */
-			act->machine.vmmControl = NULL;			/* Unmark us as vmm 'cause we failed */
-			ml_set_interrupts_enabled(FALSE);	/* Set back interruptions */
-			save->save_r3 = KERN_RESOURCE_SHORTAGE;		/* No storage... */
-			return 1;
-		}
-		
-		bzero((void *)CTable, sizeof(vmmCntrlTable));	/* Clean it up */
-		act->machine.vmmControl = CTable;			/* Initialize the table anchor */
-	}
-
-	for(cvi = 0; cvi < kVmmMaxContexts; cvi++) {	/* Search to find a free slot */
-		if(!(CTable->vmmc[cvi].vmmFlags & vmmInUse)) break;	/* Bail if we find an unused slot */
-	}
-	
-	if(cvi >= kVmmMaxContexts) {				/* Did we find one? */
-		ml_set_interrupts_enabled(FALSE);		/* Set back interruptions */
-		save->save_r3 = KERN_RESOURCE_SHORTAGE;	/* No empty slots... */	
-		return 1;
-	}
-
-	ret = vm_map_wire(							/* Wire the virtual machine monitor's context area */
-		act->map,
-		(vm_offset_t)vmm_user_state,
-		(vm_offset_t)vmm_user_state + PAGE_SIZE,
-		VM_PROT_READ | VM_PROT_WRITE,
-		FALSE);															
-		
-	if (ret != KERN_SUCCESS) 					/* The wire failed, return the code */
-		goto return_in_shame;
-
-	/* Map the vmm state into the kernel's address space. */
-	conphys = pmap_find_phys(act->map->pmap, (addr64_t)((uintptr_t)vmm_user_state));
-
-	/* Find a virtual address to use. */
-	ret = kmem_alloc_pageable(kernel_map, &conkern, PAGE_SIZE);
-	if (ret != KERN_SUCCESS) {					/* Did we find an address? */
-		(void) vm_map_unwire(act->map,			/* No, unwire the context area */
-			(vm_offset_t)vmm_user_state,
-			(vm_offset_t)vmm_user_state + PAGE_SIZE,
-			TRUE);
-		goto return_in_shame;
-	}
-	
-	/* Map it into the kernel's address space. */
-
-	pmap_enter(kernel_pmap, conkern, conphys, 
-		VM_PROT_READ | VM_PROT_WRITE, 
-		VM_WIMG_USE_DEFAULT, TRUE);
-	
-	/* Clear the vmm state structure. */
-	vks = (vmm_state_page_t *)conkern;
-	bzero((char *)vks, PAGE_SIZE);
-	
-	
-	/* We're home free now. Simply fill in the necessary info and return. */
-	
-	vks->interface_version = version;			/* Set our version code */
-	vks->thread_index = cvi + 1;				/* Tell the user the index for this virtual machine */
-	
-	CTable->vmmc[cvi].vmmFlags = vmmInUse;		/* Mark the slot in use and make sure the rest are clear */
-	CTable->vmmc[cvi].vmmContextKern = vks;		/* Remember the kernel address of comm area */
-	CTable->vmmc[cvi].vmmContextPhys = conphys;	/* Remember the state page physical addr */
-	CTable->vmmc[cvi].vmmContextUser = vmm_user_state;		/* Remember user address of comm area */
-	
-	CTable->vmmc[cvi].vmmFacCtx.FPUsave = NULL;	/* Clear facility context control */
-	CTable->vmmc[cvi].vmmFacCtx.FPUlevel = NULL;	/* Clear facility context control */
-	CTable->vmmc[cvi].vmmFacCtx.FPUcpu = 0;		/* Clear facility context control */
-	CTable->vmmc[cvi].vmmFacCtx.VMXsave = NULL;	/* Clear facility context control */
-	CTable->vmmc[cvi].vmmFacCtx.VMXlevel = NULL;	/* Clear facility context control */
-	CTable->vmmc[cvi].vmmFacCtx.VMXcpu = 0;		/* Clear facility context control */
-	CTable->vmmc[cvi].vmmFacCtx.facAct = act;	/* Point back to the activation */
-
-	(void)hw_atomic_add(&saveanchor.savetarget, 2);	/* Account for the number of extra saveareas we think we might "need" */
-
-	hpmap = act->map->pmap;						/* Get host pmap */
-	gpmap = pmap_create(0, FALSE);					/* Make a fresh guest pmap */
-	if (gpmap) {										/* Did we succeed ? */
-		CTable->vmmAdsp[cvi] = gpmap;					/* Remember guest pmap for new context */
-		if (lowGlo.lgVMMforcedFeats & vmmGSA) {			/* Forcing on guest shadow assist ? */
-			vmm_activate_gsa(act, cvi+1);				/* Activate GSA */ 
-		}
-	} else {
-		ret = KERN_RESOURCE_SHORTAGE;					/* We've failed to allocate a guest pmap */
-		goto return_in_shame;							/* Shame on us. */
-	}
-
-	if (!(hpmap->pmapFlags & pmapVMhost)) {				/* Do this stuff if this is our first time hosting */
-		hpmap->pmapFlags |= pmapVMhost;					/* We're now hosting */
-	}
-	
-	ml_set_interrupts_enabled(FALSE);			/* Set back interruptions */
-	save->save_r3 = KERN_SUCCESS;				/* Hip, hip, hooray... */
-	return 1;
-
-return_in_shame:
-	if(!gact) kfree(CTable, sizeof(vmmCntrlTable));	/* Toss the table if we just allocated it */
-	act->machine.vmmControl = NULL;					/* Unmark us as vmm 'cause we failed */
-	ml_set_interrupts_enabled(FALSE);			/* Set back interruptions */
-	save->save_r3 = ret;						/* Pass back return code... */	
-	return 1;
-
-}
-
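-/*
- *	Illustrative sketch (not part of the original source): the version word
- *	validated above carries the major number in its high-order 16 bits and
- *	the minor number in its low-order 16 bits.  A hypothetical client helper
- *	could build one like this:
- */
-#if 0	/* example only */
-static vmm_version_t vmm_make_version(unsigned short major, unsigned short minor)
-{
-	return (((vmm_version_t)major << 16) | minor);	/* Matches the (version >> 16) / (version & 0xFFFF) checks above */
-}
-#endif
-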
-
-/*-----------------------------------------------------------------------
-** vmm_tear_down_context
-**
-** This function uninitializes an emulation context. It deallocates
-** internal resources associated with the context block.
-**
-** Inputs:
-**		act - pointer to current thread activation structure
-**		index - index returned by vmm_init_context
-**
-** Outputs:
-**		kernel return code indicating success or failure
-**
-** Strangeness note:
-**		This call will also trash the address space with the same ID.  While this 
-**		is really not too cool, we have to do it because we need to make
-**		sure that old VMM users (not that we really have any) who depend upon 
-**		the address space going away with the context still work the same.
------------------------------------------------------------------------*/
-
-kern_return_t vmm_tear_down_context(
-	thread_t	 		act,
-	vmm_thread_index_t 	index)
-{
-	vmmCntrlEntry 		*CEntry;
-	vmmCntrlTable		*CTable;
-	int					cvi;
-	pmap_t gpmap;
-	pmap_t pmap;
-
-	CEntry = vmm_get_entry(act, index);					/* Convert index to entry */		
-	if (CEntry == NULL) return KERN_FAILURE;			/* Either this isn't vmm thread or the index is bogus */
-
-	ml_set_interrupts_enabled(TRUE);					/* This can take a bit of time so pass interruptions */
-
-	(void)hw_atomic_sub(&saveanchor.savetarget, 2);	/* We don't need these extra saveareas anymore */
-
-	if(CEntry->vmmFacCtx.FPUsave) {						/* Is there any floating point context? */
-		toss_live_fpu(&CEntry->vmmFacCtx);				/* Get rid of any live context here */
-		save_release((struct savearea *)CEntry->vmmFacCtx.FPUsave);	/* Release it */
-	}
-
-	if(CEntry->vmmFacCtx.VMXsave) {						/* Is there any vector context? */
-		toss_live_vec(&CEntry->vmmFacCtx);				/* Get rid of any live context here */
-		save_release((struct savearea *)CEntry->vmmFacCtx.VMXsave);	/* Release it */
-	}
-	
-	CEntry->vmmPmap = NULL;								/* Remove this trace */
-	gpmap = act->machine.vmmControl->vmmAdsp[index - 1];
-														/* Get context's guest pmap (if any) */
-	if (gpmap) {										/* Check if there is an address space assigned here */
-		if (gpmap->pmapFlags & pmapVMgsaa) {			/* Handle guest shadow assist case specially */
-			hw_rem_all_gv(gpmap);						/* Remove all guest mappings from shadow hash table */
-		} else {
-			mapping_remove(gpmap, 0xFFFFFFFFFFFFF000LL);/* Remove final page explicitly because we might have mapped it */	
-			pmap_remove(gpmap, 0, 0xFFFFFFFFFFFFF000LL);/* Remove all entries from this map */
-		}
-		pmap_destroy(gpmap);							/* Toss the pmap for this context */
-		act->machine.vmmControl->vmmAdsp[index - 1] = NULL;	/* Clean it up */
-	}
-	
-	(void) vm_map_unwire(							/* Unwire the user comm page */
-		act->map,
-		(vm_offset_t)CEntry->vmmContextUser,
-		(vm_offset_t)CEntry->vmmContextUser + PAGE_SIZE,
-		FALSE);
-	
-	kmem_free(kernel_map, (vm_offset_t)CEntry->vmmContextKern, PAGE_SIZE);	/* Remove kernel's view of the comm page */
-	
-	CTable = act->machine.vmmControl;					/* Get the control table address */
-	CTable->vmmGFlags = CTable->vmmGFlags & ~vmmLastAdSp;	/* Make sure we don't try to automap into this */
-
-	CEntry->vmmFlags = 0;							/* Clear out all of the flags for this entry including in use */
-	CEntry->vmmContextKern = NULL;						/* Clear the kernel address of comm area */
-	CEntry->vmmContextUser = NULL;						/* Clear the user address of comm area */
-	
-	CEntry->vmmFacCtx.FPUsave = NULL;					/* Clear facility context control */
-	CEntry->vmmFacCtx.FPUlevel = NULL;					/* Clear facility context control */
-	CEntry->vmmFacCtx.FPUcpu = 0;					/* Clear facility context control */
-	CEntry->vmmFacCtx.VMXsave = NULL;					/* Clear facility context control */
-	CEntry->vmmFacCtx.VMXlevel = NULL;					/* Clear facility context control */
-	CEntry->vmmFacCtx.VMXcpu = 0;					/* Clear facility context control */
-	CEntry->vmmFacCtx.facAct = NULL;					/* Clear facility context control */
-	
-	for(cvi = 0; cvi < kVmmMaxContexts; cvi++) {	/* Search to find a free slot */
-		if(CTable->vmmc[cvi].vmmFlags & vmmInUse) {	/* Return if there are still some in use */
-			ml_set_interrupts_enabled(FALSE);		/* No more interruptions */
-			return KERN_SUCCESS;					/* Leave... */
-		}
-	}
-
-/*
- *	When we have tossed the last context, toss any address spaces left over before releasing
- *	the VMM control block 
- */
-
-	for(cvi = 1; cvi <= kVmmMaxContexts; cvi++) {	/* Look at all slots */
-		if(!act->machine.vmmControl->vmmAdsp[cvi - 1]) continue;	/* Nothing to remove here */
-		mapping_remove(act->machine.vmmControl->vmmAdsp[cvi - 1], 0xFFFFFFFFFFFFF000LL);	/* Remove final page explicitly because we might have mapped it */
-		pmap_remove(act->machine.vmmControl->vmmAdsp[cvi - 1], 0, 0xFFFFFFFFFFFFF000LL);	/* Remove all entries from this map */
-		pmap_destroy(act->machine.vmmControl->vmmAdsp[cvi - 1]);	/* Toss the pmap for this context */
-		act->machine.vmmControl->vmmAdsp[cvi - 1] = NULL;	/* Clear just in case */
-	}
-	
-	pmap = act->map->pmap;					/* Get our pmap */
-	if (pmap->pmapVmmExt) {							/* Release any VMM pmap extension block and shadow hash table */
-		vmm_release_shadow_hash(pmap->pmapVmmExt);	/* Release extension block and shadow hash table */
-		pmap->pmapVmmExt     = NULL;					/* Forget extension block */
-		pmap->pmapVmmExtPhys = 0;					/* Forget extension block's physical address, too */
-	}
-	pmap->pmapFlags &= ~pmapVMhost;					/* We're no longer hosting */
-
-	kfree(CTable, sizeof(vmmCntrlTable));	/* Toss the table because we tossed the last context */
-	act->machine.vmmControl = NULL;						/* Unmark us as vmm */
-
-	ml_set_interrupts_enabled(FALSE);				/* No more interruptions */
-	
-	return KERN_SUCCESS;
-}
-
-
-/*-----------------------------------------------------------------------
-** vmm_activate_XA
-**
-** This function activates the eXtended Architecture flags for the specified VM.
-** 
-** We need to return the result in the return code rather than in the return parameters
-** because we need an architecture-independent format so the results are actually
-** usable by the host. For example, the return parameters for 64-bit are 8 bytes wide vs.
-** 4 for 32-bit. 
-** 
-** Note that this function does a lot of the same stuff as vmm_tear_down_context
-** and vmm_init_context.
-**
-** Inputs:
-**		act - pointer to current thread activation structure
-**		index - index returned by vmm_init_context
-**		flags - the extended architecture flags
-**		
-**
-** Outputs:
-**		KERN_SUCCESS if vm is valid and initialized. KERN_FAILURE if not.
-**		Also, the internal flags are set and, additionally, the VM is completely reset.
------------------------------------------------------------------------*/
-kern_return_t vmm_activate_XA(
-	thread_t	 		act,
-	vmm_thread_index_t 	index,
-	unsigned int xaflags)
-{
-	vmmCntrlEntry 		*CEntry;
-	kern_return_t		result	= KERN_SUCCESS;		/* Assume success */
-
-	if ((xaflags & ~kVmmSupportedSetXA) || ((xaflags & vmm64Bit) && !(getPerProc()->pf.Available & pf64Bit)))
-		return (KERN_FAILURE);						/* Unknown or unsupported feature requested */
-		
-	CEntry = vmm_get_entry(act, index);				/* Convert index to entry */		
-	if (CEntry == NULL) return KERN_FAILURE;		/* Either this isn't a vmm or the index is bogus */
-
-	ml_set_interrupts_enabled(TRUE);				/* This can take a bit of time so pass interruptions */
-	
-	vmm_flush_context(act, index);					/* Flush the context */
-
-	if (xaflags & vmm64Bit) {						/* Activating 64-bit mode ? */	
-		CEntry->vmmXAFlgs |= vmm64Bit;				/* Activate 64-bit mode */
-	}
-	
-	if (xaflags & vmmGSA) {							/* Activating guest shadow assist ? */
-		result = vmm_activate_gsa(act, index);		/* Activate guest shadow assist */
-	}
-	
-	ml_set_interrupts_enabled(FALSE);				/* No more interruptions */
-	
-	return result;									/* Return activate result */
-}
-
-/*-----------------------------------------------------------------------
-** vmm_deactivate_XA
-**
------------------------------------------------------------------------*/
-kern_return_t vmm_deactivate_XA(
-	thread_t	 		act,
-	vmm_thread_index_t 	index,
-	unsigned int xaflags)
-{
-	vmmCntrlEntry 		*CEntry;
-	kern_return_t		result	= KERN_SUCCESS;		/* Assume success */
-
-	if ((xaflags & ~kVmmSupportedSetXA) || ((xaflags & vmm64Bit) && !(getPerProc()->pf.Available & pf64Bit)))
-		return (KERN_FAILURE);						/* Unknown or unsupported feature requested */
-		
-	CEntry = vmm_get_entry(act, index);				/* Convert index to entry */		
-	if (CEntry == NULL) return KERN_FAILURE;		/* Either this isn't a vmm or the index is bogus */
-
-	ml_set_interrupts_enabled(TRUE);				/* This can take a bit of time so pass interruptions */
-	
-	vmm_flush_context(act, index);					/* Flush the context */
-
-	if (xaflags & vmm64Bit) {						/* Deactivating 64-bit mode ? */	
-		CEntry->vmmXAFlgs &= ~vmm64Bit;				/* Deactivate 64-bit mode */
-	}
-	
-	if (xaflags & vmmGSA) {							/* Deactivating guest shadow assist ? */
-		vmm_deactivate_gsa(act, index);				/* Deactivate guest shadow assist */
-	}
-	
-	ml_set_interrupts_enabled(FALSE);				/* No more interruptions */
-	
-	return result;									/* Return deactivate result */
-}
-
-
-/*-----------------------------------------------------------------------
-** vmm_tear_down_all
-**
-** This function uninitializes all emulation contexts. If there are
-** any vmm contexts, it calls vmm_tear_down_context for each one.
-**
-** Note: this can also be called from normal thread termination.  Because of
-** that, we will context switch out of an alternate if we are currently in it.
-** It will be terminated with no valid return code set because we don't expect 
-** the activation to ever run again.
-**
-** Inputs:
-**		activation to tear down
-**
-** Outputs:
-**		All vmm contexts released and VMM shut down
------------------------------------------------------------------------*/
-void vmm_tear_down_all(thread_t act) {
-
-	vmmCntrlTable		*CTable;
-	int					cvi;
-	kern_return_t		ret;
-	struct savearea			*save;
-	spl_t				s;
-	
-	if(act->machine.specFlags & runningVM) {			/* Are we actually in a context right now? */
-		save = find_user_regs(act);					/* Find the user state context */
-		if(!save) {									/* Did we find it? */
-			panic("vmm_tear_down_all: runningVM marked but no user state context\n");
-			return;
-		}
-		
-		save->save_exception = kVmmBogusContext*4;	/* Indicate that this context is bogus now */
-		s = splhigh();								/* Make sure interrupts are off */
-		vmm_force_exit(act, save);					/* Force an exit from VM state */
-		splx(s);									/* Restore interrupts */
-	}
-	
-	if(act->machine.vmmControl) { /* Do we have a vmm control block? */
-		CTable = act->machine.vmmControl;
-		for(cvi = 1; cvi <= kVmmMaxContexts; cvi++) {	/* Look at all slots */
-			if(CTable->vmmc[cvi - 1].vmmFlags & vmmInUse) {	/* Is this one in use */
-				ret = vmm_tear_down_context(act, cvi);	/* Take down the found context */
-				if(ret != KERN_SUCCESS) {			/* Did it go away? */
-					panic("vmm_tear_down_all: vmm_tear_down_context failed; ret=%08X, act = %p, cvi = %d\n",
-					  ret, act, cvi);
-				}
-			}
-		}		
-
-/*
- *		Note that all address spaces should be gone here.
- */
-		if(act->machine.vmmControl) {						/* Did we find one? */
-			panic("vmm_tear_down_all: control table did not get deallocated\n");	/* Table did not go away */
-		}
-	}
-}
-
-/*-----------------------------------------------------------------------
-** vmm_map_page
-**
-** This function maps a page from within the client's logical
-** address space into the alternate address space.
-**
-** The page need not be locked or resident.  If not resident, it will be faulted
-** in by this code, which may take some time.  Also, if the page is not locked,
-** it and this mapping may disappear at any time, even before they get used.  Note also
-** that reference and change information is NOT preserved when a page is unmapped, either
-** explicitly or implicitly (e.g., a pageout, or being unmapped in the non-alternate address
-** space).  This means that if RC is needed, the page MUST be wired.
-**
-** Note that if there is already a mapping at the address, it is removed and all
-** information (including RC) is lost BEFORE an attempt is made to map it. Also,
-** if the map call fails, the old address is still unmapped.
-**
-** Inputs:
-**		act   - pointer to current thread activation
-**		index - index of address space to map into
-**		va    - virtual address within the client's address
-**			    space
-**		ava   - virtual address within the alternate address
-**			    space
-**		prot - protection flags
-**
-**	Note that attempts to map areas in nested pmaps (shared libraries) or block-mapped
-**  areas are not allowed and will fail. The same applies to directly mapped I/O areas.
-**
-** Input conditions:
-**      Interrupts disabled (from fast trap)
-**
-** Outputs:
-**		kernel return code indicating success or failure
-**      if success, va resident and alternate mapping made
------------------------------------------------------------------------*/
-
-kern_return_t vmm_map_page(
-	thread_t	 		act,
-	vmm_adsp_id_t	 	index,
-	addr64_t	 		cva,
-	addr64_t	 		ava,
-	vm_prot_t 			prot)
-{
-	kern_return_t		ret;
-	register mapping_t 	*mp;
-	vm_map_t 			map;
-	addr64_t			ova, nextva;
-	pmap_t				pmap;
-
-	pmap = vmm_get_adsp(act, index);			/* Get the guest pmap for this address space */
-	if(!pmap) return KERN_FAILURE;				/* Bogus address space, no VMs, or we can't make a pmap, failure... */
-
-	if(ava > vm_max_address) return kVmmInvalidAddress;	/* Does the machine support an address of this size? */
-
-	map = current_thread()->map;				/* Get the host's map */
-
-	if (pmap->pmapFlags & pmapVMgsaa) {			/* Guest shadow assist active ? */
-		ret = hw_res_map_gv(map->pmap, pmap, cva, ava, getProtPPC(prot, TRUE));
-												/* Attempt to resume an existing gv->phys mapping */
-		if (mapRtOK != ret) {					/* Nothing to resume, construct a new mapping */
-			unsigned int  pindex;
-			phys_entry_t *physent;
-			unsigned int pattr;
-			unsigned int wimg;
-			unsigned int mflags;
-			addr64_t gva;
-			
-			while (1) {							/* Find host mapping or fail */
-				mp = mapping_find(map->pmap, cva, &nextva, 0);
-												/* Attempt to find host mapping and pin it */
-				if (mp) break;					/* Got it */
-				
-				ml_set_interrupts_enabled(TRUE);
-												/* Open 'rupt window */
-				ret = vm_fault(map,				/* Didn't find it, try to fault in host page read/write */
-					vm_map_trunc_page(cva), 
-					VM_PROT_READ | VM_PROT_WRITE,
-					FALSE, /* change wiring */
-					THREAD_UNINT,
-					NULL,
-					0);
-				ml_set_interrupts_enabled(FALSE);
-												/* Close 'rupt window */
-				if (ret != KERN_SUCCESS)
-					return KERN_FAILURE;		/* Fault failed, return failure */
-			}
-			
-			if (mpNormal != (mp->mpFlags & mpType)) {
-												/* Host mapping must be a vanilla page */
-				mapping_drop_busy(mp);			/* Un-pin host mapping */
-				return KERN_FAILURE;			/* Return failure */
-			}
-	
-												/* Partially construct gv->phys mapping */
-			physent = mapping_phys_lookup(mp->mpPAddr, &pindex);
-			if (!physent) {
-				mapping_drop_busy(mp);
-				return KERN_FAILURE;
-			}
-			pattr = ((physent->ppLink & (ppI | ppG)) >> 60);
-			wimg = 0x2;
-			if (pattr & mmFlgCInhib)  wimg |= 0x4;
-			if (pattr & mmFlgGuarded) wimg |= 0x1;
-			mflags = (pindex << 16) | mpGuest;
-			gva = ((ava & ~mpHWFlags) | (wimg << 3) | getProtPPC(prot, TRUE));
-			
-			hw_add_map_gv(map->pmap, pmap, gva, mflags, mp->mpPAddr);
-												/* Construct new guest->phys mapping */
-			
-			mapping_drop_busy(mp);				/* Un-pin host mapping */
-		}
-	} else {
-		while(1) {								/* Keep trying until we get it or until we fail */
-	
-			mp = mapping_find(map->pmap, cva, &nextva, 0);	/* Find the mapping for this address */
-			
-			if(mp) break;						/* We found it */
-	
-			ml_set_interrupts_enabled(TRUE);	/* Enable interruptions */
-			ret = vm_fault(map,					/* Didn't find it, try to fault it in read/write... */
-					vm_map_trunc_page(cva), 
-					VM_PROT_READ | VM_PROT_WRITE,
-					FALSE, /*change wiring */
-					THREAD_UNINT,
-					NULL,
-					0);
-			ml_set_interrupts_enabled(FALSE);	/* Disable interruptions */
-			if (ret != KERN_SUCCESS) return KERN_FAILURE;	/* There isn't a page there, return... */
-		}
-	
-		if((mp->mpFlags & mpType) != mpNormal) {	/* If this is a block, a nest, or some other special thing, we can't map it */
-			mapping_drop_busy(mp);				/* We have everything we need from the mapping */
-			return KERN_FAILURE;				/* Leave in shame */
-		}
-		
-		while(1) {								/* Keep trying the enter until it goes in */
-			ova = mapping_make(pmap, ava, mp->mpPAddr, 0, 1, prot);	/* Enter the mapping into the pmap */
-			if(!ova) break;						/* If there were no collisions, we are done... */
-			mapping_remove(pmap, ova);			/* Remove the mapping that collided */
-		}
-	
-		mapping_drop_busy(mp);					/* We have everything we need from the mapping */
-	}
-
-	if (!((getPerProc()->spcFlags) & FamVMmode)) {
-		act->machine.vmmControl->vmmLastMap = ava & 0xFFFFFFFFFFFFF000ULL;	/* Remember the last mapping we made */
-		act->machine.vmmControl->vmmGFlags = (act->machine.vmmControl->vmmGFlags & ~vmmLastAdSp) | index;	/* Remember last address space */
-	}
-
-	return KERN_SUCCESS;
-}
-
-
-/*-----------------------------------------------------------------------
-** vmm_map_execute
-**
-** This function maps a page from within the client's logical
-** address space into the alternate address space of the
-** Virtual Machine Monitor context and then directly starts executing.
-**
-**	See description of vmm_map_page for details. 
-**
-** Inputs:
-**		Index is used for both the context and the address space ID.
-**		index[24:31] is the context id and index[16:23] is the address space.
-**		if the address space ID is 0, the context ID is used for it.
-**
-** Outputs:
-**		Normal exit is to run the VM.  Abnormal exit is triggered via a 
-**		non-KERN_SUCCESS return from vmm_map_page or later during the 
-**		attempt to transition into the VM. 
------------------------------------------------------------------------*/
-
-vmm_return_code_t vmm_map_execute(
-	thread_t	 		act,
-	vmm_thread_index_t 	index,
-	addr64_t	 		cva,
-	addr64_t	 		ava,
-	vm_prot_t 			prot)
-{
-	kern_return_t		ret;
-	vmmCntrlEntry 		*CEntry;
-	unsigned int		adsp;
-	vmm_thread_index_t	cndx;
-
-	cndx = index & 0xFF;							/* Clean it up */
-
-	CEntry = vmm_get_entry(act, cndx);				/* Get and validate the index */
-	if (CEntry == NULL) return kVmmBogusContext;	/* Return bogus context */
-	
-	if (((getPerProc()->spcFlags) & FamVMmode) && (CEntry != act->machine.vmmCEntry))
-		return kVmmBogusContext;			/* Yes, invalid index in Fam */
-	
-	adsp = (index >> 8) & 0xFF;						/* Get any requested address space */
-	if(!adsp) adsp = (index & 0xFF);				/* If 0, use context ID as address space ID */
-	
-	ret = vmm_map_page(act, adsp, cva, ava, prot);	/* Go try to map the page on in */
-	
-	
-	if(ret == KERN_SUCCESS) {
-		act->machine.vmmControl->vmmLastMap = ava & 0xFFFFFFFFFFFFF000ULL;	/* Remember the last mapping we made */
-		act->machine.vmmControl->vmmGFlags = (act->machine.vmmControl->vmmGFlags & ~vmmLastAdSp) | cndx;	/* Remember last address space */
-		vmm_execute_vm(act, cndx);				/* Return was ok, launch the VM */
-	}
-	
-	return ret;										/* We had trouble mapping in the page */	
-	
-}
-
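-/*
- *	Illustrative sketch (not part of the original source): building the
- *	combined index described above.  The low-order byte carries the context
- *	id and the next byte the address space id (0 means "use the context id"),
- *	matching the (index & 0xFF) and ((index >> 8) & 0xFF) extraction in the
- *	code.  "context_id" and "adsp_id" are hypothetical.
- */
-#if 0	/* example only */
-	vmm_thread_index_t combined = ((adsp_id & 0xFF) << 8) | (context_id & 0xFF);
-#endif
-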
-/*-----------------------------------------------------------------------
-** vmm_map_list
-**
-** This function maps a list of pages into various address spaces
-**
-** Inputs:
-**		act   - pointer to current thread activation
-**		index - index of default address space (used if not specified in list entry)
-**		count - number of pages to map
-**		flavor - 0 if 32-bit version, 1 if 64-bit
-**		vmcpComm in the comm page contains up to kVmmMaxMapPages to map
-**
-** Outputs:
-**		kernel return code indicating success or failure
-**		KERN_FAILURE is returned if kVmmMaxMapPages is exceeded
-**		or the vmm_map_page call fails.
-**		We return kVmmInvalidAddress if virtual address size is not supported
------------------------------------------------------------------------*/
-
-kern_return_t vmm_map_list(
-	thread_t	 		act,
-	vmm_adsp_id_t 		index,
-	unsigned int		cnt,
-	unsigned int		flavor)
-{
-	vmmCntrlEntry 		*CEntry;
-	kern_return_t		ret;
-	unsigned int 		i;
-	vmmMList			*lst;
-	vmmMList64			*lstx;
-	addr64_t	 		cva;
-	addr64_t	 		ava;
-	vm_prot_t 			prot;
-	vmm_adsp_id_t 		adsp;
-
-	CEntry = vmm_get_entry(act, index);				/* Convert index to entry */		
-	if (CEntry == NULL) return KERN_FAILURE;		/* Either this isn't a vmm or the index is bogus */
-	
-	if(cnt > kVmmMaxMapPages) return KERN_FAILURE;	/* They tried to map too many */
-	if(!cnt) return KERN_SUCCESS;					/* If they said none, we're done... */
-	
-	lst = (vmmMList *)&((vmm_comm_page_t *)CEntry->vmmContextKern)->vmcpComm[0];	/* Point to the first entry */
-	lstx = (vmmMList64 *)&((vmm_comm_page_t *)CEntry->vmmContextKern)->vmcpComm[0];	/* Point to the first entry */
-	
-	for(i = 0; i < cnt; i++) {						/* Step through and map all pages in the list */
-		if(flavor) {								/* Check if 32- or 64-bit addresses */
-			cva = lstx[i].vmlva;					/* Get the 64-bit actual address */	
-			ava = lstx[i].vmlava;					/* Get the 64-bit guest address */	
-		}
-		else {
-			cva = lst[i].vmlva;						/* Get the 32-bit actual address */	
-			ava = lst[i].vmlava;					/* Get the 32-bit guest address */	
-		}
-
-		prot = ava & vmmlProt;						/* Extract the protection bits */	
-		adsp = (ava & vmmlAdID) >> 4;				/* Extract an explicit address space request */	
-		if(!adsp) /* If no explicit, use supplied default */
-			adsp = index - 1;
-		ava &= 0xFFFFFFFFFFFFF000ULL; /* Clean up the address */
-		
-		ret = vmm_map_page(act, index, cva, ava, prot);	/* Go try to map the page on in */
-		if(ret != KERN_SUCCESS) /* Bail if any error */
-			return ret;
-	}
-	
-	return KERN_SUCCESS;
-}
-
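-/*
- *	Illustrative sketch (not part of the original source): how one 64-bit
- *	map-list entry encodes its fields.  The low-order bits of vmlava carry
- *	the protection (vmmlProt) and an optional explicit address space id
- *	(vmmlAdID, shifted left by 4), matching the extraction above; the
- *	page-aligned guest address occupies the upper bits.  "page_ava",
- *	"page_prot" and "page_adsp" are hypothetical.
- */
-#if 0	/* example only */
-	lstx[i].vmlava = (page_ava & 0xFFFFFFFFFFFFF000ULL)	/* Page-aligned guest address */
-		| ((page_adsp << 4) & vmmlAdID)					/* Optional explicit address space */
-		| (page_prot & vmmlProt);						/* Protection bits */
-#endif
-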
-/*-----------------------------------------------------------------------
-** vmm_get_page_mapping
-**
-** Given a context index and a guest virtual address, convert the address
-** to its corresponding host virtual address.
-**
-** Inputs:
-**		act   - pointer to current thread activation
-**		index - context index
-**		gva   - guest virtual address 
-**
-** Outputs:
-**		Host virtual address (page aligned) or -1 if not mapped or any failure
-**
-** Note:
-**		If the host address space contains multiple virtual addresses mapping
-**		to the physical address corresponding to the specified guest virtual
-**		address (i.e., host virtual aliases), it is unpredictable which host
-**		virtual address (alias) will be returned. Moral of the story: No host
-**		virtual aliases.
------------------------------------------------------------------------*/
-
-addr64_t vmm_get_page_mapping(
-	thread_t 			act,
-	vmm_adsp_id_t	 	index,
-	addr64_t	 		gva)
-{
-	register mapping_t 	*mp;
-	pmap_t				pmap;
-	addr64_t			nextva, hva;
-	ppnum_t				pa;
-
-	pmap = vmm_get_adsp(act, index);				/* Get and validate the index */
-	if (!pmap) return -1;							/* No good, failure... */
-	
-	if (pmap->pmapFlags & pmapVMgsaa) {				/* Guest shadow assist (GSA) active ? */
-		return (hw_gva_to_hva(pmap, gva));			/* Convert guest to host virtual address */			
-	} else {
-		mp = mapping_find(pmap, gva, &nextva, 0);	/* Find guest mapping for this virtual address */
-	
-		if(!mp) return -1;							/* Not mapped, return -1 */
-
-		pa = mp->mpPAddr;							/* Remember the physical page address */
-
-		mapping_drop_busy(mp);						/* Go ahead and release the mapping now */
-	
-		pmap = current_thread()->map->pmap;			/* Get the host pmap */
-		hva = mapping_p2v(pmap, pa);				/* Now find the source virtual */
-
-		if(hva != 0) return hva;					/* We found it... */
-	
-		panic("vmm_get_page_mapping: could not back-map guest va (%016llX)\n", gva);
-													/* We are bad wrong if we can't find it */
-
-		return -1;									/* Never executed, prevents compiler warning */
-	}
-}
-
-/*-----------------------------------------------------------------------
-** vmm_unmap_page
-**
-** This function unmaps a page from the guest address space.
-**
-** Inputs:
-**		act   - pointer to current thread activation
-**		index - index of vmm state for this page
-**		va    - virtual address within the vmm's address
-**			    space
-**
-** Outputs:
-**		kernel return code indicating success or failure
------------------------------------------------------------------------*/
-
-kern_return_t vmm_unmap_page(
-	thread_t	 		act,
-	vmm_adsp_id_t	 	index,
-	addr64_t	 		va)
-{
-	addr64_t			nadd;
-	pmap_t				pmap;
-
-	pmap = vmm_get_adsp(act, index);						/* Get and validate the index */
-	if (!pmap) return KERN_FAILURE;							/* No good, failure... */
-	
-	if (pmap->pmapFlags & pmapVMgsaa) {						/* Handle guest shadow assist specially */
-		hw_susp_map_gv(act->map->pmap, pmap, va);			/* Suspend the mapping */
-		return (KERN_SUCCESS);								/* Always returns success */
-	} else {
-		nadd = mapping_remove(pmap, va);					/* Toss the mapping */
-		
-		return ((nadd & 1) ? KERN_FAILURE : KERN_SUCCESS);	/* Return... */
-	}
-}
-
-/*-----------------------------------------------------------------------
-** vmm_unmap_list
-**
-** This function unmaps a list of pages from the alternate's logical
-** address space.
-**
-** Inputs:
-**		act   - pointer to current thread activation
-**		index - index of vmm state for this page
-**		count - number of pages to release
-**		flavor - 0 if 32-bit, 1 if 64-bit
-**		vmcpComm in the comm page contains up to kVmmMaxUnmapPages to unmap
-**
-** Outputs:
-**		kernel return code indicating success or failure
-**		KERN_FAILURE is returned if kVmmMaxUnmapPages is exceeded
------------------------------------------------------------------------*/
-
-kern_return_t vmm_unmap_list(
-	thread_t	 		act,
-	vmm_adsp_id_t	 	index,
-	unsigned int 		cnt,
-	unsigned int		flavor)
-{
-	vmmCntrlEntry 		*CEntry;
-	kern_return_t		kern_result = KERN_SUCCESS;
-	unsigned int i;
-	addr64_t			gva;
-	vmmUMList			*lst;
-	vmmUMList64			*lstx;
-	pmap_t				pmap;
-	int					adsp;
-
-	CEntry = vmm_get_entry(act, index);				/* Convert index to entry */		
-	if (CEntry == NULL) { /* Either this isn't a vmm or the index is bogus */
-		kern_result = KERN_FAILURE;
-		goto out;
-	}
-	
-	if(cnt > kVmmMaxUnmapPages) { /* They tried to unmap too many */
-		kern_result = KERN_FAILURE;
-		goto out;
-	}
-	if(!cnt) { /* If they said none, we're done... */
-		kern_result = KERN_SUCCESS;
-		goto out;
-	}
-	
-	lstx = (vmmUMList64 *) &((vmm_comm_page_t *)CEntry->vmmContextKern)->vmcpComm[0];	/* Point to the first entry */
-	lst = (vmmUMList *)lstx;
-	
-	for(i = 0; i < cnt; i++) {						/* Step and release all pages in list */
-		if(flavor) {								/* Check if 32- or 64-bit addresses */
-			gva = lstx[i].vmlava;					/* Get the 64-bit guest address */	
-		}
-		else {
-			gva = lst[i].vmlava;					/* Get the 32-bit guest address */	
-		}
-
-		adsp = (gva & vmmlAdID) >> 4;				/* Extract an explicit address space request */	
-		if(!adsp) /* If no explicit, use supplied default */
-			adsp = index - 1;
-		pmap = act->machine.vmmControl->vmmAdsp[adsp];	/* Get the pmap for this request */
-		if(!pmap)
-			continue;							/* Ain't nuthin' mapped here, no durn map... */
-
-		gva &= 0xFFFFFFFFFFFFF000ULL; /* Clean up the address */	
-		if (pmap->pmapFlags & pmapVMgsaa) {			/* Handle guest shadow assist specially */
-			hw_susp_map_gv(act->map->pmap, pmap, gva);
-													/* Suspend the mapping */
-		} else {
-			(void)mapping_remove(pmap, gva);		/* Toss the mapping */
-		}
-	}
-	
-out:
-	return kern_result;
-}
-
-/*-----------------------------------------------------------------------
-** vmm_unmap_all_pages
-**
-** This function unmaps all pages from the alternate's logical
-** address space.
-**
-** Inputs:
-**		act   - pointer to current thread activation
-**		index - index of context state
-**
-** Outputs:
-**		none
-**
-** Note:
-**      All pages are unmapped, but the address space (i.e., pmap) is still alive
------------------------------------------------------------------------*/
-
-void vmm_unmap_all_pages(
-	thread_t	 		act,
-	vmm_adsp_id_t	 	index)
-{
-	pmap_t				pmap;
-
-	pmap = vmm_get_adsp(act, index);						/* Convert index to entry */		
-	if (!pmap) return;										/* Either this isn't vmm thread or the index is bogus */
-
-	if (pmap->pmapFlags & pmapVMgsaa) {						/* Handle guest shadow assist specially */
-		hw_rem_all_gv(pmap);								/* Remove all guest's mappings from shadow hash table */
-	} else {
-		/*
-		 *	Note: the pmap code won't deal with the last page in the address space, so handle it explicitly
-		 */
-		mapping_remove(pmap, 0xFFFFFFFFFFFFF000LL);			/* Remove final page explicitly because we might have mapped it */	
-		pmap_remove(pmap, 0, 0xFFFFFFFFFFFFF000LL);			/* Remove all entries from this map */
-	}
-}
-
-
-/*-----------------------------------------------------------------------
-** vmm_get_page_dirty_flag
-**
-** This function returns the changed flag of the page
-** and optionally clears the flag.
-**
-** Inputs:
-**		act   - pointer to current thread activation
-**		index - index of vmm state for this page
-**		va    - virtual address within the vmm's address
-**			    space
-**		reset - Clears dirty if true, untouched if not
-**
-** Outputs:
-**		the dirty bit
-**		clears the dirty bit in the pte if requested
-**
-**	Note:
-**		The RC bits are merged into the global physical entry
------------------------------------------------------------------------*/
-
-boolean_t vmm_get_page_dirty_flag(
-	thread_t			act,
-	vmm_adsp_id_t	 	index,
-	addr64_t	 		va,
-	unsigned int		reset)
-{
-	unsigned int		RC;
-	pmap_t				pmap;
-
-	pmap = vmm_get_adsp(act, index);						/* Convert index to entry */		
-	if (!pmap) return 1;									/* Either this isn't vmm thread or the index is bogus */
-
-	if (pmap->pmapFlags & pmapVMgsaa) {						/* Handle guest shadow assist specially */
-		RC = hw_test_rc_gv(act->map->pmap, pmap, va, reset);/* Fetch the RC bits and clear if requested */	
-	} else {
-		RC = hw_test_rc(pmap, (addr64_t)va, reset);			/* Fetch the RC bits and clear if requested */
-	}
-
-	switch (RC & mapRetCode) {								/* Decode return code */
-	
-		case mapRtOK:										/* Changed */
-			return ((RC & (unsigned int)mpC) == (unsigned int)mpC);	/* Return if dirty or not */
-			break;
-	
-		case mapRtNotFnd:									/* Didn't find it */
-			return 1;										/* Return dirty */
-			break;
-			
-		default:
-			panic("vmm_get_page_dirty_flag: hw_test_rc failed - rc = %d, pmap = %p, va = %016llX\n", RC, pmap, va);
-		
-	}
-
-	return 1;												/* Return the change bit */
-}
-
-
-/*-----------------------------------------------------------------------
-** vmm_protect_page
-**
-** This function sets the protection bits of a mapped page
-**
-** Inputs:
-**		act   - pointer to current thread activation
-**		index - index of vmm state for this page
-**		va    - virtual address within the vmm's address
-**			    space
-**		prot  - Protection flags
-**
-** Outputs:
-**		none
-**		Protection bits of the mapping are modified
-**
------------------------------------------------------------------------*/
-
-kern_return_t vmm_protect_page(
-	thread_t	 		act,
-	vmm_adsp_id_t	 	index,
-	addr64_t	 		va,
-	vm_prot_t			prot)
-{
-	addr64_t			nextva;
-	int	ret;
-	pmap_t				pmap;
-
-	pmap = vmm_get_adsp(act, index);						/* Convert index to entry */		
-	if (!pmap) return KERN_FAILURE;							/* Either this isn't vmm thread or the index is bogus */
-	
-	if (pmap->pmapFlags & pmapVMgsaa) {						/* Handle guest shadow assist specially */
-		ret = hw_protect_gv(pmap, va, prot);				/* Try to change protection, GSA variant */
-	} else {
-		ret = hw_protect(pmap, va, prot, &nextva);			/* Try to change protection */
-	}
-
-	switch (ret) {											/* Decode return code */
-	
-		case mapRtOK:										/* All ok... */
-			break;											/* Outta here */
-			
-		case mapRtNotFnd:									/* Didn't find it */
-			return KERN_SUCCESS;							/* Ok, return... */
-			break;
-			
-		default:
-			panic("vmm_protect_page: hw_protect failed - rc = %d, pmap = %p, va = %016llX\n", ret, pmap, (addr64_t)va);
-		
-	}
-
-	if (!((getPerProc()->spcFlags) & FamVMmode)) {
-		act->machine.vmmControl->vmmLastMap = va & 0xFFFFFFFFFFFFF000ULL;	/* Remember the last mapping we made */
-		act->machine.vmmControl->vmmGFlags = (act->machine.vmmControl->vmmGFlags & ~vmmLastAdSp) | index;	/* Remember last address space */
-	}
-
-	return KERN_SUCCESS;									/* Return */
-}
-
-
-/*-----------------------------------------------------------------------
-** vmm_protect_execute
-**
-** This function sets the protection bits of a mapped page
-** and then directly starts executing.
-**
-**	See description of vmm_protect_page for details
-**
-** Inputs:
-**		See vmm_protect_page and vmm_map_execute
-**
-** Outputs:
-**		Normal exit is to run the VM.  Abnormal exit is triggered via a 
-**		non-KERN_SUCCESS return from vmm_map_page or later during the 
-**		attempt to transition into the VM. 
------------------------------------------------------------------------*/
-
-vmm_return_code_t vmm_protect_execute(
-	thread_t	 		act,
-	vmm_thread_index_t 	index,
-	addr64_t	 		va,
-	vm_prot_t			prot)
-{
-	kern_return_t		ret;
-	vmmCntrlEntry 		*CEntry;
-	unsigned int		adsp;
-	vmm_thread_index_t	cndx;
-
-	cndx = index & 0xFF;							/* Clean it up */
-	CEntry = vmm_get_entry(act, cndx);				/* Get and validate the index */
-	if (CEntry == NULL) return kVmmBogusContext;	/* Return bogus context */
-	
-	adsp = (index >> 8) & 0xFF;						/* Get any requested address space */
-	if(!adsp) adsp = (index & 0xFF);				/* If 0, use context ID as address space ID */
-	
-	if (((getPerProc()->spcFlags) & FamVMmode) && (CEntry != act->machine.vmmCEntry))
-		return kVmmBogusContext;			/* Yes, invalid index in Fam */
-	
-	ret = vmm_protect_page(act, adsp, va, prot);	/* Go try to change access */
-	
-	if(ret == KERN_SUCCESS) {
-		act->machine.vmmControl->vmmLastMap = va & 0xFFFFFFFFFFFFF000ULL;	/* Remember the last mapping we made */
-		act->machine.vmmControl->vmmGFlags = (act->machine.vmmControl->vmmGFlags & ~vmmLastAdSp) | cndx;	/* Remember last address space */
-		vmm_execute_vm(act, cndx);	/* Return was ok, launch the VM */
-	}
-	
-	return ret;										/* We had trouble of some kind (shouldn't happen) */	
-	
-}
-
-
-/*-----------------------------------------------------------------------
-** vmm_get_float_state
-**
-** This function causes the current floating point state to 
-** be saved into the shared context area.  It also clears the
-** vmmFloatCngd changed flag.
-**
-** Inputs:
-**		act - pointer to current thread activation structure
-**		index - index returned by vmm_init_context
-**
-** Outputs:
-**		context saved
------------------------------------------------------------------------*/
-
-kern_return_t vmm_get_float_state(
-	thread_t	 		act,
-	vmm_thread_index_t 	index)
-{
-	vmmCntrlEntry 		*CEntry;
-	int					i;
-	register struct savearea_fpu *sv;
-
-	CEntry = vmm_get_entry(act, index);				/* Convert index to entry */		
-	if (CEntry == NULL) return KERN_FAILURE;		/* Either this isn't vmm thread or the index is bogus */
-	
-	act->machine.specFlags &= ~floatCng;				/* Clear the special flag */
-	CEntry->vmmContextKern->vmmStat &= ~vmmFloatCngd;	/* Clear the change indication */
-
-	fpu_save(&CEntry->vmmFacCtx);					/* Save context if live */
-
-	if(CEntry->vmmFacCtx.FPUsave) { /* Is there context yet? */
-		sv = CEntry->vmmFacCtx.FPUsave;
-		bcopy((char *)&sv->save_fp0, (char *)&(CEntry->vmmContextKern->vmm_proc_state.ppcFPRs), 32 * 8); /* 32 registers */
-		return KERN_SUCCESS;
-	}
-
-
-	for(i = 0; i < 32; i++) {						/* Initialize floating points */
-		CEntry->vmmContextKern->vmm_proc_state.ppcFPRs[i].d = FloatInit;	/* Initial value */
-	}
-
-	return KERN_SUCCESS;
-}
-
-/*-----------------------------------------------------------------------
-** vmm_get_vector_state
-**
-** This function causes the current vector state to 
-** be saved into the shared context area.  It also clears the
-** vmmVectorCngd changed flag.
-**
-** Inputs:
-**		act - pointer to current thread activation structure
-**		index - index returned by vmm_init_context
-**
-** Outputs:
-**		context saved
------------------------------------------------------------------------*/
-
-kern_return_t vmm_get_vector_state(
-	thread_t	 		act,
-	vmm_thread_index_t 	index)
-{
-	vmmCntrlEntry 		*CEntry;
-	int					i, j;
-	unsigned int 		vrvalidwrk;
-	register struct savearea_vec *sv;
-
-	CEntry = vmm_get_entry(act, index);				/* Convert index to entry */		
-	if (CEntry == NULL) return KERN_FAILURE;		/* Either this isn't vmm thread or the index is bogus */
-
-	vec_save(&CEntry->vmmFacCtx);					/* Save context if live */
-	
-	act->machine.specFlags &= ~vectorCng;				/* Clear the special flag */
-	CEntry->vmmContextKern->vmmStat &= ~vmmVectCngd;	/* Clear the change indication */
-	
-	if(CEntry->vmmFacCtx.VMXsave) { /* Is there context yet? */
-		sv = CEntry->vmmFacCtx.VMXsave;
-		vrvalidwrk = sv->save_vrvalid;				/* Get the valid flags */
-
-		for(i = 0; i < 32; i++) {					/* Copy the saved registers and invalidate the others */
-			if(vrvalidwrk & 0x80000000) {			/* Do we have a valid value here? */
-				for(j = 0; j < 4; j++) {			/* If so, copy it over */
-					CEntry->vmmContextKern->vmm_proc_state.ppcVRs[i].i[j] = ((unsigned int *)&(sv->save_vr0))[(i * 4) + j];
-				}
-			}
-			else {
-				for(j = 0; j < 4; j++) {			/* Otherwise set to empty value */
-					CEntry->vmmContextKern->vmm_proc_state.ppcVRs[i].i[j] = QNaNbarbarian[j];
-				}
-			}
-			
-			vrvalidwrk = vrvalidwrk << 1;			/* Shift over to the next */
-			
-		}
-
-		return KERN_SUCCESS;
-	}
-
-	for(i = 0; i < 32; i++) {						/* Initialize vector registers */
-		for(j=0; j < 4; j++) {						/* Do words */
-			CEntry->vmmContextKern->vmm_proc_state.ppcVRs[i].i[j] = QNaNbarbarian[j];		/* Initial value */
-		}
-	}
-
-	return KERN_SUCCESS;
-}
-
-/*-----------------------------------------------------------------------
-** vmm_set_timer
-**
-** This function sets a timer (in AbsoluteTime) to pop at a specific time.
-** If the timer is actually set (to a time in the future), the vmmTimerPop
-** flag is cleared.
-**
-** A timer is cleared by setting the time to 0. This will clear
-** the vmmTimerPop bit. Simply setting the timer to earlier than the
-** current time clears the internal timer request, but leaves the
-** vmmTimerPop flag set.
-** 
-**
-** Inputs:
-**		act - pointer to current thread activation structure
-**		index - index returned by vmm_init_context
-**		timerhi - high order word of AbsoluteTime to pop
-**		timerlo - low order word of AbsoluteTime to pop
-**
-** Outputs:
-**		timer set, vmmTimerPop cleared
------------------------------------------------------------------------*/
-
-kern_return_t vmm_set_timer(
-	thread_t 			act,
-	vmm_thread_index_t 	index,
-	unsigned int 		timerhi, 
-	unsigned int 		timerlo)
-{
-	vmmCntrlEntry 		*CEntry;
-		
-	CEntry = vmm_get_entry(act, index);				/* Convert index to entry */		
-	if (CEntry == NULL) return KERN_FAILURE;		/* Either this isn't vmm thread or the index is bogus */
-	
-	CEntry->vmmTimer = ((uint64_t)timerhi << 32) | timerlo;
-	
-	vmm_timer_pop(act);								/* Go adjust all of the timer stuff */
-	return KERN_SUCCESS;							/* Leave now... */
-}
-
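-/*
- *	Illustrative sketch (not part of the original source): per the header
- *	comment above, passing a time of 0 clears the timer and the vmmTimerPop
- *	bit for the context.  "act" and "index" are hypothetical, previously
- *	obtained values.
- */
-#if 0	/* example only */
-	(void)vmm_set_timer(act, index, 0, 0);	/* Clear this context's timer */
-#endif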
-
-/*-----------------------------------------------------------------------
-** vmm_get_timer
-**
-** This function causes the timer for a specified VM to be
-** returned in return_params[0] and return_params[1].
-** Note that this is slightly awkward for 64-bit VMs: we split the timer
-** into two 32-bit parts so that we still set return_params[0] and [1],
-** even though the 64-bit parameters are 8 bytes wide and could hold the
-** whole value.
-** 
-**
-** Inputs:
-**		act - pointer to current thread activation structure
-**		index - index returned by vmm_init_context
-**
-** Outputs:
-**		Timer value set in return_params[0] and return_params[1].
-**		Set to 0 if timer is not set.
------------------------------------------------------------------------*/
-
-kern_return_t vmm_get_timer(
-	thread_t 			act,
-	vmm_thread_index_t 	index)
-{
-	vmmCntrlEntry 		*CEntry;
-
-	CEntry = vmm_get_entry(act, index);				/* Convert index to entry */		
-	if (CEntry == NULL) return KERN_FAILURE;		/* Either this isn't vmm thread or the index is bogus */
-
-	if(CEntry->vmmXAFlgs & vmm64Bit) {				/* A 64-bit virtual machine? */
-		CEntry->vmmContextKern->vmmRet.vmmrp64.return_params[0] = (uint32_t)(CEntry->vmmTimer >> 32);	/* Return the last timer value */
-		CEntry->vmmContextKern->vmmRet.vmmrp64.return_params[1] = (uint32_t)CEntry->vmmTimer;	/* Return the last timer value */
-	}
-	else {
-		CEntry->vmmContextKern->vmmRet.vmmrp32.return_params[0] = (CEntry->vmmTimer >> 32);	/* Return the last timer value */
-		CEntry->vmmContextKern->vmmRet.vmmrp32.return_params[1] = (uint32_t)CEntry->vmmTimer;	/* Return the last timer value */
-	}
-	return KERN_SUCCESS;
-}
-
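-/*
- *	Illustrative sketch (not part of the original source): a client reading
- *	the two 32-bit return parameters set above could reassemble the full
- *	AbsoluteTime value like this ("rp" is a hypothetical pointer to the
- *	return_params array):
- */
-#if 0	/* example only */
-	uint64_t timer = ((uint64_t)rp[0] << 32) | (uint64_t)rp[1];	/* High word in [0], low word in [1] */
-#endif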
-
-/*-----------------------------------------------------------------------
-** vmm_timer_pop
-**
-** This function causes all timers in the array of VMs to be updated.
-** All appropriate flags are set or reset.  If a VM is currently
-** running and its timer expired, it is intercepted.
-**
-** The qactTimer value is set to the lowest unexpired timer.  It is
-** zeroed if all timers are expired or have been reset.
-**
-** Inputs:
-**		act - pointer to current thread activation structure
-**
-** Outputs:
-**		timers set, vmmTimerPop cleared or set
------------------------------------------------------------------------*/
-
-void vmm_timer_pop(
-	thread_t	 		act)
-{
-	vmmCntrlTable		*CTable;
-	int					cvi, any;
-	uint64_t			now, soonest;
-	struct savearea			*sv;
-		
-	if(!((unsigned int)act->machine.vmmControl & 0xFFFFFFFE)) {	/* Are there any virtual machines? */
-		panic("vmm_timer_pop: No virtual machines defined; act = %p\n", act);
-	}
-
-	soonest = 0xFFFFFFFFFFFFFFFFULL;				/* Max time */
-
-	clock_get_uptime(&now);							/* What time is it? */
-	
-	CTable = act->machine.vmmControl;					/* Make this easier */	
-	any = 0;										/* Haven't found a running unexpired timer yet */
-	
-	for(cvi = 0; cvi < kVmmMaxContexts; cvi++) {	/* Cycle through all and check time now */
-
-		if(!(CTable->vmmc[cvi].vmmFlags & vmmInUse)) continue;	/* Do not check if the entry is empty */
-		
-		if(CTable->vmmc[cvi].vmmTimer == 0) {		/* Is the timer reset? */
-			CTable->vmmc[cvi].vmmFlags &= ~vmmTimerPop;			/* Clear timer popped */
-			CTable->vmmc[cvi].vmmContextKern->vmmStat &= ~vmmTimerPop;	/* Clear timer popped */
-			continue;								/* Check next */
-		}
-
-		if (CTable->vmmc[cvi].vmmTimer <= now) {
-			CTable->vmmc[cvi].vmmFlags |= vmmTimerPop;	/* Set timer popped here */
-			CTable->vmmc[cvi].vmmContextKern->vmmStat |= vmmTimerPop;	/* Set timer popped here */
-			if((unsigned int)&CTable->vmmc[cvi] == (unsigned int)act->machine.vmmCEntry) {	/* Is this the running VM? */
-				sv = find_user_regs(act);			/* Get the user state registers */
-				if(!sv) {							/* Did we find something? */
-					panic("vmm_timer_pop: no user context; act = %p\n", act);
-				}
-				sv->save_exception = kVmmReturnNull*4;	/* Indicate that this is a null exception */
-				vmm_force_exit(act, sv);			/* Intercept a running VM */
-			}
-			continue;								/* Check the rest */
-		}
-		else {										/* It hasn't popped yet */
-			CTable->vmmc[cvi].vmmFlags &= ~vmmTimerPop;	/* Set timer not popped here */
-			CTable->vmmc[cvi].vmmContextKern->vmmStat &= ~vmmTimerPop;	/* Set timer not popped here */
-		}
-		
-		any = 1;									/* Show we found an active unexpired timer */
-		
-		if (CTable->vmmc[cvi].vmmTimer < soonest)
-			soonest = CTable->vmmc[cvi].vmmTimer;
-	}
-	
-	if(any) {
-		if (act->machine.qactTimer == 0 || soonest <= act->machine.qactTimer)
-			act->machine.qactTimer = soonest;	/* Set lowest timer */
-	}
-}
-
-
-
-/*-----------------------------------------------------------------------
-** vmm_stop_vm
-**
-** This function prevents the specified VM(s) from running.
-** If any is currently executing, the execution is intercepted
-** with a code of kVmmStopped.  Note that execution of the VM is
-** blocked until a vmmExecuteVM is called with the start flag set to 1.
-** This provides the ability for a thread to stop execution of a VM and
-** ensure that it will not be run until the emulator has processed the
-** "virtual" interruption.
-**
-** Inputs:
-**		vmmask - 32-bit mask corresponding to the VMs to put in the stop state
-**				 NOTE: if this mask is all 0s, any executing VM is intercepted with
-**				 a kVmmStopped (but not marked stopped); otherwise this is a no-op.
-**				 Also note that there is a potential race here and the VM may not stop.
-**
-** Outputs:
-**		kernel return code indicating success
-**      or if no VMs are enabled, an invalid syscall exception.
------------------------------------------------------------------------*/
-
-int vmm_stop_vm(struct savearea *save)
-{
-
-	thread_t			act;
-	vmmCntrlTable		*CTable;
-	int					cvi, i;
-    task_t				task;
-    thread_t			fact;
-    unsigned int		vmmask;
-    ReturnHandler		*stopapc;
-
-	ml_set_interrupts_enabled(TRUE);			/* This can take a bit of time so pass interruptions */
-	
-	task = current_task();						/* Figure out who we are */
-
-	task_lock(task);							/* Lock our task */
-
-	fact = (thread_t)task->threads.next;	/* Get the first activation on task */
-	act = NULL;									/* Pretend we didn't find it yet */
-
-	for(i = 0; i < task->thread_count; i++) {	/* All of the activations */
-		if(fact->machine.vmmControl) {				/* Is this a virtual machine monitor? */
-			act = fact;							/* Yeah... */
-			break;								/* Bail the loop... */
-		}
-		fact = (thread_t)fact->task_threads.next;	/* Go to the next one */
-	}
-
-	if(!((unsigned int)act)) {					/* See if we have VMMs yet */
-		task_unlock(task);						/* No, unlock the task */
-		ml_set_interrupts_enabled(FALSE);		/* Set back interruptions */
-		return 0;								/* Go generate a syscall exception */
-	}
-
-	thread_reference(act);
-
-	task_unlock(task);							/* Safe to release now */
-
-	thread_mtx_lock(act);
-
-	CTable = act->machine.vmmControl;				/* Get the pointer to the table */
-	
-	if(!((unsigned int)CTable & -2)) {			/* Are there any all the way up yet? */
-		thread_mtx_unlock(act);					/* Unlock the activation */
-		thread_deallocate(act);
-		ml_set_interrupts_enabled(FALSE);		/* Set back interruptions */
-		return 0;								/* Go generate a syscall exception */
-	}
-	
-	if(!(vmmask = save->save_r3)) {				/* Get the stop mask and check if all zeros */
-		thread_mtx_unlock(act);					/* Unlock the activation */
-		thread_deallocate(act);
-		ml_set_interrupts_enabled(FALSE);		/* Set back interruptions */
-		save->save_r3 = KERN_SUCCESS;			/* Set success */	
-		return 1;								/* Return... */
-	}
-
-	for(cvi = 0; cvi < kVmmMaxContexts; cvi++) {	/* Search slots */
-		if((0x80000000 & vmmask) && (CTable->vmmc[cvi].vmmFlags & vmmInUse)) {	/* See if we need to stop and if it is in use */
-			hw_atomic_or_noret(&CTable->vmmc[cvi].vmmFlags, vmmXStop);	/* Set this one to stop */
-		}
-		vmmask = vmmask << 1;					/* Slide mask over */
-	}
-	
-	if(hw_compare_and_store(0, 1, &act->machine.emPendRupts)) {	/* See if there is already a stop pending and lock out others if not */
-		thread_mtx_unlock(act);					/* Already one pending, unlock the activation */
-		thread_deallocate(act);
-		ml_set_interrupts_enabled(FALSE);		/* Set back interruptions */
-		save->save_r3 = KERN_SUCCESS;			/* Say we did it... */	
-		return 1;								/* Leave */
-	}
-
-	if(!(stopapc = (ReturnHandler *)kalloc(sizeof(ReturnHandler)))) {	/* Get a return handler control block */
-		act->machine.emPendRupts = 0;				/* No memory, say we have given up request */
-		thread_mtx_unlock(act);					/* Unlock the activation */
-		thread_deallocate(act);
-		ml_set_interrupts_enabled(FALSE);		/* Set back interruptions */
-		save->save_r3 = KERN_RESOURCE_SHORTAGE;	/* No storage... */
-		return 1;								/* Return... */
-	}
-
-	ml_set_interrupts_enabled(FALSE);			/* Disable interruptions for now */
-
-	stopapc->handler = vmm_interrupt;			/* Set interruption routine */
-
-	stopapc->next = act->handlers;				/* Put our interrupt at the start of the list */
-	act->handlers = stopapc;					/* Point to us */
-
-	act_set_apc(act);							/* Set an APC AST */
-	ml_set_interrupts_enabled(TRUE);			/* Enable interruptions now */
-
-	thread_mtx_unlock(act);						/* Unlock the activation */
-	thread_deallocate(act);
-	
-	ml_set_interrupts_enabled(FALSE);			/* Set back interruptions */
-	save->save_r3 = KERN_SUCCESS;				/* Hip, hip, hooray... */
-	return 1;
-}
-
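-/*
- *	Illustrative sketch (not part of the original source): the stop mask is
- *	consumed from its high-order bit down, so bit 0x80000000 corresponds to
- *	context slot 0.  A hypothetical caller wanting to stop only the context
- *	in slot "slot" would pass:
- */
-#if 0	/* example only */
-	unsigned int vmmask = 0x80000000U >> slot;	/* One bit per context slot, MSB = slot 0 */
-#endif
-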
-/*-----------------------------------------------------------------------
-** vmm_interrupt
-**
-** This function is executed asynchronously from an APC AST.
-** It is to be used for anything that needs to interrupt a running VM.
-** This includes any kind of interruption generation (other than timer pop)
-** or entering the stopped state.
-**
-** Inputs:
-**		ReturnHandler *rh - the return handler control block as required by the APC.
-**		thread_t act  - the activation
-**
-** Outputs:
-**		Whatever needed to be done is done.
------------------------------------------------------------------------*/
-
-void vmm_interrupt(ReturnHandler *rh, thread_t act) {
-
-	vmmCntrlTable		*CTable;
-	struct savearea			*sv;
-	boolean_t			inter;
-
-
-
-	kfree(rh, sizeof(ReturnHandler));	/* Release the return handler block */
-	
-	inter  = ml_set_interrupts_enabled(FALSE);	/* Disable interruptions for now */
-
-	act->machine.emPendRupts = 0;					/* Say that there are no more interrupts pending */
-	CTable = act->machine.vmmControl;				/* Get the pointer to the table */
-	
-	if(!((unsigned int)CTable & -2)) return;	/* Leave if we aren't doing VMs any more... */
-
-	if(act->machine.vmmCEntry && (act->machine.vmmCEntry->vmmFlags & vmmXStop)) {	/* Do we need to stop the running guy? */
-		sv = find_user_regs(act);				/* Get the user state registers */
-		if(!sv) {								/* Did we find something? */
-			panic("vmm_interrupt: no user context; act = %p\n", act);
-		}
-		sv->save_exception = kVmmStopped*4;		/* Set a "stopped" exception */
-		vmm_force_exit(act, sv);				/* Intercept a running VM */
-	}
-	ml_set_interrupts_enabled(inter);			/* Put interrupts back to what they were */
-}
diff --git a/osfmk/ppc/vmachmon.h b/osfmk/ppc/vmachmon.h
deleted file mode 100644
index 91626cfa2..000000000
--- a/osfmk/ppc/vmachmon.h
+++ /dev/null
@@ -1,498 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*-----------------------------------------------------------------------
-** vmachmon.h
-**
-** C routines that we are adding to the MacOS X kernel.
-**
------------------------------------------------------------------------*/
-
-#include <ppc/exception.h>
-
-#ifndef	_VEMULATION_H_
-#define	_VEMULATION_H_
-
-/*************************************************************************************
-	External Emulation Types
-**************************************************************************************/
-
-typedef union vmm_vector_register_t {
-	unsigned long			i[4];
-	unsigned short			s[8];
-	unsigned char			b[16];
-} vmm_vector_register_t;
-
-typedef union vmm_fp_register_t {
-	double					d;
-	unsigned long			i[2];
-	unsigned short			s[4];
-	unsigned char			b[8];
-} vmm_fp_register_t;
-
-
-typedef struct vmm_regs32_t {
-
-	unsigned long			ppcPC;						/* 000 */
-	unsigned long			ppcMSR;						/* 004 */
-
-	unsigned long			ppcGPRs[32];				/* 008 */
-
-	unsigned long			ppcCR;						/* 088 */
-	unsigned long			ppcXER;						/* 08C */
-	unsigned long			ppcLR;						/* 090 */
-	unsigned long			ppcCTR;						/* 094 */
-	unsigned long			ppcMQ;						/* 098 - Obsolete */
-	unsigned long			ppcVRSave;					/* 09C */
-	unsigned long			ppcRsrvd0A0[40];			/* 0A0 */
-														/* 140 */
-} vmm_regs32_t;
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct vmm_regs64_t {
-
-	unsigned long long		ppcPC;						/* 000 */
-	unsigned long long		ppcMSR;						/* 008 */
-
-	unsigned long long		ppcGPRs[32];				/* 010 */
-
-	unsigned long long		ppcXER;						/* 110 */
-	unsigned long long		ppcLR;						/* 118 */
-	unsigned long long		ppcCTR;						/* 120 */
-	unsigned long			ppcCR;						/* 128 */
-	unsigned long			ppcVRSave;					/* 12C */
-	unsigned long			ppcRsvd130[4];				/* 130 */
-														/* 140 */
-} vmm_regs64_t;
-#pragma pack()
-	
-	
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef union vmm_regs_t {
-	vmm_regs32_t			ppcRegs32;
-	vmm_regs64_t			ppcRegs64;
-} vmm_regs_t;
-#pragma pack()
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct vmm_processor_state_t {
-														/* 32-byte bndry */
-	vmm_regs_t				ppcRegs;					/* Define registers areas */
-	
-/*	We must be 16-byte aligned here */
-
-	vmm_vector_register_t	ppcVRs[32];					/* These are only valid after a kVmmGetVectorState */
-	vmm_vector_register_t	ppcVSCR;					/* This is always loaded/saved at host/guest transition */
-	
-/*	We must be 8-byte aligned here */
-
-	vmm_fp_register_t		ppcFPRs[32];				/* These are only valid after a kVmmGetFloatState */
-	vmm_fp_register_t		ppcFPSCR;					/* This is always loaded/saved at host/guest transition */
-	unsigned long			ppcReserved2[2];			/* Pad out to multiple of 16 bytes */
-} vmm_processor_state_t;
-#pragma pack()
-
-typedef unsigned long vmm_return_code_t;
-
-typedef unsigned long vmm_thread_index_t;
-#define vmmTInum 0x000000FF
-#define vmmTIadsp 0x0000FF00
-typedef unsigned long vmm_adsp_id_t;
-
-enum {
-	kVmmCurMajorVersion					= 0x0001,
-	kVmmCurMinorVersion					= 0x0007,
-	kVmmMinMajorVersion					= 0x0001,
-};
-#define kVmmCurrentVersion ((kVmmCurMajorVersion << 16) | kVmmCurMinorVersion)
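
For illustration only (this helper is not part of the header): since the major
number sits in the high half-word and the minor in the low half-word, a client
holding the word returned through the kVmmGetVersion selector might check
compatibility along these lines.

	/* Hypothetical compatibility check; minor revisions are treated as additive. */
	static int vmm_version_compatible(unsigned long v) {
		unsigned long major = (v >> 16) & 0xFFFF;	/* High half: major version */
		return (major >= kVmmMinMajorVersion) && (major <= kVmmCurMajorVersion);
	}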
-
-typedef unsigned long vmm_features_t;
-enum {
-	kVmmFeature_LittleEndian			= 0x00000001,
-	kVmmFeature_Stop					= 0x00000002,
-	kVmmFeature_ExtendedMapping			= 0x00000004,
-	kVmmFeature_ListMapping				= 0x00000008,
-	kVmmFeature_FastAssist				= 0x00000010,
-	kVmmFeature_XA						= 0x00000020,
-	kVmmFeature_SixtyFourBit			= 0x00000040,
-	kVmmFeature_MultAddrSpace			= 0x00000080,
-	kVmmFeature_GuestShadowAssist		= 0x00000100,	/* Guest->physical shadow hash table */
-	kVmmFeature_GlobalMappingAssist		= 0x00000200,	/* Global shadow mapping support */
-	kVmmFeature_HostShadowAssist		= 0x00000400,	/* Linear shadow mapping of an area of
-	                                                       host virtual as guest physical */
-	kVmmFeature_MultAddrSpaceAssist		= 0x00000800,	/* Expanded pool of guest virtual
-	                                                       address spaces */
-};
-#define kVmmCurrentFeatures (kVmmFeature_LittleEndian | kVmmFeature_Stop | kVmmFeature_ExtendedMapping \
-	| kVmmFeature_ListMapping | kVmmFeature_FastAssist | kVmmFeature_XA \
-	| kVmmFeature_GuestShadowAssist)
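
In the same illustrative spirit, testing individual feature bits is a simple
mask check; this sketch assumes the caller has already fetched the feature word
via the kVmmvGetFeatures selector (the fetch itself is elided).

	/* Sketch: can a 64-bit guest with guest shadow assist be configured? */
	static int can_configure_64bit_gsa_guest(vmm_features_t feats) {
		return (feats & kVmmFeature_SixtyFourBit) != 0 &&
		       (feats & kVmmFeature_GuestShadowAssist) != 0;
	}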
-
-enum {
-	vmm64Bit							= 0x80000000,	/* Make guest 64-bit */
-	vmmGSA								= 0x40000000,	/* Enable guest shadow assist (GSA) */
-	vmmGMA								= 0x20000000,	/* Enable global shadow mapping assist (GMA) */
-};
-
-#define kVmmSupportedSetXA (vmm64Bit | vmmGSA | vmmGMA)
-
-typedef unsigned long vmm_version_t;
-
-typedef struct vmm_ret_parms32_t {
-	unsigned long 			return_params[4];
-} vmm_ret_parms32_t;
-
-typedef struct vmm_ret_parms64_t {
-	unsigned long long		return_params[4];
-} vmm_ret_parms64_t;
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef union vmm_ret_parms_t {
-	vmm_ret_parms64_t		vmmrp64;		/* 64-bit flavor */
-	vmm_ret_parms32_t		vmmrp32;		/* 32-bit flavor */
-	unsigned int			retgas[11];		/* Force this to be 11 words long */
-} vmm_ret_parms_t;
-#pragma pack()
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct vmm_fastassist_state32_t {
-	unsigned long fastassist_dispatch;
-	unsigned long fastassist_refcon;
-
-	unsigned long fastassist_dispatch_code;
-	unsigned long fastassist_parameter[5];
-
-	unsigned long guest_register[8];
-
-	unsigned long guest_pc;
-	unsigned long guest_msr;
-
-	unsigned long fastassist_intercepts;
-	unsigned long fastassist_reserved1;
-} vmm_fastassist_state32_t;
-
-typedef struct vmm_fastassist_state64_t {
-	unsigned long long fastassist_dispatch;
-	unsigned long long fastassist_refcon;
-
-	unsigned long long fastassist_dispatch_code;
-	unsigned long long fastassist_parameter[5];
-
-	unsigned long long guest_register[8];
-
-	unsigned long long guest_pc;
-	unsigned long long guest_msr;
-
-	unsigned long fastassist_intercepts;
-	unsigned long fastassist_reserved1;
-} vmm_fastassist_state64_t;
-
-typedef union vmm_fastassist_state_t {
-	vmm_fastassist_state64_t		vmmfs64;		/* 64-bit flavor */
-	vmm_fastassist_state32_t		vmmfs32;		/* 32-bit flavor */
-} vmm_fastassist_state_t;
-#pragma pack()
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct vmm_state_page_t {
-	/* This structure must remain below 4KB (one page) in size */
-	vmm_version_t			interface_version;
-	vmm_thread_index_t		thread_index;
-	unsigned int			vmmStat;	/* Note: this field is identical to vmmFlags in vmmCntrlEntry */
-	unsigned int			vmmCntrl;
-#define vmmFloatLoad	0x80000000
-#define vmmFloatLoadb	0
-#define vmmVectLoad		0x40000000
-#define vmmVectLoadb	1
-#define vmmVectVRall	0x20000000
-#define vmmVectVRallb	2
-#define vmmVectVAss		0x10000000
-#define vmmVectVAssb	3
-#define vmmXStart		0x08000000
-#define vmmXStartb		4
-#define vmmKey			0x04000000
-#define vmmKeyb			5
-#define vmmFamEna		0x02000000
-#define vmmFamEnab		6
-#define vmmFamSet		0x01000000
-#define vmmFamSetb		7
-
-	vmm_return_code_t		return_code;
-	vmm_ret_parms_t			vmmRet;
-
-	/* The next portion of the structure must remain 32-byte aligned */
-	vmm_processor_state_t	vmm_proc_state;
-
-	/* The next portion of the structure must remain 16-byte aligned */
-	vmm_fastassist_state_t	vmm_fastassist_state;
-
-} vmm_state_page_t;
-#pragma pack()
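
The vmmFloatLoad and vmmVectLoad control bits are consumed at launch: the
switchIntoVM path in vmachmon_asm.s (further below) copies the staged values in
and then clears the bits.  A hedged sketch, not taken from the source, of how an
emulator might stage new floating-point state before the next kVmmExecuteVM:

	/* Sketch: publish new guest FPR images for pickup at the next launch. */
	static void stage_guest_fprs(vmm_state_page_t *sp,
				     const vmm_fp_register_t new_fprs[32]) {
		int i;
		for (i = 0; i < 32; i++)
			sp->vmm_proc_state.ppcFPRs[i] = new_fprs[i];	/* New images */
		sp->vmmCntrl |= vmmFloatLoad;	/* Ask the monitor to load them */
	}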
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct vmm_comm_page_t {
-	union {
-		vmm_state_page_t	vmcpState;					/* Reserve area for state */
-		unsigned int		vmcpPad[768];				/* Reserve space for 3/4 page state area */
-	} vmcpfirst;
-	unsigned int			vmcpComm[256];				/* Define last 1024 bytes as a communications area - function specific */
-} vmm_comm_page_t;
-#pragma pack()
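
The arithmetic works out to exactly one page: 768 unsigned ints of padded state
(3072 bytes) plus 256 unsigned ints of communications area (1024 bytes) is 4096
bytes.  A compile-time guard in this spirit (not present in the original) would
catch accidental growth:

	/* Sketch: fail the build if vmm_comm_page_t stops being exactly one 4KB page. */
	typedef char vmm_comm_page_size_check[(sizeof(vmm_comm_page_t) == 4096) ? 1 : -1];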
-
-enum {
-	/* Function Indices (passed in r3) */
-	kVmmGetVersion				= 0,					/* Get VMM system version */
-	kVmmvGetFeatures,									/* Get VMM supported features */
-	kVmmInitContext,									/* Initialize a context */
-	kVmmTearDownContext,								/* Destroy a context */
-	kVmmTearDownAll,									/* Destroy all contexts */
-	kVmmMapPage,										/* Map a host to guest address space */
-	kVmmGetPageMapping,									/* Get host address of a guest page */
-	kVmmUnmapPage,										/* Unmap a guest page */
-	kVmmUnmapAllPages,									/* Unmap all pages in a guest address space */
-	kVmmGetPageDirtyFlag,								/* Check if guest page modified */
-	kVmmGetFloatState,									/* Retrieve guest floating point context */
-	kVmmGetVectorState,									/* Retrieve guest vector context */
-	kVmmSetTimer,										/* Set a guest timer */
-	kVmmGetTimer,										/* Get a guest timer */
-	kVmmExecuteVM,										/* Launch a guest */
-	kVmmProtectPage,									/* Set protection attributes for a guest page */
-	kVmmMapExecute,										/* Map guest page and launch */
-	kVmmProtectExecute,									/* Set prot attributes and launch */
-	kVmmMapList,										/* Map a list of pages into guest address spaces */
-	kVmmUnmapList,										/* Unmap a list of pages from guest address spaces */
-	kvmmExitToHost,										/* Exit from FAM to host -- fast-path syscall */
-	kvmmResumeGuest,									/* Resume guest from FAM -- fast-path syscall */
-	kvmmGetGuestRegister,								/* Get guest register from FAM -- fast-path syscall */
-	kvmmSetGuestRegister,								/* Set guest register from FAM -- fast-path syscall */
-	
-	kVmmActivateXA,										/* Activate extended architecture features for a VM */
-	kVmmDeactivateXA,									/* Deactivate extended architecture features for a VM */
-	kVmmGetXA,											/* Get extended architecture features from a VM */
-
-	kVmmMapPage64,										/* Map a host to guest address space - supports 64-bit */
-	kVmmGetPageMapping64,								/* Get host address of a guest page - supports 64-bit  */
-	kVmmUnmapPage64,									/* Unmap a guest page - supports 64-bit  */
-	kVmmGetPageDirtyFlag64,								/* Check if guest page modified - supports 64-bit  */
-	kVmmProtectPage64,									/* Set protection attributes for a guest page - supports 64-bit */
-	kVmmMapExecute64,									/* Map guest page and launch - supports 64-bit  */
-	kVmmProtectExecute64,								/* Set prot attributes and launch - supports 64-bit  */
-	kVmmMapList64,										/* Map a list of pages into guest address spaces - supports 64-bit  */
-	kVmmUnmapList64,									/* Unmap a list of pages from guest address spaces - supports 64-bit  */
-	kVmmMaxAddr,										/* Returns the maximum virtual address that is mappable  */
-	
-	kVmmSetGuestMemory,									/* Sets base and extent of guest physical memory in host address space */
-	kVmmPurgeLocal,										/* Purges all non-global mappings for a given guest address space */
-};
-
-#define kVmmReturnNull					0
-#define kVmmBogusContext				1
-#define kVmmStopped						2
-#define kVmmReturnDataPageFault			3
-#define kVmmReturnInstrPageFault		4
-#define kVmmReturnAlignmentFault		6
-#define kVmmReturnProgramException		7
-#define kVmmReturnSystemCall			12
-#define kVmmReturnTraceException		13
-#define kVmmAltivecAssist				22
-#define kVmmInvalidAddress				0x1000
-#define kVmmInvalidAdSpace				0x1001
-
-/*
- *	Notes on guest address spaces.
- *
- *	Address spaces are loosely coupled to virtual machines.  The default is for
- *	a guest with an index of 1 to use address space 1, 2 to use 2, etc.  However,
- *	any guest may be launched using any address space and any address space may be the
- *	target for a map or unmap function.  Note that the (un)map list functions may pass in
- *	an address space ID on a page-by-page basis.
- *	
- *	An address space is instantiated either explicitly by mapping something into it, or 
- *	implicitly by launching a guest with it.
- *
- *	An address space is destroyed explicitly by kVmmTearDownAll or kVmmUnmapAllPages.  It is
- *	destroyed implicitly by kVmmTearDownContext.  The latter is done in order to remain
- *	backwards compatible with the previous implementation, which does not have decoupled
- *	guests and address spaces.
- *
- *	An address space supports the maximum virtual address supported by the processor.  
- *	The 64-bit variants of the mapping functions can be used on non-64-bit machines.  If an
- *	unmappable address (e.g., an address larger than 4GB-1 on a 32-bit machine) is requested, 
- *	the operation fails with a kVmmInvalidAddress return code.
- *
- *	Note that for 64-bit calls, both host and guest addresses are specified as 64-bit values.
- *
- */
-
-
-
-
-/*
- *	Storage Extended Protection modes
- *	Notes:
- *		To keep compatibility, vmmKey and the PPC key have reversed meanings,
- *		i.e., vmmKey 0 is PPC key 1 and vice versa.
- *
- *	    vmmKey										Notes
- *	Mode			0				1
- *
- *	kVmmProtNARW	not accessible	read/write		VM_PROT_NONE (not settable via VM calls)
- *	kVmmProtRORW	read only		read/write		
- *	kVmmProtRWRW	read/write		read/write		VM_PROT_WRITE or (VM_PROT_WRITE | VM_PROT_READ)
- *	kVmmProtRORO	read only		read only		VM_PROT_READ
- 
- */
- 
-#define kVmmProtXtnd 0x00000008
-#define kVmmProtNARW (kVmmProtXtnd | 0x00000000)
-#define kVmmProtRORW (kVmmProtXtnd | 0x00000001)
-#define kVmmProtRWRW (kVmmProtXtnd | 0x00000002)
-#define kVmmProtRORO (kVmmProtXtnd | 0x00000003)
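
Following the table above (and remembering that vmmKey and the PPC key have
reversed meanings), a purely illustrative helper for picking the code whose
vmmKey-0 behavior matches a generic Mach protection might read:

	/* Sketch: choose the extended protection whose key-0 column fits the request. */
	static unsigned int vmm_prot_for(vm_prot_t prot) {
		if (prot & VM_PROT_WRITE) return kVmmProtRWRW;	/* read/write under both keys */
		if (prot & VM_PROT_READ)  return kVmmProtRORO;	/* read only under both keys */
		return kVmmProtNARW;				/* not accessible under key 0 */
	}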
-
-/*
- *	Map list formats
- *	The low-order 12 bits of the guest virtual address are used as flags, as follows:
- *		0x007 - for the map calls, this is the key to set
- *		0x3F0 - for both map and unmap, this is the address space ID upon which to operate.
- *				Note that if 0, the address space ID from the function call is used instead.
- */
-
-typedef struct vmmMList {
-	unsigned int	vmlva;			/* Virtual address in host address space */
-	unsigned int	vmlava;			/* Virtual address in guest address space */
-} vmmMList;
-
-typedef struct vmmMList64 {
-	unsigned long long	vmlva;		/* Virtual address in host address space */
-	unsigned long long	vmlava;		/* Virtual address in guest address space */
-} vmmMList64;
-
-typedef struct vmmUMList {
-	unsigned int	vmlava;			/* Virtual address in guest address space */
-} vmmUMList;
-
-typedef struct vmmUMList64 {
-	unsigned long long	vmlava;		/* Virtual address in guest address space */
-} vmmUMList64;
-
-#define vmmlFlgs 0x00000FFF			/* Flags passed in the low-order 12 bits of vmlava */
-#define vmmlProt 0x00000007			/* Protection flags for the page */
-#define vmmlAdID 0x000003F0			/* Guest address space ID - used only if non-zero */
-#define vmmlGlob 0x00000400			/* Mapping is global */
-#define vmmlRsvd 0x00000800			/* Reserved for future */
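
To make the packing concrete, here is a sketch (the helper and its parameter
names are illustrative) of filling one 64-bit map-list element with a
page-aligned guest address, a protection key, and a per-page address space ID:

	/* Sketch: pack the per-page flags into the low 12 bits of the guest address. */
	static void fill_map_entry(vmmMList64 *e, unsigned long long host_va,
				   unsigned long long guest_va, unsigned int key,
				   unsigned int adsp_id) {
		e->vmlva  = host_va;					/* Host virtual address */
		e->vmlava = (guest_va & ~(unsigned long long)vmmlFlgs)	/* Page portion */
			  | (key & vmmlProt)				/* 0x007: key to set */
			  | ((adsp_id << 4) & vmmlAdID);		/* 0x3F0: space ID */
	}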
-
-/*************************************************************************************
-	Internal Emulation Types
-**************************************************************************************/
-
-#define kVmmMaxContexts					32
-#define kVmmMaxUnmapPages				64
-#define kVmmMaxMapPages					64
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct vmmCntrlEntry {						/* Virtual Machine Monitor control table entry */
-	unsigned int	vmmFlags;						/* Assorted control flags */
-#define vmmInUse 		0x80000000
-#define vmmInUseb 		0
-#define vmmFloatCngd	0x40000000
-#define vmmFloatCngdb	1
-#define vmmVectCngd		0x20000000
-#define vmmVectCngdb	2
-#define vmmTimerPop		0x10000000
-#define vmmTimerPopb	3
-#define vmmFAMmode		0x04000000
-#define vmmFAMmodeb		5
-#define vmmXStop		0x00800000
-#define vmmXStopb		8
-#define vmmSpfSave		0x000000FF
-#define vmmSpfSaveb		24
-	unsigned int	vmmXAFlgs;						/* Extended Architecture flags */
-	vmm_state_page_t *vmmContextKern;				/* Kernel address of context communications area */
-	ppnum_t			vmmContextPhys;					/* Physical address of context communications area */
-	vmm_state_page_t *vmmContextUser;				/* User address of context communications area */
-	facility_context vmmFacCtx;						/* Header for vector and floating point contexts */
-	pmap_t			vmmPmap;						/* Last dispatched pmap */
-	uint64_t		vmmTimer;						/* Last set timer value. Zero means unset */
-	unsigned int	vmmFAMintercept;				/* FAM intercepted exceptions */
-} vmmCntrlEntry;
-#pragma pack()
-
-#pragma pack(4)							/* Make sure the structure stays as we defined it */
-typedef struct vmmCntrlTable {						/* Virtual Machine Monitor Control table */
-	unsigned int	vmmGFlags;						/* Global flags */
-#define vmmLastAdSp 0xFF							/* Remember the address space that was mapped last */
-	addr64_t		vmmLastMap;						/* Last vaddr mapping made */
-	vmmCntrlEntry	vmmc[kVmmMaxContexts];			/* One entry for each possible Virtual Machine Monitor context */
-	pmap_t			vmmAdsp[kVmmMaxContexts];		/* Guest address space pmaps */
-} vmmCntrlTable;
-#pragma pack()
-
-/* function decls for kernel level routines... */
-extern void vmm_execute_vm(thread_t act, vmm_thread_index_t index);
-extern kern_return_t vmm_tear_down_context(thread_t act, vmm_thread_index_t index);
-extern kern_return_t vmm_get_float_state(thread_t act, vmm_thread_index_t index);
-extern kern_return_t vmm_get_vector_state(thread_t act, vmm_thread_index_t index);
-extern kern_return_t vmm_set_timer(thread_t act, vmm_thread_index_t index, unsigned int timerhi, unsigned int timerlo);
-extern kern_return_t vmm_get_timer(thread_t act, vmm_thread_index_t index);
-extern void vmm_tear_down_all(thread_t act);
-extern kern_return_t vmm_map_page(thread_t act, vmm_thread_index_t hindex, addr64_t cva,
-	addr64_t ava, vm_prot_t prot);
-extern vmm_return_code_t vmm_map_execute(thread_t act, vmm_thread_index_t hindex, addr64_t cva,
-	addr64_t ava, vm_prot_t prot);
-extern kern_return_t vmm_protect_page(thread_t act, vmm_thread_index_t hindex, addr64_t va,
-	vm_prot_t prot);
-extern vmm_return_code_t vmm_protect_execute(thread_t act, vmm_thread_index_t hindex, addr64_t va,
-	vm_prot_t prot);
-extern addr64_t vmm_get_page_mapping(thread_t act, vmm_thread_index_t index,
-	addr64_t va);
-extern kern_return_t vmm_unmap_page(thread_t act, vmm_thread_index_t index, addr64_t va);
-extern void vmm_unmap_all_pages(thread_t act, vmm_thread_index_t index);
-extern boolean_t vmm_get_page_dirty_flag(thread_t act, vmm_thread_index_t index,
-	addr64_t va, unsigned int reset);
-extern kern_return_t vmm_activate_XA(thread_t act, vmm_thread_index_t index, unsigned int xaflags);
-extern kern_return_t vmm_deactivate_XA(thread_t act, vmm_thread_index_t index, unsigned int xaflags);
-extern unsigned int vmm_get_XA(thread_t act, vmm_thread_index_t index);
-extern int vmm_get_features(struct savearea *);
-extern int vmm_get_version(struct savearea *);
-extern int vmm_init_context(struct savearea *);
-extern int vmm_dispatch(struct savearea *);
-extern int vmm_exit(thread_t act, struct savearea *);
-extern void vmm_force_exit(thread_t act, struct savearea *);
-extern int vmm_stop_vm(struct savearea *save);
-extern void vmm_timer_pop(thread_t act);
-extern void vmm_interrupt(ReturnHandler *rh, thread_t act);
-extern kern_return_t vmm_map_list(thread_t act, vmm_thread_index_t index, unsigned int cnt, unsigned int flavor);
-extern kern_return_t vmm_unmap_list(thread_t act, vmm_thread_index_t index, unsigned int cnt, unsigned int flavor);
-extern vmm_return_code_t vmm_resume_guest(vmm_thread_index_t index, unsigned long pc, 
-	unsigned long vmmCntrl, unsigned long vmmCntrMaskl);
-extern vmm_return_code_t vmm_exit_to_host(vmm_thread_index_t index);
-extern unsigned long vmm_get_guest_register(vmm_thread_index_t index, unsigned long reg_index);
-extern vmm_return_code_t vmm_set_guest_register(vmm_thread_index_t index, unsigned long reg_index, unsigned long reg_value);
-extern addr64_t vmm_max_addr(thread_t act);
-extern kern_return_t vmm_set_guest_memory(thread_t act, vmm_thread_index_t index, addr64_t base, addr64_t extent);
-extern kern_return_t vmm_purge_local(thread_t act, vmm_thread_index_t index);
-
-#endif
diff --git a/osfmk/ppc/vmachmon_asm.s b/osfmk/ppc/vmachmon_asm.s
deleted file mode 100644
index 3f8cac654..000000000
--- a/osfmk/ppc/vmachmon_asm.s
+++ /dev/null
@@ -1,2368 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#include <assym.s>
-#include <debug.h>
-#include <ppc/asm.h>
-#include <ppc/proc_reg.h>
-#include <ppc/exception.h>
-
-/*
- *	This file contains implementations for the Virtual Machine Monitor
- *	facility.
- */
-
-#define vmmMapDone 31
-#define vmmDoing64 30
-
-
-/*
- *	int vmm_dispatch(savearea, act);
- *
- *	vmm_dispatch is a PPC-only system call.  It is used with a selector (first
- *	parameter) to determine what function to enter.  This is treated as an extension
- *	of hw_exceptions.
- *
- *	Inputs: 
- *		R4  = current activation
- *		R16 = current thread
- *		R30 = current savearea
- */
- 
- 			.align	5												; Line up on cache line
-			.globl	EXT(vmm_dispatch_table)
-
-LEXT(vmm_dispatch_table)
-
-			/* Don't change the order of these routines in the table. It's  */
-			/* OK to add new routines, but they must be added at the bottom. */
-
-			.long	EXT(vmm_get_version_sel)						; Get the version of the VMM interface
-			.long	0												; Not valid in Fam
-			.long	EXT(vmm_get_features_sel)						; Get the features of the VMM interface
-			.long	0												; Not valid in Fam
-			.long	EXT(vmm_init_context_sel)						; Initializes a new VMM context
-			.long	0												; Not valid in Fam
-			.long	EXT(vmm_tear_down_context)						; Tears down a previously-allocated VMM context
-			.long	0												; Not valid in Fam
-			.long	EXT(vmm_tear_down_all)							; Tears down all VMMs 
-			.long	0												; Not valid in Fam
-			.long	EXT(vmm_map_page32)								; Maps a page from the main address space into the VM space - supports 32-bit
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_get_page_mapping32)						; Returns client va associated with VM va - supports 32-bit
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_unmap_page32)							; Unmaps a page from the VM space - supports 32-bit
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_unmap_all_pages)						; Unmaps all pages from the VM space
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_get_page_dirty_flag32)					; Gets the change bit for a page and optionally clears it - supports 32-bit
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_get_float_state)						; Gets current floating point state
-			.long	0												; Not valid in Fam
-			.long	EXT(vmm_get_vector_state)						; Gets current vector state
-			.long	0												; Not valid in Fam
-			.long	EXT(vmm_set_timer)								; Sets a timer value
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_get_timer)								; Gets a timer value
-			.long	1												; Valid in Fam
-			.long	EXT(switchIntoVM)								; Switches to the VM context
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_protect_page32)							; Sets protection values for a page - supports 32-bit
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_map_execute32)							; Maps a page and launches VM - supports 32-bit
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_protect_execute32)						; Sets protection values for a page and launches VM - supports 32-bit
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_map_list32)								; Maps a list of pages - supports 32-bit
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_unmap_list32)							; Unmaps a list of pages - supports 32-bit
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_fam_reserved)							; exit from Fam to host
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_fam_reserved)							; resume guest from Fam
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_fam_reserved)							; get guest register from Fam
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_fam_reserved)							; Set guest register from Fam
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_activate_XA)							; Activate extended architecture features for a VM 
-			.long	0												; Not valid in Fam
-			.long	EXT(vmm_deactivate_XA)							; Deactivate extended architecture features for a VM 
-			.long	0												; Not valid in Fam
-			.long	EXT(vmm_get_XA)									; Get extended architecture features from a VM 
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_map_page)								; Map a host to guest address space - supports 64-bit 
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_get_page_mapping)						; Get host address of a guest page - supports 64-bit 
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_unmap_page)								; Unmap a guest page - supports 64-bit 
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_get_page_dirty_flag)					; Check if guest page modified - supports 64-bit 
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_protect_page)							; Sets protection values for a page - supports 64-bit
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_map_execute)							; Map guest page and launch - supports 64-bit 
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_protect_execute)						; Set prot attributes and launch - supports 64-bit 
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_map_list64)								; Map a list of pages into guest address spaces - supports 64-bit 
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_unmap_list64)							; Unmap a list of pages from guest address spaces - supports 64-bit 
-			.long	1												; Valid in Fam
-			.long	EXT(vmm_max_addr)								; Returns the maximum virtual address 
-			.long	1												; Valid in Fam
-#if 0
-			.long	EXT(vmm_set_guest_memory)						; Set guest memory extent
-			.long	0												; Not valid in FAM
-			.long	EXT(vmm_purge_local)							; Purge all local guest mappings
-			.long	1												; Valid in FAM
-#endif
-			.set	vmm_count,(.-EXT(vmm_dispatch_table))/8			; Get the top number
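
In C terms, the lookup the code below performs is roughly the following; the
structure and function are illustrative, not part of the source:

	/* Sketch of the selector dispatch: 8-byte entries of {routine, FAM validity}. */
	struct vmm_dispatch_entry {
		int  (*routine)(void);	/* Handler address (the .long EXT(...) word) */
		long valid_in_fam;	/* 1 if callable from FAM mode, 0 otherwise */
	};

	static int vmm_dispatch_sketch(const struct vmm_dispatch_entry *table,
				       unsigned long count, unsigned long sel,
				       int in_fam) {
		if (sel >= count)
			return -1;	/* Bogus selector: treated as a bogus system call */
		if (in_fam && !table[sel].valid_in_fam)
			return -1;	/* In FAM and syscall invalid: intercept to host */
		return table[sel].routine();
	}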
-
-
-			.align	5
-			.globl	EXT(vmm_dispatch)
-
-LEXT(vmm_dispatch)
-
-			lwz		r11,saver3+4(r30)			; Get the selector
-			mr		r3,r4						; All of our functions want the activation as the first parm
-			lis		r10,hi16(EXT(vmm_dispatch_table))	; Get top half of table
-			cmplwi	r11,kVmmExecuteVM			; Should we switch to the VM now?
-			cmplwi	cr1,r11,vmm_count			; See if we have a valid selector
-			ori		r10,r10,lo16(EXT(vmm_dispatch_table))	; Get low half of table
-			lwz		r4,saver4+4(r30)			; Get 1st parameter after selector
-			beq+	EXT(switchIntoVM)			; Yes, go switch to it....
-			rlwinm	r11,r11,3,0,28				; Index into table
-			bge-	cr1,vmmBogus				; It is a bogus entry
-			add		r12,r10,r11					; Get the vmm dispatch syscall entry
-			mfsprg	r10,1						; Get the current activation
-			lwz		r10,ACT_PER_PROC(r10)		; Get the per_proc block
-			lwz		r13,0(r12)					; Get address of routine
-			lwz		r12,4(r12)					; Get validity flag
-			lwz		r5,spcFlags(r10)			; Get per_proc special flags
-			cmpwi	cr1,r12,0					; Check Fam valid 
-			rlwinm.	r5,r5,0,FamVMmodebit,FamVMmodebit	; Test FamVMmodebit
-			crand	cr0_eq,cr1_eq,cr0_gt		; In Fam and Invalid syscall	
-			beq		vmmBogus					; Intercept to host
-			lwz		r5,saver5+4(r30)			; Get 2nd parameter after selector - note that some of these parameters may actually be long longs
-			lwz		r6,saver6+4(r30)			; Get 3rd parameter after selector
-			mtlr	r13							; Set the routine address
-			lwz		r7,saver7+4(r30)			; Get 4th parameter after selector
-			lwz		r8,saver8+4(r30)			; Get 5th parameter after selector
-			lwz		r9,saver9+4(r30)			; Get 6th parameter after selector
-;
-;			NOTE: some of the above parameters are actually long longs.  We have glue code that transforms
-;			all needed parameters and/or adds 32-/64-bit flavors to the needed functions.
-;			
-
-			blrl								; Call function
-
-vmmRetPt:	li		r0,0						; Clear this out			
-			stw		r0,saver3(r30)				; Make sure top of RC is clear
-			stw		r3,saver3+4(r30)			; Pass back the return code
-			stw		r0,saver4(r30)				; Make sure bottom of RC is clear (just in case)
-			stw		r4,saver4+4(r30)			; Pass back the bottom return code (just in case)
-			li		r3,1						; Set normal return with check for AST
-			b		EXT(ppcscret)				; Go back to handler...
-			
-vmmBogus:	
-			mfsprg	r3,1						; Get the current activation
-			lwz		r10,ACT_PER_PROC(r3)		; Get the per_proc block
-			lwz		r5,spcFlags(r10)			; Get per_proc special flags
-			rlwinm.	r5,r5,0,FamVMmodebit,FamVMmodebit	; Test FamVMmodebit
-			bne		vmmexitcall					; Do it to it		
-			li		r3,0						; Bogus selector, treat like a bogus system call
-			b		EXT(ppcscret)				; Go back to handler...
-
-
-			.align	5
-			.globl	EXT(vmm_get_version_sel)
-
-LEXT(vmm_get_version_sel)						; Selector based version of get version
-
-			lis		r3,hi16(EXT(vmm_get_version))
-			ori		r3,r3,lo16(EXT(vmm_get_version))
-			b		selcomm
-
-
-			.align	5
-			.globl	EXT(vmm_get_features_sel)
-
-LEXT(vmm_get_features_sel)						; Selector based version of get features
-
-			lis		r3,hi16(EXT(vmm_get_features))
-			ori		r3,r3,lo16(EXT(vmm_get_features))
-			b		selcomm
-
-
-			.align	5
-			.globl	EXT(vmm_init_context_sel)
-
-LEXT(vmm_init_context_sel)						; Selector based version of init context
-
-			lwz		r4,saver4+4(r30)			; Get the passed in version
-			lwz		r5,saver5+4(r30)			; Get the passed in comm area
-			lis		r3,hi16(EXT(vmm_init_context))
-			stw		r4,saver3+4(r30)			; Cheat and move this parameter over
-			ori		r3,r3,lo16(EXT(vmm_init_context))
-			stw		r5,saver4+4(r30)			; Cheat and move this parameter over
-
-selcomm:	mtlr	r3							; Set the real routine address
-			mr		r3,r30						; Pass in the savearea
-			blrl								; Call the function
-			b		EXT(ppcscret)				; Go back to handler...
-
-			.align	5
-			.globl	EXT(vmm_map_page32)
-
-LEXT(vmm_map_page32)
-			mr		r9,r7											; Move prot to correct parm
-			mr		r8,r6											; Move guest address to low half of long long
-			li		r7,0											; Clear high half of guest address
-			mr		r6,r5											; Move host address to low half of long long
-			li		r5,0											; Clear high half of host address
-			b		EXT(vmm_map_page)								; Transition to real function...
-
-			.align	5
-			.globl	EXT(vmm_get_page_mapping32)
-
-LEXT(vmm_get_page_mapping32)
-			mr		r6,r5											; Move guest address to low half of long long
-			li		r5,0											; Clear high half of guest address
-			bl		EXT(vmm_get_page_mapping)						; Transition to real function...
-			mr		r3,r4											; Convert addr64_t to vm_offset_t, dropping top half
-			b		vmmRetPt										; Join normal return...
-
-			.align	5
-			.globl	EXT(vmm_unmap_page32)
-
-LEXT(vmm_unmap_page32)
-			mr		r6,r5											; Move guest address to low half of long long
-			li		r5,0											; Clear high half of guest address
-			b		EXT(vmm_unmap_page)								; Transition to real function...
-
-			.align	5
-			.globl	EXT(vmm_get_page_dirty_flag32)
-
-LEXT(vmm_get_page_dirty_flag32)
-			mr		r7,r6											; Move reset flag
-			mr		r6,r5											; Move guest address to low half of long long
-			li		r5,0											; Clear high half of guest address
-			b		EXT(vmm_get_page_dirty_flag)					; Transition to real function...
-
-			.align	5
-			.globl	EXT(vmm_protect_page32)
-
-LEXT(vmm_protect_page32)
-			mr		r7,r6											; Move protection bits
-			mr		r6,r5											; Move guest address to low half of long long
-			li		r5,0											; Clear high half of guest address
-			b		EXT(vmm_protect_page)							; Transition to real function...
-
-			.align	5
-			.globl	EXT(vmm_map_execute32)
-
-LEXT(vmm_map_execute32)
-			mr		r9,r7											; Move prot to correct parm
-			mr		r8,r6											; Move guest address to low half of long long
-			li		r7,0											; Clear high half of guest address
-			mr		r6,r5											; Move host address to low half of long long
-			li		r5,0											; Clear high half of host address
-			b		EXT(vmm_map_execute)							; Transition to real function...
-
-			.align	5
-			.globl	EXT(vmm_protect_execute32)
-			
-LEXT(vmm_protect_execute32)
-			mr		r7,r6											; Move protection bits
-			mr		r6,r5											; Move guest address to low half of long long
-			li		r5,0											; Clear high half of guest address
-			b		EXT(vmm_protect_execute)						; Transition to real function...
-
-			.align	5
-			.globl	EXT(vmm_map_list32)
-			
-LEXT(vmm_map_list32)
-			li		r6,0											; Set 32-bit flavor
-			b		EXT(vmm_map_list)								; Go to common routine...
-
-			.align	5
-			.globl	EXT(vmm_map_list64)
-			
-LEXT(vmm_map_list64)
-			li		r6,1											; Set 64-bit flavor
-			b		EXT(vmm_map_list)								; Go to common routine...
-
-			.align	5
-			.globl	EXT(vmm_unmap_list32)
-			
-LEXT(vmm_unmap_list32)
-			li		r6,0											; Set 32-bit flavor
-			b		EXT(vmm_unmap_list)								; Go to common routine...
-
-			.align	5
-			.globl	EXT(vmm_unmap_list64)
-			
-LEXT(vmm_unmap_list64)
-			li		r6,1											; Set 64-bit flavor
-			b		EXT(vmm_unmap_list)								; Go to common routine...
-
-/*
- *			Here is where we transition to the virtual machine.
- *
- *			We will swap the register context in the savearea with the one saved in our shared
- *			context area.  We will do some validity checking, clear any nasty bits in the MSR, and
- *			force the mandatory ones on.
- *
- *			Then we will setup the new address space to run with, and anything else that is normally part
- *			of a context switch.
- *
- *			The vmm_execute_vm entry point is for the fused vmm_map_execute and vmm_protect_execute
- *			calls.  This is called, but never returned from.  We always go directly back to the
- *			user from here.
- *
- *
- */
- 
- 
- 			.align	5
- 			.globl	EXT(vmm_execute_vm)
-
-LEXT(vmm_execute_vm)
- 			lwz		r30,ACT_MACT_PCB(r3)		; Restore the savearea pointer because it could be trash here
- 			b		EXT(switchIntoVM)			; Join common...
- 
- 
- 			.align	5
- 			.globl	EXT(switchIntoVM)
-
-LEXT(switchIntoVM)
-			mfsprg	r10,1						; Get the current activation
-			lwz		r10,ACT_PER_PROC(r10)		; Get the per_proc block
-			rlwinm	r31,r4,24,24,31				; Get the address space
-			rlwinm	r4,r4,0,24,31				; Isolate the context id
-			lwz		r28,vmmControl(r3)			; Pick up the control table address
-			subi	r4,r4,1						; Switch to zero offset
-			rlwinm.	r2,r28,0,0,30				; Is there a context there? (Note: we will ignore bit 31 so that we
-												;   do not try this while we are transitioning from off to on)
-			cmplwi	cr1,r4,kVmmMaxContexts		; Is the index valid?
-			beq-	vmmBogus					; Not started, treat like a bogus system call
-			subic.	r31,r31,1					; Make address space 0 based and test if we use default
-			mulli	r2,r4,vmmCEntrySize			; Get displacement from index
-			bge-	cr1,swvmmBogus				; Index is bogus...
-			add		r2,r2,r28					; Point to the entry
-			bge--	swvmmDAdsp					; There was an explicit address space request
-			mr		r31,r4						; Default the address space to the context ID
-
-swvmmDAdsp:	la		r2,vmmc(r2)					; Get the offset to the context array
-			lwz		r8,vmmGFlags(r28)			; Get the general flags
-			lwz		r4,vmmFlags(r2)				; Get the flags for the selected entry
-			crset	vmmMapDone					; Assume we will be mapping something
-			lwz		r5,vmmContextKern(r2)		; Get the context area address
-			rlwinm.	r26,r4,0,vmmInUseb,vmmInUseb	; See if the slot is in use
-			cmplwi	cr1,r31,kVmmMaxContexts		; See if we have a valid address space ID
-			rlwinm	r8,r8,0,24,31				; Clean up address space
-			beq--	swvmmBogus					; This context is no good...
-
-			la		r26,vmmAdsp(r28)			; Point to the pmaps
-			sub		r8,r8,r31					; Get diff between launching address space - 1 and last mapped into (should be 1 if the same)
-			rlwinm	r31,r31,2,0,29				; Index to the pmap
-			cmplwi	r8,1						; See if we have the same address space
-			bge--	cr1,swvmmBogAdsp			; Address space is no good...
-			lwzx	r31,r26,r31					; Get the requested address space pmap
-			li		r0,0						; Get a 0 in case we need to trash redrive
-			lwz		r15,spcFlags(r10)			; Get per_proc special flags
-			beq		swvmmAdspOk					; Do not invalidate address space if we are launching the same
-			crclr	vmmMapDone					; Clear map done flag
-			stb		r0,vmmGFlags+3(r28)			; Clear the last mapped address space ID so we will not redrive later
-;
-;			Here we check for any immediate intercepts.  So far, the only
-;			two of these are a timer pop and an external stop.  We will not dispatch if
-;			either is true.  The caller needs to either reset the timer (i.e., set it
-;			to 0) or set a future time, or, for an external stop, set the vmmXStopRst flag.
-;
-
-swvmmAdspOk:
-			rlwinm.	r0,r15,0,FamVMmodebit,FamVMmodebit	; Test FamVMmodebit
-			stw		r31,vmmPmap(r2)				; Save the last dispatched address space
-			bne		vmmFamGuestResume		
-			lwz		r6,vmmCntrl(r5)				; Get the control field
-			rlwinm.	r7,r6,0,vmmXStartb,vmmXStartb	; Clear all but start bit
-			beq+	swvmChkStop					; Do not reset stop
-			andc	r6,r6,r7					; Clear it
-			li		r8,vmmFlags					; Point to the flags
-			stw		r6,vmmCntrl(r5)				; Set the control field
-
-swvmtryx:	lwarx	r4,r8,r2					; Pick up the flags
-			rlwinm	r4,r4,0,vmmXStopb+1,vmmXStopb-1	; Clear the stop bit
-			stwcx.	r4,r8,r2					; Save the updated field
-			bne-	swvmtryx					; Try again...
-
-swvmChkStop:			
-			rlwinm.	r26,r4,0,vmmXStopb,vmmXStopb	; Is this VM stopped?
-			bne--	swvmSetStop					; Yes...
-			
-			rlwinm.	r26,r4,0,vmmTimerPopb,vmmTimerPopb	; Did the timer go pop?
-			cmplwi	cr1,r31,0					; Is there actually an address space defined?
-			bne--	svvmTimerPop				; Yes...
-
-;
-;			Special note: we need to intercept any attempt to launch a guest into a non-existent address space.
-;			We will just go emulate an ISI if there is not one.
-;
-
-			beq--	cr1,swvmEmulateISI			; We are trying to launch into an undefined address space.  This is not so good...
-
-;
-;			Here is where we actually swap into the VM (alternate) context.
-;			We will bulk do a wholesale swap of the registers in the context area (the VMs)
-;			with the ones in the savearea (our main code).  During the copy, we will fix up the
-;			MSR, forcing on a few bits and turning off a few others.  Then we will deal with the 
-;			PMAP and other per_proc stuff.  Finally, we will exit back through the main exception
-;			handler to deal with unstacking saveareas and ASTs, etc.
-;
-
-swvmDoSwitch:
-
-;			
-;			First, we save the volatile registers we care about.  Remember, all register
-;			handling here is pretty funky anyway, so we just pick the ones that are ok.
-;			
-			mr		r26,r3						; Save the activation pointer
-			
-			la		r11,vmmFacCtx(r2)			; Point to the virtual machine facility context
-			mr		r27,r2						; Save the context entry
-			stw		r11,deferctx(r3)			; Start using the virtual machine facility context when we exit
-
-			lwz		r11,ACT_MACT_SPF(r26)		; Get the special flags
-			mr		r3,r31						; Get the pointer to the PMAP
-			oris	r15,r11,hi16(runningVM)		; Show that we are swapped to the VM right now
-			bl		EXT(hw_set_user_space_dis)	; Swap the address spaces
-			lwz		r17,vmmFlags(r27)			; Get the status flags
-			lwz		r20,vmmContextKern(r27)		; Get the state page kernel addr
-			lwz		r21,vmmCntrl(r20)			; Get vmmCntrl
-			rlwinm.	r22,r21,0,vmmFamEnab,vmmFamEnab	; Is vmmFamEnab set?
-			lwz		r22,vmmXAFlgs(r27)			; Get the eXtended Architecture flags
-			stw		r22,VMMXAFlgs(r10)			; Store vmmXAFlgs in per_proc VMMXAFlgs
-			beq		swvmNoFam					; No Fam intercept
-			rlwinm.	r22,r22,0,0,0				; Are we doing a 64-bit virtual machine?
-			rlwimi	r15,r21,32+vmmFamSetb-FamVMmodebit,FamVMmodebit,FamVMmodebit	; Set FamVMmode bit
-			rlwinm	r21,r21,0,vmmFamSetb+1,vmmFamSetb-1	; Clear FamSet bit
-			bne		swvmXfamintercpt
-			lwz		r22,famintercepts(r20)		; Load intercept bit field
-			b		swvmfamintercptres
-swvmXfamintercpt:
-			lwz		r22,faminterceptsX(r20)		; Load intercept bit field
-swvmfamintercptres:
-			stw		r21,vmmCntrl(r20)			; Update vmmCntrl
-			lwz		r19,vmmContextPhys(r27)		; Get vmmFAMarea address
-			stw		r22,vmmFAMintercept(r27)	; Get vmmFAMintercept
-			stw		r22,FAMintercept(r10)		; Store vmmFAMintercept in per_proc FAMintercept
-			stw		r19,VMMareaPhys(r10)		; Store VMMareaPhys
-			oris	r15,r15,hi16(FamVMena)		; Set FamVMenabit
-swvmNoFam:
-			stw		r27,vmmCEntry(r26)			; Remember what context we are running
-			bf++	vmmMapDone,swvmNoMap		; We have not mapped anything or it was not for this address space
-
-;
-;			This little bit of hoopala here (triggered by vmmMapDone) is
-;			a performance enhancement.  This will change the returning savearea
-;			to look like we had a DSI rather than a system call. Then, setting
-;			the redrive bit, the exception handler will redrive the exception as 
-;			a DSI, entering the last mapped address into the hash table.  This keeps
-;			double faults from happening.  Note that there is a gain only when the VM
-;			takes a fault, the emulator merely resolves it, and then resumes
-;			the VM execution.  That should be the normal case.
-;
-;			Note that we need to revisit this when we move the virtual machines to the task because
-;			then it will be possible for more than one thread to access this stuff at the same time.
-;
-			
-			lwz		r3,SAVflags(r30)			; Pick up the savearea flags
-			lwz		r2,vmmLastMap(r28)			; Get the last mapped address
-			lwz		r14,vmmLastMap+4(r28)		; Get the last mapped address low half
-			li		r20,T_DATA_ACCESS			; Change to DSI fault
-			oris	r3,r3,hi16(SAVredrive)		; Set exception redrive
-			stw		r2,savedar(r30)				; Set the DAR to the last thing we mapped
-			stw		r14,savedar+4(r30)			; Set the DAR to the last thing we mapped
-			stw		r3,SAVflags(r30)			; Turn on the redrive request
-			lis		r2,hi16(MASK(DSISR_HASH))	; Set PTE/DBAT miss
-			li		r0,0						; Clear
-			stw		r20,saveexception(r30)		; Say we need to emulate a DSI
-			stw		r2,savedsisr(r30)			; Pretend we have a PTE miss			
-			stb		r0,vmmGFlags+3(r28)			; Show that the redrive has been taken care of
-			
-swvmNoMap:	lwz		r20,vmmContextKern(r27)		; Get the comm area
-			rlwimi	r15,r17,32-(floatCngbit-vmmFloatCngdb),floatCngbit,vectorCngbit	; Shift and insert changed bits			
-			lwz		r20,vmmCntrl(r20)			; Get the control flags
-			rlwimi	r17,r11,8,24,31				; Save the old spf flags
-			rlwimi	r15,r20,32+vmmKeyb-userProtKeybit,userProtKeybit,userProtKeybit	; Set the protection key
-			stw		r15,spcFlags(r10)			; Set per_proc copy of the special flags
-			stw		r15,ACT_MACT_SPF(r26)		; Set the special flags
-
-			stw		r17,vmmFlags(r27)			; Set the status flags
-			
-			bl		swapCtxt					; First, swap the general register state
-
-			lwz		r17,vmmContextKern(r27)		; Get the comm area back
-			la		r25,vmmFacCtx(r27)			; Point to the facility context
-			lwz		r15,vmmCntrl(r17)			; Get the control flags again
-			mfsprg	r29,1						; Get the current activation
-			lwz		r29,ACT_PER_PROC(r29)		; Get the per_proc block
-			
-;
-;			Check if there is new floating point context to load
-;			
-						
-			rlwinm.	r0,r15,0,vmmFloatLoadb,vmmFloatLoadb	; Are there new floating point values?
-			lhz		r29,PP_CPU_NUMBER(r29)		; Get our cpu number
-			li		r14,vmmppcFPRs				; Get displacement to the new values
-			andc	r15,r15,r0					; Clear the bit
-			beq+	swvmNoNewFloats				; Nope, good...
-			
-			lwz		r19,FPUcpu(r25)				; Get the last CPU we ran on
-			
-			stw		r29,FPUcpu(r25)				; Claim the context for ourselves
-			
-			eieio								; Make sure this stays in order
-			
-			lis		r18,hi16(EXT(PerProcTable))	; Set base PerProcTable
-			mulli	r19,r19,ppeSize				; Find offset to the owner per_proc_entry
-			ori		r18,r18,lo16(EXT(PerProcTable))	; Set base PerProcTable
-			li		r16,FPUowner				; Displacement to float owner
-			add		r19,r18,r19					; Point to the owner per_proc_entry
-			lwz		r19,ppe_vaddr(r19)			; Point to the owner per_proc
-			
-swvminvfpu:	lwarx	r18,r16,r19					; Get the owner
-
-			sub		r0,r18,r25					; Subtract one from the other
-			sub		r3,r25,r18					; Subtract the other from the one
-			or		r3,r3,r0					; Combine them
-			srawi	r3,r3,31					; Get a 0 if equal or -1 if not
-			and		r18,r18,r3					; Make 0 if same, unchanged if not
-			stwcx.	r18,r16,r19					; Try to invalidate it
-			bne--	swvminvfpu					; Try again if there was a collision...
-
-			lwz		r3,FPUsave(r25)				; Get the FPU savearea
-			dcbt	r14,r17						; Touch in first line of new stuff
-			mr.		r3,r3						; Is there one?
-			bne+	swvmGotFloat				; Yes...
-			
-			bl		EXT(save_get)				; Get a savearea
-
-			li		r7,SAVfloat					; Get floating point flag
-			stw		r26,SAVact(r3)				; Save our activation
-			li		r0,0						; Get a zero
-			stb		r7,SAVflags+2(r3)			; Set that this is floating point
-			stw		r0,SAVprev+4(r3)			; Clear the back chain
-			stw		r0,SAVlevel(r3)				; We are always at level 0 (user state)
-			
-			stw		r3,FPUsave(r25)				; Chain us to context
-
-swvmGotFloat:
-			la		r4,savefp0(r3)				; Point to the destination
-			mr		r21,r3						; Save the save area
-			la		r3,vmmppcFPRs(r17)			; Point to the source
-			li		r5,32*8						; Get the size (32 FPRs at 8 bytes each)
-			
-			bl		EXT(bcopy)					; Copy the new values
-			
-			lwz		r11,ACT_MACT_SPF(r26)		; Get the special flags
-			stw		r15,vmmCntrl(r17)			; Save the control flags sans vmmFloatLoad
-			rlwinm	r11,r11,0,floatCngbit+1,floatCngbit-1	; Clear the changed bit here
-			lwz		r14,vmmStat(r17)			; Get the status flags
-			mfsprg	r10,1						; Get the current activation
-			lwz		r10,ACT_PER_PROC(r10)		; Get the per_proc block
-			stw		r11,ACT_MACT_SPF(r26)		; Set the special flags
-			rlwinm	r14,r14,0,vmmFloatCngdb+1,vmmFloatCngdb-1	; Clear the changed flag
-			stw		r11,spcFlags(r10)			; Set per_proc copy of the special flags
-			stw		r14,vmmStat(r17)			; Set the status flags sans vmmFloatCngd
-			
-;
-;			Check if there is new vector context to load
-;			
-									
-swvmNoNewFloats:
-			rlwinm.	r0,r15,0,vmmVectLoadb,vmmVectLoadb	; Are there new vector values?
-			li		r14,vmmppcVRs				; Get displacement to the new values
-			andc	r15,r15,r0					; Clear the bit
-			beq+	swvmNoNewVects				; Nope, good...
-			
-			lwz		r19,VMXcpu(r25)				; Get the last CPU we ran on
-			
-			stw		r29,VMXcpu(r25)				; Claim the context for ourselves
-			
-			eieio								; Make sure this stays in order
-			
-			lis		r18,hi16(EXT(PerProcTable))	; Set base PerProcTable
-			mulli	r19,r19,ppeSize				; Find offset to the owner per_proc_entry
-			ori		r18,r18,lo16(EXT(PerProcTable))	; Set base PerProcTable
-			li		r16,VMXowner				; Displacement to vector owner
-			add		r19,r18,r19					; Point to the owner per_proc_entry
-			lwz		r19,ppe_vaddr(r19)			; Point to the owner per_proc
-			
-swvminvvec:	lwarx	r18,r16,r19					; Get the owner
-
-			sub		r0,r18,r25					; Subtract one from the other
-			sub		r3,r25,r18					; Subtract the other from the one
-			or		r3,r3,r0					; Combine them
-			srawi	r3,r3,31					; Get a 0 if equal or -1 if not
-			and		r18,r18,r3					; Make 0 if same, unchanged if not
-			stwcx.	r18,r16,r19					; Try to invalidate it
-			bne--	swvminvvec					; Try again if there was a collision...
-			
-swvminvved:	lwz		r3,VMXsave(r25)				; Get the vector savearea
-			dcbt	r14,r17						; Touch in first line of new stuff
-			mr.		r3,r3						; Is there one?
-			bne+	swvmGotVect					; Yes...
-			
-			bl		EXT(save_get)				; Get a savearea
-
-			li		r7,SAVvector				; Get the vector type flag
-			stw		r26,SAVact(r3)				; Save our activation
-			li		r0,0						; Get a zero
-			stb		r7,SAVflags+2(r3)			; Set that this is vector
-			stw		r0,SAVprev+4(r3)			; Clear the back chain
-			stw		r0,SAVlevel(r3)				; We are always at level 0 (user state)
-			
-			stw		r3,VMXsave(r25)				; Chain us to context
-
-swvmGotVect:
-			mr		r21,r3						; Save the pointer to the savearea
-			la		r4,savevr0(r3)				; Point to the destination
-			la		r3,vmmppcVRs(r17)			; Point to the source
-			li		r5,32*16					; Get the size (32 vectors at 16 bytes each)
-			
-			bl		EXT(bcopy)					; Copy the new values
-
-			lwz		r8,savevrsave(r30)			; Get the current VRSave
-			
-			lwz		r11,ACT_MACT_SPF(r26)		; Get the special flags
-			stw		r15,vmmCntrl(r17)			; Save the control flags sans vmmVectLoad
-			rlwinm	r11,r11,0,vectorCngbit+1,vectorCngbit-1	; Clear the changed bit here
-			stw		r8,savevrvalid(r21)			; Set the current VRSave as valid saved
-			lwz		r14,vmmStat(r17)			; Get the status flags
-			mfsprg	r10,1						; Get the current activation
-			lwz		r10,ACT_PER_PROC(r10)		; Get the per_proc block
-			stw		r11,ACT_MACT_SPF(r26)		; Set the special flags
-			rlwinm	r14,r14,0,vmmVectCngdb+1,vmmVectCngdb-1	; Clear the changed flag
-			stw		r11,spcFlags(r10)			; Set per_proc copy of the special flags
-			stw		r14,vmmStat(r17)			; Set the status flags sans vmmVectCngd
-			
-swvmNoNewVects:			
-			li		r3,1						; Show normal exit with check for AST
-			mr		r16,r26			; Restore the thread pointer
-			b		EXT(ppcscret)				; Go back to handler...
-
-			.align	5
-			
-swvmmBogus:	li		r2,kVmmBogusContext			; Set bogus index return
-			li		r0,0						; Clear
-			li		r3,1						; Set normal return with check for AST	
-			stw		r0,saver3(r30)				; Clear upper half
-			stw		r2,saver3+4(r30)			; Pass back the return code
-			b		EXT(ppcscret)				; Go back to handler...
-			
-swvmmBogAdsp:
-			li		r2,kVmmInvalidAdSpace		; Set bogus address space return
-			li		r0,0						; Clear
-			li		r3,1						; Set normal return with check for AST	
-			stw		r0,saver3(r30)				; Clear upper half
-			stw		r2,saver3+4(r30)			; Pass back the return code
-			b		EXT(ppcscret)				; Go back to handler...
-				
-swvmSetStop:
-			li		r2,kVmmStopped				; Set stopped return
-			li		r0,0						; Clear
-			li		r3,1						; Set normal return with check for AST
-			stw		r0,saver3(r30)				; Clear upper half
-			stw		r2,saver3+4(r30)			; Pass back the return code
-			stw		r2,return_code(r5)			; Save the exit code
-			b		EXT(ppcscret)				; Go back to handler...
-		
-svvmTimerPop:
-			li		r2,kVmmReturnNull			; Set null return
-			li		r0,0						; Clear
-			li		r3,1						; Set normal return with check for AST
-			stw		r0,saver3(r30)				; Clear upper half
-			stw		r2,saver3+4(r30)			; Pass back the return code
-			stw		r2,return_code(r5)			; Save the exit code
-			b		EXT(ppcscret)				; Go back to handler...
-		
-swvmEmulateISI:
-			mfsprg	r10,2						; Get feature flags
-			lwz		r11,vmmXAFlgs(r28)			; Get the eXtended Architecture flags			
-			mtcrf	0x02,r10					; Move pf64Bit to its normal place in CR6
-			rlwinm.	r11,r11,0,0,0				; Are we doing a 64-bit virtual machine?		
-			li		r2,kVmmReturnInstrPageFault	; Set ISI
-			crnot	vmmDoing64,cr0_eq			; Remember if this is a 64-bit VM
-			li		r0,0						; Clear
-			li		r3,1						; Set normal return with check for AST
-			stw		r0,saver3(r30)				; Clear upper half
-			stw		r2,saver3+4(r30)			; Pass back the return code
-			stw		r2,return_code(r5)			; Save the exit code
-			lis		r7,hi16(MASK(DSISR_HASH))	; Pretend like we got a PTE miss
-			bt		vmmDoing64,vmISI64			; Go do this for a 64-bit VM...
-
-			lwz		r10,vmmppcpc(r5)			; Get the PC as failing address
-			stw		r10,return_params+0(r5)		; Save PC as first return parm
-			stw		r7,return_params+4(r5)		; Save the pseudo-DSISR as second return parm
-			b		EXT(ppcscret)				; Go back to handler...
-
-vmISI64:	ld		r10,vmmppcXpc(r5)			; Get the PC as failing address
-			std		r10,return_paramsX+0(r5)	; Save PC as first return parm
-			std		r7,return_paramsX+8(r5)		; Save the pseudo-DSISR as second return parm
-			b		EXT(ppcscret)				; Go back to handler...
-
-;
-;			These syscalls are invalid, FAM syscall fast path 
-;
-
-			.align	5
-			.globl	EXT(vmm_fam_reserved)
-
-LEXT(vmm_fam_reserved)
-			li		r3,0						; Force exception
-			b		EXT(ppcscret)				; Go back to handler...
-
-;
-;			Here is where we exit from vmm mode.  We do this on any kind of exception.
-;			Interruptions (decrementer, external, etc.) are another story though.  
-;			These we just pass through.  We also switch back explicitly when requested.
-;			This will happen in response to a timer pop and some kinds of ASTs.
-;
-;			Inputs:
-;				R3  = activation
-;				R4  = savearea
-;
-
-			.align	5
-			.globl	EXT(vmm_exit)
-
-LEXT(vmm_exit)
-
-vmmexitcall:
-			lwz		r2,vmmCEntry(r3)			; Get the context that is active
-			lwz		r12,ACT_VMMAP(r3)			; Get the VM_MAP for this guy
-			lwz		r11,ACT_MACT_SPF(r3)		; Get the special flags
-			lwz		r19,vmmFlags(r2)			; Get the status flags
-			mr		r16,r3						; R16 is safe to use for the activation address
-		
-			rlwimi	r19,r11,floatCngbit-vmmFloatCngdb,vmmFloatCngdb,vmmVectCngdb	; Shift and insert changed bits			
-			li		r0,0						; Get a zero
-			rlwimi	r11,r19,vmmSpfSaveb,floatCngbit,vectorCngbit	; Restore the saved part of the spf
-			lwz		r3,VMMAP_PMAP(r12)			; Get the pmap for the activation
-			rlwinm	r11,r11,0,runningVMbit+1,runningVMbit-1	; Clear the "in VM" flag
-			stw		r0,vmmCEntry(r16)			; Clear pointer to active context
-			stw		r19,vmmFlags(r2)			; Set the status flags
-			rlwinm	r11,r11,0,userProtKeybit+1,userProtKeybit-1	; Set back to normal protection key
-			mfsprg	r10,1						; Get the current activation
-			lwz		r10,ACT_PER_PROC(r10)		; Get the per_proc block
-			rlwinm	r11,r11,0,FamVMenabit+1,FamVMenabit-1	; Clear FamVMEnable
-			lwz		r18,spcFlags(r10)			; Get per_proc copy of the special flags
-			lwz		r5,vmmContextKern(r2)		; Get the state page kernel addr 
-			rlwinm	r11,r11,0,FamVMmodebit+1,FamVMmodebit-1	; Clear FamVMMode
-			lwz		r6,vmmCntrl(r5)				; Get the control field
-			rlwimi	r19,r18,FamVMmodebit-vmmFAMmodeb,vmmFAMmodeb,vmmFAMmodeb	; Shift and insert changed bits			
-			rlwimi	r6,r18,FamVMmodebit-vmmFamSetb,vmmFamSetb,vmmFamSetb		; Shift and insert changed bits			
-			rlwimi	r6,r18,userProtKeybit-vmmKeyb,vmmKeyb,vmmKeyb				; Shift and insert changed bits			
-			stw		r11,ACT_MACT_SPF(r16)		; Set the special flags
-			stw		r6,vmmCntrl(r5)				; Store the control field
-			stw		r11,spcFlags(r10)			; Set per_proc copy of the special flags
-			
-			mr		r26,r16						; Save the activation pointer
-			mr		r27,r2						; Save the context entry
-			
-			bl		EXT(hw_set_user_space_dis)	; Swap the address spaces back to the emulator
-			
-			la		r5,facctx(r16)				; Point to the main facility context
-			mr		r2,r27						; Restore
-			stw		r5,deferctx(r16)			; Start using the main facility context on the way out
-			lwz		r5,vmmContextKern(r27)		; Get the context area address
-			mr		r3,r16						; Restore activation address
-			stw		r19,vmmStat(r5)				; Save the changed and popped flags
-			bl		swapCtxt					; Exchange the VM context for the emulator one
-			stw		r8,saver3+4(r30)			; Set the return code as the return value also
-			b		EXT(retFromVM)				; Go back to handler...
-
-
-;
-;			Here is where we force an exit from vmm mode.  We do this as part of
-;			termination to ensure that we are not executing in an alternate
-;			context.  Because this is called from C we need to save
-;			all non-volatile registers.
-;
-;			Inputs:
-;				R3  = activation
-;				R4  = user savearea
-;				Interruptions disabled
-;
-
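In C terms the routine reduces to "if the runningVM bit was set, tear the VM
state down; otherwise just return".  A schematic version with a placeholder
flag name (the real layout is in the PPC headers):

    #include <stdint.h>

    #define SPF_RUNNING_VM_SK  (1u << 0)   /* stand-in for runningVM */

    static void vmm_force_exit_sketch(uint32_t *spf)
    {
        uint32_t cleared = *spf & ~SPF_RUNNING_VM_SK;
        if (cleared == *spf)
            return;          /* we were not in a VM: nothing to undo */
        *spf = cleared;      /* then swap address space and context below */
    }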
-			.align	5
-			.globl	EXT(vmm_force_exit)
-
-LEXT(vmm_force_exit)
-
-			stwu	r1,-(FM_ALIGN(20*4)+FM_SIZE)(r1)	; Get enough space for the registers
-			mflr	r0							; Save the return
-			stmw	r13,FM_ARG0(r1)				; Save all non-volatile registers
-			stw		r0,(FM_ALIGN(20*4)+FM_SIZE+FM_LR_SAVE)(r1)	; Save the return
-
-			lwz		r2,vmmCEntry(r3)			; Get the context that is active
-			lwz		r11,ACT_MACT_SPF(r3)		; Get the special flags
-			lwz		r19,vmmFlags(r2)			; Get the status flags
-			lwz		r12,ACT_VMMAP(r3)			; Get the VM_MAP for this guy
-			
-			rlwimi	r19,r11,floatCngbit-vmmFloatCngdb,vmmFloatCngdb,vmmVectCngdb	; Shift and insert changed bits			
-			mr		r26,r3						; Save the activation pointer
-			rlwimi	r11,r19,vmmSpfSaveb,floatCngbit,vectorCngbit	; Restore the saved part of the spf
-			li		r0,0						; Get a zero
-			rlwinm	r9,r11,0,runningVMbit+1,runningVMbit-1	; Clear the "in VM" flag
-			cmplw	r9,r11						; Check if we were in a vm
-			lwz		r3,VMMAP_PMAP(r12)			; Get the pmap for the activation
-			beq-	vfeNotRun					; We were not in a vm....
-			rlwinm	r9,r9,0,userProtKeybit+1,userProtKeybit-1	; Set back to normal protection key
-			stw		r0,vmmCEntry(r26)			; Clear pointer to active context
-			mfsprg	r10,1						; Get the current activation
-			lwz		r10,ACT_PER_PROC(r10)		; Get the per_proc block
-			lwz		r18,spcFlags(r10)			; Get per_proc copy of the special flags
-			rlwinm	r9,r9,0,FamVMenabit+1,FamVMenabit-1	; Clear FamVMEnable
-			rlwinm	r9,r9,0,FamVMmodebit+1,FamVMmodebit-1	; Clear FamVMMode
-			lwz		r5,vmmContextKern(r2)		; Get the context area address
-			lwz		r6,vmmCntrl(r5)				; Get the control field
-			rlwimi	r19,r18,FamVMmodebit-vmmFAMmodeb,vmmFAMmodeb,vmmFAMmodeb	; Shift and insert changed bits			
-			rlwimi	r6,r18,FamVMmodebit-vmmFamSetb,vmmFamSetb,vmmFamSetb		; Shift and insert changed bits			
-			rlwimi	r6,r18,userProtKeybit-vmmKeyb,vmmKeyb,vmmKeyb				; Shift and insert changed bits			
-			stw		r6,vmmCntrl(r5)				; Store the control field
-			stw		r9,ACT_MACT_SPF(r26)		; Set the special flags
-			stw		r9,spcFlags(r10)			; Set per_proc copy of the special flags
-			
-			mr		r27,r2						; Save the context entry
-			mr		r30,r4						; Save the savearea
-			
-			bl		EXT(hw_set_user_space_dis)	; Swap the address spaces back to the emulator
-			
-			la		r7,facctx(r26)				; Point to the main facility context
-			
-			lwz		r5,vmmContextKern(r27)		; Get the context area address
-			stw		r19,vmmStat(r5)				; Save the changed and popped flags
-			stw		r7,deferctx(r26)			; Tell context launcher to switch facility context
-	
-			bl		swapCtxt					; Exchange the VM context for the emulator one
-			
-			lwz		r8,saveexception(r30)		; Pick up the exception code
-			lwz		r7,SAVflags(r30)			; Pick up the savearea flags
-			lis		r9,hi16(SAVredrive)			; Get exception redrive bit
-			rlwinm	r8,r8,30,24,31				; Convert exception to return code
-			andc	r7,r7,r9					; Make sure redrive is off because we are intercepting
-			stw		r8,saver3+4(r30)			; Set the return code as the return value also
-			stw		r7,SAVflags(r30)			; Set the savearea flags
-			
-
-vfeNotRun:	lmw		r13,FM_ARG0(r1)				; Restore all non-volatile registers
-			lwz		r1,0(r1)					; Pop the stack
-			lwz		r0,FM_LR_SAVE(r1)			; Get the return address
-			mtlr	r0							; Set return
-			blr				
-
-;
-;			Note: we will not do any DCBTs to the savearea.  It was just stored to a few cycles ago and should 
-;			still be in the cache.
-;
-;			NOTE NOTE:  R16 is important to save!!!!
-;
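The core of swapCtxt is a straight exchange of the two register images, with
the dcbt touches walking one cache line ahead.  A minimal C sketch of the
idea (the array layout is illustrative; the real savearea and vmm context
structures differ):

    #include <stdint.h>

    #define NREGS 32

    static void swap_ctxt_sketch(uint64_t save[NREGS], uint64_t vmctx[NREGS])
    {
        for (int i = 0; i < NREGS; i++) {
            __builtin_prefetch(&vmctx[i]);   /* analogue of the dcbt touches */
            uint64_t tmp = save[i];          /* emulator value out */
            save[i] = vmctx[i];              /* guest value into savearea */
            vmctx[i] = tmp;                  /* emulator value into context */
        }
    }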
-			.align	5
-
-swapCtxt:	
-			mfsprg	r10,2						; Get feature flags
-			la		r6,vmmppcpc(r5)				; Point to the first line
-			mtcrf	0x02,r10					; Move pf64Bit to its normal place in CR6
-			
-			lwz		r14,saveexception(r30)		; Get the exception code
-			dcbt	0,r6						; Touch in the first line of the context area
-			bt++	pf64Bitb,swap64				; Go do this swap on a 64-bit machine...
-			
-			lwz		r7,savesrr0+4(r30)			; Start moving context	
-			lwz		r8,savesrr1+4(r30)				
-			lwz		r9,saver0+4(r30)				
-			cmplwi	cr1,r14,T_SYSTEM_CALL		; Are we switching because of a system call?
-			lwz		r10,saver1+4(r30)				
-			lwz		r11,saver2+4(r30)				
-			lwz		r12,saver3+4(r30)				
-			lwz		r13,saver4+4(r30)	
-			la		r6,vmmppcr6(r5)				; Point to second line		
-			lwz		r14,saver5+4(r30)				
-			
-			dcbt	0,r6						; Touch second line of context area
-			
-			lwz		r15,vmmppcpc(r5)			; First line of context	
-			lis		r22,hi16(MSR_IMPORT_BITS)	; Get the MSR bits that are controllable by user
-			lwz		r23,vmmppcmsr(r5)				
-			ori		r22,r22,lo16(MSR_IMPORT_BITS)	; Get the rest of the MSR bits that are controllable by user
-			lwz		r17,vmmppcr0(r5)				
-			lwz		r18,vmmppcr1(r5)		
-			and		r23,r23,r22					; Keep only the controllable bits		
-			lwz		r19,vmmppcr2(r5)		
-			oris	r23,r23,hi16(MSR_EXPORT_MASK_SET)	; Force on the required bits
-			lwz		r20,vmmppcr3(r5)				
-			ori		r23,r23,lo16(MSR_EXPORT_MASK_SET)	; Force on the other required bits
-			lwz		r21,vmmppcr4(r5)				
-			lwz		r22,vmmppcr5(r5)				
-
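The and/oris/ori dance interleaved with the loads above is MSR sanitization:
keep only the guest-controllable bits, then force on the bits the kernel
requires.  As C, with placeholder mask values (the real MSR_IMPORT_BITS and
MSR_EXPORT_MASK_SET come from the PPC headers):

    #include <stdint.h>

    #define MSR_IMPORT_BITS_SK     0x0000ff6fu   /* placeholder */
    #define MSR_EXPORT_MASK_SET_SK 0x00001000u   /* placeholder */

    static uint32_t sanitize_guest_msr(uint32_t guest_msr)
    {
        guest_msr &= MSR_IMPORT_BITS_SK;      /* keep controllable bits */
        guest_msr |= MSR_EXPORT_MASK_SET_SK;  /* force required bits on */
        return guest_msr;
    }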
-			dcbt	0,r6						; Touch third line of context area
-		
-			stw		r7,vmmppcpc(r5)				; Save emulator context into the context area	
-			stw		r8,vmmppcmsr(r5)				
-			stw		r9,vmmppcr0(r5)				
-			stw		r10,vmmppcr1(r5)				
-			stw		r11,vmmppcr2(r5)				
-			stw		r12,vmmppcr3(r5)				
-			stw		r13,vmmppcr4(r5)				
-			stw		r14,vmmppcr5(r5)			
-
-;			
-;			Save the first 3 parameters if we are an SC (we will take care of the last later)
-;
-			bne+	cr1,swapnotsc				; Skip next if not an SC exception...	
-			stw		r12,return_params+0(r5)		; Save the first return
-			stw		r13,return_params+4(r5)		; Save the second return
-			stw		r14,return_params+8(r5)		; Save the third return
-
-swapnotsc:	li		r6,0						; Clear this out
-			stw		r6,savesrr0(r30)			; Insure that high order is clear
-			stw		r15,savesrr0+4(r30)			; Save vm context into the savearea	
-			stw		r6,savesrr1(r30)			; Insure that high order is clear
-			stw		r23,savesrr1+4(r30)				
-			stw		r17,saver0+4(r30)				
-			stw		r18,saver1+4(r30)		
-			stw		r19,saver2+4(r30)		
-			stw		r20,saver3+4(r30)				
-			stw		r21,saver4+4(r30)		
-			la		r6,vmmppcr14(r5)			; Point to fourth line		
-			stw		r22,saver5+4(r30)				
-			
-			dcbt	0,r6						; Touch fourth line
-
-;			Swap 8 registers
-			
-			lwz		r7,saver6+4(r30)			; Read savearea	
-			lwz		r8,saver7+4(r30)				
-			lwz		r9,saver8+4(r30)				
-			lwz		r10,saver9+4(r30)				
-			lwz		r11,saver10+4(r30)				
-			lwz		r12,saver11+4(r30)				
-			lwz		r13,saver12+4(r30)				
-			lwz		r14,saver13+4(r30)				
-
-			lwz		r15,vmmppcr6(r5)			; Read vm context 
-			lwz		r24,vmmppcr7(r5)				
-			lwz		r17,vmmppcr8(r5)				
-			lwz		r18,vmmppcr9(r5)		
-			lwz		r19,vmmppcr10(r5)		
-			lwz		r20,vmmppcr11(r5)				
-			lwz		r21,vmmppcr12(r5)				
-			lwz		r22,vmmppcr13(r5)				
-
-			stw		r7,vmmppcr6(r5)				; Write context	
-			stw		r8,vmmppcr7(r5)				
-			stw		r9,vmmppcr8(r5)				
-			stw		r10,vmmppcr9(r5)				
-			stw		r11,vmmppcr10(r5)				
-			stw		r12,vmmppcr11(r5)				
-			stw		r13,vmmppcr12(r5)	
-			la		r6,vmmppcr22(r5)			; Point to fifth line			
-			stw		r14,vmmppcr13(r5)				
-
-			dcbt	0,r6						; Touch fifth line
-
-			stw		r15,saver6+4(r30)			; Write vm context 
-			stw		r24,saver7+4(r30)				
-			stw		r17,saver8+4(r30)				
-			stw		r18,saver9+4(r30)		
-			stw		r19,saver10+4(r30)		
-			stw		r20,saver11+4(r30)				
-			stw		r21,saver12+4(r30)				
-			stw		r22,saver13+4(r30)				
-
-;			Swap 8 registers
-			
-			lwz		r7,saver14+4(r30)			; Read savearea	
-			lwz		r8,saver15+4(r30)				
-			lwz		r9,saver16+4(r30)				
-			lwz		r10,saver17+4(r30)				
-			lwz		r11,saver18+4(r30)				
-			lwz		r12,saver19+4(r30)				
-			lwz		r13,saver20+4(r30)				
-			lwz		r14,saver21+4(r30)				
-
-			lwz		r15,vmmppcr14(r5)			; Read vm context 
-			lwz		r24,vmmppcr15(r5)				
-			lwz		r17,vmmppcr16(r5)				
-			lwz		r18,vmmppcr17(r5)		
-			lwz		r19,vmmppcr18(r5)		
-			lwz		r20,vmmppcr19(r5)				
-			lwz		r21,vmmppcr20(r5)				
-			lwz		r22,vmmppcr21(r5)				
-
-			stw		r7,vmmppcr14(r5)			; Write context	
-			stw		r8,vmmppcr15(r5)				
-			stw		r9,vmmppcr16(r5)				
-			stw		r10,vmmppcr17(r5)				
-			stw		r11,vmmppcr18(r5)				
-			stw		r12,vmmppcr19(r5)				
-			stw		r13,vmmppcr20(r5)
-			la		r6,vmmppcr30(r5)			; Point to sixth line				
-			stw		r14,vmmppcr21(r5)				
-			
-			dcbt	0,r6						; Touch sixth line
-
-			stw		r15,saver14+4(r30)			; Write vm context 
-			stw		r24,saver15+4(r30)				
-			stw		r17,saver16+4(r30)				
-			stw		r18,saver17+4(r30)		
-			stw		r19,saver18+4(r30)		
-			stw		r20,saver19+4(r30)				
-			stw		r21,saver20+4(r30)				
-			stw		r22,saver21+4(r30)				
-
-;			Swap 8 registers
-			
-			lwz		r7,saver22+4(r30)			; Read savearea	
-			lwz		r8,saver23+4(r30)				
-			lwz		r9,saver24+4(r30)				
-			lwz		r10,saver25+4(r30)				
-			lwz		r11,saver26+4(r30)				
-			lwz		r12,saver27+4(r30)				
-			lwz		r13,saver28+4(r30)				
-			lwz		r14,saver29+4(r30)				
-
-			lwz		r15,vmmppcr22(r5)			; Read vm context 
-			lwz		r24,vmmppcr23(r5)				
-			lwz		r17,vmmppcr24(r5)				
-			lwz		r18,vmmppcr25(r5)		
-			lwz		r19,vmmppcr26(r5)		
-			lwz		r20,vmmppcr27(r5)				
-			lwz		r21,vmmppcr28(r5)				
-			lwz		r22,vmmppcr29(r5)				
-
-			stw		r7,vmmppcr22(r5)			; Write context	
-			stw		r8,vmmppcr23(r5)				
-			stw		r9,vmmppcr24(r5)				
-			stw		r10,vmmppcr25(r5)				
-			stw		r11,vmmppcr26(r5)				
-			stw		r12,vmmppcr27(r5)				
-			stw		r13,vmmppcr28(r5)	
-			la		r6,vmmppcvscr(r5)			; Point to seventh line			
-			stw		r14,vmmppcr29(r5)				
-
-			dcbt	0,r6						; Touch seventh line
-
-			stw		r15,saver22+4(r30)			; Write vm context 
-			stw		r24,saver23+4(r30)				
-			stw		r17,saver24+4(r30)				
-			stw		r18,saver25+4(r30)		
-			stw		r19,saver26+4(r30)		
-			stw		r20,saver27+4(r30)				
-			stw		r21,saver28+4(r30)				
-			stw		r22,saver29+4(r30)				
-
-;			Swap 8 registers
-			
-			lwz		r7,saver30+4(r30)			; Read savearea	
-			lwz		r8,saver31+4(r30)				
-			lwz		r9,savecr(r30)				
-			lwz		r10,savexer+4(r30)				
-			lwz		r11,savelr+4(r30)				
-			lwz		r12,savectr+4(r30)				
-			lwz		r14,savevrsave(r30)				
-
-			lwz		r15,vmmppcr30(r5)			; Read vm context 
-			lwz		r24,vmmppcr31(r5)				
-			lwz		r17,vmmppccr(r5)				
-			lwz		r18,vmmppcxer(r5)		
-			lwz		r19,vmmppclr(r5)		
-			lwz		r20,vmmppcctr(r5)				
-			lwz		r22,vmmppcvrsave(r5)				
-
-			stw		r7,vmmppcr30(r5)			; Write context	
-			stw		r8,vmmppcr31(r5)				
-			stw		r9,vmmppccr(r5)				
-			stw		r10,vmmppcxer(r5)				
-			stw		r11,vmmppclr(r5)				
-			stw		r12,vmmppcctr(r5)				
-			stw		r14,vmmppcvrsave(r5)				
-
-			stw		r15,saver30+4(r30)			; Write vm context 
-			stw		r24,saver31+4(r30)				
-			stw		r17,savecr(r30)				
-			stw		r18,savexer+4(r30)		
-			stw		r19,savelr+4(r30)		
-			stw		r20,savectr+4(r30)				
-			stw		r22,savevrsave(r30)				
-
-;			Swap 8 registers
-			
-			lwz		r7,savevscr+0(r30)			; Read savearea	
-			lwz		r8,savevscr+4(r30)				
-			lwz		r9,savevscr+8(r30)				
-			lwz		r10,savevscr+12(r30)				
-			lwz		r11,savefpscrpad(r30)				
-			lwz		r12,savefpscr(r30)				
-
-			lwz		r15,vmmppcvscr+0(r5)		; Read vm context 
-			lwz		r24,vmmppcvscr+4(r5)				
-			lwz		r17,vmmppcvscr+8(r5)				
-			lwz		r18,vmmppcvscr+12(r5)		
-			lwz		r19,vmmppcfpscrpad(r5)		
-			lwz		r20,vmmppcfpscr(r5)				
-
-			stw		r7,vmmppcvscr+0(r5)			; Write context	
-			stw		r8,vmmppcvscr+4(r5)				
-			stw		r9,vmmppcvscr+8(r5)				
-			stw		r10,vmmppcvscr+12(r5)				
-			stw		r11,vmmppcfpscrpad(r5)				
-			stw		r12,vmmppcfpscr(r5)				
-
-			stw		r15,savevscr+0(r30)			; Write vm context 
-			stw		r24,savevscr+4(r30)				
-			stw		r17,savevscr+8(r30)				
-			stw		r18,savevscr+12(r30)		
-			stw		r19,savefpscrpad(r30)		
-			stw		r20,savefpscr(r30)				
-
-			
-;
-;			Cobble up the exception return code and save any specific return values
-;
-			
-			lwz		r7,saveexception(r30)		; Pick up the exception code
-			rlwinm	r8,r7,30,24,31				; Convert exception to return code
-			cmplwi	r7,T_DATA_ACCESS			; Was this a DSI?
-			stw		r8,return_code(r5)			; Save the exit code
-			cmplwi	cr1,r7,T_INSTRUCTION_ACCESS	; Exiting because of an ISI?
-			beq+	swapDSI						; Yeah...
-			cmplwi	r7,T_ALIGNMENT				; Alignment exception?
-			beq+	cr1,swapISI					; We had an ISI...
-			cmplwi	cr1,r7,T_SYSTEM_CALL		; Exiting because of a system call?
-			beq+	swapDSI						; An alignment exception looks like a DSI...
-			beq+	cr1,swapSC					; We had a system call...
-			
-			blr									; Return...
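Two idioms above are worth unpacking.  rlwinm r8,r7,30,24,31 rotates the trap
code right by two and keeps the low byte, and swapISI's rlwinm r7,r7,0,1,4
keeps SRR1 bits 1..4 (mask 0x78000000) as a pseudo-DSISR.  In C, with
placeholder trap-code encodings:

    #include <stdint.h>

    enum { T_DATA_ACCESS_SK = 0x0c, T_ALIGNMENT_SK = 0x18 };  /* placeholders */

    /* rlwinm r8,r7,30,24,31: shift right 2, mask to 8 bits. */
    static uint32_t exception_to_return_code(uint32_t exc)
    {
        return (exc >> 2) & 0xffu;
    }

    /* Alignment faults are reported to the client exactly like DSIs. */
    static int reports_like_dsi(uint32_t exc)
    {
        return exc == T_DATA_ACCESS_SK || exc == T_ALIGNMENT_SK;
    }

    /* rlwinm r7,r7,0,1,4: keep the SRR1 bits that match the DSISR. */
    static uint32_t pseudo_dsisr(uint32_t srr1)
    {
        return srr1 & 0x78000000u;
    }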
-
-;
-;			Set exit returns for a DSI or alignment exception
-;
-
-swapDSI:	lwz		r10,savedar+4(r30)			; Get the DAR
-			lwz		r7,savedsisr(r30)			; and the DSISR
-			stw		r10,return_params+0(r5)		; Save DAR as first return parm
-			stw		r7,return_params+4(r5)		; Save DSISR as second return parm
-			blr									; Return...
-
-;
-;			Set exit returns for an ISI
-;
-
-swapISI:	lwz		r7,vmmppcmsr(r5)			; Get the SRR1 value
-			lwz		r10,vmmppcpc(r5)			; Get the PC as failing address
-			rlwinm	r7,r7,0,1,4					; Save the bits that match the DSISR
-			stw		r10,return_params+0(r5)		; Save PC as first return parm
-			stw		r7,return_params+4(r5)		; Save the pseudo-DSISR as second return parm
-			blr									; Return...
-
-;
-;			Set exit returns for a system call (note: we did the first 3 earlier)
-;			Do we really need to pass parameters back here????
-;
-
-swapSC:		lwz		r10,vmmppcr6(r5)			; Get the fourth parameter
-			stw		r10,return_params+12(r5)	; Save it
-			blr									; Return...
-
-;
-;			Here is the swap for 64-bit machines
-;
-
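swap64 decides between the 32- and 64-bit context layouts with
rlwinm. r22,r22,0,0,0, which isolates bit 0 (the MSB) of the XA flags and
sets the condition register.  The C equivalent of the test:

    #include <stdbool.h>
    #include <stdint.h>

    static bool vm_is_64bit(uint32_t xa_flags)
    {
        return (xa_flags & 0x80000000u) != 0;   /* bit 0 in PPC numbering */
    }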
-swap64:		lwz		r22,vmmXAFlgs(r27)			; Get the eXtended Architecture flags			
-			ld		r7,savesrr0(r30)			; Start moving context	
-			ld		r8,savesrr1(r30)				
-			ld		r9,saver0(r30)				
-			cmplwi	cr1,r14,T_SYSTEM_CALL		; Are we switching because of a system call?
-			ld		r10,saver1(r30)				
-			ld		r11,saver2(r30)		
-			rlwinm.	r22,r22,0,0,0				; Are we doing a 64-bit virtual machine?		
-			ld		r12,saver3(r30)				
-			crnot	vmmDoing64,cr0_eq			; Remember if this is a 64-bit VM
-			ld		r13,saver4(r30)	
-			la		r6,vmmppcr6(r5)				; Point to second line		
-			ld		r14,saver5(r30)				
-			
-			dcbt	0,r6						; Touch second line of context area
-			
-			bt		vmmDoing64,sw64x1			; Skip to 64-bit stuff
-			
-			lwz		r15,vmmppcpc(r5)			; First line of context	
-			lis		r22,hi16(MSR_IMPORT_BITS)	; Get the MSR bits that are controllable by user
-			lwz		r23,vmmppcmsr(r5)				
-			ori		r22,r22,lo16(MSR_IMPORT_BITS)	; Get the rest of the MSR bits that are controllable by user
-			lwz		r17,vmmppcr0(r5)				
-			lwz		r18,vmmppcr1(r5)		
-			and		r23,r23,r22					; Keep only the controllable bits		
-			lwz		r19,vmmppcr2(r5)		
-			oris	r23,r23,hi16(MSR_EXPORT_MASK_SET)	; Force on the required bits
-			lwz		r20,vmmppcr3(r5)				
-			ori		r23,r23,lo16(MSR_EXPORT_MASK_SET)	; Force on the other required bits
-			lwz		r21,vmmppcr4(r5)				
-			lwz		r22,vmmppcr5(r5)				
-
-			dcbt	0,r6						; Touch third line of context area
-		
-			stw		r7,vmmppcpc(r5)				; Save emulator context into the context area	
-			stw		r8,vmmppcmsr(r5)				
-			stw		r9,vmmppcr0(r5)				
-			stw		r10,vmmppcr1(r5)				
-			stw		r11,vmmppcr2(r5)				
-			stw		r12,vmmppcr3(r5)				
-			stw		r13,vmmppcr4(r5)				
-			stw		r14,vmmppcr5(r5)			
-
-;			
-;			Save the first 3 parameters if we are an SC (we will take care of the last later)
-;
-			bne+	cr1,sw64x1done				; Skip next if not an SC exception...	
-			stw		r12,return_params+0(r5)		; Save the first return
-			stw		r13,return_params+4(r5)		; Save the second return
-			stw		r14,return_params+8(r5)		; Save the third return
-			b		sw64x1done					; We are done with this section...
-
-sw64x1:		ld		r15,vmmppcXpc(r5)			; First line of context	
-			li		r0,1						; Get a 1 to turn on 64-bit
-			lis		r22,hi16(MSR_IMPORT_BITS)	; Get the MSR bits that are controllable by user (we will also allow 64-bit here)
-			sldi	r0,r0,63					; Get 64-bit bit
-			ld		r23,vmmppcXmsr(r5)				
-			ori		r22,r22,lo16(MSR_IMPORT_BITS)	; Get the rest of the MSR bits that are controllable by user
-			ld		r17,vmmppcXr0(r5)		
-			or		r22,r22,r0					; Add the 64-bit bit		
-			ld		r18,vmmppcXr1(r5)		
-			and		r23,r23,r22					; Keep only the controllable bits		
-			ld		r19,vmmppcXr2(r5)		
-			oris	r23,r23,hi16(MSR_EXPORT_MASK_SET)	; Force on the required bits
-			ld		r20,vmmppcXr3(r5)				
-			ori		r23,r23,lo16(MSR_EXPORT_MASK_SET)	; Force on the other required bits
-			ld		r21,vmmppcXr4(r5)				
-			ld		r22,vmmppcXr5(r5)				
-
-			dcbt	0,r6						; Touch third line of context area
-		
-			std		r7,vmmppcXpc(r5)			; Save emulator context into the context area	
-			std		r8,vmmppcXmsr(r5)				
-			std		r9,vmmppcXr0(r5)				
-			std		r10,vmmppcXr1(r5)				
-			std		r11,vmmppcXr2(r5)				
-			std		r12,vmmppcXr3(r5)				
-			std		r13,vmmppcXr4(r5)				
-			std		r14,vmmppcXr5(r5)			
-
-;			
-;			Save the first 3 parameters if we are an SC (we will take care of the last later)
-;
-			bne+	cr1,sw64x1done				; Skip next if not an SC exception...	
-			std		r12,return_paramsX+0(r5)	; Save the first return
-			std		r13,return_paramsX+8(r5)	; Save the second return
-			std		r14,return_paramsX+16(r5)	; Save the third return
-
-sw64x1done:	
-			std		r15,savesrr0(r30)			; Save vm context into the savearea	
-			std		r23,savesrr1(r30)				
-			std		r17,saver0(r30)				
-			std		r18,saver1(r30)		
-			std		r19,saver2(r30)		
-			std		r20,saver3(r30)				
-			std		r21,saver4(r30)		
-			la		r6,vmmppcr14(r5)			; Point to fourth line		
-			std		r22,saver5(r30)				
-			
-			dcbt	0,r6						; Touch fourth line
-
-;			Swap 8 registers
-			
-			ld		r7,saver6(r30)			; Read savearea	
-			ld		r8,saver7(r30)				
-			ld		r9,saver8(r30)				
-			ld		r10,saver9(r30)				
-			ld		r11,saver10(r30)				
-			ld		r12,saver11(r30)				
-			ld		r13,saver12(r30)				
-			ld		r14,saver13(r30)				
-			
-			bt		vmmDoing64,sw64x2			; Skip to 64-bit stuff
-
-			lwz		r15,vmmppcr6(r5)			; Read vm context 
-			lwz		r24,vmmppcr7(r5)				
-			lwz		r17,vmmppcr8(r5)				
-			lwz		r18,vmmppcr9(r5)		
-			lwz		r19,vmmppcr10(r5)		
-			lwz		r20,vmmppcr11(r5)				
-			lwz		r21,vmmppcr12(r5)				
-			lwz		r22,vmmppcr13(r5)				
-
-			stw		r7,vmmppcr6(r5)				; Write context	
-			stw		r8,vmmppcr7(r5)				
-			stw		r9,vmmppcr8(r5)				
-			stw		r10,vmmppcr9(r5)				
-			stw		r11,vmmppcr10(r5)				
-			stw		r12,vmmppcr11(r5)				
-			stw		r13,vmmppcr12(r5)	
-			la		r6,vmmppcr22(r5)			; Point to fifth line			
-			stw		r14,vmmppcr13(r5)				
-
-			dcbt	0,r6						; Touch fifth line
-			b		sw64x2done					; We are done with this section...
-
-sw64x2:		ld		r15,vmmppcXr6(r5)			; Read vm context 
-			ld		r24,vmmppcXr7(r5)				
-			ld		r17,vmmppcXr8(r5)				
-			ld		r18,vmmppcXr9(r5)		
-			ld		r19,vmmppcXr10(r5)		
-			ld		r20,vmmppcXr11(r5)				
-			ld		r21,vmmppcXr12(r5)				
-			ld		r22,vmmppcXr13(r5)				
-
-			std		r7,vmmppcXr6(r5)				; Write context	
-			std		r8,vmmppcXr7(r5)				
-			std		r9,vmmppcXr8(r5)				
-			std		r10,vmmppcXr9(r5)				
-			std		r11,vmmppcXr10(r5)				
-			std		r12,vmmppcXr11(r5)				
-			std		r13,vmmppcXr12(r5)	
-			la		r6,vmmppcXr22(r5)			; Point to fifth line			
-			std		r14,vmmppcXr13(r5)				
-
-			dcbt	0,r6						; Touch fifth line
-
-sw64x2done:	std		r15,saver6(r30)			; Write vm context 
-			std		r24,saver7(r30)				
-			std		r17,saver8(r30)				
-			std		r18,saver9(r30)		
-			std		r19,saver10(r30)		
-			std		r20,saver11(r30)				
-			std		r21,saver12(r30)				
-			std		r22,saver13(r30)				
-
-;			Swap 8 registers
-			
-			ld		r7,saver14(r30)			; Read savearea	
-			ld		r8,saver15(r30)				
-			ld		r9,saver16(r30)				
-			ld		r10,saver17(r30)				
-			ld		r11,saver18(r30)				
-			ld		r12,saver19(r30)				
-			ld		r13,saver20(r30)				
-			ld		r14,saver21(r30)				
-
-			bt		vmmDoing64,sw64x3			; Skip to 64-bit stuff
-
-			lwz		r15,vmmppcr14(r5)			; Read vm context 
-			lwz		r24,vmmppcr15(r5)				
-			lwz		r17,vmmppcr16(r5)				
-			lwz		r18,vmmppcr17(r5)		
-			lwz		r19,vmmppcr18(r5)		
-			lwz		r20,vmmppcr19(r5)				
-			lwz		r21,vmmppcr20(r5)				
-			lwz		r22,vmmppcr21(r5)				
-
-			stw		r7,vmmppcr14(r5)			; Write context	
-			stw		r8,vmmppcr15(r5)				
-			stw		r9,vmmppcr16(r5)				
-			stw		r10,vmmppcr17(r5)				
-			stw		r11,vmmppcr18(r5)				
-			stw		r12,vmmppcr19(r5)				
-			stw		r13,vmmppcr20(r5)
-			la		r6,vmmppcr30(r5)			; Point to sixth line				
-			stw		r14,vmmppcr21(r5)				
-			
-			dcbt	0,r6						; Touch sixth line
-			b		sw64x3done					; Done with this section...
-
-sw64x3:		ld		r15,vmmppcXr14(r5)			; Read vm context 
-			ld		r24,vmmppcXr15(r5)				
-			ld		r17,vmmppcXr16(r5)				
-			ld		r18,vmmppcXr17(r5)		
-			ld		r19,vmmppcXr18(r5)		
-			ld		r20,vmmppcXr19(r5)				
-			ld		r21,vmmppcXr20(r5)				
-			ld		r22,vmmppcXr21(r5)				
-
-			std		r7,vmmppcXr14(r5)			; Write context	
-			std		r8,vmmppcXr15(r5)				
-			std		r9,vmmppcXr16(r5)				
-			std		r10,vmmppcXr17(r5)				
-			std		r11,vmmppcXr18(r5)				
-			std		r12,vmmppcXr19(r5)				
-			std		r13,vmmppcXr20(r5)
-			la		r6,vmmppcXr30(r5)			; Point to sixth line				
-			std		r14,vmmppcXr21(r5)				
-			
-			dcbt	0,r6						; Touch sixth line
-
-sw64x3done:	std		r15,saver14(r30)			; Write vm context 
-			std		r24,saver15(r30)				
-			std		r17,saver16(r30)				
-			std		r18,saver17(r30)		
-			std		r19,saver18(r30)		
-			std		r20,saver19(r30)				
-			std		r21,saver20(r30)				
-			std		r22,saver21(r30)				
-
-;			Swap 8 registers
-			
-			ld		r7,saver22(r30)			; Read savearea	
-			ld		r8,saver23(r30)				
-			ld		r9,saver24(r30)				
-			ld		r10,saver25(r30)				
-			ld		r11,saver26(r30)				
-			ld		r12,saver27(r30)				
-			ld		r13,saver28(r30)				
-			ld		r14,saver29(r30)				
-
-			bt		vmmDoing64,sw64x4			; Skip to 64-bit stuff
-
-			lwz		r15,vmmppcr22(r5)			; Read vm context 
-			lwz		r24,vmmppcr23(r5)				
-			lwz		r17,vmmppcr24(r5)				
-			lwz		r18,vmmppcr25(r5)		
-			lwz		r19,vmmppcr26(r5)		
-			lwz		r20,vmmppcr27(r5)				
-			lwz		r21,vmmppcr28(r5)				
-			lwz		r22,vmmppcr29(r5)				
-
-			stw		r7,vmmppcr22(r5)			; Write context	
-			stw		r8,vmmppcr23(r5)				
-			stw		r9,vmmppcr24(r5)				
-			stw		r10,vmmppcr25(r5)				
-			stw		r11,vmmppcr26(r5)				
-			stw		r12,vmmppcr27(r5)				
-			stw		r13,vmmppcr28(r5)	
-			la		r6,vmmppcvscr(r5)			; Point to seventh line			
-			stw		r14,vmmppcr29(r5)				
-			dcbt	0,r6						; Touch seventh line
-			b		sw64x4done					; Done with this section...
-			
-sw64x4:		ld		r15,vmmppcXr22(r5)			; Read vm context 
-			ld		r24,vmmppcXr23(r5)				
-			ld		r17,vmmppcXr24(r5)				
-			ld		r18,vmmppcXr25(r5)		
-			ld		r19,vmmppcXr26(r5)		
-			ld		r20,vmmppcXr27(r5)				
-			ld		r21,vmmppcXr28(r5)				
-			ld		r22,vmmppcXr29(r5)				
-
-			std		r7,vmmppcXr22(r5)			; Write context	
-			std		r8,vmmppcXr23(r5)				
-			std		r9,vmmppcXr24(r5)				
-			std		r10,vmmppcXr25(r5)				
-			std		r11,vmmppcXr26(r5)				
-			std		r12,vmmppcXr27(r5)				
-			std		r13,vmmppcXr28(r5)	
-			la		r6,vmmppcvscr(r5)			; Point to seventh line			
-			std		r14,vmmppcXr29(r5)				
-
-			dcbt	0,r6						; Touch seventh line
-
-sw64x4done:	std		r15,saver22(r30)			; Write vm context 
-			std		r24,saver23(r30)				
-			std		r17,saver24(r30)				
-			std		r18,saver25(r30)		
-			std		r19,saver26(r30)		
-			std		r20,saver27(r30)				
-			std		r21,saver28(r30)				
-			std		r22,saver29(r30)				
-
-;			Swap 8 registers
-			
-			ld		r7,saver30(r30)			; Read savearea	
-			ld		r8,saver31(r30)				
-			lwz		r9,savecr(r30)				
-			ld		r10,savexer(r30)				
-			ld		r11,savelr(r30)				
-			ld		r12,savectr(r30)				
-			lwz		r14,savevrsave(r30)				
-
-			bt		vmmDoing64,sw64x5			; Skip to 64-bit stuff
-
-			lwz		r15,vmmppcr30(r5)			; Read vm context 
-			lwz		r24,vmmppcr31(r5)				
-			lwz		r17,vmmppccr(r5)				
-			lwz		r18,vmmppcxer(r5)		
-			lwz		r19,vmmppclr(r5)		
-			lwz		r20,vmmppcctr(r5)				
-			lwz		r22,vmmppcvrsave(r5)				
-
-			stw		r7,vmmppcr30(r5)			; Write context	
-			stw		r8,vmmppcr31(r5)				
-			stw		r9,vmmppccr(r5)				
-			stw		r10,vmmppcxer(r5)				
-			stw		r11,vmmppclr(r5)				
-			stw		r12,vmmppcctr(r5)				
-			stw		r14,vmmppcvrsave(r5)	
-			b		sw64x5done					; Done here...		
-
-sw64x5:		ld		r15,vmmppcXr30(r5)			; Read vm context 
-			ld		r24,vmmppcXr31(r5)				
-			lwz		r17,vmmppcXcr(r5)				
-			ld		r18,vmmppcXxer(r5)		
-			ld		r19,vmmppcXlr(r5)		
-			ld		r20,vmmppcXctr(r5)				
-			lwz		r22,vmmppcXvrsave(r5)				
-
-			std		r7,vmmppcXr30(r5)			; Write context	
-			std		r8,vmmppcXr31(r5)				
-			stw		r9,vmmppcXcr(r5)				
-			std		r10,vmmppcXxer(r5)				
-			std		r11,vmmppcXlr(r5)				
-			std		r12,vmmppcXctr(r5)				
-			stw		r14,vmmppcXvrsave(r5)				
-
-sw64x5done:	std		r15,saver30(r30)			; Write vm context 
-			std		r24,saver31(r30)				
-			stw		r17,savecr(r30)				
-			std		r18,savexer(r30)		
-			std		r19,savelr(r30)		
-			std		r20,savectr(r30)				
-			stw		r22,savevrsave(r30)				
-
-;			Swap 8 registers
-			
-			lwz		r7,savevscr+0(r30)			; Read savearea	
-			lwz		r8,savevscr+4(r30)				
-			lwz		r9,savevscr+8(r30)				
-			lwz		r10,savevscr+12(r30)				
-			lwz		r11,savefpscrpad(r30)				
-			lwz		r12,savefpscr(r30)				
-
-			lwz		r15,vmmppcvscr+0(r5)		; Read vm context 
-			lwz		r24,vmmppcvscr+4(r5)				
-			lwz		r17,vmmppcvscr+8(r5)				
-			lwz		r18,vmmppcvscr+12(r5)		
-			lwz		r19,vmmppcfpscrpad(r5)		
-			lwz		r20,vmmppcfpscr(r5)				
-
-			stw		r7,vmmppcvscr+0(r5)			; Write context	
-			stw		r8,vmmppcvscr+4(r5)				
-			stw		r9,vmmppcvscr+8(r5)				
-			stw		r10,vmmppcvscr+12(r5)				
-			stw		r11,vmmppcfpscrpad(r5)				
-			stw		r12,vmmppcfpscr(r5)				
-
-			stw		r15,savevscr+0(r30)			; Write vm context 
-			stw		r24,savevscr+4(r30)				
-			stw		r17,savevscr+8(r30)				
-			stw		r18,savevscr+12(r30)		
-			stw		r19,savefpscrpad(r30)		
-			stw		r20,savefpscr(r30)				
-
-			
-;
-;			Cobble up the exception return code and save any specific return values
-;
-			
-			lwz		r7,saveexception(r30)		; Pick up the exception code
-			rlwinm	r8,r7,30,24,31				; Convert exception to return code
-			cmplwi	r7,T_DATA_ACCESS			; Was this a DSI?
-			stw		r8,return_code(r5)			; Save the exit code
-			cmplwi	cr1,r7,T_INSTRUCTION_ACCESS	; Exiting because of an ISI?
-			beq+	swapDSI64					; Yeah...
-			cmplwi	r7,T_ALIGNMENT				; Alignment exception?
-			beq+	cr1,swapISI64				; We had an ISI...
-			cmplwi	cr1,r7,T_SYSTEM_CALL		; Exiting because of a system call?
-			beq+	swapDSI64					; An alignment exception looks like a DSI...
-			beq+	cr1,swapSC64				; We had a system call...
-			
-			blr									; Return...
-
-;
-;			Set exit returns for a DSI or alignment exception
-;
-
-swapDSI64:	ld		r10,savedar(r30)			; Get the DAR
-			lwz		r7,savedsisr(r30)			; and the DSISR
-			bt		vmmDoing64,sw64DSI			; Skip to 64-bit stuff...
-
-
-			stw		r10,return_params+0(r5)		; Save DAR as first return parm
-			stw		r7,return_params+4(r5)		; Save DSISR as second return parm
-			blr									; Return...
-
-sw64DSI:	std		r10,return_paramsX+0(r5)	; Save DAR as first return parm
-			std		r7,return_paramsX+8(r5)		; Save DSISR as second return parm (note that this is expanded to 64 bits)
-			blr									; Return...
-
-;
-;			Set exit returns for an ISI
-;
-
-swapISI64:	bt		vmmDoing64,sw64ISI			; Skip to 64-bit stuff...
-			lwz		r7,vmmppcmsr(r5)			; Get the SRR1 value
-			lwz		r10,vmmppcpc(r5)			; Get the PC as failing address
-			rlwinm	r7,r7,0,1,4					; Save the bits that match the DSISR
-			stw		r10,return_params+0(r5)		; Save PC as first return parm
-			stw		r7,return_params+4(r5)		; Save the pseudo-DSISR as second return parm
-			blr									; Return...
-
-sw64ISI:	ld		r7,vmmppcXmsr(r5)			; Get the SRR1 value
-			ld		r10,vmmppcXpc(r5)			; Get the PC as failing address
-			rlwinm	r7,r7,0,1,4					; Save the bits that match the DSISR
-			std		r10,return_paramsX+0(r5)		; Save PC as first return parm
-			std		r7,return_paramsX+8(r5)		; Save the pseudo-DSISR as second return parm
-			blr									; Return...
-
-;
-;			Set exit returns for a system call (note: we did the first 3 earlier)
-;			Do we really need to pass parameters back here????
-;
-
-swapSC64:	bt		vmmDoing64,sw64SC			; Skip to 64-bit stuff...
-			lwz		r10,vmmppcr6(r5)			; Get the fourth parameter
-			stw		r10,return_params+12(r5)	; Save it
-			blr									; Return...
-
-sw64SC:		ld		r10,vmmppcXr6(r5)			; Get the fourth parameter
-			std		r10,return_paramsX+24(r5)	; Save it
-			blr									; Return...
-
-;
-;			vmmFamGuestResume:
-;				Restore Guest context from Fam mode.
-;
-
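Schematically, the resume path copies pc, a sanitized MSR, and r0..r7 from
the FAM guest area into the savearea that rfi will restore from.  A loose C
sketch with hypothetical struct layouts:

    #include <stdint.h>

    struct fam_guest_sk { uint32_t pc, msr, r[8]; };   /* illustrative */
    struct savearea_sk  { uint32_t srr0, srr1, r[8]; };

    static void fam_guest_resume_sketch(const struct fam_guest_sk *g,
                                        struct savearea_sk *sv,
                                        uint32_t sanitized_msr)
    {
        sv->srr0 = g->pc;           /* guest resumes at its saved pc */
        sv->srr1 = sanitized_msr;   /* guest MSR, filtered as usual */
        for (int i = 0; i < 8; i++)
            sv->r[i] = g->r[i];     /* r0..r7 from the FAM state page */
    }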
-vmmFamGuestResume:
-			mfsprg	r10,1							; Get the current activation
-			lwz		r10,ACT_PER_PROC(r10)			; Get the per_proc block
-			lwz		r27,vmmCEntry(r3)				; Get the context that is active
-			lwz		r4,VMMXAFlgs(r10)				; Get the eXtended Architecture flags			
-			rlwinm.	r4,r4,0,0,0						; Are we doing a 64-bit virtual machine?		
-			lwz		r15,spcFlags(r10)				; Get per_proc special flags
-			mr		r26,r3							; Save the activation pointer
-			lwz		r20,vmmContextKern(r27)			; Get the comm area
-			rlwinm	r15,r15,0,FamVMmodebit+1,FamVMmodebit-1	; Clear FamVMmodebit
-			stw		r15,spcFlags(r10)				; Update the special flags
-			bne		fgrX
-			lwz		r7,famguestpc(r20)				; Load famguest ctx pc
-			bf++	vmmMapDone,fgrNoMap				; No mapping done for this space.
-			lwz		r3,SAVflags(r30)				; Pick up the savearea flags
-			lwz		r2,vmmLastMap(r28)				; Get the last mapped address
-			lwz		r6,vmmLastMap+4(r28)			; Get the last mapped address
-			li		r4,T_DATA_ACCESS				; Change to DSI fault
-			oris	r3,r3,hi16(SAVredrive)			; Set exception redrive
-			stw		r2,savedar(r30)					; Set the DAR to the last thing we mapped
-			stw		r6,savedar+4(r30)				; Set the DAR to the last thing we mapped
-			stw		r3,SAVflags(r30)				; Turn on the redrive request
-			lis		r2,hi16(MASK(DSISR_HASH))		; Set PTE/DBAT miss
-			stw		r4,saveexception(r30)			; Say we need to emulate a DSI
-			li		r0,0							; Clear
-			stw		r2,savedsisr(r30)				; Pretend we have a PTE miss
-			stb		r0,vmmGFlags+3(r28)				; Show that the redrive has been taken care of
-fgrNoMap:
-			lwz		r4,savesrr1+4(r30)				; Get the saved MSR value
-			stw		r7,savesrr0+4(r30)				; Set savearea pc
-			lwz		r5,famguestmsr(r20)				; Load famguest ctx msr
-			lis		r6,hi16(MSR_IMPORT_BITS)		; Get the MSR bits that are controllable by user
-			ori		r6,r6,lo16(MSR_IMPORT_BITS)		; Get the rest of the MSR bits that are controllable by user
-			and		r5,r5,r6						; Keep only the controllable bits
-			oris	r5,r5,hi16(MSR_EXPORT_MASK_SET)	; Force on the required bits
-			ori		r5,r5,lo16(MSR_EXPORT_MASK_SET)	; Force on the other required bits
-			rlwimi	r5,r4,0,MSR_FP_BIT,MSR_FP_BIT	; Propagate guest FP
-			rlwimi	r5,r4,0,MSR_VEC_BIT,MSR_VEC_BIT	; Propagate guest Vector	
-			stw		r5,savesrr1+4(r30)				; Set savearea srr1
-			lwz		r4,famguestr0(r20)				; Load famguest ctx r0
-			lwz		r5,famguestr1(r20)				; Load famguest ctx r1
-			lwz		r6,famguestr2(r20)				; Load famguest ctx r2
-			lwz		r7,famguestr3(r20)				; Load famguest ctx r3
-			stw		r4,saver0+4(r30)				; Set savearea r0
-			stw		r5,saver1+4(r30)				; Set savearea r1
-			stw		r6,saver2+4(r30)				; Set savearea r2
-			stw		r7,saver3+4(r30)				; Set savearea r3
-			lwz		r4,famguestr4(r20)				; Load famguest ctx r4
-			lwz		r5,famguestr5(r20)				; Load famguest ctx r5
-			lwz		r6,famguestr6(r20)				; Load famguest ctx r6
-			lwz		r7,famguestr7(r20)				; Load famguest ctx r7
-			stw		r4,saver4+4(r30)				; Set savearea r4
-			stw		r5,saver5+4(r30)				; Set savearea r5
-			stw		r6,saver6+4(r30)				; Set savearea r6
-			stw		r7,saver7+4(r30)				; Set savearea r7
-			b		fgrret
-fgrX:
-			ld		r7,famguestXpc(r20)				; Load famguest ctx pc
-			bf++	vmmMapDone,fgrXNoMap			; No mapping done for this space.
-			lwz		r3,SAVflags(r30)				; Pick up the savearea flags
-			ld		r2,vmmLastMap(r28)				; Get the last mapped address
-			li		r4,T_DATA_ACCESS				; Change to DSI fault
-			oris	r3,r3,hi16(SAVredrive)			; Set exception redrive
-			std		r2,savedar(r30)					; Set the DAR to the last thing we mapped
-			stw		r3,SAVflags(r30)				; Turn on the redrive request
-			lis		r2,hi16(MASK(DSISR_HASH))		; Set PTE/DBAT miss
-			stw		r4,saveexception(r30)			; Say we need to emulate a DSI
-			li		r0,0							; Clear
-			stw		r2,savedsisr(r30)				; Pretend we have a PTE miss
-			stb		r0,vmmGFlags+3(r28)				; Show that the redrive has been taken care of
-fgrXNoMap:
-			ld		r4,savesrr1(r30)				; Get the saved MSR value
-			std		r7,savesrr0(r30)				; Set savearea pc
-			ld		r5,famguestXmsr(r20)			; Load famguest ctx msr
-			lis		r6,hi16(MSR_IMPORT_BITS)		; Get the MSR bits that are controllable by user
-			ori		r6,r6,lo16(MSR_IMPORT_BITS)		; Get the rest of the MSR bits that are controllable by user
-			and		r5,r5,r6						; Keep only the controllable bits
-			oris	r5,r5,hi16(MSR_EXPORT_MASK_SET)	; Force on the required bits
-			ori		r5,r5,lo16(MSR_EXPORT_MASK_SET)	; Force on the other required bits
-			rlwimi	r5,r4,0,MSR_FP_BIT,MSR_FP_BIT	; Propagate guest FP
-			rlwimi	r5,r4,0,MSR_VEC_BIT,MSR_VEC_BIT	; Propagate guest Vector	
-			std		r5,savesrr1(r30)				; Set savearea srr1
-			ld		r4,famguestXr0(r20)				; Load famguest ctx r0
-			ld		r5,famguestXr1(r20)				; Load famguest ctx r1
-			ld		r6,famguestXr2(r20)				; Load famguest ctx r2
-			ld		r7,famguestXr3(r20)				; Load famguest ctx r3
-			std		r4,saver0(r30)					; Set savearea r0
-			std		r5,saver1(r30)					; Set savearea r1
-			std		r6,saver2(r30)					; Set savearea r2
-			std		r7,saver3(r30)					; Set savearea r3
-			ld		r4,famguestXr4(r20)				; Load famguest ctx r4
-			ld		r5,famguestXr5(r20)				; Load famguest ctx r5
-			ld		r6,famguestXr6(r20)				; Load famguest ctx r6
-			ld		r7,famguestXr7(r20)				; Load famguest ctx r7
-			std		r4,saver4(r30)					; Set savearea r4
-			std		r5,saver5(r30)					; Set savearea r5
-			std		r6,saver6(r30)					; Set savearea r6
-			std		r7,saver7(r30)					; Set savearea r7
-fgrret:
-			li		r3,1							; Show normal exit with check for AST
-			mr		r16,r26							; Restore the thread pointer
-			b		EXT(ppcscret)					; Go back to handler...
-
-;
-;			FAM Intercept exception handler
-;
-
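The handler's job, reduced to C: stash the guest's volatile registers and
srr0/srr1 in the FAM state page, mark the processor as being in FAM mode,
and point srr0 at the user-registered handler.  All names below are
illustrative stand-ins for the famguest/famparam fields:

    #include <stdint.h>

    struct fam_page_sk {
        uint32_t guest_r[8], guest_pc, guest_msr;
        uint32_t param[3], dispcode, handler;
    };

    static uint32_t fam_intercept_sketch(struct fam_page_sk *fp,
                                         const uint32_t regs[8],
                                         uint32_t srr0, uint32_t srr1,
                                         uint32_t exc)
    {
        for (int i = 0; i < 8; i++)
            fp->guest_r[i] = regs[i];       /* r0..r7 into famguest ctx */
        fp->guest_pc  = srr0;
        fp->guest_msr = srr1;
        fp->param[0]  = srr0;               /* famparam 0 is always srr0 */
        fp->dispcode  = (exc >> 2) & 0xffu; /* same return-code mapping */
        return fp->handler;                 /* new srr0: the user handler */
    }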
-			.align	5
-			.globl	EXT(vmm_fam_exc)
-
-LEXT(vmm_fam_exc)
-			lwz		r4,VMMXAFlgs(r2)				; Get the eXtended Architecture flags			
-			lwz		r1,pfAvailable(r2)				; Get the CPU features flags
-			rlwinm.	r4,r4,0,0,0						; Are we doing a 64-bit virtual machine?		
-			bne		fexcX
-			lwz		r4,saver4+4(r13)				; Load savearea r4
-			cmplwi	r11,T_ALIGNMENT					; Alignment exception?
-			lwz		r3,VMMareaPhys(r2)				; Load phys state page addr
-			mtcrf   0x02,r1							; Move pf64Bit to its normal place in CR6
-			cmplwi	cr1,r11,T_PROGRAM				; Exiting because of a PRG?
-            bt++    pf64Bitb,fexcVMareaPhys64		; Go do this on a 64-bit machine...
-			slwi	r3,r3,12						; Change ppnum to physical address
-			b		fexcVMareaPhysres
-fexcVMareaPhys64:
-			mtxer	r5								; Restore xer
-			lwz		r5,saver5+4(r13)				; Load savearea r5
-			lwz		r6,saver6+4(r13)				; Load savearea r6
-			sldi	r3,r3,12						; Change ppnum to physical address
-fexcVMareaPhysres:
-			stw		r4,famguestr4(r3)				; Save r4 in famguest ctx
-			stw		r5,famguestr5(r3)				; Save r5 in famguest ctx
-			stw		r6,famguestr6(r3)				; Save r6 in famguest ctx
-			stw		r7,famguestr7(r3)				; Save r7 in famguest ctx
-			lwz		r4,saver0+4(r13)				; Load savearea r0
-			lwz		r5,saver1+4(r13)				; Load savearea r1
-			lwz		r6,saver2+4(r13)				; Load savearea r2
-			lwz		r7,saver3+4(r13)				; Load savearea r3
-			stw		r4,famguestr0(r3)				; Save r0 in famguest ctx
-			stw		r5,famguestr1(r3)				; Save r1 in famguest ctx
-			stw		r6,famguestr2(r3)				; Save r2 in famguest ctx
-			stw		r7,famguestr3(r3)				; Save r3 in famguest ctx
-			lwz		r4,spcFlags(r2)					; Load per_proc spcFlags
-			oris	r4,r4,hi16(FamVMmode)			; Set FAM mode
-			stw		r4,spcFlags(r2)					; Update per_proc spcFlags
-			mfsrr0  r2								; Get the interrupt srr0
-			mfsrr1  r4								; Get the interrupt srr1
-			stw		r2,famguestpc(r3)				; Save srr0 in famguest ctx
-			stw		r4,famguestmsr(r3)				; Save srr1 in famguest ctx
-			li		r6,lo16(MASK(MSR_FE0)|MASK(MSR_SE)|MASK(MSR_BE)|MASK(MSR_FE1))
-			andc	r6,r4,r6						; Clear SE BE FE0 FE1
-			mtsrr1	r6								; Set srr1
-			mr		r6,r3							; Set r6 with phys state page addr
-			rlwinm	r7,r11,30,24,31					; Convert exception to return code
-			beq+	cr1,fexcPRG						; We had a program exception...
-			bne+	fexcret	
-													; We had an Alignment...
-			mfdar	r3								; Load dar
-			mfdsisr	r4								; Load dsisr
-			stw		r3,famparam+0x4(r6)				; Set famparam 1 with dar
-			stw		r4,famparam+0x8(r6)				; Set famparam 2 with dsisr
-			b		fexcret							;
-fexcPRG:
-			stw		r4,famparam+0x4(r6)				; Set famparam 1 with srr1
-			mr		r3,r4							; Set r3 with dsisr
-			lwz		r4,famguestr4(r6)				; Load r4 from famguest context
-fexcret:
-			lwz		r5,famguestr5(r6)				; Load r5 from famguest context
-			lwz		r13,famhandler(r6)				; Load user address to resume
-			stw		r2,famparam(r6)					; Set famparam 0 with srr0
-			stw		r7,famdispcode(r6)				; Save the exit code
-			lwz		r1,famrefcon(r6)				; load refcon
-            bt++    pf64Bitb,fexcrfi64				; Go do this on a 64-bit machine...
-			mtcr	r0								; Restore cr
-			mtsrr0	r13								; Load srr0
-			mr		r0,r7							; Set dispatch code
-			lwz		r7,famguestr7(r6)				; Load r7 from famguest context
-			lwz		r6,famguestr6(r6)				; Load r6 from famguest context
-			mfsprg	r13,2							; Restore r13
-			mfsprg  r11,3							; Restore r11
-			rfi
-fexcrfi64:
-			mtcr	r0								; Restore cr
-			mtsrr0	r13								; Load srr0
-			mr		r0,r7							; Set dispatch code
-			lwz		r7,famguestr7(r6)				; Load r7 from famguest context
-			lwz		r6,famguestr6(r6)				; Load r6 from famguest context
-			mfsprg	r13,2							; Restore r13
-			mfsprg  r11,3							; Restore r11
-			rfid
-fexcX:
-			mtxer	r5								; Restore xer
-			ld		r4,saver4(r13)					; Load savearea r4
-			ld		r5,saver5(r13)					; Load savearea r5
-			ld		r6,saver6(r13)					; Load savearea r6
-			cmplwi	r11,T_ALIGNMENT					; Alignment exception?
-			lwz		r3,VMMareaPhys(r2)				; Load phys state page addr
-			mtcrf   0x02,r1							; Move pf64Bit to its normal place in CR6
-			cmplwi	cr1,r11,T_PROGRAM				; Exiting because of a PRG?
-			sldi	r3,r3,12						; Change ppnum to physical address
-			std		r4,famguestXr4(r3)				; Save r4 in famguest ctx
-			std		r5,famguestXr5(r3)				; Save r5 in famguest ctx
-			std		r6,famguestXr6(r3)				; Save r6 in famguest ctx
-			std		r7,famguestXr7(r3)				; Save r7 in famguest ctx
-			ld		r4,saver0(r13)					; Load savearea r0
-			ld		r5,saver1(r13)					; Load savearea r1
-			ld		r6,saver2(r13)					; Load savearea r2
-			ld		r7,saver3(r13)					; Load savearea r3
-			std		r4,famguestXr0(r3)				; Save r0 in famguest ctx
-			std		r5,famguestXr1(r3)				; Save r1 in famguest ctx
-			std		r6,famguestXr2(r3)				; Save r2 in famguest ctx
-			std		r7,famguestXr3(r3)				; Save r3 in famguest ctx
-			lwz		r4,spcFlags(r2)					; Load per_proc spcFlags
-			oris	r4,r4,hi16(FamVMmode)			; Set FAM mode
-			stw		r4,spcFlags(r2)					; Update per_proc spcFlags
-			mfsrr0  r2								; Get the interrupt srr0
-			mfsrr1  r4								; Get the interrupt srr1
-			std		r2,famguestXpc(r3)				; Save srr0 in famguest ctx
-			std		r4,famguestXmsr(r3)				; Save srr1 in famguest ctx
-			li		r6,lo16(MASK(MSR_FE0)|MASK(MSR_SE)|MASK(MSR_BE)|MASK(MSR_FE1))
-			andc	r6,r4,r6						; Clear SE BE FE0 FE1
-			mtsrr1	r6								; Set srr1
-			mr		r6,r3							; Set r6 with phys state page addr
-			rlwinm	r7,r11,30,24,31					; Convert exception to return code
-			beq+	cr1,fexcXPRG					; We had a program exception...
-			bne+	fexcXret	
-													; We had an Alignment...
-			mfdar	r3								; Load dar
-			mfdsisr	r4								; Load dsisr
-			std		r3,famparamX+0x8(r6)			; Set famparam 1 with dar
-			std		r4,famparamX+0x10(r6)			; Set famparam 2 with dsisr
-			b		fexcXret
-fexcXPRG:
-			std		r4,famparamX+0x8(r6)			; Set famparam 1 with srr1
-			mr		r3,r4							; Set r3 with dsisr
-			ld		r4,famguestXr4(r6)				; Load r4 from famguest context
-fexcXret:
-			ld		r5,famguestXr5(r6)				; Load r5 from famguest context
-			ld		r13,famhandlerX(r6)				; Load user address to resume
-			std		r2,famparamX(r6)				; Set famparam 0 with srr0
-			std		r7,famdispcodeX(r6)				; Save the exit code
-			ld		r1,famrefconX(r6)				; load refcon
-			mtcr	r0								; Restore cr
-			mtsrr0	r13								; Load srr0
-			mr		r0,r7							; Set dispatch code
-			ld		r7,famguestXr7(r6)				; Load r7 from famguest context
-			ld		r6,famguestXr6(r6)				; Load r6 from famguest context
-			mfsprg	r13,2							; Restore r13
-			mfsprg  r11,3							; Restore r11
-			rfid
-
-;
-;			FAM Intercept DSI ISI fault handler
-;
-
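The fault path differs from the exception path only in what lands in the fam
parameters: a DSI hands the handler DAR and DSISR, while an ISI hands it a
pseudo-DSISR cut from SRR1.  In sketch form (param[] stands in for
famparam 0..2):

    #include <stdint.h>

    static void fam_pf_params_sketch(uint32_t param[3], int is_isi,
                                     uint32_t dar, uint32_t dsisr,
                                     uint32_t srr1)
    {
        if (is_isi) {
            param[1] = srr1 & 0x78000000u;  /* bits that match the DSISR */
        } else {
            param[1] = dar;
            param[2] = dsisr;
        }
    }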
-			.align	5
-			.globl	EXT(vmm_fam_pf)
-
-LEXT(vmm_fam_pf)
-			lwz		r4,VMMXAFlgs(r2)				; Get the eXtended Architecture flags			
-			lwz		r3,VMMareaPhys(r2)				; Load phys state page addr
-			rlwinm.	r4,r4,0,0,0						; Are we doing a 64-bit virtual machine?		
-			bne		fpfX
-			lwz		r4,saver0+4(r13)				; Load savearea r0
-			lwz		r5,saver1+4(r13)				; Load savearea r1
-			lwz		r6,saver2+4(r13)				; Load savearea r2
-			lwz		r7,saver3+4(r13)				; Load savearea r3
-            bt++    pf64Bitb,fpfVMareaPhys64		; Go do this on a 64-bit machine...
-			slwi	r3,r3,12						; Change ppnum to physical address
-			b		fpfVMareaPhysret
-fpfVMareaPhys64:
-			sldi	r3,r3,12						; Change ppnum to physical address
-fpfVMareaPhysret:
-			stw		r4,famguestr0(r3)				; Save r0 in famguest
-			stw		r5,famguestr1(r3)				; Save r1 in famguest
-			stw		r6,famguestr2(r3)				; Save r2 in famguest
-			stw		r7,famguestr3(r3)				; Save r3 in famguest
-			lwz		r4,saver4+4(r13)				; Load savearea r4
-			lwz		r5,saver5+4(r13)				; Load savearea r5
-			lwz		r6,saver6+4(r13)				; Load savearea r6
-			lwz		r7,saver7+4(r13)				; Load savearea r7
-			stw		r4,famguestr4(r3)				; Save r4 in famguest
-			lwz		r4,spcFlags(r2)					; Load spcFlags
-			stw		r5,famguestr5(r3)				; Save r5 in famguest
-			lwz		r5,savesrr0+4(r13)				; Get the interrupt srr0
-			stw		r6,famguestr6(r3)				; Save r6 in famguest
-			lwz		r6,savesrr1+4(r13)				; Load srr1
-			oris	r4,r4,hi16(FamVMmode)			; Set FAM mode
-			stw		r7,famguestr7(r3)				; Save r7 in famguest
-			stw		r4,spcFlags(r2)					; Update spcFlags
-			lwz		r1,famrefcon(r3)				; Load refcon
-			lwz		r2,famhandler(r3)				; Load famhandler to resume
-			stw		r5,famguestpc(r3)				; Save srr0
-			stw		r5,saver2+4(r13)				; Store srr0 in savearea r2
-			stw		r5,famparam(r3)					; Store srr0 in fam param 0
-			stw		r6,famguestmsr(r3)				; Save srr1 in famguestmsr
-			cmplwi	cr1,r11,T_INSTRUCTION_ACCESS	; Was this an ISI?
-			rlwinm	r7,r11,30,24,31					; Convert exception to return code
-			beq+	cr1,fpfISI						; We had an ISI...
-; fpfDSI
-			lwz		r6,savedar+4(r13)				; Load dar from savearea
-			lwz		r4,savedsisr(r13)				; Load dsisr from savearea
-			stw		r6,famparam+0x4(r3)				; Store dar in fam param 1
-			stw		r6,saver3+4(r13)				; Store dar in savearea r3
-			stw		r4,famparam+0x8(r3)				; Store dsisr in fam param 2
-			stw		r4,saver4+4(r13)				; Store dsisr in savearea r4
-			b		fpfret
-fpfISI:	
-			rlwinm	r6,r6,0,1,4						; Save the bits that match the DSISR
-			stw		r6,famparam+0x4(r3)				; Store srr1 in fam param 1 
-			stw		r6,saver3+4(r13)				; Store srr1 in savearea r3
-fpfret:
-			stw		r7,saver0+4(r13)				; Set dispatch code
-			stw		r7,famdispcode(r3)				; Set dispatch code
-			stw		r1,saver1+4(r13)				; Store refcon in savearea r1
-			stw		r2,savesrr0+4(r13)				; Store famhandler in srr0
-			blr
-fpfX:
-			ld		r4,saver0(r13)					; Load savearea r0
-			ld		r5,saver1(r13)					; Load savearea r1
-			ld		r6,saver2(r13)					; Load savearea r2
-			ld		r7,saver3(r13)					; Load savearea r3
-			sldi	r3,r3,12						; Change ppnum to physical address
-			std		r4,famguestXr0(r3)				; Save r0 in famguest
-			std		r5,famguestXr1(r3)				; Save r1 in famguest
-			std		r6,famguestXr2(r3)				; Save r2 in famguest
-			std		r7,famguestXr3(r3)				; Save r3 in famguest
-			ld		r4,saver4(r13)					; Load savearea r4
-			ld		r5,saver5(r13)					; Load savearea r5
-			ld		r6,saver6(r13)					; Load savearea r6
-			ld		r7,saver7(r13)					; Load savearea r7
-			std		r4,famguestXr4(r3)				; Save r4 in famguest
-			lwz		r4,spcFlags(r2)					; Load spcFlags
-			std		r5,famguestXr5(r3)				; Save r5 in famguest
-			ld		r5,savesrr0(r13)				; Get the interrupt srr0
-			std		r6,famguestXr6(r3)				; Save r6 in famguest
-			ld		r6,savesrr1(r13)				; Load srr1
-			oris	r4,r4,hi16(FamVMmode)			; Set FAM mode
-			std		r7,famguestXr7(r3)				; Save r7 in famguest
-			stw		r4,spcFlags(r2)					; Update spcFlags
-			ld		r1,famrefconX(r3)				; Load refcon
-			ld		r2,famhandlerX(r3)				; Load famhandler to resume
-			std		r5,famguestXpc(r3)				; Save srr0
-			std		r5,saver2(r13)					; Store srr0 in savearea r2
-			std		r5,famparamX(r3)				; Store srr0 in fam param 0
-			std		r6,famguestXmsr(r3)				; Save srr1 in famguestmsr
-			cmplwi	cr1,r11,T_INSTRUCTION_ACCESS	; Was this an ISI?
-			rlwinm	r7,r11,30,24,31					; Convert exception to return code
-			beq+	cr1,fpfXISI						; We had an ISI...
-; fpfXDSI
-			ld		r6,savedar(r13)					; Load dar from savearea
-			lwz		r4,savedsisr(r13)				; Load dsisr from savearea
-			std		r6,famparamX+0x8(r3)			; Store dar in fam param 1
-			std		r6,saver3(r13)					; Store dar in savearea r3
-			std		r4,famparamX+0x10(r3)				; Store dsisr in fam param 2
-			std		r4,saver4(r13)					; Store dsisr in savearea r4
-			b		fpfXret
-fpfXISI:	
-			rlwinm	r6,r6,0,1,4						; Save the bits that match the DSISR
-			std		r6,famparamX+0x8(r3)			; Store srr1 in fam param 1 
-			std		r6,saver3(r13)					; Store srr1 in savearea r3
-fpfXret:
-			std		r7,saver0(r13)					; Set dispatch code
-			std		r7,famdispcodeX(r3)				; Set dispatch code
-			std		r1,saver1(r13)					; Store refcon in savearea r1
-			std		r2,savesrr0(r13)				; Store famhandler in srr0
-			blr
-
-/*
- *	Ultra Fast Path FAM syscalls
- *
- *	The UFT FAMs are those from kvmmResumeGuest to kvmmSetGuestRegister, inclusive.
- *	We get here directly from the syscall vector, with interrupts and translation off,
- *	64-bit mode on if supported, and all registers live except:
- *
- *	r13   = holds caller's cr
- *	sprg2 = holds caller's r13
- *	sprg3 = holds caller's r11
- *	cr2   = set on (r3==kvmmSetGuestRegister)
- *	cr5   = set on (r3==kvmmResumeGuest)
- */
-
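The register get/set fast path below is tiny: bounds-check the index (only
r0..r7 live in the FAM state page), scale it, and index the block directly.
As C, with famguest_r standing in for famguestr0..famguestr7:

    #include <stdint.h>

    static uint32_t uft_get_guest_reg(const uint32_t famguest_r[8],
                                      uint32_t idx)
    {
        return (idx <= 7) ? famguest_r[idx] : 0;  /* lwzx r3,r4,r3 */
    }

    static int uft_set_guest_reg(uint32_t famguest_r[8], uint32_t idx,
                                 uint32_t value)
    {
        if (idx > 7)
            return -1;            /* out of range: decline */
        famguest_r[idx] = value;  /* stwx r5,r4,r3 */
        return 0;
    }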
-			.align	5
-			.globl	EXT(vmm_ufp)
-
-LEXT(vmm_ufp)
-			mfsprg	r3,0							; Get the per_proc area
-			mr		r11,r13							; Move saved cr to r11
-			lwz		r13,VMMXAFlgs(r3)				; Get the eXtended Architecture flags			
-			rlwinm.	r13,r13,0,0,0					; Are we doing a 64-bit virtual machine?		
-
-			lwz		r13,pfAvailable(r3)				; Get feature flags
-			mtcrf	0x02,r13						; Put pf64Bitb etc in cr6
-			lwz		r13,VMMareaPhys(r3)				; Load fast assist area
-            bt++    pf64Bitb,ufpVMareaPhys64		; Go do this on a 64-bit machine...
-			slwi	r13,r13,12						; Change ppnum to physical address
-			b		ufpVMareaPhysret
-ufpVMareaPhys64:
-			sldi	r13,r13,12						; Change ppnum to physical address
-ufpVMareaPhysret:
-			bne		ufpX							; go handle a 64-bit virtual machine
-
-			bt		cr5_eq,ufpResumeGuest			; if kvmmResumeGuest, branch to ResumeGuest
-			cmplwi	cr5,r4,7						; First argument in range? (ie, 0-7)
-			bgt		cr5,ufpVMret					; Return if not in the range
-			slwi	r4,r4,2							; multiply index by 4
-			la		r3,famguestr0(r13)				; Load the base address
-			bt		cr2_eq,ufpSetGuestReg			; Set/get selector
-; ufpGetGuestReg
-			lwzx	r3,r4,r3						; Load the guest register
-			b		ufpVMret						; Return
-ufpSetGuestReg:
-			stwx	r5,r4,r3						; Update the guest register
-			li		r3,0							; Set return value
-			b		ufpVMret						; Return
-ufpResumeGuest:
-			lwz		r7,spcFlags(r3)					; Pick up the special flags
-			mtsrr0	r4								; Set srr0
-			rlwinm.	r6,r6,0,vmmKeyb,vmmKeyb			; Check vmmKeyb in maskCntrl
-			rlwinm	r7,r7,0,FamVMmodebit+1,FamVMmodebit-1	; Clear FamVMmodebit
-			stw		r7,spcFlags(r3)					; Update the special flags
-			mfsrr1	r6								; Get the current MSR value
-
-			lwz		r4,famguestmsr(r13)				; Load guest srr1
-			lis		r1,hi16(MSR_IMPORT_BITS)		; Get the MSR bits that are controllable by user
-			ori		r1,r1,lo16(MSR_IMPORT_BITS)		; Get the rest of the MSR bits that are controllable by user
-			and		r4,r4,r1						; Keep only the controllable bits
-			oris	r4,r4,hi16(MSR_EXPORT_MASK_SET)	; Force on the required bits
-			ori		r4,r4,lo16(MSR_EXPORT_MASK_SET)	; Force on the other required bits
-			rlwimi	r4,r6,0,MSR_FP_BIT,MSR_FP_BIT	; Propagate guest FP
-			rlwimi	r4,r6,0,MSR_VEC_BIT,MSR_VEC_BIT	; Propagate guest Vector	
-			beq		ufpnokey						; Branch if not key switch
-			mr		r2,r7							; Save r7
-			rlwimi	r7,r5,32+vmmKeyb-userProtKeybit,userProtKeybit,userProtKeybit	; Set the protection key
-			cmpw	cr0,r7,r2						; Is userProtKeybit changed?						
-			beq		ufpnokey						; No, go to ResumeGuest_nokey
-			mr		r5,r3							; Get the per_proc area
-			stw		r7,spcFlags(r3)					; Update the special flags
-
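The spill/reload that follows exists because changing the user protection key
forces a segment-register reload, which clobbers live state; so the fast path
spills everything to a savearea, calls switchSegs, and restores afterward.
Schematically, with hypothetical helpers:

    #include <stdint.h>
    #include <string.h>

    static void key_switch_sketch(uint32_t *spf, uint32_t new_spf,
                                  uint64_t savearea[32],
                                  uint64_t live_regs[32],
                                  void (*switch_segs)(void))
    {
        if (new_spf == *spf)
            return;                               /* no key change: stay fast */
        *spf = new_spf;
        memcpy(savearea, live_regs, 32 * sizeof(uint64_t));  /* spill */
        switch_segs();                            /* EXT(switchSegs) analogue */
        memcpy(live_regs, savearea, 32 * sizeof(uint64_t));  /* reload */
    }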
-            bt++    pf64Bitb,ufpsave64			; Go do this on a 64-bit machine...
-
-			lwz		r3,next_savearea+4(r5)			; Get the exception save area
-			stw		r8,saver8+4(r3)					; Save r8
-			stw		r9,saver9+4(r3)					; Save r9
-			stw		r10,saver10+4(r3)				; Save r10
-			stw		r11,saver11+4(r3)				; Save r11
-			stw		r12,saver12+4(r3)				; Save r12
-			stw		r13,saver13+4(r3)				; Save r13
-			stw		r14,saver14+4(r3)				; Save r14
-			stw		r15,saver15+4(r3)				; Save r15
-			stw		r16,saver16+4(r3)				; Save r16
-			stw		r17,saver17+4(r3)				; Save r17
-			stw		r18,saver18+4(r3)				; Save r18
-			stw		r19,saver19+4(r3)				; Save r19
-			stw		r20,saver20+4(r3)				; Save r20
-			stw		r21,saver21+4(r3)				; Save r21
-			stw		r22,saver22+4(r3)				; Save r22
-			stw		r23,saver23+4(r3)				; Save r23
-			stw		r24,saver24+4(r3)				; Save r24
-			stw		r25,saver25+4(r3)				; Save r25
-			stw		r26,saver26+4(r3)				; Save r26
-			stw		r27,saver27+4(r3)				; Save r27
-			stw		r28,saver28+4(r3)				; Save r28
-			stw		r29,saver29+4(r3)				; Save r29
-			stw		r30,saver30+4(r3)				; Save r30
-			stw		r31,saver31+4(r3)				; Save r31
-			b		ufpsaveres						; Continue
-
-ufpsave64:
-			ld		r3,next_savearea(r5)			; Get the exception save area
-			std		r8,saver8(r3)					; Save r8
-			std		r9,saver9(r3)					; Save r9
-			std		r10,saver10(r3)					; Save r10
-			std		r11,saver11(r3)					; Save r11
-			std		r12,saver12(r3)					; Save r12
-			std		r13,saver13(r3)					; Save r13
-			std		r14,saver14(r3)					; Save r14
-			std		r15,saver15(r3)					; Save r15
-			std		r16,saver16(r3)					; Save r16
-			std		r17,saver17(r3)					; Save r17
-			std		r18,saver18(r3)					; Save r18
-			std		r19,saver19(r3)					; Save r19
-			std		r20,saver20(r3)					; Save r20
-			std		r21,saver21(r3)					; Save r21
-			std		r22,saver22(r3)					; Save r22
-			std		r23,saver23(r3)					; Save r23
-			std		r24,saver24(r3)					; Save r24
-			std		r25,saver25(r3)					; Save r25
-			std		r26,saver26(r3)					; Save r26
-			std		r27,saver27(r3)					; Save r27
-			std		r28,saver28(r3)					; Save r28
-			std		r29,saver29(r3)					; Save r29
-			mfxer	r2								; Get xer
-			std		r30,saver30(r3)					; Save r30
-			std		r31,saver31(r3)					; Save r31
-			std		r2,savexer(r3)					; Save xer
-
-ufpsaveres:
-			mflr	r20								; Get lr
-			li		r2,1							; Set to 1
-			stw		r7,spcFlags(r5)					; Update the special flags
-			mr		r13,r3							; Set current savearea
-			mr		r21,r4							; Save r4
-			sth		r2,ppInvSeg(r5)					; Force a reload of the SRs
-			mr		r29,r5							; Get the per_proc area
-			mr		r3,r4							; Set MSR value we are going to
-			bl		EXT(switchSegs)					; Go handle the segment registers/STB
-			mr		r3,r13							; Set current savearea
-			mr		r4,r21							; Restore r4
-			mtlr	r20								; Set lr
-
-            bt++    pf64Bitb,ufprestore64			; Go do this on a 64-bit machine...
-			lwz		r8,saver8+4(r3)					; Load r8
-			lwz		r9,saver9+4(r3)					; Load r9
-			lwz		r10,saver10+4(r3)				; Load r10
-			lwz		r11,saver11+4(r3)				; Load r11
-			lwz		r12,saver12+4(r3)				; Load r12
-			lwz		r13,saver13+4(r3)				; Load r13
-			lwz		r14,saver14+4(r3)				; Load r14
-			lwz		r15,saver15+4(r3)				; Load r15
-			lwz		r16,saver16+4(r3)				; Load r16
-			lwz		r17,saver17+4(r3)				; Load r17
-			lwz		r18,saver18+4(r3)				; Load r18
-			lwz		r19,saver19+4(r3)				; Load r19
-			lwz		r20,saver20+4(r3)				; Load r20
-			lwz		r21,saver21+4(r3)				; Load r21
-			lwz		r22,saver22+4(r3)				; Load r22
-			lwz		r23,saver23+4(r3)				; Load r23
-			lwz		r24,saver24+4(r3)				; Load r24
-			lwz		r25,saver25+4(r3)				; Load r25
-			lwz		r26,saver26+4(r3)				; Load r26
-			lwz		r27,saver27+4(r3)				; Load r27
-			lwz		r28,saver28+4(r3)				; Load r28
-			lwz		r29,saver29+4(r3)				; Load r29
-			lwz		r30,saver30+4(r3)				; Load r30
-			lwz		r31,saver31+4(r3)				; Load r31
-			b		ufpnokey						; Continue
-ufprestore64:
-			ld		r2,savexer(r3)					; Load xer
-			ld		r8,saver8(r3)					; Load r8
-			ld		r9,saver9(r3)					; Load r9
-			ld		r10,saver10(r3)					; Load r10
-			mtxer	r2								; Restore xer
-			ld		r11,saver11(r3)					; Load r11
-			ld		r12,saver12(r3)					; Load r12
-			ld		r13,saver13(r3)					; Load r13
-			ld		r14,saver14(r3)					; Load r14
-			ld		r15,saver15(r3)					; Load r15
-			ld		r16,saver16(r3)					; Load r16
-			ld		r17,saver17(r3)					; Load r17
-			ld		r18,saver18(r3)					; Load r18
-			ld		r19,saver19(r3)					; Load r19
-			ld		r20,saver20(r3)					; Load r20
-			ld		r21,saver21(r3)					; Load r21
-			ld		r22,saver22(r3)					; Load r22
-			ld		r23,saver23(r3)					; Load r23
-			ld		r24,saver24(r3)					; Load r24
-			ld		r25,saver25(r3)					; Load r25
-			ld		r26,saver26(r3)					; Load r26
-			ld		r27,saver27(r3)					; Load r27
-			ld		r28,saver28(r3)					; Load r28
-			ld		r29,saver29(r3)					; Load r29
-			ld		r30,saver30(r3)					; Load r30
-			ld		r31,saver31(r3)					; Load r31
-ufpnokey:
-			mfsprg	r3,0							; Get the per_proc area
-			mtsrr1	r4								; Set srr1
-			lwz		r0,famguestr0(r13)				; Load r0 
-			lwz		r1,famguestr1(r13)				; Load r1
-			lwz		r2,famguestr2(r13)				; Load r2
-			lwz		r3,famguestr3(r13)				; Load r3
-			lwz		r4,famguestr4(r13)				; Load r4
-			lwz		r5,famguestr5(r13)				; Load r5
-			lwz		r6,famguestr6(r13)				; Load r6
-			lwz		r7,famguestr7(r13)				; Load r7
-ufpVMret:
-			mfsprg	r13,2							; Restore R13
-            bt++    pf64Bitb,ufpVMrfi64				; Go do this on a 64-bit machine...
-			mtcrf	0xFF,r11						; Restore CR
-			mfsprg	r11,3							; Restore R11
-			rfi										; All done, go back...
-ufpVMrfi64:
-			mtcrf	0xFF,r11						; Restore CR
-			mfsprg	r11,3							; Restore R11
-			rfid
-
-ufpX:                                               ; here if virtual machine is 64-bit
-			bt		cr5_eq,ufpXResumeGuest			; if kvmmResumeGuest, branch to ResumeGuest
-			cmplwi	cr5,r4,7						; Is first arg in range 0-7?
-			bgt		cr5,ufpXVMret					; Return if not in the range
-			slwi	r4,r4,3							; multiply index by 8
-			la		r3,famguestXr0(r13)				; Load the base address
-			bt		cr2_eq,ufpXSetGuestReg			; Set/get selector
-; ufpXGetGuestReg
-			ldx		r3,r4,r3						; Load the guest register
-			b		ufpXVMret						; Return
-ufpXSetGuestReg:
-			stdx	r5,r4,r3						; Update the guest register
-			li		r3,0							; Set return value
-			b		ufpXVMret						; Return
-ufpXResumeGuest:
-			lwz		r7,spcFlags(r3)					; Pick up the special flags
-			mtsrr0	r4								; Set srr0
-			rlwinm.	r6,r6,0,vmmKeyb,vmmKeyb			; Check vmmKeyb in maskCntrl
-			rlwinm	r7,r7,0,FamVMmodebit+1,FamVMmodebit-1	; Clear FamVMmodebit
-			stw		r7,spcFlags(r3)					; Update the special flags
-			mfsrr1	r6								; Get the current MSR value
-
-			ld		r4,famguestXmsr(r13)			; Load guest srr1
-			lis		r1,hi16(MSR_IMPORT_BITS)		; Get the MSR bits that are controllable by user
-			ori		r1,r1,lo16(MSR_IMPORT_BITS)		; Get the rest of the MSR bits that are controllable by user
-			and		r4,r4,r1						; Keep only the controllable bits
-			oris	r4,r4,hi16(MSR_EXPORT_MASK_SET)	; Force on the required bits
-			ori		r4,r4,lo16(MSR_EXPORT_MASK_SET)	; Force on the other required bits
-			rlwimi	r4,r6,0,MSR_FP_BIT,MSR_FP_BIT	; Propagate guest FP
-			rlwimi	r4,r6,0,MSR_VEC_BIT,MSR_VEC_BIT	; Propagate guest Vector	
-			beq		ufpXnokey						; Branch if not key switch
-			mr		r2,r7							; Save r7
-			rlwimi	r7,r5,32+vmmKeyb-userProtKeybit,userProtKeybit,userProtKeybit	; Set the protection key
-			cmpw	cr0,r7,r2						; Is userProtKeybit changed?						
-			beq		ufpXnokey						; No, go to ResumeGuest_nokey
-			mr		r5,r3							; Get the per_proc area
-			stw		r7,spcFlags(r3)					; Update the special flags
-
-			ld		r3,next_savearea(r5)			; Get the exception save area
-			std		r8,saver8(r3)					; Save r8
-			std		r9,saver9(r3)					; Save r9
-			std		r10,saver10(r3)					; Save r10
-			std		r11,saver11(r3)					; Save r11
-			std		r12,saver12(r3)					; Save r12
-			std		r13,saver13(r3)					; Save r13
-			std		r14,saver14(r3)					; Save r14
-			std		r15,saver15(r3)					; Save r15
-			std		r16,saver16(r3)					; Save r16
-			std		r17,saver17(r3)					; Save r17
-			std		r18,saver18(r3)					; Save r18
-			std		r19,saver19(r3)					; Save r19
-			std		r20,saver20(r3)					; Save r20
-			std		r21,saver21(r3)					; Save r21
-			std		r22,saver22(r3)					; Save r22
-			std		r23,saver23(r3)					; Save r23
-			std		r24,saver24(r3)					; Save r24
-			std		r25,saver25(r3)					; Save r25
-			std		r26,saver26(r3)					; Save r26
-			std		r27,saver27(r3)					; Save r27
-			std		r28,saver28(r3)					; Save r28
-			std		r29,saver29(r3)					; Save r29
-			mfxer	r2								; Get xer
-			std		r30,saver30(r3)					; Save r30
-			std		r31,saver31(r3)					; Save r31
-			std		r2,savexer(r3)					; Save xer
-
-			mflr	r20								; Get lr
-			li		r2,1							; Set to  1
-			stw		r7,spcFlags(r5)					; Update the special flags
-			mr		r13,r3							; Set current savearea
-			mr		r21,r4							; Save r4
-			sth		r2,ppInvSeg(r5)					; Force a reload of the SRs
-			mr		r29,r5							; Get the per_proc area
-			mr		r3,r4							; Set MSR value we're going to use
-			bl		EXT(switchSegs)					; Go handle the segment registers/STB
-			mr		r3,r13							; Set current savearea
-			mr		r4,r21							; Restore r4
-			mtlr	r20								; Set lr
-
-			ld		r2,savexer(r3)					; Load xer
-			ld		r8,saver8(r3)					; Load r8
-			ld		r9,saver9(r3)					; Load r9
-			ld		r10,saver10(r3)					; Load r10
-			mtxer	r2								; Restore xer
-			ld		r11,saver11(r3)					; Load r11
-			ld		r12,saver12(r3)					; Load r12
-			ld		r13,saver13(r3)					; Load r13
-			ld		r14,saver14(r3)					; Load r14
-			ld		r15,saver15(r3)					; Load r15
-			ld		r16,saver16(r3)					; Load r16
-			ld		r17,saver17(r3)					; Load r17
-			ld		r18,saver18(r3)					; Load r18
-			ld		r19,saver19(r3)					; Load r19
-			ld		r20,saver20(r3)					; Load r20
-			ld		r21,saver21(r3)					; Load r21
-			ld		r22,saver22(r3)					; Load r22
-			ld		r23,saver23(r3)					; Load r23
-			ld		r24,saver24(r3)					; Load r24
-			ld		r25,saver25(r3)					; Load r25
-			ld		r26,saver26(r3)					; Load r26
-			ld		r27,saver27(r3)					; Load r27
-			ld		r28,saver28(r3)					; Load r28
-			ld		r29,saver29(r3)					; Load r29
-			ld		r30,saver30(r3)					; Load r30
-			ld		r31,saver31(r3)					; Load r31
-ufpXnokey:
-			mtsrr1	r4								; Set srr1
-			ld		r0,famguestXr0(r13)				; Load r0 
-			ld		r1,famguestXr1(r13)				; Load r1
-			ld		r2,famguestXr2(r13)				; Load r2
-			ld		r3,famguestXr3(r13)				; Load r3
-			ld		r4,famguestXr4(r13)				; Load r4
-			ld		r5,famguestXr5(r13)				; Load r5
-			ld		r6,famguestXr6(r13)				; Load r6
-			ld		r7,famguestXr7(r13)				; Load r7
-ufpXVMret:
-			mfsprg	r13,2							; Restore R13
-			mtcrf	0xFF,r11						; Restore CR
-			mfsprg	r11,3							; Restore R11
-			rfid
-
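The deleted ufpX path above is, at heart, a bounds-checked accessor over the eight 64-bit guest register slots famguestXr0..famguestXr7: the caller's index is checked against the range 0-7 (cmplwi/bgt), scaled by 8 with slwi, and used as a byte offset for an indexed load (ldx, get) or store (stdx, set). A minimal C sketch of that dispatch, with a hypothetical struct standing in for the per_proc FAM save area; the real code simply leaves r3 untouched on an out-of-range index, which the sketch approximates with an error return:

#include <stdint.h>

/* Hypothetical stand-in for the per_proc FAM guest-register block. */
typedef struct {
	uint64_t famguestXr[8];		/* guest r0..r7 slots, 8 bytes each */
} fam_area_t;

/* Get: idx is validated first, then used as a scaled offset from
 * famguestXr0 (slwi idx,3 followed by ldx). */
static int fam_get_guest_reg(fam_area_t *fam, unsigned idx, uint64_t *out)
{
	if (idx > 7)
		return -1;		/* out of range: bail, as to ufpXVMret */
	*out = fam->famguestXr[idx];
	return 0;
}

/* Set: same bounds check, then the stdx; the asm returns 0 in r3. */
static int fam_set_guest_reg(fam_area_t *fam, unsigned idx, uint64_t val)
{
	if (idx > 7)
		return -1;
	fam->famguestXr[idx] = val;
	return 0;
}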
diff --git a/osfmk/profiling/Makefile b/osfmk/profiling/Makefile
index e037d5041..3b2b64363 100644
--- a/osfmk/profiling/Makefile
+++ b/osfmk/profiling/Makefile
@@ -10,9 +10,6 @@ include $(MakeInc_def)
 INSTINC_SUBDIRS = \
 	machine
 
-INSTINC_SUBDIRS_PPC = \
-	ppc
-
 INSTINC_SUBDIRS_I386 = \
 	i386
 
@@ -25,9 +22,6 @@ INSTINC_SUBDIRS_ARM = \
 EXPINC_SUBDIRS = \
 	machine
 
-EXPINC_SUBDIRS_PPC = \
-	ppc
-
 EXPINC_SUBDIRS_I386 = \
 	i386
 
diff --git a/osfmk/profiling/machine/profile-md.h b/osfmk/profiling/machine/profile-md.h
index 66f783531..028bde46d 100644
--- a/osfmk/profiling/machine/profile-md.h
+++ b/osfmk/profiling/machine/profile-md.h
@@ -28,9 +28,7 @@
 #ifndef _MACH_MACHINE_PROFILE_MD_H
 #define _MACH_MACHINE_PROFILE_MD_H
 
-#if defined (__ppc__)
-#include "profiling/ppc/profile-md.h"
-#elif defined (__i386__) || defined (__x86_64__)
+#if defined (__i386__) || defined (__x86_64__)
 #include "profiling/i386/profile-md.h"
 #else
 #error architecture not supported
diff --git a/osfmk/profiling/ppc/profile-md.h b/osfmk/profiling/ppc/profile-md.h
deleted file mode 100644
index d8d83698e..000000000
--- a/osfmk/profiling/ppc/profile-md.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- * 
- */
-/*
- * HISTORY
- * 
- * Revision 1.1.1.1  1998/09/22 21:05:49  wsanchez
- * Import of Mac OS X kernel (~semeria)
- *
- * Revision 1.1.1.1  1998/03/07 02:26:08  wsanchez
- * Import of OSF Mach kernel (~mburg)
- *
- * Revision 1.1.8.1  1996/12/09  16:57:22  stephen
- * 	nmklinux_1.0b3_shared into pmk1.1
- * 	[1996/12/09  11:13:16  stephen]
- *
- * Revision 1.1.6.1  1996/04/11  11:20:35  emcmanus
- * 	Copied from mainline.ppc.
- * 	[1996/04/11  08:26:36  emcmanus]
- * 
- * 	hppa merge
- * 	[1995/03/15  09:47:27  bruel]
- * 
- * Revision 1.1.4.1  1995/11/23  17:37:28  stephen
- * 	first powerpc checkin to mainline.ppc
- * 	[1995/11/23  16:46:29  stephen]
- * 
- * Revision 1.1.2.1  1995/08/25  06:50:17  stephen
- * 	Initial checkin of files for PowerPC port
- * 	[1995/08/23  15:05:31  stephen]
- * 
- * Revision 1.1.2.1  1995/02/14  14:25:16  bruel
- * 	First Revision.
- * 	[95/01/27            bruel]
- * 
- * $EndLog$
- */
-
-#ifndef _PROFILE_MD_H
-#define _PROFILE_MD_H
-
-/*
- * Define the interfaces between the assembly language profiling support
- * that is common between the kernel, mach servers, and user space library.
- */
-
-/*
- * Integer types used.
- */
-
-typedef	long		prof_ptrint_t;	/* hold either pointer or signed int */
-typedef	unsigned long	prof_uptrint_t;	/* hold either pointer or unsigned int */
-typedef	long		prof_lock_t;	/* lock word type */
-typedef unsigned char	prof_flag_t;	/* type for boolean flags */
-
-/*
- * Double precision counter.
- */
-
-typedef struct prof_cnt_t {
-	prof_uptrint_t	low;		/* low 32 bits of counter */
-	prof_uptrint_t	high;		/* high 32 bits of counter */
-} prof_cnt_t;
-
-#define PROF_CNT_INC(cnt)	((++((cnt).low) == 0) ? ++((cnt).high) : 0)
-#define PROF_CNT_ADD(cnt,val)	(((((cnt).low + (val)) < (val)) ? ((cnt).high++) : 0), ((cnt).low += (val)))
-#define PROF_CNT_LADD(cnt,val)	(PROF_CNT_ADD(cnt,(val).low), (cnt).high += (val).high)
-#define PROF_CNT_SUB(cnt,val)	(((((cnt).low - (val)) > (cnt).low) ? ((cnt).high--) : 0), ((cnt).low -= (val)))
-#define PROF_CNT_LSUB(cnt,val)	(PROF_CNT_SUB(cnt,(val).low), (cnt).high -= (val).high)
-
-#define LPROF_ULONG_TO_CNT(cnt,val)	PROF_ULONG_TO_CNT(cnt,val)
-#define LPROF_CNT_INC(lp)		PROF_CNT_INC(lp)
-#define LPROF_CNT_ADD(lp,val)		PROF_CNT_ADD(lp,val)
-#define LPROF_CNT_LADD(lp,val)		PROF_CNT_LADD(lp,val)
-#define LPROF_CNT_SUB(lp,val)		PROF_CNT_SUB(lp,val)
-#define LPROF_CNT_LSUB(lp,val)		PROF_CNT_LSUB(lp,val)
-#define	LPROF_CNT_OVERFLOW(lp,high,low)	PROF_CNT_OVERFLOW(lp,high,low)
-#define LPROF_CNT_TO_ULONG(lp)		PROF_CNT_TO_ULONG(lp)
-#define LPROF_CNT_TO_LDOUBLE(lp)	PROF_CNT_TO_LDOUBLE(lp)
-#define LPROF_CNT_TO_DECIMAL(buf,cnt)	PROF_CNT_TO_DECIMAL(buf,cnt)
-#define LPROF_CNT_EQ_0(cnt)		PROF_CNT_EQ_0(cnt)
-#define LPROF_CNT_NE_0(cnt)		PROF_CNT_NE_0(cnt)
-#define LPROF_CNT_EQ(cnt1,cnt2)		PROF_CNT_EQ(cnt1,cnt2)
-#define LPROF_CNT_NE(cnt1,cnt2)		PROF_CNT_NE(cnt1,cnt2)
-#define LPROF_CNT_GT(cnt1,cnt2)		PROF_CNT_GT(cnt1,cnt2)
-#define LPROF_CNT_LT(cnt1,cnt2)		PROF_CNT_LT(cnt1,cnt2)
-#define LPROF_CNT_DIGITS		PROF_CNT_DIGITS
-
-
-/*
- * Types of the profil counter.
- */
-
-typedef unsigned short	HISTCOUNTER;		/* profil */
-typedef prof_cnt_t	LHISTCOUNTER;		/* lprofil */
-
-struct profile_stats {			/* Debugging counters */
-	prof_uptrint_t major_version;	/* major version number */
-	prof_uptrint_t minor_version;	/* minor version number */
-};
-
-struct profile_md {
-	int major_version;		/* major version number */
-	int minor_version;		/* minor version number */
-};
-
-#define PROFILE_MAJOR_VERSION 1
-#define PROFILE_MINOR_VERSION 1
-
-#endif /* _PROFILE_MD_H */
-
-
-
-
-
-
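The double-precision counter the deleted header carried is a classic carry-propagation trick worth spelling out: PROF_CNT_INC detects low-word wrap because ++low yields 0 only on overflow, and PROF_CNT_ADD detects carry because an unsigned sum that overflows is necessarily smaller than either addend. A standalone sketch of the same arithmetic in plain C, outside the kernel:

#include <stdio.h>

typedef unsigned long prof_uptrint_t;

typedef struct prof_cnt_t {
	prof_uptrint_t low;	/* low word of the counter */
	prof_uptrint_t high;	/* high word of the counter */
} prof_cnt_t;

/* ++low wraps to 0 exactly on overflow; bump the high word then. */
#define PROF_CNT_INC(cnt) ((++((cnt).low) == 0) ? ++((cnt).high) : 0)

/* On unsigned overflow, low + val < val, so carry into the high word. */
#define PROF_CNT_ADD(cnt, val) \
	(((((cnt).low + (val)) < (val)) ? ((cnt).high++) : 0), ((cnt).low += (val)))

int main(void)
{
	prof_cnt_t cnt = { (prof_uptrint_t)-1, 0 };	/* low word saturated */

	PROF_CNT_INC(cnt);			/* wraps: low -> 0, high -> 1 */
	printf("high=%lu low=%lu\n", cnt.high, cnt.low);
	PROF_CNT_ADD(cnt, (prof_uptrint_t)-1);	/* low was 0: no carry */
	printf("high=%lu low=%lu\n", cnt.high, cnt.low);
	return 0;
}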
diff --git a/osfmk/vm/bsd_vm.c b/osfmk/vm/bsd_vm.c
index 9b583cd0c..cd8dc8317 100644
--- a/osfmk/vm/bsd_vm.c
+++ b/osfmk/vm/bsd_vm.c
@@ -92,26 +92,6 @@ mach_get_vm_end(vm_map_t map)
 	return( vm_map_last_entry(map)->vme_end);
 }
 
-/*
- * Legacy routines to get the start and end for a vm_map_t.  They
- * return them in the vm_offset_t format.  So, they should only be
- * called on maps that are the same size as the kernel map for
- * accurate results.
- */
-vm_offset_t
-get_vm_start(
-	vm_map_t map)
-{
-	return(CAST_DOWN(vm_offset_t, vm_map_first_entry(map)->vme_start));
-}
-
-vm_offset_t
-get_vm_end(
-	vm_map_t map)
-{
-	return(CAST_DOWN(vm_offset_t, vm_map_last_entry(map)->vme_end));
-}
-
 /* 
  * BSD VNODE PAGER 
  */
@@ -128,6 +108,7 @@ const struct memory_object_pager_ops vnode_pager_ops = {
 	vnode_pager_synchronize,
 	vnode_pager_map,
 	vnode_pager_last_unmap,
+	NULL, /* data_reclaim */
 	"vnode pager"
 };
 
@@ -602,8 +583,10 @@ vnode_pager_bootstrap(void)
 	size = (vm_size_t) sizeof(struct vnode_pager);
 	vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
 				PAGE_SIZE, "vnode pager structures");
+	zone_change(vnode_pager_zone, Z_CALLERACCT, FALSE);
 	zone_change(vnode_pager_zone, Z_NOENCRYPT, TRUE);
 
+
 #if CONFIG_CODE_DECRYPTION
 	apple_protect_pager_bootstrap();
 #endif	/* CONFIG_CODE_DECRYPTION */
@@ -749,6 +732,22 @@ vnode_pager_check_hard_throttle(
 	return KERN_SUCCESS;
 }
 
+kern_return_t
+vnode_pager_get_isSSD(
+	memory_object_t		mem_obj,
+	boolean_t		*isSSD)
+{
+	vnode_pager_t	vnode_object;
+
+	if (mem_obj->mo_pager_ops != &vnode_pager_ops)
+		return KERN_INVALID_ARGUMENT;
+
+	vnode_object = vnode_pager_lookup(mem_obj);
+
+	*isSSD = vnode_pager_isSSD(vnode_object->vnode_handle);
+	return KERN_SUCCESS;
+}
+
 kern_return_t
 vnode_pager_get_object_size(
 	memory_object_t		mem_obj,
@@ -821,6 +820,25 @@ vnode_pager_get_object_cs_blobs(
 					blobs);
 }
 
+#if CHECK_CS_VALIDATION_BITMAP
+kern_return_t
+vnode_pager_cs_check_validation_bitmap( 
+	memory_object_t	mem_obj, 
+	memory_object_offset_t	offset,
+        int		optype	)
+{
+	vnode_pager_t	vnode_object;
+
+	if (mem_obj == MEMORY_OBJECT_NULL ||
+	    mem_obj->mo_pager_ops != &vnode_pager_ops) {
+		return KERN_INVALID_ARGUMENT;
+	}
+
+	vnode_object = vnode_pager_lookup(mem_obj);
+	return ubc_cs_check_validation_bitmap( vnode_object->vnode_handle, offset, optype );
+}
+#endif /* CHECK_CS_VALIDATION_BITMAP */
+
 /*
  *
  */
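Note that vnode_pager_get_isSSD validates the ops vector itself, so a caller can probe an arbitrary memory object and simply treat KERN_INVALID_ARGUMENT as "not backed by a vnode". A hedged sketch of how a caller might consume the hint; the helper and the fall-back policy are illustrative, not part of this patch:

/* Illustrative caller: treat any failure as "no SSD hint", since the
 * device, freezer, and other pagers return KERN_INVALID_ARGUMENT. */
static boolean_t
backing_store_is_ssd(memory_object_t pager)
{
	boolean_t isSSD = FALSE;

	if (vnode_pager_get_isSSD(pager, &isSSD) != KERN_SUCCESS)
		return FALSE;
	return isSSD;
}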
diff --git a/osfmk/vm/default_freezer.c b/osfmk/vm/default_freezer.c
new file mode 100644
index 000000000..dd8197d7d
--- /dev/null
+++ b/osfmk/vm/default_freezer.c
@@ -0,0 +1,616 @@
+/*
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#if CONFIG_FREEZE
+
+#include "default_freezer.h"
+
+/*
+ * Indicates that a page has been faulted back in.
+ */
+#define FREEZER_OFFSET_ABSENT ((vm_object_offset_t)(-1))
+
+/*
+ * Create the mapping table that will
+ * tell us the object/offset pair that
+ * corresponds to the page being sent
+ * out or being brought back in.
+ */
+
+void*
+default_freezer_mapping_create(vm_object_t object, vm_offset_t offset)
+{
+	default_freezer_mapping_table_t table;
+	
+	table = kalloc(sizeof(struct default_freezer_mapping_table));
+	if (table) {
+		memset(table, 0, sizeof(*table));
+	} else {
+		panic("Could not allocate mapping table\n");
+	}
+	
+	table->object = object;
+	table->offset = offset;
+	
+	return (void*)table;
+}
+
+void
+default_freezer_mapping_free(void **table, boolean_t all)
+{	
+	default_freezer_mapping_table_t freezer_table = *((default_freezer_mapping_table_t *)table);
+	assert(freezer_table);
+	
+	if (all) {
+		do { 
+			default_freezer_mapping_table_t next = freezer_table->next;
+			kfree(freezer_table, sizeof(*freezer_table));
+			freezer_table = next;	
+		} while (freezer_table);
+	} else {
+		kfree(freezer_table, sizeof(*freezer_table));
+	}
+}
+ 
+kern_return_t
+default_freezer_mapping_store(
+		default_freezer_mapping_table_t *table,
+		memory_object_offset_t table_offset,
+		memory_object_t memory_object,
+		memory_object_offset_t offset)
+{
+	default_freezer_mapping_table_entry_t entry;
+	uint32_t index;
+	
+	assert(*table);
+	
+	if ((*table)->index >= MAX_FREEZE_TABLE_ENTRIES) {
+		vm_object_t compact_object = (*table)->object;
+		default_freezer_mapping_table_t next;
+		
+		next = default_freezer_mapping_create(compact_object, table_offset);
+		if (!next) {
+			return KERN_FAILURE;
+		}
+		
+		(*table)->next = next;
+		*table = next;
+	}
+
+	index = (*table)->index++;
+	entry = &(*table)->entry[index];
+
+	entry->memory_object = memory_object;
+	entry->offset = offset;
+	
+	return KERN_SUCCESS;
+}
+
+kern_return_t
+default_freezer_mapping_update(
+		default_freezer_mapping_table_t table, 
+		memory_object_t memory_object,
+		memory_object_offset_t offset,
+		memory_object_offset_t *table_offset, /*OUT: contains the offset into the compact object*/
+		boolean_t remove_entry)
+{
+
+	kern_return_t kr = KERN_SUCCESS;
+	vm_object_offset_t compact_offset;
+	default_freezer_mapping_table_entry_t entry;
+	uint32_t index = 0;
+	
+	if (table == NULL){
+		return KERN_FAILURE;
+	}
+
+	compact_offset = table->offset;
+
+	while (1) {	
+		if (index >= table->index) {
+			if (table->next) {
+				table = table->next;
+				index = 0;
+			} else {
+				/* End of tables and we didn't find our candidate entry */
+				kr = KERN_FAILURE;
+				break;
+			}
+		}
+
+		entry = &table->entry[index];
+
+		if (memory_object == entry->memory_object && offset == entry->offset) {
+			if (remove_entry == TRUE) {
+				/*
+				 * Mark the page absent whilst retaining the object 
+				 * for cleanup during thaw.
+				 */
+				entry->offset = FREEZER_OFFSET_ABSENT;
+			}
+			if (table_offset != NULL) {
+				*table_offset = compact_offset;
+			}
+			break;
+		}
+	
+		index++;
+		compact_offset += PAGE_SIZE;
+	}
+	return kr;
+}
+
+/*
+ * Create a freezer memory object for this
+ * vm object.
+ */
+void
+default_freezer_memory_object_create(
+			vm_object_t object,
+			vm_object_t compact_object,
+			default_freezer_mapping_table_t table)
+{
+
+	default_freezer_memory_object_t fo = NULL;
+	
+	fo = kalloc(sizeof(struct default_freezer_memory_object));
+
+	if (fo) {
+		memory_object_control_t control = NULL;
+
+		memset(fo, 0, sizeof(*fo));
+		
+		control = memory_object_control_allocate(object);
+		assert (control != MEMORY_OBJECT_CONTROL_NULL);
+
+		df_memory_object_init((memory_object_t)fo, control, 0);		
+		fo->fo_compact_object = compact_object;
+		fo->fo_table = table;
+		
+		object->pager = (memory_object_t)fo;
+		object->pager_created = TRUE;
+		object->pager_initialized = TRUE;
+		object->pager_ready = TRUE;
+		object->pager_trusted = TRUE;
+		object->pager_control = control;
+	} else {
+		panic(" Could not allocate freezer object\n");
+	}
+}
+
+void
+default_freezer_pack_page(
+		vm_page_t p, 
+		vm_object_t compact_object, 
+		vm_object_offset_t offset, 
+		void **table)
+{
+
+	default_freezer_mapping_table_t *freeze_table = (default_freezer_mapping_table_t *)table;
+	memory_object_t memory_object = p->object->pager;
+	
+	if (memory_object == NULL) {
+		default_freezer_memory_object_create(p->object, compact_object, *freeze_table);
+		memory_object = p->object->pager;
+	} else {
+		default_freezer_memory_object_t fo = (default_freezer_memory_object_t)memory_object;
+		if (fo->fo_compact_object == VM_OBJECT_NULL) {
+			fo->fo_compact_object = compact_object;
+			fo->fo_table = *freeze_table;
+		}
+	}
+	
+	default_freezer_mapping_store(freeze_table, offset, memory_object, p->offset + p->object->paging_offset);
+
+	/* Remove from the original and insert into the compact destination object */
+	vm_page_rename(p, compact_object, offset, FALSE);
+}
+
+void
+default_freezer_unpack(
+		vm_object_t object, 
+		void **table)
+{
+	
+	vm_page_t p = VM_PAGE_NULL;
+	uint32_t index = 0;
+	vm_object_t src_object = VM_OBJECT_NULL;
+	memory_object_t	src_mem_object = MEMORY_OBJECT_NULL;
+	memory_object_offset_t	src_offset = 0;
+	vm_object_offset_t	compact_offset = 0;
+	default_freezer_memory_object_t	fo = NULL;
+	default_freezer_memory_object_t last_memory_object_thawed = NULL;
+	default_freezer_mapping_table_t freeze_table = *(default_freezer_mapping_table_t *)table;
+
+	assert(freeze_table);
+	
+	vm_object_lock(object);
+	
+	for (index = 0, compact_offset = 0; ; index++, compact_offset += PAGE_SIZE){
+		if (index >= freeze_table->index) {
+			default_freezer_mapping_table_t table_next;
+			
+			table_next = freeze_table->next; 
+			
+			/* Free the tables as we go along */
+			default_freezer_mapping_free((void**)&freeze_table, FALSE);
+			
+			if (table_next == NULL){
+				break;
+			}
+			
+			freeze_table = table_next;
+			index = 0;
+		}
+
+		/* 
+		 * Skip slots that represent deallocated memory objects.
+		 */
+		src_mem_object = freeze_table->entry[index].memory_object;
+		if (src_mem_object == MEMORY_OBJECT_NULL)
+			continue;
+
+		/* 
+		 * Skip slots that represent faulted pages.
+		 */
+		src_offset = freeze_table->entry[index].offset;
+		if (src_offset != FREEZER_OFFSET_ABSENT) {
+			
+			p = vm_page_lookup(object, compact_offset);
+			assert(p);
+
+			fo = (default_freezer_memory_object_t)src_mem_object;
+		
+			src_object = memory_object_control_to_vm_object(fo->fo_pager_control); 
+	
+			/* Move back over from the freeze object to the original */
+			vm_object_lock(src_object);
+			vm_page_rename(p, src_object, src_offset - src_object->paging_offset, FALSE);
+			vm_object_unlock(src_object);
+		}
+		
+		if (src_mem_object != ((memory_object_t)last_memory_object_thawed)){
+			if (last_memory_object_thawed != NULL){
+				last_memory_object_thawed->fo_compact_object = VM_OBJECT_NULL;
+				last_memory_object_thawed->fo_table = NULL;
+			}
+			last_memory_object_thawed = (default_freezer_memory_object_t)src_mem_object;
+		}
+	}
+	
+	if (last_memory_object_thawed != NULL){
+		last_memory_object_thawed->fo_compact_object = VM_OBJECT_NULL;
+		last_memory_object_thawed->fo_table = NULL;
+	}
+	
+	vm_object_unlock(object);
+}
+
+vm_object_t
+default_freezer_get_compact_vm_object(void** table)
+{
+	default_freezer_mapping_table_t freeze_table = *((default_freezer_mapping_table_t *)table);
+	assert(freeze_table);
+	return ((vm_object_t)(freeze_table->object));
+}
+
+void
+df_memory_object_reference(__unused memory_object_t mem_obj)
+{
+	
+	/* No-op */
+}
+
+void
+df_memory_object_deallocate(memory_object_t mem_obj)
+{
+
+	default_freezer_memory_object_t	fo = (default_freezer_memory_object_t)mem_obj;
+	vm_object_t compact_object = fo->fo_compact_object;
+	
+	assert(fo);
+	
+	if (compact_object != VM_OBJECT_NULL) {
+		
+		default_freezer_mapping_table_t fo_table = fo->fo_table;
+		default_freezer_mapping_table_entry_t entry;
+		boolean_t found = FALSE;
+		uint32_t index = 0;
+		
+		vm_object_lock(compact_object);
+	
+		/* Remove from table */
+		while (1) {	
+			if (index >= fo_table->index) {
+				if (fo_table->next) {
+					fo_table = fo_table->next;
+					index = 0;
+				} else {
+					/* End of tables */
+					break;
+				}
+			}
+
+			entry = &fo_table->entry[index];
+			if (mem_obj == entry->memory_object) {
+				/* It matches, so clear the entry */
+				if (!found) {
+					found = TRUE;
+				} 
+				entry->memory_object = MEMORY_OBJECT_NULL;
+				entry->offset = 0;
+			} else if (MEMORY_OBJECT_NULL != entry->memory_object) {
+				/* We have a different valid object; we're done */
+				if (found) {
+					break;
+				}
+			}
+		
+			index++;
+		}
+	
+		vm_object_unlock(compact_object);
+	}
+	
+	kfree(fo, sizeof(*fo));
+}
+
+kern_return_t
+df_memory_object_init(
+		memory_object_t mem_obj,
+		memory_object_control_t control,
+		__unused memory_object_cluster_size_t pager_page_size)
+{
+
+	default_freezer_memory_object_t	fo = (default_freezer_memory_object_t)mem_obj;
+	assert(fo);
+
+	fo->fo_pager_ops = &default_freezer_ops;
+	fo->fo_pager_header.io_bits = IKOT_MEMORY_OBJECT;
+	fo->fo_pager_control = control;
+	
+	return KERN_SUCCESS;
+}
+
+kern_return_t
+df_memory_object_terminate(memory_object_t mem_obj)
+{
+
+	default_freezer_memory_object_t	fo = (default_freezer_memory_object_t)mem_obj;
+	assert(fo);
+	memory_object_control_deallocate(fo->fo_pager_control);
+	return KERN_SUCCESS;
+}
+
+kern_return_t
+df_memory_object_data_request(
+		memory_object_t mem_obj, 
+		memory_object_offset_t offset,
+		memory_object_cluster_size_t length,
+		vm_prot_t protection_required,
+		memory_object_fault_info_t fault_info)
+{
+
+	vm_object_t	src_object = VM_OBJECT_NULL, compact_object = VM_OBJECT_NULL;
+	memory_object_offset_t	compact_offset = 0;
+	memory_object_t pager = NULL;
+	kern_return_t kr = KERN_SUCCESS;
+
+	default_freezer_memory_object_t fo = (default_freezer_memory_object_t)mem_obj;
+
+	src_object = memory_object_control_to_vm_object(fo->fo_pager_control);
+	compact_object = fo->fo_compact_object;
+	
+	if (compact_object != VM_OBJECT_NULL) {
+		
+		vm_object_lock(compact_object);
+	
+		kr = default_freezer_mapping_update(fo->fo_table,
+							mem_obj,
+							offset,
+							&compact_offset,
+							FALSE);
+						
+		vm_object_unlock(compact_object);
+	} else {
+		kr = KERN_FAILURE;
+	}
+	
+	if (length == 0){
+		/* Caller is just querying to see if we have the page */
+		return kr;
+	}
+
+	if (kr != KERN_SUCCESS){
+
+		unsigned int request_flags;
+		upl_t        upl;
+		unsigned int page_list_count = 0;
+
+		request_flags = UPL_NO_SYNC | UPL_RET_ONLY_ABSENT | UPL_SET_LITE;
+		/*
+		 * Should we decide to activate USE_PRECIOUS (from default_pager_internal.h)
+		 * here, then the request_flags will need to add these to the ones above:
+		 *
+		 * request_flags |= UPL_PRECIOUS | UPL_CLEAN_IN_PLACE
+		 */
+		request_flags |= UPL_REQUEST_SET_DIRTY;
+
+		memory_object_super_upl_request(fo->fo_pager_control,
+						(memory_object_offset_t)offset,
+						PAGE_SIZE, PAGE_SIZE, 
+						&upl, NULL, &page_list_count,
+						request_flags);
+
+		upl_abort(upl, UPL_ABORT_UNAVAILABLE);
+		upl_deallocate(upl);
+		
+		return KERN_SUCCESS;
+	}
+
+	vm_object_lock(compact_object);
+
+	pager = (memory_object_t)compact_object->pager;
+
+	if (!compact_object->pager_ready || pager == MEMORY_OBJECT_NULL){
+		vm_object_unlock(compact_object);
+		return KERN_FAILURE;
+	}
+	
+	vm_object_paging_wait(compact_object, THREAD_UNINT);
+	vm_object_paging_begin(compact_object);
+
+	compact_object->blocked_access = TRUE;
+	vm_object_unlock(compact_object);
+
+	((vm_object_fault_info_t) fault_info)->io_sync = TRUE;
+
+	kr = dp_memory_object_data_request(pager,
+					compact_offset,
+					length,
+					protection_required,
+					fault_info);
+	if (kr == KERN_SUCCESS){
+
+		vm_page_t src_page = VM_PAGE_NULL, dst_page = VM_PAGE_NULL;
+
+		vm_object_lock(compact_object);
+
+		compact_object->blocked_access = FALSE;
+		vm_object_paging_end(compact_object);
+
+		vm_object_lock(src_object);
+
+		if ((src_page = vm_page_lookup(compact_object, compact_offset)) != VM_PAGE_NULL){
+			
+			dst_page = vm_page_lookup(src_object, offset - src_object->paging_offset);
+			
+			VM_PAGE_FREE(dst_page);
+			vm_page_rename(src_page, src_object, offset - src_object->paging_offset, FALSE);
+			
+			if (default_freezer_mapping_update(fo->fo_table,
+							mem_obj,
+							offset,
+							NULL,
+							TRUE) != KERN_SUCCESS) {
+				printf("Page for object: 0x%lx at offset: 0x%lx not found in table\n", (uintptr_t)src_object, (uintptr_t)offset);
+			}
+			
+			PAGE_WAKEUP_DONE(src_page);
+		} else {
+			printf("%d: default_freezer: compact_object doesn't have the page for object 0x%lx at offset 0x%lx \n", kr, (uintptr_t)compact_object, (uintptr_t)compact_offset);
+			kr = KERN_FAILURE;
+		}
+		vm_object_unlock(src_object);
+		vm_object_unlock(compact_object);
+	} else {
+		panic("%d: default_freezer TOC pointed us to default_pager incorrectly\n", kr);
+	}
+	return kr;
+}
+
+kern_return_t
+df_memory_object_data_return(
+		__unused memory_object_t		mem_obj,
+		__unused memory_object_offset_t	offset,
+		__unused memory_object_cluster_size_t			size,
+		__unused memory_object_offset_t	*resid_offset,
+		__unused int		*io_error,
+		__unused boolean_t	dirty,
+		__unused boolean_t	kernel_copy,
+		__unused int	upl_flags)
+{
+
+	panic(" default_freezer: df_memory_object_data_return should not be called\n");
+	return KERN_SUCCESS;
+}
+
+kern_return_t
+df_memory_object_data_initialize(
+		__unused memory_object_t mem_obj,
+		__unused  memory_object_offset_t offset,
+		__unused memory_object_cluster_size_t size)
+{
+	
+	panic(" default_freezer: df_memory_object_data_initialize should not be called\n");
+	return KERN_SUCCESS;
+}
+
+kern_return_t
+df_memory_object_data_unlock(
+		__unused memory_object_t mem_obj,
+		__unused memory_object_offset_t offset,
+		__unused memory_object_size_t length,
+		__unused vm_prot_t prot)
+{
+
+	panic(" default_freezer: df_memory_object_data_unlock should not be called\n");
+	return KERN_FAILURE;
+}
+
+kern_return_t
+df_memory_object_synchronize(
+		__unused memory_object_t mem_obj,
+		__unused memory_object_offset_t offset,
+		__unused memory_object_size_t length,
+		__unused vm_sync_t flags)
+{
+
+	panic(" default_freezer: df_memory_object_synchronize should not be called\n");
+	return KERN_FAILURE;
+}
+
+kern_return_t
+df_memory_object_map(
+		__unused memory_object_t mem_obj,
+		__unused vm_prot_t prot)
+{
+
+	panic(" default_freezer: df_memory_object_map should not be called\n");
+	return KERN_FAILURE;
+}
+
+kern_return_t
+df_memory_object_last_unmap(__unused memory_object_t mem_obj)
+{
+
+	panic(" default_freezer: df_memory_object_last_unmap should not be called\n");
+	return KERN_FAILURE;
+}
+
+
+kern_return_t
+df_memory_object_data_reclaim(
+		__unused memory_object_t mem_obj,
+		__unused boolean_t reclaim_backing_store)
+{
+
+	panic("df_memory_object_data_reclaim\n");
+	return KERN_SUCCESS;
+}
+#endif /* CONFIG_FREEZE */
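A note on the layout the code above relies on: compact offsets are implicit in table position. Entry i of a table corresponds to table->offset + i * PAGE_SIZE, and when default_freezer_mapping_store chains a new table it seeds it with the caller's running table_offset, so the walk in default_freezer_mapping_update can advance a single compact_offset linearly across the whole chain. A simplified, lock-free sketch of that lookup, using the same field names as default_freezer.h:

static kern_return_t
freezer_lookup(default_freezer_mapping_table_t table,
	       memory_object_t pager, memory_object_offset_t offset,
	       memory_object_offset_t *compact_offset_out)
{
	memory_object_offset_t compact_offset = table->offset;
	uint32_t index = 0;

	for (;;) {
		if (index >= table->index) {	/* exhausted this table... */
			if (table->next == NULL)
				return KERN_FAILURE;	/* ...and the chain */
			table = table->next;
			index = 0;
		}
		if (table->entry[index].memory_object == pager &&
		    table->entry[index].offset == offset) {
			*compact_offset_out = compact_offset;
			return KERN_SUCCESS;	/* one page per slot */
		}
		index++;
		compact_offset += PAGE_SIZE;
	}
}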
diff --git a/osfmk/vm/default_freezer.h b/osfmk/vm/default_freezer.h
new file mode 100644
index 000000000..46730fd71
--- /dev/null
+++ b/osfmk/vm/default_freezer.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#if CONFIG_FREEZE
+
+#ifndef	_DEFAULT_FREEZER_H_
+#define _DEFAULT_FREEZER_H_
+
+#ifdef MACH_KERNEL
+
+#include <default_pager/default_pager_internal.h>
+#include <default_pager/default_pager_object_server.h>
+#include <mach/memory_object_default_server.h>
+#include <mach/memory_object_control.h>
+#include <mach/memory_object_types.h>
+#include <mach/memory_object_server.h>
+#include <mach/upl.h>
+#include <mach/vm_map.h>
+#include <vm/memory_object.h>
+#include <vm/vm_pageout.h> 
+#include <vm/vm_map.h>
+
+
+/*
+ * Begin declaration for default_freezer_ops.
+*/
+extern void   df_memory_object_reference(memory_object_t);
+extern void   df_memory_object_deallocate(memory_object_t);
+extern kern_return_t   df_memory_object_init(memory_object_t,
+					     memory_object_control_t,
+					     memory_object_cluster_size_t);
+extern	kern_return_t df_memory_object_terminate(memory_object_t);
+extern	kern_return_t   df_memory_object_data_request(memory_object_t, 
+						      memory_object_offset_t,
+						      memory_object_cluster_size_t,
+						      vm_prot_t,
+						      memory_object_fault_info_t);
+extern kern_return_t df_memory_object_data_return(memory_object_t,
+						    memory_object_offset_t,
+						    memory_object_cluster_size_t,
+						    memory_object_offset_t *,
+						    int *,
+						    boolean_t,
+						    boolean_t,
+						    int);
+extern kern_return_t df_memory_object_data_initialize(memory_object_t,
+						      memory_object_offset_t,
+						      memory_object_cluster_size_t);
+extern kern_return_t df_memory_object_data_unlock(memory_object_t,
+						  memory_object_offset_t,
+						  memory_object_size_t,
+						  vm_prot_t);
+extern kern_return_t df_memory_object_synchronize(memory_object_t,
+						  memory_object_offset_t,
+						  memory_object_size_t,
+						  vm_sync_t);
+extern kern_return_t df_memory_object_map(memory_object_t,
+					  vm_prot_t);
+extern kern_return_t df_memory_object_last_unmap(memory_object_t);
+
+extern kern_return_t df_memory_object_data_reclaim( memory_object_t,
+						    boolean_t);
+/*
+ * End declaration for default_freezer_ops.
+*/
+
+const struct memory_object_pager_ops default_freezer_ops = {
+	df_memory_object_reference,
+	df_memory_object_deallocate,
+	df_memory_object_init,
+	df_memory_object_terminate,
+	df_memory_object_data_request,
+	df_memory_object_data_return,
+	df_memory_object_data_initialize,
+	df_memory_object_data_unlock,
+	df_memory_object_synchronize,
+	df_memory_object_map,
+	df_memory_object_last_unmap,
+	df_memory_object_data_reclaim,
+	"default freezer"
+};
+
+#define MAX_FREEZE_TABLE_ENTRIES 128
+ 
+struct default_freezer_mapping_table_entry {
+	memory_object_t memory_object; /* memory object will lead us to the most current VM object */
+	memory_object_offset_t offset;
+};
+typedef struct default_freezer_mapping_table *default_freezer_mapping_table_t;
+
+struct default_freezer_mapping_table {
+	struct default_freezer_mapping_table *next;
+	vm_object_t object; /* packed object */
+	vm_object_offset_t offset;
+	unsigned int index;
+	struct default_freezer_mapping_table_entry entry[MAX_FREEZE_TABLE_ENTRIES];
+};
+typedef struct default_freezer_mapping_table_entry *default_freezer_mapping_table_entry_t;
+
+struct default_freezer_memory_object{
+	struct ipc_object_header	fo_pager_header;	/* fake ip_kotype() */
+	memory_object_pager_ops_t	fo_pager_ops; 		/* == &default_freezer_ops */
+	memory_object_control_t		fo_pager_control;
+	vm_object_t			        fo_compact_object;
+	default_freezer_mapping_table_t		fo_table;
+};
+typedef struct default_freezer_memory_object *default_freezer_memory_object_t;
+
+
+__private_extern__ void*	default_freezer_mapping_create(vm_object_t, vm_offset_t);
+
+__private_extern__ void		default_freezer_mapping_free(void**, boolean_t all);
+
+__private_extern__  kern_return_t	default_freezer_mapping_store( default_freezer_mapping_table_t *,
+									memory_object_offset_t,
+									memory_object_t,
+									memory_object_offset_t );
+
+__private_extern__ kern_return_t	default_freezer_mapping_update( default_freezer_mapping_table_t, 
+									memory_object_t,
+									memory_object_offset_t,
+									memory_object_offset_t *,
+									boolean_t );
+
+__private_extern__  void	default_freezer_memory_object_create(vm_object_t, vm_object_t, default_freezer_mapping_table_t);
+
+__private_extern__  void	default_freezer_pack_page(vm_page_t, vm_object_t, vm_object_offset_t, void**);
+
+__private_extern__  void	default_freezer_unpack(vm_object_t, void**);
+
+__private_extern__ vm_object_t	default_freezer_get_compact_vm_object(void**);
+
+#endif /* MACH_KERNEL */
+#endif /* _DEFAULT_FREEZER_H_ */
+#endif /* CONFIG_FREEZE */
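Since every slot maps exactly one page, MAX_FREEZE_TABLE_ENTRIES bounds how much of the compact object one table can describe: 128 entries covers 128 pages, i.e. 512 KB with 4 KB pages, before a new table is chained on. A compile-time sanity check of that arithmetic; the 4 KB page size is an assumption, not something this header fixes:

#define FREEZER_ASSUMED_PAGE_SIZE	4096u	/* assumption: 4 KB pages */

_Static_assert(MAX_FREEZE_TABLE_ENTRIES * FREEZER_ASSUMED_PAGE_SIZE ==
	       512u * 1024u,
	       "one mapping table spans 512 KB of compact object");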
diff --git a/osfmk/vm/device_vm.c b/osfmk/vm/device_vm.c
index 575351078..f3df12b70 100644
--- a/osfmk/vm/device_vm.c
+++ b/osfmk/vm/device_vm.c
@@ -75,6 +75,7 @@ const struct memory_object_pager_ops device_pager_ops = {
 	device_pager_synchronize,
 	device_pager_map,
 	device_pager_last_unmap,
+	NULL, /* data_reclaim */
 	"device pager"
 };
 
@@ -126,7 +127,7 @@ device_pager_bootstrap(void)
 	size = (vm_size_t) sizeof(struct device_pager);
 	device_pager_zone = zinit(size, (vm_size_t) MAX_DNODE*size,
 				PAGE_SIZE, "device node pager structures");
-
+	zone_change(device_pager_zone, Z_CALLERACCT, FALSE);
 	return;
 }
 
diff --git a/osfmk/vm/memory_object.c b/osfmk/vm/memory_object.c
index 0fece7fc0..de7baff29 100644
--- a/osfmk/vm/memory_object.c
+++ b/osfmk/vm/memory_object.c
@@ -101,6 +101,7 @@
 #include <vm/vm_kern.h>		/* For kernel_map, vm_move */
 #include <vm/vm_map.h>		/* For vm_map_pageable */
 #include <vm/vm_purgeable_internal.h>	/* Needed by some vm_page.h macros */
+#include <vm/vm_shared_region.h>
 
 #if	MACH_PAGEMAP
 #include <vm/vm_external.h>
@@ -139,10 +140,10 @@ decl_lck_mtx_data(,	memory_manager_default_lock)
 
 typedef	int	memory_object_lock_result_t;
 
-#define MEMORY_OBJECT_LOCK_RESULT_DONE          0
-#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK    1
-#define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN    2
-#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN   3
+#define MEMORY_OBJECT_LOCK_RESULT_DONE          	0
+#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK    	1
+#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN   	2
+#define MEMORY_OBJECT_LOCK_RESULT_MUST_FREE		3
 
 memory_object_lock_result_t memory_object_lock_page(
 				vm_page_t		m,
@@ -174,185 +175,149 @@ memory_object_lock_page(
             "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n",
             m, should_return, should_flush, prot, 0);
 
-	/*
-	 *	If we cannot change access to the page,
-	 *	either because a mapping is in progress
-	 *	(busy page) or because a mapping has been
-	 *	wired, then give up.
-	 */
 
 	if (m->busy || m->cleaning) {
-		if (m->list_req_pending && (m->pageout || m->cleaning) &&
+		if (m->list_req_pending &&
 		    should_return == MEMORY_OBJECT_RETURN_NONE &&
 		    should_flush == TRUE) {
-			/*
-			 * if pageout is set, page was earmarked by vm_pageout_scan
-			 * to be cleaned and stolen... if cleaning is set, we're
-			 * pre-cleaning pages for a hibernate...
-			 * in either case, we're going
-			 * to take it back since we are being asked to
-			 * flush the page w/o cleaning it (i.e. we don't
-			 * care that it's dirty, we want it gone from
-			 * the cache) and we don't want to stall
-			 * waiting for it to be cleaned for 2 reasons...
-			 * 1 - no use paging it out since we're probably
-			 *     shrinking the file at this point or we no
-			 *     longer care about the data in the page
-			 * 2 - if we stall, we may cause a deadlock in
-			 *     the FS trying to acquire its locks
-			 *     on the VNOP_PAGEOUT path presuming that
-			 *     those locks are already held on the truncate
-			 *     path before calling through to this function
-			 *
-			 * so undo all of the state that vm_pageout_scan
-			 * hung on this page
-			 */
-			m->busy = FALSE;
 
-			vm_pageout_queue_steal(m, FALSE);
+			if (m->absent) {
+				/*
+				 * this is the list_req_pending | absent | busy case
+				 * which originates from vm_fault_page. 
+				 * Combine that with should_flush == TRUE and we
+				 * have a case where we need to toss the page from
+				 * the object.
+				 */
+				if (!VM_PAGE_WIRED(m)) {
+					return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE);
+				} else {
+					return (MEMORY_OBJECT_LOCK_RESULT_DONE);
+				}
+			}
+			if (m->pageout || m->cleaning) {
+				/*
+				 * if pageout is set, page was earmarked by vm_pageout_scan
+				 * to be cleaned and stolen... if cleaning is set, we're
+				 * pre-cleaning pages for a hibernate...
+				 * in either case, we're going
+				 * to take it back since we are being asked to
+				 * flush the page w/o cleaning it (i.e. we don't
+				 * care that it's dirty, we want it gone from
+				 * the cache) and we don't want to stall
+				 * waiting for it to be cleaned for 2 reasons...
+				 * 1 - no use paging it out since we're probably
+				 *     shrinking the file at this point or we no
+				 *     longer care about the data in the page
+				 * 2 - if we stall, we may cause a deadlock in
+				 *     the FS trying to acquire its locks
+				 *     on the VNOP_PAGEOUT path presuming that
+				 *     those locks are already held on the truncate
+				 *     path before calling through to this function
+				 *
+				 * so undo all of the state that vm_pageout_scan
+				 * hung on this page
+				 */
+
+				vm_pageout_queue_steal(m, FALSE);
+				PAGE_WAKEUP_DONE(m);
+			} else {
+				panic("list_req_pending on page %p without absent/pageout/cleaning set\n", m);
+			}
 		} else
-			return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
+			return (MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
 	}
-
 	/*
 	 *	Don't worry about pages for which the kernel
 	 *	does not have any data.
 	 */
-
 	if (m->absent || m->error || m->restart) {
-		if(m->error && should_flush) {
-			/* dump the page, pager wants us to */
-			/* clean it up and there is no      */
-			/* relevant data to return */
-			if ( !VM_PAGE_WIRED(m)) {
-				VM_PAGE_FREE(m);
-				return(MEMORY_OBJECT_LOCK_RESULT_DONE);
-			}
-		} else {
-			return(MEMORY_OBJECT_LOCK_RESULT_DONE);
+		if (m->error && should_flush && !VM_PAGE_WIRED(m)) {
+			/*
+			 * dump the page, pager wants us to
+			 * clean it up and there is no
+			 * relevant data to return
+			 */
+			return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE);
 		}
+		return (MEMORY_OBJECT_LOCK_RESULT_DONE);
 	}
-
 	assert(!m->fictitious);
 
-	/*
-	 *	If the page is wired, just clean or return the page if needed.
-	 *	Wired pages don't get flushed or disconnected from the pmap.
-	 */
-
 	if (VM_PAGE_WIRED(m)) {
-		if (memory_object_should_return_page(m, should_return)) {
-			if (m->dirty)
-				return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
-			else
-				return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
-		}
-
-		return(MEMORY_OBJECT_LOCK_RESULT_DONE);
-	}
-
-	/*
-	 *	If the page is to be flushed, allow
-	 *	that to be done as part of the protection.
-	 */
-
-	if (should_flush)
-		prot = VM_PROT_ALL;
-
-	/*
-	 *	Set the page lock.
-	 *
-	 *	If we are decreasing permission, do it now;
-	 *	let the fault handler take care of increases
-	 *	(pmap_page_protect may not increase protection).
-	 */
+		/*
+		 * The page is wired... just clean or return the page if needed.
+		 * Wired pages don't get flushed or disconnected from the pmap.
+		 */
+		if (memory_object_should_return_page(m, should_return))
+			return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
 
-	if (prot != VM_PROT_NO_CHANGE) {
-	        pmap_page_protect(m->phys_page, VM_PROT_ALL & ~prot);
+		return (MEMORY_OBJECT_LOCK_RESULT_DONE);
+	}		
 
-		PAGE_WAKEUP(m);
+	if (should_flush) {
+		/*
+		 * must do the pmap_disconnect before determining the 
+		 * need to return the page... otherwise it's possible
+		 * for the page to go from the clean to the dirty state
+		 * after we've made our decision
+		 */
+		if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)
+			m->dirty = TRUE;
+	} else {
+		/*
+		 * If we are decreasing permission, do it now;
+		 * let the fault handler take care of increases
+		 * (pmap_page_protect may not increase protection).
+		 */
+		if (prot != VM_PROT_NO_CHANGE)
+			pmap_page_protect(m->phys_page, VM_PROT_ALL & ~prot);
 	}
-
 	/*
-	 *	Handle page returning.
+	 *	Handle returning dirty or precious pages
 	 */
 	if (memory_object_should_return_page(m, should_return)) {
-
 		/*
-		 *	If we weren't planning
-		 *	to flush the page anyway,
-		 *	we may need to remove the
-		 *	page from the pageout
-		 *	system and from physical
-		 *	maps now.
+		 * we used to do a pmap_disconnect here in support
+		 * of memory_object_lock_request, but that routine
+		 * no longer requires this...  in any event, in
+		 * our world, it would turn into a big noop since
+		 * we don't lock the page in any way and as soon
+		 * as we drop the object lock, the page can be
+		 * faulted back into an address space
+		 *
+		 *	if (!should_flush)
+		 *		pmap_disconnect(m->phys_page);
 		 */
-		
-		vm_page_lockspin_queues();
-		VM_PAGE_QUEUES_REMOVE(m);
-		vm_page_unlock_queues();
-
-		if (!should_flush)
-			pmap_disconnect(m->phys_page);
-
-		if (m->dirty)
-			return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
-		else
-			return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
+		return (MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
 	}
 
 	/*
-	 *	Handle flushing
+	 *	Handle flushing clean pages
 	 */
-	if (should_flush) {
-		VM_PAGE_FREE(m);
-	} else {
-		/*
-		 *	XXX Make clean but not flush a paging hint,
-		 *	and deactivate the pages.  This is a hack
-		 *	because it overloads flush/clean with
-		 *	implementation-dependent meaning.  This only
-		 *	happens to pages that are already clean.
-		 */
+	if (should_flush)
+		return (MEMORY_OBJECT_LOCK_RESULT_MUST_FREE);
 
-		if (vm_page_deactivate_hint &&
-		    (should_return != MEMORY_OBJECT_RETURN_NONE)) {
-			vm_page_lockspin_queues();
-			vm_page_deactivate(m);
-			vm_page_unlock_queues();
-		}
-	}
+	/*
+	 * we used to deactivate clean pages at this point,
+	 * but we do not believe that an msync should change
+	 * the 'age' of a page in the cache... here is the
+	 * original comment and code concerning this...
+	 *
+	 *	XXX Make clean but not flush a paging hint,
+	 *	and deactivate the pages.  This is a hack
+	 *	because it overloads flush/clean with
+	 *	implementation-dependent meaning.  This only
+	 *	happens to pages that are already clean.
+	 *
+	 *   if (vm_page_deactivate_hint && (should_return != MEMORY_OBJECT_RETURN_NONE))
+	 *	return (MEMORY_OBJECT_LOCK_RESULT_MUST_DEACTIVATE);
+	 */
 
-	return(MEMORY_OBJECT_LOCK_RESULT_DONE);
+	return (MEMORY_OBJECT_LOCK_RESULT_DONE);
 }
 
-#define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, action, po, ro, ioerr, iosync)    \
-MACRO_BEGIN								\
-									\
-        register int            upl_flags;                              \
-	memory_object_t		pager;					\
-				                   			\
-	if ((pager = (object)->pager) != MEMORY_OBJECT_NULL) {		\
-		vm_object_paging_begin(object);				\
-		vm_object_unlock(object);				\
-									\
-                if (iosync)                                     	\
-                        upl_flags = UPL_MSYNC | UPL_IOSYNC;     	\
-                else                                            	\
-                        upl_flags = UPL_MSYNC;                  	\
-				                   			\
-	   	(void) memory_object_data_return(pager,			\
-			po,						\
-			(memory_object_cluster_size_t)data_cnt,					\
-	                ro,                                             \
-	                ioerr,                                          \
-			(action) == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN,\
-			!should_flush,                                  \
-			upl_flags);                                 	\
-									\
-		vm_object_lock(object);					\
-		vm_object_paging_end(object);				\
-	}								\
-MACRO_END
+
 
 /*
  *	Routine:	memory_object_lock_request [user interface]
@@ -556,6 +521,40 @@ vm_object_sync(
 
 
 
+#define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, po, ro, ioerr, iosync)    \
+MACRO_BEGIN								\
+									\
+        int			upl_flags;                              \
+	memory_object_t		pager;					\
+									\
+	if (object == slide_info.slide_object) {					\
+		panic("Objects with slid pages not allowed\n");		\
+	}								\
+				                   			\
+	if ((pager = (object)->pager) != MEMORY_OBJECT_NULL) {		\
+		vm_object_paging_begin(object);				\
+		vm_object_unlock(object);				\
+									\
+                if (iosync)                                     	\
+                        upl_flags = UPL_MSYNC | UPL_IOSYNC;     	\
+                else                                            	\
+                        upl_flags = UPL_MSYNC;                  	\
+				                   			\
+	   	(void) memory_object_data_return(pager,			\
+			po,						\
+			(memory_object_cluster_size_t)data_cnt,		\
+	                ro,                                             \
+	                ioerr,                                          \
+			FALSE,						\
+			FALSE,		                                \
+			upl_flags);                                 	\
+									\
+		vm_object_lock(object);					\
+		vm_object_paging_end(object);				\
+	}								\
+MACRO_END
+
+
 
 static int
 vm_object_update_extent(
@@ -571,13 +570,18 @@ vm_object_update_extent(
 {
         vm_page_t	m;
         int		retval = 0;
-	memory_object_cluster_size_t	data_cnt = 0;
 	vm_object_offset_t	paging_offset = 0;
 	vm_object_offset_t	next_offset = offset;
         memory_object_lock_result_t	page_lock_result;
-	memory_object_lock_result_t	pageout_action;
-	
-	pageout_action = MEMORY_OBJECT_LOCK_RESULT_DONE;
+	memory_object_cluster_size_t	data_cnt = 0;
+	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
+	struct vm_page_delayed_work	*dwp;
+	int		dw_count;
+	int		dw_limit;
+
+        dwp = &dw_array[0];
+        dw_count = 0;
+	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
 
 	for (;
 	     offset < offset_end && object->resident_page_count;
@@ -589,98 +593,105 @@ vm_object_update_extent(
 		 */
 		if (data_cnt) {
 			if ((data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) || (next_offset != offset)) {
-				LIST_REQ_PAGEOUT_PAGES(object, data_cnt, 
-						       pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
+
+				if (dw_count) {
+					vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+					dwp = &dw_array[0];
+					dw_count = 0;
+				}
+				LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
+						       paging_offset, offset_resid, io_errno, should_iosync);
 				data_cnt = 0;
 			}
 		}
-
 		while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
-		        page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);
-
-			XPR(XPR_MEMORY_OBJECT,
-			    "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n",
-			    object, offset, page_lock_result, 0, 0);
-
-			switch (page_lock_result)
-			{
-			  case MEMORY_OBJECT_LOCK_RESULT_DONE:
-			    /*
-			     *	End of a cluster of dirty pages.
-			     */
-			    if (data_cnt) {
-			            LIST_REQ_PAGEOUT_PAGES(object, 
-							   data_cnt, pageout_action, 
-							   paging_offset, offset_resid, io_errno, should_iosync);
-				    data_cnt = 0;
-				    continue;
-			    }
-			    break;
-
-			  case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
-			    /*
-			     *	Since it is necessary to block,
-			     *	clean any dirty pages now.
-			     */
-			    if (data_cnt) {
-			            LIST_REQ_PAGEOUT_PAGES(object,
-							   data_cnt, pageout_action, 
-							   paging_offset, offset_resid, io_errno, should_iosync);
-				    data_cnt = 0;
-				    continue;
-			    }
-			    PAGE_SLEEP(object, m, THREAD_UNINT);
-			    continue;
-
-			  case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN:
-			  case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
-			    /*
-			     * The clean and return cases are similar.
-			     *
-			     * if this would form a discontiguous block,
-			     * clean the old pages and start anew.
-			     */
-			    if (data_cnt && pageout_action != page_lock_result) {
-			            LIST_REQ_PAGEOUT_PAGES(object, 
-							   data_cnt, pageout_action, 
-							   paging_offset, offset_resid, io_errno, should_iosync);
-				    data_cnt = 0;
-				    continue;
-			    }
-			    if (m->cleaning) {
-			            PAGE_SLEEP(object, m, THREAD_UNINT);
-				    continue;
-			    }
-			    if (data_cnt == 0) {
-			            pageout_action = page_lock_result;
-				    paging_offset = offset;
-			    }
-			    data_cnt += PAGE_SIZE;
-			    next_offset = offset + PAGE_SIZE_64;
-
-			    /*
-			     * Clean
-			     */
-			    m->list_req_pending = TRUE;
-			    m->cleaning = TRUE;
-
-			    if (should_flush &&
-				/* let's not flush a wired page... */
-				!VM_PAGE_WIRED(m)) {
-			            /*
-				     * and add additional state
-				     * for the flush
-				     */
-				    m->busy = TRUE;
-				    m->pageout = TRUE;
-
-				    vm_page_lockspin_queues();
-				    vm_page_wire(m);
-				    vm_page_unlock_queues();
-			    }
-
-			    retval = 1;
-			    break;
+
+			dwp->dw_mask = 0;
+		        
+			page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);
+
+			if (data_cnt && page_lock_result != MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN) {
+				/*
+				 *	End of a run of dirty/precious pages.
+				 */
+				if (dw_count) {
+					vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+					dwp = &dw_array[0];
+					dw_count = 0;
+				}
+				LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
+						       paging_offset, offset_resid, io_errno, should_iosync);
+				/*
+				 * LIST_REQ_PAGEOUT_PAGES will drop the object lock which will
+				 * allow the state of page 'm' to change... we need to re-lookup
+				 * the current offset
+				 */
+				data_cnt = 0;
+				continue;
+			}
+
+			switch (page_lock_result) {
+
+			case MEMORY_OBJECT_LOCK_RESULT_DONE:
+				break;
+
+			case MEMORY_OBJECT_LOCK_RESULT_MUST_FREE:
+				dwp->dw_mask |= DW_vm_page_free;
+				break;
+
+			case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
+				PAGE_SLEEP(object, m, THREAD_UNINT);
+				continue;
+
+			case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
+				if (data_cnt == 0)
+					paging_offset = offset;
+
+				data_cnt += PAGE_SIZE;
+				next_offset = offset + PAGE_SIZE_64;
+
+				/*
+				 * Clean
+				 */
+				m->list_req_pending = TRUE;
+				m->cleaning = TRUE;
+
+				/*
+				 * wired pages shouldn't be flushed and
+				 * since they aren't on any queue,
+				 * no need to remove them
+				 */
+				if (!VM_PAGE_WIRED(m)) {
+
+					if (should_flush) {
+						/*
+						 * add additional state for the flush
+						 */
+						m->busy = TRUE;
+						m->pageout = TRUE;
+
+						dwp->dw_mask |= DW_vm_page_wire;
+					}
+					/*
+					 * we used to remove the page from the queues at this
+					 * point, but we do not believe that an msync
+					 * should cause the 'age' of a page to be changed
+					 *
+					 *    else
+					 *	dwp->dw_mask |= DW_VM_PAGE_QUEUES_REMOVE;
+					 */
+				}
+				retval = 1;
+				break;
+			}
+			if (dwp->dw_mask) {
+				VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
+
+				if (dw_count >= dw_limit) {
+					vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+					dwp = &dw_array[0];
+					dw_count = 0;
+				}
 			}
 			break;
 		}
@@ -689,9 +700,12 @@ vm_object_update_extent(
 	 *	We have completed the scan for applicable pages.
 	 *	Clean any pages that have been saved.
 	 */
+	if (dw_count)
+		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+
 	if (data_cnt) {
-	        LIST_REQ_PAGEOUT_PAGES(object,
-				       data_cnt, pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
+	        LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
+				       paging_offset, offset_resid, io_errno, should_iosync);
 	}
 	return (retval);
 }
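The rewritten extent scan replaces per-page queue-lock traffic with batched delayed work: each page accumulates a dw_mask of deferred actions (DW_vm_page_free, DW_vm_page_wire, ...), and vm_page_do_delayed_work applies a whole dw_array under a single lock acquisition, flushing early whenever dw_limit is reached or the object lock is about to be dropped. The shape of the idiom, sketched with illustrative stubs rather than the real VM types:

#define DW_LIMIT 32		/* stand-in for DEFAULT_DELAYED_WORK_LIMIT */

struct dw_entry { void *page; unsigned mask; };

static void lock_page_queues(void)	{ /* vm_page_lockspin_queues() */ }
static void unlock_page_queues(void)	{ /* vm_page_unlock_queues() */ }
static void apply_mask(void *page, unsigned mask) { (void)page; (void)mask; }

/* One lock round-trip services the whole batch, as in
 * vm_page_do_delayed_work(object, &dw_array[0], dw_count). */
static void
batch_flush(struct dw_entry *dw, int *count)
{
	lock_page_queues();
	for (int i = 0; i < *count; i++)
		apply_mask(dw[i].page, dw[i].mask);
	unlock_page_queues();
	*count = 0;
}

static void
scan(void **pages, int npages, unsigned (*classify)(void *))
{
	struct dw_entry dw[DW_LIMIT];
	int dw_count = 0;

	for (int i = 0; i < npages; i++) {
		unsigned mask = classify(pages[i]);	/* free, wire, ... */
		if (mask == 0)
			continue;
		dw[dw_count].page = pages[i];
		dw[dw_count].mask = mask;
		if (++dw_count >= DW_LIMIT)
			batch_flush(dw, &dw_count);	/* amortize the lock */
	}
	if (dw_count)
		batch_flush(dw, &dw_count);		/* drain the tail */
}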
@@ -707,14 +721,14 @@ vm_object_update_extent(
  */
 kern_return_t
 vm_object_update(
-	register vm_object_t		object,
-	register vm_object_offset_t	offset,
-	register vm_object_size_t	size,
-	register vm_object_offset_t	*resid_offset,
-	int				*io_errno,
-	memory_object_return_t		should_return,
-	int				flags,
-	vm_prot_t			protection)
+	vm_object_t		object,
+	vm_object_offset_t	offset,
+	vm_object_size_t	size,
+	vm_object_offset_t	*resid_offset,
+	int			*io_errno,
+	memory_object_return_t	should_return,
+	int			flags,
+	vm_prot_t		protection)
 {
         vm_object_t		copy_object = VM_OBJECT_NULL;
 	boolean_t		data_returned = FALSE;
@@ -801,27 +815,27 @@ vm_object_update(
 		        /*
 			 * translate offset with respect to shadow's offset
 			 */
-		        copy_offset = (offset >= copy_object->shadow_offset) ?
-			  (vm_map_offset_t)(offset - copy_object->shadow_offset) :
+		        copy_offset = (offset >= copy_object->vo_shadow_offset) ?
+			  (vm_map_offset_t)(offset - copy_object->vo_shadow_offset) :
 			  (vm_map_offset_t) 0;
 
-			if (copy_offset > copy_object->size)
-			        copy_offset = copy_object->size;
+			if (copy_offset > copy_object->vo_size)
+			        copy_offset = copy_object->vo_size;
 
 			/*
 			 * clip size with respect to shadow offset
 			 */
-			if (offset >= copy_object->shadow_offset) {
+			if (offset >= copy_object->vo_shadow_offset) {
 			        copy_size = size;
-			} else if (size >= copy_object->shadow_offset - offset) {
-			        copy_size = size - (copy_object->shadow_offset - offset);
+			} else if (size >= copy_object->vo_shadow_offset - offset) {
+			        copy_size = size - (copy_object->vo_shadow_offset - offset);
 			} else {
 			        copy_size = 0;
 			}
 			
-			if (copy_offset + copy_size > copy_object->size) {
-			        if (copy_object->size >= copy_offset) {
-				        copy_size = copy_object->size - copy_offset;
+			if (copy_offset + copy_size > copy_object->vo_size) {
+			        if (copy_object->vo_size >= copy_offset) {
+				        copy_size = copy_object->vo_size - copy_offset;
 				} else {
 				        copy_size = 0;
 				}
@@ -841,6 +855,8 @@ vm_object_update(
 		fault_info.hi_offset = copy_size;
 		fault_info.no_cache   = FALSE;
 		fault_info.stealth = TRUE;
+		fault_info.io_sync = FALSE;
+		fault_info.cs_bypass = FALSE;
 		fault_info.mark_zf_absent = FALSE;
 
 		vm_object_paging_begin(copy_object);
@@ -898,12 +914,6 @@ vm_object_update(
 				vm_object_lock(copy_object);
 				vm_object_paging_begin(copy_object);
 				goto RETRY_COW_OF_LOCK_REQUEST;
-			case VM_FAULT_FICTITIOUS_SHORTAGE:
-				vm_page_more_fictitious();
-				prot = 	VM_PROT_WRITE|VM_PROT_READ;
-				vm_object_lock(copy_object);
-				vm_object_paging_begin(copy_object);
-				goto RETRY_COW_OF_LOCK_REQUEST;
 			case VM_FAULT_SUCCESS_NO_VM_PAGE:
 				/* success but no VM page: fail */
 				vm_object_paging_end(copy_object);
@@ -1783,14 +1793,17 @@ host_default_memory_manager(
 
 		thread_wakeup((event_t) &memory_manager_default);
 
+#ifndef CONFIG_FREEZE
 		/*
 		 * Now that we have a default pager for anonymous memory,
 		 * reactivate all the throttled pages (i.e. dirty pages with
 		 * no pager).
 		 */
-		if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
+		if (current_manager == MEMORY_OBJECT_DEFAULT_NULL)
+		{
 			vm_page_reactivate_all_throttled();
 		}
+#endif
 	}
  out:
 	lck_mtx_unlock(&memory_manager_default_lock);
@@ -1924,6 +1937,39 @@ memory_object_range_op(
 }
 
 
+void
+memory_object_mark_used(
+        memory_object_control_t	control)
+{
+	vm_object_t		object;
+
+	if (control == NULL)
+		return;
+
+	object = memory_object_control_to_vm_object(control);
+
+	if (object != VM_OBJECT_NULL)
+		vm_object_cache_remove(object);
+}
+
+
+void
+memory_object_mark_unused(
+	memory_object_control_t	control,
+	__unused boolean_t	rage)
+{
+	vm_object_t		object;
+
+	if (control == NULL)
+		return;
+
+	object = memory_object_control_to_vm_object(control);
+
+	if (object != VM_OBJECT_NULL)
+		vm_object_cache_add(object);
+}
+
+
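The new memory_object_mark_used()/memory_object_mark_unused() pair gives a pager a way to move its VM object off and onto the object cache's reclaim list. A minimal sketch of the intended pairing, assuming kernel context and a valid control handle (the wrapper names here are illustrative, not from this patch):

	static void
	my_pager_object_referenced(memory_object_control_t ctl)
	{
		/* object is in use again: take it off the cache's reclaim list */
		memory_object_mark_used(ctl);
	}

	static void
	my_pager_object_idle(memory_object_control_t ctl)
	{
		/*
		 * No references left: make the object reclaimable.
		 * The `rage` argument is accepted but unused in this revision.
		 */
		memory_object_mark_unused(ctl, FALSE);
	}

Both entry points tolerate a NULL control, so callers need not pre-check the handle.
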
 kern_return_t
 memory_object_pages_resident(
 	memory_object_control_t	control,
@@ -1961,6 +2007,20 @@ memory_object_signed(
 	return KERN_SUCCESS;
 }
 
+boolean_t
+memory_object_is_slid(
+	memory_object_control_t	control)
+{
+	vm_object_t	object = VM_OBJECT_NULL;
+	vm_object_t	slide_object = slide_info.slide_object;
+
+	object = memory_object_control_to_vm_object(control);
+	if (object == VM_OBJECT_NULL)
+		return FALSE;
+
+	return (object == slide_object);
+}
+
 static zone_t mem_obj_control_zone;
 
 __private_extern__ void
@@ -1970,6 +2030,7 @@ memory_object_control_bootstrap(void)
 
 	i = (vm_size_t) sizeof (struct memory_object_control);
 	mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control");
+	zone_change(mem_obj_control_zone, Z_CALLERACCT, FALSE);
 	zone_change(mem_obj_control_zone, Z_NOENCRYPT, TRUE);
 	return;
 }
@@ -2251,6 +2312,20 @@ kern_return_t memory_object_last_unmap
 		memory_object);
 }
 
+/* Routine memory_object_data_reclaim */
+kern_return_t memory_object_data_reclaim
+(
+	memory_object_t memory_object,
+	boolean_t	reclaim_backing_store
+)
+{
+	if (memory_object->mo_pager_ops->memory_object_data_reclaim == NULL)
+		return KERN_NOT_SUPPORTED;
+	return (memory_object->mo_pager_ops->memory_object_data_reclaim)(
+		memory_object,
+		reclaim_backing_store);
+}
+
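memory_object_data_reclaim() follows the same dispatch pattern as the other wrappers in this file: indirect through mo_pager_ops, with a NULL slot meaning the pager opts out (the apple_protect pager below passes NULL for data_reclaim). A hedged sketch of a pager that does implement the hook; the function name is hypothetical:

	static kern_return_t
	my_pager_data_reclaim(
		memory_object_t	mem_obj,
		boolean_t	reclaim_backing_store)
	{
		if (reclaim_backing_store) {
			/* drop any swap/backing-store copies held for mem_obj */
		}
		return KERN_SUCCESS;
	}

A caller then invokes memory_object_data_reclaim(mem_obj, TRUE) and treats KERN_NOT_SUPPORTED as "this pager keeps its backing store".
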
 /* Routine memory_object_create */
 kern_return_t memory_object_create
 (
diff --git a/osfmk/vm/memory_object.h b/osfmk/vm/memory_object.h
index a0b6690c1..05f78fcf4 100644
--- a/osfmk/vm/memory_object.h
+++ b/osfmk/vm/memory_object.h
@@ -141,4 +141,15 @@ extern kern_return_t	memory_object_signed(
 	memory_object_control_t		control,
 	boolean_t			is_signed);
 
+extern boolean_t	memory_object_is_slid(
+	memory_object_control_t		control);
+
+extern void		memory_object_mark_used(
+	memory_object_control_t		control);
+
+extern void		memory_object_mark_unused(
+	memory_object_control_t		control,
+	boolean_t			rage);
+
+
 #endif	/* _VM_MEMORY_OBJECT_H_ */
diff --git a/osfmk/vm/pmap.h b/osfmk/vm/pmap.h
index 5d6253718..76d7cb305 100644
--- a/osfmk/vm/pmap.h
+++ b/osfmk/vm/pmap.h
@@ -274,6 +274,12 @@ extern kern_return_t	(pmap_attribute_cache_sync)(  /* Flush appropriate
 extern unsigned int	(pmap_cache_attributes)(
 				ppnum_t		pn);
 
+/*
+ * Set (override) cache attributes for the specified physical page
+ */
+extern	void		pmap_set_cache_attributes(
+				ppnum_t,
+				unsigned int);
 extern void pmap_sync_page_data_phys(ppnum_t pa);
 extern void pmap_sync_page_attributes_phys(ppnum_t pa);
 
@@ -453,14 +459,14 @@ extern void		(pmap_pageable)(
 extern uint64_t pmap_nesting_size_min;
 extern uint64_t pmap_nesting_size_max;
 
-extern kern_return_t pmap_nest(pmap_t grand,
-			       pmap_t subord,
-			       addr64_t vstart,
-			       addr64_t nstart,
-			       uint64_t size);
-extern kern_return_t pmap_unnest(pmap_t grand,
-				 addr64_t vaddr,
-				 uint64_t size);
+extern kern_return_t pmap_nest(pmap_t,
+			       pmap_t,
+			       addr64_t,
+			       addr64_t,
+			       uint64_t);
+extern kern_return_t pmap_unnest(pmap_t,
+				 addr64_t,
+				 uint64_t);
 extern boolean_t pmap_adjust_unnest_parameters(pmap_t, vm_map_offset_t *, vm_map_offset_t *);
 #endif	/* MACH_KERNEL_PRIVATE */
 
@@ -484,8 +490,8 @@ extern pmap_t	kernel_pmap;			/* The kernel's map */
 #define VM_MEM_NOT_CACHEABLE	0x4		/* (I) Cache Inhibit */
 #define VM_MEM_WRITE_THROUGH	0x8		/* (W) Write-Through */
 
+#define VM_WIMG_USE_DEFAULT	0x80
 #define VM_WIMG_MASK		0xFF
-#define VM_WIMG_USE_DEFAULT	0x80000000
 
 #define VM_MEM_SUPERPAGE	0x100		/* map a superpage instead of a base page */
 
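Shrinking VM_WIMG_USE_DEFAULT from 0x80000000 to 0x80 brings it inside VM_WIMG_MASK (0xFF), so a complete WIMG specification now fits in one byte and survives masking. A small sketch of the test this enables, assuming the new values:

	unsigned int wimg = VM_WIMG_USE_DEFAULT;	/* 0x80 */

	/* With the old 0x80000000 value this mask would have erased the flag: */
	if ((wimg & VM_WIMG_MASK) == VM_WIMG_USE_DEFAULT) {
		/* keep the pmap's default cache attributes for this page */
	}

This pairs with the pmap_set_cache_attributes() declaration added above, which overrides the cache attributes for a single physical page.
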
diff --git a/osfmk/vm/vm_apple_protect.c b/osfmk/vm/vm_apple_protect.c
index 953a4e139..ef46cfeca 100644
--- a/osfmk/vm/vm_apple_protect.c
+++ b/osfmk/vm/vm_apple_protect.c
@@ -130,6 +130,7 @@ const struct memory_object_pager_ops apple_protect_pager_ops = {
 	apple_protect_pager_synchronize,
 	apple_protect_pager_map,
 	apple_protect_pager_last_unmap,
+	NULL, /* data_reclaim */
 	"apple protect pager"
 };
 
@@ -354,6 +355,7 @@ apple_protect_pager_data_request(
 	upl_pl = NULL;
 	fault_info = *((struct vm_object_fault_info *) mo_fault_info);
 	fault_info.stealth = TRUE;
+	fault_info.io_sync = FALSE;
 	fault_info.mark_zf_absent = FALSE;
 	interruptible = fault_info.interruptible;
 
@@ -510,7 +512,7 @@ apple_protect_pager_data_request(
 			   kernel_mapping,
 			   src_page->phys_page,
 			   VM_PROT_READ,
-			   src_object->wimg_bits & VM_WIMG_MASK,
+			   0,
 			   TRUE);
 		/*
 		 * Establish an explicit pmap mapping of the destination
@@ -525,7 +527,7 @@ apple_protect_pager_data_request(
 			   kernel_mapping + PAGE_SIZE_64,
 			   dst_pnum,
 			   VM_PROT_READ | VM_PROT_WRITE,
-			   dst_object->wimg_bits & VM_WIMG_MASK,
+			   0,
 			   TRUE);
 
 		/*
@@ -725,13 +727,13 @@ apple_protect_pager_terminate_internal(
 		vm_object_deallocate(pager->backing_object);
 		pager->backing_object = VM_OBJECT_NULL;
 	}
-
-	/* trigger the destruction of the memory object */
-	memory_object_destroy(pager->pager_control, 0);
 	
 	/* deallocate any crypt module data */
 	if(pager->crypt.crypt_end)
 		pager->crypt.crypt_end(pager->crypt.crypt_ops);
+
+	/* trigger the destruction of the memory object */
+	memory_object_destroy(pager->pager_control, 0);
 }
 
 /*
diff --git a/osfmk/vm/vm_debug.c b/osfmk/vm/vm_debug.c
index a0712c54d..1dfa947ef 100644
--- a/osfmk/vm/vm_debug.c
+++ b/osfmk/vm/vm_debug.c
@@ -206,7 +206,7 @@ vm32_region_info(
 				vio->vio_object =
 					(natural_t)(uintptr_t) cobject;
 				vio->vio_size =
-					(natural_t) cobject->size;
+					(natural_t) cobject->vo_size;
 				vio->vio_ref_count =
 					cobject->ref_count;
 				vio->vio_resident_page_count =
@@ -216,7 +216,7 @@ vm32_region_info(
 				vio->vio_shadow =
 					(natural_t)(uintptr_t) cobject->shadow;
 				vio->vio_shadow_offset =
-					(natural_t) cobject->shadow_offset;
+					(natural_t) cobject->vo_shadow_offset;
 				vio->vio_paging_offset =
 					(natural_t) cobject->paging_offset;
 				vio->vio_copy_strategy =
@@ -408,7 +408,7 @@ vm32_region_info_64(
 				vio->vio_object =
 					(natural_t)(uintptr_t) cobject;
 				vio->vio_size =
-					(natural_t) cobject->size;
+					(natural_t) cobject->vo_size;
 				vio->vio_ref_count =
 					cobject->ref_count;
 				vio->vio_resident_page_count =
@@ -418,7 +418,7 @@ vm32_region_info_64(
 				vio->vio_shadow =
 					(natural_t)(uintptr_t) cobject->shadow;
 				vio->vio_shadow_offset =
-					(natural_t) cobject->shadow_offset;
+					(natural_t) cobject->vo_shadow_offset;
 				vio->vio_paging_offset =
 					(natural_t) cobject->paging_offset;
 				vio->vio_copy_strategy =
diff --git a/osfmk/vm/vm_fault.c b/osfmk/vm/vm_fault.c
index a36714b57..ab281c928 100644
--- a/osfmk/vm/vm_fault.c
+++ b/osfmk/vm/vm_fault.c
@@ -89,8 +89,6 @@
 #include <kern/zalloc.h>
 #include <kern/misc_protos.h>
 
-#include <ppc/proc_reg.h>
-
 #include <vm/vm_fault.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
@@ -102,8 +100,7 @@
 #include <vm/vm_external.h>
 #include <vm/memory_object.h>
 #include <vm/vm_purgeable_internal.h>	/* Needed by some vm_page.h macros */
-
-#include <sys/kdebug.h>
+#include <vm/vm_shared_region.h>
 
 #define VM_FAULT_CLASSIFY	0
 
@@ -133,12 +130,13 @@ extern unsigned int dp_pages_free, dp_pages_reserve;
 
 #define NEED_TO_HARD_THROTTLE_THIS_TASK() 	(((dp_pages_free + dp_pages_reserve < 2000) && \
 						 (get_task_resident_size(current_task()) > vm_hard_throttle_threshold) && \
-						 (current_task() != kernel_task) && IP_VALID(memory_manager_default)) || \
+						 (current_task() != kernel_task) && VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) || \
 						 (vm_page_free_count < vm_page_throttle_limit && thread_is_io_throttled() && \
 						  (get_task_resident_size(current_task()) > vm_hard_throttle_threshold)))
 
 
-#define HARD_THROTTLE_DELAY	10000	/* 10000 us == 10 ms */
+#define HARD_THROTTLE_DELAY	20000	/* 20000 us == 20 ms */
+#define SOFT_THROTTLE_DELAY	2000	/* 2000 us == 2 ms */
 
 
 extern int cs_debug;
@@ -180,6 +178,7 @@ unsigned long vm_cs_validates = 0;
 unsigned long vm_cs_revalidates = 0;
 unsigned long vm_cs_query_modified = 0;
 unsigned long vm_cs_validated_dirtied = 0;
+unsigned long vm_cs_bitmap_validated = 0;
 #if CONFIG_ENFORCE_SIGNED_CODE
 int cs_enforcement_disable=0;
 #else
@@ -521,7 +520,7 @@ vm_fault_deactivate_behind(
 }
 
 
-static boolean_t
+static int
 vm_page_throttled(void)
 {
         clock_sec_t     elapsed_sec;
@@ -531,12 +530,12 @@ vm_page_throttled(void)
 	thread_t thread = current_thread();
 	
 	if (thread->options & TH_OPT_VMPRIV)
-		return (FALSE);
+		return (0);
 
 	thread->t_page_creation_count++;
 
 	if (NEED_TO_HARD_THROTTLE_THIS_TASK())
-		return (TRUE);
+		return (HARD_THROTTLE_DELAY);
 
 	if (vm_page_free_count < vm_page_throttle_limit &&
 	    thread->t_page_creation_count > vm_page_creation_throttle) {
@@ -562,12 +561,12 @@ vm_page_throttled(void)
 			}
 			++vm_page_throttle_count;
 
-			return (TRUE);
+			return (SOFT_THROTTLE_DELAY);
 		}
 		thread->t_page_creation_time = tv_sec;
 		thread->t_page_creation_count = 0;
 	}
-	return (FALSE);
+	return (0);
 }
 
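vm_page_throttled() now reports how long to stall (0, SOFT_THROTTLE_DELAY, or HARD_THROTTLE_DELAY) rather than a bare boolean, so callers no longer re-derive the hard case from NEED_TO_HARD_THROTTLE_THIS_TASK(). The caller pattern used throughout the rest of this patch reduces to the sketch below (lock dropping elided):

	int throttle_delay;

	if ((throttle_delay = vm_page_throttled())) {
		/* drop VM locks first, then stall for the returned time */
		delay(throttle_delay);		/* 2 ms soft, 20 ms hard */

		if (current_thread_aborted()) {
			/* abort the fault instead of retrying */
		}
	}
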
 
@@ -582,6 +581,8 @@ vm_page_throttled(void)
 static vm_fault_return_t
 vm_fault_check(vm_object_t object, vm_page_t m, vm_page_t first_m, boolean_t interruptible_state)
 {
+	int throttle_delay;
+
         if (object->shadow_severed ||
 	    VM_OBJECT_PURGEABLE_FAULT_ERROR(object)) {
 	        /*
@@ -619,7 +620,7 @@ vm_fault_check(vm_object_t object, vm_page_t m, vm_page_t first_m, boolean_t int
 			return (VM_FAULT_RETRY);
 		}
 	}
-	if (vm_page_throttled()) {
+	if ((throttle_delay = vm_page_throttled())) {
 	        /*
 		 * we're throttling zero-fills...
 		 * treat this as if we couldn't grab a page
@@ -628,15 +629,14 @@ vm_fault_check(vm_object_t object, vm_page_t m, vm_page_t first_m, boolean_t int
 		        VM_PAGE_FREE(m);
 		vm_fault_cleanup(object, first_m);
 
-		if (NEED_TO_HARD_THROTTLE_THIS_TASK()) {
-			delay(HARD_THROTTLE_DELAY);
+		VM_DEBUG_EVENT(vmf_check_zfdelay, VMF_CHECK_ZFDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0);
 
-			if (current_thread_aborted()) {
-				thread_interrupt_level(interruptible_state);
-				return VM_FAULT_INTERRUPTED;
-			}
-		}
+		delay(throttle_delay);
 
+		if (current_thread_aborted()) {
+			thread_interrupt_level(interruptible_state);
+			return VM_FAULT_INTERRUPTED;
+		}
 		thread_interrupt_level(interruptible_state);
 
 		return (VM_FAULT_MEMORY_SHORTAGE);
@@ -677,9 +677,9 @@ vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill)
 	m->cs_validated = FALSE;
 	m->cs_tainted = FALSE;
 
-	if (no_zero_fill == TRUE)
-	        my_fault = DBG_NZF_PAGE_FAULT;
-	else {
+	if (no_zero_fill == TRUE) {
+		my_fault = DBG_NZF_PAGE_FAULT;
+	} else {
 		vm_page_zero_fill(m);
 
 		VM_STAT_INCR(zero_fill_count);
@@ -689,12 +689,17 @@ vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill)
 	assert(m->object != kernel_object);
 	//assert(m->pageq.next == NULL && m->pageq.prev == NULL);
 
-	if (!IP_VALID(memory_manager_default) &&
+	if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
 		(m->object->purgable == VM_PURGABLE_DENY ||
 		 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
 		 m->object->purgable == VM_PURGABLE_VOLATILE )) {
+
 		vm_page_lockspin_queues();
 
+		assert(!VM_PAGE_WIRED(m));
+
+		VM_PAGE_QUEUES_REMOVE(m);
+
                 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
                 m->throttled = TRUE;
                 vm_page_throttled_count++;
@@ -990,23 +995,105 @@ vm_fault_page(
 #if TRACEFAULTPAGE
 				dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
 #endif
-				wait_result = PAGE_SLEEP(object, m, interruptible);
-				XPR(XPR_VM_FAULT,
-				    "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
-					object, offset,
-					m, 0, 0);
-				counter(c_vm_fault_page_block_busy_kernel++);
+				if (m->list_req_pending) {
+					/*
+					 * "list_req_pending" means that the
+					 * page has been marked for a page-in
+					 * or page-out operation but hasn't been
+					 * grabbed yet.
+					 * Since whoever marked it
+					 * "list_req_pending" might now be
+					 * making its way through other layers
+					 * of code and possibly blocked on locks
+					 * that we might be holding, we can't
+					 * just block on a "busy" and
+					 * "list_req_pending" page or we might
+					 * deadlock with that other thread.
+					 * 
+					 * [ For pages backed by a file on an
+					 * HFS volume, we might deadlock with
+					 * the HFS truncate lock, for example:
+					 * A: starts a pageout or pagein
+					 * operation and marks a page "busy",
+					 * "list_req_pending" and either
+					 * "pageout", "cleaning" or "absent".
+					 * A: makes its way through the
+					 * memory object (vnode) code.
+					 * B: starts from the memory object
+					 * side, via a write() on a file, for
+					 * example.
+					 * B: grabs some filesystem locks.
+					 * B: attempts to grab the same page for
+					 * its I/O.
+					 * B: blocks here because the page is
+					 * "busy".
+					 * A: attempts to grab the filesystem
+					 * lock we're holding.
+					 * And we have a deadlock... ]
+					 *
+					 * Since the page hasn't been claimed
+					 * by the other thread yet, it's fair
+					 * for us to grab it here.
+					 */
+					if (m->absent) {
+						/*
+						 * The page needs to be paged
+						 * in.  We can do it here but we
+						 * need to get rid of "m", the
+						 * placeholder page inserted by
+						 * another thread that is also
+						 * trying to page it in.  When
+						 * that thread resumes, it will
+						 * either wait for our page to
+						 * arrive or it will find it
+						 * already there.
+						 */
+						VM_PAGE_FREE(m);
 
-				if (wait_result != THREAD_AWAKENED) {
-					vm_fault_cleanup(object, first_m);
-					thread_interrupt_level(interruptible_state);
+						/*
+						 * Retry the fault.  We'll find
+						 * that the page is not resident
+						 * and initiate a page-in again.
+						 */
+						continue;
+					}
+					if (m->pageout || m->cleaning) {
+						/*
+						 * This page has been selected
+						 * for a page-out but we want
+						 * to bring it in.  Let's just
+						 * cancel the page-out...
+						 */
+						vm_pageout_queue_steal(m, FALSE);
+						/*
+						 * ... and clear "busy" and
+						 * wake up any waiters...
+						 */
+						PAGE_WAKEUP_DONE(m);
+						/*
+						 * ... and continue with the
+						 * "fault" handling.
+						 */
+					}
+				} else {
+					wait_result = PAGE_SLEEP(object, m, interruptible);
+					XPR(XPR_VM_FAULT,
+					    "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
+						object, offset,
+						m, 0, 0);
+					counter(c_vm_fault_page_block_busy_kernel++);
 
-					if (wait_result == THREAD_RESTART)
-					        return (VM_FAULT_RETRY);
-					else
-						return (VM_FAULT_INTERRUPTED);
+					if (wait_result != THREAD_AWAKENED) {
+						vm_fault_cleanup(object, first_m);
+						thread_interrupt_level(interruptible_state);
+
+						if (wait_result == THREAD_RESTART)
+							return (VM_FAULT_RETRY);
+						else
+							return (VM_FAULT_INTERRUPTED);
+					}
+					continue;
 				}
-				continue;
 			}
 
 			if (m->phys_page == vm_page_guard_addr) {
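The long comment in the hunk above reduces to a three-way decision on a busy page; condensed for reference (same logic as the code):

	/*
	 * Busy-page handling in vm_fault_page(), condensed:
	 *
	 *   list_req_pending && absent        -> free the placeholder page and
	 *                                        retry the fault (page-in again)
	 *   list_req_pending && pageout or
	 *                       cleaning      -> steal the page back from the
	 *                                        pageout queue, wake waiters,
	 *                                        continue the fault
	 *   otherwise                         -> safe to PAGE_SLEEP(); no other
	 *                                        thread holds locks against us
	 */
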
@@ -1152,6 +1239,7 @@ vm_fault_page(
 
 					if (fault_info->mark_zf_absent && no_zero_fill == TRUE)
 						m->absent = TRUE;
+
 					break;
 				} else {
 					if (must_be_resident)
@@ -1172,11 +1260,11 @@ vm_fault_page(
 					    "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
 						object, offset,
 						next_object,
-						offset+object->shadow_offset,0);
+						offset+object->vo_shadow_offset,0);
 
-					offset += object->shadow_offset;
-					fault_info->lo_offset += object->shadow_offset;
-					fault_info->hi_offset += object->shadow_offset;
+					offset += object->vo_shadow_offset;
+					fault_info->lo_offset += object->vo_shadow_offset;
+					fault_info->hi_offset += object->vo_shadow_offset;
 					access_required = VM_PROT_READ;
 
 					vm_object_lock(next_object);
@@ -1418,8 +1506,8 @@ vm_fault_page(
 				vm_object_lock(object);
 				assert(object->ref_count > 0);
 
-				if (object->paging_in_progress > vm_object_pagein_throttle) {
-				        vm_object_assert_wait(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS, interruptible);
+				if (object->paging_in_progress >= vm_object_pagein_throttle) {
+				        vm_object_assert_wait(object, VM_OBJECT_EVENT_PAGING_ONLY_IN_PROGRESS, interruptible);
 
 					vm_object_unlock(object);
 					wait_result = thread_block(THREAD_CONTINUE_NULL);
@@ -1520,7 +1608,7 @@ vm_fault_page(
 				current_thread()->t_page_creation_time = tv_sec;
 				current_thread()->t_page_creation_count = 0;
 			}
-			if ((interruptible != THREAD_UNINT) && (current_thread()->sched_mode & TH_MODE_ABORT)) {
+			if ((interruptible != THREAD_UNINT) && (current_thread()->sched_flags & TH_SFLAG_ABORT)) {
 
 				vm_fault_cleanup(object, first_m);
 				thread_interrupt_level(interruptible_state);
@@ -1637,9 +1725,9 @@ vm_fault_page(
 			if ((object != first_object) || must_be_resident)
 				vm_object_paging_end(object);
 
-			offset += object->shadow_offset;
-			fault_info->lo_offset += object->shadow_offset;
-			fault_info->hi_offset += object->shadow_offset;
+			offset += object->vo_shadow_offset;
+			fault_info->lo_offset += object->vo_shadow_offset;
+			fault_info->hi_offset += object->vo_shadow_offset;
 			access_required = VM_PROT_READ;
 
 			vm_object_lock(next_object);
@@ -1884,9 +1972,9 @@ vm_fault_page(
 		/*
 		 * Does the page exist in the copy?
 		 */
-		copy_offset = first_offset - copy_object->shadow_offset;
+		copy_offset = first_offset - copy_object->vo_shadow_offset;
 
-		if (copy_object->size <= copy_offset)
+		if (copy_object->vo_size <= copy_offset)
 			/*
 			 * Copy object doesn't cover this page -- do nothing.
 			 */
@@ -2194,12 +2282,13 @@ vm_fault_enter(vm_page_t m,
 	       pmap_t pmap,
 	       vm_map_offset_t vaddr,
 	       vm_prot_t prot,
+	       vm_prot_t fault_type,
 	       boolean_t wired,
 	       boolean_t change_wiring,
 	       boolean_t no_cache,
+	       boolean_t cs_bypass,
 	       int *type_of_fault)
 {
-	unsigned int	cache_attr;
 	kern_return_t	kr, pe_result;
 	boolean_t	previously_pmapped = m->pmapped;
 	boolean_t	must_disconnect = 0;
@@ -2215,19 +2304,25 @@ vm_fault_enter(vm_page_t m,
 		return KERN_SUCCESS;
 	}
 
-        cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
+	if (*type_of_fault == DBG_ZERO_FILL_FAULT) {
 
-	if (m->pmapped == FALSE) {
+		vm_object_lock_assert_exclusive(m->object);
+
+	} else if ((fault_type & VM_PROT_WRITE) == 0) {
 		/*
-		 * This is the first time this page is being
-		 * mapped in an address space (pmapped == FALSE).
-		 *
-		 * Part of that page may still be in the data cache
-		 * and not flushed to memory.  In case we end up
-		 * accessing that page via the instruction cache,
-		 * we need to ensure that the 2 caches are in sync.
+		 * This is not a "write" fault, so we
+		 * might not have taken the object lock
+		 * exclusively and we might not be able
+		 * to update the "wpmapped" bit in
+		 * vm_fault_enter().
+		 * Let's just grant read access to
+		 * the page for now and we'll
+		 * soft-fault again if we need write
+		 * access later...
 		 */
-		pmap_sync_page_data_phys(m->phys_page);
+		prot &= ~VM_PROT_WRITE;
+	}
+	if (m->pmapped == FALSE) {
 
 		if ((*type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
 		        /*
@@ -2251,8 +2346,7 @@ vm_fault_enter(vm_page_t m,
 		}
 		VM_PAGE_CONSUME_CLUSTERED(m);
 
-	} else if (cache_attr != VM_WIMG_DEFAULT)
-	        pmap_sync_page_attributes_phys(m->phys_page);
+	}
 
 	if (*type_of_fault != DBG_COW_FAULT) {
 		DTRACE_VM2(as_fault, int, 1, (uint64_t *), NULL);
@@ -2309,7 +2403,7 @@ vm_fault_enter(vm_page_t m,
 	 *   code can be created
 	 */
 	if (m->cs_tainted ||
-	    ( !cs_enforcement_disable &&
+	    (( !cs_enforcement_disable && !cs_bypass ) &&
 	     (/* The page is unsigned and wants to be executable */
 	      (!m->cs_validated && (prot & VM_PROT_EXECUTE))  ||
 	      /* The page should be immutable, but is in danger of being modified
@@ -2387,29 +2481,58 @@ vm_fault_enter(vm_page_t m,
 		 * that's needed for an AtomicCompareAndSwap
 		 */
 		m->pmapped = TRUE;
-		if (prot & VM_PROT_WRITE) {
-			vm_object_lock_assert_exclusive(m->object);
-			m->wpmapped = TRUE;
-			if(must_disconnect) {
-				/* We can only get here 
-				 * because of the CSE logic */
+		if(vm_page_is_slideable(m)) {
+			boolean_t was_busy = m->busy;
+			m->busy = TRUE;
+			kr = vm_page_slide(m, 0);
+			assert(m->busy);
+			if(!was_busy) {
+				PAGE_WAKEUP_DONE(m);
+			}
+			if (kr != KERN_SUCCESS) {
+				/*
+				 * This page has not been slid correctly,
+				 * do not do the pmap_enter()!
+				 * Let vm_fault_enter() return the error
+				 * so the caller can fail the fault.
+				 */
+				goto after_the_pmap_enter;
+			}
+		}
+
+		if (fault_type & VM_PROT_WRITE) {
+
+			if (m->wpmapped == FALSE) {
+				vm_object_lock_assert_exclusive(m->object);
+
+				m->wpmapped = TRUE;
+			}
+			if (must_disconnect) {
+				/*
+				 * We can only get here 
+				 * because of the CSE logic
+				 */
 				assert(cs_enforcement_disable == FALSE);
 				pmap_disconnect(m->phys_page);
-				/* If we are faulting for a write, we can clear
+				/* 
+				 * If we are faulting for a write, we can clear
 				 * the execute bit - that will ensure the page is
 				 * checked again before being executable, which
 				 * protects against a map switch.
 				 * This only happens the first time the page
 				 * gets tainted, so we won't get stuck here 
-				 * to make an already writeable page executable. */
-				prot &= ~VM_PROT_EXECUTE;
+				 * to make an already writeable page executable.
+				 */
+				if (!cs_bypass){
+					prot &= ~VM_PROT_EXECUTE;
+				}
 			}
 		}
 
 		/* Prevent a deadlock by not
 		 * holding the object lock if we need to wait for a page in
 		 * pmap_enter() - <rdar://problem/7138958> */
-		PMAP_ENTER_OPTIONS(pmap, vaddr, m, prot, cache_attr,
+		PMAP_ENTER_OPTIONS(pmap, vaddr, m, prot, 0,
 				  wired, PMAP_OPTIONS_NOWAIT, pe_result);
 
 		if(pe_result == KERN_RESOURCE_SHORTAGE) {
@@ -2420,7 +2543,7 @@ vm_fault_enter(vm_page_t m,
 			m->busy = TRUE;
 			vm_object_unlock(m->object);
 			
-			PMAP_ENTER(pmap, vaddr, m, prot, cache_attr, wired);
+			PMAP_ENTER(pmap, vaddr, m, prot, 0, wired);
 
 			/* Take the object lock again. */
 			vm_object_lock(m->object);
@@ -2435,6 +2558,7 @@ vm_fault_enter(vm_page_t m,
 		}
 	}
 
+after_the_pmap_enter:
 	/*
 	 * Hold queues lock to manipulate
 	 * the page queues.  Change wiring
@@ -2521,11 +2645,8 @@ vm_fault_enter(vm_page_t m,
 					if (no_cache && (!previously_pmapped || m->no_cache)) {
 						m->no_cache = TRUE;
 
-						if (m->active || m->inactive)
-							VM_PAGE_QUEUES_REMOVE(m);
-
 						if (!m->speculative) 
-							vm_page_speculate(m, TRUE);
+							vm_page_speculate(m, FALSE);
 
 					} else if (!m->active && !m->inactive)
 						vm_page_activate(m);
@@ -2596,12 +2717,13 @@ vm_fault(
 	int			object_lock_type = 0;
 	int			cur_object_lock_type;
 	vm_object_t		top_object = VM_OBJECT_NULL;
+	int			throttle_delay;
 
 
 	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_START,
 			      (int)((uint64_t)vaddr >> 32),
 			      (int)vaddr,
-			      0,
+			      (map == kernel_map),
 			      0,
 			      0);
 
@@ -2658,6 +2780,7 @@ RetryFault:
 	pmap = real_map->pmap;
 	fault_info.interruptible = interruptible;
 	fault_info.stealth = FALSE;
+	fault_info.io_sync = FALSE;
 	fault_info.mark_zf_absent = FALSE;
 
 	/*
@@ -2915,7 +3038,35 @@ RetryFault:
 			}
 			ASSERT_PAGE_DECRYPTED(m);
 
+			if(vm_page_is_slideable(m)) {
+				/*
+				 * We might need to slide this page, and so,
+				 * we want to hold the VM object exclusively.
+				 */
+			        if (object != cur_object) {
+					if (cur_object_lock_type == OBJECT_LOCK_SHARED) {
+						vm_object_unlock(object);
+						vm_object_unlock(cur_object);
+
+					        cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
+
+						vm_map_unlock_read(map);
+						if (real_map != map)
+							vm_map_unlock(real_map);
+
+						goto RetryFault;
+					}
+				} else if (object_lock_type == OBJECT_LOCK_SHARED) {
+
+					vm_object_unlock(object);
+				        object_lock_type = OBJECT_LOCK_EXCLUSIVE;
+					vm_map_unlock_read(map);
+					goto RetryFault;
+				}
+			}
+
 			if (VM_FAULT_NEED_CS_VALIDATION(map->pmap, m)) {
+upgrade_for_validation:
 				/*
 				 * We might need to validate this page
 				 * against its code signature, so we
@@ -2963,27 +3114,12 @@ RetryFault:
 			 */
 
 			if (object == cur_object && object->copy == VM_OBJECT_NULL) {
-				if ((fault_type & VM_PROT_WRITE) == 0) {
-					/*
-					 * This is not a "write" fault, so we
-					 * might not have taken the object lock
-					 * exclusively and we might not be able
-					 * to update the "wpmapped" bit in
-					 * vm_fault_enter().
-					 * Let's just grant read access to
-					 * the page for now and we'll
-					 * soft-fault again if we need write
-					 * access later...
-					 */
-					prot &= ~VM_PROT_WRITE;
-				}
+
 				goto FastPmapEnter;
 			}
 
 			if ((fault_type & VM_PROT_WRITE) == 0) {
 
-				prot &= ~VM_PROT_WRITE;
-
 			  	if (object != cur_object) {
 				        /*
 					 * We still need to hold the top object
@@ -3020,27 +3156,27 @@ FastPmapEnter:
 				 * cur_object == NULL or it's been unlocked
 				 * no paging references on either object or cur_object
 				 */
-#if	MACH_KDB
-				if (db_watchpoint_list && (fault_type & VM_PROT_WRITE) == 0)
-					prot &= ~VM_PROT_WRITE;
-#endif
 				if (caller_pmap) {
 				        kr = vm_fault_enter(m,
 							    caller_pmap,
 							    caller_pmap_addr,
 							    prot,
+							    fault_type,
 							    wired,
 							    change_wiring,
 							    fault_info.no_cache,
+							    fault_info.cs_bypass,
 							    &type_of_fault);
 				} else {
 				        kr = vm_fault_enter(m,
 							    pmap,
 							    vaddr,
 							    prot,
+							    fault_type,
 							    wired,
 							    change_wiring,
 							    fault_info.no_cache,
+							    fault_info.cs_bypass,
 							    &type_of_fault);
 				}
 
@@ -3060,7 +3196,7 @@ FastPmapEnter:
 
 				if (need_collapse == TRUE)
 				        vm_object_collapse(object, offset, TRUE);
-
+
 				if (type_of_fault == DBG_PAGEIND_FAULT || type_of_fault == DBG_PAGEINV_FAULT || type_of_fault == DBG_CACHE_HIT_FAULT) {
 				        /*
 					 * evaluate access pattern and update state
@@ -3090,7 +3226,7 @@ FastPmapEnter:
 			 */
 			assert(object_lock_type == OBJECT_LOCK_EXCLUSIVE);
 
-			if (vm_page_throttled()) {
+			if ((throttle_delay = vm_page_throttled())) {
 				/*
 				 * drop all of our locks...
 				 * wait until the free queue is
@@ -3104,8 +3240,9 @@ FastPmapEnter:
 				if (real_map != map)
 					vm_map_unlock(real_map);
 
-				if (NEED_TO_HARD_THROTTLE_THIS_TASK())
-					delay(HARD_THROTTLE_DELAY);
+				VM_DEBUG_EVENT(vmf_cowdelay, VMF_COWDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0);
+
+				delay(throttle_delay);
 
 				if (!current_thread_aborted() && vm_page_wait((change_wiring) ? 
 						 THREAD_UNINT :
@@ -3128,11 +3265,19 @@ FastPmapEnter:
 				 */
 				break;
 			}
+			
 			/*
 			 * This is now a shadow based copy on write
 			 * fault -- it requires a copy up the shadow
 			 * chain.
-			 *
+			 */
+			
+			if ((cur_object_lock_type == OBJECT_LOCK_SHARED) &&
+			    VM_FAULT_NEED_CS_VALIDATION(NULL, m)) {
+				goto upgrade_for_validation;
+			}
+
+			/*
 			 * Allocate a page in the original top level
 			 * object. Give up if allocate fails.  Also
 			 * need to remember current page, as it's the
@@ -3246,7 +3391,7 @@ FastPmapEnter:
 					kr = KERN_MEMORY_ERROR;
 					goto done;
 				}
-				if (vm_page_throttled()) {
+				if ((throttle_delay = vm_page_throttled())) {
 					/*
 					 * drop all of our locks...
 					 * wait until the free queue is
@@ -3260,8 +3405,9 @@ FastPmapEnter:
 					if (real_map != map)
 						vm_map_unlock(real_map);
 
-					if (NEED_TO_HARD_THROTTLE_THIS_TASK())
-						delay(HARD_THROTTLE_DELAY);
+					VM_DEBUG_EVENT(vmf_zfdelay, VMF_ZFDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0);
+
+					delay(throttle_delay);
 
 					if (!current_thread_aborted() && vm_page_wait((change_wiring) ? 
 							 THREAD_UNINT :
@@ -3329,7 +3475,7 @@ FastPmapEnter:
 			/*
 			 * On to the next level in the shadow chain
 			 */
-			cur_offset += cur_object->shadow_offset;
+			cur_offset += cur_object->vo_shadow_offset;
 			new_object = cur_object->shadow;
 
 			/*
@@ -3628,18 +3774,22 @@ handle_copy_delay:
 					    caller_pmap,
 					    caller_pmap_addr,
 					    prot,
+					    fault_type,
 					    wired,
 					    change_wiring,
 					    fault_info.no_cache,
+					    fault_info.cs_bypass,
 					    &type_of_fault);
 		} else {
 			kr = vm_fault_enter(m,
 					    pmap,
 					    vaddr,
 					    prot,
+					    fault_type,
 					    wired,
 					    change_wiring,
 					    fault_info.no_cache,
+					    fault_info.cs_bypass,
 					    &type_of_fault);
 		}
 		if (kr != KERN_SUCCESS) {
@@ -3670,7 +3820,7 @@ handle_copy_delay:
 		/* to execute, we return with a protection failure.      */
 
 		if ((fault_type & VM_PROT_EXECUTE) &&
-			(!pmap_eligible_for_execute((ppnum_t)(object->shadow_offset >> 12)))) {
+			(!pmap_eligible_for_execute((ppnum_t)(object->vo_shadow_offset >> 12)))) {
 
 			vm_map_verify_done(map, &version);
 
@@ -3735,7 +3885,7 @@ handle_copy_delay:
 				assert((uint32_t)((ldelta + hdelta) >> 12) == ((ldelta + hdelta) >> 12));
 				pmap_map_block(caller_pmap, 
 					       (addr64_t)(caller_pmap_addr - ldelta), 
-					       (ppnum_t)((((vm_map_offset_t) (entry->object.vm_object->shadow_offset)) +
+					       (ppnum_t)((((vm_map_offset_t) (entry->object.vm_object->vo_shadow_offset)) +
 							  entry->offset + (laddr - entry->vme_start) - ldelta) >> 12),
 					       (uint32_t)((ldelta + hdelta) >> 12), prot, 
 					       (VM_WIMG_MASK & (int)object->wimg_bits) | superpage, 0);
@@ -3746,7 +3896,7 @@ handle_copy_delay:
 				assert((uint32_t)((ldelta + hdelta) >> 12) == ((ldelta + hdelta) >> 12));
 				pmap_map_block(real_map->pmap, 
 					       (addr64_t)(vaddr - ldelta), 
-					       (ppnum_t)((((vm_map_offset_t)(entry->object.vm_object->shadow_offset)) +
+					       (ppnum_t)((((vm_map_offset_t)(entry->object.vm_object->vo_shadow_offset)) +
 							  entry->offset + (laddr - entry->vme_start) - ldelta) >> 12),
 					       (uint32_t)((ldelta + hdelta) >> 12), prot, 
 					       (VM_WIMG_MASK & (int)object->wimg_bits) | superpage, 0);
@@ -3888,6 +4038,8 @@ vm_fault_unwire(
 	fault_info.hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
 	fault_info.no_cache = entry->no_cache;
 	fault_info.stealth = TRUE;
+	fault_info.io_sync = FALSE;
+	fault_info.cs_bypass = FALSE;
 	fault_info.mark_zf_absent = FALSE;
 
 	/*
@@ -3955,16 +4107,17 @@ vm_fault_unwire(
 
 			result_object = result_page->object;
 
-			if ((pmap) && (result_page->phys_page != vm_page_guard_addr)) {
-				pmap_change_wiring(pmap, 
-						   pmap_addr + (va - entry->vme_start), FALSE);
-			}
 			if (deallocate) {
 				assert(result_page->phys_page !=
 				       vm_page_fictitious_addr);
 				pmap_disconnect(result_page->phys_page);
 				VM_PAGE_FREE(result_page);
 			} else {
+				if ((pmap) && (result_page->phys_page != vm_page_guard_addr))
+					pmap_change_wiring(pmap, 
+					    pmap_addr + (va - entry->vme_start), FALSE);
+
+
 				if (VM_PAGE_WIRED(result_page)) {
 					vm_page_lockspin_queues();
 					vm_page_unwire(result_page, TRUE);
@@ -4160,9 +4313,11 @@ vm_fault_wire_fast(
 			    pmap,
 			    pmap_addr,
 			    prot,
+			    prot,
 			    TRUE,
 			    FALSE,
 			    FALSE,
+			    FALSE,
 			    &type_of_fault);
 
 done:
@@ -4295,6 +4450,8 @@ vm_fault_copy(
 	fault_info_src.hi_offset = fault_info_src.lo_offset + amount_left;
 	fault_info_src.no_cache   = FALSE;
 	fault_info_src.stealth = TRUE;
+	fault_info_src.io_sync = FALSE;
+	fault_info_src.cs_bypass = FALSE;
 	fault_info_src.mark_zf_absent = FALSE;
 
 	fault_info_dst.interruptible = interruptible;
@@ -4304,6 +4461,8 @@ vm_fault_copy(
 	fault_info_dst.hi_offset = fault_info_dst.lo_offset + amount_left;
 	fault_info_dst.no_cache   = FALSE;
 	fault_info_dst.stealth = TRUE;
+	fault_info_dst.io_sync = FALSE;
+	fault_info_dst.cs_bypass = FALSE;
 	fault_info_dst.mark_zf_absent = FALSE;
 
 	do { /* while (amount_left > 0) */
@@ -4625,7 +4784,7 @@ vm_fault_classify(vm_object_t		object,
 				break;
 		        }
 
-			offset += object->shadow_offset;
+			offset += object->vo_shadow_offset;
 			object = object->shadow;
 			level++;
 			continue;
@@ -4786,6 +4945,14 @@ vm_page_validate_cs(
 		return;
 	}
 
+#if CHECK_CS_VALIDATION_BITMAP	
+	if ( vnode_pager_cs_check_validation_bitmap( page->object->pager, trunc_page(page->offset + page->object->paging_offset), CS_BITMAP_CHECK ) == KERN_SUCCESS) {
+		page->cs_validated = TRUE;
+		page->cs_tainted = FALSE;
+		vm_cs_bitmap_validated++;
+		return;
+	}
+#endif
 	vm_object_lock_assert_exclusive(page->object);
 
 	object = page->object;
@@ -4823,6 +4990,11 @@ vm_page_validate_cs(
 	/* validate the mapped page */
 	vm_page_validate_cs_mapped(page, (const void *) kaddr);
 
+#if CHECK_CS_VALIDATION_BITMAP	
+	if ( page->cs_validated == TRUE && page->cs_tainted == FALSE ) {
+		vnode_pager_cs_check_validation_bitmap( object->pager, trunc_page( offset + object->paging_offset), CS_BITMAP_SET );
+	}
+#endif
 	assert(page->busy);
 	assert(object == page->object);
 	vm_object_lock_assert_exclusive(object);
diff --git a/osfmk/vm/vm_fault.h b/osfmk/vm/vm_fault.h
index 855100338..6d90a84b0 100644
--- a/osfmk/vm/vm_fault.h
+++ b/osfmk/vm/vm_fault.h
@@ -79,7 +79,6 @@ typedef	kern_return_t	vm_fault_return_t;
 #define VM_FAULT_RETRY			1
 #define VM_FAULT_INTERRUPTED		2
 #define VM_FAULT_MEMORY_SHORTAGE 	3
-#define VM_FAULT_FICTITIOUS_SHORTAGE 	4
 #define VM_FAULT_MEMORY_ERROR		5
 #define VM_FAULT_SUCCESS_NO_VM_PAGE	6	/* success but no VM page */
 
@@ -160,9 +159,11 @@ extern kern_return_t vm_fault_enter(
 	pmap_t pmap,
 	vm_map_offset_t vaddr,
 	vm_prot_t prot,
+	vm_prot_t fault_type,
 	boolean_t wired,
 	boolean_t change_wiring,
 	boolean_t no_cache,
+	boolean_t cs_bypass,
 	int *type_of_fault);
 
 #endif	/* MACH_KERNEL_PRIVATE */
diff --git a/osfmk/vm/vm_init.c b/osfmk/vm/vm_init.c
index 8180254b2..cf29c82f6 100644
--- a/osfmk/vm/vm_init.c
+++ b/osfmk/vm/vm_init.c
@@ -143,6 +143,9 @@ vm_mem_bootstrap(void)
 
 	if (zsize < ZONE_MAP_MIN)
 		zsize = ZONE_MAP_MIN;	/* Clamp to min */
+#if defined(__LP64__)
+	zsize += zsize >> 1;
+#endif  /* __LP64__ */
 	if (zsize > sane_size >> 1)
 		zsize = sane_size >> 1;	/* Clamp to half of RAM max */
 #if !__LP64__
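On LP64 the zone map budget is grown by half after the minimum clamp but before the half-of-RAM cap. A worked example, assuming sane_size and the clamps shown in vm_mem_bootstrap() above:

	vm_size_t zsize = 256 * 1024 * 1024;	/* say 256 MB after the min clamp */

	#if defined(__LP64__)
	zsize += zsize >> 1;			/* +50%: 384 MB on 64-bit */
	#endif

	if (zsize > sane_size >> 1)
		zsize = sane_size >> 1;		/* still capped at half of RAM */
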
diff --git a/osfmk/vm/vm_kern.c b/osfmk/vm/vm_kern.c
index aa0dbafe2..acd7d2a82 100644
--- a/osfmk/vm/vm_kern.c
+++ b/osfmk/vm/vm_kern.c
@@ -422,7 +422,7 @@ kernel_memory_allocate(
 		mem->wpmapped = TRUE;
 
 		PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem, 
-			   VM_PROT_READ | VM_PROT_WRITE, object->wimg_bits & VM_WIMG_MASK, TRUE);
+			   VM_PROT_READ | VM_PROT_WRITE, 0, TRUE);
 
 		if (flags & KMA_NOENCRYPT) {
 			bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);
@@ -550,9 +550,9 @@ kmem_realloc(
 	/* attempt is made to realloc a kmem_alloc'd area       */
 	vm_object_lock(object);
 	vm_map_unlock(map);
-	if (object->size != oldmapsize)
+	if (object->vo_size != oldmapsize)
 		panic("kmem_realloc");
-	object->size = newmapsize;
+	object->vo_size = newmapsize;
 	vm_object_unlock(object);
 
 	/* allocate the new pages while expanded portion of the */
@@ -574,7 +574,7 @@ kmem_realloc(
 				VM_PAGE_FREE(mem);
 			}
 		}
-		object->size = oldmapsize;
+		object->vo_size = oldmapsize;
 		vm_object_unlock(object);
 		vm_object_deallocate(object);
 		return kr;
@@ -598,7 +598,7 @@ kmem_realloc(
 				VM_PAGE_FREE(mem);
 			}
 		}
-		object->size = oldmapsize;
+		object->vo_size = oldmapsize;
 		vm_object_unlock(object);
 		vm_object_deallocate(object);
 		return (kr);
@@ -812,10 +812,7 @@ kmem_remap_pages(
 	    mem->pmapped = TRUE;
 	    mem->wpmapped = TRUE;
 
-	    PMAP_ENTER(kernel_pmap, map_start, mem, protection, 
-			((unsigned int)(mem->object->wimg_bits))
-					& VM_WIMG_MASK,
-			TRUE);
+	    PMAP_ENTER(kernel_pmap, map_start, mem, protection, 0, TRUE);
 
 	    map_start += PAGE_SIZE;
 	    offset += PAGE_SIZE;
@@ -892,7 +889,6 @@ kmem_suballoc(
 	return (KERN_SUCCESS);
 }
 
-
 /*
  *	kmem_init:
  *
@@ -910,25 +906,35 @@ kmem_init(
 	map_start = vm_map_trunc_page(start);
 	map_end = vm_map_round_page(end);
 
-	kernel_map = vm_map_create(pmap_kernel(),VM_MIN_KERNEL_ADDRESS,
+	kernel_map = vm_map_create(pmap_kernel(),VM_MIN_KERNEL_AND_KEXT_ADDRESS,
 			    map_end, FALSE);
 	/*
 	 *	Reserve virtual memory allocated up to this time.
 	 */
-	if (start != VM_MIN_KERNEL_ADDRESS) {
+	if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
 		vm_map_offset_t map_addr;
+		kern_return_t kr;
  
-		map_addr = VM_MIN_KERNEL_ADDRESS;
-		(void) vm_map_enter(kernel_map,
-			    &map_addr, 
-			    (vm_map_size_t)(map_start - VM_MIN_KERNEL_ADDRESS),
-			    (vm_map_offset_t) 0,
-			    VM_FLAGS_ANYWHERE | VM_FLAGS_NO_PMAP_CHECK,
-			    VM_OBJECT_NULL, 
-			    (vm_object_offset_t) 0, FALSE,
-			    VM_PROT_NONE, VM_PROT_NONE,
-			    VM_INHERIT_DEFAULT);
+		map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
+		kr = vm_map_enter(kernel_map,
+			&map_addr, 
+		    	(vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
+			(vm_map_offset_t) 0,
+			VM_FLAGS_FIXED | VM_FLAGS_NO_PMAP_CHECK,
+			VM_OBJECT_NULL, 
+			(vm_object_offset_t) 0, FALSE,
+			VM_PROT_NONE, VM_PROT_NONE,
+			VM_INHERIT_DEFAULT);
+		
+		if (kr != KERN_SUCCESS) {
+			panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
+			      (uint64_t) start, (uint64_t) end,
+			      (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
+			      (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
+			      kr);
+		}	
 	}
+
 	/*
 	 * Set the default global user wire limit which limits the amount of
 	 * memory that can be locked via mlock().  We set this to the total
@@ -1057,7 +1063,7 @@ vm_conflict_check(
 		obj = entry->object.vm_object;
 		obj_off = (off - entry->vme_start) + entry->offset;
 		while(obj->shadow) {
-			obj_off += obj->shadow_offset;
+			obj_off += obj->vo_shadow_offset;
 			obj = obj->shadow;
 		}
 		if((obj->pager_created) && (obj->pager == pager)) {
diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c
index 68fece885..1fe35f53f 100644
--- a/osfmk/vm/vm_map.c
+++ b/osfmk/vm/vm_map.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -102,12 +102,9 @@
 #include <vm/vm_protos.h>
 #include <vm/vm_purgeable_internal.h>
 
-#ifdef ppc
-#include <ppc/mappings.h>
-#endif /* ppc */
-
 #include <vm/vm_protos.h>
 #include <vm/vm_shared_region.h>
+#include <vm/vm_map_store.h>
 
 /* Internal prototypes
  */
@@ -235,7 +232,8 @@ static kern_return_t	vm_map_copy_overwrite_nested(
 	vm_map_offset_t		   dst_addr,
 	vm_map_copy_t		   copy,
 	boolean_t		   interruptible,
-	pmap_t			   pmap);
+	pmap_t			   pmap,
+	boolean_t		   discard_on_success);
 
 static kern_return_t	vm_map_remap_extract(
 	vm_map_t		map,
@@ -290,6 +288,12 @@ static kern_return_t	vm_map_can_reuse(
 	vm_map_offset_t	start,
 	vm_map_offset_t	end);
 
+#if CONFIG_FREEZE
+struct default_freezer_table;
+__private_extern__ void* default_freezer_mapping_create(vm_object_t, vm_offset_t);
+__private_extern__ void  default_freezer_mapping_free(void**, boolean_t all);	
+#endif
+
 /*
  * Macros to copy a vm_map_entry. We must be careful to correctly
  * manage the wired page count. vm_map_entry_copy() creates a new
@@ -334,7 +338,12 @@ MACRO_END
  * 	execute from a page that lacks execute permission.
  *
  *	Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
- *	default behavior for both 32 and 64 bit apps on a system-wide basis.
+ *	default behavior for both 32- and 64-bit apps on a system-wide basis. Furthermore,
+ *	a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
+ *	execution from data areas for a particular binary even if the arch normally permits it. As
+ *	a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
+ *	to support some complicated use cases, notably browsers with out-of-process plugins that
+ *	are not all NX-safe.
  */
 
 extern int allow_data_exec, allow_stack_exec;
@@ -361,7 +370,7 @@ override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
 	if (user_tag == VM_MEMORY_STACK)
 		return allow_stack_exec & current_abi;
 
-	return allow_data_exec & current_abi;
+	return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
 }
 
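override_nx() now layers three inputs: the per-ABI sysctl policy, the stack/data distinction from the allocation tag, and the new per-map veto (fed, per the comment above, by the MH_NO_HEAP_EXECUTION header bit). Condensed restatement as a sketch; the function name is hypothetical, the variables are the externs shown above:

	static int
	nx_override_allowed(vm_map_t map, uint32_t user_tag, int current_abi)
	{
		if (user_tag == VM_MEMORY_STACK)
			return allow_stack_exec & current_abi;

		return (allow_data_exec & current_abi) &&
		       (map->map_disallow_data_exec == FALSE);
	}
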
 
@@ -428,12 +437,57 @@ static void		*kentry_data;
 static vm_size_t	kentry_data_size;
 static int		kentry_count = 2048;		/* to init kentry_data_size */
 
+#if CONFIG_EMBEDDED
+#define		NO_COALESCE_LIMIT  0
+#else
 #define         NO_COALESCE_LIMIT  ((1024 * 128) - 1)
-
+#endif
 
 /* Skip acquiring locks if we're in the midst of a kernel core dump */
 unsigned int not_in_kdp = 1;
 
+unsigned int vm_map_set_cache_attr_count = 0;
+
+kern_return_t
+vm_map_set_cache_attr(
+	vm_map_t	map,
+	vm_map_offset_t	va)
+{
+	vm_map_entry_t	map_entry;
+	vm_object_t	object;
+	kern_return_t	kr = KERN_SUCCESS;
+
+	vm_map_lock_read(map);
+
+	if (!vm_map_lookup_entry(map, va, &map_entry) ||
+	    map_entry->is_sub_map) {
+		/*
+		 * that memory is not properly mapped
+		 */
+		kr = KERN_INVALID_ARGUMENT;
+		goto done;
+	}
+	object = map_entry->object.vm_object;
+
+	if (object == VM_OBJECT_NULL) {
+		/*
+		 * there should be a VM object here at this point
+		 */
+		kr = KERN_INVALID_ARGUMENT;
+		goto done;
+	}
+	vm_object_lock(object);
+	object->set_cache_attr = TRUE;
+	vm_object_unlock(object);
+
+	vm_map_set_cache_attr_count++;
+done:
+	vm_map_unlock_read(map);
+
+	return kr;
+}
+
+
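vm_map_set_cache_attr() only flags the VM object behind the mapping (set_cache_attr = TRUE); applying the override to physical pages happens elsewhere, e.g. via the pmap_set_cache_attributes() hook declared earlier in this patch. A hedged usage sketch; `user_va` is a hypothetical address already mapped into the current task:

	kern_return_t kr;

	kr = vm_map_set_cache_attr(current_map(), (vm_map_offset_t)user_va);
	if (kr != KERN_SUCCESS) {
		/* va unmapped, backed by a submap, or no VM object present */
	}
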
 #if CONFIG_CODE_DECRYPTION
 /*
  * vm_map_apple_protected:
@@ -565,7 +619,6 @@ vm_map_init(
 			    PAGE_SIZE, "maps");
 	zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
 
-
 	vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
 				  1024*1024, PAGE_SIZE*5,
 				  "non-kernel map entries");
@@ -588,6 +641,9 @@ vm_map_init(
 	zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
 	zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
 	zone_change(vm_map_kentry_zone, Z_FOREIGN, TRUE);
+	zone_change(vm_map_kentry_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
+	zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
+
 	zcram(vm_map_zone, map_data, map_data_size);
 	zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
 	
@@ -646,6 +702,8 @@ vm_map_create(
 	result->hdr.nentries = 0;
 	result->hdr.entries_pageable = pageable;
 
+	vm_map_store_init( &(result->hdr) );
+	
 	result->size = 0;
 	result->user_wire_limit = MACH_VM_MAX_ADDRESS;	/* default limit is unlimited */
 	result->user_wire_size  = 0;
@@ -662,9 +720,16 @@ vm_map_create(
 	result->mapped = FALSE;
 	result->wait_for_space = FALSE;
 	result->switch_protect = FALSE;
+	result->disable_vmentry_reuse = FALSE;
+	result->map_disallow_data_exec = FALSE;
+	result->highest_entry_end = 0;
 	result->first_free = vm_map_to_entry(result);
 	result->hint = vm_map_to_entry(result);
 	result->color_rr = (color_seed++) & vm_color_mask;
+ 	result->jit_entry_exists = FALSE;
+#if CONFIG_FREEZE
+	result->default_freezer_toc = NULL;
+#endif
 	vm_map_lock_init(result);
 	lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
 	
@@ -698,6 +763,7 @@ _vm_map_entry_create(
 	entry = (vm_map_entry_t) zalloc(zone);
 	if (entry == VM_MAP_ENTRY_NULL)
 		panic("vm_map_entry_create");
+	vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
 
 	return(entry);
 }
@@ -712,13 +778,8 @@ _vm_map_entry_create(
  * 	of the stores
  */
 #define	vm_map_entry_dispose(map, entry)			\
-	MACRO_BEGIN						\
-	if((entry) == (map)->first_free)			\
-		(map)->first_free = vm_map_to_entry(map);	\
-	if((entry) == (map)->hint)				\
-		(map)->hint = vm_map_to_entry(map);		\
-	_vm_map_entry_dispose(&(map)->hdr, (entry));		\
-	MACRO_END
+	vm_map_store_update( map, entry, VM_MAP_ENTRY_DELETE);	\
+	_vm_map_entry_dispose(&(map)->hdr, (entry))
 
 #define	vm_map_copy_entry_dispose(map, entry) \
 	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
@@ -739,116 +800,24 @@ _vm_map_entry_dispose(
 }
 
 #if MACH_ASSERT
-static boolean_t first_free_is_valid(vm_map_t map);	/* forward */
 static boolean_t first_free_check = FALSE;
-static boolean_t
+boolean_t
 first_free_is_valid(
 	vm_map_t	map)
 {
-	vm_map_entry_t	entry, next;
-
 	if (!first_free_check)
 		return TRUE;
 	
-	entry = vm_map_to_entry(map);
-	next = entry->vme_next;
-	while (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_end) ||
-	       (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_start) &&
-		next != vm_map_to_entry(map))) {
-		entry = next;
-		next = entry->vme_next;
-		if (entry == vm_map_to_entry(map))
-			break;
-	}
-	if (map->first_free != entry) {
-		printf("Bad first_free for map %p: %p should be %p\n",
-		       map, map->first_free, entry);
-		return FALSE;
-	}
-	return TRUE;
+	return( first_free_is_valid_store( map ));	
 }
 #endif /* MACH_ASSERT */
 
-/*
- *	UPDATE_FIRST_FREE:
- *
- *	Updates the map->first_free pointer to the
- *	entry immediately before the first hole in the map.
- * 	The map should be locked.
- */
-#define UPDATE_FIRST_FREE(map, new_first_free) 				\
-	MACRO_BEGIN							\
-	vm_map_t	UFF_map; 					\
-	vm_map_entry_t	UFF_first_free; 				\
-	vm_map_entry_t	UFF_next_entry; 				\
-	UFF_map = (map); 						\
-	UFF_first_free = (new_first_free);				\
-	UFF_next_entry = UFF_first_free->vme_next; 			\
-	while (vm_map_trunc_page(UFF_next_entry->vme_start) == 		\
-	       vm_map_trunc_page(UFF_first_free->vme_end) || 			\
-	       (vm_map_trunc_page(UFF_next_entry->vme_start) == 		\
-		vm_map_trunc_page(UFF_first_free->vme_start) &&		\
-		UFF_next_entry != vm_map_to_entry(UFF_map))) { 		\
-		UFF_first_free = UFF_next_entry; 			\
-		UFF_next_entry = UFF_first_free->vme_next; 		\
-		if (UFF_first_free == vm_map_to_entry(UFF_map)) 	\
-			break; 						\
-	} 								\
-	UFF_map->first_free = UFF_first_free; 				\
-	assert(first_free_is_valid(UFF_map));				\
-	MACRO_END
-
-/*
- *	vm_map_entry_{un,}link:
- *
- *	Insert/remove entries from maps (or map copies).
- */
-#define vm_map_entry_link(map, after_where, entry)			\
-	MACRO_BEGIN							\
-	vm_map_t VMEL_map; 						\
-	vm_map_entry_t VMEL_entry; 					\
-	VMEL_map = (map);						\
-	VMEL_entry = (entry); 						\
-	_vm_map_entry_link(&VMEL_map->hdr, after_where, VMEL_entry); 	\
-	UPDATE_FIRST_FREE(VMEL_map, VMEL_map->first_free); 		\
-	MACRO_END
-
 
 #define vm_map_copy_entry_link(copy, after_where, entry)		\
-	_vm_map_entry_link(&(copy)->cpy_hdr, after_where, (entry))
-
-#define _vm_map_entry_link(hdr, after_where, entry)			\
-	MACRO_BEGIN							\
-	(hdr)->nentries++;						\
-	(entry)->vme_prev = (after_where);				\
-	(entry)->vme_next = (after_where)->vme_next;			\
-	(entry)->vme_prev->vme_next = (entry)->vme_next->vme_prev = (entry); \
-	MACRO_END
-
-#define vm_map_entry_unlink(map, entry)					\
-	MACRO_BEGIN							\
-	vm_map_t VMEU_map; 						\
-	vm_map_entry_t VMEU_entry; 					\
-	vm_map_entry_t VMEU_first_free;					\
-	VMEU_map = (map); 						\
-	VMEU_entry = (entry); 						\
-	if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start)	\
-		VMEU_first_free = VMEU_entry->vme_prev;			\
-	else								\
-		VMEU_first_free = VMEU_map->first_free;			\
-	_vm_map_entry_unlink(&VMEU_map->hdr, VMEU_entry); 		\
-	UPDATE_FIRST_FREE(VMEU_map, VMEU_first_free);			\
-	MACRO_END
+	_vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
 
 #define vm_map_copy_entry_unlink(copy, entry)				\
-	_vm_map_entry_unlink(&(copy)->cpy_hdr, (entry))
-
-#define _vm_map_entry_unlink(hdr, entry)				\
-	MACRO_BEGIN							\
-	(hdr)->nentries--;						\
-	(entry)->vme_next->vme_prev = (entry)->vme_prev; 		\
-	(entry)->vme_prev->vme_next = (entry)->vme_next; 		\
-	MACRO_END
+	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
 
 #if	MACH_ASSERT && TASK_SWAPPER
 /*
@@ -935,22 +904,14 @@ vm_map_destroy(
 	(void) vm_map_delete(map, map->min_offset, map->max_offset,
 			     flags, VM_MAP_NULL);
 	/* clean up leftover special mappings (commpage, etc...) */
-#ifdef __ppc__
-	/*
-	 * PPC51: ppc64 is limited to 51-bit addresses.
-	 * Memory beyond this 51-bit limit is mapped specially at the
-	 * pmap level, so do not interfere.
-	 * On PPC64, the commpage is mapped beyond the addressable range
-	 * via a special pmap hack, so ask pmap to clean it explicitly...
-	 */
-	if (map->pmap) {
-		pmap_unmap_sharedpage(map->pmap);
-	}
-	/* ... and do not let regular pmap cleanup apply here */
-	flags |= VM_MAP_REMOVE_NO_PMAP_CLEANUP;
-#endif /* __ppc__ */
 	(void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
 			     flags, VM_MAP_NULL);
+
+#if CONFIG_FREEZE
+	if (map->default_freezer_toc){
+		default_freezer_mapping_free( &(map->default_freezer_toc), TRUE);
+	}
+#endif
 	vm_map_unlock(map);
 
 	assert(map->hdr.nentries == 0);
@@ -1144,42 +1105,12 @@ void vm_map_swapout(vm_map_t map)
 
 #endif	/* TASK_SWAPPER */
 
-
-/*
- *	SAVE_HINT_MAP_READ:
- *
- *	Saves the specified entry as the hint for
- *	future lookups.  only a read lock is held on map,
- * 	so make sure the store is atomic... OSCompareAndSwap
- *	guarantees this... also, we don't care if we collide
- *	and someone else wins and stores their 'hint'
- */
-#define	SAVE_HINT_MAP_READ(map,value) \
-	MACRO_BEGIN							\
-	OSCompareAndSwapPtr((map)->hint, value, &(map)->hint); \
-	MACRO_END
-
-
-/*
- *	SAVE_HINT_MAP_WRITE:
- *
- *	Saves the specified entry as the hint for
- *	future lookups.  write lock held on map,
- * 	so no one else can be writing or looking
- * 	until the lock is dropped, so it's safe
- * 	to just do an assignment
- */
-#define	SAVE_HINT_MAP_WRITE(map,value) \
-	MACRO_BEGIN		       \
-	(map)->hint = (value);	       \
-	MACRO_END
-
 /*
  *	vm_map_lookup_entry:	[ internal use only ]
  *
- *	Finds the map entry containing (or
- *	immediately preceding) the specified address
- *	in the given map; the entry is returned
+ *	Calls into the vm map store layer to find the map 
+ *	entry containing (or immediately preceding) the 
+ *	specified address in the given map; the entry is returned
  *	in the "entry" parameter.  The boolean
  *	result indicates whether the address is
  *	actually contained in the map.
@@ -1190,69 +1121,7 @@ vm_map_lookup_entry(
 	register vm_map_offset_t	address,
 	vm_map_entry_t		*entry)		/* OUT */
 {
-	register vm_map_entry_t		cur;
-	register vm_map_entry_t		last;
-
-	/*
-	 *	Start looking either from the head of the
-	 *	list, or from the hint.
-	 */
-	cur = map->hint;
-
-	if (cur == vm_map_to_entry(map))
-		cur = cur->vme_next;
-
-	if (address >= cur->vme_start) {
-		/*
-		 *	Go from hint to end of list.
-		 *
-		 *	But first, make a quick check to see if
-		 *	we are already looking at the entry we
-		 *	want (which is usually the case).
-		 *	Note also that we don't need to save the hint
-		 *	here... it is the same hint (unless we are
-		 *	at the header, in which case the hint didn't
-		 *	buy us anything anyway).
-		 */
-		last = vm_map_to_entry(map);
-		if ((cur != last) && (cur->vme_end > address)) {
-			*entry = cur;
-			return(TRUE);
-		}
-	}
-	else {
-		/*
-		 *	Go from start to hint, *inclusively*
-		 */
-		last = cur->vme_next;
-		cur = vm_map_first_entry(map);
-	}
-
-	/*
-	 *	Search linearly
-	 */
-
-	while (cur != last) {
-		if (cur->vme_end > address) {
-			if (address >= cur->vme_start) {
-				/*
-				 *	Save this lookup for future
-				 *	hints, and return
-				 */
-
-				*entry = cur;
-				SAVE_HINT_MAP_READ(map, cur);
-
-				return(TRUE);
-			}
-			break;
-		}
-		cur = cur->vme_next;
-	}
-	*entry = cur->vme_prev;
-	SAVE_HINT_MAP_READ(map, *entry);
-
-	return(FALSE);
+	return ( vm_map_store_lookup_entry( map, address, entry ));
 }
 
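vm_map_lookup_entry() is now a thin shim over the store layer (vm_map_store.h, included earlier in this patch); the hint-based linear walk deleted above lives behind that interface, which lets alternative backing structures (e.g. a balanced tree) be swapped in without touching callers. The caller-visible contract is unchanged:

	boolean_t	found;
	vm_map_entry_t	entry;

	/* TRUE: address falls inside `entry`.  FALSE: `entry` is set to the
	 * entry immediately preceding `address` (the map header if none). */
	found = vm_map_lookup_entry(map, address, &entry);
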
 /*
@@ -1300,11 +1169,15 @@ vm_map_find_space(
 
 	vm_map_lock(map);
 
-	assert(first_free_is_valid(map));
-	if ((entry = map->first_free) == vm_map_to_entry(map))
-		start = map->min_offset;
-	else
-		start = entry->vme_end;
+	if( map->disable_vmentry_reuse == TRUE) {
+		VM_MAP_HIGHEST_ENTRY(map, entry, start);
+	} else {
+		assert(first_free_is_valid(map));
+		if ((entry = map->first_free) == vm_map_to_entry(map))
+			start = map->min_offset;
+		else
+			start = entry->vme_end;
+	}
 
 	/*
 	 *	In any case, the "entry" always precedes
@@ -1415,7 +1288,7 @@ vm_map_find_space(
 	 *	Insert the new entry into the list
 	 */
 
-	vm_map_entry_link(map, entry, new_entry);
+	vm_map_store_entry_link(map, entry, new_entry);
 
 	map->size += size;
 
@@ -1484,8 +1357,8 @@ vm_map_pmap_enter(
 			       map, (unsigned long long)addr, object, (unsigned long long)offset);
 		}
 		type_of_fault = DBG_CACHE_HIT_FAULT;
-		kr = vm_fault_enter(m, map->pmap, addr, protection,
-				    VM_PAGE_WIRED(m), FALSE, FALSE,
+		kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
+				    VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
 				    &type_of_fault);
 
 		vm_object_unlock(object);
@@ -1589,6 +1462,9 @@ vm_map_enter(
 			 * with a lookup of the size depending on superpage_size.
 			 */
 #ifdef __x86_64__
+			case SUPERPAGE_SIZE_ANY:
+				/* handle it like 2 MB and round up to page size */
+				size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
 			case SUPERPAGE_SIZE_2MB:
 				break;
 #endif
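SUPERPAGE_SIZE_ANY rounds the request up to a 2 MB multiple and then deliberately falls through to the SUPERPAGE_SIZE_2MB case (note the absent break). The mask arithmetic, with a concrete value:

	/* Round up to the next 2 MB boundary. */
	vm_map_size_t size = 3*1024*1024 + 1;	/* 3 MB + 1 byte */
	size = (size + 2*1024*1024 - 1) & ~(vm_map_size_t)(2*1024*1024 - 1);
	/* (0x300001 + 0x1FFFFF) & ~0x1FFFFF == 0x400000, i.e. 4 MB */
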
@@ -1601,9 +1477,10 @@ vm_map_enter(
 		inheritance = VM_INHERIT_NONE;	/* fork() children won't inherit superpages */
 	}
 
+
 #if CONFIG_EMBEDDED
-	if (cur_protection & VM_PROT_WRITE) {
-		if (cur_protection & VM_PROT_EXECUTE) {
+	if (cur_protection & VM_PROT_WRITE){
+		if ((cur_protection & VM_PROT_EXECUTE) && !(flags & VM_FLAGS_MAP_JIT)){
 			printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
 			cur_protection &= ~VM_PROT_EXECUTE;
 		}
@@ -1634,14 +1511,7 @@ vm_map_enter(
 		}
 	}
 
-	if (flags & VM_FLAGS_BELOW_MIN) {
-		/*
-		 * Allow an insertion below the map's min offset.
-		 */
-		effective_min_offset = 0ULL;
-	} else {
-		effective_min_offset = map->min_offset;
-	}
+	effective_min_offset = map->min_offset;
 
 	if (flags & VM_FLAGS_BEYOND_MAX) {
 		/*
@@ -1675,7 +1545,7 @@ vm_map_enter(
 	if (purgable &&
 	    (offset != 0 ||
 	     (object != VM_OBJECT_NULL &&
-	      (object->size != size ||
+	      (object->vo_size != size ||
 	       object->purgable == VM_PURGABLE_DENY))
 	     || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
 		return KERN_INVALID_ARGUMENT;
@@ -1703,6 +1573,11 @@ StartAgain: ;
 	if (anywhere) {
 		vm_map_lock(map);
 		map_locked = TRUE;
+		
+		if ((flags & VM_FLAGS_MAP_JIT) && (map->jit_entry_exists)){
+			result = KERN_INVALID_ARGUMENT;
+			goto BailOut;
+		}
 
 		/*
 		 *	Calculate the first possible address.
@@ -1719,15 +1594,39 @@ StartAgain: ;
 		 *	address, we have to start after it.
 		 */
 
-		assert(first_free_is_valid(map));
-		if (start == effective_min_offset) {
-			if ((entry = map->first_free) != vm_map_to_entry(map))
-				start = entry->vme_end;
+		if( map->disable_vmentry_reuse == TRUE) {
+			VM_MAP_HIGHEST_ENTRY(map, entry, start);
 		} else {
-			vm_map_entry_t	tmp_entry;
-			if (vm_map_lookup_entry(map, start, &tmp_entry))
-				start = tmp_entry->vme_end;
-			entry = tmp_entry;
+			assert(first_free_is_valid(map));
+
+			entry = map->first_free;
+
+			if (entry == vm_map_to_entry(map)) {
+				entry = NULL;
+			} else {
+				if (entry->vme_next == vm_map_to_entry(map)) {
+					/*
+					 * Hole at the end of the map.
+					 */
+					entry = NULL;
+				} else {
+					if (start < (entry->vme_next)->vme_start) {
+						start = entry->vme_end;
+					} else {
+						/*
+						 * Need to do a lookup.
+						 */
+						entry = NULL;
+					}
+				}
+			}
+
+			if (entry == NULL) {
+				vm_map_entry_t	tmp_entry;
+				if (vm_map_lookup_entry(map, start, &tmp_entry))
+					start = tmp_entry->vme_end;
+				entry = tmp_entry;
+			}
 		}
 
 		/*
@@ -1944,7 +1843,7 @@ StartAgain: ;
 		   (entry->vme_end == start) &&
 		   (!entry->is_shared) &&
 		   (!entry->is_sub_map) &&
-		   (entry->alias == alias) &&
+		   ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
 		   (entry->inheritance == inheritance) &&
 		   (entry->protection == cur_protection) &&
 		   (entry->max_protection == max_protection) &&
@@ -1970,7 +1869,7 @@ StartAgain: ;
 			 */
 			map->size += (end - entry->vme_end);
 			entry->vme_end = end;
-			UPDATE_FIRST_FREE(map, map->first_free);
+			vm_map_store_update_first_free(map, map->first_free);
 			RETURN(KERN_SUCCESS);
 		}
 	}
@@ -2009,9 +1908,17 @@ StartAgain: ;
 							FALSE, FALSE,
 							cur_protection, max_protection,
 							VM_BEHAVIOR_DEFAULT,
-							inheritance, 0, no_cache,
+							(flags & VM_FLAGS_MAP_JIT) ? VM_INHERIT_NONE : inheritance,
+							0, no_cache,
 							permanent, superpage_size);
 			new_entry->alias = alias;
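+			/* Tag the (single) JIT entry; "used_for_jit" later drives fault_info->cs_bypass in vm_map_lookup_locked(). */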
+			if (flags & VM_FLAGS_MAP_JIT) {
+				if (!(map->jit_entry_exists)) {
+					new_entry->used_for_jit = TRUE;
+					map->jit_entry_exists = TRUE;
+				}
+			}
+
 			if (is_submap) {
 				vm_map_t	submap;
 				boolean_t	submap_is_64bit;
@@ -2068,7 +1975,7 @@ StartAgain: ;
 				/* create one vm_object per superpage */
 				sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
 				sp_object->phys_contiguous = TRUE;
-				sp_object->shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
+				sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
 				entry->object.vm_object = sp_object;
 
 				/* enter the base pages into the object */
@@ -2225,10 +2132,10 @@ BailOut: ;
 
 					entry_size = (entry2->vme_end -
 						      entry2->vme_start);
-					vm_map_entry_unlink(zap_old_map,
+					vm_map_store_entry_unlink(zap_old_map,
 							    entry2);
 					zap_old_map->size -= entry_size;
-					vm_map_entry_link(map, entry1, entry2);
+					vm_map_store_entry_link(map, entry1, entry2);
 					map->size += entry_size;
 					entry1 = entry2;
 				}
@@ -2284,6 +2191,12 @@ vm_map_enter_mem_object(
 	vm_object_t		object;
 	vm_object_size_t	size;
 	kern_return_t		result;
+	boolean_t		mask_cur_protection, mask_max_protection;
+
+	mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
+	mask_max_protection = max_protection & VM_PROT_IS_MASK;
+	cur_protection &= ~VM_PROT_IS_MASK;
+	max_protection &= ~VM_PROT_IS_MASK;
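+
+	/*
+	 * VM_PROT_IS_MASK: the caller's protections are masks to be
+	 * applied against a named entry's own protection (see the
+	 * named-entry handling below).
+	 */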
 
 	/*
 	 * Check arguments for validity
@@ -2294,7 +2207,7 @@ vm_map_enter_mem_object(
 	    (inheritance > VM_INHERIT_LAST_VALID) ||
 	    initial_size == 0)
 		return KERN_INVALID_ARGUMENT;
-
+	
 	map_addr = vm_map_trunc_page(*address);
 	map_size = vm_map_round_page(initial_size);
 	size = vm_object_round_page(initial_size);	
@@ -2316,6 +2229,12 @@ vm_map_enter_mem_object(
 				return KERN_INVALID_RIGHT;
 			size = named_entry->size - offset;
 		}
+		if (mask_max_protection) {
+			max_protection &= named_entry->protection;
+		}
+		if (mask_cur_protection) {
+			cur_protection &= named_entry->protection;
+		}
 		if ((named_entry->protection & max_protection) !=
 		    max_protection)
 			return KERN_INVALID_RIGHT;
@@ -2379,7 +2298,6 @@ vm_map_enter_mem_object(
 			unsigned int	access;
 			vm_prot_t	protections;
 			unsigned int	wimg_mode;
-			boolean_t	cache_attr;
 
 			protections = named_entry->protection & VM_PROT_ALL;
 			access = GET_MAP_MEM(named_entry->protection);
@@ -2404,6 +2322,7 @@ vm_map_enter_mem_object(
 			named_entry_unlock(named_entry);
 
 			wimg_mode = object->wimg_bits;
+
 			if (access == MAP_MEM_IO) {
 				wimg_mode = VM_WIMG_IO;
 			} else if (access == MAP_MEM_COPYBACK) {
@@ -2413,11 +2332,6 @@ vm_map_enter_mem_object(
 			} else if (access == MAP_MEM_WCOMB) {
 				wimg_mode = VM_WIMG_WCOMB;
 			}
-			if (wimg_mode == VM_WIMG_IO ||
-			    wimg_mode == VM_WIMG_WCOMB)
-				cache_attr = TRUE;
-			else 
-				cache_attr = FALSE;
 
 			/* wait for object (if any) to be ready */
 			if (!named_entry->internal) {
@@ -2430,22 +2344,11 @@ vm_map_enter_mem_object(
 				}
 			}
 
-			if (object->wimg_bits != wimg_mode) {
-				vm_page_t p;
+			if (object->wimg_bits != wimg_mode)
+				vm_object_change_wimg_mode(object, wimg_mode);
 
-				vm_object_paging_wait(object, THREAD_UNINT);
-
-				object->wimg_bits = wimg_mode;
-				queue_iterate(&object->memq, p, vm_page_t, listq) {
-					if (!p->fictitious) {
-					        if (p->pmapped)
-						        pmap_disconnect(p->phys_page);
-						if (cache_attr)
-						        pmap_sync_page_attributes_phys(p->phys_page);
-					}
-				}
-			}
 			object->true_share = TRUE;
+
 			if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
 				object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
 			vm_object_unlock(object);
@@ -2926,8 +2829,8 @@ vm_map_enter_cpm(
 
 		type_of_fault = DBG_ZERO_FILL_FAULT;
 
-		vm_fault_enter(m, pmap, va, VM_PROT_ALL,
-			       VM_PAGE_WIRED(m), FALSE, FALSE,
+		vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
+			       VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
 			       &type_of_fault);
 
 		vm_object_unlock(cpm_obj);
@@ -3037,13 +2940,13 @@ vm_map_clip_unnest(
 		_vm_map_clip_start(&map->hdr,
 				   entry,
 				   start_unnest);
-		UPDATE_FIRST_FREE(map, map->first_free);
+		vm_map_store_update_first_free(map, map->first_free);
 	}
 	if (entry->vme_end > end_unnest) {
 		_vm_map_clip_end(&map->hdr,
 				 entry,
 				 end_unnest);
-		UPDATE_FIRST_FREE(map, map->first_free);
+		vm_map_store_update_first_free(map, map->first_free);
 	}
 
 	pmap_unnest(map->pmap,
@@ -3100,7 +3003,7 @@ vm_map_clip_start(
 				    (addr64_t)(entry->vme_end));
 		}
 		_vm_map_clip_start(&map->hdr, entry, startaddr);
-		UPDATE_FIRST_FREE(map, map->first_free);
+		vm_map_store_update_first_free(map, map->first_free);
 	}
 }
 
@@ -3138,7 +3041,7 @@ _vm_map_clip_start(
 	entry->offset += (start - entry->vme_start);
 	entry->vme_start = start;
 
-	_vm_map_entry_link(map_header, entry->vme_prev, new_entry);
+	_vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
 
 	if (entry->is_sub_map)
 		vm_map_reference(new_entry->object.sub_map);
@@ -3194,7 +3097,7 @@ vm_map_clip_end(
 				    (addr64_t)(entry->vme_end));
 		}
 		_vm_map_clip_end(&map->hdr, entry, endaddr);
-		UPDATE_FIRST_FREE(map, map->first_free);
+		vm_map_store_update_first_free(map, map->first_free);
 	}
 }
 
@@ -3228,7 +3131,7 @@ _vm_map_clip_end(
 	new_entry->vme_start = entry->vme_end = end;
 	new_entry->offset += (end - entry->vme_start);
 
-	_vm_map_entry_link(map_header, entry, new_entry);
+	_vm_map_store_entry_link(map_header, entry, new_entry);
 
 	if (entry->is_sub_map)
 		vm_map_reference(new_entry->object.sub_map);
@@ -3497,7 +3400,7 @@ vm_map_protect(
 
 #if CONFIG_EMBEDDED
 		if (new_prot & VM_PROT_WRITE) {
-			if (new_prot & VM_PROT_EXECUTE) {
+			if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) {
 				printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
 				new_prot &= ~VM_PROT_EXECUTE;
 			}
@@ -3541,6 +3444,11 @@ vm_map_protect(
 			/* will include write.  Caller must be prepared   */
 			/* for loss of shared memory communication in the */
 			/* target area after taking this step */
+
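+			/* An entry with no backing object gets a fresh one here, so the needs_copy logic below has an object to operate on. */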
+			if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
+				current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
+				current->offset = 0;
+			}
 			current->needs_copy = TRUE;
 			current->max_protection |= VM_PROT_WRITE;
 		}
@@ -3676,6 +3584,7 @@ add_wire_counts(
 	vm_map_size_t	size;
 
 	if (user_wire) {
+		unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
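+		/* Pages on the low-memory freelist count against the global wiring limits below as well. */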
 
 		/*
 		 * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
@@ -3694,8 +3603,8 @@ add_wire_counts(
 			 */
 
 			if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
-			   size + ptoa_64(vm_page_wire_count) > vm_global_user_wire_limit ||
-		    	   size + ptoa_64(vm_page_wire_count) > max_mem - vm_global_no_user_wire_amount)
+			   size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
+		    	   size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
 				return KERN_RESOURCE_SHORTAGE;
 
 			/*
@@ -3907,9 +3816,6 @@ vm_map_wire_nested(
 			 * Worse that can happen is, it may not exist anymore.
 			 */
 			if (!vm_map_lookup_entry(map, s, &first_entry)) {
-				if (!user_wire)
-					panic("vm_map_wire: re-lookup failed");
-
 				/*
 				 * User: undo everything up to the previous
 				 * entry.  Let vm_map_unwire worry about
@@ -4269,25 +4175,8 @@ vm_map_wire(
 
 	kern_return_t	kret;
 
-#ifdef ppc
-        /*
-	 * the calls to mapping_prealloc and mapping_relpre
-	 * (along with the VM_MAP_RANGE_CHECK to insure a
-	 * resonable range was passed in) are
-	 * currently necessary because
-	 * we haven't enabled kernel pre-emption
-	 * and/or the pmap_enter cannot purge and re-use
-	 * existing mappings
-	 */
-	VM_MAP_RANGE_CHECK(map, start, end);
-	assert((unsigned int) (end - start) == (end - start));
-	mapping_prealloc((unsigned int) (end - start));
-#endif
 	kret = vm_map_wire_nested(map, start, end, access_type, 
 				  user_wire, (pmap_t)NULL, 0);
-#ifdef ppc
-	mapping_relpre();
-#endif
 	return kret;
 }
 
@@ -4677,7 +4566,7 @@ vm_map_entry_delete(
 		object = entry->object.vm_object;
 	}
 
-	vm_map_entry_unlink(map, entry);
+	vm_map_store_entry_unlink(map, entry);
 	map->size -= e - s;
 
 	vm_map_entry_dispose(map, entry);
@@ -4735,7 +4624,7 @@ vm_map_submap_pmap_clean(
 			   && (entry->object.vm_object != NULL)) {
 				vm_object_pmap_protect(
 					entry->object.vm_object,
-					entry->offset,
+					entry->offset+(offset-entry->vme_start),
 					remove_size,
 					PMAP_NULL,
 					entry->vme_start,
@@ -5192,9 +5081,9 @@ vm_map_delete(
 			 * these entries.
 			 */
 			/* unlink the entry from "map" ... */
-			vm_map_entry_unlink(map, entry);
+			vm_map_store_entry_unlink(map, entry);
 			/* ... and add it to the end of the "zap_map" */
-			vm_map_entry_link(zap_map,
+			vm_map_store_entry_link(zap_map,
 					  vm_map_last_entry(zap_map),
 					  entry);
 			entry_size = entry->vme_end - entry->vme_start;
@@ -5569,7 +5458,8 @@ vm_map_copy_overwrite_nested(
 	vm_map_address_t	dst_addr,
 	vm_map_copy_t		copy,
 	boolean_t		interruptible,
-	pmap_t			pmap)
+	pmap_t			pmap,
+	boolean_t		discard_on_success)
 {
 	vm_map_offset_t		dst_end;
 	vm_map_entry_t		tmp_entry;
@@ -5609,7 +5499,8 @@ vm_map_copy_overwrite_nested(
 	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
 
 	if (copy->size == 0) {
-		vm_map_copy_discard(copy);
+		if (discard_on_success)
+			vm_map_copy_discard(copy);
 		return(KERN_SUCCESS);
 	}
 
@@ -5942,20 +5833,23 @@ start_overwrite:
 						sub_start,
 						copy,
 						interruptible, 
-						entry->object.sub_map->pmap);
+						entry->object.sub_map->pmap,
+						TRUE);
 				} else if (pmap != NULL) {
 					kr = vm_map_copy_overwrite_nested(
 						entry->object.sub_map,
 						sub_start,
 						copy,
-						interruptible, pmap);
+						interruptible, pmap,
+						TRUE);
 				} else {
 					kr = vm_map_copy_overwrite_nested(
 						entry->object.sub_map,
 						sub_start,
 						copy,
 						interruptible,
-						dst_map->pmap);
+						dst_map->pmap,
+						TRUE);
 				}
 				if(kr != KERN_SUCCESS) {
 					if(next_copy != NULL) {
@@ -6148,7 +6042,8 @@ start_overwrite:
 	/*
 	 *	Throw away the vm_map_copy object
 	 */
-	vm_map_copy_discard(copy);
+	if (discard_on_success)
+		vm_map_copy_discard(copy);
 
 	return(KERN_SUCCESS);
 }/* vm_map_copy_overwrite */
@@ -6160,8 +6055,236 @@ vm_map_copy_overwrite(
 	vm_map_copy_t	copy,
 	boolean_t	interruptible)
 {
-	return vm_map_copy_overwrite_nested(
-		dst_map, dst_addr, copy, interruptible, (pmap_t) NULL);
+	vm_map_size_t	head_size, tail_size;
+	vm_map_copy_t	head_copy, tail_copy;
+	vm_map_offset_t	head_addr, tail_addr;
+	vm_map_entry_t	entry;
+	kern_return_t	kr;
+
+	head_size = 0;
+	tail_size = 0;
+	head_copy = NULL;
+	tail_copy = NULL;
+	head_addr = 0;
+	tail_addr = 0;
+
+	if (interruptible ||
+	    copy == VM_MAP_COPY_NULL ||
+	    copy->type != VM_MAP_COPY_ENTRY_LIST) {
+		/*
+		 * We can't split the "copy" map if we're interruptible
+		 * or if we don't have a "copy" map...
+		 */
+	blunt_copy:
+		return vm_map_copy_overwrite_nested(dst_map,
+						    dst_addr,
+						    copy,
+						    interruptible,
+						    (pmap_t) NULL,
+						    TRUE);
+	}
+
+	if (copy->size < 3 * PAGE_SIZE) {
+		/*
+		 * Too small to bother with optimizing...
+		 */
+		goto blunt_copy;
+	}
+
+	if ((dst_addr & PAGE_MASK) != (copy->offset & PAGE_MASK)) {
+		/*
+		 * Incompatible mis-alignment of source and destination...
+		 */
+		goto blunt_copy;
+	}
+
+	/*
+	 * Proper alignment or identical mis-alignment at the beginning.
+	 * Let's try and do a small unaligned copy first (if needed)
+	 * and then an aligned copy for the rest.
+	 */
+	if (!page_aligned(dst_addr)) {
+		head_addr = dst_addr;
+		head_size = PAGE_SIZE - (copy->offset & PAGE_MASK);
+	}
+	if (!page_aligned(copy->offset + copy->size)) {
+		/*
+		 * Mis-alignment at the end.
+		 * Do an aligned copy up to the last page and
+		 * then an unaligned copy for the remaining bytes.
+		 */
+		tail_size = (copy->offset + copy->size) & PAGE_MASK;
+		tail_addr = dst_addr + copy->size - tail_size;
+	}
+
+	if (head_size + tail_size == copy->size) {
+		/*
+		 * It's all unaligned, no optimization possible...
+		 */
+		goto blunt_copy;
+	}
+
+	/*
+	 * Can't optimize if there are any submaps in the
+	 * destination due to the way we free the "copy" map
+	 * progressively in vm_map_copy_overwrite_nested()
+	 * in that case.
+	 */
+	vm_map_lock_read(dst_map);
+	if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
+		vm_map_unlock_read(dst_map);
+		goto blunt_copy;
+	}
+	for (;
+	     (entry != vm_map_to_entry(dst_map) &&
+	      entry->vme_start < dst_addr + copy->size);
+	     entry = entry->vme_next) {
+		if (entry->is_sub_map) {
+			vm_map_unlock_read(dst_map);
+			goto blunt_copy;
+		}
+	}
+	vm_map_unlock_read(dst_map);
+
+	if (head_size) {
+		/*
+		 * Unaligned copy of the first "head_size" bytes, to reach
+		 * a page boundary.
+		 */
+		
+		/*
+		 * Extract "head_copy" out of "copy".
+		 */
+		head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
+		vm_map_copy_first_entry(head_copy) =
+			vm_map_copy_to_entry(head_copy);
+		vm_map_copy_last_entry(head_copy) =
+			vm_map_copy_to_entry(head_copy);
+		head_copy->type = VM_MAP_COPY_ENTRY_LIST;
+		head_copy->cpy_hdr.nentries = 0;
+		head_copy->cpy_hdr.entries_pageable =
+			copy->cpy_hdr.entries_pageable;
+		vm_map_store_init(&head_copy->cpy_hdr);
+
+		head_copy->offset = copy->offset;
+		head_copy->size = head_size;
+
+		copy->offset += head_size;
+		copy->size -= head_size;
+
+		entry = vm_map_copy_first_entry(copy);
+		vm_map_copy_clip_end(copy, entry, copy->offset);
+		vm_map_copy_entry_unlink(copy, entry);
+		vm_map_copy_entry_link(head_copy,
+				       vm_map_copy_to_entry(head_copy),
+				       entry);
+
+		/*
+		 * Do the unaligned copy.
+		 */
+		kr = vm_map_copy_overwrite_nested(dst_map,
+						  head_addr,
+						  head_copy,
+						  interruptible,
+						  (pmap_t) NULL,
+						  FALSE);
+		if (kr != KERN_SUCCESS)
+			goto done;
+	}
+
+	if (tail_size) {
+		/*
+		 * Extract "tail_copy" out of "copy".
+		 */
+		tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
+		vm_map_copy_first_entry(tail_copy) =
+			vm_map_copy_to_entry(tail_copy);
+		vm_map_copy_last_entry(tail_copy) =
+			vm_map_copy_to_entry(tail_copy);
+		tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
+		tail_copy->cpy_hdr.nentries = 0;
+		tail_copy->cpy_hdr.entries_pageable =
+			copy->cpy_hdr.entries_pageable;
+		vm_map_store_init(&tail_copy->cpy_hdr);
+
+		tail_copy->offset = copy->offset + copy->size - tail_size;
+		tail_copy->size = tail_size;
+
+		copy->size -= tail_size;
+
+		entry = vm_map_copy_last_entry(copy);
+		vm_map_copy_clip_start(copy, entry, tail_copy->offset);
+		entry = vm_map_copy_last_entry(copy);
+		vm_map_copy_entry_unlink(copy, entry);
+		vm_map_copy_entry_link(tail_copy,
+				       vm_map_copy_last_entry(tail_copy),
+				       entry);
+	}
+
+	/*
+	 * Copy most (or possibly all) of the data.
+	 */
+	kr = vm_map_copy_overwrite_nested(dst_map,
+					  dst_addr + head_size,
+					  copy,
+					  interruptible,
+					  (pmap_t) NULL,
+					  FALSE);
+	if (kr != KERN_SUCCESS) {
+		goto done;
+	}
+
+	if (tail_size) {
+		kr = vm_map_copy_overwrite_nested(dst_map,
+						  tail_addr,
+						  tail_copy,
+						  interruptible,
+						  (pmap_t) NULL,
+						  FALSE);
+	}
+
+done:
+	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
+	if (kr == KERN_SUCCESS) {
+		/*
+		 * Discard all the copy maps.
+		 */
+		if (head_copy) {
+			vm_map_copy_discard(head_copy);
+			head_copy = NULL;
+		}
+		vm_map_copy_discard(copy);
+		if (tail_copy) {
+			vm_map_copy_discard(tail_copy);
+			tail_copy = NULL;
+		}
+	} else {
+		/*
+		 * Re-assemble the original copy map.
+		 */
+		if (head_copy) {
+			entry = vm_map_copy_first_entry(head_copy);
+			vm_map_copy_entry_unlink(head_copy, entry);
+			vm_map_copy_entry_link(copy,
+					       vm_map_copy_to_entry(copy),
+					       entry);
+			copy->offset -= head_size;
+			copy->size += head_size;
+			vm_map_copy_discard(head_copy);
+			head_copy = NULL;
+		}
+		if (tail_copy) {
+			entry = vm_map_copy_last_entry(tail_copy);
+			vm_map_copy_entry_unlink(tail_copy, entry);
+			vm_map_copy_entry_link(copy,
+					       vm_map_copy_last_entry(copy),
+					       entry);
+			copy->size += tail_size;
+			vm_map_copy_discard(tail_copy);
+			tail_copy = NULL;
+		}
+	}
+	return kr;
 }
 
 
@@ -6832,19 +6955,8 @@ vm_map_copyout_kernel_buffer(
  */
 #define	vm_map_copy_insert(map, where, copy)				\
 MACRO_BEGIN								\
-	vm_map_t VMCI_map;						\
-	vm_map_entry_t VMCI_where;					\
-	vm_map_copy_t VMCI_copy;					\
-	VMCI_map = (map);						\
-	VMCI_where = (where);						\
-	VMCI_copy = (copy);						\
-	((VMCI_where->vme_next)->vme_prev = vm_map_copy_last_entry(VMCI_copy))\
-		->vme_next = (VMCI_where->vme_next);			\
-	((VMCI_where)->vme_next = vm_map_copy_first_entry(VMCI_copy))	\
-		->vme_prev = VMCI_where;				\
-	VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries;		\
-	UPDATE_FIRST_FREE(VMCI_map, VMCI_map->first_free);		\
-	zfree(vm_map_copy_zone, VMCI_copy);				\
+	vm_map_store_copy_insert(map, where, copy);	  \
+	zfree(vm_map_copy_zone, copy);		\
 MACRO_END
 
 /*
@@ -6928,9 +7040,14 @@ vm_map_copyout(
 StartAgain: ;
 
 	vm_map_lock(dst_map);
-	assert(first_free_is_valid(dst_map));
-	start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
+	if (dst_map->disable_vmentry_reuse == TRUE) {
+		VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
+		last = entry;
+	} else {
+		assert(first_free_is_valid(dst_map));
+		start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
 		vm_map_min(dst_map) : last->vme_end;
+	}
 
 	while (TRUE) {
 		vm_map_entry_t	next = last->vme_next;
@@ -6984,11 +7101,8 @@ StartAgain: ;
 		 * Reinitialize the copy so that vm_map_copy_entry_link
 		 * will work.
 		 */
-		copy->cpy_hdr.nentries = 0;
+		vm_map_store_copy_reset(copy, entry);
 		copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
-		vm_map_copy_first_entry(copy) =
-			vm_map_copy_last_entry(copy) =
-			vm_map_copy_to_entry(copy);
 
 		/*
 		 * Copy each entry.
@@ -7087,8 +7201,8 @@ StartAgain: ;
 
 				type_of_fault = DBG_CACHE_HIT_FAULT;
 
-				vm_fault_enter(m, dst_map->pmap, va, prot,
-					       VM_PAGE_WIRED(m), FALSE, FALSE,
+				vm_fault_enter(m, dst_map->pmap, va, prot, prot,
+					       VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
 					       &type_of_fault);
 
 				vm_object_unlock(object);
@@ -7257,6 +7371,8 @@ vm_map_copyin_common(
 	copy->cpy_hdr.nentries = 0;
 	copy->cpy_hdr.entries_pageable = TRUE;
 
+	vm_map_store_init( &(copy->cpy_hdr) );
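+	/* Set up the store (linked list and, if enabled, red-black tree) behind the new copy's entry list. */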
+
 	copy->offset = src_addr;
 	copy->size = len;
 	
@@ -7820,7 +7936,7 @@ vm_map_fork_share(
 		 object->shadowed ||		/* case 2 */
 		 (!object->true_share && 	/* case 3 */
 		  !old_entry->is_shared &&
-		  (object->size >
+		  (object->vo_size >
 		   (vm_map_size_t)(old_entry->vme_end -
 				   old_entry->vme_start)))) {
 		
@@ -7900,7 +8016,6 @@ vm_map_fork_share(
 		 *	(This is a preemptive version of
 		 *	case 2.)
 		 */
-		
 		vm_object_shadow(&old_entry->object.vm_object,
 				 &old_entry->offset,
 				 (vm_map_size_t) (old_entry->vme_end -
@@ -7941,6 +8056,7 @@ vm_map_fork_share(
 		old_entry->needs_copy = FALSE;
 		object = old_entry->object.vm_object;
 	}
+
 	
 	/*
 	 *	If object was using a symmetric copy strategy,
@@ -7980,7 +8096,7 @@ vm_map_fork_share(
 	 *	map.
 	 */
 	
-	vm_map_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
+	vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
 	
 	/*
 	 *	Update the physical map
@@ -8110,7 +8226,6 @@ vm_map_fork(
 				old_map->min_offset,
 				old_map->max_offset,
 				old_map->hdr.entries_pageable);
-
 	for (
 		old_entry = vm_map_first_entry(old_map);
 		old_entry != vm_map_to_entry(old_map);
@@ -8192,7 +8307,7 @@ vm_map_fork(
 			 *	of the map.
 			 */
 			
-			vm_map_entry_link(new_map, vm_map_last_entry(new_map),
+			vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
 					  new_entry);
 			new_size += entry_size;
 			break;
@@ -8284,9 +8399,22 @@ vm_map_lookup_locked(
 	vm_map_offset_t			old_start = 0;
 	vm_map_offset_t			old_end = 0;
 	register vm_prot_t		prot;
+	boolean_t			mask_protections;
+	vm_prot_t			original_fault_type;
+
+	/*
+	 * VM_PROT_IS_MASK means that the caller wants us to use "fault_type"
+	 * as a mask against the mapping's actual protections, not as an
+	 * absolute value.
+	 */
+	mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
+	fault_type &= ~VM_PROT_IS_MASK;
+	original_fault_type = fault_type;
 
 	*real_map = map;
-RetryLookup: ;
+
+RetryLookup:
+	fault_type = original_fault_type;
 
 	/*
 	 *	If the map has an interesting hint, try it before calling
@@ -8614,7 +8742,14 @@ submap_recurse:
 	        prot |= VM_PROT_EXECUTE;
 	}
 
+	if (mask_protections) {
+		fault_type &= prot;
+		if (fault_type == VM_PROT_NONE) {
+			goto protection_failure;
+		}
+	}
 	if ((fault_type & (prot)) != fault_type) {
+	protection_failure:
 		if (*real_map != map) {
 			vm_map_unlock(*real_map);
 		}
@@ -8716,6 +8851,8 @@ submap_recurse:
 		fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
 		fault_info->no_cache  = entry->no_cache;
 		fault_info->stealth = FALSE;
+		fault_info->io_sync = FALSE;
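+		/* JIT entries are exempt from code-signing enforcement. */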
+		fault_info->cs_bypass = (entry->used_for_jit) ? TRUE : FALSE;
 		fault_info->mark_zf_absent = FALSE;
 	}
 
@@ -8797,20 +8934,26 @@ vm_map_region_recurse_64(
 	 * "curr_entry" is the VM map entry preceding or including the
 	 * address we're looking for.
 	 * "curr_map" is the map or sub-map containing "curr_entry".
+	 * "curr_address" is the equivalent of the top map's "user_address" 
+	 * in the current map.
 	 * "curr_offset" is the cumulated offset of "curr_map" in the
 	 * target task's address space.
 	 * "curr_depth" is the depth of "curr_map" in the chain of
 	 * sub-maps.
-	 * "curr_max_offset" is the maximum offset we should take into
-	 * account in the current map.  It may be smaller than the current
-	 * map's "max_offset" because we might not have mapped it all in
-	 * the upper level map.
+	 *
+	 * "curr_max_below" and "curr_max_above" limit the range (around
+	 * "curr_address") we should take into account in the current (sub)map.
+	 * They limit the range to what's visible through the map entries
+	 * we've traversed from the top map to the current map.
 	 */
 	vm_map_entry_t			curr_entry;
+	vm_map_address_t		curr_address;
 	vm_map_offset_t			curr_offset;
 	vm_map_t			curr_map;
 	unsigned int			curr_depth;
-	vm_map_offset_t			curr_max_offset;
+	vm_map_offset_t			curr_max_below, curr_max_above;
+	vm_map_offset_t			curr_skip;
 
 	/*
 	 * "next_" is the same as "curr_" but for the VM region immediately
@@ -8820,9 +8963,11 @@ vm_map_region_recurse_64(
 	 */
 	vm_map_entry_t			next_entry;
 	vm_map_offset_t			next_offset;
+	vm_map_offset_t			next_address;
 	vm_map_t			next_map;
 	unsigned int			next_depth;
-	vm_map_offset_t			next_max_offset;
+	vm_map_offset_t			next_max_below, next_max_above;
+	vm_map_offset_t			next_skip;
 
 	boolean_t			look_for_pages;
 	vm_region_submap_short_info_64_t short_info;
@@ -8857,15 +9002,21 @@ vm_map_region_recurse_64(
 	
 	curr_entry = NULL;
 	curr_map = map;
+	curr_address = user_address;
 	curr_offset = 0;
+	curr_skip = 0;
 	curr_depth = 0;
-	curr_max_offset = curr_map->max_offset;
+	curr_max_above = ((vm_map_offset_t) -1) - curr_address;
+	curr_max_below = curr_address;
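+	/* Initially the entire address space below and above "user_address" is in range. */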
 
 	next_entry = NULL;
 	next_map = NULL;
+	next_address = 0;
 	next_offset = 0;
+	next_skip = 0;
 	next_depth = 0;
-	next_max_offset = curr_max_offset;
+	next_max_above = (vm_map_offset_t) -1;
+	next_max_below = (vm_map_offset_t) -1;
 
 	if (not_in_kdp) {
 		vm_map_lock_read(curr_map);
@@ -8873,19 +9024,22 @@ vm_map_region_recurse_64(
 
 	for (;;) {
 		if (vm_map_lookup_entry(curr_map,
-					user_address - curr_offset,
+					curr_address,
 					&tmp_entry)) {
 			/* tmp_entry contains the address we're looking for */
 			curr_entry = tmp_entry;
 		} else {
+			vm_map_offset_t skip;
 			/*
 			 * The address is not mapped.  "tmp_entry" is the
 			 * map entry preceding the address.  We want the next
 			 * one, if it exists.
 			 */
 			curr_entry = tmp_entry->vme_next;
+
 			if (curr_entry == vm_map_to_entry(curr_map) ||
-			    curr_entry->vme_start >= curr_max_offset) {
+			    (curr_entry->vme_start >=
+			     curr_address + curr_max_above)) {
 				/* no next entry at this level: stop looking */
 				if (not_in_kdp) {
 					vm_map_unlock_read(curr_map);
@@ -8894,9 +9048,18 @@ vm_map_region_recurse_64(
 				curr_map = NULL;
 				curr_offset = 0;
 				curr_depth = 0;
-				curr_max_offset = 0;
+				curr_max_above = 0;
+				curr_max_below = 0;
 				break;
 			}
+
+			/* adjust current address and offset */
+			skip = curr_entry->vme_start - curr_address;
+			curr_address = curr_entry->vme_start;
+			curr_skip = skip;
+			curr_offset += skip;
+			curr_max_above -= skip;
+			curr_max_below = 0;
 		}
 
 		/*
@@ -8907,7 +9070,8 @@ vm_map_region_recurse_64(
 		tmp_entry = curr_entry->vme_next;
 		if (tmp_entry == vm_map_to_entry(curr_map)) {
 			/* no next entry at this level */
-		} else if (tmp_entry->vme_start >= curr_max_offset) {
+		} else if (tmp_entry->vme_start >=
+			   curr_address + curr_max_above) {
 			/*
 			 * tmp_entry is beyond the scope of what we mapped of
 			 * this submap in the upper level: ignore it.
@@ -8928,11 +9092,32 @@ vm_map_region_recurse_64(
 			}
 			next_entry = tmp_entry;
 			next_map = curr_map;
-			next_offset = curr_offset;
 			next_depth = curr_depth;
-			next_max_offset = curr_max_offset;
+			next_address = next_entry->vme_start;
+			next_skip = curr_skip;
+			next_offset = curr_offset;
+			next_offset += (next_address - curr_address);
+			next_max_above = MIN(next_max_above, curr_max_above);
+			next_max_above = MIN(next_max_above,
+					     next_entry->vme_end - next_address);
+			next_max_below = MIN(next_max_below, curr_max_below);
+			next_max_below = MIN(next_max_below,
+					     next_address - next_entry->vme_start);
 		}
 
+		/*
+		 * "curr_max_{above,below}" allow us to keep track of the
+		 * portion of the submap that is actually mapped at this level:
+		 * the rest of that submap is irrelevant to us, since it's not
+		 * mapped here.
+		 * The relevant portion of the map starts at
+		 * "curr_entry->offset" up to the size of "curr_entry".
+		 */
+		curr_max_above = MIN(curr_max_above,
+				     curr_entry->vme_end - curr_address);
+		curr_max_below = MIN(curr_max_below,
+				     curr_address - curr_entry->vme_start);
+
 		if (!curr_entry->is_sub_map ||
 		    curr_depth >= user_max_depth) {
 			/*
@@ -8973,21 +9158,11 @@ vm_map_region_recurse_64(
 		 * space (i.e. the top-level VM map).
 		 */
 		curr_offset +=
-			(curr_entry->vme_start - curr_entry->offset);
+			(curr_entry->offset - curr_entry->vme_start);
+		curr_address = user_address + curr_offset;
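+		/* "curr_address" is now expressed in the submap's address space. */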
 		/* switch to the submap */
 		curr_map = curr_entry->object.sub_map;
 		curr_depth++;
-		/*
-		 * "curr_max_offset" allows us to keep track of the
-		 * portion of the submap that is actually mapped at this level:
-		 * the rest of that submap is irrelevant to us, since it's not
-		 * mapped here.
-		 * The relevant portion of the map starts at
-		 * "curr_entry->offset" up to the size of "curr_entry".
-		 */
-		curr_max_offset =
-			curr_entry->vme_end - curr_entry->vme_start +
-			curr_entry->offset;
 		curr_entry = NULL;
 	}
 
@@ -9000,9 +9175,15 @@ vm_map_region_recurse_64(
 		/* ... gather info about the next VM region */
 		curr_entry = next_entry;
 		curr_map = next_map;	/* still locked ... */
+		curr_address = next_address;
+		curr_skip = next_skip;
 		curr_offset = next_offset;
 		curr_depth = next_depth;
-		curr_max_offset = next_max_offset;
+		curr_max_above = next_max_above;
+		curr_max_below = next_max_below;
+		if (curr_map == map) {
+			user_address = curr_address;
+		}
 	} else {
 		/* we won't need "next_entry" after all */
 		if (next_entry != NULL) {
@@ -9015,12 +9196,14 @@ vm_map_region_recurse_64(
 	next_entry = NULL;
 	next_map = NULL;
 	next_offset = 0;
+	next_skip = 0;
 	next_depth = 0;
-	next_max_offset = 0;
+	next_max_below = -1;
+	next_max_above = -1;
 
 	*nesting_depth = curr_depth;
-	*size = curr_entry->vme_end - curr_entry->vme_start;
-	*address = curr_entry->vme_start + curr_offset;
+	*size = curr_max_above + curr_max_below;
+	*address = user_address + curr_skip - curr_max_below;
 
 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
 // so probably should be a real 32b ID vs. ptr.
@@ -9058,12 +9241,18 @@ vm_map_region_recurse_64(
 
 	if (not_in_kdp) {
 		if (!curr_entry->is_sub_map) {
+			vm_map_offset_t range_start, range_end;
+			range_start = MAX((curr_address - curr_max_below),
+					  curr_entry->vme_start);
+			range_end = MIN((curr_address + curr_max_above),
+					curr_entry->vme_end);
 			vm_map_region_walk(curr_map,
-					   curr_entry->vme_start,
+					   range_start,
 					   curr_entry,
-					   curr_entry->offset,
-					   (curr_entry->vme_end -
-					    curr_entry->vme_start),
+					   (curr_entry->offset +
+					    (range_start -
+					     curr_entry->vme_start)),
+					   range_end - range_start,
 					   &extended,
 					   look_for_pages);
 			if (extended.external_pager &&
@@ -9372,7 +9561,11 @@ vm_map_region_top_walk(
 				top->ref_count += ref_count - 1;
 			}
 		} else {
-			if (entry->needs_copy) {
+			if (entry->superpage_size) {
+				top->share_mode = SM_LARGE_PAGE;
+				top->shared_pages_resident = 0;
+				top->private_pages_resident = entry_size;
+			} else if (entry->needs_copy) {
 				top->share_mode = SM_COW;
 				top->shared_pages_resident =
 					OBJ_RESIDENT_COUNT(obj, entry_size);
@@ -9418,11 +9611,23 @@ vm_map_region_walk(
 
 	if ((entry->object.vm_object == 0) ||
 	    (entry->is_sub_map) ||
-	    (entry->object.vm_object->phys_contiguous)) {
+	    (entry->object.vm_object->phys_contiguous &&
+	     !entry->superpage_size)) {
 		extended->share_mode = SM_EMPTY;
 		extended->ref_count = 0;
 		return;
 	}
+
+	if (entry->superpage_size) {
+		extended->shadow_depth = 0;
+		extended->share_mode = SM_LARGE_PAGE;
+		extended->ref_count = 1;
+		extended->external_pager = 0;
+		extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
+		return;
+	}
+
 	{
 		obj = entry->object.vm_object;
 
@@ -9619,7 +9824,7 @@ vm_map_region_look_for_page(
 			if(object != caller_object)
 				vm_object_unlock(object);
 
-			offset = offset + object->shadow_offset;
+			offset = offset + object->vo_shadow_offset;
 			object = shadow;
 			shadow = object->shadow;
 			continue;
@@ -9723,7 +9928,7 @@ vm_map_simplify_entry(
 	    (prev_entry->is_shared == FALSE) &&
 	    (this_entry->is_shared == FALSE)
 		) {
-		_vm_map_entry_unlink(&map->hdr, prev_entry);
+		_vm_map_store_entry_unlink(&map->hdr, prev_entry);
 		this_entry->vme_start = prev_entry->vme_start;
 		this_entry->offset = prev_entry->offset;
 		if (prev_entry->is_sub_map) {
@@ -9891,7 +10096,7 @@ vm_map_machine_attribute(
 									attribute, value);
 							
 						} else if (object->shadow) {
-						        offset = offset + object->shadow_offset;
+						        offset = offset + object->vo_shadow_offset;
 							last_object = object;
 							object = object->shadow;
 							vm_object_lock(last_object->shadow);
@@ -9947,6 +10152,12 @@ vm_map_behavior_set(
 	    "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
 	    map, start, end, new_behavior, 0);
 
+	if (start > end ||
+	    start < vm_map_min(map) ||
+	    end > vm_map_max(map)) {
+		return KERN_NO_SPACE;
+	}
+
 	switch (new_behavior) {
 
 	/*
@@ -10059,6 +10270,8 @@ vm_map_willneed(
 	fault_info.behavior      = VM_BEHAVIOR_SEQUENTIAL;
 	fault_info.no_cache      = FALSE;			/* ignored value */
 	fault_info.stealth	 = TRUE;
+	fault_info.io_sync = FALSE;
+	fault_info.cs_bypass = FALSE;
 	fault_info.mark_zf_absent = FALSE;
 
 	/*
@@ -10074,106 +10287,130 @@ vm_map_willneed(
 	 * an error.
 	 */
 
-	if (vm_map_range_check(map, start, end, &entry)) {
+	if (! vm_map_range_check(map, start, end, &entry)) {
+		vm_map_unlock_read(map);
+		return KERN_INVALID_ADDRESS;
+	}
 
+	/*
+	 * Examine each vm_map_entry_t in the range.
+	 */
+	for (; entry != vm_map_to_entry(map) && start < end; ) {
+		
 		/*
-		 * Examine each vm_map_entry_t in the range.
+		 * The first time through, the start address could be anywhere
+		 * within the vm_map_entry we found.  So adjust the offset to
+		 * correspond.  After that, the offset will always be zero to
+		 * correspond to the beginning of the current vm_map_entry.
 		 */
+		offset = (start - entry->vme_start) + entry->offset;
 
-		for (; entry->vme_start < end; start += len, entry = entry->vme_next) {
-
-			/*
-			 * The first time through, the start address could be anywhere within the 
-			 * vm_map_entry we found.  So adjust the offset to correspond.  After that,
-			 * the offset will always be zero to correspond to the beginning of the current
-			 * vm_map_entry.
-			 */
-			
-			offset = (start - entry->vme_start) + entry->offset;
-
-			/*
-			 * Set the length so we don't go beyond the end of the map_entry or beyond the
-			 * end of the range we were given.  This range could span also multiple map 
-			 * entries all of which map different files, so make sure we only do the right
-			 * amount of I/O for each object.  Note that it's possible for there to be
-			 * multiple map entries all referring to the same object but with different
-			 * page permissions, but it's not worth trying to optimize that case.
-			 */
-
-			len = MIN(entry->vme_end - start, end - start);
+		/*
+		 * Set the length so we don't go beyond the end of the
+		 * map_entry or beyond the end of the range we were given.
+		 * This range could span also multiple map entries all of which
+		 * map different files, so make sure we only do the right amount
+		 * of I/O for each object.  Note that it's possible for there
+		 * to be multiple map entries all referring to the same object
+		 * but with different page permissions, but it's not worth
+		 * trying to optimize that case.
+		 */
+		len = MIN(entry->vme_end - start, end - start);
 
-			if ((vm_size_t) len != len) {
-				/* 32-bit overflow */
-				len = (vm_size_t) (0 - PAGE_SIZE);
-			}
-			fault_info.cluster_size = (vm_size_t) len;
-			fault_info.lo_offset    = offset; 
-			fault_info.hi_offset    = offset + len;
-			fault_info.user_tag     = entry->alias;
+		if ((vm_size_t) len != len) {
+			/* 32-bit overflow */
+			len = (vm_size_t) (0 - PAGE_SIZE);
+		}
+		fault_info.cluster_size = (vm_size_t) len;
+		fault_info.lo_offset    = offset; 
+		fault_info.hi_offset    = offset + len;
+		fault_info.user_tag     = entry->alias;
 
-			/*
-			 * If there's no read permission to this mapping, then just skip it.
-			 */
+		/*
+		 * If there's no read permission to this mapping, then just
+		 * skip it.
+		 */
+		if ((entry->protection & VM_PROT_READ) == 0) {
+			entry = entry->vme_next;
+			start = entry->vme_start;
+			continue;
+		}
 
-			if ((entry->protection & VM_PROT_READ) == 0) {
-				continue;
-			}
+		/*
+		 * Find the file object backing this map entry.  If there is
+		 * none, then we simply ignore the "will need" advice for this
+		 * entry and go on to the next one.
+		 */
+		if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
+			entry = entry->vme_next;
+			start = entry->vme_start;
+			continue;
+		}
 
-			/*
-			 * Find the file object backing this map entry.  If there is none,
-			 * then we simply ignore the "will need" advice for this entry and
-			 * go on to the next one.
-			 */
+		/*
+		 * The data_request() could take a long time, so let's
+		 * release the map lock to avoid blocking other threads.
+		 */
+		vm_map_unlock_read(map);
 
-			if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
-				continue;
-			}
+		vm_object_paging_begin(object);
+		pager = object->pager;
+		vm_object_unlock(object);
 
-			vm_object_paging_begin(object);
-			pager = object->pager;
-			vm_object_unlock(object);
+		/*
+		 * Get the data from the object asynchronously.
+		 *
+		 * Note that memory_object_data_request() places limits on the
+		 * amount of I/O it will do.  Regardless of the len we
+		 * specified, it won't do more than MAX_UPL_TRANSFER and it
+		 * silently truncates the len to that size.  This isn't
+		 * necessarily bad since madvise shouldn't really be used to
+		 * page in unlimited amounts of data.  Other Unix variants
+		 * limit the willneed case as well.  If this turns out to be an
+		 * issue for developers, then we can always adjust the policy
+		 * here and still be backwards compatible since this is all
+		 * just "advice".
+		 */
+		kr = memory_object_data_request(
+			pager,
+			offset + object->paging_offset,
+			0,	/* ignored */
+			VM_PROT_READ,
+			(memory_object_fault_info_t)&fault_info);
 
-			/*
-			 * Get the data from the object asynchronously.
-			 *
-			 * Note that memory_object_data_request() places limits on the amount
-			 * of I/O it will do.  Regardless of the len we specified, it won't do
-			 * more than MAX_UPL_TRANSFER and it silently truncates the len to that
-			 * size.  This isn't necessarily bad since madvise shouldn't really be 
-			 * used to page in unlimited amounts of data.  Other Unix variants limit
-			 * the willneed case as well.  If this turns out to be an issue for
-			 * developers, then we can always adjust the policy here and still be
-			 * backwards compatible since this is all just "advice".
-			 */
+		vm_object_lock(object);
+		vm_object_paging_end(object);
+		vm_object_unlock(object);
 
-			kr = memory_object_data_request(
-				pager,
-				offset + object->paging_offset,
-				0,	/* ignored */
-				VM_PROT_READ,
-				(memory_object_fault_info_t)&fault_info);
+		/*
+		 * If we couldn't do the I/O for some reason, just give up on
+		 * the madvise.  We still return success to the user since
+		 * madvise isn't supposed to fail when the advice can't be
+		 * taken.
+		 */
+		if (kr != KERN_SUCCESS) {
+			return KERN_SUCCESS;
+		}
 
-			vm_object_lock(object);
-			vm_object_paging_end(object);
-			vm_object_unlock(object);
+		start += len;
+		if (start >= end) {
+			/* done */
+			return KERN_SUCCESS;
+		}
 
+		/* look up next entry */
+		vm_map_lock_read(map);
+		if (! vm_map_lookup_entry(map, start, &entry)) {
 			/*
-			 * If we couldn't do the I/O for some reason, just give up on the
-			 * madvise.  We still return success to the user since madvise isn't
-			 * supposed to fail when the advice can't be taken.
+			 * There's a new hole in the address range.
 			 */
-
-			if (kr != KERN_SUCCESS) {
-				break;
-			}
+			vm_map_unlock_read(map);
+			return KERN_INVALID_ADDRESS;
 		}
-
-		kr = KERN_SUCCESS;
-	} else
-		kr = KERN_INVALID_ADDRESS;
+	}
 
 	vm_map_unlock_read(map);
-	return kr;
+	return KERN_SUCCESS;
 }
 
 static boolean_t
@@ -10208,7 +10445,7 @@ vm_map_entry_is_reusable(
 	    object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
 	    object->internal &&
 	    !object->true_share &&
-	    object->wimg_bits == VM_WIMG_DEFAULT &&
+	    object->wimg_bits == VM_WIMG_USE_DEFAULT &&
 	    !object->code_signed) {
 		return TRUE;
 	}
@@ -10798,12 +11035,13 @@ vm_map_entry_insert(
 	new_entry->no_cache = no_cache;
 	new_entry->permanent = permanent;
 	new_entry->superpage_size = superpage_size;
+	new_entry->used_for_jit = FALSE;
 
 	/*
 	 *	Insert the new entry into the list.
 	 */
 
-	vm_map_entry_link(map, insp_entry, new_entry);
+	vm_map_store_entry_link(map, insp_entry, new_entry);
 	map->size += end - start;
 
 	/*
@@ -10866,6 +11104,8 @@ vm_map_remap_extract(
 	map_header->nentries = 0;
 	map_header->entries_pageable = pageable;
 
+	vm_map_store_init( map_header );
+
 	*cur_protection = VM_PROT_ALL;
 	*max_protection = VM_PROT_ALL;
 
@@ -10923,7 +11163,7 @@ vm_map_remap_extract(
 			} else if (src_entry->needs_copy || object->shadowed ||
 				   (object->internal && !object->true_share &&
 				    !src_entry->is_shared &&
-				    object->size > entry_size)) {
+				    object->vo_size > entry_size)) {
 
 				vm_object_shadow(&src_entry->object.vm_object,
 						 &src_entry->offset,
@@ -11109,12 +11349,14 @@ vm_map_remap_extract(
 			}
 		}
 
-		_vm_map_entry_link(map_header,
+		_vm_map_store_entry_link(map_header,
 				   map_header->links.prev, new_entry);
 
-		*cur_protection &= src_entry->protection;
-		*max_protection &= src_entry->max_protection;
-
+		/* Protections for a submap mapping are irrelevant here */
+		if( !src_entry->is_sub_map ) {
+			*cur_protection &= src_entry->protection;
+			*max_protection &= src_entry->max_protection;
+		}
 		map_address += tmp_size;
 		mapped_size += tmp_size;
 		src_start += tmp_size;
@@ -11130,7 +11372,7 @@ vm_map_remap_extract(
 		     src_entry != (struct vm_map_entry *)&map_header->links;
 		     src_entry = new_entry) {
 			new_entry = src_entry->vme_next;
-			_vm_map_entry_unlink(map_header, src_entry);
+			_vm_map_store_entry_unlink(map_header, src_entry);
 			vm_object_deallocate(src_entry->object.vm_object);
 			_vm_map_entry_dispose(map_header, src_entry);
 		}
@@ -11210,11 +11452,11 @@ vm_map_remap(
 	     entry != (struct vm_map_entry *)&map_header.links;
 	     entry = new_entry) {
 		new_entry = entry->vme_next;
-		_vm_map_entry_unlink(&map_header, entry);
+		_vm_map_store_entry_unlink(&map_header, entry);
 		if (result == KERN_SUCCESS) {
 			entry->vme_start += *address;
 			entry->vme_end += *address;
-			vm_map_entry_link(target_map, insp_entry, entry);
+			vm_map_store_entry_link(target_map, insp_entry, entry);
 			insp_entry = entry;
 		} else {
 			if (!entry->is_sub_map) {
@@ -11226,6 +11468,12 @@ vm_map_remap(
 		}
 	}
 
+	if (target_map->disable_vmentry_reuse == TRUE) {
+		if (target_map->highest_entry_end < insp_entry->vme_end) {
+			target_map->highest_entry_end = insp_entry->vme_end;
+		}
+	}
+
 	if (result == KERN_SUCCESS) {
 		target_map->size += size;
 		SAVE_HINT_MAP_WRITE(target_map, insp_entry);
@@ -11284,15 +11532,19 @@ StartAgain: ;
 		 *	address, we have to start after it.
 		 */
 
-		assert(first_free_is_valid(map));
-		if (start == map->min_offset) {
-			if ((entry = map->first_free) != vm_map_to_entry(map))
-				start = entry->vme_end;
+		if (map->disable_vmentry_reuse == TRUE) {
+			VM_MAP_HIGHEST_ENTRY(map, entry, start);
 		} else {
-			vm_map_entry_t	tmp_entry;
-			if (vm_map_lookup_entry(map, start, &tmp_entry))
-				start = tmp_entry->vme_end;
-			entry = tmp_entry;
+			assert(first_free_is_valid(map));
+			if (start == map->min_offset) {
+				if ((entry = map->first_free) != vm_map_to_entry(map))
+					start = entry->vme_end;
+			} else {
+				vm_map_entry_t	tmp_entry;
+				if (vm_map_lookup_entry(map, start, &tmp_entry))
+					start = tmp_entry->vme_end;
+				entry = tmp_entry;
+			}
 		}
 		
 		/*
@@ -11671,7 +11923,7 @@ vm_map_purgable_control(
 	vm_object_lock(object);
 
 	if (entry->offset != 0 || 
-	    entry->vme_end - entry->vme_start != object->size) {
+	    entry->vme_end - entry->vme_start != object->vo_size) {
 		/*
 		 * Can only apply purgable controls to the whole (existing)
 		 * object at once.
@@ -11737,11 +11989,18 @@ vm_map_page_info(
 	vm_object_id_t		object_id;
 	vm_page_info_basic_t	basic_info;
 	int			depth;
+	vm_map_offset_t		offset_in_page;
 
 	switch (flavor) {
 	case VM_PAGE_INFO_BASIC:
 		if (*count != VM_PAGE_INFO_BASIC_COUNT) {
-			return KERN_INVALID_ARGUMENT;
+			/*
+			 * The "vm_page_info_basic_data" structure was not
+			 * properly padded, so allow the size to be off by
+			 * one to maintain backwards binary compatibility...
+			 */
+			if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
+				return KERN_INVALID_ARGUMENT;
 		}
 		break;
 	default:
@@ -11755,6 +12014,7 @@ vm_map_page_info(
 	depth = 0;
 
 	retval = KERN_SUCCESS;
+	offset_in_page = offset & PAGE_MASK;
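+	/* Remember the sub-page offset; it is added back into the returned object offset at the end. */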
 	offset = vm_map_trunc_page(offset);
 
 	vm_map_lock_read(map);
@@ -11861,7 +12121,7 @@ vm_map_page_info(
 			if (object->shadow != VM_OBJECT_NULL) {
 			        vm_object_t shadow;
 
-				offset += object->shadow_offset;
+				offset += object->vo_shadow_offset;
 				shadow = object->shadow;
 				
 				vm_object_lock(shadow);
@@ -11926,7 +12186,8 @@ done:
 		basic_info->disposition = disposition;
 		basic_info->ref_count = ref_count;
 		basic_info->object_id = (vm_object_id_t) (uintptr_t) object;
-		basic_info->offset = (memory_object_offset_t) offset;
+		basic_info->offset =
+			(memory_object_offset_t) offset + offset_in_page;
 		basic_info->depth = depth;
 		break;
 	}
@@ -12457,6 +12718,15 @@ vm_map_disable_NX(vm_map_t map)
         pmap_disable_NX(map->pmap);
 }
 
+void
+vm_map_disallow_data_exec(vm_map_t map)
+{
+    if (map == NULL)
+        return;
+
+    map->map_disallow_data_exec = TRUE;
+}
+
 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
  * more descriptive.
  */
@@ -12506,7 +12776,7 @@ vm_map_has_4GB_pagezero(
 void
 vm_map_set_4GB_pagezero(vm_map_t map)
 {
-#ifdef __i386__
+#if defined(__i386__)
 	pmap_set_4GB_pagezero(map->pmap);
 #else
 #pragma unused(map)
@@ -12517,7 +12787,7 @@ vm_map_set_4GB_pagezero(vm_map_t map)
 void
 vm_map_clear_4GB_pagezero(vm_map_t map)
 {
-#ifdef __i386__
+#if defined(__i386__)
 	pmap_clear_4GB_pagezero(map->pmap);
 #else
 #pragma unused(map)
@@ -12681,3 +12951,184 @@ kern_return_t vm_map_sign(vm_map_t map,
 	return KERN_SUCCESS;
 }
 #endif
+
+#if CONFIG_FREEZE
+
+kern_return_t vm_map_freeze_walk(
+             	vm_map_t map,
+             	unsigned int *purgeable_count,
+             	unsigned int *wired_count,
+             	unsigned int *clean_count,
+             	unsigned int *dirty_count,
+             	boolean_t *has_shared)
+{
+	vm_map_entry_t entry;
+	
+	vm_map_lock_read(map);
+	
+	*purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
+	*has_shared = FALSE;
+	
+	for (entry = vm_map_first_entry(map);
+	     entry != vm_map_to_entry(map);
+	     entry = entry->vme_next) {
+		unsigned int purgeable, clean, dirty, wired;
+		boolean_t shared;
+
+		if ((entry->object.vm_object == 0) ||
+		    (entry->is_sub_map) ||
+		    (entry->object.vm_object->phys_contiguous)) {
+			continue;
+		}
+
+		vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared, entry->object.vm_object, VM_OBJECT_NULL, NULL, NULL);
+		
+		*purgeable_count += purgeable;
+		*wired_count += wired;
+		*clean_count += clean;
+		*dirty_count += dirty;
+		
+		if (shared) {
+			*has_shared = TRUE;
+		}
+	}
+
+	vm_map_unlock_read(map);
+
+	return KERN_SUCCESS;
+}
+
+kern_return_t vm_map_freeze(
+             	vm_map_t map,
+             	unsigned int *purgeable_count,
+             	unsigned int *wired_count,
+             	unsigned int *clean_count,
+             	unsigned int *dirty_count,
+             	boolean_t *has_shared)
+{	
+	vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
+	vm_object_t compact_object = VM_OBJECT_NULL;
+	vm_object_offset_t offset = 0x0;
+	kern_return_t kr = KERN_SUCCESS;
+	void *default_freezer_toc = NULL;
+	boolean_t cleanup = FALSE;
+
+	*purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
+	*has_shared = FALSE;
+
+	/* Create our compact object */
+	compact_object = vm_object_allocate((vm_map_offset_t)(VM_MAX_ADDRESS) - (vm_map_offset_t)(VM_MIN_ADDRESS));
+	if (!compact_object) {
+		/* The map is not locked yet, so don't go through "done:" (it unlocks the map). */
+		return KERN_FAILURE;
+	}
+
+	default_freezer_toc = default_freezer_mapping_create(compact_object, offset);
+	if (!default_freezer_toc) {
+		/* Likewise: the map lock is not held on this path. */
+		vm_object_deallocate(compact_object);
+		return KERN_FAILURE;
+	}
+
+	/*
+	 * We need the exclusive lock here so that we can
+	 * block any page faults or lookups while we are
+	 * in the middle of freezing this vm map.
+	 */
+	vm_map_lock(map);
+
+	if (map->default_freezer_toc != NULL) {
+		/*
+		 * This map has already been frozen.
+		 */
+		cleanup = TRUE;
+		kr = KERN_SUCCESS;
+		goto done;
+	}
+
+	/* Get a mapping in place for the freezing about to commence */
+	map->default_freezer_toc = default_freezer_toc;
+
+	vm_object_lock(compact_object);
+
+	for (entry2 = vm_map_first_entry(map);
+	     entry2 != vm_map_to_entry(map);
+	     entry2 = entry2->vme_next) {
+	
+		vm_object_t	src_object = entry2->object.vm_object;
+
+		/* If eligible, scan the entry, moving eligible pages over to our parent object */
+		if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
+			unsigned int purgeable, clean, dirty, wired;
+			boolean_t shared;
+    		
+			vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared,
+							src_object, compact_object, &default_freezer_toc, &offset);
+									 
+			*purgeable_count += purgeable;
+			*wired_count += wired;
+			*clean_count += clean;
+			*dirty_count += dirty;
+
+			if (shared) {
+				*has_shared = TRUE;
+			}
+		}
+	}
+
+	vm_object_unlock(compact_object);	
+	
+	/* Finally, throw out the pages to swap */
+	vm_object_pageout(compact_object);
+
+done:
+	vm_map_unlock(map);
+
+	/* Unwind if there was a failure */
+	if ((cleanup) || (KERN_SUCCESS != kr)) {
+		if (default_freezer_toc) {
+			default_freezer_mapping_free(&map->default_freezer_toc, TRUE);
+		}
+		if (compact_object) {
+			vm_object_deallocate(compact_object);
+		}
+	}
+	
+	return kr;
+}
+
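+/* Provided by the default freezer; declared locally here. */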
+__private_extern__ vm_object_t	default_freezer_get_compact_vm_object( void** );
+
+void
+vm_map_thaw(
+	vm_map_t map)
+{
+	void **default_freezer_toc;
+	vm_object_t compact_object;
+
+	vm_map_lock(map);
+
+	if (map->default_freezer_toc == NULL) {
+		/*
+		 * This map is not in a frozen state.
+		 */
+		goto out;
+	}
+	
+	default_freezer_toc = &(map->default_freezer_toc);
+	
+	compact_object = default_freezer_get_compact_vm_object(default_freezer_toc);
+	
+	/* Bring the pages back in */
+	vm_object_pagein(compact_object);
+	
+	/* Shift pages back to their original objects */
+	vm_object_unpack(compact_object, default_freezer_toc);
+
+	vm_object_deallocate(compact_object);
+
+	map->default_freezer_toc = NULL;
+	
+out:
+	vm_map_unlock(map);
+}
+#endif
diff --git a/osfmk/vm/vm_map.h b/osfmk/vm/vm_map.h
index dd39abb5c..d27859858 100644
--- a/osfmk/vm/vm_map.h
+++ b/osfmk/vm/vm_map.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -114,6 +114,9 @@ __END_DECLS
 #define current_map_fast()	(current_thread()->map)
 #define	current_map()		(current_map_fast())
 
+#include <vm/vm_map_store.h>
+
+
 /*
  *	Types defined:
  *
@@ -191,6 +194,7 @@ struct vm_named_entry {
  *		Control information for virtual copy operations is also
  *		stored in the address map entry.
  */
+
 struct vm_map_links {
 	struct vm_map_entry	*prev;		/* previous entry */
 	struct vm_map_entry	*next;		/* next entry */
@@ -204,6 +208,8 @@ struct vm_map_entry {
 #define vme_next		links.next
 #define vme_start		links.start
 #define vme_end			links.end
+
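+	/* Per-entry linkage for the map store (linked list and, when enabled, the red-black tree). */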
+	struct vm_map_store	store;
 	union vm_map_object	object;		/* object I point to */
 	vm_object_offset_t	offset;		/* offset into object */
 	unsigned int
@@ -230,7 +236,8 @@ struct vm_map_entry {
 	/* boolean_t */		permanent:1,	/* mapping can not be removed */
 	/* boolean_t */		superpage_size:3,/* use superpages of a certain size */
 	/* boolean_t */		zero_wired_pages:1, /* zero out the wired pages of this entry if it is being deleted without unwiring them */
-	/* unsigned char */	pad:2;		/* available bits */
+	/* boolean_t */		used_for_jit:1,
+	/* unsigned char */	pad:1;		/* available bits */
 	unsigned short		wired_count;	/* can be paged if = 0 */
 	unsigned short		user_wired_count; /* for vm_wire */
 };
@@ -258,11 +265,17 @@ struct vm_map_entry {
  *	Description:
  *		Header for a vm_map and a vm_map_copy.
  */
+
+
 struct vm_map_header {
 	struct vm_map_links	links;		/* first, last, min, max */
 	int			nentries;	/* Number of entries */
 	boolean_t		entries_pageable;
 						/* are map entries pageable? */
+	vm_map_offset_t		highest_entry_end_addr;	/* The ending address of the highest allocated vm_map_entry_t */
+#ifdef VM_MAP_STORE_USE_RB
+	struct rb_head	rb_head_store;
+#endif
 };
 
 /*
@@ -285,6 +298,7 @@ struct _vm_map {
 	struct vm_map_header	hdr;		/* Map entry header */
 #define min_offset		hdr.links.start	/* start of range */
 #define max_offset		hdr.links.end	/* end of range */
+#define highest_entry_end	hdr.highest_entry_end_addr
 	pmap_t			pmap;		/* Physical map */
 	vm_map_size_t		size;		/* virtual size */
 	vm_map_size_t		user_wire_limit;/* rlimit on user locked memory */
@@ -298,14 +312,21 @@ struct _vm_map {
 	lck_mtx_ext_t		s_lock_ext;
 	vm_map_entry_t		hint;		/* hint for quick lookups */
 	vm_map_entry_t		first_free;	/* First free space hint */
-	boolean_t		wait_for_space;	/* Should callers wait
-						   for space? */
-	boolean_t		wiring_required;/* All memory wired? */
-	boolean_t		no_zero_fill;	/* No zero fill absent pages */
-	boolean_t		mapped;		/* has this map been mapped */
-	boolean_t		switch_protect;	/* Protect map from write faults while switched */
+	unsigned int
+	/* boolean_t */		wait_for_space:1, /* Should callers wait for space? */
+	/* boolean_t */		wiring_required:1, /* All memory wired? */
+	/* boolean_t */		no_zero_fill:1, /* No zero fill absent pages */
+	/* boolean_t */		mapped:1, /* has this map been mapped */
+	/* boolean_t */		switch_protect:1, /* Protect map from write faults while switched */
+	/* boolean_t */		disable_vmentry_reuse:1, /* All new VM entries are placed at ever-higher addresses; freed ranges are not reused */
+	/* boolean_t */		map_disallow_data_exec:1, /* Disallow execution from data pages on exec-permissive architectures */
+	/* reserved */		pad:25;
 	unsigned int		timestamp;	/* Version number */
 	unsigned int		color_rr;	/* next color (not protected by a lock) */
+#if CONFIG_FREEZE
+	void			*default_freezer_toc;
+#endif
+	boolean_t		jit_entry_exists;
 } ;
 
 #define vm_map_to_entry(map)	((struct vm_map_entry *) &(map)->hdr.links)
@@ -800,6 +821,11 @@ extern vm_object_t convert_port_entry_to_object(
 	ipc_port_t	port);
 
 
+extern kern_return_t vm_map_set_cache_attr(
+        vm_map_t        map,
+        vm_map_offset_t va);
+
+
 /* definitions related to overriding the NX behavior */
 
 #define VM_ABI_32	0x1
@@ -931,6 +957,9 @@ extern kern_return_t	vm_map_copyin_common(
 extern void		vm_map_disable_NX(
 			        vm_map_t		map);
 
+extern void		vm_map_disallow_data_exec(
+			        vm_map_t		map);
+
 extern void		vm_map_set_64bit(
 			        vm_map_t		map);
 
@@ -964,6 +993,8 @@ extern void vm_map_switch_protect(
 				vm_map_t		map, 
 				boolean_t		val);
 
+extern boolean_t first_free_is_valid(vm_map_t);
+
 #ifdef XNU_KERNEL_PRIVATE
 extern kern_return_t vm_map_page_info(
 	vm_map_t		map,
@@ -1030,6 +1061,27 @@ extern kern_return_t vm_map_sign(vm_map_t map,
 				 vm_map_offset_t end);
 #endif
 
+#if CONFIG_FREEZE
+extern kern_return_t vm_map_freeze_walk(
+              	vm_map_t map,
+              	unsigned int *purgeable_count,
+              	unsigned int *wired_count,
+              	unsigned int *clean_count,
+              	unsigned int *dirty_count,
+              	boolean_t *has_shared);
+
+extern kern_return_t vm_map_freeze(
+             	vm_map_t map,
+             	unsigned int *purgeable_count,
+             	unsigned int *wired_count,
+             	unsigned int *clean_count,
+             	unsigned int *dirty_count,
+             	boolean_t *has_shared);
+
+extern void vm_map_thaw(
+                vm_map_t map);
+#endif
+
 __END_DECLS
 
 #endif	/* KERNEL_PRIVATE */
diff --git a/osfmk/vm/vm_map_store.c b/osfmk/vm/vm_map_store.c
new file mode 100644
index 000000000..58148a964
--- /dev/null
+++ b/osfmk/vm/vm_map_store.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <vm/vm_map_store.h>
+
+#if MACH_ASSERT
+boolean_t
+first_free_is_valid_store( vm_map_t map )
+{
+	return(first_free_is_valid_ll( map ));
+}
+#endif
+
+void
+vm_map_store_init( struct vm_map_header *hdr )
+{
+	vm_map_store_init_ll( hdr );
+#ifdef VM_MAP_STORE_USE_RB
+	vm_map_store_init_rb( hdr );
+#endif
+}
+
+boolean_t
+vm_map_store_lookup_entry(
+	register vm_map_t		map,
+	register vm_map_offset_t	address,
+	vm_map_entry_t		*entry)		/* OUT */
+{
+#ifdef VM_MAP_STORE_USE_LL
+	return (vm_map_store_lookup_entry_ll( map, address, entry ));
+#elif defined VM_MAP_STORE_USE_RB
+	return (vm_map_store_lookup_entry_rb( map, address, entry ));
+#endif
+}
+
+void
+vm_map_store_update( vm_map_t map, vm_map_entry_t entry, int update_type )
+{
+	switch (update_type) {
+		case VM_MAP_ENTRY_CREATE:
+			break;
+		case VM_MAP_ENTRY_DELETE:
+			if (entry == map->first_free) {
+				map->first_free = vm_map_to_entry(map);
+			}
+			if (entry == map->hint) {
+				map->hint = vm_map_to_entry(map);
+			}
+			break;
+		default:
+			break;
+	}
+}
+
+void	vm_map_store_copy_insert( vm_map_t map, vm_map_entry_t after_where, vm_map_copy_t copy)
+{
+	vm_map_store_copy_insert_ll(map, after_where, copy);
+#ifdef VM_MAP_STORE_USE_RB
+	vm_map_store_copy_insert_rb(map, after_where, copy);
+#endif
+}
+
+/*
+ *	vm_map_entry_{un,}link:
+ *
+ *	Insert/remove entries from maps (or map copies).
+ *	The _vm_map_store_entry_{un,}link variants are used in
+ *	places where updating first_free is not needed and copy
+ *	maps are being modified; note that their first argument
+ *	is the map header.  Teaching vm_map_store_entry_{un,}link
+ *	themselves to handle these call sites made the interface
+ *	confusing and clunky, hence the separate variants.
+ */
+
+void
+_vm_map_store_entry_link( struct vm_map_header * mapHdr, vm_map_entry_t after_where, vm_map_entry_t entry)
+{
+	vm_map_store_entry_link_ll(mapHdr, after_where, entry);
+#ifdef VM_MAP_STORE_USE_RB
+	vm_map_store_entry_link_rb(mapHdr, after_where, entry);
+#endif
+}
+
+void
+vm_map_store_entry_link( vm_map_t map, vm_map_entry_t after_where, vm_map_entry_t entry)
+{
+	vm_map_t VMEL_map;
+	vm_map_entry_t VMEL_entry;
+	VMEL_map = (map);
+	VMEL_entry = (entry);
+	
+	_vm_map_store_entry_link(&VMEL_map->hdr, after_where, VMEL_entry);
+	if( VMEL_map->disable_vmentry_reuse == TRUE ) {
+		UPDATE_HIGHEST_ENTRY_END( VMEL_map, VMEL_entry);
+	} else {
+		update_first_free_ll(VMEL_map, VMEL_map->first_free);
+#ifdef VM_MAP_STORE_USE_RB
+		update_first_free_rb(VMEL_map, VMEL_map->first_free);
+#endif
+	}
+}
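+
+/*
+ * Example (illustrative; `start', `end' and `insertion_point' are
+ * hypothetical): a typical caller looks up the predecessor of a new
+ * range and links a fresh entry behind it, keeping the linked list
+ * and the red-black tree in sync with a single call:
+ *
+ *	vm_map_entry_t insertion_point, new_entry;
+ *
+ *	if (!vm_map_store_lookup_entry(map, start, &insertion_point)) {
+ *		new_entry = vm_map_entry_create(map);
+ *		new_entry->vme_start = start;
+ *		new_entry->vme_end = end;
+ *		vm_map_store_entry_link(map, insertion_point, new_entry);
+ *	}
+ */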
+
+void
+_vm_map_store_entry_unlink( struct vm_map_header * mapHdr, vm_map_entry_t entry)
+{
+	vm_map_store_entry_unlink_ll(mapHdr, entry);
+#ifdef VM_MAP_STORE_USE_RB
+	vm_map_store_entry_unlink_rb(mapHdr, entry);
+#endif
+}
+
+void
+vm_map_store_entry_unlink( vm_map_t map, vm_map_entry_t entry)
+{
+	vm_map_t VMEU_map;
+	vm_map_entry_t VMEU_entry;
+	vm_map_entry_t VMEU_first_free;
+	VMEU_map = (map);
+	VMEU_entry = (entry);
+	if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start){
+		VMEU_first_free = VMEU_entry->vme_prev;		
+	} else	{
+		VMEU_first_free = VMEU_map->first_free;
+	}
+	
+	_vm_map_store_entry_unlink(&VMEU_map->hdr, VMEU_entry);
+	update_first_free_ll(VMEU_map, VMEU_first_free);
+#ifdef VM_MAP_STORE_USE_RB
+	update_first_free_rb(VMEU_map, VMEU_first_free);
+#endif
+}
+
+void
+vm_map_store_copy_reset( vm_map_copy_t copy, vm_map_entry_t entry)
+{
+	int nentries = copy->cpy_hdr.nentries;
+	vm_map_store_copy_reset_ll(copy, entry, nentries);
+#ifdef VM_MAP_STORE_USE_RB
+	vm_map_store_copy_reset_rb(copy, entry, nentries);
+#endif
+}
+
+void
+vm_map_store_update_first_free( vm_map_t map, vm_map_entry_t first_free)
+{
+	update_first_free_ll(map, first_free);
+#ifdef VM_MAP_STORE_USE_RB
+	update_first_free_rb(map, first_free);
+#endif
+}
diff --git a/osfmk/vm/vm_map_store.h b/osfmk/vm/vm_map_store.h
new file mode 100644
index 000000000..dab7746ed
--- /dev/null
+++ b/osfmk/vm/vm_map_store.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _VM_VM_MAP_STORE_H
+#define _VM_VM_MAP_STORE_H
+
+/*
+#ifndef VM_MAP_STORE_USE_LL
+#define VM_MAP_STORE_USE_LL
+#endif
+*/
+#ifndef VM_MAP_STORE_USE_RB
+#define VM_MAP_STORE_USE_RB
+#endif
+
+#include <libkern/tree.h>
+
+struct _vm_map;
+struct vm_map_entry;
+struct vm_map_copy;
+struct vm_map_header;
+
+struct vm_map_store {
+#ifdef VM_MAP_STORE_USE_RB
+	RB_ENTRY(vm_map_store) entry;
+#endif
+};
+
+#ifdef VM_MAP_STORE_USE_RB
+	RB_HEAD( rb_head, vm_map_store );
+#endif
+
+#include <vm/vm_map.h>
+#include <vm/vm_map_store_ll.h>
+#include <vm/vm_map_store_rb.h>
+
+#define UPDATE_HIGHEST_ENTRY_END(map, highest_entry)	 			\
+	MACRO_BEGIN								\
+	struct _vm_map*	UHEE_map; 						\
+	struct vm_map_entry*	UHEE_entry; 						\
+	UHEE_map = (map); 							\
+	UHEE_entry = (highest_entry);	 					\
+	if( UHEE_map->highest_entry_end < UHEE_entry->vme_end) { 		\
+		UHEE_map->highest_entry_end = UHEE_entry->vme_end;		\
+	}								\
+	MACRO_END
+
+#define	VM_MAP_HIGHEST_ENTRY(map, entry, start)					\
+	MACRO_BEGIN								\
+	struct _vm_map* VMHE_map;							\
+	struct vm_map_entry*	tmp_entry;						\
+	vm_map_offset_t VMHE_start;						\
+	VMHE_map = (map);							\
+	VMHE_start= VMHE_map->highest_entry_end + PAGE_SIZE_64;			\
+	while(vm_map_lookup_entry(VMHE_map, VMHE_start, &tmp_entry)){		\
+		VMHE_map->highest_entry_end = tmp_entry->vme_end;		\
+		VMHE_start = VMHE_map->highest_entry_end + PAGE_SIZE_64;	\
+	}									\
+	entry = tmp_entry;							\
+	start = VMHE_start;							\
+	MACRO_END
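+
+/*
+ * Example (illustrative): an allocator honoring disable_vmentry_reuse
+ * is assumed to pick its start address from the top of the map rather
+ * than from the first free hole:
+ *
+ *	if (map->disable_vmentry_reuse == TRUE) {
+ *		VM_MAP_HIGHEST_ENTRY(map, entry, start);
+ *	} else {
+ *		...	derive start from map->first_free
+ *	}
+ */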
+
+/*
+ *	SAVE_HINT_MAP_READ:
+ *
+ *	Saves the specified entry as the hint for
+ *	future lookups.  only a read lock is held on map,
+ * 	so make sure the store is atomic... OSCompareAndSwap
+ *	guarantees this... also, we don't care if we collide
+ *	and someone else wins and stores their 'hint'
+ */
+#define	SAVE_HINT_MAP_READ(map,value) \
+	MACRO_BEGIN							\
+	OSCompareAndSwapPtr((map)->hint, value, &(map)->hint); \
+	MACRO_END
+
+
+/*
+ *	SAVE_HINT_MAP_WRITE:
+ *
+ *	Saves the specified entry as the hint for
+ *	future lookups.  write lock held on map,
+ * 	so no one else can be writing or looking
+ * 	until the lock is dropped, so it's safe
+ * 	to just do an assignment
+ */
+#define	SAVE_HINT_MAP_WRITE(map,value) \
+	MACRO_BEGIN		       \
+	(map)->hint = (value);	       \
+	MACRO_END
+
+#define VM_MAP_ENTRY_CREATE	1
+#define VM_MAP_ENTRY_DELETE	2
+
+void	vm_map_store_init( struct vm_map_header* );
+boolean_t vm_map_store_lookup_entry( struct _vm_map*, vm_map_offset_t, struct vm_map_entry**);
+void	vm_map_store_update( struct _vm_map*, struct vm_map_entry*, int);
+void 	_vm_map_store_entry_link( struct vm_map_header *, struct vm_map_entry*, struct vm_map_entry*);
+void 	vm_map_store_entry_link( struct _vm_map*, struct vm_map_entry*, struct vm_map_entry*);
+void	_vm_map_store_entry_unlink( struct vm_map_header *, struct vm_map_entry*);
+void	vm_map_store_entry_unlink( struct _vm_map*, struct vm_map_entry*);
+void	vm_map_store_update_first_free( struct _vm_map*, struct vm_map_entry*);
+void	vm_map_store_copy_insert( struct _vm_map*, struct vm_map_entry*, struct vm_map_copy*);
+void	vm_map_store_copy_reset( struct vm_map_copy*, struct vm_map_entry*);
+#if MACH_ASSERT
+boolean_t first_free_is_valid_store( struct _vm_map*);
+#endif
+
+#endif /* _VM_VM_MAP_STORE_H */
+
diff --git a/osfmk/vm/vm_map_store_ll.c b/osfmk/vm/vm_map_store_ll.c
new file mode 100644
index 000000000..16959bdb6
--- /dev/null
+++ b/osfmk/vm/vm_map_store_ll.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <vm/vm_map_store_ll.h>
+
+boolean_t
+first_free_is_valid_ll( vm_map_t map )
+{
+	vm_map_entry_t	entry, next;
+	entry = vm_map_to_entry(map);
+	next = entry->vme_next;
+	while (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_end) ||
+	       (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_start) &&
+		next != vm_map_to_entry(map))) {
+		entry = next;
+		next = entry->vme_next;
+		if (entry == vm_map_to_entry(map))
+			break;
+	}
+	if (map->first_free != entry) {
+		printf("Bad first_free for map %p: %p should be %p\n",
+		       map, map->first_free, entry);
+		return FALSE;
+	}
+	return TRUE;
+}
+
+/*
+ *	UPDATE_FIRST_FREE:
+ *
+ *	Updates the map->first_free pointer to the
+ *	entry immediately before the first hole in the map.
+ * 	The map should be locked.
+ */
+#define UPDATE_FIRST_FREE_LL(map, new_first_free) 				\
+	MACRO_BEGIN							\
+	if (map->disable_vmentry_reuse == FALSE) {		\
+		vm_map_t	UFF_map; 					\
+		vm_map_entry_t	UFF_first_free; 				\
+		vm_map_entry_t	UFF_next_entry; 				\
+		UFF_map = (map); 						\
+		UFF_first_free = (new_first_free);				\
+		UFF_next_entry = UFF_first_free->vme_next; 			\
+		while (vm_map_trunc_page(UFF_next_entry->vme_start) == 		\
+		       vm_map_trunc_page(UFF_first_free->vme_end) || 			\
+		       (vm_map_trunc_page(UFF_next_entry->vme_start) == 		\
+			vm_map_trunc_page(UFF_first_free->vme_start) &&		\
+			UFF_next_entry != vm_map_to_entry(UFF_map))) { 		\
+			UFF_first_free = UFF_next_entry; 			\
+			UFF_next_entry = UFF_first_free->vme_next; 		\
+			if (UFF_first_free == vm_map_to_entry(UFF_map)) 	\
+				break; 						\
+		} 								\
+		UFF_map->first_free = UFF_first_free; 				\
+		assert(first_free_is_valid(UFF_map));				\
+	}									\
+	MACRO_END
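+
+/*
+ * Worked example (illustrative): with entries A [0x1000, 0x2000),
+ * B [0x2000, 0x3000) and C [0x5000, 0x6000), the first hole begins
+ * at 0x3000; starting from A, the loop above advances to B and stops,
+ * leaving first_free pointing at B, the entry immediately before the
+ * hole.
+ */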
+
+#define _vm_map_entry_link_ll(hdr, after_where, entry)			\
+	MACRO_BEGIN							\
+	(hdr)->nentries++;						\
+	(entry)->vme_prev = (after_where);				\
+	(entry)->vme_next = (after_where)->vme_next;			\
+	(entry)->vme_prev->vme_next = (entry)->vme_next->vme_prev = (entry); \
+	MACRO_END
+
+#define _vm_map_entry_unlink_ll(hdr, entry)				\
+	MACRO_BEGIN							\
+	(hdr)->nentries--;						\
+	(entry)->vme_next->vme_prev = (entry)->vme_prev; 		\
+	(entry)->vme_prev->vme_next = (entry)->vme_next; 		\
+	MACRO_END
+/*
+ *	Macro:		vm_map_copy_insert
+ *	
+ *	Description:
+ *		Link a copy chain ("copy") into a map at the
+ *		specified location (after "where").
+ *	Side effects:
+ *		The copy chain is destroyed.
+ *	Warning:
+ *		The arguments are evaluated multiple times.
+ */
+#define	_vm_map_copy_insert_ll(map, where, copy)				\
+MACRO_BEGIN								\
+	vm_map_t VMCI_map;						\
+	vm_map_entry_t VMCI_where;					\
+	vm_map_copy_t VMCI_copy;					\
+	VMCI_map = (map);						\
+	VMCI_where = (where);						\
+	VMCI_copy = (copy);						\
+	((VMCI_where->vme_next)->vme_prev = vm_map_copy_last_entry(VMCI_copy))\
+		->vme_next = (VMCI_where->vme_next);			\
+	((VMCI_where)->vme_next = vm_map_copy_first_entry(VMCI_copy))	\
+		->vme_prev = VMCI_where;				\
+	VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries;		\
+	update_first_free_ll(VMCI_map, VMCI_map->first_free);		\
+MACRO_END
+
+
+
+void
+vm_map_store_init_ll( __unused struct vm_map_header *hdr)
+{
+	return;
+}
+
+/*
+ *	vm_map_lookup_entry_ll:	[ internal use only ]
+ *	Use the linked list to find the map entry containing (or
+ *	immediately preceding) the specified address
+ *	in the given map; the entry is returned
+ *	in the "entry" parameter.  The boolean
+ *	result indicates whether the address is
+ *	actually contained in the map.
+ */
+boolean_t
+vm_map_store_lookup_entry_ll(
+	register vm_map_t		map,
+	register vm_map_offset_t	address,
+	vm_map_entry_t		*entry)		/* OUT */
+{
+	register vm_map_entry_t		cur;
+	register vm_map_entry_t		last;
+
+	/*
+	 *	Start looking either from the head of the
+	 *	list, or from the hint.
+	 */
+	cur = map->hint;
+
+	if (cur == vm_map_to_entry(map))
+		cur = cur->vme_next;
+
+	if (address >= cur->vme_start) {
+		/*
+		 *	Go from hint to end of list.
+		 *
+		 *	But first, make a quick check to see if
+		 *	we are already looking at the entry we
+		 *	want (which is usually the case).
+		 *	Note also that we don't need to save the hint
+		 *	here... it is the same hint (unless we are
+		 *	at the header, in which case the hint didn't
+		 *	buy us anything anyway).
+		 */
+		last = vm_map_to_entry(map);
+		if ((cur != last) && (cur->vme_end > address)) {
+			*entry = cur;
+			return(TRUE);
+		}
+	}
+	else {
+		/*
+		 *	Go from start to hint, *inclusively*
+		 */
+		last = cur->vme_next;
+		cur = vm_map_first_entry(map);
+	}
+
+	/*
+	 *	Search linearly
+	 */
+
+	while (cur != last) {
+		if (cur->vme_end > address) {
+			if (address >= cur->vme_start) {
+				/*
+				 *	Save this lookup for future
+				 *	hints, and return
+				 */
+
+				*entry = cur;
+				SAVE_HINT_MAP_READ(map, cur);
+
+				return(TRUE);
+			}
+			break;
+		}
+		cur = cur->vme_next;
+	}
+	*entry = cur->vme_prev;
+	SAVE_HINT_MAP_READ(map, *entry);
+
+	return(FALSE);
+}
+
+void
+vm_map_store_entry_link_ll( struct vm_map_header *mapHdr, vm_map_entry_t after_where, vm_map_entry_t entry)
+{
+	_vm_map_entry_link_ll( mapHdr, after_where, entry);
+}
+
+void
+vm_map_store_entry_unlink_ll( struct vm_map_header *mapHdr, vm_map_entry_t entry)
+{
+	_vm_map_entry_unlink_ll( mapHdr, entry);
+}
+
+void
+vm_map_store_copy_insert_ll( vm_map_t map, vm_map_entry_t after_where, vm_map_copy_t copy)
+{
+	_vm_map_copy_insert_ll( map, after_where, copy);
+}
+
+void
+vm_map_store_copy_reset_ll( vm_map_copy_t copy, __unused vm_map_entry_t entry, __unused int nentries)
+{
+	copy->cpy_hdr.nentries = 0;
+	vm_map_copy_first_entry(copy) =
+		vm_map_copy_last_entry(copy) =
+			vm_map_copy_to_entry(copy);
+}
+
+void
+update_first_free_ll( vm_map_t map, vm_map_entry_t new_first_free)
+{
+	UPDATE_FIRST_FREE_LL( map, new_first_free);
+}
+
diff --git a/bsd/dev/ppc/machdep.c b/osfmk/vm/vm_map_store_ll.h
similarity index 61%
rename from bsd/dev/ppc/machdep.c
rename to osfmk/vm/vm_map_store_ll.h
index bf9f5beff..0bbe00d48 100644
--- a/bsd/dev/ppc/machdep.c
+++ b/osfmk/vm/vm_map_store_ll.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -25,46 +25,20 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
-/*
- * Copyright (c) 1997 Apple Computer, Inc.  All rights reserved.
- * Copyright (c) 1992 NeXT Computer, Inc.  All rights reserved.
- *
- *
- * Machine dependent cruft.
- *
- * 27-Apr-1997  A.Ramesh at Apple 
- *
- *
- */
-
-#include <mach/mach_types.h>
-#include <mach/machine.h>
-#include <sys/reboot.h>
-
-int reboot_how;
-extern struct tty	cons;
-extern struct tty	*constty;		/* current console device */
-
-extern int getchar(void);
-extern int cngetc(void);
-extern void cnputc(char);
 
-#define putchar cnputc
+#ifndef _VM_VM_MAP_STORE_LL_H
+#define _VM_VM_MAP_STORE_LL_H
 
-int
-getchar(void)
-{
-	int c;
+#include <vm/vm_map_store.h>
 
-	c = cngetc();
-#if 0
-	if (c == 0x1b)		/* ESC ? */
-		call_kdp();
-#endif
+boolean_t first_free_is_valid_ll( struct _vm_map*);
 
-	if (c == '\r')
-		c = '\n';
-        cnputc(c);
-	return c;
-}
+void	vm_map_store_init_ll( struct vm_map_header* );
+boolean_t vm_map_store_lookup_entry_ll( struct _vm_map*, vm_map_offset_t, struct vm_map_entry**);
+void	vm_map_store_entry_link_ll( struct vm_map_header*, struct vm_map_entry*, struct vm_map_entry*);
+void	vm_map_store_entry_unlink_ll( struct vm_map_header*, struct vm_map_entry*);
+void	update_first_free_ll( struct _vm_map*, struct vm_map_entry*);
+void	vm_map_store_copy_insert_ll( struct _vm_map*, struct vm_map_entry*, struct vm_map_copy*);
+void	vm_map_store_copy_reset_ll( struct vm_map_copy*, struct vm_map_entry*, int);
 
+#endif /* _VM_VM_MAP_STORE_LL_H */
diff --git a/osfmk/vm/vm_map_store_rb.c b/osfmk/vm/vm_map_store_rb.c
new file mode 100644
index 000000000..2e103b0a2
--- /dev/null
+++ b/osfmk/vm/vm_map_store_rb.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <vm/vm_map_store_rb.h>
+
+RB_GENERATE(rb_head, vm_map_store, entry, rb_node_compare);
+
+#define VME_FOR_STORE( store)	\
+	(vm_map_entry_t)(((unsigned long)store) - ((unsigned long)sizeof(struct vm_map_links)))
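+
+/*
+ * VME_FOR_STORE assumes the layout of struct vm_map_entry: the
+ * embedded store is taken to sit immediately after the leading
+ * struct vm_map_links, so stepping a store pointer back by
+ * sizeof(struct vm_map_links) recovers the enclosing entry.  It is
+ * the inverse of taking the address of the store field:
+ *
+ *	struct vm_map_store *store = &entry->store;
+ *	assert(VME_FOR_STORE(store) == entry);
+ */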
+
+void
+vm_map_store_init_rb( struct vm_map_header* hdr )
+{
+	RB_INIT(&(hdr->rb_head_store));
+}
+
+int rb_node_compare(struct vm_map_store *node, struct vm_map_store *parent)
+{
+	vm_map_entry_t vme_c;
+	vm_map_entry_t vme_p;
+
+	vme_c = VME_FOR_STORE(node);
+	vme_p =  VME_FOR_STORE(parent);
+	if (vme_c->vme_start < vme_p->vme_start)
+		return -1;
+	if (vme_c->vme_start >= vme_p->vme_end)
+		return 1;
+	return 0;
+}
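+
+/*
+ * Note: the comparison treats any start address inside the parent's
+ * [vme_start, vme_end) as equal (it returns 0), so a tree search keyed
+ * on an address that an entry spans stops at the containing entry.
+ * For example, a node keyed at 0x5000 compares equal to an entry
+ * covering [0x4000, 0x6000).
+ */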
+
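+/*
+ * Debug helper: cross-checks that *vm_entry is actually present in the
+ * tree and panics either way, dumping the node's neighborhood on a hit
+ * or the bad lookup result (*wrong_vme) on a miss.
+ */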
+void vm_map_store_walk_rb( vm_map_t map, vm_map_entry_t *wrong_vme, vm_map_entry_t *vm_entry)
+{
+	struct vm_map_header hdr = map->hdr;
+	struct vm_map_store *rb_entry = RB_ROOT(&(hdr.rb_head_store));
+	vm_map_entry_t cur = *vm_entry;
+
+	rb_entry = RB_FIND( rb_head, &(hdr.rb_head_store), &(cur->store));	
+	if(rb_entry == NULL)
+		panic("NO SUCH ENTRY %p. Gave back %p", *vm_entry, *wrong_vme);
+	else
+		panic("Cur: %p, L: %p, R: %p",  VME_FOR_STORE(rb_entry),  VME_FOR_STORE(RB_LEFT(rb_entry,entry)),  VME_FOR_STORE(RB_RIGHT(rb_entry,entry)));
+}
+
+
+boolean_t vm_map_store_lookup_entry_rb( vm_map_t map, vm_map_offset_t address, vm_map_entry_t *vm_entry)
+{
+	struct vm_map_header hdr = map->hdr;
+	struct vm_map_store *rb_entry = RB_ROOT(&(hdr.rb_head_store));
+	vm_map_entry_t cur = vm_map_to_entry(map);
+	vm_map_entry_t prev = VM_MAP_ENTRY_NULL;
+
+	while (rb_entry != (struct vm_map_store*)NULL) {
+       		cur =  VME_FOR_STORE(rb_entry);
+		if(cur == VM_MAP_ENTRY_NULL)
+			panic("no entry");
+		if (address >= cur->vme_start) {
+			if (address < cur->vme_end) {
+				*vm_entry = cur;
+				return TRUE;
+			}
+			rb_entry = RB_RIGHT(rb_entry, entry);
+			prev = cur;
+		} else {
+			rb_entry = RB_LEFT(rb_entry, entry);
+		}
+	}
+	if (prev == VM_MAP_ENTRY_NULL) {
+		prev = vm_map_to_entry(map);
+	}
+	*vm_entry = prev;
+	return FALSE;
+}
+
+void 	vm_map_store_entry_link_rb( struct vm_map_header *mapHdr, __unused vm_map_entry_t after_where, vm_map_entry_t entry)
+{
+	struct rb_head *rbh = &(mapHdr->rb_head_store);
+	struct vm_map_store *store = &(entry->store);
+	struct vm_map_store *tmp_store;
+	if((tmp_store = RB_INSERT( rb_head, rbh, store )) != NULL) {
+		panic("VMSEL: INSERT FAILED: 0x%lx, 0x%lx, 0x%lx, 0x%lx", (uintptr_t)entry->vme_start, (uintptr_t)entry->vme_end,
+				(uintptr_t)(VME_FOR_STORE(tmp_store))->vme_start,  (uintptr_t)(VME_FOR_STORE(tmp_store))->vme_end);
+	}
+}
+
+void	vm_map_store_entry_unlink_rb( struct vm_map_header *mapHdr, vm_map_entry_t entry)
+{
+	struct rb_head *rbh = &(mapHdr->rb_head_store);
+	struct vm_map_store *rb_entry;
+	struct vm_map_store *store = &(entry->store);
+	
+	rb_entry = RB_FIND( rb_head, rbh, store);	
+	if(rb_entry == NULL)
+		panic("NO ENTRY TO DELETE");
+	RB_REMOVE( rb_head, rbh, store );
+}
+
+void	vm_map_store_copy_insert_rb( vm_map_t map, __unused vm_map_entry_t after_where, vm_map_copy_t copy)
+{
+	struct vm_map_header *mapHdr = &(map->hdr);
+	struct rb_head *rbh = &(mapHdr->rb_head_store);
+	struct vm_map_store *store;
+	vm_map_entry_t entry = vm_map_copy_first_entry(copy);
+	int inserted=0, nentries = copy->cpy_hdr.nentries;
+		
+	while (entry != vm_map_copy_to_entry(copy) && nentries > 0) {		
+		vm_map_entry_t prev = entry;
+		store = &(entry->store);
+		if( RB_INSERT( rb_head, rbh, store ) != NULL){
+			panic("VMSCIR1: INSERT FAILED: %d: %p, %p, %p, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx",inserted, prev, entry, vm_map_copy_to_entry(copy), 
+					(uintptr_t)prev->vme_start,  (uintptr_t)prev->vme_end,  (uintptr_t)entry->vme_start,  (uintptr_t)entry->vme_end,  
+					 (uintptr_t)(VME_FOR_STORE(rbh->rbh_root))->vme_start,  (uintptr_t)(VME_FOR_STORE(rbh->rbh_root))->vme_end);
+		} else {
+			entry = entry->vme_next;
+			inserted++;
+			nentries--;
+		}
+	}
+}
+
+void
+vm_map_store_copy_reset_rb( vm_map_copy_t copy, vm_map_entry_t entry, int nentries )
+{
+	struct vm_map_header *mapHdr = &(copy->cpy_hdr);
+	struct rb_head *rbh = &(mapHdr->rb_head_store);
+	struct vm_map_store *store;
+	int deleted=0;
+		
+	while (entry != vm_map_copy_to_entry(copy) && nentries > 0) {		
+		store = &(entry->store);
+		RB_REMOVE( rb_head, rbh, store );
+		entry = entry->vme_next;
+		deleted++;
+		nentries--;
+	}
+}
+
+void	update_first_free_rb( __unused vm_map_t map, __unused vm_map_entry_t entry)
+{
+	return;
+}
+
diff --git a/osfmk/vm/vm_map_store_rb.h b/osfmk/vm/vm_map_store_rb.h
new file mode 100644
index 000000000..da6794929
--- /dev/null
+++ b/osfmk/vm/vm_map_store_rb.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _VM_VM_MAP_STORE_RB_H
+#define _VM_VM_MAP_STORE_RB_H
+
+#include <vm/vm_map_store.h>
+
+RB_PROTOTYPE_SC(__private_extern__, rb_head, vm_map_store, entry, rb_node_compare);
+
+void vm_map_store_init_rb( struct vm_map_header* );
+int rb_node_compare(struct vm_map_store *, struct vm_map_store *);
+void vm_map_store_walk_rb( struct _vm_map*, struct vm_map_entry**, struct vm_map_entry**);
+boolean_t vm_map_store_lookup_entry_rb( struct _vm_map*, vm_map_offset_t, struct vm_map_entry**);
+void 	vm_map_store_entry_link_rb( struct vm_map_header*, struct vm_map_entry*, struct vm_map_entry*);
+void	vm_map_store_entry_unlink_rb( struct vm_map_header*, struct vm_map_entry*);
+void	vm_map_store_copy_insert_rb( struct _vm_map*, struct vm_map_entry*, struct vm_map_copy*);
+void	vm_map_store_copy_reset_rb( struct vm_map_copy*, struct vm_map_entry*, int);
+void	update_first_free_rb(struct _vm_map*, struct vm_map_entry*);
+
+#endif /* _VM_VM_MAP_STORE_RB_H */
diff --git a/osfmk/vm/vm_object.c b/osfmk/vm/vm_object.c
index c71627a4f..1c0138d82 100644
--- a/osfmk/vm/vm_object.c
+++ b/osfmk/vm/vm_object.c
@@ -80,6 +80,7 @@
 #include <kern/lock.h>
 #include <kern/queue.h>
 #include <kern/xpr.h>
+#include <kern/kalloc.h>
 #include <kern/zalloc.h>
 #include <kern/host.h>
 #include <kern/host_statistics.h>
@@ -261,28 +262,40 @@ unsigned int vm_page_purged_others = 0;
 static vm_object_t	vm_object_cache_trim(
 				boolean_t called_from_vm_object_deallocate);
 
-static queue_head_t	vm_object_cached_list;
-static int		vm_object_cached_count=0;
+static void		vm_object_deactivate_all_pages(
+				vm_object_t	object);
+
 static int		vm_object_cached_high;	/* highest # cached objects */
 static int		vm_object_cached_max = 512;	/* may be patched*/
 
-static lck_mtx_t	vm_object_cached_lock_data;
-static lck_mtx_ext_t	vm_object_cached_lock_data_ext;
-
 #define vm_object_cache_lock()		\
 		lck_mtx_lock(&vm_object_cached_lock_data)
 #define vm_object_cache_lock_try()		\
 		lck_mtx_try_lock(&vm_object_cached_lock_data)
+
+#endif	/* VM_OBJECT_CACHE */
+
+static queue_head_t	vm_object_cached_list;
+static uint32_t		vm_object_cache_pages_freed = 0;
+static uint32_t		vm_object_cache_pages_moved = 0;
+static uint32_t		vm_object_cache_pages_skipped = 0;
+static uint32_t		vm_object_cache_adds = 0;
+static uint32_t		vm_object_cached_count = 0;
+static lck_mtx_t	vm_object_cached_lock_data;
+static lck_mtx_ext_t	vm_object_cached_lock_data_ext;
+
+static uint32_t		vm_object_page_grab_failed = 0;
+static uint32_t		vm_object_page_grab_skipped = 0;
+static uint32_t		vm_object_page_grab_returned = 0;
+static uint32_t		vm_object_page_grab_pmapped = 0;
+static uint32_t		vm_object_page_grab_reactivations = 0;
+
 #define vm_object_cache_lock_spin()		\
 		lck_mtx_lock_spin(&vm_object_cached_lock_data)
 #define vm_object_cache_unlock()	\
 		lck_mtx_unlock(&vm_object_cached_lock_data)
 
-#endif	/* VM_OBJECT_CACHE */
-
-
-static void		vm_object_deactivate_all_pages(
-				vm_object_t	object);
+static void	vm_object_cache_remove_locked(vm_object_t);
 
 
 #define	VM_OBJECT_HASH_COUNT		1024
@@ -333,6 +346,10 @@ unsigned int vm_object_reap_count_async = 0;
 #define vm_object_reaper_unlock()	\
 		lck_mtx_unlock(&vm_object_reaper_lock_data)
 
+#if 0
+#undef KERNEL_DEBUG
+#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
+#endif
 
 
 static lck_mtx_t *
@@ -446,7 +463,7 @@ _vm_object_allocate(
 	queue_init(&object->uplq);
 #endif /* UPL_DEBUG */
 	vm_object_lock_init(object);
-	object->size = size;
+	object->vo_size = size;
 }
 
 __private_extern__ vm_object_t
@@ -467,7 +484,8 @@ vm_object_allocate(
 
 
 lck_grp_t		vm_object_lck_grp;
-lck_grp_attr_t	vm_object_lck_grp_attr;
+lck_grp_t		vm_object_cache_lck_grp;
+lck_grp_attr_t		vm_object_lck_grp_attr;
 lck_attr_t		vm_object_lck_attr;
 lck_attr_t		kernel_object_lck_attr;
 
@@ -485,18 +503,18 @@ vm_object_bootstrap(void)
 				round_page(512*1024),
 				round_page(12*1024),
 				"vm objects");
+	zone_change(vm_object_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
 	zone_change(vm_object_zone, Z_NOENCRYPT, TRUE);
 
 	vm_object_init_lck_grp();
 
-#if VM_OBJECT_CACHE
 	queue_init(&vm_object_cached_list);
 
 	lck_mtx_init_ext(&vm_object_cached_lock_data,
 		&vm_object_cached_lock_data_ext,
-		&vm_object_lck_grp,
+		&vm_object_cache_lck_grp,
 		&vm_object_lck_attr);
-#endif
+
 	queue_init(&vm_object_reaper_queue);
 
 	for (i = 0; i < VM_OBJECT_HASH_LOCK_COUNT; i++) {
@@ -515,6 +533,7 @@ vm_object_bootstrap(void)
 			      round_page(512*1024),
 			      round_page(12*1024),
 			      "vm object hash entries");
+	zone_change(vm_object_hash_zone, Z_CALLERACCT, FALSE);
 	zone_change(vm_object_hash_zone, Z_NOENCRYPT, TRUE);
 
 	for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
@@ -538,7 +557,7 @@ vm_object_bootstrap(void)
 	 */
 	vm_object_lock_init(&vm_object_template);
 #endif
-	vm_object_template.size = 0;
+	vm_object_template.vo_size = 0;
 	vm_object_template.memq_hint = VM_PAGE_NULL;
 	vm_object_template.ref_count = 1;
 #if	TASK_SWAPPER
@@ -549,7 +568,7 @@ vm_object_bootstrap(void)
 	vm_object_template.reusable_page_count = 0;
 	vm_object_template.copy = VM_OBJECT_NULL;
 	vm_object_template.shadow = VM_OBJECT_NULL;
-	vm_object_template.shadow_offset = (vm_object_offset_t) 0;
+	vm_object_template.vo_shadow_offset = (vm_object_offset_t) 0;
 	vm_object_template.pager = MEMORY_OBJECT_NULL;
 	vm_object_template.paging_offset = 0;
 	vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL;
@@ -590,6 +609,7 @@ vm_object_bootstrap(void)
 	vm_object_template.sequential = (vm_object_offset_t) 0;
 	vm_object_template.pages_created = 0;
 	vm_object_template.pages_used = 0;
+	vm_object_template.scan_collisions = 0;
 
 #if	MACH_PAGEMAP
 	vm_object_template.existence_map = VM_EXTERNAL_NULL;
@@ -600,7 +620,8 @@ vm_object_bootstrap(void)
 #endif	/* MACH_ASSERT */
 
 	/* cache bitfields */
-	vm_object_template.wimg_bits = VM_WIMG_DEFAULT;
+	vm_object_template.wimg_bits = VM_WIMG_USE_DEFAULT;
+	vm_object_template.set_cache_attr = FALSE;
 	vm_object_template.code_signed = FALSE;
 	vm_object_template.hashed = FALSE;
 	vm_object_template.transposed = FALSE;
@@ -622,6 +643,7 @@ vm_object_bootstrap(void)
 	vm_object_template.objq.next=NULL;
 	vm_object_template.objq.prev=NULL;
 
+	vm_object_template.vo_cache_ts = 0;
 	
 	/*
 	 *	Initialize the "kernel object"
@@ -704,6 +726,7 @@ vm_object_init_lck_grp(void)
 	 */
 	lck_grp_attr_setdefault(&vm_object_lck_grp_attr);
 	lck_grp_init(&vm_object_lck_grp, "vm_object", &vm_object_lck_grp_attr);
+	lck_grp_init(&vm_object_cache_lck_grp, "vm_object_cache", &vm_object_lck_grp_attr);
 	lck_attr_setdefault(&vm_object_lck_attr);
 	lck_attr_setdefault(&kernel_object_lck_attr);
 	lck_attr_cleardebug(&kernel_object_lck_attr);
@@ -838,17 +861,6 @@ vm_object_deallocate(
 				vm_object_lock(object);
 				vm_object_mapping_end(object);
 			}
-			/*
-			 * recheck the ref_count since we dropped the object lock
-			 * to call 'memory_object_last_unmap'... it's possible
-			 * additional references got taken and we only want
-			 * to deactivate the pages if this 'named' object will only
-			 * referenced by the backing pager once we drop our reference
-			 * below
-			 */
-			if (!object->terminating && object->ref_count == 2)
-				vm_object_deactivate_all_pages(object);
-
 			assert(object->ref_count > 0);
 		}
 
@@ -1073,6 +1085,360 @@ vm_object_deallocate(
 }
 
 
+
+vm_page_t
+vm_object_page_grab(
+	vm_object_t	object)
+{
+	vm_page_t	p, next_p;
+	int		p_limit = 0;
+	int		p_skipped = 0;
+
+	vm_object_lock_assert_exclusive(object);
+
+	next_p = (vm_page_t)queue_first(&object->memq);
+	p_limit = MIN(50, object->resident_page_count);
+
+	while (!queue_end(&object->memq, (queue_entry_t)next_p) && --p_limit > 0) {
+
+		p = next_p;
+		next_p = (vm_page_t)queue_next(&next_p->listq);
+
+		if (VM_PAGE_WIRED(p) || p->busy || p->cleaning || p->fictitious)
+			goto move_page_in_obj;
+
+		if (p->pmapped || p->dirty || p->precious) {
+			vm_page_lockspin_queues();
+
+			if (p->pmapped) {
+				int refmod_state;
+
+				vm_object_page_grab_pmapped++;
+
+				if (p->reference == FALSE || p->dirty == FALSE) {
+
+					refmod_state = pmap_get_refmod(p->phys_page);
+
+					if (refmod_state & VM_MEM_REFERENCED)
+						p->reference = TRUE;
+					if (refmod_state & VM_MEM_MODIFIED)
+						p->dirty = TRUE;
+				}
+				if (p->dirty == FALSE && p->precious == FALSE) {
+
+					refmod_state = pmap_disconnect(p->phys_page);
+
+					if (refmod_state & VM_MEM_REFERENCED)
+						p->reference = TRUE;
+					if (refmod_state & VM_MEM_MODIFIED)
+						p->dirty = TRUE;
+
+					if (p->dirty == FALSE)
+						goto take_page;
+				}
+			}
+			if (p->inactive && p->reference == TRUE) {
+				vm_page_activate(p);
+
+				VM_STAT_INCR(reactivations);
+				vm_object_page_grab_reactivations++;
+			}
+			vm_page_unlock_queues();
+move_page_in_obj:
+			queue_remove(&object->memq, p, vm_page_t, listq);
+			queue_enter(&object->memq, p, vm_page_t, listq);
+
+			p_skipped++;
+			continue;
+		}
+		vm_page_lockspin_queues();
+take_page:
+		vm_page_free_prepare_queues(p);
+		vm_object_page_grab_returned++;
+		vm_object_page_grab_skipped += p_skipped;
+
+		vm_page_unlock_queues();
+
+		vm_page_free_prepare_object(p, TRUE);
+		
+		return (p);
+	}
+	vm_object_page_grab_skipped += p_skipped;
+	vm_object_page_grab_failed++;
+
+	return (NULL);
+}
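+
+/*
+ * Example (illustrative): a caller holding the object lock exclusively
+ * can drain reclaimable pages one at a time; a NULL return means the
+ * bounded scan found nothing it could safely take:
+ *
+ *	vm_page_t p;
+ *
+ *	vm_object_lock(object);
+ *	while ((p = vm_object_page_grab(object)) != VM_PAGE_NULL) {
+ *		...	collect p on a local free list
+ *	}
+ *	vm_object_unlock(object);
+ */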
+
+
+
+#define EVICT_PREPARE_LIMIT	64
+#define EVICT_AGE		10
+
+static	clock_sec_t	vm_object_cache_aging_ts = 0;
+
+static void
+vm_object_cache_remove_locked(
+	vm_object_t	object)
+{
+	queue_remove(&vm_object_cached_list, object, vm_object_t, objq);
+	object->objq.next = NULL;
+	object->objq.prev = NULL;
+
+	vm_object_cached_count--;
+}
+
+void
+vm_object_cache_remove(
+	vm_object_t	object)
+{
+	vm_object_cache_lock_spin();
+
+	if (object->objq.next || object->objq.prev)
+		vm_object_cache_remove_locked(object);
+
+	vm_object_cache_unlock();
+}
+
+void
+vm_object_cache_add(
+	vm_object_t	object)
+{
+	clock_sec_t sec;
+	clock_nsec_t nsec;
+
+	if (object->resident_page_count == 0)
+		return;
+	clock_get_system_nanotime(&sec, &nsec);
+
+	vm_object_cache_lock_spin();
+
+	if (object->objq.next == NULL && object->objq.prev == NULL) {
+		queue_enter(&vm_object_cached_list, object, vm_object_t, objq);
+		object->vo_cache_ts = sec + EVICT_AGE;
+		object->vo_cache_pages_to_scan = object->resident_page_count;
+
+		vm_object_cached_count++;
+		vm_object_cache_adds++;
+	}
+	vm_object_cache_unlock();
+}
+
+int
+vm_object_cache_evict(
+	int	num_to_evict,
+	int	max_objects_to_examine)
+{
+	vm_object_t	object = VM_OBJECT_NULL;
+	vm_object_t	next_obj = VM_OBJECT_NULL;
+	vm_page_t	local_free_q = VM_PAGE_NULL;
+	vm_page_t	p;
+	vm_page_t	next_p;
+	int		object_cnt = 0;
+	vm_page_t	ep_array[EVICT_PREPARE_LIMIT];
+	int		ep_count;
+	int		ep_limit;
+	int		ep_index;
+	int		ep_freed = 0;
+	int		ep_moved = 0;
+	uint32_t	ep_skipped = 0;
+	clock_sec_t	sec;
+	clock_nsec_t	nsec;
+
+	KERNEL_DEBUG(0x13001ec | DBG_FUNC_START, 0, 0, 0, 0, 0);
+	/*
+	 * do a couple of quick checks to see if it's 
+	 * worthwhile grabbing the lock
+	 */
+	if (queue_empty(&vm_object_cached_list)) {
+		KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, 0, 0, 0, 0, 0);
+		return (0);
+	}
+	clock_get_system_nanotime(&sec, &nsec);
+
+	/*
+	 * the object on the head of the queue has not
+	 * yet sufficiently aged
+	 */
+	if (sec < vm_object_cache_aging_ts) {
+		KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, 0, 0, 0, 0, 0);
+		return (0);
+	}
+	/*
+	 * don't need the queue lock to find 
+	 * and lock an object on the cached list
+	 */
+	vm_page_unlock_queues();
+
+	vm_object_cache_lock_spin();
+
+	for (;;) {
+		next_obj = (vm_object_t)queue_first(&vm_object_cached_list);
+
+		while (!queue_end(&vm_object_cached_list, (queue_entry_t)next_obj) && object_cnt++ < max_objects_to_examine) {
+
+			object = next_obj;
+			next_obj = (vm_object_t)queue_next(&next_obj->objq);
+			
+			if (sec < object->vo_cache_ts) {
+				KERNEL_DEBUG(0x130020c, object, object->resident_page_count, object->vo_cache_ts, sec, 0);
+
+				vm_object_cache_aging_ts = object->vo_cache_ts;
+				object = VM_OBJECT_NULL;
+				break;
+			}
+			if (!vm_object_lock_try_scan(object)) {
+				/*
+				 * just skip over this guy for now... if we find
+				 * an object to steal pages from, we'll revisit in a bit...
+				 * hopefully, the lock will have cleared
+				 */
+				KERNEL_DEBUG(0x13001f8, object, object->resident_page_count, 0, 0, 0);
+
+				object = VM_OBJECT_NULL;
+				continue;
+			}
+			if (queue_empty(&object->memq) || object->vo_cache_pages_to_scan == 0) {
+				/*
+				 * this case really shouldn't happen, but it's not fatal
+				 * so deal with it... if we don't remove the object from
+				 * the list, we'll never move past it.
+				 */
+				KERNEL_DEBUG(0x13001fc, object, object->resident_page_count, ep_freed, ep_moved, 0);
+				
+				vm_object_cache_remove_locked(object);
+				vm_object_unlock(object);
+				object = VM_OBJECT_NULL;
+				continue;
+			}
+			/*
+			 * we have a locked object with pages...
+			 * time to start harvesting
+			 */
+			break;
+		}
+		vm_object_cache_unlock();
+
+		if (object == VM_OBJECT_NULL)
+			break;
+
+		/*
+		 * object is locked at this point and
+		 * has resident pages
+		 */
+		next_p = (vm_page_t)queue_first(&object->memq);
+
+		/*
+		 * break the page scan into 2 pieces to minimize the time spent
+		 * behind the page queue lock...
+		 * the list of pages on these unused objects is likely to be cold
+		 * with respect to the cpu cache, which increases the time to scan the list
+		 * tenfold...  and we may have a 'run' of pages we can't utilize that
+		 * needs to be skipped over...
+		 */
+		if ((ep_limit = num_to_evict - (ep_freed + ep_moved)) > EVICT_PREPARE_LIMIT)
+			ep_limit = EVICT_PREPARE_LIMIT;
+		ep_count = 0;
+
+		while (!queue_end(&object->memq, (queue_entry_t)next_p) && object->vo_cache_pages_to_scan && ep_count < ep_limit) {
+
+			p = next_p;
+			next_p = (vm_page_t)queue_next(&next_p->listq);
+
+			object->vo_cache_pages_to_scan--;
+
+			if (VM_PAGE_WIRED(p) || p->busy || p->cleaning) {
+				queue_remove(&object->memq, p, vm_page_t, listq);
+				queue_enter(&object->memq, p, vm_page_t, listq);
+
+				ep_skipped++;
+				continue;
+			}
+			if (p->wpmapped || p->dirty || p->precious) {
+				queue_remove(&object->memq, p, vm_page_t, listq);
+				queue_enter(&object->memq, p, vm_page_t, listq);
+
+				pmap_clear_reference(p->phys_page);
+			}
+			ep_array[ep_count++] = p;
+		}
+		KERNEL_DEBUG(0x13001f4 | DBG_FUNC_START, object, object->resident_page_count, ep_freed, ep_moved, 0);
+
+		vm_page_lockspin_queues();
+
+		for (ep_index = 0; ep_index < ep_count; ep_index++) {
+
+			p = ep_array[ep_index];
+
+			if (p->wpmapped || p->dirty || p->precious) {
+				p->reference = FALSE;
+				p->no_cache = FALSE;
+
+				VM_PAGE_QUEUES_REMOVE(p);
+				VM_PAGE_ENQUEUE_INACTIVE(p, TRUE);
+
+				ep_moved++;
+			} else {
+				vm_page_free_prepare_queues(p);
+
+				assert(p->pageq.next == NULL && p->pageq.prev == NULL);
+				/*
+				 * Add this page to our list of reclaimed pages,
+				 * to be freed later.
+				 */
+				p->pageq.next = (queue_entry_t) local_free_q;
+				local_free_q = p;
+
+				ep_freed++;
+			}
+		}
+		vm_page_unlock_queues();
+
+		KERNEL_DEBUG(0x13001f4 | DBG_FUNC_END, object, object->resident_page_count, ep_freed, ep_moved, 0);
+
+		if (local_free_q) {
+			vm_page_free_list(local_free_q, TRUE);
+			local_free_q = VM_PAGE_NULL;
+		}
+		if (object->vo_cache_pages_to_scan == 0) {
+			KERNEL_DEBUG(0x1300208, object, object->resident_page_count, ep_freed, ep_moved, 0);
+
+			vm_object_cache_remove(object);
+
+			KERNEL_DEBUG(0x13001fc, object, object->resident_page_count, ep_freed, ep_moved, 0);
+		}
+		/*
+		 * done with this object
+		 */
+		vm_object_unlock(object);
+		object = VM_OBJECT_NULL;
+
+		/*
+		 * at this point, we are not holding any locks
+		 */
+		if ((ep_freed + ep_moved) >= num_to_evict) {
+			/*
+			 * we've reached our target for the
+			 * number of pages to evict
+			 */
+			break;
+		}
+		vm_object_cache_lock_spin();
+	}
+	/*
+	 * put the page queues lock back to the caller's
+	 * idea of it 
+	 */
+	vm_page_lock_queues();
+
+	vm_object_cache_pages_freed += ep_freed;
+	vm_object_cache_pages_moved += ep_moved;
+	vm_object_cache_pages_skipped += ep_skipped;
+
+	KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, ep_freed, 0, 0, 0, 0);
+	return (ep_freed);
+}
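+
+/*
+ * Example (illustrative): the pageout path is assumed to call this with
+ * the page queues lock held (the function drops and retakes it
+ * internally), asking for a batch of pages spread across a bounded
+ * number of cached objects:
+ *
+ *	pages_freed = vm_object_cache_evict(100, 10);
+ */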
+
+
 #if VM_OBJECT_CACHE
 /*
  *	Check to see whether we really need to trim
@@ -1233,6 +1599,9 @@ vm_object_terminate(
 	object->terminating = TRUE;
 	object->alive = FALSE;
 
+	if ( !object->internal && (object->objq.next || object->objq.prev))
+		vm_object_cache_remove(object);
+
 	if (object->hashed) {
 		lck_mtx_t	*lck;
 
@@ -1344,7 +1713,7 @@ vm_object_reap(
 	/*
 	 * remove from purgeable queue if it's on
 	 */
-	if (object->objq.next || object->objq.prev) {
+	if (object->internal && (object->objq.next || object->objq.prev)) {
 	        purgeable_q_t queue = vm_purgeable_object_remove(object);
 		assert(queue);
 
@@ -1393,7 +1762,7 @@ vm_object_reap(
 	vm_object_unlock(object);
 
 #if	MACH_PAGEMAP
-	vm_external_destroy(object->existence_map, object->size);
+	vm_external_destroy(object->existence_map, object->vo_size);
 #endif	/* MACH_PAGEMAP */
 
 	object->shadow = VM_OBJECT_NULL;
@@ -1407,9 +1776,12 @@ vm_object_reap(
 }
 
 
+unsigned int vm_max_batch = 256;
 
 #define V_O_R_MAX_BATCH 128
 
+#define BATCH_LIMIT(max)	(vm_max_batch >= (max) ? (max) : vm_max_batch)
+
 
 #define VM_OBJ_REAP_FREELIST(_local_free_q, do_disconnect)		\
 	MACRO_BEGIN							\
@@ -1461,7 +1833,7 @@ vm_object_reap_pages(
 restart_after_sleep:
 	if (queue_empty(&object->memq))
 		return;
-	loop_count = V_O_R_MAX_BATCH + 1;
+	loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH) + 1;
 
 	vm_page_lockspin_queues();
 
@@ -1487,7 +1859,7 @@ restart_after_sleep:
 			} else
 				mutex_pause(0);
 
-			loop_count = V_O_R_MAX_BATCH + 1;
+			loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH) + 1;
 
 			vm_page_lockspin_queues();
 		}
@@ -1505,12 +1877,8 @@ restart_after_sleep:
 				 *
 				 * the laundry and pageout_queue flags are cleared...
 				 */
-#if CONFIG_EMBEDDED
-				if (p->laundry) 
-					vm_pageout_throttle_up(p);
-#else
 				vm_pageout_throttle_up(p);
-#endif
+
 				if (p->pageout == TRUE) {
 					/*
 					 * toss the wire count we picked up
@@ -1624,6 +1992,11 @@ restart_after_sleep:
 				p->busy = TRUE;
 
 				VM_PAGE_QUEUES_REMOVE(p);
+				/*
+				 * flush page... page will be freed
+				 * upon completion of I/O
+				 */
+				vm_pageout_cluster(p);
 
 				vm_page_unlock_queues();
 				/*
@@ -1632,11 +2005,6 @@ restart_after_sleep:
 				VM_OBJ_REAP_FREELIST(local_free_q,
 						     disconnect_on_release);
 
-				/*
-				 * flush page... page will be freed
-				 * upon completion of I/O
-				 */
-				vm_pageout_cluster(p);
 				vm_object_paging_wait(object, THREAD_UNINT);
 
 				goto restart_after_sleep;
@@ -1885,6 +2253,8 @@ vm_object_destroy(
 }
 
 
+#if VM_OBJECT_CACHE
+
 #define VM_OBJ_DEACT_ALL_STATS DEBUG
 #if VM_OBJ_DEACT_ALL_STATS
 uint32_t vm_object_deactivate_all_pages_batches = 0;
@@ -1909,7 +2279,7 @@ vm_object_deactivate_all_pages(
 #endif /* VM_OBJ_DEACT_ALL_STATS */
 #define V_O_D_A_P_MAX_BATCH	256
 
-	loop_count = V_O_D_A_P_MAX_BATCH;
+	loop_count = BATCH_LIMIT(V_O_D_A_P_MAX_BATCH);
 #if VM_OBJ_DEACT_ALL_STATS
 	pages_count = 0;
 #endif /* VM_OBJ_DEACT_ALL_STATS */
@@ -1924,7 +2294,7 @@ vm_object_deactivate_all_pages(
 			pages_count = 0;
 #endif /* VM_OBJ_DEACT_ALL_STATS */
 			lck_mtx_yield(&vm_page_queue_lock);
-			loop_count = V_O_D_A_P_MAX_BATCH;
+			loop_count = BATCH_LIMIT(V_O_D_A_P_MAX_BATCH);
 		}
 		if (!p->busy && !p->throttled) {
 #if VM_OBJ_DEACT_ALL_STATS
@@ -1943,133 +2313,7 @@ vm_object_deactivate_all_pages(
 #endif /* VM_OBJ_DEACT_ALL_STATS */
 	vm_page_unlock_queues();
 }
-
-
-
-/*
- * when deallocating pages it is necessary to hold 
- * the vm_page_queue_lock (a hot global lock) for certain operations
- * on the page... however, the majority of the work can be done
- * while merely holding the object lock... to mitigate the time spent behind the
- * global lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
- * while doing all of the work that doesn't require the vm_page_queue_lock...
- * them call dw_do_work to acquire the vm_page_queue_lock and do the
- * necessary work for each page... we will grab the busy bit on the page
- * so that dw_do_work can drop the object lock if it can't immediately take the
- * vm_page_queue_lock in order to compete for the locks in the same order that
- * vm_pageout_scan takes them.
- */
-
-#define DELAYED_WORK_LIMIT	32
-
-#define DW_clear_reference	0x01
-#define DW_move_page		0x02
-#define DW_clear_busy		0x04
-#define DW_PAGE_WAKEUP		0x08
-
-
-struct dw {
-	vm_page_t	dw_m;
-	int		dw_mask;
-};
-
-static void dw_do_work(vm_object_t object, struct dw *dwp, int dw_count);
-
-
-static void
-dw_do_work(
-	vm_object_t 	object,
-	struct dw 	*dwp,
-	int		dw_count)
-{
-	vm_page_t	m;
-	int		j;
-
-	/*
-	 * pageout_scan takes the vm_page_lock_queues first
-	 * then tries for the object lock... to avoid what
-	 * is effectively a lock inversion, we'll go to the
-	 * trouble of taking them in that same order... otherwise
-	 * if this object contains the majority of the pages resident
-	 * in the UBC (or a small set of large objects actively being
-	 * worked on contain the majority of the pages), we could
-	 * cause the pageout_scan thread to 'starve' in its attempt
-	 * to find pages to move to the free queue, since it has to
-	 * successfully acquire the object lock of any candidate page
-	 * before it can steal/clean it.
-	 */
-	if (!vm_page_trylockspin_queues()) {
-		vm_object_unlock(object);
-
-		vm_page_lockspin_queues();
-
-		for (j = 0; ; j++) {
-			if (!vm_object_lock_avoid(object) &&
-			    _vm_object_lock_try(object))
-				break;
-			vm_page_unlock_queues();
-			mutex_pause(j);
-			vm_page_lockspin_queues();
-		}
-	}
-	for (j = 0; j < dw_count; j++, dwp++) {
-
-		m = dwp->dw_m;
-
-		if (dwp->dw_mask & DW_clear_reference)
-			m->reference = FALSE;
-
-		if (dwp->dw_mask & DW_move_page) {
-			VM_PAGE_QUEUES_REMOVE(m);
-
-			assert(!m->laundry);
-			assert(m->object != kernel_object);
-			assert(m->pageq.next == NULL &&
-			       m->pageq.prev == NULL);
-					
-			if (m->zero_fill) {
-				queue_enter_first(&vm_page_queue_zf, m, vm_page_t, pageq);
-				vm_zf_queue_count++;
-			} else {
-				queue_enter_first(&vm_page_queue_inactive, m, vm_page_t, pageq);
-			}
-			m->inactive = TRUE;
-
-			if (!m->fictitious) {
-				vm_page_inactive_count++;
-				token_new_pagecount++;
-			} else {
-				assert(m->phys_page == vm_page_fictitious_addr);
-			}
-		}
-		if (dwp->dw_mask & DW_clear_busy)
-			dwp->dw_m->busy = FALSE;
-
-		if (dwp->dw_mask & DW_PAGE_WAKEUP)
-			PAGE_WAKEUP(dwp->dw_m);
-	}
-	vm_page_unlock_queues();
-
-#if CONFIG_EMBEDDED
-	{
-	int percent_avail;
-
-	/*
-	 * Decide if we need to send a memory status notification.
-	 */
-	percent_avail = 
-		(vm_page_active_count + vm_page_inactive_count + 
-		 vm_page_speculative_count + vm_page_free_count +
-		 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
-		atop_64(max_mem);
-	if (percent_avail >= (kern_memorystatus_level + 5) || 
-	    percent_avail <= (kern_memorystatus_level - 5)) {
-		kern_memorystatus_level = percent_avail;
-		thread_wakeup((event_t)&kern_memorystatus_wakeup);
-	}
-	}
-#endif
-}
+#endif	/* VM_OBJECT_CACHE */
 
 
 
@@ -2121,6 +2365,7 @@ typedef uint64_t	chunk_state_t;
 		MARK_PAGE_HANDLED(c, p);				\
 	MACRO_END
 
+
 /*
  * Return true if all pages in the chunk have not yet been processed.
  */
@@ -2211,6 +2456,7 @@ page_is_paged_out(
 }
 
 
+
 /*
  * Deactivate the pages in the specified object and range.  If kill_page is set, also discard any
  * page modified state from the pmap.  Update the chunk_state as we go along.  The caller must specify
@@ -2232,9 +2478,10 @@ deactivate_pages_in_object(
 {
 	vm_page_t	m;
 	int		p;
-	struct	dw	dw_array[DELAYED_WORK_LIMIT];
-	struct	dw	*dwp;
+	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
+	struct vm_page_delayed_work	*dwp;
 	int		dw_count;
+	int		dw_limit;
 	unsigned int	reusable = 0;
 
 
@@ -2247,6 +2494,7 @@ deactivate_pages_in_object(
 
 	dwp = &dw_array[0];
 	dw_count = 0;
+	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
 
 	for(p = 0; size && CHUNK_NOT_COMPLETE(*chunk_state); p++, size -= PAGE_SIZE_64, offset += PAGE_SIZE_64) {
 
@@ -2307,39 +2555,23 @@ deactivate_pages_in_object(
 						object->reusable_page_count++;
 						assert(object->resident_page_count >= object->reusable_page_count);
 						reusable++;
-#if CONFIG_EMBEDDED
-					} else {
-						if (m->reusable) {
-							m->reusable = FALSE;
-							object->reusable_page_count--;
-						}
-#endif
 					}
 				}
 				pmap_clear_refmod(m->phys_page, clear_refmod);
 
 				if (!m->throttled && !(reusable_page || all_reusable))
 					dwp->dw_mask |= DW_move_page;
-				/*
-				 * dw_do_work may need to drop the object lock
-				 * if it does, we need the pages its looking at to
-				 * be held stable via the busy bit.
-				 */
-				m->busy = TRUE;
-				dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
-
-				dwp->dw_m = m;
-				dwp++;
-				dw_count++;
+				
+				VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
 
-				if (dw_count >= DELAYED_WORK_LIMIT) {
+				if (dw_count >= dw_limit) {
 					if (reusable) {
 						OSAddAtomic(reusable,
 							    &vm_page_stats_reusable.reusable_count);
 						vm_page_stats_reusable.reusable += reusable;
 						reusable = 0;
 					}
-					dw_do_work(object, &dw_array[0], dw_count);
+					vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 
 					dwp = &dw_array[0];
 					dw_count = 0;
@@ -2378,7 +2610,7 @@ deactivate_pages_in_object(
 	}
 		
 	if (dw_count)
-		dw_do_work(object, &dw_array[0], dw_count);
+		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 }
 
 
@@ -2447,7 +2679,7 @@ deactivate_a_chunk(
 			kill_page = FALSE;
 			reusable_page = FALSE;
 			all_reusable = FALSE;
-		        offset += object->shadow_offset;
+		        offset += object->vo_shadow_offset;
 		        vm_object_lock(tmp_object);
 		}
 
@@ -2492,20 +2724,19 @@ vm_object_deactivate_pages(
 
 	all_reusable = FALSE;
 	if (reusable_page &&
-	    object->size != 0 &&
-	    object->size == size &&
+	    object->internal &&
+	    object->vo_size != 0 &&
+	    object->vo_size == size &&
 	    object->reusable_page_count == 0) {
 		all_reusable = TRUE;
 		reusable_page = FALSE;
 	}
 
-#if CONFIG_EMBEDDED
 	if ((reusable_page || all_reusable) && object->all_reusable) {
 		/* This means MADV_FREE_REUSABLE has been called twice, which 
 		 * is probably illegal. */
 		return;
 	}
-#endif
 
 	while (size) {
 		length = deactivate_a_chunk(object, offset, size, kill_page, reusable_page, all_reusable);
@@ -2564,7 +2795,7 @@ vm_object_reuse_pages(
 	if (object->all_reusable) {
 		assert(object->reusable_page_count == 0);
 		object->all_reusable = FALSE;
-		if (end_offset - start_offset == object->size ||
+		if (end_offset - start_offset == object->vo_size ||
 		    !allow_partial_reuse) {
 			vm_page_stats_reusable.all_reuse_calls++;
 			reused = object->resident_page_count;
@@ -2662,10 +2893,10 @@ vm_object_pmap_protect(
 		} else {
 			vm_object_offset_t phys_start, phys_end, phys_addr;
 
-			phys_start = object->shadow_offset + offset;
+			phys_start = object->vo_shadow_offset + offset;
 			phys_end = phys_start + size;
 			assert(phys_start <= phys_end);
-			assert(phys_end <= object->shadow_offset + object->size);
+			assert(phys_end <= object->vo_shadow_offset + object->vo_size);
 			vm_object_unlock(object);
 
 			for (phys_addr = phys_start;
@@ -2754,7 +2985,7 @@ vm_object_pmap_protect(
 
 		next_object = object->shadow;
 		if (next_object != VM_OBJECT_NULL) {
-		    offset += object->shadow_offset;
+		    offset += object->vo_shadow_offset;
 		    vm_object_lock(next_object);
 		    vm_object_unlock(object);
 		    object = next_object;
@@ -2858,6 +3089,8 @@ vm_object_copy_slowly(
 	fault_info.hi_offset = src_offset + size;
 	fault_info.no_cache  = FALSE;
 	fault_info.stealth = TRUE;
+	fault_info.io_sync = FALSE;
+	fault_info.cs_bypass = FALSE;
 	fault_info.mark_zf_absent = FALSE;
 
 	for ( ;
@@ -2965,10 +3198,6 @@ vm_object_copy_slowly(
 			case VM_FAULT_RETRY:
 				break;
 
-			case VM_FAULT_FICTITIOUS_SHORTAGE:
-				vm_page_more_fictitious();
-				break;
-
 			case VM_FAULT_MEMORY_SHORTAGE:
 				if (vm_page_wait(interruptible))
 					break;
@@ -3198,8 +3427,8 @@ Retry:
 		vm_object_lock(src_object);
 		goto Retry;
 	}
-	if (copy->size < src_offset+size)
-		copy->size = src_offset+size;
+	if (copy->vo_size < src_offset+size)
+		copy->vo_size = src_offset+size;
 
 	if (!copy->pager_ready)
 		check_ready = TRUE;
@@ -3365,7 +3594,7 @@ vm_object_copy_delayed(
 			 *	needed).
 			 */
 
-			if (old_copy->size < copy_size) {
+			if (old_copy->vo_size < copy_size) {
 			        if (src_object_shared == TRUE) {
 				        vm_object_unlock(old_copy);
 					vm_object_unlock(src_object);
@@ -3385,7 +3614,7 @@ vm_object_copy_delayed(
 
 				queue_iterate(&src_object->memq, p, vm_page_t, listq) {
 					if (!p->fictitious && 
-					    p->offset >= old_copy->size && 
+					    p->offset >= old_copy->vo_size && 
 					    p->offset < copy_size) {
 						if (VM_PAGE_WIRED(p)) {
 							vm_object_unlock(old_copy);
@@ -3403,7 +3632,7 @@ vm_object_copy_delayed(
 						}
 					}
 				}
-				old_copy->size = copy_size;
+				old_copy->vo_size = copy_size;
 			}
 			if (src_object_shared == TRUE)
 			        vm_object_reference_shared(old_copy);
@@ -3426,8 +3655,8 @@ vm_object_copy_delayed(
 		 * copy object will be large enough to back either the
 		 * old copy object or the new mapping.
 		 */
-		if (old_copy->size > copy_size)
-			copy_size = old_copy->size;
+		if (old_copy->vo_size > copy_size)
+			copy_size = old_copy->vo_size;
 
 		if (new_copy == VM_OBJECT_NULL) {
 			vm_object_unlock(old_copy);
@@ -3439,7 +3668,7 @@ vm_object_copy_delayed(
 			src_object_shared = FALSE;
 			goto Retry;
 		}
-		new_copy->size = copy_size;	
+		new_copy->vo_size = copy_size;	
 
 		/*
 		 *	The copy-object is always made large enough to
@@ -3449,7 +3678,7 @@ vm_object_copy_delayed(
 		 */
 
 		assert((old_copy->shadow == src_object) &&
-		    (old_copy->shadow_offset == (vm_object_offset_t) 0));
+		    (old_copy->vo_shadow_offset == (vm_object_offset_t) 0));
 
 	} else if (new_copy == VM_OBJECT_NULL) {
 		vm_object_unlock(src_object);
@@ -3521,7 +3750,7 @@ vm_object_copy_delayed(
 	 */
 	vm_object_lock_assert_exclusive(new_copy);
 	new_copy->shadow = src_object;
-	new_copy->shadow_offset = 0;
+	new_copy->vo_shadow_offset = 0;
 	new_copy->shadowed = TRUE;	/* caller must set needs_copy */
 
 	vm_object_lock_assert_exclusive(src_object);
@@ -3653,7 +3882,7 @@ vm_object_copy_strategically(
  *	The new object and offset into that object
  *	are returned in the source parameters.
  */
-boolean_t vm_object_shadow_check = FALSE;
+boolean_t vm_object_shadow_check = TRUE;
 
 __private_extern__ boolean_t
 vm_object_shadow(
@@ -3684,11 +3913,19 @@ vm_object_shadow(
 
 	/*
 	 *	Determine if we really need a shadow.
+	 *
+	 *	If the source object is larger than what we are trying
+	 *	to create, then force the shadow creation even if the
+	 *	ref count is 1.  This will allow us to [potentially]
+	 *	collapse the underlying object away in the future
+	 *	(freeing up the extra data it might contain and that
+	 *	we don't need).
 	 */
-
-	if (vm_object_shadow_check && source->ref_count == 1 &&
+	if (vm_object_shadow_check &&
+	    source->vo_size == length &&
+	    source->ref_count == 1 &&
 	    (source->shadow == VM_OBJECT_NULL ||
-	     source->shadow->copy == VM_OBJECT_NULL))
+	     source->shadow->copy == VM_OBJECT_NULL) )
 	{
 		source->shadowed = FALSE;
 		return FALSE;
@@ -3715,7 +3952,7 @@ vm_object_shadow(
 	 *	and fix up the offset into the new object.
 	 */
 
-	result->shadow_offset = *offset;
+	result->vo_shadow_offset = *offset;
 
 	/*
 	 *	Return the new things
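
Flipping vm_object_shadow_check to TRUE activates the predicate above. A hedged sketch of the decision it encodes, using reduced types (only the field names are taken from the patch):

    #include <stdbool.h>
    #include <stdint.h>

    struct obj {               /* reduced stand-in for struct vm_object */
        uint64_t    vo_size;
        int         ref_count;
        struct obj *shadow;
        struct obj *copy;
    };

    /*
     * True when the existing object can be reused directly: it is
     * exactly the size being mapped, has a single reference, and no
     * copy object hangs off its shadow.  A larger source now forces a
     * shadow so the excess can later be collapsed away.
     */
    static bool
    shadow_not_needed(const struct obj *source, uint64_t length)
    {
        return source->vo_size == length &&
               source->ref_count == 1 &&
               (source->shadow == NULL || source->shadow->copy == NULL);
    }
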
@@ -4088,21 +4325,21 @@ vm_object_pager_create(
 	object->paging_offset = 0;
 		
 #if	MACH_PAGEMAP
-	size = object->size;
+	size = object->vo_size;
 #endif	/* MACH_PAGEMAP */
 	vm_object_unlock(object);
 
 #if	MACH_PAGEMAP
 	map = vm_external_create(size);
 	vm_object_lock(object);
-	assert(object->size == size);
+	assert(object->vo_size == size);
 	object->existence_map = map;
 	vm_object_unlock(object);
 #endif	/* MACH_PAGEMAP */
 
-	if ((uint32_t) object->size != object->size) {
+	if ((uint32_t) object->vo_size != object->vo_size) {
 		panic("vm_object_pager_create(): object size 0x%llx >= 4GB\n",
-		      (uint64_t) object->size);
+		      (uint64_t) object->vo_size);
 	}
 
 	/*
@@ -4121,8 +4358,8 @@ vm_object_pager_create(
 		assert(object->temporary);
 
 		/* create our new memory object */
-		assert((vm_size_t) object->size == object->size);
-		(void) memory_object_create(dmm, (vm_size_t) object->size,
+		assert((vm_size_t) object->vo_size == object->vo_size);
+		(void) memory_object_create(dmm, (vm_size_t) object->vo_size,
 					    &pager);
 
 		memory_object_default_deallocate(dmm);
@@ -4140,7 +4377,7 @@ vm_object_pager_create(
 	 *	copied by vm_object_enter().
 	 */
 
-	if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
+	if (vm_object_enter(pager, object->vo_size, TRUE, TRUE, FALSE) != object)
 		panic("vm_object_pager_create: mismatch");
 
 	/*
@@ -4222,8 +4459,8 @@ vm_object_do_collapse(
 	vm_object_lock_assert_exclusive(object);
 	vm_object_lock_assert_exclusive(backing_object);
 
-	backing_offset = object->shadow_offset;
-	size = object->size;
+	backing_offset = object->vo_shadow_offset;
+	size = object->vo_size;
 
 	/*
 	 *	Move all in-memory pages from backing_object
@@ -4363,10 +4600,10 @@ vm_object_do_collapse(
 	 *	this code should be fixed to salvage the map.
 	 */
 	assert(object->existence_map == VM_EXTERNAL_NULL);
-	if (backing_offset || (size != backing_object->size)) {
+	if (backing_offset || (size != backing_object->vo_size)) {
 		vm_external_discarded++;
 		vm_external_destroy(backing_object->existence_map,
-			backing_object->size);
+			backing_object->vo_size);
 	}
 	else {
 		vm_external_collapsed++;
@@ -4385,10 +4622,10 @@ vm_object_do_collapse(
 	assert(!backing_object->phys_contiguous);
 	object->shadow = backing_object->shadow;
 	if (object->shadow) {
-		object->shadow_offset += backing_object->shadow_offset;
+		object->vo_shadow_offset += backing_object->vo_shadow_offset;
 	} else {
 		/* no shadow, therefore no shadow offset... */
-		object->shadow_offset = 0;
+		object->vo_shadow_offset = 0;
 	}
 	assert((object->shadow == VM_OBJECT_NULL) ||
 	       (object->shadow->copy != backing_object));
@@ -4456,10 +4693,10 @@ vm_object_do_bypass(
 	assert(!backing_object->phys_contiguous);
 	object->shadow = backing_object->shadow;
 	if (object->shadow) {
-		object->shadow_offset += backing_object->shadow_offset;
+		object->vo_shadow_offset += backing_object->vo_shadow_offset;
 	} else {
 		/* no shadow, therefore no shadow offset... */
-		object->shadow_offset = 0;
+		object->vo_shadow_offset = 0;
 	}
 	
 	/*
@@ -4762,7 +4999,7 @@ retry:
 		 *	we have to make sure no pages in the backing object
 		 *	"show through" before bypassing it.
 		 */
-		size = atop(object->size);
+		size = atop(object->vo_size);
 		rcount = object->resident_page_count;
 		if (rcount != size) {
 			vm_object_offset_t	offset;
@@ -4821,7 +5058,7 @@ retry:
 			 *
 			 */
 
-			backing_offset = object->shadow_offset;
+			backing_offset = object->vo_shadow_offset;
 			backing_rcount = backing_object->resident_page_count;
 
 #if	MACH_PAGEMAP
@@ -4894,7 +5131,7 @@ retry:
 					}
 
 					offset = (p->offset - backing_offset);
-					if (offset < object->size &&
+					if (offset < object->vo_size &&
 					    offset != hint_offset &&
 					    !EXISTS_IN_OBJECT(object, offset, rc)) {
 						/* found a dependency */
@@ -4928,7 +5165,7 @@ retry:
 				offset = hint_offset;
 				
 				while((offset =
-				      (offset + PAGE_SIZE_64 < object->size) ?
+				      (offset + PAGE_SIZE_64 < object->vo_size) ?
 				      (offset + PAGE_SIZE_64) : 0) != hint_offset) {
 
 					/* Until we get more than one lookup lock */
@@ -5148,7 +5385,7 @@ vm_object_coalesce(
 	 *	Extend the object if necessary.
 	 */
 	newsize = prev_offset + prev_size + next_size;
-	if (newsize > prev_object->size) {
+	if (newsize > prev_object->vo_size) {
 #if	MACH_PAGEMAP
 		/*
 		 *	We cannot extend an object that has existence info,
@@ -5161,7 +5398,7 @@ vm_object_coalesce(
 		 */
 		assert(prev_object->existence_map == VM_EXTERNAL_NULL);
 #endif	/* MACH_PAGEMAP */
-		prev_object->size = newsize;
+		prev_object->vo_size = newsize;
 	}
 
 	vm_object_unlock(prev_object);
@@ -5346,7 +5583,7 @@ vm_object_print(db_expr_t db_addr, __unused boolean_t have_addr,
 
 	db_indent += 2;
 
-	iprintf("size=0x%x", object->size);
+	iprintf("size=0x%x", object->vo_size);
 	printf(", memq_hint=%p", object->memq_hint);
 	printf(", ref_count=%d\n", object->ref_count);
 	iprintf("");
@@ -5364,7 +5601,7 @@ vm_object_print(db_expr_t db_addr, __unused boolean_t have_addr,
 		printf(" (depth %d)", i);
 	}
 	printf(", copy=0x%x", object->copy);
-	printf(", shadow_offset=0x%x", object->shadow_offset);
+	printf(", shadow_offset=0x%x", object->vo_shadow_offset);
 	printf(", last_alloc=0x%x\n", object->last_alloc);
 
 	iprintf("pager=0x%x", object->pager);
@@ -5455,7 +5692,7 @@ vm_object_print(db_expr_t db_addr, __unused boolean_t have_addr,
 
 #if	MACH_PAGEMAP
 	iprintf("existence_map=");
-	vm_external_print(object->existence_map, object->size);
+	vm_external_print(object->existence_map, object->vo_size);
 #endif	/* MACH_PAGEMAP */
 #if	MACH_ASSERT
 	iprintf("paging_object=0x%x\n", object->paging_object);
@@ -5642,8 +5879,8 @@ vm_object_populate_with_private(
 		
 		/* shadows on contiguous memory are not allowed */
 		/* we therefore can use the offset field */
-		object->shadow_offset = (vm_object_offset_t)phys_page << PAGE_SHIFT;
-		object->size = size;
+		object->vo_shadow_offset = (vm_object_offset_t)phys_page << PAGE_SHIFT;
+		object->vo_size = size;
 	}
 	vm_object_unlock(object);
 	return KERN_SUCCESS;
@@ -6585,8 +6822,8 @@ vm_object_transpose(
 	vm_object_paging_only_wait(object2, THREAD_UNINT);
 
 
-	if (object1->size != object2->size ||
-	    object1->size != transpose_size) {
+	if (object1->vo_size != object2->vo_size ||
+	    object1->vo_size != transpose_size) {
 		/*
 		 * If the 2 objects don't have the same size, we can't
 		 * exchange their backing stores or one would overflow.
@@ -6661,7 +6898,7 @@ MACRO_END
 
 	/* "Lock" refers to the object not its contents */
 	/* "size" should be identical */
-	assert(object1->size == object2->size);
+	assert(object1->vo_size == object2->vo_size);
 	/* "memq_hint" was updated above when transposing pages */
 	/* "ref_count" refers to the object not its contents */
 #if TASK_SWAPPER
@@ -6676,7 +6913,7 @@ MACRO_END
 	/* there should be no "shadow" */
 	assert(!object1->shadow);
 	assert(!object2->shadow);
-	__TRANSPOSE_FIELD(shadow_offset); /* used by phys_contiguous objects */
+	__TRANSPOSE_FIELD(vo_shadow_offset); /* used by phys_contiguous objects */
 	__TRANSPOSE_FIELD(pager);
 	__TRANSPOSE_FIELD(paging_offset);
 	__TRANSPOSE_FIELD(pager_control);
@@ -6735,6 +6972,7 @@ MACRO_END
 	__TRANSPOSE_FIELD(sequential);
 	__TRANSPOSE_FIELD(pages_created);
 	__TRANSPOSE_FIELD(pages_used);
+	__TRANSPOSE_FIELD(scan_collisions);
 #if MACH_PAGEMAP
 	__TRANSPOSE_FIELD(existence_map);
 #endif
@@ -6743,6 +6981,7 @@ MACRO_END
 	__TRANSPOSE_FIELD(paging_object);
 #endif
 	__TRANSPOSE_FIELD(wimg_bits);
+	__TRANSPOSE_FIELD(set_cache_attr);
 	__TRANSPOSE_FIELD(code_signed);
 	if (object1->hashed) {
 		hash_lck = vm_object_hash_lock_spin(object2->pager);
@@ -6825,14 +7064,14 @@ done:
  *
  */
 extern int speculative_reads_disabled;
+extern int ignore_is_ssd;
+
 #if CONFIG_EMBEDDED
 unsigned int preheat_pages_max = MAX_UPL_TRANSFER;
 unsigned int preheat_pages_min = 8;
-unsigned int preheat_pages_mult = 4;
 #else
 unsigned int preheat_pages_max = MAX_UPL_TRANSFER;
 unsigned int preheat_pages_min = 8;
-unsigned int preheat_pages_mult = 4;
 #endif
 
 uint32_t pre_heat_scaling[MAX_UPL_TRANSFER + 1];
@@ -6855,26 +7094,21 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
 	vm_behavior_t		behavior;
 	boolean_t		look_behind = TRUE;
 	boolean_t		look_ahead  = TRUE;
+	boolean_t		isSSD = FALSE;
 	uint32_t		throttle_limit;
 	int			sequential_run;
 	int			sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
 	unsigned int		max_ph_size;
 	unsigned int		min_ph_size;
-	unsigned int		ph_mult;
+	unsigned int		min_ph_size_in_bytes;
 
 	assert( !(*length & PAGE_MASK));
 	assert( !(*start & PAGE_MASK_64));
 
-	if ( (ph_mult = preheat_pages_mult) < 1 ) 
-		ph_mult = 1;
-	if ( (min_ph_size = preheat_pages_min) < 1 ) 
-		min_ph_size = 1;
-	if ( (max_ph_size = preheat_pages_max) > MAX_UPL_TRANSFER ) 
-		max_ph_size = MAX_UPL_TRANSFER;
-	
-	if ( (max_length = *length) > (max_ph_size * PAGE_SIZE) ) 
-	        max_length = (max_ph_size * PAGE_SIZE);
-
+	/*
+	 * remember maximum length of run requested
+	 */
+	max_length = *length;
 	/*
 	 * we'll always return a cluster size of at least
 	 * 1 page, since the original fault must always
@@ -6883,7 +7117,7 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
 	*length = PAGE_SIZE;
 	*io_streaming = 0;
 
-	if (speculative_reads_disabled || fault_info == NULL || max_length == 0) {
+	if (speculative_reads_disabled || fault_info == NULL) {
 	        /*
 		 * no cluster... just fault the page in
 		 */
@@ -6896,12 +7130,39 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
 
 	vm_object_lock(object);
 
+	if (object->pager == MEMORY_OBJECT_NULL)
+		goto out;	/* pager is gone for this object, nothing more to do */
+
+	if (!ignore_is_ssd)
+		vnode_pager_get_isSSD(object->pager, &isSSD);
+
+	min_ph_size = preheat_pages_min;
+	max_ph_size = preheat_pages_max;
+
+	if (isSSD) {
+		min_ph_size /= 2;
+		max_ph_size /= 8;
+	}
+	if (min_ph_size < 1)
+		min_ph_size = 1;
+
+	if (max_ph_size < 1)
+		max_ph_size = 1;
+	else if (max_ph_size > MAX_UPL_TRANSFER)
+		max_ph_size = MAX_UPL_TRANSFER;
+
+	if (max_length > (max_ph_size * PAGE_SIZE)) 
+	        max_length = max_ph_size * PAGE_SIZE;
+
+	if (max_length <= PAGE_SIZE)
+		goto out;
+
+	min_ph_size_in_bytes = min_ph_size * PAGE_SIZE;
+
 	if (object->internal)
-	        object_size = object->size;
-	else if (object->pager != MEMORY_OBJECT_NULL)
-	        vnode_pager_get_object_size(object->pager, &object_size);
+	        object_size = object->vo_size;
 	else
-		goto out;	/* pager is gone for this object, nothing more to do */
+	        vnode_pager_get_object_size(object->pager, &object_size);
 
 	object_size = round_page_64(object_size);
 
@@ -6929,7 +7190,7 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
 		  }
 
 	}
-	switch(behavior) {
+	switch (behavior) {
 
 	default:
 	        behavior = VM_BEHAVIOR_DEFAULT;
@@ -6949,25 +7210,25 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
 			*io_streaming = 1;
 		} else {
 
-			if (object->pages_created < 32 * ph_mult) {
+			if (object->pages_created < (20 * min_ph_size)) {
 			        /*
 				 * prime the pump
 				 */
-			        pre_heat_size = PAGE_SIZE * 8 * ph_mult;
-			        break;
+			        pre_heat_size = min_ph_size_in_bytes;
+			} else {
+				/*
+				 * Linear growth in PH size: The maximum size is max_length...
+				 * this calculation will result in a size that is neither a
+				 * power of 2 nor a multiple of PAGE_SIZE... so round
+				 * it up to the nearest PAGE_SIZE boundary
+				 */
+				pre_heat_size = (max_length * object->pages_used) / object->pages_created;
+				
+				if (pre_heat_size < min_ph_size_in_bytes)
+					pre_heat_size = min_ph_size_in_bytes;
+				else
+					pre_heat_size = round_page(pre_heat_size);
 			}
-			/*
-			 * Linear growth in PH size: The maximum size is max_length...
-			 * this cacluation will result in a size that is neither a 
-			 * power of 2 nor a multiple of PAGE_SIZE... so round
-			 * it up to the nearest PAGE_SIZE boundary
-			 */
-			pre_heat_size = (ph_mult * (max_length * object->pages_used) / object->pages_created);
-			
-			if (pre_heat_size < PAGE_SIZE * min_ph_size)
-				pre_heat_size = PAGE_SIZE * min_ph_size;
-			else
-				pre_heat_size = round_page(pre_heat_size);
 		}
 		break;
 
@@ -7003,14 +7264,14 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
 	if (pre_heat_size > max_length)
 	        pre_heat_size = max_length;
 
-	if (behavior == VM_BEHAVIOR_DEFAULT) {
+	if (behavior == VM_BEHAVIOR_DEFAULT && (pre_heat_size > min_ph_size_in_bytes)) {
 		if (vm_page_free_count < vm_page_throttle_limit)
-			pre_heat_size = trunc_page(pre_heat_size / 8);
+			pre_heat_size = trunc_page(pre_heat_size / 16);
 		else if (vm_page_free_count < vm_page_free_target)
-			pre_heat_size = trunc_page(pre_heat_size / 2);
+			pre_heat_size = trunc_page(pre_heat_size / 4);
 
-		if (pre_heat_size <= PAGE_SIZE)
-			goto out;
+		if (pre_heat_size < min_ph_size_in_bytes)
+			pre_heat_size = min_ph_size_in_bytes;
 	}
 	if (look_ahead == TRUE) {
 	        if (look_behind == TRUE) { 
@@ -7047,8 +7308,14 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
 		assert((vm_size_t)(orig_start - target_start) == (orig_start - target_start));
 	        tail_size = pre_heat_size - (vm_size_t)(orig_start - target_start) - PAGE_SIZE;
 	} else {
-	        if (pre_heat_size > target_start)
-	                pre_heat_size = (vm_size_t) target_start; /* XXX: 32-bit vs 64-bit ? Joe ? */
+	        if (pre_heat_size > target_start) {
+			/*
+			 * since pre_heat_size is always smaller than 2^32,
+			 * if it is larger than target_start (a 64-bit value)
+			 * it is safe to clip target_start to 32 bits
+			 */
+	                pre_heat_size = (vm_size_t) target_start;
+		}
 		tail_size = 0;
 	}
 	assert( !(target_start & PAGE_MASK_64));
@@ -7159,7 +7426,7 @@ vm_object_page_op(
 		if(object->phys_contiguous) {
 			if (phys_entry) {
 				*phys_entry = (ppnum_t)
-					(object->shadow_offset >> PAGE_SHIFT);
+					(object->vo_shadow_offset >> PAGE_SHIFT);
 			}
 			vm_object_unlock(object);
 			return KERN_SUCCESS;
@@ -7340,8 +7607,13 @@ vm_object_range_op(
 		dst_page = vm_page_lookup(object, offset);
 		if (dst_page != VM_PAGE_NULL) {
 			if (ops & UPL_ROP_DUMP) {
-				if (dst_page->busy || dst_page->cleaning) {
-				        /*
+				if (dst_page->list_req_pending) {
+					/*
+					 * This page isn't on a UPL yet.
+					 * So it's safe to steal it here and dump it.
+					 */
+				} else if (dst_page->busy || dst_page->cleaning) {
+					/*
 					 * someone else is playing with the 
 					 * page, we will have to wait
 					 */
@@ -7413,12 +7685,15 @@ _vm_object_lock_try(vm_object_t object)
 boolean_t
 vm_object_lock_try(vm_object_t object)
 {
-    // called from hibernate path so check before blocking
-	if (vm_object_lock_avoid(object) && ml_get_interrupts_enabled()) {
+	/*
+	 * Called from hibernate path so check before blocking.
+	 */
+	if (vm_object_lock_avoid(object) && ml_get_interrupts_enabled() && get_preemption_level()==0) {
 		mutex_pause(2);
 	}
 	return _vm_object_lock_try(object);
 }
+
 void
 vm_object_lock_shared(vm_object_t object)
 {
@@ -7436,3 +7711,264 @@ vm_object_lock_try_shared(vm_object_t object)
 	}
 	return (lck_rw_try_lock_shared(&object->Lock));
 }
+
+
+unsigned int vm_object_change_wimg_mode_count = 0;
+
+/*
+ * The object must be locked
+ */
+void
+vm_object_change_wimg_mode(vm_object_t object, unsigned int wimg_mode)
+{
+	vm_page_t p;
+
+	vm_object_lock_assert_exclusive(object);
+
+	vm_object_paging_wait(object, THREAD_UNINT);
+
+	queue_iterate(&object->memq, p, vm_page_t, listq) {
+
+		if (!p->fictitious)
+			pmap_set_cache_attributes(p->phys_page, wimg_mode);
+	}
+	if (wimg_mode == VM_WIMG_USE_DEFAULT)
+		object->set_cache_attr = FALSE;
+	else
+		object->set_cache_attr = TRUE;
+
+	object->wimg_bits = wimg_mode;
+
+	vm_object_change_wimg_mode_count++;
+}
+
+#if CONFIG_FREEZE
+
+__private_extern__ void	default_freezer_pack_page(vm_page_t , vm_object_t , vm_object_offset_t, void**);
+__private_extern__ void	default_freezer_unpack(vm_object_t , void**);
+
+kern_return_t vm_object_pack(
+	unsigned int       *purgeable_count,
+	unsigned int       *wired_count,
+	unsigned int       *clean_count,
+	unsigned int       *dirty_count,
+	boolean_t          *shared,
+	vm_object_t         src_object,
+	vm_object_t         compact_object,
+	void		      **table,
+	vm_object_offset_t *offset)
+{
+	kern_return_t	kr = KERN_SUCCESS;
+	
+	vm_object_lock(src_object);
+
+	*purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
+	*shared = FALSE;
+
+	if (!src_object->alive || src_object->terminating){
+		kr = KERN_FAILURE;
+		goto done;
+	}
+
+	if (src_object->purgable == VM_PURGABLE_VOLATILE) {
+		*purgeable_count = src_object->resident_page_count;
+		
+		/* If the destination object is null, we're just walking the pages to discover how many can be hibernated */
+		if (VM_OBJECT_NULL != compact_object) {
+			purgeable_q_t queue;
+			/* object should be on a queue */
+			assert(src_object->objq.next != NULL &&
+			       src_object->objq.prev != NULL);
+			queue = vm_purgeable_object_remove(src_object);
+			assert(queue);
+			vm_page_lock_queues();
+			vm_purgeable_token_delete_first(queue);
+			vm_page_unlock_queues();
+			vm_object_purge(src_object);
+		}
+		goto done;
+	}
+
+	if (src_object->ref_count == 1) {
+		vm_object_pack_pages(wired_count, clean_count, dirty_count, src_object, compact_object, table, offset);
+	} else {
+		if (src_object->internal) {
+			*shared = TRUE;
+		}
+	}
+done:
+	vm_object_unlock(src_object);
+	
+	return kr;
+}
+
+
+void
+vm_object_pack_pages(
+	unsigned int       *wired_count,
+	unsigned int       *clean_count,
+	unsigned int       *dirty_count,
+	vm_object_t         src_object,
+	vm_object_t         compact_object,
+	void		      **table,
+	vm_object_offset_t *offset)
+{
+	vm_page_t p, next;
+
+	next = (vm_page_t)queue_first(&src_object->memq);
+
+	/* Since this function is dual-purpose (it counts the freezable
+	 * pages as well as preparing them), assert that our
+	 * arguments are sane. Gnarly, but avoids code duplication.
+	 */
+	if (VM_OBJECT_NULL == compact_object){
+		assert(!table);
+		assert(!offset);
+	} else {
+		assert(table);
+		assert(offset);
+	}
+
+	while (!queue_end(&src_object->memq, (queue_entry_t)next)) {
+		p = next;
+		next = (vm_page_t)queue_next(&next->listq);
+		
+		if (p->fictitious || p->busy ) 
+			continue;
+		
+		if (p->absent || p->unusual || p->error)
+			continue;
+		
+		if (VM_PAGE_WIRED(p)) {
+			(*wired_count)++;
+			continue;
+		}
+		
+		if (VM_OBJECT_NULL == compact_object) {
+			if (p->dirty || pmap_is_modified(p->phys_page)) {
+				(*dirty_count)++;
+			} else {
+				(*clean_count)++;				
+			}
+			continue;
+		}
+		
+		if (p->cleaning) {
+			p->busy = TRUE;
+			p->pageout = TRUE;
+			p->dump_cleaning = TRUE;
+
+			vm_page_lockspin_queues();
+			vm_page_wire(p);
+			vm_page_unlock_queues();
+
+			continue;
+		}
+
+		if (p->pmapped == TRUE) {
+			int refmod_state;
+		 	refmod_state = pmap_disconnect(p->phys_page);
+			if (refmod_state & VM_MEM_MODIFIED) {
+				p->dirty = TRUE;
+			}
+		}
+		
+		if (p->dirty) {
+			p->busy = TRUE;
+		
+			default_freezer_pack_page(p, compact_object, *offset, table);	
+			*offset += PAGE_SIZE;
+
+			(*dirty_count)++;
+		}
+		else {
+			VM_PAGE_FREE(p);
+			(*clean_count)++;
+		}
+	}
+}
+
+void
+vm_object_pageout(
+	vm_object_t object)
+{
+	vm_page_t p, next;
+	
+	assert(object != VM_OBJECT_NULL );
+	
+	vm_object_lock(object);
+	
+	next = (vm_page_t)queue_first(&object->memq);
+
+	while (!queue_end(&object->memq, (queue_entry_t)next)) {
+		p = next;
+		next = (vm_page_t)queue_next(&next->listq);
+		
+		/* Throw to the pageout queue */
+		vm_page_lockspin_queues();
+
+		VM_PAGE_QUEUES_REMOVE(p);
+		vm_pageout_cluster(p);
+
+		vm_page_unlock_queues();
+	}
+
+	vm_object_unlock(object);
+}
+
+kern_return_t
+vm_object_pagein(
+	vm_object_t object)
+{
+	memory_object_t	pager;
+	kern_return_t	kr;
+
+	vm_object_lock(object);
+
+	pager = object->pager;
+
+	if (!object->pager_ready || pager == MEMORY_OBJECT_NULL) {
+		vm_object_unlock(object);
+		return KERN_FAILURE;
+	}
+	
+	vm_object_paging_wait(object, THREAD_UNINT);
+	vm_object_paging_begin(object);
+
+	object->blocked_access = TRUE;
+	vm_object_unlock(object);
+	
+	kr = memory_object_data_reclaim(pager, TRUE);
+
+	vm_object_lock(object);
+
+	object->blocked_access = FALSE;
+	vm_object_paging_end(object);
+
+	vm_object_unlock(object);
+	
+	return kr;
+}
+
+void
+vm_object_unpack(
+	vm_object_t compact_object,
+	void	**table)
+{
+	/*
+	 * Future Work:
+	 * Right now we treat the default freezer much like
+	 * the default pager with respect to when it is
+	 * created and terminated.
+	 * But, in the future, we may want to terminate the
+	 * default freezer at the very instant that an object
+	 * has been completely re-filled with all it's previously
+	 * paged-out pages.
+	 * At that time we'll need to reset the object fields like
+	 * "pager" and the associated "pager_{created,initialized,trusted}"
+	 * fields right here.
+	 */
+	default_freezer_unpack(compact_object, table);
+}
+
+#endif /* CONFIG_FREEZE */
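
Taken together, the CONFIG_FREEZE additions form a pack/unpack lifecycle. A hedged kernel-context fragment (not a standalone program; src_object, compact_object and the gating policy are assumptions, only the signatures come from the patch). Passing VM_OBJECT_NULL as the compact object makes vm_object_pack a pure counting pass, per the asserts in vm_object_pack_pages:

    /*
     * Hedged fragment; src_object and compact_object are assumed to be
     * set up by the (hypothetical) caller.
     */
    unsigned int purgeable, wired, clean, dirty;
    boolean_t shared;
    vm_object_offset_t offset = 0;
    void *table = NULL;

    /* Counting pass: a NULL compact object only tallies the pages. */
    (void) vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared,
                          src_object, VM_OBJECT_NULL, NULL, NULL);

    /* Packing pass: dirty pages go to the default freezer via the
     * table/offset cursor, clean pages are freed outright. */
    if (!shared && wired == 0)
            (void) vm_object_pack(&purgeable, &wired, &clean, &dirty,
                                  &shared, src_object, compact_object,
                                  &table, &offset);

    /* On thaw, hand the pages back and release the freezer state. */
    vm_object_unpack(compact_object, &table);
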
diff --git a/osfmk/vm/vm_object.h b/osfmk/vm/vm_object.h
index eacd7d65e..0d21734af 100644
--- a/osfmk/vm/vm_object.h
+++ b/osfmk/vm/vm_object.h
@@ -66,6 +66,8 @@
 #ifndef	_VM_VM_OBJECT_H_
 #define _VM_VM_OBJECT_H_
 
+#include <debug.h>
+#include <mach_assert.h>
 #include <mach_pagemap.h>
 #include <task_swapper.h>
 
@@ -107,20 +109,32 @@ struct vm_object_fault_info {
         vm_behavior_t	behavior;
         vm_map_offset_t	lo_offset;
 	vm_map_offset_t	hi_offset;
-	boolean_t	no_cache;
-	boolean_t	stealth;
-	boolean_t	mark_zf_absent;
+	unsigned int
+	/* boolean_t */	no_cache:1,
+	/* boolean_t */	stealth:1,
+	/* boolean_t */	io_sync:1,
+	/* boolean_t */ cs_bypass:1,
+	/* boolean_t */	mark_zf_absent:1,
+		__vm_object_fault_info_unused_bits:27;
 };
 
 
+#define	vo_size			vo_un1.vou_size
+#define vo_cache_pages_to_scan	vo_un1.vou_cache_pages_to_scan
+#define vo_shadow_offset	vo_un2.vou_shadow_offset
+#define vo_cache_ts		vo_un2.vou_cache_ts
 
 struct vm_object {
 	queue_head_t		memq;		/* Resident memory */
         lck_rw_t		Lock;		/* Synchronization */
 
-	vm_object_size_t	size;		/* Object size (only valid
-						 * if internal)
-						 */
+	union {
+		vm_object_size_t  vou_size;	/* Object size (only valid if internal) */
+		int		  vou_cache_pages_to_scan;	/* pages yet to be visited in an
+								 * external object in cache
+								 */
+	} vo_un1;
+
 	struct vm_page		*memq_hint;
 	int			ref_count;	/* Number of references */
 #if	TASK_SWAPPER
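
Packing the five boolean_t fields of vm_object_fault_info into one-bit flags shrinks the struct while leaving call sites source-compatible, since each flag is still assigned TRUE or FALSE. A reduced user-space illustration of the saving:

    #include <stdio.h>

    struct fault_flags_old {           /* one word per flag */
        int no_cache, stealth, io_sync, cs_bypass, mark_zf_absent;
    };

    struct fault_flags_new {           /* five flags in one word */
        unsigned int no_cache:1,
                     stealth:1,
                     io_sync:1,
                     cs_bypass:1,
                     mark_zf_absent:1,
                     __unused_bits:27;
    };

    int
    main(void)
    {
        printf("old=%zu bytes, new=%zu bytes\n",
               sizeof(struct fault_flags_old),  /* typically 20 */
               sizeof(struct fault_flags_new)); /* typically 4  */
        return 0;
    }
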
@@ -139,7 +153,13 @@ struct vm_object {
 						 * copy_call.
 						 */
 	struct vm_object	*shadow;	/* My shadow */
-	vm_object_offset_t	shadow_offset;	/* Offset into shadow */
+
+	union {
+		vm_object_offset_t vou_shadow_offset;	/* Offset into shadow */
+		clock_sec_t	   vou_cache_ts;	/* age of an external object
+							 * present in cache
+							 */
+	} vo_un2;
 
 	memory_object_t		pager;		/* Where to get data */
 	vm_object_offset_t	paging_offset;	/* Offset into memory object */
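
The unions are safe because the overlaid fields are never live at the same time: vou_size and vou_shadow_offset describe an object in use, while vou_cache_pages_to_scan and vou_cache_ts only matter for an external object parked on the cache queue, and the vo_* macros keep call sites readable. A compact illustration of the overlay (reduced types; clock_sec_t is approximated as unsigned long):

    #include <stdint.h>

    typedef unsigned long clock_sec_t; /* stand-in for the kernel type */

    struct obj {
        union {
            uint64_t    vou_shadow_offset; /* valid while object is live  */
            clock_sec_t vou_cache_ts;      /* valid while object is cached */
        } vo_un2;
    };

    /* the accessor macros keep call sites oblivious to the union */
    #define vo_shadow_offset vo_un2.vou_shadow_offset
    #define vo_cache_ts      vo_un2.vou_cache_ts

    int
    main(void)
    {
        struct obj o;
        o.vo_shadow_offset = 0;  /* mapped object: offset into shadow   */
        o.vo_cache_ts = 12345;   /* cached object: same storage, reused */
        return (int)o.vo_cache_ts;
    }
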
@@ -303,7 +323,10 @@ struct vm_object {
 		volatile_fault:1,
 		all_reusable:1,
 		blocked_access:1,
-		__object2_unused_bits:16;	/* for expansion */
+		set_cache_attr:1,
+		__object2_unused_bits:15;	/* for expansion */
+
+	uint32_t		scan_collisions;
 
 #if	UPL_DEBUG
 	queue_head_t		uplq;		/* List of outstanding upls */
@@ -321,7 +344,7 @@ struct vm_object {
 	} pip_holders[VM_PIP_DEBUG_MAX_REFS];
 #endif	/* VM_PIP_DEBUG  */
 
-        queue_chain_t       objq;      /* object queue - currently used for purgable queues */
+        queue_chain_t		objq;      /* object queue - currently used for purgable queues */
 };
 
 #define VM_OBJECT_PURGEABLE_FAULT_ERROR(object)				\
@@ -644,6 +667,10 @@ __private_extern__ kern_return_t vm_object_populate_with_private(
 	ppnum_t			phys_page,
 	vm_size_t		size);
 
+__private_extern__ void vm_object_change_wimg_mode(
+	vm_object_t		object,
+	unsigned int		wimg_mode);
+
 extern kern_return_t adjust_vm_object_cache(
 	vm_size_t oval,
 	vm_size_t nval);
@@ -671,6 +698,41 @@ __private_extern__ void		vm_object_reap_pages(
 #define REAP_PURGEABLE	2
 #define REAP_DATA_FLUSH	3
 
+#if CONFIG_FREEZE
+
+__private_extern__ kern_return_t 
+vm_object_pack(
+	unsigned int       *purgeable_count,
+	unsigned int       *wired_count,
+	unsigned int       *clean_count,
+	unsigned int       *dirty_count,
+	boolean_t          *shared,
+	vm_object_t         src_object,
+	vm_object_t         dst_object,
+	void		      **table,
+	vm_object_offset_t *offset);
+
+__private_extern__ void
+vm_object_pack_pages(
+	unsigned int       *wired_count,
+	unsigned int       *clean_count,
+	unsigned int       *dirty_count,
+	vm_object_t         src_object,
+	vm_object_t         dst_object,
+	void		      **table,
+	vm_object_offset_t *offset);
+
+__private_extern__ void vm_object_pageout(
+    vm_object_t     object);
+
+__private_extern__  kern_return_t vm_object_pagein(
+	vm_object_t     object);
+
+__private_extern__ void vm_object_unpack(
+	vm_object_t     object,
+	void          **table);
+
+#endif /* CONFIG_FREEZE */
 
 /*
  *	Event waiting handling
@@ -881,20 +943,24 @@ extern boolean_t	vm_object_lock_try_shared(vm_object_t);
  * check if anyone is holding the lock, but the holder may not necessarily
  * be the caller...
  */
-#if DEBUG
+#if MACH_ASSERT || DEBUG
 #define vm_object_lock_assert_held(object) \
 	lck_rw_assert(&(object)->Lock, LCK_RW_ASSERT_HELD)
 #define vm_object_lock_assert_shared(object)	\
 	lck_rw_assert(&(object)->Lock, LCK_RW_ASSERT_SHARED)
 #define vm_object_lock_assert_exclusive(object) \
 	lck_rw_assert(&(object)->Lock, LCK_RW_ASSERT_EXCLUSIVE)
-#else /* DEBUG */
+#else  /* MACH_ASSERT || DEBUG */ 
 #define vm_object_lock_assert_held(object)
 #define vm_object_lock_assert_shared(object)
 #define vm_object_lock_assert_exclusive(object)
-#endif /* DEBUG */
+#endif /* MACH_ASSERT || DEBUG */
 
 #define vm_object_round_page(x) (((vm_object_offset_t)(x) + PAGE_MASK) & ~((signed)PAGE_MASK))
 #define vm_object_trunc_page(x) ((vm_object_offset_t)(x) & ~((signed)PAGE_MASK))
 
+extern void	vm_object_cache_add(vm_object_t);
+extern void	vm_object_cache_remove(vm_object_t);
+extern int	vm_object_cache_evict(int, int);
+
 #endif	/* _VM_VM_OBJECT_H_ */
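
Widening the guard from DEBUG to MACH_ASSERT || DEBUG makes the lock assertions live in any kernel built with MACH_ASSERT, not just DEBUG builds. A toy analogue of the compile-away pattern (the lock type and the MY_* knobs are hypothetical):

    #include <assert.h>
    #include <stdbool.h>

    struct rwlock { bool held; };      /* toy lock with an ownership bit */

    #if defined(MY_MACH_ASSERT) || defined(MY_DEBUG)
    #define lock_assert_held(l) assert((l)->held)
    #else
    #define lock_assert_held(l) ((void)0) /* free in release builds */
    #endif

    int
    main(void)
    {
        struct rwlock l = { .held = true };
        lock_assert_held(&l);  /* active only when a debug knob is set */
        return 0;
    }
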
diff --git a/osfmk/vm/vm_page.h b/osfmk/vm/vm_page.h
index ecae81c15..543a0c6f5 100644
--- a/osfmk/vm/vm_page.h
+++ b/osfmk/vm/vm_page.h
@@ -114,7 +114,6 @@
 
 #define VM_PAGE_SPECULATIVE_Q_AGE_MS	500
 
-
 struct vm_speculative_age_q {
 	/*
 	 * memory queue for speculative pages via clustered pageins
@@ -124,11 +123,13 @@ struct vm_speculative_age_q {
 };
 
 
+
 extern
 struct vm_speculative_age_q	vm_page_queue_speculative[];
 
 extern int			speculative_steal_index;
 extern int			speculative_age_index;
+extern unsigned int		vm_page_speculative_q_age_ms;
 
 
 /*
@@ -179,6 +180,7 @@ struct vm_page {
 #define local_id wire_count
 	unsigned int	wire_count:16,	/* how many wired down maps use me? (O&P) */
 	/* boolean_t */	inactive:1,	/* page is in inactive list (P) */
+			zero_fill:1,
 			active:1,	/* page is in active list (P) */
 			pageout_queue:1,/* page is on queue for pageout (P) */
 			speculative:1,	/* page is on speculative list (P) */
@@ -190,7 +192,9 @@ struct vm_page {
 					 *  the free list (P) */
 			throttled:1,	/* pager is not responding (P) */
 		        local:1,
-			__unused_pageq_bits:5;	/* 5 bits available here */
+			no_cache:1,	/* page is not to be cached and should
+					 * be reused ahead of other pages (P) */
+			__unused_pageq_bits:3;	/* 3 bits available here */
 
 	ppnum_t		phys_page;	/* Physical address of page, passed
 					 *  to pmap_enter (read-only) */
@@ -244,13 +248,10 @@ struct vm_page {
 					   /* a pageout candidate           */
 			cs_validated:1,    /* code-signing: page was checked */	
 			cs_tainted:1,	   /* code-signing: page is tainted */
-			no_cache:1,	   /* page is not to be cached and */
-					   /* should be reused ahead of    */
-					   /* other pages		   */
-			zero_fill:1,
 			reusable:1,
 		        lopage:1,
-			__unused_object_bits:6;  /* 6 bits available here */
+			slid:1,
+			__unused_object_bits:7;  /* 7 bits available here */
 
 #if __LP64__
 	unsigned int __unused_padding;	/* Pad structure explicitly
@@ -400,8 +401,6 @@ queue_head_t	vm_page_queue_free[MAX_COLORS];	/* memory free queue */
 extern
 queue_head_t	vm_lopage_queue_free;		/* low memory free queue */
 extern
-vm_page_t	vm_page_queue_fictitious;	/* fictitious free queue */
-extern
 queue_head_t	vm_page_queue_active;	/* active memory queue */
 extern
 queue_head_t	vm_page_queue_inactive;	/* inactive memory queue for normal pages */
@@ -545,7 +544,7 @@ extern vm_page_t	vm_page_alloc_guard(
 extern void		vm_page_init(
 					vm_page_t	page,
 					ppnum_t		phys_page,
-					boolean_t	lopage);
+					boolean_t 	lopage);
 
 extern void		vm_page_free(
 	                                vm_page_t	page);
@@ -648,6 +647,9 @@ extern void		vm_page_free_prepare_object(
 	                                vm_page_t	page,
 					boolean_t	remove_from_hash);
 
+extern void		vm_check_memorystatus(void);
+
+
 /*
  *	Functions implemented as macros. m->wanted and m->busy are
  *	protected by the object lock.
@@ -744,6 +746,7 @@ extern void vm_page_queues_assert(vm_page_t mem, int val);
 		assert(mem->object != kernel_object);		\
 		assert(!mem->inactive && !mem->speculative);	\
 		assert(!mem->active && !mem->throttled);	\
+		assert(!mem->fictitious);			\
 		lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;	\
 		VPL_LOCK(&lq->vpl_lock);			\
 		queue_remove(&lq->vpl_queue,			\
@@ -753,25 +756,23 @@ extern void vm_page_queues_assert(vm_page_t mem, int val);
 		lq->vpl_count--;				\
 		VPL_UNLOCK(&lq->vpl_lock);			\
 	}							\
-	if (mem->active) {					\
+								\
+	else if (mem->active) {					\
 		assert(mem->object != kernel_object);		\
 		assert(!mem->inactive && !mem->speculative);	\
 		assert(!mem->throttled);			\
+		assert(!mem->fictitious);			\
 		queue_remove(&vm_page_queue_active,		\
 			mem, vm_page_t, pageq);			\
 		mem->active = FALSE;				\
-		if (!mem->fictitious) {				\
-			vm_page_active_count--;			\
-		} else {					\
-			assert(mem->phys_page ==		\
-			       vm_page_fictitious_addr);	\
-		}						\
+		vm_page_active_count--;				\
 	}							\
 								\
 	else if (mem->inactive) {				\
 		assert(mem->object != kernel_object);		\
 		assert(!mem->active && !mem->speculative);	\
 		assert(!mem->throttled);			\
+		assert(!mem->fictitious);			\
 		if (mem->zero_fill) {				\
 			queue_remove(&vm_page_queue_zf,		\
 			mem, vm_page_t, pageq);			\
@@ -781,23 +782,18 @@ extern void vm_page_queues_assert(vm_page_t mem, int val);
 			mem, vm_page_t, pageq);			\
 		}						\
 		mem->inactive = FALSE;				\
-		if (!mem->fictitious) {				\
-			vm_page_inactive_count--;		\
-			vm_purgeable_q_advance_all();		\
-		} else {					\
-			assert(mem->phys_page ==		\
-			       vm_page_fictitious_addr);	\
-		}						\
+		vm_page_inactive_count--;			\
+		vm_purgeable_q_advance_all();			\
 	}							\
 								\
 	else if (mem->throttled) {				\
 		assert(!mem->active && !mem->inactive);		\
 		assert(!mem->speculative);			\
+		assert(!mem->fictitious);			\
 		queue_remove(&vm_page_queue_throttled,		\
 			     mem, vm_page_t, pageq);		\
 		mem->throttled = FALSE;				\
-		if (!mem->fictitious)				\
-			vm_page_throttled_count--;		\
+		vm_page_throttled_count--;			\
 	}							\
 								\
 	else if (mem->speculative) {				\
@@ -808,12 +804,39 @@ extern void vm_page_queues_assert(vm_page_t mem, int val);
 		mem->speculative = FALSE;			\
 		vm_page_speculative_count--;			\
 	}							\
+								\
+	else if (mem->pageq.next || mem->pageq.prev)		\
+		panic("VM_PAGE_QUEUES_REMOVE: unmarked page on Q");	\
 	mem->pageq.next = NULL;					\
 	mem->pageq.prev = NULL;					\
 	VM_PAGE_QUEUES_ASSERT(mem, 0);				\
 	MACRO_END
 
 
+#define VM_PAGE_ENQUEUE_INACTIVE(mem, first)			\
+	MACRO_BEGIN						\
+	VM_PAGE_QUEUES_ASSERT(mem, 0);				\
+	assert(!mem->fictitious);				\
+	assert(!mem->laundry);					\
+	assert(!mem->pageout_queue);				\
+	if (mem->zero_fill) {					\
+		if (first == TRUE)				\
+			queue_enter_first(&vm_page_queue_zf, mem, vm_page_t, pageq);	\
+		else						\
+			queue_enter(&vm_page_queue_zf, mem, vm_page_t, pageq);		\
+		vm_zf_queue_count++;				\
+	} else {						\
+		if (first == TRUE)				\
+			queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, pageq); \
+		else						\
+			queue_enter(&vm_page_queue_inactive, mem, vm_page_t, pageq);	\
+	}							\
+	mem->inactive = TRUE;					\
+	vm_page_inactive_count++;				\
+	token_new_pagecount++;					\
+	MACRO_END
+
+
 #if DEVELOPMENT || DEBUG
 #define VM_PAGE_SPECULATIVE_USED_ADD()				\
 	MACRO_BEGIN						\
@@ -834,4 +857,71 @@ extern void vm_page_queues_assert(vm_page_t mem, int val);
 	}							\
 	MACRO_END
 
+
+	
+#define DW_vm_page_unwire		0x01
+#define DW_vm_page_wire			0x02
+#define DW_vm_page_free			0x04
+#define DW_vm_page_activate		0x08
+#define DW_vm_page_deactivate_internal	0x10
+#define DW_vm_page_speculate	 	0x20
+#define DW_vm_page_lru		 	0x40
+#define DW_vm_pageout_throttle_up	0x80
+#define DW_PAGE_WAKEUP			0x100
+#define DW_clear_busy			0x200
+#define DW_clear_reference		0x400
+#define DW_set_reference		0x800
+#define DW_move_page			0x1000
+#define DW_VM_PAGE_QUEUES_REMOVE	0x2000
+#define DW_set_list_req_pending		0x4000
+
+struct vm_page_delayed_work {
+	vm_page_t	dw_m;
+	int		dw_mask;
+};
+
+void vm_page_do_delayed_work(vm_object_t object, struct vm_page_delayed_work *dwp, int dw_count);
+
+extern unsigned int vm_max_delayed_work_limit;
+
+#define DEFAULT_DELAYED_WORK_LIMIT	32
+
+#define DELAYED_WORK_LIMIT(max)	((vm_max_delayed_work_limit >= max ? max : vm_max_delayed_work_limit))
+
+/*
+ * vm_page_do_delayed_work may need to drop the object lock...
+ * if it does, we need the pages it's looking at to
+ * be held stable via the busy bit, so if busy isn't already
+ * set, we need to set it and ask vm_page_do_delayed_work
+ * to clear it and wakeup anyone that might have blocked on
+ * it once we're done processing the page.
+ *
+ * additionally, we can't call vm_page_do_delayed_work with
+ * list_req_pending == TRUE since it may need to drop the
+ * object lock before dealing with this page; because
+ * list_req_pending == TRUE, busy == TRUE will NOT protect
+ * this page from being stolen, so clear list_req_pending and
+ * ask vm_page_do_delayed_work to re-set it once it holds both
+ * the pageq and object locks.
+ */
+
+#define VM_PAGE_ADD_DELAYED_WORK(dwp, mem, dw_cnt)		\
+	MACRO_BEGIN						\
+	if (mem->busy == FALSE) {				\
+		mem->busy = TRUE;				\
+		if ( !(dwp->dw_mask & DW_vm_page_free))		\
+			dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); \
+	}							\
+	if (mem->list_req_pending) {				\
+		mem->list_req_pending = FALSE;			\
+		dwp->dw_mask |= DW_set_list_req_pending;	\
+	}							\
+	dwp->dw_m = mem;					\
+	dwp++;							\
+	dw_count++;						\
+	MACRO_END
+
+extern vm_page_t vm_object_page_grab(vm_object_t);
+
+
 #endif	/* _VM_VM_PAGE_H_ */
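
The delayed-work machinery above lets a scan batch per-page queue operations and apply them in one pass under the page-queue lock. A hedged kernel-context fragment of the intended calling pattern (not standalone; "m", "object" and the surrounding loop are assumed, while the names and signatures come from the header):

    /*
     * Hedged fragment: "m" is a page found by some (hypothetical)
     * scan of "object".
     */
    struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
    struct vm_page_delayed_work *dwp = &dw_array[0];
    int dw_count = 0;
    int dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);

    /* ... for each interesting page m in object ... */
    dwp->dw_mask = DW_vm_page_activate;          /* queue the operation  */
    VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);  /* busies m, records it */

    if (dw_count >= dw_limit) {
            /* apply the whole batch in one pass under the queue lock */
            vm_page_do_delayed_work(object, &dw_array[0], dw_count);
            dwp = &dw_array[0];
            dw_count = 0;
    }
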
diff --git a/osfmk/vm/vm_pageout.c b/osfmk/vm/vm_pageout.c
index 4098fb8bc..acf4d64bd 100644
--- a/osfmk/vm/vm_pageout.c
+++ b/osfmk/vm/vm_pageout.c
@@ -95,9 +95,7 @@
 #include <machine/vm_tuning.h>
 #include <machine/commpage.h>
 
-#if CONFIG_EMBEDDED
 #include <sys/kern_memorystatus.h>
-#endif
 
 #include <vm/pmap.h>
 #include <vm/vm_fault.h>
@@ -108,7 +106,7 @@
 #include <vm/vm_protos.h> /* must be last */
 #include <vm/memory_object.h>
 #include <vm/vm_purgeable_internal.h>
-
+#include <vm/vm_shared_region.h>
 /*
  * ENCRYPTED SWAP:
  */
@@ -119,6 +117,8 @@ extern u_int32_t random(void);	/* from <libkern/libkern.h> */
 #include <libkern/OSDebug.h>
 #endif
 
+extern void consider_pressure_events(void);
+
 #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE   /* maximum iterations of the active queue to move pages to inactive */
 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE  100
 #endif
@@ -140,11 +140,11 @@ extern u_int32_t random(void);	/* from <libkern/libkern.h> */
 #endif
 
 #ifndef	VM_PAGE_LAUNDRY_MAX
-#define	VM_PAGE_LAUNDRY_MAX	16UL	/* maximum pageouts on a given pageout queue */
+#define	VM_PAGE_LAUNDRY_MAX	128UL	/* maximum pageouts on a given pageout queue */
 #endif	/* VM_PAGEOUT_LAUNDRY_MAX */
 
 #ifndef	VM_PAGEOUT_BURST_WAIT
-#define	VM_PAGEOUT_BURST_WAIT	30	/* milliseconds per page */
+#define	VM_PAGEOUT_BURST_WAIT	30	/* milliseconds */
 #endif	/* VM_PAGEOUT_BURST_WAIT */
 
 #ifndef	VM_PAGEOUT_EMPTY_WAIT
@@ -159,10 +159,14 @@ extern u_int32_t random(void);	/* from <libkern/libkern.h> */
 #define VM_PAGEOUT_IDLE_WAIT	10	/* milliseconds */
 #endif	/* VM_PAGEOUT_IDLE_WAIT */
 
+unsigned int	vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
+unsigned int	vm_page_speculative_percentage = 5;
+
 #ifndef VM_PAGE_SPECULATIVE_TARGET
-#define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / 20)
+#define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_page_speculative_percentage))
 #endif /* VM_PAGE_SPECULATIVE_TARGET */
 
+
 #ifndef VM_PAGE_INACTIVE_HEALTHY_LIMIT
 #define VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) ((total) * 1 / 200)
 #endif /* VM_PAGE_INACTIVE_HEALTHY_LIMIT */
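
VM_PAGE_SPECULATIVE_TARGET now derives from the vm_page_speculative_percentage tunable, which vm_pageout_scan later clamps to the range 1..50, but the computation is integer division: total / (100 / pct). Because 100/pct itself truncates, only percentages that divide 100 evenly map exactly. A quick standalone check:

    #include <stdio.h>

    #define SPECULATIVE_TARGET(total, pct) ((total) * 1 / (100 / (pct)))

    int
    main(void)
    {
        unsigned int total = 100000;

        /* 5 divides 100: total/20, an exact 5% */
        printf("5%%  -> %u\n", SPECULATIVE_TARGET(total, 5u));
        /* 33 does not: 100/33 truncates to 3, giving ~33.3%, not 33% */
        printf("33%% -> %u\n", SPECULATIVE_TARGET(total, 33u));
        return 0;
    }
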
@@ -210,6 +214,7 @@ extern u_int32_t random(void);	/* from <libkern/libkern.h> */
 #endif
 #endif	/* VM_PAGE_FREE_MIN */
 
+#define VM_PAGE_FREE_RESERVED_LIMIT	100
 #define VM_PAGE_FREE_MIN_LIMIT		1500
 #define VM_PAGE_FREE_TARGET_LIMIT	2000
 
@@ -282,6 +287,8 @@ unsigned int vm_pageout_inactive_relief = 0;
 unsigned int vm_pageout_burst_active_throttle = 0;
 unsigned int vm_pageout_burst_inactive_throttle = 0;
 
+int	vm_upl_wait_for_pages = 0;
+
 /*
  *	Protection against zero fill flushing live working sets derived
  *	from existing backing store and files
@@ -290,11 +297,7 @@ unsigned int vm_accellerate_zf_pageout_trigger = 400;
 unsigned int zf_queue_min_count = 100;
 unsigned int vm_zf_queue_count = 0;
 
-#if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */
-unsigned int vm_zf_count = 0;
-#else
 uint64_t vm_zf_count __attribute__((aligned(8))) = 0;
-#endif
 
 /*
  *	These variables record the pageout daemon's actions:
@@ -303,16 +306,22 @@ uint64_t vm_zf_count __attribute__((aligned(8))) = 0;
  */
 
 unsigned int vm_pageout_active = 0;		/* debugging */
+unsigned int vm_pageout_active_busy = 0;	/* debugging */
 unsigned int vm_pageout_inactive = 0;		/* debugging */
 unsigned int vm_pageout_inactive_throttled = 0;	/* debugging */
 unsigned int vm_pageout_inactive_forced = 0;	/* debugging */
 unsigned int vm_pageout_inactive_nolock = 0;	/* debugging */
 unsigned int vm_pageout_inactive_avoid = 0;	/* debugging */
 unsigned int vm_pageout_inactive_busy = 0;	/* debugging */
+unsigned int vm_pageout_inactive_error = 0;	/* debugging */
 unsigned int vm_pageout_inactive_absent = 0;	/* debugging */
+unsigned int vm_pageout_inactive_notalive = 0;	/* debugging */
 unsigned int vm_pageout_inactive_used = 0;	/* debugging */
+unsigned int vm_pageout_cache_evicted = 0;	/* debugging */
 unsigned int vm_pageout_inactive_clean = 0;	/* debugging */
-unsigned int vm_pageout_inactive_dirty = 0;	/* debugging */
+unsigned int vm_pageout_speculative_clean = 0;	/* debugging */
+unsigned int vm_pageout_inactive_dirty_internal = 0;	/* debugging */
+unsigned int vm_pageout_inactive_dirty_external = 0;	/* debugging */
 unsigned int vm_pageout_inactive_deactivated = 0;	/* debugging */
 unsigned int vm_pageout_inactive_zf = 0;	/* debugging */
 unsigned int vm_pageout_dirty_no_pager = 0;	/* debugging */
@@ -325,8 +334,10 @@ unsigned int vm_pageout_reactivation_limit_exceeded = 0;	/* debugging */
 unsigned int vm_pageout_catch_ups = 0;				/* debugging */
 unsigned int vm_pageout_inactive_force_reclaim = 0;	/* debugging */
 
+unsigned int vm_pageout_scan_reclaimed_throttled = 0;
 unsigned int vm_pageout_scan_active_throttled = 0;
-unsigned int vm_pageout_scan_inactive_throttled = 0;
+unsigned int vm_pageout_scan_inactive_throttled_internal = 0;
+unsigned int vm_pageout_scan_inactive_throttled_external = 0;
 unsigned int vm_pageout_scan_throttle = 0;			/* debugging */
 unsigned int vm_pageout_scan_throttle_aborted = 0;		/* debugging */
 unsigned int vm_pageout_scan_burst_throttle = 0;		/* debugging */
@@ -334,7 +345,7 @@ unsigned int vm_pageout_scan_empty_throttle = 0;		/* debugging */
 unsigned int vm_pageout_scan_deadlock_detected = 0;		/* debugging */
 unsigned int vm_pageout_scan_active_throttle_success = 0;	/* debugging */
 unsigned int vm_pageout_scan_inactive_throttle_success = 0;	/* debugging */
-
+unsigned int vm_pageout_inactive_external_forced_reactivate_count = 0; /* debugging */
 unsigned int vm_page_speculative_count_drifts = 0;
 unsigned int vm_page_speculative_count_drift_max = 0;
 
@@ -372,6 +383,9 @@ boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;
 unsigned long vm_cs_validated_resets = 0;
 #endif
 
+int	vm_debug_events	= 0;
+
+
 /*
  *	Routine:	vm_backing_store_disable
  *	Purpose:
@@ -449,7 +463,7 @@ vm_pageout_object_terminate(
 		p = VM_PAGE_NULL;
 
 		m = vm_page_lookup(shadow_object,
-			offset + object->shadow_offset);
+			offset + object->vo_shadow_offset);
 
 		if(m == VM_PAGE_NULL)
 			continue;
@@ -529,39 +543,44 @@ vm_pageout_object_terminate(
 			else
 				vm_page_deactivate(m);
 		}
-		if((m->busy) && (m->cleaning)) {
-
-			/* the request_page_list case, (COPY_OUT_FROM FALSE) */
-			m->busy = FALSE;
-
-			/* We do not re-set m->dirty ! */
-			/* The page was busy so no extraneous activity     */
-			/* could have occurred. COPY_INTO is a read into the */
-			/* new pages. CLEAN_IN_PLACE does actually write   */
-			/* out the pages but handling outside of this code */
-			/* will take care of resetting dirty. We clear the */
-			/* modify however for the Programmed I/O case.     */ 
-			pmap_clear_modify(m->phys_page);
+		if (m->overwriting) {
+			/*
+			 * the (COPY_OUT_FROM == FALSE) request_page_list case
+			 */
+			if (m->busy) {
+				/*
+				 * We do not re-set m->dirty !
+				 * The page was busy so no extraneous activity
+				 * could have occurred. COPY_INTO is a read into the
+				 * new pages. CLEAN_IN_PLACE does actually write
+				 * out the pages but handling outside of this code
+				 * will take care of resetting dirty. We clear the
+				 * modify however for the Programmed I/O case.
+				 */
+				pmap_clear_modify(m->phys_page);
 
-			m->absent = FALSE;
-			m->overwriting = FALSE;
-		} else if (m->overwriting) {
-			/* alternate request page list, write to page_list */
-			/* case.  Occurs when the original page was wired  */
-			/* at the time of the list request */
-			assert(VM_PAGE_WIRED(m));
-			vm_page_unwire(m, TRUE);	/* reactivates */
+				m->busy = FALSE;
+				m->absent = FALSE;
+			} else {
+				/*
+				 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
+				 * Occurs when the original page was wired
+				 * at the time of the list request
+				 */
+				 assert(VM_PAGE_WIRED(m));
+				 vm_page_unwire(m, TRUE);	/* reactivates */
+			}
 			m->overwriting = FALSE;
 		} else {
-		/*
-		 * Set the dirty state according to whether or not the page was
-		 * modified during the pageout. Note that we purposefully do
-		 * NOT call pmap_clear_modify since the page is still mapped.
-		 * If the page were to be dirtied between the 2 calls, this
-		 * this fact would be lost. This code is only necessary to
-		 * maintain statistics, since the pmap module is always
-		 * consulted if m->dirty is false.
-		 */
+			/*
+			 * Set the dirty state according to whether or not the page was
+			 * modified during the pageout. Note that we purposefully do
+			 * NOT call pmap_clear_modify since the page is still mapped.
+			 * If the page were to be dirtied between the 2 calls, this
+			 * fact would be lost. This code is only necessary to
+			 * maintain statistics, since the pmap module is always
+			 * consulted if m->dirty is false.
+			 */
 #if MACH_CLUSTER_STATS
 			m->dirty = pmap_is_modified(m->phys_page);
 
@@ -572,8 +591,11 @@ vm_pageout_object_terminate(
 			m->dirty = 0;
 #endif
 		}
+		if (m->encrypted_cleaning == TRUE) {
+			m->encrypted_cleaning = FALSE;
+			m->busy = FALSE;
+		}
 		m->cleaning = FALSE;
-		m->encrypted_cleaning = FALSE;
 
 		/*
 		 * Wakeup any thread waiting for the page to be un-cleaning.
@@ -786,13 +808,20 @@ vm_pageout_cluster(vm_page_t m)
 		object, m->offset, m, 0, 0);
 
 	VM_PAGE_CHECK(m);
+#if DEBUG
+	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
+#endif
+	vm_object_lock_assert_exclusive(object);
 
 	/*
 	 * Only a certain kind of page is appreciated here.
 	 */
 	assert(m->busy && (m->dirty || m->precious) && (!VM_PAGE_WIRED(m)));
-	assert(!m->cleaning && !m->pageout && !m->inactive && !m->active);
+	assert(!m->cleaning && !m->pageout);
+#ifndef CONFIG_FREEZE
+	assert(!m->inactive && !m->active);
 	assert(!m->throttled);
+#endif
 
 	/*
 	 * protect the object from collapse - 
@@ -817,7 +846,7 @@ vm_pageout_cluster(vm_page_t m)
         /* 
 	 * pgo_laundry count is tied to the laundry bit
 	 */
-        m->laundry = TRUE;
+	m->laundry = TRUE;
 	q->pgo_laundry++;
 
 	m->pageout_queue = TRUE;
@@ -843,62 +872,46 @@ unsigned long vm_pageout_throttle_up_count = 0;
  */
 void
 vm_pageout_throttle_up(
-	vm_page_t	m)
+       vm_page_t       m)
 {
-        struct vm_pageout_queue *q;
+       struct vm_pageout_queue *q;
 
-	assert(m->object != VM_OBJECT_NULL);
-	assert(m->object != kernel_object);
+       assert(m->object != VM_OBJECT_NULL);
+       assert(m->object != kernel_object);
 
-	vm_pageout_throttle_up_count++;
+       vm_pageout_throttle_up_count++;
 
-	if (m->object->internal == TRUE)
-	        q = &vm_pageout_queue_internal;
-	else
-	        q = &vm_pageout_queue_external;
-
-	if (m->pageout_queue == TRUE) {
-
-		queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
-		m->pageout_queue = FALSE;
+       if (m->object->internal == TRUE)
+               q = &vm_pageout_queue_internal;
+       else
+               q = &vm_pageout_queue_external;
 
-		m->pageq.next = NULL;
-		m->pageq.prev = NULL;
-
-		vm_object_paging_end(m->object);
-	}
-	if (m->laundry == TRUE) {
-		m->laundry = FALSE;
-		q->pgo_laundry--;
+       if (m->pageout_queue == TRUE) {
 
-		if (q->pgo_throttled == TRUE) {
-			q->pgo_throttled = FALSE;
-			thread_wakeup((event_t) &q->pgo_laundry);
-		}
-		if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
-			q->pgo_draining = FALSE;
-			thread_wakeup((event_t) (&q->pgo_laundry+1));
-		}
-	}
-}
+	       queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
+	       m->pageout_queue = FALSE;
 
+	       m->pageq.next = NULL;
+	       m->pageq.prev = NULL;
 
-/*
- *	vm_pageout_scan does the dirty work for the pageout daemon.
- *	It returns with vm_page_queue_free_lock held and
- *	vm_page_free_wanted == 0.
- */
+	       vm_object_paging_end(m->object);
+       }
 
-#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT  (3 * MAX_UPL_TRANSFER)
+       if ( m->laundry == TRUE ) {
 
-#define	FCS_IDLE		0
-#define FCS_DELAYED		1
-#define FCS_DEADLOCK_DETECTED	2
+	       m->laundry = FALSE;
+	       q->pgo_laundry--;
 
-struct flow_control {
-        int		state;
-        mach_timespec_t	ts;
-};
+	       if (q->pgo_throttled == TRUE) {
+		       q->pgo_throttled = FALSE;
+                       thread_wakeup((event_t) &q->pgo_laundry);
+               }
+	       if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
+		       q->pgo_draining = FALSE;
+		       thread_wakeup((event_t) (&q->pgo_laundry+1));
+	       }
+	}
+}
 
 
 /*
@@ -1041,10 +1054,10 @@ mach_vm_pressure_monitor(
    helps us do the right accounting in certain cases
 */
 
-#define PAGE_STATE_SPECULATIVE	1
-#define PAGE_STATE_THROTTLED	2
-#define PAGE_STATE_ZEROFILL	3
-#define PAGE_STATE_INACTIVE	4
+#define PAGE_STATE_SPECULATIVE		1
+#define PAGE_STATE_ZEROFILL		2
+#define PAGE_STATE_INACTIVE		3
+#define PAGE_STATE_INACTIVE_FIRST	4
 
 #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m)				\
 	MACRO_BEGIN							\
@@ -1065,6 +1078,25 @@ mach_vm_pressure_monitor(
 	}								\
 	MACRO_END
 
+
+#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT  	128
+#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX	1024
+
+#define	FCS_IDLE		0
+#define FCS_DELAYED		1
+#define FCS_DEADLOCK_DETECTED	2
+
+struct flow_control {
+        int		state;
+        mach_timespec_t	ts;
+};
+
+
+/*
+ *	vm_pageout_scan does the dirty work for the pageout daemon.
+ *	It returns with vm_page_queue_free_lock held and
+ *	vm_page_free_wanted == 0.
+ */
 void
 vm_pageout_scan(void)
 {
@@ -1076,6 +1108,7 @@ vm_pageout_scan(void)
 	vm_page_t   local_freeq = NULL;
 	int         local_freed = 0;
 	int         delayed_unlock;
+	int	    delayed_unlock_limit = 0;
 	int	    refmod_state = 0;
         int	vm_pageout_deadlock_target = 0;
 	struct	vm_pageout_queue *iq;
@@ -1084,21 +1117,22 @@ vm_pageout_scan(void)
 	struct  flow_control	flow_control = { 0, { 0, 0 } };
         boolean_t inactive_throttled = FALSE;
 	boolean_t try_failed;
-	mach_timespec_t		ts;
-	unsigned int msecs = 0;
+	mach_timespec_t	ts;
+	unsigned	int msecs = 0;
 	vm_object_t	object;
 	vm_object_t	last_object_tried;
-#if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */
-	unsigned int	zf_ratio;
-	unsigned int	zf_run_count;
-#else
 	uint64_t	zf_ratio;
 	uint64_t	zf_run_count;
-#endif
 	uint32_t	catch_up_count = 0;
 	uint32_t	inactive_reclaim_run;
 	boolean_t	forced_reclaim;
 	int		page_prev_state = 0;
+	int		cache_evict_throttle = 0;
+	uint32_t	vm_pageout_inactive_external_forced_reactivate_limit = 0;
+
+	VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
+		       vm_pageout_speculative_clean, vm_pageout_inactive_clean,
+		       vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
 
 	flow_control.state = FCS_IDLE;
 	iq = &vm_pageout_queue_internal;
@@ -1122,30 +1156,21 @@ vm_pageout_scan(void)
 	inactive_reclaim_run = 0;
 
 
-/*???*/	/*
+	/*
 	 *	We want to gradually dribble pages from the active queue
 	 *	to the inactive queue.  If we let the inactive queue get
 	 *	very small, and then suddenly dump many pages into it,
 	 *	those pages won't get a sufficient chance to be referenced
 	 *	before we start taking them from the inactive queue.
 	 *
-	 *	We must limit the rate at which we send pages to the pagers.
-	 *	data_write messages consume memory, for message buffers and
-	 *	for map-copy objects.  If we get too far ahead of the pagers,
-	 *	we can potentially run out of memory.
-	 *
-	 *	We can use the laundry count to limit directly the number
-	 *	of pages outstanding to the default pager.  A similar
-	 *	strategy for external pagers doesn't work, because
-	 *	external pagers don't have to deallocate the pages sent them,
-	 *	and because we might have to send pages to external pagers
-	 *	even if they aren't processing writes.  So we also
-	 *	use a burst count to limit writes to external pagers.
-	 *
-	 *	When memory is very tight, we can't rely on external pagers to
-	 *	clean pages.  They probably aren't running, because they
-	 *	aren't vm-privileged.  If we kept sending dirty pages to them,
-	 *	we could exhaust the free list.
+	 *	We must limit the rate at which we send pages to the pagers
+	 *	so that we don't tie up too many pages in the I/O queues.
+	 *	We implement a throttling mechanism using the laundry count
+	 * 	to limit the number of pages outstanding to the default
+	 *	and external pagers.  We can bypass the throttles and look
+	 *	for clean pages if the pageout queues don't drain in a timely
+	 *	fashion since this may indicate that the pageout paths are
+	 *	stalled waiting for memory, which only we can provide.
 	 */
 
 
@@ -1163,13 +1188,8 @@ Restart:
 	 *	but at the moment mach vm cannot do this.
 	 */
 	{
-#if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */
- 		uint32_t  total  = vm_page_active_count + vm_page_inactive_count;
- 		uint32_t  normal = total - vm_zf_count;
-#else
 		uint64_t  total  = vm_page_active_count + vm_page_inactive_count;
 		uint64_t  normal = total - vm_zf_count;
-#endif
 
 		/* zf_ratio is the number of zf pages we victimize per normal page */
 		
@@ -1195,8 +1215,16 @@ Restart:
 	 */
 	vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400);
 
+	if (vm_page_speculative_percentage > 50)
+		vm_page_speculative_percentage = 50;
+	else if (vm_page_speculative_percentage <= 0)
+		vm_page_speculative_percentage = 1;
+
 	vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
 								vm_page_inactive_count);
+
+	vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
+
 	object = NULL;
 	last_object_tried = NULL;
 	try_failed = FALSE;
@@ -1215,24 +1243,40 @@ Restart:
 		        vm_page_lock_queues();
 			delayed_unlock = 1;
 		}
+		if (vm_upl_wait_for_pages < 0)
+			vm_upl_wait_for_pages = 0;
 
-		/*
-		 *	Don't sweep through active queue more than the throttle
-		 *	which should be kept relatively low
-		 */
-		active_burst_count = MIN(vm_pageout_burst_active_throttle,
-					 vm_page_active_count);
+		delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;
+
+		if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX)
+			delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;
 
 		/*
-		 *	Move pages from active to inactive.
+		 * Move pages from active to inactive if we're below the target
 		 */
 		if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
 		        goto done_moving_active_pages;
 
-		while (!queue_empty(&vm_page_queue_active) && active_burst_count) {
+		if (object != NULL) {
+			vm_object_unlock(object);
+			object = NULL;
+			vm_pageout_scan_wants_object = VM_OBJECT_NULL;
+		}
+		/*
+		 * Don't sweep through active queue more than the throttle
+		 * which should be kept relatively low
+		 */
+		active_burst_count = MIN(vm_pageout_burst_active_throttle,
+					 vm_page_active_count);
+
+		VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_START,
+			       vm_pageout_inactive, vm_pageout_inactive_used, vm_page_free_count, local_freed);
+
+		VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_NONE,
+			       vm_pageout_speculative_clean, vm_pageout_inactive_clean,
+			       vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
 
-		        if (active_burst_count)
-			       active_burst_count--;
+		while (!queue_empty(&vm_page_queue_active) && active_burst_count--) {
 
 			vm_pageout_active++;
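
The VM_DEBUG_EVENT calls introduced throughout this patch behave like kdebug
tracepoints gated by a global flag. A sketch of the assumed expansion (the
real macro is defined outside this hunk and may differ):

	/* assumed expansion of the tracing macro used in this patch */
	#define VM_DEBUG_EVENT(name, event, control, arg1, arg2, arg3, arg4)	\
	MACRO_BEGIN								\
	if (vm_debug_events)							\
		KERNEL_DEBUG_CONSTANT((event) | (control),			\
				      (arg1), (arg2), (arg3), (arg4), 0);	\
	MACRO_END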
 
@@ -1246,91 +1290,24 @@ Restart:
 			DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
 
 			/*
-			 * Try to lock object; since we've already got the
-			 * page queues lock, we can only 'try' for this one.
-			 * if the 'try' fails, we need to do a mutex_pause
-			 * to allow the owner of the object lock a chance to
-			 * run... otherwise, we're likely to trip over this
-			 * object in the same state as we work our way through
-			 * the queue... clumps of pages associated with the same
-			 * object are fairly typical on the inactive and active queues
-			 */
-			if (m->object != object) {
-			        if (object != NULL) {
-				        vm_object_unlock(object);
-					object = NULL;
-					vm_pageout_scan_wants_object = VM_OBJECT_NULL;
-				}
-			        if (!vm_object_lock_try_scan(m->object)) {
-				        /*
-					 * move page to end of active queue and continue
-					 */
-				        queue_remove(&vm_page_queue_active, m,
-						     vm_page_t, pageq);
-					queue_enter(&vm_page_queue_active, m,
-						    vm_page_t, pageq);
-
-					try_failed = TRUE;
-					
-					m = (vm_page_t) queue_first(&vm_page_queue_active);
-					/*
-					 * this is the next object we're going to be interested in
-					 * try to make sure it's available after the mutex_yield
-					 * returns control
-					 */
-					vm_pageout_scan_wants_object = m->object;
-
-					goto done_with_activepage;
-				}
-				object = m->object;
-
-				try_failed = FALSE;
-			}
-
-			/*
-			 * if the page is BUSY, then we pull it
-			 * off the active queue and leave it alone.
-			 * when BUSY is cleared, it will get stuck
-			 * back on the appropriate queue
-			 */
-			if (m->busy) {
-				queue_remove(&vm_page_queue_active, m,
-					     vm_page_t, pageq);
-				m->pageq.next = NULL;
-				m->pageq.prev = NULL;
-
-				if (!m->fictitious)
-					vm_page_active_count--;
-				m->active = FALSE;
-
-				goto done_with_activepage;
-			}
-
-			/* deal with a rogue "reusable" page */
-			VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m);
-
-			/*
-			 *	Deactivate the page while holding the object
-			 *	locked, so we know the page is still not busy.
-			 *	This should prevent races between pmap_enter
-			 *	and pmap_clear_reference.  The page might be
-			 *	absent or fictitious, but vm_page_deactivate
-			 *	can handle that.
+			 * The page might be absent or busy,
+			 * but vm_page_deactivate can handle that.
 			 */
 			vm_page_deactivate(m);
 
-done_with_activepage:
-			if (delayed_unlock++ > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT || try_failed == TRUE) {
+			if (delayed_unlock++ > delayed_unlock_limit) {
 
-			        if (object != NULL) {
-					vm_pageout_scan_wants_object = VM_OBJECT_NULL;
-				        vm_object_unlock(object);
-					object = NULL;
-				}
 			        if (local_freeq) {
 					vm_page_unlock_queues();
+					
+					VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
+						       vm_page_free_count, local_freed, delayed_unlock_limit, 1);
+
 				        vm_page_free_list(local_freeq, TRUE);
 					
+					VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
+						       vm_page_free_count, 0, 0, 1);
+
 					local_freeq = NULL;
 					local_freed = 0;
 					vm_page_lock_queues();
@@ -1347,6 +1324,8 @@ done_with_activepage:
 			}
 		}
 
+		VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_END,
+			       vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count, vm_page_inactive_target);
 
 
 		/**********************************************************************
@@ -1357,10 +1336,6 @@ done_with_activepage:
 
 done_moving_active_pages:
 
-		/*
-		 *	We are done if we have met our target *and*
-		 *	nobody is still waiting for a page.
-		 */
 		if (vm_page_free_count + local_freed >= vm_page_free_target) {
 			if (object != NULL) {
 			        vm_object_unlock(object);
@@ -1370,45 +1345,55 @@ done_moving_active_pages:
 
 			if (local_freeq) {
 				vm_page_unlock_queues();
+					
+				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
+					       vm_page_free_count, local_freed, delayed_unlock_limit, 2);
+
 			        vm_page_free_list(local_freeq, TRUE);
 					
+				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
+					       vm_page_free_count, local_freed, 0, 2);
+
 				local_freeq = NULL;
 				local_freed = 0;
 				vm_page_lock_queues();
 			}
 			/*
-			 * inactive target still not met... keep going
-			 * until we get the queues balanced
-			 */
-
-			/*
-			 *	Recalculate vm_page_inactivate_target.
+			 * recalculate vm_page_inactivate_target
 			 */
 			vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
 									  vm_page_inactive_count +
 									  vm_page_speculative_count);
-
 #ifndef	CONFIG_EMBEDDED
-			/*
-			 * XXX: if no active pages can be reclaimed, pageout scan can be stuck trying 
-			 *      to balance the queues
-			 */
 			if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
-			    !queue_empty(&vm_page_queue_active))
+			    !queue_empty(&vm_page_queue_active)) {
+				/*
+				 * inactive target still not met... keep going
+				 * until we get the queues balanced...
+				 */
 			        continue;
+			}
 #endif
-
 		        lck_mtx_lock(&vm_page_queue_free_lock);
 
 			if ((vm_page_free_count >= vm_page_free_target) &&
 			    (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
-
+				/*
+				 * done - we have met our target *and*
+				 * there is no one waiting for a page.
+				 */
 			        vm_page_unlock_queues();
 
 				thread_wakeup((event_t) &vm_pageout_garbage_collect);
 
 				assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
 
+				VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
+					       vm_pageout_inactive, vm_pageout_inactive_used, 0, 0);
+				VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
+					       vm_pageout_speculative_clean, vm_pageout_inactive_clean,
+					       vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
+
 				return;
 			}
 			lck_mtx_unlock(&vm_page_queue_free_lock);
@@ -1428,14 +1413,20 @@ done_moving_active_pages:
 			        vm_object_unlock(object);
 				object = NULL;
 			}
-			if(TRUE == vm_purgeable_object_purge_one()) {
+
+			VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
+
+			if (TRUE == vm_purgeable_object_purge_one()) {
+
+				VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
+
 				continue;
 			}
+			VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
 		}
-        
 		if (queue_empty(&sq->age_q) && vm_page_speculative_count) {
 		        /*
-			 * try to pull pages from the aging bins
+			 * try to pull pages from the aging bins...
 			 * see vm_page.h for an explanation of how
 			 * this mechanism works
 			 */
@@ -1458,21 +1449,20 @@ done_moving_active_pages:
 				aq = &vm_page_queue_speculative[speculative_steal_index];
 			}
 
-			if (num_scanned_queues ==
-			    VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
+			if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
 				/*
 				 * XXX We've scanned all the speculative
 				 * queues but still haven't found one
 				 * that is not empty, even though
 				 * vm_page_speculative_count is not 0.
+				 *
+				 * report the anomaly...
 				 */
-				/* report the anomaly... */
 				printf("vm_pageout_scan: "
 				       "all speculative queues empty "
 				       "but count=%d.  Re-adjusting.\n",
 				       vm_page_speculative_count);
-				if (vm_page_speculative_count >
-				    vm_page_speculative_count_drift_max)
+				if (vm_page_speculative_count > vm_page_speculative_count_drift_max)
 					vm_page_speculative_count_drift_max = vm_page_speculative_count;
 				vm_page_speculative_count_drifts++;
 #if 6553678
@@ -1487,8 +1477,8 @@ done_moving_active_pages:
 			if (vm_page_speculative_count > vm_page_speculative_target)
 			        can_steal = TRUE;
 			else {
-			        ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * VM_PAGE_SPECULATIVE_Q_AGE_MS) / 1000;
-				ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * VM_PAGE_SPECULATIVE_Q_AGE_MS) % 1000)
+			        ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) / 1000;
+				ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) % 1000)
 				                      * 1000 * NSEC_PER_USEC;
 
 				ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
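
For concreteness, with illustrative values only (say VM_PAGE_MAX_SPECULATIVE_AGE_Q
== 5 and vm_page_speculative_q_age_ms == 250, neither taken from this patch),
the arithmetic above works out to:

	/* 5 queues * 250 ms = 1250 ms total aging window */
	ts_fully_aged.tv_sec  = 1250 / 1000;		/* 1 second         */
	ts_fully_aged.tv_nsec = (1250 % 1000)		/* 250 ms remainder */
	                      * 1000 * NSEC_PER_USEC;	/* = 250000000 ns   */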
@@ -1505,6 +1495,34 @@ done_moving_active_pages:
 			if (can_steal == TRUE)
 			        vm_page_speculate_ageit(aq);
 		}
+		if (queue_empty(&sq->age_q) && cache_evict_throttle == 0) {
+			int 	pages_evicted;
+
+		        if (object != NULL) {
+			        vm_object_unlock(object);
+				object = NULL;
+			}
+			pages_evicted = vm_object_cache_evict(100, 10);
+
+			if (pages_evicted) {
+
+				vm_pageout_cache_evicted += pages_evicted;
+
+				VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
+					       vm_page_free_count, pages_evicted, vm_pageout_cache_evicted, 0);
+
+				/*
+				 * we just freed up to 100 pages,
+				 * so go back to the top of the main loop
+				 * and re-evaluate the memory situation
+				 */
+				continue;
+			} else
+				cache_evict_throttle = 100;
+		}
+		if (cache_evict_throttle)
+			cache_evict_throttle--;
+
 
 		/*
 		 * Sometimes we have to pause:
@@ -1513,8 +1531,7 @@ done_moving_active_pages:
 		 *	3) Loop control - no acceptable pages found on the inactive queue
 		 *         within the last vm_pageout_burst_inactive_throttle iterations
 		 */
-		if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_zf) && queue_empty(&sq->age_q) &&
-		    (VM_PAGE_Q_THROTTLED(iq) || queue_empty(&vm_page_queue_throttled))) {
+		if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_zf) && queue_empty(&sq->age_q)) {
 		        vm_pageout_scan_empty_throttle++;
 			msecs = vm_pageout_empty_wait;
 			goto vm_pageout_scan_delay;
@@ -1527,7 +1544,8 @@ done_moving_active_pages:
 			msecs = vm_pageout_burst_wait;
 			goto vm_pageout_scan_delay;
 
-		} else if (VM_PAGE_Q_THROTTLED(iq) && IP_VALID(memory_manager_default)) {
+		} else if (VM_PAGE_Q_THROTTLED(iq) && 
+				  VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) {
 			clock_sec_t sec;
 			clock_nsec_t nsec;
 
@@ -1602,8 +1620,15 @@ vm_pageout_scan_delay:
 
 			if (local_freeq) {
 				vm_page_unlock_queues();
+
+				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
+					       vm_page_free_count, local_freed, delayed_unlock_limit, 3);
+
 			        vm_page_free_list(local_freeq, TRUE);
 					
+				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
+					       vm_page_free_count, local_freed, 0, 3);
+
 				local_freeq = NULL;
 				local_freed = 0;
 				vm_page_lock_queues();
@@ -1615,25 +1640,9 @@ vm_pageout_scan_delay:
 					goto consider_inactive;
 				}
 			}
-#if CONFIG_EMBEDDED
-			{
-			int percent_avail;
 
-			/*
-			 * Decide if we need to send a memory status notification.
-			 */
-			percent_avail = 
-				(vm_page_active_count + vm_page_inactive_count + 
-				 vm_page_speculative_count + vm_page_free_count +
-				 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
-				atop_64(max_mem);
-			if (percent_avail >= (kern_memorystatus_level + 5) || 
-			    percent_avail <= (kern_memorystatus_level - 5)) {
-				kern_memorystatus_level = percent_avail;
-				thread_wakeup((event_t)&kern_memorystatus_wakeup);
-			}
-			}
-#endif
+			VM_CHECK_MEMORYSTATUS;
+
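
VM_CHECK_MEMORYSTATUS replaces the open-coded CONFIG_EMBEDDED block deleted
just above. Assuming it preserves that behavior, its expansion would look
roughly like this (modeled directly on the removed code; the real macro may
differ):

	/* assumed expansion, mirroring the removed notification code */
	#define VM_CHECK_MEMORYSTATUS						\
	MACRO_BEGIN								\
	int percent_avail =							\
		(vm_page_active_count + vm_page_inactive_count +		\
		 vm_page_speculative_count + vm_page_free_count +		\
		 (IP_VALID(memory_manager_default) ? 0 : vm_page_purgeable_count)) \
		* 100 / atop_64(max_mem);					\
	if (percent_avail >= (kern_memorystatus_level + 5) ||			\
	    percent_avail <= (kern_memorystatus_level - 5)) {			\
		kern_memorystatus_level = percent_avail;			\
		thread_wakeup((event_t)&kern_memorystatus_wakeup);		\
	}									\
	MACRO_END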
 			assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
 			counter(c_vm_pageout_scan_block++);
 
@@ -1641,8 +1650,14 @@ vm_pageout_scan_delay:
 
 			assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
 
+			VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START, 
+				       iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
+
 			thread_block(THREAD_CONTINUE_NULL);
 
+			VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
+				       iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
+
 			vm_page_lock_queues();
 			delayed_unlock = 1;
 
@@ -1659,6 +1674,8 @@ vm_pageout_scan_delay:
 
 		flow_control.state = FCS_IDLE;
 consider_inactive:
+		vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count), 
+									    vm_pageout_inactive_external_forced_reactivate_limit);
 		loop_count++;
 		inactive_burst_count++;
 		vm_pageout_inactive++;
@@ -1668,7 +1685,7 @@ consider_inactive:
 		while (1) {	
 			m = NULL;
 			
-			if (IP_VALID(memory_manager_default)) {
+			if (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) {
 				assert(vm_page_throttled_count == 0);
 				assert(queue_empty(&vm_page_queue_throttled));
 			}
@@ -1679,6 +1696,8 @@ consider_inactive:
 			 */
 			if ( !queue_empty(&sq->age_q) ) {
 			        m = (vm_page_t) queue_first(&sq->age_q);
+
+				page_prev_state = PAGE_STATE_SPECULATIVE;
 				break;
 			}
 			/*
@@ -1688,6 +1707,8 @@ consider_inactive:
 			     queue_empty(&vm_page_queue_inactive)) {
 				if ( !queue_empty(&vm_page_queue_zf) ) {
 					m = (vm_page_t) queue_first(&vm_page_queue_zf);
+
+					page_prev_state = PAGE_STATE_ZEROFILL;
 					zf_run_count++;
 					break;
 				}
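
page_prev_state records which queue the chosen victim came from so that the
requeue_page path further down can put it back appropriately. The PAGE_STATE_*
constants are defined outside this hunk; only the names appear in the patch,
so the values below are assumptions for illustration:

	#define PAGE_STATE_SPECULATIVE		1
	#define PAGE_STATE_ZEROFILL		2
	#define PAGE_STATE_INACTIVE		3
	#define PAGE_STATE_INACTIVE_FIRST	4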
@@ -1697,21 +1718,25 @@ consider_inactive:
 			 */
                         if ( !queue_empty(&vm_page_queue_inactive) ) {
                                 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
+				
+				page_prev_state = PAGE_STATE_INACTIVE;
                                 zf_run_count = 0;
 				break;
                         }
 
                         panic("vm_pageout: no victim");
 		}
+		VM_PAGE_QUEUES_REMOVE(m);
 
-		assert(!m->active && (m->inactive || m->speculative || m->throttled));
 		assert(!m->laundry);
+		assert(!m->private);
+		assert(!m->fictitious);
 		assert(m->object != kernel_object);
 		assert(m->phys_page != vm_page_guard_addr);
 
-		if (!m->speculative) {
+
+		if (page_prev_state != PAGE_STATE_SPECULATIVE)
 			vm_pageout_stats[vm_pageout_stat_now].considered++;
-		}
 
 		DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
 
@@ -1742,88 +1767,43 @@ consider_inactive:
 			 * object are fairly typical on the inactive and active queues
 			 */
 			if (!vm_object_lock_try_scan(m->object)) {
+				vm_page_t m_want = NULL;
+
 				vm_pageout_inactive_nolock++;
 
-			requeue_page:
-			        /*
-				 *	Move page to end and continue.
-				 * 	Don't re-issue ticket
-				 */
-			        if (m->zero_fill) {
-					if (m->speculative) {
-						panic("vm_pageout_scan(): page %p speculative and zero-fill !?\n", m);
-					}
-					assert(!m->speculative);
-				        queue_remove(&vm_page_queue_zf, m,
-						     vm_page_t, pageq);
-					queue_enter(&vm_page_queue_zf, m,
-						    vm_page_t, pageq);
-				} else if (m->speculative) {
-				        remque(&m->pageq);
-					m->speculative = FALSE;
-					vm_page_speculative_count--;
-					
-					/*
-					 * move to the head of the inactive queue
-					 * to get it out of the way... the speculative
-					 * queue is generally too small to depend
-					 * on there being enough pages from other
-					 * objects to make cycling it back on the
-					 * same queue a winning proposition
-					 */
-					queue_enter_first(&vm_page_queue_inactive, m,
-							  vm_page_t, pageq);
-					m->inactive = TRUE;
-					vm_page_inactive_count++;
-					token_new_pagecount++;
-				}  else if (m->throttled) {
-					queue_remove(&vm_page_queue_throttled, m,
-						     vm_page_t, pageq);
-					m->throttled = FALSE;
-					vm_page_throttled_count--;
-					
-					/*
-					 * not throttled any more, so can stick
-					 * it on the inactive queue.
-					 */
-					queue_enter(&vm_page_queue_inactive, m,
-						    vm_page_t, pageq);
-					m->inactive = TRUE;
-					vm_page_inactive_count++;
-					token_new_pagecount++;
-				} else {
-				        queue_remove(&vm_page_queue_inactive, m,
-						     vm_page_t, pageq);
-#if MACH_ASSERT
-					vm_page_inactive_count--;	/* balance for purgeable queue asserts */
-#endif
-					vm_purgeable_q_advance_all();
+				if (page_prev_state == PAGE_STATE_SPECULATIVE)
+					page_prev_state = PAGE_STATE_INACTIVE_FIRST;
 
-					queue_enter(&vm_page_queue_inactive, m,
-						    vm_page_t, pageq);
-#if MACH_ASSERT
-					vm_page_inactive_count++;	/* balance for purgeable queue asserts */
-#endif
-					token_new_pagecount++;
-				}
 				pmap_clear_reference(m->phys_page);
 				m->reference = FALSE;
 
+				/*
+				 * m->object must be stable since we hold the page queues lock...
+				 * we can update the scan_collisions field sans the object lock
+				 * since it is a separate field and this is the only spot that does
+				 * a read-modify-write operation and it is never executed concurrently...
+				 * we can asynchronously set this field to 0 when creating a UPL, so it
+				 * is possible for the value to be a bit non-deterministic, but that's ok
+				 * since it's only used as a hint
+				 */
+				m->object->scan_collisions++;
+
 				if ( !queue_empty(&sq->age_q) )
-				        m = (vm_page_t) queue_first(&sq->age_q);
+				        m_want = (vm_page_t) queue_first(&sq->age_q);
 				else if ( ((zf_run_count < zf_ratio) && vm_zf_queue_count >= zf_queue_min_count) ||
 					  queue_empty(&vm_page_queue_inactive)) {
 				        if ( !queue_empty(&vm_page_queue_zf) )
-					        m = (vm_page_t) queue_first(&vm_page_queue_zf);
+					        m_want = (vm_page_t) queue_first(&vm_page_queue_zf);
 				} else if ( !queue_empty(&vm_page_queue_inactive) ) {
-				        m = (vm_page_t) queue_first(&vm_page_queue_inactive);
+				        m_want = (vm_page_t) queue_first(&vm_page_queue_inactive);
 				}
 				/*
 				 * this is the next object we're going to be interested in
				 * try to make sure it's available after the mutex_yield
 				 * returns control
 				 */
-				vm_pageout_scan_wants_object = m->object;
+				if (m_want)
+					vm_pageout_scan_wants_object = m_want->object;
 
 				/*
 				 * force us to dump any collected free pages
@@ -1831,99 +1811,85 @@ consider_inactive:
 				 */
 				try_failed = TRUE;
 
-				goto done_with_inactivepage;
+				goto requeue_page;
 			}
 			object = m->object;
 			vm_pageout_scan_wants_object = VM_OBJECT_NULL;
 
 			try_failed = FALSE;
 		}
+		if (catch_up_count)
+		        catch_up_count--;
 
-		/*
-		 *	Paging out pages of external objects which
-		 *	are currently being created must be avoided.
-		 *	The pager may claim for memory, thus leading to a
-		 *	possible dead lock between it and the pageout thread,
-		 *	if such pages are finally chosen. The remaining assumption
-		 *	is that there will finally be enough available pages in the
-		 *	inactive pool to page out in order to satisfy all memory
-		 *	claimed by the thread which concurrently creates the pager.
-		 */
-		if (!object->pager_initialized && object->pager_created) {
-			/*
-			 *	Move page to end and continue, hoping that
-			 *	there will be enough other inactive pages to
-			 *	page out so that the thread which currently
-			 *	initializes the pager will succeed.
-			 *	Don't re-grant the ticket, the page should
-			 *	pulled from the queue and paged out whenever
-			 *	one of its logically adjacent fellows is
-			 *	targeted.
-			 */
-			vm_pageout_inactive_avoid++;
-			goto requeue_page;
-		}
-		/*
-		 *	Remove the page from its list.
-		 */
-		if (m->speculative) {
-			remque(&m->pageq);
-			page_prev_state = PAGE_STATE_SPECULATIVE;
-			m->speculative = FALSE;
-			vm_page_speculative_count--;
-		} else if (m->throttled) {
-			queue_remove(&vm_page_queue_throttled, m, vm_page_t, pageq);
-			page_prev_state = PAGE_STATE_THROTTLED;
-			m->throttled = FALSE;
-			vm_page_throttled_count--;
-		} else {
-			if (m->zero_fill) {
-				queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);
-				page_prev_state = PAGE_STATE_ZEROFILL;
-				vm_zf_queue_count--;
-			} else {
-				page_prev_state = PAGE_STATE_INACTIVE;
-			        queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
+		if (m->busy) {
+			if (m->encrypted_cleaning) {
+				/*
+				 * ENCRYPTED SWAP:
+				 * if this page has already been picked up as
+				 * part of a page-out cluster, it will be busy 
+				 * because it is being encrypted (see
+				 * vm_object_upl_request()).  But we still
+				 * want to demote it from "clean-in-place"
+				 * (aka "adjacent") to "clean-and-free" (aka
+				 * "target"), so let's ignore its "busy" bit
+				 * here and proceed to check for "cleaning" a
+				 * little bit below...
+				 *
+				 * CAUTION CAUTION:
+				 * A "busy" page should still be left alone for
+				 * most purposes, so we have to be very careful
+				 * not to process that page too much.
+				 */
+				assert(m->cleaning);
+				goto consider_inactive_page;
 			}
-			m->inactive = FALSE;
-			if (!m->fictitious)
-				vm_page_inactive_count--;
-			vm_purgeable_q_advance_all();
-		}
-
-		m->pageq.next = NULL;
-		m->pageq.prev = NULL;
-
-		if ( !m->fictitious && catch_up_count)
-		        catch_up_count--;
 
-		/*
-		 * ENCRYPTED SWAP:
-		 * if this page has already been picked up as part of a
-		 * page-out cluster, it will be busy because it is being
-		 * encrypted (see vm_object_upl_request()).  But we still
-		 * want to demote it from "clean-in-place" (aka "adjacent")
-		 * to "clean-and-free" (aka "target"), so let's ignore its
-		 * "busy" bit here and proceed to check for "cleaning" a
-		 * little bit below...
-		 */
-		if ( !m->encrypted_cleaning && (m->busy || !object->alive)) {
 			/*
 			 *	Somebody is already playing with this page.
-			 *	Leave it off the pageout queues.
+			 *	Put it back on the appropriate queue
 			 *
 			 */
 			vm_pageout_inactive_busy++;
+requeue_page:
+			switch (page_prev_state) {
+
+			case PAGE_STATE_SPECULATIVE:
+				vm_page_speculate(m, FALSE);
+				break;
 
+			case PAGE_STATE_ZEROFILL:
+				m->zero_fill = TRUE;
+				/*
+				 * fall through to add in the
+				 * inactive state
+				 */
+			case PAGE_STATE_INACTIVE:
+				VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
+				break;
+
+			case PAGE_STATE_INACTIVE_FIRST:
+				VM_PAGE_ENQUEUE_INACTIVE(m, TRUE);
+				break;
+			}
 			goto done_with_inactivepage;
 		}
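
VM_PAGE_ENQUEUE_INACTIVE's second argument selects head versus tail insertion
on the inactive queue. A sketch of the assumed semantics, modeled on the
open-coded enqueue paths this patch deletes elsewhere:

	/* assumed shape, mirroring the removed open-coded enqueues */
	#define VM_PAGE_ENQUEUE_INACTIVE(m, first)				\
	MACRO_BEGIN								\
	if ((m)->zero_fill) {							\
		queue_enter(&vm_page_queue_zf, (m), vm_page_t, pageq);		\
		vm_zf_queue_count++;						\
	} else if (first)							\
		queue_enter_first(&vm_page_queue_inactive, (m), vm_page_t, pageq); \
	else									\
		queue_enter(&vm_page_queue_inactive, (m), vm_page_t, pageq);	\
	(m)->inactive = TRUE;							\
	if (!(m)->fictitious)							\
		vm_page_inactive_count++;					\
	token_new_pagecount++;							\
	MACRO_END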
 
+
 		/*
-		 *	If it's absent or in error, we can reclaim the page.
+		 *	If it's absent, in error or the object is no longer alive,
+		 *	we can reclaim the page... in the no longer alive case,
+		 *	there are 2 states the page can be in that preclude us
+		 *	from reclaiming it - busy or cleaning - that we've already
+		 *	dealt with
 		 */
+		if (m->absent || m->error || !object->alive) {
 
-		if (m->absent || m->error) {
-			vm_pageout_inactive_absent++;
+			if (m->absent)
+				vm_pageout_inactive_absent++;
+			else if (!object->alive)
+				vm_pageout_inactive_notalive++;
+			else
+				vm_pageout_inactive_error++;
 reclaim_page:
 			if (vm_pageout_deadlock_target) {
 				vm_pageout_scan_inactive_throttle_success++;
@@ -1956,37 +1922,11 @@ reclaim_page:
 
 			inactive_burst_count = 0;
 
-			if(page_prev_state != PAGE_STATE_SPECULATIVE) {
+			if (page_prev_state != PAGE_STATE_SPECULATIVE)
 				vm_pageout_stats[vm_pageout_stat_now].reclaimed++;
-				page_prev_state = 0;
-			}
 
 			goto done_with_inactivepage;
 		}
-
-		assert(!m->private);
-		assert(!m->fictitious);
-
-		/*
-		 *	If already cleaning this page in place, convert from
-		 *	"adjacent" to "target". We can leave the page mapped,
-		 *	and vm_pageout_object_terminate will determine whether
-		 *	to free or reactivate.
-		 */
-
-		if (m->cleaning) {
-			m->busy = TRUE;
-			m->pageout = TRUE;
-			m->dump_cleaning = TRUE;
-			vm_page_wire(m);
-
-			CLUSTER_STAT(vm_pageout_cluster_conversions++);
-
-			inactive_burst_count = 0;
-
-			goto done_with_inactivepage;
-		}
-
 		/*
 		 * If the object is empty, the page must be reclaimed even
 		 * if dirty or used.
@@ -2012,11 +1952,35 @@ reclaim_page:
 			if (object->purgable == VM_PURGABLE_VOLATILE) {
 				/* if it's wired, we can't put it on our queue */
 				assert(!VM_PAGE_WIRED(m));
+
 				/* just stick it back on! */
+				reactivated_this_call++;
 				goto reactivate_page;
 			}
 		}
 
+	consider_inactive_page:
+		if (m->busy) {
+			/*
+			 * CAUTION CAUTION:
+			 * A "busy" page should always be left alone, except...
+			 */
+			if (m->cleaning && m->encrypted_cleaning) {
+				/*
+				 * ENCRYPTED_SWAP:
+				 * We could get here with a "busy" page 
+				 * if it's being encrypted during a
+				 * "clean-in-place" operation.  We'll deal
+				 * with it right away by testing if it has been
+				 * referenced and either reactivating it or
+				 * promoting it from "clean-in-place" to
+				 * "clean-and-free".
+				 */
+			} else {
+				panic("\"busy\" page considered for pageout\n");
+			}
+		}
+
 		/*
 		 *	If it's being used, reactivate.
 		 *	(Fictitious pages are either busy or absent.)
@@ -2034,6 +1998,35 @@ reclaim_page:
 			        m->dirty = TRUE;
 		}
 
+		/*
+		 *	If already cleaning this page in place and it hasn't
+		 *      been recently referenced, convert from
+		 *	"adjacent" to "target". We can leave the page mapped,
+		 *	and upl_commit_range will determine whether
+		 *	to free or reactivate.
+		 *
+		 *	note: if m->encrypted_cleaning == TRUE, then
+		 *		m->cleaning == TRUE
+		 *	and we'll handle it here
+		 */
+		if (m->cleaning) {
+			
+			if (m->reference == TRUE) {
+				reactivated_this_call++;
+				goto reactivate_page;
+			}
+			m->busy = TRUE;
+			m->pageout = TRUE;
+			m->dump_cleaning = TRUE;
+			vm_page_wire(m);
+
+			CLUSTER_STAT(vm_pageout_cluster_conversions++);
+
+			inactive_burst_count = 0;
+
+			goto done_with_inactivepage;
+		}
+
 		if (m->reference || m->dirty) {
 			/* deal with a rogue "reusable" page */
 			VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m);
@@ -2123,33 +2116,75 @@ reactivate_page:
 				inactive_throttled = TRUE;
 			}
 		}
-		if (inactive_throttled == TRUE) {
 throttle_inactive:
-			if (!IP_VALID(memory_manager_default) &&
-			    object->internal && m->dirty &&
-			    (object->purgable == VM_PURGABLE_DENY ||
-			     object->purgable == VM_PURGABLE_NONVOLATILE ||
-			     object->purgable == VM_PURGABLE_VOLATILE)) {
-			        queue_enter(&vm_page_queue_throttled, m,
-					    vm_page_t, pageq);
-				m->throttled = TRUE;
-				vm_page_throttled_count++;
-			} else {
-			        if (m->zero_fill) {
-					queue_enter(&vm_page_queue_zf, m,
-						    vm_page_t, pageq);
-					vm_zf_queue_count++;
-				} else 
-					queue_enter(&vm_page_queue_inactive, m,
-						    vm_page_t, pageq);
-				m->inactive = TRUE;
-				if (!m->fictitious) {
-				        vm_page_inactive_count++;
-					token_new_pagecount++;
+		if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
+		    object->internal && m->dirty &&
+		    (object->purgable == VM_PURGABLE_DENY ||
+		     object->purgable == VM_PURGABLE_NONVOLATILE ||
+		     object->purgable == VM_PURGABLE_VOLATILE)) {
+			queue_enter(&vm_page_queue_throttled, m,
+				    vm_page_t, pageq);
+			m->throttled = TRUE;
+			vm_page_throttled_count++;
+
+			vm_pageout_scan_reclaimed_throttled++;
+
+			goto done_with_inactivepage;
+		}
+		if (inactive_throttled == TRUE) {
+
+			if (object->internal)
+				vm_pageout_scan_inactive_throttled_internal++;
+			else
+				vm_pageout_scan_inactive_throttled_external++;			
+
+			if (page_prev_state == PAGE_STATE_SPECULATIVE)
+				page_prev_state = PAGE_STATE_INACTIVE;
+
+			if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && object->internal == FALSE) {
+				/*
+				 * a) The external pageout queue is throttled
+				 * b) We're done with the active queue and moved on to the inactive queue
+				 * c) We start noticing dirty pages and usually we would put them at the end of the inactive queue, but,
+				 * d) We don't have a default pager, and so,
+				 * e) We push these onto the active queue in an effort to cause a re-evaluation of the active queue 
+				 *    and get back some, possibly clean, pages.
+				 *
+				 * We also keep a count of the pages of this kind, since they are a good
+				 * indicator of a deadlock on systems without a dynamic pager, where:
+				 * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
+				 * b) The thread doing the writing is waiting for pages while holding the truncate lock
+				 * c) Most of the pages in the inactive queue belong to this file.
+				 */
+				
+				vm_page_activate(m);
+				vm_pageout_inactive_external_forced_reactivate_count++;
+				vm_pageout_inactive_external_forced_reactivate_limit--;
+
+				if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
+					vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
+#if CONFIG_EMBEDDED
+					/*
+					 * Possible deadlock scenario so request jetsam action
+					 */
+					assert(object);
+					vm_object_unlock(object);
+					object = VM_OBJECT_NULL;
+					vm_page_unlock_queues();
+
+					if (jetsam_kill_top_proc(TRUE, kJetsamFlagsKilledVM) < 0) {
+						panic("vm_pageout_scan: Jetsam request failed\n");	
+					}
+
+					vm_page_lock_queues();	
+					delayed_unlock = 1;
+#endif
 				}
+				inactive_burst_count = 0;
+				goto done_with_inactivepage;
+			} else {
+				goto requeue_page;
 			}
-			vm_pageout_scan_inactive_throttled++;
-			goto done_with_inactivepage;
 		}
 
 		/*
@@ -2211,10 +2246,14 @@ throttle_inactive:
 		 *	If it's clean and not precious, we can free the page.
 		 */
 		if (!m->dirty && !m->precious) {
-			if (m->zero_fill)
-				vm_pageout_inactive_zf++;
-			vm_pageout_inactive_clean++;
 
+			if (page_prev_state == PAGE_STATE_SPECULATIVE)
+				vm_pageout_speculative_clean++;
+			else {
+				if (page_prev_state == PAGE_STATE_ZEROFILL)
+					vm_pageout_inactive_zf++;
+				vm_pageout_inactive_clean++;
+			}
 			goto reclaim_page;
 		}
 
@@ -2224,33 +2263,38 @@ throttle_inactive:
 		 * if the page was clean then).  With the dirty page
 		 * disconnected here, we can make one final check.
 		 */
-		{
-			boolean_t disconnect_throttled = FALSE;
-			if (object->internal) {
-				if (VM_PAGE_Q_THROTTLED(iq))
-					disconnect_throttled = TRUE;
-			} else if (VM_PAGE_Q_THROTTLED(eq)) {
-				disconnect_throttled = TRUE;
-			}
+		if (object->internal) {
+			if (VM_PAGE_Q_THROTTLED(iq))
+				inactive_throttled = TRUE;
+		} else if (VM_PAGE_Q_THROTTLED(eq)) {
+			inactive_throttled = TRUE;
+		}
 
-			if (disconnect_throttled == TRUE) {
-				PAGE_WAKEUP_DONE(m);
-				goto throttle_inactive;
-			}
+		if (inactive_throttled == TRUE) {
+			/*
+			 * we set busy before issuing the pmap_disconnect,
+			 * so clear it and wakeup anyone that happened upon
+			 * it in that state
+			 */
+			PAGE_WAKEUP_DONE(m);
+			goto throttle_inactive;
 		}
 
 		vm_pageout_stats[vm_pageout_stat_now].reclaimed++;
 
 		vm_pageout_cluster(m);
 
-		if (m->zero_fill)
+		if (page_prev_state == PAGE_STATE_ZEROFILL)
 			vm_pageout_inactive_zf++;
-		vm_pageout_inactive_dirty++;
-
+		if (object->internal)
+			vm_pageout_inactive_dirty_internal++;
+		else
+			vm_pageout_inactive_dirty_external++;
+
 		inactive_burst_count = 0;
 
 done_with_inactivepage:
-		if (delayed_unlock++ > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT || try_failed == TRUE) {
+		if (delayed_unlock++ > delayed_unlock_limit || try_failed == TRUE) {
 
 		        if (object != NULL) {
 				vm_pageout_scan_wants_object = VM_OBJECT_NULL;
@@ -2259,8 +2303,15 @@ done_with_inactivepage:
 			}
 		        if (local_freeq) {
 				vm_page_unlock_queues();
+
+				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
+					       vm_page_free_count, local_freed, delayed_unlock_limit, 4);
+
 			        vm_page_free_list(local_freeq, TRUE);
 				
+				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
+					       vm_page_free_count, local_freed, 0, 4);
+
 				local_freeq = NULL;
 				local_freed = 0;
 				vm_page_lock_queues();
@@ -2286,6 +2337,9 @@ vm_page_free_reserve(
 
 	vm_page_free_reserved += pages;
 
+	if (vm_page_free_reserved > VM_PAGE_FREE_RESERVED_LIMIT)
+		vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT;
+
 	free_after_reserve = vm_page_free_count_init - vm_page_free_reserved;
 
 	vm_page_free_min = vm_page_free_reserved +
@@ -2357,6 +2411,9 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q)
 
 		   q->pgo_busy = TRUE;
 		   queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq);
+		   if (m->object == slide_info.slide_object) {
+			   panic("slid page %p not allowed on this path\n", m);
+		   }
 		   VM_PAGE_CHECK(m);
 		   m->pageout_queue = FALSE;
 		   m->pageq.next = NULL;
@@ -2410,8 +2467,8 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q)
 			           vm_page_lockspin_queues();
 
 				   vm_pageout_queue_steal(m, TRUE);
-				   vm_pageout_dirty_no_pager++;
 				   vm_page_activate(m);
+				   vm_pageout_dirty_no_pager++;
 
 				   vm_page_unlock_queues();
 
@@ -2559,6 +2616,8 @@ vm_pageout_garbage_collect(int collect)
 		consider_zone_gc(buf_large_zfree);
 
 		consider_machine_adjust();
+		consider_pressure_events();
+		
 	}
 
 	assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
@@ -2720,51 +2779,6 @@ vm_pageout_internal_start(void)
 }
 
 
-/*
- * when marshalling pages into a UPL and subsequently committing
- * or aborting them, it is necessary to hold 
- * the vm_page_queue_lock (a hot global lock) for certain operations
- * on the page... however, the majority of the work can be done
- * while merely holding the object lock... in fact there are certain
- * collections of pages that don't require any work brokered by the
- * vm_page_queue_lock... to mitigate the time spent behind the global
- * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
- * while doing all of the work that doesn't require the vm_page_queue_lock...
- * then call dw_do_work to acquire the vm_page_queue_lock and do the
- * necessary work for each page... we will grab the busy bit on the page
- * if it's not already held so that dw_do_work can drop the object lock
- * if it can't immediately take the vm_page_queue_lock in order to compete
- * for the locks in the same order that vm_pageout_scan takes them.
- * the operation names are modeled after the names of the routines that
- * need to be called in order to make the changes very obvious in the
- * original loop
- */
-
-#define DELAYED_WORK_LIMIT	32
-
-#define DW_vm_page_unwire		0x01
-#define DW_vm_page_wire			0x02
-#define DW_vm_page_free			0x04
-#define DW_vm_page_activate		0x08
-#define DW_vm_page_deactivate_internal	0x10
-#define DW_vm_page_speculate	 	0x20
-#define DW_vm_page_lru		 	0x40
-#define DW_vm_pageout_throttle_up	0x80
-#define DW_PAGE_WAKEUP			0x100
-#define DW_clear_busy			0x200
-#define DW_clear_reference		0x400
-#define DW_set_reference		0x800
-
-struct dw {
-	vm_page_t	dw_m;
-	int		dw_mask;
-};
-
-
-static void dw_do_work(vm_object_t object, struct dw *dwp, int dw_count);
-
-
-
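
The machinery removed here survives in generalized form: struct
vm_page_delayed_work, VM_PAGE_ADD_DELAYED_WORK and vm_page_do_delayed_work
now provide the same collect-then-commit pattern for the call sites below.
A skeleton of that pattern as this patch uses it (iteration pseudo-coded;
DW_vm_page_activate chosen arbitrarily as the example op):

	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;
	int				dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);

	while (more_pages_to_process) {			/* pseudo-condition */
		/* per-page work done under the object lock */
		dwp->dw_mask |= DW_vm_page_activate;	/* record the op, defer it */
		VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);

		if (dw_count >= dw_limit) {
			/* commit a full batch under the page-queues lock */
			vm_page_do_delayed_work(object, &dw_array[0], dw_count);
			dwp = &dw_array[0];
			dw_count = 0;
		}
	}
	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);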
 static upl_t
 upl_create(int type, int flags, upl_size_t size)
 {
@@ -2797,6 +2811,7 @@ upl_create(int type, int flags, upl_size_t size)
 	upl->size = 0;
 	upl->map_object = NULL;
 	upl->ref_count = 1;
+	upl->ext_ref_count = 0;
 	upl->highest_page = 0;
 	upl_lock_init(upl);
 	upl->vector_upl = NULL;
@@ -2821,6 +2836,10 @@ upl_destroy(upl_t upl)
 	int	page_field_size;  /* bit field in word size buf */
         int	size;
 
+	if (upl->ext_ref_count) {
+		panic("upl(%p) ext_ref_count", upl);
+	}
+
 #if UPL_DEBUG
 	{
 		vm_object_t	object;
@@ -2864,14 +2883,6 @@ upl_destroy(upl_t upl)
 	}
 }
 
-void uc_upl_dealloc(upl_t upl);
-__private_extern__ void
-uc_upl_dealloc(upl_t upl)
-{
-	if (--upl->ref_count == 0)
-		upl_destroy(upl);
-}
-
 void
 upl_deallocate(upl_t upl)
 {
@@ -2952,6 +2963,7 @@ vm_object_upl_request(
 	vm_page_t		dst_page = VM_PAGE_NULL;
 	vm_object_offset_t	dst_offset;
 	upl_size_t		xfer_size;
+	unsigned int		size_in_pages;
 	boolean_t		dirty;
 	boolean_t		hw_dirty;
 	upl_t			upl = NULL;
@@ -2963,9 +2975,10 @@ vm_object_upl_request(
         int			refmod_state = 0;
 	wpl_array_t 		lite_list = NULL;
 	vm_object_t		last_copy_object;
-	struct	dw		dw_array[DELAYED_WORK_LIMIT];
-	struct	dw		*dwp;
+	struct	vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
+	struct	vm_page_delayed_work	*dwp;
 	int			dw_count;
+	int			dw_limit;
 
 	if (cntrl_flags & ~UPL_VALID_FLAGS) {
 		/*
@@ -3037,7 +3050,7 @@ vm_object_upl_request(
 		upl->map_object->pageout = TRUE;
 		upl->map_object->can_persist = FALSE;
 		upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
-		upl->map_object->shadow_offset = offset;
+		upl->map_object->vo_shadow_offset = offset;
 		upl->map_object->wimg_bits = object->wimg_bits;
 
 		VM_PAGE_GRAB_FICTITIOUS(alias_page);
@@ -3101,9 +3114,15 @@ vm_object_upl_request(
 
 	xfer_size = size;
 	dst_offset = offset;
+	size_in_pages = size / PAGE_SIZE;
 
 	dwp = &dw_array[0];
 	dw_count = 0;
+	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
+
+	if (vm_page_free_count > (vm_page_free_target + size_in_pages) ||
+	    object->resident_page_count < (MAX_UPL_SIZE * 2))
+		object->scan_collisions = 0;
 
 	while (xfer_size) {
 
@@ -3174,7 +3193,7 @@ vm_object_upl_request(
 				 * currently on the inactive queue or it meets the page
 				 * ticket (generation count) check
 				 */
-				if ( (cntrl_flags & UPL_CLEAN_IN_PLACE || !(refmod_state & VM_MEM_REFERENCED)) && 
+				if ( (cntrl_flags & UPL_CLEAN_IN_PLACE || !(refmod_state & VM_MEM_REFERENCED) || dst_page->throttled) && 
 				     ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) {
 				        goto check_busy;
 				}
@@ -3243,12 +3262,7 @@ check_busy:
 
 				vm_page_lockspin_queues();
 
-#if CONFIG_EMBEDDED
-				if (dst_page->laundry)
-#else
-				if (dst_page->pageout_queue == TRUE)
-#endif
-				{
+				if (dst_page->pageout_queue == TRUE) {
 					/*
 					 * we've buddied up a page for a clustered pageout
 					 * that has already been moved to the pageout
@@ -3452,7 +3466,7 @@ check_busy:
 						 * the default_pager case
 						 */
 					        dst_page->list_req_pending = FALSE;
-						dst_page->busy = FALSE;
+						PAGE_WAKEUP_DONE(dst_page);
 
 					} else if (dst_page->pageout || dst_page->cleaning) {
 						/*
@@ -3471,9 +3485,8 @@ check_busy:
 						 * so undo all of the state that vm_pageout_scan
 						 * hung on this page
 						 */
-						dst_page->busy = FALSE;
-
 					       	vm_pageout_queue_steal(dst_page, FALSE);
+						PAGE_WAKEUP_DONE(dst_page);
 					}
 				}
 			}
@@ -3493,11 +3506,31 @@ check_busy:
 
 					goto try_next_page;
 				}
-				/*
-				 * need to allocate a page
-				 */
-		 		dst_page = vm_page_grab();
+				if (object->scan_collisions) {
+					/*
+					 * the pageout_scan thread is trying to steal
+					 * pages from this object, but has run into our
+					 * lock... grab 2 pages from the head of the object...
+					 * the first is freed on behalf of pageout_scan, the
+					 * 2nd is for our own use... we use vm_object_page_grab
+					 * in both cases to avoid taking pages from the free
+					 * list since we are under memory pressure and our
+					 * lock on this object is getting in the way of
+					 * relieving it
+					 */
+					dst_page = vm_object_page_grab(object);
+
+					if (dst_page != VM_PAGE_NULL)
+						vm_page_release(dst_page);
 
+					dst_page = vm_object_page_grab(object);
+				}
+				if (dst_page == VM_PAGE_NULL) {
+					/*
+					 * need to allocate a page
+					 */
+					dst_page = vm_page_grab();
+				}
 				if (dst_page == VM_PAGE_NULL) {
 				        if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
 					       /*
@@ -3516,7 +3549,16 @@ check_busy:
 					 * offset...
 					 */
 					vm_object_unlock(object);
+					
+					OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
+
+					VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
+
 					VM_PAGE_WAIT();
+					OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
+
+					VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
+
 					vm_object_lock(object);
 
 					continue;
@@ -3613,7 +3655,12 @@ check_busy:
 				alias_page = NULL;
 			}
 
-			if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
+			if (cntrl_flags & UPL_REQUEST_SET_DIRTY) {
+				upl->flags &= ~UPL_CLEAR_DIRTY;
+				upl->flags |= UPL_SET_DIRTY;
+				dirty = TRUE;
+			} else if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
 				/*
 				 * clean in place for read implies
 				 * that a write will be done on all
@@ -3649,7 +3696,16 @@ check_busy:
 				 */
 				dwp->dw_mask |= DW_set_reference;
 			}
-			dst_page->precious = (cntrl_flags & UPL_PRECIOUS) ? TRUE : FALSE;
+			if (cntrl_flags & UPL_PRECIOUS) {
+				if (dst_page->object->internal) {
+					dst_page->dirty = TRUE;
+					dst_page->precious = FALSE;
+				} else {
+					dst_page->precious = TRUE;
+				}
+			} else {
+				dst_page->precious = FALSE;
+			}
 		}
 		if (dst_page->busy)
 			upl->flags |= UPL_HAS_BUSY;
@@ -3690,21 +3746,10 @@ try_next_page:
 			if (dwp->dw_mask & DW_vm_page_activate)
 				VM_STAT_INCR(reactivations);
 
-			if (dst_page->busy == FALSE) {
-				/*
-				 * dw_do_work may need to drop the object lock
-				 * if it does, we need the pages it's looking at to
-				 * be held stable via the busy bit.
-				 */
-				dst_page->busy = TRUE;
-				dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
-			}
-			dwp->dw_m = dst_page;
-			dwp++;
-			dw_count++;
+			VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
 
-			if (dw_count >= DELAYED_WORK_LIMIT) {
-				dw_do_work(object, &dw_array[0], dw_count);
+			if (dw_count >= dw_limit) {
+				vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 
 				dwp = &dw_array[0];
 				dw_count = 0;
@@ -3715,7 +3760,7 @@ try_next_page:
 		xfer_size -= PAGE_SIZE;
 	}
 	if (dw_count)
-		dw_do_work(object, &dw_array[0], dw_count);
+		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 
 	if (alias_page != NULL) {
 		VM_PAGE_FREE(alias_page);
@@ -3825,7 +3870,7 @@ vm_object_super_upl_request(
 
 		base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
 		super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
-		super_size_64 = ((base_offset + super_size) > object->size) ? (object->size - base_offset) : super_size;
+		super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size;
 		super_size = (upl_size_t) super_size_64;
 		assert(super_size == super_size_64);
 
@@ -3999,7 +4044,7 @@ REDISCOVER_ENTRY:
 							       (vm_object_offset_t)
 							       ((offset - local_start) +
 								local_offset) +
-							       local_object->shadow_offset,
+							       local_object->vo_shadow_offset,
 							       *upl_size, FALSE, 
 							       MEMORY_OBJECT_DATA_SYNC,
 							       VM_PROT_NO_CHANGE);
@@ -4132,6 +4177,7 @@ process_upl_to_enter:
 		upl =  vector_upl_subupl_byindex(vector_upl, curr_upl++ );
 		if(upl == NULL)
 			goto process_upl_to_enter;
+
 		vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
 		*dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
 	} else {
@@ -4169,9 +4215,9 @@ process_upl_to_enter:
 		upl->map_object->pageout = TRUE;
 		upl->map_object->can_persist = FALSE;
 		upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
-		upl->map_object->shadow_offset = upl->offset - object->paging_offset;
+		upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset;
 		upl->map_object->wimg_bits = object->wimg_bits;
-		offset = upl->map_object->shadow_offset;
+		offset = upl->map_object->vo_shadow_offset;
 		new_offset = 0;
 		size = upl->size;
 
@@ -4246,6 +4292,7 @@ process_upl_to_enter:
 	        offset = 0;
 	else
 	        offset = upl->offset - upl->map_object->paging_offset;
+
 	size = upl->size;
 	
 	vm_object_reference(upl->map_object);
@@ -4277,9 +4324,6 @@ process_upl_to_enter:
 		m = vm_page_lookup(upl->map_object, offset);
 
 		if (m) {
-		        unsigned int	cache_attr;
-			cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
-
 			m->pmapped = TRUE;
 
 			/* CODE SIGNING ENFORCEMENT: page has been wpmapped, 
@@ -4288,7 +4332,7 @@ process_upl_to_enter:
 			/* m->wpmapped = TRUE; */
 			assert(map==kernel_map);
 	
-			PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, cache_attr, TRUE);
+			PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, 0, TRUE);
 		}
 		offset += PAGE_SIZE_64;
 	}
@@ -4411,88 +4455,6 @@ process_upl_to_remove:
 	return KERN_FAILURE;
 }
 
-static void
-dw_do_work(
-	vm_object_t 	object,
-	struct dw 	*dwp,
-	int		dw_count)
-{
-	int		j;
-	boolean_t	held_as_spin = TRUE;
-
-	/*
-	 * pageout_scan takes the vm_page_lock_queues first
-	 * then tries for the object lock... to avoid what
-	 * is effectively a lock inversion, we'll go to the
-	 * trouble of taking them in that same order... otherwise
-	 * if this object contains the majority of the pages resident
-	 * in the UBC (or a small set of large objects actively being
-	 * worked on contain the majority of the pages), we could
-	 * cause the pageout_scan thread to 'starve' in its attempt
-	 * to find pages to move to the free queue, since it has to
-	 * successfully acquire the object lock of any candidate page
-	 * before it can steal/clean it.
-	 */
-	if (!vm_page_trylockspin_queues()) {
-		vm_object_unlock(object);
-
-		vm_page_lockspin_queues();
-
-		for (j = 0; ; j++) {
-			if (!vm_object_lock_avoid(object) &&
-			    _vm_object_lock_try(object))
-				break;
-			vm_page_unlock_queues();
-			mutex_pause(j);
-			vm_page_lockspin_queues();
-		}
-	}
-	for (j = 0; j < dw_count; j++, dwp++) {
-
-		if (dwp->dw_mask & DW_vm_pageout_throttle_up)
-			vm_pageout_throttle_up(dwp->dw_m);
-
-		if (dwp->dw_mask & DW_vm_page_wire)
-			vm_page_wire(dwp->dw_m);
-		else if (dwp->dw_mask & DW_vm_page_unwire) {
-			boolean_t	queueit;
-
-			queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
-
-			vm_page_unwire(dwp->dw_m, queueit);
-		}
-		if (dwp->dw_mask & DW_vm_page_free) {
-			if (held_as_spin == TRUE) {
-				vm_page_lockconvert_queues();
-				held_as_spin = FALSE;
-			}
-			vm_page_free(dwp->dw_m);
-		} else {
-			if (dwp->dw_mask & DW_vm_page_deactivate_internal)
-				vm_page_deactivate_internal(dwp->dw_m, FALSE);
-			else if (dwp->dw_mask & DW_vm_page_activate)
-				vm_page_activate(dwp->dw_m);
-			else if (dwp->dw_mask & DW_vm_page_speculate)
-				vm_page_speculate(dwp->dw_m, TRUE);
-			else if (dwp->dw_mask & DW_vm_page_lru)
-				vm_page_lru(dwp->dw_m);
-			
-			if (dwp->dw_mask & DW_set_reference)
-				dwp->dw_m->reference = TRUE;
-			else if (dwp->dw_mask & DW_clear_reference)
-				dwp->dw_m->reference = FALSE;
-
-			if (dwp->dw_mask & DW_clear_busy)
-				dwp->dw_m->busy = FALSE;
-
-			if (dwp->dw_mask & DW_PAGE_WAKEUP)
-				PAGE_WAKEUP(dwp->dw_m);
-		}
-	}
-	vm_page_unlock_queues();
-}
-
-
 
 kern_return_t
 upl_commit_range(
@@ -4514,10 +4476,13 @@ upl_commit_range(
 	int			occupied;
 	int			clear_refmod = 0;
 	int			pgpgout_count = 0;
-	struct	dw		dw_array[DELAYED_WORK_LIMIT];
-	struct	dw		*dwp;
-	int			dw_count, isVectorUPL = 0;
+	struct	vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
+	struct	vm_page_delayed_work	*dwp;
+	int			dw_count;
+	int			dw_limit;
+	int			isVectorUPL = 0;
 	upl_t			vector_upl = NULL;
+	boolean_t		should_be_throttled = FALSE;
 
 	*empty = FALSE;
 
@@ -4575,6 +4540,8 @@ process_upl_to_commit:
 		}
 		return KERN_FAILURE;
 	}
+	if (upl->flags & UPL_SET_DIRTY)
+		flags |= UPL_COMMIT_SET_DIRTY;
 	if (upl->flags & UPL_CLEAR_DIRTY)
 	        flags |= UPL_COMMIT_CLEAR_DIRTY;
 
@@ -4622,9 +4589,12 @@ process_upl_to_commit:
 		 */
 		flags &= ~UPL_COMMIT_CS_VALIDATED;
 	}
+	if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && shadow_object->internal)
+		should_be_throttled = TRUE;
 
 	dwp = &dw_array[0];
 	dw_count = 0;
+	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
 
 	while (xfer_size) {
 		vm_page_t	t, m;
@@ -4655,7 +4625,7 @@ process_upl_to_commit:
 				VM_PAGE_FREE(t);
 
 				if (m == VM_PAGE_NULL)
-					m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset);
+					m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
 			}
 		}
 		if ((upl->flags & UPL_KERNEL_OBJECT) || m == VM_PAGE_NULL)
@@ -4675,9 +4645,9 @@ process_upl_to_commit:
 			if (page_list)
 				page_list[entry].phys_addr = 0;
 
-			if (flags & UPL_COMMIT_SET_DIRTY)
+			if (flags & UPL_COMMIT_SET_DIRTY) {
 				m->dirty = TRUE;
-			else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
+			} else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
 				m->dirty = FALSE;
 
 				if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
@@ -4869,29 +4839,32 @@ process_upl_to_commit:
 			pmap_disconnect(m->phys_page);
 		}
 
-		if ((m->busy) && (m->cleaning)) {
+		if (m->overwriting) {
 			/*
-			 * the request_page_list case
+			 * the (COPY_OUT_FROM == FALSE) request_page_list case
 			 */
-			m->absent = FALSE;
-			m->overwriting = FALSE;
+			if (m->busy) {
+				m->absent = FALSE;
 
-			dwp->dw_mask |= DW_clear_busy;
+				dwp->dw_mask |= DW_clear_busy;
+			} else {
+				/*
+				 * alternate (COPY_OUT_FROM == FALSE) page_list case
+				 * Occurs when the original page was wired
+				 * at the time of the list request
+				 */
+				assert(VM_PAGE_WIRED(m));
 
-		} else if (m->overwriting) {
-			/*
-			 * alternate request page list, write to 
-			 * page_list case.  Occurs when the original
-			 * page was wired at the time of the list
-			 * request
-			 */
-			assert(VM_PAGE_WIRED(m));
+				dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */
+			}
 			m->overwriting = FALSE;
+		}
+		if (m->encrypted_cleaning == TRUE) {
+			m->encrypted_cleaning = FALSE;
 
-			dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */
+			dwp->dw_mask |= DW_clear_busy;
 		}
 		m->cleaning = FALSE;
-		m->encrypted_cleaning = FALSE;
 
 		/*
 		 * It is a part of the semantic of COPYOUT_FROM
@@ -4906,19 +4879,30 @@ process_upl_to_commit:
 		if (flags & UPL_COMMIT_SET_DIRTY)
 			m->dirty = TRUE;
 
-		if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) {
-			dwp->dw_mask |= DW_vm_page_deactivate_internal;
-			clear_refmod |= VM_MEM_REFERENCED;
-
-		} else if (!m->active && !m->inactive && !m->speculative) {
+		if (should_be_throttled == TRUE && !m->active && !m->inactive && !m->speculative && !m->throttled) {
+			/*
+			 * page coming back in from being 'frozen'...
+			 * it was dirty before it was frozen, so keep it so
+			 * the vm_page_activate will notice that it really belongs
+			 * on the throttle queue and put it there
+			 */
+			m->dirty = TRUE;
+			dwp->dw_mask |= DW_vm_page_activate;
 
-			if (m->clustered || (flags & UPL_COMMIT_SPECULATE))
-				dwp->dw_mask |= DW_vm_page_speculate;
-			else if (m->reference)
-				dwp->dw_mask |= DW_vm_page_activate;
-			else {
+		} else {
+			if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) {
 				dwp->dw_mask |= DW_vm_page_deactivate_internal;
 				clear_refmod |= VM_MEM_REFERENCED;
+			} else if (!m->active && !m->inactive && !m->speculative) {
+
+				if (m->clustered || (flags & UPL_COMMIT_SPECULATE))
+					dwp->dw_mask |= DW_vm_page_speculate;
+				else if (m->reference)
+					dwp->dw_mask |= DW_vm_page_activate;
+				else {
+					dwp->dw_mask |= DW_vm_page_deactivate_internal;
+					clear_refmod |= VM_MEM_REFERENCED;
+				}
 			}
 		}
 		if (upl->flags & UPL_ACCESS_BLOCKED) {
@@ -4944,21 +4928,10 @@ commit_next_page:
 
 		if (dwp->dw_mask) {
 			if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
-				if (m->busy == FALSE) {
-					/*
-					 * dw_do_work may need to drop the object lock
-					 * if it does, we need the pages it's looking at to
-					 * be held stable via the busy bit.
-					 */
-					m->busy = TRUE;
-					dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
-				}
-				dwp->dw_m = m;
-				dwp++;
-				dw_count++;
+				VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
 
-				if (dw_count >= DELAYED_WORK_LIMIT) {
-					dw_do_work(shadow_object, &dw_array[0], dw_count);
+				if (dw_count >= dw_limit) {
+					vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count);
 			
 					dwp = &dw_array[0];
 					dw_count = 0;
@@ -4973,7 +4946,7 @@ commit_next_page:
 		}
 	}
 	if (dw_count)
-		dw_do_work(shadow_object, &dw_array[0], dw_count);
+		vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count);
 
 	occupied = 1;
 
@@ -5071,9 +5044,11 @@ upl_abort_range(
 	int			entry;
 	wpl_array_t 	 	lite_list;
 	int			occupied;
-	struct	dw		dw_array[DELAYED_WORK_LIMIT];
-	struct	dw		*dwp;
-	int			dw_count, isVectorUPL = 0;
+	struct	vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
+	struct	vm_page_delayed_work	*dwp;
+	int			dw_count;
+	int			dw_limit;
+	int			isVectorUPL = 0;
 	upl_t			vector_upl = NULL;
 
 	*empty = FALSE;
@@ -5166,6 +5141,7 @@ process_upl_to_abort:
 
 	dwp = &dw_array[0];
 	dw_count = 0;
+	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
 
 	if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT))
 		panic("upl_abort_range: kernel_object being DUMPED");
@@ -5199,7 +5175,7 @@ process_upl_to_abort:
 				VM_PAGE_FREE(t);
 
 				if (m == VM_PAGE_NULL)
-					m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset);
+					m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
 			}
 		}
 		if ((upl->flags & UPL_KERNEL_OBJECT))
@@ -5210,7 +5186,6 @@ process_upl_to_abort:
 			if (m->absent) {
 			        boolean_t must_free = TRUE;
 
-				m->clustered = FALSE;
 				/*
 				 * COPYOUT = FALSE case
 				 * check for error conditions which must
@@ -5232,6 +5207,18 @@ process_upl_to_abort:
 					m->unusual = TRUE;
 					must_free = FALSE;
 				}
+				if (m->clustered) {
+					/*
+					 * This page was a part of a speculative
+					 * read-ahead initiated by the kernel
+					 * itself.  No one is expecting this
+					 * page and no one will clean up its
+					 * error state if it ever becomes valid
+					 * in the future.
+					 * We have to free it here.
+					 */
+					must_free = TRUE;
+				}
 
 				/*
 				 * ENCRYPTED SWAP:
@@ -5244,6 +5231,21 @@ process_upl_to_abort:
 
 				m->cleaning = FALSE;
 				m->encrypted_cleaning = FALSE;
+
+				if (m->overwriting && !m->busy) {
+					/*
+					 * this shouldn't happen since
+					 * this is an 'absent' page, but
+					 * it doesn't hurt to check for
+					 * the 'alternate' method of 
+					 * stabilizing the page...
+					 * we will mark 'busy' to be cleared
+					 * in the following code which will
+					 * take care of the primary stabilization
+					 * method (i.e. setting 'busy' to TRUE)
+					 */
+					dwp->dw_mask |= DW_vm_page_unwire;
+				}
 				m->overwriting = FALSE;
 
 				dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
@@ -5259,17 +5261,45 @@ process_upl_to_abort:
 			        if (m->laundry)
 					dwp->dw_mask |= DW_vm_pageout_throttle_up;
 
+				if (upl->flags & UPL_ACCESS_BLOCKED) {
+					/*
+					 * We blocked access to the pages in this UPL.
+					 * Clear the "busy" bit and wake up any waiter
+					 * for this page.
+					 */
+					dwp->dw_mask |= DW_clear_busy;
+				}
 				if (m->pageout) {
 				        assert(m->busy);
 					assert(m->wire_count == 1);
 					m->pageout = FALSE;
 
-					dwp->dw_mask |= DW_vm_page_unwire;
+					dwp->dw_mask |= (DW_vm_page_unwire | DW_clear_busy);
+				}
+				if (m->overwriting) {
+					if (m->busy)
+						dwp->dw_mask |= DW_clear_busy;
+					else {
+						/*
+						 * deal with the 'alternate' method
+						 * of stabilizing the page...
+						 * we will either free the page
+						 * or mark 'busy' to be cleared
+						 * in the following code which will
+						 * take care of the primary stabilization
+						 * method (i.e. setting 'busy' to TRUE)
+						 */
+						dwp->dw_mask |= DW_vm_page_unwire;
+					}
+					m->overwriting = FALSE;
+				}
+				if (m->encrypted_cleaning == TRUE) {
+					m->encrypted_cleaning = FALSE;
+
+					dwp->dw_mask |= DW_clear_busy;
 				}
 				m->dump_cleaning = FALSE;
 				m->cleaning = FALSE;
-				m->encrypted_cleaning = FALSE;
-				m->overwriting = FALSE;
 #if	MACH_PAGEMAP
 				vm_external_state_clr(m->object->existence_map, m->offset);
 #endif	/* MACH_PAGEMAP */
@@ -5287,7 +5317,7 @@ process_upl_to_abort:
 						 */
 						dwp->dw_mask |= DW_vm_page_lru;
 					}
-					dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
+					dwp->dw_mask |= DW_PAGE_WAKEUP;
 				}
 			}
 		}
@@ -5298,21 +5328,10 @@ abort_next_page:
 
 		if (dwp->dw_mask) {
 			if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
-				if (m->busy == FALSE) {
-					/*
-					 * dw_do_work may need to drop the object lock
-					 * if it does, we need the pages it's looking at to
-					 * be held stable via the busy bit.
-					 */
-					m->busy = TRUE;
-					dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
-				}
-				dwp->dw_m = m;
-				dwp++;
-				dw_count++;
+				VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
 
-				if (dw_count >= DELAYED_WORK_LIMIT) {
-					dw_do_work(shadow_object, &dw_array[0], dw_count);
+				if (dw_count >= dw_limit) {
+					vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count);
 				
 					dwp = &dw_array[0];
 					dw_count = 0;
@@ -5327,7 +5346,7 @@ abort_next_page:
 		}
 	}
 	if (dw_count)
-		dw_do_work(shadow_object, &dw_array[0], dw_count);
+		vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count);
 
 	occupied = 1;
 
@@ -5449,13 +5468,15 @@ vm_object_iopl_request(
 	unsigned int		entry;
 	wpl_array_t 		lite_list = NULL;
 	int			no_zero_fill = FALSE;
+	unsigned int		size_in_pages;
 	u_int32_t		psize;
 	kern_return_t		ret;
 	vm_prot_t		prot;
 	struct vm_object_fault_info fault_info;
-	struct	dw		dw_array[DELAYED_WORK_LIMIT];
-	struct	dw		*dwp;
+	struct	vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
+	struct	vm_page_delayed_work	*dwp;
 	int			dw_count;
+	int			dw_limit;
 	int			dw_index;
 
 	if (cntrl_flags & ~UPL_VALID_FLAGS) {
@@ -5473,10 +5494,10 @@ vm_object_iopl_request(
 		        return KERN_INVALID_VALUE;
 
 		if (object->phys_contiguous) {
-		        if ((offset + object->shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
+		        if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
 			        return KERN_INVALID_ADDRESS;
 	      
-			if (((offset + object->shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
+			if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
 			        return KERN_INVALID_ADDRESS;
 		}
 	}
@@ -5543,6 +5564,8 @@ vm_object_iopl_request(
 	upl->map_object = object;
 	upl->size = size;
 
+	size_in_pages = size / PAGE_SIZE;
+
 	if (object == kernel_object &&
 	    !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) {
 		upl->flags |= UPL_KERNEL_OBJECT;
@@ -5586,10 +5609,10 @@ vm_object_iopl_request(
 		 */
 		upl->flags |= UPL_DEVICE_MEMORY;
 
-		upl->highest_page = (ppnum_t) ((offset + object->shadow_offset + size - 1)>>PAGE_SHIFT);
+		upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1)>>PAGE_SHIFT);
 
 		if (user_page_list) {
-		        user_page_list[0].phys_addr = (ppnum_t) ((offset + object->shadow_offset)>>PAGE_SHIFT);
+		        user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT);
 			user_page_list[0].device = TRUE;
 		}
 		if (page_list_count != NULL) {
@@ -5609,7 +5632,7 @@ vm_object_iopl_request(
 		if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
 			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
 	}
-
+ 
 #if UPL_DEBUG
 	queue_enter(&object->uplq, upl, upl_t, uplq);
 #endif /* UPL_DEBUG */
@@ -5659,10 +5682,13 @@ vm_object_iopl_request(
 	fault_info.hi_offset = offset + xfer_size;
 	fault_info.no_cache  = FALSE;
 	fault_info.stealth = FALSE;
+	fault_info.io_sync = FALSE;
+	fault_info.cs_bypass = FALSE;
 	fault_info.mark_zf_absent = TRUE;
 
 	dwp = &dw_array[0];
 	dw_count = 0;
+	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
 
 	while (xfer_size) {
 	        vm_fault_return_t	result;
@@ -5756,17 +5782,23 @@ vm_object_iopl_request(
 				vm_object_lock(object);
 				break;
 
-			case VM_FAULT_FICTITIOUS_SHORTAGE:
-				vm_page_more_fictitious();
+			case VM_FAULT_MEMORY_SHORTAGE:
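+				/*
+				 * Keep vm_upl_wait_for_pages in sync with the
+				 * number of pages this request may wait on, and
+				 * bracket the wait with START/END debug events
+				 * (the -1 in the last arg marks an interrupted
+				 * wait).
+				 */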
+				OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
 
-				vm_object_lock(object);
-				break;
+				VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
 
-			case VM_FAULT_MEMORY_SHORTAGE:
 				if (vm_page_wait(interruptible)) {
+					OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
+
+					VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
 					vm_object_lock(object);
+
 					break;
 				}
+				OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
+
+				VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
+
 				/* fall thru */
 
 			case VM_FAULT_INTERRUPTED:
@@ -5791,7 +5823,6 @@ vm_object_iopl_request(
 		   } while (result != VM_FAULT_SUCCESS);
 
 		}
-
 		if (upl->flags & UPL_KERNEL_OBJECT)
 			goto record_phys_addr;
 
@@ -5845,7 +5876,7 @@ vm_object_iopl_request(
 			else
 			        refmod = 0;
 
-			if ( !dst_page->absent)
+			if (!dst_page->absent)
 				vm_page_copy(dst_page, low_page);
 		  
 			low_page->reference = dst_page->reference;
@@ -5874,7 +5905,8 @@ vm_object_iopl_request(
 		if (cntrl_flags & UPL_BLOCK_ACCESS) {
 			/*
 			 * Mark the page "busy" to block any future page fault
-			 * on this page.  We'll also remove the mapping
+			 * on this page in addition to wiring it.
+			 * We'll also remove the mapping
 			 * of all these pages before leaving this routine.
 			 */
 			assert(!dst_page->fictitious);
@@ -5926,21 +5958,10 @@ record_phys_addr:
 		xfer_size -= PAGE_SIZE;
 
 		if (dwp->dw_mask) {
-			if (dst_page->busy == FALSE) {
-				/*
-				 * dw_do_work may need to drop the object lock
-				 * if it does, we need the pages it's looking at to
-				 * be held stable via the busy bit.
-				 */
-				dst_page->busy = TRUE;
-				dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
-			}
-			dwp->dw_m = dst_page;
-			dwp++;
-			dw_count++;
+			VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
 
-			if (dw_count >= DELAYED_WORK_LIMIT) {
-				dw_do_work(object, &dw_array[0], dw_count);
+			if (dw_count >= dw_limit) {
+				vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 				
 				dwp = &dw_array[0];
 				dw_count = 0;
@@ -5948,7 +5969,7 @@ record_phys_addr:
 		}
 	}
 	if (dw_count)
-		dw_do_work(object, &dw_array[0], dw_count);
+		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 
 	if (page_list_count != NULL) {
 	        if (upl->flags & UPL_INTERNAL)
@@ -6019,7 +6040,7 @@ return_err:
 				vm_page_unwire(dst_page, TRUE);
 
 			PAGE_WAKEUP_DONE(dst_page);
-		}	
+		}
 		vm_page_unlock_queues();
 
 		if (need_unwire == TRUE)
@@ -6154,7 +6175,7 @@ done:
  * can call the encryption/decryption routines with a kernel
  * virtual address.  We keep this pool of pre-allocated kernel
  * virtual addresses so that we don't have to scan the kernel's
- * virtaul address space each time we need to encrypt or decrypt
+ * virtual address space each time we need to encrypt or decrypt
  * a physical page.
  * It would be nice to be able to encrypt and decrypt in physical
  * mode but that might not always be more efficient...
@@ -6197,6 +6218,9 @@ vm_paging_map_init(void)
 	}
 	map_entry->object.vm_object = kernel_object;
 	map_entry->offset = page_map_offset;
+	map_entry->protection = VM_PROT_NONE;
+	map_entry->max_protection = VM_PROT_NONE;
+	map_entry->permanent = TRUE;
 	vm_object_reference(kernel_object);
 	vm_map_unlock(kernel_map);
 
@@ -6295,9 +6319,6 @@ vm_paging_map_object(
 			vm_paging_page_inuse[i] = TRUE;
 			simple_unlock(&vm_paging_lock);
 
-			if (page->pmapped == FALSE) {
-				pmap_sync_page_data_phys(page->phys_page);
-			}
 			page->pmapped = TRUE;
 
 			/*
@@ -6310,8 +6331,7 @@ vm_paging_map_object(
 				   page_map_offset,
 				   page,
 				   protection,
-				   ((int) page->object->wimg_bits &
-				    VM_WIMG_MASK),
+				   0,
 				   TRUE);
 			vm_paging_objects_mapped++;
 			vm_paging_pages_mapped++; 
@@ -6380,7 +6400,6 @@ vm_paging_map_object(
 	for (page_map_offset = 0;
 	     map_size != 0;
 	     map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
-		unsigned int	cache_attr;
 
 		page = vm_page_lookup(object, offset + page_map_offset);
 		if (page == VM_PAGE_NULL) {
@@ -6394,18 +6413,14 @@ vm_paging_map_object(
 			vm_object_lock(object);
 			return KERN_MEMORY_ERROR;
 		}
-		if (page->pmapped == FALSE) {
-			pmap_sync_page_data_phys(page->phys_page);
-		}
 		page->pmapped = TRUE;
-		cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;
 
 		//assert(pmap_verify_free(page->phys_page));
 		PMAP_ENTER(kernel_pmap,
 			   *address + page_map_offset,
 			   page,
 			   protection,
-			   cache_attr,
+			   0,
 			   TRUE);
 	}
 			   
@@ -6742,10 +6757,13 @@ vm_page_decrypt(
 			vm_object_offset_t	paging_offset;
 		} vm;
 	} decrypt_iv;
+	boolean_t		was_dirty;
 
 	assert(page->busy);
 	assert(page->encrypted);
 
+	was_dirty = page->dirty;
+
 	/*
 	 * Take a paging-in-progress reference to keep the object
 	 * alive even if we have to unlock it (in vm_paging_map_object()
@@ -6817,16 +6835,24 @@ vm_page_decrypt(
 				       kernel_vaddr + PAGE_SIZE);
 	}
 
-	/*
-	 * After decryption, the page is actually clean.
-	 * It was encrypted as part of paging, which "cleans"
-	 * the "dirty" pages.
-	 * Noone could access it after it was encrypted
-	 * and the decryption doesn't count.
-	 */
-	page->dirty = FALSE;
-	assert (page->cs_validated == FALSE);
-	pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
+	if (was_dirty) {
+		/*
+		 * The pager did not specify that the page would be
+		 * clean when it got paged in, so let's not clean it here
+		 * either.
+		 */
+	} else {
+		/*
+		 * After decryption, the page is actually still clean.
+		 * It was encrypted as part of paging, which "cleans"
+		 * the "dirty" pages.
+		 * No one could access it after it was encrypted
+		 * and the decryption doesn't count.
+		 */
+		page->dirty = FALSE;
+		assert (page->cs_validated == FALSE);
+		pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
+	}
 	page->encrypted = FALSE;
 
 	/*
@@ -6943,7 +6969,7 @@ process_upl_to_encrypt:
 				      base_offset + offset_in_upl);
 		if (page == VM_PAGE_NULL) {
 			panic("upl_encrypt: "
-			      "no page for (obj=%p,off=%lld+%d)!\n",
+			      "no page for (obj=%p,off=0x%llx+0x%x)!\n",
 			      shadow_object,
 			      base_offset,
 			      offset_in_upl);
@@ -7373,6 +7399,126 @@ upl_clear_dirty(
 	}
 }
 
+void
+upl_set_referenced(
+	upl_t		upl,
+	boolean_t 	value)
+{
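+	/*
+	 * ext_ref_count tracks outstanding external references on this
+	 * UPL (value == TRUE takes one, FALSE drops one); letting it
+	 * underflow is a fatal bug, hence the panic below.
+	 */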
+	upl_lock(upl);
+	if (value) {
+		upl->ext_ref_count++;
+	} else {
+		if (!upl->ext_ref_count) {
+			panic("upl_set_referenced: %p external ref count underflow\n", upl);
+		}
+		upl->ext_ref_count--;
+	}
+	upl_unlock(upl);
+}
+
+boolean_t
+vm_page_is_slideable(vm_page_t m)
+{
+	boolean_t result = FALSE;
+	vm_object_t slide_object = slide_info.slide_object;
+	mach_vm_offset_t start = slide_info.start;
+	mach_vm_offset_t end = slide_info.end;
+
+	/* make sure our page belongs to the one object allowed to do this */
+	if (slide_object == VM_OBJECT_NULL) {
+		return result;
+	}
+
+	/* Should we traverse down the chain? */
+	if (m->object != slide_object) {
+		return result;
+	}
+
+	if (!m->slid && (start <= m->offset && end > m->offset)) {
+		result = TRUE;
+	}
+	return result;
+}
+
+int vm_page_slide_counter = 0;
+int vm_page_slide_errors = 0;
+kern_return_t
+vm_page_slide(
+	vm_page_t	page,
+	vm_map_offset_t	kernel_mapping_offset)
+{
+	kern_return_t		kr;
+	vm_map_size_t		kernel_mapping_size;
+	vm_offset_t		kernel_vaddr;
+	uint32_t		pageIndex = 0;
+
+	assert(!page->slid);
+	
+	/*
+	 * Take a paging-in-progress reference to keep the object
+	 * alive even if we have to unlock it (in vm_paging_map_object()
+	 * for example)...
+	 */
+	vm_object_paging_begin(page->object);
+
+	if (kernel_mapping_offset == 0) {
+		/*
+		 * The page hasn't already been mapped in kernel space
+		 * by the caller.  Map it now, so that we can access
+		 * its contents and slide them.
+		 */
+		kernel_mapping_size = PAGE_SIZE;
+		kr = vm_paging_map_object(&kernel_mapping_offset,
+					  page,
+					  page->object,
+					  page->offset,
+					  &kernel_mapping_size,
+					  VM_PROT_READ | VM_PROT_WRITE,
+					  FALSE);
+		if (kr != KERN_SUCCESS) {
+			panic("vm_page_slide: "
+			      "could not map page in kernel: 0x%x\n",
+			      kr);
+		}
+	} else {
+		kernel_mapping_size = 0;
+	}
+	kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);
+
+	/*
+	 * Slide the pointers on the page.
+	 */
+
+	/* assert that slide_info.start/end are page-aligned? */
+
+	pageIndex = (uint32_t)((page->offset - slide_info.start)/PAGE_SIZE);
+	kr = vm_shared_region_slide(kernel_vaddr, pageIndex);
+	vm_page_slide_counter++;
+
+	/*
+	 * Unmap the page from the kernel's address space.
+	 */
+	if (kernel_mapping_size != 0) {
+		vm_paging_unmap_object(page->object,
+				       kernel_vaddr,
+				       kernel_vaddr + PAGE_SIZE);
+	}
+	
+	page->dirty = FALSE;
+	pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
+
+	if (kr == KERN_SUCCESS) {
+		page->slid = TRUE;
+	} else {
+		page->error = TRUE;
+		vm_page_slide_errors++;
+	}
+
+	vm_object_paging_end(page->object);
+
+	return kr;
+}
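+
+/*
+ * Caller sketch (this is the pattern vm_page_copy() uses later in
+ * this patch): the page must be kept "busy" across the slide, and
+ * the caller issues the wakeup only if it set the bit itself:
+ *
+ *	if (vm_page_is_slideable(m)) {
+ *		boolean_t was_busy = m->busy;
+ *
+ *		m->busy = TRUE;
+ *		(void) vm_page_slide(m, 0);
+ *		if (!was_busy)
+ *			PAGE_WAKEUP_DONE(m);
+ *	}
+ */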
+
 
 #ifdef MACH_BSD
 
@@ -7568,9 +7714,9 @@ db_pageout(void)
 	iprintf("nolock %5d  avoid  %5d  busy   %5d  absent %5d\n",
 		vm_pageout_inactive_nolock, vm_pageout_inactive_avoid,
 		vm_pageout_inactive_busy, vm_pageout_inactive_absent);
-	iprintf("used   %5d  clean  %5d  dirty  %5d\n",
+	iprintf("used   %5d  clean  %5d  dirty(internal)  %5d  dirty(external)  %5d\n",
 		vm_pageout_inactive_used, vm_pageout_inactive_clean,
-		vm_pageout_inactive_dirty);
+		vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
 #if	MACH_COUNTERS
 	iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed);
 #endif	/* MACH_COUNTERS */
diff --git a/osfmk/vm/vm_pageout.h b/osfmk/vm/vm_pageout.h
index 31a8b61df..d8ddac6a7 100644
--- a/osfmk/vm/vm_pageout.h
+++ b/osfmk/vm/vm_pageout.h
@@ -85,6 +85,40 @@
 #include <vm/vm_page.h>
 #endif
 
+#include <sys/kdebug.h>
+
+#if CONFIG_FREEZE
+extern boolean_t vm_freeze_enabled;
+#define VM_DYNAMIC_PAGING_ENABLED(port) ((vm_freeze_enabled == FALSE) && IP_VALID(port))
+#else
+#define VM_DYNAMIC_PAGING_ENABLED(port) IP_VALID(port)
+#endif
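+/*
+ * e.g., as used by vm_page_deactivate_internal() later in this patch:
+ *
+ *	if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && ...)
+ */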
+
+
+extern int	vm_debug_events;
+
+#define VMF_CHECK_ZFDELAY	0x100
+#define VMF_COWDELAY		0x101
+#define VMF_ZFDELAY		0x102
+
+#define VM_PAGEOUT_SCAN		0x104
+#define VM_PAGEOUT_BALANCE	0x105
+#define VM_PAGEOUT_FREELIST	0x106
+#define VM_PAGEOUT_PURGEONE	0x107
+#define VM_PAGEOUT_CACHE_EVICT	0x108
+#define VM_PAGEOUT_THREAD_BLOCK	0x109
+
+#define VM_UPL_PAGE_WAIT	0x120
+#define VM_IOPL_PAGE_WAIT	0x121
+
+#define VM_DEBUG_EVENT(name, event, control, arg1, arg2, arg3, arg4)	\
+	MACRO_BEGIN						\
+	if (vm_debug_events) {					\
+		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, event)) | control, arg1, arg2, arg3, arg4, 0); \
+	}							\
+	MACRO_END
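+
+/*
+ * Example (from vm_object_iopl_request() in this patch):
+ *
+ *	VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT,
+ *		       DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
+ */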
+
+
 
 extern kern_return_t vm_map_create_upl(
 	vm_map_t		map,
@@ -106,7 +140,6 @@ extern upl_size_t upl_get_size(
 typedef struct vm_page	*vm_page_t;
 #endif
 
-
 extern void                vm_page_free_list(
                             vm_page_t	mem,
                             boolean_t	prepare_object);
@@ -129,22 +162,6 @@ extern unsigned int	vm_pageout_scan_event_counter;
 extern unsigned int	vm_zf_queue_count;
 
 
-#if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */
-
-extern unsigned int	vm_zf_count;
-
-#define VM_ZF_COUNT_INCR()				\
-	MACRO_BEGIN					\
-	OSAddAtomic(1, (SInt32 *) &vm_zf_count);	\
-	MACRO_END					\
-
-#define VM_ZF_COUNT_DECR()				\
-	MACRO_BEGIN					\
-	OSAddAtomic(-1, (SInt32 *) &vm_zf_count);	\
-	MACRO_END					\
-
-#else /* !(defined(__ppc__)) */
-
 extern uint64_t	vm_zf_count;
 
 #define VM_ZF_COUNT_INCR()				\
@@ -157,8 +174,6 @@ extern uint64_t	vm_zf_count;
 	OSAddAtomic64(-1, (SInt64 *) &vm_zf_count);	\
 	MACRO_END					\
 
-#endif /* !(defined(__ppc__)) */
-
 /*
  * must hold the page queues lock to
  * manipulate this structure
@@ -181,6 +196,7 @@ struct vm_pageout_queue {
 extern struct	vm_pageout_queue	vm_pageout_queue_internal;
 extern struct	vm_pageout_queue	vm_pageout_queue_external;
 
+
 /*
  *	Routines exported to Mach.
  */
@@ -252,6 +268,7 @@ struct ucd {
 struct upl {
 	decl_lck_mtx_data(,	Lock)	/* Synchronization */
 	int		ref_count;
+	int		ext_ref_count;
 	int		flags;
 	vm_object_t	src_object; /* object derived from */
 	vm_object_offset_t offset;
@@ -290,7 +307,8 @@ struct upl {
 #define UPL_SHADOWED		0x1000
 #define UPL_KERNEL_OBJECT	0x2000
 #define UPL_VECTOR		0x4000
-#define UPL_HAS_BUSY            0x10000
+#define UPL_SET_DIRTY		0x8000
+#define UPL_HAS_BUSY		0x10000
 
 /* flags for upl_create flags parameter */
 #define UPL_CREATE_EXTERNAL	0
@@ -385,6 +403,9 @@ extern void vm_pageout_queue_steal(
 	vm_page_t page, 
 	boolean_t queues_locked);
 	
+extern boolean_t vm_page_is_slideable(vm_page_t m);
+
+extern kern_return_t vm_page_slide(vm_page_t page, vm_map_offset_t kernel_mapping_offset);
 #endif  /* MACH_KERNEL_PRIVATE */
 
 #if UPL_DEBUG
diff --git a/osfmk/vm/vm_protos.h b/osfmk/vm/vm_protos.h
index a4562ce8a..53cf9e12a 100644
--- a/osfmk/vm/vm_protos.h
+++ b/osfmk/vm/vm_protos.h
@@ -135,15 +135,6 @@ extern kern_return_t vm_region_object_create
 extern mach_vm_offset_t mach_get_vm_start(vm_map_t);
 extern mach_vm_offset_t mach_get_vm_end(vm_map_t);
 
-/*
- * Legacy routines to get the start and end for a vm_map_t.  They
- * return them in the vm_offset_t format.  So, they should only be
- * called on maps that are the same size as the kernel map for
- * accurate results.
- */
-extern vm_offset_t get_vm_start(vm_map_t);
-extern vm_offset_t get_vm_end(vm_map_t);
-
 #if CONFIG_CODE_DECRYPTION
 struct pager_crypt_info;
 extern kern_return_t vm_map_apple_protected(
@@ -179,12 +170,17 @@ extern pager_return_t	vnode_pageout(
 	struct vnode *, upl_t,
 	upl_offset_t, vm_object_offset_t,
 	upl_size_t, int, int *);
+extern uint32_t vnode_trim (struct vnode *, int64_t offset, unsigned long len);
 extern memory_object_t vnode_pager_setup(
 	struct vnode *, memory_object_t);
 extern vm_object_offset_t vnode_pager_get_filesize(
 	struct vnode *);
 extern uint32_t vnode_pager_isinuse(
 	struct vnode *);
+extern boolean_t vnode_pager_isSSD(
+	struct vnode *);
+extern void vnode_pager_throttle(
+	void);
 extern uint32_t vnode_pager_return_hard_throttle_limit(
 	struct vnode *,
 	uint32_t     *,
@@ -199,7 +195,14 @@ extern kern_return_t vnode_pager_get_filename(
 extern kern_return_t vnode_pager_get_cs_blobs(
 	struct vnode	*vp,
 	void		**blobs);
-	
+
+#if CHECK_CS_VALIDATION_BITMAP	
+/* used by the vnode_pager_cs_validation_bitmap routine*/
+#define CS_BITMAP_SET	1
+#define CS_BITMAP_CLEAR	2
+#define CS_BITMAP_CHECK	3
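+/* e.g. ubc_cs_check_validation_bitmap(vp, offset, CS_BITMAP_SET) */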
+
+#endif /* CHECK_CS_VALIDATION_BITMAP */
 
 extern void vnode_pager_bootstrap(void) __attribute__((section("__TEXT, initcode")));
 extern kern_return_t
@@ -218,6 +221,9 @@ extern kern_return_t vnode_pager_get_object_size(
 extern kern_return_t vnode_pager_get_isinuse(
 	memory_object_t,
 	uint32_t *);
+extern kern_return_t vnode_pager_get_isSSD(
+	memory_object_t,
+	boolean_t *);
 extern kern_return_t vnode_pager_check_hard_throttle(
 	memory_object_t,
 	uint32_t *,
@@ -232,6 +238,19 @@ extern kern_return_t vnode_pager_get_object_filename(
 extern kern_return_t vnode_pager_get_object_cs_blobs(
 	memory_object_t	mem_obj,
 	void		**blobs);
+
+#if CHECK_CS_VALIDATION_BITMAP	
+extern kern_return_t vnode_pager_cs_check_validation_bitmap( 
+	memory_object_t	mem_obj, 
+	memory_object_offset_t	offset,
+	int		optype);
+#endif /*CHECK_CS_VALIDATION_BITMAP*/
+
+extern	kern_return_t ubc_cs_check_validation_bitmap (
+	struct vnode *vp, 
+	memory_object_offset_t offset,
+	int optype);
+
 extern kern_return_t vnode_pager_data_request( 
 	memory_object_t, 
 	memory_object_offset_t,
@@ -322,6 +341,10 @@ extern kern_return_t default_pager_memory_object_create(
 	memory_object_t *);
 #endif /* _memory_object_default_server_ */
 
+#if CONFIG_FREEZE
+extern unsigned int default_pager_swap_pages_free(void);
+#endif
+
 extern void   device_pager_reference(memory_object_t);
 extern void   device_pager_deallocate(memory_object_t);
 extern kern_return_t   device_pager_init(memory_object_t,
@@ -422,6 +445,11 @@ extern int macx_backing_store_compaction(int flags);
 extern unsigned int mach_vm_ctl_page_free_wanted(void);
 
 extern void no_paging_space_action(void);
+
+#define VM_TOGGLE_CLEAR		0
+#define VM_TOGGLE_SET		1
+#define VM_TOGGLE_GETVALUE	999
+int vm_toggle_entry_reuse(int, int*);
 #endif	/* _VM_VM_PROTOS_H_ */
 
 #endif	/* XNU_KERNEL_PRIVATE */
diff --git a/osfmk/vm/vm_purgeable_internal.h b/osfmk/vm/vm_purgeable_internal.h
index 5e6d4e4af..4f720eb39 100644
--- a/osfmk/vm/vm_purgeable_internal.h
+++ b/osfmk/vm/vm_purgeable_internal.h
@@ -46,15 +46,9 @@ enum purgeable_q_type {
 	PURGEABLE_Q_TYPE_MAX
 };
 
-#if (CONFIG_TOKEN_QUEUE_SMALL == 1)
-typedef uint16_t token_idx_t;
-typedef uint16_t token_cnt_t;
-#define TOKEN_COUNT_MAX UINT16_MAX
-#else
 typedef uint32_t token_idx_t;
 typedef uint32_t token_cnt_t;
 #define TOKEN_COUNT_MAX UINT32_MAX
-#endif
 
 #define NUM_VOLATILE_GROUPS 8
 struct purgeable_q {
diff --git a/osfmk/vm/vm_resident.c b/osfmk/vm/vm_resident.c
index 979c81624..397914b0c 100644
--- a/osfmk/vm/vm_resident.c
+++ b/osfmk/vm/vm_resident.c
@@ -85,8 +85,7 @@
 #include <kern/misc_protos.h>
 #include <zone_debug.h>
 #include <vm/cpm.h>
-#include <ppc/mappings.h>		/* (BRINGUP) */
-#include <pexpert/pexpert.h>	/* (BRINGUP) */
+#include <pexpert/pexpert.h>
 
 #include <vm/vm_protos.h>
 #include <vm/memory_object.h>
@@ -95,17 +94,15 @@
 #include <IOKit/IOHibernatePrivate.h>
 
 
-#if CONFIG_EMBEDDED
 #include <sys/kern_memorystatus.h>
-#endif
 
 #include <sys/kdebug.h>
 
 boolean_t	vm_page_free_verify = TRUE;
 
-uint_t		vm_lopage_free_count = 0;
-uint_t		vm_lopage_free_limit = 0;
-uint_t		vm_lopage_lowater    = 0;
+uint32_t	vm_lopage_free_count = 0;
+uint32_t	vm_lopage_free_limit = 0;
+uint32_t	vm_lopage_lowater    = 0;
 boolean_t	vm_lopage_refill = FALSE;
 boolean_t	vm_lopage_needed = FALSE;
 
@@ -120,7 +117,9 @@ struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AG
 
 __private_extern__ void		vm_page_init_lck_grp(void);
 
-static void			vm_page_free_prepare(vm_page_t	page);
+static void		vm_page_free_prepare(vm_page_t	page);
+static vm_page_t	vm_page_grab_fictitious_common(ppnum_t phys_addr);
+
 
 
 
@@ -241,7 +240,6 @@ unsigned int	vm_colors;
 unsigned int    vm_color_mask;			/* mask is == (vm_colors-1) */
 unsigned int	vm_cache_geometry_colors = 0;	/* set by hw dependent code during startup */
 queue_head_t	vm_page_queue_free[MAX_COLORS];
-vm_page_t       vm_page_queue_fictitious;
 unsigned int	vm_page_free_wanted;
 unsigned int	vm_page_free_wanted_privileged;
 unsigned int	vm_page_free_count;
@@ -458,11 +456,6 @@ vm_page_init_local_q()
 }
 
 
-uint64_t initial_max_mem;
-int initial_wire_count;
-int initial_free_count;
-int initial_lopage_count;
-
 /*
  *	vm_page_bootstrap:
  *
@@ -542,6 +535,7 @@ vm_page_bootstrap(
 	m->no_cache = FALSE;
 	m->zero_fill = FALSE;
 	m->reusable = FALSE;
+	m->slid = FALSE;
 	m->__unused_object_bits = 0;
 
 
@@ -572,8 +566,8 @@ vm_page_bootstrap(
     
 	for (i = 0; i < MAX_COLORS; i++ )
 		queue_init(&vm_page_queue_free[i]);
+
 	queue_init(&vm_lopage_queue_free);
-	vm_page_queue_fictitious = VM_PAGE_NULL;
 	queue_init(&vm_page_queue_active);
 	queue_init(&vm_page_queue_inactive);
 	queue_init(&vm_page_queue_throttled);
@@ -689,11 +683,6 @@ vm_page_bootstrap(
 	vm_page_wire_count_initial = vm_page_wire_count;
 	vm_page_free_count_minimum = vm_page_free_count;
 
-	initial_max_mem = max_mem;
-	initial_wire_count = vm_page_wire_count;
-	initial_free_count = vm_page_free_count;
-	initial_lopage_count = vm_lopage_free_count;
-
 	printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
 	       vm_page_free_count, vm_page_wire_count);
 
@@ -743,7 +732,7 @@ pmap_steal_memory(
 	addr = virtual_space_start;
 	virtual_space_start += size;
 
-	kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size);	/* (TEST/DEBUG) */
+	//kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size);	/* (TEST/DEBUG) */
 
 	/*
 	 *	Allocate and map physical pages to back new virtual pages.
@@ -910,6 +899,7 @@ vm_page_module_init(void)
 	zone_debug_disable(vm_page_zone);
 #endif	/* ZONE_DEBUG */
 
+	zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
 	zone_change(vm_page_zone, Z_EXPAND, FALSE);
 	zone_change(vm_page_zone, Z_EXHAUST, TRUE);
 	zone_change(vm_page_zone, Z_FOREIGN, TRUE);
@@ -919,6 +909,7 @@ vm_page_module_init(void)
          * in vm_page_create(). [Q: is this really what we want?]
          */
         vm_page_zone->count += vm_page_pages;
+        vm_page_zone->sum_count += vm_page_pages;
         vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
 
 	lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
@@ -944,12 +935,13 @@ vm_page_create(
 	for (phys_page = start;
 	     phys_page < end;
 	     phys_page++) {
-		while ((m = (vm_page_t) vm_page_grab_fictitious())
+		while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
 			== VM_PAGE_NULL)
 			vm_page_more_fictitious();
 
-		vm_page_init(m, phys_page, FALSE);
+		m->fictitious = FALSE;
 		pmap_clear_noencrypt(phys_page);
+
 		vm_page_pages++;
 		vm_page_release(m);
 	}
@@ -1021,7 +1013,7 @@ vm_page_insert_internal(
 			      "already in (obj=%p,off=0x%llx)",
 			      mem, object, offset, mem->object, mem->offset);
 #endif
-		assert(!object->internal || offset < object->size);
+		assert(!object->internal || offset < object->vo_size);
 
 		/* only insert "pageout" pages into "pageout" objects,
 		 * and normal pages into normal objects */
@@ -1054,6 +1046,16 @@ vm_page_insert_internal(
 
 		lck_spin_unlock(bucket_lock);
 	}
+
+	{	unsigned int    cache_attr;
+
+		cache_attr = object->wimg_bits & VM_WIMG_MASK;
+
+		if (cache_attr != VM_WIMG_USE_DEFAULT) {
+			pmap_set_cache_attributes(mem->phys_page, cache_attr);
+			object->set_cache_attr = TRUE;
+		}
+	}
 	/*
 	 *	Now link into the object's list of backed pages.
 	 */
@@ -1253,6 +1255,12 @@ vm_page_remove(
 
 	assert(mem->object->resident_page_count > 0);
 	mem->object->resident_page_count--;
+
+	if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
+		if (mem->object->resident_page_count == 0)
+			vm_object_cache_remove(mem->object);
+	}
+
 	if (VM_PAGE_WIRED(mem)) {
 		assert(mem->object->wired_page_count > 0);
 		mem->object->wired_page_count--;
@@ -1281,6 +1289,9 @@ vm_page_remove(
 			OSAddAtomic(-1, &vm_page_purgeable_count);
 		}
 	}
+	if (mem->object->set_cache_attr == TRUE)
+		pmap_set_cache_attributes(mem->phys_page, 0);
+
 	mem->tabled = FALSE;
 	mem->object = VM_OBJECT_NULL;
 	mem->offset = (vm_object_offset_t) -1;
@@ -1462,9 +1473,27 @@ vm_page_init(
 	boolean_t	lopage)
 {
 	assert(phys_page);
-
 	*mem = vm_page_template;
 	mem->phys_page = phys_page;
+#if 0
+	/*
+	 * we're leaving this turned off for now... currently pages
+	 * come off the free list and are either immediately dirtied/referenced
+	 * due to zero-fill or COW faults, or are used to read or write files...
+	 * in the file I/O case, the UPL mechanism takes care of clearing
+	 * the state of the HW ref/mod bits in a somewhat fragile way.
+	 * Since we may change the way this works in the future (to toughen it up),
+	 * I'm leaving this as a reminder of where these bits could get cleared
+	 */
+
+	/*
+	 * make sure both the h/w referenced and modified bits are
+	 * clear at this point... we are especially dependent on 
+	 * not finding a 'stale' h/w modified bit in a number of spots
+	 * once this page goes back into use
+	 */
+	pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
+#endif
 	mem->lopage = lopage;
 }
 
@@ -1475,24 +1504,25 @@ vm_page_init(
  *	Returns VM_PAGE_NULL if there are no free pages.
  */
 int	c_vm_page_grab_fictitious = 0;
+int	c_vm_page_grab_fictitious_failed = 0;
 int	c_vm_page_release_fictitious = 0;
 int	c_vm_page_more_fictitious = 0;
 
-extern vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
-
 vm_page_t
 vm_page_grab_fictitious_common(
 	ppnum_t phys_addr)
 {
-	register vm_page_t m;
+	vm_page_t	m;
+
+	if ((m = (vm_page_t)zget(vm_page_zone))) {
 
-	m = (vm_page_t)zget(vm_page_zone);
-	if (m) {
 		vm_page_init(m, phys_addr, FALSE);
 		m->fictitious = TRUE;
-	}
 
-	c_vm_page_grab_fictitious++;
+		c_vm_page_grab_fictitious++;
+	} else
+		c_vm_page_grab_fictitious_failed++;
+
 	return m;
 }
 
@@ -1508,35 +1538,30 @@ vm_page_grab_guard(void)
 	return vm_page_grab_fictitious_common(vm_page_guard_addr);
 }
 
+
 /*
  *	vm_page_release_fictitious:
  *
- *	Release a fictitious page to the free list.
+ *	Release a fictitious page to the zone pool
  */
-
 void
 vm_page_release_fictitious(
-	register vm_page_t m)
+	vm_page_t m)
 {
 	assert(!m->free);
-	assert(m->busy);
 	assert(m->fictitious);
 	assert(m->phys_page == vm_page_fictitious_addr ||
 	       m->phys_page == vm_page_guard_addr);
 
 	c_vm_page_release_fictitious++;
-#if DEBUG
-	if (m->free)
-		panic("vm_page_release_fictitious");
-#endif
-	m->free = TRUE;
+
 	zfree(vm_page_zone, m);
 }
 
 /*
  *	vm_page_more_fictitious:
  *
- *	Add more fictitious pages to the free list.
+ *	Add more fictitious pages to the zone.
  *	Allowed to block. This routine is way intimate
  *	with the zones code, for several reasons:
  *	1. we need to carve some page structures out of physical
@@ -1550,23 +1575,13 @@ vm_page_release_fictitious(
  *	   permanent allocation of a resource.
  *	3. To smooth allocation humps, we allocate single pages
  *	   with kernel_memory_allocate(), and cram them into the
- *	   zone. This also allows us to initialize the vm_page_t's
- *	   on the way into the zone, so that zget() always returns
- *	   an initialized structure. The zone free element pointer
- *	   and the free page pointer are both the first item in the
- *	   vm_page_t.
- *	4. By having the pages in the zone pre-initialized, we need
- *	   not keep 2 levels of lists. The garbage collector simply
- *	   scans our list, and reduces physical memory usage as it
- *	   sees fit.
+ *	   zone.
  */
 
 void vm_page_more_fictitious(void)
 {
-	register vm_page_t m;
-	vm_offset_t addr;
-	kern_return_t retval;
-	int i;
+	vm_offset_t	addr;
+	kern_return_t	retval;
 
 	c_vm_page_more_fictitious++;
 
@@ -1605,7 +1620,7 @@ void vm_page_more_fictitious(void)
 					KMA_KOBJECT|KMA_NOPAGEWAIT);
 	if (retval != KERN_SUCCESS) { 
 		/*
-		 * No page was available. Tell the pageout daemon, drop the
+		 * No page was available. Drop the
 		 * lock to give another thread a chance at it, and
 		 * wait for the pageout daemon to make progress.
 		 */
@@ -1613,18 +1628,8 @@ void vm_page_more_fictitious(void)
 		vm_page_wait(THREAD_UNINT);
 		return;
 	}
-	/*
-	 * Initialize as many vm_page_t's as will fit on this page. This
-	 * depends on the zone code disturbing ONLY the first item of
-	 * each zone element.
-	 */
-	m = (vm_page_t)addr;
-	for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) {
-		vm_page_init(m, vm_page_fictitious_addr, FALSE);
-		m->fictitious = TRUE;
-		m++;
-	}
 	zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
+
 	lck_mtx_unlock(&vm_page_alloc_lock);
 }
 
@@ -1719,6 +1724,7 @@ vm_page_grablo(void)
 	return (mem);
 }
 
+
 /*
  *	vm_page_grab:
  *
@@ -1769,6 +1775,10 @@ return_page_from_cpu_list:
 		assert(!mem->encrypted);
 		assert(!mem->pmapped);
 		assert(!mem->wpmapped);
+		assert(!mem->active);
+		assert(!mem->inactive);
+		assert(!mem->throttled);
+		assert(!mem->speculative);
 
 		return mem;
 	}
@@ -1858,6 +1868,11 @@ return_page_from_cpu_list:
 			mem->pageq.next = NULL;
 			mem->pageq.prev = NULL;
 
+			assert(!mem->active);
+			assert(!mem->inactive);
+			assert(!mem->throttled);
+			assert(!mem->speculative);			
+
 			color = (color + 1) & vm_color_mask;
 
 			if (head == NULL)
@@ -1910,25 +1925,8 @@ return_page_from_cpu_list:
 	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
 	        thread_wakeup((event_t) &vm_page_free_wanted);
 
-#if CONFIG_EMBEDDED
-	{
-	int 	percent_avail;
-
-	/*
-	 * Decide if we need to poke the memorystatus notification thread.
-	 */
-	percent_avail = 
-		(vm_page_active_count + vm_page_inactive_count + 
-		 vm_page_speculative_count + vm_page_free_count +
-		 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
-		atop_64(max_mem);
-	if (percent_avail <= (kern_memorystatus_level - 5)) {
-		kern_memorystatus_level = percent_avail;
-		thread_wakeup((event_t)&kern_memorystatus_wakeup);
-	}
-	}
-#endif
-
+	VM_CHECK_MEMORYSTATUS;
+	
 //	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4);	/* (TEST/DEBUG) */
 
 	return mem;
@@ -1947,16 +1945,8 @@ vm_page_release(
 	unsigned int	color;
 	int	need_wakeup = 0;
 	int	need_priv_wakeup = 0;
-#if 0
-	unsigned int pindex;
-	phys_entry *physent;
 
-	physent = mapping_phys_lookup(mem->phys_page, &pindex);		/* (BRINGUP) */
-	if(physent->ppLink & ppN) {											/* (BRINGUP) */
-		panic("vm_page_release: already released - %08X %08X\n", mem, mem->phys_page);
-	}
-	physent->ppLink = physent->ppLink | ppN;							/* (BRINGUP) */
-#endif
+
 	assert(!mem->private && !mem->fictitious);
 	if (vm_page_free_verify) {
 		assert(pmap_verify_free(mem->phys_page));
@@ -1969,6 +1959,7 @@ vm_page_release(
 	if (mem->free)
 		panic("vm_page_release");
 #endif
+
 	assert(mem->busy);
 	assert(!mem->laundry);
 	assert(mem->object == VM_OBJECT_NULL);
@@ -1977,7 +1968,7 @@ vm_page_release(
 	assert(mem->listq.next == NULL &&
 	       mem->listq.prev == NULL);
 	
-	if ((mem->lopage || vm_lopage_refill == TRUE) &&
+	if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
 	    vm_lopage_free_count < vm_lopage_free_limit &&
 	    mem->phys_page < max_valid_low_ppnum) {
 	        /*
@@ -1996,7 +1987,7 @@ vm_page_release(
 
 		mem->lopage = TRUE;
 	} else {	  
-	        mem->lopage = FALSE;
+		mem->lopage = FALSE;
 		mem->free = TRUE;
 
 	        color = mem->phys_page & vm_color_mask;
@@ -2042,25 +2033,7 @@ vm_page_release(
 	else if (need_wakeup)
 		thread_wakeup_one((event_t) &vm_page_free_count);
 
-#if CONFIG_EMBEDDED
-	{
-	int	percent_avail;
-
-	/*
-	 * Decide if we need to poke the memorystatus notification thread.
-	 * Locking is not a big issue, as only a single thread delivers these.
-	 */
-	percent_avail = 
-		(vm_page_active_count + vm_page_inactive_count + 
-		 vm_page_speculative_count + vm_page_free_count +
-		 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count)  ) * 100 /
-		atop_64(max_mem);
-	if (percent_avail >= (kern_memorystatus_level + 5)) {
-		kern_memorystatus_level = percent_avail;
-		thread_wakeup((event_t)&kern_memorystatus_wakeup);
-	}
-	}
-#endif
+	VM_CHECK_MEMORYSTATUS;
 }
 
 /*
@@ -2195,16 +2168,16 @@ vm_page_alloc_guard(
 counter(unsigned int c_laundry_pages_freed = 0;)
 
 /*
- *	vm_page_free:
+ *	vm_page_free_prepare:
  *
- *	Returns the given page to the free list,
- *	disassociating it with any VM object.
+ *	Removes page from any queue it may be on
+ *	and disassociates it from its VM object.
  *
  *	Object and page queues must be locked prior to entry.
  */
 static void
 vm_page_free_prepare(
-	register vm_page_t	mem)
+	vm_page_t	mem)
 {
 	vm_page_free_prepare_queues(mem);
 	vm_page_free_prepare_object(mem, TRUE);
@@ -2247,6 +2220,12 @@ vm_page_free_prepare_queues(
 			mem->object->wired_page_count--;
 			assert(mem->object->resident_page_count >=
 			       mem->object->wired_page_count);
+
+			if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
+				OSAddAtomic(+1, &vm_page_purgeable_count);
+				assert(vm_page_purgeable_wired_count > 0);
+				OSAddAtomic(-1, &vm_page_purgeable_wired_count);
+			}
 		}
 		if (!mem->private && !mem->fictitious)
 			vm_page_wire_count--;
@@ -2265,10 +2244,6 @@ vm_page_free_prepare_object(
 	vm_page_t	mem,
 	boolean_t	remove_from_hash)
 {
-	if (mem->object) {
-	        vm_object_lock_assert_exclusive(mem->object);
-	}
-
 	if (mem->tabled)
 		vm_page_remove(mem, remove_from_hash);	/* clears tabled, object, offset */
 
@@ -2279,21 +2254,7 @@ vm_page_free_prepare_object(
 		mem->fictitious = TRUE;
 		mem->phys_page = vm_page_fictitious_addr;
 	}
-	if (mem->fictitious) {
-		/* Some of these may be unnecessary */
-		mem->gobbled = FALSE;
-		mem->busy = TRUE;
-		mem->absent = FALSE;
-		mem->error = FALSE;
-		mem->dirty = FALSE;
-		mem->precious = FALSE;
-		mem->reference = FALSE;
-		mem->encrypted = FALSE;
-		mem->encrypted_cleaning = FALSE;
-		mem->pmapped = FALSE;
-		mem->wpmapped = FALSE;
-		mem->reusable = FALSE;
-	} else {
+	if ( !mem->fictitious) {
 		if (mem->zero_fill == TRUE)
 		        VM_ZF_COUNT_DECR();
 		vm_page_init(mem, mem->phys_page, mem->lopage);
@@ -2301,11 +2262,20 @@ vm_page_free_prepare_object(
 }
 
 
+/*
+ *	vm_page_free:
+ *
+ *	Returns the given page to the free list,
+ *	disassociating it with any VM object.
+ *
+ *	Object and page queues must be locked prior to entry.
+ */
 void
 vm_page_free(
 	vm_page_t	mem)
 {
 	vm_page_free_prepare(mem);
+
 	if (mem->fictitious) {
 		vm_page_release_fictitious(mem);
 	} else {
@@ -2373,9 +2343,9 @@ vm_page_free_list(
 		if (vm_page_free_verify && !mem->fictitious && !mem->private) {
 			assert(pmap_verify_free(mem->phys_page));
 		}
-		assert(mem->busy);
 
 		if (!mem->fictitious) {
+			assert(mem->busy);
 			if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
 			    vm_lopage_free_count < vm_lopage_free_limit &&
 			    mem->phys_page < max_valid_low_ppnum) {
@@ -2518,24 +2488,8 @@ vm_page_free_list(
 			 */
 			thread_wakeup_one((event_t) &vm_page_free_count);
 		}
-#if CONFIG_EMBEDDED
-		{
-		int percent_avail;
 
-		/*
-		 * Decide if we need to poke the memorystatus notification thread.
-		 */
-		percent_avail = 
-			(vm_page_active_count + vm_page_inactive_count + 
-			 vm_page_speculative_count + vm_page_free_count +
-			 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count)  ) * 100 /
-			atop_64(max_mem);
-		if (percent_avail >= (kern_memorystatus_level + 5)) {
-			kern_memorystatus_level = percent_avail;
-			thread_wakeup((event_t)&kern_memorystatus_wakeup);
-		}
-		}
-#endif
+		VM_CHECK_MEMORYSTATUS;
 	}
 }
 
@@ -2614,24 +2568,9 @@ vm_page_wire(
 			mem->zero_fill = FALSE;
 		        VM_ZF_COUNT_DECR();
 		}
-#if CONFIG_EMBEDDED
-		{
-		int 	percent_avail;
 
-		/*
-		 * Decide if we need to poke the memorystatus notification thread.
-		 */
-		percent_avail = 
-			(vm_page_active_count + vm_page_inactive_count + 
-			 vm_page_speculative_count + vm_page_free_count +
-			 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
-			atop_64(max_mem);
-		if (percent_avail <= (kern_memorystatus_level - 5)) {
-			kern_memorystatus_level = percent_avail;
-			thread_wakeup((event_t)&kern_memorystatus_wakeup);
-		}
-		}
-#endif
+		VM_CHECK_MEMORYSTATUS;
+		
 		/* 
 		 * ENCRYPTED SWAP:
 		 * The page could be encrypted, but
@@ -2719,24 +2658,9 @@ vm_page_unwire(
 				vm_page_activate(mem);
 			}
 		}
-#if CONFIG_EMBEDDED
-		{
-		int 	percent_avail;
 
-		/*
-		 * Decide if we need to poke the memorystatus notification thread.
-		 */
-		percent_avail = 
-			(vm_page_active_count + vm_page_inactive_count + 
-			 vm_page_speculative_count + vm_page_free_count +
-			 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
-			atop_64(max_mem);
-		if (percent_avail >= (kern_memorystatus_level + 5)) {
-			kern_memorystatus_level = percent_avail;
-			thread_wakeup((event_t)&kern_memorystatus_wakeup);
-		}
-		}
-#endif
+		VM_CHECK_MEMORYSTATUS;
+		
 	}
 	VM_PAGE_CHECK(mem);
 }
@@ -2779,9 +2703,7 @@ vm_page_deactivate_internal(
 	 *	inactive queue.  Note wired pages should not have
 	 *	their reference bit cleared.
 	 */
-
-	if (m->absent && !m->unusual)
-		panic("vm_page_deactivate: %p absent", m);
+	assert ( !(m->absent && !m->unusual));
 
 	if (m->gobbled) {		/* can this happen? */
 		assert( !VM_PAGE_WIRED(m));
@@ -2791,10 +2713,10 @@ vm_page_deactivate_internal(
 		vm_page_gobble_count--;
 		m->gobbled = FALSE;
 	}
-	if (m->private || (VM_PAGE_WIRED(m)))
+	if (m->private || m->fictitious || (VM_PAGE_WIRED(m)))
 		return;
 
-	if (!m->fictitious && !m->absent && clear_hw_reference == TRUE)
+	if (!m->absent && clear_hw_reference == TRUE)
 		pmap_clear_reference(m->phys_page);
 
 	m->reference = FALSE;
@@ -2806,7 +2728,7 @@ vm_page_deactivate_internal(
 		assert(!m->laundry);
 		assert(m->pageq.next == NULL && m->pageq.prev == NULL);
 
-		if (!IP_VALID(memory_manager_default) &&
+		if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
 		    m->dirty && m->object->internal &&
 		    (m->object->purgable == VM_PURGABLE_DENY ||
 		     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
@@ -2815,24 +2737,13 @@ vm_page_deactivate_internal(
 			m->throttled = TRUE;
 			vm_page_throttled_count++;
 		} else {
-			if (!m->fictitious && m->object->named && m->object->ref_count == 1) {
+			if (m->object->named && m->object->ref_count == 1) {
 			        vm_page_speculate(m, FALSE);
 #if DEVELOPMENT || DEBUG
 				vm_page_speculative_recreated++;
 #endif
-				return;
 			} else {
-				if (m->zero_fill) {
-					queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq);
-					vm_zf_queue_count++;
-				} else {
-					queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
-				}
-			}
-			m->inactive = TRUE;
-			if (!m->fictitious) {
-			        vm_page_inactive_count++;
-				token_new_pagecount++;
+				VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
 			}
 		}
 	}
@@ -2858,9 +2769,7 @@ vm_page_activate(
 #if DEBUG
 	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
 #endif
-
-	if (m->absent && !m->unusual)
-		panic("vm_page_activate: %p absent", m);
+	assert( !(m->absent && !m->unusual));
 
 	if (m->gobbled) {
 		assert( !VM_PAGE_WIRED(m));
@@ -2869,7 +2778,7 @@ vm_page_activate(
 		vm_page_gobble_count--;
 		m->gobbled = FALSE;
 	}
-	if (m->private)
+	if (m->private || m->fictitious)
 		return;
 
 #if DEBUG
@@ -2887,8 +2796,8 @@ vm_page_activate(
 	if ( !VM_PAGE_WIRED(m)) {
 		assert(!m->laundry);
 		assert(m->pageq.next == NULL && m->pageq.prev == NULL);
-		if (!IP_VALID(memory_manager_default) && 
-		    !m->fictitious && m->dirty && m->object->internal && 
+		if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && 
+		    m->dirty && m->object->internal && 
 		    (m->object->purgable == VM_PURGABLE_DENY ||
 		     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
 		     m->object->purgable == VM_PURGABLE_VOLATILE)) {
@@ -2898,8 +2807,7 @@ vm_page_activate(
 		} else {
 			queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
 			m->active = TRUE;
-			if (!m->fictitious)
-				vm_page_active_count++;
+			vm_page_active_count++;
 		}
 		m->reference = TRUE;
 		m->no_cache = FALSE;
@@ -2928,9 +2836,10 @@ vm_page_speculate(
 #if DEBUG
 	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
 #endif
+	assert( !(m->absent && !m->unusual));
 
-	if (m->absent && !m->unusual)
-		panic("vm_page_speculate: %p absent", m);
+	if (m->private || m->fictitious)
+		return;
 
 	VM_PAGE_QUEUES_REMOVE(m);		
 
@@ -2953,8 +2862,8 @@ vm_page_speculate(
 		        /*
 			 * set the timer to begin a new group
 			 */
-			aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
-			aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
+			aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
+			aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
 
 			ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
 		} else {
@@ -2977,8 +2886,8 @@ vm_page_speculate(
 				if (!queue_empty(&aq->age_q))
 				        vm_page_speculate_ageit(aq);
 
-				aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
-				aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
+				aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
+				aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
 
 				ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
 			}
@@ -2988,6 +2897,8 @@ vm_page_speculate(
 		vm_page_speculative_count++;
 
 		if (new == TRUE) {
+			vm_object_lock_assert_exclusive(m->object);
+
 		        m->object->pages_created++;
 #if DEVELOPMENT || DEBUG
 			vm_page_speculative_created++;
@@ -3061,11 +2972,7 @@ vm_page_lru(
 	assert(!m->laundry);
 	assert(m->pageq.next == NULL && m->pageq.prev == NULL);
 
-	queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
-	m->inactive = TRUE;
-
-        vm_page_inactive_count++;
-	token_new_pagecount++;
+	VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
 }
 
 
@@ -3077,6 +2984,9 @@ vm_page_reactivate_all_throttled(void)
 	vm_page_t	m;
 	int		extra_active_count;
 
+	if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
+		return;
+
 	extra_active_count = 0;
 	vm_page_lock_queues();
 	if (! queue_empty(&vm_page_queue_throttled)) {
@@ -3090,9 +3000,9 @@ vm_page_reactivate_all_throttled(void)
 			assert(!m->inactive);
 			assert(!m->speculative);
 			assert(!VM_PAGE_WIRED(m));
-			if (!m->fictitious) {
-				extra_active_count++;
-			}
+
+			extra_active_count++;
+
 			m->throttled = FALSE;
 			m->active = TRUE;
 			VM_PAGE_CHECK(m);
@@ -3350,6 +3260,17 @@ vm_page_copy(
 		vm_page_copy_cs_validations++;
 		vm_page_validate_cs(src_m);
 	}
+
+	if (vm_page_is_slideable(src_m)) {
+		boolean_t was_busy = src_m->busy;
+		src_m->busy = TRUE;
+		(void) vm_page_slide(src_m, 0);
+		assert(src_m->busy);
+		if(!was_busy) {
+			PAGE_WAKEUP_DONE(src_m);
+		}
+	}
+
 	/*
 	 * Propagate the cs_tainted bit to the copy page. Do not propagate
 	 * the cs_validated bit.
@@ -3358,7 +3279,8 @@ vm_page_copy(
 	if (dest_m->cs_tainted) {
 		vm_page_copy_cs_tainted++;
 	}
-
+	dest_m->slid = src_m->slid;
+	dest_m->error = src_m->error; /* sliding src_m might have failed... */
 	pmap_copy_page(src_m->phys_page, dest_m->phys_page);
 }
 
@@ -3439,7 +3361,7 @@ vm_page_verify_contiguous(
 		if (m->phys_page != prev_addr + 1) {
 			printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
 			       m, (long)prev_addr, m->phys_page);
-			printf("pages %p page_count %d\n", pages, page_count);
+			printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
 			panic("vm_page_verify_contiguous:  not contiguous!");
 		}
 		prev_addr = m->phys_page;
@@ -3476,21 +3398,24 @@ vm_page_verify_free_list(
 		      m,
 		      vm_page_t,
 		      pageq) {
+
 		if (m == look_for_page) {
 			found_page = TRUE;
 		}
 		if ((vm_page_t) m->pageq.prev != prev_m)
 			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
 			      color, npages, m, m->pageq.prev, prev_m);
-		if ( ! m->free )
-			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
-			      color, npages, m);
 		if ( ! m->busy )
 			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
 			      color, npages, m);
-		if ( color != (unsigned int) -1 && (m->phys_page & vm_color_mask) != color)
-			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
-			      color, npages, m, m->phys_page & vm_color_mask, color);
+		if (color != (unsigned int) -1) {
+			if ((m->phys_page & vm_color_mask) != color)
+				panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
+				      color, npages, m, m->phys_page & vm_color_mask, color);
+			if ( ! m->free )
+				panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
+				      color, npages, m);
+		}
 		++npages;
 		prev_m = m;
 	}
@@ -3507,13 +3432,12 @@ vm_page_verify_free_list(
 				if (other_color == color)
 					continue;
 				vm_page_verify_free_list(&vm_page_queue_free[other_color],
-							other_color, look_for_page, FALSE);
+							 other_color, look_for_page, FALSE);
 			}
-			if (color != (unsigned int) -1) {
+			if (color == (unsigned int) -1) {
 				vm_page_verify_free_list(&vm_lopage_queue_free,
 							 (unsigned int) -1, look_for_page, FALSE);
 			}
-
 			panic("vm_page_verify_free_list(color=%u)\n", color);
 		}
 		if (!expect_page && found_page) {
@@ -3539,9 +3463,8 @@ vm_page_verify_free_lists( void )
 
 	for( color = 0; color < vm_colors; color++ ) {
 		npages += vm_page_verify_free_list(&vm_page_queue_free[color],
-						color, VM_PAGE_NULL, FALSE);
+						   color, VM_PAGE_NULL, FALSE);
 	}
-
 	nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
 					    (unsigned int) -1,
 					    VM_PAGE_NULL, FALSE);
@@ -3549,6 +3472,7 @@ vm_page_verify_free_lists( void )
 		panic("vm_page_verify_free_lists:  "
 		      "npages %u free_count %d nlopages %u lo_free_count %u",
 		      npages, vm_page_free_count, nlopages, vm_lopage_free_count);
+
 	lck_mtx_unlock(&vm_page_queue_free_lock);
 }
 
@@ -3717,7 +3641,7 @@ retry:
 			/* no more low pages... */
 			break;
 		}
-		if (!npages && ((m->phys_page & pnum_mask) != 0)) {
+		if (!npages & ((m->phys_page & pnum_mask) != 0)) {
 			/*
 			 * not aligned
 			 */
@@ -3901,8 +3825,7 @@ did_consider:
 
 				color = m1->phys_page & vm_color_mask;
 #if MACH_ASSERT
-				vm_page_verify_free_list(&vm_page_queue_free[color],
-							 color, m1, TRUE);
+				vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
 #endif
 				queue_remove(&vm_page_queue_free[color],
 					     m1,
@@ -3911,8 +3834,7 @@ did_consider:
 				m1->pageq.next = NULL;
 				m1->pageq.prev = NULL;
 #if MACH_ASSERT
-				vm_page_verify_free_list(&vm_page_queue_free[color],
-							 color, VM_PAGE_NULL, FALSE);
+				vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
 #endif
 				/*
 				 * Clear the "free" bit so that this page
@@ -4184,7 +4106,7 @@ cpm_allocate(
 	vm_page_t		pages;
 	unsigned int		npages;
 
-	if (size % page_size != 0)
+	if (size % PAGE_SIZE != 0)
 		return KERN_INVALID_ARGUMENT;
 
 	npages = (unsigned int) (size / PAGE_SIZE);
@@ -4210,24 +4132,8 @@ cpm_allocate(
 	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
 		thread_wakeup((event_t) &vm_page_free_wanted);
 		
-#if CONFIG_EMBEDDED
-	{
-	int			percent_avail;
-
-	/*
-	 * Decide if we need to poke the memorystatus notification thread.
-	 */
-	percent_avail = 
-		(vm_page_active_count + vm_page_inactive_count + 
-		 vm_page_speculative_count + vm_page_free_count +
-		 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count)  ) * 100 /
-		atop_64(max_mem);
-	if (percent_avail <= (kern_memorystatus_level - 5)) {
-		kern_memorystatus_level = percent_avail;
-		thread_wakeup((event_t)&kern_memorystatus_wakeup);
-	}
-	}
-#endif
+	VM_CHECK_MEMORYSTATUS;
+	
 	/*
 	 *	The CPM pages should now be available and
 	 *	ordered by ascending physical address.
@@ -4237,8 +4143,188 @@ cpm_allocate(
 	*list = pages;
 	return KERN_SUCCESS;
 }
+
+
+unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
+
+/*
+ * when working on a 'run' of pages, it is necessary to hold 
+ * the vm_page_queue_lock (a hot global lock) for certain operations
+ * on the page... however, the majority of the work can be done
+ * while merely holding the object lock... in fact there are certain
+ * collections of pages that don't require any work brokered by the
+ * vm_page_queue_lock... to mitigate the time spent behind the global
+ * lock, go to a 2-pass algorithm... collect pages up to DELAYED_WORK_LIMIT
+ * while doing all of the work that doesn't require the vm_page_queue_lock...
+ * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
+ * necessary work for each page... we will grab the busy bit on the page
+ * if it's not already held so that vm_page_do_delayed_work can drop the object lock
+ * if it can't immediately take the vm_page_queue_lock in order to compete
+ * for the locks in the same order that vm_pageout_scan takes them.
+ * the operation names are modeled after the names of the routines that
+ * need to be called in order to make the changes very obvious in the
+ * original loop
+ */
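+/*
+ * Caller pattern, as a sketch (this is how upl_abort_range() and
+ * vm_object_iopl_request() use it in this patch):
+ *
+ *	dwp = &dw_array[0];
+ *	dw_count = 0;
+ *	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
+ *
+ *	for each page m {
+ *		dwp->dw_mask |= <DW_* ops for m>;
+ *		VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
+ *		if (dw_count >= dw_limit) {
+ *			vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+ *			dwp = &dw_array[0];
+ *			dw_count = 0;
+ *		}
+ *	}
+ *	if (dw_count)
+ *		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
+ */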
+
+void
+vm_page_do_delayed_work(
+	vm_object_t 	object,
+	struct vm_page_delayed_work *dwp,
+	int		dw_count)
+{
+	int		j;
+	vm_page_t	m;
+        vm_page_t       local_free_q = VM_PAGE_NULL;
+	boolean_t	dropped_obj_lock = FALSE;
+
+	/*
+	 * pageout_scan takes the vm_page_lock_queues first
+	 * then tries for the object lock... to avoid what
+	 * is effectively a lock inversion, we'll go to the
+	 * trouble of taking them in that same order... otherwise
+	 * if this object contains the majority of the pages resident
+	 * in the UBC (or a small set of large objects actively being
+	 * worked on contain the majority of the pages), we could
+	 * cause the pageout_scan thread to 'starve' in its attempt
+	 * to find pages to move to the free queue, since it has to
+	 * successfully acquire the object lock of any candidate page
+	 * before it can steal/clean it.
+	 */
+	if (!vm_page_trylockspin_queues()) {
+		vm_object_unlock(object);
+
+		vm_page_lockspin_queues();
+
+		for (j = 0; ; j++) {
+			if (!vm_object_lock_avoid(object) &&
+			    _vm_object_lock_try(object))
+				break;
+			vm_page_unlock_queues();
+			mutex_pause(j);
+			vm_page_lockspin_queues();
+		}
+		dropped_obj_lock = TRUE;
+	}
+	for (j = 0; j < dw_count; j++, dwp++) {
+
+		m = dwp->dw_m;
+
+		if (dwp->dw_mask & DW_set_list_req_pending) {
+			m->list_req_pending = TRUE;
+
+			if (dropped_obj_lock == TRUE) {
+				/*
+				 * need to make sure anyone that might have
+				 * blocked on busy == TRUE when we dropped
+				 * the object lock gets a chance to re-evaluate
+				 * its state since we have several places
+				 * where we avoid potential deadlocks with
+				 * the filesystem by stealing pages with
+				 * list_req_pending == TRUE and busy == TRUE
+				 */
+				dwp->dw_mask |= DW_PAGE_WAKEUP;
+			}
+		}
+		if (dwp->dw_mask & DW_vm_pageout_throttle_up)
+			vm_pageout_throttle_up(m);
+
+		if (dwp->dw_mask & DW_vm_page_wire)
+			vm_page_wire(m);
+		else if (dwp->dw_mask & DW_vm_page_unwire) {
+			boolean_t	queueit;
+
+			queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
+
+			vm_page_unwire(m, queueit);
+		}
+		if (dwp->dw_mask & DW_vm_page_free) {
+			vm_page_free_prepare_queues(m);
+
+			assert(m->pageq.next == NULL && m->pageq.prev == NULL);
+			/*
+			 * Add this page to our list of reclaimed pages,
+			 * to be freed later.
+			 */
+			m->pageq.next = (queue_entry_t) local_free_q;
+			local_free_q = m;
+		} else {
+			if (dwp->dw_mask & DW_vm_page_deactivate_internal)
+				vm_page_deactivate_internal(m, FALSE);
+			else if (dwp->dw_mask & DW_vm_page_activate) {
+				if (m->active == FALSE) {
+					vm_page_activate(m);
+				}
+			}
+			else if (dwp->dw_mask & DW_vm_page_speculate)
+				vm_page_speculate(m, TRUE);
+			else if (dwp->dw_mask & DW_vm_page_lru)
+				vm_page_lru(m);
+			else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE)
+				VM_PAGE_QUEUES_REMOVE(m);
+			
+			if (dwp->dw_mask & DW_set_reference)
+				m->reference = TRUE;
+			else if (dwp->dw_mask & DW_clear_reference)
+				m->reference = FALSE;
+
+			if (dwp->dw_mask & DW_move_page) {
+				VM_PAGE_QUEUES_REMOVE(m);
+
+				assert(!m->laundry);
+				assert(m->object != kernel_object);
+				assert(m->pageq.next == NULL &&
+				       m->pageq.prev == NULL);
+
+				VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
+			}
+			if (dwp->dw_mask & DW_clear_busy)
+				m->busy = FALSE;
+
+			if (dwp->dw_mask & DW_PAGE_WAKEUP)
+				PAGE_WAKEUP(m);
+		}
+	}
+	vm_page_unlock_queues();
+
+	if (local_free_q)
+		vm_page_free_list(local_free_q, TRUE);
+	
+	VM_CHECK_MEMORYSTATUS;
+
+}
+
+
 	
 
+void vm_check_memorystatus()
+{
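+	/*
+	 * Wake the memorystatus notification thread whenever the count
+	 * of available pages crosses a kern_memorystatus_delta boundary
+	 * in either direction; in_critical/last_memorystatus provide
+	 * simple hysteresis so we don't thrash the wakeup.
+	 */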
+#if CONFIG_EMBEDDED
+	static boolean_t in_critical = FALSE;
+	static unsigned int last_memorystatus = 0;
+	unsigned int pages_avail;
+	
+	if (!kern_memorystatus_delta) {
+	    return;
+	}
+	
+	pages_avail = (vm_page_active_count + 
+	              vm_page_inactive_count + 
+	              vm_page_speculative_count + 
+	              vm_page_free_count +
+	    	      (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) ? 0 : vm_page_purgeable_count));
+	if ( (!in_critical && (pages_avail < kern_memorystatus_delta)) ||
+	     (pages_avail >= (last_memorystatus + kern_memorystatus_delta)) ||
+	     (last_memorystatus >= (pages_avail + kern_memorystatus_delta)) ) {
+	    kern_memorystatus_level = pages_avail * 100 / atop_64(max_mem);
+	    last_memorystatus = pages_avail;
+		
+	    thread_wakeup((event_t)&kern_memorystatus_wakeup);
+		
+	    in_critical = (pages_avail < kern_memorystatus_delta) ? TRUE : FALSE;
+	}
+#endif
+}
+
 kern_return_t
 vm_page_alloc_list(
 	int	page_count,
@@ -4716,6 +4802,7 @@ hibernate_flush_memory()
 	return (retval);
 }
 
+
 static void
 hibernate_page_list_zero(hibernate_page_list_t *list)
 {
@@ -4819,8 +4906,8 @@ hibernate_consider_discard(vm_page_t m)
            /*
             *	Somebody is playing with this page.
             */
-   	    hibernate_stats.cd_found_busy++;
-	    break;
+	    hibernate_stats.cd_found_busy++;
+            break;
 	}
         if (m->absent || m->unusual || m->error) {
            /*
@@ -4911,6 +4998,7 @@ hibernate_discard_page(vm_page_t m)
 void
 hibernate_page_list_setall(hibernate_page_list_t * page_list,
 			   hibernate_page_list_t * page_list_wired,
+			   hibernate_page_list_t * page_list_pal,
 			   uint32_t * pagesOut)
 {
     uint64_t start, end, nsec;
@@ -4937,6 +5025,7 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list,
 
     hibernate_page_list_zero(page_list);
     hibernate_page_list_zero(page_list_wired);
+    hibernate_page_list_zero(page_list_pal);
 
     hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
     hibernate_stats.cd_pages = pages;
@@ -4955,7 +5044,7 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list,
 	hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
 	m = (vm_page_t) m->pageq.next;
     }
-#ifndef PPC
+
     for( i = 0; i < real_ncpus; i++ )
     {
 	if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
@@ -4972,7 +5061,7 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list,
 	    }
 	}
     }
-#endif
+
     for( i = 0; i < vm_colors; i++ )
     {
 	queue_iterate(&vm_page_queue_free[i],
diff --git a/osfmk/vm/vm_shared_region.c b/osfmk/vm/vm_shared_region.c
index 05fa65614..a5931c998 100644
--- a/osfmk/vm/vm_shared_region.c
+++ b/osfmk/vm/vm_shared_region.c
@@ -124,6 +124,9 @@ int shared_region_persistence = 0;	/* no by default */
 /* delay before reclaiming an unused shared region */
 int shared_region_destroy_delay = 120; /* in seconds */
 
+/* Indicates whether the shared region has been slid; only one region can be slid. */
+boolean_t shared_region_completed_slide = FALSE;
+
 /* this lock protects all the shared region data structures */
 lck_grp_t *vm_shared_region_lck_grp;
 lck_mtx_t vm_shared_region_lock;
@@ -760,8 +763,24 @@ vm_shared_region_destroy(
 		thread_call_free(shared_region->sr_timer_call);
 	}
 
+	if ((slide_info.slide_info_entry != NULL) && (slide_info.sr == shared_region)) {
+		kmem_free(kernel_map,
+			  (vm_offset_t) slide_info.slide_info_entry,
+			  (vm_size_t) slide_info.slide_info_size);
+		vm_object_deallocate(slide_info.slide_object);
+		slide_info.slide_object = NULL;
+		slide_info.start = 0;
+		slide_info.end = 0;	
+		slide_info.slide = 0;
+		slide_info.sr = NULL;
+		slide_info.slide_info_entry = NULL;
+		slide_info.slide_info_size = 0;
+		shared_region_completed_slide = FALSE;
+	}
+
 	/* release the shared region structure... */
 	kfree(shared_region, sizeof (*shared_region));
+
 	SHARED_REGION_TRACE_DEBUG(
 		("shared_region: destroy(%p) <-\n",
 		 shared_region));
@@ -821,6 +840,106 @@ vm_shared_region_start_address(
 
 	return kr;
 }
+
+void
+vm_shared_region_undo_mappings(
+			vm_map_t sr_map,
+			mach_vm_offset_t sr_base_address,
+			struct shared_file_mapping_np *mappings,
+			unsigned int mappings_count)
+{
+	unsigned int		j = 0;
+	vm_shared_region_t	shared_region = NULL;
+	boolean_t		reset_shared_region_state = FALSE;
+	
+	shared_region = vm_shared_region_get(current_task());
+	if (shared_region == NULL) {
+		SHARED_REGION_TRACE_DEBUG(("Failed to undo mappings because of NULL shared region.\n"));
+		return;
+	}
+
+
+	if (sr_map == NULL) {
+		ipc_port_t		sr_handle;
+		vm_named_entry_t	sr_mem_entry;
+
+		vm_shared_region_lock();
+		assert(shared_region->sr_ref_count > 1);
+
+		while (shared_region->sr_mapping_in_progress) {
+			/* wait for our turn... */
+			vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
+					       THREAD_UNINT);
+		}
+		assert(! shared_region->sr_mapping_in_progress);
+		assert(shared_region->sr_ref_count > 1);
+		/* let others know we're working in this shared region */
+		shared_region->sr_mapping_in_progress = TRUE;
+
+		vm_shared_region_unlock();
+
+		reset_shared_region_state = TRUE;
+
+		/* no need to lock because this data is never modified... */
+		sr_handle = shared_region->sr_mem_entry;
+		sr_mem_entry = (vm_named_entry_t) sr_handle->ip_kobject;
+		sr_map = sr_mem_entry->backing.map;
+		sr_base_address = shared_region->sr_base_address;
+	}
+	/*
+	 * Undo the mappings we've established so far.
+	 */
+	for (j = 0; j < mappings_count; j++) {
+		kern_return_t kr2;
+
+		if (mappings[j].sfm_size == 0) {
+			/*
+			 * We didn't establish this
+			 * mapping, so nothing to undo.
+			 */
+			continue;
+		}
+		SHARED_REGION_TRACE_INFO(
+			("shared_region: mapping[%d]: "
+			 "address:0x%016llx "
+			 "size:0x%016llx "
+			 "offset:0x%016llx "
+			 "maxprot:0x%x prot:0x%x: "
+			 "undoing...\n",
+			 j,
+			 (long long)mappings[j].sfm_address,
+			 (long long)mappings[j].sfm_size,
+			 (long long)mappings[j].sfm_file_offset,
+			 mappings[j].sfm_max_prot,
+			 mappings[j].sfm_init_prot));
+		kr2 = mach_vm_deallocate(
+			sr_map,
+			(mappings[j].sfm_address -
+			 sr_base_address),
+			mappings[j].sfm_size);
+		assert(kr2 == KERN_SUCCESS);
+	}
+
+	/*
+	 * This is how check_np() knows if the shared region
+	 * is mapped. So clear it here.
+	 */
+	shared_region->sr_first_mapping = (mach_vm_offset_t) -1;
+
+	if (reset_shared_region_state) {
+		vm_shared_region_lock();
+		assert(shared_region->sr_ref_count > 1);
+		assert(shared_region->sr_mapping_in_progress);
+		/* we're done working on that shared region */
+		shared_region->sr_mapping_in_progress = FALSE;
+		thread_wakeup((event_t) &shared_region->sr_mapping_in_progress);
+		vm_shared_region_unlock();
+		reset_shared_region_state = FALSE;
+	}
+
+	vm_shared_region_deallocate(shared_region);
+}
+
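+/*
+ * Note: vm_shared_region_map_file() below invokes this with
+ * mappings_count set to the number of mappings attempted before the
+ * failure; entries whose sfm_size has been zeroed (mappings that were
+ * never actually established) are skipped by the loop above.
+ */
+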
 /*
  * Establish some mappings of a file in the shared region.
  * This is used by "dyld" via the shared_region_map_np() system call
@@ -838,7 +957,8 @@ vm_shared_region_map_file(
 	struct shared_file_mapping_np	*mappings,
 	memory_object_control_t		file_control,
 	memory_object_size_t		file_size,
-	void				*root_dir)
+	void				*root_dir,
+	struct shared_file_mapping_np	*mapping_to_slide)
 {
 	kern_return_t		kr;
 	vm_object_t		file_object;
@@ -851,6 +971,7 @@ vm_shared_region_map_file(
 	mach_vm_offset_t	target_address;
 	vm_object_t		object;
 	vm_object_size_t	obj_size;
+	boolean_t		found_mapping_to_slide = FALSE;
 
 
 	kr = KERN_SUCCESS;
@@ -921,6 +1042,32 @@ vm_shared_region_map_file(
 			/* file-backed memory */
 			map_port = (ipc_port_t) file_object->pager;
 		}
+		
+		if (mappings[i].sfm_init_prot & VM_PROT_SLIDE) {
+			/*
+			 * This is the mapping that needs to be slid.
+			 */
+			if (found_mapping_to_slide == TRUE) {
+				SHARED_REGION_TRACE_INFO(
+					("shared_region: mapping[%d]: "
+					 "address:0x%016llx size:0x%016llx "
+					 "offset:0x%016llx "
+					 "maxprot:0x%x prot:0x%x "
+					 "will not be slid as only one such mapping is allowed...\n",
+					 i,
+					 (long long)mappings[i].sfm_address,
+					 (long long)mappings[i].sfm_size,
+					 (long long)mappings[i].sfm_file_offset,
+					 mappings[i].sfm_max_prot,
+					 mappings[i].sfm_init_prot));
+			} else {
+				if (mapping_to_slide != NULL) {
+					mapping_to_slide->sfm_file_offset = mappings[i].sfm_file_offset;
+					mapping_to_slide->sfm_size = mappings[i].sfm_size;
+					found_mapping_to_slide = TRUE;
+				}
+			}
+		}
 
 		/* mapping's address is relative to the shared region base */
 		target_address =
@@ -1002,8 +1149,6 @@ vm_shared_region_map_file(
 				mappings[i].sfm_size = 0;
 				kr = KERN_SUCCESS;
 			} else {
-				unsigned int j;
-
 				/* this mapping failed ! */
 				SHARED_REGION_TRACE_ERROR(
 					("shared_region: mapping[%d]: "
@@ -1018,40 +1163,7 @@ vm_shared_region_map_file(
 					 mappings[i].sfm_init_prot,
 					 kr));
 
-				/*
-				 * Undo the mappings we've established so far.
-				 */
-				for (j = 0; j < i; j++) {
-					kern_return_t kr2;
-
-					if (mappings[j].sfm_size == 0) {
-						/*
-						 * We didn't establish this
-						 * mapping, so nothing to undo.
-						 */
-						continue;
-					}
-					SHARED_REGION_TRACE_INFO(
-						("shared_region: mapping[%d]: "
-						 "address:0x%016llx "
-						 "size:0x%016llx "
-						 "offset:0x%016llx "
-						 "maxprot:0x%x prot:0x%x: "
-						 "undoing...\n",
-						 j,
-						 (long long)mappings[j].sfm_address,
-						 (long long)mappings[j].sfm_size,
-						 (long long)mappings[j].sfm_file_offset,
-						 mappings[j].sfm_max_prot,
-						 mappings[j].sfm_init_prot));
-					kr2 = mach_vm_deallocate(
-						sr_map,
-						(mappings[j].sfm_address -
-						 sr_base_address),
-						mappings[j].sfm_size);
-					assert(kr2 == KERN_SUCCESS);
-				}
-
+				vm_shared_region_undo_mappings(sr_map, sr_base_address, mappings, i);
 				break;
 			}
 
@@ -1262,6 +1374,264 @@ done:
 	return kr;
 }
 
+#define SANE_SLIDE_INFO_SIZE		(1024*1024) /* can be changed if needed */
+struct vm_shared_region_slide_info	slide_info;
+
+kern_return_t
+vm_shared_region_sliding_valid(uint32_t slide) {
+
+	kern_return_t kr = KERN_SUCCESS;
+
+	if ((shared_region_completed_slide == TRUE) && slide) {
+		if (slide != slide_info.slide) {
+			SHARED_REGION_TRACE_DEBUG(("Only one shared region can be slid\n"));
+			kr = KERN_FAILURE;
+		} else {
+			/*
+			 * Request to slide again with exactly the
+			 * same slide value that was used before.
+			 * Not technically wrong, but we don't want
+			 * to slide twice, so return a distinct code
+			 * that the caller can treat as "already done".
+			 */
+			kr = KERN_INVALID_ARGUMENT;
+		}
+	}
+	return kr;
+}
+
+kern_return_t
+vm_shared_region_slide_init(
+		mach_vm_size_t	slide_info_size,
+		mach_vm_offset_t start,
+		mach_vm_size_t size,
+		uint32_t slide,
+		memory_object_control_t	sr_file_control)
+{
+	kern_return_t kr = KERN_SUCCESS;
+	vm_object_t object = VM_OBJECT_NULL;
+	vm_object_offset_t offset = 0;
+	
+	vm_map_t map = NULL, cur_map = NULL;
+	boolean_t	is_map_locked = FALSE;
+
+	if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
+		if (kr == KERN_INVALID_ARGUMENT) {
+			/*
+			 * This will happen if we request sliding again 
+			 * with the same slide value that was used earlier
+			 * for the very first sliding.
+			 */
+			kr = KERN_SUCCESS;
+		}
+		return kr;
+	}
+
+	if (slide_info_size > SANE_SLIDE_INFO_SIZE) {
+		SHARED_REGION_TRACE_DEBUG(("Slide_info_size too large: %lx\n", (uintptr_t)slide_info_size));
+		kr = KERN_FAILURE;
+		return kr;
+	}
+
+	if (sr_file_control != MEMORY_OBJECT_CONTROL_NULL) {
+
+		object = memory_object_control_to_vm_object(sr_file_control);
+		vm_object_reference(object);
+		offset = start;
+
+		vm_object_lock_shared(object);
+
+	} else {
+		/*
+		 * Remove this entire "else" block and all "map" references
+		 * once we get rid of the shared_region_slide_np()
+		 * system call. 
+		 */ 
+		vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
+		map = current_map();
+		vm_map_lock_read(map);
+		is_map_locked = TRUE;
+	Retry:
+		cur_map = map;
+		if(!vm_map_lookup_entry(map, start, &entry)) {
+			kr = KERN_INVALID_ARGUMENT;
+		} else {
+			vm_object_t shadow_obj = VM_OBJECT_NULL;
+	 
+			if (entry->is_sub_map == TRUE) { 
+				map = entry->object.sub_map;
+				start -= entry->vme_start;
+				start += entry->offset;
+				vm_map_lock_read(map);
+				vm_map_unlock_read(cur_map);
+				goto Retry;
+			} else {
+				object = entry->object.vm_object;
+				offset = (start - entry->vme_start) + entry->offset;
+			}
+	 
+			vm_object_lock_shared(object);
+			while (object->shadow != VM_OBJECT_NULL) {
+				shadow_obj = object->shadow;
+				vm_object_lock_shared(shadow_obj);
+				vm_object_unlock(object);
+				object = shadow_obj;		
+			}
+		}
+	}
+		
+	if (object->internal == TRUE) {
+		kr = KERN_INVALID_ADDRESS;
+	} else {
+		kr = kmem_alloc(kernel_map,
+				(vm_offset_t *) &slide_info.slide_info_entry,
+				(vm_size_t) slide_info_size);
+		if (kr == KERN_SUCCESS) {
+			slide_info.slide_info_size = slide_info_size;
+			slide_info.slide_object = object;
+			slide_info.start = offset;
+			slide_info.end = slide_info.start + size;	
+			slide_info.slide = slide;
+			slide_info.sr = vm_shared_region_get(current_task());
+			/*
+			 * We want to keep the above reference on the shared region
+			 * because we have a pointer to it in the slide_info.
+			 *
+			 * If we want to have this region get deallocated/freed
+			 * then we will have to make sure that we msync(..MS_INVALIDATE..)
+			 * the pages associated with this shared region. Those pages would
+			 * have been slid with an older slide value.
+			 *
+			 * vm_shared_region_deallocate(slide_info.sr);
+			 */
+			shared_region_completed_slide = TRUE;
+		} else {
+			kr = KERN_FAILURE;
+		}
+	}
+	vm_object_unlock(object);
+
+	if (is_map_locked == TRUE) {
+		vm_map_unlock_read(map);
+	}
+	return kr;
+}
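+
+/*
+ * Note on the shadow-chain walk in the "else" branch above: it uses
+ * hand-over-hand locking (the shadow's lock is taken before the
+ * current object's lock is released), so the chain cannot be torn
+ * down beneath the walk, and we finish holding only the lock of the
+ * bottom-most object, which is expected to be the file-backed one.
+ */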
+
+void *
+vm_shared_region_get_slide_info(void)
+{
+	return (void *)&slide_info;
+}
+
+void *
+vm_shared_region_get_slide_info_entry(void)
+{
+	return (void *)slide_info.slide_info_entry;
+}
+
+
+kern_return_t
+vm_shared_region_slide_sanity_check(void)
+{
+	uint32_t pageIndex = 0;
+	uint16_t entryIndex = 0;
+	uint16_t *toc = NULL;
+	vm_shared_region_slide_info_entry_t s_info;
+	kern_return_t kr;
+
+	s_info = vm_shared_region_get_slide_info_entry();
+	toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
+
+	kr = mach_vm_protect(kernel_map,
+			     (mach_vm_offset_t)(vm_offset_t) slide_info.slide_info_entry,
+			     (mach_vm_size_t) slide_info.slide_info_size,
+			     VM_PROT_READ, TRUE);
+	if (kr != KERN_SUCCESS) {
+		panic("vm_shared_region_slide_sanity_check: mach_vm_protect() error 0x%x\n", kr);
+	}
+
+	for (; pageIndex < s_info->toc_count; pageIndex++) {
+		entryIndex = (uint16_t)(toc[pageIndex]);
+		
+		if (entryIndex >= s_info->entry_count) {
+			printf("No sliding bitmap entry for pageIndex: %d at entryIndex: %d amongst %d entries\n", pageIndex, entryIndex, s_info->entry_count);
+			goto fail;
+		}
+
+	}
+	return KERN_SUCCESS;
+fail:
+	if (slide_info.slide_info_entry != NULL) {
+		kmem_free(kernel_map,
+			  (vm_offset_t) slide_info.slide_info_entry,
+			  (vm_size_t) slide_info.slide_info_size);
+		vm_object_deallocate(slide_info.slide_object);
+		slide_info.slide_object = NULL;
+		slide_info.start = 0;
+		slide_info.end = 0;	
+		slide_info.slide = 0;
+		slide_info.slide_info_entry = NULL;
+		slide_info.slide_info_size = 0;
+		shared_region_completed_slide = FALSE;
+	}
+	return KERN_FAILURE;
+}
+
+kern_return_t
+vm_shared_region_slide(vm_offset_t vaddr, uint32_t pageIndex)
+{
+	uint16_t *toc = NULL;
+	slide_info_entry_toc_t bitmap = NULL;
+	uint32_t i = 0, j = 0;
+	uint8_t b = 0;
+	uint32_t slide = slide_info.slide;
+	int is_64 = task_has_64BitAddr(current_task());
+
+	vm_shared_region_slide_info_entry_t s_info = vm_shared_region_get_slide_info_entry();
+	toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
+	
+	if (pageIndex >= s_info->toc_count) {
+		printf("No slide entry for this page in toc. PageIndex: %d Toc Count: %d\n", pageIndex, s_info->toc_count);
+	} else {
+		uint16_t entryIndex = (uint16_t)(toc[pageIndex]);
+		slide_info_entry_toc_t slide_info_entries = (slide_info_entry_toc_t)((uintptr_t)s_info + s_info->entry_offset);
+		
+		if (entryIndex >= s_info->entry_count) {
+			printf("No sliding bitmap entry for entryIndex: %d amongst %d entries\n", entryIndex, s_info->entry_count);
+		} else {
+			bitmap = &slide_info_entries[entryIndex];
+
+			for(i=0; i < NUM_SLIDING_BITMAPS_PER_PAGE; ++i) {
+				b = bitmap->entry[i];
+				if (b != 0) {
+					for (j = 0; j < 8; ++j) {
+						if (b & (1 << j)) {
+							uint32_t *ptr_to_slide;
+							uint32_t old_value;
+
+							ptr_to_slide = (uint32_t*)((uintptr_t)(vaddr)+(sizeof(uint32_t)*(i*8 +j)));
+							old_value = *ptr_to_slide;
+							*ptr_to_slide += slide;
+							if (is_64 && *ptr_to_slide < old_value) {
+								/*
+								 * We just slid the low 32 bits of a 64-bit pointer
+								 * and it looks like there should have been a carry-over
+								 * to the upper 32 bits.
+								 * The sliding failed...
+								 */
+								printf("vm_shared_region_slide() carry over\n");
+								return KERN_FAILURE;
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	return KERN_SUCCESS;
+}
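+
+/*
+ * A minimal sketch of the relocation walk above, lifted into
+ * self-contained user-space C (hypothetical code, assuming a 4KB
+ * page of 1024 32-bit words and a 128-byte bitmap with one bit per
+ * word):
+ *
+ *	#include <stdint.h>
+ *
+ *	static void
+ *	slide_page(uint32_t page[1024], const uint8_t bitmap[128],
+ *	    uint32_t slide)
+ *	{
+ *		for (int i = 0; i < 128; i++) {
+ *			uint8_t b = bitmap[i];
+ *			if (b == 0)
+ *				continue;	// no pointers in these 8 words
+ *			for (int j = 0; j < 8; j++) {
+ *				if (b & (1 << j))
+ *					page[i * 8 + j] += slide;
+ *			}
+ *		}
+ *	}
+ *
+ * The kernel version additionally checks for 32-bit wraparound on
+ * 64-bit tasks, since only the low half of a 64-bit pointer is slid
+ * here.
+ */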
+
 /******************************************************************************/
 /* Comm page support                                                          */
 /******************************************************************************/
@@ -1368,14 +1738,6 @@ vm_commpage_enter(
 	/* select the appropriate comm page for this task */
 	assert(! (task_has_64BitAddr(task) ^ vm_map_is_64bit(map)));
 	if (task_has_64BitAddr(task)) {
-#ifdef __ppc__
-		/*
-		 * PPC51: ppc64 is limited to 51-bit addresses.
-		 * Memory above that limit is handled specially at the
-		 * pmap level, so do not interfere.
-		 */
-		vm_flags |= VM_FLAGS_NO_PMAP_CHECK;
-#endif /* __ppc__ */
 		commpage_handle = commpage64_handle;
 		commpage_address = (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS;
 		commpage_size = _COMM_PAGE64_AREA_LENGTH;
diff --git a/osfmk/vm/vm_shared_region.h b/osfmk/vm/vm_shared_region.h
index 9cc8ba5af..51742f0b0 100644
--- a/osfmk/vm/vm_shared_region.h
+++ b/osfmk/vm/vm_shared_region.h
@@ -43,6 +43,7 @@
 
 extern int shared_region_version;
 extern int shared_region_persistence;
+extern boolean_t shared_region_completed_slide;
 
 #if DEBUG
 extern int shared_region_debug;
@@ -110,9 +111,41 @@ struct vm_shared_region {
 	thread_call_t		sr_timer_call;
 };
 
+typedef struct vm_shared_region_slide_info_entry	*vm_shared_region_slide_info_entry_t;
+struct vm_shared_region_slide_info_entry {
+	uint32_t	version;
+	uint32_t	toc_offset;	// offset from start of header to table-of-contents
+	uint32_t	toc_count;	// number of entries in toc (same as number of pages in r/w mapping)
+	uint32_t	entry_offset;
+	uint32_t	entry_count;
+};
+
+#define NBBY	8
+#define	NUM_SLIDING_BITMAPS_PER_PAGE	(PAGE_SIZE/sizeof(int)/NBBY) /* 128 */
+typedef struct slide_info_entry_toc	*slide_info_entry_toc_t;
+struct slide_info_entry_toc { 
+	uint8_t entry[NUM_SLIDING_BITMAPS_PER_PAGE];
+};
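+
+/*
+ * Sizing check: with PAGE_SIZE 4096 and 32-bit slide targets
+ * (sizeof(int) == 4), a page holds 4096/4 = 1024 candidate words,
+ * so each per-page bitmap needs 1024/NBBY = 128 bytes, one bit per
+ * word.
+ */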
+
+typedef struct vm_shared_region_slide_info vm_shared_region_slide_info_t;
+struct vm_shared_region_slide_info {
+	mach_vm_offset_t	start;
+	mach_vm_offset_t	end;
+	uint32_t		slide;
+	vm_object_t		slide_object;
+	mach_vm_size_t		slide_info_size;
+	vm_shared_region_slide_info_entry_t	slide_info_entry;
+	vm_shared_region_t	sr;
+};
+
+extern struct vm_shared_region_slide_info	slide_info;
+
 #else  /* !MACH_KERNEL_PRIVATE */
 
 struct vm_shared_region;
+struct vm_shared_region_slide_info;
+struct vm_shared_region_slide_info_entry;
+struct slide_info_entry_toc;
 
 #endif /* MACH_KERNEL_PRIVATE */
 
@@ -145,14 +178,31 @@ extern vm_shared_region_t vm_shared_region_lookup(
 extern kern_return_t vm_shared_region_start_address(
 	struct vm_shared_region	*shared_region,
 	mach_vm_offset_t	*start_address);
+extern void vm_shared_region_undo_mappings(
+			vm_map_t sr_map,
+			mach_vm_offset_t sr_base_address,
+			struct shared_file_mapping_np *mappings,
+			unsigned int mappings_count);
 extern kern_return_t vm_shared_region_map_file(
 	struct vm_shared_region	*shared_region,
 	unsigned int		mappings_count,
 	struct shared_file_mapping_np *mappings,
 	memory_object_control_t	file_control,
 	memory_object_size_t	file_size,
-	void			*root_dir);
-
+	void			*root_dir,
+	struct shared_file_mapping_np *mapping_to_slide);
+extern kern_return_t vm_shared_region_sliding_valid(uint32_t slide);
+extern kern_return_t vm_shared_region_slide_sanity_check(void);
+extern kern_return_t vm_shared_region_slide_init(mach_vm_size_t slide_info_size,
+		mach_vm_offset_t start,
+		mach_vm_size_t size,
+		uint32_t slide,
+		memory_object_control_t);
+extern void* vm_shared_region_get_slide_info(void);
+extern void* vm_shared_region_get_slide_info_entry(void);
+extern kern_return_t vm_shared_region_slide(
+	vm_offset_t	vaddr, 
+	uint32_t pageIndex);
 extern void vm_commpage_init(void);
 extern kern_return_t vm_commpage_enter(
 	struct _vm_map		*map,
diff --git a/osfmk/vm/vm_swapfile_pager.c b/osfmk/vm/vm_swapfile_pager.c
index db8943367..4739455df 100644
--- a/osfmk/vm/vm_swapfile_pager.c
+++ b/osfmk/vm/vm_swapfile_pager.c
@@ -130,6 +130,7 @@ const struct memory_object_pager_ops swapfile_pager_ops = {
 	swapfile_pager_synchronize,
 	swapfile_pager_map,
 	swapfile_pager_last_unmap,
+	NULL, /* data_reclaim */
 	"swapfile pager"
 };
 
@@ -411,7 +412,7 @@ swapfile_pager_data_request(
 			   kernel_mapping,
 			   dst_pnum,
 			   VM_PROT_READ | VM_PROT_WRITE,
-			   dst_object->wimg_bits & VM_WIMG_MASK,
+			   0,
 			   TRUE);
 
 		memset(dst_ptr, '\0', PAGE_SIZE);
diff --git a/osfmk/vm/vm_user.c b/osfmk/vm/vm_user.c
index 582e51fc0..de18c16a1 100644
--- a/osfmk/vm/vm_user.c
+++ b/osfmk/vm/vm_user.c
@@ -1245,6 +1245,32 @@ vm_msync(
 }
 
 
+int
+vm_toggle_entry_reuse(int toggle, int *old_value)
+{
+	vm_map_t map = current_map();
+	
+	if(toggle == VM_TOGGLE_GETVALUE && old_value != NULL){
+		*old_value = map->disable_vmentry_reuse;
+	} else if(toggle == VM_TOGGLE_SET){
+		vm_map_lock(map);
+		map->disable_vmentry_reuse = TRUE;
+		if (map->first_free == vm_map_to_entry(map)) {
+			map->highest_entry_end = vm_map_min(map);
+		} else {
+			map->highest_entry_end = map->first_free->vme_end;
+		}
+		vm_map_unlock(map);
+	} else if (toggle == VM_TOGGLE_CLEAR){
+		vm_map_lock(map);
+		map->disable_vmentry_reuse = FALSE;
+		vm_map_unlock(map);
+	} else
+		return KERN_INVALID_ARGUMENT;
+
+	return KERN_SUCCESS;
+}
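+
+/*
+ * Usage sketch (hypothetical caller): read the current setting, pin
+ * new entries above the current high-water mark, then re-enable
+ * entry reuse:
+ *
+ *	int old_value = 0;
+ *	vm_toggle_entry_reuse(VM_TOGGLE_GETVALUE, &old_value);
+ *	vm_toggle_entry_reuse(VM_TOGGLE_SET, NULL);
+ *	// ... allocations now come from ever-increasing addresses ...
+ *	vm_toggle_entry_reuse(VM_TOGGLE_CLEAR, NULL);
+ */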
+
 /*
  *	mach_vm_behavior_set 
  *
@@ -1804,8 +1830,8 @@ mach_make_memory_entry_64(
 
 	unsigned int		access;
 	vm_prot_t		protections;
+	vm_prot_t		original_protections, mask_protections;
 	unsigned int		wimg_mode;
-	boolean_t		cache_attr = FALSE;
 
 	if (((permission & 0x00FF0000) &
 	     ~(MAP_MEM_ONLY |
@@ -1825,7 +1851,9 @@ mach_make_memory_entry_64(
 		parent_entry = NULL;
 	}
 
-	protections = permission & VM_PROT_ALL;
+	original_protections = permission & VM_PROT_ALL;
+	protections = original_protections;
+	mask_protections = permission & VM_PROT_IS_MASK;
 	access = GET_MAP_MEM(permission);
 
 	user_handle = IP_NULL;
@@ -1846,7 +1874,7 @@ mach_make_memory_entry_64(
 		if(parent_is_object && object != VM_OBJECT_NULL)
 			wimg_mode = object->wimg_bits;
 		else
-			wimg_mode = VM_WIMG_DEFAULT;
+			wimg_mode = VM_WIMG_USE_DEFAULT;
 		if((access != GET_MAP_MEM(parent_entry->protection)) &&
 				!(parent_entry->protection & VM_PROT_WRITE)) { 
 			return KERN_INVALID_RIGHT;
@@ -1856,7 +1884,7 @@ mach_make_memory_entry_64(
 		   wimg_mode = VM_WIMG_IO;
 		} else if (access == MAP_MEM_COPYBACK) {
 		   SET_MAP_MEM(access, parent_entry->protection);
-		   wimg_mode = VM_WIMG_DEFAULT;
+		   wimg_mode = VM_WIMG_USE_DEFAULT;
 		} else if (access == MAP_MEM_WTHRU) {
 		   SET_MAP_MEM(access, parent_entry->protection);
 		   wimg_mode = VM_WIMG_WTHRU;
@@ -1864,29 +1892,14 @@ mach_make_memory_entry_64(
 		   SET_MAP_MEM(access, parent_entry->protection);
 		   wimg_mode = VM_WIMG_WCOMB;
 		}
-		if(parent_is_object && object &&
+		if (parent_is_object && object &&
 			(access != MAP_MEM_NOOP) && 
 			(!(object->nophyscache))) {
-			if(object->wimg_bits != wimg_mode) {
-			   vm_page_t p;
-			   if ((wimg_mode == VM_WIMG_IO)
-				|| (wimg_mode == VM_WIMG_WCOMB))
-				cache_attr = TRUE;
-			   else 
-				cache_attr = FALSE;
-			   vm_object_lock(object);
-			   vm_object_paging_wait(object, THREAD_UNINT);
-		           object->wimg_bits = wimg_mode;
-			   queue_iterate(&object->memq, 
-						p, vm_page_t, listq) {
-				if (!p->fictitious) {
-				        if (p->pmapped)
-					        pmap_disconnect(p->phys_page);
-					if (cache_attr)
-					        pmap_sync_page_attributes_phys(p->phys_page);
-				}
-			   }
-			   vm_object_unlock(object);
+
+			if (object->wimg_bits != wimg_mode) {
+				vm_object_lock(object);
+				vm_object_change_wimg_mode(object, wimg_mode);
+				vm_object_unlock(object);
 			}
 		}
 		if (object_handle)
@@ -1935,7 +1948,7 @@ mach_make_memory_entry_64(
 		if (access == MAP_MEM_IO) {
 			wimg_mode = VM_WIMG_IO;
 		} else if (access == MAP_MEM_COPYBACK) {
-			wimg_mode = VM_WIMG_DEFAULT;
+			wimg_mode = VM_WIMG_USE_DEFAULT;
 		} else if (access == MAP_MEM_WTHRU) {
 			wimg_mode = VM_WIMG_WTHRU;
 		} else if (access == MAP_MEM_WCOMB) {
@@ -1985,6 +1998,7 @@ mach_make_memory_entry_64(
 		}
 
 redo_lookup:
+		protections = original_protections;
 		vm_map_lock_read(target_map);
 
 		/* get the object associated with the target address */
@@ -1992,14 +2006,23 @@ redo_lookup:
 		/* that requested by the caller */
 
 		kr = vm_map_lookup_locked(&target_map, map_offset, 
-			        protections, OBJECT_LOCK_EXCLUSIVE, &version,
-				&object, &obj_off, &prot, &wired,
-				&fault_info,
-				&real_map);
+					  protections | mask_protections,
+					  OBJECT_LOCK_EXCLUSIVE, &version,
+					  &object, &obj_off, &prot, &wired,
+					  &fault_info,
+					  &real_map);
 		if (kr != KERN_SUCCESS) {
 			vm_map_unlock_read(target_map);
 			goto make_mem_done;
 		}
+		if (mask_protections) {
+			/*
+			 * The caller asked us to use the "protections" as
+			 * a mask, so restrict "protections" to what this
+			 * mapping actually allows.
+			 */
+			protections &= prot;
+		}
 		if (((prot & protections) != protections) 
 					|| (object == kernel_object)) {
 			kr = KERN_INVALID_RIGHT;
@@ -2085,6 +2108,14 @@ redo_lookup:
 			 /* JMM - The check below should be reworked instead. */
 			 object->true_share = TRUE;
 		      }
+		if (mask_protections) {
+			/*
+			 * The caller asked us to use the "protections" as
+			 * a mask, so restrict "protections" to what this
+			 * mapping actually allows.
+			 */
+			protections &= map_entry->max_protection;
+		}
 		if(((map_entry->max_protection) & protections) != protections) {
 			 kr = KERN_INVALID_RIGHT;
                          vm_object_unlock(object);
@@ -2113,6 +2144,16 @@ redo_lookup:
 					   next_entry->vme_prev->offset + 
 					   (next_entry->vme_prev->vme_end - 
 				 	   next_entry->vme_prev->vme_start))) {
+					if (mask_protections) {
+						/*
+						 * The caller asked us to use
+						 * the "protections" as a mask,
+						 * so restrict "protections" to
+						 * what this mapping actually
+						 * allows.
+						 */
+						protections &= next_entry->max_protection;
+					}
 					if(((next_entry->max_protection) 
 						& protections) != protections) {
 			 			break;
@@ -2140,7 +2181,7 @@ redo_lookup:
 			/* under us.  */
 
 	      		if ((map_entry->needs_copy  || object->shadowed ||
-			     (object->size > total_size))
+			     (object->vo_size > total_size))
 					&& !object->true_share) {
 				/*
 				 * We have to unlock the VM object before
@@ -2177,7 +2218,7 @@ redo_lookup:
 				 
 		   		/* create a shadow object */
 				vm_object_shadow(&map_entry->object.vm_object,
-						&map_entry->offset, total_size);
+						 &map_entry->offset, total_size);
 				shadow_object = map_entry->object.vm_object;
 				vm_object_unlock(object);
 
@@ -2275,28 +2316,8 @@ redo_lookup:
 		if(real_map != target_map)
 			vm_map_unlock_read(real_map);
 
-		if(object->wimg_bits != wimg_mode) {
-			vm_page_t p;
-
-			vm_object_paging_wait(object, THREAD_UNINT);
-
-			if ((wimg_mode == VM_WIMG_IO)
-			    || (wimg_mode == VM_WIMG_WCOMB))
-				cache_attr = TRUE;
-			else 
-				cache_attr = FALSE;
-
-			queue_iterate(&object->memq, 
-						p, vm_page_t, listq) {
-				if (!p->fictitious) {
-				        if (p->pmapped)
-					        pmap_disconnect(p->phys_page);
-					if (cache_attr)
-					        pmap_sync_page_attributes_phys(p->phys_page);
-				}
-			}
-			object->wimg_bits = wimg_mode;
-		}
+		if (object->wimg_bits != wimg_mode)
+			vm_object_change_wimg_mode(object, wimg_mode);
 
 		/* the size of mapped entry that overlaps with our region */
 		/* which is targeted for share.                           */
@@ -2353,7 +2374,8 @@ redo_lookup:
 		user_entry->is_sub_map = FALSE;
 		user_entry->is_pager = FALSE;
 		user_entry->offset = obj_off;
-		user_entry->protection = permission;
+		user_entry->protection = protections;
+		SET_MAP_MEM(GET_MAP_MEM(permission), user_entry->protection);
 		user_entry->size = map_size;
 
 		/* user_object pager and internal fields are not used */
@@ -2375,6 +2397,14 @@ redo_lookup:
 			goto make_mem_done;
 		}
 
+		if (mask_protections) {
+			/*
+			 * The caller asked us to use the "protections" as
+			 * a mask, so restrict "protections" to what this
+			 * mapping actually allows.
+			 */
+			protections &= parent_entry->protection;
+		}
 		if((protections & parent_entry->protection) != protections) {
 			kr = KERN_PROTECTION_FAILURE;
 			goto make_mem_done;
@@ -2654,7 +2684,7 @@ mach_memory_entry_purgable_control(
 	vm_object_lock(object);
 
 	/* check that named entry covers entire object ? */
-	if (mem_entry->offset != 0 || object->size != mem_entry->size) {
+	if (mem_entry->offset != 0 || object->vo_size != mem_entry->size) {
 		vm_object_unlock(object);
 		named_entry_unlock(mem_entry);
 		return KERN_INVALID_ARGUMENT;
@@ -3058,7 +3088,7 @@ vm_map_get_phys_page(
 			/* If they are not present in the object they will  */
 			/* have to be picked up from the pager through the  */
 			/* fault mechanism.  */
-			if(entry->object.vm_object->shadow_offset == 0) {
+			if(entry->object.vm_object->vo_shadow_offset == 0) {
 				/* need to call vm_fault */
 				vm_map_unlock(map);
 				vm_fault(map, map_offset, VM_PROT_NONE, 
@@ -3068,7 +3098,7 @@ vm_map_get_phys_page(
 			}
 			offset = entry->offset + (map_offset - entry->vme_start);
 			phys_page = (ppnum_t)
-				((entry->object.vm_object->shadow_offset 
+				((entry->object.vm_object->vo_shadow_offset 
 							+ offset) >> 12);
 			break;
 			
@@ -3083,7 +3113,7 @@ vm_map_get_phys_page(
 					vm_object_t old_object;
 					vm_object_lock(object->shadow);
 					old_object = object;
-					offset = offset + object->shadow_offset;
+					offset = offset + object->vo_shadow_offset;
 					object = object->shadow;
 					vm_object_unlock(old_object);
 				} else {
diff --git a/osfmk/x86_64/bzero.s b/osfmk/x86_64/bzero.s
index cb2426300..fcfdf7245 100644
--- a/osfmk/x86_64/bzero.s
+++ b/osfmk/x86_64/bzero.s
@@ -88,7 +88,7 @@ ENTRY(memset)
  * void bzero(char * addr, size_t length)
  */
 Entry(blkclr)
-ENTRY(bzero)
+ENTRY2(bzero,__bzero)
 	movq	%rsi,%rcx
 	xorq	%rax,%rax
 	shrq	$3,%rcx
diff --git a/osfmk/x86_64/copyio.c b/osfmk/x86_64/copyio.c
new file mode 100644
index 000000000..1adb732e5
--- /dev/null
+++ b/osfmk/x86_64/copyio.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <mach_assert.h>
+
+#include <sys/errno.h>
+#include <i386/param.h>
+#include <i386/misc_protos.h>
+#include <i386/cpu_data.h>
+#include <i386/machine_routines.h>
+#include <i386/cpuid.h>
+#include <i386/vmx.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_fault.h>
+
+#include <sys/kdebug.h>
+
+static int copyio(int, user_addr_t, char *, vm_size_t, vm_size_t *, int);
+static int copyio_phys(addr64_t, addr64_t, vm_size_t, int);
+
+/*
+ * The copy engine has the following characteristics
+ *   - copyio() handles copies to/from user or kernel space
+ *   - copypv() deals with physical or virtual addresses
+ *
+ * Readers familiar with the 32-bit kernel will expect Joe's thesis at this
+ * point describing the full glory of the copy window implementation. In K64,
+ * however, there is no need for windowing. Thanks to the vast shared address
+ * space, the kernel has direct access to userspace and to physical memory.
+ *
+ * User virtual addresses are accessible provided the user's cr3 is loaded.
+ * Physical addresses are accessible via the direct map and the PHYSMAP_PTOV()
+ * translation.
+ *
+ * Copyin/out variants all boil down to just these two routines in locore.s, which
+ * provide fault-recoverable copying:
+ */
+extern int _bcopy(const void *, void *, vm_size_t);
+extern int _bcopystr(const void *, void *, vm_size_t, vm_size_t *);
+
+
+/*
+ * Types of copies:
+ */
+#define COPYIN		0	/* from user virtual to kernel virtual */
+#define COPYOUT		1	/* from kernel virtual to user virtual */
+#define COPYINSTR	2	/* string variant of copyin */
+#define COPYINPHYS	3	/* from user virtual to kernel physical */
+#define COPYOUTPHYS	4	/* from kernel physical to user virtual */
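+
+/*
+ * The public entry points below map onto these types as follows:
+ * copyin()/copyinmsg() use COPYIN, copyout()/copyoutmsg() use
+ * COPYOUT, copyinstr() uses COPYINSTR, and copyio_phys() picks
+ * COPYINPHYS or COPYOUTPHYS on behalf of copypv().
+ */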
+
+static int
+copyio(int copy_type, user_addr_t user_addr, char *kernel_addr,
+       vm_size_t nbytes, vm_size_t *lencopied, int use_kernel_map)
+{
+        thread_t	thread;
+	pmap_t		pmap;
+	vm_size_t	bytes_copied;
+	int		error = 0;
+	boolean_t	istate = FALSE;
+	boolean_t	recursive_CopyIOActive;
+#if KDEBUG
+	int		debug_type = 0xeff70010;
+	debug_type += (copy_type << 2);
+#endif
+
+	thread = current_thread();
+
+	KERNEL_DEBUG(debug_type | DBG_FUNC_START,
+		     (unsigned)(user_addr >> 32), (unsigned)user_addr,
+		     nbytes, thread->machine.copyio_state, 0);
+
+	if (nbytes == 0)
+		goto out;
+
+        pmap = thread->map->pmap;
+
+	if ((copy_type != COPYINPHYS) && (copy_type != COPYOUTPHYS) && ((vm_offset_t)kernel_addr < VM_MIN_KERNEL_AND_KEXT_ADDRESS)) {
+		panic("Invalid copy parameter, copy type: %d, kernel address: %p", copy_type, kernel_addr);
+	}
+
+	/* Sanity and security check for addresses to/from a user */
+
+	if (((pmap != kernel_pmap) && (use_kernel_map == 0)) &&
+	    ((nbytes && (user_addr+nbytes <= user_addr)) || ((user_addr + nbytes) > vm_map_max(thread->map)))) {
+		error = EFAULT;
+		goto out;
+	}
+
+	/*
+	 * If the no_shared_cr3 boot-arg is set (true), the kernel runs on 
+	 * its own pmap and cr3 rather than the user's -- so that wild accesses
+	 * from kernel or kexts can be trapped. So, during copyin and copyout,
+	 * we need to switch back to the user's map/cr3. The thread is flagged
+	 * "CopyIOActive" at this time so that if the thread is pre-empted,
+	 * we will later restore the correct cr3.
+	 */
+	recursive_CopyIOActive = thread->machine.specFlags & CopyIOActive;
+	thread->machine.specFlags |= CopyIOActive;
+	if (no_shared_cr3) {
+		istate = ml_set_interrupts_enabled(FALSE);
+ 		if (get_cr3_base() != pmap->pm_cr3)
+			set_cr3_raw(pmap->pm_cr3);
+	}
+
+	/*
+	 * Ensure that we're running on the target thread's cr3.
+	 */
+	if ((pmap != kernel_pmap) && !use_kernel_map &&
+	    (get_cr3_base() != pmap->pm_cr3)) {
+		panic("copyio(%d,%p,%p,%ld,%p,%d) cr3 is %p expects %p",
+			copy_type, (void *)user_addr, kernel_addr, nbytes, lencopied, use_kernel_map,
+			(void *) get_cr3_raw(), (void *) pmap->pm_cr3);
+	}
+	if (no_shared_cr3)
+		(void) ml_set_interrupts_enabled(istate);
+
+	KERNEL_DEBUG(0xeff70044 | DBG_FUNC_NONE, (unsigned)user_addr,
+		     (unsigned)kernel_addr, nbytes, 0, 0);
+
+        switch (copy_type) {
+
+	case COPYIN:
+	        error = _bcopy((const void *) user_addr,
+				kernel_addr,
+				nbytes);
+		break;
+			
+	case COPYOUT:
+	        error = _bcopy(kernel_addr,
+				(void *) user_addr,
+				nbytes);
+		break;
+
+	case COPYINPHYS:
+	        error = _bcopy((const void *) user_addr,
+				PHYSMAP_PTOV(kernel_addr),
+				nbytes);
+		break;
+
+	case COPYOUTPHYS:
+	        error = _bcopy((const void *) PHYSMAP_PTOV(kernel_addr),
+				(void *) user_addr,
+				nbytes);
+		break;
+
+	case COPYINSTR:
+	        error = _bcopystr((const void *) user_addr,
+				kernel_addr,
+				(int) nbytes,
+				&bytes_copied);
+
+		/*
+		 * lencopied should be updated on success
+		 * or ENAMETOOLONG...  but not EFAULT
+		 */
+		if (error != EFAULT)
+		        *lencopied = bytes_copied;
+
+		if (error) {
+#if KDEBUG
+		        nbytes = *lencopied;
+#endif
+		        break;
+		}
+		if (*(kernel_addr + bytes_copied - 1) == 0) {
+		        /*
+			 * we found a NULL terminator... we're done
+			 */
+#if KDEBUG
+		        nbytes = *lencopied;
+#endif
+			break;
+		} else {
+		        /*
+			 * no more room in the buffer and we haven't
+			 * yet come across a NULL terminator
+			 */
+#if KDEBUG
+		        nbytes = *lencopied;
+#endif
+		        error = ENAMETOOLONG;
+			break;
+		}
+		break;
+	}
+
+	if (!recursive_CopyIOActive)
+		thread->machine.specFlags &= ~CopyIOActive;
+	if (no_shared_cr3) {
+		istate = ml_set_interrupts_enabled(FALSE);
+		if  (get_cr3_raw() != kernel_pmap->pm_cr3)
+			set_cr3_raw(kernel_pmap->pm_cr3);
+		(void) ml_set_interrupts_enabled(istate);
+	}
+
+out:
+	KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)user_addr,
+		     (unsigned)kernel_addr, (unsigned)nbytes, error, 0);
+
+	return (error);
+}
+
+
+static int
+copyio_phys(addr64_t source, addr64_t sink, vm_size_t csize, int which)
+{
+        char	    *paddr;
+	user_addr_t vaddr;
+	int         ctype;
+
+	if (which & cppvPsnk) {
+		paddr  = (char *)sink;
+	        vaddr  = (user_addr_t)source;
+		ctype  = COPYINPHYS;
+	} else {
+	        paddr  = (char *)source;
+		vaddr  = (user_addr_t)sink;
+		ctype  = COPYOUTPHYS;
+	}
+	return copyio(ctype, vaddr, paddr, csize, NULL, which & cppvKmap);
+}
+
+int
+copyinmsg(const user_addr_t user_addr, char *kernel_addr, mach_msg_size_t nbytes)
+{
+    return copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0);
+}    
+
+int
+copyin(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes)
+{
+    return copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0);
+}
+
+int
+copyinstr(const user_addr_t user_addr,  char *kernel_addr, vm_size_t nbytes, vm_size_t *lencopied)
+{
+    *lencopied = 0;
+
+    return copyio(COPYINSTR, user_addr, kernel_addr, nbytes, lencopied, 0);
+}
+
+int
+copyoutmsg(const char *kernel_addr, user_addr_t user_addr, mach_msg_size_t nbytes)
+{
+    return copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0);
+}
+
+int
+copyout(const void *kernel_addr, user_addr_t user_addr, vm_size_t nbytes)
+{
+    return copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0);
+}
+
+
+kern_return_t
+copypv(addr64_t src64, addr64_t snk64, unsigned int size, int which)
+{
+	unsigned int lop, csize;
+	int bothphys = 0;
+	
+	KERNEL_DEBUG(0xeff7004c | DBG_FUNC_START, (unsigned)src64,
+		     (unsigned)snk64, size, which, 0);
+
+	if ((which & (cppvPsrc | cppvPsnk)) == 0)				/* At most one side may be virtual */
+		panic("copypv: no more than 1 parameter may be virtual\n");	/* Not allowed */
+
+	if ((which & (cppvPsrc | cppvPsnk)) == (cppvPsrc | cppvPsnk))
+	        bothphys = 1;							/* both are physical */
+
+	while (size) {
+	  
+	        if (bothphys) {
+		        lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1)));		/* Assume sink smallest */
+
+			if (lop > (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1))))
+			        lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)));	/* No, source is smaller */
+		} else {
+		        /*
+			 * only need to compute the resid for the physical page
+			 * address... we don't care about where we start/finish in
+			 * the virtual since we just call the normal copyin/copyout
+			 */
+		        if (which & cppvPsrc)
+			        lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)));
+			else
+			        lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1)));
+		}
+		csize = size;						/* Assume we can copy it all */
+		if (lop < size)
+		        csize = lop;					/* Nope, we can't do it all */
+#if 0		
+		/*
+		 * flush_dcache64 is currently a nop on the i386... 
+		 * it's used when copying to non-system memory such
+		 * as video capture cards... on PPC there was a need
+		 * to flush due to how we mapped this memory... not
+		 * sure if it's needed on i386.
+		 */
+		if (which & cppvFsrc)
+		        flush_dcache64(src64, csize, 1);		/* If requested, flush source before move */
+		if (which & cppvFsnk)
+		        flush_dcache64(snk64, csize, 1);		/* If requested, flush sink before move */
+#endif
+		if (bothphys)
+		        bcopy_phys(src64, snk64, csize);		/* Do a physical copy, virtually */
+		else {
+		        if (copyio_phys(src64, snk64, csize, which))
+			        return (KERN_FAILURE);
+		}
+#if 0
+		if (which & cppvFsrc)
+		        flush_dcache64(src64, csize, 1);	/* If requested, flush source after move */
+		if (which & cppvFsnk)
+		        flush_dcache64(snk64, csize, 1);	/* If requested, flush sink after move */
+#endif
+		size   -= csize;					/* Calculate what is left */
+		snk64 += csize;					/* Bump sink to next physical address */
+		src64 += csize;					/* Bump source to next physical address */
+	}
+	KERNEL_DEBUG(0xeff7004c | DBG_FUNC_END, (unsigned)src64,
+		     (unsigned)snk64, size, which, 0);
+
+	return KERN_SUCCESS;
+}
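+
+/*
+ * Worked example of the chunking above (illustrative addresses):
+ * with both sides physical, src64 = 0x1F80 and snk64 = 0x2F00, the
+ * sink page has 4096 - 0xF00 = 256 bytes left while the source page
+ * has 4096 - 0xF80 = 128, so lop = 128; the first pass copies
+ * csize = min(lop, size) bytes, then bumps both addresses by csize
+ * and loops for the remainder.
+ */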
diff --git a/osfmk/x86_64/cswitch.s b/osfmk/x86_64/cswitch.s
index 6abb9a22c..c0f3715c4 100644
--- a/osfmk/x86_64/cswitch.s
+++ b/osfmk/x86_64/cswitch.s
@@ -60,19 +60,12 @@
 
 #include <i386/asm.h>
 #include <i386/proc_reg.h>
-#include <assym.s>
-
-#ifdef	SYMMETRY
-#include <sqt/asm_macros.h>
-#endif
-
-#if	AT386
 #include <i386/mp.h>
-#endif	/* AT386 */
+#include <assym.s>
 
 Entry(Load_context)
 	movq	TH_KERNEL_STACK(%rdi),%rcx	/* get kernel stack */
-	leaq	-IKS_SIZE-IEL_SIZE(%rcx),%rdx
+	leaq	-IKS_SIZE(%rcx),%rdx
 	addq	EXT(kernel_stack_size)(%rip),%rdx /* point to stack top */
 	movq	%rcx,%gs:CPU_ACTIVE_STACK	/* store stack address */
 	movq	%rdx,%gs:CPU_KERNEL_STACK	/* store stack top */
@@ -110,7 +103,7 @@ Entry(Switch_context)
 	/* new thread in %rdx */
 	movq    %rdx,%gs:CPU_ACTIVE_THREAD      /* new thread is active */
 	movq	TH_KERNEL_STACK(%rdx),%rdx	/* get its kernel stack */
-	lea	-IKS_SIZE-IEL_SIZE(%rdx),%rcx
+	lea	-IKS_SIZE(%rdx),%rcx
 	add	EXT(kernel_stack_size)(%rip),%rcx /* point to stack top */
 
 	movq	%rdx,%gs:CPU_ACTIVE_STACK	/* set current stack */
diff --git a/osfmk/x86_64/idt64.s b/osfmk/x86_64/idt64.s
index a4ca1cecd..fe6cb1295 100644
--- a/osfmk/x86_64/idt64.s
+++ b/osfmk/x86_64/idt64.s
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -29,7 +29,7 @@
 #include <assym.s>
 #include <mach_kdb.h>
 #include <i386/eflags.h>
-#include <i386/rtclock.h>
+#include <i386/rtclock_asm.h>
 #include <i386/trap.h>
 #define _ARCH_I386_ASM_HELP_H_	/* Prevent inclusion of user header */
 #include <mach/i386/syscall_sw.h>
@@ -81,149 +81,6 @@
 #define	HNDL_DOUBLE_FAULT	EXT(hndl_double_fault)
 #define	HNDL_MACHINE_CHECK	EXT(hndl_machine_check)
 
-/*
- * Nanosecond timing.
- */
-
-/*
- * Nanotime returned in %rax.
- * Computed from tsc based on the scale factor and an implicit 32 bit shift.
- * This code must match what _rtc_nanotime_read does in
- * machine_routines_asm.s.  Failure to do so can
- * result in "weird" timing results.
- *
- * Uses: %rsi, %rdi, %rdx, %rcx
- */
-#define NANOTIME							  \
-	movq	%gs:CPU_NANOTIME,%rdi					; \
-	RTC_NANOTIME_READ_FAST()
-
-/*
- * Add 64-bit delta in register reg to timer pointed to by register treg.
- */
-#define TIMER_UPDATE(treg,reg,offset)						  \
-	addq	reg,(offset)+TIMER_ALL(treg)		/* add timer */
-
-/*
- * Add time delta to old timer and start new.
- * Uses: %rsi, %rdi, %rdx, %rcx, %rax
- */
-#define TIMER_EVENT(old,new)						  \
-	NANOTIME				/* %rax := nanosecs */	; \
-	movq	%rax,%rsi			/* save timestamp */	; \
-	movq	%gs:CPU_ACTIVE_THREAD,%rcx		/* get thread */	; \
-	subq	(old##_TIMER)+TIMER_TSTAMP(%rcx),%rax	/* compute elapsed */	; \
-	TIMER_UPDATE(%rcx,%rax,old##_TIMER)	/* update timer */	; \
-	leaq	(new##_TIMER)(%rcx),%rcx	/* point to new timer */; \
-	movq	%rsi,TIMER_TSTAMP(%rcx)		/* set timestamp */	; \
-	movq	%gs:CPU_PROCESSOR,%rdx		/* get processor */	; \
-	movq	%rcx,THREAD_TIMER(%rdx)		/* set current timer */	; \
-	movq	%rsi,%rax			/* restore timestamp */	; \
-	subq	(old##_STATE)+TIMER_TSTAMP(%rdx),%rax	/* compute elapsed */	; \
-	TIMER_UPDATE(%rdx,%rax,old##_STATE)	/* update timer */	; \
-	leaq	(new##_STATE)(%rdx),%rcx 	/* point to new state */; \
-	movq	%rcx,CURRENT_STATE(%rdx)	/* set current state */	; \
-	movq	%rsi,TIMER_TSTAMP(%rcx)		/* set timestamp */
-
-/*
- * Update time on user trap entry.
- * Uses: %rsi, %rdi, %rdx, %rcx, %rax
- */
-#define	TIME_TRAP_UENTRY	TIMER_EVENT(USER,SYSTEM)
-
-/*
- * update time on user trap exit.
- * Uses: %rsi, %rdi, %rdx, %rcx, %rax
- */
-#define	TIME_TRAP_UEXIT		TIMER_EVENT(SYSTEM,USER)
-
-/*
- * update time on interrupt entry.
- * Uses: %rsi, %rdi, %rdx, %rcx, %rax
- * Saves processor state info on stack.
- */
-#define	TIME_INT_ENTRY							  \
-	NANOTIME				/* %rax := nanosecs */	; \
-	movq	%rax,%gs:CPU_INT_EVENT_TIME	/* save in cpu data */	; \
-	movq	%rax,%rsi			/* save timestamp */	; \
-	movq	%gs:CPU_PROCESSOR,%rdx		/* get processor */	; \
-	movq 	THREAD_TIMER(%rdx),%rcx		/* get current timer */	; \
-	subq	TIMER_TSTAMP(%rcx),%rax		/* compute elapsed */	; \
-	TIMER_UPDATE(%rcx,%rax,0)			/* update timer */	; \
-	movq	KERNEL_TIMER(%rdx),%rcx		/* get kernel timer */	; \
-	movq	%rsi,TIMER_TSTAMP(%rcx)		/* set timestamp */	; \
-	movq	%rsi,%rax			/* restore timestamp */	; \
-	movq	CURRENT_STATE(%rdx),%rcx	/* get current state */	; \
-	pushq	%rcx				/* save state */	; \
-	subq	TIMER_TSTAMP(%rcx),%rax		/* compute elapsed */	; \
-	TIMER_UPDATE(%rcx,%rax,0)			/* update timer */	; \
-	leaq	IDLE_STATE(%rdx),%rax		/* get idle state */	; \
-	cmpq	%rax,%rcx			/* compare current */	; \
-	je	0f				/* skip if equal */	; \
-	leaq	SYSTEM_STATE(%rdx),%rcx		/* get system state */	; \
-	movq	%rcx,CURRENT_STATE(%rdx)	/* set current state */	; \
-0:	movq	%rsi,TIMER_TSTAMP(%rcx)		/* set timestamp */
-
-/*
- * update time on interrupt exit.
- * Uses: %rsi, %rdi, %rdx, %rcx, %rax
- * Restores processor state info from stack.
- */
-#define	TIME_INT_EXIT							  \
-	NANOTIME				/* %rax := nanosecs */	; \
-	movq	%rax,%gs:CPU_INT_EVENT_TIME	/* save in cpu data */	; \
-	movq	%rax,%rsi			/* save timestamp */	; \
-	movq	%gs:CPU_PROCESSOR,%rdx		/* get processor */	; \
-	movq	KERNEL_TIMER(%rdx),%rcx		/* get kernel timer */	; \
-	subq	TIMER_TSTAMP(%rcx),%rax		/* compute elapsed */	; \
-	TIMER_UPDATE(%rcx,%rax,0)			/* update timer */	; \
-	movq	THREAD_TIMER(%rdx),%rcx		/* interrupted timer */	; \
-	movq	%rsi,TIMER_TSTAMP(%rcx)		/* set timestamp */	; \
-	movq	%rsi,%rax			/* restore timestamp */	; \
-	movq	CURRENT_STATE(%rdx),%rcx	/* get current state */	; \
-	subq	TIMER_TSTAMP(%rcx),%rax		/* compute elapsed */	; \
-	TIMER_UPDATE(%rcx,%rax,0)			/* update timer */	; \
-	popq	%rcx				/* restore state */	; \
-	movq	%rcx,CURRENT_STATE(%rdx)	/* set current state */	; \
-	movq	%rsi,TIMER_TSTAMP(%rcx)		/* set timestamp */
-
-/*
- * Check for vtimers for task.
- *   task_reg   is register pointing to current task
- *   thread_reg is register pointing to current thread
- */
-#define TASK_VTIMER_CHECK(task_reg,thread_reg)				  \
-	cmpl	$0,TASK_VTIMERS(task_reg)				; \
-	jz	1f							; \
-	orl	$(AST_BSD),%gs:CPU_PENDING_AST	/* Set pending AST */	; \
-	lock								; \
-	orl	$(AST_BSD),ACT_AST(thread_reg)	/* Set thread AST  */	; \
-1:									; \
-
-
-/*
- * Macros for calling into C functions.
- * The stack is 16-byte aligned by masking.
- */
-#define CCALL(fn)				 \
-	mov	%rsp, %r12			;\
-	and	$0xFFFFFFFFFFFFFFF0, %rsp	;\
-	call	EXT(fn)				;\
-	mov	%r12, %rsp
-
-#define CCALL1(fn, arg1) 			 \
-	mov	arg1, %rdi 			;\
-	CCALL(fn)
-
-#define CCALL2(fn, arg1, arg2)		 	 \
-	mov	arg1, %rdi 			;\
-	CCALL(fn)
-
-#define CCALL3(fn, arg1, arg2, arg3) 		 \
-	mov	arg1, %rdi 			;\
-	mov	arg2, %rsi 			;\
-	mov	arg3, %rdx 			;\
-	CCALL(fn)
 
 #if 1
 #define PUSH_FUNCTION(func) 			 \
@@ -287,11 +144,27 @@
  * Determine what mode has been interrupted and save state accordingly.
  */
 L_dispatch:
-	cmpq	$(KERNEL64_CS), ISF64_CS(%rsp)
+	cmpl	$(KERNEL64_CS), ISF64_CS(%rsp)
 	je	L_64bit_dispatch
 
 	swapgs
 
+	/*
+	 * Check for trap from EFI32, and restore cr3 and rsp if so.
+	 * A trap from EFI32 is fatal.
+	 */
+	cmpl	$(KERNEL32_CS), ISF64_CS(%rsp)
+	jne	L_dispatch_continue
+	push	%rcx
+	mov	EXT(pal_efi_saved_cr3)(%rip), %rcx
+	mov	%rcx, %cr3
+	leaq	0(%rip), %rcx
+	shr	$32, %rcx		/* splice the upper 32-bits of rip */
+	shl	$32, %rsp		/* .. and the lower 32-bits of rsp */
+	shrd	$32, %rcx, %rsp		/* to recover the full 64-bits of rsp */
+	pop	%rcx
+
+L_dispatch_continue:
 	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
 	je	L_32bit_dispatch	/* 32-bit user task */
 	/* fall through to 64bit user dispatch */
@@ -303,6 +176,8 @@ L_64bit_dispatch:
 	subq	$(ISS64_OFFSET), %rsp
 	movl	$(SS_64), SS_FLAVOR(%rsp)
 
+	cld
+	
 	/*
 	 * Save segment regs - for completeness since theyre not used.
 	 */
@@ -361,6 +236,7 @@ L_32bit_dispatch: /* 32-bit user task */
 	subq	$(ISC32_OFFSET), %rsp
 	movl	$(SS_32), SS_FLAVOR(%rsp)
 
+	cld
 	/*
 	 * Save segment regs
 	 */
@@ -426,22 +302,32 @@ L_common_dispatch:
 	je	1f
 	mov	%rcx, %cr3			/* load kernel cr3 */
 	jmp	2f				/* and skip tlb flush test */
-1:	
-	cmpl	$0, %gs:CPU_TLB_INVALID		/* flush needed? */
-	je	2f				/* - no */
-	movl	$0, %gs:CPU_TLB_INVALID 
-	mov	%cr3, %rcx
+1:
+	mov	%gs:CPU_ACTIVE_CR3+4, %rcx
+	shr	$32, %rcx
+	testl	%ecx, %ecx
+	jz	2f
+	movl	$0, %gs:CPU_TLB_INVALID
+	testl	$(1<<16), %ecx			/* Global? */
+	jz	11f
+	mov	%cr4, %rcx	/* RMWW CR4, for lack of an alternative*/
+	and	$(~CR4_PGE), %rcx
+	mov	%rcx, %cr4
+	or	$(CR4_PGE), %rcx
+	mov	%rcx, %cr4
+	jmp	2f
+
+11:	mov	%cr3, %rcx
 	mov	%rcx, %cr3
 2:
 	mov	%gs:CPU_ACTIVE_THREAD, %rcx	/* Get the active thread */
-	cmpq	$0, ACT_PCB_IDS(%rcx)	/* Is there a debug register state? */
+	cmpq	$0, TH_PCB_IDS(%rcx)	/* Is there a debug register state? */
 	je	3f
 	mov	$0, %rcx		/* If so, reset DR7 (the control) */
 	mov	%rcx, %dr7
 3:
-	addl	$1,%gs:hwIntCnt(,%ebx,4)	// Bump the trap/intr count
+	incl	%gs:hwIntCnt(,%ebx,4)		// Bump the trap/intr count
 	/* Dispatch the designated handler */
-	mov	%rsp, %rdi		/* rsp points to saved state */
 	jmp	*%rdx
 
 /*
@@ -453,7 +339,7 @@ Entry(return_to_user)
 Entry(ret_to_user)
 // XXX Would be nice to tidy up this debug register restore sequence...
 	mov	%gs:CPU_ACTIVE_THREAD, %rdx
-	movq	ACT_PCB_IDS(%rdx),%rax	/* Obtain this thread's debug state */
+	movq	TH_PCB_IDS(%rdx),%rax	/* Obtain this thread's debug state */
 	
 	cmpq	$0,%rax			/* Is there a debug register context? */
 	je	2f 			/* branch if not */
@@ -486,21 +372,15 @@ Entry(ret_to_user)
 	 * On exiting the kernel there's no need to switch cr3 since we're
 	 * already running in the user's address space which includes the
 	 * kernel. Nevertheless, we now mark the task's cr3 as active.
-	 * However, there may be a defered tlb flush to deal with.
-	 * This is a case where another cpu modified this task's address 
-	 * space while this thread was in the kernel.
 	 * But, if no_shared_cr3 is set, we do need to switch cr3 at this point.
 	 */
 	mov	%gs:CPU_TASK_CR3, %rcx
 	mov	%rcx, %gs:CPU_ACTIVE_CR3
-	movl	%gs:CPU_TLB_INVALID, %eax
-	orl	EXT(no_shared_cr3)(%rip), %eax
-	test	%eax, %eax		/* -no_shered_cr3 or flush required? */
+	movl	EXT(no_shared_cr3)(%rip), %eax
+	test	%eax, %eax		/* -no_shared_cr3 */
 	jz	3f
-	movl	$0, %gs:CPU_TLB_INVALID
 	mov	%rcx, %cr3
 3:
-
 	mov	%gs:CPU_DR7, %rax	/* Is there a debug control register?*/
 	cmp	$0, %rax
 	je	4f
@@ -586,7 +466,7 @@ ret_to_kernel:
 	CCALL1(panic_idt64, %rsp)
 	hlt
 1:
-	cmpq	$(KERNEL64_CS), R64_CS(%rsp)
+	cmpl	$(KERNEL64_CS), R64_CS(%rsp)
 	je	2f
 	CCALL1(panic_idt64, %rsp)
 	hlt
@@ -653,7 +533,6 @@ L_sysret:
 #endif
 Entry(idt64_unix_scall)
 	swapgs				/* switch to kernel gs (cpu_data) */
-L_unix_scall_continue:
 	pushq	%rax			/* save system call number */
 	PUSH_FUNCTION(HNDL_UNIX_SCALL)
 	pushq	$(UNIX_INT)
@@ -662,7 +541,6 @@ L_unix_scall_continue:
 	
 Entry(idt64_mach_scall)
 	swapgs				/* switch to kernel gs (cpu_data) */
-L_mach_scall_continue:
 	pushq	%rax			/* save system call number */
 	PUSH_FUNCTION(HNDL_MACH_SCALL)
 	pushq	$(MACH_INT)
@@ -671,7 +549,6 @@ L_mach_scall_continue:
 	
 Entry(idt64_mdep_scall)
 	swapgs				/* switch to kernel gs (cpu_data) */
-L_mdep_scall_continue:
 	pushq	%rax			/* save system call number */
 	PUSH_FUNCTION(HNDL_MDEP_SCALL)
 	pushq	$(MACHDEP_INT)
@@ -680,7 +557,6 @@ L_mdep_scall_continue:
 	
 Entry(idt64_diag_scall)
 	swapgs				/* switch to kernel gs (cpu_data) */
-L_diag_scall_continue:
 	push	%rax			/* save system call number */
 	PUSH_FUNCTION(HNDL_DIAG_SCALL)
 	pushq	$(DIAG_INT)
@@ -688,8 +564,8 @@ L_diag_scall_continue:
 
 Entry(hi64_syscall)
 Entry(idt64_syscall)
-	swapgs				/* Kapow! get per-cpu data area */
 L_syscall_continue:
+	swapgs				/* Kapow! get per-cpu data area */
 	mov	%rsp, %gs:CPU_UBER_TMP	/* save user stack */
 	mov	%gs:CPU_UBER_ISF, %rsp	/* switch stack to pcb */
 
@@ -729,9 +605,15 @@ Entry(idt64_sysenter)
 	push	$(USER_DS)		/* ss */
 	push	%rcx			/* uesp */
 	pushf				/* flags */
+	/*
+	 * Clear, among others, the Nested Task (NT) flags bit;
+	 * this is zeroed by INT, but not by SYSENTER.
+	 */
+	push	$0
+	popf
 	push	$(SYSENTER_CS)		/* cs */ 
-	swapgs				/* switch to kernel gs (cpu_data) */
 L_sysenter_continue:
+	swapgs				/* switch to kernel gs (cpu_data) */
 	push	%rdx			/* eip */
 	push	%rax			/* err/eax - syscall code */
 	PUSH_FUNCTION(HNDL_SYSENTER)
@@ -742,16 +624,19 @@ L_sysenter_continue:
 
 Entry(idt64_page_fault)
 	PUSH_FUNCTION(HNDL_ALLTRAPS)
-	push	%rax			/* save %rax temporarily in trap slot */
+	push	$(T_PAGE_FAULT)
+	push	%rax			/* save %rax temporarily */
 	leaq	EXT(idt64_unix_scall_copy_args)(%rip), %rax
-	cmp	%rax, ISF64_RIP(%rsp)
-	jne	1f
-	add	$(ISF64_SIZE), %rsp	/* remove entire intr stack frame */
-	jmp	L_copy_args_continue	/* continue system call entry */
+	cmp	%rax, 8+ISF64_RIP(%rsp) /* fault during copy args? */
+	je	1f			/* - yes, handle copy arg fault */
+	testb	$3, 8+ISF64_CS(%rsp)	/* was trap from kernel? */
+	jz	L_kernel_trap		/* - yes, handle with care */
+	pop	%rax			/* restore %rax, swapgs, and continue */
+	swapgs
+	jmp	L_dispatch_continue
 1:
-	mov	(%rsp), %rax		/* restore %rax from trap slot */
-	movq	$(T_PAGE_FAULT), (%rsp)	/* set trap code */
-	jne	L_dispatch
+	add	$(8+ISF64_SIZE), %rsp	/* remove entire intr stack frame */
+	jmp	L_copy_args_continue	/* continue system call entry */
 
 
 /*
@@ -773,50 +658,23 @@ Entry(idt64_debug)
 	 */
 
 	push	%rax			/* save %rax temporarily */
-
-	leaq	EXT(idt64_mach_scall)(%rip), %rax
-	cmp	%rax, ISF64_RIP(%rsp)
-	jne	1f
-	pop	%rax
-	add	$(ISF64_SIZE),%rsp	/* remove entire intr stack frame */
-	jmp	L_mach_scall_continue	/* continue system call entry */
-1:
-	leaq	EXT(idt64_mdep_scall)(%rip), %rax
-	cmp	%rax, ISF64_RIP(%rsp)
-	jne	2f
-	pop	%rax
-	add	$(ISF64_SIZE),%rsp	/* remove entire intr stack frame */
-	jmp	L_mdep_scall_continue	/* continue system call entry */
-2:
-	leaq	EXT(idt64_unix_scall)(%rip), %rax
-	cmp	%rax, ISF64_RIP(%rsp)
-	jne	3f
-	pop	%rax
-	add	$(ISF64_SIZE),%rsp	/* remove entire intr stack frame */
-	jmp	L_unix_scall_continue	/* continue system call entry */
-3:
 	lea	EXT(idt64_sysenter)(%rip), %rax
-	cmp	%rax, ISF64_RIP(%rsp)
-	je	4f
-	pop	%rax
-	jmp	L_dispatch
-4:
+	cmp	%rax, ISF64_RIP+8(%rsp)
 	pop	%rax
+	jne	L_dispatch
 	/*
 	 * Interrupt stack frame has been pushed on the temporary stack.
-	 * We have to switch to pcb stack and copy eflags.
+	 * We have to switch to pcb stack and patch up the saved state.
 	 */ 
-	add	$40,%rsp		/* remove trapno/trapfn/err/rip/cs */
-	push	%rcx			/* save %rcx - user stack pointer */
-	mov	40(%rsp),%rcx		/* top of intr stack -> pcb stack */
+	mov	%rcx, ISF64_ERR(%rsp)	/* save %rcx in error slot */
+	mov	ISF64_SS+8(%rsp), %rcx	/* top of temp stack -> pcb stack */
 	xchg	%rcx,%rsp		/* switch to pcb stack */
 	push	$(USER_DS)		/* ss */
-	push	(%rcx)			/* saved %rcx into rsp slot */
-	push	8(%rcx)			/* rflags */
-	mov	(%rcx),%rcx		/* restore %rcx */
+	push	ISF64_ERR(%rcx)		/* saved %rcx into rsp slot */
+	push	ISF64_RFLAGS(%rcx)	/* rflags */
 	push	$(SYSENTER_TF_CS)	/* cs - not SYSENTER_CS for iret path */
+	mov	ISF64_ERR(%rcx),%rcx	/* restore %rcx */
 	jmp	L_sysenter_continue	/* continue sysenter entry */
-
 	
 
 Entry(idt64_double_fault)
@@ -825,9 +683,9 @@ Entry(idt64_double_fault)
 
 	push	%rax
 	leaq	EXT(idt64_syscall)(%rip), %rax
-	cmp	%rax, ISF64_RIP(%rsp)
+	cmp	%rax, ISF64_RIP+8(%rsp)
 	pop	%rax
-	jne	L_dispatch
+	jne	L_64bit_dispatch
 
 	mov	ISF64_RSP(%rsp), %rsp
 	jmp	L_syscall_continue
@@ -838,15 +696,15 @@ Entry(idt64_double_fault)
  * Check for a GP/NP fault in the kernel_return
  * sequence; if there, report it as a GP/NP fault on the user's instruction.
  *
- * rsp->     0:	trap function
- *	     8: trap code (NP or GP)
- *	    16:	segment number in error (error code)
- *	    24:	rip
- *	    32:	cs
- *	    40:	rflags 
- *	    48:	rsp
- *	    56:	ss
- *	    64:	old registers (trap is from kernel)
+ * rsp->     0 ISF64_TRAPNO:	trap code (NP or GP)
+ *	     8 ISF64_TRAPFN:	trap function
+ *	    16 ISF64_ERR:	segment number in error (error code)
+ *	    24 ISF64_RIP:	rip
+ *	    32 ISF64_CS:	cs
+ *	    40 ISF64_RFLAGS:	rflags 
+ *	    48 ISF64_RSP:	rsp
+ *	    56 ISF64_SS:	ss
+ *	    64:			old registers (trap is from kernel)
  */
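
For orientation, here is a C sketch of the interrupt stack frame these ISF64_* offsets describe; the struct is illustrative (the kernel defines its own), but the field offsets follow the table above.

    #include <stdint.h>

    struct isf64_sketch {
        uint64_t trapno;    /* ISF64_TRAPNO,  offset  0 */
        uint64_t trapfn;    /* ISF64_TRAPFN,  offset  8 */
        uint64_t err;       /* ISF64_ERR,     offset 16 */
        uint64_t rip;       /* ISF64_RIP,     offset 24 */
        uint64_t cs;        /* ISF64_CS,      offset 32 */
        uint64_t rflags;    /* ISF64_RFLAGS,  offset 40 */
        uint64_t rsp;       /* ISF64_RSP,     offset 48 */
        uint64_t ss;        /* ISF64_SS,      offset 56 */
    };

    /* ISF64_SIZE: eight quadwords */
    _Static_assert(sizeof(struct isf64_sketch) == 64, "frame is 64 bytes");
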
 Entry(idt64_gen_prot)
 	PUSH_FUNCTION(HNDL_ALLTRAPS)
@@ -863,7 +721,7 @@ Entry(idt64_segnp)
 	pushq	$(T_SEGMENT_NOT_PRESENT)
 					/* indicate fault type */
 trap_check_kernel_exit:
-	testb	$3,32(%rsp)
+	testb	$3,ISF64_CS(%rsp)
 	jnz	L_dispatch
 	/*
 	 * trap was from kernel mode,
@@ -872,33 +730,69 @@ trap_check_kernel_exit:
 	push	%rax
 
 	leaq	EXT(ret32_iret)(%rip), %rax
-	cmp	%rax, 24+8(%rsp)
+	cmp	%rax, 8+ISF64_RIP(%rsp)
 	je	L_fault_iret
 	leaq	EXT(ret64_iret)(%rip), %rax
-	cmp	%rax, 24+8(%rsp)
+	cmp	%rax, 8+ISF64_RIP(%rsp)
 	je	L_fault_iret
 	leaq	EXT(ret32_set_ds)(%rip), %rax
-	cmp	%rax, 24+8(%rsp)
+	cmp	%rax, 8+ISF64_RIP(%rsp)
 	je	L_32bit_fault_set_seg
 	leaq	EXT(ret32_set_es)(%rip), %rax
-	cmp	%rax, 24+8(%rsp)
+	cmp	%rax, 8+ISF64_RIP(%rsp)
 	je	L_32bit_fault_set_seg
 	leaq	EXT(ret32_set_fs)(%rip), %rax
-	cmp	%rax, 24+8(%rsp)
+	cmp	%rax, 8+ISF64_RIP(%rsp)
 	je	L_32bit_fault_set_seg
 	leaq	EXT(ret32_set_gs)(%rip), %rax
-	cmp	%rax, 24+8(%rsp)
+	cmp	%rax, 8+ISF64_RIP(%rsp)
 	je	L_32bit_fault_set_seg
 
 	leaq	EXT(idt64_unix_scall_copy_args)(%rip), %rax
-	cmp	%rax, 24+8(%rsp)
-	add	$(ISF64_SIZE)+8, (%rsp)
+	cmp	%rax, 8+ISF64_RIP(%rsp)
+	cmove	8+ISF64_RSP(%rsp), %rsp
 	je	L_copy_args_continue
 
-	pop %rax
-	jmp	L_dispatch
+	/* fall through */
+
+L_kernel_trap:
+	/*
+	 * Here after taking an unexpected trap from kernel mode - perhaps
+	 * while running in the trampolines hereabouts.
+	 * Note: %rax has been pushed on the stack.
+	 * Make sure we're not on the PCB stack; if so, move to the kernel stack.
+	 * This is likely a fatal condition.
+	 * But first, try to ensure we have the kernel gs base active...
+	 */
+	movq	%gs:CPU_THIS, %rax		/* get gs_base into %rax */
+	test	%rax, %rax			/* test sign bit (MSB) */
+	js	1f				/* -ve kernel addr, no swap */
+	swapgs					/* +ve user addr, swap */
+1:
+	movq	%gs:CPU_UBER_ISF, %rax		/* PCB stack addr */
+	subq	%rsp, %rax
+	cmpq	$(PAGE_SIZE), %rax		/* current stack in PCB? */
+	jb	2f				/*  - yes, deal with it */
+	pop	%rax				/*  - no, restore %rax */
+	jmp	L_64bit_dispatch
+2:
+	/*
+	 *  Here if %rsp is in the PCB
+	 *  Copy the interrupt stack frame from PCB stack to kernel stack
+	 */
+	movq	%gs:CPU_KERNEL_STACK, %rax
+	xchgq	%rax, %rsp
+	pushq	8+ISF64_SS(%rax)
+	pushq	8+ISF64_RSP(%rax)
+	pushq	8+ISF64_RFLAGS(%rax)
+	pushq	8+ISF64_CS(%rax)
+	pushq	8+ISF64_RIP(%rax)
+	pushq	8+ISF64_ERR(%rax)
+	pushq	8+ISF64_TRAPFN(%rax)
+	pushq	8+ISF64_TRAPNO(%rax)
+	movq	(%rax), %rax
+	jmp	L_64bit_dispatch
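
The two tests above condense to simple predicates; a hedged C restatement follows (PAGE_SIZE and the names mirror the asm rather than quoting a kernel header).

    #include <stdbool.h>
    #include <stdint.h>

    #define PAGE_SIZE_SKETCH 4096ULL

    /* Canonical kernel-half addresses have the sign bit set, so a
     * negative gs base means swapgs has already been done. */
    static bool gs_base_is_kernel(uint64_t gs_base)
    {
        return (int64_t)gs_base < 0;
    }

    /* "On the PCB stack" is taken as %rsp lying within a page below the
     * per-cpu CPU_UBER_ISF pointer, as in the subq/cmpq/jb sequence. */
    static bool rsp_on_pcb_stack(uint64_t rsp, uint64_t pcb_isf)
    {
        return (pcb_isf - rsp) < PAGE_SIZE_SKETCH;
    }
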
 
-		
 /*
  * GP/NP fault on IRET: CS or SS is in error.
  * Note that the user ss is originally 16-byte aligned, we'd popped the
@@ -908,32 +802,32 @@ trap_check_kernel_exit:
  *
  * on SP is
  *  (-  rax saved above, which is immediately popped)
- *   0	function
- *   8	trap number
- *  16	errcode
- *  24	rip
- *  32	cs
- *  40	rflags
- *  48	rsp		--> new trapfn
- *  56	ss		--> new trapno
- *  64	pad		--> new errcode
- *  72	user rip
- *  80	user cs
- *  88	user rflags
- *  96	user rsp
- * 104  user ss	(16-byte aligned)
+ *  0  ISF64_TRAPNO:	trap code (NP or GP)
+ *  8  ISF64_TRAPFN:	trap function
+ *  16 ISF64_ERR:	segment number in error (error code)
+ *  24 ISF64_RIP:	rip
+ *  32 ISF64_CS:	cs
+ *  40 ISF64_RFLAGS:	rflags 
+ *  48 ISF64_RSP:	rsp --> new trapno
+ *  56 ISF64_SS:	ss  --> new trapfn
+ *  64			pad --> new errcode
+ *  72			user rip
+ *  80			user cs
+ *  88			user rflags
+ *  96			user rsp
+ * 104 			user ss	(16-byte aligned)
  */
 L_fault_iret:
 	pop	%rax			/* recover saved %rax */
-	mov	%rax, 24(%rsp)		/* save rax (we don`t need saved rip) */
-	mov	0(%rsp), %rax		/* get trap func */
-	mov	%rax, 48(%rsp)		/* put in user trap func */
-	mov	8(%rsp), %rax		/* get trap number */
-	mov	%rax, 56(%rsp)		/* put in user trap number */
-	mov	16(%rsp), %rax		/* get error code */
-	mov	%rax, 64(%rsp)		/* put in user errcode */
-	mov	24(%rsp), %rax		/* restore rax */
-	add	$48,%rsp		/* reset to new trapfn */
+	mov	%rax, ISF64_RIP(%rsp)	/* save rax (we don`t need saved rip) */
+	mov	ISF64_TRAPNO(%rsp), %rax
+	mov	%rax, ISF64_RSP(%rsp)	/* put in user trap number */
+	mov	ISF64_TRAPFN(%rsp), %rax
+	mov	%rax, ISF64_SS(%rsp)	/* put in user trap function */
+	mov	ISF64_ERR(%rsp), %rax	/* get error code */
+	mov	%rax, 8+ISF64_SS(%rsp)	/* put in user errcode */
+	mov	ISF64_RIP(%rsp), %rax	/* restore rax */
+	add	$(ISF64_RSP),%rsp	/* reset to new trapno */
 					/* now treat as fault from user */
 	jmp	L_dispatch
 
@@ -942,13 +836,14 @@ L_fault_iret:
  * on the stack untouched since we haven't yet moved the stack pointer.
  */
 L_32bit_fault_set_seg:
-	pop	%rax			/* recover %rax from stack */
-	mov	0(%rsp), %rax		/* get trap function */
-	mov	8(%rsp), %rcx		/* get trap number */
-	mov	16(%rsp), %rdx		/* get error code */
-	mov	48(%rsp), %rsp		/* reset stack to saved state */
-	mov	%rax,ISC32_TRAPFN(%rsp)
-	mov	%rcx,ISC32_TRAPNO(%rsp)
+	swapgs
+	pop	%rax			/* toss saved %rax from stack */
+	mov	ISF64_TRAPNO(%rsp), %rax
+	mov	ISF64_TRAPFN(%rsp), %rcx
+	mov	ISF64_ERR(%rsp), %rdx
+	mov	ISF64_RSP(%rsp), %rsp	/* reset stack to saved state */
+	mov	%rax,ISC32_TRAPNO(%rsp)
+	mov	%rcx,ISC32_TRAPFN(%rsp)
 	mov	%rdx,ISC32_ERR(%rsp)
 					/* now treat as fault from user */
 					/* except that all the state is */
@@ -993,22 +888,25 @@ Entry(hndl_alltraps)
 
 	TIME_TRAP_UENTRY
 
-	movq	%gs:CPU_ACTIVE_THREAD,%rdi
-	movq	%rsp, ACT_PCB_ISS(%rdi)		/* stash the PCB stack */
+	/* Check for active vtimers in the current task */
+	mov	%gs:CPU_ACTIVE_THREAD, %rcx
+	mov	TH_TASK(%rcx), %rbx
+	TASK_VTIMER_CHECK(%rbx, %rcx)
+
 	movq	%rsp, %rdi			/* also pass it as arg0 */
 	movq	%gs:CPU_KERNEL_STACK,%rsp	/* switch to kernel stack */
-	sti
 
 	CCALL(user_trap)			/* call user trap routine */
+	/* user_trap() unmasks interrupts */
 	cli					/* hold off intrs - critical section */
-	movq	%gs:CPU_ACTIVE_THREAD,%rsp
-	movq	ACT_PCB_ISS(%rsp), %rsp 	/* switch back to PCB stack */
 	xorl	%ecx, %ecx			/* don't check if we're in the PFZ */
 
 #define CLI cli
 #define STI sti
 
 Entry(return_from_trap)
+	movq	%gs:CPU_ACTIVE_THREAD,%rsp
+	movq	TH_PCB_ISS(%rsp), %rsp 	/* switch back to PCB stack */
 	movl	%gs:CPU_PENDING_AST,%eax
 	testl	%eax,%eax
 	je	EXT(return_to_user)	/* branch if no AST */
@@ -1023,6 +921,7 @@ L_return_from_trap_with_ast:
 	je	1f
 					/* no... 32-bit user mode */
 	movl	R32_EIP(%r13), %edi
+	xorq	%rbp, %rbp		/* clear framepointer */
 	CCALL(commpage_is_in_pfz32)
 	testl	%eax, %eax
 	je	2f			/* not in the PFZ... go service AST */
@@ -1031,6 +930,7 @@ L_return_from_trap_with_ast:
 	jmp	EXT(return_to_user)
 1:
 	movq	R64_RIP(%r13), %rdi
+	xorq	%rbp, %rbp		/* clear framepointer */
 	CCALL(commpage_is_in_pfz64)
 	testl	%eax, %eax
 	je	2f			/* not in the PFZ... go service AST */
@@ -1040,12 +940,11 @@ L_return_from_trap_with_ast:
 2:	
 	STI				/* interrupts always enabled on return to user mode */
 
-	xor	%edi, %edi			/* zero %rdi */
-	CCALL(i386_astintr)	/* take the AST */
+	xor	%edi, %edi		/* zero %rdi */
+	xorq	%rbp, %rbp		/* clear framepointer */
+	CCALL(i386_astintr)		/* take the AST */
 
 	CLI
-	movq	%r13, %rsp			/* switch back to PCB stack */
-
 	xorl	%ecx, %ecx		/* don't check if we're in the PFZ */
 	jmp	EXT(return_from_trap)	/* and check again (rare) */
 
@@ -1061,7 +960,7 @@ trap_from_kernel:
 	pushq   R64_RIP(%rsp)           /* Simulate a CALL from fault point */
 	pushq   %rbp                    /* Extend framepointer chain */
 	movq    %rsp, %rbp
-	CCALL(kernel_trap)		/* to kernel trap routine */
+	CCALLWITHSP(kernel_trap)	/* to kernel trap routine */
 	popq    %rbp
 	addq    $8, %rsp
 	cli
@@ -1117,6 +1016,11 @@ Entry(hndl_allintrs)
 	
 	TIME_INT_ENTRY			/* do timing */
 
+	/* Check for active vtimers in the current task */
+	mov	%gs:CPU_ACTIVE_THREAD, %rcx
+	mov	TH_TASK(%rcx), %rbx
+	TASK_VTIMER_CHECK(%rbx, %rcx)
+
 	incl	%gs:CPU_PREEMPTION_LEVEL
 	incl	%gs:CPU_INTERRUPT_LEVEL
 
@@ -1137,8 +1041,7 @@ LEXT(return_to_iret)			/* (label for kdb_kintr and hardclock) */
 	TIME_INT_EXIT			/* do timing */
 
 	movq	%gs:CPU_ACTIVE_THREAD,%rax
-	movq	ACT_PCB(%rax),%rax	/* get act`s PCB */
-	movq	PCB_FPS(%rax),%rax	/* get pcb's ims.ifps */
+	movq	TH_PCB_FPS(%rax),%rax	/* get pcb's ifps */
 	cmpq	$0,%rax			/* Is there a context */
 	je	1f			/* Branch if not */
 	movl	FP_VALID(%rax),%eax	/* Load fp_valid */
@@ -1286,9 +1189,8 @@ L_copy_args_continue:
 	movq	%gs:CPU_KERNEL_STACK,%rdi
 	xchgq	%rdi,%rsp			/* switch to kernel stack */
 	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread     */
-	movq	%rdi,ACT_PCB_ISS(%rcx)
-	movq	ACT_TASK(%rcx),%rbx		/* point to current task  */
-	addl	$1,TASK_SYSCALLS_UNIX(%rbx)	/* increment call count   */
+	movq	TH_TASK(%rcx),%rbx		/* point to current task  */
+	incl	TH_SYSCALLS_UNIX(%rcx)		/* increment call count   */
 
 	/* Check for active vtimers in the current task */
 	TASK_VTIMER_CHECK(%rbx,%rcx)
@@ -1307,9 +1209,8 @@ Entry(hndl_mach_scall)
 	movq	%gs:CPU_KERNEL_STACK,%rdi
 	xchgq	%rdi,%rsp			/* switch to kernel stack */
 	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread     */
-	movq	%rdi,ACT_PCB_ISS(%rcx)
-	movq	ACT_TASK(%rcx),%rbx		/* point to current task  */
-	addl	$1,TASK_SYSCALLS_MACH(%rbx)	/* increment call count   */
+	movq	TH_TASK(%rcx),%rbx		/* point to current task  */
+	incl	TH_SYSCALLS_MACH(%rcx)		/* increment call count   */
 
 	/* Check for active vtimers in the current task */
 	TASK_VTIMER_CHECK(%rbx,%rcx)
@@ -1330,7 +1231,7 @@ Entry(hndl_mdep_scall)
 
 	/* Check for active vtimers in the current task */
 	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread     */
-	movq	ACT_TASK(%rcx),%rbx		/* point to current task  */
+	movq	TH_TASK(%rcx),%rbx		/* point to current task  */
 	TASK_VTIMER_CHECK(%rbx,%rcx)
 
 	sti
@@ -1349,7 +1250,7 @@ Entry(hndl_diag_scall)
 	
 	/* Check for active vtimers in the current task */
 	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread     */
-	movq	ACT_TASK(%rcx),%rbx		/* point to current task  */
+	movq	TH_TASK(%rcx),%rbx		/* point to current task  */
 	TASK_VTIMER_CHECK(%rbx,%rcx)
 
 	pushq	%rdi			/* push pcb stack */
@@ -1359,14 +1260,13 @@ Entry(hndl_diag_scall)
 	cli				// Disable interruptions just in case
 	cmpl	$0,%eax			// What kind of return is this?
 	je	1f			// - branch if bad (zero)
-	popq	%rsp			// Get back the original stack
+	popq	%rsp			// Get back the pcb stack
 	jmp	EXT(return_to_user)	// Normal return, do not check asts...
 1:
 	CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
 		// pass what would be the diag syscall
 		// error return - cause an exception
 	/* no return */
-	
 
 
 /*
@@ -1384,8 +1284,7 @@ Entry(hndl_syscall)
 	movq	%gs:CPU_KERNEL_STACK,%rdi
 	xchgq	%rdi,%rsp			/* switch to kernel stack */
 	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread     */
-	movq	%rdi, ACT_PCB_ISS(%rcx)
-	movq	ACT_TASK(%rcx),%rbx		/* point to current task  */
+	movq	TH_TASK(%rcx),%rbx		/* point to current task  */
 
 	/* Check for active vtimers in the current task */
 	TASK_VTIMER_CHECK(%rbx,%rcx)
@@ -1412,7 +1311,7 @@ Entry(hndl_syscall)
 
 
 Entry(hndl_unix_scall64)
-	addl	$1,TASK_SYSCALLS_UNIX(%rbx)	/* increment call count   */
+	incl	TH_SYSCALLS_UNIX(%rcx)		/* increment call count   */
 	sti
 
 	CCALL(unix_syscall64)
@@ -1422,7 +1321,7 @@ Entry(hndl_unix_scall64)
 
 
 Entry(hndl_mach_scall64)
-	addl	$1,TASK_SYSCALLS_MACH(%rbx)	/* increment call count   */
+	incl	TH_SYSCALLS_MACH(%rcx)		/* increment call count   */
 	sti
 
 	CCALL(mach_call_munger64)
@@ -1443,13 +1342,11 @@ Entry(hndl_mdep_scall64)
 
 Entry(hndl_diag_scall64)
 	pushq	%rdi			// Push the previous stack
-
 	CCALL(diagCall64)		// Call diagnostics
-
 	cli				// Disable interruptions just in case
 	cmpl	$0,%eax			// What kind of return is this?
 	je	1f			// - branch if bad (zero)
-	popq	%rsp			// Get back the original stack
+	popq	%rsp			// Get back the pcb stack
 	jmp	EXT(return_to_user)	// Normal return, do not check asts...
 1:
 	CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
diff --git a/osfmk/x86_64/idt_table.h b/osfmk/x86_64/idt_table.h
index 243ca18da..f2f26ce13 100644
--- a/osfmk/x86_64/idt_table.h
+++ b/osfmk/x86_64/idt_table.h
@@ -1,44 +1,71 @@
-     TRAP(0x00,idt64_zero_div)
- TRAP_SPC(0x01,idt64_debug)
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+TRAP(0x00,idt64_zero_div)
+TRAP_SPC(0x01,idt64_debug)
 INTERRUPT(0x02)			/* NMI */
 USER_TRAP(0x03,idt64_int3)
 USER_TRAP(0x04,idt64_into)
 USER_TRAP(0x05,idt64_bounds)
-     TRAP(0x06,idt64_invop)
-     TRAP(0x07,idt64_nofpu)
+TRAP(0x06,idt64_invop)
+TRAP(0x07,idt64_nofpu)
 #if	MACH_KDB
- TRAP_IST(0x08,idt64_db_task_dbl_fault)
+TRAP_IST(0x08,idt64_db_task_dbl_fault)
 #else
- TRAP_IST(0x08,idt64_double_fault)
+TRAP_IST(0x08,idt64_double_fault)
 #endif
-     TRAP(0x09,idt64_fpu_over)
-     TRAP(0x0a,idt64_inv_tss)
- TRAP_SPC(0x0b,idt64_segnp)
+TRAP(0x09,idt64_fpu_over)
+TRAP(0x0a,idt64_inv_tss)
+TRAP_SPC(0x0b,idt64_segnp)
 #if	MACH_KDB
- TRAP_IST(0x0c,idt64_db_task_stk_fault)
+TRAP_IST(0x0c,idt64_db_task_stk_fault)
 #else
- TRAP_SPC(0x0c,idt64_stack_fault)
+TRAP_SPC(0x0c,idt64_stack_fault)
 #endif
- TRAP_SPC(0x0d,idt64_gen_prot)
-// TRAP_ERR(0x0d,idt64_gen_prot_not)
- TRAP_SPC(0x0e,idt64_page_fault)
-     TRAP(0x0f,idt64_trap_0f)
-     TRAP(0x10,idt64_fpu_err)
-     TRAP(0x11,idt64_trap_11)
- TRAP_IST(0x12,idt64_mc)
-     TRAP(0x13,idt64_sse_err)
-     TRAP(0x14,idt64_trap_14)
-     TRAP(0x15,idt64_trap_15)
-     TRAP(0x16,idt64_trap_16)
-     TRAP(0x17,idt64_trap_17)
-     TRAP(0x18,idt64_trap_18)
-     TRAP(0x19,idt64_trap_19)
-     TRAP(0x1a,idt64_trap_1a)
-     TRAP(0x1b,idt64_trap_1b)
-     TRAP(0x1c,idt64_trap_1c)
-     TRAP(0x1d,idt64_trap_1d)
-     TRAP(0x1e,idt64_trap_1e)
-     TRAP(0x1f,idt64_trap_1f)
+TRAP_SPC(0x0d,idt64_gen_prot)
+TRAP_SPC(0x0e,idt64_page_fault)
+TRAP(0x0f,idt64_trap_0f)
+TRAP(0x10,idt64_fpu_err)
+TRAP(0x11,idt64_trap_11)
+TRAP_IST(0x12,idt64_mc)
+TRAP(0x13,idt64_sse_err)
+TRAP(0x14,idt64_trap_14)
+TRAP(0x15,idt64_trap_15)
+TRAP(0x16,idt64_trap_16)
+TRAP(0x17,idt64_trap_17)
+TRAP(0x18,idt64_trap_18)
+TRAP(0x19,idt64_trap_19)
+TRAP(0x1a,idt64_trap_1a)
+TRAP(0x1b,idt64_trap_1b)
+TRAP(0x1c,idt64_trap_1c)
+TRAP(0x1d,idt64_trap_1d)
+TRAP(0x1e,idt64_trap_1e)
+TRAP(0x1f,idt64_trap_1f)
 
 INTERRUPT(0x20)
 INTERRUPT(0x21)
@@ -140,7 +167,7 @@ INTERRUPT(0x7b)
 INTERRUPT(0x7c)
 INTERRUPT(0x7d)
 INTERRUPT(0x7e)
-INTERRUPT(0x7f)
+USER_TRAP(0x7f, idt64_dtrace_ret) /* Required by dtrace "fasttrap" */
 
 USER_TRAP_SPC(0x80,idt64_unix_scall)
 USER_TRAP_SPC(0x81,idt64_mach_scall)
@@ -277,4 +304,4 @@ INTERRUPT(0xfb)
 INTERRUPT(0xfc)
 INTERRUPT(0xfd)
 INTERRUPT(0xfe)
-     TRAP(0xff,idt64_preempt)
+TRAP(0xff,idt64_preempt)
diff --git a/osfmk/x86_64/locore.s b/osfmk/x86_64/locore.s
index 82d712985..af3bac12a 100644
--- a/osfmk/x86_64/locore.s
+++ b/osfmk/x86_64/locore.s
@@ -113,16 +113,6 @@ LEXT(recover_table_end)			;\
 	RECOVERY_SECTION
 	RECOVER_TABLE_START
 
-Entry(call_continuation)
-	movq	%rdi,%rcx			/* get continuation */
-	movq	%rsi,%rdi			/* continuation param */
-	movq	%rdx,%rsi			/* wait result */
-	movq	%gs:CPU_KERNEL_STACK,%rsp	/* set the stack */
-	xorq	%rbp,%rbp			/* zero frame pointer */
-	call	*%rcx				/* call continuation */
-	movq	%gs:CPU_ACTIVE_THREAD,%rdi
-	call	EXT(thread_terminate)
-
 /*
  * int rdmsr_carefully(uint32_t msr, uint32_t *lo, uint32_t *hi)
  */
@@ -150,9 +140,7 @@ LEXT(thread_bootstrap_return)
 
 LEXT(thread_exception_return)
 	cli
-	movq	%gs:CPU_ACTIVE_THREAD,%rsp
-	movq	ACT_PCB_ISS(%rsp), %rsp
-	xorl	%ecx, %ecx			/* don't check if we're in the PFZ */
+	xorl	%ecx, %ecx		/* don't check if we're in the PFZ */
 	jmp	EXT(return_from_trap)
 
 /*
@@ -187,6 +175,17 @@ _bcopy_fail:
 	movl	$(EFAULT),%eax		/* return error for failure */
 	ret
 
+Entry(pmap_safe_read)
+	RECOVERY_SECTION
+	RECOVER(_pmap_safe_read_fail)
+	movq	(%rdi), %rcx
+	mov	%rcx, (%rsi)
+	mov	$1, %eax
+	ret
+_pmap_safe_read_fail:
+	xor	%eax, %eax
+	ret
+
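
The prototype implied by the registers used above (first argument in %rdi, second in %rsi) would be roughly the following; the wrapper is hypothetical and only demonstrates the calling convention.

    #include <stdint.h>

    /* Inferred from the asm: returns 1 and stores one quadword through
     * "value" on success, 0 if the read faulted (RECOVER path). */
    extern int pmap_safe_read(uint64_t *addr, uint64_t *value);

    static int probe_quadword(uint64_t *addr, uint64_t *out)
    {
        return pmap_safe_read(addr, out);   /* 0 => faulted, *out untouched */
    }
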
 
 	
 /*
@@ -231,7 +230,6 @@ _bcopystr_fail:
 	movl	$(EFAULT),%eax		/* return error for failure */
 	ret
 
-
 /*
  * Done with recovery table.
  */
diff --git a/osfmk/x86_64/loose_ends.c b/osfmk/x86_64/loose_ends.c
index e8a1605a7..3d75d8eab 100644
--- a/osfmk/x86_64/loose_ends.c
+++ b/osfmk/x86_64/loose_ends.c
@@ -193,6 +193,25 @@ bcopy_phys(
 	bcopy(PHYSMAP_PTOV(src64), PHYSMAP_PTOV(dst64), bytes);
 }
 
+/*
+ * allow a function to get a quick virtual mapping of a physical page
+ */
+
+int
+apply_func_phys(
+		addr64_t dst64,
+		vm_size_t bytes,
+		int (*func)(void * buffer, vm_size_t bytes, void * arg),
+		void * arg)
+{
+	/* Not necessary for K64 - but ensure we stay within a page */
+	if ((((uint32_t)dst64 & (NBPG - 1)) + bytes) > NBPG) {
+	        panic("apply_func_phys alignment");
+	}
+
+	return func(PHYSMAP_PTOV(dst64), bytes, arg);
+}
+
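
A hypothetical caller, to show the contract: the callback sees a kernel virtual alias of the physical range (via PHYSMAP_PTOV), and the range must not cross a page boundary or the panic above fires. Both helpers are illustrative, not part of the patch.

    static int zero_range(void *buffer, vm_size_t bytes, void *arg)
    {
        (void)arg;
        bzero(buffer, bytes);
        return 0;
    }

    /* Zero 256 bytes at physical address pa (must stay within one page). */
    static int zero_phys_256(addr64_t pa)
    {
        return apply_func_phys(pa, 256, zero_range, NULL);
    }
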
 /* 
  * ovbcopy - like bcopy, but recognizes overlapping ranges and handles 
  *           them correctly.
@@ -224,7 +243,7 @@ ovbcopy(
  */
 
 
-static unsigned int
+static inline unsigned int
 ml_phys_read_data(pmap_paddr_t paddr, int size)
 {
 	unsigned int result;
@@ -255,8 +274,6 @@ ml_phys_read_long_long(pmap_paddr_t paddr )
 	return *(unsigned long long *)PHYSMAP_PTOV(paddr);
 }
 
-
-
 unsigned int ml_phys_read( vm_offset_t paddr)
 {
         return ml_phys_read_data((pmap_paddr_t)paddr, 4);
@@ -313,7 +330,7 @@ unsigned long long ml_phys_read_double_64(addr64_t paddr64)
  *  Write data to a physical address. Memory should not be cache inhibited.
  */
 
-static void
+static inline void
 ml_phys_write_data(pmap_paddr_t paddr, unsigned long data, int size)
 {
         switch (size) {
@@ -336,8 +353,6 @@ ml_phys_write_long_long(pmap_paddr_t paddr, unsigned long long data)
 	*(unsigned long long *)PHYSMAP_PTOV(paddr) = data;
 }
 
-
-
 void ml_phys_write_byte(vm_offset_t paddr, unsigned int data)
 {
         ml_phys_write_data((pmap_paddr_t)paddr, data, 1);
@@ -529,18 +544,15 @@ static inline void __clflush(void *ptr)
 
 void dcache_incoherent_io_store64(addr64_t pa, unsigned int count)
 {
-        uint32_t  linesize = cpuid_info()->cache_linesize;
-        addr64_t  addr;
-        boolean_t istate;
+	addr64_t  linesize = cpuid_info()->cache_linesize;
+	addr64_t  bound = (pa + count + linesize - 1) & ~(linesize - 1);
 
 	__mfence();
 
-        istate = ml_set_interrupts_enabled(FALSE);
-
-	for (addr = pa; addr < pa + count; addr += linesize)
-		__clflush(PHYSMAP_PTOV(addr));
-
-        (void) ml_set_interrupts_enabled(istate);
+	while (pa < bound) {
+		__clflush(PHYSMAP_PTOV(pa));
+		pa += linesize;
+	}
 
 	__mfence();
 }
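
The new loop rounds the end address up to a cache-line boundary so the final partial line is still flushed. A standalone check of that arithmetic (the values are arbitrary):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t linesize = 64;
        uint64_t pa = 0x100a, count = 100;        /* pa + count = 0x106e */
        uint64_t bound = (pa + count + linesize - 1) & ~(linesize - 1);
        assert(bound == 0x1080);                  /* rounded up to a line */
        return 0;
    }
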
@@ -551,10 +563,21 @@ void dcache_incoherent_io_flush64(addr64_t pa, unsigned int count)
 }
 
 void
-flush_dcache64(__unused addr64_t addr,
-	       __unused unsigned count,
-	       __unused int phys)
+flush_dcache64(addr64_t addr, unsigned count, int phys)
 {
+	if (phys) {
+		dcache_incoherent_io_flush64(addr, count);
+	}
+	else {
+		uint32_t  linesize = cpuid_info()->cache_linesize;
+		addr64_t  bound = (addr + count + linesize -1) & ~(linesize - 1);
+		__mfence();
+		while (addr < bound) {
+			__clflush((void *) (uintptr_t) addr);
+			addr += linesize;
+		}
+		__mfence();
+	}
 }
 
 void
@@ -603,316 +626,6 @@ cache_flush_page_phys(ppnum_t pa)
 }
 
 
-static int copyio(int, user_addr_t, char *, vm_size_t, vm_size_t *, int);
-static int copyio_phys(addr64_t, addr64_t, vm_size_t, int);
-
-/*
- * The copy engine has the following characteristics
- *   - copyio() handles copies to/from user or kernel space
- *   - copypv() deals with physical or virtual addresses
- *
- * Readers familiar with the 32-bit kernel will expect Joe's thesis at this
- * point describing the full glory of the copy window implementation. In K64,
- * however, there is no need for windowing. Thanks to the vast shared address
- * space, the kernel has direct access to userspace and to physical memory.
- *
- * User virtual addresses are accessible provided the user's cr3 is loaded.
- * Physical addresses are accessible via the direct map and the PHYSMAP_PTOV()
- * translation.
- *
- * Copyin/out variants all boil down to just these two routines in locore.s which
- * provide fault-recoverable copying:
- */
-extern int _bcopy(const void *, void *, vm_size_t);
-extern int _bcopystr(const void *, void *, vm_size_t, vm_size_t *);
-
-
-/*
- * Types of copies:
- */
-#define COPYIN		0	/* from user virtual to kernel virtual */
-#define COPYOUT		1	/* from kernel virtual to user virtual */
-#define COPYINSTR	2	/* string variant of copyout */
-#define COPYINPHYS	3	/* from user virtual to kernel physical */
-#define COPYOUTPHYS	4	/* from kernel physical to user virtual */
-
-
-static int
-copyio(int copy_type, user_addr_t user_addr, char *kernel_addr,
-       vm_size_t nbytes, vm_size_t *lencopied, int use_kernel_map)
-{
-        thread_t	thread;
-	pmap_t		pmap;
-	vm_size_t	bytes_copied;
-	int		error = 0;
-	boolean_t	istate = FALSE;
-	boolean_t	recursive_CopyIOActive;
-#if KDEBUG
-	int		debug_type = 0xeff70010;
-	debug_type += (copy_type << 2);
-#endif
-
-	thread = current_thread();
-
-	KERNEL_DEBUG(debug_type | DBG_FUNC_START,
-		     (unsigned)(user_addr >> 32), (unsigned)user_addr,
-		     nbytes, thread->machine.copyio_state, 0);
-
-	if (nbytes == 0)
-		goto out;
-
-        pmap = thread->map->pmap;
-
-
-	assert((vm_offset_t)kernel_addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS ||
-	       copy_type == COPYINPHYS || copy_type == COPYOUTPHYS);
-
-	/* Sanity and security check for addresses to/from a user */
-
-	if (((pmap != kernel_pmap) && (use_kernel_map == 0)) &&
-	    ((nbytes && (user_addr+nbytes <= user_addr)) || ((user_addr + nbytes) > vm_map_max(thread->map)))) {
-		error = EFAULT;
-		goto out;
-	}
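
The removed check guards two things at once: address-range validity and unsigned wrap-around (if user_addr + nbytes overflows, the sum compares <= user_addr). A self-contained restatement, with vm_max standing in for vm_map_max():

    #include <stdbool.h>
    #include <stdint.h>

    static bool user_range_ok(uint64_t user_addr, uint64_t nbytes,
                              uint64_t vm_max)
    {
        if (nbytes != 0 && user_addr + nbytes <= user_addr)
            return false;                 /* length wrapped past 2^64 */
        return user_addr + nbytes <= vm_max;
    }
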
-
-	/*
-	 * If the no_shared_cr3 boot-arg is set (true), the kernel runs on 
-	 * its own pmap and cr3 rather than the user's -- so that wild accesses
-	 * from kernel or kexts can be trapped. So, during copyin and copyout,
-	 * we need to switch back to the user's map/cr3. The thread is flagged
-	 * "CopyIOActive" at this time so that if the thread is pre-empted,
-	 * we will later restore the correct cr3.
-	 */
-	recursive_CopyIOActive = thread->machine.specFlags & CopyIOActive;
-	thread->machine.specFlags |= CopyIOActive;
-	if (no_shared_cr3) {
-		istate = ml_set_interrupts_enabled(FALSE);
- 		if (get_cr3() != pmap->pm_cr3)
-			set_cr3(pmap->pm_cr3);
-	}
-
-	/*
-	 * Ensure that we're running on the target thread's cr3.
-	 */
-	if ((pmap != kernel_pmap) && !use_kernel_map &&
-	    (get_cr3() != pmap->pm_cr3)) {
-		panic("copyio(%d,%p,%p,%ld,%p,%d) cr3 is %p expects %p",
-			copy_type, (void *)user_addr, kernel_addr, nbytes, lencopied, use_kernel_map,
-			(void *) get_cr3(), (void *) pmap->pm_cr3);
-	}
-	if (no_shared_cr3)
-		(void) ml_set_interrupts_enabled(istate);
-
-	KERNEL_DEBUG(0xeff70044 | DBG_FUNC_NONE, (unsigned)user_addr,
-		     (unsigned)kernel_addr, nbytes, 0, 0);
-
-        switch (copy_type) {
-
-	case COPYIN:
-	        error = _bcopy((const void *) user_addr,
-				kernel_addr,
-				nbytes);
-		break;
-			
-	case COPYOUT:
-	        error = _bcopy(kernel_addr,
-				(void *) user_addr,
-				nbytes);
-		break;
-
-	case COPYINPHYS:
-	        error = _bcopy((const void *) user_addr,
-				PHYSMAP_PTOV(kernel_addr),
-				nbytes);
-		break;
-
-	case COPYOUTPHYS:
-	        error = _bcopy((const void *) PHYSMAP_PTOV(kernel_addr),
-				(void *) user_addr,
-				nbytes);
-		break;
-
-	case COPYINSTR:
-	        error = _bcopystr((const void *) user_addr,
-				kernel_addr,
-				(int) nbytes,
-				&bytes_copied);
-
-		/*
-		 * lencopied should be updated on success
-		 * or ENAMETOOLONG...  but not EFAULT
-		 */
-		if (error != EFAULT)
-		        *lencopied = bytes_copied;
-
-		if (error) {
-#if KDEBUG
-		        nbytes = *lencopied;
-#endif
-		        break;
-		}
-		if (*(kernel_addr + bytes_copied - 1) == 0) {
-		        /*
-			 * we found a NULL terminator... we're done
-			 */
-#if KDEBUG
-		        nbytes = *lencopied;
-#endif
-			break;
-		} else {
-		        /*
-			 * no more room in the buffer and we haven't
-			 * yet come across a NULL terminator
-			 */
-#if KDEBUG
-		        nbytes = *lencopied;
-#endif
-		        error = ENAMETOOLONG;
-			break;
-		}
-		break;
-	}
-
-	if (!recursive_CopyIOActive)
-		thread->machine.specFlags &= ~CopyIOActive;
-	if (no_shared_cr3) {
-		istate = ml_set_interrupts_enabled(FALSE);
-		if  (get_cr3() != kernel_pmap->pm_cr3)
-			set_cr3(kernel_pmap->pm_cr3);
-		(void) ml_set_interrupts_enabled(istate);
-	}
-
-out:
-	KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)user_addr,
-		     (unsigned)kernel_addr, (unsigned)nbytes, error, 0);
-
-	return (error);
-}
-
-
-static int
-copyio_phys(addr64_t source, addr64_t sink, vm_size_t csize, int which)
-{
-        char	    *paddr;
-	user_addr_t vaddr;
-	int         ctype;
-
-	if (which & cppvPsnk) {
-		paddr  = (char *)sink;
-	        vaddr  = (user_addr_t)source;
-		ctype  = COPYINPHYS;
-	} else {
-	        paddr  = (char *)source;
-		vaddr  = (user_addr_t)sink;
-		ctype  = COPYOUTPHYS;
-	}
-	return copyio(ctype, vaddr, paddr, csize, NULL, which & cppvKmap);
-}
-
-int
-copyinmsg(const user_addr_t user_addr, char *kernel_addr, mach_msg_size_t nbytes)
-{
-    return copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0);
-}    
-
-int
-copyin(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes)
-{
-    return copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0);
-}
-
-int
-copyinstr(const user_addr_t user_addr,  char *kernel_addr, vm_size_t nbytes, vm_size_t *lencopied)
-{
-    *lencopied = 0;
-
-    return copyio(COPYINSTR, user_addr, kernel_addr, nbytes, lencopied, 0);
-}
-
-int
-copyoutmsg(const char *kernel_addr, user_addr_t user_addr, mach_msg_size_t nbytes)
-{
-    return copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0);
-}
-
-int
-copyout(const void *kernel_addr, user_addr_t user_addr, vm_size_t nbytes)
-{
-    return copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0);
-}
-
-
-kern_return_t
-copypv(addr64_t src64, addr64_t snk64, unsigned int size, int which)
-{
-	unsigned int lop, csize;
-	int bothphys = 0;
-	
-	KERNEL_DEBUG(0xeff7004c | DBG_FUNC_START, (unsigned)src64,
-		     (unsigned)snk64, size, which, 0);
-
-	if ((which & (cppvPsrc | cppvPsnk)) == 0 )				/* Make sure that only one is virtual */
-		panic("copypv: no more than 1 parameter may be virtual\n");	/* Not allowed */
-
-	if ((which & (cppvPsrc | cppvPsnk)) == (cppvPsrc | cppvPsnk))
-	        bothphys = 1;							/* both are physical */
-
-	while (size) {
-	  
-	        if (bothphys) {
-		        lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1)));		/* Assume sink smallest */
-
-			if (lop > (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1))))
-			        lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)));	/* No, source is smaller */
-		} else {
-		        /*
-			 * only need to compute the resid for the physical page
-			 * address... we don't care about where we start/finish in
-			 * the virtual since we just call the normal copyin/copyout
-			 */
-		        if (which & cppvPsrc)
-			        lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)));
-			else
-			        lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1)));
-		}
-		csize = size;						/* Assume we can copy it all */
-		if (lop < size)
-		        csize = lop;					/* Nope, we can't do it all */
-#if 0		
-		/*
-		 * flush_dcache64 is currently a nop on the i386... 
-		 * it's used when copying to non-system memory such
-		 * as video capture cards... on PPC there was a need
-		 * to flush due to how we mapped this memory... not
-		 * sure if it's needed on i386.
-		 */
-		if (which & cppvFsrc)
-		        flush_dcache64(src64, csize, 1);		/* If requested, flush source before move */
-		if (which & cppvFsnk)
-		        flush_dcache64(snk64, csize, 1);		/* If requested, flush sink before move */
-#endif
-		if (bothphys)
-		        bcopy_phys(src64, snk64, csize);		/* Do a physical copy, virtually */
-		else {
-		        if (copyio_phys(src64, snk64, csize, which))
-			        return (KERN_FAILURE);
-		}
-#if 0
-		if (which & cppvFsrc)
-		        flush_dcache64(src64, csize, 1);	/* If requested, flush source after move */
-		if (which & cppvFsnk)
-		        flush_dcache64(snk64, csize, 1);	/* If requested, flush sink after move */
-#endif
-		size   -= csize;					/* Calculate what is left */
-		snk64 += csize;					/* Bump sink to next physical address */
-		src64 += csize;					/* Bump source to next physical address */
-	}
-	KERNEL_DEBUG(0xeff7004c | DBG_FUNC_END, (unsigned)src64,
-		     (unsigned)snk64, size, which, 0);
-
-	return KERN_SUCCESS;
-}
-
 #if !MACH_KDP
 void
 kdp_register_callout(void)
diff --git a/osfmk/x86_64/machine_routines_asm.s b/osfmk/x86_64/machine_routines_asm.s
index f8fecaccf..1c74f9fc8 100644
--- a/osfmk/x86_64/machine_routines_asm.s
+++ b/osfmk/x86_64/machine_routines_asm.s
@@ -27,7 +27,7 @@
  */
  
 #include <i386/asm.h>
-#include <i386/rtclock.h>
+#include <i386/rtclock_asm.h>
 #include <i386/proc_reg.h>
 #include <i386/eflags.h>
        
@@ -85,31 +85,7 @@ ENTRY(tmrCvt)
 	shrdq   $32,%rdx,%rax			/* %rdx:%rax >>= 32 */
 	ret
 
-
-/*
- * void _rtc_nanotime_store(
- *		uint64_t        tsc,		// %rdi
- *		uint64_t        nsec,		// %rsi
- *		uint32_t        scale,		// %rdx
- *		uint32_t        shift,		// %rcx
- *		rtc_nanotime_t  *dst);		// %r8
- */
-ENTRY(_rtc_nanotime_store)
-	movl	RNT_GENERATION(%r8),%eax	/* get current generation */
-	movl	$0,RNT_GENERATION(%r8)		/* flag data as being updated */
-	movq	%rdi,RNT_TSC_BASE(%r8)
-	movq	%rsi,RNT_NS_BASE(%r8)
-	movl	%edx,RNT_SCALE(%r8)
-	movl	%ecx,RNT_SHIFT(%r8)
-
-	incl	%eax				/* next generation */
-	jnz	1f
-	incl	%eax				/* skip 0, which is a flag */
-1:	movl	%eax,RNT_GENERATION(%r8)	/* update generation */
-
-	ret
-
-/*
+/*
  * void _rtc_nanotime_adjust(
  *		uint64_t        tsc_base_delta,	// %rdi
  *		rtc_nanotime_t  *dst);		// %rsi
@@ -170,7 +146,7 @@ ENTRY(_rtc_nanotime_read)
 	/*
 	 * Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD
 	 */
-	RTC_NANOTIME_READ_FAST()
+	PAL_RTC_NANOTIME_READ_FAST()
 
 	ret
 
@@ -186,3 +162,14 @@ Lslow:
 	.data
 1: 	String	"_rtc_nanotime_read() - slow algorithm not supported"
 
+
+Entry(call_continuation)
+	movq	%rdi,%rcx			/* get continuation */
+	movq	%rsi,%rdi			/* continuation param */
+	movq	%rdx,%rsi			/* wait result */
+	movq	%gs:CPU_KERNEL_STACK,%rsp	/* set the stack */
+	xorq	%rbp,%rbp			/* zero frame pointer */
+	call	*%rcx				/* call continuation */
+	movq	%gs:CPU_ACTIVE_THREAD,%rdi
+	call	EXT(thread_terminate)
+
diff --git a/osfmk/x86_64/pal_routines_asm.s b/osfmk/x86_64/pal_routines_asm.s
new file mode 100644
index 000000000..4f14284f6
--- /dev/null
+++ b/osfmk/x86_64/pal_routines_asm.s
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+ 
+#include <i386/asm.h>
+#include <i386/asm64.h>
+       
+#include <assym.s>
+
+/*
+ * Copy "count" bytes from "src" to %rsp, using
+ * "tmpindex" for a scratch counter and %rax
+ */
+#define COPY_STACK(src, count, tmpindex) \
+	mov	$0, tmpindex	/* initial scratch counter */ ; \
+1: \
+	mov	0(src,tmpindex,1), %rax	 /* copy one 64-bit word from source... */ ; \
+	mov	%rax, 0(%rsp,tmpindex,1) /* ... to stack */ ; \
+	add	$8, tmpindex		 /* increment counter */ ; \
+	cmp	count, tmpindex		 /* exit if stack has been copied */ ; \
+	jne 1b
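
In C, the macro amounts to a word-at-a-time copy; this sketch assumes count is a multiple of 8 bytes, as the asm does:

    #include <stddef.h>
    #include <stdint.h>

    static void copy_stack_words(uint64_t *dst, const uint64_t *src,
                                 size_t count /* bytes, multiple of 8 */)
    {
        for (size_t i = 0; i < count; i += 8)
            dst[i / 8] = src[i / 8];      /* the mov/add/cmp/jne loop */
    }
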
+	
+/*
+	void
+	pal_efi_call_in_64bit_mode_asm(uint64_t func,
+	                           struct pal_efi_registers *efi_reg,
+	                           void *stack_contents,
+	                           size_t stack_contents_size)
+
+	* Switch from compatibility mode to long mode, and
+	* then execute the function pointer with the specified
+	* register and stack contents (based at %rsp). Afterwards,
+	* collect the return value, restore the original state,
+	* and return.
+*/
+ENTRY(_pal_efi_call_in_64bit_mode_asm)
+	FRAME
+
+	/* save non-volatile registers */
+	push	%rbx
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+
+	/* save parameters that we will need later */
+	push	%rsi
+	push	%rcx
+
+	sub	$8, %rsp	/* align to 16-byte boundary */
+				/* efi_reg in %rsi */
+				/* stack_contents into %rdx */
+				/* s_c_s into %rcx */
+	sub	%rcx, %rsp	/* make room for stack contents */
+
+	COPY_STACK(%rdx, %rcx, %r8)
+
+	/* load efi_reg into real registers */
+	mov	0(%rsi),  %rcx
+	mov	8(%rsi),  %rdx
+	mov	16(%rsi), %r8
+	mov	24(%rsi), %r9
+	mov	32(%rsi), %rax
+
+					/* func pointer in %rdi */
+	call	*%rdi			/* call EFI runtime */
+
+	mov	-48(%rbp), %rsi		/* load efi_reg into %rsi */
+	mov	%rax, 32(%rsi)		/* save RAX back */
+
+	mov	-56(%rbp), %rcx	/* load s_c_s into %rcx */
+	add	%rcx, %rsp	/* discard stack contents */
+	add	$8, %rsp	/* restore stack pointer */
+
+	pop	%rcx
+	pop	%rsi
+	pop	%r15
+	pop	%r14
+	pop	%r13
+	pop	%r12
+	pop	%rbx
+
+	EMARF
+	ret
+
+/*
+	void
+	pal_efi_call_in_32bit_mode_asm(uint32_t func,
+	                           struct pal_efi_registers *efi_reg,
+	                           void *stack_contents,
+	                           size_t stack_contents_size)
+
+*/
+ENTRY(_pal_efi_call_in_32bit_mode_asm)
+	FRAME
+
+	/* save non-volatile registers */
+	push	%rbx
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+
+	/* save parameters that we will need later */
+	push	%rsi
+	push	%rcx
+
+	push	%rbp	/* save %rbp and align to 16-byte boundary */
+				/* efi_reg in %rsi */
+				/* stack_contents into %rdx */
+				/* s_c_s into %rcx */
+	sub	%rcx, %rsp	/* make room for stack contents */
+
+	COPY_STACK(%rdx, %rcx, %r8)
+
+	/*
+	 * Here in long-mode, with high kernel addresses,
+	 * but with the kernel double-mapped in the bottom 4GB.
+	 * We now switch to compat mode and call into EFI.
+	 */
+	ENTER_COMPAT_MODE()
+
+	call	*%edi			/* call EFI runtime */
+
+	ENTER_64BIT_MODE()
+
+	mov	-48(%rbp), %rsi		/* load efi_reg into %rsi */
+	mov	%rax, 32(%rsi)		/* save RAX back */
+
+	mov	-56(%rbp), %rcx	/* load s_c_s into %rcx */
+	add	%rcx, %rsp	/* discard stack contents */
+	pop	%rbp		/* restore full 64-bit frame pointer */
+				/* which the 32-bit EFI will have truncated */
+				/* our full %rsp will be restored by EMARF */
+	pop	%rcx
+	pop	%rsi
+	pop	%r15
+	pop	%r14
+	pop	%r13
+	pop	%r12
+	pop	%rbx
+
+	EMARF
+	ret
+
+
+
+/*
+ * void _pal_rtc_nanotime_store(
+ *		uint64_t        tsc,		// %rdi
+ *		uint64_t        nsec,		// %rsi
+ *		uint32_t        scale,		// %rdx
+ *		uint32_t        shift,		// %rcx
+ *		rtc_nanotime_t  *dst);		// %r8
+ */
+ENTRY(_pal_rtc_nanotime_store)
+	movl	RNT_GENERATION(%r8),%eax	/* get current generation */
+	movl	$0,RNT_GENERATION(%r8)		/* flag data as being updated */
+	movq	%rdi,RNT_TSC_BASE(%r8)
+	movq	%rsi,RNT_NS_BASE(%r8)
+	movl	%edx,RNT_SCALE(%r8)
+	movl	%ecx,RNT_SHIFT(%r8)
+
+	incl	%eax				/* next generation */
+	jnz	1f
+	incl	%eax				/* skip 0, which is a flag */
+1:	movl	%eax,RNT_GENERATION(%r8)	/* update generation */
+
+	ret
+
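
The generation field implements a seqlock-style protocol: 0 means "update in progress", and the writer above bumps the generation (skipping 0) when done. A sketch of the matching reader, ignoring memory-ordering details and with illustrative type names:

    #include <stdint.h>

    typedef struct {
        volatile uint32_t generation;     /* 0 while being updated */
        uint64_t tsc_base, ns_base;
        uint32_t scale, shift;
    } rtc_nanotime_sketch_t;

    static void rtc_snapshot(const rtc_nanotime_sketch_t *src,
                             rtc_nanotime_sketch_t *dst)
    {
        uint32_t gen;
        do {
            while ((gen = src->generation) == 0)
                ;                          /* writer mid-update: spin */
            dst->tsc_base = src->tsc_base;
            dst->ns_base  = src->ns_base;
            dst->scale    = src->scale;
            dst->shift    = src->shift;
        } while (src->generation != gen);  /* retry if a store intervened */
    }
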
diff --git a/osfmk/x86_64/pmap.c b/osfmk/x86_64/pmap.c
index a8c8cbde4..69a5c542d 100644
--- a/osfmk/x86_64/pmap.c
+++ b/osfmk/x86_64/pmap.c
@@ -1,6 +1,5 @@
-
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -101,6 +100,7 @@
 #include <kern/thread.h>
 #include <kern/zalloc.h>
 #include <kern/queue.h>
+#include <kern/mach_param.h>
 
 #include <kern/lock.h>
 #include <kern/kalloc.h>
@@ -119,6 +119,7 @@
 
 #include <kern/misc_protos.h>			/* prototyping */
 #include <i386/misc_protos.h>
+#include <i386/i386_lowmem.h>
 #include <x86_64/lowglobals.h>
 
 #include <i386/cpuid.h>
@@ -132,6 +133,7 @@
 #include <i386/proc_reg.h>
 #include <i386/tsc.h>
 #include <i386/pmap_internal.h>
+#include <i386/pmap_pcid.h>
 
 #if	MACH_KDB
 #include <ddb/db_command.h>
@@ -146,7 +148,6 @@
 #include <i386/mp_desc.h>
 
 
-
 #ifdef IWANTTODEBUG
 #undef	DEBUG
 #define DEBUG 1
@@ -154,39 +155,18 @@
 #include <i386/postcode.h>
 #endif /* IWANTTODEBUG */
 
-boolean_t pmap_trace = FALSE;
-
-#if PMAP_DBG
-#define DBG(x...)       kprintf("DBG: " x)
+#ifdef	PMAP_DEBUG
+#define DBG(x...)	kprintf("DBG: " x)
 #else
 #define DBG(x...)
 #endif
-
-boolean_t	no_shared_cr3 = DEBUG;		/* TRUE for DEBUG by default */
-
-/*
- * Forward declarations for internal functions.
+/* Compile time assert to ensure adjacency/alignment of per-CPU data fields used
+ * in the trampolines for kernel/user boundary TLB coherency.
  */
+char pmap_cpu_data_assert[(((offsetof(cpu_data_t, cpu_tlb_invalid) - offsetof(cpu_data_t, cpu_active_cr3)) == 8) && (offsetof(cpu_data_t, cpu_active_cr3) % 64 == 0)) ? 1 : -1];
+boolean_t pmap_trace = FALSE;
 
-
-void		phys_attribute_clear(
-			ppnum_t		phys,
-			int		bits);
-
-int		phys_attribute_test(
-			ppnum_t		phys,
-			int		bits);
-
-void		phys_attribute_set(
-			ppnum_t		phys,
-			int		bits);
-
-void		pmap_set_reference(
-			ppnum_t pn);
-
-boolean_t	phys_page_exists(
-			ppnum_t pn);
-
+boolean_t	no_shared_cr3 = DEBUG;		/* TRUE for DEBUG by default */
 
 int nx_enabled = 1;			/* enable no-execute protection */
 int allow_data_exec  = VM_ABI_32;	/* 32-bit apps may execute data by default, 64-bit apps may not */
@@ -206,24 +186,8 @@ decl_simple_lock_data(,pv_hashed_free_list_lock)
 decl_simple_lock_data(,pv_hashed_kern_free_list_lock)
 decl_simple_lock_data(,pv_hash_table_lock)
 
-int			pv_hashed_free_count = 0;
-int			pv_hashed_kern_free_count = 0;
-
-
 zone_t		pv_hashed_list_zone;	/* zone of pv_hashed_entry structures */
 
-/*
- *	Each entry in the pv_head_table is locked by a bit in the
- *	pv_lock_table.  The lock bits are accessed by the physical
- *	address of the page they lock.
- */
-
-char	*pv_lock_table;		/* pointer to array of bits */
-
-
-char    *pv_hash_lock_table;
-
-
 /*
  *	First and last physical addresses that we maintain any information
  *	for.  Initialized to zero so that pmap operations done before
@@ -236,11 +200,17 @@ static struct vm_object kpml4obj_object_store;
 static struct vm_object kpdptobj_object_store;
 
 /*
- *	Array of physical page attributes for managed pages.
+ *	Array of physical page attributes for managed pages.
  *	One byte per physical page.
  */
 char		*pmap_phys_attributes;
 unsigned int	last_managed_page = 0;
+
+/*
+ *	Amount of virtual memory mapped by one
+ *	page-directory entry.
+ */
+
 uint64_t pde_mapped_size = PDE_MAPPED_SIZE;
 
 unsigned pmap_memory_region_count;
@@ -261,19 +231,18 @@ pd_entry_t	commpage64_pde;
 
 struct zone	*pmap_zone;		/* zone of pmap structures */
 
+struct zone	*pmap_anchor_zone;
+int		pmap_debug = 0;		/* flag for debugging prints */
+
 unsigned int	inuse_ptepages_count = 0;
+long long	alloc_ptepages_count __attribute__((aligned(8))) = 0; /* aligned for atomic access */
+unsigned int	bootstrap_wired_pages = 0;
+int		pt_fake_zone_index = -1;
 
-addr64_t	kernel64_cr3;
+extern 	long	NMIPI_acks;
 
-/*
- *	Pmap cache.  Cache is threaded through ref_count field of pmap.
- *	Max will eventually be constant -- variable for experimentation.
- */
-int		pmap_cache_max = 32;
-int		pmap_alloc_chunk = 8;
-pmap_t		pmap_cache_list;
-int		pmap_cache_count;
-decl_simple_lock_data(,pmap_cache_lock)
+boolean_t	kernel_text_ps_4K = TRUE;
+boolean_t	wpkernel = TRUE;
 
 extern char	end;
 
@@ -282,116 +251,13 @@ static int	nkpt;
 pt_entry_t     *DMAP1, *DMAP2;
 caddr_t         DADDR1;
 caddr_t         DADDR2;
-/*
- * for legacy, returns the address of the pde entry.
- * for 64 bit, causes the pdpt page containing the pde entry to be mapped,
- * then returns the mapped address of the pde entry in that page
- */
-pd_entry_t     *
-pmap_pde(pmap_t m, vm_map_offset_t v)
-{
-	pd_entry_t     *pde;
-
-	assert(m);
-#if 0
-	if (m == kernel_pmap)
-		pde = (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]));
-	else
-#endif
-		pde = pmap64_pde(m, v);
-
-	return pde;
-}
 
 /*
- * the single pml4 page per pmap is allocated at pmap create time and exists
- * for the duration of the pmap. we allocate this page in kernel vm.
- * this returns the address of the requested pml4 entry in the top level page.
+ * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
+ * properly deals with the anchor.
+ * must be called with the hash locked, does not unlock it
  */
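
For the comment above, a hedged sketch of such an unlink on a singly linked chain with an anchor (head) pointer; the types and names are mine, not the patch's:

    typedef struct pvh_sketch {
        struct pvh_sketch *next;
    } pvh_sketch_t;

    /* Caller holds the hash lock; pvh is assumed to be on the chain. */
    static void pvh_unlink(pvh_sketch_t **anchor, pvh_sketch_t *pvh)
    {
        pvh_sketch_t **pp = anchor;
        while (*pp != pvh)
            pp = &(*pp)->next;            /* walk to the entry */
        *pp = pvh->next;                  /* splice it out of the chain */
    }
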
-static inline
-pml4_entry_t *
-pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
-{
-	return &pmap->pm_pml4[(vaddr >> PML4SHIFT) & (NPML4PG-1)];
-}
 
-/*
- * maps in the pml4 page, if any, containing the pdpt entry requested
- * and returns the address of the pdpt entry in that mapped page
- */
-pdpt_entry_t *
-pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
-{
-	pml4_entry_t	newpf;
-	pml4_entry_t	*pml4;
-
-	assert(pmap);
-	if ((vaddr > 0x00007FFFFFFFFFFFULL) &&
-	    (vaddr < 0xFFFF800000000000ULL)) {
-		return (0);
-	}
-
-	pml4 = pmap64_pml4(pmap, vaddr);
-	if (pml4 && ((*pml4 & INTEL_PTE_VALID))) {
-		newpf = *pml4 & PG_FRAME;
-		return &((pdpt_entry_t *) PHYSMAP_PTOV(newpf))
-			[(vaddr >> PDPTSHIFT) & (NPDPTPG-1)];
-	}
-	return (NULL);
-}
-/*
- * maps in the pdpt page, if any, containing the pde entry requested
- * and returns the address of the pde entry in that mapped page
- */
-pd_entry_t *
-pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr)
-{
-	pdpt_entry_t	newpf;
-	pdpt_entry_t	*pdpt;
-
-	assert(pmap);
-	if ((vaddr > 0x00007FFFFFFFFFFFULL) &&
-	    (vaddr < 0xFFFF800000000000ULL)) {
-		return (0);
-	}
-
-	pdpt = pmap64_pdpt(pmap, vaddr);
-
-	if (pdpt && ((*pdpt & INTEL_PTE_VALID))) {
-		newpf = *pdpt & PG_FRAME;
-		return &((pd_entry_t *) PHYSMAP_PTOV(newpf))
-			[(vaddr >> PDSHIFT) & (NPDPG-1)];
-	}
-	return (NULL);
-}
-
-/*
- * return address of mapped pte for vaddr va in pmap pmap.
- *
- * physically maps the pde page, if any, containing the pte in and returns
- * the address of the pte in that mapped page
- *
- * In case the pde maps a superpage, return the pde, which, in this case
- * is the actual page table entry.
- */
-pt_entry_t *
-pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
-{
-	pd_entry_t	*pde;
-	pd_entry_t	newpf;
-
-	assert(pmap);
-	pde = pmap_pde(pmap, vaddr);
-
-	if (pde && ((*pde & INTEL_PTE_VALID))) {
-		if (*pde & INTEL_PTE_PS) 
-			return pde;
-		newpf = *pde & PG_FRAME;
-		return &((pt_entry_t *)PHYSMAP_PTOV(newpf))
-			[i386_btop(vaddr) & (ppnum_t)(NPTEPG-1)];
-	}
-	return (NULL);
-}
 
 /*
  *	Map memory at initialization.  The physical addresses being
@@ -437,7 +303,7 @@ pmap_map_bd(
 	pt_entry_t	template;
 	pt_entry_t	*pte;
 	spl_t           spl;
-
+	vm_offset_t	base = virt;
 	template = pa_to_pte(start_addr)
 		| INTEL_PTE_REF
 		| INTEL_PTE_MOD
@@ -452,7 +318,6 @@ pmap_map_bd(
 	if (prot & VM_PROT_WRITE)
 		template |= INTEL_PTE_WRITE;
 
-
 	while (start_addr < end_addr) {
 	        spl = splhigh();
 		pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
@@ -465,9 +330,8 @@ pmap_map_bd(
 		virt += PAGE_SIZE;
 		start_addr += PAGE_SIZE;
 	}
-
-
-	flush_tlb();
+	(void)base;
+	PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
 	return(virt);
 }
 
@@ -480,13 +344,16 @@ extern  vm_offset_t		stext;
 extern  vm_offset_t		etext;
 extern  vm_offset_t		sdata;
 
+extern void			*KPTphys;
+
 void
 pmap_cpu_init(void)
 {
 	/*
 	 * Here early in the life of a processor (from cpu_mode_init()).
-	 * Ensure global page feature is disabled.
+	 * Ensure global page feature is disabled at this point.
 	 */
+
 	set_cr4(get_cr4() &~ CR4_PGE);
 
 	/*
@@ -495,6 +362,8 @@ pmap_cpu_init(void)
 	current_cpu_datap()->cpu_kernel_cr3 = kernel_pmap->pm_cr3;
 	current_cpu_datap()->cpu_active_cr3 = kernel_pmap->pm_cr3;
 	current_cpu_datap()->cpu_tlb_invalid = FALSE;
+	current_cpu_datap()->cpu_task_map = TASK_MAP_64BIT;
+	pmap_pcid_configure();
 }
 
 
@@ -514,7 +383,6 @@ pmap_bootstrap(
 	vm_offset_t	va;
 	int i;
 #endif
-
 	assert(IA32e);
 
 	vm_last_addr = VM_MAX_KERNEL_ADDRESS;	/* Set the highest address
@@ -534,12 +402,16 @@ pmap_bootstrap(
 	kernel_pmap->pm_pdpt = (pd_entry_t *) ((uintptr_t)IdlePDPT);
 	kernel_pmap->pm_pml4 = IdlePML4;
 	kernel_pmap->pm_cr3 = (uintptr_t)ID_MAP_VTOP(IdlePML4);
+	pmap_pcid_initialize_kernel(kernel_pmap);
 
+	
 
 	current_cpu_datap()->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3;
 
 	nkpt = NKPT;
 	OSAddAtomic(NKPT,  &inuse_ptepages_count);
+	OSAddAtomic64(NKPT,  &alloc_ptepages_count);
+	bootstrap_wired_pages = NKPT;
 
 	virtual_avail = (vm_offset_t)(VM_MIN_KERNEL_ADDRESS) + (vm_offset_t)first_avail;
 	virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);
@@ -590,8 +462,6 @@ pmap_bootstrap(
 		npvhash = NPVHASH;
 	}
 
-	printf("npvhash=%d\n", npvhash);
-
 	simple_lock_init(&kernel_pmap->lock, 0);
 	simple_lock_init(&pv_hashed_free_list_lock, 0);
 	simple_lock_init(&pv_hashed_kern_free_list_lock, 0);
@@ -599,6 +469,14 @@ pmap_bootstrap(
 
 	pmap_cpu_init();
 
+	if (pmap_pcid_ncpus)
+		printf("PMAP: PCID enabled\n");
+
+	boot_args *args = (boot_args *)PE_state.bootArgs;
+	if (args->efiMode == kBootArgsEfiMode32) {
+		printf("EFI32: kernel virtual space limited to 4GB\n");
+		virtual_end = VM_MAX_KERNEL_ADDRESS_EFI32;
+	}
 	kprintf("Kernel virtual space from 0x%lx to 0x%lx.\n",
 			(long)KERNEL_BASE, (long)virtual_end);
 	kprintf("Available physical space from 0x%llx to 0x%llx\n",
@@ -723,7 +601,6 @@ pmap_init(void)
 					pmap_phys_attributes[pn] |= PHYS_NOENCRYPT;
 				else if (pn >= lowest_hi && pn <= highest_hi)
 					pmap_phys_attributes[pn] |= PHYS_NOENCRYPT;
-
 			}
 		}
 	}
@@ -743,8 +620,20 @@ pmap_init(void)
 	pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
         zone_change(pmap_zone, Z_NOENCRYPT, TRUE);
 
+	pmap_anchor_zone = zinit(PAGE_SIZE, task_max, PAGE_SIZE, "pagetable anchors");
+	zone_change(pmap_anchor_zone, Z_NOENCRYPT, TRUE);
+
+#if	ZONE_DEBUG
+	/* The anchor is required to be page aligned. Zone debugging adds
+	 * padding which may violate that requirement. Disable it
+	 * to avoid assumptions.
+	 */
+	zone_debug_disable(pmap_anchor_zone);
+#endif	
+
 	s = (vm_size_t) sizeof(struct pv_hashed_entry);
-	pv_hashed_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
+	pv_hashed_list_zone = zinit(s, 10000*s /* Expandable zone */,
+	    4096 * 3 /* LCM x86_64*/, "pv_list");
 	zone_change(pv_hashed_list_zone, Z_NOENCRYPT, TRUE);
 
 	/* create pv entries for kernel pages mapped by low level
@@ -752,7 +641,7 @@ pmap_init(void)
 	   e.g. kext pages from the middle of our addr space */
 
 	vaddr = (vm_map_offset_t) VM_MIN_KERNEL_ADDRESS;
-	for (ppn = 0; ppn < i386_btop(avail_start); ppn++) {
+	for (ppn = VM_MIN_KERNEL_PAGE; ppn < i386_btop(avail_start); ppn++) {
 		pv_rooted_entry_t pv_e;
 
 		pv_e = pai_to_pvh(ppn);
@@ -763,13 +652,6 @@ pmap_init(void)
 	}
 	pmap_initialized = TRUE;
 
-	/*
-	 *	Initialize pmap cache.
-	 */
-	pmap_cache_list = PMAP_NULL;
-	pmap_cache_count = 0;
-	simple_lock_init(&pmap_cache_lock, 0);
-
 	max_preemption_latency_tsc = tmrCvt((uint64_t)MAX_PREEMPTION_LATENCY_NS, tscFCvtn2t);
 
 	/*
@@ -779,6 +661,210 @@ pmap_init(void)
 	pmap_expand_pml4(kernel_pmap, KERNEL_BASEMENT);
 }
 
+/*
+ * Called once VM is fully initialized so that we can release unused
+ * sections of low memory to the general pool.
+ * Also complete the set-up of identity-mapped sections of the kernel:
+ *  1) write-protect kernel text
+ *  2) map kernel text using large pages if possible
+ *  3) read and write-protect page zero (for K32)
+ *  4) map the global page at the appropriate virtual address.
+ *
+ * Use of large pages
+ * ------------------
+ * To effectively map and write-protect all kernel text pages, the text
+ * must be 2M-aligned at the base, and the data section above must also be
+ * 2M-aligned. That is, there's padding below and above. This is achieved
+ * through linker directives. Large pages are used only if this alignment
+ * exists (and not overridden by the -kernel_text_ps_4K boot-arg). The
+ * memory layout is:
+ * 
+ *                       :                :
+ *                       |     __DATA     |
+ *               sdata:  ==================  2Meg
+ *                       |                |
+ *                       |  zero-padding  |
+ *                       |                |
+ *               etext:  ------------------ 
+ *                       |                |
+ *                       :                :
+ *                       |                |
+ *                       |     __TEXT     |
+ *                       |                |
+ *                       :                :
+ *                       |                |
+ *               stext:  ==================  2Meg
+ *                       |                |
+ *                       |  zero-padding  |
+ *                       |                |
+ *               eHIB:   ------------------ 
+ *                       |     __HIB      |
+ *                       :                :
+ *
+ * Prior to changing the mapping from 4K to 2M, the zero-padding pages
+ * [eHIB,stext] and [etext,sdata] are ml_static_mfree()'d. Then all the
+ * 4K pages covering [stext,etext] are coalesced as 2M large pages.
+ * The now unused level-1 PTE pages are also freed.
+ */
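
The alignment precondition described above reduces to two mask tests; a small sketch (the constants mirror I386_LPGBYTES/I386_LPGMASK):

    #include <stdbool.h>
    #include <stdint.h>

    #define LPG_SIZE (2ULL * 1024 * 1024)   /* I386_LPGBYTES */
    #define LPG_MASK (LPG_SIZE - 1)         /* I386_LPGMASK */

    static bool can_map_text_with_large_pages(uint64_t stext, uint64_t sdata)
    {
        /* both the text base and the data base must be 2MB-aligned */
        return (stext & LPG_MASK) == 0 && (sdata & LPG_MASK) == 0;
    }
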
+extern uint32_t pmap_reserved_ranges;
+void
+pmap_lowmem_finalize(void)
+{
+	spl_t           spl;
+	int		i;
+
+	/* Check the kernel is linked at the expected base address */
+	if (i386_btop(kvtophys((vm_offset_t) &IdlePML4)) !=
+	    I386_KERNEL_IMAGE_BASE_PAGE)
+		panic("pmap_lowmem_finalize() unexpected kernel base address");
+
+	/*
+	 * Update wired memory statistics for early boot pages
+	 */
+	PMAP_ZINFO_PALLOC(bootstrap_wired_pages * PAGE_SIZE);
+
+	/*
+	 * Free all pages in pmap regions below the base:
+	 * rdar://6332712
+	 *	We can't free all the pages to VM that EFI reports available.
+	 *	Pages in the range 0xc0000-0xff000 aren't safe over sleep/wake.
+	 *	There's also a size miscalculation here: pend is one page less
+	 *	than it should be but this is not fixed to be backwards
+	 *	compatible.
+	 *	Due to this current EFI limitation, we take only the first
+	 *	entry in the memory region table. However, the loop is retained
+	 * 	(with the intended termination criteria commented out) in the
+	 *	hope that some day we can free all low-memory ranges.
+	 */
+	for (i = 0;
+//	     pmap_memory_regions[i].end <= I386_KERNEL_IMAGE_BASE_PAGE;
+	     i < 1  && (pmap_reserved_ranges == 0);
+	     i++) {
+		vm_offset_t	pbase = (vm_offset_t)i386_ptob(pmap_memory_regions[i].base);
+		vm_offset_t	pend  = (vm_offset_t)i386_ptob(pmap_memory_regions[i].end);
+//		vm_offset_t	pend  = i386_ptob(pmap_memory_regions[i].end+1);
+
+		DBG("ml_static_mfree(%p,%p) for pmap region %d\n",
+		    (void *) ml_static_ptovirt(pbase),
+		    (void *) (pend - pbase), i);
+		ml_static_mfree(ml_static_ptovirt(pbase), pend - pbase);
+	}
+
+	/*
+	 * If text and data are both 2MB-aligned,
+	 * we can map text with large-pages,
+	 * unless the -kernel_text_ps_4K boot-arg overrides.
+	 */
+	if ((stext & I386_LPGMASK) == 0 && (sdata & I386_LPGMASK) == 0) {
+		kprintf("Kernel text is 2MB aligned");
+		kernel_text_ps_4K = FALSE;
+		if (PE_parse_boot_argn("-kernel_text_ps_4K",
+				       &kernel_text_ps_4K,
+				       sizeof (kernel_text_ps_4K)))
+			kprintf(" but will be mapped with 4K pages\n");
+		else
+			kprintf(" and will be mapped with 2M pages\n");
+	}
+
+	(void) PE_parse_boot_argn("wpkernel", &wpkernel, sizeof (wpkernel));
+	if (wpkernel)
+		kprintf("Kernel text %p-%p to be write-protected\n",
+			(void *) stext, (void *) etext);
+
+	spl = splhigh();
+
+	/*
+	 * Scan over text if mappings are to be changed:
+	 * - Remap kernel text readonly unless the "wpkernel" boot-arg is 0 
+	 * - Change to large-pages if possible and not overridden.
+	 */
+	if (kernel_text_ps_4K && wpkernel) {
+		vm_offset_t     myva;
+		for (myva = stext; myva < etext; myva += PAGE_SIZE) {
+			pt_entry_t     *ptep;
+
+			ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
+			if (ptep)
+				pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW);
+		}
+	}
+
+	if (!kernel_text_ps_4K) {
+		vm_offset_t     myva;
+
+		/*
+		 * Release zero-filled page padding used for 2M-alignment.
+		 */
+		DBG("ml_static_mfree(%p,%p) for padding below text\n",
+			(void *) eHIB, (void *) (stext - eHIB));
+		ml_static_mfree(eHIB, stext - eHIB);
+		DBG("ml_static_mfree(%p,%p) for padding above text\n",
+			(void *) etext, (void *) (sdata - etext));
+		ml_static_mfree(etext, sdata - etext);
+
+		/*
+		 * Coalesce text pages into large pages.
+		 */
+		for (myva = stext; myva < sdata; myva += I386_LPGBYTES) {
+			pt_entry_t	*ptep;
+			vm_offset_t	pte_phys;
+			pt_entry_t	*pdep;
+			pt_entry_t	pde;
+
+			pdep = pmap_pde(kernel_pmap, (vm_map_offset_t)myva);
+			ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
+			DBG("myva: %p pdep: %p ptep: %p\n",
+				(void *) myva, (void *) pdep, (void *) ptep);
+			if ((*ptep & INTEL_PTE_VALID) == 0)
+				continue;
+			pte_phys = (vm_offset_t)(*ptep & PG_FRAME);
+			pde = *pdep & PTMASK;	/* page attributes from pde */
+			pde |= INTEL_PTE_PS;	/* make it a 2M entry */
+			pde |= pte_phys;	/* take page frame from pte */
+
+			if (wpkernel)
+				pde &= ~INTEL_PTE_RW;
+			DBG("pmap_store_pte(%p,0x%llx)\n",
+				(void *)pdep, pde);
+			pmap_store_pte(pdep, pde);
+
+			/*
+			 * Free the now-unused level-1 pte.
+			 * Note: ptep is a virtual address to the pte in the
+			 *   recursive map. We can't use this address to free
+			 *   the page. Instead we need to compute its address
+			 *   in the Idle PTEs in "low memory".
+			 */
+			vm_offset_t vm_ptep = (vm_offset_t) KPTphys
+						+ (pte_phys >> PTPGSHIFT);
+			DBG("ml_static_mfree(%p,0x%x) for pte\n",
+				(void *) vm_ptep, PAGE_SIZE);
+			ml_static_mfree(vm_ptep, PAGE_SIZE);
+		}
+
+		/* Change variable read by sysctl machdep.pmap */
+		pmap_kernel_text_ps = I386_LPGBYTES;
+	}
+
+	/* map lowmem global page into fixed addr */
+	pt_entry_t *pte = NULL;
+	if (0 == (pte = pmap_pte(kernel_pmap,
+				 VM_MIN_KERNEL_LOADED_ADDRESS + 0x2000)))
+		panic("lowmem pte");
+	/* make sure it is defined on page boundary */
+	assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK));
+	pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)
+				| INTEL_PTE_REF
+				| INTEL_PTE_MOD
+				| INTEL_PTE_WIRED
+				| INTEL_PTE_VALID
+				| INTEL_PTE_RW);
+	splx(spl);
+	if (pmap_pcid_ncpus)
+		tlb_flush_global();
+	else
+		flush_tlb_raw();
+}
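
The coalescing loop above builds each 2M PDE by combining the attribute bits of the existing PDE with the page frame of the first 4K PTE it covers. A minimal sketch of that composition, assuming the PG_FRAME/PTMASK/INTEL_PTE_PS/INTEL_PTE_RW masks used above (the helper name is hypothetical):

    /* Compose a 2M PDE from the current PDE's attributes and the
     * first 4K PTE's page frame, as the loop above does. */
    static pt_entry_t
    compose_large_pde(pt_entry_t pde_cur, pt_entry_t pte, boolean_t wp)
    {
        pt_entry_t pde = pde_cur & PTMASK;  /* page attributes from pde */
        pde |= (pte & PG_FRAME);            /* page frame from the pte  */
        pde |= INTEL_PTE_PS;                /* mark it a 2M entry       */
        if (wp)
            pde &= ~INTEL_PTE_RW;           /* write-protect text       */
        return pde;
    }

The level-1 page table made redundant by each promotion is then handed back with ml_static_mfree(), using its address among the Idle PTEs rather than the recursive-map alias, since the latter cannot be freed directly.
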
 
 /*
 * this function is only used for debugging from the vm layer
@@ -885,7 +971,8 @@ pmap_create(
 	p = (pmap_t) zalloc(pmap_zone);
 	if (PMAP_NULL == p)
 		panic("pmap_create zalloc");
-
+	/* Zero all fields */
+	bzero(p, sizeof(*p));
 	/* init counts now since we'll be bumping some */
 	simple_lock_init(&p->lock, 0);
 	p->stats.resident_count = 0;
@@ -896,15 +983,15 @@ pmap_create(
 	p->pm_shared = FALSE;
 
 	p->pm_task_map = is_64bit ? TASK_MAP_64BIT : TASK_MAP_32BIT;
+	if (pmap_pcid_ncpus)
+		pmap_pcid_initialize(p);
+	p->pm_pml4 = zalloc(pmap_anchor_zone);
 
-        /* alloc the pml4 page in kernel vm */
-        if (KERN_SUCCESS != kmem_alloc_kobject(kernel_map, (vm_offset_t *)(&p->pm_pml4), PAGE_SIZE))
-	        panic("pmap_create kmem_alloc_kobject pml4");
+	pmap_assert((((uintptr_t)p->pm_pml4) & PAGE_MASK) == 0);
 
-        memset((char *)p->pm_pml4, 0, PAGE_SIZE);
-	p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_pml4);
+	memset((char *)p->pm_pml4, 0, PAGE_SIZE);
 
-	OSAddAtomic(1,  &inuse_ptepages_count);
+	p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_pml4);
 
 	/* allocate the vm_objs to hold the pdpt, pde and pte pages */
 
@@ -920,7 +1007,7 @@ pmap_create(
 	if (NULL == p->pm_obj)
 		panic("pmap_create pte obj");
 
-	/* All pmaps share the kennel's pml4 */
+	/* All pmaps share the kernel's pml4 */
 	pml4 = pmap64_pml4(p, 0ULL);
 	kpml4 = kernel_pmap->pm_pml4;
 	pml4[KERNEL_PML4_INDEX]    = kpml4[KERNEL_PML4_INDEX];
@@ -940,10 +1027,9 @@ pmap_create(
  */
 
 void
-pmap_destroy(
-	register pmap_t	p)
+pmap_destroy(pmap_t	p)
 {
-	register int		c;
+	int		c;
 
 	if (p == PMAP_NULL)
 		return;
@@ -955,6 +1041,8 @@ pmap_destroy(
 
 	c = --p->ref_count;
 
+	pmap_assert((current_thread() && (current_thread()->map)) ? (current_thread()->map->pmap != p) : TRUE);
+
 	if (c == 0) {
 		/* 
 		 * If some cpu is not using the physical pmap pointer that it
@@ -964,12 +1052,14 @@ pmap_destroy(
 		 */
 		PMAP_UPDATE_TLBS(p, 0x0ULL, 0xFFFFFFFFFFFFF000ULL);
 	}
-
+	if (pmap_pcid_ncpus)
+		pmap_destroy_pcid_sync(p);
 	PMAP_UNLOCK(p);
 
 	if (c != 0) {
 		PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
 			   p, 1, 0, 0, 0);
+		pmap_assert(p == kernel_pmap);
 	        return;	/* still in use */
 	}
 
@@ -979,8 +1069,7 @@ pmap_destroy(
 	 */
 	int inuse_ptepages = 0;
 
-	inuse_ptepages++;
-	kmem_free(kernel_map, (vm_offset_t)p->pm_pml4, PAGE_SIZE);
+	zfree(pmap_anchor_zone, p->pm_pml4);
 
 	inuse_ptepages += p->pm_obj_pml4->resident_page_count;
 	vm_object_deallocate(p->pm_obj_pml4);
@@ -992,6 +1081,7 @@ pmap_destroy(
 	vm_object_deallocate(p->pm_obj);
 
 	OSAddAtomic(-inuse_ptepages,  &inuse_ptepages_count);
+	PMAP_ZINFO_PFREE(inuse_ptepages * PAGE_SIZE);
 
 	zfree(pmap_zone, p);
 
@@ -1028,22 +1118,6 @@ pmap_remove_some_phys(
 }
 
 
-/*
- *	Routine:
- *		pmap_disconnect
- *
- *	Function:
- *		Disconnect all mappings for this page and return reference and change status
- *		in generic format.
- *
- */
-unsigned int pmap_disconnect(
-	ppnum_t pa)
-{
-	pmap_page_protect(pa, 0);		/* disconnect the page */
-	return (pmap_get_refmod(pa));		/* return ref/chg status */
-}
-
 /*
  *	Set the physical protection on the
  *	specified range of this map as requested.
@@ -1160,44 +1234,6 @@ pmap_map_block(
 	}
 }
 
-/*
- *	Routine:	pmap_change_wiring
- *	Function:	Change the wiring attribute for a map/virtual-address
- *			pair.
- *	In/out conditions:
- *			The mapping must already exist in the pmap.
- */
-void
-pmap_change_wiring(
-	pmap_t		map,
-	vm_map_offset_t	vaddr,
-	boolean_t	wired)
-{
-	pt_entry_t	*pte;
-
-	PMAP_LOCK(map);
-
-	if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
-		panic("pmap_change_wiring: pte missing");
-
-	if (wired && !iswired(*pte)) {
-		/*
-		 * wiring down mapping
-		 */
-		OSAddAtomic(+1,  &map->stats.wired_count);
-		pmap_update_pte(pte, *pte, (*pte | INTEL_PTE_WIRED));
-	}
-	else if (!wired && iswired(*pte)) {
-		/*
-		 * unwiring mapping
-		 */
-		assert(map->stats.wired_count >= 1);
-		OSAddAtomic(-1,  &map->stats.wired_count);
-		pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WIRED));
-	}
-
-	PMAP_UNLOCK(map);
-}
 
 void
 pmap_expand_pml4(
@@ -1236,6 +1272,8 @@ pmap_expand_pml4(
 	vm_page_unlock_queues();
 
 	OSAddAtomic(1,  &inuse_ptepages_count);
+	OSAddAtomic64(1,  &alloc_ptepages_count);
+	PMAP_ZINFO_PALLOC(PAGE_SIZE);
 
 	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
 	vm_object_lock(map->pm_obj_pml4);
@@ -1251,6 +1289,7 @@ pmap_expand_pml4(
 		VM_PAGE_FREE(m);
 
 		OSAddAtomic(-1,  &inuse_ptepages_count);
+		PMAP_ZINFO_PFREE(PAGE_SIZE);
 		return;
 	}
 
@@ -1319,6 +1358,8 @@ pmap_expand_pdpt(
 	vm_page_unlock_queues();
 
 	OSAddAtomic(1,  &inuse_ptepages_count);
+	OSAddAtomic64(1,  &alloc_ptepages_count);
+	PMAP_ZINFO_PALLOC(PAGE_SIZE);
 
 	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
 	vm_object_lock(map->pm_obj_pdpt);
@@ -1334,6 +1375,7 @@ pmap_expand_pdpt(
 		VM_PAGE_FREE(m);
 
 		OSAddAtomic(-1,  &inuse_ptepages_count);
+		PMAP_ZINFO_PFREE(PAGE_SIZE);
 		return;
 	}
 
@@ -1430,6 +1472,8 @@ pmap_expand(
 	vm_page_unlock_queues();
 
 	OSAddAtomic(1,  &inuse_ptepages_count);
+	OSAddAtomic64(1,  &alloc_ptepages_count);
+	PMAP_ZINFO_PALLOC(PAGE_SIZE);
 
 	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
 	vm_object_lock(map->pm_obj);
@@ -1446,6 +1490,7 @@ pmap_expand(
 		VM_PAGE_FREE(m);
 
 		OSAddAtomic(-1,  &inuse_ptepages_count);
+		PMAP_ZINFO_PFREE(PAGE_SIZE);
 		return;
 	}
 
@@ -1478,7 +1523,8 @@ pmap_expand(
  * that pmap_steal_memory uses, rather than calling vm_page_grab (which
  * isn't available yet). */
 void
-pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr) {
+pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr)
+{
 	ppnum_t pn;
 	pt_entry_t		*pte;
 
@@ -1645,11 +1691,12 @@ pmap_collect(
 			if (m == VM_PAGE_NULL)
 			    panic("pmap_collect: pte page not in object");
 
+			vm_object_unlock(p->pm_obj);
+
 			VM_PAGE_FREE(m);
 
 			OSAddAtomic(-1,  &inuse_ptepages_count);
-
-			vm_object_unlock(p->pm_obj);
+			PMAP_ZINFO_PFREE(PAGE_SIZE);
 		    }
 
 		    PMAP_LOCK(p);
@@ -1701,301 +1748,6 @@ pmap_pageable(
 #endif	/* lint */
 }
 
-/*
- *	Clear specified attribute bits.
- */
-void
-phys_attribute_clear(
-	ppnum_t		pn,
-	int		bits)
-{
-	pv_rooted_entry_t	pv_h;
-	pv_hashed_entry_t	pv_e;
-	pt_entry_t		*pte;
-	int			pai;
-	pmap_t			pmap;
-
-	pmap_intr_assert();
-	assert(pn != vm_page_fictitious_addr);
-	if (pn == vm_page_guard_addr)
-		return;
-
-	pai = ppn_to_pai(pn);
-
-	if (!IS_MANAGED_PAGE(pai)) {
-		/*
-		 *	Not a managed page.
-		 */
-		return;
-	}
-
-
-	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
-		   pn, bits, 0, 0, 0);
-
-	pv_h = pai_to_pvh(pai);
-
-	LOCK_PVH(pai);
-
-	/*
-	 * Walk down PV list, clearing all modify or reference bits.
-	 * We do not have to lock the pv_list because we have
-	 * the entire pmap system locked.
-	 */
-	if (pv_h->pmap != PMAP_NULL) {
-		/*
-		 * There are some mappings.
-		 */
-
-		pv_e = (pv_hashed_entry_t)pv_h;
-
-		do {
-			vm_map_offset_t	va;
-
-			pmap = pv_e->pmap;
-			va = pv_e->va;
-
-			 /*
-			  * Clear modify and/or reference bits.
-			  */
-			pte = pmap_pte(pmap, va);
-			pmap_update_pte(pte, *pte, (*pte & ~bits));
-			/* Ensure all processors using this translation
-			 * invalidate this TLB entry. The invalidation *must*
-			 * follow the PTE update, to ensure that the TLB
-			 * shadow of the 'D' bit (in particular) is
-			 * synchronized with the updated PTE.
-			 */
-			PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
-
-			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
-
-		} while (pv_e != (pv_hashed_entry_t)pv_h);
-	}
-	pmap_phys_attributes[pai] &= ~bits;
-
-	UNLOCK_PVH(pai);
-
-	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
-		   0, 0, 0, 0, 0);
-}
-
-/*
- *	Check specified attribute bits.
- */
-int
-phys_attribute_test(
-	ppnum_t		pn,
-	int		bits)
-{
-	pv_rooted_entry_t	pv_h;
-	pv_hashed_entry_t	pv_e;
-	pt_entry_t		*pte;
-	int			pai;
-	pmap_t			pmap;
-	int			attributes = 0;
-
-	pmap_intr_assert();
-	assert(pn != vm_page_fictitious_addr);
-	if (pn == vm_page_guard_addr)
-		return 0;
-
-	pai = ppn_to_pai(pn);
-
-	if (!IS_MANAGED_PAGE(pai)) {
-		/*
-		 *	Not a managed page.
-		 */
-		return 0;
-	}
-
-	/*
-	 * super fast check...  if bits already collected
-	 * no need to take any locks...
-	 * if not set, we need to recheck after taking
-	 * the lock in case they got pulled in while
-	 * we were waiting for the lock
-	 */
-	if ((pmap_phys_attributes[pai] & bits) == bits)
-		return bits;
-
-	pv_h = pai_to_pvh(pai);
-
-	LOCK_PVH(pai);
-
-	attributes = pmap_phys_attributes[pai] & bits;
-
-
-	/*
-	 * Walk down PV list, checking the mappings until we
-	 * reach the end or we've found the attributes we've asked for
-	 * We do not have to lock the pv_list because we have
-	 * the entire pmap system locked.
-	 */
-	if (attributes != bits &&
-	    pv_h->pmap != PMAP_NULL) {
-		/*
-		 * There are some mappings.
-		 */
-		pv_e = (pv_hashed_entry_t)pv_h;
-		do {
-			vm_map_offset_t va;
-
-			pmap = pv_e->pmap;
-			va = pv_e->va;
-			/*
-			 * first make sure any processor actively
-			 * using this pmap, flushes its TLB state
-			 */
-			PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
-
-			/*
-	 		 * pick up modify and/or reference bits from mapping
-			 */
-
-			pte = pmap_pte(pmap, va);
-			attributes |= (int)(*pte & bits);
-
-			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
-
-		} while ((attributes != bits) &&
-			 (pv_e != (pv_hashed_entry_t)pv_h));
-	}
-
-	UNLOCK_PVH(pai);
-	return (attributes);
-}
-
-/*
- *	Set specified attribute bits.
- */
-void
-phys_attribute_set(
-	ppnum_t		pn,
-	int		bits)
-{
-	int		pai;
-
-	pmap_intr_assert();
-	assert(pn != vm_page_fictitious_addr);
-	if (pn == vm_page_guard_addr)
-		return;
-
-	pai = ppn_to_pai(pn);
-
-	if (!IS_MANAGED_PAGE(pai)) {
-		/* Not a managed page.  */
-		return;
-	}
-
-	LOCK_PVH(pai);
-	pmap_phys_attributes[pai] |= bits;
-	UNLOCK_PVH(pai);
-}
-
-/*
- *	Set the modify bit on the specified physical page.
- */
-
-void
-pmap_set_modify(ppnum_t pn)
-{
-	phys_attribute_set(pn, PHYS_MODIFIED);
-}
-
-/*
- *	Clear the modify bits on the specified physical page.
- */
-
-void
-pmap_clear_modify(ppnum_t pn)
-{
-	phys_attribute_clear(pn, PHYS_MODIFIED);
-}
-
-/*
- *	pmap_is_modified:
- *
- *	Return whether or not the specified physical page is modified
- *	by any physical maps.
- */
-
-boolean_t
-pmap_is_modified(ppnum_t pn)
-{
-	if (phys_attribute_test(pn, PHYS_MODIFIED))
-		return TRUE;
-	return FALSE;
-}
-
-/*
- *	pmap_clear_reference:
- *
- *	Clear the reference bit on the specified physical page.
- */
-
-void
-pmap_clear_reference(ppnum_t pn)
-{
-	phys_attribute_clear(pn, PHYS_REFERENCED);
-}
-
-void
-pmap_set_reference(ppnum_t pn)
-{
-	phys_attribute_set(pn, PHYS_REFERENCED);
-}
-
-/*
- *	pmap_is_referenced:
- *
- *	Return whether or not the specified physical page is referenced
- *	by any physical maps.
- */
-
-boolean_t
-pmap_is_referenced(ppnum_t pn)
-{
-        if (phys_attribute_test(pn, PHYS_REFERENCED))
-		return TRUE;
-	return FALSE;
-}
-
-/*
- * pmap_get_refmod(phys)
- *  returns the referenced and modified bits of the specified
- *  physical page.
- */
-unsigned int
-pmap_get_refmod(ppnum_t pn)
-{
-        int		refmod;
-	unsigned int	retval = 0;
-
-	refmod = phys_attribute_test(pn, PHYS_MODIFIED | PHYS_REFERENCED);
-
-	if (refmod & PHYS_MODIFIED)
-	        retval |= VM_MEM_MODIFIED;
-	if (refmod & PHYS_REFERENCED)
-	        retval |= VM_MEM_REFERENCED;
-
-	return (retval);
-}
-
-/*
- * pmap_clear_refmod(phys, mask)
- *  clears the referenced and modified bits as specified by the mask
- *  of the specified physical page.
- */
-void
-pmap_clear_refmod(ppnum_t pn, unsigned int mask)
-{
-	unsigned int  x86Mask;
-
-	x86Mask = (   ((mask &   VM_MEM_MODIFIED)?   PHYS_MODIFIED : 0)
-	            | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
-	phys_attribute_clear(pn, x86Mask);
-}
 
 void 
 invalidate_icache(__unused vm_offset_t	addr,
@@ -2023,10 +1775,13 @@ extern kern_return_t dtrace_copyio_postflight(addr64_t);
 kern_return_t dtrace_copyio_preflight(__unused addr64_t va)
 {
 	thread_t thread = current_thread();
+	uint64_t ccr3;
 
 	if (current_map() == kernel_map)
 		return KERN_FAILURE;
-	else if (get_cr3() != thread->map->pmap->pm_cr3)
+	else if (((ccr3 = get_cr3_base()) != thread->map->pmap->pm_cr3) && (no_shared_cr3 == FALSE))
+		return KERN_FAILURE;
+	else if (no_shared_cr3 && (ccr3 != kernel_pmap->pm_cr3))
 		return KERN_FAILURE;
 	else if (thread->machine.specFlags & CopyIOActive)
 		return KERN_FAILURE;
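
With PCIDs enabled, the low 12 bits of CR3 carry the tag rather than page-table base bits, so the preflight above compares against get_cr3_base() instead of raw CR3. A sketch of what such an accessor does, assuming the PCID field occupies CR3[11:0] when CR4.PCIDE is set (the helper name is illustrative):

    /* Read CR3 and strip the PCID/flag bits, keeping the PML4 base. */
    static inline uint64_t
    get_cr3_base_sketch(void)
    {
        uint64_t cr3;
        __asm__ volatile("mov %%cr3, %0" : "=r" (cr3));
        return cr3 & ~0xFFFULL;
    }
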
@@ -2090,6 +1845,8 @@ phys_page_exists(ppnum_t pn)
 	return TRUE;
 }
 
+
+
 void
 pmap_switch(pmap_t tpmap)
 {
@@ -2111,6 +1868,12 @@ pmap_disable_NX(pmap_t pmap)
         pmap->nx_enabled = 0;
 }
 
+void 
+pt_fake_zone_init(int zone_index)
+{
+	pt_fake_zone_index = zone_index;
+}
+
 void
 pt_fake_zone_info(
 	int		*count,
@@ -2118,8 +1881,10 @@ pt_fake_zone_info(
 	vm_size_t	*max_size,
 	vm_size_t	*elem_size,
 	vm_size_t	*alloc_size,
+	uint64_t	*sum_size,
 	int		*collectable,
-	int		*exhaustable)
+	int		*exhaustable,
+	int		*caller_acct)
 {
         *count      = inuse_ptepages_count;
 	*cur_size   = PAGE_SIZE * inuse_ptepages_count;
@@ -2129,13 +1894,13 @@ pt_fake_zone_info(
 				   vm_page_free_count);
 	*elem_size  = PAGE_SIZE;
 	*alloc_size = PAGE_SIZE;
+	*sum_size = alloc_ptepages_count * PAGE_SIZE;
 
 	*collectable = 1;
 	*exhaustable = 0;
+	*caller_acct = 1;
 }
 
-extern 	long	NMIPI_acks;
-
 static inline void
 pmap_cpuset_NMIPI(cpu_set cpu_mask) {
 	unsigned int cpu, cpu_bit;
@@ -2159,8 +1924,9 @@ pmap_cpuset_NMIPI(cpu_set cpu_mask) {
  *  - flush the local tlb if active for this pmap
  *  - return ... the caller will unlock the pmap
  */
+
 void
-pmap_flush_tlbs(pmap_t	pmap)
+pmap_flush_tlbs(pmap_t	pmap, vm_map_offset_t startv, vm_map_offset_t endv)
 {
 	unsigned int	cpu;
 	unsigned int	cpu_bit;
@@ -2169,6 +1935,7 @@ pmap_flush_tlbs(pmap_t	pmap)
 	pmap_paddr_t	pmap_cr3 = pmap->pm_cr3;
 	boolean_t	flush_self = FALSE;
 	uint64_t	deadline;
+	boolean_t	pmap_is_shared = (pmap->pm_shared || (pmap == kernel_pmap));
 
 	assert((processor_avail_count < 2) ||
 	       (ml_get_interrupts_enabled() && get_preemption_level() != 0));
@@ -2179,6 +1946,12 @@ pmap_flush_tlbs(pmap_t	pmap)
 	 * don't signal -- they'll check as they go busy.
 	 */
 	cpus_to_signal = 0;
+
+	if (pmap_pcid_ncpus) {
+		pmap_pcid_invalidate_all_cpus(pmap);
+		__asm__ volatile("mfence":::"memory");
+	}
+
 	for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
 		if (!cpu_datap(cpu)->cpu_running)
 			continue;
@@ -2187,14 +1960,16 @@ pmap_flush_tlbs(pmap_t	pmap)
 
 		if ((pmap_cr3 == cpu_task_cr3) ||
 		    (pmap_cr3 == cpu_active_cr3) ||
-		    (pmap->pm_shared) ||
-		    (pmap == kernel_pmap)) {
+		    (pmap_is_shared)) {
 			if (cpu == my_cpu) {
 				flush_self = TRUE;
 				continue;
 			}
-			cpu_datap(cpu)->cpu_tlb_invalid = TRUE;
-			__asm__ volatile("mfence");
+			if (pmap_pcid_ncpus && pmap_is_shared)
+				cpu_datap(cpu)->cpu_tlb_invalid_global = TRUE;
+			else
+				cpu_datap(cpu)->cpu_tlb_invalid_local = TRUE;
+			__asm__ volatile("mfence":::"memory");
 
 			/*
 			 * We don't need to signal processors which will flush
@@ -2220,15 +1995,24 @@ pmap_flush_tlbs(pmap_t	pmap)
 		}
 	}
 
-	PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_START,
-		   pmap, cpus_to_signal, flush_self, 0, 0);
+	PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_START,
+		   pmap, cpus_to_signal, flush_self, startv, endv);
 
 	/*
 	 * Flush local tlb if required.
 	 * Do this now to overlap with other processors responding.
 	 */
-	if (flush_self)
-		flush_tlb();
+	if (flush_self) {
+		if (pmap_pcid_ncpus) {
+			pmap_pcid_validate_cpu(pmap, my_cpu);
+			if (pmap_is_shared)
+				tlb_flush_global();
+			else
+				flush_tlb_raw();
+		}
+		else
+			flush_tlb_raw();
+	}
 
 	if (cpus_to_signal) {
 		cpu_set	cpus_to_respond = cpus_to_signal;
@@ -2241,6 +2025,9 @@ pmap_flush_tlbs(pmap_t	pmap)
 			long orig_acks = 0;
 
 			for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
+				/* Consider checking local/global invalidity
+				 * as appropriate in the PCID case.
+				 */
 				if ((cpus_to_respond & cpu_bit) != 0) {
 					if (!cpu_datap(cpu)->cpu_running ||
 					    cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
@@ -2252,7 +2039,7 @@ pmap_flush_tlbs(pmap_t	pmap)
 				if (cpus_to_respond == 0)
 					break;
 			}
-			if (mach_absolute_time() > deadline) {
+			if (cpus_to_respond && (mach_absolute_time() > deadline)) {
 				if (machine_timeout_suspended())
 					continue;
 				pmap_tlb_flush_timeout = TRUE;
@@ -2266,18 +2053,31 @@ pmap_flush_tlbs(pmap_t	pmap)
 		}
 	}
 
-	PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END,
-		   pmap, cpus_to_signal, flush_self, 0, 0);
+	PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END,
+	    pmap, cpus_to_signal, startv, endv, 0);
 }
 
 void
 process_pmap_updates(void)
 {
-	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
-
-	flush_tlb();
+	int ccpu = cpu_number();
+	pmap_assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
+	if (pmap_pcid_ncpus) {
+		pmap_pcid_validate_current();
+		if (cpu_datap(ccpu)->cpu_tlb_invalid_global) {
+			cpu_datap(ccpu)->cpu_tlb_invalid = FALSE;
+			tlb_flush_global();
+		}
+		else {
+			cpu_datap(ccpu)->cpu_tlb_invalid_local = FALSE;
+			flush_tlb_raw();
+		}
+	}
+	else {
+		current_cpu_datap()->cpu_tlb_invalid = FALSE;
+		flush_tlb_raw();
+	}
 
-	current_cpu_datap()->cpu_tlb_invalid = FALSE;
 	__asm__ volatile("mfence");
 }
 
@@ -2292,13 +2092,3 @@ pmap_update_interrupt(void)
         PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_END,
 		   0, 0, 0, 0, 0);
 }
-
-
-unsigned int
-pmap_cache_attributes(ppnum_t pn)
-{
-	return IS_MANAGED_PAGE(ppn_to_pai(pn)) ? VM_WIMG_COPYBACK
-					       : VM_WIMG_IO;
-}
-
-
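
The reworked pmap_flush_tlbs() follows a mark/fence/signal/wait pattern: the initiator sets each target CPU's invalid flag, issues an mfence so the flag is visible before the IPI arrives, then spins until every signalled CPU has flushed or a deadline expires. A condensed sketch of that handshake; cpu_invalid_flag, cpu_needs_flush, send_tlb_ipi, past_deadline and MAX_CPUS_SKETCH are hypothetical stand-ins for the cpu_data fields and helpers above:

    static volatile boolean_t cpu_invalid_flag[MAX_CPUS_SKETCH];

    static void
    tlb_shootdown_sketch(unsigned int ncpus, unsigned int my_cpu)
    {
        uint64_t     cpus_to_respond = 0;
        unsigned int cpu;

        for (cpu = 0; cpu < ncpus; cpu++) {
            if (cpu == my_cpu || !cpu_needs_flush(cpu))
                continue;
            cpu_invalid_flag[cpu] = TRUE;            /* mark */
            __asm__ volatile("mfence":::"memory");   /* order mark vs. IPI */
            send_tlb_ipi(cpu);                       /* signal */
            cpus_to_respond |= 1ULL << cpu;
        }
        while (cpus_to_respond) {                    /* wait for acks */
            for (cpu = 0; cpu < ncpus; cpu++)
                if ((cpus_to_respond & (1ULL << cpu)) && !cpu_invalid_flag[cpu])
                    cpus_to_respond &= ~(1ULL << cpu);
            if (cpus_to_respond && past_deadline())
                panic("TLB invalidation IPI timeout");
        }
    }

Responders clear their flags in process_pmap_updates(); note that the deadline check is now gated on cpus_to_respond being nonzero, matching the change in the hunk above.
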
diff --git a/osfmk/x86_64/pmap_pcid.c b/osfmk/x86_64/pmap_pcid.c
new file mode 100644
index 000000000..c8fef93b4
--- /dev/null
+++ b/osfmk/x86_64/pmap_pcid.c
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <i386/proc_reg.h>
+#include <i386/cpuid.h>
+#include <i386/tsc.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <i386/pmap_internal.h>
+#include <i386/pmap_pcid.h>
+#include <mach/branch_predicates.h>
+
+/*
+ * PCID (Process context identifier) aka tagged TLB support.
+ * On processors with this feature, unless disabled via the -pmap_pcid_disable
+ * boot-arg, the following algorithm is in effect:
+ * Each processor maintains an array of tag refcounts indexed by tag.
+ * Each address space maintains an array of tags indexed by CPU number.
+ * Each address space maintains a coherency vector, indexed by CPU
+ * indicating that the TLB state for that address space has a pending
+ * invalidation.
+ * On a context switch, a refcounted tag is lazily assigned to the newly
+ * dispatched (CPU, address space) tuple.
+ * When an inactive address space is invalidated on a remote CPU, it is marked
+ * for invalidation upon the next dispatch. Some invalidations are
+ * also processed at the user/kernel boundary.
+ * Provisions are made for the case where a CPU is overcommitted, i.e.
+ * more active address spaces exist than the number of logical tags
+ * provided for by the processor architecture (currently 4096).
+ * The algorithm assumes the processor remaps the logical tags
+ * to physical TLB context IDs in an LRU fashion for efficiency. (DRK '10)
+ */
+
+uint32_t	pmap_pcid_ncpus;
+boolean_t 	pmap_pcid_disabled = FALSE;
+
+void	pmap_pcid_configure(void) {
+	int ccpu = cpu_number();
+	uintptr_t cr4 = get_cr4();
+	boolean_t pcid_present = FALSE;
+
+	pmap_pcid_log("PCID configure invoked on CPU %d\n", ccpu);
+	pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0);
+	pmap_assert(cpu_mode_is64bit());
+
+	if (PE_parse_boot_argn("-pmap_pcid_disable", &pmap_pcid_disabled, sizeof (pmap_pcid_disabled))) {
+		pmap_pcid_log("PMAP: PCID feature disabled\n");
+		printf("PMAP: PCID feature disabled, %u\n", pmap_pcid_disabled);
+		kprintf("PMAP: PCID feature disabled %u\n", pmap_pcid_disabled);
+	}
+	 /* no_shared_cr3+PCID is currently unsupported */
+#if	DEBUG
+	if (pmap_pcid_disabled == FALSE)
+		no_shared_cr3 = FALSE;
+	else
+		no_shared_cr3 = TRUE;
+#else
+	if (no_shared_cr3)
+		pmap_pcid_disabled = TRUE;
+#endif
+	if (pmap_pcid_disabled || no_shared_cr3) {
+		unsigned i;
+		/* Reset PCID status, as we may have picked up
+		 * strays if discovered prior to platform
+		 * expert initialization.
+		 */
+		for (i = 0; i < real_ncpus; i++) {
+			if (cpu_datap(i)) {
+				cpu_datap(i)->cpu_pmap_pcid_enabled = FALSE;
+			}
+			pmap_pcid_ncpus = 0;
+		}
+		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = FALSE;
+		return;
+	}
+	/* DRKTODO: assert if features haven't been discovered yet. Redundant
+	 * invocation of cpu_mode_init and descendants masks this for now.
+	 */
+	if ((cpuid_features() & CPUID_FEATURE_PCID))
+		pcid_present = TRUE;
+	else {
+		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = FALSE;
+		pmap_pcid_log("PMAP: PCID not detected CPU %d\n", ccpu);
+		return;
+	}
+	if ((cr4 & (CR4_PCIDE | CR4_PGE)) == (CR4_PCIDE|CR4_PGE)) {
+		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE;
+		pmap_pcid_log("PMAP: PCID already enabled %d\n", ccpu);
+		return;
+	}
+	if (pcid_present == TRUE) {
+		pmap_pcid_log("Pre-PCID:CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n", get_cr0(), get_cr3_raw(), ccpu, cr4);
+
+		if (cpu_number() >= PMAP_PCID_MAX_CPUS) {
+			panic("PMAP_PCID_MAX_CPUS %d\n", cpu_number());
+		}
+		if ((get_cr4() & CR4_PGE) == 0) {
+			set_cr4(get_cr4() | CR4_PGE);
+			pmap_pcid_log("Toggled PGE ON (CPU: %d\n", ccpu);
+		}
+		set_cr4(get_cr4() | CR4_PCIDE);
+		pmap_pcid_log("Post PCID: CR0: 0x%lx, CR3: 0x%lx, CR4(CPU %d): 0x%lx\n", get_cr0(), get_cr3_raw(), ccpu, get_cr4());
+		tlb_flush_global();
+		cpu_datap(ccpu)->cpu_pmap_pcid_enabled = TRUE;
+
+		if (OSIncrementAtomic(&pmap_pcid_ncpus) == machine_info.max_cpus) {
+			pmap_pcid_log("All PCIDs enabled: real_ncpus: %d, pmap_pcid_ncpus: %d\n", real_ncpus, pmap_pcid_ncpus);
+		}
+		cpu_datap(ccpu)->cpu_pmap_pcid_coherentp =
+		    cpu_datap(ccpu)->cpu_pmap_pcid_coherentp_kernel =
+		    &(kernel_pmap->pmap_pcid_coherency_vector[ccpu]);
+		cpu_datap(ccpu)->cpu_pcid_refcounts[0] = 1;
+	}
+}
+
+void pmap_pcid_initialize(pmap_t p) {
+	unsigned i;
+	unsigned nc = sizeof(p->pmap_pcid_cpus)/sizeof(pcid_t);
+
+	pmap_assert(nc >= real_ncpus);
+	for (i = 0; i < nc; i++) {
+		p->pmap_pcid_cpus[i] = PMAP_PCID_INVALID_PCID;
+		/* We assume here that the coherency vector is zeroed by
+		 * pmap_create
+		 */
+	}
+}
+
+void pmap_pcid_initialize_kernel(pmap_t p) {
+	unsigned i;
+	unsigned nc = sizeof(p->pmap_pcid_cpus)/sizeof(pcid_t);
+
+	for (i = 0; i < nc; i++) {
+		p->pmap_pcid_cpus[i] = 0;
+		/* We assume here that the coherency vector is zeroed by
+		 * pmap_create
+		 */
+	}
+}
+
+pcid_t	pmap_pcid_allocate_pcid(int ccpu) {
+	int i;
+	pcid_ref_t 	cur_min = 0xFF;
+	uint32_t	cur_min_index = ~1;
+	pcid_ref_t	*cpu_pcid_refcounts = &cpu_datap(ccpu)->cpu_pcid_refcounts[0];
+	pcid_ref_t	old_count;
+
+	if ((i = cpu_datap(ccpu)->cpu_pcid_free_hint) != 0) {
+		if (cpu_pcid_refcounts[i] == 0) {
+			(void)__sync_fetch_and_add(&cpu_pcid_refcounts[i], 1);
+			cpu_datap(ccpu)->cpu_pcid_free_hint = 0;
+			return i;
+		}
+	}
+	/* Linear scan to discover free slot, with hint. Room for optimization
+	 * but with intelligent prefetchers this should be
+	 * adequately performant, as it is invoked
+	 * only on first dispatch of a new address space onto
+	 * a given processor. DRKTODO: use larger loads and
+	 * zero byte discovery -- any pattern != ~1 should
+	 * signify a free slot.
+	 */
+	for (i = PMAP_PCID_MIN_PCID; i < PMAP_PCID_MAX_PCID; i++) {
+		pcid_ref_t cur_refcount = cpu_pcid_refcounts[i];
+
+		pmap_assert(cur_refcount < PMAP_PCID_MAX_REFCOUNT);
+
+		if (cur_refcount == 0) {
+			(void)__sync_fetch_and_add(&cpu_pcid_refcounts[i], 1);
+			return i;
+		}
+		else {
+			if (cur_refcount < cur_min) {
+				cur_min_index = i;
+				cur_min = cur_refcount;
+			}
+		}
+	}
+	pmap_assert(cur_min_index > 0 && cur_min_index < PMAP_PCID_MAX_PCID);
+	/* Consider "rebalancing" tags actively in highly oversubscribed cases
+	 * perhaps selecting tags with lower activity.
+	 */
+
+	old_count = __sync_fetch_and_add(&cpu_pcid_refcounts[cur_min_index], 1);
+	pmap_assert(old_count < PMAP_PCID_MAX_REFCOUNT);
+	return cur_min_index;
+}
+
+void	pmap_pcid_deallocate_pcid(int ccpu, pmap_t tpmap) {
+	pcid_t pcid;
+	pmap_t lp;
+	pcid_ref_t prior_count;
+
+	pcid = tpmap->pmap_pcid_cpus[ccpu];
+	pmap_assert(pcid != PMAP_PCID_INVALID_PCID);
+	if (pcid == PMAP_PCID_INVALID_PCID)
+		return;
+
+	lp = cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[pcid];
+	pmap_assert(pcid > 0 && pcid < PMAP_PCID_MAX_PCID);
+	pmap_assert(cpu_datap(ccpu)->cpu_pcid_refcounts[pcid] >= 1);
+
+	if (lp == tpmap)
+		(void)__sync_bool_compare_and_swap(&cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[pcid], tpmap, PMAP_INVALID);
+
+	if ((prior_count = __sync_fetch_and_sub(&cpu_datap(ccpu)->cpu_pcid_refcounts[pcid], 1)) == 1) {
+		    cpu_datap(ccpu)->cpu_pcid_free_hint = pcid;
+	}
+	pmap_assert(prior_count <= PMAP_PCID_MAX_REFCOUNT);
+}
+
+void	pmap_destroy_pcid_sync(pmap_t p) {
+	int i;
+	pmap_assert(ml_get_interrupts_enabled() == FALSE || get_preemption_level() != 0);
+	for (i = 0; i < PMAP_PCID_MAX_CPUS; i++)
+		if (p->pmap_pcid_cpus[i] != PMAP_PCID_INVALID_PCID)
+			pmap_pcid_deallocate_pcid(i, p);
+}
+
+pcid_t	pcid_for_pmap_cpu_tuple(pmap_t pmap, int ccpu) {
+	return pmap->pmap_pcid_cpus[ccpu];
+}
+#if PMAP_ASSERT
+#define PCID_RECORD_SIZE 128
+uint64_t pcid_record_array[PCID_RECORD_SIZE];
+#endif
+
+void	pmap_pcid_activate(pmap_t tpmap, int ccpu) {
+	pcid_t		new_pcid = tpmap->pmap_pcid_cpus[ccpu];
+	pmap_t		last_pmap;
+	boolean_t	pcid_conflict = FALSE, pending_flush = FALSE;
+
+	pmap_assert(cpu_datap(ccpu)->cpu_pmap_pcid_enabled);
+	if (__improbable(new_pcid == PMAP_PCID_INVALID_PCID)) {
+		new_pcid = tpmap->pmap_pcid_cpus[ccpu] = pmap_pcid_allocate_pcid(ccpu);
+	}
+	pmap_assert(new_pcid != PMAP_PCID_INVALID_PCID);
+#ifdef	PCID_ASSERT	
+	cpu_datap(ccpu)->cpu_last_pcid = cpu_datap(ccpu)->cpu_active_pcid;
+#endif
+	cpu_datap(ccpu)->cpu_active_pcid = new_pcid;
+
+	pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
+	if (__probable(pending_flush == FALSE)) {
+		last_pmap = cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[new_pcid];
+		pcid_conflict = ((last_pmap != NULL) && (tpmap != last_pmap));
+	}
+	if (__improbable(pending_flush || pcid_conflict)) {
+		pmap_pcid_validate_cpu(tpmap, ccpu);
+	}
+	/* Consider making this a unique id */
+	cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[new_pcid] = tpmap;
+
+	pmap_assert(new_pcid < PMAP_PCID_MAX_PCID);
+	pmap_assert(((tpmap ==  kernel_pmap) && new_pcid == 0) || ((new_pcid != PMAP_PCID_INVALID_PCID) && (new_pcid != 0)));
+#if	PMAP_ASSERT
+	pcid_record_array[ccpu % PCID_RECORD_SIZE] = tpmap->pm_cr3 | new_pcid | (((uint64_t)(!(pending_flush || pcid_conflict))) <<63);
+	pml4_entry_t *pml4 = pmap64_pml4(tpmap, 0ULL);
+	/* Diagnostic to detect pagetable anchor corruption */
+	if (pml4[KERNEL_PML4_INDEX] != kernel_pmap->pm_pml4[KERNEL_PML4_INDEX])
+		__asm__ volatile("int3");
+#endif	/* PMAP_ASSERT */
+	set_cr3_composed(tpmap->pm_cr3, new_pcid, !(pending_flush || pcid_conflict));
+
+	if (!pending_flush) {
+		/* We did not previously observe a pending invalidation for this
+		 * ASID. However, the load from the coherency vector
+		 * could've been reordered ahead of the store to the
+		 * active_cr3 field (in the context switch path, our
+		 * caller). Re-consult the pending invalidation vector
+		 * after the CR3 write. We rely on MOV CR3's documented
+		 * serializing property to avoid insertion of an expensive
+		 * barrier. (DRK)
+		 */
+		pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
+		if (__improbable(pending_flush != 0)) {
+			pmap_pcid_validate_cpu(tpmap, ccpu);
+			set_cr3_composed(tpmap->pm_cr3, new_pcid, FALSE);
+		}
+	}
+	cpu_datap(ccpu)->cpu_pmap_pcid_coherentp = &(tpmap->pmap_pcid_coherency_vector[ccpu]);
+#if	DEBUG	
+	KERNEL_DEBUG_CONSTANT(0x9c1d0000, tpmap, new_pcid, pending_flush, pcid_conflict, 0);
+#endif
+}
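
To make the allocation policy concrete, here is a minimal user-space model of pmap_pcid_allocate_pcid(): prefer a free tag (refcount zero), otherwise share the least-referenced one. Sizes and names are illustrative rather than the kernel's, and the real array is per-CPU and updated with atomic fetch-and-add:

    #include <stdint.h>

    #define NPCIDS 4096             /* logical tags per the comment above */
    static uint8_t refcounts[NPCIDS];

    /* Return a tag: a free slot if one exists, else the least-shared one. */
    static int
    pcid_alloc_sketch(void)
    {
        int i, min_i = 1;
        uint8_t min_rc = 0xFF;

        for (i = 1; i < NPCIDS; i++) {  /* tag 0 is reserved for the kernel */
            if (refcounts[i] == 0) {
                refcounts[i]++;
                return i;
            }
            if (refcounts[i] < min_rc) {
                min_rc = refcounts[i];
                min_i = i;
            }
        }
        refcounts[min_i]++;             /* oversubscribed: share a tag */
        return min_i;
    }

Also worth noting: the post-CR3 recheck in pmap_pcid_activate() leans on MOV CR3 being architecturally serializing, so re-reading the coherency vector after set_cr3_composed() closes the reordering window without an explicit barrier.
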
diff --git a/osfmk/x86_64/start.s b/osfmk/x86_64/start.s
index fd0b8491a..8ca246de3 100644
--- a/osfmk/x86_64/start.s
+++ b/osfmk/x86_64/start.s
@@ -84,7 +84,7 @@ EXT(low_intstack):
 	.globl  EXT(gIOHibernateRestoreStack)
 EXT(gIOHibernateRestoreStack):
 
-	.set	., .+INTSTACK_SIZE
+	.space	INTSTACK_SIZE
 
 	.globl	EXT(low_eintstack)
 EXT(low_eintstack:)
@@ -101,7 +101,7 @@ EXT(gIOHibernateRestoreStackEnd):
 	.align	12
 	.globl	EXT(df_task_stack)
 EXT(df_task_stack):
-	.set	., .+INTSTACK_SIZE
+	.space	INTSTACK_SIZE
 	.globl	EXT(df_task_stack_end)
 EXT(df_task_stack_end):
 
@@ -112,37 +112,10 @@ EXT(df_task_stack_end):
 	.align	12
 	.globl	EXT(mc_task_stack)
 EXT(mc_task_stack):
-	.set	., .+INTSTACK_SIZE
+	.space	INTSTACK_SIZE
 	.globl	EXT(mc_task_stack_end)
 EXT(mc_task_stack_end):
 
-
-#if	MACH_KDB
-/*
- * Kernel debugger stack for each processor.
- */
-	.align	12
-	.globl	EXT(db_stack_store)
-EXT(db_stack_store):
-	.set	., .+(INTSTACK_SIZE*MAX_CPUS)
-
-/*
- * Stack for last-ditch debugger task for each processor.
- */
-	.align	12
-	.globl	EXT(db_task_stack_store)
-EXT(db_task_stack_store):
-	.set	., .+(INTSTACK_SIZE*MAX_CPUS)
-
-/*
- * per-processor kernel debugger stacks
- */
-	.align  ALIGN
-	.globl  EXT(kgdb_stack_store)
-EXT(kgdb_stack_store):
-	.set    ., .+(INTSTACK_SIZE*MAX_CPUS)
-#endif	/* MACH_KDB */
-
 /*
  * BSP CPU start here.
  *	eax points to kernbootstruct
@@ -176,7 +149,6 @@ EXT(kgdb_stack_store):
  * This proves that Little Endian is superior to Big Endian.
  */
 	
-
 	.text
 	.align	ALIGN
 	.globl	EXT(_start)
@@ -222,14 +194,6 @@ LEXT(_pstart)
 	movl	$EXT(protected_mode_gdtr), %eax
 	lgdtl	(%eax)
 
-	mov	$(KERNEL_DS), %ax
-	mov	%ax, %ds
-	mov	%ax, %es
-	mov	%ax, %ss
-	xor	%eax, %eax
-	mov	%ax, %fs
-	mov %ax, %gs
-
 /* the following code is shared by the master CPU and all slave CPUs */
 L_pstart_common:
 	/*
@@ -237,6 +201,14 @@ L_pstart_common:
 	 */
 	SWITCH_TO_64BIT_MODE
 
+	/* Flush data segment selectors */
+	xor	%eax, %eax
+	mov	%ax, %ss
+	mov	%ax, %ds
+	mov	%ax, %es
+	mov	%ax, %fs
+	mov	%ax, %gs
+
 	/* %edi = boot_args_start */
 	
 	leaq _vstart(%rip), %rcx
@@ -441,8 +413,12 @@ ENTRY(acpi_sleep_cpu)
 	movw	%gs, saved_gs(%rip)
 	movw	%ss, saved_ss(%rip)	
 
-	/* save the 64bit kernel gs base */
+	/* save the 64bit user and kernel gs base */
+	/* note: user's gs base is currently swapped into the kernel base MSR */
 	mov	$MSR_IA32_KERNEL_GS_BASE, %rcx
+	rdmsr
+	movl	%eax, saved_ugs_base(%rip)
+	movl	%edx, saved_ugs_base+4(%rip)
 	swapgs
 	rdmsr
 	movl	%eax, saved_kgs_base(%rip)
@@ -519,8 +495,9 @@ Lwake_64:
 	/* protected mode, paging enabled */
 	POSTCODE(ACPI_WAKE_PAGED_ENTRY)
 
-	/* switch to kernel data segment */
-	movw	$(KERNEL_DS), %ax
+	/* load null segment selectors */
+	xor	%eax, %eax
+	movw	%ax, %ss
 	movw	%ax, %ds
 
 	/* restore local and interrupt descriptor tables */
@@ -529,20 +506,20 @@ Lwake_64:
 
 	/* restore segment registers */
 	movw	saved_es(%rip), %es
+	movw	saved_fs(%rip), %fs
+	movw	saved_gs(%rip), %gs
 	movw	saved_ss(%rip), %ss
 
-	/* Program FS/GS with a NULL selector, precautionary */
-	xor	%rax, %rax
-	movw	%ax, %fs
-	movw	%ax, %gs
-	/* restore the 64bit kernel gs base */
+	/* restore the 64bit kernel and user gs base */
 	mov	$MSR_IA32_KERNEL_GS_BASE, %rcx
 	movl	saved_kgs_base(%rip),   %eax 
 	movl	saved_kgs_base+4(%rip), %edx 
 	wrmsr
 	swapgs
+	movl	saved_ugs_base(%rip),   %eax 
+	movl	saved_ugs_base+4(%rip), %edx 
+	wrmsr
 
-	//K64todo verify this TSS stuff
 	/*
 	 * Restore task register. Before doing this, clear the busy flag
 	 * in the TSS descriptor set by the CPU.
@@ -663,4 +640,5 @@ saved_idt:	.word 0
 saved_ldt:	.word 0
 saved_tr:	.word 0
 saved_kgs_base:	.quad 0
+saved_ugs_base:	.quad 0
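
The sleep path now preserves both gs bases. On entry from the kernel, the user's base sits in the IA32_KERNEL_GS_BASE MSR (swapgs exchanged it on kernel entry), so the code reads that MSR, executes swapgs, and reads it again for the kernel base. A C rendering of the sequence, assuming the architectural MSR number 0xC0000102:

    #define MSR_IA32_KERNEL_GS_BASE_SKETCH 0xC0000102

    static inline uint64_t
    rdmsr_sketch(uint32_t msr)
    {
        uint32_t lo, hi;
        __asm__ volatile("rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr));
        return ((uint64_t)hi << 32) | lo;
    }

    uint64_t saved_ugs, saved_kgs;

    static void
    save_gs_bases_sketch(void)
    {
        saved_ugs = rdmsr_sketch(MSR_IA32_KERNEL_GS_BASE_SKETCH); /* user base   */
        __asm__ volatile("swapgs");
        saved_kgs = rdmsr_sketch(MSR_IA32_KERNEL_GS_BASE_SKETCH); /* kernel base */
    }

Wake reverses the order in Lwake_64 above: write the kernel base, swapgs, then write the user base.
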
 
diff --git a/pexpert/Makefile b/pexpert/Makefile
index abccc00b0..6e7b0b31a 100644
--- a/pexpert/Makefile
+++ b/pexpert/Makefile
@@ -8,36 +8,18 @@ include $(MakeInc_cmd)
 include $(MakeInc_def)
 
 INSTINC_SUBDIRS = pexpert
-
-
-INSTINC_SUBDIRS_PPC = pexpert
-
 INSTINC_SUBDIRS_I386 = pexpert
-
-
 INSTINC_SUBDIRS_X86_64 = pexpert
-
-
 INSTINC_SUBDIRS_ARM = pexpert
 
 
 EXPINC_SUBDIRS = pexpert
-
-
-EXPINC_SUBDIRS_PPC = pexpert
-
-
 EXPINC_SUBDIRS_I386 = pexpert
-
-
 EXPINC_SUBDIRS_X86_64 = pexpert
-
-
 EXPINC_SUBDIRS_ARM = pexpert
 
 
-SETUP_SUBDIRS = 	\
-	conf
+SETUP_SUBDIRS =
 
 COMP_SUBDIRS = 	\
 	conf
diff --git a/pexpert/conf/MASTER b/pexpert/conf/MASTER
index f622474d6..7731f8388 100644
--- a/pexpert/conf/MASTER
+++ b/pexpert/conf/MASTER
@@ -97,4 +97,3 @@ options		CONFIG_NO_KPRINTF_STRINGS	# <no_kprintf_str>
 # embedded device
 #
 options   CONFIG_EMBEDDED                       # <config_embedded>
-
diff --git a/pexpert/conf/MASTER.i386 b/pexpert/conf/MASTER.i386
index f4e41a8e6..94fb5056c 100644
--- a/pexpert/conf/MASTER.i386
+++ b/pexpert/conf/MASTER.i386
@@ -7,7 +7,6 @@
 #  PROFILE	= [ RELEASE profile ]
 #  DEBUG	= [ RELEASE debug ]
 #
-#
 #  EMBEDDED	= [ intel mach mach_pe panic_info ]
 #  DEVELOPMENT	= [ EMBEDDED config_dtrace ]
 #
diff --git a/pexpert/conf/MASTER.ppc b/pexpert/conf/MASTER.ppc
deleted file mode 100644
index f36c6062d..000000000
--- a/pexpert/conf/MASTER.ppc
+++ /dev/null
@@ -1,18 +0,0 @@
-#
-######################################################################
-#  
-#  Standard NeXT Research Configurations:
-#  -------- ---- -------- ---------------
-#
-#  RELEASE = [ppc mach mach_pe panic_info config_dtrace]
-#  DEVELOPMENT = [ RELEASE ]
-#  RELEASE_TRACE = [RELEASE kdebug]
-#  PROFILE = [RELEASE profile]
-#  DEBUG = [RELEASE debug]
-#  DEBUG_TRACE = [DEBUG kdebug]
-#
-######################################################################
-
-machine		"ppc"					# <ppc>
-cpu			"ppc"					# <ppc>
-
diff --git a/pexpert/conf/MASTER.x86_64 b/pexpert/conf/MASTER.x86_64
index 536c4eb59..9283af226 100644
--- a/pexpert/conf/MASTER.x86_64
+++ b/pexpert/conf/MASTER.x86_64
@@ -7,7 +7,6 @@
 #  PROFILE	= [ RELEASE profile ]
 #  DEBUG	= [ RELEASE debug ]
 #
-#
 #  EMBEDDED	= [ intel mach mach_pe panic_info ]
 #  DEVELOPMENT	= [ EMBEDDED ]
 #
diff --git a/pexpert/conf/Makefile b/pexpert/conf/Makefile
index 93eb84150..06a9defdf 100644
--- a/pexpert/conf/Makefile
+++ b/pexpert/conf/Makefile
@@ -7,8 +7,7 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
-SETUP_SUBDIRS = \
-	tools
+SETUP_SUBDIRS =
 
 COMP_SUBDIRS = 
 
@@ -24,30 +23,24 @@ else
 export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)
 endif
 
-$(COMPOBJROOT)/doconf:
-	@make build_setup 
+MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(ARCH_CONFIG_LC).$(MACHINE_CONFIG_LC)
 
 $(COMPOBJROOT)/$(PEXPERT_KERNEL_CONFIG)/Makefile :  $(SOURCE)/MASTER  \
 	$(SOURCE)/MASTER.$(ARCH_CONFIG_LC) \
 	$(SOURCE)/Makefile.template  \
 	$(SOURCE)/Makefile.$(ARCH_CONFIG_LC)  \
 	$(SOURCE)/files \
-	$(SOURCE)/files.$(ARCH_CONFIG_LC) \
-	$(COMPOBJROOT)/doconf
+	$(SOURCE)/files.$(ARCH_CONFIG_LC)
 	$(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \
 	$(MKDIR) $${doconf_target}; \
 	cd $${doconf_target}; \
 	rm -f $(notdir $?); \
 	cp  $? $${doconf_target}; \
-	$(COMPOBJROOT)/doconf -c -cpu $(ARCH_CONFIG_LC) -d  $(TARGET)/$(PEXPERT_KERNEL_CONFIG) $(PEXPERT_KERNEL_CONFIG); \
+	if [ -f $(MASTER_CPU_PER_SOC) ]; then cp $(MASTER_CPU_PER_SOC) $${doconf_target}; fi; \
+	$(SRCROOT)/SETUP/config/doconf -c -cpu $(ARCH_CONFIG_LC) -soc $(MACHINE_CONFIG_LC) -d  $(TARGET)/$(PEXPERT_KERNEL_CONFIG) $(PEXPERT_KERNEL_CONFIG); \
 	);
 
-.ORDER: $(COMPOBJROOT)/$(PEXPERT_KERNEL_CONFIG)/Makefile 
-
-do_setup_conf: $(COMPOBJROOT)/doconf \
-		$(COMPOBJROOT)/$(PEXPERT_KERNEL_CONFIG)/Makefile 
-
-do_all: do_setup_conf
+do_all: $(COMPOBJROOT)/$(PEXPERT_KERNEL_CONFIG)/Makefile 
 	$(_v)next_source=$(subst conf/,,$(SOURCE));			\
 	${MAKE} -C $(COMPOBJROOT)/$(PEXPERT_KERNEL_CONFIG)	\
 		MAKEFILES=$(TARGET)/$(PEXPERT_KERNEL_CONFIG)/Makefile	\
diff --git a/pexpert/conf/Makefile.ppc b/pexpert/conf/Makefile.ppc
deleted file mode 100644
index 4ef7445f0..000000000
--- a/pexpert/conf/Makefile.ppc
+++ /dev/null
@@ -1,8 +0,0 @@
-######################################################################
-#BEGIN  Machine dependent Makefile fragment for ppc
-######################################################################
-
-######################################################################
-#END    Machine dependent Makefile fragment for ppc
-######################################################################
-
diff --git a/pexpert/conf/Makefile.template b/pexpert/conf/Makefile.template
index 1207442f0..0fcca19b1 100644
--- a/pexpert/conf/Makefile.template
+++ b/pexpert/conf/Makefile.template
@@ -27,8 +27,8 @@ include $(MakeInc_def)
 # CFLAGS
 #
 #
-CFLAGS+= -imacros meta_features.h -DPEXPERT_KERNEL_PRIVATE	\
-	-Werror $(CFLAGS_INLINE_CONFIG)
+CFLAGS+= -include meta_features.h -DPEXPERT_KERNEL_PRIVATE	\
+	$(CFLAGS_INLINE_CONFIG)
 
 #
 # Directories for mig generated files
@@ -74,16 +74,16 @@ ${OBJS}: ${OBJSDEPS}
 
 LDOBJS = $(OBJS)
 
-$(COMPONENT).o: $(LDOBJS)
+$(COMPONENT).filelist: $(LDOBJS)
 	@echo LDFILELIST $(COMPONENT)
 	$(_v)( for obj in ${LDOBJS}; do	\
 		 echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \
-	done; ) > $(COMPONENT).o
+	done; ) > $(COMPONENT).filelist
 
 do_depend: do_all
 	$(_v)${MD} -u Makedep -f -d `ls *.d`;
 	
-do_all: $(COMPONENT).o
+do_all: $(COMPONENT).filelist
 
 do_build_all: do_depend
 
diff --git a/pexpert/conf/files.ppc b/pexpert/conf/files.ppc
deleted file mode 100644
index ab76f421f..000000000
--- a/pexpert/conf/files.ppc
+++ /dev/null
@@ -1,7 +0,0 @@
-
-pexpert/ppc/pe_init.c				standard
-pexpert/ppc/pe_bootargs.c			standard
-pexpert/ppc/pe_identify_machine.c		standard
-pexpert/ppc/pe_kprintf.c			standard
-pexpert/ppc/pe_clock_speed.c			standard
-pexpert/ppc/pe_clock_speed_asm.s		standard
diff --git a/pexpert/conf/tools/Makefile b/pexpert/conf/tools/Makefile
deleted file mode 100644
index 4f9ccd553..000000000
--- a/pexpert/conf/tools/Makefile
+++ /dev/null
@@ -1,32 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-SETUP_SUBDIRS = doconf
-
-COMP_SUBDIRS = doconf
-
-INST_SUBDIRS = \
-
-
-setup_build_all:
-	@echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-do_build_all:
-	@echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-setup_build_install:
-	@echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-do_build_install:
-	@echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/pexpert/conf/tools/doconf/Makefile b/pexpert/conf/tools/doconf/Makefile
deleted file mode 100644
index aa55a9419..000000000
--- a/pexpert/conf/tools/doconf/Makefile
+++ /dev/null
@@ -1,47 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-COMP_SUBDIRS = \
-
-INST_SUBDIRS = \
-
-
-#
-# Who and where
-#
-BINDIR= 
-ifneq ($(MACHINE_CONFIG), DEFAULT)
-DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)/)
-else
-DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)/)
-endif
-PROGRAM= $(DSTDIR)doconf
-
-# 
-# How to install it
-#
-IFLAGS= -c -m 555
-
-$(PROGRAM): $(DSTDIR)% : $(SOURCE)%.csh
-	@-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS 
-	@sed -e "s/#PROGRAM.*/#`vers_string $(notdir $(PROGRAM))`/" \
-		< $< >$(notdir $(PROGRAM)).VERS;
-	@install $(IFLAGS) $(notdir $(PROGRAM)).VERS $(PROGRAM);
-	@-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS;
-
-do_build_setup: $(PROGRAM)
-
-do_build_all:
-
-setup_build_install:
-
-do_build_install:
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
diff --git a/pexpert/conf/tools/doconf/doconf.csh b/pexpert/conf/tools/doconf/doconf.csh
deleted file mode 100755
index 6fedb4786..000000000
--- a/pexpert/conf/tools/doconf/doconf.csh
+++ /dev/null
@@ -1,321 +0,0 @@
-#!/bin/csh -f
-set path = ($path .)
-######################################################################
-# HISTORY
-#  1-Dec-87  Michael Young (mwyoung) at Carnegie-Mellon University
-#	Added "-verbose" switch, so this script produces no output
-#	in the normal case.
-#
-# 10-Oct-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Flushed cmu_*.h and spin_locks.h
-#	[ V5.1(XF18) ]
-#
-#  6-Apr-87  Avadis Tevanian (avie) at Carnegie-Mellon University
-#	Use MASTER.local and MASTER.<machine>.local for generation of
-#	configuration files in addition to MASTER and MASTER.<machine>.
-#
-# 25-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Removed use of obsolete wb_*.h files when building the feature
-#	list;  modified to save the previous configuration file and
-#	display the differences between it and the new file.
-#	[ V5.1(F8) ]
-#
-# 25-Mar-87  Avadis Tevanian (avie) at Carnegie-Mellon University
-#	If there is no /etc/machine just print out a message telling
-#	user to use the -cpu option.  I thought this script was supposed
-#	to work even without a /etc/machine, but it doesn't... and this
-#	is the easiest way out.
-#
-# 13-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added "romp_fpa.h" file to extra features for the RT.
-#	[ V5.1(F7) ]
-#
-# 11-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Updated to maintain the appropriate configuration features file
-#	in the "machine" directory whenever the corresponding
-#	configuration is generated.  This replaces the old mechanism of
-#	storing this directly in the <sys/features.h> file since it was
-#	machine dependent and also precluded building programs for more
-#	than one configuration from the same set of sources.
-#	[ V5.1(F6) ]
-#
-# 21-Feb-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Fixed to require wired-in cpu type names for only those
-#	machines where the kernel name differs from that provided by
-#	/etc/machine (i.e. IBMRT => ca and SUN => sun3);  updated to
-#	permit configuration descriptions in both machine indepedent
-#	and dependent master configuration files so that attributes can
-#	be grouped accordingly.
-#	[ V5.1(F3) ]
-#
-# 17-Jan-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Updated to work from any directory at the same level as
-#	"conf"; generate configuration from both MASTER and
-#	MASTER.<machine-type> files; added -cpu switch.
-#	[ V5.1(F1) ]
-#
-# 18-Aug-86  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added -make switch and changed meaning of -config;  upgraded to
-#	allow multiple attributes per configuration and to define
-#	configurations in terms of these attributes within MASTER.
-#
-# 14-Apr-83  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added -config switch to only run /etc/config without 
-#	"make depend" and "make".
-#
-######################################################################
-
-set prog=$0
-set prog=$prog:t
-set nonomatch
-set OBJDIR=../BUILD
-if ("`/usr/bin/uname`" == "Rhapsody" ) then
-set CONFIG_DIR=/usr/local/bin
-else
-set CONFIG_DIR=/usr/bin
-endif
-
-unset domake
-unset doconfig
-unset beverbose
-unset MACHINE
-unset profile
-
-while ($#argv >= 1)
-    if ("$argv[1]" =~ -*) then
-        switch ("$argv[1]")
-	case "-c":
-	case "-config":
-	    set doconfig
-	    breaksw
-	case "-m":
-	case "-make":
-	    set domake
-	    breaksw
-	case "-cpu":
-	    if ($#argv < 2) then
-		echo "${prog}: missing argument to ${argv[1]}"
-		exit 1
-	    endif
-	    set MACHINE="$argv[2]"
-	    shift
-	    breaksw
-	case "-d":
-	    if ($#argv < 2) then
-		echo "${prog}: missing argument to ${argv[1]}"
-		exit 1
-	    endif
-	    set OBJDIR="$argv[2]"
-	    shift
-	    breaksw
-	case "-verbose":
-	    set beverbose
-	    breaksw
-	case "-p":
-	case "-profile":
-	    set profile
-	    breaksw
-	default:
-	    echo "${prog}: ${argv[1]}: unknown switch"
-	    exit 1
-	    breaksw
-	endsw
-	shift
-    else
-	break
-    endif
-end
-
-if ($#argv == 0) set argv=(GENERIC)
-
-if (! $?MACHINE) then
-    if (-d /NextApps) then
-	set MACHINE=`hostinfo | awk '/MC680x0/ { printf("m68k") } /MC880x0/ { printf("m88k") }'`
-    endif
-endif
-
-if (! $?MACHINE) then
-    if (-f /etc/machine) then
-	    set MACHINE="`/etc/machine`"
-    else
-	    echo "${prog}: no /etc/machine, specify machine type with -cpu"
-	    echo "${prog}: e.g. ${prog} -cpu VAX CONFIGURATION"
-	    exit 1
-    endif
-endif
-
-set FEATURES_EXTRA=
-
-switch ("$MACHINE")
-    case IBMRT:
-	set cpu=ca
-	set ID=RT
-	set FEATURES_EXTRA="romp_dualcall.h romp_fpa.h"
-	breaksw
-    case SUN:
-	set cpu=sun3
-	set ID=SUN3
-	breaksw
-    default:
-	set cpu=`echo $MACHINE | tr A-Z a-z`
-	set ID=`echo $MACHINE | tr a-z A-Z`
-	breaksw
-endsw
-set FEATURES=../h/features.h
-set FEATURES_H=(cs_*.h mach_*.h net_*.h\
-	        cputypes.h cpus.h vice.h\
-	        $FEATURES_EXTRA)
-set MASTER_DIR=../conf
-set MASTER =   ${MASTER_DIR}/MASTER
-set MASTER_CPU=${MASTER}.${cpu}
-
-set MASTER_LOCAL = ${MASTER}.local
-set MASTER_CPU_LOCAL = ${MASTER_CPU}.local
-if (! -f $MASTER_LOCAL) set MASTER_LOCAL = ""
-if (! -f $MASTER_CPU_LOCAL) set MASTER_CPU_LOCAL = ""
-
-if (! -d $OBJDIR) then
-    if ($?beverbose) then
-        echo "[ creating $OBJDIR ]"
-    endif
-    mkdir -p $OBJDIR
-endif
-
-foreach SYS ($argv)
-    set SYSID=${SYS}_${ID}
-    set SYSCONF=$OBJDIR/config.$SYSID
-    set BLDDIR=$OBJDIR
-    if ($?beverbose) then
-	echo "[ generating $SYSID from $MASTER_DIR/MASTER{,.$cpu}{,.local} ]"
-    endif
-    echo +$SYS \
-    | \
-    cat $MASTER $MASTER_LOCAL $MASTER_CPU $MASTER_CPU_LOCAL - \
-        $MASTER $MASTER_LOCAL $MASTER_CPU $MASTER_CPU_LOCAL \
-    | \
-    sed -n \
-	-e "/^+/{" \
-	   -e "s;[-+];#&;gp" \
-	      -e 't loop' \
-	   -e ': loop' \
-           -e 'n' \
-	   -e '/^#/b loop' \
-	   -e '/^$/b loop' \
-	   -e 's;^\([^#]*\).*#[ 	]*<\(.*\)>[ 	]*$;\2#\1;' \
-	      -e 't not' \
-	   -e 's;\([^#]*\).*;#\1;' \
-	      -e 't not' \
-	   -e ': not' \
-	   -e 's;[ 	]*$;;' \
-	   -e 's;^\!\(.*\);\1#\!;' \
-	   -e 'p' \
-	      -e 't loop' \
-           -e 'b loop' \
-	-e '}' \
-	-e "/^[^#]/d" \
-	-e 's;	; ;g' \
-	-e "s;^# *\([^ ]*\)[ ]*=[ ]*\[\(.*\)\].*;\1#\2;p" \
-    | \
-    awk '-F#' '\
-part == 0 && $1 != "" {\
-	m[$1]=m[$1] " " $2;\
-	next;\
-}\
-part == 0 && $1 == "" {\
-	for (i=NF;i>1;i--){\
-		s=substr($i,2);\
-		c[++na]=substr($i,1,1);\
-		a[na]=s;\
-	}\
-	while (na > 0){\
-		s=a[na];\
-		d=c[na--];\
-		if (m[s] == "") {\
-			f[s]=d;\
-		} else {\
-			nx=split(m[s],x," ");\
-			for (j=nx;j>0;j--) {\
-				z=x[j];\
-				a[++na]=z;\
-				c[na]=d;\
-			}\
-		}\
-	}\
-	part=1;\
-	next;\
-}\
-part != 0 {\
-	if ($1 != "") {\
-		n=split($1,x,",");\
-		ok=0;\
-		for (i=1;i<=n;i++) {\
-			if (f[x[i]] == "+") {\
-				ok=1;\
-			}\
-		}\
-		if (NF > 2 && ok == 0 || NF <= 2 && ok != 0) {\
-			print $2; \
-		}\
-	} else { \
-		print $2; \
-	}\
-}\
-' >$SYSCONF.new
-    if (-z $SYSCONF.new) then
-	echo "${prog}: ${$SYSID}: no such configuration in $MASTER_DIR/MASTER{,.$cpu}"
-	rm -f $SYSCONF.new
-    endif
-    if (! -d $BLDDIR) then
-        if ($?beverbose) then
-	    echo "[ creating $BLDDIR ]"
-        endif
-	mkdir -p $BLDDIR
-    endif
-#
-# These paths are used by config.
-#
-# "builddir" is the name of the directory where kernel binaries
-# are put.  It is a single path element, never absolute, and is
-# always relative to "objectdir".  "builddir" is used by config
-# solely to determine where to put files created by "config" (e.g.
-# the created Makefile and *.h's.)
-#
-# "objectdir" is the name of the directory which will hold "builddir".
-# It is a path; if relative, it is relative to the current directory
-# where config is run.  It's sole use is to be prepended to "builddir"
-# to indicate where config-created files are to be placed (see above).
-#
-# "sourcedir" is the location of the sources used to build the kernel.
-# It is a path; if relative, it is relative to the directory specified
-# by the concatenation of "objectdir" and "builddir" (i.e. where the
-# kernel binaries are put).
-#
-    echo 'builddir	"."'			>> $SYSCONF.new
-    set OBJRELDIR=`$RELPATH $OBJROOT $OBJDIR`
-    echo 'objectdir	"'$OBJROOT'/'$OBJRELDIR'"'		>> $SYSCONF.new
-    set SRCDIR=`dirname $SOURCE`
-    echo 'sourcedir	"'$SRCROOT'"'		>> $SYSCONF.new
-    if (-f $SYSCONF) then
-	diff $SYSCONF $SYSCONF.new
-	rm -f $SYSCONF.old
-	mv $SYSCONF $SYSCONF.old
-    endif
-    rm -f $SYSCONF
-    mv $SYSCONF.new $SYSCONF
-    if ($?doconfig) then
-        if ($?beverbose) then
-	    echo "[ configuring $SYSID ]"
-        endif
-	if ($?profile) then
-	    $CONFIG_DIR/config -c $MASTER_DIR -p $SYSCONF
-	else
-	    $CONFIG_DIR/config -c $MASTER_DIR $SYSCONF
-	endif
-    endif
-    if ($?domake) then
-        if ($?beverbose) then
-            echo "[ making $SYSID ]"
-        endif
-        (cd $BLDDIR; make)
-    endif
-end
diff --git a/pexpert/gen/bootargs.c b/pexpert/gen/bootargs.c
index 0e8749b2b..6ca4fa102 100644
--- a/pexpert/gen/bootargs.c
+++ b/pexpert/gen/bootargs.c
@@ -26,12 +26,22 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 #include <pexpert/pexpert.h>
+#include <pexpert/device_tree.h>
 
-extern boolean_t isargsep( char c);
-extern int argstrcpy(char *from, char *to);
-extern int getval(char *s, int *val);
-
+static boolean_t isargsep( char c);
+#if !CONFIG_EMBEDDED
+static int argstrcpy(char *from, char *to);
+#endif 
 static int argstrcpy2(char *from,char *to, unsigned maxlen);
+static int argnumcpy(int val, void *to, unsigned maxlen);
+static int getval(char *s, int *val);
+
+extern int IODTGetDefault(const char *key, void *infoAddr, unsigned int infoSize);
+
+struct i24 {
+	int32_t	i24 : 24;
+	int32_t _pad : 8;
+};
 
 #define	NUM	0
 #define	STR	1
@@ -69,6 +79,10 @@ PE_parse_boot_argn(
 	args = PE_boot_args();
 	if (*args == '\0') return FALSE;
 
+#if CONFIG_EMBEDDED
+	if (max_len == -1) return FALSE;
+#endif
+
 	arg_found = FALSE;
 
 	while(*args && isargsep(*args)) args++;
@@ -93,7 +107,7 @@ PE_parse_boot_argn(
 		    (i!=strlen(arg_string)))
 			goto gotit;
 		if (arg_boolean) {
-			*(unsigned int *)arg_ptr = TRUE;
+			argnumcpy(1, arg_ptr, max_len);
 			arg_found = TRUE;
 			break;
 		} else {
@@ -113,14 +127,16 @@ PE_parse_boot_argn(
 			switch (getval(cp, &val)) 
 			{
 				case NUM:
-					*(unsigned int *)arg_ptr = val;
+					argnumcpy(val, arg_ptr, max_len);
 					arg_found = TRUE;
 					break;
 				case STR:
 					if(max_len > 0) //max_len of 0 performs no copy at all
 						argstrcpy2(++cp, (char *)arg_ptr, max_len - 1);
-					else if(max_len == -1)
+#if !CONFIG_EMBEDDED
+					else if(max_len == -1) // unreachable on embedded
 						argstrcpy(++cp, (char *)arg_ptr);
+#endif
 					arg_found = TRUE;
 					break;
 			}
@@ -137,7 +153,8 @@ gotit:
 	return(arg_found);
 }
 
-boolean_t isargsep(
+static boolean_t
+isargsep(
 	char c)
 {
 	if (c == ' ' || c == '\0' || c == '\t')
@@ -146,7 +163,8 @@ boolean_t isargsep(
 		return(FALSE);
 }
 
-int
+#if !CONFIG_EMBEDDED
+static int
 argstrcpy(
 	char *from, 
 	char *to)
@@ -160,6 +178,7 @@ argstrcpy(
 	*to = 0;
 	return(i);
 }
+#endif
 
 static int
 argstrcpy2(
@@ -177,7 +196,33 @@ argstrcpy2(
 	return(i);
 }
 
-int
+static int argnumcpy(int val, void *to, unsigned maxlen)
+{
+	switch (maxlen) {
+		case 0:
+			/* No write-back, caller just wants to know if arg was found */
+			break;
+		case 1:
+			*(int8_t *)to = val;
+			break;
+		case 2:
+			*(int16_t *)to = val;
+			break;
+		case 3:
+			/* Unlikely in practice */
+			((struct i24 *)to)->i24 = val;
+			break;
+		case 4:
+		default:
+			*(int32_t *)to = val;
+			maxlen = 4;
+			break;
+	}
+
+	return (int)maxlen;
+}
+
+static int
 getval(
 	char *s, 
 	int *val)
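
With argnumcpy() in place, numeric boot-args are written back at the caller's declared width instead of an unconditional 32-bit store. A hypothetical caller ("mybool" and "mycount" are made-up boot-args):

    uint8_t  mybool  = 0;   /* 1-byte write-back via argnumcpy */
    uint32_t mycount = 0;   /* 4-byte write-back */

    if (PE_parse_boot_argn("mybool", &mybool, sizeof(mybool)))
        kprintf("mybool=%u\n", mybool);
    if (PE_parse_boot_argn("mycount", &mycount, sizeof(mycount)))
        kprintf("mycount=%u\n", mycount);

This also covers the bare-flag case: a boolean boot-arg now stores 1 at the declared width rather than always writing four bytes.
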
@@ -266,3 +311,45 @@ PE_imgsrc_mount_supported()
 {
 	return TRUE;
 }
+
+boolean_t
+PE_get_default(
+	const char	*property_name,
+	void		*property_ptr,
+	unsigned int max_property)
+{
+	DTEntry		dte;
+	void		**property_data;
+	unsigned int property_size;
+
+	/*
+	 * Look for the property using the PE DT support.
+	 */
+	if (kSuccess == DTLookupEntry(NULL, "/defaults", &dte)) {
+
+		/*
+		 * We have a /defaults node, look for the named property.
+		 */
+		if (kSuccess != DTGetProperty(dte, property_name, (void **)&property_data, &property_size))
+			return FALSE;
+
+		/*
+		 * This would be a fine place to do smart argument size management for 32/64
+		 * translation, but for now we'll insist that callers know how big their
+		 * default values are.
+		 */
+		if (property_size > max_property)
+			return FALSE;
+
+		/*
+		 * Copy back the precisely-sized result.
+		 */
+		memcpy(property_ptr, property_data, property_size);
+		return TRUE;
+	}
+
+	/*
+	 * Look for the property using I/O Kit's DT support.
+	 */
+	return IODTGetDefault(property_name, property_ptr, max_property) ? FALSE : TRUE;
+}
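
PE_get_default() consults the device tree's /defaults node first and falls back to IODTGetDefault(); the destination must be at least as large as the stored property. A hypothetical use ("debug-flags" is a made-up property name):

    uint32_t dbg_flags = 0;

    if (PE_get_default("debug-flags", &dbg_flags, sizeof(dbg_flags)))
        kprintf("default debug-flags: 0x%x\n", dbg_flags);
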
diff --git a/pexpert/i386/pe_init.c b/pexpert/i386/pe_init.c
index 7b37f5eb2..fc29c1a65 100644
--- a/pexpert/i386/pe_init.c
+++ b/pexpert/i386/pe_init.c
@@ -185,9 +185,7 @@ void PE_init_platform(boolean_t vm_initialized, void * _args)
     }
 
     if (!vm_initialized) {
-		/* Hack! FIXME.. */ 
-        outb(0x21, 0xff);   /* Maskout all interrupts Pic1 */
-        outb(0xa1, 0xff);   /* Maskout all interrupts Pic2 */
+
         if (PE_state.deviceTreeHead) {
             DTInit(PE_state.deviceTreeHead);
         }
diff --git a/pexpert/i386/pe_kprintf.c b/pexpert/i386/pe_kprintf.c
index b25bbcf28..6533908eb 100644
--- a/pexpert/i386/pe_kprintf.c
+++ b/pexpert/i386/pe_kprintf.c
@@ -35,6 +35,7 @@
 #include <kern/debug.h>
 #include <kern/simple_lock.h>
 #include <i386/mp.h>
+#include <machine/pal_routines.h>
 
 /* Globals */
 void (*PE_kputc)(char c);
@@ -66,10 +67,11 @@ void PE_init_kprintf(boolean_t vm_initialized)
 			if (boot_arg & DB_KPRT)
 				new_disable_serial_output = FALSE;
 
-		/* If we are newly enabling serial, make sure we only call serial_init()
-		 * if our previous state was not enabled */
-		if (!new_disable_serial_output && (!disable_serial_output || serial_init()))
-			PE_kputc = serial_putc;
+		/* If we are newly enabling serial, make sure we only
+		 * call pal_serial_init() if our previous state was
+		 * not enabled */
+		if (!new_disable_serial_output && (!disable_serial_output || pal_serial_init()))
+			PE_kputc = pal_serial_putc;
 		else
 			PE_kputc = cnputc;
 
@@ -108,7 +110,7 @@ void kprintf(const char *fmt, ...)
 		 * take any locks, just dump to serial */
 		if (!PE_kputc) {
 			va_start(listp, fmt);
-			_doprnt(fmt, &listp, serial_putc, 16);
+			_doprnt(fmt, &listp, pal_serial_putc, 16);
 			va_end(listp);
 			return;
 		}
@@ -120,6 +122,9 @@ void kprintf(const char *fmt, ...)
 		 * interrupts are disabled once we have the lock.
 		 */
 		state = ml_set_interrupts_enabled(FALSE);
+
+		pal_preemption_assert();
+
 		while (!simple_lock_try(&kprintf_lock)) {
 			ml_set_interrupts_enabled(state);
 			ml_set_interrupts_enabled(FALSE);
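
The retry loop around kprintf_lock is worth noting: rather than spinning with
interrupts hard-disabled, it re-enables them between failed attempts so the
CPU can still take an IPI from whoever holds the lock. (The added
pal_preemption_assert() call belongs to the same platform-abstraction layer
that replaces serial_init()/serial_putc() with their pal_ equivalents above.)
A compilable sketch of the pattern, with the kernel primitives stubbed out as
stand-ins, not the real simple_lock/ml_* implementations:

    #include <stdbool.h>

    typedef volatile int simple_lock_sketch_t;

    static bool simple_lock_try(simple_lock_sketch_t *l)
    {
        return __sync_lock_test_and_set(l, 1) == 0;
    }

    static bool ml_set_interrupts_enabled(bool enable)
    {
        (void)enable;           /* would toggle IF and return old state */
        return true;
    }

    static void kprintf_locked_section(simple_lock_sketch_t *lock)
    {
        bool state = ml_set_interrupts_enabled(false);

        while (!simple_lock_try(lock)) {
            /* Open a brief interrupt window so a pending IPI
             * (possibly from the CPU holding the lock) can land. */
            ml_set_interrupts_enabled(state);
            ml_set_interrupts_enabled(false);
        }

        /* ... emit output with the lock held and interrupts off ... */

        __sync_lock_release(lock);
        ml_set_interrupts_enabled(state);
    }
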
diff --git a/pexpert/i386/pe_serial.c b/pexpert/i386/pe_serial.c
index cba4e1b59..fcff88b88 100644
--- a/pexpert/i386/pe_serial.c
+++ b/pexpert/i386/pe_serial.c
@@ -198,7 +198,6 @@ int serial_init( void )
     return 1;
 }
 
-
 void serial_putc( char c )
 {
     uart_putc(c);
diff --git a/pexpert/pexpert/Makefile b/pexpert/pexpert/Makefile
index 82be8207d..0680f3ca2 100644
--- a/pexpert/pexpert/Makefile
+++ b/pexpert/pexpert/Makefile
@@ -9,27 +9,16 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 	machine
-
-INSTINC_SUBDIRS_PPC = \
-	ppc
-
 INSTINC_SUBDIRS_I386 = \
 	i386
-
 INSTINC_SUBDIRS_X86_64 = \
 	i386
-
 INSTINC_SUBDIRS_ARM = \
 	arm
 
 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS}
-
-EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC}
-
 EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
-
 EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64}
-
 EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM}
 
 DATAFILES = \
diff --git a/pexpert/pexpert/i386/boot.h b/pexpert/pexpert/i386/boot.h
index dc3b4b09d..18e65d406 100644
--- a/pexpert/pexpert/i386/boot.h
+++ b/pexpert/pexpert/i386/boot.h
@@ -99,14 +99,13 @@ typedef struct Boot_Video	Boot_Video;
 /* Boot argument structure - passed into Mach kernel at boot time.
  * "Revision" can be incremented for compatible changes
  */
-#define kBootArgsRevision		6
-#define kBootArgsVersion		1
+#define kBootArgsRevision		0
+#define kBootArgsVersion		2
 
 /* Snapshot constants of previous revisions that are supported */
 #define kBootArgsVersion1		1
-#define kBootArgsRevision1_4		4
-#define kBootArgsRevision1_5		5
-#define kBootArgsRevision1_6		6
+#define kBootArgsVersion2		2
+#define kBootArgsRevision2_0		0
 
 #define kBootArgsEfiMode32              32
 #define kBootArgsEfiMode64              64
@@ -115,6 +114,10 @@ typedef struct boot_args {
     uint16_t    Revision;	/* Revision of boot_args structure */
     uint16_t    Version;	/* Version of boot_args structure */
 
+    uint8_t     efiMode;    /* 32 = 32-bit, 64 = 64-bit */
+    uint8_t     debugMode;  /* Bit field with behavior changes */
+    uint8_t     __reserved1[2];
+
     char        CommandLine[BOOT_LINE_LENGTH];	/* Passed in command line */
 
     uint32_t    MemoryMap;  /* Physical address of memory map */
@@ -132,17 +135,25 @@ typedef struct boot_args {
 
     uint32_t    efiRuntimeServicesPageStart; /* physical address of defragmented runtime pages */
     uint32_t    efiRuntimeServicesPageCount;
+    uint64_t    efiRuntimeServicesVirtualPageStart; /* virtual address of defragmented runtime pages */
+
     uint32_t    efiSystemTable;   /* physical address of system table in runtime area */
+    uint32_t    __reserved2;
 
-    uint8_t     efiMode;       /* 32 = 32-bit, 64 = 64-bit */
-    uint8_t     __reserved1[3];
-    uint32_t    __reserved2[1];
     uint32_t    performanceDataStart; /* physical address of log */
     uint32_t    performanceDataSize;
-    uint64_t    efiRuntimeServicesVirtualPageStart; /* virtual address of defragmented runtime pages */
-    uint32_t    __reserved3[2];
+
+    uint32_t    keyStoreDataStart; /* physical address of key store data */
+    uint32_t    keyStoreDataSize;
+    uint64_t	bootMemStart;
+    uint64_t	bootMemSize;
+    uint64_t    PhysicalMemorySize;
+    uint64_t    FSBFrequency;
+    uint32_t    __reserved4[734];
 
 } boot_args;
 
+extern char assert_boot_args_size_is_4096[sizeof(boot_args) == 4096 ? 1 : -1];
+
 #endif /* _PEXPERT_I386_BOOT_H */
 
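
The new assert_boot_args_size_is_4096 extern is a C89-compatible compile-time
assertion: if sizeof(boot_args) ever drifts from 4096, the conditional array
size evaluates to -1 and the build fails. The same trick on a hypothetical
struct (C11 would spell this _Static_assert):

    #include <stdint.h>

    /*
     * If the predicate is false the array size is -1 and compilation
     * fails, so an ABI-frozen layout cannot drift silently.
     * "struct wire_header" is an invented example layout.
     */
    struct wire_header {
        uint32_t magic;
        uint32_t length;
    };

    extern char assert_wire_header_size_is_8[sizeof(struct wire_header) == 8 ? 1 : -1];
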
diff --git a/pexpert/pexpert/i386/efi.h b/pexpert/pexpert/i386/efi.h
index 08ff10f84..5ef501593 100644
--- a/pexpert/pexpert/i386/efi.h
+++ b/pexpert/pexpert/i386/efi.h
@@ -253,7 +253,7 @@ EFI_STATUS
   IN EFI_UINTN                    DescriptorSize,
   IN EFI_UINT32                   DescriptorVersion,
   IN EFI_MEMORY_DESCRIPTOR        * VirtualMap
-  ) __attribute__((regparm(0)));
+  );
 
 typedef
 EFI_RUNTIMESERVICE
@@ -261,7 +261,7 @@ EFI_STATUS
 (EFIAPI *EFI_CONVERT_POINTER) (
   IN EFI_UINTN                DebugDisposition,
   IN OUT VOID                 **Address
-  ) __attribute__((regparm(0)));
+  );
 
 //
 // Variable attributes
@@ -280,7 +280,7 @@ EFI_STATUS
   OUT EFI_UINT32              * Attributes OPTIONAL,
   IN OUT EFI_UINTN            * DataSize,
   OUT VOID                    * Data
-  ) __attribute__((regparm(0)));
+  );
 
 typedef
 EFI_RUNTIMESERVICE
@@ -289,7 +289,7 @@ EFI_STATUS
   IN OUT EFI_UINTN            * VariableNameSize,
   IN OUT EFI_CHAR16           * VariableName,
   IN OUT EFI_GUID             * VendorGuid
-  ) __attribute__((regparm(0)));
+  );
 
 typedef
 EFI_RUNTIMESERVICE
@@ -300,7 +300,7 @@ EFI_STATUS
   IN EFI_UINT32               Attributes,
   IN EFI_UINTN                DataSize,
   IN VOID                     * Data
-  ) __attribute__((regparm(0)));
+  );
 
 //
 // EFI Time
@@ -317,14 +317,14 @@ EFI_STATUS
 (EFIAPI *EFI_GET_TIME) (
   OUT EFI_TIME                * Time,
   OUT EFI_TIME_CAPABILITIES   * Capabilities OPTIONAL
-  ) __attribute__((regparm(0)));
+  );
 
 typedef
 EFI_RUNTIMESERVICE
 EFI_STATUS
 (EFIAPI *EFI_SET_TIME) (
   IN EFI_TIME                 * Time
-  ) __attribute__((regparm(0)));
+  );
 
 typedef
 EFI_RUNTIMESERVICE
@@ -333,7 +333,7 @@ EFI_STATUS
   OUT EFI_BOOLEAN             * Enabled,
   OUT EFI_BOOLEAN             * Pending,
   OUT EFI_TIME                * Time
-  ) __attribute__((regparm(0)));
+  );
 
 typedef
 EFI_RUNTIMESERVICE
@@ -341,7 +341,7 @@ EFI_STATUS
 (EFIAPI *EFI_SET_WAKEUP_TIME) (
   IN EFI_BOOLEAN              Enable,
   IN EFI_TIME                 * Time OPTIONAL
-  ) __attribute((regparm(0)));
+  );
 
 typedef enum {
   EfiResetCold,
@@ -362,14 +362,14 @@ VOID
   IN EFI_STATUS                   ResetStatus,
   IN EFI_UINTN                    DataSize,
   IN EFI_CHAR16                   * ResetData OPTIONAL
-  ) __attribute__((regparm(0)));
+  );
 
 typedef
 EFI_RUNTIMESERVICE
 EFI_STATUS
 (EFIAPI *EFI_GET_NEXT_HIGH_MONO_COUNT) (
   OUT EFI_UINT32                  * HighCount
-  ) __attribute__((regparm(0)));
+  );
 
 //
 // Definition of Status Code extended data header
@@ -394,7 +394,7 @@ EFI_STATUS
   IN EFI_UINT32                 Instance,
   IN EFI_GUID                   * CallerId OPTIONAL,
   IN EFI_STATUS_CODE_DATA       * Data OPTIONAL
-  ) __attribute__((regparm(0)));
+  );
 
 #endif
 //
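
The __attribute__((regparm(0))) annotations removed above only mean something
on 32-bit i386, where they force every argument onto the stack regardless of
any -mregparm build setting; x86_64 compilers ignore the attribute entirely.
A hypothetical typedef (not one of the real EFI services) showing the old
shape on both ABIs:

    /*
     * regparm(0): all arguments on the stack, even if the build
     * otherwise passes some in registers. No effect on x86_64.
     */
    #if defined(__i386__)
    typedef int (*stack_call_t)(int a, int b) __attribute__((regparm(0)));
    #else
    typedef int (*stack_call_t)(int a, int b);
    #endif
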
diff --git a/pexpert/pexpert/machine/boot.h b/pexpert/pexpert/machine/boot.h
index 542ee10db..26ba42c37 100644
--- a/pexpert/pexpert/machine/boot.h
+++ b/pexpert/pexpert/machine/boot.h
@@ -28,9 +28,7 @@
 #ifndef _PEXPERT_MACHINE_BOOT_H
 #define _PEXPERT_MACHINE_BOOT_H
 
-#if defined (__ppc__)
-#include "pexpert/ppc/boot.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "pexpert/i386/boot.h"
 #else
 #error architecture not supported
diff --git a/pexpert/pexpert/machine/protos.h b/pexpert/pexpert/machine/protos.h
index 3dd9cbacb..5d71753d9 100644
--- a/pexpert/pexpert/machine/protos.h
+++ b/pexpert/pexpert/machine/protos.h
@@ -28,9 +28,7 @@
 #ifndef _PEXPERT_MACHINE_PROTOS_H
 #define _PEXPERT_MACHINE_PROTOS_H
 
-#if defined (__ppc__)
-#include "pexpert/ppc/protos.h"
-#elif defined (__i386__) || defined(__x86_64__)
+#if defined (__i386__) || defined(__x86_64__)
 #include "pexpert/i386/protos.h"
 #else
 #error architecture not supported
diff --git a/pexpert/pexpert/pexpert.h b/pexpert/pexpert/pexpert.h
index f3a539af8..1f714d32b 100644
--- a/pexpert/pexpert/pexpert.h
+++ b/pexpert/pexpert/pexpert.h
@@ -254,6 +254,17 @@ extern boolean_t PE_parse_boot_argn(
 	void    	*arg_ptr,
 	int			max_arg);
 
+extern boolean_t PE_get_default(
+	const char	*property_name,
+	void		*property_ptr,
+	unsigned int max_property);
+
+#define PE_default_value(_key, _variable, _default)	\
+	do {															  \
+		if (!PE_get_default((_key), &(_variable), sizeof(_variable))) \
+			_variable = _default;									  \
+	} while(0)
+
 enum {
     kPEOptionKey	= 0x3a,
     kPECommandKey	= 0x37,
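
PE_get_default(), added earlier in this patch, checks the device tree's
/defaults node first and falls back to IODTGetDefault(); the
PE_default_value() macro wraps it so a tunable can be seeded in one line. A
hypothetical use, with an invented property name and variable:

    #include <stdint.h>
    #include <pexpert/pexpert.h>

    /*
     * Seed a tunable from /defaults when the property exists, else
     * fall back to the compile-time value. "max-widgets" and
     * max_widgets are illustrative, not names the kernel defines.
     */
    static uint32_t max_widgets;

    static void widgets_init(void)
    {
        PE_default_value("max-widgets", max_widgets, 64);
    }

Note the macro takes the variable itself, not its address; it applies & and
sizeof internally.
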
diff --git a/pexpert/pexpert/ppc/Makefile b/pexpert/pexpert/ppc/Makefile
deleted file mode 100644
index b39a66718..000000000
--- a/pexpert/pexpert/ppc/Makefile
+++ /dev/null
@@ -1,27 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-DATAFILES = \
-        boot.h \
-	interrupts.h \
-	powermac.h
-
-INSTALL_MD_LIST	= ${DATAFILES}
-
-INSTALL_MD_DIR = pexpert/ppc
-
-EXPORT_MD_LIST	= ${DATAFILES}
-
-EXPORT_MD_DIR = pexpert/ppc
-
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/pexpert/pexpert/ppc/boot.h b/pexpert/pexpert/ppc/boot.h
deleted file mode 100644
index 3ba51feb3..000000000
--- a/pexpert/pexpert/ppc/boot.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-/*
- * NOTICE: This file was modified by McAfee Research in 2004 to introduce
- * support for mandatory and extensible security protections.  This notice
- * is included in support of clause 2.2 (b) of the Apple Public License,
- * Version 2.0.
- */
-
-#ifndef _PEXPERT_PPC_BOOT_H_
-#define _PEXPERT_PPC_BOOT_H_
-
-#define BOOT_LINE_LENGTH        256
-
-/*
- * Video information.. 
- */
-
-struct Boot_Video {
-	unsigned long	v_baseAddr;	/* Base address of video memory */
-	unsigned long	v_display;	/* Display Code (if Applicable) */
-	unsigned long	v_rowBytes;	/* Number of bytes per pixel row */
-	unsigned long	v_width;	/* Width */
-	unsigned long	v_height;	/* Height */
-	unsigned long	v_depth;	/* Pixel Depth */
-};
-
-typedef struct Boot_Video	Boot_Video;
-
-/* DRAM Bank definitions - describes physical memory layout.
- */
-#define	kMaxDRAMBanks	26		/* maximum number of DRAM banks */
-
-struct DRAMBank
-{
-	unsigned long	base;		/* physical base of DRAM bank */
-	unsigned long	size;		/* size of bank */
-};
-typedef struct DRAMBank DRAMBank;
-
-
-/* Boot argument structure - passed into Mach kernel at boot time.
- */
-#define kBootArgsRevision		2
-#define kBootArgsVersion1		1
-#define kBootArgsVersion2		2
-
-typedef struct boot_args {
-  unsigned short	Revision;	/* Revision of boot_args structure */
-  unsigned short	Version;	/* Version of boot_args structure */
-  char		CommandLine[BOOT_LINE_LENGTH];	/* Passed in command line */
-  DRAMBank	PhysicalDRAM[kMaxDRAMBanks];	/* base and range pairs for the 26 DRAM banks */
-  Boot_Video	Video;		/* Video Information */
-    unsigned long	machineType;	/* Machine Type (gestalt) */
-    void		*deviceTreeP;	/* Base of flattened device tree */
-    unsigned long	deviceTreeLength;/* Length of flattened tree */
-    unsigned long	topOfKernelData;/* Highest address used in kernel data area */
-  void                  *exdata;
-  unsigned long         exdatalen;
-} boot_args;
-
-extern boot_args passed_args;
-
-#endif /* _PEXPERT_PPC_BOOT_H_ */
diff --git a/pexpert/pexpert/ppc/interrupts.h b/pexpert/pexpert/ppc/interrupts.h
deleted file mode 100644
index eac70164c..000000000
--- a/pexpert/pexpert/ppc/interrupts.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#ifndef _POWERMAC_INTERRUPTS_H_
-#define _POWERMAC_INTERRUPTS_H_
-
-#include <mach/ppc/thread_status.h> /* for struct ppc_saved_state */
-
-extern void	(PE_incoming_interrupt)(int type, ppc_saved_state_t *ssp,
-					unsigned int dsisr, unsigned int dar);
-
-#endif /* POWERMAC_INTERRUPTS_H_ */
diff --git a/pexpert/pexpert/ppc/powermac.h b/pexpert/pexpert/ppc/powermac.h
deleted file mode 100644
index 82a61fcba..000000000
--- a/pexpert/pexpert/ppc/powermac.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#ifndef _PEXPERT_PPC_POWERMAC_H_
-#define _PEXPERT_PPC_POWERMAC_H_
-
-#ifndef ASSEMBLER
-
-#include <mach/ppc/vm_types.h>
-
-#include <pexpert/pexpert.h>
-#include <pexpert/protos.h>
-#include <pexpert/ppc/boot.h>
-
-
-/* prototypes */
-
-vm_offset_t PE_find_scc( void );
-
-/* Some useful typedefs for accessing control registers */
-
-typedef volatile unsigned char	v_u_char;
-typedef volatile unsigned short v_u_short;
-typedef volatile unsigned int	v_u_int;
-typedef volatile unsigned long  v_u_long;
-
-/* And some useful defines for reading 'volatile' structures,
- * don't forget to be careful about sync()s and eieio()s
- */
-#define reg8(reg) (*(v_u_char *)reg)
-#define reg16(reg) (*(v_u_short *)reg)
-#define reg32(reg) (*(v_u_int *)reg)
-
-#endif /* ASSEMBLER */
-
-#endif /* _PEXPERT_PPC_POWERMAC_H_ */
diff --git a/pexpert/pexpert/ppc/protos.h b/pexpert/pexpert/ppc/protos.h
deleted file mode 100644
index 74ed2485b..000000000
--- a/pexpert/pexpert/ppc/protos.h
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#ifndef _PEXPERT_PPC_PROTOS_H_
-#define _PEXPERT_PPC_PROTOS_H_
- 
-#define mtsprg(n, reg)  __asm__ volatile("mtsprg  " # n ", %0" : : "r" (reg))
-#define mfsprg(reg, n)  __asm__ volatile("mfsprg  %0, " # n : "=r" (reg))
- 
-#define mtspr(spr, val)  __asm__ volatile("mtspr  " # spr ", %0" : : "r" (val))
-#define mfspr(reg, spr)  __asm__ volatile("mfspr  %0, " # spr : "=r" (reg))
-
-/*
- * Various memory/IO synchronisation instructions
- */     
- 
-        /*      Use eieio as a memory barrier to order stores.
-         *      Useful for device control and PTE maintenance.
-         */ 
- 
-#define eieio() \
-        __asm__ volatile("eieio")
- 
-        /*      Use sync to ensure previous stores have completed.
-                This is  required when manipulating locks and/or
-                maintaining PTEs or other shared structures on SMP
-                machines.
-        */
-
-#define sync() \
-        __asm__ volatile("sync") 
- 
-        /*      Use isync to synchronize context; that is, to ensure
-                no prefetching of instructions happens before the
-                instruction.
-        */
-
-#define isync() \
-        __asm__ volatile("isync")
-
-
-//------------------------------------------------------------------------
-// from ppc/endian.h
-static __inline__ unsigned int byte_reverse_word(unsigned int word);
-static __inline__ unsigned int byte_reverse_word(unsigned int word) {
-        unsigned int result;
-        __asm__ volatile("lwbrx %0, 0, %1" : "=r" (result) : "r" (&word));
-        return result;
-}
-
-//------------------------------------------------------------------------
-// from ppc/serial_io.h
-extern void initialize_serial(void * scc_phys_base, uint32_t serial_baud);
-void serial_putc(char);
-int serial_getc(void);
-void cnputc(char);
-
-
-//------------------------------------------------------------------------
-// from osfmk/ppc/POWERMAC/video_console.c
-
-extern void vc_progress_initialize( void * desc,
-				    const unsigned char * data,
-				    const unsigned char * clut );
-
-extern void vc_display_icon( void * desc,
-			     const unsigned char * data );
-
-//-------------------------------------------------------------------------
-// from osfmk/console/panic_dialog.c
-extern void panic_ui_initialize(const unsigned char * clut);
-
-/*
- * from osfmk/ppc/serial_console.h
- */
-int switch_to_serial_console(void);
-void switch_to_old_console(int);
-
-typedef unsigned spl_t;
-
-//------------------------------------------------------------------------
-// from bsd/dev/ppc/busses.h which clashes with mach/device/device_types.h
-typedef int		io_req_t;
-
-
-//typedef struct ipc_port         *ipc_port_t;
-
-extern void            cninit(void);
-
-/*
- *	Temporarily stolen from Firmware.h
- */
-
-extern void dbgTrace(unsigned int item1, unsigned int item2, unsigned int item3);
-#if 1		/* (TEST/DEBUG) - eliminate inline */
-extern __inline__ void dbgTrace(unsigned int item1, unsigned int item2, unsigned int item3) {
- 
-	__asm__ volatile("mr   r3,%0" : : "r" (item1) : "r3");
-	__asm__ volatile("mr   r4,%0" : : "r" (item2) : "r4");
-	__asm__ volatile("mr   r5,%0" : : "r" (item3) : "r5");
-	__asm__ volatile("lis  r0,hi16(CutTrace)" : : : "r0");
-	__asm__ volatile("ori  r0,r0,lo16(CutTrace)" : : : "r0");
-	__asm__ volatile("sc");
-	return;
-}
-#endif
-
-extern void DoPreempt(void);
-extern __inline__ void DoPreempt(void) {
-	__asm__ volatile("lis  r0,hi16(DoPreemptCall)" : : : "r0");
-	__asm__ volatile("ori  r0,r0,lo16(DoPreemptCall)" : : : "r0");
-	__asm__ volatile("sc");
-	return;
-}
-
-extern void CreateFakeIO(void);
-extern __inline__ void CreateFakeIO(void) {
-	__asm__ volatile("lis  r0,hi16(CreateFakeIOCall)" : : : "r0");
-	__asm__ volatile("ori  r0,r0,lo16(CreateFakeIOCall)" : : : "r0");
-	__asm__ volatile("sc");
-		return;
-}
-
-extern void StoreReal(unsigned int val, unsigned int addr);
-extern void ReadReal(unsigned int raddr, unsigned int *vaddr);
-extern unsigned int LLTraceSet(unsigned int tflags);
-extern void GratefulDebInit(void);
-extern void GratefulDebDisp(unsigned int coord, unsigned int data);
-extern void checkNMI(void);
-
-#ifndef VM_WIMG_IO
-#define VM_WIMG_IO		(VM_MEM_COHERENT | 	\
-				VM_MEM_NOT_CACHEABLE | VM_MEM_GUARDED)
-#endif
-
-#endif /* _PEXPERT_PPC_PROTOS_H_ */
diff --git a/pexpert/pexpert/protos.h b/pexpert/pexpert/protos.h
index cd339325c..d54464467 100644
--- a/pexpert/pexpert/protos.h
+++ b/pexpert/pexpert/protos.h
@@ -46,11 +46,7 @@ extern void printf(const char *fmt, ...);
 
 extern void interrupt_enable(void);
 extern void interrupt_disable(void);
-#if __ppc__
-extern void bcopy_nc(const char *from, char *to, int size); /* uncached-safe */
-#else
 #define bcopy_nc bcopy
-#endif 
 
 //------------------------------------------------------------------------
 //from kern/misc_protos.h
diff --git a/pexpert/ppc/pe_clock_speed.c b/pexpert/ppc/pe_clock_speed.c
deleted file mode 100644
index 4a859e2b0..000000000
--- a/pexpert/ppc/pe_clock_speed.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- *  pe_clock_speed.c - Determine the best guess for the processor and bus
- *                     speed by using the values returned by run_clock_test.
- *
- *  (c) Apple Computer, Inc. 1998-2002
- *
- *  Written by:  Josh de Cesare
- *
- */
-
-#include <pexpert/pexpert.h>
-
-#include <ppc/machine_routines.h>
-
-// prototypes
-extern void pe_run_clock_test(void *tmp);
-void pe_do_clock_test(unsigned int via_addr,
-		      int num_speeds, unsigned long *speed_list);
-void PE_Determine_Clock_Speeds(unsigned int via_addr, int num_speeds, unsigned long *speed_list);
-
-// Threshold for bus speed matches.
-#define kMaxFreqDiff  (30000)
-
-// This is the structure for the data that get passed to pe_run_clock_test.
-struct clock_test_data {
-  unsigned int via_addr;
-  unsigned int via_ticks;
-  unsigned int dec_ticks;
-};
-
-// global variables to simplify some stuff.
-static long bus_freq_num, bus_freq_den, cpu_pll;
-
-// PE_Determine_Clock_Speeds is called by the via driver in IOKit
-// It uses the numbers generated by pe_do_clock_test and reports
-// the cleaned up values to the rest of the OS.
-void PE_Determine_Clock_Speeds(unsigned int via_addr, int num_speeds,
-			       unsigned long *speed_list)
-{
-  boolean_t          oldLevel;
-  unsigned long      tmp_bus_speed, tmp_cpu_speed;
-  unsigned long long tmp;
-  
-  oldLevel = ml_set_interrupts_enabled(FALSE);
-  pe_do_clock_test(via_addr, num_speeds, speed_list);
-  ml_set_interrupts_enabled(oldLevel);
-  
-  tmp_bus_speed = bus_freq_num / bus_freq_den;
-  tmp = ((unsigned long long)bus_freq_num * cpu_pll) / (bus_freq_den * 2);
-  tmp_cpu_speed = (unsigned long)tmp;
-  
-  // Report the bus clock rate as is.
-  gPEClockFrequencyInfo.bus_clock_rate_num = bus_freq_num;
-  gPEClockFrequencyInfo.bus_clock_rate_den = bus_freq_den;
-  
-  // pll multipliers are in halfs so set the denominator to 2.
-  gPEClockFrequencyInfo.bus_to_cpu_rate_num = cpu_pll;
-  gPEClockFrequencyInfo.bus_to_cpu_rate_den = 2;
-  
-  // The decrementer rate is one fourth the bus rate.
-  gPEClockFrequencyInfo.bus_to_dec_rate_num = 1;
-  gPEClockFrequencyInfo.bus_to_dec_rate_den = 4;
-  
-  // Assume that the timebase frequency is derived from the bus clock.
-  gPEClockFrequencyInfo.timebase_frequency_num = bus_freq_num;
-  gPEClockFrequencyInfo.timebase_frequency_den = bus_freq_den * 4;
-  
-  // Set the truncated numbers in gPEClockFrequencyInfo.
-  gPEClockFrequencyInfo.bus_clock_rate_hz = tmp_bus_speed;
-  gPEClockFrequencyInfo.cpu_clock_rate_hz = tmp_cpu_speed;
-  gPEClockFrequencyInfo.dec_clock_rate_hz = tmp_bus_speed / 4;
-  gPEClockFrequencyInfo.timebase_frequency_hz = tmp_bus_speed / 4;
-  
-  gPEClockFrequencyInfo.bus_frequency_hz = tmp_bus_speed;
-  gPEClockFrequencyInfo.bus_frequency_min_hz = tmp_bus_speed;
-  gPEClockFrequencyInfo.bus_frequency_max_hz = tmp_bus_speed;
-  gPEClockFrequencyInfo.cpu_frequency_hz = tmp_cpu_speed;
-  gPEClockFrequencyInfo.cpu_frequency_min_hz = tmp_cpu_speed;
-  gPEClockFrequencyInfo.cpu_frequency_max_hz = tmp_cpu_speed;
-  
-  PE_call_timebase_callback();
-}
-
-// pe_do_clock_test uses the number from pe_run_clock_test to
-// find a best fit guess for the bus speed.
-void pe_do_clock_test(unsigned int via_addr,
-		      int num_speeds, unsigned long *speed_list)
-{
-  struct clock_test_data clock_test_data;
-  long cnt, diff, raw_cpu_freq, raw_bus_freq, tmp_bus_freq,
-    last_bus_freq, tries = 10;
-  
-  // Save the via addr so the asm part can use it.
-  clock_test_data.via_addr = via_addr;
-  
-  // Keep looping until it matches the last try.
-  bus_freq_num = 0;
-  do {
-    last_bus_freq = bus_freq_num;
-    
-    // Tell the asm part to do the real work.
-    pe_run_clock_test((void *)&clock_test_data);
-    
-    // First find the pll mode.  Allow any integer times two.
-    cpu_pll = 10000000 / clock_test_data.dec_ticks;
-    cpu_pll = (cpu_pll / 2) + (cpu_pll & 1);
-    
-    // Using 64 bit math figure out the raw bus speed.
-    // 0xBF401675E5DULL is 1 / 1.27655us times 2 ^ 24.
-    raw_bus_freq = ((0xBF401675E5DULL * clock_test_data.dec_ticks) /
-		    clock_test_data.via_ticks) >> 22;
-    
-    // use the pll mode and the raw bus speed to find the raw cpu speed.
-    raw_cpu_freq = raw_bus_freq * cpu_pll / 2;
-    
-    // Look to see if the bus speed is close to one of the
-    // speeds in the table.
-    for (cnt = 0; cnt < num_speeds; cnt++) {
-      bus_freq_num = speed_list[cnt * 2];
-      bus_freq_den = speed_list[cnt * 2 + 1];
-      diff = bus_freq_num - raw_bus_freq * bus_freq_den;
-      if (diff < 0) diff = -diff;
-      
-      if (diff < kMaxFreqDiff * bus_freq_den) break;
-    }
-    if (cnt != num_speeds) continue;
-    
-    // Look to see if the bus speed is close to n * 0.5 MHz
-    tmp_bus_freq = ((raw_bus_freq + 250000) / 500000) * 500000;
-    
-    diff = tmp_bus_freq - raw_bus_freq;
-    if (diff < 0) diff = -diff;
-    
-    if (diff < kMaxFreqDiff) {
-      bus_freq_num = tmp_bus_freq;
-      bus_freq_den = 1;
-      continue;
-    }
-    
-    // Look to see if the bus speed is close to n * 50/3 MHz
-    tmp_bus_freq = ((raw_bus_freq * 3 + 25000000) / 50000000) * 50000000;
-    
-    diff = tmp_bus_freq - raw_bus_freq * 3;
-    if (diff < 0) diff = -diff;
-    
-    if (diff < kMaxFreqDiff * 3) {
-      bus_freq_num = tmp_bus_freq;
-      bus_freq_den = 3;
-      continue;
-    }
-    
-    // Since all else failed return the raw bus speed
-    bus_freq_num = raw_bus_freq;
-    bus_freq_den = 1;
-  } while ((bus_freq_num != last_bus_freq) && tries--);
-}
diff --git a/pexpert/ppc/pe_clock_speed_asm.s b/pexpert/ppc/pe_clock_speed_asm.s
deleted file mode 100644
index 41e9a0fb6..000000000
--- a/pexpert/ppc/pe_clock_speed_asm.s
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- *  clock_speed_asm.s - Uses the Via timer, decrementer, and counter
- *			to determine the clock and bus rates.
- *
- *  (c) Apple Computer, Inc. 1998-9
- *
- *  Written by:	 Josh de Cesare
- *
- */
-
-#include <ppc/asm.h>
-
-// constants for the via
-#define CountLow  0x800
-#define CountHigh 0xa00
-#define LatchLow  0xc00
-#define LatchHigh 0xe00
-
-
-// void pe_run_clock_test(clock_test_data *data)
-//
-// data points to the base address of the via and two longs
-// for storing the via and dec results.
-//
-// The basic idea is this...
-// Use the counter register to execute a loop that will take
-// 10,000,000 processor clocks.  Time it using both the via counter
-// and the time base.  Return the number of ticks for both so the
-// raw values for processor and bus speed can be calculated.
-ENTRY(pe_run_clock_test, TAG_NO_FRAME_USED)
-
-	li	r4,	1		; flag for cache load
-	li	r5,	1		; Only once through this time
-	lwz	r9,	0(r3)		; r9 is the via addr
-
-L_again:
-	mtctr	r5			; set the count
-	li	r5,	0xff		; Start the counter at 0xffff
-	stb	r5,	CountLow(r9)	; clear the via counter
-	eieio
-	stb	r5,	CountHigh(r9)
-	eieio
-	mftb	r10			; save starting value of the time base
-	isync
-
-L_loop:
-	addi	r5,	r5,	1	; 8 adds for 8 cycles
-	addi	r5,	r5,	2	; the bdnz should be 0 cycles
-	addi	r5,	r5,	3
-	addi	r5,	r5,	4
-	addi	r5,	r5,	5
-	addi	r5,	r5,	6
-	addi	r5,	r5,	7
-	addi	r5,	r5,	8
-	bdnz	L_loop
-
-	sync
-	mftb	r5			; save the raw time base value
-	lbz	r6,	CountHigh(r9)	; get the via counter values
-	eieio
-	lbz	r7,	CountLow(r9)
-	eieio
-	lbz	r8,	CountHigh(r9)
-	eieio
-
-	cmpi	cr0,	r4,	1	; see if this was the cache run
-	bne	L_finish_up		; nope, we are done.
-
-	li	r4,	0		; set flag for the real test
-	li	r5,	0x12d0		; set the initial count to 1.25e+6
-	oris	r5,	r5,	0x13
-	b	L_again
-
-L_finish_up:
-	cmpi    cr0,    r7,     0	; if L1 is zero then H1 is good. 
-	beq     L_use_H1		; else H2 will be good.
-
-	mr      r6,     r8		; use H2 instead.
-
-L_use_H1:
-	rlwimi	r7,	r6,	8, 16, 23
-	not	r6,	r7	        ; neg - 1 is not
-	andi.	r6,	r6,	0xffff
-	stw	r6,	4(r3)		; save via ticks
-
-	sub	r5,	r5,	r10	; r5 is the number of time base ticks
-	stw	r5,	8(r3)		; save time base ticks
-
-        blr
diff --git a/pexpert/ppc/pe_identify_machine.c b/pexpert/ppc/pe_identify_machine.c
deleted file mode 100644
index 993b124f9..000000000
--- a/pexpert/ppc/pe_identify_machine.c
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#include <pexpert/protos.h>
-#include <pexpert/pexpert.h>
-#include <pexpert/ppc/powermac.h>
-#include <pexpert/device_tree.h>
-#include <vm/pmap.h>
-
-extern void panic(const char *str, ...);
-
-/* Local declarations */
-void pe_identify_machine(void);
-vm_offset_t get_io_base_addr(void);
-
-/* pe_identify_machine:
- *
- *   Sets up platform parameters.
- *   Returns:    nothing
- */
-void pe_identify_machine(void)
-{
-  DTEntry       cpu, root;
-  unsigned long *value;
-  unsigned int  size;
-
-  // Clear the gPEClockFrequencyInfo struct
-  bzero((void *)&gPEClockFrequencyInfo, sizeof(clock_frequency_info_t));
-  
-  // Start with default values.
-  gPEClockFrequencyInfo.timebase_frequency_hz = 25000000;
-  gPEClockFrequencyInfo.bus_clock_rate_hz     = 100000000;
-  gPEClockFrequencyInfo.cpu_clock_rate_hz     = 300000000;
-
-  // Try to get the values from the device tree.
-  if (DTFindEntry("device_type", "cpu", &cpu) == kSuccess) {
-    // Find the time base frequency first.
-    if (DTGetProperty(cpu, "timebase-frequency", (void **)&value, &size) == kSuccess) {
-      // timebase_frequency_hz is only 32 bits, and the device tree should never provide 64 bits
-      // so this if should never be taken.
-      if (size == 8) gPEClockFrequencyInfo.timebase_frequency_hz = *(unsigned long long *)value;
-      else gPEClockFrequencyInfo.timebase_frequency_hz = *value;
-    }
-    gPEClockFrequencyInfo.dec_clock_rate_hz = gPEClockFrequencyInfo.timebase_frequency_hz;
-    
-    // Find the bus frequency next.  Try the cpu node, then the root.
-    if (DTGetProperty(cpu, "bus-frequency", (void **)&value, &size) == kSuccess) {
-      if (size == 8) gPEClockFrequencyInfo.bus_frequency_hz = *(unsigned long long *)value;
-      else gPEClockFrequencyInfo.bus_frequency_hz = *value;
-    } else {
-      if (DTLookupEntry(0, "/", &root) == kSuccess) {
-	if (DTGetProperty(root, "clock-frequency", (void **)&value, &size) == kSuccess) {
-	  if (size == 8) gPEClockFrequencyInfo.bus_frequency_hz = *(unsigned long long *)value;
-	  else gPEClockFrequencyInfo.bus_frequency_hz = *value;
-	}
-      }
-    }
-    
-    gPEClockFrequencyInfo.bus_frequency_min_hz = gPEClockFrequencyInfo.bus_frequency_hz;
-    gPEClockFrequencyInfo.bus_frequency_max_hz = gPEClockFrequencyInfo.bus_frequency_hz;
-    
-    if (gPEClockFrequencyInfo.bus_frequency_hz < 0x100000000ULL)
-      gPEClockFrequencyInfo.bus_clock_rate_hz = gPEClockFrequencyInfo.bus_frequency_hz;
-    else
-      gPEClockFrequencyInfo.bus_clock_rate_hz = 0xFFFFFFFF;
-    
-    // Find the cpu frequency last.
-    if (DTGetProperty(cpu, "clock-frequency", (void **)&value, &size) == kSuccess) {
-      if (size == 8) gPEClockFrequencyInfo.cpu_frequency_hz = *(unsigned long long *)value;
-      else gPEClockFrequencyInfo.cpu_frequency_hz = *value;
-    }
-    
-    gPEClockFrequencyInfo.cpu_frequency_min_hz = gPEClockFrequencyInfo.cpu_frequency_hz;
-    gPEClockFrequencyInfo.cpu_frequency_max_hz = gPEClockFrequencyInfo.cpu_frequency_hz;
-    
-    if (gPEClockFrequencyInfo.cpu_frequency_hz < 0x100000000ULL)
-      gPEClockFrequencyInfo.cpu_clock_rate_hz = gPEClockFrequencyInfo.cpu_frequency_hz;
-    else
-      gPEClockFrequencyInfo.cpu_clock_rate_hz = 0xFFFFFFFF;
-  }
-  
-  // Set the num / den pairs form the hz values.
-  gPEClockFrequencyInfo.timebase_frequency_num = gPEClockFrequencyInfo.timebase_frequency_hz;
-  gPEClockFrequencyInfo.timebase_frequency_den = 1;
-  
-  gPEClockFrequencyInfo.bus_clock_rate_num = gPEClockFrequencyInfo.bus_clock_rate_hz;
-  gPEClockFrequencyInfo.bus_clock_rate_den = 1;
-  
-  gPEClockFrequencyInfo.bus_to_cpu_rate_num =
-    (2 * gPEClockFrequencyInfo.cpu_clock_rate_hz) / gPEClockFrequencyInfo.bus_clock_rate_hz;
-  gPEClockFrequencyInfo.bus_to_cpu_rate_den = 2;
-  
-  gPEClockFrequencyInfo.bus_to_dec_rate_num = 1;
-  gPEClockFrequencyInfo.bus_to_dec_rate_den =
-    gPEClockFrequencyInfo.bus_clock_rate_hz / gPEClockFrequencyInfo.dec_clock_rate_hz;
-}
-
-/* get_io_base_addr():
- *
- *   Get the base address of the io controller.  
- */
-vm_offset_t get_io_base_addr(void)
-{
-  DTEntry     entryP;
-  vm_offset_t *address;
-  unsigned int size;
-  
-  if ((DTFindEntry("device_type", "dbdma", &entryP) == kSuccess)
-      || (DTFindEntry("device_type", "mac-io", &entryP) == kSuccess))
-    {
-      if (DTGetProperty(entryP, "AAPL,address", (void **)&address, &size) == kSuccess)
-	return *address;
-      
-      if (DTGetProperty(entryP, "assigned-addresses", (void **)&address, &size) == kSuccess)
-	// address calculation not correct
-	return *(address+2);
-    }
-  
-  panic("Can't find this machine's io base address\n");
-  return 0;
-}
-
-vm_offset_t PE_find_scc(void)
-{
-	vm_offset_t io, sccadd;
-	DTEntry     entryP;
-	vm_offset_t *sccregs;
-	unsigned int sccrsize;
-	
-	if(!(io = get_io_base_addr())) {		/* Get the I/O controller base address */
-		return (vm_offset_t)0;				/* Hmmm, no I/O??? What gives??? How'd we even boot? */
-	}
-
-	
-/*	Note: if we find an escc-legacy, we need a bit of a hack because it can be either an offset
-	into the iobase or the actual address itself.  ORing the two should provide the correct
-	value for either */
-
-	sccadd = 0;								/* Assume none for now */
-
-	if(DTFindEntry("name", "escc-legacy", &entryP) == kSuccess)	{	/* Find the old fashioned serial port */
-		if (DTGetProperty(entryP, "reg", (void **)&sccregs, &sccrsize) == kSuccess) {	/* Do we have some registers? */
-			sccadd = ((vm_offset_t)*sccregs | io);	/* Get the address */
-		}
-	}
-	
-	if(DTFindEntry("name", "escc", &entryP) == kSuccess) {	/* Well, see if we just have the new fangled one */
-		sccadd = io + 0x12000; 				/* Yeah, but still return the oldie goldie... */
-	}
-	
-	return sccadd;							/* Return it if you found it */
-}
-
-unsigned int PE_init_taproot(vm_offset_t *taddr)
-{
-	DTEntry     entryP;
-	vm_offset_t *tappdata;
-	unsigned int tappsize;
-	
-	
-	if(DTFindEntry("name", "memory-map", &entryP) != kSuccess) return 0;	/* no memory map */
-
-	if (DTGetProperty(entryP, "TapRoot", (void **)&tappdata, &tappsize) != kSuccess) return 0;	/* No TapRoot */
-
-	tappdata[1] = (tappdata[1] + 4095 ) & -4096;	/* Make sure this is a whole page */
-
-	*taddr = io_map_spec(tappdata[0], tappdata[1], VM_WIMG_IO);	/* Map it in and return the address */
-	tappdata[0] = *taddr;					/* Also change property */
-	return tappdata[1];						/* And the size */
-}
diff --git a/pexpert/ppc/pe_init.c b/pexpert/ppc/pe_init.c
deleted file mode 100644
index 6bcf93210..000000000
--- a/pexpert/ppc/pe_init.c
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * file: pe_init.c
- *    PPC platform expert initialization.
- */
-#include <mach/time_value.h>
-#include <pexpert/protos.h>
-#include <pexpert/pexpert.h>
-#include <pexpert/ppc/interrupts.h>
-#include <pexpert/device_tree.h>
-#include <pexpert/pe_images.h>
-#include <kern/debug.h>
-#include <kern/sched_prim.h>
-#include <vm/pmap.h>
-
-
-/* extern references */
-void pe_identify_machine(void);
-
-/* private globals */
-PE_state_t PE_state;
-
-/* Clock Frequency Info */
-clock_frequency_info_t gPEClockFrequencyInfo;
-
-static int PE_stub_read_write_time_of_day(unsigned int options, long * secs)
-{
-    // believe it or not, BSD crashes if an invalid time is returned. FIXME.
-    if( options == kPEReadTOD)
-        *secs = 0xb2383c72;
-
-    return 0;
-}
-
-static int PE_stub_poll_input(__unused unsigned int options, char * c)
-{
-    *c = 0xff;
-
-    return 1;
-}
-
-static int PE_stub_write_IIC(__unused unsigned char addr, __unused unsigned char reg,
-				__unused unsigned char data)
-{
-    return 1;
-}
-
-int (*PE_read_write_time_of_day)(unsigned int options, long * secs)
-	= PE_stub_read_write_time_of_day;
-int (*PE_poll_input)(unsigned int options, char * c)
-	= PE_stub_poll_input;
-
-int (*PE_write_IIC)(unsigned char addr, unsigned char reg,
-				unsigned char data)
-	= PE_stub_write_IIC;
-
-
-int PE_initialize_console( PE_Video * info, int op )
-{
-    static int		last_console = -1;
-
-    if (info) {
-	info->v_offset  = 0;
-	info->v_length  = 0;
-	info->v_display = 0;
-    }
-
-    switch( op ) {
-
-	case kPEDisableScreen:
-            initialize_screen(info, op);
-            last_console = switch_to_serial_console();
-            kprintf("kPEDisableScreen %d\n",last_console);
-	    break;
-
-	case kPEEnableScreen:
-            initialize_screen(info, op);
-            if (info) PE_state.video = *info;
-            kprintf("kPEEnableScreen %d\n",last_console);
-            if( last_console != -1)
-                switch_to_old_console( last_console);
-	    break;
-	
-	default:
-            initialize_screen(info, op);
-	    break;
-    }
-
-    return 0;
-}
-
-void PE_init_iokit(void)
-{
-    kern_return_t	ret;
-    DTEntry			entry;
-    unsigned int	size;
-    void **			map;
-
-    PE_init_kprintf(TRUE);
-    PE_init_printf(TRUE);
-
-    if( kSuccess == DTLookupEntry(NULL, "/chosen/memory-map", &entry)) {
-
-	boot_progress_element * bootPict;
-
-	if( kSuccess == DTGetProperty(entry, "BootCLUT", (void **) &map, &size))
-	    bcopy( map[0], appleClut8, sizeof(appleClut8) );
-
-	if( kSuccess == DTGetProperty(entry, "Pict-FailedBoot", (void **) &map, &size)) {
-
-	    bootPict = (boot_progress_element *) map[0];
-	    default_noroot.width  = bootPict->width;
-	    default_noroot.height = bootPict->height;
-	    default_noroot.dx     = 0;
-	    default_noroot.dy     = bootPict->yOffset;
-	    default_noroot_data   = &bootPict->data[0];
-	}
-    }
-    panic_ui_initialize( (unsigned char *) appleClut8 );
-    vc_progress_initialize( &default_progress, default_progress_data, (unsigned char *) appleClut8 );
-
-    ret = StartIOKit( PE_state.deviceTreeHead, PE_state.bootArgs, (void *)0, (void *)0);
-}
-
-void PE_init_platform(boolean_t vm_initialized, void *_args)
-{
-	DTEntry dsouth, dnorth, root, dcpu;
-	char *model;
-	unsigned int msize, size;
-	uint32_t *south, *north, *pdata, *ddata;
-	int i;
-	
-	boot_args *args = (boot_args *)_args;
-
-	if (PE_state.initialized == FALSE)
-	{
-	  PE_state.initialized		= TRUE;
-	  PE_state.bootArgs		= _args;
-	  PE_state.deviceTreeHead	= args->deviceTreeP;
-	  PE_state.video.v_baseAddr	= args->Video.v_baseAddr;
-	  PE_state.video.v_rowBytes	= args->Video.v_rowBytes;
-	  PE_state.video.v_width	= args->Video.v_width;
-	  PE_state.video.v_height	= args->Video.v_height;
-	  PE_state.video.v_depth	= args->Video.v_depth;
-	  PE_state.video.v_display	= args->Video.v_display;
-	  strlcpy(PE_state.video.v_pixelFormat, "PPPPPPPP",
-		  sizeof(PE_state.video.v_pixelFormat));
-	}
-
-	if (!vm_initialized)
-	{
-            /*
-             * Setup the OpenFirmware Device Tree routines
-             * so the console can be found and the right I/O space
-             * can be used..
-             */
-            DTInit(PE_state.deviceTreeHead);
-	
-            /* Setup gPEClockFrequencyInfo */
-            pe_identify_machine();
-	}
-	else
-	{
-	    pe_init_debug();
-	
-	}
-}
-
-void PE_create_console( void )
-{
-    if ( PE_state.video.v_display )
-        PE_initialize_console( &PE_state.video, kPEGraphicsMode );
-    else
-        PE_initialize_console( &PE_state.video, kPETextMode );
-}
-
-int PE_current_console( PE_Video * info )
-{
-    *info = PE_state.video;
-
-    return( 0);
-}
-
-void PE_display_icon(	__unused unsigned int flags,
-			__unused const char * name )
-{
-    if( default_noroot_data)
-	vc_display_icon( &default_noroot, default_noroot_data );
-}
-
-boolean_t
-PE_get_hotkey(unsigned char key)
-{
-    unsigned char * adbKeymap;
-    unsigned int	size;
-    DTEntry			entry;
-
-    if( (kSuccess != DTLookupEntry(NULL, "/", &entry))
-    ||  (kSuccess != DTGetProperty( entry, "AAPL,adb-keymap",
-            (void **)&adbKeymap, &size))
-    || (size != 16))
-
-        return( FALSE);
-
-    if( key > 127)
-	return( FALSE);
-
-    return( adbKeymap[ key / 8 ] & (0x80 >> (key & 7)));
-}
-
-static timebase_callback_func gTimebaseCallback;
-
-void PE_register_timebase_callback(timebase_callback_func callback)
-{
-  gTimebaseCallback = callback;
-  
-  PE_call_timebase_callback();
-}
-
-void PE_call_timebase_callback(void)
-{
-  struct timebase_freq_t timebase_freq;
-  unsigned long          num, den, cnt;
-  
-  num = gPEClockFrequencyInfo.timebase_frequency_num;
-  den = gPEClockFrequencyInfo.timebase_frequency_den;
-  
-  cnt = 2;
-  while (cnt <= den) {
-    if ((num % cnt) || (den % cnt)) {
-      cnt++;
-      continue;
-    }
-    
-    num /= cnt;
-    den /= cnt;
-  }
-  
-  timebase_freq.timebase_num = num;
-  timebase_freq.timebase_den = den;
-  
-  if (gTimebaseCallback) gTimebaseCallback(&timebase_freq);
-}
diff --git a/pexpert/ppc/pe_kprintf.c b/pexpert/ppc/pe_kprintf.c
deleted file mode 100644
index 2509d6962..000000000
--- a/pexpert/ppc/pe_kprintf.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * file: pe_kprintf.c
- *    PPC platform expert debugging output initialization.
- */
-#include <stdarg.h>
-#include <machine/machine_routines.h>
-#include <pexpert/protos.h>
-#include <pexpert/pexpert.h>
-#include <pexpert/ppc/powermac.h>
-#include <pexpert/device_tree.h>
-#include <kern/debug.h>
-#include <kern/simple_lock.h>
-#include <vm/pmap.h>
-
-/* extern references */
-extern void scc_putc(int unit, int line, int c);
-extern long strtol(const char *, char **, int);
-
-/* Globals */
-void (*PE_kputc)(char c);
-
-unsigned int disable_serial_output = TRUE;
-
-vm_offset_t	scc = 0;
-
-struct slock kprintf_lock;
-
-void PE_init_kprintf(__unused boolean_t vm_initialized)
-{
-	unsigned int	boot_arg;
-	int32_t			serial_baud = -1;
-	unsigned int	size;
-	DTEntry         options;
-	char            *str, baud[7];
-
-	if (PE_state.initialized == FALSE)
-		panic("Platform Expert not initialized");
-
-	if (PE_parse_boot_argn("debug", &boot_arg, sizeof (boot_arg)))
-		if(boot_arg & DB_KPRT) disable_serial_output = FALSE; 
-
-	if (DTLookupEntry(NULL, "/options", &options) == kSuccess) {
-	  if (DTGetProperty(options, "input-device", (void **)&str, &size) == kSuccess) {
-		if ((size > 5) && !strncmp("scca:", str, 5)) {
-		  size -= 5;
-		  str += 5;
-		  if (size <= 6) {
-			strncpy(baud, str, size);
-			baud[size] = '\0';
-			gPESerialBaud = strtol(baud, NULL, 0);
-		  }
-		}
-	  }
-	  if (DTGetProperty(options, "output-device", (void **)&str, &size) == kSuccess) {
-		if ((size > 5) && !strncmp("scca:", str, 5)) {
-		  size -= 5;
-		  str += 5;
-		  if (size <= 6) {
-			strncpy(baud, str, size);
-			baud[size] = '\0';
-			gPESerialBaud = strtol(baud, NULL, 0);
-		  }
-		}
-	  }	  
-	}
-
-	/* Check the boot-args for new serial baud. */
-	if (PE_parse_boot_argn("serialbaud", &serial_baud, sizeof (serial_baud)))
-		if (serial_baud != -1) gPESerialBaud = serial_baud; 
-
-	if( (scc = PE_find_scc())) {				/* See if we can find the serial port */
-		scc = io_map_spec(scc, 0x1000, VM_WIMG_IO);	/* Map it in */
-		initialize_serial((void *)scc, gPESerialBaud); /* Start up the serial driver */
-		PE_kputc = serial_putc;
-
-		simple_lock_init(&kprintf_lock, 0);
-	} else
-			PE_kputc = cnputc;
-
-#if 0
-	/*
-	 * FUTURE: eventually let the boot command determine where
-	 *         the debug output will be, serial, video, etc.
-	 */
-	switch (PE_state.debug_video.v_display) {
-	    case kDebugTypeSerial:
-		    PE_kputc = serial_putc;
-		    break;
-
-	    case kDebugTypeDisplay:
-		    init_display_putc(  (unsigned char*)PE_state.debug_video.v_baseAddr,
-							PE_state.debug_video.v_rowBytes,
-							PE_state.debug_video.v_height);
-		    PE_kputc = display_putc;
-		    break;
-
-	    default:
-		    PE_state.debug_video.v_baseAddr = 0;
-	}
-#endif
-}
-
-void serial_putc(char c)
-{
-	scc_putc(0, 1, c);
-	if (c == '\n')
-		scc_putc(0, 1, '\r');
-}
-
-void kprintf(const char *fmt, ...)
-{
-        va_list   listp;
-	boolean_t state;
-	
-	state = ml_set_interrupts_enabled(FALSE);
-	simple_lock(&kprintf_lock);
-	
-	if (!disable_serial_output) {	
-        	va_start(listp, fmt);
-        	_doprnt(fmt, &listp, PE_kputc, 16);
-        	va_end(listp);
-	}
-	
-	simple_unlock(&kprintf_lock);
-	ml_set_interrupts_enabled(state);
-}
-
diff --git a/security/Makefile b/security/Makefile
index 90a42a992..4af4eb56e 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -8,20 +8,17 @@ include $(MakeInc_def)
 
 INSTINC_SUBDIRS = \
 
-INSTINC_SUBDIRS_PPC =  \
-
 INSTINC_SUBDIRS_I386 = \
 
 INSTINC_SUBDIRS_X86_64 = \
 
 EXPINC_SUBDIRS = \
 
-EXPINC_SUBDIRS_PPC = \
-
 EXPINC_SUBDIRS_I386 = \
 
 EXPINC_SUBDIRS_X86_64 = \
 
+
 DATAFILES = \
 	mac.h \
 	mac_policy.h
@@ -49,7 +46,7 @@ INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
 # /System/Library/Frameworks/Kernel.framework/PrivateHeaders
 INSTALL_KF_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES}
 
-SETUP_SUBDIRS = conf
+SETUP_SUBDIRS =
 
 COMP_SUBDIRS = conf
 
diff --git a/security/conf/MASTER b/security/conf/MASTER
index 18a6f7f1f..8d1598990 100644
--- a/security/conf/MASTER
+++ b/security/conf/MASTER
@@ -64,4 +64,4 @@ options		CONFIG_NO_PANIC_STRINGS			# <no_panic_str>
 options		CONFIG_NO_PRINTF_STRINGS		# <no_printf_str>
 options		CONFIG_NO_KPRINTF_STRINGS		# <no_kprintf_str>
 options		CONFIG_FSE	# file system events	# <config_fse>
-
+options		CONFIG_TRIGGERS	# trigger vnodes	# <config_triggers>
diff --git a/security/conf/MASTER.i386 b/security/conf/MASTER.i386
index 524008c7b..dd4fb5f69 100644
--- a/security/conf/MASTER.i386
+++ b/security/conf/MASTER.i386
@@ -4,7 +4,6 @@
 #  PROFILE	= [ RELEASE profile ]
 #  DEBUG	= [ RELEASE debug ]
 #
-#
 #  EMBEDDED	= [ intel mach libkerncpp audit ]
 #  DEVELOPMENT	= [ EMBEDDED config_dtrace ]
 #
@@ -20,6 +19,7 @@ cpu		"i386"					# <intel>
 options		CONFIG_MACF			# Mandatory Access Control Framework
 options		CONFIG_MACF_SOCKET_SUBSET	# MACF subset of socket support
 options		CONFIG_FSE
+options		CONFIG_TRIGGERS
 #options	CONFIG_MACF_SOCKET
 #options	CONFIG_MACF_NET
 #options	CONFIG_MACF_ALWAYS_LABEL_MBUF
diff --git a/security/conf/MASTER.ppc b/security/conf/MASTER.ppc
deleted file mode 100644
index 8b946ff2a..000000000
--- a/security/conf/MASTER.ppc
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-######################################################################
-#  
-#  Standard Apple MacOS X Configurations:
-#  -------- ---- -------- ---------------
-#
-#  RELEASE = [ppc mach libkerncpp config_dtrace audit]
-#  DEVELOPMENT = [RELEASE]
-#  PROFILE = [RELEASE]
-#  DEBUG = [RELEASE debug]
-#  RELEASE_TRACE = [ RELEASE kdebug ]
-#  DEBUG_TRACE   = [ DEBUG kdebug ]
-#
-######################################################################
-
-#
-# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and 
-# security/conf MASTER files.
-#
-options		CONFIG_MACF			# Mandatory Access Control Framework
-options		CONFIG_MACF_SOCKET_SUBSET	# MACF subset of socket support
-options		CONFIG_FSE
-#options	CONFIG_MACF_SOCKET
-#options	CONFIG_MACF_NET
-#options	CONFIG_MACF_ALWAYS_LABEL_MBUF
-#options	CONFIG_MACF_DEBUG
-#options	CONFIG_MACF_MACH
-options		CONFIG_AUDIT			# Kernel auditing
-
-machine		"ppc"					# <ppc>
-cpu		"ppc"					# <ppc>
diff --git a/security/conf/MASTER.x86_64 b/security/conf/MASTER.x86_64
index 86b65c412..d362cf049 100644
--- a/security/conf/MASTER.x86_64
+++ b/security/conf/MASTER.x86_64
@@ -4,7 +4,6 @@
 #  PROFILE	= [ RELEASE profile ]
 #  DEBUG	= [ RELEASE debug ]
 #
-#
 #  EMBEDDED	= [ intel mach libkerncpp audit ]
 #  DEVELOPMENT	= [ EMBEDDED ]
 #
@@ -17,6 +16,7 @@
 options		CONFIG_MACF			# Mandatory Access Control Framework
 options		CONFIG_MACF_SOCKET_SUBSET	# MACF subset of socket support
 options		CONFIG_FSE
+options		CONFIG_TRIGGERS
 #options	CONFIG_MACF_SOCKET
 #options	CONFIG_MACF_NET
 #options	CONFIG_MACF_ALWAYS_LABEL_MBUF
diff --git a/security/conf/Makefile b/security/conf/Makefile
index f32722158..bdb8f33f8 100644
--- a/security/conf/Makefile
+++ b/security/conf/Makefile
@@ -7,8 +7,7 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
-SETUP_SUBDIRS = \
-	tools
+SETUP_SUBDIRS =
 
 COMP_SUBDIRS = 
 
@@ -24,30 +23,24 @@ else
 export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)
 endif
 
-$(COMPOBJROOT)/doconf:
-	@make build_setup 
+MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(ARCH_CONFIG_LC).$(MACHINE_CONFIG_LC)
 
 $(COMPOBJROOT)/$(SECURITY_KERNEL_CONFIG)/Makefile :  $(SOURCE)/MASTER  \
 	$(SOURCE)/MASTER.$(ARCH_CONFIG_LC) \
 	$(SOURCE)/Makefile.template  \
 	$(SOURCE)/Makefile.$(ARCH_CONFIG_LC)  \
 	$(SOURCE)/files \
-	$(SOURCE)/files.$(ARCH_CONFIG_LC) \
-	$(COMPOBJROOT)/doconf
+	$(SOURCE)/files.$(ARCH_CONFIG_LC)
 	$(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \
 	$(MKDIR) $${doconf_target}; \
 	cd $${doconf_target}; \
 	rm -f $(notdir $?); \
 	cp  $? $${doconf_target}; \
-	$(COMPOBJROOT)/doconf -c -cpu $(ARCH_CONFIG_LC) -d  $(TARGET)/$(SECURITY_KERNEL_CONFIG) $(SECURITY_KERNEL_CONFIG); \
+	if [ -f $(MASTER_CPU_PER_SOC) ]; then cp $(MASTER_CPU_PER_SOC) $${doconf_target}; fi; \
+	$(SRCROOT)/SETUP/config/doconf -c -cpu $(ARCH_CONFIG_LC) -soc $(MACHINE_CONFIG_LC) -d  $(TARGET)/$(SECURITY_KERNEL_CONFIG) $(SECURITY_KERNEL_CONFIG); \
 	);
 
-.ORDER: $(COMPOBJROOT)/$(SECURITY_KERNEL_CONFIG)/Makefile
-
-do_setup_conf: $(COMPOBJROOT)/doconf \
-		$(COMPOBJROOT)/$(SECURITY_KERNEL_CONFIG)/Makefile
-
-do_all: do_setup_conf
+do_all: $(COMPOBJROOT)/$(SECURITY_KERNEL_CONFIG)/Makefile
 	$(_v)next_source=$(subst conf/,,$(SOURCE));			\
 	${MAKE} -C $(COMPOBJROOT)/$(SECURITY_KERNEL_CONFIG)	\
 		MAKEFILES=$(TARGET)/$(SECURITY_KERNEL_CONFIG)/Makefile	\
diff --git a/security/conf/Makefile.i386 b/security/conf/Makefile.i386
index 7da8f08d6..3695a666c 100644
--- a/security/conf/Makefile.i386
+++ b/security/conf/Makefile.i386
@@ -2,17 +2,6 @@
 #BEGIN	Machine dependent Makefile fragment for i386
 ######################################################################
 
-# Enable -Werror for i386 builds
-CFLAGS+=$(WERROR)
-CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD))
-
-# Objects that don't compile cleanly:
-#OBJS_NO_WERROR =		\
-
-OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS))
-
-$(OBJS_WERROR):		WERROR=-Werror
-
 ######################################################################
 #END	Machine dependent Makefile fragment for i386
 ######################################################################
diff --git a/security/conf/Makefile.ppc b/security/conf/Makefile.ppc
deleted file mode 100644
index d71f1d77b..000000000
--- a/security/conf/Makefile.ppc
+++ /dev/null
@@ -1,18 +0,0 @@
-######################################################################
-#BEGIN	Machine dependent Makefile fragment for ppc
-######################################################################
-
-# Enable -Werror for ppc builds
-CFLAGS+=$(WERROR)
-CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD))
-
-# Objects that don't compile cleanly:
-#OBJS_NO_WERROR =		\
-
-OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS))
-
-$(OBJS_WERROR):		WERROR=-Werror
-
-######################################################################
-#END	Machine dependent Makefile fragment for ppc
-######################################################################
diff --git a/security/conf/Makefile.template b/security/conf/Makefile.template
index ebe12b2d7..fd1ffeef0 100644
--- a/security/conf/Makefile.template
+++ b/security/conf/Makefile.template
@@ -26,8 +26,7 @@ include $(MakeInc_def)
 #
 # XXX: CFLAGS
 #
-CFLAGS+= -I. -imacros meta_features.h -DKERNEL -DBSD_KERNEL_PRIVATE \
-	-Wall -fno-common
+CFLAGS+= -I. -include meta_features.h -DBSD_KERNEL_PRIVATE
 
 #
 # Directories for mig generated files
@@ -81,17 +80,17 @@ ${OBJS}: ${OBJSDEPS}
 
 LDOBJS = $(OBJS)
 
-$(COMPONENT).o: $(LDOBJS)
+$(COMPONENT).filelist: $(LDOBJS)
 	$(_v)$(RM) $(RMFLAGS) vers.c
-	$(_v)$(COMPOBJROOT)/newvers \
+	$(_v)$(SRCROOT)/SETUP/newvers \
 	`$(CAT) ${VERSION_FILES}` ${COPYRIGHT_FILES}
 	$(_v)${KCC} $(CFLAGS) $(INCLUDES) -c vers.c
 	@echo LDFILELIST $(COMPONENT)
 	$(_v)( for obj in ${LDOBJS} vers.o; do	\
 		 echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \
-	done; ) > $(COMPONENT).o
+	done; ) > $(COMPONENT).filelist
 
-do_all: $(COMPONENT).o
+do_all: $(COMPONENT).filelist
 
 do_depend: do_all
 	$(_v)${MD} -u Makedep -f -d `ls *.d`;
diff --git a/security/conf/Makefile.x86_64 b/security/conf/Makefile.x86_64
index 64c2b46d5..7b0de925d 100644
--- a/security/conf/Makefile.x86_64
+++ b/security/conf/Makefile.x86_64
@@ -2,17 +2,6 @@
 #BEGIN	Machine dependent Makefile fragment for x86_64
 ######################################################################
 
-# Enable -Werror for x86_64 builds
-CFLAGS+=$(WERROR)
-CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD))
-
-# Objects that don't compile cleanly:
-#OBJS_NO_WERROR =		\
-
-OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS))
-
-$(OBJS_WERROR):		WERROR=-Werror
-
 ######################################################################
 #END	Machine dependent Makefile fragment for x86_64
 ######################################################################
diff --git a/security/conf/files b/security/conf/files
index 295d886c2..8c077cb19 100644
--- a/security/conf/files
+++ b/security/conf/files
@@ -33,3 +33,4 @@ security/mac_pipe.c					optional config_macf
 security/mac_iokit.c					optional config_macf
 security/mac_file.c					optional config_macf
 security/mac_inet.c					optional config_macf_net
+security/mac_priv.c					optional config_macf
diff --git a/security/conf/files.i386 b/security/conf/files.i386
index 8b1378917..e69de29bb 100644
--- a/security/conf/files.i386
+++ b/security/conf/files.i386
@@ -1 +0,0 @@
-
diff --git a/security/conf/files.ppc b/security/conf/files.ppc
deleted file mode 100644
index 8b1378917..000000000
--- a/security/conf/files.ppc
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/security/conf/tools/Makefile b/security/conf/tools/Makefile
deleted file mode 100644
index a8111c252..000000000
--- a/security/conf/tools/Makefile
+++ /dev/null
@@ -1,32 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-SETUP_SUBDIRS = \
-	doconf \
-	newvers
-
-COMP_SUBDIRS = \
-	doconf \
-	newvers
-
-INST_SUBDIRS = \
-
-
-setup_build_all:
-
-do_build_all:
-
-setup_build_install:
-
-do_build_install:
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/security/conf/tools/doconf/Makefile b/security/conf/tools/doconf/Makefile
deleted file mode 100644
index 7794a4ceb..000000000
--- a/security/conf/tools/doconf/Makefile
+++ /dev/null
@@ -1,49 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-COMP_SUBDIRS = \
-
-INST_SUBDIRS = \
-
-
-#
-# Who and where
-#
-BINDIR= 
-ifneq ($(MACHINE_CONFIG), DEFAULT)
-DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)/)
-else
-DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)/)
-endif
-PROGRAM= $(DSTDIR)doconf
-
-# 
-# How to install it
-#
-IFLAGS= -c -m 555
-
-$(PROGRAM): $(DSTDIR)% : $(SOURCE)%.csh
-	@-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS 
-	@sed -e "s/#PROGRAM.*/#`vers_string $(notdir $(PROGRAM))`/" \
-		< $< >$(notdir $(PROGRAM)).VERS;
-	@install $(IFLAGS) $(notdir $(PROGRAM)).VERS $(PROGRAM);
-	@-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS;
-
-do_build_setup: $(PROGRAM)
-
-do_build_all:
-
-setup_build_install:
-
-do_build_install:
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/security/conf/tools/doconf/doconf.csh b/security/conf/tools/doconf/doconf.csh
deleted file mode 100644
index 6fedb4786..000000000
--- a/security/conf/tools/doconf/doconf.csh
+++ /dev/null
@@ -1,321 +0,0 @@
-#!/bin/csh -f
-set path = ($path .)
-######################################################################
-# HISTORY
-#  1-Dec-87  Michael Young (mwyoung) at Carnegie-Mellon University
-#	Added "-verbose" switch, so this script produces no output
-#	in the normal case.
-#
-# 10-Oct-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Flushed cmu_*.h and spin_locks.h
-#	[ V5.1(XF18) ]
-#
-#  6-Apr-87  Avadis Tevanian (avie) at Carnegie-Mellon University
-#	Use MASTER.local and MASTER.<machine>.local for generation of
-#	configuration files in addition to MASTER and MASTER.<machine>.
-#
-# 25-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Removed use of obsolete wb_*.h files when building the feature
-#	list;  modified to save the previous configuration file and
-#	display the differences between it and the new file.
-#	[ V5.1(F8) ]
-#
-# 25-Mar-87  Avadis Tevanian (avie) at Carnegie-Mellon University
-#	If there is no /etc/machine just print out a message telling
-#	user to use the -cpu option.  I thought this script was supposed
-#	to work even without a /etc/machine, but it doesn't... and this
-#	is the easiest way out.
-#
-# 13-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added "romp_fpa.h" file to extra features for the RT.
-#	[ V5.1(F7) ]
-#
-# 11-Mar-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Updated to maintain the appropriate configuration features file
-#	in the "machine" directory whenever the corresponding
-#	configuration is generated.  This replaces the old mechanism of
-#	storing this directly in the <sys/features.h> file since it was
-#	machine dependent and also precluded building programs for more
-#	than one configuration from the same set of sources.
-#	[ V5.1(F6) ]
-#
-# 21-Feb-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Fixed to require wired-in cpu type names for only those
-#	machines where the kernel name differs from that provided by
-#	/etc/machine (i.e. IBMRT => ca and SUN => sun3);  updated to
-#	permit configuration descriptions in both machine independent
-#	and dependent master configuration files so that attributes can
-#	be grouped accordingly.
-#	[ V5.1(F3) ]
-#
-# 17-Jan-87  Mike Accetta (mja) at Carnegie-Mellon University
-#	Updated to work from any directory at the same level as
-#	"conf"; generate configuration from both MASTER and
-#	MASTER.<machine-type> files; added -cpu switch.
-#	[ V5.1(F1) ]
-#
-# 18-Aug-86  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added -make switch and changed meaning of -config;  upgraded to
-#	allow multiple attributes per configuration and to define
-#	configurations in terms of these attributes within MASTER.
-#
-# 14-Apr-83  Mike Accetta (mja) at Carnegie-Mellon University
-#	Added -config switch to only run /etc/config without 
-#	"make depend" and "make".
-#
-######################################################################
-
-set prog=$0
-set prog=$prog:t
-set nonomatch
-set OBJDIR=../BUILD
-if ("`/usr/bin/uname`" == "Rhapsody" ) then
-set CONFIG_DIR=/usr/local/bin
-else
-set CONFIG_DIR=/usr/bin
-endif
-
-unset domake
-unset doconfig
-unset beverbose
-unset MACHINE
-unset profile
-
-while ($#argv >= 1)
-    if ("$argv[1]" =~ -*) then
-        switch ("$argv[1]")
-	case "-c":
-	case "-config":
-	    set doconfig
-	    breaksw
-	case "-m":
-	case "-make":
-	    set domake
-	    breaksw
-	case "-cpu":
-	    if ($#argv < 2) then
-		echo "${prog}: missing argument to ${argv[1]}"
-		exit 1
-	    endif
-	    set MACHINE="$argv[2]"
-	    shift
-	    breaksw
-	case "-d":
-	    if ($#argv < 2) then
-		echo "${prog}: missing argument to ${argv[1]}"
-		exit 1
-	    endif
-	    set OBJDIR="$argv[2]"
-	    shift
-	    breaksw
-	case "-verbose":
-	    set beverbose
-	    breaksw
-	case "-p":
-	case "-profile":
-	    set profile
-	    breaksw
-	default:
-	    echo "${prog}: ${argv[1]}: unknown switch"
-	    exit 1
-	    breaksw
-	endsw
-	shift
-    else
-	break
-    endif
-end
-
-if ($#argv == 0) set argv=(GENERIC)
-
-if (! $?MACHINE) then
-    if (-d /NextApps) then
-	set MACHINE=`hostinfo | awk '/MC680x0/ { printf("m68k") } /MC880x0/ { printf("m88k") }'`
-    endif
-endif
-
-if (! $?MACHINE) then
-    if (-f /etc/machine) then
-	    set MACHINE="`/etc/machine`"
-    else
-	    echo "${prog}: no /etc/machine, specify machine type with -cpu"
-	    echo "${prog}: e.g. ${prog} -cpu VAX CONFIGURATION"
-	    exit 1
-    endif
-endif
-
-set FEATURES_EXTRA=
-
-switch ("$MACHINE")
-    case IBMRT:
-	set cpu=ca
-	set ID=RT
-	set FEATURES_EXTRA="romp_dualcall.h romp_fpa.h"
-	breaksw
-    case SUN:
-	set cpu=sun3
-	set ID=SUN3
-	breaksw
-    default:
-	set cpu=`echo $MACHINE | tr A-Z a-z`
-	set ID=`echo $MACHINE | tr a-z A-Z`
-	breaksw
-endsw
-set FEATURES=../h/features.h
-set FEATURES_H=(cs_*.h mach_*.h net_*.h\
-	        cputypes.h cpus.h vice.h\
-	        $FEATURES_EXTRA)
-set MASTER_DIR=../conf
-set MASTER =   ${MASTER_DIR}/MASTER
-set MASTER_CPU=${MASTER}.${cpu}
-
-set MASTER_LOCAL = ${MASTER}.local
-set MASTER_CPU_LOCAL = ${MASTER_CPU}.local
-if (! -f $MASTER_LOCAL) set MASTER_LOCAL = ""
-if (! -f $MASTER_CPU_LOCAL) set MASTER_CPU_LOCAL = ""
-
-if (! -d $OBJDIR) then
-    if ($?beverbose) then
-        echo "[ creating $OBJDIR ]"
-    endif
-    mkdir -p $OBJDIR
-endif
-
-foreach SYS ($argv)
-    set SYSID=${SYS}_${ID}
-    set SYSCONF=$OBJDIR/config.$SYSID
-    set BLDDIR=$OBJDIR
-    if ($?beverbose) then
-	echo "[ generating $SYSID from $MASTER_DIR/MASTER{,.$cpu}{,.local} ]"
-    endif
-    echo +$SYS \
-    | \
-    cat $MASTER $MASTER_LOCAL $MASTER_CPU $MASTER_CPU_LOCAL - \
-        $MASTER $MASTER_LOCAL $MASTER_CPU $MASTER_CPU_LOCAL \
-    | \
-    sed -n \
-	-e "/^+/{" \
-	   -e "s;[-+];#&;gp" \
-	      -e 't loop' \
-	   -e ': loop' \
-           -e 'n' \
-	   -e '/^#/b loop' \
-	   -e '/^$/b loop' \
-	   -e 's;^\([^#]*\).*#[ 	]*<\(.*\)>[ 	]*$;\2#\1;' \
-	      -e 't not' \
-	   -e 's;\([^#]*\).*;#\1;' \
-	      -e 't not' \
-	   -e ': not' \
-	   -e 's;[ 	]*$;;' \
-	   -e 's;^\!\(.*\);\1#\!;' \
-	   -e 'p' \
-	      -e 't loop' \
-           -e 'b loop' \
-	-e '}' \
-	-e "/^[^#]/d" \
-	-e 's;	; ;g' \
-	-e "s;^# *\([^ ]*\)[ ]*=[ ]*\[\(.*\)\].*;\1#\2;p" \
-    | \
-    awk '-F#' '\
-part == 0 && $1 != "" {\
-	m[$1]=m[$1] " " $2;\
-	next;\
-}\
-part == 0 && $1 == "" {\
-	for (i=NF;i>1;i--){\
-		s=substr($i,2);\
-		c[++na]=substr($i,1,1);\
-		a[na]=s;\
-	}\
-	while (na > 0){\
-		s=a[na];\
-		d=c[na--];\
-		if (m[s] == "") {\
-			f[s]=d;\
-		} else {\
-			nx=split(m[s],x," ");\
-			for (j=nx;j>0;j--) {\
-				z=x[j];\
-				a[++na]=z;\
-				c[na]=d;\
-			}\
-		}\
-	}\
-	part=1;\
-	next;\
-}\
-part != 0 {\
-	if ($1 != "") {\
-		n=split($1,x,",");\
-		ok=0;\
-		for (i=1;i<=n;i++) {\
-			if (f[x[i]] == "+") {\
-				ok=1;\
-			}\
-		}\
-		if (NF > 2 && ok == 0 || NF <= 2 && ok != 0) {\
-			print $2; \
-		}\
-	} else { \
-		print $2; \
-	}\
-}\
-' >$SYSCONF.new
-    if (-z $SYSCONF.new) then
-	echo "${prog}: ${SYSID}: no such configuration in $MASTER_DIR/MASTER{,.$cpu}"
-	rm -f $SYSCONF.new
-    endif
-    if (! -d $BLDDIR) then
-        if ($?beverbose) then
-	    echo "[ creating $BLDDIR ]"
-        endif
-	mkdir -p $BLDDIR
-    endif
-#
-# These paths are used by config.
-#
-# "builddir" is the name of the directory where kernel binaries
-# are put.  It is a single path element, never absolute, and is
-# always relative to "objectdir".  "builddir" is used by config
-# solely to determine where to put files created by "config" (e.g.
-# the created Makefile and *.h's.)
-#
-# "objectdir" is the name of the directory which will hold "builddir".
-# It is a path; if relative, it is relative to the current directory
-# where config is run.  Its sole use is to be prepended to "builddir"
-# to indicate where config-created files are to be placed (see above).
-#
-# "sourcedir" is the location of the sources used to build the kernel.
-# It is a path; if relative, it is relative to the directory specified
-# by the concatenation of "objectdir" and "builddir" (i.e. where the
-# kernel binaries are put).
-#
-    echo 'builddir	"."'			>> $SYSCONF.new
-    set OBJRELDIR=`$RELPATH $OBJROOT $OBJDIR`
-    echo 'objectdir	"'$OBJROOT'/'$OBJRELDIR'"'		>> $SYSCONF.new
-    set SRCDIR=`dirname $SOURCE`
-    echo 'sourcedir	"'$SRCROOT'"'		>> $SYSCONF.new
-    if (-f $SYSCONF) then
-	diff $SYSCONF $SYSCONF.new
-	rm -f $SYSCONF.old
-	mv $SYSCONF $SYSCONF.old
-    endif
-    rm -f $SYSCONF
-    mv $SYSCONF.new $SYSCONF
-    if ($?doconfig) then
-        if ($?beverbose) then
-	    echo "[ configuring $SYSID ]"
-        endif
-	if ($?profile) then
-	    $CONFIG_DIR/config -c $MASTER_DIR -p $SYSCONF
-	else
-	    $CONFIG_DIR/config -c $MASTER_DIR $SYSCONF
-	endif
-    endif
-    if ($?domake) then
-        if ($?beverbose) then
-            echo "[ making $SYSID ]"
-        endif
-        (cd $BLDDIR; make)
-    endif
-end
diff --git a/security/conf/tools/newvers/Makefile b/security/conf/tools/newvers/Makefile
deleted file mode 100644
index a430a1fd5..000000000
--- a/security/conf/tools/newvers/Makefile
+++ /dev/null
@@ -1,47 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-COMP_SUBDIRS = \
-
-INST_SUBDIRS = \
-
-
-#
-# Who and where
-#
-BINDIR= 
-ifneq ($(MACHINE_CONFIG), DEFAULT)
-DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)/)
-else
-DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)/)
-endif
-PROGRAM= $(DSTDIR)newvers
-
-# 
-# How to install it
-#
-IFLAGS= -c -m 555
-
-$(PROGRAM): $(DSTDIR)% : $(SOURCE)%.csh
-	@-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS 
-	@sed -e "s/#PROGRAM.*/#`vers_string $(notdir $(PROGRAM))`/" \
-		< $< >$(notdir $(PROGRAM)).VERS;
-	@install $(IFLAGS) $(notdir $(PROGRAM)).VERS $(PROGRAM);
-	@-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS;
-
-do_build_setup: $(PROGRAM)
-
-do_build_all:
-
-setup_build_install:
-
-do_build_install:
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
diff --git a/security/mac.h b/security/mac.h
index 936abc34a..3e0cf7a89 100644
--- a/security/mac.h
+++ b/security/mac.h
@@ -101,6 +101,16 @@ struct user_mac {
 	user_size_t	m_buflen;
 	user_addr_t	m_string;
 };
+
+struct user32_mac {
+	uint32_t	m_buflen;
+	uint32_t	m_string;
+};
+
+struct user64_mac {
+	uint64_t	m_buflen;
+	uint64_t	m_string;
+};
 #endif /* KERNEL */
 
 /*
@@ -131,6 +141,14 @@ struct user_mac {
 #define MAC_DEVICE_FIREWIRE	"FireWire"
 #define MAC_DEVICE_TYPE_KEY	"DeviceType"
 
+/*
+ * Flags for mac_proc_check_suspend_resume()
+ */
+#define MAC_PROC_CHECK_SUSPEND   		0
+#define MAC_PROC_CHECK_RESUME    		1
+#define MAC_PROC_CHECK_HIBERNATE 		2
+#define MAC_PROC_CHECK_SHUTDOWN_SOCKETS	3
+
 #ifndef KERNEL
 /*
  * Location of the userland MAC framework configuration file.  mac.conf
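The MAC_PROC_CHECK_* constants above select which operation the single entry
point mac_proc_check_suspend_resume() (declared in security/mac_framework.h
later in this patch) is vetting.  A minimal caller-side sketch, assuming a
hypothetical suspend path around it:

/* Illustrative only: gate a process suspend through MACF. */
static int
example_task_suspend(proc_t p)
{
	int error;

	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
	if (error)
		return (error);	/* a policy vetoed the suspend */
	/* ... perform the actual suspend here ... */
	return (0);
}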
diff --git a/security/mac_alloc.h b/security/mac_alloc.h
index 70e1baef0..956b4344b 100644
--- a/security/mac_alloc.h
+++ b/security/mac_alloc.h
@@ -71,6 +71,7 @@ void	mac_zfree	(zone_t zone, void *elem);
 #define Z_COLLECT       2       /* Make zone collectable        */
 #define Z_EXPAND        3       /* Make zone expandable         */
 #define Z_FOREIGN       4       /* Allow collectable zone to contain foreign elements */
+#define Z_CALLERACCT	5	/* Account alloc/free against the caller */
 
 #endif  /* __APPLE_API_EVOLVING */
 #endif	/* _SECURITY_MAC_ALLOC_H_ */
diff --git a/security/mac_audit.c b/security/mac_audit.c
index 504c55ae8..7fe8b5705 100644
--- a/security/mac_audit.c
+++ b/security/mac_audit.c
@@ -74,10 +74,6 @@
 #include <kern/kalloc.h>
 #include <kern/zalloc.h>
 
-
-int mac_audit(__unused int len, __unused u_char *data);
-
-
 #if CONFIG_AUDIT
 
 /* The zone allocator is initialized in mac_base.c. */
@@ -394,13 +390,6 @@ mac_audit_check_postselect(__unused struct ucred *cred, __unused unsigned short
 	return (MAC_AUDIT_DEFAULT);
 }
 
-int
-mac_audit(__unused int len, __unused u_char *data)
-{
-
-	return (0);
-}
-
 int
 mac_audit_text(__unused char *text, __unused mac_policy_handle_t handle)
 {
diff --git a/security/mac_base.c b/security/mac_base.c
index 8b2eabff2..1b67d3c0e 100644
--- a/security/mac_base.c
+++ b/security/mac_base.c
@@ -140,7 +140,7 @@ MODULE_VERSION(kernel_mac_support, 1);
 
 static unsigned int mac_max_slots = MAC_MAX_SLOTS;
 static unsigned int mac_slot_offsets_free = (1 << MAC_MAX_SLOTS) - 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, max_slots, CTLFLAG_RD,
+SYSCTL_UINT(_security_mac, OID_AUTO, max_slots, CTLFLAG_RD | CTLFLAG_LOCKED,
     &mac_max_slots, 0, "");
 
 /*
@@ -163,7 +163,7 @@ int	mac_late = 0;
  */
 #if CONFIG_MACF_NET
 unsigned int mac_label_mbufs	= 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, label_mbufs, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, label_mbufs, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&mac_label_mbufs, 0, "Label all MBUFs");
 #endif
 
@@ -183,84 +183,84 @@ static int	mac_labelmbufs = 0;
  * be a problem.
  */
 unsigned int	mac_label_vnodes = 0;
-SYSCTL_UINT(_security_mac, OID_AUTO, labelvnodes, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, labelvnodes, CTLFLAG_RW | CTLFLAG_LOCKED,
     &mac_label_vnodes, 0, "Label all vnodes");
 
 
 unsigned int	mac_mmap_revocation = 0;
-SYSCTL_UINT(_security_mac, OID_AUTO, mmap_revocation, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, mmap_revocation, CTLFLAG_RW | CTLFLAG_LOCKED,
     &mac_mmap_revocation, 0, "Revoke mmap access to files on subject "
     "relabel");
 
 unsigned int	mac_mmap_revocation_via_cow = 0;
-SYSCTL_UINT(_security_mac, OID_AUTO, mmap_revocation_via_cow, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, mmap_revocation_via_cow, CTLFLAG_RW | CTLFLAG_LOCKED,
     &mac_mmap_revocation_via_cow, 0, "Revoke mmap access to files via "
     "copy-on-write semantics, or by removing all write access");
 
 unsigned int mac_device_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, device_enforce, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, device_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &mac_device_enforce, 0, "Enforce MAC policy on device operations");
 
 unsigned int mac_file_enforce = 0;
-SYSCTL_UINT(_security_mac, OID_AUTO, file_enforce, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, file_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&mac_file_enforce, 0, "Enforce MAC policy on file operations");
 
 unsigned int mac_iokit_enforce = 0;
-SYSCTL_UINT(_security_mac, OID_AUTO, iokit_enforce, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, iokit_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&mac_file_enforce, 0, "Enforce MAC policy on IOKit operations");
 
 unsigned int	mac_pipe_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, pipe_enforce, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, pipe_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
     &mac_pipe_enforce, 0, "Enforce MAC policy on pipe operations");
 
 unsigned int	mac_posixsem_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, posixsem_enforce, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, posixsem_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
     &mac_posixsem_enforce, 0, "Enforce MAC policy on POSIX semaphores");
 
 unsigned int mac_posixshm_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, posixshm_enforce, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, posixshm_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
     &mac_posixshm_enforce, 0, "Enforce MAC policy on Posix Shared Memory");
 
 unsigned int	mac_proc_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, proc_enforce, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, proc_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &mac_proc_enforce, 0, "Enforce MAC policy on process operations");
 
 unsigned int mac_socket_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, socket_enforce, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, socket_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&mac_socket_enforce, 0, "Enforce MAC policy on socket operations");
 
 unsigned int	mac_system_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, system_enforce, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, system_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
     &mac_system_enforce, 0, "Enforce MAC policy on system-wide interfaces");
 
 unsigned int	mac_sysvmsg_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, sysvmsg_enforce, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, sysvmsg_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
     &mac_sysvmsg_enforce, 0, "Enforce MAC policy on System V IPC message queues");
 
 unsigned int	mac_sysvsem_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, sysvsem_enforce, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, sysvsem_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
     &mac_sysvsem_enforce, 0, "Enforce MAC policy on System V IPC semaphores");
 
 unsigned int	mac_sysvshm_enforce = 1;
-SYSCTL_INT(_security_mac, OID_AUTO, sysvshm_enforce, CTLFLAG_RW,
+SYSCTL_INT(_security_mac, OID_AUTO, sysvshm_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
     &mac_sysvshm_enforce, 0, "Enforce MAC policy on System V Shared Memory");
 
 unsigned int	mac_vm_enforce = 1;
-SYSCTL_INT(_security_mac, OID_AUTO, vm_enforce, CTLFLAG_RW,
+SYSCTL_INT(_security_mac, OID_AUTO, vm_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &mac_vm_enforce, 0, "Enforce MAC policy on VM operations");
 
 unsigned int	mac_vnode_enforce = 1;
-SYSCTL_UINT(_security_mac, OID_AUTO, vnode_enforce, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, vnode_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
 	   &mac_vnode_enforce, 0, "Enforce MAC policy on vnode operations");
 
 
 #if CONFIG_MACF_MACH
 unsigned int	mac_port_enforce = 0;
-SYSCTL_UINT(_security_mac, OID_AUTO, port_enforce, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, port_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
     &mac_port_enforce, 0, "Enforce MAC policy on Mach port operations");
 
 unsigned int	mac_task_enforce = 0;
-SYSCTL_UINT(_security_mac, OID_AUTO, task_enforce, CTLFLAG_RW,
+SYSCTL_UINT(_security_mac, OID_AUTO, task_enforce, CTLFLAG_RW | CTLFLAG_LOCKED,
     &mac_task_enforce, 0, "Enforce MAC policy on Mach task operations");
 #endif
 
@@ -1346,12 +1346,15 @@ __mac_get_pid(struct proc *p, struct __mac_get_pid_args *uap, int *ret __unused)
 
 	AUDIT_ARG(pid, uap->pid);
 	if (IS_64BIT_PROCESS(p)) {
-		error = copyin(uap->mac_p, &mac, sizeof(mac));
+		struct user64_mac mac64;
+		error = copyin(uap->mac_p, &mac64, sizeof(mac64));
+		mac.m_buflen = mac64.m_buflen;
+		mac.m_string = mac64.m_string;
 	} else {
-		struct mac mac32;
+		struct user32_mac mac32;
 		error = copyin(uap->mac_p, &mac32, sizeof(mac32));
 		mac.m_buflen = mac32.m_buflen;
-		mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
+		mac.m_string = mac32.m_string;
 	}
 	if (error)
 		return (error);
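The same IS_64BIT_PROCESS()/copyin() sequence recurs in every __mac_get_* and
__mac_set_* entry point touched below.  A sketch of how the new
user32_mac/user64_mac structures could be funneled through one helper;
mac_copyin_user_mac() is a hypothetical name, not part of this patch:

/* Hypothetical helper: normalize a user "struct mac" into kernel form. */
static int
mac_copyin_user_mac(proc_t p, user_addr_t mac_p, struct user_mac *mac)
{
	int error;

	if (IS_64BIT_PROCESS(p)) {
		struct user64_mac mac64;

		error = copyin(mac_p, &mac64, sizeof(mac64));
		if (error)
			return (error);
		mac->m_buflen = mac64.m_buflen;
		mac->m_string = mac64.m_string;
	} else {
		struct user32_mac mac32;

		error = copyin(mac_p, &mac32, sizeof(mac32));
		if (error)
			return (error);
		mac->m_buflen = mac32.m_buflen;
		mac->m_string = mac32.m_string;
	}
	return (0);
}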
@@ -1397,12 +1400,15 @@ __mac_get_proc(proc_t p, struct __mac_get_proc_args *uap, int *ret __unused)
 	size_t ulen;
 
 	if (IS_64BIT_PROCESS(p)) {
-		error = copyin(uap->mac_p, &mac, sizeof(mac));
+		struct user64_mac mac64;
+		error = copyin(uap->mac_p, &mac64, sizeof(mac64));
+		mac.m_buflen = mac64.m_buflen;
+		mac.m_string = mac64.m_string;
 	} else {
-		struct mac mac32;
+		struct user32_mac mac32;
 		error = copyin(uap->mac_p, &mac32, sizeof(mac32));
 		mac.m_buflen = mac32.m_buflen;
-		mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
+		mac.m_string = mac32.m_string;
 	}
 	if (error)
 		return (error);
@@ -1444,12 +1450,15 @@ __mac_set_proc(proc_t p, struct __mac_set_proc_args *uap, int *ret __unused)
 	size_t ulen;
 
 	if (IS_64BIT_PROCESS(p)) {
-		error = copyin(uap->mac_p, &mac, sizeof(mac));
+		struct user64_mac mac64;
+		error = copyin(uap->mac_p, &mac64, sizeof(mac64));
+		mac.m_buflen = mac64.m_buflen;
+		mac.m_string = mac64.m_string;
 	} else {
-		struct mac mac32;
+		struct user32_mac mac32;
 		error = copyin(uap->mac_p, &mac32, sizeof(mac32));
 		mac.m_buflen = mac32.m_buflen;
-		mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
+		mac.m_string = mac32.m_string;
 	}
 	if (error)
 		return (error);
@@ -1526,12 +1535,15 @@ __mac_get_lcid(proc_t p, struct __mac_get_lcid_args *uap, int *ret __unused)
 
 	AUDIT_ARG(value32, uap->lcid);
 	if (IS_64BIT_PROCESS(p)) {
-		error = copyin(uap->mac_p, &mac, sizeof(mac));
+		struct user64_mac mac64;
+		error = copyin(uap->mac_p, &mac64, sizeof(mac64));
+		mac.m_buflen = mac64.m_buflen;
+		mac.m_string = mac64.m_string;
 	} else {
-		struct mac mac32;
+		struct user32_mac mac32;
 		error = copyin(uap->mac_p, &mac32, sizeof(mac32));
 		mac.m_buflen = mac32.m_buflen;
-		mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
+		mac.m_string = mac32.m_string;
 	}
 
 	if (error)
@@ -1590,12 +1602,15 @@ __mac_get_lctx(proc_t p, struct __mac_get_lctx_args *uap, int *ret __unused)
 	size_t ulen;
 
 	if (IS_64BIT_PROCESS(p)) {
-		error = copyin(uap->mac_p, &mac, sizeof(mac));
+		struct user64_mac mac64;
+		error = copyin(uap->mac_p, &mac64, sizeof(mac64));
+		mac.m_buflen = mac64.m_buflen;
+		mac.m_string = mac64.m_string;
 	} else {
-		struct mac mac32;
+		struct user32_mac mac32;
 		error = copyin(uap->mac_p, &mac32, sizeof(mac32));
 		mac.m_buflen = mac32.m_buflen;
-		mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
+		mac.m_string = mac32.m_string;
 	}
 
 	if (error)
@@ -1643,12 +1658,15 @@ __mac_set_lctx(proc_t p, struct __mac_set_lctx_args *uap, int *ret __unused)
 	size_t ulen;
 
 	if (IS_64BIT_PROCESS(p)) {
-		error = copyin(uap->mac_p, &mac, sizeof(mac));
+		struct user64_mac mac64;
+		error = copyin(uap->mac_p, &mac64, sizeof(mac64));
+		mac.m_buflen = mac64.m_buflen;
+		mac.m_string = mac64.m_string;
 	} else {
-		struct mac mac32;
+		struct user32_mac mac32;
 		error = copyin(uap->mac_p, &mac32, sizeof(mac32));
 		mac.m_buflen = mac32.m_buflen;
-		mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
+		mac.m_string = mac32.m_string;
 	}
 	if (error)
 		return (error);
@@ -1732,12 +1750,15 @@ __mac_get_fd(proc_t p, struct __mac_get_fd_args *uap, int *ret __unused)
 	AUDIT_ARG(fd, uap->fd);
 
 	if (IS_64BIT_PROCESS(p)) {
-		error = copyin(uap->mac_p, &mac, sizeof(mac));
+		struct user64_mac mac64;
+		error = copyin(uap->mac_p, &mac64, sizeof(mac64));
+		mac.m_buflen = mac64.m_buflen;
+		mac.m_string = mac64.m_string;
 	} else {
-		struct mac mac32;
+		struct user32_mac mac32;
 		error = copyin(uap->mac_p, &mac32, sizeof(mac32));
 		mac.m_buflen = mac32.m_buflen;
-		mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
+		mac.m_string = mac32.m_string;
 	}
 
 	if (error) 
@@ -1834,12 +1855,15 @@ mac_get_filelink(proc_t p, user_addr_t mac_p, user_addr_t path_p, int follow)
 	size_t ulen;
 
 	if (IS_64BIT_PROCESS(p)) {
-		error = copyin(mac_p, &mac, sizeof(mac));
+		struct user64_mac mac64;
+		error = copyin(mac_p, &mac64, sizeof(mac64));
+		mac.m_buflen = mac64.m_buflen;
+		mac.m_string = mac64.m_string;
 	} else {
-		struct mac mac32;
+		struct user32_mac mac32;
 		error = copyin(mac_p, &mac32, sizeof(mac32));
 		mac.m_buflen = mac32.m_buflen;
-		mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
+		mac.m_string = mac32.m_string;
 	}
 
 	if (error)
@@ -1862,7 +1886,7 @@ mac_get_filelink(proc_t p, user_addr_t mac_p, user_addr_t path_p, int follow)
 
 	ctx = vfs_context_current();
 
-	NDINIT(&nd, LOOKUP,
+	NDINIT(&nd, LOOKUP, OP_LOOKUP,
 		LOCKLEAF | (follow ? FOLLOW : NOFOLLOW) | AUDITVNPATH1,
 		UIO_USERSPACE, path_p, ctx);
 	error = namei(&nd);
@@ -1926,12 +1950,15 @@ __mac_set_fd(proc_t p, struct __mac_set_fd_args *uap, int *ret __unused)
 	AUDIT_ARG(fd, uap->fd);
 
 	if (IS_64BIT_PROCESS(p)) {
-		error = copyin(uap->mac_p, &mac, sizeof(mac));
+		struct user64_mac mac64;
+		error = copyin(uap->mac_p, &mac64, sizeof(mac64));
+		mac.m_buflen = mac64.m_buflen;
+		mac.m_string = mac64.m_string;
 	} else {
-		struct mac mac32;
+		struct user32_mac mac32;
 		error = copyin(uap->mac_p, &mac32, sizeof(mac32));
 		mac.m_buflen = mac32.m_buflen;
-		mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
+		mac.m_string = mac32.m_string;
 	}
 	if (error) 
 		return (error);
@@ -2034,12 +2061,15 @@ mac_set_filelink(proc_t p, user_addr_t mac_p, user_addr_t path_p,
 		return ENOSYS;
 
 	if (IS_64BIT_PROCESS(p)) {
-		error = copyin(mac_p, &mac, sizeof(mac));
+		struct user64_mac mac64;
+		error = copyin(mac_p, &mac64, sizeof(mac64));
+		mac.m_buflen = mac64.m_buflen;
+		mac.m_string = mac64.m_string;
 	} else {
-		struct mac mac32;
+		struct user32_mac mac32;
 		error = copyin(mac_p, &mac32, sizeof(mac32));
 		mac.m_buflen = mac32.m_buflen;
-		mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
+		mac.m_string = mac32.m_string;
 	}
 	if (error)
 		return (error);
@@ -2066,7 +2096,7 @@ mac_set_filelink(proc_t p, user_addr_t mac_p, user_addr_t path_p,
 		return (error);
 	}
 
-	NDINIT(&nd, LOOKUP,
+	NDINIT(&nd, LOOKUP, OP_LOOKUP,
 		LOCKLEAF | (follow ? FOLLOW : NOFOLLOW) | AUDITVNPATH1,
 		UIO_USERSPACE, path_p, ctx);
 	error = namei(&nd);
@@ -2175,12 +2205,15 @@ mac_mount_label_get(struct mount *mp, user_addr_t mac_p)
 	size_t ulen;
 
 	if (IS_64BIT_PROCESS(current_proc())) {
-		error = copyin(mac_p, &mac, sizeof(mac));
+		struct user64_mac mac64;
+		error = copyin(mac_p, &mac64, sizeof(mac64));
+		mac.m_buflen = mac64.m_buflen;
+		mac.m_string = mac64.m_string;
 	} else {
-		struct mac mac32;
+		struct user32_mac mac32;
 		error = copyin(mac_p, &mac32, sizeof(mac32));
 		mac.m_buflen = mac32.m_buflen;
-		mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
+		mac.m_string = mac32.m_string;
 	}
 	if (error)
 		return (error);
@@ -2232,7 +2265,7 @@ __mac_get_mount(proc_t p __unused, struct __mac_get_mount_args *uap,
 	struct mount *mp;
 	int error;
 
-	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
+	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1,
 		UIO_USERSPACE, uap->path, ctx);
 	error = namei(&nd);
 	if (error) {
diff --git a/security/mac_framework.h b/security/mac_framework.h
index 8331596a5..20780b249 100644
--- a/security/mac_framework.h
+++ b/security/mac_framework.h
@@ -122,6 +122,17 @@ struct vnode;
 struct vnode_attr;
 struct vop_setlabel_args;
 
+#ifndef __IOKIT_PORTS_DEFINED__
+#define __IOKIT_PORTS_DEFINED__
+#ifdef __cplusplus
+class OSObject;
+typedef OSObject *io_object_t;
+#else
+struct OSObject;
+typedef struct OSObject *io_object_t;
+#endif
+#endif /* __IOKIT_PORTS_DEFINED__ */
+
 /*@ macros */
 #define	VNODE_LABEL_CREATE	1
 
@@ -212,6 +223,9 @@ int	mac_inpcb_label_init(struct inpcb *inp, int flag);
 void	mac_inpcb_label_recycle(struct inpcb *inp);
 void	mac_inpcb_label_update(struct socket *so);
 int	mac_iokit_check_device(char *devtype, struct mac_module_data *mdata);
+int	mac_iokit_check_open(kauth_cred_t cred, io_object_t user_client, unsigned int user_client_type);
+int	mac_iokit_check_set_properties(kauth_cred_t cred, io_object_t registry_entry, io_object_t properties);
+int	mac_iokit_check_hid_control(kauth_cred_t cred);
 void	mac_ipq_label_associate(struct mbuf *fragment, struct ipq *ipq);
 int	mac_ipq_label_compare(struct mbuf *fragment, struct ipq *ipq);
 void	mac_ipq_label_destroy(struct ipq *ipq);
@@ -299,7 +313,7 @@ int	mac_posixshm_check_mmap(kauth_cred_t cred, struct pshminfo *pshm,
 int	mac_posixshm_check_open(kauth_cred_t cred, struct pshminfo *pshm);
 int	mac_posixshm_check_stat(kauth_cred_t cred, struct pshminfo *pshm);
 int	mac_posixshm_check_truncate(kauth_cred_t cred, struct pshminfo *pshm,
-	    size_t s);
+	    off_t s);
 int	mac_posixshm_check_unlink(kauth_cred_t cred, struct pshminfo *pshm,
 	    const char *name);
 void	mac_posixshm_vnode_label_associate(kauth_cred_t cred,
@@ -309,6 +323,8 @@ void	mac_posixshm_label_associate(kauth_cred_t cred,
 	    struct pshminfo *pshm, const char *name);
 void	mac_posixshm_label_destroy(struct pshminfo *pshm);
 void	mac_posixshm_label_init(struct pshminfo *pshm);
+int	mac_priv_check(kauth_cred_t cred, int priv);
+int	mac_priv_grant(kauth_cred_t cred, int priv);
 int	mac_proc_check_debug(proc_t proc1, proc_t proc2);
 int	mac_proc_check_fork(proc_t proc);
 int	mac_proc_check_suspend_resume(proc_t proc, int sr);
@@ -318,6 +334,8 @@ int	mac_proc_check_getaudit(proc_t proc);
 int	mac_proc_check_getauid(proc_t proc);
 int     mac_proc_check_getlcid(proc_t proc1, proc_t proc2,
 	    pid_t pid);
+int	mac_proc_check_map_anon(proc_t proc, user_addr_t u_addr,
+	    user_size_t u_size, int prot, int flags, int *maxprot);
 int	mac_proc_check_mprotect(proc_t proc,
 	    user_addr_t addr, user_size_t size, int prot);
 int	mac_proc_check_run_cs_invalid(proc_t proc);
@@ -373,6 +391,7 @@ int	mac_system_check_acct(kauth_cred_t cred, struct vnode *vp);
 int	mac_system_check_audit(kauth_cred_t cred, void *record, int length);
 int	mac_system_check_auditctl(kauth_cred_t cred, struct vnode *vp);
 int	mac_system_check_auditon(kauth_cred_t cred, int cmd);
+int	mac_system_check_chud(kauth_cred_t cred);
 int	mac_system_check_host_priv(kauth_cred_t cred);
 int	mac_system_check_nfsd(kauth_cred_t cred);
 int	mac_system_check_reboot(kauth_cred_t cred, int howto);
@@ -426,7 +445,6 @@ void	mac_sysvshm_label_associate(kauth_cred_t cred,
 void	mac_sysvshm_label_destroy(struct shmid_kernel *shmsegptr);
 void	mac_sysvshm_label_init(struct shmid_kernel* shmsegptr);
 void	mac_sysvshm_label_recycle(struct shmid_kernel *shmsegptr);
-void	mac_thread_userret(int code, int error, struct thread *thread);
 int	mac_vnode_check_access(vfs_context_t ctx, struct vnode *vp,
 	    int acc_mode);
 int	mac_vnode_check_chdir(vfs_context_t ctx, struct vnode *dvp);
@@ -440,6 +458,7 @@ int	mac_vnode_check_exchangedata(vfs_context_t ctx, struct vnode *v1,
 	    struct vnode *v2);
 int	mac_vnode_check_exec(vfs_context_t ctx, struct vnode *vp,
 	    struct image_params *imgp);
+int	mac_vnode_check_fsgetpath(vfs_context_t ctx, struct vnode *vp);
 int	mac_vnode_check_signature(struct vnode *vp, unsigned char *sha1,
 	    void * signature, size_t size);
 int     mac_vnode_check_getattrlist(vfs_context_t ctx, struct vnode *vp,
@@ -468,6 +487,8 @@ int	mac_vnode_check_rename_from(vfs_context_t ctx, struct vnode *dvp,
 int	mac_vnode_check_rename_to(vfs_context_t ctx, struct vnode *dvp,
 	    struct vnode *vp, int samedir, struct componentname *cnp);
 int	mac_vnode_check_revoke(vfs_context_t ctx, struct vnode *vp);
+int	mac_vnode_check_searchfs(vfs_context_t ctx, struct vnode *vp,
+	    struct attrlist *alist);
 int     mac_vnode_check_select(vfs_context_t ctx, struct vnode *vp,
 	    int which);
 int     mac_vnode_check_setattrlist(vfs_context_t ctxd, struct vnode *vp,
@@ -516,6 +537,8 @@ void	mac_vnode_label_update_extattr(struct mount *mp, struct vnode *vp,
 	    const char *name);
 int	mac_vnode_notify_create(vfs_context_t ctx, struct mount *mp,
 	    struct vnode *dvp, struct vnode *vp, struct componentname *cnp);
+void	mac_vnode_notify_rename(vfs_context_t ctx, struct vnode *vp,
+	    struct vnode *dvp, struct componentname *cnp);
 int	vnode_label(struct mount *mp, struct vnode *dvp, struct vnode *vp,
 	    struct componentname *cnp, int flags, vfs_context_t ctx);
 void	vnode_relabel(struct vnode *vp);
diff --git a/security/mac_internal.h b/security/mac_internal.h
index 283fef64a..6e8ae3d2a 100644
--- a/security/mac_internal.h
+++ b/security/mac_internal.h
@@ -332,6 +332,44 @@ struct label *mac_mbuf_to_label(struct mbuf *m);
 	}								\
 } while (0)
 
+/*
+ * MAC_GRANT performs the designated check by walking the policy
+ * module list and checking with each as to how it feels about the
+ * request.  Unlike MAC_CHECK, it grants if any policies return '0',
+ * and otherwise returns EPERM.  Note that it returns its value via
+ * 'error' in the scope of the caller.
+ */
+#define MAC_GRANT(check, args...) do {					\
+	struct mac_policy_conf *mpc;					\
+	u_int i;							\
+									\
+	error = EPERM;							\
+	for (i = 0; i < mac_policy_list.staticmax; i++) {		\
+		mpc = mac_policy_list.entries[i].mpc;			\
+		if (mpc == NULL)					\
+			continue;					\
+									\
+		if (mpc->mpc_ops->mpo_ ## check != NULL) {		\
+			if (mpc->mpc_ops->mpo_ ## check (args) == 0)	\
+				error = 0;				\
+		}							\
+	}								\
+	if (mac_policy_list_conditional_busy() != 0) {			\
+		for (; i <= mac_policy_list.maxindex; i++) {		\
+			mpc = mac_policy_list.entries[i].mpc;		\
+			if (mpc == NULL)				\
+				continue;				\
+									\
+			if (mpc->mpc_ops->mpo_ ## check != NULL) {	\
+				if (mpc->mpc_ops->mpo_ ## check (args)	\
+				    == 0)				\
+					error = 0;			\
+			}						\
+		}							\
+		mac_policy_list_unbusy();				\
+	}								\
+} while (0)
+
 /*
  * MAC_BOOLEAN performs the designated boolean composition by walking
  * the module list, invoking each instance of the operation, and
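An entry point consumes MAC_GRANT exactly the way the mac_iokit.c functions
below consume MAC_CHECK: declare a local 'error' and let the macro compose
the verdict.  A minimal sketch along the lines of the new mac_priv_grant()
(whose full body lies beyond this excerpt):

int
mac_priv_grant(kauth_cred_t cred, int priv)
{
	int error;

	/* 'error' starts as EPERM inside the macro; any policy's 0 grants. */
	MAC_GRANT(priv_grant, cred, priv);
	return (error);
}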
diff --git a/security/mac_iokit.c b/security/mac_iokit.c
index 6212d59d3..0207dbf20 100644
--- a/security/mac_iokit.c
+++ b/security/mac_iokit.c
@@ -73,3 +73,30 @@ mac_iokit_check_device(char *devtype, struct mac_module_data *mdata)
 	MAC_CHECK(iokit_check_device, devtype, mdata);
 	return (error);
 }
+
+int
+mac_iokit_check_open(kauth_cred_t cred, io_object_t user_client, unsigned int user_client_type)
+{
+	int error;
+
+	MAC_CHECK(iokit_check_open, cred, user_client, user_client_type);
+	return (error);
+}
+
+int
+mac_iokit_check_set_properties(kauth_cred_t cred, io_object_t registry_entry, io_object_t properties)
+{
+	int error;
+
+	MAC_CHECK(iokit_check_set_properties, cred, registry_entry, properties);
+	return (error);
+}
+
+int
+mac_iokit_check_hid_control(kauth_cred_t cred)
+{
+	int error;
+
+	MAC_CHECK(iokit_check_hid_control, cred);
+	return (error);
+}
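On the policy side, a module fills the matching mpo_iokit_check_hid_control
slot in its mac_policy_ops.  A hedged sketch that limits synthetic HID events
to the superuser (the uid-0 test is purely illustrative):

/* Hypothetical policy hook; not part of this patch. */
static int
example_iokit_check_hid_control(kauth_cred_t cred)
{
	if (kauth_cred_getuid(cred) == 0)
		return (0);	/* allow root */
	return (EPERM);		/* deny everyone else */
}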
diff --git a/security/mac_label.c b/security/mac_label.c
index 0f4e21524..b05c43b84 100644
--- a/security/mac_label.c
+++ b/security/mac_label.c
@@ -48,6 +48,7 @@ mac_labelzone_init(void)
 	    sizeof(struct label), "MAC Labels");
 	zone_change(zone_label, Z_EXPAND, TRUE);
 	zone_change(zone_label, Z_EXHAUST, FALSE);
+	zone_change(zone_label, Z_CALLERACCT, FALSE);
 }
 
 struct label *
diff --git a/security/mac_net.c b/security/mac_net.c
index cd452be5e..e06837b06 100644
--- a/security/mac_net.c
+++ b/security/mac_net.c
@@ -307,8 +307,8 @@ mac_mbuf_label_init(struct mbuf *m, int flag)
 	if (mac_label_mbufs == 0)
 		return (0);
 
-	tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_MACLABEL,
-			  sizeof(struct label), flag);
+	tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_MACLABEL,
+			  sizeof(struct label), flag, m);
 	if (tag == NULL) {
 		printf("%s(): m_tag_alloc() failed!\n", __func__);
 		return (ENOBUFS);
diff --git a/security/mac_policy.h b/security/mac_policy.h
index cfbe80987..836be3cc0 100644
--- a/security/mac_policy.h
+++ b/security/mac_policy.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -115,6 +115,16 @@ struct vnode;
 typedef struct ucred *kauth_cred_t;
 #endif	/* !_KAUTH_CRED_T */
 
+#ifndef __IOKIT_PORTS_DEFINED__
+#define __IOKIT_PORTS_DEFINED__
+#ifdef __cplusplus
+class OSObject;
+typedef OSObject *io_object_t;
+#else
+struct OSObject;
+typedef struct OSObject *io_object_t;
+#endif
+#endif /* __IOKIT_PORTS_DEFINED__ */
 
 /*-
  * MAC entry points are generally named using the following template:
@@ -1199,6 +1209,56 @@ typedef int mpo_iokit_check_device_t(
 	char *devtype,
 	struct mac_module_data *mdata
 );
+/**
+  @brief Access control check for opening an I/O Kit device
+  @param cred Subject credential
+  @param user_client User client instance
+  @param user_client_type User client type
+
+  Determine whether the subject identified by the credential can open
+  the passed I/O Kit user client instance of the passed type.
+
+  @return Return 0 if access is granted, otherwise an appropriate value
+  for errno should be returned.
+*/
+typedef int mpo_iokit_check_open_t(
+	kauth_cred_t cred,
+	io_object_t user_client,
+	unsigned int user_client_type
+);
+/**
+  @brief Access control check for setting I/O Kit device properties
+  @param cred Subject credential
+  @param registry_entry Target device
+  @param properties Property list
+
+  Determine whether the subject identified by the credential can set
+  properties on an I/O Kit device.
+
+  @return Return 0 if access is granted, otherwise an appropriate value
+  for errno should be returned.
+*/
+typedef int mpo_iokit_check_set_properties_t(
+	kauth_cred_t cred,
+	io_object_t registry_entry,
+	io_object_t properties
+);
+/**
+  @brief Access control check for software HID control
+  @param cred Subject credential
+
+  Determine whether the subject identified by the credential can
+  control the HID (Human Interface Device) subsystem, such as to
+  post synthetic keypresses, pointer movement and clicks.
+
+  @return Return 0 if access is granted, otherwise an appropriate value
+  for errno should be returned.
+*/
+typedef int mpo_iokit_check_hid_control_t(
+	kauth_cred_t cred
+);
 /**
   @brief Create an IP reassembly queue label
   @param fragment First received IP fragment
@@ -2867,7 +2927,7 @@ typedef int mpo_posixshm_check_truncate_t(
 	kauth_cred_t cred,
 	struct pshminfo *ps,
 	struct label *shmlabel,
-	size_t len
+	off_t len
 );
 /**
   @brief Access control check for POSIX shared memory unlink
@@ -2928,6 +2988,45 @@ typedef void mpo_posixshm_label_destroy_t(
 typedef void mpo_posixshm_label_init_t(
 	struct label *label
 );
+/**
+ @brief Access control check for privileged operations
+ @param cred Subject credential
+ @param priv Requested privilege (see sys/priv.h)
+
+ Determine whether the subject identified by the credential can perform
+ a privileged operation.  Privileged operations are allowed if the cred
+ is the superuser or any policy returns zero for mpo_priv_grant, unless
+ any policy returns nonzero for mpo_priv_check.
+
+ @return Return 0 if access is granted, otherwise EPERM should be returned.
+*/
+typedef int mpo_priv_check_t(
+	kauth_cred_t cred,
+	int priv
+);
+/**
+ @brief Grant regular users the ability to perform privileged operations
+ @param cred Subject credential
+ @param priv Requested privilege (see sys/priv.h)
+
+ Determine whether the subject identified by the credential should be
+ allowed to perform a privileged operation that in the absence of any
+ MAC policy it would not be able to perform.  Privileged operations are
+ allowed if the cred is the superuser or any policy returns zero for
+ mpo_priv_grant, unless any policy returns nonzero for mpo_priv_check.
+
+ Unlike other MAC hooks which can only reduce the privilege of a
+ credential, this hook raises the privilege of a credential when it
+ returns 0.  Extreme care must be taken when implementing this hook to
+ avoid undermining the security of the system.
+
+ @return Return 0 if additional privilege is granted, otherwise EPERM
+ should be returned.
+*/
+typedef int mpo_priv_grant_t(
+	kauth_cred_t cred,
+	int priv
+);
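A policy grants a privilege by returning 0 from this hook and defers by
returning nonzero.  A hedged sketch; PRIV_EXAMPLE and the uid value are
placeholders, not identifiers from sys/priv.h:

#define PRIV_EXAMPLE	10000	/* placeholder privilege number */

/* Hypothetical policy hook: extend one privilege to one designated uid. */
static int
example_priv_grant(kauth_cred_t cred, int priv)
{
	if (priv == PRIV_EXAMPLE && kauth_cred_getuid(cred) == 500)
		return (0);	/* grant */
	return (EPERM);		/* defer to other policies */
}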
 /**
   @brief Access control check for debugging process
   @param cred Subject credential
@@ -3024,6 +3123,37 @@ typedef int mpo_proc_check_getlcid_t(
 	struct proc *p,
 	pid_t pid
 );
+/**
+  @brief Access control check for mmap MAP_ANON
+  @param proc User process requesting the memory
+  @param cred Subject credential
+  @param u_addr Start address of the memory range
+  @param u_size Length of the memory range
+  @param prot mmap protections; see mmap(2)
+  @param flags Type of mapped object; see mmap(2)
+  @param maxprot Maximum rights
+
+  Determine whether the subject identified by the credential should be
+  allowed to obtain anonymous memory using the specified flags and 
+  protections on the new mapping. MAP_ANON will always be present in the
+  flags. Certain combinations of flags with a non-NULL addr may
+  cause a mapping to be rejected before this hook is called. The maxprot field
+  holds the maximum permissions on the new mapping, a combination of
+  VM_PROT_READ, VM_PROT_WRITE and VM_PROT_EXECUTE. To avoid overriding prior
+  access control checks, a policy should only remove flags from maxprot.
+
+  @return Return 0 if access is granted, otherwise an appropriate value for
+  errno should be returned. Suggested failure: EPERM for lack of privilege.
+*/
+typedef int mpo_proc_check_map_anon_t(
+	struct proc *proc,
+	kauth_cred_t cred,
+	user_addr_t u_addr,
+	user_size_t u_size,
+	int prot,
+	int flags,
+	int *maxprot
+);
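Because a policy may only narrow maxprot, a typical hook masks bits out of it
rather than adding any.  A hedged sketch that refuses writable-and-executable
anonymous mappings and withholds execute rights when they were not requested:

/* Hypothetical policy hook; not part of this patch. */
static int
example_proc_check_map_anon(__unused struct proc *proc,
    __unused kauth_cred_t cred, __unused user_addr_t u_addr,
    __unused user_size_t u_size, int prot, __unused int flags,
    int *maxprot)
{
	if ((prot & VM_PROT_WRITE) && (prot & VM_PROT_EXECUTE))
		return (EPERM);		/* no W+X anonymous memory */
	if ((prot & VM_PROT_EXECUTE) == 0)
		*maxprot &= ~VM_PROT_EXECUTE;	/* narrow only, never widen */
	return (0);
}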
 /**
   @brief Access control check for setting memory protections
   @param cred Subject credential
@@ -3821,6 +3951,19 @@ typedef int mpo_system_check_auditon_t(
 	kauth_cred_t cred,
 	int cmd
 );
+/**
+  @brief Access control check for using CHUD facilities
+  @param cred Subject credential
+
+  Determine whether the subject identified by the credential can perform
+  performance-related tasks using the CHUD system call.
+
+  @return Return 0 if access is granted, otherwise an appropriate value for
+  errno should be returned.
+*/
+typedef int mpo_system_check_chud_t(
+	kauth_cred_t cred
+);
 /**
   @brief Access control check for obtaining the host control port
   @param cred Subject credential
@@ -4580,21 +4723,6 @@ typedef void mpo_task_label_update_t(
 	struct label *cred,
 	struct label *task
 );
-/**
-  @brief Perform MAC-related events when a thread returns to user space
-  @param code The number of the syscall/trap that has finished
-  @param error The error code that will be returned to user space
-  @param thread Mach (not BSD) thread that is returning
-
-  This entry point permits policy modules to perform MAC-related
-  events when a thread returns to user space, via a system call
-  return, trap return, or otherwise.
-*/
-typedef void mpo_thread_userret_t(
-	int code,
-	int error,
-	struct thread *thread
-);
 /**
   @brief Check vnode access
   @param cred Subject credential
@@ -4751,6 +4879,23 @@ typedef int mpo_vnode_check_exec_t(
 	struct componentname *cnp,
 	u_int *csflags
 );
+/**
+  @brief Access control check for fsgetpath
+  @param cred Subject credential
+  @param vp Vnode for which a path will be returned
+  @param label Label associated with the vnode
+
+  Determine whether the subject identified by the credential can get the path
+  of the given vnode with fsgetpath.
+
+  @return Return 0 if access is granted, otherwise an appropriate value for
+  errno should be returned.
+*/
+typedef int mpo_vnode_check_fsgetpath_t(
+	kauth_cred_t cred,
+	struct vnode *vp,
+	struct label *label
+);
 /**
   @brief Access control check after determining the code directory hash
  */
@@ -5091,6 +5236,25 @@ typedef int mpo_vnode_check_revoke_t(
 	struct vnode *vp,
 	struct label *label
 );
+/**
+  @brief Access control check for searchfs
+  @param cred Subject credential
+  @param vp Object vnode
+  @param vlabel Policy label for vp
+  @param alist List of attributes used as search criteria
+
+  Determine whether the subject identified by the credential can search the
+  vnode using the searchfs system call.
+
+  @return Return 0 if access is granted, otherwise an appropriate value for
+  errno should be returned.
+*/
+typedef int mpo_vnode_check_searchfs_t(
+	kauth_cred_t cred,
+	struct vnode *vp,
+	struct label *vlabel,
+	struct attrlist *alist
+);
 /**
   @brief Access control check for select
   @param cred Subject credential
@@ -5775,6 +5939,26 @@ typedef int mpo_vnode_notify_create_t(
 	struct componentname *cnp
 );
 
+/**
+  @brief Inform MAC policies that a vnode has been renamed
+  @param cred User credential for the renaming process
+  @param vp Vnode that's being renamed
+  @param label Policy label for vp
+  @param dvp Parent directory for the destination
+  @param dlabel Policy label for dvp
+  @param cnp Component name for the destination
+
+  Inform MAC policies that a vnode has been renamed.
+ */
+typedef void mpo_vnode_notify_rename_t(
+	kauth_cred_t cred,
+	struct vnode *vp,
+	struct label *label,
+	struct vnode *dvp,
+	struct label *dlabel,
+	struct componentname *cnp
+);
+
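Notification hooks return void, so a policy can observe but never veto the
rename.  A hedged sketch that merely logs the event:

/* Hypothetical policy hook; not part of this patch. */
static void
example_vnode_notify_rename(__unused kauth_cred_t cred, struct vnode *vp,
    __unused struct label *label, struct vnode *dvp,
    __unused struct label *dlabel, __unused struct componentname *cnp)
{
	printf("example: vnode %p renamed under directory %p\n", vp, dvp);
}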
 /*
  * Placeholder for future events that may need mac hooks.
  */
@@ -5783,7 +5967,7 @@ typedef void mpo_reserved_hook_t(void);
 /*!
   \struct mac_policy_ops
 */
-#define MAC_POLICY_OPS_VERSION 2 /* inc when new reserved slots are taken */
+#define MAC_POLICY_OPS_VERSION 11 /* inc when new reserved slots are taken */
 struct mac_policy_ops {
 	mpo_audit_check_postselect_t		*mpo_audit_check_postselect;
 	mpo_audit_check_preselect_t		*mpo_audit_check_preselect;
@@ -6036,7 +6220,7 @@ struct mac_policy_ops {
 	mpo_task_label_init_t			*mpo_task_label_init;
 	mpo_task_label_internalize_t		*mpo_task_label_internalize;
 	mpo_task_label_update_t			*mpo_task_label_update;
-	mpo_thread_userret_t			*mpo_thread_userret;
+	mpo_iokit_check_hid_control_t		*mpo_iokit_check_hid_control;
 	mpo_vnode_check_access_t		*mpo_vnode_check_access;
 	mpo_vnode_check_chdir_t			*mpo_vnode_check_chdir;
 	mpo_vnode_check_chroot_t		*mpo_vnode_check_chroot;
@@ -6094,11 +6278,32 @@ struct mac_policy_ops {
 	mpo_vnode_check_uipc_connect_t		*mpo_vnode_check_uipc_connect;
 	mac_proc_check_run_cs_invalid_t		*mpo_proc_check_run_cs_invalid;
 	mpo_proc_check_suspend_resume_t		*mpo_proc_check_suspend_resume;
-	mpo_reserved_hook_t			*mpo_reserved5;
-	mpo_reserved_hook_t			*mpo_reserved6;
-	mpo_reserved_hook_t			*mpo_reserved7;
-	mpo_reserved_hook_t			*mpo_reserved8;
-	mpo_reserved_hook_t			*mpo_reserved9;
+	mpo_reserved_hook_t			*mpo_reserved12;
+	mpo_iokit_check_set_properties_t	*mpo_iokit_check_set_properties;
+	mpo_system_check_chud_t			*mpo_system_check_chud;
+	mpo_vnode_check_searchfs_t		*mpo_vnode_check_searchfs;
+	mpo_priv_check_t			*mpo_priv_check;
+	mpo_priv_grant_t			*mpo_priv_grant;
+	mpo_proc_check_map_anon_t		*mpo_proc_check_map_anon;
+	mpo_vnode_check_fsgetpath_t		*mpo_vnode_check_fsgetpath;
+	mpo_iokit_check_open_t			*mpo_iokit_check_open;
+	mpo_vnode_notify_rename_t		*mpo_vnode_notify_rename;
+	mpo_reserved_hook_t			*mpo_reserved14;
+	mpo_reserved_hook_t			*mpo_reserved15;
+	mpo_reserved_hook_t			*mpo_reserved16;
+	mpo_reserved_hook_t			*mpo_reserved17;
+	mpo_reserved_hook_t			*mpo_reserved18;
+	mpo_reserved_hook_t			*mpo_reserved19;
+	mpo_reserved_hook_t			*mpo_reserved20;
+	mpo_reserved_hook_t			*mpo_reserved21;
+	mpo_reserved_hook_t			*mpo_reserved22;
+	mpo_reserved_hook_t			*mpo_reserved23;
+	mpo_reserved_hook_t			*mpo_reserved24;
+	mpo_reserved_hook_t			*mpo_reserved25;
+	mpo_reserved_hook_t			*mpo_reserved26;
+	mpo_reserved_hook_t			*mpo_reserved27;
+	mpo_reserved_hook_t			*mpo_reserved28;
+	mpo_reserved_hook_t			*mpo_reserved29;
 };
 
 /**
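With the ops table reshaped (MAC_POLICY_OPS_VERSION 2 -> 11), a policy module
still registers the same way: populate mac_policy_ops and mac_policy_conf,
then call mac_policy_register().  A hedged sketch with placeholder names,
reusing the hypothetical hooks sketched above:

static struct mac_policy_ops example_ops = {
	.mpo_iokit_check_hid_control	= example_iokit_check_hid_control,
	.mpo_priv_grant			= example_priv_grant,
	.mpo_proc_check_map_anon	= example_proc_check_map_anon,
	.mpo_vnode_notify_rename	= example_vnode_notify_rename,
};

static struct mac_policy_conf example_conf = {
	.mpc_name		= "example",
	.mpc_fullname		= "Example Policy",
	.mpc_ops		= &example_ops,
	.mpc_loadtime_flags	= MPC_LOADTIME_FLAG_UNLOADOK,
};

static mac_policy_handle_t example_handle;

/* Called from the module's start routine; xd is module-private data. */
int
example_policy_init(void *xd)
{
	return (mac_policy_register(&example_conf, &example_handle, xd));
}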
diff --git a/security/mac_posix_shm.c b/security/mac_posix_shm.c
index c42cfbb46..f6cc28e56 100644
--- a/security/mac_posix_shm.c
+++ b/security/mac_posix_shm.c
@@ -178,7 +178,7 @@ mac_posixshm_check_stat(kauth_cred_t cred, struct pshminfo *shm)
 
 int
 mac_posixshm_check_truncate(kauth_cred_t cred, struct pshminfo *shm,
-    size_t size)
+    off_t size)
 {
 	int error = 0;
 
diff --git a/security/mac_priv.c b/security/mac_priv.c
new file mode 100644
index 000000000..7d72ce88d
--- /dev/null
+++ b/security/mac_priv.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*-
+ * Copyright (c) 2006 nCircle Network Security, Inc.
+ * Copyright (c) 2009 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson for the TrustedBSD
+ * Project under contract to nCircle Network Security, Inc.
+ *
+ * This software was developed at the University of Cambridge Computer
+ * Laboratory with support from a grant from Google, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR, NCIRCLE NETWORK SECURITY,
+ * INC., OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * MAC checks for system privileges.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+
+#include <security/mac_internal.h>
+
+/*
+ * The MAC Framework interacts with kernel privilege checks in two ways: it
+ * may restrict the granting of privilege to a subject, and it may grant
+ * additional privileges to the subject.  Policies may implement none, one,
+ * or both of these entry points.  Restriction of privilege by any policy
+ * always overrides granting of privilege by any policy or other privilege
+ * mechanism.  See kern_priv.c:priv_check_cred() for details of the
+ * composition.
+ */
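+
+/*
+ * Illustrative sketch of that composition, roughly as priv_check_cred()
+ * is expected to apply it (a summary, not the authoritative code; see
+ * kern_priv.c):
+ *
+ *	error = mac_priv_check(cred, priv);
+ *	if (error)
+ *		return (error);		a policy restricted the privilege
+ *	if (mac_priv_grant(cred, priv) == 0)
+ *		return (0);		a policy granted the privilege
+ *	... otherwise fall through to the standard privilege checks ...
+ */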
+
+/*
+ * Restrict access to a privilege for a credential.  Return failure if any
+ * policy denies access.
+ */
+int
+mac_priv_check(kauth_cred_t cred, int priv)
+{
+	int error;
+
+	MAC_CHECK(priv_check, cred, priv);
+
+	return (error);
+}
+
+/*
+ * Grant access to a privilege for a credential.  Return success if any
+ * policy grants access.
+ */
+int
+mac_priv_grant(kauth_cred_t cred, int priv)
+{
+	int error;
+
+	MAC_GRANT(priv_grant, cred, priv);
+
+	return (error);
+}
diff --git a/security/mac_process.c b/security/mac_process.c
index 6ac8b6b5f..631b468a9 100644
--- a/security/mac_process.c
+++ b/security/mac_process.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2010 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -205,12 +205,15 @@ mac_execve_enter(user_addr_t mac_p, struct image_params *imgp)
 		return (0);
 
 	if (IS_64BIT_PROCESS(current_proc())) {
-		error = copyin(mac_p, &mac, sizeof(mac));
+		struct user64_mac mac64;
+		error = copyin(mac_p, &mac64, sizeof(mac64));
+		mac.m_buflen = mac64.m_buflen;
+		mac.m_string = mac64.m_string;
 	} else {
-		struct mac mac32;
+		struct user32_mac mac32;
 		error = copyin(mac_p, &mac32, sizeof(mac32));
 		mac.m_buflen = mac32.m_buflen;
-		mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
+		mac.m_string = mac32.m_string;
 	}
 	if (error)
 		return (error);
@@ -241,13 +244,17 @@ out:
  * When the subject's label changes, it may require revocation of privilege
  * to mapped objects.  This can't be done on-the-fly later with a unified
  * buffer cache.
+ *
+ * XXX:		CRF_MAC_ENFORCE should be in a kauth_cred_t field, rather
+ * XXX:		than a posix_cred_t field.
  */
 void
 mac_cred_label_update(kauth_cred_t cred, struct label *newlabel)
 {
+	posix_cred_t pcred = posix_cred_get(cred);
 
 	/* force label to be part of "matching" for credential */
-	cred->cr_flags |= CRF_MAC_ENFORCE;
+	pcred->cr_flags |= CRF_MAC_ENFORCE;
 
 	/* inform the policies of the update */
 	MAC_PERFORM(cred_label_update, cred, newlabel);
@@ -348,6 +355,29 @@ mac_proc_check_get_task(struct ucred *cred, struct proc *p)
 	return (error);
 }
 
+/*
+ * The type of maxprot in proc_check_map_anon must be equivalent to vm_prot_t
+ * (defined in <mach/vm_prot.h>). mac_policy.h does not include any header
+ * files, so cannot use the typedef itself.
+ */
+int
+mac_proc_check_map_anon(proc_t proc, user_addr_t u_addr,
+    user_size_t u_size, int prot, int flags, int *maxprot)
+{
+	kauth_cred_t cred;
+	int error;
+
+	if (!mac_vm_enforce ||
+	    !mac_proc_check_enforce(proc, MAC_VM_ENFORCE))
+		return (0);
+
+	cred = kauth_cred_proc_ref(proc);
+	MAC_CHECK(proc_check_map_anon, proc, cred, u_addr, u_size, prot, flags, maxprot);
+	kauth_cred_unref(&cred);
+
+	return (error);
+}
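+
+/*
+ * Illustrative policy-side sketch (hypothetical module, not part of this
+ * file): because maxprot is passed by reference, a policy can clamp the
+ * maximum protection rather than deny the mapping outright, e.g.
+ *
+ *	static int
+ *	my_proc_check_map_anon(proc_t p, kauth_cred_t cred, user_addr_t addr,
+ *	    user_size_t size, int prot, int flags, int *maxprot)
+ *	{
+ *		*maxprot &= ~VM_PROT_EXECUTE;	strip execute permission
+ *		return (0);			allow the mapping itself
+ *	}
+ */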
+
 int
 mac_proc_check_mprotect(proc_t proc,
     user_addr_t addr, user_size_t size, int prot)
@@ -544,14 +574,6 @@ mac_lctx_check_label_update(struct lctx *l, struct label *newlabel)
 }
 #endif	/* LCTX */
 
-void
-mac_thread_userret(int code, int error, struct thread *thread)
-{
-
-	if (mac_late)
-		MAC_PERFORM(thread_userret, code, error, thread);
-}
-
 int
 mac_proc_check_suspend_resume(proc_t curp, int sr)
 {
diff --git a/security/mac_stub.c b/security/mac_stub.c
index b78b081f4..b3e455817 100644
--- a/security/mac_stub.c
+++ b/security/mac_stub.c
@@ -34,6 +34,18 @@
 /*
  * XXX stubs until we fix <rdar://problem/4607887>
  */
+int mac_check_iokit_open(void)
+{
+	return 0;
+}
+int mac_check_iokit_set_properties(void)
+{
+	return 0;
+}
+int mac_check_iokit_hid_control(void)
+{
+	return 0;
+}
 int mac_check_ipc_method(void)
 {
 	return 0;
@@ -262,6 +274,10 @@ int mac_check_system_acct(void)
 {
 	return 0;
 }
+int mac_check_system_chud(void)
+{
+	return 0;
+}
 int mac_check_system_nfsd(void)
 {
 	return 0;
@@ -374,6 +390,10 @@ int mac_check_vnode_revoke(void)
 {
 	return 0;
 }
+int mac_check_vnode_searchfs(void)
+{
+	return 0;
+}
 int mac_check_vnode_select(void)
 {
 	return 0;
diff --git a/security/mac_system.c b/security/mac_system.c
index 410c71310..8089caac8 100644
--- a/security/mac_system.c
+++ b/security/mac_system.c
@@ -83,6 +83,19 @@ mac_system_check_acct(kauth_cred_t cred, struct vnode *vp)
 	return (error);
 }
 
+int
+mac_system_check_chud(kauth_cred_t cred)
+{
+	int error;
+
+	if (!mac_system_enforce)
+		return (0);
+
+	MAC_CHECK(system_check_chud, cred);
+
+	return (error);
+}
+
 int
 mac_system_check_host_priv(kauth_cred_t cred)
 {
diff --git a/security/mac_vfs.c b/security/mac_vfs.c
index 0a136aa4c..7cc5561a2 100644
--- a/security/mac_vfs.c
+++ b/security/mac_vfs.c
@@ -377,6 +377,21 @@ mac_vnode_notify_create(vfs_context_t ctx, struct mount *mp,
 	return (error);
 }
 
+void
+mac_vnode_notify_rename(vfs_context_t ctx, struct vnode *vp,
+    struct vnode *dvp, struct componentname *cnp)
+{
+	kauth_cred_t cred;
+
+	if (!mac_vnode_enforce ||
+		!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+		return;
+
+	cred = vfs_context_ucred(ctx);
+	MAC_PERFORM(vnode_notify_rename, cred, vp, vp->v_label,
+	    dvp, dvp->v_label, cnp);
+}
+
 /*
  * Extended attribute 'name' was updated via
  * vn_setxattr() or vn_removexattr().  Allow the
@@ -425,12 +440,13 @@ mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t new, struct vnode *
 {
 	kauth_cred_t cred;
 	int disjoint = 0;
+	posix_cred_t pcred = posix_cred_get(new);
 
 	if (!mac_proc_enforce && !mac_vnode_enforce)
 		return disjoint;
 
 	/* mark the new cred to indicate "matching" includes the label */
-	new->cr_flags |= CRF_MAC_ENFORCE;
+	pcred->cr_flags |= CRF_MAC_ENFORCE;
 
 	cred = vfs_context_ucred(ctx);
 	MAC_PERFORM(cred_label_update_execve, cred, new, vp, vp->v_label,
@@ -642,6 +658,21 @@ mac_vnode_check_exec(vfs_context_t ctx, struct vnode *vp,
 	return (error);
 }
 
+int
+mac_vnode_check_fsgetpath(vfs_context_t ctx, struct vnode *vp)
+{
+	kauth_cred_t cred;
+	int error;
+
+	if (!mac_vnode_enforce ||
+		!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+		return (0);
+
+	cred = vfs_context_ucred(ctx);
+	MAC_CHECK(vnode_check_fsgetpath, cred, vp, vp->v_label);
+	return (error);
+}
+
 int
 mac_vnode_check_signature(struct vnode *vp, unsigned char *sha1,
 			  void * signature, size_t size)
@@ -899,6 +930,21 @@ mac_vnode_check_revoke(vfs_context_t ctx, struct vnode *vp)
 	return (error);
 }
 
+int
+mac_vnode_check_searchfs(vfs_context_t ctx, struct vnode *vp, struct attrlist *alist)
+{
+	kauth_cred_t cred;
+	int error;
+
+	if (!mac_vnode_enforce ||
+		!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE))
+		return (0);
+
+	cred = vfs_context_ucred(ctx);
+	MAC_CHECK(vnode_check_searchfs, cred, vp, vp->v_label, alist);
+	return (error);
+}
+
 int
 mac_vnode_check_select(vfs_context_t ctx, struct vnode *vp, int which)
 {
diff --git a/tools/lockstat/Makefile b/tools/lockstat/Makefile
index f67ee3ddf..a23a5c682 100644
--- a/tools/lockstat/Makefile
+++ b/tools/lockstat/Makefile
@@ -1,4 +1,4 @@
-CFLAGS=-g -Os -arch ppc -arch i386
+CFLAGS=-g -Os -arch x86_64 -arch i386
 
 TARGETS	= lockstat
 
diff --git a/tools/lockstat/lockstat.c b/tools/lockstat/lockstat.c
index 2ec7f324c..1f2b5af32 100644
--- a/tools/lockstat/lockstat.c
+++ b/tools/lockstat/lockstat.c
@@ -51,7 +51,7 @@
  *	Waits (Meaningful only for lock types that can block): Incremented
  *	if a lock acquisition attempt proceeded to block.
  *
- *	Direct Waits (currently implemented only on i386): For adaptive
+ *	Direct Waits (currently implemented only on i386/x86_64): For adaptive
  *	locks, such as mutexes, incremented if the owner of the mutex
  *	wasn't active on another processor at the time of the lock
  *	attempt. This indicates that no adaptive spin occurred.
@@ -329,7 +329,7 @@ print_all_spin(lockgroup_info_t *lockgroup)
 void
 print_mutex_hdr(void)
 {
-#if defined(__i386__)
+#if defined(__i386__) || defined(__x86_64__)
 	printf("Mutex lock attempts  Misses      Waits Direct Waits Name\n");
 #else
         printf("     mutex locks           misses            waits   name\n");
@@ -343,7 +343,7 @@ print_mutex(int requested, lockgroup_info_t *lockgroup)
 
 	if (curptr->lock_mtx_cnt != 0 && curptr->lock_mtx_util_cnt != 0) {
 		printf("%16lld ", curptr->lock_mtx_util_cnt);
-#if defined(__i386__)
+#if defined(__i386__) || defined(__x86_64__)
 		printf("%10lld %10lld %10lld   ", curptr->lock_mtx_miss_cnt,  curptr->lock_mtx_wait_cnt, curptr->lock_mtx_held_cnt);
 #else
 		printf("%16lld %16lld   ", curptr->lock_mtx_miss_cnt,  curptr->lock_mtx_wait_cnt);
diff --git a/tools/symbolify.py b/tools/symbolify.py
new file mode 100755
index 000000000..dde29a732
--- /dev/null
+++ b/tools/symbolify.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+from subprocess import Popen, PIPE, call
+import re
+import sys
+import os
+
+NM_FORMAT = "([0-9a-f]+) ([UuAaTtDdBbCcSsIi]) (.*)"
+
+nm_re = re.compile(NM_FORMAT)
+
+def parse_nm_output(str):
+    "returns (start, type, name)"
+    m = nm_re.match(str)
+    if m:
+        start = int(m.group(1), 16)
+        return (start, m.group(2), m.group(3))
+    else:
+        return None
+
+def nm(file):
+    cmd = "nm %s" % file
+    p = Popen(cmd, shell=True, stdout=PIPE)
+    return p.stdout
+
+class SymbolLookup:
+    def __init__(self, file, min_width=16):
+        self.min_width = min_width
+        # nm lines without an address field (e.g. undefined symbols) do
+        # not match NM_FORMAT; drop the resulting Nones before sorting
+        self.symbols = [s for s in (parse_nm_output(l) for l in nm(file)) if s]
+        self.symbols.sort(key=lambda x: x[0])
+
+    def padded(self, str):
+        return ("%%%ds" % self.min_width) % str
+
+    def __call__(self, saddr):
+        addr = int(saddr.group(0), 16)
+        last = (0, ' ', '<start of file>')
+        # stupid linear search... feel free to improve
+        for s in self.symbols:
+            if s[0] == addr:
+                return self.padded(s[2])
+            elif s[0] > addr:
+                if last[2] == "_last_kernel_symbol":
+                    return saddr.group(0)
+                return self.padded("<%s>+%x" % (last[2], addr - last[0]))
+            else:
+                last = s
+        if last[2] == "_last_kernel_symbol":
+            return saddr.group(0)
+        return self.padded("<%s>+%x" % (last[2], addr - last[0]))
+
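+# Illustrative sketch (not wired in above): the linear search in
+# SymbolLookup.__call__ could instead binary-search the sorted symbol
+# starts with the standard bisect module, e.g.:
+#
+#     import bisect
+#     starts = [s[0] for s in self.symbols]        # precompute in __init__
+#     i = bisect.bisect_right(starts, addr) - 1    # last symbol at or below addr
+#     last = self.symbols[i] if i >= 0 else (0, ' ', '<start of file>')
+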
+def symbolify(objfile, input, *args, **kargs):
+    replacer = SymbolLookup(objfile, *args, **kargs)
+    for l in input:
+        print re.sub("(0x)?[0-9a-f]{6,16}", replacer, l),
+
+
+def usage():
+    print "usage: %s [filename]" % sys.argv[0]
+    print "\tor specify a filename in your SYMBOLIFY_KERNEL environment variable"
+
+    # die now
+    sys.exit(1)
+
+KERNEL_FILE = None
+
+if( len(sys.argv) > 2 ):
+    usage()
+
+if( len(sys.argv) == 2 ):
+    KERNEL_FILE = sys.argv[1]
+
+if( KERNEL_FILE is None ):
+    KERNEL_FILE = os.environ.get("SYMBOLIFY_KERNEL")
+
+if( KERNEL_FILE is None ):
+    usage()
+
+print "using kernel file '%s'" % KERNEL_FILE
+
+symbolify(KERNEL_FILE, sys.stdin, min_width=40)
+
diff --git a/tools/tests/MPMMTest/KQMPMMtest.c b/tools/tests/MPMMTest/KQMPMMtest.c
index b16c5f847..5b659a833 100644
--- a/tools/tests/MPMMTest/KQMPMMtest.c
+++ b/tools/tests/MPMMTest/KQMPMMtest.c
@@ -15,6 +15,7 @@
 #include <mach/notify.h>
 #include <servers/bootstrap.h>
 #include <sys/event.h>
+#include <sys/select.h>
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/signal.h>
@@ -67,6 +68,7 @@ static boolean_t	affinity = FALSE;
 static boolean_t	timeshare = FALSE;
 static boolean_t	threaded = FALSE;
 static boolean_t	oneway = FALSE;
+static boolean_t	do_select = FALSE;
 int			msg_type;
 int			num_ints;
 int			num_msgs;
@@ -96,6 +98,7 @@ void usage(const char *progname) {
 	fprintf(stderr, "    -delay num\t\tmicroseconds to sleep clients between messages\n");
 	fprintf(stderr, "    -work num\t\tmicroseconds of client work\n");
 	fprintf(stderr, "    -pages num\t\tpages of memory touched by client work\n");
+	fprintf(stderr, "    -select   \t\tselect prior to calling kevent().\n");
 	fprintf(stderr, "default values are:\n");
 	fprintf(stderr, "    . no affinity\n");
 	fprintf(stderr, "    . not timeshare\n");
@@ -195,6 +198,9 @@ void parse_args(int argc, char *argv[]) {
 				usage(progname);
 			client_pages = strtoul(argv[1], NULL, 0);
 			argc -= 2; argv += 2;
+		} else if (0 == strcmp("-select", argv[0])) {
+			do_select = TRUE;
+			argc--; argv++;
 		} else 
 			usage(progname);
 	}
@@ -339,10 +345,12 @@ server(void *serverarg)
 	int kq;
 	struct kevent64_s kev[1];
 	int err;
+	int count;
 	struct port_args args;
 	int idx;
 	kern_return_t ret;
 	int totalmsg = num_msgs * num_clients;
+	fd_set readfds;
 
 	args.server_num = (int) (long) serverarg;
 	setup_server_ports(&args);
@@ -365,11 +373,26 @@ server(void *serverarg)
 		perror("kevent");
 		exit(1);
 	}
+	
 	for (idx = 0; idx < totalmsg; idx++) {
 
 		if (verbose) 
 			printf("server awaiting message %d\n", idx);
 	retry:
+		if (do_select) {
+			FD_ZERO(&readfds);
+			FD_SET(kq, &readfds);
+
+			if (verbose)
+				printf("Calling select() prior to kevent64().\n");
+
+			count = select(kq + 1, &readfds, NULL, NULL, NULL);
+			if (count == -1) {
+				perror("select");
+				exit(1);
+			}
+		}
+
 		EV_SET64(&kev[0], args.pset, EVFILT_MACHPORT, EV_ENABLE, 
 #if DIRECT_MSG_RCV
 			 MACH_RCV_MSG|MACH_RCV_LARGE, 0, 0, (mach_vm_address_t)args.req_msg, args.req_size);
diff --git a/tools/tests/MPMMTest/Makefile b/tools/tests/MPMMTest/Makefile
index 7762791a0..0421a718a 100644
--- a/tools/tests/MPMMTest/Makefile
+++ b/tools/tests/MPMMTest/Makefile
@@ -1,4 +1,4 @@
-CFLAGS=-g -O2 -arch ppc -arch i386
+CFLAGS=-g -O2 -arch i386
 CFLAGS64=-g -O2 -arch x86_64
 
 TARGETS	= MPMMtest MPMMtest_64 KQMPMMtest KQMPMMtest_64 KQMPMMtestD KQMPMMtest_64D
diff --git a/tools/tests/affinity/Makefile b/tools/tests/affinity/Makefile
index 9450d79c6..b8563d54e 100644
--- a/tools/tests/affinity/Makefile
+++ b/tools/tests/affinity/Makefile
@@ -1,5 +1,5 @@
-CFLAGS	=-g -arch ppc -arch i386
-CFLAGS64=-g -arch ppc64 -arch x86_64
+CFLAGS	=-g -arch i386
+CFLAGS64=-g -arch x86_64
 
 TESTS =		\
 	sets	\
diff --git a/tools/tests/execperf/Makefile b/tools/tests/execperf/Makefile
new file mode 100644
index 000000000..00d03037c
--- /dev/null
+++ b/tools/tests/execperf/Makefile
@@ -0,0 +1,79 @@
+SDKROOT ?= /
+ARCHS = x86_64
+CC = xcrun -sdk $(SDKROOT) cc
+CODESIGN = xcrun -sdk $(SDKROOT) codesign
+CFLAGS = -O0 -g -isysroot $(SDKROOT) $(patsubst %, -arch %,$(ARCHS))
+LDFLAGS = -Wl,-new_linker -dead_strip \
+	-isysroot $(SDKROOT) $(patsubst %, -arch %,$(ARCHS))
+NOPIE_OPTION = -Wl,-no_pie
+
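+# A rough map of what each variant exercises (summary inferred from the
+# target names): exit.nodyld is a static binary with no dynamic linker;
+# the dyld-but-no-Libsystem / dyld-and-Libsystem pairs start dyld without
+# and with libSystem; each pie/nopie pair compares position-independent
+# and fixed-address executables.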
+EXECUTABLES = exit.nodyld \
+	exit.nopie.dyld-but-no-Libsystem exit.pie.dyld-but-no-Libsystem \
+	exit.nopie.dyld-and-Libsystem exit.pie.dyld-and-Libsystem \
+	exit.nopie exit.pie \
+	printexecinfo
+OBJECTS = exit-asm.o exit.o printexecinfo.o
+
+default: $(EXECUTABLES) run
+
+clean:
+	rm -f run $(EXECUTABLES)
+	rm -f run.o $(OBJECTS)
+
+run.o: run.c
+	$(CC) -c -o $@ $< $(CFLAGS)
+
+run: run.o
+	$(CC) -o $@ $< $(LDFLAGS)
+	$(CODESIGN) -s - $@
+
+# OBJECTS
+
+exit-asm.o: exit-asm.S
+	$(CC) -c -o $@ $< $(CFLAGS)
+
+exit.o: exit.c
+	$(CC) -c -o $@ $< $(CFLAGS)
+
+printexecinfo.o: printexecinfo.c
+	$(CC) -c -o $@ $< $(CFLAGS)
+
+# EXECUTABLES
+
+exit.nodyld: exit-asm.o
+	$(CC) -o $@ $< $(LDFLAGS) -e mystart -nostartfiles -nodefaultlibs -static
+	$(CODESIGN) -s - $@
+
+
+exit.nopie.dyld-but-no-Libsystem: exit-asm.o
+	$(CC) -o $@ $< $(LDFLAGS) -e mystart $(NOPIE_OPTION) -nostartfiles -nodefaultlibs
+	$(CODESIGN) -s - $@
+
+exit.pie.dyld-but-no-Libsystem: exit-asm.o
+	$(CC) -o $@ $< $(LDFLAGS) -e mystart -Wl,-pie -nostartfiles -nodefaultlibs
+	$(CODESIGN) -s - $@
+
+exit.nopie.dyld-and-Libsystem: exit-asm.o
+	$(CC) -o $@ $< $(LDFLAGS) -e mystart $(NOPIE_OPTION) -nostartfiles -nodefaultlibs -lSystem
+	$(CODESIGN) -s - $@
+
+exit.pie.dyld-and-Libsystem: exit-asm.o
+	$(CC) -o $@ $< $(LDFLAGS) -e mystart -Wl,-pie -nostartfiles -nodefaultlibs -lSystem
+	$(CODESIGN) -s - $@
+
+exit.nopie: exit.o
+	$(CC) -o $@ $< $(LDFLAGS) -e mystart $(NOPIE_OPTION)
+	$(CODESIGN) -s - $@
+
+exit.pie: exit.o
+	$(CC) -o $@ $< $(LDFLAGS) -e mystart -Wl,-pie
+	$(CODESIGN) -s - $@
+
+printexecinfo: printexecinfo.o
+	$(CC) -o $@ $< $(LDFLAGS)
+	$(CODESIGN) -s - $@
+
+# ACTIONS
+
+quick-test: $(EXECUTABLES) run
+	./test.sh
diff --git a/tools/tests/execperf/exit-asm.S b/tools/tests/execperf/exit-asm.S
new file mode 100644
index 000000000..ba63101e4
--- /dev/null
+++ b/tools/tests/execperf/exit-asm.S
@@ -0,0 +1,42 @@
+.text
+	.globl mystart
+mystart:
+#if defined(__x86_64__)
+	pushq $0
+	mov %rsp, %rbp
+	andq $0xfffffffffffffff0, %rsp
+	movl $42, %edi
+	movl $0x2000001, %eax
+	movl $0, %ecx
+	movq %rcx, %r10
+	syscall
+	jmp 1f
+1:	
+	hlt
+	nop
+	nop
+	nop
+	nop
+#elif defined(__i386__)
+	pushl $0
+	mov %esp, %ebp
+	andl $0xfffffff0, %esp
+	subl $12, %esp
+	pushl $42
+	mov $0x40001, %eax
+	call _sysenter_trap
+	jmp 1f
+1:	
+	hlt
+	nop
+	nop
+	nop
+	nop
+_sysenter_trap:
+	pop %edx
+	mov %esp, %ecx
+	sysenter
+	nop
+#else
+#error Unsupported architecture
+#endif
diff --git a/tools/tests/execperf/exit.c b/tools/tests/execperf/exit.c
new file mode 100644
index 000000000..ded537881
--- /dev/null
+++ b/tools/tests/execperf/exit.c
@@ -0,0 +1,12 @@
+#include <stdlib.h>	/* _Exit() */
+
+void mystart(void) __asm__("mystart");
+
+void mystart(void) {
+#if defined(__x86_64__)
+    asm volatile ("andq  $0xfffffffffffffff0, %rsp\n");
+#elif defined(__i386__)
+    asm volatile ("andl  $0xfffffff0, %esp\n");
+#else
+#error Unsupported architecture
+#endif
+    _Exit(42);
+}
diff --git a/tools/tests/execperf/printexecinfo.c b/tools/tests/execperf/printexecinfo.c
new file mode 100644
index 000000000..1acf0d493
--- /dev/null
+++ b/tools/tests/execperf/printexecinfo.c
@@ -0,0 +1,68 @@
+#include <stdio.h>
+#include <err.h>
+#include <crt_externs.h>
+#include <string.h>
+#include <mach/mach.h>
+#include <mach-o/ldsyms.h>
+#include <mach-o/dyld_images.h>
+#include <stdlib.h>
+#include <sys/sysctl.h>	/* sysctlbyname() */
+
+__attribute__((constructor))
+void init(int argc, const char *argv[], const char *envp[], const char *appl[], void *vars __attribute__((unused))) {
+	int i;
+
+	printf("argv = %p\n", argv);
+	for (i=0; argv[i]; i++) {
+		printf("argv[%2d] = %p %.100s%s\n", i, argv[i], argv[i], strlen(argv[i]) > 100 ? "..." : "");
+	}
+	printf("envp = %p\n", envp);
+	for (i=0; envp[i]; i++) {
+		printf("envp[%2d] = %p %.100s%s\n", i, envp[i], envp[i], strlen(envp[i]) > 100 ? "..." : "");
+	}
+	printf("appl = %p\n", appl);
+	for (i=0; appl[i]; i++) {
+		printf("appl[%2d] = %p %.100s%s\n", i, appl[i], appl[i], strlen(appl[i]) > 100 ? "..." : "");
+	}
+}
+
+void printexecinfo(void)
+{
+	int ret;
+	uint64_t stackaddr;
+	size_t len = sizeof(stackaddr);
+
+	printf("executable load address = 0x%016llx\n", (uint64_t)(uintptr_t)&_mh_execute_header);
+
+	ret = sysctlbyname("kern.usrstack64", &stackaddr, &len, NULL, 0);
+	if (ret == -1)
+		err(1, "sysctlbyname");
+
+	printf("          stack address = 0x%016llx\n", stackaddr);
+}
+
+void printdyldinfo(void)
+{
+	task_dyld_info_data_t info;
+	mach_msg_type_number_t size = TASK_DYLD_INFO_COUNT;
+	kern_return_t kret;
+	struct dyld_all_image_infos *all_image_infos;
+	
+	kret = task_info(mach_task_self(), TASK_DYLD_INFO,
+					 (void *)&info, &size);
+	if (kret != KERN_SUCCESS)
+		errx(1, "task_info: %s", mach_error_string(kret));
+
+	all_image_infos = (struct dyld_all_image_infos *)(uintptr_t)info.all_image_info_addr;
+
+	printf("      dyld load address = 0x%016llx\n", (uint64_t)(uintptr_t)all_image_infos->dyldImageLoadAddress);
+	printf("     shared cache slide = 0x%016llx\n", (uint64_t)(uintptr_t)all_image_infos->sharedCacheSlide);
+
+}
+
+int main(int argc, char *argv[]) {
+
+	printexecinfo();
+	printdyldinfo();
+
+	return 0;
+}
diff --git a/tools/tests/execperf/run.c b/tools/tests/execperf/run.c
new file mode 100644
index 000000000..d7d5f6a5b
--- /dev/null
+++ b/tools/tests/execperf/run.c
@@ -0,0 +1,89 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <errno.h>
+#include <err.h>
+#include <pthread.h>
+#include <spawn.h>	/* posix_spawn() */
+#include <sys/wait.h>	/* waitpid() and the WIF* status macros */
+
+extern char **environ;
+
+char * const *newargv;
+
+void usage(void);
+
+void *work(void *);
+
+int main(int argc, char *argv[]) {
+
+    int i, count, threadcount;
+    int ret;
+    pthread_t *threads;
+
+    if (argc < 4) {
+        usage();
+    }
+
+    threadcount = atoi(argv[1]);
+    count = atoi(argv[2]);
+    
+    newargv = &argv[3];
+
+    threads = (pthread_t *)calloc(threadcount, sizeof(pthread_t));
+    for (i=0; i < threadcount; i++) {
+        ret = pthread_create(&threads[i], NULL, work, (void *)(intptr_t)count);
+        if (ret) {
+            err(1, "pthread_create");
+        }
+    }
+    
+    for (i=0; i < threadcount; i++) {
+        ret = pthread_join(threads[i], NULL);
+        if (ret) {
+            err(1, "pthread_join");
+        }
+    }
+    
+    return 0;
+}
+
+void usage(void) {
+    fprintf(stderr, "Usage: %s <threadcount> <count> <program> [<arg1> [<arg2> ...]]\n",
+            getprogname());
+    exit(1);
+}
+
+void *work(void *arg)
+{
+    int count = (int)(intptr_t)arg;
+    int i;
+    int ret;
+    pid_t pid;
+
+    for (i=0; i < count; i++) {
+        ret = posix_spawn(&pid, newargv[0], NULL, NULL, newargv, environ);
+        if (ret != 0) {
+            errc(1, ret, "posix_spawn(%s)", newargv[0]);
+        }
+        
+        while (-1 == waitpid(pid, &ret, 0)) {
+            if (errno != EINTR) {
+                err(1, "waitpid(%d)", pid);
+            }
+        }
+        
+        if (WIFSIGNALED(ret)) {
+            errx(1, "process exited with signal %d", WTERMSIG(ret));
+        } else if (WIFSTOPPED(ret)) {
+            errx(1, "process stopped with signal %d", WSTOPSIG(ret));
+        } else if (WIFEXITED(ret)) {
+            if (WEXITSTATUS(ret) != 42) {
+                errx(1, "process exited with unexpected exit code %d", WEXITSTATUS(ret));
+            }
+        } else {
+            errx(1, "unknown exit condition %x", ret);
+        }
+    }
+
+    return NULL;
+}
diff --git a/tools/tests/execperf/test.sh b/tools/tests/execperf/test.sh
new file mode 100755
index 000000000..72917a719
--- /dev/null
+++ b/tools/tests/execperf/test.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+EXECUTABLES="exit.nodyld \
+        exit.nopie.dyld-but-no-Libsystem exit.pie.dyld-but-no-Libsystem \
+        exit.nopie.dyld-and-Libsystem exit.pie.dyld-and-Libsystem \
+        exit.nopie exit.pie"
+			
+RUN=run
+PRODUCT=`sw_vers -productName`
+COUNT=
+
+case "$PRODUCT" in
+    "iPhone OS")
+	COUNT=1000
+	;;
+    *)
+	COUNT=10000
+	;;
+esac
+
+for j in 1 2 3; do
+    for i in ${EXECUTABLES}; do
+	echo "Running $i"
+	/usr/bin/time ./${RUN} $j $((${COUNT}/$j)) ./$i
+	STATUS=$?
+	if [ ${STATUS} -ne 0 ]; then
+	    echo "Failed $i, exit status ${STATUS}"
+	    exit 1
+	fi
+    done
+done
diff --git a/tools/tests/jitter/Makefile b/tools/tests/jitter/Makefile
new file mode 100644
index 000000000..ade16e7f4
--- /dev/null
+++ b/tools/tests/jitter/Makefile
@@ -0,0 +1,16 @@
+
+ARCHS=x86_64 i386
+SDKROOT=/
+CC=xcrun -sdk "$(SDKROOT)" cc
+CFLAGS=$(patsubst %, -arch %,$(ARCHS)) -g -Wall -Os -isysroot $(SDKROOT)
+
+all: jitter
+
+timer_jitter.o: timer_jitter.c
+	$(CC) -c -o $@ $< $(CFLAGS)
+
+cpu_number.o: cpu_number.s
+	$(CC) -c -o $@ $< $(CFLAGS)
+
+jitter: timer_jitter.o cpu_number.o
+	$(CC) -o $@ $^ $(CFLAGS)
diff --git a/tools/tests/jitter/cpu_number.s b/tools/tests/jitter/cpu_number.s
new file mode 100644
index 000000000..2d29bb0cd
--- /dev/null
+++ b/tools/tests/jitter/cpu_number.s
@@ -0,0 +1,33 @@
+.text
+/*
+ * Taken from Libc.  The kernel arranges each CPU's IDT so that the low
+ * 12 bits of the IDT limit encode that CPU's number; since sidt is not
+ * a privileged instruction, reading the limit and keeping those bits
+ * recovers the CPU the thread is currently running on.
+ */
+.globl _cpu_number
+_cpu_number:
+#if defined(__x86_64__)
+        push    %rbp
+        mov     %rsp,%rbp
+        sub     $16,%rsp                // space to read IDTR
+
+        sidt    (%rsp)                  // store limit:base on stack
+        movzwq  (%rsp), %rax            // get 16-bit limit, zero-extended
+        and     $0xfff, %rax            // keep only the low 12 bits (CPU number)
+
+        mov     %rbp,%rsp
+        pop     %rbp
+        ret
+#elif defined(__i386__)
+        push    %ebp
+	mov     %esp,%ebp
+	sub     $8, %esp                // space to read IDTR
+
+	sidt    (%esp)                  // store limit:base on stack
+	movw    (%esp), %ax             // get limit
+	and     $0xfff, %eax            // keep only the low 12 bits (CPU number)
+	
+	mov     %ebp,%esp
+	pop     %ebp
+	ret
+#else
+#error Unsupported architecture
+#endif
diff --git a/tools/tests/jitter/timer_jitter.c b/tools/tests/jitter/timer_jitter.c
new file mode 100644
index 000000000..f81ab3396
--- /dev/null
+++ b/tools/tests/jitter/timer_jitter.c
@@ -0,0 +1,480 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <unistd.h>
+#include <stdio.h>
+#include <math.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/ptrace.h>
+#include <semaphore.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <err.h>
+#include <string.h>
+#include <stdint.h>	/* INT64_MAX */
+#include <time.h>	/* time(), for seeding random() */
+
+#include <libkern/OSAtomic.h>
+
+#include <mach/mach_time.h>
+#include <mach/mach.h>
+#include <mach/task.h>
+#include <mach/semaphore.h>
+
+typedef enum my_policy_type { MY_POLICY_REALTIME, MY_POLICY_TIMESHARE, MY_POLICY_FIXEDPRI } my_policy_type_t;
+
+#define DEFAULT_MAX_SLEEP_NS	2000000000ll /* Two seconds */
+#define CONSTRAINT_NANOS	(20000000ll)	/* 20 ms */
+#define COMPUTATION_NANOS	(10000000ll)	/* 10 ms */
+
+struct mach_timebase_info g_mti;
+
+#define assert(truth, label) do { if(!(truth)) { printf("Thread %p: failure on line %d\n", pthread_self(), __LINE__); goto label; } } while (0)
+
+struct second_thread_args {
+	semaphore_t wakeup_semaphore;
+	semaphore_t return_semaphore;
+	uint64_t iterations;
+	my_policy_type_t pol;
+	double *wakeup_second_jitter_arr;
+	uint64_t woke_on_same_cpu;
+	uint64_t too_much;
+	volatile uint64_t last_poke_time;
+	volatile int cpuno;
+};
+
+extern int cpu_number(void);
+
+void *
+second_thread(void *args);
+
+void
+print_usage()
+{
+	printf("Usage: jitter [-w] [-s <random seed>] [-n <min sleep, ns>] [-m <max sleep, ns>] <realtime | timeshare | fixed> <num iterations> <traceworthy jitter, ns>\n");
+}
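+
+/*
+ * Example invocation (illustrative): 10000 randomly-sized timed sleeps
+ * under the realtime policy, cutting a trace point whenever a wakeup is
+ * more than 50000 ns late, and poking a second thread after each wakeup:
+ *
+ *	./jitter -w realtime 10000 50000
+ */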
+
+my_policy_type_t
+parse_thread_policy(const char *str)
+{
+	if (strcmp(str, "timeshare") == 0) {
+		return MY_POLICY_TIMESHARE;
+	} else if (strcmp(str, "realtime") == 0) {
+		return MY_POLICY_REALTIME;
+	} else if (strcmp(str, "fixed") == 0) {
+		return MY_POLICY_FIXEDPRI;
+	} else {
+		printf("Invalid thread policy %s\n", str);
+		exit(1);
+	}
+}
+
+int
+thread_setup(my_policy_type_t pol)
+{
+	int res;
+
+	switch (pol) {
+		case MY_POLICY_TIMESHARE:
+		{
+			return 0;
+		}
+		case MY_POLICY_REALTIME: 
+		{
+			thread_time_constraint_policy_data_t pol;
+
+			/* Hard-coded realtime parameters (similar to what Digi uses) */
+			pol.period = 100000;
+			pol.constraint =  CONSTRAINT_NANOS * g_mti.denom / g_mti.numer;
+			pol.computation = COMPUTATION_NANOS * g_mti.denom / g_mti.numer;
+			pol.preemptible = 0; /* Ignored by OS */
+
+			res = thread_policy_set(mach_thread_self(), THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t) &pol, THREAD_TIME_CONSTRAINT_POLICY_COUNT);
+			assert(res == 0, fail);
+			break;
+		}
+		case MY_POLICY_FIXEDPRI: 
+		{
+			thread_extended_policy_data_t pol;
+			pol.timeshare = 0;
+
+			res = thread_policy_set(mach_thread_self(), THREAD_EXTENDED_POLICY, (thread_policy_t) &pol, THREAD_EXTENDED_POLICY_COUNT);
+			assert(res == 0, fail);
+			break;
+		}
+		default:
+		{
+			printf("invalid policy type\n");
+			return 1;
+		}
+	}
+
+	return 0;
+fail:
+	return 1;
+}
+
+uint64_t 
+get_random_sleep_length_abs_ns(uint64_t min_sleep_ns, uint64_t max_sleep_ns)
+{
+	uint64_t tmp;
+
+	tmp = (uint32_t)random();
+	tmp <<= 32;
+	tmp |= (uint32_t)random();
+
+	/* Now use the random number to sleep amount within the window */
+	tmp %= (max_sleep_ns - min_sleep_ns);
+
+	return min_sleep_ns + tmp;
+}
+
+void 
+compute_stats(double *values, uint64_t count, double *average_magnitudep, double *maxp, double *minp, double *stddevp)
+{
+	uint64_t i;
+	double _sum = 0;
+	double _max = 0;
+	double _min = (double)INT64_MAX;
+	double _avg = 0;
+	double _dev = 0;
+
+	for (i = 0; i < count; i++) {
+		_sum += fabs(values[i]);
+		_max = values[i] > _max ? values[i] : _max;
+		_min = values[i] < _min ? values[i] : _min;
+	}
+
+	_avg = _sum / (double)count;
+	
+	_dev = 0;
+	for (i = 0; i < count; i++) {
+		_dev += pow((values[i] - _avg), 2);
+	}
+	
+	_dev /= count;
+	_dev = sqrt(_dev);
+
+	*average_magnitudep = _avg;
+	*maxp = _max;
+	*minp = _min;
+	*stddevp = _dev;
+}
+
+void
+print_stats_us(const char *label, double avg, double max, double min, double stddev)
+{
+	printf("Max %s: %.1lfus\n", label, max / 1000.0 * (((double)g_mti.numer) / ((double)g_mti.denom)));
+	printf("Min %s: %.1lfus\n", label, min / 1000.0 * (((double)g_mti.numer) / ((double)g_mti.denom)));
+	printf("Avg magnitude of %s: %.1lfus\n", label, avg / 1000.0 * (((double)g_mti.numer) / ((double)g_mti.denom)));
+	printf("Stddev: %.1lfus\n", stddev / 1000.0 * (((double)g_mti.numer) / ((double)g_mti.denom)));
+	putchar('\n');
+}
+
+void
+print_stats_fract(const char *label, double avg, double max, double min, double stddev)
+{
+	printf("Max %s jitter: %.1lf%%\n", label, max * 100);
+	printf("Min %s jitter: %.1lf%%\n", label, min * 100);
+	printf("Avg %s jitter: %.1lf%%\n", label, avg * 100);
+	printf("Stddev: %.1lf%%\n", stddev * 100);
+	putchar('\n');	
+}
+
+int
+main(int argc, char **argv)
+{
+	uint64_t iterations, i;
+	double *jitter_arr, *fraction_arr;
+	double *wakeup_second_jitter_arr;
+	uint64_t target_time;
+	uint64_t sleep_length_abs;
+	uint64_t min_sleep_ns = 0;
+	uint64_t max_sleep_ns = DEFAULT_MAX_SLEEP_NS;
+	uint64_t wake_time;
+	unsigned random_seed;
+	boolean_t need_seed = TRUE;
+	int ch;		/* getopt() returns int, not char */
+	int res;
+	kern_return_t kret;
+	my_policy_type_t pol;
+	boolean_t wakeup_second_thread = FALSE;
+	semaphore_t wakeup_semaphore, return_semaphore;
+
+	double avg, stddev, max, min;
+	double avg_fract, stddev_fract, max_fract, min_fract;
+	uint64_t too_much;
+
+	struct second_thread_args secargs;
+	pthread_t secthread;
+
+	mach_timebase_info(&g_mti);
+
+	/* Parse command-line options */
+	opterr = 0;
+	while ((ch = getopt(argc, argv, "m:n:hs:w")) != -1 && ch != '?') {
+		switch (ch) {
+			case 's':
+				/* Specified seed for random() */
+				random_seed = (unsigned)atoi(optarg);
+				srandom(random_seed);
+				need_seed = FALSE;
+				break;
+			case 'm':
+				/* Maximum sleep length, in ns */
+				max_sleep_ns = strtoull(optarg, NULL, 10);	
+				break;
+			case 'n':
+				/* Minimum sleep length, in ns */
+				min_sleep_ns = strtoull(optarg, NULL, 10);	
+				break;
+			case 'w':
+				/* After each timed wait, wakeup another thread */
+				wakeup_second_thread = TRUE;
+				break;
+			case 'h':
+				print_usage();
+				exit(0);
+				break;
+			default:
+				fprintf(stderr, "Got unexpected result from getopt().\n");
+				exit(1);
+				break;
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc != 3) {
+		print_usage();
+		exit(1);
+	}
+
+	if (min_sleep_ns >= max_sleep_ns) {
+		print_usage();
+		exit(1);
+	}
+
+	if (need_seed) {
+		srandom(time(NULL));
+	}
+
+	/* What scheduling policy? */
+	pol = parse_thread_policy(argv[0]);
+
+	/* How many timers? */
+	iterations = strtoull(argv[1], NULL, 10);
+
+	/* How much jitter is so extreme that we should cut a trace point */
+	too_much = strtoull(argv[2], NULL, 10);
+	
+	/* Array for data */
+	jitter_arr = (double*)malloc(sizeof(*jitter_arr) * iterations);
+	if (jitter_arr == NULL) {
+		printf("Couldn't allocate array to store results.\n");
+		exit(1);
+	}
+
+	fraction_arr = (double*)malloc(sizeof(*fraction_arr) * iterations);
+	if (fraction_arr == NULL) {
+		printf("Couldn't allocate array to store results.\n");
+		exit(1);
+	}
+
+	if (wakeup_second_thread) {
+		/* Array for data */
+		wakeup_second_jitter_arr = (double*)malloc(sizeof(*jitter_arr) * iterations);
+		if (wakeup_second_jitter_arr == NULL) {
+			printf("Couldn't allocate array to store results.\n");
+			exit(1);
+		}
+
+		kret = semaphore_create(mach_task_self(), &wakeup_semaphore, SYNC_POLICY_FIFO, 0);
+		if (kret != KERN_SUCCESS) {
+			printf("Couldn't allocate semaphore %d\n", kret);
+			exit(1);
+		}
+
+		kret = semaphore_create(mach_task_self(), &return_semaphore, SYNC_POLICY_FIFO, 0);
+		if (kret != KERN_SUCCESS) {
+			printf("Couldn't allocate semaphore %d\n", kret);
+			exit(1);
+		}
+
+
+		secargs.wakeup_semaphore = wakeup_semaphore;
+		secargs.return_semaphore = return_semaphore;
+		secargs.iterations = iterations;
+		secargs.pol = pol;
+		secargs.wakeup_second_jitter_arr = wakeup_second_jitter_arr;
+		secargs.woke_on_same_cpu = 0;
+		secargs.too_much = too_much;
+		secargs.last_poke_time = 0ULL;
+		secargs.cpuno = 0;
+
+		res = pthread_create(&secthread, NULL, second_thread, &secargs);
+		if (res) {
+			err(1, "pthread_create");
+		}
+
+		sleep(1); /* Time for other thread to start up */
+	}
+
+	/* Set scheduling policy */
+	res = thread_setup(pol);
+	if (res != 0) {
+		printf("Couldn't set thread policy.\n");
+		exit(1);
+	}
+
+	/* 
+	 * Repeatedly pick a random timer length and 
+	 * try to sleep exactly that long 
+	 */
+	for (i = 0; i < iterations; i++) {
+		sleep_length_abs = (uint64_t) (get_random_sleep_length_abs_ns(min_sleep_ns, max_sleep_ns) * (((double)g_mti.denom) / ((double)g_mti.numer)));
+		target_time = mach_absolute_time() + sleep_length_abs;
+		
+		/* Sleep */
+		kret = mach_wait_until(target_time);
+		wake_time = mach_absolute_time();
+	
+		jitter_arr[i] = (double)(wake_time - target_time);
+		fraction_arr[i] = jitter_arr[i] / ((double)sleep_length_abs);
+		
+		/* Too much: cut a tracepoint for a debugger */
+		if (jitter_arr[i] >= too_much) {
+			syscall(SYS_kdebug_trace, 0xeeeeeeee, 0, 0, 0, 0);
+		}
+
+		if (wakeup_second_thread) {
+			secargs.last_poke_time = mach_absolute_time();
+			secargs.cpuno = cpu_number();
+			OSMemoryBarrier();
+			kret = semaphore_signal(wakeup_semaphore);
+			if (kret != KERN_SUCCESS) {
+				errx(1, "semaphore_signal");
+			}
+
+			kret = semaphore_wait(return_semaphore);
+			if (kret != KERN_SUCCESS) {
+				errx(1, "semaphore_wait");
+			}
+
+		}
+	}
+
+	/*
+	 * Compute statistics and output results. 
+	 */
+	compute_stats(jitter_arr, iterations, &avg, &max, &min, &stddev);
+	compute_stats(fraction_arr, iterations, &avg_fract, &max_fract, &min_fract, &stddev_fract);
+
+	putchar('\n');
+	print_stats_us("jitter", avg, max, min, stddev);
+	print_stats_fract("%", avg_fract, max_fract, min_fract, stddev_fract);
+
+	if (wakeup_second_thread) {
+
+		res = pthread_join(secthread, NULL);
+		if (res) {
+			err(1, "pthread_join");
+		}
+
+		compute_stats(wakeup_second_jitter_arr, iterations, &avg, &max, &min, &stddev);
+		
+		putchar('\n');
+		print_stats_us("second jitter", avg, max, min, stddev);
+
+		putchar('\n');
+		printf("%llu/%llu (%.1f%%) wakeups on same CPU\n", secargs.woke_on_same_cpu, iterations,
+			   100.0*((double)secargs.woke_on_same_cpu)/iterations);
+	}
+
+	return 0;
+}
+
+void *
+second_thread(void *args)
+{
+	struct second_thread_args *secargs = (struct second_thread_args *)args;
+	int res;
+	uint64_t i;
+	kern_return_t kret;
+	uint64_t wake_time;
+	int cpuno;
+
+	/* Set scheduling policy */
+	res = thread_setup(secargs->pol);
+	if (res != 0) {
+		printf("Couldn't set thread policy.\n");
+		exit(1);
+	}
+
+	/* 
+	 * Repeatedly pick a random timer length and 
+	 * try to sleep exactly that long 
+	 */
+	for (i = 0; i < secargs->iterations; i++) {
+
+		/* Wake up when poked by main thread */
+		kret = semaphore_wait(secargs->wakeup_semaphore);
+		if (kret != KERN_SUCCESS) {
+			errx(1, "semaphore_wait %d", kret);
+		}
+
+		wake_time = mach_absolute_time();
+		cpuno = cpu_number();
+		if (wake_time < secargs->last_poke_time) {
+			/* Woke in past, unsynchronized mach_absolute_time()? */
+			
+			errx(1, "woke in past %llu (%d) < %llu (%d)", wake_time, cpuno, secargs->last_poke_time, secargs->cpuno);
+		}
+
+		if (cpuno == secargs->cpuno) {
+			secargs->woke_on_same_cpu++;
+		}
+
+		secargs->wakeup_second_jitter_arr[i] = (double)(wake_time - secargs->last_poke_time);
+		
+		/* Too much: cut a tracepoint for a debugger */
+		if (secargs->wakeup_second_jitter_arr[i] >= secargs->too_much) {
+			syscall(SYS_kdebug_trace, 0xeeeeeeef, 0, 0, 0, 0);
+		}
+
+		kret = semaphore_signal(secargs->return_semaphore);
+		if (kret != KERN_SUCCESS) {
+			errx(1, "semaphore_signal %d", kret);
+		}
+
+	}
+
+	return NULL;
+}
diff --git a/tools/tests/kqueue_tests/Makefile b/tools/tests/kqueue_tests/Makefile
old mode 100644
new mode 100755
index 9db391fe4..b51ccd631
--- a/tools/tests/kqueue_tests/Makefile
+++ b/tools/tests/kqueue_tests/Makefile
@@ -1,7 +1,7 @@
-all: readwrite timer
+all: file timer
 
-readwrite:
-	gcc -o readwrite_tests kqueue_readwrite_tests.c -arch ppc -arch i386
+file:
+	gcc -o file_tests kqueue_file_tests.c -arch i386
 
 timer:
-	gcc -o timer_tests kqueue_timer_tests.c -arch ppc -arch i386 -arch x86_64
+	gcc -o timer_tests kqueue_timer_tests.c -arch i386 -arch x86_64
diff --git a/tools/tests/kqueue_tests/kqueue_readwrite_tests.c b/tools/tests/kqueue_tests/kqueue_file_tests.c
similarity index 98%
rename from tools/tests/kqueue_tests/kqueue_readwrite_tests.c
rename to tools/tests/kqueue_tests/kqueue_file_tests.c
index e4ad5b5e4..cef98009e 100644
--- a/tools/tests/kqueue_tests/kqueue_readwrite_tests.c
+++ b/tools/tests/kqueue_tests/kqueue_file_tests.c
@@ -62,37 +62,37 @@ typedef struct _action {
  */
 typedef struct _test {
 	char *t_testname;
-
+	
 	/* Test kevent() or poll() */
 	int 	t_is_poll_test;	
-
+	
 	/* Actions for setting up test */
 	int 	 t_n_prep_actions;
 	action_t t_prep_actions[5];
-
+	
 	/* Actions for cleaning up test */
 	int 	 t_n_cleanup_actions;
 	action_t t_cleanup_actions[5];
 	
 	/* Action for thread to take while we wait */
 	action_t t_helpthreadact;
-		
+	
 	/* File to look for event on */
 	char 	 *t_watchfile; 	/* set event ident IN TEST (can't know fd beforehand)*/
 	int	 t_file_is_fifo;/* FIFOs are handled in a special manner */
-
+	
 	/* Different parameters for poll() vs kevent() */
 	union { 
 		struct kevent	tu_kev;
 		short		tu_pollevents;
 	} t_union;
-
+	
 	/* Do we expect results? */
 	int	 t_want_event;
-
+	
 	/* Not always used--how much data should we find (EVFILT_{READ,WRITE}) */
 	int	 t_nbytes;
-
+	
 	/* Hacks for FILT_READ and pipes */
 	int 	 t_read_to_end_first; 	/* Consume all data in file before waiting for event */
 	int 	 t_write_some_data; 	/* Write some data to file before waiting for event (FIFO hack) */
@@ -112,7 +112,7 @@ void LOG(int level, FILE *f, const char *fmt, ...) {
 		}
 		vfprintf(f, fmt, ap);
 	} 
-
+	
 	va_end(ap);
 }
 
@@ -120,7 +120,7 @@ void LOG(int level, FILE *f, const char *fmt, ...) {
  * Initialize an action struct.  Whether to sleep, what action to take,
  * and arguments for that action.
  */
-	void 
+void 
 init_action(action_t *act, int sleep, action_id_t call, int nargs, ...) 
 {
 	int i;
@@ -128,14 +128,14 @@ init_action(action_t *act, int sleep, action_id_t call, int nargs, ...)
 	va_start(ap, nargs);
 	act->act_dosleep = sleep;
 	act->act_id = call;
-
+	
 	for (i = 0; i < nargs; i++)
 	{
 		act->act_args[i] = va_arg(ap, void*);
 	}
-
+	
 	va_end(ap);
-
+	
 }
 
 /*
@@ -158,14 +158,14 @@ open_fifo(const char *path, int *readfd, int *writefd)
 	int waitres;
 	int res;
 	int tmpreadfd, tmpwritefd;
-
+	
 	res = pthread_create(&thread, 0, open_fifo_readside, (void*)path);
 	if (res == 0) {
 		tmpwritefd = open(path, O_WRONLY);
 		waitres = pthread_join(thread, (void**) &tmpreadfd);
-
+		
 		fcntl(tmpwritefd, F_SETFL, O_WRONLY | O_NONBLOCK);
-
+		
 		if ((waitres == 0) && (tmpwritefd >= 0) && (tmpreadfd >= 0)) {
 			*readfd = tmpreadfd;
 			*writefd = tmpwritefd;
@@ -213,9 +213,9 @@ execute_action(void *actionptr)
 	void *addr;
 	struct timeval tv;
 	struct stat sstat;
-
+	
 	LOG(1, stderr, "Beginning action of type %d\n", act->act_id);
-
+	
 	/* Let other thread get into kevent() sleep */
 	if(SLEEP == act->act_dosleep) {
 		sleep(SLEEP_TIME); 
@@ -252,7 +252,7 @@ execute_action(void *actionptr)
 				} else {
 					res = -1;
 				}
-
+				
 				close(tmpfd);
 			}
 			break;
@@ -323,7 +323,7 @@ execute_action(void *actionptr)
 			break;
 		case SETXATTR:
 			res = setxattr((char*)args[0], KEY, (void*)VAL, strlen(VAL),
-         			0, 0);
+						   0, 0);
 			break;
 		case UTIMES:
 			tv.tv_sec = time(NULL);
@@ -345,9 +345,9 @@ execute_action(void *actionptr)
 			res = -1;
 			break;
 	}
-
+	
 	return (void*)res;
-
+	
 }
 
 /*
@@ -377,7 +377,7 @@ execute_action_list(action_t *actions, int nactions, int failout)
 			LOG(1, stderr, "Action list work succeeded on step %d.\n", i);
 		}
 	}
-
+	
 	return res;
 }
 
@@ -392,24 +392,24 @@ execute_test(test_t *test)
 	pthread_t thr;
 	struct kevent evlist;
 	struct timespec ts = {WAIT_TIME, 0l};
-
+	
 	memset(&evlist, 0, sizeof(evlist));
-
+	
 	LOG(1, stderr, "Test %s starting.\n", test->t_testname);
 	LOG(1, stderr, test->t_want_event ? "Expecting an event.\n" : "Not expecting events.\n");
-
+	
 	res = execute_action_list(test->t_prep_actions, test->t_n_prep_actions, 1);
-
+	
 	/* If prep succeeded */
 	if (0 == res) {
 		/* Create kqueue for kqueue tests*/
 		if (!test->t_is_poll_test) {
 			kqfd = kqueue(); 
 		}
-
+		
 		if ((test->t_is_poll_test) || kqfd >= 0) {
 			LOG(1, stderr, "Opened kqueue.\n");
-
+			
 			/* Open the file we're to monitor.  Fifos get special handling */
 			if (test->t_file_is_fifo) {
 				filefd = -1;
@@ -417,16 +417,16 @@ execute_test(test_t *test)
 			} else {
 				filefd = open(test->t_watchfile, O_RDONLY | O_SYMLINK);
 			}
-
+			
 			if (filefd >= 0) {
 				LOG(1, stderr, "Opened file to monitor.\n");
-
+				
 				/* 
 				 * Fill in the fd to monitor once you know it 
 				 * If it's a fifo test, then the helper is definitely going to want the write end.
 				 */
 				test->t_helpthreadact.act_fd = (writefd >= 0 ? writefd : filefd);
-
+				
 				if (test->t_read_to_end_first) {
 					read_to_end(filefd);
 				} else if (test->t_write_some_data) {
@@ -435,24 +435,24 @@ execute_test(test_t *test)
 					dowr.act_fd = writefd;
 					execute_action(&dowr);
 				}
-
+				
 				/* Helper modifies the file that we're listening on (sleeps first, in general) */
 				res = pthread_create(&thr, NULL, execute_action, (void*) &test->t_helpthreadact);
 				if (0 == res) {
 					LOG(1, stderr, "Created helper thread.\n");
-
+					
 					/* This is ugly business to hack on filling up a FIFO */
 					if (test->t_extra_sleep_hack) {
 						sleep(5);
 					}
-
+					
 					if (test->t_is_poll_test) {
 						struct pollfd pl;
 						pl.fd = filefd;
 						pl.events = test->t_union.tu_pollevents;
 						cnt = poll(&pl, 1, WAIT_TIME);
 						LOG(1, stderr, "Finished poll() call.\n");
-
+						
 						if ((cnt < 0)) {
 							LOG(2, stderr, "error is in errno, %s\n", strerror(errno));
 							res = cnt;
@@ -461,7 +461,7 @@ execute_test(test_t *test)
 						test->t_union.tu_kev.ident = filefd; 
 						cnt = kevent(kqfd, &test->t_union.tu_kev, 1, &evlist, 1,  &ts);
 						LOG(1, stderr, "Finished kevent() call.\n");
-
+						
 						if ((cnt < 0) || (evlist.flags & EV_ERROR))  {
 							LOG(2, stderr, "kevent() call failed.\n");
 							if (cnt < 0) {
@@ -472,7 +472,7 @@ execute_test(test_t *test)
 							res = cnt;
 						}
 					}
-
+					
 					/* Success only if you've succeeded to this point AND joined AND other thread is happy*/
 					status = 0;
 					res2 = pthread_join(thr, (void**)&status);
@@ -485,7 +485,7 @@ execute_test(test_t *test)
 				} else {
 					LOG(2, stderr, "Couldn't start thread.\n");
 				}
-
+				
 				close(filefd);
 				if (test->t_file_is_fifo) {
 					close(writefd);
@@ -500,10 +500,10 @@ execute_test(test_t *test)
 			res = -1;
 		}
 	}
-
+	
 	/* Cleanup work */
 	execute_action_list(test->t_cleanup_actions, test->t_n_cleanup_actions, 0);
-
+	
 	/* Success if nothing failed and we either received or did not receive event,
 	 * as expected 
 	 */
@@ -517,7 +517,7 @@ execute_test(test_t *test)
 			} else {
 				retval = 0;
 			}
-
+			
 		} else {
 			LOG(2, stderr, "Got unexpected event or lack thereof.\n");
 			retval = -1;
@@ -526,7 +526,7 @@ execute_test(test_t *test)
 		LOG(2, stderr, "Failed to execute test.\n");
 		retval = -1;
 	}
-
+	
 	LOG(3, stdout, "Test %s done with result %d.\n", test->t_testname, retval);
 }
 
@@ -539,7 +539,7 @@ init_test_common(test_t *tst, char *testname, char *watchfile, int nprep, int nc
 	tst->t_n_prep_actions = nprep;
 	tst->t_n_cleanup_actions = nclean;
 	tst->t_want_event = (want > 0);
-
+	
 	if (ispoll) {
 		tst->t_is_poll_test = 1;
 		tst->t_union.tu_pollevents = (short)event;
@@ -580,38 +580,38 @@ void
 run_note_delete_tests() 
 {
 	test_t test;
-
+	
 	init_test(&test, "1.1.2: unlink a file", FILE1, 1, 0, NOTE_DELETE, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "1.1.3: rmdir a dir", DIR1, 1, 0, NOTE_DELETE, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RMDIR, 2, (void*)DIR1, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "1.1.4: rename one file over another", FILE2, 2, 1, NOTE_DELETE, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)FILE2, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "1.1.5: rename one dir over another", DIR2, 2, 1, NOTE_DELETE, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, MKDIR, 2, (void*)DIR2, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)DIR1, (void*)DIR2);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR2, NULL);
 	execute_test(&test);
-
+	
 	/* Do FIFO stuff here */
 	init_test(&test, "1.1.6: make a fifo, unlink it", FILE1, 1, 0, NOTE_DELETE, YES_EVENT);
 	test.t_file_is_fifo = 1;
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, UNLINK, 1, (void*)FILE1);
 	execute_test(&test);
-
+	
 	init_test(&test, "1.1.7: rename a file over a fifo", FILE1, 2, 1, NOTE_DELETE, YES_EVENT);
 	test.t_file_is_fifo = 1;
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 2, (void*)FILE1, (void*)NULL);
@@ -619,41 +619,41 @@ run_note_delete_tests()
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE2, (void*)FILE1);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "1.1.8: unlink a symlink to a file", FILE2, 2, 1, NOTE_DELETE, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, SYMLINK, 2, (void*)FILE1, (void*)FILE2);
 	init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)FILE2, NULL);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-
+	
 	/* ================= */
-
+	
 	init_test(&test, "1.2.1: Straight-up rename file", FILE1, 1, 1, NOTE_DELETE, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2); 
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "1.2.2: Straight-up rename dir", DIR1, 1, 1, NOTE_DELETE, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)DIR1, (void*)DIR2); 
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR2, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "1.2.3: Null action on file", FILE1, 1, 1, NOTE_DELETE, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, NOTHING, 2, NULL, NULL); /* The null action */
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "1.2.4: Rename one file over another: watch the file that lives", FILE1, 2, 1, NOTE_DELETE, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)FILE2, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "1.2.5: Rename one dir over another, watch the dir that lives", DIR1, 2, 1, NOTE_DELETE, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, MKDIR, 2, (void*)DIR2, (void*)NULL);
@@ -666,16 +666,16 @@ run_note_write_tests()
 {
 	char pathbuf[50];
 	char otherpathbuf[50];
-
+	
 	test_t test;
-
+	
 	init_test(&test, "2.1.1: Straight-up write to a file", FILE1, 1, 1, NOTE_WRITE, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, WRITE, 2, (void*)FILE1, NULL);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
-
+	
+	
 	makepath(pathbuf, DIR1, FILE1);
 	init_test(&test, "2.1.2: creat() file inside a dir", DIR1, 1, 2, NOTE_WRITE, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -683,7 +683,7 @@ run_note_write_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	makepath(pathbuf, DIR1, FILE1);
 	init_test(&test, "2.1.3: open() file inside a dir", DIR1, 1, 2, NOTE_WRITE, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -691,7 +691,7 @@ run_note_write_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	makepath(pathbuf, DIR1, FILE1);
 	init_test(&test, "2.1.3: unlink a file from a dir", DIR1, 2, 1, NOTE_WRITE, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -699,7 +699,7 @@ run_note_write_tests()
 	init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)pathbuf, NULL);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	makepath(pathbuf, DIR1, FILE1);
 	makepath(otherpathbuf, DIR1, FILE2);
 	init_test(&test, "2.1.5: rename a file in a dir", DIR1, 2, 2, NOTE_WRITE, YES_EVENT);
@@ -709,7 +709,7 @@ run_note_write_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)otherpathbuf, (void*)NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	makepath(pathbuf, DIR1, FILE1);
 	init_test(&test, "2.1.6: rename a file to outside of a dir", DIR1, 2, 2, NOTE_WRITE, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -718,7 +718,7 @@ run_note_write_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	makepath(pathbuf, DIR1, FILE1);
 	init_test(&test, "2.1.7: rename a file into a dir", DIR1, 2, 2, NOTE_WRITE, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -727,7 +727,7 @@ run_note_write_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	makepath(pathbuf, DIR1, FILE1);
 	init_test(&test, "2.1.9: unlink a fifo from a dir", DIR1, 2, 1, NOTE_WRITE, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -735,7 +735,7 @@ run_note_write_tests()
 	init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)pathbuf, NULL);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	makepath(pathbuf, DIR1, FILE1);
 	init_test(&test, "2.1.10: make symlink in a dir", DIR1, 1, 2, NOTE_WRITE, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -750,8 +750,8 @@ run_note_write_tests()
 	init_action(&test.t_helpthreadact, SLEEP, WRITEFD, 0);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
-
+	
+	
 	makepath(pathbuf, DIR1, FILE1);
 	init_test(&test, "2.1.13: delete a symlink in a dir", DIR1, 2, 1, NOTE_WRITE, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -759,7 +759,7 @@ run_note_write_tests()
 	init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)pathbuf, (void*)FILE1);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	/* This actually should not generate an event, though it's in this section */
 	makepath(pathbuf, DIR1, FILE1);
 	makepath(otherpathbuf, DIR1, FILE2);
@@ -772,47 +772,47 @@ run_note_write_tests()
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, UNLINK, 2, (void*)otherpathbuf, (void*)NULL);
 	init_action(&test.t_cleanup_actions[2], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	LOG(1, stderr, "MMAP test should fail on HFS.\n");
 	init_test(&test, "2.1.15: Change a file with mmap()", FILE1, 1, 1, NOTE_WRITE, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, MMAP, 2, (void*)FILE1, (void*)1); /* 1 -> "modify it"*/
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
+	
 	/*================= no-event tests ==================*/
 	init_test(&test, "2.2.1: just open and close existing file", FILE1, 1, 1, NOTE_WRITE, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, OPEN, 2, (void*)FILE1, NULL);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "2.2.2: read from existing file", FILE1, 1, 1, NOTE_WRITE, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, READ, 2, (void*)FILE1, NULL);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "2.2.3: rename existing file", FILE1, 1, 1, NOTE_WRITE, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "2.2.4: just open and close dir", DIR1, 1, 1, NOTE_WRITE, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, OPEN, 2, (void*)DIR1, (void*)NULL);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	/* There are no tests 2.2.5 or 2.2.6 */
-
+	
 	init_test(&test, "2.2.7: rename a dir", DIR1, 1, 1, NOTE_WRITE, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)DIR1, (void*)DIR2);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR2, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "2.2.8: rename a fifo", FILE1, 1, 1, NOTE_WRITE, NO_EVENT);
 	test.t_file_is_fifo = 1;
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 2, (void*)FILE1, (void*)NULL);
@@ -825,40 +825,40 @@ run_note_write_tests()
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, UNLINK,1, (void*)FILE1);
 	execute_test(&test);
-
+	
 	init_test(&test, "2.2.10: chmod a file", FILE1, 1, 1, NOTE_WRITE, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, CHMOD, 2, (void*)FILE1, (void*)0700);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
+	
 	struct passwd *pwd = getpwnam("local");
 	int uid = pwd->pw_uid;
 	int gid = pwd->pw_gid;
-
+	
 	init_test(&test, "2.2.11: chown a file", FILE1, 2, 1, NOTE_WRITE, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_prep_actions[1], NOSLEEP, CHOWN, 3, (void*)FILE1, (void*)uid, (void*)gid);
 	init_action(&test.t_helpthreadact, SLEEP, CHOWN, 3, (void*)FILE1, (void*)getuid(), (void*)getgid());
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
-
+	
+	
 	init_test(&test, "2.2.12: chmod a dir", DIR1, 1, 1, NOTE_WRITE, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, CHMOD, 2, (void*)DIR1, (void*)0700);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "2.2.13: chown a dir", DIR1, 2, 1, NOTE_WRITE, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&test.t_prep_actions[1], NOSLEEP, CHOWN, 3, (void*)DIR1, (void*)uid, (void*)gid);
 	init_action(&test.t_helpthreadact, SLEEP, CHOWN, 3, (void*)DIR1, (void*)getuid(), (void*)getgid());
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
-
-
+	
+	
+	
 	LOG(1, stderr, "MMAP will never give a notification on HFS.\n");
 	init_test(&test, "2.1.14: mmap() a file but do not change it", FILE1, 1, 1, NOTE_WRITE, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
@@ -872,19 +872,19 @@ run_note_extend_tests()
 {
 	test_t test;
 	char pathbuf[50];
-
+	
 	LOG(1, stderr, "THESE TESTS WILL FAIL ON HFS!\n");
-
+	
 	init_test(&test, "3.1.1: write beyond the end of a file", FILE1, 1, 1, NOTE_EXTEND, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, WRITE, 2, (void*)FILE1, (void*)NULL); 
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
+	
 	/*
 	 * We won't concern ourselves with lengthening directories: commenting these out  
 	 *
-
+	 
 	 makepath(pathbuf, DIR1, FILE1);
 	 init_test(&test, "3.1.2: add a file to a directory with creat()", DIR1, 1, 2, NOTE_EXTEND, YES_EVENT);
 	 init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -892,7 +892,7 @@ run_note_extend_tests()
 	 init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL);
 	 init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	 execute_test(&test);
-
+	 
 	 makepath(pathbuf, DIR1, FILE1);
 	 init_test(&test, "3.1.3: add a file to a directory with open()", DIR1, 1, 2, NOTE_EXTEND, YES_EVENT);
 	 init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -900,7 +900,7 @@ run_note_extend_tests()
 	 init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL);
 	 init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	 execute_test(&test);
-
+	 
 	 makepath(pathbuf, DIR1, FILE1);
 	 init_test(&test, "3.1.4: add a file to a directory with rename()", DIR1, 2, 2, NOTE_EXTEND, YES_EVENT);
 	 init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
@@ -910,37 +910,37 @@ run_note_extend_tests()
 	 init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	 execute_test(&test);
 	 */
-
+	
 	/* 3.1.5: a placeholder for a potential kernel test */
 	/*
-	   makepath(pathbuf, DIR1, DIR2);
-	   init_test(&test, "3.1.6: add a file to a directory with mkdir()", DIR1, 1, 2, NOTE_EXTEND, YES_EVENT);
-	   init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
-	   init_action(&test.t_helpthreadact, SLEEP, MKDIR, 2, (void*)pathbuf, (void*)NULL); 
-	   init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)pathbuf, (void*)NULL);
-	   init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
-	   execute_test(&test);
-	   */
+	 makepath(pathbuf, DIR1, DIR2);
+	 init_test(&test, "3.1.6: add a file to a directory with mkdir()", DIR1, 1, 2, NOTE_EXTEND, YES_EVENT);
+	 init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
+	 init_action(&test.t_helpthreadact, SLEEP, MKDIR, 2, (void*)pathbuf, (void*)NULL); 
+	 init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)pathbuf, (void*)NULL);
+	 init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
+	 execute_test(&test);
+	 */
 	init_test(&test, "3.1.7: lengthen a file with truncate()", FILE1, 1, 1, NOTE_EXTEND, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, LENGTHEN, 2, FILE1, (void*)NULL); 
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
-
+	
+	
 	/** ========== NO EVENT SECTION ============== **/
 	init_test(&test, "3.2.1: setxattr() a file", FILE1, 1, 1, NOTE_EXTEND, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, SETXATTR, 2, FILE1, (void*)NULL); 
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "3.2.2: chmod a file", FILE1, 1, 1, NOTE_EXTEND, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, CHMOD, 2, (void*)FILE1, (void*)0700);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
+	
 	struct passwd *pwd = getpwnam("local");
 	if (!pwd) {
 		LOG(2, stderr, "Couldn't getpwnam for local.\n");
@@ -948,28 +948,28 @@ run_note_extend_tests()
 	} 	
 	int uid = pwd->pw_uid;
 	int gid = pwd->pw_gid;
-
+	
 	init_test(&test, "3.2.3: chown a file", FILE1, 2, 1, NOTE_EXTEND, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_prep_actions[1], NOSLEEP, CHOWN, 3, (void*)FILE1, (void*)uid, (void*)gid);
 	init_action(&test.t_helpthreadact, SLEEP, CHOWN, 3, (void*)FILE1, (void*)getuid(), (void*)getgid());
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
-
+	
+	
 	init_test(&test, "3.2.4: chmod a dir", DIR1, 1, 1, NOTE_EXTEND, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, CHMOD, 2, (void*)DIR1, (void*)0700);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "3.2.5: chown a dir", DIR1, 2, 1, NOTE_EXTEND, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&test.t_prep_actions[1], NOSLEEP, CHOWN, 3, (void*)DIR1, (void*)uid, (void*)gid);
 	init_action(&test.t_helpthreadact, SLEEP, CHOWN, 3, (void*)DIR1, (void*)getuid(), (void*)getgid());
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "3.2.6: TRUNC a file with truncate()", FILE1, 1, 1, NOTE_EXTEND, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, TRUNC, 2, FILE1, (void*)NULL); 
@@ -982,50 +982,50 @@ run_note_attrib_tests()
 {
 	test_t test;
 	char pathbuf[50];
-
+	
 	init_test(&test, "4.1.1: chmod a file", FILE1, 1, 1, NOTE_ATTRIB, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, CHMOD, 2, FILE1, (void*)0700); 
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
+	
 	struct passwd *pwd = getpwnam("local");
 	int uid = pwd->pw_uid;
 	int gid = pwd->pw_gid;
-
+	
 	init_test(&test, "4.1.2: chown a file", FILE1, 2, 1, NOTE_ATTRIB, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, CHOWN, 3, (void*)FILE1, (void*)uid, (void*)gid);
 	init_action(&test.t_helpthreadact, SLEEP, CHOWN, 3, FILE1, (void*)getuid(), (void*)gid); 
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "4.1.3: chmod a dir", DIR1, 1, 1, NOTE_ATTRIB, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&(test.t_helpthreadact), SLEEP, CHMOD, 2, (void*)DIR1, (void*)0700);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "4.1.4: chown a dir", DIR1, 2, 1, NOTE_ATTRIB, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, CHOWN, 3, (void*)DIR1, (void*) uid, (void*)gid);
 	init_action(&test.t_helpthreadact, SLEEP, CHOWN, 3, DIR1, (void*)getuid(), (void*)getgid()); 
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "4.1.5: setxattr on a file", FILE1, 1, 1, NOTE_ATTRIB, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, SETXATTR, 2, (void*)FILE1, (void*)NULL); 
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "4.1.6: setxattr on a dir", DIR1, 1, 1, NOTE_ATTRIB, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, SETXATTR, 2, (void*)DIR1, (void*)NULL); 
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
-
+	
+	
 	init_test(&test, "4.1.7: exchangedata", FILE1, 2, 2, NOTE_ATTRIB, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)FILE2, (void*)NULL);
@@ -1033,52 +1033,52 @@ run_note_attrib_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, UNLINK, 2, (void*)FILE2, (void*)NULL);
 	execute_test(&test);
-
-
+	
+	
 	init_test(&test, "4.1.8: utimes on a file", FILE1, 1, 1, NOTE_ATTRIB, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, UTIMES, 2, (void*)FILE1, (void*)NULL); 
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "4.1.9: utimes on a dir", DIR1, 1, 1, NOTE_ATTRIB, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, UTIMES, 2, (void*)DIR1, (void*)NULL); 
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
-
+	
+	
 	/* ====== NO EVENT TESTS ========== */
-
+	
 	init_test(&test, "4.2.1: rename a file", FILE1, 1, 1, NOTE_ATTRIB, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "4.2.2: open (do not change) a file", FILE1, 1, 1, NOTE_ATTRIB, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, OPEN, 2, (void*)FILE1, NULL);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "4.2.3: stat a file", FILE1, 1, 1, NOTE_ATTRIB, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, STAT, 2, (void*)FILE1, NULL);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "4.2.4: unlink a file", FILE1, 1, 0, NOTE_ATTRIB, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "4.2.5: write to a file", FILE1, 1, 1, NOTE_ATTRIB, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, WRITE, 2, (void*)FILE1, (void*)NULL); 
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
+	
 	LOG(1, stderr, "EXPECT SPURIOUS NOTE_ATTRIB EVENTS FROM DIRECTORY OPERATIONS on HFS.\n");
 	init_test(&test, "4.2.6: add a file to a directory with creat()", DIR1, 1, 2, NOTE_ATTRIB, NO_EVENT);
 	makepath(pathbuf, DIR1, FILE1);
@@ -1087,7 +1087,7 @@ run_note_attrib_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "4.2.7: mkdir in a dir", DIR1, 1, 2, NOTE_ATTRIB, NO_EVENT);
 	makepath(pathbuf, DIR1, DIR2);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -1095,7 +1095,7 @@ run_note_attrib_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)pathbuf, (void*)NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "4.2.8: add a symlink to a directory", DIR1, 1, 2, NOTE_ATTRIB, NO_EVENT);
 	makepath(pathbuf, DIR1, FILE1);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -1103,7 +1103,7 @@ run_note_attrib_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "4.2.9: rename into a dir()", DIR1, 2, 2, NOTE_ATTRIB, NO_EVENT);
 	makepath(pathbuf, DIR1, FILE1);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -1112,7 +1112,7 @@ run_note_attrib_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "4.2.10: unlink() file from dir", DIR1, 2, 1, NOTE_ATTRIB, NO_EVENT);
 	makepath(pathbuf, DIR1, FILE1);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -1120,7 +1120,7 @@ run_note_attrib_tests()
 	init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL); 
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "4.2.11: mkfifo in a directory", DIR1, 1, 2, NOTE_ATTRIB, NO_EVENT);
 	makepath(pathbuf, DIR1, FILE1);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -1128,8 +1128,8 @@ run_note_attrib_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
-
+	
+	
 }
 
 
@@ -1139,28 +1139,28 @@ run_note_link_tests()
 	test_t test;
 	char pathbuf[50];
 	char otherpathbuf[50];
-
+	
 	LOG(1, stderr, "HFS DOES NOT HANDLE UNLINK CORRECTLY...\n");
 	init_test(&test, "5.1.1: unlink() a file", FILE1, 1, 0, NOTE_LINK, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL);
 	execute_test(&test);
-
-
+	
+	
 	init_test(&test, "5.1.1.5: link A to B, watch A, remove B", FILE1, 2, 1, NOTE_LINK, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, HARDLINK, 2, (void*)FILE1, (void*)FILE2);
 	init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)FILE2, (void*)NULL);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "5.1.2: link() to a file", FILE1, 1, 2, NOTE_LINK, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, HARDLINK, 2, (void*)FILE1, (void*)FILE2);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL);
 	execute_test(&test);
-
+	
 	makepath(pathbuf, DIR1, DIR2);
 	init_test(&test, "5.1.3: make one dir in another", DIR1, 1, 2, NOTE_LINK, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -1168,7 +1168,7 @@ run_note_link_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)pathbuf, NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL);
 	execute_test(&test);
-
+	
 	makepath(pathbuf, DIR1, DIR2);
 	init_test(&test, "5.1.4: rmdir a dir from within another", DIR1, 2, 1, NOTE_LINK, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -1176,7 +1176,7 @@ run_note_link_tests()
 	init_action(&test.t_helpthreadact, SLEEP, RMDIR, 2, (void*)pathbuf, (void*)NULL);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL);
 	execute_test(&test);
-
+	
 	makepath(pathbuf, DIR1, DIR2);
 	makepath(otherpathbuf, DIR1, DIR1);
 	init_test(&test, "5.1.5: rename dir A over dir B inside dir C", DIR1, 3, 2, NOTE_LINK, YES_EVENT);
@@ -1187,7 +1187,7 @@ run_note_link_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)otherpathbuf, NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL);
 	execute_test(&test);
-
+	
 	LOG(1, stderr, "HFS bypasses hfs_makenode to create in target, so misses knote.\n");
 	makepath(pathbuf, DIR1, DIR2);
 	init_test(&test, "5.1.6: rename one dir into another", DIR1, 2, 2, NOTE_LINK, YES_EVENT);
@@ -1197,7 +1197,7 @@ run_note_link_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)pathbuf, NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL);
 	execute_test(&test);
-
+	
 	LOG(1, stderr, "HFS bypasses hfs_removedir to remove from source, so misses knote.\n");
 	makepath(pathbuf, DIR1, DIR2);
 	init_test(&test, "5.1.7: rename one dir out of another", DIR1, 2, 2, NOTE_LINK, YES_EVENT);
@@ -1207,12 +1207,12 @@ run_note_link_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR2, NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "5.1.8: rmdir a dir", DIR1, 1, 0, NOTE_LINK, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL);
 	execute_test(&test);
-
+	
 	/* ============= NO EVENT SECTION ============== */
 	makepath(pathbuf, DIR1, FILE1);
 	init_test(&test, "5.2.1: make a file in a dir", DIR1, 1, 2, NOTE_LINK, NO_EVENT);
@@ -1221,7 +1221,7 @@ run_note_link_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL);
 	execute_test(&test);
-
+	
 	makepath(pathbuf, DIR1, FILE1);
 	init_test(&test, "5.2.2: unlink a file in a dir", DIR1, 2, 1, NOTE_LINK, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -1229,7 +1229,7 @@ run_note_link_tests()
 	init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL);
 	execute_test(&test);
-
+	
 	makepath(pathbuf, DIR1, FILE1);
 	makepath(otherpathbuf, DIR1, FILE2);
 	init_test(&test, "5.2.3: rename a file within a dir", DIR1, 2, 2, NOTE_LINK, NO_EVENT);
@@ -1239,7 +1239,7 @@ run_note_link_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)otherpathbuf, NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL);
 	execute_test(&test);
-
+	
 	makepath(pathbuf, DIR1, FILE1);
 	init_test(&test, "5.2.4: rename a file into a dir", DIR1, 2, 2, NOTE_LINK, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -1248,7 +1248,7 @@ run_note_link_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL);
 	execute_test(&test);
-
+	
 	makepath(pathbuf, DIR1, FILE1);
 	init_test(&test, "5.2.5: make a symlink in a dir", DIR1, 1, 2, NOTE_LINK, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
@@ -1256,14 +1256,14 @@ run_note_link_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "5.2.6: make a symlink to a dir", DIR1, 1, 2, NOTE_LINK, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, SYMLINK, 2, (void*)DIR1, (void*)FILE1);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "5.2.7: make a symlink to a file", FILE1, 1, 2, NOTE_LINK, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, SYMLINK, 2, (void*)FILE1, (void*)FILE2);
@@ -1276,74 +1276,74 @@ void
 run_note_rename_tests() 
 {
 	test_t test;
-
+	
 	init_test(&test, "6.1.1: rename a file", FILE1, 1, 1, NOTE_RENAME, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "6.1.2: rename a dir", DIR1, 1, 1, NOTE_RENAME, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)DIR1, (void*)DIR2);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR2, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "6.1.2: rename one file over another", FILE1, 2, 1, NOTE_RENAME, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)FILE2, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "6.1.3: rename one dir over another", DIR1, 2, 1, NOTE_RENAME, YES_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, MKDIR, 2, (void*)DIR2, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)DIR1, (void*)DIR2);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR2, NULL);
 	execute_test(&test);
-
+	
 	/* ========= NO EVENT SECTION =========== */
-
+	
 	init_test(&test, "6.2.1: unlink a file", FILE1, 1, 0, NOTE_RENAME, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "6.2.2: rmdir a dir", DIR1, 1, 0, NOTE_RENAME, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RMDIR, 2, (void*)DIR1, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "6.2.3: link() to a file", FILE1, 1, 2, NOTE_RENAME, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, HARDLINK, 2, (void*)FILE1, (void*)FILE2);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	init_action(&test.t_cleanup_actions[1], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "6.2.4: rename one file over another: watch deceased", 
-			FILE2, 2, 1, NOTE_RENAME, NO_EVENT);
+			  FILE2, 2, 1, NOTE_RENAME, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)FILE2, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "6.2.5: rename one dir over another: watch deceased", 
-			DIR2, 2, 1, NOTE_RENAME, NO_EVENT);
+			  DIR2, 2, 1, NOTE_RENAME, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, MKDIR, 2, (void*)DIR2, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)DIR1, (void*)DIR2);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR2, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "6.2.6: rename a file to itself", FILE1, 1, 1, NOTE_RENAME, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE1);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "6.2.7: rename a dir to itself", DIR1, 1, 1, NOTE_RENAME, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)DIR1, (void*)DIR1);
@@ -1360,7 +1360,7 @@ run_note_revoke_tests()
 	init_action(&test.t_helpthreadact, SLEEP, REVOKE, 1, (void*)FILE1);
 	init_action(&(test.t_cleanup_actions[0]), NOSLEEP, UNLINK, 1, (void*)FILE1);
 	execute_test(&test);
-
+	
 	init_test(&test, "7.2.1: delete file", FILE1, 1, 0, NOTE_REVOKE, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1);
 	init_action(&test.t_helpthreadact, SLEEP, UNLINK, 1, (void*)FILE1);
@@ -1378,7 +1378,7 @@ run_evfilt_read_tests()
 	init_action(&test.t_helpthreadact, SLEEP, NOTHING, 0);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "8.1.2: block, then write to file", FILE1, 2, 1, EVFILT_READ, strlen(TEST_STRING));
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, TRUNC, 1, (void*)FILE1);
@@ -1392,7 +1392,7 @@ run_evfilt_read_tests()
 	init_action(&test.t_helpthreadact, SLEEP, LENGTHEN, 1, (void*)FILE1);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "8.1.4: block, then seek to beginning", FILE1, 2, 1, EVFILT_READ, strlen(TEST_STRING));
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, WRITE, 1, (void*)FILE1);
@@ -1401,14 +1401,14 @@ run_evfilt_read_tests()
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
 	
-
+	
 	init_test(&test, "8.1.5: block, then write to fifo", FILE1, 1, 1, EVFILT_READ, strlen(TEST_STRING));
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 1, (void*)FILE1);
 	test.t_file_is_fifo = 1;
 	init_action(&test.t_helpthreadact, SLEEP, WRITE, 1, (void*)FILE1);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-
+	
 	/* No result section... */
 	init_test(&test, "8.2.1: just rename", FILE1, 2, 1, EVFILT_READ, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1);
@@ -1416,13 +1416,13 @@ run_evfilt_read_tests()
 	init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "8.2.2: delete file", FILE1, 2, 0, EVFILT_READ, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, TRUNC, 1, (void*)FILE1);
 	init_action(&test.t_helpthreadact, SLEEP, UNLINK, 1, (void*)FILE1);
 	execute_test(&test);
-
+	
 	init_test(&test, "8.2.3: write to beginning", FILE1, 2, 1, EVFILT_READ, NO_EVENT);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, WRITE, 1, (void*)FILE1);
@@ -1438,14 +1438,14 @@ run_evfilt_read_tests()
 	init_action(&test.t_helpthreadact, SLEEP, LSEEK, 1, (void*)strlen(TEST_STRING));
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "8.2.5: trying to read from empty fifo", FILE1, 1, 1, EVFILT_READ, 0);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 1, (void*)FILE1);
 	test.t_file_is_fifo = 1;
 	init_action(&test.t_helpthreadact, SLEEP, NOTHING, 1, (void*)0);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-
+	
 }
 
 
@@ -1474,7 +1474,7 @@ write_to_fd(void *arg)
 void 
 run_evfilt_write_tests()
 {
-
+	
 	test_t test;
 	init_test(&test, "9.1.1: how much space in empty fifo?", FILE1, 1, 1, EVFILT_WRITE, FIFO_SPACE);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 1, (void*)FILE1, (void*)NULL);
@@ -1490,7 +1490,7 @@ run_evfilt_write_tests()
 	init_action(&(test.t_helpthreadact), NOSLEEP, NOTHING, 0);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-
+	
 	init_test(&test, "9.2.1: how much space in a full fifo?", FILE1, 1, 1, EVFILT_WRITE, 0);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 1, (void*)FILE1, (void*)NULL);
 	test.t_file_is_fifo = 1;
@@ -1524,23 +1524,23 @@ run_poll_tests()
 	init_action(&test.t_helpthreadact, SLEEP, NOTHING, 0);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-		
+	
 	init_poll_test(&test, "10.1.4: does poll say I can read a nonempty regular file?", FILE1, 2, 1, POLLRDNORM, 1);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1, (void*)NULL);
 	init_action(&(test.t_prep_actions[1]), NOSLEEP, LENGTHEN, 1, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, NOTHING, 0);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-		
+	
 	init_poll_test(&test, "10.1.5: does poll say I can read an empty file?", FILE1, 1, 1, POLLRDNORM, 1);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1, (void*)NULL);
 	init_action(&test.t_helpthreadact, SLEEP, NOTHING, 0);
 	init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL);
 	execute_test(&test);
-		
-
-
-
+	
+	
+	
+	
 	init_poll_test(&test, "10.2.2: does poll say I can read an empty FIFO?", FILE1, 1, 1, POLLRDNORM, 0);
 	init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 1, (void*)FILE1, (void*)NULL);
 	test.t_file_is_fifo = 1;
@@ -1557,7 +1557,7 @@ run_poll_tests()
 	execute_test(&test);
 }
 
-	void 
+void 
 run_all_tests() 
 {
 	run_note_delete_tests();
@@ -1574,14 +1574,14 @@ run_all_tests()
 	run_poll_tests();
 }
 
-	int 
+int 
 main(int argc, char **argv) 
 {
 	char *which = NULL;
 	if (argc > 1) {
 		which = argv[1];
 	}
-
+	
 	if ((!which) || (strcmp(which, "all") == 0))
 		run_all_tests();
 	else if (strcmp(which, "delete") == 0) 
diff --git a/tools/tests/libMicro/AppleReadMe b/tools/tests/libMicro/AppleReadMe
index 156b3e4b6..de49c7daf 100755
--- a/tools/tests/libMicro/AppleReadMe
+++ b/tools/tests/libMicro/AppleReadMe
@@ -5,7 +5,7 @@ Mac OS X specific notes
 # Disable Open directory and LDAP using Directory Utility app
 # Turn off airport
 # Turn off spotlight. In terminal, execute the following:
-  sudo service com.apple.metadata.mds stop
+  launchctl unload /System/Library/LaunchDaemons/com.apple.metadata.mds.plist
 # Turn off Time Machine in System Preferences
 # Wait at least 2 minutes after boot to desktop for boot cache to settle down
    
@@ -13,10 +13,32 @@ Mac OS X specific notes
 
 	make
 	./bench >output.txt
-gives you a text file named output.txt with the results of one run.
+runs the libMicro test suite (excluding the lmbench tests) and gives you a text file named output.txt with the results of one run.
+	
+
 	./multiview output1.txt output2.txt >compare.html
 gives you an HTML file comparing two runs.
 
+*** To run the libMicro test suite with the stepper disabled ***
+
+To get more consistent results from a libMicro benchmark run, the stepper must
+be disabled; otherwise it can cause wide variations in the results. See
+rdar://6243819 for details.
+
+To run the libMicro test suite with the stepper disabled, use the
+'coreos_bench' script instead of the 'bench' script.
+
+For example:
+	./coreos_bench > output.txt
+runs the libMicro test suite (excluding the lmbench tests) with the stepper
+disabled and gives you a text file named output.txt with the results of one run.
+
+Note:
+1) Disabling the stepper requires '/usr/local/bin/pstates'; install the
+AppleInternal package that provides it.
+
+2) The 'coreos_bench' script is used exactly like the 'bench' script; all the
+usage examples for the 'bench' script in this readme also hold for 'coreos_bench'.
+
 *** Makefile ***
 
 The Makefile invokes Makefile.Darwin which invokes Makefile.com.Darwin.
@@ -25,32 +47,41 @@ build correctly. The binaries are placed in a directory called
 bin-ARCH where ARCH is the default or specified when building via
 the ARCH flag.
 
+Note:
+1) The binaries of the Apple-added tests are placed in a directory called
+   apple/bin-ARCH
+
+2) All the binaries under bin-ARCH and apple/bin-ARCH are code-signed
+   during the build.
+
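+For example, to inspect the signature on one of the built binaries (a quick
+check; getppid is just one of the benchmarks listed in the README):
+
+	codesign -dv bin-i386/getppid
+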
 options for invoking Makefile are:
 ARCH 		defaults to  i386
-	if you just want to build for ppc, you can specify
-		make ARCH=ppc
-	this will put the results in bin-ppc
 
         to build fat/multi architecture, specify 
 		make ARCH=fat
-	the makefile will automatically build with ARCH_FLAG="-arch ppc -arch i386 -arch x86_64" and put the results in bin-fat
+	the makefile will automatically build with ARCH_FLAG="-arch i386 -arch x86_64" and put the results in bin-fat
+
+        to build for an ARM architecture,
+        first set the environment variable 'SDKROOT' to point to the iPhone SDK
+                make ARCH=ARM_ARCH   (where ARM_ARCH can be armv6 or armv7)
+        this will put the results in bin-ARM_ARCH
 
 	to build with only two of the architectures see below
 
 ARCH_FLAG 	defaults to -arch $(ARCH)
 	to build fat/multi architecture, specify 
-		make ARCH_FLAG="-arch ppc -arch i386" ARCH=fat
+		make ARCH_FLAG="-arch i386" ARCH=fat
 	this will put the results in bin-fat
 
 OPT_FLAG 	defaults to  -g
-	to build optimized, specify make OPT_FLAG=-s
+	to build optimized, specify make OPT_FLAG=-Os
 
 SEMOP_FLAG 	defaults to  -DUSE_SEMOP
 	to eliminate SEMOP usage, specify make SEMOP_FLAG=
 	this is needed on some lower-end systems (e.g. M63)
 
 These can be combined, e.g.
-	make ARCH=ppc SEMOP_FLAG=
+	make ARCH=i386 SEMOP_FLAG=
 
 *** Before running benchmarks ***
 
@@ -70,6 +101,17 @@ pass it a parameter to run a single benchmark, e.g.
 
 	bench lmbench_bw_unix
 
+By default the script runs only the libMicro test suite, excluding the lmbench
+tests. To run the libMicro test suite with the lmbench tests included, pass the
+-l parameter, e.g.:
+
+	bench -l
+
+To run only the lmbench test suite:
+
+	bench lmbench
+
+To display the usage:
+
+	bench -h
+
 Watch for:
 	# WARNINGS
 	#     Quantization error likely;increase batch size (-B option) 4X to avoid.
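 
 If this warning appears, one way to react is to rerun just that benchmark with
 a larger batch size by invoking its binary directly (a sketch; -B and -C are
 the standard libMicro batch-size and sample-count options):
 
 	bin-i386/lmbench_bw_unix -E -C 200 -B 400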
@@ -110,6 +152,13 @@ argument passing, the flow of control of a benchmark, etc. for the
 trivial case.  The tests starting with "lmbench_" were ported from
 the lmbench suite, so they might be good examples as well.
 
+*** A note regarding future changes to the bench.sh script ***
+The coreos_bench.sh script is almost identical to bench.sh, except that it
+has additional code to disable the stepper during a libMicro benchmark run.
+
+In the future, if bench.sh is modified, make sure the changes are also
+reflected in coreos_bench.sh.
+
 *** Things to do ***
 
 * port the rest of the lmbench benchmarks into this framework
@@ -129,3 +178,43 @@ across many machines with historical repository of runs
 Due to rdar://4654956 and its original, rdar://2588252 you cannot
 run these tests on Leopard without removing the cascade_lockf test.
 There may be other tests which panic a Leopard system.
+
+*** benchDS notes ***
+
+Per rdar://problem/7468995, this adds the ability to benchmark the key APIs
+for server daemons.  In particular, a test binary is added for each of:
+
+	ODQueryCreateWithNode()  (standard User, Groups, and Hosts records)
+	getaddrinfo()  (hosts and ports)
+	mbr_check_service_membership()
+	mbr_check_membership()
+	getpwnam()
+	getpwuid()
+	getgrgid()
+	getpwent()
+	getgrent()
+	getgrnam()
+
+The benchDS script is provided to run a standard set of tests, presuming
+that the tests are run by root on a system configured with an OD binding.
+The OD server (local or remote) must have a set of accounts created with the
+od_account_create shell script.  This script must also be run as root,
+and passed a single argument: the number of users to create.  It creates
+users od_test_{1..N}, all belonging to ds_test_group1 (gid 1211).  In addition,
+ds_test_group2 (gid 1212) is created, which has no users as members.  User ids
+are assigned sequentially starting at 5000.  In order to administer the OD
+server, the script assumes user 'diradmin' with password 'admin' is the OD admin.
+
+Also, these tests go through the APIs listed above, so they can be run against
+local account info or even Active Directory.
+
+Thus, the quick recipe is:
+	Install X Server
+	Enable OD, and create directory admin user 'diradmin' with password 'admin'
+	As root run:  od_account_create 1000
+	Now run the test, as root:  ./benchDS 1000 > output-file
+
+
+In addition, od_account_delete 1000 will delete the 1000 users created with od_account_create.
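+
+For example, a full create/run/delete cycle (all as root) looks like:
+
+	./od_account_create 1000
+	./benchDS 1000 > output-file
+	./od_account_delete 1000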
+
+
diff --git a/tools/tests/libMicro/Makefile b/tools/tests/libMicro/Makefile
index e81cc6c1e..877beb36d 100644
--- a/tools/tests/libMicro/Makefile
+++ b/tools/tests/libMicro/Makefile
@@ -35,7 +35,7 @@ ARCH = i386
 
 BINS=		$(ALL:%=bin-$(ARCH)/%) bin-$(ARCH)/tattle
 
-TARBALL_CONTENTS = 	\
+# TARBALL_CONTENTS = 	\
 	Makefile.benchmarks \
 	Makefile.SunOS 	\
 	Makefile.Linux 	\
@@ -61,6 +61,8 @@ TARBALL_CONTENTS = 	\
 	benchmark_finiworker.c	\
 	bench		\
 	bench.sh	\
+	coreos_bench	\
+	coreos_bench.sh	\
 	mk_tarball	\
 	multiview	\
 	multiview.sh	\
@@ -72,16 +74,27 @@ TARBALL_CONTENTS = 	\
 
 default $(ALL) run cstyle lint tattle: $(BINS)
 	@cp bench.sh bench
+	@cp coreos_bench.sh coreos_bench
 	@cp multiview.sh multiview
 	@cp wrapper.sh wrapper
 	@cp create_stuff.sh create_stuff
-	@chmod +x bench create_stuff multiview wrapper
+	@cp benchDS.sh benchDS
+	@cp od_account_create.sh od_account_create
+	@cp od_account_delete.sh od_account_delete
+	@chmod +x bench coreos_bench create_stuff multiview wrapper benchDS od_account_create od_account_delete
 	@mkdir -p bin-$(ARCH); cd bin-$(ARCH); MACH=$(ARCH) $(MAKE) -f ../Makefile.`uname -s` ARCH=$(ARCH) UNAME_RELEASE=`uname -r | sed 's/\./_/g'` $@
-
+	@echo "code signing all the binaries under bin-$(ARCH) and apple/bin-$(ARCH)"
+	@for file in $(abspath bin-$(ARCH)/*) $(abspath apple/bin-$(ARCH)/*);do        \
+		if test -x $$file;then  \
+			codesign -s - $$file 1>& /dev/null ;    \
+		fi;     \
+	done;
+	@echo "done"
+	
 .PHONY: clean clean_subdirs clean_$(SUBDIRS)
 
 clean: clean_subdirs
-	rm -rf bin bin-* wrapper multiview create_stuff bench tattle
+	rm -rf bin bin-* wrapper multiview create_stuff bench tattle benchDS od_account_create od_account_delete coreos_bench
 
 clean_subdirs:
 	for dir in $(SUBDIRS); do $(MAKE) -C $$dir clean; done
@@ -94,10 +107,10 @@ $(BINS): bin
 	@chmod +x wrapper
 	@ln -sf ../wrapper $@
 
-
-libMicro.tar:	FORCE
-	@chmod +x ./mk_tarball wrapper
-	@./mk_tarball $(TARBALL_CONTENTS) 
+# commenting out the libMicro.tar target as it is not being used.
+# libMicro.tar:	FORCE
+#	@chmod +x ./mk_tarball wrapper
+#	@./mk_tarball $(TARBALL_CONTENTS) 
  
-FORCE:
+# FORCE:
 
diff --git a/tools/tests/libMicro/Makefile.Darwin b/tools/tests/libMicro/Makefile.Darwin
index 7eaa1aaf1..d113fc4f2 100644
--- a/tools/tests/libMicro/Makefile.Darwin
+++ b/tools/tests/libMicro/Makefile.Darwin
@@ -30,25 +30,37 @@
 # ident	"@(#)Makefile.Darwin	1.5	05/08/04 SMI"
 #
 
+SDKROOT ?= /
+Product=$(shell tconf --product)
+Embedded=$(shell tconf --test TARGET_OS_EMBEDDED)
 
-CC=		gcc
+ifeq "$(Embedded)" "YES"
+SDKPATH = $(shell xcodebuild -sdk $(SDKROOT) -version Path)
+CFLAGS += -isysroot $(SDKPATH)
+endif
+
+CC = xcrun -sdk $(SDKROOT) gcc
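+# With the default SDKROOT of /, xcrun should resolve to the host toolchain;
+# for an embedded build, point SDKROOT at the iPhone SDK before invoking make
+# (a sketch; the exact SDK name may vary): make SDKROOT=iphoneos ARCH=armv7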
 #NOPIC=	-mdynamic-no-pic
 ARCH=	i386
 
 ifeq "$(strip $(ARCH))" "fat"
-ARCH_FLAG=      -arch i386 -arch ppc -arch x86_64 
+ARCH_FLAG=      -arch i386 -arch x86_64 
 else
 ARCH_FLAG=	-arch $(ARCH)
 endif
 
-OPT_FLAG=	-g
+### OPT_FLAG value was modified from '-g' to '-Os' as part of the fix for radar 7508837 
+OPT_FLAG=	-Os
 SEMOP_FLAG=	-DUSE_SEMOP
 
 ###
 ###CFLAGS=		-Os -DUSE_SEMOP -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
 ###extra_CFLAGS=	-Os -DUSE_SEMOP -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
 ###
-CFLAGS=		$(OPT_FLAG) $(SEMOP_FLAG) -DUSE_GETHRTIME -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
+CFLAGS+=		$(OPT_FLAG) $(SEMOP_FLAG) -DUSE_GETHRTIME -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
+ifeq "$(Embedded)" "YES"
+CFLAGS += -g -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders/ -F/AppleInternal/Library/Frameworks/ $(MORECFLAGS)
+endif
 extra_CFLAGS=	$(OPT_FLAG) $(SEMOP_FLAG) -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
 CPPFLAGS=		$(SEMOP_FLAG) -D_REENTRANT -Wall
 MATHLIB=	-lm
diff --git a/tools/tests/libMicro/Makefile.com.Darwin b/tools/tests/libMicro/Makefile.com.Darwin
old mode 100644
new mode 100755
diff --git a/tools/tests/libMicro/README b/tools/tests/libMicro/README
index a4374cb1a..9db9f814c 100644
--- a/tools/tests/libMicro/README
+++ b/tools/tests/libMicro/README
@@ -88,7 +88,15 @@ Apple-added Benchmarks
 
 	create_file
 	geekbench_stdlib_write
+	getaddrinfo_port
+	getaddrinfo_host
+	getgrgid
+	getgrent
+	getgrnam
 	getppid
+	getpwnam
+	getpwuid
+	getpwent
 	lb_mmtest
 	lm_null_call
 	lmbench_bw_file_rd
@@ -107,6 +115,9 @@ Apple-added Benchmarks
 	lmbench_select_tcp
 	lmbench_stat
 	lmbench_write
+	mbr_check_service_membership
+	mbr_check_membership
+	od_query_create_with_node
 	trivial
 	vm_allocate
 
diff --git a/tools/tests/libMicro/apple/Makefile.Darwin b/tools/tests/libMicro/apple/Makefile.Darwin
index 3ca3607ae..fe5e573bf 100644
--- a/tools/tests/libMicro/apple/Makefile.Darwin
+++ b/tools/tests/libMicro/apple/Makefile.Darwin
@@ -27,29 +27,42 @@
 # Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"@(#)Makefile.Linux	1.5	05/08/04 SMI"
+# ident	"@(#)Makefile.Darwin	1.5	05/08/04 SMI"
 #
 
+SDKROOT ?= /
+Product=$(shell tconf --product)
+Embedded=$(shell tconf --test TARGET_OS_EMBEDDED)
 
-CC=		gcc
+ifeq "$(Embedded)" "YES"
+SDKPATH = $(shell xcodebuild -sdk $(SDKROOT) -version Path)
+CFLAGS += -isysroot $(SDKPATH)
+EmbeddedOS=yes
+endif
+
+CC = xcrun -sdk $(SDKROOT) gcc
 #NOPIC=	-mdynamic-no-pic
 ARCH= i386
 
 ifeq "$(strip $(ARCH))" "fat"
-ARCH_FLAG=      -arch i386 -arch ppc -arch x86_64 
+ARCH_FLAG=      -arch i386 -arch x86_64 
 else
 ARCH_FLAG=      -arch $(ARCH)
 endif
 
-OPT_FLAG=	-g
+### OPT_FLAG value was modified from '-g' to '-Os' as part of the fix for radar 7508837
+OPT_FLAG=	-Os
 SEMOP_FLAG=	-DUSE_SEMOP
 
 ###
 ###CFLAGS=		-Os -DUSE_SEMOP -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
 ###extra_CFLAGS=	-Os -DUSE_SEMOP -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
-###
-CFLAGS=		$(OPT_FLAG) $(SEMOP_FLAG) -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
-extra_CFLAGS=	$(OPT_FLAG) $(SEMOP_FLAG) -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
+### Added -DUSE_GETHRTIME to CFLAGS and extra_CFLAGS as part of the fix for radar 7508837
+CFLAGS+=		$(OPT_FLAG) $(SEMOP_FLAG) -DUSE_GETHRTIME -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
+ifeq "$(Embedded)" "YES"
+CFLAGS += -g -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders/ -F/AppleInternal/Library/Frameworks/ $(MORECFLAGS)
+endif
+extra_CFLAGS=	$(OPT_FLAG) $(SEMOP_FLAG) -DUSE_GETHRTIME -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall
 CPPFLAGS=		$(SEMOP_FLAG) -D_REENTRANT -Wall
 MATHLIB=	-lm
 
diff --git a/tools/tests/libMicro/apple/Makefile.benchmarks b/tools/tests/libMicro/apple/Makefile.benchmarks
index 0e3cfe2ac..a26d12871 100644
--- a/tools/tests/libMicro/apple/Makefile.benchmarks
+++ b/tools/tests/libMicro/apple/Makefile.benchmarks
@@ -1,6 +1,4 @@
 #
-# CDDL HEADER START
-#
 # The contents of this file are subject to the terms
 # of the Common Development and Distribution License
 # (the "License").  You may not use this file except
@@ -53,6 +51,15 @@ ALL = 			\
 		lmbench_write		\
 		posix_spawn		\
 		trivial			\
-		vm_allocate
-
-
+		vm_allocate \
+		od_query_create_with_node   \
+		mbr_check_service_membership  \
+		getpwnam		\
+		mbr_check_membership	\
+		getpwuid		\
+		getgrgid		\
+		getpwent		\
+		getgrent		\
+		getaddrinfo_host	\
+		getaddrinfo_port	\
+		getgrnam
diff --git a/tools/tests/libMicro/apple/Makefile.com.Darwin b/tools/tests/libMicro/apple/Makefile.com.Darwin
index 121473735..d16caca8b 100644
--- a/tools/tests/libMicro/apple/Makefile.com.Darwin
+++ b/tools/tests/libMicro/apple/Makefile.com.Darwin
@@ -56,3 +56,6 @@ posix_spawn:    posix_spawn_bin
 
 posix_spawn_bin:        posix_spawn_bin.o
 	$(CC) -o posix_spawn_bin $(CFLAGS) posix_spawn_bin.o
+
+od_query_create_with_node:  od_query_create_with_node.o
+	$(CC) -o $(@) $(@).o $($(@)_EXTRA_DEPS) $(CFLAGS) ../../bin-$(ARCH)/libmicro.a $($(@)_EXTRA_LIBS) $(EXTRA_LIBS) -lpthread -lm -framework CoreFoundation -framework OpenDirectory; cp $@ ../../bin-$(ARCH)/ 
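+
+# od_query_create_with_node links against CoreFoundation and OpenDirectory, so
+# it gets its own rule rather than the generic one; the binary is also copied
+# into ../../bin-$(ARCH) so the bench scripts can find it alongside the others.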
diff --git a/tools/tests/libMicro/apple/getaddrinfo_host.c b/tools/tests/libMicro/apple/getaddrinfo_host.c
new file mode 100644
index 000000000..a7041753f
--- /dev/null
+++ b/tools/tests/libMicro/apple/getaddrinfo_host.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2006 Apple Inc.  All Rights Reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <string.h>
+#include <netdb.h>
+
+// add additional headers needed here.
+
+#include "../libmicro.h"
+
+#if DEBUG
+# define debug(fmt, args...)    (void) fprintf(stderr, fmt "\n" , ##args)
+#else
+# define debug(fmt, args...)
+#endif
+
+
+//
+// Correct use case
+//
+//    getaddrinfo_host -E -L -S -W -B 200 -C 100 -h "server-%d" -r 100-112
+//
+//      libMicro default benchmark run options are "-E -L -S -W -C 200"
+//
+// -B is batch size: loop iterations per benchmark run. Needs to match the # of
+//                   real lookups. This is the total number of lookups to issue.
+// -C is min sample count: how many times the benchmark needs to run to get a
+//                          proper sample. 1 is the minimum, but you get at least
+//                          3 benchmark run samples. Do not set it to zero. The
+//                          default is 200 for most runs in libMicro.
+// -h is hostname format: for example, "server-%d.performance.rack";
+//                        this is a C format string that can include %d
+// -r is the hostname digit range, in the form "min-max". For example, -r 100-112.
+//    With -h and -r above, the resulting hostnames are
+//      server-100.performance.rack - server-112.performance.rack
+//
+
+extern int gL1CacheEnabled;
+
+/*
+ *    Your state variables should live in the tsd_t struct below
+ */
+typedef struct {
+} tsd_t;
+
+#define HOSTNAME_LEN    125
+static int host_min=-1, host_range=0;
+static char *hostname_format=NULL;
+static char *hostname_list=NULL;
+
+int
+benchmark_init()
+{
+    debug("benchmark_init");
+    (void) sprintf(lm_optstr,  "l:h:r:");
+
+    lm_tsdsize = sizeof (tsd_t);
+    lm_defB = 100;
+
+    (void) sprintf(lm_usage,
+                "\n       ------- getaddrinfo_host specific options (default: *)\n"
+                "       [-h \"hostname format\"]. ie. \"server-%%d.perf\"\n"
+                "       [-r min-max]\n"
+                "\n" );
+
+    return (0);
+}
+
+
+int
+parse_range(int *min, int *offset, char *buf)
+{
+    char *value, *tmp_ptr = strdup(buf);
+    int range=0;
+    debug("parse_range");
+
+    value = strsep(&tmp_ptr, "-");
+    *min = atoi(value);
+    debug("min = %d", *min);
+    if (tmp_ptr) {
+        value = strsep(&tmp_ptr, "-");
+        range = atoi(value);
+        if (range < *min) {
+            printf("max id should be larger than min id\n");
+            return -1;
+        }
+        *offset = range - *min + 1; // inclusive range, hence the +1
+        debug("range = %d", *offset);
+    }
+    else {
+        printf("argument should be in the form of min-max\n");
+        return -1;
+    }
+
+    return 0;
+
+}
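+
+// For example, matching the -r description above, parse_range(&host_min,
+// &host_range, "100-112") leaves host_min = 100 and host_range = 13: the 13
+// hosts server-100 through server-112 with -h "server-%d.performance.rack".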
+
+/*
+ * This is where you parse your lower-case arguments.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+    debug("benchmark_optswitch");
+
+    switch (opt) {
+    case 'h':   // hostname string format
+        hostname_format = strdup(optarg);
+        debug ("hostname format: %s", hostname_format);
+        break;
+
+    case 'l':
+        gL1CacheEnabled = atoi(optarg);
+        break;
+
+    case 'r':    // UID range
+        return parse_range( &host_min, &host_range, optarg);
+        break;
+
+    default:
+        return -1;
+    }
+
+
+    return 0;
+}
+
+
+// Initialize all structures that will be used in benchmark()
+//
+int
+benchmark_initrun()
+{
+    int i;
+
+    debug("\nbenchmark_initrun");
+
+    if (host_min == -1) {
+        printf("-r min-max needs to be specified\n");
+        exit (1);
+    }
+
+    if (!hostname_format) {
+        printf("-h hostname_format needs to be specified\n");
+        exit (1);
+    }
+
+    hostname_list = malloc ( host_range * HOSTNAME_LEN );
+    if (!hostname_list) {
+        debug("malloc error");
+        exit (1);
+    }
+
+    for (i = 0; i < host_range; i++) {
+        sprintf( &hostname_list[i*HOSTNAME_LEN], hostname_format, i+host_min);
+        // debug("hostname: %s", &hostname_list[i*HOSTNAME_LEN]);
+    }
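+    // hostname_list is a flat array of fixed-width slots: the name for host
+    // i starts at hostname_list[i * HOSTNAME_LEN], which is how benchmark()
+    // indexes into it below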
+    return (0);
+}
+
+
+int
+benchmark(void *tsd, result_t *res)
+{
+    int         i, index, err;
+    struct addrinfo *addi;
+
+    res->re_errors = 0;
+
+    debug("in to benchmark - optB = %i", lm_optB);
+    srandom(getpid());
+
+    for (i = 0; i < lm_optB; i++) {
+        index = HOSTNAME_LEN * (random() % host_range);
+
+        err = getaddrinfo( &hostname_list[index], NULL, NULL, &addi);
+
+        if (err) {
+            debug("%s: error: %s", &hostname_list[index], gai_strerror(err));
+            res->re_errors++;
+        }
+        else {
+            debug("host %s done", &hostname_list[index]);
+            freeaddrinfo (addi);    // addi is only valid when getaddrinfo succeeds
+        }
+    }
+    res->re_count = i;
+
+    return (0);
+}
+
+// We need to release all the structures we allocated in benchmark_initrun()
+int
+benchmark_finirun(void *tsd)
+{
+    // tsd_t    *ts = (tsd_t *)tsd;
+    debug("benchmark_finirun ");
+
+    free(hostname_list);
+
+    return (0);
+}
+
+char *
+benchmark_result()
+{
+    static char    result = '\0';
+    debug("benchmark_result");
+    return (&result);
+}
+
diff --git a/tools/tests/libMicro/apple/getaddrinfo_port.c b/tools/tests/libMicro/apple/getaddrinfo_port.c
new file mode 100644
index 000000000..846486faa
--- /dev/null
+++ b/tools/tests/libMicro/apple/getaddrinfo_port.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2006 Apple Inc.  All Rights Reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <string.h>
+#include <netdb.h>
+
+// add additional headers needed here.
+
+#include "../libmicro.h"
+
+#if DEBUG
+# define debug(fmt, args...)    (void) fprintf(stderr, fmt "\n" , ##args)
+#else
+# define debug(fmt, args...)
+#endif
+
+
+// This exercises "ssh" port
+//
+// Correct use case
+//
+//    getaddrinfo_port -E  -L -S -W -B 200 -C 100
+//
+//      libMicro default benchmark run options are "-E -L -S -W -C 200"
+//
+// -B is batch size: loop iterations per benchmark run. Needs to match the
+//                   number of real lookups; this is the total number of
+//                   lookups to issue.
+// -C is min sample count: how many benchmark runs are needed to get a proper
+//                          sample. 1 is the minimum, but you get at least 3
+//                          benchmark run samples. Do not set it to zero.
+//                          Default is 200 for most runs in libMicro.
+//
+
+extern int gL1CacheEnabled;
+
+/*
+ *    Your state variables should live in the tsd_t struct below
+ */
+typedef struct {
+} tsd_t;
+
+
+int
+benchmark_init()
+{
+    debug("benchmark_init");
+
+    (void) sprintf(lm_optstr,  "l:");
+    lm_tsdsize = sizeof (tsd_t);
+    lm_defB = 100;
+
+    return (0);
+}
+
+
+/*
+ * This is where you parse your lower-case arguments.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+    debug("benchmark_optswitch");
+
+    switch (opt) {
+        case 'l':
+            gL1CacheEnabled = atoi(optarg);
+            break;
+    }
+
+    return 0;
+}
+
+
+// Initialize all structures that will be used in benchmark()
+//
+int
+benchmark_initrun()
+{
+    debug("\nbenchmark_initrun");
+
+    return (0);
+}
+
+
+int
+benchmark(void *tsd, result_t *res)
+{
+    int         i, err;
+    struct addrinfo *addi;
+
+    res->re_errors = 0;
+
+    debug("in to benchmark - optB = %i", lm_optB);
+    for (i = 0; i < lm_optB; i++) {
+
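+        // a NULL host with service "ssh" exercises only the service-to-port
+        // lookup; no hostname resolution is performed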
+        err = getaddrinfo(NULL, "ssh", NULL, &addi);
+
+        if (err) {
+            debug("error: %s", gai_strerror(err));
+            res->re_errors++;
+        }
+        else {
+            freeaddrinfo (addi);    // addi is only valid when getaddrinfo succeeds
+        }
+    }
+    res->re_count = i;
+
+    return (0);
+}
+
+// We need to release all the structures we allocated in benchmark_initrun()
+int
+benchmark_finirun(void *tsd)
+{
+    // tsd_t    *ts = (tsd_t *)tsd;
+    debug("benchmark_finirun ");
+
+    return (0);
+}
+
+char *
+benchmark_result()
+{
+    static char    result = '\0';
+    debug("benchmark_result");
+    return (&result);
+}
+
diff --git a/tools/tests/libMicro/apple/getgrent.c b/tools/tests/libMicro/apple/getgrent.c
new file mode 100644
index 000000000..321bbed87
--- /dev/null
+++ b/tools/tests/libMicro/apple/getgrent.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2006 Apple Inc.  All Rights Reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <string.h>
+
+// add additional headers needed here.
+
+#include "../libmicro.h"
+#include <grp.h>
+
+#if DEBUG
+# define debug(fmt, args...)    (void) fprintf(stderr, fmt "\n" , ##args)
+#else
+# define debug(fmt, args...)
+#endif
+
+
+// Correct use case
+//
+//    getgrent -E  -L -S -W -B 200 -C 100
+//
+//      libMicro default benchmark run options are "-E -L -S -W -C 200"
+//
+// -B is batch size: loop iterations per benchmark run. Needs to match the
+//                   number of real lookups; this is the total number of
+//                   lookups to issue.
+// -C is min sample count: how many benchmark runs are needed to get a proper
+//                          sample. 1 is the minimum, but you get at least 3
+//                          benchmark run samples. Do not set it to zero.
+//                          Default is 200 for most runs in libMicro.
+//
+
+extern int gL1CacheEnabled;
+
+/*
+ *    Your state variables should live in the tsd_t struct below
+ */
+typedef struct {
+} tsd_t;
+
+
+int
+benchmark_init()
+{
+    debug("benchmark_init");
+
+    (void) sprintf(lm_optstr,  "l:");
+    lm_tsdsize = sizeof (tsd_t);
+    lm_defB = 100;
+
+    return (0);
+}
+
+
+/*
+ * This is where you parse your lower-case arguments.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+    debug("benchmark_optswitch");
+
+    switch (opt) {
+        case 'l':
+            gL1CacheEnabled = atoi(optarg);
+            break;
+    }
+
+    return 0;
+}
+
+
+// Initialize all structures that will be used in benchmark()
+//
+int
+benchmark_initrun()
+{
+    debug("\nbenchmark_initrun");
+
+    return (0);
+}
+
+
+int
+benchmark(void *tsd, result_t *res)
+{
+    int         i;
+    struct group *grp;
+
+    res->re_errors = 0;
+
+    debug("in to benchmark - optB = %i", lm_optB);
+    for (i = 0; i < lm_optB; i++) {
+
+        errno = 0;      // this is needed explicitly due to getgrent() design
+        grp = getgrent();
+
+        if (!grp) {
+            if (errno) {
+                debug("error: %s", strerror(errno));
+                res->re_errors++;
+            }
+            else {
+                // will not be counted as error
+                setgroupent(1);  // end of entries: rewind to the beginning of the group database
+            }
+        }
+        else {
+            debug("gr_name: %s", grp->gr_name);
+        }
+    }
+    res->re_count = i;
+
+    return (0);
+}
+
+// We need to release all the structures we allocated in benchmark_initrun()
+int
+benchmark_finirun(void *tsd)
+{
+    // tsd_t    *ts = (tsd_t *)tsd;
+    debug("benchmark_finirun ");
+
+    return (0);
+}
+
+char *
+benchmark_result()
+{
+    static char    result = '\0';
+    debug("benchmark_result");
+    return (&result);
+}
+
diff --git a/tools/tests/libMicro/apple/getgrgid.c b/tools/tests/libMicro/apple/getgrgid.c
new file mode 100644
index 000000000..f49925d18
--- /dev/null
+++ b/tools/tests/libMicro/apple/getgrgid.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2006 Apple Inc.  All Rights Reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <string.h>
+
+// add additional headers needed here.
+
+#include "../libmicro.h"
+#include <membership.h>
+#include <grp.h>
+
+#if DEBUG
+# define debug(fmt, args...)    (void) fprintf(stderr, fmt "\n" , ##args)
+#else
+# define debug(fmt, args...)
+#endif
+
+
+// Correct use case
+//
+//    getgrgid -E  -L -S -W -B 200 -C 10 -g 1211-1213
+//
+//      libMicro default benchmark run options are "-E -L -S -W -C 200"
+//
+// -B is batch size: loop iterations per benchmark run. Needs to match the
+//                   number of real lookups; this is the total number of
+//                   lookups to issue.
+// -C is min sample count: how many benchmark runs are needed to get a proper
+//                          sample. 1 is the minimum, but you get at least 3
+//                          benchmark run samples. Do not set it to zero.
+//                          Default is 200 for most runs in libMicro.
+// -g gid range in the form of "min-max". For example, -g 1211-1213
+//
+
+extern int gL1CacheEnabled;
+
+/*
+ *    Your state variables should live in the tsd_t struct below
+ */
+typedef struct {
+} tsd_t;
+
+// temporary buffer size
+#define BUFSIZE 200
+#define INVALID_ID  -1
+
+static gid_t  gid_min = INVALID_ID;
+static int    gid_range = 0;  // gid_max = gid_min + gid_range
+
+int
+benchmark_init()
+{
+    debug("benchmark_init");
+    (void) sprintf(lm_optstr,  "l:g:");
+
+    lm_tsdsize = sizeof (tsd_t);
+    lm_defB = 100;
+
+    (void) sprintf(lm_usage,
+                "\n     ------- getgrgid specific options (default: *)\n"
+                "       [-g GID range (min-max)]\n"
+                "\n" );
+    return (0);
+}
+
+
+int
+parse_range(gid_t *min, int *offset, char *buf)
+{
+    char *value, *tmp_ptr = strdup(buf);
+    int range=0;
+    debug("parse_range");
+
+    value = strsep(&tmp_ptr, "-");
+    *min = atoi(value);
+    debug("min = %d", *min);
+    if (tmp_ptr) {
+        value = strsep(&tmp_ptr, "-");
+        range = atoi(value);
+        if (range < *min) {
+            printf("max id should be larger than min id\n");
+            return -1;
+        }
+        *offset = range - *min + 1;
+        debug("range = %d", *offset);
+    }
+    else {
+        printf("argument should be in the form of min-max\n");
+        return -1;
+    }
+
+    return 0;
+
+}
+
+/*
+ * This is where you parse your lower-case arguments.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+    debug("benchmark_optswitch");
+
+    switch (opt) {
+    case 'l':
+        gL1CacheEnabled = atoi(optarg);
+        break;
+
+    case 'g':    // GID range
+        return parse_range( &gid_min, &gid_range, optarg);
+        break;
+
+    default:
+        return -1;
+    }
+
+    return 0;
+}
+
+
+// Initialize all structures that will be used in benchmark()
+// moved template init from benchmark_initworker -> benchmark_initrun
+//
+int
+benchmark_initrun()
+{
+    debug("\nbenchmark_initrun");
+
+    return (0);
+}
+
+
+int
+benchmark(void *tsd, result_t *res)
+{
+    int         i, err;
+    struct group *grp = NULL;
+
+    res->re_errors = 0;
+
+    debug("in to benchmark - optB = %i", lm_optB);
+    for (i = 0; i < lm_optB; i++) {
+        gid_t gid = gid_min + random() % gid_range;
+
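+        // multi-threaded runs (lm_optT > 1) use getgrgid_r() with a caller-
+        // supplied buffer so threads do not share getgrgid()'s static result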
+        if (lm_optT > 1) {
+            struct group gd;
+            struct group *grp_ptr = &gd;
+            struct group *tmp_ptr;
+            char gbuf[BUFSIZE];
+
+            err = getgrgid_r( gid, grp_ptr, gbuf, BUFSIZE, &tmp_ptr);
+            if (err) {
+                debug("error: GID %d -> %s", gid, strerror(err));
+                res->re_errors++;
+            }
+            else if (!tmp_ptr) {
+                debug("not found: GID %d", gid);
+                res->re_errors++;
+            }
+        }
+        else {
+            errno = 0;
+            grp = getgrgid( gid );
+
+            if (!grp) {
+                if (errno) {
+                    debug("error: GID %d -> %s", gid, strerror(errno));
+                    res->re_errors++;
+                }
+                else {
+                    debug("not found: GID %d", gid);
+                    res->re_errors++;
+                }
+            }
+        }
+    }
+    res->re_count = i;
+
+    return (0);
+}
+
+// We need to release all the structures we allocated in benchmark_initrun()
+int
+benchmark_finirun(void *tsd)
+{
+    // tsd_t    *ts = (tsd_t *)tsd;
+    debug("benchmark_finirun ");
+
+    return (0);
+}
+
+char *
+benchmark_result()
+{
+    static char    result = '\0';
+    debug("benchmark_result");
+    return (&result);
+}
+
diff --git a/tools/tests/libMicro/apple/getgrnam.c b/tools/tests/libMicro/apple/getgrnam.c
new file mode 100644
index 000000000..7d50a488b
--- /dev/null
+++ b/tools/tests/libMicro/apple/getgrnam.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2006 Apple Inc.  All Rights Reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <string.h>
+
+// add additional headers needed here.
+
+#include "../libmicro.h"
+#include <grp.h>
+#include <uuid/uuid.h>
+
+#if DEBUG
+# define debug(fmt, args...)    (void) fprintf(stderr, fmt "\n" , ##args)
+#else
+# define debug(fmt, args...)
+#endif
+
+
+// Correct use case
+//
+//    getgrnam -E  -L -S -W -B 200 -C 10 -r 10
+//
+//      libMicro default benchmark run options are "-E -L -S -W -C 200"
+//
+// -B is batch size: loop iterations per benchmark run. Needs to match the
+//                   number of real lookups; this is the total number of
+//                   lookups to issue.
+// -C is min sample count: how many benchmark runs are needed to get a proper
+//                          sample. 1 is the minimum, but you get at least 3
+//                          benchmark run samples. Do not set it to zero.
+//                          Default is 200 for most runs in libMicro.
+// -r is the number of total groups (from "local_test_group1" to "local_test_group#")
+
+extern int gL1CacheEnabled;
+
+/*
+ *    Your state variables should live in the tsd_t struct below
+ */
+typedef struct {
+} tsd_t;
+
+// temporary buffer size
+#define BUFSIZE 200
+
+// the number of record lookup to issue is covered by standard option optB
+static int  optRecords =    10;  // the number of total records
+
+// default group name prefix (override with -g)
+static char *default_gprefix = "ds_test_group";
+
+#define GROUPNAME_LEN	30
+static char *grpname_list;
+
+int
+benchmark_init()
+{
+    debug("benchmark_init");
+    (void) sprintf(lm_optstr,  "l:r:g:");
+
+    lm_tsdsize = sizeof (tsd_t);
+    lm_defB = 100;
+
+    (void) sprintf(lm_usage,
+                "\n     ------- getgrnam specific options (default: *)\n"
+                "       [-r total number of group records (10*)]\n"
+                "       [-g group prefix(ds_test_group)]\n"
+                "\n" );
+    return (0);
+}
+
+/*
+ * This is where you parse your lower-case arguments.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+    debug("benchmark_optswitch");
+
+    switch (opt) {
+    case 'r':    // total number of records. default is 10
+        optRecords = atoi(optarg);
+        debug("optRecords = %d\n", optRecords);
+        break;
+
+    case 'l':
+        gL1CacheEnabled = atoi(optarg);
+        break;
+
+    case 'g':   // base name for the groups to use
+        default_gprefix = strdup(optarg);
+        debug("default_gprefix = %s\n", default_gprefix);
+        break;
+
+    default:
+        return -1;
+    }
+
+    return 0;
+}
+
+
+// Initialize all structures that will be used in benchmark()
+// moved template init from benchmark_initworker -> benchmark_initrun
+//  since username_list is static across threads and processes
+//
+int
+benchmark_initrun()
+{
+    int i;
+
+    debug("\nbenchmark_initrun");
+
+    // create the array of group names before the benchmark runs;
+    // generating them inside benchmark() would skew the measurements
+    grpname_list = malloc( optRecords * GROUPNAME_LEN );
+    if (!grpname_list) {
+        debug ("malloc error");
+        exit (1);
+    }
+
+    for (i = 0; i < optRecords; i++) {
+        sprintf(&grpname_list[i*GROUPNAME_LEN], "%s%d", default_gprefix, i+1);
+        debug("creating group name %s", &grpname_list[i*GROUPNAME_LEN]);
+    }
+
+    return (0);
+}
+
+
+int
+benchmark(void *tsd, result_t *res)
+{
+    int          i, err;
+    struct group *grp = NULL;
+
+    res->re_errors = 0;
+
+    debug("in to benchmark - optB = %i", lm_optB);
+    srandom(getpid());
+
+    for (i = 0; i < lm_optB; i++) {
+        int index = (random() % optRecords) * GROUPNAME_LEN;
+
+        if (lm_optT > 1) {
+            struct group gd;
+            struct group *grp_ptr = &gd;
+            struct group *tmp_ptr;
+            char gbuf[BUFSIZE];
+
+            err = getgrnam_r( &grpname_list[index], grp_ptr, gbuf, BUFSIZE, &tmp_ptr);
+            // non-NULL err means failure and NULL result ptr means no matching
+            // entry
+            if (err) {
+                debug("error: %s -> %s",  &grpname_list[index], strerror(err));
+                res->re_errors++;
+            }
+            else if ( !tmp_ptr) {
+                debug("not found: %s",  &grpname_list[index] );
+                res->re_errors++;
+            }
+        }
+        else {
+            errno = 0;
+            grp = getgrnam( &grpname_list[index] );
+
+            if (!grp) {
+                if (errno) {
+                    debug("error: %s -> %s", &grpname_list[index], strerror(errno));
+                    res->re_errors++;
+                }
+                else {
+                    debug("not found: %s",  &grpname_list[index] );
+                    res->re_errors++;
+                }
+            }
+        }
+    }
+    res->re_count = i;
+
+    return (0);
+}
+
+// We need to release all the structures we allocated in benchmark_initrun()
+int
+benchmark_finirun(void *tsd)
+{
+    // tsd_t    *ts = (tsd_t *)tsd;
+    debug("benchmark_finiworker: deallocating structures");
+
+    free (grpname_list);
+
+    return (0);
+}
+
+char *
+benchmark_result()
+{
+    static char    result = '\0';
+    debug("benchmark_result");
+    return (&result);
+}
+
diff --git a/tools/tests/libMicro/apple/getpwent.c b/tools/tests/libMicro/apple/getpwent.c
new file mode 100644
index 000000000..49df77e21
--- /dev/null
+++ b/tools/tests/libMicro/apple/getpwent.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2006 Apple Inc.  All Rights Reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <string.h>
+
+// add additional headers needed here.
+
+#include "../libmicro.h"
+#include <pwd.h>
+
+#if DEBUG
+# define debug(fmt, args...)    (void) fprintf(stderr, fmt "\n" , ##args)
+#else
+# define debug(fmt, args...)
+#endif
+
+
+// Correct use case
+//
+//    getpwent -E  -L -S -W -B 200 -C 100
+//
+//      libMicro default benchmark run options are "-E -L -S -W -C 200"
+//
+// -B is batch size: loop iterations per benchmark run. Needs to match the
+//                   number of real lookups; this is the total number of
+//                   lookups to issue.
+// -C is min sample count: how many benchmark runs are needed to get a proper
+//                          sample. 1 is the minimum, but you get at least 3
+//                          benchmark run samples. Do not set it to zero.
+//                          Default is 200 for most runs in libMicro.
+//
+
+extern int gL1CacheEnabled;
+
+/*
+ *    Your state variables should live in the tsd_t struct below
+ */
+typedef struct {
+} tsd_t;
+
+
+int
+benchmark_init()
+{
+    debug("benchmark_init");
+
+    (void) sprintf(lm_optstr, "l:");
+    lm_tsdsize = sizeof (tsd_t);
+    lm_defB = 100;
+
+    return (0);
+}
+
+
+/*
+ * This is where you parse your lower-case arguments.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+    debug("benchmark_optswitch");
+
+    switch (opt) {
+        case 'l':
+            gL1CacheEnabled = atoi(optarg);
+            break;
+    }
+
+    return 0;
+}
+
+
+// Initialize all structures that will be used in benchmark()
+//
+int
+benchmark_initrun()
+{
+    debug("\nbenchmark_initrun");
+
+    return (0);
+}
+
+
+int
+benchmark(void *tsd, result_t *res)
+{
+    int         i;
+    struct passwd *passwd;
+
+    res->re_errors = 0;
+
+    debug("in to benchmark - optB = %i", lm_optB);
+    for (i = 0; i < lm_optB; i++) {
+
+        errno = 0;      // this is needed explicitly due to getpwent() design
+        passwd = getpwent();
+
+        if (!passwd) {
+            if (errno) {
+                debug("error: %s", strerror(errno));
+                res->re_errors++;
+            }
+            else {
+                // not counted as a libMicro error
+                setpassent(1);  // rewind to the beginning of passwd file
+            }
+        }
+        else {
+            debug("pw_name: %s", passwd->pw_name);
+        }
+    }
+    res->re_count = i;
+
+    return (0);
+}
+
+// We need to release all the structures we allocated in benchmark_initrun()
+int
+benchmark_finirun(void *tsd)
+{
+    // tsd_t    *ts = (tsd_t *)tsd;
+    debug("benchmark_finirun ");
+
+    return (0);
+}
+
+char *
+benchmark_result()
+{
+    static char    result = '\0';
+    debug("benchmark_result");
+    return (&result);
+}
+
diff --git a/tools/tests/libMicro/apple/getpwnam.c b/tools/tests/libMicro/apple/getpwnam.c
new file mode 100644
index 000000000..3db5e6ca4
--- /dev/null
+++ b/tools/tests/libMicro/apple/getpwnam.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2006 Apple Inc.  All Rights Reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <string.h>
+
+// add additional headers needed here.
+
+#include "../libmicro.h"
+#include <membership.h>
+#include <pwd.h>
+#include <uuid/uuid.h>
+
+#if DEBUG
+# define debug(fmt, args...)    (void) fprintf(stderr, fmt "\n" , ##args)
+#else
+# define debug(fmt, args...)
+#endif
+
+
+// Correct use case
+//
+//    getpwnam -E  -L -S -W -B 200 -C 10 -c 100 -r 300 -u test_user_
+//
+//      libMicro default benchmark run options are "-E -L -S -W -C 200"
+//
+// -B is batch size: loop iterations per benchmark run. Needs to match the
+//                   number of real lookups; this is the total number of
+//                   lookups to issue.
+// -C is min sample count: how many benchmark runs are needed to get a proper
+//                          sample. 1 is the minimum, but you get at least 3
+//                          benchmark run samples. Do not set it to zero.
+//                          Default is 200 for most runs in libMicro.
+// -r is the number of total users
+// -c is the cache hit rate for lookups: for a 10% hit rate, pass -c 10.
+//                ie. -B 100 -c 50 -r 1000 -C 200 (out of 1000 records, 50%
+//                     should be re-lookups, and batch size starts at 100.
+//                     To get a 50% cache hit rate, 500 record lookups are
+//                     needed, so batch size is adjusted to 500 per benchmark
+//                     run. If -r is smaller than -B, -B is not adjusted.)
+// -u prefix: the user name prefix to put in front of the user number to form
+//            the login name to look up
+
+extern int gL1CacheEnabled;
+
+/*
+ *    Your state variables should live in the tsd_t struct below
+ */
+typedef struct {
+} tsd_t;
+
+// temporary buffer size
+#define BUFSIZE 200
+
+// the number of record lookup to issue is covered by standard option optB
+static int  optRecords =    100;  // the number of total records
+static int  optCachehit =   100;  // specify cache hit rate (% of record re-lookup)
+
+// This will use local users (local_test_*)
+static char *default_uprefix = "local_test_";
+
+#define USERNAME_LEN	20
+static char *username_list;
+
+int
+benchmark_init()
+{
+    debug("benchmark_init");
+    (void) sprintf(lm_optstr,  "l:c:r:u:");
+
+    lm_tsdsize = sizeof (tsd_t);
+    lm_defB = 100;
+
+    (void) sprintf(lm_usage,
+                "\n     ------- getpwnam specific options (default: *)\n"
+                "       [-c hitrate%% (100%%*)]\n"
+                "       [-r total number of records (100*)]\n"
+                "       [-u username_prefix (local_test_)]\n"
+                "\n" );
+    return (0);
+}
+
+/*
+ * This is where you parse your lower-case arguments.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+    debug("benchmark_optswitch");
+
+    switch (opt) {
+    case 'c':    // cache hit rate. 100% means lookup the same records over and over
+        optCachehit = atoi(optarg);
+        debug("optCachehit = %d\n", optCachehit);
+        if (optCachehit > 100 || optCachehit < 0) {
+            printf("cache hit rate should be in between 0%% and 100%%");
+            return (-1);
+        }
+        break;
+
+    case 'l':
+        gL1CacheEnabled = atoi(optarg);
+        break;
+
+    case 'r':    // total number of records. default is 100
+        optRecords = atoi(optarg);
+        debug("optRecords = %d\n", optRecords);
+        break;
+
+    case 'u':
+        default_uprefix = strdup(optarg);
+        debug("default_uprefix = %s\n", default_uprefix);
+        break;
+
+    default:
+        return -1;
+    }
+
+    return 0;
+}
+
+
+// Initialize all structures that will be used in benchmark()
+// moved template init from benchmark_initworker -> benchmark_initrun
+//  since username_list is static across threads and processes
+//
+
+int
+benchmark_initrun()
+{
+    int i;
+
+    debug("\nbenchmark_initrun");
+
+    // Adjust # of record lookups to reflect cache hit rate
+    if (optCachehit < 100) {
+        optRecords  = (int) ((float) optRecords * ((float) optCachehit / 100));
+        debug("# of records adjusted to %d for cache hit rate %d%%\n", optRecords, optCachehit);
+    }
+
+    // if batch size (one benchmark run) is less than the number records, adjust
+    // it to match the number record lookups in one batch run
+    if (lm_optB < optRecords) {
+        lm_optB = optRecords;
+        debug("Adjusting batch size to %d to match the lookups required in benchmark run\n", lm_optB);
+    }
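+    // e.g. -r 1000 -c 50 leaves optRecords at 500 unique names, and a batch
+    // size below 500 is then raised so one batch covers every name once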
+
+    // create the array of usernames before the benchmark runs;
+    // generating them inside benchmark() would skew the measurements
+    username_list = malloc( optRecords * USERNAME_LEN );
+    if (!username_list) {
+        debug ("malloc error");
+        exit (1);
+    }
+
+    for (i = 0; i < optRecords; i++) {
+        sprintf(&username_list[i*USERNAME_LEN], "%s%d", default_uprefix, i+1);
+        // debug("creating username %s", &username_list[i*USERNAME_LEN]);
+    }
+
+    return (0);
+}
+
+
+int
+benchmark(void *tsd, result_t *res)
+{
+    int         i, err;
+    struct passwd *passwd = NULL;
+
+    res->re_errors = 0;
+
+    debug("in to benchmark - optB = %i", lm_optB);
+    for (i = 0; i < lm_optB; i++) {
+        int index = (random() % optRecords) * USERNAME_LEN;
+
+        if (lm_optT > 1) {
+            struct passwd pd;
+            struct passwd *pwd_ptr = &pd;
+            struct passwd *tmp_ptr;
+            char pbuf[BUFSIZE];
+
+            err = getpwnam_r( &username_list[index], pwd_ptr, pbuf, BUFSIZE, &tmp_ptr);
+            if (err) {
+                printf("error: %s -> %s", &username_list[index], strerror(err));
+                res->re_errors++;
+            }
+            else if (!tmp_ptr) {
+                debug("not found: %s", &username_list[index]);
+                res->re_errors++;
+            }
+        }
+        else {
+            errno = 0;
+            passwd = getpwnam( &username_list[index] );
+
+            if (!passwd) {
+                if (errno) {
+                    debug("error: %s -> %s", &username_list[index], strerror(errno));
+                    res->re_errors++;
+                }
+                else {
+                    debug("not found: %s", &username_list[index]);
+                    res->re_errors++;
+                }
+            }
+        }
+    }
+    res->re_count = i;
+
+    return (0);
+}
+
+// We need to release all the structures we allocated in benchmark_initrun()
+int
+benchmark_finirun(void *tsd)
+{
+    // tsd_t    *ts = (tsd_t *)tsd;
+    debug("benchmark_finirun: deallocating structures");
+
+    free (username_list);
+
+    return (0);
+}
+
+char *
+benchmark_result()
+{
+    static char    result = '\0';
+    debug("benchmark_result");
+    return (&result);
+}
+
diff --git a/tools/tests/libMicro/apple/getpwuid.c b/tools/tests/libMicro/apple/getpwuid.c
new file mode 100644
index 000000000..c33149125
--- /dev/null
+++ b/tools/tests/libMicro/apple/getpwuid.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2006 Apple Inc.  All Rights Reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <string.h>
+
+// add additional headers needed here.
+
+#include "../libmicro.h"
+#include <pwd.h>
+
+#if DEBUG
+# define debug(fmt, args...)    (void) fprintf(stderr, fmt "\n" , ##args)
+#else
+# define debug(fmt, args...)
+#endif
+
+
+// Correct use case
+//
+//    getpwuid -E  -L -S -W -B 200 -C 10 -c 100 -u 5000-5200
+//
+//      libMicro default benchmark run options are "-E -L -S -W -C 200"
+//
+// -B is batch size: loop iterations per benchmark run. Needs to match the
+//                   number of real lookups; this is the total number of
+//                   lookups to issue.
+// -C is min sample count: how many benchmark runs are needed to get a proper
+//                          sample. 1 is the minimum, but you get at least 3
+//                          benchmark run samples. Do not set it to zero.
+//                          Default is 200 for most runs in libMicro.
+// -c is the cache hit rate for lookups: for a 10% hit rate, pass -c 10.
+//                ie. -B 100 -c 50 -u 5000-5199
+//                     out of 200 UIDs, 50% should be cache hits, with a
+//                     batch size of 100.
+// -u uid range in the form of "min-max". For example, -u 5000-5200
+//
+
+extern int gL1CacheEnabled;
+
+/*
+ *    Your state variables should live in the tsd_t struct below
+ */
+typedef struct {
+} tsd_t;
+
+// temporary buffer size
+#define BUFSIZE 200
+#define INVALID_ID  -1
+
+static uid_t  uid_min = INVALID_ID;
+static int    uid_range = 0;  // uid_max = uid_min + uid_range
+
+// the number of record lookup to issue is covered by standard option optB
+static int    optCachehit =   100;  // specify cache hit rate (% of record re-lookup)
+
+int
+benchmark_init()
+{
+    debug("benchmark_init");
+    (void) sprintf(lm_optstr, "l:c:u:");
+
+    lm_tsdsize = sizeof (tsd_t);
+    lm_defB = 100;
+
+    (void) sprintf(lm_usage,
+                "\n     ------- getpwuid specific options (default: *)\n"
+                "       [-c hitrate%% (100%%*)]\n"
+                "       [-u UID range (min-max)]\n"
+                "       [-l]\n"
+                "\n" );
+    return (0);
+}
+
+int
+parse_range(uid_t *min, int *offset, char *buf)
+{
+    char *value, *tmp_ptr = strdup(buf);
+    int range=0;
+    debug("parse_range");
+
+    value = strsep(&tmp_ptr, "-");
+    *min = atoi(value);
+    debug("min = %d", *min);
+    if (tmp_ptr) {
+        value = strsep(&tmp_ptr, "-");
+        range = atoi(value);
+        if (range < *min) {
+            printf("max id should be larger than min id\n");
+            return -1;
+        }
+        *offset = range - *min + 1;
+        debug("range = %d", *offset);
+    }
+    else {
+        printf("argument should be in the form of min-max\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * This is where you parse your lower-case arguments.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+    debug("benchmark_optswitch");
+
+    switch (opt) {
+    case 'c':    // cache hit rate. 100% means lookup the same records over and over
+        optCachehit = atoi(optarg);
+        debug("optCachehit = %d\n", optCachehit);
+        if (optCachehit > 100 || optCachehit < 0) {
+            printf("cache hit rate should be in between 0%% and 100%%");
+            return (-1);
+        }
+        break;
+
+    case 'l':
+        gL1CacheEnabled = atoi(optarg);
+        break;
+
+    case 'u':    // UID range
+        return parse_range( &uid_min, &uid_range, optarg);
+        break;
+
+    default:
+        return -1;
+    }
+
+    return 0;
+}
+
+
+// Initialize all structures that will be used in benchmark()
+// moved template init from benchmark_initworker -> benchmark_initrun
+//  since username_list is static across threads and processes
+//
+int
+benchmark_initrun()
+{
+    uid_t i, range;
+    struct passwd *passwd = NULL;
+
+    debug("\nbenchmark_initrun");
+
+    // To satisfy the cache hit rate, look up that percentage of the UIDs
+    // once here so they are already cached when benchmark() runs
+    if (optCachehit < 100) {
+        range = (int) ((float) uid_range * ((float) optCachehit / 100));
+        for (i = uid_min; i < uid_min+range; i++)
+            passwd = getpwuid( i );
+    }
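+    // UIDs outside the pre-warmed range are first resolved inside
+    // benchmark(), so roughly optCachehit% of the measured lookups hit cache
+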
+
+    return (0);
+}
+
+
+int
+benchmark(void *tsd, result_t *res)
+{
+    int         i, err;
+    struct passwd *passwd = NULL;
+
+    res->re_errors = 0;
+
+    debug("in to benchmark - optB = %i", lm_optB);
+    for (i = 0; i < lm_optB; i++) {
+        uid_t uid = uid_min + random() % uid_range;
+
+        // XXX No need to use getpwuid_r() since getpwuid() is already thread-safe
+        // so it depends on what you want to exercise
+        if (lm_optT > 1) {
+            struct passwd pd;
+            struct passwd *pwd_ptr = &pd;
+            struct passwd *tmp_ptr;
+            char pbuf[BUFSIZE];
+
+            err = getpwuid_r( uid, pwd_ptr, pbuf, BUFSIZE, &tmp_ptr );
+            if (err) {
+                debug("error: %s", strerror(err));
+                res->re_errors++;
+            }
+            else if (!tmp_ptr) {
+                debug("not found: UID %d", uid);
+                res->re_errors++;
+            }
+        }
+        else {
+            errno = 0;
+            passwd = getpwuid( uid );
+
+            if (!passwd) {
+                if (errno) {
+                    debug("error: %s", strerror(errno));
+                    res->re_errors++;
+                }
+                else {
+                    debug("not found: UID %d", uid);
+                    res->re_errors++;
+                }
+            }
+        }
+    }
+    res->re_count = i;
+
+    return (0);
+}
+
+// We need to release all the structures we allocated in benchmark_initrun()
+int
+benchmark_finirun(void *tsd)
+{
+    // tsd_t    *ts = (tsd_t *)tsd;
+    debug("benchmark_finirun ");
+
+    return (0);
+}
+
+char *
+benchmark_result()
+{
+    static char    result = '\0';
+    debug("benchmark_result");
+    return (&result);
+}
+
diff --git a/tools/tests/libMicro/apple/lmbench_bw_mem.c b/tools/tests/libMicro/apple/lmbench_bw_mem.c
index 4b5aa07c3..9963fe637 100644
--- a/tools/tests/libMicro/apple/lmbench_bw_mem.c
+++ b/tools/tests/libMicro/apple/lmbench_bw_mem.c
@@ -76,6 +76,10 @@
 #define	TRIES		11	// value from bench.h in lmbench
 #define TYPE    	int
 
+/* Added as part of the fix for <rdar://problem/7508837> */
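+/* The volatile sink below consumes each loop's accumulated sum so the
+ * compiler cannot dead-code-eliminate the memory reads being measured. */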
+static volatile u_int64_t       use_result_dummy;
+void use_int(int result) { use_result_dummy += result; }
+
 /*
  * rd - 4 byte read, 32 byte stride
  * wr - 4 byte write, 32 byte stride
@@ -214,6 +218,7 @@ rd(iter_t iterations, void *cookie)
 		p +=  128;
 	    }
 	}
+	use_int(sum);
 }
 #undef	DOIT
 
@@ -257,6 +262,7 @@ rdwr(iter_t iterations, void *cookie)
 		p +=  128;
 	    }
 	}
+	use_int(sum);
 }
 #undef	DOIT
 
@@ -362,6 +368,7 @@ frd(iter_t iterations, void *cookie)
 		p += 128;
 	    }
 	}
+	use_int(sum);
 }
 #undef	DOIT
 
@@ -616,30 +623,30 @@ benchmark(void *tsd, result_t *res)
 		return(-1);
 	}
 
-	if (strcmp(opt_what, "cp") ||
-	    strcmp(opt_what, "fcp") || strcmp(opt_what, "bcopy")) {
+	if (STREQ(opt_what, "cp") ||
+	    STREQ(opt_what, "fcp") || STREQ(opt_what, "bcopy")) {
 		ts->need_buf2 = 1;
 	}
 	
 	for (i = 0 ; i < lm_optB ; i++)
 	{
-		if (strcmp(opt_what, "rd")) {
+		if (STREQ(opt_what, "rd")) {
 			rd( ts->repetitions, tsd ); 
-		} else if (strcmp(opt_what, "wr")) {
+		} else if (STREQ(opt_what, "wr")) {
 			wr( ts->repetitions, tsd ); 
-		} else if (strcmp(opt_what, "rdwr")) {
+		} else if (STREQ(opt_what, "rdwr")) {
 			rdwr( ts->repetitions, tsd ); 
-		} else if (strcmp(opt_what, "cp")) {
+		} else if (STREQ(opt_what, "cp")) {
 			mcp( ts->repetitions, tsd ); 
-		} else if (strcmp(opt_what, "frd")) {
+		} else if (STREQ(opt_what, "frd")) {
 			frd( ts->repetitions, tsd ); 
-		} else if (strcmp(opt_what, "fwr")) {
+		} else if (STREQ(opt_what, "fwr")) {
 			fwr( ts->repetitions, tsd ); 
-		} else if (strcmp(opt_what, "fcp")) {
+		} else if (STREQ(opt_what, "fcp")) {
 			fcp( ts->repetitions, tsd ); 
-		} else if (strcmp(opt_what, "bzero")) {
+		} else if (STREQ(opt_what, "bzero")) {
 			loop_bzero( ts->repetitions, tsd ); 
-		} else if (strcmp(opt_what, "bcopy")) {
+		} else if (STREQ(opt_what, "bcopy")) {
 			loop_bcopy( ts->repetitions, tsd ); 
 		} else {
 			return(-1);
diff --git a/tools/tests/libMicro/apple/mbr_check_membership.c b/tools/tests/libMicro/apple/mbr_check_membership.c
new file mode 100644
index 000000000..91fbffeca
--- /dev/null
+++ b/tools/tests/libMicro/apple/mbr_check_membership.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2006 Apple Inc.  All Rights Reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <string.h>
+
+// add additional headers needed here.
+
+#include "../libmicro.h"
+#include <membership.h>
+#include <pwd.h>
+#include <uuid/uuid.h>
+
+#if DEBUG
+# define debug(fmt, args...)    (void) fprintf(stderr, fmt "\n" , ##args)
+#else
+# define debug(fmt, args...)
+#endif
+
+
+// Correct use case
+//
+//    mbr_check_membership -E  -L -S -W -B 200 -C 10 -g 1211-1213 -u 5000-5200
+//
+//      libMicro default benchmark run options are "-E -C 200 -L -S -W"
+//
+// -B is batch size: loop iterations per benchmark run (default: 100).
+// -C is min sample count: how many benchmark runs are needed to get a proper
+//                          sample. 1 is the minimum, but you get at least 3
+//                          benchmark run samples. Do not set it to zero.
+//                          Default is 200 for most runs in libMicro.
+// -u uid range in the form of "min-max". For example, -u 5000-5200
+// -g gid range or gid
+
+/*
+ *    Your state variables should live in the tsd_t struct below
+ */
+typedef struct {
+} tsd_t;
+
+#define INVALID_ID  -1
+
+static uid_t uid_min = INVALID_ID;
+static gid_t gid_min = INVALID_ID;
+
+static int   uid_range = 0;  // uid_max = uid_min + uid_range
+static int   gid_range = 0; // gid_max = gid_min + gid_range
+
+static uuid_t *u_uuid_list = NULL;  // user uuid list
+static uuid_t *g_uuid_list = NULL;  // group uuid list
+
+int
+benchmark_init()
+{
+    debug("benchmark_init");
+    (void) sprintf(lm_optstr,  "g:u:");
+
+    lm_tsdsize = sizeof(tsd_t);
+    lm_defB = 100;
+
+    (void) sprintf(lm_usage,
+                "\n       ------- mbr_check_membership specific options\n"
+                "       [-u UID range (min-max)]\n"
+                "       [-g GID or GID range (gid or min-max)]\n"
+                "\n" );
+    return (0);
+}
+
+int
+parse_range(uint *min, int *offset, char *buf)
+{
+    char *value, *tmp_ptr = strdup(buf);
+    int range=0;
+    debug("parse_range");
+
+    value = strsep(&tmp_ptr, "-");
+    *min = atoi(value);
+    debug("min = %d", *min);
+    if (tmp_ptr) {
+        value = strsep(&tmp_ptr, "-");
+        range = atoi(value);
+        if (range < *min) {
+            printf("max id should be larger than min id\n");
+            return -1;
+        }
+        *offset = range - *min;
+        debug("range = %d", *offset);
+    }
+
+    return 0;
+
+}
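+
+// Note: unlike the other benchmarks' parse_range(), a bare "-g gid" (no "-")
+// is accepted here; offset stays 0 and the callers iterate with "<= range",
+// so a single id still yields one entry.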
+
+/*
+ * This is where you parse your lower-case arguments.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+    debug("benchmark_optswitch");
+
+    switch (opt) {
+    case 'g':    // GID or GID range
+        return parse_range( &gid_min, &gid_range, optarg);
+        break;
+
+    case 'u':    // UID range
+        return parse_range( &uid_min, &uid_range, optarg);
+        break;
+    default:
+        return -1;
+    }
+
+    return 0;
+}
+
+
+// Initialize all structures that will be used in benchmark()
+// 1. make local or network node for OD query
+// 2. create user key 
+int
+benchmark_initrun()
+{
+    int i;
+
+    debug("benchmark_initrun");
+
+    if (uid_min == INVALID_ID || gid_min == INVALID_ID) {
+        printf("Both -u and -g need to be specified\n");
+        return -1;
+    }
+
+    // convert the UID/GID ranges to UUID arrays up front; converting them
+    // inside benchmark() would skew the measurements
+
+    u_uuid_list = malloc( sizeof(*u_uuid_list) * (uid_range+1) );
+    g_uuid_list = malloc( sizeof(*g_uuid_list) * (gid_range+1) );
+    if (!u_uuid_list || !g_uuid_list) {
+        printf("malloc error\n");
+        return -1;
+    }
+
+    for (i = 0; i <= uid_range; i++) {
+
+        if (mbr_uid_to_uuid(uid_min+i, u_uuid_list[i])) {
+            printf("error converting uid %d to UUID\n", uid_min+i);
+            return -1;
+        }
+    }
+
+    for (i = 0; i <= gid_range; i++) {
+
+        if (mbr_gid_to_uuid(gid_min+i, g_uuid_list[i])) {
+            printf("error converting gid %d to UUID\n", gid_min+i);
+            return -1;
+        }
+    }
+
+    return (0);
+}
+
+int
+benchmark(void *tsd, result_t *res)
+{
+    int         i, index, gindex, err, isMember=0;
+    //tsd_t *ts = (tsd_t *)tsd;
+
+#ifdef DEBUG
+    uid_t       uid;
+    int         id_type;
+#endif
+
+    res->re_errors = 0;
+
+    // debug("in to benchmark - optB = %i", lm_optB);
+
+    for (i = 0; i < lm_optB; i++) {
+
+        index = random() % (uid_range+1);
+        gindex = random() % (gid_range+1);
+        err = mbr_check_membership(u_uuid_list[index], g_uuid_list[gindex], &isMember);
+
+#ifdef DEBUG
+        //mbr_uuid_to_id(u_uuid_list[index], &uid, &id_type);
+        //debug ("loop %d: uid %d is %s (gindex %d)", i, uid, (isMember)?"member":"not a member", gindex);
+#endif
+
+        if (err) {
+            if (err == EIO) {
+                debug("mbr_check_membership returned EIO. Unable to communicate with DS daemon");
+            }
+            else if (err == ENOENT) {
+                debug("mbr_check_membership returned ENOENT. User not found");
+            }
+            else {
+                debug("error: %s", strerror(err));
+            }
+            res->re_errors++;
+        }
+    }
+    res->re_count = i;
+
+    return (0);
+}
+
+
+// We need to release all the structures we allocated in benchmark_initrun()
+int
+benchmark_finirun(void *tsd)
+{
+    //tsd_t    *ts = (tsd_t *)tsd;
+
+    debug("benchmark_finirun: deallocating structures");
+
+    free(u_uuid_list);
+    free(g_uuid_list);
+
+    return (0);
+}
+
+char *
+benchmark_result()
+{
+    static char    result = '\0';
+    debug("benchmark_result");
+    return (&result);
+}
+
diff --git a/tools/tests/libMicro/apple/mbr_check_service_membership.c b/tools/tests/libMicro/apple/mbr_check_service_membership.c
new file mode 100644
index 000000000..47e3267fa
--- /dev/null
+++ b/tools/tests/libMicro/apple/mbr_check_service_membership.c
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2006 Apple Inc.  All Rights Reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <string.h>
+
+// add additional headers needed here.
+
+#include "../libmicro.h"
+#include <membership.h>
+#include <pwd.h>
+#include <uuid/uuid.h>
+
+#if DEBUG
+# define debug(fmt, args...)    (void) fprintf(stderr, fmt "\n" , ##args)
+#else
+# define debug(fmt, args...)
+#endif
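+
+// (note: the debug() macro above appends its own "\n", so callers may pass
+// bare format strings; several calls in this file also embed a "\n", which
+// just produces an extra blank line)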
+
+
+// Correct use case
+//
+//    mbr_check_service_membership -E  -L -S -W -B 200 -C 10 -r 100 -s "SACL" -u user_prefix
+//
+//      libMicro default benchmark run options are "-E -C 200 -L -S -W"
+//
+// -B is the batch size: loop iterations per benchmark run. It needs to
+//                   match the number of real lookups, i.e. the total
+//                   number of lookups to issue.
+// -C is the minimum sample count: how many benchmark runs are needed to
+//                          get a proper sample. 1 is the minimum, but you
+//                          get at least 3 benchmark-run samples. Do not
+//                          set it to zero. The default is 200 for most
+//                          runs in libMicro.
+// -r is the total number of records.
+// -s is the SACL string: e.g. "ssh"
+// -u is the user prefix that precedes the user number
+
+typedef struct {
+	uuid_t *uuid_list;
+} tsd_t;
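+// (lm_tsdsize, set in benchmark_init() below, tells the libMicro framework
+// how much per-worker state to allocate, so each worker gets its own
+// uuid_list)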
+
+// the number of record lookup to issue is covered by standard option optB
+static int  optRecords =    100;  // the number of total records
+static int  optSACL = 0;          // option SACL specified?
+
+static char **sacl = NULL;
+static char *default_sacl[] = { "com.apple.access_dsproxy",
+                                "com.apple.access_screensharing",
+                                "com.apple.access_ssh",
+                                ""};
+static int  numSACL = 3;          // number of SACLs
+
+
+// This will use local users (local_test_*)
+static char *default_uprefix = "local_test_";
+
+int
+benchmark_init()
+{
+    debug("benchmark_init");
+    (void) sprintf(lm_optstr,  "r:s:u:");
+
+    lm_tsdsize = sizeof(tsd_t);
+    lm_defB = 100;
+
+    (void) sprintf(lm_usage,
+                "\n       ------- mbr_check_service_membership specific options (default: *)\n"
+                "       [-r total number of records (100*)]\n"
+                "       [-s SACL]\n"
+		"	[-u user_prefix]\n"
+                "\n" );
+    return (0);
+}
+
+/*
+ * This is where you parse your lower-case arguments.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+    debug("benchmark_optswitch");
+
+    switch (opt) {
+    case 'r':    // total number of records. default is 100
+        optRecords = atoi(optarg);
+        debug("optRecords = %d\n", optRecords);
+        break;
+
+    case 's':    // SACL
+        if (optSACL) {
+            printf("SACL already specified. Skipping\n");
+            break;
+        }
+        sacl = malloc(2 * sizeof(char *));
+        if (!sacl) {
+            printf("Error: no memory available for the SACL list\n");
+            return -1;
+        }
+        sacl[0] = strdup(optarg);
+        sacl[1] = "";
+        optSACL = 1;
+        numSACL = 1;
+
+        break;
+
+    case 'u':
+        default_uprefix = strdup(optarg);
+        debug("default_uprefix = %s\n", default_uprefix);
+        break;
+
+    default:
+        return -1;
+    }
+
+    return 0;
+}
+
+
+int
+benchmark_initrun()
+{
+    int i;
+    debug("benchmark_initrun");
+
+    if (!sacl) {
+        sacl = default_sacl;
+    }
+
+    for (i = 0; i < numSACL && strcmp(sacl[i], ""); i++) {
+        debug("SACL = %s", sacl[i]);
+    }
+
+    return (0);
+}
+
+// Initialize all structures that will be used in benchmark():
+// resolve each test user with getpwnam() and cache its UUID
+int
+benchmark_initworker(void *tsd)
+{
+    int i;
+    tsd_t *ts = (tsd_t *)tsd;
+    char *uprefix = default_uprefix;              // local user is default
+    char username[30] = "";
+    struct passwd *info = NULL;
+
+    debug("benchmark_initworker");
+
+    // create an array of user UUIDs to use in benchmark() before they are
+    // needed; real-time generation in benchmark() would affect the
+    // performance measurements
+
+    ts->uuid_list = calloc(optRecords, sizeof(uuid_t));
+    if (!ts->uuid_list) {
+        printf("Error: no memory available for the UUID list\n");
+        exit(1);
+    }
+
+    for (i = 0; i < optRecords; i++) {
+
+        snprintf(username, sizeof(username), "%s%d", uprefix, i+1);
+        info = getpwnam(username);
+        if (!info) {
+            debug ("error converting username %s to uuid", username);
+            exit (1);
+        }
+
+        (void) mbr_uid_to_uuid(info->pw_uid, ts->uuid_list[i]);
+
+#if DEBUG
+        char buf[30];
+        uid_t uid;
+        int id_type; 
+        uuid_unparse(ts->uuid_list[i], buf);
+        mbr_uuid_to_id(ts->uuid_list[i], &uid, &id_type);
+        debug ("username (%s), uid %d, uuid %s, back to uid %d", username, info->pw_uid, buf, uid);
+#endif
+    }
+
+    // if the number of records is smaller than the batch size (one
+    // benchmark run), trim the batch to one lookup per record
+    if (optRecords < lm_optB) {
+        lm_optB = optRecords;
+        debug("Reducing batch size to %d to match the record #\n", lm_optB);
+    }
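+    // e.g. with -B 200 -r 100 the batch shrinks to 100, so benchmark()
+    // never indexes past uuid_list[optRecords - 1]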
+
+    debug("benchmark_initworker");
+    return (0);
+}
+
+int
+benchmark(void *tsd, result_t *res)
+{
+    tsd_t *ts = (tsd_t *)tsd;
+    int         i;
+    int         err;
+    int         isMember=0;
+    char        *sacl_chosen;
+
+#ifdef DEBUG
+    uid_t       uid;
+    int         id_type;
+#endif
+
+    res->re_errors = 0;
+
+    debug("in to benchmark - optB = %i", lm_optB);
+    for (i = 0; i < lm_optB; i++) {
+
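+        // pick one of the configured SACLs at random; with the default
+        // list the checks are spread across three services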
+        sacl_chosen = sacl[random() % numSACL];
+        err = mbr_check_service_membership(ts->uuid_list[i], sacl_chosen, &isMember);
+
+#ifdef DEBUG
+        mbr_uuid_to_id(ts->uuid_list[i], &uid, &id_type);
+        debug ("loop %d: uid %d is %s a member of %s", i, uid, (isMember) ? "" : "not", sacl_chosen);
+#endif
+
+        if (err) {
+            debug("error: %s", strerror(err));
+            res->re_errors++;
+        }
+    }
+    res->re_count = i;
+
+    return (0);
+}
+
+
+// We need to release all the structures we allocated in benchmark_initworker()
+int
+benchmark_finiworker(void *tsd)
+{
+    tsd_t *ts = (tsd_t *)tsd;
+    debug("benchmark_result: deallocating structures");
+
+    free(ts->uuid_list);
+
+    return (0);
+}
+
+int
+benchmark_finirun(void *tsd)
+{
+    if (optSACL) {
+        free(sacl[0]);
+        free(sacl);
+    }
+
+    return 0;
+}
+
+char *
+benchmark_result()
+{
+    static char    result = '\0';
+    debug("benchmark_result");
+    return (&result);
+}
+
diff --git a/tools/tests/libMicro/apple/od_query_create_with_node.c b/tools/tests/libMicro/apple/od_query_create_with_node.c
new file mode 100644
index 000000000..7cf18ce6d
--- /dev/null
+++ b/tools/tests/libMicro/apple/od_query_create_with_node.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 2006 Apple Inc.  All Rights Reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+
+// add additional headers needed here.
+
+#include "../libmicro.h"
+#include <CoreFoundation/CFArray.h>
+#include <CoreFoundation/CFString.h>
+#include <CoreFoundation/CFDictionary.h>
+#include <OpenDirectory/OpenDirectory.h>
+#include <DirectoryService/DirectoryService.h>
+
+#if DEBUG
+# define debug(fmt, args...)    (void) fprintf(stderr, fmt , ##args)
+// # define debug(fmt, args...)    (void) fprintf(stderr, fmt "\n" , ##args)
+#else
+# define debug(fmt, args...)
+#endif
+
+
+// Correct use case
+//
+//    od_query_create_with_node -E  -L -S -W -B 200 -C 10 -c 100 -r 300
+//
+//      libMicro default benchmark run options are "-E -C 200 -L -S -W"
+//
+// -B is the batch size: loop iterations per benchmark run. It needs to
+//                   match the number of real lookups, i.e. the total
+//                   number of lookups to issue.
+// -C is the minimum sample count: how many benchmark runs are needed to
+//                          get a proper sample. 1 is the minimum, but you
+//                          get at least 3 benchmark-run samples. Do not
+//                          set it to zero. The default is 200 for most
+//                          runs in libMicro.
+// -r is the total number of records.
+// -c is the cache hit rate for lookups: for 10%, you need -c 10.
+//                e.g. -B 100 -c 50 -r 1000 -C 200 (out of 1000 records, I
+//                     want a 50% cache hit rate, and the batch size is 100.
+//                     To get a 50% cache hit rate, you need 500 record
+//                     lookups, so the batch size will be adjusted to 500
+//                     in each benchmark run. If the -r size is smaller
+//                     than -B, then -B will not be adjusted.)
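+//
+//     (tracing the code for that example: optRecords becomes
+//      1000 * 50 / 100 = 500 in benchmark_initrun(); since -B 100 < 500,
+//      lm_optB is raised to 500, i.e. one lookup per remaining record)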
+
+// Defining prefix for user and group name
+// make sure that these match the ones in LDAP records
+// ie. local_test_1 , od_test_4525, od_test_group_43, od_test_host_63
+#define LOCAL_U_PREFIX     CFSTR("local_test_")
+#define OD_U_PREFIX        CFSTR("od_test_")
+#define LOCAL_G_PREFIX     CFSTR("local_test_group_")
+#define OD_G_PREFIX        CFSTR("od_test_group_")
+#define LOCAL_H_PREFIX     CFSTR("local_test_host_")
+#define OD_H_PREFIX        CFSTR("od_test_host_")
+
+/*
+ *    Your state variables should live in the tsd_t struct below
+ */
+typedef struct {
+    ODNodeRef    node;
+} tsd_t;
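+// (each worker gets its own tsd_t via lm_tsdsize, so every worker thread
+// or process holds its own ODNodeRef, created in benchmark_initworker())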
+
+// dsRecTypeStandard type dictionary
+enum {rectype_users=0, rectype_groups, rectype_hosts};
+CFStringRef rectype_dict[] = { CFSTR(kDSStdRecordTypeUsers),
+                               CFSTR(kDSStdRecordTypeGroups), 
+                               CFSTR(kDSStdRecordTypeHosts) };
+
+// the number of record lookup to issue is covered by standard option optB
+static int  optRecords =    100;  // the number of total records
+static int  optCachehit =   100;  // specify cache hit rate (% of record re-lookup)
+static bool optNodeLocal =  1;    // which node to search. Local node is default
+static int  optType =       rectype_users;    // dsRecType to search for. "Users" is the default
+static const char *nodename = "/LDAPv3/127.0.0.1";
+
+static CFStringRef *key;                // username array
+
+// parse -t option and return enum type: user, group, and host
+// called by benchmark_optswitch()
+int
+ds_rec_type(char *name)
+{
+    if (strcasecmp("u", name) == 0) {
+        return (rectype_users);
+    } else if (strcasecmp("g", name) == 0) {
+        return (rectype_groups);
+    } else if (strcasecmp("h", name) == 0) {
+        return (rectype_hosts);
+    }
+
+    return (-1);
+}
+
+int
+benchmark_init()
+{
+    debug("benchmark_init");
+    (void) sprintf(lm_optstr,  "c:n:r:t:");
+
+    lm_tsdsize = sizeof (tsd_t);
+    lm_defB = 1000;
+
+    (void) sprintf(lm_usage,
+                "\n       ------- od_query_create_with_node specific options (default: *)\n"
+                "       [-c hitrate%% (100%%*)]\n"
+                "       [-r total number of records (100*)]\n"
+                "       [-n nodename] node name to use for test\n"
+                "       [-t record type: 'u'sers, 'g'roups, 'h'osts]\n"
+                "       use -B option to specify total number of record lookups to issue"
+                "\n" );
+    return (0);
+}
+
+/*
+ * This is where you parse your lower-case arguments.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+    debug("benchmark_optswitch");
+
+    switch (opt) {
+    case 'c':    // cache hit rate. 100% means lookup the same records over and over
+        optCachehit = atoi(optarg);
+        debug("optCachehit = %d\n", optCachehit);
+        if (optCachehit > 100 || optCachehit < 0) {
+            printf("cache hit rate should be in between 0%% and 100%%");
+            return (-1);
+        }
+        break;
+
+    case 'r':    // total number of records. default is 100
+        optRecords = atoi(optarg);
+        debug("optRecords = %d\n", optRecords);
+        break;
+
+    case 'n':    // node
+        nodename = optarg;
+        break;
+
+    case 't':    // dsRecType: user, group, hosts
+        optType = ds_rec_type(optarg);
+        debug("optType = %d\n", optType);
+
+        if (optType == -1) {
+            printf("wrong -t record type option\n");
+            return (-1);
+        }
+        break;
+
+    default:
+        return (-1);
+    }
+
+    return (0);
+}
+
+
+int
+benchmark_initrun()
+{
+    int i;
+    CFStringRef prefix;              // local prefixes are the default
+
+    debug("benchmark_initrun\n");
+
+    // Adjust # of record lookups to reflect cache hit rate
+    if (optCachehit < 100) {
+        optRecords  = (int) ((float) optRecords * ((float) optCachehit / 100));
+        debug("# of records adjusted to %d for cache hit rate %d%%\n", optRecords, optCachehit);
+    }
+
+    // if batch size (one benchmark run) is less than the number records, adjust
+    // it to match the number record lookups in one batch run
+    if (lm_optB < optRecords) {
+        lm_optB = optRecords;
+        debug("Adjusting batch size to %d to match the lookups required in benchmark run\n", lm_optB);
+    }
+
+    switch (optType) {
+        case rectype_users:
+            prefix = (optNodeLocal) ? LOCAL_U_PREFIX : OD_U_PREFIX;
+            break;
+        case rectype_groups:
+            prefix = (optNodeLocal) ? LOCAL_G_PREFIX : OD_G_PREFIX;
+            break;
+        case rectype_hosts:
+            prefix = (optNodeLocal) ? LOCAL_H_PREFIX : OD_H_PREFIX;
+            break;
+        default:    // unreachable: ds_rec_type() has already validated optType
+            prefix = (optNodeLocal) ? LOCAL_U_PREFIX : OD_U_PREFIX;
+            break;
+    }
+    // create an array of usernames to use in benchmark() before they are
+    // needed; real-time generation in benchmark() would affect the
+    // performance measurements
+
+    key = malloc(sizeof(CFStringRef) * optRecords);
+    if (!key) {
+        printf("Error: no memory available for the key array\n");
+        return (-1);
+    }
+
+    // user, group, hosts key to lookup
+    switch (optType) {
+
+    case rectype_users:     // users query
+    case rectype_groups:    // groups query
+    case rectype_hosts:     // hosts query
+        for (i = 0; i < optRecords; i++) {
+            key[i] = CFStringCreateWithFormat( kCFAllocatorDefault, 
+                                               NULL, 
+                                               CFSTR("%@%d"), 
+                                               prefix, 
+                                               i+1);
+            // CFShow(key[i]);  // print user name to check
+        }
+        break;
+    }
+
+    return (0);
+}
+
+
+// Initialize the per-worker structures used in benchmark():
+// make a local or network node reference for the OD query
+// (the user keys were already created in benchmark_initrun())
+int
+benchmark_initworker(void *tsd)
+{
+    CFErrorRef    error;
+    tsd_t *ts = (tsd_t *)tsd;
+
+    debug("benchmark_initworker: %s", (optNodeLocal) ? "local" : "network");
+
+
+    // create OD node for local or OD query
+    if (optNodeLocal) {
+        ts->node = ODNodeCreateWithNodeType(NULL, kODSessionDefault, kODNodeTypeLocalNodes, &error);
+    }
+    else {
+        CFStringRef nodenameStr = CFStringCreateWithCString(kCFAllocatorDefault, nodename, kCFStringEncodingUTF8);
+        ts->node = ODNodeCreateWithName(NULL, kODSessionDefault, nodenameStr, &error);
+        CFRelease(nodenameStr);
+    }
+
+    if (!ts->node) {
+        debug("error creating the OD node reference\n");
+        exit(1);
+    }
+
+    // ODNodeCreateWith* returns an owned reference, so no extra CFRetain
+    // is needed here; benchmark_finiworker() releases it exactly once
+
+    debug("benchmark_initworker: ODNodeRef = %p\n", ts->node);
+    return (0);
+}
+
+int
+benchmark(void *tsd, result_t *res)
+{
+
+    tsd_t        *ts = (tsd_t *)tsd;
+    int          i;
+    ODNodeRef    node;
+    CFErrorRef   error;
+    CFArrayRef   results;
+    ODQueryRef   query;
+
+    res->re_errors = 0;
+    node = ts->node;
+
+    debug("into benchmark - optB = %i, node = %p\n", lm_optB, node);
+    for (i = 0; i < lm_optB; i++) {
+
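+        // the key index below wraps at optRecords, so when lm_optB exceeds
+        // the record count the same names are queried again, which is what
+        // produces the cache hits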
+        debug("loop %d: querying\n", i);
+        query = ODQueryCreateWithNode(NULL,
+                        node,                        // inNode
+                        rectype_dict[optType],       // inRecordTypeOrList
+                        CFSTR(kDSNAttrRecordName),   // inAttribute
+                        kODMatchInsensitiveEqualTo,  // inMatchType
+                        key[i % optRecords],         // inQueryValueOrList
+                        NULL,                        // inReturnAttributeOrList
+                        1,                           // inMaxResults
+                        &error);
+
+        if (query) {
+            // copy the results, but do not actually examine them in the
+            // benchmark run
+            // debug("loop %d: calling ODQueryCopyResults\n", i);
+            results = ODQueryCopyResults(query, FALSE, &error);
+            CFRelease(query);
+            if (results) {
+#if DEBUG
+                int c;
+                c = CFArrayGetCount(results);
+                if (c > 0) {
+                    debug("Successful run: %d results, ", c);
+                }
+                else {
+                    debug("no result for ");
+                }
+                CFShow (key[i % optRecords]);
+                debug("\n");
+#endif
+                CFRelease(results);
+            }
+            else {
+                debug("loop %d: ODQueryCopyResults returned empty result for ", i);
+                res->re_errors++;
+                CFShow (key[i % optRecords]);
+                debug("\n");
+            } // if (results)
+
+        } // if (query)
+        else {
+            res->re_errors++;
+        }
+    }
+    res->re_count = i;
+
+    return (0);
+}
+
+
+// We need to release all the structures we allocated in benchmark_initworker()
+int
+benchmark_finiworker(void *tsd)
+{
+    tsd_t    *ts = (tsd_t *)tsd;
+
+    debug("benchmark_result: deallocating structures\n");
+
+    // free the node
+    if (ts->node)
+        CFRelease (ts->node);
+    ts->node = NULL;
+
+    return (0);
+}
+
+int
+benchmark_finirun()
+{
+    int i;
+
+    for (i = 0; i < optRecords; i++){
+        CFRelease(key[i]);
+    }
+
+    free(key);
+
+    return (0);
+}
+
+char *
+benchmark_result()
+{
+    static char    result = '\0';
+    debug("\n\n# of records adjusted to %d for cache hit rate %d%%\n", optRecords, optCachehit);
+    debug("benchmark_result\n");
+    return (&result);
+}
+
diff --git a/tools/tests/libMicro/bench.sh b/tools/tests/libMicro/bench.sh
index 698557eb6..ea890e55a 100644
--- a/tools/tests/libMicro/bench.sh
+++ b/tools/tests/libMicro/bench.sh
@@ -30,6 +30,41 @@
 # Use is subject to license terms.
 #
 
+
+# usage function - defines all the options that can be given to this script.
+function usage {
+	echo "Usage"
+	echo "$0 [-l] [-h] [name of test]"
+	echo "-l               : This option runs the lmbench tests along with the default libmicro tests."
+	echo "-h               : Help. This option displays information on how to run the script. "
+	echo "[name of test]   : This option runs only the test that is specified"
+	echo ""
+	echo "Examples"
+	echo "$0               : This is the defualt execution. This will run only the default libmicro tests."
+	echo "$0 -l            : This will run the lmbench tests too "
+	echo "$0 getppid       : This will run only the getppid tests"
+	exit
+	
+}
+
+if [ $# -eq 1 ]
+then 
+	lmbench=2    # to check if only a single test is to be run, e.g., ./bench.sh getppid
+else
+	lmbench=0    # to run the default libMicro tests, without the lmbench tests.
+fi
+
+while getopts "lh" OPT_LIST
+do
+	case $OPT_LIST in 
+		l) lmbench=1;;    # to run the libmicro tests including the lmbench tests.
+		h) usage;;
+		*) usage;;
+	esac
+done
+
+
+
 tattle="./tattle"
 
 bench_version=0.4.0
@@ -121,6 +156,7 @@ printf "!Machine_name: %30s\n" "$hostname"
 printf "!OS_name:      %30s\n" `uname -s`
 printf "!OS_release:   %30s\n" `sw_vers -productVersion`
 printf "!OS_build:     %30.18s\n" "`sw_vers -buildVersion`"
+printf "!Kernel:       %30.50s\n" "`uname -v|cut -d ' ' -f 11`"
 printf "!Processor:    %30s\n" `arch`
 printf "!#CPUs:        %30s\n" $p_count
 printf "!CPU_MHz:      %30s\n" "$p_mhz"
@@ -174,10 +210,24 @@ do
 		;;
 
 	*$1*)
+		# Default execution without the lmbench tests. 
+		# checks if there is no argument passed by the user.
+		if [  $lmbench -eq 0 ]
+		then
+			string=lmbench
+			if [ "${A:0:7}" == "$string" ]
+			then
+				continue
+			fi
+		fi
+
 		;;
-
-	*)
-		continue
+
+	*)
+		if [ $lmbench -ne 1 ]
+		then
+			continue
+		fi
 		;;
 	esac
 
diff --git a/tools/tests/libMicro/benchDS.sh b/tools/tests/libMicro/benchDS.sh
new file mode 100644
index 000000000..26a2bc562
--- /dev/null
+++ b/tools/tests/libMicro/benchDS.sh
@@ -0,0 +1,324 @@
+#!/bin/sh
+#
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms
+# of the Common Development and Distribution License
+# (the "License").  You may not use this file except
+# in compliance with the License.
+#
+# You can obtain a copy of the license at
+# src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL
+# HEADER in each file and include the License file at
+# usr/src/OPENSOLARIS.LICENSE.  If applicable,
+# add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your
+# own identifying information: Portions Copyright [yyyy]
+# [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+function usage {
+    echo "Usage"
+    echo "$0 [-l] [-h] <#-of-users> nodename [test match pattern]"
+    echo "-l                    : disable libinfo L1 cache"
+    echo "-h                    : Help. This option displays information on how to run the script. "
+    echo "[test match pattern]  : This option runs only the test that is specified"
+    echo                                                                                                                                                                                                       
+    echo "You must have set up users, groups, and SACLs with od_account_create"
+    echo "with the same number of user accounts."                                                                                                                                                               
+    echo "Supply a pattern to match to run a subset of tests"
+    exit 1
+}
+
+# default to libinfo cache enabled
+L1CACHE="1"
+
+while getopts "lh" OPT_LIST
+do
+    case $OPT_LIST in 
+        l) L1CACHE="0";;    # to run the libmicro tests with l1cache disabled
+        h) usage;;
+        *) usage;;
+    esac
+done
+
+shift `expr $OPTIND - 1`
+
+if [ $# -lt 2 -o $# -gt 3 ]; then
+    usage
+fi
+
+tattle="./tattle"
+
+bench_version=0.4.0
+libmicro_version=`$tattle -V`
+
+case $libmicro_version in
+$bench_version)
+	;;
+*)
+	echo "ERROR: libMicro version doesn't match 'bench' script version"
+	exit 1
+esac
+
+TMPROOT=/private/tmp/libmicro.$$
+VARROOT=/private/var/tmp/libmicro.$$
+mkdir -p $TMPROOT
+mkdir -p $VARROOT
+trap "rm -rf $TMPROOT $VARROOT && exit" 0 2
+
+TFILE=$TMPROOT/data
+IFILE=$TMPROOT/ifile
+TDIR1=$TMPROOT/0/1/2/3/4/5/6/7/8/9
+TDIR2=$TMPROOT/1/2/3/4/5/6/7/8/9/0
+VFILE=$VARROOT/data
+VDIR1=$VARROOT/0/1/2/3/4/5/6/7/8/9
+VDIR2=$VARROOT/1/2/3/4/5/6/7/8/9/0
+
+OPTS="-E -C 200 -L -S -W"
+
+dd if=/dev/zero of=$TFILE bs=1024k count=10 2>/dev/null
+dd if=/dev/zero of=$VFILE bs=1024k count=10 2>/dev/null
+mkdir -p $TDIR1 $TDIR2
+mkdir -p $VDIR1 $VDIR2
+
+touch $IFILE
+/usr/bin/touch /private/var/tmp/lmbench
+
+
+# produce benchmark header for easier comparisons
+
+hostname=`uname -n`
+
+if [ -f /usr/sbin/psrinfo ]; then
+	p_count=`psrinfo|wc -l`
+	p_mhz=`psrinfo -v | awk '/operates/{print $6 "MHz"; exit }'`
+	p_type=`psrinfo -vp 2>/dev/null | awk '{if (NR == 3) {print $0; exit}}'` 
+	p_ipaddr=`getent hosts $hostname | awk '{print $1}'`
+fi
+
+if [ -f /proc/cpuinfo ]; then
+	p_count=`egrep processor /proc/cpuinfo | wc -l`
+	p_mhz=`awk -F: '/cpu MHz/{printf("%5.0f00Mhz\n",$2/100); exit}' /proc/cpuinfo`
+	p_type=`awk -F: '/model name/{print $2; exit}' /proc/cpuinfo`
+	p_ipaddr=`getent hosts $hostname | awk '{print $1}'`
+else
+## Mac OS X specific stuff
+# first, get ugly output, in case pretty output isn't available
+#
+	p_count=`sysctl -n hw.physicalcpu`
+	p_mhz=`sysctl -n hw.cpufrequency`
+	p_type=`sysctl -n hw.model`
+
+if [ -x /usr/sbin/system_profiler ]; then
+	# <rdar://4655981> requires this hunk of work-around
+	# grep the XML for the characteristic we need. The key appears twice, so grep for the useful key (with 'string')
+	# use sed to strip off the <string></string> and the tabs in front of the string.  So much work for so little result.
+	#
+		p_mhz=`system_profiler -xml -detailLevel mini SPHardwareDataType | \
+			grep -A1 current_processor_speed | grep string | \
+			sed -E 's/<string>(.+)<\/string>/\1/' | sed 's-	--g'`
+		p_type=`system_profiler -xml -detailLevel mini SPHardwareDataType | \
+			grep -A1 cpu_type | grep string | \
+			sed -E 's/<string>(.+)<\/string>/\1/' | sed 's-	--g'`
+fi
+
+# look for en0 (usually ethernet) if that isn't there try en1 (usually wireless) else give up
+	p_ipaddr=`ipconfig getpacket en0 | grep yiaddr | tr "= " "\n" | grep [0-9]`
+	if [ ! $p_ipaddr ]; then
+		p_ipaddr=`ipconfig getpacket en1 | grep yiaddr | tr "= " "\n" | grep [0-9]`
+	fi
+	if [ ! $p_ipaddr ]; then
+		p_ipaddr="unknown"
+	fi
+fi
+
+printf "\n\n!Libmicro_#:   %30s\n" $libmicro_version
+printf "!Options:      %30s\n" "$OPTS"
+printf "!Machine_name: %30s\n" "$hostname"
+printf "!OS_name:      %30s\n" `uname -s`
+printf "!OS_release:   %30s\n" `sw_vers -productVersion`
+printf "!OS_build:     %30.18s\n" "`sw_vers -buildVersion`"
+printf "!Processor:    %30s\n" `arch`
+printf "!#CPUs:        %30s\n" $p_count
+printf "!CPU_MHz:      %30s\n" "$p_mhz"
+printf "!CPU_NAME:     %30s\n" "$p_type"
+printf "!IP_address:   %30s\n" "$p_ipaddr"
+printf "!Run_by:       %30s\n" $LOGNAME
+printf "!Date:	       %30s\n" "`date '+%D %R'`"
+printf "!Compiler:     %30s\n" `$tattle -c`
+printf "!Compiler Ver.:%30s\n" "`$tattle -v`"
+printf "!sizeof(long): %30s\n" `$tattle -s`
+printf "!extra_CFLAGS: %30s\n" "`$tattle -f`"
+printf "!TimerRes:     %30s\n\n\n" "`$tattle -r`"
+ 
+bin_dir="$TMPROOT/bin"
+
+mkdir -p $bin_dir
+cp bin-*/exec_bin $bin_dir/$A
+
+cp ./apple/bin-*/posix_spawn_bin $bin_dir/$A
+
+newline=0
+
+# We commonly want to adjust this script for the number of users
+# and configuration of the accounts and configuration being tested.
+#
+# Users:
+NUSERS=$1
+NODENAME=$2
+UID_BASE=5000
+UID_END=`expr $UID_BASE + $NUSERS - 1`
+USER_PREFIX=od_test_
+#
+# Groups:
+GID_ALL_USERS=1211
+GID_NO_USERS=1212
+GROUP_BASE=od_test_group
+#
+# getaddrinfo on hosts:
+HOST_BASE=sfs0
+HOST_RANGE=1-8
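+#
+# Example (the node name here is hypothetical):
+#   ./benchDS.sh 100 /LDAPv3/od.example.com getpwnam
+# runs only the getpwnam tests against the 100 od_test_* users on that node.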
+
+#
+# Everything below the while loop is input for the while loop
+# if you have any tests which can't run in the while loop, put
+# them above this comment
+#
+while read A B
+do
+	# $A contains the command, $B contains the arguments
+	# we echo blank lines and comments
+	# we skip anything which fails to match *$3* (useful
+	# if we only want to test one case, but a nasty hack)
+
+	case $A in
+	\#*)
+		echo "$A $B"
+		newline=1
+		continue
+		;;
+	
+	"")
+		if [ $newline -eq 1 ]
+		then
+			newline=0
+			echo
+			echo
+		fi
+
+		continue
+		;;
+
+	*$3*)
+		;;
+
+	*)
+		continue
+		;;
+	esac
+
+	if [ ! -f $bin_dir/$A ]
+	then
+		cp bin-*/$A $bin_dir/$A
+	fi
+
+	echo
+
+	(cd $TMPROOT && eval "bin/$A $B")
+
+	echo
+	echo
+done <<.
+
+# -P <# procs>
+# -T <# threads> - exclusive!
+
+# mbr_check_service_membership()
+mbr_check_service_membership	$OPTS -N "mbr_check_service_membership" -s libMicro -u ${USER_PREFIX} -r ${NUSERS}
+mbr_check_service_membership	$OPTS -N "mbr_check_service_membership_t2" -T 2 -s libMicro -u ${USER_PREFIX} -r ${NUSERS}
+mbr_check_service_membership	$OPTS -N "mbr_check_service_membership_t4" -T 4 -s libMicro -u ${USER_PREFIX} -r ${NUSERS}
+mbr_check_service_membership	$OPTS -N "mbr_check_service_membership_p2" -P 2 -s libMicro -u ${USER_PREFIX} -r ${NUSERS}
+mbr_check_service_membership	$OPTS -N "mbr_check_service_membership_p4" -P 4 -s libMicro -u ${USER_PREFIX} -r ${NUSERS}
+
+# getpwnam()
+getpwnam			$OPTS -N "getpwnam" -l ${L1CACHE} -r ${NUSERS} -u ${USER_PREFIX}
+getpwnam			$OPTS -N "getpwnam_t2" -T 2 -l ${L1CACHE} -r ${NUSERS} -u ${USER_PREFIX}
+getpwnam			$OPTS -N "getpwnam_p2" -P 2 -l ${L1CACHE} -r ${NUSERS} -u ${USER_PREFIX}
+
+# mbr_check_membership()
+mbr_check_membership		$OPTS -N "mbr_check_membership" -u ${UID_BASE}-${UID_END} -g ${GID_ALL_USERS}-${GID_NO_USERS}
+mbr_check_membership		$OPTS -N "mbr_check_membership_t2" -u ${UID_BASE}-${UID_END} -g ${GID_ALL_USERS}-${GID_NO_USERS} -T 2
+mbr_check_membership		$OPTS -N "mbr_check_membership_t4" -u ${UID_BASE}-${UID_END} -g ${GID_ALL_USERS}-${GID_NO_USERS} -T 4
+mbr_check_membership		$OPTS -N "mbr_check_membership_p2" -u ${UID_BASE}-${UID_END} -g ${GID_ALL_USERS}-${GID_NO_USERS} -P 2
+mbr_check_membership		$OPTS -N "mbr_check_membership_p4" -u ${UID_BASE}-${UID_END} -g ${GID_ALL_USERS}-${GID_NO_USERS} -P 4
+
+# getpwuid()
+getpwuid			$OPTS -N "getpwuid" -l ${L1CACHE} -u ${UID_BASE}-${UID_END}
+getpwuid			$OPTS -N "getpwuid_t2" -l ${L1CACHE} -u ${UID_BASE}-${UID_END} -T 2
+getpwuid			$OPTS -N "getpwuid_t4" -l ${L1CACHE} -u ${UID_BASE}-${UID_END} -T 4
+getpwuid			$OPTS -N "getpwuid_p2" -l ${L1CACHE} -u ${UID_BASE}-${UID_END} -P 2
+getpwuid			$OPTS -N "getpwuid_p4" -l ${L1CACHE} -u ${UID_BASE}-${UID_END} -P 4
+
+# getgrgid()
+getgrgid			$OPTS -N "getgrgid" -l ${L1CACHE} -g ${GID_ALL_USERS}-${GID_NO_USERS}
+getgrgid			$OPTS -N "getgrgid_t2" -l ${L1CACHE} -g ${GID_ALL_USERS}-${GID_NO_USERS} -T 2
+getgrgid			$OPTS -N "getgrgid_t4" -l ${L1CACHE} -g ${GID_ALL_USERS}-${GID_NO_USERS} -T 4
+getgrgid			$OPTS -N "getgrgid_p2" -l ${L1CACHE} -g ${GID_ALL_USERS}-${GID_NO_USERS} -P 2
+getgrgid			$OPTS -N "getgrgid_p4" -l ${L1CACHE} -g ${GID_ALL_USERS}-${GID_NO_USERS} -P 4
+
+# getpwent()
+getpwent			$OPTS -N "getpwent" -l ${L1CACHE} 
+getpwent			$OPTS -N "getpwent_t2" -l ${L1CACHE} -T 2
+getpwent			$OPTS -N "getpwent_t4" -l ${L1CACHE} -T 4
+getpwent			$OPTS -N "getpwent_p2" -l ${L1CACHE} -P 2
+getpwent			$OPTS -N "getpwent_p4" -l ${L1CACHE} -P 4
+
+# getgrent()
+getgrent			$OPTS -N "getgrent" -l ${L1CACHE} 
+getgrent			$OPTS -N "getgrent_t2" -l ${L1CACHE} -T 2
+getgrent			$OPTS -N "getgrent_t4" -l ${L1CACHE} -T 4
+getgrent			$OPTS -N "getgrent_p2" -l ${L1CACHE} -P 2
+getgrent			$OPTS -N "getgrent_p4" -l ${L1CACHE} -P 4
+
+# getaddrinfo() host
+#getaddrinfo_host		$OPTS -N "getaddrinfo_host" -r ${HOST_RANGE} -h ${HOST_BASE}%d
+#getaddrinfo_host		$OPTS -N "getaddrinfo_host_t2" -r ${HOST_RANGE} -h ${HOST_BASE}%d -T 2
+#getaddrinfo_host		$OPTS -N "getaddrinfo_host_t4" -r ${HOST_RANGE} -h ${HOST_BASE}%d -T 4
+#getaddrinfo_host		$OPTS -N "getaddrinfo_host_p2" -r ${HOST_RANGE} -h ${HOST_BASE}%d -P 2
+#getaddrinfo_host		$OPTS -N "getaddrinfo_host_p4" -r ${HOST_RANGE} -h ${HOST_BASE}%d -P 4
+
+# getaddrinfo() port
+getaddrinfo_port		$OPTS -N "getaddrinfo_port" -l ${L1CACHE} 
+getaddrinfo_port		$OPTS -N "getaddrinfo_port_t2" -l ${L1CACHE} -T 2
+getaddrinfo_port		$OPTS -N "getaddrinfo_port_t4" -l ${L1CACHE} -T 4
+getaddrinfo_port		$OPTS -N "getaddrinfo_port_p2" -l ${L1CACHE} -P 2
+getaddrinfo_port		$OPTS -N "getaddrinfo_port_p4" -l ${L1CACHE} -P 4
+
+# getgrnam()
+getgrnam			$OPTS -N "getgrnam" -l ${L1CACHE} -g ${GROUP_BASE} -r 2
+getgrnam			$OPTS -N "getgrnam_t2" -l ${L1CACHE} -T 2 -g ${GROUP_BASE} -r 2
+getgrnam			$OPTS -N "getgrnam_t4" -l ${L1CACHE} -T 4 -g ${GROUP_BASE} -r 2
+getgrnam			$OPTS -N "getgrnam_p2" -l ${L1CACHE} -P 2 -g ${GROUP_BASE} -r 2
+getgrnam			$OPTS -N "getgrnam_p4" -l ${L1CACHE} -P 4 -g ${GROUP_BASE} -r 2
+
+# ODQueryCreateWithNode()
+od_query_create_with_node	$OPTS -N "ODQueryCreateWithNode_cache_${NUSERS}u" -c 50 -r ${NUSERS} -t u -B 50 -n ${NODENAME}
+od_query_create_with_node	$OPTS -N "ODQueryCreateWithNode_cache_${NUSERS}u_t2" -T 2 -c 50 -r ${NUSERS} -t u -B 50 -n ${NODENAME}
+od_query_create_with_node	$OPTS -N "ODQueryCreateWithNode_cache_${NUSERS}u_t4" -T 4 -c 50 -r ${NUSERS} -t u -B 50 -n ${NODENAME}
+od_query_create_with_node	$OPTS -N "ODQueryCreateWithNode_cache_${NUSERS}u_p2" -P 2 -c 50 -r ${NUSERS} -t u -B 50 -n ${NODENAME}
+od_query_create_with_node	$OPTS -N "ODQueryCreateWithNode_cache_${NUSERS}u_p4" -P 4 -c 50 -r ${NUSERS} -t u -B 50 -n ${NODENAME}
+
+.
diff --git a/tools/tests/libMicro/coreos_bench.sh b/tools/tests/libMicro/coreos_bench.sh
new file mode 100644
index 000000000..a862cbd86
--- /dev/null
+++ b/tools/tests/libMicro/coreos_bench.sh
@@ -0,0 +1,837 @@
+#!/bin/sh
+#
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms
+# of the Common Development and Distribution License
+# (the "License").  You may not use this file except
+# in compliance with the License.
+#
+# You can obtain a copy of the license at
+# src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL
+# HEADER in each file and include the License file at
+# usr/src/OPENSOLARIS.LICENSE.  If applicable,
+# add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your
+# own identifying information: Portions Copyright [yyyy]
+# [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+# usage function - defines all the options that can be given to this script.
+function usage {
+	echo "Usage"
+	echo "$0 [-l] [-h] [name of test]"
+	echo "-l               : This option runs the lmbench tests along with the default libmicro tests."
+	echo "-h               : Help. This option displays information on how to run the script. "
+	echo "[name of test]   : This option runs only the test that is specified"
+	echo ""
+	echo "Examples"
+	echo "$0               : This is the defualt execution. This will run only the default libmicro tests."
+	echo "$0 -l            : This will run the lmbench tests too "
+	echo "$0 getppid       : This will run only the getppid tests"
+	exit
+	
+}
+
+if [ $# -eq 1 ]
+then
+	lmbench=2   # to check if only a single test is to be run, e.g., ./coreos_bench.sh getppid
+else
+	lmbench=0   # to run the default libMicro tests, without the lmbench tests.
+fi
+
+while getopts "lh" OPT_LIST
+do
+	case $OPT_LIST in 
+		l) lmbench=1;;   # to run the libmicro tests including the lmbench tests.
+		h) usage;;
+		*) usage;;
+	esac
+done
+
+
+tattle="./tattle"
+
+bench_version=0.4.0
+libmicro_version=`$tattle -V`
+
+case $libmicro_version in
+$bench_version)
+	;;
+*)
+	echo "ERROR: libMicro version doesn't match 'coreos_bench' script version"
+	exit 1
+esac
+
+TMPROOT=/private/tmp/libmicro.$$
+VARROOT=/private/var/tmp/libmicro.$$
+mkdir -p $TMPROOT
+mkdir -p $VARROOT
+
+#if 1 /* Apple modified code */
+
+# If the testsuite finishes completely, or if it is interrupted before
+# completion, re-enable the stepper for normal operation of the machine;
+# see rdar://6243819 for details
+trap "rm -rf $TMPROOT $VARROOT && sudo pstates -e && exit" 0 2
+
+#else
+# trap "rm -rf $TMPROOT $VARROOT && exit" 0 2
+#endif /* End of Apple modified code */
+
+TFILE=$TMPROOT/data
+IFILE=$TMPROOT/ifile
+TDIR1=$TMPROOT/0/1/2/3/4/5/6/7/8/9
+TDIR2=$TMPROOT/1/2/3/4/5/6/7/8/9/0
+VFILE=$VARROOT/data
+VDIR1=$VARROOT/0/1/2/3/4/5/6/7/8/9
+VDIR2=$VARROOT/1/2/3/4/5/6/7/8/9/0
+
+
+OPTS="-E -C 200 -L -S -W"
+
+dd if=/dev/zero of=$TFILE bs=1024k count=10 2>/dev/null
+dd if=/dev/zero of=$VFILE bs=1024k count=10 2>/dev/null
+mkdir -p $TDIR1 $TDIR2
+mkdir -p $VDIR1 $VDIR2
+
+touch $IFILE
+/usr/bin/touch /private/var/tmp/lmbench
+
+
+# produce benchmark header for easier comparisons
+
+hostname=`uname -n`
+
+if [ -f /usr/sbin/psrinfo ]; then
+	p_count=`psrinfo|wc -l`
+	p_mhz=`psrinfo -v | awk '/operates/{print $6 "MHz"; exit }'`
+	p_type=`psrinfo -vp 2>/dev/null | awk '{if (NR == 3) {print $0; exit}}'` 
+	p_ipaddr=`getent hosts $hostname | awk '{print $1}'`
+fi
+
+if [ -f /proc/cpuinfo ]; then
+	p_count=`egrep processor /proc/cpuinfo | wc -l`
+	p_mhz=`awk -F: '/cpu MHz/{printf("%5.0f00Mhz\n",$2/100); exit}' /proc/cpuinfo`
+	p_type=`awk -F: '/model name/{print $2; exit}' /proc/cpuinfo`
+	p_ipaddr=`getent hosts $hostname | awk '{print $1}'`
+else
+## Mac OS X specific stuff
+# first, get ugly output, in case pretty output isn't available
+#
+	p_count=`sysctl -n hw.physicalcpu`
+	p_mhz=`sysctl -n hw.cpufrequency`
+	p_type=`sysctl -n hw.model`
+
+if [ -x /usr/sbin/system_profiler ]; then
+	# <rdar://4655981> requires this hunk of work-around
+	# grep the XML for the characteristic we need. The key appears twice, so grep for the useful key (with 'string')
+	# use sed to strip off the <string></string> and the tabs in front of the string.  So much work for so little result.
+	#
+		p_mhz=`system_profiler -xml -detailLevel mini SPHardwareDataType | \
+			grep -A1 current_processor_speed | grep string | \
+			sed -E 's/<string>(.+)<\/string>/\1/' | sed 's-	--g'`
+		p_type=`system_profiler -xml -detailLevel mini SPHardwareDataType | \
+			grep -A1 cpu_type | grep string | \
+			sed -E 's/<string>(.+)<\/string>/\1/' | sed 's-	--g'`
+fi
+
+# look for en0 (usually ethernet) if that isn't there try en1 (usually wireless) else give up
+	p_ipaddr=`ipconfig getpacket en0 | grep yiaddr | tr "= " "\n" | grep [0-9]`
+	if [ ! $p_ipaddr ]; then
+		p_ipaddr=`ipconfig getpacket en1 | grep yiaddr | tr "= " "\n" | grep [0-9]`
+	fi
+	if [ ! $p_ipaddr ]; then
+		p_ipaddr="unknown"
+	fi
+fi
+
+printf "\n\n!Libmicro_#:   %30s\n" $libmicro_version
+printf "!Options:      %30s\n" "$OPTS"
+printf "!Machine_name: %30s\n" "$hostname"
+printf "!OS_name:      %30s\n" `uname -s`
+printf "!OS_release:   %30s\n" `sw_vers -productVersion`
+printf "!OS_build:     %30.18s\n" "`sw_vers -buildVersion`"
+printf "!Processor:    %30s\n" `arch`
+printf "!#CPUs:        %30s\n" $p_count
+printf "!CPU_MHz:      %30s\n" "$p_mhz"
+printf "!CPU_NAME:     %30s\n" "$p_type"
+printf "!IP_address:   %30s\n" "$p_ipaddr"
+printf "!Run_by:       %30s\n" $LOGNAME
+printf "!Date:	       %30s\n" "`date '+%D %R'`"
+printf "!Compiler:     %30s\n" `$tattle -c`
+printf "!Compiler Ver.:%30s\n" "`$tattle -v`"
+printf "!sizeof(long): %30s\n" `$tattle -s`
+printf "!extra_CFLAGS: %30s\n" "`$tattle -f`"
+printf "!TimerRes:     %30s\n\n\n" "`$tattle -r`"
+ 
+bin_dir="$TMPROOT/bin"
+
+mkdir -p $bin_dir
+cp bin-*/exec_bin $bin_dir/$A
+
+cp ./apple/bin-*/posix_spawn_bin $bin_dir/$A
+
+newline=0
+
+#if 1 /* Apple added code */
+
+# We need to disable the stepper to prevent it from causing
+# wide variations in results; see rdar://6243819 for details
+
+pstates=/usr/local/bin/pstates
+if [ -x $pstates ]; then
+        echo "Disabling stepper to provide more consistent results of benchmark run"
+        sudo $pstates -d; sudo pstates -p 0
+else
+        echo "ERROR: No $pstates found; To disable stepper we need $pstates" 1>&2
+        echo "Install AppleInternal package which provides $pstates and execute 'coreos_bench' again" 1>&2
+        echo 1>&2
+        echo "Note: If you cannot install AppleInternal package which provides $pstates, then use 'bench' script to run libMicro testsuite" 1>&2	
+	exit 1
+fi
+
+#endif /* End of Apple code */
+
+#
+# Everything below the while loop is input for the while loop
+# if you have any tests which can't run in the while loop, put
+# them above this comment
+#
+while read A B
+do
+	# $A contains the command, $B contains the arguments
+	# we echo blank lines and comments
+	# we skip anything which fails to match *$1* (useful
+	# if we only want to test one case, but a nasty hack)
+
+	case $A in
+	\#*)
+		echo "$A $B"
+		newline=1
+		continue
+		;;
+	
+	"")
+		if [ $newline -eq 1 ]
+		then
+			newline=0
+			echo
+			echo
+		fi
+
+		continue
+		;;
+
+	*$1*)
+		# Default execution without the lmbench tests. 
+		# checks if there is no argument passed by the user.
+		if [  $lmbench -eq 0 ]
+		then
+			string=lmbench
+			if [ "${A:0:7}" == "$string" ]
+			then
+				continue
+			fi
+		fi
+
+		;;
+
+	*)
+		if [ $lmbench -ne 1 ]
+		then
+			continue
+		fi
+		;;
+	esac
+
+	if [ ! -f $bin_dir/$A ]
+	then
+		cp bin-*/$A $bin_dir/$A
+	fi
+
+	echo
+
+	(cd $TMPROOT && eval "bin/$A $B")
+
+	echo
+	echo
+done <<.
+
+#
+# Obligatory null system call: use very short time
+# for default since SuSe implements this "syscall" in userland
+#
+
+getpid		$OPTS -N "getpid" -I 5
+getppid		$OPTS -N "getppid" -I 5
+
+getenv		$OPTS -N "getenv"	-s 100 -I 100	 
+getenv		$OPTS -N "getenvT2"	-s 100 -I 100	-T 2 
+
+gettimeofday	$OPTS -N "gettimeofday"          
+
+log		$OPTS -N "log"	-I 20	-B 300000	 
+exp		$OPTS -N "exp"	-I 20	-B 100000	 
+lrand48		$OPTS -N "lrand48"
+
+memset		$OPTS -N "memset_10"	-s 10	-I 10 
+memset		$OPTS -N "memset_256"	-s 256	-I 20
+memset		$OPTS -N "memset_256_u"	-s 256	 -a 1 -I 20 
+memset		$OPTS -N "memset_1k"	-s 1k	 -I 100 -B 2000
+memset		$OPTS -N "memset_4k"    -s 4k    -I 250 -B 500
+memset		$OPTS -N "memset_4k_uc" -s 4k    -u -I 400
+
+memset		$OPTS -N "memset_10k"	-s 10k	-I 600 -B 500
+memset		$OPTS -N "memset_1m"	-s 1m	-I 200000
+memset		$OPTS -N "memset_10m"	-s 10m -I 2000000 
+memset		$OPTS -N "memsetP2_10m"	-s 10m -P 2 -I 2000000 
+
+memrand		$OPTS -N "memrand"	-s 40m -B 10000
+
+# This is an elided test and is not ported yet.
+# Check Makefile.darwin for list of elided tests  
+# cachetocache	$OPTS -N "cachetocache" -s 100k -T 2 -I 200
+
+isatty		$OPTS -N "isatty_yes"   
+isatty		$OPTS -N "isatty_no"  -f $IFILE
+
+malloc		$OPTS -N "malloc_10"    -s 10    -g 10 -I 50
+malloc		$OPTS -N "malloc_100"   -s 100   -g 10 -I 50
+malloc		$OPTS -N "malloc_1k"    -s 1k    -g 10 -I 50
+malloc		$OPTS -N "malloc_10k"   -s 10k   -g 10 -I 50
+malloc		$OPTS -N "malloc_100k"  -s 100k  -g 10 -I 2000
+
+malloc		$OPTS -N "mallocT2_10"    -s 10   -g 10 -T 2 -I 200
+malloc		$OPTS -N "mallocT2_100"   -s 100  -g 10 -T 2 -I 200
+malloc		$OPTS -N "mallocT2_1k"    -s 1k   -g 10 -T 2 -I 200
+malloc		$OPTS -N "mallocT2_10k"   -s 10k  -g 10 -T 2 -I 200
+malloc		$OPTS -N "mallocT2_100k"  -s 100k -g 10 -T 2 -I 10000
+
+close		$OPTS -N "close_bad"		-B 96		-b
+close		$OPTS -N "close_tmp"		-B 64		-f $TFILE
+close		$OPTS -N "close_usr"		-B 64		-f $VFILE
+close		$OPTS -N "close_zero"		-B 64		-f /dev/zero
+close_tcp	$OPTS -N "close_tcp"		-B 32  
+
+memcpy		$OPTS -N "memcpy_10"	-s 10	-I 10 
+memcpy		$OPTS -N "memcpy_1k"	-s 1k	-I 50
+memcpy		$OPTS -N "memcpy_10k"	-s 10k	-I 800
+memcpy		$OPTS -N "memcpy_1m"	-s 1m   -I 500000
+memcpy		$OPTS -N "memcpy_10m"	-s 10m  -I 5000000
+
+strcpy		$OPTS -N "strcpy_10"	-s 10   -I 5 
+strcpy		$OPTS -N "strcpy_1k"	-s 1k   -I 100
+
+strlen		$OPTS -N "strlen_10"	-s 10   -I 5
+strlen		$OPTS -N "strlen_1k"	-s 1k   -I 100
+
+strchr		$OPTS -N "strchr_10"	-s 10   -I 5
+strchr		$OPTS -N "strchr_1k"	-s 1k   -I 200
+strcmp		$OPTS -N "strcmp_10"	-s 10   -I 10
+strcmp		$OPTS -N "strcmp_1k"	-s 1k   -I 200
+
+strcasecmp	$OPTS -N "scasecmp_10"	-s 10 -I 50 -B 2000
+strcasecmp	$OPTS -N "scasecmp_1k"	-s 1k -I 20000 -B 100
+
+strtol		$OPTS -N "strtol"      -I 20      
+
+# This is an elided test and is not ported yet.     
+# Check Makefile.darwin for list of elided tests
+# getcontext	$OPTS -N "getcontext"  -I 100
+
+# This is an elided test and is not ported yet.     
+# Check Makefile.darwin for list of elided tests
+# setcontext	$OPTS -N "setcontext"  -I 100
+
+mutex		$OPTS -N "mutex_st"	-I 10
+mutex		$OPTS -N "mutex_mt"	-t -I 10	
+mutex		$OPTS -N "mutex_T2"     -T 2  -I 100
+
+longjmp		$OPTS -N "longjmp"	-I 10
+siglongjmp	$OPTS -N "siglongjmp"	-I 20
+
+getrusage	$OPTS -N "getrusage"	-I 200
+
+times		$OPTS -N "times"	-I 200
+time		$OPTS -N "time"		-I 50
+localtime_r	$OPTS -N "localtime_r"	-I 200  
+strftime	$OPTS -N "strftime" -I 10000 -B 100 
+
+mktime		$OPTS -N "mktime"       -I 500   
+mktime		$OPTS -N "mktimeT2" -T 2 -I 1000 
+
+cascade_mutex	$OPTS -N "c_mutex_1"	-I 50
+cascade_mutex	$OPTS -N "c_mutex_10"	-T 10 -I 5000
+cascade_mutex	$OPTS -N "c_mutex_200"	-T 200	-I 2000000
+
+cascade_cond	$OPTS -N "c_cond_1"	-I 100
+cascade_cond	$OPTS -N "c_cond_10"	-T 10	-I 3000
+cascade_cond	$OPTS -N "c_cond_200"	-T 200	-I 2000000
+
+cascade_lockf	$OPTS -N "c_lockf_1"	-I 1000	
+cascade_lockf	$OPTS -N "c_lockf_10"	-P 10 -I 50000
+cascade_lockf	$OPTS -N "c_lockf_200"	-P 200 -I 5000000
+
+cascade_flock	$OPTS -N "c_flock"	-I 1000	
+cascade_flock	$OPTS -N "c_flock_10"	-P 10   -I 50000
+cascade_flock	$OPTS -N "c_flock_200"	-P 200	-I 5000000
+
+cascade_fcntl	$OPTS -N "c_fcntl_1"	-I 2000 	
+cascade_fcntl	$OPTS -N "c_fcntl_10"	-P 10 -I 20000
+cascade_fcntl	$OPTS -N "c_fcntl_200"	-P 200	-I 5000000
+
+file_lock	$OPTS -N "file_lock"   -I 1000         
+
+getsockname	$OPTS -N "getsockname"	-I 100
+getpeername	$OPTS -N "getpeername"	-I 100
+
+chdir		$OPTS -N "chdir_tmp"	-I 2000		$TDIR1 $TDIR2
+chdir		$OPTS -N "chdir_usr"	-I 2000		$VDIR1 $VDIR2
+
+chdir		$OPTS -N "chgetwd_tmp"	-I 3000	-g $TDIR1 $TDIR2
+chdir		$OPTS -N "chgetwd_usr"	-I 3000	-g $VDIR1 $VDIR2
+
+realpath	$OPTS -N "realpath_tmp" -I 3000		-f $TDIR1
+realpath	$OPTS -N "realpath_usr"	-I 3000	-f $VDIR1
+
+stat		$OPTS -N "stat_tmp" -I 1000		-f $TFILE
+stat		$OPTS -N "stat_usr" -I 1000		-f $VFILE
+
+lmbench_stat		$OPTS -N "lmbench_stat_tmp" -I 1000		-f $TFILE
+lmbench_stat		$OPTS -N "lmbench_stat_usr" -I 10000 -B 100		-f /private/var/tmp/lmbench
+
+#
+# lmbench uses a touched empty file in /private/var/tmp
+# libMicro uses a 1M file in a directory off /private/var/tmp
+# performance difference is ~ 0.2 usecs/call
+#
+# why? - walking the dir tree, empty file vs. non-empty file, non-empty dir
+# in the case of libMicro, etc., etc.
+#
+
+lmbench_stat		$OPTS -N "lmbench_stat_usr - Default" -I 10000 -B 100	-f /private/var/tmp/lmbench
+
+lmbench_fstat		$OPTS -N "lmbench_fstat_tmp" -I 1000		-f $TFILE
+lmbench_fstat		$OPTS -N "lmbench_fstat_usr" -I 10000 -B 100		-f /private/var/tmp/lmbench
+
+# see stat test to understand why we are using /private/var/tmp/lmbench
+
+lmbench_fstat		$OPTS -N "lmbench_fstat_usr - Default" -I 10000 -B 100	-f /private/var/tmp/lmbench
+
+lmbench_openclose	$OPTS -N "lmbench_openclose - Default" -I 10000 -B 100	-f /private/var/tmp/lmbench
+
+lmbench_select_file $OPTS -N "lmbench_select_file_10"  -n 10  -B 100
+lmbench_select_file $OPTS -N "lmbench_select_file_100" -n 100 -B 100
+lmbench_select_file $OPTS -N "lmbench_select_file_250" -n 250 -B 100
+lmbench_select_file $OPTS -N "lmbench_select_file_500" -n 500 -B 100
+
+lmbench_select_tcp $OPTS -N "lmbench_select_tcp_10"  -n 10  -B 100
+lmbench_select_tcp $OPTS -N "lmbench_select_tcp_100" -n 100 -B 100
+lmbench_select_tcp $OPTS -N "lmbench_select_tcp_250" -n 250 -B 100
+lmbench_select_tcp $OPTS -N "lmbench_select_tcp_500" -n 500 -B 100
+
+fcntl		$OPTS -N "fcntl_tmp"	-I 100	-f $TFILE
+fcntl		$OPTS -N "fcntl_usr"	-I 100	-f $VFILE
+fcntl_ndelay	$OPTS -N "fcntl_ndelay"	-I 100	
+
+lseek		$OPTS -N "lseek_t8k"	-s 8k	-I 50	-f $TFILE
+lseek		$OPTS -N "lseek_u8k"	-s 8k	-I 50	-f $VFILE
+
+open		$OPTS -N "open_tmp"		-B 256		-f $TFILE
+open		$OPTS -N "open_usr"		-B 256		-f $VFILE
+open		$OPTS -N "open_zero"		-B 256		-f /dev/zero
+
+dup		$OPTS -N "dup"			-B 512   
+
+socket		$OPTS -N "socket_u"		-B 256
+socket		$OPTS -N "socket_i"		-B 256		-f PF_INET
+
+socketpair	$OPTS -N "socketpair"		-B 256
+
+setsockopt	$OPTS -N "setsockopt"		-I 200
+
+bind		$OPTS -N "bind"			-B 100
+
+listen		$OPTS -N "listen"		-B 100
+
+#connection	$OPTS -N "connection"		-B 256 
+
+poll		$OPTS -N "poll_10"	-n 10	-I 500
+poll		$OPTS -N "poll_100"	-n 100	-I 1000
+poll		$OPTS -N "poll_1000"	-n 1000	-I 5000
+
+poll		$OPTS -N "poll_w10"	-n 10	-I 500		-w 1
+poll		$OPTS -N "poll_w100"	-n 100	-I 2000		-w 10
+poll		$OPTS -N "poll_w1000"	-n 1000	-I 40000	-w 100
+
+select		$OPTS -N "select_10"	-n 10	-I 500
+select		$OPTS -N "select_100"	-n 100	-I 1000
+select		$OPTS -N "select_1000"	-n 1000	-I 5000
+
+select		$OPTS -N "select_w10"	-n 10	-I 500		-w 1
+select		$OPTS -N "select_w100"	-n 100	-I 2000		-w 10
+select		$OPTS -N "select_w1000"	-n 1000	-I 40000        -w 100
+
+semop		$OPTS -N "semop" -I 200
+
+sigaction	$OPTS -N "sigaction" -I 100
+signal		$OPTS -N "signal" -I 1000
+sigprocmask	$OPTS -N "sigprocmask" -I 200
+
+lmbench_lat_sig_install	$OPTS -N "lmbench_siginstall"
+# sigcatch and sigsend need to be evaluated together:
+# the lmbench framework allows multiple measurements within the same
+# benchmark test, which lets it factor out the cost of sending
+# a signal from the cost of catching one
+#
+# for our purposes, sigcatch results - sigsend results yield the
+# lmbench signal-handler overhead measurement
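+#
+# (illustratively: if lmbench_sigcatch reports 5 us/op and lmbench_sigsend
+#  reports 2 us/op, the handler overhead is roughly 5 - 2 = 3 us)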
+lmbench_lat_sig_catch	$OPTS -N "lmbench_sigcatch" 
+lmbench_lat_sig_send	$OPTS -N "lmbench_sigsend" 
+
+
+pthread_create  $OPTS -N "pthread_8"		-B 8
+pthread_create  $OPTS -N "pthread_32"		-B 32
+pthread_create  $OPTS -N "pthread_128"		-B 128
+pthread_create  $OPTS -N "pthread_512"		-B 512
+
+fork		$OPTS -N "fork_10"		-B 10
+fork		$OPTS -N "fork_100"		-B 100  -C 100
+
+#fork		$OPTS -N "fork_1000"		-B 1000 -C 50
+
+exit		$OPTS -N "exit_10"		-B 10
+exit		$OPTS -N "exit_100"		-B 100
+
+#exit		$OPTS -N "exit_1000"		-B 1000 -C 50
+
+exit		$OPTS -N "exit_10_nolibc"	-e -B 10
+
+exec		$OPTS -N "exec" -B 10
+
+posix_spawn	$OPTS -N "posix_spawn" -B 10
+
+system		$OPTS -N "system" -I 1000000
+
+recurse		$OPTS -N "recurse"		-B 512
+
+read		$OPTS -N "read_t1k"	-s 1k -B 50			-f $TFILE
+read		$OPTS -N "read_t10k"	-s 10k	-B 16		-f $TFILE
+read		$OPTS -N "read_t100k"	-s 100k	-B 4		-f $TFILE
+
+read		$OPTS -N "read_u1k"	-s 1k	-B 50		-f $VFILE
+read		$OPTS -N "read_u10k"	-s 10k	-B 16		-f $VFILE
+read		$OPTS -N "read_u100k"	-s 100k	-B 4		-f $VFILE
+
+read		$OPTS -N "read_z1k"	-s 1k	-B 100		-f /dev/zero 
+read		$OPTS -N "read_z10k"	-s 10k	-B 30		-f /dev/zero 
+read		$OPTS -N "read_z100k"	-s 100k	-B 4		-f /dev/zero 
+read		$OPTS -N "read_zw100k"	-s 100k	-B 4         -w	-f /dev/zero 
+
+lmbench_read		$OPTS -N "read_t1b"	-s 1 -B 50			-f $TFILE
+lmbench_read		$OPTS -N "read_t1k"	-s 1k -B 50			-f $TFILE
+lmbench_read		$OPTS -N "read_t10k"	-s 10k	-B 16		-f $TFILE
+lmbench_read		$OPTS -N "read_t100k"	-s 100k	-B 4		-f $TFILE
+
+lmbench_read		$OPTS -N "read_u1b"	-s 1	-B 50		-f $VFILE
+lmbench_read		$OPTS -N "read_u1k"	-s 1k	-B 50		-f $VFILE
+lmbench_read		$OPTS -N "read_u10k"	-s 10k	-B 16		-f $VFILE
+lmbench_read		$OPTS -N "read_u100k"	-s 100k	-B 4		-f $VFILE
+
+lmbench_read		$OPTS -N "read_z1b - Default"	-s 1	-B 100		-f /dev/zero 
+lmbench_read		$OPTS -N "read_z1k"	-s 1k	-B 100		-f /dev/zero 
+lmbench_read		$OPTS -N "read_z10k"	-s 10k	-B 30		-f /dev/zero 
+lmbench_read		$OPTS -N "read_z100k"	-s 100k	-B 4		-f /dev/zero 
+lmbench_read		$OPTS -N "read_zw100k"	-s 100k	-B 4         -w	-f /dev/zero 
+
+write		$OPTS -N "write_t1k"	-s 1k	-B 50		-f $TFILE
+write		$OPTS -N "write_t10k"	-s 10k	-B 25		-f $TFILE
+write		$OPTS -N "write_t100k"	-s 100k	-B 4		-f $TFILE
+
+write		$OPTS -N "write_u1k"	-s 1k	-B 50		-f $VFILE
+write		$OPTS -N "write_u10k"	-s 10k	-B 25		-f $VFILE
+write		$OPTS -N "write_u100k"	-s 100k	-B 4		-f $VFILE
+
+write		$OPTS -N "write_n1k"	-s 1k	-I 100 -B 0	-f /dev/null 
+write		$OPTS -N "write_n10k"	-s 10k	-I 100 -B 0	-f /dev/null 
+write		$OPTS -N "write_n100k"	-s 100k	-I 100 -B 0	-f /dev/null 
+
+lmbench_write		$OPTS -N "lmbench_write_t1b"	-s 1	-B 50		-f $TFILE
+lmbench_write		$OPTS -N "lmbench_write_t1k"	-s 1k	-B 50		-f $TFILE
+lmbench_write		$OPTS -N "lmbench_write_t10k"	-s 10k	-B 25		-f $TFILE
+lmbench_write		$OPTS -N "lmbench_write_t100k"	-s 100k	-B 4		-f $TFILE
+
+lmbench_write		$OPTS -N "lmbench_write_u1b"	-s 1	-B 50		-f $VFILE
+lmbench_write		$OPTS -N "lmbench_write_u1k"	-s 1k	-B 50		-f $VFILE
+lmbench_write		$OPTS -N "lmbench_write_u10k"	-s 10k	-B 25		-f $VFILE
+lmbench_write		$OPTS -N "lmbench_write_u100k"	-s 100k	-B 4		-f $VFILE
+
+lmbench_write		$OPTS -N "lmbench_write_n1b - Default"	-s 1	-I 100 -B 0	-f /dev/null 
+lmbench_write		$OPTS -N "lmbench_write_n1k"	-s 1k	-I 100 -B 0	-f /dev/null 
+lmbench_write		$OPTS -N "lmbench_write_n10k"	-s 10k	-I 100 -B 0	-f /dev/null 
+lmbench_write		$OPTS -N "lmbench_write_n100k"	-s 100k	-I 100 -B 0	-f /dev/null 
+
+writev		$OPTS -N "writev_t1k"	-s 1k	-B 20		-f $TFILE
+writev		$OPTS -N "writev_t10k"	-s 10k	-B 4	        -f $TFILE
+writev		$OPTS -N "writev_t100k"	-s 100k			-f $TFILE
+
+writev		$OPTS -N "writev_u1k"	-s 1k	-B 20		-f $VFILE
+writev		$OPTS -N "writev_u10k"	-s 10k	-B 4		-f $VFILE
+writev		$OPTS -N "writev_u100k"	-s 100k			-f $VFILE
+
+writev		$OPTS -N "writev_n1k"	-s 1k	-I 100 -B 0	-f /dev/null 
+writev		$OPTS -N "writev_n10k"	-s 10k	-I 100 -B 0	-f /dev/null 
+writev		$OPTS -N "writev_n100k"	-s 100k	-I 100 -B 0	-f /dev/null 
+
+pread		$OPTS -N "pread_t1k"	-s 1k	-I 300		-f $TFILE
+pread		$OPTS -N "pread_t10k"	-s 10k	-I 1000		-f $TFILE
+pread		$OPTS -N "pread_t100k"	-s 100k	-I 10000	-f $TFILE
+
+pread		$OPTS -N "pread_u1k"	-s 1k	-I 300		-f $VFILE
+pread		$OPTS -N "pread_u10k"	-s 10k	-I 1000		-f $VFILE
+pread		$OPTS -N "pread_u100k"	-s 100k	-I 10000	-f $VFILE
+
+pread		$OPTS -N "pread_z1k"	-s 1k	-I 300		-f /dev/zero 
+pread		$OPTS -N "pread_z10k"	-s 10k	-I 1000		-f /dev/zero 
+pread		$OPTS -N "pread_z100k"	-s 100k	-I 2000	-f /dev/zero 
+pread		$OPTS -N "pread_zw100k"	-s 100k	-w -I 10000	-f /dev/zero 
+
+pwrite		$OPTS -N "pwrite_t1k"	-s 1k	-I 500		-f $TFILE
+pwrite		$OPTS -N "pwrite_t10k"	-s 10k	-I 1000		-f $TFILE
+pwrite		$OPTS -N "pwrite_t100k"	-s 100k	-I 10000	-f $TFILE
+
+pwrite		$OPTS -N "pwrite_u1k"	-s 1k	-I 500		-f $VFILE
+pwrite		$OPTS -N "pwrite_u10k"	-s 10k	-I 1000		-f $VFILE
+pwrite		$OPTS -N "pwrite_u100k"	-s 100k	-I 20000	-f $VFILE
+
+pwrite		$OPTS -N "pwrite_n1k"	-s 1k	-I 100		-f /dev/null 
+pwrite		$OPTS -N "pwrite_n10k"	-s 10k	-I 100		-f /dev/null 
+pwrite		$OPTS -N "pwrite_n100k"	-s 100k	-I 100		-f /dev/null 
+
+mmap		$OPTS -N "mmap_z8k"	-l 8k   -I 1000	-B 50	-f /dev/zero
+mmap		$OPTS -N "mmap_z128k"	-l 128k	-I 2000	-B 100	-f /dev/zero
+mmap		$OPTS -N "mmap_t8k"	-l 8k	-I 1000		-f $TFILE
+mmap		$OPTS -N "mmap_t128k"	-l 128k	-I 1000		-f $TFILE
+mmap		$OPTS -N "mmap_u8k"	-l 8k	-I 1000		-f $VFILE
+mmap		$OPTS -N "mmap_u128k"	-l 128k	-I 1000		-f $VFILE
+mmap		$OPTS -N "mmap_a8k"	-l 8k	-I 200		-f MAP_ANON
+mmap		$OPTS -N "mmap_a128k"	-l 128k	-I 200		-f MAP_ANON
+
+
+mmap		$OPTS -N "mmap_rz8k"	-l 8k	-I 2000 -r	-f /dev/zero
+mmap		$OPTS -N "mmap_rz128k"	-l 128k	-I 2000 -r	-f /dev/zero
+mmap		$OPTS -N "mmap_rt8k"	-l 8k	-I 2000 -r	-f $TFILE
+mmap		$OPTS -N "mmap_rt128k"	-l 128k	-I 20000 -r	-f $TFILE
+mmap		$OPTS -N "mmap_ru8k"	-l 8k	-I 2000 -r	-f $VFILE
+mmap		$OPTS -N "mmap_ru128k"	-l 128k	-I 20000 -r	-f $VFILE
+mmap		$OPTS -N "mmap_ra8k"	-l 8k	-I 2000 -r	-f MAP_ANON
+mmap		$OPTS -N "mmap_ra128k"	-l 128k	-I 20000 -r	-f MAP_ANON
+
+mmap		$OPTS -N "mmap_wz8k"	-l 8k	-I 5000 -w	-B 50 -f /dev/zero
+mmap		$OPTS -N "mmap_wz128k"	-l 128k	-I 50000 -w	-B 50 -f /dev/zero
+mmap		$OPTS -N "mmap_wt8k"	-l 8k	-I 5000 -w	-f $TFILE
+mmap		$OPTS -N "mmap_wt128k"	-l 128k	-I 50000 -w	-f $TFILE
+mmap		$OPTS -N "mmap_wu8k"	-l 8k	-I 5000 -w	-f $VFILE
+mmap		$OPTS -N "mmap_wu128k"	-l 128k	-I 500000 -w	-f $VFILE
+mmap		$OPTS -N "mmap_wa8k"	-l 8k	-I 3000 -w	-f MAP_ANON
+mmap		$OPTS -N "mmap_wa128k"	-l 128k	-I 50000 -w	-f MAP_ANON
+
+munmap		$OPTS -N "unmap_z8k"	-l 8k   -I 500		-f /dev/zero
+munmap		$OPTS -N "unmap_z128k"	-l 128k	-I 500	-B 100	-f /dev/zero
+munmap		$OPTS -N "unmap_t8k"	-l 8k	-I 500		-f $TFILE
+munmap		$OPTS -N "unmap_t128k"	-l 128k	-I 500		-f $TFILE
+munmap		$OPTS -N "unmap_u8k"	-l 8k	-I 500		-f $VFILE
+munmap		$OPTS -N "unmap_u128k"	-l 128k	-I 500		-f $VFILE
+munmap		$OPTS -N "unmap_a8k"	-l 8k	-I 500		-f MAP_ANON
+munmap		$OPTS -N "unmap_a128k"	-l 128k	-I 500		-f MAP_ANON
+
+munmap		$OPTS -N "unmap_rz8k"	-l 8k	-I 1000	-r	-f /dev/zero
+munmap		$OPTS -N "unmap_rz128k"	-l 128k	-I 2000 -r	-B 100 -f /dev/zero
+munmap		$OPTS -N "unmap_rt8k"	-l 8k	-I 1000	-r	-f $TFILE
+munmap		$OPTS -N "unmap_rt128k"	-l 128k	-I 3000	-r	-f $TFILE
+munmap		$OPTS -N "unmap_ru8k"	-l 8k	-I 1000	-r	-f $VFILE
+munmap		$OPTS -N "unmap_ru128k"	-l 128k	-I 3000	-r	-f $VFILE
+munmap		$OPTS -N "unmap_ra8k"	-l 8k	-I 1000	-r	-f MAP_ANON
+munmap		$OPTS -N "unmap_ra128k"	-l 128k	-I 2000	-r	-f MAP_ANON
+
+connection	$OPTS -N "conn_connect"		-B 256 	-c
+
+munmap		$OPTS -N "unmap_wz8k"	-l 8k	-I 1000	-w	-f /dev/zero
+munmap		$OPTS -N "unmap_wz128k"	-l 128k	-I 8000	-w	-B 100 -f /dev/zero
+munmap		$OPTS -N "unmap_wt8k"	-l 8k	-I 1000	-w	-f $TFILE
+munmap		$OPTS -N "unmap_wt128k"	-l 128k	-I 10000	-w	-f $TFILE
+munmap		$OPTS -N "unmap_wu8k"	-l 8k	-I 1000	-w	-f $VFILE
+munmap		$OPTS -N "unmap_wu128k"	-l 128k	-I 50000	-w -B 10	-f $VFILE
+munmap		$OPTS -N "unmap_wa8k"	-l 8k	-I 1000	-w	-f MAP_ANON
+munmap		$OPTS -N "unmap_wa128k"	-l 128k	-I 10000	-w	-f MAP_ANON
+
+
+mprotect	$OPTS -N "mprot_z8k"	-l 8k  -I 300			-f /dev/zero
+mprotect	$OPTS -N "mprot_z128k"	-l 128k	-I 500		-f /dev/zero
+mprotect	$OPTS -N "mprot_wz8k"	-l 8k	-I 500	-w	-f /dev/zero
+mprotect	$OPTS -N "mprot_wz128k"	-l 128k	-I 1000	-w	-f /dev/zero
+mprotect	$OPTS -N "mprot_twz8k"  -l 8k   -I 1000 -w -t   -f /dev/zero
+mprotect	$OPTS -N "mprot_tw128k" -l 128k -I 2000 -w -t   -f /dev/zero
+mprotect	$OPTS -N "mprot_tw4m"   -l 4m   -w -t -B 1  -f /dev/zero
+
+pipe		$OPTS -N "pipe_pst1"	-s 1	-I 1000	-x pipe -m st
+pipe		$OPTS -N "pipe_pmt1"	-s 1	-I 8000	-x pipe -m mt
+pipe		$OPTS -N "pipe_pmp1"	-s 1	-I 8000	-x pipe -m mp
+pipe		$OPTS -N "pipe_pst4k"	-s 4k	-I 1000	-x pipe -m st
+pipe		$OPTS -N "pipe_pmt4k"	-s 4k	-I 8000	-x pipe -m mt
+pipe		$OPTS -N "pipe_pmp4k"	-s 4k	-I 8000	-x pipe -m mp
+
+pipe		$OPTS -N "pipe_sst1"	-s 1	-I 1000	-x sock -m st
+pipe		$OPTS -N "pipe_smt1"	-s 1	-I 8000	-x sock -m mt
+pipe		$OPTS -N "pipe_smp1"	-s 1	-I 8000	-x sock -m mp
+pipe		$OPTS -N "pipe_sst4k"	-s 4k	-I 1000	-x sock -m st
+pipe		$OPTS -N "pipe_smt4k"	-s 4k	-I 8000	-x sock -m mt
+pipe		$OPTS -N "pipe_smp4k"	-s 4k	-I 8000	-x sock -m mp
+
+pipe		$OPTS -N "pipe_tst1"	-s 1	-I 1000	-x tcp  -m st
+pipe		$OPTS -N "pipe_tmt1"	-s 1	-I 8000	-x tcp  -m mt
+pipe		$OPTS -N "pipe_tmp1"	-s 1	-I 8000	-x tcp  -m mp
+pipe		$OPTS -N "pipe_tst4k"	-s 4k	-I 1000	-x tcp  -m st
+pipe		$OPTS -N "pipe_tmt4k"	-s 4k	-I 8000	-x tcp  -m mt
+pipe		$OPTS -N "pipe_tmp4k"	-s 4k	-I 8000	-x tcp  -m mp
+
+#connection	$OPTS -N "conn_accept"		-B 256      -a
+
+lmbench_bw_unix -B 11 -L -W
+
+lmbench_bw_mem $OPTS -N lmbench_bcopy_512 -s 512 -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_1k -s 1k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_2k -s 2k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_4k -s 4k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_8k -s 8k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_16k -s 16k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_32k -s 32k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_64k -s 64k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_128k -s 128k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_256k -s 256k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_512k -s 512k -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bcopy_1m -s 1m -x bcopy
+lmbench_bw_mem $OPTS -N lmbench_bzero_512 -s 512 -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_1k -s 1k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_2k -s 2k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_4k -s 4k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_8k -s 8k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_16k -s 16k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_32k -s 32k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_64k -s 64k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_128k -s 128k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_256k -s 256k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_512k -s 512k -x bzero
+lmbench_bw_mem $OPTS -N lmbench_bzero_1m -s 1m -x bzero
+lmbench_bw_mem $OPTS -N lmbench_fcp_512 -s 512 -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_1k -s 1k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_2k -s 2k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_4k -s 4k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_8k -s 8k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_16k -s 16k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_32k -s 32k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_64k -s 64k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_128k -s 128k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_256k -s 256k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_512k -s 512k -x fcp
+lmbench_bw_mem $OPTS -N lmbench_fcp_1m -s 1m -x fcp
+lmbench_bw_mem $OPTS -N lmbench_cp_512 -s 512 -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_1k -s 1k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_2k -s 2k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_4k -s 4k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_8k -s 8k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_16k -s 16k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_32k -s 32k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_64k -s 64k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_128k -s 128k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_256k -s 256k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_512k -s 512k -x cp
+lmbench_bw_mem $OPTS -N lmbench_cp_1m -s 1m -x cp
+lmbench_bw_mem $OPTS -N lmbench_frd_512 -s 512 -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_1k -s 1k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_2k -s 2k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_4k -s 4k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_8k -s 8k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_16k -s 16k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_32k -s 32k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_64k -s 64k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_128k -s 128k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_256k -s 256k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_512k -s 512k -x frd
+lmbench_bw_mem $OPTS -N lmbench_frd_1m -s 1m -x frd
+lmbench_bw_mem $OPTS -N lmbench_rd_512 -s 512 -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_1k -s 1k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_2k -s 2k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_4k -s 4k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_8k -s 8k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_16k -s 16k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_32k -s 32k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_64k -s 64k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_128k -s 128k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_256k -s 256k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_512k -s 512k -x rd
+lmbench_bw_mem $OPTS -N lmbench_rd_1m -s 1m -x rd
+lmbench_bw_mem $OPTS -N lmbench_fwr_512 -s 512 -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_1k -s 1k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_2k -s 2k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_4k -s 4k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_8k -s 8k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_16k -s 16k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_32k -s 32k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_64k -s 64k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_128k -s 128k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_256k -s 256k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_512k -s 512k -x fwr
+lmbench_bw_mem $OPTS -N lmbench_fwr_1m -s 1m -x fwr
+lmbench_bw_mem $OPTS -N lmbench_wr_512 -s 512 -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_1k -s 1k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_2k -s 2k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_4k -s 4k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_8k -s 8k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_16k -s 16k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_32k -s 32k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_64k -s 64k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_128k -s 128k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_256k -s 256k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_512k -s 512k -x wr
+lmbench_bw_mem $OPTS -N lmbench_wr_1m -s 1m -x wr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_512 -s 512 -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_1k -s 1k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_2k -s 2k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_4k -s 4k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_8k -s 8k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_16k -s 16k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_32k -s 32k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_64k -s 64k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_128k -s 128k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_256k -s 256k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_512k -s 512k -x rdwr
+lmbench_bw_mem $OPTS -N lmbench_rdwr_1m -s 1m -x rdwr
+
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_512 -s 512 -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_1k -s 1k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_2k -s 2k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_4k -s 4k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_8k -s 8k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_16k -s 16k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_32k -s 32k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_64k -s 64k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_128k -s 128k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_256k -s 256k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_512k -s 512k -f $TFILE
+lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_1m -s 1m -f $TFILE
+
+.
diff --git a/tools/tests/libMicro/exp.c b/tools/tests/libMicro/exp.c
index acc81c577..14ad80b68 100644
--- a/tools/tests/libMicro/exp.c
+++ b/tools/tests/libMicro/exp.c
@@ -53,9 +53,27 @@ int
 benchmark(void *tsd, result_t *res)
 {
 	int			i;
+	/* Added as part of the fix for radar 7508837 */
+        double                  t = 0.0;
 
 	for (i = 0; i < lm_optB; i += 10) {
 		double value = 1.0 / (i + .01);
+#if 1 /* Apple added code, see radar 7508837 */
+                t += exp(value);
+                t += exp(value + 1.0);
+                t += exp(value + 2.0);
+                t += exp(value + 3.0);
+                t += exp(value + 4.0);
+                t += exp(value + 5.0);
+                t += exp(value + 6.0);
+                t += exp(value + 7.0);
+                t += exp(value + 8.0);
+                t += exp(value + 9.0);
+        }
+        res->re_count = i;
+
+        return ((int)(t - t));
+#else
 		(void) exp(value);
 		(void) exp(value);
 		(void) exp(value);
@@ -70,4 +88,5 @@ benchmark(void *tsd, result_t *res)
 	res->re_count = i;
 
 	return (0);
+#endif /* end of Apple fix  */
 }
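The guard pattern in this fix (and the identical one in log.c below) is worth spelling out: every exp() result is accumulated into t, and t feeds the return value, so the compiler cannot discard the calls as dead code, while (int)(t - t) still yields the 0 the harness expects. A minimal standalone sketch of the same idea (the function name and signature are illustrative, not part of libMicro):

    #include <math.h>

    /* Without the live accumulator, an optimizing compiler may delete
     * the exp() calls entirely and the benchmark measures nothing. */
    static int bench_exp(int iterations, double *out)
    {
        double t = 0.0;            /* accumulator keeps the calls live */
        int i;

        for (i = 0; i < iterations; i++)
            t += exp(1.0 / (i + .01));
        *out = t;                  /* result escapes the function */
        return ((int)(t - t));     /* always 0, but data-dependent on t */
    }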
diff --git a/tools/tests/libMicro/libmicro.h b/tools/tests/libMicro/libmicro.h
index 54dcb8503..0359134d1 100644
--- a/tools/tests/libMicro/libmicro.h
+++ b/tools/tests/libMicro/libmicro.h
@@ -34,6 +34,8 @@
 
 #define	STRSIZE			1024
 
+#define STREQ(a,b) (strcmp(a,b) == 0)
+
 typedef struct {
 	long long		re_count;
 	long long		re_errors;
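STREQ tidies up the string comparisons scattered through the harness; a typical (hypothetical) use:

    /* hypothetical option check using the new macro */
    if (STREQ(optarg, "tcp"))
        use_tcp = 1;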
diff --git a/tools/tests/libMicro/log.c b/tools/tests/libMicro/log.c
index 0b4605f3f..71910e079 100644
--- a/tools/tests/libMicro/log.c
+++ b/tools/tests/libMicro/log.c
@@ -53,9 +53,27 @@ int
 benchmark(void *tsd, result_t *res)
 {
 	int			i;
+	/* Added as part of the fix for radar 7508837 */
+        double                  t = 0.0;
 
 	for (i = 0; i < lm_optB; i += 10) {
 		double value = i + .01;
+#if 1 /* Apple added code, see radar 7508837 */
+                t += log(value);
+                t += log(value + 1.0);
+                t += log(value + 2.0);
+                t += log(value + 3.0);
+                t += log(value + 4.0);
+                t += log(value + 5.0);
+                t += log(value + 6.0);
+                t += log(value + 7.0);
+                t += log(value + 8.0);
+                t += log(value + 9.0);
+        }
+        res->re_count = i;
+
+        return ((int)(t - t));
+#else
 		(void) log(value);
 		(void) log(value);
 		(void) log(value);
@@ -70,4 +88,5 @@ benchmark(void *tsd, result_t *res)
 	res->re_count = i;
 
 	return (0);
+#endif /* end of Apple fix  */
 }
diff --git a/tools/tests/libMicro/longjmp.c b/tools/tests/libMicro/longjmp.c
index 50f4dbc93..43c54d48a 100644
--- a/tools/tests/libMicro/longjmp.c
+++ b/tools/tests/libMicro/longjmp.c
@@ -51,7 +51,12 @@ benchmark_init()
 int
 benchmark(void *tsd, result_t *res)
 {
-	int			i = 0;
+#if 1  /* Apple fix to longjmp/siglongjmp tests, see radar 7440118 */
+	volatile int		i = 0;
+#else
+	int i = 0;
+#endif /* end of Apple fix */
+	
 	jmp_buf			env;
 
 	(void) setjmp(env);
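The volatile here is not cosmetic: C99 7.13.2.1 says that non-volatile automatic variables modified between setjmp() and the corresponding longjmp() have indeterminate values after the jump, so an optimizer that keeps i in a register can make the loop miscount. A standalone illustration (not part of the patch):

    #include <setjmp.h>
    #include <stdio.h>

    int main(void)
    {
        jmp_buf env;
        volatile int i = 0;   /* without volatile, i is indeterminate after longjmp */

        (void) setjmp(env);
        if (++i < 3)
            longjmp(env, 1);  /* jumps back to the setjmp above */
        printf("%d\n", i);    /* reliably prints 3 only with volatile */
        return 0;
    }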
diff --git a/tools/tests/libMicro/od_account_create.sh b/tools/tests/libMicro/od_account_create.sh
new file mode 100644
index 000000000..fa9e3b768
--- /dev/null
+++ b/tools/tests/libMicro/od_account_create.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+
+
+function sighandler {
+  echo 
+  echo "Interrupting account creation"
+  rm -f $TMPF
+  exit 1
+}
+
+trap sighandler INT TERM
+
+# Fixed parameters
+#
+NAME=`basename $0`
+COUNT=$1
+NODE=$2
+PREFIX="od_test_"
+GROUP_ID=1211	# A group everybody's in
+GROUP_ID2=1212	# A group nobody's in
+GROUP_NAME='od_test_group'
+UID_BASE=5000
+TMPF=/tmp/.${NAME}.$$
+
+usage () {
+  echo
+  echo "Usage: ${NAME} count nodename"
+  echo 
+  echo "   ie. ${NAME} 1000 /Local/Default"
+  echo
+  echo "       will create users 1000 users (from '${PREFIX}1' to '${PREFIX}1000')"
+  echo "       Default password is set to 'test'"
+  echo "       User ID starts from 5000"
+  echo "       Default group is '${GROUP_NAME}', Group ID 1211"
+  echo
+  echo "This tool assumes user 'diradmin' with password 'admin' for OD admin"
+  echo
+  exit 85 # WRONGARGS
+}
+
+if [ $# -ne 2 ]; then
+  usage
+fi
+
+# if local node we don't need credentials
+if [ "$NODE" != "/Local/Default" ]; then
+  OD_ADMIN="diradmin"
+  OD_PASS="admin"
+fi
+
+echo "Creating users ${PREFIX}1 to ${PREFIX}$COUNT"
+
+# check to see if od_test_group exist. if not, create one
+#
+result=`dscl $NODE -list Groups/${GROUP_NAME}1 2> /dev/null`
+if [ $? -ne 0 ]; then
+  echo "Group \"${GROUP_NAME}\" does not exist. Creating ${GROUP_NAME}"
+  if [ -n "$OD_ADMIN" ]; then
+    dseditgroup -q -o create -n $NODE -u $OD_ADMIN -P $OD_PASS -i ${GROUP_ID} ${GROUP_NAME}1
+    dseditgroup -q -o create -n $NODE -u $OD_ADMIN -P $OD_PASS -i ${GROUP_ID2} ${GROUP_NAME}2
+  else
+    dseditgroup -q -o create -n $NODE -i ${GROUP_ID} ${GROUP_NAME}1
+    dseditgroup -q -o create -n $NODE -i ${GROUP_ID2} ${GROUP_NAME}2
+  fi
+fi
+
+if [ $? -ne 0 ]; then
+	echo "Failed to create test_group"
+	exit 1
+fi
+
+# using dsimport is faster than using dscl
+i=1
+uid=$UID_BASE
+echo "Writing a temporary import file ..."
+while [ $i -le $COUNT ]
+do
+  result=`dscl $NODE -list Users/${PREFIX}${i} 2> /dev/null`
+  if [ $? -ne 0 ]; then 
+    # Uses standard template
+	# RecordName:Password:UniqueID:PrimaryGroupID:DistinguishedName:NFSHomeDirectory:UserShell
+	echo "${PREFIX}${i}:test:${uid}:1211:${PREFIX}${i}:/Users/${PREFIX}${i}:/bin/bash" >> $TMPF
+    printf "\r${PREFIX}${i} / ${COUNT}"
+  else
+    echo "account $PREFIX$i already exist. skipping"
+  fi
+  i=`expr $i + 1` 
+  uid=`expr $uid + 1` 
+done
+echo 
+
+# Now do the real work
+#
+if [[ -f $TMPF ]]; then
+  echo "Running dsimport to create users. Please be patient. This takes a while ..."
+  # assume if admin is provided that slapconfig exists
+  if [ -n "$OD_ADMIN" ]; then
+    if [[ -x "/usr/sbin/slapconfig" ]]; then
+      /usr/sbin/slapconfig -setfullsyncmode no
+      sleep 2
+    fi
+    /usr/bin/time dsimport $TMPF $NODE I --username $OD_ADMIN --password $OD_PASS --template StandardUser
+    sleep 2
+    if [[ -x "/usr/sbin/slapconfig" ]]; then
+      /usr/sbin/slapconfig -setfullsyncmode yes
+    fi
+  else
+    /usr/bin/time dsimport $TMPF $NODE I --template StandardUser
+    sleep 2
+  fi
+  
+  # and now delete the temp file
+  #
+  rm -f $TMPF
+else
+  echo "Nothing done. All users already exist"
+fi 
+
+echo "Creating a SACL group for libMicro"
+# Create a sample SACL group
+dseditgroup -q -o create -r "libMicro ACL" com.apple.access_libMicro
+i=1
+while [ $i -le $COUNT ]; do
+	dseditgroup -q -o edit -a ${PREFIX}${i} -t user com.apple.access_libMicro 
+	i=`expr $i + 1` 
+done
+
+echo 'Finished'
+
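After a run, the imported records can be spot-checked with dscl, e.g. (node and record index are illustrative):

    dscl /Local/Default -read Users/od_test_1 RecordName UniqueID PrimaryGroupID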
diff --git a/tools/tests/libMicro/od_account_delete.sh b/tools/tests/libMicro/od_account_delete.sh
new file mode 100644
index 000000000..00ea4e251
--- /dev/null
+++ b/tools/tests/libMicro/od_account_delete.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+function sighandler {
+  echo 
+  echo "Interrupting account creation"
+  rm -f $TMPF
+  exit 2
+}
+
+trap sighandler INT TERM
+
+# Fixed parameters
+#
+NAME=`basename $0`
+COUNT=$1
+PREFIX="od_test_"
+GROUP_NAME='od_test_group'
+TMPF=/tmp/.${NAME}.$$
+NODE=$2
+
+usage () {
+  echo
+  echo "Usage: ${NAME} count nodename"
+  echo 
+  echo "   ie. ${NAME} 1000 /Local/Default"
+  echo
+  echo "       will delete ${GROUPNAME} and 1000 users "
+  echo "       from '${PREFIX}1' to '${PREFIX}1000'"
+  echo
+  echo "This tool assumes user 'diradmin' with password 'admin' for OD admin"
+  echo "when talking to anything other than /Local/Default"
+  exit 85 # WRONGARGS
+}
+
+if [ $# -ne 2 ]; then
+  usage
+fi
+
+# if local node we don't need credentials
+if [ "$NODE" != "/Local/Default" ]; then
+  OD_ADMIN="diradmin"
+  OD_PASS="admin"
+fi
+
+echo "Deleting users ${PREFIX}1 to ${PREFIX}$COUNT"
+
+# Feeding a script file into dscl is much faster than
+# calling dscl every time.
+# 
+i=1
+echo "Writing a temporary script ..."
+if [ -n "$OD_ADMIN" ]; then
+  echo "auth $OD_ADMIN $OD_PASS" >> $TMPF
+fi
+
+while [ $i -le $COUNT ]
+do
+  result=`dscl $NODE -list Users/${PREFIX}${i} 2> /dev/null`
+  if [ $? -eq 0 ]; then
+    echo "delete Users/${PREFIX}${i}" >> $TMPF
+    printf "\r${PREFIX}${i} / ${COUNT}"
+  fi
+  i=`expr $i + 1` 
+done
+echo 
+
+echo "Deleting temporary test groups"
+if [ -n "$OD_ADMIN" ]; then
+  result=`dseditgroup -q -o delete -n $NODE -u $OD_ADMIN -P $OD_PASS ${GROUP_NAME}1 > /dev/null 2>&1`
+  result=`dseditgroup -q -o delete -n $NODE -u $OD_ADMIN -P $OD_PASS ${GROUP_NAME}2 > /dev/null 2>&1`
+else
+  result=`dseditgroup -q -o delete -n $NODE ${GROUP_NAME}1 > /dev/null 2>&1`
+  result=`dseditgroup -q -o delete -n $NODE ${GROUP_NAME}2 > /dev/null 2>&1`
+fi
+
+result=`dseditgroup -q -o delete com.apple.access_libMicro > /dev/null 2>&1`
+
+# Now do the real work
+#
+if [[ -f $TMPF ]]; then
+  echo "Running dscl to delete users. Please be patient. This takes a while ..."
+  if [[ -x /usr/sbin/slapconfig ]]; then
+    /usr/sbin/slapconfig -setfullsyncmode no
+  fi
+
+  /usr/bin/time dscl ${NODE} < $TMPF
+
+  if [[ -x /usr/sbin/slapconfig ]]; then
+    /usr/sbin/slapconfig -setfullsyncmode yes
+  fi
+fi
+
+# and now delete the temp file
+#
+rm -f $TMPF
+
+echo 'Finished'
+
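A quick way to confirm the cleanup took (node name illustrative) is to count any leftover test records:

    dscl /Local/Default -list Users | grep -c '^od_test_'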
diff --git a/tools/tests/libMicro/siglongjmp.c b/tools/tests/libMicro/siglongjmp.c
index b4dfd160e..385530eab 100644
--- a/tools/tests/libMicro/siglongjmp.c
+++ b/tools/tests/libMicro/siglongjmp.c
@@ -56,7 +56,11 @@ benchmark(void *tsd, result_t *res)
 {
 	tsd_t			*ts = (tsd_t *)tsd;
 
+#if 1 /* Apple fix to longjmp/siglongjmp tests, see radar 7440118 */
+	volatile int i = 0;
+#else 
 	int i = 0;
+#endif /* end of Apple fix */
 
 	(void) sigsetjmp(ts->ts_env, 1);
 
diff --git a/tools/tests/superpages/testsp.c b/tools/tests/superpages/testsp.c
index 97fe85c96..33b637a03 100644
--- a/tools/tests/superpages/testsp.c
+++ b/tools/tests/superpages/testsp.c
@@ -1,3 +1,10 @@
+/*
+ * This tests the Mac OS X Superpage API introduced in 10.7
+ *
+ * Note that most of these calls go through the mach_vm_allocate() interface,
+ * but the actually supported and documented interface is the mmap() one
+ * (see mmap(2)).
+ */
 #include <stdio.h>
 #include <stdlib.h>
 #include <signal.h>
@@ -12,8 +19,6 @@
 #define SUPERPAGE_SIZE (2*1024*1024)
 #define SUPERPAGE_MASK (-SUPERPAGE_SIZE)
 
-#define MAP_SUPERPAGE		0x2000
-
 #ifdef __LP64__
 #define FIXED_ADDRESS1 (0x100000000ULL+500*1024*1024) /* at 4 GB + 500 MB virtual */
 #define FIXED_ADDRESS2 (0x100000000ULL+502*1024*1024 + 4*1024) /* at 4 GB + 502 MB + 4 KB virtual */
@@ -100,7 +105,6 @@ boolean_t
 check_nr(mach_vm_address_t addr, mach_vm_size_t size, int *res) {
 	int i;
 	boolean_t ret;
-//printf("%d\n", __LINE__);	
 	for (i=0; i<size/PAGE_SIZE; i++) {
 		if ((ret = check_r(addr+i*PAGE_SIZE, PAGE_SIZE, res))) {
 			sprintf(error, "page still readable");
@@ -146,7 +150,6 @@ check_rw(mach_vm_address_t addr, mach_vm_size_t size) {
 	int res;
 	if (!(ret = check_w(addr, size))) return ret;
 	if (!(ret = check_r(addr, size, &res))) return ret;
-//	printf("res = %x\n", res);
 	if ((size==SUPERPAGE_SIZE) && (res!=0xfff00000)) {
 		sprintf(error, "checksum error");
 		return FALSE;
@@ -158,6 +161,13 @@ check_rw(mach_vm_address_t addr, mach_vm_size_t size) {
 mach_vm_address_t global_addr = 0;
 mach_vm_size_t	global_size = 0;
 
+/*
+ * If we allocate a 2 MB superpage read-write without specifying an address,
+ * - the call should succeed
+ * - not return 0
+ * - return a 2 MB aligned address
+ * - the memory should be readable and writable
+ */
 boolean_t
 test_allocate() {
 	int kr, ret;
@@ -166,8 +176,6 @@ test_allocate() {
 	global_size = SUPERPAGE_SIZE;
 	
 	kr = mach_vm_allocate(mach_task_self(), &global_addr, global_size, VM_FLAGS_ANYWHERE | VM_FLAGS_SUPERPAGE_SIZE_2MB);
-	//printf("%llx", addr);
-	//printf("\n%d\n", __LINE__);
 	if (!(ret = check_kr(kr, "mach_vm_allocate"))) return ret;
 	if (!(ret = check_addr0(global_addr, "mach_vm_allocate"))) return ret;
 	if (!(ret = check_align(global_addr))) return ret;
@@ -176,9 +184,13 @@ test_allocate() {
 	return TRUE;
 }
 
+/*
+ * If we deallocate a superpage,
+ * - the call should succeed
+ * - make the memory inaccessible
+ */
 boolean_t
 test_deallocate() {
-	mach_vm_address_t addr = 0;
 	mach_vm_size_t	size = SUPERPAGE_SIZE;
 	int kr, ret;
 
@@ -188,10 +200,45 @@ test_deallocate() {
 	}
 	kr = mach_vm_deallocate(mach_task_self(), global_addr, global_size);
 	if (!(ret = check_kr(kr, "mach_vm_deallocate"))) return ret;
+	if (!(ret = check_nr(global_addr, size, NULL))) return ret;
 	return TRUE;
 }
 
+/*
+ * If we allocate a superpage of any size read-write without specifying an address
+ * - the call should succeed
+ * - not return 0
+ * - the memory should be readable and writable
+ * If we deallocate it,
+ * - the call should succeed
+ * - make the memory inaccessible
+ */
+boolean_t
+test_allocate_size_any() {
+	int kr;
+	int ret;
+	mach_vm_address_t addr = 0;
+	mach_vm_size_t	size = 2*PAGE_SIZE; /* will be rounded up to some superpage size */
+
+	kr = mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_ANYWHERE | VM_FLAGS_SUPERPAGE_SIZE_ANY);
+	if (!(ret = check_kr(kr, "mach_vm_allocate"))) return ret;
+	if (!(ret = check_addr0(addr, "mach_vm_allocate"))) return ret;
+	if (!(ret = check_rw(addr, size))) return ret;
+	kr = mach_vm_deallocate(mach_task_self(), addr, size);
+	if (!(ret = check_kr(kr, "mach_vm_deallocate"))) return ret;
+	if (!(ret = check_nr(addr, size, NULL))) return ret;
+	return TRUE;
+}
 
+/*
+ * If we allocate a 2 MB superpage read-write at a 2 MB aligned address,
+ * - the call should succeed
+ * - return the address we wished for
+ * - the memory should be readable and writable
+ * If we deallocate it,
+ * - the call should succeed
+ * - make the memory inaccessible
+ */
 boolean_t
 test_allocatefixed() {
 	int kr;
@@ -201,15 +248,18 @@ test_allocatefixed() {
 
 	kr = mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_SUPERPAGE_SIZE_2MB);
 	if (!(ret = check_kr(kr, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_addr0(addr, "mach_vm_allocate"))) return ret;
 	if (!(ret = check_addr(addr, FIXED_ADDRESS1, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_align(addr))) return ret;
 	if (!(ret = check_rw(addr, size))) return ret;
 	kr = mach_vm_deallocate(mach_task_self(), addr, size);
 	if (!(ret = check_kr(kr, "mach_vm_deallocate"))) return ret;
+	if (!(ret = check_nr(addr, size, NULL))) return ret;
 	return TRUE;
 }
 
+/*
+ * If we allocate a 2 MB superpage read-write at an unaligned address,
+ * - the call should fail
+ */
 boolean_t
 test_allocateunalignedfixed() {
 	int kr;
@@ -218,23 +268,39 @@ test_allocateunalignedfixed() {
 	mach_vm_size_t	size = SUPERPAGE_SIZE;
 	
 	kr = mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_SUPERPAGE_SIZE_2MB);
-#if 0
-	if (!(ret = check_kr(kr, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_addr0(addr, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_addr(addr, FIXED_ADDRESS2 & SUPERPAGE_MASK, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_align(addr))) return ret;
-	if (!(ret = check_rw(addr, size))) return ret;
-	kr = mach_vm_deallocate(mach_task_self(), addr & SUPERPAGE_MASK, size);
-	if (!(ret = check_kr(kr, "mach_vm_deallocate"))) return ret;
-#else /* is supposed to fail */
+	/* is supposed to fail */
+	if ((ret = check_kr(kr, "mach_vm_allocate"))) {
+		sprintf(error, "mach_vm_allocate() should have failed");
+		return FALSE;
+	}
+	return TRUE;
+}
+
+/*
+ * If we allocate an amount of memory not divisible by 2 MB as a 2 MB superpage
+ * - the call should fail
+ */
+boolean_t
+test_allocateoddsize() {
+	int kr;
+	int ret;
+	mach_vm_address_t addr = FIXED_ADDRESS1;
+	mach_vm_size_t	size = PAGE_SIZE; /* != 2 MB */
+
+	kr = mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_SUPERPAGE_SIZE_2MB);
+	/* is supposed to fail */
 	if ((ret = check_kr(kr, "mach_vm_allocate"))) {
 		sprintf(error, "mach_vm_allocate() should have failed");
 		return FALSE;
 	}
-#endif
 	return TRUE;
 }
 
+/*
+ * If we deallocate a sub-page of a superpage,
+ * - the call should succeed
+ * - make the complete memory inaccessible
+ */
 boolean_t
 test_deallocatesubpage() {
 	int kr;
@@ -244,15 +310,16 @@ test_deallocatesubpage() {
 	
 	kr = mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_ANYWHERE | VM_FLAGS_SUPERPAGE_SIZE_2MB);
 	if (!(ret = check_kr(kr, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_addr0(addr, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_align(addr))) return ret;
-	if (!(ret = check_rw(addr, size))) return ret;
 	kr = mach_vm_deallocate(mach_task_self(), addr + PAGE_SIZE, size);
 	if (!(ret = check_kr(kr, "mach_vm_deallocate"))) return ret;
 	if (!(ret = check_nr(addr, size, NULL))) return ret;
 	return TRUE;
 }
 
+/*
+ * If we try to allocate memory occupied by superpages as normal pages
+ * - the call should fail
+ */
 boolean_t
 test_reallocate() {
 	mach_vm_address_t addr = 0, addr2;
@@ -262,9 +329,6 @@ test_reallocate() {
 
 	kr = mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_ANYWHERE | VM_FLAGS_SUPERPAGE_SIZE_2MB);
 	if (!(ret = check_kr(kr, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_addr0(addr, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_align(addr))) return ret;
-	if (!(ret = check_rw(addr, size))) return ret;
 
 	/* attempt to allocate every sub-page of superpage */
 	for (i=0; i<SUPERPAGE_SIZE/PAGE_SIZE; i++) {
@@ -282,6 +346,11 @@ test_reallocate() {
 	return TRUE;
 }
 
+/*
+ * If we try to wire superpages
+ * - the call should succeed
+ * - the memory should remain readable and writable
+ */
 boolean_t
 test_wire() {
 	int kr;
@@ -290,18 +359,12 @@ test_wire() {
 	mach_vm_size_t	size = SUPERPAGE_SIZE;
 
 	kr = mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_ANYWHERE | VM_FLAGS_SUPERPAGE_SIZE_2MB);
-
 	if (!(ret = check_kr(kr, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_addr0(addr, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_align(addr))) return ret;
-	if (!(ret = check_rw(addr, size))) return ret;
 
 	kr = mach_vm_wire(mach_host_self(), mach_task_self(), addr, size, VM_PROT_WRITE | VM_PROT_READ);
 
-	if (kr && geteuid()) /* may fail as user */
-		return TRUE;
-
-	if (!(ret = check_kr(kr, "mach_vm_wire"))) return ret;
+	if (!geteuid()) /* may fail as user */
+		if (!(ret = check_kr(kr, "mach_vm_wire"))) return ret;
 
 	if (!(ret = check_rw(addr, size))) return ret;
 
@@ -311,6 +374,12 @@ test_wire() {
 	return TRUE;
 }
 
+/*
+ * If we try to unwire superpages
+ * - the call should fail
+ * - the memory should remain readable and writable
+ * Currently, superpages are always wired.
+ */
 boolean_t
 test_unwire() {
 	int kr;
@@ -320,8 +389,6 @@ test_unwire() {
 
 	kr = mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_ANYWHERE | VM_FLAGS_SUPERPAGE_SIZE_2MB);
 	if (!(ret = check_kr(kr, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_addr0(addr, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_align(addr))) return ret;
 
 	kr = mach_vm_wire(mach_host_self(), mach_task_self(), addr, size, VM_PROT_NONE);
 	if ((ret = check_kr(kr, "mach_vm_wire"))) {
@@ -337,6 +404,12 @@ test_unwire() {
 	return TRUE;
 }
 
+/*
+ * If we try to write-protect superpages
+ * - the call should succeed
+ * - the memory should remain readable
+ * - the memory should not be writable
+ */
 boolean_t
 test_readonly() {
 	int kr;
@@ -346,8 +419,6 @@ test_readonly() {
 
 	kr = mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_ANYWHERE | VM_FLAGS_SUPERPAGE_SIZE_2MB);
 	if (!(ret = check_kr(kr, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_addr0(addr, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_align(addr))) return ret;
 
 	mach_vm_protect(mach_task_self(), addr, size, 0, VM_PROT_READ);
 	if (!(ret = check_kr(kr, "mach_vm_protect"))) return ret;
@@ -361,24 +432,23 @@ test_readonly() {
 	return TRUE;
 }
 
+/*
+ * If we try to write-protect a sub-page of a superpage
+ * - the call should succeed
+ * - the complete memory should remain readable
+ * - the complete memory should not be writable
+ */
 boolean_t
 test_readonlysubpage() {
 	int kr;
 	int ret;
 	mach_vm_address_t addr = 0;
-//	mach_vm_size_t	size = SUPERPAGE_SIZE;
 	mach_vm_size_t	size = SUPERPAGE_SIZE;
 
 	kr = mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_ANYWHERE | VM_FLAGS_SUPERPAGE_SIZE_2MB);
 	if (!(ret = check_kr(kr, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_addr0(addr, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_align(addr))) return ret;
 
-	/* changing protection on a single sub-page has to change protection for the whole superpage */
-	/* write protect second page from start */
-//printf("+mach_vm_protect\n");
 	mach_vm_protect(mach_task_self(), addr+PAGE_SIZE, PAGE_SIZE, 0, VM_PROT_READ);
-//printf("-mach_vm_protect\n");
 	if (!(ret = check_kr(kr, "mach_vm_protect"))) return ret;
 
 	if (!(ret = check_r(addr, size, NULL))) return ret;
@@ -390,6 +460,11 @@ test_readonlysubpage() {
 	return TRUE;
 }
 
+/*
+ * If we fork with active superpages
+ * - the parent should still be able to access the superpages
+ * - the child should not be able to access the superpages
+ */
 boolean_t
 test_fork() {
 	mach_vm_address_t addr = 0;
@@ -399,20 +474,16 @@ test_fork() {
 	
 	kr = mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_ANYWHERE | VM_FLAGS_SUPERPAGE_SIZE_2MB);
 	if (!(ret = check_kr(kr, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_addr0(addr, "mach_vm_allocate"))) return ret;
-	if (!(ret = check_align(addr))) return ret;
-	if (!(ret = check_rw(addr, size))) return ret;
 
 	fflush(stdout);
-	if ((pid=fork())) {
+	if ((pid=fork())) { /* parent */
 		if (!(ret = check_rw(addr, size))) return ret;
 		waitpid(pid, &ret, 0);
 		if (!ret) {
 			sprintf(error, "child could access superpage");
 			return ret;
 		}
-	} else {
-		/* for the child, the superpage should not be mapped */
+	} else { /* child */
 		if (!(ret = check_nr(addr, size, NULL))) exit(ret);
 		exit(TRUE);
 	}
@@ -422,6 +493,11 @@ test_fork() {
 	return TRUE;
 }
 
+/*
+ * Doing file I/O with superpages
+ * - should succeed
+ * - should behave the same as with base pages (i.e. no bad data)
+ */
 #define FILENAME "/mach_kernel"
 boolean_t
 test_fileio() {
@@ -471,35 +547,34 @@ test_fileio() {
 	return TRUE;
 }
 
-#ifdef MMAP
 /*
- * this tests several things at once:
- * - we pass a non-superpage-aligned address and expect it to be rounded up
- * - we pass a size < SUPERPAGE_SIZE and expect SUPERPAGE_SIZE bytes to be mapped
- * - we set the address range to read-only and make sure it's readable, but not writable
+ * The mmap() interface should work just as well!
  */
 boolean_t
 test_mmap() {
 	int kr, ret;
-	void *addr = (void*)(1*1024*1024*1024 + 4096); /* 1 GB + base page (i.e. not superpage-aligned) */
-	int size = 4096;
+	uintptr_t addr = 0;
+	int size = SUPERPAGE_SIZE;
 	
-	addr = mmap(addr, size, PROT_READ, MAP_ANON | MAP_PRIVATE | MAP_SUPERPAGE, -1, 0);
-	if (addr == MAP_FAILED) {
+	addr = (uintptr_t)mmap((void*)addr, size, PROT_READ, MAP_ANON | MAP_PRIVATE, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
+	if (addr == (uintptr_t)MAP_FAILED) {
 		sprintf(error, "mmap()");
 		return FALSE;
 	}
-	if (!(ret = check_align((uintptr_t)addr))) return ret;
-	if (!(ret = check_r((uintptr_t)addr, SUPERPAGE_SIZE, NULL))) return ret;
-	if (!(ret = check_nw((uintptr_t)addr, SUPERPAGE_SIZE))) return ret;
-
-	kr = munmap(addr, size);
-	if (!(ret = check_kr(kr, "mach_vm_deallocate (2)"))) return ret;
+	if (!(ret = check_addr0(addr, "mach_vm_allocate"))) return ret;
+	if (!(ret = check_align(addr))) return ret;
+	if (!(ret = check_r(addr, SUPERPAGE_SIZE, NULL))) return ret;
+	if (!(ret = check_nw(addr, SUPERPAGE_SIZE))) return ret;
+	kr = munmap((void*)addr, size);
+	if (!(ret = check_kr(kr, "munmap"))) return ret;
+	if (!(ret = check_nr(addr, size, NULL))) return ret;
 
 	return TRUE;
 }
-#endif
 
+/*
+ * Tests one allocation/deallocation cycle; used in a loop, this tests for leaks
+ */
 boolean_t
 test_alloc_dealloc() {
 	mach_vm_address_t addr = 0;
@@ -519,6 +594,7 @@ test_alloc_dealloc() {
 test_t test[] = {
 	{ "allocate one page anywhere", test_allocate },
 	{ "deallocate a page", test_deallocate },
+	{ "allocate a SIZE_ANY page anywhere", test_allocate_size_any },
 	{ "allocate one page at a fixed address", test_allocatefixed },
 	{ "allocate one page at an unaligned fixed address", test_allocateunalignedfixed },
 	{ "deallocate sub-page", test_deallocatesubpage },
@@ -528,9 +604,7 @@ test_t test[] = {
 	{ "make page readonly", test_readonly },
 	{ "make sub-page readonly", test_readonlysubpage },
 	{ "file I/O", test_fileio },
-#ifdef MMAP
 	{ "mmap()", test_mmap },
-#endif
 	{ "fork", test_fork },
 };
 #define TESTS ((int)(sizeof(test)/sizeof(*test)))
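As the new header comment says, mmap(2) is the supported interface; for MAP_ANON mappings the fd argument carries the superpage VM flags, which is exactly what the rewritten test_mmap() above relies on. A minimal user-space sketch (error handling trimmed, helper name illustrative):

    #include <sys/mman.h>
    #include <mach/vm_statistics.h>        /* VM_FLAGS_SUPERPAGE_SIZE_2MB */

    #define SP_SIZE (2*1024*1024)

    static void *
    alloc_superpage(void)
    {
        /* for anonymous mappings the fd parameter selects the superpage size */
        void *p = mmap(NULL, SP_SIZE, PROT_READ | PROT_WRITE,
                       MAP_ANON | MAP_PRIVATE, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
        return (p == MAP_FAILED) ? NULL : p;   /* 2 MB aligned on success */
    }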
diff --git a/tools/tests/testkext/testkext.xcodeproj/project.pbxproj b/tools/tests/testkext/testkext.xcodeproj/project.pbxproj
index be0c891c3..a1fab81db 100644
--- a/tools/tests/testkext/testkext.xcodeproj/project.pbxproj
+++ b/tools/tests/testkext/testkext.xcodeproj/project.pbxproj
@@ -8,6 +8,7 @@
 
 /* Begin PBXBuildFile section */
 		C68D22B30EB2441400C3A06C /* testvmx.cpp in Sources */ = {isa = PBXBuildFile; fileRef = C68D22B20EB2441400C3A06C /* testvmx.cpp */; };
+		C6CBD9CE1225B9FF00F317B5 /* testthreadcall.cpp in Sources */ = {isa = PBXBuildFile; fileRef = C6CBD9CD1225B9FF00F317B5 /* testthreadcall.cpp */; };
 /* End PBXBuildFile section */
 
 /* Begin PBXFileReference section */
@@ -16,6 +17,10 @@
 		C68D22A90EB243BC00C3A06C /* testvmx-Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = "testvmx-Info.plist"; sourceTree = "<group>"; };
 		C68D22B10EB2441400C3A06C /* testvmx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = testvmx.h; sourceTree = "<group>"; };
 		C68D22B20EB2441400C3A06C /* testvmx.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = testvmx.cpp; sourceTree = "<group>"; };
+		C6CBD9C31225B98F00F317B5 /* testthreadcall.kext */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = testthreadcall.kext; sourceTree = BUILT_PRODUCTS_DIR; };
+		C6CBD9C41225B98F00F317B5 /* testthreadcall-Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = "testthreadcall-Info.plist"; sourceTree = "<group>"; };
+		C6CBD9CC1225B9FF00F317B5 /* testthreadcall.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = testthreadcall.h; sourceTree = "<group>"; };
+		C6CBD9CD1225B9FF00F317B5 /* testthreadcall.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = testthreadcall.cpp; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
 /* Begin PBXFrameworksBuildPhase section */
@@ -26,6 +31,13 @@
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
+		C6CBD9C11225B98F00F317B5 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
 /* End PBXFrameworksBuildPhase section */
 
 /* Begin PBXGroup section */
@@ -43,6 +55,7 @@
 			isa = PBXGroup;
 			children = (
 				C68D22A80EB243BC00C3A06C /* testvmx.kext */,
+				C6CBD9C31225B98F00F317B5 /* testthreadcall.kext */,
 			);
 			name = Products;
 			sourceTree = "<group>";
@@ -50,9 +63,12 @@
 		247142CAFF3F8F9811CA285C /* Source */ = {
 			isa = PBXGroup;
 			children = (
+				C6CBD9CC1225B9FF00F317B5 /* testthreadcall.h */,
+				C6CBD9CD1225B9FF00F317B5 /* testthreadcall.cpp */,
 				C68D22B10EB2441400C3A06C /* testvmx.h */,
 				C68D22B20EB2441400C3A06C /* testvmx.cpp */,
 				C68D22A90EB243BC00C3A06C /* testvmx-Info.plist */,
+				C6CBD9C41225B98F00F317B5 /* testthreadcall-Info.plist */,
 			);
 			name = Source;
 			sourceTree = "<group>";
@@ -77,6 +93,23 @@
 			productReference = C68D22A80EB243BC00C3A06C /* testvmx.kext */;
 			productType = "com.apple.product-type.kernel-extension.iokit";
 		};
+		C6CBD9C21225B98F00F317B5 /* testthreadcall */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = C6CBD9C71225B98F00F317B5 /* Build configuration list for PBXNativeTarget "testthreadcall" */;
+			buildPhases = (
+				C6CBD9BF1225B98F00F317B5 /* Sources */,
+				C6CBD9C01225B98F00F317B5 /* Resources */,
+				C6CBD9C11225B98F00F317B5 /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = testthreadcall;
+			productName = testthreadcall;
+			productReference = C6CBD9C31225B98F00F317B5 /* testthreadcall.kext */;
+			productType = "com.apple.product-type.kernel-extension.iokit";
+		};
 /* End PBXNativeTarget section */
 
 /* Begin PBXProject section */
@@ -84,12 +117,20 @@
 			isa = PBXProject;
 			buildConfigurationList = 1DEB91DD08733DB10010E9CD /* Build configuration list for PBXProject "testkext" */;
 			compatibilityVersion = "Xcode 3.1";
+			developmentRegion = English;
 			hasScannedForEncodings = 1;
+			knownRegions = (
+				English,
+				Japanese,
+				French,
+				German,
+			);
 			mainGroup = 089C166AFE841209C02AAC07 /* testkext */;
 			projectDirPath = "";
 			projectRoot = "";
 			targets = (
 				C68D22A70EB243BC00C3A06C /* testvmx */,
+				C6CBD9C21225B98F00F317B5 /* testthreadcall */,
 			);
 		};
 /* End PBXProject section */
@@ -102,6 +143,13 @@
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
+		C6CBD9C01225B98F00F317B5 /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
 /* End PBXResourcesBuildPhase section */
 
 /* Begin PBXSourcesBuildPhase section */
@@ -113,6 +161,14 @@
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
+		C6CBD9BF1225B98F00F317B5 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				C6CBD9CE1225B9FF00F317B5 /* testthreadcall.cpp in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
 /* End PBXSourcesBuildPhase section */
 
 /* Begin XCBuildConfiguration section */
@@ -124,6 +180,7 @@
 				GCC_OPTIMIZATION_LEVEL = 0;
 				GCC_WARN_ABOUT_RETURN_TYPE = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
+				ONLY_ACTIVE_ARCH = YES;
 				PREBINDING = NO;
 			};
 			name = Debug;
@@ -180,6 +237,45 @@
 			};
 			name = Release;
 		};
+		C6CBD9C51225B98F00F317B5 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_ENABLE_FIX_AND_CONTINUE = NO;
+				GCC_MODEL_TUNING = G5;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				INFOPLIST_FILE = "testthreadcall-Info.plist";
+				INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions";
+				MODULE_NAME = com.yourcompany.driver.testthreadcall;
+				MODULE_VERSION = 1.0.0d1;
+				PREBINDING = NO;
+				PRODUCT_NAME = testthreadcall;
+				WRAPPER_EXTENSION = kext;
+			};
+			name = Debug;
+		};
+		C6CBD9C61225B98F00F317B5 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				COPY_PHASE_STRIP = YES;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				GCC_ENABLE_FIX_AND_CONTINUE = NO;
+				GCC_MODEL_TUNING = G5;
+				INFOPLIST_FILE = "testthreadcall-Info.plist";
+				INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions";
+				MODULE_NAME = com.yourcompany.driver.testthreadcall;
+				MODULE_VERSION = 1.0.0d1;
+				PREBINDING = NO;
+				PRODUCT_NAME = testthreadcall;
+				WRAPPER_EXTENSION = kext;
+				ZERO_LINK = NO;
+			};
+			name = Release;
+		};
 /* End XCBuildConfiguration section */
 
 /* Begin XCConfigurationList section */
@@ -201,6 +297,15 @@
 			defaultConfigurationIsVisible = 0;
 			defaultConfigurationName = Release;
 		};
+		C6CBD9C71225B98F00F317B5 /* Build configuration list for PBXNativeTarget "testthreadcall" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				C6CBD9C51225B98F00F317B5 /* Debug */,
+				C6CBD9C61225B98F00F317B5 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
 /* End XCConfigurationList section */
 	};
 	rootObject = 089C1669FE841209C02AAC07 /* Project object */;
diff --git a/tools/tests/testkext/testthreadcall-Info.plist b/tools/tests/testkext/testthreadcall-Info.plist
new file mode 100644
index 000000000..e33642e9b
--- /dev/null
+++ b/tools/tests/testkext/testthreadcall-Info.plist
@@ -0,0 +1,47 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>English</string>
+	<key>CFBundleExecutable</key>
+	<string>${EXECUTABLE_NAME}</string>
+	<key>CFBundleIdentifier</key>
+	<string>com.yourcompany.driver.${PRODUCT_NAME:rfc1034identifier}</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundlePackageType</key>
+	<string>KEXT</string>
+	<key>CFBundleShortVersionString</key>
+	<string>1.0</string>
+	<key>CFBundleSignature</key>
+	<string>????</string>
+	<key>CFBundleVersion</key>
+	<string>1</string>
+	<key>IOKitPersonalities</key>
+	<dict>
+                <key>testthreadcall</key>
+                <dict>
+                        <key>CFBundleIdentifier</key>
+                        <string>com.yourcompany.driver.${PRODUCT_NAME:identifier}</string>
+                        <key>IOClass</key>
+                        <string>testthreadcall</string>
+                        <key>IOMatchCategory</key>
+                        <string>testthreadcall</string>
+                        <key>IOProviderClass</key>
+                        <string>IOResources</string>
+                        <key>IOResourceMatch</key>
+                        <string>IOKit</string>
+                </dict>
+	</dict>
+	<key>OSBundleLibraries</key>
+	<dict>
+		<key>com.apple.kpi.iokit</key>
+		<string>11.0</string>
+		<key>com.apple.kpi.libkern</key>
+		<string>11.0</string>
+		<key>com.apple.kpi.mach</key>
+		<string>11.0</string>
+	</dict>
+</dict>
+</plist>
diff --git a/tools/tests/testkext/testthreadcall.cpp b/tools/tests/testkext/testthreadcall.cpp
new file mode 100644
index 000000000..b879b71b2
--- /dev/null
+++ b/tools/tests/testkext/testthreadcall.cpp
@@ -0,0 +1,65 @@
+/*
+ *  testthreadcall.cpp
+ *  testkext
+ *
+ */
+
+#define ABSOLUTETIME_SCALAR_TYPE
+
+#include "testthreadcall.h"
+
+#include <kern/thread_call.h>
+
+#define super IOService
+OSDefineMetaClassAndStructors(testthreadcall, super);
+
+extern "C" {
+
+static void thread_call_test_func(thread_call_param_t param0,
+								  thread_call_param_t param1);
+
+}
+
+bool
+testthreadcall::start( IOService * provider )
+{
+	boolean_t ret;
+	uint64_t deadline;
+    
+    IOLog("%s\n", __PRETTY_FUNCTION__);
+    
+    if (!super::start(provider)) {
+        return false;
+    }
+    
+    IOLog("Attempting thread_call_allocate\n");
+	tcall = thread_call_allocate(thread_call_test_func, this);
+    IOLog("thread_call_t %p\n", tcall);
+    
+	tlock = IOSimpleLockAlloc();
+	IOLog("tlock %p\n", tlock);
+	
+	clock_interval_to_deadline(5, NSEC_PER_SEC, &deadline);
+	IOLog("%d sec deadline is %llu\n", 5, deadline);
+	
+	ret = thread_call_enter_delayed(tcall, deadline);
+	
+    return true;
+}
+
+static void thread_call_test_func(thread_call_param_t param0,
+								  thread_call_param_t param1)
+{
+	testthreadcall *self = (testthreadcall *)param0;
+	
+	IOLog("thread_call_test_func %p %p\n", param0, param1);
+	
+	IOSimpleLockLock(self->tlock);
+	IOSimpleLockUnlock(self->tlock);
+
+#if 1
+	IOSimpleLockLock(self->tlock);
+#else
+	IOSimpleLockUnlock(self->tlock);	
+#endif
+}
diff --git a/tools/tests/testkext/testthreadcall.h b/tools/tests/testkext/testthreadcall.h
new file mode 100644
index 000000000..2b8973825
--- /dev/null
+++ b/tools/tests/testkext/testthreadcall.h
@@ -0,0 +1,18 @@
+/*
+ *  testthreadcall.h
+ *  testkext
+ *
+ */
+
+#include <IOKit/IOService.h>
+#include <IOKit/IOLib.h>
+
+class testthreadcall : public IOService {
+    OSDeclareDefaultStructors(testthreadcall);
+    
+    virtual bool start( IOService * provider );
+    
+public:
+	thread_call_t tcall;
+	IOSimpleLock *tlock;
+};
\ No newline at end of file
diff --git a/tools/tests/testkext/testvmx.cpp b/tools/tests/testkext/testvmx.cpp
index eaa93d3ee..4ca0e1fab 100644
--- a/tools/tests/testkext/testvmx.cpp
+++ b/tools/tests/testkext/testvmx.cpp
@@ -2,9 +2,6 @@
  *  testvmx.cpp
  *  testkext
  *
- *  Created by Shantonu Sen on 10/24/08.
- *  Copyright 2008 Apple Computer, Inc.. All rights reserved.
- *
  */
 
 #include "testvmx.h"
diff --git a/tools/tests/testkext/testvmx.h b/tools/tests/testkext/testvmx.h
index 9da7ca9d5..f8937b248 100644
--- a/tools/tests/testkext/testvmx.h
+++ b/tools/tests/testkext/testvmx.h
@@ -2,9 +2,6 @@
  *  testvmx.h
  *  testkext
  *
- *  Created by Shantonu Sen on 10/24/08.
- *  Copyright 2008 Apple Computer, Inc.. All rights reserved.
- *
  */
 
 #include <IOKit/IOService.h>
diff --git a/tools/tests/xnu_quick_test/32bit_inode_tests.c b/tools/tests/xnu_quick_test/32bit_inode_tests.c
index e5effea56..b85bb8911 100644
--- a/tools/tests/xnu_quick_test/32bit_inode_tests.c
+++ b/tools/tests/xnu_quick_test/32bit_inode_tests.c
@@ -19,7 +19,6 @@
 
 extern char		g_target_path[ PATH_MAX ];
 extern int		g_skip_setuid_tests;
-extern int		g_is_under_rosetta;
 extern int		g_is_single_user;
 
 /*  **************************************************************************************************************
diff --git a/tools/tests/xnu_quick_test/README b/tools/tests/xnu_quick_test/README
index 3e15a5a91..3bf8a2d8f 100644
--- a/tools/tests/xnu_quick_test/README
+++ b/tools/tests/xnu_quick_test/README
@@ -28,22 +28,24 @@ is 32 or 64 bits.
 
 NOTE - we have several workarounds and test exceptions for some
 outstanding bugs in xnu.  All the workarounds are marked with "todo" and
-some comments noting the radar number of the offending bug.  Do a seach
+some comments noting the radar number of the offending bug.  Do a search
 for "todo" in the source files for this project to locate which tests have
 known failures.   And please tag any new exceptions you find with "todo"
 in the comment and the radar number of the bug.
 
-To build a fat binary, export ARCH="ppc ppc64 i386 x86_64". This will work
+To build a fat binary, export ARCH="i386 x86_64". This will work
 for any architectures that Apple gcc recognizes.
 
-Added three defines which you can use at the compile line to build variants.
+Added four defines which you can use at the compile line to build variants.
 DEBUG
        turn on additional printfs
 CONFORMANCE_TESTS_IN_XNU 
        when conformance tests are in xnu, set this to 1
 TEST_SYSTEM_CALLS
        test system calls (doesn't compile; a different bug)
-by default, all three are set to 0, i.e. disabled.  To build, export
+RUN_UNDER_TESTBOTS
+       when running under testbots, set this to 1		
+by default, all four are set to 0, i.e. disabled.  To build, export
 MORECFLAGS with the values you want set, e.g. 
        export MORECFLAGS="-D DEBUG=1 -D CONFORMANCE_TESTS_IN_XNU=1"
 
@@ -59,7 +61,8 @@ USAGE:  xnu_quick_test -target TARGET_PATH
          -l[ist]                         # list all the tests this tool performs   
          -r[un] 1, 3, 10 - 19            # run specific tests.  enter individual test numbers and/or range of numbers.  use -list to list tests.   
          -s[kip]                         # skip setuid tests   
-         -t[arget] TARGET_PATH           # path to directory where tool will create test files.  defaults to "/tmp/"   
+         -t[arget] TARGET_PATH           # path to directory where tool will create test files.  defaults to "/tmp/"  
+         -x[ilog]                        # run xnu_quick_test with XILog reporting enabled
 
 examples:  
 --- Place all test files and directories at the root of volume "test_vol" --- 
diff --git a/tools/tests/xnu_quick_test/atomic_fifo_queue_test.c b/tools/tests/xnu_quick_test/atomic_fifo_queue_test.c
new file mode 100644
index 000000000..06a0e809f
--- /dev/null
+++ b/tools/tests/xnu_quick_test/atomic_fifo_queue_test.c
@@ -0,0 +1,33 @@
+#if defined(i386) || defined(__x86_64__)
+
+#include <libkern/OSAtomic.h>
+#include <stdio.h>
+#include <string.h>
+#include <err.h>
+
+typedef struct {
+	void *next;
+	char *str;
+} QueueNode;
+
+int atomic_fifo_queue_test( void *the_argp ) {
+	OSFifoQueueHead head = OS_ATOMIC_FIFO_QUEUE_INIT;
+	char *str1 = "String 1", *str2 = "String 2";
+	QueueNode node1 = { 0, str1 };
+	OSAtomicFifoEnqueue(&head, &node1, 0);
+	QueueNode node2 = { 0, str2 };
+	OSAtomicFifoEnqueue(&head, &node2, 0);
+	QueueNode *node_ptr = OSAtomicFifoDequeue(&head, 0);
+	if( strcmp(node_ptr->str, str1) != 0 ) {
+		warnx("OSAtomicFifoDequeue returned incorrect string. Expected %s, got %s", str1, node_ptr->str);
+		return 1;
+	}
+	node_ptr = OSAtomicFifoDequeue(&head, 0);
+	if( strcmp(node_ptr->str, str2) != 0 ) {
+		warnx("OSAtomicFifoDequeue returned incorrect string. Expected %s, got %s", str2, node_ptr->str);
+		return 1;
+	}
+	return 0;
+}
+
+#endif
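The trailing 0 passed to OSAtomicFifoEnqueue/OSAtomicFifoDequeue is the byte offset of the link pointer inside each element; it is 0 here only because next is the first field of QueueNode. Computing it explicitly is more robust if the layout ever changes:

    #include <stddef.h>   /* offsetof */

    OSAtomicFifoEnqueue(&head, &node1, offsetof(QueueNode, next));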
diff --git a/tools/tests/xnu_quick_test/commpage_tests.c b/tools/tests/xnu_quick_test/commpage_tests.c
new file mode 100644
index 000000000..792e78f00
--- /dev/null
+++ b/tools/tests/xnu_quick_test/commpage_tests.c
@@ -0,0 +1,361 @@
+/*
+ *  commpage_tests.c
+ *  xnu_quick_test
+ *
+ *  Copyright 2009 Apple Inc. All rights reserved.
+ *
+ */
+
+#include "tests.h"
+#include <unistd.h>
+#include <stdint.h>
+#include <err.h>
+#include <sys/param.h>
+#include <System/machine/cpu_capabilities.h>
+#include <mach/mach.h>
+#include <mach/mach_error.h>
+#include <mach/bootstrap.h>
+
+
+#ifdef _COMM_PAGE_ACTIVE_CPUS
+int active_cpu_test(void);
+#endif
+
+int get_sys_uint64(const char *sel, uint64_t *val);
+int get_sys_int32(const char *sel, int32_t *val);
+
+#define getcommptr(var, commpageaddr) do { \
+		var = (typeof(var))(uintptr_t)(commpageaddr); \
+	} while(0)
+
+/*
+ * Check some of the data in the commpage
+ * against manual sysctls
+ */
+int commpage_data_tests( void * the_argp )
+{
+	int ret;
+	uint64_t sys_u64;
+	int32_t sys_i32;
+
+	volatile uint64_t *comm_u64;
+	volatile uint32_t *comm_u32;
+	volatile uint16_t *comm_u16;
+	volatile uint8_t *comm_u8;
+
+
+	/* _COMM_PAGE_CPU_CAPABILITIES */
+	getcommptr(comm_u32, _COMM_PAGE_CPU_CAPABILITIES);
+
+	ret = get_sys_int32("hw.ncpu", &sys_i32);
+	if (ret) goto fail;
+
+	if (sys_i32 != ((*comm_u32 & kNumCPUs) >> kNumCPUsShift)) {
+		warnx("kNumCPUs does not match hw.ncpu");
+		ret = -1;
+		goto fail;
+	}
+
+	getcommptr(comm_u8, _COMM_PAGE_NCPUS);
+	if (sys_i32 != (*comm_u8)) {
+		warnx("_COMM_PAGE_NCPUS does not match hw.ncpu");
+		ret = -1;
+		goto fail;
+	}
+
+	ret = get_sys_int32("hw.logicalcpu", &sys_i32);
+	if (ret) goto fail;
+
+	if (sys_i32 != ((*comm_u32 & kNumCPUs) >> kNumCPUsShift)) {
+		warnx("kNumCPUs does not match hw.logicalcpu");
+		ret = -1;
+		goto fail;
+	}
+
+	/* Intel only capabilities */
+#if defined(__i386__) || defined(__x86_64__)
+	ret = get_sys_int32("hw.optional.mmx", &sys_i32);
+	if (ret) goto fail;
+
+	if (!(sys_i32) ^ !(*comm_u32 & kHasMMX)) {
+		warnx("kHasMMX does not match hw.optional.mmx");
+		ret = -1;
+		goto fail;
+	}
+
+	ret = get_sys_int32("hw.optional.sse", &sys_i32);
+	if (ret) goto fail;
+
+	if (!(sys_i32) ^ !(*comm_u32 & kHasSSE)) {
+		warnx("kHasSSE does not match hw.optional.sse");
+		ret = -1;
+		goto fail;
+	}
+	ret = get_sys_int32("hw.optional.sse2", &sys_i32);
+	if (ret) goto fail;
+
+	if (!(sys_i32) ^ !(*comm_u32 & kHasSSE2)) {
+		warnx("kHasSSE2 does not match hw.optional.sse2");
+		ret = -1;
+		goto fail;
+	}
+
+	ret = get_sys_int32("hw.optional.sse3", &sys_i32);
+	if (ret) goto fail;
+
+	if (!(sys_i32) ^ !(*comm_u32 & kHasSSE3)) {
+		warnx("kHasSSE3 does not match hw.optional.sse3");
+		ret = -1;
+		goto fail;
+	}
+
+	ret = get_sys_int32("hw.optional.supplementalsse3", &sys_i32);
+	if (ret) goto fail;
+
+	if (!(sys_i32) ^ !(*comm_u32 & kHasSupplementalSSE3)) {
+		warnx("kHasSupplementalSSE3 does not match hw.optional.supplementalsse3");
+		ret = -1;
+		goto fail;
+	}
+
+	ret = get_sys_int32("hw.optional.sse4_1", &sys_i32);
+	if (ret) goto fail;
+
+	if (!(sys_i32) ^ !(*comm_u32 & kHasSSE4_1)) {
+		warnx("kHasSSE4_1 does not match hw.optional.sse4_1");
+		ret = -1;
+		goto fail;
+	}
+
+	ret = get_sys_int32("hw.optional.sse4_2", &sys_i32);
+	if (ret) goto fail;
+
+	if (!(sys_i32) ^ !(*comm_u32 & kHasSSE4_2)) {
+		warnx("kHasSSE4_2 does not match hw.optional.sse4_2");
+		ret = -1;
+		goto fail;
+	}
+
+	ret = get_sys_int32("hw.optional.aes", &sys_i32);
+	if (ret) goto fail;
+
+	if (!(sys_i32) ^ !(*comm_u32 & kHasAES)) {
+		warnx("kHasAES does not match hw.optional.aes");
+		ret = -1;
+		goto fail;
+	}
+
+	ret = get_sys_int32("hw.optional.x86_64", &sys_i32);
+	if (ret) goto fail;
+
+	if (!(sys_i32) ^ !(*comm_u32 & k64Bit)) {
+		warnx("k64Bit does not match hw.optional.x86_64");
+		ret = -1;
+		goto fail;
+	}
+#endif /* __i386__ || __x86_64__ */
+	 
+	/* These fields are not implemented for all architectures */
+#ifdef _COMM_PAGE_SCHED_GEN
+	uint32_t preempt_count1, preempt_count2;
+	uint64_t count;
+
+	ret = get_sys_uint64("hw.cpufrequency_max", &sys_u64);
+	if (ret) goto fail;
+	
+    getcommptr(comm_u32, _COMM_PAGE_SCHED_GEN);
+	preempt_count1 = *comm_u32;
+	/* execute for around 1 quantum (10ms) */
+	for(count = MAX(10000000ULL, sys_u64/64); count > 0; count--) {
+		asm volatile("");
+	}
+	preempt_count2 = *comm_u32;
+	if (preempt_count1 >= preempt_count2) {
+		warnx("_COMM_PAGE_SCHED_GEN not incrementing (%u => %u)",
+			  preempt_count1, preempt_count2);
+		ret = -1;
+		goto fail;
+	}
+#endif /* _COMM_PAGE_SCHED_GEN */
+
+#ifdef _COMM_PAGE_ACTIVE_CPUS
+	ret = get_sys_int32("hw.activecpu", &sys_i32);
+	if (ret) goto fail;
+
+	getcommptr(comm_u8, _COMM_PAGE_ACTIVE_CPUS);
+	if (sys_i32 != (*comm_u8)) {
+		warnx("_COMM_PAGE_ACTIVE_CPUS does not match hw.activecpu");
+		ret = -1;
+		goto fail;
+	}
+
+	ret = active_cpu_test();
+	if (ret) goto fail;
+#endif /* _COMM_PAGE_ACTIVE_CPUS */
+
+#ifdef _COMM_PAGE_PHYSICAL_CPUS
+	ret = get_sys_int32("hw.physicalcpu_max", &sys_i32);
+	if (ret) goto fail;
+
+	getcommptr(comm_u8, _COMM_PAGE_PHYSICAL_CPUS);
+	if (sys_i32 != (*comm_u8)) {
+		warnx("_COMM_PAGE_PHYSICAL_CPUS does not match hw.physicalcpu_max");
+		ret = -1;
+		goto fail;
+	}
+#endif /* _COMM_PAGE_PHYSICAL_CPUS */
+
+#ifdef _COMM_PAGE_LOGICAL_CPUS
+	ret = get_sys_int32("hw.logicalcpu_max", &sys_i32);
+	if (ret) goto fail;
+
+	getcommptr(comm_u8, _COMM_PAGE_LOGICAL_CPUS);
+	if (sys_i32 != (*comm_u8)) {
+		warnx("_COMM_PAGE_LOGICAL_CPUS does not match hw.logicalcpu_max");
+		ret = -1;
+		goto fail;
+	}
+#endif /* _COMM_PAGE_LOGICAL_CPUS */
+
+#if 0
+#ifdef _COMM_PAGE_MEMORY_SIZE
+	ret = get_sys_uint64("hw.memsize", &sys_u64);
+	if (ret) goto fail;
+
+	getcommptr(comm_u64, _COMM_PAGE_MEMORY_SIZE);
+	if (sys_u64 != (*comm_u64)) {
+		warnx("_COMM_PAGE_MEMORY_SIZE does not match hw.memsize");
+		ret = -1;
+		goto fail;
+	}
+#endif /* _COMM_PAGE_MEMORY_SIZE */
+#endif
+
+	ret = 0;
+
+fail:
+	
+	return ret;
+}
+
+
+int get_sys_uint64(const char *sel, uint64_t *val)
+{
+	size_t size = sizeof(*val);
+	int ret;
+
+	ret = sysctlbyname(sel, val, &size, NULL, 0);
+	if (ret == -1) {
+		warn("sysctlbyname(%s)", sel);
+		return ret;
+	}
+
+//	warnx("sysctlbyname(%s) => %llx", sel, *val);
+
+	return 0;
+}
+
+int get_sys_int32(const char *sel, int32_t *val)
+{
+	size_t size = sizeof(*val);
+	int ret;
+
+	ret = sysctlbyname(sel, val, &size, NULL, 0);
+	if (ret == -1) {
+		warn("sysctlbyname(%s)", sel);
+		return ret;
+	}
+
+//	warnx("sysctlbyname(%s) => %x", sel, *val);
+
+	return 0;
+}
+
+#ifdef _COMM_PAGE_ACTIVE_CPUS
+/*
+ * Try to find a secondary processor that we can disable,
+ * and make sure the commpage reflects that. This test
+ * will pass on UP systems, and on systems where all secondary
+ * processors have been manually disabled.
+ */
+int active_cpu_test(void)
+{
+	volatile uint8_t *activeaddr;
+	uint8_t original_activecpu;
+	boolean_t test_failed = FALSE;
+
+	/* Code stolen from hostinfo.c */
+	kern_return_t           ret;
+	processor_t             *processor_list;                
+	host_name_port_t        host;
+	struct processor_basic_info     processor_basic_info;
+	int                     cpu_count;
+	int                     data_count;
+	int                     i;
+
+
+	getcommptr(activeaddr, _COMM_PAGE_ACTIVE_CPUS);
+	original_activecpu = *activeaddr;
+
+	host = mach_host_self();
+	ret = host_processors(host,
+						  (processor_array_t *) &processor_list, &cpu_count);
+	if (ret != KERN_SUCCESS) {
+		mach_error("host_processors()", ret);
+		return ret;
+	}
+
+	/* skip master processor */
+	for (i = 1; i < cpu_count; i++) {
+		data_count = PROCESSOR_BASIC_INFO_COUNT;
+		ret = processor_info(processor_list[i], PROCESSOR_BASIC_INFO,
+							 &host,
+							 (processor_info_t) &processor_basic_info,
+							 &data_count);
+		if (ret != KERN_SUCCESS) {
+			if (ret == MACH_SEND_INVALID_DEST) {
+				continue;
+			}
+			mach_error("processor_info", ret);
+			return ret;
+		}
+	
+		if (processor_basic_info.running) {
+			/* found victim */
+			ret = processor_exit(processor_list[i]);
+			if (ret != KERN_SUCCESS) {
+				mach_error("processor_exit()", ret);
+				return ret;
+			}
+
+			sleep(1);
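+			/* give the CPU time to go offline and the commpage time to update */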
+
+			if (*activeaddr != (original_activecpu - 1)) {
+				test_failed = TRUE;
+			}
+
+			ret = processor_start(processor_list[i]);
+			if (ret != KERN_SUCCESS) {
+				mach_error("processor_start()", ret);
+				return ret;
+			}
+
+			sleep(1);
+
+			break;
+		}
+	}
+
+	if (test_failed) {
+		warnx("_COMM_PAGE_ACTIVE_CPUS not updated after disabling a CPU");
+		return -1;
+	}
+
+	if (*activeaddr != original_activecpu) {
+		warnx("_COMM_PAGE_ACTIVE_CPUS not restored to original value");
+		return -1;
+	}
+
+	return 0;
+}
+#endif
diff --git a/tools/tests/xnu_quick_test/helpers/arch.c b/tools/tests/xnu_quick_test/helpers/arch.c
index f526ed027..1e4f867a5 100644
--- a/tools/tests/xnu_quick_test/helpers/arch.c
+++ b/tools/tests/xnu_quick_test/helpers/arch.c
@@ -6,12 +6,6 @@
  */
 int main()
 {
-#if __ppc__
-	return CPU_TYPE_POWERPC;
-#endif /* __ppc__ */
-#if __ppc64__
-	return CPU_TYPE_POWERPC64;
-#endif /* __ppc64__ */
 #if __i386__
 	return CPU_TYPE_I386;
 #endif /* __i386__ */
diff --git a/tools/tests/xnu_quick_test/helpers/data_exec.c b/tools/tests/xnu_quick_test/helpers/data_exec.c
index f8b353c5e..8cd7c0316 100644
--- a/tools/tests/xnu_quick_test/helpers/data_exec.c
+++ b/tools/tests/xnu_quick_test/helpers/data_exec.c
@@ -14,10 +14,10 @@ jmp_buf resume;
 
 #define ALT_STK_SIZE	(MINSIGSTKSZ + pagesize)
 
-#if __i386__ || __ppc__
+#if __i386__
 typedef	unsigned int		psint_t;
 #endif
-#if __x86_64__ || __ppc64__
+#if __x86_64__
 typedef unsigned long long	psint_t;
 #endif
 
@@ -43,25 +43,18 @@ int verbose = 0;
 #define FAIL	-1	/* can't use 0 since setjmp uses that */
 
 int expected[4] = {
-#if __i386__
+#if NXDATA32TESTNONX
 	SUCCEED,		/* execute from heap */
 	SUCCEED,		/* execute from heap with PROT_EXEC */
 	FAIL,			/* execute from stack */
 	SUCCEED,		/* execute from stack with PROT_EXEC */
-#endif
-#if __x86_64__
-	FAIL,			/* execute from heap */
+#elif __i386__
+	FAIL,			/* execute from heap */
 	SUCCEED,		/* execute from heap with PROT_EXEC */
 	FAIL,			/* execute from stack */
 	SUCCEED,		/* execute from stack with PROT_EXEC */
 #endif
-#if __ppc__
-	SUCCEED,		/* execute from heap */
-	SUCCEED,		/* exeucte from heap with PROT_EXEC */
-	SUCCEED,		/* execute from stack */
-	SUCCEED,		/* exeucte from stack with PROT_EXEC */
-#endif
-#if __ppc64__
+#if __x86_64__
 	FAIL,			/* execute from heap */
 	SUCCEED,		/* execute from heap with PROT_EXEC */
 	FAIL,			/* execute from stack */
diff --git a/tools/tests/xnu_quick_test/helpers/launch.c b/tools/tests/xnu_quick_test/helpers/launch.c
index 7e8638eff..206116042 100644
--- a/tools/tests/xnu_quick_test/helpers/launch.c
+++ b/tools/tests/xnu_quick_test/helpers/launch.c
@@ -71,43 +71,6 @@ int main(int argc, const char * argv[])
 #endif
 
 
-#if defined(__ppc__)
-	/* 
-	 * This is the helper binary for the PPC64 version of xnu_quick_test. xnu_quick_test 
-	 * forks and execs this code to test exec()ing from a 32-bit binary.
-	 */
-	errmsg = "execve failed: from ppc forking and exec()ing ppc process.\n";
-	argvs[0] = "sleep-ppc32";
-	if (do_execve_test("helpers/sleep-ppc32", argvs, NULL, 0))	goto test_failed_exit;
-
-	errmsg = "execve failed: from ppc forking and exec()ing ppc64 process w/ 4G pagezero.\n";
-	argvs[0] = "sleep-ppc64-4G";
-	if (do_execve_test("helpers/sleep-ppc64-4G", argvs, NULL, 0))	goto test_failed_exit;
-
-	errmsg = "execve failed: from ppc forking and exec()ing ppc64 process w/ 4K pagezero.\n";
-	argvs[0] = "sleep-ppc64-4K";
-	if (do_execve_test("helpers/sleep-ppc64-4K", argvs, NULL, 0))	goto test_failed_exit;
-#endif
-
-
-#if defined(__ppc64__)
-	/* 
-	 * This is the helper binary for the ppc version of xnu_quick_test. xnu_quick_test 
-	 * forks and execs this code to test exec()ing from a 64-bit binary.
-	 */
-	errmsg = "execve failed: from ppc64 forking and exec()ing 64-bit ppc process w/ 4G pagezero.\n";
-	argvs[0] = "sleep-ppc64-4G";
-	if (do_execve_test("helpers/sleep-ppc64-4G", argvs, NULL, 1))		goto test_failed_exit;
-
-	errmsg = "execve failed: from ppc64 forking and exec()ing 64-bit ppc process w/ 4K pagezero.\n";
-	argvs[0] = "sleep-ppc64-4K";
-	if (do_execve_test("helpers/sleep-ppc64-4K", argvs, NULL, 1))		goto test_failed_exit;
-
-	errmsg = "execve failed: from ppc64 forking and exec()ing 32 bit ppc process.\n";
-	argvs[0] = "sleep-ppc32";
-	if (do_execve_test("helpers/sleep-ppc32", argvs, NULL, 1))		goto test_failed_exit;	
-#endif
-
 	/* 
 	 * We are ourselves launched with do_execve_test, which wants a chance to 
 	 * send a SIGKILL
diff --git a/tools/tests/xnu_quick_test/kqueue_tests.c b/tools/tests/xnu_quick_test/kqueue_tests.c
index 0d872ccea..40069048c 100644
--- a/tools/tests/xnu_quick_test/kqueue_tests.c
+++ b/tools/tests/xnu_quick_test/kqueue_tests.c
@@ -18,7 +18,6 @@
 
 extern char		g_target_path[ PATH_MAX ];
 extern int		g_skip_setuid_tests;
-extern int		g_is_under_rosetta;
 
 int msg_count = 14;
 int last_msg_seen = 0;
@@ -144,6 +143,7 @@ kmsg_consumer_thread(void * arg)
 int kqueue_tests( void * the_argp )
 {
 	int				my_err, my_status;
+	void				*my_pthread_join_status;
 	int				my_kqueue = -1;
 	int				my_kqueue64 = -1;
 	int				my_fd = -1;
@@ -271,22 +271,20 @@ int kqueue_tests( void * the_argp )
 	}
 
 #if !TARGET_OS_EMBEDDED	
-	if (!g_is_under_rosetta) {
-		/* use kevent64 to test EVFILT_PROC */
-		EV_SET64( &my_kevent64, my_pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, 0, 0, 0 ); 
-		my_err = kevent64( my_kqueue, &my_kevent64, 1, NULL, 0, 0, 0); 
-		if ( my_err != -1 && errno != EINVAL ) {
-			printf( "kevent64 call should fail with kqueue used for kevent() - %d\n", my_err);
-			goto test_failed_exit;
-		}
+	/* use kevent64 to test EVFILT_PROC */
+	EV_SET64( &my_kevent64, my_pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, 0, 0, 0 ); 
+	my_err = kevent64( my_kqueue, &my_kevent64, 1, NULL, 0, 0, 0); 
+	if ( my_err != -1 && errno != EINVAL ) {
+		printf( "kevent64 call should fail with kqueue used for kevent() - %d\n", my_err);
+		goto test_failed_exit;
+	}
 		
-		my_kqueue64 = kqueue();
-		EV_SET64( &my_kevent64, my_pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, 0, 0, 0 ); 
-		my_err = kevent64( my_kqueue64, &my_kevent64, 1, NULL, 0, 0, 0); 
-		if ( my_err == -1 ) {
-			printf( "kevent64 call to get proc exit failed with error %d - \"%s\" \n", errno, strerror( errno) );
-			goto test_failed_exit;
-		}
+	my_kqueue64 = kqueue();
+	EV_SET64( &my_kevent64, my_pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, 0, 0, 0 ); 
+	my_err = kevent64( my_kqueue64, &my_kevent64, 1, NULL, 0, 0, 0); 
+	if ( my_err == -1 ) {
+		printf( "kevent64 call to get proc exit failed with error %d - \"%s\" \n", errno, strerror( errno) );
+		goto test_failed_exit;
 	}
 #endif
 
@@ -344,28 +342,26 @@ int kqueue_tests( void * the_argp )
 	}
 
 #if !TARGET_OS_EMBEDDED	
-	if (!g_is_under_rosetta) {
-		/* look for child exit notification on the kevent64 kqueue */
-		EV_SET64( &my_kevent64, my_pid, EVFILT_PROC, EV_CLEAR, NOTE_EXIT, 0, 0, 0, 0 ); 
-		my_err = kevent64( my_kqueue64, NULL, 0, &my_kevent64, 1, 0, 0); 
-		if ( my_err == -1 ) {
-			printf( "kevent64 call to get child exit failed with error %d - \"%s\" \n", errno, strerror( errno) );
-			goto test_failed_exit;
-		}
-		if ( my_err == 0 ) {
-			printf( "kevent64 call to get proc exit event did not return any when it should have \n" );
-			goto test_failed_exit;
-		}
-		if ( my_kevent64.filter != EVFILT_PROC ) {
-			printf( "kevent64 call to get proc exit event did not return EVFILT_PROC \n" );
-			printf( "filter %i \n", my_kevent64.filter );
-			goto test_failed_exit;
-		}
-		if ( (my_kevent64.fflags & NOTE_EXIT) == 0 ) {
-			printf( "kevent64 call to get proc exit event did not return NOTE_EXIT \n" );
-			printf( "fflags 0x%02X \n", my_kevent64.fflags );
-			goto test_failed_exit;
-		}
+	/* look for child exit notification on the kevent64 kqueue */
+	EV_SET64( &my_kevent64, my_pid, EVFILT_PROC, EV_CLEAR, NOTE_EXIT, 0, 0, 0, 0 ); 
+	my_err = kevent64( my_kqueue64, NULL, 0, &my_kevent64, 1, 0, 0); 
+	if ( my_err == -1 ) {
+		printf( "kevent64 call to get child exit failed with error %d - \"%s\" \n", errno, strerror( errno) );
+		goto test_failed_exit;
+	}
+	if ( my_err == 0 ) {
+		printf( "kevent64 call to get proc exit event did not return any events when it should have \n" );
+		goto test_failed_exit;
+	}
+	if ( my_kevent64.filter != EVFILT_PROC ) {
+		printf( "kevent64 call to get proc exit event did not return EVFILT_PROC \n" );
+		printf( "filter %i \n", my_kevent64.filter );
+		goto test_failed_exit;
+	}
+	if ( (my_kevent64.fflags & NOTE_EXIT) == 0 ) {
+		printf( "kevent64 call to get proc exit event did not return NOTE_EXIT \n" );
+		printf( "fflags 0x%02X \n", my_kevent64.fflags );
+		goto test_failed_exit;
 	}
 
 	my_wait_pid = wait4( my_pid, &my_status, 0, NULL );
@@ -472,12 +468,12 @@ int kqueue_tests( void * the_argp )
 	for (my_index = 0;
 	     my_index < 3;
 	     my_index++) {
-	  my_err = pthread_join( my_threadv[my_index], (void **)&my_status );
+	  my_err = pthread_join( my_threadv[my_index], &my_pthread_join_status );
                 if ( my_err != 0 ) {
                         printf( "pthread_join failed with error %d - %s \n", my_err, strerror(my_err) );
                         goto test_failed_exit;
                 }
-                if ( my_status != 0 ) {
+                if ( my_pthread_join_status != 0 ) {
                         goto test_failed_exit;
                 }
         }
diff --git a/tools/tests/xnu_quick_test/machvm_tests.c b/tools/tests/xnu_quick_test/machvm_tests.c
index d478807a7..8ea8b645b 100644
--- a/tools/tests/xnu_quick_test/machvm_tests.c
+++ b/tools/tests/xnu_quick_test/machvm_tests.c
@@ -13,8 +13,6 @@
 #include <sys/param.h>
 #include <mach-o/ldsyms.h>
 
-extern int		g_is_under_rosetta;
-
 int machvm_tests( void * the_argp )
 {
 	int pagesize = getpagesize();
@@ -155,22 +153,20 @@ int machvm_tests( void * the_argp )
 		}
 	}		
 	
-	// do a vm_copy of our mach-o header and compare. Rosetta doesn't support this, though
-	if (!g_is_under_rosetta) {
+	// do a vm_write of our mach-o header and compare.
 
-		kret = vm_write(mach_task_self(), (vm_address_t)regionbuffers[2],
+	kret = vm_write(mach_task_self(), (vm_address_t)regionbuffers[2],
 						(vm_offset_t)&_mh_execute_header, pagesize);
-		if (kret != KERN_SUCCESS) {
-			warnx("vm_write of %d pages failed: %d", 1, kret);
-			goto fail;
-		}
-		
-		if (_mh_execute_header.magic != *(uint32_t *)regionbuffers[2]) {
-			warnx("vm_write comparison failed");
-			kret = -1;
-			goto fail;
-		}	
+	if (kret != KERN_SUCCESS) {
+		warnx("vm_write of %d pages failed: %d", 1, kret);
+		goto fail;
 	}
+		
+	if (_mh_execute_header.magic != *(uint32_t *)regionbuffers[2]) {
+		warnx("vm_write comparison failed");
+		kret = -1;
+		goto fail;
+	}	
 	
 	// check that the vm_protects above worked
 	{
@@ -180,8 +176,11 @@ int machvm_tests( void * the_argp )
 		vm_region_basic_info_t basic = (vm_region_basic_info_t)_basic;
 		int _basic64[VM_REGION_BASIC_INFO_COUNT_64];
 		vm_region_basic_info_64_t basic64 = (vm_region_basic_info_64_t)_basic64;
+		int _submap[VM_REGION_SUBMAP_INFO_COUNT];
+		vm_region_submap_info_t submap = (vm_region_submap_info_t)_submap;
 		mach_msg_type_number_t	infocnt;
 		mach_port_t	objname;
+		natural_t nesting_depth = 0;
 		
 #if !__LP64__
 		infocnt = VM_REGION_BASIC_INFO_COUNT;
@@ -242,16 +241,115 @@ int machvm_tests( void * the_argp )
 		
 #if !__LP64__
 		// try to compare some stuff. Particularly important for fields after offset
-		if (!g_is_under_rosetta) {
-			if (basic->offset != basic64->offset ||
-				basic->behavior != basic64->behavior ||
-				basic->user_wired_count != basic64->user_wired_count) {
-				warnx("vm_region and vm_region_64 did not agree");
-				kret = -1;
-				goto fail;			
-			}
-		}		
+		if (basic->offset != basic64->offset ||
+			basic->behavior != basic64->behavior ||
+			basic->user_wired_count != basic64->user_wired_count) {
+			warnx("vm_region and vm_region_64 did not agree");
+			kret = -1;
+			goto fail;			
+		}
 #endif
+
+#if !__LP64__
+		infocnt = VM_REGION_SUBMAP_INFO_COUNT;
+		kret = vm_region_recurse(mach_task_self(), &addr, &size,
+								 &nesting_depth, (vm_region_info_t)submap,
+								 &infocnt);
+		if (kret != KERN_SUCCESS) {
+			warnx("vm_region_recurse() failed: %d", kret);
+			goto fail;
+		}
+
+		if (VM_REGION_SUBMAP_INFO_COUNT != infocnt) {
+			warnx("vm_region_recurse() returned a bad info count");
+			kret = -1;
+			goto fail;
+		}
+
+		if (submap->pages_dirtied != 10) {
+			warnx("vm_region_recurse() returned bad pages_dirtied");
+			kret = -1;
+			goto fail;
+		}
+
+#endif /* !__LP64__ */
+
+	}
+
+	// exercise mach_make_memory_entry/vm_map
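+	// Flow: allocate a buffer, tag its first word, wrap it in a named
+	// memory entry, drop the original mapping, re-map the entry and check
+	// that the tag survived, then release the entry's send right and
+	// verify that a further vm_map attempt fails.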
+	{
+		vm_address_t addr1, addr2;
+		vm_size_t size;
+		mach_port_t mem_handle = MACH_PORT_NULL;
+
+		addr1 = 0;
+		size = 11*pagesize;
+		kret = vm_allocate(mach_task_self(), &addr1, size, VM_FLAGS_ANYWHERE);
+		if (kret != KERN_SUCCESS) {
+			warnx("vm_allocate failed: %d", kret);
+			kret = -1;
+			goto fail;
+		}
+
+		*(uint32_t *)(uintptr_t)addr1 = 'test';
+
+		kret = mach_make_memory_entry(mach_task_self(),
+									  &size, addr1, VM_PROT_DEFAULT,
+									  &mem_handle, MACH_PORT_NULL);
+		if (kret != KERN_SUCCESS) {
+			warnx("mach_make_memory_entry failed: %d", kret);
+			kret = -1;
+			goto fail;
+		}
+
+		kret = vm_deallocate(mach_task_self(), addr1, size);
+		if (kret != KERN_SUCCESS) {
+			warnx("vm_deallocate failed: %d", kret);
+			kret = -1;
+			goto fail;
+		}
+
+		addr2 = 0;
+		kret = vm_map(mach_task_self(), &addr2, size, 0, VM_FLAGS_ANYWHERE,
+					  mem_handle, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_DEFAULT,
+					  VM_INHERIT_NONE);
+		if (kret != KERN_SUCCESS) {
+			warnx("vm_map failed: %d", kret);
+			kret = -1;
+			goto fail;
+		}
+
+		if (*(uint32_t *)(uintptr_t)addr2 != 'test') {
+			warnx("mapped data mismatch");
+			kret = -1;
+			goto fail;
+		}
+
+		kret = vm_deallocate(mach_task_self(), addr2, size);
+		if (kret != KERN_SUCCESS) {
+			warnx("vm_deallocate failed: %d", kret);
+			kret = -1;
+			goto fail;
+		}
+
+		kret = mach_port_mod_refs(mach_task_self(), mem_handle, MACH_PORT_RIGHT_SEND, -1);
+		if (kret != KERN_SUCCESS) {
+			warnx("mach_port_mod_refs(-1) failed: %d", kret);
+			kret = -1;
+			goto fail;
+		}
+
+		addr2 = 0;
+		kret = vm_map(mach_task_self(), &addr2, size, 0, VM_FLAGS_ANYWHERE,
+					  mem_handle, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_DEFAULT,
+					  VM_INHERIT_NONE);
+		if (kret == KERN_SUCCESS) {
+			warnx("vm_map succeeded when it should not have");
+			kret = -1;
+			goto fail;
+		}
+
+		kret = KERN_SUCCESS;
 	}
 	
 fail:
diff --git a/tools/tests/xnu_quick_test/main.c b/tools/tests/xnu_quick_test/main.c
index 0c116215c..d1ca1574d 100644
--- a/tools/tests/xnu_quick_test/main.c
+++ b/tools/tests/xnu_quick_test/main.c
@@ -119,6 +119,11 @@ struct test_entry   g_tests[] =
 	{1, &message_queue_tests, NULL, "msgctl, msgget, msgrcv, msgsnd"},
 	{1, &data_exec_tests, NULL, "data/stack execution"},
 	{1, &machvm_tests, NULL, "Mach VM calls"},
+	{1, &commpage_data_tests, NULL, "Commpage data"},
+#if defined(i386) || defined(__x86_64__)
+	{1, &atomic_fifo_queue_test, NULL, "OSAtomicFifoEnqueue, OSAtomicFifoDequeue"},
+#endif
+	{1, &sched_tests, NULL, "Scheduler tests"},
 	{0, NULL, NULL, "last one"}
 };
 
@@ -136,11 +141,13 @@ int		g_skip_setuid_tests = 0;
 int		g_xilog_active = 0;
 const char *	g_cmd_namep;
 char		g_target_path[ PATH_MAX ];
-int		g_is_under_rosetta = 0;
 int		g_is_single_user = 0;
+int		g_testbots_active = 0;
  
 int main( int argc, const char * argv[] ) 
 {
+	#pragma unused(argc)
+	#pragma unused(argv)
 	int				my_tests_count, i;
 	int				err;
 	int				my_failures = 0;
@@ -255,20 +262,16 @@ int main( int argc, const char * argv[] )
 
 	/* done parsing.
 	 */
-    
-#ifdef __ppc__
-	/* determine if we are running under Rosetta 
-	 */
-	{
-	    int val = 0;
-	    size_t size = sizeof val;
-	    if (sysctlbyname("sysctl.proc_native", &val, &size, NULL, 0) == -1)
-		g_is_under_rosetta = 0;
-	    else
-		g_is_under_rosetta = val ? 0 : 1;
-	}
-#endif
 
+	/* Check if we are running under testbots */
+#if RUN_UNDER_TESTBOTS
+	g_testbots_active = 1;
+#endif
+	/* Code added to run xnu_quick_test under testbots */
+	if ( g_testbots_active == 1 ) {
+		printf("[TEST] xnu_quick_test \n");	/* Declare the beginning of test suite */
+	}
+
 	/* Populate groups list if we're in single user mode */
 	if (setgroups_if_single_user()) {
 		return 1;
@@ -296,14 +299,15 @@ int main( int argc, const char * argv[] )
 	create_target_directory( my_targetp );
 	printf( "Will allow %ld failures before testing is aborted \n", g_max_failures );
 	
-        if (g_is_under_rosetta) {
-                printf("Running under Rosetta.\n");
-        }
-        
 	my_start_time = time( NULL );
 	printf( "\nBegin testing - %s \n", ctime_r( &my_start_time, &my_buffer[0] ) );
 	printf( "Current architecture is %s\n", current_arch() );
 
+	/* Code added to run xnu_quick_test under testbots */
+	if ( g_testbots_active == 1 ) {
+		printf("[PASS] xnu_quick_test started\n");
+	}
+
 	/* run each test that is marked to run in our table until we complete all of them or
 	 * hit the maximum number of failures.
 	 */
@@ -322,6 +326,7 @@ int main( int argc, const char * argv[] )
 		}
 #endif
 		printf( "test #%d - %s \n", (i + 1), my_testp->test_infop );
+		fflush(stdout);
 		my_err = my_testp->test_routine( my_testp->test_input );
 		if ( my_err != 0 ) {
 			printf("\t--> FAILED \n");
@@ -334,19 +339,38 @@ int main( int argc, const char * argv[] )
 			my_failures++;
 			if ( my_failures > g_max_failures ) {
 #if !TARGET_OS_EMBEDDED	
-				if (g_xilog_active == 1) {	
+				if (g_xilog_active == 1) {
+					XILogMsg("Reached the maximum number of failures - Aborting xnu_quick_test.");
 					XILogEndTestCase( logRef, kXILogTestPassOnErrorLevel );
 				}
 #endif
-				printf( "\n too many failures - test aborted \n" );
+				printf( "\n Reached the maximum number of failures - Aborting xnu_quick_test. \n" );
+				/* Code added to run xnu_quick_test under testbots */
+				if ( g_testbots_active == 1 ) {
+					printf("[FAIL] %s \n", my_testp->test_infop);
+				}
 				goto exit_this_routine;
 			}
+			/* Code added to run xnu_quick_test under testbots */
+			if ( g_testbots_active == 1 ) {
+				printf("[FAIL] %s \n", my_testp->test_infop);
+			}			
+#if !TARGET_OS_EMBEDDED	
+			if (g_xilog_active == 1) {
+				XILogEndTestCase( logRef, kXILogTestPassOnErrorLevel );
+			}
+#endif
+			continue;
 		}
 #if !TARGET_OS_EMBEDDED	
 		if (g_xilog_active == 1) {	
 			XILogEndTestCase(logRef, kXILogTestPassOnErrorLevel);
 		}
 #endif
+		/* Code added to run xnu_quick_test under testbots */
+		if ( g_testbots_active == 1 ) {
+			printf("[PASS] %s \n", my_testp->test_infop);
+		}
 	}
 	
 exit_this_routine:
diff --git a/tools/tests/xnu_quick_test/makefile b/tools/tests/xnu_quick_test/makefile
index 65a770c3e..554416475 100644
--- a/tools/tests/xnu_quick_test/makefile
+++ b/tools/tests/xnu_quick_test/makefile
@@ -4,7 +4,7 @@ Embedded=$(shell tconf --test TARGET_OS_EMBEDDED)
 
 ifeq "$(Embedded)" "YES"
 XILogFLAG =
-SDKPATH = $(shell xcodebuild -sdk $(SDKROOT) -version | grep Path | cut -f 2 -d " ")
+SDKPATH = $(shell xcodebuild -sdk $(SDKROOT) -version Path)
 CFLAGS += -isysroot $(SDKPATH)
 LIBFLAGS += -isysroot $(SDKPATH)
 else
@@ -38,7 +38,7 @@ else
 	endif
 	
 	ifndef ARCH
-		ARCH=i386 x86_64 ppc
+		ARCH=i386 x86_64
 		# this hack should be removed once tconf gets
 		# <rdar://problem/6618734>
 		ifeq "$(Product)" "iPhone"
@@ -56,6 +56,7 @@ else
 	CFLAGS += $(MY_ARCH)
 endif
 
+
 CFLAGS += -g -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders/ -F/AppleInternal/Library/Frameworks/ $(MORECFLAGS)
 LIBFLAGS += -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders  -F/AppleInternal/Library/Frameworks/ $(XILogFLAG)
 
@@ -63,7 +64,8 @@ MY_OBJECTS = $(OBJROOT)/main.o $(OBJROOT)/memory_tests.o $(OBJROOT)/misc.o \
 			 $(OBJROOT)/sema_tests.o $(OBJROOT)/shared_memory_tests.o \
 			 $(OBJROOT)/socket_tests.o $(OBJROOT)/tests.o \
 			 $(OBJROOT)/xattr_tests.o $(OBJROOT)/kqueue_tests.o \
-			 $(OBJROOT)/machvm_tests.o
+			 $(OBJROOT)/machvm_tests.o $(OBJROOT)/commpage_tests.o \
+			 $(OBJROOT)/atomic_fifo_queue_test.o $(OBJROOT)/sched_tests.o
 
 ifneq "$(Product)" "iPhone"
 MY_OBJECTS += $(OBJROOT)/32bit_inode_tests.o
@@ -79,6 +81,14 @@ xnu_quick_test : $(OBJROOT) $(DSTROOT) $(MY_OBJECTS) helpers
 	@$(CHOWN_COMMAND) || echo $(PERM_ADVICE)
 	sudo chmod 4755 $(DSTROOT)/xnu_quick_test
 
+# This target is defined for testbots. 
+# Before compiling this target, MORECFLAGS must be set to "-D RUN_UNDER_TESTBOTS=1"; see the README file for more details.
+# NOTE: -f[ailures] MAX_FAILS_ALLOWED option is set to 100 to make sure we completely run the test suite and 
+# report all the failures.
+
+testbots: xnu_quick_test 
+	@(cd $(DSTROOT) ; ./xnu_quick_test -f 100)	
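+# (e.g. invoke as: make testbots MORECFLAGS="-D RUN_UNDER_TESTBOTS=1")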
+
 # The helper binaries are used to test exec()'ing between 64bit and 32bit. 
 # Creates test binaries with page zero sizes = 4KB and 4GB. Also creates 32-bit
 # helper processes for the 64-bit version of xnu_quick_test to test the conversion
@@ -90,21 +100,21 @@ endif
 ifeq "$(Product)" "MacOSX"
 	$(CC) -arch x86_64 -pagezero_size 0x100000000 helpers/sleep.c -o $(DSTROOT)/helpers/sleep-x86_64-4G
 	$(CC) -arch x86_64 -pagezero_size 0x1000      helpers/sleep.c -o $(DSTROOT)/helpers/sleep-x86_64-4K
-	$(CC) -arch ppc                               helpers/sleep.c -o $(DSTROOT)/helpers/sleep-ppc32
 endif
 ifneq "$(Product)" "iPhone"
 	$(CC) $(LIBFLAGS) -arch i386	$(OBJROOT)/misc.o helpers/launch.c -o $(DSTROOT)/helpers/launch-i386
 endif
 ifeq "$(Product)" "MacOSX"
 	$(CC) $(LIBFLAGS) -arch x86_64	$(OBJROOT)/misc.o helpers/launch.c -o $(DSTROOT)/helpers/launch-x86_64
-	$(CC) $(LIBFLAGS) -arch ppc	$(OBJROOT)/misc.o helpers/launch.c -o $(DSTROOT)/helpers/launch-ppc
 	$(CC) $(MY_ARCH) 	helpers/arch.c -o $(DSTROOT)/helpers/arch
 	$(CC) $(MY_ARCH) 	helpers/data_exec.c -o $(DSTROOT)/helpers/data_exec
+	$(CC) -arch i386 	-DNXDATA32TESTNONX helpers/data_exec.c -o $(DSTROOT)/helpers/data_exec32nonxspawn
 
 endif
 ifeq "$(Product)" "iPhone"
 	$(CC) -arch armv6 -isysroot $(SDKROOT) $(CFLAGS) helpers/sleep.c -o $(DSTROOT)/helpers/sleep-arm
 	$(CC) $(LIBFLAGS) -arch armv6 -isysroot $(SDKROOT) $(OBJROOT)/misc.o helpers/launch.c -o $(DSTROOT)/helpers/launch-arm
+	$(CC) $(MY_ARCH) -isysroot $(SDKROOT)	helpers/arch.c -o $(DSTROOT)/helpers/arch
 endif
 	
 	
@@ -128,7 +138,7 @@ $(OBJROOT)/misc.o : misc.c tests.h
 ifeq "$(Product)" "iPhone"
 	$(CC) -arch armv6 $(CFLAGS) -c misc.c   -o $@
 else
-	$(CC) -arch i386 -arch x86_64 -arch ppc $(CFLAGS) -c misc.c   -o $@
+	$(CC) -arch i386 -arch x86_64 $(CFLAGS) -c misc.c   -o $@
 endif
 	
 $(OBJROOT)/sema_tests.o : sema_tests.c tests.h
@@ -149,12 +159,22 @@ $(OBJROOT)/xattr_tests.o : xattr_tests.c tests.h
 $(OBJROOT)/machvm_tests.o : machvm_tests.c tests.h
 	$(CC) $(CFLAGS) -c machvm_tests.c    -o $@
 
+$(OBJROOT)/sched_tests.o : sched_tests.c tests.h
+	$(CC) $(CFLAGS) -c sched_tests.c    -o $@
+
 $(OBJROOT)/kqueue_tests.o : kqueue_tests.c tests.h
 	$(CC) $(CFLAGS) -c kqueue_tests.c   -o $@
 
 $(OBJROOT)/32bit_inode_tests.o : 32bit_inode_tests.c tests.h
 	$(CC) $(CFLAGS) -c 32bit_inode_tests.c    -o $@
 
+$(OBJROOT)/commpage_tests.o : commpage_tests.c tests.h
+	$(CC) $(CFLAGS) -c commpage_tests.c    -o $@
+	
+$(OBJROOT)/atomic_fifo_queue_test.o : atomic_fifo_queue_test.c tests.h
+	$(CC) $(CFLAGS) -c atomic_fifo_queue_test.c    -o $@
+
+
 ifndef DOING_BUILDIT
 .PHONY : clean
 clean :
diff --git a/tools/tests/xnu_quick_test/memory_tests.c b/tools/tests/xnu_quick_test/memory_tests.c
index eb8817b9a..dc8675087 100644
--- a/tools/tests/xnu_quick_test/memory_tests.c
+++ b/tools/tests/xnu_quick_test/memory_tests.c
@@ -9,9 +9,92 @@
 
 #include "tests.h"
 #include <mach/mach.h>
+#include <dirent.h>		/* crashcount() */
 
 extern char  g_target_path[ PATH_MAX ];
 
+/*
+ * static to localize to this compilation unit; volatile to avoid register
+ * optimization which would prevent modification by a signal handler.
+ */
+static volatile int	my_err;
+
+/*
+ * Handler; used by memory_tests() child to reset my_err so that it will
+ * exit normally following a SIGBUS, rather than triggering a crash report;
+ * this depends on setting the error non-zero before triggering the condition
+ * that would trigger a SIGBUS.  To avoid confusion, this is most easily done
+ * right before the test in question, and if there are subsequent tests, then
+ * undone immediately after to avoid false test negatives.
+ */
+void
+bus_handler(int sig, siginfo_t *si, void *mcontext)
+{
+	/* Reset global error value when we see a SIGBUS */
+	if (sig == SIGBUS)
+		my_err = 0;
+}
+
+/*
+ * Count the number of crashes for us in /Library/Logs/CrashReporter/
+ *
+ * XXX Assumes that CrashReporter uses our name as a prefix
+ * XXX Assumes no one else has the same prefix as our name
+ */
+int
+crashcount(char *namebuf1, char *namebuf2)
+{
+	char		*crashdir1 = "/Library/Logs/CrashReporter";
+	char            *crashdir2 = "/Library/Logs/DiagnosticReports";
+	char		*crash_file_pfx = "xnu_quick_test";
+	int		crash_file_pfxlen = strlen(crash_file_pfx);
+	struct stat	sb;
+	DIR		*dirp1, *dirp2;
+	struct dirent	*dep1, *dep2;
+	int		count = 0;
+
+	/* If we can't open the directory, it hasn't been created */
+	if ((dirp1 = opendir(crashdir1)) == NULL) {
+		return( 0 );
+	}
+
+	while((dep1 = readdir(dirp1)) != NULL) {
+		if (strncmp(crash_file_pfx, dep1->d_name, crash_file_pfxlen))
+			continue;
+		/* record each one to get the last one */
+		if (namebuf1) {
+			strcpy(namebuf1, crashdir1);
+			strcat(namebuf1, "/");
+			strcat(namebuf1, dep1->d_name);
+		}
+		count++;
+	}
+
+	closedir(dirp1);
+
+	/* If we can't open the directory, it hasn't been created */
+	if ((dirp2 = opendir(crashdir2)) == NULL) {
+		return( 0 );
+	}
+
+	while((dep2 = readdir(dirp2)) != NULL) {
+		if (strncmp(crash_file_pfx, dep2->d_name, crash_file_pfxlen))
+			continue;
+		/* record each one to get the last one */
+		if (namebuf2) {
+			strcpy(namebuf2, crashdir2);
+			strcat(namebuf2, "/");
+			strcat(namebuf2, dep2->d_name);
+		}
+		count++;
+	}
+
+	closedir(dirp2);
+
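+	/*
+	 * Each crash is assumed to be reported in both directories, so the
+	 * combined total is halved; if only one directory exists, this
+	 * undercounts.
+	 */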
+	return( count/2 );
+}
+
+
 /*  **************************************************************************************************************
  *	Test madvise, mincore, minherit, mlock, mlock, mmap, mprotect, msync, munmap system calls.
  *	todo - see if Francois has better versions of these tests...
@@ -19,7 +102,6 @@ extern char  g_target_path[ PATH_MAX ];
  */
 int memory_tests( void * the_argp )
 {
-	int			my_err;
 	int			my_page_size, my_status;
 	int			my_fd = -1;
 	char *		my_pathp = NULL;
@@ -29,6 +111,10 @@ int memory_tests( void * the_argp )
 	ssize_t		my_result;
 	pid_t		my_pid, my_wait_pid;
 	kern_return_t   my_kr;		
+	struct sigaction	my_sa;
+	static int	my_crashcount;
+	static char	my_namebuf1[256];	/* XXX big enough */
+	static char     my_namebuf2[256];
 
         my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
         if(my_kr != KERN_SUCCESS){
@@ -65,6 +151,16 @@ int memory_tests( void * the_argp )
 		goto test_failed_exit;
 	}
 
+	/*
+	 * Find out how many crashes there have already been; if it's not
+	 * zero, then don't even attempt this test.
+	 */
+	if ((my_crashcount = crashcount(my_namebuf1, my_namebuf2)) != 0) {
+		printf( "memtest aborted: cannot distinguish our expected crash from \n");
+		printf( "%d existing crashes including %s \n", my_crashcount, my_namebuf2);
+		goto test_failed_exit;
+	}
+
 	/*
 	 * spin off a child process that we will use for testing.   
 	 */
@@ -252,18 +348,35 @@ int memory_tests( void * the_argp )
 			goto exit_child;
 		}
 
+		/*
+		 * Establish SIGBUS handler; will reset (disable itself) if it fires;
+		 * we would need how to recover from the exceptional condition that
+		 * raised the SIGBUS by modifying the contents of the (opaque to us)
+		 * mcontext in order to prevent this from being terminal, so we let
+		 * it be terminal.  This is enough to avoid triggering crash reporter.
+		 */
+		my_sa.sa_sigaction = bus_handler;
+		my_sa.sa_flags = SA_SIGINFO | SA_RESETHAND;
+		if ((my_err = sigaction(SIGBUS, &my_sa, NULL)) != 0) {
+			printf("sigaction call failed with error %d - \"%s\" \n", errno, strerror( errno) );
+			my_err = -1;
+			goto exit_child;
+		}
+
+		my_err = -1;	/* default to error out if we do NOT trigger a SIGBUS */
+
 		*my_addr = 'z'; /* should cause SIGBUS signal (we look for this at child termination within the parent) */
-	
 
-		 
-		my_err = 0;
+		/* NOTREACHED */
+
+		printf("Expected SIGBUS signal, got nothing!\n");
+		my_err = -1;
 exit_child:
 		exit( my_err );
 	}
 
-	
 	/* parent process -
-	 * we should get SIGBUS exit when child tries to write to read only memory 
+	 * we should get no error if the child has completed all tests successfully
 	 */
 	my_wait_pid = wait4( my_pid, &my_status, 0, NULL );
 	if ( my_wait_pid == -1 ) {
@@ -277,11 +390,39 @@ exit_child:
 		goto test_failed_exit;
 	}
 
-	if ( WIFSIGNALED( my_status ) && WTERMSIG( my_status ) != SIGBUS ) {
-		printf( "wait4 returned wrong signal status - 0x%02X \n", my_status );
+	/* If the child was not signalled, or died from an unexpected signal, report it.
+	 */
+	if ( !WIFSIGNALED( my_status ) || WTERMSIG( my_status ) != SIGBUS) {
+		printf( "wait4 shows child died with unexpected status - 0x%02X \n", my_status );
 		goto test_failed_exit;
 	}
 
+	/*
+	 * Wait long enough that CrashReporter has finished.
+	 */
+	sleep(5);
+
+	/*
+	 * Count the crash reports again; if there is not exactly one new
+	 * report, then the child did not crash the way we expected.
+	 */
+	if ((my_crashcount = crashcount(my_namebuf1, my_namebuf2)) != 1) {
+		printf( "child did not crash as expected \n");
+		printf( "saw %d crashes including %s \n", my_crashcount, my_namebuf2);
+		goto test_failed_exit;
+	}
+
+	/* post-remove the expected crash report */
+	if (unlink(my_namebuf1)) {
+		printf("unlink of expected crash report '%s' failed \n", my_namebuf1);
+		goto test_failed_exit;
+	}
+
+        if (unlink(my_namebuf2)) {
+                printf("unlink of expected crash report '%s' failed \n", my_namebuf2);
+                goto test_failed_exit;
+        }
+
 	/* make sure shared page got modified in child */
 	if ( strcmp( my_test_page_p, "parent data child data" ) != 0 ) {
 		printf( "minherit did not work correctly - shared page looks wrong \n" );
diff --git a/tools/tests/xnu_quick_test/misc.c b/tools/tests/xnu_quick_test/misc.c
index 1fcc298f1..9545bf140 100644
--- a/tools/tests/xnu_quick_test/misc.c
+++ b/tools/tests/xnu_quick_test/misc.c
@@ -277,9 +277,12 @@ int do_spawn_test(int arch, int shouldfail)
 		}
 		my_err = 0;
 	} else {
-		/* child should exit with return code == arch */
+		/*
+		 * child should exit with return code == arch; note that the
+		 * posix_spawn error numbers are *returned*, NOT set in errno!!!
+		 */
 		if (my_err != 0) {
-			printf("posix_spawn failed with errno %d - %s\n", errno, strerror(errno));
+			printf("posix_spawn failed with errno %d - %s\n", my_err, strerror(my_err));
 			goto done;
 		}
 
@@ -324,10 +327,6 @@ int get_architecture()
 	case CPU_TYPE_X86_64:
 		rval = INTEL;
 		break;
-	case CPU_TYPE_POWERPC:
-	case CPU_TYPE_POWERPC64:
-		rval = POWERPC;
-		break;
 	case CPU_TYPE_ARM:
 		rval = ARM;
 		break;
diff --git a/tools/tests/xnu_quick_test/sched_tests.c b/tools/tests/xnu_quick_test/sched_tests.c
new file mode 100644
index 000000000..6dd23bf68
--- /dev/null
+++ b/tools/tests/xnu_quick_test/sched_tests.c
@@ -0,0 +1,231 @@
+/*
+ *  sched_tests.c
+ *  xnu_quick_test
+ *
+ *  Copyright 2011 Apple Inc. All rights reserved.
+ *
+ */
+
+#include "tests.h"
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+#include <mach/semaphore.h>
+#include <unistd.h>
+#include <err.h>
+#include <sys/param.h>
+#include <pthread.h>
+
+#define DEBUG 0
+
+#if DEBUG
+#define dprintf(...) printf(__VA_ARGS__)
+#else
+#define dprintf(...) do { } while(0)
+#endif
+
+static uint64_t
+nanos_to_abs(uint64_t ns, uint32_t numer, uint32_t denom)
+{
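+	/* mach_absolute_time() ticks satisfy ns = ticks * numer / denom,
+	 * so ticks = ns * denom / numer. */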
+	return (uint64_t)(ns * (((double)denom) / ((double)numer)));
+}
+
+static void set_realtime(void) {
+	struct mach_timebase_info mti;
+	thread_time_constraint_policy_data_t pol;
+	kern_return_t kret;
+
+	kret = mach_timebase_info(&mti);
+	if (kret != KERN_SUCCESS) {
+		warnx("Could not get timebase info %d", kret);
+		return;
+	}
+
+	/* 1s 100ms 10ms */
+	pol.period      = nanos_to_abs(1000000000, mti.numer, mti.denom);
+	pol.constraint  = nanos_to_abs(100000000,  mti.numer, mti.denom);
+	pol.computation = nanos_to_abs(10000000,   mti.numer, mti.denom);
+	pol.preemptible = 0; /* Ignored by OS */
+
+	kret = thread_policy_set(mach_thread_self(), THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t) &pol, THREAD_TIME_CONSTRAINT_POLICY_COUNT);
+	if (kret != KERN_SUCCESS) {
+		warnx("Failed to set realtime %d", kret);
+	}
+}
+
+struct t1_ctx {
+	pthread_t __p;
+	int currentThread;
+	int totalThreads;
+	boolean_t useRealtime;
+	semaphore_t wait_to_start;
+	semaphore_t next_waiter;
+
+	semaphore_t common_sema; /* main thing everyone blocks on */
+	uint64_t wakeup_time; /* out parameter */
+};
+
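+/*
+ * Worker thread: wait for our turn on wait_to_start, optionally promote
+ * ourselves to realtime, signal next_waiter so the next thread may start,
+ * then block on the shared common_sema and record our wakeup time.
+ */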
+void *t1(void *arg) {
+	struct t1_ctx *ctx = (struct t1_ctx *)arg;
+	kern_return_t kret;
+
+	dprintf("thread %d (pthread %p) started\n", ctx->currentThread, pthread_self());
+
+	/* Wait to allow previous thread to block on common semaphore */
+	kret = semaphore_wait(ctx->wait_to_start);
+	if (kret != KERN_SUCCESS) {
+		warnx("semaphore_wait(wait_to_start) thread %d failed %d",
+			  ctx->currentThread, kret);
+	}
+
+	sleep(1);
+
+	if (ctx->useRealtime) {
+		dprintf("thread %d going realtime\n", ctx->currentThread);
+		set_realtime();
+	}
+
+	kret = semaphore_signal(ctx->next_waiter);
+	if (kret != KERN_SUCCESS) {
+		warnx("semaphore_signal(next_waiter) thread %d failed %d",
+			  ctx->currentThread, kret);
+	}
+
+	/*
+	 * We have 1 second to block on the common semaphore before
+	 * the next thread does.
+	 */
+	dprintf("thread %d blocking on common semaphore\n", ctx->currentThread);
+
+	kret = semaphore_wait(ctx->common_sema);
+	if (kret != KERN_SUCCESS) {
+		warnx("semaphore_wait(common_sema) thread %d failed %d",
+			  ctx->currentThread, kret);
+	}
+
+	/* Save our time for analysis */
+	ctx->wakeup_time = mach_absolute_time();
+	dprintf("thread %d woke up at %llu\n", ctx->currentThread, ctx->wakeup_time);
+
+	kret = semaphore_signal(ctx->common_sema);
+	if (kret != KERN_SUCCESS) {
+		warnx("semaphore_signal(common_sema) thread %d failed %d",
+			  ctx->currentThread, kret);
+	}
+
+	return NULL;
+}
+
+
+int sched_tests( void * the_argp )
+{
+	kern_return_t kret;
+	int ret;
+	int i;
+	semaphore_t common_sema;
+	semaphore_t all_checked_in;
+	
+	struct t1_ctx ctxs[3];
+	
+	/*
+	 * Test 8979062. Ensure that a realtime thread that
+	 * blocks on a semaphore after a non-realtime thread
+	 * gets woken up first.
+	 */
+
+	kret = semaphore_create(mach_task_self(), &common_sema, SYNC_POLICY_FIFO /* not really, in this case */, 0);
+	if (kret != KERN_SUCCESS) {
+		warnx("semaphore_create failed: %d", kret);
+		return -1;
+	}
+
+	kret = semaphore_create(mach_task_self(), &all_checked_in, SYNC_POLICY_FIFO, 0);
+	if (kret != KERN_SUCCESS) {
+		warnx("semaphore_create failed: %d", kret);
+		return -1;
+	}
+
+	memset(&ctxs, 0x00, sizeof(ctxs));
+	for (i=0; i < sizeof(ctxs)/sizeof(ctxs[0]); i++) {
+		ctxs[i].__p = NULL; /* set later */
+		ctxs[i].currentThread = i;
+		ctxs[i].totalThreads = sizeof(ctxs)/sizeof(ctxs[0]);
+		ctxs[i].useRealtime = FALSE;
+
+		kret = semaphore_create(mach_task_self(), &ctxs[i].wait_to_start, SYNC_POLICY_FIFO /* not really, in this case */, 0);
+		if (kret != KERN_SUCCESS) {
+			warnx("semaphore_create failed: %d", kret);
+			return -1;
+		}
+		ctxs[i].next_waiter = MACH_PORT_NULL; /* set later */
+		ctxs[i].common_sema = common_sema;
+		ctxs[i].wakeup_time = 0;
+	}
+
+	ctxs[1].useRealtime = TRUE;
+
+	for (i=1; i < sizeof(ctxs)/sizeof(ctxs[0]); i++) {
+		ctxs[i-1].next_waiter = ctxs[i].wait_to_start;
+	}
+	ctxs[i-1].next_waiter = all_checked_in;
+
+
+	for (i=0; i < sizeof(ctxs)/sizeof(ctxs[0]); i++) {
+		ret = pthread_create(&ctxs[i].__p, NULL, t1, &ctxs[i]);
+		if (ret != 0) {
+			warn("pthread_create failed");
+			return -1;
+		}
+	}
+
+	/* wake up first thread */
+	kret = semaphore_signal(ctxs[0].wait_to_start);
+	if (kret != KERN_SUCCESS) {
+		warnx("semaphore_signal(initial wait_to_start) failed %d", kret);
+		return -1;
+	}
+
+	/* Wait for everyone to have blocked */
+	kret = semaphore_wait(all_checked_in);
+	if (kret != KERN_SUCCESS) {
+		warnx("semaphore_wait(all_checked_in) failed %d", kret);
+		return -1;
+	}
+
+	/* Give some slack for last guy */
+	sleep(1);
+
+	kret = semaphore_signal(common_sema);
+	if (kret != KERN_SUCCESS) {
+		warnx("semaphore_signal(initial common_sema) failed %d", kret);
+		return -1;
+	}
+
+	for (i=0; i < sizeof(ctxs)/sizeof(ctxs[0]); i++) {
+		ret = pthread_join(ctxs[i].__p, NULL);
+		if (ret != 0) {
+			warn("pthread_join failed");
+			return -1;
+		}
+	}
+
+	dprintf("All threads joined\n");
+
+	/*
+	 * Our expectation is that thread 1 was realtime and
+	 * finished first, followed by 0 and then 2
+	 */
+	if ((ctxs[1].wakeup_time < ctxs[0].wakeup_time)
+		&& (ctxs[0].wakeup_time < ctxs[2].wakeup_time)) {
+		/* success */
+	} else {
+		warnx("Threads woken out of order %llu %llu %llu",
+			  ctxs[0].wakeup_time, ctxs[1].wakeup_time,
+			  ctxs[2].wakeup_time);
+		return -1;
+	}
+
+	return 0;
+}
+
diff --git a/tools/tests/xnu_quick_test/socket_tests.c b/tools/tests/xnu_quick_test/socket_tests.c
index c80172e19..00433d5ba 100644
--- a/tools/tests/xnu_quick_test/socket_tests.c
+++ b/tools/tests/xnu_quick_test/socket_tests.c
@@ -12,7 +12,6 @@
 #include <mach/mach.h>
 
 extern char  g_target_path[ PATH_MAX ];
-extern int		g_is_under_rosetta;
 
 /*  **************************************************************************************************************
  *	Test accept, bind, connect, listen, socket, recvmsg, sendmsg, recvfrom, sendto, getpeername, getsockname
@@ -213,51 +212,49 @@ int socket_tests( void * the_argp )
 		
 #if 1
 		/* sendfile test. Open libsystem, set up some headers, and send it */
-		if (!g_is_under_rosetta) {
-			struct sf_hdtr		my_sf_hdtr;
-			int					my_libsys_fd;
-			off_t				my_libsys_len;
-
-			my_libsys_fd = open("/usr/lib/libSystem.dylib", O_RDONLY, 0644);
-			if (my_libsys_fd < 0) {
-				printf( "test failed - could not open /usr/lib/libSystem.dylib\n" );
-			 	close ( my_child_fd );
-				exit ( -1 );
-			}
+		struct sf_hdtr		my_sf_hdtr;
+		int					my_libsys_fd;
+		off_t				my_libsys_len;
+
+		my_libsys_fd = open("/usr/lib/libSystem.dylib", O_RDONLY, 0644);
+		if (my_libsys_fd < 0) {
+			printf( "test failed - could not open /usr/lib/libSystem.dylib\n" );
+			close ( my_child_fd );
+			exit ( -1 );
+		}
 
-			my_libsys_len = 7+2; /* 2 bytes of header */
-			my_buffer[0] = 's';
-			my_iov[0].iov_base = &my_buffer[0];
-			my_iov[0].iov_len = 1;
-			my_buffer[1] = 'e';
-			my_iov[1].iov_base = &my_buffer[1];
-			my_iov[1].iov_len = 1;
-			my_buffer[2] = 'n';
-			my_iov[2].iov_base = &my_buffer[2];
-			my_iov[2].iov_len = 1;
-			my_buffer[3] = 'd';
-			my_iov[3].iov_base = &my_buffer[3];
-			my_iov[3].iov_len = 1;
-
-			my_sf_hdtr.headers = &my_iov[0];
-			my_sf_hdtr.hdr_cnt = 2;
-			my_sf_hdtr.trailers = &my_iov[2];
-			my_sf_hdtr.trl_cnt = 2;
+		my_libsys_len = 7+2; /* 2 bytes of header */
+		my_buffer[0] = 's';
+		my_iov[0].iov_base = &my_buffer[0];
+		my_iov[0].iov_len = 1;
+		my_buffer[1] = 'e';
+		my_iov[1].iov_base = &my_buffer[1];
+		my_iov[1].iov_len = 1;
+		my_buffer[2] = 'n';
+		my_iov[2].iov_base = &my_buffer[2];
+		my_iov[2].iov_len = 1;
+		my_buffer[3] = 'd';
+		my_iov[3].iov_base = &my_buffer[3];
+		my_iov[3].iov_len = 1;
+
+		my_sf_hdtr.headers = &my_iov[0];
+		my_sf_hdtr.hdr_cnt = 2;
+		my_sf_hdtr.trailers = &my_iov[2];
+		my_sf_hdtr.trl_cnt = 2;
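+		/* 9 bytes requested (7 from the file plus the 2 header bytes);
+		 * the returned total should be 11 once the 2 trailer bytes are
+		 * counted. */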
 			
-	 		my_result = sendfile(my_libsys_fd, my_child_fd, 3, &my_libsys_len, &my_sf_hdtr, 0);
-			if (my_result < 0 || my_libsys_len != 11) {
-				printf( "sendfile failed with error %d - \"%s\" \n", errno, strerror( errno) );
-				close( my_child_fd );
-				exit( -1 );
-			}
+		my_result = sendfile(my_libsys_fd, my_child_fd, 3, &my_libsys_len, &my_sf_hdtr, 0);
+		if (my_result < 0 || my_libsys_len != 11) {
+			printf( "sendfile failed with error %d - \"%s\" \n", errno, strerror( errno) );
+			close( my_child_fd );
+			exit( -1 );
+		}
 
-			my_result = close ( my_libsys_fd );
-			if ( my_libsys_fd < 0 ) {
-				printf ( "close failed with error %d - \"%s\" \n", errno, strerror( errno) );
-				close ( my_child_fd );
-				exit ( -1 );
-			}
-		}		
+		my_result = close ( my_libsys_fd );
+		if ( my_libsys_fd < 0 ) {
+			printf ( "close failed with error %d - \"%s\" \n", errno, strerror( errno) );
+			close ( my_child_fd );
+			exit ( -1 );
+		}
 #endif
 
 		/* tell parent we're done */
@@ -332,31 +329,29 @@ int socket_tests( void * the_argp )
 #endif
 
 #if 1
-		if (!g_is_under_rosetta) {
-			size_t neededBytes = 11;
+		size_t neededBytes = 11;
 			
-			/* Check for sendfile output */
-			bzero( (void *)&my_parent_buffer[0], sizeof(my_parent_buffer) );
-			while (neededBytes > 0) {
-				my_result = read( my_accepted_socket, &my_parent_buffer[11-neededBytes], neededBytes );
-				if ( my_result == -1 ) {
-					printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-					goto test_failed_exit;
-				} else if (my_result == 0) {
-					break;
-				}
-				neededBytes -= my_result;
-			}
-			
-			if ( neededBytes > 0 ) {
-				printf( "read call returned %ld bytes instead of 11\n", 11 - neededBytes );
+		/* Check for sendfile output */
+		bzero( (void *)&my_parent_buffer[0], sizeof(my_parent_buffer) );
+		while (neededBytes > 0) {
+			my_result = read( my_accepted_socket, &my_parent_buffer[11-neededBytes], neededBytes );
+			if ( my_result == -1 ) {
+				printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
 				goto test_failed_exit;
+			} else if (my_result == 0) {
+				break;
 			}
+			neededBytes -= my_result;
+		}
+			
+		if ( neededBytes > 0 ) {
+			printf( "read call returned %ld bytes instead of 11\n", 11 - neededBytes );
+			goto test_failed_exit;
+		}
 
-			if ( ! (my_parent_buffer[0] == 's' && my_parent_buffer[1] == 'e' && my_parent_buffer[9] == 'n' && my_parent_buffer[10] == 'd') ) {
-				printf( "read wrong sendfile message from child \n" );
-				goto test_failed_exit;
-			}
+		if ( ! (my_parent_buffer[0] == 's' && my_parent_buffer[1] == 'e' && my_parent_buffer[9] == 'n' && my_parent_buffer[10] == 'd') ) {
+			printf( "read wrong sendfile message from child \n" );
+			goto test_failed_exit;
 		}
 		
 #endif
diff --git a/tools/tests/xnu_quick_test/tests.c b/tools/tests/xnu_quick_test/tests.c
index a916b6a38..2d79c6be5 100644
--- a/tools/tests/xnu_quick_test/tests.c
+++ b/tools/tests/xnu_quick_test/tests.c
@@ -16,9 +16,9 @@
 #include <libkern/OSByteOrder.h> /* for OSSwap32() */
 #include <mach/mach.h>
 
+
 extern char		g_target_path[ PATH_MAX ];
 extern int		g_skip_setuid_tests;
-extern int		g_is_under_rosetta;
 extern int		g_is_single_user;
 
 
@@ -896,6 +896,7 @@ int access_chmod_fchmod_test( void * the_argp )
 	int			my_err;
 	int			my_fd = -1;
 	char *		my_pathp = NULL;
+	uid_t euid,ruid;
 	struct stat		my_sb;
 	kern_return_t           my_kr;
 
@@ -915,6 +916,7 @@ int access_chmod_fchmod_test( void * the_argp )
 		goto test_failed_exit;
 	}
 	
+	
 	/* test chmod */
 	my_err = chmod( my_pathp, S_IRWXU );
 	if ( my_err == -1 ) {
@@ -940,17 +942,10 @@ int access_chmod_fchmod_test( void * the_argp )
 		
 		/* special case when running as root - we get back EPERM when running as root */
 		my_err = errno;
-#if !TARGET_OS_EMBEDDED
 		if ( ( tmp == 0 && my_err != EPERM) || (tmp != 0 && my_err != EACCES) ) {
 			printf( "access failed with errno %d - %s. \n", my_err, strerror( my_err ) );
 			goto test_failed_exit;
 		}
-#else
-		if ( ( tmp == 0 && my_err != EACCES) || (tmp != 0 && my_err != EACCES) ) {
-			printf( "access failed with errno %d - %s. \n", my_err, strerror( my_err ) );
-			goto test_failed_exit;
-		}
-#endif
 	}
 
 	/* verify correct modes are set */
@@ -965,7 +960,55 @@ int access_chmod_fchmod_test( void * the_argp )
 		printf( "chmod call appears to have failed.  stat shows incorrect values in st_mode! \n" );
 		goto test_failed_exit;
 	}
-
+	
+	
+	/* another test for the access system call -- refer to radar #6725311 */
+	
+	system("touch /tmp/me");
+	system("echo local | sudo touch /tmp/notme");
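+	/* FILE_ME belongs to us; FILE_NOTME is created via sudo, so the
+	 * unprivileged unlink attempts below should fail. */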
+	
+	euid = geteuid();
+	ruid = getuid();
+	//printf("effective user id is %d: and real user id is %d: \n", (int)euid, (int)ruid);
+	setreuid(ruid, ruid);
+	//printf("effective user id is %d: and real user id is %d: \n", (int)geteuid, (int)getuid);
+	my_err = unlink(FILE_NOTME);
+	if (my_err < 0) {
+		my_err = errno;
+	}
+	if (my_err == 0) {
+		printf("Unresolved: First attempt deleted '" FILE_NOTME "'! \n" );
+		goto test_failed_exit;
+	} else {
+		printf("Status: First attempt to delete '" FILE_NOTME "' failed with error %d - %s.\n", my_err, strerror( my_err ));
+			
+		if (true) {
+			my_err = access(FILE_ME, _DELETE_OK);
+			if (my_err < 0) {
+				my_err = errno;
+			}
+			//printf("Status: access('" FILE_ME "') = %d - %s.\n", my_err, strerror( my_err ));
+			fprintf(stderr, "Status: access('" FILE_ME "') = %d\n", my_err);
+		}
+		my_err = unlink(FILE_NOTME);
+		if (my_err < 0) {
+			my_err = errno;
+		}
+		if (my_err == 0) {
+			printf("Failed: Second attempt deleted '" FILE_NOTME "'!\n");
+			//fprintf(stderr, "Failed: Second attempt deleted '" FILE_NOTME "'!\n");
+			goto test_failed_exit;
+		} else {
+			printf("Passed: Second attempt to delete '" FILE_NOTME "' failed with error %d - %s.\n", my_err, strerror( my_err ));
+			// fprintf(stderr, "Passed: Second attempt to delete '" FILE_NOTME "' failed with error %d\n", my_err);
+		}
+	}
+	setreuid(ruid, euid);
+	//printf("effective user id is %d: and real user id is %d    ---1: \n", euid, ruid);
+	/* end of test */
+	
+	
 	/* test fchmod */
 	my_fd = open( my_pathp, O_RDONLY, 0 );
 	if ( my_fd == -1 ) {
@@ -1219,6 +1262,8 @@ struct vol_attr_buf {
 #pragma pack()
 typedef struct vol_attr_buf vol_attr_buf;
 
+#define STATFS_TEST_PATH	"/tmp"
+
 int fs_stat_tests( void * the_argp )
 {
 	int			my_err, my_count, i;
@@ -1255,7 +1300,7 @@ int fs_stat_tests( void * the_argp )
         }
 
 	my_statfsp = (struct statfs *) my_bufferp;
-	my_err = statfs( "/", my_statfsp );
+	my_err = statfs( STATFS_TEST_PATH, my_statfsp );
 	if ( my_err == -1 ) {
 		printf( "statfs call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
 		goto test_failed_exit;
@@ -1289,7 +1334,7 @@ int fs_stat_tests( void * the_argp )
 #if !TARGET_OS_EMBEDDED
 	/* now try statfs64 */
 	my_statfs64p = (struct statfs64 *) my_buffer64p;
-	my_err = statfs64( "/", my_statfs64p );
+	my_err = statfs64( STATFS_TEST_PATH, my_statfs64p );
 	if ( my_err == -1 ) {
 		printf( "statfs64 call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
 		goto test_failed_exit;
@@ -1338,8 +1383,8 @@ int fs_stat_tests( void * the_argp )
 		}
 	}
 	
-	/* open kernel to use as test file for fstatfs */
- 	my_fd = open( "/mach_kernel", O_RDONLY, 0 );
+	/* open to use as test file for fstatfs */
+ 	my_fd = open( STATFS_TEST_PATH, O_RDONLY, 0 );
 	if ( my_fd == -1 ) {
 		printf( "open call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
 		goto test_failed_exit;
@@ -1384,7 +1429,7 @@ int fs_stat_tests( void * the_argp )
 	} 
 
 	/* try again with statfs */
-	my_err = statfs( "/mach_kernel", my_statfsp );
+	my_err = statfs( STATFS_TEST_PATH , my_statfsp );
 	if ( my_err == -1 ) {
 		printf( "statfs call failed.  got errno %d - %s. \n", errno, strerror( errno ) );
 		goto test_failed_exit;
@@ -1942,12 +1987,8 @@ int execve_kill_vfork_test( void * the_argp )
 	}
 	
 	if (get_architecture() == INTEL) {
-		int ppc_fail_flag = 0;
 		struct stat sb;
 
-		if (stat("/usr/libexec/oah/translate", &sb))
-			ppc_fail_flag = 1;
-
 		if (bits == 64 && sizeof(long) == 8) {
 			/*
 			 * Running on x86_64 hardware and running in 64-bit mode.
@@ -1970,19 +2011,12 @@ int execve_kill_vfork_test( void * the_argp )
 			argvs[0] = "launch-i386";
 			if (do_execve_test("helpers/launch-i386", argvs, NULL, 1) != 0)		goto test_failed_exit;
 
-			/* Test posix_spawn for i386, x86_64, and ppc (should succeed) */
+			/* Test posix_spawn for i386, x86_64 (should succeed) */
 			errmsg = NULL;
 			if (do_spawn_test(CPU_TYPE_I386, 0))
 				goto test_failed_exit;
 			if (do_spawn_test(CPU_TYPE_X86_64, 0))
 				goto test_failed_exit;
-			/*
-			 * Note: rosetta is no go in single-user mode
-			 */
-			if (!g_is_single_user) {
-				if (do_spawn_test(CPU_TYPE_POWERPC, ppc_fail_flag))
-					goto test_failed_exit;
-			}
 		}
 		else if (bits == 64 && sizeof(long) == 4) {
 			/*
@@ -2006,19 +2040,12 @@ int execve_kill_vfork_test( void * the_argp )
 			argvs[0] = "launch-x86_64";
 			if (do_execve_test("helpers/launch-x86_64", argvs, NULL, 1) != 0)	goto test_failed_exit;
 
-			/* Test posix_spawn for i386, x86_64, and ppc (should succeed) */
+			/* Test posix_spawn for i386, x86_64 (should succeed) */
 			errmsg = NULL;
 			if (do_spawn_test(CPU_TYPE_I386, 0))
 				goto test_failed_exit;
 			if (do_spawn_test(CPU_TYPE_X86_64, 0))
 				goto test_failed_exit;
-			/*
-			 * Note: rosetta is no go in single-user mode
-			 */
-			if (!g_is_single_user) {
-				if (do_spawn_test(CPU_TYPE_POWERPC, ppc_fail_flag))
-					goto test_failed_exit;
-			}
 		}
 		else if (bits == 32) {
 			/* Running on i386 hardware. Check cases 4. */
@@ -2026,61 +2053,14 @@ int execve_kill_vfork_test( void * the_argp )
 			argvs[0] = "sleep-i386";
 			if (do_execve_test("helpers/sleep-i386", argvs, NULL, 1)) 		goto test_failed_exit;
 
-			/* Test posix_spawn for x86_64 (should fail), i386, and ppc (should succeed) */
+			/* Test posix_spawn for x86_64 (should fail), i386 (should succeed) */
 			errmsg = NULL;
 			if (do_spawn_test(CPU_TYPE_X86_64, 1))
 				goto test_failed_exit;
 			if (do_spawn_test(CPU_TYPE_I386, 0))
 				goto test_failed_exit;
-			/*
-			 * Note: rosetta is no go in single-user mode
-			 */
-			if (!g_is_single_user) {
-				if (do_spawn_test(CPU_TYPE_POWERPC, ppc_fail_flag))
-					goto test_failed_exit;
-			}
-		}
-	}
-	else if (get_architecture() == POWERPC) {
-		if	(bits == 64 && sizeof(long) == 8) {
-			/*
-			 * Running on PPC64 hardware and running in 64-bit mode.
-			 * No longer supported on SnowLeopard.
-			 */ 
-			errmsg = "runnning ppc64 on snowleopard";
-			goto test_failed_exit;
-		}
-		else if	(bits == 64 && sizeof(long) == 4) {
-			/*
-			 * Running as PPC on PPC64 hardware or under Rosetta on x86_64 hardware.
-			 * Check cases 4, 5, 6 and fork a child to check 1, 2, 3. 
-			 */ 
-			errmsg = "execve failed: from ppc forking and exec()ing ppc process.\n";
-			argvs[0] = "sleep-ppc32";
-			if (do_execve_test("helpers/sleep-ppc32", argvs, NULL, 0))	goto test_failed_exit;
-
-			/* Test posix_spawn for i386 and ppc */
-			errmsg = NULL;
-			if (do_spawn_test(CPU_TYPE_I386, (g_is_under_rosetta ? 0 : 1)))
-				goto test_failed_exit;
-			if (do_spawn_test(CPU_TYPE_POWERPC, 0))
-				goto test_failed_exit;
 		}
-		else if (bits == 32) {
-			/* Running on ppc hardware. Check cases 4. */
-			errmsg = "execve failed: from ppc forking and exec()ing 32 bit ppc process.\n";
-			argvs[0] = "sleep-ppc32";
-			if (do_execve_test("helpers/sleep-ppc32", argvs, NULL, 1))		goto test_failed_exit;	
-			/* Test posix_spawn for i386 (should fail) and ppc (should succeed) */
-			errmsg = NULL;
-			 /* when under Rosetta, this process is CPU_TYPE_POWERPC, but the system should be able to run CPU_TYPE_I386 binaries */
-			if (do_spawn_test(CPU_TYPE_I386, (g_is_under_rosetta ? 0 : 1)))
-				goto test_failed_exit;
-			if (do_spawn_test(CPU_TYPE_POWERPC, 0))
-				goto test_failed_exit;
-		}
-	}
-	else if(get_architecture() == ARM) {
+	} else if (get_architecture() == ARM) {
 		if	(bits == 32) {
 
 			/* Running on arm hardware. Check cases 2. */
@@ -2914,36 +2894,11 @@ int acct_test( void * the_argp )
 	/* first letters in ac_comm should match the name of the executable */
 	if ( getuid( ) != my_acctp->ac_uid || getgid( ) != my_acctp->ac_gid ||
 			my_acctp->ac_comm[0] != 't' || my_acctp->ac_comm[1] != 'r' ) {
-		if (g_is_under_rosetta) {
-			// on x86 systems, data written by kernel to accounting info file is little endian; 
-                        // but Rosetta processes expects it to be big endian; so swap the uid for our test
-			if ( getuid( ) != OSSwapInt32(my_acctp->ac_uid) || 
-					getgid( ) != OSSwapInt32(my_acctp->ac_gid) ||
-					my_acctp->ac_comm[0] != 't' || 
-					my_acctp->ac_comm[1] != 'r' ) {
-				printf( "accounting data does not look correct under Rosetta:\n" );
-				printf( "------------------------\n" );
-				printf( "my_acctp->ac_uid = %lu (should be: %lu)\n",
-					(unsigned long) OSSwapInt32( my_acctp->ac_uid ), (unsigned long) getuid() );
-				printf( "my_acctp->ac_gid = %lu (should be: %lu)\n", 
-					(unsigned long) OSSwapInt32( my_acctp->ac_gid ), (unsigned long) getgid() );
-
-				print_acct_debug_strings(my_acctp->ac_comm);
-			}
-			else {
-				// is cool under Rosetta 
-				my_err = 0;
-				goto test_passed_exit;
-			}
-		}
-		else {
-			printf( "accounting data does not look correct:\n" );
 			printf( "------------------------\n" );
 			printf( "my_acctp->ac_uid = %lu (should be: %lu)\n", (unsigned long) my_acctp->ac_uid, (unsigned long) getuid() );
 			printf( "my_acctp->ac_gid = %lu (should be: %lu)\n", (unsigned long) my_acctp->ac_gid, (unsigned long) getgid() );
 
 			print_acct_debug_strings(my_acctp->ac_comm);
-		}
 		
 		goto test_failed_exit;
 	}
@@ -3320,6 +3275,7 @@ int fcntl_test( void * the_argp )
 {
 	int			my_err, my_result, my_tmep;
 	int			my_fd = -1;
+	int			my_newfd = -1;
 	char *		my_pathp = NULL;
 	kern_return_t           my_kr;
 
@@ -3376,7 +3332,60 @@ int fcntl_test( void * the_argp )
 		printf( "fcntl - F_SETFD failed to set FD_CLOEXEC correctly!!! \n" );
 		goto test_failed_exit;
 	}
-	
+
+	/* dup it to a new fd with FD_CLOEXEC forced on */
+
+	my_result = fcntl( my_fd, F_DUPFD_CLOEXEC, 0);
+	if ( my_result == -1 ) {
+		printf( "fcntl - F_DUPFD_CLOEXEC - failed with error %d - \"%s\" \n", errno, strerror( errno) );
+		goto test_failed_exit;
+	}
+	my_newfd = my_result;
+
+	/* check to see that it too is marked with FD_CLOEXEC */
+
+	my_result = fcntl( my_newfd, F_GETFD, 0);
+	if ( my_result == -1 ) {
+		printf( "fcntl - F_GETFD - failed with error %d - \"%s\" \n", errno, strerror( errno) );
+		goto test_failed_exit;
+	}
+	if ( (my_result & FD_CLOEXEC) == 0 ) {
+		printf( "fcntl - F_DUPFD_CLOEXEC failed to set FD_CLOEXEC!!! \n" );
+		goto test_failed_exit;
+	}
+
+	close( my_newfd );
+	my_newfd = -1;
+
+	/* While we're here, dup it via an open of /dev/fd/<fd> .. */
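+	/* (FD_CLOEXEC is per-descriptor state and is never inherited by a newly opened descriptor, so the new fd is only marked because we pass O_CLOEXEC) */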
+
+	{
+		char devfdpath[PATH_MAX];
+
+		(void) snprintf( devfdpath, sizeof (devfdpath),
+			"/dev/fd/%u", my_fd );
+		my_result = open( devfdpath, O_RDONLY | O_CLOEXEC );
+	}
+	if ( my_result == -1 ) {
+		printf( "open call failed on /dev/fd/%u with error %d - \"%s\" \n", my_fd, errno, strerror( errno) );
+		goto test_failed_exit;
+	}
+	my_newfd = my_result;
+
+	/* check to see that it too is marked with FD_CLOEXEC */
+
+	my_result = fcntl( my_newfd, F_GETFD, 0);
+	if ( my_result == -1 ) {
+		printf( "fcntl - F_GETFD - failed with error %d - \"%s\" \n", errno, strerror( errno) );
+		goto test_failed_exit;
+	}
+	if ( (my_result & FD_CLOEXEC) == 0 ) {
+		printf( "fcntl - O_CLOEXEC open of /dev/fd/%u failed to set FD_CLOEXEC!!! \n", my_fd );
+		goto test_failed_exit;
+	}
+	close ( my_newfd );
+	my_newfd = -1;
+
 	my_err = 0;
 	goto test_passed_exit;
 
@@ -3384,6 +3393,8 @@ test_failed_exit:
 	my_err = -1;
 	
 test_passed_exit:
+	if ( my_newfd != -1)
+		close ( my_newfd );
 	if ( my_fd != -1 )
 		close( my_fd );
 	if ( my_pathp != NULL ) {
@@ -4993,6 +5004,7 @@ test_passed_exit:
 }
 
 
+
 /*  **************************************************************************************************************
  *	Test execution from data and stack areas.
  *  **************************************************************************************************************
@@ -5001,7 +5013,11 @@ int data_exec_tests( void * the_argp )
 {
 	int my_err = 0;
 	int arch, bits;
+	posix_spawnattr_t attrp;
+	char *argv[] = { "helpers/data_exec32nonxspawn", NULL };
 
+	int my_pid, my_status, ret;
+	
 	if ((arch = get_architecture()) == -1) {
 		printf("data_exec_test: couldn't determine architecture\n");
 		goto test_failed_exit;
@@ -5026,11 +5042,21 @@ int data_exec_tests( void * the_argp )
 			printf("data_exec-i386 failed\n");
 			goto test_failed_exit;
 		}
-	}
-
-	if (arch == POWERPC) {
-		if (system("arch -arch ppc helpers/data_exec") != 0) {
-			printf("data_exec-ppc failed\n");
+		
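+		/*
+		 * Spawn a helper that executes from its data area; with the
+		 * _POSIX_SPAWN_ALLOW_DATA_EXEC attribute set, the kernel
+		 * should permit this and the helper should exit 0.
+		 */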
+		posix_spawnattr_init(&attrp);
+		posix_spawnattr_setflags(&attrp, _POSIX_SPAWN_ALLOW_DATA_EXEC );
+		ret = posix_spawn(&my_pid, "helpers/data_exec32nonxspawn", NULL, &attrp, argv, NULL);
+		if (ret) {
+			printf("data_exec-i386 failed in posix_spawn %s\n", strerror(errno));
+			goto test_failed_exit;
+		}
+		ret = wait4(my_pid, &my_status, 0, NULL);
+		if (ret == -1) {
+			printf("data_exec-i386 wait4 failed with errno %d - %s\n", errno, strerror(errno));
+			goto test_failed_exit;
+		}
+		if (WEXITSTATUS(my_status) != 0) {
+			printf("data_exec-i386 _POSIX_SPAWN_ALLOW_DATA_EXEC failed\n");
 			goto test_failed_exit;
 		}
 	}
diff --git a/tools/tests/xnu_quick_test/tests.h b/tools/tests/xnu_quick_test/tests.h
index f93240152..53b346804 100644
--- a/tools/tests/xnu_quick_test/tests.h
+++ b/tools/tests/xnu_quick_test/tests.h
@@ -16,6 +16,8 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <stdbool.h>
+#include <assert.h>
 #include <stdarg.h>		/* Used to support printf() in misc.c */
 #include <mach/machine.h>	/* Used to determine host properties */
 #include <mach/vm_inherit.h>
@@ -49,11 +51,12 @@
 
 #define MY_BUFFER_SIZE (1024 * 10)
 #define ARM 	100  /* I am not sure if the value really matters? */
-#define POWERPC	238947
 #define INTEL	38947			/* 
 					 * Random values used by execve tests to 
 					 * determine architecture of machine.
 					 */
+#define	FILE_NOTME	"/tmp/notme"	/* file in /tmp not owned by me */
+#define	FILE_ME		"/tmp/me"	/* file in /tmp owned by me */
 
 typedef int (*test_rtn_t)(void *);
 
@@ -112,6 +115,9 @@ int data_exec_tests( void * the_argp );
 int machvm_tests( void * the_argp );
 int getdirentries_test( void * the_argp );
 int statfs_32bit_inode_tests( void * the_argp );
+int commpage_data_tests( void * the_argp );
+int atomic_fifo_queue_test( void * the_argp );
+int sched_tests( void * the_argp );
 
 struct test_entry 
 {
@@ -126,4 +132,14 @@ typedef struct test_entry * test_entryp;
 int my_printf(const char * __restrict fmt, ...);
 #define printf my_printf
 
+/*
+   When xnu_quick_test runs under the testbots, disable the special
+   printf defined above, so that log messages come out in a format
+   the testbots understand.
+*/
+
+#if RUN_UNDER_TESTBOTS
+#undef printf
+#endif
+
 #endif /* !_TESTS_H_ */
diff --git a/tools/tests/xnu_quick_test/xattr_tests.c b/tools/tests/xnu_quick_test/xattr_tests.c
index b3248edc2..2b33d6387 100644
--- a/tools/tests/xnu_quick_test/xattr_tests.c
+++ b/tools/tests/xnu_quick_test/xattr_tests.c
@@ -3,7 +3,7 @@
  *  xnu_quick_test
  *
  *  Created by Jerry Cottingham on 6/2/2005.
- *  Copyright 2005 Apple Computer Inc. All rights reserved.
+ *  Copyright 2005 Apple Computer Inc. All rights reserved.
  *
  */
 
@@ -28,17 +28,18 @@ int xattr_tests( void * the_argp )
 	char		my_buffer[ 64 ];
 	char		my_xattr_data[ ] = "xattr_foo";
 	kern_return_t   my_kr;
+	int xattr_len = 0;
 	
 	my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
-        if(my_kr != KERN_SUCCESS){
-                printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
-                goto test_failed_exit;
-        }
-
+	if(my_kr != KERN_SUCCESS){
+		printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) );
+		goto test_failed_exit;
+	}
+	
 	*my_pathp = 0x00;
 	strcat( my_pathp, &g_target_path[0] );
 	strcat( my_pathp, "/" );
-
+	
 	/* create a test file */
 	my_err = create_random_name( my_pathp, 1 );
 	if ( my_err != 0 ) {
@@ -58,41 +59,49 @@ int xattr_tests( void * the_argp )
 		printf( "listxattr failed with error %d - \"%s\" \n", errno, strerror( errno) );
 		goto test_failed_exit;
 	}
-	if ( my_result != (strlen( XATTR_TEST_NAME ) + 1) ) {
-		printf( "listxattr did not get the attribute name length \n" );
+	
+	if ( my_result < (strlen( XATTR_TEST_NAME ) + 1) ) {
+		printf( "listxattr did not get the attribute name length: my_result %d, strlen %zu \n", my_result, (strlen(XATTR_TEST_NAME)+1) );
 		goto test_failed_exit;
 	}
-
+	
 	memset( &my_buffer[0], 0x00, sizeof( my_buffer ) );
+	
 	my_result = getxattr( my_pathp, XATTR_TEST_NAME, &my_buffer[0], sizeof(my_buffer), 0, 0 );
 	if ( my_err == -1 ) {
 		printf( "getxattr failed with error %d - \"%s\" \n", errno, strerror( errno) );
 		goto test_failed_exit;
 	}
+	
 	if ( my_result != (strlen( &my_xattr_data[0] ) + 1) ||
-		 strcmp( &my_buffer[0], &my_xattr_data[0] ) != 0 ) {
+		strcmp( &my_buffer[0], &my_xattr_data[0] ) != 0 ) {
 		printf( "getxattr did not get the correct attribute data \n" );
 		goto test_failed_exit;
 	}
-
+	
 	/* use removexattr to remove an attribute to our test file */
 	my_err = removexattr( my_pathp, XATTR_TEST_NAME, 0 );
 	if ( my_err == -1 ) {
 		printf( "removexattr failed with error %d - \"%s\" \n", errno, strerror( errno) );
 		goto test_failed_exit;
 	}
-
+	
 	/* make sure it is gone */
 	my_result = listxattr( my_pathp, NULL, 0, 0 );
-	if ( my_err == -1 ) {
+	if ( my_result == -1 ) {
 		printf( "listxattr failed with error %d - \"%s\" \n", errno, strerror( errno) );
 		goto test_failed_exit;
 	}
-	if ( my_result != 0 ) {
-		printf( "removexattr did not remove our test attribute \n" );
+	
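+	/* the attribute was just removed, so getxattr should now fail with ENOATTR */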
+	memset( &my_buffer[0], 0x00, sizeof( my_buffer ) );
+	my_result = getxattr( my_pathp, XATTR_TEST_NAME, &my_buffer[0], sizeof(my_buffer), 0, 0 );
+	if ( my_result != -1 || errno != ENOATTR ) {
+		printf( "getxattr should have failed with ENOATTR - got result %d, error %d - \"%s\" \n", my_result, errno, strerror( errno) );
 		goto test_failed_exit;
 	}
-
+	
 	/* repeat tests using file descriptor versions of the xattr system calls */
 	my_fd = open( my_pathp, O_RDONLY, 0 );
 	if ( my_fd == -1 ) {
@@ -100,7 +109,7 @@ int xattr_tests( void * the_argp )
 		printf( "\t file we attempted to open -> \"%s\" \n", my_pathp );
 		goto test_failed_exit;
 	}
-
+	
 	/* use fsetxattr to add an attribute to our test file */
 	my_err = fsetxattr( my_fd, XATTR_TEST_NAME, &my_xattr_data[0], sizeof(my_xattr_data), 0, 0 );
 	if ( my_err == -1 ) {
@@ -114,11 +123,11 @@ int xattr_tests( void * the_argp )
 		printf( "flistxattr failed with error %d - \"%s\" \n", errno, strerror( errno) );
 		goto test_failed_exit;
 	}
-	if ( my_result != (strlen( XATTR_TEST_NAME ) + 1) ) {
+	if ( my_result < (strlen( XATTR_TEST_NAME ) + 1) ) {
 		printf( "flistxattr did not get the attribute name length \n" );
 		goto test_failed_exit;
 	}
-
+	
 	memset( &my_buffer[0], 0x00, sizeof( my_buffer ) );
 	my_result = fgetxattr( my_fd, XATTR_TEST_NAME, &my_buffer[0], sizeof(my_buffer), 0, 0 );
 	if ( my_err == -1 ) {
@@ -126,32 +135,35 @@ int xattr_tests( void * the_argp )
 		goto test_failed_exit;
 	}
 	if ( my_result != (strlen( &my_xattr_data[0] ) + 1) ||
-		 strcmp( &my_buffer[0], &my_xattr_data[0] ) != 0 ) {
+		strcmp( &my_buffer[0], &my_xattr_data[0] ) != 0 ) {
 		printf( "fgetxattr did not get the correct attribute data \n" );
 		goto test_failed_exit;
 	}
-
+	
 	/* use fremovexattr to remove an attribute to our test file */
 	my_err = fremovexattr( my_fd, XATTR_TEST_NAME, 0 );
 	if ( my_err == -1 ) {
 		printf( "fremovexattr failed with error %d - \"%s\" \n", errno, strerror( errno) );
 		goto test_failed_exit;
 	}
-
+	
 	/* make sure it is gone */
 	my_result = flistxattr( my_fd, NULL, 0, 0 );
-	if ( my_err == -1 ) {
+	if ( my_result == -1 ) {
 		printf( "flistxattr failed with error %d - \"%s\" \n", errno, strerror( errno) );
 		goto test_failed_exit;
 	}
-	if ( my_result != 0 ) {
-		printf( "fremovexattr did not remove our test attribute \n" );
+	
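+	/* likewise, fgetxattr on the removed attribute should fail with ENOATTR */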
+	memset( my_buffer, 0x00, sizeof( my_buffer ) );
+	my_result = fgetxattr( my_fd, XATTR_TEST_NAME, &my_buffer[0], sizeof(my_buffer), 0, 0 );
+	if ( my_result != -1 || errno != ENOATTR ) {
+		printf( "fgetxattr should have failed with ENOATTR - got result %d, error %d - \"%s\" \n", my_result, errno, strerror( errno) );
 		goto test_failed_exit;
 	}
-	 
+	
 	my_err = 0;
 	goto test_passed_exit;
-
+	
 test_failed_exit:
 	my_err = -1;
 	
@@ -161,7 +173,7 @@ test_passed_exit:
 	if ( my_pathp != NULL ) {
 		remove( my_pathp );
 		vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);	
-	 }
+	}
 	return( my_err );
 }
 
diff --git a/tools/tests/zero-to-n/Makefile b/tools/tests/zero-to-n/Makefile
new file mode 100644
index 000000000..30a82361b
--- /dev/null
+++ b/tools/tests/zero-to-n/Makefile
@@ -0,0 +1,5 @@
+DEBUG=0
+CC=/usr/bin/llvm-gcc-4.2
+
+zn: zero-to-n.c
+	$(CC) -Wall -arch i386 -arch x86_64 zero-to-n.c -o zn -DDEBUG=$(DEBUG) -ggdb
diff --git a/tools/tests/zero-to-n/zero-to-n.c b/tools/tests/zero-to-n/zero-to-n.c
new file mode 100644
index 000000000..0d4dcfe16
--- /dev/null
+++ b/tools/tests/zero-to-n/zero-to-n.c
@@ -0,0 +1,579 @@
+/*
+ * Copyright (c) 2009 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include <unistd.h>
+#include <stdio.h>
+#include <math.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/ptrace.h>
+#include <semaphore.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <time.h>	/* for time(), used to seed rand() */
+
+#include <libkern/OSAtomic.h>
+
+#include <mach/mach_time.h>
+#include <mach/mach.h>
+#include <mach/task.h>
+#include <mach/semaphore.h>
+
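+/*
+ * Wakeup patterns: broadcast on a single shared semaphore, broadcast by
+ * signalling one semaphore per thread, or a chain in which each thread
+ * signals its successor.
+ */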
+typedef enum wake_type { WAKE_BROADCAST_ONESEM, WAKE_BROADCAST_PERTHREAD, WAKE_CHAIN } wake_type_t;
+typedef enum my_policy_type { MY_POLICY_REALTIME, MY_POLICY_TIMESHARE, MY_POLICY_FIXEDPRI } my_policy_type_t;
+
+#define assert(truth, label) do { if(!(truth)) { printf("Thread %p: failure on line %d\n", pthread_self(), __LINE__); goto label; } } while (0)
+
+#define CONSTRAINT_NANOS	(20000000ll)	/* 20 ms */
+#define COMPUTATION_NANOS	(10000000ll)	/* 10 ms */
+#define TRACEWORTHY_NANOS	(10000000ll)	/* 10 ms */
+
+#if DEBUG
+#define debug_log(args...) printf(args)
+#else
+#define debug_log(args...) do { } while(0)
+#endif
+
+/* Declarations */
+void* 			child_thread_func(void *arg);
+void			print_usage();
+int			thread_setup();
+my_policy_type_t	parse_thread_policy(const char *str);
+int			thread_finish_iteration(int id);
+
+/* Global variables (general) */
+int			g_numthreads;
+wake_type_t 		g_waketype;
+my_policy_type_t	g_policy;
+int			g_iterations;
+struct mach_timebase_info g_mti;
+semaphore_t		g_main_sem;
+uint64_t 		*g_thread_endtimes_abs;
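+/* counts threads ready at startup, then threads finished in each iteration (reset by main) */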
+volatile int32_t 	g_done_threads;
+boolean_t		g_do_spin = FALSE;
+boolean_t		g_verbose = FALSE;
+uint64_t	 	g_starttime_abs;
+#if MIMIC_DIGI_LEAD_TIME
+int			g_long_spinid;
+uint64_t		g_spinlength_abs;
+#endif /* MIMIC_DIGI_LEAD_TIME */
+
+/* Global variables (broadcast) */
+semaphore_t 		g_machsem;
+semaphore_t 		g_leadersem;
+
+/* Global variables (chain) */
+semaphore_t		*g_semarr;
+
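+/*
+ * Convert between mach_absolute_time() units and nanoseconds using the
+ * numer/denom ratio reported by mach_timebase_info().
+ */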
+uint64_t
+abs_to_nanos(uint64_t abstime)
+{
+	return (uint64_t)(abstime * (((double)g_mti.numer) / ((double)g_mti.denom)));
+}
+
+uint64_t
+nanos_to_abs(uint64_t ns)
+{
+	return (uint64_t)(ns * (((double)g_mti.denom) / ((double)g_mti.numer)));
+}
+
+/*
+ * Figure out what thread policy to use 
+ */
+my_policy_type_t
+parse_thread_policy(const char *str)
+{
+	if (strcmp(str, "timeshare") == 0) {
+		return MY_POLICY_TIMESHARE;
+	} else if (strcmp(str, "realtime") == 0) {
+		return MY_POLICY_REALTIME;
+	} else if (strcmp(str, "fixed") == 0) {
+		return MY_POLICY_FIXEDPRI;
+	} else {
+		printf("Invalid thread policy %s\n", str);
+		exit(1);
+	}
+}
+
+/*
+ * Figure out what wakeup pattern to use
+ */
+wake_type_t 
+parse_wakeup_pattern(const char *str) 
+{
+	if (strcmp(str, "chain") == 0) {
+		return WAKE_CHAIN;
+	} else if (strcmp(str, "broadcast-single-sem") == 0) {
+		return WAKE_BROADCAST_ONESEM;
+	} else if (strcmp(str, "broadcast-per-thread") == 0) {
+		return WAKE_BROADCAST_PERTHREAD;
+	} else {
+		print_usage();
+		exit(1);
+	}
+}
+
+/*
+ * Set policy
+ */
+int
+thread_setup()
+{
+	int res;
+
+	switch (g_policy) {
+		case MY_POLICY_TIMESHARE:
+		{
+			return 0;
+		}
+		case MY_POLICY_REALTIME: 
+		{
+			thread_time_constraint_policy_data_t pol;
+
+			/* Hard-coded realtime parameters (similar to what Digi uses) */
+			pol.period = 100000;
+			pol.constraint =  nanos_to_abs(CONSTRAINT_NANOS);
+			pol.computation = nanos_to_abs(COMPUTATION_NANOS);
+			pol.preemptible = 0; /* Ignored by OS */
+
+			res = thread_policy_set(mach_thread_self(), THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t) &pol, THREAD_TIME_CONSTRAINT_POLICY_COUNT);
+			assert(res == 0, fail);
+			break;
+		}
+		case MY_POLICY_FIXEDPRI: 
+		{
+			thread_extended_policy_data_t pol;
+			pol.timeshare = 0;
+
+			res = thread_policy_set(mach_thread_self(), THREAD_EXTENDED_POLICY, (thread_policy_t) &pol, THREAD_EXTENDED_POLICY_COUNT);
+			assert(res == 0, fail);
+			break;
+		}
+		default:
+		{
+			printf("invalid policy type\n");
+			return 1;
+		}
+	}
+
+	return 0;
+fail:
+	return 1;
+}
+
+/*
+ * Wake up main thread if everyone's done
+ */
+int
+thread_finish_iteration(int id)
+{
+	int32_t new;
+	int res = 0;
+	volatile float x = 0.0;
+	volatile float y = 0.0;
+
+	debug_log("Thread %p finished iteration.\n", pthread_self());
+	
+#if MIMIC_DIGI_LEAD_TIME
+	/*
+	 * One randomly chosen thread determines when everybody gets to stop.
+	 */
+	if (g_do_spin) {
+		if (g_long_spinid == id) {
+			uint64_t endspin;
+
+			/* This thread spins for fully half of its computation */
+			endspin = g_starttime_abs + g_spinlength_abs;
+			while (mach_absolute_time() < endspin) {
+				y = y + 1.5 + x;
+				x = sqrt(y);
+			}
+		}
+	}
+#endif /* MIMIC_DIGI_LEAD_TIME */
+	
+	new = OSAtomicIncrement32(&g_done_threads);
+
+	debug_log("New value is %d\n", new);
+
+	/*
+	 * When the last thread finishes, everyone gets to go back to sleep.
+	 */
+	if (new == g_numthreads) {
+		debug_log("Thread %p signalling main thread.\n", pthread_self());
+		res = semaphore_signal(g_main_sem);
+	} else {
+		if (g_do_spin) {
+			while (g_done_threads < g_numthreads) {
+				y = y + 1.5 + x;
+				x = sqrt(y);
+			}
+		}
+	}
+
+	return res;
+}
+
+/*
+ * Wait for a wakeup, potentially wake up another of the "0-N" threads,
+ * and notify the main thread when done.
+ */
+void*
+child_thread_func(void *arg)
+{
+	int my_id = (int)(uintptr_t)arg;
+	int res;
+	int i, j;
+	int32_t new;
+
+	/* Set policy and so forth */
+	thread_setup();
+
+	/* Tell main thread when everyone has set up */
+	new = OSAtomicIncrement32(&g_done_threads);
+	if (new == g_numthreads) {
+		semaphore_signal(g_main_sem);
+	}
+
+	/* For each iteration */
+	for (i = 0; i < g_iterations; i++) {
+		/*
+		 * Leader thread either wakes everyone up or starts the chain going.
+		 */
+		if (my_id == 0) { 
+			res = semaphore_wait(g_leadersem);
+			assert(res == 0, fail);
+			
+			g_thread_endtimes_abs[my_id] = mach_absolute_time();
+
+#if MIMIC_DIGI_LEAD_TIME
+			g_long_spinid = rand() % g_numthreads;
+#endif /* MIMIC_DIGI_LEAD_TIME */
+
+			switch (g_waketype) {
+			case WAKE_CHAIN:
+				semaphore_signal(g_semarr[my_id + 1]);
+				break;
+			case WAKE_BROADCAST_ONESEM: 
+				semaphore_signal_all(g_machsem);
+				break;
+			case WAKE_BROADCAST_PERTHREAD:
+				for (j = 1; j < g_numthreads; j++) {
+					semaphore_signal(g_semarr[j]);
+				}
+				break;
+			default:
+				printf("Invalid wakeup type?!\n");
+				exit(1);
+			}
+		} else {
+			/*
+			 * Everyone else waits to be woken up,
+			 * records when she wakes up, and possibly
+			 * wakes up a friend.
+			 */
+			switch(g_waketype)  {
+			case WAKE_BROADCAST_ONESEM:
+				res = semaphore_wait(g_machsem);
+				assert(res == KERN_SUCCESS, fail);
+
+				g_thread_endtimes_abs[my_id] = mach_absolute_time();
+
+				break;
+			case WAKE_BROADCAST_PERTHREAD:
+				res = semaphore_wait(g_semarr[my_id]);
+				assert(res == 0, fail);
+
+				g_thread_endtimes_abs[my_id] = mach_absolute_time();
+				break;
+
+			/*
+			 * For the chain wakeup case:
+			 * wait, record time, signal next thread if appropriate
+			 */
+			case WAKE_CHAIN:
+				res = semaphore_wait(g_semarr[my_id]);
+				assert(res == 0, fail);
+
+				g_thread_endtimes_abs[my_id] = mach_absolute_time();
+
+				if (my_id < (g_numthreads - 1)) {
+					res = semaphore_signal(g_semarr[my_id + 1]);
+					assert(res == 0, fail);
+				}
+
+				break;
+			default:
+				printf("Invalid wake type.\n");
+				goto fail;
+			}
+		}
+
+		res = thread_finish_iteration(my_id);
+		assert(res == 0, fail);
+	}
+
+	return 0;
+fail:
+	exit(1);
+}
+
+/*
+ * Admittedly not very attractive.
+ */
+void
+print_usage()
+{
+	printf("Usage: zn <num threads> <chain | broadcast-single-sem | broadcast-per-thread> <realtime | timeshare | fixed> <num iterations> [-trace  <traceworthy latency in ns>] [-spin] [-verbose]\n");
+}
+
+/*
+ * Given an array of uint64_t values, compute average, max, min, and standard deviation
+ */
+void 
+compute_stats(uint64_t *values, uint64_t count, float *averagep, uint64_t *maxp, uint64_t *minp, float *stddevp)
+{
+	int i;
+	uint64_t _sum = 0;
+	uint64_t _max = 0;
+	uint64_t _min = UINT64_MAX;
+	float	 _avg = 0;
+	float 	 _dev = 0;
+
+	for (i = 0; i < count; i++) {
+		_sum += values[i];
+		_max = values[i] > _max ? values[i] : _max;
+		_min = values[i] < _min ? values[i] : _min;
+	}
+
+	_avg = ((float)_sum) / ((float)count);
+	
+	_dev = 0;
+	for (i = 0; i < count; i++) {
+		_dev += powf((((float)values[i]) - _avg), 2);
+	}
+	
+	_dev /= count;
+	_dev = sqrtf(_dev);
+
+	*averagep = _avg;
+	*maxp = _max;
+	*minp = _min;
+	*stddevp = _dev;
+}
+
+int
+main(int argc, char **argv)
+{
+	int		i;
+	int 		res;
+	pthread_t	*threads;
+	uint64_t	*worst_latencies_ns;
+	uint64_t	*worst_latencies_from_first_ns;
+	uint64_t	max, min;
+	uint64_t	traceworthy_latency_ns = TRACEWORTHY_NANOS;
+	float		avg, stddev;
+
+	srand(time(NULL));
+
+	if (argc < 5 || argc > 9) {
+		print_usage();
+		goto fail;
+	}
+
+	/* How many threads? */
+	g_numthreads = atoi(argv[1]);
+
+	/* What wakeup pattern? */
+	g_waketype = parse_wakeup_pattern(argv[2]);
+
+	/* Policy */
+	g_policy = parse_thread_policy(argv[3]);
+
+	/* Iterations */
+	g_iterations = atoi(argv[4]);
+
+	/* Optional args */
+	for (i = 5; i < argc; i++) {
+		if (strcmp(argv[i], "-spin") == 0) {
+			g_do_spin = TRUE;
+		} else if (strcmp(argv[i], "-verbose") == 0) {
+			g_verbose = TRUE;
+		} else if ((strcmp(argv[i], "-trace") == 0) && 
+				(i < (argc - 1))) {
+			traceworthy_latency_ns = strtoull(argv[++i], NULL, 10);
+		} else {
+			print_usage();
+			goto fail;
+		}
+	}
+
+	mach_timebase_info(&g_mti);
+
+#if MIMIC_DIGI_LEAD_TIME
+	g_spinlength_abs = nanos_to_abs(COMPUTATION_NANOS) / 2;
+#endif /* MIMIC_DIGI_LEAD_TIME */
+
+	/* Arrays for threads and their wakeup times */
+	threads = (pthread_t*) malloc(sizeof(pthread_t) * g_numthreads);
+	assert(threads, fail);
+
+	g_thread_endtimes_abs = (uint64_t*) malloc(sizeof(uint64_t) * g_numthreads);
+	assert(g_thread_endtimes_abs, fail);
+
+	worst_latencies_ns = (uint64_t*) malloc(sizeof(uint64_t) * g_iterations);
+	assert(worst_latencies_ns, fail);
+
+	worst_latencies_from_first_ns = (uint64_t*) malloc(sizeof(uint64_t) * g_iterations);
+	assert(worst_latencies_from_first_ns, fail);
+	res = semaphore_create(mach_task_self(), &g_main_sem, SYNC_POLICY_FIFO, 0);
+	assert(res == KERN_SUCCESS, fail);
+
+	/* Either one big semaphore or one per thread */
+	if (g_waketype == WAKE_CHAIN || g_waketype == WAKE_BROADCAST_PERTHREAD) {
+		g_semarr = malloc(sizeof(semaphore_t) * g_numthreads);
+		assert(g_semarr != NULL, fail);
+
+		for (i = 0; i < g_numthreads; i++) {
+			res = semaphore_create(mach_task_self(), &g_semarr[i], SYNC_POLICY_FIFO, 0);
+			assert(res == KERN_SUCCESS, fail);
+		}
+		
+		g_leadersem = g_semarr[0];
+	} else {
+		res = semaphore_create(mach_task_self(), &g_machsem, SYNC_POLICY_FIFO, 0);
+		assert(res == KERN_SUCCESS, fail);
+		res = semaphore_create(mach_task_self(), &g_leadersem, SYNC_POLICY_FIFO, 0);
+		assert(res == KERN_SUCCESS, fail);
+	}
+
+	/* Create the threads */
+	g_done_threads = 0;
+	for (i = 0; i < g_numthreads; i++) {
+		res = pthread_create(&threads[i], NULL, child_thread_func, (void*)(uintptr_t)i);
+		assert(res == 0, fail);
+	}
+
+	/* Let everyone get settled */
+	semaphore_wait(g_main_sem);
+	sleep(1);
+
+	/* Go! */
+	for (i = 0; i < g_iterations; i++) {
+		int j;
+		uint64_t worst_abs = 0;
+
+		g_done_threads = 0;
+		OSMemoryBarrier();
+
+		g_starttime_abs = mach_absolute_time();
+
+		/* Fire them off */
+		semaphore_signal(g_leadersem);
+
+		/* Wait for worker threads to finish */
+		res = semaphore_wait(g_main_sem);
+		assert(res == KERN_SUCCESS, fail);
+
+		/* 
+		 * We report the worst latencies relative to start time
+		 * and relative to the lead worker thread.
+		 */
+		for (j = 0; j < g_numthreads; j++) {
+			uint64_t latency_abs;
+		
+			latency_abs = g_thread_endtimes_abs[j] - g_starttime_abs;
+			worst_abs = worst_abs < latency_abs ? latency_abs : worst_abs;
+		}
+	
+		worst_latencies_ns[i] = abs_to_nanos(worst_abs);
+
+		worst_abs = 0;
+		for (j = 1; j < g_numthreads; j++) {
+			uint64_t latency_abs;
+		
+			latency_abs = g_thread_endtimes_abs[j] - g_thread_endtimes_abs[0];
+			worst_abs = worst_abs < latency_abs ? latency_abs : worst_abs;
+		}
+
+		worst_latencies_from_first_ns[i] = abs_to_nanos(worst_abs);
+
+		/*
+		 * In the event of a bad run, cut a trace point.
+		 */
+		if (worst_latencies_from_first_ns[i] > traceworthy_latency_ns) {
+			int _tmp;
+
+			if (g_verbose) {
+				printf("Worst on this round was %.2f us.\n", ((float)worst_latencies_from_first_ns[i]) / 1000.0);
+			}
+
+			_tmp = syscall(SYS_kdebug_trace, 0xEEEEEEEE, 0, 0, 0, 0);
+		}
+
+		/* Let worker threads get back to sleep... */
+		usleep(g_numthreads * 10);
+	}
+
+	/* Rejoin threads */
+	for (i = 0; i < g_numthreads; i++) {
+		res = pthread_join(threads[i], NULL);
+		assert(res == 0, fail);
+	}
+
+	compute_stats(worst_latencies_ns, g_iterations, &avg, &max, &min, &stddev);
+	printf("Results (from a stop):\n");
+	printf("Max:\t\t%.2f us\n", ((float)max) / 1000.0);
+	printf("Min:\t\t%.2f us\n", ((float)min) / 1000.0);
+	printf("Avg:\t\t%.2f us\n", avg / 1000.0);
+	printf("Stddev:\t\t%.2f us\n", stddev / 1000.0);
+
+	putchar('\n');
+
+	compute_stats(worst_latencies_from_first_ns, g_iterations, &avg, &max, &min, &stddev);
+	printf("Results (relative to first thread):\n");
+	printf("Max:\t\t%.2f us\n", ((float)max) / 1000.0);
+	printf("Min:\t\t%.2f us\n", ((float)min) / 1000.0);
+	printf("Avg:\t\t%.2f us\n", avg / 1000.0);
+	printf("Stddev:\t\t%.2f us\n", stddev / 1000.0);
+
+#if 0
+	for (i = 0; i < g_iterations; i++) {
+		printf("Iteration %d: %f us\n", i, worst_latencies_ns[i] / 1000.0);
+	}
+#endif 
+
+	return 0;
+fail:
+	return 1;
+}
-- 
2.47.2